3145 lines
759 KiB
JSON
3145 lines
759 KiB
JSON
{"current_steps": 5, "total_steps": 15621, "loss": 2.1603, "lr": 5.118362124120281e-09, "epoch": 0.0003200819409768901, "percentage": 0.03, "elapsed_time": "0:00:00", "remaining_time": "0:48:22", "throughput": 16527.87, "total_tokens": 15360}
|
|
{"current_steps": 10, "total_steps": 15621, "loss": 2.344, "lr": 1.1516314779270634e-08, "epoch": 0.0006401638819537802, "percentage": 0.06, "elapsed_time": "0:00:01", "remaining_time": "0:40:30", "throughput": 19981.57, "total_tokens": 31104}
|
|
{"current_steps": 15, "total_steps": 15621, "loss": 2.115, "lr": 1.7914267434420987e-08, "epoch": 0.0009602458229306702, "percentage": 0.1, "elapsed_time": "0:00:02", "remaining_time": "0:37:17", "throughput": 21482.87, "total_tokens": 46208}
|
|
{"current_steps": 20, "total_steps": 15621, "loss": 2.741, "lr": 2.431222008957134e-08, "epoch": 0.0012803277639075604, "percentage": 0.13, "elapsed_time": "0:00:02", "remaining_time": "0:36:09", "throughput": 22461.03, "total_tokens": 62464}
|
|
{"current_steps": 25, "total_steps": 15621, "loss": 2.0952, "lr": 3.071017274472169e-08, "epoch": 0.0016004097048844504, "percentage": 0.16, "elapsed_time": "0:00:03", "remaining_time": "0:35:34", "throughput": 23114.25, "total_tokens": 79104}
|
|
{"current_steps": 30, "total_steps": 15621, "loss": 2.1934, "lr": 3.710812539987204e-08, "epoch": 0.0019204916458613404, "percentage": 0.19, "elapsed_time": "0:00:04", "remaining_time": "0:34:58", "throughput": 23510.42, "total_tokens": 94912}
|
|
{"current_steps": 35, "total_steps": 15621, "loss": 2.3371, "lr": 4.350607805502239e-08, "epoch": 0.0022405735868382304, "percentage": 0.22, "elapsed_time": "0:00:04", "remaining_time": "0:34:33", "throughput": 23790.4, "total_tokens": 110784}
|
|
{"current_steps": 40, "total_steps": 15621, "loss": 2.1424, "lr": 4.990403071017274e-08, "epoch": 0.002560655527815121, "percentage": 0.26, "elapsed_time": "0:00:05", "remaining_time": "0:34:02", "throughput": 23968.62, "total_tokens": 125696}
|
|
{"current_steps": 45, "total_steps": 15621, "loss": 2.0945, "lr": 5.6301983365323095e-08, "epoch": 0.002880737468792011, "percentage": 0.29, "elapsed_time": "0:00:05", "remaining_time": "0:33:38", "throughput": 24122.75, "total_tokens": 140672}
|
|
{"current_steps": 50, "total_steps": 15621, "loss": 2.0027, "lr": 6.269993602047345e-08, "epoch": 0.003200819409768901, "percentage": 0.32, "elapsed_time": "0:00:06", "remaining_time": "0:33:19", "throughput": 24218.12, "total_tokens": 155456}
|
|
{"current_steps": 55, "total_steps": 15621, "loss": 1.915, "lr": 6.90978886756238e-08, "epoch": 0.003520901350745791, "percentage": 0.35, "elapsed_time": "0:00:07", "remaining_time": "0:33:09", "throughput": 24304.82, "total_tokens": 170816}
|
|
{"current_steps": 60, "total_steps": 15621, "loss": 2.0244, "lr": 7.549584133077414e-08, "epoch": 0.003840983291722681, "percentage": 0.38, "elapsed_time": "0:00:07", "remaining_time": "0:32:50", "throughput": 24355.24, "total_tokens": 185088}
|
|
{"current_steps": 65, "total_steps": 15621, "loss": 1.6385, "lr": 8.18937939859245e-08, "epoch": 0.004161065232699571, "percentage": 0.42, "elapsed_time": "0:00:08", "remaining_time": "0:32:42", "throughput": 24432.92, "total_tokens": 200384}
|
|
{"current_steps": 70, "total_steps": 15621, "loss": 1.6591, "lr": 8.829174664107485e-08, "epoch": 0.004481147173676461, "percentage": 0.45, "elapsed_time": "0:00:08", "remaining_time": "0:32:36", "throughput": 24496.91, "total_tokens": 215744}
|
|
{"current_steps": 75, "total_steps": 15621, "loss": 1.6555, "lr": 9.468969929622521e-08, "epoch": 0.004801229114653352, "percentage": 0.48, "elapsed_time": "0:00:09", "remaining_time": "0:32:27", "throughput": 24521.03, "total_tokens": 230400}
|
|
{"current_steps": 80, "total_steps": 15621, "loss": 1.3232, "lr": 1.0108765195137556e-07, "epoch": 0.005121311055630242, "percentage": 0.51, "elapsed_time": "0:00:10", "remaining_time": "0:32:26", "throughput": 24611.68, "total_tokens": 246592}
|
|
{"current_steps": 85, "total_steps": 15621, "loss": 1.1532, "lr": 1.074856046065259e-07, "epoch": 0.005441392996607132, "percentage": 0.54, "elapsed_time": "0:00:10", "remaining_time": "0:32:23", "throughput": 24668.25, "total_tokens": 262272}
|
|
{"current_steps": 90, "total_steps": 15621, "loss": 1.0452, "lr": 1.1388355726167625e-07, "epoch": 0.005761474937584022, "percentage": 0.58, "elapsed_time": "0:00:11", "remaining_time": "0:32:19", "throughput": 24709.55, "total_tokens": 277760}
|
|
{"current_steps": 95, "total_steps": 15621, "loss": 1.2493, "lr": 1.202815099168266e-07, "epoch": 0.006081556878560912, "percentage": 0.61, "elapsed_time": "0:00:11", "remaining_time": "0:32:14", "throughput": 24747.28, "total_tokens": 292992}
|
|
{"current_steps": 100, "total_steps": 15621, "loss": 1.1191, "lr": 1.2667946257197694e-07, "epoch": 0.006401638819537802, "percentage": 0.64, "elapsed_time": "0:00:12", "remaining_time": "0:32:07", "throughput": 24788.85, "total_tokens": 307840}
|
|
{"current_steps": 105, "total_steps": 15621, "loss": 1.0359, "lr": 1.3307741522712732e-07, "epoch": 0.006721720760514692, "percentage": 0.67, "elapsed_time": "0:00:13", "remaining_time": "0:32:02", "throughput": 24826.2, "total_tokens": 323008}
|
|
{"current_steps": 110, "total_steps": 15621, "loss": 1.0546, "lr": 1.3947536788227767e-07, "epoch": 0.007041802701491582, "percentage": 0.7, "elapsed_time": "0:00:13", "remaining_time": "0:32:03", "throughput": 24882.46, "total_tokens": 339456}
|
|
{"current_steps": 115, "total_steps": 15621, "loss": 1.1286, "lr": 1.45873320537428e-07, "epoch": 0.007361884642468472, "percentage": 0.74, "elapsed_time": "0:00:14", "remaining_time": "0:32:00", "throughput": 24906.28, "total_tokens": 354816}
|
|
{"current_steps": 120, "total_steps": 15621, "loss": 0.8243, "lr": 1.5227127319257838e-07, "epoch": 0.007681966583445362, "percentage": 0.77, "elapsed_time": "0:00:14", "remaining_time": "0:31:54", "throughput": 24922.91, "total_tokens": 369472}
|
|
{"current_steps": 125, "total_steps": 15621, "loss": 0.9582, "lr": 1.586692258477287e-07, "epoch": 0.008002048524422252, "percentage": 0.8, "elapsed_time": "0:00:15", "remaining_time": "0:31:52", "throughput": 24944.57, "total_tokens": 384768}
|
|
{"current_steps": 130, "total_steps": 15621, "loss": 1.0307, "lr": 1.6506717850287908e-07, "epoch": 0.008322130465399142, "percentage": 0.83, "elapsed_time": "0:00:16", "remaining_time": "0:31:49", "throughput": 24970.17, "total_tokens": 400192}
|
|
{"current_steps": 135, "total_steps": 15621, "loss": 0.8953, "lr": 1.7146513115802943e-07, "epoch": 0.008642212406376032, "percentage": 0.86, "elapsed_time": "0:00:16", "remaining_time": "0:31:50", "throughput": 25010.75, "total_tokens": 416640}
|
|
{"current_steps": 140, "total_steps": 15621, "loss": 0.8263, "lr": 1.7786308381317976e-07, "epoch": 0.008962294347352922, "percentage": 0.9, "elapsed_time": "0:00:17", "remaining_time": "0:31:50", "throughput": 25041.03, "total_tokens": 432640}
|
|
{"current_steps": 145, "total_steps": 15621, "loss": 0.8971, "lr": 1.8426103646833014e-07, "epoch": 0.009282376288329812, "percentage": 0.93, "elapsed_time": "0:00:17", "remaining_time": "0:31:50", "throughput": 25057.87, "total_tokens": 448640}
|
|
{"current_steps": 150, "total_steps": 15621, "loss": 0.9544, "lr": 1.9065898912348046e-07, "epoch": 0.009602458229306703, "percentage": 0.96, "elapsed_time": "0:00:18", "remaining_time": "0:31:50", "throughput": 25076.32, "total_tokens": 464448}
|
|
{"current_steps": 155, "total_steps": 15621, "loss": 0.8598, "lr": 1.9705694177863084e-07, "epoch": 0.009922540170283593, "percentage": 0.99, "elapsed_time": "0:00:19", "remaining_time": "0:31:47", "throughput": 25083.95, "total_tokens": 479488}
|
|
{"current_steps": 160, "total_steps": 15621, "loss": 0.7343, "lr": 2.034548944337812e-07, "epoch": 0.010242622111260483, "percentage": 1.02, "elapsed_time": "0:00:19", "remaining_time": "0:31:46", "throughput": 25105.08, "total_tokens": 495296}
|
|
{"current_steps": 165, "total_steps": 15621, "loss": 0.7845, "lr": 2.0985284708893152e-07, "epoch": 0.010562704052237373, "percentage": 1.06, "elapsed_time": "0:00:20", "remaining_time": "0:31:42", "throughput": 25118.24, "total_tokens": 510144}
|
|
{"current_steps": 170, "total_steps": 15621, "loss": 0.8491, "lr": 2.162507997440819e-07, "epoch": 0.010882785993214263, "percentage": 1.09, "elapsed_time": "0:00:20", "remaining_time": "0:31:39", "throughput": 25118.75, "total_tokens": 524928}
|
|
{"current_steps": 175, "total_steps": 15621, "loss": 0.7122, "lr": 2.2264875239923222e-07, "epoch": 0.011202867934191153, "percentage": 1.12, "elapsed_time": "0:00:21", "remaining_time": "0:31:41", "throughput": 25139.89, "total_tokens": 541504}
|
|
{"current_steps": 180, "total_steps": 15621, "loss": 0.7354, "lr": 2.290467050543826e-07, "epoch": 0.011522949875168043, "percentage": 1.15, "elapsed_time": "0:00:22", "remaining_time": "0:31:37", "throughput": 25143.86, "total_tokens": 556096}
|
|
{"current_steps": 185, "total_steps": 15621, "loss": 0.734, "lr": 2.3544465770953295e-07, "epoch": 0.011843031816144933, "percentage": 1.18, "elapsed_time": "0:00:22", "remaining_time": "0:31:38", "throughput": 25167.09, "total_tokens": 572736}
|
|
{"current_steps": 190, "total_steps": 15621, "loss": 0.8565, "lr": 2.418426103646833e-07, "epoch": 0.012163113757121823, "percentage": 1.22, "elapsed_time": "0:00:23", "remaining_time": "0:31:37", "throughput": 25176.28, "total_tokens": 588352}
|
|
{"current_steps": 195, "total_steps": 15621, "loss": 0.9816, "lr": 2.4824056301983363e-07, "epoch": 0.012483195698098713, "percentage": 1.25, "elapsed_time": "0:00:23", "remaining_time": "0:31:36", "throughput": 25180.76, "total_tokens": 603520}
|
|
{"current_steps": 200, "total_steps": 15621, "loss": 0.8158, "lr": 2.54638515674984e-07, "epoch": 0.012803277639075603, "percentage": 1.28, "elapsed_time": "0:00:24", "remaining_time": "0:31:35", "throughput": 25190.16, "total_tokens": 619392}
|
|
{"current_steps": 205, "total_steps": 15621, "loss": 0.8032, "lr": 2.6103646833013433e-07, "epoch": 0.013123359580052493, "percentage": 1.31, "elapsed_time": "0:00:25", "remaining_time": "0:31:35", "throughput": 25209.08, "total_tokens": 635456}
|
|
{"current_steps": 210, "total_steps": 15621, "loss": 0.8716, "lr": 2.6743442098528466e-07, "epoch": 0.013443441521029383, "percentage": 1.34, "elapsed_time": "0:00:25", "remaining_time": "0:31:34", "throughput": 25215.54, "total_tokens": 650880}
|
|
{"current_steps": 215, "total_steps": 15621, "loss": 0.8278, "lr": 2.7383237364043504e-07, "epoch": 0.013763523462006273, "percentage": 1.38, "elapsed_time": "0:00:26", "remaining_time": "0:31:34", "throughput": 25220.34, "total_tokens": 666688}
|
|
{"current_steps": 220, "total_steps": 15621, "loss": 0.7898, "lr": 2.802303262955854e-07, "epoch": 0.014083605402983163, "percentage": 1.41, "elapsed_time": "0:00:27", "remaining_time": "0:31:32", "throughput": 25228.53, "total_tokens": 682112}
|
|
{"current_steps": 225, "total_steps": 15621, "loss": 0.8381, "lr": 2.866282789507358e-07, "epoch": 0.014403687343960053, "percentage": 1.44, "elapsed_time": "0:00:27", "remaining_time": "0:31:31", "throughput": 25239.98, "total_tokens": 697728}
|
|
{"current_steps": 230, "total_steps": 15621, "loss": 0.6829, "lr": 2.9302623160588607e-07, "epoch": 0.014723769284936943, "percentage": 1.47, "elapsed_time": "0:00:28", "remaining_time": "0:31:28", "throughput": 25250.23, "total_tokens": 712704}
|
|
{"current_steps": 235, "total_steps": 15621, "loss": 0.9619, "lr": 2.9942418426103644e-07, "epoch": 0.015043851225913833, "percentage": 1.5, "elapsed_time": "0:00:28", "remaining_time": "0:31:30", "throughput": 25263.62, "total_tokens": 729408}
|
|
{"current_steps": 240, "total_steps": 15621, "loss": 0.7854, "lr": 3.058221369161868e-07, "epoch": 0.015363933166890723, "percentage": 1.54, "elapsed_time": "0:00:29", "remaining_time": "0:31:29", "throughput": 25273.94, "total_tokens": 745344}
|
|
{"current_steps": 245, "total_steps": 15621, "loss": 0.6965, "lr": 3.1222008957133715e-07, "epoch": 0.015684015107867613, "percentage": 1.57, "elapsed_time": "0:00:30", "remaining_time": "0:31:31", "throughput": 25301.1, "total_tokens": 762688}
|
|
{"current_steps": 250, "total_steps": 15621, "loss": 0.7105, "lr": 3.186180422264875e-07, "epoch": 0.016004097048844503, "percentage": 1.6, "elapsed_time": "0:00:30", "remaining_time": "0:31:33", "throughput": 25314.36, "total_tokens": 779392}
|
|
{"current_steps": 255, "total_steps": 15621, "loss": 0.7964, "lr": 3.2501599488163785e-07, "epoch": 0.016324178989821393, "percentage": 1.63, "elapsed_time": "0:00:31", "remaining_time": "0:31:30", "throughput": 25315.93, "total_tokens": 794112}
|
|
{"current_steps": 260, "total_steps": 15621, "loss": 0.8427, "lr": 3.314139475367882e-07, "epoch": 0.016644260930798283, "percentage": 1.66, "elapsed_time": "0:00:31", "remaining_time": "0:31:29", "throughput": 25328.06, "total_tokens": 810112}
|
|
{"current_steps": 265, "total_steps": 15621, "loss": 0.8614, "lr": 3.3781190019193855e-07, "epoch": 0.016964342871775173, "percentage": 1.7, "elapsed_time": "0:00:32", "remaining_time": "0:31:28", "throughput": 25332.87, "total_tokens": 825472}
|
|
{"current_steps": 270, "total_steps": 15621, "loss": 0.9819, "lr": 3.4420985284708893e-07, "epoch": 0.017284424812752063, "percentage": 1.73, "elapsed_time": "0:00:33", "remaining_time": "0:31:25", "throughput": 25331.03, "total_tokens": 840128}
|
|
{"current_steps": 275, "total_steps": 15621, "loss": 0.7825, "lr": 3.5060780550223926e-07, "epoch": 0.017604506753728953, "percentage": 1.76, "elapsed_time": "0:00:33", "remaining_time": "0:31:23", "throughput": 25332.49, "total_tokens": 855104}
|
|
{"current_steps": 280, "total_steps": 15621, "loss": 0.8069, "lr": 3.570057581573896e-07, "epoch": 0.017924588694705843, "percentage": 1.79, "elapsed_time": "0:00:34", "remaining_time": "0:31:23", "throughput": 25332.9, "total_tokens": 870848}
|
|
{"current_steps": 285, "total_steps": 15621, "loss": 0.7403, "lr": 3.6340371081253996e-07, "epoch": 0.018244670635682733, "percentage": 1.82, "elapsed_time": "0:00:34", "remaining_time": "0:31:21", "throughput": 25335.84, "total_tokens": 885760}
|
|
{"current_steps": 290, "total_steps": 15621, "loss": 0.7078, "lr": 3.6980166346769034e-07, "epoch": 0.018564752576659623, "percentage": 1.86, "elapsed_time": "0:00:35", "remaining_time": "0:31:19", "throughput": 25337.39, "total_tokens": 900928}
|
|
{"current_steps": 295, "total_steps": 15621, "loss": 0.793, "lr": 3.7619961612284067e-07, "epoch": 0.018884834517636517, "percentage": 1.89, "elapsed_time": "0:00:36", "remaining_time": "0:31:17", "throughput": 25344.99, "total_tokens": 915968}
|
|
{"current_steps": 300, "total_steps": 15621, "loss": 0.9919, "lr": 3.8259756877799104e-07, "epoch": 0.019204916458613407, "percentage": 1.92, "elapsed_time": "0:00:36", "remaining_time": "0:31:18", "throughput": 25366.47, "total_tokens": 933056}
|
|
{"current_steps": 305, "total_steps": 15621, "loss": 0.7373, "lr": 3.889955214331414e-07, "epoch": 0.019524998399590297, "percentage": 1.95, "elapsed_time": "0:00:37", "remaining_time": "0:31:17", "throughput": 25367.65, "total_tokens": 948416}
|
|
{"current_steps": 310, "total_steps": 15621, "loss": 0.7694, "lr": 3.953934740882917e-07, "epoch": 0.019845080340567187, "percentage": 1.98, "elapsed_time": "0:00:37", "remaining_time": "0:31:14", "throughput": 25365.33, "total_tokens": 962880}
|
|
{"current_steps": 315, "total_steps": 15621, "loss": 0.8088, "lr": 4.0179142674344207e-07, "epoch": 0.020165162281544077, "percentage": 2.02, "elapsed_time": "0:00:38", "remaining_time": "0:31:16", "throughput": 25379.41, "total_tokens": 979904}
|
|
{"current_steps": 320, "total_steps": 15621, "loss": 0.8251, "lr": 4.0818937939859245e-07, "epoch": 0.020485244222520967, "percentage": 2.05, "elapsed_time": "0:00:39", "remaining_time": "0:31:14", "throughput": 25378.55, "total_tokens": 995136}
|
|
{"current_steps": 325, "total_steps": 15621, "loss": 0.7695, "lr": 4.145873320537428e-07, "epoch": 0.020805326163497857, "percentage": 2.08, "elapsed_time": "0:00:39", "remaining_time": "0:31:14", "throughput": 25384.44, "total_tokens": 1011008}
|
|
{"current_steps": 330, "total_steps": 15621, "loss": 0.8335, "lr": 4.2098528470889315e-07, "epoch": 0.021125408104474747, "percentage": 2.11, "elapsed_time": "0:00:40", "remaining_time": "0:31:12", "throughput": 25378.34, "total_tokens": 1025792}
|
|
{"current_steps": 335, "total_steps": 15621, "loss": 0.6901, "lr": 4.273832373640435e-07, "epoch": 0.021445490045451637, "percentage": 2.14, "elapsed_time": "0:00:41", "remaining_time": "0:31:14", "throughput": 25391.22, "total_tokens": 1042944}
|
|
{"current_steps": 340, "total_steps": 15621, "loss": 0.8267, "lr": 4.3378119001919386e-07, "epoch": 0.021765571986428527, "percentage": 2.18, "elapsed_time": "0:00:41", "remaining_time": "0:31:13", "throughput": 25394.56, "total_tokens": 1058688}
|
|
{"current_steps": 345, "total_steps": 15621, "loss": 0.7233, "lr": 4.401791426743442e-07, "epoch": 0.022085653927405417, "percentage": 2.21, "elapsed_time": "0:00:42", "remaining_time": "0:31:13", "throughput": 25398.58, "total_tokens": 1074560}
|
|
{"current_steps": 350, "total_steps": 15621, "loss": 0.6991, "lr": 4.4657709532949456e-07, "epoch": 0.022405735868382307, "percentage": 2.24, "elapsed_time": "0:00:42", "remaining_time": "0:31:11", "throughput": 25399.25, "total_tokens": 1089728}
|
|
{"current_steps": 355, "total_steps": 15621, "loss": 0.9114, "lr": 4.5297504798464494e-07, "epoch": 0.022725817809359197, "percentage": 2.27, "elapsed_time": "0:00:43", "remaining_time": "0:31:10", "throughput": 25400.13, "total_tokens": 1105024}
|
|
{"current_steps": 360, "total_steps": 15621, "loss": 0.7824, "lr": 4.593730006397952e-07, "epoch": 0.023045899750336087, "percentage": 2.3, "elapsed_time": "0:00:44", "remaining_time": "0:31:10", "throughput": 25403.56, "total_tokens": 1121088}
|
|
{"current_steps": 365, "total_steps": 15621, "loss": 0.7048, "lr": 4.657709532949456e-07, "epoch": 0.023365981691312977, "percentage": 2.34, "elapsed_time": "0:00:44", "remaining_time": "0:31:10", "throughput": 25409.38, "total_tokens": 1136896}
|
|
{"current_steps": 370, "total_steps": 15621, "loss": 0.7082, "lr": 4.7216890595009597e-07, "epoch": 0.023686063632289867, "percentage": 2.37, "elapsed_time": "0:00:45", "remaining_time": "0:31:10", "throughput": 25416.44, "total_tokens": 1153280}
|
|
{"current_steps": 375, "total_steps": 15621, "loss": 0.8338, "lr": 4.785668586052463e-07, "epoch": 0.024006145573266757, "percentage": 2.4, "elapsed_time": "0:00:46", "remaining_time": "0:31:10", "throughput": 25423.62, "total_tokens": 1169536}
|
|
{"current_steps": 380, "total_steps": 15621, "loss": 0.7577, "lr": 4.849648112603967e-07, "epoch": 0.024326227514243647, "percentage": 2.43, "elapsed_time": "0:00:46", "remaining_time": "0:31:09", "throughput": 25424.37, "total_tokens": 1185088}
|
|
{"current_steps": 385, "total_steps": 15621, "loss": 0.6664, "lr": 4.91362763915547e-07, "epoch": 0.024646309455220537, "percentage": 2.46, "elapsed_time": "0:00:47", "remaining_time": "0:31:09", "throughput": 25422.81, "total_tokens": 1200832}
|
|
{"current_steps": 390, "total_steps": 15621, "loss": 0.6605, "lr": 4.977607165706974e-07, "epoch": 0.024966391396197427, "percentage": 2.5, "elapsed_time": "0:00:47", "remaining_time": "0:31:08", "throughput": 25426.8, "total_tokens": 1216320}
|
|
{"current_steps": 395, "total_steps": 15621, "loss": 0.7361, "lr": 5.041586692258478e-07, "epoch": 0.025286473337174317, "percentage": 2.53, "elapsed_time": "0:00:48", "remaining_time": "0:31:08", "throughput": 25429.12, "total_tokens": 1232832}
|
|
{"current_steps": 400, "total_steps": 15621, "loss": 0.7037, "lr": 5.10556621880998e-07, "epoch": 0.025606555278151207, "percentage": 2.56, "elapsed_time": "0:00:49", "remaining_time": "0:31:07", "throughput": 25431.99, "total_tokens": 1248384}
|
|
{"current_steps": 405, "total_steps": 15621, "loss": 0.6727, "lr": 5.169545745361484e-07, "epoch": 0.025926637219128097, "percentage": 2.59, "elapsed_time": "0:00:49", "remaining_time": "0:31:06", "throughput": 25436.01, "total_tokens": 1263936}
|
|
{"current_steps": 410, "total_steps": 15621, "loss": 1.118, "lr": 5.233525271912988e-07, "epoch": 0.026246719160104987, "percentage": 2.62, "elapsed_time": "0:00:50", "remaining_time": "0:31:25", "throughput": 25464.26, "total_tokens": 1294208}
|
|
{"current_steps": 415, "total_steps": 15621, "loss": 0.7921, "lr": 5.297504798464492e-07, "epoch": 0.026566801101081877, "percentage": 2.66, "elapsed_time": "0:00:51", "remaining_time": "0:31:23", "throughput": 25464.91, "total_tokens": 1309120}
|
|
{"current_steps": 420, "total_steps": 15621, "loss": 0.8592, "lr": 5.361484325015994e-07, "epoch": 0.026886883042058767, "percentage": 2.69, "elapsed_time": "0:00:52", "remaining_time": "0:31:22", "throughput": 25465.61, "total_tokens": 1324224}
|
|
{"current_steps": 425, "total_steps": 15621, "loss": 0.6829, "lr": 5.425463851567498e-07, "epoch": 0.027206964983035656, "percentage": 2.72, "elapsed_time": "0:00:52", "remaining_time": "0:31:22", "throughput": 25470.88, "total_tokens": 1341056}
|
|
{"current_steps": 430, "total_steps": 15621, "loss": 0.7533, "lr": 5.489443378119002e-07, "epoch": 0.027527046924012546, "percentage": 2.75, "elapsed_time": "0:00:53", "remaining_time": "0:31:21", "throughput": 25471.1, "total_tokens": 1356544}
|
|
{"current_steps": 435, "total_steps": 15621, "loss": 0.6696, "lr": 5.553422904670505e-07, "epoch": 0.027847128864989436, "percentage": 2.78, "elapsed_time": "0:00:53", "remaining_time": "0:31:20", "throughput": 25471.16, "total_tokens": 1371840}
|
|
{"current_steps": 440, "total_steps": 15621, "loss": 0.6825, "lr": 5.61740243122201e-07, "epoch": 0.028167210805966326, "percentage": 2.82, "elapsed_time": "0:00:54", "remaining_time": "0:31:18", "throughput": 25470.53, "total_tokens": 1386816}
|
|
{"current_steps": 445, "total_steps": 15621, "loss": 0.7438, "lr": 5.681381957773512e-07, "epoch": 0.028487292746943216, "percentage": 2.85, "elapsed_time": "0:00:55", "remaining_time": "0:31:16", "throughput": 25470.73, "total_tokens": 1401792}
|
|
{"current_steps": 450, "total_steps": 15621, "loss": 0.6214, "lr": 5.745361484325015e-07, "epoch": 0.028807374687920106, "percentage": 2.88, "elapsed_time": "0:00:55", "remaining_time": "0:31:15", "throughput": 25470.34, "total_tokens": 1416896}
|
|
{"current_steps": 455, "total_steps": 15621, "loss": 0.7517, "lr": 5.80934101087652e-07, "epoch": 0.029127456628896996, "percentage": 2.91, "elapsed_time": "0:00:56", "remaining_time": "0:31:14", "throughput": 25470.16, "total_tokens": 1432704}
|
|
{"current_steps": 460, "total_steps": 15621, "loss": 0.7009, "lr": 5.873320537428022e-07, "epoch": 0.029447538569873886, "percentage": 2.94, "elapsed_time": "0:00:56", "remaining_time": "0:31:14", "throughput": 25469.75, "total_tokens": 1448384}
|
|
{"current_steps": 465, "total_steps": 15621, "loss": 0.7179, "lr": 5.937300063979526e-07, "epoch": 0.029767620510850776, "percentage": 2.98, "elapsed_time": "0:00:57", "remaining_time": "0:31:14", "throughput": 25476.67, "total_tokens": 1464832}
|
|
{"current_steps": 470, "total_steps": 15621, "loss": 0.6785, "lr": 6.00127959053103e-07, "epoch": 0.030087702451827666, "percentage": 3.01, "elapsed_time": "0:00:58", "remaining_time": "0:31:12", "throughput": 25472.49, "total_tokens": 1479424}
|
|
{"current_steps": 475, "total_steps": 15621, "loss": 0.7292, "lr": 6.065259117082533e-07, "epoch": 0.030407784392804556, "percentage": 3.04, "elapsed_time": "0:00:58", "remaining_time": "0:31:10", "throughput": 25469.78, "total_tokens": 1494336}
|
|
{"current_steps": 480, "total_steps": 15621, "loss": 0.6741, "lr": 6.129238643634037e-07, "epoch": 0.030727866333781446, "percentage": 3.07, "elapsed_time": "0:00:59", "remaining_time": "0:31:09", "throughput": 25468.0, "total_tokens": 1509184}
|
|
{"current_steps": 485, "total_steps": 15621, "loss": 0.8032, "lr": 6.19321817018554e-07, "epoch": 0.031047948274758336, "percentage": 3.1, "elapsed_time": "0:00:59", "remaining_time": "0:31:09", "throughput": 25471.4, "total_tokens": 1525504}
|
|
{"current_steps": 490, "total_steps": 15621, "loss": 0.5911, "lr": 6.257197696737044e-07, "epoch": 0.031368030215735226, "percentage": 3.14, "elapsed_time": "0:01:00", "remaining_time": "0:31:08", "throughput": 25475.97, "total_tokens": 1541504}
|
|
{"current_steps": 495, "total_steps": 15621, "loss": 0.6188, "lr": 6.321177223288548e-07, "epoch": 0.03168811215671212, "percentage": 3.17, "elapsed_time": "0:01:01", "remaining_time": "0:31:07", "throughput": 25478.98, "total_tokens": 1557184}
|
|
{"current_steps": 500, "total_steps": 15621, "loss": 0.7662, "lr": 6.385156749840051e-07, "epoch": 0.032008194097689006, "percentage": 3.2, "elapsed_time": "0:01:01", "remaining_time": "0:31:07", "throughput": 25483.55, "total_tokens": 1573440}
|
|
{"current_steps": 505, "total_steps": 15621, "loss": 0.8712, "lr": 6.449136276391554e-07, "epoch": 0.0323282760386659, "percentage": 3.23, "elapsed_time": "0:01:02", "remaining_time": "0:31:06", "throughput": 25484.69, "total_tokens": 1588736}
|
|
{"current_steps": 510, "total_steps": 15621, "loss": 0.6979, "lr": 6.513115802943058e-07, "epoch": 0.032648357979642786, "percentage": 3.26, "elapsed_time": "0:01:02", "remaining_time": "0:31:05", "throughput": 25486.61, "total_tokens": 1604352}
|
|
{"current_steps": 515, "total_steps": 15621, "loss": 0.6574, "lr": 6.577095329494562e-07, "epoch": 0.03296843992061968, "percentage": 3.3, "elapsed_time": "0:01:03", "remaining_time": "0:31:03", "throughput": 25484.33, "total_tokens": 1618816}
|
|
{"current_steps": 520, "total_steps": 15621, "loss": 0.7462, "lr": 6.641074856046065e-07, "epoch": 0.033288521861596566, "percentage": 3.33, "elapsed_time": "0:01:04", "remaining_time": "0:31:03", "throughput": 25494.18, "total_tokens": 1635648}
|
|
{"current_steps": 525, "total_steps": 15621, "loss": 0.719, "lr": 6.705054382597568e-07, "epoch": 0.03360860380257346, "percentage": 3.36, "elapsed_time": "0:01:04", "remaining_time": "0:31:02", "throughput": 25493.63, "total_tokens": 1651328}
|
|
{"current_steps": 530, "total_steps": 15621, "loss": 0.7345, "lr": 6.769033909149072e-07, "epoch": 0.033928685743550346, "percentage": 3.39, "elapsed_time": "0:01:05", "remaining_time": "0:31:03", "throughput": 25501.39, "total_tokens": 1668928}
|
|
{"current_steps": 535, "total_steps": 15621, "loss": 0.6202, "lr": 6.833013435700575e-07, "epoch": 0.03424876768452724, "percentage": 3.42, "elapsed_time": "0:01:06", "remaining_time": "0:31:03", "throughput": 25505.18, "total_tokens": 1685504}
|
|
{"current_steps": 540, "total_steps": 15621, "loss": 0.7053, "lr": 6.89699296225208e-07, "epoch": 0.034568849625504126, "percentage": 3.46, "elapsed_time": "0:01:06", "remaining_time": "0:31:03", "throughput": 25508.8, "total_tokens": 1701952}
|
|
{"current_steps": 545, "total_steps": 15621, "loss": 0.7308, "lr": 6.960972488803583e-07, "epoch": 0.03488893156648102, "percentage": 3.49, "elapsed_time": "0:01:07", "remaining_time": "0:31:02", "throughput": 25507.95, "total_tokens": 1716992}
|
|
{"current_steps": 550, "total_steps": 15621, "loss": 0.5835, "lr": 7.024952015355085e-07, "epoch": 0.035209013507457906, "percentage": 3.52, "elapsed_time": "0:01:07", "remaining_time": "0:31:01", "throughput": 25504.54, "total_tokens": 1732160}
|
|
{"current_steps": 555, "total_steps": 15621, "loss": 0.6553, "lr": 7.08893154190659e-07, "epoch": 0.0355290954484348, "percentage": 3.55, "elapsed_time": "0:01:08", "remaining_time": "0:31:00", "throughput": 25506.91, "total_tokens": 1748416}
|
|
{"current_steps": 560, "total_steps": 15621, "loss": 0.7096, "lr": 7.152911068458093e-07, "epoch": 0.035849177389411686, "percentage": 3.58, "elapsed_time": "0:01:09", "remaining_time": "0:30:59", "throughput": 25506.73, "total_tokens": 1763776}
|
|
{"current_steps": 565, "total_steps": 15621, "loss": 0.6985, "lr": 7.216890595009597e-07, "epoch": 0.03616925933038858, "percentage": 3.62, "elapsed_time": "0:01:09", "remaining_time": "0:30:59", "throughput": 25511.52, "total_tokens": 1780160}
|
|
{"current_steps": 570, "total_steps": 15621, "loss": 0.6057, "lr": 7.2808701215611e-07, "epoch": 0.036489341271365466, "percentage": 3.65, "elapsed_time": "0:01:10", "remaining_time": "0:30:58", "throughput": 25514.51, "total_tokens": 1795968}
|
|
{"current_steps": 575, "total_steps": 15621, "loss": 0.6327, "lr": 7.344849648112603e-07, "epoch": 0.03680942321234236, "percentage": 3.68, "elapsed_time": "0:01:11", "remaining_time": "0:31:00", "throughput": 25527.74, "total_tokens": 1815424}
|
|
{"current_steps": 580, "total_steps": 15621, "loss": 0.8275, "lr": 7.408829174664107e-07, "epoch": 0.037129505153319246, "percentage": 3.71, "elapsed_time": "0:01:11", "remaining_time": "0:31:00", "throughput": 25530.08, "total_tokens": 1831936}
|
|
{"current_steps": 585, "total_steps": 15621, "loss": 0.6155, "lr": 7.472808701215611e-07, "epoch": 0.03744958709429614, "percentage": 3.74, "elapsed_time": "0:01:12", "remaining_time": "0:31:00", "throughput": 25528.16, "total_tokens": 1847424}
|
|
{"current_steps": 590, "total_steps": 15621, "loss": 0.7381, "lr": 7.536788227767114e-07, "epoch": 0.03776966903527303, "percentage": 3.78, "elapsed_time": "0:01:12", "remaining_time": "0:30:58", "throughput": 25525.86, "total_tokens": 1862400}
|
|
{"current_steps": 595, "total_steps": 15621, "loss": 0.7694, "lr": 7.600767754318617e-07, "epoch": 0.03808975097624992, "percentage": 3.81, "elapsed_time": "0:01:13", "remaining_time": "0:30:57", "throughput": 25524.54, "total_tokens": 1876928}
|
|
{"current_steps": 600, "total_steps": 15621, "loss": 0.6363, "lr": 7.664747280870121e-07, "epoch": 0.03840983291722681, "percentage": 3.84, "elapsed_time": "0:01:14", "remaining_time": "0:30:56", "throughput": 25523.44, "total_tokens": 1892608}
|
|
{"current_steps": 605, "total_steps": 15621, "loss": 0.7292, "lr": 7.728726807421625e-07, "epoch": 0.0387299148582037, "percentage": 3.87, "elapsed_time": "0:01:14", "remaining_time": "0:30:56", "throughput": 25528.38, "total_tokens": 1909696}
|
|
{"current_steps": 610, "total_steps": 15621, "loss": 0.7601, "lr": 7.792706333973129e-07, "epoch": 0.03904999679918059, "percentage": 3.9, "elapsed_time": "0:01:15", "remaining_time": "0:30:55", "throughput": 25527.25, "total_tokens": 1924864}
|
|
{"current_steps": 615, "total_steps": 15621, "loss": 0.5592, "lr": 7.856685860524632e-07, "epoch": 0.03937007874015748, "percentage": 3.94, "elapsed_time": "0:01:15", "remaining_time": "0:30:54", "throughput": 25528.5, "total_tokens": 1939968}
|
|
{"current_steps": 620, "total_steps": 15621, "loss": 0.7152, "lr": 7.920665387076135e-07, "epoch": 0.03969016068113437, "percentage": 3.97, "elapsed_time": "0:01:16", "remaining_time": "0:30:53", "throughput": 25526.84, "total_tokens": 1955136}
|
|
{"current_steps": 625, "total_steps": 15621, "loss": 0.7036, "lr": 7.984644913627639e-07, "epoch": 0.04001024262211126, "percentage": 4.0, "elapsed_time": "0:01:17", "remaining_time": "0:30:52", "throughput": 25526.59, "total_tokens": 1970880}
|
|
{"current_steps": 630, "total_steps": 15621, "loss": 0.5794, "lr": 8.048624440179143e-07, "epoch": 0.04033032456308815, "percentage": 4.03, "elapsed_time": "0:01:17", "remaining_time": "0:30:51", "throughput": 25527.05, "total_tokens": 1986752}
|
|
{"current_steps": 635, "total_steps": 15621, "loss": 0.5994, "lr": 8.112603966730645e-07, "epoch": 0.04065040650406504, "percentage": 4.07, "elapsed_time": "0:01:18", "remaining_time": "0:30:50", "throughput": 25525.7, "total_tokens": 2001856}
|
|
{"current_steps": 640, "total_steps": 15621, "loss": 0.6586, "lr": 8.17658349328215e-07, "epoch": 0.04097048844504193, "percentage": 4.1, "elapsed_time": "0:01:19", "remaining_time": "0:30:51", "throughput": 25534.64, "total_tokens": 2019968}
|
|
{"current_steps": 645, "total_steps": 15621, "loss": 0.7047, "lr": 8.240563019833653e-07, "epoch": 0.04129057038601882, "percentage": 4.13, "elapsed_time": "0:01:19", "remaining_time": "0:30:50", "throughput": 25534.91, "total_tokens": 2035328}
|
|
{"current_steps": 650, "total_steps": 15621, "loss": 0.6282, "lr": 8.304542546385156e-07, "epoch": 0.04161065232699571, "percentage": 4.16, "elapsed_time": "0:01:20", "remaining_time": "0:30:53", "throughput": 25544.33, "total_tokens": 2055168}
|
|
{"current_steps": 655, "total_steps": 15621, "loss": 0.7521, "lr": 8.36852207293666e-07, "epoch": 0.0419307342679726, "percentage": 4.19, "elapsed_time": "0:01:21", "remaining_time": "0:30:52", "throughput": 25547.0, "total_tokens": 2071808}
|
|
{"current_steps": 660, "total_steps": 15621, "loss": 0.6527, "lr": 8.432501599488163e-07, "epoch": 0.04225081620894949, "percentage": 4.23, "elapsed_time": "0:01:21", "remaining_time": "0:30:52", "throughput": 25548.33, "total_tokens": 2087424}
|
|
{"current_steps": 665, "total_steps": 15621, "loss": 0.7682, "lr": 8.496481126039667e-07, "epoch": 0.04257089814992638, "percentage": 4.26, "elapsed_time": "0:01:22", "remaining_time": "0:30:50", "throughput": 25547.76, "total_tokens": 2102592}
|
|
{"current_steps": 670, "total_steps": 15621, "loss": 0.6517, "lr": 8.560460652591171e-07, "epoch": 0.04289098009090327, "percentage": 4.29, "elapsed_time": "0:01:22", "remaining_time": "0:30:50", "throughput": 25552.57, "total_tokens": 2119488}
|
|
{"current_steps": 675, "total_steps": 15621, "loss": 0.6454, "lr": 8.624440179142674e-07, "epoch": 0.04321106203188016, "percentage": 4.32, "elapsed_time": "0:01:23", "remaining_time": "0:30:50", "throughput": 25557.13, "total_tokens": 2136000}
|
|
{"current_steps": 680, "total_steps": 15621, "loss": 0.7404, "lr": 8.688419705694177e-07, "epoch": 0.04353114397285705, "percentage": 4.35, "elapsed_time": "0:01:24", "remaining_time": "0:30:50", "throughput": 25560.91, "total_tokens": 2152448}
|
|
{"current_steps": 685, "total_steps": 15621, "loss": 0.6177, "lr": 8.752399232245681e-07, "epoch": 0.04385122591383394, "percentage": 4.39, "elapsed_time": "0:01:24", "remaining_time": "0:30:49", "throughput": 25562.65, "total_tokens": 2168000}
|
|
{"current_steps": 690, "total_steps": 15621, "loss": 0.5953, "lr": 8.816378758797185e-07, "epoch": 0.04417130785481083, "percentage": 4.42, "elapsed_time": "0:01:25", "remaining_time": "0:30:48", "throughput": 25560.85, "total_tokens": 2183552}
|
|
{"current_steps": 695, "total_steps": 15621, "loss": 0.7135, "lr": 8.880358285348688e-07, "epoch": 0.04449138979578772, "percentage": 4.45, "elapsed_time": "0:01:26", "remaining_time": "0:30:47", "throughput": 25563.86, "total_tokens": 2199488}
|
|
{"current_steps": 700, "total_steps": 15621, "loss": 0.6167, "lr": 8.944337811900191e-07, "epoch": 0.04481147173676461, "percentage": 4.48, "elapsed_time": "0:01:26", "remaining_time": "0:30:47", "throughput": 25564.84, "total_tokens": 2215296}
|
|
{"current_steps": 705, "total_steps": 15621, "loss": 0.7051, "lr": 9.008317338451695e-07, "epoch": 0.0451315536777415, "percentage": 4.51, "elapsed_time": "0:01:27", "remaining_time": "0:30:45", "throughput": 25564.86, "total_tokens": 2230016}
|
|
{"current_steps": 710, "total_steps": 15621, "loss": 0.6629, "lr": 9.072296865003198e-07, "epoch": 0.04545163561871839, "percentage": 4.55, "elapsed_time": "0:01:27", "remaining_time": "0:30:44", "throughput": 25562.61, "total_tokens": 2245056}
|
|
{"current_steps": 715, "total_steps": 15621, "loss": 0.6166, "lr": 9.136276391554703e-07, "epoch": 0.04577171755969528, "percentage": 4.58, "elapsed_time": "0:01:28", "remaining_time": "0:30:44", "throughput": 25564.5, "total_tokens": 2261248}
|
|
{"current_steps": 720, "total_steps": 15621, "loss": 0.6516, "lr": 9.200255918106205e-07, "epoch": 0.04609179950067217, "percentage": 4.61, "elapsed_time": "0:01:29", "remaining_time": "0:30:43", "throughput": 25567.01, "total_tokens": 2278016}
|
|
{"current_steps": 725, "total_steps": 15621, "loss": 0.5696, "lr": 9.264235444657708e-07, "epoch": 0.04641188144164906, "percentage": 4.64, "elapsed_time": "0:01:29", "remaining_time": "0:30:42", "throughput": 25564.24, "total_tokens": 2292800}
|
|
{"current_steps": 730, "total_steps": 15621, "loss": 0.6049, "lr": 9.328214971209213e-07, "epoch": 0.04673196338262595, "percentage": 4.67, "elapsed_time": "0:01:30", "remaining_time": "0:30:41", "throughput": 25564.31, "total_tokens": 2308224}
|
|
{"current_steps": 735, "total_steps": 15621, "loss": 0.7005, "lr": 9.392194497760716e-07, "epoch": 0.04705204532360284, "percentage": 4.71, "elapsed_time": "0:01:30", "remaining_time": "0:30:42", "throughput": 25569.77, "total_tokens": 2325760}
|
|
{"current_steps": 740, "total_steps": 15621, "loss": 0.6971, "lr": 9.456174024312221e-07, "epoch": 0.04737212726457973, "percentage": 4.74, "elapsed_time": "0:01:31", "remaining_time": "0:30:41", "throughput": 25571.82, "total_tokens": 2341632}
|
|
{"current_steps": 745, "total_steps": 15621, "loss": 0.7066, "lr": 9.520153550863723e-07, "epoch": 0.04769220920555662, "percentage": 4.77, "elapsed_time": "0:01:32", "remaining_time": "0:30:40", "throughput": 25573.63, "total_tokens": 2357504}
|
|
{"current_steps": 750, "total_steps": 15621, "loss": 0.7294, "lr": 9.584133077415226e-07, "epoch": 0.04801229114653351, "percentage": 4.8, "elapsed_time": "0:01:32", "remaining_time": "0:30:39", "throughput": 25570.4, "total_tokens": 2372608}
|
|
{"current_steps": 755, "total_steps": 15621, "loss": 0.587, "lr": 9.64811260396673e-07, "epoch": 0.0483323730875104, "percentage": 4.83, "elapsed_time": "0:01:33", "remaining_time": "0:30:38", "throughput": 25572.36, "total_tokens": 2388352}
|
|
{"current_steps": 760, "total_steps": 15621, "loss": 0.6934, "lr": 9.712092130518234e-07, "epoch": 0.04865245502848729, "percentage": 4.87, "elapsed_time": "0:01:34", "remaining_time": "0:30:38", "throughput": 25573.11, "total_tokens": 2404480}
|
|
{"current_steps": 765, "total_steps": 15621, "loss": 0.518, "lr": 9.776071657069737e-07, "epoch": 0.04897253696946418, "percentage": 4.9, "elapsed_time": "0:01:34", "remaining_time": "0:30:37", "throughput": 25572.1, "total_tokens": 2419648}
|
|
{"current_steps": 770, "total_steps": 15621, "loss": 0.7121, "lr": 9.840051183621241e-07, "epoch": 0.04929261891044107, "percentage": 4.93, "elapsed_time": "0:01:35", "remaining_time": "0:30:37", "throughput": 25571.5, "total_tokens": 2435584}
|
|
{"current_steps": 775, "total_steps": 15621, "loss": 0.6265, "lr": 9.904030710172743e-07, "epoch": 0.04961270085141796, "percentage": 4.96, "elapsed_time": "0:01:35", "remaining_time": "0:30:36", "throughput": 25571.4, "total_tokens": 2451072}
|
|
{"current_steps": 780, "total_steps": 15621, "loss": 0.764, "lr": 9.968010236724249e-07, "epoch": 0.04993278279239485, "percentage": 4.99, "elapsed_time": "0:01:36", "remaining_time": "0:30:36", "throughput": 25575.18, "total_tokens": 2467968}
|
|
{"current_steps": 782, "total_steps": 15621, "eval_loss": 0.6362079381942749, "epoch": 0.05006081556878561, "percentage": 5.01, "elapsed_time": "0:02:25", "remaining_time": "0:46:08", "throughput": 16960.16, "total_tokens": 2474432}
|
|
{"current_steps": 785, "total_steps": 15621, "loss": 0.669, "lr": 1.0031989763275752e-06, "epoch": 0.05025286473337175, "percentage": 5.03, "elapsed_time": "0:02:59", "remaining_time": "0:56:40", "throughput": 13812.8, "total_tokens": 2484928}
|
|
{"current_steps": 790, "total_steps": 15621, "loss": 0.6777, "lr": 1.0095969289827256e-06, "epoch": 0.05057294667434863, "percentage": 5.06, "elapsed_time": "0:03:00", "remaining_time": "0:56:29", "throughput": 13856.13, "total_tokens": 2501504}
|
|
{"current_steps": 795, "total_steps": 15621, "loss": 0.5188, "lr": 1.0159948816378758e-06, "epoch": 0.050893028615325527, "percentage": 5.09, "elapsed_time": "0:03:01", "remaining_time": "0:56:18", "throughput": 13902.01, "total_tokens": 2518848}
|
|
{"current_steps": 800, "total_steps": 15621, "loss": 0.5482, "lr": 1.0223928342930262e-06, "epoch": 0.05121311055630241, "percentage": 5.12, "elapsed_time": "0:03:01", "remaining_time": "0:56:08", "throughput": 13945.58, "total_tokens": 2535680}
|
|
{"current_steps": 805, "total_steps": 15621, "loss": 0.676, "lr": 1.0287907869481766e-06, "epoch": 0.051533192497279307, "percentage": 5.15, "elapsed_time": "0:03:02", "remaining_time": "0:55:57", "throughput": 13983.26, "total_tokens": 2550976}
|
|
{"current_steps": 810, "total_steps": 15621, "loss": 0.5562, "lr": 1.035188739603327e-06, "epoch": 0.05185327443825619, "percentage": 5.19, "elapsed_time": "0:03:03", "remaining_time": "0:55:46", "throughput": 14022.65, "total_tokens": 2566656}
|
|
{"current_steps": 815, "total_steps": 15621, "loss": 0.6315, "lr": 1.0415866922584773e-06, "epoch": 0.052173356379233086, "percentage": 5.22, "elapsed_time": "0:03:03", "remaining_time": "0:55:35", "throughput": 14058.79, "total_tokens": 2581568}
|
|
{"current_steps": 820, "total_steps": 15621, "loss": 0.6426, "lr": 1.0479846449136277e-06, "epoch": 0.05249343832020997, "percentage": 5.25, "elapsed_time": "0:03:04", "remaining_time": "0:55:25", "throughput": 14095.35, "total_tokens": 2596608}
|
|
{"current_steps": 825, "total_steps": 15621, "loss": 0.6719, "lr": 1.0543825975687779e-06, "epoch": 0.052813520261186866, "percentage": 5.28, "elapsed_time": "0:03:04", "remaining_time": "0:55:14", "throughput": 14132.64, "total_tokens": 2612032}
|
|
{"current_steps": 830, "total_steps": 15621, "loss": 0.7313, "lr": 1.0607805502239282e-06, "epoch": 0.05313360220216375, "percentage": 5.31, "elapsed_time": "0:03:05", "remaining_time": "0:55:04", "throughput": 14169.83, "total_tokens": 2627264}
|
|
{"current_steps": 835, "total_steps": 15621, "loss": 0.548, "lr": 1.0671785028790788e-06, "epoch": 0.053453684143140646, "percentage": 5.35, "elapsed_time": "0:03:06", "remaining_time": "0:54:54", "throughput": 14208.81, "total_tokens": 2643264}
|
|
{"current_steps": 840, "total_steps": 15621, "loss": 0.5474, "lr": 1.073576455534229e-06, "epoch": 0.05377376608411753, "percentage": 5.38, "elapsed_time": "0:03:06", "remaining_time": "0:54:44", "throughput": 14247.58, "total_tokens": 2659264}
|
|
{"current_steps": 845, "total_steps": 15621, "loss": 0.5737, "lr": 1.0799744081893794e-06, "epoch": 0.054093848025094426, "percentage": 5.41, "elapsed_time": "0:03:07", "remaining_time": "0:54:33", "throughput": 14281.71, "total_tokens": 2673856}
|
|
{"current_steps": 850, "total_steps": 15621, "loss": 0.4779, "lr": 1.0863723608445297e-06, "epoch": 0.05441392996607131, "percentage": 5.44, "elapsed_time": "0:03:07", "remaining_time": "0:54:23", "throughput": 14315.13, "total_tokens": 2688448}
|
|
{"current_steps": 855, "total_steps": 15621, "loss": 0.6201, "lr": 1.09277031349968e-06, "epoch": 0.054734011907048206, "percentage": 5.47, "elapsed_time": "0:03:08", "remaining_time": "0:54:13", "throughput": 14351.12, "total_tokens": 2703872}
|
|
{"current_steps": 860, "total_steps": 15621, "loss": 0.6104, "lr": 1.0991682661548305e-06, "epoch": 0.05505409384802509, "percentage": 5.51, "elapsed_time": "0:03:09", "remaining_time": "0:54:04", "throughput": 14385.77, "total_tokens": 2719040}
|
|
{"current_steps": 865, "total_steps": 15621, "loss": 0.6205, "lr": 1.1055662188099809e-06, "epoch": 0.055374175789001986, "percentage": 5.54, "elapsed_time": "0:03:09", "remaining_time": "0:53:54", "throughput": 14423.91, "total_tokens": 2735168}
|
|
{"current_steps": 870, "total_steps": 15621, "loss": 0.5224, "lr": 1.111964171465131e-06, "epoch": 0.05569425772997887, "percentage": 5.57, "elapsed_time": "0:03:10", "remaining_time": "0:53:45", "throughput": 14459.56, "total_tokens": 2750592}
|
|
{"current_steps": 875, "total_steps": 15621, "loss": 0.6572, "lr": 1.1183621241202814e-06, "epoch": 0.056014339670955766, "percentage": 5.6, "elapsed_time": "0:03:10", "remaining_time": "0:53:36", "throughput": 14498.34, "total_tokens": 2767232}
|
|
{"current_steps": 880, "total_steps": 15621, "loss": 0.665, "lr": 1.1247600767754318e-06, "epoch": 0.05633442161193265, "percentage": 5.63, "elapsed_time": "0:03:11", "remaining_time": "0:53:28", "throughput": 14539.57, "total_tokens": 2784768}
|
|
{"current_steps": 885, "total_steps": 15621, "loss": 0.5809, "lr": 1.1311580294305822e-06, "epoch": 0.056654503552909546, "percentage": 5.67, "elapsed_time": "0:03:12", "remaining_time": "0:53:19", "throughput": 14573.29, "total_tokens": 2799872}
|
|
{"current_steps": 890, "total_steps": 15621, "loss": 0.6481, "lr": 1.1375559820857326e-06, "epoch": 0.05697458549388643, "percentage": 5.7, "elapsed_time": "0:03:12", "remaining_time": "0:53:10", "throughput": 14609.65, "total_tokens": 2816000}
|
|
{"current_steps": 895, "total_steps": 15621, "loss": 0.5859, "lr": 1.143953934740883e-06, "epoch": 0.057294667434863326, "percentage": 5.73, "elapsed_time": "0:03:13", "remaining_time": "0:53:01", "throughput": 14644.53, "total_tokens": 2831744}
|
|
{"current_steps": 900, "total_steps": 15621, "loss": 0.6183, "lr": 1.150351887396033e-06, "epoch": 0.05761474937584021, "percentage": 5.76, "elapsed_time": "0:03:13", "remaining_time": "0:52:52", "throughput": 14679.09, "total_tokens": 2847424}
|
|
{"current_steps": 905, "total_steps": 15621, "loss": 0.616, "lr": 1.1567498400511835e-06, "epoch": 0.057934831316817106, "percentage": 5.79, "elapsed_time": "0:03:14", "remaining_time": "0:52:43", "throughput": 14711.76, "total_tokens": 2862272}
|
|
{"current_steps": 910, "total_steps": 15621, "loss": 0.4927, "lr": 1.163147792706334e-06, "epoch": 0.05825491325779399, "percentage": 5.83, "elapsed_time": "0:03:15", "remaining_time": "0:52:34", "throughput": 14743.56, "total_tokens": 2877120}
|
|
{"current_steps": 915, "total_steps": 15621, "loss": 0.5249, "lr": 1.1695457453614842e-06, "epoch": 0.058574995198770886, "percentage": 5.86, "elapsed_time": "0:03:15", "remaining_time": "0:52:26", "throughput": 14783.18, "total_tokens": 2894592}
|
|
{"current_steps": 920, "total_steps": 15621, "loss": 0.6159, "lr": 1.1759436980166346e-06, "epoch": 0.05889507713974777, "percentage": 5.89, "elapsed_time": "0:03:16", "remaining_time": "0:52:18", "throughput": 14815.93, "total_tokens": 2909888}
|
|
{"current_steps": 925, "total_steps": 15621, "loss": 0.6195, "lr": 1.182341650671785e-06, "epoch": 0.059215159080724666, "percentage": 5.92, "elapsed_time": "0:03:17", "remaining_time": "0:52:10", "throughput": 14849.12, "total_tokens": 2925632}
|
|
{"current_steps": 930, "total_steps": 15621, "loss": 0.6153, "lr": 1.1887396033269352e-06, "epoch": 0.05953524102170155, "percentage": 5.95, "elapsed_time": "0:03:17", "remaining_time": "0:52:02", "throughput": 14883.96, "total_tokens": 2941760}
|
|
{"current_steps": 935, "total_steps": 15621, "loss": 0.7076, "lr": 1.1951375559820858e-06, "epoch": 0.059855322962678446, "percentage": 5.99, "elapsed_time": "0:03:18", "remaining_time": "0:51:53", "throughput": 14917.7, "total_tokens": 2957376}
|
|
{"current_steps": 940, "total_steps": 15621, "loss": 0.5704, "lr": 1.2015355086372361e-06, "epoch": 0.06017540490365533, "percentage": 6.02, "elapsed_time": "0:03:18", "remaining_time": "0:51:45", "throughput": 14949.93, "total_tokens": 2972800}
|
|
{"current_steps": 945, "total_steps": 15621, "loss": 0.7172, "lr": 1.2079334612923863e-06, "epoch": 0.060495486844632226, "percentage": 6.05, "elapsed_time": "0:03:19", "remaining_time": "0:51:37", "throughput": 14982.99, "total_tokens": 2988480}
|
|
{"current_steps": 950, "total_steps": 15621, "loss": 0.6613, "lr": 1.2143314139475367e-06, "epoch": 0.06081556878560911, "percentage": 6.08, "elapsed_time": "0:03:20", "remaining_time": "0:51:29", "throughput": 15016.44, "total_tokens": 3004480}
|
|
{"current_steps": 955, "total_steps": 15621, "loss": 0.444, "lr": 1.220729366602687e-06, "epoch": 0.061135650726586006, "percentage": 6.11, "elapsed_time": "0:03:20", "remaining_time": "0:51:21", "throughput": 15049.57, "total_tokens": 3020288}
|
|
{"current_steps": 960, "total_steps": 15621, "loss": 0.6011, "lr": 1.2271273192578374e-06, "epoch": 0.06145573266756289, "percentage": 6.15, "elapsed_time": "0:03:21", "remaining_time": "0:51:14", "throughput": 15081.19, "total_tokens": 3035968}
|
|
{"current_steps": 965, "total_steps": 15621, "loss": 0.7411, "lr": 1.2335252719129878e-06, "epoch": 0.061775814608539786, "percentage": 6.18, "elapsed_time": "0:03:21", "remaining_time": "0:51:06", "throughput": 15113.62, "total_tokens": 3051776}
|
|
{"current_steps": 970, "total_steps": 15621, "loss": 0.5575, "lr": 1.2399232245681382e-06, "epoch": 0.06209589654951667, "percentage": 6.21, "elapsed_time": "0:03:22", "remaining_time": "0:50:58", "throughput": 15142.94, "total_tokens": 3066560}
|
|
{"current_steps": 975, "total_steps": 15621, "loss": 0.6357, "lr": 1.2463211772232884e-06, "epoch": 0.062415978490493566, "percentage": 6.24, "elapsed_time": "0:03:23", "remaining_time": "0:50:51", "throughput": 15175.87, "total_tokens": 3082496}
|
|
{"current_steps": 980, "total_steps": 15621, "loss": 0.6233, "lr": 1.2527191298784387e-06, "epoch": 0.06273606043147045, "percentage": 6.27, "elapsed_time": "0:03:23", "remaining_time": "0:50:43", "throughput": 15206.37, "total_tokens": 3097856}
|
|
{"current_steps": 985, "total_steps": 15621, "loss": 0.5062, "lr": 1.2591170825335893e-06, "epoch": 0.06305614237244735, "percentage": 6.31, "elapsed_time": "0:03:24", "remaining_time": "0:50:36", "throughput": 15237.9, "total_tokens": 3113664}
|
|
{"current_steps": 990, "total_steps": 15621, "loss": 0.6242, "lr": 1.2655150351887395e-06, "epoch": 0.06337622431342424, "percentage": 6.34, "elapsed_time": "0:03:24", "remaining_time": "0:50:29", "throughput": 15270.48, "total_tokens": 3129792}
|
|
{"current_steps": 995, "total_steps": 15621, "loss": 0.5901, "lr": 1.2719129878438899e-06, "epoch": 0.06369630625440113, "percentage": 6.37, "elapsed_time": "0:03:25", "remaining_time": "0:50:21", "throughput": 15300.5, "total_tokens": 3145024}
|
|
{"current_steps": 1000, "total_steps": 15621, "loss": 0.7747, "lr": 1.2783109404990402e-06, "epoch": 0.06401638819537801, "percentage": 6.4, "elapsed_time": "0:03:26", "remaining_time": "0:50:14", "throughput": 15332.27, "total_tokens": 3161216}
|
|
{"current_steps": 1005, "total_steps": 15621, "loss": 0.4118, "lr": 1.2847088931541904e-06, "epoch": 0.0643364701363549, "percentage": 6.43, "elapsed_time": "0:03:26", "remaining_time": "0:50:07", "throughput": 15363.17, "total_tokens": 3176960}
|
|
{"current_steps": 1010, "total_steps": 15621, "loss": 0.607, "lr": 1.291106845809341e-06, "epoch": 0.0646565520773318, "percentage": 6.47, "elapsed_time": "0:03:27", "remaining_time": "0:50:00", "throughput": 15395.02, "total_tokens": 3193088}
|
|
{"current_steps": 1015, "total_steps": 15621, "loss": 0.6808, "lr": 1.2975047984644914e-06, "epoch": 0.0649766340183087, "percentage": 6.5, "elapsed_time": "0:03:28", "remaining_time": "0:49:54", "throughput": 15428.57, "total_tokens": 3210112}
|
|
{"current_steps": 1020, "total_steps": 15621, "loss": 0.5044, "lr": 1.3039027511196418e-06, "epoch": 0.06529671595928557, "percentage": 6.53, "elapsed_time": "0:03:28", "remaining_time": "0:49:46", "throughput": 15456.25, "total_tokens": 3224768}
|
|
{"current_steps": 1025, "total_steps": 15621, "loss": 0.6235, "lr": 1.310300703774792e-06, "epoch": 0.06561679790026247, "percentage": 6.56, "elapsed_time": "0:03:29", "remaining_time": "0:49:39", "throughput": 15485.4, "total_tokens": 3240128}
|
|
{"current_steps": 1030, "total_steps": 15621, "loss": 0.5605, "lr": 1.3166986564299423e-06, "epoch": 0.06593687984123936, "percentage": 6.59, "elapsed_time": "0:03:29", "remaining_time": "0:49:32", "throughput": 15517.48, "total_tokens": 3256576}
|
|
{"current_steps": 1035, "total_steps": 15621, "loss": 0.5942, "lr": 1.3230966090850929e-06, "epoch": 0.06625696178221625, "percentage": 6.63, "elapsed_time": "0:03:30", "remaining_time": "0:49:26", "throughput": 15547.03, "total_tokens": 3272384}
|
|
{"current_steps": 1040, "total_steps": 15621, "loss": 0.4108, "lr": 1.329494561740243e-06, "epoch": 0.06657704372319313, "percentage": 6.66, "elapsed_time": "0:03:31", "remaining_time": "0:49:19", "throughput": 15577.4, "total_tokens": 3288512}
|
|
{"current_steps": 1045, "total_steps": 15621, "loss": 0.4897, "lr": 1.3358925143953934e-06, "epoch": 0.06689712566417003, "percentage": 6.69, "elapsed_time": "0:03:31", "remaining_time": "0:49:13", "throughput": 15612.09, "total_tokens": 3306304}
|
|
{"current_steps": 1050, "total_steps": 15621, "loss": 0.4785, "lr": 1.3422904670505438e-06, "epoch": 0.06721720760514692, "percentage": 6.72, "elapsed_time": "0:03:32", "remaining_time": "0:49:07", "throughput": 15639.31, "total_tokens": 3321344}
|
|
{"current_steps": 1055, "total_steps": 15621, "loss": 0.6127, "lr": 1.348688419705694e-06, "epoch": 0.06753728954612381, "percentage": 6.75, "elapsed_time": "0:03:33", "remaining_time": "0:49:01", "throughput": 15671.92, "total_tokens": 3338560}
|
|
{"current_steps": 1060, "total_steps": 15621, "loss": 0.5135, "lr": 1.3550863723608446e-06, "epoch": 0.06785737148710069, "percentage": 6.79, "elapsed_time": "0:03:33", "remaining_time": "0:48:54", "throughput": 15697.36, "total_tokens": 3353152}
|
|
{"current_steps": 1065, "total_steps": 15621, "loss": 0.5401, "lr": 1.361484325015995e-06, "epoch": 0.06817745342807759, "percentage": 6.82, "elapsed_time": "0:03:34", "remaining_time": "0:48:48", "throughput": 15727.25, "total_tokens": 3369536}
|
|
{"current_steps": 1070, "total_steps": 15621, "loss": 0.6023, "lr": 1.3678822776711451e-06, "epoch": 0.06849753536905448, "percentage": 6.85, "elapsed_time": "0:03:34", "remaining_time": "0:48:41", "throughput": 15754.24, "total_tokens": 3384832}
|
|
{"current_steps": 1075, "total_steps": 15621, "loss": 0.4881, "lr": 1.3742802303262955e-06, "epoch": 0.06881761731003137, "percentage": 6.88, "elapsed_time": "0:03:35", "remaining_time": "0:48:35", "throughput": 15779.5, "total_tokens": 3399424}
|
|
{"current_steps": 1080, "total_steps": 15621, "loss": 0.6565, "lr": 1.3806781829814459e-06, "epoch": 0.06913769925100825, "percentage": 6.91, "elapsed_time": "0:03:36", "remaining_time": "0:48:29", "throughput": 15811.29, "total_tokens": 3416704}
|
|
{"current_steps": 1085, "total_steps": 15621, "loss": 0.5553, "lr": 1.3870761356365963e-06, "epoch": 0.06945778119198515, "percentage": 6.95, "elapsed_time": "0:03:36", "remaining_time": "0:48:22", "throughput": 15837.48, "total_tokens": 3431552}
|
|
{"current_steps": 1090, "total_steps": 15621, "loss": 0.6472, "lr": 1.3934740882917466e-06, "epoch": 0.06977786313296204, "percentage": 6.98, "elapsed_time": "0:03:37", "remaining_time": "0:48:16", "throughput": 15865.44, "total_tokens": 3447488}
|
|
{"current_steps": 1095, "total_steps": 15621, "loss": 0.5137, "lr": 1.399872040946897e-06, "epoch": 0.07009794507393893, "percentage": 7.01, "elapsed_time": "0:03:37", "remaining_time": "0:48:10", "throughput": 15893.59, "total_tokens": 3463424}
|
|
{"current_steps": 1100, "total_steps": 15621, "loss": 0.6527, "lr": 1.4062699936020472e-06, "epoch": 0.07041802701491581, "percentage": 7.04, "elapsed_time": "0:03:38", "remaining_time": "0:48:04", "throughput": 15922.07, "total_tokens": 3479680}
|
|
{"current_steps": 1105, "total_steps": 15621, "loss": 0.5117, "lr": 1.4126679462571976e-06, "epoch": 0.0707381089558927, "percentage": 7.07, "elapsed_time": "0:03:39", "remaining_time": "0:47:58", "throughput": 15950.11, "total_tokens": 3495552}
|
|
{"current_steps": 1110, "total_steps": 15621, "loss": 0.4748, "lr": 1.4190658989123481e-06, "epoch": 0.0710581908968696, "percentage": 7.11, "elapsed_time": "0:03:39", "remaining_time": "0:47:52", "throughput": 15976.14, "total_tokens": 3510976}
|
|
{"current_steps": 1115, "total_steps": 15621, "loss": 0.6499, "lr": 1.4254638515674983e-06, "epoch": 0.0713782728378465, "percentage": 7.14, "elapsed_time": "0:03:40", "remaining_time": "0:47:46", "throughput": 16001.01, "total_tokens": 3526016}
|
|
{"current_steps": 1120, "total_steps": 15621, "loss": 0.5645, "lr": 1.4318618042226487e-06, "epoch": 0.07169835477882337, "percentage": 7.17, "elapsed_time": "0:03:40", "remaining_time": "0:47:40", "throughput": 16025.08, "total_tokens": 3540544}
|
|
{"current_steps": 1125, "total_steps": 15621, "loss": 0.6069, "lr": 1.438259756877799e-06, "epoch": 0.07201843671980027, "percentage": 7.2, "elapsed_time": "0:03:41", "remaining_time": "0:47:34", "throughput": 16051.95, "total_tokens": 3556416}
|
|
{"current_steps": 1130, "total_steps": 15621, "loss": 0.5077, "lr": 1.4446577095329492e-06, "epoch": 0.07233851866077716, "percentage": 7.23, "elapsed_time": "0:03:42", "remaining_time": "0:47:29", "throughput": 16078.66, "total_tokens": 3572096}
|
|
{"current_steps": 1135, "total_steps": 15621, "loss": 0.4993, "lr": 1.4510556621880998e-06, "epoch": 0.07265860060175405, "percentage": 7.27, "elapsed_time": "0:03:42", "remaining_time": "0:47:23", "throughput": 16104.42, "total_tokens": 3587712}
|
|
{"current_steps": 1140, "total_steps": 15621, "loss": 0.5417, "lr": 1.4574536148432502e-06, "epoch": 0.07297868254273093, "percentage": 7.3, "elapsed_time": "0:03:43", "remaining_time": "0:47:18", "throughput": 16134.67, "total_tokens": 3605056}
|
|
{"current_steps": 1145, "total_steps": 15621, "loss": 0.6805, "lr": 1.4638515674984004e-06, "epoch": 0.07329876448370783, "percentage": 7.33, "elapsed_time": "0:03:44", "remaining_time": "0:47:12", "throughput": 16161.75, "total_tokens": 3621184}
|
|
{"current_steps": 1150, "total_steps": 15621, "loss": 0.5834, "lr": 1.4702495201535507e-06, "epoch": 0.07361884642468472, "percentage": 7.36, "elapsed_time": "0:03:44", "remaining_time": "0:47:06", "throughput": 16183.71, "total_tokens": 3635392}
|
|
{"current_steps": 1155, "total_steps": 15621, "loss": 0.5049, "lr": 1.4766474728087011e-06, "epoch": 0.07393892836566161, "percentage": 7.39, "elapsed_time": "0:03:45", "remaining_time": "0:47:00", "throughput": 16206.96, "total_tokens": 3649984}
|
|
{"current_steps": 1160, "total_steps": 15621, "loss": 0.5276, "lr": 1.4830454254638515e-06, "epoch": 0.07425901030663849, "percentage": 7.43, "elapsed_time": "0:03:45", "remaining_time": "0:46:55", "throughput": 16233.21, "total_tokens": 3665920}
|
|
{"current_steps": 1165, "total_steps": 15621, "loss": 0.4587, "lr": 1.4894433781190019e-06, "epoch": 0.07457909224761539, "percentage": 7.46, "elapsed_time": "0:03:46", "remaining_time": "0:46:49", "throughput": 16255.21, "total_tokens": 3680256}
|
|
{"current_steps": 1170, "total_steps": 15621, "loss": 0.5255, "lr": 1.4958413307741523e-06, "epoch": 0.07489917418859228, "percentage": 7.49, "elapsed_time": "0:03:47", "remaining_time": "0:46:44", "throughput": 16284.04, "total_tokens": 3697536}
|
|
{"current_steps": 1175, "total_steps": 15621, "loss": 0.6111, "lr": 1.5022392834293024e-06, "epoch": 0.07521925612956917, "percentage": 7.52, "elapsed_time": "0:03:47", "remaining_time": "0:46:39", "throughput": 16307.83, "total_tokens": 3713088}
|
|
{"current_steps": 1180, "total_steps": 15621, "loss": 0.6712, "lr": 1.5086372360844528e-06, "epoch": 0.07553933807054607, "percentage": 7.55, "elapsed_time": "0:03:48", "remaining_time": "0:46:34", "throughput": 16335.65, "total_tokens": 3729920}
|
|
{"current_steps": 1185, "total_steps": 15621, "loss": 0.5489, "lr": 1.5150351887396034e-06, "epoch": 0.07585942001152295, "percentage": 7.59, "elapsed_time": "0:03:48", "remaining_time": "0:46:29", "throughput": 16360.75, "total_tokens": 3745664}
|
|
{"current_steps": 1190, "total_steps": 15621, "loss": 0.5258, "lr": 1.5214331413947536e-06, "epoch": 0.07617950195249984, "percentage": 7.62, "elapsed_time": "0:03:49", "remaining_time": "0:46:23", "throughput": 16383.57, "total_tokens": 3760576}
|
|
{"current_steps": 1195, "total_steps": 15621, "loss": 0.5085, "lr": 1.527831094049904e-06, "epoch": 0.07649958389347673, "percentage": 7.65, "elapsed_time": "0:03:50", "remaining_time": "0:46:18", "throughput": 16408.2, "total_tokens": 3776576}
|
|
{"current_steps": 1200, "total_steps": 15621, "loss": 0.5857, "lr": 1.5342290467050543e-06, "epoch": 0.07681966583445363, "percentage": 7.68, "elapsed_time": "0:03:50", "remaining_time": "0:46:13", "throughput": 16432.55, "total_tokens": 3792384}
|
|
{"current_steps": 1205, "total_steps": 15621, "loss": 0.6438, "lr": 1.5406269993602045e-06, "epoch": 0.0771397477754305, "percentage": 7.71, "elapsed_time": "0:03:51", "remaining_time": "0:46:07", "throughput": 16452.89, "total_tokens": 3806592}
|
|
{"current_steps": 1210, "total_steps": 15621, "loss": 0.5775, "lr": 1.547024952015355e-06, "epoch": 0.0774598297164074, "percentage": 7.75, "elapsed_time": "0:03:51", "remaining_time": "0:46:02", "throughput": 16476.79, "total_tokens": 3822080}
|
|
{"current_steps": 1215, "total_steps": 15621, "loss": 0.5269, "lr": 1.5534229046705055e-06, "epoch": 0.07777991165738429, "percentage": 7.78, "elapsed_time": "0:03:52", "remaining_time": "0:45:57", "throughput": 16496.07, "total_tokens": 3837120}
|
|
{"current_steps": 1220, "total_steps": 15621, "loss": 0.6994, "lr": 1.5598208573256556e-06, "epoch": 0.07809999359836119, "percentage": 7.81, "elapsed_time": "0:03:53", "remaining_time": "0:45:52", "throughput": 16520.24, "total_tokens": 3852864}
|
|
{"current_steps": 1225, "total_steps": 15621, "loss": 0.515, "lr": 1.566218809980806e-06, "epoch": 0.07842007553933807, "percentage": 7.84, "elapsed_time": "0:03:53", "remaining_time": "0:45:48", "throughput": 16545.72, "total_tokens": 3869184}
|
|
{"current_steps": 1230, "total_steps": 15621, "loss": 0.5388, "lr": 1.5726167626359564e-06, "epoch": 0.07874015748031496, "percentage": 7.87, "elapsed_time": "0:03:54", "remaining_time": "0:45:43", "throughput": 16570.47, "total_tokens": 3885248}
|
|
{"current_steps": 1235, "total_steps": 15621, "loss": 0.4306, "lr": 1.5790147152911068e-06, "epoch": 0.07906023942129185, "percentage": 7.91, "elapsed_time": "0:03:55", "remaining_time": "0:45:38", "throughput": 16593.06, "total_tokens": 3900416}
|
|
{"current_steps": 1240, "total_steps": 15621, "loss": 0.5503, "lr": 1.5854126679462571e-06, "epoch": 0.07938032136226875, "percentage": 7.94, "elapsed_time": "0:03:55", "remaining_time": "0:45:33", "throughput": 16616.15, "total_tokens": 3916096}
|
|
{"current_steps": 1245, "total_steps": 15621, "loss": 0.6993, "lr": 1.5918106206014075e-06, "epoch": 0.07970040330324563, "percentage": 7.97, "elapsed_time": "0:03:56", "remaining_time": "0:45:28", "throughput": 16642.98, "total_tokens": 3933312}
|
|
{"current_steps": 1250, "total_steps": 15621, "loss": 0.6197, "lr": 1.5982085732565577e-06, "epoch": 0.08002048524422252, "percentage": 8.0, "elapsed_time": "0:03:56", "remaining_time": "0:45:24", "throughput": 16667.27, "total_tokens": 3949440}
|
|
{"current_steps": 1255, "total_steps": 15621, "loss": 0.6799, "lr": 1.604606525911708e-06, "epoch": 0.08034056718519941, "percentage": 8.03, "elapsed_time": "0:03:57", "remaining_time": "0:45:19", "throughput": 16690.38, "total_tokens": 3964992}
|
|
{"current_steps": 1260, "total_steps": 15621, "loss": 0.7324, "lr": 1.6110044785668586e-06, "epoch": 0.0806606491261763, "percentage": 8.07, "elapsed_time": "0:03:58", "remaining_time": "0:45:15", "throughput": 16711.69, "total_tokens": 3981696}
|
|
{"current_steps": 1265, "total_steps": 15621, "loss": 0.6136, "lr": 1.617402431222009e-06, "epoch": 0.08098073106715319, "percentage": 8.1, "elapsed_time": "0:03:58", "remaining_time": "0:45:10", "throughput": 16734.6, "total_tokens": 3997248}
|
|
{"current_steps": 1270, "total_steps": 15621, "loss": 0.6689, "lr": 1.6238003838771592e-06, "epoch": 0.08130081300813008, "percentage": 8.13, "elapsed_time": "0:03:59", "remaining_time": "0:45:05", "throughput": 16754.57, "total_tokens": 4011648}
|
|
{"current_steps": 1275, "total_steps": 15621, "loss": 0.5254, "lr": 1.6301983365323096e-06, "epoch": 0.08162089494910697, "percentage": 8.16, "elapsed_time": "0:04:00", "remaining_time": "0:45:01", "throughput": 16778.8, "total_tokens": 4028160}
|
|
{"current_steps": 1280, "total_steps": 15621, "loss": 0.5398, "lr": 1.63659628918746e-06, "epoch": 0.08194097689008387, "percentage": 8.19, "elapsed_time": "0:04:00", "remaining_time": "0:44:56", "throughput": 16800.68, "total_tokens": 4043584}
|
|
{"current_steps": 1285, "total_steps": 15621, "loss": 0.7076, "lr": 1.6429942418426103e-06, "epoch": 0.08226105883106075, "percentage": 8.23, "elapsed_time": "0:04:01", "remaining_time": "0:44:52", "throughput": 16823.34, "total_tokens": 4059456}
|
|
{"current_steps": 1290, "total_steps": 15621, "loss": 0.6103, "lr": 1.6493921944977607e-06, "epoch": 0.08258114077203764, "percentage": 8.26, "elapsed_time": "0:04:01", "remaining_time": "0:44:47", "throughput": 16847.93, "total_tokens": 4076096}
|
|
{"current_steps": 1295, "total_steps": 15621, "loss": 0.6111, "lr": 1.655790147152911e-06, "epoch": 0.08290122271301453, "percentage": 8.29, "elapsed_time": "0:04:02", "remaining_time": "0:44:43", "throughput": 16874.19, "total_tokens": 4093568}
|
|
{"current_steps": 1300, "total_steps": 15621, "loss": 0.6676, "lr": 1.6621880998080612e-06, "epoch": 0.08322130465399143, "percentage": 8.32, "elapsed_time": "0:04:03", "remaining_time": "0:44:38", "throughput": 16895.9, "total_tokens": 4108864}
|
|
{"current_steps": 1305, "total_steps": 15621, "loss": 0.6425, "lr": 1.6685860524632116e-06, "epoch": 0.0835413865949683, "percentage": 8.35, "elapsed_time": "0:04:03", "remaining_time": "0:44:34", "throughput": 16917.1, "total_tokens": 4124224}
|
|
{"current_steps": 1310, "total_steps": 15621, "loss": 0.5516, "lr": 1.6749840051183622e-06, "epoch": 0.0838614685359452, "percentage": 8.39, "elapsed_time": "0:04:04", "remaining_time": "0:44:29", "throughput": 16937.39, "total_tokens": 4139008}
|
|
{"current_steps": 1315, "total_steps": 15621, "loss": 0.5551, "lr": 1.6813819577735124e-06, "epoch": 0.08418155047692209, "percentage": 8.42, "elapsed_time": "0:04:04", "remaining_time": "0:44:25", "throughput": 16959.91, "total_tokens": 4155008}
|
|
{"current_steps": 1320, "total_steps": 15621, "loss": 0.4792, "lr": 1.6877799104286628e-06, "epoch": 0.08450163241789899, "percentage": 8.45, "elapsed_time": "0:04:05", "remaining_time": "0:44:21", "throughput": 16985.7, "total_tokens": 4172544}
|
|
{"current_steps": 1325, "total_steps": 15621, "loss": 0.6306, "lr": 1.6941778630838131e-06, "epoch": 0.08482171435887587, "percentage": 8.48, "elapsed_time": "0:04:06", "remaining_time": "0:44:17", "throughput": 17007.8, "total_tokens": 4188416}
|
|
{"current_steps": 1330, "total_steps": 15621, "loss": 0.5031, "lr": 1.7005758157389633e-06, "epoch": 0.08514179629985276, "percentage": 8.51, "elapsed_time": "0:04:06", "remaining_time": "0:44:12", "throughput": 17026.04, "total_tokens": 4202560}
|
|
{"current_steps": 1335, "total_steps": 15621, "loss": 0.5574, "lr": 1.706973768394114e-06, "epoch": 0.08546187824082965, "percentage": 8.55, "elapsed_time": "0:04:07", "remaining_time": "0:44:08", "throughput": 17049.47, "total_tokens": 4219392}
|
|
{"current_steps": 1340, "total_steps": 15621, "loss": 0.4844, "lr": 1.7133717210492643e-06, "epoch": 0.08578196018180655, "percentage": 8.58, "elapsed_time": "0:04:08", "remaining_time": "0:44:04", "throughput": 17071.02, "total_tokens": 4235328}
|
|
{"current_steps": 1345, "total_steps": 15621, "loss": 0.5778, "lr": 1.7197696737044144e-06, "epoch": 0.08610204212278343, "percentage": 8.61, "elapsed_time": "0:04:08", "remaining_time": "0:43:59", "throughput": 17091.04, "total_tokens": 4250368}
|
|
{"current_steps": 1350, "total_steps": 15621, "loss": 0.4549, "lr": 1.7261676263595648e-06, "epoch": 0.08642212406376032, "percentage": 8.64, "elapsed_time": "0:04:09", "remaining_time": "0:43:55", "throughput": 17111.84, "total_tokens": 4265856}
|
|
{"current_steps": 1355, "total_steps": 15621, "loss": 0.6627, "lr": 1.7325655790147152e-06, "epoch": 0.08674220600473721, "percentage": 8.67, "elapsed_time": "0:04:09", "remaining_time": "0:43:51", "throughput": 17133.31, "total_tokens": 4281792}
|
|
{"current_steps": 1360, "total_steps": 15621, "loss": 0.5873, "lr": 1.7389635316698656e-06, "epoch": 0.0870622879457141, "percentage": 8.71, "elapsed_time": "0:04:10", "remaining_time": "0:43:46", "throughput": 17153.49, "total_tokens": 4297088}
|
|
{"current_steps": 1365, "total_steps": 15621, "loss": 0.5028, "lr": 1.745361484325016e-06, "epoch": 0.087382369886691, "percentage": 8.74, "elapsed_time": "0:04:11", "remaining_time": "0:43:42", "throughput": 17172.61, "total_tokens": 4312192}
|
|
{"current_steps": 1370, "total_steps": 15621, "loss": 0.4819, "lr": 1.7517594369801663e-06, "epoch": 0.08770245182766788, "percentage": 8.77, "elapsed_time": "0:04:11", "remaining_time": "0:43:38", "throughput": 17191.03, "total_tokens": 4326720}
|
|
{"current_steps": 1375, "total_steps": 15621, "loss": 0.7894, "lr": 1.7581573896353165e-06, "epoch": 0.08802253376864477, "percentage": 8.8, "elapsed_time": "0:04:12", "remaining_time": "0:43:33", "throughput": 17210.28, "total_tokens": 4341760}
|
|
{"current_steps": 1380, "total_steps": 15621, "loss": 0.6215, "lr": 1.7645553422904669e-06, "epoch": 0.08834261570962167, "percentage": 8.83, "elapsed_time": "0:04:12", "remaining_time": "0:43:29", "throughput": 17231.69, "total_tokens": 4357760}
|
|
{"current_steps": 1385, "total_steps": 15621, "loss": 0.6267, "lr": 1.7709532949456175e-06, "epoch": 0.08866269765059856, "percentage": 8.87, "elapsed_time": "0:04:13", "remaining_time": "0:43:25", "throughput": 17252.74, "total_tokens": 4373824}
|
|
{"current_steps": 1390, "total_steps": 15621, "loss": 0.4739, "lr": 1.7773512476007676e-06, "epoch": 0.08898277959157544, "percentage": 8.9, "elapsed_time": "0:04:14", "remaining_time": "0:43:21", "throughput": 17272.25, "total_tokens": 4388992}
|
|
{"current_steps": 1395, "total_steps": 15621, "loss": 0.5295, "lr": 1.783749200255918e-06, "epoch": 0.08930286153255233, "percentage": 8.93, "elapsed_time": "0:04:14", "remaining_time": "0:43:17", "throughput": 17291.3, "total_tokens": 4404288}
|
|
{"current_steps": 1400, "total_steps": 15621, "loss": 0.5366, "lr": 1.7901471529110684e-06, "epoch": 0.08962294347352923, "percentage": 8.96, "elapsed_time": "0:04:15", "remaining_time": "0:43:13", "throughput": 17311.67, "total_tokens": 4419840}
|
|
{"current_steps": 1405, "total_steps": 15621, "loss": 0.5109, "lr": 1.7965451055662186e-06, "epoch": 0.08994302541450612, "percentage": 8.99, "elapsed_time": "0:04:15", "remaining_time": "0:43:09", "throughput": 17331.11, "total_tokens": 4435200}
|
|
{"current_steps": 1410, "total_steps": 15621, "loss": 0.6082, "lr": 1.8029430582213691e-06, "epoch": 0.090263107355483, "percentage": 9.03, "elapsed_time": "0:04:16", "remaining_time": "0:43:05", "throughput": 17349.74, "total_tokens": 4450368}
|
|
{"current_steps": 1415, "total_steps": 15621, "loss": 0.4889, "lr": 1.8093410108765195e-06, "epoch": 0.09058318929645989, "percentage": 9.06, "elapsed_time": "0:04:17", "remaining_time": "0:43:01", "throughput": 17369.76, "total_tokens": 4466048}
|
|
{"current_steps": 1420, "total_steps": 15621, "loss": 0.5985, "lr": 1.8157389635316697e-06, "epoch": 0.09090327123743679, "percentage": 9.09, "elapsed_time": "0:04:17", "remaining_time": "0:42:57", "throughput": 17390.02, "total_tokens": 4481920}
|
|
{"current_steps": 1425, "total_steps": 15621, "loss": 0.5671, "lr": 1.82213691618682e-06, "epoch": 0.09122335317841368, "percentage": 9.12, "elapsed_time": "0:04:18", "remaining_time": "0:42:53", "throughput": 17411.01, "total_tokens": 4498112}
|
|
{"current_steps": 1430, "total_steps": 15621, "loss": 0.4306, "lr": 1.8285348688419704e-06, "epoch": 0.09154343511939056, "percentage": 9.15, "elapsed_time": "0:04:19", "remaining_time": "0:42:50", "throughput": 17434.17, "total_tokens": 4515648}
|
|
{"current_steps": 1435, "total_steps": 15621, "loss": 0.5719, "lr": 1.8349328214971208e-06, "epoch": 0.09186351706036745, "percentage": 9.19, "elapsed_time": "0:04:19", "remaining_time": "0:42:46", "throughput": 17454.49, "total_tokens": 4531840}
|
|
{"current_steps": 1440, "total_steps": 15621, "loss": 0.5478, "lr": 1.8413307741522712e-06, "epoch": 0.09218359900134435, "percentage": 9.22, "elapsed_time": "0:04:20", "remaining_time": "0:42:42", "throughput": 17473.75, "total_tokens": 4547456}
|
|
{"current_steps": 1445, "total_steps": 15621, "loss": 0.557, "lr": 1.8477287268074216e-06, "epoch": 0.09250368094232124, "percentage": 9.25, "elapsed_time": "0:04:20", "remaining_time": "0:42:39", "throughput": 17493.48, "total_tokens": 4563328}
|
|
{"current_steps": 1450, "total_steps": 15621, "loss": 0.5856, "lr": 1.8541266794625718e-06, "epoch": 0.09282376288329812, "percentage": 9.28, "elapsed_time": "0:04:21", "remaining_time": "0:42:35", "throughput": 17512.95, "total_tokens": 4579392}
|
|
{"current_steps": 1455, "total_steps": 15621, "loss": 0.6149, "lr": 1.8605246321177221e-06, "epoch": 0.09314384482427501, "percentage": 9.31, "elapsed_time": "0:04:22", "remaining_time": "0:42:31", "throughput": 17533.03, "total_tokens": 4595584}
|
|
{"current_steps": 1460, "total_steps": 15621, "loss": 0.5711, "lr": 1.8669225847728727e-06, "epoch": 0.0934639267652519, "percentage": 9.35, "elapsed_time": "0:04:22", "remaining_time": "0:42:27", "throughput": 17549.28, "total_tokens": 4610112}
|
|
{"current_steps": 1465, "total_steps": 15621, "loss": 0.6948, "lr": 1.8733205374280229e-06, "epoch": 0.0937840087062288, "percentage": 9.38, "elapsed_time": "0:04:23", "remaining_time": "0:42:24", "throughput": 17569.55, "total_tokens": 4626432}
|
|
{"current_steps": 1470, "total_steps": 15621, "loss": 0.5771, "lr": 1.8797184900831733e-06, "epoch": 0.09410409064720568, "percentage": 9.41, "elapsed_time": "0:04:23", "remaining_time": "0:42:20", "throughput": 17587.71, "total_tokens": 4641792}
|
|
{"current_steps": 1475, "total_steps": 15621, "loss": 0.4046, "lr": 1.8861164427383236e-06, "epoch": 0.09442417258818257, "percentage": 9.44, "elapsed_time": "0:04:24", "remaining_time": "0:42:16", "throughput": 17605.07, "total_tokens": 4656896}
|
|
{"current_steps": 1480, "total_steps": 15621, "loss": 0.605, "lr": 1.8925143953934738e-06, "epoch": 0.09474425452915947, "percentage": 9.47, "elapsed_time": "0:04:25", "remaining_time": "0:42:13", "throughput": 17624.7, "total_tokens": 4673472}
|
|
{"current_steps": 1485, "total_steps": 15621, "loss": 0.426, "lr": 1.8989123480486244e-06, "epoch": 0.09506433647013636, "percentage": 9.51, "elapsed_time": "0:04:25", "remaining_time": "0:42:09", "throughput": 17642.54, "total_tokens": 4688896}
|
|
{"current_steps": 1490, "total_steps": 15621, "loss": 0.6785, "lr": 1.9053103007037748e-06, "epoch": 0.09538441841111324, "percentage": 9.54, "elapsed_time": "0:04:26", "remaining_time": "0:42:06", "throughput": 17660.7, "total_tokens": 4704576}
|
|
{"current_steps": 1495, "total_steps": 15621, "loss": 0.6069, "lr": 1.911708253358925e-06, "epoch": 0.09570450035209013, "percentage": 9.57, "elapsed_time": "0:04:26", "remaining_time": "0:42:02", "throughput": 17676.89, "total_tokens": 4719040}
|
|
{"current_steps": 1500, "total_steps": 15621, "loss": 0.4831, "lr": 1.9181062060140753e-06, "epoch": 0.09602458229306703, "percentage": 9.6, "elapsed_time": "0:04:27", "remaining_time": "0:41:58", "throughput": 17693.63, "total_tokens": 4733696}
|
|
{"current_steps": 1505, "total_steps": 15621, "loss": 0.6045, "lr": 1.9245041586692255e-06, "epoch": 0.09634466423404392, "percentage": 9.63, "elapsed_time": "0:04:28", "remaining_time": "0:41:54", "throughput": 17711.34, "total_tokens": 4748992}
|
|
{"current_steps": 1510, "total_steps": 15621, "loss": 0.6876, "lr": 1.930902111324376e-06, "epoch": 0.0966647461750208, "percentage": 9.67, "elapsed_time": "0:04:28", "remaining_time": "0:41:51", "throughput": 17730.21, "total_tokens": 4764992}
|
|
{"current_steps": 1515, "total_steps": 15621, "loss": 0.6773, "lr": 1.9373000639795267e-06, "epoch": 0.09698482811599769, "percentage": 9.7, "elapsed_time": "0:04:29", "remaining_time": "0:41:47", "throughput": 17747.51, "total_tokens": 4780352}
|
|
{"current_steps": 1520, "total_steps": 15621, "loss": 0.5393, "lr": 1.943698016634677e-06, "epoch": 0.09730491005697459, "percentage": 9.73, "elapsed_time": "0:04:29", "remaining_time": "0:41:44", "throughput": 17765.38, "total_tokens": 4796224}
|
|
{"current_steps": 1525, "total_steps": 15621, "loss": 0.5401, "lr": 1.950095969289827e-06, "epoch": 0.09762499199795148, "percentage": 9.76, "elapsed_time": "0:04:30", "remaining_time": "0:41:41", "throughput": 17782.85, "total_tokens": 4811840}
|
|
{"current_steps": 1530, "total_steps": 15621, "loss": 0.5811, "lr": 1.9564939219449776e-06, "epoch": 0.09794507393892836, "percentage": 9.79, "elapsed_time": "0:04:31", "remaining_time": "0:41:37", "throughput": 17798.53, "total_tokens": 4826432}
|
|
{"current_steps": 1535, "total_steps": 15621, "loss": 0.393, "lr": 1.9628918746001278e-06, "epoch": 0.09826515587990525, "percentage": 9.83, "elapsed_time": "0:04:31", "remaining_time": "0:41:33", "throughput": 17815.96, "total_tokens": 4841920}
|
|
{"current_steps": 1540, "total_steps": 15621, "loss": 0.5971, "lr": 1.9692898272552783e-06, "epoch": 0.09858523782088215, "percentage": 9.86, "elapsed_time": "0:04:32", "remaining_time": "0:41:30", "throughput": 17833.68, "total_tokens": 4857536}
|
|
{"current_steps": 1545, "total_steps": 15621, "loss": 0.6844, "lr": 1.9756877799104285e-06, "epoch": 0.09890531976185904, "percentage": 9.89, "elapsed_time": "0:04:32", "remaining_time": "0:41:27", "throughput": 17851.69, "total_tokens": 4873408}
|
|
{"current_steps": 1550, "total_steps": 15621, "loss": 0.5973, "lr": 1.9820857325655787e-06, "epoch": 0.09922540170283592, "percentage": 9.92, "elapsed_time": "0:04:33", "remaining_time": "0:41:23", "throughput": 17869.92, "total_tokens": 4889536}
|
|
{"current_steps": 1555, "total_steps": 15621, "loss": 0.627, "lr": 1.9884836852207293e-06, "epoch": 0.09954548364381281, "percentage": 9.95, "elapsed_time": "0:04:34", "remaining_time": "0:41:20", "throughput": 17885.91, "total_tokens": 4904448}
|
|
{"current_steps": 1560, "total_steps": 15621, "loss": 0.6569, "lr": 1.99488163787588e-06, "epoch": 0.0998655655847897, "percentage": 9.99, "elapsed_time": "0:04:34", "remaining_time": "0:41:16", "throughput": 17902.01, "total_tokens": 4919616}
|
|
{"current_steps": 1564, "total_steps": 15621, "eval_loss": 0.5394634008407593, "epoch": 0.10012163113757122, "percentage": 10.01, "elapsed_time": "0:05:24", "remaining_time": "0:48:36", "throughput": 15199.04, "total_tokens": 4931328}
|
|
{"current_steps": 1565, "total_steps": 15621, "loss": 0.516, "lr": 1.9999999750297625e-06, "epoch": 0.1001856475257666, "percentage": 10.02, "elapsed_time": "0:06:07", "remaining_time": "0:54:58", "throughput": 13437.08, "total_tokens": 4934144}
|
|
{"current_steps": 1570, "total_steps": 15621, "loss": 0.523, "lr": 1.9999991010715873e-06, "epoch": 0.1005057294667435, "percentage": 10.05, "elapsed_time": "0:06:07", "remaining_time": "0:54:51", "throughput": 13458.09, "total_tokens": 4950272}
|
|
{"current_steps": 1575, "total_steps": 15621, "loss": 0.5346, "lr": 1.999996978602793e-06, "epoch": 0.10082581140772037, "percentage": 10.08, "elapsed_time": "0:06:08", "remaining_time": "0:54:45", "throughput": 13476.85, "total_tokens": 4965056}
|
|
{"current_steps": 1580, "total_steps": 15621, "loss": 0.5489, "lr": 1.99999360762603e-06, "epoch": 0.10114589334869727, "percentage": 10.11, "elapsed_time": "0:06:09", "remaining_time": "0:54:39", "throughput": 13495.84, "total_tokens": 4980160}
|
|
{"current_steps": 1585, "total_steps": 15621, "loss": 0.453, "lr": 1.9999889881455065e-06, "epoch": 0.10146597528967416, "percentage": 10.15, "elapsed_time": "0:06:09", "remaining_time": "0:54:33", "throughput": 13517.74, "total_tokens": 4996992}
|
|
{"current_steps": 1590, "total_steps": 15621, "loss": 0.5146, "lr": 1.9999831201669897e-06, "epoch": 0.10178605723065105, "percentage": 10.18, "elapsed_time": "0:06:10", "remaining_time": "0:54:27", "throughput": 13537.87, "total_tokens": 5012608}
|
|
{"current_steps": 1595, "total_steps": 15621, "loss": 0.4848, "lr": 1.9999760036978067e-06, "epoch": 0.10210613917162793, "percentage": 10.21, "elapsed_time": "0:06:10", "remaining_time": "0:54:21", "throughput": 13557.1, "total_tokens": 5027840}
|
|
{"current_steps": 1600, "total_steps": 15621, "loss": 0.5746, "lr": 1.9999676387468417e-06, "epoch": 0.10242622111260483, "percentage": 10.24, "elapsed_time": "0:06:11", "remaining_time": "0:54:15", "throughput": 13575.9, "total_tokens": 5042752}
|
|
{"current_steps": 1605, "total_steps": 15621, "loss": 0.5487, "lr": 1.999958025324539e-06, "epoch": 0.10274630305358172, "percentage": 10.27, "elapsed_time": "0:06:12", "remaining_time": "0:54:09", "throughput": 13596.1, "total_tokens": 5058624}
|
|
{"current_steps": 1610, "total_steps": 15621, "loss": 0.6233, "lr": 1.999947163442901e-06, "epoch": 0.10306638499455861, "percentage": 10.31, "elapsed_time": "0:06:12", "remaining_time": "0:54:03", "throughput": 13617.23, "total_tokens": 5075008}
|
|
{"current_steps": 1615, "total_steps": 15621, "loss": 0.5332, "lr": 1.9999350531154884e-06, "epoch": 0.10338646693553549, "percentage": 10.34, "elapsed_time": "0:06:13", "remaining_time": "0:53:57", "throughput": 13637.33, "total_tokens": 5090880}
|
|
{"current_steps": 1620, "total_steps": 15621, "loss": 0.5713, "lr": 1.9999216943574223e-06, "epoch": 0.10370654887651239, "percentage": 10.37, "elapsed_time": "0:06:13", "remaining_time": "0:53:51", "throughput": 13657.18, "total_tokens": 5106816}
|
|
{"current_steps": 1625, "total_steps": 15621, "loss": 0.4563, "lr": 1.9999070871853796e-06, "epoch": 0.10402663081748928, "percentage": 10.4, "elapsed_time": "0:06:14", "remaining_time": "0:53:46", "throughput": 13678.99, "total_tokens": 5123904}
|
|
{"current_steps": 1630, "total_steps": 15621, "loss": 0.4954, "lr": 1.9998912316175986e-06, "epoch": 0.10434671275846617, "percentage": 10.43, "elapsed_time": "0:06:15", "remaining_time": "0:53:40", "throughput": 13699.5, "total_tokens": 5140160}
|
|
{"current_steps": 1635, "total_steps": 15621, "loss": 0.5159, "lr": 1.9998741276738752e-06, "epoch": 0.10466679469944305, "percentage": 10.47, "elapsed_time": "0:06:15", "remaining_time": "0:53:34", "throughput": 13719.42, "total_tokens": 5156288}
|
|
{"current_steps": 1640, "total_steps": 15621, "loss": 0.5823, "lr": 1.999855775375563e-06, "epoch": 0.10498687664041995, "percentage": 10.5, "elapsed_time": "0:06:16", "remaining_time": "0:53:29", "throughput": 13738.6, "total_tokens": 5171776}
|
|
{"current_steps": 1645, "total_steps": 15621, "loss": 0.683, "lr": 1.999836174745576e-06, "epoch": 0.10530695858139684, "percentage": 10.53, "elapsed_time": "0:06:17", "remaining_time": "0:53:23", "throughput": 13761.13, "total_tokens": 5189504}
|
|
{"current_steps": 1650, "total_steps": 15621, "loss": 0.5783, "lr": 1.9998153258083853e-06, "epoch": 0.10562704052237373, "percentage": 10.56, "elapsed_time": "0:06:17", "remaining_time": "0:53:18", "throughput": 13780.38, "total_tokens": 5205056}
|
|
{"current_steps": 1655, "total_steps": 15621, "loss": 0.586, "lr": 1.9997932285900214e-06, "epoch": 0.10594712246335061, "percentage": 10.59, "elapsed_time": "0:06:18", "remaining_time": "0:53:13", "throughput": 13802.75, "total_tokens": 5222656}
|
|
{"current_steps": 1660, "total_steps": 15621, "loss": 0.6272, "lr": 1.9997698831180726e-06, "epoch": 0.1062672044043275, "percentage": 10.63, "elapsed_time": "0:06:19", "remaining_time": "0:53:07", "throughput": 13822.59, "total_tokens": 5238848}
|
|
{"current_steps": 1665, "total_steps": 15621, "loss": 0.5203, "lr": 1.999745289421686e-06, "epoch": 0.1065872863453044, "percentage": 10.66, "elapsed_time": "0:06:19", "remaining_time": "0:53:02", "throughput": 13843.03, "total_tokens": 5255296}
|
|
{"current_steps": 1670, "total_steps": 15621, "loss": 0.7716, "lr": 1.9997194475315674e-06, "epoch": 0.10690736828628129, "percentage": 10.69, "elapsed_time": "0:06:20", "remaining_time": "0:52:56", "throughput": 13861.05, "total_tokens": 5270336}
|
|
{"current_steps": 1675, "total_steps": 15621, "loss": 0.4842, "lr": 1.9996923574799808e-06, "epoch": 0.10722745022725817, "percentage": 10.72, "elapsed_time": "0:06:20", "remaining_time": "0:52:50", "throughput": 13881.16, "total_tokens": 5286720}
|
|
{"current_steps": 1680, "total_steps": 15621, "loss": 0.6428, "lr": 1.9996640193007476e-06, "epoch": 0.10754753216823507, "percentage": 10.75, "elapsed_time": "0:06:21", "remaining_time": "0:52:45", "throughput": 13898.87, "total_tokens": 5301632}
|
|
{"current_steps": 1685, "total_steps": 15621, "loss": 0.403, "lr": 1.9996344330292495e-06, "epoch": 0.10786761410921196, "percentage": 10.79, "elapsed_time": "0:06:22", "remaining_time": "0:52:39", "throughput": 13916.51, "total_tokens": 5316544}
|
|
{"current_steps": 1690, "total_steps": 15621, "loss": 0.5503, "lr": 1.9996035987024245e-06, "epoch": 0.10818769605018885, "percentage": 10.82, "elapsed_time": "0:06:22", "remaining_time": "0:52:34", "throughput": 13935.64, "total_tokens": 5332544}
|
|
{"current_steps": 1695, "total_steps": 15621, "loss": 0.5388, "lr": 1.99957151635877e-06, "epoch": 0.10850777799116573, "percentage": 10.85, "elapsed_time": "0:06:23", "remaining_time": "0:52:28", "throughput": 13954.12, "total_tokens": 5348096}
|
|
{"current_steps": 1700, "total_steps": 15621, "loss": 0.6275, "lr": 1.999538186038341e-06, "epoch": 0.10882785993214263, "percentage": 10.88, "elapsed_time": "0:06:23", "remaining_time": "0:52:23", "throughput": 13970.51, "total_tokens": 5362368}
|
|
{"current_steps": 1705, "total_steps": 15621, "loss": 0.5426, "lr": 1.999503607782751e-06, "epoch": 0.10914794187311952, "percentage": 10.91, "elapsed_time": "0:06:24", "remaining_time": "0:52:17", "throughput": 13989.29, "total_tokens": 5378176}
|
|
{"current_steps": 1710, "total_steps": 15621, "loss": 0.5163, "lr": 1.999467781635171e-06, "epoch": 0.10946802381409641, "percentage": 10.95, "elapsed_time": "0:06:25", "remaining_time": "0:52:12", "throughput": 14009.26, "total_tokens": 5394752}
|
|
{"current_steps": 1715, "total_steps": 15621, "loss": 0.6991, "lr": 1.9994307076403306e-06, "epoch": 0.10978810575507329, "percentage": 10.98, "elapsed_time": "0:06:25", "remaining_time": "0:52:07", "throughput": 14030.5, "total_tokens": 5412160}
|
|
{"current_steps": 1720, "total_steps": 15621, "loss": 0.5245, "lr": 1.999392385844517e-06, "epoch": 0.11010818769605019, "percentage": 11.01, "elapsed_time": "0:06:26", "remaining_time": "0:52:02", "throughput": 14048.81, "total_tokens": 5427840}
|
|
{"current_steps": 1725, "total_steps": 15621, "loss": 0.4035, "lr": 1.9993528162955753e-06, "epoch": 0.11042826963702708, "percentage": 11.04, "elapsed_time": "0:06:26", "remaining_time": "0:51:57", "throughput": 14068.19, "total_tokens": 5444224}
|
|
{"current_steps": 1730, "total_steps": 15621, "loss": 0.5767, "lr": 1.9993119990429095e-06, "epoch": 0.11074835157800397, "percentage": 11.07, "elapsed_time": "0:06:27", "remaining_time": "0:51:52", "throughput": 14085.98, "total_tokens": 5459648}
|
|
{"current_steps": 1735, "total_steps": 15621, "loss": 0.7821, "lr": 1.9992699341374794e-06, "epoch": 0.11106843351898085, "percentage": 11.11, "elapsed_time": "0:06:28", "remaining_time": "0:51:46", "throughput": 14103.62, "total_tokens": 5475008}
|
|
{"current_steps": 1740, "total_steps": 15621, "loss": 0.5285, "lr": 1.9992266216318033e-06, "epoch": 0.11138851545995775, "percentage": 11.14, "elapsed_time": "0:06:28", "remaining_time": "0:51:41", "throughput": 14122.77, "total_tokens": 5491456}
|
|
{"current_steps": 1745, "total_steps": 15621, "loss": 0.5674, "lr": 1.9991820615799583e-06, "epoch": 0.11170859740093464, "percentage": 11.17, "elapsed_time": "0:06:29", "remaining_time": "0:51:36", "throughput": 14141.45, "total_tokens": 5507520}
|
|
{"current_steps": 1750, "total_steps": 15621, "loss": 0.6917, "lr": 1.999136254037578e-06, "epoch": 0.11202867934191153, "percentage": 11.2, "elapsed_time": "0:06:30", "remaining_time": "0:51:31", "throughput": 14159.28, "total_tokens": 5523072}
|
|
{"current_steps": 1755, "total_steps": 15621, "loss": 0.5094, "lr": 1.999089199061853e-06, "epoch": 0.11234876128288843, "percentage": 11.23, "elapsed_time": "0:06:30", "remaining_time": "0:51:26", "throughput": 14176.49, "total_tokens": 5538304}
|
|
{"current_steps": 1760, "total_steps": 15621, "loss": 0.4612, "lr": 1.9990408967115326e-06, "epoch": 0.1126688432238653, "percentage": 11.27, "elapsed_time": "0:06:31", "remaining_time": "0:51:21", "throughput": 14194.39, "total_tokens": 5553920}
|
|
{"current_steps": 1765, "total_steps": 15621, "loss": 0.4599, "lr": 1.998991347046922e-06, "epoch": 0.1129889251648422, "percentage": 11.3, "elapsed_time": "0:06:31", "remaining_time": "0:51:16", "throughput": 14211.69, "total_tokens": 5569344}
|
|
{"current_steps": 1770, "total_steps": 15621, "loss": 0.5104, "lr": 1.9989405501298857e-06, "epoch": 0.11330900710581909, "percentage": 11.33, "elapsed_time": "0:06:32", "remaining_time": "0:51:11", "throughput": 14231.06, "total_tokens": 5585856}
|
|
{"current_steps": 1775, "total_steps": 15621, "loss": 0.5755, "lr": 1.9988885060238436e-06, "epoch": 0.11362908904679599, "percentage": 11.36, "elapsed_time": "0:06:33", "remaining_time": "0:51:07", "throughput": 14252.32, "total_tokens": 5603840}
|
|
{"current_steps": 1780, "total_steps": 15621, "loss": 0.5167, "lr": 1.9988352147937735e-06, "epoch": 0.11394917098777287, "percentage": 11.39, "elapsed_time": "0:06:33", "remaining_time": "0:51:02", "throughput": 14271.05, "total_tokens": 5620352}
|
|
{"current_steps": 1785, "total_steps": 15621, "loss": 0.5382, "lr": 1.99878067650621e-06, "epoch": 0.11426925292874976, "percentage": 11.43, "elapsed_time": "0:06:34", "remaining_time": "0:50:57", "throughput": 14289.09, "total_tokens": 5636544}
|
|
{"current_steps": 1790, "total_steps": 15621, "loss": 0.5438, "lr": 1.998724891229245e-06, "epoch": 0.11458933486972665, "percentage": 11.46, "elapsed_time": "0:06:35", "remaining_time": "0:50:52", "throughput": 14307.1, "total_tokens": 5652672}
|
|
{"current_steps": 1795, "total_steps": 15621, "loss": 0.4956, "lr": 1.998667859032527e-06, "epoch": 0.11490941681070355, "percentage": 11.49, "elapsed_time": "0:06:35", "remaining_time": "0:50:47", "throughput": 14324.31, "total_tokens": 5668224}
|
|
{"current_steps": 1800, "total_steps": 15621, "loss": 0.4506, "lr": 1.9986095799872613e-06, "epoch": 0.11522949875168043, "percentage": 11.52, "elapsed_time": "0:06:36", "remaining_time": "0:50:43", "throughput": 14342.73, "total_tokens": 5684480}
|
|
{"current_steps": 1805, "total_steps": 15621, "loss": 0.472, "lr": 1.99855005416621e-06, "epoch": 0.11554958069265732, "percentage": 11.55, "elapsed_time": "0:06:36", "remaining_time": "0:50:38", "throughput": 14361.01, "total_tokens": 5700864}
|
|
{"current_steps": 1810, "total_steps": 15621, "loss": 0.5965, "lr": 1.998489281643692e-06, "epoch": 0.11586966263363421, "percentage": 11.59, "elapsed_time": "0:06:37", "remaining_time": "0:50:33", "throughput": 14377.85, "total_tokens": 5716224}
|
|
{"current_steps": 1815, "total_steps": 15621, "loss": 0.4977, "lr": 1.998427262495582e-06, "epoch": 0.1161897445746111, "percentage": 11.62, "elapsed_time": "0:06:38", "remaining_time": "0:50:29", "throughput": 14396.77, "total_tokens": 5733056}
|
|
{"current_steps": 1820, "total_steps": 15621, "loss": 0.6683, "lr": 1.9983639967993124e-06, "epoch": 0.11650982651558799, "percentage": 11.65, "elapsed_time": "0:06:38", "remaining_time": "0:50:24", "throughput": 14414.48, "total_tokens": 5749120}
|
|
{"current_steps": 1825, "total_steps": 15621, "loss": 0.7297, "lr": 1.99829948463387e-06, "epoch": 0.11682990845656488, "percentage": 11.68, "elapsed_time": "0:06:39", "remaining_time": "0:50:19", "throughput": 14430.34, "total_tokens": 5763968}
|
|
{"current_steps": 1830, "total_steps": 15621, "loss": 0.543, "lr": 1.9982337260798e-06, "epoch": 0.11714999039754177, "percentage": 11.71, "elapsed_time": "0:06:40", "remaining_time": "0:50:14", "throughput": 14447.24, "total_tokens": 5779520}
|
|
{"current_steps": 1835, "total_steps": 15621, "loss": 0.5856, "lr": 1.998166721219203e-06, "epoch": 0.11747007233851867, "percentage": 11.75, "elapsed_time": "0:06:40", "remaining_time": "0:50:10", "throughput": 14469.24, "total_tokens": 5798848}
|
|
{"current_steps": 1840, "total_steps": 15621, "loss": 0.5155, "lr": 1.9980984701357338e-06, "epoch": 0.11779015427949555, "percentage": 11.78, "elapsed_time": "0:06:41", "remaining_time": "0:50:06", "throughput": 14485.42, "total_tokens": 5813952}
|
|
{"current_steps": 1845, "total_steps": 15621, "loss": 0.4362, "lr": 1.998028972914606e-06, "epoch": 0.11811023622047244, "percentage": 11.81, "elapsed_time": "0:06:41", "remaining_time": "0:50:01", "throughput": 14502.85, "total_tokens": 5830016}
|
|
{"current_steps": 1850, "total_steps": 15621, "loss": 0.5893, "lr": 1.9979582296425877e-06, "epoch": 0.11843031816144933, "percentage": 11.84, "elapsed_time": "0:06:42", "remaining_time": "0:49:56", "throughput": 14519.04, "total_tokens": 5845312}
|
|
{"current_steps": 1855, "total_steps": 15621, "loss": 0.5851, "lr": 1.9978862404080022e-06, "epoch": 0.11875040010242623, "percentage": 11.88, "elapsed_time": "0:06:43", "remaining_time": "0:49:52", "throughput": 14535.34, "total_tokens": 5860672}
|
|
{"current_steps": 1860, "total_steps": 15621, "loss": 0.5376, "lr": 1.9978130053007295e-06, "epoch": 0.1190704820434031, "percentage": 11.91, "elapsed_time": "0:06:44", "remaining_time": "0:49:53", "throughput": 14524.04, "total_tokens": 5875776}
|
|
{"current_steps": 1865, "total_steps": 15621, "loss": 0.4319, "lr": 1.9977385244122034e-06, "epoch": 0.11939056398438, "percentage": 11.94, "elapsed_time": "0:06:45", "remaining_time": "0:49:48", "throughput": 14540.38, "total_tokens": 5891200}
|
|
{"current_steps": 1870, "total_steps": 15621, "loss": 0.4821, "lr": 1.997662797835415e-06, "epoch": 0.11971064592535689, "percentage": 11.97, "elapsed_time": "0:06:45", "remaining_time": "0:49:43", "throughput": 14557.16, "total_tokens": 5907008}
|
|
{"current_steps": 1875, "total_steps": 15621, "loss": 0.4645, "lr": 1.9975858256649097e-06, "epoch": 0.12003072786633379, "percentage": 12.0, "elapsed_time": "0:06:46", "remaining_time": "0:49:39", "throughput": 14574.47, "total_tokens": 5923264}
|
|
{"current_steps": 1880, "total_steps": 15621, "loss": 0.4911, "lr": 1.997507607996788e-06, "epoch": 0.12035080980731067, "percentage": 12.04, "elapsed_time": "0:06:47", "remaining_time": "0:49:35", "throughput": 14592.14, "total_tokens": 5939648}
|
|
{"current_steps": 1885, "total_steps": 15621, "loss": 0.4557, "lr": 1.997428144928706e-06, "epoch": 0.12067089174828756, "percentage": 12.07, "elapsed_time": "0:06:47", "remaining_time": "0:49:30", "throughput": 14609.34, "total_tokens": 5955520}
|
|
{"current_steps": 1890, "total_steps": 15621, "loss": 0.5237, "lr": 1.9973474365598736e-06, "epoch": 0.12099097368926445, "percentage": 12.1, "elapsed_time": "0:06:48", "remaining_time": "0:49:26", "throughput": 14625.67, "total_tokens": 5971072}
|
|
{"current_steps": 1895, "total_steps": 15621, "loss": 0.5787, "lr": 1.9972654829910568e-06, "epoch": 0.12131105563024135, "percentage": 12.13, "elapsed_time": "0:06:48", "remaining_time": "0:49:21", "throughput": 14642.68, "total_tokens": 5987264}
|
|
{"current_steps": 1900, "total_steps": 15621, "loss": 0.6193, "lr": 1.9971822843245748e-06, "epoch": 0.12163113757121823, "percentage": 12.16, "elapsed_time": "0:06:49", "remaining_time": "0:49:17", "throughput": 14659.05, "total_tokens": 6002880}
|
|
{"current_steps": 1905, "total_steps": 15621, "loss": 0.5277, "lr": 1.997097840664303e-06, "epoch": 0.12195121951219512, "percentage": 12.2, "elapsed_time": "0:06:50", "remaining_time": "0:49:12", "throughput": 14676.91, "total_tokens": 6019520}
|
|
{"current_steps": 1910, "total_steps": 15621, "loss": 0.5641, "lr": 1.99701215211567e-06, "epoch": 0.12227130145317201, "percentage": 12.23, "elapsed_time": "0:06:50", "remaining_time": "0:49:08", "throughput": 14694.24, "total_tokens": 6035904}
|
|
{"current_steps": 1915, "total_steps": 15621, "loss": 0.6009, "lr": 1.9969252187856587e-06, "epoch": 0.1225913833941489, "percentage": 12.26, "elapsed_time": "0:06:51", "remaining_time": "0:49:04", "throughput": 14709.44, "total_tokens": 6050816}
|
|
{"current_steps": 1920, "total_steps": 15621, "loss": 0.4204, "lr": 1.9968370407828065e-06, "epoch": 0.12291146533512579, "percentage": 12.29, "elapsed_time": "0:06:51", "remaining_time": "0:48:59", "throughput": 14725.04, "total_tokens": 6065920}
|
|
{"current_steps": 1925, "total_steps": 15621, "loss": 0.5962, "lr": 1.996747618217205e-06, "epoch": 0.12323154727610268, "percentage": 12.32, "elapsed_time": "0:06:52", "remaining_time": "0:48:55", "throughput": 14741.28, "total_tokens": 6081728}
|
|
{"current_steps": 1930, "total_steps": 15621, "loss": 0.4945, "lr": 1.9966569512004987e-06, "epoch": 0.12355162921707957, "percentage": 12.36, "elapsed_time": "0:06:53", "remaining_time": "0:48:50", "throughput": 14757.77, "total_tokens": 6097472}
|
|
{"current_steps": 1935, "total_steps": 15621, "loss": 0.5101, "lr": 1.996565039845887e-06, "epoch": 0.12387171115805647, "percentage": 12.39, "elapsed_time": "0:06:53", "remaining_time": "0:48:46", "throughput": 14773.84, "total_tokens": 6113152}
|
|
{"current_steps": 1940, "total_steps": 15621, "loss": 0.614, "lr": 1.996471884268122e-06, "epoch": 0.12419179309903335, "percentage": 12.42, "elapsed_time": "0:06:54", "remaining_time": "0:48:42", "throughput": 14790.91, "total_tokens": 6129408}
|
|
{"current_steps": 1945, "total_steps": 15621, "loss": 0.545, "lr": 1.9963774845835097e-06, "epoch": 0.12451187504001024, "percentage": 12.45, "elapsed_time": "0:06:55", "remaining_time": "0:48:38", "throughput": 14806.89, "total_tokens": 6144896}
|
|
{"current_steps": 1950, "total_steps": 15621, "loss": 0.5868, "lr": 1.996281840909909e-06, "epoch": 0.12483195698098713, "percentage": 12.48, "elapsed_time": "0:06:55", "remaining_time": "0:48:33", "throughput": 14822.38, "total_tokens": 6160256}
|
|
{"current_steps": 1955, "total_steps": 15621, "loss": 0.6354, "lr": 1.9961849533667322e-06, "epoch": 0.12515203892196403, "percentage": 12.52, "elapsed_time": "0:06:56", "remaining_time": "0:48:29", "throughput": 14837.33, "total_tokens": 6175104}
|
|
{"current_steps": 1960, "total_steps": 15621, "loss": 0.5185, "lr": 1.9960868220749447e-06, "epoch": 0.1254721208629409, "percentage": 12.55, "elapsed_time": "0:06:56", "remaining_time": "0:48:24", "throughput": 14852.43, "total_tokens": 6190272}
|
|
{"current_steps": 1965, "total_steps": 15621, "loss": 0.5855, "lr": 1.9959874471570644e-06, "epoch": 0.1257922028039178, "percentage": 12.58, "elapsed_time": "0:06:57", "remaining_time": "0:48:20", "throughput": 14868.1, "total_tokens": 6205952}
|
|
{"current_steps": 1970, "total_steps": 15621, "loss": 0.56, "lr": 1.9958868287371625e-06, "epoch": 0.1261122847448947, "percentage": 12.61, "elapsed_time": "0:06:58", "remaining_time": "0:48:16", "throughput": 14885.3, "total_tokens": 6222592}
|
|
{"current_steps": 1975, "total_steps": 15621, "loss": 0.4803, "lr": 1.9957849669408617e-06, "epoch": 0.12643236668587157, "percentage": 12.64, "elapsed_time": "0:06:58", "remaining_time": "0:48:12", "throughput": 14900.38, "total_tokens": 6237696}
|
|
{"current_steps": 1980, "total_steps": 15621, "loss": 0.4858, "lr": 1.995681861895338e-06, "epoch": 0.12675244862684848, "percentage": 12.68, "elapsed_time": "0:06:59", "remaining_time": "0:48:08", "throughput": 14917.17, "total_tokens": 6254080}
|
|
{"current_steps": 1985, "total_steps": 15621, "loss": 0.5741, "lr": 1.9955775137293187e-06, "epoch": 0.12707253056782536, "percentage": 12.71, "elapsed_time": "0:06:59", "remaining_time": "0:48:04", "throughput": 14932.87, "total_tokens": 6270016}
|
|
{"current_steps": 1990, "total_steps": 15621, "loss": 0.6124, "lr": 1.9954719225730845e-06, "epoch": 0.12739261250880227, "percentage": 12.74, "elapsed_time": "0:07:00", "remaining_time": "0:48:00", "throughput": 14947.96, "total_tokens": 6285184}
|
|
{"current_steps": 1995, "total_steps": 15621, "loss": 0.4774, "lr": 1.9953650885584666e-06, "epoch": 0.12771269444977915, "percentage": 12.77, "elapsed_time": "0:07:01", "remaining_time": "0:47:56", "throughput": 14963.51, "total_tokens": 6300992}
|
|
{"current_steps": 2000, "total_steps": 15621, "loss": 0.5445, "lr": 1.995257011818849e-06, "epoch": 0.12803277639075603, "percentage": 12.8, "elapsed_time": "0:07:01", "remaining_time": "0:47:51", "throughput": 14977.13, "total_tokens": 6315392}
|
|
{"current_steps": 2005, "total_steps": 15621, "loss": 0.4739, "lr": 1.9951476924891666e-06, "epoch": 0.12835285833173293, "percentage": 12.84, "elapsed_time": "0:07:02", "remaining_time": "0:47:47", "throughput": 14992.28, "total_tokens": 6331136}
|
|
{"current_steps": 2010, "total_steps": 15621, "loss": 0.5553, "lr": 1.9950371307059056e-06, "epoch": 0.1286729402727098, "percentage": 12.87, "elapsed_time": "0:07:02", "remaining_time": "0:47:43", "throughput": 15008.58, "total_tokens": 6347584}
|
|
{"current_steps": 2015, "total_steps": 15621, "loss": 0.5728, "lr": 1.9949253266071036e-06, "epoch": 0.1289930222136867, "percentage": 12.9, "elapsed_time": "0:07:03", "remaining_time": "0:47:39", "throughput": 15023.08, "total_tokens": 6362560}
|
|
{"current_steps": 2020, "total_steps": 15621, "loss": 0.5075, "lr": 1.9948122803323503e-06, "epoch": 0.1293131041546636, "percentage": 12.93, "elapsed_time": "0:07:04", "remaining_time": "0:47:35", "throughput": 15038.66, "total_tokens": 6378304}
|
|
{"current_steps": 2025, "total_steps": 15621, "loss": 0.5147, "lr": 1.9946979920227844e-06, "epoch": 0.12963318609564048, "percentage": 12.96, "elapsed_time": "0:07:04", "remaining_time": "0:47:31", "throughput": 15053.16, "total_tokens": 6393280}
|
|
{"current_steps": 2030, "total_steps": 15621, "loss": 0.5251, "lr": 1.994582461821096e-06, "epoch": 0.1299532680366174, "percentage": 13.0, "elapsed_time": "0:07:05", "remaining_time": "0:47:27", "throughput": 15069.01, "total_tokens": 6409472}
|
|
{"current_steps": 2035, "total_steps": 15621, "loss": 0.7157, "lr": 1.9944656898715267e-06, "epoch": 0.13027334997759427, "percentage": 13.03, "elapsed_time": "0:07:05", "remaining_time": "0:47:23", "throughput": 15083.99, "total_tokens": 6424960}
|
|
{"current_steps": 2040, "total_steps": 15621, "loss": 0.6057, "lr": 1.994347676319867e-06, "epoch": 0.13059343191857115, "percentage": 13.06, "elapsed_time": "0:07:06", "remaining_time": "0:47:19", "throughput": 15098.35, "total_tokens": 6440000}
|
|
{"current_steps": 2045, "total_steps": 15621, "loss": 0.453, "lr": 1.994228421313459e-06, "epoch": 0.13091351385954805, "percentage": 13.09, "elapsed_time": "0:07:07", "remaining_time": "0:47:16", "throughput": 15116.11, "total_tokens": 6457600}
|
|
{"current_steps": 2050, "total_steps": 15621, "loss": 0.5143, "lr": 1.994107925001193e-06, "epoch": 0.13123359580052493, "percentage": 13.12, "elapsed_time": "0:07:07", "remaining_time": "0:47:12", "throughput": 15130.99, "total_tokens": 6473088}
|
|
{"current_steps": 2055, "total_steps": 15621, "loss": 0.6013, "lr": 1.9939861875335108e-06, "epoch": 0.1315536777415018, "percentage": 13.16, "elapsed_time": "0:07:08", "remaining_time": "0:47:07", "throughput": 15144.51, "total_tokens": 6487680}
|
|
{"current_steps": 2060, "total_steps": 15621, "loss": 0.4831, "lr": 1.9938632090624025e-06, "epoch": 0.13187375968247872, "percentage": 13.19, "elapsed_time": "0:07:08", "remaining_time": "0:47:04", "throughput": 15159.5, "total_tokens": 6503296}
|
|
{"current_steps": 2065, "total_steps": 15621, "loss": 0.5363, "lr": 1.9937389897414087e-06, "epoch": 0.1321938416234556, "percentage": 13.22, "elapsed_time": "0:07:09", "remaining_time": "0:47:00", "throughput": 15174.43, "total_tokens": 6518912}
|
|
{"current_steps": 2070, "total_steps": 15621, "loss": 0.5631, "lr": 1.993613529725618e-06, "epoch": 0.1325139235644325, "percentage": 13.25, "elapsed_time": "0:07:10", "remaining_time": "0:46:56", "throughput": 15189.53, "total_tokens": 6534784}
|
|
{"current_steps": 2075, "total_steps": 15621, "loss": 0.5248, "lr": 1.99348682917167e-06, "epoch": 0.13283400550540939, "percentage": 13.28, "elapsed_time": "0:07:10", "remaining_time": "0:46:52", "throughput": 15204.47, "total_tokens": 6550528}
|
|
{"current_steps": 2080, "total_steps": 15621, "loss": 0.5344, "lr": 1.99335888823775e-06, "epoch": 0.13315408744638627, "percentage": 13.32, "elapsed_time": "0:07:11", "remaining_time": "0:46:48", "throughput": 15219.15, "total_tokens": 6566144}
|
|
{"current_steps": 2085, "total_steps": 15621, "loss": 0.5605, "lr": 1.993229707083595e-06, "epoch": 0.13347416938736317, "percentage": 13.35, "elapsed_time": "0:07:12", "remaining_time": "0:46:45", "throughput": 15236.44, "total_tokens": 6583872}
|
|
{"current_steps": 2090, "total_steps": 15621, "loss": 0.4144, "lr": 1.993099285870489e-06, "epoch": 0.13379425132834005, "percentage": 13.38, "elapsed_time": "0:07:12", "remaining_time": "0:46:42", "throughput": 15254.61, "total_tokens": 6602304}
|
|
{"current_steps": 2095, "total_steps": 15621, "loss": 0.4607, "lr": 1.992967624761264e-06, "epoch": 0.13411433326931693, "percentage": 13.41, "elapsed_time": "0:07:13", "remaining_time": "0:46:38", "throughput": 15269.52, "total_tokens": 6618112}
|
|
{"current_steps": 2100, "total_steps": 15621, "loss": 0.6174, "lr": 1.9928347239203014e-06, "epoch": 0.13443441521029384, "percentage": 13.44, "elapsed_time": "0:07:14", "remaining_time": "0:46:34", "throughput": 15286.51, "total_tokens": 6635584}
|
|
{"current_steps": 2105, "total_steps": 15621, "loss": 0.5339, "lr": 1.9927005835135282e-06, "epoch": 0.13475449715127072, "percentage": 13.48, "elapsed_time": "0:07:14", "remaining_time": "0:46:31", "throughput": 15303.86, "total_tokens": 6653568}
|
|
{"current_steps": 2110, "total_steps": 15621, "loss": 0.4604, "lr": 1.9925652037084214e-06, "epoch": 0.13507457909224763, "percentage": 13.51, "elapsed_time": "0:07:15", "remaining_time": "0:46:27", "throughput": 15317.8, "total_tokens": 6668864}
|
|
{"current_steps": 2115, "total_steps": 15621, "loss": 0.4852, "lr": 1.9924285846740037e-06, "epoch": 0.1353946610332245, "percentage": 13.54, "elapsed_time": "0:07:15", "remaining_time": "0:46:24", "throughput": 15332.05, "total_tokens": 6684416}
|
|
{"current_steps": 2120, "total_steps": 15621, "loss": 0.5927, "lr": 1.9922907265808452e-06, "epoch": 0.13571474297420139, "percentage": 13.57, "elapsed_time": "0:07:16", "remaining_time": "0:46:20", "throughput": 15345.72, "total_tokens": 6699392}
|
|
{"current_steps": 2125, "total_steps": 15621, "loss": 0.5477, "lr": 1.9921516296010643e-06, "epoch": 0.1360348249151783, "percentage": 13.6, "elapsed_time": "0:07:17", "remaining_time": "0:46:16", "throughput": 15359.29, "total_tokens": 6714560}
|
|
{"current_steps": 2130, "total_steps": 15621, "loss": 0.5584, "lr": 1.9920112939083246e-06, "epoch": 0.13635490685615517, "percentage": 13.64, "elapsed_time": "0:07:17", "remaining_time": "0:46:12", "throughput": 15373.5, "total_tokens": 6729920}
|
|
{"current_steps": 2135, "total_steps": 15621, "loss": 0.5555, "lr": 1.9918697196778367e-06, "epoch": 0.13667498879713205, "percentage": 13.67, "elapsed_time": "0:07:18", "remaining_time": "0:46:08", "throughput": 15386.77, "total_tokens": 6744768}
|
|
{"current_steps": 2140, "total_steps": 15621, "loss": 0.4607, "lr": 1.9917269070863578e-06, "epoch": 0.13699507073810896, "percentage": 13.7, "elapsed_time": "0:07:18", "remaining_time": "0:46:05", "throughput": 15400.05, "total_tokens": 6759680}
|
|
{"current_steps": 2145, "total_steps": 15621, "loss": 0.5094, "lr": 1.9915828563121915e-06, "epoch": 0.13731515267908584, "percentage": 13.73, "elapsed_time": "0:07:19", "remaining_time": "0:46:01", "throughput": 15414.24, "total_tokens": 6775168}
|
|
{"current_steps": 2150, "total_steps": 15621, "loss": 0.5364, "lr": 1.9914375675351865e-06, "epoch": 0.13763523462006275, "percentage": 13.76, "elapsed_time": "0:07:20", "remaining_time": "0:45:57", "throughput": 15429.05, "total_tokens": 6791296}
|
|
{"current_steps": 2155, "total_steps": 15621, "loss": 0.43, "lr": 1.991291040936738e-06, "epoch": 0.13795531656103963, "percentage": 13.8, "elapsed_time": "0:07:20", "remaining_time": "0:45:54", "throughput": 15445.37, "total_tokens": 6808640}
|
|
{"current_steps": 2160, "total_steps": 15621, "loss": 0.6627, "lr": 1.9911432766997857e-06, "epoch": 0.1382753985020165, "percentage": 13.83, "elapsed_time": "0:07:21", "remaining_time": "0:45:50", "throughput": 15459.29, "total_tokens": 6824064}
|
|
{"current_steps": 2165, "total_steps": 15621, "loss": 0.4426, "lr": 1.990994275008815e-06, "epoch": 0.1385954804429934, "percentage": 13.86, "elapsed_time": "0:07:22", "remaining_time": "0:45:47", "throughput": 15473.86, "total_tokens": 6839872}
|
|
{"current_steps": 2170, "total_steps": 15621, "loss": 0.5081, "lr": 1.9908440360498565e-06, "epoch": 0.1389155623839703, "percentage": 13.89, "elapsed_time": "0:07:22", "remaining_time": "0:45:43", "throughput": 15487.97, "total_tokens": 6855744}
|
|
{"current_steps": 2175, "total_steps": 15621, "loss": 0.5566, "lr": 1.990692560010485e-06, "epoch": 0.1392356443249472, "percentage": 13.92, "elapsed_time": "0:07:23", "remaining_time": "0:45:39", "throughput": 15499.77, "total_tokens": 6869632}
|
|
{"current_steps": 2180, "total_steps": 15621, "loss": 0.448, "lr": 1.9905398470798206e-06, "epoch": 0.13955572626592408, "percentage": 13.96, "elapsed_time": "0:07:23", "remaining_time": "0:45:36", "throughput": 15514.09, "total_tokens": 6885696}
|
|
{"current_steps": 2185, "total_steps": 15621, "loss": 0.3634, "lr": 1.990385897448527e-06, "epoch": 0.13987580820690096, "percentage": 13.99, "elapsed_time": "0:07:24", "remaining_time": "0:45:32", "throughput": 15528.28, "total_tokens": 6901504}
|
|
{"current_steps": 2190, "total_steps": 15621, "loss": 0.5822, "lr": 1.9902307113088114e-06, "epoch": 0.14019589014787787, "percentage": 14.02, "elapsed_time": "0:07:25", "remaining_time": "0:45:29", "throughput": 15541.55, "total_tokens": 6916480}
|
|
{"current_steps": 2195, "total_steps": 15621, "loss": 0.4818, "lr": 1.9900742888544264e-06, "epoch": 0.14051597208885475, "percentage": 14.05, "elapsed_time": "0:07:25", "remaining_time": "0:45:25", "throughput": 15555.64, "total_tokens": 6932416}
|
|
{"current_steps": 2200, "total_steps": 15621, "loss": 0.534, "lr": 1.989916630280667e-06, "epoch": 0.14083605402983163, "percentage": 14.08, "elapsed_time": "0:07:26", "remaining_time": "0:45:22", "throughput": 15570.44, "total_tokens": 6948992}
|
|
{"current_steps": 2205, "total_steps": 15621, "loss": 0.4636, "lr": 1.989757735784372e-06, "epoch": 0.14115613597080853, "percentage": 14.12, "elapsed_time": "0:07:26", "remaining_time": "0:45:19", "throughput": 15584.04, "total_tokens": 6964416}
|
|
{"current_steps": 2210, "total_steps": 15621, "loss": 0.4218, "lr": 1.989597605563923e-06, "epoch": 0.1414762179117854, "percentage": 14.15, "elapsed_time": "0:07:27", "remaining_time": "0:45:15", "throughput": 15598.44, "total_tokens": 6980544}
|
|
{"current_steps": 2215, "total_steps": 15621, "loss": 0.5658, "lr": 1.9894362398192437e-06, "epoch": 0.14179629985276232, "percentage": 14.18, "elapsed_time": "0:07:28", "remaining_time": "0:45:12", "throughput": 15613.64, "total_tokens": 6997440}
|
|
{"current_steps": 2220, "total_steps": 15621, "loss": 0.4163, "lr": 1.9892736387518023e-06, "epoch": 0.1421163817937392, "percentage": 14.21, "elapsed_time": "0:07:28", "remaining_time": "0:45:08", "throughput": 15626.86, "total_tokens": 7012672}
|
|
{"current_steps": 2225, "total_steps": 15621, "loss": 0.4773, "lr": 1.9891098025646075e-06, "epoch": 0.14243646373471608, "percentage": 14.24, "elapsed_time": "0:07:29", "remaining_time": "0:45:05", "throughput": 15639.6, "total_tokens": 7027648}
|
|
{"current_steps": 2230, "total_steps": 15621, "loss": 0.5303, "lr": 1.9889447314622105e-06, "epoch": 0.142756545675693, "percentage": 14.28, "elapsed_time": "0:07:29", "remaining_time": "0:45:01", "throughput": 15653.05, "total_tokens": 7043200}
|
|
{"current_steps": 2235, "total_steps": 15621, "loss": 0.7152, "lr": 1.9887784256507046e-06, "epoch": 0.14307662761666987, "percentage": 14.31, "elapsed_time": "0:07:30", "remaining_time": "0:44:58", "throughput": 15666.24, "total_tokens": 7058688}
|
|
{"current_steps": 2240, "total_steps": 15621, "loss": 0.6679, "lr": 1.988610885337725e-06, "epoch": 0.14339670955764675, "percentage": 14.34, "elapsed_time": "0:07:31", "remaining_time": "0:44:55", "throughput": 15679.12, "total_tokens": 7074048}
|
|
{"current_steps": 2245, "total_steps": 15621, "loss": 0.5261, "lr": 1.9884421107324476e-06, "epoch": 0.14371679149862365, "percentage": 14.37, "elapsed_time": "0:07:31", "remaining_time": "0:44:51", "throughput": 15692.73, "total_tokens": 7089792}
|
|
{"current_steps": 2250, "total_steps": 15621, "loss": 0.4755, "lr": 1.9882721020455893e-06, "epoch": 0.14403687343960053, "percentage": 14.4, "elapsed_time": "0:07:32", "remaining_time": "0:44:48", "throughput": 15705.0, "total_tokens": 7104640}
|
|
{"current_steps": 2255, "total_steps": 15621, "loss": 0.5019, "lr": 1.988100859489408e-06, "epoch": 0.14435695538057744, "percentage": 14.44, "elapsed_time": "0:07:32", "remaining_time": "0:44:44", "throughput": 15718.1, "total_tokens": 7120064}
|
|
{"current_steps": 2260, "total_steps": 15621, "loss": 0.4754, "lr": 1.9879283832777017e-06, "epoch": 0.14467703732155432, "percentage": 14.47, "elapsed_time": "0:07:33", "remaining_time": "0:44:41", "throughput": 15730.81, "total_tokens": 7135232}
|
|
{"current_steps": 2265, "total_steps": 15621, "loss": 0.5075, "lr": 1.9877546736258096e-06, "epoch": 0.1449971192625312, "percentage": 14.5, "elapsed_time": "0:07:34", "remaining_time": "0:44:38", "throughput": 15742.6, "total_tokens": 7149632}
|
|
{"current_steps": 2270, "total_steps": 15621, "loss": 0.4107, "lr": 1.98757973075061e-06, "epoch": 0.1453172012035081, "percentage": 14.53, "elapsed_time": "0:07:34", "remaining_time": "0:44:34", "throughput": 15754.93, "total_tokens": 7164352}
|
|
{"current_steps": 2275, "total_steps": 15621, "loss": 0.5188, "lr": 1.987403554870521e-06, "epoch": 0.14563728314448499, "percentage": 14.56, "elapsed_time": "0:07:35", "remaining_time": "0:44:31", "throughput": 15767.85, "total_tokens": 7179776}
|
|
{"current_steps": 2280, "total_steps": 15621, "loss": 0.4212, "lr": 1.9872261462055003e-06, "epoch": 0.14595736508546187, "percentage": 14.6, "elapsed_time": "0:07:35", "remaining_time": "0:44:27", "throughput": 15779.62, "total_tokens": 7194240}
|
|
{"current_steps": 2285, "total_steps": 15621, "loss": 0.4335, "lr": 1.987047504977045e-06, "epoch": 0.14627744702643877, "percentage": 14.63, "elapsed_time": "0:07:36", "remaining_time": "0:44:24", "throughput": 15792.52, "total_tokens": 7209472}
|
|
{"current_steps": 2290, "total_steps": 15621, "loss": 0.414, "lr": 1.9868676314081902e-06, "epoch": 0.14659752896741565, "percentage": 14.66, "elapsed_time": "0:07:37", "remaining_time": "0:44:21", "throughput": 15805.72, "total_tokens": 7225088}
|
|
{"current_steps": 2295, "total_steps": 15621, "loss": 0.6901, "lr": 1.9866865257235107e-06, "epoch": 0.14691761090839256, "percentage": 14.69, "elapsed_time": "0:07:37", "remaining_time": "0:44:17", "throughput": 15818.9, "total_tokens": 7240704}
|
|
{"current_steps": 2300, "total_steps": 15621, "loss": 0.4177, "lr": 1.9865041881491188e-06, "epoch": 0.14723769284936944, "percentage": 14.72, "elapsed_time": "0:07:38", "remaining_time": "0:44:14", "throughput": 15832.07, "total_tokens": 7256000}
|
|
{"current_steps": 2305, "total_steps": 15621, "loss": 0.6016, "lr": 1.9863206189126653e-06, "epoch": 0.14755777479034632, "percentage": 14.76, "elapsed_time": "0:07:38", "remaining_time": "0:44:10", "throughput": 15843.56, "total_tokens": 7270336}
|
|
{"current_steps": 2310, "total_steps": 15621, "loss": 0.5612, "lr": 1.9861358182433382e-06, "epoch": 0.14787785673132323, "percentage": 14.79, "elapsed_time": "0:07:39", "remaining_time": "0:44:07", "throughput": 15856.1, "total_tokens": 7285440}
|
|
{"current_steps": 2315, "total_steps": 15621, "loss": 0.4711, "lr": 1.9859497863718634e-06, "epoch": 0.1481979386723001, "percentage": 14.82, "elapsed_time": "0:07:40", "remaining_time": "0:44:04", "throughput": 15868.93, "total_tokens": 7301120}
|
|
{"current_steps": 2320, "total_steps": 15621, "loss": 0.5204, "lr": 1.985762523530504e-06, "epoch": 0.14851802061327699, "percentage": 14.85, "elapsed_time": "0:07:40", "remaining_time": "0:44:01", "throughput": 15881.29, "total_tokens": 7316416}
|
|
{"current_steps": 2325, "total_steps": 15621, "loss": 0.5051, "lr": 1.98557402995306e-06, "epoch": 0.1488381025542539, "percentage": 14.88, "elapsed_time": "0:07:41", "remaining_time": "0:43:58", "throughput": 15894.45, "total_tokens": 7332160}
|
|
{"current_steps": 2330, "total_steps": 15621, "loss": 0.7069, "lr": 1.985384305874868e-06, "epoch": 0.14915818449523077, "percentage": 14.92, "elapsed_time": "0:07:41", "remaining_time": "0:43:54", "throughput": 15907.3, "total_tokens": 7347776}
|
|
{"current_steps": 2335, "total_steps": 15621, "loss": 0.5467, "lr": 1.9851933515328e-06, "epoch": 0.14947826643620768, "percentage": 14.95, "elapsed_time": "0:07:42", "remaining_time": "0:43:51", "throughput": 15919.83, "total_tokens": 7363200}
|
|
{"current_steps": 2340, "total_steps": 15621, "loss": 0.4699, "lr": 1.985001167165265e-06, "epoch": 0.14979834837718456, "percentage": 14.98, "elapsed_time": "0:07:43", "remaining_time": "0:43:48", "throughput": 15932.71, "total_tokens": 7378752}
|
|
{"current_steps": 2345, "total_steps": 15621, "loss": 0.5165, "lr": 1.984807753012208e-06, "epoch": 0.15011843031816144, "percentage": 15.01, "elapsed_time": "0:07:43", "remaining_time": "0:43:45", "throughput": 15945.07, "total_tokens": 7393984}
|
|
{"current_steps": 2346, "total_steps": 15621, "eval_loss": 0.5076366662979126, "epoch": 0.15018244670635683, "percentage": 15.02, "elapsed_time": "0:08:32", "remaining_time": "0:48:22", "throughput": 14419.3, "total_tokens": 7397056}
|
|
{"current_steps": 2350, "total_steps": 15621, "loss": 0.5902, "lr": 1.9846131093151086e-06, "epoch": 0.15043851225913835, "percentage": 15.04, "elapsed_time": "0:09:16", "remaining_time": "0:52:24", "throughput": 13306.39, "total_tokens": 7408832}
|
|
{"current_steps": 2355, "total_steps": 15621, "loss": 0.4582, "lr": 1.9844172363169808e-06, "epoch": 0.15075859420011523, "percentage": 15.08, "elapsed_time": "0:09:17", "remaining_time": "0:52:19", "throughput": 13318.28, "total_tokens": 7423040}
|
|
{"current_steps": 2360, "total_steps": 15621, "loss": 0.5117, "lr": 1.9842201342623756e-06, "epoch": 0.15107867614109213, "percentage": 15.11, "elapsed_time": "0:09:17", "remaining_time": "0:52:15", "throughput": 13331.44, "total_tokens": 7438464}
|
|
{"current_steps": 2365, "total_steps": 15621, "loss": 0.5205, "lr": 1.9840218033973766e-06, "epoch": 0.151398758082069, "percentage": 15.14, "elapsed_time": "0:09:18", "remaining_time": "0:52:10", "throughput": 13344.42, "total_tokens": 7453824}
|
|
{"current_steps": 2370, "total_steps": 15621, "loss": 0.5717, "lr": 1.9838222439696027e-06, "epoch": 0.1517188400230459, "percentage": 15.17, "elapsed_time": "0:09:19", "remaining_time": "0:52:06", "throughput": 13357.56, "total_tokens": 7469312}
|
|
{"current_steps": 2375, "total_steps": 15621, "loss": 0.7065, "lr": 1.9836214562282058e-06, "epoch": 0.1520389219640228, "percentage": 15.2, "elapsed_time": "0:09:19", "remaining_time": "0:52:02", "throughput": 13371.13, "total_tokens": 7485120}
|
|
{"current_steps": 2380, "total_steps": 15621, "loss": 0.4971, "lr": 1.9834194404238715e-06, "epoch": 0.15235900390499968, "percentage": 15.24, "elapsed_time": "0:09:20", "remaining_time": "0:51:57", "throughput": 13384.16, "total_tokens": 7500416}
|
|
{"current_steps": 2385, "total_steps": 15621, "loss": 0.4125, "lr": 1.9832161968088193e-06, "epoch": 0.15267908584597656, "percentage": 15.27, "elapsed_time": "0:09:21", "remaining_time": "0:51:53", "throughput": 13398.2, "total_tokens": 7516672}
|
|
{"current_steps": 2390, "total_steps": 15621, "loss": 0.4764, "lr": 1.9830117256368015e-06, "epoch": 0.15299916778695347, "percentage": 15.3, "elapsed_time": "0:09:21", "remaining_time": "0:51:49", "throughput": 13412.07, "total_tokens": 7532800}
|
|
{"current_steps": 2395, "total_steps": 15621, "loss": 0.4924, "lr": 1.982806027163102e-06, "epoch": 0.15331924972793035, "percentage": 15.33, "elapsed_time": "0:09:22", "remaining_time": "0:51:44", "throughput": 13424.66, "total_tokens": 7547776}
|
|
{"current_steps": 2400, "total_steps": 15621, "loss": 0.5579, "lr": 1.9825991016445386e-06, "epoch": 0.15363933166890725, "percentage": 15.36, "elapsed_time": "0:09:22", "remaining_time": "0:51:40", "throughput": 13436.85, "total_tokens": 7562496}
|
|
{"current_steps": 2405, "total_steps": 15621, "loss": 0.5286, "lr": 1.9823909493394594e-06, "epoch": 0.15395941360988413, "percentage": 15.4, "elapsed_time": "0:09:23", "remaining_time": "0:51:36", "throughput": 13449.68, "total_tokens": 7577920}
|
|
{"current_steps": 2410, "total_steps": 15621, "loss": 0.5331, "lr": 1.9821815705077455e-06, "epoch": 0.154279495550861, "percentage": 15.43, "elapsed_time": "0:09:24", "remaining_time": "0:51:31", "throughput": 13462.55, "total_tokens": 7593216}
|
|
{"current_steps": 2415, "total_steps": 15621, "loss": 0.5768, "lr": 1.9819709654108087e-06, "epoch": 0.15459957749183792, "percentage": 15.46, "elapsed_time": "0:09:24", "remaining_time": "0:51:27", "throughput": 13474.85, "total_tokens": 7608192}
|
|
{"current_steps": 2420, "total_steps": 15621, "loss": 0.4652, "lr": 1.981759134311592e-06, "epoch": 0.1549196594328148, "percentage": 15.49, "elapsed_time": "0:09:25", "remaining_time": "0:51:23", "throughput": 13488.66, "total_tokens": 7624448}
|
|
{"current_steps": 2425, "total_steps": 15621, "loss": 0.4847, "lr": 1.981546077474569e-06, "epoch": 0.15523974137379168, "percentage": 15.52, "elapsed_time": "0:09:25", "remaining_time": "0:51:19", "throughput": 13501.86, "total_tokens": 7640192}
|
|
{"current_steps": 2430, "total_steps": 15621, "loss": 0.5143, "lr": 1.981331795165744e-06, "epoch": 0.15555982331476859, "percentage": 15.56, "elapsed_time": "0:09:26", "remaining_time": "0:51:14", "throughput": 13514.01, "total_tokens": 7654848}
|
|
{"current_steps": 2435, "total_steps": 15621, "loss": 0.6067, "lr": 1.9811162876526498e-06, "epoch": 0.15587990525574547, "percentage": 15.59, "elapsed_time": "0:09:27", "remaining_time": "0:51:10", "throughput": 13527.39, "total_tokens": 7670848}
|
|
{"current_steps": 2440, "total_steps": 15621, "loss": 0.6387, "lr": 1.9808995552043515e-06, "epoch": 0.15619998719672237, "percentage": 15.62, "elapsed_time": "0:09:27", "remaining_time": "0:51:06", "throughput": 13539.94, "total_tokens": 7686016}
|
|
{"current_steps": 2445, "total_steps": 15621, "loss": 0.5478, "lr": 1.9806815980914413e-06, "epoch": 0.15652006913769925, "percentage": 15.65, "elapsed_time": "0:09:28", "remaining_time": "0:51:02", "throughput": 13553.12, "total_tokens": 7701760}
|
|
{"current_steps": 2450, "total_steps": 15621, "loss": 0.5624, "lr": 1.9804624165860417e-06, "epoch": 0.15684015107867613, "percentage": 15.68, "elapsed_time": "0:09:28", "remaining_time": "0:50:58", "throughput": 13566.41, "total_tokens": 7717760}
|
|
{"current_steps": 2455, "total_steps": 15621, "loss": 0.3852, "lr": 1.9802420109618028e-06, "epoch": 0.15716023301965304, "percentage": 15.72, "elapsed_time": "0:09:29", "remaining_time": "0:50:54", "throughput": 13579.22, "total_tokens": 7733376}
|
|
{"current_steps": 2460, "total_steps": 15621, "loss": 0.4984, "lr": 1.980020381493904e-06, "epoch": 0.15748031496062992, "percentage": 15.75, "elapsed_time": "0:09:30", "remaining_time": "0:50:50", "throughput": 13593.65, "total_tokens": 7750464}
|
|
{"current_steps": 2465, "total_steps": 15621, "loss": 0.4942, "lr": 1.979797528459052e-06, "epoch": 0.1578003969016068, "percentage": 15.78, "elapsed_time": "0:09:30", "remaining_time": "0:50:46", "throughput": 13609.27, "total_tokens": 7768576}
|
|
{"current_steps": 2470, "total_steps": 15621, "loss": 0.5334, "lr": 1.979573452135482e-06, "epoch": 0.1581204788425837, "percentage": 15.81, "elapsed_time": "0:09:31", "remaining_time": "0:50:42", "throughput": 13622.28, "total_tokens": 7784256}
|
|
{"current_steps": 2475, "total_steps": 15621, "loss": 0.3186, "lr": 1.979348152802955e-06, "epoch": 0.15844056078356059, "percentage": 15.84, "elapsed_time": "0:09:32", "remaining_time": "0:50:38", "throughput": 13634.46, "total_tokens": 7799232}
|
|
{"current_steps": 2480, "total_steps": 15621, "loss": 0.592, "lr": 1.979121630742761e-06, "epoch": 0.1587606427245375, "percentage": 15.88, "elapsed_time": "0:09:32", "remaining_time": "0:50:34", "throughput": 13647.52, "total_tokens": 7815040}
|
|
{"current_steps": 2485, "total_steps": 15621, "loss": 0.4479, "lr": 1.9788938862377146e-06, "epoch": 0.15908072466551437, "percentage": 15.91, "elapsed_time": "0:09:33", "remaining_time": "0:50:30", "throughput": 13660.01, "total_tokens": 7830400}
|
|
{"current_steps": 2490, "total_steps": 15621, "loss": 0.4818, "lr": 1.9786649195721577e-06, "epoch": 0.15940080660649125, "percentage": 15.94, "elapsed_time": "0:09:33", "remaining_time": "0:50:26", "throughput": 13672.93, "total_tokens": 7846336}
|
|
{"current_steps": 2495, "total_steps": 15621, "loss": 0.6323, "lr": 1.978434731031958e-06, "epoch": 0.15972088854746816, "percentage": 15.97, "elapsed_time": "0:09:34", "remaining_time": "0:50:22", "throughput": 13686.35, "total_tokens": 7862528}
|
|
{"current_steps": 2500, "total_steps": 15621, "loss": 0.4541, "lr": 1.9782033209045085e-06, "epoch": 0.16004097048844504, "percentage": 16.0, "elapsed_time": "0:09:35", "remaining_time": "0:50:18", "throughput": 13700.84, "total_tokens": 7880000}
|
|
{"current_steps": 2505, "total_steps": 15621, "loss": 0.4053, "lr": 1.977970689478727e-06, "epoch": 0.16036105242942192, "percentage": 16.04, "elapsed_time": "0:09:35", "remaining_time": "0:50:14", "throughput": 13713.23, "total_tokens": 7895296}
|
|
{"current_steps": 2510, "total_steps": 15621, "loss": 0.5884, "lr": 1.9777368370450577e-06, "epoch": 0.16068113437039883, "percentage": 16.07, "elapsed_time": "0:09:36", "remaining_time": "0:50:10", "throughput": 13725.99, "total_tokens": 7911104}
|
|
{"current_steps": 2515, "total_steps": 15621, "loss": 0.521, "lr": 1.9775017638954674e-06, "epoch": 0.1610012163113757, "percentage": 16.1, "elapsed_time": "0:09:36", "remaining_time": "0:50:06", "throughput": 13737.84, "total_tokens": 7925952}
|
|
{"current_steps": 2520, "total_steps": 15621, "loss": 0.5943, "lr": 1.9772654703234476e-06, "epoch": 0.1613212982523526, "percentage": 16.13, "elapsed_time": "0:09:37", "remaining_time": "0:50:02", "throughput": 13749.62, "total_tokens": 7940928}
|
|
{"current_steps": 2525, "total_steps": 15621, "loss": 0.5665, "lr": 1.977027956624014e-06, "epoch": 0.1616413801933295, "percentage": 16.16, "elapsed_time": "0:09:38", "remaining_time": "0:49:58", "throughput": 13760.74, "total_tokens": 7955200}
|
|
{"current_steps": 2530, "total_steps": 15621, "loss": 0.5819, "lr": 1.9767892230937046e-06, "epoch": 0.16196146213430637, "percentage": 16.2, "elapsed_time": "0:09:38", "remaining_time": "0:49:54", "throughput": 13773.26, "total_tokens": 7970944}
|
|
{"current_steps": 2535, "total_steps": 15621, "loss": 0.4311, "lr": 1.976549270030581e-06, "epoch": 0.16228154407528328, "percentage": 16.23, "elapsed_time": "0:09:39", "remaining_time": "0:49:50", "throughput": 13785.08, "total_tokens": 7985856}
|
|
{"current_steps": 2540, "total_steps": 15621, "loss": 0.4678, "lr": 1.9763080977342286e-06, "epoch": 0.16260162601626016, "percentage": 16.26, "elapsed_time": "0:09:39", "remaining_time": "0:49:46", "throughput": 13796.11, "total_tokens": 8001088}
|
|
{"current_steps": 2545, "total_steps": 15621, "loss": 0.4965, "lr": 1.9760657065057527e-06, "epoch": 0.16292170795723707, "percentage": 16.29, "elapsed_time": "0:09:40", "remaining_time": "0:49:43", "throughput": 13809.56, "total_tokens": 8017856}
|
|
{"current_steps": 2550, "total_steps": 15621, "loss": 0.4527, "lr": 1.975822096647782e-06, "epoch": 0.16324178989821395, "percentage": 16.32, "elapsed_time": "0:09:41", "remaining_time": "0:49:39", "throughput": 13822.32, "total_tokens": 8033792}
|
|
{"current_steps": 2555, "total_steps": 15621, "loss": 0.4821, "lr": 1.975577268464466e-06, "epoch": 0.16356187183919083, "percentage": 16.36, "elapsed_time": "0:09:41", "remaining_time": "0:49:35", "throughput": 13833.33, "total_tokens": 8048256}
|
|
{"current_steps": 2560, "total_steps": 15621, "loss": 0.5626, "lr": 1.9753312222614765e-06, "epoch": 0.16388195378016773, "percentage": 16.39, "elapsed_time": "0:09:42", "remaining_time": "0:49:31", "throughput": 13845.51, "total_tokens": 8063680}
|
|
{"current_steps": 2565, "total_steps": 15621, "loss": 0.4853, "lr": 1.9750839583460036e-06, "epoch": 0.1642020357211446, "percentage": 16.42, "elapsed_time": "0:09:43", "remaining_time": "0:49:27", "throughput": 13858.27, "total_tokens": 8079744}
|
|
{"current_steps": 2570, "total_steps": 15621, "loss": 0.502, "lr": 1.9748354770267603e-06, "epoch": 0.1645221176621215, "percentage": 16.45, "elapsed_time": "0:09:43", "remaining_time": "0:49:23", "throughput": 13869.94, "total_tokens": 8094656}
|
|
{"current_steps": 2575, "total_steps": 15621, "loss": 0.5116, "lr": 1.9745857786139777e-06, "epoch": 0.1648421996030984, "percentage": 16.48, "elapsed_time": "0:09:44", "remaining_time": "0:49:19", "throughput": 13882.48, "total_tokens": 8110528}
|
|
{"current_steps": 2580, "total_steps": 15621, "loss": 0.6028, "lr": 1.974334863419408e-06, "epoch": 0.16516228154407528, "percentage": 16.52, "elapsed_time": "0:09:44", "remaining_time": "0:49:16", "throughput": 13895.19, "total_tokens": 8126720}
|
|
{"current_steps": 2585, "total_steps": 15621, "loss": 0.518, "lr": 1.9740827317563212e-06, "epoch": 0.1654823634850522, "percentage": 16.55, "elapsed_time": "0:09:45", "remaining_time": "0:49:12", "throughput": 13906.34, "total_tokens": 8141312}
|
|
{"current_steps": 2590, "total_steps": 15621, "loss": 0.4889, "lr": 1.973829383939507e-06, "epoch": 0.16580244542602907, "percentage": 16.58, "elapsed_time": "0:09:46", "remaining_time": "0:49:08", "throughput": 13918.18, "total_tokens": 8156736}
|
|
{"current_steps": 2595, "total_steps": 15621, "loss": 0.4987, "lr": 1.973574820285273e-06, "epoch": 0.16612252736700595, "percentage": 16.61, "elapsed_time": "0:09:46", "remaining_time": "0:49:04", "throughput": 13930.52, "total_tokens": 8172480}
|
|
{"current_steps": 2600, "total_steps": 15621, "loss": 0.5702, "lr": 1.9733190411114443e-06, "epoch": 0.16644260930798285, "percentage": 16.64, "elapsed_time": "0:09:47", "remaining_time": "0:49:01", "throughput": 13942.68, "total_tokens": 8188224}
|
|
{"current_steps": 2605, "total_steps": 15621, "loss": 0.438, "lr": 1.9730620467373654e-06, "epoch": 0.16676269124895973, "percentage": 16.68, "elapsed_time": "0:09:47", "remaining_time": "0:48:57", "throughput": 13955.37, "total_tokens": 8204352}
|
|
{"current_steps": 2610, "total_steps": 15621, "loss": 0.5744, "lr": 1.9728038374838958e-06, "epoch": 0.1670827731899366, "percentage": 16.71, "elapsed_time": "0:09:48", "remaining_time": "0:48:53", "throughput": 13966.83, "total_tokens": 8219328}
|
|
{"current_steps": 2615, "total_steps": 15621, "loss": 0.3913, "lr": 1.972544413673413e-06, "epoch": 0.16740285513091352, "percentage": 16.74, "elapsed_time": "0:09:49", "remaining_time": "0:48:49", "throughput": 13978.34, "total_tokens": 8234560}
|
|
{"current_steps": 2620, "total_steps": 15621, "loss": 0.5779, "lr": 1.9722837756298108e-06, "epoch": 0.1677229370718904, "percentage": 16.77, "elapsed_time": "0:09:49", "remaining_time": "0:48:46", "throughput": 13989.41, "total_tokens": 8249344}
|
|
{"current_steps": 2625, "total_steps": 15621, "loss": 0.5548, "lr": 1.972021923678499e-06, "epoch": 0.1680430190128673, "percentage": 16.8, "elapsed_time": "0:09:50", "remaining_time": "0:48:42", "throughput": 14001.85, "total_tokens": 8265600}
|
|
{"current_steps": 2630, "total_steps": 15621, "loss": 0.4861, "lr": 1.971758858146403e-06, "epoch": 0.16836310095384419, "percentage": 16.84, "elapsed_time": "0:09:50", "remaining_time": "0:48:38", "throughput": 14012.94, "total_tokens": 8280384}
|
|
{"current_steps": 2635, "total_steps": 15621, "loss": 0.4897, "lr": 1.9714945793619626e-06, "epoch": 0.16868318289482107, "percentage": 16.87, "elapsed_time": "0:09:51", "remaining_time": "0:48:35", "throughput": 14024.36, "total_tokens": 8295744}
|
|
{"current_steps": 2640, "total_steps": 15621, "loss": 0.5052, "lr": 1.971229087655133e-06, "epoch": 0.16900326483579797, "percentage": 16.9, "elapsed_time": "0:09:52", "remaining_time": "0:48:31", "throughput": 14036.43, "total_tokens": 8311680}
|
|
{"current_steps": 2645, "total_steps": 15621, "loss": 0.4678, "lr": 1.9709623833573842e-06, "epoch": 0.16932334677677485, "percentage": 16.93, "elapsed_time": "0:09:52", "remaining_time": "0:48:27", "throughput": 14047.57, "total_tokens": 8326592}
|
|
{"current_steps": 2650, "total_steps": 15621, "loss": 0.4588, "lr": 1.9706944668016994e-06, "epoch": 0.16964342871775173, "percentage": 16.96, "elapsed_time": "0:09:53", "remaining_time": "0:48:24", "throughput": 14058.78, "total_tokens": 8341632}
|
|
{"current_steps": 2655, "total_steps": 15621, "loss": 0.4627, "lr": 1.9704253383225756e-06, "epoch": 0.16996351065872864, "percentage": 17.0, "elapsed_time": "0:09:53", "remaining_time": "0:48:20", "throughput": 14071.62, "total_tokens": 8358400}
|
|
{"current_steps": 2660, "total_steps": 15621, "loss": 0.4845, "lr": 1.970154998256023e-06, "epoch": 0.17028359259970552, "percentage": 17.03, "elapsed_time": "0:09:54", "remaining_time": "0:48:17", "throughput": 14083.65, "total_tokens": 8374144}
|
|
{"current_steps": 2665, "total_steps": 15621, "loss": 0.4215, "lr": 1.9698834469395644e-06, "epoch": 0.17060367454068243, "percentage": 17.06, "elapsed_time": "0:09:55", "remaining_time": "0:48:13", "throughput": 14095.08, "total_tokens": 8389440}
|
|
{"current_steps": 2670, "total_steps": 15621, "loss": 0.5408, "lr": 1.969610684712234e-06, "epoch": 0.1709237564816593, "percentage": 17.09, "elapsed_time": "0:09:55", "remaining_time": "0:48:09", "throughput": 14106.47, "total_tokens": 8404672}
|
|
{"current_steps": 2675, "total_steps": 15621, "loss": 0.5508, "lr": 1.9693367119145794e-06, "epoch": 0.17124383842263619, "percentage": 17.12, "elapsed_time": "0:09:56", "remaining_time": "0:48:06", "throughput": 14117.94, "total_tokens": 8420096}
|
|
{"current_steps": 2680, "total_steps": 15621, "loss": 0.6684, "lr": 1.969061528888659e-06, "epoch": 0.1715639203636131, "percentage": 17.16, "elapsed_time": "0:09:57", "remaining_time": "0:48:02", "throughput": 14130.24, "total_tokens": 8436288}
|
|
{"current_steps": 2685, "total_steps": 15621, "loss": 0.5401, "lr": 1.9687851359780415e-06, "epoch": 0.17188400230458997, "percentage": 17.19, "elapsed_time": "0:09:57", "remaining_time": "0:47:59", "throughput": 14142.7, "total_tokens": 8452672}
|
|
{"current_steps": 2690, "total_steps": 15621, "loss": 0.4867, "lr": 1.968507533527807e-06, "epoch": 0.17220408424556685, "percentage": 17.22, "elapsed_time": "0:09:58", "remaining_time": "0:47:56", "throughput": 14155.12, "total_tokens": 8469120}
|
|
{"current_steps": 2695, "total_steps": 15621, "loss": 0.4748, "lr": 1.9682287218845455e-06, "epoch": 0.17252416618654376, "percentage": 17.25, "elapsed_time": "0:09:58", "remaining_time": "0:47:52", "throughput": 14166.61, "total_tokens": 8484736}
|
|
{"current_steps": 2700, "total_steps": 15621, "loss": 0.7367, "lr": 1.967948701396356e-06, "epoch": 0.17284424812752064, "percentage": 17.28, "elapsed_time": "0:09:59", "remaining_time": "0:47:49", "throughput": 14178.37, "total_tokens": 8500480}
|
|
{"current_steps": 2705, "total_steps": 15621, "loss": 0.3977, "lr": 1.9676674724128485e-06, "epoch": 0.17316433006849755, "percentage": 17.32, "elapsed_time": "0:10:00", "remaining_time": "0:47:45", "throughput": 14188.43, "total_tokens": 8514624}
|
|
{"current_steps": 2710, "total_steps": 15621, "loss": 0.4543, "lr": 1.9673850352851397e-06, "epoch": 0.17348441200947443, "percentage": 17.35, "elapsed_time": "0:10:00", "remaining_time": "0:47:41", "throughput": 14199.24, "total_tokens": 8529664}
|
|
{"current_steps": 2715, "total_steps": 15621, "loss": 0.5825, "lr": 1.967101390365856e-06, "epoch": 0.1738044939504513, "percentage": 17.38, "elapsed_time": "0:10:01", "remaining_time": "0:47:38", "throughput": 14210.96, "total_tokens": 8545280}
|
|
{"current_steps": 2720, "total_steps": 15621, "loss": 0.492, "lr": 1.966816538009131e-06, "epoch": 0.1741245758914282, "percentage": 17.41, "elapsed_time": "0:10:01", "remaining_time": "0:47:34", "throughput": 14222.14, "total_tokens": 8560384}
|
|
{"current_steps": 2725, "total_steps": 15621, "loss": 0.5425, "lr": 1.966530478570607e-06, "epoch": 0.1744446578324051, "percentage": 17.44, "elapsed_time": "0:10:02", "remaining_time": "0:47:31", "throughput": 14234.67, "total_tokens": 8576960}
|
|
{"current_steps": 2730, "total_steps": 15621, "loss": 0.4635, "lr": 1.9662432124074325e-06, "epoch": 0.174764739773382, "percentage": 17.48, "elapsed_time": "0:10:03", "remaining_time": "0:47:28", "throughput": 14245.85, "total_tokens": 8592384}
|
|
{"current_steps": 2735, "total_steps": 15621, "loss": 0.4836, "lr": 1.965954739878262e-06, "epoch": 0.17508482171435888, "percentage": 17.51, "elapsed_time": "0:10:03", "remaining_time": "0:47:24", "throughput": 14258.33, "total_tokens": 8609024}
|
|
{"current_steps": 2740, "total_steps": 15621, "loss": 0.4283, "lr": 1.965665061343257e-06, "epoch": 0.17540490365533576, "percentage": 17.54, "elapsed_time": "0:10:04", "remaining_time": "0:47:21", "throughput": 14270.02, "total_tokens": 8624768}
|
|
{"current_steps": 2745, "total_steps": 15621, "loss": 0.4646, "lr": 1.965374177164085e-06, "epoch": 0.17572498559631267, "percentage": 17.57, "elapsed_time": "0:10:05", "remaining_time": "0:47:17", "throughput": 14281.43, "total_tokens": 8640448}
|
|
{"current_steps": 2750, "total_steps": 15621, "loss": 0.5427, "lr": 1.9650820877039182e-06, "epoch": 0.17604506753728955, "percentage": 17.6, "elapsed_time": "0:10:05", "remaining_time": "0:47:14", "throughput": 14292.16, "total_tokens": 8655296}
|
|
{"current_steps": 2755, "total_steps": 15621, "loss": 0.4878, "lr": 1.9647887933274334e-06, "epoch": 0.17636514947826643, "percentage": 17.64, "elapsed_time": "0:10:06", "remaining_time": "0:47:11", "throughput": 14304.36, "total_tokens": 8671872}
|
|
{"current_steps": 2760, "total_steps": 15621, "loss": 0.4822, "lr": 1.9644942944008124e-06, "epoch": 0.17668523141924333, "percentage": 17.67, "elapsed_time": "0:10:06", "remaining_time": "0:47:07", "throughput": 14316.11, "total_tokens": 8687680}
|
|
{"current_steps": 2765, "total_steps": 15621, "loss": 0.5914, "lr": 1.96419859129174e-06, "epoch": 0.1770053133602202, "percentage": 17.7, "elapsed_time": "0:10:07", "remaining_time": "0:47:04", "throughput": 14326.99, "total_tokens": 8702912}
|
|
{"current_steps": 2770, "total_steps": 15621, "loss": 0.4702, "lr": 1.963901684369406e-06, "epoch": 0.17732539530119712, "percentage": 17.73, "elapsed_time": "0:10:08", "remaining_time": "0:47:00", "throughput": 14338.04, "total_tokens": 8718144}
|
|
{"current_steps": 2775, "total_steps": 15621, "loss": 0.4989, "lr": 1.9636035740045013e-06, "epoch": 0.177645477242174, "percentage": 17.76, "elapsed_time": "0:10:08", "remaining_time": "0:46:57", "throughput": 14348.77, "total_tokens": 8732992}
|
|
{"current_steps": 2780, "total_steps": 15621, "loss": 0.6024, "lr": 1.9633042605692207e-06, "epoch": 0.17796555918315088, "percentage": 17.8, "elapsed_time": "0:10:09", "remaining_time": "0:46:54", "throughput": 14360.47, "total_tokens": 8749056}
|
|
{"current_steps": 2785, "total_steps": 15621, "loss": 0.4879, "lr": 1.9630037444372597e-06, "epoch": 0.17828564112412779, "percentage": 17.83, "elapsed_time": "0:10:09", "remaining_time": "0:46:50", "throughput": 14372.21, "total_tokens": 8765184}
|
|
{"current_steps": 2790, "total_steps": 15621, "loss": 0.4133, "lr": 1.9627020259838177e-06, "epoch": 0.17860572306510467, "percentage": 17.86, "elapsed_time": "0:10:10", "remaining_time": "0:46:47", "throughput": 14383.2, "total_tokens": 8780480}
|
|
{"current_steps": 2795, "total_steps": 15621, "loss": 0.5539, "lr": 1.9623991055855925e-06, "epoch": 0.17892580500608155, "percentage": 17.89, "elapsed_time": "0:10:11", "remaining_time": "0:46:44", "throughput": 14394.69, "total_tokens": 8796352}
|
|
{"current_steps": 2800, "total_steps": 15621, "loss": 0.443, "lr": 1.962094983620784e-06, "epoch": 0.17924588694705845, "percentage": 17.92, "elapsed_time": "0:10:11", "remaining_time": "0:46:40", "throughput": 14404.56, "total_tokens": 8810688}
|
|
{"current_steps": 2805, "total_steps": 15621, "loss": 0.4279, "lr": 1.9617896604690925e-06, "epoch": 0.17956596888803533, "percentage": 17.96, "elapsed_time": "0:10:12", "remaining_time": "0:46:37", "throughput": 14415.71, "total_tokens": 8826304}
|
|
{"current_steps": 2810, "total_steps": 15621, "loss": 0.4628, "lr": 1.961483136511717e-06, "epoch": 0.17988605082901224, "percentage": 17.99, "elapsed_time": "0:10:12", "remaining_time": "0:46:34", "throughput": 14426.25, "total_tokens": 8841344}
|
|
{"current_steps": 2815, "total_steps": 15621, "loss": 0.6058, "lr": 1.9611754121313567e-06, "epoch": 0.18020613276998912, "percentage": 18.02, "elapsed_time": "0:10:13", "remaining_time": "0:46:30", "throughput": 14438.2, "total_tokens": 8857664}
|
|
{"current_steps": 2820, "total_steps": 15621, "loss": 0.5762, "lr": 1.960866487712209e-06, "epoch": 0.180526214710966, "percentage": 18.05, "elapsed_time": "0:10:14", "remaining_time": "0:46:27", "throughput": 14449.35, "total_tokens": 8873408}
|
|
{"current_steps": 2825, "total_steps": 15621, "loss": 0.425, "lr": 1.9605563636399695e-06, "epoch": 0.1808462966519429, "percentage": 18.08, "elapsed_time": "0:10:14", "remaining_time": "0:46:24", "throughput": 14460.95, "total_tokens": 8889472}
|
|
{"current_steps": 2830, "total_steps": 15621, "loss": 0.5908, "lr": 1.9602450403018315e-06, "epoch": 0.18116637859291979, "percentage": 18.12, "elapsed_time": "0:10:15", "remaining_time": "0:46:21", "throughput": 14471.6, "total_tokens": 8904640}
|
|
{"current_steps": 2835, "total_steps": 15621, "loss": 0.4446, "lr": 1.9599325180864864e-06, "epoch": 0.18148646053389667, "percentage": 18.15, "elapsed_time": "0:10:15", "remaining_time": "0:46:17", "throughput": 14482.17, "total_tokens": 8919680}
|
|
{"current_steps": 2840, "total_steps": 15621, "loss": 0.4418, "lr": 1.9596187973841216e-06, "epoch": 0.18180654247487357, "percentage": 18.18, "elapsed_time": "0:10:16", "remaining_time": "0:46:14", "throughput": 14493.31, "total_tokens": 8935360}
|
|
{"current_steps": 2845, "total_steps": 15621, "loss": 0.4892, "lr": 1.959303878586421e-06, "epoch": 0.18212662441585045, "percentage": 18.21, "elapsed_time": "0:10:17", "remaining_time": "0:46:11", "throughput": 14504.95, "total_tokens": 8951552}
|
|
{"current_steps": 2850, "total_steps": 15621, "loss": 0.5694, "lr": 1.9589877620865647e-06, "epoch": 0.18244670635682736, "percentage": 18.24, "elapsed_time": "0:10:17", "remaining_time": "0:46:08", "throughput": 14517.28, "total_tokens": 8968576}
|
|
{"current_steps": 2855, "total_steps": 15621, "loss": 0.4559, "lr": 1.9586704482792277e-06, "epoch": 0.18276678829780424, "percentage": 18.28, "elapsed_time": "0:10:18", "remaining_time": "0:46:05", "throughput": 14527.88, "total_tokens": 8983744}
|
|
{"current_steps": 2860, "total_steps": 15621, "loss": 0.4376, "lr": 1.95835193756058e-06, "epoch": 0.18308687023878112, "percentage": 18.31, "elapsed_time": "0:10:18", "remaining_time": "0:46:01", "throughput": 14538.36, "total_tokens": 8999040}
|
|
{"current_steps": 2865, "total_steps": 15621, "loss": 0.4186, "lr": 1.9580322303282858e-06, "epoch": 0.18340695217975803, "percentage": 18.34, "elapsed_time": "0:10:19", "remaining_time": "0:45:58", "throughput": 14550.43, "total_tokens": 9015872}
|
|
{"current_steps": 2870, "total_steps": 15621, "loss": 0.4001, "lr": 1.9577113269815038e-06, "epoch": 0.1837270341207349, "percentage": 18.37, "elapsed_time": "0:10:20", "remaining_time": "0:45:55", "throughput": 14561.67, "total_tokens": 9031744}
|
|
{"current_steps": 2875, "total_steps": 15621, "loss": 0.5877, "lr": 1.957389227920885e-06, "epoch": 0.18404711606171179, "percentage": 18.4, "elapsed_time": "0:10:20", "remaining_time": "0:45:52", "throughput": 14572.88, "total_tokens": 9047872}
|
|
{"current_steps": 2880, "total_steps": 15621, "loss": 0.5101, "lr": 1.957065933548574e-06, "epoch": 0.1843671980026887, "percentage": 18.44, "elapsed_time": "0:10:21", "remaining_time": "0:45:49", "throughput": 14583.23, "total_tokens": 9062976}
|
|
{"current_steps": 2885, "total_steps": 15621, "loss": 0.5899, "lr": 1.956741444268208e-06, "epoch": 0.18468727994366557, "percentage": 18.47, "elapsed_time": "0:10:22", "remaining_time": "0:45:46", "throughput": 14593.81, "total_tokens": 9078208}
|
|
{"current_steps": 2890, "total_steps": 15621, "loss": 0.4744, "lr": 1.9564157604849154e-06, "epoch": 0.18500736188464248, "percentage": 18.5, "elapsed_time": "0:10:22", "remaining_time": "0:45:43", "throughput": 14605.27, "total_tokens": 9094720}
|
|
{"current_steps": 2895, "total_steps": 15621, "loss": 0.5274, "lr": 1.9560888826053163e-06, "epoch": 0.18532744382561936, "percentage": 18.53, "elapsed_time": "0:10:23", "remaining_time": "0:45:39", "throughput": 14616.09, "total_tokens": 9110336}
|
|
{"current_steps": 2900, "total_steps": 15621, "loss": 0.5573, "lr": 1.9557608110375212e-06, "epoch": 0.18564752576659624, "percentage": 18.56, "elapsed_time": "0:10:23", "remaining_time": "0:45:36", "throughput": 14627.7, "total_tokens": 9126912}
|
|
{"current_steps": 2905, "total_steps": 15621, "loss": 0.549, "lr": 1.955431546191132e-06, "epoch": 0.18596760770757315, "percentage": 18.6, "elapsed_time": "0:10:24", "remaining_time": "0:45:33", "throughput": 14638.41, "total_tokens": 9142400}
|
|
{"current_steps": 2910, "total_steps": 15621, "loss": 0.5161, "lr": 1.95510108847724e-06, "epoch": 0.18628768964855003, "percentage": 18.63, "elapsed_time": "0:10:25", "remaining_time": "0:45:30", "throughput": 14648.35, "total_tokens": 9157184}
|
|
{"current_steps": 2915, "total_steps": 15621, "loss": 0.5237, "lr": 1.954769438308424e-06, "epoch": 0.1866077715895269, "percentage": 18.66, "elapsed_time": "0:10:25", "remaining_time": "0:45:27", "throughput": 14659.75, "total_tokens": 9173696}
|
|
{"current_steps": 2920, "total_steps": 15621, "loss": 0.4992, "lr": 1.954436596098754e-06, "epoch": 0.1869278535305038, "percentage": 18.69, "elapsed_time": "0:10:26", "remaining_time": "0:45:24", "throughput": 14670.89, "total_tokens": 9190080}
|
|
{"current_steps": 2925, "total_steps": 15621, "loss": 0.5761, "lr": 1.9541025622637875e-06, "epoch": 0.1872479354714807, "percentage": 18.72, "elapsed_time": "0:10:26", "remaining_time": "0:45:21", "throughput": 14680.2, "total_tokens": 9204352}
|
|
{"current_steps": 2930, "total_steps": 15621, "loss": 0.6098, "lr": 1.95376733722057e-06, "epoch": 0.1875680174124576, "percentage": 18.76, "elapsed_time": "0:10:27", "remaining_time": "0:45:18", "throughput": 14690.18, "total_tokens": 9219200}
|
|
{"current_steps": 2935, "total_steps": 15621, "loss": 0.4702, "lr": 1.9534309213876337e-06, "epoch": 0.18788809935343448, "percentage": 18.79, "elapsed_time": "0:10:28", "remaining_time": "0:45:15", "throughput": 14699.74, "total_tokens": 9233600}
|
|
{"current_steps": 2940, "total_steps": 15621, "loss": 0.4343, "lr": 1.953093315184997e-06, "epoch": 0.18820818129441136, "percentage": 18.82, "elapsed_time": "0:10:28", "remaining_time": "0:45:12", "throughput": 14710.7, "total_tokens": 9249536}
|
|
{"current_steps": 2945, "total_steps": 15621, "loss": 0.6391, "lr": 1.952754519034166e-06, "epoch": 0.18852826323538827, "percentage": 18.85, "elapsed_time": "0:10:29", "remaining_time": "0:45:08", "throughput": 14720.5, "total_tokens": 9264256}
|
|
{"current_steps": 2950, "total_steps": 15621, "loss": 0.4487, "lr": 1.9524145333581313e-06, "epoch": 0.18884834517636515, "percentage": 18.88, "elapsed_time": "0:10:29", "remaining_time": "0:45:05", "throughput": 14730.7, "total_tokens": 9279488}
|
|
{"current_steps": 2955, "total_steps": 15621, "loss": 0.5122, "lr": 1.952073358581369e-06, "epoch": 0.18916842711734205, "percentage": 18.92, "elapsed_time": "0:10:30", "remaining_time": "0:45:02", "throughput": 14740.55, "total_tokens": 9294336}
|
|
{"current_steps": 2960, "total_steps": 15621, "loss": 0.5552, "lr": 1.95173099512984e-06, "epoch": 0.18948850905831893, "percentage": 18.95, "elapsed_time": "0:10:31", "remaining_time": "0:44:59", "throughput": 14750.61, "total_tokens": 9309376}
|
|
{"current_steps": 2965, "total_steps": 15621, "loss": 0.4579, "lr": 1.9513874434309894e-06, "epoch": 0.1898085909992958, "percentage": 18.98, "elapsed_time": "0:10:31", "remaining_time": "0:44:56", "throughput": 14760.33, "total_tokens": 9324224}
|
|
{"current_steps": 2970, "total_steps": 15621, "loss": 0.4466, "lr": 1.951042703913745e-06, "epoch": 0.19012867294027272, "percentage": 19.01, "elapsed_time": "0:10:32", "remaining_time": "0:44:53", "throughput": 14770.17, "total_tokens": 9339136}
|
|
{"current_steps": 2975, "total_steps": 15621, "loss": 0.4491, "lr": 1.950696777008518e-06, "epoch": 0.1904487548812496, "percentage": 19.04, "elapsed_time": "0:10:32", "remaining_time": "0:44:50", "throughput": 14780.57, "total_tokens": 9354688}
|
|
{"current_steps": 2980, "total_steps": 15621, "loss": 0.4917, "lr": 1.9503496631472025e-06, "epoch": 0.19076883682222648, "percentage": 19.08, "elapsed_time": "0:10:33", "remaining_time": "0:44:47", "throughput": 14790.36, "total_tokens": 9369664}
|
|
{"current_steps": 2985, "total_steps": 15621, "loss": 0.6324, "lr": 1.9500013627631746e-06, "epoch": 0.19108891876320339, "percentage": 19.11, "elapsed_time": "0:10:34", "remaining_time": "0:44:44", "throughput": 14800.31, "total_tokens": 9384768}
|
|
{"current_steps": 2990, "total_steps": 15621, "loss": 0.3728, "lr": 1.949651876291291e-06, "epoch": 0.19140900070418027, "percentage": 19.14, "elapsed_time": "0:10:34", "remaining_time": "0:44:41", "throughput": 14810.71, "total_tokens": 9400320}
|
|
{"current_steps": 2995, "total_steps": 15621, "loss": 0.4739, "lr": 1.9493012041678894e-06, "epoch": 0.19172908264515717, "percentage": 19.17, "elapsed_time": "0:10:35", "remaining_time": "0:44:40", "throughput": 14811.23, "total_tokens": 9415872}
|
|
{"current_steps": 3000, "total_steps": 15621, "loss": 0.6013, "lr": 1.9489493468307883e-06, "epoch": 0.19204916458613405, "percentage": 19.2, "elapsed_time": "0:10:36", "remaining_time": "0:44:37", "throughput": 14822.75, "total_tokens": 9432704}
|
|
{"current_steps": 3005, "total_steps": 15621, "loss": 0.5159, "lr": 1.948596304719286e-06, "epoch": 0.19236924652711093, "percentage": 19.24, "elapsed_time": "0:10:36", "remaining_time": "0:44:34", "throughput": 14832.88, "total_tokens": 9448192}
|
|
{"current_steps": 3010, "total_steps": 15621, "loss": 0.4322, "lr": 1.9482420782741594e-06, "epoch": 0.19268932846808784, "percentage": 19.27, "elapsed_time": "0:10:37", "remaining_time": "0:44:31", "throughput": 14843.85, "total_tokens": 9464576}
|
|
{"current_steps": 3015, "total_steps": 15621, "loss": 0.5546, "lr": 1.9478866679376647e-06, "epoch": 0.19300941040906472, "percentage": 19.3, "elapsed_time": "0:10:38", "remaining_time": "0:44:28", "throughput": 14853.97, "total_tokens": 9479936}
|
|
{"current_steps": 3020, "total_steps": 15621, "loss": 0.5447, "lr": 1.9475300741535353e-06, "epoch": 0.1933294923500416, "percentage": 19.33, "elapsed_time": "0:10:38", "remaining_time": "0:44:25", "throughput": 14865.81, "total_tokens": 9497280}
|
|
{"current_steps": 3025, "total_steps": 15621, "loss": 0.4568, "lr": 1.9471722973669833e-06, "epoch": 0.1936495742910185, "percentage": 19.36, "elapsed_time": "0:10:39", "remaining_time": "0:44:22", "throughput": 14877.62, "total_tokens": 9514496}
|
|
{"current_steps": 3030, "total_steps": 15621, "loss": 0.3932, "lr": 1.946813338024697e-06, "epoch": 0.19396965623199539, "percentage": 19.4, "elapsed_time": "0:10:40", "remaining_time": "0:44:19", "throughput": 14887.37, "total_tokens": 9529536}
|
|
{"current_steps": 3035, "total_steps": 15621, "loss": 0.526, "lr": 1.9464531965748414e-06, "epoch": 0.1942897381729723, "percentage": 19.43, "elapsed_time": "0:10:40", "remaining_time": "0:44:17", "throughput": 14897.94, "total_tokens": 9545472}
|
|
{"current_steps": 3040, "total_steps": 15621, "loss": 0.585, "lr": 1.9460918734670573e-06, "epoch": 0.19460982011394917, "percentage": 19.46, "elapsed_time": "0:10:41", "remaining_time": "0:44:14", "throughput": 14907.95, "total_tokens": 9560960}
|
|
{"current_steps": 3045, "total_steps": 15621, "loss": 0.5221, "lr": 1.945729369152461e-06, "epoch": 0.19492990205492605, "percentage": 19.49, "elapsed_time": "0:10:41", "remaining_time": "0:44:11", "throughput": 14917.71, "total_tokens": 9576320}
|
|
{"current_steps": 3050, "total_steps": 15621, "loss": 0.5632, "lr": 1.945365684083643e-06, "epoch": 0.19524998399590296, "percentage": 19.52, "elapsed_time": "0:10:42", "remaining_time": "0:44:08", "throughput": 14927.86, "total_tokens": 9592192}
|
|
{"current_steps": 3055, "total_steps": 15621, "loss": 0.6164, "lr": 1.945000818714668e-06, "epoch": 0.19557006593687984, "percentage": 19.56, "elapsed_time": "0:10:43", "remaining_time": "0:44:05", "throughput": 14938.28, "total_tokens": 9608128}
|
|
{"current_steps": 3060, "total_steps": 15621, "loss": 0.5338, "lr": 1.944634773501076e-06, "epoch": 0.19589014787785672, "percentage": 19.59, "elapsed_time": "0:10:43", "remaining_time": "0:44:02", "throughput": 14948.6, "total_tokens": 9623872}
|
|
{"current_steps": 3065, "total_steps": 15621, "loss": 0.5496, "lr": 1.9442675488998783e-06, "epoch": 0.19621022981883363, "percentage": 19.62, "elapsed_time": "0:10:44", "remaining_time": "0:43:59", "throughput": 14958.52, "total_tokens": 9639488}
|
|
{"current_steps": 3070, "total_steps": 15621, "loss": 0.4913, "lr": 1.9438991453695587e-06, "epoch": 0.1965303117598105, "percentage": 19.65, "elapsed_time": "0:10:45", "remaining_time": "0:43:57", "throughput": 14968.87, "total_tokens": 9655680}
|
|
{"current_steps": 3075, "total_steps": 15621, "loss": 0.5489, "lr": 1.943529563370073e-06, "epoch": 0.1968503937007874, "percentage": 19.69, "elapsed_time": "0:10:45", "remaining_time": "0:43:54", "throughput": 14978.2, "total_tokens": 9670400}
|
|
{"current_steps": 3080, "total_steps": 15621, "loss": 0.3868, "lr": 1.9431588033628495e-06, "epoch": 0.1971704756417643, "percentage": 19.72, "elapsed_time": "0:10:46", "remaining_time": "0:43:51", "throughput": 14987.71, "total_tokens": 9685504}
|
|
{"current_steps": 3085, "total_steps": 15621, "loss": 0.635, "lr": 1.9427868658107862e-06, "epoch": 0.19749055758274117, "percentage": 19.75, "elapsed_time": "0:10:46", "remaining_time": "0:43:48", "throughput": 14998.4, "total_tokens": 9701952}
|
|
{"current_steps": 3090, "total_steps": 15621, "loss": 0.4485, "lr": 1.942413751178251e-06, "epoch": 0.19781063952371808, "percentage": 19.78, "elapsed_time": "0:10:47", "remaining_time": "0:43:45", "throughput": 15007.91, "total_tokens": 9716928}
|
|
{"current_steps": 3095, "total_steps": 15621, "loss": 0.6516, "lr": 1.9420394599310826e-06, "epoch": 0.19813072146469496, "percentage": 19.81, "elapsed_time": "0:10:48", "remaining_time": "0:43:42", "throughput": 15017.46, "total_tokens": 9732096}
|
|
{"current_steps": 3100, "total_steps": 15621, "loss": 0.5307, "lr": 1.941663992536588e-06, "epoch": 0.19845080340567184, "percentage": 19.85, "elapsed_time": "0:10:48", "remaining_time": "0:43:39", "throughput": 15027.24, "total_tokens": 9747648}
|
|
{"current_steps": 3105, "total_steps": 15621, "loss": 0.4371, "lr": 1.941287349463542e-06, "epoch": 0.19877088534664875, "percentage": 19.88, "elapsed_time": "0:10:49", "remaining_time": "0:43:37", "throughput": 15036.88, "total_tokens": 9763072}
|
|
{"current_steps": 3110, "total_steps": 15621, "loss": 0.4726, "lr": 1.940909531182188e-06, "epoch": 0.19909096728762563, "percentage": 19.91, "elapsed_time": "0:10:49", "remaining_time": "0:43:34", "throughput": 15046.35, "total_tokens": 9778176}
|
|
{"current_steps": 3115, "total_steps": 15621, "loss": 0.6129, "lr": 1.9405305381642375e-06, "epoch": 0.19941104922860253, "percentage": 19.94, "elapsed_time": "0:10:50", "remaining_time": "0:43:31", "throughput": 15056.15, "total_tokens": 9793536}
|
|
{"current_steps": 3120, "total_steps": 15621, "loss": 0.4986, "lr": 1.9401503708828665e-06, "epoch": 0.1997311311695794, "percentage": 19.97, "elapsed_time": "0:10:51", "remaining_time": "0:43:28", "throughput": 15065.29, "total_tokens": 9808192}
|
|
{"current_steps": 3125, "total_steps": 15621, "loss": 0.5774, "lr": 1.939769029812719e-06, "epoch": 0.2000512131105563, "percentage": 20.01, "elapsed_time": "0:10:51", "remaining_time": "0:43:25", "throughput": 15074.63, "total_tokens": 9823232}
|
|
{"current_steps": 3128, "total_steps": 15621, "eval_loss": 0.48840755224227905, "epoch": 0.20024326227514244, "percentage": 20.02, "elapsed_time": "0:11:41", "remaining_time": "0:46:40", "throughput": 14022.09, "total_tokens": 9832064}
|
|
{"current_steps": 3130, "total_steps": 15621, "loss": 0.5893, "lr": 1.939386515429904e-06, "epoch": 0.2003712950515332, "percentage": 20.04, "elapsed_time": "0:12:14", "remaining_time": "0:48:51", "throughput": 13395.05, "total_tokens": 9839488}
|
|
{"current_steps": 3135, "total_steps": 15621, "loss": 0.421, "lr": 1.9390028282119942e-06, "epoch": 0.20069137699251008, "percentage": 20.07, "elapsed_time": "0:12:15", "remaining_time": "0:48:48", "throughput": 13406.17, "total_tokens": 9856192}
|
|
{"current_steps": 3140, "total_steps": 15621, "loss": 0.5122, "lr": 1.938617968638029e-06, "epoch": 0.201011458933487, "percentage": 20.1, "elapsed_time": "0:12:15", "remaining_time": "0:48:44", "throughput": 13415.9, "total_tokens": 9871552}
|
|
{"current_steps": 3145, "total_steps": 15621, "loss": 0.5077, "lr": 1.938231937188509e-06, "epoch": 0.20133154087446387, "percentage": 20.13, "elapsed_time": "0:12:16", "remaining_time": "0:48:41", "throughput": 13425.03, "total_tokens": 9886016}
|
|
{"current_steps": 3150, "total_steps": 15621, "loss": 0.6156, "lr": 1.9378447343453995e-06, "epoch": 0.20165162281544075, "percentage": 20.17, "elapsed_time": "0:12:17", "remaining_time": "0:48:38", "throughput": 13436.7, "total_tokens": 9903552}
|
|
{"current_steps": 3155, "total_steps": 15621, "loss": 0.3458, "lr": 1.9374563605921275e-06, "epoch": 0.20197170475641765, "percentage": 20.2, "elapsed_time": "0:12:17", "remaining_time": "0:48:34", "throughput": 13447.73, "total_tokens": 9920320}
|
|
{"current_steps": 3160, "total_steps": 15621, "loss": 0.5926, "lr": 1.937066816413582e-06, "epoch": 0.20229178669739453, "percentage": 20.23, "elapsed_time": "0:12:18", "remaining_time": "0:48:31", "throughput": 13457.85, "total_tokens": 9935936}
|
|
{"current_steps": 3165, "total_steps": 15621, "loss": 0.4757, "lr": 1.9366761022961146e-06, "epoch": 0.2026118686383714, "percentage": 20.26, "elapsed_time": "0:12:18", "remaining_time": "0:48:27", "throughput": 13467.24, "total_tokens": 9950912}
|
|
{"current_steps": 3170, "total_steps": 15621, "loss": 0.5615, "lr": 1.9362842187275354e-06, "epoch": 0.20293195057934832, "percentage": 20.29, "elapsed_time": "0:12:19", "remaining_time": "0:48:24", "throughput": 13476.81, "total_tokens": 9966080}
|
|
{"current_steps": 3175, "total_steps": 15621, "loss": 0.4789, "lr": 1.9358911661971155e-06, "epoch": 0.2032520325203252, "percentage": 20.33, "elapsed_time": "0:12:20", "remaining_time": "0:48:21", "throughput": 13487.08, "total_tokens": 9982080}
|
|
{"current_steps": 3180, "total_steps": 15621, "loss": 0.4647, "lr": 1.9354969451955864e-06, "epoch": 0.2035721144613021, "percentage": 20.36, "elapsed_time": "0:12:20", "remaining_time": "0:48:17", "throughput": 13496.09, "total_tokens": 9996544}
|
|
{"current_steps": 3185, "total_steps": 15621, "loss": 0.5497, "lr": 1.9351015562151375e-06, "epoch": 0.20389219640227899, "percentage": 20.39, "elapsed_time": "0:12:21", "remaining_time": "0:48:14", "throughput": 13505.8, "total_tokens": 10011776}
|
|
{"current_steps": 3190, "total_steps": 15621, "loss": 0.4331, "lr": 1.934704999749416e-06, "epoch": 0.20421227834325587, "percentage": 20.42, "elapsed_time": "0:12:21", "remaining_time": "0:48:11", "throughput": 13515.61, "total_tokens": 10027264}
|
|
{"current_steps": 3195, "total_steps": 15621, "loss": 0.4203, "lr": 1.9343072762935274e-06, "epoch": 0.20453236028423277, "percentage": 20.45, "elapsed_time": "0:12:22", "remaining_time": "0:48:07", "throughput": 13525.24, "total_tokens": 10042432}
|
|
{"current_steps": 3200, "total_steps": 15621, "loss": 0.4135, "lr": 1.933908386344035e-06, "epoch": 0.20485244222520965, "percentage": 20.49, "elapsed_time": "0:12:23", "remaining_time": "0:48:04", "throughput": 13534.8, "total_tokens": 10057792}
|
|
{"current_steps": 3205, "total_steps": 15621, "loss": 0.5222, "lr": 1.9335083303989565e-06, "epoch": 0.20517252416618653, "percentage": 20.52, "elapsed_time": "0:12:23", "remaining_time": "0:48:01", "throughput": 13546.0, "total_tokens": 10074752}
|
|
{"current_steps": 3210, "total_steps": 15621, "loss": 0.576, "lr": 1.9331071089577674e-06, "epoch": 0.20549260610716344, "percentage": 20.55, "elapsed_time": "0:12:24", "remaining_time": "0:47:57", "throughput": 13556.25, "total_tokens": 10090752}
|
|
{"current_steps": 3215, "total_steps": 15621, "loss": 0.4961, "lr": 1.9327047225213963e-06, "epoch": 0.20581268804814032, "percentage": 20.58, "elapsed_time": "0:12:24", "remaining_time": "0:47:54", "throughput": 13565.89, "total_tokens": 10106240}
|
|
{"current_steps": 3220, "total_steps": 15621, "loss": 0.4128, "lr": 1.9323011715922283e-06, "epoch": 0.20613276998911723, "percentage": 20.61, "elapsed_time": "0:12:25", "remaining_time": "0:47:51", "throughput": 13575.7, "total_tokens": 10121856}
|
|
{"current_steps": 3225, "total_steps": 15621, "loss": 0.4764, "lr": 1.931896456674101e-06, "epoch": 0.2064528519300941, "percentage": 20.65, "elapsed_time": "0:12:26", "remaining_time": "0:47:48", "throughput": 13585.52, "total_tokens": 10137408}
|
|
{"current_steps": 3230, "total_steps": 15621, "loss": 0.4548, "lr": 1.931490578272306e-06, "epoch": 0.20677293387107099, "percentage": 20.68, "elapsed_time": "0:12:26", "remaining_time": "0:47:44", "throughput": 13594.98, "total_tokens": 10152640}
|
|
{"current_steps": 3235, "total_steps": 15621, "loss": 0.3538, "lr": 1.9310835368935867e-06, "epoch": 0.2070930158120479, "percentage": 20.71, "elapsed_time": "0:12:27", "remaining_time": "0:47:41", "throughput": 13604.45, "total_tokens": 10167936}
|
|
{"current_steps": 3240, "total_steps": 15621, "loss": 0.4205, "lr": 1.93067533304614e-06, "epoch": 0.20741309775302477, "percentage": 20.74, "elapsed_time": "0:12:27", "remaining_time": "0:47:38", "throughput": 13614.12, "total_tokens": 10183360}
|
|
{"current_steps": 3245, "total_steps": 15621, "loss": 0.5557, "lr": 1.9302659672396128e-06, "epoch": 0.20773317969400165, "percentage": 20.77, "elapsed_time": "0:12:28", "remaining_time": "0:47:34", "throughput": 13623.39, "total_tokens": 10198208}
|
|
{"current_steps": 3250, "total_steps": 15621, "loss": 0.4903, "lr": 1.9298554399851025e-06, "epoch": 0.20805326163497856, "percentage": 20.81, "elapsed_time": "0:12:29", "remaining_time": "0:47:31", "throughput": 13632.9, "total_tokens": 10213568}
|
|
{"current_steps": 3255, "total_steps": 15621, "loss": 0.4833, "lr": 1.929443751795158e-06, "epoch": 0.20837334357595544, "percentage": 20.84, "elapsed_time": "0:12:29", "remaining_time": "0:47:28", "throughput": 13643.36, "total_tokens": 10230080}
|
|
{"current_steps": 3260, "total_steps": 15621, "loss": 0.4759, "lr": 1.929030903183776e-06, "epoch": 0.20869342551693235, "percentage": 20.87, "elapsed_time": "0:12:30", "remaining_time": "0:47:25", "throughput": 13654.0, "total_tokens": 10246912}
|
|
{"current_steps": 3265, "total_steps": 15621, "loss": 0.5368, "lr": 1.9286168946664033e-06, "epoch": 0.20901350745790923, "percentage": 20.9, "elapsed_time": "0:12:31", "remaining_time": "0:47:22", "throughput": 13663.56, "total_tokens": 10262464}
|
|
{"current_steps": 3270, "total_steps": 15621, "loss": 0.6679, "lr": 1.9282017267599352e-06, "epoch": 0.2093335893988861, "percentage": 20.93, "elapsed_time": "0:12:31", "remaining_time": "0:47:19", "throughput": 13673.14, "total_tokens": 10278016}
|
|
{"current_steps": 3275, "total_steps": 15621, "loss": 0.5054, "lr": 1.9277853999827125e-06, "epoch": 0.209653671339863, "percentage": 20.97, "elapsed_time": "0:12:32", "remaining_time": "0:47:16", "throughput": 13682.95, "total_tokens": 10293824}
|
|
{"current_steps": 3280, "total_steps": 15621, "loss": 0.5116, "lr": 1.9273679148545244e-06, "epoch": 0.2099737532808399, "percentage": 21.0, "elapsed_time": "0:12:32", "remaining_time": "0:47:12", "throughput": 13692.8, "total_tokens": 10309568}
|
|
{"current_steps": 3285, "total_steps": 15621, "loss": 0.4229, "lr": 1.9269492718966062e-06, "epoch": 0.21029383522181677, "percentage": 21.03, "elapsed_time": "0:12:33", "remaining_time": "0:47:09", "throughput": 13702.86, "total_tokens": 10325696}
|
|
{"current_steps": 3290, "total_steps": 15621, "loss": 0.5261, "lr": 1.9265294716316384e-06, "epoch": 0.21061391716279368, "percentage": 21.06, "elapsed_time": "0:12:34", "remaining_time": "0:47:06", "throughput": 13713.04, "total_tokens": 10342016}
|
|
{"current_steps": 3295, "total_steps": 15621, "loss": 0.4688, "lr": 1.926108514583747e-06, "epoch": 0.21093399910377056, "percentage": 21.09, "elapsed_time": "0:12:34", "remaining_time": "0:47:03", "throughput": 13722.54, "total_tokens": 10357632}
|
|
{"current_steps": 3300, "total_steps": 15621, "loss": 0.4801, "lr": 1.925686401278501e-06, "epoch": 0.21125408104474747, "percentage": 21.13, "elapsed_time": "0:12:35", "remaining_time": "0:47:00", "throughput": 13731.96, "total_tokens": 10373056}
|
|
{"current_steps": 3305, "total_steps": 15621, "loss": 0.6373, "lr": 1.9252631322429143e-06, "epoch": 0.21157416298572435, "percentage": 21.16, "elapsed_time": "0:12:36", "remaining_time": "0:46:57", "throughput": 13742.05, "total_tokens": 10389248}
|
|
{"current_steps": 3310, "total_steps": 15621, "loss": 0.439, "lr": 1.9248387080054435e-06, "epoch": 0.21189424492670123, "percentage": 21.19, "elapsed_time": "0:12:36", "remaining_time": "0:46:54", "throughput": 13751.68, "total_tokens": 10404864}
|
|
{"current_steps": 3315, "total_steps": 15621, "loss": 0.4878, "lr": 1.9244131290959864e-06, "epoch": 0.21221432686767813, "percentage": 21.22, "elapsed_time": "0:12:37", "remaining_time": "0:46:51", "throughput": 13761.03, "total_tokens": 10420416}
|
|
{"current_steps": 3320, "total_steps": 15621, "loss": 0.4244, "lr": 1.9239863960458845e-06, "epoch": 0.212534408808655, "percentage": 21.25, "elapsed_time": "0:12:37", "remaining_time": "0:46:47", "throughput": 13769.99, "total_tokens": 10435456}
|
|
{"current_steps": 3325, "total_steps": 15621, "loss": 0.4881, "lr": 1.923558509387918e-06, "epoch": 0.21285449074963192, "percentage": 21.29, "elapsed_time": "0:12:38", "remaining_time": "0:46:44", "throughput": 13779.88, "total_tokens": 10451584}
|
|
{"current_steps": 3330, "total_steps": 15621, "loss": 0.3745, "lr": 1.9231294696563086e-06, "epoch": 0.2131745726906088, "percentage": 21.32, "elapsed_time": "0:12:39", "remaining_time": "0:46:41", "throughput": 13789.71, "total_tokens": 10467584}
|
|
{"current_steps": 3335, "total_steps": 15621, "loss": 0.4146, "lr": 1.922699277386718e-06, "epoch": 0.21349465463158568, "percentage": 21.35, "elapsed_time": "0:12:39", "remaining_time": "0:46:38", "throughput": 13799.17, "total_tokens": 10483264}
|
|
{"current_steps": 3340, "total_steps": 15621, "loss": 0.5865, "lr": 1.9222679331162454e-06, "epoch": 0.21381473657256259, "percentage": 21.38, "elapsed_time": "0:12:40", "remaining_time": "0:46:35", "throughput": 13808.34, "total_tokens": 10498560}
|
|
{"current_steps": 3345, "total_steps": 15621, "loss": 0.4515, "lr": 1.92183543738343e-06, "epoch": 0.21413481851353947, "percentage": 21.41, "elapsed_time": "0:12:40", "remaining_time": "0:46:32", "throughput": 13817.71, "total_tokens": 10514176}
|
|
{"current_steps": 3350, "total_steps": 15621, "loss": 0.4363, "lr": 1.9214017907282475e-06, "epoch": 0.21445490045451635, "percentage": 21.45, "elapsed_time": "0:12:41", "remaining_time": "0:46:29", "throughput": 13827.24, "total_tokens": 10529792}
|
|
{"current_steps": 3355, "total_steps": 15621, "loss": 0.4809, "lr": 1.9209669936921105e-06, "epoch": 0.21477498239549325, "percentage": 21.48, "elapsed_time": "0:12:42", "remaining_time": "0:46:26", "throughput": 13836.94, "total_tokens": 10545856}
|
|
{"current_steps": 3360, "total_steps": 15621, "loss": 0.4092, "lr": 1.920531046817869e-06, "epoch": 0.21509506433647013, "percentage": 21.51, "elapsed_time": "0:12:42", "remaining_time": "0:46:23", "throughput": 13846.95, "total_tokens": 10562368}
|
|
{"current_steps": 3365, "total_steps": 15621, "loss": 0.6238, "lr": 1.9200939506498067e-06, "epoch": 0.21541514627744704, "percentage": 21.54, "elapsed_time": "0:12:43", "remaining_time": "0:46:20", "throughput": 13855.72, "total_tokens": 10577280}
|
|
{"current_steps": 3370, "total_steps": 15621, "loss": 0.5817, "lr": 1.9196557057336446e-06, "epoch": 0.21573522821842392, "percentage": 21.57, "elapsed_time": "0:12:43", "remaining_time": "0:46:17", "throughput": 13864.71, "total_tokens": 10592384}
|
|
{"current_steps": 3375, "total_steps": 15621, "loss": 0.4498, "lr": 1.9192163126165354e-06, "epoch": 0.2160553101594008, "percentage": 21.61, "elapsed_time": "0:12:44", "remaining_time": "0:46:14", "throughput": 13874.42, "total_tokens": 10608704}
|
|
{"current_steps": 3380, "total_steps": 15621, "loss": 0.3997, "lr": 1.9187757718470673e-06, "epoch": 0.2163753921003777, "percentage": 21.64, "elapsed_time": "0:12:45", "remaining_time": "0:46:11", "throughput": 13884.35, "total_tokens": 10625280}
|
|
{"current_steps": 3385, "total_steps": 15621, "loss": 0.5339, "lr": 1.9183340839752606e-06, "epoch": 0.21669547404135459, "percentage": 21.67, "elapsed_time": "0:12:45", "remaining_time": "0:46:08", "throughput": 13893.9, "total_tokens": 10641152}
|
|
{"current_steps": 3390, "total_steps": 15621, "loss": 0.4193, "lr": 1.9178912495525672e-06, "epoch": 0.21701555598233147, "percentage": 21.7, "elapsed_time": "0:12:46", "remaining_time": "0:46:05", "throughput": 13903.56, "total_tokens": 10657472}
|
|
{"current_steps": 3395, "total_steps": 15621, "loss": 0.5054, "lr": 1.917447269131872e-06, "epoch": 0.21733563792330837, "percentage": 21.73, "elapsed_time": "0:12:47", "remaining_time": "0:46:02", "throughput": 13913.27, "total_tokens": 10673600}
|
|
{"current_steps": 3400, "total_steps": 15621, "loss": 0.5693, "lr": 1.917002143267489e-06, "epoch": 0.21765571986428525, "percentage": 21.77, "elapsed_time": "0:12:47", "remaining_time": "0:45:59", "throughput": 13922.56, "total_tokens": 10689344}
|
|
{"current_steps": 3405, "total_steps": 15621, "loss": 0.4478, "lr": 1.9165558725151633e-06, "epoch": 0.21797580180526216, "percentage": 21.8, "elapsed_time": "0:12:48", "remaining_time": "0:45:56", "throughput": 13931.3, "total_tokens": 10704384}
|
|
{"current_steps": 3410, "total_steps": 15621, "loss": 0.5002, "lr": 1.9161084574320692e-06, "epoch": 0.21829588374623904, "percentage": 21.83, "elapsed_time": "0:12:48", "remaining_time": "0:45:53", "throughput": 13941.0, "total_tokens": 10720512}
|
|
{"current_steps": 3415, "total_steps": 15621, "loss": 0.4727, "lr": 1.91565989857681e-06, "epoch": 0.21861596568721592, "percentage": 21.86, "elapsed_time": "0:12:49", "remaining_time": "0:45:50", "throughput": 13949.87, "total_tokens": 10735744}
|
|
{"current_steps": 3420, "total_steps": 15621, "loss": 0.4573, "lr": 1.9152101965094162e-06, "epoch": 0.21893604762819283, "percentage": 21.89, "elapsed_time": "0:12:50", "remaining_time": "0:45:47", "throughput": 13958.64, "total_tokens": 10750848}
|
|
{"current_steps": 3425, "total_steps": 15621, "loss": 0.4878, "lr": 1.9147593517913464e-06, "epoch": 0.2192561295691697, "percentage": 21.93, "elapsed_time": "0:12:50", "remaining_time": "0:45:44", "throughput": 13967.07, "total_tokens": 10765632}
|
|
{"current_steps": 3430, "total_steps": 15621, "loss": 0.3856, "lr": 1.914307364985485e-06, "epoch": 0.21957621151014659, "percentage": 21.96, "elapsed_time": "0:12:51", "remaining_time": "0:45:41", "throughput": 13975.87, "total_tokens": 10780928}
|
|
{"current_steps": 3435, "total_steps": 15621, "loss": 0.4217, "lr": 1.913854236656144e-06, "epoch": 0.2198962934511235, "percentage": 21.99, "elapsed_time": "0:12:52", "remaining_time": "0:45:38", "throughput": 13985.17, "total_tokens": 10796864}
|
|
{"current_steps": 3440, "total_steps": 15621, "loss": 0.4653, "lr": 1.9133999673690584e-06, "epoch": 0.22021637539210037, "percentage": 22.02, "elapsed_time": "0:12:52", "remaining_time": "0:45:35", "throughput": 13994.61, "total_tokens": 10812672}
|
|
{"current_steps": 3445, "total_steps": 15621, "loss": 0.4709, "lr": 1.9129445576913886e-06, "epoch": 0.22053645733307728, "percentage": 22.05, "elapsed_time": "0:12:53", "remaining_time": "0:45:32", "throughput": 14004.01, "total_tokens": 10828544}
|
|
{"current_steps": 3450, "total_steps": 15621, "loss": 0.5335, "lr": 1.91248800819172e-06, "epoch": 0.22085653927405416, "percentage": 22.09, "elapsed_time": "0:12:53", "remaining_time": "0:45:30", "throughput": 14013.28, "total_tokens": 10844288}
|
|
{"current_steps": 3455, "total_steps": 15621, "loss": 0.5192, "lr": 1.912030319440059e-06, "epoch": 0.22117662121503104, "percentage": 22.12, "elapsed_time": "0:12:54", "remaining_time": "0:45:27", "throughput": 14022.58, "total_tokens": 10860160}
|
|
{"current_steps": 3460, "total_steps": 15621, "loss": 0.6043, "lr": 1.9115714920078354e-06, "epoch": 0.22149670315600795, "percentage": 22.15, "elapsed_time": "0:12:55", "remaining_time": "0:45:24", "throughput": 14031.77, "total_tokens": 10875968}
|
|
{"current_steps": 3465, "total_steps": 15621, "loss": 0.3252, "lr": 1.9111115264679017e-06, "epoch": 0.22181678509698483, "percentage": 22.18, "elapsed_time": "0:12:55", "remaining_time": "0:45:21", "throughput": 14041.31, "total_tokens": 10892096}
|
|
{"current_steps": 3470, "total_steps": 15621, "loss": 0.4378, "lr": 1.910650423394529e-06, "epoch": 0.2221368670379617, "percentage": 22.21, "elapsed_time": "0:12:56", "remaining_time": "0:45:18", "throughput": 14051.02, "total_tokens": 10908544}
|
|
{"current_steps": 3475, "total_steps": 15621, "loss": 0.4817, "lr": 1.910188183363411e-06, "epoch": 0.2224569489789386, "percentage": 22.25, "elapsed_time": "0:12:56", "remaining_time": "0:45:15", "throughput": 14060.44, "total_tokens": 10924544}
|
|
{"current_steps": 3480, "total_steps": 15621, "loss": 0.4441, "lr": 1.909724806951659e-06, "epoch": 0.2227770309199155, "percentage": 22.28, "elapsed_time": "0:12:57", "remaining_time": "0:45:12", "throughput": 14070.8, "total_tokens": 10941888}
|
|
{"current_steps": 3485, "total_steps": 15621, "loss": 0.4669, "lr": 1.909260294737804e-06, "epoch": 0.2230971128608924, "percentage": 22.31, "elapsed_time": "0:12:58", "remaining_time": "0:45:10", "throughput": 14080.74, "total_tokens": 10958592}
|
|
{"current_steps": 3490, "total_steps": 15621, "loss": 0.555, "lr": 1.9087946473017953e-06, "epoch": 0.22341719480186928, "percentage": 22.34, "elapsed_time": "0:12:58", "remaining_time": "0:45:07", "throughput": 14089.72, "total_tokens": 10974208}
|
|
{"current_steps": 3495, "total_steps": 15621, "loss": 0.4304, "lr": 1.9083278652249992e-06, "epoch": 0.22373727674284616, "percentage": 22.37, "elapsed_time": "0:12:59", "remaining_time": "0:45:04", "throughput": 14098.0, "total_tokens": 10988928}
|
|
{"current_steps": 3500, "total_steps": 15621, "loss": 0.425, "lr": 1.9078599490901983e-06, "epoch": 0.22405735868382307, "percentage": 22.41, "elapsed_time": "0:13:00", "remaining_time": "0:45:01", "throughput": 14108.19, "total_tokens": 11005952}
|
|
{"current_steps": 3505, "total_steps": 15621, "loss": 0.3971, "lr": 1.9073908994815914e-06, "epoch": 0.22437744062479995, "percentage": 22.44, "elapsed_time": "0:13:00", "remaining_time": "0:44:58", "throughput": 14116.41, "total_tokens": 11020608}
|
|
{"current_steps": 3510, "total_steps": 15621, "loss": 0.4862, "lr": 1.9069207169847928e-06, "epoch": 0.22469752256577685, "percentage": 22.47, "elapsed_time": "0:13:01", "remaining_time": "0:44:55", "throughput": 14125.85, "total_tokens": 11036736}
|
|
{"current_steps": 3515, "total_steps": 15621, "loss": 0.3584, "lr": 1.9064494021868302e-06, "epoch": 0.22501760450675373, "percentage": 22.5, "elapsed_time": "0:13:01", "remaining_time": "0:44:53", "throughput": 14135.05, "total_tokens": 11052480}
|
|
{"current_steps": 3520, "total_steps": 15621, "loss": 0.48, "lr": 1.9059769556761464e-06, "epoch": 0.2253376864477306, "percentage": 22.53, "elapsed_time": "0:13:02", "remaining_time": "0:44:50", "throughput": 14144.3, "total_tokens": 11068416}
|
|
{"current_steps": 3525, "total_steps": 15621, "loss": 0.4454, "lr": 1.9055033780425962e-06, "epoch": 0.22565776838870752, "percentage": 22.57, "elapsed_time": "0:13:03", "remaining_time": "0:44:47", "throughput": 14155.15, "total_tokens": 11086400}
|
|
{"current_steps": 3530, "total_steps": 15621, "loss": 0.562, "lr": 1.9050286698774464e-06, "epoch": 0.2259778503296844, "percentage": 22.6, "elapsed_time": "0:13:03", "remaining_time": "0:44:44", "throughput": 14164.63, "total_tokens": 11102848}
|
|
{"current_steps": 3535, "total_steps": 15621, "loss": 0.5359, "lr": 1.904552831773376e-06, "epoch": 0.22629793227066128, "percentage": 22.63, "elapsed_time": "0:13:04", "remaining_time": "0:44:41", "throughput": 14173.27, "total_tokens": 11118080}
|
|
{"current_steps": 3540, "total_steps": 15621, "loss": 0.4967, "lr": 1.9040758643244748e-06, "epoch": 0.22661801421163819, "percentage": 22.66, "elapsed_time": "0:13:05", "remaining_time": "0:44:39", "throughput": 14181.72, "total_tokens": 11133120}
|
|
{"current_steps": 3545, "total_steps": 15621, "loss": 0.4694, "lr": 1.903597768126242e-06, "epoch": 0.22693809615261507, "percentage": 22.69, "elapsed_time": "0:13:05", "remaining_time": "0:44:36", "throughput": 14191.56, "total_tokens": 11150144}
|
|
{"current_steps": 3550, "total_steps": 15621, "loss": 0.4787, "lr": 1.9031185437755862e-06, "epoch": 0.22725817809359197, "percentage": 22.73, "elapsed_time": "0:13:06", "remaining_time": "0:44:33", "throughput": 14200.35, "total_tokens": 11165760}
|
|
{"current_steps": 3555, "total_steps": 15621, "loss": 0.4582, "lr": 1.9026381918708246e-06, "epoch": 0.22757826003456885, "percentage": 22.76, "elapsed_time": "0:13:06", "remaining_time": "0:44:30", "throughput": 14208.27, "total_tokens": 11180096}
|
|
{"current_steps": 3560, "total_steps": 15621, "loss": 0.3618, "lr": 1.9021567130116822e-06, "epoch": 0.22789834197554573, "percentage": 22.79, "elapsed_time": "0:13:07", "remaining_time": "0:44:27", "throughput": 14216.77, "total_tokens": 11195584}
|
|
{"current_steps": 3565, "total_steps": 15621, "loss": 0.3909, "lr": 1.9016741077992916e-06, "epoch": 0.22821842391652264, "percentage": 22.82, "elapsed_time": "0:13:08", "remaining_time": "0:44:25", "throughput": 14225.4, "total_tokens": 11210944}
|
|
{"current_steps": 3570, "total_steps": 15621, "loss": 0.4052, "lr": 1.90119037683619e-06, "epoch": 0.22853850585749952, "percentage": 22.85, "elapsed_time": "0:13:08", "remaining_time": "0:44:22", "throughput": 14234.87, "total_tokens": 11227392}
|
|
{"current_steps": 3575, "total_steps": 15621, "loss": 0.6492, "lr": 1.9007055207263223e-06, "epoch": 0.2288585877984764, "percentage": 22.89, "elapsed_time": "0:13:09", "remaining_time": "0:44:19", "throughput": 14244.66, "total_tokens": 11244416}
|
|
{"current_steps": 3580, "total_steps": 15621, "loss": 0.3588, "lr": 1.900219540075036e-06, "epoch": 0.2291786697394533, "percentage": 22.92, "elapsed_time": "0:13:10", "remaining_time": "0:44:17", "throughput": 14253.93, "total_tokens": 11260672}
|
|
{"current_steps": 3585, "total_steps": 15621, "loss": 0.4749, "lr": 1.8997324354890845e-06, "epoch": 0.22949875168043019, "percentage": 22.95, "elapsed_time": "0:13:10", "remaining_time": "0:44:14", "throughput": 14263.61, "total_tokens": 11277504}
|
|
{"current_steps": 3590, "total_steps": 15621, "loss": 0.539, "lr": 1.8992442075766233e-06, "epoch": 0.2298188336214071, "percentage": 22.98, "elapsed_time": "0:13:11", "remaining_time": "0:44:11", "throughput": 14272.31, "total_tokens": 11293184}
|
|
{"current_steps": 3595, "total_steps": 15621, "loss": 0.3191, "lr": 1.8987548569472105e-06, "epoch": 0.23013891556238397, "percentage": 23.01, "elapsed_time": "0:13:11", "remaining_time": "0:44:08", "throughput": 14280.9, "total_tokens": 11308480}
|
|
{"current_steps": 3600, "total_steps": 15621, "loss": 0.396, "lr": 1.8982643842118064e-06, "epoch": 0.23045899750336085, "percentage": 23.05, "elapsed_time": "0:13:12", "remaining_time": "0:44:06", "throughput": 14289.43, "total_tokens": 11323840}
|
|
{"current_steps": 3605, "total_steps": 15621, "loss": 0.5821, "lr": 1.8977727899827716e-06, "epoch": 0.23077907944433776, "percentage": 23.08, "elapsed_time": "0:13:13", "remaining_time": "0:44:03", "throughput": 14298.21, "total_tokens": 11339456}
|
|
{"current_steps": 3610, "total_steps": 15621, "loss": 0.6554, "lr": 1.8972800748738678e-06, "epoch": 0.23109916138531464, "percentage": 23.11, "elapsed_time": "0:13:13", "remaining_time": "0:44:00", "throughput": 14306.66, "total_tokens": 11354880}
|
|
{"current_steps": 3615, "total_steps": 15621, "loss": 0.5226, "lr": 1.896786239500255e-06, "epoch": 0.23141924332629152, "percentage": 23.14, "elapsed_time": "0:13:14", "remaining_time": "0:43:57", "throughput": 14315.04, "total_tokens": 11369984}
|
|
{"current_steps": 3620, "total_steps": 15621, "loss": 0.429, "lr": 1.8962912844784928e-06, "epoch": 0.23173932526726843, "percentage": 23.17, "elapsed_time": "0:13:14", "remaining_time": "0:43:55", "throughput": 14323.08, "total_tokens": 11384640}
|
|
{"current_steps": 3625, "total_steps": 15621, "loss": 0.4945, "lr": 1.8957952104265384e-06, "epoch": 0.2320594072082453, "percentage": 23.21, "elapsed_time": "0:13:15", "remaining_time": "0:43:52", "throughput": 14332.44, "total_tokens": 11401152}
|
|
{"current_steps": 3630, "total_steps": 15621, "loss": 0.4535, "lr": 1.8952980179637458e-06, "epoch": 0.2323794891492222, "percentage": 23.24, "elapsed_time": "0:13:16", "remaining_time": "0:43:49", "throughput": 14341.2, "total_tokens": 11416896}
|
|
{"current_steps": 3635, "total_steps": 15621, "loss": 0.4899, "lr": 1.8947997077108662e-06, "epoch": 0.2326995710901991, "percentage": 23.27, "elapsed_time": "0:13:16", "remaining_time": "0:43:47", "throughput": 14350.12, "total_tokens": 11432832}
|
|
{"current_steps": 3640, "total_steps": 15621, "loss": 0.4807, "lr": 1.894300280290045e-06, "epoch": 0.23301965303117597, "percentage": 23.3, "elapsed_time": "0:13:17", "remaining_time": "0:43:44", "throughput": 14358.63, "total_tokens": 11448320}
|
|
{"current_steps": 3645, "total_steps": 15621, "loss": 0.5674, "lr": 1.8937997363248237e-06, "epoch": 0.23333973497215288, "percentage": 23.33, "elapsed_time": "0:13:17", "remaining_time": "0:43:41", "throughput": 14366.83, "total_tokens": 11463488}
|
|
{"current_steps": 3650, "total_steps": 15621, "loss": 0.4527, "lr": 1.8932980764401373e-06, "epoch": 0.23365981691312976, "percentage": 23.37, "elapsed_time": "0:13:18", "remaining_time": "0:43:38", "throughput": 14375.11, "total_tokens": 11478592}
|
|
{"current_steps": 3655, "total_steps": 15621, "loss": 0.3564, "lr": 1.8927953012623141e-06, "epoch": 0.23397989885410664, "percentage": 23.4, "elapsed_time": "0:13:19", "remaining_time": "0:43:36", "throughput": 14383.89, "total_tokens": 11494720}
|
|
{"current_steps": 3660, "total_steps": 15621, "loss": 0.4846, "lr": 1.8922914114190744e-06, "epoch": 0.23429998079508355, "percentage": 23.43, "elapsed_time": "0:13:19", "remaining_time": "0:43:33", "throughput": 14392.95, "total_tokens": 11511232}
|
|
{"current_steps": 3665, "total_steps": 15621, "loss": 0.5093, "lr": 1.8917864075395312e-06, "epoch": 0.23462006273606043, "percentage": 23.46, "elapsed_time": "0:13:20", "remaining_time": "0:43:31", "throughput": 14401.62, "total_tokens": 11527040}
|
|
{"current_steps": 3670, "total_steps": 15621, "loss": 0.4461, "lr": 1.8912802902541873e-06, "epoch": 0.23494014467703733, "percentage": 23.49, "elapsed_time": "0:13:21", "remaining_time": "0:43:28", "throughput": 14410.09, "total_tokens": 11542528}
|
|
{"current_steps": 3675, "total_steps": 15621, "loss": 0.4974, "lr": 1.8907730601949362e-06, "epoch": 0.2352602266180142, "percentage": 23.53, "elapsed_time": "0:13:21", "remaining_time": "0:43:25", "throughput": 14418.27, "total_tokens": 11557696}
|
|
{"current_steps": 3680, "total_steps": 15621, "loss": 0.4648, "lr": 1.8902647179950608e-06, "epoch": 0.2355803085589911, "percentage": 23.56, "elapsed_time": "0:13:22", "remaining_time": "0:43:23", "throughput": 14427.94, "total_tokens": 11574848}
|
|
{"current_steps": 3685, "total_steps": 15621, "loss": 0.5108, "lr": 1.889755264289232e-06, "epoch": 0.235900390499968, "percentage": 23.59, "elapsed_time": "0:13:22", "remaining_time": "0:43:20", "throughput": 14435.88, "total_tokens": 11589696}
|
|
{"current_steps": 3690, "total_steps": 15621, "loss": 0.384, "lr": 1.8892446997135087e-06, "epoch": 0.23622047244094488, "percentage": 23.62, "elapsed_time": "0:13:23", "remaining_time": "0:43:17", "throughput": 14445.5, "total_tokens": 11606848}
|
|
{"current_steps": 3695, "total_steps": 15621, "loss": 0.6707, "lr": 1.888733024905337e-06, "epoch": 0.23654055438192176, "percentage": 23.65, "elapsed_time": "0:13:24", "remaining_time": "0:43:15", "throughput": 14455.04, "total_tokens": 11623744}
|
|
{"current_steps": 3700, "total_steps": 15621, "loss": 0.4755, "lr": 1.888220240503549e-06, "epoch": 0.23686063632289867, "percentage": 23.69, "elapsed_time": "0:13:24", "remaining_time": "0:43:12", "throughput": 14464.13, "total_tokens": 11640256}
|
|
{"current_steps": 3705, "total_steps": 15621, "loss": 0.412, "lr": 1.8877063471483618e-06, "epoch": 0.23718071826387555, "percentage": 23.72, "elapsed_time": "0:13:25", "remaining_time": "0:43:10", "throughput": 14472.46, "total_tokens": 11655744}
|
|
{"current_steps": 3710, "total_steps": 15621, "loss": 0.2935, "lr": 1.8871913454813772e-06, "epoch": 0.23750080020485245, "percentage": 23.75, "elapsed_time": "0:13:25", "remaining_time": "0:43:07", "throughput": 14480.84, "total_tokens": 11671104}
|
|
{"current_steps": 3715, "total_steps": 15621, "loss": 0.3898, "lr": 1.886675236145581e-06, "epoch": 0.23782088214582933, "percentage": 23.78, "elapsed_time": "0:13:26", "remaining_time": "0:43:04", "throughput": 14489.49, "total_tokens": 11686848}
|
|
{"current_steps": 3720, "total_steps": 15621, "loss": 0.5018, "lr": 1.8861580197853422e-06, "epoch": 0.2381409640868062, "percentage": 23.81, "elapsed_time": "0:13:27", "remaining_time": "0:43:02", "throughput": 14497.6, "total_tokens": 11701952}
|
|
{"current_steps": 3725, "total_steps": 15621, "loss": 0.4647, "lr": 1.8856396970464105e-06, "epoch": 0.23846104602778312, "percentage": 23.85, "elapsed_time": "0:13:27", "remaining_time": "0:42:59", "throughput": 14506.76, "total_tokens": 11718592}
|
|
{"current_steps": 3730, "total_steps": 15621, "loss": 0.5143, "lr": 1.8851202685759189e-06, "epoch": 0.23878112796876, "percentage": 23.88, "elapsed_time": "0:13:28", "remaining_time": "0:42:57", "throughput": 14515.19, "total_tokens": 11734208}
|
|
{"current_steps": 3735, "total_steps": 15621, "loss": 0.407, "lr": 1.8845997350223792e-06, "epoch": 0.2391012099097369, "percentage": 23.91, "elapsed_time": "0:13:28", "remaining_time": "0:42:54", "throughput": 14523.03, "total_tokens": 11748992}
|
|
{"current_steps": 3740, "total_steps": 15621, "loss": 0.4217, "lr": 1.8840780970356842e-06, "epoch": 0.23942129185071379, "percentage": 23.94, "elapsed_time": "0:13:29", "remaining_time": "0:42:51", "throughput": 14531.41, "total_tokens": 11764608}
|
|
{"current_steps": 3745, "total_steps": 15621, "loss": 0.4078, "lr": 1.8835553552671048e-06, "epoch": 0.23974137379169067, "percentage": 23.97, "elapsed_time": "0:13:30", "remaining_time": "0:42:49", "throughput": 14540.21, "total_tokens": 11780800}
|
|
{"current_steps": 3750, "total_steps": 15621, "loss": 0.4593, "lr": 1.8830315103692902e-06, "epoch": 0.24006145573266757, "percentage": 24.01, "elapsed_time": "0:13:30", "remaining_time": "0:42:46", "throughput": 14548.11, "total_tokens": 11795776}
|
|
{"current_steps": 3755, "total_steps": 15621, "loss": 0.5071, "lr": 1.8825065629962669e-06, "epoch": 0.24038153767364445, "percentage": 24.04, "elapsed_time": "0:13:31", "remaining_time": "0:42:44", "throughput": 14556.71, "total_tokens": 11811776}
|
|
{"current_steps": 3760, "total_steps": 15621, "loss": 0.4852, "lr": 1.881980513803438e-06, "epoch": 0.24070161961462133, "percentage": 24.07, "elapsed_time": "0:13:32", "remaining_time": "0:42:41", "throughput": 14565.55, "total_tokens": 11828224}
|
|
{"current_steps": 3765, "total_steps": 15621, "loss": 0.5035, "lr": 1.881453363447582e-06, "epoch": 0.24102170155559824, "percentage": 24.1, "elapsed_time": "0:13:32", "remaining_time": "0:42:39", "throughput": 14573.72, "total_tokens": 11843904}
|
|
{"current_steps": 3770, "total_steps": 15621, "loss": 0.5574, "lr": 1.880925112586852e-06, "epoch": 0.24134178349657512, "percentage": 24.13, "elapsed_time": "0:13:33", "remaining_time": "0:42:36", "throughput": 14581.9, "total_tokens": 11859392}
|
|
{"current_steps": 3775, "total_steps": 15621, "loss": 0.4427, "lr": 1.8803957618807762e-06, "epoch": 0.24166186543755203, "percentage": 24.17, "elapsed_time": "0:13:33", "remaining_time": "0:42:34", "throughput": 14590.89, "total_tokens": 11875968}
|
|
{"current_steps": 3780, "total_steps": 15621, "loss": 0.4404, "lr": 1.8798653119902548e-06, "epoch": 0.2419819473785289, "percentage": 24.2, "elapsed_time": "0:13:34", "remaining_time": "0:42:31", "throughput": 14599.03, "total_tokens": 11891584}
|
|
{"current_steps": 3785, "total_steps": 15621, "loss": 0.5029, "lr": 1.8793337635775603e-06, "epoch": 0.24230202931950579, "percentage": 24.23, "elapsed_time": "0:13:35", "remaining_time": "0:42:29", "throughput": 14607.08, "total_tokens": 11906944}
|
|
{"current_steps": 3790, "total_steps": 15621, "loss": 0.4729, "lr": 1.8788011173063376e-06, "epoch": 0.2426221112604827, "percentage": 24.26, "elapsed_time": "0:13:35", "remaining_time": "0:42:26", "throughput": 14615.19, "total_tokens": 11922368}
|
|
{"current_steps": 3795, "total_steps": 15621, "loss": 0.5181, "lr": 1.8782673738416018e-06, "epoch": 0.24294219320145957, "percentage": 24.29, "elapsed_time": "0:13:36", "remaining_time": "0:42:23", "throughput": 14623.78, "total_tokens": 11938432}
|
|
{"current_steps": 3800, "total_steps": 15621, "loss": 0.5078, "lr": 1.877732533849737e-06, "epoch": 0.24326227514243645, "percentage": 24.33, "elapsed_time": "0:13:37", "remaining_time": "0:42:21", "throughput": 14633.86, "total_tokens": 11956608}
|
|
{"current_steps": 3805, "total_steps": 15621, "loss": 0.4394, "lr": 1.8771965979984988e-06, "epoch": 0.24358235708341336, "percentage": 24.36, "elapsed_time": "0:13:37", "remaining_time": "0:42:19", "throughput": 14642.08, "total_tokens": 11972480}
|
|
{"current_steps": 3810, "total_steps": 15621, "loss": 0.3889, "lr": 1.8766595669570084e-06, "epoch": 0.24390243902439024, "percentage": 24.39, "elapsed_time": "0:13:38", "remaining_time": "0:42:16", "throughput": 14649.52, "total_tokens": 11987072}
|
|
{"current_steps": 3815, "total_steps": 15621, "loss": 0.4361, "lr": 1.8761214413957553e-06, "epoch": 0.24422252096536715, "percentage": 24.42, "elapsed_time": "0:13:38", "remaining_time": "0:42:14", "throughput": 14657.28, "total_tokens": 12002112}
|
|
{"current_steps": 3820, "total_steps": 15621, "loss": 0.3493, "lr": 1.8755822219865963e-06, "epoch": 0.24454260290634403, "percentage": 24.45, "elapsed_time": "0:13:39", "remaining_time": "0:42:11", "throughput": 14664.84, "total_tokens": 12016960}
|
|
{"current_steps": 3825, "total_steps": 15621, "loss": 0.4331, "lr": 1.875041909402752e-06, "epoch": 0.2448626848473209, "percentage": 24.49, "elapsed_time": "0:13:40", "remaining_time": "0:42:08", "throughput": 14673.04, "total_tokens": 12032576}
|
|
{"current_steps": 3830, "total_steps": 15621, "loss": 0.3638, "lr": 1.8745005043188102e-06, "epoch": 0.2451827667882978, "percentage": 24.52, "elapsed_time": "0:13:40", "remaining_time": "0:42:06", "throughput": 14681.55, "total_tokens": 12048768}
|
|
{"current_steps": 3835, "total_steps": 15621, "loss": 0.395, "lr": 1.8739580074107208e-06, "epoch": 0.2455028487292747, "percentage": 24.55, "elapsed_time": "0:13:41", "remaining_time": "0:42:04", "throughput": 14690.29, "total_tokens": 12065088}
|
|
{"current_steps": 3840, "total_steps": 15621, "loss": 0.6844, "lr": 1.873414419355798e-06, "epoch": 0.24582293067025157, "percentage": 24.58, "elapsed_time": "0:13:41", "remaining_time": "0:42:01", "throughput": 14698.43, "total_tokens": 12080704}
|
|
{"current_steps": 3845, "total_steps": 15621, "loss": 0.4292, "lr": 1.872869740832717e-06, "epoch": 0.24614301261122848, "percentage": 24.61, "elapsed_time": "0:13:42", "remaining_time": "0:41:59", "throughput": 14706.76, "total_tokens": 12096704}
|
|
{"current_steps": 3850, "total_steps": 15621, "loss": 0.6103, "lr": 1.8723239725215165e-06, "epoch": 0.24646309455220536, "percentage": 24.65, "elapsed_time": "0:13:43", "remaining_time": "0:41:56", "throughput": 14714.21, "total_tokens": 12111488}
|
|
{"current_steps": 3855, "total_steps": 15621, "loss": 0.4206, "lr": 1.871777115103594e-06, "epoch": 0.24678317649318227, "percentage": 24.68, "elapsed_time": "0:13:43", "remaining_time": "0:41:54", "throughput": 14723.18, "total_tokens": 12128192}
|
|
{"current_steps": 3860, "total_steps": 15621, "loss": 0.4786, "lr": 1.8712291692617074e-06, "epoch": 0.24710325843415915, "percentage": 24.71, "elapsed_time": "0:13:44", "remaining_time": "0:41:51", "throughput": 14731.19, "total_tokens": 12143808}
|
|
{"current_steps": 3865, "total_steps": 15621, "loss": 0.4804, "lr": 1.8706801356799735e-06, "epoch": 0.24742334037513602, "percentage": 24.74, "elapsed_time": "0:13:44", "remaining_time": "0:41:49", "throughput": 14739.03, "total_tokens": 12159232}
|
|
{"current_steps": 3870, "total_steps": 15621, "loss": 0.4465, "lr": 1.8701300150438674e-06, "epoch": 0.24774342231611293, "percentage": 24.77, "elapsed_time": "0:13:45", "remaining_time": "0:41:46", "throughput": 14747.5, "total_tokens": 12175360}
|
|
{"current_steps": 3875, "total_steps": 15621, "loss": 0.4191, "lr": 1.869578808040221e-06, "epoch": 0.2480635042570898, "percentage": 24.81, "elapsed_time": "0:13:46", "remaining_time": "0:41:44", "throughput": 14755.18, "total_tokens": 12190272}
|
|
{"current_steps": 3880, "total_steps": 15621, "loss": 0.5149, "lr": 1.869026515357223e-06, "epoch": 0.2483835861980667, "percentage": 24.84, "elapsed_time": "0:13:46", "remaining_time": "0:41:42", "throughput": 14764.96, "total_tokens": 12208448}
|
|
{"current_steps": 3885, "total_steps": 15621, "loss": 0.6372, "lr": 1.8684731376844169e-06, "epoch": 0.2487036681390436, "percentage": 24.87, "elapsed_time": "0:13:47", "remaining_time": "0:41:39", "throughput": 14774.23, "total_tokens": 12225984}
|
|
{"current_steps": 3890, "total_steps": 15621, "loss": 0.4965, "lr": 1.8679186757127014e-06, "epoch": 0.24902375008002048, "percentage": 24.9, "elapsed_time": "0:13:48", "remaining_time": "0:41:37", "throughput": 14782.03, "total_tokens": 12241408}
|
|
{"current_steps": 3895, "total_steps": 15621, "loss": 0.4381, "lr": 1.8673631301343288e-06, "epoch": 0.24934383202099739, "percentage": 24.93, "elapsed_time": "0:13:48", "remaining_time": "0:41:34", "throughput": 14789.33, "total_tokens": 12256064}
|
|
{"current_steps": 3900, "total_steps": 15621, "loss": 0.4388, "lr": 1.8668065016429044e-06, "epoch": 0.24966391396197427, "percentage": 24.97, "elapsed_time": "0:13:49", "remaining_time": "0:41:32", "throughput": 14798.12, "total_tokens": 12272832}
|
|
{"current_steps": 3905, "total_steps": 15621, "loss": 0.5257, "lr": 1.866248790933385e-06, "epoch": 0.24998399590295114, "percentage": 25.0, "elapsed_time": "0:13:49", "remaining_time": "0:41:30", "throughput": 14806.56, "total_tokens": 12289024}
|
|
{"current_steps": 3910, "total_steps": 15621, "loss": 0.4226, "lr": 1.8656899987020795e-06, "epoch": 0.25030407784392805, "percentage": 25.03, "elapsed_time": "0:13:50", "remaining_time": "0:41:27", "throughput": 14814.22, "total_tokens": 12304064}
|
|
{"current_steps": 3910, "total_steps": 15621, "eval_loss": 0.4644124507904053, "epoch": 0.25030407784392805, "percentage": 25.03, "elapsed_time": "0:14:39", "remaining_time": "0:43:55", "throughput": 13985.65, "total_tokens": 12304064}
|
|
{"current_steps": 3915, "total_steps": 15621, "loss": 0.4605, "lr": 1.865130125646646e-06, "epoch": 0.25062415978490493, "percentage": 25.06, "elapsed_time": "0:15:12", "remaining_time": "0:45:29", "throughput": 13496.98, "total_tokens": 12320256}
|
|
{"current_steps": 3920, "total_steps": 15621, "loss": 0.4394, "lr": 1.8645691724660933e-06, "epoch": 0.2509442417258818, "percentage": 25.09, "elapsed_time": "0:15:13", "remaining_time": "0:45:26", "throughput": 13504.66, "total_tokens": 12335360}
|
|
{"current_steps": 3925, "total_steps": 15621, "loss": 0.4616, "lr": 1.8640071398607774e-06, "epoch": 0.2512643236668587, "percentage": 25.13, "elapsed_time": "0:15:14", "remaining_time": "0:45:23", "throughput": 13513.11, "total_tokens": 12351488}
|
|
{"current_steps": 3930, "total_steps": 15621, "loss": 0.6203, "lr": 1.8634440285324024e-06, "epoch": 0.2515844056078356, "percentage": 25.16, "elapsed_time": "0:15:14", "remaining_time": "0:45:20", "throughput": 13520.36, "total_tokens": 12365952}
|
|
{"current_steps": 3935, "total_steps": 15621, "loss": 0.469, "lr": 1.8628798391840205e-06, "epoch": 0.2519044875488125, "percentage": 25.19, "elapsed_time": "0:15:15", "remaining_time": "0:45:17", "throughput": 13528.46, "total_tokens": 12381376}
|
|
{"current_steps": 3940, "total_steps": 15621, "loss": 0.4588, "lr": 1.8623145725200277e-06, "epoch": 0.2522245694897894, "percentage": 25.22, "elapsed_time": "0:15:15", "remaining_time": "0:45:15", "throughput": 13536.01, "total_tokens": 12396160}
|
|
{"current_steps": 3945, "total_steps": 15621, "loss": 0.4468, "lr": 1.8617482292461664e-06, "epoch": 0.25254465143076626, "percentage": 25.25, "elapsed_time": "0:15:16", "remaining_time": "0:45:12", "throughput": 13543.48, "total_tokens": 12410944}
|
|
{"current_steps": 3950, "total_steps": 15621, "loss": 0.4172, "lr": 1.861180810069523e-06, "epoch": 0.25286473337174314, "percentage": 25.29, "elapsed_time": "0:15:16", "remaining_time": "0:45:09", "throughput": 13551.33, "total_tokens": 12426304}
|
|
{"current_steps": 3955, "total_steps": 15621, "loss": 0.4599, "lr": 1.8606123156985268e-06, "epoch": 0.2531848153127201, "percentage": 25.32, "elapsed_time": "0:15:17", "remaining_time": "0:45:06", "throughput": 13559.69, "total_tokens": 12442432}
|
|
{"current_steps": 3960, "total_steps": 15621, "loss": 0.4617, "lr": 1.8600427468429496e-06, "epoch": 0.25350489725369696, "percentage": 25.35, "elapsed_time": "0:15:18", "remaining_time": "0:45:03", "throughput": 13567.82, "total_tokens": 12458368}
|
|
{"current_steps": 3965, "total_steps": 15621, "loss": 0.4302, "lr": 1.8594721042139052e-06, "epoch": 0.25382497919467384, "percentage": 25.38, "elapsed_time": "0:15:18", "remaining_time": "0:45:01", "throughput": 13576.03, "total_tokens": 12474368}
|
|
{"current_steps": 3970, "total_steps": 15621, "loss": 0.4147, "lr": 1.858900388523847e-06, "epoch": 0.2541450611356507, "percentage": 25.41, "elapsed_time": "0:15:19", "remaining_time": "0:44:58", "throughput": 13584.13, "total_tokens": 12490176}
|
|
{"current_steps": 3975, "total_steps": 15621, "loss": 0.4639, "lr": 1.8583276004865694e-06, "epoch": 0.2544651430766276, "percentage": 25.45, "elapsed_time": "0:15:20", "remaining_time": "0:44:55", "throughput": 13593.43, "total_tokens": 12507840}
|
|
{"current_steps": 3980, "total_steps": 15621, "loss": 0.3452, "lr": 1.8577537408172046e-06, "epoch": 0.25478522501760453, "percentage": 25.48, "elapsed_time": "0:15:20", "remaining_time": "0:44:53", "throughput": 13601.33, "total_tokens": 12523520}
|
|
{"current_steps": 3985, "total_steps": 15621, "loss": 0.5365, "lr": 1.8571788102322234e-06, "epoch": 0.2551053069585814, "percentage": 25.51, "elapsed_time": "0:15:21", "remaining_time": "0:44:50", "throughput": 13610.34, "total_tokens": 12540736}
|
|
{"current_steps": 3990, "total_steps": 15621, "loss": 0.4704, "lr": 1.8566028094494332e-06, "epoch": 0.2554253888995583, "percentage": 25.54, "elapsed_time": "0:15:22", "remaining_time": "0:44:47", "throughput": 13618.38, "total_tokens": 12556352}
|
|
{"current_steps": 3995, "total_steps": 15621, "loss": 0.3726, "lr": 1.8560257391879778e-06, "epoch": 0.25574547084053517, "percentage": 25.57, "elapsed_time": "0:15:22", "remaining_time": "0:44:44", "throughput": 13625.45, "total_tokens": 12570688}
|
|
{"current_steps": 4000, "total_steps": 15621, "loss": 0.4038, "lr": 1.855447600168336e-06, "epoch": 0.25606555278151205, "percentage": 25.61, "elapsed_time": "0:15:23", "remaining_time": "0:44:42", "throughput": 13633.22, "total_tokens": 12585984}
|
|
{"current_steps": 4005, "total_steps": 15621, "loss": 0.4665, "lr": 1.8548683931123215e-06, "epoch": 0.25638563472248893, "percentage": 25.64, "elapsed_time": "0:15:23", "remaining_time": "0:44:39", "throughput": 13640.95, "total_tokens": 12601216}
|
|
{"current_steps": 4010, "total_steps": 15621, "loss": 0.4408, "lr": 1.8542881187430807e-06, "epoch": 0.25670571666346587, "percentage": 25.67, "elapsed_time": "0:15:24", "remaining_time": "0:44:36", "throughput": 13650.07, "total_tokens": 12618624}
|
|
{"current_steps": 4015, "total_steps": 15621, "loss": 0.5792, "lr": 1.8537067777850935e-06, "epoch": 0.25702579860444275, "percentage": 25.7, "elapsed_time": "0:15:25", "remaining_time": "0:44:34", "throughput": 13658.95, "total_tokens": 12635840}
|
|
{"current_steps": 4020, "total_steps": 15621, "loss": 0.3554, "lr": 1.8531243709641704e-06, "epoch": 0.2573458805454196, "percentage": 25.73, "elapsed_time": "0:15:25", "remaining_time": "0:44:31", "throughput": 13667.06, "total_tokens": 12651904}
|
|
{"current_steps": 4025, "total_steps": 15621, "loss": 0.4923, "lr": 1.8525408990074533e-06, "epoch": 0.2576659624863965, "percentage": 25.77, "elapsed_time": "0:15:26", "remaining_time": "0:44:28", "throughput": 13674.61, "total_tokens": 12666944}
|
|
{"current_steps": 4030, "total_steps": 15621, "loss": 0.4155, "lr": 1.851956362643414e-06, "epoch": 0.2579860444273734, "percentage": 25.8, "elapsed_time": "0:15:26", "remaining_time": "0:44:26", "throughput": 13682.52, "total_tokens": 12682688}
|
|
{"current_steps": 4035, "total_steps": 15621, "loss": 0.5472, "lr": 1.851370762601853e-06, "epoch": 0.2583061263683503, "percentage": 25.83, "elapsed_time": "0:15:27", "remaining_time": "0:44:23", "throughput": 13690.41, "total_tokens": 12698304}
|
|
{"current_steps": 4040, "total_steps": 15621, "loss": 0.4995, "lr": 1.8507840996138983e-06, "epoch": 0.2586262083093272, "percentage": 25.86, "elapsed_time": "0:15:28", "remaining_time": "0:44:20", "throughput": 13697.53, "total_tokens": 12712896}
|
|
{"current_steps": 4045, "total_steps": 15621, "loss": 0.39, "lr": 1.8501963744120062e-06, "epoch": 0.2589462902503041, "percentage": 25.89, "elapsed_time": "0:15:28", "remaining_time": "0:44:17", "throughput": 13704.67, "total_tokens": 12727488}
|
|
{"current_steps": 4050, "total_steps": 15621, "loss": 0.4037, "lr": 1.849607587729958e-06, "epoch": 0.25926637219128096, "percentage": 25.93, "elapsed_time": "0:15:29", "remaining_time": "0:44:15", "throughput": 13712.24, "total_tokens": 12742720}
|
|
{"current_steps": 4055, "total_steps": 15621, "loss": 0.3918, "lr": 1.8490177403028615e-06, "epoch": 0.25958645413225784, "percentage": 25.96, "elapsed_time": "0:15:29", "remaining_time": "0:44:12", "throughput": 13719.84, "total_tokens": 12757760}
|
|
{"current_steps": 4060, "total_steps": 15621, "loss": 0.4879, "lr": 1.8484268328671475e-06, "epoch": 0.2599065360732348, "percentage": 25.99, "elapsed_time": "0:15:30", "remaining_time": "0:44:09", "throughput": 13727.64, "total_tokens": 12773312}
|
|
{"current_steps": 4065, "total_steps": 15621, "loss": 0.553, "lr": 1.847834866160571e-06, "epoch": 0.26022661801421165, "percentage": 26.02, "elapsed_time": "0:15:31", "remaining_time": "0:44:07", "throughput": 13736.41, "total_tokens": 12790336}
|
|
{"current_steps": 4070, "total_steps": 15621, "loss": 0.4995, "lr": 1.847241840922209e-06, "epoch": 0.26054669995518853, "percentage": 26.05, "elapsed_time": "0:15:31", "remaining_time": "0:44:04", "throughput": 13743.85, "total_tokens": 12805632}
|
|
{"current_steps": 4075, "total_steps": 15621, "loss": 0.4861, "lr": 1.8466477578924616e-06, "epoch": 0.2608667818961654, "percentage": 26.09, "elapsed_time": "0:15:32", "remaining_time": "0:44:01", "throughput": 13751.59, "total_tokens": 12821184}
|
|
{"current_steps": 4080, "total_steps": 15621, "loss": 0.5037, "lr": 1.8460526178130472e-06, "epoch": 0.2611868638371423, "percentage": 26.12, "elapsed_time": "0:15:32", "remaining_time": "0:43:59", "throughput": 13759.11, "total_tokens": 12836544}
|
|
{"current_steps": 4085, "total_steps": 15621, "loss": 0.4307, "lr": 1.8454564214270056e-06, "epoch": 0.26150694577811917, "percentage": 26.15, "elapsed_time": "0:15:33", "remaining_time": "0:43:56", "throughput": 13766.73, "total_tokens": 12852032}
|
|
{"current_steps": 4090, "total_steps": 15621, "loss": 0.446, "lr": 1.8448591694786955e-06, "epoch": 0.2618270277190961, "percentage": 26.18, "elapsed_time": "0:15:34", "remaining_time": "0:43:53", "throughput": 13774.31, "total_tokens": 12867456}
|
|
{"current_steps": 4095, "total_steps": 15621, "loss": 0.3206, "lr": 1.8442608627137925e-06, "epoch": 0.262147109660073, "percentage": 26.21, "elapsed_time": "0:15:34", "remaining_time": "0:43:51", "throughput": 13783.53, "total_tokens": 12885184}
|
|
{"current_steps": 4100, "total_steps": 15621, "loss": 0.3815, "lr": 1.8436615018792897e-06, "epoch": 0.26246719160104987, "percentage": 26.25, "elapsed_time": "0:15:35", "remaining_time": "0:43:48", "throughput": 13791.12, "total_tokens": 12900416}
|
|
{"current_steps": 4105, "total_steps": 15621, "loss": 0.5722, "lr": 1.8430610877234957e-06, "epoch": 0.26278727354202674, "percentage": 26.28, "elapsed_time": "0:15:36", "remaining_time": "0:43:45", "throughput": 13798.64, "total_tokens": 12915648}
|
|
{"current_steps": 4110, "total_steps": 15621, "loss": 0.4491, "lr": 1.8424596209960356e-06, "epoch": 0.2631073554830036, "percentage": 26.31, "elapsed_time": "0:15:36", "remaining_time": "0:43:43", "throughput": 13805.85, "total_tokens": 12930368}
|
|
{"current_steps": 4115, "total_steps": 15621, "loss": 0.5253, "lr": 1.8418571024478466e-06, "epoch": 0.26342743742398056, "percentage": 26.34, "elapsed_time": "0:15:37", "remaining_time": "0:43:40", "throughput": 13813.32, "total_tokens": 12945472}
|
|
{"current_steps": 4120, "total_steps": 15621, "loss": 0.4884, "lr": 1.8412535328311812e-06, "epoch": 0.26374751936495744, "percentage": 26.37, "elapsed_time": "0:15:37", "remaining_time": "0:43:37", "throughput": 13821.15, "total_tokens": 12961472}
|
|
{"current_steps": 4125, "total_steps": 15621, "loss": 0.5935, "lr": 1.8406489128996023e-06, "epoch": 0.2640676013059343, "percentage": 26.41, "elapsed_time": "0:15:38", "remaining_time": "0:43:35", "throughput": 13828.09, "total_tokens": 12975872}
|
|
{"current_steps": 4130, "total_steps": 15621, "loss": 0.5286, "lr": 1.8400432434079853e-06, "epoch": 0.2643876832469112, "percentage": 26.44, "elapsed_time": "0:15:38", "remaining_time": "0:43:32", "throughput": 13836.16, "total_tokens": 12992128}
|
|
{"current_steps": 4135, "total_steps": 15621, "loss": 0.4112, "lr": 1.8394365251125162e-06, "epoch": 0.2647077651878881, "percentage": 26.47, "elapsed_time": "0:15:40", "remaining_time": "0:43:31", "throughput": 13850.93, "total_tokens": 13021184}
|
|
{"current_steps": 4140, "total_steps": 15621, "loss": 0.4385, "lr": 1.8388287587706888e-06, "epoch": 0.265027847128865, "percentage": 26.5, "elapsed_time": "0:15:40", "remaining_time": "0:43:28", "throughput": 13859.06, "total_tokens": 13037568}
|
|
{"current_steps": 4145, "total_steps": 15621, "loss": 0.4655, "lr": 1.8382199451413074e-06, "epoch": 0.2653479290698419, "percentage": 26.53, "elapsed_time": "0:15:41", "remaining_time": "0:43:26", "throughput": 13866.81, "total_tokens": 13053440}
|
|
{"current_steps": 4150, "total_steps": 15621, "loss": 0.5121, "lr": 1.837610084984483e-06, "epoch": 0.26566801101081877, "percentage": 26.57, "elapsed_time": "0:15:41", "remaining_time": "0:43:23", "throughput": 13874.65, "total_tokens": 13069440}
|
|
{"current_steps": 4155, "total_steps": 15621, "loss": 0.5466, "lr": 1.8369991790616327e-06, "epoch": 0.26598809295179565, "percentage": 26.6, "elapsed_time": "0:15:42", "remaining_time": "0:43:21", "throughput": 13881.73, "total_tokens": 13084224}
|
|
{"current_steps": 4160, "total_steps": 15621, "loss": 0.6597, "lr": 1.8363872281354795e-06, "epoch": 0.26630817489277253, "percentage": 26.63, "elapsed_time": "0:15:43", "remaining_time": "0:43:18", "throughput": 13888.65, "total_tokens": 13098688}
|
|
{"current_steps": 4165, "total_steps": 15621, "loss": 0.4049, "lr": 1.835774232970052e-06, "epoch": 0.26662825683374947, "percentage": 26.66, "elapsed_time": "0:15:43", "remaining_time": "0:43:15", "throughput": 13896.1, "total_tokens": 13114112}
|
|
{"current_steps": 4170, "total_steps": 15621, "loss": 0.4672, "lr": 1.8351601943306815e-06, "epoch": 0.26694833877472635, "percentage": 26.69, "elapsed_time": "0:15:44", "remaining_time": "0:43:13", "throughput": 13904.08, "total_tokens": 13130240}
|
|
{"current_steps": 4175, "total_steps": 15621, "loss": 0.3994, "lr": 1.8345451129840025e-06, "epoch": 0.2672684207157032, "percentage": 26.73, "elapsed_time": "0:15:44", "remaining_time": "0:43:10", "throughput": 13911.45, "total_tokens": 13145536}
|
|
{"current_steps": 4180, "total_steps": 15621, "loss": 0.552, "lr": 1.8339289896979515e-06, "epoch": 0.2675885026566801, "percentage": 26.76, "elapsed_time": "0:15:45", "remaining_time": "0:43:07", "throughput": 13918.54, "total_tokens": 13160256}
|
|
{"current_steps": 4185, "total_steps": 15621, "loss": 0.5336, "lr": 1.8333118252417651e-06, "epoch": 0.267908584597657, "percentage": 26.79, "elapsed_time": "0:15:46", "remaining_time": "0:43:05", "throughput": 13926.85, "total_tokens": 13177088}
|
|
{"current_steps": 4190, "total_steps": 15621, "loss": 0.5098, "lr": 1.832693620385981e-06, "epoch": 0.26822866653863386, "percentage": 26.82, "elapsed_time": "0:15:46", "remaining_time": "0:43:02", "throughput": 13934.48, "total_tokens": 13192768}
|
|
{"current_steps": 4195, "total_steps": 15621, "loss": 0.5183, "lr": 1.8320743759024352e-06, "epoch": 0.2685487484796108, "percentage": 26.85, "elapsed_time": "0:15:47", "remaining_time": "0:43:00", "throughput": 13941.86, "total_tokens": 13208192}
|
|
{"current_steps": 4200, "total_steps": 15621, "loss": 0.5242, "lr": 1.831454092564261e-06, "epoch": 0.2688688304205877, "percentage": 26.89, "elapsed_time": "0:15:47", "remaining_time": "0:42:57", "throughput": 13949.46, "total_tokens": 13223872}
|
|
{"current_steps": 4205, "total_steps": 15621, "loss": 0.4714, "lr": 1.8308327711458899e-06, "epoch": 0.26918891236156456, "percentage": 26.92, "elapsed_time": "0:15:48", "remaining_time": "0:42:55", "throughput": 13956.65, "total_tokens": 13239104}
|
|
{"current_steps": 4210, "total_steps": 15621, "loss": 0.3844, "lr": 1.830210412423049e-06, "epoch": 0.26950899430254144, "percentage": 26.95, "elapsed_time": "0:15:49", "remaining_time": "0:42:52", "throughput": 13964.09, "total_tokens": 13254464}
|
|
{"current_steps": 4215, "total_steps": 15621, "loss": 0.3647, "lr": 1.8295870171727605e-06, "epoch": 0.2698290762435183, "percentage": 26.98, "elapsed_time": "0:15:49", "remaining_time": "0:42:50", "throughput": 13971.45, "total_tokens": 13269824}
|
|
{"current_steps": 4220, "total_steps": 15621, "loss": 0.4194, "lr": 1.8289625861733408e-06, "epoch": 0.27014915818449525, "percentage": 27.01, "elapsed_time": "0:15:50", "remaining_time": "0:42:47", "throughput": 13980.79, "total_tokens": 13288448}
|
|
{"current_steps": 4225, "total_steps": 15621, "loss": 0.5194, "lr": 1.8283371202043991e-06, "epoch": 0.27046924012547213, "percentage": 27.05, "elapsed_time": "0:15:51", "remaining_time": "0:42:45", "throughput": 13988.37, "total_tokens": 13304320}
|
|
{"current_steps": 4230, "total_steps": 15621, "loss": 0.5503, "lr": 1.827710620046837e-06, "epoch": 0.270789322066449, "percentage": 27.08, "elapsed_time": "0:15:51", "remaining_time": "0:42:42", "throughput": 13997.18, "total_tokens": 13321920}
|
|
{"current_steps": 4235, "total_steps": 15621, "loss": 0.4687, "lr": 1.8270830864828474e-06, "epoch": 0.2711094040074259, "percentage": 27.11, "elapsed_time": "0:15:52", "remaining_time": "0:42:40", "throughput": 14004.47, "total_tokens": 13337280}
|
|
{"current_steps": 4240, "total_steps": 15621, "loss": 0.4287, "lr": 1.8264545202959133e-06, "epoch": 0.27142948594840277, "percentage": 27.14, "elapsed_time": "0:15:53", "remaining_time": "0:42:38", "throughput": 14012.62, "total_tokens": 13354112}
|
|
{"current_steps": 4245, "total_steps": 15621, "loss": 0.4321, "lr": 1.8258249222708067e-06, "epoch": 0.2717495678893797, "percentage": 27.17, "elapsed_time": "0:15:53", "remaining_time": "0:42:35", "throughput": 14019.85, "total_tokens": 13369600}
|
|
{"current_steps": 4250, "total_steps": 15621, "loss": 0.4464, "lr": 1.8251942931935886e-06, "epoch": 0.2720696498303566, "percentage": 27.21, "elapsed_time": "0:15:54", "remaining_time": "0:42:33", "throughput": 14027.38, "total_tokens": 13385536}
|
|
{"current_steps": 4255, "total_steps": 15621, "loss": 0.3788, "lr": 1.8245626338516069e-06, "epoch": 0.27238973177133347, "percentage": 27.24, "elapsed_time": "0:15:54", "remaining_time": "0:42:30", "throughput": 14034.6, "total_tokens": 13400832}
|
|
{"current_steps": 4260, "total_steps": 15621, "loss": 0.3397, "lr": 1.823929945033495e-06, "epoch": 0.27270981371231034, "percentage": 27.27, "elapsed_time": "0:15:55", "remaining_time": "0:42:28", "throughput": 14041.76, "total_tokens": 13416000}
|
|
{"current_steps": 4265, "total_steps": 15621, "loss": 0.5015, "lr": 1.8232962275291728e-06, "epoch": 0.2730298956532872, "percentage": 27.3, "elapsed_time": "0:15:56", "remaining_time": "0:42:25", "throughput": 14049.09, "total_tokens": 13431360}
|
|
{"current_steps": 4270, "total_steps": 15621, "loss": 0.4342, "lr": 1.822661482129844e-06, "epoch": 0.2733499775942641, "percentage": 27.33, "elapsed_time": "0:15:56", "remaining_time": "0:42:23", "throughput": 14056.47, "total_tokens": 13446976}
|
|
{"current_steps": 4275, "total_steps": 15621, "loss": 0.3796, "lr": 1.8220257096279956e-06, "epoch": 0.27367005953524104, "percentage": 27.37, "elapsed_time": "0:15:57", "remaining_time": "0:42:20", "throughput": 14064.09, "total_tokens": 13463040}
|
|
{"current_steps": 4280, "total_steps": 15621, "loss": 0.6798, "lr": 1.8213889108173972e-06, "epoch": 0.2739901414762179, "percentage": 27.4, "elapsed_time": "0:15:57", "remaining_time": "0:42:18", "throughput": 14071.47, "total_tokens": 13478656}
|
|
{"current_steps": 4285, "total_steps": 15621, "loss": 0.4843, "lr": 1.8207510864930992e-06, "epoch": 0.2743102234171948, "percentage": 27.43, "elapsed_time": "0:15:58", "remaining_time": "0:42:15", "throughput": 14079.3, "total_tokens": 13495296}
|
|
{"current_steps": 4290, "total_steps": 15621, "loss": 0.5024, "lr": 1.8201122374514336e-06, "epoch": 0.2746303053581717, "percentage": 27.46, "elapsed_time": "0:15:59", "remaining_time": "0:42:13", "throughput": 14086.61, "total_tokens": 13510912}
|
|
{"current_steps": 4295, "total_steps": 15621, "loss": 0.4465, "lr": 1.8194723644900099e-06, "epoch": 0.27495038729914856, "percentage": 27.5, "elapsed_time": "0:15:59", "remaining_time": "0:42:10", "throughput": 14093.53, "total_tokens": 13525952}
|
|
{"current_steps": 4300, "total_steps": 15621, "loss": 0.5334, "lr": 1.8188314684077173e-06, "epoch": 0.2752704692401255, "percentage": 27.53, "elapsed_time": "0:16:00", "remaining_time": "0:42:08", "throughput": 14103.96, "total_tokens": 13546752}
|
|
{"current_steps": 4305, "total_steps": 15621, "loss": 0.5659, "lr": 1.8181895500047226e-06, "epoch": 0.2755905511811024, "percentage": 27.56, "elapsed_time": "0:16:01", "remaining_time": "0:42:06", "throughput": 14110.89, "total_tokens": 13561728}
|
|
{"current_steps": 4310, "total_steps": 15621, "loss": 0.4559, "lr": 1.817546610082468e-06, "epoch": 0.27591063312207925, "percentage": 27.59, "elapsed_time": "0:16:01", "remaining_time": "0:42:03", "throughput": 14118.12, "total_tokens": 13577344}
|
|
{"current_steps": 4315, "total_steps": 15621, "loss": 0.4806, "lr": 1.816902649443672e-06, "epoch": 0.27623071506305613, "percentage": 27.62, "elapsed_time": "0:16:02", "remaining_time": "0:42:01", "throughput": 14124.89, "total_tokens": 13592256}
|
|
{"current_steps": 4320, "total_steps": 15621, "loss": 0.5351, "lr": 1.8162576688923262e-06, "epoch": 0.276550797004033, "percentage": 27.66, "elapsed_time": "0:16:02", "remaining_time": "0:41:58", "throughput": 14132.82, "total_tokens": 13608832}
|
|
{"current_steps": 4325, "total_steps": 15621, "loss": 0.5544, "lr": 1.815611669233697e-06, "epoch": 0.27687087894500995, "percentage": 27.69, "elapsed_time": "0:16:03", "remaining_time": "0:41:56", "throughput": 14139.82, "total_tokens": 13624128}
|
|
{"current_steps": 4330, "total_steps": 15621, "loss": 0.5301, "lr": 1.8149646512743222e-06, "epoch": 0.2771909608859868, "percentage": 27.72, "elapsed_time": "0:16:04", "remaining_time": "0:41:54", "throughput": 14147.52, "total_tokens": 13640576}
|
|
{"current_steps": 4335, "total_steps": 15621, "loss": 0.4513, "lr": 1.8143166158220118e-06, "epoch": 0.2775110428269637, "percentage": 27.75, "elapsed_time": "0:16:04", "remaining_time": "0:41:51", "throughput": 14154.55, "total_tokens": 13655872}
|
|
{"current_steps": 4340, "total_steps": 15621, "loss": 0.6679, "lr": 1.8136675636858454e-06, "epoch": 0.2778311247679406, "percentage": 27.78, "elapsed_time": "0:16:05", "remaining_time": "0:41:49", "throughput": 14162.29, "total_tokens": 13672384}
|
|
{"current_steps": 4345, "total_steps": 15621, "loss": 0.3988, "lr": 1.8130174956761723e-06, "epoch": 0.27815120670891746, "percentage": 27.82, "elapsed_time": "0:16:05", "remaining_time": "0:41:46", "throughput": 14169.09, "total_tokens": 13687296}
|
|
{"current_steps": 4350, "total_steps": 15621, "loss": 0.5363, "lr": 1.81236641260461e-06, "epoch": 0.2784712886498944, "percentage": 27.85, "elapsed_time": "0:16:06", "remaining_time": "0:41:44", "throughput": 14176.06, "total_tokens": 13702528}
|
|
{"current_steps": 4355, "total_steps": 15621, "loss": 0.5002, "lr": 1.811714315284043e-06, "epoch": 0.2787913705908713, "percentage": 27.88, "elapsed_time": "0:16:07", "remaining_time": "0:41:42", "throughput": 14182.86, "total_tokens": 13717568}
|
|
{"current_steps": 4360, "total_steps": 15621, "loss": 0.4016, "lr": 1.8110612045286229e-06, "epoch": 0.27911145253184816, "percentage": 27.91, "elapsed_time": "0:16:07", "remaining_time": "0:41:39", "throughput": 14190.25, "total_tokens": 13733568}
|
|
{"current_steps": 4365, "total_steps": 15621, "loss": 0.3744, "lr": 1.8104070811537661e-06, "epoch": 0.27943153447282504, "percentage": 27.94, "elapsed_time": "0:16:08", "remaining_time": "0:41:37", "throughput": 14197.52, "total_tokens": 13749312}
|
|
{"current_steps": 4370, "total_steps": 15621, "loss": 0.4299, "lr": 1.8097519459761533e-06, "epoch": 0.2797516164138019, "percentage": 27.98, "elapsed_time": "0:16:09", "remaining_time": "0:41:34", "throughput": 14205.22, "total_tokens": 13765952}
|
|
{"current_steps": 4375, "total_steps": 15621, "loss": 0.495, "lr": 1.8090957998137283e-06, "epoch": 0.2800716983547788, "percentage": 28.01, "elapsed_time": "0:16:09", "remaining_time": "0:41:32", "throughput": 14212.22, "total_tokens": 13781440}
|
|
{"current_steps": 4380, "total_steps": 15621, "loss": 0.4471, "lr": 1.8084386434856978e-06, "epoch": 0.28039178029575573, "percentage": 28.04, "elapsed_time": "0:16:10", "remaining_time": "0:41:30", "throughput": 14219.22, "total_tokens": 13796864}
|
|
{"current_steps": 4385, "total_steps": 15621, "loss": 0.4915, "lr": 1.8077804778125283e-06, "epoch": 0.2807118622367326, "percentage": 28.07, "elapsed_time": "0:16:10", "remaining_time": "0:41:27", "throughput": 14226.52, "total_tokens": 13812736}
|
|
{"current_steps": 4390, "total_steps": 15621, "loss": 0.4966, "lr": 1.807121303615948e-06, "epoch": 0.2810319441777095, "percentage": 28.1, "elapsed_time": "0:16:11", "remaining_time": "0:41:25", "throughput": 14233.62, "total_tokens": 13828288}
|
|
{"current_steps": 4395, "total_steps": 15621, "loss": 0.4125, "lr": 1.8064611217189434e-06, "epoch": 0.28135202611868637, "percentage": 28.14, "elapsed_time": "0:16:12", "remaining_time": "0:41:23", "throughput": 14241.78, "total_tokens": 13845568}
|
|
{"current_steps": 4400, "total_steps": 15621, "loss": 0.398, "lr": 1.8057999329457596e-06, "epoch": 0.28167210805966325, "percentage": 28.17, "elapsed_time": "0:16:12", "remaining_time": "0:41:20", "throughput": 14248.62, "total_tokens": 13860608}
|
|
{"current_steps": 4405, "total_steps": 15621, "loss": 0.5663, "lr": 1.8051377381218984e-06, "epoch": 0.2819921900006402, "percentage": 28.2, "elapsed_time": "0:16:13", "remaining_time": "0:41:18", "throughput": 14255.99, "total_tokens": 13876608}
|
|
{"current_steps": 4410, "total_steps": 15621, "loss": 0.5656, "lr": 1.8044745380741177e-06, "epoch": 0.28231227194161707, "percentage": 28.23, "elapsed_time": "0:16:14", "remaining_time": "0:41:16", "throughput": 14263.88, "total_tokens": 13893632}
|
|
{"current_steps": 4415, "total_steps": 15621, "loss": 0.3896, "lr": 1.8038103336304306e-06, "epoch": 0.28263235388259395, "percentage": 28.26, "elapsed_time": "0:16:14", "remaining_time": "0:41:13", "throughput": 14270.95, "total_tokens": 13909312}
|
|
{"current_steps": 4420, "total_steps": 15621, "loss": 0.5699, "lr": 1.8031451256201042e-06, "epoch": 0.2829524358235708, "percentage": 28.3, "elapsed_time": "0:16:15", "remaining_time": "0:41:11", "throughput": 14278.54, "total_tokens": 13925824}
|
|
{"current_steps": 4425, "total_steps": 15621, "loss": 0.5385, "lr": 1.8024789148736589e-06, "epoch": 0.2832725177645477, "percentage": 28.33, "elapsed_time": "0:16:15", "remaining_time": "0:41:09", "throughput": 14286.19, "total_tokens": 13942336}
|
|
{"current_steps": 4430, "total_steps": 15621, "loss": 0.392, "lr": 1.8018117022228655e-06, "epoch": 0.28359259970552464, "percentage": 28.36, "elapsed_time": "0:16:16", "remaining_time": "0:41:06", "throughput": 14293.08, "total_tokens": 13957760}
|
|
{"current_steps": 4435, "total_steps": 15621, "loss": 0.4997, "lr": 1.8011434885007479e-06, "epoch": 0.2839126816465015, "percentage": 28.39, "elapsed_time": "0:16:17", "remaining_time": "0:41:04", "throughput": 14299.94, "total_tokens": 13972992}
|
|
{"current_steps": 4440, "total_steps": 15621, "loss": 0.4308, "lr": 1.8004742745415787e-06, "epoch": 0.2842327635874784, "percentage": 28.42, "elapsed_time": "0:16:17", "remaining_time": "0:41:02", "throughput": 14307.03, "total_tokens": 13988736}
|
|
{"current_steps": 4445, "total_steps": 15621, "loss": 0.5427, "lr": 1.799804061180879e-06, "epoch": 0.2845528455284553, "percentage": 28.46, "elapsed_time": "0:16:18", "remaining_time": "0:40:59", "throughput": 14313.44, "total_tokens": 14003520}
|
|
{"current_steps": 4450, "total_steps": 15621, "loss": 0.518, "lr": 1.799132849255418e-06, "epoch": 0.28487292746943216, "percentage": 28.49, "elapsed_time": "0:16:19", "remaining_time": "0:40:57", "throughput": 14321.32, "total_tokens": 14020608}
|
|
{"current_steps": 4455, "total_steps": 15621, "loss": 0.4011, "lr": 1.798460639603212e-06, "epoch": 0.28519300941040904, "percentage": 28.52, "elapsed_time": "0:16:19", "remaining_time": "0:40:55", "throughput": 14327.73, "total_tokens": 14035328}
|
|
{"current_steps": 4460, "total_steps": 15621, "loss": 0.4805, "lr": 1.7977874330635224e-06, "epoch": 0.285513091351386, "percentage": 28.55, "elapsed_time": "0:16:20", "remaining_time": "0:40:52", "throughput": 14334.57, "total_tokens": 14050816}
|
|
{"current_steps": 4465, "total_steps": 15621, "loss": 0.3289, "lr": 1.7971132304768555e-06, "epoch": 0.28583317329236285, "percentage": 28.58, "elapsed_time": "0:16:20", "remaining_time": "0:40:50", "throughput": 14341.77, "total_tokens": 14066880}
|
|
{"current_steps": 4470, "total_steps": 15621, "loss": 0.4937, "lr": 1.7964380326849612e-06, "epoch": 0.28615325523333973, "percentage": 28.62, "elapsed_time": "0:16:21", "remaining_time": "0:40:48", "throughput": 14348.32, "total_tokens": 14081728}
|
|
{"current_steps": 4475, "total_steps": 15621, "loss": 0.4941, "lr": 1.795761840530832e-06, "epoch": 0.2864733371743166, "percentage": 28.65, "elapsed_time": "0:16:22", "remaining_time": "0:40:46", "throughput": 14355.74, "total_tokens": 14097984}
|
|
{"current_steps": 4480, "total_steps": 15621, "loss": 0.4208, "lr": 1.7950846548587015e-06, "epoch": 0.2867934191152935, "percentage": 28.68, "elapsed_time": "0:16:22", "remaining_time": "0:40:43", "throughput": 14363.79, "total_tokens": 14115264}
|
|
{"current_steps": 4485, "total_steps": 15621, "loss": 0.2799, "lr": 1.7944064765140445e-06, "epoch": 0.2871135010562704, "percentage": 28.71, "elapsed_time": "0:16:23", "remaining_time": "0:40:41", "throughput": 14369.93, "total_tokens": 14129472}
|
|
{"current_steps": 4490, "total_steps": 15621, "loss": 0.55, "lr": 1.7937273063435735e-06, "epoch": 0.2874335829972473, "percentage": 28.74, "elapsed_time": "0:16:23", "remaining_time": "0:40:39", "throughput": 14376.78, "total_tokens": 14144896}
|
|
{"current_steps": 4495, "total_steps": 15621, "loss": 0.3622, "lr": 1.7930471451952416e-06, "epoch": 0.2877536649382242, "percentage": 28.78, "elapsed_time": "0:16:24", "remaining_time": "0:40:36", "throughput": 14383.29, "total_tokens": 14159744}
|
|
{"current_steps": 4500, "total_steps": 15621, "loss": 0.4915, "lr": 1.7923659939182377e-06, "epoch": 0.28807374687920106, "percentage": 28.81, "elapsed_time": "0:16:25", "remaining_time": "0:40:34", "throughput": 14390.92, "total_tokens": 14176384}
|
|
{"current_steps": 4505, "total_steps": 15621, "loss": 0.5376, "lr": 1.7916838533629866e-06, "epoch": 0.28839382882017794, "percentage": 28.84, "elapsed_time": "0:16:25", "remaining_time": "0:40:32", "throughput": 14398.08, "total_tokens": 14192320}
|
|
{"current_steps": 4510, "total_steps": 15621, "loss": 0.397, "lr": 1.7910007243811493e-06, "epoch": 0.2887139107611549, "percentage": 28.87, "elapsed_time": "0:16:26", "remaining_time": "0:40:29", "throughput": 14405.13, "total_tokens": 14208192}
|
|
{"current_steps": 4515, "total_steps": 15621, "loss": 0.5486, "lr": 1.7903166078256202e-06, "epoch": 0.28903399270213176, "percentage": 28.9, "elapsed_time": "0:16:26", "remaining_time": "0:40:27", "throughput": 14411.64, "total_tokens": 14223104}
|
|
{"current_steps": 4520, "total_steps": 15621, "loss": 0.4153, "lr": 1.789631504550527e-06, "epoch": 0.28935407464310864, "percentage": 28.94, "elapsed_time": "0:16:27", "remaining_time": "0:40:25", "throughput": 14418.45, "total_tokens": 14238464}
|
|
{"current_steps": 4525, "total_steps": 15621, "loss": 0.384, "lr": 1.7889454154112288e-06, "epoch": 0.2896741565840855, "percentage": 28.97, "elapsed_time": "0:16:28", "remaining_time": "0:40:23", "throughput": 14425.65, "total_tokens": 14254656}
|
|
{"current_steps": 4530, "total_steps": 15621, "loss": 0.3983, "lr": 1.7882583412643167e-06, "epoch": 0.2899942385250624, "percentage": 29.0, "elapsed_time": "0:16:28", "remaining_time": "0:40:20", "throughput": 14431.8, "total_tokens": 14268928}
|
|
{"current_steps": 4535, "total_steps": 15621, "loss": 0.4326, "lr": 1.78757028296761e-06, "epoch": 0.29031432046603933, "percentage": 29.03, "elapsed_time": "0:16:29", "remaining_time": "0:40:18", "throughput": 14439.47, "total_tokens": 14285952}
|
|
{"current_steps": 4540, "total_steps": 15621, "loss": 0.3522, "lr": 1.7868812413801582e-06, "epoch": 0.2906344024070162, "percentage": 29.06, "elapsed_time": "0:16:29", "remaining_time": "0:40:16", "throughput": 14446.55, "total_tokens": 14301760}
|
|
{"current_steps": 4545, "total_steps": 15621, "loss": 0.4976, "lr": 1.7861912173622372e-06, "epoch": 0.2909544843479931, "percentage": 29.1, "elapsed_time": "0:16:30", "remaining_time": "0:40:14", "throughput": 14453.88, "total_tokens": 14318208}
|
|
{"current_steps": 4550, "total_steps": 15621, "loss": 0.4597, "lr": 1.7855002117753504e-06, "epoch": 0.29127456628896997, "percentage": 29.13, "elapsed_time": "0:16:31", "remaining_time": "0:40:11", "throughput": 14460.97, "total_tokens": 14334144}
|
|
{"current_steps": 4555, "total_steps": 15621, "loss": 0.5283, "lr": 1.7848082254822266e-06, "epoch": 0.29159464822994685, "percentage": 29.16, "elapsed_time": "0:16:31", "remaining_time": "0:40:09", "throughput": 14467.45, "total_tokens": 14349120}
|
|
{"current_steps": 4560, "total_steps": 15621, "loss": 0.4868, "lr": 1.7841152593468185e-06, "epoch": 0.29191473017092373, "percentage": 29.19, "elapsed_time": "0:16:32", "remaining_time": "0:40:07", "throughput": 14474.74, "total_tokens": 14365376}
|
|
{"current_steps": 4565, "total_steps": 15621, "loss": 0.4582, "lr": 1.7834213142343026e-06, "epoch": 0.29223481211190067, "percentage": 29.22, "elapsed_time": "0:16:33", "remaining_time": "0:40:05", "throughput": 14481.79, "total_tokens": 14381568}
|
|
{"current_steps": 4570, "total_steps": 15621, "loss": 0.4626, "lr": 1.7827263910110777e-06, "epoch": 0.29255489405287755, "percentage": 29.26, "elapsed_time": "0:16:33", "remaining_time": "0:40:02", "throughput": 14488.67, "total_tokens": 14397312}
|
|
{"current_steps": 4575, "total_steps": 15621, "loss": 0.4372, "lr": 1.7820304905447632e-06, "epoch": 0.2928749759938544, "percentage": 29.29, "elapsed_time": "0:16:34", "remaining_time": "0:40:00", "throughput": 14495.51, "total_tokens": 14412928}
|
|
{"current_steps": 4580, "total_steps": 15621, "loss": 0.446, "lr": 1.7813336137041991e-06, "epoch": 0.2931950579348313, "percentage": 29.32, "elapsed_time": "0:16:34", "remaining_time": "0:39:58", "throughput": 14501.93, "total_tokens": 14427968}
|
|
{"current_steps": 4585, "total_steps": 15621, "loss": 0.3693, "lr": 1.7806357613594447e-06, "epoch": 0.2935151398758082, "percentage": 29.35, "elapsed_time": "0:16:35", "remaining_time": "0:39:56", "throughput": 14508.39, "total_tokens": 14442944}
|
|
{"current_steps": 4590, "total_steps": 15621, "loss": 0.4481, "lr": 1.7799369343817764e-06, "epoch": 0.2938352218167851, "percentage": 29.38, "elapsed_time": "0:16:36", "remaining_time": "0:39:53", "throughput": 14515.1, "total_tokens": 14458176}
|
|
{"current_steps": 4595, "total_steps": 15621, "loss": 0.3566, "lr": 1.7792371336436883e-06, "epoch": 0.294155303757762, "percentage": 29.42, "elapsed_time": "0:16:36", "remaining_time": "0:39:51", "throughput": 14521.76, "total_tokens": 14473600}
|
|
{"current_steps": 4600, "total_steps": 15621, "loss": 0.6518, "lr": 1.7785363600188892e-06, "epoch": 0.2944753856987389, "percentage": 29.45, "elapsed_time": "0:16:37", "remaining_time": "0:39:49", "throughput": 14528.34, "total_tokens": 14488896}
|
|
{"current_steps": 4605, "total_steps": 15621, "loss": 0.5881, "lr": 1.7778346143823038e-06, "epoch": 0.29479546763971576, "percentage": 29.48, "elapsed_time": "0:16:37", "remaining_time": "0:39:47", "throughput": 14534.09, "total_tokens": 14502784}
|
|
{"current_steps": 4610, "total_steps": 15621, "loss": 0.4293, "lr": 1.7771318976100696e-06, "epoch": 0.29511554958069264, "percentage": 29.51, "elapsed_time": "0:16:38", "remaining_time": "0:39:44", "throughput": 14541.86, "total_tokens": 14520000}
|
|
{"current_steps": 4615, "total_steps": 15621, "loss": 0.3401, "lr": 1.7764282105795364e-06, "epoch": 0.2954356315216696, "percentage": 29.54, "elapsed_time": "0:16:39", "remaining_time": "0:39:42", "throughput": 14548.99, "total_tokens": 14536320}
|
|
{"current_steps": 4620, "total_steps": 15621, "loss": 0.4524, "lr": 1.7757235541692663e-06, "epoch": 0.29575571346264645, "percentage": 29.58, "elapsed_time": "0:16:39", "remaining_time": "0:39:40", "throughput": 14555.72, "total_tokens": 14551808}
|
|
{"current_steps": 4625, "total_steps": 15621, "loss": 0.3157, "lr": 1.7750179292590306e-06, "epoch": 0.29607579540362333, "percentage": 29.61, "elapsed_time": "0:16:40", "remaining_time": "0:39:38", "throughput": 14562.19, "total_tokens": 14566976}
|
|
{"current_steps": 4630, "total_steps": 15621, "loss": 0.3475, "lr": 1.7743113367298107e-06, "epoch": 0.2963958773446002, "percentage": 29.64, "elapsed_time": "0:16:40", "remaining_time": "0:39:36", "throughput": 14569.18, "total_tokens": 14583104}
|
|
{"current_steps": 4635, "total_steps": 15621, "loss": 0.4454, "lr": 1.7736037774637955e-06, "epoch": 0.2967159592855771, "percentage": 29.67, "elapsed_time": "0:16:41", "remaining_time": "0:39:33", "throughput": 14575.75, "total_tokens": 14598336}
|
|
{"current_steps": 4640, "total_steps": 15621, "loss": 0.5142, "lr": 1.772895252344381e-06, "epoch": 0.29703604122655397, "percentage": 29.7, "elapsed_time": "0:16:42", "remaining_time": "0:39:31", "throughput": 14583.28, "total_tokens": 14615232}
|
|
{"current_steps": 4645, "total_steps": 15621, "loss": 0.3932, "lr": 1.7721857622561692e-06, "epoch": 0.2973561231675309, "percentage": 29.74, "elapsed_time": "0:16:42", "remaining_time": "0:39:29", "throughput": 14590.02, "total_tokens": 14630848}
|
|
{"current_steps": 4650, "total_steps": 15621, "loss": 0.4601, "lr": 1.7714753080849664e-06, "epoch": 0.2976762051085078, "percentage": 29.77, "elapsed_time": "0:16:43", "remaining_time": "0:39:27", "throughput": 14597.09, "total_tokens": 14647040}
|
|
{"current_steps": 4655, "total_steps": 15621, "loss": 0.4116, "lr": 1.7707638907177837e-06, "epoch": 0.29799628704948466, "percentage": 29.8, "elapsed_time": "0:16:44", "remaining_time": "0:39:25", "throughput": 14603.36, "total_tokens": 14661888}
|
|
{"current_steps": 4660, "total_steps": 15621, "loss": 0.7093, "lr": 1.7700515110428336e-06, "epoch": 0.29831636899046154, "percentage": 29.83, "elapsed_time": "0:16:44", "remaining_time": "0:39:23", "throughput": 14610.19, "total_tokens": 14677696}
|
|
{"current_steps": 4665, "total_steps": 15621, "loss": 0.4799, "lr": 1.7693381699495307e-06, "epoch": 0.2986364509314384, "percentage": 29.86, "elapsed_time": "0:16:45", "remaining_time": "0:39:20", "throughput": 14616.9, "total_tokens": 14693184}
|
|
{"current_steps": 4670, "total_steps": 15621, "loss": 0.3643, "lr": 1.7686238683284894e-06, "epoch": 0.29895653287241536, "percentage": 29.9, "elapsed_time": "0:16:45", "remaining_time": "0:39:18", "throughput": 14623.13, "total_tokens": 14707904}
|
|
{"current_steps": 4675, "total_steps": 15621, "loss": 0.3608, "lr": 1.7679086070715237e-06, "epoch": 0.29927661481339224, "percentage": 29.93, "elapsed_time": "0:16:46", "remaining_time": "0:39:16", "throughput": 14630.15, "total_tokens": 14724096}
|
|
{"current_steps": 4680, "total_steps": 15621, "loss": 0.4544, "lr": 1.7671923870716459e-06, "epoch": 0.2995966967543691, "percentage": 29.96, "elapsed_time": "0:16:47", "remaining_time": "0:39:14", "throughput": 14636.22, "total_tokens": 14738752}
|
|
{"current_steps": 4685, "total_steps": 15621, "loss": 0.3486, "lr": 1.7664752092230652e-06, "epoch": 0.299916778695346, "percentage": 29.99, "elapsed_time": "0:16:47", "remaining_time": "0:39:11", "throughput": 14642.49, "total_tokens": 14753664}
|
|
{"current_steps": 4690, "total_steps": 15621, "loss": 0.3784, "lr": 1.7657570744211863e-06, "epoch": 0.3002368606363229, "percentage": 30.02, "elapsed_time": "0:16:48", "remaining_time": "0:39:09", "throughput": 14649.14, "total_tokens": 14769152}
|
|
{"current_steps": 4692, "total_steps": 15621, "eval_loss": 0.4629112482070923, "epoch": 0.30036489341271366, "percentage": 30.04, "elapsed_time": "0:17:37", "remaining_time": "0:41:03", "throughput": 13970.66, "total_tokens": 14775488}
|
|
{"current_steps": 4695, "total_steps": 15621, "loss": 0.5028, "lr": 1.765037983562609e-06, "epoch": 0.3005569425772998, "percentage": 30.06, "elapsed_time": "0:18:32", "remaining_time": "0:43:07", "throughput": 13294.29, "total_tokens": 14784128}
|
|
{"current_steps": 4700, "total_steps": 15621, "loss": 0.4459, "lr": 1.7643179375451264e-06, "epoch": 0.3008770245182767, "percentage": 30.09, "elapsed_time": "0:18:32", "remaining_time": "0:43:05", "throughput": 13301.25, "total_tokens": 14799936}
|
|
{"current_steps": 4705, "total_steps": 15621, "loss": 0.6083, "lr": 1.7635969372677252e-06, "epoch": 0.30119710645925357, "percentage": 30.12, "elapsed_time": "0:18:33", "remaining_time": "0:43:02", "throughput": 13307.22, "total_tokens": 14814208}
|
|
{"current_steps": 4710, "total_steps": 15621, "loss": 0.483, "lr": 1.7628749836305818e-06, "epoch": 0.30151718840023045, "percentage": 30.15, "elapsed_time": "0:18:33", "remaining_time": "0:43:00", "throughput": 13313.77, "total_tokens": 14829504}
|
|
{"current_steps": 4715, "total_steps": 15621, "loss": 0.3949, "lr": 1.7621520775350645e-06, "epoch": 0.30183727034120733, "percentage": 30.18, "elapsed_time": "0:18:34", "remaining_time": "0:42:57", "throughput": 13319.92, "total_tokens": 14843968}
|
|
{"current_steps": 4720, "total_steps": 15621, "loss": 0.4567, "lr": 1.7614282198837293e-06, "epoch": 0.30215735228218427, "percentage": 30.22, "elapsed_time": "0:18:35", "remaining_time": "0:42:55", "throughput": 13326.9, "total_tokens": 14859840}
|
|
{"current_steps": 4725, "total_steps": 15621, "loss": 0.473, "lr": 1.7607034115803219e-06, "epoch": 0.30247743422316115, "percentage": 30.25, "elapsed_time": "0:18:35", "remaining_time": "0:42:52", "throughput": 13333.65, "total_tokens": 14875648}
|
|
{"current_steps": 4730, "total_steps": 15621, "loss": 0.4192, "lr": 1.7599776535297734e-06, "epoch": 0.302797516164138, "percentage": 30.28, "elapsed_time": "0:18:36", "remaining_time": "0:42:50", "throughput": 13339.91, "total_tokens": 14890560}
|
|
{"current_steps": 4735, "total_steps": 15621, "loss": 0.4702, "lr": 1.7592509466382012e-06, "epoch": 0.3031175981051149, "percentage": 30.31, "elapsed_time": "0:18:36", "remaining_time": "0:42:47", "throughput": 13346.91, "total_tokens": 14906688}
|
|
{"current_steps": 4740, "total_steps": 15621, "loss": 0.5561, "lr": 1.7585232918129076e-06, "epoch": 0.3034376800460918, "percentage": 30.34, "elapsed_time": "0:18:37", "remaining_time": "0:42:45", "throughput": 13353.7, "total_tokens": 14922496}
|
|
{"current_steps": 4745, "total_steps": 15621, "loss": 0.4601, "lr": 1.757794689962378e-06, "epoch": 0.30375776198706866, "percentage": 30.38, "elapsed_time": "0:18:38", "remaining_time": "0:42:42", "throughput": 13360.71, "total_tokens": 14938880}
|
|
{"current_steps": 4750, "total_steps": 15621, "loss": 0.4968, "lr": 1.7570651419962807e-06, "epoch": 0.3040778439280456, "percentage": 30.41, "elapsed_time": "0:18:38", "remaining_time": "0:42:40", "throughput": 13367.11, "total_tokens": 14954112}
|
|
{"current_steps": 4755, "total_steps": 15621, "loss": 0.448, "lr": 1.7563346488254647e-06, "epoch": 0.3043979258690225, "percentage": 30.44, "elapsed_time": "0:18:39", "remaining_time": "0:42:37", "throughput": 13373.69, "total_tokens": 14969536}
|
|
{"current_steps": 4760, "total_steps": 15621, "loss": 0.3373, "lr": 1.755603211361959e-06, "epoch": 0.30471800780999936, "percentage": 30.47, "elapsed_time": "0:18:39", "remaining_time": "0:42:35", "throughput": 13380.63, "total_tokens": 14985728}
|
|
{"current_steps": 4765, "total_steps": 15621, "loss": 0.452, "lr": 1.7548708305189722e-06, "epoch": 0.30503808975097624, "percentage": 30.5, "elapsed_time": "0:18:40", "remaining_time": "0:42:33", "throughput": 13388.73, "total_tokens": 15003904}
|
|
{"current_steps": 4770, "total_steps": 15621, "loss": 0.5662, "lr": 1.7541375072108905e-06, "epoch": 0.3053581716919531, "percentage": 30.54, "elapsed_time": "0:18:41", "remaining_time": "0:42:30", "throughput": 13395.27, "total_tokens": 15019328}
|
|
{"current_steps": 4775, "total_steps": 15621, "loss": 0.4597, "lr": 1.7534032423532766e-06, "epoch": 0.30567825363293005, "percentage": 30.57, "elapsed_time": "0:18:41", "remaining_time": "0:42:28", "throughput": 13401.28, "total_tokens": 15033856}
|
|
{"current_steps": 4780, "total_steps": 15621, "loss": 0.3603, "lr": 1.7526680368628685e-06, "epoch": 0.30599833557390693, "percentage": 30.6, "elapsed_time": "0:18:42", "remaining_time": "0:42:25", "throughput": 13408.84, "total_tokens": 15051200}
|
|
{"current_steps": 4785, "total_steps": 15621, "loss": 0.4471, "lr": 1.751931891657579e-06, "epoch": 0.3063184175148838, "percentage": 30.63, "elapsed_time": "0:18:43", "remaining_time": "0:42:23", "throughput": 13415.23, "total_tokens": 15066368}
|
|
{"current_steps": 4790, "total_steps": 15621, "loss": 0.3494, "lr": 1.7511948076564943e-06, "epoch": 0.3066384994558607, "percentage": 30.66, "elapsed_time": "0:18:43", "remaining_time": "0:42:20", "throughput": 13421.6, "total_tokens": 15081600}
|
|
{"current_steps": 4795, "total_steps": 15621, "loss": 0.5308, "lr": 1.7504567857798722e-06, "epoch": 0.30695858139683757, "percentage": 30.7, "elapsed_time": "0:18:44", "remaining_time": "0:42:18", "throughput": 13428.35, "total_tokens": 15097536}
|
|
{"current_steps": 4800, "total_steps": 15621, "loss": 0.5013, "lr": 1.7497178269491417e-06, "epoch": 0.3072786633378145, "percentage": 30.73, "elapsed_time": "0:18:44", "remaining_time": "0:42:16", "throughput": 13435.28, "total_tokens": 15113728}
|
|
{"current_steps": 4805, "total_steps": 15621, "loss": 0.5561, "lr": 1.7489779320869014e-06, "epoch": 0.3075987452787914, "percentage": 30.76, "elapsed_time": "0:18:45", "remaining_time": "0:42:13", "throughput": 13442.26, "total_tokens": 15130048}
|
|
{"current_steps": 4810, "total_steps": 15621, "loss": 0.3673, "lr": 1.7482371021169193e-06, "epoch": 0.30791882721976827, "percentage": 30.79, "elapsed_time": "0:18:46", "remaining_time": "0:42:11", "throughput": 13448.84, "total_tokens": 15145600}
|
|
{"current_steps": 4815, "total_steps": 15621, "loss": 0.3935, "lr": 1.7474953379641297e-06, "epoch": 0.30823890916074514, "percentage": 30.82, "elapsed_time": "0:18:46", "remaining_time": "0:42:08", "throughput": 13456.05, "total_tokens": 15162368}
|
|
{"current_steps": 4820, "total_steps": 15621, "loss": 0.4323, "lr": 1.746752640554634e-06, "epoch": 0.308558991101722, "percentage": 30.86, "elapsed_time": "0:18:47", "remaining_time": "0:42:06", "throughput": 13462.86, "total_tokens": 15178368}
|
|
{"current_steps": 4825, "total_steps": 15621, "loss": 0.5467, "lr": 1.7460090108156988e-06, "epoch": 0.3088790730426989, "percentage": 30.89, "elapsed_time": "0:18:48", "remaining_time": "0:42:03", "throughput": 13469.18, "total_tokens": 15193408}
|
|
{"current_steps": 4830, "total_steps": 15621, "loss": 0.3081, "lr": 1.7452644496757548e-06, "epoch": 0.30919915498367584, "percentage": 30.92, "elapsed_time": "0:18:48", "remaining_time": "0:42:01", "throughput": 13475.54, "total_tokens": 15208640}
|
|
{"current_steps": 4835, "total_steps": 15621, "loss": 0.4533, "lr": 1.7445189580643946e-06, "epoch": 0.3095192369246527, "percentage": 30.95, "elapsed_time": "0:18:49", "remaining_time": "0:41:59", "throughput": 13482.16, "total_tokens": 15224192}
|
|
{"current_steps": 4840, "total_steps": 15621, "loss": 0.5119, "lr": 1.7437725369123737e-06, "epoch": 0.3098393188656296, "percentage": 30.98, "elapsed_time": "0:18:49", "remaining_time": "0:41:56", "throughput": 13488.59, "total_tokens": 15239616}
|
|
{"current_steps": 4845, "total_steps": 15621, "loss": 0.4595, "lr": 1.7430251871516077e-06, "epoch": 0.3101594008066065, "percentage": 31.02, "elapsed_time": "0:18:50", "remaining_time": "0:41:54", "throughput": 13495.39, "total_tokens": 15255680}
|
|
{"current_steps": 4850, "total_steps": 15621, "loss": 0.4886, "lr": 1.7422769097151715e-06, "epoch": 0.31047948274758336, "percentage": 31.05, "elapsed_time": "0:18:51", "remaining_time": "0:41:51", "throughput": 13501.91, "total_tokens": 15271232}
|
|
{"current_steps": 4855, "total_steps": 15621, "loss": 0.4938, "lr": 1.7415277055372982e-06, "epoch": 0.3107995646885603, "percentage": 31.08, "elapsed_time": "0:18:51", "remaining_time": "0:41:49", "throughput": 13508.5, "total_tokens": 15287040}
|
|
{"current_steps": 4860, "total_steps": 15621, "loss": 0.5025, "lr": 1.7407775755533778e-06, "epoch": 0.31111964662953717, "percentage": 31.11, "elapsed_time": "0:18:52", "remaining_time": "0:41:47", "throughput": 13515.9, "total_tokens": 15304256}
|
|
{"current_steps": 4865, "total_steps": 15621, "loss": 0.3567, "lr": 1.7400265206999568e-06, "epoch": 0.31143972857051405, "percentage": 31.14, "elapsed_time": "0:18:52", "remaining_time": "0:41:44", "throughput": 13523.61, "total_tokens": 15322112}
|
|
{"current_steps": 4870, "total_steps": 15621, "loss": 0.5436, "lr": 1.7392745419147362e-06, "epoch": 0.31175981051149093, "percentage": 31.18, "elapsed_time": "0:18:53", "remaining_time": "0:41:42", "throughput": 13529.87, "total_tokens": 15337216}
|
|
{"current_steps": 4875, "total_steps": 15621, "loss": 0.4521, "lr": 1.7385216401365693e-06, "epoch": 0.3120798924524678, "percentage": 31.21, "elapsed_time": "0:18:54", "remaining_time": "0:41:40", "throughput": 13536.98, "total_tokens": 15354048}
|
|
{"current_steps": 4880, "total_steps": 15621, "loss": 0.4933, "lr": 1.7377678163054638e-06, "epoch": 0.31239997439344475, "percentage": 31.24, "elapsed_time": "0:18:54", "remaining_time": "0:41:37", "throughput": 13543.24, "total_tokens": 15369344}
|
|
{"current_steps": 4885, "total_steps": 15621, "loss": 0.4949, "lr": 1.7370130713625775e-06, "epoch": 0.3127200563344216, "percentage": 31.27, "elapsed_time": "0:18:55", "remaining_time": "0:41:35", "throughput": 13550.26, "total_tokens": 15385920}
|
|
{"current_steps": 4890, "total_steps": 15621, "loss": 0.3867, "lr": 1.736257406250218e-06, "epoch": 0.3130401382753985, "percentage": 31.3, "elapsed_time": "0:18:56", "remaining_time": "0:41:33", "throughput": 13556.71, "total_tokens": 15401536}
|
|
{"current_steps": 4895, "total_steps": 15621, "loss": 0.4501, "lr": 1.735500821911842e-06, "epoch": 0.3133602202163754, "percentage": 31.34, "elapsed_time": "0:18:56", "remaining_time": "0:41:30", "throughput": 13563.15, "total_tokens": 15417152}
|
|
{"current_steps": 4900, "total_steps": 15621, "loss": 0.4949, "lr": 1.7347433192920544e-06, "epoch": 0.31368030215735226, "percentage": 31.37, "elapsed_time": "0:18:57", "remaining_time": "0:41:28", "throughput": 13569.13, "total_tokens": 15431872}
|
|
{"current_steps": 4905, "total_steps": 15621, "loss": 0.4021, "lr": 1.7339848993366056e-06, "epoch": 0.3140003840983292, "percentage": 31.4, "elapsed_time": "0:18:57", "remaining_time": "0:41:25", "throughput": 13575.7, "total_tokens": 15447552}
|
|
{"current_steps": 4910, "total_steps": 15621, "loss": 0.4615, "lr": 1.7332255629923922e-06, "epoch": 0.3143204660393061, "percentage": 31.43, "elapsed_time": "0:18:58", "remaining_time": "0:41:23", "throughput": 13582.78, "total_tokens": 15464384}
|
|
{"current_steps": 4915, "total_steps": 15621, "loss": 0.4968, "lr": 1.732465311207454e-06, "epoch": 0.31464054798028296, "percentage": 31.46, "elapsed_time": "0:18:59", "remaining_time": "0:41:21", "throughput": 13589.02, "total_tokens": 15479808}
|
|
{"current_steps": 4920, "total_steps": 15621, "loss": 0.4973, "lr": 1.731704144930975e-06, "epoch": 0.31496062992125984, "percentage": 31.5, "elapsed_time": "0:18:59", "remaining_time": "0:41:19", "throughput": 13595.97, "total_tokens": 15496512}
|
|
{"current_steps": 4925, "total_steps": 15621, "loss": 0.4094, "lr": 1.7309420651132797e-06, "epoch": 0.3152807118622367, "percentage": 31.53, "elapsed_time": "0:19:00", "remaining_time": "0:41:16", "throughput": 13602.79, "total_tokens": 15512896}
|
|
{"current_steps": 4930, "total_steps": 15621, "loss": 0.3234, "lr": 1.7301790727058343e-06, "epoch": 0.3156007938032136, "percentage": 31.56, "elapsed_time": "0:19:01", "remaining_time": "0:41:14", "throughput": 13608.97, "total_tokens": 15528064}
|
|
{"current_steps": 4935, "total_steps": 15621, "loss": 0.3618, "lr": 1.7294151686612431e-06, "epoch": 0.31592087574419053, "percentage": 31.59, "elapsed_time": "0:19:01", "remaining_time": "0:41:12", "throughput": 13615.23, "total_tokens": 15543424}
|
|
{"current_steps": 4940, "total_steps": 15621, "loss": 0.5609, "lr": 1.7286503539332495e-06, "epoch": 0.3162409576851674, "percentage": 31.62, "elapsed_time": "0:19:02", "remaining_time": "0:41:09", "throughput": 13622.14, "total_tokens": 15560192}
|
|
{"current_steps": 4945, "total_steps": 15621, "loss": 0.3968, "lr": 1.7278846294767337e-06, "epoch": 0.3165610396261443, "percentage": 31.66, "elapsed_time": "0:19:02", "remaining_time": "0:41:07", "throughput": 13628.6, "total_tokens": 15576128}
|
|
{"current_steps": 4950, "total_steps": 15621, "loss": 0.7032, "lr": 1.7271179962477118e-06, "epoch": 0.31688112156712117, "percentage": 31.69, "elapsed_time": "0:19:03", "remaining_time": "0:41:05", "throughput": 13635.39, "total_tokens": 15592576}
|
|
{"current_steps": 4955, "total_steps": 15621, "loss": 0.4261, "lr": 1.7263504552033341e-06, "epoch": 0.31720120350809805, "percentage": 31.72, "elapsed_time": "0:19:04", "remaining_time": "0:41:02", "throughput": 13641.48, "total_tokens": 15607744}
|
|
{"current_steps": 4960, "total_steps": 15621, "loss": 0.4846, "lr": 1.725582007301885e-06, "epoch": 0.317521285449075, "percentage": 31.75, "elapsed_time": "0:19:04", "remaining_time": "0:41:00", "throughput": 13647.82, "total_tokens": 15623360}
|
|
{"current_steps": 4965, "total_steps": 15621, "loss": 0.4213, "lr": 1.7248126535027806e-06, "epoch": 0.31784136739005187, "percentage": 31.78, "elapsed_time": "0:19:05", "remaining_time": "0:40:58", "throughput": 13654.08, "total_tokens": 15638656}
|
|
{"current_steps": 4970, "total_steps": 15621, "loss": 0.4632, "lr": 1.7240423947665678e-06, "epoch": 0.31816144933102875, "percentage": 31.82, "elapsed_time": "0:19:05", "remaining_time": "0:40:55", "throughput": 13660.48, "total_tokens": 15654400}
|
|
{"current_steps": 4975, "total_steps": 15621, "loss": 0.3822, "lr": 1.723271232054924e-06, "epoch": 0.3184815312720056, "percentage": 31.85, "elapsed_time": "0:19:06", "remaining_time": "0:40:53", "throughput": 13666.78, "total_tokens": 15670016}
|
|
{"current_steps": 4980, "total_steps": 15621, "loss": 0.4977, "lr": 1.722499166330655e-06, "epoch": 0.3188016132129825, "percentage": 31.88, "elapsed_time": "0:19:07", "remaining_time": "0:40:51", "throughput": 13673.46, "total_tokens": 15686208}
|
|
{"current_steps": 4985, "total_steps": 15621, "loss": 0.44, "lr": 1.7217261985576936e-06, "epoch": 0.31912169515395944, "percentage": 31.91, "elapsed_time": "0:19:07", "remaining_time": "0:40:49", "throughput": 13680.14, "total_tokens": 15702592}
|
|
{"current_steps": 4990, "total_steps": 15621, "loss": 0.5176, "lr": 1.7209523297010992e-06, "epoch": 0.3194417770949363, "percentage": 31.94, "elapsed_time": "0:19:08", "remaining_time": "0:40:46", "throughput": 13686.17, "total_tokens": 15717696}
|
|
{"current_steps": 4995, "total_steps": 15621, "loss": 0.4644, "lr": 1.7201775607270564e-06, "epoch": 0.3197618590359132, "percentage": 31.98, "elapsed_time": "0:19:09", "remaining_time": "0:40:44", "throughput": 13692.38, "total_tokens": 15733184}
|
|
{"current_steps": 5000, "total_steps": 15621, "loss": 0.5267, "lr": 1.7194018926028733e-06, "epoch": 0.3200819409768901, "percentage": 32.01, "elapsed_time": "0:19:09", "remaining_time": "0:40:42", "throughput": 13699.14, "total_tokens": 15749888}
|
|
{"current_steps": 5005, "total_steps": 15621, "loss": 0.3621, "lr": 1.7186253262969803e-06, "epoch": 0.32040202291786696, "percentage": 32.04, "elapsed_time": "0:19:10", "remaining_time": "0:40:40", "throughput": 13706.86, "total_tokens": 15768384}
|
|
{"current_steps": 5010, "total_steps": 15621, "loss": 0.3269, "lr": 1.7178478627789299e-06, "epoch": 0.32072210485884384, "percentage": 32.07, "elapsed_time": "0:19:11", "remaining_time": "0:40:37", "throughput": 13713.33, "total_tokens": 15784448}
|
|
{"current_steps": 5015, "total_steps": 15621, "loss": 0.4088, "lr": 1.7170695030193944e-06, "epoch": 0.3210421867998208, "percentage": 32.1, "elapsed_time": "0:19:11", "remaining_time": "0:40:35", "throughput": 13719.76, "total_tokens": 15800512}
|
|
{"current_steps": 5020, "total_steps": 15621, "loss": 0.4744, "lr": 1.716290247990165e-06, "epoch": 0.32136226874079765, "percentage": 32.14, "elapsed_time": "0:19:12", "remaining_time": "0:40:33", "throughput": 13725.72, "total_tokens": 15815680}
|
|
{"current_steps": 5025, "total_steps": 15621, "loss": 0.3939, "lr": 1.715510098664151e-06, "epoch": 0.32168235068177453, "percentage": 32.17, "elapsed_time": "0:19:12", "remaining_time": "0:40:30", "throughput": 13731.52, "total_tokens": 15830528}
|
|
{"current_steps": 5030, "total_steps": 15621, "loss": 0.4933, "lr": 1.7147290560153777e-06, "epoch": 0.3220024326227514, "percentage": 32.2, "elapsed_time": "0:19:13", "remaining_time": "0:40:28", "throughput": 13737.4, "total_tokens": 15845568}
|
|
{"current_steps": 5035, "total_steps": 15621, "loss": 0.4531, "lr": 1.7139471210189862e-06, "epoch": 0.3223225145637283, "percentage": 32.23, "elapsed_time": "0:19:14", "remaining_time": "0:40:26", "throughput": 13743.91, "total_tokens": 15861632}
|
|
{"current_steps": 5040, "total_steps": 15621, "loss": 0.5187, "lr": 1.7131642946512312e-06, "epoch": 0.3226425965047052, "percentage": 32.26, "elapsed_time": "0:19:14", "remaining_time": "0:40:24", "throughput": 13750.3, "total_tokens": 15877632}
|
|
{"current_steps": 5045, "total_steps": 15621, "loss": 0.37, "lr": 1.712380577889481e-06, "epoch": 0.3229626784456821, "percentage": 32.3, "elapsed_time": "0:19:15", "remaining_time": "0:40:21", "throughput": 13756.51, "total_tokens": 15893184}
|
|
{"current_steps": 5050, "total_steps": 15621, "loss": 0.3955, "lr": 1.711595971712215e-06, "epoch": 0.323282760386659, "percentage": 32.33, "elapsed_time": "0:19:15", "remaining_time": "0:40:19", "throughput": 13762.45, "total_tokens": 15908416}
|
|
{"current_steps": 5055, "total_steps": 15621, "loss": 0.4074, "lr": 1.7108104770990234e-06, "epoch": 0.32360284232763586, "percentage": 32.36, "elapsed_time": "0:19:16", "remaining_time": "0:40:17", "throughput": 13768.69, "total_tokens": 15924224}
|
|
{"current_steps": 5060, "total_steps": 15621, "loss": 0.2532, "lr": 1.7100240950306052e-06, "epoch": 0.32392292426861274, "percentage": 32.39, "elapsed_time": "0:19:17", "remaining_time": "0:40:15", "throughput": 13774.97, "total_tokens": 15940032}
|
|
{"current_steps": 5065, "total_steps": 15621, "loss": 0.4556, "lr": 1.7092368264887677e-06, "epoch": 0.3242430062095897, "percentage": 32.42, "elapsed_time": "0:19:17", "remaining_time": "0:40:12", "throughput": 13780.77, "total_tokens": 15954944}
|
|
{"current_steps": 5070, "total_steps": 15621, "loss": 0.4923, "lr": 1.7084486724564252e-06, "epoch": 0.32456308815056656, "percentage": 32.46, "elapsed_time": "0:19:18", "remaining_time": "0:40:10", "throughput": 13787.04, "total_tokens": 15970624}
|
|
{"current_steps": 5075, "total_steps": 15621, "loss": 0.418, "lr": 1.707659633917597e-06, "epoch": 0.32488317009154344, "percentage": 32.49, "elapsed_time": "0:19:19", "remaining_time": "0:40:08", "throughput": 13793.44, "total_tokens": 15986688}
|
|
{"current_steps": 5080, "total_steps": 15621, "loss": 0.4172, "lr": 1.7068697118574064e-06, "epoch": 0.3252032520325203, "percentage": 32.52, "elapsed_time": "0:19:19", "remaining_time": "0:40:06", "throughput": 13799.83, "total_tokens": 16002752}
|
|
{"current_steps": 5085, "total_steps": 15621, "loss": 0.4924, "lr": 1.7060789072620816e-06, "epoch": 0.3255233339734972, "percentage": 32.55, "elapsed_time": "0:19:20", "remaining_time": "0:40:03", "throughput": 13805.97, "total_tokens": 16018112}
|
|
{"current_steps": 5090, "total_steps": 15621, "loss": 0.411, "lr": 1.7052872211189509e-06, "epoch": 0.32584341591447413, "percentage": 32.58, "elapsed_time": "0:19:20", "remaining_time": "0:40:01", "throughput": 13812.27, "total_tokens": 16033984}
|
|
{"current_steps": 5095, "total_steps": 15621, "loss": 0.3263, "lr": 1.7044946544164431e-06, "epoch": 0.326163497855451, "percentage": 32.62, "elapsed_time": "0:19:21", "remaining_time": "0:39:59", "throughput": 13818.4, "total_tokens": 16049536}
|
|
{"current_steps": 5100, "total_steps": 15621, "loss": 0.3722, "lr": 1.703701208144088e-06, "epoch": 0.3264835797964279, "percentage": 32.65, "elapsed_time": "0:19:22", "remaining_time": "0:39:57", "throughput": 13825.22, "total_tokens": 16066304}
|
|
{"current_steps": 5105, "total_steps": 15621, "loss": 0.4627, "lr": 1.702906883292512e-06, "epoch": 0.32680366173740477, "percentage": 32.68, "elapsed_time": "0:19:22", "remaining_time": "0:39:55", "throughput": 13831.18, "total_tokens": 16081536}
|
|
{"current_steps": 5110, "total_steps": 15621, "loss": 0.5501, "lr": 1.7021116808534393e-06, "epoch": 0.32712374367838165, "percentage": 32.71, "elapsed_time": "0:19:23", "remaining_time": "0:39:52", "throughput": 13837.28, "total_tokens": 16096896}
|
|
{"current_steps": 5115, "total_steps": 15621, "loss": 0.4294, "lr": 1.7013156018196893e-06, "epoch": 0.32744382561935853, "percentage": 32.74, "elapsed_time": "0:19:23", "remaining_time": "0:39:50", "throughput": 13843.72, "total_tokens": 16112960}
|
|
{"current_steps": 5120, "total_steps": 15621, "loss": 0.4168, "lr": 1.7005186471851759e-06, "epoch": 0.32776390756033547, "percentage": 32.78, "elapsed_time": "0:19:24", "remaining_time": "0:39:48", "throughput": 13850.33, "total_tokens": 16129344}
|
|
{"current_steps": 5125, "total_steps": 15621, "loss": 0.5931, "lr": 1.6997208179449066e-06, "epoch": 0.32808398950131235, "percentage": 32.81, "elapsed_time": "0:19:25", "remaining_time": "0:39:46", "throughput": 13857.98, "total_tokens": 16147776}
|
|
{"current_steps": 5130, "total_steps": 15621, "loss": 0.3523, "lr": 1.6989221150949806e-06, "epoch": 0.3284040714422892, "percentage": 32.84, "elapsed_time": "0:19:25", "remaining_time": "0:39:44", "throughput": 13863.83, "total_tokens": 16162880}
|
|
{"current_steps": 5135, "total_steps": 15621, "loss": 0.2737, "lr": 1.6981225396325873e-06, "epoch": 0.3287241533832661, "percentage": 32.87, "elapsed_time": "0:19:26", "remaining_time": "0:39:41", "throughput": 13870.47, "total_tokens": 16179392}
|
|
{"current_steps": 5140, "total_steps": 15621, "loss": 0.5036, "lr": 1.6973220925560067e-06, "epoch": 0.329044235324243, "percentage": 32.9, "elapsed_time": "0:19:27", "remaining_time": "0:39:39", "throughput": 13876.34, "total_tokens": 16194560}
|
|
{"current_steps": 5145, "total_steps": 15621, "loss": 0.4281, "lr": 1.696520774864606e-06, "epoch": 0.3293643172652199, "percentage": 32.94, "elapsed_time": "0:19:27", "remaining_time": "0:39:37", "throughput": 13882.5, "total_tokens": 16210112}
|
|
{"current_steps": 5150, "total_steps": 15621, "loss": 0.4646, "lr": 1.69571858755884e-06, "epoch": 0.3296843992061968, "percentage": 32.97, "elapsed_time": "0:19:28", "remaining_time": "0:39:35", "throughput": 13888.76, "total_tokens": 16225856}
|
|
{"current_steps": 5155, "total_steps": 15621, "loss": 0.4177, "lr": 1.6949155316402487e-06, "epoch": 0.3300044811471737, "percentage": 33.0, "elapsed_time": "0:19:28", "remaining_time": "0:39:33", "throughput": 13894.82, "total_tokens": 16241536}
|
|
{"current_steps": 5160, "total_steps": 15621, "loss": 0.3777, "lr": 1.6941116081114566e-06, "epoch": 0.33032456308815056, "percentage": 33.03, "elapsed_time": "0:19:29", "remaining_time": "0:39:30", "throughput": 13900.43, "total_tokens": 16256384}
|
|
{"current_steps": 5165, "total_steps": 15621, "loss": 0.3937, "lr": 1.6933068179761722e-06, "epoch": 0.33064464502912744, "percentage": 33.06, "elapsed_time": "0:19:30", "remaining_time": "0:39:28", "throughput": 13906.22, "total_tokens": 16271360}
|
|
{"current_steps": 5170, "total_steps": 15621, "loss": 0.4118, "lr": 1.6925011622391857e-06, "epoch": 0.3309647269701044, "percentage": 33.1, "elapsed_time": "0:19:30", "remaining_time": "0:39:26", "throughput": 13912.14, "total_tokens": 16286656}
|
|
{"current_steps": 5175, "total_steps": 15621, "loss": 0.4038, "lr": 1.6916946419063667e-06, "epoch": 0.33128480891108125, "percentage": 33.13, "elapsed_time": "0:19:31", "remaining_time": "0:39:24", "throughput": 13918.39, "total_tokens": 16302592}
|
|
{"current_steps": 5180, "total_steps": 15621, "loss": 0.5252, "lr": 1.690887257984666e-06, "epoch": 0.33160489085205813, "percentage": 33.16, "elapsed_time": "0:19:31", "remaining_time": "0:39:22", "throughput": 13924.66, "total_tokens": 16318656}
|
|
{"current_steps": 5185, "total_steps": 15621, "loss": 0.4784, "lr": 1.690079011482112e-06, "epoch": 0.331924972793035, "percentage": 33.19, "elapsed_time": "0:19:32", "remaining_time": "0:39:19", "throughput": 13930.71, "total_tokens": 16334016}
|
|
{"current_steps": 5190, "total_steps": 15621, "loss": 0.5322, "lr": 1.6892699034078096e-06, "epoch": 0.3322450547340119, "percentage": 33.22, "elapsed_time": "0:19:33", "remaining_time": "0:39:17", "throughput": 13936.97, "total_tokens": 16349888}
|
|
{"current_steps": 5195, "total_steps": 15621, "loss": 0.5017, "lr": 1.68845993477194e-06, "epoch": 0.33256513667498877, "percentage": 33.26, "elapsed_time": "0:19:33", "remaining_time": "0:39:15", "throughput": 13942.83, "total_tokens": 16365056}
|
|
{"current_steps": 5200, "total_steps": 15621, "loss": 0.3857, "lr": 1.6876491065857584e-06, "epoch": 0.3328852186159657, "percentage": 33.29, "elapsed_time": "0:19:34", "remaining_time": "0:39:13", "throughput": 13948.53, "total_tokens": 16380032}
|
|
{"current_steps": 5205, "total_steps": 15621, "loss": 0.6437, "lr": 1.6868374198615928e-06, "epoch": 0.3332053005569426, "percentage": 33.32, "elapsed_time": "0:19:34", "remaining_time": "0:39:11", "throughput": 13954.13, "total_tokens": 16394752}
|
|
{"current_steps": 5210, "total_steps": 15621, "loss": 0.4782, "lr": 1.6860248756128448e-06, "epoch": 0.33352538249791946, "percentage": 33.35, "elapsed_time": "0:19:35", "remaining_time": "0:39:08", "throughput": 13960.15, "total_tokens": 16410368}
|
|
{"current_steps": 5215, "total_steps": 15621, "loss": 0.3992, "lr": 1.6852114748539844e-06, "epoch": 0.33384546443889634, "percentage": 33.38, "elapsed_time": "0:19:36", "remaining_time": "0:39:06", "throughput": 13965.68, "total_tokens": 16425088}
|
|
{"current_steps": 5220, "total_steps": 15621, "loss": 0.3352, "lr": 1.6843972186005525e-06, "epoch": 0.3341655463798732, "percentage": 33.42, "elapsed_time": "0:19:36", "remaining_time": "0:39:04", "throughput": 13972.0, "total_tokens": 16441152}
|
|
{"current_steps": 5225, "total_steps": 15621, "loss": 0.4641, "lr": 1.6835821078691577e-06, "epoch": 0.33448562832085016, "percentage": 33.45, "elapsed_time": "0:19:37", "remaining_time": "0:39:02", "throughput": 13978.8, "total_tokens": 16458240}
|
|
{"current_steps": 5230, "total_steps": 15621, "loss": 0.4142, "lr": 1.6827661436774746e-06, "epoch": 0.33480571026182704, "percentage": 33.48, "elapsed_time": "0:19:37", "remaining_time": "0:39:00", "throughput": 13984.99, "total_tokens": 16474112}
|
|
{"current_steps": 5235, "total_steps": 15621, "loss": 0.3955, "lr": 1.681949327044245e-06, "epoch": 0.3351257922028039, "percentage": 33.51, "elapsed_time": "0:19:38", "remaining_time": "0:38:58", "throughput": 13991.49, "total_tokens": 16490560}
|
|
{"current_steps": 5240, "total_steps": 15621, "loss": 0.6757, "lr": 1.6811316589892734e-06, "epoch": 0.3354458741437808, "percentage": 33.54, "elapsed_time": "0:19:39", "remaining_time": "0:38:56", "throughput": 13997.26, "total_tokens": 16505728}
|
|
{"current_steps": 5245, "total_steps": 15621, "loss": 0.4257, "lr": 1.6803131405334284e-06, "epoch": 0.3357659560847577, "percentage": 33.58, "elapsed_time": "0:19:39", "remaining_time": "0:38:54", "throughput": 14003.58, "total_tokens": 16521856}
|
|
{"current_steps": 5250, "total_steps": 15621, "loss": 0.4271, "lr": 1.6794937726986396e-06, "epoch": 0.3360860380257346, "percentage": 33.61, "elapsed_time": "0:19:40", "remaining_time": "0:38:51", "throughput": 14009.7, "total_tokens": 16537792}
|
|
{"current_steps": 5255, "total_steps": 15621, "loss": 0.434, "lr": 1.6786735565078974e-06, "epoch": 0.3364061199667115, "percentage": 33.64, "elapsed_time": "0:19:41", "remaining_time": "0:38:49", "throughput": 14015.68, "total_tokens": 16553408}
|
|
{"current_steps": 5260, "total_steps": 15621, "loss": 0.4297, "lr": 1.677852492985251e-06, "epoch": 0.33672620190768837, "percentage": 33.67, "elapsed_time": "0:19:41", "remaining_time": "0:38:47", "throughput": 14022.23, "total_tokens": 16570112}
|
|
{"current_steps": 5265, "total_steps": 15621, "loss": 0.4931, "lr": 1.6770305831558086e-06, "epoch": 0.33704628384866525, "percentage": 33.7, "elapsed_time": "0:19:42", "remaining_time": "0:38:45", "throughput": 14028.45, "total_tokens": 16586304}
|
|
{"current_steps": 5270, "total_steps": 15621, "loss": 0.3922, "lr": 1.6762078280457342e-06, "epoch": 0.33736636578964213, "percentage": 33.74, "elapsed_time": "0:19:42", "remaining_time": "0:38:43", "throughput": 14034.35, "total_tokens": 16601920}
|
|
{"current_steps": 5275, "total_steps": 15621, "loss": 0.4797, "lr": 1.6753842286822465e-06, "epoch": 0.33768644773061907, "percentage": 33.77, "elapsed_time": "0:19:43", "remaining_time": "0:38:41", "throughput": 14040.72, "total_tokens": 16618240}
|
|
{"current_steps": 5280, "total_steps": 15621, "loss": 0.59, "lr": 1.6745597860936199e-06, "epoch": 0.33800652967159595, "percentage": 33.8, "elapsed_time": "0:19:44", "remaining_time": "0:38:39", "throughput": 14046.49, "total_tokens": 16633408}
|
|
{"current_steps": 5285, "total_steps": 15621, "loss": 0.439, "lr": 1.6737345013091794e-06, "epoch": 0.3383266116125728, "percentage": 33.83, "elapsed_time": "0:19:44", "remaining_time": "0:38:37", "throughput": 14052.76, "total_tokens": 16649664}
|
|
{"current_steps": 5290, "total_steps": 15621, "loss": 0.4602, "lr": 1.672908375359304e-06, "epoch": 0.3386466935535497, "percentage": 33.86, "elapsed_time": "0:19:45", "remaining_time": "0:38:34", "throughput": 14058.51, "total_tokens": 16664896}
|
|
{"current_steps": 5295, "total_steps": 15621, "loss": 0.5433, "lr": 1.6720814092754209e-06, "epoch": 0.3389667754945266, "percentage": 33.9, "elapsed_time": "0:19:46", "remaining_time": "0:38:32", "throughput": 14064.34, "total_tokens": 16680384}
|
|
{"current_steps": 5300, "total_steps": 15621, "loss": 0.3696, "lr": 1.6712536040900075e-06, "epoch": 0.33928685743550346, "percentage": 33.93, "elapsed_time": "0:19:46", "remaining_time": "0:38:30", "throughput": 14070.46, "total_tokens": 16696192}
|
|
{"current_steps": 5305, "total_steps": 15621, "loss": 0.4752, "lr": 1.6704249608365878e-06, "epoch": 0.3396069393764804, "percentage": 33.96, "elapsed_time": "0:19:47", "remaining_time": "0:38:29", "throughput": 14082.79, "total_tokens": 16727104}
|
|
{"current_steps": 5310, "total_steps": 15621, "loss": 0.4154, "lr": 1.669595480549733e-06, "epoch": 0.3399270213174573, "percentage": 33.99, "elapsed_time": "0:19:48", "remaining_time": "0:38:27", "throughput": 14088.26, "total_tokens": 16741696}
|
|
{"current_steps": 5315, "total_steps": 15621, "loss": 0.432, "lr": 1.6687651642650587e-06, "epoch": 0.34024710325843416, "percentage": 34.02, "elapsed_time": "0:19:48", "remaining_time": "0:38:25", "throughput": 14094.09, "total_tokens": 16757120}
|
|
{"current_steps": 5320, "total_steps": 15621, "loss": 0.4471, "lr": 1.6679340130192245e-06, "epoch": 0.34056718519941104, "percentage": 34.06, "elapsed_time": "0:19:49", "remaining_time": "0:38:23", "throughput": 14099.78, "total_tokens": 16772416}
|
|
{"current_steps": 5325, "total_steps": 15621, "loss": 0.3172, "lr": 1.667102027849933e-06, "epoch": 0.3408872671403879, "percentage": 34.09, "elapsed_time": "0:19:50", "remaining_time": "0:38:21", "throughput": 14105.92, "total_tokens": 16788352}
|
|
{"current_steps": 5330, "total_steps": 15621, "loss": 0.3456, "lr": 1.6662692097959266e-06, "epoch": 0.34120734908136485, "percentage": 34.12, "elapsed_time": "0:19:50", "remaining_time": "0:38:19", "throughput": 14111.65, "total_tokens": 16803648}
|
|
{"current_steps": 5335, "total_steps": 15621, "loss": 0.4708, "lr": 1.6654355598969894e-06, "epoch": 0.34152743102234173, "percentage": 34.15, "elapsed_time": "0:19:51", "remaining_time": "0:38:16", "throughput": 14117.4, "total_tokens": 16818944}
|
|
{"current_steps": 5340, "total_steps": 15621, "loss": 0.5078, "lr": 1.6646010791939423e-06, "epoch": 0.3418475129633186, "percentage": 34.18, "elapsed_time": "0:19:51", "remaining_time": "0:38:14", "throughput": 14122.99, "total_tokens": 16833984}
|
|
{"current_steps": 5345, "total_steps": 15621, "loss": 0.5507, "lr": 1.6637657687286446e-06, "epoch": 0.3421675949042955, "percentage": 34.22, "elapsed_time": "0:19:52", "remaining_time": "0:38:12", "throughput": 14128.81, "total_tokens": 16849280}
|
|
{"current_steps": 5350, "total_steps": 15621, "loss": 0.3979, "lr": 1.6629296295439912e-06, "epoch": 0.34248767684527237, "percentage": 34.25, "elapsed_time": "0:19:53", "remaining_time": "0:38:10", "throughput": 14135.01, "total_tokens": 16865664}
|
|
{"current_steps": 5355, "total_steps": 15621, "loss": 0.4884, "lr": 1.6620926626839116e-06, "epoch": 0.3428077587862493, "percentage": 34.28, "elapsed_time": "0:19:53", "remaining_time": "0:38:08", "throughput": 14140.91, "total_tokens": 16881536}
|
|
{"current_steps": 5360, "total_steps": 15621, "loss": 0.4395, "lr": 1.661254869193369e-06, "epoch": 0.3431278407272262, "percentage": 34.31, "elapsed_time": "0:19:54", "remaining_time": "0:38:06", "throughput": 14147.61, "total_tokens": 16898816}
|
|
{"current_steps": 5365, "total_steps": 15621, "loss": 0.5104, "lr": 1.6604162501183581e-06, "epoch": 0.34344792266820307, "percentage": 34.34, "elapsed_time": "0:19:55", "remaining_time": "0:38:04", "throughput": 14153.81, "total_tokens": 16915136}
|
|
{"current_steps": 5370, "total_steps": 15621, "loss": 0.4607, "lr": 1.6595768065059045e-06, "epoch": 0.34376800460917994, "percentage": 34.38, "elapsed_time": "0:19:55", "remaining_time": "0:38:02", "throughput": 14159.81, "total_tokens": 16931200}
|
|
{"current_steps": 5375, "total_steps": 15621, "loss": 0.4652, "lr": 1.6587365394040641e-06, "epoch": 0.3440880865501568, "percentage": 34.41, "elapsed_time": "0:19:56", "remaining_time": "0:38:00", "throughput": 14165.59, "total_tokens": 16946816}
|
|
{"current_steps": 5380, "total_steps": 15621, "loss": 0.3893, "lr": 1.6578954498619195e-06, "epoch": 0.3444081684911337, "percentage": 34.44, "elapsed_time": "0:19:56", "remaining_time": "0:37:58", "throughput": 14171.61, "total_tokens": 16962880}
|
|
{"current_steps": 5385, "total_steps": 15621, "loss": 0.4587, "lr": 1.6570535389295814e-06, "epoch": 0.34472825043211064, "percentage": 34.47, "elapsed_time": "0:19:57", "remaining_time": "0:37:56", "throughput": 14177.32, "total_tokens": 16978240}
|
|
{"current_steps": 5390, "total_steps": 15621, "loss": 0.3628, "lr": 1.6562108076581853e-06, "epoch": 0.3450483323730875, "percentage": 34.5, "elapsed_time": "0:19:58", "remaining_time": "0:37:54", "throughput": 14183.12, "total_tokens": 16993728}
|
|
{"current_steps": 5395, "total_steps": 15621, "loss": 0.5903, "lr": 1.6553672570998912e-06, "epoch": 0.3453684143140644, "percentage": 34.54, "elapsed_time": "0:19:58", "remaining_time": "0:37:52", "throughput": 14189.06, "total_tokens": 17009728}
|
|
{"current_steps": 5400, "total_steps": 15621, "loss": 0.4174, "lr": 1.6545228883078815e-06, "epoch": 0.3456884962550413, "percentage": 34.57, "elapsed_time": "0:19:59", "remaining_time": "0:37:50", "throughput": 14194.54, "total_tokens": 17024640}
|
|
{"current_steps": 5405, "total_steps": 15621, "loss": 0.3541, "lr": 1.653677702336361e-06, "epoch": 0.34600857819601816, "percentage": 34.6, "elapsed_time": "0:20:00", "remaining_time": "0:37:48", "throughput": 14200.43, "total_tokens": 17040512}
|
|
{"current_steps": 5410, "total_steps": 15621, "loss": 0.4657, "lr": 1.6528317002405538e-06, "epoch": 0.3463286601369951, "percentage": 34.63, "elapsed_time": "0:20:00", "remaining_time": "0:37:46", "throughput": 14206.21, "total_tokens": 17056064}
|
|
{"current_steps": 5415, "total_steps": 15621, "loss": 0.3692, "lr": 1.6519848830767043e-06, "epoch": 0.34664874207797197, "percentage": 34.66, "elapsed_time": "0:20:01", "remaining_time": "0:37:44", "throughput": 14212.43, "total_tokens": 17072448}
|
|
{"current_steps": 5420, "total_steps": 15621, "loss": 0.6197, "lr": 1.6511372519020726e-06, "epoch": 0.34696882401894885, "percentage": 34.7, "elapsed_time": "0:20:01", "remaining_time": "0:37:42", "throughput": 14218.29, "total_tokens": 17088320}
|
|
{"current_steps": 5425, "total_steps": 15621, "loss": 0.4291, "lr": 1.650288807774937e-06, "epoch": 0.34728890595992573, "percentage": 34.73, "elapsed_time": "0:20:02", "remaining_time": "0:37:39", "throughput": 14224.35, "total_tokens": 17104448}
|
|
{"current_steps": 5430, "total_steps": 15621, "loss": 0.3964, "lr": 1.6494395517545893e-06, "epoch": 0.3476089879009026, "percentage": 34.76, "elapsed_time": "0:20:03", "remaining_time": "0:37:38", "throughput": 14231.06, "total_tokens": 17121856}
|
|
{"current_steps": 5435, "total_steps": 15621, "loss": 0.5052, "lr": 1.6485894849013362e-06, "epoch": 0.34792906984187955, "percentage": 34.79, "elapsed_time": "0:20:03", "remaining_time": "0:37:35", "throughput": 14236.39, "total_tokens": 17136512}
|
|
{"current_steps": 5440, "total_steps": 15621, "loss": 0.443, "lr": 1.6477386082764961e-06, "epoch": 0.3482491517828564, "percentage": 34.82, "elapsed_time": "0:20:04", "remaining_time": "0:37:33", "throughput": 14242.47, "total_tokens": 17152640}
|
|
{"current_steps": 5445, "total_steps": 15621, "loss": 0.362, "lr": 1.6468869229423983e-06, "epoch": 0.3485692337238333, "percentage": 34.86, "elapsed_time": "0:20:04", "remaining_time": "0:37:31", "throughput": 14247.92, "total_tokens": 17167680}
|
|
{"current_steps": 5450, "total_steps": 15621, "loss": 0.6295, "lr": 1.6460344299623813e-06, "epoch": 0.3488893156648102, "percentage": 34.89, "elapsed_time": "0:20:05", "remaining_time": "0:37:29", "throughput": 14253.62, "total_tokens": 17183296}
|
|
{"current_steps": 5455, "total_steps": 15621, "loss": 0.5424, "lr": 1.6451811304007939e-06, "epoch": 0.34920939760578706, "percentage": 34.92, "elapsed_time": "0:20:06", "remaining_time": "0:37:27", "throughput": 14259.07, "total_tokens": 17198272}
|
|
{"current_steps": 5460, "total_steps": 15621, "loss": 0.5177, "lr": 1.6443270253229895e-06, "epoch": 0.349529479546764, "percentage": 34.95, "elapsed_time": "0:20:06", "remaining_time": "0:37:25", "throughput": 14264.65, "total_tokens": 17213376}
|
|
{"current_steps": 5465, "total_steps": 15621, "loss": 0.4657, "lr": 1.6434721157953288e-06, "epoch": 0.3498495614877409, "percentage": 34.98, "elapsed_time": "0:20:07", "remaining_time": "0:37:23", "throughput": 14270.77, "total_tokens": 17229632}
|
|
{"current_steps": 5470, "total_steps": 15621, "loss": 0.579, "lr": 1.6426164028851765e-06, "epoch": 0.35016964342871776, "percentage": 35.02, "elapsed_time": "0:20:07", "remaining_time": "0:37:21", "throughput": 14276.73, "total_tokens": 17245696}
|
|
{"current_steps": 5474, "total_steps": 15621, "eval_loss": 0.43906036019325256, "epoch": 0.3504257089814993, "percentage": 35.04, "elapsed_time": "0:20:57", "remaining_time": "0:38:51", "throughput": 13724.04, "total_tokens": 17259840}
|
|
{"current_steps": 5475, "total_steps": 15621, "loss": 0.3787, "lr": 1.6417598876609002e-06, "epoch": 0.35048972536969464, "percentage": 35.05, "elapsed_time": "0:21:46", "remaining_time": "0:40:20", "throughput": 13215.82, "total_tokens": 17262976}
|
|
{"current_steps": 5480, "total_steps": 15621, "loss": 0.419, "lr": 1.640902571191869e-06, "epoch": 0.3508098073106715, "percentage": 35.08, "elapsed_time": "0:21:46", "remaining_time": "0:40:18", "throughput": 13221.52, "total_tokens": 17278336}
|
|
{"current_steps": 5485, "total_steps": 15621, "loss": 0.3535, "lr": 1.6400444545484524e-06, "epoch": 0.3511298892516484, "percentage": 35.11, "elapsed_time": "0:21:47", "remaining_time": "0:40:16", "throughput": 13227.0, "total_tokens": 17293248}
|
|
{"current_steps": 5490, "total_steps": 15621, "loss": 0.4275, "lr": 1.6391855388020193e-06, "epoch": 0.35144997119262533, "percentage": 35.14, "elapsed_time": "0:21:48", "remaining_time": "0:40:13", "throughput": 13232.89, "total_tokens": 17309184}
|
|
{"current_steps": 5495, "total_steps": 15621, "loss": 0.4436, "lr": 1.6383258250249363e-06, "epoch": 0.3517700531336022, "percentage": 35.18, "elapsed_time": "0:21:48", "remaining_time": "0:40:11", "throughput": 13238.85, "total_tokens": 17325248}
|
|
{"current_steps": 5500, "total_steps": 15621, "loss": 0.4226, "lr": 1.6374653142905661e-06, "epoch": 0.3520901350745791, "percentage": 35.21, "elapsed_time": "0:21:49", "remaining_time": "0:40:09", "throughput": 13244.54, "total_tokens": 17340736}
|
|
{"current_steps": 5505, "total_steps": 15621, "loss": 0.4188, "lr": 1.6366040076732662e-06, "epoch": 0.35241021701555597, "percentage": 35.24, "elapsed_time": "0:21:49", "remaining_time": "0:40:07", "throughput": 13250.13, "total_tokens": 17355904}
|
|
{"current_steps": 5510, "total_steps": 15621, "loss": 0.4712, "lr": 1.6357419062483882e-06, "epoch": 0.35273029895653285, "percentage": 35.27, "elapsed_time": "0:21:50", "remaining_time": "0:40:04", "throughput": 13255.75, "total_tokens": 17371264}
|
|
{"current_steps": 5515, "total_steps": 15621, "loss": 0.4168, "lr": 1.6348790110922758e-06, "epoch": 0.3530503808975098, "percentage": 35.31, "elapsed_time": "0:21:51", "remaining_time": "0:40:02", "throughput": 13262.29, "total_tokens": 17388608}
|
|
{"current_steps": 5520, "total_steps": 15621, "loss": 0.4668, "lr": 1.6340153232822635e-06, "epoch": 0.35337046283848667, "percentage": 35.34, "elapsed_time": "0:21:51", "remaining_time": "0:40:00", "throughput": 13267.86, "total_tokens": 17403712}
|
|
{"current_steps": 5525, "total_steps": 15621, "loss": 0.4809, "lr": 1.633150843896676e-06, "epoch": 0.35369054477946354, "percentage": 35.37, "elapsed_time": "0:21:52", "remaining_time": "0:39:58", "throughput": 13274.46, "total_tokens": 17421056}
|
|
{"current_steps": 5530, "total_steps": 15621, "loss": 0.5588, "lr": 1.6322855740148263e-06, "epoch": 0.3540106267204404, "percentage": 35.4, "elapsed_time": "0:21:52", "remaining_time": "0:39:55", "throughput": 13279.9, "total_tokens": 17436096}
|
|
{"current_steps": 5535, "total_steps": 15621, "loss": 0.3701, "lr": 1.6314195147170132e-06, "epoch": 0.3543307086614173, "percentage": 35.43, "elapsed_time": "0:21:53", "remaining_time": "0:39:53", "throughput": 13285.96, "total_tokens": 17452480}
|
|
{"current_steps": 5540, "total_steps": 15621, "loss": 0.4038, "lr": 1.6305526670845225e-06, "epoch": 0.35465079060239424, "percentage": 35.47, "elapsed_time": "0:21:54", "remaining_time": "0:39:51", "throughput": 13291.54, "total_tokens": 17467776}
|
|
{"current_steps": 5545, "total_steps": 15621, "loss": 0.5081, "lr": 1.6296850321996232e-06, "epoch": 0.3549708725433711, "percentage": 35.5, "elapsed_time": "0:21:54", "remaining_time": "0:39:49", "throughput": 13296.98, "total_tokens": 17482752}
|
|
{"current_steps": 5550, "total_steps": 15621, "loss": 0.3885, "lr": 1.6288166111455683e-06, "epoch": 0.355290954484348, "percentage": 35.53, "elapsed_time": "0:21:55", "remaining_time": "0:39:46", "throughput": 13302.45, "total_tokens": 17497792}
|
|
{"current_steps": 5555, "total_steps": 15621, "loss": 0.4774, "lr": 1.6279474050065906e-06, "epoch": 0.3556110364253249, "percentage": 35.56, "elapsed_time": "0:21:55", "remaining_time": "0:39:44", "throughput": 13307.99, "total_tokens": 17513024}
|
|
{"current_steps": 5560, "total_steps": 15621, "loss": 0.4143, "lr": 1.6270774148679054e-06, "epoch": 0.35593111836630176, "percentage": 35.59, "elapsed_time": "0:21:56", "remaining_time": "0:39:42", "throughput": 13313.76, "total_tokens": 17529024}
|
|
{"current_steps": 5565, "total_steps": 15621, "loss": 0.3764, "lr": 1.6262066418157048e-06, "epoch": 0.35625120030727864, "percentage": 35.63, "elapsed_time": "0:21:57", "remaining_time": "0:39:40", "throughput": 13319.12, "total_tokens": 17543936}
|
|
{"current_steps": 5570, "total_steps": 15621, "loss": 0.5374, "lr": 1.6253350869371595e-06, "epoch": 0.35657128224825557, "percentage": 35.66, "elapsed_time": "0:21:57", "remaining_time": "0:39:37", "throughput": 13324.61, "total_tokens": 17559168}
|
|
{"current_steps": 5575, "total_steps": 15621, "loss": 0.3828, "lr": 1.6244627513204158e-06, "epoch": 0.35689136418923245, "percentage": 35.69, "elapsed_time": "0:21:58", "remaining_time": "0:39:35", "throughput": 13330.36, "total_tokens": 17574912}
|
|
{"current_steps": 5580, "total_steps": 15621, "loss": 0.4239, "lr": 1.6235896360545954e-06, "epoch": 0.35721144613020933, "percentage": 35.72, "elapsed_time": "0:21:59", "remaining_time": "0:39:33", "throughput": 13335.94, "total_tokens": 17590272}
|
|
{"current_steps": 5585, "total_steps": 15621, "loss": 0.4379, "lr": 1.622715742229792e-06, "epoch": 0.3575315280711862, "percentage": 35.75, "elapsed_time": "0:21:59", "remaining_time": "0:39:31", "throughput": 13341.64, "total_tokens": 17605952}
|
|
{"current_steps": 5590, "total_steps": 15621, "loss": 0.3813, "lr": 1.6218410709370734e-06, "epoch": 0.3578516100121631, "percentage": 35.79, "elapsed_time": "0:22:00", "remaining_time": "0:39:29", "throughput": 13347.1, "total_tokens": 17621120}
|
|
{"current_steps": 5595, "total_steps": 15621, "loss": 0.5629, "lr": 1.6209656232684768e-06, "epoch": 0.35817169195314, "percentage": 35.82, "elapsed_time": "0:22:00", "remaining_time": "0:39:26", "throughput": 13352.48, "total_tokens": 17636096}
|
|
{"current_steps": 5600, "total_steps": 15621, "loss": 0.4427, "lr": 1.620089400317008e-06, "epoch": 0.3584917738941169, "percentage": 35.85, "elapsed_time": "0:22:01", "remaining_time": "0:39:24", "throughput": 13358.59, "total_tokens": 17652672}
|
|
{"current_steps": 5605, "total_steps": 15621, "loss": 0.4875, "lr": 1.6192124031766425e-06, "epoch": 0.3588118558350938, "percentage": 35.88, "elapsed_time": "0:22:02", "remaining_time": "0:39:22", "throughput": 13364.02, "total_tokens": 17668032}
|
|
{"current_steps": 5610, "total_steps": 15621, "loss": 0.4474, "lr": 1.6183346329423213e-06, "epoch": 0.35913193777607066, "percentage": 35.91, "elapsed_time": "0:22:02", "remaining_time": "0:39:20", "throughput": 13369.47, "total_tokens": 17683264}
|
|
{"current_steps": 5615, "total_steps": 15621, "loss": 0.3642, "lr": 1.6174560907099508e-06, "epoch": 0.35945201971704754, "percentage": 35.95, "elapsed_time": "0:22:03", "remaining_time": "0:39:18", "throughput": 13375.25, "total_tokens": 17699200}
|
|
{"current_steps": 5620, "total_steps": 15621, "loss": 0.3489, "lr": 1.6165767775764013e-06, "epoch": 0.3597721016580245, "percentage": 35.98, "elapsed_time": "0:22:03", "remaining_time": "0:39:15", "throughput": 13380.89, "total_tokens": 17714816}
|
|
{"current_steps": 5625, "total_steps": 15621, "loss": 0.411, "lr": 1.6156966946395056e-06, "epoch": 0.36009218359900136, "percentage": 36.01, "elapsed_time": "0:22:04", "remaining_time": "0:39:13", "throughput": 13387.41, "total_tokens": 17732352}
|
|
{"current_steps": 5630, "total_steps": 15621, "loss": 0.5376, "lr": 1.6148158429980577e-06, "epoch": 0.36041226553997824, "percentage": 36.04, "elapsed_time": "0:22:05", "remaining_time": "0:39:11", "throughput": 13393.19, "total_tokens": 17748288}
|
|
{"current_steps": 5635, "total_steps": 15621, "loss": 0.3839, "lr": 1.6139342237518108e-06, "epoch": 0.3607323474809551, "percentage": 36.07, "elapsed_time": "0:22:05", "remaining_time": "0:39:09", "throughput": 13398.56, "total_tokens": 17763520}
|
|
{"current_steps": 5640, "total_steps": 15621, "loss": 0.428, "lr": 1.6130518380014773e-06, "epoch": 0.361052429421932, "percentage": 36.11, "elapsed_time": "0:22:06", "remaining_time": "0:39:07", "throughput": 13404.26, "total_tokens": 17779328}
|
|
{"current_steps": 5645, "total_steps": 15621, "loss": 0.4178, "lr": 1.6121686868487259e-06, "epoch": 0.3613725113629089, "percentage": 36.14, "elapsed_time": "0:22:07", "remaining_time": "0:39:05", "throughput": 13410.15, "total_tokens": 17795584}
|
|
{"current_steps": 5650, "total_steps": 15621, "loss": 0.44, "lr": 1.6112847713961815e-06, "epoch": 0.3616925933038858, "percentage": 36.17, "elapsed_time": "0:22:07", "remaining_time": "0:39:02", "throughput": 13415.32, "total_tokens": 17810368}
|
|
{"current_steps": 5655, "total_steps": 15621, "loss": 0.4283, "lr": 1.610400092747423e-06, "epoch": 0.3620126752448627, "percentage": 36.2, "elapsed_time": "0:22:08", "remaining_time": "0:39:00", "throughput": 13421.13, "total_tokens": 17826496}
|
|
{"current_steps": 5660, "total_steps": 15621, "loss": 0.4191, "lr": 1.609514652006981e-06, "epoch": 0.36233275718583957, "percentage": 36.23, "elapsed_time": "0:22:08", "remaining_time": "0:38:58", "throughput": 13426.35, "total_tokens": 17841344}
|
|
{"current_steps": 5665, "total_steps": 15621, "loss": 0.5596, "lr": 1.60862845028034e-06, "epoch": 0.36265283912681645, "percentage": 36.27, "elapsed_time": "0:22:09", "remaining_time": "0:38:56", "throughput": 13432.08, "total_tokens": 17857408}
|
|
{"current_steps": 5670, "total_steps": 15621, "loss": 0.4256, "lr": 1.6077414886739327e-06, "epoch": 0.36297292106779333, "percentage": 36.3, "elapsed_time": "0:22:10", "remaining_time": "0:38:54", "throughput": 13437.77, "total_tokens": 17873280}
|
|
{"current_steps": 5675, "total_steps": 15621, "loss": 0.4936, "lr": 1.6068537682951412e-06, "epoch": 0.36329300300877027, "percentage": 36.33, "elapsed_time": "0:22:10", "remaining_time": "0:38:52", "throughput": 13443.17, "total_tokens": 17888448}
|
|
{"current_steps": 5680, "total_steps": 15621, "loss": 0.4402, "lr": 1.6059652902522947e-06, "epoch": 0.36361308494974715, "percentage": 36.36, "elapsed_time": "0:22:11", "remaining_time": "0:38:49", "throughput": 13448.82, "total_tokens": 17904320}
|
|
{"current_steps": 5685, "total_steps": 15621, "loss": 0.3667, "lr": 1.6050760556546683e-06, "epoch": 0.363933166890724, "percentage": 36.39, "elapsed_time": "0:22:11", "remaining_time": "0:38:47", "throughput": 13454.29, "total_tokens": 17919744}
|
|
{"current_steps": 5690, "total_steps": 15621, "loss": 0.3814, "lr": 1.6041860656124823e-06, "epoch": 0.3642532488317009, "percentage": 36.43, "elapsed_time": "0:22:12", "remaining_time": "0:38:45", "throughput": 13459.61, "total_tokens": 17934656}
|
|
{"current_steps": 5695, "total_steps": 15621, "loss": 0.5375, "lr": 1.6032953212368993e-06, "epoch": 0.3645733307726778, "percentage": 36.46, "elapsed_time": "0:22:13", "remaining_time": "0:38:43", "throughput": 13465.49, "total_tokens": 17950976}
|
|
{"current_steps": 5700, "total_steps": 15621, "loss": 0.4688, "lr": 1.6024038236400243e-06, "epoch": 0.3648934127136547, "percentage": 36.49, "elapsed_time": "0:22:13", "remaining_time": "0:38:41", "throughput": 13470.95, "total_tokens": 17966400}
|
|
{"current_steps": 5705, "total_steps": 15621, "loss": 0.5649, "lr": 1.6015115739349027e-06, "epoch": 0.3652134946546316, "percentage": 36.52, "elapsed_time": "0:22:14", "remaining_time": "0:38:39", "throughput": 13477.3, "total_tokens": 17983872}
|
|
{"current_steps": 5710, "total_steps": 15621, "loss": 0.5461, "lr": 1.6006185732355183e-06, "epoch": 0.3655335765956085, "percentage": 36.55, "elapsed_time": "0:22:14", "remaining_time": "0:38:37", "throughput": 13482.99, "total_tokens": 17999680}
|
|
{"current_steps": 5715, "total_steps": 15621, "loss": 0.3802, "lr": 1.5997248226567931e-06, "epoch": 0.36585365853658536, "percentage": 36.59, "elapsed_time": "0:22:15", "remaining_time": "0:38:35", "throughput": 13488.27, "total_tokens": 18014784}
|
|
{"current_steps": 5720, "total_steps": 15621, "loss": 0.4997, "lr": 1.5988303233145853e-06, "epoch": 0.36617374047756224, "percentage": 36.62, "elapsed_time": "0:22:16", "remaining_time": "0:38:32", "throughput": 13493.54, "total_tokens": 18029888}
|
|
{"current_steps": 5725, "total_steps": 15621, "loss": 0.3877, "lr": 1.597935076325688e-06, "epoch": 0.3664938224185392, "percentage": 36.65, "elapsed_time": "0:22:16", "remaining_time": "0:38:30", "throughput": 13499.09, "total_tokens": 18045632}
|
|
{"current_steps": 5730, "total_steps": 15621, "loss": 0.5839, "lr": 1.5970390828078272e-06, "epoch": 0.36681390435951605, "percentage": 36.68, "elapsed_time": "0:22:17", "remaining_time": "0:38:28", "throughput": 13504.44, "total_tokens": 18060928}
|
|
{"current_steps": 5735, "total_steps": 15621, "loss": 0.4567, "lr": 1.5961423438796615e-06, "epoch": 0.36713398630049293, "percentage": 36.71, "elapsed_time": "0:22:18", "remaining_time": "0:38:26", "throughput": 13509.85, "total_tokens": 18076352}
|
|
{"current_steps": 5740, "total_steps": 15621, "loss": 0.4411, "lr": 1.59524486066078e-06, "epoch": 0.3674540682414698, "percentage": 36.75, "elapsed_time": "0:22:18", "remaining_time": "0:38:24", "throughput": 13515.42, "total_tokens": 18092096}
|
|
{"current_steps": 5745, "total_steps": 15621, "loss": 0.5834, "lr": 1.5943466342717012e-06, "epoch": 0.3677741501824467, "percentage": 36.78, "elapsed_time": "0:22:19", "remaining_time": "0:38:22", "throughput": 13520.92, "total_tokens": 18107648}
|
|
{"current_steps": 5750, "total_steps": 15621, "loss": 0.4433, "lr": 1.5934476658338708e-06, "epoch": 0.36809423212342357, "percentage": 36.81, "elapsed_time": "0:22:19", "remaining_time": "0:38:20", "throughput": 13526.43, "total_tokens": 18123264}
|
|
{"current_steps": 5755, "total_steps": 15621, "loss": 0.5414, "lr": 1.5925479564696619e-06, "epoch": 0.3684143140644005, "percentage": 36.84, "elapsed_time": "0:22:20", "remaining_time": "0:38:17", "throughput": 13531.73, "total_tokens": 18138368}
|
|
{"current_steps": 5760, "total_steps": 15621, "loss": 0.3336, "lr": 1.5916475073023721e-06, "epoch": 0.3687343960053774, "percentage": 36.87, "elapsed_time": "0:22:21", "remaining_time": "0:38:15", "throughput": 13537.4, "total_tokens": 18154432}
|
|
{"current_steps": 5765, "total_steps": 15621, "loss": 0.3355, "lr": 1.5907463194562226e-06, "epoch": 0.36905447794635426, "percentage": 36.91, "elapsed_time": "0:22:21", "remaining_time": "0:38:13", "throughput": 13543.38, "total_tokens": 18171200}
|
|
{"current_steps": 5770, "total_steps": 15621, "loss": 0.3807, "lr": 1.589844394056357e-06, "epoch": 0.36937455988733114, "percentage": 36.94, "elapsed_time": "0:22:22", "remaining_time": "0:38:11", "throughput": 13549.05, "total_tokens": 18187008}
|
|
{"current_steps": 5775, "total_steps": 15621, "loss": 0.3492, "lr": 1.5889417322288403e-06, "epoch": 0.369694641828308, "percentage": 36.97, "elapsed_time": "0:22:22", "remaining_time": "0:38:09", "throughput": 13554.63, "total_tokens": 18202944}
|
|
{"current_steps": 5780, "total_steps": 15621, "loss": 0.4969, "lr": 1.5880383351006556e-06, "epoch": 0.37001472376928496, "percentage": 37.0, "elapsed_time": "0:22:23", "remaining_time": "0:38:07", "throughput": 13559.82, "total_tokens": 18217984}
|
|
{"current_steps": 5785, "total_steps": 15621, "loss": 0.505, "lr": 1.5871342037997055e-06, "epoch": 0.37033480571026184, "percentage": 37.03, "elapsed_time": "0:22:24", "remaining_time": "0:38:05", "throughput": 13565.38, "total_tokens": 18233984}
|
|
{"current_steps": 5790, "total_steps": 15621, "loss": 0.403, "lr": 1.5862293394548082e-06, "epoch": 0.3706548876512387, "percentage": 37.07, "elapsed_time": "0:22:24", "remaining_time": "0:38:03", "throughput": 13570.53, "total_tokens": 18249024}
|
|
{"current_steps": 5795, "total_steps": 15621, "loss": 0.3414, "lr": 1.5853237431956972e-06, "epoch": 0.3709749695922156, "percentage": 37.1, "elapsed_time": "0:22:25", "remaining_time": "0:38:01", "throughput": 13575.74, "total_tokens": 18264256}
|
|
{"current_steps": 5800, "total_steps": 15621, "loss": 0.5495, "lr": 1.5844174161530206e-06, "epoch": 0.3712950515331925, "percentage": 37.13, "elapsed_time": "0:22:25", "remaining_time": "0:37:59", "throughput": 13581.14, "total_tokens": 18279936}
|
|
{"current_steps": 5805, "total_steps": 15621, "loss": 0.4039, "lr": 1.5835103594583382e-06, "epoch": 0.3716151334741694, "percentage": 37.16, "elapsed_time": "0:22:26", "remaining_time": "0:37:57", "throughput": 13586.61, "total_tokens": 18295488}
|
|
{"current_steps": 5810, "total_steps": 15621, "loss": 0.5329, "lr": 1.5826025742441207e-06, "epoch": 0.3719352154151463, "percentage": 37.19, "elapsed_time": "0:22:27", "remaining_time": "0:37:54", "throughput": 13592.13, "total_tokens": 18311360}
|
|
{"current_steps": 5815, "total_steps": 15621, "loss": 0.4284, "lr": 1.5816940616437486e-06, "epoch": 0.37225529735612317, "percentage": 37.23, "elapsed_time": "0:22:27", "remaining_time": "0:37:52", "throughput": 13597.37, "total_tokens": 18326592}
|
|
{"current_steps": 5820, "total_steps": 15621, "loss": 0.3573, "lr": 1.5807848227915108e-06, "epoch": 0.37257537929710005, "percentage": 37.26, "elapsed_time": "0:22:28", "remaining_time": "0:37:50", "throughput": 13603.54, "total_tokens": 18344000}
|
|
{"current_steps": 5825, "total_steps": 15621, "loss": 0.4787, "lr": 1.5798748588226028e-06, "epoch": 0.37289546123807693, "percentage": 37.29, "elapsed_time": "0:22:29", "remaining_time": "0:37:48", "throughput": 13609.11, "total_tokens": 18359872}
|
|
{"current_steps": 5830, "total_steps": 15621, "loss": 0.4776, "lr": 1.578964170873125e-06, "epoch": 0.3732155431790538, "percentage": 37.32, "elapsed_time": "0:22:29", "remaining_time": "0:37:46", "throughput": 13614.0, "total_tokens": 18374400}
|
|
{"current_steps": 5835, "total_steps": 15621, "loss": 0.2927, "lr": 1.5780527600800816e-06, "epoch": 0.37353562512003075, "percentage": 37.35, "elapsed_time": "0:22:30", "remaining_time": "0:37:44", "throughput": 13619.69, "total_tokens": 18390656}
|
|
{"current_steps": 5840, "total_steps": 15621, "loss": 0.4476, "lr": 1.5771406275813808e-06, "epoch": 0.3738557070610076, "percentage": 37.39, "elapsed_time": "0:22:30", "remaining_time": "0:37:42", "throughput": 13625.06, "total_tokens": 18406400}
|
|
{"current_steps": 5845, "total_steps": 15621, "loss": 0.5497, "lr": 1.5762277745158297e-06, "epoch": 0.3741757890019845, "percentage": 37.42, "elapsed_time": "0:22:31", "remaining_time": "0:37:40", "throughput": 13630.83, "total_tokens": 18422848}
|
|
{"current_steps": 5850, "total_steps": 15621, "loss": 0.4932, "lr": 1.5753142020231365e-06, "epoch": 0.3744958709429614, "percentage": 37.45, "elapsed_time": "0:22:32", "remaining_time": "0:37:38", "throughput": 13636.43, "total_tokens": 18438912}
|
|
{"current_steps": 5855, "total_steps": 15621, "loss": 0.525, "lr": 1.5743999112439073e-06, "epoch": 0.37481595288393826, "percentage": 37.48, "elapsed_time": "0:22:32", "remaining_time": "0:37:36", "throughput": 13642.24, "total_tokens": 18455488}
|
|
{"current_steps": 5860, "total_steps": 15621, "loss": 0.3954, "lr": 1.5734849033196446e-06, "epoch": 0.3751360348249152, "percentage": 37.51, "elapsed_time": "0:22:33", "remaining_time": "0:37:34", "throughput": 13647.11, "total_tokens": 18470080}
|
|
{"current_steps": 5865, "total_steps": 15621, "loss": 0.4337, "lr": 1.5725691793927468e-06, "epoch": 0.3754561167658921, "percentage": 37.55, "elapsed_time": "0:22:33", "remaining_time": "0:37:32", "throughput": 13651.94, "total_tokens": 18484480}
|
|
{"current_steps": 5870, "total_steps": 15621, "loss": 0.46, "lr": 1.5716527406065057e-06, "epoch": 0.37577619870686896, "percentage": 37.58, "elapsed_time": "0:22:34", "remaining_time": "0:37:30", "throughput": 13657.91, "total_tokens": 18501312}
|
|
{"current_steps": 5875, "total_steps": 15621, "loss": 0.449, "lr": 1.570735588105106e-06, "epoch": 0.37609628064784584, "percentage": 37.61, "elapsed_time": "0:22:35", "remaining_time": "0:37:28", "throughput": 13662.88, "total_tokens": 18515968}
|
|
{"current_steps": 5880, "total_steps": 15621, "loss": 0.3901, "lr": 1.5698177230336234e-06, "epoch": 0.3764163625888227, "percentage": 37.64, "elapsed_time": "0:22:35", "remaining_time": "0:37:26", "throughput": 13668.04, "total_tokens": 18531200}
|
|
{"current_steps": 5885, "total_steps": 15621, "loss": 0.2699, "lr": 1.568899146538023e-06, "epoch": 0.37673644452979965, "percentage": 37.67, "elapsed_time": "0:22:36", "remaining_time": "0:37:24", "throughput": 13673.78, "total_tokens": 18547712}
|
|
{"current_steps": 5890, "total_steps": 15621, "loss": 0.4111, "lr": 1.5679798597651587e-06, "epoch": 0.37705652647077653, "percentage": 37.71, "elapsed_time": "0:22:37", "remaining_time": "0:37:21", "throughput": 13678.87, "total_tokens": 18562752}
|
|
{"current_steps": 5895, "total_steps": 15621, "loss": 0.4265, "lr": 1.5670598638627706e-06, "epoch": 0.3773766084117534, "percentage": 37.74, "elapsed_time": "0:22:37", "remaining_time": "0:37:19", "throughput": 13684.17, "total_tokens": 18578368}
|
|
{"current_steps": 5900, "total_steps": 15621, "loss": 0.3882, "lr": 1.5661391599794847e-06, "epoch": 0.3776966903527303, "percentage": 37.77, "elapsed_time": "0:22:38", "remaining_time": "0:37:17", "throughput": 13689.26, "total_tokens": 18593408}
|
|
{"current_steps": 5905, "total_steps": 15621, "loss": 0.4155, "lr": 1.56521774926481e-06, "epoch": 0.37801677229370717, "percentage": 37.8, "elapsed_time": "0:22:38", "remaining_time": "0:37:15", "throughput": 13694.14, "total_tokens": 18607872}
|
|
{"current_steps": 5910, "total_steps": 15621, "loss": 0.359, "lr": 1.5642956328691393e-06, "epoch": 0.3783368542346841, "percentage": 37.83, "elapsed_time": "0:22:39", "remaining_time": "0:37:14", "throughput": 13697.15, "total_tokens": 18624000}
|
|
{"current_steps": 5915, "total_steps": 15621, "loss": 0.564, "lr": 1.5633728119437451e-06, "epoch": 0.378656936175661, "percentage": 37.87, "elapsed_time": "0:22:40", "remaining_time": "0:37:12", "throughput": 13703.06, "total_tokens": 18640704}
|
|
{"current_steps": 5920, "total_steps": 15621, "loss": 0.4568, "lr": 1.5624492876407807e-06, "epoch": 0.37897701811663786, "percentage": 37.9, "elapsed_time": "0:22:40", "remaining_time": "0:37:10", "throughput": 13709.36, "total_tokens": 18658368}
|
|
{"current_steps": 5925, "total_steps": 15621, "loss": 0.4087, "lr": 1.5615250611132766e-06, "epoch": 0.37929710005761474, "percentage": 37.93, "elapsed_time": "0:22:41", "remaining_time": "0:37:08", "throughput": 13715.44, "total_tokens": 18675584}
|
|
{"current_steps": 5930, "total_steps": 15621, "loss": 0.5669, "lr": 1.5606001335151405e-06, "epoch": 0.3796171819985916, "percentage": 37.96, "elapsed_time": "0:22:42", "remaining_time": "0:37:06", "throughput": 13721.08, "total_tokens": 18691904}
|
|
{"current_steps": 5935, "total_steps": 15621, "loss": 0.3744, "lr": 1.5596745060011561e-06, "epoch": 0.3799372639395685, "percentage": 37.99, "elapsed_time": "0:22:42", "remaining_time": "0:37:04", "throughput": 13726.95, "total_tokens": 18708736}
|
|
{"current_steps": 5940, "total_steps": 15621, "loss": 0.3464, "lr": 1.5587481797269793e-06, "epoch": 0.38025734588054544, "percentage": 38.03, "elapsed_time": "0:22:43", "remaining_time": "0:37:02", "throughput": 13732.07, "total_tokens": 18724032}
|
|
{"current_steps": 5945, "total_steps": 15621, "loss": 0.4203, "lr": 1.5578211558491396e-06, "epoch": 0.3805774278215223, "percentage": 38.06, "elapsed_time": "0:22:44", "remaining_time": "0:37:00", "throughput": 13737.68, "total_tokens": 18740352}
|
|
{"current_steps": 5950, "total_steps": 15621, "loss": 0.3225, "lr": 1.5568934355250375e-06, "epoch": 0.3808975097624992, "percentage": 38.09, "elapsed_time": "0:22:44", "remaining_time": "0:36:58", "throughput": 13742.38, "total_tokens": 18754560}
|
|
{"current_steps": 5955, "total_steps": 15621, "loss": 0.6491, "lr": 1.5559650199129423e-06, "epoch": 0.3812175917034761, "percentage": 38.12, "elapsed_time": "0:22:45", "remaining_time": "0:36:56", "throughput": 13747.34, "total_tokens": 18769280}
|
|
{"current_steps": 5960, "total_steps": 15621, "loss": 0.4012, "lr": 1.5550359101719921e-06, "epoch": 0.38153767364445296, "percentage": 38.15, "elapsed_time": "0:22:45", "remaining_time": "0:36:54", "throughput": 13752.48, "total_tokens": 18784512}
|
|
{"current_steps": 5965, "total_steps": 15621, "loss": 0.3561, "lr": 1.554106107462191e-06, "epoch": 0.3818577555854299, "percentage": 38.19, "elapsed_time": "0:22:46", "remaining_time": "0:36:52", "throughput": 13757.88, "total_tokens": 18800384}
|
|
{"current_steps": 5970, "total_steps": 15621, "loss": 0.4248, "lr": 1.5531756129444092e-06, "epoch": 0.38217783752640677, "percentage": 38.22, "elapsed_time": "0:22:47", "remaining_time": "0:36:50", "throughput": 13763.03, "total_tokens": 18815552}
|
|
{"current_steps": 5975, "total_steps": 15621, "loss": 0.3884, "lr": 1.5522444277803796e-06, "epoch": 0.38249791946738365, "percentage": 38.25, "elapsed_time": "0:22:47", "remaining_time": "0:36:47", "throughput": 13767.86, "total_tokens": 18830080}
|
|
{"current_steps": 5980, "total_steps": 15621, "loss": 0.4319, "lr": 1.5513125531326976e-06, "epoch": 0.38281800140836053, "percentage": 38.28, "elapsed_time": "0:22:48", "remaining_time": "0:36:46", "throughput": 13773.37, "total_tokens": 18846272}
|
|
{"current_steps": 5985, "total_steps": 15621, "loss": 0.3747, "lr": 1.5503799901648198e-06, "epoch": 0.3831380833493374, "percentage": 38.31, "elapsed_time": "0:22:48", "remaining_time": "0:36:43", "throughput": 13778.2, "total_tokens": 18860928}
|
|
{"current_steps": 5990, "total_steps": 15621, "loss": 0.4553, "lr": 1.5494467400410625e-06, "epoch": 0.38345816529031435, "percentage": 38.35, "elapsed_time": "0:22:49", "remaining_time": "0:36:41", "throughput": 13783.63, "total_tokens": 18877120}
|
|
{"current_steps": 5995, "total_steps": 15621, "loss": 0.6017, "lr": 1.5485128039265986e-06, "epoch": 0.3837782472312912, "percentage": 38.38, "elapsed_time": "0:22:50", "remaining_time": "0:36:39", "throughput": 13788.7, "total_tokens": 18892224}
|
|
{"current_steps": 6000, "total_steps": 15621, "loss": 0.4408, "lr": 1.547578182987459e-06, "epoch": 0.3840983291722681, "percentage": 38.41, "elapsed_time": "0:22:50", "remaining_time": "0:36:37", "throughput": 13793.59, "total_tokens": 18907008}
|
|
{"current_steps": 6005, "total_steps": 15621, "loss": 0.2736, "lr": 1.5466428783905286e-06, "epoch": 0.384418411113245, "percentage": 38.44, "elapsed_time": "0:22:51", "remaining_time": "0:36:35", "throughput": 13798.71, "total_tokens": 18922368}
|
|
{"current_steps": 6010, "total_steps": 15621, "loss": 0.4288, "lr": 1.5457068913035463e-06, "epoch": 0.38473849305422186, "percentage": 38.47, "elapsed_time": "0:22:51", "remaining_time": "0:36:33", "throughput": 13803.76, "total_tokens": 18937536}
|
|
{"current_steps": 6015, "total_steps": 15621, "loss": 0.4784, "lr": 1.544770222895103e-06, "epoch": 0.38505857499519874, "percentage": 38.51, "elapsed_time": "0:22:52", "remaining_time": "0:36:31", "throughput": 13809.42, "total_tokens": 18954048}
|
|
{"current_steps": 6020, "total_steps": 15621, "loss": 0.5188, "lr": 1.5438328743346398e-06, "epoch": 0.3853786569361757, "percentage": 38.54, "elapsed_time": "0:22:53", "remaining_time": "0:36:29", "throughput": 13814.55, "total_tokens": 18969472}
|
|
{"current_steps": 6025, "total_steps": 15621, "loss": 0.4098, "lr": 1.5428948467924478e-06, "epoch": 0.38569873887715256, "percentage": 38.57, "elapsed_time": "0:22:53", "remaining_time": "0:36:27", "throughput": 13819.28, "total_tokens": 18983872}
|
|
{"current_steps": 6030, "total_steps": 15621, "loss": 0.3223, "lr": 1.5419561414396656e-06, "epoch": 0.38601882081812944, "percentage": 38.6, "elapsed_time": "0:22:54", "remaining_time": "0:36:25", "throughput": 13824.4, "total_tokens": 18999360}
|
|
{"current_steps": 6035, "total_steps": 15621, "loss": 0.4888, "lr": 1.541016759448277e-06, "epoch": 0.3863389027591063, "percentage": 38.63, "elapsed_time": "0:22:54", "remaining_time": "0:36:23", "throughput": 13829.77, "total_tokens": 19015424}
|
|
{"current_steps": 6040, "total_steps": 15621, "loss": 0.3641, "lr": 1.5400767019911124e-06, "epoch": 0.3866589847000832, "percentage": 38.67, "elapsed_time": "0:22:55", "remaining_time": "0:36:22", "throughput": 13835.19, "total_tokens": 19031616}
|
|
{"current_steps": 6045, "total_steps": 15621, "loss": 0.4821, "lr": 1.539135970241844e-06, "epoch": 0.38697906664106013, "percentage": 38.7, "elapsed_time": "0:22:56", "remaining_time": "0:36:20", "throughput": 13840.31, "total_tokens": 19047040}
|
|
{"current_steps": 6050, "total_steps": 15621, "loss": 0.479, "lr": 1.5381945653749866e-06, "epoch": 0.387299148582037, "percentage": 38.73, "elapsed_time": "0:22:56", "remaining_time": "0:36:18", "throughput": 13845.62, "total_tokens": 19062848}
|
|
{"current_steps": 6055, "total_steps": 15621, "loss": 0.5564, "lr": 1.5372524885658952e-06, "epoch": 0.3876192305230139, "percentage": 38.76, "elapsed_time": "0:22:57", "remaining_time": "0:36:16", "throughput": 13851.1, "total_tokens": 19078976}
|
|
{"current_steps": 6060, "total_steps": 15621, "loss": 0.3676, "lr": 1.5363097409907638e-06, "epoch": 0.38793931246399077, "percentage": 38.79, "elapsed_time": "0:22:58", "remaining_time": "0:36:14", "throughput": 13855.9, "total_tokens": 19093632}
|
|
{"current_steps": 6065, "total_steps": 15621, "loss": 0.3605, "lr": 1.535366323826624e-06, "epoch": 0.38825939440496765, "percentage": 38.83, "elapsed_time": "0:22:58", "remaining_time": "0:36:12", "throughput": 13861.0, "total_tokens": 19109056}
|
|
{"current_steps": 6070, "total_steps": 15621, "loss": 0.3699, "lr": 1.534422238251343e-06, "epoch": 0.3885794763459446, "percentage": 38.86, "elapsed_time": "0:22:59", "remaining_time": "0:36:10", "throughput": 13866.09, "total_tokens": 19124544}
|
|
{"current_steps": 6075, "total_steps": 15621, "loss": 0.3834, "lr": 1.5334774854436223e-06, "epoch": 0.38889955828692147, "percentage": 38.89, "elapsed_time": "0:22:59", "remaining_time": "0:36:08", "throughput": 13871.39, "total_tokens": 19140480}
|
|
{"current_steps": 6080, "total_steps": 15621, "loss": 0.3776, "lr": 1.5325320665829975e-06, "epoch": 0.38921964022789834, "percentage": 38.92, "elapsed_time": "0:23:00", "remaining_time": "0:36:06", "throughput": 13876.8, "total_tokens": 19156736}
|
|
{"current_steps": 6085, "total_steps": 15621, "loss": 0.4455, "lr": 1.5315859828498352e-06, "epoch": 0.3895397221688752, "percentage": 38.95, "elapsed_time": "0:23:01", "remaining_time": "0:36:04", "throughput": 13881.61, "total_tokens": 19171520}
|
|
{"current_steps": 6090, "total_steps": 15621, "loss": 0.4921, "lr": 1.5306392354253316e-06, "epoch": 0.3898598041098521, "percentage": 38.99, "elapsed_time": "0:23:01", "remaining_time": "0:36:02", "throughput": 13886.81, "total_tokens": 19187136}
|
|
{"current_steps": 6095, "total_steps": 15621, "loss": 0.4377, "lr": 1.5296918254915123e-06, "epoch": 0.39017988605082904, "percentage": 39.02, "elapsed_time": "0:23:02", "remaining_time": "0:36:00", "throughput": 13891.6, "total_tokens": 19201856}
|
|
{"current_steps": 6100, "total_steps": 15621, "loss": 0.3869, "lr": 1.5287437542312296e-06, "epoch": 0.3904999679918059, "percentage": 39.05, "elapsed_time": "0:23:02", "remaining_time": "0:35:58", "throughput": 13896.37, "total_tokens": 19216704}
|
|
{"current_steps": 6105, "total_steps": 15621, "loss": 0.5316, "lr": 1.5277950228281614e-06, "epoch": 0.3908200499327828, "percentage": 39.08, "elapsed_time": "0:23:03", "remaining_time": "0:35:56", "throughput": 13902.1, "total_tokens": 19233408}
|
|
{"current_steps": 6110, "total_steps": 15621, "loss": 0.354, "lr": 1.52684563246681e-06, "epoch": 0.3911401318737597, "percentage": 39.11, "elapsed_time": "0:23:04", "remaining_time": "0:35:54", "throughput": 13907.76, "total_tokens": 19250048}
|
|
{"current_steps": 6115, "total_steps": 15621, "loss": 0.4243, "lr": 1.5258955843325015e-06, "epoch": 0.39146021381473656, "percentage": 39.15, "elapsed_time": "0:23:04", "remaining_time": "0:35:52", "throughput": 13913.33, "total_tokens": 19266560}
|
|
{"current_steps": 6120, "total_steps": 15621, "loss": 0.4885, "lr": 1.5249448796113804e-06, "epoch": 0.39178029575571344, "percentage": 39.18, "elapsed_time": "0:23:05", "remaining_time": "0:35:50", "throughput": 13918.15, "total_tokens": 19281408}
|
|
{"current_steps": 6125, "total_steps": 15621, "loss": 0.4747, "lr": 1.5239935194904141e-06, "epoch": 0.39210037769669037, "percentage": 39.21, "elapsed_time": "0:23:05", "remaining_time": "0:35:48", "throughput": 13923.01, "total_tokens": 19296384}
|
|
{"current_steps": 6130, "total_steps": 15621, "loss": 0.3702, "lr": 1.523041505157386e-06, "epoch": 0.39242045963766725, "percentage": 39.24, "elapsed_time": "0:23:06", "remaining_time": "0:35:46", "throughput": 13928.12, "total_tokens": 19312000}
|
|
{"current_steps": 6135, "total_steps": 15621, "loss": 0.3909, "lr": 1.5220888378008977e-06, "epoch": 0.39274054157864413, "percentage": 39.27, "elapsed_time": "0:23:07", "remaining_time": "0:35:44", "throughput": 13933.2, "total_tokens": 19327488}
|
|
{"current_steps": 6140, "total_steps": 15621, "loss": 0.4661, "lr": 1.5211355186103654e-06, "epoch": 0.393060623519621, "percentage": 39.31, "elapsed_time": "0:23:07", "remaining_time": "0:35:42", "throughput": 13937.92, "total_tokens": 19342080}
|
|
{"current_steps": 6145, "total_steps": 15621, "loss": 0.4126, "lr": 1.5201815487760192e-06, "epoch": 0.3933807054605979, "percentage": 39.34, "elapsed_time": "0:23:08", "remaining_time": "0:35:40", "throughput": 13943.31, "total_tokens": 19358336}
|
|
{"current_steps": 6150, "total_steps": 15621, "loss": 0.508, "lr": 1.5192269294889019e-06, "epoch": 0.3937007874015748, "percentage": 39.37, "elapsed_time": "0:23:08", "remaining_time": "0:35:38", "throughput": 13948.21, "total_tokens": 19373376}
|
|
{"current_steps": 6155, "total_steps": 15621, "loss": 0.4029, "lr": 1.5182716619408666e-06, "epoch": 0.3940208693425517, "percentage": 39.4, "elapsed_time": "0:23:09", "remaining_time": "0:35:37", "throughput": 13953.16, "total_tokens": 19388608}
|
|
{"current_steps": 6160, "total_steps": 15621, "loss": 0.5398, "lr": 1.5173157473245764e-06, "epoch": 0.3943409512835286, "percentage": 39.43, "elapsed_time": "0:23:10", "remaining_time": "0:35:35", "throughput": 13957.85, "total_tokens": 19403264}
|
|
{"current_steps": 6165, "total_steps": 15621, "loss": 0.4363, "lr": 1.5163591868335016e-06, "epoch": 0.39466103322450546, "percentage": 39.47, "elapsed_time": "0:23:10", "remaining_time": "0:35:33", "throughput": 13962.87, "total_tokens": 19418816}
|
|
{"current_steps": 6170, "total_steps": 15621, "loss": 0.5781, "lr": 1.515401981661919e-06, "epoch": 0.39498111516548234, "percentage": 39.5, "elapsed_time": "0:23:11", "remaining_time": "0:35:31", "throughput": 13968.37, "total_tokens": 19435392}
|
|
{"current_steps": 6175, "total_steps": 15621, "loss": 0.4592, "lr": 1.514444133004911e-06, "epoch": 0.3953011971064593, "percentage": 39.53, "elapsed_time": "0:23:11", "remaining_time": "0:35:29", "throughput": 13973.05, "total_tokens": 19450048}
|
|
{"current_steps": 6180, "total_steps": 15621, "loss": 0.4592, "lr": 1.5134856420583631e-06, "epoch": 0.39562127904743616, "percentage": 39.56, "elapsed_time": "0:23:12", "remaining_time": "0:35:27", "throughput": 13978.5, "total_tokens": 19466368}
|
|
{"current_steps": 6185, "total_steps": 15621, "loss": 0.3338, "lr": 1.5125265100189614e-06, "epoch": 0.39594136098841304, "percentage": 39.59, "elapsed_time": "0:23:13", "remaining_time": "0:35:25", "throughput": 13983.82, "total_tokens": 19482624}
|
|
{"current_steps": 6190, "total_steps": 15621, "loss": 0.5304, "lr": 1.5115667380841948e-06, "epoch": 0.3962614429293899, "percentage": 39.63, "elapsed_time": "0:23:13", "remaining_time": "0:35:23", "throughput": 13988.85, "total_tokens": 19498048}
|
|
{"current_steps": 6195, "total_steps": 15621, "loss": 0.43, "lr": 1.510606327452349e-06, "epoch": 0.3965815248703668, "percentage": 39.66, "elapsed_time": "0:23:14", "remaining_time": "0:35:21", "throughput": 13994.54, "total_tokens": 19515264}
|
|
{"current_steps": 6200, "total_steps": 15621, "loss": 0.4319, "lr": 1.5096452793225082e-06, "epoch": 0.3969016068113437, "percentage": 39.69, "elapsed_time": "0:23:15", "remaining_time": "0:35:19", "throughput": 14000.52, "total_tokens": 19533056}
|
|
{"current_steps": 6205, "total_steps": 15621, "loss": 0.4003, "lr": 1.5086835948945522e-06, "epoch": 0.3972216887523206, "percentage": 39.72, "elapsed_time": "0:23:15", "remaining_time": "0:35:18", "throughput": 14005.47, "total_tokens": 19548480}
|
|
{"current_steps": 6210, "total_steps": 15621, "loss": 0.3271, "lr": 1.5077212753691556e-06, "epoch": 0.3975417706932975, "percentage": 39.75, "elapsed_time": "0:23:16", "remaining_time": "0:35:16", "throughput": 14010.39, "total_tokens": 19563712}
|
|
{"current_steps": 6215, "total_steps": 15621, "loss": 0.4049, "lr": 1.5067583219477852e-06, "epoch": 0.39786185263427437, "percentage": 39.79, "elapsed_time": "0:23:16", "remaining_time": "0:35:14", "throughput": 14015.19, "total_tokens": 19578624}
|
|
{"current_steps": 6220, "total_steps": 15621, "loss": 0.3916, "lr": 1.5057947358327e-06, "epoch": 0.39818193457525125, "percentage": 39.82, "elapsed_time": "0:23:17", "remaining_time": "0:35:12", "throughput": 14019.87, "total_tokens": 19593408}
|
|
{"current_steps": 6225, "total_steps": 15621, "loss": 0.4907, "lr": 1.504830518226948e-06, "epoch": 0.39850201651622813, "percentage": 39.85, "elapsed_time": "0:23:18", "remaining_time": "0:35:10", "throughput": 14025.08, "total_tokens": 19609216}
|
|
{"current_steps": 6230, "total_steps": 15621, "loss": 0.449, "lr": 1.5038656703343672e-06, "epoch": 0.39882209845720507, "percentage": 39.88, "elapsed_time": "0:23:18", "remaining_time": "0:35:08", "throughput": 14030.08, "total_tokens": 19624896}
|
|
{"current_steps": 6235, "total_steps": 15621, "loss": 0.4925, "lr": 1.5029001933595805e-06, "epoch": 0.39914218039818194, "percentage": 39.91, "elapsed_time": "0:23:19", "remaining_time": "0:35:06", "throughput": 14035.0, "total_tokens": 19640128}
|
|
{"current_steps": 6240, "total_steps": 15621, "loss": 0.3433, "lr": 1.501934088507998e-06, "epoch": 0.3994622623391588, "percentage": 39.95, "elapsed_time": "0:23:19", "remaining_time": "0:35:04", "throughput": 14040.1, "total_tokens": 19655680}
|
|
{"current_steps": 6245, "total_steps": 15621, "loss": 0.6227, "lr": 1.5009673569858126e-06, "epoch": 0.3997823442801357, "percentage": 39.98, "elapsed_time": "0:23:20", "remaining_time": "0:35:02", "throughput": 14045.49, "total_tokens": 19672192}
|
|
{"current_steps": 6250, "total_steps": 15621, "loss": 0.5284, "lr": 1.5e-06, "epoch": 0.4001024262211126, "percentage": 40.01, "elapsed_time": "0:23:21", "remaining_time": "0:35:00", "throughput": 14050.98, "total_tokens": 19688896}
|
|
{"current_steps": 6255, "total_steps": 15621, "loss": 0.3547, "lr": 1.4990320187583167e-06, "epoch": 0.4004225081620895, "percentage": 40.04, "elapsed_time": "0:23:21", "remaining_time": "0:34:59", "throughput": 14055.89, "total_tokens": 19704128}
|
|
{"current_steps": 6256, "total_steps": 15621, "eval_loss": 0.42333245277404785, "epoch": 0.4004865245502849, "percentage": 40.05, "elapsed_time": "0:24:11", "remaining_time": "0:36:12", "throughput": 13580.8, "total_tokens": 19707456}
|
|
{"current_steps": 6260, "total_steps": 15621, "loss": 0.395, "lr": 1.4980634144692986e-06, "epoch": 0.4007425901030664, "percentage": 40.07, "elapsed_time": "0:24:53", "remaining_time": "0:37:13", "throughput": 13204.42, "total_tokens": 19719744}
|
|
{"current_steps": 6265, "total_steps": 15621, "loss": 0.3795, "lr": 1.4970941883422599e-06, "epoch": 0.4010626720440433, "percentage": 40.11, "elapsed_time": "0:24:54", "remaining_time": "0:37:11", "throughput": 13209.87, "total_tokens": 19736128}
|
|
{"current_steps": 6270, "total_steps": 15621, "loss": 0.4165, "lr": 1.4961243415872901e-06, "epoch": 0.40138275398502016, "percentage": 40.14, "elapsed_time": "0:24:54", "remaining_time": "0:37:09", "throughput": 13214.73, "total_tokens": 19751296}
|
|
{"current_steps": 6275, "total_steps": 15621, "loss": 0.4057, "lr": 1.4951538754152551e-06, "epoch": 0.40170283592599704, "percentage": 40.17, "elapsed_time": "0:24:55", "remaining_time": "0:37:06", "throughput": 13219.35, "total_tokens": 19765888}
|
|
{"current_steps": 6280, "total_steps": 15621, "loss": 0.4205, "lr": 1.4941827910377925e-06, "epoch": 0.402022917866974, "percentage": 40.2, "elapsed_time": "0:24:55", "remaining_time": "0:37:04", "throughput": 13224.09, "total_tokens": 19780864}
|
|
{"current_steps": 6285, "total_steps": 15621, "loss": 0.4014, "lr": 1.4932110896673131e-06, "epoch": 0.40234299980795085, "percentage": 40.23, "elapsed_time": "0:24:56", "remaining_time": "0:37:02", "throughput": 13229.33, "total_tokens": 19796864}
|
|
{"current_steps": 6290, "total_steps": 15621, "loss": 0.5395, "lr": 1.4922387725169973e-06, "epoch": 0.40266308174892773, "percentage": 40.27, "elapsed_time": "0:24:57", "remaining_time": "0:37:00", "throughput": 13234.16, "total_tokens": 19811904}
|
|
{"current_steps": 6295, "total_steps": 15621, "loss": 0.4049, "lr": 1.4912658408007947e-06, "epoch": 0.4029831636899046, "percentage": 40.3, "elapsed_time": "0:24:57", "remaining_time": "0:36:58", "throughput": 13239.16, "total_tokens": 19827456}
|
|
{"current_steps": 6300, "total_steps": 15621, "loss": 0.4269, "lr": 1.4902922957334215e-06, "epoch": 0.4033032456308815, "percentage": 40.33, "elapsed_time": "0:24:58", "remaining_time": "0:36:56", "throughput": 13243.97, "total_tokens": 19842496}
|
|
{"current_steps": 6305, "total_steps": 15621, "loss": 0.408, "lr": 1.4893181385303608e-06, "epoch": 0.40362332757185837, "percentage": 40.36, "elapsed_time": "0:24:58", "remaining_time": "0:36:54", "throughput": 13249.02, "total_tokens": 19858240}
|
|
{"current_steps": 6310, "total_steps": 15621, "loss": 0.3994, "lr": 1.4883433704078584e-06, "epoch": 0.4039434095128353, "percentage": 40.39, "elapsed_time": "0:24:59", "remaining_time": "0:36:52", "throughput": 13254.33, "total_tokens": 19874368}
|
|
{"current_steps": 6315, "total_steps": 15621, "loss": 0.3874, "lr": 1.4873679925829246e-06, "epoch": 0.4042634914538122, "percentage": 40.43, "elapsed_time": "0:25:00", "remaining_time": "0:36:50", "throughput": 13260.06, "total_tokens": 19891904}
|
|
{"current_steps": 6320, "total_steps": 15621, "loss": 0.4077, "lr": 1.4863920062733298e-06, "epoch": 0.40458357339478906, "percentage": 40.46, "elapsed_time": "0:25:00", "remaining_time": "0:36:48", "throughput": 13264.99, "total_tokens": 19907392}
|
|
{"current_steps": 6325, "total_steps": 15621, "loss": 0.3779, "lr": 1.485415412697604e-06, "epoch": 0.40490365533576594, "percentage": 40.49, "elapsed_time": "0:25:01", "remaining_time": "0:36:46", "throughput": 13269.9, "total_tokens": 19922624}
|
|
{"current_steps": 6330, "total_steps": 15621, "loss": 0.4348, "lr": 1.484438213075036e-06, "epoch": 0.4052237372767428, "percentage": 40.52, "elapsed_time": "0:25:01", "remaining_time": "0:36:44", "throughput": 13275.36, "total_tokens": 19939328}
|
|
{"current_steps": 6335, "total_steps": 15621, "loss": 0.4465, "lr": 1.4834604086256713e-06, "epoch": 0.40554381921771976, "percentage": 40.55, "elapsed_time": "0:25:02", "remaining_time": "0:36:42", "throughput": 13280.54, "total_tokens": 19955392}
|
|
{"current_steps": 6340, "total_steps": 15621, "loss": 0.3818, "lr": 1.4824820005703097e-06, "epoch": 0.40586390115869664, "percentage": 40.59, "elapsed_time": "0:25:03", "remaining_time": "0:36:40", "throughput": 13285.79, "total_tokens": 19971520}
|
|
{"current_steps": 6345, "total_steps": 15621, "loss": 0.46, "lr": 1.4815029901305061e-06, "epoch": 0.4061839830996735, "percentage": 40.62, "elapsed_time": "0:25:03", "remaining_time": "0:36:38", "throughput": 13291.3, "total_tokens": 19988352}
|
|
{"current_steps": 6350, "total_steps": 15621, "loss": 0.4748, "lr": 1.480523378528565e-06, "epoch": 0.4065040650406504, "percentage": 40.65, "elapsed_time": "0:25:04", "remaining_time": "0:36:36", "throughput": 13296.76, "total_tokens": 20005184}
|
|
{"current_steps": 6355, "total_steps": 15621, "loss": 0.4064, "lr": 1.4795431669875441e-06, "epoch": 0.4068241469816273, "percentage": 40.68, "elapsed_time": "0:25:05", "remaining_time": "0:36:34", "throughput": 13301.76, "total_tokens": 20020800}
|
|
{"current_steps": 6360, "total_steps": 15621, "loss": 0.472, "lr": 1.478562356731249e-06, "epoch": 0.4071442289226042, "percentage": 40.71, "elapsed_time": "0:25:05", "remaining_time": "0:36:32", "throughput": 13306.76, "total_tokens": 20036416}
|
|
{"current_steps": 6365, "total_steps": 15621, "loss": 0.4525, "lr": 1.4775809489842326e-06, "epoch": 0.4074643108635811, "percentage": 40.75, "elapsed_time": "0:25:06", "remaining_time": "0:36:30", "throughput": 13312.21, "total_tokens": 20053184}
|
|
{"current_steps": 6370, "total_steps": 15621, "loss": 0.3987, "lr": 1.4765989449717937e-06, "epoch": 0.40778439280455797, "percentage": 40.78, "elapsed_time": "0:25:07", "remaining_time": "0:36:28", "throughput": 13317.62, "total_tokens": 20069888}
|
|
{"current_steps": 6375, "total_steps": 15621, "loss": 0.5504, "lr": 1.4756163459199763e-06, "epoch": 0.40810447474553485, "percentage": 40.81, "elapsed_time": "0:25:07", "remaining_time": "0:36:26", "throughput": 13322.66, "total_tokens": 20085760}
|
|
{"current_steps": 6380, "total_steps": 15621, "loss": 0.2742, "lr": 1.4746331530555665e-06, "epoch": 0.40842455668651173, "percentage": 40.84, "elapsed_time": "0:25:08", "remaining_time": "0:36:24", "throughput": 13327.55, "total_tokens": 20101056}
|
|
{"current_steps": 6385, "total_steps": 15621, "loss": 0.4133, "lr": 1.4736493676060923e-06, "epoch": 0.4087446386274886, "percentage": 40.87, "elapsed_time": "0:25:08", "remaining_time": "0:36:22", "throughput": 13332.3, "total_tokens": 20116352}
|
|
{"current_steps": 6390, "total_steps": 15621, "loss": 0.3642, "lr": 1.4726649907998216e-06, "epoch": 0.40906472056846555, "percentage": 40.91, "elapsed_time": "0:25:09", "remaining_time": "0:36:20", "throughput": 13337.14, "total_tokens": 20131712}
|
|
{"current_steps": 6395, "total_steps": 15621, "loss": 0.3759, "lr": 1.4716800238657599e-06, "epoch": 0.4093848025094424, "percentage": 40.94, "elapsed_time": "0:25:10", "remaining_time": "0:36:18", "throughput": 13341.91, "total_tokens": 20146880}
|
|
{"current_steps": 6400, "total_steps": 15621, "loss": 0.2767, "lr": 1.4706944680336505e-06, "epoch": 0.4097048844504193, "percentage": 40.97, "elapsed_time": "0:25:10", "remaining_time": "0:36:16", "throughput": 13347.27, "total_tokens": 20163520}
|
|
{"current_steps": 6405, "total_steps": 15621, "loss": 0.4681, "lr": 1.469708324533971e-06, "epoch": 0.4100249663913962, "percentage": 41.0, "elapsed_time": "0:25:11", "remaining_time": "0:36:14", "throughput": 13351.72, "total_tokens": 20177984}
|
|
{"current_steps": 6410, "total_steps": 15621, "loss": 0.3395, "lr": 1.4687215945979335e-06, "epoch": 0.41034504833237306, "percentage": 41.03, "elapsed_time": "0:25:11", "remaining_time": "0:36:12", "throughput": 13356.63, "total_tokens": 20193472}
|
|
{"current_steps": 6415, "total_steps": 15621, "loss": 0.4507, "lr": 1.4677342794574815e-06, "epoch": 0.41066513027335, "percentage": 41.07, "elapsed_time": "0:25:12", "remaining_time": "0:36:10", "throughput": 13362.25, "total_tokens": 20210624}
|
|
{"current_steps": 6420, "total_steps": 15621, "loss": 0.4199, "lr": 1.4667463803452902e-06, "epoch": 0.4109852122143269, "percentage": 41.1, "elapsed_time": "0:25:13", "remaining_time": "0:36:08", "throughput": 13367.36, "total_tokens": 20226688}
|
|
{"current_steps": 6425, "total_steps": 15621, "loss": 0.4472, "lr": 1.4657578984947627e-06, "epoch": 0.41130529415530376, "percentage": 41.13, "elapsed_time": "0:25:13", "remaining_time": "0:36:06", "throughput": 13373.26, "total_tokens": 20244608}
|
|
{"current_steps": 6430, "total_steps": 15621, "loss": 0.3699, "lr": 1.4647688351400303e-06, "epoch": 0.41162537609628064, "percentage": 41.16, "elapsed_time": "0:25:14", "remaining_time": "0:36:04", "throughput": 13378.53, "total_tokens": 20261184}
|
|
{"current_steps": 6435, "total_steps": 15621, "loss": 0.3348, "lr": 1.46377919151595e-06, "epoch": 0.4119454580372575, "percentage": 41.19, "elapsed_time": "0:25:15", "remaining_time": "0:36:02", "throughput": 13383.37, "total_tokens": 20276736}
|
|
{"current_steps": 6440, "total_steps": 15621, "loss": 0.4651, "lr": 1.462788968858104e-06, "epoch": 0.41226553997823445, "percentage": 41.23, "elapsed_time": "0:25:15", "remaining_time": "0:36:00", "throughput": 13388.87, "total_tokens": 20293888}
|
|
{"current_steps": 6445, "total_steps": 15621, "loss": 0.482, "lr": 1.4617981684027966e-06, "epoch": 0.41258562191921133, "percentage": 41.26, "elapsed_time": "0:25:16", "remaining_time": "0:35:58", "throughput": 13393.83, "total_tokens": 20309696}
|
|
{"current_steps": 6450, "total_steps": 15621, "loss": 0.4013, "lr": 1.4608067913870536e-06, "epoch": 0.4129057038601882, "percentage": 41.29, "elapsed_time": "0:25:16", "remaining_time": "0:35:56", "throughput": 13398.85, "total_tokens": 20325632}
|
|
{"current_steps": 6455, "total_steps": 15621, "loss": 0.3968, "lr": 1.4598148390486213e-06, "epoch": 0.4132257858011651, "percentage": 41.32, "elapsed_time": "0:25:17", "remaining_time": "0:35:54", "throughput": 13404.0, "total_tokens": 20341888}
|
|
{"current_steps": 6460, "total_steps": 15621, "loss": 0.5073, "lr": 1.4588223126259639e-06, "epoch": 0.41354586774214197, "percentage": 41.35, "elapsed_time": "0:25:18", "remaining_time": "0:35:53", "throughput": 13409.39, "total_tokens": 20358656}
|
|
{"current_steps": 6465, "total_steps": 15621, "loss": 0.3245, "lr": 1.4578292133582615e-06, "epoch": 0.4138659496831189, "percentage": 41.39, "elapsed_time": "0:25:18", "remaining_time": "0:35:51", "throughput": 13413.69, "total_tokens": 20372864}
|
|
{"current_steps": 6470, "total_steps": 15621, "loss": 0.3954, "lr": 1.456835542485411e-06, "epoch": 0.4141860316240958, "percentage": 41.42, "elapsed_time": "0:25:19", "remaining_time": "0:35:49", "throughput": 13418.29, "total_tokens": 20387840}
|
|
{"current_steps": 6475, "total_steps": 15621, "loss": 0.4092, "lr": 1.4558413012480215e-06, "epoch": 0.41450611356507266, "percentage": 41.45, "elapsed_time": "0:25:20", "remaining_time": "0:35:47", "throughput": 13423.68, "total_tokens": 20404736}
|
|
{"current_steps": 6480, "total_steps": 15621, "loss": 0.5673, "lr": 1.4548464908874156e-06, "epoch": 0.41482619550604954, "percentage": 41.48, "elapsed_time": "0:25:20", "remaining_time": "0:35:45", "throughput": 13429.6, "total_tokens": 20422848}
|
|
{"current_steps": 6485, "total_steps": 15621, "loss": 0.3996, "lr": 1.4538511126456255e-06, "epoch": 0.4151462774470264, "percentage": 41.51, "elapsed_time": "0:25:21", "remaining_time": "0:35:43", "throughput": 13434.27, "total_tokens": 20438016}
|
|
{"current_steps": 6490, "total_steps": 15621, "loss": 0.5913, "lr": 1.452855167765392e-06, "epoch": 0.4154663593880033, "percentage": 41.55, "elapsed_time": "0:25:21", "remaining_time": "0:35:41", "throughput": 13439.47, "total_tokens": 20454464}
|
|
{"current_steps": 6495, "total_steps": 15621, "loss": 0.4487, "lr": 1.4518586574901647e-06, "epoch": 0.41578644132898024, "percentage": 41.58, "elapsed_time": "0:25:22", "remaining_time": "0:35:39", "throughput": 13444.51, "total_tokens": 20470464}
|
|
{"current_steps": 6500, "total_steps": 15621, "loss": 0.4617, "lr": 1.450861583064098e-06, "epoch": 0.4161065232699571, "percentage": 41.61, "elapsed_time": "0:25:23", "remaining_time": "0:35:37", "throughput": 13449.25, "total_tokens": 20485696}
|
|
{"current_steps": 6505, "total_steps": 15621, "loss": 0.3642, "lr": 1.4498639457320515e-06, "epoch": 0.416426605210934, "percentage": 41.64, "elapsed_time": "0:25:23", "remaining_time": "0:35:35", "throughput": 13453.8, "total_tokens": 20500608}
|
|
{"current_steps": 6510, "total_steps": 15621, "loss": 0.4686, "lr": 1.4488657467395865e-06, "epoch": 0.4167466871519109, "percentage": 41.67, "elapsed_time": "0:25:24", "remaining_time": "0:35:33", "throughput": 13458.47, "total_tokens": 20515776}
|
|
{"current_steps": 6515, "total_steps": 15621, "loss": 0.5078, "lr": 1.4478669873329663e-06, "epoch": 0.41706676909288776, "percentage": 41.71, "elapsed_time": "0:25:24", "remaining_time": "0:35:31", "throughput": 13463.35, "total_tokens": 20531456}
|
|
{"current_steps": 6520, "total_steps": 15621, "loss": 0.386, "lr": 1.4468676687591536e-06, "epoch": 0.4173868510338647, "percentage": 41.74, "elapsed_time": "0:25:25", "remaining_time": "0:35:29", "throughput": 13468.18, "total_tokens": 20547200}
|
|
{"current_steps": 6525, "total_steps": 15621, "loss": 0.4358, "lr": 1.4458677922658104e-06, "epoch": 0.41770693297484157, "percentage": 41.77, "elapsed_time": "0:25:26", "remaining_time": "0:35:27", "throughput": 13472.88, "total_tokens": 20562560}
|
|
{"current_steps": 6530, "total_steps": 15621, "loss": 0.2798, "lr": 1.444867359101293e-06, "epoch": 0.41802701491581845, "percentage": 41.8, "elapsed_time": "0:25:26", "remaining_time": "0:35:25", "throughput": 13477.37, "total_tokens": 20577344}
|
|
{"current_steps": 6535, "total_steps": 15621, "loss": 0.3529, "lr": 1.4438663705146545e-06, "epoch": 0.41834709685679533, "percentage": 41.83, "elapsed_time": "0:25:27", "remaining_time": "0:35:23", "throughput": 13482.2, "total_tokens": 20593088}
|
|
{"current_steps": 6540, "total_steps": 15621, "loss": 0.3589, "lr": 1.442864827755641e-06, "epoch": 0.4186671787977722, "percentage": 41.87, "elapsed_time": "0:25:28", "remaining_time": "0:35:21", "throughput": 13487.49, "total_tokens": 20609792}
|
|
{"current_steps": 6545, "total_steps": 15621, "loss": 0.4407, "lr": 1.4418627320746901e-06, "epoch": 0.41898726073874915, "percentage": 41.9, "elapsed_time": "0:25:28", "remaining_time": "0:35:19", "throughput": 13492.27, "total_tokens": 20625280}
|
|
{"current_steps": 6550, "total_steps": 15621, "loss": 0.3854, "lr": 1.4408600847229304e-06, "epoch": 0.419307342679726, "percentage": 41.93, "elapsed_time": "0:25:29", "remaining_time": "0:35:17", "throughput": 13497.53, "total_tokens": 20641984}
|
|
{"current_steps": 6555, "total_steps": 15621, "loss": 0.5281, "lr": 1.4398568869521782e-06, "epoch": 0.4196274246207029, "percentage": 41.96, "elapsed_time": "0:25:29", "remaining_time": "0:35:16", "throughput": 13502.59, "total_tokens": 20658240}
|
|
{"current_steps": 6560, "total_steps": 15621, "loss": 0.3645, "lr": 1.4388531400149384e-06, "epoch": 0.4199475065616798, "percentage": 41.99, "elapsed_time": "0:25:30", "remaining_time": "0:35:14", "throughput": 13507.27, "total_tokens": 20673408}
|
|
{"current_steps": 6565, "total_steps": 15621, "loss": 0.3866, "lr": 1.4378488451644007e-06, "epoch": 0.42026758850265666, "percentage": 42.03, "elapsed_time": "0:25:31", "remaining_time": "0:35:12", "throughput": 13512.04, "total_tokens": 20688960}
|
|
{"current_steps": 6570, "total_steps": 15621, "loss": 0.4049, "lr": 1.4368440036544386e-06, "epoch": 0.42058767044363354, "percentage": 42.06, "elapsed_time": "0:25:31", "remaining_time": "0:35:10", "throughput": 13516.87, "total_tokens": 20704768}
|
|
{"current_steps": 6575, "total_steps": 15621, "loss": 0.4199, "lr": 1.435838616739609e-06, "epoch": 0.4209077523846105, "percentage": 42.09, "elapsed_time": "0:25:32", "remaining_time": "0:35:08", "throughput": 13521.45, "total_tokens": 20719808}
|
|
{"current_steps": 6580, "total_steps": 15621, "loss": 0.5392, "lr": 1.4348326856751493e-06, "epoch": 0.42122783432558736, "percentage": 42.12, "elapsed_time": "0:25:32", "remaining_time": "0:35:06", "throughput": 13526.36, "total_tokens": 20735680}
|
|
{"current_steps": 6585, "total_steps": 15621, "loss": 0.3422, "lr": 1.433826211716976e-06, "epoch": 0.42154791626656424, "percentage": 42.15, "elapsed_time": "0:25:33", "remaining_time": "0:35:04", "throughput": 13530.7, "total_tokens": 20750144}
|
|
{"current_steps": 6590, "total_steps": 15621, "loss": 0.3966, "lr": 1.4328191961216835e-06, "epoch": 0.4218679982075411, "percentage": 42.19, "elapsed_time": "0:25:34", "remaining_time": "0:35:02", "throughput": 13535.58, "total_tokens": 20766016}
|
|
{"current_steps": 6595, "total_steps": 15621, "loss": 0.4812, "lr": 1.4318116401465427e-06, "epoch": 0.422188080148518, "percentage": 42.22, "elapsed_time": "0:25:34", "remaining_time": "0:35:00", "throughput": 13540.8, "total_tokens": 20782720}
|
|
{"current_steps": 6600, "total_steps": 15621, "loss": 0.388, "lr": 1.430803545049499e-06, "epoch": 0.42250816208949493, "percentage": 42.25, "elapsed_time": "0:25:35", "remaining_time": "0:34:58", "throughput": 13545.52, "total_tokens": 20798208}
|
|
{"current_steps": 6605, "total_steps": 15621, "loss": 0.5652, "lr": 1.4297949120891716e-06, "epoch": 0.4228282440304718, "percentage": 42.28, "elapsed_time": "0:25:36", "remaining_time": "0:34:56", "throughput": 13550.02, "total_tokens": 20813056}
|
|
{"current_steps": 6610, "total_steps": 15621, "loss": 0.4121, "lr": 1.4287857425248497e-06, "epoch": 0.4231483259714487, "percentage": 42.31, "elapsed_time": "0:25:36", "remaining_time": "0:34:54", "throughput": 13554.87, "total_tokens": 20828800}
|
|
{"current_steps": 6615, "total_steps": 15621, "loss": 0.4974, "lr": 1.427776037616494e-06, "epoch": 0.42346840791242557, "percentage": 42.35, "elapsed_time": "0:25:37", "remaining_time": "0:34:52", "throughput": 13559.74, "total_tokens": 20844736}
|
|
{"current_steps": 6620, "total_steps": 15621, "loss": 0.3527, "lr": 1.4267657986247326e-06, "epoch": 0.42378848985340245, "percentage": 42.38, "elapsed_time": "0:25:37", "remaining_time": "0:34:50", "throughput": 13564.67, "total_tokens": 20860672}
|
|
{"current_steps": 6625, "total_steps": 15621, "loss": 0.3746, "lr": 1.425755026810861e-06, "epoch": 0.4241085717943794, "percentage": 42.41, "elapsed_time": "0:25:38", "remaining_time": "0:34:49", "throughput": 13569.74, "total_tokens": 20877184}
|
|
{"current_steps": 6630, "total_steps": 15621, "loss": 0.4095, "lr": 1.4247437234368394e-06, "epoch": 0.42442865373535626, "percentage": 42.44, "elapsed_time": "0:25:39", "remaining_time": "0:34:47", "throughput": 13575.04, "total_tokens": 20894208}
|
|
{"current_steps": 6635, "total_steps": 15621, "loss": 0.4001, "lr": 1.423731889765292e-06, "epoch": 0.42474873567633314, "percentage": 42.47, "elapsed_time": "0:25:39", "remaining_time": "0:34:45", "throughput": 13579.74, "total_tokens": 20909696}
|
|
{"current_steps": 6640, "total_steps": 15621, "loss": 0.3504, "lr": 1.422719527059505e-06, "epoch": 0.42506881761731, "percentage": 42.51, "elapsed_time": "0:25:40", "remaining_time": "0:34:43", "throughput": 13584.8, "total_tokens": 20926016}
|
|
{"current_steps": 6645, "total_steps": 15621, "loss": 0.3636, "lr": 1.4217066365834253e-06, "epoch": 0.4253888995582869, "percentage": 42.54, "elapsed_time": "0:25:41", "remaining_time": "0:34:41", "throughput": 13589.47, "total_tokens": 20941440}
|
|
{"current_steps": 6650, "total_steps": 15621, "loss": 0.4406, "lr": 1.4206932196016586e-06, "epoch": 0.42570898149926384, "percentage": 42.57, "elapsed_time": "0:25:41", "remaining_time": "0:34:39", "throughput": 13593.92, "total_tokens": 20956352}
|
|
{"current_steps": 6655, "total_steps": 15621, "loss": 0.3928, "lr": 1.4196792773794672e-06, "epoch": 0.4260290634402407, "percentage": 42.6, "elapsed_time": "0:25:42", "remaining_time": "0:34:37", "throughput": 13599.1, "total_tokens": 20973056}
|
|
{"current_steps": 6660, "total_steps": 15621, "loss": 0.438, "lr": 1.418664811182771e-06, "epoch": 0.4263491453812176, "percentage": 42.63, "elapsed_time": "0:25:42", "remaining_time": "0:34:35", "throughput": 13604.01, "total_tokens": 20989248}
|
|
{"current_steps": 6665, "total_steps": 15621, "loss": 0.4836, "lr": 1.417649822278142e-06, "epoch": 0.4266692273221945, "percentage": 42.67, "elapsed_time": "0:25:43", "remaining_time": "0:34:33", "throughput": 13608.51, "total_tokens": 21004096}
|
|
{"current_steps": 6670, "total_steps": 15621, "loss": 0.4722, "lr": 1.4166343119328064e-06, "epoch": 0.42698930926317136, "percentage": 42.7, "elapsed_time": "0:25:44", "remaining_time": "0:34:32", "throughput": 13613.51, "total_tokens": 21020224}
|
|
{"current_steps": 6675, "total_steps": 15621, "loss": 0.4616, "lr": 1.4156182814146404e-06, "epoch": 0.42730939120414824, "percentage": 42.73, "elapsed_time": "0:25:44", "remaining_time": "0:34:30", "throughput": 13618.08, "total_tokens": 21035264}
|
|
{"current_steps": 6680, "total_steps": 15621, "loss": 0.3497, "lr": 1.4146017319921701e-06, "epoch": 0.42762947314512517, "percentage": 42.76, "elapsed_time": "0:25:45", "remaining_time": "0:34:28", "throughput": 13623.2, "total_tokens": 21051904}
|
|
{"current_steps": 6685, "total_steps": 15621, "loss": 0.4215, "lr": 1.4135846649345695e-06, "epoch": 0.42794955508610205, "percentage": 42.79, "elapsed_time": "0:25:45", "remaining_time": "0:34:26", "throughput": 13628.68, "total_tokens": 21069504}
|
|
{"current_steps": 6690, "total_steps": 15621, "loss": 0.427, "lr": 1.4125670815116589e-06, "epoch": 0.42826963702707893, "percentage": 42.83, "elapsed_time": "0:25:46", "remaining_time": "0:34:24", "throughput": 13633.15, "total_tokens": 21084288}
|
|
{"current_steps": 6695, "total_steps": 15621, "loss": 0.2926, "lr": 1.4115489829939025e-06, "epoch": 0.4285897189680558, "percentage": 42.86, "elapsed_time": "0:25:47", "remaining_time": "0:34:22", "throughput": 13638.14, "total_tokens": 21100544}
|
|
{"current_steps": 6700, "total_steps": 15621, "loss": 0.4407, "lr": 1.4105303706524093e-06, "epoch": 0.4289098009090327, "percentage": 42.89, "elapsed_time": "0:25:47", "remaining_time": "0:34:20", "throughput": 13642.98, "total_tokens": 21116608}
|
|
{"current_steps": 6705, "total_steps": 15621, "loss": 0.5926, "lr": 1.4095112457589276e-06, "epoch": 0.4292298828500096, "percentage": 42.92, "elapsed_time": "0:25:48", "remaining_time": "0:34:18", "throughput": 13647.55, "total_tokens": 21131776}
|
|
{"current_steps": 6710, "total_steps": 15621, "loss": 0.3962, "lr": 1.4084916095858477e-06, "epoch": 0.4295499647909865, "percentage": 42.95, "elapsed_time": "0:25:48", "remaining_time": "0:34:17", "throughput": 13651.81, "total_tokens": 21146368}
|
|
{"current_steps": 6715, "total_steps": 15621, "loss": 0.4951, "lr": 1.407471463406197e-06, "epoch": 0.4298700467319634, "percentage": 42.99, "elapsed_time": "0:25:49", "remaining_time": "0:34:15", "throughput": 13656.66, "total_tokens": 21162368}
|
|
{"current_steps": 6720, "total_steps": 15621, "loss": 0.4329, "lr": 1.4064508084936399e-06, "epoch": 0.43019012867294026, "percentage": 43.02, "elapsed_time": "0:25:50", "remaining_time": "0:34:13", "throughput": 13661.79, "total_tokens": 21179008}
|
|
{"current_steps": 6725, "total_steps": 15621, "loss": 0.5761, "lr": 1.405429646122476e-06, "epoch": 0.43051021061391714, "percentage": 43.05, "elapsed_time": "0:25:50", "remaining_time": "0:34:11", "throughput": 13667.07, "total_tokens": 21196160}
|
|
{"current_steps": 6730, "total_steps": 15621, "loss": 0.5175, "lr": 1.4044079775676392e-06, "epoch": 0.4308302925548941, "percentage": 43.08, "elapsed_time": "0:25:51", "remaining_time": "0:34:09", "throughput": 13671.89, "total_tokens": 21212032}
|
|
{"current_steps": 6735, "total_steps": 15621, "loss": 0.3659, "lr": 1.4033858041046936e-06, "epoch": 0.43115037449587096, "percentage": 43.12, "elapsed_time": "0:25:52", "remaining_time": "0:34:07", "throughput": 13677.64, "total_tokens": 21230272}
|
|
{"current_steps": 6740, "total_steps": 15621, "loss": 0.3926, "lr": 1.4023631270098352e-06, "epoch": 0.43147045643684784, "percentage": 43.15, "elapsed_time": "0:25:52", "remaining_time": "0:34:06", "throughput": 13682.28, "total_tokens": 21245760}
|
|
{"current_steps": 6745, "total_steps": 15621, "loss": 0.3411, "lr": 1.4013399475598888e-06, "epoch": 0.4317905383778247, "percentage": 43.18, "elapsed_time": "0:25:53", "remaining_time": "0:34:04", "throughput": 13686.78, "total_tokens": 21260992}
|
|
{"current_steps": 6750, "total_steps": 15621, "loss": 0.2807, "lr": 1.4003162670323056e-06, "epoch": 0.4321106203188016, "percentage": 43.21, "elapsed_time": "0:25:53", "remaining_time": "0:34:02", "throughput": 13690.88, "total_tokens": 21275136}
|
|
{"current_steps": 6755, "total_steps": 15621, "loss": 0.5292, "lr": 1.3992920867051627e-06, "epoch": 0.4324307022597785, "percentage": 43.24, "elapsed_time": "0:25:54", "remaining_time": "0:34:00", "throughput": 13695.49, "total_tokens": 21290560}
|
|
{"current_steps": 6760, "total_steps": 15621, "loss": 0.3525, "lr": 1.3982674078571614e-06, "epoch": 0.4327507842007554, "percentage": 43.28, "elapsed_time": "0:25:55", "remaining_time": "0:33:58", "throughput": 13699.94, "total_tokens": 21305536}
|
|
{"current_steps": 6765, "total_steps": 15621, "loss": 0.3785, "lr": 1.3972422317676252e-06, "epoch": 0.4330708661417323, "percentage": 43.31, "elapsed_time": "0:25:55", "remaining_time": "0:33:56", "throughput": 13704.41, "total_tokens": 21320576}
|
|
{"current_steps": 6770, "total_steps": 15621, "loss": 0.367, "lr": 1.3962165597164985e-06, "epoch": 0.43339094808270917, "percentage": 43.34, "elapsed_time": "0:25:56", "remaining_time": "0:33:54", "throughput": 13708.84, "total_tokens": 21335680}
|
|
{"current_steps": 6775, "total_steps": 15621, "loss": 0.3496, "lr": 1.395190392984345e-06, "epoch": 0.43371103002368605, "percentage": 43.37, "elapsed_time": "0:25:56", "remaining_time": "0:33:52", "throughput": 13713.63, "total_tokens": 21351808}
|
|
{"current_steps": 6780, "total_steps": 15621, "loss": 0.4482, "lr": 1.3941637328523452e-06, "epoch": 0.43403111196466293, "percentage": 43.4, "elapsed_time": "0:25:57", "remaining_time": "0:33:51", "throughput": 13717.97, "total_tokens": 21366464}
|
|
{"current_steps": 6785, "total_steps": 15621, "loss": 0.3094, "lr": 1.3931365806022978e-06, "epoch": 0.43435119390563987, "percentage": 43.44, "elapsed_time": "0:25:58", "remaining_time": "0:33:49", "throughput": 13723.04, "total_tokens": 21383296}
|
|
{"current_steps": 6790, "total_steps": 15621, "loss": 0.3178, "lr": 1.3921089375166131e-06, "epoch": 0.43467127584661674, "percentage": 43.47, "elapsed_time": "0:25:58", "remaining_time": "0:33:47", "throughput": 13727.96, "total_tokens": 21399616}
|
|
{"current_steps": 6795, "total_steps": 15621, "loss": 0.4475, "lr": 1.391080804878316e-06, "epoch": 0.4349913577875936, "percentage": 43.5, "elapsed_time": "0:25:59", "remaining_time": "0:33:45", "throughput": 13732.43, "total_tokens": 21414848}
|
|
{"current_steps": 6800, "total_steps": 15621, "loss": 0.3748, "lr": 1.3900521839710427e-06, "epoch": 0.4353114397285705, "percentage": 43.53, "elapsed_time": "0:26:00", "remaining_time": "0:33:43", "throughput": 13736.95, "total_tokens": 21430144}
|
|
{"current_steps": 6805, "total_steps": 15621, "loss": 0.3516, "lr": 1.3890230760790373e-06, "epoch": 0.4356315216695474, "percentage": 43.56, "elapsed_time": "0:26:00", "remaining_time": "0:33:41", "throughput": 13741.36, "total_tokens": 21445248}
|
|
{"current_steps": 6810, "total_steps": 15621, "loss": 0.5972, "lr": 1.3879934824871544e-06, "epoch": 0.4359516036105243, "percentage": 43.6, "elapsed_time": "0:26:01", "remaining_time": "0:33:39", "throughput": 13745.87, "total_tokens": 21460544}
|
|
{"current_steps": 6815, "total_steps": 15621, "loss": 0.4871, "lr": 1.3869634044808526e-06, "epoch": 0.4362716855515012, "percentage": 43.63, "elapsed_time": "0:26:01", "remaining_time": "0:33:38", "throughput": 13750.55, "total_tokens": 21476224}
|
|
{"current_steps": 6820, "total_steps": 15621, "loss": 0.5996, "lr": 1.3859328433461971e-06, "epoch": 0.4365917674924781, "percentage": 43.66, "elapsed_time": "0:26:02", "remaining_time": "0:33:36", "throughput": 13755.18, "total_tokens": 21491712}
|
|
{"current_steps": 6825, "total_steps": 15621, "loss": 0.5784, "lr": 1.3849018003698553e-06, "epoch": 0.43691184943345496, "percentage": 43.69, "elapsed_time": "0:26:03", "remaining_time": "0:33:34", "throughput": 13760.4, "total_tokens": 21508928}
|
|
{"current_steps": 6830, "total_steps": 15621, "loss": 0.415, "lr": 1.3838702768390964e-06, "epoch": 0.43723193137443184, "percentage": 43.72, "elapsed_time": "0:26:03", "remaining_time": "0:33:32", "throughput": 13764.68, "total_tokens": 21523648}
|
|
{"current_steps": 6835, "total_steps": 15621, "loss": 0.4777, "lr": 1.38283827404179e-06, "epoch": 0.43755201331540877, "percentage": 43.76, "elapsed_time": "0:26:04", "remaining_time": "0:33:30", "throughput": 13769.28, "total_tokens": 21539264}
|
|
{"current_steps": 6840, "total_steps": 15621, "loss": 0.3776, "lr": 1.381805793266403e-06, "epoch": 0.43787209525638565, "percentage": 43.79, "elapsed_time": "0:26:04", "remaining_time": "0:33:29", "throughput": 13774.06, "total_tokens": 21555520}
|
|
{"current_steps": 6845, "total_steps": 15621, "loss": 0.4517, "lr": 1.3807728358020009e-06, "epoch": 0.43819217719736253, "percentage": 43.82, "elapsed_time": "0:26:05", "remaining_time": "0:33:27", "throughput": 13778.32, "total_tokens": 21570112}
|
|
{"current_steps": 6850, "total_steps": 15621, "loss": 0.3386, "lr": 1.3797394029382416e-06, "epoch": 0.4385122591383394, "percentage": 43.85, "elapsed_time": "0:26:06", "remaining_time": "0:33:25", "throughput": 13782.55, "total_tokens": 21584768}
|
|
{"current_steps": 6855, "total_steps": 15621, "loss": 0.2963, "lr": 1.37870549596538e-06, "epoch": 0.4388323410793163, "percentage": 43.88, "elapsed_time": "0:26:06", "remaining_time": "0:33:23", "throughput": 13786.98, "total_tokens": 21599872}
|
|
{"current_steps": 6860, "total_steps": 15621, "loss": 0.5262, "lr": 1.3776711161742595e-06, "epoch": 0.43915242302029317, "percentage": 43.92, "elapsed_time": "0:26:07", "remaining_time": "0:33:21", "throughput": 13791.68, "total_tokens": 21615808}
|
|
{"current_steps": 6865, "total_steps": 15621, "loss": 0.4639, "lr": 1.3766362648563166e-06, "epoch": 0.4394725049612701, "percentage": 43.95, "elapsed_time": "0:26:07", "remaining_time": "0:33:19", "throughput": 13795.99, "total_tokens": 21630656}
|
|
{"current_steps": 6870, "total_steps": 15621, "loss": 0.4073, "lr": 1.3756009433035744e-06, "epoch": 0.439792586902247, "percentage": 43.98, "elapsed_time": "0:26:08", "remaining_time": "0:33:17", "throughput": 13800.84, "total_tokens": 21646976}
|
|
{"current_steps": 6875, "total_steps": 15621, "loss": 0.5615, "lr": 1.3745651528086447e-06, "epoch": 0.44011266884322386, "percentage": 44.01, "elapsed_time": "0:26:09", "remaining_time": "0:33:16", "throughput": 13806.38, "total_tokens": 21665024}
|
|
{"current_steps": 6880, "total_steps": 15621, "loss": 0.4486, "lr": 1.373528894664724e-06, "epoch": 0.44043275078420074, "percentage": 44.04, "elapsed_time": "0:26:09", "remaining_time": "0:33:14", "throughput": 13810.79, "total_tokens": 21680128}
|
|
{"current_steps": 6885, "total_steps": 15621, "loss": 0.3509, "lr": 1.3724921701655924e-06, "epoch": 0.4407528327251776, "percentage": 44.08, "elapsed_time": "0:26:10", "remaining_time": "0:33:12", "throughput": 13815.35, "total_tokens": 21695808}
|
|
{"current_steps": 6890, "total_steps": 15621, "loss": 0.3155, "lr": 1.3714549806056125e-06, "epoch": 0.44107291466615456, "percentage": 44.11, "elapsed_time": "0:26:11", "remaining_time": "0:33:10", "throughput": 13819.98, "total_tokens": 21711936}
|
|
{"current_steps": 6895, "total_steps": 15621, "loss": 0.4241, "lr": 1.3704173272797283e-06, "epoch": 0.44139299660713144, "percentage": 44.14, "elapsed_time": "0:26:11", "remaining_time": "0:33:09", "throughput": 13824.49, "total_tokens": 21727488}
|
|
{"current_steps": 6900, "total_steps": 15621, "loss": 0.4386, "lr": 1.3693792114834619e-06, "epoch": 0.4417130785481083, "percentage": 44.17, "elapsed_time": "0:26:12", "remaining_time": "0:33:07", "throughput": 13829.92, "total_tokens": 21745280}
|
|
{"current_steps": 6905, "total_steps": 15621, "loss": 0.4684, "lr": 1.3683406345129129e-06, "epoch": 0.4420331604890852, "percentage": 44.2, "elapsed_time": "0:26:12", "remaining_time": "0:33:05", "throughput": 13834.15, "total_tokens": 21760000}
|
|
{"current_steps": 6910, "total_steps": 15621, "loss": 0.4025, "lr": 1.3673015976647567e-06, "epoch": 0.4423532424300621, "percentage": 44.24, "elapsed_time": "0:26:13", "remaining_time": "0:33:03", "throughput": 13838.59, "total_tokens": 21775232}
|
|
{"current_steps": 6915, "total_steps": 15621, "loss": 0.3967, "lr": 1.3662621022362435e-06, "epoch": 0.442673324371039, "percentage": 44.27, "elapsed_time": "0:26:14", "remaining_time": "0:33:01", "throughput": 13843.14, "total_tokens": 21790656}
|
|
{"current_steps": 6920, "total_steps": 15621, "loss": 0.4654, "lr": 1.3652221495251952e-06, "epoch": 0.4429934063120159, "percentage": 44.3, "elapsed_time": "0:26:14", "remaining_time": "0:33:00", "throughput": 13847.68, "total_tokens": 21806336}
|
|
{"current_steps": 6925, "total_steps": 15621, "loss": 0.3204, "lr": 1.3641817408300049e-06, "epoch": 0.44331348825299277, "percentage": 44.33, "elapsed_time": "0:26:15", "remaining_time": "0:32:58", "throughput": 13852.96, "total_tokens": 21823744}
|
|
{"current_steps": 6930, "total_steps": 15621, "loss": 0.5579, "lr": 1.3631408774496352e-06, "epoch": 0.44363357019396965, "percentage": 44.36, "elapsed_time": "0:26:15", "remaining_time": "0:32:56", "throughput": 13857.38, "total_tokens": 21839104}
|
|
{"current_steps": 6935, "total_steps": 15621, "loss": 0.3566, "lr": 1.3620995606836165e-06, "epoch": 0.44395365213494653, "percentage": 44.4, "elapsed_time": "0:26:16", "remaining_time": "0:32:54", "throughput": 13861.86, "total_tokens": 21854528}
|
|
{"current_steps": 6940, "total_steps": 15621, "loss": 0.6023, "lr": 1.3610577918320446e-06, "epoch": 0.4442737340759234, "percentage": 44.43, "elapsed_time": "0:26:17", "remaining_time": "0:32:52", "throughput": 13866.57, "total_tokens": 21870592}
|
|
{"current_steps": 6945, "total_steps": 15621, "loss": 0.3743, "lr": 1.3600155721955802e-06, "epoch": 0.44459381601690035, "percentage": 44.46, "elapsed_time": "0:26:17", "remaining_time": "0:32:51", "throughput": 13870.89, "total_tokens": 21885696}
|
|
{"current_steps": 6950, "total_steps": 15621, "loss": 0.3819, "lr": 1.3589729030754468e-06, "epoch": 0.4449138979578772, "percentage": 44.49, "elapsed_time": "0:26:18", "remaining_time": "0:32:49", "throughput": 13875.41, "total_tokens": 21901248}
|
|
{"current_steps": 6955, "total_steps": 15621, "loss": 0.4341, "lr": 1.3579297857734293e-06, "epoch": 0.4452339798988541, "percentage": 44.52, "elapsed_time": "0:26:19", "remaining_time": "0:32:47", "throughput": 13879.69, "total_tokens": 21916352}
|
|
{"current_steps": 6960, "total_steps": 15621, "loss": 0.3365, "lr": 1.3568862215918717e-06, "epoch": 0.445554061839831, "percentage": 44.56, "elapsed_time": "0:26:19", "remaining_time": "0:32:45", "throughput": 13883.84, "total_tokens": 21931072}
|
|
{"current_steps": 6965, "total_steps": 15621, "loss": 0.4944, "lr": 1.3558422118336762e-06, "epoch": 0.44587414378080786, "percentage": 44.59, "elapsed_time": "0:26:20", "remaining_time": "0:32:43", "throughput": 13888.39, "total_tokens": 21946752}
|
|
{"current_steps": 6970, "total_steps": 15621, "loss": 0.4804, "lr": 1.354797757802301e-06, "epoch": 0.4461942257217848, "percentage": 44.62, "elapsed_time": "0:26:20", "remaining_time": "0:32:42", "throughput": 13892.84, "total_tokens": 21962176}
|
|
{"current_steps": 6975, "total_steps": 15621, "loss": 0.392, "lr": 1.3537528608017596e-06, "epoch": 0.4465143076627617, "percentage": 44.65, "elapsed_time": "0:26:21", "remaining_time": "0:32:40", "throughput": 13897.61, "total_tokens": 21978496}
|
|
{"current_steps": 6980, "total_steps": 15621, "loss": 0.3973, "lr": 1.352707522136618e-06, "epoch": 0.44683438960373856, "percentage": 44.68, "elapsed_time": "0:26:22", "remaining_time": "0:32:38", "throughput": 13901.51, "total_tokens": 21992576}
|
|
{"current_steps": 6985, "total_steps": 15621, "loss": 0.3998, "lr": 1.3516617431119934e-06, "epoch": 0.44715447154471544, "percentage": 44.72, "elapsed_time": "0:26:22", "remaining_time": "0:32:36", "throughput": 13905.95, "total_tokens": 22008000}
|
|
{"current_steps": 6990, "total_steps": 15621, "loss": 0.53, "lr": 1.350615525033554e-06, "epoch": 0.4474745534856923, "percentage": 44.75, "elapsed_time": "0:26:23", "remaining_time": "0:32:34", "throughput": 13910.22, "total_tokens": 22022976}
|
|
{"current_steps": 6995, "total_steps": 15621, "loss": 0.4027, "lr": 1.3495688692075144e-06, "epoch": 0.44779463542666925, "percentage": 44.78, "elapsed_time": "0:26:23", "remaining_time": "0:32:33", "throughput": 13914.6, "total_tokens": 22038144}
|
|
{"current_steps": 7000, "total_steps": 15621, "loss": 0.3435, "lr": 1.3485217769406376e-06, "epoch": 0.44811471736764613, "percentage": 44.81, "elapsed_time": "0:26:24", "remaining_time": "0:32:31", "throughput": 13919.22, "total_tokens": 22054016}
|
|
{"current_steps": 7005, "total_steps": 15621, "loss": 0.3605, "lr": 1.3474742495402303e-06, "epoch": 0.448434799308623, "percentage": 44.84, "elapsed_time": "0:26:25", "remaining_time": "0:32:29", "throughput": 13925.21, "total_tokens": 22073920}
|
|
{"current_steps": 7010, "total_steps": 15621, "loss": 0.4297, "lr": 1.3464262883141425e-06, "epoch": 0.4487548812495999, "percentage": 44.88, "elapsed_time": "0:26:25", "remaining_time": "0:32:27", "throughput": 13929.85, "total_tokens": 22089728}
|
|
{"current_steps": 7015, "total_steps": 15621, "loss": 0.5687, "lr": 1.3453778945707663e-06, "epoch": 0.44907496319057677, "percentage": 44.91, "elapsed_time": "0:26:26", "remaining_time": "0:32:26", "throughput": 13934.25, "total_tokens": 22105344}
|
|
{"current_steps": 7020, "total_steps": 15621, "loss": 0.4471, "lr": 1.3443290696190332e-06, "epoch": 0.4493950451315537, "percentage": 44.94, "elapsed_time": "0:26:27", "remaining_time": "0:32:24", "throughput": 13939.06, "total_tokens": 22121792}
|
|
{"current_steps": 7025, "total_steps": 15621, "loss": 0.4034, "lr": 1.343279814768414e-06, "epoch": 0.4497151270725306, "percentage": 44.97, "elapsed_time": "0:26:27", "remaining_time": "0:32:22", "throughput": 13943.05, "total_tokens": 22136128}
|
|
{"current_steps": 7030, "total_steps": 15621, "loss": 0.38, "lr": 1.3422301313289156e-06, "epoch": 0.45003520901350746, "percentage": 45.0, "elapsed_time": "0:26:28", "remaining_time": "0:32:20", "throughput": 13947.6, "total_tokens": 22151936}
|
|
{"current_steps": 7035, "total_steps": 15621, "loss": 0.3794, "lr": 1.34118002061108e-06, "epoch": 0.45035529095448434, "percentage": 45.04, "elapsed_time": "0:26:28", "remaining_time": "0:32:19", "throughput": 13952.27, "total_tokens": 22168128}
|
|
{"current_steps": 7038, "total_steps": 15621, "eval_loss": 0.43158382177352905, "epoch": 0.4505473401190705, "percentage": 45.05, "elapsed_time": "0:27:18", "remaining_time": "0:33:18", "throughput": 13536.59, "total_tokens": 22178432}
|
|
{"current_steps": 7040, "total_steps": 15621, "loss": 0.4309, "lr": 1.3401294839259828e-06, "epoch": 0.4506753728954612, "percentage": 45.07, "elapsed_time": "0:27:47", "remaining_time": "0:33:51", "throughput": 13307.35, "total_tokens": 22184512}
|
|
{"current_steps": 7045, "total_steps": 15621, "loss": 0.54, "lr": 1.3390785225852312e-06, "epoch": 0.4509954548364381, "percentage": 45.1, "elapsed_time": "0:27:47", "remaining_time": "0:33:50", "throughput": 13311.77, "total_tokens": 22199872}
|
|
{"current_steps": 7050, "total_steps": 15621, "loss": 0.4411, "lr": 1.3380271379009631e-06, "epoch": 0.45131553677741504, "percentage": 45.13, "elapsed_time": "0:27:48", "remaining_time": "0:33:48", "throughput": 13316.8, "total_tokens": 22216960}
|
|
{"current_steps": 7055, "total_steps": 15621, "loss": 0.2615, "lr": 1.3369753311858442e-06, "epoch": 0.4516356187183919, "percentage": 45.16, "elapsed_time": "0:27:48", "remaining_time": "0:33:46", "throughput": 13320.92, "total_tokens": 22231488}
|
|
{"current_steps": 7060, "total_steps": 15621, "loss": 0.4584, "lr": 1.3359231037530682e-06, "epoch": 0.4519557006593688, "percentage": 45.2, "elapsed_time": "0:27:49", "remaining_time": "0:33:44", "throughput": 13325.35, "total_tokens": 22246976}
|
|
{"current_steps": 7065, "total_steps": 15621, "loss": 0.4139, "lr": 1.3348704569163527e-06, "epoch": 0.4522757826003457, "percentage": 45.23, "elapsed_time": "0:27:50", "remaining_time": "0:33:42", "throughput": 13330.16, "total_tokens": 22263680}
|
|
{"current_steps": 7070, "total_steps": 15621, "loss": 0.3347, "lr": 1.33381739198994e-06, "epoch": 0.45259586454132256, "percentage": 45.26, "elapsed_time": "0:27:50", "remaining_time": "0:33:40", "throughput": 13334.74, "total_tokens": 22279552}
|
|
{"current_steps": 7075, "total_steps": 15621, "loss": 0.4436, "lr": 1.3327639102885938e-06, "epoch": 0.4529159464822995, "percentage": 45.29, "elapsed_time": "0:27:51", "remaining_time": "0:33:38", "throughput": 13339.3, "total_tokens": 22295296}
|
|
{"current_steps": 7080, "total_steps": 15621, "loss": 0.3973, "lr": 1.3317100131275986e-06, "epoch": 0.45323602842327637, "percentage": 45.32, "elapsed_time": "0:27:52", "remaining_time": "0:33:37", "throughput": 13343.53, "total_tokens": 22310400}
|
|
{"current_steps": 7085, "total_steps": 15621, "loss": 0.492, "lr": 1.3306557018227576e-06, "epoch": 0.45355611036425325, "percentage": 45.36, "elapsed_time": "0:27:52", "remaining_time": "0:33:35", "throughput": 13348.35, "total_tokens": 22326848}
|
|
{"current_steps": 7090, "total_steps": 15621, "loss": 0.47, "lr": 1.3296009776903903e-06, "epoch": 0.45387619230523013, "percentage": 45.39, "elapsed_time": "0:27:53", "remaining_time": "0:33:33", "throughput": 13352.86, "total_tokens": 22342592}
|
|
{"current_steps": 7095, "total_steps": 15621, "loss": 0.4386, "lr": 1.3285458420473323e-06, "epoch": 0.454196274246207, "percentage": 45.42, "elapsed_time": "0:27:53", "remaining_time": "0:33:31", "throughput": 13357.6, "total_tokens": 22358912}
|
|
{"current_steps": 7100, "total_steps": 15621, "loss": 0.3744, "lr": 1.3274902962109332e-06, "epoch": 0.45451635618718395, "percentage": 45.45, "elapsed_time": "0:27:54", "remaining_time": "0:33:29", "throughput": 13362.08, "total_tokens": 22374528}
|
|
{"current_steps": 7105, "total_steps": 15621, "loss": 0.3686, "lr": 1.3264343414990539e-06, "epoch": 0.4548364381281608, "percentage": 45.48, "elapsed_time": "0:27:55", "remaining_time": "0:33:27", "throughput": 13366.43, "total_tokens": 22389824}
|
|
{"current_steps": 7110, "total_steps": 15621, "loss": 0.4148, "lr": 1.3253779792300663e-06, "epoch": 0.4551565200691377, "percentage": 45.52, "elapsed_time": "0:27:55", "remaining_time": "0:33:25", "throughput": 13370.83, "total_tokens": 22405376}
|
|
{"current_steps": 7115, "total_steps": 15621, "loss": 0.3551, "lr": 1.3243212107228518e-06, "epoch": 0.4554766020101146, "percentage": 45.55, "elapsed_time": "0:27:56", "remaining_time": "0:33:23", "throughput": 13374.97, "total_tokens": 22420032}
|
|
{"current_steps": 7120, "total_steps": 15621, "loss": 0.3909, "lr": 1.3232640372967974e-06, "epoch": 0.45579668395109146, "percentage": 45.58, "elapsed_time": "0:27:56", "remaining_time": "0:33:22", "throughput": 13379.08, "total_tokens": 22434688}
|
|
{"current_steps": 7125, "total_steps": 15621, "loss": 0.4645, "lr": 1.3222064602717974e-06, "epoch": 0.45611676589206834, "percentage": 45.61, "elapsed_time": "0:27:57", "remaining_time": "0:33:20", "throughput": 13383.79, "total_tokens": 22451072}
|
|
{"current_steps": 7130, "total_steps": 15621, "loss": 0.3488, "lr": 1.321148480968248e-06, "epoch": 0.4564368478330453, "percentage": 45.64, "elapsed_time": "0:27:58", "remaining_time": "0:33:18", "throughput": 13388.19, "total_tokens": 22466688}
|
|
{"current_steps": 7135, "total_steps": 15621, "loss": 0.4609, "lr": 1.3200901007070495e-06, "epoch": 0.45675692977402216, "percentage": 45.68, "elapsed_time": "0:27:58", "remaining_time": "0:33:16", "throughput": 13392.65, "total_tokens": 22482432}
|
|
{"current_steps": 7140, "total_steps": 15621, "loss": 0.4616, "lr": 1.3190313208096022e-06, "epoch": 0.45707701171499904, "percentage": 45.71, "elapsed_time": "0:27:59", "remaining_time": "0:33:14", "throughput": 13396.7, "total_tokens": 22496960}
|
|
{"current_steps": 7145, "total_steps": 15621, "loss": 0.3617, "lr": 1.3179721425978048e-06, "epoch": 0.4573970936559759, "percentage": 45.74, "elapsed_time": "0:27:59", "remaining_time": "0:33:12", "throughput": 13401.03, "total_tokens": 22512256}
|
|
{"current_steps": 7150, "total_steps": 15621, "loss": 0.4002, "lr": 1.3169125673940541e-06, "epoch": 0.4577171755969528, "percentage": 45.77, "elapsed_time": "0:28:00", "remaining_time": "0:33:10", "throughput": 13405.62, "total_tokens": 22528192}
|
|
{"current_steps": 7155, "total_steps": 15621, "loss": 0.4126, "lr": 1.3158525965212422e-06, "epoch": 0.45803725753792973, "percentage": 45.8, "elapsed_time": "0:28:01", "remaining_time": "0:33:09", "throughput": 13410.6, "total_tokens": 22545408}
|
|
{"current_steps": 7160, "total_steps": 15621, "loss": 0.5063, "lr": 1.3147922313027548e-06, "epoch": 0.4583573394789066, "percentage": 45.84, "elapsed_time": "0:28:01", "remaining_time": "0:33:07", "throughput": 13414.95, "total_tokens": 22560832}
|
|
{"current_steps": 7165, "total_steps": 15621, "loss": 0.3456, "lr": 1.3137314730624707e-06, "epoch": 0.4586774214198835, "percentage": 45.87, "elapsed_time": "0:28:02", "remaining_time": "0:33:05", "throughput": 13419.86, "total_tokens": 22577728}
|
|
{"current_steps": 7170, "total_steps": 15621, "loss": 0.4722, "lr": 1.3126703231247588e-06, "epoch": 0.45899750336086037, "percentage": 45.9, "elapsed_time": "0:28:03", "remaining_time": "0:33:03", "throughput": 13424.56, "total_tokens": 22594112}
|
|
{"current_steps": 7175, "total_steps": 15621, "loss": 0.3917, "lr": 1.3116087828144772e-06, "epoch": 0.45931758530183725, "percentage": 45.93, "elapsed_time": "0:28:03", "remaining_time": "0:33:01", "throughput": 13428.93, "total_tokens": 22609728}
|
|
{"current_steps": 7180, "total_steps": 15621, "loss": 0.4692, "lr": 1.310546853456972e-06, "epoch": 0.4596376672428142, "percentage": 45.96, "elapsed_time": "0:28:04", "remaining_time": "0:33:00", "throughput": 13433.08, "total_tokens": 22624704}
|
|
{"current_steps": 7185, "total_steps": 15621, "loss": 0.3145, "lr": 1.3094845363780737e-06, "epoch": 0.45995774918379106, "percentage": 46.0, "elapsed_time": "0:28:04", "remaining_time": "0:32:58", "throughput": 13437.52, "total_tokens": 22640448}
|
|
{"current_steps": 7190, "total_steps": 15621, "loss": 0.2277, "lr": 1.3084218329040976e-06, "epoch": 0.46027783112476794, "percentage": 46.03, "elapsed_time": "0:28:05", "remaining_time": "0:32:56", "throughput": 13441.8, "total_tokens": 22655680}
|
|
{"current_steps": 7195, "total_steps": 15621, "loss": 0.3769, "lr": 1.3073587443618425e-06, "epoch": 0.4605979130657448, "percentage": 46.06, "elapsed_time": "0:28:06", "remaining_time": "0:32:54", "throughput": 13446.57, "total_tokens": 22672128}
|
|
{"current_steps": 7200, "total_steps": 15621, "loss": 0.5418, "lr": 1.3062952720785861e-06, "epoch": 0.4609179950067217, "percentage": 46.09, "elapsed_time": "0:28:06", "remaining_time": "0:32:52", "throughput": 13450.71, "total_tokens": 22687104}
|
|
{"current_steps": 7205, "total_steps": 15621, "loss": 0.3724, "lr": 1.305231417382086e-06, "epoch": 0.4612380769476986, "percentage": 46.12, "elapsed_time": "0:28:07", "remaining_time": "0:32:50", "throughput": 13455.19, "total_tokens": 22702976}
|
|
{"current_steps": 7210, "total_steps": 15621, "loss": 0.3522, "lr": 1.3041671816005777e-06, "epoch": 0.4615581588886755, "percentage": 46.16, "elapsed_time": "0:28:07", "remaining_time": "0:32:49", "throughput": 13459.52, "total_tokens": 22718464}
|
|
{"current_steps": 7215, "total_steps": 15621, "loss": 0.3783, "lr": 1.3031025660627718e-06, "epoch": 0.4618782408296524, "percentage": 46.19, "elapsed_time": "0:28:08", "remaining_time": "0:32:47", "throughput": 13464.07, "total_tokens": 22734656}
|
|
{"current_steps": 7220, "total_steps": 15621, "loss": 0.4376, "lr": 1.3020375720978534e-06, "epoch": 0.4621983227706293, "percentage": 46.22, "elapsed_time": "0:28:09", "remaining_time": "0:32:45", "throughput": 13468.41, "total_tokens": 22750016}
|
|
{"current_steps": 7225, "total_steps": 15621, "loss": 0.3855, "lr": 1.3009722010354799e-06, "epoch": 0.46251840471160616, "percentage": 46.25, "elapsed_time": "0:28:09", "remaining_time": "0:32:43", "throughput": 13472.77, "total_tokens": 22765632}
|
|
{"current_steps": 7230, "total_steps": 15621, "loss": 0.4528, "lr": 1.2999064542057794e-06, "epoch": 0.46283848665258304, "percentage": 46.28, "elapsed_time": "0:28:10", "remaining_time": "0:32:41", "throughput": 13477.12, "total_tokens": 22781184}
|
|
{"current_steps": 7235, "total_steps": 15621, "loss": 0.4842, "lr": 1.2988403329393495e-06, "epoch": 0.46315856859355997, "percentage": 46.32, "elapsed_time": "0:28:10", "remaining_time": "0:32:40", "throughput": 13481.62, "total_tokens": 22797248}
|
|
{"current_steps": 7240, "total_steps": 15621, "loss": 0.4177, "lr": 1.2977738385672557e-06, "epoch": 0.46347865053453685, "percentage": 46.35, "elapsed_time": "0:28:11", "remaining_time": "0:32:38", "throughput": 13485.94, "total_tokens": 22812800}
|
|
{"current_steps": 7245, "total_steps": 15621, "loss": 0.4087, "lr": 1.2967069724210278e-06, "epoch": 0.46379873247551373, "percentage": 46.38, "elapsed_time": "0:28:12", "remaining_time": "0:32:36", "throughput": 13489.89, "total_tokens": 22827200}
|
|
{"current_steps": 7250, "total_steps": 15621, "loss": 0.5265, "lr": 1.2956397358326609e-06, "epoch": 0.4641188144164906, "percentage": 46.41, "elapsed_time": "0:28:12", "remaining_time": "0:32:34", "throughput": 13494.39, "total_tokens": 22843264}
|
|
{"current_steps": 7255, "total_steps": 15621, "loss": 0.3799, "lr": 1.294572130134613e-06, "epoch": 0.4644388963574675, "percentage": 46.44, "elapsed_time": "0:28:13", "remaining_time": "0:32:32", "throughput": 13498.67, "total_tokens": 22858624}
|
|
{"current_steps": 7260, "total_steps": 15621, "loss": 0.5557, "lr": 1.2935041566598016e-06, "epoch": 0.4647589782984444, "percentage": 46.48, "elapsed_time": "0:28:13", "remaining_time": "0:32:30", "throughput": 13502.92, "total_tokens": 22873856}
|
|
{"current_steps": 7265, "total_steps": 15621, "loss": 0.356, "lr": 1.2924358167416049e-06, "epoch": 0.4650790602394213, "percentage": 46.51, "elapsed_time": "0:28:14", "remaining_time": "0:32:29", "throughput": 13507.3, "total_tokens": 22889600}
|
|
{"current_steps": 7270, "total_steps": 15621, "loss": 0.4007, "lr": 1.2913671117138572e-06, "epoch": 0.4653991421803982, "percentage": 46.54, "elapsed_time": "0:28:15", "remaining_time": "0:32:27", "throughput": 13511.45, "total_tokens": 22904704}
|
|
{"current_steps": 7275, "total_steps": 15621, "loss": 0.3471, "lr": 1.29029804291085e-06, "epoch": 0.46571922412137506, "percentage": 46.57, "elapsed_time": "0:28:15", "remaining_time": "0:32:25", "throughput": 13515.77, "total_tokens": 22920384}
|
|
{"current_steps": 7280, "total_steps": 15621, "loss": 0.3475, "lr": 1.2892286116673269e-06, "epoch": 0.46603930606235194, "percentage": 46.6, "elapsed_time": "0:28:16", "remaining_time": "0:32:23", "throughput": 13520.44, "total_tokens": 22937024}
|
|
{"current_steps": 7285, "total_steps": 15621, "loss": 0.4934, "lr": 1.2881588193184865e-06, "epoch": 0.4663593880033289, "percentage": 46.64, "elapsed_time": "0:28:17", "remaining_time": "0:32:21", "throughput": 13525.56, "total_tokens": 22954816}
|
|
{"current_steps": 7290, "total_steps": 15621, "loss": 0.2918, "lr": 1.287088667199977e-06, "epoch": 0.46667946994430576, "percentage": 46.67, "elapsed_time": "0:28:17", "remaining_time": "0:32:20", "throughput": 13529.59, "total_tokens": 22969472}
|
|
{"current_steps": 7295, "total_steps": 15621, "loss": 0.4681, "lr": 1.2860181566478956e-06, "epoch": 0.46699955188528264, "percentage": 46.7, "elapsed_time": "0:28:18", "remaining_time": "0:32:18", "throughput": 13533.58, "total_tokens": 22984192}
|
|
{"current_steps": 7300, "total_steps": 15621, "loss": 0.3868, "lr": 1.2849472889987874e-06, "epoch": 0.4673196338262595, "percentage": 46.73, "elapsed_time": "0:28:18", "remaining_time": "0:32:16", "throughput": 13537.88, "total_tokens": 22999680}
|
|
{"current_steps": 7305, "total_steps": 15621, "loss": 0.3784, "lr": 1.2838760655896431e-06, "epoch": 0.4676397157672364, "percentage": 46.76, "elapsed_time": "0:28:19", "remaining_time": "0:32:14", "throughput": 13541.98, "total_tokens": 23014720}
|
|
{"current_steps": 7310, "total_steps": 15621, "loss": 0.4544, "lr": 1.2828044877578983e-06, "epoch": 0.4679597977082133, "percentage": 46.8, "elapsed_time": "0:28:20", "remaining_time": "0:32:12", "throughput": 13546.42, "total_tokens": 23030528}
|
|
{"current_steps": 7315, "total_steps": 15621, "loss": 0.5205, "lr": 1.2817325568414297e-06, "epoch": 0.4682798796491902, "percentage": 46.83, "elapsed_time": "0:28:20", "remaining_time": "0:32:11", "throughput": 13550.99, "total_tokens": 23046784}
|
|
{"current_steps": 7320, "total_steps": 15621, "loss": 0.3379, "lr": 1.2806602741785562e-06, "epoch": 0.4685999615901671, "percentage": 46.86, "elapsed_time": "0:28:21", "remaining_time": "0:32:09", "throughput": 13555.03, "total_tokens": 23061632}
|
|
{"current_steps": 7325, "total_steps": 15621, "loss": 0.3202, "lr": 1.2795876411080346e-06, "epoch": 0.46892004353114397, "percentage": 46.89, "elapsed_time": "0:28:21", "remaining_time": "0:32:07", "throughput": 13559.55, "total_tokens": 23077888}
|
|
{"current_steps": 7330, "total_steps": 15621, "loss": 0.3308, "lr": 1.278514658969061e-06, "epoch": 0.46924012547212085, "percentage": 46.92, "elapsed_time": "0:28:22", "remaining_time": "0:32:05", "throughput": 13563.82, "total_tokens": 23093568}
|
|
{"current_steps": 7335, "total_steps": 15621, "loss": 0.5047, "lr": 1.2774413291012648e-06, "epoch": 0.46956020741309773, "percentage": 46.96, "elapsed_time": "0:28:23", "remaining_time": "0:32:04", "throughput": 13568.01, "total_tokens": 23108992}
|
|
{"current_steps": 7340, "total_steps": 15621, "loss": 0.4191, "lr": 1.2763676528447122e-06, "epoch": 0.46988028935407467, "percentage": 46.99, "elapsed_time": "0:28:23", "remaining_time": "0:32:02", "throughput": 13572.43, "total_tokens": 23124992}
|
|
{"current_steps": 7345, "total_steps": 15621, "loss": 0.3417, "lr": 1.2752936315399003e-06, "epoch": 0.47020037129505154, "percentage": 47.02, "elapsed_time": "0:28:24", "remaining_time": "0:32:00", "throughput": 13577.16, "total_tokens": 23141888}
|
|
{"current_steps": 7350, "total_steps": 15621, "loss": 0.3346, "lr": 1.2742192665277566e-06, "epoch": 0.4705204532360284, "percentage": 47.05, "elapsed_time": "0:28:25", "remaining_time": "0:31:58", "throughput": 13581.57, "total_tokens": 23157888}
|
|
{"current_steps": 7355, "total_steps": 15621, "loss": 0.2813, "lr": 1.2731445591496393e-06, "epoch": 0.4708405351770053, "percentage": 47.08, "elapsed_time": "0:28:25", "remaining_time": "0:31:56", "throughput": 13585.63, "total_tokens": 23172864}
|
|
{"current_steps": 7360, "total_steps": 15621, "loss": 0.4622, "lr": 1.2720695107473325e-06, "epoch": 0.4711606171179822, "percentage": 47.12, "elapsed_time": "0:28:26", "remaining_time": "0:31:55", "throughput": 13589.91, "total_tokens": 23188352}
|
|
{"current_steps": 7365, "total_steps": 15621, "loss": 0.3897, "lr": 1.2709941226630475e-06, "epoch": 0.4714806990589591, "percentage": 47.15, "elapsed_time": "0:28:26", "remaining_time": "0:31:53", "throughput": 13594.17, "total_tokens": 23204096}
|
|
{"current_steps": 7370, "total_steps": 15621, "loss": 0.3513, "lr": 1.2699183962394182e-06, "epoch": 0.471800780999936, "percentage": 47.18, "elapsed_time": "0:28:27", "remaining_time": "0:31:51", "throughput": 13598.21, "total_tokens": 23219072}
|
|
{"current_steps": 7375, "total_steps": 15621, "loss": 0.4198, "lr": 1.2688423328195021e-06, "epoch": 0.4721208629409129, "percentage": 47.21, "elapsed_time": "0:28:28", "remaining_time": "0:31:49", "throughput": 13602.52, "total_tokens": 23234560}
|
|
{"current_steps": 7380, "total_steps": 15621, "loss": 0.3426, "lr": 1.267765933746777e-06, "epoch": 0.47244094488188976, "percentage": 47.24, "elapsed_time": "0:28:28", "remaining_time": "0:31:48", "throughput": 13606.9, "total_tokens": 23250304}
|
|
{"current_steps": 7385, "total_steps": 15621, "loss": 0.6245, "lr": 1.2666892003651397e-06, "epoch": 0.47276102682286664, "percentage": 47.28, "elapsed_time": "0:28:29", "remaining_time": "0:31:46", "throughput": 13611.08, "total_tokens": 23265664}
|
|
{"current_steps": 7390, "total_steps": 15621, "loss": 0.442, "lr": 1.2656121340189043e-06, "epoch": 0.4730811087638435, "percentage": 47.31, "elapsed_time": "0:28:29", "remaining_time": "0:31:44", "throughput": 13615.46, "total_tokens": 23281472}
|
|
{"current_steps": 7395, "total_steps": 15621, "loss": 0.411, "lr": 1.264534736052801e-06, "epoch": 0.47340119070482045, "percentage": 47.34, "elapsed_time": "0:28:30", "remaining_time": "0:31:42", "throughput": 13619.68, "total_tokens": 23297024}
|
|
{"current_steps": 7400, "total_steps": 15621, "loss": 0.4385, "lr": 1.2634570078119739e-06, "epoch": 0.47372127264579733, "percentage": 47.37, "elapsed_time": "0:28:31", "remaining_time": "0:31:41", "throughput": 13624.14, "total_tokens": 23313344}
|
|
{"current_steps": 7405, "total_steps": 15621, "loss": 0.5213, "lr": 1.262378950641979e-06, "epoch": 0.4740413545867742, "percentage": 47.4, "elapsed_time": "0:28:31", "remaining_time": "0:31:39", "throughput": 13628.23, "total_tokens": 23328512}
|
|
{"current_steps": 7410, "total_steps": 15621, "loss": 0.4465, "lr": 1.2613005658887836e-06, "epoch": 0.4743614365277511, "percentage": 47.44, "elapsed_time": "0:28:32", "remaining_time": "0:31:37", "throughput": 13631.84, "total_tokens": 23342400}
|
|
{"current_steps": 7415, "total_steps": 15621, "loss": 0.4134, "lr": 1.2602218548987637e-06, "epoch": 0.47468151846872797, "percentage": 47.47, "elapsed_time": "0:28:32", "remaining_time": "0:31:35", "throughput": 13636.24, "total_tokens": 23358400}
|
|
{"current_steps": 7420, "total_steps": 15621, "loss": 0.4102, "lr": 1.2591428190187029e-06, "epoch": 0.4750016004097049, "percentage": 47.5, "elapsed_time": "0:28:33", "remaining_time": "0:31:33", "throughput": 13640.29, "total_tokens": 23373376}
|
|
{"current_steps": 7425, "total_steps": 15621, "loss": 0.5013, "lr": 1.2580634595957898e-06, "epoch": 0.4753216823506818, "percentage": 47.53, "elapsed_time": "0:28:34", "remaining_time": "0:31:32", "throughput": 13645.07, "total_tokens": 23390400}
|
|
{"current_steps": 7430, "total_steps": 15621, "loss": 0.3705, "lr": 1.2569837779776172e-06, "epoch": 0.47564176429165866, "percentage": 47.56, "elapsed_time": "0:28:34", "remaining_time": "0:31:30", "throughput": 13649.45, "total_tokens": 23406400}
|
|
{"current_steps": 7435, "total_steps": 15621, "loss": 0.3131, "lr": 1.2559037755121804e-06, "epoch": 0.47596184623263554, "percentage": 47.6, "elapsed_time": "0:28:35", "remaining_time": "0:31:28", "throughput": 13653.62, "total_tokens": 23421824}
|
|
{"current_steps": 7440, "total_steps": 15621, "loss": 0.4512, "lr": 1.2548234535478754e-06, "epoch": 0.4762819281736124, "percentage": 47.63, "elapsed_time": "0:28:36", "remaining_time": "0:31:26", "throughput": 13658.21, "total_tokens": 23438272}
|
|
{"current_steps": 7445, "total_steps": 15621, "loss": 0.4216, "lr": 1.2537428134334968e-06, "epoch": 0.47660201011458936, "percentage": 47.66, "elapsed_time": "0:28:36", "remaining_time": "0:31:25", "throughput": 13662.8, "total_tokens": 23454976}
|
|
{"current_steps": 7450, "total_steps": 15621, "loss": 0.5189, "lr": 1.252661856518236e-06, "epoch": 0.47692209205556624, "percentage": 47.69, "elapsed_time": "0:28:37", "remaining_time": "0:31:23", "throughput": 13667.31, "total_tokens": 23471168}
|
|
{"current_steps": 7455, "total_steps": 15621, "loss": 0.3564, "lr": 1.251580584151681e-06, "epoch": 0.4772421739965431, "percentage": 47.72, "elapsed_time": "0:28:37", "remaining_time": "0:31:21", "throughput": 13671.54, "total_tokens": 23486720}
|
|
{"current_steps": 7460, "total_steps": 15621, "loss": 0.3059, "lr": 1.2504989976838129e-06, "epoch": 0.47756225593752, "percentage": 47.76, "elapsed_time": "0:28:38", "remaining_time": "0:31:20", "throughput": 13675.99, "total_tokens": 23502912}
|
|
{"current_steps": 7465, "total_steps": 15621, "loss": 0.3667, "lr": 1.2494170984650048e-06, "epoch": 0.4778823378784969, "percentage": 47.79, "elapsed_time": "0:28:39", "remaining_time": "0:31:18", "throughput": 13680.67, "total_tokens": 23519552}
|
|
{"current_steps": 7470, "total_steps": 15621, "loss": 0.4019, "lr": 1.248334887846021e-06, "epoch": 0.4782024198194738, "percentage": 47.82, "elapsed_time": "0:28:39", "remaining_time": "0:31:16", "throughput": 13685.16, "total_tokens": 23535936}
|
|
{"current_steps": 7475, "total_steps": 15621, "loss": 0.4373, "lr": 1.2472523671780135e-06, "epoch": 0.4785225017604507, "percentage": 47.85, "elapsed_time": "0:28:40", "remaining_time": "0:31:14", "throughput": 13689.19, "total_tokens": 23551040}
|
|
{"current_steps": 7480, "total_steps": 15621, "loss": 0.3115, "lr": 1.2461695378125233e-06, "epoch": 0.47884258370142757, "percentage": 47.88, "elapsed_time": "0:28:41", "remaining_time": "0:31:13", "throughput": 13693.22, "total_tokens": 23566208}
|
|
{"current_steps": 7485, "total_steps": 15621, "loss": 0.4197, "lr": 1.245086401101474e-06, "epoch": 0.47916266564240445, "percentage": 47.92, "elapsed_time": "0:28:41", "remaining_time": "0:31:11", "throughput": 13697.43, "total_tokens": 23581696}
|
|
{"current_steps": 7490, "total_steps": 15621, "loss": 0.4454, "lr": 1.2440029583971757e-06, "epoch": 0.47948274758338133, "percentage": 47.95, "elapsed_time": "0:28:42", "remaining_time": "0:31:09", "throughput": 13701.59, "total_tokens": 23597248}
|
|
{"current_steps": 7495, "total_steps": 15621, "loss": 0.4913, "lr": 1.2429192110523188e-06, "epoch": 0.4798028295243582, "percentage": 47.98, "elapsed_time": "0:28:42", "remaining_time": "0:31:07", "throughput": 13705.75, "total_tokens": 23612800}
|
|
{"current_steps": 7500, "total_steps": 15621, "loss": 0.3338, "lr": 1.2418351604199746e-06, "epoch": 0.48012291146533514, "percentage": 48.01, "elapsed_time": "0:28:43", "remaining_time": "0:31:06", "throughput": 13710.17, "total_tokens": 23629056}
|
|
{"current_steps": 7505, "total_steps": 15621, "loss": 0.4447, "lr": 1.2407508078535934e-06, "epoch": 0.480442993406312, "percentage": 48.04, "elapsed_time": "0:28:44", "remaining_time": "0:31:04", "throughput": 13714.26, "total_tokens": 23644352}
|
|
{"current_steps": 7510, "total_steps": 15621, "loss": 0.2785, "lr": 1.2396661547070017e-06, "epoch": 0.4807630753472889, "percentage": 48.08, "elapsed_time": "0:28:44", "remaining_time": "0:31:02", "throughput": 13718.89, "total_tokens": 23661120}
|
|
{"current_steps": 7515, "total_steps": 15621, "loss": 0.3347, "lr": 1.238581202334402e-06, "epoch": 0.4810831572882658, "percentage": 48.11, "elapsed_time": "0:28:45", "remaining_time": "0:31:01", "throughput": 13723.41, "total_tokens": 23677632}
|
|
{"current_steps": 7520, "total_steps": 15621, "loss": 0.3673, "lr": 1.2374959520903699e-06, "epoch": 0.48140323922924266, "percentage": 48.14, "elapsed_time": "0:28:45", "remaining_time": "0:30:59", "throughput": 13727.89, "total_tokens": 23693952}
|
|
{"current_steps": 7525, "total_steps": 15621, "loss": 0.3341, "lr": 1.2364104053298531e-06, "epoch": 0.4817233211702196, "percentage": 48.17, "elapsed_time": "0:28:46", "remaining_time": "0:30:57", "throughput": 13731.84, "total_tokens": 23708736}
|
|
{"current_steps": 7530, "total_steps": 15621, "loss": 0.3913, "lr": 1.2353245634081692e-06, "epoch": 0.4820434031111965, "percentage": 48.2, "elapsed_time": "0:28:47", "remaining_time": "0:30:55", "throughput": 13736.24, "total_tokens": 23724864}
|
|
{"current_steps": 7535, "total_steps": 15621, "loss": 0.4148, "lr": 1.2342384276810053e-06, "epoch": 0.48236348505217336, "percentage": 48.24, "elapsed_time": "0:28:47", "remaining_time": "0:30:54", "throughput": 13740.31, "total_tokens": 23740160}
|
|
{"current_steps": 7540, "total_steps": 15621, "loss": 0.423, "lr": 1.233151999504414e-06, "epoch": 0.48268356699315024, "percentage": 48.27, "elapsed_time": "0:28:48", "remaining_time": "0:30:52", "throughput": 13744.33, "total_tokens": 23755264}
|
|
{"current_steps": 7545, "total_steps": 15621, "loss": 0.3317, "lr": 1.232065280234814e-06, "epoch": 0.4830036489341271, "percentage": 48.3, "elapsed_time": "0:28:48", "remaining_time": "0:30:50", "throughput": 13748.19, "total_tokens": 23770112}
|
|
{"current_steps": 7550, "total_steps": 15621, "loss": 0.4189, "lr": 1.2309782712289867e-06, "epoch": 0.48332373087510405, "percentage": 48.33, "elapsed_time": "0:28:49", "remaining_time": "0:30:48", "throughput": 13752.37, "total_tokens": 23785536}
|
|
{"current_steps": 7555, "total_steps": 15621, "loss": 0.4307, "lr": 1.2298909738440758e-06, "epoch": 0.48364381281608093, "percentage": 48.36, "elapsed_time": "0:28:50", "remaining_time": "0:30:47", "throughput": 13756.64, "total_tokens": 23801280}
|
|
{"current_steps": 7560, "total_steps": 15621, "loss": 0.371, "lr": 1.2288033894375847e-06, "epoch": 0.4839638947570578, "percentage": 48.4, "elapsed_time": "0:28:50", "remaining_time": "0:30:45", "throughput": 13760.65, "total_tokens": 23816448}
|
|
{"current_steps": 7565, "total_steps": 15621, "loss": 0.5539, "lr": 1.2277155193673755e-06, "epoch": 0.4842839766980347, "percentage": 48.43, "elapsed_time": "0:28:51", "remaining_time": "0:30:43", "throughput": 13764.98, "total_tokens": 23832512}
|
|
{"current_steps": 7570, "total_steps": 15621, "loss": 0.3968, "lr": 1.2266273649916668e-06, "epoch": 0.48460405863901157, "percentage": 48.46, "elapsed_time": "0:28:51", "remaining_time": "0:30:42", "throughput": 13769.24, "total_tokens": 23848192}
|
|
{"current_steps": 7575, "total_steps": 15621, "loss": 0.4249, "lr": 1.2255389276690318e-06, "epoch": 0.48492414057998845, "percentage": 48.49, "elapsed_time": "0:28:52", "remaining_time": "0:30:40", "throughput": 13773.42, "total_tokens": 23863808}
|
|
{"current_steps": 7580, "total_steps": 15621, "loss": 0.2927, "lr": 1.2244502087583978e-06, "epoch": 0.4852442225209654, "percentage": 48.52, "elapsed_time": "0:28:53", "remaining_time": "0:30:38", "throughput": 13778.1, "total_tokens": 23880960}
|
|
{"current_steps": 7585, "total_steps": 15621, "loss": 0.3969, "lr": 1.2233612096190426e-06, "epoch": 0.48556430446194226, "percentage": 48.56, "elapsed_time": "0:28:53", "remaining_time": "0:30:36", "throughput": 13782.15, "total_tokens": 23896256}
|
|
{"current_steps": 7590, "total_steps": 15621, "loss": 0.5189, "lr": 1.222271931610595e-06, "epoch": 0.48588438640291914, "percentage": 48.59, "elapsed_time": "0:28:54", "remaining_time": "0:30:35", "throughput": 13786.66, "total_tokens": 23912832}
|
|
{"current_steps": 7595, "total_steps": 15621, "loss": 0.4929, "lr": 1.2211823760930306e-06, "epoch": 0.486204468343896, "percentage": 48.62, "elapsed_time": "0:28:55", "remaining_time": "0:30:33", "throughput": 13790.96, "total_tokens": 23928768}
|
|
{"current_steps": 7600, "total_steps": 15621, "loss": 0.4206, "lr": 1.2200925444266726e-06, "epoch": 0.4865245502848729, "percentage": 48.65, "elapsed_time": "0:28:55", "remaining_time": "0:30:31", "throughput": 13795.37, "total_tokens": 23945088}
|
|
{"current_steps": 7605, "total_steps": 15621, "loss": 0.5087, "lr": 1.219002437972189e-06, "epoch": 0.48684463222584984, "percentage": 48.68, "elapsed_time": "0:28:56", "remaining_time": "0:30:30", "throughput": 13799.33, "total_tokens": 23960192}
|
|
{"current_steps": 7610, "total_steps": 15621, "loss": 0.4208, "lr": 1.21791205809059e-06, "epoch": 0.4871647141668267, "percentage": 48.72, "elapsed_time": "0:28:56", "remaining_time": "0:30:28", "throughput": 13803.92, "total_tokens": 23977152}
|
|
{"current_steps": 7615, "total_steps": 15621, "loss": 0.3611, "lr": 1.2168214061432283e-06, "epoch": 0.4874847961078036, "percentage": 48.75, "elapsed_time": "0:28:57", "remaining_time": "0:30:26", "throughput": 13807.9, "total_tokens": 23992448}
|
|
{"current_steps": 7620, "total_steps": 15621, "loss": 0.4276, "lr": 1.2157304834917947e-06, "epoch": 0.4878048780487805, "percentage": 48.78, "elapsed_time": "0:28:58", "remaining_time": "0:30:25", "throughput": 13812.12, "total_tokens": 24008384}
|
|
{"current_steps": 7625, "total_steps": 15621, "loss": 0.6241, "lr": 1.2146392914983202e-06, "epoch": 0.48812495998975736, "percentage": 48.81, "elapsed_time": "0:28:58", "remaining_time": "0:30:23", "throughput": 13816.82, "total_tokens": 24025728}
|
|
{"current_steps": 7630, "total_steps": 15621, "loss": 0.5169, "lr": 1.2135478315251694e-06, "epoch": 0.4884450419307343, "percentage": 48.84, "elapsed_time": "0:28:59", "remaining_time": "0:30:21", "throughput": 13820.65, "total_tokens": 24040448}
|
|
{"current_steps": 7635, "total_steps": 15621, "loss": 0.3428, "lr": 1.2124561049350442e-06, "epoch": 0.48876512387171117, "percentage": 48.88, "elapsed_time": "0:29:00", "remaining_time": "0:30:20", "throughput": 13824.46, "total_tokens": 24055168}
|
|
{"current_steps": 7640, "total_steps": 15621, "loss": 0.453, "lr": 1.2113641130909772e-06, "epoch": 0.48908520581268805, "percentage": 48.91, "elapsed_time": "0:29:00", "remaining_time": "0:30:18", "throughput": 13828.31, "total_tokens": 24070016}
|
|
{"current_steps": 7645, "total_steps": 15621, "loss": 0.3108, "lr": 1.2102718573563334e-06, "epoch": 0.48940528775366493, "percentage": 48.94, "elapsed_time": "0:29:01", "remaining_time": "0:30:16", "throughput": 13832.14, "total_tokens": 24084800}
|
|
{"current_steps": 7650, "total_steps": 15621, "loss": 0.4842, "lr": 1.2091793390948066e-06, "epoch": 0.4897253696946418, "percentage": 48.97, "elapsed_time": "0:29:01", "remaining_time": "0:30:14", "throughput": 13836.28, "total_tokens": 24100416}
|
|
{"current_steps": 7655, "total_steps": 15621, "loss": 0.2906, "lr": 1.2080865596704191e-06, "epoch": 0.49004545163561875, "percentage": 49.0, "elapsed_time": "0:29:02", "remaining_time": "0:30:13", "throughput": 13840.75, "total_tokens": 24117120}
|
|
{"current_steps": 7660, "total_steps": 15621, "loss": 0.4391, "lr": 1.2069935204475187e-06, "epoch": 0.4903655335765956, "percentage": 49.04, "elapsed_time": "0:29:03", "remaining_time": "0:30:11", "throughput": 13844.68, "total_tokens": 24132224}
|
|
{"current_steps": 7665, "total_steps": 15621, "loss": 0.3992, "lr": 1.2059002227907776e-06, "epoch": 0.4906856155175725, "percentage": 49.07, "elapsed_time": "0:29:03", "remaining_time": "0:30:09", "throughput": 13848.7, "total_tokens": 24147712}
|
|
{"current_steps": 7670, "total_steps": 15621, "loss": 0.4121, "lr": 1.2048066680651908e-06, "epoch": 0.4910056974585494, "percentage": 49.1, "elapsed_time": "0:29:04", "remaining_time": "0:30:08", "throughput": 13853.16, "total_tokens": 24164288}
|
|
{"current_steps": 7675, "total_steps": 15621, "loss": 0.5577, "lr": 1.2037128576360743e-06, "epoch": 0.49132577939952626, "percentage": 49.13, "elapsed_time": "0:29:05", "remaining_time": "0:30:07", "throughput": 13861.27, "total_tokens": 24193728}
|
|
{"current_steps": 7680, "total_steps": 15621, "loss": 0.4148, "lr": 1.2026187928690627e-06, "epoch": 0.49164586134050314, "percentage": 49.16, "elapsed_time": "0:29:06", "remaining_time": "0:30:05", "throughput": 13865.22, "total_tokens": 24208832}
|
|
{"current_steps": 7685, "total_steps": 15621, "loss": 0.5085, "lr": 1.2015244751301098e-06, "epoch": 0.4919659432814801, "percentage": 49.2, "elapsed_time": "0:29:06", "remaining_time": "0:30:03", "throughput": 13869.0, "total_tokens": 24223424}
|
|
{"current_steps": 7690, "total_steps": 15621, "loss": 0.43, "lr": 1.2004299057854832e-06, "epoch": 0.49228602522245696, "percentage": 49.23, "elapsed_time": "0:29:07", "remaining_time": "0:30:01", "throughput": 13873.06, "total_tokens": 24238976}
|
|
{"current_steps": 7695, "total_steps": 15621, "loss": 0.3893, "lr": 1.1993350862017661e-06, "epoch": 0.49260610716343384, "percentage": 49.26, "elapsed_time": "0:29:07", "remaining_time": "0:30:00", "throughput": 13876.84, "total_tokens": 24253632}
|
|
{"current_steps": 7700, "total_steps": 15621, "loss": 0.3968, "lr": 1.1982400177458534e-06, "epoch": 0.4929261891044107, "percentage": 49.29, "elapsed_time": "0:29:08", "remaining_time": "0:29:58", "throughput": 13881.51, "total_tokens": 24270720}
|
|
{"current_steps": 7705, "total_steps": 15621, "loss": 0.4284, "lr": 1.197144701784951e-06, "epoch": 0.4932462710453876, "percentage": 49.32, "elapsed_time": "0:29:09", "remaining_time": "0:29:56", "throughput": 13885.21, "total_tokens": 24285312}
|
|
{"current_steps": 7710, "total_steps": 15621, "loss": 0.3926, "lr": 1.1960491396865735e-06, "epoch": 0.49356635298636453, "percentage": 49.36, "elapsed_time": "0:29:09", "remaining_time": "0:29:55", "throughput": 13889.09, "total_tokens": 24300352}
|
|
{"current_steps": 7715, "total_steps": 15621, "loss": 0.3458, "lr": 1.1949533328185435e-06, "epoch": 0.4938864349273414, "percentage": 49.39, "elapsed_time": "0:29:10", "remaining_time": "0:29:53", "throughput": 13893.56, "total_tokens": 24317056}
|
|
{"current_steps": 7720, "total_steps": 15621, "loss": 0.3741, "lr": 1.1938572825489883e-06, "epoch": 0.4942065168683183, "percentage": 49.42, "elapsed_time": "0:29:10", "remaining_time": "0:29:51", "throughput": 13897.84, "total_tokens": 24333184}
|
|
{"current_steps": 7725, "total_steps": 15621, "loss": 0.409, "lr": 1.1927609902463394e-06, "epoch": 0.49452659880929517, "percentage": 49.45, "elapsed_time": "0:29:11", "remaining_time": "0:29:50", "throughput": 13901.82, "total_tokens": 24348672}
|
|
{"current_steps": 7730, "total_steps": 15621, "loss": 0.4346, "lr": 1.1916644572793314e-06, "epoch": 0.49484668075027205, "percentage": 49.48, "elapsed_time": "0:29:12", "remaining_time": "0:29:48", "throughput": 13905.66, "total_tokens": 24363648}
|
|
{"current_steps": 7735, "total_steps": 15621, "loss": 0.4964, "lr": 1.190567685016998e-06, "epoch": 0.495166762691249, "percentage": 49.52, "elapsed_time": "0:29:12", "remaining_time": "0:29:46", "throughput": 13910.35, "total_tokens": 24380992}
|
|
{"current_steps": 7740, "total_steps": 15621, "loss": 0.4107, "lr": 1.189470674828672e-06, "epoch": 0.49548684463222586, "percentage": 49.55, "elapsed_time": "0:29:13", "remaining_time": "0:29:45", "throughput": 13914.12, "total_tokens": 24395776}
|
|
{"current_steps": 7745, "total_steps": 15621, "loss": 0.3878, "lr": 1.188373428083984e-06, "epoch": 0.49580692657320274, "percentage": 49.58, "elapsed_time": "0:29:13", "remaining_time": "0:29:43", "throughput": 13918.28, "total_tokens": 24411584}
|
|
{"current_steps": 7750, "total_steps": 15621, "loss": 0.5219, "lr": 1.1872759461528596e-06, "epoch": 0.4961270085141796, "percentage": 49.61, "elapsed_time": "0:29:14", "remaining_time": "0:29:41", "throughput": 13922.14, "total_tokens": 24426560}
|
|
{"current_steps": 7755, "total_steps": 15621, "loss": 0.39, "lr": 1.1861782304055174e-06, "epoch": 0.4964470904551565, "percentage": 49.64, "elapsed_time": "0:29:15", "remaining_time": "0:29:40", "throughput": 13926.12, "total_tokens": 24441856}
|
|
{"current_steps": 7760, "total_steps": 15621, "loss": 0.3345, "lr": 1.1850802822124686e-06, "epoch": 0.4967671723961334, "percentage": 49.68, "elapsed_time": "0:29:15", "remaining_time": "0:29:38", "throughput": 13930.2, "total_tokens": 24457472}
|
|
{"current_steps": 7765, "total_steps": 15621, "loss": 0.5005, "lr": 1.1839821029445143e-06, "epoch": 0.4970872543371103, "percentage": 49.71, "elapsed_time": "0:29:16", "remaining_time": "0:29:36", "throughput": 13933.9, "total_tokens": 24471936}
|
|
{"current_steps": 7770, "total_steps": 15621, "loss": 0.3195, "lr": 1.1828836939727442e-06, "epoch": 0.4974073362780872, "percentage": 49.74, "elapsed_time": "0:29:16", "remaining_time": "0:29:35", "throughput": 13938.0, "total_tokens": 24487616}
|
|
{"current_steps": 7775, "total_steps": 15621, "loss": 0.433, "lr": 1.181785056668535e-06, "epoch": 0.4977274182190641, "percentage": 49.77, "elapsed_time": "0:29:17", "remaining_time": "0:29:33", "throughput": 13942.25, "total_tokens": 24503936}
|
|
{"current_steps": 7780, "total_steps": 15621, "loss": 0.4212, "lr": 1.180686192403548e-06, "epoch": 0.49804750016004096, "percentage": 49.8, "elapsed_time": "0:29:18", "remaining_time": "0:29:31", "throughput": 13945.95, "total_tokens": 24518464}
|
|
{"current_steps": 7785, "total_steps": 15621, "loss": 0.3439, "lr": 1.1795871025497285e-06, "epoch": 0.49836758210101784, "percentage": 49.84, "elapsed_time": "0:29:18", "remaining_time": "0:29:30", "throughput": 13949.68, "total_tokens": 24533184}
|
|
{"current_steps": 7790, "total_steps": 15621, "loss": 0.4122, "lr": 1.1784877884793029e-06, "epoch": 0.49868766404199477, "percentage": 49.87, "elapsed_time": "0:29:19", "remaining_time": "0:29:28", "throughput": 13953.81, "total_tokens": 24548992}
|
|
{"current_steps": 7795, "total_steps": 15621, "loss": 0.3627, "lr": 1.1773882515647776e-06, "epoch": 0.49900774598297165, "percentage": 49.9, "elapsed_time": "0:29:19", "remaining_time": "0:29:26", "throughput": 13958.48, "total_tokens": 24566592}
|
|
{"current_steps": 7800, "total_steps": 15621, "loss": 0.4811, "lr": 1.1762884931789376e-06, "epoch": 0.49932782792394853, "percentage": 49.93, "elapsed_time": "0:29:20", "remaining_time": "0:29:25", "throughput": 13963.01, "total_tokens": 24583552}
|
|
{"current_steps": 7805, "total_steps": 15621, "loss": 0.4548, "lr": 1.1751885146948436e-06, "epoch": 0.4996479098649254, "percentage": 49.96, "elapsed_time": "0:29:21", "remaining_time": "0:29:23", "throughput": 13967.15, "total_tokens": 24599552}
|
|
{"current_steps": 7810, "total_steps": 15621, "loss": 0.3633, "lr": 1.1740883174858327e-06, "epoch": 0.4999679918059023, "percentage": 50.0, "elapsed_time": "0:29:21", "remaining_time": "0:29:22", "throughput": 13971.07, "total_tokens": 24614912}
|
|
{"current_steps": 7815, "total_steps": 15621, "loss": 0.3649, "lr": 1.1729879029255127e-06, "epoch": 0.5002880737468792, "percentage": 50.03, "elapsed_time": "0:29:22", "remaining_time": "0:29:20", "throughput": 13974.85, "total_tokens": 24629696}
|
|
{"current_steps": 7820, "total_steps": 15621, "loss": 0.3939, "lr": 1.171887272387765e-06, "epoch": 0.5006081556878561, "percentage": 50.06, "elapsed_time": "0:29:23", "remaining_time": "0:29:18", "throughput": 13979.17, "total_tokens": 24646208}
|
|
{"current_steps": 7820, "total_steps": 15621, "eval_loss": 0.4134162962436676, "epoch": 0.5006081556878561, "percentage": 50.06, "elapsed_time": "0:30:12", "remaining_time": "0:30:07", "throughput": 13600.06, "total_tokens": 24646208}
|
|
{"current_steps": 7825, "total_steps": 15621, "loss": 0.4985, "lr": 1.1707864272467397e-06, "epoch": 0.500928237628833, "percentage": 50.09, "elapsed_time": "0:30:43", "remaining_time": "0:30:36", "throughput": 13379.2, "total_tokens": 24661120}
|
|
{"current_steps": 7830, "total_steps": 15621, "loss": 0.423, "lr": 1.169685368876855e-06, "epoch": 0.5012483195698099, "percentage": 50.12, "elapsed_time": "0:30:43", "remaining_time": "0:30:34", "throughput": 13383.83, "total_tokens": 24678336}
|
|
{"current_steps": 7835, "total_steps": 15621, "loss": 0.5534, "lr": 1.1685840986527946e-06, "epoch": 0.5015684015107867, "percentage": 50.16, "elapsed_time": "0:30:44", "remaining_time": "0:30:32", "throughput": 13387.99, "total_tokens": 24694336}
|
|
{"current_steps": 7840, "total_steps": 15621, "loss": 0.4044, "lr": 1.1674826179495076e-06, "epoch": 0.5018884834517636, "percentage": 50.19, "elapsed_time": "0:30:45", "remaining_time": "0:30:31", "throughput": 13391.6, "total_tokens": 24708608}
|
|
{"current_steps": 7845, "total_steps": 15621, "loss": 0.415, "lr": 1.1663809281422056e-06, "epoch": 0.5022085653927405, "percentage": 50.22, "elapsed_time": "0:30:45", "remaining_time": "0:30:29", "throughput": 13395.76, "total_tokens": 24724672}
|
|
{"current_steps": 7850, "total_steps": 15621, "loss": 0.4562, "lr": 1.1652790306063615e-06, "epoch": 0.5025286473337174, "percentage": 50.25, "elapsed_time": "0:30:46", "remaining_time": "0:30:27", "throughput": 13399.84, "total_tokens": 24740608}
|
|
{"current_steps": 7855, "total_steps": 15621, "loss": 0.416, "lr": 1.164176926717707e-06, "epoch": 0.5028487292746944, "percentage": 50.28, "elapsed_time": "0:30:47", "remaining_time": "0:30:26", "throughput": 13404.62, "total_tokens": 24758528}
|
|
{"current_steps": 7860, "total_steps": 15621, "loss": 0.3702, "lr": 1.1630746178522315e-06, "epoch": 0.5031688112156713, "percentage": 50.32, "elapsed_time": "0:30:47", "remaining_time": "0:30:24", "throughput": 13408.28, "total_tokens": 24772992}
|
|
{"current_steps": 7865, "total_steps": 15621, "loss": 0.4398, "lr": 1.1619721053861816e-06, "epoch": 0.5034888931566481, "percentage": 50.35, "elapsed_time": "0:30:48", "remaining_time": "0:30:22", "throughput": 13412.14, "total_tokens": 24788160}
|
|
{"current_steps": 7870, "total_steps": 15621, "loss": 0.4093, "lr": 1.1608693906960558e-06, "epoch": 0.503808975097625, "percentage": 50.38, "elapsed_time": "0:30:48", "remaining_time": "0:30:20", "throughput": 13416.31, "total_tokens": 24804224}
|
|
{"current_steps": 7875, "total_steps": 15621, "loss": 0.4426, "lr": 1.1597664751586069e-06, "epoch": 0.5041290570386019, "percentage": 50.41, "elapsed_time": "0:30:49", "remaining_time": "0:30:19", "throughput": 13420.67, "total_tokens": 24820928}
|
|
{"current_steps": 7880, "total_steps": 15621, "loss": 0.3837, "lr": 1.1586633601508382e-06, "epoch": 0.5044491389795788, "percentage": 50.44, "elapsed_time": "0:30:50", "remaining_time": "0:30:17", "throughput": 13424.43, "total_tokens": 24835776}
|
|
{"current_steps": 7885, "total_steps": 15621, "loss": 0.3858, "lr": 1.1575600470500014e-06, "epoch": 0.5047692209205557, "percentage": 50.48, "elapsed_time": "0:30:50", "remaining_time": "0:30:15", "throughput": 13428.53, "total_tokens": 24851648}
|
|
{"current_steps": 7890, "total_steps": 15621, "loss": 0.42, "lr": 1.1564565372335957e-06, "epoch": 0.5050893028615325, "percentage": 50.51, "elapsed_time": "0:30:51", "remaining_time": "0:30:13", "throughput": 13432.45, "total_tokens": 24866880}
|
|
{"current_steps": 7895, "total_steps": 15621, "loss": 0.3162, "lr": 1.1553528320793663e-06, "epoch": 0.5054093848025094, "percentage": 50.54, "elapsed_time": "0:30:51", "remaining_time": "0:30:12", "throughput": 13436.26, "total_tokens": 24881856}
|
|
{"current_steps": 7900, "total_steps": 15621, "loss": 0.4364, "lr": 1.1542489329653022e-06, "epoch": 0.5057294667434863, "percentage": 50.57, "elapsed_time": "0:30:52", "remaining_time": "0:30:10", "throughput": 13440.67, "total_tokens": 24898560}
|
|
{"current_steps": 7905, "total_steps": 15621, "loss": 0.3754, "lr": 1.1531448412696343e-06, "epoch": 0.5060495486844632, "percentage": 50.6, "elapsed_time": "0:30:53", "remaining_time": "0:30:08", "throughput": 13444.36, "total_tokens": 24913216}
|
|
{"current_steps": 7910, "total_steps": 15621, "loss": 0.4913, "lr": 1.1520405583708337e-06, "epoch": 0.5063696306254402, "percentage": 50.64, "elapsed_time": "0:30:53", "remaining_time": "0:30:07", "throughput": 13448.35, "total_tokens": 24928832}
|
|
{"current_steps": 7915, "total_steps": 15621, "loss": 0.4917, "lr": 1.1509360856476109e-06, "epoch": 0.506689712566417, "percentage": 50.67, "elapsed_time": "0:30:54", "remaining_time": "0:30:05", "throughput": 13452.37, "total_tokens": 24944512}
|
|
{"current_steps": 7920, "total_steps": 15621, "loss": 0.4612, "lr": 1.149831424478913e-06, "epoch": 0.5070097945073939, "percentage": 50.7, "elapsed_time": "0:30:54", "remaining_time": "0:30:03", "throughput": 13456.22, "total_tokens": 24959744}
|
|
{"current_steps": 7925, "total_steps": 15621, "loss": 0.3948, "lr": 1.1487265762439224e-06, "epoch": 0.5073298764483708, "percentage": 50.73, "elapsed_time": "0:30:55", "remaining_time": "0:30:01", "throughput": 13460.24, "total_tokens": 24975488}
|
|
{"current_steps": 7930, "total_steps": 15621, "loss": 0.362, "lr": 1.1476215423220547e-06, "epoch": 0.5076499583893477, "percentage": 50.76, "elapsed_time": "0:30:56", "remaining_time": "0:30:00", "throughput": 13463.95, "total_tokens": 24990272}
|
|
{"current_steps": 7935, "total_steps": 15621, "loss": 0.3761, "lr": 1.146516324092959e-06, "epoch": 0.5079700403303246, "percentage": 50.8, "elapsed_time": "0:30:56", "remaining_time": "0:29:58", "throughput": 13468.09, "total_tokens": 25006272}
|
|
{"current_steps": 7940, "total_steps": 15621, "loss": 0.2954, "lr": 1.1454109229365117e-06, "epoch": 0.5082901222713014, "percentage": 50.83, "elapsed_time": "0:30:57", "remaining_time": "0:29:56", "throughput": 13472.26, "total_tokens": 25022464}
|
|
{"current_steps": 7945, "total_steps": 15621, "loss": 0.3132, "lr": 1.14430534023282e-06, "epoch": 0.5086102042122783, "percentage": 50.86, "elapsed_time": "0:30:57", "remaining_time": "0:29:55", "throughput": 13476.01, "total_tokens": 25037376}
|
|
{"current_steps": 7950, "total_steps": 15621, "loss": 0.4736, "lr": 1.1431995773622167e-06, "epoch": 0.5089302861532552, "percentage": 50.89, "elapsed_time": "0:30:58", "remaining_time": "0:29:53", "throughput": 13480.14, "total_tokens": 25053440}
|
|
{"current_steps": 7955, "total_steps": 15621, "loss": 0.4369, "lr": 1.1420936357052597e-06, "epoch": 0.5092503680942321, "percentage": 50.93, "elapsed_time": "0:30:59", "remaining_time": "0:29:51", "throughput": 13484.13, "total_tokens": 25069120}
|
|
{"current_steps": 7960, "total_steps": 15621, "loss": 0.3078, "lr": 1.1409875166427303e-06, "epoch": 0.5095704500352091, "percentage": 50.96, "elapsed_time": "0:30:59", "remaining_time": "0:29:49", "throughput": 13487.97, "total_tokens": 25084224}
|
|
{"current_steps": 7965, "total_steps": 15621, "loss": 0.4996, "lr": 1.1398812215556308e-06, "epoch": 0.509890531976186, "percentage": 50.99, "elapsed_time": "0:31:00", "remaining_time": "0:29:48", "throughput": 13491.86, "total_tokens": 25099520}
|
|
{"current_steps": 7970, "total_steps": 15621, "loss": 0.362, "lr": 1.1387747518251837e-06, "epoch": 0.5102106139171628, "percentage": 51.02, "elapsed_time": "0:31:00", "remaining_time": "0:29:46", "throughput": 13495.85, "total_tokens": 25115200}
|
|
{"current_steps": 7975, "total_steps": 15621, "loss": 0.3266, "lr": 1.13766810883283e-06, "epoch": 0.5105306958581397, "percentage": 51.05, "elapsed_time": "0:31:01", "remaining_time": "0:29:44", "throughput": 13500.03, "total_tokens": 25131520}
|
|
{"current_steps": 7980, "total_steps": 15621, "loss": 0.5172, "lr": 1.1365612939602255e-06, "epoch": 0.5108507777991166, "percentage": 51.09, "elapsed_time": "0:31:02", "remaining_time": "0:29:43", "throughput": 13504.17, "total_tokens": 25147776}
|
|
{"current_steps": 7985, "total_steps": 15621, "loss": 0.3683, "lr": 1.1354543085892423e-06, "epoch": 0.5111708597400935, "percentage": 51.12, "elapsed_time": "0:31:02", "remaining_time": "0:29:41", "throughput": 13507.92, "total_tokens": 25162816}
|
|
{"current_steps": 7990, "total_steps": 15621, "loss": 0.3333, "lr": 1.1343471541019646e-06, "epoch": 0.5114909416810703, "percentage": 51.15, "elapsed_time": "0:31:03", "remaining_time": "0:29:39", "throughput": 13511.98, "total_tokens": 25178752}
|
|
{"current_steps": 7995, "total_steps": 15621, "loss": 0.3719, "lr": 1.1332398318806872e-06, "epoch": 0.5118110236220472, "percentage": 51.18, "elapsed_time": "0:31:04", "remaining_time": "0:29:38", "throughput": 13515.79, "total_tokens": 25194048}
|
|
{"current_steps": 8000, "total_steps": 15621, "loss": 0.3796, "lr": 1.1321323433079158e-06, "epoch": 0.5121311055630241, "percentage": 51.21, "elapsed_time": "0:31:04", "remaining_time": "0:29:36", "throughput": 13519.6, "total_tokens": 25209216}
|
|
{"current_steps": 8005, "total_steps": 15621, "loss": 0.379, "lr": 1.1310246897663623e-06, "epoch": 0.512451187504001, "percentage": 51.25, "elapsed_time": "0:31:05", "remaining_time": "0:29:34", "throughput": 13523.47, "total_tokens": 25224640}
|
|
{"current_steps": 8010, "total_steps": 15621, "loss": 0.408, "lr": 1.1299168726389447e-06, "epoch": 0.5127712694449779, "percentage": 51.28, "elapsed_time": "0:31:05", "remaining_time": "0:29:32", "throughput": 13527.21, "total_tokens": 25239808}
|
|
{"current_steps": 8015, "total_steps": 15621, "loss": 0.3354, "lr": 1.1288088933087868e-06, "epoch": 0.5130913513859549, "percentage": 51.31, "elapsed_time": "0:31:06", "remaining_time": "0:29:31", "throughput": 13531.77, "total_tokens": 25257344}
|
|
{"current_steps": 8020, "total_steps": 15621, "loss": 0.3365, "lr": 1.1277007531592127e-06, "epoch": 0.5134114333269317, "percentage": 51.34, "elapsed_time": "0:31:07", "remaining_time": "0:29:29", "throughput": 13535.39, "total_tokens": 25272064}
|
|
{"current_steps": 8025, "total_steps": 15621, "loss": 0.3619, "lr": 1.1265924535737492e-06, "epoch": 0.5137315152679086, "percentage": 51.37, "elapsed_time": "0:31:07", "remaining_time": "0:29:27", "throughput": 13539.4, "total_tokens": 25287936}
|
|
{"current_steps": 8030, "total_steps": 15621, "loss": 0.3007, "lr": 1.125483995936121e-06, "epoch": 0.5140515972088855, "percentage": 51.41, "elapsed_time": "0:31:08", "remaining_time": "0:29:26", "throughput": 13543.19, "total_tokens": 25303232}
|
|
{"current_steps": 8035, "total_steps": 15621, "loss": 0.376, "lr": 1.1243753816302507e-06, "epoch": 0.5143716791498624, "percentage": 51.44, "elapsed_time": "0:31:08", "remaining_time": "0:29:24", "throughput": 13547.02, "total_tokens": 25318656}
|
|
{"current_steps": 8040, "total_steps": 15621, "loss": 0.417, "lr": 1.1232666120402558e-06, "epoch": 0.5146917610908393, "percentage": 51.47, "elapsed_time": "0:31:09", "remaining_time": "0:29:22", "throughput": 13550.73, "total_tokens": 25333760}
|
|
{"current_steps": 8045, "total_steps": 15621, "loss": 0.3827, "lr": 1.1221576885504487e-06, "epoch": 0.5150118430318161, "percentage": 51.5, "elapsed_time": "0:31:10", "remaining_time": "0:29:21", "throughput": 13554.77, "total_tokens": 25349824}
|
|
{"current_steps": 8050, "total_steps": 15621, "loss": 0.4027, "lr": 1.121048612545333e-06, "epoch": 0.515331924972793, "percentage": 51.53, "elapsed_time": "0:31:10", "remaining_time": "0:29:19", "throughput": 13558.64, "total_tokens": 25365376}
|
|
{"current_steps": 8055, "total_steps": 15621, "loss": 0.4599, "lr": 1.1199393854096034e-06, "epoch": 0.5156520069137699, "percentage": 51.57, "elapsed_time": "0:31:11", "remaining_time": "0:29:17", "throughput": 13562.52, "total_tokens": 25380928}
|
|
{"current_steps": 8060, "total_steps": 15621, "loss": 0.3487, "lr": 1.118830008528143e-06, "epoch": 0.5159720888547468, "percentage": 51.6, "elapsed_time": "0:31:12", "remaining_time": "0:29:16", "throughput": 13566.33, "total_tokens": 25396352}
|
|
{"current_steps": 8065, "total_steps": 15621, "loss": 0.3159, "lr": 1.1177204832860212e-06, "epoch": 0.5162921707957238, "percentage": 51.63, "elapsed_time": "0:31:12", "remaining_time": "0:29:14", "throughput": 13570.07, "total_tokens": 25411456}
|
|
{"current_steps": 8070, "total_steps": 15621, "loss": 0.4322, "lr": 1.1166108110684947e-06, "epoch": 0.5166122527367006, "percentage": 51.66, "elapsed_time": "0:31:13", "remaining_time": "0:29:12", "throughput": 13574.48, "total_tokens": 25428544}
|
|
{"current_steps": 8075, "total_steps": 15621, "loss": 0.3988, "lr": 1.1155009932610003e-06, "epoch": 0.5169323346776775, "percentage": 51.69, "elapsed_time": "0:31:13", "remaining_time": "0:29:11", "throughput": 13578.33, "total_tokens": 25443968}
|
|
{"current_steps": 8080, "total_steps": 15621, "loss": 0.3273, "lr": 1.1143910312491605e-06, "epoch": 0.5172524166186544, "percentage": 51.73, "elapsed_time": "0:31:14", "remaining_time": "0:29:09", "throughput": 13582.03, "total_tokens": 25458880}
|
|
{"current_steps": 8085, "total_steps": 15621, "loss": 0.3196, "lr": 1.1132809264187748e-06, "epoch": 0.5175724985596313, "percentage": 51.76, "elapsed_time": "0:31:15", "remaining_time": "0:29:07", "throughput": 13585.84, "total_tokens": 25474304}
|
|
{"current_steps": 8090, "total_steps": 15621, "loss": 0.3884, "lr": 1.1121706801558226e-06, "epoch": 0.5178925805006082, "percentage": 51.79, "elapsed_time": "0:31:15", "remaining_time": "0:29:06", "throughput": 13589.53, "total_tokens": 25489472}
|
|
{"current_steps": 8095, "total_steps": 15621, "loss": 0.3827, "lr": 1.111060293846459e-06, "epoch": 0.518212662441585, "percentage": 51.82, "elapsed_time": "0:31:16", "remaining_time": "0:29:04", "throughput": 13593.33, "total_tokens": 25504896}
|
|
{"current_steps": 8100, "total_steps": 15621, "loss": 0.4807, "lr": 1.1099497688770148e-06, "epoch": 0.5185327443825619, "percentage": 51.85, "elapsed_time": "0:31:16", "remaining_time": "0:29:02", "throughput": 13596.84, "total_tokens": 25519360}
|
|
{"current_steps": 8105, "total_steps": 15621, "loss": 0.4418, "lr": 1.1088391066339928e-06, "epoch": 0.5188528263235388, "percentage": 51.89, "elapsed_time": "0:31:17", "remaining_time": "0:29:01", "throughput": 13600.93, "total_tokens": 25535680}
|
|
{"current_steps": 8110, "total_steps": 15621, "loss": 0.5327, "lr": 1.1077283085040684e-06, "epoch": 0.5191729082645157, "percentage": 51.92, "elapsed_time": "0:31:18", "remaining_time": "0:28:59", "throughput": 13604.57, "total_tokens": 25550592}
|
|
{"current_steps": 8115, "total_steps": 15621, "loss": 0.4083, "lr": 1.1066173758740863e-06, "epoch": 0.5194929902054926, "percentage": 51.95, "elapsed_time": "0:31:18", "remaining_time": "0:28:57", "throughput": 13608.24, "total_tokens": 25565696}
|
|
{"current_steps": 8120, "total_steps": 15621, "loss": 0.3485, "lr": 1.105506310131058e-06, "epoch": 0.5198130721464695, "percentage": 51.98, "elapsed_time": "0:31:19", "remaining_time": "0:28:56", "throughput": 13612.22, "total_tokens": 25581568}
|
|
{"current_steps": 8125, "total_steps": 15621, "loss": 0.466, "lr": 1.1043951126621634e-06, "epoch": 0.5201331540874464, "percentage": 52.01, "elapsed_time": "0:31:19", "remaining_time": "0:28:54", "throughput": 13616.22, "total_tokens": 25597760}
|
|
{"current_steps": 8130, "total_steps": 15621, "loss": 0.4111, "lr": 1.1032837848547445e-06, "epoch": 0.5204532360284233, "percentage": 52.05, "elapsed_time": "0:31:20", "remaining_time": "0:28:52", "throughput": 13620.72, "total_tokens": 25615424}
|
|
{"current_steps": 8135, "total_steps": 15621, "loss": 0.4094, "lr": 1.1021723280963074e-06, "epoch": 0.5207733179694002, "percentage": 52.08, "elapsed_time": "0:31:21", "remaining_time": "0:28:51", "throughput": 13624.49, "total_tokens": 25630720}
|
|
{"current_steps": 8140, "total_steps": 15621, "loss": 0.4886, "lr": 1.1010607437745194e-06, "epoch": 0.5210933999103771, "percentage": 52.11, "elapsed_time": "0:31:21", "remaining_time": "0:28:49", "throughput": 13629.27, "total_tokens": 25649280}
|
|
{"current_steps": 8145, "total_steps": 15621, "loss": 0.5002, "lr": 1.0999490332772057e-06, "epoch": 0.5214134818513539, "percentage": 52.14, "elapsed_time": "0:31:22", "remaining_time": "0:28:47", "throughput": 13632.97, "total_tokens": 25664576}
|
|
{"current_steps": 8150, "total_steps": 15621, "loss": 0.4193, "lr": 1.0988371979923507e-06, "epoch": 0.5217335637923308, "percentage": 52.17, "elapsed_time": "0:31:23", "remaining_time": "0:28:46", "throughput": 13636.91, "total_tokens": 25680384}
|
|
{"current_steps": 8155, "total_steps": 15621, "loss": 0.4017, "lr": 1.097725239308094e-06, "epoch": 0.5220536457333077, "percentage": 52.21, "elapsed_time": "0:31:23", "remaining_time": "0:28:44", "throughput": 13640.82, "total_tokens": 25696128}
|
|
{"current_steps": 8160, "total_steps": 15621, "loss": 0.2794, "lr": 1.0966131586127278e-06, "epoch": 0.5223737276742846, "percentage": 52.24, "elapsed_time": "0:31:24", "remaining_time": "0:28:42", "throughput": 13645.0, "total_tokens": 25712768}
|
|
{"current_steps": 8165, "total_steps": 15621, "loss": 0.4033, "lr": 1.0955009572946992e-06, "epoch": 0.5226938096152615, "percentage": 52.27, "elapsed_time": "0:31:24", "remaining_time": "0:28:41", "throughput": 13648.62, "total_tokens": 25727616}
|
|
{"current_steps": 8170, "total_steps": 15621, "loss": 0.4149, "lr": 1.094388636742604e-06, "epoch": 0.5230138915562383, "percentage": 52.3, "elapsed_time": "0:31:25", "remaining_time": "0:28:39", "throughput": 13652.89, "total_tokens": 25744384}
|
|
{"current_steps": 8175, "total_steps": 15621, "loss": 0.3376, "lr": 1.0932761983451878e-06, "epoch": 0.5233339734972153, "percentage": 52.33, "elapsed_time": "0:31:26", "remaining_time": "0:28:38", "throughput": 13656.91, "total_tokens": 25760640}
|
|
{"current_steps": 8180, "total_steps": 15621, "loss": 0.3116, "lr": 1.0921636434913425e-06, "epoch": 0.5236540554381922, "percentage": 52.37, "elapsed_time": "0:31:26", "remaining_time": "0:28:36", "throughput": 13660.89, "total_tokens": 25776640}
|
|
{"current_steps": 8185, "total_steps": 15621, "loss": 0.2977, "lr": 1.091050973570106e-06, "epoch": 0.5239741373791691, "percentage": 52.4, "elapsed_time": "0:31:27", "remaining_time": "0:28:34", "throughput": 13664.59, "total_tokens": 25791744}
|
|
{"current_steps": 8190, "total_steps": 15621, "loss": 0.5531, "lr": 1.08993818997066e-06, "epoch": 0.524294219320146, "percentage": 52.43, "elapsed_time": "0:31:28", "remaining_time": "0:28:33", "throughput": 13668.74, "total_tokens": 25808256}
|
|
{"current_steps": 8195, "total_steps": 15621, "loss": 0.4378, "lr": 1.0888252940823283e-06, "epoch": 0.5246143012611229, "percentage": 52.46, "elapsed_time": "0:31:28", "remaining_time": "0:28:31", "throughput": 13672.65, "total_tokens": 25824128}
|
|
{"current_steps": 8200, "total_steps": 15621, "loss": 0.4676, "lr": 1.0877122872945737e-06, "epoch": 0.5249343832020997, "percentage": 52.49, "elapsed_time": "0:31:29", "remaining_time": "0:28:29", "throughput": 13676.81, "total_tokens": 25840576}
|
|
{"current_steps": 8205, "total_steps": 15621, "loss": 0.317, "lr": 1.0865991709969983e-06, "epoch": 0.5252544651430766, "percentage": 52.53, "elapsed_time": "0:31:29", "remaining_time": "0:28:28", "throughput": 13680.67, "total_tokens": 25856256}
|
|
{"current_steps": 8210, "total_steps": 15621, "loss": 0.4482, "lr": 1.0854859465793416e-06, "epoch": 0.5255745470840535, "percentage": 52.56, "elapsed_time": "0:31:30", "remaining_time": "0:28:26", "throughput": 13684.4, "total_tokens": 25871424}
|
|
{"current_steps": 8215, "total_steps": 15621, "loss": 0.4974, "lr": 1.0843726154314767e-06, "epoch": 0.5258946290250304, "percentage": 52.59, "elapsed_time": "0:31:31", "remaining_time": "0:28:24", "throughput": 13688.01, "total_tokens": 25886272}
|
|
{"current_steps": 8220, "total_steps": 15621, "loss": 0.4376, "lr": 1.083259178943411e-06, "epoch": 0.5262147109660072, "percentage": 52.62, "elapsed_time": "0:31:31", "remaining_time": "0:28:23", "throughput": 13691.88, "total_tokens": 25901952}
|
|
{"current_steps": 8225, "total_steps": 15621, "loss": 0.3694, "lr": 1.0821456385052822e-06, "epoch": 0.5265347929069842, "percentage": 52.65, "elapsed_time": "0:31:32", "remaining_time": "0:28:21", "throughput": 13695.77, "total_tokens": 25917888}
|
|
{"current_steps": 8230, "total_steps": 15621, "loss": 0.4199, "lr": 1.0810319955073598e-06, "epoch": 0.5268548748479611, "percentage": 52.69, "elapsed_time": "0:31:33", "remaining_time": "0:28:20", "throughput": 13699.69, "total_tokens": 25933824}
|
|
{"current_steps": 8235, "total_steps": 15621, "loss": 0.3888, "lr": 1.0799182513400393e-06, "epoch": 0.527174956788938, "percentage": 52.72, "elapsed_time": "0:31:33", "remaining_time": "0:28:18", "throughput": 13704.11, "total_tokens": 25951360}
|
|
{"current_steps": 8240, "total_steps": 15621, "loss": 0.3594, "lr": 1.0788044073938438e-06, "epoch": 0.5274950387299149, "percentage": 52.75, "elapsed_time": "0:31:34", "remaining_time": "0:28:16", "throughput": 13708.04, "total_tokens": 25967232}
|
|
{"current_steps": 8245, "total_steps": 15621, "loss": 0.4146, "lr": 1.0776904650594205e-06, "epoch": 0.5278151206708918, "percentage": 52.78, "elapsed_time": "0:31:34", "remaining_time": "0:28:15", "throughput": 13711.79, "total_tokens": 25982592}
|
|
{"current_steps": 8250, "total_steps": 15621, "loss": 0.4094, "lr": 1.0765764257275394e-06, "epoch": 0.5281352026118686, "percentage": 52.81, "elapsed_time": "0:31:35", "remaining_time": "0:28:13", "throughput": 13715.51, "total_tokens": 25997824}
|
|
{"current_steps": 8255, "total_steps": 15621, "loss": 0.4292, "lr": 1.0754622907890914e-06, "epoch": 0.5284552845528455, "percentage": 52.85, "elapsed_time": "0:31:36", "remaining_time": "0:28:11", "throughput": 13719.42, "total_tokens": 26013632}
|
|
{"current_steps": 8260, "total_steps": 15621, "loss": 0.3249, "lr": 1.0743480616350873e-06, "epoch": 0.5287753664938224, "percentage": 52.88, "elapsed_time": "0:31:36", "remaining_time": "0:28:10", "throughput": 13723.07, "total_tokens": 26028800}
|
|
{"current_steps": 8265, "total_steps": 15621, "loss": 0.339, "lr": 1.0732337396566558e-06, "epoch": 0.5290954484347993, "percentage": 52.91, "elapsed_time": "0:31:37", "remaining_time": "0:28:08", "throughput": 13726.92, "total_tokens": 26044672}
|
|
{"current_steps": 8270, "total_steps": 15621, "loss": 0.396, "lr": 1.07211932624504e-06, "epoch": 0.5294155303757762, "percentage": 52.94, "elapsed_time": "0:31:37", "remaining_time": "0:28:07", "throughput": 13730.81, "total_tokens": 26060544}
|
|
{"current_steps": 8275, "total_steps": 15621, "loss": 0.3786, "lr": 1.0710048227915988e-06, "epoch": 0.529735612316753, "percentage": 52.97, "elapsed_time": "0:31:38", "remaining_time": "0:28:05", "throughput": 13734.62, "total_tokens": 26076160}
|
|
{"current_steps": 8280, "total_steps": 15621, "loss": 0.4186, "lr": 1.0698902306878024e-06, "epoch": 0.53005569425773, "percentage": 53.01, "elapsed_time": "0:31:39", "remaining_time": "0:28:03", "throughput": 13738.64, "total_tokens": 26092352}
|
|
{"current_steps": 8285, "total_steps": 15621, "loss": 0.3024, "lr": 1.0687755513252325e-06, "epoch": 0.5303757761987069, "percentage": 53.04, "elapsed_time": "0:31:39", "remaining_time": "0:28:02", "throughput": 13742.33, "total_tokens": 26107776}
|
|
{"current_steps": 8290, "total_steps": 15621, "loss": 0.31, "lr": 1.0676607860955794e-06, "epoch": 0.5306958581396838, "percentage": 53.07, "elapsed_time": "0:31:40", "remaining_time": "0:28:00", "throughput": 13746.26, "total_tokens": 26123712}
|
|
{"current_steps": 8295, "total_steps": 15621, "loss": 0.386, "lr": 1.0665459363906404e-06, "epoch": 0.5310159400806607, "percentage": 53.1, "elapsed_time": "0:31:41", "remaining_time": "0:27:58", "throughput": 13750.05, "total_tokens": 26139200}
|
|
{"current_steps": 8300, "total_steps": 15621, "loss": 0.4355, "lr": 1.0654310036023185e-06, "epoch": 0.5313360220216375, "percentage": 53.13, "elapsed_time": "0:31:41", "remaining_time": "0:27:57", "throughput": 13753.47, "total_tokens": 26153600}
|
|
{"current_steps": 8305, "total_steps": 15621, "loss": 0.4206, "lr": 1.0643159891226203e-06, "epoch": 0.5316561039626144, "percentage": 53.17, "elapsed_time": "0:31:42", "remaining_time": "0:27:55", "throughput": 13757.4, "total_tokens": 26169600}
|
|
{"current_steps": 8310, "total_steps": 15621, "loss": 0.3398, "lr": 1.0632008943436545e-06, "epoch": 0.5319761859035913, "percentage": 53.2, "elapsed_time": "0:31:42", "remaining_time": "0:27:54", "throughput": 13761.38, "total_tokens": 26185536}
|
|
{"current_steps": 8315, "total_steps": 15621, "loss": 0.453, "lr": 1.0620857206576299e-06, "epoch": 0.5322962678445682, "percentage": 53.23, "elapsed_time": "0:31:43", "remaining_time": "0:27:52", "throughput": 13765.27, "total_tokens": 26201536}
|
|
{"current_steps": 8320, "total_steps": 15621, "loss": 0.2888, "lr": 1.0609704694568546e-06, "epoch": 0.5326163497855451, "percentage": 53.26, "elapsed_time": "0:31:44", "remaining_time": "0:27:50", "throughput": 13768.87, "total_tokens": 26216576}
|
|
{"current_steps": 8325, "total_steps": 15621, "loss": 0.2904, "lr": 1.0598551421337318e-06, "epoch": 0.5329364317265219, "percentage": 53.29, "elapsed_time": "0:31:44", "remaining_time": "0:27:49", "throughput": 13772.79, "total_tokens": 26232640}
|
|
{"current_steps": 8330, "total_steps": 15621, "loss": 0.5146, "lr": 1.0587397400807617e-06, "epoch": 0.5332565136674989, "percentage": 53.33, "elapsed_time": "0:31:45", "remaining_time": "0:27:47", "throughput": 13776.61, "total_tokens": 26248448}
|
|
{"current_steps": 8335, "total_steps": 15621, "loss": 0.519, "lr": 1.057624264690536e-06, "epoch": 0.5335765956084758, "percentage": 53.36, "elapsed_time": "0:31:45", "remaining_time": "0:27:46", "throughput": 13780.32, "total_tokens": 26263872}
|
|
{"current_steps": 8340, "total_steps": 15621, "loss": 0.4598, "lr": 1.0565087173557394e-06, "epoch": 0.5338966775494527, "percentage": 53.39, "elapsed_time": "0:31:46", "remaining_time": "0:27:44", "throughput": 13784.24, "total_tokens": 26279872}
|
|
{"current_steps": 8345, "total_steps": 15621, "loss": 0.3428, "lr": 1.055393099469146e-06, "epoch": 0.5342167594904296, "percentage": 53.42, "elapsed_time": "0:31:47", "remaining_time": "0:27:42", "throughput": 13788.15, "total_tokens": 26295680}
|
|
{"current_steps": 8350, "total_steps": 15621, "loss": 0.4057, "lr": 1.054277412423617e-06, "epoch": 0.5345368414314065, "percentage": 53.45, "elapsed_time": "0:31:47", "remaining_time": "0:27:41", "throughput": 13791.84, "total_tokens": 26311040}
|
|
{"current_steps": 8355, "total_steps": 15621, "loss": 0.4603, "lr": 1.0531616576121017e-06, "epoch": 0.5348569233723833, "percentage": 53.49, "elapsed_time": "0:31:48", "remaining_time": "0:27:39", "throughput": 13795.5, "total_tokens": 26326144}
|
|
{"current_steps": 8360, "total_steps": 15621, "loss": 0.3347, "lr": 1.0520458364276325e-06, "epoch": 0.5351770053133602, "percentage": 53.52, "elapsed_time": "0:31:48", "remaining_time": "0:27:37", "throughput": 13799.29, "total_tokens": 26341952}
|
|
{"current_steps": 8365, "total_steps": 15621, "loss": 0.3565, "lr": 1.0509299502633256e-06, "epoch": 0.5354970872543371, "percentage": 53.55, "elapsed_time": "0:31:49", "remaining_time": "0:27:36", "throughput": 13802.77, "total_tokens": 26356672}
|
|
{"current_steps": 8370, "total_steps": 15621, "loss": 0.4493, "lr": 1.0498140005123777e-06, "epoch": 0.535817169195314, "percentage": 53.58, "elapsed_time": "0:31:50", "remaining_time": "0:27:34", "throughput": 13806.74, "total_tokens": 26373056}
|
|
{"current_steps": 8375, "total_steps": 15621, "loss": 0.426, "lr": 1.0486979885680653e-06, "epoch": 0.5361372511362908, "percentage": 53.61, "elapsed_time": "0:31:50", "remaining_time": "0:27:33", "throughput": 13810.29, "total_tokens": 26388032}
|
|
{"current_steps": 8380, "total_steps": 15621, "loss": 0.4115, "lr": 1.0475819158237424e-06, "epoch": 0.5364573330772677, "percentage": 53.65, "elapsed_time": "0:31:51", "remaining_time": "0:27:31", "throughput": 13813.81, "total_tokens": 26402880}
|
|
{"current_steps": 8385, "total_steps": 15621, "loss": 0.4713, "lr": 1.0464657836728389e-06, "epoch": 0.5367774150182447, "percentage": 53.68, "elapsed_time": "0:31:51", "remaining_time": "0:27:29", "throughput": 13817.8, "total_tokens": 26419328}
|
|
{"current_steps": 8390, "total_steps": 15621, "loss": 0.3981, "lr": 1.045349593508859e-06, "epoch": 0.5370974969592216, "percentage": 53.71, "elapsed_time": "0:31:52", "remaining_time": "0:27:28", "throughput": 13821.33, "total_tokens": 26434112}
|
|
{"current_steps": 8395, "total_steps": 15621, "loss": 0.297, "lr": 1.0442333467253788e-06, "epoch": 0.5374175789001985, "percentage": 53.74, "elapsed_time": "0:31:53", "remaining_time": "0:27:26", "throughput": 13825.38, "total_tokens": 26450688}
|
|
{"current_steps": 8400, "total_steps": 15621, "loss": 0.3602, "lr": 1.0431170447160463e-06, "epoch": 0.5377376608411754, "percentage": 53.77, "elapsed_time": "0:31:53", "remaining_time": "0:27:25", "throughput": 13829.13, "total_tokens": 26466368}
|
|
{"current_steps": 8405, "total_steps": 15621, "loss": 0.3495, "lr": 1.0420006888745767e-06, "epoch": 0.5380577427821522, "percentage": 53.81, "elapsed_time": "0:31:54", "remaining_time": "0:27:23", "throughput": 13833.07, "total_tokens": 26482624}
|
|
{"current_steps": 8410, "total_steps": 15621, "loss": 0.3668, "lr": 1.0408842805947543e-06, "epoch": 0.5383778247231291, "percentage": 53.84, "elapsed_time": "0:31:55", "remaining_time": "0:27:22", "throughput": 13837.1, "total_tokens": 26499200}
|
|
{"current_steps": 8415, "total_steps": 15621, "loss": 0.5119, "lr": 1.0397678212704276e-06, "epoch": 0.538697906664106, "percentage": 53.87, "elapsed_time": "0:31:55", "remaining_time": "0:27:20", "throughput": 13840.59, "total_tokens": 26514048}
|
|
{"current_steps": 8420, "total_steps": 15621, "loss": 0.4034, "lr": 1.038651312295509e-06, "epoch": 0.5390179886050829, "percentage": 53.9, "elapsed_time": "0:31:56", "remaining_time": "0:27:18", "throughput": 13844.2, "total_tokens": 26529216}
|
|
{"current_steps": 8425, "total_steps": 15621, "loss": 0.4192, "lr": 1.037534755063973e-06, "epoch": 0.5393380705460598, "percentage": 53.93, "elapsed_time": "0:31:56", "remaining_time": "0:27:17", "throughput": 13848.04, "total_tokens": 26545152}
|
|
{"current_steps": 8430, "total_steps": 15621, "loss": 0.4147, "lr": 1.0364181509698548e-06, "epoch": 0.5396581524870366, "percentage": 53.97, "elapsed_time": "0:31:57", "remaining_time": "0:27:15", "throughput": 13851.7, "total_tokens": 26560512}
|
|
{"current_steps": 8435, "total_steps": 15621, "loss": 0.35, "lr": 1.0353015014072476e-06, "epoch": 0.5399782344280136, "percentage": 54.0, "elapsed_time": "0:31:58", "remaining_time": "0:27:14", "throughput": 13855.23, "total_tokens": 26575488}
|
|
{"current_steps": 8440, "total_steps": 15621, "loss": 0.405, "lr": 1.0341848077703013e-06, "epoch": 0.5402983163689905, "percentage": 54.03, "elapsed_time": "0:31:58", "remaining_time": "0:27:12", "throughput": 13858.99, "total_tokens": 26591040}
|
|
{"current_steps": 8445, "total_steps": 15621, "loss": 0.3229, "lr": 1.033068071453221e-06, "epoch": 0.5406183983099674, "percentage": 54.06, "elapsed_time": "0:31:59", "remaining_time": "0:27:10", "throughput": 13862.87, "total_tokens": 26606976}
|
|
{"current_steps": 8450, "total_steps": 15621, "loss": 0.3623, "lr": 1.0319512938502653e-06, "epoch": 0.5409384802509443, "percentage": 54.09, "elapsed_time": "0:31:59", "remaining_time": "0:27:09", "throughput": 13866.85, "total_tokens": 26623296}
|
|
{"current_steps": 8455, "total_steps": 15621, "loss": 0.3123, "lr": 1.0308344763557444e-06, "epoch": 0.5412585621919211, "percentage": 54.13, "elapsed_time": "0:32:00", "remaining_time": "0:27:07", "throughput": 13870.41, "total_tokens": 26638336}
|
|
{"current_steps": 8460, "total_steps": 15621, "loss": 0.2841, "lr": 1.0297176203640175e-06, "epoch": 0.541578644132898, "percentage": 54.16, "elapsed_time": "0:32:01", "remaining_time": "0:27:06", "throughput": 13874.35, "total_tokens": 26654400}
|
|
{"current_steps": 8465, "total_steps": 15621, "loss": 0.3482, "lr": 1.0286007272694924e-06, "epoch": 0.5418987260738749, "percentage": 54.19, "elapsed_time": "0:32:01", "remaining_time": "0:27:04", "throughput": 13877.93, "total_tokens": 26669568}
|
|
{"current_steps": 8470, "total_steps": 15621, "loss": 0.4695, "lr": 1.0274837984666239e-06, "epoch": 0.5422188080148518, "percentage": 54.22, "elapsed_time": "0:32:02", "remaining_time": "0:27:02", "throughput": 13881.89, "total_tokens": 26686016}
|
|
{"current_steps": 8475, "total_steps": 15621, "loss": 0.4184, "lr": 1.02636683534991e-06, "epoch": 0.5425388899558287, "percentage": 54.25, "elapsed_time": "0:32:02", "remaining_time": "0:27:01", "throughput": 13885.57, "total_tokens": 26701504}
|
|
{"current_steps": 8480, "total_steps": 15621, "loss": 0.5884, "lr": 1.0252498393138928e-06, "epoch": 0.5428589718968055, "percentage": 54.29, "elapsed_time": "0:32:03", "remaining_time": "0:26:59", "throughput": 13889.3, "total_tokens": 26717120}
|
|
{"current_steps": 8485, "total_steps": 15621, "loss": 0.4193, "lr": 1.0241328117531546e-06, "epoch": 0.5431790538377824, "percentage": 54.32, "elapsed_time": "0:32:04", "remaining_time": "0:26:58", "throughput": 13893.03, "total_tokens": 26732736}
|
|
{"current_steps": 8490, "total_steps": 15621, "loss": 0.4126, "lr": 1.0230157540623174e-06, "epoch": 0.5434991357787594, "percentage": 54.35, "elapsed_time": "0:32:04", "remaining_time": "0:26:56", "throughput": 13896.4, "total_tokens": 26747392}
|
|
{"current_steps": 8495, "total_steps": 15621, "loss": 0.4462, "lr": 1.0218986676360415e-06, "epoch": 0.5438192177197363, "percentage": 54.38, "elapsed_time": "0:32:05", "remaining_time": "0:26:55", "throughput": 13899.83, "total_tokens": 26762112}
|
|
{"current_steps": 8500, "total_steps": 15621, "loss": 0.3709, "lr": 1.0207815538690216e-06, "epoch": 0.5441392996607132, "percentage": 54.41, "elapsed_time": "0:32:05", "remaining_time": "0:26:53", "throughput": 13903.55, "total_tokens": 26777856}
|
|
{"current_steps": 8505, "total_steps": 15621, "loss": 0.3055, "lr": 1.0196644141559877e-06, "epoch": 0.54445938160169, "percentage": 54.45, "elapsed_time": "0:32:06", "remaining_time": "0:26:51", "throughput": 13907.44, "total_tokens": 26794048}
|
|
{"current_steps": 8510, "total_steps": 15621, "loss": 0.3509, "lr": 1.0185472498917021e-06, "epoch": 0.5447794635426669, "percentage": 54.48, "elapsed_time": "0:32:07", "remaining_time": "0:26:50", "throughput": 13911.21, "total_tokens": 26809792}
|
|
{"current_steps": 8515, "total_steps": 15621, "loss": 0.4421, "lr": 1.017430062470957e-06, "epoch": 0.5450995454836438, "percentage": 54.51, "elapsed_time": "0:32:07", "remaining_time": "0:26:48", "throughput": 13914.8, "total_tokens": 26825024}
|
|
{"current_steps": 8520, "total_steps": 15621, "loss": 0.3472, "lr": 1.016312853288574e-06, "epoch": 0.5454196274246207, "percentage": 54.54, "elapsed_time": "0:32:08", "remaining_time": "0:26:47", "throughput": 13918.85, "total_tokens": 26841536}
|
|
{"current_steps": 8525, "total_steps": 15621, "loss": 0.395, "lr": 1.0151956237394027e-06, "epoch": 0.5457397093655976, "percentage": 54.57, "elapsed_time": "0:32:09", "remaining_time": "0:26:45", "throughput": 13922.69, "total_tokens": 26857600}
|
|
{"current_steps": 8530, "total_steps": 15621, "loss": 0.3942, "lr": 1.0140783752183164e-06, "epoch": 0.5460597913065744, "percentage": 54.61, "elapsed_time": "0:32:09", "remaining_time": "0:26:44", "throughput": 13926.76, "total_tokens": 26874176}
|
|
{"current_steps": 8535, "total_steps": 15621, "loss": 0.4162, "lr": 1.0129611091202138e-06, "epoch": 0.5463798732475513, "percentage": 54.64, "elapsed_time": "0:32:10", "remaining_time": "0:26:42", "throughput": 13930.61, "total_tokens": 26890176}
|
|
{"current_steps": 8540, "total_steps": 15621, "loss": 0.2897, "lr": 1.0118438268400135e-06, "epoch": 0.5466999551885282, "percentage": 54.67, "elapsed_time": "0:32:10", "remaining_time": "0:26:41", "throughput": 13934.23, "total_tokens": 26905728}
|
|
{"current_steps": 8545, "total_steps": 15621, "loss": 0.4655, "lr": 1.0107265297726568e-06, "epoch": 0.5470200371295052, "percentage": 54.7, "elapsed_time": "0:32:11", "remaining_time": "0:26:39", "throughput": 13937.85, "total_tokens": 26921280}
|
|
{"current_steps": 8550, "total_steps": 15621, "loss": 0.4065, "lr": 1.009609219313102e-06, "epoch": 0.5473401190704821, "percentage": 54.73, "elapsed_time": "0:32:12", "remaining_time": "0:26:37", "throughput": 13941.45, "total_tokens": 26936704}
|
|
{"current_steps": 8555, "total_steps": 15621, "loss": 0.4008, "lr": 1.0084918968563236e-06, "epoch": 0.547660201011459, "percentage": 54.77, "elapsed_time": "0:32:12", "remaining_time": "0:26:36", "throughput": 13945.2, "total_tokens": 26952448}
|
|
{"current_steps": 8560, "total_steps": 15621, "loss": 0.3928, "lr": 1.0073745637973124e-06, "epoch": 0.5479802829524358, "percentage": 54.8, "elapsed_time": "0:32:13", "remaining_time": "0:26:34", "throughput": 13948.76, "total_tokens": 26967680}
|
|
{"current_steps": 8565, "total_steps": 15621, "loss": 0.3489, "lr": 1.0062572215310718e-06, "epoch": 0.5483003648934127, "percentage": 54.83, "elapsed_time": "0:32:13", "remaining_time": "0:26:33", "throughput": 13952.14, "total_tokens": 26982400}
|
|
{"current_steps": 8570, "total_steps": 15621, "loss": 0.313, "lr": 1.0051398714526165e-06, "epoch": 0.5486204468343896, "percentage": 54.86, "elapsed_time": "0:32:14", "remaining_time": "0:26:31", "throughput": 13955.96, "total_tokens": 26998400}
|
|
{"current_steps": 8575, "total_steps": 15621, "loss": 0.3506, "lr": 1.0040225149569712e-06, "epoch": 0.5489405287753665, "percentage": 54.89, "elapsed_time": "0:32:15", "remaining_time": "0:26:30", "throughput": 13960.21, "total_tokens": 27015936}
|
|
{"current_steps": 8580, "total_steps": 15621, "loss": 0.3263, "lr": 1.0029051534391693e-06, "epoch": 0.5492606107163434, "percentage": 54.93, "elapsed_time": "0:32:15", "remaining_time": "0:26:28", "throughput": 13963.6, "total_tokens": 27030528}
|
|
{"current_steps": 8585, "total_steps": 15621, "loss": 0.3621, "lr": 1.001787788294249e-06, "epoch": 0.5495806926573202, "percentage": 54.96, "elapsed_time": "0:32:16", "remaining_time": "0:26:27", "throughput": 13967.25, "total_tokens": 27046080}
|
|
{"current_steps": 8590, "total_steps": 15621, "loss": 0.4206, "lr": 1.0006704209172537e-06, "epoch": 0.5499007745982971, "percentage": 54.99, "elapsed_time": "0:32:16", "remaining_time": "0:26:25", "throughput": 13970.85, "total_tokens": 27061504}
|
|
{"current_steps": 8595, "total_steps": 15621, "loss": 0.4297, "lr": 9.995530527032301e-07, "epoch": 0.5502208565392741, "percentage": 55.02, "elapsed_time": "0:32:17", "remaining_time": "0:26:23", "throughput": 13974.49, "total_tokens": 27077056}
|
|
{"current_steps": 8600, "total_steps": 15621, "loss": 0.3382, "lr": 9.984356850472257e-07, "epoch": 0.550540938480251, "percentage": 55.05, "elapsed_time": "0:32:18", "remaining_time": "0:26:22", "throughput": 13978.95, "total_tokens": 27095168}
|
|
{"current_steps": 8602, "total_steps": 15621, "eval_loss": 0.3985471725463867, "epoch": 0.5506689712566417, "percentage": 55.07, "elapsed_time": "0:33:07", "remaining_time": "0:27:01", "throughput": 13634.54, "total_tokens": 27101056}
|
|
{"current_steps": 8605, "total_steps": 15621, "loss": 0.3698, "lr": 9.97318319344287e-07, "epoch": 0.5508610204212279, "percentage": 55.09, "elapsed_time": "0:33:29", "remaining_time": "0:27:18", "throughput": 13487.95, "total_tokens": 27110144}
|
|
{"current_steps": 8610, "total_steps": 15621, "loss": 0.5311, "lr": 9.962009569894577e-07, "epoch": 0.5511811023622047, "percentage": 55.12, "elapsed_time": "0:33:30", "remaining_time": "0:27:17", "throughput": 13491.37, "total_tokens": 27124864}
|
|
{"current_steps": 8615, "total_steps": 15621, "loss": 0.3769, "lr": 9.95083599377778e-07, "epoch": 0.5515011843031816, "percentage": 55.15, "elapsed_time": "0:33:31", "remaining_time": "0:27:15", "throughput": 13494.93, "total_tokens": 27140160}
|
|
{"current_steps": 8620, "total_steps": 15621, "loss": 0.374, "lr": 9.939662479042828e-07, "epoch": 0.5518212662441585, "percentage": 55.18, "elapsed_time": "0:33:31", "remaining_time": "0:27:13", "throughput": 13498.55, "total_tokens": 27155712}
|
|
{"current_steps": 8625, "total_steps": 15621, "loss": 0.4573, "lr": 9.92848903963998e-07, "epoch": 0.5521413481851354, "percentage": 55.21, "elapsed_time": "0:33:32", "remaining_time": "0:27:12", "throughput": 13502.33, "total_tokens": 27171520}
|
|
{"current_steps": 8630, "total_steps": 15621, "loss": 0.4487, "lr": 9.9173156895194e-07, "epoch": 0.5524614301261123, "percentage": 55.25, "elapsed_time": "0:33:32", "remaining_time": "0:27:10", "throughput": 13505.9, "total_tokens": 27186752}
|
|
{"current_steps": 8635, "total_steps": 15621, "loss": 0.3823, "lr": 9.906142442631154e-07, "epoch": 0.5527815120670891, "percentage": 55.28, "elapsed_time": "0:33:33", "remaining_time": "0:27:09", "throughput": 13509.37, "total_tokens": 27201664}
|
|
{"current_steps": 8640, "total_steps": 15621, "loss": 0.3804, "lr": 9.894969312925171e-07, "epoch": 0.553101594008066, "percentage": 55.31, "elapsed_time": "0:33:34", "remaining_time": "0:27:07", "throughput": 13513.51, "total_tokens": 27218880}
|
|
{"current_steps": 8645, "total_steps": 15621, "loss": 0.3448, "lr": 9.883796314351234e-07, "epoch": 0.5534216759490429, "percentage": 55.34, "elapsed_time": "0:33:34", "remaining_time": "0:27:05", "throughput": 13517.51, "total_tokens": 27235648}
|
|
{"current_steps": 8650, "total_steps": 15621, "loss": 0.3997, "lr": 9.872623460858966e-07, "epoch": 0.5537417578900199, "percentage": 55.37, "elapsed_time": "0:33:35", "remaining_time": "0:27:04", "throughput": 13521.04, "total_tokens": 27250880}
|
|
{"current_steps": 8655, "total_steps": 15621, "loss": 0.3163, "lr": 9.861450766397799e-07, "epoch": 0.5540618398309968, "percentage": 55.41, "elapsed_time": "0:33:36", "remaining_time": "0:27:02", "throughput": 13524.81, "total_tokens": 27266880}
|
|
{"current_steps": 8660, "total_steps": 15621, "loss": 0.411, "lr": 9.850278244916976e-07, "epoch": 0.5543819217719737, "percentage": 55.44, "elapsed_time": "0:33:36", "remaining_time": "0:27:01", "throughput": 13528.56, "total_tokens": 27282816}
|
|
{"current_steps": 8665, "total_steps": 15621, "loss": 0.4309, "lr": 9.839105910365524e-07, "epoch": 0.5547020037129505, "percentage": 55.47, "elapsed_time": "0:33:37", "remaining_time": "0:26:59", "throughput": 13532.22, "total_tokens": 27298496}
|
|
{"current_steps": 8670, "total_steps": 15621, "loss": 0.331, "lr": 9.827933776692235e-07, "epoch": 0.5550220856539274, "percentage": 55.5, "elapsed_time": "0:33:37", "remaining_time": "0:26:57", "throughput": 13535.79, "total_tokens": 27313856}
|
|
{"current_steps": 8675, "total_steps": 15621, "loss": 0.34, "lr": 9.81676185784564e-07, "epoch": 0.5553421675949043, "percentage": 55.53, "elapsed_time": "0:33:38", "remaining_time": "0:26:56", "throughput": 13539.17, "total_tokens": 27328448}
|
|
{"current_steps": 8680, "total_steps": 15621, "loss": 0.3916, "lr": 9.805590167774021e-07, "epoch": 0.5556622495358812, "percentage": 55.57, "elapsed_time": "0:33:39", "remaining_time": "0:26:54", "throughput": 13542.76, "total_tokens": 27343872}
|
|
{"current_steps": 8685, "total_steps": 15621, "loss": 0.5632, "lr": 9.79441872042536e-07, "epoch": 0.555982331476858, "percentage": 55.6, "elapsed_time": "0:33:39", "remaining_time": "0:26:52", "throughput": 13546.19, "total_tokens": 27358720}
|
|
{"current_steps": 8690, "total_steps": 15621, "loss": 0.3856, "lr": 9.783247529747338e-07, "epoch": 0.5563024134178349, "percentage": 55.63, "elapsed_time": "0:33:40", "remaining_time": "0:26:51", "throughput": 13549.5, "total_tokens": 27373312}
|
|
{"current_steps": 8695, "total_steps": 15621, "loss": 0.3571, "lr": 9.772076609687323e-07, "epoch": 0.5566224953588118, "percentage": 55.66, "elapsed_time": "0:33:40", "remaining_time": "0:26:49", "throughput": 13553.01, "total_tokens": 27388544}
|
|
{"current_steps": 8700, "total_steps": 15621, "loss": 0.3259, "lr": 9.760905974192334e-07, "epoch": 0.5569425772997888, "percentage": 55.69, "elapsed_time": "0:33:41", "remaining_time": "0:26:48", "throughput": 13556.93, "total_tokens": 27405120}
|
|
{"current_steps": 8705, "total_steps": 15621, "loss": 0.4078, "lr": 9.749735637209044e-07, "epoch": 0.5572626592407657, "percentage": 55.73, "elapsed_time": "0:33:42", "remaining_time": "0:26:46", "throughput": 13560.48, "total_tokens": 27420544}
|
|
{"current_steps": 8710, "total_steps": 15621, "loss": 0.3137, "lr": 9.738565612683754e-07, "epoch": 0.5575827411817426, "percentage": 55.76, "elapsed_time": "0:33:42", "remaining_time": "0:26:44", "throughput": 13563.95, "total_tokens": 27435456}
|
|
{"current_steps": 8715, "total_steps": 15621, "loss": 0.3477, "lr": 9.727395914562363e-07, "epoch": 0.5579028231227194, "percentage": 55.79, "elapsed_time": "0:33:43", "remaining_time": "0:26:43", "throughput": 13567.85, "total_tokens": 27452032}
|
|
{"current_steps": 8720, "total_steps": 15621, "loss": 0.4159, "lr": 9.716226556790372e-07, "epoch": 0.5582229050636963, "percentage": 55.82, "elapsed_time": "0:33:43", "remaining_time": "0:26:41", "throughput": 13571.48, "total_tokens": 27467520}
|
|
{"current_steps": 8725, "total_steps": 15621, "loss": 0.312, "lr": 9.705057553312855e-07, "epoch": 0.5585429870046732, "percentage": 55.85, "elapsed_time": "0:33:44", "remaining_time": "0:26:40", "throughput": 13575.01, "total_tokens": 27482816}
|
|
{"current_steps": 8730, "total_steps": 15621, "loss": 0.374, "lr": 9.693888918074452e-07, "epoch": 0.5588630689456501, "percentage": 55.89, "elapsed_time": "0:33:45", "remaining_time": "0:26:38", "throughput": 13578.39, "total_tokens": 27497600}
|
|
{"current_steps": 8735, "total_steps": 15621, "loss": 0.4861, "lr": 9.682720665019325e-07, "epoch": 0.559183150886627, "percentage": 55.92, "elapsed_time": "0:33:45", "remaining_time": "0:26:36", "throughput": 13582.02, "total_tokens": 27513344}
|
|
{"current_steps": 8740, "total_steps": 15621, "loss": 0.4204, "lr": 9.671552808091172e-07, "epoch": 0.5595032328276038, "percentage": 55.95, "elapsed_time": "0:33:46", "remaining_time": "0:26:35", "throughput": 13586.05, "total_tokens": 27530304}
|
|
{"current_steps": 8745, "total_steps": 15621, "loss": 0.3409, "lr": 9.660385361233195e-07, "epoch": 0.5598233147685807, "percentage": 55.98, "elapsed_time": "0:33:46", "remaining_time": "0:26:33", "throughput": 13589.55, "total_tokens": 27545664}
|
|
{"current_steps": 8750, "total_steps": 15621, "loss": 0.2987, "lr": 9.649218338388084e-07, "epoch": 0.5601433967095576, "percentage": 56.01, "elapsed_time": "0:33:47", "remaining_time": "0:26:32", "throughput": 13593.01, "total_tokens": 27560704}
|
|
{"current_steps": 8755, "total_steps": 15621, "loss": 0.4353, "lr": 9.638051753497994e-07, "epoch": 0.5604634786505346, "percentage": 56.05, "elapsed_time": "0:33:48", "remaining_time": "0:26:30", "throughput": 13596.9, "total_tokens": 27577472}
|
|
{"current_steps": 8760, "total_steps": 15621, "loss": 0.3597, "lr": 9.62688562050454e-07, "epoch": 0.5607835605915115, "percentage": 56.08, "elapsed_time": "0:33:48", "remaining_time": "0:26:29", "throughput": 13600.46, "total_tokens": 27592960}
|
|
{"current_steps": 8765, "total_steps": 15621, "loss": 0.4033, "lr": 9.615719953348772e-07, "epoch": 0.5611036425324883, "percentage": 56.11, "elapsed_time": "0:33:49", "remaining_time": "0:26:27", "throughput": 13604.58, "total_tokens": 27610304}
|
|
{"current_steps": 8770, "total_steps": 15621, "loss": 0.5574, "lr": 9.604554765971148e-07, "epoch": 0.5614237244734652, "percentage": 56.14, "elapsed_time": "0:33:50", "remaining_time": "0:26:25", "throughput": 13608.88, "total_tokens": 27628288}
|
|
{"current_steps": 8775, "total_steps": 15621, "loss": 0.4069, "lr": 9.593390072311549e-07, "epoch": 0.5617438064144421, "percentage": 56.17, "elapsed_time": "0:33:50", "remaining_time": "0:26:24", "throughput": 13612.47, "total_tokens": 27643904}
|
|
{"current_steps": 8780, "total_steps": 15621, "loss": 0.3576, "lr": 9.582225886309216e-07, "epoch": 0.562063888355419, "percentage": 56.21, "elapsed_time": "0:33:51", "remaining_time": "0:26:22", "throughput": 13616.25, "total_tokens": 27660224}
|
|
{"current_steps": 8785, "total_steps": 15621, "loss": 0.3015, "lr": 9.571062221902767e-07, "epoch": 0.5623839702963959, "percentage": 56.24, "elapsed_time": "0:33:51", "remaining_time": "0:26:21", "throughput": 13619.67, "total_tokens": 27675136}
|
|
{"current_steps": 8790, "total_steps": 15621, "loss": 0.3485, "lr": 9.559899093030175e-07, "epoch": 0.5627040522373727, "percentage": 56.27, "elapsed_time": "0:33:52", "remaining_time": "0:26:19", "throughput": 13623.08, "total_tokens": 27690176}
|
|
{"current_steps": 8795, "total_steps": 15621, "loss": 0.3061, "lr": 9.54873651362873e-07, "epoch": 0.5630241341783496, "percentage": 56.3, "elapsed_time": "0:33:53", "remaining_time": "0:26:17", "throughput": 13626.27, "total_tokens": 27704512}
|
|
{"current_steps": 8800, "total_steps": 15621, "loss": 0.46, "lr": 9.537574497635043e-07, "epoch": 0.5633442161193265, "percentage": 56.33, "elapsed_time": "0:33:53", "remaining_time": "0:26:16", "throughput": 13629.94, "total_tokens": 27720448}
|
|
{"current_steps": 8805, "total_steps": 15621, "loss": 0.4966, "lr": 9.52641305898503e-07, "epoch": 0.5636642980603035, "percentage": 56.37, "elapsed_time": "0:33:54", "remaining_time": "0:26:14", "throughput": 13633.47, "total_tokens": 27735808}
|
|
{"current_steps": 8810, "total_steps": 15621, "loss": 0.3122, "lr": 9.515252211613873e-07, "epoch": 0.5639843800012804, "percentage": 56.4, "elapsed_time": "0:33:54", "remaining_time": "0:26:13", "throughput": 13636.73, "total_tokens": 27750464}
|
|
{"current_steps": 8815, "total_steps": 15621, "loss": 0.4586, "lr": 9.504091969456021e-07, "epoch": 0.5643044619422573, "percentage": 56.43, "elapsed_time": "0:33:55", "remaining_time": "0:26:11", "throughput": 13639.78, "total_tokens": 27764352}
|
|
{"current_steps": 8820, "total_steps": 15621, "loss": 0.338, "lr": 9.492932346445165e-07, "epoch": 0.5646245438832341, "percentage": 56.46, "elapsed_time": "0:33:56", "remaining_time": "0:26:10", "throughput": 13643.27, "total_tokens": 27779840}
|
|
{"current_steps": 8825, "total_steps": 15621, "loss": 0.27, "lr": 9.48177335651423e-07, "epoch": 0.564944625824211, "percentage": 56.49, "elapsed_time": "0:33:56", "remaining_time": "0:26:08", "throughput": 13647.09, "total_tokens": 27796352}
|
|
{"current_steps": 8830, "total_steps": 15621, "loss": 0.3325, "lr": 9.470615013595346e-07, "epoch": 0.5652647077651879, "percentage": 56.53, "elapsed_time": "0:33:57", "remaining_time": "0:26:06", "throughput": 13650.25, "total_tokens": 27810624}
|
|
{"current_steps": 8835, "total_steps": 15621, "loss": 0.4447, "lr": 9.459457331619829e-07, "epoch": 0.5655847897061648, "percentage": 56.56, "elapsed_time": "0:33:57", "remaining_time": "0:26:05", "throughput": 13653.51, "total_tokens": 27825152}
|
|
{"current_steps": 8840, "total_steps": 15621, "loss": 0.4076, "lr": 9.448300324518182e-07, "epoch": 0.5659048716471416, "percentage": 56.59, "elapsed_time": "0:33:58", "remaining_time": "0:26:03", "throughput": 13657.0, "total_tokens": 27840384}
|
|
{"current_steps": 8845, "total_steps": 15621, "loss": 0.3017, "lr": 9.437144006220058e-07, "epoch": 0.5662249535881185, "percentage": 56.62, "elapsed_time": "0:33:59", "remaining_time": "0:26:02", "throughput": 13660.71, "total_tokens": 27856640}
|
|
{"current_steps": 8850, "total_steps": 15621, "loss": 0.2027, "lr": 9.425988390654249e-07, "epoch": 0.5665450355290954, "percentage": 56.65, "elapsed_time": "0:33:59", "remaining_time": "0:26:00", "throughput": 13664.42, "total_tokens": 27872768}
|
|
{"current_steps": 8855, "total_steps": 15621, "loss": 0.4955, "lr": 9.414833491748677e-07, "epoch": 0.5668651174700723, "percentage": 56.69, "elapsed_time": "0:34:00", "remaining_time": "0:25:59", "throughput": 13667.7, "total_tokens": 27887488}
|
|
{"current_steps": 8860, "total_steps": 15621, "loss": 0.3024, "lr": 9.40367932343036e-07, "epoch": 0.5671851994110493, "percentage": 56.72, "elapsed_time": "0:34:00", "remaining_time": "0:25:57", "throughput": 13671.1, "total_tokens": 27902720}
|
|
{"current_steps": 8865, "total_steps": 15621, "loss": 0.374, "lr": 9.392525899625407e-07, "epoch": 0.5675052813520262, "percentage": 56.75, "elapsed_time": "0:34:01", "remaining_time": "0:25:55", "throughput": 13674.59, "total_tokens": 27918080}
|
|
{"current_steps": 8870, "total_steps": 15621, "loss": 0.4011, "lr": 9.381373234259004e-07, "epoch": 0.567825363293003, "percentage": 56.78, "elapsed_time": "0:34:02", "remaining_time": "0:25:54", "throughput": 13678.23, "total_tokens": 27933760}
|
|
{"current_steps": 8875, "total_steps": 15621, "loss": 0.375, "lr": 9.370221341255382e-07, "epoch": 0.5681454452339799, "percentage": 56.81, "elapsed_time": "0:34:02", "remaining_time": "0:25:52", "throughput": 13681.63, "total_tokens": 27948992}
|
|
{"current_steps": 8880, "total_steps": 15621, "loss": 0.3382, "lr": 9.359070234537807e-07, "epoch": 0.5684655271749568, "percentage": 56.85, "elapsed_time": "0:34:03", "remaining_time": "0:25:51", "throughput": 13685.79, "total_tokens": 27966848}
|
|
{"current_steps": 8885, "total_steps": 15621, "loss": 0.3803, "lr": 9.34791992802857e-07, "epoch": 0.5687856091159337, "percentage": 56.88, "elapsed_time": "0:34:04", "remaining_time": "0:25:49", "throughput": 13689.04, "total_tokens": 27981696}
|
|
{"current_steps": 8890, "total_steps": 15621, "loss": 0.2607, "lr": 9.336770435648963e-07, "epoch": 0.5691056910569106, "percentage": 56.91, "elapsed_time": "0:34:04", "remaining_time": "0:25:48", "throughput": 13692.57, "total_tokens": 27997376}
|
|
{"current_steps": 8895, "total_steps": 15621, "loss": 0.4075, "lr": 9.325621771319246e-07, "epoch": 0.5694257729978874, "percentage": 56.94, "elapsed_time": "0:34:05", "remaining_time": "0:25:46", "throughput": 13696.4, "total_tokens": 28014016}
|
|
{"current_steps": 8900, "total_steps": 15621, "loss": 0.4178, "lr": 9.314473948958673e-07, "epoch": 0.5697458549388643, "percentage": 56.97, "elapsed_time": "0:34:05", "remaining_time": "0:25:45", "throughput": 13700.11, "total_tokens": 28030400}
|
|
{"current_steps": 8905, "total_steps": 15621, "loss": 0.3456, "lr": 9.303326982485422e-07, "epoch": 0.5700659368798412, "percentage": 57.01, "elapsed_time": "0:34:06", "remaining_time": "0:25:43", "throughput": 13703.99, "total_tokens": 28047104}
|
|
{"current_steps": 8910, "total_steps": 15621, "loss": 0.3546, "lr": 9.29218088581661e-07, "epoch": 0.5703860188208181, "percentage": 57.04, "elapsed_time": "0:34:07", "remaining_time": "0:25:41", "throughput": 13707.66, "total_tokens": 28063168}
|
|
{"current_steps": 8915, "total_steps": 15621, "loss": 0.3462, "lr": 9.281035672868278e-07, "epoch": 0.5707061007617951, "percentage": 57.07, "elapsed_time": "0:34:07", "remaining_time": "0:25:40", "throughput": 13711.25, "total_tokens": 28079104}
|
|
{"current_steps": 8920, "total_steps": 15621, "loss": 0.3912, "lr": 9.269891357555348e-07, "epoch": 0.571026182702772, "percentage": 57.1, "elapsed_time": "0:34:08", "remaining_time": "0:25:38", "throughput": 13714.79, "total_tokens": 28094720}
|
|
{"current_steps": 8925, "total_steps": 15621, "loss": 0.2754, "lr": 9.25874795379163e-07, "epoch": 0.5713462646437488, "percentage": 57.13, "elapsed_time": "0:34:09", "remaining_time": "0:25:37", "throughput": 13718.45, "total_tokens": 28110848}
|
|
{"current_steps": 8930, "total_steps": 15621, "loss": 0.4172, "lr": 9.247605475489793e-07, "epoch": 0.5716663465847257, "percentage": 57.17, "elapsed_time": "0:34:09", "remaining_time": "0:25:35", "throughput": 13722.14, "total_tokens": 28127040}
|
|
{"current_steps": 8935, "total_steps": 15621, "loss": 0.3062, "lr": 9.236463936561358e-07, "epoch": 0.5719864285257026, "percentage": 57.2, "elapsed_time": "0:34:10", "remaining_time": "0:25:34", "throughput": 13725.93, "total_tokens": 28143424}
|
|
{"current_steps": 8940, "total_steps": 15621, "loss": 0.5365, "lr": 9.225323350916661e-07, "epoch": 0.5723065104666795, "percentage": 57.23, "elapsed_time": "0:34:10", "remaining_time": "0:25:32", "throughput": 13729.3, "total_tokens": 28158528}
|
|
{"current_steps": 8945, "total_steps": 15621, "loss": 0.3948, "lr": 9.214183732464855e-07, "epoch": 0.5726265924076563, "percentage": 57.26, "elapsed_time": "0:34:11", "remaining_time": "0:25:31", "throughput": 13732.75, "total_tokens": 28173888}
|
|
{"current_steps": 8950, "total_steps": 15621, "loss": 0.3671, "lr": 9.203045095113886e-07, "epoch": 0.5729466743486332, "percentage": 57.29, "elapsed_time": "0:34:12", "remaining_time": "0:25:29", "throughput": 13736.94, "total_tokens": 28191872}
|
|
{"current_steps": 8955, "total_steps": 15621, "loss": 0.4305, "lr": 9.191907452770476e-07, "epoch": 0.5732667562896101, "percentage": 57.33, "elapsed_time": "0:34:12", "remaining_time": "0:25:28", "throughput": 13740.31, "total_tokens": 28206912}
|
|
{"current_steps": 8960, "total_steps": 15621, "loss": 0.4233, "lr": 9.180770819340095e-07, "epoch": 0.573586838230587, "percentage": 57.36, "elapsed_time": "0:34:13", "remaining_time": "0:25:26", "throughput": 13743.8, "total_tokens": 28222336}
|
|
{"current_steps": 8965, "total_steps": 15621, "loss": 0.376, "lr": 9.169635208726967e-07, "epoch": 0.573906920171564, "percentage": 57.39, "elapsed_time": "0:34:14", "remaining_time": "0:25:25", "throughput": 13747.35, "total_tokens": 28238144}
|
|
{"current_steps": 8970, "total_steps": 15621, "loss": 0.3787, "lr": 9.15850063483403e-07, "epoch": 0.5742270021125409, "percentage": 57.42, "elapsed_time": "0:34:14", "remaining_time": "0:25:23", "throughput": 13750.76, "total_tokens": 28253376}
|
|
{"current_steps": 8975, "total_steps": 15621, "loss": 0.3493, "lr": 9.147367111562928e-07, "epoch": 0.5745470840535177, "percentage": 57.45, "elapsed_time": "0:34:15", "remaining_time": "0:25:21", "throughput": 13754.4, "total_tokens": 28269248}
|
|
{"current_steps": 8980, "total_steps": 15621, "loss": 0.4094, "lr": 9.136234652814005e-07, "epoch": 0.5748671659944946, "percentage": 57.49, "elapsed_time": "0:34:15", "remaining_time": "0:25:20", "throughput": 13758.1, "total_tokens": 28285440}
|
|
{"current_steps": 8985, "total_steps": 15621, "loss": 0.2965, "lr": 9.125103272486255e-07, "epoch": 0.5751872479354715, "percentage": 57.52, "elapsed_time": "0:34:16", "remaining_time": "0:25:18", "throughput": 13761.5, "total_tokens": 28300736}
|
|
{"current_steps": 8990, "total_steps": 15621, "loss": 0.361, "lr": 9.11397298447734e-07, "epoch": 0.5755073298764484, "percentage": 57.55, "elapsed_time": "0:34:17", "remaining_time": "0:25:17", "throughput": 13764.84, "total_tokens": 28315712}
|
|
{"current_steps": 8995, "total_steps": 15621, "loss": 0.3287, "lr": 9.10284380268356e-07, "epoch": 0.5758274118174252, "percentage": 57.58, "elapsed_time": "0:34:17", "remaining_time": "0:25:15", "throughput": 13768.52, "total_tokens": 28332032}
|
|
{"current_steps": 9000, "total_steps": 15621, "loss": 0.4476, "lr": 9.091715740999828e-07, "epoch": 0.5761474937584021, "percentage": 57.61, "elapsed_time": "0:34:18", "remaining_time": "0:25:14", "throughput": 13772.1, "total_tokens": 28347968}
|
|
{"current_steps": 9005, "total_steps": 15621, "loss": 0.3849, "lr": 9.080588813319654e-07, "epoch": 0.576467575699379, "percentage": 57.65, "elapsed_time": "0:34:18", "remaining_time": "0:25:12", "throughput": 13775.43, "total_tokens": 28362944}
|
|
{"current_steps": 9010, "total_steps": 15621, "loss": 0.3032, "lr": 9.069463033535143e-07, "epoch": 0.5767876576403559, "percentage": 57.68, "elapsed_time": "0:34:19", "remaining_time": "0:25:11", "throughput": 13778.96, "total_tokens": 28378624}
|
|
{"current_steps": 9015, "total_steps": 15621, "loss": 0.3865, "lr": 9.058338415536962e-07, "epoch": 0.5771077395813328, "percentage": 57.71, "elapsed_time": "0:34:20", "remaining_time": "0:25:09", "throughput": 13782.4, "total_tokens": 28394048}
|
|
{"current_steps": 9020, "total_steps": 15621, "loss": 0.3808, "lr": 9.04721497321432e-07, "epoch": 0.5774278215223098, "percentage": 57.74, "elapsed_time": "0:34:20", "remaining_time": "0:25:08", "throughput": 13785.88, "total_tokens": 28409664}
|
|
{"current_steps": 9025, "total_steps": 15621, "loss": 0.3744, "lr": 9.036092720454977e-07, "epoch": 0.5777479034632866, "percentage": 57.77, "elapsed_time": "0:34:21", "remaining_time": "0:25:06", "throughput": 13789.24, "total_tokens": 28424768}
|
|
{"current_steps": 9030, "total_steps": 15621, "loss": 0.3387, "lr": 9.024971671145189e-07, "epoch": 0.5780679854042635, "percentage": 57.81, "elapsed_time": "0:34:21", "remaining_time": "0:25:05", "throughput": 13792.45, "total_tokens": 28439424}
|
|
{"current_steps": 9035, "total_steps": 15621, "loss": 0.4406, "lr": 9.013851839169718e-07, "epoch": 0.5783880673452404, "percentage": 57.84, "elapsed_time": "0:34:22", "remaining_time": "0:25:03", "throughput": 13796.29, "total_tokens": 28456064}
|
|
{"current_steps": 9040, "total_steps": 15621, "loss": 0.3388, "lr": 9.002733238411801e-07, "epoch": 0.5787081492862173, "percentage": 57.87, "elapsed_time": "0:34:23", "remaining_time": "0:25:02", "throughput": 13800.09, "total_tokens": 28472768}
|
|
{"current_steps": 9045, "total_steps": 15621, "loss": 0.3489, "lr": 8.991615882753147e-07, "epoch": 0.5790282312271942, "percentage": 57.9, "elapsed_time": "0:34:23", "remaining_time": "0:25:00", "throughput": 13803.65, "total_tokens": 28488704}
|
|
{"current_steps": 9050, "total_steps": 15621, "loss": 0.4431, "lr": 8.980499786073904e-07, "epoch": 0.579348313168171, "percentage": 57.93, "elapsed_time": "0:34:24", "remaining_time": "0:24:58", "throughput": 13807.0, "total_tokens": 28503808}
|
|
{"current_steps": 9055, "total_steps": 15621, "loss": 0.4759, "lr": 8.969384962252645e-07, "epoch": 0.5796683951091479, "percentage": 57.97, "elapsed_time": "0:34:25", "remaining_time": "0:24:57", "throughput": 13810.76, "total_tokens": 28520320}
|
|
{"current_steps": 9060, "total_steps": 15621, "loss": 0.4431, "lr": 8.958271425166366e-07, "epoch": 0.5799884770501248, "percentage": 58.0, "elapsed_time": "0:34:25", "remaining_time": "0:24:55", "throughput": 13814.18, "total_tokens": 28535680}
|
|
{"current_steps": 9065, "total_steps": 15621, "loss": 0.396, "lr": 8.947159188690442e-07, "epoch": 0.5803085589911017, "percentage": 58.03, "elapsed_time": "0:34:26", "remaining_time": "0:24:54", "throughput": 13817.67, "total_tokens": 28551488}
|
|
{"current_steps": 9070, "total_steps": 15621, "loss": 0.4786, "lr": 8.93604826669863e-07, "epoch": 0.5806286409320787, "percentage": 58.06, "elapsed_time": "0:34:26", "remaining_time": "0:24:52", "throughput": 13821.14, "total_tokens": 28567040}
|
|
{"current_steps": 9075, "total_steps": 15621, "loss": 0.3986, "lr": 8.924938673063052e-07, "epoch": 0.5809487228730555, "percentage": 58.09, "elapsed_time": "0:34:27", "remaining_time": "0:24:51", "throughput": 13824.24, "total_tokens": 28581568}
|
|
{"current_steps": 9080, "total_steps": 15621, "loss": 0.3559, "lr": 8.913830421654166e-07, "epoch": 0.5812688048140324, "percentage": 58.13, "elapsed_time": "0:34:28", "remaining_time": "0:24:49", "throughput": 13827.71, "total_tokens": 28596992}
|
|
{"current_steps": 9085, "total_steps": 15621, "loss": 0.4757, "lr": 8.902723526340746e-07, "epoch": 0.5815888867550093, "percentage": 58.16, "elapsed_time": "0:34:28", "remaining_time": "0:24:48", "throughput": 13831.59, "total_tokens": 28613952}
|
|
{"current_steps": 9090, "total_steps": 15621, "loss": 0.4202, "lr": 8.89161800098989e-07, "epoch": 0.5819089686959862, "percentage": 58.19, "elapsed_time": "0:34:29", "remaining_time": "0:24:46", "throughput": 13834.84, "total_tokens": 28628736}
|
|
{"current_steps": 9095, "total_steps": 15621, "loss": 0.3704, "lr": 8.880513859466974e-07, "epoch": 0.5822290506369631, "percentage": 58.22, "elapsed_time": "0:34:29", "remaining_time": "0:24:45", "throughput": 13838.45, "total_tokens": 28644928}
|
|
{"current_steps": 9100, "total_steps": 15621, "loss": 0.278, "lr": 8.869411115635645e-07, "epoch": 0.5825491325779399, "percentage": 58.25, "elapsed_time": "0:34:30", "remaining_time": "0:24:43", "throughput": 13842.07, "total_tokens": 28661184}
|
|
{"current_steps": 9105, "total_steps": 15621, "loss": 0.2772, "lr": 8.858309783357816e-07, "epoch": 0.5828692145189168, "percentage": 58.29, "elapsed_time": "0:34:31", "remaining_time": "0:24:42", "throughput": 13845.27, "total_tokens": 28675776}
|
|
{"current_steps": 9110, "total_steps": 15621, "loss": 0.4318, "lr": 8.847209876493629e-07, "epoch": 0.5831892964598937, "percentage": 58.32, "elapsed_time": "0:34:31", "remaining_time": "0:24:40", "throughput": 13848.92, "total_tokens": 28692160}
|
|
{"current_steps": 9115, "total_steps": 15621, "loss": 0.2576, "lr": 8.836111408901441e-07, "epoch": 0.5835093784008706, "percentage": 58.35, "elapsed_time": "0:34:32", "remaining_time": "0:24:39", "throughput": 13852.23, "total_tokens": 28707328}
|
|
{"current_steps": 9120, "total_steps": 15621, "loss": 0.4235, "lr": 8.825014394437828e-07, "epoch": 0.5838294603418475, "percentage": 58.38, "elapsed_time": "0:34:32", "remaining_time": "0:24:37", "throughput": 13855.58, "total_tokens": 28722624}
|
|
{"current_steps": 9125, "total_steps": 15621, "loss": 0.3748, "lr": 8.813918846957542e-07, "epoch": 0.5841495422828245, "percentage": 58.41, "elapsed_time": "0:34:33", "remaining_time": "0:24:36", "throughput": 13858.9, "total_tokens": 28737856}
|
|
{"current_steps": 9130, "total_steps": 15621, "loss": 0.4501, "lr": 8.802824780313499e-07, "epoch": 0.5844696242238013, "percentage": 58.45, "elapsed_time": "0:34:34", "remaining_time": "0:24:34", "throughput": 13862.09, "total_tokens": 28752448}
|
|
{"current_steps": 9135, "total_steps": 15621, "loss": 0.3958, "lr": 8.791732208356771e-07, "epoch": 0.5847897061647782, "percentage": 58.48, "elapsed_time": "0:34:34", "remaining_time": "0:24:33", "throughput": 13865.44, "total_tokens": 28767616}
|
|
{"current_steps": 9140, "total_steps": 15621, "loss": 0.4649, "lr": 8.780641144936573e-07, "epoch": 0.5851097881057551, "percentage": 58.51, "elapsed_time": "0:34:35", "remaining_time": "0:24:31", "throughput": 13868.64, "total_tokens": 28782400}
|
|
{"current_steps": 9145, "total_steps": 15621, "loss": 0.4457, "lr": 8.76955160390022e-07, "epoch": 0.585429870046732, "percentage": 58.54, "elapsed_time": "0:34:35", "remaining_time": "0:24:30", "throughput": 13872.17, "total_tokens": 28798336}
|
|
{"current_steps": 9150, "total_steps": 15621, "loss": 0.2868, "lr": 8.758463599093136e-07, "epoch": 0.5857499519877089, "percentage": 58.57, "elapsed_time": "0:34:36", "remaining_time": "0:24:28", "throughput": 13875.73, "total_tokens": 28814336}
|
|
{"current_steps": 9155, "total_steps": 15621, "loss": 0.5273, "lr": 8.747377144358825e-07, "epoch": 0.5860700339286857, "percentage": 58.61, "elapsed_time": "0:34:37", "remaining_time": "0:24:27", "throughput": 13879.43, "total_tokens": 28830656}
|
|
{"current_steps": 9160, "total_steps": 15621, "loss": 0.418, "lr": 8.736292253538861e-07, "epoch": 0.5863901158696626, "percentage": 58.64, "elapsed_time": "0:34:37", "remaining_time": "0:24:25", "throughput": 13882.94, "total_tokens": 28846656}
|
|
{"current_steps": 9165, "total_steps": 15621, "loss": 0.309, "lr": 8.725208940472851e-07, "epoch": 0.5867101978106395, "percentage": 58.67, "elapsed_time": "0:34:38", "remaining_time": "0:24:24", "throughput": 13886.58, "total_tokens": 28862848}
|
|
{"current_steps": 9170, "total_steps": 15621, "loss": 0.4083, "lr": 8.714127218998448e-07, "epoch": 0.5870302797516164, "percentage": 58.7, "elapsed_time": "0:34:39", "remaining_time": "0:24:22", "throughput": 13890.0, "total_tokens": 28878400}
|
|
{"current_steps": 9175, "total_steps": 15621, "loss": 0.5084, "lr": 8.70304710295131e-07, "epoch": 0.5873503616925934, "percentage": 58.74, "elapsed_time": "0:34:39", "remaining_time": "0:24:21", "throughput": 13893.3, "total_tokens": 28893568}
|
|
{"current_steps": 9180, "total_steps": 15621, "loss": 0.367, "lr": 8.691968606165092e-07, "epoch": 0.5876704436335702, "percentage": 58.77, "elapsed_time": "0:34:40", "remaining_time": "0:24:19", "throughput": 13896.91, "total_tokens": 28909824}
|
|
{"current_steps": 9185, "total_steps": 15621, "loss": 0.3078, "lr": 8.680891742471429e-07, "epoch": 0.5879905255745471, "percentage": 58.8, "elapsed_time": "0:34:40", "remaining_time": "0:24:18", "throughput": 13900.39, "total_tokens": 28925568}
|
|
{"current_steps": 9190, "total_steps": 15621, "loss": 0.3272, "lr": 8.669816525699912e-07, "epoch": 0.588310607515524, "percentage": 58.83, "elapsed_time": "0:34:41", "remaining_time": "0:24:16", "throughput": 13903.77, "total_tokens": 28941056}
|
|
{"current_steps": 9195, "total_steps": 15621, "loss": 0.4143, "lr": 8.658742969678079e-07, "epoch": 0.5886306894565009, "percentage": 58.86, "elapsed_time": "0:34:42", "remaining_time": "0:24:15", "throughput": 13906.83, "total_tokens": 28955456}
|
|
{"current_steps": 9200, "total_steps": 15621, "loss": 0.2927, "lr": 8.647671088231398e-07, "epoch": 0.5889507713974778, "percentage": 58.9, "elapsed_time": "0:34:42", "remaining_time": "0:24:13", "throughput": 13910.24, "total_tokens": 28971136}
|
|
{"current_steps": 9205, "total_steps": 15621, "loss": 0.4087, "lr": 8.636600895183245e-07, "epoch": 0.5892708533384546, "percentage": 58.93, "elapsed_time": "0:34:43", "remaining_time": "0:24:12", "throughput": 13914.14, "total_tokens": 28988480}
|
|
{"current_steps": 9210, "total_steps": 15621, "loss": 0.3669, "lr": 8.625532404354877e-07, "epoch": 0.5895909352794315, "percentage": 58.96, "elapsed_time": "0:34:44", "remaining_time": "0:24:10", "throughput": 13917.67, "total_tokens": 29004544}
|
|
{"current_steps": 9215, "total_steps": 15621, "loss": 0.3809, "lr": 8.614465629565443e-07, "epoch": 0.5899110172204084, "percentage": 58.99, "elapsed_time": "0:34:44", "remaining_time": "0:24:09", "throughput": 13920.85, "total_tokens": 29019328}
|
|
{"current_steps": 9220, "total_steps": 15621, "loss": 0.3336, "lr": 8.603400584631939e-07, "epoch": 0.5902310991613853, "percentage": 59.02, "elapsed_time": "0:34:45", "remaining_time": "0:24:07", "throughput": 13924.21, "total_tokens": 29034752}
|
|
{"current_steps": 9225, "total_steps": 15621, "loss": 0.4422, "lr": 8.592337283369198e-07, "epoch": 0.5905511811023622, "percentage": 59.06, "elapsed_time": "0:34:45", "remaining_time": "0:24:06", "throughput": 13927.7, "total_tokens": 29050816}
|
|
{"current_steps": 9230, "total_steps": 15621, "loss": 0.2752, "lr": 8.581275739589893e-07, "epoch": 0.5908712630433391, "percentage": 59.09, "elapsed_time": "0:34:46", "remaining_time": "0:24:04", "throughput": 13930.97, "total_tokens": 29065920}
|
|
{"current_steps": 9235, "total_steps": 15621, "loss": 0.483, "lr": 8.570215967104481e-07, "epoch": 0.591191344984316, "percentage": 59.12, "elapsed_time": "0:34:47", "remaining_time": "0:24:03", "throughput": 13934.26, "total_tokens": 29080960}
|
|
{"current_steps": 9240, "total_steps": 15621, "loss": 0.4786, "lr": 8.559157979721225e-07, "epoch": 0.5915114269252929, "percentage": 59.15, "elapsed_time": "0:34:47", "remaining_time": "0:24:01", "throughput": 13937.68, "total_tokens": 29096768}
|
|
{"current_steps": 9245, "total_steps": 15621, "loss": 0.5513, "lr": 8.548101791246145e-07, "epoch": 0.5918315088662698, "percentage": 59.18, "elapsed_time": "0:34:48", "remaining_time": "0:24:00", "throughput": 13941.13, "total_tokens": 29112448}
|
|
{"current_steps": 9250, "total_steps": 15621, "loss": 0.3392, "lr": 8.537047415483028e-07, "epoch": 0.5921515908072467, "percentage": 59.22, "elapsed_time": "0:34:48", "remaining_time": "0:23:58", "throughput": 13944.44, "total_tokens": 29127808}
|
|
{"current_steps": 9255, "total_steps": 15621, "loss": 0.2774, "lr": 8.525994866233388e-07, "epoch": 0.5924716727482235, "percentage": 59.25, "elapsed_time": "0:34:49", "remaining_time": "0:23:57", "throughput": 13947.72, "total_tokens": 29142912}
|
|
{"current_steps": 9260, "total_steps": 15621, "loss": 0.3847, "lr": 8.514944157296464e-07, "epoch": 0.5927917546892004, "percentage": 59.28, "elapsed_time": "0:34:50", "remaining_time": "0:23:55", "throughput": 13951.28, "total_tokens": 29159168}
|
|
{"current_steps": 9265, "total_steps": 15621, "loss": 0.3826, "lr": 8.503895302469199e-07, "epoch": 0.5931118366301773, "percentage": 59.31, "elapsed_time": "0:34:50", "remaining_time": "0:23:54", "throughput": 13954.87, "total_tokens": 29175488}
|
|
{"current_steps": 9270, "total_steps": 15621, "loss": 0.4143, "lr": 8.492848315546214e-07, "epoch": 0.5934319185711542, "percentage": 59.34, "elapsed_time": "0:34:51", "remaining_time": "0:23:52", "throughput": 13958.26, "total_tokens": 29191104}
|
|
{"current_steps": 9275, "total_steps": 15621, "loss": 0.4172, "lr": 8.4818032103198e-07, "epoch": 0.5937520005121311, "percentage": 59.38, "elapsed_time": "0:34:51", "remaining_time": "0:23:51", "throughput": 13961.51, "total_tokens": 29206208}
|
|
{"current_steps": 9280, "total_steps": 15621, "loss": 0.4169, "lr": 8.470760000579906e-07, "epoch": 0.5940720824531079, "percentage": 59.41, "elapsed_time": "0:34:52", "remaining_time": "0:23:49", "throughput": 13964.72, "total_tokens": 29221312}
|
|
{"current_steps": 9285, "total_steps": 15621, "loss": 0.4932, "lr": 8.459718700114108e-07, "epoch": 0.5943921643940849, "percentage": 59.44, "elapsed_time": "0:34:53", "remaining_time": "0:23:48", "throughput": 13968.08, "total_tokens": 29236800}
|
|
{"current_steps": 9290, "total_steps": 15621, "loss": 0.4521, "lr": 8.448679322707595e-07, "epoch": 0.5947122463350618, "percentage": 59.47, "elapsed_time": "0:34:53", "remaining_time": "0:23:46", "throughput": 13971.53, "total_tokens": 29252480}
|
|
{"current_steps": 9295, "total_steps": 15621, "loss": 0.5845, "lr": 8.437641882143163e-07, "epoch": 0.5950323282760387, "percentage": 59.5, "elapsed_time": "0:34:54", "remaining_time": "0:23:45", "throughput": 13974.61, "total_tokens": 29266944}
|
|
{"current_steps": 9300, "total_steps": 15621, "loss": 0.319, "lr": 8.426606392201185e-07, "epoch": 0.5953524102170156, "percentage": 59.54, "elapsed_time": "0:34:54", "remaining_time": "0:23:43", "throughput": 13978.02, "total_tokens": 29282816}
|
|
{"current_steps": 9305, "total_steps": 15621, "loss": 0.3009, "lr": 8.415572866659599e-07, "epoch": 0.5956724921579925, "percentage": 59.57, "elapsed_time": "0:34:55", "remaining_time": "0:23:42", "throughput": 13981.26, "total_tokens": 29297984}
|
|
{"current_steps": 9310, "total_steps": 15621, "loss": 0.376, "lr": 8.404541319293896e-07, "epoch": 0.5959925740989693, "percentage": 59.6, "elapsed_time": "0:34:56", "remaining_time": "0:23:40", "throughput": 13984.69, "total_tokens": 29313664}
|
|
{"current_steps": 9315, "total_steps": 15621, "loss": 0.5842, "lr": 8.393511763877086e-07, "epoch": 0.5963126560399462, "percentage": 59.63, "elapsed_time": "0:34:56", "remaining_time": "0:23:39", "throughput": 13988.14, "total_tokens": 29329472}
|
|
{"current_steps": 9320, "total_steps": 15621, "loss": 0.4463, "lr": 8.3824842141797e-07, "epoch": 0.5966327379809231, "percentage": 59.66, "elapsed_time": "0:34:57", "remaining_time": "0:23:37", "throughput": 13991.85, "total_tokens": 29346048}
|
|
{"current_steps": 9325, "total_steps": 15621, "loss": 0.3801, "lr": 8.371458683969765e-07, "epoch": 0.5969528199219, "percentage": 59.7, "elapsed_time": "0:34:57", "remaining_time": "0:23:36", "throughput": 13995.21, "total_tokens": 29361664}
|
|
{"current_steps": 9330, "total_steps": 15621, "loss": 0.3887, "lr": 8.360435187012787e-07, "epoch": 0.5972729018628768, "percentage": 59.73, "elapsed_time": "0:34:58", "remaining_time": "0:23:35", "throughput": 13998.46, "total_tokens": 29376896}
|
|
{"current_steps": 9335, "total_steps": 15621, "loss": 0.3767, "lr": 8.349413737071725e-07, "epoch": 0.5975929838038538, "percentage": 59.76, "elapsed_time": "0:34:59", "remaining_time": "0:23:33", "throughput": 14001.86, "total_tokens": 29392640}
|
|
{"current_steps": 9340, "total_steps": 15621, "loss": 0.4399, "lr": 8.338394347906994e-07, "epoch": 0.5979130657448307, "percentage": 59.79, "elapsed_time": "0:34:59", "remaining_time": "0:23:32", "throughput": 14005.07, "total_tokens": 29407808}
|
|
{"current_steps": 9345, "total_steps": 15621, "loss": 0.2995, "lr": 8.327377033276431e-07, "epoch": 0.5982331476858076, "percentage": 59.82, "elapsed_time": "0:35:00", "remaining_time": "0:23:30", "throughput": 14008.18, "total_tokens": 29422528}
|
|
{"current_steps": 9350, "total_steps": 15621, "loss": 0.3481, "lr": 8.316361806935279e-07, "epoch": 0.5985532296267845, "percentage": 59.86, "elapsed_time": "0:35:00", "remaining_time": "0:23:29", "throughput": 14011.56, "total_tokens": 29438272}
|
|
{"current_steps": 9355, "total_steps": 15621, "loss": 0.4557, "lr": 8.305348682636177e-07, "epoch": 0.5988733115677614, "percentage": 59.89, "elapsed_time": "0:35:01", "remaining_time": "0:23:27", "throughput": 14014.78, "total_tokens": 29453376}
|
|
{"current_steps": 9360, "total_steps": 15621, "loss": 0.4204, "lr": 8.294337674129144e-07, "epoch": 0.5991933935087382, "percentage": 59.92, "elapsed_time": "0:35:02", "remaining_time": "0:23:26", "throughput": 14018.26, "total_tokens": 29469248}
|
|
{"current_steps": 9365, "total_steps": 15621, "loss": 0.2783, "lr": 8.283328795161554e-07, "epoch": 0.5995134754497151, "percentage": 59.95, "elapsed_time": "0:35:02", "remaining_time": "0:23:24", "throughput": 14021.88, "total_tokens": 29485888}
|
|
{"current_steps": 9370, "total_steps": 15621, "loss": 0.3194, "lr": 8.272322059478114e-07, "epoch": 0.599833557390692, "percentage": 59.98, "elapsed_time": "0:35:03", "remaining_time": "0:23:23", "throughput": 14025.03, "total_tokens": 29500864}
|
|
{"current_steps": 9375, "total_steps": 15621, "loss": 0.2312, "lr": 8.261317480820871e-07, "epoch": 0.6001536393316689, "percentage": 60.02, "elapsed_time": "0:35:04", "remaining_time": "0:23:21", "throughput": 14028.34, "total_tokens": 29516288}
|
|
{"current_steps": 9380, "total_steps": 15621, "loss": 0.4, "lr": 8.250315072929168e-07, "epoch": 0.6004737212726458, "percentage": 60.05, "elapsed_time": "0:35:04", "remaining_time": "0:23:20", "throughput": 14031.37, "total_tokens": 29530880}
|
|
{"current_steps": 9384, "total_steps": 15621, "eval_loss": 0.3916759490966797, "epoch": 0.6007297868254273, "percentage": 60.07, "elapsed_time": "0:35:54", "remaining_time": "0:23:51", "throughput": 13714.5, "total_tokens": 29544576}
|
|
{"current_steps": 9385, "total_steps": 15621, "loss": 0.3513, "lr": 8.239314849539637e-07, "epoch": 0.6007938032136226, "percentage": 60.08, "elapsed_time": "0:36:22", "remaining_time": "0:24:10", "throughput": 13540.28, "total_tokens": 29547840}
|
|
{"current_steps": 9390, "total_steps": 15621, "loss": 0.4204, "lr": 8.228316824386193e-07, "epoch": 0.6011138851545996, "percentage": 60.11, "elapsed_time": "0:36:22", "remaining_time": "0:24:08", "throughput": 13543.81, "total_tokens": 29564096}
|
|
{"current_steps": 9395, "total_steps": 15621, "loss": 0.3633, "lr": 8.217321011199995e-07, "epoch": 0.6014339670955765, "percentage": 60.14, "elapsed_time": "0:36:23", "remaining_time": "0:24:06", "throughput": 13547.11, "total_tokens": 29579520}
|
|
{"current_steps": 9400, "total_steps": 15621, "loss": 0.4256, "lr": 8.206327423709441e-07, "epoch": 0.6017540490365534, "percentage": 60.18, "elapsed_time": "0:36:24", "remaining_time": "0:24:05", "throughput": 13550.2, "total_tokens": 29594048}
|
|
{"current_steps": 9405, "total_steps": 15621, "loss": 0.3871, "lr": 8.195336075640163e-07, "epoch": 0.6020741309775303, "percentage": 60.21, "elapsed_time": "0:36:24", "remaining_time": "0:24:03", "throughput": 13553.75, "total_tokens": 29610368}
|
|
{"current_steps": 9410, "total_steps": 15621, "loss": 0.4232, "lr": 8.184346980714984e-07, "epoch": 0.6023942129185071, "percentage": 60.24, "elapsed_time": "0:36:25", "remaining_time": "0:24:02", "throughput": 13557.05, "total_tokens": 29625792}
|
|
{"current_steps": 9415, "total_steps": 15621, "loss": 0.3399, "lr": 8.173360152653914e-07, "epoch": 0.602714294859484, "percentage": 60.27, "elapsed_time": "0:36:25", "remaining_time": "0:24:00", "throughput": 13560.6, "total_tokens": 29642240}
|
|
{"current_steps": 9420, "total_steps": 15621, "loss": 0.293, "lr": 8.162375605174143e-07, "epoch": 0.6030343768004609, "percentage": 60.3, "elapsed_time": "0:36:26", "remaining_time": "0:23:59", "throughput": 13564.03, "total_tokens": 29658176}
|
|
{"current_steps": 9425, "total_steps": 15621, "loss": 0.3118, "lr": 8.151393351990005e-07, "epoch": 0.6033544587414378, "percentage": 60.34, "elapsed_time": "0:36:27", "remaining_time": "0:23:57", "throughput": 13567.86, "total_tokens": 29675392}
|
|
{"current_steps": 9430, "total_steps": 15621, "loss": 0.4241, "lr": 8.140413406812971e-07, "epoch": 0.6036745406824147, "percentage": 60.37, "elapsed_time": "0:36:27", "remaining_time": "0:23:56", "throughput": 13570.98, "total_tokens": 29690048}
|
|
{"current_steps": 9435, "total_steps": 15621, "loss": 0.3052, "lr": 8.129435783351635e-07, "epoch": 0.6039946226233915, "percentage": 60.4, "elapsed_time": "0:36:28", "remaining_time": "0:23:54", "throughput": 13574.2, "total_tokens": 29705088}
|
|
{"current_steps": 9440, "total_steps": 15621, "loss": 0.4482, "lr": 8.118460495311685e-07, "epoch": 0.6043147045643685, "percentage": 60.43, "elapsed_time": "0:36:28", "remaining_time": "0:23:53", "throughput": 13577.52, "total_tokens": 29720576}
|
|
{"current_steps": 9445, "total_steps": 15621, "loss": 0.4204, "lr": 8.107487556395901e-07, "epoch": 0.6046347865053454, "percentage": 60.46, "elapsed_time": "0:36:29", "remaining_time": "0:23:51", "throughput": 13581.09, "total_tokens": 29736896}
|
|
{"current_steps": 9450, "total_steps": 15621, "loss": 0.3567, "lr": 8.096516980304115e-07, "epoch": 0.6049548684463223, "percentage": 60.5, "elapsed_time": "0:36:30", "remaining_time": "0:23:50", "throughput": 13584.48, "total_tokens": 29752768}
|
|
{"current_steps": 9455, "total_steps": 15621, "loss": 0.3355, "lr": 8.085548780733238e-07, "epoch": 0.6052749503872992, "percentage": 60.53, "elapsed_time": "0:36:30", "remaining_time": "0:23:48", "throughput": 13587.9, "total_tokens": 29768640}
|
|
{"current_steps": 9460, "total_steps": 15621, "loss": 0.338, "lr": 8.074582971377182e-07, "epoch": 0.605595032328276, "percentage": 60.56, "elapsed_time": "0:36:31", "remaining_time": "0:23:47", "throughput": 13591.73, "total_tokens": 29786240}
|
|
{"current_steps": 9465, "total_steps": 15621, "loss": 0.4356, "lr": 8.063619565926892e-07, "epoch": 0.6059151142692529, "percentage": 60.59, "elapsed_time": "0:36:32", "remaining_time": "0:23:45", "throughput": 13595.13, "total_tokens": 29802176}
|
|
{"current_steps": 9470, "total_steps": 15621, "loss": 0.3912, "lr": 8.052658578070313e-07, "epoch": 0.6062351962102298, "percentage": 60.62, "elapsed_time": "0:36:32", "remaining_time": "0:23:44", "throughput": 13598.42, "total_tokens": 29817600}
|
|
{"current_steps": 9475, "total_steps": 15621, "loss": 0.3313, "lr": 8.041700021492362e-07, "epoch": 0.6065552781512067, "percentage": 60.66, "elapsed_time": "0:36:33", "remaining_time": "0:23:42", "throughput": 13601.7, "total_tokens": 29832960}
|
|
{"current_steps": 9480, "total_steps": 15621, "loss": 0.2888, "lr": 8.030743909874924e-07, "epoch": 0.6068753600921836, "percentage": 60.69, "elapsed_time": "0:36:33", "remaining_time": "0:23:41", "throughput": 13604.95, "total_tokens": 29848448}
|
|
{"current_steps": 9485, "total_steps": 15621, "loss": 0.3247, "lr": 8.019790256896839e-07, "epoch": 0.6071954420331604, "percentage": 60.72, "elapsed_time": "0:36:34", "remaining_time": "0:23:39", "throughput": 13608.09, "total_tokens": 29863296}
|
|
{"current_steps": 9490, "total_steps": 15621, "loss": 0.3806, "lr": 8.008839076233871e-07, "epoch": 0.6075155239741373, "percentage": 60.75, "elapsed_time": "0:36:35", "remaining_time": "0:23:38", "throughput": 13611.75, "total_tokens": 29880128}
|
|
{"current_steps": 9495, "total_steps": 15621, "loss": 0.3618, "lr": 7.997890381558691e-07, "epoch": 0.6078356059151143, "percentage": 60.78, "elapsed_time": "0:36:35", "remaining_time": "0:23:36", "throughput": 13614.99, "total_tokens": 29895296}
|
|
{"current_steps": 9500, "total_steps": 15621, "loss": 0.4291, "lr": 7.986944186540878e-07, "epoch": 0.6081556878560912, "percentage": 60.82, "elapsed_time": "0:36:36", "remaining_time": "0:23:35", "throughput": 13618.43, "total_tokens": 29911296}
|
|
{"current_steps": 9505, "total_steps": 15621, "loss": 0.4594, "lr": 7.976000504846885e-07, "epoch": 0.6084757697970681, "percentage": 60.85, "elapsed_time": "0:36:36", "remaining_time": "0:23:33", "throughput": 13621.8, "total_tokens": 29926912}
|
|
{"current_steps": 9510, "total_steps": 15621, "loss": 0.4726, "lr": 7.965059350140024e-07, "epoch": 0.608795851738045, "percentage": 60.88, "elapsed_time": "0:36:37", "remaining_time": "0:23:32", "throughput": 13625.07, "total_tokens": 29942272}
|
|
{"current_steps": 9515, "total_steps": 15621, "loss": 0.4037, "lr": 7.954120736080461e-07, "epoch": 0.6091159336790218, "percentage": 60.91, "elapsed_time": "0:36:38", "remaining_time": "0:23:30", "throughput": 13628.37, "total_tokens": 29958016}
|
|
{"current_steps": 9520, "total_steps": 15621, "loss": 0.5797, "lr": 7.943184676325178e-07, "epoch": 0.6094360156199987, "percentage": 60.94, "elapsed_time": "0:36:38", "remaining_time": "0:23:29", "throughput": 13631.99, "total_tokens": 29974720}
|
|
{"current_steps": 9525, "total_steps": 15621, "loss": 0.4342, "lr": 7.932251184527974e-07, "epoch": 0.6097560975609756, "percentage": 60.98, "elapsed_time": "0:36:39", "remaining_time": "0:23:27", "throughput": 13635.66, "total_tokens": 29991680}
|
|
{"current_steps": 9530, "total_steps": 15621, "loss": 0.2753, "lr": 7.921320274339446e-07, "epoch": 0.6100761795019525, "percentage": 61.01, "elapsed_time": "0:36:40", "remaining_time": "0:23:26", "throughput": 13638.96, "total_tokens": 30007168}
|
|
{"current_steps": 9535, "total_steps": 15621, "loss": 0.3337, "lr": 7.910391959406966e-07, "epoch": 0.6103962614429294, "percentage": 61.04, "elapsed_time": "0:36:40", "remaining_time": "0:23:24", "throughput": 13642.23, "total_tokens": 30022656}
|
|
{"current_steps": 9540, "total_steps": 15621, "loss": 0.3943, "lr": 7.899466253374653e-07, "epoch": 0.6107163433839062, "percentage": 61.07, "elapsed_time": "0:36:41", "remaining_time": "0:23:23", "throughput": 13645.52, "total_tokens": 30038144}
|
|
{"current_steps": 9545, "total_steps": 15621, "loss": 0.3347, "lr": 7.88854316988339e-07, "epoch": 0.6110364253248832, "percentage": 61.1, "elapsed_time": "0:36:41", "remaining_time": "0:23:21", "throughput": 13649.33, "total_tokens": 30055488}
|
|
{"current_steps": 9550, "total_steps": 15621, "loss": 0.3016, "lr": 7.877622722570771e-07, "epoch": 0.6113565072658601, "percentage": 61.14, "elapsed_time": "0:36:42", "remaining_time": "0:23:20", "throughput": 13652.66, "total_tokens": 30071040}
|
|
{"current_steps": 9555, "total_steps": 15621, "loss": 0.4185, "lr": 7.866704925071101e-07, "epoch": 0.611676589206837, "percentage": 61.17, "elapsed_time": "0:36:43", "remaining_time": "0:23:18", "throughput": 13656.34, "total_tokens": 30088000}
|
|
{"current_steps": 9560, "total_steps": 15621, "loss": 0.422, "lr": 7.855789791015377e-07, "epoch": 0.6119966711478139, "percentage": 61.2, "elapsed_time": "0:36:43", "remaining_time": "0:23:17", "throughput": 13659.47, "total_tokens": 30103040}
|
|
{"current_steps": 9565, "total_steps": 15621, "loss": 0.3946, "lr": 7.844877334031277e-07, "epoch": 0.6123167530887907, "percentage": 61.23, "elapsed_time": "0:36:44", "remaining_time": "0:23:15", "throughput": 13662.56, "total_tokens": 30117760}
|
|
{"current_steps": 9570, "total_steps": 15621, "loss": 0.4797, "lr": 7.833967567743131e-07, "epoch": 0.6126368350297676, "percentage": 61.26, "elapsed_time": "0:36:45", "remaining_time": "0:23:14", "throughput": 13666.03, "total_tokens": 30133888}
|
|
{"current_steps": 9575, "total_steps": 15621, "loss": 0.3747, "lr": 7.823060505771903e-07, "epoch": 0.6129569169707445, "percentage": 61.3, "elapsed_time": "0:36:45", "remaining_time": "0:23:12", "throughput": 13669.25, "total_tokens": 30149312}
|
|
{"current_steps": 9580, "total_steps": 15621, "loss": 0.3944, "lr": 7.812156161735199e-07, "epoch": 0.6132769989117214, "percentage": 61.33, "elapsed_time": "0:36:46", "remaining_time": "0:23:11", "throughput": 13672.3, "total_tokens": 30163840}
|
|
{"current_steps": 9585, "total_steps": 15621, "loss": 0.5462, "lr": 7.801254549247215e-07, "epoch": 0.6135970808526983, "percentage": 61.36, "elapsed_time": "0:36:46", "remaining_time": "0:23:09", "throughput": 13675.91, "total_tokens": 30180544}
|
|
{"current_steps": 9590, "total_steps": 15621, "loss": 0.3212, "lr": 7.790355681918739e-07, "epoch": 0.6139171627936751, "percentage": 61.39, "elapsed_time": "0:36:47", "remaining_time": "0:23:08", "throughput": 13679.43, "total_tokens": 30197120}
|
|
{"current_steps": 9595, "total_steps": 15621, "loss": 0.421, "lr": 7.779459573357144e-07, "epoch": 0.614237244734652, "percentage": 61.42, "elapsed_time": "0:36:48", "remaining_time": "0:23:06", "throughput": 13682.87, "total_tokens": 30213376}
|
|
{"current_steps": 9600, "total_steps": 15621, "loss": 0.4225, "lr": 7.768566237166338e-07, "epoch": 0.614557326675629, "percentage": 61.46, "elapsed_time": "0:36:48", "remaining_time": "0:23:05", "throughput": 13686.24, "total_tokens": 30229120}
|
|
{"current_steps": 9605, "total_steps": 15621, "loss": 0.5064, "lr": 7.757675686946786e-07, "epoch": 0.6148774086166059, "percentage": 61.49, "elapsed_time": "0:36:49", "remaining_time": "0:23:03", "throughput": 13689.47, "total_tokens": 30244544}
|
|
{"current_steps": 9610, "total_steps": 15621, "loss": 0.4207, "lr": 7.746787936295468e-07, "epoch": 0.6151974905575828, "percentage": 61.52, "elapsed_time": "0:36:49", "remaining_time": "0:23:02", "throughput": 13692.94, "total_tokens": 30260864}
|
|
{"current_steps": 9615, "total_steps": 15621, "loss": 0.3739, "lr": 7.735902998805868e-07, "epoch": 0.6155175724985597, "percentage": 61.55, "elapsed_time": "0:36:50", "remaining_time": "0:23:00", "throughput": 13695.93, "total_tokens": 30275456}
|
|
{"current_steps": 9620, "total_steps": 15621, "loss": 0.4195, "lr": 7.725020888067955e-07, "epoch": 0.6158376544395365, "percentage": 61.58, "elapsed_time": "0:36:51", "remaining_time": "0:22:59", "throughput": 13699.19, "total_tokens": 30291008}
|
|
{"current_steps": 9625, "total_steps": 15621, "loss": 0.4814, "lr": 7.714141617668176e-07, "epoch": 0.6161577363805134, "percentage": 61.62, "elapsed_time": "0:36:51", "remaining_time": "0:22:57", "throughput": 13702.53, "total_tokens": 30306816}
|
|
{"current_steps": 9630, "total_steps": 15621, "loss": 0.3298, "lr": 7.703265201189426e-07, "epoch": 0.6164778183214903, "percentage": 61.65, "elapsed_time": "0:36:52", "remaining_time": "0:22:56", "throughput": 13705.71, "total_tokens": 30322240}
|
|
{"current_steps": 9635, "total_steps": 15621, "loss": 0.3357, "lr": 7.692391652211036e-07, "epoch": 0.6167979002624672, "percentage": 61.68, "elapsed_time": "0:36:53", "remaining_time": "0:22:54", "throughput": 13709.01, "total_tokens": 30338048}
|
|
{"current_steps": 9640, "total_steps": 15621, "loss": 0.3313, "lr": 7.681520984308769e-07, "epoch": 0.617117982203444, "percentage": 61.71, "elapsed_time": "0:36:53", "remaining_time": "0:22:53", "throughput": 13712.37, "total_tokens": 30353984}
|
|
{"current_steps": 9645, "total_steps": 15621, "loss": 0.4902, "lr": 7.670653211054772e-07, "epoch": 0.6174380641444209, "percentage": 61.74, "elapsed_time": "0:36:54", "remaining_time": "0:22:51", "throughput": 13715.71, "total_tokens": 30370048}
|
|
{"current_steps": 9650, "total_steps": 15621, "loss": 0.413, "lr": 7.659788346017591e-07, "epoch": 0.6177581460853978, "percentage": 61.78, "elapsed_time": "0:36:54", "remaining_time": "0:22:50", "throughput": 13718.88, "total_tokens": 30385344}
|
|
{"current_steps": 9655, "total_steps": 15621, "loss": 0.3813, "lr": 7.648926402762133e-07, "epoch": 0.6180782280263748, "percentage": 61.81, "elapsed_time": "0:36:55", "remaining_time": "0:22:48", "throughput": 13722.01, "total_tokens": 30400576}
|
|
{"current_steps": 9660, "total_steps": 15621, "loss": 0.3867, "lr": 7.638067394849671e-07, "epoch": 0.6183983099673517, "percentage": 61.84, "elapsed_time": "0:36:56", "remaining_time": "0:22:47", "throughput": 13725.1, "total_tokens": 30415424}
|
|
{"current_steps": 9665, "total_steps": 15621, "loss": 0.4056, "lr": 7.627211335837797e-07, "epoch": 0.6187183919083286, "percentage": 61.87, "elapsed_time": "0:36:56", "remaining_time": "0:22:45", "throughput": 13728.24, "total_tokens": 30430592}
|
|
{"current_steps": 9670, "total_steps": 15621, "loss": 0.4352, "lr": 7.616358239280427e-07, "epoch": 0.6190384738493054, "percentage": 61.9, "elapsed_time": "0:36:57", "remaining_time": "0:22:44", "throughput": 13731.41, "total_tokens": 30445952}
|
|
{"current_steps": 9675, "total_steps": 15621, "loss": 0.3274, "lr": 7.605508118727787e-07, "epoch": 0.6193585557902823, "percentage": 61.94, "elapsed_time": "0:36:57", "remaining_time": "0:22:43", "throughput": 13734.67, "total_tokens": 30461568}
|
|
{"current_steps": 9680, "total_steps": 15621, "loss": 0.3611, "lr": 7.594660987726373e-07, "epoch": 0.6196786377312592, "percentage": 61.97, "elapsed_time": "0:36:58", "remaining_time": "0:22:41", "throughput": 13737.77, "total_tokens": 30476672}
|
|
{"current_steps": 9685, "total_steps": 15621, "loss": 0.4013, "lr": 7.583816859818956e-07, "epoch": 0.6199987196722361, "percentage": 62.0, "elapsed_time": "0:36:59", "remaining_time": "0:22:40", "throughput": 13741.07, "total_tokens": 30492672}
|
|
{"current_steps": 9690, "total_steps": 15621, "loss": 0.3785, "lr": 7.57297574854456e-07, "epoch": 0.620318801613213, "percentage": 62.03, "elapsed_time": "0:36:59", "remaining_time": "0:22:38", "throughput": 13744.15, "total_tokens": 30507712}
|
|
{"current_steps": 9695, "total_steps": 15621, "loss": 0.4395, "lr": 7.56213766743844e-07, "epoch": 0.6206388835541898, "percentage": 62.06, "elapsed_time": "0:37:00", "remaining_time": "0:22:37", "throughput": 13747.61, "total_tokens": 30524032}
|
|
{"current_steps": 9700, "total_steps": 15621, "loss": 0.333, "lr": 7.551302630032064e-07, "epoch": 0.6209589654951667, "percentage": 62.1, "elapsed_time": "0:37:00", "remaining_time": "0:22:35", "throughput": 13750.87, "total_tokens": 30539776}
|
|
{"current_steps": 9705, "total_steps": 15621, "loss": 0.3693, "lr": 7.540470649853106e-07, "epoch": 0.6212790474361437, "percentage": 62.13, "elapsed_time": "0:37:01", "remaining_time": "0:22:34", "throughput": 13753.97, "total_tokens": 30554752}
|
|
{"current_steps": 9710, "total_steps": 15621, "loss": 0.4034, "lr": 7.529641740425419e-07, "epoch": 0.6215991293771206, "percentage": 62.16, "elapsed_time": "0:37:02", "remaining_time": "0:22:32", "throughput": 13757.64, "total_tokens": 30571968}
|
|
{"current_steps": 9715, "total_steps": 15621, "loss": 0.4351, "lr": 7.518815915269023e-07, "epoch": 0.6219192113180975, "percentage": 62.19, "elapsed_time": "0:37:02", "remaining_time": "0:22:31", "throughput": 13760.8, "total_tokens": 30587264}
|
|
{"current_steps": 9720, "total_steps": 15621, "loss": 0.3948, "lr": 7.507993187900092e-07, "epoch": 0.6222392932590743, "percentage": 62.22, "elapsed_time": "0:37:03", "remaining_time": "0:22:29", "throughput": 13764.11, "total_tokens": 30603200}
|
|
{"current_steps": 9725, "total_steps": 15621, "loss": 0.4253, "lr": 7.497173571830926e-07, "epoch": 0.6225593752000512, "percentage": 62.26, "elapsed_time": "0:37:03", "remaining_time": "0:22:28", "throughput": 13767.09, "total_tokens": 30617856}
|
|
{"current_steps": 9730, "total_steps": 15621, "loss": 0.4732, "lr": 7.486357080569938e-07, "epoch": 0.6228794571410281, "percentage": 62.29, "elapsed_time": "0:37:04", "remaining_time": "0:22:26", "throughput": 13770.04, "total_tokens": 30632448}
|
|
{"current_steps": 9735, "total_steps": 15621, "loss": 0.3747, "lr": 7.47554372762165e-07, "epoch": 0.623199539082005, "percentage": 62.32, "elapsed_time": "0:37:05", "remaining_time": "0:22:25", "throughput": 13773.18, "total_tokens": 30647680}
|
|
{"current_steps": 9740, "total_steps": 15621, "loss": 0.4905, "lr": 7.464733526486662e-07, "epoch": 0.6235196210229819, "percentage": 62.35, "elapsed_time": "0:37:05", "remaining_time": "0:22:23", "throughput": 13776.49, "total_tokens": 30663616}
|
|
{"current_steps": 9745, "total_steps": 15621, "loss": 0.3424, "lr": 7.453926490661628e-07, "epoch": 0.6238397029639587, "percentage": 62.38, "elapsed_time": "0:37:06", "remaining_time": "0:22:22", "throughput": 13780.62, "total_tokens": 30682496}
|
|
{"current_steps": 9750, "total_steps": 15621, "loss": 0.3639, "lr": 7.443122633639267e-07, "epoch": 0.6241597849049356, "percentage": 62.42, "elapsed_time": "0:37:07", "remaining_time": "0:22:21", "throughput": 13783.73, "total_tokens": 30697664}
|
|
{"current_steps": 9755, "total_steps": 15621, "loss": 0.3835, "lr": 7.432321968908319e-07, "epoch": 0.6244798668459125, "percentage": 62.45, "elapsed_time": "0:37:07", "remaining_time": "0:22:19", "throughput": 13786.99, "total_tokens": 30713408}
|
|
{"current_steps": 9760, "total_steps": 15621, "loss": 0.3173, "lr": 7.421524509953543e-07, "epoch": 0.6247999487868895, "percentage": 62.48, "elapsed_time": "0:37:08", "remaining_time": "0:22:18", "throughput": 13790.64, "total_tokens": 30730496}
|
|
{"current_steps": 9765, "total_steps": 15621, "loss": 0.4158, "lr": 7.410730270255687e-07, "epoch": 0.6251200307278664, "percentage": 62.51, "elapsed_time": "0:37:08", "remaining_time": "0:22:16", "throughput": 13793.76, "total_tokens": 30745664}
|
|
{"current_steps": 9770, "total_steps": 15621, "loss": 0.3655, "lr": 7.399939263291493e-07, "epoch": 0.6254401126688433, "percentage": 62.54, "elapsed_time": "0:37:09", "remaining_time": "0:22:15", "throughput": 13796.92, "total_tokens": 30760960}
|
|
{"current_steps": 9775, "total_steps": 15621, "loss": 0.4854, "lr": 7.389151502533657e-07, "epoch": 0.6257601946098201, "percentage": 62.58, "elapsed_time": "0:37:10", "remaining_time": "0:22:13", "throughput": 13799.91, "total_tokens": 30775872}
|
|
{"current_steps": 9780, "total_steps": 15621, "loss": 0.3683, "lr": 7.378367001450819e-07, "epoch": 0.626080276550797, "percentage": 62.61, "elapsed_time": "0:37:10", "remaining_time": "0:22:12", "throughput": 13803.15, "total_tokens": 30791424}
|
|
{"current_steps": 9785, "total_steps": 15621, "loss": 0.4317, "lr": 7.367585773507567e-07, "epoch": 0.6264003584917739, "percentage": 62.64, "elapsed_time": "0:37:11", "remaining_time": "0:22:10", "throughput": 13806.54, "total_tokens": 30807680}
|
|
{"current_steps": 9790, "total_steps": 15621, "loss": 0.4428, "lr": 7.356807832164385e-07, "epoch": 0.6267204404327508, "percentage": 62.67, "elapsed_time": "0:37:12", "remaining_time": "0:22:09", "throughput": 13809.88, "total_tokens": 30823680}
|
|
{"current_steps": 9795, "total_steps": 15621, "loss": 0.4404, "lr": 7.346033190877654e-07, "epoch": 0.6270405223737276, "percentage": 62.7, "elapsed_time": "0:37:12", "remaining_time": "0:22:07", "throughput": 13813.11, "total_tokens": 30839360}
|
|
{"current_steps": 9800, "total_steps": 15621, "loss": 0.3596, "lr": 7.335261863099651e-07, "epoch": 0.6273606043147045, "percentage": 62.74, "elapsed_time": "0:37:13", "remaining_time": "0:22:06", "throughput": 13816.29, "total_tokens": 30854784}
|
|
{"current_steps": 9805, "total_steps": 15621, "loss": 0.3969, "lr": 7.324493862278498e-07, "epoch": 0.6276806862556814, "percentage": 62.77, "elapsed_time": "0:37:13", "remaining_time": "0:22:05", "throughput": 13819.58, "total_tokens": 30870592}
|
|
{"current_steps": 9810, "total_steps": 15621, "loss": 0.4546, "lr": 7.313729201858167e-07, "epoch": 0.6280007681966584, "percentage": 62.8, "elapsed_time": "0:37:14", "remaining_time": "0:22:03", "throughput": 13822.75, "total_tokens": 30885952}
|
|
{"current_steps": 9815, "total_steps": 15621, "loss": 0.3285, "lr": 7.302967895278473e-07, "epoch": 0.6283208501376353, "percentage": 62.83, "elapsed_time": "0:37:15", "remaining_time": "0:22:02", "throughput": 13826.04, "total_tokens": 30902080}
|
|
{"current_steps": 9820, "total_steps": 15621, "loss": 0.4045, "lr": 7.292209955975028e-07, "epoch": 0.6286409320786122, "percentage": 62.86, "elapsed_time": "0:37:15", "remaining_time": "0:22:00", "throughput": 13829.69, "total_tokens": 30919232}
|
|
{"current_steps": 9825, "total_steps": 15621, "loss": 0.4068, "lr": 7.281455397379244e-07, "epoch": 0.628961014019589, "percentage": 62.9, "elapsed_time": "0:37:16", "remaining_time": "0:21:59", "throughput": 13833.37, "total_tokens": 30936448}
|
|
{"current_steps": 9830, "total_steps": 15621, "loss": 0.3249, "lr": 7.270704232918316e-07, "epoch": 0.6292810959605659, "percentage": 62.93, "elapsed_time": "0:37:16", "remaining_time": "0:21:57", "throughput": 13836.65, "total_tokens": 30952256}
|
|
{"current_steps": 9835, "total_steps": 15621, "loss": 0.401, "lr": 7.2599564760152e-07, "epoch": 0.6296011779015428, "percentage": 62.96, "elapsed_time": "0:37:17", "remaining_time": "0:21:56", "throughput": 13839.71, "total_tokens": 30967360}
|
|
{"current_steps": 9840, "total_steps": 15621, "loss": 0.3851, "lr": 7.249212140088592e-07, "epoch": 0.6299212598425197, "percentage": 62.99, "elapsed_time": "0:37:18", "remaining_time": "0:21:54", "throughput": 13842.64, "total_tokens": 30982016}
|
|
{"current_steps": 9845, "total_steps": 15621, "loss": 0.3347, "lr": 7.23847123855293e-07, "epoch": 0.6302413417834966, "percentage": 63.02, "elapsed_time": "0:37:18", "remaining_time": "0:21:53", "throughput": 13845.97, "total_tokens": 30998080}
|
|
{"current_steps": 9850, "total_steps": 15621, "loss": 0.274, "lr": 7.227733784818349e-07, "epoch": 0.6305614237244734, "percentage": 63.06, "elapsed_time": "0:37:19", "remaining_time": "0:21:52", "throughput": 13849.04, "total_tokens": 31013184}
|
|
{"current_steps": 9855, "total_steps": 15621, "loss": 0.3758, "lr": 7.216999792290683e-07, "epoch": 0.6308815056654503, "percentage": 63.09, "elapsed_time": "0:37:19", "remaining_time": "0:21:50", "throughput": 13852.24, "total_tokens": 31028800}
|
|
{"current_steps": 9860, "total_steps": 15621, "loss": 0.4837, "lr": 7.206269274371457e-07, "epoch": 0.6312015876064272, "percentage": 63.12, "elapsed_time": "0:37:20", "remaining_time": "0:21:49", "throughput": 13855.53, "total_tokens": 31044736}
|
|
{"current_steps": 9865, "total_steps": 15621, "loss": 0.3489, "lr": 7.195542244457845e-07, "epoch": 0.6315216695474042, "percentage": 63.15, "elapsed_time": "0:37:21", "remaining_time": "0:21:47", "throughput": 13858.64, "total_tokens": 31059968}
|
|
{"current_steps": 9870, "total_steps": 15621, "loss": 0.3215, "lr": 7.184818715942666e-07, "epoch": 0.6318417514883811, "percentage": 63.18, "elapsed_time": "0:37:21", "remaining_time": "0:21:46", "throughput": 13861.62, "total_tokens": 31074880}
|
|
{"current_steps": 9875, "total_steps": 15621, "loss": 0.3499, "lr": 7.174098702214374e-07, "epoch": 0.6321618334293579, "percentage": 63.22, "elapsed_time": "0:37:22", "remaining_time": "0:21:44", "throughput": 13864.8, "total_tokens": 31090432}
|
|
{"current_steps": 9880, "total_steps": 15621, "loss": 0.372, "lr": 7.163382216657033e-07, "epoch": 0.6324819153703348, "percentage": 63.25, "elapsed_time": "0:37:23", "remaining_time": "0:21:43", "throughput": 13868.26, "total_tokens": 31107264}
|
|
{"current_steps": 9885, "total_steps": 15621, "loss": 0.3531, "lr": 7.152669272650302e-07, "epoch": 0.6328019973113117, "percentage": 63.28, "elapsed_time": "0:37:23", "remaining_time": "0:21:41", "throughput": 13871.75, "total_tokens": 31124096}
|
|
{"current_steps": 9890, "total_steps": 15621, "loss": 0.3881, "lr": 7.141959883569411e-07, "epoch": 0.6331220792522886, "percentage": 63.31, "elapsed_time": "0:37:24", "remaining_time": "0:21:40", "throughput": 13874.7, "total_tokens": 31138752}
|
|
{"current_steps": 9895, "total_steps": 15621, "loss": 0.4624, "lr": 7.131254062785165e-07, "epoch": 0.6334421611932655, "percentage": 63.34, "elapsed_time": "0:37:24", "remaining_time": "0:21:39", "throughput": 13877.78, "total_tokens": 31154048}
|
|
{"current_steps": 9900, "total_steps": 15621, "loss": 0.5159, "lr": 7.120551823663907e-07, "epoch": 0.6337622431342423, "percentage": 63.38, "elapsed_time": "0:37:25", "remaining_time": "0:21:37", "throughput": 13881.18, "total_tokens": 31170304}
|
|
{"current_steps": 9905, "total_steps": 15621, "loss": 0.2778, "lr": 7.109853179567499e-07, "epoch": 0.6340823250752192, "percentage": 63.41, "elapsed_time": "0:37:26", "remaining_time": "0:21:36", "throughput": 13884.46, "total_tokens": 31186368}
|
|
{"current_steps": 9910, "total_steps": 15621, "loss": 0.4266, "lr": 7.099158143853337e-07, "epoch": 0.6344024070161961, "percentage": 63.44, "elapsed_time": "0:37:26", "remaining_time": "0:21:34", "throughput": 13887.59, "total_tokens": 31201664}
|
|
{"current_steps": 9915, "total_steps": 15621, "loss": 0.396, "lr": 7.088466729874289e-07, "epoch": 0.634722488957173, "percentage": 63.47, "elapsed_time": "0:37:27", "remaining_time": "0:21:33", "throughput": 13890.74, "total_tokens": 31217216}
|
|
{"current_steps": 9920, "total_steps": 15621, "loss": 0.3762, "lr": 7.077778950978713e-07, "epoch": 0.63504257089815, "percentage": 63.5, "elapsed_time": "0:37:27", "remaining_time": "0:21:31", "throughput": 13894.19, "total_tokens": 31233728}
|
|
{"current_steps": 9925, "total_steps": 15621, "loss": 0.4657, "lr": 7.06709482051043e-07, "epoch": 0.6353626528391269, "percentage": 63.54, "elapsed_time": "0:37:28", "remaining_time": "0:21:30", "throughput": 13897.46, "total_tokens": 31249664}
|
|
{"current_steps": 9930, "total_steps": 15621, "loss": 0.2958, "lr": 7.056414351808698e-07, "epoch": 0.6356827347801037, "percentage": 63.57, "elapsed_time": "0:37:29", "remaining_time": "0:21:29", "throughput": 13900.67, "total_tokens": 31265408}
|
|
{"current_steps": 9935, "total_steps": 15621, "loss": 0.3557, "lr": 7.045737558208206e-07, "epoch": 0.6360028167210806, "percentage": 63.6, "elapsed_time": "0:37:29", "remaining_time": "0:21:27", "throughput": 13903.85, "total_tokens": 31281088}
|
|
{"current_steps": 9940, "total_steps": 15621, "loss": 0.4025, "lr": 7.035064453039064e-07, "epoch": 0.6363228986620575, "percentage": 63.63, "elapsed_time": "0:37:30", "remaining_time": "0:21:26", "throughput": 13906.94, "total_tokens": 31296512}
|
|
{"current_steps": 9945, "total_steps": 15621, "loss": 0.3796, "lr": 7.024395049626766e-07, "epoch": 0.6366429806030344, "percentage": 63.66, "elapsed_time": "0:37:31", "remaining_time": "0:21:24", "throughput": 13910.11, "total_tokens": 31312000}
|
|
{"current_steps": 9950, "total_steps": 15621, "loss": 0.3378, "lr": 7.013729361292182e-07, "epoch": 0.6369630625440112, "percentage": 63.7, "elapsed_time": "0:37:31", "remaining_time": "0:21:23", "throughput": 13913.2, "total_tokens": 31327488}
|
|
{"current_steps": 9955, "total_steps": 15621, "loss": 0.2992, "lr": 7.003067401351554e-07, "epoch": 0.6372831444849881, "percentage": 63.73, "elapsed_time": "0:37:32", "remaining_time": "0:21:21", "throughput": 13916.59, "total_tokens": 31343936}
|
|
{"current_steps": 9960, "total_steps": 15621, "loss": 0.3971, "lr": 6.992409183116465e-07, "epoch": 0.637603226425965, "percentage": 63.76, "elapsed_time": "0:37:32", "remaining_time": "0:21:20", "throughput": 13919.66, "total_tokens": 31359232}
|
|
{"current_steps": 9965, "total_steps": 15621, "loss": 0.3715, "lr": 6.981754719893826e-07, "epoch": 0.6379233083669419, "percentage": 63.79, "elapsed_time": "0:37:33", "remaining_time": "0:21:19", "throughput": 13923.03, "total_tokens": 31375616}
|
|
{"current_steps": 9970, "total_steps": 15621, "loss": 0.4687, "lr": 6.971104024985852e-07, "epoch": 0.6382433903079189, "percentage": 63.82, "elapsed_time": "0:37:34", "remaining_time": "0:21:17", "throughput": 13926.32, "total_tokens": 31391680}
|
|
{"current_steps": 9975, "total_steps": 15621, "loss": 0.3829, "lr": 6.960457111690068e-07, "epoch": 0.6385634722488958, "percentage": 63.86, "elapsed_time": "0:37:34", "remaining_time": "0:21:16", "throughput": 13929.5, "total_tokens": 31407424}
|
|
{"current_steps": 9980, "total_steps": 15621, "loss": 0.3854, "lr": 6.94981399329927e-07, "epoch": 0.6388835541898726, "percentage": 63.89, "elapsed_time": "0:37:35", "remaining_time": "0:21:14", "throughput": 13932.62, "total_tokens": 31422912}
|
|
{"current_steps": 9985, "total_steps": 15621, "loss": 0.3806, "lr": 6.939174683101509e-07, "epoch": 0.6392036361308495, "percentage": 63.92, "elapsed_time": "0:37:35", "remaining_time": "0:21:13", "throughput": 13935.9, "total_tokens": 31438912}
|
|
{"current_steps": 9990, "total_steps": 15621, "loss": 0.2888, "lr": 6.9285391943801e-07, "epoch": 0.6395237180718264, "percentage": 63.95, "elapsed_time": "0:37:36", "remaining_time": "0:21:11", "throughput": 13939.24, "total_tokens": 31455168}
|
|
{"current_steps": 9995, "total_steps": 15621, "loss": 0.32, "lr": 6.917907540413569e-07, "epoch": 0.6398438000128033, "percentage": 63.98, "elapsed_time": "0:37:37", "remaining_time": "0:21:10", "throughput": 13942.36, "total_tokens": 31470592}
|
|
{"current_steps": 10000, "total_steps": 15621, "loss": 0.3466, "lr": 6.907279734475659e-07, "epoch": 0.6401638819537802, "percentage": 64.02, "elapsed_time": "0:37:37", "remaining_time": "0:21:09", "throughput": 13945.31, "total_tokens": 31485632}
|
|
{"current_steps": 10005, "total_steps": 15621, "loss": 0.353, "lr": 6.896655789835317e-07, "epoch": 0.640483963894757, "percentage": 64.05, "elapsed_time": "0:37:38", "remaining_time": "0:21:07", "throughput": 13948.25, "total_tokens": 31500352}
|
|
{"current_steps": 10010, "total_steps": 15621, "loss": 0.365, "lr": 6.886035719756656e-07, "epoch": 0.6408040458357339, "percentage": 64.08, "elapsed_time": "0:37:39", "remaining_time": "0:21:06", "throughput": 13951.65, "total_tokens": 31516928}
|
|
{"current_steps": 10015, "total_steps": 15621, "loss": 0.272, "lr": 6.875419537498959e-07, "epoch": 0.6411241277767108, "percentage": 64.11, "elapsed_time": "0:37:39", "remaining_time": "0:21:04", "throughput": 13954.78, "total_tokens": 31532608}
|
|
{"current_steps": 10020, "total_steps": 15621, "loss": 0.5903, "lr": 6.864807256316658e-07, "epoch": 0.6414442097176877, "percentage": 64.14, "elapsed_time": "0:37:40", "remaining_time": "0:21:03", "throughput": 13958.08, "total_tokens": 31548608}
|
|
{"current_steps": 10025, "total_steps": 15621, "loss": 0.4124, "lr": 6.854198889459311e-07, "epoch": 0.6417642916586647, "percentage": 64.18, "elapsed_time": "0:37:40", "remaining_time": "0:21:02", "throughput": 13961.22, "total_tokens": 31564224}
|
|
{"current_steps": 10030, "total_steps": 15621, "loss": 0.2575, "lr": 6.84359445017158e-07, "epoch": 0.6420843735996415, "percentage": 64.21, "elapsed_time": "0:37:41", "remaining_time": "0:21:00", "throughput": 13964.19, "total_tokens": 31579200}
|
|
{"current_steps": 10035, "total_steps": 15621, "loss": 0.4146, "lr": 6.832993951693244e-07, "epoch": 0.6424044555406184, "percentage": 64.24, "elapsed_time": "0:37:42", "remaining_time": "0:20:59", "throughput": 13967.32, "total_tokens": 31594816}
|
|
{"current_steps": 10040, "total_steps": 15621, "loss": 0.3439, "lr": 6.822397407259144e-07, "epoch": 0.6427245374815953, "percentage": 64.27, "elapsed_time": "0:37:42", "remaining_time": "0:20:57", "throughput": 13970.47, "total_tokens": 31610432}
|
|
{"current_steps": 10045, "total_steps": 15621, "loss": 0.3688, "lr": 6.811804830099186e-07, "epoch": 0.6430446194225722, "percentage": 64.3, "elapsed_time": "0:37:43", "remaining_time": "0:20:56", "throughput": 13974.03, "total_tokens": 31627520}
|
|
{"current_steps": 10050, "total_steps": 15621, "loss": 0.3446, "lr": 6.801216233438336e-07, "epoch": 0.6433647013635491, "percentage": 64.34, "elapsed_time": "0:37:43", "remaining_time": "0:20:54", "throughput": 13977.51, "total_tokens": 31644352}
|
|
{"current_steps": 10055, "total_steps": 15621, "loss": 0.3831, "lr": 6.790631630496575e-07, "epoch": 0.6436847833045259, "percentage": 64.37, "elapsed_time": "0:37:44", "remaining_time": "0:20:53", "throughput": 13980.66, "total_tokens": 31660160}
|
|
{"current_steps": 10060, "total_steps": 15621, "loss": 0.4395, "lr": 6.780051034488903e-07, "epoch": 0.6440048652455028, "percentage": 64.4, "elapsed_time": "0:37:45", "remaining_time": "0:20:52", "throughput": 13983.96, "total_tokens": 31676352}
|
|
{"current_steps": 10065, "total_steps": 15621, "loss": 0.3439, "lr": 6.769474458625323e-07, "epoch": 0.6443249471864797, "percentage": 64.43, "elapsed_time": "0:37:45", "remaining_time": "0:20:50", "throughput": 13987.17, "total_tokens": 31692160}
|
|
{"current_steps": 10070, "total_steps": 15621, "loss": 0.3099, "lr": 6.758901916110813e-07, "epoch": 0.6446450291274566, "percentage": 64.46, "elapsed_time": "0:37:46", "remaining_time": "0:20:49", "throughput": 13990.27, "total_tokens": 31707712}
|
|
{"current_steps": 10075, "total_steps": 15621, "loss": 0.3246, "lr": 6.748333420145315e-07, "epoch": 0.6449651110684336, "percentage": 64.5, "elapsed_time": "0:37:47", "remaining_time": "0:20:47", "throughput": 13993.53, "total_tokens": 31723776}
|
|
{"current_steps": 10080, "total_steps": 15621, "loss": 0.3972, "lr": 6.737768983923718e-07, "epoch": 0.6452851930094105, "percentage": 64.53, "elapsed_time": "0:37:47", "remaining_time": "0:20:46", "throughput": 13997.01, "total_tokens": 31740672}
|
|
{"current_steps": 10085, "total_steps": 15621, "loss": 0.2989, "lr": 6.727208620635849e-07, "epoch": 0.6456052749503873, "percentage": 64.56, "elapsed_time": "0:37:48", "remaining_time": "0:20:45", "throughput": 13999.99, "total_tokens": 31755648}
|
|
{"current_steps": 10090, "total_steps": 15621, "loss": 0.4543, "lr": 6.716652343466446e-07, "epoch": 0.6459253568913642, "percentage": 64.59, "elapsed_time": "0:37:48", "remaining_time": "0:20:43", "throughput": 14002.97, "total_tokens": 31770624}
|
|
{"current_steps": 10095, "total_steps": 15621, "loss": 0.3094, "lr": 6.706100165595139e-07, "epoch": 0.6462454388323411, "percentage": 64.62, "elapsed_time": "0:37:49", "remaining_time": "0:20:42", "throughput": 14006.25, "total_tokens": 31786816}
|
|
{"current_steps": 10100, "total_steps": 15621, "loss": 0.396, "lr": 6.695552100196452e-07, "epoch": 0.646565520773318, "percentage": 64.66, "elapsed_time": "0:37:50", "remaining_time": "0:20:40", "throughput": 14009.22, "total_tokens": 31801792}
|
|
{"current_steps": 10105, "total_steps": 15621, "loss": 0.5142, "lr": 6.685008160439769e-07, "epoch": 0.6468856027142948, "percentage": 64.69, "elapsed_time": "0:37:50", "remaining_time": "0:20:39", "throughput": 14012.74, "total_tokens": 31818944}
|
|
{"current_steps": 10110, "total_steps": 15621, "loss": 0.4128, "lr": 6.674468359489313e-07, "epoch": 0.6472056846552717, "percentage": 64.72, "elapsed_time": "0:37:51", "remaining_time": "0:20:38", "throughput": 14015.74, "total_tokens": 31834176}
|
|
{"current_steps": 10115, "total_steps": 15621, "loss": 0.3496, "lr": 6.663932710504163e-07, "epoch": 0.6475257665962486, "percentage": 64.75, "elapsed_time": "0:37:51", "remaining_time": "0:20:36", "throughput": 14018.95, "total_tokens": 31850176}
|
|
{"current_steps": 10120, "total_steps": 15621, "loss": 0.3894, "lr": 6.653401226638192e-07, "epoch": 0.6478458485372255, "percentage": 64.78, "elapsed_time": "0:37:52", "remaining_time": "0:20:35", "throughput": 14022.07, "total_tokens": 31865600}
|
|
{"current_steps": 10125, "total_steps": 15621, "loss": 0.3921, "lr": 6.64287392104008e-07, "epoch": 0.6481659304782024, "percentage": 64.82, "elapsed_time": "0:37:53", "remaining_time": "0:20:33", "throughput": 14025.01, "total_tokens": 31880512}
|
|
{"current_steps": 10130, "total_steps": 15621, "loss": 0.4388, "lr": 6.632350806853299e-07, "epoch": 0.6484860124191794, "percentage": 64.85, "elapsed_time": "0:37:53", "remaining_time": "0:20:32", "throughput": 14028.21, "total_tokens": 31896512}
|
|
{"current_steps": 10135, "total_steps": 15621, "loss": 0.4029, "lr": 6.621831897216074e-07, "epoch": 0.6488060943601562, "percentage": 64.88, "elapsed_time": "0:37:54", "remaining_time": "0:20:31", "throughput": 14031.47, "total_tokens": 31912768}
|
|
{"current_steps": 10140, "total_steps": 15621, "loss": 0.4345, "lr": 6.611317205261387e-07, "epoch": 0.6491261763011331, "percentage": 64.91, "elapsed_time": "0:37:54", "remaining_time": "0:20:29", "throughput": 14034.34, "total_tokens": 31927488}
|
|
{"current_steps": 10145, "total_steps": 15621, "loss": 0.3416, "lr": 6.60080674411696e-07, "epoch": 0.64944625824211, "percentage": 64.94, "elapsed_time": "0:37:55", "remaining_time": "0:20:28", "throughput": 14037.38, "total_tokens": 31942784}
|
|
{"current_steps": 10150, "total_steps": 15621, "loss": 0.3172, "lr": 6.590300526905225e-07, "epoch": 0.6497663401830869, "percentage": 64.98, "elapsed_time": "0:37:56", "remaining_time": "0:20:26", "throughput": 14040.49, "total_tokens": 31958528}
|
|
{"current_steps": 10155, "total_steps": 15621, "loss": 0.4676, "lr": 6.579798566743313e-07, "epoch": 0.6500864221240638, "percentage": 65.01, "elapsed_time": "0:37:56", "remaining_time": "0:20:25", "throughput": 14043.54, "total_tokens": 31974016}
|
|
{"current_steps": 10160, "total_steps": 15621, "loss": 0.3143, "lr": 6.569300876743049e-07, "epoch": 0.6504065040650406, "percentage": 65.04, "elapsed_time": "0:37:57", "remaining_time": "0:20:24", "throughput": 14046.93, "total_tokens": 31990720}
|
|
{"current_steps": 10165, "total_steps": 15621, "loss": 0.3188, "lr": 6.558807470010923e-07, "epoch": 0.6507265860060175, "percentage": 65.07, "elapsed_time": "0:37:58", "remaining_time": "0:20:22", "throughput": 14050.28, "total_tokens": 32007168}
|
|
{"current_steps": 10166, "total_steps": 15621, "eval_loss": 0.37842774391174316, "epoch": 0.6507906023942129, "percentage": 65.08, "elapsed_time": "0:38:47", "remaining_time": "0:20:48", "throughput": 13754.12, "total_tokens": 32010176}
|
|
{"current_steps": 10170, "total_steps": 15621, "loss": 0.3642, "lr": 6.548318359648071e-07, "epoch": 0.6510466679469944, "percentage": 65.1, "elapsed_time": "0:39:12", "remaining_time": "0:21:01", "throughput": 13609.15, "total_tokens": 32022208}
|
|
{"current_steps": 10175, "total_steps": 15621, "loss": 0.3967, "lr": 6.537833558750279e-07, "epoch": 0.6513667498879713, "percentage": 65.14, "elapsed_time": "0:39:13", "remaining_time": "0:20:59", "throughput": 13612.3, "total_tokens": 32037760}
|
|
{"current_steps": 10180, "total_steps": 15621, "loss": 0.3055, "lr": 6.527353080407938e-07, "epoch": 0.6516868318289483, "percentage": 65.17, "elapsed_time": "0:39:14", "remaining_time": "0:20:58", "throughput": 13615.29, "total_tokens": 32052800}
|
|
{"current_steps": 10185, "total_steps": 15621, "loss": 0.3366, "lr": 6.516876937706048e-07, "epoch": 0.6520069137699251, "percentage": 65.2, "elapsed_time": "0:39:14", "remaining_time": "0:20:56", "throughput": 13618.36, "total_tokens": 32068288}
|
|
{"current_steps": 10190, "total_steps": 15621, "loss": 0.3758, "lr": 6.506405143724196e-07, "epoch": 0.652326995710902, "percentage": 65.23, "elapsed_time": "0:39:15", "remaining_time": "0:20:55", "throughput": 13621.26, "total_tokens": 32083200}
|
|
{"current_steps": 10195, "total_steps": 15621, "loss": 0.4635, "lr": 6.495937711536546e-07, "epoch": 0.6526470776518789, "percentage": 65.26, "elapsed_time": "0:39:15", "remaining_time": "0:20:53", "throughput": 13624.3, "total_tokens": 32098432}
|
|
{"current_steps": 10200, "total_steps": 15621, "loss": 0.4226, "lr": 6.485474654211803e-07, "epoch": 0.6529671595928558, "percentage": 65.3, "elapsed_time": "0:39:16", "remaining_time": "0:20:52", "throughput": 13627.67, "total_tokens": 32114944}
|
|
{"current_steps": 10205, "total_steps": 15621, "loss": 0.3044, "lr": 6.475015984813217e-07, "epoch": 0.6532872415338327, "percentage": 65.33, "elapsed_time": "0:39:17", "remaining_time": "0:20:51", "throughput": 13631.04, "total_tokens": 32131520}
|
|
{"current_steps": 10210, "total_steps": 15621, "loss": 0.3158, "lr": 6.464561716398564e-07, "epoch": 0.6536073234748095, "percentage": 65.36, "elapsed_time": "0:39:17", "remaining_time": "0:20:49", "throughput": 13634.14, "total_tokens": 32147008}
|
|
{"current_steps": 10215, "total_steps": 15621, "loss": 0.3734, "lr": 6.454111862020122e-07, "epoch": 0.6539274054157864, "percentage": 65.39, "elapsed_time": "0:39:18", "remaining_time": "0:20:48", "throughput": 13637.21, "total_tokens": 32162560}
|
|
{"current_steps": 10220, "total_steps": 15621, "loss": 0.3636, "lr": 6.443666434724649e-07, "epoch": 0.6542474873567633, "percentage": 65.42, "elapsed_time": "0:39:19", "remaining_time": "0:20:46", "throughput": 13640.0, "total_tokens": 32177024}
|
|
{"current_steps": 10225, "total_steps": 15621, "loss": 0.5155, "lr": 6.43322544755339e-07, "epoch": 0.6545675692977402, "percentage": 65.46, "elapsed_time": "0:39:19", "remaining_time": "0:20:45", "throughput": 13643.17, "total_tokens": 32193024}
|
|
{"current_steps": 10230, "total_steps": 15621, "loss": 0.3365, "lr": 6.422788913542038e-07, "epoch": 0.6548876512387171, "percentage": 65.49, "elapsed_time": "0:39:20", "remaining_time": "0:20:43", "throughput": 13646.31, "total_tokens": 32208896}
|
|
{"current_steps": 10235, "total_steps": 15621, "loss": 0.3296, "lr": 6.412356845720726e-07, "epoch": 0.655207733179694, "percentage": 65.52, "elapsed_time": "0:39:20", "remaining_time": "0:20:42", "throughput": 13649.59, "total_tokens": 32225280}
|
|
{"current_steps": 10240, "total_steps": 15621, "loss": 0.3605, "lr": 6.40192925711402e-07, "epoch": 0.6555278151206709, "percentage": 65.55, "elapsed_time": "0:39:21", "remaining_time": "0:20:40", "throughput": 13652.63, "total_tokens": 32240768}
|
|
{"current_steps": 10245, "total_steps": 15621, "loss": 0.3264, "lr": 6.39150616074088e-07, "epoch": 0.6558478970616478, "percentage": 65.58, "elapsed_time": "0:39:22", "remaining_time": "0:20:39", "throughput": 13655.61, "total_tokens": 32255872}
|
|
{"current_steps": 10250, "total_steps": 15621, "loss": 0.4193, "lr": 6.381087569614668e-07, "epoch": 0.6561679790026247, "percentage": 65.62, "elapsed_time": "0:39:22", "remaining_time": "0:20:38", "throughput": 13658.96, "total_tokens": 32272512}
|
|
{"current_steps": 10255, "total_steps": 15621, "loss": 0.3828, "lr": 6.370673496743116e-07, "epoch": 0.6564880609436016, "percentage": 65.65, "elapsed_time": "0:39:23", "remaining_time": "0:20:36", "throughput": 13661.58, "total_tokens": 32286272}
|
|
{"current_steps": 10260, "total_steps": 15621, "loss": 0.4331, "lr": 6.360263955128315e-07, "epoch": 0.6568081428845784, "percentage": 65.68, "elapsed_time": "0:39:23", "remaining_time": "0:20:35", "throughput": 13664.7, "total_tokens": 32301952}
|
|
{"current_steps": 10265, "total_steps": 15621, "loss": 0.3602, "lr": 6.349858957766701e-07, "epoch": 0.6571282248255553, "percentage": 65.71, "elapsed_time": "0:39:24", "remaining_time": "0:20:33", "throughput": 13667.91, "total_tokens": 32318208}
|
|
{"current_steps": 10270, "total_steps": 15621, "loss": 0.336, "lr": 6.339458517649036e-07, "epoch": 0.6574483067665322, "percentage": 65.74, "elapsed_time": "0:39:25", "remaining_time": "0:20:32", "throughput": 13670.91, "total_tokens": 32333504}
|
|
{"current_steps": 10275, "total_steps": 15621, "loss": 0.3626, "lr": 6.329062647760395e-07, "epoch": 0.6577683887075091, "percentage": 65.78, "elapsed_time": "0:39:25", "remaining_time": "0:20:30", "throughput": 13674.3, "total_tokens": 32350208}
|
|
{"current_steps": 10280, "total_steps": 15621, "loss": 0.3351, "lr": 6.318671361080137e-07, "epoch": 0.658088470648486, "percentage": 65.81, "elapsed_time": "0:39:26", "remaining_time": "0:20:29", "throughput": 13677.29, "total_tokens": 32365376}
|
|
{"current_steps": 10285, "total_steps": 15621, "loss": 0.3306, "lr": 6.308284670581906e-07, "epoch": 0.6584085525894628, "percentage": 65.84, "elapsed_time": "0:39:26", "remaining_time": "0:20:28", "throughput": 13680.44, "total_tokens": 32381248}
|
|
{"current_steps": 10290, "total_steps": 15621, "loss": 0.4558, "lr": 6.297902589233612e-07, "epoch": 0.6587286345304398, "percentage": 65.87, "elapsed_time": "0:39:27", "remaining_time": "0:20:26", "throughput": 13683.28, "total_tokens": 32395968}
|
|
{"current_steps": 10295, "total_steps": 15621, "loss": 0.3737, "lr": 6.287525129997404e-07, "epoch": 0.6590487164714167, "percentage": 65.9, "elapsed_time": "0:39:28", "remaining_time": "0:20:25", "throughput": 13686.35, "total_tokens": 32411456}
|
|
{"current_steps": 10300, "total_steps": 15621, "loss": 0.3865, "lr": 6.277152305829656e-07, "epoch": 0.6593687984123936, "percentage": 65.94, "elapsed_time": "0:39:28", "remaining_time": "0:20:23", "throughput": 13689.4, "total_tokens": 32426880}
|
|
{"current_steps": 10305, "total_steps": 15621, "loss": 0.3281, "lr": 6.266784129680968e-07, "epoch": 0.6596888803533705, "percentage": 65.97, "elapsed_time": "0:39:29", "remaining_time": "0:20:22", "throughput": 13692.47, "total_tokens": 32442368}
|
|
{"current_steps": 10310, "total_steps": 15621, "loss": 0.3781, "lr": 6.256420614496129e-07, "epoch": 0.6600089622943474, "percentage": 66.0, "elapsed_time": "0:39:29", "remaining_time": "0:20:20", "throughput": 13695.51, "total_tokens": 32457920}
|
|
{"current_steps": 10315, "total_steps": 15621, "loss": 0.4085, "lr": 6.246061773214102e-07, "epoch": 0.6603290442353242, "percentage": 66.03, "elapsed_time": "0:39:30", "remaining_time": "0:20:19", "throughput": 13698.61, "total_tokens": 32473536}
|
|
{"current_steps": 10320, "total_steps": 15621, "loss": 0.3956, "lr": 6.235707618768032e-07, "epoch": 0.6606491261763011, "percentage": 66.06, "elapsed_time": "0:39:31", "remaining_time": "0:20:18", "throughput": 13701.93, "total_tokens": 32490240}
|
|
{"current_steps": 10325, "total_steps": 15621, "loss": 0.3506, "lr": 6.225358164085196e-07, "epoch": 0.660969208117278, "percentage": 66.1, "elapsed_time": "0:39:31", "remaining_time": "0:20:16", "throughput": 13704.96, "total_tokens": 32505728}
|
|
{"current_steps": 10330, "total_steps": 15621, "loss": 0.3521, "lr": 6.21501342208701e-07, "epoch": 0.6612892900582549, "percentage": 66.13, "elapsed_time": "0:39:32", "remaining_time": "0:20:15", "throughput": 13707.88, "total_tokens": 32520960}
|
|
{"current_steps": 10335, "total_steps": 15621, "loss": 0.4036, "lr": 6.204673405689007e-07, "epoch": 0.6616093719992318, "percentage": 66.16, "elapsed_time": "0:39:33", "remaining_time": "0:20:13", "throughput": 13710.8, "total_tokens": 32535872}
|
|
{"current_steps": 10340, "total_steps": 15621, "loss": 0.3158, "lr": 6.194338127800823e-07, "epoch": 0.6619294539402087, "percentage": 66.19, "elapsed_time": "0:39:33", "remaining_time": "0:20:12", "throughput": 13714.1, "total_tokens": 32552448}
|
|
{"current_steps": 10345, "total_steps": 15621, "loss": 0.3866, "lr": 6.184007601326165e-07, "epoch": 0.6622495358811856, "percentage": 66.22, "elapsed_time": "0:39:34", "remaining_time": "0:20:10", "throughput": 13716.97, "total_tokens": 32567232}
|
|
{"current_steps": 10350, "total_steps": 15621, "loss": 0.3515, "lr": 6.173681839162824e-07, "epoch": 0.6625696178221625, "percentage": 66.26, "elapsed_time": "0:39:34", "remaining_time": "0:20:09", "throughput": 13720.2, "total_tokens": 32583360}
|
|
{"current_steps": 10355, "total_steps": 15621, "loss": 0.3336, "lr": 6.163360854202635e-07, "epoch": 0.6628896997631394, "percentage": 66.29, "elapsed_time": "0:39:35", "remaining_time": "0:20:08", "throughput": 13723.21, "total_tokens": 32598656}
|
|
{"current_steps": 10360, "total_steps": 15621, "loss": 0.306, "lr": 6.153044659331461e-07, "epoch": 0.6632097817041163, "percentage": 66.32, "elapsed_time": "0:39:36", "remaining_time": "0:20:06", "throughput": 13726.21, "total_tokens": 32614144}
|
|
{"current_steps": 10365, "total_steps": 15621, "loss": 0.3687, "lr": 6.142733267429203e-07, "epoch": 0.6635298636450931, "percentage": 66.35, "elapsed_time": "0:39:36", "remaining_time": "0:20:05", "throughput": 13729.1, "total_tokens": 32629120}
|
|
{"current_steps": 10370, "total_steps": 15621, "loss": 0.4287, "lr": 6.132426691369748e-07, "epoch": 0.66384994558607, "percentage": 66.38, "elapsed_time": "0:39:37", "remaining_time": "0:20:03", "throughput": 13732.45, "total_tokens": 32645952}
|
|
{"current_steps": 10375, "total_steps": 15621, "loss": 0.3988, "lr": 6.122124944020977e-07, "epoch": 0.6641700275270469, "percentage": 66.42, "elapsed_time": "0:39:37", "remaining_time": "0:20:02", "throughput": 13735.54, "total_tokens": 32661696}
|
|
{"current_steps": 10380, "total_steps": 15621, "loss": 0.3753, "lr": 6.111828038244749e-07, "epoch": 0.6644901094680238, "percentage": 66.45, "elapsed_time": "0:39:38", "remaining_time": "0:20:00", "throughput": 13738.72, "total_tokens": 32677760}
|
|
{"current_steps": 10385, "total_steps": 15621, "loss": 0.2948, "lr": 6.101535986896866e-07, "epoch": 0.6648101914090007, "percentage": 66.48, "elapsed_time": "0:39:39", "remaining_time": "0:19:59", "throughput": 13741.81, "total_tokens": 32693568}
|
|
{"current_steps": 10390, "total_steps": 15621, "loss": 0.2899, "lr": 6.091248802827076e-07, "epoch": 0.6651302733499775, "percentage": 66.51, "elapsed_time": "0:39:39", "remaining_time": "0:19:58", "throughput": 13744.77, "total_tokens": 32708736}
|
|
{"current_steps": 10395, "total_steps": 15621, "loss": 0.3218, "lr": 6.080966498879048e-07, "epoch": 0.6654503552909545, "percentage": 66.55, "elapsed_time": "0:39:40", "remaining_time": "0:19:56", "throughput": 13748.09, "total_tokens": 32725440}
|
|
{"current_steps": 10400, "total_steps": 15621, "loss": 0.2962, "lr": 6.070689087890363e-07, "epoch": 0.6657704372319314, "percentage": 66.58, "elapsed_time": "0:39:40", "remaining_time": "0:19:55", "throughput": 13751.05, "total_tokens": 32740608}
|
|
{"current_steps": 10405, "total_steps": 15621, "loss": 0.3974, "lr": 6.060416582692487e-07, "epoch": 0.6660905191729083, "percentage": 66.61, "elapsed_time": "0:39:41", "remaining_time": "0:19:53", "throughput": 13754.11, "total_tokens": 32756416}
|
|
{"current_steps": 10410, "total_steps": 15621, "loss": 0.3358, "lr": 6.05014899611076e-07, "epoch": 0.6664106011138852, "percentage": 66.64, "elapsed_time": "0:39:42", "remaining_time": "0:19:52", "throughput": 13757.16, "total_tokens": 32771904}
|
|
{"current_steps": 10415, "total_steps": 15621, "loss": 0.3724, "lr": 6.039886340964391e-07, "epoch": 0.666730683054862, "percentage": 66.67, "elapsed_time": "0:39:42", "remaining_time": "0:19:51", "throughput": 13760.18, "total_tokens": 32787392}
|
|
{"current_steps": 10420, "total_steps": 15621, "loss": 0.334, "lr": 6.029628630066423e-07, "epoch": 0.6670507649958389, "percentage": 66.71, "elapsed_time": "0:39:43", "remaining_time": "0:19:49", "throughput": 13763.28, "total_tokens": 32803136}
|
|
{"current_steps": 10425, "total_steps": 15621, "loss": 0.4173, "lr": 6.019375876223724e-07, "epoch": 0.6673708469368158, "percentage": 66.74, "elapsed_time": "0:39:43", "remaining_time": "0:19:48", "throughput": 13766.25, "total_tokens": 32818624}
|
|
{"current_steps": 10430, "total_steps": 15621, "loss": 0.4672, "lr": 6.009128092236982e-07, "epoch": 0.6676909288777927, "percentage": 66.77, "elapsed_time": "0:39:44", "remaining_time": "0:19:46", "throughput": 13769.24, "total_tokens": 32833920}
|
|
{"current_steps": 10435, "total_steps": 15621, "loss": 0.3859, "lr": 5.998885290900679e-07, "epoch": 0.6680110108187696, "percentage": 66.8, "elapsed_time": "0:39:45", "remaining_time": "0:19:45", "throughput": 13772.03, "total_tokens": 32848512}
|
|
{"current_steps": 10440, "total_steps": 15621, "loss": 0.3391, "lr": 5.988647485003061e-07, "epoch": 0.6683310927597464, "percentage": 66.83, "elapsed_time": "0:39:45", "remaining_time": "0:19:43", "throughput": 13775.3, "total_tokens": 32865088}
|
|
{"current_steps": 10445, "total_steps": 15621, "loss": 0.4559, "lr": 5.978414687326164e-07, "epoch": 0.6686511747007234, "percentage": 66.87, "elapsed_time": "0:39:46", "remaining_time": "0:19:42", "throughput": 13778.67, "total_tokens": 32882048}
|
|
{"current_steps": 10450, "total_steps": 15621, "loss": 0.365, "lr": 5.968186910645745e-07, "epoch": 0.6689712566417003, "percentage": 66.9, "elapsed_time": "0:39:47", "remaining_time": "0:19:41", "throughput": 13781.88, "total_tokens": 32898624}
|
|
{"current_steps": 10455, "total_steps": 15621, "loss": 0.505, "lr": 5.957964167731305e-07, "epoch": 0.6692913385826772, "percentage": 66.93, "elapsed_time": "0:39:47", "remaining_time": "0:19:39", "throughput": 13784.84, "total_tokens": 32914176}
|
|
{"current_steps": 10460, "total_steps": 15621, "loss": 0.4068, "lr": 5.947746471346065e-07, "epoch": 0.6696114205236541, "percentage": 66.96, "elapsed_time": "0:39:48", "remaining_time": "0:19:38", "throughput": 13788.18, "total_tokens": 32931136}
|
|
{"current_steps": 10465, "total_steps": 15621, "loss": 0.3349, "lr": 5.937533834246932e-07, "epoch": 0.669931502464631, "percentage": 66.99, "elapsed_time": "0:39:49", "remaining_time": "0:19:37", "throughput": 13791.39, "total_tokens": 32947648}
|
|
{"current_steps": 10470, "total_steps": 15621, "loss": 0.3745, "lr": 5.927326269184504e-07, "epoch": 0.6702515844056078, "percentage": 67.03, "elapsed_time": "0:39:49", "remaining_time": "0:19:35", "throughput": 13794.64, "total_tokens": 32964224}
|
|
{"current_steps": 10475, "total_steps": 15621, "loss": 0.4498, "lr": 5.917123788903049e-07, "epoch": 0.6705716663465847, "percentage": 67.06, "elapsed_time": "0:39:50", "remaining_time": "0:19:34", "throughput": 13798.23, "total_tokens": 32982080}
|
|
{"current_steps": 10480, "total_steps": 15621, "loss": 0.4642, "lr": 5.906926406140484e-07, "epoch": 0.6708917482875616, "percentage": 67.09, "elapsed_time": "0:39:50", "remaining_time": "0:19:32", "throughput": 13801.16, "total_tokens": 32997440}
|
|
{"current_steps": 10485, "total_steps": 15621, "loss": 0.4298, "lr": 5.896734133628354e-07, "epoch": 0.6712118302285385, "percentage": 67.12, "elapsed_time": "0:39:51", "remaining_time": "0:19:31", "throughput": 13804.18, "total_tokens": 33013056}
|
|
{"current_steps": 10490, "total_steps": 15621, "loss": 0.3736, "lr": 5.886546984091838e-07, "epoch": 0.6715319121695154, "percentage": 67.15, "elapsed_time": "0:39:52", "remaining_time": "0:19:30", "throughput": 13807.1, "total_tokens": 33028416}
|
|
{"current_steps": 10495, "total_steps": 15621, "loss": 0.3415, "lr": 5.876364970249711e-07, "epoch": 0.6718519941104922, "percentage": 67.19, "elapsed_time": "0:39:52", "remaining_time": "0:19:28", "throughput": 13809.83, "total_tokens": 33042880}
|
|
{"current_steps": 10500, "total_steps": 15621, "loss": 0.2735, "lr": 5.866188104814336e-07, "epoch": 0.6721720760514692, "percentage": 67.22, "elapsed_time": "0:39:53", "remaining_time": "0:19:27", "throughput": 13812.77, "total_tokens": 33058240}
|
|
{"current_steps": 10505, "total_steps": 15621, "loss": 0.3792, "lr": 5.856016400491646e-07, "epoch": 0.6724921579924461, "percentage": 67.25, "elapsed_time": "0:39:53", "remaining_time": "0:19:25", "throughput": 13815.79, "total_tokens": 33073920}
|
|
{"current_steps": 10510, "total_steps": 15621, "loss": 0.3192, "lr": 5.845849869981136e-07, "epoch": 0.672812239933423, "percentage": 67.28, "elapsed_time": "0:39:54", "remaining_time": "0:19:24", "throughput": 13818.74, "total_tokens": 33089344}
|
|
{"current_steps": 10515, "total_steps": 15621, "loss": 0.3458, "lr": 5.835688525975842e-07, "epoch": 0.6731323218743999, "percentage": 67.31, "elapsed_time": "0:39:55", "remaining_time": "0:19:23", "throughput": 13821.58, "total_tokens": 33104384}
|
|
{"current_steps": 10520, "total_steps": 15621, "loss": 0.3931, "lr": 5.825532381162311e-07, "epoch": 0.6734524038153767, "percentage": 67.35, "elapsed_time": "0:39:55", "remaining_time": "0:19:21", "throughput": 13824.58, "total_tokens": 33120064}
|
|
{"current_steps": 10525, "total_steps": 15621, "loss": 0.3866, "lr": 5.815381448220619e-07, "epoch": 0.6737724857563536, "percentage": 67.38, "elapsed_time": "0:39:56", "remaining_time": "0:19:20", "throughput": 13827.65, "total_tokens": 33136128}
|
|
{"current_steps": 10530, "total_steps": 15621, "loss": 0.3452, "lr": 5.805235739824327e-07, "epoch": 0.6740925676973305, "percentage": 67.41, "elapsed_time": "0:39:57", "remaining_time": "0:19:18", "throughput": 13831.39, "total_tokens": 33154816}
|
|
{"current_steps": 10535, "total_steps": 15621, "loss": 0.5023, "lr": 5.795095268640458e-07, "epoch": 0.6744126496383074, "percentage": 67.44, "elapsed_time": "0:39:57", "remaining_time": "0:19:17", "throughput": 13834.27, "total_tokens": 33169920}
|
|
{"current_steps": 10540, "total_steps": 15621, "loss": 0.541, "lr": 5.784960047329519e-07, "epoch": 0.6747327315792843, "percentage": 67.47, "elapsed_time": "0:39:58", "remaining_time": "0:19:16", "throughput": 13837.82, "total_tokens": 33187712}
|
|
{"current_steps": 10545, "total_steps": 15621, "loss": 0.3866, "lr": 5.774830088545452e-07, "epoch": 0.6750528135202611, "percentage": 67.51, "elapsed_time": "0:39:58", "remaining_time": "0:19:14", "throughput": 13840.68, "total_tokens": 33202880}
|
|
{"current_steps": 10550, "total_steps": 15621, "loss": 0.2997, "lr": 5.76470540493563e-07, "epoch": 0.6753728954612381, "percentage": 67.54, "elapsed_time": "0:39:59", "remaining_time": "0:19:13", "throughput": 13843.79, "total_tokens": 33218944}
|
|
{"current_steps": 10555, "total_steps": 15621, "loss": 0.4652, "lr": 5.754586009140836e-07, "epoch": 0.675692977402215, "percentage": 67.57, "elapsed_time": "0:40:00", "remaining_time": "0:19:11", "throughput": 13846.77, "total_tokens": 33234688}
|
|
{"current_steps": 10560, "total_steps": 15621, "loss": 0.3679, "lr": 5.744471913795256e-07, "epoch": 0.6760130593431919, "percentage": 67.6, "elapsed_time": "0:40:00", "remaining_time": "0:19:10", "throughput": 13849.67, "total_tokens": 33249920}
|
|
{"current_steps": 10565, "total_steps": 15621, "loss": 0.3365, "lr": 5.734363131526459e-07, "epoch": 0.6763331412841688, "percentage": 67.63, "elapsed_time": "0:40:01", "remaining_time": "0:19:09", "throughput": 13852.72, "total_tokens": 33265792}
|
|
{"current_steps": 10570, "total_steps": 15621, "loss": 0.3742, "lr": 5.724259674955377e-07, "epoch": 0.6766532232251457, "percentage": 67.67, "elapsed_time": "0:40:01", "remaining_time": "0:19:07", "throughput": 13855.6, "total_tokens": 33280832}
|
|
{"current_steps": 10575, "total_steps": 15621, "loss": 0.3888, "lr": 5.714161556696291e-07, "epoch": 0.6769733051661225, "percentage": 67.7, "elapsed_time": "0:40:02", "remaining_time": "0:19:06", "throughput": 13858.64, "total_tokens": 33296576}
|
|
{"current_steps": 10580, "total_steps": 15621, "loss": 0.3388, "lr": 5.704068789356824e-07, "epoch": 0.6772933871070994, "percentage": 67.73, "elapsed_time": "0:40:03", "remaining_time": "0:19:05", "throughput": 13862.68, "total_tokens": 33316672}
|
|
{"current_steps": 10585, "total_steps": 15621, "loss": 0.3496, "lr": 5.693981385537912e-07, "epoch": 0.6776134690480763, "percentage": 67.76, "elapsed_time": "0:40:03", "remaining_time": "0:19:03", "throughput": 13865.44, "total_tokens": 33331456}
|
|
{"current_steps": 10590, "total_steps": 15621, "loss": 0.3447, "lr": 5.683899357833801e-07, "epoch": 0.6779335509890532, "percentage": 67.79, "elapsed_time": "0:40:04", "remaining_time": "0:19:02", "throughput": 13868.32, "total_tokens": 33346752}
|
|
{"current_steps": 10595, "total_steps": 15621, "loss": 0.455, "lr": 5.673822718832015e-07, "epoch": 0.67825363293003, "percentage": 67.83, "elapsed_time": "0:40:05", "remaining_time": "0:19:00", "throughput": 13871.34, "total_tokens": 33362688}
|
|
{"current_steps": 10600, "total_steps": 15621, "loss": 0.3697, "lr": 5.663751481113362e-07, "epoch": 0.6785737148710069, "percentage": 67.86, "elapsed_time": "0:40:05", "remaining_time": "0:18:59", "throughput": 13874.12, "total_tokens": 33377600}
|
|
{"current_steps": 10605, "total_steps": 15621, "loss": 0.4282, "lr": 5.653685657251896e-07, "epoch": 0.6788937968119839, "percentage": 67.89, "elapsed_time": "0:40:06", "remaining_time": "0:18:58", "throughput": 13877.07, "total_tokens": 33393280}
|
|
{"current_steps": 10610, "total_steps": 15621, "loss": 0.3746, "lr": 5.643625259814922e-07, "epoch": 0.6792138787529608, "percentage": 67.92, "elapsed_time": "0:40:07", "remaining_time": "0:18:56", "throughput": 13880.37, "total_tokens": 33410112}
|
|
{"current_steps": 10615, "total_steps": 15621, "loss": 0.3664, "lr": 5.633570301362953e-07, "epoch": 0.6795339606939377, "percentage": 67.95, "elapsed_time": "0:40:07", "remaining_time": "0:18:55", "throughput": 13883.58, "total_tokens": 33426624}
|
|
{"current_steps": 10620, "total_steps": 15621, "loss": 0.36, "lr": 5.623520794449739e-07, "epoch": 0.6798540426349146, "percentage": 67.99, "elapsed_time": "0:40:08", "remaining_time": "0:18:54", "throughput": 13886.57, "total_tokens": 33442240}
|
|
{"current_steps": 10625, "total_steps": 15621, "loss": 0.4713, "lr": 5.613476751622195e-07, "epoch": 0.6801741245758914, "percentage": 68.02, "elapsed_time": "0:40:08", "remaining_time": "0:18:52", "throughput": 13889.67, "total_tokens": 33458432}
|
|
{"current_steps": 10630, "total_steps": 15621, "loss": 0.4368, "lr": 5.603438185420426e-07, "epoch": 0.6804942065168683, "percentage": 68.05, "elapsed_time": "0:40:09", "remaining_time": "0:18:51", "throughput": 13892.61, "total_tokens": 33473856}
|
|
{"current_steps": 10635, "total_steps": 15621, "loss": 0.4714, "lr": 5.593405108377714e-07, "epoch": 0.6808142884578452, "percentage": 68.08, "elapsed_time": "0:40:10", "remaining_time": "0:18:49", "throughput": 13895.52, "total_tokens": 33489216}
|
|
{"current_steps": 10640, "total_steps": 15621, "loss": 0.4586, "lr": 5.583377533020457e-07, "epoch": 0.6811343703988221, "percentage": 68.11, "elapsed_time": "0:40:10", "remaining_time": "0:18:48", "throughput": 13898.6, "total_tokens": 33505280}
|
|
{"current_steps": 10645, "total_steps": 15621, "loss": 0.2834, "lr": 5.573355471868201e-07, "epoch": 0.681454452339799, "percentage": 68.15, "elapsed_time": "0:40:11", "remaining_time": "0:18:47", "throughput": 13901.47, "total_tokens": 33520512}
|
|
{"current_steps": 10650, "total_steps": 15621, "loss": 0.3532, "lr": 5.563338937433621e-07, "epoch": 0.6817745342807758, "percentage": 68.18, "elapsed_time": "0:40:11", "remaining_time": "0:18:45", "throughput": 13904.72, "total_tokens": 33537344}
|
|
{"current_steps": 10655, "total_steps": 15621, "loss": 0.2438, "lr": 5.553327942222472e-07, "epoch": 0.6820946162217527, "percentage": 68.21, "elapsed_time": "0:40:12", "remaining_time": "0:18:44", "throughput": 13907.48, "total_tokens": 33552128}
|
|
{"current_steps": 10660, "total_steps": 15621, "loss": 0.3547, "lr": 5.54332249873359e-07, "epoch": 0.6824146981627297, "percentage": 68.24, "elapsed_time": "0:40:13", "remaining_time": "0:18:43", "throughput": 13910.24, "total_tokens": 33566784}
|
|
{"current_steps": 10665, "total_steps": 15621, "loss": 0.3052, "lr": 5.533322619458896e-07, "epoch": 0.6827347801037066, "percentage": 68.27, "elapsed_time": "0:40:13", "remaining_time": "0:18:41", "throughput": 13913.14, "total_tokens": 33582080}
|
|
{"current_steps": 10670, "total_steps": 15621, "loss": 0.4079, "lr": 5.52332831688336e-07, "epoch": 0.6830548620446835, "percentage": 68.31, "elapsed_time": "0:40:14", "remaining_time": "0:18:40", "throughput": 13915.9, "total_tokens": 33596864}
|
|
{"current_steps": 10675, "total_steps": 15621, "loss": 0.3454, "lr": 5.513339603484981e-07, "epoch": 0.6833749439856603, "percentage": 68.34, "elapsed_time": "0:40:14", "remaining_time": "0:18:38", "throughput": 13918.97, "total_tokens": 33613056}
|
|
{"current_steps": 10680, "total_steps": 15621, "loss": 0.5049, "lr": 5.503356491734785e-07, "epoch": 0.6836950259266372, "percentage": 68.37, "elapsed_time": "0:40:15", "remaining_time": "0:18:37", "throughput": 13921.8, "total_tokens": 33628160}
|
|
{"current_steps": 10685, "total_steps": 15621, "loss": 0.4346, "lr": 5.493378994096806e-07, "epoch": 0.6840151078676141, "percentage": 68.4, "elapsed_time": "0:40:16", "remaining_time": "0:18:36", "throughput": 13925.12, "total_tokens": 33645184}
|
|
{"current_steps": 10690, "total_steps": 15621, "loss": 0.3909, "lr": 5.483407123028067e-07, "epoch": 0.684335189808591, "percentage": 68.43, "elapsed_time": "0:40:16", "remaining_time": "0:18:34", "throughput": 13928.05, "total_tokens": 33660800}
|
|
{"current_steps": 10695, "total_steps": 15621, "loss": 0.4766, "lr": 5.473440890978566e-07, "epoch": 0.6846552717495679, "percentage": 68.47, "elapsed_time": "0:40:17", "remaining_time": "0:18:33", "throughput": 13931.1, "total_tokens": 33676736}
|
|
{"current_steps": 10700, "total_steps": 15621, "loss": 0.4079, "lr": 5.463480310391261e-07, "epoch": 0.6849753536905447, "percentage": 68.5, "elapsed_time": "0:40:18", "remaining_time": "0:18:32", "throughput": 13934.16, "total_tokens": 33692928}
|
|
{"current_steps": 10705, "total_steps": 15621, "loss": 0.3839, "lr": 5.453525393702052e-07, "epoch": 0.6852954356315216, "percentage": 68.53, "elapsed_time": "0:40:18", "remaining_time": "0:18:30", "throughput": 13937.07, "total_tokens": 33708352}
|
|
{"current_steps": 10710, "total_steps": 15621, "loss": 0.3644, "lr": 5.443576153339771e-07, "epoch": 0.6856155175724986, "percentage": 68.56, "elapsed_time": "0:40:19", "remaining_time": "0:18:29", "throughput": 13940.03, "total_tokens": 33723968}
|
|
{"current_steps": 10715, "total_steps": 15621, "loss": 0.3272, "lr": 5.433632601726159e-07, "epoch": 0.6859355995134755, "percentage": 68.59, "elapsed_time": "0:40:19", "remaining_time": "0:18:27", "throughput": 13942.88, "total_tokens": 33739200}
|
|
{"current_steps": 10720, "total_steps": 15621, "loss": 0.3404, "lr": 5.42369475127586e-07, "epoch": 0.6862556814544524, "percentage": 68.63, "elapsed_time": "0:40:20", "remaining_time": "0:18:26", "throughput": 13945.85, "total_tokens": 33754944}
|
|
{"current_steps": 10725, "total_steps": 15621, "loss": 0.4709, "lr": 5.413762614396396e-07, "epoch": 0.6865757633954293, "percentage": 68.66, "elapsed_time": "0:40:21", "remaining_time": "0:18:25", "throughput": 13948.53, "total_tokens": 33769472}
|
|
{"current_steps": 10730, "total_steps": 15621, "loss": 0.4262, "lr": 5.403836203488157e-07, "epoch": 0.6868958453364061, "percentage": 68.69, "elapsed_time": "0:40:21", "remaining_time": "0:18:23", "throughput": 13951.41, "total_tokens": 33784896}
|
|
{"current_steps": 10735, "total_steps": 15621, "loss": 0.3638, "lr": 5.393915530944382e-07, "epoch": 0.687215927277383, "percentage": 68.72, "elapsed_time": "0:40:22", "remaining_time": "0:18:22", "throughput": 13954.27, "total_tokens": 33800320}
|
|
{"current_steps": 10740, "total_steps": 15621, "loss": 0.3765, "lr": 5.384000609151145e-07, "epoch": 0.6875360092183599, "percentage": 68.75, "elapsed_time": "0:40:22", "remaining_time": "0:18:21", "throughput": 13957.46, "total_tokens": 33816896}
|
|
{"current_steps": 10745, "total_steps": 15621, "loss": 0.3763, "lr": 5.374091450487353e-07, "epoch": 0.6878560911593368, "percentage": 68.79, "elapsed_time": "0:40:23", "remaining_time": "0:18:19", "throughput": 13960.59, "total_tokens": 33833344}
|
|
{"current_steps": 10750, "total_steps": 15621, "loss": 0.3352, "lr": 5.364188067324693e-07, "epoch": 0.6881761731003136, "percentage": 68.82, "elapsed_time": "0:40:24", "remaining_time": "0:18:18", "throughput": 13963.75, "total_tokens": 33849856}
|
|
{"current_steps": 10755, "total_steps": 15621, "loss": 0.3441, "lr": 5.354290472027659e-07, "epoch": 0.6884962550412905, "percentage": 68.85, "elapsed_time": "0:40:24", "remaining_time": "0:18:17", "throughput": 13966.66, "total_tokens": 33865344}
|
|
{"current_steps": 10760, "total_steps": 15621, "loss": 0.4955, "lr": 5.344398676953525e-07, "epoch": 0.6888163369822674, "percentage": 68.88, "elapsed_time": "0:40:25", "remaining_time": "0:18:15", "throughput": 13969.85, "total_tokens": 33881792}
|
|
{"current_steps": 10765, "total_steps": 15621, "loss": 0.4902, "lr": 5.334512694452303e-07, "epoch": 0.6891364189232444, "percentage": 68.91, "elapsed_time": "0:40:25", "remaining_time": "0:18:14", "throughput": 13973.03, "total_tokens": 33898368}
|
|
{"current_steps": 10770, "total_steps": 15621, "loss": 0.3489, "lr": 5.324632536866755e-07, "epoch": 0.6894565008642213, "percentage": 68.95, "elapsed_time": "0:40:26", "remaining_time": "0:18:12", "throughput": 13976.05, "total_tokens": 33914368}
|
|
{"current_steps": 10775, "total_steps": 15621, "loss": 0.3526, "lr": 5.314758216532386e-07, "epoch": 0.6897765828051982, "percentage": 68.98, "elapsed_time": "0:40:27", "remaining_time": "0:18:11", "throughput": 13978.92, "total_tokens": 33929728}
|
|
{"current_steps": 10780, "total_steps": 15621, "loss": 0.3743, "lr": 5.304889745777396e-07, "epoch": 0.690096664746175, "percentage": 69.01, "elapsed_time": "0:40:27", "remaining_time": "0:18:10", "throughput": 13981.67, "total_tokens": 33944704}
|
|
{"current_steps": 10785, "total_steps": 15621, "loss": 0.6418, "lr": 5.295027136922678e-07, "epoch": 0.6904167466871519, "percentage": 69.04, "elapsed_time": "0:40:28", "remaining_time": "0:18:08", "throughput": 13984.57, "total_tokens": 33960128}
|
|
{"current_steps": 10790, "total_steps": 15621, "loss": 0.4207, "lr": 5.285170402281827e-07, "epoch": 0.6907368286281288, "percentage": 69.07, "elapsed_time": "0:40:28", "remaining_time": "0:18:07", "throughput": 13987.3, "total_tokens": 33975104}
|
|
{"current_steps": 10795, "total_steps": 15621, "loss": 0.4588, "lr": 5.275319554161087e-07, "epoch": 0.6910569105691057, "percentage": 69.11, "elapsed_time": "0:40:29", "remaining_time": "0:18:06", "throughput": 13990.23, "total_tokens": 33990720}
|
|
{"current_steps": 10800, "total_steps": 15621, "loss": 0.4123, "lr": 5.265474604859356e-07, "epoch": 0.6913769925100826, "percentage": 69.14, "elapsed_time": "0:40:30", "remaining_time": "0:18:04", "throughput": 13993.12, "total_tokens": 34006272}
|
|
{"current_steps": 10805, "total_steps": 15621, "loss": 0.3902, "lr": 5.255635566668171e-07, "epoch": 0.6916970744510594, "percentage": 69.17, "elapsed_time": "0:40:30", "remaining_time": "0:18:03", "throughput": 13996.17, "total_tokens": 34022400}
|
|
{"current_steps": 10810, "total_steps": 15621, "loss": 0.3704, "lr": 5.245802451871686e-07, "epoch": 0.6920171563920363, "percentage": 69.2, "elapsed_time": "0:40:31", "remaining_time": "0:18:02", "throughput": 13999.25, "total_tokens": 34038720}
|
|
{"current_steps": 10815, "total_steps": 15621, "loss": 0.4316, "lr": 5.235975272746663e-07, "epoch": 0.6923372383330133, "percentage": 69.23, "elapsed_time": "0:40:32", "remaining_time": "0:18:00", "throughput": 14002.05, "total_tokens": 34053760}
|
|
{"current_steps": 10820, "total_steps": 15621, "loss": 0.3024, "lr": 5.226154041562442e-07, "epoch": 0.6926573202739902, "percentage": 69.27, "elapsed_time": "0:40:32", "remaining_time": "0:17:59", "throughput": 14005.02, "total_tokens": 34069568}
|
|
{"current_steps": 10825, "total_steps": 15621, "loss": 0.406, "lr": 5.216338770580953e-07, "epoch": 0.6929774022149671, "percentage": 69.3, "elapsed_time": "0:40:33", "remaining_time": "0:17:58", "throughput": 14008.35, "total_tokens": 34086912}
|
|
{"current_steps": 10830, "total_steps": 15621, "loss": 0.3649, "lr": 5.206529472056678e-07, "epoch": 0.6932974841559439, "percentage": 69.33, "elapsed_time": "0:40:33", "remaining_time": "0:17:56", "throughput": 14011.06, "total_tokens": 34101696}
|
|
{"current_steps": 10835, "total_steps": 15621, "loss": 0.3168, "lr": 5.196726158236637e-07, "epoch": 0.6936175660969208, "percentage": 69.36, "elapsed_time": "0:40:34", "remaining_time": "0:17:55", "throughput": 14013.63, "total_tokens": 34115904}
|
|
{"current_steps": 10840, "total_steps": 15621, "loss": 0.3372, "lr": 5.186928841360384e-07, "epoch": 0.6939376480378977, "percentage": 69.39, "elapsed_time": "0:40:35", "remaining_time": "0:17:53", "throughput": 14016.52, "total_tokens": 34131328}
|
|
{"current_steps": 10845, "total_steps": 15621, "loss": 0.4395, "lr": 5.177137533659985e-07, "epoch": 0.6942577299788746, "percentage": 69.43, "elapsed_time": "0:40:35", "remaining_time": "0:17:52", "throughput": 14019.8, "total_tokens": 34148544}
|
|
{"current_steps": 10850, "total_steps": 15621, "loss": 0.4564, "lr": 5.167352247360002e-07, "epoch": 0.6945778119198515, "percentage": 69.46, "elapsed_time": "0:40:36", "remaining_time": "0:17:51", "throughput": 14022.56, "total_tokens": 34163520}
|
|
{"current_steps": 10855, "total_steps": 15621, "loss": 0.3993, "lr": 5.157572994677479e-07, "epoch": 0.6948978938608283, "percentage": 69.49, "elapsed_time": "0:40:36", "remaining_time": "0:17:49", "throughput": 14025.31, "total_tokens": 34178368}
|
|
{"current_steps": 10860, "total_steps": 15621, "loss": 0.4055, "lr": 5.147799787821929e-07, "epoch": 0.6952179758018052, "percentage": 69.52, "elapsed_time": "0:40:37", "remaining_time": "0:17:48", "throughput": 14028.2, "total_tokens": 34193920}
|
|
{"current_steps": 10865, "total_steps": 15621, "loss": 0.485, "lr": 5.138032638995315e-07, "epoch": 0.6955380577427821, "percentage": 69.55, "elapsed_time": "0:40:38", "remaining_time": "0:17:47", "throughput": 14031.27, "total_tokens": 34210176}
|
|
{"current_steps": 10870, "total_steps": 15621, "loss": 0.3575, "lr": 5.128271560392037e-07, "epoch": 0.6958581396837591, "percentage": 69.59, "elapsed_time": "0:40:38", "remaining_time": "0:17:45", "throughput": 14034.55, "total_tokens": 34227328}
|
|
{"current_steps": 10875, "total_steps": 15621, "loss": 0.3901, "lr": 5.118516564198916e-07, "epoch": 0.696178221624736, "percentage": 69.62, "elapsed_time": "0:40:39", "remaining_time": "0:17:44", "throughput": 14037.21, "total_tokens": 34241984}
|
|
{"current_steps": 10880, "total_steps": 15621, "loss": 0.3371, "lr": 5.108767662595175e-07, "epoch": 0.6964983035657129, "percentage": 69.65, "elapsed_time": "0:40:39", "remaining_time": "0:17:43", "throughput": 14039.92, "total_tokens": 34256896}
|
|
{"current_steps": 10885, "total_steps": 15621, "loss": 0.3824, "lr": 5.099024867752446e-07, "epoch": 0.6968183855066897, "percentage": 69.68, "elapsed_time": "0:40:40", "remaining_time": "0:17:41", "throughput": 14043.07, "total_tokens": 34273792}
|
|
{"current_steps": 10890, "total_steps": 15621, "loss": 0.3219, "lr": 5.089288191834709e-07, "epoch": 0.6971384674476666, "percentage": 69.71, "elapsed_time": "0:40:41", "remaining_time": "0:17:40", "throughput": 14046.25, "total_tokens": 34290752}
|
|
{"current_steps": 10895, "total_steps": 15621, "loss": 0.3367, "lr": 5.079557646998318e-07, "epoch": 0.6974585493886435, "percentage": 69.75, "elapsed_time": "0:40:41", "remaining_time": "0:17:39", "throughput": 14049.63, "total_tokens": 34308416}
|
|
{"current_steps": 10900, "total_steps": 15621, "loss": 0.403, "lr": 5.069833245391981e-07, "epoch": 0.6977786313296204, "percentage": 69.78, "elapsed_time": "0:40:42", "remaining_time": "0:17:37", "throughput": 14052.44, "total_tokens": 34323776}
|
|
{"current_steps": 10905, "total_steps": 15621, "loss": 0.322, "lr": 5.060114999156728e-07, "epoch": 0.6980987132705972, "percentage": 69.81, "elapsed_time": "0:40:43", "remaining_time": "0:17:36", "throughput": 14055.21, "total_tokens": 34338944}
|
|
{"current_steps": 10910, "total_steps": 15621, "loss": 0.3462, "lr": 5.050402920425895e-07, "epoch": 0.6984187952115741, "percentage": 69.84, "elapsed_time": "0:40:43", "remaining_time": "0:17:35", "throughput": 14058.07, "total_tokens": 34354432}
|
|
{"current_steps": 10915, "total_steps": 15621, "loss": 0.2526, "lr": 5.040697021325128e-07, "epoch": 0.698738877152551, "percentage": 69.87, "elapsed_time": "0:40:44", "remaining_time": "0:17:33", "throughput": 14061.0, "total_tokens": 34370432}
|
|
{"current_steps": 10920, "total_steps": 15621, "loss": 0.437, "lr": 5.030997313972361e-07, "epoch": 0.699058959093528, "percentage": 69.91, "elapsed_time": "0:40:45", "remaining_time": "0:17:32", "throughput": 14063.95, "total_tokens": 34386496}
|
|
{"current_steps": 10925, "total_steps": 15621, "loss": 0.368, "lr": 5.021303810477795e-07, "epoch": 0.6993790410345049, "percentage": 69.94, "elapsed_time": "0:40:45", "remaining_time": "0:17:31", "throughput": 14066.97, "total_tokens": 34402560}
|
|
{"current_steps": 10930, "total_steps": 15621, "loss": 0.2859, "lr": 5.011616522943869e-07, "epoch": 0.6996991229754818, "percentage": 69.97, "elapsed_time": "0:40:46", "remaining_time": "0:17:29", "throughput": 14069.91, "total_tokens": 34418496}
|
|
{"current_steps": 10935, "total_steps": 15621, "loss": 0.2731, "lr": 5.001935463465289e-07, "epoch": 0.7000192049164586, "percentage": 70.0, "elapsed_time": "0:40:46", "remaining_time": "0:17:28", "throughput": 14072.9, "total_tokens": 34434752}
|
|
{"current_steps": 10940, "total_steps": 15621, "loss": 0.3965, "lr": 4.99226064412897e-07, "epoch": 0.7003392868574355, "percentage": 70.03, "elapsed_time": "0:40:47", "remaining_time": "0:17:27", "throughput": 14075.73, "total_tokens": 34450176}
|
|
{"current_steps": 10945, "total_steps": 15621, "loss": 0.4233, "lr": 4.982592077014026e-07, "epoch": 0.7006593687984124, "percentage": 70.07, "elapsed_time": "0:40:48", "remaining_time": "0:17:25", "throughput": 14078.55, "total_tokens": 34465600}
|
|
{"current_steps": 10948, "total_steps": 15621, "eval_loss": 0.37222641706466675, "epoch": 0.7008514179629985, "percentage": 70.09, "elapsed_time": "0:41:37", "remaining_time": "0:17:46", "throughput": 13803.0, "total_tokens": 34475136}
|
|
{"current_steps": 10950, "total_steps": 15621, "loss": 0.3026, "lr": 4.97292977419179e-07, "epoch": 0.7009794507393893, "percentage": 70.1, "elapsed_time": "0:42:00", "remaining_time": "0:17:55", "throughput": 13678.89, "total_tokens": 34481600}
|
|
{"current_steps": 10955, "total_steps": 15621, "loss": 0.2954, "lr": 4.963273747725755e-07, "epoch": 0.7012995326803662, "percentage": 70.13, "elapsed_time": "0:42:01", "remaining_time": "0:17:53", "throughput": 13682.17, "total_tokens": 34498752}
|
|
{"current_steps": 10960, "total_steps": 15621, "loss": 0.4061, "lr": 4.953624009671582e-07, "epoch": 0.701619614621343, "percentage": 70.16, "elapsed_time": "0:42:02", "remaining_time": "0:17:52", "throughput": 13685.05, "total_tokens": 34514240}
|
|
{"current_steps": 10965, "total_steps": 15621, "loss": 0.4161, "lr": 4.943980572077086e-07, "epoch": 0.7019396965623199, "percentage": 70.19, "elapsed_time": "0:42:02", "remaining_time": "0:17:51", "throughput": 13687.67, "total_tokens": 34528704}
|
|
{"current_steps": 10970, "total_steps": 15621, "loss": 0.3243, "lr": 4.934343446982209e-07, "epoch": 0.7022597785032968, "percentage": 70.23, "elapsed_time": "0:42:03", "remaining_time": "0:17:49", "throughput": 13690.67, "total_tokens": 34544704}
|
|
{"current_steps": 10975, "total_steps": 15621, "loss": 0.3698, "lr": 4.924712646419016e-07, "epoch": 0.7025798604442738, "percentage": 70.26, "elapsed_time": "0:42:03", "remaining_time": "0:17:48", "throughput": 13693.45, "total_tokens": 34560000}
|
|
{"current_steps": 10980, "total_steps": 15621, "loss": 0.3211, "lr": 4.915088182411674e-07, "epoch": 0.7028999423852507, "percentage": 70.29, "elapsed_time": "0:42:04", "remaining_time": "0:17:47", "throughput": 13696.28, "total_tokens": 34575296}
|
|
{"current_steps": 10985, "total_steps": 15621, "loss": 0.3715, "lr": 4.905470066976439e-07, "epoch": 0.7032200243262275, "percentage": 70.32, "elapsed_time": "0:42:05", "remaining_time": "0:17:45", "throughput": 13699.05, "total_tokens": 34590528}
|
|
{"current_steps": 10990, "total_steps": 15621, "loss": 0.4187, "lr": 4.895858312121644e-07, "epoch": 0.7035401062672044, "percentage": 70.35, "elapsed_time": "0:42:05", "remaining_time": "0:17:44", "throughput": 13701.75, "total_tokens": 34605312}
|
|
{"current_steps": 10995, "total_steps": 15621, "loss": 0.4337, "lr": 4.886252929847674e-07, "epoch": 0.7038601882081813, "percentage": 70.39, "elapsed_time": "0:42:06", "remaining_time": "0:17:42", "throughput": 13704.59, "total_tokens": 34620736}
|
|
{"current_steps": 11000, "total_steps": 15621, "loss": 0.4578, "lr": 4.876653932146963e-07, "epoch": 0.7041802701491582, "percentage": 70.42, "elapsed_time": "0:42:06", "remaining_time": "0:17:41", "throughput": 13707.55, "total_tokens": 34636736}
|
|
{"current_steps": 11005, "total_steps": 15621, "loss": 0.3782, "lr": 4.86706133100397e-07, "epoch": 0.7045003520901351, "percentage": 70.45, "elapsed_time": "0:42:07", "remaining_time": "0:17:40", "throughput": 13710.29, "total_tokens": 34651776}
|
|
{"current_steps": 11010, "total_steps": 15621, "loss": 0.2923, "lr": 4.857475138395178e-07, "epoch": 0.7048204340311119, "percentage": 70.48, "elapsed_time": "0:42:07", "remaining_time": "0:17:38", "throughput": 13712.89, "total_tokens": 34666176}
|
|
{"current_steps": 11015, "total_steps": 15621, "loss": 0.2529, "lr": 4.847895366289054e-07, "epoch": 0.7051405159720888, "percentage": 70.51, "elapsed_time": "0:42:08", "remaining_time": "0:17:37", "throughput": 13715.88, "total_tokens": 34682112}
|
|
{"current_steps": 11020, "total_steps": 15621, "loss": 0.3828, "lr": 4.838322026646057e-07, "epoch": 0.7054605979130657, "percentage": 70.55, "elapsed_time": "0:42:09", "remaining_time": "0:17:35", "throughput": 13718.58, "total_tokens": 34697024}
|
|
{"current_steps": 11025, "total_steps": 15621, "loss": 0.3577, "lr": 4.82875513141861e-07, "epoch": 0.7057806798540426, "percentage": 70.58, "elapsed_time": "0:42:09", "remaining_time": "0:17:34", "throughput": 13721.46, "total_tokens": 34712704}
|
|
{"current_steps": 11030, "total_steps": 15621, "loss": 0.3791, "lr": 4.819194692551106e-07, "epoch": 0.7061007617950196, "percentage": 70.61, "elapsed_time": "0:42:10", "remaining_time": "0:17:33", "throughput": 13724.3, "total_tokens": 34728256}
|
|
{"current_steps": 11035, "total_steps": 15621, "loss": 0.4268, "lr": 4.809640721979855e-07, "epoch": 0.7064208437359965, "percentage": 70.64, "elapsed_time": "0:42:11", "remaining_time": "0:17:31", "throughput": 13727.31, "total_tokens": 34744512}
|
|
{"current_steps": 11040, "total_steps": 15621, "loss": 0.4158, "lr": 4.8000932316331e-07, "epoch": 0.7067409256769733, "percentage": 70.67, "elapsed_time": "0:42:11", "remaining_time": "0:17:30", "throughput": 13729.91, "total_tokens": 34758912}
|
|
{"current_steps": 11045, "total_steps": 15621, "loss": 0.4037, "lr": 4.790552233431002e-07, "epoch": 0.7070610076179502, "percentage": 70.71, "elapsed_time": "0:42:12", "remaining_time": "0:17:29", "throughput": 13732.87, "total_tokens": 34774848}
|
|
{"current_steps": 11050, "total_steps": 15621, "loss": 0.4168, "lr": 4.781017739285611e-07, "epoch": 0.7073810895589271, "percentage": 70.74, "elapsed_time": "0:42:12", "remaining_time": "0:17:27", "throughput": 13735.65, "total_tokens": 34790016}
|
|
{"current_steps": 11055, "total_steps": 15621, "loss": 0.3453, "lr": 4.771489761100842e-07, "epoch": 0.707701171499904, "percentage": 70.77, "elapsed_time": "0:42:13", "remaining_time": "0:17:26", "throughput": 13738.37, "total_tokens": 34804992}
|
|
{"current_steps": 11060, "total_steps": 15621, "loss": 0.2687, "lr": 4.761968310772501e-07, "epoch": 0.7080212534408808, "percentage": 70.8, "elapsed_time": "0:42:14", "remaining_time": "0:17:24", "throughput": 13741.17, "total_tokens": 34820288}
|
|
{"current_steps": 11065, "total_steps": 15621, "loss": 0.2718, "lr": 4.7524534001882267e-07, "epoch": 0.7083413353818577, "percentage": 70.83, "elapsed_time": "0:42:14", "remaining_time": "0:17:23", "throughput": 13744.07, "total_tokens": 34836096}
|
|
{"current_steps": 11070, "total_steps": 15621, "loss": 0.394, "lr": 4.7429450412274897e-07, "epoch": 0.7086614173228346, "percentage": 70.87, "elapsed_time": "0:42:15", "remaining_time": "0:17:22", "throughput": 13746.89, "total_tokens": 34851584}
|
|
{"current_steps": 11075, "total_steps": 15621, "loss": 0.3458, "lr": 4.733443245761596e-07, "epoch": 0.7089814992638115, "percentage": 70.9, "elapsed_time": "0:42:15", "remaining_time": "0:17:20", "throughput": 13749.94, "total_tokens": 34868032}
|
|
{"current_steps": 11080, "total_steps": 15621, "loss": 0.3821, "lr": 4.723948025653646e-07, "epoch": 0.7093015812047885, "percentage": 70.93, "elapsed_time": "0:42:16", "remaining_time": "0:17:19", "throughput": 13752.88, "total_tokens": 34884032}
|
|
{"current_steps": 11085, "total_steps": 15621, "loss": 0.3254, "lr": 4.714459392758534e-07, "epoch": 0.7096216631457654, "percentage": 70.96, "elapsed_time": "0:42:17", "remaining_time": "0:17:18", "throughput": 13755.67, "total_tokens": 34899456}
|
|
{"current_steps": 11090, "total_steps": 15621, "loss": 0.3735, "lr": 4.70497735892293e-07, "epoch": 0.7099417450867422, "percentage": 70.99, "elapsed_time": "0:42:17", "remaining_time": "0:17:16", "throughput": 13758.62, "total_tokens": 34915456}
|
|
{"current_steps": 11095, "total_steps": 15621, "loss": 0.3331, "lr": 4.695501935985263e-07, "epoch": 0.7102618270277191, "percentage": 71.03, "elapsed_time": "0:42:18", "remaining_time": "0:17:15", "throughput": 13761.53, "total_tokens": 34931328}
|
|
{"current_steps": 11100, "total_steps": 15621, "loss": 0.3999, "lr": 4.686033135775711e-07, "epoch": 0.710581908968696, "percentage": 71.06, "elapsed_time": "0:42:18", "remaining_time": "0:17:14", "throughput": 13764.31, "total_tokens": 34946816}
|
|
{"current_steps": 11105, "total_steps": 15621, "loss": 0.3245, "lr": 4.6765709701161817e-07, "epoch": 0.7109019909096729, "percentage": 71.09, "elapsed_time": "0:42:19", "remaining_time": "0:17:12", "throughput": 13767.66, "total_tokens": 34964544}
|
|
{"current_steps": 11110, "total_steps": 15621, "loss": 0.3996, "lr": 4.6671154508203003e-07, "epoch": 0.7112220728506498, "percentage": 71.12, "elapsed_time": "0:42:20", "remaining_time": "0:17:11", "throughput": 13770.97, "total_tokens": 34982208}
|
|
{"current_steps": 11115, "total_steps": 15621, "loss": 0.3439, "lr": 4.657666589693393e-07, "epoch": 0.7115421547916266, "percentage": 71.15, "elapsed_time": "0:42:20", "remaining_time": "0:17:10", "throughput": 13774.47, "total_tokens": 35000576}
|
|
{"current_steps": 11120, "total_steps": 15621, "loss": 0.3145, "lr": 4.6482243985324753e-07, "epoch": 0.7118622367326035, "percentage": 71.19, "elapsed_time": "0:42:21", "remaining_time": "0:17:08", "throughput": 13777.0, "total_tokens": 35014912}
|
|
{"current_steps": 11125, "total_steps": 15621, "loss": 0.2914, "lr": 4.638788889126232e-07, "epoch": 0.7121823186735804, "percentage": 71.22, "elapsed_time": "0:42:22", "remaining_time": "0:17:07", "throughput": 13779.62, "total_tokens": 35029632}
|
|
{"current_steps": 11130, "total_steps": 15621, "loss": 0.3239, "lr": 4.6293600732550085e-07, "epoch": 0.7125024006145573, "percentage": 71.25, "elapsed_time": "0:42:22", "remaining_time": "0:17:06", "throughput": 13782.38, "total_tokens": 35044992}
|
|
{"current_steps": 11135, "total_steps": 15621, "loss": 0.4686, "lr": 4.619937962690792e-07, "epoch": 0.7128224825555343, "percentage": 71.28, "elapsed_time": "0:42:23", "remaining_time": "0:17:04", "throughput": 13785.17, "total_tokens": 35060544}
|
|
{"current_steps": 11140, "total_steps": 15621, "loss": 0.5105, "lr": 4.610522569197197e-07, "epoch": 0.7131425644965111, "percentage": 71.31, "elapsed_time": "0:42:23", "remaining_time": "0:17:03", "throughput": 13787.86, "total_tokens": 35075648}
|
|
{"current_steps": 11145, "total_steps": 15621, "loss": 0.3294, "lr": 4.6011139045294554e-07, "epoch": 0.713462646437488, "percentage": 71.35, "elapsed_time": "0:42:24", "remaining_time": "0:17:01", "throughput": 13790.59, "total_tokens": 35090880}
|
|
{"current_steps": 11150, "total_steps": 15621, "loss": 0.3904, "lr": 4.59171198043439e-07, "epoch": 0.7137827283784649, "percentage": 71.38, "elapsed_time": "0:42:25", "remaining_time": "0:17:00", "throughput": 13793.39, "total_tokens": 35106432}
|
|
{"current_steps": 11155, "total_steps": 15621, "loss": 0.4349, "lr": 4.582316808650424e-07, "epoch": 0.7141028103194418, "percentage": 71.41, "elapsed_time": "0:42:25", "remaining_time": "0:16:59", "throughput": 13796.14, "total_tokens": 35121664}
|
|
{"current_steps": 11160, "total_steps": 15621, "loss": 0.491, "lr": 4.572928400907529e-07, "epoch": 0.7144228922604187, "percentage": 71.44, "elapsed_time": "0:42:26", "remaining_time": "0:16:57", "throughput": 13798.91, "total_tokens": 35137152}
|
|
{"current_steps": 11165, "total_steps": 15621, "loss": 0.3682, "lr": 4.5635467689272434e-07, "epoch": 0.7147429742013955, "percentage": 71.47, "elapsed_time": "0:42:26", "remaining_time": "0:16:56", "throughput": 13801.8, "total_tokens": 35153088}
|
|
{"current_steps": 11170, "total_steps": 15621, "loss": 0.3654, "lr": 4.554171924422655e-07, "epoch": 0.7150630561423724, "percentage": 71.51, "elapsed_time": "0:42:27", "remaining_time": "0:16:55", "throughput": 13804.49, "total_tokens": 35168192}
|
|
{"current_steps": 11175, "total_steps": 15621, "loss": 0.3242, "lr": 4.544803879098356e-07, "epoch": 0.7153831380833493, "percentage": 71.54, "elapsed_time": "0:42:28", "remaining_time": "0:16:53", "throughput": 13807.4, "total_tokens": 35184192}
|
|
{"current_steps": 11180, "total_steps": 15621, "loss": 0.3848, "lr": 4.535442644650462e-07, "epoch": 0.7157032200243262, "percentage": 71.57, "elapsed_time": "0:42:28", "remaining_time": "0:16:52", "throughput": 13810.33, "total_tokens": 35200256}
|
|
{"current_steps": 11185, "total_steps": 15621, "loss": 0.4889, "lr": 4.5260882327665906e-07, "epoch": 0.7160233019653032, "percentage": 71.6, "elapsed_time": "0:42:29", "remaining_time": "0:16:51", "throughput": 13812.9, "total_tokens": 35214720}
|
|
{"current_steps": 11190, "total_steps": 15621, "loss": 0.5077, "lr": 4.5167406551258347e-07, "epoch": 0.71634338390628, "percentage": 71.63, "elapsed_time": "0:42:30", "remaining_time": "0:16:49", "throughput": 13815.77, "total_tokens": 35230720}
|
|
{"current_steps": 11195, "total_steps": 15621, "loss": 0.3948, "lr": 4.5073999233987445e-07, "epoch": 0.7166634658472569, "percentage": 71.67, "elapsed_time": "0:42:30", "remaining_time": "0:16:48", "throughput": 13818.6, "total_tokens": 35246400}
|
|
{"current_steps": 11200, "total_steps": 15621, "loss": 0.4854, "lr": 4.4980660492473434e-07, "epoch": 0.7169835477882338, "percentage": 71.7, "elapsed_time": "0:42:31", "remaining_time": "0:16:47", "throughput": 13821.59, "total_tokens": 35262784}
|
|
{"current_steps": 11205, "total_steps": 15621, "loss": 0.2735, "lr": 4.4887390443250804e-07, "epoch": 0.7173036297292107, "percentage": 71.73, "elapsed_time": "0:42:31", "remaining_time": "0:16:45", "throughput": 13824.25, "total_tokens": 35277632}
|
|
{"current_steps": 11210, "total_steps": 15621, "loss": 0.2981, "lr": 4.4794189202768295e-07, "epoch": 0.7176237116701876, "percentage": 71.76, "elapsed_time": "0:42:32", "remaining_time": "0:16:44", "throughput": 13826.9, "total_tokens": 35292544}
|
|
{"current_steps": 11215, "total_steps": 15621, "loss": 0.3816, "lr": 4.4701056887388757e-07, "epoch": 0.7179437936111644, "percentage": 71.79, "elapsed_time": "0:42:33", "remaining_time": "0:16:43", "throughput": 13829.77, "total_tokens": 35308352}
|
|
{"current_steps": 11220, "total_steps": 15621, "loss": 0.3307, "lr": 4.460799361338897e-07, "epoch": 0.7182638755521413, "percentage": 71.83, "elapsed_time": "0:42:33", "remaining_time": "0:16:41", "throughput": 13832.59, "total_tokens": 35323904}
|
|
{"current_steps": 11225, "total_steps": 15621, "loss": 0.4203, "lr": 4.451499949695954e-07, "epoch": 0.7185839574931182, "percentage": 71.86, "elapsed_time": "0:42:34", "remaining_time": "0:16:40", "throughput": 13835.54, "total_tokens": 35340224}
|
|
{"current_steps": 11230, "total_steps": 15621, "loss": 0.375, "lr": 4.44220746542047e-07, "epoch": 0.7189040394340951, "percentage": 71.89, "elapsed_time": "0:42:34", "remaining_time": "0:16:38", "throughput": 13838.32, "total_tokens": 35355776}
|
|
{"current_steps": 11235, "total_steps": 15621, "loss": 0.474, "lr": 4.432921920114221e-07, "epoch": 0.719224121375072, "percentage": 71.92, "elapsed_time": "0:42:35", "remaining_time": "0:16:37", "throughput": 13841.04, "total_tokens": 35371072}
|
|
{"current_steps": 11240, "total_steps": 15621, "loss": 0.3144, "lr": 4.4236433253703185e-07, "epoch": 0.719544203316049, "percentage": 71.95, "elapsed_time": "0:42:36", "remaining_time": "0:16:36", "throughput": 13844.01, "total_tokens": 35387520}
|
|
{"current_steps": 11245, "total_steps": 15621, "loss": 0.4042, "lr": 4.4143716927732e-07, "epoch": 0.7198642852570258, "percentage": 71.99, "elapsed_time": "0:42:36", "remaining_time": "0:16:34", "throughput": 13846.98, "total_tokens": 35403840}
|
|
{"current_steps": 11250, "total_steps": 15621, "loss": 0.3767, "lr": 4.405107033898604e-07, "epoch": 0.7201843671980027, "percentage": 72.02, "elapsed_time": "0:42:37", "remaining_time": "0:16:33", "throughput": 13849.9, "total_tokens": 35420032}
|
|
{"current_steps": 11255, "total_steps": 15621, "loss": 0.2887, "lr": 4.395849360313568e-07, "epoch": 0.7205044491389796, "percentage": 72.05, "elapsed_time": "0:42:38", "remaining_time": "0:16:32", "throughput": 13852.8, "total_tokens": 35436032}
|
|
{"current_steps": 11260, "total_steps": 15621, "loss": 0.3505, "lr": 4.386598683576406e-07, "epoch": 0.7208245310799565, "percentage": 72.08, "elapsed_time": "0:42:38", "remaining_time": "0:16:30", "throughput": 13855.45, "total_tokens": 35451136}
|
|
{"current_steps": 11265, "total_steps": 15621, "loss": 0.4744, "lr": 4.377355015236696e-07, "epoch": 0.7211446130209334, "percentage": 72.11, "elapsed_time": "0:42:39", "remaining_time": "0:16:29", "throughput": 13858.25, "total_tokens": 35466816}
|
|
{"current_steps": 11270, "total_steps": 15621, "loss": 0.3588, "lr": 4.368118366835266e-07, "epoch": 0.7214646949619102, "percentage": 72.15, "elapsed_time": "0:42:39", "remaining_time": "0:16:28", "throughput": 13861.3, "total_tokens": 35483456}
|
|
{"current_steps": 11275, "total_steps": 15621, "loss": 0.4691, "lr": 4.358888749904177e-07, "epoch": 0.7217847769028871, "percentage": 72.18, "elapsed_time": "0:42:40", "remaining_time": "0:16:26", "throughput": 13864.24, "total_tokens": 35499584}
|
|
{"current_steps": 11280, "total_steps": 15621, "loss": 0.3521, "lr": 4.349666175966725e-07, "epoch": 0.722104858843864, "percentage": 72.21, "elapsed_time": "0:42:41", "remaining_time": "0:16:25", "throughput": 13867.05, "total_tokens": 35515328}
|
|
{"current_steps": 11285, "total_steps": 15621, "loss": 0.4721, "lr": 4.340450656537392e-07, "epoch": 0.7224249407848409, "percentage": 72.24, "elapsed_time": "0:42:41", "remaining_time": "0:16:24", "throughput": 13869.61, "total_tokens": 35530048}
|
|
{"current_steps": 11290, "total_steps": 15621, "loss": 0.2995, "lr": 4.331242203121861e-07, "epoch": 0.7227450227258178, "percentage": 72.27, "elapsed_time": "0:42:42", "remaining_time": "0:16:22", "throughput": 13872.46, "total_tokens": 35545792}
|
|
{"current_steps": 11295, "total_steps": 15621, "loss": 0.3775, "lr": 4.322040827217004e-07, "epoch": 0.7230651046667947, "percentage": 72.31, "elapsed_time": "0:42:42", "remaining_time": "0:16:21", "throughput": 13875.27, "total_tokens": 35561344}
|
|
{"current_steps": 11300, "total_steps": 15621, "loss": 0.4064, "lr": 4.312846540310838e-07, "epoch": 0.7233851866077716, "percentage": 72.34, "elapsed_time": "0:42:43", "remaining_time": "0:16:20", "throughput": 13878.09, "total_tokens": 35577024}
|
|
{"current_steps": 11305, "total_steps": 15621, "loss": 0.3527, "lr": 4.3036593538825373e-07, "epoch": 0.7237052685487485, "percentage": 72.37, "elapsed_time": "0:42:44", "remaining_time": "0:16:18", "throughput": 13880.76, "total_tokens": 35592192}
|
|
{"current_steps": 11310, "total_steps": 15621, "loss": 0.3375, "lr": 4.2944792794024196e-07, "epoch": 0.7240253504897254, "percentage": 72.4, "elapsed_time": "0:42:44", "remaining_time": "0:16:17", "throughput": 13883.61, "total_tokens": 35607872}
|
|
{"current_steps": 11315, "total_steps": 15621, "loss": 0.3015, "lr": 4.285306328331915e-07, "epoch": 0.7243454324307023, "percentage": 72.43, "elapsed_time": "0:42:45", "remaining_time": "0:16:16", "throughput": 13886.5, "total_tokens": 35623872}
|
|
{"current_steps": 11320, "total_steps": 15621, "loss": 0.3168, "lr": 4.2761405121235506e-07, "epoch": 0.7246655143716791, "percentage": 72.47, "elapsed_time": "0:42:45", "remaining_time": "0:16:14", "throughput": 13889.07, "total_tokens": 35638720}
|
|
{"current_steps": 11325, "total_steps": 15621, "loss": 0.538, "lr": 4.266981842220965e-07, "epoch": 0.724985596312656, "percentage": 72.5, "elapsed_time": "0:42:46", "remaining_time": "0:16:13", "throughput": 13892.2, "total_tokens": 35655680}
|
|
{"current_steps": 11330, "total_steps": 15621, "loss": 0.2663, "lr": 4.257830330058864e-07, "epoch": 0.7253056782536329, "percentage": 72.53, "elapsed_time": "0:42:47", "remaining_time": "0:16:12", "throughput": 13894.95, "total_tokens": 35671168}
|
|
{"current_steps": 11335, "total_steps": 15621, "loss": 0.4085, "lr": 4.248685987063019e-07, "epoch": 0.7256257601946098, "percentage": 72.56, "elapsed_time": "0:42:47", "remaining_time": "0:16:10", "throughput": 13897.77, "total_tokens": 35686848}
|
|
{"current_steps": 11340, "total_steps": 15621, "loss": 0.3486, "lr": 4.2395488246502396e-07, "epoch": 0.7259458421355867, "percentage": 72.59, "elapsed_time": "0:42:48", "remaining_time": "0:16:09", "throughput": 13900.63, "total_tokens": 35702720}
|
|
{"current_steps": 11345, "total_steps": 15621, "loss": 0.4532, "lr": 4.2304188542283913e-07, "epoch": 0.7262659240765637, "percentage": 72.63, "elapsed_time": "0:42:49", "remaining_time": "0:16:08", "throughput": 13903.97, "total_tokens": 35720640}
|
|
{"current_steps": 11350, "total_steps": 15621, "loss": 0.3855, "lr": 4.221296087196347e-07, "epoch": 0.7265860060175405, "percentage": 72.66, "elapsed_time": "0:42:49", "remaining_time": "0:16:06", "throughput": 13906.55, "total_tokens": 35735424}
|
|
{"current_steps": 11355, "total_steps": 15621, "loss": 0.46, "lr": 4.2121805349439867e-07, "epoch": 0.7269060879585174, "percentage": 72.69, "elapsed_time": "0:42:50", "remaining_time": "0:16:05", "throughput": 13909.37, "total_tokens": 35751168}
|
|
{"current_steps": 11360, "total_steps": 15621, "loss": 0.3829, "lr": 4.203072208852184e-07, "epoch": 0.7272261698994943, "percentage": 72.72, "elapsed_time": "0:42:50", "remaining_time": "0:16:04", "throughput": 13912.19, "total_tokens": 35767168}
|
|
{"current_steps": 11365, "total_steps": 15621, "loss": 0.447, "lr": 4.193971120292793e-07, "epoch": 0.7275462518404712, "percentage": 72.75, "elapsed_time": "0:42:51", "remaining_time": "0:16:02", "throughput": 13914.89, "total_tokens": 35782464}
|
|
{"current_steps": 11370, "total_steps": 15621, "loss": 0.4004, "lr": 4.184877280628629e-07, "epoch": 0.727866333781448, "percentage": 72.79, "elapsed_time": "0:42:52", "remaining_time": "0:16:01", "throughput": 13917.73, "total_tokens": 35798592}
|
|
{"current_steps": 11375, "total_steps": 15621, "loss": 0.3955, "lr": 4.1757907012134565e-07, "epoch": 0.7281864157224249, "percentage": 72.82, "elapsed_time": "0:42:52", "remaining_time": "0:16:00", "throughput": 13920.64, "total_tokens": 35814720}
|
|
{"current_steps": 11380, "total_steps": 15621, "loss": 0.2807, "lr": 4.166711393391978e-07, "epoch": 0.7285064976634018, "percentage": 72.85, "elapsed_time": "0:42:53", "remaining_time": "0:15:59", "throughput": 13923.33, "total_tokens": 35830016}
|
|
{"current_steps": 11385, "total_steps": 15621, "loss": 0.3365, "lr": 4.1576393684998146e-07, "epoch": 0.7288265796043787, "percentage": 72.88, "elapsed_time": "0:42:53", "remaining_time": "0:15:57", "throughput": 13926.07, "total_tokens": 35845632}
|
|
{"current_steps": 11390, "total_steps": 15621, "loss": 0.3505, "lr": 4.1485746378634966e-07, "epoch": 0.7291466615453556, "percentage": 72.91, "elapsed_time": "0:42:54", "remaining_time": "0:15:56", "throughput": 13928.84, "total_tokens": 35861184}
|
|
{"current_steps": 11395, "total_steps": 15621, "loss": 0.4186, "lr": 4.1395172128004473e-07, "epoch": 0.7294667434863324, "percentage": 72.95, "elapsed_time": "0:42:55", "remaining_time": "0:15:55", "throughput": 13931.68, "total_tokens": 35876864}
|
|
{"current_steps": 11400, "total_steps": 15621, "loss": 0.3272, "lr": 4.130467104618963e-07, "epoch": 0.7297868254273094, "percentage": 72.98, "elapsed_time": "0:42:55", "remaining_time": "0:15:53", "throughput": 13934.73, "total_tokens": 35893568}
|
|
{"current_steps": 11405, "total_steps": 15621, "loss": 0.3336, "lr": 4.1214243246182223e-07, "epoch": 0.7301069073682863, "percentage": 73.01, "elapsed_time": "0:42:56", "remaining_time": "0:15:52", "throughput": 13937.59, "total_tokens": 35909696}
|
|
{"current_steps": 11410, "total_steps": 15621, "loss": 0.465, "lr": 4.1123888840882306e-07, "epoch": 0.7304269893092632, "percentage": 73.04, "elapsed_time": "0:42:57", "remaining_time": "0:15:51", "throughput": 13940.31, "total_tokens": 35925120}
|
|
{"current_steps": 11415, "total_steps": 15621, "loss": 0.3184, "lr": 4.1033607943098415e-07, "epoch": 0.7307470712502401, "percentage": 73.07, "elapsed_time": "0:42:57", "remaining_time": "0:15:49", "throughput": 13943.11, "total_tokens": 35940800}
|
|
{"current_steps": 11420, "total_steps": 15621, "loss": 0.3461, "lr": 4.0943400665547423e-07, "epoch": 0.731067153191217, "percentage": 73.11, "elapsed_time": "0:42:58", "remaining_time": "0:15:48", "throughput": 13945.8, "total_tokens": 35955968}
|
|
{"current_steps": 11425, "total_steps": 15621, "loss": 0.3261, "lr": 4.0853267120854064e-07, "epoch": 0.7313872351321938, "percentage": 73.14, "elapsed_time": "0:42:58", "remaining_time": "0:15:47", "throughput": 13948.67, "total_tokens": 35972096}
|
|
{"current_steps": 11430, "total_steps": 15621, "loss": 0.3358, "lr": 4.076320742155117e-07, "epoch": 0.7317073170731707, "percentage": 73.17, "elapsed_time": "0:42:59", "remaining_time": "0:15:45", "throughput": 13951.2, "total_tokens": 35986624}
|
|
{"current_steps": 11435, "total_steps": 15621, "loss": 0.3546, "lr": 4.067322168007928e-07, "epoch": 0.7320273990141476, "percentage": 73.2, "elapsed_time": "0:43:00", "remaining_time": "0:15:44", "throughput": 13954.14, "total_tokens": 36003008}
|
|
{"current_steps": 11440, "total_steps": 15621, "loss": 0.3539, "lr": 4.0583310008786775e-07, "epoch": 0.7323474809551245, "percentage": 73.23, "elapsed_time": "0:43:00", "remaining_time": "0:15:43", "throughput": 13956.54, "total_tokens": 36017152}
|
|
{"current_steps": 11445, "total_steps": 15621, "loss": 0.2777, "lr": 4.049347251992932e-07, "epoch": 0.7326675628961014, "percentage": 73.27, "elapsed_time": "0:43:01", "remaining_time": "0:15:41", "throughput": 13959.1, "total_tokens": 36031936}
|
|
{"current_steps": 11450, "total_steps": 15621, "loss": 0.3461, "lr": 4.0403709325670064e-07, "epoch": 0.7329876448370783, "percentage": 73.3, "elapsed_time": "0:43:01", "remaining_time": "0:15:40", "throughput": 13961.98, "total_tokens": 36048064}
|
|
{"current_steps": 11455, "total_steps": 15621, "loss": 0.4433, "lr": 4.03140205380795e-07, "epoch": 0.7333077267780552, "percentage": 73.33, "elapsed_time": "0:43:02", "remaining_time": "0:15:39", "throughput": 13964.88, "total_tokens": 36064256}
|
|
{"current_steps": 11460, "total_steps": 15621, "loss": 0.6545, "lr": 4.0224406269135115e-07, "epoch": 0.7336278087190321, "percentage": 73.36, "elapsed_time": "0:43:03", "remaining_time": "0:15:37", "throughput": 13967.51, "total_tokens": 36079424}
|
|
{"current_steps": 11465, "total_steps": 15621, "loss": 0.3062, "lr": 4.0134866630721266e-07, "epoch": 0.733947890660009, "percentage": 73.39, "elapsed_time": "0:43:03", "remaining_time": "0:15:36", "throughput": 13970.36, "total_tokens": 36095424}
|
|
{"current_steps": 11470, "total_steps": 15621, "loss": 0.3666, "lr": 4.0045401734629367e-07, "epoch": 0.7342679726009859, "percentage": 73.43, "elapsed_time": "0:43:04", "remaining_time": "0:15:35", "throughput": 13973.19, "total_tokens": 36111360}
|
|
{"current_steps": 11475, "total_steps": 15621, "loss": 0.3819, "lr": 3.9956011692557377e-07, "epoch": 0.7345880545419627, "percentage": 73.46, "elapsed_time": "0:43:04", "remaining_time": "0:15:33", "throughput": 13975.98, "total_tokens": 36127232}
|
|
{"current_steps": 11480, "total_steps": 15621, "loss": 0.3447, "lr": 3.986669661610972e-07, "epoch": 0.7349081364829396, "percentage": 73.49, "elapsed_time": "0:43:05", "remaining_time": "0:15:32", "throughput": 13978.77, "total_tokens": 36143168}
|
|
{"current_steps": 11485, "total_steps": 15621, "loss": 0.329, "lr": 3.9777456616797414e-07, "epoch": 0.7352282184239165, "percentage": 73.52, "elapsed_time": "0:43:06", "remaining_time": "0:15:31", "throughput": 13981.44, "total_tokens": 36158272}
|
|
{"current_steps": 11490, "total_steps": 15621, "loss": 0.3544, "lr": 3.968829180603761e-07, "epoch": 0.7355483003648934, "percentage": 73.55, "elapsed_time": "0:43:06", "remaining_time": "0:15:30", "throughput": 13984.0, "total_tokens": 36173056}
|
|
{"current_steps": 11495, "total_steps": 15621, "loss": 0.4025, "lr": 3.9599202295153624e-07, "epoch": 0.7358683823058703, "percentage": 73.59, "elapsed_time": "0:43:07", "remaining_time": "0:15:28", "throughput": 13986.57, "total_tokens": 36187904}
|
|
{"current_steps": 11500, "total_steps": 15621, "loss": 0.3587, "lr": 3.951018819537476e-07, "epoch": 0.7361884642468471, "percentage": 73.62, "elapsed_time": "0:43:08", "remaining_time": "0:15:27", "throughput": 13989.8, "total_tokens": 36205632}
|
|
{"current_steps": 11505, "total_steps": 15621, "loss": 0.3492, "lr": 3.942124961783616e-07, "epoch": 0.7365085461878241, "percentage": 73.65, "elapsed_time": "0:43:08", "remaining_time": "0:15:26", "throughput": 13992.32, "total_tokens": 36220160}
|
|
{"current_steps": 11510, "total_steps": 15621, "loss": 0.3096, "lr": 3.933238667357869e-07, "epoch": 0.736828628128801, "percentage": 73.68, "elapsed_time": "0:43:09", "remaining_time": "0:15:24", "throughput": 13995.24, "total_tokens": 36236416}
|
|
{"current_steps": 11515, "total_steps": 15621, "loss": 0.3546, "lr": 3.924359947354876e-07, "epoch": 0.7371487100697779, "percentage": 73.71, "elapsed_time": "0:43:09", "remaining_time": "0:15:23", "throughput": 13997.86, "total_tokens": 36251584}
|
|
{"current_steps": 11520, "total_steps": 15621, "loss": 0.3261, "lr": 3.915488812859826e-07, "epoch": 0.7374687920107548, "percentage": 73.75, "elapsed_time": "0:43:10", "remaining_time": "0:15:22", "throughput": 14000.27, "total_tokens": 36265856}
|
|
{"current_steps": 11525, "total_steps": 15621, "loss": 0.3797, "lr": 3.90662527494843e-07, "epoch": 0.7377888739517316, "percentage": 73.78, "elapsed_time": "0:43:11", "remaining_time": "0:15:20", "throughput": 14003.58, "total_tokens": 36283904}
|
|
{"current_steps": 11530, "total_steps": 15621, "loss": 0.3638, "lr": 3.8977693446869285e-07, "epoch": 0.7381089558927085, "percentage": 73.81, "elapsed_time": "0:43:11", "remaining_time": "0:15:19", "throughput": 14006.05, "total_tokens": 36298432}
|
|
{"current_steps": 11535, "total_steps": 15621, "loss": 0.3298, "lr": 3.8889210331320445e-07, "epoch": 0.7384290378336854, "percentage": 73.84, "elapsed_time": "0:43:12", "remaining_time": "0:15:18", "throughput": 14008.73, "total_tokens": 36313728}
|
|
{"current_steps": 11540, "total_steps": 15621, "loss": 0.3795, "lr": 3.8800803513310033e-07, "epoch": 0.7387491197746623, "percentage": 73.87, "elapsed_time": "0:43:12", "remaining_time": "0:15:16", "throughput": 14011.4, "total_tokens": 36329088}
|
|
{"current_steps": 11545, "total_steps": 15621, "loss": 0.4125, "lr": 3.8712473103214993e-07, "epoch": 0.7390692017156392, "percentage": 73.91, "elapsed_time": "0:43:13", "remaining_time": "0:15:15", "throughput": 14014.12, "total_tokens": 36345024}
|
|
{"current_steps": 11550, "total_steps": 15621, "loss": 0.3077, "lr": 3.862421921131688e-07, "epoch": 0.739389283656616, "percentage": 73.94, "elapsed_time": "0:43:14", "remaining_time": "0:15:14", "throughput": 14017.17, "total_tokens": 36361792}
|
|
{"current_steps": 11555, "total_steps": 15621, "loss": 0.2844, "lr": 3.85360419478017e-07, "epoch": 0.739709365597593, "percentage": 73.97, "elapsed_time": "0:43:14", "remaining_time": "0:15:13", "throughput": 14019.84, "total_tokens": 36377152}
|
|
{"current_steps": 11560, "total_steps": 15621, "loss": 0.346, "lr": 3.8447941422759786e-07, "epoch": 0.7400294475385699, "percentage": 74.0, "elapsed_time": "0:43:15", "remaining_time": "0:15:11", "throughput": 14022.85, "total_tokens": 36394048}
|
|
{"current_steps": 11565, "total_steps": 15621, "loss": 0.3546, "lr": 3.835991774618579e-07, "epoch": 0.7403495294795468, "percentage": 74.03, "elapsed_time": "0:43:15", "remaining_time": "0:15:10", "throughput": 14025.44, "total_tokens": 36409152}
|
|
{"current_steps": 11570, "total_steps": 15621, "loss": 0.3882, "lr": 3.827197102797818e-07, "epoch": 0.7406696114205237, "percentage": 74.07, "elapsed_time": "0:43:16", "remaining_time": "0:15:09", "throughput": 14028.68, "total_tokens": 36427072}
|
|
{"current_steps": 11575, "total_steps": 15621, "loss": 0.4667, "lr": 3.818410137793947e-07, "epoch": 0.7409896933615006, "percentage": 74.1, "elapsed_time": "0:43:17", "remaining_time": "0:15:07", "throughput": 14031.73, "total_tokens": 36444288}
|
|
{"current_steps": 11580, "total_steps": 15621, "loss": 0.4323, "lr": 3.809630890577602e-07, "epoch": 0.7413097753024774, "percentage": 74.13, "elapsed_time": "0:43:17", "remaining_time": "0:15:06", "throughput": 14034.51, "total_tokens": 36460096}
|
|
{"current_steps": 11585, "total_steps": 15621, "loss": 0.3414, "lr": 3.800859372109777e-07, "epoch": 0.7416298572434543, "percentage": 74.16, "elapsed_time": "0:43:18", "remaining_time": "0:15:05", "throughput": 14037.14, "total_tokens": 36475264}
|
|
{"current_steps": 11590, "total_steps": 15621, "loss": 0.3205, "lr": 3.7920955933418055e-07, "epoch": 0.7419499391844312, "percentage": 74.19, "elapsed_time": "0:43:19", "remaining_time": "0:15:03", "throughput": 14039.96, "total_tokens": 36491264}
|
|
{"current_steps": 11595, "total_steps": 15621, "loss": 0.3158, "lr": 3.7833395652153775e-07, "epoch": 0.7422700211254081, "percentage": 74.23, "elapsed_time": "0:43:19", "remaining_time": "0:15:02", "throughput": 14042.54, "total_tokens": 36506368}
|
|
{"current_steps": 11600, "total_steps": 15621, "loss": 0.2953, "lr": 3.774591298662497e-07, "epoch": 0.742590103066385, "percentage": 74.26, "elapsed_time": "0:43:20", "remaining_time": "0:15:01", "throughput": 14045.42, "total_tokens": 36522432}
|
|
{"current_steps": 11605, "total_steps": 15621, "loss": 0.4255, "lr": 3.765850804605468e-07, "epoch": 0.7429101850073618, "percentage": 74.29, "elapsed_time": "0:43:20", "remaining_time": "0:15:00", "throughput": 14048.38, "total_tokens": 36539008}
|
|
{"current_steps": 11610, "total_steps": 15621, "loss": 0.2863, "lr": 3.7571180939569104e-07, "epoch": 0.7432302669483388, "percentage": 74.32, "elapsed_time": "0:43:21", "remaining_time": "0:14:58", "throughput": 14051.02, "total_tokens": 36554240}
|
|
{"current_steps": 11615, "total_steps": 15621, "loss": 0.308, "lr": 3.748393177619711e-07, "epoch": 0.7435503488893157, "percentage": 74.36, "elapsed_time": "0:43:22", "remaining_time": "0:14:57", "throughput": 14053.73, "total_tokens": 36569920}
|
|
{"current_steps": 11620, "total_steps": 15621, "loss": 0.3273, "lr": 3.739676066487032e-07, "epoch": 0.7438704308302926, "percentage": 74.39, "elapsed_time": "0:43:22", "remaining_time": "0:14:56", "throughput": 14056.49, "total_tokens": 36585792}
|
|
{"current_steps": 11625, "total_steps": 15621, "loss": 0.2906, "lr": 3.730966771442289e-07, "epoch": 0.7441905127712695, "percentage": 74.42, "elapsed_time": "0:43:23", "remaining_time": "0:14:54", "throughput": 14059.17, "total_tokens": 36601280}
|
|
{"current_steps": 11630, "total_steps": 15621, "loss": 0.5193, "lr": 3.722265303359137e-07, "epoch": 0.7445105947122463, "percentage": 74.45, "elapsed_time": "0:43:23", "remaining_time": "0:14:53", "throughput": 14061.92, "total_tokens": 36617152}
|
|
{"current_steps": 11635, "total_steps": 15621, "loss": 0.4, "lr": 3.713571673101463e-07, "epoch": 0.7448306766532232, "percentage": 74.48, "elapsed_time": "0:43:24", "remaining_time": "0:14:52", "throughput": 14064.55, "total_tokens": 36632512}
|
|
{"current_steps": 11640, "total_steps": 15621, "loss": 0.3338, "lr": 3.704885891523366e-07, "epoch": 0.7451507585942001, "percentage": 74.52, "elapsed_time": "0:43:25", "remaining_time": "0:14:51", "throughput": 14067.19, "total_tokens": 36647744}
|
|
{"current_steps": 11645, "total_steps": 15621, "loss": 0.3878, "lr": 3.696207969469146e-07, "epoch": 0.745470840535177, "percentage": 74.55, "elapsed_time": "0:43:25", "remaining_time": "0:14:49", "throughput": 14069.86, "total_tokens": 36663360}
|
|
{"current_steps": 11650, "total_steps": 15621, "loss": 0.3571, "lr": 3.6875379177732913e-07, "epoch": 0.7457909224761539, "percentage": 74.58, "elapsed_time": "0:43:26", "remaining_time": "0:14:48", "throughput": 14072.48, "total_tokens": 36678656}
|
|
{"current_steps": 11655, "total_steps": 15621, "loss": 0.4971, "lr": 3.6788757472604634e-07, "epoch": 0.7461110044171307, "percentage": 74.61, "elapsed_time": "0:43:27", "remaining_time": "0:14:47", "throughput": 14075.15, "total_tokens": 36693952}
|
|
{"current_steps": 11660, "total_steps": 15621, "loss": 0.3139, "lr": 3.6702214687454825e-07, "epoch": 0.7464310863581076, "percentage": 74.64, "elapsed_time": "0:43:27", "remaining_time": "0:14:45", "throughput": 14077.92, "total_tokens": 36709888}
|
|
{"current_steps": 11665, "total_steps": 15621, "loss": 0.3103, "lr": 3.6615750930333177e-07, "epoch": 0.7467511682990846, "percentage": 74.68, "elapsed_time": "0:43:28", "remaining_time": "0:14:44", "throughput": 14080.61, "total_tokens": 36725504}
|
|
{"current_steps": 11670, "total_steps": 15621, "loss": 0.3055, "lr": 3.65293663091907e-07, "epoch": 0.7470712502400615, "percentage": 74.71, "elapsed_time": "0:43:28", "remaining_time": "0:14:43", "throughput": 14083.4, "total_tokens": 36741376}
|
|
{"current_steps": 11675, "total_steps": 15621, "loss": 0.4277, "lr": 3.6443060931879623e-07, "epoch": 0.7473913321810384, "percentage": 74.74, "elapsed_time": "0:43:29", "remaining_time": "0:14:41", "throughput": 14086.07, "total_tokens": 36756864}
|
|
{"current_steps": 11680, "total_steps": 15621, "loss": 0.4503, "lr": 3.635683490615321e-07, "epoch": 0.7477114141220152, "percentage": 74.77, "elapsed_time": "0:43:30", "remaining_time": "0:14:40", "throughput": 14088.8, "total_tokens": 36772608}
|
|
{"current_steps": 11685, "total_steps": 15621, "loss": 0.2975, "lr": 3.6270688339665634e-07, "epoch": 0.7480314960629921, "percentage": 74.8, "elapsed_time": "0:43:30", "remaining_time": "0:14:39", "throughput": 14091.52, "total_tokens": 36788352}
|
|
{"current_steps": 11690, "total_steps": 15621, "loss": 0.3444, "lr": 3.6184621339972e-07, "epoch": 0.748351578003969, "percentage": 74.84, "elapsed_time": "0:43:31", "remaining_time": "0:14:38", "throughput": 14094.24, "total_tokens": 36804096}
|
|
{"current_steps": 11695, "total_steps": 15621, "loss": 0.3568, "lr": 3.609863401452786e-07, "epoch": 0.7486716599449459, "percentage": 74.87, "elapsed_time": "0:43:31", "remaining_time": "0:14:36", "throughput": 14096.97, "total_tokens": 36819776}
|
|
{"current_steps": 11700, "total_steps": 15621, "loss": 0.4084, "lr": 3.6012726470689416e-07, "epoch": 0.7489917418859228, "percentage": 74.9, "elapsed_time": "0:43:32", "remaining_time": "0:14:35", "throughput": 14099.61, "total_tokens": 36835072}
|
|
{"current_steps": 11705, "total_steps": 15621, "loss": 0.3318, "lr": 3.592689881571329e-07, "epoch": 0.7493118238268996, "percentage": 74.93, "elapsed_time": "0:43:33", "remaining_time": "0:14:34", "throughput": 14102.33, "total_tokens": 36850816}
|
|
{"current_steps": 11710, "total_steps": 15621, "loss": 0.4348, "lr": 3.5841151156756334e-07, "epoch": 0.7496319057678765, "percentage": 74.96, "elapsed_time": "0:43:33", "remaining_time": "0:14:32", "throughput": 14104.99, "total_tokens": 36866368}
|
|
{"current_steps": 11715, "total_steps": 15621, "loss": 0.3994, "lr": 3.575548360087539e-07, "epoch": 0.7499519877088535, "percentage": 75.0, "elapsed_time": "0:43:34", "remaining_time": "0:14:31", "throughput": 14108.42, "total_tokens": 36885376}
|
|
{"current_steps": 11720, "total_steps": 15621, "loss": 0.3173, "lr": 3.5669896255027533e-07, "epoch": 0.7502720696498304, "percentage": 75.03, "elapsed_time": "0:43:35", "remaining_time": "0:14:30", "throughput": 14110.92, "total_tokens": 36900288}
|
|
{"current_steps": 11725, "total_steps": 15621, "loss": 0.4035, "lr": 3.5584389226069543e-07, "epoch": 0.7505921515908073, "percentage": 75.06, "elapsed_time": "0:43:35", "remaining_time": "0:14:29", "throughput": 14113.69, "total_tokens": 36916224}
|
|
{"current_steps": 11730, "total_steps": 15621, "loss": 0.2995, "lr": 3.5498962620757866e-07, "epoch": 0.7509122335317842, "percentage": 75.09, "elapsed_time": "0:43:36", "remaining_time": "0:14:27", "throughput": 14116.34, "total_tokens": 36931648}
|
|
{"current_steps": 11730, "total_steps": 15621, "eval_loss": 0.3647865653038025, "epoch": 0.7509122335317842, "percentage": 75.09, "elapsed_time": "0:44:25", "remaining_time": "0:14:44", "throughput": 13855.98, "total_tokens": 36931648}
|
|
{"current_steps": 11735, "total_steps": 15621, "loss": 0.4327, "lr": 3.5413616545748713e-07, "epoch": 0.751232315472761, "percentage": 75.12, "elapsed_time": "0:45:02", "remaining_time": "0:14:55", "throughput": 13669.22, "total_tokens": 36945856}
|
|
{"current_steps": 11740, "total_steps": 15621, "loss": 0.5026, "lr": 3.532835110759763e-07, "epoch": 0.7515523974137379, "percentage": 75.16, "elapsed_time": "0:45:03", "remaining_time": "0:14:53", "throughput": 13671.98, "total_tokens": 36961792}
|
|
{"current_steps": 11745, "total_steps": 15621, "loss": 0.3038, "lr": 3.524316641275955e-07, "epoch": 0.7518724793547148, "percentage": 75.19, "elapsed_time": "0:45:04", "remaining_time": "0:14:52", "throughput": 13674.61, "total_tokens": 36977152}
|
|
{"current_steps": 11750, "total_steps": 15621, "loss": 0.4152, "lr": 3.5158062567588467e-07, "epoch": 0.7521925612956917, "percentage": 75.22, "elapsed_time": "0:45:04", "remaining_time": "0:14:51", "throughput": 13677.12, "total_tokens": 36991936}
|
|
{"current_steps": 11755, "total_steps": 15621, "loss": 0.3924, "lr": 3.5073039678337633e-07, "epoch": 0.7525126432366686, "percentage": 75.25, "elapsed_time": "0:45:05", "remaining_time": "0:14:49", "throughput": 13679.64, "total_tokens": 37006784}
|
|
{"current_steps": 11760, "total_steps": 15621, "loss": 0.348, "lr": 3.498809785115908e-07, "epoch": 0.7528327251776454, "percentage": 75.28, "elapsed_time": "0:45:05", "remaining_time": "0:14:48", "throughput": 13682.31, "total_tokens": 37022208}
|
|
{"current_steps": 11765, "total_steps": 15621, "loss": 0.3504, "lr": 3.4903237192103697e-07, "epoch": 0.7531528071186223, "percentage": 75.32, "elapsed_time": "0:45:06", "remaining_time": "0:14:47", "throughput": 13685.39, "total_tokens": 37039488}
|
|
{"current_steps": 11770, "total_steps": 15621, "loss": 0.3372, "lr": 3.481845780712099e-07, "epoch": 0.7534728890595993, "percentage": 75.35, "elapsed_time": "0:45:07", "remaining_time": "0:14:45", "throughput": 13688.32, "total_tokens": 37056064}
|
|
{"current_steps": 11775, "total_steps": 15621, "loss": 0.3354, "lr": 3.4733759802059037e-07, "epoch": 0.7537929710005762, "percentage": 75.38, "elapsed_time": "0:45:07", "remaining_time": "0:14:44", "throughput": 13691.14, "total_tokens": 37072256}
|
|
{"current_steps": 11780, "total_steps": 15621, "loss": 0.4239, "lr": 3.4649143282664273e-07, "epoch": 0.7541130529415531, "percentage": 75.41, "elapsed_time": "0:45:08", "remaining_time": "0:14:43", "throughput": 13693.69, "total_tokens": 37087360}
|
|
{"current_steps": 11785, "total_steps": 15621, "loss": 0.2992, "lr": 3.456460835458143e-07, "epoch": 0.7544331348825299, "percentage": 75.44, "elapsed_time": "0:45:08", "remaining_time": "0:14:41", "throughput": 13696.18, "total_tokens": 37102144}
|
|
{"current_steps": 11790, "total_steps": 15621, "loss": 0.3172, "lr": 3.4480155123353337e-07, "epoch": 0.7547532168235068, "percentage": 75.48, "elapsed_time": "0:45:09", "remaining_time": "0:14:40", "throughput": 13698.82, "total_tokens": 37117568}
|
|
{"current_steps": 11795, "total_steps": 15621, "loss": 0.4541, "lr": 3.4395783694420875e-07, "epoch": 0.7550732987644837, "percentage": 75.51, "elapsed_time": "0:45:10", "remaining_time": "0:14:39", "throughput": 13701.41, "total_tokens": 37132800}
|
|
{"current_steps": 11800, "total_steps": 15621, "loss": 0.4009, "lr": 3.4311494173122743e-07, "epoch": 0.7553933807054606, "percentage": 75.54, "elapsed_time": "0:45:10", "remaining_time": "0:14:37", "throughput": 13703.91, "total_tokens": 37147776}
|
|
{"current_steps": 11805, "total_steps": 15621, "loss": 0.387, "lr": 3.422728666469541e-07, "epoch": 0.7557134626464375, "percentage": 75.57, "elapsed_time": "0:45:11", "remaining_time": "0:14:36", "throughput": 13706.75, "total_tokens": 37163904}
|
|
{"current_steps": 11810, "total_steps": 15621, "loss": 0.4272, "lr": 3.41431612742729e-07, "epoch": 0.7560335445874143, "percentage": 75.6, "elapsed_time": "0:45:11", "remaining_time": "0:14:35", "throughput": 13709.64, "total_tokens": 37180416}
|
|
{"current_steps": 11815, "total_steps": 15621, "loss": 0.4243, "lr": 3.4059118106886855e-07, "epoch": 0.7563536265283912, "percentage": 75.64, "elapsed_time": "0:45:12", "remaining_time": "0:14:33", "throughput": 13712.4, "total_tokens": 37196480}
|
|
{"current_steps": 11820, "total_steps": 15621, "loss": 0.5118, "lr": 3.3975157267466036e-07, "epoch": 0.7566737084693682, "percentage": 75.67, "elapsed_time": "0:45:13", "remaining_time": "0:14:32", "throughput": 13714.99, "total_tokens": 37211648}
|
|
{"current_steps": 11825, "total_steps": 15621, "loss": 0.29, "lr": 3.389127886083656e-07, "epoch": 0.7569937904103451, "percentage": 75.7, "elapsed_time": "0:45:13", "remaining_time": "0:14:31", "throughput": 13717.64, "total_tokens": 37227072}
|
|
{"current_steps": 11830, "total_steps": 15621, "loss": 0.3415, "lr": 3.3807482991721667e-07, "epoch": 0.757313872351322, "percentage": 75.73, "elapsed_time": "0:45:14", "remaining_time": "0:14:29", "throughput": 13720.62, "total_tokens": 37243968}
|
|
{"current_steps": 11835, "total_steps": 15621, "loss": 0.3219, "lr": 3.3723769764741474e-07, "epoch": 0.7576339542922989, "percentage": 75.76, "elapsed_time": "0:45:15", "remaining_time": "0:14:28", "throughput": 13723.18, "total_tokens": 37259200}
|
|
{"current_steps": 11840, "total_steps": 15621, "loss": 0.2948, "lr": 3.3640139284412825e-07, "epoch": 0.7579540362332757, "percentage": 75.8, "elapsed_time": "0:45:15", "remaining_time": "0:14:27", "throughput": 13725.92, "total_tokens": 37275072}
|
|
{"current_steps": 11845, "total_steps": 15621, "loss": 0.399, "lr": 3.355659165514948e-07, "epoch": 0.7582741181742526, "percentage": 75.83, "elapsed_time": "0:45:16", "remaining_time": "0:14:25", "throughput": 13728.74, "total_tokens": 37291392}
|
|
{"current_steps": 11850, "total_steps": 15621, "loss": 0.2714, "lr": 3.347312698126161e-07, "epoch": 0.7585942001152295, "percentage": 75.86, "elapsed_time": "0:45:16", "remaining_time": "0:14:24", "throughput": 13731.57, "total_tokens": 37307648}
|
|
{"current_steps": 11855, "total_steps": 15621, "loss": 0.2191, "lr": 3.338974536695578e-07, "epoch": 0.7589142820562064, "percentage": 75.89, "elapsed_time": "0:45:17", "remaining_time": "0:14:23", "throughput": 13734.21, "total_tokens": 37323136}
|
|
{"current_steps": 11860, "total_steps": 15621, "loss": 0.3183, "lr": 3.330644691633492e-07, "epoch": 0.7592343639971832, "percentage": 75.92, "elapsed_time": "0:45:18", "remaining_time": "0:14:21", "throughput": 13736.83, "total_tokens": 37338496}
|
|
{"current_steps": 11865, "total_steps": 15621, "loss": 0.2783, "lr": 3.322323173339818e-07, "epoch": 0.7595544459381601, "percentage": 75.96, "elapsed_time": "0:45:18", "remaining_time": "0:14:20", "throughput": 13740.03, "total_tokens": 37356800}
|
|
{"current_steps": 11870, "total_steps": 15621, "loss": 0.4264, "lr": 3.314009992204071e-07, "epoch": 0.759874527879137, "percentage": 75.99, "elapsed_time": "0:45:19", "remaining_time": "0:14:19", "throughput": 13742.75, "total_tokens": 37372800}
|
|
{"current_steps": 11875, "total_steps": 15621, "loss": 0.3269, "lr": 3.3057051586053443e-07, "epoch": 0.760194609820114, "percentage": 76.02, "elapsed_time": "0:45:20", "remaining_time": "0:14:18", "throughput": 13745.46, "total_tokens": 37388608}
|
|
{"current_steps": 11880, "total_steps": 15621, "loss": 0.4584, "lr": 3.297408682912329e-07, "epoch": 0.7605146917610909, "percentage": 76.05, "elapsed_time": "0:45:20", "remaining_time": "0:14:16", "throughput": 13748.34, "total_tokens": 37405184}
|
|
{"current_steps": 11885, "total_steps": 15621, "loss": 0.2741, "lr": 3.289120575483271e-07, "epoch": 0.7608347737020678, "percentage": 76.08, "elapsed_time": "0:45:21", "remaining_time": "0:14:15", "throughput": 13750.86, "total_tokens": 37420096}
|
|
{"current_steps": 11890, "total_steps": 15621, "loss": 0.4214, "lr": 3.280840846665969e-07, "epoch": 0.7611548556430446, "percentage": 76.12, "elapsed_time": "0:45:21", "remaining_time": "0:14:14", "throughput": 13753.21, "total_tokens": 37434368}
|
|
{"current_steps": 11895, "total_steps": 15621, "loss": 0.3005, "lr": 3.272569506797761e-07, "epoch": 0.7614749375840215, "percentage": 76.15, "elapsed_time": "0:45:22", "remaining_time": "0:14:12", "throughput": 13755.7, "total_tokens": 37449344}
|
|
{"current_steps": 11900, "total_steps": 15621, "loss": 0.3314, "lr": 3.2643065662055136e-07, "epoch": 0.7617950195249984, "percentage": 76.18, "elapsed_time": "0:45:23", "remaining_time": "0:14:11", "throughput": 13758.25, "total_tokens": 37464448}
|
|
{"current_steps": 11905, "total_steps": 15621, "loss": 0.2837, "lr": 3.2560520352056033e-07, "epoch": 0.7621151014659753, "percentage": 76.21, "elapsed_time": "0:45:23", "remaining_time": "0:14:10", "throughput": 13761.31, "total_tokens": 37481856}
|
|
{"current_steps": 11910, "total_steps": 15621, "loss": 0.3985, "lr": 3.24780592410391e-07, "epoch": 0.7624351834069522, "percentage": 76.24, "elapsed_time": "0:45:24", "remaining_time": "0:14:08", "throughput": 13764.03, "total_tokens": 37497856}
|
|
{"current_steps": 11915, "total_steps": 15621, "loss": 0.4494, "lr": 3.2395682431957994e-07, "epoch": 0.762755265347929, "percentage": 76.28, "elapsed_time": "0:45:24", "remaining_time": "0:14:07", "throughput": 13766.67, "total_tokens": 37513600}
|
|
{"current_steps": 11920, "total_steps": 15621, "loss": 0.324, "lr": 3.231339002766115e-07, "epoch": 0.7630753472889059, "percentage": 76.31, "elapsed_time": "0:45:25", "remaining_time": "0:14:06", "throughput": 13769.35, "total_tokens": 37529408}
|
|
{"current_steps": 11925, "total_steps": 15621, "loss": 0.3296, "lr": 3.2231182130891564e-07, "epoch": 0.7633954292298829, "percentage": 76.34, "elapsed_time": "0:45:26", "remaining_time": "0:14:04", "throughput": 13772.23, "total_tokens": 37545984}
|
|
{"current_steps": 11930, "total_steps": 15621, "loss": 0.3405, "lr": 3.214905884428679e-07, "epoch": 0.7637155111708598, "percentage": 76.37, "elapsed_time": "0:45:26", "remaining_time": "0:14:03", "throughput": 13774.94, "total_tokens": 37561856}
|
|
{"current_steps": 11935, "total_steps": 15621, "loss": 0.3253, "lr": 3.206702027037868e-07, "epoch": 0.7640355931118367, "percentage": 76.4, "elapsed_time": "0:45:27", "remaining_time": "0:14:02", "throughput": 13777.82, "total_tokens": 37578624}
|
|
{"current_steps": 11940, "total_steps": 15621, "loss": 0.3882, "lr": 3.198506651159344e-07, "epoch": 0.7643556750528135, "percentage": 76.44, "elapsed_time": "0:45:28", "remaining_time": "0:14:01", "throughput": 13780.39, "total_tokens": 37593920}
|
|
{"current_steps": 11945, "total_steps": 15621, "loss": 0.38, "lr": 3.190319767025121e-07, "epoch": 0.7646757569937904, "percentage": 76.47, "elapsed_time": "0:45:28", "remaining_time": "0:13:59", "throughput": 13783.04, "total_tokens": 37609664}
|
|
{"current_steps": 11950, "total_steps": 15621, "loss": 0.4989, "lr": 3.1821413848566213e-07, "epoch": 0.7649958389347673, "percentage": 76.5, "elapsed_time": "0:45:29", "remaining_time": "0:13:58", "throughput": 13785.81, "total_tokens": 37626048}
|
|
{"current_steps": 11955, "total_steps": 15621, "loss": 0.3798, "lr": 3.1739715148646564e-07, "epoch": 0.7653159208757442, "percentage": 76.53, "elapsed_time": "0:45:29", "remaining_time": "0:13:57", "throughput": 13788.48, "total_tokens": 37641792}
|
|
{"current_steps": 11960, "total_steps": 15621, "loss": 0.4583, "lr": 3.1658101672494043e-07, "epoch": 0.7656360028167211, "percentage": 76.56, "elapsed_time": "0:45:30", "remaining_time": "0:13:55", "throughput": 13790.91, "total_tokens": 37656512}
|
|
{"current_steps": 11965, "total_steps": 15621, "loss": 0.3527, "lr": 3.157657352200397e-07, "epoch": 0.7659560847576979, "percentage": 76.6, "elapsed_time": "0:45:31", "remaining_time": "0:13:54", "throughput": 13793.51, "total_tokens": 37672000}
|
|
{"current_steps": 11970, "total_steps": 15621, "loss": 0.3362, "lr": 3.149513079896521e-07, "epoch": 0.7662761666986748, "percentage": 76.63, "elapsed_time": "0:45:31", "remaining_time": "0:13:53", "throughput": 13796.1, "total_tokens": 37687232}
|
|
{"current_steps": 11975, "total_steps": 15621, "loss": 0.3244, "lr": 3.1413773605060034e-07, "epoch": 0.7665962486396517, "percentage": 76.66, "elapsed_time": "0:45:32", "remaining_time": "0:13:51", "throughput": 13798.7, "total_tokens": 37702656}
|
|
{"current_steps": 11980, "total_steps": 15621, "loss": 0.4343, "lr": 3.1332502041863783e-07, "epoch": 0.7669163305806287, "percentage": 76.69, "elapsed_time": "0:45:32", "remaining_time": "0:13:50", "throughput": 13801.29, "total_tokens": 37718080}
|
|
{"current_steps": 11985, "total_steps": 15621, "loss": 0.3141, "lr": 3.1251316210844946e-07, "epoch": 0.7672364125216056, "percentage": 76.72, "elapsed_time": "0:45:33", "remaining_time": "0:13:49", "throughput": 13804.34, "total_tokens": 37735680}
|
|
{"current_steps": 11990, "total_steps": 15621, "loss": 0.2871, "lr": 3.1170216213365055e-07, "epoch": 0.7675564944625825, "percentage": 76.76, "elapsed_time": "0:45:34", "remaining_time": "0:13:48", "throughput": 13806.69, "total_tokens": 37749952}
|
|
{"current_steps": 11995, "total_steps": 15621, "loss": 0.4607, "lr": 3.1089202150678397e-07, "epoch": 0.7678765764035593, "percentage": 76.79, "elapsed_time": "0:45:34", "remaining_time": "0:13:46", "throughput": 13809.23, "total_tokens": 37765312}
|
|
{"current_steps": 12000, "total_steps": 15621, "loss": 0.4695, "lr": 3.1008274123931886e-07, "epoch": 0.7681966583445362, "percentage": 76.82, "elapsed_time": "0:45:35", "remaining_time": "0:13:45", "throughput": 13811.68, "total_tokens": 37780160}
|
|
{"current_steps": 12005, "total_steps": 15621, "loss": 0.2672, "lr": 3.092743223416523e-07, "epoch": 0.7685167402855131, "percentage": 76.85, "elapsed_time": "0:45:36", "remaining_time": "0:13:44", "throughput": 13814.42, "total_tokens": 37796352}
|
|
{"current_steps": 12010, "total_steps": 15621, "loss": 0.3499, "lr": 3.0846676582310413e-07, "epoch": 0.76883682222649, "percentage": 76.88, "elapsed_time": "0:45:36", "remaining_time": "0:13:42", "throughput": 13817.28, "total_tokens": 37812864}
|
|
{"current_steps": 12015, "total_steps": 15621, "loss": 0.3824, "lr": 3.076600726919185e-07, "epoch": 0.7691569041674668, "percentage": 76.92, "elapsed_time": "0:45:37", "remaining_time": "0:13:41", "throughput": 13819.76, "total_tokens": 37827840}
|
|
{"current_steps": 12020, "total_steps": 15621, "loss": 0.3579, "lr": 3.0685424395526106e-07, "epoch": 0.7694769861084437, "percentage": 76.95, "elapsed_time": "0:45:37", "remaining_time": "0:13:40", "throughput": 13823.19, "total_tokens": 37847040}
|
|
{"current_steps": 12025, "total_steps": 15621, "loss": 0.2819, "lr": 3.060492806192184e-07, "epoch": 0.7697970680494206, "percentage": 76.98, "elapsed_time": "0:45:38", "remaining_time": "0:13:38", "throughput": 13825.79, "total_tokens": 37862464}
|
|
{"current_steps": 12030, "total_steps": 15621, "loss": 0.377, "lr": 3.052451836887968e-07, "epoch": 0.7701171499903975, "percentage": 77.01, "elapsed_time": "0:45:39", "remaining_time": "0:13:37", "throughput": 13828.36, "total_tokens": 37877760}
|
|
{"current_steps": 12035, "total_steps": 15621, "loss": 0.2861, "lr": 3.044419541679207e-07, "epoch": 0.7704372319313745, "percentage": 77.04, "elapsed_time": "0:45:39", "remaining_time": "0:13:36", "throughput": 13830.82, "total_tokens": 37892800}
|
|
{"current_steps": 12040, "total_steps": 15621, "loss": 0.4239, "lr": 3.0363959305943153e-07, "epoch": 0.7707573138723514, "percentage": 77.08, "elapsed_time": "0:45:40", "remaining_time": "0:13:35", "throughput": 13833.59, "total_tokens": 37909056}
|
|
{"current_steps": 12045, "total_steps": 15621, "loss": 0.348, "lr": 3.028381013650867e-07, "epoch": 0.7710773958133282, "percentage": 77.11, "elapsed_time": "0:45:40", "remaining_time": "0:13:33", "throughput": 13836.39, "total_tokens": 37925376}
|
|
{"current_steps": 12050, "total_steps": 15621, "loss": 0.3716, "lr": 3.0203748008555783e-07, "epoch": 0.7713974777543051, "percentage": 77.14, "elapsed_time": "0:45:41", "remaining_time": "0:13:32", "throughput": 13839.14, "total_tokens": 37941632}
|
|
{"current_steps": 12055, "total_steps": 15621, "loss": 0.3805, "lr": 3.012377302204301e-07, "epoch": 0.771717559695282, "percentage": 77.17, "elapsed_time": "0:45:42", "remaining_time": "0:13:31", "throughput": 13841.69, "total_tokens": 37957056}
|
|
{"current_steps": 12060, "total_steps": 15621, "loss": 0.3916, "lr": 3.0043885276820046e-07, "epoch": 0.7720376416362589, "percentage": 77.2, "elapsed_time": "0:45:42", "remaining_time": "0:13:29", "throughput": 13844.36, "total_tokens": 37973184}
|
|
{"current_steps": 12065, "total_steps": 15621, "loss": 0.3087, "lr": 2.99640848726277e-07, "epoch": 0.7723577235772358, "percentage": 77.24, "elapsed_time": "0:45:43", "remaining_time": "0:13:28", "throughput": 13846.83, "total_tokens": 37988288}
|
|
{"current_steps": 12070, "total_steps": 15621, "loss": 0.3812, "lr": 2.9884371909097704e-07, "epoch": 0.7726778055182126, "percentage": 77.27, "elapsed_time": "0:45:44", "remaining_time": "0:13:27", "throughput": 13849.46, "total_tokens": 38004224}
|
|
{"current_steps": 12075, "total_steps": 15621, "loss": 0.3711, "lr": 2.9804746485752616e-07, "epoch": 0.7729978874591895, "percentage": 77.3, "elapsed_time": "0:45:44", "remaining_time": "0:13:26", "throughput": 13851.99, "total_tokens": 38019456}
|
|
{"current_steps": 12080, "total_steps": 15621, "loss": 0.4058, "lr": 2.972520870200573e-07, "epoch": 0.7733179694001664, "percentage": 77.33, "elapsed_time": "0:45:45", "remaining_time": "0:13:24", "throughput": 13854.59, "total_tokens": 38035264}
|
|
{"current_steps": 12085, "total_steps": 15621, "loss": 0.4045, "lr": 2.9645758657160904e-07, "epoch": 0.7736380513411434, "percentage": 77.36, "elapsed_time": "0:45:45", "remaining_time": "0:13:23", "throughput": 13857.25, "total_tokens": 38051072}
|
|
{"current_steps": 12090, "total_steps": 15621, "loss": 0.3538, "lr": 2.9566396450412444e-07, "epoch": 0.7739581332821203, "percentage": 77.4, "elapsed_time": "0:45:46", "remaining_time": "0:13:22", "throughput": 13859.8, "total_tokens": 38066688}
|
|
{"current_steps": 12095, "total_steps": 15621, "loss": 0.3193, "lr": 2.9487122180844957e-07, "epoch": 0.7742782152230971, "percentage": 77.43, "elapsed_time": "0:45:47", "remaining_time": "0:13:20", "throughput": 13862.36, "total_tokens": 38082048}
|
|
{"current_steps": 12100, "total_steps": 15621, "loss": 0.2996, "lr": 2.9407935947433406e-07, "epoch": 0.774598297164074, "percentage": 77.46, "elapsed_time": "0:45:47", "remaining_time": "0:13:19", "throughput": 13864.88, "total_tokens": 38097344}
|
|
{"current_steps": 12105, "total_steps": 15621, "loss": 0.446, "lr": 2.932883784904264e-07, "epoch": 0.7749183791050509, "percentage": 77.49, "elapsed_time": "0:45:48", "remaining_time": "0:13:18", "throughput": 13867.32, "total_tokens": 38112320}
|
|
{"current_steps": 12110, "total_steps": 15621, "loss": 0.2475, "lr": 2.9249827984427555e-07, "epoch": 0.7752384610460278, "percentage": 77.52, "elapsed_time": "0:45:48", "remaining_time": "0:13:16", "throughput": 13869.94, "total_tokens": 38128000}
|
|
{"current_steps": 12115, "total_steps": 15621, "loss": 0.3015, "lr": 2.917090645223297e-07, "epoch": 0.7755585429870047, "percentage": 77.56, "elapsed_time": "0:45:49", "remaining_time": "0:13:15", "throughput": 13872.42, "total_tokens": 38143168}
|
|
{"current_steps": 12120, "total_steps": 15621, "loss": 0.2912, "lr": 2.909207335099332e-07, "epoch": 0.7758786249279815, "percentage": 77.59, "elapsed_time": "0:45:50", "remaining_time": "0:13:14", "throughput": 13874.81, "total_tokens": 38157824}
|
|
{"current_steps": 12125, "total_steps": 15621, "loss": 0.3332, "lr": 2.9013328779132595e-07, "epoch": 0.7761987068689584, "percentage": 77.62, "elapsed_time": "0:45:50", "remaining_time": "0:13:13", "throughput": 13877.29, "total_tokens": 38172864}
|
|
{"current_steps": 12130, "total_steps": 15621, "loss": 0.425, "lr": 2.893467283496439e-07, "epoch": 0.7765187888099353, "percentage": 77.65, "elapsed_time": "0:45:51", "remaining_time": "0:13:11", "throughput": 13879.63, "total_tokens": 38187264}
|
|
{"current_steps": 12135, "total_steps": 15621, "loss": 0.3551, "lr": 2.885610561669155e-07, "epoch": 0.7768388707509122, "percentage": 77.68, "elapsed_time": "0:45:51", "remaining_time": "0:13:10", "throughput": 13882.57, "total_tokens": 38204288}
|
|
{"current_steps": 12140, "total_steps": 15621, "loss": 0.3462, "lr": 2.8777627222406163e-07, "epoch": 0.7771589526918892, "percentage": 77.72, "elapsed_time": "0:45:52", "remaining_time": "0:13:09", "throughput": 13885.01, "total_tokens": 38219264}
|
|
{"current_steps": 12145, "total_steps": 15621, "loss": 0.3863, "lr": 2.869923775008943e-07, "epoch": 0.777479034632866, "percentage": 77.75, "elapsed_time": "0:45:53", "remaining_time": "0:13:07", "throughput": 13887.48, "total_tokens": 38234496}
|
|
{"current_steps": 12150, "total_steps": 15621, "loss": 0.2559, "lr": 2.862093729761155e-07, "epoch": 0.7777991165738429, "percentage": 77.78, "elapsed_time": "0:45:53", "remaining_time": "0:13:06", "throughput": 13890.29, "total_tokens": 38251072}
|
|
{"current_steps": 12155, "total_steps": 15621, "loss": 0.4049, "lr": 2.854272596273152e-07, "epoch": 0.7781191985148198, "percentage": 77.81, "elapsed_time": "0:45:54", "remaining_time": "0:13:05", "throughput": 13892.84, "total_tokens": 38266560}
|
|
{"current_steps": 12160, "total_steps": 15621, "loss": 0.3287, "lr": 2.8464603843097134e-07, "epoch": 0.7784392804557967, "percentage": 77.84, "elapsed_time": "0:45:55", "remaining_time": "0:13:04", "throughput": 13895.62, "total_tokens": 38282944}
|
|
{"current_steps": 12165, "total_steps": 15621, "loss": 0.3291, "lr": 2.8386571036244764e-07, "epoch": 0.7787593623967736, "percentage": 77.88, "elapsed_time": "0:45:55", "remaining_time": "0:13:02", "throughput": 13898.39, "total_tokens": 38299264}
|
|
{"current_steps": 12170, "total_steps": 15621, "loss": 0.39, "lr": 2.830862763959929e-07, "epoch": 0.7790794443377504, "percentage": 77.91, "elapsed_time": "0:45:56", "remaining_time": "0:13:01", "throughput": 13900.88, "total_tokens": 38314368}
|
|
{"current_steps": 12175, "total_steps": 15621, "loss": 0.3154, "lr": 2.8230773750473956e-07, "epoch": 0.7793995262787273, "percentage": 77.94, "elapsed_time": "0:45:56", "remaining_time": "0:13:00", "throughput": 13903.4, "total_tokens": 38329664}
|
|
{"current_steps": 12180, "total_steps": 15621, "loss": 0.3072, "lr": 2.8153009466070267e-07, "epoch": 0.7797196082197042, "percentage": 77.97, "elapsed_time": "0:45:57", "remaining_time": "0:12:59", "throughput": 13905.99, "total_tokens": 38345408}
|
|
{"current_steps": 12185, "total_steps": 15621, "loss": 0.2878, "lr": 2.807533488347783e-07, "epoch": 0.7800396901606811, "percentage": 78.0, "elapsed_time": "0:45:58", "remaining_time": "0:12:57", "throughput": 13908.95, "total_tokens": 38362688}
|
|
{"current_steps": 12190, "total_steps": 15621, "loss": 0.2548, "lr": 2.7997750099674277e-07, "epoch": 0.7803597721016581, "percentage": 78.04, "elapsed_time": "0:45:58", "remaining_time": "0:12:56", "throughput": 13911.39, "total_tokens": 38377600}
|
|
{"current_steps": 12195, "total_steps": 15621, "loss": 0.5286, "lr": 2.792025521152512e-07, "epoch": 0.780679854042635, "percentage": 78.07, "elapsed_time": "0:45:59", "remaining_time": "0:12:55", "throughput": 13913.84, "total_tokens": 38392640}
|
|
{"current_steps": 12200, "total_steps": 15621, "loss": 0.4496, "lr": 2.784285031578365e-07, "epoch": 0.7809999359836118, "percentage": 78.1, "elapsed_time": "0:45:59", "remaining_time": "0:12:53", "throughput": 13916.42, "total_tokens": 38408448}
|
|
{"current_steps": 12205, "total_steps": 15621, "loss": 0.3629, "lr": 2.7765535509090786e-07, "epoch": 0.7813200179245887, "percentage": 78.13, "elapsed_time": "0:46:00", "remaining_time": "0:12:52", "throughput": 13919.08, "total_tokens": 38424512}
|
|
{"current_steps": 12210, "total_steps": 15621, "loss": 0.4739, "lr": 2.768831088797495e-07, "epoch": 0.7816400998655656, "percentage": 78.16, "elapsed_time": "0:46:01", "remaining_time": "0:12:51", "throughput": 13921.47, "total_tokens": 38439296}
|
|
{"current_steps": 12215, "total_steps": 15621, "loss": 0.2482, "lr": 2.761117654885201e-07, "epoch": 0.7819601818065425, "percentage": 78.2, "elapsed_time": "0:46:01", "remaining_time": "0:12:50", "throughput": 13924.16, "total_tokens": 38455424}
|
|
{"current_steps": 12220, "total_steps": 15621, "loss": 0.3265, "lr": 2.7534132588025063e-07, "epoch": 0.7822802637475194, "percentage": 78.23, "elapsed_time": "0:46:02", "remaining_time": "0:12:48", "throughput": 13926.7, "total_tokens": 38470976}
|
|
{"current_steps": 12225, "total_steps": 15621, "loss": 0.5075, "lr": 2.7457179101684483e-07, "epoch": 0.7826003456884962, "percentage": 78.26, "elapsed_time": "0:46:02", "remaining_time": "0:12:47", "throughput": 13929.16, "total_tokens": 38486016}
|
|
{"current_steps": 12230, "total_steps": 15621, "loss": 0.298, "lr": 2.7380316185907506e-07, "epoch": 0.7829204276294731, "percentage": 78.29, "elapsed_time": "0:46:03", "remaining_time": "0:12:46", "throughput": 13931.65, "total_tokens": 38501248}
|
|
{"current_steps": 12235, "total_steps": 15621, "loss": 0.3503, "lr": 2.730354393665839e-07, "epoch": 0.78324050957045, "percentage": 78.32, "elapsed_time": "0:46:04", "remaining_time": "0:12:44", "throughput": 13934.23, "total_tokens": 38516992}
|
|
{"current_steps": 12240, "total_steps": 15621, "loss": 0.3702, "lr": 2.7226862449788245e-07, "epoch": 0.7835605915114269, "percentage": 78.36, "elapsed_time": "0:46:04", "remaining_time": "0:12:43", "throughput": 13936.57, "total_tokens": 38531456}
|
|
{"current_steps": 12245, "total_steps": 15621, "loss": 0.3264, "lr": 2.715027182103482e-07, "epoch": 0.7838806734524039, "percentage": 78.39, "elapsed_time": "0:46:05", "remaining_time": "0:12:42", "throughput": 13939.12, "total_tokens": 38546880}
|
|
{"current_steps": 12250, "total_steps": 15621, "loss": 0.3039, "lr": 2.707377214602232e-07, "epoch": 0.7842007553933807, "percentage": 78.42, "elapsed_time": "0:46:05", "remaining_time": "0:12:41", "throughput": 13941.62, "total_tokens": 38562176}
|
|
{"current_steps": 12255, "total_steps": 15621, "loss": 0.4366, "lr": 2.699736352026157e-07, "epoch": 0.7845208373343576, "percentage": 78.45, "elapsed_time": "0:46:06", "remaining_time": "0:12:39", "throughput": 13944.14, "total_tokens": 38577472}
|
|
{"current_steps": 12260, "total_steps": 15621, "loss": 0.3297, "lr": 2.6921046039149645e-07, "epoch": 0.7848409192753345, "percentage": 78.48, "elapsed_time": "0:46:07", "remaining_time": "0:12:38", "throughput": 13946.69, "total_tokens": 38593088}
|
|
{"current_steps": 12265, "total_steps": 15621, "loss": 0.3408, "lr": 2.6844819797969744e-07, "epoch": 0.7851610012163114, "percentage": 78.52, "elapsed_time": "0:46:07", "remaining_time": "0:12:37", "throughput": 13949.1, "total_tokens": 38607936}
|
|
{"current_steps": 12270, "total_steps": 15621, "loss": 0.2481, "lr": 2.6768684891891236e-07, "epoch": 0.7854810831572883, "percentage": 78.55, "elapsed_time": "0:46:08", "remaining_time": "0:12:36", "throughput": 13951.97, "total_tokens": 38625024}
|
|
{"current_steps": 12275, "total_steps": 15621, "loss": 0.3321, "lr": 2.6692641415969497e-07, "epoch": 0.7858011650982651, "percentage": 78.58, "elapsed_time": "0:46:09", "remaining_time": "0:12:34", "throughput": 13954.79, "total_tokens": 38641792}
|
|
{"current_steps": 12280, "total_steps": 15621, "loss": 0.395, "lr": 2.66166894651457e-07, "epoch": 0.786121247039242, "percentage": 78.61, "elapsed_time": "0:46:09", "remaining_time": "0:12:33", "throughput": 13957.27, "total_tokens": 38656896}
|
|
{"current_steps": 12285, "total_steps": 15621, "loss": 0.3426, "lr": 2.654082913424668e-07, "epoch": 0.7864413289802189, "percentage": 78.64, "elapsed_time": "0:46:10", "remaining_time": "0:12:32", "throughput": 13959.87, "total_tokens": 38672448}
|
|
{"current_steps": 12290, "total_steps": 15621, "loss": 0.3016, "lr": 2.6465060517985003e-07, "epoch": 0.7867614109211958, "percentage": 78.68, "elapsed_time": "0:46:10", "remaining_time": "0:12:31", "throughput": 13962.55, "total_tokens": 38688576}
|
|
{"current_steps": 12295, "total_steps": 15621, "loss": 0.5123, "lr": 2.638938371095867e-07, "epoch": 0.7870814928621728, "percentage": 78.71, "elapsed_time": "0:46:11", "remaining_time": "0:12:29", "throughput": 13965.09, "total_tokens": 38704064}
|
|
{"current_steps": 12300, "total_steps": 15621, "loss": 0.381, "lr": 2.6313798807651065e-07, "epoch": 0.7874015748031497, "percentage": 78.74, "elapsed_time": "0:46:12", "remaining_time": "0:12:28", "throughput": 13967.5, "total_tokens": 38718976}
|
|
{"current_steps": 12305, "total_steps": 15621, "loss": 0.3529, "lr": 2.6238305902430813e-07, "epoch": 0.7877216567441265, "percentage": 78.77, "elapsed_time": "0:46:12", "remaining_time": "0:12:27", "throughput": 13969.98, "total_tokens": 38734272}
|
|
{"current_steps": 12310, "total_steps": 15621, "loss": 0.307, "lr": 2.61629050895517e-07, "epoch": 0.7880417386851034, "percentage": 78.8, "elapsed_time": "0:46:13", "remaining_time": "0:12:25", "throughput": 13972.45, "total_tokens": 38749504}
|
|
{"current_steps": 12315, "total_steps": 15621, "loss": 0.3171, "lr": 2.608759646315253e-07, "epoch": 0.7883618206260803, "percentage": 78.84, "elapsed_time": "0:46:13", "remaining_time": "0:12:24", "throughput": 13974.85, "total_tokens": 38764352}
|
|
{"current_steps": 12320, "total_steps": 15621, "loss": 0.3637, "lr": 2.6012380117257005e-07, "epoch": 0.7886819025670572, "percentage": 78.87, "elapsed_time": "0:46:14", "remaining_time": "0:12:23", "throughput": 13977.46, "total_tokens": 38780096}
|
|
{"current_steps": 12325, "total_steps": 15621, "loss": 0.3902, "lr": 2.5937256145773613e-07, "epoch": 0.789001984508034, "percentage": 78.9, "elapsed_time": "0:46:15", "remaining_time": "0:12:22", "throughput": 13980.02, "total_tokens": 38795712}
|
|
{"current_steps": 12330, "total_steps": 15621, "loss": 0.3264, "lr": 2.586222464249551e-07, "epoch": 0.7893220664490109, "percentage": 78.93, "elapsed_time": "0:46:15", "remaining_time": "0:12:20", "throughput": 13982.58, "total_tokens": 38811328}
|
|
{"current_steps": 12335, "total_steps": 15621, "loss": 0.2022, "lr": 2.5787285701100413e-07, "epoch": 0.7896421483899878, "percentage": 78.96, "elapsed_time": "0:46:16", "remaining_time": "0:12:19", "throughput": 13985.01, "total_tokens": 38826240}
|
|
{"current_steps": 12340, "total_steps": 15621, "loss": 0.3672, "lr": 2.571243941515048e-07, "epoch": 0.7899622303309647, "percentage": 79.0, "elapsed_time": "0:46:16", "remaining_time": "0:12:18", "throughput": 13987.71, "total_tokens": 38842624}
|
|
{"current_steps": 12345, "total_steps": 15621, "loss": 0.2672, "lr": 2.563768587809213e-07, "epoch": 0.7902823122719416, "percentage": 79.03, "elapsed_time": "0:46:17", "remaining_time": "0:12:17", "throughput": 13990.09, "total_tokens": 38857472}
|
|
{"current_steps": 12350, "total_steps": 15621, "loss": 0.4118, "lr": 2.5563025183256137e-07, "epoch": 0.7906023942129186, "percentage": 79.06, "elapsed_time": "0:46:18", "remaining_time": "0:12:15", "throughput": 13992.47, "total_tokens": 38872256}
|
|
{"current_steps": 12355, "total_steps": 15621, "loss": 0.552, "lr": 2.548845742385717e-07, "epoch": 0.7909224761538954, "percentage": 79.09, "elapsed_time": "0:46:18", "remaining_time": "0:12:14", "throughput": 13995.51, "total_tokens": 38890048}
|
|
{"current_steps": 12360, "total_steps": 15621, "loss": 0.2356, "lr": 2.541398269299393e-07, "epoch": 0.7912425580948723, "percentage": 79.12, "elapsed_time": "0:46:19", "remaining_time": "0:12:13", "throughput": 13998.08, "total_tokens": 38905664}
|
|
{"current_steps": 12365, "total_steps": 15621, "loss": 0.2978, "lr": 2.5339601083649063e-07, "epoch": 0.7915626400358492, "percentage": 79.16, "elapsed_time": "0:46:20", "remaining_time": "0:12:12", "throughput": 14001.63, "total_tokens": 38926144}
|
|
{"current_steps": 12370, "total_steps": 15621, "loss": 0.4751, "lr": 2.526531268868889e-07, "epoch": 0.7918827219768261, "percentage": 79.19, "elapsed_time": "0:46:20", "remaining_time": "0:12:10", "throughput": 14004.35, "total_tokens": 38942720}
|
|
{"current_steps": 12375, "total_steps": 15621, "loss": 0.3397, "lr": 2.5191117600863266e-07, "epoch": 0.792202803917803, "percentage": 79.22, "elapsed_time": "0:46:21", "remaining_time": "0:12:09", "throughput": 14006.85, "total_tokens": 38958144}
|
|
{"current_steps": 12380, "total_steps": 15621, "loss": 0.2568, "lr": 2.511701591280565e-07, "epoch": 0.7925228858587798, "percentage": 79.25, "elapsed_time": "0:46:21", "remaining_time": "0:12:08", "throughput": 14009.35, "total_tokens": 38973376}
|
|
{"current_steps": 12385, "total_steps": 15621, "loss": 0.346, "lr": 2.504300771703295e-07, "epoch": 0.7928429677997567, "percentage": 79.28, "elapsed_time": "0:46:22", "remaining_time": "0:12:07", "throughput": 14012.02, "total_tokens": 38989504}
|
|
{"current_steps": 12390, "total_steps": 15621, "loss": 0.3626, "lr": 2.496909310594517e-07, "epoch": 0.7931630497407336, "percentage": 79.32, "elapsed_time": "0:46:23", "remaining_time": "0:12:05", "throughput": 14014.58, "total_tokens": 39005056}
|
|
{"current_steps": 12395, "total_steps": 15621, "loss": 0.4459, "lr": 2.4895272171825587e-07, "epoch": 0.7934831316817105, "percentage": 79.35, "elapsed_time": "0:46:23", "remaining_time": "0:12:04", "throughput": 14017.12, "total_tokens": 39020608}
|
|
{"current_steps": 12400, "total_steps": 15621, "loss": 0.443, "lr": 2.482154500684055e-07, "epoch": 0.7938032136226874, "percentage": 79.38, "elapsed_time": "0:46:24", "remaining_time": "0:12:03", "throughput": 14019.57, "total_tokens": 39035712}
|
|
{"current_steps": 12405, "total_steps": 15621, "loss": 0.3361, "lr": 2.4747911703039293e-07, "epoch": 0.7941232955636643, "percentage": 79.41, "elapsed_time": "0:46:24", "remaining_time": "0:12:02", "throughput": 14022.01, "total_tokens": 39050880}
|
|
{"current_steps": 12410, "total_steps": 15621, "loss": 0.3689, "lr": 2.467437235235378e-07, "epoch": 0.7944433775046412, "percentage": 79.44, "elapsed_time": "0:46:25", "remaining_time": "0:12:00", "throughput": 14024.45, "total_tokens": 39065792}
|
|
{"current_steps": 12415, "total_steps": 15621, "loss": 0.3418, "lr": 2.460092704659883e-07, "epoch": 0.7947634594456181, "percentage": 79.48, "elapsed_time": "0:46:26", "remaining_time": "0:11:59", "throughput": 14026.93, "total_tokens": 39080960}
|
|
{"current_steps": 12420, "total_steps": 15621, "loss": 0.2604, "lr": 2.452757587747174e-07, "epoch": 0.795083541386595, "percentage": 79.51, "elapsed_time": "0:46:26", "remaining_time": "0:11:58", "throughput": 14029.59, "total_tokens": 39097216}
|
|
{"current_steps": 12425, "total_steps": 15621, "loss": 0.1771, "lr": 2.445431893655232e-07, "epoch": 0.7954036233275719, "percentage": 79.54, "elapsed_time": "0:46:27", "remaining_time": "0:11:56", "throughput": 14032.21, "total_tokens": 39113152}
|
|
{"current_steps": 12430, "total_steps": 15621, "loss": 0.3722, "lr": 2.438115631530271e-07, "epoch": 0.7957237052685487, "percentage": 79.57, "elapsed_time": "0:46:28", "remaining_time": "0:11:55", "throughput": 14035.05, "total_tokens": 39130176}
|
|
{"current_steps": 12435, "total_steps": 15621, "loss": 0.2283, "lr": 2.4308088105067305e-07, "epoch": 0.7960437872095256, "percentage": 79.6, "elapsed_time": "0:46:28", "remaining_time": "0:11:54", "throughput": 14037.58, "total_tokens": 39145792}
|
|
{"current_steps": 12440, "total_steps": 15621, "loss": 0.4201, "lr": 2.423511439707262e-07, "epoch": 0.7963638691505025, "percentage": 79.64, "elapsed_time": "0:46:29", "remaining_time": "0:11:53", "throughput": 14040.09, "total_tokens": 39161280}
|
|
{"current_steps": 12445, "total_steps": 15621, "loss": 0.2784, "lr": 2.4162235282427177e-07, "epoch": 0.7966839510914794, "percentage": 79.67, "elapsed_time": "0:46:29", "remaining_time": "0:11:51", "throughput": 14042.54, "total_tokens": 39176512}
|
|
{"current_steps": 12450, "total_steps": 15621, "loss": 0.3621, "lr": 2.408945085212144e-07, "epoch": 0.7970040330324563, "percentage": 79.7, "elapsed_time": "0:46:30", "remaining_time": "0:11:50", "throughput": 14045.0, "total_tokens": 39191808}
|
|
{"current_steps": 12455, "total_steps": 15621, "loss": 0.2479, "lr": 2.401676119702759e-07, "epoch": 0.7973241149734333, "percentage": 79.73, "elapsed_time": "0:46:31", "remaining_time": "0:11:49", "throughput": 14047.78, "total_tokens": 39208640}
|
|
{"current_steps": 12460, "total_steps": 15621, "loss": 0.3438, "lr": 2.394416640789952e-07, "epoch": 0.7976441969144101, "percentage": 79.76, "elapsed_time": "0:46:31", "remaining_time": "0:11:48", "throughput": 14050.1, "total_tokens": 39223232}
|
|
{"current_steps": 12465, "total_steps": 15621, "loss": 0.3098, "lr": 2.3871666575372696e-07, "epoch": 0.797964278855387, "percentage": 79.8, "elapsed_time": "0:46:32", "remaining_time": "0:11:46", "throughput": 14052.57, "total_tokens": 39238656}
|
|
{"current_steps": 12470, "total_steps": 15621, "loss": 0.532, "lr": 2.3799261789963964e-07, "epoch": 0.7982843607963639, "percentage": 79.83, "elapsed_time": "0:46:32", "remaining_time": "0:11:45", "throughput": 14055.45, "total_tokens": 39255872}
|
|
{"current_steps": 12475, "total_steps": 15621, "loss": 0.2708, "lr": 2.3726952142071644e-07, "epoch": 0.7986044427373408, "percentage": 79.86, "elapsed_time": "0:46:33", "remaining_time": "0:11:44", "throughput": 14057.79, "total_tokens": 39270784}
|
|
{"current_steps": 12480, "total_steps": 15621, "loss": 0.3462, "lr": 2.365473772197508e-07, "epoch": 0.7989245246783176, "percentage": 79.89, "elapsed_time": "0:46:34", "remaining_time": "0:11:43", "throughput": 14060.3, "total_tokens": 39286080}
|
|
{"current_steps": 12485, "total_steps": 15621, "loss": 0.356, "lr": 2.3582618619834883e-07, "epoch": 0.7992446066192945, "percentage": 79.92, "elapsed_time": "0:46:34", "remaining_time": "0:11:41", "throughput": 14062.77, "total_tokens": 39301312}
|
|
{"current_steps": 12490, "total_steps": 15621, "loss": 0.2216, "lr": 2.3510594925692528e-07, "epoch": 0.7995646885602714, "percentage": 79.96, "elapsed_time": "0:46:35", "remaining_time": "0:11:40", "throughput": 14065.25, "total_tokens": 39316736}
|
|
{"current_steps": 12495, "total_steps": 15621, "loss": 0.3493, "lr": 2.343866672947057e-07, "epoch": 0.7998847705012483, "percentage": 79.99, "elapsed_time": "0:46:35", "remaining_time": "0:11:39", "throughput": 14067.56, "total_tokens": 39331264}
|
|
{"current_steps": 12500, "total_steps": 15621, "loss": 0.2587, "lr": 2.336683412097209e-07, "epoch": 0.8002048524422252, "percentage": 80.02, "elapsed_time": "0:46:36", "remaining_time": "0:11:38", "throughput": 14069.89, "total_tokens": 39345856}
|
|
{"current_steps": 12505, "total_steps": 15621, "loss": 0.3645, "lr": 2.329509718988095e-07, "epoch": 0.800524934383202, "percentage": 80.05, "elapsed_time": "0:46:37", "remaining_time": "0:11:36", "throughput": 14072.41, "total_tokens": 39361280}
|
|
{"current_steps": 12510, "total_steps": 15621, "loss": 0.3367, "lr": 2.3223456025761645e-07, "epoch": 0.800845016324179, "percentage": 80.08, "elapsed_time": "0:46:37", "remaining_time": "0:11:35", "throughput": 14074.71, "total_tokens": 39375872}
|
|
{"current_steps": 12512, "total_steps": 15621, "eval_loss": 0.36358681321144104, "epoch": 0.8009730491005698, "percentage": 80.1, "elapsed_time": "0:47:27", "remaining_time": "0:11:47", "throughput": 13832.76, "total_tokens": 39382144}
|
|
{"current_steps": 12515, "total_steps": 15621, "loss": 0.2866, "lr": 2.315191071805892e-07, "epoch": 0.8011650982651559, "percentage": 80.12, "elapsed_time": "0:47:58", "remaining_time": "0:11:54", "throughput": 13685.71, "total_tokens": 39392320}
|
|
{"current_steps": 12520, "total_steps": 15621, "loss": 0.3619, "lr": 2.3080461356097937e-07, "epoch": 0.8014851802061328, "percentage": 80.15, "elapsed_time": "0:47:58", "remaining_time": "0:11:53", "throughput": 13688.19, "total_tokens": 39407680}
|
|
{"current_steps": 12525, "total_steps": 15621, "loss": 0.288, "lr": 2.30091080290841e-07, "epoch": 0.8018052621471097, "percentage": 80.18, "elapsed_time": "0:47:59", "remaining_time": "0:11:51", "throughput": 13690.95, "total_tokens": 39424512}
|
|
{"current_steps": 12530, "total_steps": 15621, "loss": 0.3463, "lr": 2.29378508261029e-07, "epoch": 0.8021253440880866, "percentage": 80.21, "elapsed_time": "0:48:00", "remaining_time": "0:11:50", "throughput": 13693.26, "total_tokens": 39439296}
|
|
{"current_steps": 12535, "total_steps": 15621, "loss": 0.3707, "lr": 2.2866689836119702e-07, "epoch": 0.8024454260290634, "percentage": 80.24, "elapsed_time": "0:48:00", "remaining_time": "0:11:49", "throughput": 13696.13, "total_tokens": 39456576}
|
|
{"current_steps": 12540, "total_steps": 15621, "loss": 0.3536, "lr": 2.2795625147979913e-07, "epoch": 0.8027655079700403, "percentage": 80.28, "elapsed_time": "0:48:01", "remaining_time": "0:11:47", "throughput": 13698.76, "total_tokens": 39472512}
|
|
{"current_steps": 12545, "total_steps": 15621, "loss": 0.2332, "lr": 2.2724656850408597e-07, "epoch": 0.8030855899110172, "percentage": 80.31, "elapsed_time": "0:48:02", "remaining_time": "0:11:46", "throughput": 13701.29, "total_tokens": 39488192}
|
|
{"current_steps": 12550, "total_steps": 15621, "loss": 0.3855, "lr": 2.2653785032010532e-07, "epoch": 0.8034056718519941, "percentage": 80.34, "elapsed_time": "0:48:02", "remaining_time": "0:11:45", "throughput": 13703.71, "total_tokens": 39503552}
|
|
{"current_steps": 12555, "total_steps": 15621, "loss": 0.3363, "lr": 2.258300978126999e-07, "epoch": 0.803725753792971, "percentage": 80.37, "elapsed_time": "0:48:03", "remaining_time": "0:11:44", "throughput": 13706.35, "total_tokens": 39519744}
|
|
{"current_steps": 12560, "total_steps": 15621, "loss": 0.4753, "lr": 2.2512331186550715e-07, "epoch": 0.804045835733948, "percentage": 80.4, "elapsed_time": "0:48:03", "remaining_time": "0:11:42", "throughput": 13708.82, "total_tokens": 39535232}
|
|
{"current_steps": 12565, "total_steps": 15621, "loss": 0.3878, "lr": 2.244174933609575e-07, "epoch": 0.8043659176749248, "percentage": 80.44, "elapsed_time": "0:48:04", "remaining_time": "0:11:41", "throughput": 13711.08, "total_tokens": 39549568}
|
|
{"current_steps": 12570, "total_steps": 15621, "loss": 0.2764, "lr": 2.2371264318027383e-07, "epoch": 0.8046859996159017, "percentage": 80.47, "elapsed_time": "0:48:05", "remaining_time": "0:11:40", "throughput": 13712.61, "total_tokens": 39566016}
|
|
{"current_steps": 12575, "total_steps": 15621, "loss": 0.2308, "lr": 2.2300876220346975e-07, "epoch": 0.8050060815568786, "percentage": 80.5, "elapsed_time": "0:48:05", "remaining_time": "0:11:39", "throughput": 13715.15, "total_tokens": 39581760}
|
|
{"current_steps": 12580, "total_steps": 15621, "loss": 0.2785, "lr": 2.2230585130934897e-07, "epoch": 0.8053261634978555, "percentage": 80.53, "elapsed_time": "0:48:06", "remaining_time": "0:11:37", "throughput": 13717.76, "total_tokens": 39597888}
|
|
{"current_steps": 12585, "total_steps": 15621, "loss": 0.4454, "lr": 2.2160391137550394e-07, "epoch": 0.8056462454388323, "percentage": 80.56, "elapsed_time": "0:48:07", "remaining_time": "0:11:36", "throughput": 13720.28, "total_tokens": 39613568}
|
|
{"current_steps": 12590, "total_steps": 15621, "loss": 0.4314, "lr": 2.2090294327831494e-07, "epoch": 0.8059663273798092, "percentage": 80.6, "elapsed_time": "0:48:07", "remaining_time": "0:11:35", "throughput": 13722.59, "total_tokens": 39628096}
|
|
{"current_steps": 12595, "total_steps": 15621, "loss": 0.2695, "lr": 2.202029478929488e-07, "epoch": 0.8062864093207861, "percentage": 80.63, "elapsed_time": "0:48:08", "remaining_time": "0:11:33", "throughput": 13724.83, "total_tokens": 39642560}
|
|
{"current_steps": 12600, "total_steps": 15621, "loss": 0.2967, "lr": 2.195039260933581e-07, "epoch": 0.806606491261763, "percentage": 80.66, "elapsed_time": "0:48:08", "remaining_time": "0:11:32", "throughput": 13727.34, "total_tokens": 39658112}
|
|
{"current_steps": 12605, "total_steps": 15621, "loss": 0.2657, "lr": 2.1880587875227973e-07, "epoch": 0.8069265732027399, "percentage": 80.69, "elapsed_time": "0:48:09", "remaining_time": "0:11:31", "throughput": 13729.95, "total_tokens": 39674112}
|
|
{"current_steps": 12610, "total_steps": 15621, "loss": 0.3313, "lr": 2.18108806741234e-07, "epoch": 0.8072466551437167, "percentage": 80.72, "elapsed_time": "0:48:10", "remaining_time": "0:11:30", "throughput": 13732.63, "total_tokens": 39690432}
|
|
{"current_steps": 12615, "total_steps": 15621, "loss": 0.3512, "lr": 2.1741271093052315e-07, "epoch": 0.8075667370846937, "percentage": 80.76, "elapsed_time": "0:48:10", "remaining_time": "0:11:28", "throughput": 13735.07, "total_tokens": 39705792}
|
|
{"current_steps": 12620, "total_steps": 15621, "loss": 0.4692, "lr": 2.167175921892318e-07, "epoch": 0.8078868190256706, "percentage": 80.79, "elapsed_time": "0:48:11", "remaining_time": "0:11:27", "throughput": 13737.68, "total_tokens": 39722048}
|
|
{"current_steps": 12625, "total_steps": 15621, "loss": 0.4239, "lr": 2.1602345138522314e-07, "epoch": 0.8082069009666475, "percentage": 80.82, "elapsed_time": "0:48:12", "remaining_time": "0:11:26", "throughput": 13740.31, "total_tokens": 39738304}
|
|
{"current_steps": 12630, "total_steps": 15621, "loss": 0.3468, "lr": 2.1533028938514008e-07, "epoch": 0.8085269829076244, "percentage": 80.85, "elapsed_time": "0:48:12", "remaining_time": "0:11:25", "throughput": 13742.78, "total_tokens": 39753728}
|
|
{"current_steps": 12635, "total_steps": 15621, "loss": 0.3435, "lr": 2.1463810705440433e-07, "epoch": 0.8088470648486012, "percentage": 80.88, "elapsed_time": "0:48:13", "remaining_time": "0:11:23", "throughput": 13745.31, "total_tokens": 39769600}
|
|
{"current_steps": 12640, "total_steps": 15621, "loss": 0.3519, "lr": 2.139469052572127e-07, "epoch": 0.8091671467895781, "percentage": 80.92, "elapsed_time": "0:48:13", "remaining_time": "0:11:22", "throughput": 13747.56, "total_tokens": 39784000}
|
|
{"current_steps": 12645, "total_steps": 15621, "loss": 0.344, "lr": 2.1325668485653891e-07, "epoch": 0.809487228730555, "percentage": 80.95, "elapsed_time": "0:48:14", "remaining_time": "0:11:21", "throughput": 13750.19, "total_tokens": 39800320}
|
|
{"current_steps": 12650, "total_steps": 15621, "loss": 0.457, "lr": 2.1256744671413173e-07, "epoch": 0.8098073106715319, "percentage": 80.98, "elapsed_time": "0:48:15", "remaining_time": "0:11:19", "throughput": 13752.59, "total_tokens": 39815360}
|
|
{"current_steps": 12655, "total_steps": 15621, "loss": 0.3821, "lr": 2.1187919169051316e-07, "epoch": 0.8101273926125088, "percentage": 81.01, "elapsed_time": "0:48:15", "remaining_time": "0:11:18", "throughput": 13754.85, "total_tokens": 39829952}
|
|
{"current_steps": 12660, "total_steps": 15621, "loss": 0.3528, "lr": 2.111919206449767e-07, "epoch": 0.8104474745534856, "percentage": 81.04, "elapsed_time": "0:48:16", "remaining_time": "0:11:17", "throughput": 13757.31, "total_tokens": 39845376}
|
|
{"current_steps": 12665, "total_steps": 15621, "loss": 0.4858, "lr": 2.1050563443558922e-07, "epoch": 0.8107675564944626, "percentage": 81.08, "elapsed_time": "0:48:16", "remaining_time": "0:11:16", "throughput": 13759.95, "total_tokens": 39861696}
|
|
{"current_steps": 12670, "total_steps": 15621, "loss": 0.297, "lr": 2.0982033391918697e-07, "epoch": 0.8110876384354395, "percentage": 81.11, "elapsed_time": "0:48:17", "remaining_time": "0:11:14", "throughput": 13762.47, "total_tokens": 39877440}
|
|
{"current_steps": 12675, "total_steps": 15621, "loss": 0.334, "lr": 2.0913601995137543e-07, "epoch": 0.8114077203764164, "percentage": 81.14, "elapsed_time": "0:48:18", "remaining_time": "0:11:13", "throughput": 13765.08, "total_tokens": 39893760}
|
|
{"current_steps": 12680, "total_steps": 15621, "loss": 0.2943, "lr": 2.084526933865287e-07, "epoch": 0.8117278023173933, "percentage": 81.17, "elapsed_time": "0:48:18", "remaining_time": "0:11:12", "throughput": 13767.62, "total_tokens": 39909568}
|
|
{"current_steps": 12685, "total_steps": 15621, "loss": 0.4543, "lr": 2.0777035507778817e-07, "epoch": 0.8120478842583702, "percentage": 81.2, "elapsed_time": "0:48:19", "remaining_time": "0:11:11", "throughput": 13769.79, "total_tokens": 39923648}
|
|
{"current_steps": 12690, "total_steps": 15621, "loss": 0.4299, "lr": 2.0708900587706135e-07, "epoch": 0.812367966199347, "percentage": 81.24, "elapsed_time": "0:48:19", "remaining_time": "0:11:09", "throughput": 13772.17, "total_tokens": 39939008}
|
|
{"current_steps": 12695, "total_steps": 15621, "loss": 0.3374, "lr": 2.0640864663502e-07, "epoch": 0.8126880481403239, "percentage": 81.27, "elapsed_time": "0:48:20", "remaining_time": "0:11:08", "throughput": 13774.71, "total_tokens": 39955072}
|
|
{"current_steps": 12700, "total_steps": 15621, "loss": 0.4545, "lr": 2.057292782011013e-07, "epoch": 0.8130081300813008, "percentage": 81.3, "elapsed_time": "0:48:21", "remaining_time": "0:11:07", "throughput": 13777.24, "total_tokens": 39970880}
|
|
{"current_steps": 12705, "total_steps": 15621, "loss": 0.2967, "lr": 2.0505090142350468e-07, "epoch": 0.8133282120222777, "percentage": 81.33, "elapsed_time": "0:48:21", "remaining_time": "0:11:06", "throughput": 13779.64, "total_tokens": 39986240}
|
|
{"current_steps": 12710, "total_steps": 15621, "loss": 0.3427, "lr": 2.0437351714919127e-07, "epoch": 0.8136482939632546, "percentage": 81.36, "elapsed_time": "0:48:22", "remaining_time": "0:11:04", "throughput": 13782.12, "total_tokens": 40001856}
|
|
{"current_steps": 12715, "total_steps": 15621, "loss": 0.309, "lr": 2.0369712622388336e-07, "epoch": 0.8139683759042314, "percentage": 81.4, "elapsed_time": "0:48:23", "remaining_time": "0:11:03", "throughput": 13784.72, "total_tokens": 40018112}
|
|
{"current_steps": 12720, "total_steps": 15621, "loss": 0.2879, "lr": 2.0302172949206298e-07, "epoch": 0.8142884578452084, "percentage": 81.43, "elapsed_time": "0:48:23", "remaining_time": "0:11:02", "throughput": 13787.14, "total_tokens": 40033664}
|
|
{"current_steps": 12725, "total_steps": 15621, "loss": 0.2967, "lr": 2.0234732779697094e-07, "epoch": 0.8146085397861853, "percentage": 81.46, "elapsed_time": "0:48:24", "remaining_time": "0:11:00", "throughput": 13789.5, "total_tokens": 40048768}
|
|
{"current_steps": 12730, "total_steps": 15621, "loss": 0.3229, "lr": 2.016739219806056e-07, "epoch": 0.8149286217271622, "percentage": 81.49, "elapsed_time": "0:48:24", "remaining_time": "0:10:59", "throughput": 13791.72, "total_tokens": 40063232}
|
|
{"current_steps": 12735, "total_steps": 15621, "loss": 0.3904, "lr": 2.0100151288372215e-07, "epoch": 0.8152487036681391, "percentage": 81.52, "elapsed_time": "0:48:25", "remaining_time": "0:10:58", "throughput": 13794.28, "total_tokens": 40079296}
|
|
{"current_steps": 12740, "total_steps": 15621, "loss": 0.5554, "lr": 2.0033010134583084e-07, "epoch": 0.8155687856091159, "percentage": 81.56, "elapsed_time": "0:48:26", "remaining_time": "0:10:57", "throughput": 13796.77, "total_tokens": 40094976}
|
|
{"current_steps": 12745, "total_steps": 15621, "loss": 0.3218, "lr": 1.9965968820519763e-07, "epoch": 0.8158888675500928, "percentage": 81.59, "elapsed_time": "0:48:26", "remaining_time": "0:10:55", "throughput": 13799.2, "total_tokens": 40110464}
|
|
{"current_steps": 12750, "total_steps": 15621, "loss": 0.3981, "lr": 1.9899027429884042e-07, "epoch": 0.8162089494910697, "percentage": 81.62, "elapsed_time": "0:48:27", "remaining_time": "0:10:54", "throughput": 13801.57, "total_tokens": 40125568}
|
|
{"current_steps": 12755, "total_steps": 15621, "loss": 0.4142, "lr": 1.983218604625305e-07, "epoch": 0.8165290314320466, "percentage": 81.65, "elapsed_time": "0:48:27", "remaining_time": "0:10:53", "throughput": 13804.12, "total_tokens": 40141440}
|
|
{"current_steps": 12760, "total_steps": 15621, "loss": 0.3275, "lr": 1.9765444753079096e-07, "epoch": 0.8168491133730235, "percentage": 81.68, "elapsed_time": "0:48:28", "remaining_time": "0:10:52", "throughput": 13806.43, "total_tokens": 40156416}
|
|
{"current_steps": 12765, "total_steps": 15621, "loss": 0.3998, "lr": 1.9698803633689408e-07, "epoch": 0.8171691953140003, "percentage": 81.72, "elapsed_time": "0:48:29", "remaining_time": "0:10:50", "throughput": 13809.06, "total_tokens": 40172928}
|
|
{"current_steps": 12770, "total_steps": 15621, "loss": 0.2336, "lr": 1.963226277128619e-07, "epoch": 0.8174892772549772, "percentage": 81.75, "elapsed_time": "0:48:29", "remaining_time": "0:10:49", "throughput": 13811.46, "total_tokens": 40188096}
|
|
{"current_steps": 12775, "total_steps": 15621, "loss": 0.3593, "lr": 1.956582224894655e-07, "epoch": 0.8178093591959542, "percentage": 81.78, "elapsed_time": "0:48:30", "remaining_time": "0:10:48", "throughput": 13813.97, "total_tokens": 40204032}
|
|
{"current_steps": 12780, "total_steps": 15621, "loss": 0.3646, "lr": 1.949948214962227e-07, "epoch": 0.8181294411369311, "percentage": 81.81, "elapsed_time": "0:48:30", "remaining_time": "0:10:47", "throughput": 13816.32, "total_tokens": 40218944}
|
|
{"current_steps": 12785, "total_steps": 15621, "loss": 0.3731, "lr": 1.943324255613964e-07, "epoch": 0.818449523077908, "percentage": 81.84, "elapsed_time": "0:48:31", "remaining_time": "0:10:45", "throughput": 13818.97, "total_tokens": 40235456}
|
|
{"current_steps": 12790, "total_steps": 15621, "loss": 0.4505, "lr": 1.936710355119967e-07, "epoch": 0.8187696050188848, "percentage": 81.88, "elapsed_time": "0:48:32", "remaining_time": "0:10:44", "throughput": 13821.24, "total_tokens": 40250176}
|
|
{"current_steps": 12795, "total_steps": 15621, "loss": 0.3157, "lr": 1.9301065217377655e-07, "epoch": 0.8190896869598617, "percentage": 81.91, "elapsed_time": "0:48:32", "remaining_time": "0:10:43", "throughput": 13823.64, "total_tokens": 40265472}
|
|
{"current_steps": 12800, "total_steps": 15621, "loss": 0.3992, "lr": 1.9235127637123249e-07, "epoch": 0.8194097689008386, "percentage": 81.94, "elapsed_time": "0:48:33", "remaining_time": "0:10:42", "throughput": 13826.22, "total_tokens": 40281728}
|
|
{"current_steps": 12805, "total_steps": 15621, "loss": 0.3282, "lr": 1.9169290892760225e-07, "epoch": 0.8197298508418155, "percentage": 81.97, "elapsed_time": "0:48:34", "remaining_time": "0:10:40", "throughput": 13828.57, "total_tokens": 40296768}
|
|
{"current_steps": 12810, "total_steps": 15621, "loss": 0.3201, "lr": 1.91035550664866e-07, "epoch": 0.8200499327827924, "percentage": 82.0, "elapsed_time": "0:48:34", "remaining_time": "0:10:39", "throughput": 13830.81, "total_tokens": 40311488}
|
|
{"current_steps": 12815, "total_steps": 15621, "loss": 0.314, "lr": 1.903792024037433e-07, "epoch": 0.8203700147237692, "percentage": 82.04, "elapsed_time": "0:48:35", "remaining_time": "0:10:38", "throughput": 13833.28, "total_tokens": 40327232}
|
|
{"current_steps": 12820, "total_steps": 15621, "loss": 0.4472, "lr": 1.8972386496369185e-07, "epoch": 0.8206900966647461, "percentage": 82.07, "elapsed_time": "0:48:35", "remaining_time": "0:10:37", "throughput": 13835.97, "total_tokens": 40344064}
|
|
{"current_steps": 12825, "total_steps": 15621, "loss": 0.3976, "lr": 1.89069539162909e-07, "epoch": 0.8210101786057231, "percentage": 82.1, "elapsed_time": "0:48:36", "remaining_time": "0:10:35", "throughput": 13838.32, "total_tokens": 40359040}
|
|
{"current_steps": 12830, "total_steps": 15621, "loss": 0.4066, "lr": 1.8841622581832783e-07, "epoch": 0.8213302605467, "percentage": 82.13, "elapsed_time": "0:48:37", "remaining_time": "0:10:34", "throughput": 13841.12, "total_tokens": 40376384}
|
|
{"current_steps": 12835, "total_steps": 15621, "loss": 0.5901, "lr": 1.8776392574561783e-07, "epoch": 0.8216503424876769, "percentage": 82.17, "elapsed_time": "0:48:37", "remaining_time": "0:10:33", "throughput": 13843.56, "total_tokens": 40391936}
|
|
{"current_steps": 12840, "total_steps": 15621, "loss": 0.4831, "lr": 1.8711263975918322e-07, "epoch": 0.8219704244286538, "percentage": 82.2, "elapsed_time": "0:48:38", "remaining_time": "0:10:32", "throughput": 13846.32, "total_tokens": 40408832}
|
|
{"current_steps": 12845, "total_steps": 15621, "loss": 0.4603, "lr": 1.8646236867216215e-07, "epoch": 0.8222905063696306, "percentage": 82.23, "elapsed_time": "0:48:39", "remaining_time": "0:10:30", "throughput": 13848.96, "total_tokens": 40425280}
|
|
{"current_steps": 12850, "total_steps": 15621, "loss": 0.338, "lr": 1.8581311329642591e-07, "epoch": 0.8226105883106075, "percentage": 82.26, "elapsed_time": "0:48:39", "remaining_time": "0:10:29", "throughput": 13851.39, "total_tokens": 40440832}
|
|
{"current_steps": 12855, "total_steps": 15621, "loss": 0.2651, "lr": 1.8516487444257723e-07, "epoch": 0.8229306702515844, "percentage": 82.29, "elapsed_time": "0:48:40", "remaining_time": "0:10:28", "throughput": 13854.3, "total_tokens": 40458624}
|
|
{"current_steps": 12860, "total_steps": 15621, "loss": 0.4093, "lr": 1.8451765291995004e-07, "epoch": 0.8232507521925613, "percentage": 82.33, "elapsed_time": "0:48:40", "remaining_time": "0:10:27", "throughput": 13856.89, "total_tokens": 40474688}
|
|
{"current_steps": 12865, "total_steps": 15621, "loss": 0.3554, "lr": 1.8387144953660806e-07, "epoch": 0.8235708341335382, "percentage": 82.36, "elapsed_time": "0:48:41", "remaining_time": "0:10:25", "throughput": 13859.47, "total_tokens": 40490816}
|
|
{"current_steps": 12870, "total_steps": 15621, "loss": 0.4472, "lr": 1.832262650993437e-07, "epoch": 0.823890916074515, "percentage": 82.39, "elapsed_time": "0:48:42", "remaining_time": "0:10:24", "throughput": 13861.86, "total_tokens": 40506112}
|
|
{"current_steps": 12875, "total_steps": 15621, "loss": 0.2954, "lr": 1.825821004136774e-07, "epoch": 0.8242109980154919, "percentage": 82.42, "elapsed_time": "0:48:42", "remaining_time": "0:10:23", "throughput": 13864.23, "total_tokens": 40521344}
|
|
{"current_steps": 12880, "total_steps": 15621, "loss": 0.2698, "lr": 1.819389562838559e-07, "epoch": 0.8245310799564689, "percentage": 82.45, "elapsed_time": "0:48:43", "remaining_time": "0:10:22", "throughput": 13866.73, "total_tokens": 40537024}
|
|
{"current_steps": 12885, "total_steps": 15621, "loss": 0.3136, "lr": 1.8129683351285319e-07, "epoch": 0.8248511618974458, "percentage": 82.49, "elapsed_time": "0:48:43", "remaining_time": "0:10:20", "throughput": 13869.19, "total_tokens": 40552640}
|
|
{"current_steps": 12890, "total_steps": 15621, "loss": 0.3186, "lr": 1.8065573290236626e-07, "epoch": 0.8251712438384227, "percentage": 82.52, "elapsed_time": "0:48:44", "remaining_time": "0:10:19", "throughput": 13871.59, "total_tokens": 40568000}
|
|
{"current_steps": 12895, "total_steps": 15621, "loss": 0.3809, "lr": 1.8001565525281682e-07, "epoch": 0.8254913257793995, "percentage": 82.55, "elapsed_time": "0:48:45", "remaining_time": "0:10:18", "throughput": 13874.31, "total_tokens": 40584960}
|
|
{"current_steps": 12900, "total_steps": 15621, "loss": 0.3665, "lr": 1.793766013633493e-07, "epoch": 0.8258114077203764, "percentage": 82.58, "elapsed_time": "0:48:45", "remaining_time": "0:10:17", "throughput": 13876.75, "total_tokens": 40600704}
|
|
{"current_steps": 12905, "total_steps": 15621, "loss": 0.3693, "lr": 1.7873857203183074e-07, "epoch": 0.8261314896613533, "percentage": 82.61, "elapsed_time": "0:48:46", "remaining_time": "0:10:15", "throughput": 13879.12, "total_tokens": 40615872}
|
|
{"current_steps": 12910, "total_steps": 15621, "loss": 0.4563, "lr": 1.7810156805484733e-07, "epoch": 0.8264515716023302, "percentage": 82.65, "elapsed_time": "0:48:47", "remaining_time": "0:10:14", "throughput": 13881.86, "total_tokens": 40632640}
|
|
{"current_steps": 12915, "total_steps": 15621, "loss": 0.2995, "lr": 1.7746559022770612e-07, "epoch": 0.8267716535433071, "percentage": 82.68, "elapsed_time": "0:48:47", "remaining_time": "0:10:13", "throughput": 13884.27, "total_tokens": 40648064}
|
|
{"current_steps": 12920, "total_steps": 15621, "loss": 0.3663, "lr": 1.7683063934443342e-07, "epoch": 0.8270917354842839, "percentage": 82.71, "elapsed_time": "0:48:48", "remaining_time": "0:10:12", "throughput": 13886.95, "total_tokens": 40664704}
|
|
{"current_steps": 12925, "total_steps": 15621, "loss": 0.4004, "lr": 1.7619671619777277e-07, "epoch": 0.8274118174252608, "percentage": 82.74, "elapsed_time": "0:48:48", "remaining_time": "0:10:10", "throughput": 13889.52, "total_tokens": 40681024}
|
|
{"current_steps": 12930, "total_steps": 15621, "loss": 0.4101, "lr": 1.7556382157918404e-07, "epoch": 0.8277318993662378, "percentage": 82.77, "elapsed_time": "0:48:49", "remaining_time": "0:10:09", "throughput": 13891.82, "total_tokens": 40695936}
|
|
{"current_steps": 12935, "total_steps": 15621, "loss": 0.3185, "lr": 1.7493195627884427e-07, "epoch": 0.8280519813072147, "percentage": 82.81, "elapsed_time": "0:48:50", "remaining_time": "0:10:08", "throughput": 13894.64, "total_tokens": 40713472}
|
|
{"current_steps": 12940, "total_steps": 15621, "loss": 0.3141, "lr": 1.7430112108564465e-07, "epoch": 0.8283720632481916, "percentage": 82.84, "elapsed_time": "0:48:50", "remaining_time": "0:10:07", "throughput": 13897.11, "total_tokens": 40729344}
|
|
{"current_steps": 12945, "total_steps": 15621, "loss": 0.3861, "lr": 1.736713167871896e-07, "epoch": 0.8286921451891684, "percentage": 82.87, "elapsed_time": "0:48:51", "remaining_time": "0:10:05", "throughput": 13899.71, "total_tokens": 40745856}
|
|
{"current_steps": 12950, "total_steps": 15621, "loss": 0.2993, "lr": 1.7304254416979803e-07, "epoch": 0.8290122271301453, "percentage": 82.9, "elapsed_time": "0:48:52", "remaining_time": "0:10:04", "throughput": 13902.26, "total_tokens": 40761920}
|
|
{"current_steps": 12955, "total_steps": 15621, "loss": 0.2488, "lr": 1.7241480401849963e-07, "epoch": 0.8293323090711222, "percentage": 82.93, "elapsed_time": "0:48:52", "remaining_time": "0:10:03", "throughput": 13904.59, "total_tokens": 40776960}
|
|
{"current_steps": 12960, "total_steps": 15621, "loss": 0.3455, "lr": 1.7178809711703524e-07, "epoch": 0.8296523910120991, "percentage": 82.97, "elapsed_time": "0:48:53", "remaining_time": "0:10:02", "throughput": 13906.94, "total_tokens": 40792192}
|
|
{"current_steps": 12965, "total_steps": 15621, "loss": 0.3612, "lr": 1.7116242424785599e-07, "epoch": 0.829972472953076, "percentage": 83.0, "elapsed_time": "0:48:53", "remaining_time": "0:10:01", "throughput": 13909.46, "total_tokens": 40808256}
|
|
{"current_steps": 12970, "total_steps": 15621, "loss": 0.4288, "lr": 1.7053778619212166e-07, "epoch": 0.8302925548940528, "percentage": 83.03, "elapsed_time": "0:48:54", "remaining_time": "0:09:59", "throughput": 13911.8, "total_tokens": 40823424}
|
|
{"current_steps": 12975, "total_steps": 15621, "loss": 0.4221, "lr": 1.6991418372970022e-07, "epoch": 0.8306126368350297, "percentage": 83.06, "elapsed_time": "0:48:55", "remaining_time": "0:09:58", "throughput": 13914.62, "total_tokens": 40840960}
|
|
{"current_steps": 12980, "total_steps": 15621, "loss": 0.3775, "lr": 1.6929161763916666e-07, "epoch": 0.8309327187760066, "percentage": 83.09, "elapsed_time": "0:48:55", "remaining_time": "0:09:57", "throughput": 13917.25, "total_tokens": 40857536}
|
|
{"current_steps": 12985, "total_steps": 15621, "loss": 0.3597, "lr": 1.686700886978021e-07, "epoch": 0.8312528007169836, "percentage": 83.13, "elapsed_time": "0:48:56", "remaining_time": "0:09:56", "throughput": 13919.93, "total_tokens": 40874240}
|
|
{"current_steps": 12990, "total_steps": 15621, "loss": 0.3573, "lr": 1.6804959768159266e-07, "epoch": 0.8315728826579605, "percentage": 83.16, "elapsed_time": "0:48:56", "remaining_time": "0:09:54", "throughput": 13922.14, "total_tokens": 40888960}
|
|
{"current_steps": 12995, "total_steps": 15621, "loss": 0.5238, "lr": 1.674301453652287e-07, "epoch": 0.8318929645989374, "percentage": 83.19, "elapsed_time": "0:48:57", "remaining_time": "0:09:53", "throughput": 13924.53, "total_tokens": 40904512}
|
|
{"current_steps": 13000, "total_steps": 15621, "loss": 0.2903, "lr": 1.6681173252210378e-07, "epoch": 0.8322130465399142, "percentage": 83.22, "elapsed_time": "0:48:58", "remaining_time": "0:09:52", "throughput": 13927.32, "total_tokens": 40921856}
|
|
{"current_steps": 13005, "total_steps": 15621, "loss": 0.3741, "lr": 1.6619435992431342e-07, "epoch": 0.8325331284808911, "percentage": 83.25, "elapsed_time": "0:48:58", "remaining_time": "0:09:51", "throughput": 13930.02, "total_tokens": 40938752}
|
|
{"current_steps": 13010, "total_steps": 15621, "loss": 0.3033, "lr": 1.6557802834265466e-07, "epoch": 0.832853210421868, "percentage": 83.29, "elapsed_time": "0:48:59", "remaining_time": "0:09:49", "throughput": 13932.36, "total_tokens": 40954048}
|
|
{"current_steps": 13015, "total_steps": 15621, "loss": 0.3593, "lr": 1.649627385466248e-07, "epoch": 0.8331732923628449, "percentage": 83.32, "elapsed_time": "0:49:00", "remaining_time": "0:09:48", "throughput": 13935.4, "total_tokens": 40972672}
|
|
{"current_steps": 13020, "total_steps": 15621, "loss": 0.242, "lr": 1.643484913044202e-07, "epoch": 0.8334933743038218, "percentage": 83.35, "elapsed_time": "0:49:00", "remaining_time": "0:09:47", "throughput": 13937.7, "total_tokens": 40987648}
|
|
{"current_steps": 13025, "total_steps": 15621, "loss": 0.3147, "lr": 1.6373528738293564e-07, "epoch": 0.8338134562447986, "percentage": 83.38, "elapsed_time": "0:49:01", "remaining_time": "0:09:46", "throughput": 13940.17, "total_tokens": 41003328}
|
|
{"current_steps": 13030, "total_steps": 15621, "loss": 0.2875, "lr": 1.6312312754776404e-07, "epoch": 0.8341335381857755, "percentage": 83.41, "elapsed_time": "0:49:01", "remaining_time": "0:09:45", "throughput": 13942.53, "total_tokens": 41018624}
|
|
{"current_steps": 13035, "total_steps": 15621, "loss": 0.3321, "lr": 1.6251201256319357e-07, "epoch": 0.8344536201267524, "percentage": 83.45, "elapsed_time": "0:49:02", "remaining_time": "0:09:43", "throughput": 13945.02, "total_tokens": 41034624}
|
|
{"current_steps": 13040, "total_steps": 15621, "loss": 0.3821, "lr": 1.619019431922083e-07, "epoch": 0.8347737020677294, "percentage": 83.48, "elapsed_time": "0:49:03", "remaining_time": "0:09:42", "throughput": 13947.34, "total_tokens": 41049664}
|
|
{"current_steps": 13045, "total_steps": 15621, "loss": 0.3454, "lr": 1.6129292019648754e-07, "epoch": 0.8350937840087063, "percentage": 83.51, "elapsed_time": "0:49:03", "remaining_time": "0:09:41", "throughput": 13949.99, "total_tokens": 41066368}
|
|
{"current_steps": 13050, "total_steps": 15621, "loss": 0.2916, "lr": 1.606849443364038e-07, "epoch": 0.8354138659496831, "percentage": 83.54, "elapsed_time": "0:49:04", "remaining_time": "0:09:40", "throughput": 13952.4, "total_tokens": 41082048}
|
|
{"current_steps": 13055, "total_steps": 15621, "loss": 0.3422, "lr": 1.6007801637102104e-07, "epoch": 0.83573394789066, "percentage": 83.57, "elapsed_time": "0:49:05", "remaining_time": "0:09:38", "throughput": 13954.9, "total_tokens": 41098048}
|
|
{"current_steps": 13060, "total_steps": 15621, "loss": 0.3826, "lr": 1.594721370580969e-07, "epoch": 0.8360540298316369, "percentage": 83.61, "elapsed_time": "0:49:05", "remaining_time": "0:09:37", "throughput": 13957.12, "total_tokens": 41112768}
|
|
{"current_steps": 13065, "total_steps": 15621, "loss": 0.4512, "lr": 1.588673071540788e-07, "epoch": 0.8363741117726138, "percentage": 83.64, "elapsed_time": "0:49:06", "remaining_time": "0:09:36", "throughput": 13959.39, "total_tokens": 41127488}
|
|
{"current_steps": 13070, "total_steps": 15621, "loss": 0.3295, "lr": 1.5826352741410332e-07, "epoch": 0.8366941937135907, "percentage": 83.67, "elapsed_time": "0:49:06", "remaining_time": "0:09:35", "throughput": 13961.62, "total_tokens": 41142272}
|
|
{"current_steps": 13075, "total_steps": 15621, "loss": 0.2947, "lr": 1.576607985919971e-07, "epoch": 0.8370142756545675, "percentage": 83.7, "elapsed_time": "0:49:07", "remaining_time": "0:09:33", "throughput": 13964.02, "total_tokens": 41157952}
|
|
{"current_steps": 13080, "total_steps": 15621, "loss": 0.3595, "lr": 1.57059121440274e-07, "epoch": 0.8373343575955444, "percentage": 83.73, "elapsed_time": "0:49:08", "remaining_time": "0:09:32", "throughput": 13966.29, "total_tokens": 41172992}
|
|
{"current_steps": 13085, "total_steps": 15621, "loss": 0.3642, "lr": 1.56458496710135e-07, "epoch": 0.8376544395365213, "percentage": 83.77, "elapsed_time": "0:49:08", "remaining_time": "0:09:31", "throughput": 13968.5, "total_tokens": 41187776}
|
|
{"current_steps": 13090, "total_steps": 15621, "loss": 0.3461, "lr": 1.5585892515146716e-07, "epoch": 0.8379745214774983, "percentage": 83.8, "elapsed_time": "0:49:09", "remaining_time": "0:09:30", "throughput": 13971.15, "total_tokens": 41204416}
|
|
{"current_steps": 13095, "total_steps": 15621, "loss": 0.4195, "lr": 1.5526040751284253e-07, "epoch": 0.8382946034184752, "percentage": 83.83, "elapsed_time": "0:49:09", "remaining_time": "0:09:29", "throughput": 13973.61, "total_tokens": 41220032}
|
|
{"current_steps": 13100, "total_steps": 15621, "loss": 0.3118, "lr": 1.546629445415174e-07, "epoch": 0.838614685359452, "percentage": 83.86, "elapsed_time": "0:49:10", "remaining_time": "0:09:27", "throughput": 13976.04, "total_tokens": 41235776}
|
|
{"current_steps": 13105, "total_steps": 15621, "loss": 0.3725, "lr": 1.5406653698343141e-07, "epoch": 0.8389347673004289, "percentage": 83.89, "elapsed_time": "0:49:11", "remaining_time": "0:09:26", "throughput": 13978.62, "total_tokens": 41252160}
|
|
{"current_steps": 13110, "total_steps": 15621, "loss": 0.3539, "lr": 1.5347118558320637e-07, "epoch": 0.8392548492414058, "percentage": 83.93, "elapsed_time": "0:49:11", "remaining_time": "0:09:25", "throughput": 13981.3, "total_tokens": 41269056}
|
|
{"current_steps": 13115, "total_steps": 15621, "loss": 0.3562, "lr": 1.5287689108414558e-07, "epoch": 0.8395749311823827, "percentage": 83.96, "elapsed_time": "0:49:12", "remaining_time": "0:09:24", "throughput": 13983.85, "total_tokens": 41285312}
|
|
{"current_steps": 13120, "total_steps": 15621, "loss": 0.3246, "lr": 1.5228365422823242e-07, "epoch": 0.8398950131233596, "percentage": 83.99, "elapsed_time": "0:49:12", "remaining_time": "0:09:22", "throughput": 13986.28, "total_tokens": 41300992}
|
|
{"current_steps": 13125, "total_steps": 15621, "loss": 0.2623, "lr": 1.5169147575613038e-07, "epoch": 0.8402150950643364, "percentage": 84.02, "elapsed_time": "0:49:13", "remaining_time": "0:09:21", "throughput": 13988.96, "total_tokens": 41317952}
|
|
{"current_steps": 13130, "total_steps": 15621, "loss": 0.2941, "lr": 1.5110035640718098e-07, "epoch": 0.8405351770053133, "percentage": 84.05, "elapsed_time": "0:49:14", "remaining_time": "0:09:20", "throughput": 13991.32, "total_tokens": 41333440}
|
|
{"current_steps": 13135, "total_steps": 15621, "loss": 0.3725, "lr": 1.5051029691940387e-07, "epoch": 0.8408552589462902, "percentage": 84.09, "elapsed_time": "0:49:14", "remaining_time": "0:09:19", "throughput": 13993.78, "total_tokens": 41349312}
|
|
{"current_steps": 13140, "total_steps": 15621, "loss": 0.3449, "lr": 1.4992129802949515e-07, "epoch": 0.8411753408872671, "percentage": 84.12, "elapsed_time": "0:49:15", "remaining_time": "0:09:18", "throughput": 13996.03, "total_tokens": 41364288}
|
|
{"current_steps": 13145, "total_steps": 15621, "loss": 0.2836, "lr": 1.4933336047282696e-07, "epoch": 0.8414954228282441, "percentage": 84.15, "elapsed_time": "0:49:16", "remaining_time": "0:09:16", "throughput": 13998.4, "total_tokens": 41379904}
|
|
{"current_steps": 13150, "total_steps": 15621, "loss": 0.3199, "lr": 1.4874648498344579e-07, "epoch": 0.841815504769221, "percentage": 84.18, "elapsed_time": "0:49:16", "remaining_time": "0:09:15", "throughput": 14000.59, "total_tokens": 41394432}
|
|
{"current_steps": 13155, "total_steps": 15621, "loss": 0.3419, "lr": 1.4816067229407348e-07, "epoch": 0.8421355867101978, "percentage": 84.21, "elapsed_time": "0:49:17", "remaining_time": "0:09:14", "throughput": 14002.92, "total_tokens": 41409984}
|
|
{"current_steps": 13160, "total_steps": 15621, "loss": 0.3038, "lr": 1.4757592313610322e-07, "epoch": 0.8424556686511747, "percentage": 84.25, "elapsed_time": "0:49:17", "remaining_time": "0:09:13", "throughput": 14005.35, "total_tokens": 41425984}
|
|
{"current_steps": 13165, "total_steps": 15621, "loss": 0.3293, "lr": 1.4699223823960128e-07, "epoch": 0.8427757505921516, "percentage": 84.28, "elapsed_time": "0:49:18", "remaining_time": "0:09:11", "throughput": 14007.76, "total_tokens": 41441920}
|
|
{"current_steps": 13170, "total_steps": 15621, "loss": 0.3392, "lr": 1.4640961833330579e-07, "epoch": 0.8430958325331285, "percentage": 84.31, "elapsed_time": "0:49:19", "remaining_time": "0:09:10", "throughput": 14010.18, "total_tokens": 41457664}
|
|
{"current_steps": 13175, "total_steps": 15621, "loss": 0.2544, "lr": 1.4582806414462378e-07, "epoch": 0.8434159144741054, "percentage": 84.34, "elapsed_time": "0:49:19", "remaining_time": "0:09:09", "throughput": 14012.48, "total_tokens": 41472832}
|
|
{"current_steps": 13180, "total_steps": 15621, "loss": 0.3411, "lr": 1.4524757639963258e-07, "epoch": 0.8437359964150822, "percentage": 84.37, "elapsed_time": "0:49:20", "remaining_time": "0:09:08", "throughput": 14015.29, "total_tokens": 41490368}
|
|
{"current_steps": 13185, "total_steps": 15621, "loss": 0.4458, "lr": 1.4466815582307845e-07, "epoch": 0.8440560783560591, "percentage": 84.41, "elapsed_time": "0:49:20", "remaining_time": "0:09:07", "throughput": 14017.81, "total_tokens": 41506624}
|
|
{"current_steps": 13190, "total_steps": 15621, "loss": 0.2433, "lr": 1.440898031383746e-07, "epoch": 0.844376160297036, "percentage": 84.44, "elapsed_time": "0:49:21", "remaining_time": "0:09:05", "throughput": 14020.41, "total_tokens": 41523264}
|
|
{"current_steps": 13195, "total_steps": 15621, "loss": 0.3678, "lr": 1.4351251906760064e-07, "epoch": 0.844696242238013, "percentage": 84.47, "elapsed_time": "0:49:22", "remaining_time": "0:09:04", "throughput": 14022.79, "total_tokens": 41538944}
|
|
{"current_steps": 13200, "total_steps": 15621, "loss": 0.3919, "lr": 1.4293630433150317e-07, "epoch": 0.8450163241789899, "percentage": 84.5, "elapsed_time": "0:49:22", "remaining_time": "0:09:03", "throughput": 14025.25, "total_tokens": 41554880}
|
|
{"current_steps": 13205, "total_steps": 15621, "loss": 0.4473, "lr": 1.423611596494927e-07, "epoch": 0.8453364061199667, "percentage": 84.53, "elapsed_time": "0:49:23", "remaining_time": "0:09:02", "throughput": 14027.41, "total_tokens": 41569280}
|
|
{"current_steps": 13210, "total_steps": 15621, "loss": 0.3541, "lr": 1.4178708573964438e-07, "epoch": 0.8456564880609436, "percentage": 84.57, "elapsed_time": "0:49:24", "remaining_time": "0:09:00", "throughput": 14029.73, "total_tokens": 41584576}
|
|
{"current_steps": 13215, "total_steps": 15621, "loss": 0.3483, "lr": 1.4121408331869566e-07, "epoch": 0.8459765700019205, "percentage": 84.6, "elapsed_time": "0:49:24", "remaining_time": "0:08:59", "throughput": 14032.07, "total_tokens": 41600000}
|
|
{"current_steps": 13220, "total_steps": 15621, "loss": 0.3539, "lr": 1.406421531020474e-07, "epoch": 0.8462966519428974, "percentage": 84.63, "elapsed_time": "0:49:25", "remaining_time": "0:08:58", "throughput": 14034.33, "total_tokens": 41615040}
|
|
{"current_steps": 13225, "total_steps": 15621, "loss": 0.3418, "lr": 1.4007129580376097e-07, "epoch": 0.8466167338838743, "percentage": 84.66, "elapsed_time": "0:49:25", "remaining_time": "0:08:57", "throughput": 14036.64, "total_tokens": 41630208}
|
|
{"current_steps": 13230, "total_steps": 15621, "loss": 0.354, "lr": 1.3950151213655847e-07, "epoch": 0.8469368158248511, "percentage": 84.69, "elapsed_time": "0:49:26", "remaining_time": "0:08:56", "throughput": 14038.96, "total_tokens": 41645440}
|
|
{"current_steps": 13235, "total_steps": 15621, "loss": 0.3286, "lr": 1.389328028118214e-07, "epoch": 0.847256897765828, "percentage": 84.73, "elapsed_time": "0:49:27", "remaining_time": "0:08:54", "throughput": 14041.37, "total_tokens": 41661184}
|
|
{"current_steps": 13240, "total_steps": 15621, "loss": 0.3546, "lr": 1.3836516853959e-07, "epoch": 0.8475769797068049, "percentage": 84.76, "elapsed_time": "0:49:27", "remaining_time": "0:08:53", "throughput": 14043.65, "total_tokens": 41676224}
|
|
{"current_steps": 13245, "total_steps": 15621, "loss": 0.3031, "lr": 1.3779861002856242e-07, "epoch": 0.8478970616477818, "percentage": 84.79, "elapsed_time": "0:49:28", "remaining_time": "0:08:52", "throughput": 14045.85, "total_tokens": 41690816}
|
|
{"current_steps": 13250, "total_steps": 15621, "loss": 0.3261, "lr": 1.3723312798609366e-07, "epoch": 0.8482171435887588, "percentage": 84.82, "elapsed_time": "0:49:28", "remaining_time": "0:08:51", "throughput": 14048.28, "total_tokens": 41706688}
|
|
{"current_steps": 13255, "total_steps": 15621, "loss": 0.3518, "lr": 1.3666872311819455e-07, "epoch": 0.8485372255297357, "percentage": 84.85, "elapsed_time": "0:49:29", "remaining_time": "0:08:50", "throughput": 14050.61, "total_tokens": 41721920}
|
|
{"current_steps": 13260, "total_steps": 15621, "loss": 0.2742, "lr": 1.361053961295312e-07, "epoch": 0.8488573074707125, "percentage": 84.89, "elapsed_time": "0:49:30", "remaining_time": "0:08:48", "throughput": 14053.12, "total_tokens": 41738112}
|
|
{"current_steps": 13265, "total_steps": 15621, "loss": 0.3463, "lr": 1.3554314772342412e-07, "epoch": 0.8491773894116894, "percentage": 84.92, "elapsed_time": "0:49:30", "remaining_time": "0:08:47", "throughput": 14055.5, "total_tokens": 41753792}
|
|
{"current_steps": 13270, "total_steps": 15621, "loss": 0.3268, "lr": 1.349819786018469e-07, "epoch": 0.8494974713526663, "percentage": 84.95, "elapsed_time": "0:49:31", "remaining_time": "0:08:46", "throughput": 14058.26, "total_tokens": 41771328}
|
|
{"current_steps": 13275, "total_steps": 15621, "loss": 0.375, "lr": 1.3442188946542566e-07, "epoch": 0.8498175532936432, "percentage": 84.98, "elapsed_time": "0:49:31", "remaining_time": "0:08:45", "throughput": 14060.81, "total_tokens": 41787712}
|
|
{"current_steps": 13280, "total_steps": 15621, "loss": 0.2995, "lr": 1.338628810134388e-07, "epoch": 0.85013763523462, "percentage": 85.01, "elapsed_time": "0:49:32", "remaining_time": "0:08:43", "throughput": 14063.1, "total_tokens": 41803072}
|
|
{"current_steps": 13285, "total_steps": 15621, "loss": 0.3636, "lr": 1.3330495394381435e-07, "epoch": 0.8504577171755969, "percentage": 85.05, "elapsed_time": "0:49:33", "remaining_time": "0:08:42", "throughput": 14065.5, "total_tokens": 41818688}
|
|
{"current_steps": 13290, "total_steps": 15621, "loss": 0.272, "lr": 1.3274810895313083e-07, "epoch": 0.8507777991165738, "percentage": 85.08, "elapsed_time": "0:49:33", "remaining_time": "0:08:41", "throughput": 14067.79, "total_tokens": 41833792}
|
|
{"current_steps": 13294, "total_steps": 15621, "eval_loss": 0.3570670485496521, "epoch": 0.8510338646693554, "percentage": 85.1, "elapsed_time": "0:50:23", "remaining_time": "0:08:49", "throughput": 13841.27, "total_tokens": 41847872}
|
|
{"current_steps": 13295, "total_steps": 15621, "loss": 0.3708, "lr": 1.321923467366164e-07, "epoch": 0.8510978810575507, "percentage": 85.11, "elapsed_time": "0:51:05", "remaining_time": "0:08:56", "throughput": 13653.41, "total_tokens": 41850880}
|
|
{"current_steps": 13300, "total_steps": 15621, "loss": 0.1815, "lr": 1.3163766798814603e-07, "epoch": 0.8514179629985277, "percentage": 85.14, "elapsed_time": "0:51:05", "remaining_time": "0:08:55", "throughput": 13655.78, "total_tokens": 41866560}
|
|
{"current_steps": 13305, "total_steps": 15621, "loss": 0.2872, "lr": 1.3108407340024264e-07, "epoch": 0.8517380449395046, "percentage": 85.17, "elapsed_time": "0:51:06", "remaining_time": "0:08:53", "throughput": 13658.17, "total_tokens": 41882240}
|
|
{"current_steps": 13310, "total_steps": 15621, "loss": 0.332, "lr": 1.3053156366407613e-07, "epoch": 0.8520581268804814, "percentage": 85.21, "elapsed_time": "0:51:07", "remaining_time": "0:08:52", "throughput": 13660.77, "total_tokens": 41898880}
|
|
{"current_steps": 13315, "total_steps": 15621, "loss": 0.2398, "lr": 1.2998013946946119e-07, "epoch": 0.8523782088214583, "percentage": 85.24, "elapsed_time": "0:51:07", "remaining_time": "0:08:51", "throughput": 13663.44, "total_tokens": 41915968}
|
|
{"current_steps": 13320, "total_steps": 15621, "loss": 0.3556, "lr": 1.2942980150485706e-07, "epoch": 0.8526982907624352, "percentage": 85.27, "elapsed_time": "0:51:08", "remaining_time": "0:08:50", "throughput": 13665.71, "total_tokens": 41930816}
|
|
{"current_steps": 13325, "total_steps": 15621, "loss": 0.3098, "lr": 1.2888055045736723e-07, "epoch": 0.8530183727034121, "percentage": 85.3, "elapsed_time": "0:51:08", "remaining_time": "0:08:48", "throughput": 13668.24, "total_tokens": 41947200}
|
|
{"current_steps": 13330, "total_steps": 15621, "loss": 0.3021, "lr": 1.283323870127384e-07, "epoch": 0.853338454644389, "percentage": 85.33, "elapsed_time": "0:51:09", "remaining_time": "0:08:47", "throughput": 13670.49, "total_tokens": 41962240}
|
|
{"current_steps": 13335, "total_steps": 15621, "loss": 0.3063, "lr": 1.2778531185535911e-07, "epoch": 0.8536585365853658, "percentage": 85.37, "elapsed_time": "0:51:10", "remaining_time": "0:08:46", "throughput": 13673.03, "total_tokens": 41978752}
|
|
{"current_steps": 13340, "total_steps": 15621, "loss": 0.324, "lr": 1.2723932566825844e-07, "epoch": 0.8539786185263427, "percentage": 85.4, "elapsed_time": "0:51:10", "remaining_time": "0:08:45", "throughput": 13675.36, "total_tokens": 41994112}
|
|
{"current_steps": 13345, "total_steps": 15621, "loss": 0.2986, "lr": 1.2669442913310723e-07, "epoch": 0.8542987004673196, "percentage": 85.43, "elapsed_time": "0:51:11", "remaining_time": "0:08:43", "throughput": 13677.84, "total_tokens": 42010432}
|
|
{"current_steps": 13350, "total_steps": 15621, "loss": 0.2722, "lr": 1.2615062293021506e-07, "epoch": 0.8546187824082965, "percentage": 85.46, "elapsed_time": "0:51:12", "remaining_time": "0:08:42", "throughput": 13680.19, "total_tokens": 42025984}
|
|
{"current_steps": 13355, "total_steps": 15621, "loss": 0.3185, "lr": 1.2560790773853025e-07, "epoch": 0.8549388643492735, "percentage": 85.49, "elapsed_time": "0:51:12", "remaining_time": "0:08:41", "throughput": 13682.42, "total_tokens": 42040832}
|
|
{"current_steps": 13360, "total_steps": 15621, "loss": 0.4035, "lr": 1.2506628423563915e-07, "epoch": 0.8552589462902503, "percentage": 85.53, "elapsed_time": "0:51:13", "remaining_time": "0:08:40", "throughput": 13685.02, "total_tokens": 42057536}
|
|
{"current_steps": 13365, "total_steps": 15621, "loss": 0.2863, "lr": 1.2452575309776493e-07, "epoch": 0.8555790282312272, "percentage": 85.56, "elapsed_time": "0:51:13", "remaining_time": "0:08:38", "throughput": 13687.4, "total_tokens": 42073152}
|
|
{"current_steps": 13370, "total_steps": 15621, "loss": 0.304, "lr": 1.2398631499976732e-07, "epoch": 0.8558991101722041, "percentage": 85.59, "elapsed_time": "0:51:14", "remaining_time": "0:08:37", "throughput": 13689.72, "total_tokens": 42088512}
|
|
{"current_steps": 13375, "total_steps": 15621, "loss": 0.4208, "lr": 1.234479706151409e-07, "epoch": 0.856219192113181, "percentage": 85.62, "elapsed_time": "0:51:15", "remaining_time": "0:08:36", "throughput": 13691.97, "total_tokens": 42103552}
|
|
{"current_steps": 13380, "total_steps": 15621, "loss": 0.3608, "lr": 1.2291072061601503e-07, "epoch": 0.8565392740541579, "percentage": 85.65, "elapsed_time": "0:51:15", "remaining_time": "0:08:35", "throughput": 13694.5, "total_tokens": 42119872}
|
|
{"current_steps": 13385, "total_steps": 15621, "loss": 0.4351, "lr": 1.2237456567315264e-07, "epoch": 0.8568593559951347, "percentage": 85.69, "elapsed_time": "0:51:16", "remaining_time": "0:08:33", "throughput": 13697.15, "total_tokens": 42136832}
|
|
{"current_steps": 13390, "total_steps": 15621, "loss": 0.2975, "lr": 1.2183950645594944e-07, "epoch": 0.8571794379361116, "percentage": 85.72, "elapsed_time": "0:51:16", "remaining_time": "0:08:32", "throughput": 13699.59, "total_tokens": 42152896}
|
|
{"current_steps": 13395, "total_steps": 15621, "loss": 0.3421, "lr": 1.2130554363243318e-07, "epoch": 0.8574995198770885, "percentage": 85.75, "elapsed_time": "0:51:17", "remaining_time": "0:08:31", "throughput": 13701.84, "total_tokens": 42168064}
|
|
{"current_steps": 13400, "total_steps": 15621, "loss": 0.3703, "lr": 1.207726778692625e-07, "epoch": 0.8578196018180654, "percentage": 85.78, "elapsed_time": "0:51:18", "remaining_time": "0:08:30", "throughput": 13704.03, "total_tokens": 42182784}
|
|
{"current_steps": 13405, "total_steps": 15621, "loss": 0.3271, "lr": 1.2024090983172718e-07, "epoch": 0.8581396837590423, "percentage": 85.81, "elapsed_time": "0:51:18", "remaining_time": "0:08:28", "throughput": 13706.62, "total_tokens": 42199744}
|
|
{"current_steps": 13410, "total_steps": 15621, "loss": 0.3625, "lr": 1.1971024018374532e-07, "epoch": 0.8584597657000193, "percentage": 85.85, "elapsed_time": "0:51:19", "remaining_time": "0:08:27", "throughput": 13708.92, "total_tokens": 42215040}
|
|
{"current_steps": 13415, "total_steps": 15621, "loss": 0.3091, "lr": 1.1918066958786432e-07, "epoch": 0.8587798476409961, "percentage": 85.88, "elapsed_time": "0:51:19", "remaining_time": "0:08:26", "throughput": 13711.17, "total_tokens": 42230144}
|
|
{"current_steps": 13420, "total_steps": 15621, "loss": 0.3553, "lr": 1.1865219870525922e-07, "epoch": 0.859099929581973, "percentage": 85.91, "elapsed_time": "0:51:20", "remaining_time": "0:08:25", "throughput": 13713.64, "total_tokens": 42246528}
|
|
{"current_steps": 13425, "total_steps": 15621, "loss": 0.4317, "lr": 1.1812482819573222e-07, "epoch": 0.8594200115229499, "percentage": 85.94, "elapsed_time": "0:51:21", "remaining_time": "0:08:24", "throughput": 13716.18, "total_tokens": 42263168}
|
|
{"current_steps": 13430, "total_steps": 15621, "loss": 0.3905, "lr": 1.1759855871771163e-07, "epoch": 0.8597400934639268, "percentage": 85.97, "elapsed_time": "0:51:21", "remaining_time": "0:08:22", "throughput": 13718.56, "total_tokens": 42278912}
|
|
{"current_steps": 13435, "total_steps": 15621, "loss": 0.3824, "lr": 1.1707339092825075e-07, "epoch": 0.8600601754049036, "percentage": 86.01, "elapsed_time": "0:51:22", "remaining_time": "0:08:21", "throughput": 13720.97, "total_tokens": 42294656}
|
|
{"current_steps": 13440, "total_steps": 15621, "loss": 0.3909, "lr": 1.1654932548302842e-07, "epoch": 0.8603802573458805, "percentage": 86.04, "elapsed_time": "0:51:23", "remaining_time": "0:08:20", "throughput": 13723.55, "total_tokens": 42311552}
|
|
{"current_steps": 13445, "total_steps": 15621, "loss": 0.3635, "lr": 1.1602636303634595e-07, "epoch": 0.8607003392868574, "percentage": 86.07, "elapsed_time": "0:51:23", "remaining_time": "0:08:19", "throughput": 13725.98, "total_tokens": 42327552}
|
|
{"current_steps": 13450, "total_steps": 15621, "loss": 0.3583, "lr": 1.1550450424112801e-07, "epoch": 0.8610204212278343, "percentage": 86.1, "elapsed_time": "0:51:24", "remaining_time": "0:08:17", "throughput": 13728.37, "total_tokens": 42343360}
|
|
{"current_steps": 13455, "total_steps": 15621, "loss": 0.3341, "lr": 1.1498374974892178e-07, "epoch": 0.8613405031688112, "percentage": 86.13, "elapsed_time": "0:51:25", "remaining_time": "0:08:16", "throughput": 13730.94, "total_tokens": 42360064}
|
|
{"current_steps": 13460, "total_steps": 15621, "loss": 0.4371, "lr": 1.144641002098955e-07, "epoch": 0.8616605851097882, "percentage": 86.17, "elapsed_time": "0:51:25", "remaining_time": "0:08:15", "throughput": 13733.12, "total_tokens": 42374976}
|
|
{"current_steps": 13465, "total_steps": 15621, "loss": 0.3524, "lr": 1.1394555627283697e-07, "epoch": 0.861980667050765, "percentage": 86.2, "elapsed_time": "0:51:26", "remaining_time": "0:08:14", "throughput": 13735.68, "total_tokens": 42391616}
|
|
{"current_steps": 13470, "total_steps": 15621, "loss": 0.3095, "lr": 1.134281185851551e-07, "epoch": 0.8623007489917419, "percentage": 86.23, "elapsed_time": "0:51:26", "remaining_time": "0:08:12", "throughput": 13737.88, "total_tokens": 42406528}
|
|
{"current_steps": 13475, "total_steps": 15621, "loss": 0.288, "lr": 1.1291178779287691e-07, "epoch": 0.8626208309327188, "percentage": 86.26, "elapsed_time": "0:51:27", "remaining_time": "0:08:11", "throughput": 13740.68, "total_tokens": 42424320}
|
|
{"current_steps": 13480, "total_steps": 15621, "loss": 0.3654, "lr": 1.1239656454064683e-07, "epoch": 0.8629409128736957, "percentage": 86.29, "elapsed_time": "0:51:28", "remaining_time": "0:08:10", "throughput": 13743.23, "total_tokens": 42440960}
|
|
{"current_steps": 13485, "total_steps": 15621, "loss": 0.2474, "lr": 1.1188244947172776e-07, "epoch": 0.8632609948146726, "percentage": 86.33, "elapsed_time": "0:51:28", "remaining_time": "0:08:09", "throughput": 13745.55, "total_tokens": 42456448}
|
|
{"current_steps": 13490, "total_steps": 15621, "loss": 0.3165, "lr": 1.1136944322799812e-07, "epoch": 0.8635810767556494, "percentage": 86.36, "elapsed_time": "0:51:29", "remaining_time": "0:08:08", "throughput": 13747.95, "total_tokens": 42472448}
|
|
{"current_steps": 13495, "total_steps": 15621, "loss": 0.3147, "lr": 1.1085754644995227e-07, "epoch": 0.8639011586966263, "percentage": 86.39, "elapsed_time": "0:51:29", "remaining_time": "0:08:06", "throughput": 13750.23, "total_tokens": 42487808}
|
|
{"current_steps": 13500, "total_steps": 15621, "loss": 0.3516, "lr": 1.1034675977669938e-07, "epoch": 0.8642212406376032, "percentage": 86.42, "elapsed_time": "0:51:30", "remaining_time": "0:08:05", "throughput": 13752.64, "total_tokens": 42503744}
|
|
{"current_steps": 13505, "total_steps": 15621, "loss": 0.5636, "lr": 1.0983708384596258e-07, "epoch": 0.8645413225785801, "percentage": 86.45, "elapsed_time": "0:51:31", "remaining_time": "0:08:04", "throughput": 13755.27, "total_tokens": 42520768}
|
|
{"current_steps": 13510, "total_steps": 15621, "loss": 0.3664, "lr": 1.0932851929407827e-07, "epoch": 0.864861404519557, "percentage": 86.49, "elapsed_time": "0:51:31", "remaining_time": "0:08:03", "throughput": 13757.78, "total_tokens": 42537408}
|
|
{"current_steps": 13515, "total_steps": 15621, "loss": 0.36, "lr": 1.0882106675599534e-07, "epoch": 0.8651814864605339, "percentage": 86.52, "elapsed_time": "0:51:32", "remaining_time": "0:08:01", "throughput": 13760.24, "total_tokens": 42553728}
|
|
{"current_steps": 13520, "total_steps": 15621, "loss": 0.3304, "lr": 1.0831472686527409e-07, "epoch": 0.8655015684015108, "percentage": 86.55, "elapsed_time": "0:51:33", "remaining_time": "0:08:00", "throughput": 13762.47, "total_tokens": 42568896}
|
|
{"current_steps": 13525, "total_steps": 15621, "loss": 0.2939, "lr": 1.0780950025408586e-07, "epoch": 0.8658216503424877, "percentage": 86.58, "elapsed_time": "0:51:33", "remaining_time": "0:07:59", "throughput": 13764.68, "total_tokens": 42584000}
|
|
{"current_steps": 13530, "total_steps": 15621, "loss": 0.3824, "lr": 1.0730538755321217e-07, "epoch": 0.8661417322834646, "percentage": 86.61, "elapsed_time": "0:51:34", "remaining_time": "0:07:58", "throughput": 13767.08, "total_tokens": 42600192}
|
|
{"current_steps": 13535, "total_steps": 15621, "loss": 0.304, "lr": 1.0680238939204334e-07, "epoch": 0.8664618142244415, "percentage": 86.65, "elapsed_time": "0:51:34", "remaining_time": "0:07:56", "throughput": 13769.14, "total_tokens": 42614656}
|
|
{"current_steps": 13540, "total_steps": 15621, "loss": 0.3989, "lr": 1.0630050639857879e-07, "epoch": 0.8667818961654183, "percentage": 86.68, "elapsed_time": "0:51:35", "remaining_time": "0:07:55", "throughput": 13771.31, "total_tokens": 42629504}
|
|
{"current_steps": 13545, "total_steps": 15621, "loss": 0.3036, "lr": 1.0579973919942508e-07, "epoch": 0.8671019781063952, "percentage": 86.71, "elapsed_time": "0:51:36", "remaining_time": "0:07:54", "throughput": 13773.46, "total_tokens": 42644224}
|
|
{"current_steps": 13550, "total_steps": 15621, "loss": 0.2417, "lr": 1.0530008841979621e-07, "epoch": 0.8674220600473721, "percentage": 86.74, "elapsed_time": "0:51:36", "remaining_time": "0:07:53", "throughput": 13775.73, "total_tokens": 42659584}
|
|
{"current_steps": 13555, "total_steps": 15621, "loss": 0.2756, "lr": 1.048015546835117e-07, "epoch": 0.867742141988349, "percentage": 86.77, "elapsed_time": "0:51:37", "remaining_time": "0:07:52", "throughput": 13778.19, "total_tokens": 42675776}
|
|
{"current_steps": 13560, "total_steps": 15621, "loss": 0.3976, "lr": 1.0430413861299691e-07, "epoch": 0.8680622239293259, "percentage": 86.81, "elapsed_time": "0:51:38", "remaining_time": "0:07:50", "throughput": 13780.87, "total_tokens": 42693184}
|
|
{"current_steps": 13565, "total_steps": 15621, "loss": 0.4533, "lr": 1.0380784082928196e-07, "epoch": 0.8683823058703029, "percentage": 86.84, "elapsed_time": "0:51:38", "remaining_time": "0:07:49", "throughput": 13783.59, "total_tokens": 42710784}
|
|
{"current_steps": 13570, "total_steps": 15621, "loss": 0.3903, "lr": 1.0331266195200006e-07, "epoch": 0.8687023878112797, "percentage": 86.87, "elapsed_time": "0:51:39", "remaining_time": "0:07:48", "throughput": 13786.06, "total_tokens": 42727040}
|
|
{"current_steps": 13575, "total_steps": 15621, "loss": 0.3126, "lr": 1.0281860259938779e-07, "epoch": 0.8690224697522566, "percentage": 86.9, "elapsed_time": "0:51:39", "remaining_time": "0:07:47", "throughput": 13788.31, "total_tokens": 42742208}
|
|
{"current_steps": 13580, "total_steps": 15621, "loss": 0.3673, "lr": 1.0232566338828452e-07, "epoch": 0.8693425516932335, "percentage": 86.93, "elapsed_time": "0:51:40", "remaining_time": "0:07:45", "throughput": 13790.76, "total_tokens": 42758464}
|
|
{"current_steps": 13585, "total_steps": 15621, "loss": 0.4102, "lr": 1.018338449341305e-07, "epoch": 0.8696626336342104, "percentage": 86.97, "elapsed_time": "0:51:41", "remaining_time": "0:07:44", "throughput": 13793.07, "total_tokens": 42774016}
|
|
{"current_steps": 13590, "total_steps": 15621, "loss": 0.3942, "lr": 1.0134314785096632e-07, "epoch": 0.8699827155751872, "percentage": 87.0, "elapsed_time": "0:51:41", "remaining_time": "0:07:43", "throughput": 13795.33, "total_tokens": 42789248}
|
|
{"current_steps": 13595, "total_steps": 15621, "loss": 0.342, "lr": 1.0085357275143359e-07, "epoch": 0.8703027975161641, "percentage": 87.03, "elapsed_time": "0:51:42", "remaining_time": "0:07:42", "throughput": 13797.61, "total_tokens": 42804608}
|
|
{"current_steps": 13600, "total_steps": 15621, "loss": 0.4964, "lr": 1.0036512024677268e-07, "epoch": 0.870622879457141, "percentage": 87.06, "elapsed_time": "0:51:42", "remaining_time": "0:07:41", "throughput": 13799.8, "total_tokens": 42819584}
|
|
{"current_steps": 13605, "total_steps": 15621, "loss": 0.2733, "lr": 9.98777909468217e-08, "epoch": 0.8709429613981179, "percentage": 87.09, "elapsed_time": "0:51:43", "remaining_time": "0:07:39", "throughput": 13802.1, "total_tokens": 42835200}
|
|
{"current_steps": 13610, "total_steps": 15621, "loss": 0.406, "lr": 9.939158546001736e-08, "epoch": 0.8712630433390948, "percentage": 87.13, "elapsed_time": "0:51:44", "remaining_time": "0:07:38", "throughput": 13804.77, "total_tokens": 42852672}
|
|
{"current_steps": 13615, "total_steps": 15621, "loss": 0.3322, "lr": 9.890650439339299e-08, "epoch": 0.8715831252800716, "percentage": 87.16, "elapsed_time": "0:51:44", "remaining_time": "0:07:37", "throughput": 13807.17, "total_tokens": 42868672}
|
|
{"current_steps": 13620, "total_steps": 15621, "loss": 0.416, "lr": 9.842254835257791e-08, "epoch": 0.8719032072210486, "percentage": 87.19, "elapsed_time": "0:51:45", "remaining_time": "0:07:36", "throughput": 13809.45, "total_tokens": 42884096}
|
|
{"current_steps": 13625, "total_steps": 15621, "loss": 0.3767, "lr": 9.793971794179679e-08, "epoch": 0.8722232891620255, "percentage": 87.22, "elapsed_time": "0:51:45", "remaining_time": "0:07:35", "throughput": 13811.61, "total_tokens": 42898752}
|
|
{"current_steps": 13630, "total_steps": 15621, "loss": 0.3417, "lr": 9.745801376386931e-08, "epoch": 0.8725433711030024, "percentage": 87.25, "elapsed_time": "0:51:46", "remaining_time": "0:07:33", "throughput": 13813.99, "total_tokens": 42914688}
|
|
{"current_steps": 13635, "total_steps": 15621, "loss": 0.3211, "lr": 9.697743642020861e-08, "epoch": 0.8728634530439793, "percentage": 87.29, "elapsed_time": "0:51:47", "remaining_time": "0:07:32", "throughput": 13816.42, "total_tokens": 42930688}
|
|
{"current_steps": 13640, "total_steps": 15621, "loss": 0.3372, "lr": 9.649798651082119e-08, "epoch": 0.8731835349849562, "percentage": 87.32, "elapsed_time": "0:51:47", "remaining_time": "0:07:31", "throughput": 13818.89, "total_tokens": 42947008}
|
|
{"current_steps": 13645, "total_steps": 15621, "loss": 0.3946, "lr": 9.601966463430588e-08, "epoch": 0.873503616925933, "percentage": 87.35, "elapsed_time": "0:51:48", "remaining_time": "0:07:30", "throughput": 13821.22, "total_tokens": 42962816}
|
|
{"current_steps": 13650, "total_steps": 15621, "loss": 0.3405, "lr": 9.554247138785321e-08, "epoch": 0.8738236988669099, "percentage": 87.38, "elapsed_time": "0:51:49", "remaining_time": "0:07:28", "throughput": 13823.36, "total_tokens": 42977664}
|
|
{"current_steps": 13655, "total_steps": 15621, "loss": 0.4684, "lr": 9.506640736724447e-08, "epoch": 0.8741437808078868, "percentage": 87.41, "elapsed_time": "0:51:49", "remaining_time": "0:07:27", "throughput": 13825.7, "total_tokens": 42993472}
|
|
{"current_steps": 13660, "total_steps": 15621, "loss": 0.3895, "lr": 9.459147316685123e-08, "epoch": 0.8744638627488637, "percentage": 87.45, "elapsed_time": "0:51:50", "remaining_time": "0:07:26", "throughput": 13828.34, "total_tokens": 43010688}
|
|
{"current_steps": 13665, "total_steps": 15621, "loss": 0.3357, "lr": 9.41176693796345e-08, "epoch": 0.8747839446898406, "percentage": 87.48, "elapsed_time": "0:51:50", "remaining_time": "0:07:25", "throughput": 13830.89, "total_tokens": 43027392}
|
|
{"current_steps": 13670, "total_steps": 15621, "loss": 0.4172, "lr": 9.364499659714364e-08, "epoch": 0.8751040266308175, "percentage": 87.51, "elapsed_time": "0:51:51", "remaining_time": "0:07:24", "throughput": 13833.21, "total_tokens": 43043008}
|
|
{"current_steps": 13675, "total_steps": 15621, "loss": 0.342, "lr": 9.31734554095165e-08, "epoch": 0.8754241085717944, "percentage": 87.54, "elapsed_time": "0:51:52", "remaining_time": "0:07:22", "throughput": 13835.61, "total_tokens": 43059072}
|
|
{"current_steps": 13680, "total_steps": 15621, "loss": 0.3481, "lr": 9.270304640547744e-08, "epoch": 0.8757441905127713, "percentage": 87.57, "elapsed_time": "0:51:52", "remaining_time": "0:07:21", "throughput": 13837.92, "total_tokens": 43074624}
|
|
{"current_steps": 13685, "total_steps": 15621, "loss": 0.3952, "lr": 9.223377017233768e-08, "epoch": 0.8760642724537482, "percentage": 87.61, "elapsed_time": "0:51:53", "remaining_time": "0:07:20", "throughput": 13840.11, "total_tokens": 43089536}
|
|
{"current_steps": 13690, "total_steps": 15621, "loss": 0.3535, "lr": 9.176562729599458e-08, "epoch": 0.8763843543947251, "percentage": 87.64, "elapsed_time": "0:51:53", "remaining_time": "0:07:19", "throughput": 13842.3, "total_tokens": 43104512}
|
|
{"current_steps": 13695, "total_steps": 15621, "loss": 0.3463, "lr": 9.129861836092944e-08, "epoch": 0.8767044363357019, "percentage": 87.67, "elapsed_time": "0:51:54", "remaining_time": "0:07:18", "throughput": 13844.71, "total_tokens": 43120640}
|
|
{"current_steps": 13700, "total_steps": 15621, "loss": 0.4422, "lr": 9.083274395020845e-08, "epoch": 0.8770245182766788, "percentage": 87.7, "elapsed_time": "0:51:55", "remaining_time": "0:07:16", "throughput": 13847.04, "total_tokens": 43136384}
|
|
{"current_steps": 13705, "total_steps": 15621, "loss": 0.4045, "lr": 9.036800464548156e-08, "epoch": 0.8773446002176557, "percentage": 87.73, "elapsed_time": "0:51:55", "remaining_time": "0:07:15", "throughput": 13849.61, "total_tokens": 43153216}
|
|
{"current_steps": 13710, "total_steps": 15621, "loss": 0.3473, "lr": 8.990440102698138e-08, "epoch": 0.8776646821586326, "percentage": 87.77, "elapsed_time": "0:51:56", "remaining_time": "0:07:14", "throughput": 13851.73, "total_tokens": 43167936}
|
|
{"current_steps": 13715, "total_steps": 15621, "loss": 0.2767, "lr": 8.944193367352182e-08, "epoch": 0.8779847640996095, "percentage": 87.8, "elapsed_time": "0:51:57", "remaining_time": "0:07:13", "throughput": 13854.07, "total_tokens": 43183872}
|
|
{"current_steps": 13720, "total_steps": 15621, "loss": 0.4057, "lr": 8.898060316249944e-08, "epoch": 0.8783048460405863, "percentage": 87.83, "elapsed_time": "0:51:57", "remaining_time": "0:07:11", "throughput": 13856.49, "total_tokens": 43200256}
|
|
{"current_steps": 13725, "total_steps": 15621, "loss": 0.3563, "lr": 8.852041006989064e-08, "epoch": 0.8786249279815633, "percentage": 87.86, "elapsed_time": "0:51:58", "remaining_time": "0:07:10", "throughput": 13859.12, "total_tokens": 43217600}
|
|
{"current_steps": 13730, "total_steps": 15621, "loss": 0.3785, "lr": 8.80613549702518e-08, "epoch": 0.8789450099225402, "percentage": 87.89, "elapsed_time": "0:51:58", "remaining_time": "0:07:09", "throughput": 13861.47, "total_tokens": 43233344}
|
|
{"current_steps": 13735, "total_steps": 15621, "loss": 0.5423, "lr": 8.760343843671824e-08, "epoch": 0.8792650918635171, "percentage": 87.93, "elapsed_time": "0:51:59", "remaining_time": "0:07:08", "throughput": 13863.8, "total_tokens": 43249280}
|
|
{"current_steps": 13740, "total_steps": 15621, "loss": 0.4461, "lr": 8.714666104100487e-08, "epoch": 0.879585173804494, "percentage": 87.96, "elapsed_time": "0:52:00", "remaining_time": "0:07:07", "throughput": 13866.14, "total_tokens": 43265024}
|
|
{"current_steps": 13745, "total_steps": 15621, "loss": 0.3544, "lr": 8.66910233534034e-08, "epoch": 0.8799052557454708, "percentage": 87.99, "elapsed_time": "0:52:00", "remaining_time": "0:07:05", "throughput": 13868.44, "total_tokens": 43280576}
|
|
{"current_steps": 13750, "total_steps": 15621, "loss": 0.3156, "lr": 8.62365259427823e-08, "epoch": 0.8802253376864477, "percentage": 88.02, "elapsed_time": "0:52:01", "remaining_time": "0:07:04", "throughput": 13870.7, "total_tokens": 43296064}
|
|
{"current_steps": 13755, "total_steps": 15621, "loss": 0.2899, "lr": 8.578316937658758e-08, "epoch": 0.8805454196274246, "percentage": 88.05, "elapsed_time": "0:52:02", "remaining_time": "0:07:03", "throughput": 13872.98, "total_tokens": 43311552}
|
|
{"current_steps": 13760, "total_steps": 15621, "loss": 0.3116, "lr": 8.533095422083992e-08, "epoch": 0.8808655015684015, "percentage": 88.09, "elapsed_time": "0:52:02", "remaining_time": "0:07:02", "throughput": 13875.12, "total_tokens": 43326272}
|
|
{"current_steps": 13765, "total_steps": 15621, "loss": 0.2906, "lr": 8.487988104013533e-08, "epoch": 0.8811855835093784, "percentage": 88.12, "elapsed_time": "0:52:03", "remaining_time": "0:07:01", "throughput": 13877.55, "total_tokens": 43342592}
|
|
{"current_steps": 13770, "total_steps": 15621, "loss": 0.3188, "lr": 8.4429950397644e-08, "epoch": 0.8815056654503552, "percentage": 88.15, "elapsed_time": "0:52:03", "remaining_time": "0:06:59", "throughput": 13879.77, "total_tokens": 43357888}
|
|
{"current_steps": 13775, "total_steps": 15621, "loss": 0.2679, "lr": 8.398116285510948e-08, "epoch": 0.8818257473913321, "percentage": 88.18, "elapsed_time": "0:52:04", "remaining_time": "0:06:58", "throughput": 13882.2, "total_tokens": 43374272}
|
|
{"current_steps": 13780, "total_steps": 15621, "loss": 0.2698, "lr": 8.353351897284844e-08, "epoch": 0.8821458293323091, "percentage": 88.21, "elapsed_time": "0:52:05", "remaining_time": "0:06:57", "throughput": 13885.12, "total_tokens": 43393280}
|
|
{"current_steps": 13785, "total_steps": 15621, "loss": 0.4762, "lr": 8.308701930974949e-08, "epoch": 0.882465911273286, "percentage": 88.25, "elapsed_time": "0:52:05", "remaining_time": "0:06:56", "throughput": 13887.58, "total_tokens": 43409600}
|
|
{"current_steps": 13790, "total_steps": 15621, "loss": 0.4038, "lr": 8.264166442327269e-08, "epoch": 0.8827859932142629, "percentage": 88.28, "elapsed_time": "0:52:06", "remaining_time": "0:06:55", "throughput": 13889.73, "total_tokens": 43424384}
|
|
{"current_steps": 13795, "total_steps": 15621, "loss": 0.2533, "lr": 8.219745486944885e-08, "epoch": 0.8831060751552398, "percentage": 88.31, "elapsed_time": "0:52:06", "remaining_time": "0:06:53", "throughput": 13892.02, "total_tokens": 43440128}
|
|
{"current_steps": 13800, "total_steps": 15621, "loss": 0.4597, "lr": 8.175439120287875e-08, "epoch": 0.8834261570962166, "percentage": 88.34, "elapsed_time": "0:52:07", "remaining_time": "0:06:52", "throughput": 13894.19, "total_tokens": 43455168}
|
|
{"current_steps": 13805, "total_steps": 15621, "loss": 0.3494, "lr": 8.131247397673269e-08, "epoch": 0.8837462390371935, "percentage": 88.37, "elapsed_time": "0:52:08", "remaining_time": "0:06:51", "throughput": 13896.73, "total_tokens": 43472064}
|
|
{"current_steps": 13810, "total_steps": 15621, "loss": 0.4333, "lr": 8.087170374274921e-08, "epoch": 0.8840663209781704, "percentage": 88.41, "elapsed_time": "0:52:08", "remaining_time": "0:06:50", "throughput": 13899.07, "total_tokens": 43488000}
|
|
{"current_steps": 13815, "total_steps": 15621, "loss": 0.2981, "lr": 8.043208105123578e-08, "epoch": 0.8843864029191473, "percentage": 88.44, "elapsed_time": "0:52:09", "remaining_time": "0:06:49", "throughput": 13901.35, "total_tokens": 43503488}
|
|
{"current_steps": 13820, "total_steps": 15621, "loss": 0.335, "lr": 7.999360645106579e-08, "epoch": 0.8847064848601242, "percentage": 88.47, "elapsed_time": "0:52:10", "remaining_time": "0:06:47", "throughput": 13903.5, "total_tokens": 43518336}
|
|
{"current_steps": 13825, "total_steps": 15621, "loss": 0.2651, "lr": 7.955628048968011e-08, "epoch": 0.885026566801101, "percentage": 88.5, "elapsed_time": "0:52:10", "remaining_time": "0:06:46", "throughput": 13905.57, "total_tokens": 43532800}
|
|
{"current_steps": 13830, "total_steps": 15621, "loss": 0.2627, "lr": 7.912010371308564e-08, "epoch": 0.885346648742078, "percentage": 88.53, "elapsed_time": "0:52:11", "remaining_time": "0:06:45", "throughput": 13907.72, "total_tokens": 43547648}
|
|
{"current_steps": 13835, "total_steps": 15621, "loss": 0.2935, "lr": 7.868507666585422e-08, "epoch": 0.8856667306830549, "percentage": 88.57, "elapsed_time": "0:52:11", "remaining_time": "0:06:44", "throughput": 13909.87, "total_tokens": 43562688}
|
|
{"current_steps": 13840, "total_steps": 15621, "loss": 0.4137, "lr": 7.825119989112172e-08, "epoch": 0.8859868126240318, "percentage": 88.6, "elapsed_time": "0:52:12", "remaining_time": "0:06:43", "throughput": 13912.14, "total_tokens": 43578176}
|
|
{"current_steps": 13845, "total_steps": 15621, "loss": 0.2938, "lr": 7.78184739305886e-08, "epoch": 0.8863068945650087, "percentage": 88.63, "elapsed_time": "0:52:12", "remaining_time": "0:06:41", "throughput": 13914.46, "total_tokens": 43593920}
|
|
{"current_steps": 13850, "total_steps": 15621, "loss": 0.3491, "lr": 7.73868993245187e-08, "epoch": 0.8866269765059855, "percentage": 88.66, "elapsed_time": "0:52:13", "remaining_time": "0:06:40", "throughput": 13917.01, "total_tokens": 43610944}
|
|
{"current_steps": 13855, "total_steps": 15621, "loss": 0.3412, "lr": 7.695647661173754e-08, "epoch": 0.8869470584469624, "percentage": 88.69, "elapsed_time": "0:52:14", "remaining_time": "0:06:39", "throughput": 13919.37, "total_tokens": 43627008}
|
|
{"current_steps": 13860, "total_steps": 15621, "loss": 0.3785, "lr": 7.652720632963284e-08, "epoch": 0.8872671403879393, "percentage": 88.73, "elapsed_time": "0:52:14", "remaining_time": "0:06:38", "throughput": 13921.67, "total_tokens": 43642752}
|
|
{"current_steps": 13865, "total_steps": 15621, "loss": 0.3396, "lr": 7.609908901415396e-08, "epoch": 0.8875872223289162, "percentage": 88.76, "elapsed_time": "0:52:15", "remaining_time": "0:06:37", "throughput": 13923.95, "total_tokens": 43658496}
|
|
{"current_steps": 13870, "total_steps": 15621, "loss": 0.4018, "lr": 7.567212519981047e-08, "epoch": 0.8879073042698931, "percentage": 88.79, "elapsed_time": "0:52:16", "remaining_time": "0:06:35", "throughput": 13926.23, "total_tokens": 43674304}
|
|
{"current_steps": 13875, "total_steps": 15621, "loss": 0.3382, "lr": 7.524631541967108e-08, "epoch": 0.8882273862108699, "percentage": 88.82, "elapsed_time": "0:52:16", "remaining_time": "0:06:34", "throughput": 13928.45, "total_tokens": 43689536}
|
|
{"current_steps": 13880, "total_steps": 15621, "loss": 0.2903, "lr": 7.482166020536485e-08, "epoch": 0.8885474681518468, "percentage": 88.85, "elapsed_time": "0:52:17", "remaining_time": "0:06:33", "throughput": 13930.96, "total_tokens": 43706496}
|
|
{"current_steps": 13885, "total_steps": 15621, "loss": 0.3108, "lr": 7.439816008707877e-08, "epoch": 0.8888675500928238, "percentage": 88.89, "elapsed_time": "0:52:17", "remaining_time": "0:06:32", "throughput": 13933.08, "total_tokens": 43721408}
|
|
{"current_steps": 13890, "total_steps": 15621, "loss": 0.3216, "lr": 7.397581559355748e-08, "epoch": 0.8891876320338007, "percentage": 88.92, "elapsed_time": "0:52:18", "remaining_time": "0:06:31", "throughput": 13935.45, "total_tokens": 43737536}
|
|
{"current_steps": 13895, "total_steps": 15621, "loss": 0.4116, "lr": 7.355462725210315e-08, "epoch": 0.8895077139747776, "percentage": 88.95, "elapsed_time": "0:52:19", "remaining_time": "0:06:29", "throughput": 13937.64, "total_tokens": 43752640}
|
|
{"current_steps": 13900, "total_steps": 15621, "loss": 0.4081, "lr": 7.313459558857438e-08, "epoch": 0.8898277959157544, "percentage": 88.98, "elapsed_time": "0:52:19", "remaining_time": "0:06:28", "throughput": 13939.93, "total_tokens": 43768384}
|
|
{"current_steps": 13905, "total_steps": 15621, "loss": 0.3108, "lr": 7.271572112738566e-08, "epoch": 0.8901478778567313, "percentage": 89.01, "elapsed_time": "0:52:20", "remaining_time": "0:06:27", "throughput": 13942.25, "total_tokens": 43784320}
|
|
{"current_steps": 13910, "total_steps": 15621, "loss": 0.3582, "lr": 7.229800439150657e-08, "epoch": 0.8904679597977082, "percentage": 89.05, "elapsed_time": "0:52:20", "remaining_time": "0:06:26", "throughput": 13944.4, "total_tokens": 43799232}
|
|
{"current_steps": 13915, "total_steps": 15621, "loss": 0.3721, "lr": 7.188144590246148e-08, "epoch": 0.8907880417386851, "percentage": 89.08, "elapsed_time": "0:52:21", "remaining_time": "0:06:25", "throughput": 13946.78, "total_tokens": 43815360}
|
|
{"current_steps": 13920, "total_steps": 15621, "loss": 0.339, "lr": 7.146604618032848e-08, "epoch": 0.891108123679662, "percentage": 89.11, "elapsed_time": "0:52:22", "remaining_time": "0:06:23", "throughput": 13948.9, "total_tokens": 43830336}
|
|
{"current_steps": 13925, "total_steps": 15621, "loss": 0.4065, "lr": 7.105180574373904e-08, "epoch": 0.8914282056206388, "percentage": 89.14, "elapsed_time": "0:52:22", "remaining_time": "0:06:22", "throughput": 13951.29, "total_tokens": 43846656}
|
|
{"current_steps": 13930, "total_steps": 15621, "loss": 0.3231, "lr": 7.063872510987712e-08, "epoch": 0.8917482875616157, "percentage": 89.17, "elapsed_time": "0:52:23", "remaining_time": "0:06:21", "throughput": 13953.63, "total_tokens": 43862720}
|
|
{"current_steps": 13935, "total_steps": 15621, "loss": 0.3558, "lr": 7.022680479447874e-08, "epoch": 0.8920683695025927, "percentage": 89.21, "elapsed_time": "0:52:24", "remaining_time": "0:06:20", "throughput": 13955.6, "total_tokens": 43876800}
|
|
{"current_steps": 13940, "total_steps": 15621, "loss": 0.2952, "lr": 6.98160453118316e-08, "epoch": 0.8923884514435696, "percentage": 89.24, "elapsed_time": "0:52:24", "remaining_time": "0:06:19", "throughput": 13957.81, "total_tokens": 43892160}
|
|
{"current_steps": 13945, "total_steps": 15621, "loss": 0.333, "lr": 6.940644717477328e-08, "epoch": 0.8927085333845465, "percentage": 89.27, "elapsed_time": "0:52:25", "remaining_time": "0:06:18", "throughput": 13960.18, "total_tokens": 43908416}
|
|
{"current_steps": 13950, "total_steps": 15621, "loss": 0.4213, "lr": 6.899801089469204e-08, "epoch": 0.8930286153255234, "percentage": 89.3, "elapsed_time": "0:52:25", "remaining_time": "0:06:16", "throughput": 13962.37, "total_tokens": 43923712}
|
|
{"current_steps": 13955, "total_steps": 15621, "loss": 0.3555, "lr": 6.85907369815254e-08, "epoch": 0.8933486972665002, "percentage": 89.33, "elapsed_time": "0:52:26", "remaining_time": "0:06:15", "throughput": 13964.65, "total_tokens": 43939520}
|
|
{"current_steps": 13960, "total_steps": 15621, "loss": 0.3895, "lr": 6.81846259437595e-08, "epoch": 0.8936687792074771, "percentage": 89.37, "elapsed_time": "0:52:27", "remaining_time": "0:06:14", "throughput": 13966.81, "total_tokens": 43954688}
|
|
{"current_steps": 13965, "total_steps": 15621, "loss": 0.3146, "lr": 6.77796782884289e-08, "epoch": 0.893988861148454, "percentage": 89.4, "elapsed_time": "0:52:27", "remaining_time": "0:06:13", "throughput": 13968.94, "total_tokens": 43969600}
|
|
{"current_steps": 13970, "total_steps": 15621, "loss": 0.3824, "lr": 6.737589452111526e-08, "epoch": 0.8943089430894309, "percentage": 89.43, "elapsed_time": "0:52:28", "remaining_time": "0:06:12", "throughput": 13971.25, "total_tokens": 43985472}
|
|
{"current_steps": 13975, "total_steps": 15621, "loss": 0.3916, "lr": 6.697327514594786e-08, "epoch": 0.8946290250304078, "percentage": 89.46, "elapsed_time": "0:52:28", "remaining_time": "0:06:10", "throughput": 13973.45, "total_tokens": 44000768}
|
|
{"current_steps": 13980, "total_steps": 15621, "loss": 0.4586, "lr": 6.657182066560118e-08, "epoch": 0.8949491069713846, "percentage": 89.49, "elapsed_time": "0:52:29", "remaining_time": "0:06:09", "throughput": 13975.8, "total_tokens": 44017088}
|
|
{"current_steps": 13985, "total_steps": 15621, "loss": 0.37, "lr": 6.617153158129596e-08, "epoch": 0.8952691889123615, "percentage": 89.53, "elapsed_time": "0:52:30", "remaining_time": "0:06:08", "throughput": 13977.84, "total_tokens": 44031488}
|
|
{"current_steps": 13990, "total_steps": 15621, "loss": 0.337, "lr": 6.577240839279807e-08, "epoch": 0.8955892708533385, "percentage": 89.56, "elapsed_time": "0:52:30", "remaining_time": "0:06:07", "throughput": 13980.12, "total_tokens": 44047296}
|
|
{"current_steps": 13995, "total_steps": 15621, "loss": 0.3143, "lr": 6.537445159841748e-08, "epoch": 0.8959093527943154, "percentage": 89.59, "elapsed_time": "0:52:31", "remaining_time": "0:06:06", "throughput": 13982.51, "total_tokens": 44063744}
|
|
{"current_steps": 14000, "total_steps": 15621, "loss": 0.3936, "lr": 6.497766169500752e-08, "epoch": 0.8962294347352923, "percentage": 89.62, "elapsed_time": "0:52:31", "remaining_time": "0:06:04", "throughput": 13984.77, "total_tokens": 44079168}
|
|
{"current_steps": 14005, "total_steps": 15621, "loss": 0.2643, "lr": 6.458203917796546e-08, "epoch": 0.8965495166762691, "percentage": 89.65, "elapsed_time": "0:52:32", "remaining_time": "0:06:03", "throughput": 13986.83, "total_tokens": 44093824}
|
|
{"current_steps": 14010, "total_steps": 15621, "loss": 0.455, "lr": 6.418758454123041e-08, "epoch": 0.896869598617246, "percentage": 89.69, "elapsed_time": "0:52:33", "remaining_time": "0:06:02", "throughput": 13989.41, "total_tokens": 44111296}
|
|
{"current_steps": 14015, "total_steps": 15621, "loss": 0.3905, "lr": 6.379429827728377e-08, "epoch": 0.8971896805582229, "percentage": 89.72, "elapsed_time": "0:52:33", "remaining_time": "0:06:01", "throughput": 13991.91, "total_tokens": 44128000}
|
|
{"current_steps": 14020, "total_steps": 15621, "loss": 0.3833, "lr": 6.340218087714799e-08, "epoch": 0.8975097624991998, "percentage": 89.75, "elapsed_time": "0:52:34", "remaining_time": "0:06:00", "throughput": 13994.12, "total_tokens": 44143488}
|
|
{"current_steps": 14025, "total_steps": 15621, "loss": 0.3567, "lr": 6.301123283038634e-08, "epoch": 0.8978298444401767, "percentage": 89.78, "elapsed_time": "0:52:35", "remaining_time": "0:05:59", "throughput": 13996.37, "total_tokens": 44158976}
|
|
{"current_steps": 14030, "total_steps": 15621, "loss": 0.319, "lr": 6.262145462510193e-08, "epoch": 0.8981499263811535, "percentage": 89.81, "elapsed_time": "0:52:35", "remaining_time": "0:05:57", "throughput": 13998.87, "total_tokens": 44175808}
|
|
{"current_steps": 14035, "total_steps": 15621, "loss": 0.2817, "lr": 6.223284674793738e-08, "epoch": 0.8984700083221304, "percentage": 89.85, "elapsed_time": "0:52:36", "remaining_time": "0:05:56", "throughput": 14000.92, "total_tokens": 44190336}
|
|
{"current_steps": 14040, "total_steps": 15621, "loss": 0.3835, "lr": 6.184540968407437e-08, "epoch": 0.8987900902631074, "percentage": 89.88, "elapsed_time": "0:52:36", "remaining_time": "0:05:55", "throughput": 14003.11, "total_tokens": 44205696}
|
|
{"current_steps": 14045, "total_steps": 15621, "loss": 0.3546, "lr": 6.145914391723239e-08, "epoch": 0.8991101722040843, "percentage": 89.91, "elapsed_time": "0:52:37", "remaining_time": "0:05:54", "throughput": 14005.49, "total_tokens": 44222016}
|
|
{"current_steps": 14050, "total_steps": 15621, "loss": 0.3285, "lr": 6.107404992966902e-08, "epoch": 0.8994302541450612, "percentage": 89.94, "elapsed_time": "0:52:38", "remaining_time": "0:05:53", "throughput": 14007.92, "total_tokens": 44238592}
|
|
{"current_steps": 14055, "total_steps": 15621, "loss": 0.2517, "lr": 6.069012820217856e-08, "epoch": 0.899750336086038, "percentage": 89.98, "elapsed_time": "0:52:38", "remaining_time": "0:05:51", "throughput": 14010.12, "total_tokens": 44254016}
|
|
{"current_steps": 14060, "total_steps": 15621, "loss": 0.3757, "lr": 6.030737921409168e-08, "epoch": 0.9000704180270149, "percentage": 90.01, "elapsed_time": "0:52:39", "remaining_time": "0:05:50", "throughput": 14012.34, "total_tokens": 44269376}
|
|
{"current_steps": 14065, "total_steps": 15621, "loss": 0.4646, "lr": 5.992580344327503e-08, "epoch": 0.9003904999679918, "percentage": 90.04, "elapsed_time": "0:52:39", "remaining_time": "0:05:49", "throughput": 14014.5, "total_tokens": 44284672}
|
|
{"current_steps": 14070, "total_steps": 15621, "loss": 0.352, "lr": 5.954540136613051e-08, "epoch": 0.9007105819089687, "percentage": 90.07, "elapsed_time": "0:52:40", "remaining_time": "0:05:48", "throughput": 14016.75, "total_tokens": 44300224}
|
|
{"current_steps": 14075, "total_steps": 15621, "loss": 0.3451, "lr": 5.916617345759456e-08, "epoch": 0.9010306638499456, "percentage": 90.1, "elapsed_time": "0:52:41", "remaining_time": "0:05:47", "throughput": 14018.9, "total_tokens": 44315264}
|
|
{"current_steps": 14076, "total_steps": 15621, "eval_loss": 0.3543796241283417, "epoch": 0.901094680238141, "percentage": 90.11, "elapsed_time": "0:53:30", "remaining_time": "0:05:52", "throughput": 13804.79, "total_tokens": 44318848}
|
|
{"current_steps": 14080, "total_steps": 15621, "loss": 0.4234, "lr": 5.878812019113766e-08, "epoch": 0.9013507457909224, "percentage": 90.14, "elapsed_time": "0:54:00", "remaining_time": "0:05:54", "throughput": 13680.08, "total_tokens": 44330176}
|
|
{"current_steps": 14085, "total_steps": 15621, "loss": 0.2892, "lr": 5.84112420387638e-08, "epoch": 0.9016708277318993, "percentage": 90.17, "elapsed_time": "0:54:01", "remaining_time": "0:05:53", "throughput": 13682.21, "total_tokens": 44345152}
|
|
{"current_steps": 14090, "total_steps": 15621, "loss": 0.3656, "lr": 5.8035539471009697e-08, "epoch": 0.9019909096728762, "percentage": 90.2, "elapsed_time": "0:54:01", "remaining_time": "0:05:52", "throughput": 13684.52, "total_tokens": 44361152}
|
|
{"current_steps": 14095, "total_steps": 15621, "loss": 0.4078, "lr": 5.7661012956944253e-08, "epoch": 0.9023109916138532, "percentage": 90.23, "elapsed_time": "0:54:02", "remaining_time": "0:05:51", "throughput": 13686.67, "total_tokens": 44376128}
|
|
{"current_steps": 14100, "total_steps": 15621, "loss": 0.2842, "lr": 5.728766296416876e-08, "epoch": 0.9026310735548301, "percentage": 90.26, "elapsed_time": "0:54:02", "remaining_time": "0:05:49", "throughput": 13689.04, "total_tokens": 44392192}
|
|
{"current_steps": 14105, "total_steps": 15621, "loss": 0.4079, "lr": 5.6915489958814453e-08, "epoch": 0.902951155495807, "percentage": 90.3, "elapsed_time": "0:54:03", "remaining_time": "0:05:48", "throughput": 13691.28, "total_tokens": 44407680}
|
|
{"current_steps": 14110, "total_steps": 15621, "loss": 0.4093, "lr": 5.654449440554399e-08, "epoch": 0.9032712374367838, "percentage": 90.33, "elapsed_time": "0:54:04", "remaining_time": "0:05:47", "throughput": 13693.74, "total_tokens": 44424384}
|
|
{"current_steps": 14115, "total_steps": 15621, "loss": 0.3752, "lr": 5.617467676754972e-08, "epoch": 0.9035913193777607, "percentage": 90.36, "elapsed_time": "0:54:04", "remaining_time": "0:05:46", "throughput": 13695.91, "total_tokens": 44439744}
|
|
{"current_steps": 14120, "total_steps": 15621, "loss": 0.3012, "lr": 5.580603750655344e-08, "epoch": 0.9039114013187376, "percentage": 90.39, "elapsed_time": "0:54:05", "remaining_time": "0:05:44", "throughput": 13697.97, "total_tokens": 44454272}
|
|
{"current_steps": 14125, "total_steps": 15621, "loss": 0.3578, "lr": 5.543857708280497e-08, "epoch": 0.9042314832597145, "percentage": 90.42, "elapsed_time": "0:54:05", "remaining_time": "0:05:43", "throughput": 13700.05, "total_tokens": 44468992}
|
|
{"current_steps": 14130, "total_steps": 15621, "loss": 0.4819, "lr": 5.507229595508367e-08, "epoch": 0.9045515652006914, "percentage": 90.46, "elapsed_time": "0:54:06", "remaining_time": "0:05:42", "throughput": 13702.33, "total_tokens": 44484864}
|
|
{"current_steps": 14135, "total_steps": 15621, "loss": 0.289, "lr": 5.4707194580695504e-08, "epoch": 0.9048716471416682, "percentage": 90.49, "elapsed_time": "0:54:07", "remaining_time": "0:05:41", "throughput": 13704.48, "total_tokens": 44499968}
|
|
{"current_steps": 14140, "total_steps": 15621, "loss": 0.4239, "lr": 5.4343273415473846e-08, "epoch": 0.9051917290826451, "percentage": 90.52, "elapsed_time": "0:54:07", "remaining_time": "0:05:40", "throughput": 13707.15, "total_tokens": 44517952}
|
|
{"current_steps": 14145, "total_steps": 15621, "loss": 0.3421, "lr": 5.3980532913778576e-08, "epoch": 0.905511811023622, "percentage": 90.55, "elapsed_time": "0:54:08", "remaining_time": "0:05:38", "throughput": 13709.29, "total_tokens": 44532928}
|
|
{"current_steps": 14150, "total_steps": 15621, "loss": 0.3955, "lr": 5.361897352849554e-08, "epoch": 0.905831892964599, "percentage": 90.58, "elapsed_time": "0:54:08", "remaining_time": "0:05:37", "throughput": 13711.47, "total_tokens": 44548288}
|
|
{"current_steps": 14155, "total_steps": 15621, "loss": 0.3331, "lr": 5.325859571103586e-08, "epoch": 0.9061519749055759, "percentage": 90.62, "elapsed_time": "0:54:09", "remaining_time": "0:05:36", "throughput": 13713.67, "total_tokens": 44563712}
|
|
{"current_steps": 14160, "total_steps": 15621, "loss": 0.3333, "lr": 5.289939991133508e-08, "epoch": 0.9064720568465527, "percentage": 90.65, "elapsed_time": "0:54:10", "remaining_time": "0:05:35", "throughput": 13715.89, "total_tokens": 44579264}
|
|
{"current_steps": 14165, "total_steps": 15621, "loss": 0.2384, "lr": 5.2541386577853895e-08, "epoch": 0.9067921387875296, "percentage": 90.68, "elapsed_time": "0:54:10", "remaining_time": "0:05:34", "throughput": 13718.0, "total_tokens": 44594176}
|
|
{"current_steps": 14170, "total_steps": 15621, "loss": 0.2502, "lr": 5.2184556157576e-08, "epoch": 0.9071122207285065, "percentage": 90.71, "elapsed_time": "0:54:11", "remaining_time": "0:05:32", "throughput": 13720.22, "total_tokens": 44609664}
|
|
{"current_steps": 14175, "total_steps": 15621, "loss": 0.3649, "lr": 5.1828909096008234e-08, "epoch": 0.9074323026694834, "percentage": 90.74, "elapsed_time": "0:54:12", "remaining_time": "0:05:31", "throughput": 13722.76, "total_tokens": 44626944}
|
|
{"current_steps": 14180, "total_steps": 15621, "loss": 0.2331, "lr": 5.14744458371803e-08, "epoch": 0.9077523846104603, "percentage": 90.78, "elapsed_time": "0:54:12", "remaining_time": "0:05:30", "throughput": 13725.16, "total_tokens": 44643520}
|
|
{"current_steps": 14185, "total_steps": 15621, "loss": 0.5075, "lr": 5.1121166823643646e-08, "epoch": 0.9080724665514371, "percentage": 90.81, "elapsed_time": "0:54:13", "remaining_time": "0:05:29", "throughput": 13727.16, "total_tokens": 44657984}
|
|
{"current_steps": 14190, "total_steps": 15621, "loss": 0.376, "lr": 5.076907249647122e-08, "epoch": 0.908392548492414, "percentage": 90.84, "elapsed_time": "0:54:13", "remaining_time": "0:05:28", "throughput": 13729.29, "total_tokens": 44673024}
|
|
{"current_steps": 14195, "total_steps": 15621, "loss": 0.412, "lr": 5.0418163295257055e-08, "epoch": 0.9087126304333909, "percentage": 90.87, "elapsed_time": "0:54:14", "remaining_time": "0:05:26", "throughput": 13731.27, "total_tokens": 44687424}
|
|
{"current_steps": 14200, "total_steps": 15621, "loss": 0.2867, "lr": 5.006843965811536e-08, "epoch": 0.9090327123743679, "percentage": 90.9, "elapsed_time": "0:54:15", "remaining_time": "0:05:25", "throughput": 13733.49, "total_tokens": 44702976}
|
|
{"current_steps": 14205, "total_steps": 15621, "loss": 0.482, "lr": 4.971990202168008e-08, "epoch": 0.9093527943153448, "percentage": 90.94, "elapsed_time": "0:54:15", "remaining_time": "0:05:24", "throughput": 13735.65, "total_tokens": 44718144}
|
|
{"current_steps": 14210, "total_steps": 15621, "loss": 0.3277, "lr": 4.9372550821104697e-08, "epoch": 0.9096728762563216, "percentage": 90.97, "elapsed_time": "0:54:16", "remaining_time": "0:05:23", "throughput": 13738.11, "total_tokens": 44734912}
|
|
{"current_steps": 14215, "total_steps": 15621, "loss": 0.311, "lr": 4.902638649006119e-08, "epoch": 0.9099929581972985, "percentage": 91.0, "elapsed_time": "0:54:16", "remaining_time": "0:05:22", "throughput": 13740.24, "total_tokens": 44749888}
|
|
{"current_steps": 14220, "total_steps": 15621, "loss": 0.3201, "lr": 4.868140946073973e-08, "epoch": 0.9103130401382754, "percentage": 91.03, "elapsed_time": "0:54:17", "remaining_time": "0:05:20", "throughput": 13742.26, "total_tokens": 44764544}
|
|
{"current_steps": 14225, "total_steps": 15621, "loss": 0.2995, "lr": 4.833762016384857e-08, "epoch": 0.9106331220792523, "percentage": 91.06, "elapsed_time": "0:54:18", "remaining_time": "0:05:19", "throughput": 13744.64, "total_tokens": 44780992}
|
|
{"current_steps": 14230, "total_steps": 15621, "loss": 0.3879, "lr": 4.799501902861214e-08, "epoch": 0.9109532040202292, "percentage": 91.1, "elapsed_time": "0:54:18", "remaining_time": "0:05:18", "throughput": 13746.83, "total_tokens": 44796672}
|
|
{"current_steps": 14235, "total_steps": 15621, "loss": 0.4313, "lr": 4.765360648277217e-08, "epoch": 0.911273285961206, "percentage": 91.13, "elapsed_time": "0:54:19", "remaining_time": "0:05:17", "throughput": 13749.04, "total_tokens": 44812224}
|
|
{"current_steps": 14240, "total_steps": 15621, "loss": 0.4254, "lr": 4.7313382952586465e-08, "epoch": 0.9115933679021829, "percentage": 91.16, "elapsed_time": "0:54:19", "remaining_time": "0:05:16", "throughput": 13751.13, "total_tokens": 44827136}
|
|
{"current_steps": 14245, "total_steps": 15621, "loss": 0.3787, "lr": 4.6974348862828027e-08, "epoch": 0.9119134498431598, "percentage": 91.19, "elapsed_time": "0:54:20", "remaining_time": "0:05:14", "throughput": 13753.24, "total_tokens": 44842176}
|
|
{"current_steps": 14250, "total_steps": 15621, "loss": 0.4211, "lr": 4.663650463678448e-08, "epoch": 0.9122335317841367, "percentage": 91.22, "elapsed_time": "0:54:21", "remaining_time": "0:05:13", "throughput": 13755.63, "total_tokens": 44858880}
|
|
{"current_steps": 14255, "total_steps": 15621, "loss": 0.4399, "lr": 4.629985069625875e-08, "epoch": 0.9125536137251137, "percentage": 91.26, "elapsed_time": "0:54:21", "remaining_time": "0:05:12", "throughput": 13758.02, "total_tokens": 44875328}
|
|
{"current_steps": 14260, "total_steps": 15621, "loss": 0.3625, "lr": 4.596438746156728e-08, "epoch": 0.9128736956660906, "percentage": 91.29, "elapsed_time": "0:54:22", "remaining_time": "0:05:11", "throughput": 13760.46, "total_tokens": 44892032}
|
|
{"current_steps": 14265, "total_steps": 15621, "loss": 0.3618, "lr": 4.563011535153949e-08, "epoch": 0.9131937776070674, "percentage": 91.32, "elapsed_time": "0:54:22", "remaining_time": "0:05:10", "throughput": 13762.61, "total_tokens": 44907328}
|
|
{"current_steps": 14270, "total_steps": 15621, "loss": 0.2686, "lr": 4.52970347835181e-08, "epoch": 0.9135138595480443, "percentage": 91.35, "elapsed_time": "0:54:23", "remaining_time": "0:05:08", "throughput": 13764.74, "total_tokens": 44922560}
|
|
{"current_steps": 14275, "total_steps": 15621, "loss": 0.3256, "lr": 4.496514617335845e-08, "epoch": 0.9138339414890212, "percentage": 91.38, "elapsed_time": "0:54:24", "remaining_time": "0:05:07", "throughput": 13766.84, "total_tokens": 44937728}
|
|
{"current_steps": 14280, "total_steps": 15621, "loss": 0.3568, "lr": 4.4634449935427197e-08, "epoch": 0.9141540234299981, "percentage": 91.42, "elapsed_time": "0:54:24", "remaining_time": "0:05:06", "throughput": 13769.25, "total_tokens": 44954560}
|
|
{"current_steps": 14285, "total_steps": 15621, "loss": 0.3032, "lr": 4.430494648260219e-08, "epoch": 0.914474105370975, "percentage": 91.45, "elapsed_time": "0:54:25", "remaining_time": "0:05:05", "throughput": 13771.69, "total_tokens": 44971520}
|
|
{"current_steps": 14290, "total_steps": 15621, "loss": 0.4391, "lr": 4.397663622627279e-08, "epoch": 0.9147941873119518, "percentage": 91.48, "elapsed_time": "0:54:26", "remaining_time": "0:05:04", "throughput": 13773.92, "total_tokens": 44987392}
|
|
{"current_steps": 14295, "total_steps": 15621, "loss": 0.3116, "lr": 4.364951957633789e-08, "epoch": 0.9151142692529287, "percentage": 91.51, "elapsed_time": "0:54:26", "remaining_time": "0:05:03", "throughput": 13776.08, "total_tokens": 45002688}
|
|
{"current_steps": 14300, "total_steps": 15621, "loss": 0.2874, "lr": 4.332359694120669e-08, "epoch": 0.9154343511939056, "percentage": 91.54, "elapsed_time": "0:54:27", "remaining_time": "0:05:01", "throughput": 13778.19, "total_tokens": 45017792}
|
|
{"current_steps": 14305, "total_steps": 15621, "loss": 0.3561, "lr": 4.299886872779734e-08, "epoch": 0.9157544331348826, "percentage": 91.58, "elapsed_time": "0:54:27", "remaining_time": "0:05:00", "throughput": 13780.24, "total_tokens": 45032640}
|
|
{"current_steps": 14310, "total_steps": 15621, "loss": 0.2945, "lr": 4.267533534153678e-08, "epoch": 0.9160745150758595, "percentage": 91.61, "elapsed_time": "0:54:28", "remaining_time": "0:04:59", "throughput": 13782.46, "total_tokens": 45048256}
|
|
{"current_steps": 14315, "total_steps": 15621, "loss": 0.3251, "lr": 4.2352997186360316e-08, "epoch": 0.9163945970168363, "percentage": 91.64, "elapsed_time": "0:54:29", "remaining_time": "0:04:58", "throughput": 13784.74, "total_tokens": 45064192}
|
|
{"current_steps": 14320, "total_steps": 15621, "loss": 0.321, "lr": 4.203185466471082e-08, "epoch": 0.9167146789578132, "percentage": 91.67, "elapsed_time": "0:54:29", "remaining_time": "0:04:57", "throughput": 13786.84, "total_tokens": 45079488}
|
|
{"current_steps": 14325, "total_steps": 15621, "loss": 0.3791, "lr": 4.1711908177538556e-08, "epoch": 0.9170347608987901, "percentage": 91.7, "elapsed_time": "0:54:30", "remaining_time": "0:04:55", "throughput": 13789.11, "total_tokens": 45095616}
|
|
{"current_steps": 14330, "total_steps": 15621, "loss": 0.3797, "lr": 4.139315812430055e-08, "epoch": 0.917354842839767, "percentage": 91.74, "elapsed_time": "0:54:30", "remaining_time": "0:04:54", "throughput": 13791.21, "total_tokens": 45110592}
|
|
{"current_steps": 14335, "total_steps": 15621, "loss": 0.3756, "lr": 4.1075604902959915e-08, "epoch": 0.9176749247807439, "percentage": 91.77, "elapsed_time": "0:54:31", "remaining_time": "0:04:53", "throughput": 13793.58, "total_tokens": 45127168}
|
|
{"current_steps": 14340, "total_steps": 15621, "loss": 0.3157, "lr": 4.07592489099855e-08, "epoch": 0.9179950067217207, "percentage": 91.8, "elapsed_time": "0:54:32", "remaining_time": "0:04:52", "throughput": 13795.66, "total_tokens": 45142208}
|
|
{"current_steps": 14345, "total_steps": 15621, "loss": 0.3917, "lr": 4.044409054035147e-08, "epoch": 0.9183150886626976, "percentage": 91.83, "elapsed_time": "0:54:32", "remaining_time": "0:04:51", "throughput": 13797.74, "total_tokens": 45157184}
|
|
{"current_steps": 14350, "total_steps": 15621, "loss": 0.3891, "lr": 4.0130130187537195e-08, "epoch": 0.9186351706036745, "percentage": 91.86, "elapsed_time": "0:54:33", "remaining_time": "0:04:49", "throughput": 13800.26, "total_tokens": 45174464}
|
|
{"current_steps": 14355, "total_steps": 15621, "loss": 0.3157, "lr": 3.981736824352522e-08, "epoch": 0.9189552525446514, "percentage": 91.9, "elapsed_time": "0:54:34", "remaining_time": "0:04:48", "throughput": 13802.24, "total_tokens": 45188992}
|
|
{"current_steps": 14360, "total_steps": 15621, "loss": 0.4661, "lr": 3.950580509880286e-08, "epoch": 0.9192753344856284, "percentage": 91.93, "elapsed_time": "0:54:34", "remaining_time": "0:04:47", "throughput": 13804.3, "total_tokens": 45204032}
|
|
{"current_steps": 14365, "total_steps": 15621, "loss": 0.4012, "lr": 3.9195441142360066e-08, "epoch": 0.9195954164266052, "percentage": 91.96, "elapsed_time": "0:54:35", "remaining_time": "0:04:46", "throughput": 13806.4, "total_tokens": 45219328}
|
|
{"current_steps": 14370, "total_steps": 15621, "loss": 0.3271, "lr": 3.888627676169043e-08, "epoch": 0.9199154983675821, "percentage": 91.99, "elapsed_time": "0:54:35", "remaining_time": "0:04:45", "throughput": 13808.71, "total_tokens": 45235584}
|
|
{"current_steps": 14375, "total_steps": 15621, "loss": 0.3709, "lr": 3.857831234278886e-08, "epoch": 0.920235580308559, "percentage": 92.02, "elapsed_time": "0:54:36", "remaining_time": "0:04:43", "throughput": 13810.83, "total_tokens": 45250880}
|
|
{"current_steps": 14380, "total_steps": 15621, "loss": 0.4085, "lr": 3.827154827015255e-08, "epoch": 0.9205556622495359, "percentage": 92.06, "elapsed_time": "0:54:37", "remaining_time": "0:04:42", "throughput": 13813.07, "total_tokens": 45266752}
|
|
{"current_steps": 14385, "total_steps": 15621, "loss": 0.2914, "lr": 3.7965984926780383e-08, "epoch": 0.9208757441905128, "percentage": 92.09, "elapsed_time": "0:54:37", "remaining_time": "0:04:41", "throughput": 13815.3, "total_tokens": 45282496}
|
|
{"current_steps": 14390, "total_steps": 15621, "loss": 0.3577, "lr": 3.766162269417139e-08, "epoch": 0.9211958261314896, "percentage": 92.12, "elapsed_time": "0:54:38", "remaining_time": "0:04:40", "throughput": 13817.28, "total_tokens": 45297024}
|
|
{"current_steps": 14395, "total_steps": 15621, "loss": 0.3693, "lr": 3.73584619523255e-08, "epoch": 0.9215159080724665, "percentage": 92.15, "elapsed_time": "0:54:38", "remaining_time": "0:04:39", "throughput": 13819.74, "total_tokens": 45314176}
|
|
{"current_steps": 14400, "total_steps": 15621, "loss": 0.3557, "lr": 3.7056503079742616e-08, "epoch": 0.9218359900134434, "percentage": 92.18, "elapsed_time": "0:54:39", "remaining_time": "0:04:38", "throughput": 13821.85, "total_tokens": 45329344}
|
|
{"current_steps": 14405, "total_steps": 15621, "loss": 0.3428, "lr": 3.6755746453421945e-08, "epoch": 0.9221560719544203, "percentage": 92.22, "elapsed_time": "0:54:40", "remaining_time": "0:04:36", "throughput": 13823.96, "total_tokens": 45344384}
|
|
{"current_steps": 14410, "total_steps": 15621, "loss": 0.2869, "lr": 3.645619244886145e-08, "epoch": 0.9224761538953972, "percentage": 92.25, "elapsed_time": "0:54:40", "remaining_time": "0:04:35", "throughput": 13826.16, "total_tokens": 45360192}
|
|
{"current_steps": 14415, "total_steps": 15621, "loss": 0.3103, "lr": 3.615784144005796e-08, "epoch": 0.9227962358363742, "percentage": 92.28, "elapsed_time": "0:54:41", "remaining_time": "0:04:34", "throughput": 13828.37, "total_tokens": 45376000}
|
|
{"current_steps": 14420, "total_steps": 15621, "loss": 0.4093, "lr": 3.5860693799506184e-08, "epoch": 0.923116317777351, "percentage": 92.31, "elapsed_time": "0:54:41", "remaining_time": "0:04:33", "throughput": 13830.33, "total_tokens": 45390400}
|
|
{"current_steps": 14425, "total_steps": 15621, "loss": 0.4518, "lr": 3.5564749898198466e-08, "epoch": 0.9234363997183279, "percentage": 92.34, "elapsed_time": "0:54:42", "remaining_time": "0:04:32", "throughput": 13832.68, "total_tokens": 45406976}
|
|
{"current_steps": 14430, "total_steps": 15621, "loss": 0.3481, "lr": 3.527001010562425e-08, "epoch": 0.9237564816593048, "percentage": 92.38, "elapsed_time": "0:54:43", "remaining_time": "0:04:30", "throughput": 13834.78, "total_tokens": 45422080}
|
|
{"current_steps": 14435, "total_steps": 15621, "loss": 0.3429, "lr": 3.4976474789769504e-08, "epoch": 0.9240765636002817, "percentage": 92.41, "elapsed_time": "0:54:43", "remaining_time": "0:04:29", "throughput": 13837.28, "total_tokens": 45439296}
|
|
{"current_steps": 14440, "total_steps": 15621, "loss": 0.2983, "lr": 3.4684144317116636e-08, "epoch": 0.9243966455412586, "percentage": 92.44, "elapsed_time": "0:54:44", "remaining_time": "0:04:28", "throughput": 13839.37, "total_tokens": 45454208}
|
|
{"current_steps": 14445, "total_steps": 15621, "loss": 0.3001, "lr": 3.439301905264369e-08, "epoch": 0.9247167274822354, "percentage": 92.47, "elapsed_time": "0:54:45", "remaining_time": "0:04:27", "throughput": 13841.65, "total_tokens": 45470400}
|
|
{"current_steps": 14450, "total_steps": 15621, "loss": 0.3212, "lr": 3.410309935982403e-08, "epoch": 0.9250368094232123, "percentage": 92.5, "elapsed_time": "0:54:45", "remaining_time": "0:04:26", "throughput": 13843.91, "total_tokens": 45486528}
|
|
{"current_steps": 14455, "total_steps": 15621, "loss": 0.3429, "lr": 3.381438560062555e-08, "epoch": 0.9253568913641892, "percentage": 92.54, "elapsed_time": "0:54:46", "remaining_time": "0:04:25", "throughput": 13845.98, "total_tokens": 45501440}
|
|
{"current_steps": 14460, "total_steps": 15621, "loss": 0.3181, "lr": 3.3526878135511025e-08, "epoch": 0.9256769733051661, "percentage": 92.57, "elapsed_time": "0:54:46", "remaining_time": "0:04:23", "throughput": 13848.3, "total_tokens": 45517760}
|
|
{"current_steps": 14465, "total_steps": 15621, "loss": 0.3642, "lr": 3.324057732343666e-08, "epoch": 0.9259970552461431, "percentage": 92.6, "elapsed_time": "0:54:47", "remaining_time": "0:04:22", "throughput": 13850.41, "total_tokens": 45533056}
|
|
{"current_steps": 14470, "total_steps": 15621, "loss": 0.4131, "lr": 3.295548352185262e-08, "epoch": 0.9263171371871199, "percentage": 92.63, "elapsed_time": "0:54:48", "remaining_time": "0:04:21", "throughput": 13852.65, "total_tokens": 45549248}
|
|
{"current_steps": 14475, "total_steps": 15621, "loss": 0.3477, "lr": 3.2671597086701753e-08, "epoch": 0.9266372191280968, "percentage": 92.66, "elapsed_time": "0:54:48", "remaining_time": "0:04:20", "throughput": 13855.02, "total_tokens": 45565760}
|
|
{"current_steps": 14480, "total_steps": 15621, "loss": 0.3246, "lr": 3.238891837241964e-08, "epoch": 0.9269573010690737, "percentage": 92.7, "elapsed_time": "0:54:49", "remaining_time": "0:04:19", "throughput": 13857.23, "total_tokens": 45581568}
|
|
{"current_steps": 14485, "total_steps": 15621, "loss": 0.4038, "lr": 3.210744773193386e-08, "epoch": 0.9272773830100506, "percentage": 92.73, "elapsed_time": "0:54:49", "remaining_time": "0:04:18", "throughput": 13859.38, "total_tokens": 45596928}
|
|
{"current_steps": 14490, "total_steps": 15621, "loss": 0.2948, "lr": 3.182718551666386e-08, "epoch": 0.9275974649510275, "percentage": 92.76, "elapsed_time": "0:54:50", "remaining_time": "0:04:16", "throughput": 13861.61, "total_tokens": 45612800}
|
|
{"current_steps": 14495, "total_steps": 15621, "loss": 0.4114, "lr": 3.154813207652063e-08, "epoch": 0.9279175468920043, "percentage": 92.79, "elapsed_time": "0:54:51", "remaining_time": "0:04:15", "throughput": 13863.66, "total_tokens": 45627584}
|
|
{"current_steps": 14500, "total_steps": 15621, "loss": 0.3379, "lr": 3.1270287759905143e-08, "epoch": 0.9282376288329812, "percentage": 92.82, "elapsed_time": "0:54:51", "remaining_time": "0:04:14", "throughput": 13865.94, "total_tokens": 45643840}
|
|
{"current_steps": 14505, "total_steps": 15621, "loss": 0.2884, "lr": 3.0993652913709476e-08, "epoch": 0.9285577107739581, "percentage": 92.86, "elapsed_time": "0:54:52", "remaining_time": "0:04:13", "throughput": 13868.06, "total_tokens": 45659072}
|
|
{"current_steps": 14510, "total_steps": 15621, "loss": 0.482, "lr": 3.0718227883315796e-08, "epoch": 0.928877792714935, "percentage": 92.89, "elapsed_time": "0:54:53", "remaining_time": "0:04:12", "throughput": 13870.36, "total_tokens": 45675328}
|
|
{"current_steps": 14515, "total_steps": 15621, "loss": 0.368, "lr": 3.044401301259503e-08, "epoch": 0.9291978746559119, "percentage": 92.92, "elapsed_time": "0:54:53", "remaining_time": "0:04:10", "throughput": 13872.51, "total_tokens": 45690816}
|
|
{"current_steps": 14520, "total_steps": 15621, "loss": 0.3333, "lr": 3.017100864390787e-08, "epoch": 0.9295179565968889, "percentage": 92.95, "elapsed_time": "0:54:54", "remaining_time": "0:04:09", "throughput": 13874.66, "total_tokens": 45706432}
|
|
{"current_steps": 14525, "total_steps": 15621, "loss": 0.3446, "lr": 2.9899215118103446e-08, "epoch": 0.9298380385378657, "percentage": 92.98, "elapsed_time": "0:54:54", "remaining_time": "0:04:08", "throughput": 13876.82, "total_tokens": 45721920}
|
|
{"current_steps": 14530, "total_steps": 15621, "loss": 0.3433, "lr": 2.9628632774519435e-08, "epoch": 0.9301581204788426, "percentage": 93.02, "elapsed_time": "0:54:55", "remaining_time": "0:04:07", "throughput": 13879.09, "total_tokens": 45738048}
|
|
{"current_steps": 14535, "total_steps": 15621, "loss": 0.3308, "lr": 2.9359261950980485e-08, "epoch": 0.9304782024198195, "percentage": 93.05, "elapsed_time": "0:54:56", "remaining_time": "0:04:06", "throughput": 13881.3, "total_tokens": 45753856}
|
|
{"current_steps": 14540, "total_steps": 15621, "loss": 0.3015, "lr": 2.90911029837998e-08, "epoch": 0.9307982843607964, "percentage": 93.08, "elapsed_time": "0:54:56", "remaining_time": "0:04:05", "throughput": 13883.35, "total_tokens": 45768704}
|
|
{"current_steps": 14545, "total_steps": 15621, "loss": 0.2789, "lr": 2.8824156207776673e-08, "epoch": 0.9311183663017732, "percentage": 93.11, "elapsed_time": "0:54:57", "remaining_time": "0:04:03", "throughput": 13885.47, "total_tokens": 45783936}
|
|
{"current_steps": 14550, "total_steps": 15621, "loss": 0.4514, "lr": 2.8558421956197397e-08, "epoch": 0.9314384482427501, "percentage": 93.14, "elapsed_time": "0:54:57", "remaining_time": "0:04:02", "throughput": 13887.78, "total_tokens": 45800320}
|
|
{"current_steps": 14555, "total_steps": 15621, "loss": 0.3864, "lr": 2.829390056083436e-08, "epoch": 0.931758530183727, "percentage": 93.18, "elapsed_time": "0:54:58", "remaining_time": "0:04:01", "throughput": 13890.05, "total_tokens": 45816512}
|
|
{"current_steps": 14560, "total_steps": 15621, "loss": 0.3037, "lr": 2.8030592351945492e-08, "epoch": 0.9320786121247039, "percentage": 93.21, "elapsed_time": "0:54:59", "remaining_time": "0:04:00", "throughput": 13892.19, "total_tokens": 45831936}
|
|
{"current_steps": 14565, "total_steps": 15621, "loss": 0.2968, "lr": 2.776849765827427e-08, "epoch": 0.9323986940656808, "percentage": 93.24, "elapsed_time": "0:54:59", "remaining_time": "0:03:59", "throughput": 13894.22, "total_tokens": 45846784}
|
|
{"current_steps": 14570, "total_steps": 15621, "loss": 0.4282, "lr": 2.750761680704905e-08, "epoch": 0.9327187760066578, "percentage": 93.27, "elapsed_time": "0:55:00", "remaining_time": "0:03:58", "throughput": 13896.32, "total_tokens": 45862080}
|
|
{"current_steps": 14575, "total_steps": 15621, "loss": 0.3937, "lr": 2.724795012398251e-08, "epoch": 0.9330388579476346, "percentage": 93.3, "elapsed_time": "0:55:00", "remaining_time": "0:03:56", "throughput": 13898.61, "total_tokens": 45878528}
|
|
{"current_steps": 14580, "total_steps": 15621, "loss": 0.3737, "lr": 2.6989497933271543e-08, "epoch": 0.9333589398886115, "percentage": 93.34, "elapsed_time": "0:55:01", "remaining_time": "0:03:55", "throughput": 13900.76, "total_tokens": 45894016}
|
|
{"current_steps": 14585, "total_steps": 15621, "loss": 0.3295, "lr": 2.673226055759692e-08, "epoch": 0.9336790218295884, "percentage": 93.37, "elapsed_time": "0:55:02", "remaining_time": "0:03:54", "throughput": 13902.87, "total_tokens": 45909504}
|
|
{"current_steps": 14590, "total_steps": 15621, "loss": 0.338, "lr": 2.6476238318122402e-08, "epoch": 0.9339991037705653, "percentage": 93.4, "elapsed_time": "0:55:02", "remaining_time": "0:03:53", "throughput": 13905.1, "total_tokens": 45925376}
|
|
{"current_steps": 14595, "total_steps": 15621, "loss": 0.3956, "lr": 2.6221431534494742e-08, "epoch": 0.9343191857115422, "percentage": 93.43, "elapsed_time": "0:55:03", "remaining_time": "0:03:52", "throughput": 13907.09, "total_tokens": 45940224}
|
|
{"current_steps": 14600, "total_steps": 15621, "loss": 0.3521, "lr": 2.5967840524843243e-08, "epoch": 0.934639267652519, "percentage": 93.46, "elapsed_time": "0:55:03", "remaining_time": "0:03:51", "throughput": 13909.12, "total_tokens": 45955072}
|
|
{"current_steps": 14605, "total_steps": 15621, "loss": 0.4287, "lr": 2.5715465605779195e-08, "epoch": 0.9349593495934959, "percentage": 93.5, "elapsed_time": "0:55:04", "remaining_time": "0:03:49", "throughput": 13911.2, "total_tokens": 45970240}
|
|
{"current_steps": 14610, "total_steps": 15621, "loss": 0.406, "lr": 2.5464307092395777e-08, "epoch": 0.9352794315344728, "percentage": 93.53, "elapsed_time": "0:55:05", "remaining_time": "0:03:48", "throughput": 13913.35, "total_tokens": 45985856}
|
|
{"current_steps": 14615, "total_steps": 15621, "loss": 0.3398, "lr": 2.5214365298267148e-08, "epoch": 0.9355995134754497, "percentage": 93.56, "elapsed_time": "0:55:05", "remaining_time": "0:03:47", "throughput": 13915.3, "total_tokens": 46000256}
|
|
{"current_steps": 14620, "total_steps": 15621, "loss": 0.32, "lr": 2.4965640535448917e-08, "epoch": 0.9359195954164266, "percentage": 93.59, "elapsed_time": "0:55:06", "remaining_time": "0:03:46", "throughput": 13917.41, "total_tokens": 46015616}
|
|
{"current_steps": 14625, "total_steps": 15621, "loss": 0.3741, "lr": 2.471813311447657e-08, "epoch": 0.9362396773574035, "percentage": 93.62, "elapsed_time": "0:55:06", "remaining_time": "0:03:45", "throughput": 13919.52, "total_tokens": 46031040}
|
|
{"current_steps": 14630, "total_steps": 15621, "loss": 0.3304, "lr": 2.4471843344365915e-08, "epoch": 0.9365597592983804, "percentage": 93.66, "elapsed_time": "0:55:07", "remaining_time": "0:03:44", "throughput": 13921.6, "total_tokens": 46046016}
|
|
{"current_steps": 14635, "total_steps": 15621, "loss": 0.2715, "lr": 2.42267715326131e-08, "epoch": 0.9368798412393573, "percentage": 93.69, "elapsed_time": "0:55:08", "remaining_time": "0:03:42", "throughput": 13923.93, "total_tokens": 46062528}
|
|
{"current_steps": 14640, "total_steps": 15621, "loss": 0.3426, "lr": 2.3982917985192697e-08, "epoch": 0.9371999231803342, "percentage": 93.72, "elapsed_time": "0:55:08", "remaining_time": "0:03:41", "throughput": 13926.11, "total_tokens": 46078144}
|
|
{"current_steps": 14645, "total_steps": 15621, "loss": 0.3748, "lr": 2.3740283006558838e-08, "epoch": 0.9375200051213111, "percentage": 93.75, "elapsed_time": "0:55:09", "remaining_time": "0:03:40", "throughput": 13928.82, "total_tokens": 46096896}
|
|
{"current_steps": 14650, "total_steps": 15621, "loss": 0.3715, "lr": 2.349886689964431e-08, "epoch": 0.9378400870622879, "percentage": 93.78, "elapsed_time": "0:55:10", "remaining_time": "0:03:39", "throughput": 13930.88, "total_tokens": 46111808}
|
|
{"current_steps": 14655, "total_steps": 15621, "loss": 0.2804, "lr": 2.32586699658599e-08, "epoch": 0.9381601690032648, "percentage": 93.82, "elapsed_time": "0:55:10", "remaining_time": "0:03:38", "throughput": 13933.14, "total_tokens": 46127936}
|
|
{"current_steps": 14660, "total_steps": 15621, "loss": 0.3522, "lr": 2.3019692505094056e-08, "epoch": 0.9384802509442417, "percentage": 93.85, "elapsed_time": "0:55:11", "remaining_time": "0:03:37", "throughput": 13935.17, "total_tokens": 46142848}
|
|
{"current_steps": 14665, "total_steps": 15621, "loss": 0.5364, "lr": 2.2781934815713223e-08, "epoch": 0.9388003328852186, "percentage": 93.88, "elapsed_time": "0:55:11", "remaining_time": "0:03:35", "throughput": 13937.36, "total_tokens": 46158848}
|
|
{"current_steps": 14670, "total_steps": 15621, "loss": 0.3566, "lr": 2.254539719456061e-08, "epoch": 0.9391204148261955, "percentage": 93.91, "elapsed_time": "0:55:12", "remaining_time": "0:03:34", "throughput": 13939.58, "total_tokens": 46174912}
|
|
{"current_steps": 14675, "total_steps": 15621, "loss": 0.2587, "lr": 2.231007993695633e-08, "epoch": 0.9394404967671725, "percentage": 93.94, "elapsed_time": "0:55:13", "remaining_time": "0:03:33", "throughput": 13941.51, "total_tokens": 46189248}
|
|
{"current_steps": 14680, "total_steps": 15621, "loss": 0.314, "lr": 2.2075983336696357e-08, "epoch": 0.9397605787081493, "percentage": 93.98, "elapsed_time": "0:55:13", "remaining_time": "0:03:32", "throughput": 13943.65, "total_tokens": 46204928}
|
|
{"current_steps": 14685, "total_steps": 15621, "loss": 0.3916, "lr": 2.1843107686053353e-08, "epoch": 0.9400806606491262, "percentage": 94.01, "elapsed_time": "0:55:14", "remaining_time": "0:03:31", "throughput": 13945.71, "total_tokens": 46220160}
|
|
{"current_steps": 14690, "total_steps": 15621, "loss": 0.4249, "lr": 2.1611453275775405e-08, "epoch": 0.9404007425901031, "percentage": 94.04, "elapsed_time": "0:55:14", "remaining_time": "0:03:30", "throughput": 13947.86, "total_tokens": 46235584}
|
|
{"current_steps": 14695, "total_steps": 15621, "loss": 0.2691, "lr": 2.138102039508538e-08, "epoch": 0.94072082453108, "percentage": 94.07, "elapsed_time": "0:55:15", "remaining_time": "0:03:28", "throughput": 13950.12, "total_tokens": 46251904}
|
|
{"current_steps": 14700, "total_steps": 15621, "loss": 0.3948, "lr": 2.1151809331681703e-08, "epoch": 0.9410409064720568, "percentage": 94.1, "elapsed_time": "0:55:16", "remaining_time": "0:03:27", "throughput": 13952.33, "total_tokens": 46268032}
|
|
{"current_steps": 14705, "total_steps": 15621, "loss": 0.3362, "lr": 2.092382037173701e-08, "epoch": 0.9413609884130337, "percentage": 94.14, "elapsed_time": "0:55:16", "remaining_time": "0:03:26", "throughput": 13954.43, "total_tokens": 46283392}
|
|
{"current_steps": 14710, "total_steps": 15621, "loss": 0.2966, "lr": 2.0697053799898277e-08, "epoch": 0.9416810703540106, "percentage": 94.17, "elapsed_time": "0:55:17", "remaining_time": "0:03:25", "throughput": 13956.52, "total_tokens": 46298752}
|
|
{"current_steps": 14715, "total_steps": 15621, "loss": 0.3392, "lr": 2.0471509899286144e-08, "epoch": 0.9420011522949875, "percentage": 94.2, "elapsed_time": "0:55:17", "remaining_time": "0:03:24", "throughput": 13958.71, "total_tokens": 46314624}
|
|
{"current_steps": 14720, "total_steps": 15621, "loss": 0.3403, "lr": 2.0247188951494797e-08, "epoch": 0.9423212342359644, "percentage": 94.23, "elapsed_time": "0:55:18", "remaining_time": "0:03:23", "throughput": 13961.11, "total_tokens": 46331712}
|
|
{"current_steps": 14725, "total_steps": 15621, "loss": 0.5398, "lr": 2.0024091236591655e-08, "epoch": 0.9426413161769412, "percentage": 94.26, "elapsed_time": "0:55:19", "remaining_time": "0:03:21", "throughput": 13963.2, "total_tokens": 46347200}
|
|
{"current_steps": 14730, "total_steps": 15621, "loss": 0.3166, "lr": 1.98022170331168e-08, "epoch": 0.9429613981179182, "percentage": 94.3, "elapsed_time": "0:55:19", "remaining_time": "0:03:20", "throughput": 13965.42, "total_tokens": 46363008}
|
|
{"current_steps": 14735, "total_steps": 15621, "loss": 0.3797, "lr": 1.9581566618082744e-08, "epoch": 0.9432814800588951, "percentage": 94.33, "elapsed_time": "0:55:20", "remaining_time": "0:03:19", "throughput": 13967.6, "total_tokens": 46378816}
|
|
{"current_steps": 14740, "total_steps": 15621, "loss": 0.3915, "lr": 1.9362140266974025e-08, "epoch": 0.943601561999872, "percentage": 94.36, "elapsed_time": "0:55:21", "remaining_time": "0:03:18", "throughput": 13969.84, "total_tokens": 46395200}
|
|
{"current_steps": 14745, "total_steps": 15621, "loss": 0.3198, "lr": 1.9143938253747383e-08, "epoch": 0.9439216439408489, "percentage": 94.39, "elapsed_time": "0:55:21", "remaining_time": "0:03:17", "throughput": 13972.16, "total_tokens": 46411840}
|
|
{"current_steps": 14750, "total_steps": 15621, "loss": 0.4515, "lr": 1.892696085083023e-08, "epoch": 0.9442417258818258, "percentage": 94.42, "elapsed_time": "0:55:22", "remaining_time": "0:03:16", "throughput": 13974.32, "total_tokens": 46427776}
|
|
{"current_steps": 14755, "total_steps": 15621, "loss": 0.3118, "lr": 1.8711208329121542e-08, "epoch": 0.9445618078228026, "percentage": 94.46, "elapsed_time": "0:55:23", "remaining_time": "0:03:15", "throughput": 13976.7, "total_tokens": 46444736}
|
|
{"current_steps": 14760, "total_steps": 15621, "loss": 0.3325, "lr": 1.849668095799084e-08, "epoch": 0.9448818897637795, "percentage": 94.49, "elapsed_time": "0:55:23", "remaining_time": "0:03:13", "throughput": 13978.9, "total_tokens": 46460672}
|
|
{"current_steps": 14765, "total_steps": 15621, "loss": 0.3344, "lr": 1.8283379005278098e-08, "epoch": 0.9452019717047564, "percentage": 94.52, "elapsed_time": "0:55:24", "remaining_time": "0:03:12", "throughput": 13981.09, "total_tokens": 46476736}
|
|
{"current_steps": 14770, "total_steps": 15621, "loss": 0.3231, "lr": 1.807130273729329e-08, "epoch": 0.9455220536457333, "percentage": 94.55, "elapsed_time": "0:55:24", "remaining_time": "0:03:11", "throughput": 13983.26, "total_tokens": 46492416}
|
|
{"current_steps": 14775, "total_steps": 15621, "loss": 0.3349, "lr": 1.7860452418816173e-08, "epoch": 0.9458421355867102, "percentage": 94.58, "elapsed_time": "0:55:25", "remaining_time": "0:03:10", "throughput": 13985.26, "total_tokens": 46507264}
|
|
{"current_steps": 14780, "total_steps": 15621, "loss": 0.3288, "lr": 1.7650828313095834e-08, "epoch": 0.946162217527687, "percentage": 94.62, "elapsed_time": "0:55:26", "remaining_time": "0:03:09", "throughput": 13987.65, "total_tokens": 46524224}
|
|
{"current_steps": 14785, "total_steps": 15621, "loss": 0.3101, "lr": 1.7442430681850362e-08, "epoch": 0.946482299468664, "percentage": 94.65, "elapsed_time": "0:55:26", "remaining_time": "0:03:08", "throughput": 13989.71, "total_tokens": 46539456}
|
|
{"current_steps": 14790, "total_steps": 15621, "loss": 0.4302, "lr": 1.723525978526652e-08, "epoch": 0.9468023814096409, "percentage": 94.68, "elapsed_time": "0:55:27", "remaining_time": "0:03:06", "throughput": 13991.84, "total_tokens": 46555136}
|
|
{"current_steps": 14795, "total_steps": 15621, "loss": 0.3501, "lr": 1.702931588199996e-08, "epoch": 0.9471224633506178, "percentage": 94.71, "elapsed_time": "0:55:27", "remaining_time": "0:03:05", "throughput": 13993.93, "total_tokens": 46570432}
|
|
{"current_steps": 14800, "total_steps": 15621, "loss": 0.3115, "lr": 1.6824599229173897e-08, "epoch": 0.9474425452915947, "percentage": 94.74, "elapsed_time": "0:55:28", "remaining_time": "0:03:04", "throughput": 13996.11, "total_tokens": 46586304}
|
|
{"current_steps": 14805, "total_steps": 15621, "loss": 0.2909, "lr": 1.662111008237932e-08, "epoch": 0.9477626272325715, "percentage": 94.78, "elapsed_time": "0:55:29", "remaining_time": "0:03:03", "throughput": 13998.33, "total_tokens": 46602432}
|
|
{"current_steps": 14810, "total_steps": 15621, "loss": 0.3218, "lr": 1.6418848695675003e-08, "epoch": 0.9480827091735484, "percentage": 94.81, "elapsed_time": "0:55:29", "remaining_time": "0:03:02", "throughput": 14000.34, "total_tokens": 46617472}
|
|
{"current_steps": 14815, "total_steps": 15621, "loss": 0.372, "lr": 1.6217815321586614e-08, "epoch": 0.9484027911145253, "percentage": 94.84, "elapsed_time": "0:55:30", "remaining_time": "0:03:01", "throughput": 14002.43, "total_tokens": 46632896}
|
|
{"current_steps": 14820, "total_steps": 15621, "loss": 0.355, "lr": 1.6018010211106602e-08, "epoch": 0.9487228730555022, "percentage": 94.87, "elapsed_time": "0:55:30", "remaining_time": "0:03:00", "throughput": 14004.71, "total_tokens": 46649408}
|
|
{"current_steps": 14825, "total_steps": 15621, "loss": 0.2816, "lr": 1.58194336136942e-08, "epoch": 0.9490429549964791, "percentage": 94.9, "elapsed_time": "0:55:31", "remaining_time": "0:02:58", "throughput": 14006.89, "total_tokens": 46665344}
|
|
{"current_steps": 14830, "total_steps": 15621, "loss": 0.4274, "lr": 1.5622085777274417e-08, "epoch": 0.9493630369374559, "percentage": 94.94, "elapsed_time": "0:55:32", "remaining_time": "0:02:57", "throughput": 14008.92, "total_tokens": 46680704}
|
|
{"current_steps": 14835, "total_steps": 15621, "loss": 0.3333, "lr": 1.542596694823839e-08, "epoch": 0.9496831188784329, "percentage": 94.97, "elapsed_time": "0:55:32", "remaining_time": "0:02:56", "throughput": 14010.97, "total_tokens": 46695936}
|
|
{"current_steps": 14840, "total_steps": 15621, "loss": 0.4259, "lr": 1.5231077371442914e-08, "epoch": 0.9500032008194098, "percentage": 95.0, "elapsed_time": "0:55:33", "remaining_time": "0:02:55", "throughput": 14013.11, "total_tokens": 46711680}
|
|
{"current_steps": 14845, "total_steps": 15621, "loss": 0.2888, "lr": 1.5037417290209685e-08, "epoch": 0.9503232827603867, "percentage": 95.03, "elapsed_time": "0:55:34", "remaining_time": "0:02:54", "throughput": 14015.18, "total_tokens": 46727040}
|
|
{"current_steps": 14850, "total_steps": 15621, "loss": 0.393, "lr": 1.4844986946325743e-08, "epoch": 0.9506433647013636, "percentage": 95.06, "elapsed_time": "0:55:34", "remaining_time": "0:02:53", "throughput": 14017.3, "total_tokens": 46742720}
|
|
{"current_steps": 14855, "total_steps": 15621, "loss": 0.2502, "lr": 1.4653786580042681e-08, "epoch": 0.9509634466423404, "percentage": 95.1, "elapsed_time": "0:55:35", "remaining_time": "0:02:51", "throughput": 14019.39, "total_tokens": 46758336}
|
|
{"current_steps": 14858, "total_steps": 15621, "eval_loss": 0.3537425398826599, "epoch": 0.9511554958069266, "percentage": 95.12, "elapsed_time": "0:56:24", "remaining_time": "0:02:53", "throughput": 13817.16, "total_tokens": 46767552}
|
|
{"current_steps": 14860, "total_steps": 15621, "loss": 0.3108, "lr": 1.4463816430076215e-08, "epoch": 0.9512835285833173, "percentage": 95.13, "elapsed_time": "0:56:46", "remaining_time": "0:02:54", "throughput": 13729.94, "total_tokens": 46773312}
|
|
{"current_steps": 14865, "total_steps": 15621, "loss": 0.3685, "lr": 1.4275076733606395e-08, "epoch": 0.9516036105242942, "percentage": 95.16, "elapsed_time": "0:56:47", "remaining_time": "0:02:53", "throughput": 13731.89, "total_tokens": 46787968}
|
|
{"current_steps": 14870, "total_steps": 15621, "loss": 0.2913, "lr": 1.4087567726277061e-08, "epoch": 0.9519236924652711, "percentage": 95.19, "elapsed_time": "0:56:47", "remaining_time": "0:02:52", "throughput": 13734.04, "total_tokens": 46803712}
|
|
{"current_steps": 14875, "total_steps": 15621, "loss": 0.2789, "lr": 1.390128964219528e-08, "epoch": 0.952243774406248, "percentage": 95.22, "elapsed_time": "0:56:48", "remaining_time": "0:02:50", "throughput": 13736.36, "total_tokens": 46820288}
|
|
{"current_steps": 14880, "total_steps": 15621, "loss": 0.3819, "lr": 1.3716242713931348e-08, "epoch": 0.9525638563472248, "percentage": 95.26, "elapsed_time": "0:56:49", "remaining_time": "0:02:49", "throughput": 13738.47, "total_tokens": 46835904}
|
|
{"current_steps": 14885, "total_steps": 15621, "loss": 0.3714, "lr": 1.3532427172518789e-08, "epoch": 0.9528839382882017, "percentage": 95.29, "elapsed_time": "0:56:49", "remaining_time": "0:02:48", "throughput": 13740.52, "total_tokens": 46851136}
|
|
{"current_steps": 14890, "total_steps": 15621, "loss": 0.3343, "lr": 1.3349843247453252e-08, "epoch": 0.9532040202291787, "percentage": 95.32, "elapsed_time": "0:56:50", "remaining_time": "0:02:47", "throughput": 13742.77, "total_tokens": 46867456}
|
|
{"current_steps": 14895, "total_steps": 15621, "loss": 0.2772, "lr": 1.3168491166692941e-08, "epoch": 0.9535241021701556, "percentage": 95.35, "elapsed_time": "0:56:50", "remaining_time": "0:02:46", "throughput": 13744.83, "total_tokens": 46882816}
|
|
{"current_steps": 14900, "total_steps": 15621, "loss": 0.4506, "lr": 1.2988371156658073e-08, "epoch": 0.9538441841111325, "percentage": 95.38, "elapsed_time": "0:56:51", "remaining_time": "0:02:45", "throughput": 13746.99, "total_tokens": 46898624}
|
|
{"current_steps": 14905, "total_steps": 15621, "loss": 0.282, "lr": 1.2809483442230763e-08, "epoch": 0.9541642660521094, "percentage": 95.42, "elapsed_time": "0:56:52", "remaining_time": "0:02:43", "throughput": 13749.12, "total_tokens": 46914304}
|
|
{"current_steps": 14910, "total_steps": 15621, "loss": 0.3705, "lr": 1.2631828246754128e-08, "epoch": 0.9544843479930862, "percentage": 95.45, "elapsed_time": "0:56:52", "remaining_time": "0:02:42", "throughput": 13751.32, "total_tokens": 46930368}
|
|
{"current_steps": 14915, "total_steps": 15621, "loss": 0.364, "lr": 1.2455405792032969e-08, "epoch": 0.9548044299340631, "percentage": 95.48, "elapsed_time": "0:56:53", "remaining_time": "0:02:41", "throughput": 13753.41, "total_tokens": 46945792}
|
|
{"current_steps": 14920, "total_steps": 15621, "loss": 0.342, "lr": 1.2280216298332646e-08, "epoch": 0.95512451187504, "percentage": 95.51, "elapsed_time": "0:56:54", "remaining_time": "0:02:40", "throughput": 13755.63, "total_tokens": 46962048}
|
|
{"current_steps": 14925, "total_steps": 15621, "loss": 0.4603, "lr": 1.2106259984379642e-08, "epoch": 0.9554445938160169, "percentage": 95.54, "elapsed_time": "0:56:54", "remaining_time": "0:02:39", "throughput": 13757.61, "total_tokens": 46976768}
|
|
{"current_steps": 14930, "total_steps": 15621, "loss": 0.4141, "lr": 1.1933537067359889e-08, "epoch": 0.9557646757569938, "percentage": 95.58, "elapsed_time": "0:56:55", "remaining_time": "0:02:38", "throughput": 13759.58, "total_tokens": 46991424}
|
|
{"current_steps": 14935, "total_steps": 15621, "loss": 0.3607, "lr": 1.1762047762920446e-08, "epoch": 0.9560847576979706, "percentage": 95.61, "elapsed_time": "0:56:55", "remaining_time": "0:02:36", "throughput": 13761.63, "total_tokens": 47006656}
|
|
{"current_steps": 14940, "total_steps": 15621, "loss": 0.3576, "lr": 1.1591792285167602e-08, "epoch": 0.9564048396389476, "percentage": 95.64, "elapsed_time": "0:56:56", "remaining_time": "0:02:35", "throughput": 13763.66, "total_tokens": 47021824}
|
|
{"current_steps": 14945, "total_steps": 15621, "loss": 0.3907, "lr": 1.1422770846667206e-08, "epoch": 0.9567249215799245, "percentage": 95.67, "elapsed_time": "0:56:56", "remaining_time": "0:02:34", "throughput": 13765.81, "total_tokens": 47037440}
|
|
{"current_steps": 14950, "total_steps": 15621, "loss": 0.307, "lr": 1.1254983658444572e-08, "epoch": 0.9570450035209014, "percentage": 95.7, "elapsed_time": "0:56:57", "remaining_time": "0:02:33", "throughput": 13768.01, "total_tokens": 47053760}
|
|
{"current_steps": 14955, "total_steps": 15621, "loss": 0.3148, "lr": 1.1088430929984017e-08, "epoch": 0.9573650854618783, "percentage": 95.74, "elapsed_time": "0:56:58", "remaining_time": "0:02:32", "throughput": 13770.03, "total_tokens": 47068928}
|
|
{"current_steps": 14960, "total_steps": 15621, "loss": 0.383, "lr": 1.0923112869228645e-08, "epoch": 0.9576851674028551, "percentage": 95.77, "elapsed_time": "0:56:58", "remaining_time": "0:02:31", "throughput": 13772.18, "total_tokens": 47084672}
|
|
{"current_steps": 14965, "total_steps": 15621, "loss": 0.3613, "lr": 1.0759029682579801e-08, "epoch": 0.958005249343832, "percentage": 95.8, "elapsed_time": "0:56:59", "remaining_time": "0:02:29", "throughput": 13774.52, "total_tokens": 47101632}
|
|
{"current_steps": 14970, "total_steps": 15621, "loss": 0.306, "lr": 1.0596181574897389e-08, "epoch": 0.9583253312848089, "percentage": 95.83, "elapsed_time": "0:57:00", "remaining_time": "0:02:28", "throughput": 13776.49, "total_tokens": 47116480}
|
|
{"current_steps": 14975, "total_steps": 15621, "loss": 0.3155, "lr": 1.0434568749499107e-08, "epoch": 0.9586454132257858, "percentage": 95.86, "elapsed_time": "0:57:00", "remaining_time": "0:02:27", "throughput": 13778.77, "total_tokens": 47132992}
|
|
{"current_steps": 14980, "total_steps": 15621, "loss": 0.3061, "lr": 1.027419140816066e-08, "epoch": 0.9589654951667627, "percentage": 95.9, "elapsed_time": "0:57:01", "remaining_time": "0:02:26", "throughput": 13780.97, "total_tokens": 47149056}
|
|
{"current_steps": 14985, "total_steps": 15621, "loss": 0.2984, "lr": 1.0115049751114768e-08, "epoch": 0.9592855771077395, "percentage": 95.93, "elapsed_time": "0:57:01", "remaining_time": "0:02:25", "throughput": 13783.12, "total_tokens": 47164864}
|
|
{"current_steps": 14990, "total_steps": 15621, "loss": 0.3481, "lr": 9.957143977051941e-09, "epoch": 0.9596056590487164, "percentage": 95.96, "elapsed_time": "0:57:02", "remaining_time": "0:02:24", "throughput": 13785.2, "total_tokens": 47180544}
|
|
{"current_steps": 14995, "total_steps": 15621, "loss": 0.3836, "lr": 9.800474283119142e-09, "epoch": 0.9599257409896934, "percentage": 95.99, "elapsed_time": "0:57:03", "remaining_time": "0:02:22", "throughput": 13787.39, "total_tokens": 47196608}
|
|
{"current_steps": 15000, "total_steps": 15621, "loss": 0.3701, "lr": 9.645040864920462e-09, "epoch": 0.9602458229306703, "percentage": 96.02, "elapsed_time": "0:57:03", "remaining_time": "0:02:21", "throughput": 13789.69, "total_tokens": 47213504}
|
|
{"current_steps": 15005, "total_steps": 15621, "loss": 0.4056, "lr": 9.490843916516334e-09, "epoch": 0.9605659048716472, "percentage": 96.06, "elapsed_time": "0:57:04", "remaining_time": "0:02:20", "throughput": 13791.68, "total_tokens": 47228288}
|
|
{"current_steps": 15010, "total_steps": 15621, "loss": 0.4448, "lr": 9.337883630423316e-09, "epoch": 0.960885986812624, "percentage": 96.09, "elapsed_time": "0:57:05", "remaining_time": "0:02:19", "throughput": 13793.75, "total_tokens": 47243712}
|
|
{"current_steps": 15015, "total_steps": 15621, "loss": 0.4909, "lr": 9.186160197614423e-09, "epoch": 0.9612060687536009, "percentage": 96.12, "elapsed_time": "0:57:05", "remaining_time": "0:02:18", "throughput": 13795.96, "total_tokens": 47259904}
|
|
{"current_steps": 15020, "total_steps": 15621, "loss": 0.4837, "lr": 9.035673807517795e-09, "epoch": 0.9615261506945778, "percentage": 96.15, "elapsed_time": "0:57:06", "remaining_time": "0:02:17", "throughput": 13797.96, "total_tokens": 47275072}
|
|
{"current_steps": 15025, "total_steps": 15621, "loss": 0.27, "lr": 8.886424648017698e-09, "epoch": 0.9618462326355547, "percentage": 96.18, "elapsed_time": "0:57:06", "remaining_time": "0:02:15", "throughput": 13800.06, "total_tokens": 47290688}
|
|
{"current_steps": 15030, "total_steps": 15621, "loss": 0.3408, "lr": 8.738412905453408e-09, "epoch": 0.9621663145765316, "percentage": 96.22, "elapsed_time": "0:57:07", "remaining_time": "0:02:14", "throughput": 13802.19, "total_tokens": 47306496}
|
|
{"current_steps": 15035, "total_steps": 15621, "loss": 0.3575, "lr": 8.591638764619324e-09, "epoch": 0.9624863965175084, "percentage": 96.25, "elapsed_time": "0:57:08", "remaining_time": "0:02:13", "throughput": 13804.16, "total_tokens": 47321280}
|
|
{"current_steps": 15040, "total_steps": 15621, "loss": 0.3623, "lr": 8.446102408764643e-09, "epoch": 0.9628064784584853, "percentage": 96.28, "elapsed_time": "0:57:08", "remaining_time": "0:02:12", "throughput": 13806.38, "total_tokens": 47337536}
|
|
{"current_steps": 15045, "total_steps": 15621, "loss": 0.273, "lr": 8.301804019593129e-09, "epoch": 0.9631265603994623, "percentage": 96.31, "elapsed_time": "0:57:09", "remaining_time": "0:02:11", "throughput": 13808.43, "total_tokens": 47353024}
|
|
{"current_steps": 15050, "total_steps": 15621, "loss": 0.3535, "lr": 8.158743777263333e-09, "epoch": 0.9634466423404392, "percentage": 96.34, "elapsed_time": "0:57:09", "remaining_time": "0:02:10", "throughput": 13810.58, "total_tokens": 47369088}
|
|
{"current_steps": 15055, "total_steps": 15621, "loss": 0.3678, "lr": 8.016921860387272e-09, "epoch": 0.9637667242814161, "percentage": 96.38, "elapsed_time": "0:57:10", "remaining_time": "0:02:08", "throughput": 13812.61, "total_tokens": 47384320}
|
|
{"current_steps": 15060, "total_steps": 15621, "loss": 0.3908, "lr": 7.876338446031416e-09, "epoch": 0.964086806222393, "percentage": 96.41, "elapsed_time": "0:57:11", "remaining_time": "0:02:07", "throughput": 13814.85, "total_tokens": 47400896}
|
|
{"current_steps": 15065, "total_steps": 15621, "loss": 0.3169, "lr": 7.736993709716033e-09, "epoch": 0.9644068881633698, "percentage": 96.44, "elapsed_time": "0:57:11", "remaining_time": "0:02:06", "throughput": 13817.01, "total_tokens": 47416896}
|
|
{"current_steps": 15070, "total_steps": 15621, "loss": 0.4783, "lr": 7.59888782541418e-09, "epoch": 0.9647269701043467, "percentage": 96.47, "elapsed_time": "0:57:12", "remaining_time": "0:02:05", "throughput": 13819.08, "total_tokens": 47432320}
|
|
{"current_steps": 15075, "total_steps": 15621, "loss": 0.2656, "lr": 7.462020965553151e-09, "epoch": 0.9650470520453236, "percentage": 96.5, "elapsed_time": "0:57:12", "remaining_time": "0:02:04", "throughput": 13821.27, "total_tokens": 47448320}
|
|
{"current_steps": 15080, "total_steps": 15621, "loss": 0.49, "lr": 7.32639330101259e-09, "epoch": 0.9653671339863005, "percentage": 96.54, "elapsed_time": "0:57:13", "remaining_time": "0:02:03", "throughput": 13823.27, "total_tokens": 47463488}
|
|
{"current_steps": 15085, "total_steps": 15621, "loss": 0.3886, "lr": 7.1920050011252675e-09, "epoch": 0.9656872159272774, "percentage": 96.57, "elapsed_time": "0:57:14", "remaining_time": "0:02:02", "throughput": 13825.37, "total_tokens": 47479104}
|
|
{"current_steps": 15090, "total_steps": 15621, "loss": 0.391, "lr": 7.058856233676525e-09, "epoch": 0.9660072978682542, "percentage": 96.6, "elapsed_time": "0:57:14", "remaining_time": "0:02:00", "throughput": 13827.78, "total_tokens": 47496448}
|
|
{"current_steps": 15095, "total_steps": 15621, "loss": 0.3733, "lr": 6.926947164904162e-09, "epoch": 0.9663273798092311, "percentage": 96.63, "elapsed_time": "0:57:15", "remaining_time": "0:01:59", "throughput": 13829.85, "total_tokens": 47511936}
|
|
{"current_steps": 15100, "total_steps": 15621, "loss": 0.3984, "lr": 6.796277959498331e-09, "epoch": 0.9666474617502081, "percentage": 96.66, "elapsed_time": "0:57:16", "remaining_time": "0:01:58", "throughput": 13832.07, "total_tokens": 47528320}
|
|
{"current_steps": 15105, "total_steps": 15621, "loss": 0.2793, "lr": 6.666848780600864e-09, "epoch": 0.966967543691185, "percentage": 96.7, "elapsed_time": "0:57:16", "remaining_time": "0:01:57", "throughput": 13834.07, "total_tokens": 47543296}
|
|
{"current_steps": 15110, "total_steps": 15621, "loss": 0.2751, "lr": 6.538659789805834e-09, "epoch": 0.9672876256321619, "percentage": 96.73, "elapsed_time": "0:57:17", "remaining_time": "0:01:56", "throughput": 13836.11, "total_tokens": 47558656}
|
|
{"current_steps": 15115, "total_steps": 15621, "loss": 0.3498, "lr": 6.411711147158438e-09, "epoch": 0.9676077075731387, "percentage": 96.76, "elapsed_time": "0:57:17", "remaining_time": "0:01:55", "throughput": 13838.28, "total_tokens": 47574720}
|
|
{"current_steps": 15120, "total_steps": 15621, "loss": 0.3107, "lr": 6.286003011155783e-09, "epoch": 0.9679277895141156, "percentage": 96.79, "elapsed_time": "0:57:18", "remaining_time": "0:01:53", "throughput": 13840.36, "total_tokens": 47590272}
|
|
{"current_steps": 15125, "total_steps": 15621, "loss": 0.4098, "lr": 6.161535538745877e-09, "epoch": 0.9682478714550925, "percentage": 96.82, "elapsed_time": "0:57:19", "remaining_time": "0:01:52", "throughput": 13842.38, "total_tokens": 47605696}
|
|
{"current_steps": 15130, "total_steps": 15621, "loss": 0.3975, "lr": 6.0383088853277475e-09, "epoch": 0.9685679533960694, "percentage": 96.86, "elapsed_time": "0:57:19", "remaining_time": "0:01:51", "throughput": 13844.58, "total_tokens": 47621760}
|
|
{"current_steps": 15135, "total_steps": 15621, "loss": 0.3081, "lr": 5.916323204751439e-09, "epoch": 0.9688880353370463, "percentage": 96.89, "elapsed_time": "0:57:20", "remaining_time": "0:01:50", "throughput": 13847.01, "total_tokens": 47639296}
|
|
{"current_steps": 15140, "total_steps": 15621, "loss": 0.2648, "lr": 5.795578649317345e-09, "epoch": 0.9692081172780231, "percentage": 96.92, "elapsed_time": "0:57:21", "remaining_time": "0:01:49", "throughput": 13849.02, "total_tokens": 47654656}
|
|
{"current_steps": 15145, "total_steps": 15621, "loss": 0.3157, "lr": 5.676075369776656e-09, "epoch": 0.969528199219, "percentage": 96.95, "elapsed_time": "0:57:21", "remaining_time": "0:01:48", "throughput": 13851.25, "total_tokens": 47671168}
|
|
{"current_steps": 15150, "total_steps": 15621, "loss": 0.3348, "lr": 5.557813515330468e-09, "epoch": 0.9698482811599769, "percentage": 96.98, "elapsed_time": "0:57:22", "remaining_time": "0:01:47", "throughput": 13853.26, "total_tokens": 47686400}
|
|
{"current_steps": 15155, "total_steps": 15621, "loss": 0.3439, "lr": 5.440793233630115e-09, "epoch": 0.9701683631009539, "percentage": 97.02, "elapsed_time": "0:57:22", "remaining_time": "0:01:45", "throughput": 13855.28, "total_tokens": 47701760}
|
|
{"current_steps": 15160, "total_steps": 15621, "loss": 0.3063, "lr": 5.325014670776951e-09, "epoch": 0.9704884450419308, "percentage": 97.05, "elapsed_time": "0:57:23", "remaining_time": "0:01:44", "throughput": 13857.31, "total_tokens": 47717248}
|
|
{"current_steps": 15165, "total_steps": 15621, "loss": 0.3599, "lr": 5.21047797132157e-09, "epoch": 0.9708085269829076, "percentage": 97.08, "elapsed_time": "0:57:24", "remaining_time": "0:01:43", "throughput": 13859.65, "total_tokens": 47734336}
|
|
{"current_steps": 15170, "total_steps": 15621, "loss": 0.3417, "lr": 5.097183278264694e-09, "epoch": 0.9711286089238845, "percentage": 97.11, "elapsed_time": "0:57:24", "remaining_time": "0:01:42", "throughput": 13861.8, "total_tokens": 47750464}
|
|
{"current_steps": 15175, "total_steps": 15621, "loss": 0.4364, "lr": 4.985130733055954e-09, "epoch": 0.9714486908648614, "percentage": 97.14, "elapsed_time": "0:57:25", "remaining_time": "0:01:41", "throughput": 13863.83, "total_tokens": 47765824}
|
|
{"current_steps": 15180, "total_steps": 15621, "loss": 0.3893, "lr": 4.874320475594107e-09, "epoch": 0.9717687728058383, "percentage": 97.18, "elapsed_time": "0:57:25", "remaining_time": "0:01:40", "throughput": 13865.95, "total_tokens": 47781760}
|
|
{"current_steps": 15185, "total_steps": 15621, "loss": 0.2832, "lr": 4.764752644227377e-09, "epoch": 0.9720888547468152, "percentage": 97.21, "elapsed_time": "0:57:26", "remaining_time": "0:01:38", "throughput": 13868.01, "total_tokens": 47797312}
|
|
{"current_steps": 15190, "total_steps": 15621, "loss": 0.3392, "lr": 4.656427375752336e-09, "epoch": 0.972408936687792, "percentage": 97.24, "elapsed_time": "0:57:27", "remaining_time": "0:01:37", "throughput": 13870.22, "total_tokens": 47813440}
|
|
{"current_steps": 15195, "total_steps": 15621, "loss": 0.34, "lr": 4.549344805414246e-09, "epoch": 0.9727290186287689, "percentage": 97.27, "elapsed_time": "0:57:27", "remaining_time": "0:01:36", "throughput": 13872.36, "total_tokens": 47829440}
|
|
{"current_steps": 15200, "total_steps": 15621, "loss": 0.4139, "lr": 4.443505066907049e-09, "epoch": 0.9730491005697458, "percentage": 97.3, "elapsed_time": "0:57:28", "remaining_time": "0:01:35", "throughput": 13874.35, "total_tokens": 47844608}
|
|
{"current_steps": 15205, "total_steps": 15621, "loss": 0.2823, "lr": 4.338908292372934e-09, "epoch": 0.9733691825107228, "percentage": 97.34, "elapsed_time": "0:57:29", "remaining_time": "0:01:34", "throughput": 13876.43, "total_tokens": 47860160}
|
|
{"current_steps": 15210, "total_steps": 15621, "loss": 0.3864, "lr": 4.235554612402214e-09, "epoch": 0.9736892644516997, "percentage": 97.37, "elapsed_time": "0:57:29", "remaining_time": "0:01:33", "throughput": 13878.49, "total_tokens": 47875648}
|
|
{"current_steps": 15215, "total_steps": 15621, "loss": 0.381, "lr": 4.133444156033006e-09, "epoch": 0.9740093463926766, "percentage": 97.4, "elapsed_time": "0:57:30", "remaining_time": "0:01:32", "throughput": 13880.83, "total_tokens": 47892736}
|
|
{"current_steps": 15220, "total_steps": 15621, "loss": 0.3145, "lr": 4.032577050751551e-09, "epoch": 0.9743294283336534, "percentage": 97.43, "elapsed_time": "0:57:30", "remaining_time": "0:01:30", "throughput": 13882.98, "total_tokens": 47908992}
|
|
{"current_steps": 15225, "total_steps": 15621, "loss": 0.3428, "lr": 3.932953422491669e-09, "epoch": 0.9746495102746303, "percentage": 97.46, "elapsed_time": "0:57:31", "remaining_time": "0:01:29", "throughput": 13885.1, "total_tokens": 47924736}
|
|
{"current_steps": 15230, "total_steps": 15621, "loss": 0.284, "lr": 3.8345733956345326e-09, "epoch": 0.9749695922156072, "percentage": 97.5, "elapsed_time": "0:57:32", "remaining_time": "0:01:28", "throughput": 13887.27, "total_tokens": 47941056}
|
|
{"current_steps": 15235, "total_steps": 15621, "loss": 0.3619, "lr": 3.737437093008777e-09, "epoch": 0.9752896741565841, "percentage": 97.53, "elapsed_time": "0:57:32", "remaining_time": "0:01:27", "throughput": 13889.52, "total_tokens": 47957824}
|
|
{"current_steps": 15240, "total_steps": 15621, "loss": 0.4107, "lr": 3.641544635890281e-09, "epoch": 0.975609756097561, "percentage": 97.56, "elapsed_time": "0:57:33", "remaining_time": "0:01:26", "throughput": 13891.5, "total_tokens": 47973056}
|
|
{"current_steps": 15245, "total_steps": 15621, "loss": 0.3896, "lr": 3.546896144001832e-09, "epoch": 0.9759298380385378, "percentage": 97.59, "elapsed_time": "0:57:34", "remaining_time": "0:01:25", "throughput": 13893.62, "total_tokens": 47988928}
|
|
{"current_steps": 15250, "total_steps": 15621, "loss": 0.3926, "lr": 3.4534917355132364e-09, "epoch": 0.9762499199795147, "percentage": 97.62, "elapsed_time": "0:57:34", "remaining_time": "0:01:24", "throughput": 13895.58, "total_tokens": 48004032}
|
|
{"current_steps": 15255, "total_steps": 15621, "loss": 0.4376, "lr": 3.361331527040878e-09, "epoch": 0.9765700019204916, "percentage": 97.66, "elapsed_time": "0:57:35", "remaining_time": "0:01:22", "throughput": 13897.82, "total_tokens": 48020800}
|
|
{"current_steps": 15260, "total_steps": 15621, "loss": 0.3935, "lr": 3.270415633647938e-09, "epoch": 0.9768900838614686, "percentage": 97.69, "elapsed_time": "0:57:35", "remaining_time": "0:01:21", "throughput": 13899.99, "total_tokens": 48036800}
|
|
{"current_steps": 15265, "total_steps": 15621, "loss": 0.2847, "lr": 3.180744168843952e-09, "epoch": 0.9772101658024455, "percentage": 97.72, "elapsed_time": "0:57:36", "remaining_time": "0:01:20", "throughput": 13901.85, "total_tokens": 48051264}
|
|
{"current_steps": 15270, "total_steps": 15621, "loss": 0.2318, "lr": 3.0923172445849187e-09, "epoch": 0.9775302477434223, "percentage": 97.75, "elapsed_time": "0:57:37", "remaining_time": "0:01:19", "throughput": 13903.81, "total_tokens": 48066176}
|
|
{"current_steps": 15275, "total_steps": 15621, "loss": 0.3178, "lr": 3.0051349712727493e-09, "epoch": 0.9778503296843992, "percentage": 97.79, "elapsed_time": "0:57:37", "remaining_time": "0:01:18", "throughput": 13905.9, "total_tokens": 48081984}
|
|
{"current_steps": 15280, "total_steps": 15621, "loss": 0.4072, "lr": 2.9191974577555954e-09, "epoch": 0.9781704116253761, "percentage": 97.82, "elapsed_time": "0:57:38", "remaining_time": "0:01:17", "throughput": 13907.84, "total_tokens": 48096896}
|
|
{"current_steps": 15285, "total_steps": 15621, "loss": 0.2334, "lr": 2.8345048113274096e-09, "epoch": 0.978490493566353, "percentage": 97.85, "elapsed_time": "0:57:38", "remaining_time": "0:01:16", "throughput": 13909.84, "total_tokens": 48112128}
|
|
{"current_steps": 15290, "total_steps": 15621, "loss": 0.3388, "lr": 2.751057137727941e-09, "epoch": 0.9788105755073299, "percentage": 97.88, "elapsed_time": "0:57:39", "remaining_time": "0:01:14", "throughput": 13911.89, "total_tokens": 48127616}
|
|
{"current_steps": 15295, "total_steps": 15621, "loss": 0.384, "lr": 2.66885454114274e-09, "epoch": 0.9791306574483067, "percentage": 97.91, "elapsed_time": "0:57:40", "remaining_time": "0:01:13", "throughput": 13913.78, "total_tokens": 48142144}
|
|
{"current_steps": 15300, "total_steps": 15621, "loss": 0.3776, "lr": 2.5878971242025983e-09, "epoch": 0.9794507393892836, "percentage": 97.95, "elapsed_time": "0:57:40", "remaining_time": "0:01:12", "throughput": 13915.92, "total_tokens": 48158272}
|
|
{"current_steps": 15305, "total_steps": 15621, "loss": 0.3239, "lr": 2.5081849879837746e-09, "epoch": 0.9797708213302605, "percentage": 97.98, "elapsed_time": "0:57:41", "remaining_time": "0:01:11", "throughput": 13917.84, "total_tokens": 48173120}
|
|
{"current_steps": 15310, "total_steps": 15621, "loss": 0.3428, "lr": 2.429718232007771e-09, "epoch": 0.9800909032712375, "percentage": 98.01, "elapsed_time": "0:57:41", "remaining_time": "0:01:10", "throughput": 13919.87, "total_tokens": 48188672}
|
|
{"current_steps": 15315, "total_steps": 15621, "loss": 0.2688, "lr": 2.3524969542414453e-09, "epoch": 0.9804109852122144, "percentage": 98.04, "elapsed_time": "0:57:42", "remaining_time": "0:01:09", "throughput": 13921.96, "total_tokens": 48204480}
|
|
{"current_steps": 15320, "total_steps": 15621, "loss": 0.3525, "lr": 2.2765212510963418e-09, "epoch": 0.9807310671531912, "percentage": 98.07, "elapsed_time": "0:57:43", "remaining_time": "0:01:08", "throughput": 13923.95, "total_tokens": 48219584}
|
|
{"current_steps": 15325, "total_steps": 15621, "loss": 0.2847, "lr": 2.2017912174289164e-09, "epoch": 0.9810511490941681, "percentage": 98.11, "elapsed_time": "0:57:43", "remaining_time": "0:01:06", "throughput": 13926.15, "total_tokens": 48235904}
|
|
{"current_steps": 15330, "total_steps": 15621, "loss": 0.4052, "lr": 2.128306946540648e-09, "epoch": 0.981371231035145, "percentage": 98.14, "elapsed_time": "0:57:44", "remaining_time": "0:01:05", "throughput": 13928.48, "total_tokens": 48252992}
|
|
{"current_steps": 15335, "total_steps": 15621, "loss": 0.3316, "lr": 2.0560685301774792e-09, "epoch": 0.9816913129761219, "percentage": 98.17, "elapsed_time": "0:57:44", "remaining_time": "0:01:04", "throughput": 13930.4, "total_tokens": 48267840}
|
|
{"current_steps": 15340, "total_steps": 15621, "loss": 0.3781, "lr": 1.985076058529933e-09, "epoch": 0.9820113949170988, "percentage": 98.2, "elapsed_time": "0:57:45", "remaining_time": "0:01:03", "throughput": 13932.32, "total_tokens": 48282688}
|
|
{"current_steps": 15345, "total_steps": 15621, "loss": 0.4768, "lr": 1.9153296202328863e-09, "epoch": 0.9823314768580756, "percentage": 98.23, "elapsed_time": "0:57:46", "remaining_time": "0:01:02", "throughput": 13934.71, "total_tokens": 48300096}
|
|
{"current_steps": 15350, "total_steps": 15621, "loss": 0.3929, "lr": 1.8468293023656823e-09, "epoch": 0.9826515587990525, "percentage": 98.27, "elapsed_time": "0:57:46", "remaining_time": "0:01:01", "throughput": 13936.67, "total_tokens": 48315136}
|
|
{"current_steps": 15355, "total_steps": 15621, "loss": 0.4052, "lr": 1.7795751904515766e-09, "epoch": 0.9829716407400294, "percentage": 98.3, "elapsed_time": "0:57:47", "remaining_time": "0:01:00", "throughput": 13938.65, "total_tokens": 48330240}
|
|
{"current_steps": 15360, "total_steps": 15621, "loss": 0.318, "lr": 1.7135673684584019e-09, "epoch": 0.9832917226810063, "percentage": 98.33, "elapsed_time": "0:57:47", "remaining_time": "0:00:58", "throughput": 13940.59, "total_tokens": 48345280}
|
|
{"current_steps": 15365, "total_steps": 15621, "loss": 0.3972, "lr": 1.6488059187974579e-09, "epoch": 0.9836118046219833, "percentage": 98.36, "elapsed_time": "0:57:48", "remaining_time": "0:00:57", "throughput": 13942.79, "total_tokens": 48361792}
|
|
{"current_steps": 15370, "total_steps": 15621, "loss": 0.4099, "lr": 1.5852909223242894e-09, "epoch": 0.9839318865629602, "percentage": 98.39, "elapsed_time": "0:57:49", "remaining_time": "0:00:56", "throughput": 13944.83, "total_tokens": 48377408}
|
|
{"current_steps": 15375, "total_steps": 15621, "loss": 0.3759, "lr": 1.5230224583380192e-09, "epoch": 0.984251968503937, "percentage": 98.43, "elapsed_time": "0:57:49", "remaining_time": "0:00:55", "throughput": 13946.86, "total_tokens": 48392896}
|
|
{"current_steps": 15380, "total_steps": 15621, "loss": 0.4663, "lr": 1.4620006045816813e-09, "epoch": 0.9845720504449139, "percentage": 98.46, "elapsed_time": "0:57:50", "remaining_time": "0:00:54", "throughput": 13948.75, "total_tokens": 48407552}
|
|
{"current_steps": 15385, "total_steps": 15621, "loss": 0.2785, "lr": 1.4022254372417774e-09, "epoch": 0.9848921323858908, "percentage": 98.49, "elapsed_time": "0:57:51", "remaining_time": "0:00:53", "throughput": 13951.0, "total_tokens": 48424320}
|
|
{"current_steps": 15390, "total_steps": 15621, "loss": 0.5093, "lr": 1.3436970309481655e-09, "epoch": 0.9852122143268677, "percentage": 98.52, "elapsed_time": "0:57:51", "remaining_time": "0:00:52", "throughput": 13953.4, "total_tokens": 48441984}
|
|
{"current_steps": 15395, "total_steps": 15621, "loss": 0.3442, "lr": 1.2864154587742815e-09, "epoch": 0.9855322962678446, "percentage": 98.55, "elapsed_time": "0:57:52", "remaining_time": "0:00:50", "throughput": 13955.31, "total_tokens": 48456832}
|
|
{"current_steps": 15400, "total_steps": 15621, "loss": 0.3608, "lr": 1.2303807922370292e-09, "epoch": 0.9858523782088214, "percentage": 98.59, "elapsed_time": "0:57:52", "remaining_time": "0:00:49", "throughput": 13957.36, "total_tokens": 48472512}
|
|
{"current_steps": 15405, "total_steps": 15621, "loss": 0.3122, "lr": 1.1755931012961128e-09, "epoch": 0.9861724601497983, "percentage": 98.62, "elapsed_time": "0:57:53", "remaining_time": "0:00:48", "throughput": 13959.51, "total_tokens": 48488832}
|
|
{"current_steps": 15410, "total_steps": 15621, "loss": 0.3491, "lr": 1.122052454354705e-09, "epoch": 0.9864925420907752, "percentage": 98.65, "elapsed_time": "0:57:54", "remaining_time": "0:00:47", "throughput": 13961.45, "total_tokens": 48503936}
|
|
{"current_steps": 15415, "total_steps": 15621, "loss": 0.4398, "lr": 1.0697589182590005e-09, "epoch": 0.9868126240317522, "percentage": 98.68, "elapsed_time": "0:57:54", "remaining_time": "0:00:46", "throughput": 13963.43, "total_tokens": 48519040}
|
|
{"current_steps": 15420, "total_steps": 15621, "loss": 0.5967, "lr": 1.018712558297996e-09, "epoch": 0.9871327059727291, "percentage": 98.71, "elapsed_time": "0:57:55", "remaining_time": "0:00:45", "throughput": 13965.55, "total_tokens": 48535040}
|
|
{"current_steps": 15425, "total_steps": 15621, "loss": 0.4383, "lr": 9.689134382037113e-10, "epoch": 0.9874527879137059, "percentage": 98.75, "elapsed_time": "0:57:55", "remaining_time": "0:00:44", "throughput": 13967.78, "total_tokens": 48551808}
|
|
{"current_steps": 15430, "total_steps": 15621, "loss": 0.3967, "lr": 9.203616201508557e-10, "epoch": 0.9877728698546828, "percentage": 98.78, "elapsed_time": "0:57:56", "remaining_time": "0:00:43", "throughput": 13969.71, "total_tokens": 48566592}
|
|
{"current_steps": 15435, "total_steps": 15621, "loss": 0.3159, "lr": 8.730571647570517e-10, "epoch": 0.9880929517956597, "percentage": 98.81, "elapsed_time": "0:57:57", "remaining_time": "0:00:41", "throughput": 13971.85, "total_tokens": 48582720}
|
|
{"current_steps": 15440, "total_steps": 15621, "loss": 0.4878, "lr": 8.270001310825003e-10, "epoch": 0.9884130337366366, "percentage": 98.84, "elapsed_time": "0:57:57", "remaining_time": "0:00:40", "throughput": 13974.02, "total_tokens": 48599104}
|
|
{"current_steps": 15445, "total_steps": 15621, "loss": 0.3118, "lr": 7.821905766297599e-10, "epoch": 0.9887331156776135, "percentage": 98.87, "elapsed_time": "0:57:58", "remaining_time": "0:00:39", "throughput": 13976.13, "total_tokens": 48615040}
|
|
{"current_steps": 15450, "total_steps": 15621, "loss": 0.3926, "lr": 7.386285573441897e-10, "epoch": 0.9890531976185903, "percentage": 98.91, "elapsed_time": "0:57:59", "remaining_time": "0:00:38", "throughput": 13978.22, "total_tokens": 48630976}
|
|
{"current_steps": 15455, "total_steps": 15621, "loss": 0.2862, "lr": 6.963141276136175e-10, "epoch": 0.9893732795595672, "percentage": 98.94, "elapsed_time": "0:57:59", "remaining_time": "0:00:37", "throughput": 13980.21, "total_tokens": 48646080}
|
|
{"current_steps": 15460, "total_steps": 15621, "loss": 0.2525, "lr": 6.552473402678949e-10, "epoch": 0.9896933615005441, "percentage": 98.97, "elapsed_time": "0:58:00", "remaining_time": "0:00:36", "throughput": 13982.4, "total_tokens": 48662528}
|
|
{"current_steps": 15465, "total_steps": 15621, "loss": 0.3301, "lr": 6.154282465794524e-10, "epoch": 0.990013443441521, "percentage": 99.0, "elapsed_time": "0:58:00", "remaining_time": "0:00:35", "throughput": 13984.77, "total_tokens": 48680000}
|
|
{"current_steps": 15470, "total_steps": 15621, "loss": 0.424, "lr": 5.768568962629672e-10, "epoch": 0.990333525382498, "percentage": 99.03, "elapsed_time": "0:58:01", "remaining_time": "0:00:33", "throughput": 13986.94, "total_tokens": 48696256}
|
|
{"current_steps": 15475, "total_steps": 15621, "loss": 0.3065, "lr": 5.395333374751398e-10, "epoch": 0.9906536073234748, "percentage": 99.07, "elapsed_time": "0:58:02", "remaining_time": "0:00:32", "throughput": 13988.87, "total_tokens": 48711168}
|
|
{"current_steps": 15480, "total_steps": 15621, "loss": 0.5309, "lr": 5.034576168149174e-10, "epoch": 0.9909736892644517, "percentage": 99.1, "elapsed_time": "0:58:02", "remaining_time": "0:00:31", "throughput": 13990.93, "total_tokens": 48726848}
|
|
{"current_steps": 15485, "total_steps": 15621, "loss": 0.4868, "lr": 4.686297793231597e-10, "epoch": 0.9912937712054286, "percentage": 99.13, "elapsed_time": "0:58:03", "remaining_time": "0:00:30", "throughput": 13993.12, "total_tokens": 48743232}
|
|
{"current_steps": 15490, "total_steps": 15621, "loss": 0.456, "lr": 4.350498684829729e-10, "epoch": 0.9916138531464055, "percentage": 99.16, "elapsed_time": "0:58:03", "remaining_time": "0:00:29", "throughput": 13995.04, "total_tokens": 48758080}
|
|
{"current_steps": 15495, "total_steps": 15621, "loss": 0.3105, "lr": 4.0271792621926483e-10, "epoch": 0.9919339350873824, "percentage": 99.19, "elapsed_time": "0:58:04", "remaining_time": "0:00:28", "throughput": 13996.99, "total_tokens": 48773120}
|
|
{"current_steps": 15500, "total_steps": 15621, "loss": 0.3815, "lr": 3.716339928987455e-10, "epoch": 0.9922540170283592, "percentage": 99.23, "elapsed_time": "0:58:05", "remaining_time": "0:00:27", "throughput": 13999.07, "total_tokens": 48789056}
|
|
{"current_steps": 15505, "total_steps": 15621, "loss": 0.4142, "lr": 3.41798107330149e-10, "epoch": 0.9925740989693361, "percentage": 99.26, "elapsed_time": "0:58:05", "remaining_time": "0:00:26", "throughput": 14001.03, "total_tokens": 48804288}
|
|
{"current_steps": 15510, "total_steps": 15621, "loss": 0.3715, "lr": 3.1321030676390027e-10, "epoch": 0.992894180910313, "percentage": 99.29, "elapsed_time": "0:58:06", "remaining_time": "0:00:24", "throughput": 14002.89, "total_tokens": 48818816}
|
|
{"current_steps": 15515, "total_steps": 15621, "loss": 0.2872, "lr": 2.8587062689222617e-10, "epoch": 0.9932142628512899, "percentage": 99.32, "elapsed_time": "0:58:06", "remaining_time": "0:00:23", "throughput": 14005.13, "total_tokens": 48835520}
|
|
{"current_steps": 15520, "total_steps": 15621, "loss": 0.3221, "lr": 2.5977910184904473e-10, "epoch": 0.9935343447922668, "percentage": 99.35, "elapsed_time": "0:58:07", "remaining_time": "0:00:22", "throughput": 14007.21, "total_tokens": 48851328}
|
|
{"current_steps": 15525, "total_steps": 15621, "loss": 0.3354, "lr": 2.3493576420985373e-10, "epoch": 0.9938544267332438, "percentage": 99.39, "elapsed_time": "0:58:08", "remaining_time": "0:00:21", "throughput": 14009.12, "total_tokens": 48866304}
|
|
{"current_steps": 15530, "total_steps": 15621, "loss": 0.3174, "lr": 2.11340644991842e-10, "epoch": 0.9941745086742206, "percentage": 99.42, "elapsed_time": "0:58:08", "remaining_time": "0:00:20", "throughput": 14011.3, "total_tokens": 48882752}
|
|
{"current_steps": 15535, "total_steps": 15621, "loss": 0.3041, "lr": 1.8899377365388936e-10, "epoch": 0.9944945906151975, "percentage": 99.45, "elapsed_time": "0:58:09", "remaining_time": "0:00:19", "throughput": 14013.31, "total_tokens": 48898304}
|
|
{"current_steps": 15540, "total_steps": 15621, "loss": 0.4202, "lr": 1.6789517809634447e-10, "epoch": 0.9948146725561744, "percentage": 99.48, "elapsed_time": "0:58:10", "remaining_time": "0:00:18", "throughput": 14015.37, "total_tokens": 48914048}
|
|
{"current_steps": 15545, "total_steps": 15621, "loss": 0.3127, "lr": 1.480448846609139e-10, "epoch": 0.9951347544971513, "percentage": 99.51, "elapsed_time": "0:58:10", "remaining_time": "0:00:17", "throughput": 14017.47, "total_tokens": 48930176}
|
|
{"current_steps": 15550, "total_steps": 15621, "loss": 0.3505, "lr": 1.294429181311063e-10, "epoch": 0.9954548364381282, "percentage": 99.55, "elapsed_time": "0:58:11", "remaining_time": "0:00:15", "throughput": 14019.5, "total_tokens": 48945920}
|
|
{"current_steps": 15555, "total_steps": 15621, "loss": 0.4079, "lr": 1.1208930173145503e-10, "epoch": 0.995774918379105, "percentage": 99.58, "elapsed_time": "0:58:11", "remaining_time": "0:00:14", "throughput": 14021.43, "total_tokens": 48960832}
|
|
{"current_steps": 15560, "total_steps": 15621, "loss": 0.3213, "lr": 9.598405712840651e-11, "epoch": 0.9960950003200819, "percentage": 99.61, "elapsed_time": "0:58:12", "remaining_time": "0:00:13", "throughput": 14023.56, "total_tokens": 48977280}
|
|
{"current_steps": 15565, "total_steps": 15621, "loss": 0.347, "lr": 8.1127204429432e-11, "epoch": 0.9964150822610588, "percentage": 99.64, "elapsed_time": "0:58:13", "remaining_time": "0:00:12", "throughput": 14025.47, "total_tokens": 48992512}
|
|
{"current_steps": 15570, "total_steps": 15621, "loss": 0.3524, "lr": 6.751876218336061e-11, "epoch": 0.9967351642020357, "percentage": 99.67, "elapsed_time": "0:58:13", "remaining_time": "0:00:11", "throughput": 14027.5, "total_tokens": 49008128}
|
|
{"current_steps": 15575, "total_steps": 15621, "loss": 0.3376, "lr": 5.515874738071247e-11, "epoch": 0.9970552461430127, "percentage": 99.71, "elapsed_time": "0:58:14", "remaining_time": "0:00:10", "throughput": 14029.66, "total_tokens": 49024512}
|
|
{"current_steps": 15580, "total_steps": 15621, "loss": 0.308, "lr": 4.404717545303249e-11, "epoch": 0.9973753280839895, "percentage": 99.74, "elapsed_time": "0:58:14", "remaining_time": "0:00:09", "throughput": 14031.67, "total_tokens": 49040128}
|
|
{"current_steps": 15585, "total_steps": 15621, "loss": 0.3099, "lr": 3.418406027322352e-11, "epoch": 0.9976954100249664, "percentage": 99.77, "elapsed_time": "0:58:15", "remaining_time": "0:00:08", "throughput": 14033.63, "total_tokens": 49055360}
|
|
{"current_steps": 15590, "total_steps": 15621, "loss": 0.3518, "lr": 2.5569414155546254e-11, "epoch": 0.9980154919659433, "percentage": 99.8, "elapsed_time": "0:58:16", "remaining_time": "0:00:06", "throughput": 14035.71, "total_tokens": 49071360}
|
|
{"current_steps": 15595, "total_steps": 15621, "loss": 0.2734, "lr": 1.8203247855397287e-11, "epoch": 0.9983355739069202, "percentage": 99.83, "elapsed_time": "0:58:16", "remaining_time": "0:00:05", "throughput": 14037.58, "total_tokens": 49086144}
|
|
{"current_steps": 15600, "total_steps": 15621, "loss": 0.395, "lr": 1.2085570569642101e-11, "epoch": 0.9986556558478971, "percentage": 99.87, "elapsed_time": "0:58:17", "remaining_time": "0:00:04", "throughput": 14039.52, "total_tokens": 49101312}
|
|
{"current_steps": 15605, "total_steps": 15621, "loss": 0.3097, "lr": 7.216389936171019e-12, "epoch": 0.9989757377888739, "percentage": 99.9, "elapsed_time": "0:58:17", "remaining_time": "0:00:03", "throughput": 14041.49, "total_tokens": 49116672}
|
|
{"current_steps": 15610, "total_steps": 15621, "loss": 0.1772, "lr": 3.5957120342322567e-12, "epoch": 0.9992958197298508, "percentage": 99.93, "elapsed_time": "0:58:18", "remaining_time": "0:00:02", "throughput": 14043.52, "total_tokens": 49132288}
|
|
{"current_steps": 15615, "total_steps": 15621, "loss": 0.3934, "lr": 1.2235413842098807e-12, "epoch": 0.9996159016708277, "percentage": 99.96, "elapsed_time": "0:58:19", "remaining_time": "0:00:01", "throughput": 14045.5, "total_tokens": 49148096}
|
|
{"current_steps": 15620, "total_steps": 15621, "loss": 0.2515, "lr": 9.98809480678986e-14, "epoch": 0.9999359836118046, "percentage": 99.99, "elapsed_time": "0:58:19", "remaining_time": "0:00:00", "throughput": 14047.56, "total_tokens": 49163840}
|
|
{"current_steps": 15621, "total_steps": 15621, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:59:06", "remaining_time": "0:00:00", "throughput": 13864.36, "total_tokens": 49166912}
|