3145 lines
758 KiB
JSON
3145 lines
758 KiB
JSON
{"current_steps": 5, "total_steps": 15621, "loss": 2.1538, "lr": 5.118362124120281e-09, "epoch": 0.0003200819409768901, "percentage": 0.03, "elapsed_time": "0:00:01", "remaining_time": "1:38:49", "throughput": 8089.76, "total_tokens": 15360}
|
|
{"current_steps": 10, "total_steps": 15621, "loss": 2.3529, "lr": 1.1516314779270634e-08, "epoch": 0.0006401638819537802, "percentage": 0.06, "elapsed_time": "0:00:02", "remaining_time": "1:07:18", "throughput": 12024.73, "total_tokens": 31104}
|
|
{"current_steps": 15, "total_steps": 15621, "loss": 2.1426, "lr": 1.7914267434420987e-08, "epoch": 0.0009602458229306702, "percentage": 0.1, "elapsed_time": "0:00:03", "remaining_time": "0:56:14", "throughput": 14246.73, "total_tokens": 46208}
|
|
{"current_steps": 20, "total_steps": 15621, "loss": 2.7631, "lr": 2.431222008957134e-08, "epoch": 0.0012803277639075604, "percentage": 0.13, "elapsed_time": "0:00:03", "remaining_time": "0:51:11", "throughput": 15865.7, "total_tokens": 62464}
|
|
{"current_steps": 25, "total_steps": 15621, "loss": 2.1363, "lr": 3.071017274472169e-08, "epoch": 0.0016004097048844504, "percentage": 0.16, "elapsed_time": "0:00:04", "remaining_time": "0:48:28", "throughput": 16965.52, "total_tokens": 79104}
|
|
{"current_steps": 30, "total_steps": 15621, "loss": 2.2293, "lr": 3.710812539987204e-08, "epoch": 0.0019204916458613404, "percentage": 0.19, "elapsed_time": "0:00:05", "remaining_time": "0:46:14", "throughput": 17775.24, "total_tokens": 94912}
|
|
{"current_steps": 35, "total_steps": 15621, "loss": 2.3549, "lr": 4.350607805502239e-08, "epoch": 0.0022405735868382304, "percentage": 0.22, "elapsed_time": "0:00:06", "remaining_time": "0:44:40", "throughput": 18401.99, "total_tokens": 110784}
|
|
{"current_steps": 40, "total_steps": 15621, "loss": 2.1602, "lr": 4.990403071017274e-08, "epoch": 0.002560655527815121, "percentage": 0.26, "elapsed_time": "0:00:06", "remaining_time": "0:43:17", "throughput": 18849.37, "total_tokens": 125696}
|
|
{"current_steps": 45, "total_steps": 15621, "loss": 2.1132, "lr": 5.6301983365323095e-08, "epoch": 0.002880737468792011, "percentage": 0.29, "elapsed_time": "0:00:07", "remaining_time": "0:42:12", "throughput": 19225.89, "total_tokens": 140672}
|
|
{"current_steps": 50, "total_steps": 15621, "loss": 2.0446, "lr": 6.269993602047345e-08, "epoch": 0.003200819409768901, "percentage": 0.32, "elapsed_time": "0:00:07", "remaining_time": "0:41:20", "throughput": 19518.19, "total_tokens": 155456}
|
|
{"current_steps": 55, "total_steps": 15621, "loss": 1.9213, "lr": 6.90978886756238e-08, "epoch": 0.003520901350745791, "percentage": 0.35, "elapsed_time": "0:00:08", "remaining_time": "0:40:43", "throughput": 19783.79, "total_tokens": 170816}
|
|
{"current_steps": 60, "total_steps": 15621, "loss": 2.0365, "lr": 7.549584133077414e-08, "epoch": 0.003840983291722681, "percentage": 0.38, "elapsed_time": "0:00:09", "remaining_time": "0:40:03", "throughput": 19975.64, "total_tokens": 185088}
|
|
{"current_steps": 65, "total_steps": 15621, "loss": 1.6514, "lr": 8.18937939859245e-08, "epoch": 0.004161065232699571, "percentage": 0.42, "elapsed_time": "0:00:09", "remaining_time": "0:39:36", "throughput": 20182.21, "total_tokens": 200384}
|
|
{"current_steps": 70, "total_steps": 15621, "loss": 1.6765, "lr": 8.829174664107485e-08, "epoch": 0.004481147173676461, "percentage": 0.45, "elapsed_time": "0:00:10", "remaining_time": "0:39:13", "throughput": 20362.21, "total_tokens": 215744}
|
|
{"current_steps": 75, "total_steps": 15621, "loss": 1.6609, "lr": 9.468969929622521e-08, "epoch": 0.004801229114653352, "percentage": 0.48, "elapsed_time": "0:00:11", "remaining_time": "0:38:51", "throughput": 20487.78, "total_tokens": 230400}
|
|
{"current_steps": 80, "total_steps": 15621, "loss": 1.326, "lr": 1.0108765195137556e-07, "epoch": 0.005121311055630242, "percentage": 0.51, "elapsed_time": "0:00:11", "remaining_time": "0:38:37", "throughput": 20669.1, "total_tokens": 246592}
|
|
{"current_steps": 85, "total_steps": 15621, "loss": 1.1489, "lr": 1.074856046065259e-07, "epoch": 0.005441392996607132, "percentage": 0.54, "elapsed_time": "0:00:12", "remaining_time": "0:38:23", "throughput": 20810.68, "total_tokens": 262272}
|
|
{"current_steps": 90, "total_steps": 15621, "loss": 1.0445, "lr": 1.1388355726167625e-07, "epoch": 0.005761474937584022, "percentage": 0.58, "elapsed_time": "0:00:13", "remaining_time": "0:38:10", "throughput": 20927.01, "total_tokens": 277760}
|
|
{"current_steps": 95, "total_steps": 15621, "loss": 1.2459, "lr": 1.202815099168266e-07, "epoch": 0.006081556878560912, "percentage": 0.61, "elapsed_time": "0:00:13", "remaining_time": "0:37:56", "throughput": 21030.82, "total_tokens": 292992}
|
|
{"current_steps": 100, "total_steps": 15621, "loss": 1.1182, "lr": 1.2667946257197694e-07, "epoch": 0.006401638819537802, "percentage": 0.64, "elapsed_time": "0:00:14", "remaining_time": "0:37:41", "throughput": 21126.68, "total_tokens": 307840}
|
|
{"current_steps": 105, "total_steps": 15621, "loss": 1.0297, "lr": 1.3307741522712732e-07, "epoch": 0.006721720760514692, "percentage": 0.67, "elapsed_time": "0:00:15", "remaining_time": "0:37:29", "throughput": 21216.69, "total_tokens": 323008}
|
|
{"current_steps": 110, "total_steps": 15621, "loss": 1.051, "lr": 1.3947536788227767e-07, "epoch": 0.007041802701491582, "percentage": 0.7, "elapsed_time": "0:00:15", "remaining_time": "0:37:24", "throughput": 21324.08, "total_tokens": 339456}
|
|
{"current_steps": 115, "total_steps": 15621, "loss": 1.1295, "lr": 1.45873320537428e-07, "epoch": 0.007361884642468472, "percentage": 0.74, "elapsed_time": "0:00:16", "remaining_time": "0:37:15", "throughput": 21396.2, "total_tokens": 354816}
|
|
{"current_steps": 120, "total_steps": 15621, "loss": 0.8247, "lr": 1.5227127319257838e-07, "epoch": 0.007681966583445362, "percentage": 0.77, "elapsed_time": "0:00:17", "remaining_time": "0:37:04", "throughput": 21453.95, "total_tokens": 369472}
|
|
{"current_steps": 125, "total_steps": 15621, "loss": 0.9577, "lr": 1.586692258477287e-07, "epoch": 0.008002048524422252, "percentage": 0.8, "elapsed_time": "0:00:17", "remaining_time": "0:36:56", "throughput": 21515.33, "total_tokens": 384768}
|
|
{"current_steps": 130, "total_steps": 15621, "loss": 1.0328, "lr": 1.6506717850287908e-07, "epoch": 0.008322130465399142, "percentage": 0.83, "elapsed_time": "0:00:18", "remaining_time": "0:36:50", "throughput": 21577.63, "total_tokens": 400192}
|
|
{"current_steps": 135, "total_steps": 15621, "loss": 0.8953, "lr": 1.7146513115802943e-07, "epoch": 0.008642212406376032, "percentage": 0.86, "elapsed_time": "0:00:19", "remaining_time": "0:36:46", "throughput": 21655.51, "total_tokens": 416640}
|
|
{"current_steps": 140, "total_steps": 15621, "loss": 0.8265, "lr": 1.7786308381317976e-07, "epoch": 0.008962294347352922, "percentage": 0.9, "elapsed_time": "0:00:19", "remaining_time": "0:36:42", "throughput": 21718.54, "total_tokens": 432640}
|
|
{"current_steps": 145, "total_steps": 15621, "loss": 0.8983, "lr": 1.8426103646833014e-07, "epoch": 0.009282376288329812, "percentage": 0.93, "elapsed_time": "0:00:20", "remaining_time": "0:36:39", "throughput": 21769.13, "total_tokens": 448640}
|
|
{"current_steps": 150, "total_steps": 15621, "loss": 0.9503, "lr": 1.9065898912348046e-07, "epoch": 0.009602458229306703, "percentage": 0.96, "elapsed_time": "0:00:21", "remaining_time": "0:36:35", "throughput": 21818.09, "total_tokens": 464448}
|
|
{"current_steps": 155, "total_steps": 15621, "loss": 0.858, "lr": 1.9705694177863084e-07, "epoch": 0.009922540170283593, "percentage": 0.99, "elapsed_time": "0:00:21", "remaining_time": "0:36:29", "throughput": 21853.99, "total_tokens": 479488}
|
|
{"current_steps": 160, "total_steps": 15621, "loss": 0.7304, "lr": 2.034548944337812e-07, "epoch": 0.010242622111260483, "percentage": 1.02, "elapsed_time": "0:00:22", "remaining_time": "0:36:25", "throughput": 21900.34, "total_tokens": 495296}
|
|
{"current_steps": 165, "total_steps": 15621, "loss": 0.7848, "lr": 2.0985284708893152e-07, "epoch": 0.010562704052237373, "percentage": 1.06, "elapsed_time": "0:00:23", "remaining_time": "0:36:18", "throughput": 21935.33, "total_tokens": 510144}
|
|
{"current_steps": 170, "total_steps": 15621, "loss": 0.8469, "lr": 2.162507997440819e-07, "epoch": 0.010882785993214263, "percentage": 1.09, "elapsed_time": "0:00:23", "remaining_time": "0:36:12", "throughput": 21959.47, "total_tokens": 524928}
|
|
{"current_steps": 175, "total_steps": 15621, "loss": 0.7184, "lr": 2.2264875239923222e-07, "epoch": 0.011202867934191153, "percentage": 1.12, "elapsed_time": "0:00:24", "remaining_time": "0:36:11", "throughput": 22007.51, "total_tokens": 541504}
|
|
{"current_steps": 180, "total_steps": 15621, "loss": 0.7354, "lr": 2.290467050543826e-07, "epoch": 0.011522949875168043, "percentage": 1.15, "elapsed_time": "0:00:25", "remaining_time": "0:36:05", "throughput": 22031.18, "total_tokens": 556096}
|
|
{"current_steps": 185, "total_steps": 15621, "loss": 0.7351, "lr": 2.3544465770953295e-07, "epoch": 0.011843031816144933, "percentage": 1.18, "elapsed_time": "0:00:25", "remaining_time": "0:36:04", "throughput": 22076.87, "total_tokens": 572736}
|
|
{"current_steps": 190, "total_steps": 15621, "loss": 0.853, "lr": 2.418426103646833e-07, "epoch": 0.012163113757121823, "percentage": 1.22, "elapsed_time": "0:00:26", "remaining_time": "0:36:01", "throughput": 22106.49, "total_tokens": 588352}
|
|
{"current_steps": 195, "total_steps": 15621, "loss": 0.9742, "lr": 2.4824056301983363e-07, "epoch": 0.012483195698098713, "percentage": 1.25, "elapsed_time": "0:00:27", "remaining_time": "0:35:57", "throughput": 22129.73, "total_tokens": 603520}
|
|
{"current_steps": 200, "total_steps": 15621, "loss": 0.8167, "lr": 2.54638515674984e-07, "epoch": 0.012803277639075603, "percentage": 1.28, "elapsed_time": "0:00:27", "remaining_time": "0:35:55", "throughput": 22158.86, "total_tokens": 619392}
|
|
{"current_steps": 205, "total_steps": 15621, "loss": 0.7996, "lr": 2.6103646833013433e-07, "epoch": 0.013123359580052493, "percentage": 1.31, "elapsed_time": "0:00:28", "remaining_time": "0:35:53", "throughput": 22194.59, "total_tokens": 635456}
|
|
{"current_steps": 210, "total_steps": 15621, "loss": 0.8732, "lr": 2.6743442098528466e-07, "epoch": 0.013443441521029383, "percentage": 1.34, "elapsed_time": "0:00:29", "remaining_time": "0:35:49", "throughput": 22217.52, "total_tokens": 650880}
|
|
{"current_steps": 215, "total_steps": 15621, "loss": 0.8244, "lr": 2.7383237364043504e-07, "epoch": 0.013763523462006273, "percentage": 1.38, "elapsed_time": "0:00:29", "remaining_time": "0:35:48", "throughput": 22239.4, "total_tokens": 666688}
|
|
{"current_steps": 220, "total_steps": 15621, "loss": 0.7909, "lr": 2.802303262955854e-07, "epoch": 0.014083605402983163, "percentage": 1.41, "elapsed_time": "0:00:30", "remaining_time": "0:35:44", "throughput": 22261.79, "total_tokens": 682112}
|
|
{"current_steps": 225, "total_steps": 15621, "loss": 0.8318, "lr": 2.866282789507358e-07, "epoch": 0.014403687343960053, "percentage": 1.44, "elapsed_time": "0:00:31", "remaining_time": "0:35:42", "throughput": 22286.5, "total_tokens": 697728}
|
|
{"current_steps": 230, "total_steps": 15621, "loss": 0.6828, "lr": 2.9302623160588607e-07, "epoch": 0.014723769284936943, "percentage": 1.47, "elapsed_time": "0:00:31", "remaining_time": "0:35:37", "throughput": 22307.42, "total_tokens": 712704}
|
|
{"current_steps": 235, "total_steps": 15621, "loss": 0.9689, "lr": 2.9942418426103644e-07, "epoch": 0.015043851225913833, "percentage": 1.5, "elapsed_time": "0:00:32", "remaining_time": "0:35:37", "throughput": 22337.18, "total_tokens": 729408}
|
|
{"current_steps": 240, "total_steps": 15621, "loss": 0.7854, "lr": 3.058221369161868e-07, "epoch": 0.015363933166890723, "percentage": 1.54, "elapsed_time": "0:00:33", "remaining_time": "0:35:36", "throughput": 22360.45, "total_tokens": 745344}
|
|
{"current_steps": 245, "total_steps": 15621, "loss": 0.7013, "lr": 3.1222008957133715e-07, "epoch": 0.015684015107867613, "percentage": 1.57, "elapsed_time": "0:00:34", "remaining_time": "0:35:36", "throughput": 22400.54, "total_tokens": 762688}
|
|
{"current_steps": 250, "total_steps": 15621, "loss": 0.7128, "lr": 3.186180422264875e-07, "epoch": 0.016004097048844503, "percentage": 1.6, "elapsed_time": "0:00:34", "remaining_time": "0:35:36", "throughput": 22426.34, "total_tokens": 779392}
|
|
{"current_steps": 255, "total_steps": 15621, "loss": 0.7911, "lr": 3.2501599488163785e-07, "epoch": 0.016324178989821393, "percentage": 1.63, "elapsed_time": "0:00:35", "remaining_time": "0:35:32", "throughput": 22437.61, "total_tokens": 794112}
|
|
{"current_steps": 260, "total_steps": 15621, "loss": 0.8429, "lr": 3.314139475367882e-07, "epoch": 0.016644260930798283, "percentage": 1.66, "elapsed_time": "0:00:36", "remaining_time": "0:35:31", "throughput": 22459.48, "total_tokens": 810112}
|
|
{"current_steps": 265, "total_steps": 15621, "loss": 0.8602, "lr": 3.3781190019193855e-07, "epoch": 0.016964342871775173, "percentage": 1.7, "elapsed_time": "0:00:36", "remaining_time": "0:35:28", "throughput": 22473.94, "total_tokens": 825472}
|
|
{"current_steps": 270, "total_steps": 15621, "loss": 0.9818, "lr": 3.4420985284708893e-07, "epoch": 0.017284424812752063, "percentage": 1.73, "elapsed_time": "0:00:37", "remaining_time": "0:35:24", "throughput": 22481.36, "total_tokens": 840128}
|
|
{"current_steps": 275, "total_steps": 15621, "loss": 0.7846, "lr": 3.5060780550223926e-07, "epoch": 0.017604506753728953, "percentage": 1.76, "elapsed_time": "0:00:38", "remaining_time": "0:35:21", "throughput": 22491.1, "total_tokens": 855104}
|
|
{"current_steps": 280, "total_steps": 15621, "loss": 0.8039, "lr": 3.570057581573896e-07, "epoch": 0.017924588694705843, "percentage": 1.79, "elapsed_time": "0:00:38", "remaining_time": "0:35:20", "throughput": 22504.33, "total_tokens": 870848}
|
|
{"current_steps": 285, "total_steps": 15621, "loss": 0.7489, "lr": 3.6340371081253996e-07, "epoch": 0.018244670635682733, "percentage": 1.82, "elapsed_time": "0:00:39", "remaining_time": "0:35:16", "throughput": 22514.64, "total_tokens": 885760}
|
|
{"current_steps": 290, "total_steps": 15621, "loss": 0.705, "lr": 3.6980166346769034e-07, "epoch": 0.018564752576659623, "percentage": 1.86, "elapsed_time": "0:00:39", "remaining_time": "0:35:14", "throughput": 22524.41, "total_tokens": 900928}
|
|
{"current_steps": 295, "total_steps": 15621, "loss": 0.7869, "lr": 3.7619961612284067e-07, "epoch": 0.018884834517636517, "percentage": 1.89, "elapsed_time": "0:00:40", "remaining_time": "0:35:11", "throughput": 22538.15, "total_tokens": 915968}
|
|
{"current_steps": 300, "total_steps": 15621, "loss": 0.9906, "lr": 3.8259756877799104e-07, "epoch": 0.019204916458613407, "percentage": 1.92, "elapsed_time": "0:00:41", "remaining_time": "0:35:11", "throughput": 22567.41, "total_tokens": 933056}
|
|
{"current_steps": 305, "total_steps": 15621, "loss": 0.737, "lr": 3.889955214331414e-07, "epoch": 0.019524998399590297, "percentage": 1.95, "elapsed_time": "0:00:42", "remaining_time": "0:35:09", "throughput": 22576.97, "total_tokens": 948416}
|
|
{"current_steps": 310, "total_steps": 15621, "loss": 0.7708, "lr": 3.953934740882917e-07, "epoch": 0.019845080340567187, "percentage": 1.98, "elapsed_time": "0:00:42", "remaining_time": "0:35:06", "throughput": 22581.04, "total_tokens": 962880}
|
|
{"current_steps": 315, "total_steps": 15621, "loss": 0.8126, "lr": 4.0179142674344207e-07, "epoch": 0.020165162281544077, "percentage": 2.02, "elapsed_time": "0:00:43", "remaining_time": "0:35:06", "throughput": 22603.14, "total_tokens": 979904}
|
|
{"current_steps": 320, "total_steps": 15621, "loss": 0.8299, "lr": 4.0818937939859245e-07, "epoch": 0.020485244222520967, "percentage": 2.05, "elapsed_time": "0:00:44", "remaining_time": "0:35:04", "throughput": 22609.5, "total_tokens": 995136}
|
|
{"current_steps": 325, "total_steps": 15621, "loss": 0.771, "lr": 4.145873320537428e-07, "epoch": 0.020805326163497857, "percentage": 2.08, "elapsed_time": "0:00:44", "remaining_time": "0:35:03", "throughput": 22622.46, "total_tokens": 1011008}
|
|
{"current_steps": 330, "total_steps": 15621, "loss": 0.831, "lr": 4.2098528470889315e-07, "epoch": 0.021125408104474747, "percentage": 2.11, "elapsed_time": "0:00:45", "remaining_time": "0:35:01", "throughput": 22623.01, "total_tokens": 1025792}
|
|
{"current_steps": 335, "total_steps": 15621, "loss": 0.6929, "lr": 4.273832373640435e-07, "epoch": 0.021445490045451637, "percentage": 2.14, "elapsed_time": "0:00:46", "remaining_time": "0:35:01", "throughput": 22643.26, "total_tokens": 1042944}
|
|
{"current_steps": 340, "total_steps": 15621, "loss": 0.8271, "lr": 4.3378119001919386e-07, "epoch": 0.021765571986428527, "percentage": 2.18, "elapsed_time": "0:00:46", "remaining_time": "0:35:00", "throughput": 22653.2, "total_tokens": 1058688}
|
|
{"current_steps": 345, "total_steps": 15621, "loss": 0.7202, "lr": 4.401791426743442e-07, "epoch": 0.022085653927405417, "percentage": 2.21, "elapsed_time": "0:00:47", "remaining_time": "0:34:59", "throughput": 22663.56, "total_tokens": 1074560}
|
|
{"current_steps": 350, "total_steps": 15621, "loss": 0.6947, "lr": 4.4657709532949456e-07, "epoch": 0.022405735868382307, "percentage": 2.24, "elapsed_time": "0:00:48", "remaining_time": "0:34:57", "throughput": 22669.54, "total_tokens": 1089728}
|
|
{"current_steps": 355, "total_steps": 15621, "loss": 0.9107, "lr": 4.5297504798464494e-07, "epoch": 0.022725817809359197, "percentage": 2.27, "elapsed_time": "0:00:48", "remaining_time": "0:34:55", "throughput": 22676.44, "total_tokens": 1105024}
|
|
{"current_steps": 360, "total_steps": 15621, "loss": 0.7831, "lr": 4.593730006397952e-07, "epoch": 0.023045899750336087, "percentage": 2.3, "elapsed_time": "0:00:49", "remaining_time": "0:34:54", "throughput": 22686.17, "total_tokens": 1121088}
|
|
{"current_steps": 365, "total_steps": 15621, "loss": 0.7062, "lr": 4.657709532949456e-07, "epoch": 0.023365981691312977, "percentage": 2.34, "elapsed_time": "0:00:50", "remaining_time": "0:34:53", "throughput": 22697.86, "total_tokens": 1136896}
|
|
{"current_steps": 370, "total_steps": 15621, "loss": 0.7133, "lr": 4.7216890595009597e-07, "epoch": 0.023686063632289867, "percentage": 2.37, "elapsed_time": "0:00:50", "remaining_time": "0:34:53", "throughput": 22710.36, "total_tokens": 1153280}
|
|
{"current_steps": 375, "total_steps": 15621, "loss": 0.8307, "lr": 4.785668586052463e-07, "epoch": 0.024006145573266757, "percentage": 2.4, "elapsed_time": "0:00:51", "remaining_time": "0:34:52", "throughput": 22722.94, "total_tokens": 1169536}
|
|
{"current_steps": 380, "total_steps": 15621, "loss": 0.7573, "lr": 4.849648112603967e-07, "epoch": 0.024326227514243647, "percentage": 2.43, "elapsed_time": "0:00:52", "remaining_time": "0:34:51", "throughput": 22728.9, "total_tokens": 1185088}
|
|
{"current_steps": 385, "total_steps": 15621, "loss": 0.6599, "lr": 4.91362763915547e-07, "epoch": 0.024646309455220537, "percentage": 2.46, "elapsed_time": "0:00:52", "remaining_time": "0:34:50", "throughput": 22732.94, "total_tokens": 1200832}
|
|
{"current_steps": 390, "total_steps": 15621, "loss": 0.6614, "lr": 4.977607165706974e-07, "epoch": 0.024966391396197427, "percentage": 2.5, "elapsed_time": "0:00:53", "remaining_time": "0:34:48", "throughput": 22741.12, "total_tokens": 1216320}
|
|
{"current_steps": 395, "total_steps": 15621, "loss": 0.7446, "lr": 5.041586692258478e-07, "epoch": 0.025286473337174317, "percentage": 2.53, "elapsed_time": "0:00:54", "remaining_time": "0:34:48", "throughput": 22749.93, "total_tokens": 1232832}
|
|
{"current_steps": 400, "total_steps": 15621, "loss": 0.7023, "lr": 5.10556621880998e-07, "epoch": 0.025606555278151207, "percentage": 2.56, "elapsed_time": "0:00:54", "remaining_time": "0:34:47", "throughput": 22757.08, "total_tokens": 1248384}
|
|
{"current_steps": 405, "total_steps": 15621, "loss": 0.672, "lr": 5.169545745361484e-07, "epoch": 0.025926637219128097, "percentage": 2.59, "elapsed_time": "0:00:55", "remaining_time": "0:34:45", "throughput": 22765.31, "total_tokens": 1263936}
|
|
{"current_steps": 410, "total_steps": 15621, "loss": 1.1224, "lr": 5.233525271912988e-07, "epoch": 0.026246719160104987, "percentage": 2.62, "elapsed_time": "0:00:56", "remaining_time": "0:35:09", "throughput": 22756.92, "total_tokens": 1294208}
|
|
{"current_steps": 415, "total_steps": 15621, "loss": 0.7874, "lr": 5.297504798464492e-07, "epoch": 0.026566801101081877, "percentage": 2.66, "elapsed_time": "0:00:57", "remaining_time": "0:35:07", "throughput": 22761.38, "total_tokens": 1309120}
|
|
{"current_steps": 420, "total_steps": 15621, "loss": 0.8574, "lr": 5.361484325015994e-07, "epoch": 0.026886883042058767, "percentage": 2.69, "elapsed_time": "0:00:58", "remaining_time": "0:35:05", "throughput": 22766.09, "total_tokens": 1324224}
|
|
{"current_steps": 425, "total_steps": 15621, "loss": 0.683, "lr": 5.425463851567498e-07, "epoch": 0.027206964983035656, "percentage": 2.72, "elapsed_time": "0:00:58", "remaining_time": "0:35:05", "throughput": 22777.8, "total_tokens": 1341056}
|
|
{"current_steps": 430, "total_steps": 15621, "loss": 0.7444, "lr": 5.489443378119002e-07, "epoch": 0.027527046924012546, "percentage": 2.75, "elapsed_time": "0:00:59", "remaining_time": "0:35:03", "throughput": 22782.36, "total_tokens": 1356544}
|
|
{"current_steps": 435, "total_steps": 15621, "loss": 0.6717, "lr": 5.553422904670505e-07, "epoch": 0.027847128864989436, "percentage": 2.78, "elapsed_time": "0:01:00", "remaining_time": "0:35:01", "throughput": 22786.66, "total_tokens": 1371840}
|
|
{"current_steps": 440, "total_steps": 15621, "loss": 0.6805, "lr": 5.61740243122201e-07, "epoch": 0.028167210805966326, "percentage": 2.82, "elapsed_time": "0:01:00", "remaining_time": "0:34:59", "throughput": 22789.83, "total_tokens": 1386816}
|
|
{"current_steps": 445, "total_steps": 15621, "loss": 0.744, "lr": 5.681381957773512e-07, "epoch": 0.028487292746943216, "percentage": 2.85, "elapsed_time": "0:01:01", "remaining_time": "0:34:57", "throughput": 22793.51, "total_tokens": 1401792}
|
|
{"current_steps": 450, "total_steps": 15621, "loss": 0.6219, "lr": 5.745361484325015e-07, "epoch": 0.028807374687920106, "percentage": 2.88, "elapsed_time": "0:01:02", "remaining_time": "0:34:55", "throughput": 22797.26, "total_tokens": 1416896}
|
|
{"current_steps": 455, "total_steps": 15621, "loss": 0.7479, "lr": 5.80934101087652e-07, "epoch": 0.029127456628896996, "percentage": 2.91, "elapsed_time": "0:01:02", "remaining_time": "0:34:54", "throughput": 22801.44, "total_tokens": 1432704}
|
|
{"current_steps": 460, "total_steps": 15621, "loss": 0.7009, "lr": 5.873320537428022e-07, "epoch": 0.029447538569873886, "percentage": 2.94, "elapsed_time": "0:01:03", "remaining_time": "0:34:53", "throughput": 22805.41, "total_tokens": 1448384}
|
|
{"current_steps": 465, "total_steps": 15621, "loss": 0.7189, "lr": 5.937300063979526e-07, "epoch": 0.029767620510850776, "percentage": 2.98, "elapsed_time": "0:01:04", "remaining_time": "0:34:52", "throughput": 22816.13, "total_tokens": 1464832}
|
|
{"current_steps": 470, "total_steps": 15621, "loss": 0.678, "lr": 6.00127959053103e-07, "epoch": 0.030087702451827666, "percentage": 3.01, "elapsed_time": "0:01:04", "remaining_time": "0:34:50", "throughput": 22815.42, "total_tokens": 1479424}
|
|
{"current_steps": 475, "total_steps": 15621, "loss": 0.7233, "lr": 6.065259117082533e-07, "epoch": 0.030407784392804556, "percentage": 3.04, "elapsed_time": "0:01:05", "remaining_time": "0:34:48", "throughput": 22816.5, "total_tokens": 1494336}
|
|
{"current_steps": 480, "total_steps": 15621, "loss": 0.6771, "lr": 6.129238643634037e-07, "epoch": 0.030727866333781446, "percentage": 3.07, "elapsed_time": "0:01:06", "remaining_time": "0:34:46", "throughput": 22817.77, "total_tokens": 1509184}
|
|
{"current_steps": 485, "total_steps": 15621, "loss": 0.8088, "lr": 6.19321817018554e-07, "epoch": 0.031047948274758336, "percentage": 3.1, "elapsed_time": "0:01:06", "remaining_time": "0:34:45", "throughput": 22825.17, "total_tokens": 1525504}
|
|
{"current_steps": 490, "total_steps": 15621, "loss": 0.5954, "lr": 6.257197696737044e-07, "epoch": 0.031368030215735226, "percentage": 3.14, "elapsed_time": "0:01:07", "remaining_time": "0:34:44", "throughput": 22832.99, "total_tokens": 1541504}
|
|
{"current_steps": 495, "total_steps": 15621, "loss": 0.6166, "lr": 6.321177223288548e-07, "epoch": 0.03168811215671212, "percentage": 3.17, "elapsed_time": "0:01:08", "remaining_time": "0:34:43", "throughput": 22838.98, "total_tokens": 1557184}
|
|
{"current_steps": 500, "total_steps": 15621, "loss": 0.7699, "lr": 6.385156749840051e-07, "epoch": 0.032008194097689006, "percentage": 3.2, "elapsed_time": "0:01:08", "remaining_time": "0:34:42", "throughput": 22847.08, "total_tokens": 1573440}
|
|
{"current_steps": 505, "total_steps": 15621, "loss": 0.8718, "lr": 6.449136276391554e-07, "epoch": 0.0323282760386659, "percentage": 3.23, "elapsed_time": "0:01:09", "remaining_time": "0:34:41", "throughput": 22851.08, "total_tokens": 1588736}
|
|
{"current_steps": 510, "total_steps": 15621, "loss": 0.6977, "lr": 6.513115802943058e-07, "epoch": 0.032648357979642786, "percentage": 3.26, "elapsed_time": "0:01:10", "remaining_time": "0:34:39", "throughput": 22856.01, "total_tokens": 1604352}
|
|
{"current_steps": 515, "total_steps": 15621, "loss": 0.6582, "lr": 6.577095329494562e-07, "epoch": 0.03296843992061968, "percentage": 3.3, "elapsed_time": "0:01:10", "remaining_time": "0:34:37", "throughput": 22856.19, "total_tokens": 1618816}
|
|
{"current_steps": 520, "total_steps": 15621, "loss": 0.7409, "lr": 6.641074856046065e-07, "epoch": 0.033288521861596566, "percentage": 3.33, "elapsed_time": "0:01:11", "remaining_time": "0:34:37", "throughput": 22868.94, "total_tokens": 1635648}
|
|
{"current_steps": 525, "total_steps": 15621, "loss": 0.7248, "lr": 6.705054382597568e-07, "epoch": 0.03360860380257346, "percentage": 3.36, "elapsed_time": "0:01:12", "remaining_time": "0:34:36", "throughput": 22871.74, "total_tokens": 1651328}
|
|
{"current_steps": 530, "total_steps": 15621, "loss": 0.7271, "lr": 6.769033909149072e-07, "epoch": 0.033928685743550346, "percentage": 3.39, "elapsed_time": "0:01:12", "remaining_time": "0:34:36", "throughput": 22883.84, "total_tokens": 1668928}
|
|
{"current_steps": 535, "total_steps": 15621, "loss": 0.6188, "lr": 6.833013435700575e-07, "epoch": 0.03424876768452724, "percentage": 3.42, "elapsed_time": "0:01:13", "remaining_time": "0:34:36", "throughput": 22891.17, "total_tokens": 1685504}
|
|
{"current_steps": 540, "total_steps": 15621, "loss": 0.7016, "lr": 6.89699296225208e-07, "epoch": 0.034568849625504126, "percentage": 3.46, "elapsed_time": "0:01:14", "remaining_time": "0:34:35", "throughput": 22898.17, "total_tokens": 1701952}
|
|
{"current_steps": 545, "total_steps": 15621, "loss": 0.7293, "lr": 6.960972488803583e-07, "epoch": 0.03488893156648102, "percentage": 3.49, "elapsed_time": "0:01:14", "remaining_time": "0:34:34", "throughput": 22899.64, "total_tokens": 1716992}
|
|
{"current_steps": 550, "total_steps": 15621, "loss": 0.583, "lr": 7.024952015355085e-07, "epoch": 0.035209013507457906, "percentage": 3.52, "elapsed_time": "0:01:15", "remaining_time": "0:34:32", "throughput": 22899.33, "total_tokens": 1732160}
|
|
{"current_steps": 555, "total_steps": 15621, "loss": 0.656, "lr": 7.08893154190659e-07, "epoch": 0.0355290954484348, "percentage": 3.55, "elapsed_time": "0:01:16", "remaining_time": "0:34:32", "throughput": 22904.56, "total_tokens": 1748416}
|
|
{"current_steps": 560, "total_steps": 15621, "loss": 0.7074, "lr": 7.152911068458093e-07, "epoch": 0.035849177389411686, "percentage": 3.58, "elapsed_time": "0:01:16", "remaining_time": "0:34:30", "throughput": 22906.88, "total_tokens": 1763776}
|
|
{"current_steps": 565, "total_steps": 15621, "loss": 0.7046, "lr": 7.216890595009597e-07, "epoch": 0.03616925933038858, "percentage": 3.62, "elapsed_time": "0:01:17", "remaining_time": "0:34:30", "throughput": 22914.21, "total_tokens": 1780160}
|
|
{"current_steps": 570, "total_steps": 15621, "loss": 0.6024, "lr": 7.2808701215611e-07, "epoch": 0.036489341271365466, "percentage": 3.65, "elapsed_time": "0:01:18", "remaining_time": "0:34:29", "throughput": 22919.37, "total_tokens": 1795968}
|
|
{"current_steps": 575, "total_steps": 15621, "loss": 0.6319, "lr": 7.344849648112603e-07, "epoch": 0.03680942321234236, "percentage": 3.68, "elapsed_time": "0:01:19", "remaining_time": "0:34:31", "throughput": 22936.59, "total_tokens": 1815424}
|
|
{"current_steps": 580, "total_steps": 15621, "loss": 0.8256, "lr": 7.408829174664107e-07, "epoch": 0.037129505153319246, "percentage": 3.71, "elapsed_time": "0:01:19", "remaining_time": "0:34:30", "throughput": 22941.71, "total_tokens": 1831936}
|
|
{"current_steps": 585, "total_steps": 15621, "loss": 0.6147, "lr": 7.472808701215611e-07, "epoch": 0.03744958709429614, "percentage": 3.74, "elapsed_time": "0:01:20", "remaining_time": "0:34:29", "throughput": 22942.23, "total_tokens": 1847424}
|
|
{"current_steps": 590, "total_steps": 15621, "loss": 0.7338, "lr": 7.536788227767114e-07, "epoch": 0.03776966903527303, "percentage": 3.78, "elapsed_time": "0:01:21", "remaining_time": "0:34:28", "throughput": 22942.12, "total_tokens": 1862400}
|
|
{"current_steps": 595, "total_steps": 15621, "loss": 0.7698, "lr": 7.600767754318617e-07, "epoch": 0.03808975097624992, "percentage": 3.81, "elapsed_time": "0:01:21", "remaining_time": "0:34:26", "throughput": 22942.22, "total_tokens": 1876928}
|
|
{"current_steps": 600, "total_steps": 15621, "loss": 0.6403, "lr": 7.664747280870121e-07, "epoch": 0.03840983291722681, "percentage": 3.84, "elapsed_time": "0:01:22", "remaining_time": "0:34:25", "throughput": 22943.97, "total_tokens": 1892608}
|
|
{"current_steps": 605, "total_steps": 15621, "loss": 0.7287, "lr": 7.728726807421625e-07, "epoch": 0.0387299148582037, "percentage": 3.87, "elapsed_time": "0:01:23", "remaining_time": "0:34:25", "throughput": 22952.95, "total_tokens": 1909696}
|
|
{"current_steps": 610, "total_steps": 15621, "loss": 0.7582, "lr": 7.792706333973129e-07, "epoch": 0.03904999679918059, "percentage": 3.9, "elapsed_time": "0:01:23", "remaining_time": "0:34:23", "throughput": 22953.26, "total_tokens": 1924864}
|
|
{"current_steps": 615, "total_steps": 15621, "loss": 0.5567, "lr": 7.856685860524632e-07, "epoch": 0.03937007874015748, "percentage": 3.94, "elapsed_time": "0:01:24", "remaining_time": "0:34:22", "throughput": 22955.9, "total_tokens": 1939968}
|
|
{"current_steps": 620, "total_steps": 15621, "loss": 0.711, "lr": 7.920665387076135e-07, "epoch": 0.03969016068113437, "percentage": 3.97, "elapsed_time": "0:01:25", "remaining_time": "0:34:20", "throughput": 22956.37, "total_tokens": 1955136}
|
|
{"current_steps": 625, "total_steps": 15621, "loss": 0.7024, "lr": 7.984644913627639e-07, "epoch": 0.04001024262211126, "percentage": 4.0, "elapsed_time": "0:01:25", "remaining_time": "0:34:19", "throughput": 22958.58, "total_tokens": 1970880}
|
|
{"current_steps": 630, "total_steps": 15621, "loss": 0.583, "lr": 8.048624440179143e-07, "epoch": 0.04033032456308815, "percentage": 4.03, "elapsed_time": "0:01:26", "remaining_time": "0:34:18", "throughput": 22961.3, "total_tokens": 1986752}
|
|
{"current_steps": 635, "total_steps": 15621, "loss": 0.5988, "lr": 8.112603966730645e-07, "epoch": 0.04065040650406504, "percentage": 4.07, "elapsed_time": "0:01:27", "remaining_time": "0:34:17", "throughput": 22961.9, "total_tokens": 2001856}
|
|
{"current_steps": 640, "total_steps": 15621, "loss": 0.6581, "lr": 8.17658349328215e-07, "epoch": 0.04097048844504193, "percentage": 4.1, "elapsed_time": "0:01:27", "remaining_time": "0:34:18", "throughput": 22973.14, "total_tokens": 2019968}
|
|
{"current_steps": 645, "total_steps": 15621, "loss": 0.7118, "lr": 8.240563019833653e-07, "epoch": 0.04129057038601882, "percentage": 4.13, "elapsed_time": "0:01:28", "remaining_time": "0:34:16", "throughput": 22974.99, "total_tokens": 2035328}
|
|
{"current_steps": 650, "total_steps": 15621, "loss": 0.6236, "lr": 8.304542546385156e-07, "epoch": 0.04161065232699571, "percentage": 4.16, "elapsed_time": "0:01:29", "remaining_time": "0:34:19", "throughput": 22988.72, "total_tokens": 2055168}
|
|
{"current_steps": 655, "total_steps": 15621, "loss": 0.7478, "lr": 8.36852207293666e-07, "epoch": 0.0419307342679726, "percentage": 4.19, "elapsed_time": "0:01:30", "remaining_time": "0:34:18", "throughput": 22993.6, "total_tokens": 2071808}
|
|
{"current_steps": 660, "total_steps": 15621, "loss": 0.6519, "lr": 8.432501599488163e-07, "epoch": 0.04225081620894949, "percentage": 4.23, "elapsed_time": "0:01:30", "remaining_time": "0:34:17", "throughput": 22996.44, "total_tokens": 2087424}
|
|
{"current_steps": 665, "total_steps": 15621, "loss": 0.7696, "lr": 8.496481126039667e-07, "epoch": 0.04257089814992638, "percentage": 4.26, "elapsed_time": "0:01:31", "remaining_time": "0:34:16", "throughput": 22997.51, "total_tokens": 2102592}
|
|
{"current_steps": 670, "total_steps": 15621, "loss": 0.6487, "lr": 8.560460652591171e-07, "epoch": 0.04289098009090327, "percentage": 4.29, "elapsed_time": "0:01:32", "remaining_time": "0:34:15", "throughput": 23004.76, "total_tokens": 2119488}
|
|
{"current_steps": 675, "total_steps": 15621, "loss": 0.6395, "lr": 8.624440179142674e-07, "epoch": 0.04321106203188016, "percentage": 4.32, "elapsed_time": "0:01:32", "remaining_time": "0:34:15", "throughput": 23010.71, "total_tokens": 2136000}
|
|
{"current_steps": 680, "total_steps": 15621, "loss": 0.7432, "lr": 8.688419705694177e-07, "epoch": 0.04353114397285705, "percentage": 4.35, "elapsed_time": "0:01:33", "remaining_time": "0:34:14", "throughput": 23015.92, "total_tokens": 2152448}
|
|
{"current_steps": 685, "total_steps": 15621, "loss": 0.6236, "lr": 8.752399232245681e-07, "epoch": 0.04385122591383394, "percentage": 4.39, "elapsed_time": "0:01:34", "remaining_time": "0:34:13", "throughput": 23018.64, "total_tokens": 2168000}
|
|
{"current_steps": 690, "total_steps": 15621, "loss": 0.5919, "lr": 8.816378758797185e-07, "epoch": 0.04417130785481083, "percentage": 4.42, "elapsed_time": "0:01:34", "remaining_time": "0:34:12", "throughput": 23018.88, "total_tokens": 2183552}
|
|
{"current_steps": 695, "total_steps": 15621, "loss": 0.7068, "lr": 8.880358285348688e-07, "epoch": 0.04449138979578772, "percentage": 4.45, "elapsed_time": "0:01:35", "remaining_time": "0:34:11", "throughput": 23023.12, "total_tokens": 2199488}
|
|
{"current_steps": 700, "total_steps": 15621, "loss": 0.6095, "lr": 8.944337811900191e-07, "epoch": 0.04481147173676461, "percentage": 4.48, "elapsed_time": "0:01:36", "remaining_time": "0:34:10", "throughput": 23025.65, "total_tokens": 2215296}
|
|
{"current_steps": 705, "total_steps": 15621, "loss": 0.7106, "lr": 9.008317338451695e-07, "epoch": 0.0451315536777415, "percentage": 4.51, "elapsed_time": "0:01:36", "remaining_time": "0:34:09", "throughput": 23026.33, "total_tokens": 2230016}
|
|
{"current_steps": 710, "total_steps": 15621, "loss": 0.666, "lr": 9.072296865003198e-07, "epoch": 0.04545163561871839, "percentage": 4.55, "elapsed_time": "0:01:37", "remaining_time": "0:34:07", "throughput": 23025.56, "total_tokens": 2245056}
|
|
{"current_steps": 715, "total_steps": 15621, "loss": 0.6173, "lr": 9.136276391554703e-07, "epoch": 0.04577171755969528, "percentage": 4.58, "elapsed_time": "0:01:38", "remaining_time": "0:34:07", "throughput": 23029.05, "total_tokens": 2261248}
|
|
{"current_steps": 720, "total_steps": 15621, "loss": 0.65, "lr": 9.200255918106205e-07, "epoch": 0.04609179950067217, "percentage": 4.61, "elapsed_time": "0:01:38", "remaining_time": "0:34:06", "throughput": 23033.38, "total_tokens": 2278016}
|
|
{"current_steps": 725, "total_steps": 15621, "loss": 0.5715, "lr": 9.264235444657708e-07, "epoch": 0.04641188144164906, "percentage": 4.64, "elapsed_time": "0:01:39", "remaining_time": "0:34:05", "throughput": 23031.9, "total_tokens": 2292800}
|
|
{"current_steps": 730, "total_steps": 15621, "loss": 0.5988, "lr": 9.328214971209213e-07, "epoch": 0.04673196338262595, "percentage": 4.67, "elapsed_time": "0:01:40", "remaining_time": "0:34:04", "throughput": 23033.09, "total_tokens": 2308224}
|
|
{"current_steps": 735, "total_steps": 15621, "loss": 0.7054, "lr": 9.392194497760716e-07, "epoch": 0.04705204532360284, "percentage": 4.71, "elapsed_time": "0:01:40", "remaining_time": "0:34:04", "throughput": 23040.87, "total_tokens": 2325760}
|
|
{"current_steps": 740, "total_steps": 15621, "loss": 0.6911, "lr": 9.456174024312221e-07, "epoch": 0.04737212726457973, "percentage": 4.74, "elapsed_time": "0:01:41", "remaining_time": "0:34:03", "throughput": 23044.07, "total_tokens": 2341632}
|
|
{"current_steps": 745, "total_steps": 15621, "loss": 0.7079, "lr": 9.520153550863723e-07, "epoch": 0.04769220920555662, "percentage": 4.77, "elapsed_time": "0:01:42", "remaining_time": "0:34:02", "throughput": 23047.16, "total_tokens": 2357504}
|
|
{"current_steps": 750, "total_steps": 15621, "loss": 0.7284, "lr": 9.584133077415226e-07, "epoch": 0.04801229114653351, "percentage": 4.8, "elapsed_time": "0:01:42", "remaining_time": "0:34:01", "throughput": 23045.5, "total_tokens": 2372608}
|
|
{"current_steps": 755, "total_steps": 15621, "loss": 0.5945, "lr": 9.64811260396673e-07, "epoch": 0.0483323730875104, "percentage": 4.83, "elapsed_time": "0:01:43", "remaining_time": "0:34:00", "throughput": 23048.64, "total_tokens": 2388352}
|
|
{"current_steps": 760, "total_steps": 15621, "loss": 0.7012, "lr": 9.712092130518234e-07, "epoch": 0.04865245502848729, "percentage": 4.87, "elapsed_time": "0:01:44", "remaining_time": "0:33:59", "throughput": 23051.31, "total_tokens": 2404480}
|
|
{"current_steps": 765, "total_steps": 15621, "loss": 0.5138, "lr": 9.776071657069737e-07, "epoch": 0.04897253696946418, "percentage": 4.9, "elapsed_time": "0:01:44", "remaining_time": "0:33:58", "throughput": 23052.49, "total_tokens": 2419648}
|
|
{"current_steps": 770, "total_steps": 15621, "loss": 0.71, "lr": 9.840051183621241e-07, "epoch": 0.04929261891044107, "percentage": 4.93, "elapsed_time": "0:01:45", "remaining_time": "0:33:57", "throughput": 23053.81, "total_tokens": 2435584}
|
|
{"current_steps": 775, "total_steps": 15621, "loss": 0.6251, "lr": 9.904030710172743e-07, "epoch": 0.04961270085141796, "percentage": 4.96, "elapsed_time": "0:01:46", "remaining_time": "0:33:56", "throughput": 23055.26, "total_tokens": 2451072}
|
|
{"current_steps": 780, "total_steps": 15621, "loss": 0.7605, "lr": 9.968010236724249e-07, "epoch": 0.04993278279239485, "percentage": 4.99, "elapsed_time": "0:01:47", "remaining_time": "0:33:56", "throughput": 23061.16, "total_tokens": 2467968}
|
|
{"current_steps": 782, "total_steps": 15621, "eval_loss": 0.6365677118301392, "epoch": 0.05006081556878561, "percentage": 5.01, "elapsed_time": "0:02:37", "remaining_time": "0:49:55", "throughput": 15673.76, "total_tokens": 2474432}
|
|
{"current_steps": 785, "total_steps": 15621, "loss": 0.6742, "lr": 1.0031989763275752e-06, "epoch": 0.05025286473337175, "percentage": 5.03, "elapsed_time": "0:03:20", "remaining_time": "1:03:04", "throughput": 12409.36, "total_tokens": 2484928}
|
|
{"current_steps": 790, "total_steps": 15621, "loss": 0.6802, "lr": 1.0095969289827256e-06, "epoch": 0.05057294667434863, "percentage": 5.06, "elapsed_time": "0:03:20", "remaining_time": "1:02:52", "throughput": 12448.82, "total_tokens": 2501504}
|
|
{"current_steps": 795, "total_steps": 15621, "loss": 0.5171, "lr": 1.0159948816378758e-06, "epoch": 0.050893028615325527, "percentage": 5.09, "elapsed_time": "0:03:21", "remaining_time": "1:02:40", "throughput": 12490.55, "total_tokens": 2518848}
|
|
{"current_steps": 800, "total_steps": 15621, "loss": 0.5474, "lr": 1.0223928342930262e-06, "epoch": 0.05121311055630241, "percentage": 5.12, "elapsed_time": "0:03:22", "remaining_time": "1:02:29", "throughput": 12530.13, "total_tokens": 2535680}
|
|
{"current_steps": 805, "total_steps": 15621, "loss": 0.6751, "lr": 1.0287907869481766e-06, "epoch": 0.051533192497279307, "percentage": 5.15, "elapsed_time": "0:03:23", "remaining_time": "1:02:16", "throughput": 12564.35, "total_tokens": 2550976}
|
|
{"current_steps": 810, "total_steps": 15621, "loss": 0.5552, "lr": 1.035188739603327e-06, "epoch": 0.05185327443825619, "percentage": 5.19, "elapsed_time": "0:03:23", "remaining_time": "1:02:04", "throughput": 12599.99, "total_tokens": 2566656}
|
|
{"current_steps": 815, "total_steps": 15621, "loss": 0.6319, "lr": 1.0415866922584773e-06, "epoch": 0.052173356379233086, "percentage": 5.22, "elapsed_time": "0:03:24", "remaining_time": "1:01:52", "throughput": 12632.75, "total_tokens": 2581568}
|
|
{"current_steps": 820, "total_steps": 15621, "loss": 0.64, "lr": 1.0479846449136277e-06, "epoch": 0.05249343832020997, "percentage": 5.25, "elapsed_time": "0:03:25", "remaining_time": "1:01:40", "throughput": 12665.84, "total_tokens": 2596608}
|
|
{"current_steps": 825, "total_steps": 15621, "loss": 0.672, "lr": 1.0543825975687779e-06, "epoch": 0.052813520261186866, "percentage": 5.28, "elapsed_time": "0:03:25", "remaining_time": "1:01:28", "throughput": 12699.74, "total_tokens": 2612032}
|
|
{"current_steps": 830, "total_steps": 15621, "loss": 0.7367, "lr": 1.0607805502239282e-06, "epoch": 0.05313360220216375, "percentage": 5.31, "elapsed_time": "0:03:26", "remaining_time": "1:01:16", "throughput": 12733.45, "total_tokens": 2627264}
|
|
{"current_steps": 835, "total_steps": 15621, "loss": 0.549, "lr": 1.0671785028790788e-06, "epoch": 0.053453684143140646, "percentage": 5.35, "elapsed_time": "0:03:27", "remaining_time": "1:01:05", "throughput": 12768.92, "total_tokens": 2643264}
|
|
{"current_steps": 840, "total_steps": 15621, "loss": 0.542, "lr": 1.073576455534229e-06, "epoch": 0.05377376608411753, "percentage": 5.38, "elapsed_time": "0:03:27", "remaining_time": "1:00:54", "throughput": 12804.24, "total_tokens": 2659264}
|
|
{"current_steps": 845, "total_steps": 15621, "loss": 0.5724, "lr": 1.0799744081893794e-06, "epoch": 0.054093848025094426, "percentage": 5.41, "elapsed_time": "0:03:28", "remaining_time": "1:00:42", "throughput": 12835.16, "total_tokens": 2673856}
|
|
{"current_steps": 850, "total_steps": 15621, "loss": 0.4715, "lr": 1.0863723608445297e-06, "epoch": 0.05441392996607131, "percentage": 5.44, "elapsed_time": "0:03:28", "remaining_time": "1:00:31", "throughput": 12865.5, "total_tokens": 2688448}
|
|
{"current_steps": 855, "total_steps": 15621, "loss": 0.6158, "lr": 1.09277031349968e-06, "epoch": 0.054734011907048206, "percentage": 5.47, "elapsed_time": "0:03:29", "remaining_time": "1:00:20", "throughput": 12898.27, "total_tokens": 2703872}
|
|
{"current_steps": 860, "total_steps": 15621, "loss": 0.6116, "lr": 1.0991682661548305e-06, "epoch": 0.05505409384802509, "percentage": 5.51, "elapsed_time": "0:03:30", "remaining_time": "1:00:09", "throughput": 12929.88, "total_tokens": 2719040}
|
|
{"current_steps": 865, "total_steps": 15621, "loss": 0.6182, "lr": 1.1055662188099809e-06, "epoch": 0.055374175789001986, "percentage": 5.54, "elapsed_time": "0:03:30", "remaining_time": "0:59:58", "throughput": 12964.73, "total_tokens": 2735168}
|
|
{"current_steps": 870, "total_steps": 15621, "loss": 0.5281, "lr": 1.111964171465131e-06, "epoch": 0.05569425772997887, "percentage": 5.57, "elapsed_time": "0:03:31", "remaining_time": "0:59:48", "throughput": 12997.16, "total_tokens": 2750592}
|
|
{"current_steps": 875, "total_steps": 15621, "loss": 0.6583, "lr": 1.1183621241202814e-06, "epoch": 0.056014339670955766, "percentage": 5.6, "elapsed_time": "0:03:32", "remaining_time": "0:59:38", "throughput": 13032.38, "total_tokens": 2767232}
|
|
{"current_steps": 880, "total_steps": 15621, "loss": 0.6623, "lr": 1.1247600767754318e-06, "epoch": 0.05633442161193265, "percentage": 5.63, "elapsed_time": "0:03:33", "remaining_time": "0:59:29", "throughput": 13070.01, "total_tokens": 2784768}
|
|
{"current_steps": 885, "total_steps": 15621, "loss": 0.5782, "lr": 1.1311580294305822e-06, "epoch": 0.056654503552909546, "percentage": 5.67, "elapsed_time": "0:03:33", "remaining_time": "0:59:18", "throughput": 13100.67, "total_tokens": 2799872}
|
|
{"current_steps": 890, "total_steps": 15621, "loss": 0.6444, "lr": 1.1375559820857326e-06, "epoch": 0.05697458549388643, "percentage": 5.7, "elapsed_time": "0:03:34", "remaining_time": "0:59:08", "throughput": 13133.87, "total_tokens": 2816000}
|
|
{"current_steps": 895, "total_steps": 15621, "loss": 0.5844, "lr": 1.143953934740883e-06, "epoch": 0.057294667434863326, "percentage": 5.73, "elapsed_time": "0:03:35", "remaining_time": "0:58:58", "throughput": 13165.7, "total_tokens": 2831744}
|
|
{"current_steps": 900, "total_steps": 15621, "loss": 0.6139, "lr": 1.150351887396033e-06, "epoch": 0.05761474937584021, "percentage": 5.76, "elapsed_time": "0:03:35", "remaining_time": "0:58:49", "throughput": 13197.22, "total_tokens": 2847424}
|
|
{"current_steps": 905, "total_steps": 15621, "loss": 0.6182, "lr": 1.1567498400511835e-06, "epoch": 0.057934831316817106, "percentage": 5.79, "elapsed_time": "0:03:36", "remaining_time": "0:58:38", "throughput": 13226.79, "total_tokens": 2862272}
|
|
{"current_steps": 910, "total_steps": 15621, "loss": 0.4962, "lr": 1.163147792706334e-06, "epoch": 0.05825491325779399, "percentage": 5.83, "elapsed_time": "0:03:37", "remaining_time": "0:58:28", "throughput": 13255.74, "total_tokens": 2877120}
|
|
{"current_steps": 915, "total_steps": 15621, "loss": 0.5176, "lr": 1.1695457453614842e-06, "epoch": 0.058574995198770886, "percentage": 5.86, "elapsed_time": "0:03:37", "remaining_time": "0:58:19", "throughput": 13292.08, "total_tokens": 2894592}
|
|
{"current_steps": 920, "total_steps": 15621, "loss": 0.6171, "lr": 1.1759436980166346e-06, "epoch": 0.05889507713974777, "percentage": 5.89, "elapsed_time": "0:03:38", "remaining_time": "0:58:10", "throughput": 13321.84, "total_tokens": 2909888}
|
|
{"current_steps": 925, "total_steps": 15621, "loss": 0.6226, "lr": 1.182341650671785e-06, "epoch": 0.059215159080724666, "percentage": 5.92, "elapsed_time": "0:03:39", "remaining_time": "0:58:01", "throughput": 13352.17, "total_tokens": 2925632}
|
|
{"current_steps": 930, "total_steps": 15621, "loss": 0.6205, "lr": 1.1887396033269352e-06, "epoch": 0.05953524102170155, "percentage": 5.95, "elapsed_time": "0:03:39", "remaining_time": "0:57:52", "throughput": 13383.98, "total_tokens": 2941760}
|
|
{"current_steps": 935, "total_steps": 15621, "loss": 0.7098, "lr": 1.1951375559820858e-06, "epoch": 0.059855322962678446, "percentage": 5.99, "elapsed_time": "0:03:40", "remaining_time": "0:57:42", "throughput": 13414.66, "total_tokens": 2957376}
|
|
{"current_steps": 940, "total_steps": 15621, "loss": 0.5669, "lr": 1.2015355086372361e-06, "epoch": 0.06017540490365533, "percentage": 6.02, "elapsed_time": "0:03:41", "remaining_time": "0:57:33", "throughput": 13444.05, "total_tokens": 2972800}
|
|
{"current_steps": 945, "total_steps": 15621, "loss": 0.7169, "lr": 1.2079334612923863e-06, "epoch": 0.060495486844632226, "percentage": 6.05, "elapsed_time": "0:03:41", "remaining_time": "0:57:24", "throughput": 13474.2, "total_tokens": 2988480}
|
|
{"current_steps": 950, "total_steps": 15621, "loss": 0.6581, "lr": 1.2143314139475367e-06, "epoch": 0.06081556878560911, "percentage": 6.08, "elapsed_time": "0:03:42", "remaining_time": "0:57:15", "throughput": 13504.78, "total_tokens": 3004480}
|
|
{"current_steps": 955, "total_steps": 15621, "loss": 0.4512, "lr": 1.220729366602687e-06, "epoch": 0.061135650726586006, "percentage": 6.11, "elapsed_time": "0:03:43", "remaining_time": "0:57:06", "throughput": 13534.97, "total_tokens": 3020288}
|
|
{"current_steps": 960, "total_steps": 15621, "loss": 0.5964, "lr": 1.2271273192578374e-06, "epoch": 0.06145573266756289, "percentage": 6.15, "elapsed_time": "0:03:43", "remaining_time": "0:56:58", "throughput": 13563.89, "total_tokens": 3035968}
|
|
{"current_steps": 965, "total_steps": 15621, "loss": 0.7436, "lr": 1.2335252719129878e-06, "epoch": 0.061775814608539786, "percentage": 6.18, "elapsed_time": "0:03:44", "remaining_time": "0:56:49", "throughput": 13593.47, "total_tokens": 3051776}
|
|
{"current_steps": 970, "total_steps": 15621, "loss": 0.5567, "lr": 1.2399232245681382e-06, "epoch": 0.06209589654951667, "percentage": 6.21, "elapsed_time": "0:03:45", "remaining_time": "0:56:40", "throughput": 13620.15, "total_tokens": 3066560}
|
|
{"current_steps": 975, "total_steps": 15621, "loss": 0.6348, "lr": 1.2463211772232884e-06, "epoch": 0.062415978490493566, "percentage": 6.24, "elapsed_time": "0:03:45", "remaining_time": "0:56:32", "throughput": 13650.17, "total_tokens": 3082496}
|
|
{"current_steps": 980, "total_steps": 15621, "loss": 0.6289, "lr": 1.2527191298784387e-06, "epoch": 0.06273606043147045, "percentage": 6.27, "elapsed_time": "0:03:46", "remaining_time": "0:56:23", "throughput": 13677.97, "total_tokens": 3097856}
|
|
{"current_steps": 985, "total_steps": 15621, "loss": 0.5081, "lr": 1.2591170825335893e-06, "epoch": 0.06305614237244735, "percentage": 6.31, "elapsed_time": "0:03:47", "remaining_time": "0:56:15", "throughput": 13706.76, "total_tokens": 3113664}
|
|
{"current_steps": 990, "total_steps": 15621, "loss": 0.6276, "lr": 1.2655150351887395e-06, "epoch": 0.06337622431342424, "percentage": 6.34, "elapsed_time": "0:03:47", "remaining_time": "0:56:07", "throughput": 13735.52, "total_tokens": 3129792}
|
|
{"current_steps": 995, "total_steps": 15621, "loss": 0.5917, "lr": 1.2719129878438899e-06, "epoch": 0.06369630625440113, "percentage": 6.37, "elapsed_time": "0:03:48", "remaining_time": "0:55:59", "throughput": 13762.81, "total_tokens": 3145024}
|
|
{"current_steps": 1000, "total_steps": 15621, "loss": 0.7761, "lr": 1.2783109404990402e-06, "epoch": 0.06401638819537801, "percentage": 6.4, "elapsed_time": "0:03:49", "remaining_time": "0:55:51", "throughput": 13791.92, "total_tokens": 3161216}
|
|
{"current_steps": 1005, "total_steps": 15621, "loss": 0.4107, "lr": 1.2847088931541904e-06, "epoch": 0.0643364701363549, "percentage": 6.43, "elapsed_time": "0:03:49", "remaining_time": "0:55:43", "throughput": 13820.09, "total_tokens": 3176960}
|
|
{"current_steps": 1010, "total_steps": 15621, "loss": 0.6098, "lr": 1.291106845809341e-06, "epoch": 0.0646565520773318, "percentage": 6.47, "elapsed_time": "0:03:50", "remaining_time": "0:55:35", "throughput": 13849.22, "total_tokens": 3193088}
|
|
{"current_steps": 1015, "total_steps": 15621, "loss": 0.6776, "lr": 1.2975047984644914e-06, "epoch": 0.0649766340183087, "percentage": 6.5, "elapsed_time": "0:03:51", "remaining_time": "0:55:28", "throughput": 13879.96, "total_tokens": 3210112}
|
|
{"current_steps": 1020, "total_steps": 15621, "loss": 0.508, "lr": 1.3039027511196418e-06, "epoch": 0.06529671595928557, "percentage": 6.53, "elapsed_time": "0:03:51", "remaining_time": "0:55:19", "throughput": 13905.02, "total_tokens": 3224768}
|
|
{"current_steps": 1025, "total_steps": 15621, "loss": 0.6266, "lr": 1.310300703774792e-06, "epoch": 0.06561679790026247, "percentage": 6.56, "elapsed_time": "0:03:52", "remaining_time": "0:55:11", "throughput": 13931.6, "total_tokens": 3240128}
|
|
{"current_steps": 1030, "total_steps": 15621, "loss": 0.5607, "lr": 1.3166986564299423e-06, "epoch": 0.06593687984123936, "percentage": 6.59, "elapsed_time": "0:03:53", "remaining_time": "0:55:04", "throughput": 13960.95, "total_tokens": 3256576}
|
|
{"current_steps": 1035, "total_steps": 15621, "loss": 0.5973, "lr": 1.3230966090850929e-06, "epoch": 0.06625696178221625, "percentage": 6.63, "elapsed_time": "0:03:53", "remaining_time": "0:54:56", "throughput": 13988.01, "total_tokens": 3272384}
|
|
{"current_steps": 1040, "total_steps": 15621, "loss": 0.4099, "lr": 1.329494561740243e-06, "epoch": 0.06657704372319313, "percentage": 6.66, "elapsed_time": "0:03:54", "remaining_time": "0:54:49", "throughput": 14015.89, "total_tokens": 3288512}
|
|
{"current_steps": 1045, "total_steps": 15621, "loss": 0.4889, "lr": 1.3358925143953934e-06, "epoch": 0.06689712566417003, "percentage": 6.69, "elapsed_time": "0:03:55", "remaining_time": "0:54:42", "throughput": 14048.0, "total_tokens": 3306304}
|
|
{"current_steps": 1050, "total_steps": 15621, "loss": 0.4795, "lr": 1.3422904670505438e-06, "epoch": 0.06721720760514692, "percentage": 6.72, "elapsed_time": "0:03:56", "remaining_time": "0:54:35", "throughput": 14072.86, "total_tokens": 3321344}
|
|
{"current_steps": 1055, "total_steps": 15621, "loss": 0.6207, "lr": 1.348688419705694e-06, "epoch": 0.06753728954612381, "percentage": 6.75, "elapsed_time": "0:03:56", "remaining_time": "0:54:28", "throughput": 14102.83, "total_tokens": 3338560}
|
|
{"current_steps": 1060, "total_steps": 15621, "loss": 0.5093, "lr": 1.3550863723608446e-06, "epoch": 0.06785737148710069, "percentage": 6.79, "elapsed_time": "0:03:57", "remaining_time": "0:54:20", "throughput": 14125.91, "total_tokens": 3353152}
|
|
{"current_steps": 1065, "total_steps": 15621, "loss": 0.5401, "lr": 1.361484325015995e-06, "epoch": 0.06817745342807759, "percentage": 6.82, "elapsed_time": "0:03:58", "remaining_time": "0:54:13", "throughput": 14153.27, "total_tokens": 3369536}
|
|
{"current_steps": 1070, "total_steps": 15621, "loss": 0.6053, "lr": 1.3678822776711451e-06, "epoch": 0.06849753536905448, "percentage": 6.85, "elapsed_time": "0:03:58", "remaining_time": "0:54:06", "throughput": 14177.96, "total_tokens": 3384832}
|
|
{"current_steps": 1075, "total_steps": 15621, "loss": 0.494, "lr": 1.3742802303262955e-06, "epoch": 0.06881761731003137, "percentage": 6.88, "elapsed_time": "0:03:59", "remaining_time": "0:53:59", "throughput": 14200.76, "total_tokens": 3399424}
|
|
{"current_steps": 1080, "total_steps": 15621, "loss": 0.6625, "lr": 1.3806781829814459e-06, "epoch": 0.06913769925100825, "percentage": 6.91, "elapsed_time": "0:04:00", "remaining_time": "0:53:52", "throughput": 14229.84, "total_tokens": 3416704}
|
|
{"current_steps": 1085, "total_steps": 15621, "loss": 0.5524, "lr": 1.3870761356365963e-06, "epoch": 0.06945778119198515, "percentage": 6.95, "elapsed_time": "0:04:00", "remaining_time": "0:53:45", "throughput": 14253.45, "total_tokens": 3431552}
|
|
{"current_steps": 1090, "total_steps": 15621, "loss": 0.6509, "lr": 1.3934740882917466e-06, "epoch": 0.06977786313296204, "percentage": 6.98, "elapsed_time": "0:04:01", "remaining_time": "0:53:38", "throughput": 14279.05, "total_tokens": 3447488}
|
|
{"current_steps": 1095, "total_steps": 15621, "loss": 0.5205, "lr": 1.399872040946897e-06, "epoch": 0.07009794507393893, "percentage": 7.01, "elapsed_time": "0:04:02", "remaining_time": "0:53:31", "throughput": 14304.57, "total_tokens": 3463424}
|
|
{"current_steps": 1100, "total_steps": 15621, "loss": 0.6568, "lr": 1.4062699936020472e-06, "epoch": 0.07041802701491581, "percentage": 7.04, "elapsed_time": "0:04:02", "remaining_time": "0:53:25", "throughput": 14330.71, "total_tokens": 3479680}
|
|
{"current_steps": 1105, "total_steps": 15621, "loss": 0.5165, "lr": 1.4126679462571976e-06, "epoch": 0.0707381089558927, "percentage": 7.07, "elapsed_time": "0:04:03", "remaining_time": "0:53:18", "throughput": 14356.3, "total_tokens": 3495552}
|
|
{"current_steps": 1110, "total_steps": 15621, "loss": 0.4769, "lr": 1.4190658989123481e-06, "epoch": 0.0710581908968696, "percentage": 7.11, "elapsed_time": "0:04:04", "remaining_time": "0:53:11", "throughput": 14380.31, "total_tokens": 3510976}
|
|
{"current_steps": 1115, "total_steps": 15621, "loss": 0.6453, "lr": 1.4254638515674983e-06, "epoch": 0.0713782728378465, "percentage": 7.14, "elapsed_time": "0:04:04", "remaining_time": "0:53:04", "throughput": 14403.03, "total_tokens": 3526016}
|
|
{"current_steps": 1120, "total_steps": 15621, "loss": 0.5647, "lr": 1.4318618042226487e-06, "epoch": 0.07169835477882337, "percentage": 7.17, "elapsed_time": "0:04:05", "remaining_time": "0:52:57", "throughput": 14424.95, "total_tokens": 3540544}
|
|
{"current_steps": 1125, "total_steps": 15621, "loss": 0.6074, "lr": 1.438259756877799e-06, "epoch": 0.07201843671980027, "percentage": 7.2, "elapsed_time": "0:04:06", "remaining_time": "0:52:51", "throughput": 14449.58, "total_tokens": 3556416}
|
|
{"current_steps": 1130, "total_steps": 15621, "loss": 0.5063, "lr": 1.4446577095329492e-06, "epoch": 0.07233851866077716, "percentage": 7.23, "elapsed_time": "0:04:06", "remaining_time": "0:52:44", "throughput": 14474.08, "total_tokens": 3572096}
|
|
{"current_steps": 1135, "total_steps": 15621, "loss": 0.4966, "lr": 1.4510556621880998e-06, "epoch": 0.07265860060175405, "percentage": 7.27, "elapsed_time": "0:04:07", "remaining_time": "0:52:38", "throughput": 14497.77, "total_tokens": 3587712}
|
|
{"current_steps": 1140, "total_steps": 15621, "loss": 0.5503, "lr": 1.4574536148432502e-06, "epoch": 0.07297868254273093, "percentage": 7.3, "elapsed_time": "0:04:08", "remaining_time": "0:52:32", "throughput": 14525.65, "total_tokens": 3605056}
|
|
{"current_steps": 1145, "total_steps": 15621, "loss": 0.6826, "lr": 1.4638515674984004e-06, "epoch": 0.07329876448370783, "percentage": 7.33, "elapsed_time": "0:04:08", "remaining_time": "0:52:26", "throughput": 14550.55, "total_tokens": 3621184}
|
|
{"current_steps": 1150, "total_steps": 15621, "loss": 0.5917, "lr": 1.4702495201535507e-06, "epoch": 0.07361884642468472, "percentage": 7.36, "elapsed_time": "0:04:09", "remaining_time": "0:52:19", "throughput": 14570.73, "total_tokens": 3635392}
|
|
{"current_steps": 1155, "total_steps": 15621, "loss": 0.5064, "lr": 1.4766474728087011e-06, "epoch": 0.07393892836566161, "percentage": 7.39, "elapsed_time": "0:04:10", "remaining_time": "0:52:12", "throughput": 14591.93, "total_tokens": 3649984}
|
|
{"current_steps": 1160, "total_steps": 15621, "loss": 0.5239, "lr": 1.4830454254638515e-06, "epoch": 0.07425901030663849, "percentage": 7.43, "elapsed_time": "0:04:10", "remaining_time": "0:52:06", "throughput": 14616.15, "total_tokens": 3665920}
|
|
{"current_steps": 1165, "total_steps": 15621, "loss": 0.4573, "lr": 1.4894433781190019e-06, "epoch": 0.07457909224761539, "percentage": 7.46, "elapsed_time": "0:04:11", "remaining_time": "0:52:00", "throughput": 14636.19, "total_tokens": 3680256}
|
|
{"current_steps": 1170, "total_steps": 15621, "loss": 0.5284, "lr": 1.4958413307741523e-06, "epoch": 0.07489917418859228, "percentage": 7.49, "elapsed_time": "0:04:12", "remaining_time": "0:51:54", "throughput": 14662.84, "total_tokens": 3697536}
|
|
{"current_steps": 1175, "total_steps": 15621, "loss": 0.6147, "lr": 1.5022392834293024e-06, "epoch": 0.07521925612956917, "percentage": 7.52, "elapsed_time": "0:04:12", "remaining_time": "0:51:48", "throughput": 14684.77, "total_tokens": 3713088}
|
|
{"current_steps": 1180, "total_steps": 15621, "loss": 0.6674, "lr": 1.5086372360844528e-06, "epoch": 0.07553933807054607, "percentage": 7.55, "elapsed_time": "0:04:13", "remaining_time": "0:51:43", "throughput": 14710.48, "total_tokens": 3729920}
|
|
{"current_steps": 1185, "total_steps": 15621, "loss": 0.5478, "lr": 1.5150351887396034e-06, "epoch": 0.07585942001152295, "percentage": 7.59, "elapsed_time": "0:04:14", "remaining_time": "0:51:37", "throughput": 14733.52, "total_tokens": 3745664}
|
|
{"current_steps": 1190, "total_steps": 15621, "loss": 0.5246, "lr": 1.5214331413947536e-06, "epoch": 0.07617950195249984, "percentage": 7.62, "elapsed_time": "0:04:14", "remaining_time": "0:51:30", "throughput": 14754.45, "total_tokens": 3760576}
|
|
{"current_steps": 1195, "total_steps": 15621, "loss": 0.5073, "lr": 1.527831094049904e-06, "epoch": 0.07649958389347673, "percentage": 7.65, "elapsed_time": "0:04:15", "remaining_time": "0:51:25", "throughput": 14777.12, "total_tokens": 3776576}
|
|
{"current_steps": 1200, "total_steps": 15621, "loss": 0.5868, "lr": 1.5342290467050543e-06, "epoch": 0.07681966583445363, "percentage": 7.68, "elapsed_time": "0:04:16", "remaining_time": "0:51:19", "throughput": 14799.54, "total_tokens": 3792384}
|
|
{"current_steps": 1205, "total_steps": 15621, "loss": 0.6464, "lr": 1.5406269993602045e-06, "epoch": 0.0771397477754305, "percentage": 7.71, "elapsed_time": "0:04:16", "remaining_time": "0:51:13", "throughput": 14818.14, "total_tokens": 3806592}
|
|
{"current_steps": 1210, "total_steps": 15621, "loss": 0.5778, "lr": 1.547024952015355e-06, "epoch": 0.0774598297164074, "percentage": 7.75, "elapsed_time": "0:04:17", "remaining_time": "0:51:07", "throughput": 14839.93, "total_tokens": 3822080}
|
|
{"current_steps": 1215, "total_steps": 15621, "loss": 0.5346, "lr": 1.5534229046705055e-06, "epoch": 0.07777991165738429, "percentage": 7.78, "elapsed_time": "0:04:18", "remaining_time": "0:51:01", "throughput": 14860.29, "total_tokens": 3837120}
|
|
{"current_steps": 1220, "total_steps": 15621, "loss": 0.6952, "lr": 1.5598208573256556e-06, "epoch": 0.07809999359836119, "percentage": 7.81, "elapsed_time": "0:04:18", "remaining_time": "0:50:55", "throughput": 14882.3, "total_tokens": 3852864}
|
|
{"current_steps": 1225, "total_steps": 15621, "loss": 0.5148, "lr": 1.566218809980806e-06, "epoch": 0.07842007553933807, "percentage": 7.84, "elapsed_time": "0:04:19", "remaining_time": "0:50:50", "throughput": 14905.6, "total_tokens": 3869184}
|
|
{"current_steps": 1230, "total_steps": 15621, "loss": 0.5376, "lr": 1.5726167626359564e-06, "epoch": 0.07874015748031496, "percentage": 7.87, "elapsed_time": "0:04:20", "remaining_time": "0:50:45", "throughput": 14928.04, "total_tokens": 3885248}
|
|
{"current_steps": 1235, "total_steps": 15621, "loss": 0.4261, "lr": 1.5790147152911068e-06, "epoch": 0.07906023942129185, "percentage": 7.91, "elapsed_time": "0:04:20", "remaining_time": "0:50:39", "throughput": 14948.8, "total_tokens": 3900416}
|
|
{"current_steps": 1240, "total_steps": 15621, "loss": 0.553, "lr": 1.5854126679462571e-06, "epoch": 0.07938032136226875, "percentage": 7.94, "elapsed_time": "0:04:21", "remaining_time": "0:50:33", "throughput": 14970.39, "total_tokens": 3916096}
|
|
{"current_steps": 1245, "total_steps": 15621, "loss": 0.6992, "lr": 1.5918106206014075e-06, "epoch": 0.07970040330324563, "percentage": 7.97, "elapsed_time": "0:04:22", "remaining_time": "0:50:28", "throughput": 14995.17, "total_tokens": 3933312}
|
|
{"current_steps": 1250, "total_steps": 15621, "loss": 0.615, "lr": 1.5982085732565577e-06, "epoch": 0.08002048524422252, "percentage": 8.0, "elapsed_time": "0:04:22", "remaining_time": "0:50:23", "throughput": 15017.58, "total_tokens": 3949440}
|
|
{"current_steps": 1255, "total_steps": 15621, "loss": 0.681, "lr": 1.604606525911708e-06, "epoch": 0.08034056718519941, "percentage": 8.03, "elapsed_time": "0:04:23", "remaining_time": "0:50:18", "throughput": 15038.83, "total_tokens": 3964992}
|
|
{"current_steps": 1260, "total_steps": 15621, "loss": 0.733, "lr": 1.6110044785668586e-06, "epoch": 0.0806606491261763, "percentage": 8.07, "elapsed_time": "0:04:24", "remaining_time": "0:50:12", "throughput": 15062.36, "total_tokens": 3981696}
|
|
{"current_steps": 1265, "total_steps": 15621, "loss": 0.6123, "lr": 1.617402431222009e-06, "epoch": 0.08098073106715319, "percentage": 8.1, "elapsed_time": "0:04:25", "remaining_time": "0:50:07", "throughput": 15083.29, "total_tokens": 3997248}
|
|
{"current_steps": 1270, "total_steps": 15621, "loss": 0.6693, "lr": 1.6238003838771592e-06, "epoch": 0.08130081300813008, "percentage": 8.13, "elapsed_time": "0:04:25", "remaining_time": "0:50:01", "throughput": 15101.53, "total_tokens": 4011648}
|
|
{"current_steps": 1275, "total_steps": 15621, "loss": 0.5273, "lr": 1.6301983365323096e-06, "epoch": 0.08162089494910697, "percentage": 8.16, "elapsed_time": "0:04:26", "remaining_time": "0:49:56", "throughput": 15123.88, "total_tokens": 4028160}
|
|
{"current_steps": 1280, "total_steps": 15621, "loss": 0.5358, "lr": 1.63659628918746e-06, "epoch": 0.08194097689008387, "percentage": 8.19, "elapsed_time": "0:04:27", "remaining_time": "0:49:51", "throughput": 15143.93, "total_tokens": 4043584}
|
|
{"current_steps": 1285, "total_steps": 15621, "loss": 0.7184, "lr": 1.6429942418426103e-06, "epoch": 0.08226105883106075, "percentage": 8.23, "elapsed_time": "0:04:27", "remaining_time": "0:49:46", "throughput": 15164.79, "total_tokens": 4059456}
|
|
{"current_steps": 1290, "total_steps": 15621, "loss": 0.6079, "lr": 1.6493921944977607e-06, "epoch": 0.08258114077203764, "percentage": 8.26, "elapsed_time": "0:04:28", "remaining_time": "0:49:41", "throughput": 15187.4, "total_tokens": 4076096}
|
|
{"current_steps": 1295, "total_steps": 15621, "loss": 0.6143, "lr": 1.655790147152911e-06, "epoch": 0.08290122271301453, "percentage": 8.29, "elapsed_time": "0:04:29", "remaining_time": "0:49:37", "throughput": 15211.64, "total_tokens": 4093568}
|
|
{"current_steps": 1300, "total_steps": 15621, "loss": 0.66, "lr": 1.6621880998080612e-06, "epoch": 0.08322130465399143, "percentage": 8.32, "elapsed_time": "0:04:29", "remaining_time": "0:49:31", "throughput": 15231.45, "total_tokens": 4108864}
|
|
{"current_steps": 1305, "total_steps": 15621, "loss": 0.6375, "lr": 1.6685860524632116e-06, "epoch": 0.0835413865949683, "percentage": 8.35, "elapsed_time": "0:04:30", "remaining_time": "0:49:26", "throughput": 15250.84, "total_tokens": 4124224}
|
|
{"current_steps": 1310, "total_steps": 15621, "loss": 0.5497, "lr": 1.6749840051183622e-06, "epoch": 0.0838614685359452, "percentage": 8.39, "elapsed_time": "0:04:31", "remaining_time": "0:49:21", "throughput": 15269.44, "total_tokens": 4139008}
|
|
{"current_steps": 1315, "total_steps": 15621, "loss": 0.5553, "lr": 1.6813819577735124e-06, "epoch": 0.08418155047692209, "percentage": 8.42, "elapsed_time": "0:04:31", "remaining_time": "0:49:16", "throughput": 15290.21, "total_tokens": 4155008}
|
|
{"current_steps": 1320, "total_steps": 15621, "loss": 0.4854, "lr": 1.6877799104286628e-06, "epoch": 0.08450163241789899, "percentage": 8.45, "elapsed_time": "0:04:32", "remaining_time": "0:49:11", "throughput": 15314.13, "total_tokens": 4172544}
|
|
{"current_steps": 1325, "total_steps": 15621, "loss": 0.6347, "lr": 1.6941778630838131e-06, "epoch": 0.08482171435887587, "percentage": 8.48, "elapsed_time": "0:04:33", "remaining_time": "0:49:06", "throughput": 15334.55, "total_tokens": 4188416}
|
|
{"current_steps": 1330, "total_steps": 15621, "loss": 0.4947, "lr": 1.7005758157389633e-06, "epoch": 0.08514179629985276, "percentage": 8.51, "elapsed_time": "0:04:33", "remaining_time": "0:49:01", "throughput": 15351.17, "total_tokens": 4202560}
|
|
{"current_steps": 1335, "total_steps": 15621, "loss": 0.5618, "lr": 1.706973768394114e-06, "epoch": 0.08546187824082965, "percentage": 8.55, "elapsed_time": "0:04:34", "remaining_time": "0:48:57", "throughput": 15372.84, "total_tokens": 4219392}
|
|
{"current_steps": 1340, "total_steps": 15621, "loss": 0.4941, "lr": 1.7133717210492643e-06, "epoch": 0.08578196018180655, "percentage": 8.58, "elapsed_time": "0:04:35", "remaining_time": "0:48:52", "throughput": 15392.67, "total_tokens": 4235328}
|
|
{"current_steps": 1345, "total_steps": 15621, "loss": 0.5828, "lr": 1.7197696737044144e-06, "epoch": 0.08610204212278343, "percentage": 8.61, "elapsed_time": "0:04:35", "remaining_time": "0:48:47", "throughput": 15410.97, "total_tokens": 4250368}
|
|
{"current_steps": 1350, "total_steps": 15621, "loss": 0.4594, "lr": 1.7261676263595648e-06, "epoch": 0.08642212406376032, "percentage": 8.64, "elapsed_time": "0:04:36", "remaining_time": "0:48:42", "throughput": 15430.02, "total_tokens": 4265856}
|
|
{"current_steps": 1355, "total_steps": 15621, "loss": 0.6623, "lr": 1.7325655790147152e-06, "epoch": 0.08674220600473721, "percentage": 8.67, "elapsed_time": "0:04:37", "remaining_time": "0:48:37", "throughput": 15449.77, "total_tokens": 4281792}
|
|
{"current_steps": 1360, "total_steps": 15621, "loss": 0.5861, "lr": 1.7389635316698656e-06, "epoch": 0.0870622879457141, "percentage": 8.71, "elapsed_time": "0:04:37", "remaining_time": "0:48:33", "throughput": 15468.27, "total_tokens": 4297088}
|
|
{"current_steps": 1365, "total_steps": 15621, "loss": 0.5083, "lr": 1.745361484325016e-06, "epoch": 0.087382369886691, "percentage": 8.74, "elapsed_time": "0:04:38", "remaining_time": "0:48:28", "throughput": 15485.87, "total_tokens": 4312192}
|
|
{"current_steps": 1370, "total_steps": 15621, "loss": 0.4848, "lr": 1.7517594369801663e-06, "epoch": 0.08770245182766788, "percentage": 8.77, "elapsed_time": "0:04:39", "remaining_time": "0:48:23", "throughput": 15502.74, "total_tokens": 4326720}
|
|
{"current_steps": 1375, "total_steps": 15621, "loss": 0.7944, "lr": 1.7581573896353165e-06, "epoch": 0.08802253376864477, "percentage": 8.8, "elapsed_time": "0:04:39", "remaining_time": "0:48:18", "throughput": 15520.42, "total_tokens": 4341760}
|
|
{"current_steps": 1380, "total_steps": 15621, "loss": 0.6211, "lr": 1.7645553422904669e-06, "epoch": 0.08834261570962167, "percentage": 8.83, "elapsed_time": "0:04:40", "remaining_time": "0:48:13", "throughput": 15540.14, "total_tokens": 4357760}
|
|
{"current_steps": 1385, "total_steps": 15621, "loss": 0.6269, "lr": 1.7709532949456175e-06, "epoch": 0.08866269765059856, "percentage": 8.87, "elapsed_time": "0:04:41", "remaining_time": "0:48:09", "throughput": 15559.53, "total_tokens": 4373824}
|
|
{"current_steps": 1390, "total_steps": 15621, "loss": 0.4764, "lr": 1.7773512476007676e-06, "epoch": 0.08898277959157544, "percentage": 8.9, "elapsed_time": "0:04:41", "remaining_time": "0:48:04", "throughput": 15577.39, "total_tokens": 4388992}
|
|
{"current_steps": 1395, "total_steps": 15621, "loss": 0.5266, "lr": 1.783749200255918e-06, "epoch": 0.08930286153255233, "percentage": 8.93, "elapsed_time": "0:04:42", "remaining_time": "0:48:00", "throughput": 15594.89, "total_tokens": 4404288}
|
|
{"current_steps": 1400, "total_steps": 15621, "loss": 0.5359, "lr": 1.7901471529110684e-06, "epoch": 0.08962294347352923, "percentage": 8.96, "elapsed_time": "0:04:43", "remaining_time": "0:47:55", "throughput": 15613.53, "total_tokens": 4419840}
|
|
{"current_steps": 1405, "total_steps": 15621, "loss": 0.5118, "lr": 1.7965451055662186e-06, "epoch": 0.08994302541450612, "percentage": 8.99, "elapsed_time": "0:04:43", "remaining_time": "0:47:50", "throughput": 15631.41, "total_tokens": 4435200}
|
|
{"current_steps": 1410, "total_steps": 15621, "loss": 0.61, "lr": 1.8029430582213691e-06, "epoch": 0.090263107355483, "percentage": 9.03, "elapsed_time": "0:04:44", "remaining_time": "0:47:46", "throughput": 15648.54, "total_tokens": 4450368}
|
|
{"current_steps": 1415, "total_steps": 15621, "loss": 0.4899, "lr": 1.8093410108765195e-06, "epoch": 0.09058318929645989, "percentage": 9.06, "elapsed_time": "0:04:45", "remaining_time": "0:47:41", "throughput": 15666.91, "total_tokens": 4466048}
|
|
{"current_steps": 1420, "total_steps": 15621, "loss": 0.6001, "lr": 1.8157389635316697e-06, "epoch": 0.09090327123743679, "percentage": 9.09, "elapsed_time": "0:04:45", "remaining_time": "0:47:37", "throughput": 15685.53, "total_tokens": 4481920}
|
|
{"current_steps": 1425, "total_steps": 15621, "loss": 0.5666, "lr": 1.82213691618682e-06, "epoch": 0.09122335317841368, "percentage": 9.12, "elapsed_time": "0:04:46", "remaining_time": "0:47:33", "throughput": 15704.84, "total_tokens": 4498112}
|
|
{"current_steps": 1430, "total_steps": 15621, "loss": 0.4314, "lr": 1.8285348688419704e-06, "epoch": 0.09154343511939056, "percentage": 9.15, "elapsed_time": "0:04:47", "remaining_time": "0:47:29", "throughput": 15726.24, "total_tokens": 4515648}
|
|
{"current_steps": 1435, "total_steps": 15621, "loss": 0.5809, "lr": 1.8349328214971208e-06, "epoch": 0.09186351706036745, "percentage": 9.19, "elapsed_time": "0:04:47", "remaining_time": "0:47:25", "throughput": 15744.98, "total_tokens": 4531840}
|
|
{"current_steps": 1440, "total_steps": 15621, "loss": 0.5463, "lr": 1.8413307741522712e-06, "epoch": 0.09218359900134435, "percentage": 9.22, "elapsed_time": "0:04:48", "remaining_time": "0:47:21", "throughput": 15762.7, "total_tokens": 4547456}
|
|
{"current_steps": 1445, "total_steps": 15621, "loss": 0.5548, "lr": 1.8477287268074216e-06, "epoch": 0.09250368094232124, "percentage": 9.25, "elapsed_time": "0:04:49", "remaining_time": "0:47:16", "throughput": 15780.93, "total_tokens": 4563328}
|
|
{"current_steps": 1450, "total_steps": 15621, "loss": 0.5856, "lr": 1.8541266794625718e-06, "epoch": 0.09282376288329812, "percentage": 9.28, "elapsed_time": "0:04:49", "remaining_time": "0:47:12", "throughput": 15798.94, "total_tokens": 4579392}
|
|
{"current_steps": 1455, "total_steps": 15621, "loss": 0.6159, "lr": 1.8605246321177221e-06, "epoch": 0.09314384482427501, "percentage": 9.31, "elapsed_time": "0:04:50", "remaining_time": "0:47:08", "throughput": 15817.43, "total_tokens": 4595584}
|
|
{"current_steps": 1460, "total_steps": 15621, "loss": 0.5707, "lr": 1.8669225847728727e-06, "epoch": 0.0934639267652519, "percentage": 9.35, "elapsed_time": "0:04:51", "remaining_time": "0:47:04", "throughput": 15832.4, "total_tokens": 4610112}
|
|
{"current_steps": 1465, "total_steps": 15621, "loss": 0.7073, "lr": 1.8733205374280229e-06, "epoch": 0.0937840087062288, "percentage": 9.38, "elapsed_time": "0:04:51", "remaining_time": "0:47:00", "throughput": 15851.09, "total_tokens": 4626432}
|
|
{"current_steps": 1470, "total_steps": 15621, "loss": 0.5799, "lr": 1.8797184900831733e-06, "epoch": 0.09410409064720568, "percentage": 9.41, "elapsed_time": "0:04:52", "remaining_time": "0:46:56", "throughput": 15867.81, "total_tokens": 4641792}
|
|
{"current_steps": 1475, "total_steps": 15621, "loss": 0.4144, "lr": 1.8861164427383236e-06, "epoch": 0.09442417258818257, "percentage": 9.44, "elapsed_time": "0:04:53", "remaining_time": "0:46:51", "throughput": 15883.78, "total_tokens": 4656896}
|
|
{"current_steps": 1480, "total_steps": 15621, "loss": 0.6021, "lr": 1.8925143953934738e-06, "epoch": 0.09474425452915947, "percentage": 9.47, "elapsed_time": "0:04:53", "remaining_time": "0:46:48", "throughput": 15901.99, "total_tokens": 4673472}
|
|
{"current_steps": 1485, "total_steps": 15621, "loss": 0.4252, "lr": 1.8989123480486244e-06, "epoch": 0.09506433647013636, "percentage": 9.51, "elapsed_time": "0:04:54", "remaining_time": "0:46:43", "throughput": 15918.41, "total_tokens": 4688896}
|
|
{"current_steps": 1490, "total_steps": 15621, "loss": 0.6809, "lr": 1.9053103007037748e-06, "epoch": 0.09538441841111324, "percentage": 9.54, "elapsed_time": "0:04:55", "remaining_time": "0:46:39", "throughput": 15935.16, "total_tokens": 4704576}
|
|
{"current_steps": 1495, "total_steps": 15621, "loss": 0.6032, "lr": 1.911708253358925e-06, "epoch": 0.09570450035209013, "percentage": 9.57, "elapsed_time": "0:04:55", "remaining_time": "0:46:35", "throughput": 15949.94, "total_tokens": 4719040}
|
|
{"current_steps": 1500, "total_steps": 15621, "loss": 0.4781, "lr": 1.9181062060140753e-06, "epoch": 0.09602458229306703, "percentage": 9.6, "elapsed_time": "0:04:56", "remaining_time": "0:46:31", "throughput": 15965.19, "total_tokens": 4733696}
|
|
{"current_steps": 1505, "total_steps": 15621, "loss": 0.6128, "lr": 1.9245041586692255e-06, "epoch": 0.09634466423404392, "percentage": 9.63, "elapsed_time": "0:04:57", "remaining_time": "0:46:27", "throughput": 15981.46, "total_tokens": 4748992}
|
|
{"current_steps": 1510, "total_steps": 15621, "loss": 0.6961, "lr": 1.930902111324376e-06, "epoch": 0.0966647461750208, "percentage": 9.67, "elapsed_time": "0:04:57", "remaining_time": "0:46:23", "throughput": 15998.8, "total_tokens": 4764992}
|
|
{"current_steps": 1515, "total_steps": 15621, "loss": 0.6847, "lr": 1.9373000639795267e-06, "epoch": 0.09698482811599769, "percentage": 9.7, "elapsed_time": "0:04:58", "remaining_time": "0:46:19", "throughput": 16014.73, "total_tokens": 4780352}
|
|
{"current_steps": 1520, "total_steps": 15621, "loss": 0.5486, "lr": 1.943698016634677e-06, "epoch": 0.09730491005697459, "percentage": 9.73, "elapsed_time": "0:04:59", "remaining_time": "0:46:15", "throughput": 16031.23, "total_tokens": 4796224}
|
|
{"current_steps": 1525, "total_steps": 15621, "loss": 0.5423, "lr": 1.950095969289827e-06, "epoch": 0.09762499199795148, "percentage": 9.76, "elapsed_time": "0:04:59", "remaining_time": "0:46:11", "throughput": 16047.29, "total_tokens": 4811840}
|
|
{"current_steps": 1530, "total_steps": 15621, "loss": 0.5833, "lr": 1.9564939219449776e-06, "epoch": 0.09794507393892836, "percentage": 9.79, "elapsed_time": "0:05:00", "remaining_time": "0:46:07", "throughput": 16061.66, "total_tokens": 4826432}
|
|
{"current_steps": 1535, "total_steps": 15621, "loss": 0.3899, "lr": 1.9628918746001278e-06, "epoch": 0.09826515587990525, "percentage": 9.83, "elapsed_time": "0:05:01", "remaining_time": "0:46:03", "throughput": 16077.67, "total_tokens": 4841920}
|
|
{"current_steps": 1540, "total_steps": 15621, "loss": 0.5976, "lr": 1.9692898272552783e-06, "epoch": 0.09858523782088215, "percentage": 9.86, "elapsed_time": "0:05:01", "remaining_time": "0:45:59", "throughput": 16093.97, "total_tokens": 4857536}
|
|
{"current_steps": 1545, "total_steps": 15621, "loss": 0.6862, "lr": 1.9756877799104285e-06, "epoch": 0.09890531976185904, "percentage": 9.89, "elapsed_time": "0:05:02", "remaining_time": "0:45:55", "throughput": 16110.53, "total_tokens": 4873408}
|
|
{"current_steps": 1550, "total_steps": 15621, "loss": 0.5992, "lr": 1.9820857325655787e-06, "epoch": 0.09922540170283592, "percentage": 9.92, "elapsed_time": "0:05:03", "remaining_time": "0:45:52", "throughput": 16127.39, "total_tokens": 4889536}
|
|
{"current_steps": 1555, "total_steps": 15621, "loss": 0.6222, "lr": 1.9884836852207293e-06, "epoch": 0.09954548364381281, "percentage": 9.95, "elapsed_time": "0:05:03", "remaining_time": "0:45:48", "throughput": 16142.07, "total_tokens": 4904448}
|
|
{"current_steps": 1560, "total_steps": 15621, "loss": 0.6538, "lr": 1.99488163787588e-06, "epoch": 0.0998655655847897, "percentage": 9.99, "elapsed_time": "0:05:04", "remaining_time": "0:45:44", "throughput": 16156.87, "total_tokens": 4919616}
|
|
{"current_steps": 1564, "total_steps": 15621, "eval_loss": 0.5419119000434875, "epoch": 0.10012163113757122, "percentage": 10.01, "elapsed_time": "0:05:55", "remaining_time": "0:53:16", "throughput": 13867.35, "total_tokens": 4931328}
|
|
{"current_steps": 1565, "total_steps": 15621, "loss": 0.5142, "lr": 1.9999999750297625e-06, "epoch": 0.1001856475257666, "percentage": 10.02, "elapsed_time": "0:06:35", "remaining_time": "0:59:15", "throughput": 12465.62, "total_tokens": 4934144}
|
|
{"current_steps": 1570, "total_steps": 15621, "loss": 0.5243, "lr": 1.9999991010715873e-06, "epoch": 0.1005057294667435, "percentage": 10.05, "elapsed_time": "0:06:36", "remaining_time": "0:59:08", "throughput": 12484.85, "total_tokens": 4950272}
|
|
{"current_steps": 1575, "total_steps": 15621, "loss": 0.5354, "lr": 1.999996978602793e-06, "epoch": 0.10082581140772037, "percentage": 10.08, "elapsed_time": "0:06:37", "remaining_time": "0:59:01", "throughput": 12501.82, "total_tokens": 4965056}
|
|
{"current_steps": 1580, "total_steps": 15621, "loss": 0.5617, "lr": 1.99999360762603e-06, "epoch": 0.10114589334869727, "percentage": 10.11, "elapsed_time": "0:06:37", "remaining_time": "0:58:55", "throughput": 12519.02, "total_tokens": 4980160}
|
|
{"current_steps": 1585, "total_steps": 15621, "loss": 0.4574, "lr": 1.9999889881455065e-06, "epoch": 0.10146597528967416, "percentage": 10.15, "elapsed_time": "0:06:38", "remaining_time": "0:58:49", "throughput": 12538.98, "total_tokens": 4996992}
|
|
{"current_steps": 1590, "total_steps": 15621, "loss": 0.5212, "lr": 1.9999831201669897e-06, "epoch": 0.10178605723065105, "percentage": 10.18, "elapsed_time": "0:06:39", "remaining_time": "0:58:42", "throughput": 12557.24, "total_tokens": 5012608}
|
|
{"current_steps": 1595, "total_steps": 15621, "loss": 0.4917, "lr": 1.9999760036978067e-06, "epoch": 0.10210613917162793, "percentage": 10.21, "elapsed_time": "0:06:39", "remaining_time": "0:58:36", "throughput": 12574.66, "total_tokens": 5027840}
|
|
{"current_steps": 1600, "total_steps": 15621, "loss": 0.5698, "lr": 1.9999676387468417e-06, "epoch": 0.10242622111260483, "percentage": 10.24, "elapsed_time": "0:06:40", "remaining_time": "0:58:29", "throughput": 12591.65, "total_tokens": 5042752}
|
|
{"current_steps": 1605, "total_steps": 15621, "loss": 0.5443, "lr": 1.999958025324539e-06, "epoch": 0.10274630305358172, "percentage": 10.27, "elapsed_time": "0:06:41", "remaining_time": "0:58:23", "throughput": 12610.02, "total_tokens": 5058624}
|
|
{"current_steps": 1610, "total_steps": 15621, "loss": 0.6261, "lr": 1.999947163442901e-06, "epoch": 0.10306638499455861, "percentage": 10.31, "elapsed_time": "0:06:41", "remaining_time": "0:58:17", "throughput": 12629.24, "total_tokens": 5075008}
|
|
{"current_steps": 1615, "total_steps": 15621, "loss": 0.5363, "lr": 1.9999350531154884e-06, "epoch": 0.10338646693553549, "percentage": 10.34, "elapsed_time": "0:06:42", "remaining_time": "0:58:10", "throughput": 12647.49, "total_tokens": 5090880}
|
|
{"current_steps": 1620, "total_steps": 15621, "loss": 0.5701, "lr": 1.9999216943574223e-06, "epoch": 0.10370654887651239, "percentage": 10.37, "elapsed_time": "0:06:43", "remaining_time": "0:58:04", "throughput": 12665.56, "total_tokens": 5106816}
|
|
{"current_steps": 1625, "total_steps": 15621, "loss": 0.463, "lr": 1.9999070871853796e-06, "epoch": 0.10402663081748928, "percentage": 10.4, "elapsed_time": "0:06:43", "remaining_time": "0:57:58", "throughput": 12685.49, "total_tokens": 5123904}
|
|
{"current_steps": 1630, "total_steps": 15621, "loss": 0.4856, "lr": 1.9998912316175986e-06, "epoch": 0.10434671275846617, "percentage": 10.43, "elapsed_time": "0:06:44", "remaining_time": "0:57:52", "throughput": 12704.11, "total_tokens": 5140160}
|
|
{"current_steps": 1635, "total_steps": 15621, "loss": 0.5123, "lr": 1.9998741276738752e-06, "epoch": 0.10466679469944305, "percentage": 10.47, "elapsed_time": "0:06:45", "remaining_time": "0:57:46", "throughput": 12722.2, "total_tokens": 5156288}
|
|
{"current_steps": 1640, "total_steps": 15621, "loss": 0.5907, "lr": 1.999855775375563e-06, "epoch": 0.10498687664041995, "percentage": 10.5, "elapsed_time": "0:06:45", "remaining_time": "0:57:40", "throughput": 12739.58, "total_tokens": 5171776}
|
|
{"current_steps": 1645, "total_steps": 15621, "loss": 0.6812, "lr": 1.999836174745576e-06, "epoch": 0.10530695858139684, "percentage": 10.53, "elapsed_time": "0:06:46", "remaining_time": "0:57:35", "throughput": 12760.16, "total_tokens": 5189504}
|
|
{"current_steps": 1650, "total_steps": 15621, "loss": 0.5825, "lr": 1.9998153258083853e-06, "epoch": 0.10562704052237373, "percentage": 10.56, "elapsed_time": "0:06:47", "remaining_time": "0:57:29", "throughput": 12777.59, "total_tokens": 5205056}
|
|
{"current_steps": 1655, "total_steps": 15621, "loss": 0.5911, "lr": 1.9997932285900214e-06, "epoch": 0.10594712246335061, "percentage": 10.59, "elapsed_time": "0:06:48", "remaining_time": "0:57:23", "throughput": 12798.0, "total_tokens": 5222656}
|
|
{"current_steps": 1660, "total_steps": 15621, "loss": 0.6352, "lr": 1.9997698831180726e-06, "epoch": 0.1062672044043275, "percentage": 10.63, "elapsed_time": "0:06:48", "remaining_time": "0:57:17", "throughput": 12816.04, "total_tokens": 5238848}
|
|
{"current_steps": 1665, "total_steps": 15621, "loss": 0.5226, "lr": 1.999745289421686e-06, "epoch": 0.1065872863453044, "percentage": 10.66, "elapsed_time": "0:06:49", "remaining_time": "0:57:12", "throughput": 12834.64, "total_tokens": 5255296}
|
|
{"current_steps": 1670, "total_steps": 15621, "loss": 0.7595, "lr": 1.9997194475315674e-06, "epoch": 0.10690736828628129, "percentage": 10.69, "elapsed_time": "0:06:50", "remaining_time": "0:57:06", "throughput": 12850.97, "total_tokens": 5270336}
|
|
{"current_steps": 1675, "total_steps": 15621, "loss": 0.4864, "lr": 1.9996923574799808e-06, "epoch": 0.10722745022725817, "percentage": 10.72, "elapsed_time": "0:06:50", "remaining_time": "0:57:00", "throughput": 12869.32, "total_tokens": 5286720}
|
|
{"current_steps": 1680, "total_steps": 15621, "loss": 0.6553, "lr": 1.9996640193007476e-06, "epoch": 0.10754753216823507, "percentage": 10.75, "elapsed_time": "0:06:51", "remaining_time": "0:56:54", "throughput": 12885.32, "total_tokens": 5301632}
|
|
{"current_steps": 1685, "total_steps": 15621, "loss": 0.402, "lr": 1.9996344330292495e-06, "epoch": 0.10786761410921196, "percentage": 10.79, "elapsed_time": "0:06:52", "remaining_time": "0:56:48", "throughput": 12901.27, "total_tokens": 5316544}
|
|
{"current_steps": 1690, "total_steps": 15621, "loss": 0.5449, "lr": 1.9996035987024245e-06, "epoch": 0.10818769605018885, "percentage": 10.82, "elapsed_time": "0:06:52", "remaining_time": "0:56:42", "throughput": 12918.56, "total_tokens": 5332544}
|
|
{"current_steps": 1695, "total_steps": 15621, "loss": 0.5498, "lr": 1.99957151635877e-06, "epoch": 0.10850777799116573, "percentage": 10.85, "elapsed_time": "0:06:53", "remaining_time": "0:56:36", "throughput": 12935.17, "total_tokens": 5348096}
|
|
{"current_steps": 1700, "total_steps": 15621, "loss": 0.6298, "lr": 1.999538186038341e-06, "epoch": 0.10882785993214263, "percentage": 10.88, "elapsed_time": "0:06:54", "remaining_time": "0:56:30", "throughput": 12949.89, "total_tokens": 5362368}
|
|
{"current_steps": 1705, "total_steps": 15621, "loss": 0.5357, "lr": 1.999503607782751e-06, "epoch": 0.10914794187311952, "percentage": 10.91, "elapsed_time": "0:06:54", "remaining_time": "0:56:25", "throughput": 12966.93, "total_tokens": 5378176}
|
|
{"current_steps": 1710, "total_steps": 15621, "loss": 0.5219, "lr": 1.999467781635171e-06, "epoch": 0.10946802381409641, "percentage": 10.95, "elapsed_time": "0:06:55", "remaining_time": "0:56:19", "throughput": 12985.05, "total_tokens": 5394752}
|
|
{"current_steps": 1715, "total_steps": 15621, "loss": 0.7002, "lr": 1.9994307076403306e-06, "epoch": 0.10978810575507329, "percentage": 10.98, "elapsed_time": "0:06:56", "remaining_time": "0:56:14", "throughput": 13004.43, "total_tokens": 5412160}
|
|
{"current_steps": 1720, "total_steps": 15621, "loss": 0.5297, "lr": 1.999392385844517e-06, "epoch": 0.11010818769605019, "percentage": 11.01, "elapsed_time": "0:06:56", "remaining_time": "0:56:08", "throughput": 13021.08, "total_tokens": 5427840}
|
|
{"current_steps": 1725, "total_steps": 15621, "loss": 0.4006, "lr": 1.9993528162955753e-06, "epoch": 0.11042826963702708, "percentage": 11.04, "elapsed_time": "0:06:57", "remaining_time": "0:56:03", "throughput": 13038.74, "total_tokens": 5444224}
|
|
{"current_steps": 1730, "total_steps": 15621, "loss": 0.5775, "lr": 1.9993119990429095e-06, "epoch": 0.11074835157800397, "percentage": 11.07, "elapsed_time": "0:06:58", "remaining_time": "0:55:57", "throughput": 13054.87, "total_tokens": 5459648}
|
|
{"current_steps": 1735, "total_steps": 15621, "loss": 0.7845, "lr": 1.9992699341374794e-06, "epoch": 0.11106843351898085, "percentage": 11.11, "elapsed_time": "0:06:58", "remaining_time": "0:55:52", "throughput": 13070.78, "total_tokens": 5475008}
|
|
{"current_steps": 1740, "total_steps": 15621, "loss": 0.533, "lr": 1.9992266216318033e-06, "epoch": 0.11138851545995775, "percentage": 11.14, "elapsed_time": "0:06:59", "remaining_time": "0:55:47", "throughput": 13088.17, "total_tokens": 5491456}
|
|
{"current_steps": 1745, "total_steps": 15621, "loss": 0.5745, "lr": 1.9991820615799583e-06, "epoch": 0.11170859740093464, "percentage": 11.17, "elapsed_time": "0:07:00", "remaining_time": "0:55:41", "throughput": 13105.06, "total_tokens": 5507520}
|
|
{"current_steps": 1750, "total_steps": 15621, "loss": 0.6964, "lr": 1.999136254037578e-06, "epoch": 0.11202867934191153, "percentage": 11.2, "elapsed_time": "0:07:00", "remaining_time": "0:55:36", "throughput": 13121.14, "total_tokens": 5523072}
|
|
{"current_steps": 1755, "total_steps": 15621, "loss": 0.5134, "lr": 1.999089199061853e-06, "epoch": 0.11234876128288843, "percentage": 11.23, "elapsed_time": "0:07:01", "remaining_time": "0:55:30", "throughput": 13136.64, "total_tokens": 5538304}
|
|
{"current_steps": 1760, "total_steps": 15621, "loss": 0.4639, "lr": 1.9990408967115326e-06, "epoch": 0.1126688432238653, "percentage": 11.27, "elapsed_time": "0:07:02", "remaining_time": "0:55:25", "throughput": 13152.79, "total_tokens": 5553920}
|
|
{"current_steps": 1765, "total_steps": 15621, "loss": 0.4624, "lr": 1.998991347046922e-06, "epoch": 0.1129889251648422, "percentage": 11.3, "elapsed_time": "0:07:02", "remaining_time": "0:55:20", "throughput": 13168.57, "total_tokens": 5569344}
|
|
{"current_steps": 1770, "total_steps": 15621, "loss": 0.5057, "lr": 1.9989405501298857e-06, "epoch": 0.11330900710581909, "percentage": 11.33, "elapsed_time": "0:07:03", "remaining_time": "0:55:14", "throughput": 13186.11, "total_tokens": 5585856}
|
|
{"current_steps": 1775, "total_steps": 15621, "loss": 0.5777, "lr": 1.9988885060238436e-06, "epoch": 0.11362908904679599, "percentage": 11.36, "elapsed_time": "0:07:04", "remaining_time": "0:55:10", "throughput": 13205.54, "total_tokens": 5603840}
|
|
{"current_steps": 1780, "total_steps": 15621, "loss": 0.5185, "lr": 1.9988352147937735e-06, "epoch": 0.11394917098777287, "percentage": 11.39, "elapsed_time": "0:07:05", "remaining_time": "0:55:05", "throughput": 13222.59, "total_tokens": 5620352}
|
|
{"current_steps": 1785, "total_steps": 15621, "loss": 0.5548, "lr": 1.99878067650621e-06, "epoch": 0.11426925292874976, "percentage": 11.43, "elapsed_time": "0:07:05", "remaining_time": "0:55:00", "throughput": 13238.94, "total_tokens": 5636544}
|
|
{"current_steps": 1790, "total_steps": 15621, "loss": 0.5416, "lr": 1.998724891229245e-06, "epoch": 0.11458933486972665, "percentage": 11.46, "elapsed_time": "0:07:06", "remaining_time": "0:54:55", "throughput": 13255.36, "total_tokens": 5652672}
|
|
{"current_steps": 1795, "total_steps": 15621, "loss": 0.5025, "lr": 1.998667859032527e-06, "epoch": 0.11490941681070355, "percentage": 11.49, "elapsed_time": "0:07:07", "remaining_time": "0:54:49", "throughput": 13270.88, "total_tokens": 5668224}
|
|
{"current_steps": 1800, "total_steps": 15621, "loss": 0.4544, "lr": 1.9986095799872613e-06, "epoch": 0.11522949875168043, "percentage": 11.52, "elapsed_time": "0:07:07", "remaining_time": "0:54:44", "throughput": 13287.61, "total_tokens": 5684480}
|
|
{"current_steps": 1805, "total_steps": 15621, "loss": 0.4475, "lr": 1.99855005416621e-06, "epoch": 0.11554958069265732, "percentage": 11.55, "elapsed_time": "0:07:08", "remaining_time": "0:54:39", "throughput": 13304.24, "total_tokens": 5700864}
|
|
{"current_steps": 1810, "total_steps": 15621, "loss": 0.6003, "lr": 1.998489281643692e-06, "epoch": 0.11586966263363421, "percentage": 11.59, "elapsed_time": "0:07:09", "remaining_time": "0:54:34", "throughput": 13319.54, "total_tokens": 5716224}
|
|
{"current_steps": 1815, "total_steps": 15621, "loss": 0.4876, "lr": 1.998427262495582e-06, "epoch": 0.1161897445746111, "percentage": 11.62, "elapsed_time": "0:07:09", "remaining_time": "0:54:29", "throughput": 13336.75, "total_tokens": 5733056}
|
|
{"current_steps": 1820, "total_steps": 15621, "loss": 0.6507, "lr": 1.9983639967993124e-06, "epoch": 0.11650982651558799, "percentage": 11.65, "elapsed_time": "0:07:10", "remaining_time": "0:54:24", "throughput": 13352.83, "total_tokens": 5749120}
|
|
{"current_steps": 1825, "total_steps": 15621, "loss": 0.7451, "lr": 1.99829948463387e-06, "epoch": 0.11682990845656488, "percentage": 11.68, "elapsed_time": "0:07:11", "remaining_time": "0:54:19", "throughput": 13367.2, "total_tokens": 5763968}
|
|
{"current_steps": 1830, "total_steps": 15621, "loss": 0.5556, "lr": 1.9982337260798e-06, "epoch": 0.11714999039754177, "percentage": 11.71, "elapsed_time": "0:07:11", "remaining_time": "0:54:14", "throughput": 13382.47, "total_tokens": 5779520}
|
|
{"current_steps": 1835, "total_steps": 15621, "loss": 0.5874, "lr": 1.998166721219203e-06, "epoch": 0.11747007233851867, "percentage": 11.75, "elapsed_time": "0:07:12", "remaining_time": "0:54:10", "throughput": 13402.58, "total_tokens": 5798848}
|
|
{"current_steps": 1840, "total_steps": 15621, "loss": 0.5069, "lr": 1.9980984701357338e-06, "epoch": 0.11779015427949555, "percentage": 11.78, "elapsed_time": "0:07:13", "remaining_time": "0:54:05", "throughput": 13417.15, "total_tokens": 5813952}
|
|
{"current_steps": 1845, "total_steps": 15621, "loss": 0.4306, "lr": 1.998028972914606e-06, "epoch": 0.11811023622047244, "percentage": 11.81, "elapsed_time": "0:07:14", "remaining_time": "0:54:00", "throughput": 13432.89, "total_tokens": 5830016}
|
|
{"current_steps": 1850, "total_steps": 15621, "loss": 0.5965, "lr": 1.9979582296425877e-06, "epoch": 0.11843031816144933, "percentage": 11.84, "elapsed_time": "0:07:14", "remaining_time": "0:53:55", "throughput": 13447.5, "total_tokens": 5845312}
|
|
{"current_steps": 1855, "total_steps": 15621, "loss": 0.5894, "lr": 1.9978862404080022e-06, "epoch": 0.11875040010242623, "percentage": 11.88, "elapsed_time": "0:07:15", "remaining_time": "0:53:50", "throughput": 13462.16, "total_tokens": 5860672}
|
|
{"current_steps": 1860, "total_steps": 15621, "loss": 0.5369, "lr": 1.9978130053007295e-06, "epoch": 0.1190704820434031, "percentage": 11.91, "elapsed_time": "0:07:16", "remaining_time": "0:53:45", "throughput": 13476.35, "total_tokens": 5875776}
|
|
{"current_steps": 1865, "total_steps": 15621, "loss": 0.4361, "lr": 1.9977385244122034e-06, "epoch": 0.11939056398438, "percentage": 11.94, "elapsed_time": "0:07:16", "remaining_time": "0:53:40", "throughput": 13491.18, "total_tokens": 5891200}
|
|
{"current_steps": 1870, "total_steps": 15621, "loss": 0.4922, "lr": 1.997662797835415e-06, "epoch": 0.11971064592535689, "percentage": 11.97, "elapsed_time": "0:07:17", "remaining_time": "0:53:36", "throughput": 13506.27, "total_tokens": 5907008}
|
|
{"current_steps": 1875, "total_steps": 15621, "loss": 0.4561, "lr": 1.9975858256649097e-06, "epoch": 0.12003072786633379, "percentage": 12.0, "elapsed_time": "0:07:18", "remaining_time": "0:53:31", "throughput": 13521.79, "total_tokens": 5923264}
|
|
{"current_steps": 1880, "total_steps": 15621, "loss": 0.4952, "lr": 1.997507607996788e-06, "epoch": 0.12035080980731067, "percentage": 12.04, "elapsed_time": "0:07:18", "remaining_time": "0:53:26", "throughput": 13537.73, "total_tokens": 5939648}
|
|
{"current_steps": 1885, "total_steps": 15621, "loss": 0.4576, "lr": 1.997428144928706e-06, "epoch": 0.12067089174828756, "percentage": 12.07, "elapsed_time": "0:07:19", "remaining_time": "0:53:22", "throughput": 13553.12, "total_tokens": 5955520}
|
|
{"current_steps": 1890, "total_steps": 15621, "loss": 0.5277, "lr": 1.9973474365598736e-06, "epoch": 0.12099097368926445, "percentage": 12.1, "elapsed_time": "0:07:20", "remaining_time": "0:53:17", "throughput": 13567.77, "total_tokens": 5971072}
|
|
{"current_steps": 1895, "total_steps": 15621, "loss": 0.5794, "lr": 1.9972654829910568e-06, "epoch": 0.12131105563024135, "percentage": 12.13, "elapsed_time": "0:07:20", "remaining_time": "0:53:12", "throughput": 13583.17, "total_tokens": 5987264}
|
|
{"current_steps": 1900, "total_steps": 15621, "loss": 0.6246, "lr": 1.9971822843245748e-06, "epoch": 0.12163113757121823, "percentage": 12.16, "elapsed_time": "0:07:21", "remaining_time": "0:53:08", "throughput": 13597.92, "total_tokens": 6002880}
|
|
{"current_steps": 1905, "total_steps": 15621, "loss": 0.5281, "lr": 1.997097840664303e-06, "epoch": 0.12195121951219512, "percentage": 12.2, "elapsed_time": "0:07:22", "remaining_time": "0:53:03", "throughput": 13614.04, "total_tokens": 6019520}
|
|
{"current_steps": 1910, "total_steps": 15621, "loss": 0.5722, "lr": 1.99701215211567e-06, "epoch": 0.12227130145317201, "percentage": 12.23, "elapsed_time": "0:07:22", "remaining_time": "0:52:59", "throughput": 13629.72, "total_tokens": 6035904}
|
|
{"current_steps": 1915, "total_steps": 15621, "loss": 0.6162, "lr": 1.9969252187856587e-06, "epoch": 0.1225913833941489, "percentage": 12.26, "elapsed_time": "0:07:23", "remaining_time": "0:52:54", "throughput": 13643.38, "total_tokens": 6050816}
|
|
{"current_steps": 1920, "total_steps": 15621, "loss": 0.414, "lr": 1.9968370407828065e-06, "epoch": 0.12291146533512579, "percentage": 12.29, "elapsed_time": "0:07:24", "remaining_time": "0:52:49", "throughput": 13657.36, "total_tokens": 6065920}
|
|
{"current_steps": 1925, "total_steps": 15621, "loss": 0.5995, "lr": 1.996747618217205e-06, "epoch": 0.12323154727610268, "percentage": 12.32, "elapsed_time": "0:07:24", "remaining_time": "0:52:44", "throughput": 13672.05, "total_tokens": 6081728}
|
|
{"current_steps": 1930, "total_steps": 15621, "loss": 0.492, "lr": 1.9966569512004987e-06, "epoch": 0.12355162921707957, "percentage": 12.36, "elapsed_time": "0:07:25", "remaining_time": "0:52:40", "throughput": 13686.9, "total_tokens": 6097472}
|
|
{"current_steps": 1935, "total_steps": 15621, "loss": 0.5079, "lr": 1.996565039845887e-06, "epoch": 0.12387171115805647, "percentage": 12.39, "elapsed_time": "0:07:26", "remaining_time": "0:52:35", "throughput": 13701.37, "total_tokens": 6113152}
|
|
{"current_steps": 1940, "total_steps": 15621, "loss": 0.6364, "lr": 1.996471884268122e-06, "epoch": 0.12419179309903335, "percentage": 12.42, "elapsed_time": "0:07:26", "remaining_time": "0:52:31", "throughput": 13716.79, "total_tokens": 6129408}
|
|
{"current_steps": 1945, "total_steps": 15621, "loss": 0.5506, "lr": 1.9963774845835097e-06, "epoch": 0.12451187504001024, "percentage": 12.45, "elapsed_time": "0:07:27", "remaining_time": "0:52:26", "throughput": 13731.17, "total_tokens": 6144896}
|
|
{"current_steps": 1950, "total_steps": 15621, "loss": 0.5895, "lr": 1.996281840909909e-06, "epoch": 0.12483195698098713, "percentage": 12.48, "elapsed_time": "0:07:28", "remaining_time": "0:52:22", "throughput": 13745.12, "total_tokens": 6160256}
|
|
{"current_steps": 1955, "total_steps": 15621, "loss": 0.6389, "lr": 1.9961849533667322e-06, "epoch": 0.12515203892196403, "percentage": 12.52, "elapsed_time": "0:07:28", "remaining_time": "0:52:17", "throughput": 13758.52, "total_tokens": 6175104}
|
|
{"current_steps": 1960, "total_steps": 15621, "loss": 0.5267, "lr": 1.9960868220749447e-06, "epoch": 0.1254721208629409, "percentage": 12.55, "elapsed_time": "0:07:29", "remaining_time": "0:52:12", "throughput": 13772.08, "total_tokens": 6190272}
|
|
{"current_steps": 1965, "total_steps": 15621, "loss": 0.5836, "lr": 1.9959874471570644e-06, "epoch": 0.1257922028039178, "percentage": 12.58, "elapsed_time": "0:07:30", "remaining_time": "0:52:08", "throughput": 13786.23, "total_tokens": 6205952}
|
|
{"current_steps": 1970, "total_steps": 15621, "loss": 0.5619, "lr": 1.9958868287371625e-06, "epoch": 0.1261122847448947, "percentage": 12.61, "elapsed_time": "0:07:30", "remaining_time": "0:52:04", "throughput": 13801.81, "total_tokens": 6222592}
|
|
{"current_steps": 1975, "total_steps": 15621, "loss": 0.4804, "lr": 1.9957849669408617e-06, "epoch": 0.12643236668587157, "percentage": 12.64, "elapsed_time": "0:07:31", "remaining_time": "0:51:59", "throughput": 13815.34, "total_tokens": 6237696}
|
|
{"current_steps": 1980, "total_steps": 15621, "loss": 0.4947, "lr": 1.995681861895338e-06, "epoch": 0.12675244862684848, "percentage": 12.68, "elapsed_time": "0:07:32", "remaining_time": "0:51:55", "throughput": 13830.52, "total_tokens": 6254080}
|
|
{"current_steps": 1985, "total_steps": 15621, "loss": 0.5828, "lr": 1.9955775137293187e-06, "epoch": 0.12707253056782536, "percentage": 12.71, "elapsed_time": "0:07:32", "remaining_time": "0:51:51", "throughput": 13844.83, "total_tokens": 6270016}
|
|
{"current_steps": 1990, "total_steps": 15621, "loss": 0.6161, "lr": 1.9954719225730845e-06, "epoch": 0.12739261250880227, "percentage": 12.74, "elapsed_time": "0:07:33", "remaining_time": "0:51:46", "throughput": 13858.37, "total_tokens": 6285184}
|
|
{"current_steps": 1995, "total_steps": 15621, "loss": 0.4833, "lr": 1.9953650885584666e-06, "epoch": 0.12771269444977915, "percentage": 12.77, "elapsed_time": "0:07:34", "remaining_time": "0:51:42", "throughput": 13872.37, "total_tokens": 6300992}
|
|
{"current_steps": 2000, "total_steps": 15621, "loss": 0.5462, "lr": 1.995257011818849e-06, "epoch": 0.12803277639075603, "percentage": 12.8, "elapsed_time": "0:07:34", "remaining_time": "0:51:37", "throughput": 13884.58, "total_tokens": 6315392}
|
|
{"current_steps": 2005, "total_steps": 15621, "loss": 0.4676, "lr": 1.9951476924891666e-06, "epoch": 0.12835285833173293, "percentage": 12.84, "elapsed_time": "0:07:35", "remaining_time": "0:51:33", "throughput": 13898.23, "total_tokens": 6331136}
|
|
{"current_steps": 2010, "total_steps": 15621, "loss": 0.5551, "lr": 1.9950371307059056e-06, "epoch": 0.1286729402727098, "percentage": 12.87, "elapsed_time": "0:07:36", "remaining_time": "0:51:29", "throughput": 13912.99, "total_tokens": 6347584}
|
|
{"current_steps": 2015, "total_steps": 15621, "loss": 0.5584, "lr": 1.9949253266071036e-06, "epoch": 0.1289930222136867, "percentage": 12.9, "elapsed_time": "0:07:36", "remaining_time": "0:51:25", "throughput": 13926.0, "total_tokens": 6362560}
|
|
{"current_steps": 2020, "total_steps": 15621, "loss": 0.5131, "lr": 1.9948122803323503e-06, "epoch": 0.1293131041546636, "percentage": 12.93, "elapsed_time": "0:07:37", "remaining_time": "0:51:20", "throughput": 13940.03, "total_tokens": 6378304}
|
|
{"current_steps": 2025, "total_steps": 15621, "loss": 0.5125, "lr": 1.9946979920227844e-06, "epoch": 0.12963318609564048, "percentage": 12.96, "elapsed_time": "0:07:38", "remaining_time": "0:51:16", "throughput": 13953.03, "total_tokens": 6393280}
|
|
{"current_steps": 2030, "total_steps": 15621, "loss": 0.5188, "lr": 1.994582461821096e-06, "epoch": 0.1299532680366174, "percentage": 13.0, "elapsed_time": "0:07:38", "remaining_time": "0:51:12", "throughput": 13967.37, "total_tokens": 6409472}
|
|
{"current_steps": 2035, "total_steps": 15621, "loss": 0.7149, "lr": 1.9944656898715267e-06, "epoch": 0.13027334997759427, "percentage": 13.03, "elapsed_time": "0:07:39", "remaining_time": "0:51:08", "throughput": 13980.84, "total_tokens": 6424960}
|
|
{"current_steps": 2040, "total_steps": 15621, "loss": 0.6082, "lr": 1.994347676319867e-06, "epoch": 0.13059343191857115, "percentage": 13.06, "elapsed_time": "0:07:40", "remaining_time": "0:51:03", "throughput": 13993.7, "total_tokens": 6440000}
|
|
{"current_steps": 2045, "total_steps": 15621, "loss": 0.4607, "lr": 1.994228421313459e-06, "epoch": 0.13091351385954805, "percentage": 13.09, "elapsed_time": "0:07:40", "remaining_time": "0:50:59", "throughput": 14009.81, "total_tokens": 6457600}
|
|
{"current_steps": 2050, "total_steps": 15621, "loss": 0.5187, "lr": 1.994107925001193e-06, "epoch": 0.13123359580052493, "percentage": 13.12, "elapsed_time": "0:07:41", "remaining_time": "0:50:55", "throughput": 14023.17, "total_tokens": 6473088}
|
|
{"current_steps": 2055, "total_steps": 15621, "loss": 0.595, "lr": 1.9939861875335108e-06, "epoch": 0.1315536777415018, "percentage": 13.16, "elapsed_time": "0:07:42", "remaining_time": "0:50:51", "throughput": 14035.26, "total_tokens": 6487680}
|
|
{"current_steps": 2060, "total_steps": 15621, "loss": 0.4909, "lr": 1.9938632090624025e-06, "epoch": 0.13187375968247872, "percentage": 13.19, "elapsed_time": "0:07:42", "remaining_time": "0:50:47", "throughput": 14048.73, "total_tokens": 6503296}
|
|
{"current_steps": 2065, "total_steps": 15621, "loss": 0.5368, "lr": 1.9937389897414087e-06, "epoch": 0.1321938416234556, "percentage": 13.22, "elapsed_time": "0:07:43", "remaining_time": "0:50:43", "throughput": 14062.17, "total_tokens": 6518912}
|
|
{"current_steps": 2070, "total_steps": 15621, "loss": 0.5642, "lr": 1.993613529725618e-06, "epoch": 0.1325139235644325, "percentage": 13.25, "elapsed_time": "0:07:44", "remaining_time": "0:50:39", "throughput": 14075.81, "total_tokens": 6534784}
|
|
{"current_steps": 2075, "total_steps": 15621, "loss": 0.5303, "lr": 1.99348682917167e-06, "epoch": 0.13283400550540939, "percentage": 13.28, "elapsed_time": "0:07:44", "remaining_time": "0:50:35", "throughput": 14089.26, "total_tokens": 6550528}
|
|
{"current_steps": 2080, "total_steps": 15621, "loss": 0.5475, "lr": 1.99335888823775e-06, "epoch": 0.13315408744638627, "percentage": 13.32, "elapsed_time": "0:07:45", "remaining_time": "0:50:31", "throughput": 14102.47, "total_tokens": 6566144}
|
|
{"current_steps": 2085, "total_steps": 15621, "loss": 0.5654, "lr": 1.993229707083595e-06, "epoch": 0.13347416938736317, "percentage": 13.35, "elapsed_time": "0:07:46", "remaining_time": "0:50:27", "throughput": 14118.13, "total_tokens": 6583872}
|
|
{"current_steps": 2090, "total_steps": 15621, "loss": 0.4165, "lr": 1.993099285870489e-06, "epoch": 0.13379425132834005, "percentage": 13.38, "elapsed_time": "0:07:47", "remaining_time": "0:50:24", "throughput": 14134.76, "total_tokens": 6602304}
|
|
{"current_steps": 2095, "total_steps": 15621, "loss": 0.462, "lr": 1.992967624761264e-06, "epoch": 0.13411433326931693, "percentage": 13.41, "elapsed_time": "0:07:47", "remaining_time": "0:50:20", "throughput": 14148.17, "total_tokens": 6618112}
|
|
{"current_steps": 2100, "total_steps": 15621, "loss": 0.6239, "lr": 1.9928347239203014e-06, "epoch": 0.13443441521029384, "percentage": 13.44, "elapsed_time": "0:07:48", "remaining_time": "0:50:16", "throughput": 14163.62, "total_tokens": 6635584}
|
|
{"current_steps": 2105, "total_steps": 15621, "loss": 0.5283, "lr": 1.9927005835135282e-06, "epoch": 0.13475449715127072, "percentage": 13.48, "elapsed_time": "0:07:49", "remaining_time": "0:50:12", "throughput": 14179.44, "total_tokens": 6653568}
|
|
{"current_steps": 2110, "total_steps": 15621, "loss": 0.4596, "lr": 1.9925652037084214e-06, "epoch": 0.13507457909224763, "percentage": 13.51, "elapsed_time": "0:07:49", "remaining_time": "0:50:08", "throughput": 14191.98, "total_tokens": 6668864}
|
|
{"current_steps": 2115, "total_steps": 15621, "loss": 0.4838, "lr": 1.9924285846740037e-06, "epoch": 0.1353946610332245, "percentage": 13.54, "elapsed_time": "0:07:50", "remaining_time": "0:50:05", "throughput": 14204.8, "total_tokens": 6684416}
|
|
{"current_steps": 2120, "total_steps": 15621, "loss": 0.5948, "lr": 1.9922907265808452e-06, "epoch": 0.13571474297420139, "percentage": 13.57, "elapsed_time": "0:07:51", "remaining_time": "0:50:00", "throughput": 14217.03, "total_tokens": 6699392}
|
|
{"current_steps": 2125, "total_steps": 15621, "loss": 0.544, "lr": 1.9921516296010643e-06, "epoch": 0.1360348249151783, "percentage": 13.6, "elapsed_time": "0:07:51", "remaining_time": "0:49:56", "throughput": 14229.23, "total_tokens": 6714560}
|
|
{"current_steps": 2130, "total_steps": 15621, "loss": 0.5678, "lr": 1.9920112939083246e-06, "epoch": 0.13635490685615517, "percentage": 13.64, "elapsed_time": "0:07:52", "remaining_time": "0:49:52", "throughput": 14241.97, "total_tokens": 6729920}
|
|
{"current_steps": 2135, "total_steps": 15621, "loss": 0.5607, "lr": 1.9918697196778367e-06, "epoch": 0.13667498879713205, "percentage": 13.67, "elapsed_time": "0:07:53", "remaining_time": "0:49:48", "throughput": 14253.84, "total_tokens": 6744768}
|
|
{"current_steps": 2140, "total_steps": 15621, "loss": 0.4531, "lr": 1.9917269070863578e-06, "epoch": 0.13699507073810896, "percentage": 13.7, "elapsed_time": "0:07:53", "remaining_time": "0:49:44", "throughput": 14265.73, "total_tokens": 6759680}
|
|
{"current_steps": 2145, "total_steps": 15621, "loss": 0.5091, "lr": 1.9915828563121915e-06, "epoch": 0.13731515267908584, "percentage": 13.73, "elapsed_time": "0:07:54", "remaining_time": "0:49:41", "throughput": 14278.48, "total_tokens": 6775168}
|
|
{"current_steps": 2150, "total_steps": 15621, "loss": 0.5144, "lr": 1.9914375675351865e-06, "epoch": 0.13763523462006275, "percentage": 13.76, "elapsed_time": "0:07:55", "remaining_time": "0:49:37", "throughput": 14291.85, "total_tokens": 6791296}
|
|
{"current_steps": 2155, "total_steps": 15621, "loss": 0.4326, "lr": 1.991291040936738e-06, "epoch": 0.13795531656103963, "percentage": 13.8, "elapsed_time": "0:07:55", "remaining_time": "0:49:33", "throughput": 14306.67, "total_tokens": 6808640}
|
|
{"current_steps": 2160, "total_steps": 15621, "loss": 0.6764, "lr": 1.9911432766997857e-06, "epoch": 0.1382753985020165, "percentage": 13.83, "elapsed_time": "0:07:56", "remaining_time": "0:49:29", "throughput": 14319.16, "total_tokens": 6824064}
|
|
{"current_steps": 2165, "total_steps": 15621, "loss": 0.455, "lr": 1.990994275008815e-06, "epoch": 0.1385954804429934, "percentage": 13.86, "elapsed_time": "0:07:57", "remaining_time": "0:49:26", "throughput": 14332.26, "total_tokens": 6839872}
|
|
{"current_steps": 2170, "total_steps": 15621, "loss": 0.515, "lr": 1.9908440360498565e-06, "epoch": 0.1389155623839703, "percentage": 13.89, "elapsed_time": "0:07:57", "remaining_time": "0:49:22", "throughput": 14344.94, "total_tokens": 6855744}
|
|
{"current_steps": 2175, "total_steps": 15621, "loss": 0.5589, "lr": 1.990692560010485e-06, "epoch": 0.1392356443249472, "percentage": 13.92, "elapsed_time": "0:07:58", "remaining_time": "0:49:18", "throughput": 14355.42, "total_tokens": 6869632}
|
|
{"current_steps": 2180, "total_steps": 15621, "loss": 0.4574, "lr": 1.9905398470798206e-06, "epoch": 0.13955572626592408, "percentage": 13.96, "elapsed_time": "0:07:59", "remaining_time": "0:49:14", "throughput": 14368.35, "total_tokens": 6885696}
|
|
{"current_steps": 2185, "total_steps": 15621, "loss": 0.37, "lr": 1.990385897448527e-06, "epoch": 0.13987580820690096, "percentage": 13.99, "elapsed_time": "0:07:59", "remaining_time": "0:49:10", "throughput": 14381.13, "total_tokens": 6901504}
|
|
{"current_steps": 2190, "total_steps": 15621, "loss": 0.5817, "lr": 1.9902307113088114e-06, "epoch": 0.14019589014787787, "percentage": 14.02, "elapsed_time": "0:08:00", "remaining_time": "0:49:07", "throughput": 14392.99, "total_tokens": 6916480}
|
|
{"current_steps": 2195, "total_steps": 15621, "loss": 0.4882, "lr": 1.9900742888544264e-06, "epoch": 0.14051597208885475, "percentage": 14.05, "elapsed_time": "0:08:01", "remaining_time": "0:49:03", "throughput": 14405.71, "total_tokens": 6932416}
|
|
{"current_steps": 2200, "total_steps": 15621, "loss": 0.5338, "lr": 1.989916630280667e-06, "epoch": 0.14083605402983163, "percentage": 14.08, "elapsed_time": "0:08:01", "remaining_time": "0:48:59", "throughput": 14419.29, "total_tokens": 6948992}
|
|
{"current_steps": 2205, "total_steps": 15621, "loss": 0.464, "lr": 1.989757735784372e-06, "epoch": 0.14115613597080853, "percentage": 14.12, "elapsed_time": "0:08:02", "remaining_time": "0:48:56", "throughput": 14431.52, "total_tokens": 6964416}
|
|
{"current_steps": 2210, "total_steps": 15621, "loss": 0.4246, "lr": 1.989597605563923e-06, "epoch": 0.1414762179117854, "percentage": 14.15, "elapsed_time": "0:08:03", "remaining_time": "0:48:52", "throughput": 14444.5, "total_tokens": 6980544}
|
|
{"current_steps": 2215, "total_steps": 15621, "loss": 0.5755, "lr": 1.9894362398192437e-06, "epoch": 0.14179629985276232, "percentage": 14.18, "elapsed_time": "0:08:03", "remaining_time": "0:48:49", "throughput": 14458.28, "total_tokens": 6997440}
|
|
{"current_steps": 2220, "total_steps": 15621, "loss": 0.4218, "lr": 1.9892736387518023e-06, "epoch": 0.1421163817937392, "percentage": 14.21, "elapsed_time": "0:08:04", "remaining_time": "0:48:45", "throughput": 14470.15, "total_tokens": 7012672}
|
|
{"current_steps": 2225, "total_steps": 15621, "loss": 0.4798, "lr": 1.9891098025646075e-06, "epoch": 0.14243646373471608, "percentage": 14.24, "elapsed_time": "0:08:05", "remaining_time": "0:48:41", "throughput": 14481.6, "total_tokens": 7027648}
|
|
{"current_steps": 2230, "total_steps": 15621, "loss": 0.5266, "lr": 1.9889447314622105e-06, "epoch": 0.142756545675693, "percentage": 14.28, "elapsed_time": "0:08:05", "remaining_time": "0:48:38", "throughput": 14493.7, "total_tokens": 7043200}
|
|
{"current_steps": 2235, "total_steps": 15621, "loss": 0.7416, "lr": 1.9887784256507046e-06, "epoch": 0.14307662761666987, "percentage": 14.31, "elapsed_time": "0:08:06", "remaining_time": "0:48:34", "throughput": 14505.51, "total_tokens": 7058688}
|
|
{"current_steps": 2240, "total_steps": 15621, "loss": 0.6734, "lr": 1.988610885337725e-06, "epoch": 0.14339670955764675, "percentage": 14.34, "elapsed_time": "0:08:07", "remaining_time": "0:48:30", "throughput": 14517.27, "total_tokens": 7074048}
|
|
{"current_steps": 2245, "total_steps": 15621, "loss": 0.5319, "lr": 1.9884421107324476e-06, "epoch": 0.14371679149862365, "percentage": 14.37, "elapsed_time": "0:08:07", "remaining_time": "0:48:27", "throughput": 14529.55, "total_tokens": 7089792}
|
|
{"current_steps": 2250, "total_steps": 15621, "loss": 0.4753, "lr": 1.9882721020455893e-06, "epoch": 0.14403687343960053, "percentage": 14.4, "elapsed_time": "0:08:08", "remaining_time": "0:48:23", "throughput": 14540.5, "total_tokens": 7104640}
|
|
{"current_steps": 2255, "total_steps": 15621, "loss": 0.5137, "lr": 1.988100859489408e-06, "epoch": 0.14435695538057744, "percentage": 14.44, "elapsed_time": "0:08:09", "remaining_time": "0:48:20", "throughput": 14552.27, "total_tokens": 7120064}
|
|
{"current_steps": 2260, "total_steps": 15621, "loss": 0.4839, "lr": 1.9879283832777017e-06, "epoch": 0.14467703732155432, "percentage": 14.47, "elapsed_time": "0:08:09", "remaining_time": "0:48:16", "throughput": 14563.68, "total_tokens": 7135232}
|
|
{"current_steps": 2265, "total_steps": 15621, "loss": 0.5247, "lr": 1.9877546736258096e-06, "epoch": 0.1449971192625312, "percentage": 14.5, "elapsed_time": "0:08:10", "remaining_time": "0:48:12", "throughput": 14574.19, "total_tokens": 7149632}
|
|
{"current_steps": 2270, "total_steps": 15621, "loss": 0.4134, "lr": 1.98757973075061e-06, "epoch": 0.1453172012035081, "percentage": 14.53, "elapsed_time": "0:08:11", "remaining_time": "0:48:09", "throughput": 14585.21, "total_tokens": 7164352}
|
|
{"current_steps": 2275, "total_steps": 15621, "loss": 0.52, "lr": 1.987403554870521e-06, "epoch": 0.14563728314448499, "percentage": 14.56, "elapsed_time": "0:08:11", "remaining_time": "0:48:05", "throughput": 14596.8, "total_tokens": 7179776}
|
|
{"current_steps": 2280, "total_steps": 15621, "loss": 0.423, "lr": 1.9872261462055003e-06, "epoch": 0.14595736508546187, "percentage": 14.6, "elapsed_time": "0:08:12", "remaining_time": "0:48:01", "throughput": 14607.33, "total_tokens": 7194240}
|
|
{"current_steps": 2285, "total_steps": 15621, "loss": 0.4393, "lr": 1.987047504977045e-06, "epoch": 0.14627744702643877, "percentage": 14.63, "elapsed_time": "0:08:13", "remaining_time": "0:47:58", "throughput": 14618.91, "total_tokens": 7209472}
|
|
{"current_steps": 2290, "total_steps": 15621, "loss": 0.4174, "lr": 1.9868676314081902e-06, "epoch": 0.14659752896741565, "percentage": 14.66, "elapsed_time": "0:08:13", "remaining_time": "0:47:54", "throughput": 14630.79, "total_tokens": 7225088}
|
|
{"current_steps": 2295, "total_steps": 15621, "loss": 0.6811, "lr": 1.9866865257235107e-06, "epoch": 0.14691761090839256, "percentage": 14.69, "elapsed_time": "0:08:14", "remaining_time": "0:47:51", "throughput": 14642.66, "total_tokens": 7240704}
|
|
{"current_steps": 2300, "total_steps": 15621, "loss": 0.4241, "lr": 1.9865041881491188e-06, "epoch": 0.14723769284936944, "percentage": 14.72, "elapsed_time": "0:08:15", "remaining_time": "0:47:47", "throughput": 14654.45, "total_tokens": 7256000}
|
|
{"current_steps": 2305, "total_steps": 15621, "loss": 0.6191, "lr": 1.9863206189126653e-06, "epoch": 0.14755777479034632, "percentage": 14.76, "elapsed_time": "0:08:15", "remaining_time": "0:47:44", "throughput": 14664.72, "total_tokens": 7270336}
|
|
{"current_steps": 2310, "total_steps": 15621, "loss": 0.5735, "lr": 1.9861358182433382e-06, "epoch": 0.14787785673132323, "percentage": 14.79, "elapsed_time": "0:08:16", "remaining_time": "0:47:40", "throughput": 14675.98, "total_tokens": 7285440}
|
|
{"current_steps": 2315, "total_steps": 15621, "loss": 0.4719, "lr": 1.9859497863718634e-06, "epoch": 0.1481979386723001, "percentage": 14.82, "elapsed_time": "0:08:17", "remaining_time": "0:47:37", "throughput": 14687.54, "total_tokens": 7301120}
|
|
{"current_steps": 2320, "total_steps": 15621, "loss": 0.5315, "lr": 1.985762523530504e-06, "epoch": 0.14851802061327699, "percentage": 14.85, "elapsed_time": "0:08:17", "remaining_time": "0:47:33", "throughput": 14698.63, "total_tokens": 7316416}
|
|
{"current_steps": 2325, "total_steps": 15621, "loss": 0.4997, "lr": 1.98557402995306e-06, "epoch": 0.1488381025542539, "percentage": 14.88, "elapsed_time": "0:08:18", "remaining_time": "0:47:30", "throughput": 14710.46, "total_tokens": 7332160}
|
|
{"current_steps": 2330, "total_steps": 15621, "loss": 0.7101, "lr": 1.985384305874868e-06, "epoch": 0.14915818449523077, "percentage": 14.92, "elapsed_time": "0:08:19", "remaining_time": "0:47:27", "throughput": 14722.02, "total_tokens": 7347776}
|
|
{"current_steps": 2335, "total_steps": 15621, "loss": 0.5478, "lr": 1.9851933515328e-06, "epoch": 0.14947826643620768, "percentage": 14.95, "elapsed_time": "0:08:19", "remaining_time": "0:47:23", "throughput": 14733.31, "total_tokens": 7363200}
|
|
{"current_steps": 2340, "total_steps": 15621, "loss": 0.475, "lr": 1.985001167165265e-06, "epoch": 0.14979834837718456, "percentage": 14.98, "elapsed_time": "0:08:20", "remaining_time": "0:47:20", "throughput": 14744.87, "total_tokens": 7378752}
|
|
{"current_steps": 2345, "total_steps": 15621, "loss": 0.5239, "lr": 1.984807753012208e-06, "epoch": 0.15011843031816144, "percentage": 15.01, "elapsed_time": "0:08:21", "remaining_time": "0:47:16", "throughput": 14755.98, "total_tokens": 7393984}
|
|
{"current_steps": 2346, "total_steps": 15621, "eval_loss": 0.5113906264305115, "epoch": 0.15018244670635683, "percentage": 15.02, "elapsed_time": "0:09:11", "remaining_time": "0:52:02", "throughput": 13405.11, "total_tokens": 7397056}
|
|
{"current_steps": 2350, "total_steps": 15621, "loss": 0.5882, "lr": 1.9846131093151086e-06, "epoch": 0.15043851225913835, "percentage": 15.04, "elapsed_time": "0:09:49", "remaining_time": "0:55:30", "throughput": 12560.74, "total_tokens": 7408832}
|
|
{"current_steps": 2355, "total_steps": 15621, "loss": 0.4612, "lr": 1.9844172363169808e-06, "epoch": 0.15075859420011523, "percentage": 15.08, "elapsed_time": "0:09:50", "remaining_time": "0:55:26", "throughput": 12571.46, "total_tokens": 7423040}
|
|
{"current_steps": 2360, "total_steps": 15621, "loss": 0.5148, "lr": 1.9842201342623756e-06, "epoch": 0.15107867614109213, "percentage": 15.11, "elapsed_time": "0:09:51", "remaining_time": "0:55:21", "throughput": 12583.42, "total_tokens": 7438464}
|
|
{"current_steps": 2365, "total_steps": 15621, "loss": 0.5219, "lr": 1.9840218033973766e-06, "epoch": 0.151398758082069, "percentage": 15.14, "elapsed_time": "0:09:51", "remaining_time": "0:55:17", "throughput": 12595.28, "total_tokens": 7453824}
|
|
{"current_steps": 2370, "total_steps": 15621, "loss": 0.5858, "lr": 1.9838222439696027e-06, "epoch": 0.1517188400230459, "percentage": 15.17, "elapsed_time": "0:09:52", "remaining_time": "0:55:12", "throughput": 12607.22, "total_tokens": 7469312}
|
|
{"current_steps": 2375, "total_steps": 15621, "loss": 0.7034, "lr": 1.9836214562282058e-06, "epoch": 0.1520389219640228, "percentage": 15.2, "elapsed_time": "0:09:53", "remaining_time": "0:55:08", "throughput": 12619.53, "total_tokens": 7485120}
|
|
{"current_steps": 2380, "total_steps": 15621, "loss": 0.5189, "lr": 1.9834194404238715e-06, "epoch": 0.15235900390499968, "percentage": 15.24, "elapsed_time": "0:09:53", "remaining_time": "0:55:03", "throughput": 12631.31, "total_tokens": 7500416}
|
|
{"current_steps": 2385, "total_steps": 15621, "loss": 0.4149, "lr": 1.9832161968088193e-06, "epoch": 0.15267908584597656, "percentage": 15.27, "elapsed_time": "0:09:54", "remaining_time": "0:54:59", "throughput": 12644.06, "total_tokens": 7516672}
|
|
{"current_steps": 2390, "total_steps": 15621, "loss": 0.4703, "lr": 1.9830117256368015e-06, "epoch": 0.15299916778695347, "percentage": 15.3, "elapsed_time": "0:09:55", "remaining_time": "0:54:54", "throughput": 12656.65, "total_tokens": 7532800}
|
|
{"current_steps": 2395, "total_steps": 15621, "loss": 0.4994, "lr": 1.982806027163102e-06, "epoch": 0.15331924972793035, "percentage": 15.33, "elapsed_time": "0:09:55", "remaining_time": "0:54:50", "throughput": 12668.03, "total_tokens": 7547776}
|
|
{"current_steps": 2400, "total_steps": 15621, "loss": 0.5718, "lr": 1.9825991016445386e-06, "epoch": 0.15363933166890725, "percentage": 15.36, "elapsed_time": "0:09:56", "remaining_time": "0:54:45", "throughput": 12679.04, "total_tokens": 7562496}
|
|
{"current_steps": 2405, "total_steps": 15621, "loss": 0.5263, "lr": 1.9823909493394594e-06, "epoch": 0.15395941360988413, "percentage": 15.4, "elapsed_time": "0:09:57", "remaining_time": "0:54:41", "throughput": 12690.7, "total_tokens": 7577920}
|
|
{"current_steps": 2410, "total_steps": 15621, "loss": 0.5373, "lr": 1.9821815705077455e-06, "epoch": 0.154279495550861, "percentage": 15.43, "elapsed_time": "0:09:57", "remaining_time": "0:54:36", "throughput": 12702.34, "total_tokens": 7593216}
|
|
{"current_steps": 2415, "total_steps": 15621, "loss": 0.5752, "lr": 1.9819709654108087e-06, "epoch": 0.15459957749183792, "percentage": 15.46, "elapsed_time": "0:09:58", "remaining_time": "0:54:32", "throughput": 12713.45, "total_tokens": 7608192}
|
|
{"current_steps": 2420, "total_steps": 15621, "loss": 0.4606, "lr": 1.981759134311592e-06, "epoch": 0.1549196594328148, "percentage": 15.49, "elapsed_time": "0:09:59", "remaining_time": "0:54:28", "throughput": 12725.98, "total_tokens": 7624448}
|
|
{"current_steps": 2425, "total_steps": 15621, "loss": 0.4839, "lr": 1.981546077474569e-06, "epoch": 0.15523974137379168, "percentage": 15.52, "elapsed_time": "0:09:59", "remaining_time": "0:54:23", "throughput": 12737.94, "total_tokens": 7640192}
|
|
{"current_steps": 2430, "total_steps": 15621, "loss": 0.534, "lr": 1.981331795165744e-06, "epoch": 0.15555982331476859, "percentage": 15.56, "elapsed_time": "0:10:00", "remaining_time": "0:54:19", "throughput": 12748.87, "total_tokens": 7654848}
|
|
{"current_steps": 2435, "total_steps": 15621, "loss": 0.6053, "lr": 1.9811162876526498e-06, "epoch": 0.15587990525574547, "percentage": 15.59, "elapsed_time": "0:10:01", "remaining_time": "0:54:15", "throughput": 12761.0, "total_tokens": 7670848}
|
|
{"current_steps": 2440, "total_steps": 15621, "loss": 0.6575, "lr": 1.9808995552043515e-06, "epoch": 0.15619998719672237, "percentage": 15.62, "elapsed_time": "0:10:01", "remaining_time": "0:54:10", "throughput": 12772.33, "total_tokens": 7686016}
|
|
{"current_steps": 2445, "total_steps": 15621, "loss": 0.5662, "lr": 1.9806815980914413e-06, "epoch": 0.15652006913769925, "percentage": 15.65, "elapsed_time": "0:10:02", "remaining_time": "0:54:06", "throughput": 12784.26, "total_tokens": 7701760}
|
|
{"current_steps": 2450, "total_steps": 15621, "loss": 0.5736, "lr": 1.9804624165860417e-06, "epoch": 0.15684015107867613, "percentage": 15.68, "elapsed_time": "0:10:03", "remaining_time": "0:54:02", "throughput": 12796.33, "total_tokens": 7717760}
|
|
{"current_steps": 2455, "total_steps": 15621, "loss": 0.3894, "lr": 1.9802420109618028e-06, "epoch": 0.15716023301965304, "percentage": 15.72, "elapsed_time": "0:10:03", "remaining_time": "0:53:58", "throughput": 12807.94, "total_tokens": 7733376}
|
|
{"current_steps": 2460, "total_steps": 15621, "loss": 0.503, "lr": 1.980020381493904e-06, "epoch": 0.15748031496062992, "percentage": 15.75, "elapsed_time": "0:10:04", "remaining_time": "0:53:54", "throughput": 12821.07, "total_tokens": 7750464}
|
|
{"current_steps": 2465, "total_steps": 15621, "loss": 0.5024, "lr": 1.979797528459052e-06, "epoch": 0.1578003969016068, "percentage": 15.78, "elapsed_time": "0:10:05", "remaining_time": "0:53:50", "throughput": 12835.31, "total_tokens": 7768576}
|
|
{"current_steps": 2470, "total_steps": 15621, "loss": 0.5285, "lr": 1.979573452135482e-06, "epoch": 0.1581204788425837, "percentage": 15.81, "elapsed_time": "0:10:05", "remaining_time": "0:53:46", "throughput": 12847.05, "total_tokens": 7784256}
|
|
{"current_steps": 2475, "total_steps": 15621, "loss": 0.3218, "lr": 1.979348152802955e-06, "epoch": 0.15844056078356059, "percentage": 15.84, "elapsed_time": "0:10:06", "remaining_time": "0:53:41", "throughput": 12858.04, "total_tokens": 7799232}
|
|
{"current_steps": 2480, "total_steps": 15621, "loss": 0.5854, "lr": 1.979121630742761e-06, "epoch": 0.1587606427245375, "percentage": 15.88, "elapsed_time": "0:10:07", "remaining_time": "0:53:37", "throughput": 12869.86, "total_tokens": 7815040}
|
|
{"current_steps": 2485, "total_steps": 15621, "loss": 0.4547, "lr": 1.9788938862377146e-06, "epoch": 0.15908072466551437, "percentage": 15.91, "elapsed_time": "0:10:07", "remaining_time": "0:53:33", "throughput": 12881.12, "total_tokens": 7830400}
|
|
{"current_steps": 2490, "total_steps": 15621, "loss": 0.4803, "lr": 1.9786649195721577e-06, "epoch": 0.15940080660649125, "percentage": 15.94, "elapsed_time": "0:10:08", "remaining_time": "0:53:29", "throughput": 12892.83, "total_tokens": 7846336}
|
|
{"current_steps": 2495, "total_steps": 15621, "loss": 0.6471, "lr": 1.978434731031958e-06, "epoch": 0.15972088854746816, "percentage": 15.97, "elapsed_time": "0:10:09", "remaining_time": "0:53:25", "throughput": 12904.96, "total_tokens": 7862528}
|
|
{"current_steps": 2500, "total_steps": 15621, "loss": 0.4554, "lr": 1.9782033209045085e-06, "epoch": 0.16004097048844504, "percentage": 16.0, "elapsed_time": "0:10:09", "remaining_time": "0:53:21", "throughput": 12918.18, "total_tokens": 7880000}
|
|
{"current_steps": 2505, "total_steps": 15621, "loss": 0.4114, "lr": 1.977970689478727e-06, "epoch": 0.16036105242942192, "percentage": 16.04, "elapsed_time": "0:10:10", "remaining_time": "0:53:17", "throughput": 12929.27, "total_tokens": 7895296}
|
|
{"current_steps": 2510, "total_steps": 15621, "loss": 0.5963, "lr": 1.9777368370450577e-06, "epoch": 0.16068113437039883, "percentage": 16.07, "elapsed_time": "0:10:11", "remaining_time": "0:53:13", "throughput": 12940.8, "total_tokens": 7911104}
|
|
{"current_steps": 2515, "total_steps": 15621, "loss": 0.5129, "lr": 1.9775017638954674e-06, "epoch": 0.1610012163113757, "percentage": 16.1, "elapsed_time": "0:10:11", "remaining_time": "0:53:09", "throughput": 12951.38, "total_tokens": 7925952}
|
|
{"current_steps": 2520, "total_steps": 15621, "loss": 0.6004, "lr": 1.9772654703234476e-06, "epoch": 0.1613212982523526, "percentage": 16.13, "elapsed_time": "0:10:12", "remaining_time": "0:53:04", "throughput": 12962.01, "total_tokens": 7940928}
|
|
{"current_steps": 2525, "total_steps": 15621, "loss": 0.5638, "lr": 1.977027956624014e-06, "epoch": 0.1616413801933295, "percentage": 16.16, "elapsed_time": "0:10:13", "remaining_time": "0:53:00", "throughput": 12971.96, "total_tokens": 7955200}
|
|
{"current_steps": 2530, "total_steps": 15621, "loss": 0.5759, "lr": 1.9767892230937046e-06, "epoch": 0.16196146213430637, "percentage": 16.2, "elapsed_time": "0:10:13", "remaining_time": "0:52:56", "throughput": 12983.27, "total_tokens": 7970944}
|
|
{"current_steps": 2535, "total_steps": 15621, "loss": 0.4305, "lr": 1.976549270030581e-06, "epoch": 0.16228154407528328, "percentage": 16.23, "elapsed_time": "0:10:14", "remaining_time": "0:52:52", "throughput": 12993.89, "total_tokens": 7985856}
|
|
{"current_steps": 2540, "total_steps": 15621, "loss": 0.4789, "lr": 1.9763080977342286e-06, "epoch": 0.16260162601626016, "percentage": 16.26, "elapsed_time": "0:10:15", "remaining_time": "0:52:48", "throughput": 13004.53, "total_tokens": 8001088}
|
|
{"current_steps": 2545, "total_steps": 15621, "loss": 0.4995, "lr": 1.9760657065057527e-06, "epoch": 0.16292170795723707, "percentage": 16.29, "elapsed_time": "0:10:15", "remaining_time": "0:52:44", "throughput": 13016.69, "total_tokens": 8017856}
|
|
{"current_steps": 2550, "total_steps": 15621, "loss": 0.4597, "lr": 1.975822096647782e-06, "epoch": 0.16324178989821395, "percentage": 16.32, "elapsed_time": "0:10:16", "remaining_time": "0:52:40", "throughput": 13028.18, "total_tokens": 8033792}
|
|
{"current_steps": 2555, "total_steps": 15621, "loss": 0.4952, "lr": 1.975577268464466e-06, "epoch": 0.16356187183919083, "percentage": 16.36, "elapsed_time": "0:10:17", "remaining_time": "0:52:36", "throughput": 13038.05, "total_tokens": 8048256}
|
|
{"current_steps": 2560, "total_steps": 15621, "loss": 0.5653, "lr": 1.9753312222614765e-06, "epoch": 0.16388195378016773, "percentage": 16.39, "elapsed_time": "0:10:17", "remaining_time": "0:52:32", "throughput": 13049.01, "total_tokens": 8063680}
|
|
{"current_steps": 2565, "total_steps": 15621, "loss": 0.4827, "lr": 1.9750839583460036e-06, "epoch": 0.1642020357211446, "percentage": 16.42, "elapsed_time": "0:10:18", "remaining_time": "0:52:28", "throughput": 13060.53, "total_tokens": 8079744}
|
|
{"current_steps": 2570, "total_steps": 15621, "loss": 0.5034, "lr": 1.9748354770267603e-06, "epoch": 0.1645221176621215, "percentage": 16.45, "elapsed_time": "0:10:19", "remaining_time": "0:52:24", "throughput": 13070.96, "total_tokens": 8094656}
|
|
{"current_steps": 2575, "total_steps": 15621, "loss": 0.5117, "lr": 1.9745857786139777e-06, "epoch": 0.1648421996030984, "percentage": 16.48, "elapsed_time": "0:10:19", "remaining_time": "0:52:21", "throughput": 13082.21, "total_tokens": 8110528}
|
|
{"current_steps": 2580, "total_steps": 15621, "loss": 0.6109, "lr": 1.974334863419408e-06, "epoch": 0.16516228154407528, "percentage": 16.52, "elapsed_time": "0:10:20", "remaining_time": "0:52:17", "throughput": 13093.67, "total_tokens": 8126720}
|
|
{"current_steps": 2585, "total_steps": 15621, "loss": 0.5038, "lr": 1.9740827317563212e-06, "epoch": 0.1654823634850522, "percentage": 16.55, "elapsed_time": "0:10:21", "remaining_time": "0:52:13", "throughput": 13103.65, "total_tokens": 8141312}
|
|
{"current_steps": 2590, "total_steps": 15621, "loss": 0.485, "lr": 1.973829383939507e-06, "epoch": 0.16580244542602907, "percentage": 16.58, "elapsed_time": "0:10:21", "remaining_time": "0:52:09", "throughput": 13114.32, "total_tokens": 8156736}
|
|
{"current_steps": 2595, "total_steps": 15621, "loss": 0.4978, "lr": 1.973574820285273e-06, "epoch": 0.16612252736700595, "percentage": 16.61, "elapsed_time": "0:10:22", "remaining_time": "0:52:05", "throughput": 13125.4, "total_tokens": 8172480}
|
|
{"current_steps": 2600, "total_steps": 15621, "loss": 0.581, "lr": 1.9733190411114443e-06, "epoch": 0.16644260930798285, "percentage": 16.64, "elapsed_time": "0:10:23", "remaining_time": "0:52:01", "throughput": 13136.31, "total_tokens": 8188224}
|
|
{"current_steps": 2605, "total_steps": 15621, "loss": 0.4388, "lr": 1.9730620467373654e-06, "epoch": 0.16676269124895973, "percentage": 16.68, "elapsed_time": "0:10:24", "remaining_time": "0:51:57", "throughput": 13147.79, "total_tokens": 8204352}
|
|
{"current_steps": 2610, "total_steps": 15621, "loss": 0.5835, "lr": 1.9728038374838958e-06, "epoch": 0.1670827731899366, "percentage": 16.71, "elapsed_time": "0:10:24", "remaining_time": "0:51:53", "throughput": 13158.11, "total_tokens": 8219328}
|
|
{"current_steps": 2615, "total_steps": 15621, "loss": 0.392, "lr": 1.972544413673413e-06, "epoch": 0.16740285513091352, "percentage": 16.74, "elapsed_time": "0:10:25", "remaining_time": "0:51:50", "throughput": 13168.48, "total_tokens": 8234560}
|
|
{"current_steps": 2620, "total_steps": 15621, "loss": 0.5766, "lr": 1.9722837756298108e-06, "epoch": 0.1677229370718904, "percentage": 16.77, "elapsed_time": "0:10:25", "remaining_time": "0:51:46", "throughput": 13178.44, "total_tokens": 8249344}
|
|
{"current_steps": 2625, "total_steps": 15621, "loss": 0.551, "lr": 1.972021923678499e-06, "epoch": 0.1680430190128673, "percentage": 16.8, "elapsed_time": "0:10:26", "remaining_time": "0:51:42", "throughput": 13189.73, "total_tokens": 8265600}
|
|
{"current_steps": 2630, "total_steps": 15621, "loss": 0.4822, "lr": 1.971758858146403e-06, "epoch": 0.16836310095384419, "percentage": 16.84, "elapsed_time": "0:10:27", "remaining_time": "0:51:38", "throughput": 13199.68, "total_tokens": 8280384}
|
|
{"current_steps": 2635, "total_steps": 15621, "loss": 0.4916, "lr": 1.9714945793619626e-06, "epoch": 0.16868318289482107, "percentage": 16.87, "elapsed_time": "0:10:27", "remaining_time": "0:51:34", "throughput": 13209.98, "total_tokens": 8295744}
|
|
{"current_steps": 2640, "total_steps": 15621, "loss": 0.52, "lr": 1.971229087655133e-06, "epoch": 0.16900326483579797, "percentage": 16.9, "elapsed_time": "0:10:28", "remaining_time": "0:51:31", "throughput": 13220.89, "total_tokens": 8311680}
|
|
{"current_steps": 2645, "total_steps": 15621, "loss": 0.4659, "lr": 1.9709623833573842e-06, "epoch": 0.16932334677677485, "percentage": 16.93, "elapsed_time": "0:10:29", "remaining_time": "0:51:27", "throughput": 13230.88, "total_tokens": 8326592}
|
|
{"current_steps": 2650, "total_steps": 15621, "loss": 0.4454, "lr": 1.9706944668016994e-06, "epoch": 0.16964342871775173, "percentage": 16.96, "elapsed_time": "0:10:29", "remaining_time": "0:51:23", "throughput": 13240.96, "total_tokens": 8341632}
|
|
{"current_steps": 2655, "total_steps": 15621, "loss": 0.4643, "lr": 1.9704253383225756e-06, "epoch": 0.16996351065872864, "percentage": 17.0, "elapsed_time": "0:10:30", "remaining_time": "0:51:20", "throughput": 13252.6, "total_tokens": 8358400}
|
|
{"current_steps": 2660, "total_steps": 15621, "loss": 0.4813, "lr": 1.970154998256023e-06, "epoch": 0.17028359259970552, "percentage": 17.03, "elapsed_time": "0:10:31", "remaining_time": "0:51:16", "throughput": 13263.39, "total_tokens": 8374144}
|
|
{"current_steps": 2665, "total_steps": 15621, "loss": 0.4266, "lr": 1.9698834469395644e-06, "epoch": 0.17060367454068243, "percentage": 17.06, "elapsed_time": "0:10:32", "remaining_time": "0:51:12", "throughput": 13273.71, "total_tokens": 8389440}
|
|
{"current_steps": 2670, "total_steps": 15621, "loss": 0.5565, "lr": 1.969610684712234e-06, "epoch": 0.1709237564816593, "percentage": 17.09, "elapsed_time": "0:10:32", "remaining_time": "0:51:08", "throughput": 13283.97, "total_tokens": 8404672}
|
|
{"current_steps": 2675, "total_steps": 15621, "loss": 0.5696, "lr": 1.9693367119145794e-06, "epoch": 0.17124383842263619, "percentage": 17.12, "elapsed_time": "0:10:33", "remaining_time": "0:51:05", "throughput": 13294.33, "total_tokens": 8420096}
|
|
{"current_steps": 2680, "total_steps": 15621, "loss": 0.6647, "lr": 1.969061528888659e-06, "epoch": 0.1715639203636131, "percentage": 17.16, "elapsed_time": "0:10:34", "remaining_time": "0:51:01", "throughput": 13305.35, "total_tokens": 8436288}
|
|
{"current_steps": 2685, "total_steps": 15621, "loss": 0.549, "lr": 1.9687851359780415e-06, "epoch": 0.17188400230458997, "percentage": 17.19, "elapsed_time": "0:10:34", "remaining_time": "0:50:58", "throughput": 13316.55, "total_tokens": 8452672}
|
|
{"current_steps": 2690, "total_steps": 15621, "loss": 0.4875, "lr": 1.968507533527807e-06, "epoch": 0.17220408424556685, "percentage": 17.22, "elapsed_time": "0:10:35", "remaining_time": "0:50:54", "throughput": 13327.8, "total_tokens": 8469120}
|
|
{"current_steps": 2695, "total_steps": 15621, "loss": 0.4694, "lr": 1.9682287218845455e-06, "epoch": 0.17252416618654376, "percentage": 17.25, "elapsed_time": "0:10:36", "remaining_time": "0:50:51", "throughput": 13338.19, "total_tokens": 8484736}
|
|
{"current_steps": 2700, "total_steps": 15621, "loss": 0.7448, "lr": 1.967948701396356e-06, "epoch": 0.17284424812752064, "percentage": 17.28, "elapsed_time": "0:10:36", "remaining_time": "0:50:47", "throughput": 13348.83, "total_tokens": 8500480}
|
|
{"current_steps": 2705, "total_steps": 15621, "loss": 0.3988, "lr": 1.9676674724128485e-06, "epoch": 0.17316433006849755, "percentage": 17.32, "elapsed_time": "0:10:37", "remaining_time": "0:50:43", "throughput": 13357.9, "total_tokens": 8514624}
|
|
{"current_steps": 2710, "total_steps": 15621, "loss": 0.4666, "lr": 1.9673850352851397e-06, "epoch": 0.17348441200947443, "percentage": 17.35, "elapsed_time": "0:10:38", "remaining_time": "0:50:39", "throughput": 13367.67, "total_tokens": 8529664}
|
|
{"current_steps": 2715, "total_steps": 15621, "loss": 0.5852, "lr": 1.967101390365856e-06, "epoch": 0.1738044939504513, "percentage": 17.38, "elapsed_time": "0:10:38", "remaining_time": "0:50:36", "throughput": 13378.22, "total_tokens": 8545280}
|
|
{"current_steps": 2720, "total_steps": 15621, "loss": 0.4975, "lr": 1.966816538009131e-06, "epoch": 0.1741245758914282, "percentage": 17.41, "elapsed_time": "0:10:39", "remaining_time": "0:50:32", "throughput": 13388.25, "total_tokens": 8560384}
|
|
{"current_steps": 2725, "total_steps": 15621, "loss": 0.538, "lr": 1.966530478570607e-06, "epoch": 0.1744446578324051, "percentage": 17.44, "elapsed_time": "0:10:40", "remaining_time": "0:50:29", "throughput": 13399.6, "total_tokens": 8576960}
|
|
{"current_steps": 2730, "total_steps": 15621, "loss": 0.4686, "lr": 1.9662432124074325e-06, "epoch": 0.174764739773382, "percentage": 17.48, "elapsed_time": "0:10:40", "remaining_time": "0:50:25", "throughput": 13409.69, "total_tokens": 8592384}
|
|
{"current_steps": 2735, "total_steps": 15621, "loss": 0.4889, "lr": 1.965954739878262e-06, "epoch": 0.17508482171435888, "percentage": 17.51, "elapsed_time": "0:10:41", "remaining_time": "0:50:22", "throughput": 13421.0, "total_tokens": 8609024}
|
|
{"current_steps": 2740, "total_steps": 15621, "loss": 0.4298, "lr": 1.965665061343257e-06, "epoch": 0.17540490365533576, "percentage": 17.54, "elapsed_time": "0:10:42", "remaining_time": "0:50:18", "throughput": 13431.53, "total_tokens": 8624768}
|
|
{"current_steps": 2745, "total_steps": 15621, "loss": 0.4643, "lr": 1.965374177164085e-06, "epoch": 0.17572498559631267, "percentage": 17.57, "elapsed_time": "0:10:42", "remaining_time": "0:50:15", "throughput": 13441.81, "total_tokens": 8640448}
|
|
{"current_steps": 2750, "total_steps": 15621, "loss": 0.5569, "lr": 1.9650820877039182e-06, "epoch": 0.17604506753728955, "percentage": 17.6, "elapsed_time": "0:10:43", "remaining_time": "0:50:11", "throughput": 13451.43, "total_tokens": 8655296}
|
|
{"current_steps": 2755, "total_steps": 15621, "loss": 0.4903, "lr": 1.9647887933274334e-06, "epoch": 0.17636514947826643, "percentage": 17.64, "elapsed_time": "0:10:44", "remaining_time": "0:50:08", "throughput": 13462.49, "total_tokens": 8671872}
|
|
{"current_steps": 2760, "total_steps": 15621, "loss": 0.4835, "lr": 1.9644942944008124e-06, "epoch": 0.17668523141924333, "percentage": 17.67, "elapsed_time": "0:10:44", "remaining_time": "0:50:04", "throughput": 13473.05, "total_tokens": 8687680}
|
|
{"current_steps": 2765, "total_steps": 15621, "loss": 0.6033, "lr": 1.96419859129174e-06, "epoch": 0.1770053133602202, "percentage": 17.7, "elapsed_time": "0:10:45", "remaining_time": "0:50:01", "throughput": 13482.81, "total_tokens": 8702912}
|
|
{"current_steps": 2770, "total_steps": 15621, "loss": 0.467, "lr": 1.963901684369406e-06, "epoch": 0.17732539530119712, "percentage": 17.73, "elapsed_time": "0:10:46", "remaining_time": "0:49:57", "throughput": 13492.75, "total_tokens": 8718144}
|
|
{"current_steps": 2775, "total_steps": 15621, "loss": 0.5107, "lr": 1.9636035740045013e-06, "epoch": 0.177645477242174, "percentage": 17.76, "elapsed_time": "0:10:46", "remaining_time": "0:49:54", "throughput": 13502.36, "total_tokens": 8732992}
|
|
{"current_steps": 2780, "total_steps": 15621, "loss": 0.6129, "lr": 1.9633042605692207e-06, "epoch": 0.17796555918315088, "percentage": 17.8, "elapsed_time": "0:10:47", "remaining_time": "0:49:50", "throughput": 13512.91, "total_tokens": 8749056}
|
|
{"current_steps": 2785, "total_steps": 15621, "loss": 0.4943, "lr": 1.9630037444372597e-06, "epoch": 0.17828564112412779, "percentage": 17.83, "elapsed_time": "0:10:48", "remaining_time": "0:49:47", "throughput": 13523.51, "total_tokens": 8765184}
|
|
{"current_steps": 2790, "total_steps": 15621, "loss": 0.4163, "lr": 1.9627020259838177e-06, "epoch": 0.17860572306510467, "percentage": 17.86, "elapsed_time": "0:10:48", "remaining_time": "0:49:43", "throughput": 13533.39, "total_tokens": 8780480}
|
|
{"current_steps": 2795, "total_steps": 15621, "loss": 0.5605, "lr": 1.9623991055855925e-06, "epoch": 0.17892580500608155, "percentage": 17.89, "elapsed_time": "0:10:49", "remaining_time": "0:49:40", "throughput": 13543.75, "total_tokens": 8796352}
|
|
{"current_steps": 2800, "total_steps": 15621, "loss": 0.4507, "lr": 1.962094983620784e-06, "epoch": 0.17924588694705845, "percentage": 17.92, "elapsed_time": "0:10:50", "remaining_time": "0:49:36", "throughput": 13552.55, "total_tokens": 8810688}
|
|
{"current_steps": 2805, "total_steps": 15621, "loss": 0.4204, "lr": 1.9617896604690925e-06, "epoch": 0.17956596888803533, "percentage": 17.96, "elapsed_time": "0:10:50", "remaining_time": "0:49:33", "throughput": 13562.59, "total_tokens": 8826304}
|
|
{"current_steps": 2810, "total_steps": 15621, "loss": 0.4545, "lr": 1.961483136511717e-06, "epoch": 0.17988605082901224, "percentage": 17.99, "elapsed_time": "0:10:51", "remaining_time": "0:49:29", "throughput": 13572.04, "total_tokens": 8841344}
|
|
{"current_steps": 2815, "total_steps": 15621, "loss": 0.6135, "lr": 1.9611754121313567e-06, "epoch": 0.18020613276998912, "percentage": 18.02, "elapsed_time": "0:10:52", "remaining_time": "0:49:26", "throughput": 13582.81, "total_tokens": 8857664}
|
|
{"current_steps": 2820, "total_steps": 15621, "loss": 0.5854, "lr": 1.960866487712209e-06, "epoch": 0.180526214710966, "percentage": 18.05, "elapsed_time": "0:10:52", "remaining_time": "0:49:23", "throughput": 13592.87, "total_tokens": 8873408}
|
|
{"current_steps": 2825, "total_steps": 15621, "loss": 0.4328, "lr": 1.9605563636399695e-06, "epoch": 0.1808462966519429, "percentage": 18.08, "elapsed_time": "0:10:53", "remaining_time": "0:49:19", "throughput": 13603.32, "total_tokens": 8889472}
|
|
{"current_steps": 2830, "total_steps": 15621, "loss": 0.6013, "lr": 1.9602450403018315e-06, "epoch": 0.18116637859291979, "percentage": 18.12, "elapsed_time": "0:10:54", "remaining_time": "0:49:16", "throughput": 13612.87, "total_tokens": 8904640}
|
|
{"current_steps": 2835, "total_steps": 15621, "loss": 0.4548, "lr": 1.9599325180864864e-06, "epoch": 0.18148646053389667, "percentage": 18.15, "elapsed_time": "0:10:54", "remaining_time": "0:49:13", "throughput": 13622.29, "total_tokens": 8919680}
|
|
{"current_steps": 2840, "total_steps": 15621, "loss": 0.446, "lr": 1.9596187973841216e-06, "epoch": 0.18180654247487357, "percentage": 18.18, "elapsed_time": "0:10:55", "remaining_time": "0:49:09", "throughput": 13632.3, "total_tokens": 8935360}
|
|
{"current_steps": 2845, "total_steps": 15621, "loss": 0.4871, "lr": 1.959303878586421e-06, "epoch": 0.18212662441585045, "percentage": 18.21, "elapsed_time": "0:10:56", "remaining_time": "0:49:06", "throughput": 13642.79, "total_tokens": 8951552}
|
|
{"current_steps": 2850, "total_steps": 15621, "loss": 0.585, "lr": 1.9589877620865647e-06, "epoch": 0.18244670635682736, "percentage": 18.24, "elapsed_time": "0:10:56", "remaining_time": "0:49:03", "throughput": 13653.96, "total_tokens": 8968576}
|
|
{"current_steps": 2855, "total_steps": 15621, "loss": 0.4598, "lr": 1.9586704482792277e-06, "epoch": 0.18276678829780424, "percentage": 18.28, "elapsed_time": "0:10:57", "remaining_time": "0:48:59", "throughput": 13663.49, "total_tokens": 8983744}
|
|
{"current_steps": 2860, "total_steps": 15621, "loss": 0.4344, "lr": 1.95835193756058e-06, "epoch": 0.18308687023878112, "percentage": 18.31, "elapsed_time": "0:10:58", "remaining_time": "0:48:56", "throughput": 13672.89, "total_tokens": 8999040}
|
|
{"current_steps": 2865, "total_steps": 15621, "loss": 0.4269, "lr": 1.9580322303282858e-06, "epoch": 0.18340695217975803, "percentage": 18.34, "elapsed_time": "0:10:58", "remaining_time": "0:48:53", "throughput": 13683.84, "total_tokens": 9015872}
|
|
{"current_steps": 2870, "total_steps": 15621, "loss": 0.4106, "lr": 1.9577113269815038e-06, "epoch": 0.1837270341207349, "percentage": 18.37, "elapsed_time": "0:10:59", "remaining_time": "0:48:50", "throughput": 13693.93, "total_tokens": 9031744}
|
|
{"current_steps": 2875, "total_steps": 15621, "loss": 0.5936, "lr": 1.957389227920885e-06, "epoch": 0.18404711606171179, "percentage": 18.4, "elapsed_time": "0:11:00", "remaining_time": "0:48:47", "throughput": 13704.04, "total_tokens": 9047872}
|
|
{"current_steps": 2880, "total_steps": 15621, "loss": 0.5225, "lr": 1.957065933548574e-06, "epoch": 0.1843671980026887, "percentage": 18.44, "elapsed_time": "0:11:00", "remaining_time": "0:48:43", "throughput": 13713.32, "total_tokens": 9062976}
|
|
{"current_steps": 2885, "total_steps": 15621, "loss": 0.5881, "lr": 1.956741444268208e-06, "epoch": 0.18468727994366557, "percentage": 18.47, "elapsed_time": "0:11:01", "remaining_time": "0:48:40", "throughput": 13722.8, "total_tokens": 9078208}
|
|
{"current_steps": 2890, "total_steps": 15621, "loss": 0.4778, "lr": 1.9564157604849154e-06, "epoch": 0.18500736188464248, "percentage": 18.5, "elapsed_time": "0:11:02", "remaining_time": "0:48:37", "throughput": 13733.13, "total_tokens": 9094720}
|
|
{"current_steps": 2895, "total_steps": 15621, "loss": 0.529, "lr": 1.9560888826053163e-06, "epoch": 0.18532744382561936, "percentage": 18.53, "elapsed_time": "0:11:02", "remaining_time": "0:48:34", "throughput": 13742.84, "total_tokens": 9110336}
|
|
{"current_steps": 2900, "total_steps": 15621, "loss": 0.5617, "lr": 1.9557608110375212e-06, "epoch": 0.18564752576659624, "percentage": 18.56, "elapsed_time": "0:11:03", "remaining_time": "0:48:30", "throughput": 13753.34, "total_tokens": 9126912}
|
|
{"current_steps": 2905, "total_steps": 15621, "loss": 0.5447, "lr": 1.955431546191132e-06, "epoch": 0.18596760770757315, "percentage": 18.6, "elapsed_time": "0:11:04", "remaining_time": "0:48:27", "throughput": 13762.96, "total_tokens": 9142400}
|
|
{"current_steps": 2910, "total_steps": 15621, "loss": 0.5254, "lr": 1.95510108847724e-06, "epoch": 0.18628768964855003, "percentage": 18.63, "elapsed_time": "0:11:04", "remaining_time": "0:48:24", "throughput": 13771.86, "total_tokens": 9157184}
|
|
{"current_steps": 2915, "total_steps": 15621, "loss": 0.526, "lr": 1.954769438308424e-06, "epoch": 0.1866077715895269, "percentage": 18.66, "elapsed_time": "0:11:05", "remaining_time": "0:48:21", "throughput": 13782.17, "total_tokens": 9173696}
|
|
{"current_steps": 2920, "total_steps": 15621, "loss": 0.5085, "lr": 1.954436596098754e-06, "epoch": 0.1869278535305038, "percentage": 18.69, "elapsed_time": "0:11:06", "remaining_time": "0:48:18", "throughput": 13792.35, "total_tokens": 9190080}
|
|
{"current_steps": 2925, "total_steps": 15621, "loss": 0.5828, "lr": 1.9541025622637875e-06, "epoch": 0.1872479354714807, "percentage": 18.72, "elapsed_time": "0:11:06", "remaining_time": "0:48:14", "throughput": 13800.62, "total_tokens": 9204352}
|
|
{"current_steps": 2930, "total_steps": 15621, "loss": 0.6086, "lr": 1.95376733722057e-06, "epoch": 0.1875680174124576, "percentage": 18.76, "elapsed_time": "0:11:07", "remaining_time": "0:48:11", "throughput": 13809.54, "total_tokens": 9219200}
|
|
{"current_steps": 2935, "total_steps": 15621, "loss": 0.4778, "lr": 1.9534309213876337e-06, "epoch": 0.18788809935343448, "percentage": 18.79, "elapsed_time": "0:11:08", "remaining_time": "0:48:08", "throughput": 13818.06, "total_tokens": 9233600}
|
|
{"current_steps": 2940, "total_steps": 15621, "loss": 0.4369, "lr": 1.953093315184997e-06, "epoch": 0.18820818129441136, "percentage": 18.82, "elapsed_time": "0:11:08", "remaining_time": "0:48:05", "throughput": 13827.91, "total_tokens": 9249536}
|
|
{"current_steps": 2945, "total_steps": 15621, "loss": 0.6525, "lr": 1.952754519034166e-06, "epoch": 0.18852826323538827, "percentage": 18.85, "elapsed_time": "0:11:09", "remaining_time": "0:48:01", "throughput": 13836.66, "total_tokens": 9264256}
|
|
{"current_steps": 2950, "total_steps": 15621, "loss": 0.4542, "lr": 1.9524145333581313e-06, "epoch": 0.18884834517636515, "percentage": 18.88, "elapsed_time": "0:11:10", "remaining_time": "0:47:58", "throughput": 13845.81, "total_tokens": 9279488}
|
|
{"current_steps": 2955, "total_steps": 15621, "loss": 0.5187, "lr": 1.952073358581369e-06, "epoch": 0.18916842711734205, "percentage": 18.92, "elapsed_time": "0:11:10", "remaining_time": "0:47:55", "throughput": 13854.6, "total_tokens": 9294336}
|
|
{"current_steps": 2960, "total_steps": 15621, "loss": 0.5615, "lr": 1.95173099512984e-06, "epoch": 0.18948850905831893, "percentage": 18.95, "elapsed_time": "0:11:11", "remaining_time": "0:47:52", "throughput": 13863.6, "total_tokens": 9309376}
|
|
{"current_steps": 2965, "total_steps": 15621, "loss": 0.4698, "lr": 1.9513874434309894e-06, "epoch": 0.1898085909992958, "percentage": 18.98, "elapsed_time": "0:11:12", "remaining_time": "0:47:49", "throughput": 13872.32, "total_tokens": 9324224}
|
|
{"current_steps": 2970, "total_steps": 15621, "loss": 0.4491, "lr": 1.951042703913745e-06, "epoch": 0.19012867294027272, "percentage": 19.01, "elapsed_time": "0:11:12", "remaining_time": "0:47:45", "throughput": 13881.12, "total_tokens": 9339136}
|
|
{"current_steps": 2975, "total_steps": 15621, "loss": 0.4492, "lr": 1.950696777008518e-06, "epoch": 0.1904487548812496, "percentage": 19.04, "elapsed_time": "0:11:13", "remaining_time": "0:47:42", "throughput": 13890.45, "total_tokens": 9354688}
|
|
{"current_steps": 2980, "total_steps": 15621, "loss": 0.4948, "lr": 1.9503496631472025e-06, "epoch": 0.19076883682222648, "percentage": 19.08, "elapsed_time": "0:11:14", "remaining_time": "0:47:39", "throughput": 13899.22, "total_tokens": 9369664}
|
|
{"current_steps": 2985, "total_steps": 15621, "loss": 0.6353, "lr": 1.9500013627631746e-06, "epoch": 0.19108891876320339, "percentage": 19.11, "elapsed_time": "0:11:14", "remaining_time": "0:47:36", "throughput": 13908.12, "total_tokens": 9384768}
|
|
{"current_steps": 2990, "total_steps": 15621, "loss": 0.3771, "lr": 1.949651876291291e-06, "epoch": 0.19140900070418027, "percentage": 19.14, "elapsed_time": "0:11:15", "remaining_time": "0:47:33", "throughput": 13917.44, "total_tokens": 9400320}
|
|
{"current_steps": 2995, "total_steps": 15621, "loss": 0.4872, "lr": 1.9493012041678894e-06, "epoch": 0.19172908264515717, "percentage": 19.17, "elapsed_time": "0:11:16", "remaining_time": "0:47:30", "throughput": 13926.55, "total_tokens": 9415872}
|
|
{"current_steps": 3000, "total_steps": 15621, "loss": 0.5988, "lr": 1.9489493468307883e-06, "epoch": 0.19204916458613405, "percentage": 19.2, "elapsed_time": "0:11:16", "remaining_time": "0:47:27", "throughput": 13936.94, "total_tokens": 9432704}
|
|
{"current_steps": 3005, "total_steps": 15621, "loss": 0.5456, "lr": 1.948596304719286e-06, "epoch": 0.19236924652711093, "percentage": 19.24, "elapsed_time": "0:11:17", "remaining_time": "0:47:24", "throughput": 13946.03, "total_tokens": 9448192}
|
|
{"current_steps": 3010, "total_steps": 15621, "loss": 0.4447, "lr": 1.9482420782741594e-06, "epoch": 0.19268932846808784, "percentage": 19.27, "elapsed_time": "0:11:18", "remaining_time": "0:47:21", "throughput": 13955.92, "total_tokens": 9464576}
|
|
{"current_steps": 3015, "total_steps": 15621, "loss": 0.5591, "lr": 1.9478866679376647e-06, "epoch": 0.19300941040906472, "percentage": 19.3, "elapsed_time": "0:11:18", "remaining_time": "0:47:18", "throughput": 13964.98, "total_tokens": 9479936}
|
|
{"current_steps": 3020, "total_steps": 15621, "loss": 0.5564, "lr": 1.9475300741535353e-06, "epoch": 0.1933294923500416, "percentage": 19.33, "elapsed_time": "0:11:19", "remaining_time": "0:47:15", "throughput": 13975.72, "total_tokens": 9497280}
|
|
{"current_steps": 3025, "total_steps": 15621, "loss": 0.4714, "lr": 1.9471722973669833e-06, "epoch": 0.1936495742910185, "percentage": 19.36, "elapsed_time": "0:11:20", "remaining_time": "0:47:12", "throughput": 13986.42, "total_tokens": 9514496}
|
|
{"current_steps": 3030, "total_steps": 15621, "loss": 0.3979, "lr": 1.946813338024697e-06, "epoch": 0.19396965623199539, "percentage": 19.4, "elapsed_time": "0:11:20", "remaining_time": "0:47:09", "throughput": 13995.15, "total_tokens": 9529536}
|
|
{"current_steps": 3035, "total_steps": 15621, "loss": 0.5342, "lr": 1.9464531965748414e-06, "epoch": 0.1942897381729723, "percentage": 19.43, "elapsed_time": "0:11:21", "remaining_time": "0:47:06", "throughput": 14004.64, "total_tokens": 9545472}
|
|
{"current_steps": 3040, "total_steps": 15621, "loss": 0.5827, "lr": 1.9460918734670573e-06, "epoch": 0.19460982011394917, "percentage": 19.46, "elapsed_time": "0:11:22", "remaining_time": "0:47:03", "throughput": 14013.59, "total_tokens": 9560960}
|
|
{"current_steps": 3045, "total_steps": 15621, "loss": 0.543, "lr": 1.945729369152461e-06, "epoch": 0.19492990205492605, "percentage": 19.49, "elapsed_time": "0:11:22", "remaining_time": "0:47:00", "throughput": 14022.34, "total_tokens": 9576320}
|
|
{"current_steps": 3050, "total_steps": 15621, "loss": 0.5533, "lr": 1.945365684083643e-06, "epoch": 0.19524998399590296, "percentage": 19.52, "elapsed_time": "0:11:23", "remaining_time": "0:46:57", "throughput": 14031.48, "total_tokens": 9592192}
|
|
{"current_steps": 3055, "total_steps": 15621, "loss": 0.615, "lr": 1.945000818714668e-06, "epoch": 0.19557006593687984, "percentage": 19.56, "elapsed_time": "0:11:24", "remaining_time": "0:46:54", "throughput": 14040.85, "total_tokens": 9608128}
|
|
{"current_steps": 3060, "total_steps": 15621, "loss": 0.546, "lr": 1.944634773501076e-06, "epoch": 0.19589014787785672, "percentage": 19.59, "elapsed_time": "0:11:24", "remaining_time": "0:46:51", "throughput": 14050.11, "total_tokens": 9623872}
|
|
{"current_steps": 3065, "total_steps": 15621, "loss": 0.5662, "lr": 1.9442675488998783e-06, "epoch": 0.19621022981883363, "percentage": 19.62, "elapsed_time": "0:11:25", "remaining_time": "0:46:48", "throughput": 14058.96, "total_tokens": 9639488}
|
|
{"current_steps": 3070, "total_steps": 15621, "loss": 0.5017, "lr": 1.9438991453695587e-06, "epoch": 0.1965303117598105, "percentage": 19.65, "elapsed_time": "0:11:26", "remaining_time": "0:46:45", "throughput": 14068.37, "total_tokens": 9655680}
|
|
{"current_steps": 3075, "total_steps": 15621, "loss": 0.5648, "lr": 1.943529563370073e-06, "epoch": 0.1968503937007874, "percentage": 19.69, "elapsed_time": "0:11:26", "remaining_time": "0:46:42", "throughput": 14076.7, "total_tokens": 9670400}
|
|
{"current_steps": 3080, "total_steps": 15621, "loss": 0.3815, "lr": 1.9431588033628495e-06, "epoch": 0.1971704756417643, "percentage": 19.72, "elapsed_time": "0:11:27", "remaining_time": "0:46:39", "throughput": 14085.2, "total_tokens": 9685504}
|
|
{"current_steps": 3085, "total_steps": 15621, "loss": 0.6302, "lr": 1.9427868658107862e-06, "epoch": 0.19749055758274117, "percentage": 19.75, "elapsed_time": "0:11:28", "remaining_time": "0:46:37", "throughput": 14094.83, "total_tokens": 9701952}
|
|
{"current_steps": 3090, "total_steps": 15621, "loss": 0.449, "lr": 1.942413751178251e-06, "epoch": 0.19781063952371808, "percentage": 19.78, "elapsed_time": "0:11:28", "remaining_time": "0:46:34", "throughput": 14103.31, "total_tokens": 9716928}
|
|
{"current_steps": 3095, "total_steps": 15621, "loss": 0.6552, "lr": 1.9420394599310826e-06, "epoch": 0.19813072146469496, "percentage": 19.81, "elapsed_time": "0:11:29", "remaining_time": "0:46:31", "throughput": 14111.86, "total_tokens": 9732096}
|
|
{"current_steps": 3100, "total_steps": 15621, "loss": 0.5247, "lr": 1.941663992536588e-06, "epoch": 0.19845080340567184, "percentage": 19.85, "elapsed_time": "0:11:30", "remaining_time": "0:46:28", "throughput": 14120.64, "total_tokens": 9747648}
|
|
{"current_steps": 3105, "total_steps": 15621, "loss": 0.4467, "lr": 1.941287349463542e-06, "epoch": 0.19877088534664875, "percentage": 19.88, "elapsed_time": "0:11:30", "remaining_time": "0:46:25", "throughput": 14129.29, "total_tokens": 9763072}
|
|
{"current_steps": 3110, "total_steps": 15621, "loss": 0.4856, "lr": 1.940909531182188e-06, "epoch": 0.19909096728762563, "percentage": 19.91, "elapsed_time": "0:11:31", "remaining_time": "0:46:22", "throughput": 14137.75, "total_tokens": 9778176}
|
|
{"current_steps": 3115, "total_steps": 15621, "loss": 0.6168, "lr": 1.9405305381642375e-06, "epoch": 0.19941104922860253, "percentage": 19.94, "elapsed_time": "0:11:32", "remaining_time": "0:46:19", "throughput": 14146.52, "total_tokens": 9793536}
|
|
{"current_steps": 3120, "total_steps": 15621, "loss": 0.5055, "lr": 1.9401503708828665e-06, "epoch": 0.1997311311695794, "percentage": 19.97, "elapsed_time": "0:11:32", "remaining_time": "0:46:16", "throughput": 14154.66, "total_tokens": 9808192}
|
|
{"current_steps": 3125, "total_steps": 15621, "loss": 0.5889, "lr": 1.939769029812719e-06, "epoch": 0.2000512131105563, "percentage": 20.01, "elapsed_time": "0:11:33", "remaining_time": "0:46:13", "throughput": 14163.01, "total_tokens": 9823232}
|
|
{"current_steps": 3128, "total_steps": 15621, "eval_loss": 0.4917045831680298, "epoch": 0.20024326227514244, "percentage": 20.02, "elapsed_time": "0:12:24", "remaining_time": "0:49:33", "throughput": 13205.54, "total_tokens": 9832064}
|
|
{"current_steps": 3130, "total_steps": 15621, "loss": 0.5998, "lr": 1.939386515429904e-06, "epoch": 0.2003712950515332, "percentage": 20.04, "elapsed_time": "0:13:19", "remaining_time": "0:53:08", "throughput": 12314.28, "total_tokens": 9839488}
|
|
{"current_steps": 3135, "total_steps": 15621, "loss": 0.4234, "lr": 1.9390028282119942e-06, "epoch": 0.20069137699251008, "percentage": 20.07, "elapsed_time": "0:13:19", "remaining_time": "0:53:05", "throughput": 12324.44, "total_tokens": 9856192}
|
|
{"current_steps": 3140, "total_steps": 15621, "loss": 0.5139, "lr": 1.938617968638029e-06, "epoch": 0.201011458933487, "percentage": 20.1, "elapsed_time": "0:13:20", "remaining_time": "0:53:01", "throughput": 12333.29, "total_tokens": 9871552}
|
|
{"current_steps": 3145, "total_steps": 15621, "loss": 0.5103, "lr": 1.938231937188509e-06, "epoch": 0.20133154087446387, "percentage": 20.13, "elapsed_time": "0:13:21", "remaining_time": "0:52:57", "throughput": 12341.56, "total_tokens": 9886016}
|
|
{"current_steps": 3150, "total_steps": 15621, "loss": 0.6257, "lr": 1.9378447343453995e-06, "epoch": 0.20165162281544075, "percentage": 20.17, "elapsed_time": "0:13:21", "remaining_time": "0:52:54", "throughput": 12352.25, "total_tokens": 9903552}
|
|
{"current_steps": 3155, "total_steps": 15621, "loss": 0.3501, "lr": 1.9374563605921275e-06, "epoch": 0.20197170475641765, "percentage": 20.2, "elapsed_time": "0:13:22", "remaining_time": "0:52:50", "throughput": 12362.31, "total_tokens": 9920320}
|
|
{"current_steps": 3160, "total_steps": 15621, "loss": 0.5844, "lr": 1.937066816413582e-06, "epoch": 0.20229178669739453, "percentage": 20.23, "elapsed_time": "0:13:23", "remaining_time": "0:52:47", "throughput": 12371.51, "total_tokens": 9935936}
|
|
{"current_steps": 3165, "total_steps": 15621, "loss": 0.4866, "lr": 1.9366761022961146e-06, "epoch": 0.2026118686383714, "percentage": 20.26, "elapsed_time": "0:13:23", "remaining_time": "0:52:43", "throughput": 12380.03, "total_tokens": 9950912}
|
|
{"current_steps": 3170, "total_steps": 15621, "loss": 0.5726, "lr": 1.9362842187275354e-06, "epoch": 0.20293195057934832, "percentage": 20.29, "elapsed_time": "0:13:24", "remaining_time": "0:52:39", "throughput": 12388.74, "total_tokens": 9966080}
|
|
{"current_steps": 3175, "total_steps": 15621, "loss": 0.4769, "lr": 1.9358911661971155e-06, "epoch": 0.2032520325203252, "percentage": 20.33, "elapsed_time": "0:13:25", "remaining_time": "0:52:36", "throughput": 12396.19, "total_tokens": 9982080}
|
|
{"current_steps": 3180, "total_steps": 15621, "loss": 0.4818, "lr": 1.9354969451955864e-06, "epoch": 0.2035721144613021, "percentage": 20.36, "elapsed_time": "0:13:25", "remaining_time": "0:52:32", "throughput": 12404.36, "total_tokens": 9996544}
|
|
{"current_steps": 3185, "total_steps": 15621, "loss": 0.5595, "lr": 1.9351015562151375e-06, "epoch": 0.20389219640227899, "percentage": 20.39, "elapsed_time": "0:13:26", "remaining_time": "0:52:29", "throughput": 12413.18, "total_tokens": 10011776}
|
|
{"current_steps": 3190, "total_steps": 15621, "loss": 0.4337, "lr": 1.934704999749416e-06, "epoch": 0.20421227834325587, "percentage": 20.42, "elapsed_time": "0:13:27", "remaining_time": "0:52:25", "throughput": 12422.1, "total_tokens": 10027264}
|
|
{"current_steps": 3195, "total_steps": 15621, "loss": 0.4251, "lr": 1.9343072762935274e-06, "epoch": 0.20453236028423277, "percentage": 20.45, "elapsed_time": "0:13:27", "remaining_time": "0:52:21", "throughput": 12430.84, "total_tokens": 10042432}
|
|
{"current_steps": 3200, "total_steps": 15621, "loss": 0.4122, "lr": 1.933908386344035e-06, "epoch": 0.20485244222520965, "percentage": 20.49, "elapsed_time": "0:13:28", "remaining_time": "0:52:18", "throughput": 12439.55, "total_tokens": 10057792}
|
|
{"current_steps": 3205, "total_steps": 15621, "loss": 0.528, "lr": 1.9335083303989565e-06, "epoch": 0.20517252416618653, "percentage": 20.52, "elapsed_time": "0:13:29", "remaining_time": "0:52:14", "throughput": 12449.78, "total_tokens": 10074752}
|
|
{"current_steps": 3210, "total_steps": 15621, "loss": 0.5767, "lr": 1.9331071089577674e-06, "epoch": 0.20549260610716344, "percentage": 20.55, "elapsed_time": "0:13:29", "remaining_time": "0:52:11", "throughput": 12459.1, "total_tokens": 10090752}
|
|
{"current_steps": 3215, "total_steps": 15621, "loss": 0.5028, "lr": 1.9327047225213963e-06, "epoch": 0.20581268804814032, "percentage": 20.58, "elapsed_time": "0:13:30", "remaining_time": "0:52:07", "throughput": 12467.96, "total_tokens": 10106240}
|
|
{"current_steps": 3220, "total_steps": 15621, "loss": 0.4154, "lr": 1.9323011715922283e-06, "epoch": 0.20613276998911723, "percentage": 20.61, "elapsed_time": "0:13:31", "remaining_time": "0:52:04", "throughput": 12476.89, "total_tokens": 10121856}
|
|
{"current_steps": 3225, "total_steps": 15621, "loss": 0.4682, "lr": 1.931896456674101e-06, "epoch": 0.2064528519300941, "percentage": 20.65, "elapsed_time": "0:13:31", "remaining_time": "0:52:00", "throughput": 12485.81, "total_tokens": 10137408}
|
|
{"current_steps": 3230, "total_steps": 15621, "loss": 0.4611, "lr": 1.931490578272306e-06, "epoch": 0.20677293387107099, "percentage": 20.68, "elapsed_time": "0:13:32", "remaining_time": "0:51:57", "throughput": 12494.41, "total_tokens": 10152640}
|
|
{"current_steps": 3235, "total_steps": 15621, "loss": 0.3551, "lr": 1.9310835368935867e-06, "epoch": 0.2070930158120479, "percentage": 20.71, "elapsed_time": "0:13:33", "remaining_time": "0:51:53", "throughput": 12503.03, "total_tokens": 10167936}
|
|
{"current_steps": 3240, "total_steps": 15621, "loss": 0.4241, "lr": 1.93067533304614e-06, "epoch": 0.20741309775302477, "percentage": 20.74, "elapsed_time": "0:13:33", "remaining_time": "0:51:50", "throughput": 12511.83, "total_tokens": 10183360}
|
|
{"current_steps": 3245, "total_steps": 15621, "loss": 0.5624, "lr": 1.9302659672396128e-06, "epoch": 0.20773317969400165, "percentage": 20.77, "elapsed_time": "0:13:34", "remaining_time": "0:51:46", "throughput": 12520.23, "total_tokens": 10198208}
|
|
{"current_steps": 3250, "total_steps": 15621, "loss": 0.4975, "lr": 1.9298554399851025e-06, "epoch": 0.20805326163497856, "percentage": 20.81, "elapsed_time": "0:13:35", "remaining_time": "0:51:43", "throughput": 12528.91, "total_tokens": 10213568}
|
|
{"current_steps": 3255, "total_steps": 15621, "loss": 0.4755, "lr": 1.929443751795158e-06, "epoch": 0.20837334357595544, "percentage": 20.84, "elapsed_time": "0:13:35", "remaining_time": "0:51:39", "throughput": 12538.5, "total_tokens": 10230080}
|
|
{"current_steps": 3260, "total_steps": 15621, "loss": 0.4792, "lr": 1.929030903183776e-06, "epoch": 0.20869342551693235, "percentage": 20.87, "elapsed_time": "0:13:36", "remaining_time": "0:51:36", "throughput": 12548.23, "total_tokens": 10246912}
|
|
{"current_steps": 3265, "total_steps": 15621, "loss": 0.5231, "lr": 1.9286168946664033e-06, "epoch": 0.20901350745790923, "percentage": 20.9, "elapsed_time": "0:13:37", "remaining_time": "0:51:32", "throughput": 12556.97, "total_tokens": 10262464}
|
|
{"current_steps": 3270, "total_steps": 15621, "loss": 0.6606, "lr": 1.9282017267599352e-06, "epoch": 0.2093335893988861, "percentage": 20.93, "elapsed_time": "0:13:37", "remaining_time": "0:51:29", "throughput": 12565.69, "total_tokens": 10278016}
|
|
{"current_steps": 3275, "total_steps": 15621, "loss": 0.5055, "lr": 1.9277853999827125e-06, "epoch": 0.209653671339863, "percentage": 20.97, "elapsed_time": "0:13:38", "remaining_time": "0:51:26", "throughput": 12574.62, "total_tokens": 10293824}
|
|
{"current_steps": 3280, "total_steps": 15621, "loss": 0.5263, "lr": 1.9273679148545244e-06, "epoch": 0.2099737532808399, "percentage": 21.0, "elapsed_time": "0:13:39", "remaining_time": "0:51:22", "throughput": 12583.57, "total_tokens": 10309568}
|
|
{"current_steps": 3285, "total_steps": 15621, "loss": 0.4181, "lr": 1.9269492718966062e-06, "epoch": 0.21029383522181677, "percentage": 21.03, "elapsed_time": "0:13:39", "remaining_time": "0:51:19", "throughput": 12592.74, "total_tokens": 10325696}
|
|
{"current_steps": 3290, "total_steps": 15621, "loss": 0.5398, "lr": 1.9265294716316384e-06, "epoch": 0.21061391716279368, "percentage": 21.06, "elapsed_time": "0:13:40", "remaining_time": "0:51:15", "throughput": 12602.01, "total_tokens": 10342016}
|
|
{"current_steps": 3295, "total_steps": 15621, "loss": 0.468, "lr": 1.926108514583747e-06, "epoch": 0.21093399910377056, "percentage": 21.09, "elapsed_time": "0:13:41", "remaining_time": "0:51:12", "throughput": 12610.64, "total_tokens": 10357632}
|
|
{"current_steps": 3300, "total_steps": 15621, "loss": 0.4805, "lr": 1.925686401278501e-06, "epoch": 0.21125408104474747, "percentage": 21.13, "elapsed_time": "0:13:42", "remaining_time": "0:51:09", "throughput": 12619.2, "total_tokens": 10373056}
|
|
{"current_steps": 3305, "total_steps": 15621, "loss": 0.6377, "lr": 1.9252631322429143e-06, "epoch": 0.21157416298572435, "percentage": 21.16, "elapsed_time": "0:13:42", "remaining_time": "0:51:05", "throughput": 12628.35, "total_tokens": 10389248}
|
|
{"current_steps": 3310, "total_steps": 15621, "loss": 0.4445, "lr": 1.9248387080054435e-06, "epoch": 0.21189424492670123, "percentage": 21.19, "elapsed_time": "0:13:43", "remaining_time": "0:51:02", "throughput": 12637.08, "total_tokens": 10404864}
|
|
{"current_steps": 3315, "total_steps": 15621, "loss": 0.4925, "lr": 1.9244131290959864e-06, "epoch": 0.21221432686767813, "percentage": 21.22, "elapsed_time": "0:13:44", "remaining_time": "0:50:58", "throughput": 12645.63, "total_tokens": 10420416}
|
|
{"current_steps": 3320, "total_steps": 15621, "loss": 0.4311, "lr": 1.9239863960458845e-06, "epoch": 0.212534408808655, "percentage": 21.25, "elapsed_time": "0:13:44", "remaining_time": "0:50:55", "throughput": 12653.77, "total_tokens": 10435456}
|
|
{"current_steps": 3325, "total_steps": 15621, "loss": 0.4857, "lr": 1.923558509387918e-06, "epoch": 0.21285449074963192, "percentage": 21.29, "elapsed_time": "0:13:45", "remaining_time": "0:50:52", "throughput": 12662.8, "total_tokens": 10451584}
|
|
{"current_steps": 3330, "total_steps": 15621, "loss": 0.3719, "lr": 1.9231294696563086e-06, "epoch": 0.2131745726906088, "percentage": 21.32, "elapsed_time": "0:13:46", "remaining_time": "0:50:48", "throughput": 12671.78, "total_tokens": 10467584}
|
|
{"current_steps": 3335, "total_steps": 15621, "loss": 0.432, "lr": 1.922699277386718e-06, "epoch": 0.21349465463158568, "percentage": 21.35, "elapsed_time": "0:13:46", "remaining_time": "0:50:45", "throughput": 12680.41, "total_tokens": 10483264}
|
|
{"current_steps": 3340, "total_steps": 15621, "loss": 0.5869, "lr": 1.9222679331162454e-06, "epoch": 0.21381473657256259, "percentage": 21.38, "elapsed_time": "0:13:47", "remaining_time": "0:50:42", "throughput": 12688.77, "total_tokens": 10498560}
|
|
{"current_steps": 3345, "total_steps": 15621, "loss": 0.4431, "lr": 1.92183543738343e-06, "epoch": 0.21413481851353947, "percentage": 21.41, "elapsed_time": "0:13:48", "remaining_time": "0:50:38", "throughput": 12697.31, "total_tokens": 10514176}
|
|
{"current_steps": 3350, "total_steps": 15621, "loss": 0.4427, "lr": 1.9214017907282475e-06, "epoch": 0.21445490045451635, "percentage": 21.45, "elapsed_time": "0:13:48", "remaining_time": "0:50:35", "throughput": 12705.96, "total_tokens": 10529792}
|
|
{"current_steps": 3355, "total_steps": 15621, "loss": 0.499, "lr": 1.9209669936921105e-06, "epoch": 0.21477498239549325, "percentage": 21.48, "elapsed_time": "0:13:49", "remaining_time": "0:50:32", "throughput": 12714.84, "total_tokens": 10545856}
|
|
{"current_steps": 3360, "total_steps": 15621, "loss": 0.4092, "lr": 1.920531046817869e-06, "epoch": 0.21509506433647013, "percentage": 21.51, "elapsed_time": "0:13:50", "remaining_time": "0:50:29", "throughput": 12724.0, "total_tokens": 10562368}
|
|
{"current_steps": 3365, "total_steps": 15621, "loss": 0.6207, "lr": 1.9200939506498067e-06, "epoch": 0.21541514627744704, "percentage": 21.54, "elapsed_time": "0:13:50", "remaining_time": "0:50:25", "throughput": 12732.02, "total_tokens": 10577280}
|
|
{"current_steps": 3370, "total_steps": 15621, "loss": 0.5719, "lr": 1.9196557057336446e-06, "epoch": 0.21573522821842392, "percentage": 21.57, "elapsed_time": "0:13:51", "remaining_time": "0:50:22", "throughput": 12740.23, "total_tokens": 10592384}
|
|
{"current_steps": 3375, "total_steps": 15621, "loss": 0.4577, "lr": 1.9192163126165354e-06, "epoch": 0.2160553101594008, "percentage": 21.61, "elapsed_time": "0:13:52", "remaining_time": "0:50:19", "throughput": 12749.13, "total_tokens": 10608704}
|
|
{"current_steps": 3380, "total_steps": 15621, "loss": 0.4011, "lr": 1.9187757718470673e-06, "epoch": 0.2163753921003777, "percentage": 21.64, "elapsed_time": "0:13:52", "remaining_time": "0:50:16", "throughput": 12758.27, "total_tokens": 10625280}
|
|
{"current_steps": 3385, "total_steps": 15621, "loss": 0.5336, "lr": 1.9183340839752606e-06, "epoch": 0.21669547404135459, "percentage": 21.67, "elapsed_time": "0:13:53", "remaining_time": "0:50:12", "throughput": 12767.01, "total_tokens": 10641152}
|
|
{"current_steps": 3390, "total_steps": 15621, "loss": 0.4227, "lr": 1.9178912495525672e-06, "epoch": 0.21701555598233147, "percentage": 21.7, "elapsed_time": "0:13:54", "remaining_time": "0:50:09", "throughput": 12775.87, "total_tokens": 10657472}
|
|
{"current_steps": 3395, "total_steps": 15621, "loss": 0.4942, "lr": 1.917447269131872e-06, "epoch": 0.21733563792330837, "percentage": 21.73, "elapsed_time": "0:13:54", "remaining_time": "0:50:06", "throughput": 12784.75, "total_tokens": 10673600}
|
|
{"current_steps": 3400, "total_steps": 15621, "loss": 0.5717, "lr": 1.917002143267489e-06, "epoch": 0.21765571986428525, "percentage": 21.77, "elapsed_time": "0:13:55", "remaining_time": "0:50:03", "throughput": 12793.21, "total_tokens": 10689344}
|
|
{"current_steps": 3405, "total_steps": 15621, "loss": 0.4315, "lr": 1.9165558725151633e-06, "epoch": 0.21797580180526216, "percentage": 21.8, "elapsed_time": "0:13:56", "remaining_time": "0:50:00", "throughput": 12801.15, "total_tokens": 10704384}
|
|
{"current_steps": 3410, "total_steps": 15621, "loss": 0.4937, "lr": 1.9161084574320692e-06, "epoch": 0.21829588374623904, "percentage": 21.83, "elapsed_time": "0:13:56", "remaining_time": "0:49:56", "throughput": 12809.98, "total_tokens": 10720512}
|
|
{"current_steps": 3415, "total_steps": 15621, "loss": 0.485, "lr": 1.91565989857681e-06, "epoch": 0.21861596568721592, "percentage": 21.86, "elapsed_time": "0:13:57", "remaining_time": "0:49:53", "throughput": 12817.99, "total_tokens": 10735744}
|
|
{"current_steps": 3420, "total_steps": 15621, "loss": 0.4665, "lr": 1.9152101965094162e-06, "epoch": 0.21893604762819283, "percentage": 21.89, "elapsed_time": "0:13:58", "remaining_time": "0:49:50", "throughput": 12825.93, "total_tokens": 10750848}
|
|
{"current_steps": 3425, "total_steps": 15621, "loss": 0.4939, "lr": 1.9147593517913464e-06, "epoch": 0.2192561295691697, "percentage": 21.93, "elapsed_time": "0:13:58", "remaining_time": "0:49:47", "throughput": 12833.54, "total_tokens": 10765632}
|
|
{"current_steps": 3430, "total_steps": 15621, "loss": 0.3868, "lr": 1.914307364985485e-06, "epoch": 0.21957621151014659, "percentage": 21.96, "elapsed_time": "0:13:59", "remaining_time": "0:49:43", "throughput": 12841.51, "total_tokens": 10780928}
|
|
{"current_steps": 3435, "total_steps": 15621, "loss": 0.4273, "lr": 1.913854236656144e-06, "epoch": 0.2198962934511235, "percentage": 21.99, "elapsed_time": "0:14:00", "remaining_time": "0:49:40", "throughput": 12849.98, "total_tokens": 10796864}
|
|
{"current_steps": 3440, "total_steps": 15621, "loss": 0.4643, "lr": 1.9133999673690584e-06, "epoch": 0.22021637539210037, "percentage": 22.02, "elapsed_time": "0:14:00", "remaining_time": "0:49:37", "throughput": 12858.5, "total_tokens": 10812672}
|
|
{"current_steps": 3445, "total_steps": 15621, "loss": 0.4782, "lr": 1.9129445576913886e-06, "epoch": 0.22053645733307728, "percentage": 22.05, "elapsed_time": "0:14:01", "remaining_time": "0:49:34", "throughput": 12867.01, "total_tokens": 10828544}
|
|
{"current_steps": 3450, "total_steps": 15621, "loss": 0.5318, "lr": 1.91248800819172e-06, "epoch": 0.22085653927405416, "percentage": 22.09, "elapsed_time": "0:14:02", "remaining_time": "0:49:31", "throughput": 12875.41, "total_tokens": 10844288}
|
|
{"current_steps": 3455, "total_steps": 15621, "loss": 0.5306, "lr": 1.912030319440059e-06, "epoch": 0.22117662121503104, "percentage": 22.12, "elapsed_time": "0:14:02", "remaining_time": "0:49:28", "throughput": 12883.83, "total_tokens": 10860160}
|
|
{"current_steps": 3460, "total_steps": 15621, "loss": 0.6076, "lr": 1.9115714920078354e-06, "epoch": 0.22149670315600795, "percentage": 22.15, "elapsed_time": "0:14:03", "remaining_time": "0:49:25", "throughput": 12892.14, "total_tokens": 10875968}
|
|
{"current_steps": 3465, "total_steps": 15621, "loss": 0.3367, "lr": 1.9111115264679017e-06, "epoch": 0.22181678509698483, "percentage": 22.18, "elapsed_time": "0:14:04", "remaining_time": "0:49:21", "throughput": 12900.75, "total_tokens": 10892096}
|
|
{"current_steps": 3470, "total_steps": 15621, "loss": 0.4611, "lr": 1.910650423394529e-06, "epoch": 0.2221368670379617, "percentage": 22.21, "elapsed_time": "0:14:04", "remaining_time": "0:49:18", "throughput": 12909.53, "total_tokens": 10908544}
|
|
{"current_steps": 3475, "total_steps": 15621, "loss": 0.4804, "lr": 1.910188183363411e-06, "epoch": 0.2224569489789386, "percentage": 22.25, "elapsed_time": "0:14:05", "remaining_time": "0:49:15", "throughput": 12918.05, "total_tokens": 10924544}
|
|
{"current_steps": 3480, "total_steps": 15621, "loss": 0.4408, "lr": 1.909724806951659e-06, "epoch": 0.2227770309199155, "percentage": 22.28, "elapsed_time": "0:14:06", "remaining_time": "0:49:12", "throughput": 12927.49, "total_tokens": 10941888}
|
|
{"current_steps": 3485, "total_steps": 15621, "loss": 0.4689, "lr": 1.909260294737804e-06, "epoch": 0.2230971128608924, "percentage": 22.31, "elapsed_time": "0:14:07", "remaining_time": "0:49:09", "throughput": 12936.51, "total_tokens": 10958592}
|
|
{"current_steps": 3490, "total_steps": 15621, "loss": 0.5319, "lr": 1.9087946473017953e-06, "epoch": 0.22341719480186928, "percentage": 22.34, "elapsed_time": "0:14:07", "remaining_time": "0:49:06", "throughput": 12944.61, "total_tokens": 10974208}
|
|
{"current_steps": 3495, "total_steps": 15621, "loss": 0.4363, "lr": 1.9083278652249992e-06, "epoch": 0.22373727674284616, "percentage": 22.37, "elapsed_time": "0:14:08", "remaining_time": "0:49:03", "throughput": 12952.05, "total_tokens": 10988928}
|
|
{"current_steps": 3500, "total_steps": 15621, "loss": 0.4327, "lr": 1.9078599490901983e-06, "epoch": 0.22405735868382307, "percentage": 22.41, "elapsed_time": "0:14:09", "remaining_time": "0:49:00", "throughput": 12961.29, "total_tokens": 11005952}
|
|
{"current_steps": 3505, "total_steps": 15621, "loss": 0.4012, "lr": 1.9073908994815914e-06, "epoch": 0.22437744062479995, "percentage": 22.44, "elapsed_time": "0:14:09", "remaining_time": "0:48:57", "throughput": 12968.66, "total_tokens": 11020608}
|
|
{"current_steps": 3510, "total_steps": 15621, "loss": 0.4999, "lr": 1.9069207169847928e-06, "epoch": 0.22469752256577685, "percentage": 22.47, "elapsed_time": "0:14:10", "remaining_time": "0:48:54", "throughput": 12977.17, "total_tokens": 11036736}
|
|
{"current_steps": 3515, "total_steps": 15621, "loss": 0.3645, "lr": 1.9064494021868302e-06, "epoch": 0.22501760450675373, "percentage": 22.5, "elapsed_time": "0:14:11", "remaining_time": "0:48:51", "throughput": 12985.48, "total_tokens": 11052480}
|
|
{"current_steps": 3520, "total_steps": 15621, "loss": 0.4816, "lr": 1.9059769556761464e-06, "epoch": 0.2253376864477306, "percentage": 22.53, "elapsed_time": "0:14:11", "remaining_time": "0:48:48", "throughput": 12993.85, "total_tokens": 11068416}
|
|
{"current_steps": 3525, "total_steps": 15621, "loss": 0.4443, "lr": 1.9055033780425962e-06, "epoch": 0.22565776838870752, "percentage": 22.57, "elapsed_time": "0:14:12", "remaining_time": "0:48:45", "throughput": 13003.71, "total_tokens": 11086400}
|
|
{"current_steps": 3530, "total_steps": 15621, "loss": 0.5674, "lr": 1.9050286698774464e-06, "epoch": 0.2259778503296844, "percentage": 22.6, "elapsed_time": "0:14:13", "remaining_time": "0:48:42", "throughput": 13012.28, "total_tokens": 11102848}
|
|
{"current_steps": 3535, "total_steps": 15621, "loss": 0.5366, "lr": 1.904552831773376e-06, "epoch": 0.22629793227066128, "percentage": 22.63, "elapsed_time": "0:14:13", "remaining_time": "0:48:39", "throughput": 13020.02, "total_tokens": 11118080}
|
|
{"current_steps": 3540, "total_steps": 15621, "loss": 0.5045, "lr": 1.9040758643244748e-06, "epoch": 0.22661801421163819, "percentage": 22.66, "elapsed_time": "0:14:14", "remaining_time": "0:48:36", "throughput": 13027.61, "total_tokens": 11133120}
|
|
{"current_steps": 3545, "total_steps": 15621, "loss": 0.4452, "lr": 1.903597768126242e-06, "epoch": 0.22693809615261507, "percentage": 22.69, "elapsed_time": "0:14:15", "remaining_time": "0:48:33", "throughput": 13036.54, "total_tokens": 11150144}
|
|
{"current_steps": 3550, "total_steps": 15621, "loss": 0.4862, "lr": 1.9031185437755862e-06, "epoch": 0.22725817809359197, "percentage": 22.73, "elapsed_time": "0:14:15", "remaining_time": "0:48:30", "throughput": 13044.44, "total_tokens": 11165760}
|
|
{"current_steps": 3555, "total_steps": 15621, "loss": 0.4948, "lr": 1.9026381918708246e-06, "epoch": 0.22757826003456885, "percentage": 22.76, "elapsed_time": "0:14:16", "remaining_time": "0:48:27", "throughput": 13051.52, "total_tokens": 11180096}
|
|
{"current_steps": 3560, "total_steps": 15621, "loss": 0.3775, "lr": 1.9021567130116822e-06, "epoch": 0.22789834197554573, "percentage": 22.79, "elapsed_time": "0:14:17", "remaining_time": "0:48:24", "throughput": 13059.17, "total_tokens": 11195584}
|
|
{"current_steps": 3565, "total_steps": 15621, "loss": 0.389, "lr": 1.9016741077992916e-06, "epoch": 0.22821842391652264, "percentage": 22.82, "elapsed_time": "0:14:17", "remaining_time": "0:48:21", "throughput": 13066.93, "total_tokens": 11210944}
|
|
{"current_steps": 3570, "total_steps": 15621, "loss": 0.4008, "lr": 1.90119037683619e-06, "epoch": 0.22853850585749952, "percentage": 22.85, "elapsed_time": "0:14:18", "remaining_time": "0:48:18", "throughput": 13075.47, "total_tokens": 11227392}
|
|
{"current_steps": 3575, "total_steps": 15621, "loss": 0.6598, "lr": 1.9007055207263223e-06, "epoch": 0.2288585877984764, "percentage": 22.89, "elapsed_time": "0:14:19", "remaining_time": "0:48:15", "throughput": 13084.36, "total_tokens": 11244416}
|
|
{"current_steps": 3580, "total_steps": 15621, "loss": 0.3584, "lr": 1.900219540075036e-06, "epoch": 0.2291786697394533, "percentage": 22.92, "elapsed_time": "0:14:20", "remaining_time": "0:48:12", "throughput": 13092.75, "total_tokens": 11260672}
|
|
{"current_steps": 3585, "total_steps": 15621, "loss": 0.4823, "lr": 1.8997324354890845e-06, "epoch": 0.22949875168043019, "percentage": 22.95, "elapsed_time": "0:14:20", "remaining_time": "0:48:09", "throughput": 13101.57, "total_tokens": 11277504}
|
|
{"current_steps": 3590, "total_steps": 15621, "loss": 0.5325, "lr": 1.8992442075766233e-06, "epoch": 0.2298188336214071, "percentage": 22.98, "elapsed_time": "0:14:21", "remaining_time": "0:48:06", "throughput": 13109.46, "total_tokens": 11293184}
|
|
{"current_steps": 3595, "total_steps": 15621, "loss": 0.3273, "lr": 1.8987548569472105e-06, "epoch": 0.23013891556238397, "percentage": 23.01, "elapsed_time": "0:14:22", "remaining_time": "0:48:03", "throughput": 13117.24, "total_tokens": 11308480}
|
|
{"current_steps": 3600, "total_steps": 15621, "loss": 0.3958, "lr": 1.8982643842118064e-06, "epoch": 0.23045899750336085, "percentage": 23.05, "elapsed_time": "0:14:22", "remaining_time": "0:48:00", "throughput": 13124.96, "total_tokens": 11323840}
|
|
{"current_steps": 3605, "total_steps": 15621, "loss": 0.5822, "lr": 1.8977727899827716e-06, "epoch": 0.23077907944433776, "percentage": 23.08, "elapsed_time": "0:14:23", "remaining_time": "0:47:57", "throughput": 13132.92, "total_tokens": 11339456}
|
|
{"current_steps": 3610, "total_steps": 15621, "loss": 0.6628, "lr": 1.8972800748738678e-06, "epoch": 0.23109916138531464, "percentage": 23.11, "elapsed_time": "0:14:24", "remaining_time": "0:47:55", "throughput": 13140.57, "total_tokens": 11354880}
|
|
{"current_steps": 3615, "total_steps": 15621, "loss": 0.5365, "lr": 1.896786239500255e-06, "epoch": 0.23141924332629152, "percentage": 23.14, "elapsed_time": "0:14:24", "remaining_time": "0:47:52", "throughput": 13148.15, "total_tokens": 11369984}
|
|
{"current_steps": 3620, "total_steps": 15621, "loss": 0.4328, "lr": 1.8962912844784928e-06, "epoch": 0.23173932526726843, "percentage": 23.17, "elapsed_time": "0:14:25", "remaining_time": "0:47:48", "throughput": 13155.41, "total_tokens": 11384640}
|
|
{"current_steps": 3625, "total_steps": 15621, "loss": 0.5017, "lr": 1.8957952104265384e-06, "epoch": 0.2320594072082453, "percentage": 23.21, "elapsed_time": "0:14:26", "remaining_time": "0:47:46", "throughput": 13163.92, "total_tokens": 11401152}
|
|
{"current_steps": 3630, "total_steps": 15621, "loss": 0.4551, "lr": 1.8952980179637458e-06, "epoch": 0.2323794891492222, "percentage": 23.24, "elapsed_time": "0:14:26", "remaining_time": "0:47:43", "throughput": 13171.87, "total_tokens": 11416896}
|
|
{"current_steps": 3635, "total_steps": 15621, "loss": 0.5002, "lr": 1.8947997077108662e-06, "epoch": 0.2326995710901991, "percentage": 23.27, "elapsed_time": "0:14:27", "remaining_time": "0:47:40", "throughput": 13179.98, "total_tokens": 11432832}
|
|
{"current_steps": 3640, "total_steps": 15621, "loss": 0.5022, "lr": 1.894300280290045e-06, "epoch": 0.23301965303117597, "percentage": 23.3, "elapsed_time": "0:14:28", "remaining_time": "0:47:37", "throughput": 13187.69, "total_tokens": 11448320}
|
|
{"current_steps": 3645, "total_steps": 15621, "loss": 0.5691, "lr": 1.8937997363248237e-06, "epoch": 0.23333973497215288, "percentage": 23.33, "elapsed_time": "0:14:28", "remaining_time": "0:47:34", "throughput": 13195.13, "total_tokens": 11463488}
|
|
{"current_steps": 3650, "total_steps": 15621, "loss": 0.4616, "lr": 1.8932980764401373e-06, "epoch": 0.23365981691312976, "percentage": 23.37, "elapsed_time": "0:14:29", "remaining_time": "0:47:31", "throughput": 13202.63, "total_tokens": 11478592}
|
|
{"current_steps": 3655, "total_steps": 15621, "loss": 0.367, "lr": 1.8927953012623141e-06, "epoch": 0.23397989885410664, "percentage": 23.4, "elapsed_time": "0:14:30", "remaining_time": "0:47:28", "throughput": 13210.62, "total_tokens": 11494720}
|
|
{"current_steps": 3660, "total_steps": 15621, "loss": 0.4884, "lr": 1.8922914114190744e-06, "epoch": 0.23429998079508355, "percentage": 23.43, "elapsed_time": "0:14:30", "remaining_time": "0:47:25", "throughput": 13218.92, "total_tokens": 11511232}
|
|
{"current_steps": 3665, "total_steps": 15621, "loss": 0.5212, "lr": 1.8917864075395312e-06, "epoch": 0.23462006273606043, "percentage": 23.46, "elapsed_time": "0:14:31", "remaining_time": "0:47:22", "throughput": 13226.79, "total_tokens": 11527040}
|
|
{"current_steps": 3670, "total_steps": 15621, "loss": 0.4641, "lr": 1.8912802902541873e-06, "epoch": 0.23494014467703733, "percentage": 23.49, "elapsed_time": "0:14:32", "remaining_time": "0:47:20", "throughput": 13234.48, "total_tokens": 11542528}
|
|
{"current_steps": 3675, "total_steps": 15621, "loss": 0.503, "lr": 1.8907730601949362e-06, "epoch": 0.2352602266180142, "percentage": 23.53, "elapsed_time": "0:14:32", "remaining_time": "0:47:17", "throughput": 13241.9, "total_tokens": 11557696}
|
|
{"current_steps": 3680, "total_steps": 15621, "loss": 0.4703, "lr": 1.8902647179950608e-06, "epoch": 0.2355803085589911, "percentage": 23.56, "elapsed_time": "0:14:33", "remaining_time": "0:47:14", "throughput": 13250.74, "total_tokens": 11574848}
|
|
{"current_steps": 3685, "total_steps": 15621, "loss": 0.5074, "lr": 1.889755264289232e-06, "epoch": 0.235900390499968, "percentage": 23.59, "elapsed_time": "0:14:34", "remaining_time": "0:47:11", "throughput": 13257.94, "total_tokens": 11589696}
|
|
{"current_steps": 3690, "total_steps": 15621, "loss": 0.3915, "lr": 1.8892446997135087e-06, "epoch": 0.23622047244094488, "percentage": 23.62, "elapsed_time": "0:14:34", "remaining_time": "0:47:08", "throughput": 13266.71, "total_tokens": 11606848}
|
|
{"current_steps": 3695, "total_steps": 15621, "loss": 0.6641, "lr": 1.888733024905337e-06, "epoch": 0.23654055438192176, "percentage": 23.65, "elapsed_time": "0:14:35", "remaining_time": "0:47:06", "throughput": 13275.42, "total_tokens": 11623744}
|
|
{"current_steps": 3700, "total_steps": 15621, "loss": 0.4874, "lr": 1.888220240503549e-06, "epoch": 0.23686063632289867, "percentage": 23.69, "elapsed_time": "0:14:36", "remaining_time": "0:47:03", "throughput": 13283.7, "total_tokens": 11640256}
|
|
{"current_steps": 3705, "total_steps": 15621, "loss": 0.4096, "lr": 1.8877063471483618e-06, "epoch": 0.23718071826387555, "percentage": 23.72, "elapsed_time": "0:14:36", "remaining_time": "0:47:00", "throughput": 13291.27, "total_tokens": 11655744}
|
|
{"current_steps": 3710, "total_steps": 15621, "loss": 0.2877, "lr": 1.8871913454813772e-06, "epoch": 0.23750080020485245, "percentage": 23.75, "elapsed_time": "0:14:37", "remaining_time": "0:46:57", "throughput": 13298.86, "total_tokens": 11671104}
|
|
{"current_steps": 3715, "total_steps": 15621, "loss": 0.3741, "lr": 1.886675236145581e-06, "epoch": 0.23782088214582933, "percentage": 23.78, "elapsed_time": "0:14:38", "remaining_time": "0:46:54", "throughput": 13306.7, "total_tokens": 11686848}
|
|
{"current_steps": 3720, "total_steps": 15621, "loss": 0.5053, "lr": 1.8861580197853422e-06, "epoch": 0.2381409640868062, "percentage": 23.81, "elapsed_time": "0:14:38", "remaining_time": "0:46:51", "throughput": 13314.03, "total_tokens": 11701952}
|
|
{"current_steps": 3725, "total_steps": 15621, "loss": 0.4637, "lr": 1.8856396970464105e-06, "epoch": 0.23846104602778312, "percentage": 23.85, "elapsed_time": "0:14:39", "remaining_time": "0:46:49", "throughput": 13322.39, "total_tokens": 11718592}
|
|
{"current_steps": 3730, "total_steps": 15621, "loss": 0.5129, "lr": 1.8851202685759189e-06, "epoch": 0.23878112796876, "percentage": 23.88, "elapsed_time": "0:14:40", "remaining_time": "0:46:46", "throughput": 13330.02, "total_tokens": 11734208}
|
|
{"current_steps": 3735, "total_steps": 15621, "loss": 0.4186, "lr": 1.8845997350223792e-06, "epoch": 0.2391012099097369, "percentage": 23.91, "elapsed_time": "0:14:40", "remaining_time": "0:46:43", "throughput": 13337.11, "total_tokens": 11748992}
|
|
{"current_steps": 3740, "total_steps": 15621, "loss": 0.4258, "lr": 1.8840780970356842e-06, "epoch": 0.23942129185071379, "percentage": 23.94, "elapsed_time": "0:14:41", "remaining_time": "0:46:40", "throughput": 13344.71, "total_tokens": 11764608}
|
|
{"current_steps": 3745, "total_steps": 15621, "loss": 0.3919, "lr": 1.8835553552671048e-06, "epoch": 0.23974137379169067, "percentage": 23.97, "elapsed_time": "0:14:42", "remaining_time": "0:46:37", "throughput": 13352.71, "total_tokens": 11780800}
|
|
{"current_steps": 3750, "total_steps": 15621, "loss": 0.4467, "lr": 1.8830315103692902e-06, "epoch": 0.24006145573266757, "percentage": 24.01, "elapsed_time": "0:14:42", "remaining_time": "0:46:34", "throughput": 13359.86, "total_tokens": 11795776}
|
|
{"current_steps": 3755, "total_steps": 15621, "loss": 0.52, "lr": 1.8825065629962669e-06, "epoch": 0.24038153767364445, "percentage": 24.04, "elapsed_time": "0:14:43", "remaining_time": "0:46:32", "throughput": 13367.68, "total_tokens": 11811776}
|
|
{"current_steps": 3760, "total_steps": 15621, "loss": 0.4902, "lr": 1.881980513803438e-06, "epoch": 0.24070161961462133, "percentage": 24.07, "elapsed_time": "0:14:44", "remaining_time": "0:46:29", "throughput": 13375.74, "total_tokens": 11828224}
|
|
{"current_steps": 3765, "total_steps": 15621, "loss": 0.5093, "lr": 1.881453363447582e-06, "epoch": 0.24102170155559824, "percentage": 24.1, "elapsed_time": "0:14:44", "remaining_time": "0:46:26", "throughput": 13383.16, "total_tokens": 11843904}
|
|
{"current_steps": 3770, "total_steps": 15621, "loss": 0.5653, "lr": 1.880925112586852e-06, "epoch": 0.24134178349657512, "percentage": 24.13, "elapsed_time": "0:14:45", "remaining_time": "0:46:24", "throughput": 13390.58, "total_tokens": 11859392}
|
|
{"current_steps": 3775, "total_steps": 15621, "loss": 0.4347, "lr": 1.8803957618807762e-06, "epoch": 0.24166186543755203, "percentage": 24.17, "elapsed_time": "0:14:46", "remaining_time": "0:46:21", "throughput": 13398.76, "total_tokens": 11875968}
|
|
{"current_steps": 3780, "total_steps": 15621, "loss": 0.4527, "lr": 1.8798653119902548e-06, "epoch": 0.2419819473785289, "percentage": 24.2, "elapsed_time": "0:14:47", "remaining_time": "0:46:18", "throughput": 13406.15, "total_tokens": 11891584}
|
|
{"current_steps": 3785, "total_steps": 15621, "loss": 0.4997, "lr": 1.8793337635775603e-06, "epoch": 0.24230202931950579, "percentage": 24.23, "elapsed_time": "0:14:47", "remaining_time": "0:46:15", "throughput": 13413.45, "total_tokens": 11906944}
|
|
{"current_steps": 3790, "total_steps": 15621, "loss": 0.4883, "lr": 1.8788011173063376e-06, "epoch": 0.2426221112604827, "percentage": 24.26, "elapsed_time": "0:14:48", "remaining_time": "0:46:13", "throughput": 13420.81, "total_tokens": 11922368}
|
|
{"current_steps": 3795, "total_steps": 15621, "loss": 0.5193, "lr": 1.8782673738416018e-06, "epoch": 0.24294219320145957, "percentage": 24.29, "elapsed_time": "0:14:49", "remaining_time": "0:46:10", "throughput": 13428.62, "total_tokens": 11938432}
|
|
{"current_steps": 3800, "total_steps": 15621, "loss": 0.5232, "lr": 1.877732533849737e-06, "epoch": 0.24326227514243645, "percentage": 24.33, "elapsed_time": "0:14:49", "remaining_time": "0:46:07", "throughput": 13437.85, "total_tokens": 11956608}
|
|
{"current_steps": 3805, "total_steps": 15621, "loss": 0.4473, "lr": 1.8771965979984988e-06, "epoch": 0.24358235708341336, "percentage": 24.36, "elapsed_time": "0:14:50", "remaining_time": "0:46:05", "throughput": 13445.32, "total_tokens": 11972480}
|
|
{"current_steps": 3810, "total_steps": 15621, "loss": 0.3903, "lr": 1.8766595669570084e-06, "epoch": 0.24390243902439024, "percentage": 24.39, "elapsed_time": "0:14:51", "remaining_time": "0:46:02", "throughput": 13452.04, "total_tokens": 11987072}
|
|
{"current_steps": 3815, "total_steps": 15621, "loss": 0.4257, "lr": 1.8761214413957553e-06, "epoch": 0.24422252096536715, "percentage": 24.42, "elapsed_time": "0:14:51", "remaining_time": "0:45:59", "throughput": 13459.06, "total_tokens": 12002112}
|
|
{"current_steps": 3820, "total_steps": 15621, "loss": 0.3607, "lr": 1.8755822219865963e-06, "epoch": 0.24454260290634403, "percentage": 24.45, "elapsed_time": "0:14:52", "remaining_time": "0:45:56", "throughput": 13465.89, "total_tokens": 12016960}
|
|
{"current_steps": 3825, "total_steps": 15621, "loss": 0.4264, "lr": 1.875041909402752e-06, "epoch": 0.2448626848473209, "percentage": 24.49, "elapsed_time": "0:14:53", "remaining_time": "0:45:54", "throughput": 13473.32, "total_tokens": 12032576}
|
|
{"current_steps": 3830, "total_steps": 15621, "loss": 0.3694, "lr": 1.8745005043188102e-06, "epoch": 0.2451827667882978, "percentage": 24.52, "elapsed_time": "0:14:53", "remaining_time": "0:45:51", "throughput": 13481.07, "total_tokens": 12048768}
|
|
{"current_steps": 3835, "total_steps": 15621, "loss": 0.3828, "lr": 1.8739580074107208e-06, "epoch": 0.2455028487292747, "percentage": 24.55, "elapsed_time": "0:14:54", "remaining_time": "0:45:48", "throughput": 13489.03, "total_tokens": 12065088}
|
|
{"current_steps": 3840, "total_steps": 15621, "loss": 0.7066, "lr": 1.873414419355798e-06, "epoch": 0.24582293067025157, "percentage": 24.58, "elapsed_time": "0:14:55", "remaining_time": "0:45:46", "throughput": 13496.41, "total_tokens": 12080704}
|
|
{"current_steps": 3845, "total_steps": 15621, "loss": 0.4319, "lr": 1.872869740832717e-06, "epoch": 0.24614301261122848, "percentage": 24.61, "elapsed_time": "0:14:55", "remaining_time": "0:45:43", "throughput": 13503.98, "total_tokens": 12096704}
|
|
{"current_steps": 3850, "total_steps": 15621, "loss": 0.5962, "lr": 1.8723239725215165e-06, "epoch": 0.24646309455220536, "percentage": 24.65, "elapsed_time": "0:14:56", "remaining_time": "0:45:40", "throughput": 13510.71, "total_tokens": 12111488}
|
|
{"current_steps": 3855, "total_steps": 15621, "loss": 0.4195, "lr": 1.871777115103594e-06, "epoch": 0.24678317649318227, "percentage": 24.68, "elapsed_time": "0:14:57", "remaining_time": "0:45:38", "throughput": 13518.87, "total_tokens": 12128192}
|
|
{"current_steps": 3860, "total_steps": 15621, "loss": 0.4734, "lr": 1.8712291692617074e-06, "epoch": 0.24710325843415915, "percentage": 24.71, "elapsed_time": "0:14:57", "remaining_time": "0:45:35", "throughput": 13526.15, "total_tokens": 12143808}
|
|
{"current_steps": 3865, "total_steps": 15621, "loss": 0.496, "lr": 1.8706801356799735e-06, "epoch": 0.24742334037513602, "percentage": 24.74, "elapsed_time": "0:14:58", "remaining_time": "0:45:32", "throughput": 13533.28, "total_tokens": 12159232}
|
|
{"current_steps": 3870, "total_steps": 15621, "loss": 0.4515, "lr": 1.8701300150438674e-06, "epoch": 0.24774342231611293, "percentage": 24.77, "elapsed_time": "0:14:59", "remaining_time": "0:45:30", "throughput": 13540.98, "total_tokens": 12175360}
|
|
{"current_steps": 3875, "total_steps": 15621, "loss": 0.4208, "lr": 1.869578808040221e-06, "epoch": 0.2480635042570898, "percentage": 24.81, "elapsed_time": "0:14:59", "remaining_time": "0:45:27", "throughput": 13547.92, "total_tokens": 12190272}
|
|
{"current_steps": 3880, "total_steps": 15621, "loss": 0.5226, "lr": 1.869026515357223e-06, "epoch": 0.2483835861980667, "percentage": 24.84, "elapsed_time": "0:15:00", "remaining_time": "0:45:25", "throughput": 13556.85, "total_tokens": 12208448}
|
|
{"current_steps": 3885, "total_steps": 15621, "loss": 0.6458, "lr": 1.8684731376844169e-06, "epoch": 0.2487036681390436, "percentage": 24.87, "elapsed_time": "0:15:01", "remaining_time": "0:45:22", "throughput": 13565.31, "total_tokens": 12225984}
|
|
{"current_steps": 3890, "total_steps": 15621, "loss": 0.5022, "lr": 1.8679186757127014e-06, "epoch": 0.24902375008002048, "percentage": 24.9, "elapsed_time": "0:15:01", "remaining_time": "0:45:19", "throughput": 13572.38, "total_tokens": 12241408}
|
|
{"current_steps": 3895, "total_steps": 15621, "loss": 0.4355, "lr": 1.8673631301343288e-06, "epoch": 0.24934383202099739, "percentage": 24.93, "elapsed_time": "0:15:02", "remaining_time": "0:45:17", "throughput": 13578.97, "total_tokens": 12256064}
|
|
{"current_steps": 3900, "total_steps": 15621, "loss": 0.4515, "lr": 1.8668065016429044e-06, "epoch": 0.24966391396197427, "percentage": 24.97, "elapsed_time": "0:15:03", "remaining_time": "0:45:14", "throughput": 13586.97, "total_tokens": 12272832}
|
|
{"current_steps": 3905, "total_steps": 15621, "loss": 0.5368, "lr": 1.866248790933385e-06, "epoch": 0.24998399590295114, "percentage": 25.0, "elapsed_time": "0:15:03", "remaining_time": "0:45:12", "throughput": 13594.64, "total_tokens": 12289024}
|
|
{"current_steps": 3910, "total_steps": 15621, "loss": 0.4277, "lr": 1.8656899987020795e-06, "epoch": 0.25030407784392805, "percentage": 25.03, "elapsed_time": "0:15:04", "remaining_time": "0:45:09", "throughput": 13601.55, "total_tokens": 12304064}
|
|
{"current_steps": 3910, "total_steps": 15621, "eval_loss": 0.46774157881736755, "epoch": 0.25030407784392805, "percentage": 25.03, "elapsed_time": "0:15:55", "remaining_time": "0:47:41", "throughput": 12879.41, "total_tokens": 12304064}
|
|
{"current_steps": 3915, "total_steps": 15621, "loss": 0.4665, "lr": 1.865130125646646e-06, "epoch": 0.25062415978490493, "percentage": 25.06, "elapsed_time": "0:16:45", "remaining_time": "0:50:06", "throughput": 12252.97, "total_tokens": 12320256}
|
|
{"current_steps": 3920, "total_steps": 15621, "loss": 0.4426, "lr": 1.8645691724660933e-06, "epoch": 0.2509442417258818, "percentage": 25.09, "elapsed_time": "0:16:46", "remaining_time": "0:50:03", "throughput": 12259.95, "total_tokens": 12335360}
|
|
{"current_steps": 3925, "total_steps": 15621, "loss": 0.4718, "lr": 1.8640071398607774e-06, "epoch": 0.2512643236668587, "percentage": 25.13, "elapsed_time": "0:16:46", "remaining_time": "0:50:00", "throughput": 12267.64, "total_tokens": 12351488}
|
|
{"current_steps": 3930, "total_steps": 15621, "loss": 0.6284, "lr": 1.8634440285324024e-06, "epoch": 0.2515844056078356, "percentage": 25.16, "elapsed_time": "0:16:47", "remaining_time": "0:49:57", "throughput": 12274.2, "total_tokens": 12365952}
|
|
{"current_steps": 3935, "total_steps": 15621, "loss": 0.4716, "lr": 1.8628798391840205e-06, "epoch": 0.2519044875488125, "percentage": 25.19, "elapsed_time": "0:16:48", "remaining_time": "0:49:53", "throughput": 12281.54, "total_tokens": 12381376}
|
|
{"current_steps": 3940, "total_steps": 15621, "loss": 0.4596, "lr": 1.8623145725200277e-06, "epoch": 0.2522245694897894, "percentage": 25.22, "elapsed_time": "0:16:48", "remaining_time": "0:49:50", "throughput": 12288.37, "total_tokens": 12396160}
|
|
{"current_steps": 3945, "total_steps": 15621, "loss": 0.4591, "lr": 1.8617482292461664e-06, "epoch": 0.25254465143076626, "percentage": 25.25, "elapsed_time": "0:16:49", "remaining_time": "0:49:47", "throughput": 12295.13, "total_tokens": 12410944}
|
|
{"current_steps": 3950, "total_steps": 15621, "loss": 0.4216, "lr": 1.861180810069523e-06, "epoch": 0.25286473337174314, "percentage": 25.29, "elapsed_time": "0:16:50", "remaining_time": "0:49:44", "throughput": 12302.25, "total_tokens": 12426304}
|
|
{"current_steps": 3955, "total_steps": 15621, "loss": 0.4785, "lr": 1.8606123156985268e-06, "epoch": 0.2531848153127201, "percentage": 25.32, "elapsed_time": "0:16:50", "remaining_time": "0:49:41", "throughput": 12309.87, "total_tokens": 12442432}
|
|
{"current_steps": 3960, "total_steps": 15621, "loss": 0.4666, "lr": 1.8600427468429496e-06, "epoch": 0.25350489725369696, "percentage": 25.35, "elapsed_time": "0:16:51", "remaining_time": "0:49:38", "throughput": 12317.27, "total_tokens": 12458368}
|
|
{"current_steps": 3965, "total_steps": 15621, "loss": 0.433, "lr": 1.8594721042139052e-06, "epoch": 0.25382497919467384, "percentage": 25.38, "elapsed_time": "0:16:52", "remaining_time": "0:49:35", "throughput": 12324.74, "total_tokens": 12474368}
|
|
{"current_steps": 3970, "total_steps": 15621, "loss": 0.423, "lr": 1.858900388523847e-06, "epoch": 0.2541450611356507, "percentage": 25.41, "elapsed_time": "0:16:52", "remaining_time": "0:49:32", "throughput": 12332.1, "total_tokens": 12490176}
|
|
{"current_steps": 3975, "total_steps": 15621, "loss": 0.4523, "lr": 1.8583276004865694e-06, "epoch": 0.2544651430766276, "percentage": 25.45, "elapsed_time": "0:16:53", "remaining_time": "0:49:29", "throughput": 12340.59, "total_tokens": 12507840}
|
|
{"current_steps": 3980, "total_steps": 15621, "loss": 0.352, "lr": 1.8577537408172046e-06, "epoch": 0.25478522501760453, "percentage": 25.48, "elapsed_time": "0:16:54", "remaining_time": "0:49:26", "throughput": 12347.79, "total_tokens": 12523520}
|
|
{"current_steps": 3985, "total_steps": 15621, "loss": 0.5448, "lr": 1.8571788102322234e-06, "epoch": 0.2551053069585814, "percentage": 25.51, "elapsed_time": "0:16:54", "remaining_time": "0:49:23", "throughput": 12356.03, "total_tokens": 12540736}
|
|
{"current_steps": 3990, "total_steps": 15621, "loss": 0.4698, "lr": 1.8566028094494332e-06, "epoch": 0.2554253888995583, "percentage": 25.54, "elapsed_time": "0:16:55", "remaining_time": "0:49:20", "throughput": 12363.32, "total_tokens": 12556352}
|
|
{"current_steps": 3995, "total_steps": 15621, "loss": 0.3732, "lr": 1.8560257391879778e-06, "epoch": 0.25574547084053517, "percentage": 25.57, "elapsed_time": "0:16:56", "remaining_time": "0:49:17", "throughput": 12369.72, "total_tokens": 12570688}
|
|
{"current_steps": 4000, "total_steps": 15621, "loss": 0.4079, "lr": 1.855447600168336e-06, "epoch": 0.25606555278151205, "percentage": 25.61, "elapsed_time": "0:16:56", "remaining_time": "0:49:14", "throughput": 12376.77, "total_tokens": 12585984}
|
|
{"current_steps": 4005, "total_steps": 15621, "loss": 0.4732, "lr": 1.8548683931123215e-06, "epoch": 0.25638563472248893, "percentage": 25.64, "elapsed_time": "0:16:57", "remaining_time": "0:49:11", "throughput": 12383.78, "total_tokens": 12601216}
|
|
{"current_steps": 4010, "total_steps": 15621, "loss": 0.4471, "lr": 1.8542881187430807e-06, "epoch": 0.25670571666346587, "percentage": 25.67, "elapsed_time": "0:16:58", "remaining_time": "0:49:08", "throughput": 12392.11, "total_tokens": 12618624}
|
|
{"current_steps": 4015, "total_steps": 15621, "loss": 0.5899, "lr": 1.8537067777850935e-06, "epoch": 0.25702579860444275, "percentage": 25.7, "elapsed_time": "0:16:58", "remaining_time": "0:49:05", "throughput": 12400.25, "total_tokens": 12635840}
|
|
{"current_steps": 4020, "total_steps": 15621, "loss": 0.359, "lr": 1.8531243709641704e-06, "epoch": 0.2573458805454196, "percentage": 25.73, "elapsed_time": "0:16:59", "remaining_time": "0:49:02", "throughput": 12407.65, "total_tokens": 12651904}
|
|
{"current_steps": 4025, "total_steps": 15621, "loss": 0.4977, "lr": 1.8525408990074533e-06, "epoch": 0.2576659624863965, "percentage": 25.77, "elapsed_time": "0:17:00", "remaining_time": "0:48:59", "throughput": 12414.49, "total_tokens": 12666944}
|
|
{"current_steps": 4030, "total_steps": 15621, "loss": 0.4184, "lr": 1.851956362643414e-06, "epoch": 0.2579860444273734, "percentage": 25.8, "elapsed_time": "0:17:01", "remaining_time": "0:48:56", "throughput": 12421.68, "total_tokens": 12682688}
|
|
{"current_steps": 4035, "total_steps": 15621, "loss": 0.5578, "lr": 1.851370762601853e-06, "epoch": 0.2583061263683503, "percentage": 25.83, "elapsed_time": "0:17:01", "remaining_time": "0:48:53", "throughput": 12428.86, "total_tokens": 12698304}
|
|
{"current_steps": 4040, "total_steps": 15621, "loss": 0.5083, "lr": 1.8507840996138983e-06, "epoch": 0.2586262083093272, "percentage": 25.86, "elapsed_time": "0:17:02", "remaining_time": "0:48:50", "throughput": 12435.32, "total_tokens": 12712896}
|
|
{"current_steps": 4045, "total_steps": 15621, "loss": 0.3908, "lr": 1.8501963744120062e-06, "epoch": 0.2589462902503041, "percentage": 25.89, "elapsed_time": "0:17:02", "remaining_time": "0:48:47", "throughput": 12441.76, "total_tokens": 12727488}
|
|
{"current_steps": 4050, "total_steps": 15621, "loss": 0.408, "lr": 1.849607587729958e-06, "epoch": 0.25926637219128096, "percentage": 25.93, "elapsed_time": "0:17:03", "remaining_time": "0:48:44", "throughput": 12448.63, "total_tokens": 12742720}
|
|
{"current_steps": 4055, "total_steps": 15621, "loss": 0.3966, "lr": 1.8490177403028615e-06, "epoch": 0.25958645413225784, "percentage": 25.96, "elapsed_time": "0:17:04", "remaining_time": "0:48:41", "throughput": 12455.5, "total_tokens": 12757760}
|
|
{"current_steps": 4060, "total_steps": 15621, "loss": 0.4966, "lr": 1.8484268328671475e-06, "epoch": 0.2599065360732348, "percentage": 25.99, "elapsed_time": "0:17:04", "remaining_time": "0:48:38", "throughput": 12462.59, "total_tokens": 12773312}
|
|
{"current_steps": 4065, "total_steps": 15621, "loss": 0.5448, "lr": 1.847834866160571e-06, "epoch": 0.26022661801421165, "percentage": 26.02, "elapsed_time": "0:17:05", "remaining_time": "0:48:35", "throughput": 12470.6, "total_tokens": 12790336}
|
|
{"current_steps": 4070, "total_steps": 15621, "loss": 0.4919, "lr": 1.847241840922209e-06, "epoch": 0.26054669995518853, "percentage": 26.05, "elapsed_time": "0:17:06", "remaining_time": "0:48:32", "throughput": 12477.36, "total_tokens": 12805632}
|
|
{"current_steps": 4075, "total_steps": 15621, "loss": 0.4875, "lr": 1.8466477578924616e-06, "epoch": 0.2608667818961654, "percentage": 26.09, "elapsed_time": "0:17:06", "remaining_time": "0:48:29", "throughput": 12484.4, "total_tokens": 12821184}
|
|
{"current_steps": 4080, "total_steps": 15621, "loss": 0.5004, "lr": 1.8460526178130472e-06, "epoch": 0.2611868638371423, "percentage": 26.12, "elapsed_time": "0:17:07", "remaining_time": "0:48:26", "throughput": 12491.24, "total_tokens": 12836544}
|
|
{"current_steps": 4085, "total_steps": 15621, "loss": 0.436, "lr": 1.8454564214270056e-06, "epoch": 0.26150694577811917, "percentage": 26.15, "elapsed_time": "0:17:08", "remaining_time": "0:48:23", "throughput": 12498.17, "total_tokens": 12852032}
|
|
{"current_steps": 4090, "total_steps": 15621, "loss": 0.4469, "lr": 1.8448591694786955e-06, "epoch": 0.2618270277190961, "percentage": 26.18, "elapsed_time": "0:17:08", "remaining_time": "0:48:21", "throughput": 12505.06, "total_tokens": 12867456}
|
|
{"current_steps": 4095, "total_steps": 15621, "loss": 0.341, "lr": 1.8442608627137925e-06, "epoch": 0.262147109660073, "percentage": 26.21, "elapsed_time": "0:17:09", "remaining_time": "0:48:18", "throughput": 12513.49, "total_tokens": 12885184}
|
|
{"current_steps": 4100, "total_steps": 15621, "loss": 0.3896, "lr": 1.8436615018792897e-06, "epoch": 0.26246719160104987, "percentage": 26.25, "elapsed_time": "0:17:10", "remaining_time": "0:48:15", "throughput": 12520.37, "total_tokens": 12900416}
|
|
{"current_steps": 4105, "total_steps": 15621, "loss": 0.5792, "lr": 1.8430610877234957e-06, "epoch": 0.26278727354202674, "percentage": 26.28, "elapsed_time": "0:17:11", "remaining_time": "0:48:12", "throughput": 12527.19, "total_tokens": 12915648}
|
|
{"current_steps": 4110, "total_steps": 15621, "loss": 0.4624, "lr": 1.8424596209960356e-06, "epoch": 0.2631073554830036, "percentage": 26.31, "elapsed_time": "0:17:11", "remaining_time": "0:48:09", "throughput": 12533.71, "total_tokens": 12930368}
|
|
{"current_steps": 4115, "total_steps": 15621, "loss": 0.5265, "lr": 1.8418571024478466e-06, "epoch": 0.26342743742398056, "percentage": 26.34, "elapsed_time": "0:17:12", "remaining_time": "0:48:06", "throughput": 12540.48, "total_tokens": 12945472}
|
|
{"current_steps": 4120, "total_steps": 15621, "loss": 0.491, "lr": 1.8412535328311812e-06, "epoch": 0.26374751936495744, "percentage": 26.37, "elapsed_time": "0:17:12", "remaining_time": "0:48:03", "throughput": 12547.59, "total_tokens": 12961472}
|
|
{"current_steps": 4125, "total_steps": 15621, "loss": 0.5816, "lr": 1.8406489128996023e-06, "epoch": 0.2640676013059343, "percentage": 26.41, "elapsed_time": "0:17:13", "remaining_time": "0:48:00", "throughput": 12553.86, "total_tokens": 12975872}
|
|
{"current_steps": 4130, "total_steps": 15621, "loss": 0.529, "lr": 1.8400432434079853e-06, "epoch": 0.2643876832469112, "percentage": 26.44, "elapsed_time": "0:17:14", "remaining_time": "0:47:57", "throughput": 12561.22, "total_tokens": 12992128}
|
|
{"current_steps": 4135, "total_steps": 15621, "loss": 0.4095, "lr": 1.8394365251125162e-06, "epoch": 0.2647077651878881, "percentage": 26.47, "elapsed_time": "0:17:15", "remaining_time": "0:47:56", "throughput": 12575.01, "total_tokens": 13021184}
|
|
{"current_steps": 4140, "total_steps": 15621, "loss": 0.4425, "lr": 1.8388287587706888e-06, "epoch": 0.265027847128865, "percentage": 26.5, "elapsed_time": "0:17:16", "remaining_time": "0:47:53", "throughput": 12582.41, "total_tokens": 13037568}
|
|
{"current_steps": 4145, "total_steps": 15621, "loss": 0.4682, "lr": 1.8382199451413074e-06, "epoch": 0.2653479290698419, "percentage": 26.53, "elapsed_time": "0:17:16", "remaining_time": "0:47:50", "throughput": 12589.44, "total_tokens": 13053440}
|
|
{"current_steps": 4150, "total_steps": 15621, "loss": 0.5178, "lr": 1.837610084984483e-06, "epoch": 0.26566801101081877, "percentage": 26.57, "elapsed_time": "0:17:17", "remaining_time": "0:47:47", "throughput": 12596.57, "total_tokens": 13069440}
|
|
{"current_steps": 4155, "total_steps": 15621, "loss": 0.5487, "lr": 1.8369991790616327e-06, "epoch": 0.26598809295179565, "percentage": 26.6, "elapsed_time": "0:17:18", "remaining_time": "0:47:44", "throughput": 12602.95, "total_tokens": 13084224}
|
|
{"current_steps": 4160, "total_steps": 15621, "loss": 0.6725, "lr": 1.8363872281354795e-06, "epoch": 0.26630817489277253, "percentage": 26.63, "elapsed_time": "0:17:18", "remaining_time": "0:47:42", "throughput": 12609.18, "total_tokens": 13098688}
|
|
{"current_steps": 4165, "total_steps": 15621, "loss": 0.4162, "lr": 1.835774232970052e-06, "epoch": 0.26662825683374947, "percentage": 26.66, "elapsed_time": "0:17:19", "remaining_time": "0:47:39", "throughput": 12615.95, "total_tokens": 13114112}
|
|
{"current_steps": 4170, "total_steps": 15621, "loss": 0.454, "lr": 1.8351601943306815e-06, "epoch": 0.26694833877472635, "percentage": 26.69, "elapsed_time": "0:17:20", "remaining_time": "0:47:36", "throughput": 12623.18, "total_tokens": 13130240}
|
|
{"current_steps": 4175, "total_steps": 15621, "loss": 0.3972, "lr": 1.8345451129840025e-06, "epoch": 0.2672684207157032, "percentage": 26.73, "elapsed_time": "0:17:20", "remaining_time": "0:47:33", "throughput": 12629.85, "total_tokens": 13145536}
|
|
{"current_steps": 4180, "total_steps": 15621, "loss": 0.5506, "lr": 1.8339289896979515e-06, "epoch": 0.2675885026566801, "percentage": 26.76, "elapsed_time": "0:17:21", "remaining_time": "0:47:30", "throughput": 12636.21, "total_tokens": 13160256}
|
|
{"current_steps": 4185, "total_steps": 15621, "loss": 0.5525, "lr": 1.8333118252417651e-06, "epoch": 0.267908584597657, "percentage": 26.79, "elapsed_time": "0:17:22", "remaining_time": "0:47:27", "throughput": 12643.77, "total_tokens": 13177088}
|
|
{"current_steps": 4190, "total_steps": 15621, "loss": 0.5154, "lr": 1.832693620385981e-06, "epoch": 0.26822866653863386, "percentage": 26.82, "elapsed_time": "0:17:22", "remaining_time": "0:47:25", "throughput": 12650.69, "total_tokens": 13192768}
|
|
{"current_steps": 4195, "total_steps": 15621, "loss": 0.5089, "lr": 1.8320743759024352e-06, "epoch": 0.2685487484796108, "percentage": 26.85, "elapsed_time": "0:17:23", "remaining_time": "0:47:22", "throughput": 12657.37, "total_tokens": 13208192}
|
|
{"current_steps": 4200, "total_steps": 15621, "loss": 0.5486, "lr": 1.831454092564261e-06, "epoch": 0.2688688304205877, "percentage": 26.89, "elapsed_time": "0:17:24", "remaining_time": "0:47:19", "throughput": 12664.24, "total_tokens": 13223872}
|
|
{"current_steps": 4205, "total_steps": 15621, "loss": 0.4597, "lr": 1.8308327711458899e-06, "epoch": 0.26918891236156456, "percentage": 26.92, "elapsed_time": "0:17:24", "remaining_time": "0:47:16", "throughput": 12670.76, "total_tokens": 13239104}
|
|
{"current_steps": 4210, "total_steps": 15621, "loss": 0.3925, "lr": 1.830210412423049e-06, "epoch": 0.26950899430254144, "percentage": 26.95, "elapsed_time": "0:17:25", "remaining_time": "0:47:13", "throughput": 12677.48, "total_tokens": 13254464}
|
|
{"current_steps": 4215, "total_steps": 15621, "loss": 0.3617, "lr": 1.8295870171727605e-06, "epoch": 0.2698290762435183, "percentage": 26.98, "elapsed_time": "0:17:26", "remaining_time": "0:47:10", "throughput": 12684.15, "total_tokens": 13269824}
|
|
{"current_steps": 4220, "total_steps": 15621, "loss": 0.4149, "lr": 1.8289625861733408e-06, "epoch": 0.27014915818449525, "percentage": 27.01, "elapsed_time": "0:17:26", "remaining_time": "0:47:08", "throughput": 12692.68, "total_tokens": 13288448}
|
|
{"current_steps": 4225, "total_steps": 15621, "loss": 0.5178, "lr": 1.8283371202043991e-06, "epoch": 0.27046924012547213, "percentage": 27.05, "elapsed_time": "0:17:27", "remaining_time": "0:47:05", "throughput": 12699.56, "total_tokens": 13304320}
|
|
{"current_steps": 4230, "total_steps": 15621, "loss": 0.5533, "lr": 1.827710620046837e-06, "epoch": 0.270789322066449, "percentage": 27.08, "elapsed_time": "0:17:28", "remaining_time": "0:47:03", "throughput": 12707.54, "total_tokens": 13321920}
|
|
{"current_steps": 4235, "total_steps": 15621, "loss": 0.4571, "lr": 1.8270830864828474e-06, "epoch": 0.2711094040074259, "percentage": 27.11, "elapsed_time": "0:17:29", "remaining_time": "0:47:00", "throughput": 12714.14, "total_tokens": 13337280}
|
|
{"current_steps": 4240, "total_steps": 15621, "loss": 0.434, "lr": 1.8264545202959133e-06, "epoch": 0.27142948594840277, "percentage": 27.14, "elapsed_time": "0:17:29", "remaining_time": "0:46:57", "throughput": 12721.6, "total_tokens": 13354112}
|
|
{"current_steps": 4245, "total_steps": 15621, "loss": 0.4362, "lr": 1.8258249222708067e-06, "epoch": 0.2717495678893797, "percentage": 27.17, "elapsed_time": "0:17:30", "remaining_time": "0:46:54", "throughput": 12728.23, "total_tokens": 13369600}
|
|
{"current_steps": 4250, "total_steps": 15621, "loss": 0.4558, "lr": 1.8251942931935886e-06, "epoch": 0.2720696498303566, "percentage": 27.21, "elapsed_time": "0:17:31", "remaining_time": "0:46:52", "throughput": 12735.04, "total_tokens": 13385536}
|
|
{"current_steps": 4255, "total_steps": 15621, "loss": 0.3748, "lr": 1.8245626338516069e-06, "epoch": 0.27238973177133347, "percentage": 27.24, "elapsed_time": "0:17:31", "remaining_time": "0:46:49", "throughput": 12741.59, "total_tokens": 13400832}
|
|
{"current_steps": 4260, "total_steps": 15621, "loss": 0.338, "lr": 1.823929945033495e-06, "epoch": 0.27270981371231034, "percentage": 27.27, "elapsed_time": "0:17:32", "remaining_time": "0:46:46", "throughput": 12748.07, "total_tokens": 13416000}
|
|
{"current_steps": 4265, "total_steps": 15621, "loss": 0.5038, "lr": 1.8232962275291728e-06, "epoch": 0.2730298956532872, "percentage": 27.3, "elapsed_time": "0:17:33", "remaining_time": "0:46:43", "throughput": 12754.71, "total_tokens": 13431360}
|
|
{"current_steps": 4270, "total_steps": 15621, "loss": 0.4415, "lr": 1.822661482129844e-06, "epoch": 0.2733499775942641, "percentage": 27.33, "elapsed_time": "0:17:33", "remaining_time": "0:46:41", "throughput": 12761.39, "total_tokens": 13446976}
|
|
{"current_steps": 4275, "total_steps": 15621, "loss": 0.3688, "lr": 1.8220257096279956e-06, "epoch": 0.27367005953524104, "percentage": 27.37, "elapsed_time": "0:17:34", "remaining_time": "0:46:38", "throughput": 12768.24, "total_tokens": 13463040}
|
|
{"current_steps": 4280, "total_steps": 15621, "loss": 0.6843, "lr": 1.8213889108173972e-06, "epoch": 0.2739901414762179, "percentage": 27.4, "elapsed_time": "0:17:35", "remaining_time": "0:46:35", "throughput": 12774.89, "total_tokens": 13478656}
|
|
{"current_steps": 4285, "total_steps": 15621, "loss": 0.4995, "lr": 1.8207510864930992e-06, "epoch": 0.2743102234171948, "percentage": 27.43, "elapsed_time": "0:17:35", "remaining_time": "0:46:33", "throughput": 12782.01, "total_tokens": 13495296}
|
|
{"current_steps": 4290, "total_steps": 15621, "loss": 0.5081, "lr": 1.8201122374514336e-06, "epoch": 0.2746303053581717, "percentage": 27.46, "elapsed_time": "0:17:36", "remaining_time": "0:46:30", "throughput": 12788.63, "total_tokens": 13510912}
|
|
{"current_steps": 4295, "total_steps": 15621, "loss": 0.4362, "lr": 1.8194723644900099e-06, "epoch": 0.27495038729914856, "percentage": 27.5, "elapsed_time": "0:17:37", "remaining_time": "0:46:27", "throughput": 12794.92, "total_tokens": 13525952}
|
|
{"current_steps": 4300, "total_steps": 15621, "loss": 0.5305, "lr": 1.8188314684077173e-06, "epoch": 0.2752704692401255, "percentage": 27.53, "elapsed_time": "0:17:37", "remaining_time": "0:46:25", "throughput": 12804.49, "total_tokens": 13546752}
|
|
{"current_steps": 4305, "total_steps": 15621, "loss": 0.5643, "lr": 1.8181895500047226e-06, "epoch": 0.2755905511811024, "percentage": 27.56, "elapsed_time": "0:17:38", "remaining_time": "0:46:22", "throughput": 12810.73, "total_tokens": 13561728}
|
|
{"current_steps": 4310, "total_steps": 15621, "loss": 0.4562, "lr": 1.817546610082468e-06, "epoch": 0.27591063312207925, "percentage": 27.59, "elapsed_time": "0:17:39", "remaining_time": "0:46:19", "throughput": 12817.28, "total_tokens": 13577344}
|
|
{"current_steps": 4315, "total_steps": 15621, "loss": 0.4862, "lr": 1.816902649443672e-06, "epoch": 0.27623071506305613, "percentage": 27.62, "elapsed_time": "0:17:39", "remaining_time": "0:46:17", "throughput": 12823.4, "total_tokens": 13592256}
|
|
{"current_steps": 4320, "total_steps": 15621, "loss": 0.5403, "lr": 1.8162576688923262e-06, "epoch": 0.276550797004033, "percentage": 27.66, "elapsed_time": "0:17:40", "remaining_time": "0:46:14", "throughput": 12830.61, "total_tokens": 13608832}
|
|
{"current_steps": 4325, "total_steps": 15621, "loss": 0.5508, "lr": 1.815611669233697e-06, "epoch": 0.27687087894500995, "percentage": 27.69, "elapsed_time": "0:17:41", "remaining_time": "0:46:11", "throughput": 12836.99, "total_tokens": 13624128}
|
|
{"current_steps": 4330, "total_steps": 15621, "loss": 0.5369, "lr": 1.8149646512743222e-06, "epoch": 0.2771909608859868, "percentage": 27.72, "elapsed_time": "0:17:42", "remaining_time": "0:46:09", "throughput": 12844.05, "total_tokens": 13640576}
|
|
{"current_steps": 4335, "total_steps": 15621, "loss": 0.4653, "lr": 1.8143166158220118e-06, "epoch": 0.2775110428269637, "percentage": 27.75, "elapsed_time": "0:17:42", "remaining_time": "0:46:06", "throughput": 12850.45, "total_tokens": 13655872}
|
|
{"current_steps": 4340, "total_steps": 15621, "loss": 0.6598, "lr": 1.8136675636858454e-06, "epoch": 0.2778311247679406, "percentage": 27.78, "elapsed_time": "0:17:43", "remaining_time": "0:46:04", "throughput": 12857.53, "total_tokens": 13672384}
|
|
{"current_steps": 4345, "total_steps": 15621, "loss": 0.408, "lr": 1.8130174956761723e-06, "epoch": 0.27815120670891746, "percentage": 27.82, "elapsed_time": "0:17:44", "remaining_time": "0:46:01", "throughput": 12863.74, "total_tokens": 13687296}
|
|
{"current_steps": 4350, "total_steps": 15621, "loss": 0.5366, "lr": 1.81236641260461e-06, "epoch": 0.2784712886498944, "percentage": 27.85, "elapsed_time": "0:17:44", "remaining_time": "0:45:58", "throughput": 12870.1, "total_tokens": 13702528}
|
|
{"current_steps": 4355, "total_steps": 15621, "loss": 0.498, "lr": 1.811714315284043e-06, "epoch": 0.2787913705908713, "percentage": 27.88, "elapsed_time": "0:17:45", "remaining_time": "0:45:55", "throughput": 12876.3, "total_tokens": 13717568}
|
|
{"current_steps": 4360, "total_steps": 15621, "loss": 0.4252, "lr": 1.8110612045286229e-06, "epoch": 0.27911145253184816, "percentage": 27.91, "elapsed_time": "0:17:46", "remaining_time": "0:45:53", "throughput": 12883.06, "total_tokens": 13733568}
|
|
{"current_steps": 4365, "total_steps": 15621, "loss": 0.3778, "lr": 1.8104070811537661e-06, "epoch": 0.27943153447282504, "percentage": 27.94, "elapsed_time": "0:17:46", "remaining_time": "0:45:50", "throughput": 12889.72, "total_tokens": 13749312}
|
|
{"current_steps": 4370, "total_steps": 15621, "loss": 0.4405, "lr": 1.8097519459761533e-06, "epoch": 0.2797516164138019, "percentage": 27.98, "elapsed_time": "0:17:47", "remaining_time": "0:45:48", "throughput": 12896.77, "total_tokens": 13765952}
|
|
{"current_steps": 4375, "total_steps": 15621, "loss": 0.5056, "lr": 1.8090957998137283e-06, "epoch": 0.2800716983547788, "percentage": 28.01, "elapsed_time": "0:17:48", "remaining_time": "0:45:45", "throughput": 12903.16, "total_tokens": 13781440}
|
|
{"current_steps": 4380, "total_steps": 15621, "loss": 0.4528, "lr": 1.8084386434856978e-06, "epoch": 0.28039178029575573, "percentage": 28.04, "elapsed_time": "0:17:48", "remaining_time": "0:45:42", "throughput": 12909.56, "total_tokens": 13796864}
|
|
{"current_steps": 4385, "total_steps": 15621, "loss": 0.4966, "lr": 1.8077804778125283e-06, "epoch": 0.2807118622367326, "percentage": 28.07, "elapsed_time": "0:17:49", "remaining_time": "0:45:40", "throughput": 12916.26, "total_tokens": 13812736}
|
|
{"current_steps": 4390, "total_steps": 15621, "loss": 0.489, "lr": 1.807121303615948e-06, "epoch": 0.2810319441777095, "percentage": 28.1, "elapsed_time": "0:17:50", "remaining_time": "0:45:37", "throughput": 12922.74, "total_tokens": 13828288}
|
|
{"current_steps": 4395, "total_steps": 15621, "loss": 0.4168, "lr": 1.8064611217189434e-06, "epoch": 0.28135202611868637, "percentage": 28.14, "elapsed_time": "0:17:50", "remaining_time": "0:45:35", "throughput": 12930.24, "total_tokens": 13845568}
|
|
{"current_steps": 4400, "total_steps": 15621, "loss": 0.3902, "lr": 1.8057999329457596e-06, "epoch": 0.28167210805966325, "percentage": 28.17, "elapsed_time": "0:17:51", "remaining_time": "0:45:32", "throughput": 12936.49, "total_tokens": 13860608}
|
|
{"current_steps": 4405, "total_steps": 15621, "loss": 0.5584, "lr": 1.8051377381218984e-06, "epoch": 0.2819921900006402, "percentage": 28.2, "elapsed_time": "0:17:52", "remaining_time": "0:45:29", "throughput": 12943.2, "total_tokens": 13876608}
|
|
{"current_steps": 4410, "total_steps": 15621, "loss": 0.5613, "lr": 1.8044745380741177e-06, "epoch": 0.28231227194161707, "percentage": 28.23, "elapsed_time": "0:17:52", "remaining_time": "0:45:27", "throughput": 12950.44, "total_tokens": 13893632}
|
|
{"current_steps": 4415, "total_steps": 15621, "loss": 0.3872, "lr": 1.8038103336304306e-06, "epoch": 0.28263235388259395, "percentage": 28.26, "elapsed_time": "0:17:53", "remaining_time": "0:45:24", "throughput": 12956.89, "total_tokens": 13909312}
|
|
{"current_steps": 4420, "total_steps": 15621, "loss": 0.5718, "lr": 1.8031451256201042e-06, "epoch": 0.2829524358235708, "percentage": 28.3, "elapsed_time": "0:17:54", "remaining_time": "0:45:22", "throughput": 12963.83, "total_tokens": 13925824}
|
|
{"current_steps": 4425, "total_steps": 15621, "loss": 0.5314, "lr": 1.8024789148736589e-06, "epoch": 0.2832725177645477, "percentage": 28.33, "elapsed_time": "0:17:54", "remaining_time": "0:45:19", "throughput": 12970.86, "total_tokens": 13942336}
|
|
{"current_steps": 4430, "total_steps": 15621, "loss": 0.4017, "lr": 1.8018117022228655e-06, "epoch": 0.28359259970552464, "percentage": 28.36, "elapsed_time": "0:17:55", "remaining_time": "0:45:17", "throughput": 12977.19, "total_tokens": 13957760}
|
|
{"current_steps": 4435, "total_steps": 15621, "loss": 0.5044, "lr": 1.8011434885007479e-06, "epoch": 0.2839126816465015, "percentage": 28.39, "elapsed_time": "0:17:56", "remaining_time": "0:45:14", "throughput": 12983.48, "total_tokens": 13972992}
|
|
{"current_steps": 4440, "total_steps": 15621, "loss": 0.4184, "lr": 1.8004742745415787e-06, "epoch": 0.2842327635874784, "percentage": 28.42, "elapsed_time": "0:17:56", "remaining_time": "0:45:11", "throughput": 12989.99, "total_tokens": 13988736}
|
|
{"current_steps": 4445, "total_steps": 15621, "loss": 0.5398, "lr": 1.799804061180879e-06, "epoch": 0.2845528455284553, "percentage": 28.46, "elapsed_time": "0:17:57", "remaining_time": "0:45:09", "throughput": 12995.86, "total_tokens": 14003520}
|
|
{"current_steps": 4450, "total_steps": 15621, "loss": 0.5016, "lr": 1.799132849255418e-06, "epoch": 0.28487292746943216, "percentage": 28.49, "elapsed_time": "0:17:58", "remaining_time": "0:45:06", "throughput": 13003.1, "total_tokens": 14020608}
|
|
{"current_steps": 4455, "total_steps": 15621, "loss": 0.4168, "lr": 1.798460639603212e-06, "epoch": 0.28519300941040904, "percentage": 28.52, "elapsed_time": "0:17:58", "remaining_time": "0:45:04", "throughput": 13008.96, "total_tokens": 14035328}
|
|
{"current_steps": 4460, "total_steps": 15621, "loss": 0.4799, "lr": 1.7977874330635224e-06, "epoch": 0.285513091351386, "percentage": 28.55, "elapsed_time": "0:17:59", "remaining_time": "0:45:01", "throughput": 13015.22, "total_tokens": 14050816}
|
|
{"current_steps": 4465, "total_steps": 15621, "loss": 0.3319, "lr": 1.7971132304768555e-06, "epoch": 0.28583317329236285, "percentage": 28.58, "elapsed_time": "0:18:00", "remaining_time": "0:44:59", "throughput": 13021.83, "total_tokens": 14066880}
|
|
{"current_steps": 4470, "total_steps": 15621, "loss": 0.5081, "lr": 1.7964380326849612e-06, "epoch": 0.28615325523333973, "percentage": 28.62, "elapsed_time": "0:18:00", "remaining_time": "0:44:56", "throughput": 13027.79, "total_tokens": 14081728}
|
|
{"current_steps": 4475, "total_steps": 15621, "loss": 0.4885, "lr": 1.795761840530832e-06, "epoch": 0.2864733371743166, "percentage": 28.65, "elapsed_time": "0:18:01", "remaining_time": "0:44:53", "throughput": 13034.55, "total_tokens": 14097984}
|
|
{"current_steps": 4480, "total_steps": 15621, "loss": 0.4186, "lr": 1.7950846548587015e-06, "epoch": 0.2867934191152935, "percentage": 28.68, "elapsed_time": "0:18:02", "remaining_time": "0:44:51", "throughput": 13041.92, "total_tokens": 14115264}
|
|
{"current_steps": 4485, "total_steps": 15621, "loss": 0.2815, "lr": 1.7944064765140445e-06, "epoch": 0.2871135010562704, "percentage": 28.71, "elapsed_time": "0:18:02", "remaining_time": "0:44:48", "throughput": 13047.47, "total_tokens": 14129472}
|
|
{"current_steps": 4490, "total_steps": 15621, "loss": 0.5567, "lr": 1.7937273063435735e-06, "epoch": 0.2874335829972473, "percentage": 28.74, "elapsed_time": "0:18:03", "remaining_time": "0:44:46", "throughput": 13053.7, "total_tokens": 14144896}
|
|
{"current_steps": 4495, "total_steps": 15621, "loss": 0.3559, "lr": 1.7930471451952416e-06, "epoch": 0.2877536649382242, "percentage": 28.78, "elapsed_time": "0:18:04", "remaining_time": "0:44:43", "throughput": 13059.6, "total_tokens": 14159744}
|
|
{"current_steps": 4500, "total_steps": 15621, "loss": 0.4935, "lr": 1.7923659939182377e-06, "epoch": 0.28807374687920106, "percentage": 28.81, "elapsed_time": "0:18:04", "remaining_time": "0:44:41", "throughput": 13066.55, "total_tokens": 14176384}
|
|
{"current_steps": 4505, "total_steps": 15621, "loss": 0.5441, "lr": 1.7916838533629866e-06, "epoch": 0.28839382882017794, "percentage": 28.84, "elapsed_time": "0:18:05", "remaining_time": "0:44:38", "throughput": 13073.08, "total_tokens": 14192320}
|
|
{"current_steps": 4510, "total_steps": 15621, "loss": 0.3929, "lr": 1.7910007243811493e-06, "epoch": 0.2887139107611549, "percentage": 28.87, "elapsed_time": "0:18:06", "remaining_time": "0:44:36", "throughput": 13079.5, "total_tokens": 14208192}
|
|
{"current_steps": 4515, "total_steps": 15621, "loss": 0.5737, "lr": 1.7903166078256202e-06, "epoch": 0.28903399270213176, "percentage": 28.9, "elapsed_time": "0:18:06", "remaining_time": "0:44:33", "throughput": 13085.41, "total_tokens": 14223104}
|
|
{"current_steps": 4520, "total_steps": 15621, "loss": 0.4223, "lr": 1.789631504550527e-06, "epoch": 0.28935407464310864, "percentage": 28.94, "elapsed_time": "0:18:07", "remaining_time": "0:44:31", "throughput": 13091.6, "total_tokens": 14238464}
|
|
{"current_steps": 4525, "total_steps": 15621, "loss": 0.3912, "lr": 1.7889454154112288e-06, "epoch": 0.2896741565840855, "percentage": 28.97, "elapsed_time": "0:18:08", "remaining_time": "0:44:28", "throughput": 13098.15, "total_tokens": 14254656}
|
|
{"current_steps": 4530, "total_steps": 15621, "loss": 0.3903, "lr": 1.7882583412643167e-06, "epoch": 0.2899942385250624, "percentage": 29.0, "elapsed_time": "0:18:08", "remaining_time": "0:44:26", "throughput": 13103.7, "total_tokens": 14268928}
|
|
{"current_steps": 4535, "total_steps": 15621, "loss": 0.4489, "lr": 1.78757028296761e-06, "epoch": 0.29031432046603933, "percentage": 29.03, "elapsed_time": "0:18:09", "remaining_time": "0:44:23", "throughput": 13110.73, "total_tokens": 14285952}
|
|
{"current_steps": 4540, "total_steps": 15621, "loss": 0.3513, "lr": 1.7868812413801582e-06, "epoch": 0.2906344024070162, "percentage": 29.06, "elapsed_time": "0:18:10", "remaining_time": "0:44:21", "throughput": 13117.19, "total_tokens": 14301760}
|
|
{"current_steps": 4545, "total_steps": 15621, "loss": 0.4985, "lr": 1.7861912173622372e-06, "epoch": 0.2909544843479931, "percentage": 29.1, "elapsed_time": "0:18:11", "remaining_time": "0:44:18", "throughput": 13123.89, "total_tokens": 14318208}
|
|
{"current_steps": 4550, "total_steps": 15621, "loss": 0.4537, "lr": 1.7855002117753504e-06, "epoch": 0.29127456628896997, "percentage": 29.13, "elapsed_time": "0:18:11", "remaining_time": "0:44:16", "throughput": 13130.35, "total_tokens": 14334144}
|
|
{"current_steps": 4555, "total_steps": 15621, "loss": 0.5489, "lr": 1.7848082254822266e-06, "epoch": 0.29159464822994685, "percentage": 29.16, "elapsed_time": "0:18:12", "remaining_time": "0:44:13", "throughput": 13136.2, "total_tokens": 14349120}
|
|
{"current_steps": 4560, "total_steps": 15621, "loss": 0.4957, "lr": 1.7841152593468185e-06, "epoch": 0.29191473017092373, "percentage": 29.19, "elapsed_time": "0:18:13", "remaining_time": "0:44:11", "throughput": 13142.84, "total_tokens": 14365376}
|
|
{"current_steps": 4565, "total_steps": 15621, "loss": 0.4636, "lr": 1.7834213142343026e-06, "epoch": 0.29223481211190067, "percentage": 29.22, "elapsed_time": "0:18:13", "remaining_time": "0:44:08", "throughput": 13149.39, "total_tokens": 14381568}
|
|
{"current_steps": 4570, "total_steps": 15621, "loss": 0.4752, "lr": 1.7827263910110777e-06, "epoch": 0.29255489405287755, "percentage": 29.26, "elapsed_time": "0:18:14", "remaining_time": "0:44:06", "throughput": 13155.66, "total_tokens": 14397312}
|
|
{"current_steps": 4575, "total_steps": 15621, "loss": 0.4631, "lr": 1.7820304905447632e-06, "epoch": 0.2928749759938544, "percentage": 29.29, "elapsed_time": "0:18:15", "remaining_time": "0:44:03", "throughput": 13161.89, "total_tokens": 14412928}
|
|
{"current_steps": 4580, "total_steps": 15621, "loss": 0.4515, "lr": 1.7813336137041991e-06, "epoch": 0.2931950579348313, "percentage": 29.32, "elapsed_time": "0:18:15", "remaining_time": "0:44:01", "throughput": 13167.73, "total_tokens": 14427968}
|
|
{"current_steps": 4585, "total_steps": 15621, "loss": 0.3591, "lr": 1.7806357613594447e-06, "epoch": 0.2935151398758082, "percentage": 29.35, "elapsed_time": "0:18:16", "remaining_time": "0:43:58", "throughput": 13173.59, "total_tokens": 14442944}
|
|
{"current_steps": 4590, "total_steps": 15621, "loss": 0.452, "lr": 1.7799369343817764e-06, "epoch": 0.2938352218167851, "percentage": 29.38, "elapsed_time": "0:18:17", "remaining_time": "0:43:56", "throughput": 13179.68, "total_tokens": 14458176}
|
|
{"current_steps": 4595, "total_steps": 15621, "loss": 0.3618, "lr": 1.7792371336436883e-06, "epoch": 0.294155303757762, "percentage": 29.42, "elapsed_time": "0:18:17", "remaining_time": "0:43:53", "throughput": 13185.74, "total_tokens": 14473600}
|
|
{"current_steps": 4600, "total_steps": 15621, "loss": 0.6561, "lr": 1.7785363600188892e-06, "epoch": 0.2944753856987389, "percentage": 29.45, "elapsed_time": "0:18:18", "remaining_time": "0:43:51", "throughput": 13191.72, "total_tokens": 14488896}
|
|
{"current_steps": 4605, "total_steps": 15621, "loss": 0.5982, "lr": 1.7778346143823038e-06, "epoch": 0.29479546763971576, "percentage": 29.48, "elapsed_time": "0:18:18", "remaining_time": "0:43:48", "throughput": 13196.9, "total_tokens": 14502784}
|
|
{"current_steps": 4610, "total_steps": 15621, "loss": 0.4353, "lr": 1.7771318976100696e-06, "epoch": 0.29511554958069264, "percentage": 29.51, "elapsed_time": "0:18:19", "remaining_time": "0:43:46", "throughput": 13204.0, "total_tokens": 14520000}
|
|
{"current_steps": 4615, "total_steps": 15621, "loss": 0.3531, "lr": 1.7764282105795364e-06, "epoch": 0.2954356315216696, "percentage": 29.54, "elapsed_time": "0:18:20", "remaining_time": "0:43:44", "throughput": 13210.51, "total_tokens": 14536320}
|
|
{"current_steps": 4620, "total_steps": 15621, "loss": 0.4688, "lr": 1.7757235541692663e-06, "epoch": 0.29575571346264645, "percentage": 29.58, "elapsed_time": "0:18:21", "remaining_time": "0:43:41", "throughput": 13216.62, "total_tokens": 14551808}
|
|
{"current_steps": 4625, "total_steps": 15621, "loss": 0.3106, "lr": 1.7750179292590306e-06, "epoch": 0.29607579540362333, "percentage": 29.61, "elapsed_time": "0:18:21", "remaining_time": "0:43:39", "throughput": 13222.51, "total_tokens": 14566976}
|
|
{"current_steps": 4630, "total_steps": 15621, "loss": 0.3511, "lr": 1.7743113367298107e-06, "epoch": 0.2963958773446002, "percentage": 29.64, "elapsed_time": "0:18:22", "remaining_time": "0:43:36", "throughput": 13228.89, "total_tokens": 14583104}
|
|
{"current_steps": 4635, "total_steps": 15621, "loss": 0.4515, "lr": 1.7736037774637955e-06, "epoch": 0.2967159592855771, "percentage": 29.67, "elapsed_time": "0:18:23", "remaining_time": "0:43:34", "throughput": 13234.85, "total_tokens": 14598336}
|
|
{"current_steps": 4640, "total_steps": 15621, "loss": 0.5141, "lr": 1.772895252344381e-06, "epoch": 0.29703604122655397, "percentage": 29.7, "elapsed_time": "0:18:23", "remaining_time": "0:43:32", "throughput": 13241.72, "total_tokens": 14615232}
|
|
{"current_steps": 4645, "total_steps": 15621, "loss": 0.388, "lr": 1.7721857622561692e-06, "epoch": 0.2973561231675309, "percentage": 29.74, "elapsed_time": "0:18:24", "remaining_time": "0:43:29", "throughput": 13247.85, "total_tokens": 14630848}
|
|
{"current_steps": 4650, "total_steps": 15621, "loss": 0.4668, "lr": 1.7714753080849664e-06, "epoch": 0.2976762051085078, "percentage": 29.77, "elapsed_time": "0:18:25", "remaining_time": "0:43:27", "throughput": 13254.3, "total_tokens": 14647040}
|
|
{"current_steps": 4655, "total_steps": 15621, "loss": 0.4196, "lr": 1.7707638907177837e-06, "epoch": 0.29799628704948466, "percentage": 29.8, "elapsed_time": "0:18:25", "remaining_time": "0:43:24", "throughput": 13259.98, "total_tokens": 14661888}
|
|
{"current_steps": 4660, "total_steps": 15621, "loss": 0.7015, "lr": 1.7700515110428336e-06, "epoch": 0.29831636899046154, "percentage": 29.83, "elapsed_time": "0:18:26", "remaining_time": "0:43:22", "throughput": 13266.19, "total_tokens": 14677696}
|
|
{"current_steps": 4665, "total_steps": 15621, "loss": 0.4795, "lr": 1.7693381699495307e-06, "epoch": 0.2986364509314384, "percentage": 29.86, "elapsed_time": "0:18:27", "remaining_time": "0:43:19", "throughput": 13272.28, "total_tokens": 14693184}
|
|
{"current_steps": 4670, "total_steps": 15621, "loss": 0.3712, "lr": 1.7686238683284894e-06, "epoch": 0.29895653287241536, "percentage": 29.9, "elapsed_time": "0:18:27", "remaining_time": "0:43:17", "throughput": 13277.93, "total_tokens": 14707904}
|
|
{"current_steps": 4675, "total_steps": 15621, "loss": 0.3553, "lr": 1.7679086070715237e-06, "epoch": 0.29927661481339224, "percentage": 29.93, "elapsed_time": "0:18:28", "remaining_time": "0:43:15", "throughput": 13284.33, "total_tokens": 14724096}
|
|
{"current_steps": 4680, "total_steps": 15621, "loss": 0.4575, "lr": 1.7671923870716459e-06, "epoch": 0.2995966967543691, "percentage": 29.96, "elapsed_time": "0:18:29", "remaining_time": "0:43:12", "throughput": 13289.83, "total_tokens": 14738752}
|
|
{"current_steps": 4685, "total_steps": 15621, "loss": 0.355, "lr": 1.7664752092230652e-06, "epoch": 0.299916778695346, "percentage": 29.99, "elapsed_time": "0:18:29", "remaining_time": "0:43:10", "throughput": 13295.51, "total_tokens": 14753664}
|
|
{"current_steps": 4690, "total_steps": 15621, "loss": 0.3708, "lr": 1.7657570744211863e-06, "epoch": 0.3002368606363229, "percentage": 30.02, "elapsed_time": "0:18:30", "remaining_time": "0:43:07", "throughput": 13301.57, "total_tokens": 14769152}
|
|
{"current_steps": 4692, "total_steps": 15621, "eval_loss": 0.46517089009284973, "epoch": 0.30036489341271366, "percentage": 30.04, "elapsed_time": "0:19:21", "remaining_time": "0:45:04", "throughput": 12724.3, "total_tokens": 14775488}
|
|
{"current_steps": 4695, "total_steps": 15621, "loss": 0.5088, "lr": 1.765037983562609e-06, "epoch": 0.3005569425772998, "percentage": 30.06, "elapsed_time": "0:19:57", "remaining_time": "0:46:27", "throughput": 12344.09, "total_tokens": 14784128}
|
|
{"current_steps": 4700, "total_steps": 15621, "loss": 0.4325, "lr": 1.7643179375451264e-06, "epoch": 0.3008770245182767, "percentage": 30.09, "elapsed_time": "0:19:58", "remaining_time": "0:46:24", "throughput": 12350.38, "total_tokens": 14799936}
|
|
{"current_steps": 4705, "total_steps": 15621, "loss": 0.6141, "lr": 1.7635969372677252e-06, "epoch": 0.30119710645925357, "percentage": 30.12, "elapsed_time": "0:19:58", "remaining_time": "0:46:21", "throughput": 12355.76, "total_tokens": 14814208}
|
|
{"current_steps": 4710, "total_steps": 15621, "loss": 0.4862, "lr": 1.7628749836305818e-06, "epoch": 0.30151718840023045, "percentage": 30.15, "elapsed_time": "0:19:59", "remaining_time": "0:46:19", "throughput": 12361.68, "total_tokens": 14829504}
|
|
{"current_steps": 4715, "total_steps": 15621, "loss": 0.4053, "lr": 1.7621520775350645e-06, "epoch": 0.30183727034120733, "percentage": 30.18, "elapsed_time": "0:20:00", "remaining_time": "0:46:16", "throughput": 12367.22, "total_tokens": 14843968}
|
|
{"current_steps": 4720, "total_steps": 15621, "loss": 0.4685, "lr": 1.7614282198837293e-06, "epoch": 0.30215735228218427, "percentage": 30.22, "elapsed_time": "0:20:00", "remaining_time": "0:46:13", "throughput": 12373.55, "total_tokens": 14859840}
|
|
{"current_steps": 4725, "total_steps": 15621, "loss": 0.4873, "lr": 1.7607034115803219e-06, "epoch": 0.30247743422316115, "percentage": 30.25, "elapsed_time": "0:20:01", "remaining_time": "0:46:10", "throughput": 12379.68, "total_tokens": 14875648}
|
|
{"current_steps": 4730, "total_steps": 15621, "loss": 0.4244, "lr": 1.7599776535297734e-06, "epoch": 0.302797516164138, "percentage": 30.28, "elapsed_time": "0:20:02", "remaining_time": "0:46:08", "throughput": 12385.35, "total_tokens": 14890560}
|
|
{"current_steps": 4735, "total_steps": 15621, "loss": 0.478, "lr": 1.7592509466382012e-06, "epoch": 0.3031175981051149, "percentage": 30.31, "elapsed_time": "0:20:02", "remaining_time": "0:46:05", "throughput": 12391.72, "total_tokens": 14906688}
|
|
{"current_steps": 4740, "total_steps": 15621, "loss": 0.5622, "lr": 1.7585232918129076e-06, "epoch": 0.3034376800460918, "percentage": 30.34, "elapsed_time": "0:20:03", "remaining_time": "0:46:03", "throughput": 12397.89, "total_tokens": 14922496}
|
|
{"current_steps": 4745, "total_steps": 15621, "loss": 0.4656, "lr": 1.757794689962378e-06, "epoch": 0.30375776198706866, "percentage": 30.38, "elapsed_time": "0:20:04", "remaining_time": "0:46:00", "throughput": 12404.31, "total_tokens": 14938880}
|
|
{"current_steps": 4750, "total_steps": 15621, "loss": 0.5035, "lr": 1.7570651419962807e-06, "epoch": 0.3040778439280456, "percentage": 30.41, "elapsed_time": "0:20:04", "remaining_time": "0:45:57", "throughput": 12410.14, "total_tokens": 14954112}
|
|
{"current_steps": 4755, "total_steps": 15621, "loss": 0.4471, "lr": 1.7563346488254647e-06, "epoch": 0.3043979258690225, "percentage": 30.44, "elapsed_time": "0:20:05", "remaining_time": "0:45:55", "throughput": 12416.12, "total_tokens": 14969536}
|
|
{"current_steps": 4760, "total_steps": 15621, "loss": 0.351, "lr": 1.755603211361959e-06, "epoch": 0.30471800780999936, "percentage": 30.47, "elapsed_time": "0:20:06", "remaining_time": "0:45:52", "throughput": 12422.47, "total_tokens": 14985728}
|
|
{"current_steps": 4765, "total_steps": 15621, "loss": 0.4522, "lr": 1.7548708305189722e-06, "epoch": 0.30503808975097624, "percentage": 30.5, "elapsed_time": "0:20:07", "remaining_time": "0:45:50", "throughput": 12429.89, "total_tokens": 15003904}
|
|
{"current_steps": 4770, "total_steps": 15621, "loss": 0.5752, "lr": 1.7541375072108905e-06, "epoch": 0.3053581716919531, "percentage": 30.54, "elapsed_time": "0:20:07", "remaining_time": "0:45:47", "throughput": 12435.83, "total_tokens": 15019328}
|
|
{"current_steps": 4775, "total_steps": 15621, "loss": 0.4732, "lr": 1.7534032423532766e-06, "epoch": 0.30567825363293005, "percentage": 30.57, "elapsed_time": "0:20:08", "remaining_time": "0:45:44", "throughput": 12441.26, "total_tokens": 15033856}
|
|
{"current_steps": 4780, "total_steps": 15621, "loss": 0.361, "lr": 1.7526680368628685e-06, "epoch": 0.30599833557390693, "percentage": 30.6, "elapsed_time": "0:20:09", "remaining_time": "0:45:42", "throughput": 12448.19, "total_tokens": 15051200}
|
|
{"current_steps": 4785, "total_steps": 15621, "loss": 0.4427, "lr": 1.751931891657579e-06, "epoch": 0.3063184175148838, "percentage": 30.63, "elapsed_time": "0:20:09", "remaining_time": "0:45:39", "throughput": 12453.98, "total_tokens": 15066368}
|
|
{"current_steps": 4790, "total_steps": 15621, "loss": 0.3568, "lr": 1.7511948076564943e-06, "epoch": 0.3066384994558607, "percentage": 30.66, "elapsed_time": "0:20:10", "remaining_time": "0:45:36", "throughput": 12459.76, "total_tokens": 15081600}
|
|
{"current_steps": 4795, "total_steps": 15621, "loss": 0.5404, "lr": 1.7504567857798722e-06, "epoch": 0.30695858139683757, "percentage": 30.7, "elapsed_time": "0:20:11", "remaining_time": "0:45:34", "throughput": 12465.92, "total_tokens": 15097536}
|
|
{"current_steps": 4800, "total_steps": 15621, "loss": 0.4943, "lr": 1.7497178269491417e-06, "epoch": 0.3072786633378145, "percentage": 30.73, "elapsed_time": "0:20:11", "remaining_time": "0:45:31", "throughput": 12472.22, "total_tokens": 15113728}
|
|
{"current_steps": 4805, "total_steps": 15621, "loss": 0.5532, "lr": 1.7489779320869014e-06, "epoch": 0.3075987452787914, "percentage": 30.76, "elapsed_time": "0:20:12", "remaining_time": "0:45:29", "throughput": 12478.59, "total_tokens": 15130048}
|
|
{"current_steps": 4810, "total_steps": 15621, "loss": 0.3715, "lr": 1.7482371021169193e-06, "epoch": 0.30791882721976827, "percentage": 30.79, "elapsed_time": "0:20:13", "remaining_time": "0:45:26", "throughput": 12484.55, "total_tokens": 15145600}
|
|
{"current_steps": 4815, "total_steps": 15621, "loss": 0.4077, "lr": 1.7474953379641297e-06, "epoch": 0.30823890916074514, "percentage": 30.82, "elapsed_time": "0:20:13", "remaining_time": "0:45:24", "throughput": 12491.12, "total_tokens": 15162368}
|
|
{"current_steps": 4820, "total_steps": 15621, "loss": 0.438, "lr": 1.746752640554634e-06, "epoch": 0.308558991101722, "percentage": 30.86, "elapsed_time": "0:20:14", "remaining_time": "0:45:21", "throughput": 12497.31, "total_tokens": 15178368}
|
|
{"current_steps": 4825, "total_steps": 15621, "loss": 0.5348, "lr": 1.7460090108156988e-06, "epoch": 0.3088790730426989, "percentage": 30.89, "elapsed_time": "0:20:15", "remaining_time": "0:45:18", "throughput": 12503.03, "total_tokens": 15193408}
|
|
{"current_steps": 4830, "total_steps": 15621, "loss": 0.3155, "lr": 1.7452644496757548e-06, "epoch": 0.30919915498367584, "percentage": 30.92, "elapsed_time": "0:20:15", "remaining_time": "0:45:16", "throughput": 12508.79, "total_tokens": 15208640}
|
|
{"current_steps": 4835, "total_steps": 15621, "loss": 0.4557, "lr": 1.7445189580643946e-06, "epoch": 0.3095192369246527, "percentage": 30.95, "elapsed_time": "0:20:16", "remaining_time": "0:45:13", "throughput": 12514.79, "total_tokens": 15224192}
|
|
{"current_steps": 4840, "total_steps": 15621, "loss": 0.5187, "lr": 1.7437725369123737e-06, "epoch": 0.3098393188656296, "percentage": 30.98, "elapsed_time": "0:20:17", "remaining_time": "0:45:11", "throughput": 12520.62, "total_tokens": 15239616}
|
|
{"current_steps": 4845, "total_steps": 15621, "loss": 0.4925, "lr": 1.7430251871516077e-06, "epoch": 0.3101594008066065, "percentage": 31.02, "elapsed_time": "0:20:17", "remaining_time": "0:45:08", "throughput": 12526.79, "total_tokens": 15255680}
|
|
{"current_steps": 4850, "total_steps": 15621, "loss": 0.5256, "lr": 1.7422769097151715e-06, "epoch": 0.31047948274758336, "percentage": 31.05, "elapsed_time": "0:20:18", "remaining_time": "0:45:06", "throughput": 12532.7, "total_tokens": 15271232}
|
|
{"current_steps": 4855, "total_steps": 15621, "loss": 0.5038, "lr": 1.7415277055372982e-06, "epoch": 0.3107995646885603, "percentage": 31.08, "elapsed_time": "0:20:19", "remaining_time": "0:45:03", "throughput": 12538.68, "total_tokens": 15287040}
|
|
{"current_steps": 4860, "total_steps": 15621, "loss": 0.5181, "lr": 1.7407775755533778e-06, "epoch": 0.31111964662953717, "percentage": 31.11, "elapsed_time": "0:20:19", "remaining_time": "0:45:01", "throughput": 12545.45, "total_tokens": 15304256}
|
|
{"current_steps": 4865, "total_steps": 15621, "loss": 0.364, "lr": 1.7400265206999568e-06, "epoch": 0.31143972857051405, "percentage": 31.14, "elapsed_time": "0:20:20", "remaining_time": "0:44:58", "throughput": 12552.51, "total_tokens": 15322112}
|
|
{"current_steps": 4870, "total_steps": 15621, "loss": 0.5297, "lr": 1.7392745419147362e-06, "epoch": 0.31175981051149093, "percentage": 31.18, "elapsed_time": "0:20:21", "remaining_time": "0:44:56", "throughput": 12558.18, "total_tokens": 15337216}
|
|
{"current_steps": 4875, "total_steps": 15621, "loss": 0.4478, "lr": 1.7385216401365693e-06, "epoch": 0.3120798924524678, "percentage": 31.21, "elapsed_time": "0:20:22", "remaining_time": "0:44:53", "throughput": 12564.67, "total_tokens": 15354048}
|
|
{"current_steps": 4880, "total_steps": 15621, "loss": 0.4964, "lr": 1.7377678163054638e-06, "epoch": 0.31239997439344475, "percentage": 31.24, "elapsed_time": "0:20:22", "remaining_time": "0:44:51", "throughput": 12570.34, "total_tokens": 15369344}
|
|
{"current_steps": 4885, "total_steps": 15621, "loss": 0.4864, "lr": 1.7370130713625775e-06, "epoch": 0.3127200563344216, "percentage": 31.27, "elapsed_time": "0:20:23", "remaining_time": "0:44:48", "throughput": 12576.74, "total_tokens": 15385920}
|
|
{"current_steps": 4890, "total_steps": 15621, "loss": 0.3948, "lr": 1.736257406250218e-06, "epoch": 0.3130401382753985, "percentage": 31.3, "elapsed_time": "0:20:24", "remaining_time": "0:44:46", "throughput": 12582.6, "total_tokens": 15401536}
|
|
{"current_steps": 4895, "total_steps": 15621, "loss": 0.4629, "lr": 1.735500821911842e-06, "epoch": 0.3133602202163754, "percentage": 31.34, "elapsed_time": "0:20:24", "remaining_time": "0:44:43", "throughput": 12588.46, "total_tokens": 15417152}
|
|
{"current_steps": 4900, "total_steps": 15621, "loss": 0.4961, "lr": 1.7347433192920544e-06, "epoch": 0.31368030215735226, "percentage": 31.37, "elapsed_time": "0:20:25", "remaining_time": "0:44:41", "throughput": 12593.87, "total_tokens": 15431872}
|
|
{"current_steps": 4905, "total_steps": 15621, "loss": 0.4021, "lr": 1.7339848993366056e-06, "epoch": 0.3140003840983292, "percentage": 31.4, "elapsed_time": "0:20:26", "remaining_time": "0:44:38", "throughput": 12599.82, "total_tokens": 15447552}
|
|
{"current_steps": 4910, "total_steps": 15621, "loss": 0.4667, "lr": 1.7332255629923922e-06, "epoch": 0.3143204660393061, "percentage": 31.43, "elapsed_time": "0:20:26", "remaining_time": "0:44:36", "throughput": 12606.27, "total_tokens": 15464384}
|
|
{"current_steps": 4915, "total_steps": 15621, "loss": 0.5038, "lr": 1.732465311207454e-06, "epoch": 0.31464054798028296, "percentage": 31.46, "elapsed_time": "0:20:27", "remaining_time": "0:44:33", "throughput": 12611.94, "total_tokens": 15479808}
|
|
{"current_steps": 4920, "total_steps": 15621, "loss": 0.5018, "lr": 1.731704144930975e-06, "epoch": 0.31496062992125984, "percentage": 31.5, "elapsed_time": "0:20:28", "remaining_time": "0:44:31", "throughput": 12618.28, "total_tokens": 15496512}
|
|
{"current_steps": 4925, "total_steps": 15621, "loss": 0.4137, "lr": 1.7309420651132797e-06, "epoch": 0.3152807118622367, "percentage": 31.53, "elapsed_time": "0:20:28", "remaining_time": "0:44:28", "throughput": 12624.51, "total_tokens": 15512896}
|
|
{"current_steps": 4930, "total_steps": 15621, "loss": 0.3295, "lr": 1.7301790727058343e-06, "epoch": 0.3156007938032136, "percentage": 31.56, "elapsed_time": "0:20:29", "remaining_time": "0:44:26", "throughput": 12630.12, "total_tokens": 15528064}
|
|
{"current_steps": 4935, "total_steps": 15621, "loss": 0.3593, "lr": 1.7294151686612431e-06, "epoch": 0.31592087574419053, "percentage": 31.59, "elapsed_time": "0:20:30", "remaining_time": "0:44:23", "throughput": 12635.81, "total_tokens": 15543424}
|
|
{"current_steps": 4940, "total_steps": 15621, "loss": 0.5778, "lr": 1.7286503539332495e-06, "epoch": 0.3162409576851674, "percentage": 31.62, "elapsed_time": "0:20:30", "remaining_time": "0:44:21", "throughput": 12642.13, "total_tokens": 15560192}
|
|
{"current_steps": 4945, "total_steps": 15621, "loss": 0.3873, "lr": 1.7278846294767337e-06, "epoch": 0.3165610396261443, "percentage": 31.66, "elapsed_time": "0:20:31", "remaining_time": "0:44:18", "throughput": 12648.02, "total_tokens": 15576128}
|
|
{"current_steps": 4950, "total_steps": 15621, "loss": 0.6923, "lr": 1.7271179962477118e-06, "epoch": 0.31688112156712117, "percentage": 31.69, "elapsed_time": "0:20:32", "remaining_time": "0:44:16", "throughput": 12654.2, "total_tokens": 15592576}
|
|
{"current_steps": 4955, "total_steps": 15621, "loss": 0.4372, "lr": 1.7263504552033341e-06, "epoch": 0.31720120350809805, "percentage": 31.72, "elapsed_time": "0:20:32", "remaining_time": "0:44:13", "throughput": 12659.68, "total_tokens": 15607744}
|
|
{"current_steps": 4960, "total_steps": 15621, "loss": 0.481, "lr": 1.725582007301885e-06, "epoch": 0.317521285449075, "percentage": 31.75, "elapsed_time": "0:20:33", "remaining_time": "0:44:11", "throughput": 12665.45, "total_tokens": 15623360}
|
|
{"current_steps": 4965, "total_steps": 15621, "loss": 0.4251, "lr": 1.7248126535027806e-06, "epoch": 0.31784136739005187, "percentage": 31.78, "elapsed_time": "0:20:34", "remaining_time": "0:44:08", "throughput": 12671.12, "total_tokens": 15638656}
|
|
{"current_steps": 4970, "total_steps": 15621, "loss": 0.4569, "lr": 1.7240423947665678e-06, "epoch": 0.31816144933102875, "percentage": 31.82, "elapsed_time": "0:20:34", "remaining_time": "0:44:06", "throughput": 12676.92, "total_tokens": 15654400}
|
|
{"current_steps": 4975, "total_steps": 15621, "loss": 0.3867, "lr": 1.723271232054924e-06, "epoch": 0.3184815312720056, "percentage": 31.85, "elapsed_time": "0:20:35", "remaining_time": "0:44:03", "throughput": 12682.64, "total_tokens": 15670016}
|
|
{"current_steps": 4980, "total_steps": 15621, "loss": 0.5265, "lr": 1.722499166330655e-06, "epoch": 0.3188016132129825, "percentage": 31.88, "elapsed_time": "0:20:36", "remaining_time": "0:44:01", "throughput": 12688.71, "total_tokens": 15686208}
|
|
{"current_steps": 4985, "total_steps": 15621, "loss": 0.443, "lr": 1.7217261985576936e-06, "epoch": 0.31912169515395944, "percentage": 31.91, "elapsed_time": "0:20:36", "remaining_time": "0:43:59", "throughput": 12694.79, "total_tokens": 15702592}
|
|
{"current_steps": 4990, "total_steps": 15621, "loss": 0.5114, "lr": 1.7209523297010992e-06, "epoch": 0.3194417770949363, "percentage": 31.94, "elapsed_time": "0:20:37", "remaining_time": "0:43:56", "throughput": 12700.26, "total_tokens": 15717696}
|
|
{"current_steps": 4995, "total_steps": 15621, "loss": 0.4619, "lr": 1.7201775607270564e-06, "epoch": 0.3197618590359132, "percentage": 31.98, "elapsed_time": "0:20:38", "remaining_time": "0:43:54", "throughput": 12705.92, "total_tokens": 15733184}
|
|
{"current_steps": 5000, "total_steps": 15621, "loss": 0.5318, "lr": 1.7194018926028733e-06, "epoch": 0.3200819409768901, "percentage": 32.01, "elapsed_time": "0:20:38", "remaining_time": "0:43:51", "throughput": 12712.09, "total_tokens": 15749888}
|
|
{"current_steps": 5005, "total_steps": 15621, "loss": 0.3622, "lr": 1.7186253262969803e-06, "epoch": 0.32040202291786696, "percentage": 32.04, "elapsed_time": "0:20:39", "remaining_time": "0:43:49", "throughput": 12719.2, "total_tokens": 15768384}
|
|
{"current_steps": 5010, "total_steps": 15621, "loss": 0.3291, "lr": 1.7178478627789299e-06, "epoch": 0.32072210485884384, "percentage": 32.07, "elapsed_time": "0:20:40", "remaining_time": "0:43:47", "throughput": 12725.07, "total_tokens": 15784448}
|
|
{"current_steps": 5015, "total_steps": 15621, "loss": 0.4122, "lr": 1.7170695030193944e-06, "epoch": 0.3210421867998208, "percentage": 32.1, "elapsed_time": "0:20:41", "remaining_time": "0:43:44", "throughput": 12730.91, "total_tokens": 15800512}
|
|
{"current_steps": 5020, "total_steps": 15621, "loss": 0.4778, "lr": 1.716290247990165e-06, "epoch": 0.32136226874079765, "percentage": 32.14, "elapsed_time": "0:20:41", "remaining_time": "0:43:42", "throughput": 12736.32, "total_tokens": 15815680}
|
|
{"current_steps": 5025, "total_steps": 15621, "loss": 0.3896, "lr": 1.715510098664151e-06, "epoch": 0.32168235068177453, "percentage": 32.17, "elapsed_time": "0:20:42", "remaining_time": "0:43:39", "throughput": 12741.58, "total_tokens": 15830528}
|
|
{"current_steps": 5030, "total_steps": 15621, "loss": 0.5141, "lr": 1.7147290560153777e-06, "epoch": 0.3220024326227514, "percentage": 32.2, "elapsed_time": "0:20:43", "remaining_time": "0:43:37", "throughput": 12746.91, "total_tokens": 15845568}
|
|
{"current_steps": 5035, "total_steps": 15621, "loss": 0.447, "lr": 1.7139471210189862e-06, "epoch": 0.3223225145637283, "percentage": 32.23, "elapsed_time": "0:20:43", "remaining_time": "0:43:35", "throughput": 12752.8, "total_tokens": 15861632}
|
|
{"current_steps": 5040, "total_steps": 15621, "loss": 0.543, "lr": 1.7131642946512312e-06, "epoch": 0.3226425965047052, "percentage": 32.26, "elapsed_time": "0:20:44", "remaining_time": "0:43:32", "throughput": 12758.62, "total_tokens": 15877632}
|
|
{"current_steps": 5045, "total_steps": 15621, "loss": 0.3918, "lr": 1.712380577889481e-06, "epoch": 0.3229626784456821, "percentage": 32.3, "elapsed_time": "0:20:45", "remaining_time": "0:43:30", "throughput": 12764.24, "total_tokens": 15893184}
|
|
{"current_steps": 5050, "total_steps": 15621, "loss": 0.3963, "lr": 1.711595971712215e-06, "epoch": 0.323282760386659, "percentage": 32.33, "elapsed_time": "0:20:45", "remaining_time": "0:43:27", "throughput": 12769.68, "total_tokens": 15908416}
|
|
{"current_steps": 5055, "total_steps": 15621, "loss": 0.4042, "lr": 1.7108104770990234e-06, "epoch": 0.32360284232763586, "percentage": 32.36, "elapsed_time": "0:20:46", "remaining_time": "0:43:25", "throughput": 12775.38, "total_tokens": 15924224}
|
|
{"current_steps": 5060, "total_steps": 15621, "loss": 0.254, "lr": 1.7100240950306052e-06, "epoch": 0.32392292426861274, "percentage": 32.39, "elapsed_time": "0:20:47", "remaining_time": "0:43:23", "throughput": 12781.1, "total_tokens": 15940032}
|
|
{"current_steps": 5065, "total_steps": 15621, "loss": 0.4647, "lr": 1.7092368264887677e-06, "epoch": 0.3242430062095897, "percentage": 32.42, "elapsed_time": "0:20:47", "remaining_time": "0:43:20", "throughput": 12786.38, "total_tokens": 15954944}
|
|
{"current_steps": 5070, "total_steps": 15621, "loss": 0.4846, "lr": 1.7084486724564252e-06, "epoch": 0.32456308815056656, "percentage": 32.46, "elapsed_time": "0:20:48", "remaining_time": "0:43:18", "throughput": 12792.07, "total_tokens": 15970624}
|
|
{"current_steps": 5075, "total_steps": 15621, "loss": 0.4092, "lr": 1.707659633917597e-06, "epoch": 0.32488317009154344, "percentage": 32.49, "elapsed_time": "0:20:49", "remaining_time": "0:43:15", "throughput": 12797.9, "total_tokens": 15986688}
|
|
{"current_steps": 5080, "total_steps": 15621, "loss": 0.4098, "lr": 1.7068697118574064e-06, "epoch": 0.3252032520325203, "percentage": 32.52, "elapsed_time": "0:20:49", "remaining_time": "0:43:13", "throughput": 12803.71, "total_tokens": 16002752}
|
|
{"current_steps": 5085, "total_steps": 15621, "loss": 0.4931, "lr": 1.7060789072620816e-06, "epoch": 0.3255233339734972, "percentage": 32.55, "elapsed_time": "0:20:50", "remaining_time": "0:43:11", "throughput": 12809.24, "total_tokens": 16018112}
|
|
{"current_steps": 5090, "total_steps": 15621, "loss": 0.4288, "lr": 1.7052872211189509e-06, "epoch": 0.32584341591447413, "percentage": 32.58, "elapsed_time": "0:20:51", "remaining_time": "0:43:08", "throughput": 12814.94, "total_tokens": 16033984}
|
|
{"current_steps": 5095, "total_steps": 15621, "loss": 0.3304, "lr": 1.7044946544164431e-06, "epoch": 0.326163497855451, "percentage": 32.62, "elapsed_time": "0:20:51", "remaining_time": "0:43:06", "throughput": 12820.44, "total_tokens": 16049536}
|
|
{"current_steps": 5100, "total_steps": 15621, "loss": 0.3713, "lr": 1.703701208144088e-06, "epoch": 0.3264835797964279, "percentage": 32.65, "elapsed_time": "0:20:52", "remaining_time": "0:43:03", "throughput": 12826.58, "total_tokens": 16066304}
|
|
{"current_steps": 5105, "total_steps": 15621, "loss": 0.4829, "lr": 1.702906883292512e-06, "epoch": 0.32680366173740477, "percentage": 32.68, "elapsed_time": "0:20:53", "remaining_time": "0:43:01", "throughput": 12831.9, "total_tokens": 16081536}
|
|
{"current_steps": 5110, "total_steps": 15621, "loss": 0.5586, "lr": 1.7021116808534393e-06, "epoch": 0.32712374367838165, "percentage": 32.71, "elapsed_time": "0:20:53", "remaining_time": "0:42:59", "throughput": 12837.33, "total_tokens": 16096896}
|
|
{"current_steps": 5115, "total_steps": 15621, "loss": 0.443, "lr": 1.7013156018196893e-06, "epoch": 0.32744382561935853, "percentage": 32.74, "elapsed_time": "0:20:54", "remaining_time": "0:42:56", "throughput": 12843.11, "total_tokens": 16112960}
|
|
{"current_steps": 5120, "total_steps": 15621, "loss": 0.4038, "lr": 1.7005186471851759e-06, "epoch": 0.32776390756033547, "percentage": 32.78, "elapsed_time": "0:20:55", "remaining_time": "0:42:54", "throughput": 12849.05, "total_tokens": 16129344}
|
|
{"current_steps": 5125, "total_steps": 15621, "loss": 0.6052, "lr": 1.6997208179449066e-06, "epoch": 0.32808398950131235, "percentage": 32.81, "elapsed_time": "0:20:56", "remaining_time": "0:42:52", "throughput": 12855.99, "total_tokens": 16147776}
|
|
{"current_steps": 5130, "total_steps": 15621, "loss": 0.3508, "lr": 1.6989221150949806e-06, "epoch": 0.3284040714422892, "percentage": 32.84, "elapsed_time": "0:20:56", "remaining_time": "0:42:50", "throughput": 12861.22, "total_tokens": 16162880}
|
|
{"current_steps": 5135, "total_steps": 15621, "loss": 0.2676, "lr": 1.6981225396325873e-06, "epoch": 0.3287241533832661, "percentage": 32.87, "elapsed_time": "0:20:57", "remaining_time": "0:42:47", "throughput": 12867.23, "total_tokens": 16179392}
|
|
{"current_steps": 5140, "total_steps": 15621, "loss": 0.504, "lr": 1.6973220925560067e-06, "epoch": 0.329044235324243, "percentage": 32.9, "elapsed_time": "0:20:58", "remaining_time": "0:42:45", "throughput": 12872.52, "total_tokens": 16194560}
|
|
{"current_steps": 5145, "total_steps": 15621, "loss": 0.4243, "lr": 1.696520774864606e-06, "epoch": 0.3293643172652199, "percentage": 32.94, "elapsed_time": "0:20:58", "remaining_time": "0:42:42", "throughput": 12878.07, "total_tokens": 16210112}
|
|
{"current_steps": 5150, "total_steps": 15621, "loss": 0.464, "lr": 1.69571858755884e-06, "epoch": 0.3296843992061968, "percentage": 32.97, "elapsed_time": "0:20:59", "remaining_time": "0:42:40", "throughput": 12883.72, "total_tokens": 16225856}
|
|
{"current_steps": 5155, "total_steps": 15621, "loss": 0.4314, "lr": 1.6949155316402487e-06, "epoch": 0.3300044811471737, "percentage": 33.0, "elapsed_time": "0:21:00", "remaining_time": "0:42:38", "throughput": 12889.17, "total_tokens": 16241536}
|
|
{"current_steps": 5160, "total_steps": 15621, "loss": 0.3807, "lr": 1.6941116081114566e-06, "epoch": 0.33032456308815056, "percentage": 33.03, "elapsed_time": "0:21:00", "remaining_time": "0:42:35", "throughput": 12894.31, "total_tokens": 16256384}
|
|
{"current_steps": 5165, "total_steps": 15621, "loss": 0.398, "lr": 1.6933068179761722e-06, "epoch": 0.33064464502912744, "percentage": 33.06, "elapsed_time": "0:21:01", "remaining_time": "0:42:33", "throughput": 12899.54, "total_tokens": 16271360}
|
|
{"current_steps": 5170, "total_steps": 15621, "loss": 0.4122, "lr": 1.6925011622391857e-06, "epoch": 0.3309647269701044, "percentage": 33.1, "elapsed_time": "0:21:02", "remaining_time": "0:42:31", "throughput": 12904.89, "total_tokens": 16286656}
|
|
{"current_steps": 5175, "total_steps": 15621, "loss": 0.4255, "lr": 1.6916946419063667e-06, "epoch": 0.33128480891108125, "percentage": 33.13, "elapsed_time": "0:21:02", "remaining_time": "0:42:28", "throughput": 12910.54, "total_tokens": 16302592}
|
|
{"current_steps": 5180, "total_steps": 15621, "loss": 0.5442, "lr": 1.690887257984666e-06, "epoch": 0.33160489085205813, "percentage": 33.16, "elapsed_time": "0:21:03", "remaining_time": "0:42:26", "throughput": 12916.22, "total_tokens": 16318656}
|
|
{"current_steps": 5185, "total_steps": 15621, "loss": 0.4755, "lr": 1.690079011482112e-06, "epoch": 0.331924972793035, "percentage": 33.19, "elapsed_time": "0:21:04", "remaining_time": "0:42:24", "throughput": 12921.68, "total_tokens": 16334016}
|
|
{"current_steps": 5190, "total_steps": 15621, "loss": 0.5287, "lr": 1.6892699034078096e-06, "epoch": 0.3322450547340119, "percentage": 33.22, "elapsed_time": "0:21:04", "remaining_time": "0:42:21", "throughput": 12927.34, "total_tokens": 16349888}
|
|
{"current_steps": 5195, "total_steps": 15621, "loss": 0.503, "lr": 1.68845993477194e-06, "epoch": 0.33256513667498877, "percentage": 33.26, "elapsed_time": "0:21:05", "remaining_time": "0:42:19", "throughput": 12932.62, "total_tokens": 16365056}
|
|
{"current_steps": 5200, "total_steps": 15621, "loss": 0.3973, "lr": 1.6876491065857584e-06, "epoch": 0.3328852186159657, "percentage": 33.29, "elapsed_time": "0:21:06", "remaining_time": "0:42:17", "throughput": 12937.77, "total_tokens": 16380032}
|
|
{"current_steps": 5205, "total_steps": 15621, "loss": 0.6461, "lr": 1.6868374198615928e-06, "epoch": 0.3332053005569426, "percentage": 33.32, "elapsed_time": "0:21:06", "remaining_time": "0:42:14", "throughput": 12942.8, "total_tokens": 16394752}
|
|
{"current_steps": 5210, "total_steps": 15621, "loss": 0.4714, "lr": 1.6860248756128448e-06, "epoch": 0.33352538249791946, "percentage": 33.35, "elapsed_time": "0:21:07", "remaining_time": "0:42:12", "throughput": 12948.25, "total_tokens": 16410368}
|
|
{"current_steps": 5215, "total_steps": 15621, "loss": 0.4142, "lr": 1.6852114748539844e-06, "epoch": 0.33384546443889634, "percentage": 33.38, "elapsed_time": "0:21:08", "remaining_time": "0:42:10", "throughput": 12953.24, "total_tokens": 16425088}
|
|
{"current_steps": 5220, "total_steps": 15621, "loss": 0.3446, "lr": 1.6843972186005525e-06, "epoch": 0.3341655463798732, "percentage": 33.42, "elapsed_time": "0:21:08", "remaining_time": "0:42:07", "throughput": 12958.95, "total_tokens": 16441152}
|
|
{"current_steps": 5225, "total_steps": 15621, "loss": 0.4705, "lr": 1.6835821078691577e-06, "epoch": 0.33448562832085016, "percentage": 33.45, "elapsed_time": "0:21:09", "remaining_time": "0:42:05", "throughput": 12965.13, "total_tokens": 16458240}
|
|
{"current_steps": 5230, "total_steps": 15621, "loss": 0.4342, "lr": 1.6827661436774746e-06, "epoch": 0.33480571026182704, "percentage": 33.48, "elapsed_time": "0:21:10", "remaining_time": "0:42:03", "throughput": 12970.73, "total_tokens": 16474112}
|
|
{"current_steps": 5235, "total_steps": 15621, "loss": 0.3957, "lr": 1.681949327044245e-06, "epoch": 0.3351257922028039, "percentage": 33.51, "elapsed_time": "0:21:10", "remaining_time": "0:42:01", "throughput": 12976.62, "total_tokens": 16490560}
|
|
{"current_steps": 5240, "total_steps": 15621, "loss": 0.6821, "lr": 1.6811316589892734e-06, "epoch": 0.3354458741437808, "percentage": 33.54, "elapsed_time": "0:21:11", "remaining_time": "0:41:58", "throughput": 12981.82, "total_tokens": 16505728}
|
|
{"current_steps": 5245, "total_steps": 15621, "loss": 0.4364, "lr": 1.6803131405334284e-06, "epoch": 0.3357659560847577, "percentage": 33.58, "elapsed_time": "0:21:12", "remaining_time": "0:41:56", "throughput": 12987.54, "total_tokens": 16521856}
|
|
{"current_steps": 5250, "total_steps": 15621, "loss": 0.4436, "lr": 1.6794937726986396e-06, "epoch": 0.3360860380257346, "percentage": 33.61, "elapsed_time": "0:21:12", "remaining_time": "0:41:54", "throughput": 12993.16, "total_tokens": 16537792}
|
|
{"current_steps": 5255, "total_steps": 15621, "loss": 0.4347, "lr": 1.6786735565078974e-06, "epoch": 0.3364061199667115, "percentage": 33.64, "elapsed_time": "0:21:13", "remaining_time": "0:41:52", "throughput": 12998.56, "total_tokens": 16553408}
|
|
{"current_steps": 5260, "total_steps": 15621, "loss": 0.4233, "lr": 1.677852492985251e-06, "epoch": 0.33672620190768837, "percentage": 33.67, "elapsed_time": "0:21:14", "remaining_time": "0:41:49", "throughput": 13004.51, "total_tokens": 16570112}
|
|
{"current_steps": 5265, "total_steps": 15621, "loss": 0.5003, "lr": 1.6770305831558086e-06, "epoch": 0.33704628384866525, "percentage": 33.7, "elapsed_time": "0:21:14", "remaining_time": "0:41:47", "throughput": 13010.15, "total_tokens": 16586304}
|
|
{"current_steps": 5270, "total_steps": 15621, "loss": 0.3912, "lr": 1.6762078280457342e-06, "epoch": 0.33736636578964213, "percentage": 33.74, "elapsed_time": "0:21:15", "remaining_time": "0:41:45", "throughput": 13015.48, "total_tokens": 16601920}
|
|
{"current_steps": 5275, "total_steps": 15621, "loss": 0.4725, "lr": 1.6753842286822465e-06, "epoch": 0.33768644773061907, "percentage": 33.77, "elapsed_time": "0:21:16", "remaining_time": "0:41:43", "throughput": 13021.26, "total_tokens": 16618240}
|
|
{"current_steps": 5280, "total_steps": 15621, "loss": 0.5845, "lr": 1.6745597860936199e-06, "epoch": 0.33800652967159595, "percentage": 33.8, "elapsed_time": "0:21:16", "remaining_time": "0:41:40", "throughput": 13026.45, "total_tokens": 16633408}
|
|
{"current_steps": 5285, "total_steps": 15621, "loss": 0.4484, "lr": 1.6737345013091794e-06, "epoch": 0.3383266116125728, "percentage": 33.83, "elapsed_time": "0:21:17", "remaining_time": "0:41:38", "throughput": 13032.13, "total_tokens": 16649664}
|
|
{"current_steps": 5290, "total_steps": 15621, "loss": 0.4686, "lr": 1.672908375359304e-06, "epoch": 0.3386466935535497, "percentage": 33.86, "elapsed_time": "0:21:18", "remaining_time": "0:41:36", "throughput": 13037.32, "total_tokens": 16664896}
|
|
{"current_steps": 5295, "total_steps": 15621, "loss": 0.5565, "lr": 1.6720814092754209e-06, "epoch": 0.3389667754945266, "percentage": 33.9, "elapsed_time": "0:21:18", "remaining_time": "0:41:34", "throughput": 13042.58, "total_tokens": 16680384}
|
|
{"current_steps": 5300, "total_steps": 15621, "loss": 0.3785, "lr": 1.6712536040900075e-06, "epoch": 0.33928685743550346, "percentage": 33.93, "elapsed_time": "0:21:19", "remaining_time": "0:41:31", "throughput": 13048.1, "total_tokens": 16696192}
|
|
{"current_steps": 5305, "total_steps": 15621, "loss": 0.4741, "lr": 1.6704249608365878e-06, "epoch": 0.3396069393764804, "percentage": 33.96, "elapsed_time": "0:21:20", "remaining_time": "0:41:30", "throughput": 13059.68, "total_tokens": 16727104}
|
|
{"current_steps": 5310, "total_steps": 15621, "loss": 0.4291, "lr": 1.669595480549733e-06, "epoch": 0.3399270213174573, "percentage": 33.99, "elapsed_time": "0:21:21", "remaining_time": "0:41:28", "throughput": 13064.58, "total_tokens": 16741696}
|
|
{"current_steps": 5315, "total_steps": 15621, "loss": 0.4384, "lr": 1.6687651642650587e-06, "epoch": 0.34024710325843416, "percentage": 34.02, "elapsed_time": "0:21:22", "remaining_time": "0:41:26", "throughput": 13069.84, "total_tokens": 16757120}
|
|
{"current_steps": 5320, "total_steps": 15621, "loss": 0.4572, "lr": 1.6679340130192245e-06, "epoch": 0.34056718519941104, "percentage": 34.06, "elapsed_time": "0:21:22", "remaining_time": "0:41:23", "throughput": 13074.97, "total_tokens": 16772416}
|
|
{"current_steps": 5325, "total_steps": 15621, "loss": 0.3287, "lr": 1.667102027849933e-06, "epoch": 0.3408872671403879, "percentage": 34.09, "elapsed_time": "0:21:23", "remaining_time": "0:41:21", "throughput": 13080.51, "total_tokens": 16788352}
|
|
{"current_steps": 5330, "total_steps": 15621, "loss": 0.3582, "lr": 1.6662692097959266e-06, "epoch": 0.34120734908136485, "percentage": 34.12, "elapsed_time": "0:21:24", "remaining_time": "0:41:19", "throughput": 13085.68, "total_tokens": 16803648}
|
|
{"current_steps": 5335, "total_steps": 15621, "loss": 0.4741, "lr": 1.6654355598969894e-06, "epoch": 0.34152743102234173, "percentage": 34.15, "elapsed_time": "0:21:24", "remaining_time": "0:41:17", "throughput": 13090.86, "total_tokens": 16818944}
|
|
{"current_steps": 5340, "total_steps": 15621, "loss": 0.5007, "lr": 1.6646010791939423e-06, "epoch": 0.3418475129633186, "percentage": 34.18, "elapsed_time": "0:21:25", "remaining_time": "0:41:14", "throughput": 13095.91, "total_tokens": 16833984}
|
|
{"current_steps": 5345, "total_steps": 15621, "loss": 0.5632, "lr": 1.6637657687286446e-06, "epoch": 0.3421675949042955, "percentage": 34.22, "elapsed_time": "0:21:26", "remaining_time": "0:41:12", "throughput": 13101.16, "total_tokens": 16849280}
|
|
{"current_steps": 5350, "total_steps": 15621, "loss": 0.4051, "lr": 1.6629296295439912e-06, "epoch": 0.34248767684527237, "percentage": 34.25, "elapsed_time": "0:21:26", "remaining_time": "0:41:10", "throughput": 13106.78, "total_tokens": 16865664}
|
|
{"current_steps": 5355, "total_steps": 15621, "loss": 0.4945, "lr": 1.6620926626839116e-06, "epoch": 0.3428077587862493, "percentage": 34.28, "elapsed_time": "0:21:27", "remaining_time": "0:41:08", "throughput": 13112.12, "total_tokens": 16881536}
|
|
{"current_steps": 5360, "total_steps": 15621, "loss": 0.4456, "lr": 1.661254869193369e-06, "epoch": 0.3431278407272262, "percentage": 34.31, "elapsed_time": "0:21:28", "remaining_time": "0:41:06", "throughput": 13118.21, "total_tokens": 16898816}
|
|
{"current_steps": 5365, "total_steps": 15621, "loss": 0.5174, "lr": 1.6604162501183581e-06, "epoch": 0.34344792266820307, "percentage": 34.34, "elapsed_time": "0:21:28", "remaining_time": "0:41:03", "throughput": 13123.85, "total_tokens": 16915136}
|
|
{"current_steps": 5370, "total_steps": 15621, "loss": 0.4742, "lr": 1.6595768065059045e-06, "epoch": 0.34376800460917994, "percentage": 34.38, "elapsed_time": "0:21:29", "remaining_time": "0:41:01", "throughput": 13129.3, "total_tokens": 16931200}
|
|
{"current_steps": 5375, "total_steps": 15621, "loss": 0.4691, "lr": 1.6587365394040641e-06, "epoch": 0.3440880865501568, "percentage": 34.41, "elapsed_time": "0:21:30", "remaining_time": "0:40:59", "throughput": 13134.52, "total_tokens": 16946816}
|
|
{"current_steps": 5380, "total_steps": 15621, "loss": 0.3826, "lr": 1.6578954498619195e-06, "epoch": 0.3444081684911337, "percentage": 34.44, "elapsed_time": "0:21:30", "remaining_time": "0:40:57", "throughput": 13139.98, "total_tokens": 16962880}
|
|
{"current_steps": 5385, "total_steps": 15621, "loss": 0.4712, "lr": 1.6570535389295814e-06, "epoch": 0.34472825043211064, "percentage": 34.47, "elapsed_time": "0:21:31", "remaining_time": "0:40:55", "throughput": 13145.16, "total_tokens": 16978240}
|
|
{"current_steps": 5390, "total_steps": 15621, "loss": 0.3684, "lr": 1.6562108076581853e-06, "epoch": 0.3450483323730875, "percentage": 34.5, "elapsed_time": "0:21:32", "remaining_time": "0:40:52", "throughput": 13150.39, "total_tokens": 16993728}
|
|
{"current_steps": 5395, "total_steps": 15621, "loss": 0.5846, "lr": 1.6553672570998912e-06, "epoch": 0.3453684143140644, "percentage": 34.54, "elapsed_time": "0:21:32", "remaining_time": "0:40:50", "throughput": 13155.78, "total_tokens": 17009728}
|
|
{"current_steps": 5400, "total_steps": 15621, "loss": 0.414, "lr": 1.6545228883078815e-06, "epoch": 0.3456884962550413, "percentage": 34.57, "elapsed_time": "0:21:33", "remaining_time": "0:40:48", "throughput": 13160.71, "total_tokens": 17024640}
|
|
{"current_steps": 5405, "total_steps": 15621, "loss": 0.36, "lr": 1.653677702336361e-06, "epoch": 0.34600857819601816, "percentage": 34.6, "elapsed_time": "0:21:34", "remaining_time": "0:40:46", "throughput": 13166.05, "total_tokens": 17040512}
|
|
{"current_steps": 5410, "total_steps": 15621, "loss": 0.4801, "lr": 1.6528317002405538e-06, "epoch": 0.3463286601369951, "percentage": 34.63, "elapsed_time": "0:21:34", "remaining_time": "0:40:44", "throughput": 13171.28, "total_tokens": 17056064}
|
|
{"current_steps": 5415, "total_steps": 15621, "loss": 0.3685, "lr": 1.6519848830767043e-06, "epoch": 0.34664874207797197, "percentage": 34.66, "elapsed_time": "0:21:35", "remaining_time": "0:40:41", "throughput": 13176.92, "total_tokens": 17072448}
|
|
{"current_steps": 5420, "total_steps": 15621, "loss": 0.6228, "lr": 1.6511372519020726e-06, "epoch": 0.34696882401894885, "percentage": 34.7, "elapsed_time": "0:21:36", "remaining_time": "0:40:39", "throughput": 13182.24, "total_tokens": 17088320}
|
|
{"current_steps": 5425, "total_steps": 15621, "loss": 0.4376, "lr": 1.650288807774937e-06, "epoch": 0.34728890595992573, "percentage": 34.73, "elapsed_time": "0:21:36", "remaining_time": "0:40:37", "throughput": 13187.73, "total_tokens": 17104448}
|
|
{"current_steps": 5430, "total_steps": 15621, "loss": 0.3981, "lr": 1.6494395517545893e-06, "epoch": 0.3476089879009026, "percentage": 34.76, "elapsed_time": "0:21:37", "remaining_time": "0:40:35", "throughput": 13193.85, "total_tokens": 17121856}
|
|
{"current_steps": 5435, "total_steps": 15621, "loss": 0.5135, "lr": 1.6485894849013362e-06, "epoch": 0.34792906984187955, "percentage": 34.79, "elapsed_time": "0:21:38", "remaining_time": "0:40:33", "throughput": 13198.65, "total_tokens": 17136512}
|
|
{"current_steps": 5440, "total_steps": 15621, "loss": 0.4487, "lr": 1.6477386082764961e-06, "epoch": 0.3482491517828564, "percentage": 34.82, "elapsed_time": "0:21:39", "remaining_time": "0:40:31", "throughput": 13204.15, "total_tokens": 17152640}
|
|
{"current_steps": 5445, "total_steps": 15621, "loss": 0.3645, "lr": 1.6468869229423983e-06, "epoch": 0.3485692337238333, "percentage": 34.86, "elapsed_time": "0:21:39", "remaining_time": "0:40:28", "throughput": 13209.06, "total_tokens": 17167680}
|
|
{"current_steps": 5450, "total_steps": 15621, "loss": 0.6431, "lr": 1.6460344299623813e-06, "epoch": 0.3488893156648102, "percentage": 34.89, "elapsed_time": "0:21:40", "remaining_time": "0:40:26", "throughput": 13214.22, "total_tokens": 17183296}
|
|
{"current_steps": 5455, "total_steps": 15621, "loss": 0.5412, "lr": 1.6451811304007939e-06, "epoch": 0.34920939760578706, "percentage": 34.92, "elapsed_time": "0:21:41", "remaining_time": "0:40:24", "throughput": 13219.14, "total_tokens": 17198272}
|
|
{"current_steps": 5460, "total_steps": 15621, "loss": 0.5194, "lr": 1.6443270253229895e-06, "epoch": 0.349529479546764, "percentage": 34.95, "elapsed_time": "0:21:41", "remaining_time": "0:40:22", "throughput": 13224.16, "total_tokens": 17213376}
|
|
{"current_steps": 5465, "total_steps": 15621, "loss": 0.4614, "lr": 1.6434721157953288e-06, "epoch": 0.3498495614877409, "percentage": 34.98, "elapsed_time": "0:21:42", "remaining_time": "0:40:20", "throughput": 13229.71, "total_tokens": 17229632}
|
|
{"current_steps": 5470, "total_steps": 15621, "loss": 0.5873, "lr": 1.6426164028851765e-06, "epoch": 0.35016964342871776, "percentage": 35.02, "elapsed_time": "0:21:43", "remaining_time": "0:40:18", "throughput": 13235.12, "total_tokens": 17245696}
|
|
{"current_steps": 5474, "total_steps": 15621, "eval_loss": 0.44318872690200806, "epoch": 0.3504257089814993, "percentage": 35.04, "elapsed_time": "0:22:34", "remaining_time": "0:41:50", "throughput": 12745.62, "total_tokens": 17259840}
|
|
{"current_steps": 5475, "total_steps": 15621, "loss": 0.3797, "lr": 1.6417598876609002e-06, "epoch": 0.35048972536969464, "percentage": 35.05, "elapsed_time": "0:23:13", "remaining_time": "0:43:02", "throughput": 12387.84, "total_tokens": 17262976}
|
|
{"current_steps": 5480, "total_steps": 15621, "loss": 0.4144, "lr": 1.640902571191869e-06, "epoch": 0.3508098073106715, "percentage": 35.08, "elapsed_time": "0:23:14", "remaining_time": "0:43:00", "throughput": 12392.98, "total_tokens": 17278336}
|
|
{"current_steps": 5485, "total_steps": 15621, "loss": 0.3617, "lr": 1.6400444545484524e-06, "epoch": 0.3511298892516484, "percentage": 35.11, "elapsed_time": "0:23:14", "remaining_time": "0:42:57", "throughput": 12397.92, "total_tokens": 17293248}
|
|
{"current_steps": 5490, "total_steps": 15621, "loss": 0.428, "lr": 1.6391855388020193e-06, "epoch": 0.35144997119262533, "percentage": 35.14, "elapsed_time": "0:23:15", "remaining_time": "0:42:55", "throughput": 12403.27, "total_tokens": 17309184}
|
|
{"current_steps": 5495, "total_steps": 15621, "loss": 0.4654, "lr": 1.6383258250249363e-06, "epoch": 0.3517700531336022, "percentage": 35.18, "elapsed_time": "0:23:16", "remaining_time": "0:42:52", "throughput": 12408.68, "total_tokens": 17325248}
|
|
{"current_steps": 5500, "total_steps": 15621, "loss": 0.4297, "lr": 1.6374653142905661e-06, "epoch": 0.3520901350745791, "percentage": 35.21, "elapsed_time": "0:23:16", "remaining_time": "0:42:50", "throughput": 12413.86, "total_tokens": 17340736}
|
|
{"current_steps": 5505, "total_steps": 15621, "loss": 0.4224, "lr": 1.6366040076732662e-06, "epoch": 0.35241021701555597, "percentage": 35.24, "elapsed_time": "0:23:17", "remaining_time": "0:42:48", "throughput": 12418.93, "total_tokens": 17355904}
|
|
{"current_steps": 5510, "total_steps": 15621, "loss": 0.4675, "lr": 1.6357419062483882e-06, "epoch": 0.35273029895653285, "percentage": 35.27, "elapsed_time": "0:23:18", "remaining_time": "0:42:45", "throughput": 12424.03, "total_tokens": 17371264}
|
|
{"current_steps": 5515, "total_steps": 15621, "loss": 0.4268, "lr": 1.6348790110922758e-06, "epoch": 0.3530503808975098, "percentage": 35.31, "elapsed_time": "0:23:18", "remaining_time": "0:42:43", "throughput": 12430.02, "total_tokens": 17388608}
|
|
{"current_steps": 5520, "total_steps": 15621, "loss": 0.4558, "lr": 1.6340153232822635e-06, "epoch": 0.35337046283848667, "percentage": 35.34, "elapsed_time": "0:23:19", "remaining_time": "0:42:41", "throughput": 12435.06, "total_tokens": 17403712}
|
|
{"current_steps": 5525, "total_steps": 15621, "loss": 0.5137, "lr": 1.633150843896676e-06, "epoch": 0.35369054477946354, "percentage": 35.37, "elapsed_time": "0:23:20", "remaining_time": "0:42:38", "throughput": 12441.1, "total_tokens": 17421056}
|
|
{"current_steps": 5530, "total_steps": 15621, "loss": 0.5658, "lr": 1.6322855740148263e-06, "epoch": 0.3540106267204404, "percentage": 35.4, "elapsed_time": "0:23:20", "remaining_time": "0:42:36", "throughput": 12446.04, "total_tokens": 17436096}
|
|
{"current_steps": 5535, "total_steps": 15621, "loss": 0.3768, "lr": 1.6314195147170132e-06, "epoch": 0.3543307086614173, "percentage": 35.43, "elapsed_time": "0:23:21", "remaining_time": "0:42:34", "throughput": 12451.54, "total_tokens": 17452480}
|
|
{"current_steps": 5540, "total_steps": 15621, "loss": 0.4032, "lr": 1.6305526670845225e-06, "epoch": 0.35465079060239424, "percentage": 35.47, "elapsed_time": "0:23:22", "remaining_time": "0:42:31", "throughput": 12456.59, "total_tokens": 17467776}
|
|
{"current_steps": 5545, "total_steps": 15621, "loss": 0.4877, "lr": 1.6296850321996232e-06, "epoch": 0.3549708725433711, "percentage": 35.5, "elapsed_time": "0:23:22", "remaining_time": "0:42:29", "throughput": 12461.51, "total_tokens": 17482752}
|
|
{"current_steps": 5550, "total_steps": 15621, "loss": 0.3843, "lr": 1.6288166111455683e-06, "epoch": 0.355290954484348, "percentage": 35.53, "elapsed_time": "0:23:23", "remaining_time": "0:42:26", "throughput": 12466.46, "total_tokens": 17497792}
|
|
{"current_steps": 5555, "total_steps": 15621, "loss": 0.4878, "lr": 1.6279474050065906e-06, "epoch": 0.3556110364253249, "percentage": 35.56, "elapsed_time": "0:23:24", "remaining_time": "0:42:24", "throughput": 12471.48, "total_tokens": 17513024}
|
|
{"current_steps": 5560, "total_steps": 15621, "loss": 0.4049, "lr": 1.6270774148679054e-06, "epoch": 0.35593111836630176, "percentage": 35.59, "elapsed_time": "0:23:24", "remaining_time": "0:42:22", "throughput": 12476.73, "total_tokens": 17529024}
|
|
{"current_steps": 5565, "total_steps": 15621, "loss": 0.3788, "lr": 1.6262066418157048e-06, "epoch": 0.35625120030727864, "percentage": 35.63, "elapsed_time": "0:23:25", "remaining_time": "0:42:19", "throughput": 12481.58, "total_tokens": 17543936}
|
|
{"current_steps": 5570, "total_steps": 15621, "loss": 0.5444, "lr": 1.6253350869371595e-06, "epoch": 0.35657128224825557, "percentage": 35.66, "elapsed_time": "0:23:26", "remaining_time": "0:42:17", "throughput": 12486.56, "total_tokens": 17559168}
|
|
{"current_steps": 5575, "total_steps": 15621, "loss": 0.3861, "lr": 1.6244627513204158e-06, "epoch": 0.35689136418923245, "percentage": 35.69, "elapsed_time": "0:23:26", "remaining_time": "0:42:15", "throughput": 12491.78, "total_tokens": 17574912}
|
|
{"current_steps": 5580, "total_steps": 15621, "loss": 0.4319, "lr": 1.6235896360545954e-06, "epoch": 0.35721144613020933, "percentage": 35.72, "elapsed_time": "0:23:27", "remaining_time": "0:42:12", "throughput": 12496.82, "total_tokens": 17590272}
|
|
{"current_steps": 5585, "total_steps": 15621, "loss": 0.4466, "lr": 1.622715742229792e-06, "epoch": 0.3575315280711862, "percentage": 35.75, "elapsed_time": "0:23:28", "remaining_time": "0:42:10", "throughput": 12501.99, "total_tokens": 17605952}
|
|
{"current_steps": 5590, "total_steps": 15621, "loss": 0.3861, "lr": 1.6218410709370734e-06, "epoch": 0.3578516100121631, "percentage": 35.79, "elapsed_time": "0:23:28", "remaining_time": "0:42:08", "throughput": 12506.94, "total_tokens": 17621120}
|
|
{"current_steps": 5595, "total_steps": 15621, "loss": 0.5462, "lr": 1.6209656232684768e-06, "epoch": 0.35817169195314, "percentage": 35.82, "elapsed_time": "0:23:29", "remaining_time": "0:42:05", "throughput": 12511.81, "total_tokens": 17636096}
|
|
{"current_steps": 5600, "total_steps": 15621, "loss": 0.4566, "lr": 1.620089400317008e-06, "epoch": 0.3584917738941169, "percentage": 35.85, "elapsed_time": "0:23:30", "remaining_time": "0:42:03", "throughput": 12517.4, "total_tokens": 17652672}
|
|
{"current_steps": 5605, "total_steps": 15621, "loss": 0.4979, "lr": 1.6192124031766425e-06, "epoch": 0.3588118558350938, "percentage": 35.88, "elapsed_time": "0:23:30", "remaining_time": "0:42:01", "throughput": 12522.37, "total_tokens": 17668032}
|
|
{"current_steps": 5610, "total_steps": 15621, "loss": 0.4507, "lr": 1.6183346329423213e-06, "epoch": 0.35913193777607066, "percentage": 35.91, "elapsed_time": "0:23:31", "remaining_time": "0:41:58", "throughput": 12527.32, "total_tokens": 17683264}
|
|
{"current_steps": 5615, "total_steps": 15621, "loss": 0.3672, "lr": 1.6174560907099508e-06, "epoch": 0.35945201971704754, "percentage": 35.95, "elapsed_time": "0:23:32", "remaining_time": "0:41:56", "throughput": 12532.59, "total_tokens": 17699200}
|
|
{"current_steps": 5620, "total_steps": 15621, "loss": 0.3538, "lr": 1.6165767775764013e-06, "epoch": 0.3597721016580245, "percentage": 35.98, "elapsed_time": "0:23:32", "remaining_time": "0:41:54", "throughput": 12537.69, "total_tokens": 17714816}
|
|
{"current_steps": 5625, "total_steps": 15621, "loss": 0.4157, "lr": 1.6156966946395056e-06, "epoch": 0.36009218359900136, "percentage": 36.01, "elapsed_time": "0:23:33", "remaining_time": "0:41:52", "throughput": 12543.66, "total_tokens": 17732352}
|
|
{"current_steps": 5630, "total_steps": 15621, "loss": 0.536, "lr": 1.6148158429980577e-06, "epoch": 0.36041226553997824, "percentage": 36.04, "elapsed_time": "0:23:34", "remaining_time": "0:41:49", "throughput": 12548.9, "total_tokens": 17748288}
|
|
{"current_steps": 5635, "total_steps": 15621, "loss": 0.3758, "lr": 1.6139342237518108e-06, "epoch": 0.3607323474809551, "percentage": 36.07, "elapsed_time": "0:23:34", "remaining_time": "0:41:47", "throughput": 12553.75, "total_tokens": 17763520}
|
|
{"current_steps": 5640, "total_steps": 15621, "loss": 0.4256, "lr": 1.6130518380014773e-06, "epoch": 0.361052429421932, "percentage": 36.11, "elapsed_time": "0:23:35", "remaining_time": "0:41:45", "throughput": 12558.89, "total_tokens": 17779328}
|
|
{"current_steps": 5645, "total_steps": 15621, "loss": 0.4313, "lr": 1.6121686868487259e-06, "epoch": 0.3613725113629089, "percentage": 36.14, "elapsed_time": "0:23:36", "remaining_time": "0:41:43", "throughput": 12564.22, "total_tokens": 17795584}
|
|
{"current_steps": 5650, "total_steps": 15621, "loss": 0.4449, "lr": 1.6112847713961815e-06, "epoch": 0.3616925933038858, "percentage": 36.17, "elapsed_time": "0:23:37", "remaining_time": "0:41:40", "throughput": 12568.87, "total_tokens": 17810368}
|
|
{"current_steps": 5655, "total_steps": 15621, "loss": 0.4365, "lr": 1.610400092747423e-06, "epoch": 0.3620126752448627, "percentage": 36.2, "elapsed_time": "0:23:37", "remaining_time": "0:41:38", "throughput": 12574.09, "total_tokens": 17826496}
|
|
{"current_steps": 5660, "total_steps": 15621, "loss": 0.4266, "lr": 1.609514652006981e-06, "epoch": 0.36233275718583957, "percentage": 36.23, "elapsed_time": "0:23:38", "remaining_time": "0:41:36", "throughput": 12578.8, "total_tokens": 17841344}
|
|
{"current_steps": 5665, "total_steps": 15621, "loss": 0.5632, "lr": 1.60862845028034e-06, "epoch": 0.36265283912681645, "percentage": 36.27, "elapsed_time": "0:23:39", "remaining_time": "0:41:33", "throughput": 12583.98, "total_tokens": 17857408}
|
|
{"current_steps": 5670, "total_steps": 15621, "loss": 0.4209, "lr": 1.6077414886739327e-06, "epoch": 0.36297292106779333, "percentage": 36.3, "elapsed_time": "0:23:39", "remaining_time": "0:41:31", "throughput": 12589.1, "total_tokens": 17873280}
|
|
{"current_steps": 5675, "total_steps": 15621, "loss": 0.5023, "lr": 1.6068537682951412e-06, "epoch": 0.36329300300877027, "percentage": 36.33, "elapsed_time": "0:23:40", "remaining_time": "0:41:29", "throughput": 12593.96, "total_tokens": 17888448}
|
|
{"current_steps": 5680, "total_steps": 15621, "loss": 0.4459, "lr": 1.6059652902522947e-06, "epoch": 0.36361308494974715, "percentage": 36.36, "elapsed_time": "0:23:41", "remaining_time": "0:41:27", "throughput": 12599.05, "total_tokens": 17904320}
|
|
{"current_steps": 5685, "total_steps": 15621, "loss": 0.3725, "lr": 1.6050760556546683e-06, "epoch": 0.363933166890724, "percentage": 36.39, "elapsed_time": "0:23:41", "remaining_time": "0:41:24", "throughput": 12603.98, "total_tokens": 17919744}
|
|
{"current_steps": 5690, "total_steps": 15621, "loss": 0.3823, "lr": 1.6041860656124823e-06, "epoch": 0.3642532488317009, "percentage": 36.43, "elapsed_time": "0:23:42", "remaining_time": "0:41:22", "throughput": 12608.74, "total_tokens": 17934656}
|
|
{"current_steps": 5695, "total_steps": 15621, "loss": 0.5608, "lr": 1.6032953212368993e-06, "epoch": 0.3645733307726778, "percentage": 36.46, "elapsed_time": "0:23:43", "remaining_time": "0:41:20", "throughput": 12614.03, "total_tokens": 17950976}
|
|
{"current_steps": 5700, "total_steps": 15621, "loss": 0.465, "lr": 1.6024038236400243e-06, "epoch": 0.3648934127136547, "percentage": 36.49, "elapsed_time": "0:23:43", "remaining_time": "0:41:18", "throughput": 12618.96, "total_tokens": 17966400}
|
|
{"current_steps": 5705, "total_steps": 15621, "loss": 0.5704, "lr": 1.6015115739349027e-06, "epoch": 0.3652134946546316, "percentage": 36.52, "elapsed_time": "0:23:44", "remaining_time": "0:41:15", "throughput": 12624.71, "total_tokens": 17983872}
|
|
{"current_steps": 5710, "total_steps": 15621, "loss": 0.5358, "lr": 1.6006185732355183e-06, "epoch": 0.3655335765956085, "percentage": 36.55, "elapsed_time": "0:23:45", "remaining_time": "0:41:13", "throughput": 12629.84, "total_tokens": 17999680}
|
|
{"current_steps": 5715, "total_steps": 15621, "loss": 0.3807, "lr": 1.5997248226567931e-06, "epoch": 0.36585365853658536, "percentage": 36.59, "elapsed_time": "0:23:45", "remaining_time": "0:41:11", "throughput": 12634.61, "total_tokens": 18014784}
|
|
{"current_steps": 5720, "total_steps": 15621, "loss": 0.5063, "lr": 1.5988303233145853e-06, "epoch": 0.36617374047756224, "percentage": 36.62, "elapsed_time": "0:23:46", "remaining_time": "0:41:09", "throughput": 12639.38, "total_tokens": 18029888}
|
|
{"current_steps": 5725, "total_steps": 15621, "loss": 0.3721, "lr": 1.597935076325688e-06, "epoch": 0.3664938224185392, "percentage": 36.65, "elapsed_time": "0:23:47", "remaining_time": "0:41:06", "throughput": 12644.41, "total_tokens": 18045632}
|
|
{"current_steps": 5730, "total_steps": 15621, "loss": 0.5996, "lr": 1.5970390828078272e-06, "epoch": 0.36681390435951605, "percentage": 36.68, "elapsed_time": "0:23:47", "remaining_time": "0:41:04", "throughput": 12649.23, "total_tokens": 18060928}
|
|
{"current_steps": 5735, "total_steps": 15621, "loss": 0.4616, "lr": 1.5961423438796615e-06, "epoch": 0.36713398630049293, "percentage": 36.71, "elapsed_time": "0:23:48", "remaining_time": "0:41:02", "throughput": 12654.1, "total_tokens": 18076352}
|
|
{"current_steps": 5740, "total_steps": 15621, "loss": 0.45, "lr": 1.59524486066078e-06, "epoch": 0.3674540682414698, "percentage": 36.75, "elapsed_time": "0:23:49", "remaining_time": "0:41:00", "throughput": 12659.13, "total_tokens": 18092096}
|
|
{"current_steps": 5745, "total_steps": 15621, "loss": 0.5875, "lr": 1.5943466342717012e-06, "epoch": 0.3677741501824467, "percentage": 36.78, "elapsed_time": "0:23:49", "remaining_time": "0:40:57", "throughput": 12664.07, "total_tokens": 18107648}
|
|
{"current_steps": 5750, "total_steps": 15621, "loss": 0.4526, "lr": 1.5934476658338708e-06, "epoch": 0.36809423212342357, "percentage": 36.81, "elapsed_time": "0:23:50", "remaining_time": "0:40:55", "throughput": 12668.99, "total_tokens": 18123264}
|
|
{"current_steps": 5755, "total_steps": 15621, "loss": 0.5482, "lr": 1.5925479564696619e-06, "epoch": 0.3684143140644005, "percentage": 36.84, "elapsed_time": "0:23:51", "remaining_time": "0:40:53", "throughput": 12673.76, "total_tokens": 18138368}
|
|
{"current_steps": 5760, "total_steps": 15621, "loss": 0.3433, "lr": 1.5916475073023721e-06, "epoch": 0.3687343960053774, "percentage": 36.87, "elapsed_time": "0:23:51", "remaining_time": "0:40:51", "throughput": 12678.9, "total_tokens": 18154432}
|
|
{"current_steps": 5765, "total_steps": 15621, "loss": 0.3385, "lr": 1.5907463194562226e-06, "epoch": 0.36905447794635426, "percentage": 36.91, "elapsed_time": "0:23:52", "remaining_time": "0:40:49", "throughput": 12684.32, "total_tokens": 18171200}
|
|
{"current_steps": 5770, "total_steps": 15621, "loss": 0.3763, "lr": 1.589844394056357e-06, "epoch": 0.36937455988733114, "percentage": 36.94, "elapsed_time": "0:23:53", "remaining_time": "0:40:46", "throughput": 12689.45, "total_tokens": 18187008}
|
|
{"current_steps": 5775, "total_steps": 15621, "loss": 0.3462, "lr": 1.5889417322288403e-06, "epoch": 0.369694641828308, "percentage": 36.97, "elapsed_time": "0:23:53", "remaining_time": "0:40:44", "throughput": 12694.51, "total_tokens": 18202944}
|
|
{"current_steps": 5780, "total_steps": 15621, "loss": 0.4963, "lr": 1.5880383351006556e-06, "epoch": 0.37001472376928496, "percentage": 37.0, "elapsed_time": "0:23:54", "remaining_time": "0:40:42", "throughput": 12699.19, "total_tokens": 18217984}
|
|
{"current_steps": 5785, "total_steps": 15621, "loss": 0.5257, "lr": 1.5871342037997055e-06, "epoch": 0.37033480571026184, "percentage": 37.03, "elapsed_time": "0:23:55", "remaining_time": "0:40:40", "throughput": 12704.24, "total_tokens": 18233984}
|
|
{"current_steps": 5790, "total_steps": 15621, "loss": 0.416, "lr": 1.5862293394548082e-06, "epoch": 0.3706548876512387, "percentage": 37.07, "elapsed_time": "0:23:55", "remaining_time": "0:40:38", "throughput": 12708.89, "total_tokens": 18249024}
|
|
{"current_steps": 5795, "total_steps": 15621, "loss": 0.3512, "lr": 1.5853237431956972e-06, "epoch": 0.3709749695922156, "percentage": 37.1, "elapsed_time": "0:23:56", "remaining_time": "0:40:35", "throughput": 12713.6, "total_tokens": 18264256}
|
|
{"current_steps": 5800, "total_steps": 15621, "loss": 0.554, "lr": 1.5844174161530206e-06, "epoch": 0.3712950515331925, "percentage": 37.13, "elapsed_time": "0:23:57", "remaining_time": "0:40:33", "throughput": 12718.49, "total_tokens": 18279936}
|
|
{"current_steps": 5805, "total_steps": 15621, "loss": 0.4147, "lr": 1.5835103594583382e-06, "epoch": 0.3716151334741694, "percentage": 37.16, "elapsed_time": "0:23:57", "remaining_time": "0:40:31", "throughput": 12723.46, "total_tokens": 18295488}
|
|
{"current_steps": 5810, "total_steps": 15621, "loss": 0.5357, "lr": 1.5826025742441207e-06, "epoch": 0.3719352154151463, "percentage": 37.19, "elapsed_time": "0:23:58", "remaining_time": "0:40:29", "throughput": 12728.51, "total_tokens": 18311360}
|
|
{"current_steps": 5815, "total_steps": 15621, "loss": 0.4282, "lr": 1.5816940616437486e-06, "epoch": 0.37225529735612317, "percentage": 37.23, "elapsed_time": "0:23:59", "remaining_time": "0:40:27", "throughput": 12733.29, "total_tokens": 18326592}
|
|
{"current_steps": 5820, "total_steps": 15621, "loss": 0.3564, "lr": 1.5807848227915108e-06, "epoch": 0.37257537929710005, "percentage": 37.26, "elapsed_time": "0:23:59", "remaining_time": "0:40:24", "throughput": 12738.94, "total_tokens": 18344000}
|
|
{"current_steps": 5825, "total_steps": 15621, "loss": 0.4888, "lr": 1.5798748588226028e-06, "epoch": 0.37289546123807693, "percentage": 37.29, "elapsed_time": "0:24:00", "remaining_time": "0:40:22", "throughput": 12744.01, "total_tokens": 18359872}
|
|
{"current_steps": 5830, "total_steps": 15621, "loss": 0.472, "lr": 1.578964170873125e-06, "epoch": 0.3732155431790538, "percentage": 37.32, "elapsed_time": "0:24:01", "remaining_time": "0:40:20", "throughput": 12748.43, "total_tokens": 18374400}
|
|
{"current_steps": 5835, "total_steps": 15621, "loss": 0.2731, "lr": 1.5780527600800816e-06, "epoch": 0.37353562512003075, "percentage": 37.35, "elapsed_time": "0:24:01", "remaining_time": "0:40:18", "throughput": 12753.61, "total_tokens": 18390656}
|
|
{"current_steps": 5840, "total_steps": 15621, "loss": 0.4561, "lr": 1.5771406275813808e-06, "epoch": 0.3738557070610076, "percentage": 37.39, "elapsed_time": "0:24:02", "remaining_time": "0:40:16", "throughput": 12758.5, "total_tokens": 18406400}
|
|
{"current_steps": 5845, "total_steps": 15621, "loss": 0.5531, "lr": 1.5762277745158297e-06, "epoch": 0.3741757890019845, "percentage": 37.42, "elapsed_time": "0:24:03", "remaining_time": "0:40:14", "throughput": 12763.74, "total_tokens": 18422848}
|
|
{"current_steps": 5850, "total_steps": 15621, "loss": 0.5008, "lr": 1.5753142020231365e-06, "epoch": 0.3744958709429614, "percentage": 37.45, "elapsed_time": "0:24:04", "remaining_time": "0:40:11", "throughput": 12768.83, "total_tokens": 18438912}
|
|
{"current_steps": 5855, "total_steps": 15621, "loss": 0.5494, "lr": 1.5743999112439073e-06, "epoch": 0.37481595288393826, "percentage": 37.48, "elapsed_time": "0:24:04", "remaining_time": "0:40:09", "throughput": 12774.12, "total_tokens": 18455488}
|
|
{"current_steps": 5860, "total_steps": 15621, "loss": 0.4015, "lr": 1.5734849033196446e-06, "epoch": 0.3751360348249152, "percentage": 37.51, "elapsed_time": "0:24:05", "remaining_time": "0:40:07", "throughput": 12778.51, "total_tokens": 18470080}
|
|
{"current_steps": 5865, "total_steps": 15621, "loss": 0.4426, "lr": 1.5725691793927468e-06, "epoch": 0.3754561167658921, "percentage": 37.55, "elapsed_time": "0:24:06", "remaining_time": "0:40:05", "throughput": 12782.85, "total_tokens": 18484480}
|
|
{"current_steps": 5870, "total_steps": 15621, "loss": 0.4731, "lr": 1.5716527406065057e-06, "epoch": 0.37577619870686896, "percentage": 37.58, "elapsed_time": "0:24:06", "remaining_time": "0:40:03", "throughput": 12788.31, "total_tokens": 18501312}
|
|
{"current_steps": 5875, "total_steps": 15621, "loss": 0.4582, "lr": 1.570735588105106e-06, "epoch": 0.37609628064784584, "percentage": 37.61, "elapsed_time": "0:24:07", "remaining_time": "0:40:01", "throughput": 12792.79, "total_tokens": 18515968}
|
|
{"current_steps": 5880, "total_steps": 15621, "loss": 0.3808, "lr": 1.5698177230336234e-06, "epoch": 0.3764163625888227, "percentage": 37.64, "elapsed_time": "0:24:08", "remaining_time": "0:39:58", "throughput": 12797.45, "total_tokens": 18531200}
|
|
{"current_steps": 5885, "total_steps": 15621, "loss": 0.2686, "lr": 1.568899146538023e-06, "epoch": 0.37673644452979965, "percentage": 37.67, "elapsed_time": "0:24:08", "remaining_time": "0:39:56", "throughput": 12802.68, "total_tokens": 18547712}
|
|
{"current_steps": 5890, "total_steps": 15621, "loss": 0.4112, "lr": 1.5679798597651587e-06, "epoch": 0.37705652647077653, "percentage": 37.71, "elapsed_time": "0:24:09", "remaining_time": "0:39:54", "throughput": 12807.27, "total_tokens": 18562752}
|
|
{"current_steps": 5895, "total_steps": 15621, "loss": 0.4375, "lr": 1.5670598638627706e-06, "epoch": 0.3773766084117534, "percentage": 37.74, "elapsed_time": "0:24:10", "remaining_time": "0:39:52", "throughput": 12812.07, "total_tokens": 18578368}
|
|
{"current_steps": 5900, "total_steps": 15621, "loss": 0.3833, "lr": 1.5661391599794847e-06, "epoch": 0.3776966903527303, "percentage": 37.77, "elapsed_time": "0:24:10", "remaining_time": "0:39:50", "throughput": 12814.47, "total_tokens": 18593408}
|
|
{"current_steps": 5905, "total_steps": 15621, "loss": 0.4148, "lr": 1.56521774926481e-06, "epoch": 0.37801677229370717, "percentage": 37.8, "elapsed_time": "0:24:11", "remaining_time": "0:39:48", "throughput": 12818.86, "total_tokens": 18607872}
|
|
{"current_steps": 5910, "total_steps": 15621, "loss": 0.359, "lr": 1.5642956328691393e-06, "epoch": 0.3783368542346841, "percentage": 37.83, "elapsed_time": "0:24:12", "remaining_time": "0:39:46", "throughput": 12823.92, "total_tokens": 18624000}
|
|
{"current_steps": 5915, "total_steps": 15621, "loss": 0.5591, "lr": 1.5633728119437451e-06, "epoch": 0.378656936175661, "percentage": 37.87, "elapsed_time": "0:24:12", "remaining_time": "0:39:44", "throughput": 12829.29, "total_tokens": 18640704}
|
|
{"current_steps": 5920, "total_steps": 15621, "loss": 0.472, "lr": 1.5624492876407807e-06, "epoch": 0.37897701811663786, "percentage": 37.9, "elapsed_time": "0:24:13", "remaining_time": "0:39:42", "throughput": 12835.02, "total_tokens": 18658368}
|
|
{"current_steps": 5925, "total_steps": 15621, "loss": 0.411, "lr": 1.5615250611132766e-06, "epoch": 0.37929710005761474, "percentage": 37.93, "elapsed_time": "0:24:14", "remaining_time": "0:39:40", "throughput": 12840.55, "total_tokens": 18675584}
|
|
{"current_steps": 5930, "total_steps": 15621, "loss": 0.5683, "lr": 1.5606001335151405e-06, "epoch": 0.3796171819985916, "percentage": 37.96, "elapsed_time": "0:24:15", "remaining_time": "0:39:37", "throughput": 12845.66, "total_tokens": 18691904}
|
|
{"current_steps": 5935, "total_steps": 15621, "loss": 0.3734, "lr": 1.5596745060011561e-06, "epoch": 0.3799372639395685, "percentage": 37.99, "elapsed_time": "0:24:15", "remaining_time": "0:39:35", "throughput": 12851.0, "total_tokens": 18708736}
|
|
{"current_steps": 5940, "total_steps": 15621, "loss": 0.3492, "lr": 1.5587481797269793e-06, "epoch": 0.38025734588054544, "percentage": 38.03, "elapsed_time": "0:24:16", "remaining_time": "0:39:33", "throughput": 12855.63, "total_tokens": 18724032}
|
|
{"current_steps": 5945, "total_steps": 15621, "loss": 0.4266, "lr": 1.5578211558491396e-06, "epoch": 0.3805774278215223, "percentage": 38.06, "elapsed_time": "0:24:17", "remaining_time": "0:39:31", "throughput": 12860.71, "total_tokens": 18740352}
|
|
{"current_steps": 5950, "total_steps": 15621, "loss": 0.3346, "lr": 1.5568934355250375e-06, "epoch": 0.3808975097624992, "percentage": 38.09, "elapsed_time": "0:24:17", "remaining_time": "0:39:29", "throughput": 12864.92, "total_tokens": 18754560}
|
|
{"current_steps": 5955, "total_steps": 15621, "loss": 0.6693, "lr": 1.5559650199129423e-06, "epoch": 0.3812175917034761, "percentage": 38.12, "elapsed_time": "0:24:18", "remaining_time": "0:39:27", "throughput": 12869.39, "total_tokens": 18769280}
|
|
{"current_steps": 5960, "total_steps": 15621, "loss": 0.4131, "lr": 1.5550359101719921e-06, "epoch": 0.38153767364445296, "percentage": 38.15, "elapsed_time": "0:24:19", "remaining_time": "0:39:25", "throughput": 12874.03, "total_tokens": 18784512}
|
|
{"current_steps": 5965, "total_steps": 15621, "loss": 0.3615, "lr": 1.554106107462191e-06, "epoch": 0.3818577555854299, "percentage": 38.19, "elapsed_time": "0:24:19", "remaining_time": "0:39:23", "throughput": 12878.91, "total_tokens": 18800384}
|
|
{"current_steps": 5970, "total_steps": 15621, "loss": 0.4262, "lr": 1.5531756129444092e-06, "epoch": 0.38217783752640677, "percentage": 38.22, "elapsed_time": "0:24:20", "remaining_time": "0:39:20", "throughput": 12883.55, "total_tokens": 18815552}
|
|
{"current_steps": 5975, "total_steps": 15621, "loss": 0.4191, "lr": 1.5522444277803796e-06, "epoch": 0.38249791946738365, "percentage": 38.25, "elapsed_time": "0:24:21", "remaining_time": "0:39:18", "throughput": 12887.9, "total_tokens": 18830080}
|
|
{"current_steps": 5980, "total_steps": 15621, "loss": 0.4244, "lr": 1.5513125531326976e-06, "epoch": 0.38281800140836053, "percentage": 38.28, "elapsed_time": "0:24:21", "remaining_time": "0:39:16", "throughput": 12892.91, "total_tokens": 18846272}
|
|
{"current_steps": 5985, "total_steps": 15621, "loss": 0.3802, "lr": 1.5503799901648198e-06, "epoch": 0.3831380833493374, "percentage": 38.31, "elapsed_time": "0:24:22", "remaining_time": "0:39:14", "throughput": 12897.27, "total_tokens": 18860928}
|
|
{"current_steps": 5990, "total_steps": 15621, "loss": 0.4461, "lr": 1.5494467400410625e-06, "epoch": 0.38345816529031435, "percentage": 38.35, "elapsed_time": "0:24:23", "remaining_time": "0:39:12", "throughput": 12902.2, "total_tokens": 18877120}
|
|
{"current_steps": 5995, "total_steps": 15621, "loss": 0.6047, "lr": 1.5485128039265986e-06, "epoch": 0.3837782472312912, "percentage": 38.38, "elapsed_time": "0:24:23", "remaining_time": "0:39:10", "throughput": 12906.78, "total_tokens": 18892224}
|
|
{"current_steps": 6000, "total_steps": 15621, "loss": 0.445, "lr": 1.547578182987459e-06, "epoch": 0.3840983291722681, "percentage": 38.41, "elapsed_time": "0:24:24", "remaining_time": "0:39:08", "throughput": 12911.2, "total_tokens": 18907008}
|
|
{"current_steps": 6005, "total_steps": 15621, "loss": 0.2856, "lr": 1.5466428783905286e-06, "epoch": 0.384418411113245, "percentage": 38.44, "elapsed_time": "0:24:25", "remaining_time": "0:39:06", "throughput": 12915.85, "total_tokens": 18922368}
|
|
{"current_steps": 6010, "total_steps": 15621, "loss": 0.4418, "lr": 1.5457068913035463e-06, "epoch": 0.38473849305422186, "percentage": 38.47, "elapsed_time": "0:24:25", "remaining_time": "0:39:03", "throughput": 12920.41, "total_tokens": 18937536}
|
|
{"current_steps": 6015, "total_steps": 15621, "loss": 0.5024, "lr": 1.544770222895103e-06, "epoch": 0.38505857499519874, "percentage": 38.51, "elapsed_time": "0:24:26", "remaining_time": "0:39:01", "throughput": 12925.54, "total_tokens": 18954048}
|
|
{"current_steps": 6020, "total_steps": 15621, "loss": 0.5102, "lr": 1.5438328743346398e-06, "epoch": 0.3853786569361757, "percentage": 38.54, "elapsed_time": "0:24:27", "remaining_time": "0:38:59", "throughput": 12930.18, "total_tokens": 18969472}
|
|
{"current_steps": 6025, "total_steps": 15621, "loss": 0.4192, "lr": 1.5428948467924478e-06, "epoch": 0.38569873887715256, "percentage": 38.57, "elapsed_time": "0:24:27", "remaining_time": "0:38:57", "throughput": 12934.44, "total_tokens": 18983872}
|
|
{"current_steps": 6030, "total_steps": 15621, "loss": 0.3268, "lr": 1.5419561414396656e-06, "epoch": 0.38601882081812944, "percentage": 38.6, "elapsed_time": "0:24:28", "remaining_time": "0:38:55", "throughput": 12939.08, "total_tokens": 18999360}
|
|
{"current_steps": 6035, "total_steps": 15621, "loss": 0.4969, "lr": 1.541016759448277e-06, "epoch": 0.3863389027591063, "percentage": 38.63, "elapsed_time": "0:24:29", "remaining_time": "0:38:53", "throughput": 12943.96, "total_tokens": 19015424}
|
|
{"current_steps": 6040, "total_steps": 15621, "loss": 0.3775, "lr": 1.5400767019911124e-06, "epoch": 0.3866589847000832, "percentage": 38.67, "elapsed_time": "0:24:29", "remaining_time": "0:38:51", "throughput": 12948.89, "total_tokens": 19031616}
|
|
{"current_steps": 6045, "total_steps": 15621, "loss": 0.4886, "lr": 1.539135970241844e-06, "epoch": 0.38697906664106013, "percentage": 38.7, "elapsed_time": "0:24:30", "remaining_time": "0:38:49", "throughput": 12953.54, "total_tokens": 19047040}
|
|
{"current_steps": 6050, "total_steps": 15621, "loss": 0.4842, "lr": 1.5381945653749866e-06, "epoch": 0.387299148582037, "percentage": 38.73, "elapsed_time": "0:24:31", "remaining_time": "0:38:47", "throughput": 12958.34, "total_tokens": 19062848}
|
|
{"current_steps": 6055, "total_steps": 15621, "loss": 0.5516, "lr": 1.5372524885658952e-06, "epoch": 0.3876192305230139, "percentage": 38.76, "elapsed_time": "0:24:31", "remaining_time": "0:38:45", "throughput": 12963.32, "total_tokens": 19078976}
|
|
{"current_steps": 6060, "total_steps": 15621, "loss": 0.3732, "lr": 1.5363097409907638e-06, "epoch": 0.38793931246399077, "percentage": 38.79, "elapsed_time": "0:24:32", "remaining_time": "0:38:43", "throughput": 12967.64, "total_tokens": 19093632}
|
|
{"current_steps": 6065, "total_steps": 15621, "loss": 0.3583, "lr": 1.535366323826624e-06, "epoch": 0.38825939440496765, "percentage": 38.83, "elapsed_time": "0:24:33", "remaining_time": "0:38:40", "throughput": 12972.25, "total_tokens": 19109056}
|
|
{"current_steps": 6070, "total_steps": 15621, "loss": 0.3623, "lr": 1.534422238251343e-06, "epoch": 0.3885794763459446, "percentage": 38.86, "elapsed_time": "0:24:33", "remaining_time": "0:38:38", "throughput": 12976.87, "total_tokens": 19124544}
|
|
{"current_steps": 6075, "total_steps": 15621, "loss": 0.3844, "lr": 1.5334774854436223e-06, "epoch": 0.38889955828692147, "percentage": 38.89, "elapsed_time": "0:24:34", "remaining_time": "0:38:36", "throughput": 12981.69, "total_tokens": 19140480}
|
|
{"current_steps": 6080, "total_steps": 15621, "loss": 0.378, "lr": 1.5325320665829975e-06, "epoch": 0.38921964022789834, "percentage": 38.92, "elapsed_time": "0:24:35", "remaining_time": "0:38:34", "throughput": 12986.61, "total_tokens": 19156736}
|
|
{"current_steps": 6085, "total_steps": 15621, "loss": 0.4624, "lr": 1.5315859828498352e-06, "epoch": 0.3895397221688752, "percentage": 38.95, "elapsed_time": "0:24:35", "remaining_time": "0:38:32", "throughput": 12990.95, "total_tokens": 19171520}
|
|
{"current_steps": 6090, "total_steps": 15621, "loss": 0.5057, "lr": 1.5306392354253316e-06, "epoch": 0.3898598041098521, "percentage": 38.99, "elapsed_time": "0:24:36", "remaining_time": "0:38:30", "throughput": 12995.65, "total_tokens": 19187136}
|
|
{"current_steps": 6095, "total_steps": 15621, "loss": 0.4389, "lr": 1.5296918254915123e-06, "epoch": 0.39017988605082904, "percentage": 39.02, "elapsed_time": "0:24:37", "remaining_time": "0:38:28", "throughput": 12999.97, "total_tokens": 19201856}
|
|
{"current_steps": 6100, "total_steps": 15621, "loss": 0.3827, "lr": 1.5287437542312296e-06, "epoch": 0.3904999679918059, "percentage": 39.05, "elapsed_time": "0:24:37", "remaining_time": "0:38:26", "throughput": 13004.29, "total_tokens": 19216704}
|
|
{"current_steps": 6105, "total_steps": 15621, "loss": 0.5423, "lr": 1.5277950228281614e-06, "epoch": 0.3908200499327828, "percentage": 39.08, "elapsed_time": "0:24:38", "remaining_time": "0:38:24", "throughput": 13009.49, "total_tokens": 19233408}
|
|
{"current_steps": 6110, "total_steps": 15621, "loss": 0.3617, "lr": 1.52684563246681e-06, "epoch": 0.3911401318737597, "percentage": 39.11, "elapsed_time": "0:24:39", "remaining_time": "0:38:22", "throughput": 13014.62, "total_tokens": 19250048}
|
|
{"current_steps": 6115, "total_steps": 15621, "loss": 0.4241, "lr": 1.5258955843325015e-06, "epoch": 0.39146021381473656, "percentage": 39.15, "elapsed_time": "0:24:39", "remaining_time": "0:38:20", "throughput": 13019.68, "total_tokens": 19266560}
|
|
{"current_steps": 6120, "total_steps": 15621, "loss": 0.5018, "lr": 1.5249448796113804e-06, "epoch": 0.39178029575571344, "percentage": 39.18, "elapsed_time": "0:24:40", "remaining_time": "0:38:18", "throughput": 13024.02, "total_tokens": 19281408}
|
|
{"current_steps": 6125, "total_steps": 15621, "loss": 0.4797, "lr": 1.5239935194904141e-06, "epoch": 0.39210037769669037, "percentage": 39.21, "elapsed_time": "0:24:41", "remaining_time": "0:38:16", "throughput": 13028.4, "total_tokens": 19296384}
|
|
{"current_steps": 6130, "total_steps": 15621, "loss": 0.3946, "lr": 1.523041505157386e-06, "epoch": 0.39242045963766725, "percentage": 39.24, "elapsed_time": "0:24:41", "remaining_time": "0:38:14", "throughput": 13033.04, "total_tokens": 19312000}
|
|
{"current_steps": 6135, "total_steps": 15621, "loss": 0.395, "lr": 1.5220888378008977e-06, "epoch": 0.39274054157864413, "percentage": 39.27, "elapsed_time": "0:24:42", "remaining_time": "0:38:12", "throughput": 13037.64, "total_tokens": 19327488}
|
|
{"current_steps": 6140, "total_steps": 15621, "loss": 0.4748, "lr": 1.5211355186103654e-06, "epoch": 0.393060623519621, "percentage": 39.31, "elapsed_time": "0:24:43", "remaining_time": "0:38:10", "throughput": 13041.89, "total_tokens": 19342080}
|
|
{"current_steps": 6145, "total_steps": 15621, "loss": 0.4435, "lr": 1.5201815487760192e-06, "epoch": 0.3933807054605979, "percentage": 39.34, "elapsed_time": "0:24:43", "remaining_time": "0:38:08", "throughput": 13046.79, "total_tokens": 19358336}
|
|
{"current_steps": 6150, "total_steps": 15621, "loss": 0.5032, "lr": 1.5192269294889019e-06, "epoch": 0.3937007874015748, "percentage": 39.37, "elapsed_time": "0:24:44", "remaining_time": "0:38:05", "throughput": 13051.22, "total_tokens": 19373376}
|
|
{"current_steps": 6155, "total_steps": 15621, "loss": 0.4021, "lr": 1.5182716619408666e-06, "epoch": 0.3940208693425517, "percentage": 39.4, "elapsed_time": "0:24:45", "remaining_time": "0:38:03", "throughput": 13055.7, "total_tokens": 19388608}
|
|
{"current_steps": 6160, "total_steps": 15621, "loss": 0.5383, "lr": 1.5173157473245764e-06, "epoch": 0.3943409512835286, "percentage": 39.43, "elapsed_time": "0:24:45", "remaining_time": "0:38:01", "throughput": 13059.92, "total_tokens": 19403264}
|
|
{"current_steps": 6165, "total_steps": 15621, "loss": 0.4397, "lr": 1.5163591868335016e-06, "epoch": 0.39466103322450546, "percentage": 39.47, "elapsed_time": "0:24:46", "remaining_time": "0:37:59", "throughput": 13064.47, "total_tokens": 19418816}
|
|
{"current_steps": 6170, "total_steps": 15621, "loss": 0.5856, "lr": 1.515401981661919e-06, "epoch": 0.39498111516548234, "percentage": 39.5, "elapsed_time": "0:24:47", "remaining_time": "0:37:57", "throughput": 13069.47, "total_tokens": 19435392}
|
|
{"current_steps": 6175, "total_steps": 15621, "loss": 0.4567, "lr": 1.514444133004911e-06, "epoch": 0.3953011971064593, "percentage": 39.53, "elapsed_time": "0:24:47", "remaining_time": "0:37:55", "throughput": 13073.7, "total_tokens": 19450048}
|
|
{"current_steps": 6180, "total_steps": 15621, "loss": 0.465, "lr": 1.5134856420583631e-06, "epoch": 0.39562127904743616, "percentage": 39.56, "elapsed_time": "0:24:48", "remaining_time": "0:37:53", "throughput": 13078.64, "total_tokens": 19466368}
|
|
{"current_steps": 6185, "total_steps": 15621, "loss": 0.34, "lr": 1.5125265100189614e-06, "epoch": 0.39594136098841304, "percentage": 39.59, "elapsed_time": "0:24:49", "remaining_time": "0:37:51", "throughput": 13083.48, "total_tokens": 19482624}
|
|
{"current_steps": 6190, "total_steps": 15621, "loss": 0.5382, "lr": 1.5115667380841948e-06, "epoch": 0.3962614429293899, "percentage": 39.63, "elapsed_time": "0:24:49", "remaining_time": "0:37:49", "throughput": 13088.04, "total_tokens": 19498048}
|
|
{"current_steps": 6195, "total_steps": 15621, "loss": 0.4413, "lr": 1.510606327452349e-06, "epoch": 0.3965815248703668, "percentage": 39.66, "elapsed_time": "0:24:50", "remaining_time": "0:37:47", "throughput": 13093.22, "total_tokens": 19515264}
|
|
{"current_steps": 6200, "total_steps": 15621, "loss": 0.4267, "lr": 1.5096452793225082e-06, "epoch": 0.3969016068113437, "percentage": 39.69, "elapsed_time": "0:24:51", "remaining_time": "0:37:45", "throughput": 13098.69, "total_tokens": 19533056}
|
|
{"current_steps": 6205, "total_steps": 15621, "loss": 0.3994, "lr": 1.5086835948945522e-06, "epoch": 0.3972216887523206, "percentage": 39.72, "elapsed_time": "0:24:51", "remaining_time": "0:37:43", "throughput": 13103.18, "total_tokens": 19548480}
|
|
{"current_steps": 6210, "total_steps": 15621, "loss": 0.3462, "lr": 1.5077212753691556e-06, "epoch": 0.3975417706932975, "percentage": 39.75, "elapsed_time": "0:24:52", "remaining_time": "0:37:41", "throughput": 13107.62, "total_tokens": 19563712}
|
|
{"current_steps": 6215, "total_steps": 15621, "loss": 0.41, "lr": 1.5067583219477852e-06, "epoch": 0.39786185263427437, "percentage": 39.79, "elapsed_time": "0:24:53", "remaining_time": "0:37:39", "throughput": 13111.95, "total_tokens": 19578624}
|
|
{"current_steps": 6220, "total_steps": 15621, "loss": 0.3926, "lr": 1.5057947358327e-06, "epoch": 0.39818193457525125, "percentage": 39.82, "elapsed_time": "0:24:53", "remaining_time": "0:37:37", "throughput": 13116.17, "total_tokens": 19593408}
|
|
{"current_steps": 6225, "total_steps": 15621, "loss": 0.5044, "lr": 1.504830518226948e-06, "epoch": 0.39850201651622813, "percentage": 39.85, "elapsed_time": "0:24:54", "remaining_time": "0:37:35", "throughput": 13120.88, "total_tokens": 19609216}
|
|
{"current_steps": 6230, "total_steps": 15621, "loss": 0.4468, "lr": 1.5038656703343672e-06, "epoch": 0.39882209845720507, "percentage": 39.88, "elapsed_time": "0:24:55", "remaining_time": "0:37:33", "throughput": 13125.42, "total_tokens": 19624896}
|
|
{"current_steps": 6235, "total_steps": 15621, "loss": 0.5125, "lr": 1.5029001933595805e-06, "epoch": 0.39914218039818194, "percentage": 39.91, "elapsed_time": "0:24:55", "remaining_time": "0:37:31", "throughput": 13129.86, "total_tokens": 19640128}
|
|
{"current_steps": 6240, "total_steps": 15621, "loss": 0.3482, "lr": 1.501934088507998e-06, "epoch": 0.3994622623391588, "percentage": 39.95, "elapsed_time": "0:24:56", "remaining_time": "0:37:29", "throughput": 13134.46, "total_tokens": 19655680}
|
|
{"current_steps": 6245, "total_steps": 15621, "loss": 0.6246, "lr": 1.5009673569858126e-06, "epoch": 0.3997823442801357, "percentage": 39.98, "elapsed_time": "0:24:57", "remaining_time": "0:37:27", "throughput": 13139.36, "total_tokens": 19672192}
|
|
{"current_steps": 6250, "total_steps": 15621, "loss": 0.534, "lr": 1.5e-06, "epoch": 0.4001024262211126, "percentage": 40.01, "elapsed_time": "0:24:57", "remaining_time": "0:37:25", "throughput": 13144.35, "total_tokens": 19688896}
|
|
{"current_steps": 6255, "total_steps": 15621, "loss": 0.3556, "lr": 1.4990320187583167e-06, "epoch": 0.4004225081620895, "percentage": 40.04, "elapsed_time": "0:24:58", "remaining_time": "0:37:23", "throughput": 13148.79, "total_tokens": 19704128}
|
|
{"current_steps": 6256, "total_steps": 15621, "eval_loss": 0.4279458224773407, "epoch": 0.4004865245502849, "percentage": 40.05, "elapsed_time": "0:25:49", "remaining_time": "0:38:39", "throughput": 12720.35, "total_tokens": 19707456}
|
|
{"current_steps": 6260, "total_steps": 15621, "loss": 0.3913, "lr": 1.4980634144692986e-06, "epoch": 0.4007425901030664, "percentage": 40.07, "elapsed_time": "0:26:29", "remaining_time": "0:39:36", "throughput": 12408.36, "total_tokens": 19719744}
|
|
{"current_steps": 6265, "total_steps": 15621, "loss": 0.3734, "lr": 1.4970941883422599e-06, "epoch": 0.4010626720440433, "percentage": 40.11, "elapsed_time": "0:26:29", "remaining_time": "0:39:34", "throughput": 12413.32, "total_tokens": 19736128}
|
|
{"current_steps": 6270, "total_steps": 15621, "loss": 0.4286, "lr": 1.4961243415872901e-06, "epoch": 0.40138275398502016, "percentage": 40.14, "elapsed_time": "0:26:30", "remaining_time": "0:39:32", "throughput": 12417.7, "total_tokens": 19751296}
|
|
{"current_steps": 6275, "total_steps": 15621, "loss": 0.3958, "lr": 1.4951538754152551e-06, "epoch": 0.40170283592599704, "percentage": 40.17, "elapsed_time": "0:26:31", "remaining_time": "0:39:29", "throughput": 12421.88, "total_tokens": 19765888}
|
|
{"current_steps": 6280, "total_steps": 15621, "loss": 0.4227, "lr": 1.4941827910377925e-06, "epoch": 0.402022917866974, "percentage": 40.2, "elapsed_time": "0:26:31", "remaining_time": "0:39:27", "throughput": 12426.17, "total_tokens": 19780864}
|
|
{"current_steps": 6285, "total_steps": 15621, "loss": 0.3978, "lr": 1.4932110896673131e-06, "epoch": 0.40234299980795085, "percentage": 40.23, "elapsed_time": "0:26:32", "remaining_time": "0:39:25", "throughput": 12430.92, "total_tokens": 19796864}
|
|
{"current_steps": 6290, "total_steps": 15621, "loss": 0.5383, "lr": 1.4922387725169973e-06, "epoch": 0.40266308174892773, "percentage": 40.27, "elapsed_time": "0:26:33", "remaining_time": "0:39:23", "throughput": 12435.28, "total_tokens": 19811904}
|
|
{"current_steps": 6295, "total_steps": 15621, "loss": 0.418, "lr": 1.4912658408007947e-06, "epoch": 0.4029831636899046, "percentage": 40.3, "elapsed_time": "0:26:33", "remaining_time": "0:39:21", "throughput": 12439.81, "total_tokens": 19827456}
|
|
{"current_steps": 6300, "total_steps": 15621, "loss": 0.4194, "lr": 1.4902922957334215e-06, "epoch": 0.4033032456308815, "percentage": 40.33, "elapsed_time": "0:26:34", "remaining_time": "0:39:19", "throughput": 12444.16, "total_tokens": 19842496}
|
|
{"current_steps": 6305, "total_steps": 15621, "loss": 0.4186, "lr": 1.4893181385303608e-06, "epoch": 0.40362332757185837, "percentage": 40.36, "elapsed_time": "0:26:35", "remaining_time": "0:39:16", "throughput": 12448.75, "total_tokens": 19858240}
|
|
{"current_steps": 6310, "total_steps": 15621, "loss": 0.4262, "lr": 1.4883433704078584e-06, "epoch": 0.4039434095128353, "percentage": 40.39, "elapsed_time": "0:26:35", "remaining_time": "0:39:14", "throughput": 12453.58, "total_tokens": 19874368}
|
|
{"current_steps": 6315, "total_steps": 15621, "loss": 0.3986, "lr": 1.4873679925829246e-06, "epoch": 0.4042634914538122, "percentage": 40.43, "elapsed_time": "0:26:36", "remaining_time": "0:39:12", "throughput": 12458.84, "total_tokens": 19891904}
|
|
{"current_steps": 6320, "total_steps": 15621, "loss": 0.4157, "lr": 1.4863920062733298e-06, "epoch": 0.40458357339478906, "percentage": 40.46, "elapsed_time": "0:26:37", "remaining_time": "0:39:10", "throughput": 12463.32, "total_tokens": 19907392}
|
|
{"current_steps": 6325, "total_steps": 15621, "loss": 0.3822, "lr": 1.485415412697604e-06, "epoch": 0.40490365533576594, "percentage": 40.49, "elapsed_time": "0:26:37", "remaining_time": "0:39:08", "throughput": 12467.76, "total_tokens": 19922624}
|
|
{"current_steps": 6330, "total_steps": 15621, "loss": 0.4286, "lr": 1.484438213075036e-06, "epoch": 0.4052237372767428, "percentage": 40.52, "elapsed_time": "0:26:38", "remaining_time": "0:39:06", "throughput": 12472.73, "total_tokens": 19939328}
|
|
{"current_steps": 6335, "total_steps": 15621, "loss": 0.4412, "lr": 1.4834604086256713e-06, "epoch": 0.40554381921771976, "percentage": 40.55, "elapsed_time": "0:26:39", "remaining_time": "0:39:04", "throughput": 12477.45, "total_tokens": 19955392}
|
|
{"current_steps": 6340, "total_steps": 15621, "loss": 0.401, "lr": 1.4824820005703097e-06, "epoch": 0.40586390115869664, "percentage": 40.59, "elapsed_time": "0:26:39", "remaining_time": "0:39:02", "throughput": 12482.22, "total_tokens": 19971520}
|
|
{"current_steps": 6345, "total_steps": 15621, "loss": 0.448, "lr": 1.4815029901305061e-06, "epoch": 0.4061839830996735, "percentage": 40.62, "elapsed_time": "0:26:40", "remaining_time": "0:39:00", "throughput": 12487.24, "total_tokens": 19988352}
|
|
{"current_steps": 6350, "total_steps": 15621, "loss": 0.4706, "lr": 1.480523378528565e-06, "epoch": 0.4065040650406504, "percentage": 40.65, "elapsed_time": "0:26:41", "remaining_time": "0:38:58", "throughput": 12492.23, "total_tokens": 20005184}
|
|
{"current_steps": 6355, "total_steps": 15621, "loss": 0.4379, "lr": 1.4795431669875441e-06, "epoch": 0.4068241469816273, "percentage": 40.68, "elapsed_time": "0:26:42", "remaining_time": "0:38:55", "throughput": 12496.75, "total_tokens": 20020800}
|
|
{"current_steps": 6360, "total_steps": 15621, "loss": 0.475, "lr": 1.478562356731249e-06, "epoch": 0.4071442289226042, "percentage": 40.71, "elapsed_time": "0:26:42", "remaining_time": "0:38:53", "throughput": 12501.28, "total_tokens": 20036416}
|
|
{"current_steps": 6365, "total_steps": 15621, "loss": 0.4608, "lr": 1.4775809489842326e-06, "epoch": 0.4074643108635811, "percentage": 40.75, "elapsed_time": "0:26:43", "remaining_time": "0:38:51", "throughput": 12506.24, "total_tokens": 20053184}
|
|
{"current_steps": 6370, "total_steps": 15621, "loss": 0.3944, "lr": 1.4765989449717937e-06, "epoch": 0.40778439280455797, "percentage": 40.78, "elapsed_time": "0:26:44", "remaining_time": "0:38:49", "throughput": 12511.17, "total_tokens": 20069888}
|
|
{"current_steps": 6375, "total_steps": 15621, "loss": 0.534, "lr": 1.4756163459199763e-06, "epoch": 0.40810447474553485, "percentage": 40.81, "elapsed_time": "0:26:44", "remaining_time": "0:38:47", "throughput": 12515.76, "total_tokens": 20085760}
|
|
{"current_steps": 6380, "total_steps": 15621, "loss": 0.2694, "lr": 1.4746331530555665e-06, "epoch": 0.40842455668651173, "percentage": 40.84, "elapsed_time": "0:26:45", "remaining_time": "0:38:45", "throughput": 12520.18, "total_tokens": 20101056}
|
|
{"current_steps": 6385, "total_steps": 15621, "loss": 0.4114, "lr": 1.4736493676060923e-06, "epoch": 0.4087446386274886, "percentage": 40.87, "elapsed_time": "0:26:46", "remaining_time": "0:38:43", "throughput": 12524.48, "total_tokens": 20116352}
|
|
{"current_steps": 6390, "total_steps": 15621, "loss": 0.3752, "lr": 1.4726649907998216e-06, "epoch": 0.40906472056846555, "percentage": 40.91, "elapsed_time": "0:26:46", "remaining_time": "0:38:41", "throughput": 12528.86, "total_tokens": 20131712}
|
|
{"current_steps": 6395, "total_steps": 15621, "loss": 0.3816, "lr": 1.4716800238657599e-06, "epoch": 0.4093848025094424, "percentage": 40.94, "elapsed_time": "0:26:47", "remaining_time": "0:38:39", "throughput": 12533.19, "total_tokens": 20146880}
|
|
{"current_steps": 6400, "total_steps": 15621, "loss": 0.285, "lr": 1.4706944680336505e-06, "epoch": 0.4097048844504193, "percentage": 40.97, "elapsed_time": "0:26:48", "remaining_time": "0:38:37", "throughput": 12538.07, "total_tokens": 20163520}
|
|
{"current_steps": 6405, "total_steps": 15621, "loss": 0.4656, "lr": 1.469708324533971e-06, "epoch": 0.4100249663913962, "percentage": 41.0, "elapsed_time": "0:26:48", "remaining_time": "0:38:34", "throughput": 12542.08, "total_tokens": 20177984}
|
|
{"current_steps": 6410, "total_steps": 15621, "loss": 0.3425, "lr": 1.4687215945979335e-06, "epoch": 0.41034504833237306, "percentage": 41.03, "elapsed_time": "0:26:49", "remaining_time": "0:38:32", "throughput": 12546.52, "total_tokens": 20193472}
|
|
{"current_steps": 6415, "total_steps": 15621, "loss": 0.4557, "lr": 1.4677342794574815e-06, "epoch": 0.41066513027335, "percentage": 41.07, "elapsed_time": "0:26:50", "remaining_time": "0:38:30", "throughput": 12551.64, "total_tokens": 20210624}
|
|
{"current_steps": 6420, "total_steps": 15621, "loss": 0.4171, "lr": 1.4667463803452902e-06, "epoch": 0.4109852122143269, "percentage": 41.1, "elapsed_time": "0:26:50", "remaining_time": "0:38:28", "throughput": 12556.28, "total_tokens": 20226688}
|
|
{"current_steps": 6425, "total_steps": 15621, "loss": 0.4553, "lr": 1.4657578984947627e-06, "epoch": 0.41130529415530376, "percentage": 41.13, "elapsed_time": "0:26:51", "remaining_time": "0:38:26", "throughput": 12561.66, "total_tokens": 20244608}
|
|
{"current_steps": 6430, "total_steps": 15621, "loss": 0.3597, "lr": 1.4647688351400303e-06, "epoch": 0.41162537609628064, "percentage": 41.16, "elapsed_time": "0:26:52", "remaining_time": "0:38:24", "throughput": 12566.43, "total_tokens": 20261184}
|
|
{"current_steps": 6435, "total_steps": 15621, "loss": 0.3288, "lr": 1.46377919151595e-06, "epoch": 0.4119454580372575, "percentage": 41.19, "elapsed_time": "0:26:53", "remaining_time": "0:38:22", "throughput": 12570.82, "total_tokens": 20276736}
|
|
{"current_steps": 6440, "total_steps": 15621, "loss": 0.47, "lr": 1.462788968858104e-06, "epoch": 0.41226553997823445, "percentage": 41.23, "elapsed_time": "0:26:53", "remaining_time": "0:38:20", "throughput": 12575.82, "total_tokens": 20293888}
|
|
{"current_steps": 6445, "total_steps": 15621, "loss": 0.4858, "lr": 1.4617981684027966e-06, "epoch": 0.41258562191921133, "percentage": 41.26, "elapsed_time": "0:26:54", "remaining_time": "0:38:18", "throughput": 12580.31, "total_tokens": 20309696}
|
|
{"current_steps": 6450, "total_steps": 15621, "loss": 0.3958, "lr": 1.4608067913870536e-06, "epoch": 0.4129057038601882, "percentage": 41.29, "elapsed_time": "0:26:55", "remaining_time": "0:38:16", "throughput": 12584.85, "total_tokens": 20325632}
|
|
{"current_steps": 6455, "total_steps": 15621, "loss": 0.3994, "lr": 1.4598148390486213e-06, "epoch": 0.4132257858011651, "percentage": 41.32, "elapsed_time": "0:26:55", "remaining_time": "0:38:14", "throughput": 12589.53, "total_tokens": 20341888}
|
|
{"current_steps": 6460, "total_steps": 15621, "loss": 0.5083, "lr": 1.4588223126259639e-06, "epoch": 0.41354586774214197, "percentage": 41.35, "elapsed_time": "0:26:56", "remaining_time": "0:38:12", "throughput": 12594.44, "total_tokens": 20358656}
|
|
{"current_steps": 6465, "total_steps": 15621, "loss": 0.3307, "lr": 1.4578292133582615e-06, "epoch": 0.4138659496831189, "percentage": 41.39, "elapsed_time": "0:26:57", "remaining_time": "0:38:10", "throughput": 12598.31, "total_tokens": 20372864}
|
|
{"current_steps": 6470, "total_steps": 15621, "loss": 0.3876, "lr": 1.456835542485411e-06, "epoch": 0.4141860316240958, "percentage": 41.42, "elapsed_time": "0:26:57", "remaining_time": "0:38:08", "throughput": 12602.43, "total_tokens": 20387840}
|
|
{"current_steps": 6475, "total_steps": 15621, "loss": 0.4136, "lr": 1.4558413012480215e-06, "epoch": 0.41450611356507266, "percentage": 41.45, "elapsed_time": "0:26:58", "remaining_time": "0:38:06", "throughput": 12607.32, "total_tokens": 20404736}
|
|
{"current_steps": 6480, "total_steps": 15621, "loss": 0.5707, "lr": 1.4548464908874156e-06, "epoch": 0.41482619550604954, "percentage": 41.48, "elapsed_time": "0:26:59", "remaining_time": "0:38:04", "throughput": 12612.71, "total_tokens": 20422848}
|
|
{"current_steps": 6485, "total_steps": 15621, "loss": 0.4077, "lr": 1.4538511126456255e-06, "epoch": 0.4151462774470264, "percentage": 41.51, "elapsed_time": "0:26:59", "remaining_time": "0:38:02", "throughput": 12616.9, "total_tokens": 20438016}
|
|
{"current_steps": 6490, "total_steps": 15621, "loss": 0.5888, "lr": 1.452855167765392e-06, "epoch": 0.4154663593880033, "percentage": 41.55, "elapsed_time": "0:27:00", "remaining_time": "0:38:00", "throughput": 12621.61, "total_tokens": 20454464}
|
|
{"current_steps": 6495, "total_steps": 15621, "loss": 0.4553, "lr": 1.4518586574901647e-06, "epoch": 0.41578644132898024, "percentage": 41.58, "elapsed_time": "0:27:01", "remaining_time": "0:37:58", "throughput": 12626.17, "total_tokens": 20470464}
|
|
{"current_steps": 6500, "total_steps": 15621, "loss": 0.4639, "lr": 1.450861583064098e-06, "epoch": 0.4161065232699571, "percentage": 41.61, "elapsed_time": "0:27:01", "remaining_time": "0:37:55", "throughput": 12630.44, "total_tokens": 20485696}
|
|
{"current_steps": 6505, "total_steps": 15621, "loss": 0.352, "lr": 1.4498639457320515e-06, "epoch": 0.416426605210934, "percentage": 41.64, "elapsed_time": "0:27:02", "remaining_time": "0:37:53", "throughput": 12634.53, "total_tokens": 20500608}
|
|
{"current_steps": 6510, "total_steps": 15621, "loss": 0.4715, "lr": 1.4488657467395865e-06, "epoch": 0.4167466871519109, "percentage": 41.67, "elapsed_time": "0:27:03", "remaining_time": "0:37:51", "throughput": 12638.74, "total_tokens": 20515776}
|
|
{"current_steps": 6515, "total_steps": 15621, "loss": 0.5086, "lr": 1.4478669873329663e-06, "epoch": 0.41706676909288776, "percentage": 41.71, "elapsed_time": "0:27:03", "remaining_time": "0:37:49", "throughput": 12643.15, "total_tokens": 20531456}
|
|
{"current_steps": 6520, "total_steps": 15621, "loss": 0.3953, "lr": 1.4468676687591536e-06, "epoch": 0.4173868510338647, "percentage": 41.74, "elapsed_time": "0:27:04", "remaining_time": "0:37:47", "throughput": 12647.51, "total_tokens": 20547200}
|
|
{"current_steps": 6525, "total_steps": 15621, "loss": 0.4326, "lr": 1.4458677922658104e-06, "epoch": 0.41770693297484157, "percentage": 41.77, "elapsed_time": "0:27:05", "remaining_time": "0:37:45", "throughput": 12651.74, "total_tokens": 20562560}
|
|
{"current_steps": 6530, "total_steps": 15621, "loss": 0.2884, "lr": 1.444867359101293e-06, "epoch": 0.41802701491581845, "percentage": 41.8, "elapsed_time": "0:27:05", "remaining_time": "0:37:43", "throughput": 12655.76, "total_tokens": 20577344}
|
|
{"current_steps": 6535, "total_steps": 15621, "loss": 0.3541, "lr": 1.4438663705146545e-06, "epoch": 0.41834709685679533, "percentage": 41.83, "elapsed_time": "0:27:06", "remaining_time": "0:37:41", "throughput": 12660.11, "total_tokens": 20593088}
|
|
{"current_steps": 6540, "total_steps": 15621, "loss": 0.3645, "lr": 1.442864827755641e-06, "epoch": 0.4186671787977722, "percentage": 41.87, "elapsed_time": "0:27:07", "remaining_time": "0:37:39", "throughput": 12664.9, "total_tokens": 20609792}
|
|
{"current_steps": 6545, "total_steps": 15621, "loss": 0.4507, "lr": 1.4418627320746901e-06, "epoch": 0.41898726073874915, "percentage": 41.9, "elapsed_time": "0:27:07", "remaining_time": "0:37:37", "throughput": 12669.2, "total_tokens": 20625280}
|
|
{"current_steps": 6550, "total_steps": 15621, "loss": 0.3912, "lr": 1.4408600847229304e-06, "epoch": 0.419307342679726, "percentage": 41.93, "elapsed_time": "0:27:08", "remaining_time": "0:37:35", "throughput": 12673.98, "total_tokens": 20641984}
|
|
{"current_steps": 6555, "total_steps": 15621, "loss": 0.5483, "lr": 1.4398568869521782e-06, "epoch": 0.4196274246207029, "percentage": 41.96, "elapsed_time": "0:27:09", "remaining_time": "0:37:33", "throughput": 12678.57, "total_tokens": 20658240}
|
|
{"current_steps": 6560, "total_steps": 15621, "loss": 0.3603, "lr": 1.4388531400149384e-06, "epoch": 0.4199475065616798, "percentage": 41.99, "elapsed_time": "0:27:10", "remaining_time": "0:37:31", "throughput": 12682.78, "total_tokens": 20673408}
|
|
{"current_steps": 6565, "total_steps": 15621, "loss": 0.3865, "lr": 1.4378488451644007e-06, "epoch": 0.42026758850265666, "percentage": 42.03, "elapsed_time": "0:27:10", "remaining_time": "0:37:29", "throughput": 12687.1, "total_tokens": 20688960}
|
|
{"current_steps": 6570, "total_steps": 15621, "loss": 0.4216, "lr": 1.4368440036544386e-06, "epoch": 0.42058767044363354, "percentage": 42.06, "elapsed_time": "0:27:11", "remaining_time": "0:37:27", "throughput": 12691.48, "total_tokens": 20704768}
|
|
{"current_steps": 6575, "total_steps": 15621, "loss": 0.4157, "lr": 1.435838616739609e-06, "epoch": 0.4209077523846105, "percentage": 42.09, "elapsed_time": "0:27:12", "remaining_time": "0:37:25", "throughput": 12695.62, "total_tokens": 20719808}
|
|
{"current_steps": 6580, "total_steps": 15621, "loss": 0.5319, "lr": 1.4348326856751493e-06, "epoch": 0.42122783432558736, "percentage": 42.12, "elapsed_time": "0:27:12", "remaining_time": "0:37:23", "throughput": 12700.07, "total_tokens": 20735680}
|
|
{"current_steps": 6585, "total_steps": 15621, "loss": 0.3379, "lr": 1.433826211716976e-06, "epoch": 0.42154791626656424, "percentage": 42.15, "elapsed_time": "0:27:13", "remaining_time": "0:37:21", "throughput": 12703.99, "total_tokens": 20750144}
|
|
{"current_steps": 6590, "total_steps": 15621, "loss": 0.3988, "lr": 1.4328191961216835e-06, "epoch": 0.4218679982075411, "percentage": 42.19, "elapsed_time": "0:27:14", "remaining_time": "0:37:19", "throughput": 12708.44, "total_tokens": 20766016}
|
|
{"current_steps": 6595, "total_steps": 15621, "loss": 0.4818, "lr": 1.4318116401465427e-06, "epoch": 0.422188080148518, "percentage": 42.22, "elapsed_time": "0:27:14", "remaining_time": "0:37:17", "throughput": 12713.2, "total_tokens": 20782720}
|
|
{"current_steps": 6600, "total_steps": 15621, "loss": 0.3925, "lr": 1.430803545049499e-06, "epoch": 0.42250816208949493, "percentage": 42.25, "elapsed_time": "0:27:15", "remaining_time": "0:37:15", "throughput": 12717.5, "total_tokens": 20798208}
|
|
{"current_steps": 6605, "total_steps": 15621, "loss": 0.5891, "lr": 1.4297949120891716e-06, "epoch": 0.4228282440304718, "percentage": 42.28, "elapsed_time": "0:27:16", "remaining_time": "0:37:13", "throughput": 12721.56, "total_tokens": 20813056}
|
|
{"current_steps": 6610, "total_steps": 15621, "loss": 0.4266, "lr": 1.4287857425248497e-06, "epoch": 0.4231483259714487, "percentage": 42.31, "elapsed_time": "0:27:16", "remaining_time": "0:37:11", "throughput": 12725.95, "total_tokens": 20828800}
|
|
{"current_steps": 6615, "total_steps": 15621, "loss": 0.4956, "lr": 1.427776037616494e-06, "epoch": 0.42346840791242557, "percentage": 42.35, "elapsed_time": "0:27:17", "remaining_time": "0:37:09", "throughput": 12730.39, "total_tokens": 20844736}
|
|
{"current_steps": 6620, "total_steps": 15621, "loss": 0.3504, "lr": 1.4267657986247326e-06, "epoch": 0.42378848985340245, "percentage": 42.38, "elapsed_time": "0:27:18", "remaining_time": "0:37:07", "throughput": 12734.86, "total_tokens": 20860672}
|
|
{"current_steps": 6625, "total_steps": 15621, "loss": 0.3666, "lr": 1.425755026810861e-06, "epoch": 0.4241085717943794, "percentage": 42.41, "elapsed_time": "0:27:18", "remaining_time": "0:37:05", "throughput": 12739.47, "total_tokens": 20877184}
|
|
{"current_steps": 6630, "total_steps": 15621, "loss": 0.3965, "lr": 1.4247437234368394e-06, "epoch": 0.42442865373535626, "percentage": 42.44, "elapsed_time": "0:27:19", "remaining_time": "0:37:03", "throughput": 12744.3, "total_tokens": 20894208}
|
|
{"current_steps": 6635, "total_steps": 15621, "loss": 0.407, "lr": 1.423731889765292e-06, "epoch": 0.42474873567633314, "percentage": 42.47, "elapsed_time": "0:27:20", "remaining_time": "0:37:01", "throughput": 12748.57, "total_tokens": 20909696}
|
|
{"current_steps": 6640, "total_steps": 15621, "loss": 0.3465, "lr": 1.422719527059505e-06, "epoch": 0.42506881761731, "percentage": 42.51, "elapsed_time": "0:27:20", "remaining_time": "0:36:59", "throughput": 12753.16, "total_tokens": 20926016}
|
|
{"current_steps": 6645, "total_steps": 15621, "loss": 0.362, "lr": 1.4217066365834253e-06, "epoch": 0.4253888995582869, "percentage": 42.54, "elapsed_time": "0:27:21", "remaining_time": "0:36:57", "throughput": 12757.37, "total_tokens": 20941440}
|
|
{"current_steps": 6650, "total_steps": 15621, "loss": 0.4566, "lr": 1.4206932196016586e-06, "epoch": 0.42570898149926384, "percentage": 42.57, "elapsed_time": "0:27:22", "remaining_time": "0:36:55", "throughput": 12761.42, "total_tokens": 20956352}
|
|
{"current_steps": 6655, "total_steps": 15621, "loss": 0.3947, "lr": 1.4196792773794672e-06, "epoch": 0.4260290634402407, "percentage": 42.6, "elapsed_time": "0:27:22", "remaining_time": "0:36:53", "throughput": 12766.15, "total_tokens": 20973056}
|
|
{"current_steps": 6660, "total_steps": 15621, "loss": 0.4406, "lr": 1.418664811182771e-06, "epoch": 0.4263491453812176, "percentage": 42.63, "elapsed_time": "0:27:23", "remaining_time": "0:36:51", "throughput": 12770.61, "total_tokens": 20989248}
|
|
{"current_steps": 6665, "total_steps": 15621, "loss": 0.4946, "lr": 1.417649822278142e-06, "epoch": 0.4266692273221945, "percentage": 42.67, "elapsed_time": "0:27:24", "remaining_time": "0:36:49", "throughput": 12774.68, "total_tokens": 21004096}
|
|
{"current_steps": 6670, "total_steps": 15621, "loss": 0.489, "lr": 1.4166343119328064e-06, "epoch": 0.42698930926317136, "percentage": 42.7, "elapsed_time": "0:27:24", "remaining_time": "0:36:47", "throughput": 12779.2, "total_tokens": 21020224}
|
|
{"current_steps": 6675, "total_steps": 15621, "loss": 0.466, "lr": 1.4156182814146404e-06, "epoch": 0.42730939120414824, "percentage": 42.73, "elapsed_time": "0:27:25", "remaining_time": "0:36:45", "throughput": 12783.32, "total_tokens": 21035264}
|
|
{"current_steps": 6680, "total_steps": 15621, "loss": 0.354, "lr": 1.4146017319921701e-06, "epoch": 0.42762947314512517, "percentage": 42.76, "elapsed_time": "0:27:26", "remaining_time": "0:36:43", "throughput": 12787.97, "total_tokens": 21051904}
|
|
{"current_steps": 6685, "total_steps": 15621, "loss": 0.4117, "lr": 1.4135846649345695e-06, "epoch": 0.42794955508610205, "percentage": 42.79, "elapsed_time": "0:27:26", "remaining_time": "0:36:41", "throughput": 12792.98, "total_tokens": 21069504}
|
|
{"current_steps": 6690, "total_steps": 15621, "loss": 0.4259, "lr": 1.4125670815116589e-06, "epoch": 0.42826963702707893, "percentage": 42.83, "elapsed_time": "0:27:27", "remaining_time": "0:36:39", "throughput": 12796.99, "total_tokens": 21084288}
|
|
{"current_steps": 6695, "total_steps": 15621, "loss": 0.2933, "lr": 1.4115489829939025e-06, "epoch": 0.4285897189680558, "percentage": 42.86, "elapsed_time": "0:27:28", "remaining_time": "0:36:37", "throughput": 12801.52, "total_tokens": 21100544}
|
|
{"current_steps": 6700, "total_steps": 15621, "loss": 0.4315, "lr": 1.4105303706524093e-06, "epoch": 0.4289098009090327, "percentage": 42.89, "elapsed_time": "0:27:28", "remaining_time": "0:36:35", "throughput": 12805.93, "total_tokens": 21116608}
|
|
{"current_steps": 6705, "total_steps": 15621, "loss": 0.6147, "lr": 1.4095112457589276e-06, "epoch": 0.4292298828500096, "percentage": 42.92, "elapsed_time": "0:27:29", "remaining_time": "0:36:33", "throughput": 12810.06, "total_tokens": 21131776}
|
|
{"current_steps": 6710, "total_steps": 15621, "loss": 0.4185, "lr": 1.4084916095858477e-06, "epoch": 0.4295499647909865, "percentage": 42.95, "elapsed_time": "0:27:30", "remaining_time": "0:36:31", "throughput": 12813.89, "total_tokens": 21146368}
|
|
{"current_steps": 6715, "total_steps": 15621, "loss": 0.509, "lr": 1.407471463406197e-06, "epoch": 0.4298700467319634, "percentage": 42.99, "elapsed_time": "0:27:30", "remaining_time": "0:36:29", "throughput": 12818.28, "total_tokens": 21162368}
|
|
{"current_steps": 6720, "total_steps": 15621, "loss": 0.4404, "lr": 1.4064508084936399e-06, "epoch": 0.43019012867294026, "percentage": 43.02, "elapsed_time": "0:27:31", "remaining_time": "0:36:27", "throughput": 12822.94, "total_tokens": 21179008}
|
|
{"current_steps": 6725, "total_steps": 15621, "loss": 0.569, "lr": 1.405429646122476e-06, "epoch": 0.43051021061391714, "percentage": 43.05, "elapsed_time": "0:27:32", "remaining_time": "0:36:25", "throughput": 12827.76, "total_tokens": 21196160}
|
|
{"current_steps": 6730, "total_steps": 15621, "loss": 0.5342, "lr": 1.4044079775676392e-06, "epoch": 0.4308302925548941, "percentage": 43.08, "elapsed_time": "0:27:33", "remaining_time": "0:36:23", "throughput": 12832.12, "total_tokens": 21212032}
|
|
{"current_steps": 6735, "total_steps": 15621, "loss": 0.3587, "lr": 1.4033858041046936e-06, "epoch": 0.43115037449587096, "percentage": 43.12, "elapsed_time": "0:27:33", "remaining_time": "0:36:21", "throughput": 12837.39, "total_tokens": 21230272}
|
|
{"current_steps": 6740, "total_steps": 15621, "loss": 0.3928, "lr": 1.4023631270098352e-06, "epoch": 0.43147045643684784, "percentage": 43.15, "elapsed_time": "0:27:34", "remaining_time": "0:36:19", "throughput": 12841.58, "total_tokens": 21245760}
|
|
{"current_steps": 6745, "total_steps": 15621, "loss": 0.3446, "lr": 1.4013399475598888e-06, "epoch": 0.4317905383778247, "percentage": 43.18, "elapsed_time": "0:27:35", "remaining_time": "0:36:18", "throughput": 12845.65, "total_tokens": 21260992}
|
|
{"current_steps": 6750, "total_steps": 15621, "loss": 0.2819, "lr": 1.4003162670323056e-06, "epoch": 0.4321106203188016, "percentage": 43.21, "elapsed_time": "0:27:35", "remaining_time": "0:36:16", "throughput": 12849.33, "total_tokens": 21275136}
|
|
{"current_steps": 6755, "total_steps": 15621, "loss": 0.5416, "lr": 1.3992920867051627e-06, "epoch": 0.4324307022597785, "percentage": 43.24, "elapsed_time": "0:27:36", "remaining_time": "0:36:14", "throughput": 12853.49, "total_tokens": 21290560}
|
|
{"current_steps": 6760, "total_steps": 15621, "loss": 0.3552, "lr": 1.3982674078571614e-06, "epoch": 0.4327507842007554, "percentage": 43.28, "elapsed_time": "0:27:37", "remaining_time": "0:36:12", "throughput": 12857.51, "total_tokens": 21305536}
|
|
{"current_steps": 6765, "total_steps": 15621, "loss": 0.3758, "lr": 1.3972422317676252e-06, "epoch": 0.4330708661417323, "percentage": 43.31, "elapsed_time": "0:27:37", "remaining_time": "0:36:10", "throughput": 12861.53, "total_tokens": 21320576}
|
|
{"current_steps": 6770, "total_steps": 15621, "loss": 0.3698, "lr": 1.3962165597164985e-06, "epoch": 0.43339094808270917, "percentage": 43.34, "elapsed_time": "0:27:38", "remaining_time": "0:36:08", "throughput": 12865.54, "total_tokens": 21335680}
|
|
{"current_steps": 6775, "total_steps": 15621, "loss": 0.3519, "lr": 1.395190392984345e-06, "epoch": 0.43371103002368605, "percentage": 43.37, "elapsed_time": "0:27:39", "remaining_time": "0:36:06", "throughput": 12869.88, "total_tokens": 21351808}
|
|
{"current_steps": 6780, "total_steps": 15621, "loss": 0.4522, "lr": 1.3941637328523452e-06, "epoch": 0.43403111196466293, "percentage": 43.4, "elapsed_time": "0:27:39", "remaining_time": "0:36:04", "throughput": 12873.79, "total_tokens": 21366464}
|
|
{"current_steps": 6785, "total_steps": 15621, "loss": 0.3038, "lr": 1.3931365806022978e-06, "epoch": 0.43435119390563987, "percentage": 43.44, "elapsed_time": "0:27:40", "remaining_time": "0:36:02", "throughput": 12878.4, "total_tokens": 21383296}
|
|
{"current_steps": 6790, "total_steps": 15621, "loss": 0.3111, "lr": 1.3921089375166131e-06, "epoch": 0.43467127584661674, "percentage": 43.47, "elapsed_time": "0:27:41", "remaining_time": "0:36:00", "throughput": 12882.87, "total_tokens": 21399616}
|
|
{"current_steps": 6795, "total_steps": 15621, "loss": 0.4455, "lr": 1.391080804878316e-06, "epoch": 0.4349913577875936, "percentage": 43.5, "elapsed_time": "0:27:41", "remaining_time": "0:35:58", "throughput": 12886.91, "total_tokens": 21414848}
|
|
{"current_steps": 6800, "total_steps": 15621, "loss": 0.3804, "lr": 1.3900521839710427e-06, "epoch": 0.4353114397285705, "percentage": 43.53, "elapsed_time": "0:27:42", "remaining_time": "0:35:56", "throughput": 12890.99, "total_tokens": 21430144}
|
|
{"current_steps": 6805, "total_steps": 15621, "loss": 0.3503, "lr": 1.3890230760790373e-06, "epoch": 0.4356315216695474, "percentage": 43.56, "elapsed_time": "0:27:43", "remaining_time": "0:35:54", "throughput": 12894.97, "total_tokens": 21445248}
|
|
{"current_steps": 6810, "total_steps": 15621, "loss": 0.598, "lr": 1.3879934824871544e-06, "epoch": 0.4359516036105243, "percentage": 43.6, "elapsed_time": "0:27:43", "remaining_time": "0:35:52", "throughput": 12899.03, "total_tokens": 21460544}
|
|
{"current_steps": 6815, "total_steps": 15621, "loss": 0.5102, "lr": 1.3869634044808526e-06, "epoch": 0.4362716855515012, "percentage": 43.63, "elapsed_time": "0:27:44", "remaining_time": "0:35:50", "throughput": 12903.26, "total_tokens": 21476224}
|
|
{"current_steps": 6820, "total_steps": 15621, "loss": 0.6093, "lr": 1.3859328433461971e-06, "epoch": 0.4365917674924781, "percentage": 43.66, "elapsed_time": "0:27:45", "remaining_time": "0:35:48", "throughput": 12907.45, "total_tokens": 21491712}
|
|
{"current_steps": 6825, "total_steps": 15621, "loss": 0.5794, "lr": 1.3849018003698553e-06, "epoch": 0.43691184943345496, "percentage": 43.69, "elapsed_time": "0:27:45", "remaining_time": "0:35:46", "throughput": 12912.2, "total_tokens": 21508928}
|
|
{"current_steps": 6830, "total_steps": 15621, "loss": 0.3975, "lr": 1.3838702768390964e-06, "epoch": 0.43723193137443184, "percentage": 43.72, "elapsed_time": "0:27:46", "remaining_time": "0:35:44", "throughput": 12916.05, "total_tokens": 21523648}
|
|
{"current_steps": 6835, "total_steps": 15621, "loss": 0.474, "lr": 1.38283827404179e-06, "epoch": 0.43755201331540877, "percentage": 43.76, "elapsed_time": "0:27:47", "remaining_time": "0:35:42", "throughput": 12920.22, "total_tokens": 21539264}
|
|
{"current_steps": 6840, "total_steps": 15621, "loss": 0.3763, "lr": 1.381805793266403e-06, "epoch": 0.43787209525638565, "percentage": 43.79, "elapsed_time": "0:27:47", "remaining_time": "0:35:41", "throughput": 12924.56, "total_tokens": 21555520}
|
|
{"current_steps": 6845, "total_steps": 15621, "loss": 0.4524, "lr": 1.3807728358020009e-06, "epoch": 0.43819217719736253, "percentage": 43.82, "elapsed_time": "0:27:48", "remaining_time": "0:35:39", "throughput": 12928.39, "total_tokens": 21570112}
|
|
{"current_steps": 6850, "total_steps": 15621, "loss": 0.3372, "lr": 1.3797394029382416e-06, "epoch": 0.4385122591383394, "percentage": 43.85, "elapsed_time": "0:27:49", "remaining_time": "0:35:37", "throughput": 12932.19, "total_tokens": 21584768}
|
|
{"current_steps": 6855, "total_steps": 15621, "loss": 0.3008, "lr": 1.37870549596538e-06, "epoch": 0.4388323410793163, "percentage": 43.88, "elapsed_time": "0:27:49", "remaining_time": "0:35:35", "throughput": 12936.19, "total_tokens": 21599872}
|
|
{"current_steps": 6860, "total_steps": 15621, "loss": 0.5217, "lr": 1.3776711161742595e-06, "epoch": 0.43915242302029317, "percentage": 43.92, "elapsed_time": "0:27:50", "remaining_time": "0:35:33", "throughput": 12940.45, "total_tokens": 21615808}
|
|
{"current_steps": 6865, "total_steps": 15621, "loss": 0.4772, "lr": 1.3766362648563166e-06, "epoch": 0.4394725049612701, "percentage": 43.95, "elapsed_time": "0:27:51", "remaining_time": "0:35:31", "throughput": 12944.32, "total_tokens": 21630656}
|
|
{"current_steps": 6870, "total_steps": 15621, "loss": 0.4123, "lr": 1.3756009433035744e-06, "epoch": 0.439792586902247, "percentage": 43.98, "elapsed_time": "0:27:51", "remaining_time": "0:35:29", "throughput": 12948.74, "total_tokens": 21646976}
|
|
{"current_steps": 6875, "total_steps": 15621, "loss": 0.5783, "lr": 1.3745651528086447e-06, "epoch": 0.44011266884322386, "percentage": 44.01, "elapsed_time": "0:27:52", "remaining_time": "0:35:27", "throughput": 12953.79, "total_tokens": 21665024}
|
|
{"current_steps": 6880, "total_steps": 15621, "loss": 0.4489, "lr": 1.373528894664724e-06, "epoch": 0.44043275078420074, "percentage": 44.04, "elapsed_time": "0:27:53", "remaining_time": "0:35:25", "throughput": 12957.77, "total_tokens": 21680128}
|
|
{"current_steps": 6885, "total_steps": 15621, "loss": 0.3466, "lr": 1.3724921701655924e-06, "epoch": 0.4407528327251776, "percentage": 44.08, "elapsed_time": "0:27:53", "remaining_time": "0:35:23", "throughput": 12961.9, "total_tokens": 21695808}
|
|
{"current_steps": 6890, "total_steps": 15621, "loss": 0.3186, "lr": 1.3714549806056125e-06, "epoch": 0.44107291466615456, "percentage": 44.11, "elapsed_time": "0:27:54", "remaining_time": "0:35:21", "throughput": 12966.18, "total_tokens": 21711936}
|
|
{"current_steps": 6895, "total_steps": 15621, "loss": 0.4162, "lr": 1.3704173272797283e-06, "epoch": 0.44139299660713144, "percentage": 44.14, "elapsed_time": "0:27:55", "remaining_time": "0:35:20", "throughput": 12970.26, "total_tokens": 21727488}
|
|
{"current_steps": 6900, "total_steps": 15621, "loss": 0.4556, "lr": 1.3693792114834619e-06, "epoch": 0.4417130785481083, "percentage": 44.17, "elapsed_time": "0:27:55", "remaining_time": "0:35:18", "throughput": 12975.21, "total_tokens": 21745280}
|
|
{"current_steps": 6905, "total_steps": 15621, "loss": 0.467, "lr": 1.3683406345129129e-06, "epoch": 0.4420331604890852, "percentage": 44.2, "elapsed_time": "0:27:56", "remaining_time": "0:35:16", "throughput": 12979.03, "total_tokens": 21760000}
|
|
{"current_steps": 6910, "total_steps": 15621, "loss": 0.3971, "lr": 1.3673015976647567e-06, "epoch": 0.4423532424300621, "percentage": 44.24, "elapsed_time": "0:27:57", "remaining_time": "0:35:14", "throughput": 12983.04, "total_tokens": 21775232}
|
|
{"current_steps": 6915, "total_steps": 15621, "loss": 0.3979, "lr": 1.3662621022362435e-06, "epoch": 0.442673324371039, "percentage": 44.27, "elapsed_time": "0:27:57", "remaining_time": "0:35:12", "throughput": 12987.15, "total_tokens": 21790656}
|
|
{"current_steps": 6920, "total_steps": 15621, "loss": 0.462, "lr": 1.3652221495251952e-06, "epoch": 0.4429934063120159, "percentage": 44.3, "elapsed_time": "0:27:58", "remaining_time": "0:35:10", "throughput": 12991.25, "total_tokens": 21806336}
|
|
{"current_steps": 6925, "total_steps": 15621, "loss": 0.3242, "lr": 1.3641817408300049e-06, "epoch": 0.44331348825299277, "percentage": 44.33, "elapsed_time": "0:27:59", "remaining_time": "0:35:08", "throughput": 12996.06, "total_tokens": 21823744}
|
|
{"current_steps": 6930, "total_steps": 15621, "loss": 0.559, "lr": 1.3631408774496352e-06, "epoch": 0.44363357019396965, "percentage": 44.36, "elapsed_time": "0:27:59", "remaining_time": "0:35:06", "throughput": 13000.05, "total_tokens": 21839104}
|
|
{"current_steps": 6935, "total_steps": 15621, "loss": 0.3616, "lr": 1.3620995606836165e-06, "epoch": 0.44395365213494653, "percentage": 44.4, "elapsed_time": "0:28:00", "remaining_time": "0:35:04", "throughput": 13004.1, "total_tokens": 21854528}
|
|
{"current_steps": 6940, "total_steps": 15621, "loss": 0.6013, "lr": 1.3610577918320446e-06, "epoch": 0.4442737340759234, "percentage": 44.43, "elapsed_time": "0:28:01", "remaining_time": "0:35:03", "throughput": 13008.36, "total_tokens": 21870592}
|
|
{"current_steps": 6945, "total_steps": 15621, "loss": 0.3823, "lr": 1.3600155721955802e-06, "epoch": 0.44459381601690035, "percentage": 44.46, "elapsed_time": "0:28:01", "remaining_time": "0:35:01", "throughput": 13012.25, "total_tokens": 21885696}
|
|
{"current_steps": 6950, "total_steps": 15621, "loss": 0.4017, "lr": 1.3589729030754468e-06, "epoch": 0.4449138979578772, "percentage": 44.49, "elapsed_time": "0:28:02", "remaining_time": "0:34:59", "throughput": 13016.34, "total_tokens": 21901248}
|
|
{"current_steps": 6955, "total_steps": 15621, "loss": 0.4293, "lr": 1.3579297857734293e-06, "epoch": 0.4452339798988541, "percentage": 44.52, "elapsed_time": "0:28:03", "remaining_time": "0:34:57", "throughput": 13020.2, "total_tokens": 21916352}
|
|
{"current_steps": 6960, "total_steps": 15621, "loss": 0.3354, "lr": 1.3568862215918717e-06, "epoch": 0.445554061839831, "percentage": 44.56, "elapsed_time": "0:28:03", "remaining_time": "0:34:55", "throughput": 13023.94, "total_tokens": 21931072}
|
|
{"current_steps": 6965, "total_steps": 15621, "loss": 0.5014, "lr": 1.3558422118336762e-06, "epoch": 0.44587414378080786, "percentage": 44.59, "elapsed_time": "0:28:04", "remaining_time": "0:34:53", "throughput": 13028.03, "total_tokens": 21946752}
|
|
{"current_steps": 6970, "total_steps": 15621, "loss": 0.4669, "lr": 1.354797757802301e-06, "epoch": 0.4461942257217848, "percentage": 44.62, "elapsed_time": "0:28:05", "remaining_time": "0:34:51", "throughput": 13032.04, "total_tokens": 21962176}
|
|
{"current_steps": 6975, "total_steps": 15621, "loss": 0.3986, "lr": 1.3537528608017596e-06, "epoch": 0.4465143076627617, "percentage": 44.65, "elapsed_time": "0:28:05", "remaining_time": "0:34:49", "throughput": 13036.36, "total_tokens": 21978496}
|
|
{"current_steps": 6980, "total_steps": 15621, "loss": 0.3989, "lr": 1.352707522136618e-06, "epoch": 0.44683438960373856, "percentage": 44.68, "elapsed_time": "0:28:06", "remaining_time": "0:34:47", "throughput": 13039.84, "total_tokens": 21992576}
|
|
{"current_steps": 6985, "total_steps": 15621, "loss": 0.3987, "lr": 1.3516617431119934e-06, "epoch": 0.44715447154471544, "percentage": 44.72, "elapsed_time": "0:28:07", "remaining_time": "0:34:46", "throughput": 13043.85, "total_tokens": 22008000}
|
|
{"current_steps": 6990, "total_steps": 15621, "loss": 0.5453, "lr": 1.350615525033554e-06, "epoch": 0.4474745534856923, "percentage": 44.75, "elapsed_time": "0:28:07", "remaining_time": "0:34:44", "throughput": 13047.7, "total_tokens": 22022976}
|
|
{"current_steps": 6995, "total_steps": 15621, "loss": 0.4055, "lr": 1.3495688692075144e-06, "epoch": 0.44779463542666925, "percentage": 44.78, "elapsed_time": "0:28:08", "remaining_time": "0:34:42", "throughput": 13051.64, "total_tokens": 22038144}
|
|
{"current_steps": 7000, "total_steps": 15621, "loss": 0.35, "lr": 1.3485217769406376e-06, "epoch": 0.44811471736764613, "percentage": 44.81, "elapsed_time": "0:28:09", "remaining_time": "0:34:40", "throughput": 13055.83, "total_tokens": 22054016}
|
|
{"current_steps": 7005, "total_steps": 15621, "loss": 0.3627, "lr": 1.3474742495402303e-06, "epoch": 0.448434799308623, "percentage": 44.84, "elapsed_time": "0:28:10", "remaining_time": "0:34:38", "throughput": 13061.32, "total_tokens": 22073920}
|
|
{"current_steps": 7010, "total_steps": 15621, "loss": 0.4295, "lr": 1.3464262883141425e-06, "epoch": 0.4487548812495999, "percentage": 44.88, "elapsed_time": "0:28:10", "remaining_time": "0:34:36", "throughput": 13065.51, "total_tokens": 22089728}
|
|
{"current_steps": 7015, "total_steps": 15621, "loss": 0.5883, "lr": 1.3453778945707663e-06, "epoch": 0.44907496319057677, "percentage": 44.91, "elapsed_time": "0:28:11", "remaining_time": "0:34:34", "throughput": 13069.51, "total_tokens": 22105344}
|
|
{"current_steps": 7020, "total_steps": 15621, "loss": 0.4596, "lr": 1.3443290696190332e-06, "epoch": 0.4493950451315537, "percentage": 44.94, "elapsed_time": "0:28:12", "remaining_time": "0:34:33", "throughput": 13073.87, "total_tokens": 22121792}
|
|
{"current_steps": 7025, "total_steps": 15621, "loss": 0.4175, "lr": 1.343279814768414e-06, "epoch": 0.4497151270725306, "percentage": 44.97, "elapsed_time": "0:28:12", "remaining_time": "0:34:31", "throughput": 13077.45, "total_tokens": 22136128}
|
|
{"current_steps": 7030, "total_steps": 15621, "loss": 0.3849, "lr": 1.3422301313289156e-06, "epoch": 0.45003520901350746, "percentage": 45.0, "elapsed_time": "0:28:13", "remaining_time": "0:34:29", "throughput": 13081.58, "total_tokens": 22151936}
|
|
{"current_steps": 7035, "total_steps": 15621, "loss": 0.3775, "lr": 1.34118002061108e-06, "epoch": 0.45035529095448434, "percentage": 45.04, "elapsed_time": "0:28:14", "remaining_time": "0:34:27", "throughput": 13085.82, "total_tokens": 22168128}
|
|
{"current_steps": 7038, "total_steps": 15621, "eval_loss": 0.43633610010147095, "epoch": 0.4505473401190705, "percentage": 45.05, "elapsed_time": "0:29:05", "remaining_time": "0:35:28", "throughput": 12708.94, "total_tokens": 22178432}
|
|
{"current_steps": 7040, "total_steps": 15621, "loss": 0.4432, "lr": 1.3401294839259828e-06, "epoch": 0.4506753728954612, "percentage": 45.07, "elapsed_time": "0:32:59", "remaining_time": "0:40:13", "throughput": 11204.92, "total_tokens": 22184512}
|
|
{"current_steps": 7045, "total_steps": 15621, "loss": 0.5428, "lr": 1.3390785225852312e-06, "epoch": 0.4509954548364381, "percentage": 45.1, "elapsed_time": "0:33:00", "remaining_time": "0:40:10", "throughput": 11208.94, "total_tokens": 22199872}
|
|
{"current_steps": 7050, "total_steps": 15621, "loss": 0.444, "lr": 1.3380271379009631e-06, "epoch": 0.45131553677741504, "percentage": 45.13, "elapsed_time": "0:33:01", "remaining_time": "0:40:08", "throughput": 11213.51, "total_tokens": 22216960}
|
|
{"current_steps": 7055, "total_steps": 15621, "loss": 0.2645, "lr": 1.3369753311858442e-06, "epoch": 0.4516356187183919, "percentage": 45.16, "elapsed_time": "0:33:01", "remaining_time": "0:40:06", "throughput": 11217.26, "total_tokens": 22231488}
|
|
{"current_steps": 7060, "total_steps": 15621, "loss": 0.4597, "lr": 1.3359231037530682e-06, "epoch": 0.4519557006593688, "percentage": 45.2, "elapsed_time": "0:33:02", "remaining_time": "0:40:04", "throughput": 11221.3, "total_tokens": 22246976}
|
|
{"current_steps": 7065, "total_steps": 15621, "loss": 0.4178, "lr": 1.3348704569163527e-06, "epoch": 0.4522757826003457, "percentage": 45.23, "elapsed_time": "0:33:03", "remaining_time": "0:40:01", "throughput": 11225.7, "total_tokens": 22263680}
|
|
{"current_steps": 7070, "total_steps": 15621, "loss": 0.3371, "lr": 1.33381739198994e-06, "epoch": 0.45259586454132256, "percentage": 45.26, "elapsed_time": "0:33:03", "remaining_time": "0:39:59", "throughput": 11229.87, "total_tokens": 22279552}
|
|
{"current_steps": 7075, "total_steps": 15621, "loss": 0.4463, "lr": 1.3327639102885938e-06, "epoch": 0.4529159464822995, "percentage": 45.29, "elapsed_time": "0:33:04", "remaining_time": "0:39:57", "throughput": 11234.01, "total_tokens": 22295296}
|
|
{"current_steps": 7080, "total_steps": 15621, "loss": 0.3979, "lr": 1.3317100131275986e-06, "epoch": 0.45323602842327637, "percentage": 45.32, "elapsed_time": "0:33:05", "remaining_time": "0:39:54", "throughput": 11237.89, "total_tokens": 22310400}
|
|
{"current_steps": 7085, "total_steps": 15621, "loss": 0.4852, "lr": 1.3306557018227576e-06, "epoch": 0.45355611036425325, "percentage": 45.36, "elapsed_time": "0:33:05", "remaining_time": "0:39:52", "throughput": 11242.27, "total_tokens": 22326848}
|
|
{"current_steps": 7090, "total_steps": 15621, "loss": 0.4673, "lr": 1.3296009776903903e-06, "epoch": 0.45387619230523013, "percentage": 45.39, "elapsed_time": "0:33:06", "remaining_time": "0:39:50", "throughput": 11246.39, "total_tokens": 22342592}
|
|
{"current_steps": 7095, "total_steps": 15621, "loss": 0.4693, "lr": 1.3285458420473323e-06, "epoch": 0.454196274246207, "percentage": 45.42, "elapsed_time": "0:33:07", "remaining_time": "0:39:48", "throughput": 11250.7, "total_tokens": 22358912}
|
|
{"current_steps": 7100, "total_steps": 15621, "loss": 0.3789, "lr": 1.3274902962109332e-06, "epoch": 0.45451635618718395, "percentage": 45.45, "elapsed_time": "0:33:08", "remaining_time": "0:39:45", "throughput": 11254.78, "total_tokens": 22374528}
|
|
{"current_steps": 7105, "total_steps": 15621, "loss": 0.3752, "lr": 1.3264343414990539e-06, "epoch": 0.4548364381281608, "percentage": 45.48, "elapsed_time": "0:33:08", "remaining_time": "0:39:43", "throughput": 11258.75, "total_tokens": 22389824}
|
|
{"current_steps": 7110, "total_steps": 15621, "loss": 0.4269, "lr": 1.3253779792300663e-06, "epoch": 0.4551565200691377, "percentage": 45.52, "elapsed_time": "0:33:09", "remaining_time": "0:39:41", "throughput": 11262.77, "total_tokens": 22405376}
|
|
{"current_steps": 7115, "total_steps": 15621, "loss": 0.3442, "lr": 1.3243212107228518e-06, "epoch": 0.4554766020101146, "percentage": 45.55, "elapsed_time": "0:33:09", "remaining_time": "0:39:39", "throughput": 11266.54, "total_tokens": 22420032}
|
|
{"current_steps": 7120, "total_steps": 15621, "loss": 0.393, "lr": 1.3232640372967974e-06, "epoch": 0.45579668395109146, "percentage": 45.58, "elapsed_time": "0:33:10", "remaining_time": "0:39:36", "throughput": 11270.29, "total_tokens": 22434688}
|
|
{"current_steps": 7125, "total_steps": 15621, "loss": 0.4691, "lr": 1.3222064602717974e-06, "epoch": 0.45611676589206834, "percentage": 45.61, "elapsed_time": "0:33:11", "remaining_time": "0:39:34", "throughput": 11274.58, "total_tokens": 22451072}
|
|
{"current_steps": 7130, "total_steps": 15621, "loss": 0.3578, "lr": 1.321148480968248e-06, "epoch": 0.4564368478330453, "percentage": 45.64, "elapsed_time": "0:33:11", "remaining_time": "0:39:32", "throughput": 11278.6, "total_tokens": 22466688}
|
|
{"current_steps": 7135, "total_steps": 15621, "loss": 0.4627, "lr": 1.3200901007070495e-06, "epoch": 0.45675692977402216, "percentage": 45.68, "elapsed_time": "0:33:12", "remaining_time": "0:39:29", "throughput": 11282.68, "total_tokens": 22482432}
|
|
{"current_steps": 7140, "total_steps": 15621, "loss": 0.4653, "lr": 1.3190313208096022e-06, "epoch": 0.45707701171499904, "percentage": 45.71, "elapsed_time": "0:33:13", "remaining_time": "0:39:27", "throughput": 11286.37, "total_tokens": 22496960}
|
|
{"current_steps": 7145, "total_steps": 15621, "loss": 0.3506, "lr": 1.3179721425978048e-06, "epoch": 0.4573970936559759, "percentage": 45.74, "elapsed_time": "0:33:13", "remaining_time": "0:39:25", "throughput": 11290.31, "total_tokens": 22512256}
|
|
{"current_steps": 7150, "total_steps": 15621, "loss": 0.3801, "lr": 1.3169125673940541e-06, "epoch": 0.4577171755969528, "percentage": 45.77, "elapsed_time": "0:33:14", "remaining_time": "0:39:23", "throughput": 11294.47, "total_tokens": 22528192}
|
|
{"current_steps": 7155, "total_steps": 15621, "loss": 0.4222, "lr": 1.3158525965212422e-06, "epoch": 0.45803725753792973, "percentage": 45.8, "elapsed_time": "0:33:15", "remaining_time": "0:39:20", "throughput": 11299.02, "total_tokens": 22545408}
|
|
{"current_steps": 7160, "total_steps": 15621, "loss": 0.499, "lr": 1.3147922313027548e-06, "epoch": 0.4583573394789066, "percentage": 45.84, "elapsed_time": "0:33:16", "remaining_time": "0:39:18", "throughput": 11302.98, "total_tokens": 22560832}
|
|
{"current_steps": 7165, "total_steps": 15621, "loss": 0.3566, "lr": 1.3137314730624707e-06, "epoch": 0.4586774214198835, "percentage": 45.87, "elapsed_time": "0:33:16", "remaining_time": "0:39:16", "throughput": 11307.46, "total_tokens": 22577728}
|
|
{"current_steps": 7170, "total_steps": 15621, "loss": 0.4792, "lr": 1.3126703231247588e-06, "epoch": 0.45899750336086037, "percentage": 45.9, "elapsed_time": "0:33:17", "remaining_time": "0:39:14", "throughput": 11311.74, "total_tokens": 22594112}
|
|
{"current_steps": 7175, "total_steps": 15621, "loss": 0.3942, "lr": 1.3116087828144772e-06, "epoch": 0.45931758530183725, "percentage": 45.93, "elapsed_time": "0:33:18", "remaining_time": "0:39:12", "throughput": 11315.73, "total_tokens": 22609728}
|
|
{"current_steps": 7180, "total_steps": 15621, "loss": 0.4788, "lr": 1.310546853456972e-06, "epoch": 0.4596376672428142, "percentage": 45.96, "elapsed_time": "0:33:18", "remaining_time": "0:39:09", "throughput": 11319.55, "total_tokens": 22624704}
|
|
{"current_steps": 7185, "total_steps": 15621, "loss": 0.3133, "lr": 1.3094845363780737e-06, "epoch": 0.45995774918379106, "percentage": 46.0, "elapsed_time": "0:33:19", "remaining_time": "0:39:07", "throughput": 11323.61, "total_tokens": 22640448}
|
|
{"current_steps": 7190, "total_steps": 15621, "loss": 0.2221, "lr": 1.3084218329040976e-06, "epoch": 0.46027783112476794, "percentage": 46.03, "elapsed_time": "0:33:20", "remaining_time": "0:39:05", "throughput": 11327.5, "total_tokens": 22655680}
|
|
{"current_steps": 7195, "total_steps": 15621, "loss": 0.3836, "lr": 1.3073587443618425e-06, "epoch": 0.4605979130657448, "percentage": 46.06, "elapsed_time": "0:33:20", "remaining_time": "0:39:03", "throughput": 11331.83, "total_tokens": 22672128}
|
|
{"current_steps": 7200, "total_steps": 15621, "loss": 0.528, "lr": 1.3062952720785861e-06, "epoch": 0.4609179950067217, "percentage": 46.09, "elapsed_time": "0:33:21", "remaining_time": "0:39:00", "throughput": 11335.61, "total_tokens": 22687104}
|
|
{"current_steps": 7205, "total_steps": 15621, "loss": 0.3679, "lr": 1.305231417382086e-06, "epoch": 0.4612380769476986, "percentage": 46.12, "elapsed_time": "0:33:22", "remaining_time": "0:38:58", "throughput": 11339.7, "total_tokens": 22702976}
|
|
{"current_steps": 7210, "total_steps": 15621, "loss": 0.3473, "lr": 1.3041671816005777e-06, "epoch": 0.4615581588886755, "percentage": 46.16, "elapsed_time": "0:33:22", "remaining_time": "0:38:56", "throughput": 11343.66, "total_tokens": 22718464}
|
|
{"current_steps": 7215, "total_steps": 15621, "loss": 0.3735, "lr": 1.3031025660627718e-06, "epoch": 0.4618782408296524, "percentage": 46.19, "elapsed_time": "0:33:23", "remaining_time": "0:38:54", "throughput": 11347.84, "total_tokens": 22734656}
|
|
{"current_steps": 7220, "total_steps": 15621, "loss": 0.4378, "lr": 1.3020375720978534e-06, "epoch": 0.4621983227706293, "percentage": 46.22, "elapsed_time": "0:33:24", "remaining_time": "0:38:51", "throughput": 11351.77, "total_tokens": 22750016}
|
|
{"current_steps": 7225, "total_steps": 15621, "loss": 0.385, "lr": 1.3009722010354799e-06, "epoch": 0.46251840471160616, "percentage": 46.25, "elapsed_time": "0:33:24", "remaining_time": "0:38:49", "throughput": 11355.75, "total_tokens": 22765632}
|
|
{"current_steps": 7230, "total_steps": 15621, "loss": 0.4572, "lr": 1.2999064542057794e-06, "epoch": 0.46283848665258304, "percentage": 46.28, "elapsed_time": "0:33:25", "remaining_time": "0:38:47", "throughput": 11359.7, "total_tokens": 22781184}
|
|
{"current_steps": 7235, "total_steps": 15621, "loss": 0.4955, "lr": 1.2988403329393495e-06, "epoch": 0.46315856859355997, "percentage": 46.32, "elapsed_time": "0:33:26", "remaining_time": "0:38:45", "throughput": 11363.79, "total_tokens": 22797248}
|
|
{"current_steps": 7240, "total_steps": 15621, "loss": 0.4186, "lr": 1.2977738385672557e-06, "epoch": 0.46347865053453685, "percentage": 46.35, "elapsed_time": "0:33:26", "remaining_time": "0:38:43", "throughput": 11367.72, "total_tokens": 22812800}
|
|
{"current_steps": 7245, "total_steps": 15621, "loss": 0.4086, "lr": 1.2967069724210278e-06, "epoch": 0.46379873247551373, "percentage": 46.38, "elapsed_time": "0:33:27", "remaining_time": "0:38:40", "throughput": 11371.31, "total_tokens": 22827200}
|
|
{"current_steps": 7250, "total_steps": 15621, "loss": 0.5472, "lr": 1.2956397358326609e-06, "epoch": 0.4641188144164906, "percentage": 46.41, "elapsed_time": "0:33:28", "remaining_time": "0:38:38", "throughput": 11375.4, "total_tokens": 22843264}
|
|
{"current_steps": 7255, "total_steps": 15621, "loss": 0.3845, "lr": 1.294572130134613e-06, "epoch": 0.4644388963574675, "percentage": 46.44, "elapsed_time": "0:33:28", "remaining_time": "0:38:36", "throughput": 11379.29, "total_tokens": 22858624}
|
|
{"current_steps": 7260, "total_steps": 15621, "loss": 0.5608, "lr": 1.2935041566598016e-06, "epoch": 0.4647589782984444, "percentage": 46.48, "elapsed_time": "0:33:29", "remaining_time": "0:38:34", "throughput": 11383.17, "total_tokens": 22873856}
|
|
{"current_steps": 7265, "total_steps": 15621, "loss": 0.3669, "lr": 1.2924358167416049e-06, "epoch": 0.4650790602394213, "percentage": 46.51, "elapsed_time": "0:33:30", "remaining_time": "0:38:31", "throughput": 11387.16, "total_tokens": 22889600}
|
|
{"current_steps": 7270, "total_steps": 15621, "loss": 0.4085, "lr": 1.2913671117138572e-06, "epoch": 0.4653991421803982, "percentage": 46.54, "elapsed_time": "0:33:30", "remaining_time": "0:38:29", "throughput": 11390.95, "total_tokens": 22904704}
|
|
{"current_steps": 7275, "total_steps": 15621, "loss": 0.3516, "lr": 1.29029804291085e-06, "epoch": 0.46571922412137506, "percentage": 46.57, "elapsed_time": "0:33:31", "remaining_time": "0:38:27", "throughput": 11394.89, "total_tokens": 22920384}
|
|
{"current_steps": 7280, "total_steps": 15621, "loss": 0.3724, "lr": 1.2892286116673269e-06, "epoch": 0.46603930606235194, "percentage": 46.6, "elapsed_time": "0:33:32", "remaining_time": "0:38:25", "throughput": 11399.14, "total_tokens": 22937024}
|
|
{"current_steps": 7285, "total_steps": 15621, "loss": 0.501, "lr": 1.2881588193184865e-06, "epoch": 0.4663593880033289, "percentage": 46.64, "elapsed_time": "0:33:32", "remaining_time": "0:38:23", "throughput": 11403.79, "total_tokens": 22954816}
|
|
{"current_steps": 7290, "total_steps": 15621, "loss": 0.2811, "lr": 1.287088667199977e-06, "epoch": 0.46667946994430576, "percentage": 46.67, "elapsed_time": "0:33:33", "remaining_time": "0:38:21", "throughput": 11407.44, "total_tokens": 22969472}
|
|
{"current_steps": 7295, "total_steps": 15621, "loss": 0.4666, "lr": 1.2860181566478956e-06, "epoch": 0.46699955188528264, "percentage": 46.7, "elapsed_time": "0:33:34", "remaining_time": "0:38:18", "throughput": 11411.09, "total_tokens": 22984192}
|
|
{"current_steps": 7300, "total_steps": 15621, "loss": 0.3772, "lr": 1.2849472889987874e-06, "epoch": 0.4673196338262595, "percentage": 46.73, "elapsed_time": "0:33:34", "remaining_time": "0:38:16", "throughput": 11414.99, "total_tokens": 22999680}
|
|
{"current_steps": 7305, "total_steps": 15621, "loss": 0.3756, "lr": 1.2838760655896431e-06, "epoch": 0.4676397157672364, "percentage": 46.76, "elapsed_time": "0:33:35", "remaining_time": "0:38:14", "throughput": 11418.71, "total_tokens": 23014720}
|
|
{"current_steps": 7310, "total_steps": 15621, "loss": 0.4629, "lr": 1.2828044877578983e-06, "epoch": 0.4679597977082133, "percentage": 46.8, "elapsed_time": "0:33:36", "remaining_time": "0:38:12", "throughput": 11422.74, "total_tokens": 23030528}
|
|
{"current_steps": 7315, "total_steps": 15621, "loss": 0.5176, "lr": 1.2817325568414297e-06, "epoch": 0.4682798796491902, "percentage": 46.83, "elapsed_time": "0:33:36", "remaining_time": "0:38:10", "throughput": 11426.88, "total_tokens": 23046784}
|
|
{"current_steps": 7320, "total_steps": 15621, "loss": 0.3307, "lr": 1.2806602741785562e-06, "epoch": 0.4685999615901671, "percentage": 46.86, "elapsed_time": "0:33:37", "remaining_time": "0:38:07", "throughput": 11430.55, "total_tokens": 23061632}
|
|
{"current_steps": 7325, "total_steps": 15621, "loss": 0.3325, "lr": 1.2795876411080346e-06, "epoch": 0.46892004353114397, "percentage": 46.89, "elapsed_time": "0:33:38", "remaining_time": "0:38:05", "throughput": 11434.66, "total_tokens": 23077888}
|
|
{"current_steps": 7330, "total_steps": 15621, "loss": 0.3222, "lr": 1.278514658969061e-06, "epoch": 0.46924012547212085, "percentage": 46.92, "elapsed_time": "0:33:38", "remaining_time": "0:38:03", "throughput": 11438.56, "total_tokens": 23093568}
|
|
{"current_steps": 7335, "total_steps": 15621, "loss": 0.5175, "lr": 1.2774413291012648e-06, "epoch": 0.46956020741309773, "percentage": 46.96, "elapsed_time": "0:33:39", "remaining_time": "0:38:01", "throughput": 11442.39, "total_tokens": 23108992}
|
|
{"current_steps": 7340, "total_steps": 15621, "loss": 0.4328, "lr": 1.2763676528447122e-06, "epoch": 0.46988028935407467, "percentage": 46.99, "elapsed_time": "0:33:40", "remaining_time": "0:37:59", "throughput": 11446.42, "total_tokens": 23124992}
|
|
{"current_steps": 7345, "total_steps": 15621, "loss": 0.3446, "lr": 1.2752936315399003e-06, "epoch": 0.47020037129505154, "percentage": 47.02, "elapsed_time": "0:33:40", "remaining_time": "0:37:57", "throughput": 11450.74, "total_tokens": 23141888}
|
|
{"current_steps": 7350, "total_steps": 15621, "loss": 0.343, "lr": 1.2742192665277566e-06, "epoch": 0.4705204532360284, "percentage": 47.05, "elapsed_time": "0:33:41", "remaining_time": "0:37:55", "throughput": 11454.76, "total_tokens": 23157888}
|
|
{"current_steps": 7355, "total_steps": 15621, "loss": 0.2838, "lr": 1.2731445591496393e-06, "epoch": 0.4708405351770053, "percentage": 47.08, "elapsed_time": "0:33:42", "remaining_time": "0:37:52", "throughput": 11458.45, "total_tokens": 23172864}
|
|
{"current_steps": 7360, "total_steps": 15621, "loss": 0.456, "lr": 1.2720695107473325e-06, "epoch": 0.4711606171179822, "percentage": 47.12, "elapsed_time": "0:33:43", "remaining_time": "0:37:50", "throughput": 11462.35, "total_tokens": 23188352}
|
|
{"current_steps": 7365, "total_steps": 15621, "loss": 0.3861, "lr": 1.2709941226630475e-06, "epoch": 0.4714806990589591, "percentage": 47.15, "elapsed_time": "0:33:43", "remaining_time": "0:37:48", "throughput": 11466.23, "total_tokens": 23204096}
|
|
{"current_steps": 7370, "total_steps": 15621, "loss": 0.3526, "lr": 1.2699183962394182e-06, "epoch": 0.471800780999936, "percentage": 47.18, "elapsed_time": "0:33:44", "remaining_time": "0:37:46", "throughput": 11469.91, "total_tokens": 23219072}
|
|
{"current_steps": 7375, "total_steps": 15621, "loss": 0.4323, "lr": 1.2688423328195021e-06, "epoch": 0.4721208629409129, "percentage": 47.21, "elapsed_time": "0:33:45", "remaining_time": "0:37:44", "throughput": 11473.81, "total_tokens": 23234560}
|
|
{"current_steps": 7380, "total_steps": 15621, "loss": 0.3497, "lr": 1.267765933746777e-06, "epoch": 0.47244094488188976, "percentage": 47.24, "elapsed_time": "0:33:45", "remaining_time": "0:37:42", "throughput": 11477.78, "total_tokens": 23250304}
|
|
{"current_steps": 7385, "total_steps": 15621, "loss": 0.6383, "lr": 1.2666892003651397e-06, "epoch": 0.47276102682286664, "percentage": 47.28, "elapsed_time": "0:33:46", "remaining_time": "0:37:39", "throughput": 11481.58, "total_tokens": 23265664}
|
|
{"current_steps": 7390, "total_steps": 15621, "loss": 0.453, "lr": 1.2656121340189043e-06, "epoch": 0.4730811087638435, "percentage": 47.31, "elapsed_time": "0:33:47", "remaining_time": "0:37:37", "throughput": 11485.56, "total_tokens": 23281472}
|
|
{"current_steps": 7395, "total_steps": 15621, "loss": 0.4142, "lr": 1.264534736052801e-06, "epoch": 0.47340119070482045, "percentage": 47.34, "elapsed_time": "0:33:47", "remaining_time": "0:37:35", "throughput": 11489.43, "total_tokens": 23297024}
|
|
{"current_steps": 7400, "total_steps": 15621, "loss": 0.4348, "lr": 1.2634570078119739e-06, "epoch": 0.47372127264579733, "percentage": 47.37, "elapsed_time": "0:33:48", "remaining_time": "0:37:33", "throughput": 11493.52, "total_tokens": 23313344}
|
|
{"current_steps": 7405, "total_steps": 15621, "loss": 0.535, "lr": 1.262378950641979e-06, "epoch": 0.4740413545867742, "percentage": 47.4, "elapsed_time": "0:33:49", "remaining_time": "0:37:31", "throughput": 11497.26, "total_tokens": 23328512}
|
|
{"current_steps": 7410, "total_steps": 15621, "loss": 0.444, "lr": 1.2613005658887836e-06, "epoch": 0.4743614365277511, "percentage": 47.44, "elapsed_time": "0:33:49", "remaining_time": "0:37:29", "throughput": 11500.57, "total_tokens": 23342400}
|
|
{"current_steps": 7415, "total_steps": 15621, "loss": 0.4198, "lr": 1.2602218548987637e-06, "epoch": 0.47468151846872797, "percentage": 47.47, "elapsed_time": "0:33:50", "remaining_time": "0:37:26", "throughput": 11504.59, "total_tokens": 23358400}
|
|
{"current_steps": 7420, "total_steps": 15621, "loss": 0.4155, "lr": 1.2591428190187029e-06, "epoch": 0.4750016004097049, "percentage": 47.5, "elapsed_time": "0:33:51", "remaining_time": "0:37:24", "throughput": 11508.28, "total_tokens": 23373376}
|
|
{"current_steps": 7425, "total_steps": 15621, "loss": 0.5093, "lr": 1.2580634595957898e-06, "epoch": 0.4753216823506818, "percentage": 47.53, "elapsed_time": "0:33:51", "remaining_time": "0:37:22", "throughput": 11512.65, "total_tokens": 23390400}
|
|
{"current_steps": 7430, "total_steps": 15621, "loss": 0.3871, "lr": 1.2569837779776172e-06, "epoch": 0.47564176429165866, "percentage": 47.56, "elapsed_time": "0:33:52", "remaining_time": "0:37:20", "throughput": 11516.65, "total_tokens": 23406400}
|
|
{"current_steps": 7435, "total_steps": 15621, "loss": 0.3134, "lr": 1.2559037755121804e-06, "epoch": 0.47596184623263554, "percentage": 47.6, "elapsed_time": "0:33:53", "remaining_time": "0:37:18", "throughput": 11520.47, "total_tokens": 23421824}
|
|
{"current_steps": 7440, "total_steps": 15621, "loss": 0.4599, "lr": 1.2548234535478754e-06, "epoch": 0.4762819281736124, "percentage": 47.63, "elapsed_time": "0:33:53", "remaining_time": "0:37:16", "throughput": 11524.66, "total_tokens": 23438272}
|
|
{"current_steps": 7445, "total_steps": 15621, "loss": 0.4267, "lr": 1.2537428134334968e-06, "epoch": 0.47660201011458936, "percentage": 47.66, "elapsed_time": "0:33:54", "remaining_time": "0:37:14", "throughput": 11528.87, "total_tokens": 23454976}
|
|
{"current_steps": 7450, "total_steps": 15621, "loss": 0.5302, "lr": 1.252661856518236e-06, "epoch": 0.47692209205556624, "percentage": 47.69, "elapsed_time": "0:33:55", "remaining_time": "0:37:12", "throughput": 11532.98, "total_tokens": 23471168}
|
|
{"current_steps": 7455, "total_steps": 15621, "loss": 0.3683, "lr": 1.251580584151681e-06, "epoch": 0.4772421739965431, "percentage": 47.72, "elapsed_time": "0:33:55", "remaining_time": "0:37:09", "throughput": 11536.85, "total_tokens": 23486720}
|
|
{"current_steps": 7460, "total_steps": 15621, "loss": 0.309, "lr": 1.2504989976838129e-06, "epoch": 0.47756225593752, "percentage": 47.76, "elapsed_time": "0:33:56", "remaining_time": "0:37:07", "throughput": 11540.9, "total_tokens": 23502912}
|
|
{"current_steps": 7465, "total_steps": 15621, "loss": 0.3629, "lr": 1.2494170984650048e-06, "epoch": 0.4778823378784969, "percentage": 47.79, "elapsed_time": "0:33:57", "remaining_time": "0:37:05", "throughput": 11545.15, "total_tokens": 23519552}
|
|
{"current_steps": 7470, "total_steps": 15621, "loss": 0.4253, "lr": 1.248334887846021e-06, "epoch": 0.4782024198194738, "percentage": 47.82, "elapsed_time": "0:33:57", "remaining_time": "0:37:03", "throughput": 11549.25, "total_tokens": 23535936}
|
|
{"current_steps": 7475, "total_steps": 15621, "loss": 0.4411, "lr": 1.2472523671780135e-06, "epoch": 0.4785225017604507, "percentage": 47.85, "elapsed_time": "0:33:58", "remaining_time": "0:37:01", "throughput": 11552.94, "total_tokens": 23551040}
|
|
{"current_steps": 7480, "total_steps": 15621, "loss": 0.309, "lr": 1.2461695378125233e-06, "epoch": 0.47884258370142757, "percentage": 47.88, "elapsed_time": "0:33:59", "remaining_time": "0:36:59", "throughput": 11556.62, "total_tokens": 23566208}
|
|
{"current_steps": 7485, "total_steps": 15621, "loss": 0.4347, "lr": 1.245086401101474e-06, "epoch": 0.47916266564240445, "percentage": 47.92, "elapsed_time": "0:33:59", "remaining_time": "0:36:57", "throughput": 11560.45, "total_tokens": 23581696}
|
|
{"current_steps": 7490, "total_steps": 15621, "loss": 0.4439, "lr": 1.2440029583971757e-06, "epoch": 0.47948274758338133, "percentage": 47.95, "elapsed_time": "0:34:00", "remaining_time": "0:36:55", "throughput": 11564.28, "total_tokens": 23597248}
|
|
{"current_steps": 7495, "total_steps": 15621, "loss": 0.502, "lr": 1.2429192110523188e-06, "epoch": 0.4798028295243582, "percentage": 47.98, "elapsed_time": "0:34:01", "remaining_time": "0:36:53", "throughput": 11568.08, "total_tokens": 23612800}
|
|
{"current_steps": 7500, "total_steps": 15621, "loss": 0.3388, "lr": 1.2418351604199746e-06, "epoch": 0.48012291146533514, "percentage": 48.01, "elapsed_time": "0:34:01", "remaining_time": "0:36:50", "throughput": 11572.12, "total_tokens": 23629056}
|
|
{"current_steps": 7505, "total_steps": 15621, "loss": 0.4502, "lr": 1.2407508078535934e-06, "epoch": 0.480442993406312, "percentage": 48.04, "elapsed_time": "0:34:02", "remaining_time": "0:36:48", "throughput": 11575.86, "total_tokens": 23644352}
|
|
{"current_steps": 7510, "total_steps": 15621, "loss": 0.2899, "lr": 1.2396661547070017e-06, "epoch": 0.4807630753472889, "percentage": 48.08, "elapsed_time": "0:34:03", "remaining_time": "0:36:46", "throughput": 11580.07, "total_tokens": 23661120}
|
|
{"current_steps": 7515, "total_steps": 15621, "loss": 0.3362, "lr": 1.238581202334402e-06, "epoch": 0.4810831572882658, "percentage": 48.11, "elapsed_time": "0:34:03", "remaining_time": "0:36:44", "throughput": 11584.19, "total_tokens": 23677632}
|
|
{"current_steps": 7520, "total_steps": 15621, "loss": 0.3676, "lr": 1.2374959520903699e-06, "epoch": 0.48140323922924266, "percentage": 48.14, "elapsed_time": "0:34:04", "remaining_time": "0:36:42", "throughput": 11588.28, "total_tokens": 23693952}
|
|
{"current_steps": 7525, "total_steps": 15621, "loss": 0.3442, "lr": 1.2364104053298531e-06, "epoch": 0.4817233211702196, "percentage": 48.17, "elapsed_time": "0:34:05", "remaining_time": "0:36:40", "throughput": 11591.88, "total_tokens": 23708736}
|
|
{"current_steps": 7530, "total_steps": 15621, "loss": 0.392, "lr": 1.2353245634081692e-06, "epoch": 0.4820434031111965, "percentage": 48.2, "elapsed_time": "0:34:05", "remaining_time": "0:36:38", "throughput": 11595.89, "total_tokens": 23724864}
|
|
{"current_steps": 7535, "total_steps": 15621, "loss": 0.4165, "lr": 1.2342384276810053e-06, "epoch": 0.48236348505217336, "percentage": 48.24, "elapsed_time": "0:34:06", "remaining_time": "0:36:36", "throughput": 11599.62, "total_tokens": 23740160}
|
|
{"current_steps": 7540, "total_steps": 15621, "loss": 0.435, "lr": 1.233151999504414e-06, "epoch": 0.48268356699315024, "percentage": 48.27, "elapsed_time": "0:34:07", "remaining_time": "0:36:34", "throughput": 11603.29, "total_tokens": 23755264}
|
|
{"current_steps": 7545, "total_steps": 15621, "loss": 0.3445, "lr": 1.232065280234814e-06, "epoch": 0.4830036489341271, "percentage": 48.3, "elapsed_time": "0:34:07", "remaining_time": "0:36:32", "throughput": 11606.82, "total_tokens": 23770112}
|
|
{"current_steps": 7550, "total_steps": 15621, "loss": 0.4075, "lr": 1.2309782712289867e-06, "epoch": 0.48332373087510405, "percentage": 48.33, "elapsed_time": "0:34:08", "remaining_time": "0:36:29", "throughput": 11610.63, "total_tokens": 23785536}
|
|
{"current_steps": 7555, "total_steps": 15621, "loss": 0.4257, "lr": 1.2298909738440758e-06, "epoch": 0.48364381281608093, "percentage": 48.36, "elapsed_time": "0:34:09", "remaining_time": "0:36:27", "throughput": 11614.52, "total_tokens": 23801280}
|
|
{"current_steps": 7560, "total_steps": 15621, "loss": 0.3893, "lr": 1.2288033894375847e-06, "epoch": 0.4839638947570578, "percentage": 48.4, "elapsed_time": "0:34:09", "remaining_time": "0:36:25", "throughput": 11618.19, "total_tokens": 23816448}
|
|
{"current_steps": 7565, "total_steps": 15621, "loss": 0.541, "lr": 1.2277155193673755e-06, "epoch": 0.4842839766980347, "percentage": 48.43, "elapsed_time": "0:34:10", "remaining_time": "0:36:23", "throughput": 11622.15, "total_tokens": 23832512}
|
|
{"current_steps": 7570, "total_steps": 15621, "loss": 0.3945, "lr": 1.2266273649916668e-06, "epoch": 0.48460405863901157, "percentage": 48.46, "elapsed_time": "0:34:11", "remaining_time": "0:36:21", "throughput": 11626.03, "total_tokens": 23848192}
|
|
{"current_steps": 7575, "total_steps": 15621, "loss": 0.4394, "lr": 1.2255389276690318e-06, "epoch": 0.48492414057998845, "percentage": 48.49, "elapsed_time": "0:34:11", "remaining_time": "0:36:19", "throughput": 11629.85, "total_tokens": 23863808}
|
|
{"current_steps": 7580, "total_steps": 15621, "loss": 0.3096, "lr": 1.2244502087583978e-06, "epoch": 0.4852442225209654, "percentage": 48.52, "elapsed_time": "0:34:12", "remaining_time": "0:36:17", "throughput": 11634.13, "total_tokens": 23880960}
|
|
{"current_steps": 7585, "total_steps": 15621, "loss": 0.3963, "lr": 1.2233612096190426e-06, "epoch": 0.48556430446194226, "percentage": 48.56, "elapsed_time": "0:34:13", "remaining_time": "0:36:15", "throughput": 11637.83, "total_tokens": 23896256}
|
|
{"current_steps": 7590, "total_steps": 15621, "loss": 0.5109, "lr": 1.222271931610595e-06, "epoch": 0.48588438640291914, "percentage": 48.59, "elapsed_time": "0:34:14", "remaining_time": "0:36:13", "throughput": 11641.95, "total_tokens": 23912832}
|
|
{"current_steps": 7595, "total_steps": 15621, "loss": 0.4938, "lr": 1.2211823760930306e-06, "epoch": 0.486204468343896, "percentage": 48.62, "elapsed_time": "0:34:14", "remaining_time": "0:36:11", "throughput": 11645.87, "total_tokens": 23928768}
|
|
{"current_steps": 7600, "total_steps": 15621, "loss": 0.4297, "lr": 1.2200925444266726e-06, "epoch": 0.4865245502848729, "percentage": 48.65, "elapsed_time": "0:34:15", "remaining_time": "0:36:09", "throughput": 11649.9, "total_tokens": 23945088}
|
|
{"current_steps": 7605, "total_steps": 15621, "loss": 0.5101, "lr": 1.219002437972189e-06, "epoch": 0.48684463222584984, "percentage": 48.68, "elapsed_time": "0:34:16", "remaining_time": "0:36:07", "throughput": 11653.53, "total_tokens": 23960192}
|
|
{"current_steps": 7610, "total_steps": 15621, "loss": 0.4324, "lr": 1.21791205809059e-06, "epoch": 0.4871647141668267, "percentage": 48.72, "elapsed_time": "0:34:16", "remaining_time": "0:36:05", "throughput": 11657.73, "total_tokens": 23977152}
|
|
{"current_steps": 7615, "total_steps": 15621, "loss": 0.3628, "lr": 1.2168214061432283e-06, "epoch": 0.4874847961078036, "percentage": 48.75, "elapsed_time": "0:34:17", "remaining_time": "0:36:03", "throughput": 11661.38, "total_tokens": 23992448}
|
|
{"current_steps": 7620, "total_steps": 15621, "loss": 0.4397, "lr": 1.2157304834917947e-06, "epoch": 0.4878048780487805, "percentage": 48.78, "elapsed_time": "0:34:18", "remaining_time": "0:36:01", "throughput": 11665.25, "total_tokens": 24008384}
|
|
{"current_steps": 7625, "total_steps": 15621, "loss": 0.6103, "lr": 1.2146392914983202e-06, "epoch": 0.48812495998975736, "percentage": 48.81, "elapsed_time": "0:34:18", "remaining_time": "0:35:59", "throughput": 11669.56, "total_tokens": 24025728}
|
|
{"current_steps": 7630, "total_steps": 15621, "loss": 0.51, "lr": 1.2135478315251694e-06, "epoch": 0.4884450419307343, "percentage": 48.84, "elapsed_time": "0:34:19", "remaining_time": "0:35:56", "throughput": 11673.06, "total_tokens": 24040448}
|
|
{"current_steps": 7635, "total_steps": 15621, "loss": 0.36, "lr": 1.2124561049350442e-06, "epoch": 0.48876512387171117, "percentage": 48.88, "elapsed_time": "0:34:20", "remaining_time": "0:35:54", "throughput": 11676.55, "total_tokens": 24055168}
|
|
{"current_steps": 7640, "total_steps": 15621, "loss": 0.4474, "lr": 1.2113641130909772e-06, "epoch": 0.48908520581268805, "percentage": 48.91, "elapsed_time": "0:34:20", "remaining_time": "0:35:52", "throughput": 11680.07, "total_tokens": 24070016}
|
|
{"current_steps": 7645, "total_steps": 15621, "loss": 0.3074, "lr": 1.2102718573563334e-06, "epoch": 0.48940528775366493, "percentage": 48.94, "elapsed_time": "0:34:21", "remaining_time": "0:35:50", "throughput": 11683.58, "total_tokens": 24084800}
|
|
{"current_steps": 7650, "total_steps": 15621, "loss": 0.4884, "lr": 1.2091793390948066e-06, "epoch": 0.4897253696946418, "percentage": 48.97, "elapsed_time": "0:34:22", "remaining_time": "0:35:48", "throughput": 11687.36, "total_tokens": 24100416}
|
|
{"current_steps": 7655, "total_steps": 15621, "loss": 0.2873, "lr": 1.2080865596704191e-06, "epoch": 0.49004545163561875, "percentage": 49.0, "elapsed_time": "0:34:22", "remaining_time": "0:35:46", "throughput": 11691.46, "total_tokens": 24117120}
|
|
{"current_steps": 7660, "total_steps": 15621, "loss": 0.4317, "lr": 1.2069935204475187e-06, "epoch": 0.4903655335765956, "percentage": 49.04, "elapsed_time": "0:34:23", "remaining_time": "0:35:44", "throughput": 11695.06, "total_tokens": 24132224}
|
|
{"current_steps": 7665, "total_steps": 15621, "loss": 0.4037, "lr": 1.2059002227907776e-06, "epoch": 0.4906856155175725, "percentage": 49.07, "elapsed_time": "0:34:24", "remaining_time": "0:35:42", "throughput": 11698.76, "total_tokens": 24147712}
|
|
{"current_steps": 7670, "total_steps": 15621, "loss": 0.408, "lr": 1.2048066680651908e-06, "epoch": 0.4910056974585494, "percentage": 49.1, "elapsed_time": "0:34:24", "remaining_time": "0:35:40", "throughput": 11702.83, "total_tokens": 24164288}
|
|
{"current_steps": 7675, "total_steps": 15621, "loss": 0.5751, "lr": 1.2037128576360743e-06, "epoch": 0.49132577939952626, "percentage": 49.13, "elapsed_time": "0:34:26", "remaining_time": "0:35:38", "throughput": 11710.41, "total_tokens": 24193728}
|
|
{"current_steps": 7680, "total_steps": 15621, "loss": 0.406, "lr": 1.2026187928690627e-06, "epoch": 0.49164586134050314, "percentage": 49.16, "elapsed_time": "0:34:26", "remaining_time": "0:35:36", "throughput": 11714.03, "total_tokens": 24208832}
|
|
{"current_steps": 7685, "total_steps": 15621, "loss": 0.5004, "lr": 1.2015244751301098e-06, "epoch": 0.4919659432814801, "percentage": 49.2, "elapsed_time": "0:34:27", "remaining_time": "0:35:34", "throughput": 11717.48, "total_tokens": 24223424}
|
|
{"current_steps": 7690, "total_steps": 15621, "loss": 0.444, "lr": 1.2004299057854832e-06, "epoch": 0.49228602522245696, "percentage": 49.23, "elapsed_time": "0:34:27", "remaining_time": "0:35:32", "throughput": 11721.21, "total_tokens": 24238976}
|
|
{"current_steps": 7695, "total_steps": 15621, "loss": 0.3837, "lr": 1.1993350862017661e-06, "epoch": 0.49260610716343384, "percentage": 49.26, "elapsed_time": "0:34:28", "remaining_time": "0:35:30", "throughput": 11724.67, "total_tokens": 24253632}
|
|
{"current_steps": 7700, "total_steps": 15621, "loss": 0.4074, "lr": 1.1982400177458534e-06, "epoch": 0.4929261891044107, "percentage": 49.29, "elapsed_time": "0:34:29", "remaining_time": "0:35:28", "throughput": 11728.94, "total_tokens": 24270720}
|
|
{"current_steps": 7705, "total_steps": 15621, "loss": 0.4385, "lr": 1.197144701784951e-06, "epoch": 0.4932462710453876, "percentage": 49.32, "elapsed_time": "0:34:29", "remaining_time": "0:35:26", "throughput": 11732.34, "total_tokens": 24285312}
|
|
{"current_steps": 7710, "total_steps": 15621, "loss": 0.409, "lr": 1.1960491396865735e-06, "epoch": 0.49356635298636453, "percentage": 49.36, "elapsed_time": "0:34:30", "remaining_time": "0:35:24", "throughput": 11735.9, "total_tokens": 24300352}
|
|
{"current_steps": 7715, "total_steps": 15621, "loss": 0.3518, "lr": 1.1949533328185435e-06, "epoch": 0.4938864349273414, "percentage": 49.39, "elapsed_time": "0:34:31", "remaining_time": "0:35:22", "throughput": 11739.99, "total_tokens": 24317056}
|
|
{"current_steps": 7720, "total_steps": 15621, "loss": 0.3705, "lr": 1.1938572825489883e-06, "epoch": 0.4942065168683183, "percentage": 49.42, "elapsed_time": "0:34:31", "remaining_time": "0:35:20", "throughput": 11743.91, "total_tokens": 24333184}
|
|
{"current_steps": 7725, "total_steps": 15621, "loss": 0.4313, "lr": 1.1927609902463394e-06, "epoch": 0.49452659880929517, "percentage": 49.45, "elapsed_time": "0:34:32", "remaining_time": "0:35:18", "throughput": 11747.57, "total_tokens": 24348672}
|
|
{"current_steps": 7730, "total_steps": 15621, "loss": 0.4342, "lr": 1.1916644572793314e-06, "epoch": 0.49484668075027205, "percentage": 49.48, "elapsed_time": "0:34:33", "remaining_time": "0:35:16", "throughput": 11751.08, "total_tokens": 24363648}
|
|
{"current_steps": 7735, "total_steps": 15621, "loss": 0.4951, "lr": 1.190567685016998e-06, "epoch": 0.495166762691249, "percentage": 49.52, "elapsed_time": "0:34:34", "remaining_time": "0:35:14", "throughput": 11755.37, "total_tokens": 24380992}
|
|
{"current_steps": 7740, "total_steps": 15621, "loss": 0.4152, "lr": 1.189470674828672e-06, "epoch": 0.49548684463222586, "percentage": 49.55, "elapsed_time": "0:34:34", "remaining_time": "0:35:12", "throughput": 11758.83, "total_tokens": 24395776}
|
|
{"current_steps": 7745, "total_steps": 15621, "loss": 0.3851, "lr": 1.188373428083984e-06, "epoch": 0.49580692657320274, "percentage": 49.58, "elapsed_time": "0:34:35", "remaining_time": "0:35:10", "throughput": 11762.63, "total_tokens": 24411584}
|
|
{"current_steps": 7750, "total_steps": 15621, "loss": 0.5355, "lr": 1.1872759461528596e-06, "epoch": 0.4961270085141796, "percentage": 49.61, "elapsed_time": "0:34:35", "remaining_time": "0:35:08", "throughput": 11766.17, "total_tokens": 24426560}
|
|
{"current_steps": 7755, "total_steps": 15621, "loss": 0.4046, "lr": 1.1861782304055174e-06, "epoch": 0.4964470904551565, "percentage": 49.64, "elapsed_time": "0:34:36", "remaining_time": "0:35:06", "throughput": 11769.81, "total_tokens": 24441856}
|
|
{"current_steps": 7760, "total_steps": 15621, "loss": 0.3269, "lr": 1.1850802822124686e-06, "epoch": 0.4967671723961334, "percentage": 49.68, "elapsed_time": "0:34:37", "remaining_time": "0:35:04", "throughput": 11773.54, "total_tokens": 24457472}
|
|
{"current_steps": 7765, "total_steps": 15621, "loss": 0.5104, "lr": 1.1839821029445143e-06, "epoch": 0.4970872543371103, "percentage": 49.71, "elapsed_time": "0:34:37", "remaining_time": "0:35:02", "throughput": 11776.93, "total_tokens": 24471936}
|
|
{"current_steps": 7770, "total_steps": 15621, "loss": 0.3332, "lr": 1.1828836939727442e-06, "epoch": 0.4974073362780872, "percentage": 49.74, "elapsed_time": "0:34:38", "remaining_time": "0:35:00", "throughput": 11780.68, "total_tokens": 24487616}
|
|
{"current_steps": 7775, "total_steps": 15621, "loss": 0.4292, "lr": 1.181785056668535e-06, "epoch": 0.4977274182190641, "percentage": 49.77, "elapsed_time": "0:34:39", "remaining_time": "0:34:58", "throughput": 11784.58, "total_tokens": 24503936}
|
|
{"current_steps": 7780, "total_steps": 15621, "loss": 0.429, "lr": 1.180686192403548e-06, "epoch": 0.49804750016004096, "percentage": 49.8, "elapsed_time": "0:34:39", "remaining_time": "0:34:56", "throughput": 11787.97, "total_tokens": 24518464}
|
|
{"current_steps": 7785, "total_steps": 15621, "loss": 0.3479, "lr": 1.1795871025497285e-06, "epoch": 0.49836758210101784, "percentage": 49.84, "elapsed_time": "0:34:40", "remaining_time": "0:34:54", "throughput": 11791.39, "total_tokens": 24533184}
|
|
{"current_steps": 7790, "total_steps": 15621, "loss": 0.4288, "lr": 1.1784877884793029e-06, "epoch": 0.49868766404199477, "percentage": 49.87, "elapsed_time": "0:34:41", "remaining_time": "0:34:52", "throughput": 11795.18, "total_tokens": 24548992}
|
|
{"current_steps": 7795, "total_steps": 15621, "loss": 0.3681, "lr": 1.1773882515647776e-06, "epoch": 0.49900774598297165, "percentage": 49.9, "elapsed_time": "0:34:42", "remaining_time": "0:34:50", "throughput": 11799.47, "total_tokens": 24566592}
|
|
{"current_steps": 7800, "total_steps": 15621, "loss": 0.4776, "lr": 1.1762884931789376e-06, "epoch": 0.49932782792394853, "percentage": 49.93, "elapsed_time": "0:34:42", "remaining_time": "0:34:48", "throughput": 11803.61, "total_tokens": 24583552}
|
|
{"current_steps": 7805, "total_steps": 15621, "loss": 0.4538, "lr": 1.1751885146948436e-06, "epoch": 0.4996479098649254, "percentage": 49.96, "elapsed_time": "0:34:43", "remaining_time": "0:34:46", "throughput": 11807.42, "total_tokens": 24599552}
|
|
{"current_steps": 7810, "total_steps": 15621, "loss": 0.3799, "lr": 1.1740883174858327e-06, "epoch": 0.4999679918059023, "percentage": 50.0, "elapsed_time": "0:34:44", "remaining_time": "0:34:44", "throughput": 11811.03, "total_tokens": 24614912}
|
|
{"current_steps": 7815, "total_steps": 15621, "loss": 0.3643, "lr": 1.1729879029255127e-06, "epoch": 0.5002880737468792, "percentage": 50.03, "elapsed_time": "0:34:44", "remaining_time": "0:34:42", "throughput": 11814.49, "total_tokens": 24629696}
|
|
{"current_steps": 7820, "total_steps": 15621, "loss": 0.3997, "lr": 1.171887272387765e-06, "epoch": 0.5006081556878561, "percentage": 50.06, "elapsed_time": "0:34:45", "remaining_time": "0:34:40", "throughput": 11818.44, "total_tokens": 24646208}
|
|
{"current_steps": 7820, "total_steps": 15621, "eval_loss": 0.4178144633769989, "epoch": 0.5006081556878561, "percentage": 50.06, "elapsed_time": "0:35:36", "remaining_time": "0:35:30", "throughput": 11537.51, "total_tokens": 24646208}
|
|
{"current_steps": 7825, "total_steps": 15621, "loss": 0.4907, "lr": 1.1707864272467397e-06, "epoch": 0.500928237628833, "percentage": 50.09, "elapsed_time": "0:38:45", "remaining_time": "0:38:37", "throughput": 10603.43, "total_tokens": 24661120}
|
|
{"current_steps": 7830, "total_steps": 15621, "loss": 0.4269, "lr": 1.169685368876855e-06, "epoch": 0.5012483195698099, "percentage": 50.12, "elapsed_time": "0:38:46", "remaining_time": "0:38:34", "throughput": 10607.59, "total_tokens": 24678336}
|
|
{"current_steps": 7835, "total_steps": 15621, "loss": 0.5471, "lr": 1.1685840986527946e-06, "epoch": 0.5015684015107867, "percentage": 50.16, "elapsed_time": "0:38:47", "remaining_time": "0:38:32", "throughput": 10611.34, "total_tokens": 24694336}
|
|
{"current_steps": 7840, "total_steps": 15621, "loss": 0.3986, "lr": 1.1674826179495076e-06, "epoch": 0.5018884834517636, "percentage": 50.19, "elapsed_time": "0:38:47", "remaining_time": "0:38:30", "throughput": 10614.61, "total_tokens": 24708608}
|
|
{"current_steps": 7845, "total_steps": 15621, "loss": 0.4302, "lr": 1.1663809281422056e-06, "epoch": 0.5022085653927405, "percentage": 50.22, "elapsed_time": "0:38:48", "remaining_time": "0:38:28", "throughput": 10618.37, "total_tokens": 24724672}
|
|
{"current_steps": 7850, "total_steps": 15621, "loss": 0.4556, "lr": 1.1652790306063615e-06, "epoch": 0.5025286473337174, "percentage": 50.25, "elapsed_time": "0:38:49", "remaining_time": "0:38:25", "throughput": 10622.07, "total_tokens": 24740608}
|
|
{"current_steps": 7855, "total_steps": 15621, "loss": 0.4065, "lr": 1.164176926717707e-06, "epoch": 0.5028487292746944, "percentage": 50.28, "elapsed_time": "0:38:49", "remaining_time": "0:38:23", "throughput": 10626.38, "total_tokens": 24758528}
|
|
{"current_steps": 7860, "total_steps": 15621, "loss": 0.3737, "lr": 1.1630746178522315e-06, "epoch": 0.5031688112156713, "percentage": 50.32, "elapsed_time": "0:38:50", "remaining_time": "0:38:21", "throughput": 10629.7, "total_tokens": 24772992}
|
|
{"current_steps": 7865, "total_steps": 15621, "loss": 0.4417, "lr": 1.1619721053861816e-06, "epoch": 0.5034888931566481, "percentage": 50.35, "elapsed_time": "0:38:51", "remaining_time": "0:38:18", "throughput": 10633.19, "total_tokens": 24788160}
|
|
{"current_steps": 7870, "total_steps": 15621, "loss": 0.4104, "lr": 1.1608693906960558e-06, "epoch": 0.503808975097625, "percentage": 50.38, "elapsed_time": "0:38:51", "remaining_time": "0:38:16", "throughput": 10636.96, "total_tokens": 24804224}
|
|
{"current_steps": 7875, "total_steps": 15621, "loss": 0.4523, "lr": 1.1597664751586069e-06, "epoch": 0.5041290570386019, "percentage": 50.41, "elapsed_time": "0:38:52", "remaining_time": "0:38:14", "throughput": 10640.91, "total_tokens": 24820928}
|
|
{"current_steps": 7880, "total_steps": 15621, "loss": 0.3953, "lr": 1.1586633601508382e-06, "epoch": 0.5044491389795788, "percentage": 50.44, "elapsed_time": "0:38:53", "remaining_time": "0:38:12", "throughput": 10644.31, "total_tokens": 24835776}
|
|
{"current_steps": 7885, "total_steps": 15621, "loss": 0.3764, "lr": 1.1575600470500014e-06, "epoch": 0.5047692209205557, "percentage": 50.48, "elapsed_time": "0:38:53", "remaining_time": "0:38:09", "throughput": 10648.01, "total_tokens": 24851648}
|
|
{"current_steps": 7890, "total_steps": 15621, "loss": 0.4222, "lr": 1.1564565372335957e-06, "epoch": 0.5050893028615325, "percentage": 50.51, "elapsed_time": "0:38:54", "remaining_time": "0:38:07", "throughput": 10651.54, "total_tokens": 24866880}
|
|
{"current_steps": 7895, "total_steps": 15621, "loss": 0.3276, "lr": 1.1553528320793663e-06, "epoch": 0.5054093848025094, "percentage": 50.54, "elapsed_time": "0:38:55", "remaining_time": "0:38:05", "throughput": 10654.98, "total_tokens": 24881856}
|
|
{"current_steps": 7900, "total_steps": 15621, "loss": 0.4327, "lr": 1.1542489329653022e-06, "epoch": 0.5057294667434863, "percentage": 50.57, "elapsed_time": "0:38:55", "remaining_time": "0:38:03", "throughput": 10658.95, "total_tokens": 24898560}
|
|
{"current_steps": 7905, "total_steps": 15621, "loss": 0.3841, "lr": 1.1531448412696343e-06, "epoch": 0.5060495486844632, "percentage": 50.6, "elapsed_time": "0:38:56", "remaining_time": "0:38:00", "throughput": 10662.29, "total_tokens": 24913216}
|
|
{"current_steps": 7910, "total_steps": 15621, "loss": 0.5014, "lr": 1.1520405583708337e-06, "epoch": 0.5063696306254402, "percentage": 50.64, "elapsed_time": "0:38:57", "remaining_time": "0:37:58", "throughput": 10665.89, "total_tokens": 24928832}
|
|
{"current_steps": 7915, "total_steps": 15621, "loss": 0.4926, "lr": 1.1509360856476109e-06, "epoch": 0.506689712566417, "percentage": 50.67, "elapsed_time": "0:38:57", "remaining_time": "0:37:56", "throughput": 10669.52, "total_tokens": 24944512}
|
|
{"current_steps": 7920, "total_steps": 15621, "loss": 0.4731, "lr": 1.149831424478913e-06, "epoch": 0.5070097945073939, "percentage": 50.7, "elapsed_time": "0:38:58", "remaining_time": "0:37:53", "throughput": 10673.01, "total_tokens": 24959744}
|
|
{"current_steps": 7925, "total_steps": 15621, "loss": 0.3939, "lr": 1.1487265762439224e-06, "epoch": 0.5073298764483708, "percentage": 50.73, "elapsed_time": "0:38:59", "remaining_time": "0:37:51", "throughput": 10676.65, "total_tokens": 24975488}
|
|
{"current_steps": 7930, "total_steps": 15621, "loss": 0.3612, "lr": 1.1476215423220547e-06, "epoch": 0.5076499583893477, "percentage": 50.76, "elapsed_time": "0:38:59", "remaining_time": "0:37:49", "throughput": 10680.01, "total_tokens": 24990272}
|
|
{"current_steps": 7935, "total_steps": 15621, "loss": 0.3724, "lr": 1.146516324092959e-06, "epoch": 0.5079700403303246, "percentage": 50.8, "elapsed_time": "0:39:00", "remaining_time": "0:37:47", "throughput": 10683.72, "total_tokens": 25006272}
|
|
{"current_steps": 7940, "total_steps": 15621, "loss": 0.2965, "lr": 1.1454109229365117e-06, "epoch": 0.5082901222713014, "percentage": 50.83, "elapsed_time": "0:39:01", "remaining_time": "0:37:44", "throughput": 10687.49, "total_tokens": 25022464}
|
|
{"current_steps": 7945, "total_steps": 15621, "loss": 0.3151, "lr": 1.14430534023282e-06, "epoch": 0.5086102042122783, "percentage": 50.86, "elapsed_time": "0:39:01", "remaining_time": "0:37:42", "throughput": 10690.89, "total_tokens": 25037376}
|
|
{"current_steps": 7950, "total_steps": 15621, "loss": 0.4737, "lr": 1.1431995773622167e-06, "epoch": 0.5089302861532552, "percentage": 50.89, "elapsed_time": "0:39:02", "remaining_time": "0:37:40", "throughput": 10694.62, "total_tokens": 25053440}
|
|
{"current_steps": 7955, "total_steps": 15621, "loss": 0.4343, "lr": 1.1420936357052597e-06, "epoch": 0.5092503680942321, "percentage": 50.93, "elapsed_time": "0:39:03", "remaining_time": "0:37:38", "throughput": 10698.24, "total_tokens": 25069120}
|
|
{"current_steps": 7960, "total_steps": 15621, "loss": 0.3024, "lr": 1.1409875166427303e-06, "epoch": 0.5095704500352091, "percentage": 50.96, "elapsed_time": "0:39:03", "remaining_time": "0:37:35", "throughput": 10701.71, "total_tokens": 25084224}
|
|
{"current_steps": 7965, "total_steps": 15621, "loss": 0.5023, "lr": 1.1398812215556308e-06, "epoch": 0.509890531976186, "percentage": 50.99, "elapsed_time": "0:39:04", "remaining_time": "0:37:33", "throughput": 10705.23, "total_tokens": 25099520}
|
|
{"current_steps": 7970, "total_steps": 15621, "loss": 0.372, "lr": 1.1387747518251837e-06, "epoch": 0.5102106139171628, "percentage": 51.02, "elapsed_time": "0:39:05", "remaining_time": "0:37:31", "throughput": 10708.84, "total_tokens": 25115200}
|
|
{"current_steps": 7975, "total_steps": 15621, "loss": 0.3171, "lr": 1.13766810883283e-06, "epoch": 0.5105306958581397, "percentage": 51.05, "elapsed_time": "0:39:05", "remaining_time": "0:37:29", "throughput": 10712.63, "total_tokens": 25131520}
|
|
{"current_steps": 7980, "total_steps": 15621, "loss": 0.5088, "lr": 1.1365612939602255e-06, "epoch": 0.5108507777991166, "percentage": 51.09, "elapsed_time": "0:39:06", "remaining_time": "0:37:26", "throughput": 10716.38, "total_tokens": 25147776}
|
|
{"current_steps": 7985, "total_steps": 15621, "loss": 0.3884, "lr": 1.1354543085892423e-06, "epoch": 0.5111708597400935, "percentage": 51.12, "elapsed_time": "0:39:07", "remaining_time": "0:37:24", "throughput": 10719.79, "total_tokens": 25162816}
|
|
{"current_steps": 7990, "total_steps": 15621, "loss": 0.3417, "lr": 1.1343471541019646e-06, "epoch": 0.5114909416810703, "percentage": 51.15, "elapsed_time": "0:39:08", "remaining_time": "0:37:22", "throughput": 10723.47, "total_tokens": 25178752}
|
|
{"current_steps": 7995, "total_steps": 15621, "loss": 0.3672, "lr": 1.1332398318806872e-06, "epoch": 0.5118110236220472, "percentage": 51.18, "elapsed_time": "0:39:08", "remaining_time": "0:37:20", "throughput": 10726.94, "total_tokens": 25194048}
|
|
{"current_steps": 8000, "total_steps": 15621, "loss": 0.3787, "lr": 1.1321323433079158e-06, "epoch": 0.5121311055630241, "percentage": 51.21, "elapsed_time": "0:39:09", "remaining_time": "0:37:18", "throughput": 10730.41, "total_tokens": 25209216}
|
|
{"current_steps": 8005, "total_steps": 15621, "loss": 0.3897, "lr": 1.1310246897663623e-06, "epoch": 0.512451187504001, "percentage": 51.25, "elapsed_time": "0:39:09", "remaining_time": "0:37:15", "throughput": 10733.93, "total_tokens": 25224640}
|
|
{"current_steps": 8010, "total_steps": 15621, "loss": 0.4115, "lr": 1.1299168726389447e-06, "epoch": 0.5127712694449779, "percentage": 51.28, "elapsed_time": "0:39:10", "remaining_time": "0:37:13", "throughput": 10737.35, "total_tokens": 25239808}
|
|
{"current_steps": 8015, "total_steps": 15621, "loss": 0.346, "lr": 1.1288088933087868e-06, "epoch": 0.5130913513859549, "percentage": 51.31, "elapsed_time": "0:39:11", "remaining_time": "0:37:11", "throughput": 10741.49, "total_tokens": 25257344}
|
|
{"current_steps": 8020, "total_steps": 15621, "loss": 0.3318, "lr": 1.1277007531592127e-06, "epoch": 0.5134114333269317, "percentage": 51.34, "elapsed_time": "0:39:12", "remaining_time": "0:37:09", "throughput": 10744.79, "total_tokens": 25272064}
|
|
{"current_steps": 8025, "total_steps": 15621, "loss": 0.3698, "lr": 1.1265924535737492e-06, "epoch": 0.5137315152679086, "percentage": 51.37, "elapsed_time": "0:39:12", "remaining_time": "0:37:06", "throughput": 10748.45, "total_tokens": 25287936}
|
|
{"current_steps": 8030, "total_steps": 15621, "loss": 0.3019, "lr": 1.125483995936121e-06, "epoch": 0.5140515972088855, "percentage": 51.41, "elapsed_time": "0:39:13", "remaining_time": "0:37:04", "throughput": 10751.91, "total_tokens": 25303232}
|
|
{"current_steps": 8035, "total_steps": 15621, "loss": 0.3742, "lr": 1.1243753816302507e-06, "epoch": 0.5143716791498624, "percentage": 51.44, "elapsed_time": "0:39:14", "remaining_time": "0:37:02", "throughput": 10755.4, "total_tokens": 25318656}
|
|
{"current_steps": 8040, "total_steps": 15621, "loss": 0.4047, "lr": 1.1232666120402558e-06, "epoch": 0.5146917610908393, "percentage": 51.47, "elapsed_time": "0:39:14", "remaining_time": "0:37:00", "throughput": 10758.8, "total_tokens": 25333760}
|
|
{"current_steps": 8045, "total_steps": 15621, "loss": 0.3819, "lr": 1.1221576885504487e-06, "epoch": 0.5150118430318161, "percentage": 51.5, "elapsed_time": "0:39:15", "remaining_time": "0:36:58", "throughput": 10762.48, "total_tokens": 25349824}
|
|
{"current_steps": 8050, "total_steps": 15621, "loss": 0.3978, "lr": 1.121048612545333e-06, "epoch": 0.515331924972793, "percentage": 51.53, "elapsed_time": "0:39:16", "remaining_time": "0:36:55", "throughput": 10766.03, "total_tokens": 25365376}
|
|
{"current_steps": 8055, "total_steps": 15621, "loss": 0.459, "lr": 1.1199393854096034e-06, "epoch": 0.5156520069137699, "percentage": 51.57, "elapsed_time": "0:39:16", "remaining_time": "0:36:53", "throughput": 10769.57, "total_tokens": 25380928}
|
|
{"current_steps": 8060, "total_steps": 15621, "loss": 0.3448, "lr": 1.118830008528143e-06, "epoch": 0.5159720888547468, "percentage": 51.6, "elapsed_time": "0:39:17", "remaining_time": "0:36:51", "throughput": 10773.06, "total_tokens": 25396352}
|
|
{"current_steps": 8065, "total_steps": 15621, "loss": 0.3084, "lr": 1.1177204832860212e-06, "epoch": 0.5162921707957238, "percentage": 51.63, "elapsed_time": "0:39:18", "remaining_time": "0:36:49", "throughput": 10776.47, "total_tokens": 25411456}
|
|
{"current_steps": 8070, "total_steps": 15621, "loss": 0.4402, "lr": 1.1166108110684947e-06, "epoch": 0.5166122527367006, "percentage": 51.66, "elapsed_time": "0:39:18", "remaining_time": "0:36:47", "throughput": 10780.45, "total_tokens": 25428544}
|
|
{"current_steps": 8075, "total_steps": 15621, "loss": 0.4209, "lr": 1.1155009932610003e-06, "epoch": 0.5169323346776775, "percentage": 51.69, "elapsed_time": "0:39:19", "remaining_time": "0:36:44", "throughput": 10783.93, "total_tokens": 25443968}
|
|
{"current_steps": 8080, "total_steps": 15621, "loss": 0.3319, "lr": 1.1143910312491605e-06, "epoch": 0.5172524166186544, "percentage": 51.73, "elapsed_time": "0:39:20", "remaining_time": "0:36:42", "throughput": 10787.29, "total_tokens": 25458880}
|
|
{"current_steps": 8085, "total_steps": 15621, "loss": 0.3206, "lr": 1.1132809264187748e-06, "epoch": 0.5175724985596313, "percentage": 51.76, "elapsed_time": "0:39:20", "remaining_time": "0:36:40", "throughput": 10790.77, "total_tokens": 25474304}
|
|
{"current_steps": 8090, "total_steps": 15621, "loss": 0.4119, "lr": 1.1121706801558226e-06, "epoch": 0.5178925805006082, "percentage": 51.79, "elapsed_time": "0:39:21", "remaining_time": "0:36:38", "throughput": 10794.18, "total_tokens": 25489472}
|
|
{"current_steps": 8095, "total_steps": 15621, "loss": 0.3921, "lr": 1.111060293846459e-06, "epoch": 0.518212662441585, "percentage": 51.82, "elapsed_time": "0:39:22", "remaining_time": "0:36:36", "throughput": 10797.65, "total_tokens": 25504896}
|
|
{"current_steps": 8100, "total_steps": 15621, "loss": 0.4749, "lr": 1.1099497688770148e-06, "epoch": 0.5185327443825619, "percentage": 51.85, "elapsed_time": "0:39:22", "remaining_time": "0:36:33", "throughput": 10800.85, "total_tokens": 25519360}
|
|
{"current_steps": 8105, "total_steps": 15621, "loss": 0.4449, "lr": 1.1088391066339928e-06, "epoch": 0.5188528263235388, "percentage": 51.89, "elapsed_time": "0:39:23", "remaining_time": "0:36:31", "throughput": 10804.58, "total_tokens": 25535680}
|
|
{"current_steps": 8110, "total_steps": 15621, "loss": 0.5377, "lr": 1.1077283085040684e-06, "epoch": 0.5191729082645157, "percentage": 51.92, "elapsed_time": "0:39:24", "remaining_time": "0:36:29", "throughput": 10807.91, "total_tokens": 25550592}
|
|
{"current_steps": 8115, "total_steps": 15621, "loss": 0.3997, "lr": 1.1066173758740863e-06, "epoch": 0.5194929902054926, "percentage": 51.95, "elapsed_time": "0:39:24", "remaining_time": "0:36:27", "throughput": 10811.27, "total_tokens": 25565696}
|
|
{"current_steps": 8120, "total_steps": 15621, "loss": 0.3523, "lr": 1.105506310131058e-06, "epoch": 0.5198130721464695, "percentage": 51.98, "elapsed_time": "0:39:25", "remaining_time": "0:36:25", "throughput": 10814.9, "total_tokens": 25581568}
|
|
{"current_steps": 8125, "total_steps": 15621, "loss": 0.4599, "lr": 1.1043951126621634e-06, "epoch": 0.5201331540874464, "percentage": 52.01, "elapsed_time": "0:39:26", "remaining_time": "0:36:22", "throughput": 10818.56, "total_tokens": 25597760}
|
|
{"current_steps": 8130, "total_steps": 15621, "loss": 0.4081, "lr": 1.1032837848547445e-06, "epoch": 0.5204532360284233, "percentage": 52.05, "elapsed_time": "0:39:26", "remaining_time": "0:36:20", "throughput": 10822.66, "total_tokens": 25615424}
|
|
{"current_steps": 8135, "total_steps": 15621, "loss": 0.4117, "lr": 1.1021723280963074e-06, "epoch": 0.5207733179694002, "percentage": 52.08, "elapsed_time": "0:39:27", "remaining_time": "0:36:18", "throughput": 10826.11, "total_tokens": 25630720}
|
|
{"current_steps": 8140, "total_steps": 15621, "loss": 0.5029, "lr": 1.1010607437745194e-06, "epoch": 0.5210933999103771, "percentage": 52.11, "elapsed_time": "0:39:28", "remaining_time": "0:36:16", "throughput": 10830.47, "total_tokens": 25649280}
|
|
{"current_steps": 8145, "total_steps": 15621, "loss": 0.5131, "lr": 1.0999490332772057e-06, "epoch": 0.5214134818513539, "percentage": 52.14, "elapsed_time": "0:39:28", "remaining_time": "0:36:14", "throughput": 10833.87, "total_tokens": 25664576}
|
|
{"current_steps": 8150, "total_steps": 15621, "loss": 0.426, "lr": 1.0988371979923507e-06, "epoch": 0.5217335637923308, "percentage": 52.17, "elapsed_time": "0:39:29", "remaining_time": "0:36:12", "throughput": 10837.46, "total_tokens": 25680384}
|
|
{"current_steps": 8155, "total_steps": 15621, "loss": 0.4235, "lr": 1.097725239308094e-06, "epoch": 0.5220536457333077, "percentage": 52.21, "elapsed_time": "0:39:30", "remaining_time": "0:36:10", "throughput": 10841.02, "total_tokens": 25696128}
|
|
{"current_steps": 8160, "total_steps": 15621, "loss": 0.2819, "lr": 1.0966131586127278e-06, "epoch": 0.5223737276742846, "percentage": 52.24, "elapsed_time": "0:39:30", "remaining_time": "0:36:07", "throughput": 10844.82, "total_tokens": 25712768}
|
|
{"current_steps": 8165, "total_steps": 15621, "loss": 0.4086, "lr": 1.0955009572946992e-06, "epoch": 0.5226938096152615, "percentage": 52.27, "elapsed_time": "0:39:31", "remaining_time": "0:36:05", "throughput": 10848.12, "total_tokens": 25727616}
|
|
{"current_steps": 8170, "total_steps": 15621, "loss": 0.4159, "lr": 1.094388636742604e-06, "epoch": 0.5230138915562383, "percentage": 52.3, "elapsed_time": "0:39:32", "remaining_time": "0:36:03", "throughput": 10851.99, "total_tokens": 25744384}
|
|
{"current_steps": 8175, "total_steps": 15621, "loss": 0.3516, "lr": 1.0932761983451878e-06, "epoch": 0.5233339734972153, "percentage": 52.33, "elapsed_time": "0:39:33", "remaining_time": "0:36:01", "throughput": 10855.66, "total_tokens": 25760640}
|
|
{"current_steps": 8180, "total_steps": 15621, "loss": 0.3157, "lr": 1.0921636434913425e-06, "epoch": 0.5236540554381922, "percentage": 52.37, "elapsed_time": "0:39:33", "remaining_time": "0:35:59", "throughput": 10859.28, "total_tokens": 25776640}
|
|
{"current_steps": 8185, "total_steps": 15621, "loss": 0.2979, "lr": 1.091050973570106e-06, "epoch": 0.5239741373791691, "percentage": 52.4, "elapsed_time": "0:39:34", "remaining_time": "0:35:57", "throughput": 10862.66, "total_tokens": 25791744}
|
|
{"current_steps": 8190, "total_steps": 15621, "loss": 0.5589, "lr": 1.08993818997066e-06, "epoch": 0.524294219320146, "percentage": 52.43, "elapsed_time": "0:39:35", "remaining_time": "0:35:54", "throughput": 10866.43, "total_tokens": 25808256}
|
|
{"current_steps": 8195, "total_steps": 15621, "loss": 0.4481, "lr": 1.0888252940823283e-06, "epoch": 0.5246143012611229, "percentage": 52.46, "elapsed_time": "0:39:35", "remaining_time": "0:35:52", "throughput": 10870.0, "total_tokens": 25824128}
|
|
{"current_steps": 8200, "total_steps": 15621, "loss": 0.4767, "lr": 1.0877122872945737e-06, "epoch": 0.5249343832020997, "percentage": 52.49, "elapsed_time": "0:39:36", "remaining_time": "0:35:50", "throughput": 10873.77, "total_tokens": 25840576}
|
|
{"current_steps": 8205, "total_steps": 15621, "loss": 0.3206, "lr": 1.0865991709969983e-06, "epoch": 0.5252544651430766, "percentage": 52.53, "elapsed_time": "0:39:37", "remaining_time": "0:35:48", "throughput": 10877.29, "total_tokens": 25856256}
|
|
{"current_steps": 8210, "total_steps": 15621, "loss": 0.4424, "lr": 1.0854859465793416e-06, "epoch": 0.5255745470840535, "percentage": 52.56, "elapsed_time": "0:39:37", "remaining_time": "0:35:46", "throughput": 10880.68, "total_tokens": 25871424}
|
|
{"current_steps": 8215, "total_steps": 15621, "loss": 0.4916, "lr": 1.0843726154314767e-06, "epoch": 0.5258946290250304, "percentage": 52.59, "elapsed_time": "0:39:38", "remaining_time": "0:35:44", "throughput": 10883.96, "total_tokens": 25886272}
|
|
{"current_steps": 8220, "total_steps": 15621, "loss": 0.4302, "lr": 1.083259178943411e-06, "epoch": 0.5262147109660072, "percentage": 52.62, "elapsed_time": "0:39:39", "remaining_time": "0:35:42", "throughput": 10887.48, "total_tokens": 25901952}
|
|
{"current_steps": 8225, "total_steps": 15621, "loss": 0.3779, "lr": 1.0821456385052822e-06, "epoch": 0.5265347929069842, "percentage": 52.65, "elapsed_time": "0:39:39", "remaining_time": "0:35:39", "throughput": 10891.04, "total_tokens": 25917888}
|
|
{"current_steps": 8230, "total_steps": 15621, "loss": 0.4074, "lr": 1.0810319955073598e-06, "epoch": 0.5268548748479611, "percentage": 52.69, "elapsed_time": "0:39:40", "remaining_time": "0:35:37", "throughput": 10894.61, "total_tokens": 25933824}
|
|
{"current_steps": 8235, "total_steps": 15621, "loss": 0.3842, "lr": 1.0799182513400393e-06, "epoch": 0.527174956788938, "percentage": 52.72, "elapsed_time": "0:39:41", "remaining_time": "0:35:35", "throughput": 10898.64, "total_tokens": 25951360}
|
|
{"current_steps": 8240, "total_steps": 15621, "loss": 0.3524, "lr": 1.0788044073938438e-06, "epoch": 0.5274950387299149, "percentage": 52.75, "elapsed_time": "0:39:41", "remaining_time": "0:35:33", "throughput": 10902.21, "total_tokens": 25967232}
|
|
{"current_steps": 8245, "total_steps": 15621, "loss": 0.4361, "lr": 1.0776904650594205e-06, "epoch": 0.5278151206708918, "percentage": 52.78, "elapsed_time": "0:39:42", "remaining_time": "0:35:31", "throughput": 10905.63, "total_tokens": 25982592}
|
|
{"current_steps": 8250, "total_steps": 15621, "loss": 0.4055, "lr": 1.0765764257275394e-06, "epoch": 0.5281352026118686, "percentage": 52.81, "elapsed_time": "0:39:43", "remaining_time": "0:35:29", "throughput": 10909.01, "total_tokens": 25997824}
|
|
{"current_steps": 8255, "total_steps": 15621, "loss": 0.4559, "lr": 1.0754622907890914e-06, "epoch": 0.5284552845528455, "percentage": 52.85, "elapsed_time": "0:39:43", "remaining_time": "0:35:27", "throughput": 10912.57, "total_tokens": 26013632}
|
|
{"current_steps": 8260, "total_steps": 15621, "loss": 0.3412, "lr": 1.0743480616350873e-06, "epoch": 0.5287753664938224, "percentage": 52.88, "elapsed_time": "0:39:44", "remaining_time": "0:35:24", "throughput": 10915.91, "total_tokens": 26028800}
|
|
{"current_steps": 8265, "total_steps": 15621, "loss": 0.3488, "lr": 1.0732337396566558e-06, "epoch": 0.5290954484347993, "percentage": 52.91, "elapsed_time": "0:39:45", "remaining_time": "0:35:22", "throughput": 10919.44, "total_tokens": 26044672}
|
|
{"current_steps": 8270, "total_steps": 15621, "loss": 0.3944, "lr": 1.07211932624504e-06, "epoch": 0.5294155303757762, "percentage": 52.94, "elapsed_time": "0:39:45", "remaining_time": "0:35:20", "throughput": 10922.81, "total_tokens": 26060544}
|
|
{"current_steps": 8275, "total_steps": 15621, "loss": 0.3714, "lr": 1.0710048227915988e-06, "epoch": 0.529735612316753, "percentage": 52.97, "elapsed_time": "0:39:46", "remaining_time": "0:35:18", "throughput": 10926.28, "total_tokens": 26076160}
|
|
{"current_steps": 8280, "total_steps": 15621, "loss": 0.4306, "lr": 1.0698902306878024e-06, "epoch": 0.53005569425773, "percentage": 53.01, "elapsed_time": "0:39:47", "remaining_time": "0:35:16", "throughput": 10929.93, "total_tokens": 26092352}
|
|
{"current_steps": 8285, "total_steps": 15621, "loss": 0.3033, "lr": 1.0687755513252325e-06, "epoch": 0.5303757761987069, "percentage": 53.04, "elapsed_time": "0:39:47", "remaining_time": "0:35:14", "throughput": 10933.31, "total_tokens": 26107776}
|
|
{"current_steps": 8290, "total_steps": 15621, "loss": 0.3065, "lr": 1.0676607860955794e-06, "epoch": 0.5306958581396838, "percentage": 53.07, "elapsed_time": "0:39:48", "remaining_time": "0:35:12", "throughput": 10936.89, "total_tokens": 26123712}
|
|
{"current_steps": 8295, "total_steps": 15621, "loss": 0.3837, "lr": 1.0665459363906404e-06, "epoch": 0.5310159400806607, "percentage": 53.1, "elapsed_time": "0:39:49", "remaining_time": "0:35:10", "throughput": 10940.34, "total_tokens": 26139200}
|
|
{"current_steps": 8300, "total_steps": 15621, "loss": 0.4238, "lr": 1.0654310036023185e-06, "epoch": 0.5313360220216375, "percentage": 53.13, "elapsed_time": "0:39:49", "remaining_time": "0:35:07", "throughput": 10943.47, "total_tokens": 26153600}
|
|
{"current_steps": 8305, "total_steps": 15621, "loss": 0.4224, "lr": 1.0643159891226203e-06, "epoch": 0.5316561039626144, "percentage": 53.17, "elapsed_time": "0:39:50", "remaining_time": "0:35:05", "throughput": 10947.05, "total_tokens": 26169600}
|
|
{"current_steps": 8310, "total_steps": 15621, "loss": 0.3419, "lr": 1.0632008943436545e-06, "epoch": 0.5319761859035913, "percentage": 53.2, "elapsed_time": "0:39:51", "remaining_time": "0:35:03", "throughput": 10950.66, "total_tokens": 26185536}
|
|
{"current_steps": 8315, "total_steps": 15621, "loss": 0.4642, "lr": 1.0620857206576299e-06, "epoch": 0.5322962678445682, "percentage": 53.23, "elapsed_time": "0:39:51", "remaining_time": "0:35:01", "throughput": 10954.21, "total_tokens": 26201536}
|
|
{"current_steps": 8320, "total_steps": 15621, "loss": 0.2997, "lr": 1.0609704694568546e-06, "epoch": 0.5326163497855451, "percentage": 53.26, "elapsed_time": "0:39:52", "remaining_time": "0:34:59", "throughput": 10957.5, "total_tokens": 26216576}
|
|
{"current_steps": 8325, "total_steps": 15621, "loss": 0.2991, "lr": 1.0598551421337318e-06, "epoch": 0.5329364317265219, "percentage": 53.29, "elapsed_time": "0:39:53", "remaining_time": "0:34:57", "throughput": 10961.07, "total_tokens": 26232640}
|
|
{"current_steps": 8330, "total_steps": 15621, "loss": 0.539, "lr": 1.0587397400807617e-06, "epoch": 0.5332565136674989, "percentage": 53.33, "elapsed_time": "0:39:53", "remaining_time": "0:34:55", "throughput": 10964.57, "total_tokens": 26248448}
|
|
{"current_steps": 8335, "total_steps": 15621, "loss": 0.5144, "lr": 1.057624264690536e-06, "epoch": 0.5335765956084758, "percentage": 53.36, "elapsed_time": "0:39:54", "remaining_time": "0:34:53", "throughput": 10967.97, "total_tokens": 26263872}
|
|
{"current_steps": 8340, "total_steps": 15621, "loss": 0.4616, "lr": 1.0565087173557394e-06, "epoch": 0.5338966775494527, "percentage": 53.39, "elapsed_time": "0:39:55", "remaining_time": "0:34:51", "throughput": 10971.54, "total_tokens": 26279872}
|
|
{"current_steps": 8345, "total_steps": 15621, "loss": 0.3469, "lr": 1.055393099469146e-06, "epoch": 0.5342167594904296, "percentage": 53.42, "elapsed_time": "0:39:55", "remaining_time": "0:34:49", "throughput": 10975.08, "total_tokens": 26295680}
|
|
{"current_steps": 8350, "total_steps": 15621, "loss": 0.4155, "lr": 1.054277412423617e-06, "epoch": 0.5345368414314065, "percentage": 53.45, "elapsed_time": "0:39:56", "remaining_time": "0:34:46", "throughput": 10978.45, "total_tokens": 26311040}
|
|
{"current_steps": 8355, "total_steps": 15621, "loss": 0.47, "lr": 1.0531616576121017e-06, "epoch": 0.5348569233723833, "percentage": 53.49, "elapsed_time": "0:39:57", "remaining_time": "0:34:44", "throughput": 10981.79, "total_tokens": 26326144}
|
|
{"current_steps": 8360, "total_steps": 15621, "loss": 0.336, "lr": 1.0520458364276325e-06, "epoch": 0.5351770053133602, "percentage": 53.52, "elapsed_time": "0:39:57", "remaining_time": "0:34:42", "throughput": 10985.27, "total_tokens": 26341952}
|
|
{"current_steps": 8365, "total_steps": 15621, "loss": 0.3636, "lr": 1.0509299502633256e-06, "epoch": 0.5354970872543371, "percentage": 53.55, "elapsed_time": "0:39:58", "remaining_time": "0:34:40", "throughput": 10988.46, "total_tokens": 26356672}
|
|
{"current_steps": 8370, "total_steps": 15621, "loss": 0.4452, "lr": 1.0498140005123777e-06, "epoch": 0.535817169195314, "percentage": 53.58, "elapsed_time": "0:39:59", "remaining_time": "0:34:38", "throughput": 10992.09, "total_tokens": 26373056}
|
|
{"current_steps": 8375, "total_steps": 15621, "loss": 0.4254, "lr": 1.0486979885680653e-06, "epoch": 0.5361372511362908, "percentage": 53.61, "elapsed_time": "0:39:59", "remaining_time": "0:34:36", "throughput": 10995.34, "total_tokens": 26388032}
|
|
{"current_steps": 8380, "total_steps": 15621, "loss": 0.4324, "lr": 1.0475819158237424e-06, "epoch": 0.5364573330772677, "percentage": 53.65, "elapsed_time": "0:40:00", "remaining_time": "0:34:34", "throughput": 10998.56, "total_tokens": 26402880}
|
|
{"current_steps": 8385, "total_steps": 15621, "loss": 0.481, "lr": 1.0464657836728389e-06, "epoch": 0.5367774150182447, "percentage": 53.68, "elapsed_time": "0:40:01", "remaining_time": "0:34:32", "throughput": 11002.21, "total_tokens": 26419328}
|
|
{"current_steps": 8390, "total_steps": 15621, "loss": 0.4169, "lr": 1.045349593508859e-06, "epoch": 0.5370974969592216, "percentage": 53.71, "elapsed_time": "0:40:01", "remaining_time": "0:34:30", "throughput": 11005.43, "total_tokens": 26434112}
|
|
{"current_steps": 8395, "total_steps": 15621, "loss": 0.2911, "lr": 1.0442333467253788e-06, "epoch": 0.5374175789001985, "percentage": 53.74, "elapsed_time": "0:40:02", "remaining_time": "0:34:28", "throughput": 11009.14, "total_tokens": 26450688}
|
|
{"current_steps": 8400, "total_steps": 15621, "loss": 0.3651, "lr": 1.0431170447160463e-06, "epoch": 0.5377376608411754, "percentage": 53.77, "elapsed_time": "0:40:03", "remaining_time": "0:34:25", "throughput": 11012.57, "total_tokens": 26466368}
|
|
{"current_steps": 8405, "total_steps": 15621, "loss": 0.3663, "lr": 1.0420006888745767e-06, "epoch": 0.5380577427821522, "percentage": 53.81, "elapsed_time": "0:40:03", "remaining_time": "0:34:23", "throughput": 11016.17, "total_tokens": 26482624}
|
|
{"current_steps": 8410, "total_steps": 15621, "loss": 0.3745, "lr": 1.0408842805947543e-06, "epoch": 0.5383778247231291, "percentage": 53.84, "elapsed_time": "0:40:04", "remaining_time": "0:34:21", "throughput": 11019.85, "total_tokens": 26499200}
|
|
{"current_steps": 8415, "total_steps": 15621, "loss": 0.5144, "lr": 1.0397678212704276e-06, "epoch": 0.538697906664106, "percentage": 53.87, "elapsed_time": "0:40:05", "remaining_time": "0:34:19", "throughput": 11023.05, "total_tokens": 26514048}
|
|
{"current_steps": 8420, "total_steps": 15621, "loss": 0.4061, "lr": 1.038651312295509e-06, "epoch": 0.5390179886050829, "percentage": 53.9, "elapsed_time": "0:40:05", "remaining_time": "0:34:17", "throughput": 11026.34, "total_tokens": 26529216}
|
|
{"current_steps": 8425, "total_steps": 15621, "loss": 0.4173, "lr": 1.037534755063973e-06, "epoch": 0.5393380705460598, "percentage": 53.93, "elapsed_time": "0:40:06", "remaining_time": "0:34:15", "throughput": 11029.85, "total_tokens": 26545152}
|
|
{"current_steps": 8430, "total_steps": 15621, "loss": 0.4124, "lr": 1.0364181509698548e-06, "epoch": 0.5396581524870366, "percentage": 53.97, "elapsed_time": "0:40:07", "remaining_time": "0:34:13", "throughput": 11033.2, "total_tokens": 26560512}
|
|
{"current_steps": 8435, "total_steps": 15621, "loss": 0.3606, "lr": 1.0353015014072476e-06, "epoch": 0.5399782344280136, "percentage": 54.0, "elapsed_time": "0:40:07", "remaining_time": "0:34:11", "throughput": 11036.42, "total_tokens": 26575488}
|
|
{"current_steps": 8440, "total_steps": 15621, "loss": 0.4008, "lr": 1.0341848077703013e-06, "epoch": 0.5402983163689905, "percentage": 54.03, "elapsed_time": "0:40:08", "remaining_time": "0:34:09", "throughput": 11039.84, "total_tokens": 26591040}
|
|
{"current_steps": 8445, "total_steps": 15621, "loss": 0.3228, "lr": 1.033068071453221e-06, "epoch": 0.5406183983099674, "percentage": 54.06, "elapsed_time": "0:40:09", "remaining_time": "0:34:07", "throughput": 11043.37, "total_tokens": 26606976}
|
|
{"current_steps": 8450, "total_steps": 15621, "loss": 0.372, "lr": 1.0319512938502653e-06, "epoch": 0.5409384802509443, "percentage": 54.09, "elapsed_time": "0:40:10", "remaining_time": "0:34:05", "throughput": 11046.99, "total_tokens": 26623296}
|
|
{"current_steps": 8455, "total_steps": 15621, "loss": 0.3241, "lr": 1.0308344763557444e-06, "epoch": 0.5412585621919211, "percentage": 54.13, "elapsed_time": "0:40:10", "remaining_time": "0:34:03", "throughput": 11050.25, "total_tokens": 26638336}
|
|
{"current_steps": 8460, "total_steps": 15621, "loss": 0.2886, "lr": 1.0297176203640175e-06, "epoch": 0.541578644132898, "percentage": 54.16, "elapsed_time": "0:40:11", "remaining_time": "0:34:01", "throughput": 11053.82, "total_tokens": 26654400}
|
|
{"current_steps": 8465, "total_steps": 15621, "loss": 0.3553, "lr": 1.0286007272694924e-06, "epoch": 0.5418987260738749, "percentage": 54.19, "elapsed_time": "0:40:11", "remaining_time": "0:33:59", "throughput": 11057.1, "total_tokens": 26669568}
|
|
{"current_steps": 8470, "total_steps": 15621, "loss": 0.4816, "lr": 1.0274837984666239e-06, "epoch": 0.5422188080148518, "percentage": 54.22, "elapsed_time": "0:40:12", "remaining_time": "0:33:56", "throughput": 11060.72, "total_tokens": 26686016}
|
|
{"current_steps": 8475, "total_steps": 15621, "loss": 0.4212, "lr": 1.02636683534991e-06, "epoch": 0.5425388899558287, "percentage": 54.25, "elapsed_time": "0:40:13", "remaining_time": "0:33:54", "throughput": 11064.09, "total_tokens": 26701504}
|
|
{"current_steps": 8480, "total_steps": 15621, "loss": 0.5995, "lr": 1.0252498393138928e-06, "epoch": 0.5428589718968055, "percentage": 54.29, "elapsed_time": "0:40:14", "remaining_time": "0:33:52", "throughput": 11067.49, "total_tokens": 26717120}
|
|
{"current_steps": 8485, "total_steps": 15621, "loss": 0.415, "lr": 1.0241328117531546e-06, "epoch": 0.5431790538377824, "percentage": 54.32, "elapsed_time": "0:40:14", "remaining_time": "0:33:50", "throughput": 11070.9, "total_tokens": 26732736}
|
|
{"current_steps": 8490, "total_steps": 15621, "loss": 0.4128, "lr": 1.0230157540623174e-06, "epoch": 0.5434991357787594, "percentage": 54.35, "elapsed_time": "0:40:15", "remaining_time": "0:33:48", "throughput": 11074.0, "total_tokens": 26747392}
|
|
{"current_steps": 8495, "total_steps": 15621, "loss": 0.4605, "lr": 1.0218986676360415e-06, "epoch": 0.5438192177197363, "percentage": 54.38, "elapsed_time": "0:40:15", "remaining_time": "0:33:46", "throughput": 11077.14, "total_tokens": 26762112}
|
|
{"current_steps": 8500, "total_steps": 15621, "loss": 0.3673, "lr": 1.0207815538690216e-06, "epoch": 0.5441392996607132, "percentage": 54.41, "elapsed_time": "0:40:16", "remaining_time": "0:33:44", "throughput": 11080.55, "total_tokens": 26777856}
|
|
{"current_steps": 8505, "total_steps": 15621, "loss": 0.3133, "lr": 1.0196644141559877e-06, "epoch": 0.54445938160169, "percentage": 54.45, "elapsed_time": "0:40:17", "remaining_time": "0:33:42", "throughput": 11084.1, "total_tokens": 26794048}
|
|
{"current_steps": 8510, "total_steps": 15621, "loss": 0.3397, "lr": 1.0185472498917021e-06, "epoch": 0.5447794635426669, "percentage": 54.48, "elapsed_time": "0:40:18", "remaining_time": "0:33:40", "throughput": 11087.54, "total_tokens": 26809792}
|
|
{"current_steps": 8515, "total_steps": 15621, "loss": 0.4261, "lr": 1.017430062470957e-06, "epoch": 0.5450995454836438, "percentage": 54.51, "elapsed_time": "0:40:18", "remaining_time": "0:33:38", "throughput": 11090.83, "total_tokens": 26825024}
|
|
{"current_steps": 8520, "total_steps": 15621, "loss": 0.3494, "lr": 1.016312853288574e-06, "epoch": 0.5454196274246207, "percentage": 54.54, "elapsed_time": "0:40:19", "remaining_time": "0:33:36", "throughput": 11094.5, "total_tokens": 26841536}
|
|
{"current_steps": 8525, "total_steps": 15621, "loss": 0.3875, "lr": 1.0151956237394027e-06, "epoch": 0.5457397093655976, "percentage": 54.57, "elapsed_time": "0:40:20", "remaining_time": "0:33:34", "throughput": 11098.01, "total_tokens": 26857600}
|
|
{"current_steps": 8530, "total_steps": 15621, "loss": 0.3999, "lr": 1.0140783752183164e-06, "epoch": 0.5460597913065744, "percentage": 54.61, "elapsed_time": "0:40:20", "remaining_time": "0:33:32", "throughput": 11101.7, "total_tokens": 26874176}
|
|
{"current_steps": 8535, "total_steps": 15621, "loss": 0.4338, "lr": 1.0129611091202138e-06, "epoch": 0.5463798732475513, "percentage": 54.64, "elapsed_time": "0:40:21", "remaining_time": "0:33:30", "throughput": 11105.21, "total_tokens": 26890176}
|
|
{"current_steps": 8540, "total_steps": 15621, "loss": 0.2926, "lr": 1.0118438268400135e-06, "epoch": 0.5466999551885282, "percentage": 54.67, "elapsed_time": "0:40:22", "remaining_time": "0:33:28", "throughput": 11108.54, "total_tokens": 26905728}
|
|
{"current_steps": 8545, "total_steps": 15621, "loss": 0.4599, "lr": 1.0107265297726568e-06, "epoch": 0.5470200371295052, "percentage": 54.7, "elapsed_time": "0:40:22", "remaining_time": "0:33:26", "throughput": 11111.87, "total_tokens": 26921280}
|
|
{"current_steps": 8550, "total_steps": 15621, "loss": 0.4048, "lr": 1.009609219313102e-06, "epoch": 0.5473401190704821, "percentage": 54.73, "elapsed_time": "0:40:23", "remaining_time": "0:33:24", "throughput": 11115.17, "total_tokens": 26936704}
|
|
{"current_steps": 8555, "total_steps": 15621, "loss": 0.3919, "lr": 1.0084918968563236e-06, "epoch": 0.547660201011459, "percentage": 54.77, "elapsed_time": "0:40:24", "remaining_time": "0:33:22", "throughput": 11118.6, "total_tokens": 26952448}
|
|
{"current_steps": 8560, "total_steps": 15621, "loss": 0.3917, "lr": 1.0073745637973124e-06, "epoch": 0.5479802829524358, "percentage": 54.8, "elapsed_time": "0:40:24", "remaining_time": "0:33:20", "throughput": 11121.85, "total_tokens": 26967680}
|
|
{"current_steps": 8565, "total_steps": 15621, "loss": 0.3606, "lr": 1.0062572215310718e-06, "epoch": 0.5483003648934127, "percentage": 54.83, "elapsed_time": "0:40:25", "remaining_time": "0:33:18", "throughput": 11124.96, "total_tokens": 26982400}
|
|
{"current_steps": 8570, "total_steps": 15621, "loss": 0.3227, "lr": 1.0051398714526165e-06, "epoch": 0.5486204468343896, "percentage": 54.86, "elapsed_time": "0:40:26", "remaining_time": "0:33:16", "throughput": 11128.45, "total_tokens": 26998400}
|
|
{"current_steps": 8575, "total_steps": 15621, "loss": 0.3731, "lr": 1.0040225149569712e-06, "epoch": 0.5489405287753665, "percentage": 54.89, "elapsed_time": "0:40:26", "remaining_time": "0:33:14", "throughput": 11132.33, "total_tokens": 27015936}
|
|
{"current_steps": 8580, "total_steps": 15621, "loss": 0.3339, "lr": 1.0029051534391693e-06, "epoch": 0.5492606107163434, "percentage": 54.93, "elapsed_time": "0:40:27", "remaining_time": "0:33:12", "throughput": 11135.43, "total_tokens": 27030528}
|
|
{"current_steps": 8585, "total_steps": 15621, "loss": 0.3793, "lr": 1.001787788294249e-06, "epoch": 0.5495806926573202, "percentage": 54.96, "elapsed_time": "0:40:28", "remaining_time": "0:33:09", "throughput": 11138.77, "total_tokens": 27046080}
|
|
{"current_steps": 8590, "total_steps": 15621, "loss": 0.4226, "lr": 1.0006704209172537e-06, "epoch": 0.5499007745982971, "percentage": 54.99, "elapsed_time": "0:40:28", "remaining_time": "0:33:07", "throughput": 11142.07, "total_tokens": 27061504}
|
|
{"current_steps": 8595, "total_steps": 15621, "loss": 0.4382, "lr": 9.995530527032301e-07, "epoch": 0.5502208565392741, "percentage": 55.02, "elapsed_time": "0:40:29", "remaining_time": "0:33:05", "throughput": 11145.41, "total_tokens": 27077056}
|
|
{"current_steps": 8600, "total_steps": 15621, "loss": 0.3435, "lr": 9.984356850472257e-07, "epoch": 0.550540938480251, "percentage": 55.05, "elapsed_time": "0:40:30", "remaining_time": "0:33:03", "throughput": 11149.46, "total_tokens": 27095168}
|
|
{"current_steps": 8602, "total_steps": 15621, "eval_loss": 0.40140706300735474, "epoch": 0.5506689712566417, "percentage": 55.07, "elapsed_time": "0:41:21", "remaining_time": "0:33:44", "throughput": 10923.08, "total_tokens": 27101056}
|
|
{"current_steps": 8605, "total_steps": 15621, "loss": 0.3753, "lr": 9.97318319344287e-07, "epoch": 0.5508610204212279, "percentage": 55.09, "elapsed_time": "0:44:17", "remaining_time": "0:36:06", "throughput": 10202.08, "total_tokens": 27110144}
|
|
{"current_steps": 8610, "total_steps": 15621, "loss": 0.5273, "lr": 9.962009569894577e-07, "epoch": 0.5511811023622047, "percentage": 55.12, "elapsed_time": "0:44:17", "remaining_time": "0:36:04", "throughput": 10205.15, "total_tokens": 27124864}
|
|
{"current_steps": 8615, "total_steps": 15621, "loss": 0.3813, "lr": 9.95083599377778e-07, "epoch": 0.5515011843031816, "percentage": 55.15, "elapsed_time": "0:44:18", "remaining_time": "0:36:02", "throughput": 10208.35, "total_tokens": 27140160}
|
|
{"current_steps": 8620, "total_steps": 15621, "loss": 0.3966, "lr": 9.939662479042828e-07, "epoch": 0.5518212662441585, "percentage": 55.18, "elapsed_time": "0:44:19", "remaining_time": "0:35:59", "throughput": 10211.61, "total_tokens": 27155712}
|
|
{"current_steps": 8625, "total_steps": 15621, "loss": 0.4682, "lr": 9.92848903963998e-07, "epoch": 0.5521413481851354, "percentage": 55.21, "elapsed_time": "0:44:19", "remaining_time": "0:35:57", "throughput": 10214.99, "total_tokens": 27171520}
|
|
{"current_steps": 8630, "total_steps": 15621, "loss": 0.441, "lr": 9.9173156895194e-07, "epoch": 0.5524614301261123, "percentage": 55.25, "elapsed_time": "0:44:20", "remaining_time": "0:35:55", "throughput": 10218.2, "total_tokens": 27186752}
|
|
{"current_steps": 8635, "total_steps": 15621, "loss": 0.3889, "lr": 9.906142442631154e-07, "epoch": 0.5527815120670891, "percentage": 55.28, "elapsed_time": "0:44:21", "remaining_time": "0:35:53", "throughput": 10221.32, "total_tokens": 27201664}
|
|
{"current_steps": 8640, "total_steps": 15621, "loss": 0.3914, "lr": 9.894969312925171e-07, "epoch": 0.553101594008066, "percentage": 55.31, "elapsed_time": "0:44:21", "remaining_time": "0:35:50", "throughput": 10225.03, "total_tokens": 27218880}
|
|
{"current_steps": 8645, "total_steps": 15621, "loss": 0.3477, "lr": 9.883796314351234e-07, "epoch": 0.5534216759490429, "percentage": 55.34, "elapsed_time": "0:44:22", "remaining_time": "0:35:48", "throughput": 10228.61, "total_tokens": 27235648}
|
|
{"current_steps": 8650, "total_steps": 15621, "loss": 0.3945, "lr": 9.872623460858966e-07, "epoch": 0.5537417578900199, "percentage": 55.37, "elapsed_time": "0:44:23", "remaining_time": "0:35:46", "throughput": 10231.79, "total_tokens": 27250880}
|
|
{"current_steps": 8655, "total_steps": 15621, "loss": 0.3152, "lr": 9.861450766397799e-07, "epoch": 0.5540618398309968, "percentage": 55.41, "elapsed_time": "0:44:24", "remaining_time": "0:35:44", "throughput": 10235.18, "total_tokens": 27266880}
|
|
{"current_steps": 8660, "total_steps": 15621, "loss": 0.4157, "lr": 9.850278244916976e-07, "epoch": 0.5543819217719737, "percentage": 55.44, "elapsed_time": "0:44:24", "remaining_time": "0:35:41", "throughput": 10238.54, "total_tokens": 27282816}
|
|
{"current_steps": 8665, "total_steps": 15621, "loss": 0.4323, "lr": 9.839105910365524e-07, "epoch": 0.5547020037129505, "percentage": 55.47, "elapsed_time": "0:44:25", "remaining_time": "0:35:39", "throughput": 10241.84, "total_tokens": 27298496}
|
|
{"current_steps": 8670, "total_steps": 15621, "loss": 0.3436, "lr": 9.827933776692235e-07, "epoch": 0.5550220856539274, "percentage": 55.5, "elapsed_time": "0:44:26", "remaining_time": "0:35:37", "throughput": 10245.06, "total_tokens": 27313856}
|
|
{"current_steps": 8675, "total_steps": 15621, "loss": 0.3362, "lr": 9.81676185784564e-07, "epoch": 0.5553421675949043, "percentage": 55.53, "elapsed_time": "0:44:26", "remaining_time": "0:35:35", "throughput": 10248.09, "total_tokens": 27328448}
|
|
{"current_steps": 8680, "total_steps": 15621, "loss": 0.4001, "lr": 9.805590167774021e-07, "epoch": 0.5556622495358812, "percentage": 55.57, "elapsed_time": "0:44:27", "remaining_time": "0:35:32", "throughput": 10251.33, "total_tokens": 27343872}
|
|
{"current_steps": 8685, "total_steps": 15621, "loss": 0.5593, "lr": 9.79441872042536e-07, "epoch": 0.555982331476858, "percentage": 55.6, "elapsed_time": "0:44:27", "remaining_time": "0:35:30", "throughput": 10254.41, "total_tokens": 27358720}
|
|
{"current_steps": 8690, "total_steps": 15621, "loss": 0.3818, "lr": 9.783247529747338e-07, "epoch": 0.5563024134178349, "percentage": 55.63, "elapsed_time": "0:44:28", "remaining_time": "0:35:28", "throughput": 10257.42, "total_tokens": 27373312}
|
|
{"current_steps": 8695, "total_steps": 15621, "loss": 0.358, "lr": 9.772076609687323e-07, "epoch": 0.5566224953588118, "percentage": 55.66, "elapsed_time": "0:44:29", "remaining_time": "0:35:26", "throughput": 10260.59, "total_tokens": 27388544}
|
|
{"current_steps": 8700, "total_steps": 15621, "loss": 0.3191, "lr": 9.760905974192334e-07, "epoch": 0.5569425772997888, "percentage": 55.69, "elapsed_time": "0:44:29", "remaining_time": "0:35:24", "throughput": 10264.11, "total_tokens": 27405120}
|
|
{"current_steps": 8705, "total_steps": 15621, "loss": 0.4284, "lr": 9.749735637209044e-07, "epoch": 0.5572626592407657, "percentage": 55.73, "elapsed_time": "0:44:30", "remaining_time": "0:35:21", "throughput": 10267.32, "total_tokens": 27420544}
|
|
{"current_steps": 8710, "total_steps": 15621, "loss": 0.3233, "lr": 9.738565612683754e-07, "epoch": 0.5575827411817426, "percentage": 55.76, "elapsed_time": "0:44:31", "remaining_time": "0:35:19", "throughput": 10270.43, "total_tokens": 27435456}
|
|
{"current_steps": 8715, "total_steps": 15621, "loss": 0.3406, "lr": 9.727395914562363e-07, "epoch": 0.5579028231227194, "percentage": 55.79, "elapsed_time": "0:44:32", "remaining_time": "0:35:17", "throughput": 10273.94, "total_tokens": 27452032}
|
|
{"current_steps": 8720, "total_steps": 15621, "loss": 0.4084, "lr": 9.716226556790372e-07, "epoch": 0.5582229050636963, "percentage": 55.82, "elapsed_time": "0:44:32", "remaining_time": "0:35:15", "throughput": 10277.2, "total_tokens": 27467520}
|
|
{"current_steps": 8725, "total_steps": 15621, "loss": 0.3149, "lr": 9.705057553312855e-07, "epoch": 0.5585429870046732, "percentage": 55.85, "elapsed_time": "0:44:33", "remaining_time": "0:35:12", "throughput": 10280.38, "total_tokens": 27482816}
|
|
{"current_steps": 8730, "total_steps": 15621, "loss": 0.3853, "lr": 9.693888918074452e-07, "epoch": 0.5588630689456501, "percentage": 55.89, "elapsed_time": "0:44:33", "remaining_time": "0:35:10", "throughput": 10283.43, "total_tokens": 27497600}
|
|
{"current_steps": 8735, "total_steps": 15621, "loss": 0.4952, "lr": 9.682720665019325e-07, "epoch": 0.559183150886627, "percentage": 55.92, "elapsed_time": "0:44:34", "remaining_time": "0:35:08", "throughput": 10286.69, "total_tokens": 27513344}
|
|
{"current_steps": 8740, "total_steps": 15621, "loss": 0.4144, "lr": 9.671552808091172e-07, "epoch": 0.5595032328276038, "percentage": 55.95, "elapsed_time": "0:44:35", "remaining_time": "0:35:06", "throughput": 10290.29, "total_tokens": 27530304}
|
|
{"current_steps": 8745, "total_steps": 15621, "loss": 0.3504, "lr": 9.660385361233195e-07, "epoch": 0.5598233147685807, "percentage": 55.98, "elapsed_time": "0:44:36", "remaining_time": "0:35:04", "throughput": 10293.46, "total_tokens": 27545664}
|
|
{"current_steps": 8750, "total_steps": 15621, "loss": 0.3053, "lr": 9.649218338388084e-07, "epoch": 0.5601433967095576, "percentage": 56.01, "elapsed_time": "0:44:36", "remaining_time": "0:35:01", "throughput": 10296.57, "total_tokens": 27560704}
|
|
{"current_steps": 8755, "total_steps": 15621, "loss": 0.4472, "lr": 9.638051753497994e-07, "epoch": 0.5604634786505346, "percentage": 56.05, "elapsed_time": "0:44:37", "remaining_time": "0:34:59", "throughput": 10300.09, "total_tokens": 27577472}
|
|
{"current_steps": 8760, "total_steps": 15621, "loss": 0.3676, "lr": 9.62688562050454e-07, "epoch": 0.5607835605915115, "percentage": 56.08, "elapsed_time": "0:44:38", "remaining_time": "0:34:57", "throughput": 10303.32, "total_tokens": 27592960}
|
|
{"current_steps": 8765, "total_steps": 15621, "loss": 0.4074, "lr": 9.615719953348772e-07, "epoch": 0.5611036425324883, "percentage": 56.11, "elapsed_time": "0:44:38", "remaining_time": "0:34:55", "throughput": 10307.02, "total_tokens": 27610304}
|
|
{"current_steps": 8770, "total_steps": 15621, "loss": 0.568, "lr": 9.604554765971148e-07, "epoch": 0.5614237244734652, "percentage": 56.14, "elapsed_time": "0:44:39", "remaining_time": "0:34:53", "throughput": 10310.89, "total_tokens": 27628288}
|
|
{"current_steps": 8775, "total_steps": 15621, "loss": 0.4119, "lr": 9.593390072311549e-07, "epoch": 0.5617438064144421, "percentage": 56.17, "elapsed_time": "0:44:40", "remaining_time": "0:34:51", "throughput": 10314.14, "total_tokens": 27643904}
|
|
{"current_steps": 8780, "total_steps": 15621, "loss": 0.3703, "lr": 9.582225886309216e-07, "epoch": 0.562063888355419, "percentage": 56.21, "elapsed_time": "0:44:40", "remaining_time": "0:34:48", "throughput": 10317.55, "total_tokens": 27660224}
|
|
{"current_steps": 8785, "total_steps": 15621, "loss": 0.3098, "lr": 9.571062221902767e-07, "epoch": 0.5623839702963959, "percentage": 56.24, "elapsed_time": "0:44:41", "remaining_time": "0:34:46", "throughput": 10320.64, "total_tokens": 27675136}
|
|
{"current_steps": 8790, "total_steps": 15621, "loss": 0.3557, "lr": 9.559899093030175e-07, "epoch": 0.5627040522373727, "percentage": 56.27, "elapsed_time": "0:44:42", "remaining_time": "0:34:44", "throughput": 10323.73, "total_tokens": 27690176}
|
|
{"current_steps": 8795, "total_steps": 15621, "loss": 0.3065, "lr": 9.54873651362873e-07, "epoch": 0.5630241341783496, "percentage": 56.3, "elapsed_time": "0:44:42", "remaining_time": "0:34:42", "throughput": 10326.63, "total_tokens": 27704512}
|
|
{"current_steps": 8800, "total_steps": 15621, "loss": 0.4565, "lr": 9.537574497635043e-07, "epoch": 0.5633442161193265, "percentage": 56.33, "elapsed_time": "0:44:43", "remaining_time": "0:34:40", "throughput": 10329.95, "total_tokens": 27720448}
|
|
{"current_steps": 8805, "total_steps": 15621, "loss": 0.5121, "lr": 9.52641305898503e-07, "epoch": 0.5636642980603035, "percentage": 56.37, "elapsed_time": "0:44:44", "remaining_time": "0:34:37", "throughput": 10333.13, "total_tokens": 27735808}
|
|
{"current_steps": 8810, "total_steps": 15621, "loss": 0.3203, "lr": 9.515252211613873e-07, "epoch": 0.5639843800012804, "percentage": 56.4, "elapsed_time": "0:44:44", "remaining_time": "0:34:35", "throughput": 10336.11, "total_tokens": 27750464}
|
|
{"current_steps": 8815, "total_steps": 15621, "loss": 0.4539, "lr": 9.504091969456021e-07, "epoch": 0.5643044619422573, "percentage": 56.43, "elapsed_time": "0:44:45", "remaining_time": "0:34:33", "throughput": 10338.89, "total_tokens": 27764352}
|
|
{"current_steps": 8820, "total_steps": 15621, "loss": 0.3435, "lr": 9.492932346445165e-07, "epoch": 0.5646245438832341, "percentage": 56.46, "elapsed_time": "0:44:46", "remaining_time": "0:34:31", "throughput": 10342.05, "total_tokens": 27779840}
|
|
{"current_steps": 8825, "total_steps": 15621, "loss": 0.2767, "lr": 9.48177335651423e-07, "epoch": 0.564944625824211, "percentage": 56.49, "elapsed_time": "0:44:46", "remaining_time": "0:34:29", "throughput": 10345.5, "total_tokens": 27796352}
|
|
{"current_steps": 8830, "total_steps": 15621, "loss": 0.343, "lr": 9.470615013595346e-07, "epoch": 0.5652647077651879, "percentage": 56.53, "elapsed_time": "0:44:47", "remaining_time": "0:34:26", "throughput": 10348.37, "total_tokens": 27810624}
|
|
{"current_steps": 8835, "total_steps": 15621, "loss": 0.4395, "lr": 9.459457331619829e-07, "epoch": 0.5655847897061648, "percentage": 56.56, "elapsed_time": "0:44:48", "remaining_time": "0:34:24", "throughput": 10351.31, "total_tokens": 27825152}
|
|
{"current_steps": 8840, "total_steps": 15621, "loss": 0.4142, "lr": 9.448300324518182e-07, "epoch": 0.5659048716471416, "percentage": 56.59, "elapsed_time": "0:44:48", "remaining_time": "0:34:22", "throughput": 10354.44, "total_tokens": 27840384}
|
|
{"current_steps": 8845, "total_steps": 15621, "loss": 0.3014, "lr": 9.437144006220058e-07, "epoch": 0.5662249535881185, "percentage": 56.62, "elapsed_time": "0:44:49", "remaining_time": "0:34:20", "throughput": 10357.8, "total_tokens": 27856640}
|
|
{"current_steps": 8850, "total_steps": 15621, "loss": 0.2097, "lr": 9.425988390654249e-07, "epoch": 0.5665450355290954, "percentage": 56.65, "elapsed_time": "0:44:50", "remaining_time": "0:34:18", "throughput": 10361.14, "total_tokens": 27872768}
|
|
{"current_steps": 8855, "total_steps": 15621, "loss": 0.5239, "lr": 9.414833491748677e-07, "epoch": 0.5668651174700723, "percentage": 56.69, "elapsed_time": "0:44:50", "remaining_time": "0:34:15", "throughput": 10364.12, "total_tokens": 27887488}
|
|
{"current_steps": 8860, "total_steps": 15621, "loss": 0.2943, "lr": 9.40367932343036e-07, "epoch": 0.5671851994110493, "percentage": 56.72, "elapsed_time": "0:44:51", "remaining_time": "0:34:13", "throughput": 10367.21, "total_tokens": 27902720}
|
|
{"current_steps": 8865, "total_steps": 15621, "loss": 0.3817, "lr": 9.392525899625407e-07, "epoch": 0.5675052813520262, "percentage": 56.75, "elapsed_time": "0:44:52", "remaining_time": "0:34:11", "throughput": 10370.35, "total_tokens": 27918080}
|
|
{"current_steps": 8870, "total_steps": 15621, "loss": 0.3887, "lr": 9.381373234259004e-07, "epoch": 0.567825363293003, "percentage": 56.78, "elapsed_time": "0:44:52", "remaining_time": "0:34:09", "throughput": 10373.61, "total_tokens": 27933760}
|
|
{"current_steps": 8875, "total_steps": 15621, "loss": 0.3858, "lr": 9.370221341255382e-07, "epoch": 0.5681454452339799, "percentage": 56.81, "elapsed_time": "0:44:53", "remaining_time": "0:34:07", "throughput": 10376.7, "total_tokens": 27948992}
|
|
{"current_steps": 8880, "total_steps": 15621, "loss": 0.3428, "lr": 9.359070234537807e-07, "epoch": 0.5684655271749568, "percentage": 56.85, "elapsed_time": "0:44:54", "remaining_time": "0:34:05", "throughput": 10380.44, "total_tokens": 27966848}
|
|
{"current_steps": 8885, "total_steps": 15621, "loss": 0.3816, "lr": 9.34791992802857e-07, "epoch": 0.5687856091159337, "percentage": 56.88, "elapsed_time": "0:44:54", "remaining_time": "0:34:03", "throughput": 10383.42, "total_tokens": 27981696}
|
|
{"current_steps": 8890, "total_steps": 15621, "loss": 0.2578, "lr": 9.336770435648963e-07, "epoch": 0.5691056910569106, "percentage": 56.91, "elapsed_time": "0:44:55", "remaining_time": "0:34:00", "throughput": 10386.64, "total_tokens": 27997376}
|
|
{"current_steps": 8895, "total_steps": 15621, "loss": 0.4013, "lr": 9.325621771319246e-07, "epoch": 0.5694257729978874, "percentage": 56.94, "elapsed_time": "0:44:56", "remaining_time": "0:33:58", "throughput": 10390.1, "total_tokens": 28014016}
|
|
{"current_steps": 8900, "total_steps": 15621, "loss": 0.4245, "lr": 9.314473948958673e-07, "epoch": 0.5697458549388643, "percentage": 56.97, "elapsed_time": "0:44:56", "remaining_time": "0:33:56", "throughput": 10393.49, "total_tokens": 28030400}
|
|
{"current_steps": 8905, "total_steps": 15621, "loss": 0.3464, "lr": 9.303326982485422e-07, "epoch": 0.5700659368798412, "percentage": 57.01, "elapsed_time": "0:44:57", "remaining_time": "0:33:54", "throughput": 10396.99, "total_tokens": 28047104}
|
|
{"current_steps": 8910, "total_steps": 15621, "loss": 0.3751, "lr": 9.29218088581661e-07, "epoch": 0.5703860188208181, "percentage": 57.04, "elapsed_time": "0:44:58", "remaining_time": "0:33:52", "throughput": 10400.31, "total_tokens": 28063168}
|
|
{"current_steps": 8915, "total_steps": 15621, "loss": 0.3567, "lr": 9.281035672868278e-07, "epoch": 0.5707061007617951, "percentage": 57.07, "elapsed_time": "0:44:58", "remaining_time": "0:33:50", "throughput": 10403.57, "total_tokens": 28079104}
|
|
{"current_steps": 8920, "total_steps": 15621, "loss": 0.4098, "lr": 9.269891357555348e-07, "epoch": 0.571026182702772, "percentage": 57.1, "elapsed_time": "0:44:59", "remaining_time": "0:33:48", "throughput": 10406.77, "total_tokens": 28094720}
|
|
{"current_steps": 8925, "total_steps": 15621, "loss": 0.2775, "lr": 9.25874795379163e-07, "epoch": 0.5713462646437488, "percentage": 57.13, "elapsed_time": "0:45:00", "remaining_time": "0:33:45", "throughput": 10410.07, "total_tokens": 28110848}
|
|
{"current_steps": 8930, "total_steps": 15621, "loss": 0.4246, "lr": 9.247605475489793e-07, "epoch": 0.5716663465847257, "percentage": 57.17, "elapsed_time": "0:45:01", "remaining_time": "0:33:43", "throughput": 10413.41, "total_tokens": 28127040}
|
|
{"current_steps": 8935, "total_steps": 15621, "loss": 0.3106, "lr": 9.236463936561358e-07, "epoch": 0.5719864285257026, "percentage": 57.2, "elapsed_time": "0:45:01", "remaining_time": "0:33:41", "throughput": 10416.8, "total_tokens": 28143424}
|
|
{"current_steps": 8940, "total_steps": 15621, "loss": 0.5312, "lr": 9.225323350916661e-07, "epoch": 0.5723065104666795, "percentage": 57.23, "elapsed_time": "0:45:02", "remaining_time": "0:33:39", "throughput": 10419.86, "total_tokens": 28158528}
|
|
{"current_steps": 8945, "total_steps": 15621, "loss": 0.3963, "lr": 9.214183732464855e-07, "epoch": 0.5726265924076563, "percentage": 57.26, "elapsed_time": "0:45:03", "remaining_time": "0:33:37", "throughput": 10422.97, "total_tokens": 28173888}
|
|
{"current_steps": 8950, "total_steps": 15621, "loss": 0.3663, "lr": 9.203045095113886e-07, "epoch": 0.5729466743486332, "percentage": 57.29, "elapsed_time": "0:45:03", "remaining_time": "0:33:35", "throughput": 10426.73, "total_tokens": 28191872}
|
|
{"current_steps": 8955, "total_steps": 15621, "loss": 0.4394, "lr": 9.191907452770476e-07, "epoch": 0.5732667562896101, "percentage": 57.33, "elapsed_time": "0:45:04", "remaining_time": "0:33:33", "throughput": 10429.77, "total_tokens": 28206912}
|
|
{"current_steps": 8960, "total_steps": 15621, "loss": 0.4103, "lr": 9.180770819340095e-07, "epoch": 0.573586838230587, "percentage": 57.36, "elapsed_time": "0:45:05", "remaining_time": "0:33:31", "throughput": 10432.91, "total_tokens": 28222336}
|
|
{"current_steps": 8965, "total_steps": 15621, "loss": 0.3816, "lr": 9.169635208726967e-07, "epoch": 0.573906920171564, "percentage": 57.39, "elapsed_time": "0:45:05", "remaining_time": "0:33:28", "throughput": 10436.12, "total_tokens": 28238144}
|
|
{"current_steps": 8970, "total_steps": 15621, "loss": 0.3919, "lr": 9.15850063483403e-07, "epoch": 0.5742270021125409, "percentage": 57.42, "elapsed_time": "0:45:06", "remaining_time": "0:33:26", "throughput": 10439.21, "total_tokens": 28253376}
|
|
{"current_steps": 8975, "total_steps": 15621, "loss": 0.3549, "lr": 9.147367111562928e-07, "epoch": 0.5745470840535177, "percentage": 57.45, "elapsed_time": "0:45:07", "remaining_time": "0:33:24", "throughput": 10442.49, "total_tokens": 28269248}
|
|
{"current_steps": 8980, "total_steps": 15621, "loss": 0.4151, "lr": 9.136234652814005e-07, "epoch": 0.5748671659944946, "percentage": 57.49, "elapsed_time": "0:45:07", "remaining_time": "0:33:22", "throughput": 10445.83, "total_tokens": 28285440}
|
|
{"current_steps": 8985, "total_steps": 15621, "loss": 0.3061, "lr": 9.125103272486255e-07, "epoch": 0.5751872479354715, "percentage": 57.52, "elapsed_time": "0:45:08", "remaining_time": "0:33:20", "throughput": 10448.92, "total_tokens": 28300736}
|
|
{"current_steps": 8990, "total_steps": 15621, "loss": 0.3626, "lr": 9.11397298447734e-07, "epoch": 0.5755073298764484, "percentage": 57.55, "elapsed_time": "0:45:09", "remaining_time": "0:33:18", "throughput": 10451.94, "total_tokens": 28315712}
|
|
{"current_steps": 8995, "total_steps": 15621, "loss": 0.342, "lr": 9.10284380268356e-07, "epoch": 0.5758274118174252, "percentage": 57.58, "elapsed_time": "0:45:09", "remaining_time": "0:33:16", "throughput": 10455.28, "total_tokens": 28332032}
|
|
{"current_steps": 9000, "total_steps": 15621, "loss": 0.4546, "lr": 9.091715740999828e-07, "epoch": 0.5761474937584021, "percentage": 57.61, "elapsed_time": "0:45:10", "remaining_time": "0:33:14", "throughput": 10458.53, "total_tokens": 28347968}
|
|
{"current_steps": 9005, "total_steps": 15621, "loss": 0.39, "lr": 9.080588813319654e-07, "epoch": 0.576467575699379, "percentage": 57.65, "elapsed_time": "0:45:11", "remaining_time": "0:33:11", "throughput": 10461.55, "total_tokens": 28362944}
|
|
{"current_steps": 9010, "total_steps": 15621, "loss": 0.2894, "lr": 9.069463033535143e-07, "epoch": 0.5767876576403559, "percentage": 57.68, "elapsed_time": "0:45:11", "remaining_time": "0:33:09", "throughput": 10464.75, "total_tokens": 28378624}
|
|
{"current_steps": 9015, "total_steps": 15621, "loss": 0.3832, "lr": 9.058338415536962e-07, "epoch": 0.5771077395813328, "percentage": 57.71, "elapsed_time": "0:45:12", "remaining_time": "0:33:07", "throughput": 10467.87, "total_tokens": 28394048}
|
|
{"current_steps": 9020, "total_steps": 15621, "loss": 0.3796, "lr": 9.04721497321432e-07, "epoch": 0.5774278215223098, "percentage": 57.74, "elapsed_time": "0:45:13", "remaining_time": "0:33:05", "throughput": 10471.03, "total_tokens": 28409664}
|
|
{"current_steps": 9025, "total_steps": 15621, "loss": 0.3794, "lr": 9.036092720454977e-07, "epoch": 0.5777479034632866, "percentage": 57.77, "elapsed_time": "0:45:13", "remaining_time": "0:33:03", "throughput": 10474.08, "total_tokens": 28424768}
|
|
{"current_steps": 9030, "total_steps": 15621, "loss": 0.3439, "lr": 9.024971671145189e-07, "epoch": 0.5780679854042635, "percentage": 57.81, "elapsed_time": "0:45:14", "remaining_time": "0:33:01", "throughput": 10477.01, "total_tokens": 28439424}
|
|
{"current_steps": 9035, "total_steps": 15621, "loss": 0.443, "lr": 9.013851839169718e-07, "epoch": 0.5783880673452404, "percentage": 57.84, "elapsed_time": "0:45:15", "remaining_time": "0:32:59", "throughput": 10480.46, "total_tokens": 28456064}
|
|
{"current_steps": 9040, "total_steps": 15621, "loss": 0.3457, "lr": 9.002733238411801e-07, "epoch": 0.5787081492862173, "percentage": 57.87, "elapsed_time": "0:45:15", "remaining_time": "0:32:57", "throughput": 10483.9, "total_tokens": 28472768}
|
|
{"current_steps": 9045, "total_steps": 15621, "loss": 0.3528, "lr": 8.991615882753147e-07, "epoch": 0.5790282312271942, "percentage": 57.9, "elapsed_time": "0:45:16", "remaining_time": "0:32:55", "throughput": 10487.13, "total_tokens": 28488704}
|
|
{"current_steps": 9050, "total_steps": 15621, "loss": 0.4516, "lr": 8.980499786073904e-07, "epoch": 0.579348313168171, "percentage": 57.93, "elapsed_time": "0:45:17", "remaining_time": "0:32:52", "throughput": 10490.02, "total_tokens": 28503808}
|
|
{"current_steps": 9055, "total_steps": 15621, "loss": 0.4616, "lr": 8.969384962252645e-07, "epoch": 0.5796683951091479, "percentage": 57.97, "elapsed_time": "0:45:17", "remaining_time": "0:32:50", "throughput": 10493.41, "total_tokens": 28520320}
|
|
{"current_steps": 9060, "total_steps": 15621, "loss": 0.4395, "lr": 8.958271425166366e-07, "epoch": 0.5799884770501248, "percentage": 58.0, "elapsed_time": "0:45:18", "remaining_time": "0:32:48", "throughput": 10496.51, "total_tokens": 28535680}
|
|
{"current_steps": 9065, "total_steps": 15621, "loss": 0.3943, "lr": 8.947159188690442e-07, "epoch": 0.5803085589911017, "percentage": 58.03, "elapsed_time": "0:45:19", "remaining_time": "0:32:46", "throughput": 10499.7, "total_tokens": 28551488}
|
|
{"current_steps": 9070, "total_steps": 15621, "loss": 0.4633, "lr": 8.93604826669863e-07, "epoch": 0.5806286409320787, "percentage": 58.06, "elapsed_time": "0:45:19", "remaining_time": "0:32:44", "throughput": 10502.84, "total_tokens": 28567040}
|
|
{"current_steps": 9075, "total_steps": 15621, "loss": 0.389, "lr": 8.924938673063052e-07, "epoch": 0.5809487228730555, "percentage": 58.09, "elapsed_time": "0:45:20", "remaining_time": "0:32:42", "throughput": 10505.69, "total_tokens": 28581568}
|
|
{"current_steps": 9080, "total_steps": 15621, "loss": 0.3616, "lr": 8.913830421654166e-07, "epoch": 0.5812688048140324, "percentage": 58.13, "elapsed_time": "0:45:21", "remaining_time": "0:32:40", "throughput": 10508.82, "total_tokens": 28596992}
|
|
{"current_steps": 9085, "total_steps": 15621, "loss": 0.4752, "lr": 8.902723526340746e-07, "epoch": 0.5815888867550093, "percentage": 58.16, "elapsed_time": "0:45:21", "remaining_time": "0:32:38", "throughput": 10512.33, "total_tokens": 28613952}
|
|
{"current_steps": 9090, "total_steps": 15621, "loss": 0.4343, "lr": 8.89161800098989e-07, "epoch": 0.5819089686959862, "percentage": 58.19, "elapsed_time": "0:45:22", "remaining_time": "0:32:36", "throughput": 10515.28, "total_tokens": 28628736}
|
|
{"current_steps": 9095, "total_steps": 15621, "loss": 0.3683, "lr": 8.880513859466974e-07, "epoch": 0.5822290506369631, "percentage": 58.22, "elapsed_time": "0:45:23", "remaining_time": "0:32:34", "throughput": 10518.56, "total_tokens": 28644928}
|
|
{"current_steps": 9100, "total_steps": 15621, "loss": 0.2861, "lr": 8.869411115635645e-07, "epoch": 0.5825491325779399, "percentage": 58.25, "elapsed_time": "0:45:23", "remaining_time": "0:32:31", "throughput": 10521.86, "total_tokens": 28661184}
|
|
{"current_steps": 9105, "total_steps": 15621, "loss": 0.2823, "lr": 8.858309783357816e-07, "epoch": 0.5828692145189168, "percentage": 58.29, "elapsed_time": "0:45:24", "remaining_time": "0:32:29", "throughput": 10524.76, "total_tokens": 28675776}
|
|
{"current_steps": 9110, "total_steps": 15621, "loss": 0.4335, "lr": 8.847209876493629e-07, "epoch": 0.5831892964598937, "percentage": 58.32, "elapsed_time": "0:45:25", "remaining_time": "0:32:27", "throughput": 10528.08, "total_tokens": 28692160}
|
|
{"current_steps": 9115, "total_steps": 15621, "loss": 0.2627, "lr": 8.836111408901441e-07, "epoch": 0.5835093784008706, "percentage": 58.35, "elapsed_time": "0:45:25", "remaining_time": "0:32:25", "throughput": 10531.1, "total_tokens": 28707328}
|
|
{"current_steps": 9120, "total_steps": 15621, "loss": 0.4159, "lr": 8.825014394437828e-07, "epoch": 0.5838294603418475, "percentage": 58.38, "elapsed_time": "0:45:26", "remaining_time": "0:32:23", "throughput": 10534.15, "total_tokens": 28722624}
|
|
{"current_steps": 9125, "total_steps": 15621, "loss": 0.4013, "lr": 8.813918846957542e-07, "epoch": 0.5841495422828245, "percentage": 58.41, "elapsed_time": "0:45:27", "remaining_time": "0:32:21", "throughput": 10537.18, "total_tokens": 28737856}
|
|
{"current_steps": 9130, "total_steps": 15621, "loss": 0.4447, "lr": 8.802824780313499e-07, "epoch": 0.5844696242238013, "percentage": 58.45, "elapsed_time": "0:45:27", "remaining_time": "0:32:19", "throughput": 10540.08, "total_tokens": 28752448}
|
|
{"current_steps": 9135, "total_steps": 15621, "loss": 0.3924, "lr": 8.791732208356771e-07, "epoch": 0.5847897061647782, "percentage": 58.48, "elapsed_time": "0:45:28", "remaining_time": "0:32:17", "throughput": 10543.12, "total_tokens": 28767616}
|
|
{"current_steps": 9140, "total_steps": 15621, "loss": 0.4676, "lr": 8.780641144936573e-07, "epoch": 0.5851097881057551, "percentage": 58.51, "elapsed_time": "0:45:29", "remaining_time": "0:32:15", "throughput": 10546.04, "total_tokens": 28782400}
|
|
{"current_steps": 9145, "total_steps": 15621, "loss": 0.446, "lr": 8.76955160390022e-07, "epoch": 0.585429870046732, "percentage": 58.54, "elapsed_time": "0:45:29", "remaining_time": "0:32:13", "throughput": 10549.25, "total_tokens": 28798336}
|
|
{"current_steps": 9150, "total_steps": 15621, "loss": 0.2893, "lr": 8.758463599093136e-07, "epoch": 0.5857499519877089, "percentage": 58.57, "elapsed_time": "0:45:30", "remaining_time": "0:32:11", "throughput": 10552.48, "total_tokens": 28814336}
|
|
{"current_steps": 9155, "total_steps": 15621, "loss": 0.5245, "lr": 8.747377144358825e-07, "epoch": 0.5860700339286857, "percentage": 58.61, "elapsed_time": "0:45:31", "remaining_time": "0:32:09", "throughput": 10555.81, "total_tokens": 28830656}
|
|
{"current_steps": 9160, "total_steps": 15621, "loss": 0.4169, "lr": 8.736292253538861e-07, "epoch": 0.5863901158696626, "percentage": 58.64, "elapsed_time": "0:45:31", "remaining_time": "0:32:06", "throughput": 10559.02, "total_tokens": 28846656}
|
|
{"current_steps": 9165, "total_steps": 15621, "loss": 0.3115, "lr": 8.725208940472851e-07, "epoch": 0.5867101978106395, "percentage": 58.67, "elapsed_time": "0:45:32", "remaining_time": "0:32:04", "throughput": 10562.31, "total_tokens": 28862848}
|
|
{"current_steps": 9170, "total_steps": 15621, "loss": 0.4071, "lr": 8.714127218998448e-07, "epoch": 0.5870302797516164, "percentage": 58.7, "elapsed_time": "0:45:33", "remaining_time": "0:32:02", "throughput": 10565.42, "total_tokens": 28878400}
|
|
{"current_steps": 9175, "total_steps": 15621, "loss": 0.5141, "lr": 8.70304710295131e-07, "epoch": 0.5873503616925934, "percentage": 58.74, "elapsed_time": "0:45:33", "remaining_time": "0:32:00", "throughput": 10568.43, "total_tokens": 28893568}
|
|
{"current_steps": 9180, "total_steps": 15621, "loss": 0.3766, "lr": 8.691968606165092e-07, "epoch": 0.5876704436335702, "percentage": 58.77, "elapsed_time": "0:45:34", "remaining_time": "0:31:58", "throughput": 10571.71, "total_tokens": 28909824}
|
|
{"current_steps": 9185, "total_steps": 15621, "loss": 0.3189, "lr": 8.680891742471429e-07, "epoch": 0.5879905255745471, "percentage": 58.8, "elapsed_time": "0:45:35", "remaining_time": "0:31:56", "throughput": 10574.87, "total_tokens": 28925568}
|
|
{"current_steps": 9190, "total_steps": 15621, "loss": 0.3236, "lr": 8.669816525699912e-07, "epoch": 0.588310607515524, "percentage": 58.83, "elapsed_time": "0:45:35", "remaining_time": "0:31:54", "throughput": 10577.95, "total_tokens": 28941056}
|
|
{"current_steps": 9195, "total_steps": 15621, "loss": 0.4153, "lr": 8.658742969678079e-07, "epoch": 0.5886306894565009, "percentage": 58.86, "elapsed_time": "0:45:36", "remaining_time": "0:31:52", "throughput": 10580.75, "total_tokens": 28955456}
|
|
{"current_steps": 9200, "total_steps": 15621, "loss": 0.2925, "lr": 8.647671088231398e-07, "epoch": 0.5889507713974778, "percentage": 58.9, "elapsed_time": "0:45:37", "remaining_time": "0:31:50", "throughput": 10583.85, "total_tokens": 28971136}
|
|
{"current_steps": 9205, "total_steps": 15621, "loss": 0.4144, "lr": 8.636600895183245e-07, "epoch": 0.5892708533384546, "percentage": 58.93, "elapsed_time": "0:45:38", "remaining_time": "0:31:48", "throughput": 10587.39, "total_tokens": 28988480}
|
|
{"current_steps": 9210, "total_steps": 15621, "loss": 0.3702, "lr": 8.625532404354877e-07, "epoch": 0.5895909352794315, "percentage": 58.96, "elapsed_time": "0:45:38", "remaining_time": "0:31:46", "throughput": 10590.61, "total_tokens": 29004544}
|
|
{"current_steps": 9215, "total_steps": 15621, "loss": 0.3944, "lr": 8.614465629565443e-07, "epoch": 0.5899110172204084, "percentage": 58.99, "elapsed_time": "0:45:39", "remaining_time": "0:31:44", "throughput": 10593.52, "total_tokens": 29019328}
|
|
{"current_steps": 9220, "total_steps": 15621, "loss": 0.3414, "lr": 8.603400584631939e-07, "epoch": 0.5902310991613853, "percentage": 59.02, "elapsed_time": "0:45:40", "remaining_time": "0:31:42", "throughput": 10596.58, "total_tokens": 29034752}
|
|
{"current_steps": 9225, "total_steps": 15621, "loss": 0.4473, "lr": 8.592337283369198e-07, "epoch": 0.5905511811023622, "percentage": 59.06, "elapsed_time": "0:45:40", "remaining_time": "0:31:40", "throughput": 10599.77, "total_tokens": 29050816}
|
|
{"current_steps": 9230, "total_steps": 15621, "loss": 0.2833, "lr": 8.581275739589893e-07, "epoch": 0.5908712630433391, "percentage": 59.09, "elapsed_time": "0:45:41", "remaining_time": "0:31:38", "throughput": 10602.75, "total_tokens": 29065920}
|
|
{"current_steps": 9235, "total_steps": 15621, "loss": 0.509, "lr": 8.570215967104481e-07, "epoch": 0.591191344984316, "percentage": 59.12, "elapsed_time": "0:45:42", "remaining_time": "0:31:36", "throughput": 10605.74, "total_tokens": 29080960}
|
|
{"current_steps": 9240, "total_steps": 15621, "loss": 0.4754, "lr": 8.559157979721225e-07, "epoch": 0.5915114269252929, "percentage": 59.15, "elapsed_time": "0:45:42", "remaining_time": "0:31:34", "throughput": 10608.89, "total_tokens": 29096768}
|
|
{"current_steps": 9245, "total_steps": 15621, "loss": 0.5592, "lr": 8.548101791246145e-07, "epoch": 0.5918315088662698, "percentage": 59.18, "elapsed_time": "0:45:43", "remaining_time": "0:31:32", "throughput": 10612.02, "total_tokens": 29112448}
|
|
{"current_steps": 9250, "total_steps": 15621, "loss": 0.3436, "lr": 8.537047415483028e-07, "epoch": 0.5921515908072467, "percentage": 59.22, "elapsed_time": "0:45:44", "remaining_time": "0:31:29", "throughput": 10615.04, "total_tokens": 29127808}
|
|
{"current_steps": 9255, "total_steps": 15621, "loss": 0.2783, "lr": 8.525994866233388e-07, "epoch": 0.5924716727482235, "percentage": 59.25, "elapsed_time": "0:45:44", "remaining_time": "0:31:27", "throughput": 10618.03, "total_tokens": 29142912}
|
|
{"current_steps": 9260, "total_steps": 15621, "loss": 0.3963, "lr": 8.514944157296464e-07, "epoch": 0.5927917546892004, "percentage": 59.28, "elapsed_time": "0:45:45", "remaining_time": "0:31:25", "throughput": 10621.27, "total_tokens": 29159168}
|
|
{"current_steps": 9265, "total_steps": 15621, "loss": 0.3875, "lr": 8.503895302469199e-07, "epoch": 0.5931118366301773, "percentage": 59.31, "elapsed_time": "0:45:46", "remaining_time": "0:31:23", "throughput": 10624.54, "total_tokens": 29175488}
|
|
{"current_steps": 9270, "total_steps": 15621, "loss": 0.4151, "lr": 8.492848315546214e-07, "epoch": 0.5934319185711542, "percentage": 59.34, "elapsed_time": "0:45:46", "remaining_time": "0:31:21", "throughput": 10627.62, "total_tokens": 29191104}
|
|
{"current_steps": 9275, "total_steps": 15621, "loss": 0.4485, "lr": 8.4818032103198e-07, "epoch": 0.5937520005121311, "percentage": 59.38, "elapsed_time": "0:45:47", "remaining_time": "0:31:19", "throughput": 10630.59, "total_tokens": 29206208}
|
|
{"current_steps": 9280, "total_steps": 15621, "loss": 0.4186, "lr": 8.470760000579906e-07, "epoch": 0.5940720824531079, "percentage": 59.41, "elapsed_time": "0:45:48", "remaining_time": "0:31:17", "throughput": 10633.52, "total_tokens": 29221312}
|
|
{"current_steps": 9285, "total_steps": 15621, "loss": 0.5047, "lr": 8.459718700114108e-07, "epoch": 0.5943921643940849, "percentage": 59.44, "elapsed_time": "0:45:48", "remaining_time": "0:31:15", "throughput": 10636.58, "total_tokens": 29236800}
|
|
{"current_steps": 9290, "total_steps": 15621, "loss": 0.4508, "lr": 8.448679322707595e-07, "epoch": 0.5947122463350618, "percentage": 59.47, "elapsed_time": "0:45:49", "remaining_time": "0:31:13", "throughput": 10639.7, "total_tokens": 29252480}
|
|
{"current_steps": 9295, "total_steps": 15621, "loss": 0.6011, "lr": 8.437641882143163e-07, "epoch": 0.5950323282760387, "percentage": 59.5, "elapsed_time": "0:45:50", "remaining_time": "0:31:11", "throughput": 10642.51, "total_tokens": 29266944}
|
|
{"current_steps": 9300, "total_steps": 15621, "loss": 0.3106, "lr": 8.426606392201185e-07, "epoch": 0.5953524102170156, "percentage": 59.54, "elapsed_time": "0:45:50", "remaining_time": "0:31:09", "throughput": 10645.63, "total_tokens": 29282816}
|
|
{"current_steps": 9305, "total_steps": 15621, "loss": 0.3154, "lr": 8.415572866659599e-07, "epoch": 0.5956724921579925, "percentage": 59.57, "elapsed_time": "0:45:51", "remaining_time": "0:31:07", "throughput": 10648.58, "total_tokens": 29297984}
|
|
{"current_steps": 9310, "total_steps": 15621, "loss": 0.3652, "lr": 8.404541319293896e-07, "epoch": 0.5959925740989693, "percentage": 59.6, "elapsed_time": "0:45:52", "remaining_time": "0:31:05", "throughput": 10651.69, "total_tokens": 29313664}
|
|
{"current_steps": 9315, "total_steps": 15621, "loss": 0.593, "lr": 8.393511763877086e-07, "epoch": 0.5963126560399462, "percentage": 59.63, "elapsed_time": "0:45:52", "remaining_time": "0:31:03", "throughput": 10654.83, "total_tokens": 29329472}
|
|
{"current_steps": 9320, "total_steps": 15621, "loss": 0.4438, "lr": 8.3824842141797e-07, "epoch": 0.5966327379809231, "percentage": 59.66, "elapsed_time": "0:45:53", "remaining_time": "0:31:01", "throughput": 10658.17, "total_tokens": 29346048}
|
|
{"current_steps": 9325, "total_steps": 15621, "loss": 0.3806, "lr": 8.371458683969765e-07, "epoch": 0.5969528199219, "percentage": 59.7, "elapsed_time": "0:45:54", "remaining_time": "0:30:59", "throughput": 10661.23, "total_tokens": 29361664}
|
|
{"current_steps": 9330, "total_steps": 15621, "loss": 0.3848, "lr": 8.360435187012787e-07, "epoch": 0.5972729018628768, "percentage": 59.73, "elapsed_time": "0:45:54", "remaining_time": "0:30:57", "throughput": 10664.2, "total_tokens": 29376896}
|
|
{"current_steps": 9335, "total_steps": 15621, "loss": 0.3866, "lr": 8.349413737071725e-07, "epoch": 0.5975929838038538, "percentage": 59.76, "elapsed_time": "0:45:55", "remaining_time": "0:30:55", "throughput": 10667.3, "total_tokens": 29392640}
|
|
{"current_steps": 9340, "total_steps": 15621, "loss": 0.4486, "lr": 8.338394347906994e-07, "epoch": 0.5979130657448307, "percentage": 59.79, "elapsed_time": "0:45:56", "remaining_time": "0:30:53", "throughput": 10670.24, "total_tokens": 29407808}
|
|
{"current_steps": 9345, "total_steps": 15621, "loss": 0.3114, "lr": 8.327377033276431e-07, "epoch": 0.5982331476858076, "percentage": 59.82, "elapsed_time": "0:45:56", "remaining_time": "0:30:51", "throughput": 10673.09, "total_tokens": 29422528}
|
|
{"current_steps": 9350, "total_steps": 15621, "loss": 0.3484, "lr": 8.316361806935279e-07, "epoch": 0.5985532296267845, "percentage": 59.86, "elapsed_time": "0:45:57", "remaining_time": "0:30:49", "throughput": 10676.18, "total_tokens": 29438272}
|
|
{"current_steps": 9355, "total_steps": 15621, "loss": 0.4397, "lr": 8.305348682636177e-07, "epoch": 0.5988733115677614, "percentage": 59.89, "elapsed_time": "0:45:58", "remaining_time": "0:30:47", "throughput": 10679.12, "total_tokens": 29453376}
|
|
{"current_steps": 9360, "total_steps": 15621, "loss": 0.4149, "lr": 8.294337674129144e-07, "epoch": 0.5991933935087382, "percentage": 59.92, "elapsed_time": "0:45:58", "remaining_time": "0:30:45", "throughput": 10682.27, "total_tokens": 29469248}
|
|
{"current_steps": 9365, "total_steps": 15621, "loss": 0.2745, "lr": 8.283328795161554e-07, "epoch": 0.5995134754497151, "percentage": 59.95, "elapsed_time": "0:45:59", "remaining_time": "0:30:43", "throughput": 10685.57, "total_tokens": 29485888}
|
|
{"current_steps": 9370, "total_steps": 15621, "loss": 0.3205, "lr": 8.272322059478114e-07, "epoch": 0.599833557390692, "percentage": 59.98, "elapsed_time": "0:46:00", "remaining_time": "0:30:41", "throughput": 10688.46, "total_tokens": 29500864}
|
|
{"current_steps": 9375, "total_steps": 15621, "loss": 0.2427, "lr": 8.261317480820871e-07, "epoch": 0.6001536393316689, "percentage": 60.02, "elapsed_time": "0:46:00", "remaining_time": "0:30:39", "throughput": 10691.48, "total_tokens": 29516288}
|
|
{"current_steps": 9380, "total_steps": 15621, "loss": 0.4129, "lr": 8.250315072929168e-07, "epoch": 0.6004737212726458, "percentage": 60.05, "elapsed_time": "0:46:01", "remaining_time": "0:30:37", "throughput": 10694.27, "total_tokens": 29530880}
|
|
{"current_steps": 9384, "total_steps": 15621, "eval_loss": 0.39462828636169434, "epoch": 0.6007297868254273, "percentage": 60.07, "elapsed_time": "0:46:52", "remaining_time": "0:31:09", "throughput": 10504.4, "total_tokens": 29544576}
|
|
{"current_steps": 9385, "total_steps": 15621, "loss": 0.35, "lr": 8.239314849539637e-07, "epoch": 0.6007938032136226, "percentage": 60.08, "elapsed_time": "1:02:55", "remaining_time": "0:41:48", "throughput": 7825.91, "total_tokens": 29547840}
|
|
{"current_steps": 9390, "total_steps": 15621, "loss": 0.4234, "lr": 8.228316824386193e-07, "epoch": 0.6011138851545996, "percentage": 60.11, "elapsed_time": "1:02:56", "remaining_time": "0:41:45", "throughput": 7828.78, "total_tokens": 29564096}
|
|
{"current_steps": 9395, "total_steps": 15621, "loss": 0.378, "lr": 8.217321011199995e-07, "epoch": 0.6014339670955765, "percentage": 60.14, "elapsed_time": "1:02:56", "remaining_time": "0:41:42", "throughput": 7831.49, "total_tokens": 29579520}
|
|
{"current_steps": 9400, "total_steps": 15621, "loss": 0.433, "lr": 8.206327423709441e-07, "epoch": 0.6017540490365534, "percentage": 60.18, "elapsed_time": "1:02:57", "remaining_time": "0:41:40", "throughput": 7834.02, "total_tokens": 29594048}
|
|
{"current_steps": 9405, "total_steps": 15621, "loss": 0.3913, "lr": 8.195336075640163e-07, "epoch": 0.6020741309775303, "percentage": 60.21, "elapsed_time": "1:02:58", "remaining_time": "0:41:37", "throughput": 7836.9, "total_tokens": 29610368}
|
|
{"current_steps": 9410, "total_steps": 15621, "loss": 0.4248, "lr": 8.184346980714984e-07, "epoch": 0.6023942129185071, "percentage": 60.24, "elapsed_time": "1:02:58", "remaining_time": "0:41:34", "throughput": 7839.6, "total_tokens": 29625792}
|
|
{"current_steps": 9415, "total_steps": 15621, "loss": 0.3563, "lr": 8.173360152653914e-07, "epoch": 0.602714294859484, "percentage": 60.27, "elapsed_time": "1:02:59", "remaining_time": "0:41:31", "throughput": 7842.5, "total_tokens": 29642240}
|
|
{"current_steps": 9420, "total_steps": 15621, "loss": 0.3138, "lr": 8.162375605174143e-07, "epoch": 0.6030343768004609, "percentage": 60.3, "elapsed_time": "1:03:00", "remaining_time": "0:41:28", "throughput": 7845.3, "total_tokens": 29658176}
|
|
{"current_steps": 9425, "total_steps": 15621, "loss": 0.3068, "lr": 8.151393351990005e-07, "epoch": 0.6033544587414378, "percentage": 60.34, "elapsed_time": "1:03:01", "remaining_time": "0:41:25", "throughput": 7848.37, "total_tokens": 29675392}
|
|
{"current_steps": 9430, "total_steps": 15621, "loss": 0.4185, "lr": 8.140413406812971e-07, "epoch": 0.6036745406824147, "percentage": 60.37, "elapsed_time": "1:03:01", "remaining_time": "0:41:22", "throughput": 7850.92, "total_tokens": 29690048}
|
|
{"current_steps": 9435, "total_steps": 15621, "loss": 0.3111, "lr": 8.129435783351635e-07, "epoch": 0.6039946226233915, "percentage": 60.4, "elapsed_time": "1:03:02", "remaining_time": "0:41:19", "throughput": 7853.55, "total_tokens": 29705088}
|
|
{"current_steps": 9440, "total_steps": 15621, "loss": 0.4421, "lr": 8.118460495311685e-07, "epoch": 0.6043147045643685, "percentage": 60.43, "elapsed_time": "1:03:03", "remaining_time": "0:41:17", "throughput": 7856.25, "total_tokens": 29720576}
|
|
{"current_steps": 9445, "total_steps": 15621, "loss": 0.4352, "lr": 8.107487556395901e-07, "epoch": 0.6046347865053454, "percentage": 60.46, "elapsed_time": "1:03:03", "remaining_time": "0:41:14", "throughput": 7859.14, "total_tokens": 29736896}
|
|
{"current_steps": 9450, "total_steps": 15621, "loss": 0.3688, "lr": 8.096516980304115e-07, "epoch": 0.6049548684463223, "percentage": 60.5, "elapsed_time": "1:03:04", "remaining_time": "0:41:11", "throughput": 7861.91, "total_tokens": 29752768}
|
|
{"current_steps": 9455, "total_steps": 15621, "loss": 0.3448, "lr": 8.085548780733238e-07, "epoch": 0.6052749503872992, "percentage": 60.53, "elapsed_time": "1:03:05", "remaining_time": "0:41:08", "throughput": 7864.7, "total_tokens": 29768640}
|
|
{"current_steps": 9460, "total_steps": 15621, "loss": 0.3368, "lr": 8.074582971377182e-07, "epoch": 0.605595032328276, "percentage": 60.56, "elapsed_time": "1:03:05", "remaining_time": "0:41:05", "throughput": 7867.81, "total_tokens": 29786240}
|
|
{"current_steps": 9465, "total_steps": 15621, "loss": 0.4407, "lr": 8.063619565926892e-07, "epoch": 0.6059151142692529, "percentage": 60.59, "elapsed_time": "1:03:06", "remaining_time": "0:41:02", "throughput": 7870.6, "total_tokens": 29802176}
|
|
{"current_steps": 9470, "total_steps": 15621, "loss": 0.3992, "lr": 8.052658578070313e-07, "epoch": 0.6062351962102298, "percentage": 60.62, "elapsed_time": "1:03:07", "remaining_time": "0:40:59", "throughput": 7873.29, "total_tokens": 29817600}
|
|
{"current_steps": 9475, "total_steps": 15621, "loss": 0.3233, "lr": 8.041700021492362e-07, "epoch": 0.6065552781512067, "percentage": 60.66, "elapsed_time": "1:03:07", "remaining_time": "0:40:57", "throughput": 7875.97, "total_tokens": 29832960}
|
|
{"current_steps": 9480, "total_steps": 15621, "loss": 0.2929, "lr": 8.030743909874924e-07, "epoch": 0.6068753600921836, "percentage": 60.69, "elapsed_time": "1:03:08", "remaining_time": "0:40:54", "throughput": 7878.66, "total_tokens": 29848448}
|
|
{"current_steps": 9485, "total_steps": 15621, "loss": 0.3299, "lr": 8.019790256896839e-07, "epoch": 0.6071954420331604, "percentage": 60.72, "elapsed_time": "1:03:09", "remaining_time": "0:40:51", "throughput": 7881.23, "total_tokens": 29863296}
|
|
{"current_steps": 9490, "total_steps": 15621, "loss": 0.3934, "lr": 8.008839076233871e-07, "epoch": 0.6075155239741373, "percentage": 60.75, "elapsed_time": "1:03:09", "remaining_time": "0:40:48", "throughput": 7884.2, "total_tokens": 29880128}
|
|
{"current_steps": 9495, "total_steps": 15621, "loss": 0.3564, "lr": 7.997890381558691e-07, "epoch": 0.6078356059151143, "percentage": 60.78, "elapsed_time": "1:03:10", "remaining_time": "0:40:45", "throughput": 7886.85, "total_tokens": 29895296}
|
|
{"current_steps": 9500, "total_steps": 15621, "loss": 0.434, "lr": 7.986944186540878e-07, "epoch": 0.6081556878560912, "percentage": 60.82, "elapsed_time": "1:03:11", "remaining_time": "0:40:42", "throughput": 7889.65, "total_tokens": 29911296}
|
|
{"current_steps": 9505, "total_steps": 15621, "loss": 0.4603, "lr": 7.976000504846885e-07, "epoch": 0.6084757697970681, "percentage": 60.85, "elapsed_time": "1:03:11", "remaining_time": "0:40:39", "throughput": 7892.38, "total_tokens": 29926912}
|
|
{"current_steps": 9510, "total_steps": 15621, "loss": 0.4725, "lr": 7.965059350140024e-07, "epoch": 0.608795851738045, "percentage": 60.88, "elapsed_time": "1:03:12", "remaining_time": "0:40:37", "throughput": 7895.06, "total_tokens": 29942272}
|
|
{"current_steps": 9515, "total_steps": 15621, "loss": 0.4093, "lr": 7.954120736080461e-07, "epoch": 0.6091159336790218, "percentage": 60.91, "elapsed_time": "1:03:13", "remaining_time": "0:40:34", "throughput": 7897.79, "total_tokens": 29958016}
|
|
{"current_steps": 9520, "total_steps": 15621, "loss": 0.5561, "lr": 7.943184676325178e-07, "epoch": 0.6094360156199987, "percentage": 60.94, "elapsed_time": "1:03:13", "remaining_time": "0:40:31", "throughput": 7900.73, "total_tokens": 29974720}
|
|
{"current_steps": 9525, "total_steps": 15621, "loss": 0.4295, "lr": 7.932251184527974e-07, "epoch": 0.6097560975609756, "percentage": 60.98, "elapsed_time": "1:03:14", "remaining_time": "0:40:28", "throughput": 7903.71, "total_tokens": 29991680}
|
|
{"current_steps": 9530, "total_steps": 15621, "loss": 0.2678, "lr": 7.921320274339446e-07, "epoch": 0.6100761795019525, "percentage": 61.01, "elapsed_time": "1:03:15", "remaining_time": "0:40:25", "throughput": 7906.41, "total_tokens": 30007168}
|
|
{"current_steps": 9535, "total_steps": 15621, "loss": 0.34, "lr": 7.910391959406966e-07, "epoch": 0.6103962614429294, "percentage": 61.04, "elapsed_time": "1:03:15", "remaining_time": "0:40:22", "throughput": 7909.09, "total_tokens": 30022656}
|
|
{"current_steps": 9540, "total_steps": 15621, "loss": 0.3896, "lr": 7.899466253374653e-07, "epoch": 0.6107163433839062, "percentage": 61.07, "elapsed_time": "1:03:16", "remaining_time": "0:40:20", "throughput": 7911.78, "total_tokens": 30038144}
|
|
{"current_steps": 9545, "total_steps": 15621, "loss": 0.3321, "lr": 7.88854316988339e-07, "epoch": 0.6110364253248832, "percentage": 61.1, "elapsed_time": "1:03:17", "remaining_time": "0:40:17", "throughput": 7914.85, "total_tokens": 30055488}
|
|
{"current_steps": 9550, "total_steps": 15621, "loss": 0.3085, "lr": 7.877622722570771e-07, "epoch": 0.6113565072658601, "percentage": 61.14, "elapsed_time": "1:03:18", "remaining_time": "0:40:14", "throughput": 7917.55, "total_tokens": 30071040}
|
|
{"current_steps": 9555, "total_steps": 15621, "loss": 0.4224, "lr": 7.866704925071101e-07, "epoch": 0.611676589206837, "percentage": 61.17, "elapsed_time": "1:03:18", "remaining_time": "0:40:11", "throughput": 7920.53, "total_tokens": 30088000}
|
|
{"current_steps": 9560, "total_steps": 15621, "loss": 0.4359, "lr": 7.855789791015377e-07, "epoch": 0.6119966711478139, "percentage": 61.2, "elapsed_time": "1:03:19", "remaining_time": "0:40:08", "throughput": 7923.12, "total_tokens": 30103040}
|
|
{"current_steps": 9565, "total_steps": 15621, "loss": 0.3887, "lr": 7.844877334031277e-07, "epoch": 0.6123167530887907, "percentage": 61.23, "elapsed_time": "1:03:20", "remaining_time": "0:40:05", "throughput": 7925.67, "total_tokens": 30117760}
|
|
{"current_steps": 9570, "total_steps": 15621, "loss": 0.4969, "lr": 7.833967567743131e-07, "epoch": 0.6126368350297676, "percentage": 61.26, "elapsed_time": "1:03:20", "remaining_time": "0:40:03", "throughput": 7928.48, "total_tokens": 30133888}
|
|
{"current_steps": 9575, "total_steps": 15621, "loss": 0.3596, "lr": 7.823060505771903e-07, "epoch": 0.6129569169707445, "percentage": 61.3, "elapsed_time": "1:03:21", "remaining_time": "0:40:00", "throughput": 7931.14, "total_tokens": 30149312}
|
|
{"current_steps": 9580, "total_steps": 15621, "loss": 0.4176, "lr": 7.812156161735199e-07, "epoch": 0.6132769989117214, "percentage": 61.33, "elapsed_time": "1:03:22", "remaining_time": "0:39:57", "throughput": 7933.64, "total_tokens": 30163840}
|
|
{"current_steps": 9585, "total_steps": 15621, "loss": 0.5474, "lr": 7.801254549247215e-07, "epoch": 0.6135970808526983, "percentage": 61.36, "elapsed_time": "1:03:22", "remaining_time": "0:39:54", "throughput": 7936.58, "total_tokens": 30180544}
|
|
{"current_steps": 9590, "total_steps": 15621, "loss": 0.338, "lr": 7.790355681918739e-07, "epoch": 0.6139171627936751, "percentage": 61.39, "elapsed_time": "1:03:23", "remaining_time": "0:39:51", "throughput": 7939.46, "total_tokens": 30197120}
|
|
{"current_steps": 9595, "total_steps": 15621, "loss": 0.4222, "lr": 7.779459573357144e-07, "epoch": 0.614237244734652, "percentage": 61.42, "elapsed_time": "1:03:24", "remaining_time": "0:39:49", "throughput": 7942.28, "total_tokens": 30213376}
|
|
{"current_steps": 9600, "total_steps": 15621, "loss": 0.4138, "lr": 7.768566237166338e-07, "epoch": 0.614557326675629, "percentage": 61.46, "elapsed_time": "1:03:24", "remaining_time": "0:39:46", "throughput": 7945.02, "total_tokens": 30229120}
|
|
{"current_steps": 9605, "total_steps": 15621, "loss": 0.5188, "lr": 7.757675686946786e-07, "epoch": 0.6148774086166059, "percentage": 61.49, "elapsed_time": "1:03:25", "remaining_time": "0:39:43", "throughput": 7947.69, "total_tokens": 30244544}
|
|
{"current_steps": 9610, "total_steps": 15621, "loss": 0.4258, "lr": 7.746787936295468e-07, "epoch": 0.6151974905575828, "percentage": 61.52, "elapsed_time": "1:03:26", "remaining_time": "0:39:40", "throughput": 7950.54, "total_tokens": 30260864}
|
|
{"current_steps": 9615, "total_steps": 15621, "loss": 0.3681, "lr": 7.735902998805868e-07, "epoch": 0.6155175724985597, "percentage": 61.55, "elapsed_time": "1:03:26", "remaining_time": "0:39:37", "throughput": 7953.03, "total_tokens": 30275456}
|
|
{"current_steps": 9620, "total_steps": 15621, "loss": 0.4284, "lr": 7.725020888067955e-07, "epoch": 0.6158376544395365, "percentage": 61.58, "elapsed_time": "1:03:27", "remaining_time": "0:39:35", "throughput": 7955.71, "total_tokens": 30291008}
|
|
{"current_steps": 9625, "total_steps": 15621, "loss": 0.4779, "lr": 7.714141617668176e-07, "epoch": 0.6161577363805134, "percentage": 61.62, "elapsed_time": "1:03:28", "remaining_time": "0:39:32", "throughput": 7958.45, "total_tokens": 30306816}
|
|
{"current_steps": 9630, "total_steps": 15621, "loss": 0.3342, "lr": 7.703265201189426e-07, "epoch": 0.6164778183214903, "percentage": 61.65, "elapsed_time": "1:03:28", "remaining_time": "0:39:29", "throughput": 7961.09, "total_tokens": 30322240}
|
|
{"current_steps": 9635, "total_steps": 15621, "loss": 0.3333, "lr": 7.692391652211036e-07, "epoch": 0.6167979002624672, "percentage": 61.68, "elapsed_time": "1:03:29", "remaining_time": "0:39:26", "throughput": 7963.81, "total_tokens": 30338048}
|
|
{"current_steps": 9640, "total_steps": 15621, "loss": 0.3256, "lr": 7.681520984308769e-07, "epoch": 0.617117982203444, "percentage": 61.71, "elapsed_time": "1:03:30", "remaining_time": "0:39:23", "throughput": 7966.57, "total_tokens": 30353984}
|
|
{"current_steps": 9645, "total_steps": 15621, "loss": 0.496, "lr": 7.670653211054772e-07, "epoch": 0.6174380641444209, "percentage": 61.74, "elapsed_time": "1:03:30", "remaining_time": "0:39:21", "throughput": 7969.33, "total_tokens": 30370048}
|
|
{"current_steps": 9650, "total_steps": 15621, "loss": 0.4137, "lr": 7.659788346017591e-07, "epoch": 0.6177581460853978, "percentage": 61.78, "elapsed_time": "1:03:31", "remaining_time": "0:39:18", "throughput": 7971.95, "total_tokens": 30385344}
|
|
{"current_steps": 9655, "total_steps": 15621, "loss": 0.3994, "lr": 7.648926402762133e-07, "epoch": 0.6180782280263748, "percentage": 61.81, "elapsed_time": "1:03:32", "remaining_time": "0:39:15", "throughput": 7974.55, "total_tokens": 30400576}
|
|
{"current_steps": 9660, "total_steps": 15621, "loss": 0.3861, "lr": 7.638067394849671e-07, "epoch": 0.6183983099673517, "percentage": 61.84, "elapsed_time": "1:03:32", "remaining_time": "0:39:12", "throughput": 7977.1, "total_tokens": 30415424}
|
|
{"current_steps": 9665, "total_steps": 15621, "loss": 0.3971, "lr": 7.627211335837797e-07, "epoch": 0.6187183919083286, "percentage": 61.87, "elapsed_time": "1:03:33", "remaining_time": "0:39:10", "throughput": 7979.69, "total_tokens": 30430592}
|
|
{"current_steps": 9670, "total_steps": 15621, "loss": 0.4285, "lr": 7.616358239280427e-07, "epoch": 0.6190384738493054, "percentage": 61.9, "elapsed_time": "1:03:34", "remaining_time": "0:39:07", "throughput": 7982.32, "total_tokens": 30445952}
|
|
{"current_steps": 9675, "total_steps": 15621, "loss": 0.3194, "lr": 7.605508118727787e-07, "epoch": 0.6193585557902823, "percentage": 61.94, "elapsed_time": "1:03:34", "remaining_time": "0:39:04", "throughput": 7985.01, "total_tokens": 30461568}
|
|
{"current_steps": 9680, "total_steps": 15621, "loss": 0.3642, "lr": 7.594660987726373e-07, "epoch": 0.6196786377312592, "percentage": 61.97, "elapsed_time": "1:03:35", "remaining_time": "0:39:01", "throughput": 7987.58, "total_tokens": 30476672}
|
|
{"current_steps": 9685, "total_steps": 15621, "loss": 0.3969, "lr": 7.583816859818956e-07, "epoch": 0.6199987196722361, "percentage": 62.0, "elapsed_time": "1:03:36", "remaining_time": "0:38:58", "throughput": 7990.33, "total_tokens": 30492672}
|
|
{"current_steps": 9690, "total_steps": 15621, "loss": 0.3783, "lr": 7.57297574854456e-07, "epoch": 0.620318801613213, "percentage": 62.03, "elapsed_time": "1:03:36", "remaining_time": "0:38:56", "throughput": 7992.88, "total_tokens": 30507712}
|
|
{"current_steps": 9695, "total_steps": 15621, "loss": 0.4477, "lr": 7.56213766743844e-07, "epoch": 0.6206388835541898, "percentage": 62.06, "elapsed_time": "1:03:37", "remaining_time": "0:38:53", "throughput": 7995.71, "total_tokens": 30524032}
|
|
{"current_steps": 9700, "total_steps": 15621, "loss": 0.3281, "lr": 7.551302630032064e-07, "epoch": 0.6209589654951667, "percentage": 62.1, "elapsed_time": "1:03:38", "remaining_time": "0:38:50", "throughput": 7998.4, "total_tokens": 30539776}
|
|
{"current_steps": 9705, "total_steps": 15621, "loss": 0.3758, "lr": 7.540470649853106e-07, "epoch": 0.6212790474361437, "percentage": 62.13, "elapsed_time": "1:03:38", "remaining_time": "0:38:47", "throughput": 8000.95, "total_tokens": 30554752}
|
|
{"current_steps": 9710, "total_steps": 15621, "loss": 0.3955, "lr": 7.529641740425419e-07, "epoch": 0.6215991293771206, "percentage": 62.16, "elapsed_time": "1:03:39", "remaining_time": "0:38:45", "throughput": 8003.94, "total_tokens": 30571968}
|
|
{"current_steps": 9715, "total_steps": 15621, "loss": 0.449, "lr": 7.518815915269023e-07, "epoch": 0.6219192113180975, "percentage": 62.19, "elapsed_time": "1:03:40", "remaining_time": "0:38:42", "throughput": 8006.56, "total_tokens": 30587264}
|
|
{"current_steps": 9720, "total_steps": 15621, "loss": 0.3823, "lr": 7.507993187900092e-07, "epoch": 0.6222392932590743, "percentage": 62.22, "elapsed_time": "1:03:40", "remaining_time": "0:38:39", "throughput": 8009.29, "total_tokens": 30603200}
|
|
{"current_steps": 9725, "total_steps": 15621, "loss": 0.4186, "lr": 7.497173571830926e-07, "epoch": 0.6225593752000512, "percentage": 62.26, "elapsed_time": "1:03:41", "remaining_time": "0:38:36", "throughput": 8011.78, "total_tokens": 30617856}
|
|
{"current_steps": 9730, "total_steps": 15621, "loss": 0.4631, "lr": 7.486357080569938e-07, "epoch": 0.6228794571410281, "percentage": 62.29, "elapsed_time": "1:03:42", "remaining_time": "0:38:34", "throughput": 8014.25, "total_tokens": 30632448}
|
|
{"current_steps": 9735, "total_steps": 15621, "loss": 0.3768, "lr": 7.47554372762165e-07, "epoch": 0.623199539082005, "percentage": 62.32, "elapsed_time": "1:03:42", "remaining_time": "0:38:31", "throughput": 8016.86, "total_tokens": 30647680}
|
|
{"current_steps": 9740, "total_steps": 15621, "loss": 0.4872, "lr": 7.464733526486662e-07, "epoch": 0.6235196210229819, "percentage": 62.35, "elapsed_time": "1:03:43", "remaining_time": "0:38:28", "throughput": 8019.59, "total_tokens": 30663616}
|
|
{"current_steps": 9745, "total_steps": 15621, "loss": 0.3515, "lr": 7.453926490661628e-07, "epoch": 0.6238397029639587, "percentage": 62.38, "elapsed_time": "1:03:44", "remaining_time": "0:38:25", "throughput": 8022.93, "total_tokens": 30682496}
|
|
{"current_steps": 9750, "total_steps": 15621, "loss": 0.3687, "lr": 7.443122633639267e-07, "epoch": 0.6241597849049356, "percentage": 62.42, "elapsed_time": "1:03:45", "remaining_time": "0:38:23", "throughput": 8025.51, "total_tokens": 30697664}
|
|
{"current_steps": 9755, "total_steps": 15621, "loss": 0.3856, "lr": 7.432321968908319e-07, "epoch": 0.6244798668459125, "percentage": 62.45, "elapsed_time": "1:03:45", "remaining_time": "0:38:20", "throughput": 8028.21, "total_tokens": 30713408}
|
|
{"current_steps": 9760, "total_steps": 15621, "loss": 0.3178, "lr": 7.421524509953543e-07, "epoch": 0.6247999487868895, "percentage": 62.48, "elapsed_time": "1:03:46", "remaining_time": "0:38:17", "throughput": 8031.19, "total_tokens": 30730496}
|
|
{"current_steps": 9765, "total_steps": 15621, "loss": 0.4143, "lr": 7.410730270255687e-07, "epoch": 0.6251200307278664, "percentage": 62.51, "elapsed_time": "1:03:47", "remaining_time": "0:38:15", "throughput": 8033.78, "total_tokens": 30745664}
|
|
{"current_steps": 9770, "total_steps": 15621, "loss": 0.3747, "lr": 7.399939263291493e-07, "epoch": 0.6254401126688433, "percentage": 62.54, "elapsed_time": "1:03:47", "remaining_time": "0:38:12", "throughput": 8036.39, "total_tokens": 30760960}
|
|
{"current_steps": 9775, "total_steps": 15621, "loss": 0.479, "lr": 7.389151502533657e-07, "epoch": 0.6257601946098201, "percentage": 62.58, "elapsed_time": "1:03:48", "remaining_time": "0:38:09", "throughput": 8038.91, "total_tokens": 30775872}
|
|
{"current_steps": 9780, "total_steps": 15621, "loss": 0.3696, "lr": 7.378367001450819e-07, "epoch": 0.626080276550797, "percentage": 62.61, "elapsed_time": "1:03:49", "remaining_time": "0:38:06", "throughput": 8041.58, "total_tokens": 30791424}
|
|
{"current_steps": 9785, "total_steps": 15621, "loss": 0.426, "lr": 7.367585773507567e-07, "epoch": 0.6264003584917739, "percentage": 62.64, "elapsed_time": "1:03:49", "remaining_time": "0:38:04", "throughput": 8044.38, "total_tokens": 30807680}
|
|
{"current_steps": 9790, "total_steps": 15621, "loss": 0.4515, "lr": 7.356807832164385e-07, "epoch": 0.6267204404327508, "percentage": 62.67, "elapsed_time": "1:03:50", "remaining_time": "0:38:01", "throughput": 8047.12, "total_tokens": 30823680}
|
|
{"current_steps": 9795, "total_steps": 15621, "loss": 0.4401, "lr": 7.346033190877654e-07, "epoch": 0.6270405223737276, "percentage": 62.7, "elapsed_time": "1:03:51", "remaining_time": "0:37:58", "throughput": 8049.8, "total_tokens": 30839360}
|
|
{"current_steps": 9800, "total_steps": 15621, "loss": 0.3541, "lr": 7.335261863099651e-07, "epoch": 0.6273606043147045, "percentage": 62.74, "elapsed_time": "1:03:51", "remaining_time": "0:37:55", "throughput": 8052.44, "total_tokens": 30854784}
|
|
{"current_steps": 9805, "total_steps": 15621, "loss": 0.4232, "lr": 7.324493862278498e-07, "epoch": 0.6276806862556814, "percentage": 62.77, "elapsed_time": "1:03:52", "remaining_time": "0:37:53", "throughput": 8055.15, "total_tokens": 30870592}
|
|
{"current_steps": 9810, "total_steps": 15621, "loss": 0.4636, "lr": 7.313729201858167e-07, "epoch": 0.6280007681966584, "percentage": 62.8, "elapsed_time": "1:03:53", "remaining_time": "0:37:50", "throughput": 8057.77, "total_tokens": 30885952}
|
|
{"current_steps": 9815, "total_steps": 15621, "loss": 0.3329, "lr": 7.302967895278473e-07, "epoch": 0.6283208501376353, "percentage": 62.83, "elapsed_time": "1:03:53", "remaining_time": "0:37:47", "throughput": 8060.52, "total_tokens": 30902080}
|
|
{"current_steps": 9820, "total_steps": 15621, "loss": 0.4042, "lr": 7.292209955975028e-07, "epoch": 0.6286409320786122, "percentage": 62.86, "elapsed_time": "1:03:54", "remaining_time": "0:37:45", "throughput": 8063.5, "total_tokens": 30919232}
|
|
{"current_steps": 9825, "total_steps": 15621, "loss": 0.4078, "lr": 7.281455397379244e-07, "epoch": 0.628961014019589, "percentage": 62.9, "elapsed_time": "1:03:55", "remaining_time": "0:37:42", "throughput": 8066.5, "total_tokens": 30936448}
|
|
{"current_steps": 9830, "total_steps": 15621, "loss": 0.3225, "lr": 7.270704232918316e-07, "epoch": 0.6292810959605659, "percentage": 62.93, "elapsed_time": "1:03:55", "remaining_time": "0:37:39", "throughput": 8069.2, "total_tokens": 30952256}
|
|
{"current_steps": 9835, "total_steps": 15621, "loss": 0.4216, "lr": 7.2599564760152e-07, "epoch": 0.6296011779015428, "percentage": 62.96, "elapsed_time": "1:03:56", "remaining_time": "0:37:37", "throughput": 8071.76, "total_tokens": 30967360}
|
|
{"current_steps": 9840, "total_steps": 15621, "loss": 0.3852, "lr": 7.249212140088592e-07, "epoch": 0.6299212598425197, "percentage": 62.99, "elapsed_time": "1:03:57", "remaining_time": "0:37:34", "throughput": 8074.23, "total_tokens": 30982016}
|
|
{"current_steps": 9845, "total_steps": 15621, "loss": 0.3347, "lr": 7.23847123855293e-07, "epoch": 0.6302413417834966, "percentage": 63.02, "elapsed_time": "1:03:57", "remaining_time": "0:37:31", "throughput": 8076.98, "total_tokens": 30998080}
|
|
{"current_steps": 9850, "total_steps": 15621, "loss": 0.2805, "lr": 7.227733784818349e-07, "epoch": 0.6305614237244734, "percentage": 63.06, "elapsed_time": "1:03:58", "remaining_time": "0:37:28", "throughput": 8079.54, "total_tokens": 31013184}
|
|
{"current_steps": 9855, "total_steps": 15621, "loss": 0.3804, "lr": 7.216999792290683e-07, "epoch": 0.6308815056654503, "percentage": 63.09, "elapsed_time": "1:03:59", "remaining_time": "0:37:26", "throughput": 8082.2, "total_tokens": 31028800}
|
|
{"current_steps": 9860, "total_steps": 15621, "loss": 0.49, "lr": 7.206269274371457e-07, "epoch": 0.6312015876064272, "percentage": 63.12, "elapsed_time": "1:03:59", "remaining_time": "0:37:23", "throughput": 8084.92, "total_tokens": 31044736}
|
|
{"current_steps": 9865, "total_steps": 15621, "loss": 0.3496, "lr": 7.195542244457845e-07, "epoch": 0.6315216695474042, "percentage": 63.15, "elapsed_time": "1:04:00", "remaining_time": "0:37:20", "throughput": 8087.5, "total_tokens": 31059968}
|
|
{"current_steps": 9870, "total_steps": 15621, "loss": 0.3266, "lr": 7.184818715942666e-07, "epoch": 0.6318417514883811, "percentage": 63.18, "elapsed_time": "1:04:01", "remaining_time": "0:37:18", "throughput": 8090.01, "total_tokens": 31074880}
|
|
{"current_steps": 9875, "total_steps": 15621, "loss": 0.355, "lr": 7.174098702214374e-07, "epoch": 0.6321618334293579, "percentage": 63.22, "elapsed_time": "1:04:01", "remaining_time": "0:37:15", "throughput": 8092.66, "total_tokens": 31090432}
|
|
{"current_steps": 9880, "total_steps": 15621, "loss": 0.37, "lr": 7.163382216657033e-07, "epoch": 0.6324819153703348, "percentage": 63.25, "elapsed_time": "1:04:02", "remaining_time": "0:37:12", "throughput": 8095.53, "total_tokens": 31107264}
|
|
{"current_steps": 9885, "total_steps": 15621, "loss": 0.3444, "lr": 7.152669272650302e-07, "epoch": 0.6328019973113117, "percentage": 63.28, "elapsed_time": "1:04:03", "remaining_time": "0:37:10", "throughput": 8098.42, "total_tokens": 31124096}
|
|
{"current_steps": 9890, "total_steps": 15621, "loss": 0.3869, "lr": 7.141959883569411e-07, "epoch": 0.6331220792522886, "percentage": 63.31, "elapsed_time": "1:04:03", "remaining_time": "0:37:07", "throughput": 8100.88, "total_tokens": 31138752}
|
|
{"current_steps": 9895, "total_steps": 15621, "loss": 0.4701, "lr": 7.131254062785165e-07, "epoch": 0.6334421611932655, "percentage": 63.34, "elapsed_time": "1:04:04", "remaining_time": "0:37:04", "throughput": 8103.47, "total_tokens": 31154048}
|
|
{"current_steps": 9900, "total_steps": 15621, "loss": 0.5118, "lr": 7.120551823663907e-07, "epoch": 0.6337622431342423, "percentage": 63.38, "elapsed_time": "1:04:05", "remaining_time": "0:37:02", "throughput": 8106.26, "total_tokens": 31170304}
|
|
{"current_steps": 9905, "total_steps": 15621, "loss": 0.2817, "lr": 7.109853179567499e-07, "epoch": 0.6340823250752192, "percentage": 63.41, "elapsed_time": "1:04:05", "remaining_time": "0:36:59", "throughput": 8108.99, "total_tokens": 31186368}
|
|
{"current_steps": 9910, "total_steps": 15621, "loss": 0.4235, "lr": 7.099158143853337e-07, "epoch": 0.6344024070161961, "percentage": 63.44, "elapsed_time": "1:04:06", "remaining_time": "0:36:56", "throughput": 8111.58, "total_tokens": 31201664}
|
|
{"current_steps": 9915, "total_steps": 15621, "loss": 0.3891, "lr": 7.088466729874289e-07, "epoch": 0.634722488957173, "percentage": 63.47, "elapsed_time": "1:04:07", "remaining_time": "0:36:54", "throughput": 8114.22, "total_tokens": 31217216}
|
|
{"current_steps": 9920, "total_steps": 15621, "loss": 0.3784, "lr": 7.077778950978713e-07, "epoch": 0.63504257089815, "percentage": 63.5, "elapsed_time": "1:04:07", "remaining_time": "0:36:51", "throughput": 8117.05, "total_tokens": 31233728}
|
|
{"current_steps": 9925, "total_steps": 15621, "loss": 0.4682, "lr": 7.06709482051043e-07, "epoch": 0.6353626528391269, "percentage": 63.54, "elapsed_time": "1:04:08", "remaining_time": "0:36:48", "throughput": 8119.76, "total_tokens": 31249664}
|
|
{"current_steps": 9930, "total_steps": 15621, "loss": 0.3033, "lr": 7.056414351808698e-07, "epoch": 0.6356827347801037, "percentage": 63.57, "elapsed_time": "1:04:09", "remaining_time": "0:36:46", "throughput": 8122.43, "total_tokens": 31265408}
|
|
{"current_steps": 9935, "total_steps": 15621, "loss": 0.3517, "lr": 7.045737558208206e-07, "epoch": 0.6360028167210806, "percentage": 63.6, "elapsed_time": "1:04:09", "remaining_time": "0:36:43", "throughput": 8125.08, "total_tokens": 31281088}
|
|
{"current_steps": 9940, "total_steps": 15621, "loss": 0.4014, "lr": 7.035064453039064e-07, "epoch": 0.6363228986620575, "percentage": 63.63, "elapsed_time": "1:04:10", "remaining_time": "0:36:40", "throughput": 8127.68, "total_tokens": 31296512}
|
|
{"current_steps": 9945, "total_steps": 15621, "loss": 0.3772, "lr": 7.024395049626766e-07, "epoch": 0.6366429806030344, "percentage": 63.66, "elapsed_time": "1:04:11", "remaining_time": "0:36:38", "throughput": 8130.31, "total_tokens": 31312000}
|
|
{"current_steps": 9950, "total_steps": 15621, "loss": 0.3408, "lr": 7.013729361292182e-07, "epoch": 0.6369630625440112, "percentage": 63.7, "elapsed_time": "1:04:11", "remaining_time": "0:36:35", "throughput": 8132.91, "total_tokens": 31327488}
|
|
{"current_steps": 9955, "total_steps": 15621, "loss": 0.3065, "lr": 7.003067401351554e-07, "epoch": 0.6372831444849881, "percentage": 63.73, "elapsed_time": "1:04:12", "remaining_time": "0:36:32", "throughput": 8135.71, "total_tokens": 31343936}
|
|
{"current_steps": 9960, "total_steps": 15621, "loss": 0.406, "lr": 6.992409183116465e-07, "epoch": 0.637603226425965, "percentage": 63.76, "elapsed_time": "1:04:13", "remaining_time": "0:36:30", "throughput": 8138.29, "total_tokens": 31359232}
|
|
{"current_steps": 9965, "total_steps": 15621, "loss": 0.3724, "lr": 6.981754719893826e-07, "epoch": 0.6379233083669419, "percentage": 63.79, "elapsed_time": "1:04:13", "remaining_time": "0:36:27", "throughput": 8141.08, "total_tokens": 31375616}
|
|
{"current_steps": 9970, "total_steps": 15621, "loss": 0.4679, "lr": 6.971104024985852e-07, "epoch": 0.6382433903079189, "percentage": 63.82, "elapsed_time": "1:04:14", "remaining_time": "0:36:24", "throughput": 8143.81, "total_tokens": 31391680}
|
|
{"current_steps": 9975, "total_steps": 15621, "loss": 0.3809, "lr": 6.960457111690068e-07, "epoch": 0.6385634722488958, "percentage": 63.86, "elapsed_time": "1:04:15", "remaining_time": "0:36:22", "throughput": 8146.47, "total_tokens": 31407424}
|
|
{"current_steps": 9980, "total_steps": 15621, "loss": 0.3787, "lr": 6.94981399329927e-07, "epoch": 0.6388835541898726, "percentage": 63.89, "elapsed_time": "1:04:16", "remaining_time": "0:36:19", "throughput": 8149.07, "total_tokens": 31422912}
|
|
{"current_steps": 9985, "total_steps": 15621, "loss": 0.3921, "lr": 6.939174683101509e-07, "epoch": 0.6392036361308495, "percentage": 63.92, "elapsed_time": "1:04:16", "remaining_time": "0:36:16", "throughput": 8151.79, "total_tokens": 31438912}
|
|
{"current_steps": 9990, "total_steps": 15621, "loss": 0.2898, "lr": 6.9285391943801e-07, "epoch": 0.6395237180718264, "percentage": 63.95, "elapsed_time": "1:04:17", "remaining_time": "0:36:14", "throughput": 8154.56, "total_tokens": 31455168}
|
|
{"current_steps": 9995, "total_steps": 15621, "loss": 0.3133, "lr": 6.917907540413569e-07, "epoch": 0.6398438000128033, "percentage": 63.98, "elapsed_time": "1:04:18", "remaining_time": "0:36:11", "throughput": 8157.16, "total_tokens": 31470592}
|
|
{"current_steps": 10000, "total_steps": 15621, "loss": 0.3477, "lr": 6.907279734475659e-07, "epoch": 0.6401638819537802, "percentage": 64.02, "elapsed_time": "1:04:18", "remaining_time": "0:36:08", "throughput": 8159.66, "total_tokens": 31485632}
|
|
{"current_steps": 10005, "total_steps": 15621, "loss": 0.3725, "lr": 6.896655789835317e-07, "epoch": 0.640483963894757, "percentage": 64.05, "elapsed_time": "1:04:19", "remaining_time": "0:36:06", "throughput": 8162.12, "total_tokens": 31500352}
|
|
{"current_steps": 10010, "total_steps": 15621, "loss": 0.3702, "lr": 6.886035719756656e-07, "epoch": 0.6408040458357339, "percentage": 64.08, "elapsed_time": "1:04:20", "remaining_time": "0:36:03", "throughput": 8164.94, "total_tokens": 31516928}
|
|
{"current_steps": 10015, "total_steps": 15621, "loss": 0.279, "lr": 6.875419537498959e-07, "epoch": 0.6411241277767108, "percentage": 64.11, "elapsed_time": "1:04:20", "remaining_time": "0:36:01", "throughput": 8167.57, "total_tokens": 31532608}
|
|
{"current_steps": 10020, "total_steps": 15621, "loss": 0.6005, "lr": 6.864807256316658e-07, "epoch": 0.6414442097176877, "percentage": 64.14, "elapsed_time": "1:04:21", "remaining_time": "0:35:58", "throughput": 8170.29, "total_tokens": 31548608}
|
|
{"current_steps": 10025, "total_steps": 15621, "loss": 0.4117, "lr": 6.854198889459311e-07, "epoch": 0.6417642916586647, "percentage": 64.18, "elapsed_time": "1:04:22", "remaining_time": "0:35:55", "throughput": 8172.91, "total_tokens": 31564224}
|
|
{"current_steps": 10030, "total_steps": 15621, "loss": 0.2567, "lr": 6.84359445017158e-07, "epoch": 0.6420843735996415, "percentage": 64.21, "elapsed_time": "1:04:22", "remaining_time": "0:35:53", "throughput": 8175.41, "total_tokens": 31579200}
|
|
{"current_steps": 10035, "total_steps": 15621, "loss": 0.4257, "lr": 6.832993951693244e-07, "epoch": 0.6424044555406184, "percentage": 64.24, "elapsed_time": "1:04:23", "remaining_time": "0:35:50", "throughput": 8178.03, "total_tokens": 31594816}
|
|
{"current_steps": 10040, "total_steps": 15621, "loss": 0.3547, "lr": 6.822397407259144e-07, "epoch": 0.6427245374815953, "percentage": 64.27, "elapsed_time": "1:04:24", "remaining_time": "0:35:47", "throughput": 8180.66, "total_tokens": 31610432}
|
|
{"current_steps": 10045, "total_steps": 15621, "loss": 0.3794, "lr": 6.811804830099186e-07, "epoch": 0.6430446194225722, "percentage": 64.3, "elapsed_time": "1:04:24", "remaining_time": "0:35:45", "throughput": 8183.58, "total_tokens": 31627520}
|
|
{"current_steps": 10050, "total_steps": 15621, "loss": 0.3557, "lr": 6.801216233438336e-07, "epoch": 0.6433647013635491, "percentage": 64.34, "elapsed_time": "1:04:25", "remaining_time": "0:35:42", "throughput": 8186.45, "total_tokens": 31644352}
|
|
{"current_steps": 10055, "total_steps": 15621, "loss": 0.3919, "lr": 6.790631630496575e-07, "epoch": 0.6436847833045259, "percentage": 64.37, "elapsed_time": "1:04:26", "remaining_time": "0:35:40", "throughput": 8189.1, "total_tokens": 31660160}
|
|
{"current_steps": 10060, "total_steps": 15621, "loss": 0.45, "lr": 6.780051034488903e-07, "epoch": 0.6440048652455028, "percentage": 64.4, "elapsed_time": "1:04:26", "remaining_time": "0:35:37", "throughput": 8191.84, "total_tokens": 31676352}
|
|
{"current_steps": 10065, "total_steps": 15621, "loss": 0.3409, "lr": 6.769474458625323e-07, "epoch": 0.6443249471864797, "percentage": 64.43, "elapsed_time": "1:04:27", "remaining_time": "0:35:34", "throughput": 8194.5, "total_tokens": 31692160}
|
|
{"current_steps": 10070, "total_steps": 15621, "loss": 0.316, "lr": 6.758901916110813e-07, "epoch": 0.6446450291274566, "percentage": 64.46, "elapsed_time": "1:04:28", "remaining_time": "0:35:32", "throughput": 8197.1, "total_tokens": 31707712}
|
|
{"current_steps": 10075, "total_steps": 15621, "loss": 0.3278, "lr": 6.748333420145315e-07, "epoch": 0.6449651110684336, "percentage": 64.5, "elapsed_time": "1:04:28", "remaining_time": "0:35:29", "throughput": 8199.81, "total_tokens": 31723776}
|
|
{"current_steps": 10080, "total_steps": 15621, "loss": 0.4116, "lr": 6.737768983923718e-07, "epoch": 0.6452851930094105, "percentage": 64.53, "elapsed_time": "1:04:29", "remaining_time": "0:35:27", "throughput": 8202.69, "total_tokens": 31740672}
|
|
{"current_steps": 10085, "total_steps": 15621, "loss": 0.2941, "lr": 6.727208620635849e-07, "epoch": 0.6456052749503873, "percentage": 64.56, "elapsed_time": "1:04:30", "remaining_time": "0:35:24", "throughput": 8205.18, "total_tokens": 31755648}
|
|
{"current_steps": 10090, "total_steps": 15621, "loss": 0.4488, "lr": 6.716652343466446e-07, "epoch": 0.6459253568913642, "percentage": 64.59, "elapsed_time": "1:04:30", "remaining_time": "0:35:21", "throughput": 8207.68, "total_tokens": 31770624}
|
|
{"current_steps": 10095, "total_steps": 15621, "loss": 0.3044, "lr": 6.706100165595139e-07, "epoch": 0.6462454388323411, "percentage": 64.62, "elapsed_time": "1:04:31", "remaining_time": "0:35:19", "throughput": 8210.41, "total_tokens": 31786816}
|
|
{"current_steps": 10100, "total_steps": 15621, "loss": 0.3924, "lr": 6.695552100196452e-07, "epoch": 0.646565520773318, "percentage": 64.66, "elapsed_time": "1:04:32", "remaining_time": "0:35:16", "throughput": 8212.91, "total_tokens": 31801792}
|
|
{"current_steps": 10105, "total_steps": 15621, "loss": 0.5025, "lr": 6.685008160439769e-07, "epoch": 0.6468856027142948, "percentage": 64.69, "elapsed_time": "1:04:32", "remaining_time": "0:35:14", "throughput": 8215.82, "total_tokens": 31818944}
|
|
{"current_steps": 10110, "total_steps": 15621, "loss": 0.406, "lr": 6.674468359489313e-07, "epoch": 0.6472056846552717, "percentage": 64.72, "elapsed_time": "1:04:33", "remaining_time": "0:35:11", "throughput": 8218.35, "total_tokens": 31834176}
|
|
{"current_steps": 10115, "total_steps": 15621, "loss": 0.3488, "lr": 6.663932710504163e-07, "epoch": 0.6475257665962486, "percentage": 64.75, "elapsed_time": "1:04:34", "remaining_time": "0:35:08", "throughput": 8221.03, "total_tokens": 31850176}
|
|
{"current_steps": 10120, "total_steps": 15621, "loss": 0.3845, "lr": 6.653401226638192e-07, "epoch": 0.6478458485372255, "percentage": 64.78, "elapsed_time": "1:04:34", "remaining_time": "0:35:06", "throughput": 8223.62, "total_tokens": 31865600}
|
|
{"current_steps": 10125, "total_steps": 15621, "loss": 0.3985, "lr": 6.64287392104008e-07, "epoch": 0.6481659304782024, "percentage": 64.82, "elapsed_time": "1:04:35", "remaining_time": "0:35:03", "throughput": 8226.09, "total_tokens": 31880512}
|
|
{"current_steps": 10130, "total_steps": 15621, "loss": 0.4502, "lr": 6.632350806853299e-07, "epoch": 0.6484860124191794, "percentage": 64.85, "elapsed_time": "1:04:36", "remaining_time": "0:35:01", "throughput": 8228.77, "total_tokens": 31896512}
|
|
{"current_steps": 10135, "total_steps": 15621, "loss": 0.4127, "lr": 6.621831897216074e-07, "epoch": 0.6488060943601562, "percentage": 64.88, "elapsed_time": "1:04:36", "remaining_time": "0:34:58", "throughput": 8231.5, "total_tokens": 31912768}
|
|
{"current_steps": 10140, "total_steps": 15621, "loss": 0.4332, "lr": 6.611317205261387e-07, "epoch": 0.6491261763011331, "percentage": 64.91, "elapsed_time": "1:04:37", "remaining_time": "0:34:55", "throughput": 8233.93, "total_tokens": 31927488}
|
|
{"current_steps": 10145, "total_steps": 15621, "loss": 0.3464, "lr": 6.60080674411696e-07, "epoch": 0.64944625824211, "percentage": 64.94, "elapsed_time": "1:04:38", "remaining_time": "0:34:53", "throughput": 8236.47, "total_tokens": 31942784}
|
|
{"current_steps": 10150, "total_steps": 15621, "loss": 0.3139, "lr": 6.590300526905225e-07, "epoch": 0.6497663401830869, "percentage": 64.98, "elapsed_time": "1:04:38", "remaining_time": "0:34:50", "throughput": 8239.1, "total_tokens": 31958528}
|
|
{"current_steps": 10155, "total_steps": 15621, "loss": 0.4675, "lr": 6.579798566743313e-07, "epoch": 0.6500864221240638, "percentage": 65.01, "elapsed_time": "1:04:39", "remaining_time": "0:34:48", "throughput": 8241.66, "total_tokens": 31974016}
|
|
{"current_steps": 10160, "total_steps": 15621, "loss": 0.3272, "lr": 6.569300876743049e-07, "epoch": 0.6504065040650406, "percentage": 65.04, "elapsed_time": "1:04:40", "remaining_time": "0:34:45", "throughput": 8244.48, "total_tokens": 31990720}
|
|
{"current_steps": 10165, "total_steps": 15621, "loss": 0.324, "lr": 6.558807470010923e-07, "epoch": 0.6507265860060175, "percentage": 65.07, "elapsed_time": "1:04:40", "remaining_time": "0:34:43", "throughput": 8247.26, "total_tokens": 32007168}
|
|
{"current_steps": 10166, "total_steps": 15621, "eval_loss": 0.38159435987472534, "epoch": 0.6507906023942129, "percentage": 65.08, "elapsed_time": "1:05:31", "remaining_time": "0:35:09", "throughput": 8141.57, "total_tokens": 32010176}
|
|
{"current_steps": 10170, "total_steps": 15621, "loss": 0.355, "lr": 6.548318359648071e-07, "epoch": 0.6510466679469944, "percentage": 65.1, "elapsed_time": "1:09:19", "remaining_time": "0:37:09", "throughput": 7697.71, "total_tokens": 32022208}
|
|
{"current_steps": 10175, "total_steps": 15621, "loss": 0.4036, "lr": 6.537833558750279e-07, "epoch": 0.6513667498879713, "percentage": 65.14, "elapsed_time": "1:09:20", "remaining_time": "0:37:06", "throughput": 7700.22, "total_tokens": 32037760}
|
|
{"current_steps": 10180, "total_steps": 15621, "loss": 0.3108, "lr": 6.527353080407938e-07, "epoch": 0.6516868318289483, "percentage": 65.17, "elapsed_time": "1:09:21", "remaining_time": "0:37:04", "throughput": 7702.63, "total_tokens": 32052800}
|
|
{"current_steps": 10185, "total_steps": 15621, "loss": 0.3491, "lr": 6.516876937706048e-07, "epoch": 0.6520069137699251, "percentage": 65.2, "elapsed_time": "1:09:21", "remaining_time": "0:37:01", "throughput": 7705.12, "total_tokens": 32068288}
|
|
{"current_steps": 10190, "total_steps": 15621, "loss": 0.3769, "lr": 6.506405143724196e-07, "epoch": 0.652326995710902, "percentage": 65.23, "elapsed_time": "1:09:22", "remaining_time": "0:36:58", "throughput": 7707.48, "total_tokens": 32083200}
|
|
{"current_steps": 10195, "total_steps": 15621, "loss": 0.4685, "lr": 6.495937711536546e-07, "epoch": 0.6526470776518789, "percentage": 65.26, "elapsed_time": "1:09:23", "remaining_time": "0:36:55", "throughput": 7709.93, "total_tokens": 32098432}
|
|
{"current_steps": 10200, "total_steps": 15621, "loss": 0.4177, "lr": 6.485474654211803e-07, "epoch": 0.6529671595928558, "percentage": 65.3, "elapsed_time": "1:09:23", "remaining_time": "0:36:53", "throughput": 7712.61, "total_tokens": 32114944}
|
|
{"current_steps": 10205, "total_steps": 15621, "loss": 0.3062, "lr": 6.475015984813217e-07, "epoch": 0.6532872415338327, "percentage": 65.33, "elapsed_time": "1:09:24", "remaining_time": "0:36:50", "throughput": 7715.3, "total_tokens": 32131520}
|
|
{"current_steps": 10210, "total_steps": 15621, "loss": 0.321, "lr": 6.464561716398564e-07, "epoch": 0.6536073234748095, "percentage": 65.36, "elapsed_time": "1:09:25", "remaining_time": "0:36:47", "throughput": 7717.79, "total_tokens": 32147008}
|
|
{"current_steps": 10215, "total_steps": 15621, "loss": 0.3851, "lr": 6.454111862020122e-07, "epoch": 0.6539274054157864, "percentage": 65.39, "elapsed_time": "1:09:25", "remaining_time": "0:36:44", "throughput": 7720.27, "total_tokens": 32162560}
|
|
{"current_steps": 10220, "total_steps": 15621, "loss": 0.3665, "lr": 6.443666434724649e-07, "epoch": 0.6542474873567633, "percentage": 65.42, "elapsed_time": "1:09:26", "remaining_time": "0:36:41", "throughput": 7722.56, "total_tokens": 32177024}
|
|
{"current_steps": 10225, "total_steps": 15621, "loss": 0.542, "lr": 6.43322544755339e-07, "epoch": 0.6545675692977402, "percentage": 65.46, "elapsed_time": "1:09:27", "remaining_time": "0:36:39", "throughput": 7725.13, "total_tokens": 32193024}
|
|
{"current_steps": 10230, "total_steps": 15621, "loss": 0.3447, "lr": 6.422788913542038e-07, "epoch": 0.6548876512387171, "percentage": 65.49, "elapsed_time": "1:09:27", "remaining_time": "0:36:36", "throughput": 7727.67, "total_tokens": 32208896}
|
|
{"current_steps": 10235, "total_steps": 15621, "loss": 0.338, "lr": 6.412356845720726e-07, "epoch": 0.655207733179694, "percentage": 65.52, "elapsed_time": "1:09:28", "remaining_time": "0:36:33", "throughput": 7730.31, "total_tokens": 32225280}
|
|
{"current_steps": 10240, "total_steps": 15621, "loss": 0.3601, "lr": 6.40192925711402e-07, "epoch": 0.6555278151206709, "percentage": 65.55, "elapsed_time": "1:09:29", "remaining_time": "0:36:30", "throughput": 7732.77, "total_tokens": 32240768}
|
|
{"current_steps": 10245, "total_steps": 15621, "loss": 0.3259, "lr": 6.39150616074088e-07, "epoch": 0.6558478970616478, "percentage": 65.58, "elapsed_time": "1:09:30", "remaining_time": "0:36:28", "throughput": 7735.18, "total_tokens": 32255872}
|
|
{"current_steps": 10250, "total_steps": 15621, "loss": 0.4068, "lr": 6.381087569614668e-07, "epoch": 0.6561679790026247, "percentage": 65.62, "elapsed_time": "1:09:30", "remaining_time": "0:36:25", "throughput": 7737.87, "total_tokens": 32272512}
|
|
{"current_steps": 10255, "total_steps": 15621, "loss": 0.3801, "lr": 6.370673496743116e-07, "epoch": 0.6564880609436016, "percentage": 65.65, "elapsed_time": "1:09:31", "remaining_time": "0:36:22", "throughput": 7740.03, "total_tokens": 32286272}
|
|
{"current_steps": 10260, "total_steps": 15621, "loss": 0.4224, "lr": 6.360263955128315e-07, "epoch": 0.6568081428845784, "percentage": 65.68, "elapsed_time": "1:09:32", "remaining_time": "0:36:19", "throughput": 7742.54, "total_tokens": 32301952}
|
|
{"current_steps": 10265, "total_steps": 15621, "loss": 0.3657, "lr": 6.349858957766701e-07, "epoch": 0.6571282248255553, "percentage": 65.71, "elapsed_time": "1:09:32", "remaining_time": "0:36:17", "throughput": 7745.14, "total_tokens": 32318208}
|
|
{"current_steps": 10270, "total_steps": 15621, "loss": 0.3385, "lr": 6.339458517649036e-07, "epoch": 0.6574483067665322, "percentage": 65.74, "elapsed_time": "1:09:33", "remaining_time": "0:36:14", "throughput": 7747.58, "total_tokens": 32333504}
|
|
{"current_steps": 10275, "total_steps": 15621, "loss": 0.3685, "lr": 6.329062647760395e-07, "epoch": 0.6577683887075091, "percentage": 65.78, "elapsed_time": "1:09:34", "remaining_time": "0:36:11", "throughput": 7750.29, "total_tokens": 32350208}
|
|
{"current_steps": 10280, "total_steps": 15621, "loss": 0.3259, "lr": 6.318671361080137e-07, "epoch": 0.658088470648486, "percentage": 65.81, "elapsed_time": "1:09:34", "remaining_time": "0:36:08", "throughput": 7752.71, "total_tokens": 32365376}
|
|
{"current_steps": 10285, "total_steps": 15621, "loss": 0.3411, "lr": 6.308284670581906e-07, "epoch": 0.6584085525894628, "percentage": 65.84, "elapsed_time": "1:09:35", "remaining_time": "0:36:06", "throughput": 7755.25, "total_tokens": 32381248}
|
|
{"current_steps": 10290, "total_steps": 15621, "loss": 0.47, "lr": 6.297902589233612e-07, "epoch": 0.6587286345304398, "percentage": 65.87, "elapsed_time": "1:09:36", "remaining_time": "0:36:03", "throughput": 7757.58, "total_tokens": 32395968}
|
|
{"current_steps": 10295, "total_steps": 15621, "loss": 0.3728, "lr": 6.287525129997404e-07, "epoch": 0.6590487164714167, "percentage": 65.9, "elapsed_time": "1:09:36", "remaining_time": "0:36:00", "throughput": 7760.06, "total_tokens": 32411456}
|
|
{"current_steps": 10300, "total_steps": 15621, "loss": 0.4016, "lr": 6.277152305829656e-07, "epoch": 0.6593687984123936, "percentage": 65.94, "elapsed_time": "1:09:37", "remaining_time": "0:35:58", "throughput": 7762.52, "total_tokens": 32426880}
|
|
{"current_steps": 10305, "total_steps": 15621, "loss": 0.326, "lr": 6.266784129680968e-07, "epoch": 0.6596888803533705, "percentage": 65.97, "elapsed_time": "1:09:38", "remaining_time": "0:35:55", "throughput": 7764.99, "total_tokens": 32442368}
|
|
{"current_steps": 10310, "total_steps": 15621, "loss": 0.3979, "lr": 6.256420614496129e-07, "epoch": 0.6600089622943474, "percentage": 66.0, "elapsed_time": "1:09:38", "remaining_time": "0:35:52", "throughput": 7767.47, "total_tokens": 32457920}
|
|
{"current_steps": 10315, "total_steps": 15621, "loss": 0.4182, "lr": 6.246061773214102e-07, "epoch": 0.6603290442353242, "percentage": 66.03, "elapsed_time": "1:09:39", "remaining_time": "0:35:49", "throughput": 7769.97, "total_tokens": 32473536}
|
|
{"current_steps": 10320, "total_steps": 15621, "loss": 0.4073, "lr": 6.235707618768032e-07, "epoch": 0.6606491261763011, "percentage": 66.06, "elapsed_time": "1:09:40", "remaining_time": "0:35:47", "throughput": 7772.65, "total_tokens": 32490240}
|
|
{"current_steps": 10325, "total_steps": 15621, "loss": 0.344, "lr": 6.225358164085196e-07, "epoch": 0.660969208117278, "percentage": 66.1, "elapsed_time": "1:09:40", "remaining_time": "0:35:44", "throughput": 7775.12, "total_tokens": 32505728}
|
|
{"current_steps": 10330, "total_steps": 15621, "loss": 0.3463, "lr": 6.21501342208701e-07, "epoch": 0.6612892900582549, "percentage": 66.13, "elapsed_time": "1:09:41", "remaining_time": "0:35:41", "throughput": 7777.52, "total_tokens": 32520960}
|
|
{"current_steps": 10335, "total_steps": 15621, "loss": 0.3945, "lr": 6.204673405689007e-07, "epoch": 0.6616093719992318, "percentage": 66.16, "elapsed_time": "1:09:42", "remaining_time": "0:35:38", "throughput": 7779.9, "total_tokens": 32535872}
|
|
{"current_steps": 10340, "total_steps": 15621, "loss": 0.3129, "lr": 6.194338127800823e-07, "epoch": 0.6619294539402087, "percentage": 66.19, "elapsed_time": "1:09:42", "remaining_time": "0:35:36", "throughput": 7782.56, "total_tokens": 32552448}
|
|
{"current_steps": 10345, "total_steps": 15621, "loss": 0.3936, "lr": 6.184007601326165e-07, "epoch": 0.6622495358811856, "percentage": 66.22, "elapsed_time": "1:09:43", "remaining_time": "0:35:33", "throughput": 7784.9, "total_tokens": 32567232}
|
|
{"current_steps": 10350, "total_steps": 15621, "loss": 0.37, "lr": 6.173681839162824e-07, "epoch": 0.6625696178221625, "percentage": 66.26, "elapsed_time": "1:09:44", "remaining_time": "0:35:30", "throughput": 7787.5, "total_tokens": 32583360}
|
|
{"current_steps": 10355, "total_steps": 15621, "loss": 0.3328, "lr": 6.163360854202635e-07, "epoch": 0.6628896997631394, "percentage": 66.29, "elapsed_time": "1:09:44", "remaining_time": "0:35:28", "throughput": 7789.93, "total_tokens": 32598656}
|
|
{"current_steps": 10360, "total_steps": 15621, "loss": 0.3189, "lr": 6.153044659331461e-07, "epoch": 0.6632097817041163, "percentage": 66.32, "elapsed_time": "1:09:45", "remaining_time": "0:35:25", "throughput": 7792.39, "total_tokens": 32614144}
|
|
{"current_steps": 10365, "total_steps": 15621, "loss": 0.3708, "lr": 6.142733267429203e-07, "epoch": 0.6635298636450931, "percentage": 66.35, "elapsed_time": "1:09:46", "remaining_time": "0:35:22", "throughput": 7794.75, "total_tokens": 32629120}
|
|
{"current_steps": 10370, "total_steps": 15621, "loss": 0.4218, "lr": 6.132426691369748e-07, "epoch": 0.66384994558607, "percentage": 66.38, "elapsed_time": "1:09:46", "remaining_time": "0:35:20", "throughput": 7797.46, "total_tokens": 32645952}
|
|
{"current_steps": 10375, "total_steps": 15621, "loss": 0.3955, "lr": 6.122124944020977e-07, "epoch": 0.6641700275270469, "percentage": 66.42, "elapsed_time": "1:09:47", "remaining_time": "0:35:17", "throughput": 7799.97, "total_tokens": 32661696}
|
|
{"current_steps": 10380, "total_steps": 15621, "loss": 0.3779, "lr": 6.111828038244749e-07, "epoch": 0.6644901094680238, "percentage": 66.45, "elapsed_time": "1:09:48", "remaining_time": "0:35:14", "throughput": 7802.51, "total_tokens": 32677760}
|
|
{"current_steps": 10385, "total_steps": 15621, "loss": 0.3063, "lr": 6.101535986896866e-07, "epoch": 0.6648101914090007, "percentage": 66.48, "elapsed_time": "1:09:48", "remaining_time": "0:35:11", "throughput": 7805.03, "total_tokens": 32693568}
|
|
{"current_steps": 10390, "total_steps": 15621, "loss": 0.2929, "lr": 6.091248802827076e-07, "epoch": 0.6651302733499775, "percentage": 66.51, "elapsed_time": "1:09:49", "remaining_time": "0:35:09", "throughput": 7807.43, "total_tokens": 32708736}
|
|
{"current_steps": 10395, "total_steps": 15621, "loss": 0.3258, "lr": 6.080966498879048e-07, "epoch": 0.6654503552909545, "percentage": 66.55, "elapsed_time": "1:09:50", "remaining_time": "0:35:06", "throughput": 7810.11, "total_tokens": 32725440}
|
|
{"current_steps": 10400, "total_steps": 15621, "loss": 0.293, "lr": 6.070689087890363e-07, "epoch": 0.6657704372319314, "percentage": 66.58, "elapsed_time": "1:09:50", "remaining_time": "0:35:03", "throughput": 7812.51, "total_tokens": 32740608}
|
|
{"current_steps": 10405, "total_steps": 15621, "loss": 0.4026, "lr": 6.060416582692487e-07, "epoch": 0.6660905191729083, "percentage": 66.61, "elapsed_time": "1:09:51", "remaining_time": "0:35:01", "throughput": 7815.02, "total_tokens": 32756416}
|
|
{"current_steps": 10410, "total_steps": 15621, "loss": 0.3334, "lr": 6.05014899611076e-07, "epoch": 0.6664106011138852, "percentage": 66.64, "elapsed_time": "1:09:52", "remaining_time": "0:34:58", "throughput": 7817.48, "total_tokens": 32771904}
|
|
{"current_steps": 10415, "total_steps": 15621, "loss": 0.3801, "lr": 6.039886340964391e-07, "epoch": 0.666730683054862, "percentage": 66.67, "elapsed_time": "1:09:52", "remaining_time": "0:34:55", "throughput": 7819.94, "total_tokens": 32787392}
|
|
{"current_steps": 10420, "total_steps": 15621, "loss": 0.3367, "lr": 6.029628630066423e-07, "epoch": 0.6670507649958389, "percentage": 66.71, "elapsed_time": "1:09:53", "remaining_time": "0:34:53", "throughput": 7822.44, "total_tokens": 32803136}
|
|
{"current_steps": 10425, "total_steps": 15621, "loss": 0.4266, "lr": 6.019375876223724e-07, "epoch": 0.6673708469368158, "percentage": 66.74, "elapsed_time": "1:09:54", "remaining_time": "0:34:50", "throughput": 7824.89, "total_tokens": 32818624}
|
|
{"current_steps": 10430, "total_steps": 15621, "loss": 0.4689, "lr": 6.009128092236982e-07, "epoch": 0.6676909288777927, "percentage": 66.77, "elapsed_time": "1:09:54", "remaining_time": "0:34:47", "throughput": 7827.31, "total_tokens": 32833920}
|
|
{"current_steps": 10435, "total_steps": 15621, "loss": 0.3876, "lr": 5.998885290900679e-07, "epoch": 0.6680110108187696, "percentage": 66.8, "elapsed_time": "1:09:55", "remaining_time": "0:34:45", "throughput": 7829.6, "total_tokens": 32848512}
|
|
{"current_steps": 10440, "total_steps": 15621, "loss": 0.3414, "lr": 5.988647485003061e-07, "epoch": 0.6683310927597464, "percentage": 66.83, "elapsed_time": "1:09:56", "remaining_time": "0:34:42", "throughput": 7832.25, "total_tokens": 32865088}
|
|
{"current_steps": 10445, "total_steps": 15621, "loss": 0.4652, "lr": 5.978414687326164e-07, "epoch": 0.6686511747007234, "percentage": 66.87, "elapsed_time": "1:09:56", "remaining_time": "0:34:39", "throughput": 7834.97, "total_tokens": 32882048}
|
|
{"current_steps": 10450, "total_steps": 15621, "loss": 0.3775, "lr": 5.968186910645745e-07, "epoch": 0.6689712566417003, "percentage": 66.9, "elapsed_time": "1:09:57", "remaining_time": "0:34:37", "throughput": 7837.6, "total_tokens": 32898624}
|
|
{"current_steps": 10455, "total_steps": 15621, "loss": 0.5049, "lr": 5.957964167731305e-07, "epoch": 0.6692913385826772, "percentage": 66.93, "elapsed_time": "1:09:58", "remaining_time": "0:34:34", "throughput": 7840.05, "total_tokens": 32914176}
|
|
{"current_steps": 10460, "total_steps": 15621, "loss": 0.4117, "lr": 5.947746471346065e-07, "epoch": 0.6696114205236541, "percentage": 66.96, "elapsed_time": "1:09:58", "remaining_time": "0:34:31", "throughput": 7842.76, "total_tokens": 32931136}
|
|
{"current_steps": 10465, "total_steps": 15621, "loss": 0.3321, "lr": 5.937533834246932e-07, "epoch": 0.669931502464631, "percentage": 66.99, "elapsed_time": "1:09:59", "remaining_time": "0:34:29", "throughput": 7845.39, "total_tokens": 32947648}
|
|
{"current_steps": 10470, "total_steps": 15621, "loss": 0.3795, "lr": 5.927326269184504e-07, "epoch": 0.6702515844056078, "percentage": 67.03, "elapsed_time": "1:10:00", "remaining_time": "0:34:26", "throughput": 7848.03, "total_tokens": 32964224}
|
|
{"current_steps": 10475, "total_steps": 15621, "loss": 0.4602, "lr": 5.917123788903049e-07, "epoch": 0.6705716663465847, "percentage": 67.06, "elapsed_time": "1:10:01", "remaining_time": "0:34:23", "throughput": 7850.91, "total_tokens": 32982080}
|
|
{"current_steps": 10480, "total_steps": 15621, "loss": 0.4674, "lr": 5.906926406140484e-07, "epoch": 0.6708917482875616, "percentage": 67.09, "elapsed_time": "1:10:01", "remaining_time": "0:34:21", "throughput": 7853.33, "total_tokens": 32997440}
|
|
{"current_steps": 10485, "total_steps": 15621, "loss": 0.424, "lr": 5.896734133628354e-07, "epoch": 0.6712118302285385, "percentage": 67.12, "elapsed_time": "1:10:02", "remaining_time": "0:34:18", "throughput": 7855.8, "total_tokens": 33013056}
|
|
{"current_steps": 10490, "total_steps": 15621, "loss": 0.3804, "lr": 5.886546984091838e-07, "epoch": 0.6715319121695154, "percentage": 67.15, "elapsed_time": "1:10:03", "remaining_time": "0:34:15", "throughput": 7858.21, "total_tokens": 33028416}
|
|
{"current_steps": 10495, "total_steps": 15621, "loss": 0.3567, "lr": 5.876364970249711e-07, "epoch": 0.6718519941104922, "percentage": 67.19, "elapsed_time": "1:10:03", "remaining_time": "0:34:13", "throughput": 7860.47, "total_tokens": 33042880}
|
|
{"current_steps": 10500, "total_steps": 15621, "loss": 0.2744, "lr": 5.866188104814336e-07, "epoch": 0.6721720760514692, "percentage": 67.22, "elapsed_time": "1:10:04", "remaining_time": "0:34:10", "throughput": 7862.88, "total_tokens": 33058240}
|
|
{"current_steps": 10505, "total_steps": 15621, "loss": 0.3833, "lr": 5.856016400491646e-07, "epoch": 0.6724921579924461, "percentage": 67.25, "elapsed_time": "1:10:05", "remaining_time": "0:34:07", "throughput": 7865.35, "total_tokens": 33073920}
|
|
{"current_steps": 10510, "total_steps": 15621, "loss": 0.3158, "lr": 5.845849869981136e-07, "epoch": 0.672812239933423, "percentage": 67.28, "elapsed_time": "1:10:05", "remaining_time": "0:34:05", "throughput": 7867.78, "total_tokens": 33089344}
|
|
{"current_steps": 10515, "total_steps": 15621, "loss": 0.3608, "lr": 5.835688525975842e-07, "epoch": 0.6731323218743999, "percentage": 67.31, "elapsed_time": "1:10:06", "remaining_time": "0:34:02", "throughput": 7870.13, "total_tokens": 33104384}
|
|
{"current_steps": 10520, "total_steps": 15621, "loss": 0.3926, "lr": 5.825532381162311e-07, "epoch": 0.6734524038153767, "percentage": 67.35, "elapsed_time": "1:10:07", "remaining_time": "0:33:59", "throughput": 7872.59, "total_tokens": 33120064}
|
|
{"current_steps": 10525, "total_steps": 15621, "loss": 0.3889, "lr": 5.815381448220619e-07, "epoch": 0.6737724857563536, "percentage": 67.38, "elapsed_time": "1:10:07", "remaining_time": "0:33:57", "throughput": 7875.12, "total_tokens": 33136128}
|
|
{"current_steps": 10530, "total_steps": 15621, "loss": 0.3599, "lr": 5.805235739824327e-07, "epoch": 0.6740925676973305, "percentage": 67.41, "elapsed_time": "1:10:08", "remaining_time": "0:33:54", "throughput": 7878.13, "total_tokens": 33154816}
|
|
{"current_steps": 10535, "total_steps": 15621, "loss": 0.5053, "lr": 5.795095268640458e-07, "epoch": 0.6744126496383074, "percentage": 67.44, "elapsed_time": "1:10:09", "remaining_time": "0:33:52", "throughput": 7880.49, "total_tokens": 33169920}
|
|
{"current_steps": 10540, "total_steps": 15621, "loss": 0.5436, "lr": 5.784960047329519e-07, "epoch": 0.6747327315792843, "percentage": 67.47, "elapsed_time": "1:10:09", "remaining_time": "0:33:49", "throughput": 7883.35, "total_tokens": 33187712}
|
|
{"current_steps": 10545, "total_steps": 15621, "loss": 0.3931, "lr": 5.774830088545452e-07, "epoch": 0.6750528135202611, "percentage": 67.51, "elapsed_time": "1:10:10", "remaining_time": "0:33:46", "throughput": 7885.72, "total_tokens": 33202880}
|
|
{"current_steps": 10550, "total_steps": 15621, "loss": 0.3059, "lr": 5.76470540493563e-07, "epoch": 0.6753728954612381, "percentage": 67.54, "elapsed_time": "1:10:11", "remaining_time": "0:33:44", "throughput": 7888.26, "total_tokens": 33218944}
|
|
{"current_steps": 10555, "total_steps": 15621, "loss": 0.4468, "lr": 5.754586009140836e-07, "epoch": 0.675692977402215, "percentage": 67.57, "elapsed_time": "1:10:11", "remaining_time": "0:33:41", "throughput": 7890.73, "total_tokens": 33234688}
|
|
{"current_steps": 10560, "total_steps": 15621, "loss": 0.3582, "lr": 5.744471913795256e-07, "epoch": 0.6760130593431919, "percentage": 67.6, "elapsed_time": "1:10:12", "remaining_time": "0:33:38", "throughput": 7893.12, "total_tokens": 33249920}
|
|
{"current_steps": 10565, "total_steps": 15621, "loss": 0.3455, "lr": 5.734363131526459e-07, "epoch": 0.6763331412841688, "percentage": 67.63, "elapsed_time": "1:10:13", "remaining_time": "0:33:36", "throughput": 7895.62, "total_tokens": 33265792}
|
|
{"current_steps": 10570, "total_steps": 15621, "loss": 0.3779, "lr": 5.724259674955377e-07, "epoch": 0.6766532232251457, "percentage": 67.67, "elapsed_time": "1:10:13", "remaining_time": "0:33:33", "throughput": 7897.98, "total_tokens": 33280832}
|
|
{"current_steps": 10575, "total_steps": 15621, "loss": 0.3829, "lr": 5.714161556696291e-07, "epoch": 0.6769733051661225, "percentage": 67.7, "elapsed_time": "1:10:14", "remaining_time": "0:33:31", "throughput": 7900.46, "total_tokens": 33296576}
|
|
{"current_steps": 10580, "total_steps": 15621, "loss": 0.3425, "lr": 5.704068789356824e-07, "epoch": 0.6772933871070994, "percentage": 67.73, "elapsed_time": "1:10:15", "remaining_time": "0:33:28", "throughput": 7903.71, "total_tokens": 33316672}
|
|
{"current_steps": 10585, "total_steps": 15621, "loss": 0.3569, "lr": 5.693981385537912e-07, "epoch": 0.6776134690480763, "percentage": 67.76, "elapsed_time": "1:10:15", "remaining_time": "0:33:25", "throughput": 7906.0, "total_tokens": 33331456}
|
|
{"current_steps": 10590, "total_steps": 15621, "loss": 0.3483, "lr": 5.683899357833801e-07, "epoch": 0.6779335509890532, "percentage": 67.79, "elapsed_time": "1:10:16", "remaining_time": "0:33:23", "throughput": 7908.39, "total_tokens": 33346752}
|
|
{"current_steps": 10595, "total_steps": 15621, "loss": 0.4486, "lr": 5.673822718832015e-07, "epoch": 0.67825363293003, "percentage": 67.83, "elapsed_time": "1:10:17", "remaining_time": "0:33:20", "throughput": 7910.88, "total_tokens": 33362688}
|
|
{"current_steps": 10600, "total_steps": 15621, "loss": 0.3732, "lr": 5.663751481113362e-07, "epoch": 0.6785737148710069, "percentage": 67.86, "elapsed_time": "1:10:17", "remaining_time": "0:33:17", "throughput": 7913.2, "total_tokens": 33377600}
|
|
{"current_steps": 10605, "total_steps": 15621, "loss": 0.4346, "lr": 5.653685657251896e-07, "epoch": 0.6788937968119839, "percentage": 67.89, "elapsed_time": "1:10:18", "remaining_time": "0:33:15", "throughput": 7915.64, "total_tokens": 33393280}
|
|
{"current_steps": 10610, "total_steps": 15621, "loss": 0.378, "lr": 5.643625259814922e-07, "epoch": 0.6792138787529608, "percentage": 67.92, "elapsed_time": "1:10:19", "remaining_time": "0:33:12", "throughput": 7918.3, "total_tokens": 33410112}
|
|
{"current_steps": 10615, "total_steps": 15621, "loss": 0.3557, "lr": 5.633570301362953e-07, "epoch": 0.6795339606939377, "percentage": 67.95, "elapsed_time": "1:10:20", "remaining_time": "0:33:10", "throughput": 7920.85, "total_tokens": 33426624}
|
|
{"current_steps": 10620, "total_steps": 15621, "loss": 0.3642, "lr": 5.623520794449739e-07, "epoch": 0.6798540426349146, "percentage": 67.99, "elapsed_time": "1:10:20", "remaining_time": "0:33:07", "throughput": 7923.3, "total_tokens": 33442240}
|
|
{"current_steps": 10625, "total_steps": 15621, "loss": 0.4764, "lr": 5.613476751622195e-07, "epoch": 0.6801741245758914, "percentage": 68.02, "elapsed_time": "1:10:21", "remaining_time": "0:33:04", "throughput": 7925.84, "total_tokens": 33458432}
|
|
{"current_steps": 10630, "total_steps": 15621, "loss": 0.4373, "lr": 5.603438185420426e-07, "epoch": 0.6804942065168683, "percentage": 68.05, "elapsed_time": "1:10:22", "remaining_time": "0:33:02", "throughput": 7928.26, "total_tokens": 33473856}
|
|
{"current_steps": 10635, "total_steps": 15621, "loss": 0.473, "lr": 5.593405108377714e-07, "epoch": 0.6808142884578452, "percentage": 68.08, "elapsed_time": "1:10:22", "remaining_time": "0:32:59", "throughput": 7930.65, "total_tokens": 33489216}
|
|
{"current_steps": 10640, "total_steps": 15621, "loss": 0.4676, "lr": 5.583377533020457e-07, "epoch": 0.6811343703988221, "percentage": 68.11, "elapsed_time": "1:10:23", "remaining_time": "0:32:57", "throughput": 7933.17, "total_tokens": 33505280}
|
|
{"current_steps": 10645, "total_steps": 15621, "loss": 0.2929, "lr": 5.573355471868201e-07, "epoch": 0.681454452339799, "percentage": 68.15, "elapsed_time": "1:10:24", "remaining_time": "0:32:54", "throughput": 7935.54, "total_tokens": 33520512}
|
|
{"current_steps": 10650, "total_steps": 15621, "loss": 0.3535, "lr": 5.563338937433621e-07, "epoch": 0.6817745342807758, "percentage": 68.18, "elapsed_time": "1:10:24", "remaining_time": "0:32:51", "throughput": 7938.19, "total_tokens": 33537344}
|
|
{"current_steps": 10655, "total_steps": 15621, "loss": 0.2518, "lr": 5.553327942222472e-07, "epoch": 0.6820946162217527, "percentage": 68.21, "elapsed_time": "1:10:25", "remaining_time": "0:32:49", "throughput": 7940.47, "total_tokens": 33552128}
|
|
{"current_steps": 10660, "total_steps": 15621, "loss": 0.3535, "lr": 5.54332249873359e-07, "epoch": 0.6824146981627297, "percentage": 68.24, "elapsed_time": "1:10:26", "remaining_time": "0:32:46", "throughput": 7942.74, "total_tokens": 33566784}
|
|
{"current_steps": 10665, "total_steps": 15621, "loss": 0.2955, "lr": 5.533322619458896e-07, "epoch": 0.6827347801037066, "percentage": 68.27, "elapsed_time": "1:10:26", "remaining_time": "0:32:44", "throughput": 7945.12, "total_tokens": 33582080}
|
|
{"current_steps": 10670, "total_steps": 15621, "loss": 0.4268, "lr": 5.52332831688336e-07, "epoch": 0.6830548620446835, "percentage": 68.31, "elapsed_time": "1:10:27", "remaining_time": "0:32:41", "throughput": 7947.4, "total_tokens": 33596864}
|
|
{"current_steps": 10675, "total_steps": 15621, "loss": 0.3527, "lr": 5.513339603484981e-07, "epoch": 0.6833749439856603, "percentage": 68.34, "elapsed_time": "1:10:28", "remaining_time": "0:32:38", "throughput": 7949.93, "total_tokens": 33613056}
|
|
{"current_steps": 10680, "total_steps": 15621, "loss": 0.4979, "lr": 5.503356491734785e-07, "epoch": 0.6836950259266372, "percentage": 68.37, "elapsed_time": "1:10:28", "remaining_time": "0:32:36", "throughput": 7952.26, "total_tokens": 33628160}
|
|
{"current_steps": 10685, "total_steps": 15621, "loss": 0.4457, "lr": 5.493378994096806e-07, "epoch": 0.6840151078676141, "percentage": 68.4, "elapsed_time": "1:10:29", "remaining_time": "0:32:33", "throughput": 7954.95, "total_tokens": 33645184}
|
|
{"current_steps": 10690, "total_steps": 15621, "loss": 0.39, "lr": 5.483407123028067e-07, "epoch": 0.684335189808591, "percentage": 68.43, "elapsed_time": "1:10:30", "remaining_time": "0:32:31", "throughput": 7957.36, "total_tokens": 33660800}
|
|
{"current_steps": 10695, "total_steps": 15621, "loss": 0.4734, "lr": 5.473440890978566e-07, "epoch": 0.6846552717495679, "percentage": 68.47, "elapsed_time": "1:10:30", "remaining_time": "0:32:28", "throughput": 7959.85, "total_tokens": 33676736}
|
|
{"current_steps": 10700, "total_steps": 15621, "loss": 0.4094, "lr": 5.463480310391261e-07, "epoch": 0.6849753536905447, "percentage": 68.5, "elapsed_time": "1:10:31", "remaining_time": "0:32:26", "throughput": 7962.37, "total_tokens": 33692928}
|
|
{"current_steps": 10705, "total_steps": 15621, "loss": 0.3824, "lr": 5.453525393702052e-07, "epoch": 0.6852954356315216, "percentage": 68.53, "elapsed_time": "1:10:32", "remaining_time": "0:32:23", "throughput": 7964.76, "total_tokens": 33708352}
|
|
{"current_steps": 10710, "total_steps": 15621, "loss": 0.3687, "lr": 5.443576153339771e-07, "epoch": 0.6856155175724986, "percentage": 68.56, "elapsed_time": "1:10:32", "remaining_time": "0:32:20", "throughput": 7967.18, "total_tokens": 33723968}
|
|
{"current_steps": 10715, "total_steps": 15621, "loss": 0.3238, "lr": 5.433632601726159e-07, "epoch": 0.6859355995134755, "percentage": 68.59, "elapsed_time": "1:10:33", "remaining_time": "0:32:18", "throughput": 7969.53, "total_tokens": 33739200}
|
|
{"current_steps": 10720, "total_steps": 15621, "loss": 0.3306, "lr": 5.42369475127586e-07, "epoch": 0.6862556814544524, "percentage": 68.63, "elapsed_time": "1:10:34", "remaining_time": "0:32:15", "throughput": 7971.97, "total_tokens": 33754944}
|
|
{"current_steps": 10725, "total_steps": 15621, "loss": 0.4715, "lr": 5.413762614396396e-07, "epoch": 0.6865757633954293, "percentage": 68.66, "elapsed_time": "1:10:34", "remaining_time": "0:32:13", "throughput": 7974.19, "total_tokens": 33769472}
|
|
{"current_steps": 10730, "total_steps": 15621, "loss": 0.4267, "lr": 5.403836203488157e-07, "epoch": 0.6868958453364061, "percentage": 68.69, "elapsed_time": "1:10:35", "remaining_time": "0:32:10", "throughput": 7976.57, "total_tokens": 33784896}
|
|
{"current_steps": 10735, "total_steps": 15621, "loss": 0.3686, "lr": 5.393915530944382e-07, "epoch": 0.687215927277383, "percentage": 68.72, "elapsed_time": "1:10:36", "remaining_time": "0:32:08", "throughput": 7978.94, "total_tokens": 33800320}
|
|
{"current_steps": 10740, "total_steps": 15621, "loss": 0.3743, "lr": 5.384000609151145e-07, "epoch": 0.6875360092183599, "percentage": 68.75, "elapsed_time": "1:10:36", "remaining_time": "0:32:05", "throughput": 7981.53, "total_tokens": 33816896}
|
|
{"current_steps": 10745, "total_steps": 15621, "loss": 0.3655, "lr": 5.374091450487353e-07, "epoch": 0.6878560911593368, "percentage": 68.79, "elapsed_time": "1:10:37", "remaining_time": "0:32:02", "throughput": 7984.1, "total_tokens": 33833344}
|
|
{"current_steps": 10750, "total_steps": 15621, "loss": 0.3346, "lr": 5.364188067324693e-07, "epoch": 0.6881761731003136, "percentage": 68.82, "elapsed_time": "1:10:38", "remaining_time": "0:32:00", "throughput": 7986.67, "total_tokens": 33849856}
|
|
{"current_steps": 10755, "total_steps": 15621, "loss": 0.3566, "lr": 5.354290472027659e-07, "epoch": 0.6884962550412905, "percentage": 68.85, "elapsed_time": "1:10:38", "remaining_time": "0:31:57", "throughput": 7989.07, "total_tokens": 33865344}
|
|
{"current_steps": 10760, "total_steps": 15621, "loss": 0.4921, "lr": 5.344398676953525e-07, "epoch": 0.6888163369822674, "percentage": 68.88, "elapsed_time": "1:10:39", "remaining_time": "0:31:55", "throughput": 7991.65, "total_tokens": 33881792}
|
|
{"current_steps": 10765, "total_steps": 15621, "loss": 0.4873, "lr": 5.334512694452303e-07, "epoch": 0.6891364189232444, "percentage": 68.91, "elapsed_time": "1:10:40", "remaining_time": "0:31:52", "throughput": 7994.22, "total_tokens": 33898368}
|
|
{"current_steps": 10770, "total_steps": 15621, "loss": 0.345, "lr": 5.324632536866755e-07, "epoch": 0.6894565008642213, "percentage": 68.95, "elapsed_time": "1:10:41", "remaining_time": "0:31:50", "throughput": 7996.7, "total_tokens": 33914368}
|
|
{"current_steps": 10775, "total_steps": 15621, "loss": 0.349, "lr": 5.314758216532386e-07, "epoch": 0.6897765828051982, "percentage": 68.98, "elapsed_time": "1:10:41", "remaining_time": "0:31:47", "throughput": 7999.07, "total_tokens": 33929728}
|
|
{"current_steps": 10780, "total_steps": 15621, "loss": 0.3866, "lr": 5.304889745777396e-07, "epoch": 0.690096664746175, "percentage": 69.01, "elapsed_time": "1:10:42", "remaining_time": "0:31:45", "throughput": 8001.35, "total_tokens": 33944704}
|
|
{"current_steps": 10785, "total_steps": 15621, "loss": 0.6472, "lr": 5.295027136922678e-07, "epoch": 0.6904167466871519, "percentage": 69.04, "elapsed_time": "1:10:43", "remaining_time": "0:31:42", "throughput": 8003.73, "total_tokens": 33960128}
|
|
{"current_steps": 10790, "total_steps": 15621, "loss": 0.4201, "lr": 5.285170402281827e-07, "epoch": 0.6907368286281288, "percentage": 69.07, "elapsed_time": "1:10:43", "remaining_time": "0:31:40", "throughput": 8006.02, "total_tokens": 33975104}
|
|
{"current_steps": 10795, "total_steps": 15621, "loss": 0.459, "lr": 5.275319554161087e-07, "epoch": 0.6910569105691057, "percentage": 69.11, "elapsed_time": "1:10:44", "remaining_time": "0:31:37", "throughput": 8008.43, "total_tokens": 33990720}
|
|
{"current_steps": 10800, "total_steps": 15621, "loss": 0.4207, "lr": 5.265474604859356e-07, "epoch": 0.6913769925100826, "percentage": 69.14, "elapsed_time": "1:10:45", "remaining_time": "0:31:34", "throughput": 8010.82, "total_tokens": 34006272}
|
|
{"current_steps": 10805, "total_steps": 15621, "loss": 0.3828, "lr": 5.255635566668171e-07, "epoch": 0.6916970744510594, "percentage": 69.17, "elapsed_time": "1:10:45", "remaining_time": "0:31:32", "throughput": 8013.32, "total_tokens": 34022400}
|
|
{"current_steps": 10810, "total_steps": 15621, "loss": 0.3811, "lr": 5.245802451871686e-07, "epoch": 0.6920171563920363, "percentage": 69.2, "elapsed_time": "1:10:46", "remaining_time": "0:31:29", "throughput": 8015.86, "total_tokens": 34038720}
|
|
{"current_steps": 10815, "total_steps": 15621, "loss": 0.4381, "lr": 5.235975272746663e-07, "epoch": 0.6923372383330133, "percentage": 69.23, "elapsed_time": "1:10:47", "remaining_time": "0:31:27", "throughput": 8018.18, "total_tokens": 34053760}
|
|
{"current_steps": 10820, "total_steps": 15621, "loss": 0.3033, "lr": 5.226154041562442e-07, "epoch": 0.6926573202739902, "percentage": 69.27, "elapsed_time": "1:10:47", "remaining_time": "0:31:24", "throughput": 8020.62, "total_tokens": 34069568}
|
|
{"current_steps": 10825, "total_steps": 15621, "loss": 0.4078, "lr": 5.216338770580953e-07, "epoch": 0.6929774022149671, "percentage": 69.3, "elapsed_time": "1:10:48", "remaining_time": "0:31:22", "throughput": 8023.34, "total_tokens": 34086912}
|
|
{"current_steps": 10830, "total_steps": 15621, "loss": 0.359, "lr": 5.206529472056678e-07, "epoch": 0.6932974841559439, "percentage": 69.33, "elapsed_time": "1:10:49", "remaining_time": "0:31:19", "throughput": 8025.6, "total_tokens": 34101696}
|
|
{"current_steps": 10835, "total_steps": 15621, "loss": 0.3084, "lr": 5.196726158236637e-07, "epoch": 0.6936175660969208, "percentage": 69.36, "elapsed_time": "1:10:49", "remaining_time": "0:31:17", "throughput": 8027.76, "total_tokens": 34115904}
|
|
{"current_steps": 10840, "total_steps": 15621, "loss": 0.3404, "lr": 5.186928841360384e-07, "epoch": 0.6939376480378977, "percentage": 69.39, "elapsed_time": "1:10:50", "remaining_time": "0:31:14", "throughput": 8030.14, "total_tokens": 34131328}
|
|
{"current_steps": 10845, "total_steps": 15621, "loss": 0.4466, "lr": 5.177137533659985e-07, "epoch": 0.6942577299788746, "percentage": 69.43, "elapsed_time": "1:10:51", "remaining_time": "0:31:12", "throughput": 8032.83, "total_tokens": 34148544}
|
|
{"current_steps": 10850, "total_steps": 15621, "loss": 0.4562, "lr": 5.167352247360002e-07, "epoch": 0.6945778119198515, "percentage": 69.46, "elapsed_time": "1:10:51", "remaining_time": "0:31:09", "throughput": 8035.13, "total_tokens": 34163520}
|
|
{"current_steps": 10855, "total_steps": 15621, "loss": 0.398, "lr": 5.157572994677479e-07, "epoch": 0.6948978938608283, "percentage": 69.49, "elapsed_time": "1:10:52", "remaining_time": "0:31:07", "throughput": 8037.41, "total_tokens": 34178368}
|
|
{"current_steps": 10860, "total_steps": 15621, "loss": 0.4086, "lr": 5.147799787821929e-07, "epoch": 0.6952179758018052, "percentage": 69.52, "elapsed_time": "1:10:53", "remaining_time": "0:31:04", "throughput": 8039.8, "total_tokens": 34193920}
|
|
{"current_steps": 10865, "total_steps": 15621, "loss": 0.4939, "lr": 5.138032638995315e-07, "epoch": 0.6955380577427821, "percentage": 69.55, "elapsed_time": "1:10:53", "remaining_time": "0:31:02", "throughput": 8042.33, "total_tokens": 34210176}
|
|
{"current_steps": 10870, "total_steps": 15621, "loss": 0.3602, "lr": 5.128271560392037e-07, "epoch": 0.6958581396837591, "percentage": 69.59, "elapsed_time": "1:10:54", "remaining_time": "0:30:59", "throughput": 8045.01, "total_tokens": 34227328}
|
|
{"current_steps": 10875, "total_steps": 15621, "loss": 0.3959, "lr": 5.118516564198916e-07, "epoch": 0.696178221624736, "percentage": 69.62, "elapsed_time": "1:10:55", "remaining_time": "0:30:56", "throughput": 8047.24, "total_tokens": 34241984}
|
|
{"current_steps": 10880, "total_steps": 15621, "loss": 0.3339, "lr": 5.108767662595175e-07, "epoch": 0.6964983035657129, "percentage": 69.65, "elapsed_time": "1:10:55", "remaining_time": "0:30:54", "throughput": 8049.51, "total_tokens": 34256896}
|
|
{"current_steps": 10885, "total_steps": 15621, "loss": 0.3904, "lr": 5.099024867752446e-07, "epoch": 0.6968183855066897, "percentage": 69.68, "elapsed_time": "1:10:56", "remaining_time": "0:30:51", "throughput": 8052.12, "total_tokens": 34273792}
|
|
{"current_steps": 10890, "total_steps": 15621, "loss": 0.3381, "lr": 5.089288191834709e-07, "epoch": 0.6971384674476666, "percentage": 69.71, "elapsed_time": "1:10:57", "remaining_time": "0:30:49", "throughput": 8054.75, "total_tokens": 34290752}
|
|
{"current_steps": 10895, "total_steps": 15621, "loss": 0.3422, "lr": 5.079557646998318e-07, "epoch": 0.6974585493886435, "percentage": 69.75, "elapsed_time": "1:10:57", "remaining_time": "0:30:46", "throughput": 8057.52, "total_tokens": 34308416}
|
|
{"current_steps": 10900, "total_steps": 15621, "loss": 0.3981, "lr": 5.069833245391981e-07, "epoch": 0.6977786313296204, "percentage": 69.78, "elapsed_time": "1:10:58", "remaining_time": "0:30:44", "throughput": 8059.87, "total_tokens": 34323776}
|
|
{"current_steps": 10905, "total_steps": 15621, "loss": 0.2941, "lr": 5.060114999156728e-07, "epoch": 0.6980987132705972, "percentage": 69.81, "elapsed_time": "1:10:59", "remaining_time": "0:30:41", "throughput": 8062.18, "total_tokens": 34338944}
|
|
{"current_steps": 10910, "total_steps": 15621, "loss": 0.3407, "lr": 5.050402920425895e-07, "epoch": 0.6984187952115741, "percentage": 69.84, "elapsed_time": "1:10:59", "remaining_time": "0:30:39", "throughput": 8064.56, "total_tokens": 34354432}
|
|
{"current_steps": 10915, "total_steps": 15621, "loss": 0.2503, "lr": 5.040697021325128e-07, "epoch": 0.698738877152551, "percentage": 69.87, "elapsed_time": "1:11:00", "remaining_time": "0:30:36", "throughput": 8067.0, "total_tokens": 34370432}
|
|
{"current_steps": 10920, "total_steps": 15621, "loss": 0.4438, "lr": 5.030997313972361e-07, "epoch": 0.699058959093528, "percentage": 69.91, "elapsed_time": "1:11:01", "remaining_time": "0:30:34", "throughput": 8069.47, "total_tokens": 34386496}
|
|
{"current_steps": 10925, "total_steps": 15621, "loss": 0.3692, "lr": 5.021303810477795e-07, "epoch": 0.6993790410345049, "percentage": 69.94, "elapsed_time": "1:11:01", "remaining_time": "0:30:31", "throughput": 8071.95, "total_tokens": 34402560}
|
|
{"current_steps": 10930, "total_steps": 15621, "loss": 0.2937, "lr": 5.011616522943869e-07, "epoch": 0.6996991229754818, "percentage": 69.97, "elapsed_time": "1:11:02", "remaining_time": "0:30:29", "throughput": 8074.4, "total_tokens": 34418496}
|
|
{"current_steps": 10935, "total_steps": 15621, "loss": 0.2772, "lr": 5.001935463465289e-07, "epoch": 0.7000192049164586, "percentage": 70.0, "elapsed_time": "1:11:03", "remaining_time": "0:30:26", "throughput": 8076.89, "total_tokens": 34434752}
|
|
{"current_steps": 10940, "total_steps": 15621, "loss": 0.3775, "lr": 4.99226064412897e-07, "epoch": 0.7003392868574355, "percentage": 70.03, "elapsed_time": "1:11:04", "remaining_time": "0:30:24", "throughput": 8079.25, "total_tokens": 34450176}
|
|
{"current_steps": 10945, "total_steps": 15621, "loss": 0.4286, "lr": 4.982592077014026e-07, "epoch": 0.7006593687984124, "percentage": 70.07, "elapsed_time": "1:11:04", "remaining_time": "0:30:21", "throughput": 8081.62, "total_tokens": 34465600}
|
|
{"current_steps": 10948, "total_steps": 15621, "eval_loss": 0.3744131922721863, "epoch": 0.7008514179629985, "percentage": 70.09, "elapsed_time": "1:11:55", "remaining_time": "0:30:42", "throughput": 7988.2, "total_tokens": 34475136}
|
|
{"current_steps": 10950, "total_steps": 15621, "loss": 0.2973, "lr": 4.97292977419179e-07, "epoch": 0.7009794507393893, "percentage": 70.1, "elapsed_time": "1:13:55", "remaining_time": "0:31:32", "throughput": 7773.4, "total_tokens": 34481600}
|
|
{"current_steps": 10955, "total_steps": 15621, "loss": 0.2881, "lr": 4.963273747725755e-07, "epoch": 0.7012995326803662, "percentage": 70.13, "elapsed_time": "1:13:56", "remaining_time": "0:31:29", "throughput": 7776.02, "total_tokens": 34498752}
|
|
{"current_steps": 10960, "total_steps": 15621, "loss": 0.413, "lr": 4.953624009671582e-07, "epoch": 0.701619614621343, "percentage": 70.16, "elapsed_time": "1:13:57", "remaining_time": "0:31:27", "throughput": 7778.35, "total_tokens": 34514240}
|
|
{"current_steps": 10965, "total_steps": 15621, "loss": 0.4164, "lr": 4.943980572077086e-07, "epoch": 0.7019396965623199, "percentage": 70.19, "elapsed_time": "1:13:57", "remaining_time": "0:31:24", "throughput": 7780.5, "total_tokens": 34528704}
|
|
{"current_steps": 10970, "total_steps": 15621, "loss": 0.3207, "lr": 4.934343446982209e-07, "epoch": 0.7022597785032968, "percentage": 70.23, "elapsed_time": "1:13:58", "remaining_time": "0:31:21", "throughput": 7782.91, "total_tokens": 34544704}
|
|
{"current_steps": 10975, "total_steps": 15621, "loss": 0.3836, "lr": 4.924712646419016e-07, "epoch": 0.7025798604442738, "percentage": 70.26, "elapsed_time": "1:13:59", "remaining_time": "0:31:19", "throughput": 7785.19, "total_tokens": 34560000}
|
|
{"current_steps": 10980, "total_steps": 15621, "loss": 0.3222, "lr": 4.915088182411674e-07, "epoch": 0.7028999423852507, "percentage": 70.29, "elapsed_time": "1:13:59", "remaining_time": "0:31:16", "throughput": 7787.49, "total_tokens": 34575296}
|
|
{"current_steps": 10985, "total_steps": 15621, "loss": 0.3897, "lr": 4.905470066976439e-07, "epoch": 0.7032200243262275, "percentage": 70.32, "elapsed_time": "1:14:00", "remaining_time": "0:31:14", "throughput": 7789.75, "total_tokens": 34590528}
|
|
{"current_steps": 10990, "total_steps": 15621, "loss": 0.4156, "lr": 4.895858312121644e-07, "epoch": 0.7035401062672044, "percentage": 70.35, "elapsed_time": "1:14:01", "remaining_time": "0:31:11", "throughput": 7791.96, "total_tokens": 34605312}
|
|
{"current_steps": 10995, "total_steps": 15621, "loss": 0.4342, "lr": 4.886252929847674e-07, "epoch": 0.7038601882081813, "percentage": 70.39, "elapsed_time": "1:14:01", "remaining_time": "0:31:08", "throughput": 7794.27, "total_tokens": 34620736}
|
|
{"current_steps": 11000, "total_steps": 15621, "loss": 0.4627, "lr": 4.876653932146963e-07, "epoch": 0.7041802701491582, "percentage": 70.42, "elapsed_time": "1:14:02", "remaining_time": "0:31:06", "throughput": 7796.67, "total_tokens": 34636736}
|
|
{"current_steps": 11005, "total_steps": 15621, "loss": 0.3895, "lr": 4.86706133100397e-07, "epoch": 0.7045003520901351, "percentage": 70.45, "elapsed_time": "1:14:03", "remaining_time": "0:31:03", "throughput": 7798.91, "total_tokens": 34651776}
|
|
{"current_steps": 11010, "total_steps": 15621, "loss": 0.2889, "lr": 4.857475138395178e-07, "epoch": 0.7048204340311119, "percentage": 70.48, "elapsed_time": "1:14:03", "remaining_time": "0:31:01", "throughput": 7801.04, "total_tokens": 34666176}
|
|
{"current_steps": 11015, "total_steps": 15621, "loss": 0.2493, "lr": 4.847895366289054e-07, "epoch": 0.7051405159720888, "percentage": 70.51, "elapsed_time": "1:14:04", "remaining_time": "0:30:58", "throughput": 7803.44, "total_tokens": 34682112}
|
|
{"current_steps": 11020, "total_steps": 15621, "loss": 0.3825, "lr": 4.838322026646057e-07, "epoch": 0.7054605979130657, "percentage": 70.55, "elapsed_time": "1:14:05", "remaining_time": "0:30:55", "throughput": 7805.66, "total_tokens": 34697024}
|
|
{"current_steps": 11025, "total_steps": 15621, "loss": 0.371, "lr": 4.82875513141861e-07, "epoch": 0.7057806798540426, "percentage": 70.58, "elapsed_time": "1:14:05", "remaining_time": "0:30:53", "throughput": 7808.0, "total_tokens": 34712704}
|
|
{"current_steps": 11030, "total_steps": 15621, "loss": 0.375, "lr": 4.819194692551106e-07, "epoch": 0.7061007617950196, "percentage": 70.61, "elapsed_time": "1:14:06", "remaining_time": "0:30:50", "throughput": 7810.32, "total_tokens": 34728256}
|
|
{"current_steps": 11035, "total_steps": 15621, "loss": 0.435, "lr": 4.809640721979855e-07, "epoch": 0.7064208437359965, "percentage": 70.64, "elapsed_time": "1:14:07", "remaining_time": "0:30:48", "throughput": 7812.76, "total_tokens": 34744512}
|
|
{"current_steps": 11040, "total_steps": 15621, "loss": 0.4181, "lr": 4.8000932316331e-07, "epoch": 0.7067409256769733, "percentage": 70.67, "elapsed_time": "1:14:07", "remaining_time": "0:30:45", "throughput": 7814.89, "total_tokens": 34758912}
|
|
{"current_steps": 11045, "total_steps": 15621, "loss": 0.3914, "lr": 4.790552233431002e-07, "epoch": 0.7070610076179502, "percentage": 70.71, "elapsed_time": "1:14:08", "remaining_time": "0:30:43", "throughput": 7817.29, "total_tokens": 34774848}
|
|
{"current_steps": 11050, "total_steps": 15621, "loss": 0.416, "lr": 4.781017739285611e-07, "epoch": 0.7073810895589271, "percentage": 70.74, "elapsed_time": "1:14:09", "remaining_time": "0:30:40", "throughput": 7819.54, "total_tokens": 34790016}
|
|
{"current_steps": 11055, "total_steps": 15621, "loss": 0.3528, "lr": 4.771489761100842e-07, "epoch": 0.707701171499904, "percentage": 70.77, "elapsed_time": "1:14:09", "remaining_time": "0:30:37", "throughput": 7821.77, "total_tokens": 34804992}
|
|
{"current_steps": 11060, "total_steps": 15621, "loss": 0.2746, "lr": 4.761968310772501e-07, "epoch": 0.7080212534408808, "percentage": 70.8, "elapsed_time": "1:14:10", "remaining_time": "0:30:35", "throughput": 7824.04, "total_tokens": 34820288}
|
|
{"current_steps": 11065, "total_steps": 15621, "loss": 0.2814, "lr": 4.7524534001882267e-07, "epoch": 0.7083413353818577, "percentage": 70.83, "elapsed_time": "1:14:11", "remaining_time": "0:30:32", "throughput": 7826.41, "total_tokens": 34836096}
|
|
{"current_steps": 11070, "total_steps": 15621, "loss": 0.3875, "lr": 4.7429450412274897e-07, "epoch": 0.7086614173228346, "percentage": 70.87, "elapsed_time": "1:14:11", "remaining_time": "0:30:30", "throughput": 7828.72, "total_tokens": 34851584}
|
|
{"current_steps": 11075, "total_steps": 15621, "loss": 0.3542, "lr": 4.733443245761596e-07, "epoch": 0.7089814992638115, "percentage": 70.9, "elapsed_time": "1:14:12", "remaining_time": "0:30:27", "throughput": 7831.19, "total_tokens": 34868032}
|
|
{"current_steps": 11080, "total_steps": 15621, "loss": 0.3826, "lr": 4.723948025653646e-07, "epoch": 0.7093015812047885, "percentage": 70.93, "elapsed_time": "1:14:13", "remaining_time": "0:30:25", "throughput": 7833.58, "total_tokens": 34884032}
|
|
{"current_steps": 11085, "total_steps": 15621, "loss": 0.3252, "lr": 4.714459392758534e-07, "epoch": 0.7096216631457654, "percentage": 70.96, "elapsed_time": "1:14:13", "remaining_time": "0:30:22", "throughput": 7835.87, "total_tokens": 34899456}
|
|
{"current_steps": 11090, "total_steps": 15621, "loss": 0.3772, "lr": 4.70497735892293e-07, "epoch": 0.7099417450867422, "percentage": 70.99, "elapsed_time": "1:14:14", "remaining_time": "0:30:19", "throughput": 7838.27, "total_tokens": 34915456}
|
|
{"current_steps": 11095, "total_steps": 15621, "loss": 0.3408, "lr": 4.695501935985263e-07, "epoch": 0.7102618270277191, "percentage": 71.03, "elapsed_time": "1:14:15", "remaining_time": "0:30:17", "throughput": 7840.64, "total_tokens": 34931328}
|
|
{"current_steps": 11100, "total_steps": 15621, "loss": 0.4064, "lr": 4.686033135775711e-07, "epoch": 0.710581908968696, "percentage": 71.06, "elapsed_time": "1:14:15", "remaining_time": "0:30:14", "throughput": 7842.92, "total_tokens": 34946816}
|
|
{"current_steps": 11105, "total_steps": 15621, "loss": 0.3274, "lr": 4.6765709701161817e-07, "epoch": 0.7109019909096729, "percentage": 71.09, "elapsed_time": "1:14:16", "remaining_time": "0:30:12", "throughput": 7845.61, "total_tokens": 34964544}
|
|
{"current_steps": 11110, "total_steps": 15621, "loss": 0.3861, "lr": 4.6671154508203003e-07, "epoch": 0.7112220728506498, "percentage": 71.12, "elapsed_time": "1:14:17", "remaining_time": "0:30:09", "throughput": 7848.28, "total_tokens": 34982208}
|
|
{"current_steps": 11115, "total_steps": 15621, "loss": 0.3523, "lr": 4.657666589693393e-07, "epoch": 0.7115421547916266, "percentage": 71.15, "elapsed_time": "1:14:18", "remaining_time": "0:30:07", "throughput": 7851.07, "total_tokens": 35000576}
|
|
{"current_steps": 11120, "total_steps": 15621, "loss": 0.3167, "lr": 4.6482243985324753e-07, "epoch": 0.7118622367326035, "percentage": 71.19, "elapsed_time": "1:14:18", "remaining_time": "0:30:04", "throughput": 7853.17, "total_tokens": 35014912}
|
|
{"current_steps": 11125, "total_steps": 15621, "loss": 0.2867, "lr": 4.638788889126232e-07, "epoch": 0.7121823186735804, "percentage": 71.22, "elapsed_time": "1:14:19", "remaining_time": "0:30:02", "throughput": 7855.33, "total_tokens": 35029632}
|
|
{"current_steps": 11130, "total_steps": 15621, "loss": 0.3423, "lr": 4.6293600732550085e-07, "epoch": 0.7125024006145573, "percentage": 71.25, "elapsed_time": "1:14:20", "remaining_time": "0:29:59", "throughput": 7857.61, "total_tokens": 35044992}
|
|
{"current_steps": 11135, "total_steps": 15621, "loss": 0.4721, "lr": 4.619937962690792e-07, "epoch": 0.7128224825555343, "percentage": 71.28, "elapsed_time": "1:14:20", "remaining_time": "0:29:57", "throughput": 7859.91, "total_tokens": 35060544}
|
|
{"current_steps": 11140, "total_steps": 15621, "loss": 0.5205, "lr": 4.610522569197197e-07, "epoch": 0.7131425644965111, "percentage": 71.31, "elapsed_time": "1:14:21", "remaining_time": "0:29:54", "throughput": 7862.13, "total_tokens": 35075648}
|
|
{"current_steps": 11145, "total_steps": 15621, "loss": 0.3271, "lr": 4.6011139045294554e-07, "epoch": 0.713462646437488, "percentage": 71.35, "elapsed_time": "1:14:22", "remaining_time": "0:29:52", "throughput": 7864.38, "total_tokens": 35090880}
|
|
{"current_steps": 11150, "total_steps": 15621, "loss": 0.3935, "lr": 4.59171198043439e-07, "epoch": 0.7137827283784649, "percentage": 71.38, "elapsed_time": "1:14:22", "remaining_time": "0:29:49", "throughput": 7866.68, "total_tokens": 35106432}
|
|
{"current_steps": 11155, "total_steps": 15621, "loss": 0.4446, "lr": 4.582316808650424e-07, "epoch": 0.7141028103194418, "percentage": 71.41, "elapsed_time": "1:14:23", "remaining_time": "0:29:46", "throughput": 7868.93, "total_tokens": 35121664}
|
|
{"current_steps": 11160, "total_steps": 15621, "loss": 0.4704, "lr": 4.572928400907529e-07, "epoch": 0.7144228922604187, "percentage": 71.44, "elapsed_time": "1:14:24", "remaining_time": "0:29:44", "throughput": 7871.21, "total_tokens": 35137152}
|
|
{"current_steps": 11165, "total_steps": 15621, "loss": 0.3787, "lr": 4.5635467689272434e-07, "epoch": 0.7147429742013955, "percentage": 71.47, "elapsed_time": "1:14:24", "remaining_time": "0:29:41", "throughput": 7873.58, "total_tokens": 35153088}
|
|
{"current_steps": 11170, "total_steps": 15621, "loss": 0.3674, "lr": 4.554171924422655e-07, "epoch": 0.7150630561423724, "percentage": 71.51, "elapsed_time": "1:14:25", "remaining_time": "0:29:39", "throughput": 7875.8, "total_tokens": 35168192}
|
|
{"current_steps": 11175, "total_steps": 15621, "loss": 0.3288, "lr": 4.544803879098356e-07, "epoch": 0.7153831380833493, "percentage": 71.54, "elapsed_time": "1:14:26", "remaining_time": "0:29:36", "throughput": 7878.17, "total_tokens": 35184192}
|
|
{"current_steps": 11180, "total_steps": 15621, "loss": 0.3703, "lr": 4.535442644650462e-07, "epoch": 0.7157032200243262, "percentage": 71.57, "elapsed_time": "1:14:26", "remaining_time": "0:29:34", "throughput": 7880.55, "total_tokens": 35200256}
|
|
{"current_steps": 11185, "total_steps": 15621, "loss": 0.4906, "lr": 4.5260882327665906e-07, "epoch": 0.7160233019653032, "percentage": 71.6, "elapsed_time": "1:14:27", "remaining_time": "0:29:31", "throughput": 7882.67, "total_tokens": 35214720}
|
|
{"current_steps": 11190, "total_steps": 15621, "loss": 0.5148, "lr": 4.5167406551258347e-07, "epoch": 0.71634338390628, "percentage": 71.63, "elapsed_time": "1:14:28", "remaining_time": "0:29:29", "throughput": 7885.02, "total_tokens": 35230720}
|
|
{"current_steps": 11195, "total_steps": 15621, "loss": 0.3863, "lr": 4.5073999233987445e-07, "epoch": 0.7166634658472569, "percentage": 71.67, "elapsed_time": "1:14:28", "remaining_time": "0:29:26", "throughput": 7887.34, "total_tokens": 35246400}
|
|
{"current_steps": 11200, "total_steps": 15621, "loss": 0.47, "lr": 4.4980660492473434e-07, "epoch": 0.7169835477882338, "percentage": 71.7, "elapsed_time": "1:14:29", "remaining_time": "0:29:24", "throughput": 7889.78, "total_tokens": 35262784}
|
|
{"current_steps": 11205, "total_steps": 15621, "loss": 0.2775, "lr": 4.4887390443250804e-07, "epoch": 0.7173036297292107, "percentage": 71.73, "elapsed_time": "1:14:30", "remaining_time": "0:29:21", "throughput": 7891.96, "total_tokens": 35277632}
|
|
{"current_steps": 11210, "total_steps": 15621, "loss": 0.2913, "lr": 4.4794189202768295e-07, "epoch": 0.7176237116701876, "percentage": 71.76, "elapsed_time": "1:14:30", "remaining_time": "0:29:19", "throughput": 7894.15, "total_tokens": 35292544}
|
|
{"current_steps": 11215, "total_steps": 15621, "loss": 0.368, "lr": 4.4701056887388757e-07, "epoch": 0.7179437936111644, "percentage": 71.79, "elapsed_time": "1:14:31", "remaining_time": "0:29:16", "throughput": 7896.5, "total_tokens": 35308352}
|
|
{"current_steps": 11220, "total_steps": 15621, "loss": 0.3343, "lr": 4.460799361338897e-07, "epoch": 0.7182638755521413, "percentage": 71.83, "elapsed_time": "1:14:32", "remaining_time": "0:29:14", "throughput": 7898.79, "total_tokens": 35323904}
|
|
{"current_steps": 11225, "total_steps": 15621, "loss": 0.4156, "lr": 4.451499949695954e-07, "epoch": 0.7185839574931182, "percentage": 71.86, "elapsed_time": "1:14:32", "remaining_time": "0:29:11", "throughput": 7901.21, "total_tokens": 35340224}
|
|
{"current_steps": 11230, "total_steps": 15621, "loss": 0.375, "lr": 4.44220746542047e-07, "epoch": 0.7189040394340951, "percentage": 71.89, "elapsed_time": "1:14:33", "remaining_time": "0:29:09", "throughput": 7903.5, "total_tokens": 35355776}
|
|
{"current_steps": 11235, "total_steps": 15621, "loss": 0.4772, "lr": 4.432921920114221e-07, "epoch": 0.719224121375072, "percentage": 71.92, "elapsed_time": "1:14:34", "remaining_time": "0:29:06", "throughput": 7905.75, "total_tokens": 35371072}
|
|
{"current_steps": 11240, "total_steps": 15621, "loss": 0.3169, "lr": 4.4236433253703185e-07, "epoch": 0.719544203316049, "percentage": 71.95, "elapsed_time": "1:14:34", "remaining_time": "0:29:04", "throughput": 7908.18, "total_tokens": 35387520}
|
|
{"current_steps": 11245, "total_steps": 15621, "loss": 0.3928, "lr": 4.4143716927732e-07, "epoch": 0.7198642852570258, "percentage": 71.99, "elapsed_time": "1:14:35", "remaining_time": "0:29:01", "throughput": 7910.61, "total_tokens": 35403840}
|
|
{"current_steps": 11250, "total_steps": 15621, "loss": 0.3873, "lr": 4.405107033898604e-07, "epoch": 0.7201843671980027, "percentage": 72.02, "elapsed_time": "1:14:36", "remaining_time": "0:28:59", "throughput": 7913.0, "total_tokens": 35420032}
|
|
{"current_steps": 11255, "total_steps": 15621, "loss": 0.2845, "lr": 4.395849360313568e-07, "epoch": 0.7205044491389796, "percentage": 72.05, "elapsed_time": "1:14:36", "remaining_time": "0:28:56", "throughput": 7915.37, "total_tokens": 35436032}
|
|
{"current_steps": 11260, "total_steps": 15621, "loss": 0.3583, "lr": 4.386598683576406e-07, "epoch": 0.7208245310799565, "percentage": 72.08, "elapsed_time": "1:14:37", "remaining_time": "0:28:54", "throughput": 7917.57, "total_tokens": 35451136}
|
|
{"current_steps": 11265, "total_steps": 15621, "loss": 0.4711, "lr": 4.377355015236696e-07, "epoch": 0.7211446130209334, "percentage": 72.11, "elapsed_time": "1:14:38", "remaining_time": "0:28:51", "throughput": 7919.87, "total_tokens": 35466816}
|
|
{"current_steps": 11270, "total_steps": 15621, "loss": 0.3555, "lr": 4.368118366835266e-07, "epoch": 0.7214646949619102, "percentage": 72.15, "elapsed_time": "1:14:38", "remaining_time": "0:28:49", "throughput": 7922.35, "total_tokens": 35483456}
|
|
{"current_steps": 11275, "total_steps": 15621, "loss": 0.4612, "lr": 4.358888749904177e-07, "epoch": 0.7217847769028871, "percentage": 72.18, "elapsed_time": "1:14:39", "remaining_time": "0:28:46", "throughput": 7924.74, "total_tokens": 35499584}
|
|
{"current_steps": 11280, "total_steps": 15621, "loss": 0.3546, "lr": 4.349666175966725e-07, "epoch": 0.722104858843864, "percentage": 72.21, "elapsed_time": "1:14:40", "remaining_time": "0:28:44", "throughput": 7927.05, "total_tokens": 35515328}
|
|
{"current_steps": 11285, "total_steps": 15621, "loss": 0.4744, "lr": 4.340450656537392e-07, "epoch": 0.7224249407848409, "percentage": 72.24, "elapsed_time": "1:14:40", "remaining_time": "0:28:41", "throughput": 7929.18, "total_tokens": 35530048}
|
|
{"current_steps": 11290, "total_steps": 15621, "loss": 0.2965, "lr": 4.331242203121861e-07, "epoch": 0.7227450227258178, "percentage": 72.27, "elapsed_time": "1:14:41", "remaining_time": "0:28:39", "throughput": 7931.51, "total_tokens": 35545792}
|
|
{"current_steps": 11295, "total_steps": 15621, "loss": 0.3871, "lr": 4.322040827217004e-07, "epoch": 0.7230651046667947, "percentage": 72.31, "elapsed_time": "1:14:42", "remaining_time": "0:28:36", "throughput": 7933.8, "total_tokens": 35561344}
|
|
{"current_steps": 11300, "total_steps": 15621, "loss": 0.405, "lr": 4.312846540310838e-07, "epoch": 0.7233851866077716, "percentage": 72.34, "elapsed_time": "1:14:42", "remaining_time": "0:28:34", "throughput": 7936.11, "total_tokens": 35577024}
|
|
{"current_steps": 11305, "total_steps": 15621, "loss": 0.3728, "lr": 4.3036593538825373e-07, "epoch": 0.7237052685487485, "percentage": 72.37, "elapsed_time": "1:14:43", "remaining_time": "0:28:31", "throughput": 7938.32, "total_tokens": 35592192}
|
|
{"current_steps": 11310, "total_steps": 15621, "loss": 0.3287, "lr": 4.2944792794024196e-07, "epoch": 0.7240253504897254, "percentage": 72.4, "elapsed_time": "1:14:44", "remaining_time": "0:28:29", "throughput": 7940.64, "total_tokens": 35607872}
|
|
{"current_steps": 11315, "total_steps": 15621, "loss": 0.3117, "lr": 4.285306328331915e-07, "epoch": 0.7243454324307023, "percentage": 72.43, "elapsed_time": "1:14:44", "remaining_time": "0:28:26", "throughput": 7943.0, "total_tokens": 35623872}
|
|
{"current_steps": 11320, "total_steps": 15621, "loss": 0.3067, "lr": 4.2761405121235506e-07, "epoch": 0.7246655143716791, "percentage": 72.47, "elapsed_time": "1:14:45", "remaining_time": "0:28:24", "throughput": 7945.15, "total_tokens": 35638720}
|
|
{"current_steps": 11325, "total_steps": 15621, "loss": 0.5403, "lr": 4.266981842220965e-07, "epoch": 0.724985596312656, "percentage": 72.5, "elapsed_time": "1:14:46", "remaining_time": "0:28:21", "throughput": 7947.68, "total_tokens": 35655680}
|
|
{"current_steps": 11330, "total_steps": 15621, "loss": 0.2708, "lr": 4.257830330058864e-07, "epoch": 0.7253056782536329, "percentage": 72.53, "elapsed_time": "1:14:46", "remaining_time": "0:28:19", "throughput": 7949.96, "total_tokens": 35671168}
|
|
{"current_steps": 11335, "total_steps": 15621, "loss": 0.4088, "lr": 4.248685987063019e-07, "epoch": 0.7256257601946098, "percentage": 72.56, "elapsed_time": "1:14:47", "remaining_time": "0:28:16", "throughput": 7952.26, "total_tokens": 35686848}
|
|
{"current_steps": 11340, "total_steps": 15621, "loss": 0.3478, "lr": 4.2395488246502396e-07, "epoch": 0.7259458421355867, "percentage": 72.59, "elapsed_time": "1:14:48", "remaining_time": "0:28:14", "throughput": 7954.61, "total_tokens": 35702720}
|
|
{"current_steps": 11345, "total_steps": 15621, "loss": 0.4566, "lr": 4.2304188542283913e-07, "epoch": 0.7262659240765637, "percentage": 72.63, "elapsed_time": "1:14:49", "remaining_time": "0:28:11", "throughput": 7957.3, "total_tokens": 35720640}
|
|
{"current_steps": 11350, "total_steps": 15621, "loss": 0.3923, "lr": 4.221296087196347e-07, "epoch": 0.7265860060175405, "percentage": 72.66, "elapsed_time": "1:14:49", "remaining_time": "0:28:09", "throughput": 7959.44, "total_tokens": 35735424}
|
|
{"current_steps": 11355, "total_steps": 15621, "loss": 0.4596, "lr": 4.2121805349439867e-07, "epoch": 0.7269060879585174, "percentage": 72.69, "elapsed_time": "1:14:50", "remaining_time": "0:28:06", "throughput": 7961.76, "total_tokens": 35751168}
|
|
{"current_steps": 11360, "total_steps": 15621, "loss": 0.3787, "lr": 4.203072208852184e-07, "epoch": 0.7272261698994943, "percentage": 72.72, "elapsed_time": "1:14:51", "remaining_time": "0:28:04", "throughput": 7964.1, "total_tokens": 35767168}
|
|
{"current_steps": 11365, "total_steps": 15621, "loss": 0.439, "lr": 4.193971120292793e-07, "epoch": 0.7275462518404712, "percentage": 72.75, "elapsed_time": "1:14:51", "remaining_time": "0:28:02", "throughput": 7966.33, "total_tokens": 35782464}
|
|
{"current_steps": 11370, "total_steps": 15621, "loss": 0.406, "lr": 4.184877280628629e-07, "epoch": 0.727866333781448, "percentage": 72.79, "elapsed_time": "1:14:52", "remaining_time": "0:27:59", "throughput": 7968.69, "total_tokens": 35798592}
|
|
{"current_steps": 11375, "total_steps": 15621, "loss": 0.396, "lr": 4.1757907012134565e-07, "epoch": 0.7281864157224249, "percentage": 72.82, "elapsed_time": "1:14:53", "remaining_time": "0:27:57", "throughput": 7971.07, "total_tokens": 35814720}
|
|
{"current_steps": 11380, "total_steps": 15621, "loss": 0.2826, "lr": 4.166711393391978e-07, "epoch": 0.7285064976634018, "percentage": 72.85, "elapsed_time": "1:14:53", "remaining_time": "0:27:54", "throughput": 7973.3, "total_tokens": 35830016}
|
|
{"current_steps": 11385, "total_steps": 15621, "loss": 0.345, "lr": 4.1576393684998146e-07, "epoch": 0.7288265796043787, "percentage": 72.88, "elapsed_time": "1:14:54", "remaining_time": "0:27:52", "throughput": 7975.58, "total_tokens": 35845632}
|
|
{"current_steps": 11390, "total_steps": 15621, "loss": 0.3556, "lr": 4.1485746378634966e-07, "epoch": 0.7291466615453556, "percentage": 72.91, "elapsed_time": "1:14:55", "remaining_time": "0:27:49", "throughput": 7977.86, "total_tokens": 35861184}
|
|
{"current_steps": 11395, "total_steps": 15621, "loss": 0.4311, "lr": 4.1395172128004473e-07, "epoch": 0.7294667434863324, "percentage": 72.95, "elapsed_time": "1:14:55", "remaining_time": "0:27:47", "throughput": 7980.17, "total_tokens": 35876864}
|
|
{"current_steps": 11400, "total_steps": 15621, "loss": 0.3318, "lr": 4.130467104618963e-07, "epoch": 0.7297868254273094, "percentage": 72.98, "elapsed_time": "1:14:56", "remaining_time": "0:27:44", "throughput": 7982.65, "total_tokens": 35893568}
|
|
{"current_steps": 11405, "total_steps": 15621, "loss": 0.3364, "lr": 4.1214243246182223e-07, "epoch": 0.7301069073682863, "percentage": 73.01, "elapsed_time": "1:14:57", "remaining_time": "0:27:42", "throughput": 7985.01, "total_tokens": 35909696}
|
|
{"current_steps": 11410, "total_steps": 15621, "loss": 0.5046, "lr": 4.1123888840882306e-07, "epoch": 0.7304269893092632, "percentage": 73.04, "elapsed_time": "1:14:57", "remaining_time": "0:27:39", "throughput": 7987.26, "total_tokens": 35925120}
|
|
{"current_steps": 11415, "total_steps": 15621, "loss": 0.3223, "lr": 4.1033607943098415e-07, "epoch": 0.7307470712502401, "percentage": 73.07, "elapsed_time": "1:14:58", "remaining_time": "0:27:37", "throughput": 7989.56, "total_tokens": 35940800}
|
|
{"current_steps": 11420, "total_steps": 15621, "loss": 0.3444, "lr": 4.0943400665547423e-07, "epoch": 0.731067153191217, "percentage": 73.11, "elapsed_time": "1:14:59", "remaining_time": "0:27:35", "throughput": 7991.78, "total_tokens": 35955968}
|
|
{"current_steps": 11425, "total_steps": 15621, "loss": 0.3449, "lr": 4.0853267120854064e-07, "epoch": 0.7313872351321938, "percentage": 73.14, "elapsed_time": "1:14:59", "remaining_time": "0:27:32", "throughput": 7994.14, "total_tokens": 35972096}
|
|
{"current_steps": 11430, "total_steps": 15621, "loss": 0.3315, "lr": 4.076320742155117e-07, "epoch": 0.7317073170731707, "percentage": 73.17, "elapsed_time": "1:15:00", "remaining_time": "0:27:30", "throughput": 7996.25, "total_tokens": 35986624}
|
|
{"current_steps": 11435, "total_steps": 15621, "loss": 0.3493, "lr": 4.067322168007928e-07, "epoch": 0.7320273990141476, "percentage": 73.2, "elapsed_time": "1:15:01", "remaining_time": "0:27:27", "throughput": 7998.66, "total_tokens": 36003008}
|
|
{"current_steps": 11440, "total_steps": 15621, "loss": 0.3597, "lr": 4.0583310008786775e-07, "epoch": 0.7323474809551245, "percentage": 73.23, "elapsed_time": "1:15:01", "remaining_time": "0:27:25", "throughput": 8000.68, "total_tokens": 36017152}
|
|
{"current_steps": 11445, "total_steps": 15621, "loss": 0.271, "lr": 4.049347251992932e-07, "epoch": 0.7326675628961014, "percentage": 73.27, "elapsed_time": "1:15:02", "remaining_time": "0:27:22", "throughput": 8002.82, "total_tokens": 36031936}
|
|
{"current_steps": 11450, "total_steps": 15621, "loss": 0.353, "lr": 4.0403709325670064e-07, "epoch": 0.7329876448370783, "percentage": 73.3, "elapsed_time": "1:15:03", "remaining_time": "0:27:20", "throughput": 8005.19, "total_tokens": 36048064}
|
|
{"current_steps": 11455, "total_steps": 15621, "loss": 0.4653, "lr": 4.03140205380795e-07, "epoch": 0.7333077267780552, "percentage": 73.33, "elapsed_time": "1:15:03", "remaining_time": "0:27:17", "throughput": 8007.56, "total_tokens": 36064256}
|
|
{"current_steps": 11460, "total_steps": 15621, "loss": 0.6715, "lr": 4.0224406269135115e-07, "epoch": 0.7336278087190321, "percentage": 73.36, "elapsed_time": "1:15:04", "remaining_time": "0:27:15", "throughput": 8009.76, "total_tokens": 36079424}
|
|
{"current_steps": 11465, "total_steps": 15621, "loss": 0.3111, "lr": 4.0134866630721266e-07, "epoch": 0.733947890660009, "percentage": 73.39, "elapsed_time": "1:15:05", "remaining_time": "0:27:13", "throughput": 8012.1, "total_tokens": 36095424}
|
|
{"current_steps": 11470, "total_steps": 15621, "loss": 0.3618, "lr": 4.0045401734629367e-07, "epoch": 0.7342679726009859, "percentage": 73.43, "elapsed_time": "1:15:05", "remaining_time": "0:27:10", "throughput": 8014.43, "total_tokens": 36111360}
|
|
{"current_steps": 11475, "total_steps": 15621, "loss": 0.3825, "lr": 3.9956011692557377e-07, "epoch": 0.7345880545419627, "percentage": 73.46, "elapsed_time": "1:15:06", "remaining_time": "0:27:08", "throughput": 8016.75, "total_tokens": 36127232}
|
|
{"current_steps": 11480, "total_steps": 15621, "loss": 0.3532, "lr": 3.986669661610972e-07, "epoch": 0.7349081364829396, "percentage": 73.49, "elapsed_time": "1:15:07", "remaining_time": "0:27:05", "throughput": 8019.07, "total_tokens": 36143168}
|
|
{"current_steps": 11485, "total_steps": 15621, "loss": 0.3323, "lr": 3.9777456616797414e-07, "epoch": 0.7352282184239165, "percentage": 73.52, "elapsed_time": "1:15:07", "remaining_time": "0:27:03", "throughput": 8021.27, "total_tokens": 36158272}
|
|
{"current_steps": 11490, "total_steps": 15621, "loss": 0.3731, "lr": 3.968829180603761e-07, "epoch": 0.7355483003648934, "percentage": 73.55, "elapsed_time": "1:15:08", "remaining_time": "0:27:00", "throughput": 8023.4, "total_tokens": 36173056}
|
|
{"current_steps": 11495, "total_steps": 15621, "loss": 0.3927, "lr": 3.9599202295153624e-07, "epoch": 0.7358683823058703, "percentage": 73.59, "elapsed_time": "1:15:09", "remaining_time": "0:26:58", "throughput": 8025.54, "total_tokens": 36187904}
|
|
{"current_steps": 11500, "total_steps": 15621, "loss": 0.3596, "lr": 3.951018819537476e-07, "epoch": 0.7361884642468471, "percentage": 73.62, "elapsed_time": "1:15:09", "remaining_time": "0:26:56", "throughput": 8028.17, "total_tokens": 36205632}
|
|
{"current_steps": 11505, "total_steps": 15621, "loss": 0.3478, "lr": 3.942124961783616e-07, "epoch": 0.7365085461878241, "percentage": 73.65, "elapsed_time": "1:15:10", "remaining_time": "0:26:53", "throughput": 8030.27, "total_tokens": 36220160}
|
|
{"current_steps": 11510, "total_steps": 15621, "loss": 0.3164, "lr": 3.933238667357869e-07, "epoch": 0.736828628128801, "percentage": 73.68, "elapsed_time": "1:15:11", "remaining_time": "0:26:51", "throughput": 8032.65, "total_tokens": 36236416}
|
|
{"current_steps": 11515, "total_steps": 15621, "loss": 0.3449, "lr": 3.924359947354876e-07, "epoch": 0.7371487100697779, "percentage": 73.71, "elapsed_time": "1:15:11", "remaining_time": "0:26:48", "throughput": 8034.84, "total_tokens": 36251584}
|
|
{"current_steps": 11520, "total_steps": 15621, "loss": 0.3289, "lr": 3.915488812859826e-07, "epoch": 0.7374687920107548, "percentage": 73.75, "elapsed_time": "1:15:12", "remaining_time": "0:26:46", "throughput": 8036.88, "total_tokens": 36265856}
|
|
{"current_steps": 11525, "total_steps": 15621, "loss": 0.3927, "lr": 3.90662527494843e-07, "epoch": 0.7377888739517316, "percentage": 73.78, "elapsed_time": "1:15:13", "remaining_time": "0:26:43", "throughput": 8039.57, "total_tokens": 36283904}
|
|
{"current_steps": 11530, "total_steps": 15621, "loss": 0.3627, "lr": 3.8977693446869285e-07, "epoch": 0.7381089558927085, "percentage": 73.81, "elapsed_time": "1:15:13", "remaining_time": "0:26:41", "throughput": 8041.64, "total_tokens": 36298432}
|
|
{"current_steps": 11535, "total_steps": 15621, "loss": 0.3247, "lr": 3.8889210331320445e-07, "epoch": 0.7384290378336854, "percentage": 73.84, "elapsed_time": "1:15:14", "remaining_time": "0:26:39", "throughput": 8043.87, "total_tokens": 36313728}
|
|
{"current_steps": 11540, "total_steps": 15621, "loss": 0.3595, "lr": 3.8800803513310033e-07, "epoch": 0.7387491197746623, "percentage": 73.87, "elapsed_time": "1:15:15", "remaining_time": "0:26:36", "throughput": 8046.09, "total_tokens": 36329088}
|
|
{"current_steps": 11545, "total_steps": 15621, "loss": 0.4255, "lr": 3.8712473103214993e-07, "epoch": 0.7390692017156392, "percentage": 73.91, "elapsed_time": "1:15:15", "remaining_time": "0:26:34", "throughput": 8048.41, "total_tokens": 36345024}
|
|
{"current_steps": 11550, "total_steps": 15621, "loss": 0.3089, "lr": 3.862421921131688e-07, "epoch": 0.739389283656616, "percentage": 73.94, "elapsed_time": "1:15:16", "remaining_time": "0:26:31", "throughput": 8050.88, "total_tokens": 36361792}
|
|
{"current_steps": 11555, "total_steps": 15621, "loss": 0.2832, "lr": 3.85360419478017e-07, "epoch": 0.739709365597593, "percentage": 73.97, "elapsed_time": "1:15:17", "remaining_time": "0:26:29", "throughput": 8053.11, "total_tokens": 36377152}
|
|
{"current_steps": 11560, "total_steps": 15621, "loss": 0.3552, "lr": 3.8447941422759786e-07, "epoch": 0.7400294475385699, "percentage": 74.0, "elapsed_time": "1:15:17", "remaining_time": "0:26:27", "throughput": 8055.58, "total_tokens": 36394048}
|
|
{"current_steps": 11565, "total_steps": 15621, "loss": 0.3684, "lr": 3.835991774618579e-07, "epoch": 0.7403495294795468, "percentage": 74.03, "elapsed_time": "1:15:18", "remaining_time": "0:26:24", "throughput": 8057.75, "total_tokens": 36409152}
|
|
{"current_steps": 11570, "total_steps": 15621, "loss": 0.3859, "lr": 3.827197102797818e-07, "epoch": 0.7406696114205237, "percentage": 74.07, "elapsed_time": "1:15:19", "remaining_time": "0:26:22", "throughput": 8060.4, "total_tokens": 36427072}
|
|
{"current_steps": 11575, "total_steps": 15621, "loss": 0.4771, "lr": 3.818410137793947e-07, "epoch": 0.7409896933615006, "percentage": 74.1, "elapsed_time": "1:15:19", "remaining_time": "0:26:19", "throughput": 8062.91, "total_tokens": 36444288}
|
|
{"current_steps": 11580, "total_steps": 15621, "loss": 0.4402, "lr": 3.809630890577602e-07, "epoch": 0.7413097753024774, "percentage": 74.13, "elapsed_time": "1:15:20", "remaining_time": "0:26:17", "throughput": 8065.21, "total_tokens": 36460096}
|
|
{"current_steps": 11585, "total_steps": 15621, "loss": 0.3388, "lr": 3.800859372109777e-07, "epoch": 0.7416298572434543, "percentage": 74.16, "elapsed_time": "1:15:21", "remaining_time": "0:26:15", "throughput": 8067.4, "total_tokens": 36475264}
|
|
{"current_steps": 11590, "total_steps": 15621, "loss": 0.325, "lr": 3.7920955933418055e-07, "epoch": 0.7419499391844312, "percentage": 74.19, "elapsed_time": "1:15:21", "remaining_time": "0:26:12", "throughput": 8069.73, "total_tokens": 36491264}
|
|
{"current_steps": 11595, "total_steps": 15621, "loss": 0.3245, "lr": 3.7833395652153775e-07, "epoch": 0.7422700211254081, "percentage": 74.23, "elapsed_time": "1:15:22", "remaining_time": "0:26:10", "throughput": 8071.9, "total_tokens": 36506368}
|
|
{"current_steps": 11600, "total_steps": 15621, "loss": 0.3117, "lr": 3.774591298662497e-07, "epoch": 0.742590103066385, "percentage": 74.26, "elapsed_time": "1:15:23", "remaining_time": "0:26:07", "throughput": 8074.25, "total_tokens": 36522432}
|
|
{"current_steps": 11605, "total_steps": 15621, "loss": 0.4221, "lr": 3.765850804605468e-07, "epoch": 0.7429101850073618, "percentage": 74.29, "elapsed_time": "1:15:24", "remaining_time": "0:26:05", "throughput": 8076.68, "total_tokens": 36539008}
|
|
{"current_steps": 11610, "total_steps": 15621, "loss": 0.2818, "lr": 3.7571180939569104e-07, "epoch": 0.7432302669483388, "percentage": 74.32, "elapsed_time": "1:15:24", "remaining_time": "0:26:03", "throughput": 8078.87, "total_tokens": 36554240}
|
|
{"current_steps": 11615, "total_steps": 15621, "loss": 0.3181, "lr": 3.748393177619711e-07, "epoch": 0.7435503488893157, "percentage": 74.36, "elapsed_time": "1:15:25", "remaining_time": "0:26:00", "throughput": 8081.13, "total_tokens": 36569920}
|
|
{"current_steps": 11620, "total_steps": 15621, "loss": 0.3139, "lr": 3.739676066487032e-07, "epoch": 0.7438704308302926, "percentage": 74.39, "elapsed_time": "1:15:26", "remaining_time": "0:25:58", "throughput": 8083.43, "total_tokens": 36585792}
|
|
{"current_steps": 11625, "total_steps": 15621, "loss": 0.2923, "lr": 3.730966771442289e-07, "epoch": 0.7441905127712695, "percentage": 74.42, "elapsed_time": "1:15:26", "remaining_time": "0:25:56", "throughput": 8085.66, "total_tokens": 36601280}
|
|
{"current_steps": 11630, "total_steps": 15621, "loss": 0.5229, "lr": 3.722265303359137e-07, "epoch": 0.7445105947122463, "percentage": 74.45, "elapsed_time": "1:15:27", "remaining_time": "0:25:53", "throughput": 8087.95, "total_tokens": 36617152}
|
|
{"current_steps": 11635, "total_steps": 15621, "loss": 0.4046, "lr": 3.713571673101463e-07, "epoch": 0.7448306766532232, "percentage": 74.48, "elapsed_time": "1:15:28", "remaining_time": "0:25:51", "throughput": 8090.16, "total_tokens": 36632512}
|
|
{"current_steps": 11640, "total_steps": 15621, "loss": 0.344, "lr": 3.704885891523366e-07, "epoch": 0.7451507585942001, "percentage": 74.52, "elapsed_time": "1:15:28", "remaining_time": "0:25:48", "throughput": 8092.35, "total_tokens": 36647744}
|
|
{"current_steps": 11645, "total_steps": 15621, "loss": 0.3938, "lr": 3.696207969469146e-07, "epoch": 0.745470840535177, "percentage": 74.55, "elapsed_time": "1:15:29", "remaining_time": "0:25:46", "throughput": 8094.6, "total_tokens": 36663360}
|
|
{"current_steps": 11650, "total_steps": 15621, "loss": 0.373, "lr": 3.6875379177732913e-07, "epoch": 0.7457909224761539, "percentage": 74.58, "elapsed_time": "1:15:30", "remaining_time": "0:25:44", "throughput": 8096.79, "total_tokens": 36678656}
|
|
{"current_steps": 11655, "total_steps": 15621, "loss": 0.5096, "lr": 3.6788757472604634e-07, "epoch": 0.7461110044171307, "percentage": 74.61, "elapsed_time": "1:15:30", "remaining_time": "0:25:41", "throughput": 8098.99, "total_tokens": 36693952}
|
|
{"current_steps": 11660, "total_steps": 15621, "loss": 0.3264, "lr": 3.6702214687454825e-07, "epoch": 0.7464310863581076, "percentage": 74.64, "elapsed_time": "1:15:31", "remaining_time": "0:25:39", "throughput": 8101.3, "total_tokens": 36709888}
|
|
{"current_steps": 11665, "total_steps": 15621, "loss": 0.3066, "lr": 3.6615750930333177e-07, "epoch": 0.7467511682990846, "percentage": 74.68, "elapsed_time": "1:15:32", "remaining_time": "0:25:36", "throughput": 8103.54, "total_tokens": 36725504}
|
|
{"current_steps": 11670, "total_steps": 15621, "loss": 0.3025, "lr": 3.65293663091907e-07, "epoch": 0.7470712502400615, "percentage": 74.71, "elapsed_time": "1:15:32", "remaining_time": "0:25:34", "throughput": 8105.85, "total_tokens": 36741376}
|
|
{"current_steps": 11675, "total_steps": 15621, "loss": 0.435, "lr": 3.6443060931879623e-07, "epoch": 0.7473913321810384, "percentage": 74.74, "elapsed_time": "1:15:33", "remaining_time": "0:25:32", "throughput": 8108.07, "total_tokens": 36756864}
|
|
{"current_steps": 11680, "total_steps": 15621, "loss": 0.4612, "lr": 3.635683490615321e-07, "epoch": 0.7477114141220152, "percentage": 74.77, "elapsed_time": "1:15:34", "remaining_time": "0:25:29", "throughput": 8110.34, "total_tokens": 36772608}
|
|
{"current_steps": 11685, "total_steps": 15621, "loss": 0.3057, "lr": 3.6270688339665634e-07, "epoch": 0.7480314960629921, "percentage": 74.8, "elapsed_time": "1:15:34", "remaining_time": "0:25:27", "throughput": 8112.61, "total_tokens": 36788352}
|
|
{"current_steps": 11690, "total_steps": 15621, "loss": 0.3581, "lr": 3.6184621339972e-07, "epoch": 0.748351578003969, "percentage": 74.84, "elapsed_time": "1:15:35", "remaining_time": "0:25:25", "throughput": 8114.87, "total_tokens": 36804096}
|
|
{"current_steps": 11695, "total_steps": 15621, "loss": 0.3592, "lr": 3.609863401452786e-07, "epoch": 0.7486716599449459, "percentage": 74.87, "elapsed_time": "1:15:36", "remaining_time": "0:25:22", "throughput": 8117.14, "total_tokens": 36819776}
|
|
{"current_steps": 11700, "total_steps": 15621, "loss": 0.4102, "lr": 3.6012726470689416e-07, "epoch": 0.7489917418859228, "percentage": 74.9, "elapsed_time": "1:15:36", "remaining_time": "0:25:20", "throughput": 8119.34, "total_tokens": 36835072}
|
|
{"current_steps": 11705, "total_steps": 15621, "loss": 0.3346, "lr": 3.592689881571329e-07, "epoch": 0.7493118238268996, "percentage": 74.93, "elapsed_time": "1:15:37", "remaining_time": "0:25:18", "throughput": 8121.6, "total_tokens": 36850816}
|
|
{"current_steps": 11710, "total_steps": 15621, "loss": 0.4205, "lr": 3.5841151156756334e-07, "epoch": 0.7496319057678765, "percentage": 74.96, "elapsed_time": "1:15:38", "remaining_time": "0:25:15", "throughput": 8123.83, "total_tokens": 36866368}
|
|
{"current_steps": 11715, "total_steps": 15621, "loss": 0.4196, "lr": 3.575548360087539e-07, "epoch": 0.7499519877088535, "percentage": 75.0, "elapsed_time": "1:15:38", "remaining_time": "0:25:13", "throughput": 8126.63, "total_tokens": 36885376}
|
|
{"current_steps": 11720, "total_steps": 15621, "loss": 0.3191, "lr": 3.5669896255027533e-07, "epoch": 0.7502720696498304, "percentage": 75.03, "elapsed_time": "1:15:39", "remaining_time": "0:25:10", "throughput": 8128.74, "total_tokens": 36900288}
|
|
{"current_steps": 11725, "total_steps": 15621, "loss": 0.3892, "lr": 3.5584389226069543e-07, "epoch": 0.7505921515908073, "percentage": 75.06, "elapsed_time": "1:15:40", "remaining_time": "0:25:08", "throughput": 8131.04, "total_tokens": 36916224}
|
|
{"current_steps": 11730, "total_steps": 15621, "loss": 0.3097, "lr": 3.5498962620757866e-07, "epoch": 0.7509122335317842, "percentage": 75.09, "elapsed_time": "1:15:40", "remaining_time": "0:25:06", "throughput": 8133.25, "total_tokens": 36931648}
|
|
{"current_steps": 11730, "total_steps": 15621, "eval_loss": 0.36731547117233276, "epoch": 0.7509122335317842, "percentage": 75.09, "elapsed_time": "1:16:31", "remaining_time": "0:25:23", "throughput": 8043.65, "total_tokens": 36931648}
|
|
{"current_steps": 11735, "total_steps": 15621, "loss": 0.4301, "lr": 3.5413616545748713e-07, "epoch": 0.751232315472761, "percentage": 75.12, "elapsed_time": "1:17:07", "remaining_time": "0:25:32", "throughput": 7983.35, "total_tokens": 36945856}
|
|
{"current_steps": 11740, "total_steps": 15621, "loss": 0.509, "lr": 3.532835110759763e-07, "epoch": 0.7515523974137379, "percentage": 75.16, "elapsed_time": "1:17:08", "remaining_time": "0:25:30", "throughput": 7985.62, "total_tokens": 36961792}
|
|
{"current_steps": 11745, "total_steps": 15621, "loss": 0.3072, "lr": 3.524316641275955e-07, "epoch": 0.7518724793547148, "percentage": 75.19, "elapsed_time": "1:17:09", "remaining_time": "0:25:27", "throughput": 7987.8, "total_tokens": 36977152}
|
|
{"current_steps": 11750, "total_steps": 15621, "loss": 0.4213, "lr": 3.5158062567588467e-07, "epoch": 0.7521925612956917, "percentage": 75.22, "elapsed_time": "1:17:09", "remaining_time": "0:25:25", "throughput": 7989.88, "total_tokens": 36991936}
|
|
{"current_steps": 11755, "total_steps": 15621, "loss": 0.4065, "lr": 3.5073039678337633e-07, "epoch": 0.7525126432366686, "percentage": 75.25, "elapsed_time": "1:17:10", "remaining_time": "0:25:22", "throughput": 7991.97, "total_tokens": 37006784}
|
|
{"current_steps": 11760, "total_steps": 15621, "loss": 0.3394, "lr": 3.498809785115908e-07, "epoch": 0.7528327251776454, "percentage": 75.28, "elapsed_time": "1:17:11", "remaining_time": "0:25:20", "throughput": 7994.16, "total_tokens": 37022208}
|
|
{"current_steps": 11765, "total_steps": 15621, "loss": 0.3495, "lr": 3.4903237192103697e-07, "epoch": 0.7531528071186223, "percentage": 75.32, "elapsed_time": "1:17:11", "remaining_time": "0:25:18", "throughput": 7996.66, "total_tokens": 37039488}
|
|
{"current_steps": 11770, "total_steps": 15621, "loss": 0.3453, "lr": 3.481845780712099e-07, "epoch": 0.7534728890595993, "percentage": 75.35, "elapsed_time": "1:17:12", "remaining_time": "0:25:15", "throughput": 7999.03, "total_tokens": 37056064}
|
|
{"current_steps": 11775, "total_steps": 15621, "loss": 0.3434, "lr": 3.4733759802059037e-07, "epoch": 0.7537929710005762, "percentage": 75.38, "elapsed_time": "1:17:13", "remaining_time": "0:25:13", "throughput": 8001.35, "total_tokens": 37072256}
|
|
{"current_steps": 11780, "total_steps": 15621, "loss": 0.428, "lr": 3.4649143282664273e-07, "epoch": 0.7541130529415531, "percentage": 75.41, "elapsed_time": "1:17:13", "remaining_time": "0:25:10", "throughput": 8003.47, "total_tokens": 37087360}
|
|
{"current_steps": 11785, "total_steps": 15621, "loss": 0.3164, "lr": 3.456460835458143e-07, "epoch": 0.7544331348825299, "percentage": 75.44, "elapsed_time": "1:17:14", "remaining_time": "0:25:08", "throughput": 8005.54, "total_tokens": 37102144}
|
|
{"current_steps": 11790, "total_steps": 15621, "loss": 0.3131, "lr": 3.4480155123353337e-07, "epoch": 0.7547532168235068, "percentage": 75.48, "elapsed_time": "1:17:15", "remaining_time": "0:25:06", "throughput": 8007.72, "total_tokens": 37117568}
|
|
{"current_steps": 11795, "total_steps": 15621, "loss": 0.4608, "lr": 3.4395783694420875e-07, "epoch": 0.7550732987644837, "percentage": 75.51, "elapsed_time": "1:17:15", "remaining_time": "0:25:03", "throughput": 8009.87, "total_tokens": 37132800}
|
|
{"current_steps": 11800, "total_steps": 15621, "loss": 0.4036, "lr": 3.4311494173122743e-07, "epoch": 0.7553933807054606, "percentage": 75.54, "elapsed_time": "1:17:16", "remaining_time": "0:25:01", "throughput": 8011.96, "total_tokens": 37147776}
|
|
{"current_steps": 11805, "total_steps": 15621, "loss": 0.3944, "lr": 3.422728666469541e-07, "epoch": 0.7557134626464375, "percentage": 75.57, "elapsed_time": "1:17:17", "remaining_time": "0:24:58", "throughput": 8014.27, "total_tokens": 37163904}
|
|
{"current_steps": 11810, "total_steps": 15621, "loss": 0.4316, "lr": 3.41431612742729e-07, "epoch": 0.7560335445874143, "percentage": 75.6, "elapsed_time": "1:17:17", "remaining_time": "0:24:56", "throughput": 8016.63, "total_tokens": 37180416}
|
|
{"current_steps": 11815, "total_steps": 15621, "loss": 0.4235, "lr": 3.4059118106886855e-07, "epoch": 0.7563536265283912, "percentage": 75.64, "elapsed_time": "1:17:18", "remaining_time": "0:24:54", "throughput": 8018.91, "total_tokens": 37196480}
|
|
{"current_steps": 11820, "total_steps": 15621, "loss": 0.5208, "lr": 3.3975157267466036e-07, "epoch": 0.7566737084693682, "percentage": 75.67, "elapsed_time": "1:17:19", "remaining_time": "0:24:51", "throughput": 8021.05, "total_tokens": 37211648}
|
|
{"current_steps": 11825, "total_steps": 15621, "loss": 0.2942, "lr": 3.389127886083656e-07, "epoch": 0.7569937904103451, "percentage": 75.7, "elapsed_time": "1:17:19", "remaining_time": "0:24:49", "throughput": 8023.23, "total_tokens": 37227072}
|
|
{"current_steps": 11830, "total_steps": 15621, "loss": 0.3342, "lr": 3.3807482991721667e-07, "epoch": 0.757313872351322, "percentage": 75.73, "elapsed_time": "1:17:20", "remaining_time": "0:24:47", "throughput": 8025.65, "total_tokens": 37243968}
|
|
{"current_steps": 11835, "total_steps": 15621, "loss": 0.32, "lr": 3.3723769764741474e-07, "epoch": 0.7576339542922989, "percentage": 75.76, "elapsed_time": "1:17:21", "remaining_time": "0:24:44", "throughput": 8027.78, "total_tokens": 37259200}
|
|
{"current_steps": 11840, "total_steps": 15621, "loss": 0.2946, "lr": 3.3640139284412825e-07, "epoch": 0.7579540362332757, "percentage": 75.8, "elapsed_time": "1:17:21", "remaining_time": "0:24:42", "throughput": 8030.03, "total_tokens": 37275072}
|
|
{"current_steps": 11845, "total_steps": 15621, "loss": 0.4, "lr": 3.355659165514948e-07, "epoch": 0.7582741181742526, "percentage": 75.83, "elapsed_time": "1:17:22", "remaining_time": "0:24:40", "throughput": 8032.35, "total_tokens": 37291392}
|
|
{"current_steps": 11850, "total_steps": 15621, "loss": 0.2828, "lr": 3.347312698126161e-07, "epoch": 0.7585942001152295, "percentage": 75.86, "elapsed_time": "1:17:23", "remaining_time": "0:24:37", "throughput": 8034.67, "total_tokens": 37307648}
|
|
{"current_steps": 11855, "total_steps": 15621, "loss": 0.2188, "lr": 3.338974536695578e-07, "epoch": 0.7589142820562064, "percentage": 75.89, "elapsed_time": "1:17:24", "remaining_time": "0:24:35", "throughput": 8036.85, "total_tokens": 37323136}
|
|
{"current_steps": 11860, "total_steps": 15621, "loss": 0.3193, "lr": 3.330644691633492e-07, "epoch": 0.7592343639971832, "percentage": 75.92, "elapsed_time": "1:17:24", "remaining_time": "0:24:32", "throughput": 8039.01, "total_tokens": 37338496}
|
|
{"current_steps": 11865, "total_steps": 15621, "loss": 0.2764, "lr": 3.322323173339818e-07, "epoch": 0.7595544459381601, "percentage": 75.96, "elapsed_time": "1:17:25", "remaining_time": "0:24:30", "throughput": 8041.64, "total_tokens": 37356800}
|
|
{"current_steps": 11870, "total_steps": 15621, "loss": 0.4461, "lr": 3.314009992204071e-07, "epoch": 0.759874527879137, "percentage": 75.99, "elapsed_time": "1:17:26", "remaining_time": "0:24:28", "throughput": 8043.9, "total_tokens": 37372800}
|
|
{"current_steps": 11875, "total_steps": 15621, "loss": 0.3172, "lr": 3.3057051586053443e-07, "epoch": 0.760194609820114, "percentage": 76.02, "elapsed_time": "1:17:26", "remaining_time": "0:24:25", "throughput": 8046.13, "total_tokens": 37388608}
|
|
{"current_steps": 11880, "total_steps": 15621, "loss": 0.4503, "lr": 3.297408682912329e-07, "epoch": 0.7605146917610909, "percentage": 76.05, "elapsed_time": "1:17:27", "remaining_time": "0:24:23", "throughput": 8048.49, "total_tokens": 37405184}
|
|
{"current_steps": 11885, "total_steps": 15621, "loss": 0.2743, "lr": 3.289120575483271e-07, "epoch": 0.7608347737020678, "percentage": 76.08, "elapsed_time": "1:17:28", "remaining_time": "0:24:21", "throughput": 8050.58, "total_tokens": 37420096}
|
|
{"current_steps": 11890, "total_steps": 15621, "loss": 0.4177, "lr": 3.280840846665969e-07, "epoch": 0.7611548556430446, "percentage": 76.12, "elapsed_time": "1:17:28", "remaining_time": "0:24:18", "throughput": 8052.56, "total_tokens": 37434368}
|
|
{"current_steps": 11895, "total_steps": 15621, "loss": 0.3019, "lr": 3.272569506797761e-07, "epoch": 0.7614749375840215, "percentage": 76.15, "elapsed_time": "1:17:29", "remaining_time": "0:24:16", "throughput": 8054.64, "total_tokens": 37449344}
|
|
{"current_steps": 11900, "total_steps": 15621, "loss": 0.3364, "lr": 3.2643065662055136e-07, "epoch": 0.7617950195249984, "percentage": 76.18, "elapsed_time": "1:17:30", "remaining_time": "0:24:14", "throughput": 8056.76, "total_tokens": 37464448}
|
|
{"current_steps": 11905, "total_steps": 15621, "loss": 0.2844, "lr": 3.2560520352056033e-07, "epoch": 0.7621151014659753, "percentage": 76.21, "elapsed_time": "1:17:30", "remaining_time": "0:24:11", "throughput": 8059.25, "total_tokens": 37481856}
|
|
{"current_steps": 11910, "total_steps": 15621, "loss": 0.3952, "lr": 3.24780592410391e-07, "epoch": 0.7624351834069522, "percentage": 76.24, "elapsed_time": "1:17:31", "remaining_time": "0:24:09", "throughput": 8061.5, "total_tokens": 37497856}
|
|
{"current_steps": 11915, "total_steps": 15621, "loss": 0.4545, "lr": 3.2395682431957994e-07, "epoch": 0.762755265347929, "percentage": 76.28, "elapsed_time": "1:17:32", "remaining_time": "0:24:06", "throughput": 8063.71, "total_tokens": 37513600}
|
|
{"current_steps": 11920, "total_steps": 15621, "loss": 0.3272, "lr": 3.231339002766115e-07, "epoch": 0.7630753472889059, "percentage": 76.31, "elapsed_time": "1:17:32", "remaining_time": "0:24:04", "throughput": 8065.92, "total_tokens": 37529408}
|
|
{"current_steps": 11925, "total_steps": 15621, "loss": 0.3396, "lr": 3.2231182130891564e-07, "epoch": 0.7633954292298829, "percentage": 76.34, "elapsed_time": "1:17:33", "remaining_time": "0:24:02", "throughput": 8068.29, "total_tokens": 37545984}
|
|
{"current_steps": 11930, "total_steps": 15621, "loss": 0.3342, "lr": 3.214905884428679e-07, "epoch": 0.7637155111708598, "percentage": 76.37, "elapsed_time": "1:17:34", "remaining_time": "0:23:59", "throughput": 8070.52, "total_tokens": 37561856}
|
|
{"current_steps": 11935, "total_steps": 15621, "loss": 0.3292, "lr": 3.206702027037868e-07, "epoch": 0.7640355931118367, "percentage": 76.4, "elapsed_time": "1:17:34", "remaining_time": "0:23:57", "throughput": 8072.9, "total_tokens": 37578624}
|
|
{"current_steps": 11940, "total_steps": 15621, "loss": 0.3962, "lr": 3.198506651159344e-07, "epoch": 0.7643556750528135, "percentage": 76.44, "elapsed_time": "1:17:35", "remaining_time": "0:23:55", "throughput": 8075.04, "total_tokens": 37593920}
|
|
{"current_steps": 11945, "total_steps": 15621, "loss": 0.3658, "lr": 3.190319767025121e-07, "epoch": 0.7646757569937904, "percentage": 76.47, "elapsed_time": "1:17:36", "remaining_time": "0:23:52", "throughput": 8077.24, "total_tokens": 37609664}
|
|
{"current_steps": 11950, "total_steps": 15621, "loss": 0.4959, "lr": 3.1821413848566213e-07, "epoch": 0.7649958389347673, "percentage": 76.5, "elapsed_time": "1:17:36", "remaining_time": "0:23:50", "throughput": 8079.54, "total_tokens": 37626048}
|
|
{"current_steps": 11955, "total_steps": 15621, "loss": 0.3753, "lr": 3.1739715148646564e-07, "epoch": 0.7653159208757442, "percentage": 76.53, "elapsed_time": "1:17:37", "remaining_time": "0:23:48", "throughput": 8081.75, "total_tokens": 37641792}
|
|
{"current_steps": 11960, "total_steps": 15621, "loss": 0.4534, "lr": 3.1658101672494043e-07, "epoch": 0.7656360028167211, "percentage": 76.56, "elapsed_time": "1:17:38", "remaining_time": "0:23:45", "throughput": 8083.79, "total_tokens": 37656512}
|
|
{"current_steps": 11965, "total_steps": 15621, "loss": 0.3377, "lr": 3.157657352200397e-07, "epoch": 0.7659560847576979, "percentage": 76.6, "elapsed_time": "1:17:38", "remaining_time": "0:23:43", "throughput": 8085.94, "total_tokens": 37672000}
|
|
{"current_steps": 11970, "total_steps": 15621, "loss": 0.3278, "lr": 3.149513079896521e-07, "epoch": 0.7662761666986748, "percentage": 76.63, "elapsed_time": "1:17:39", "remaining_time": "0:23:41", "throughput": 8088.07, "total_tokens": 37687232}
|
|
{"current_steps": 11975, "total_steps": 15621, "loss": 0.3237, "lr": 3.1413773605060034e-07, "epoch": 0.7665962486396517, "percentage": 76.66, "elapsed_time": "1:17:40", "remaining_time": "0:23:38", "throughput": 8090.23, "total_tokens": 37702656}
|
|
{"current_steps": 11980, "total_steps": 15621, "loss": 0.4234, "lr": 3.1332502041863783e-07, "epoch": 0.7669163305806287, "percentage": 76.69, "elapsed_time": "1:17:40", "remaining_time": "0:23:36", "throughput": 8092.39, "total_tokens": 37718080}
|
|
{"current_steps": 11985, "total_steps": 15621, "loss": 0.3181, "lr": 3.1251316210844946e-07, "epoch": 0.7672364125216056, "percentage": 76.72, "elapsed_time": "1:17:41", "remaining_time": "0:23:34", "throughput": 8094.89, "total_tokens": 37735680}
|
|
{"current_steps": 11990, "total_steps": 15621, "loss": 0.2871, "lr": 3.1170216213365055e-07, "epoch": 0.7675564944625825, "percentage": 76.76, "elapsed_time": "1:17:42", "remaining_time": "0:23:31", "throughput": 8096.87, "total_tokens": 37749952}
|
|
{"current_steps": 11995, "total_steps": 15621, "loss": 0.4582, "lr": 3.1089202150678397e-07, "epoch": 0.7678765764035593, "percentage": 76.79, "elapsed_time": "1:17:42", "remaining_time": "0:23:29", "throughput": 8099.0, "total_tokens": 37765312}
|
|
{"current_steps": 12000, "total_steps": 15621, "loss": 0.4919, "lr": 3.1008274123931886e-07, "epoch": 0.7681966583445362, "percentage": 76.82, "elapsed_time": "1:17:43", "remaining_time": "0:23:27", "throughput": 8101.06, "total_tokens": 37780160}
|
|
{"current_steps": 12005, "total_steps": 15621, "loss": 0.2657, "lr": 3.092743223416523e-07, "epoch": 0.7685167402855131, "percentage": 76.85, "elapsed_time": "1:17:44", "remaining_time": "0:23:24", "throughput": 8103.34, "total_tokens": 37796352}
|
|
{"current_steps": 12010, "total_steps": 15621, "loss": 0.3551, "lr": 3.0846676582310413e-07, "epoch": 0.76883682222649, "percentage": 76.88, "elapsed_time": "1:17:44", "remaining_time": "0:23:22", "throughput": 8105.67, "total_tokens": 37812864}
|
|
{"current_steps": 12015, "total_steps": 15621, "loss": 0.3818, "lr": 3.076600726919185e-07, "epoch": 0.7691569041674668, "percentage": 76.92, "elapsed_time": "1:17:45", "remaining_time": "0:23:20", "throughput": 8107.75, "total_tokens": 37827840}
|
|
{"current_steps": 12020, "total_steps": 15621, "loss": 0.3599, "lr": 3.0685424395526106e-07, "epoch": 0.7694769861084437, "percentage": 76.95, "elapsed_time": "1:17:46", "remaining_time": "0:23:17", "throughput": 8110.52, "total_tokens": 37847040}
|
|
{"current_steps": 12025, "total_steps": 15621, "loss": 0.2875, "lr": 3.060492806192184e-07, "epoch": 0.7697970680494206, "percentage": 76.98, "elapsed_time": "1:17:47", "remaining_time": "0:23:15", "throughput": 8112.68, "total_tokens": 37862464}
|
|
{"current_steps": 12030, "total_steps": 15621, "loss": 0.3826, "lr": 3.052451836887968e-07, "epoch": 0.7701171499903975, "percentage": 77.01, "elapsed_time": "1:17:47", "remaining_time": "0:23:13", "throughput": 8114.81, "total_tokens": 37877760}
|
|
{"current_steps": 12035, "total_steps": 15621, "loss": 0.2867, "lr": 3.044419541679207e-07, "epoch": 0.7704372319313745, "percentage": 77.04, "elapsed_time": "1:17:48", "remaining_time": "0:23:11", "throughput": 8116.89, "total_tokens": 37892800}
|
|
{"current_steps": 12040, "total_steps": 15621, "loss": 0.4353, "lr": 3.0363959305943153e-07, "epoch": 0.7707573138723514, "percentage": 77.08, "elapsed_time": "1:17:49", "remaining_time": "0:23:08", "throughput": 8119.17, "total_tokens": 37909056}
|
|
{"current_steps": 12045, "total_steps": 15621, "loss": 0.3447, "lr": 3.028381013650867e-07, "epoch": 0.7710773958133282, "percentage": 77.11, "elapsed_time": "1:17:49", "remaining_time": "0:23:06", "throughput": 8121.46, "total_tokens": 37925376}
|
|
{"current_steps": 12050, "total_steps": 15621, "loss": 0.3705, "lr": 3.0203748008555783e-07, "epoch": 0.7713974777543051, "percentage": 77.14, "elapsed_time": "1:17:50", "remaining_time": "0:23:04", "throughput": 8123.74, "total_tokens": 37941632}
|
|
{"current_steps": 12055, "total_steps": 15621, "loss": 0.374, "lr": 3.012377302204301e-07, "epoch": 0.771717559695282, "percentage": 77.17, "elapsed_time": "1:17:51", "remaining_time": "0:23:01", "throughput": 8125.88, "total_tokens": 37957056}
|
|
{"current_steps": 12060, "total_steps": 15621, "loss": 0.3959, "lr": 3.0043885276820046e-07, "epoch": 0.7720376416362589, "percentage": 77.2, "elapsed_time": "1:17:51", "remaining_time": "0:22:59", "throughput": 8128.12, "total_tokens": 37973184}
|
|
{"current_steps": 12065, "total_steps": 15621, "loss": 0.3027, "lr": 2.99640848726277e-07, "epoch": 0.7723577235772358, "percentage": 77.24, "elapsed_time": "1:17:52", "remaining_time": "0:22:57", "throughput": 8130.21, "total_tokens": 37988288}
|
|
{"current_steps": 12070, "total_steps": 15621, "loss": 0.3723, "lr": 2.9884371909097704e-07, "epoch": 0.7726778055182126, "percentage": 77.27, "elapsed_time": "1:17:53", "remaining_time": "0:22:54", "throughput": 8132.42, "total_tokens": 38004224}
|
|
{"current_steps": 12075, "total_steps": 15621, "loss": 0.3721, "lr": 2.9804746485752616e-07, "epoch": 0.7729978874591895, "percentage": 77.3, "elapsed_time": "1:17:53", "remaining_time": "0:22:52", "throughput": 8134.55, "total_tokens": 38019456}
|
|
{"current_steps": 12080, "total_steps": 15621, "loss": 0.4237, "lr": 2.972520870200573e-07, "epoch": 0.7733179694001664, "percentage": 77.33, "elapsed_time": "1:17:54", "remaining_time": "0:22:50", "throughput": 8136.74, "total_tokens": 38035264}
|
|
{"current_steps": 12085, "total_steps": 15621, "loss": 0.4166, "lr": 2.9645758657160904e-07, "epoch": 0.7736380513411434, "percentage": 77.36, "elapsed_time": "1:17:55", "remaining_time": "0:22:47", "throughput": 8138.95, "total_tokens": 38051072}
|
|
{"current_steps": 12090, "total_steps": 15621, "loss": 0.3573, "lr": 2.9566396450412444e-07, "epoch": 0.7739581332821203, "percentage": 77.4, "elapsed_time": "1:17:55", "remaining_time": "0:22:45", "throughput": 8141.11, "total_tokens": 38066688}
|
|
{"current_steps": 12095, "total_steps": 15621, "loss": 0.3237, "lr": 2.9487122180844957e-07, "epoch": 0.7742782152230971, "percentage": 77.43, "elapsed_time": "1:17:56", "remaining_time": "0:22:43", "throughput": 8143.25, "total_tokens": 38082048}
|
|
{"current_steps": 12100, "total_steps": 15621, "loss": 0.3143, "lr": 2.9407935947433406e-07, "epoch": 0.774598297164074, "percentage": 77.46, "elapsed_time": "1:17:57", "remaining_time": "0:22:41", "throughput": 8145.36, "total_tokens": 38097344}
|
|
{"current_steps": 12105, "total_steps": 15621, "loss": 0.4448, "lr": 2.932883784904264e-07, "epoch": 0.7749183791050509, "percentage": 77.49, "elapsed_time": "1:17:57", "remaining_time": "0:22:38", "throughput": 8147.42, "total_tokens": 38112320}
|
|
{"current_steps": 12110, "total_steps": 15621, "loss": 0.244, "lr": 2.9249827984427555e-07, "epoch": 0.7752384610460278, "percentage": 77.52, "elapsed_time": "1:17:58", "remaining_time": "0:22:36", "throughput": 8149.6, "total_tokens": 38128000}
|
|
{"current_steps": 12115, "total_steps": 15621, "loss": 0.3049, "lr": 2.917090645223297e-07, "epoch": 0.7755585429870047, "percentage": 77.56, "elapsed_time": "1:17:59", "remaining_time": "0:22:34", "throughput": 8151.7, "total_tokens": 38143168}
|
|
{"current_steps": 12120, "total_steps": 15621, "loss": 0.301, "lr": 2.909207335099332e-07, "epoch": 0.7758786249279815, "percentage": 77.59, "elapsed_time": "1:17:59", "remaining_time": "0:22:31", "throughput": 8153.71, "total_tokens": 38157824}
|
|
{"current_steps": 12125, "total_steps": 15621, "loss": 0.3329, "lr": 2.9013328779132595e-07, "epoch": 0.7761987068689584, "percentage": 77.62, "elapsed_time": "1:18:00", "remaining_time": "0:22:29", "throughput": 8155.79, "total_tokens": 38172864}
|
|
{"current_steps": 12130, "total_steps": 15621, "loss": 0.4221, "lr": 2.893467283496439e-07, "epoch": 0.7765187888099353, "percentage": 77.65, "elapsed_time": "1:18:01", "remaining_time": "0:22:27", "throughput": 8157.76, "total_tokens": 38187264}
|
|
{"current_steps": 12135, "total_steps": 15621, "loss": 0.3534, "lr": 2.885610561669155e-07, "epoch": 0.7768388707509122, "percentage": 77.68, "elapsed_time": "1:18:01", "remaining_time": "0:22:24", "throughput": 8160.17, "total_tokens": 38204288}
|
|
{"current_steps": 12140, "total_steps": 15621, "loss": 0.3447, "lr": 2.8777627222406163e-07, "epoch": 0.7771589526918892, "percentage": 77.72, "elapsed_time": "1:18:02", "remaining_time": "0:22:22", "throughput": 8162.23, "total_tokens": 38219264}
|
|
{"current_steps": 12145, "total_steps": 15621, "loss": 0.3845, "lr": 2.869923775008943e-07, "epoch": 0.777479034632866, "percentage": 77.75, "elapsed_time": "1:18:03", "remaining_time": "0:22:20", "throughput": 8164.33, "total_tokens": 38234496}
|
|
{"current_steps": 12150, "total_steps": 15621, "loss": 0.2729, "lr": 2.862093729761155e-07, "epoch": 0.7777991165738429, "percentage": 77.78, "elapsed_time": "1:18:03", "remaining_time": "0:22:18", "throughput": 8166.66, "total_tokens": 38251072}
|
|
{"current_steps": 12155, "total_steps": 15621, "loss": 0.3971, "lr": 2.854272596273152e-07, "epoch": 0.7781191985148198, "percentage": 77.81, "elapsed_time": "1:18:04", "remaining_time": "0:22:15", "throughput": 8168.8, "total_tokens": 38266560}
|
|
{"current_steps": 12160, "total_steps": 15621, "loss": 0.331, "lr": 2.8464603843097134e-07, "epoch": 0.7784392804557967, "percentage": 77.84, "elapsed_time": "1:18:05", "remaining_time": "0:22:13", "throughput": 8171.09, "total_tokens": 38282944}
|
|
{"current_steps": 12165, "total_steps": 15621, "loss": 0.3274, "lr": 2.8386571036244764e-07, "epoch": 0.7787593623967736, "percentage": 77.88, "elapsed_time": "1:18:05", "remaining_time": "0:22:11", "throughput": 8173.38, "total_tokens": 38299264}
|
|
{"current_steps": 12170, "total_steps": 15621, "loss": 0.3866, "lr": 2.830862763959929e-07, "epoch": 0.7790794443377504, "percentage": 77.91, "elapsed_time": "1:18:06", "remaining_time": "0:22:08", "throughput": 8175.46, "total_tokens": 38314368}
|
|
{"current_steps": 12175, "total_steps": 15621, "loss": 0.3108, "lr": 2.8230773750473956e-07, "epoch": 0.7793995262787273, "percentage": 77.94, "elapsed_time": "1:18:07", "remaining_time": "0:22:06", "throughput": 8177.58, "total_tokens": 38329664}
|
|
{"current_steps": 12180, "total_steps": 15621, "loss": 0.3067, "lr": 2.8153009466070267e-07, "epoch": 0.7797196082197042, "percentage": 77.97, "elapsed_time": "1:18:07", "remaining_time": "0:22:04", "throughput": 8179.75, "total_tokens": 38345408}
|
|
{"current_steps": 12185, "total_steps": 15621, "loss": 0.2959, "lr": 2.807533488347783e-07, "epoch": 0.7800396901606811, "percentage": 78.0, "elapsed_time": "1:18:08", "remaining_time": "0:22:02", "throughput": 8182.19, "total_tokens": 38362688}
|
|
{"current_steps": 12190, "total_steps": 15621, "loss": 0.2508, "lr": 2.7997750099674277e-07, "epoch": 0.7803597721016581, "percentage": 78.04, "elapsed_time": "1:18:09", "remaining_time": "0:21:59", "throughput": 8184.24, "total_tokens": 38377600}
|
|
{"current_steps": 12195, "total_steps": 15621, "loss": 0.5263, "lr": 2.792025521152512e-07, "epoch": 0.780679854042635, "percentage": 78.07, "elapsed_time": "1:18:09", "remaining_time": "0:21:57", "throughput": 8186.3, "total_tokens": 38392640}
|
|
{"current_steps": 12200, "total_steps": 15621, "loss": 0.4457, "lr": 2.784285031578365e-07, "epoch": 0.7809999359836118, "percentage": 78.1, "elapsed_time": "1:18:10", "remaining_time": "0:21:55", "throughput": 8188.48, "total_tokens": 38408448}
|
|
{"current_steps": 12205, "total_steps": 15621, "loss": 0.3649, "lr": 2.7765535509090786e-07, "epoch": 0.7813200179245887, "percentage": 78.13, "elapsed_time": "1:18:11", "remaining_time": "0:21:53", "throughput": 8190.7, "total_tokens": 38424512}
|
|
{"current_steps": 12210, "total_steps": 15621, "loss": 0.4661, "lr": 2.768831088797495e-07, "epoch": 0.7816400998655656, "percentage": 78.16, "elapsed_time": "1:18:11", "remaining_time": "0:21:50", "throughput": 8192.73, "total_tokens": 38439296}
|
|
{"current_steps": 12215, "total_steps": 15621, "loss": 0.247, "lr": 2.761117654885201e-07, "epoch": 0.7819601818065425, "percentage": 78.2, "elapsed_time": "1:18:12", "remaining_time": "0:21:48", "throughput": 8194.97, "total_tokens": 38455424}
|
|
{"current_steps": 12220, "total_steps": 15621, "loss": 0.3314, "lr": 2.7534132588025063e-07, "epoch": 0.7822802637475194, "percentage": 78.23, "elapsed_time": "1:18:13", "remaining_time": "0:21:46", "throughput": 8197.11, "total_tokens": 38470976}
|
|
{"current_steps": 12225, "total_steps": 15621, "loss": 0.5088, "lr": 2.7457179101684483e-07, "epoch": 0.7826003456884962, "percentage": 78.26, "elapsed_time": "1:18:13", "remaining_time": "0:21:43", "throughput": 8199.18, "total_tokens": 38486016}
|
|
{"current_steps": 12230, "total_steps": 15621, "loss": 0.2958, "lr": 2.7380316185907506e-07, "epoch": 0.7829204276294731, "percentage": 78.29, "elapsed_time": "1:18:14", "remaining_time": "0:21:41", "throughput": 8201.27, "total_tokens": 38501248}
|
|
{"current_steps": 12235, "total_steps": 15621, "loss": 0.3508, "lr": 2.730354393665839e-07, "epoch": 0.78324050957045, "percentage": 78.32, "elapsed_time": "1:18:15", "remaining_time": "0:21:39", "throughput": 8203.44, "total_tokens": 38516992}
|
|
{"current_steps": 12240, "total_steps": 15621, "loss": 0.3871, "lr": 2.7226862449788245e-07, "epoch": 0.7835605915114269, "percentage": 78.36, "elapsed_time": "1:18:15", "remaining_time": "0:21:37", "throughput": 8205.41, "total_tokens": 38531456}
|
|
{"current_steps": 12245, "total_steps": 15621, "loss": 0.3283, "lr": 2.715027182103482e-07, "epoch": 0.7838806734524039, "percentage": 78.39, "elapsed_time": "1:18:16", "remaining_time": "0:21:34", "throughput": 8207.55, "total_tokens": 38546880}
|
|
{"current_steps": 12250, "total_steps": 15621, "loss": 0.3104, "lr": 2.707377214602232e-07, "epoch": 0.7842007553933807, "percentage": 78.42, "elapsed_time": "1:18:17", "remaining_time": "0:21:32", "throughput": 8209.65, "total_tokens": 38562176}
|
|
{"current_steps": 12255, "total_steps": 15621, "loss": 0.4304, "lr": 2.699736352026157e-07, "epoch": 0.7845208373343576, "percentage": 78.45, "elapsed_time": "1:18:17", "remaining_time": "0:21:30", "throughput": 8211.75, "total_tokens": 38577472}
|
|
{"current_steps": 12260, "total_steps": 15621, "loss": 0.3265, "lr": 2.6921046039149645e-07, "epoch": 0.7848409192753345, "percentage": 78.48, "elapsed_time": "1:18:18", "remaining_time": "0:21:28", "throughput": 8213.9, "total_tokens": 38593088}
|
|
{"current_steps": 12265, "total_steps": 15621, "loss": 0.3378, "lr": 2.6844819797969744e-07, "epoch": 0.7851610012163114, "percentage": 78.52, "elapsed_time": "1:18:19", "remaining_time": "0:21:25", "throughput": 8215.93, "total_tokens": 38607936}
|
|
{"current_steps": 12270, "total_steps": 15621, "loss": 0.2504, "lr": 2.6768684891891236e-07, "epoch": 0.7854810831572883, "percentage": 78.55, "elapsed_time": "1:18:19", "remaining_time": "0:21:23", "throughput": 8218.31, "total_tokens": 38625024}
|
|
{"current_steps": 12275, "total_steps": 15621, "loss": 0.3268, "lr": 2.6692641415969497e-07, "epoch": 0.7858011650982651, "percentage": 78.58, "elapsed_time": "1:18:20", "remaining_time": "0:21:21", "throughput": 8220.64, "total_tokens": 38641792}
|
|
{"current_steps": 12280, "total_steps": 15621, "loss": 0.4112, "lr": 2.66166894651457e-07, "epoch": 0.786121247039242, "percentage": 78.61, "elapsed_time": "1:18:21", "remaining_time": "0:21:19", "throughput": 8222.72, "total_tokens": 38656896}
|
|
{"current_steps": 12285, "total_steps": 15621, "loss": 0.343, "lr": 2.654082913424668e-07, "epoch": 0.7864413289802189, "percentage": 78.64, "elapsed_time": "1:18:21", "remaining_time": "0:21:16", "throughput": 8224.87, "total_tokens": 38672448}
|
|
{"current_steps": 12290, "total_steps": 15621, "loss": 0.305, "lr": 2.6465060517985003e-07, "epoch": 0.7867614109211958, "percentage": 78.68, "elapsed_time": "1:18:22", "remaining_time": "0:21:14", "throughput": 8227.1, "total_tokens": 38688576}
|
|
{"current_steps": 12295, "total_steps": 15621, "loss": 0.5196, "lr": 2.638938371095867e-07, "epoch": 0.7870814928621728, "percentage": 78.71, "elapsed_time": "1:18:23", "remaining_time": "0:21:12", "throughput": 8229.23, "total_tokens": 38704064}
|
|
{"current_steps": 12300, "total_steps": 15621, "loss": 0.3756, "lr": 2.6313798807651065e-07, "epoch": 0.7874015748031497, "percentage": 78.74, "elapsed_time": "1:18:23", "remaining_time": "0:21:10", "throughput": 8231.27, "total_tokens": 38718976}
|
|
{"current_steps": 12305, "total_steps": 15621, "loss": 0.3578, "lr": 2.6238305902430813e-07, "epoch": 0.7877216567441265, "percentage": 78.77, "elapsed_time": "1:18:24", "remaining_time": "0:21:07", "throughput": 8233.36, "total_tokens": 38734272}
|
|
{"current_steps": 12310, "total_steps": 15621, "loss": 0.3147, "lr": 2.61629050895517e-07, "epoch": 0.7880417386851034, "percentage": 78.8, "elapsed_time": "1:18:25", "remaining_time": "0:21:05", "throughput": 8235.44, "total_tokens": 38749504}
|
|
{"current_steps": 12315, "total_steps": 15621, "loss": 0.3237, "lr": 2.608759646315253e-07, "epoch": 0.7883618206260803, "percentage": 78.84, "elapsed_time": "1:18:25", "remaining_time": "0:21:03", "throughput": 8237.47, "total_tokens": 38764352}
|
|
{"current_steps": 12320, "total_steps": 15621, "loss": 0.3771, "lr": 2.6012380117257005e-07, "epoch": 0.7886819025670572, "percentage": 78.87, "elapsed_time": "1:18:26", "remaining_time": "0:21:01", "throughput": 8239.64, "total_tokens": 38780096}
|
|
{"current_steps": 12325, "total_steps": 15621, "loss": 0.3853, "lr": 2.5937256145773613e-07, "epoch": 0.789001984508034, "percentage": 78.9, "elapsed_time": "1:18:27", "remaining_time": "0:20:58", "throughput": 8241.78, "total_tokens": 38795712}
|
|
{"current_steps": 12330, "total_steps": 15621, "loss": 0.3191, "lr": 2.586222464249551e-07, "epoch": 0.7893220664490109, "percentage": 78.93, "elapsed_time": "1:18:27", "remaining_time": "0:20:56", "throughput": 8243.93, "total_tokens": 38811328}
|
|
{"current_steps": 12335, "total_steps": 15621, "loss": 0.2067, "lr": 2.5787285701100413e-07, "epoch": 0.7896421483899878, "percentage": 78.96, "elapsed_time": "1:18:28", "remaining_time": "0:20:54", "throughput": 8245.97, "total_tokens": 38826240}
|
|
{"current_steps": 12340, "total_steps": 15621, "loss": 0.3655, "lr": 2.571243941515048e-07, "epoch": 0.7899622303309647, "percentage": 79.0, "elapsed_time": "1:18:29", "remaining_time": "0:20:52", "throughput": 8248.23, "total_tokens": 38842624}
|
|
{"current_steps": 12345, "total_steps": 15621, "loss": 0.278, "lr": 2.563768587809213e-07, "epoch": 0.7902823122719416, "percentage": 79.03, "elapsed_time": "1:18:29", "remaining_time": "0:20:49", "throughput": 8250.24, "total_tokens": 38857472}
|
|
{"current_steps": 12350, "total_steps": 15621, "loss": 0.4174, "lr": 2.5563025183256137e-07, "epoch": 0.7906023942129186, "percentage": 79.06, "elapsed_time": "1:18:30", "remaining_time": "0:20:47", "throughput": 8252.25, "total_tokens": 38872256}
|
|
{"current_steps": 12355, "total_steps": 15621, "loss": 0.5513, "lr": 2.548845742385717e-07, "epoch": 0.7909224761538954, "percentage": 79.09, "elapsed_time": "1:18:31", "remaining_time": "0:20:45", "throughput": 8254.75, "total_tokens": 38890048}
|
|
{"current_steps": 12360, "total_steps": 15621, "loss": 0.2424, "lr": 2.541398269299393e-07, "epoch": 0.7912425580948723, "percentage": 79.12, "elapsed_time": "1:18:31", "remaining_time": "0:20:43", "throughput": 8256.9, "total_tokens": 38905664}
|
|
{"current_steps": 12365, "total_steps": 15621, "loss": 0.3106, "lr": 2.5339601083649063e-07, "epoch": 0.7915626400358492, "percentage": 79.16, "elapsed_time": "1:18:32", "remaining_time": "0:20:40", "throughput": 8259.8, "total_tokens": 38926144}
|
|
{"current_steps": 12370, "total_steps": 15621, "loss": 0.5144, "lr": 2.526531268868889e-07, "epoch": 0.7918827219768261, "percentage": 79.19, "elapsed_time": "1:18:33", "remaining_time": "0:20:38", "throughput": 8262.08, "total_tokens": 38942720}
|
|
{"current_steps": 12375, "total_steps": 15621, "loss": 0.3388, "lr": 2.5191117600863266e-07, "epoch": 0.792202803917803, "percentage": 79.22, "elapsed_time": "1:18:34", "remaining_time": "0:20:36", "throughput": 8264.19, "total_tokens": 38958144}
|
|
{"current_steps": 12380, "total_steps": 15621, "loss": 0.2559, "lr": 2.511701591280565e-07, "epoch": 0.7925228858587798, "percentage": 79.25, "elapsed_time": "1:18:34", "remaining_time": "0:20:34", "throughput": 8266.28, "total_tokens": 38973376}
|
|
{"current_steps": 12385, "total_steps": 15621, "loss": 0.3501, "lr": 2.504300771703295e-07, "epoch": 0.7928429677997567, "percentage": 79.28, "elapsed_time": "1:18:35", "remaining_time": "0:20:32", "throughput": 8268.5, "total_tokens": 38989504}
|
|
{"current_steps": 12390, "total_steps": 15621, "loss": 0.3819, "lr": 2.496909310594517e-07, "epoch": 0.7931630497407336, "percentage": 79.32, "elapsed_time": "1:18:36", "remaining_time": "0:20:29", "throughput": 8270.64, "total_tokens": 39005056}
|
|
{"current_steps": 12395, "total_steps": 15621, "loss": 0.4581, "lr": 2.4895272171825587e-07, "epoch": 0.7934831316817105, "percentage": 79.35, "elapsed_time": "1:18:36", "remaining_time": "0:20:27", "throughput": 8272.77, "total_tokens": 39020608}
|
|
{"current_steps": 12400, "total_steps": 15621, "loss": 0.4464, "lr": 2.482154500684055e-07, "epoch": 0.7938032136226874, "percentage": 79.38, "elapsed_time": "1:18:37", "remaining_time": "0:20:25", "throughput": 8274.82, "total_tokens": 39035712}
|
|
{"current_steps": 12405, "total_steps": 15621, "loss": 0.3431, "lr": 2.4747911703039293e-07, "epoch": 0.7941232955636643, "percentage": 79.41, "elapsed_time": "1:18:38", "remaining_time": "0:20:23", "throughput": 8276.89, "total_tokens": 39050880}
|
|
{"current_steps": 12410, "total_steps": 15621, "loss": 0.3737, "lr": 2.467437235235378e-07, "epoch": 0.7944433775046412, "percentage": 79.44, "elapsed_time": "1:18:38", "remaining_time": "0:20:20", "throughput": 8278.92, "total_tokens": 39065792}
|
|
{"current_steps": 12415, "total_steps": 15621, "loss": 0.3441, "lr": 2.460092704659883e-07, "epoch": 0.7947634594456181, "percentage": 79.48, "elapsed_time": "1:18:39", "remaining_time": "0:20:18", "throughput": 8281.0, "total_tokens": 39080960}
|
|
{"current_steps": 12420, "total_steps": 15621, "loss": 0.2641, "lr": 2.452757587747174e-07, "epoch": 0.795083541386595, "percentage": 79.51, "elapsed_time": "1:18:40", "remaining_time": "0:20:16", "throughput": 8283.23, "total_tokens": 39097216}
|
|
{"current_steps": 12425, "total_steps": 15621, "loss": 0.182, "lr": 2.445431893655232e-07, "epoch": 0.7954036233275719, "percentage": 79.54, "elapsed_time": "1:18:40", "remaining_time": "0:20:14", "throughput": 8285.42, "total_tokens": 39113152}
|
|
{"current_steps": 12430, "total_steps": 15621, "loss": 0.3652, "lr": 2.438115631530271e-07, "epoch": 0.7957237052685487, "percentage": 79.57, "elapsed_time": "1:18:41", "remaining_time": "0:20:12", "throughput": 8287.77, "total_tokens": 39130176}
|
|
{"current_steps": 12435, "total_steps": 15621, "loss": 0.2338, "lr": 2.4308088105067305e-07, "epoch": 0.7960437872095256, "percentage": 79.6, "elapsed_time": "1:18:42", "remaining_time": "0:20:09", "throughput": 8289.9, "total_tokens": 39145792}
|
|
{"current_steps": 12440, "total_steps": 15621, "loss": 0.4227, "lr": 2.423511439707262e-07, "epoch": 0.7963638691505025, "percentage": 79.64, "elapsed_time": "1:18:42", "remaining_time": "0:20:07", "throughput": 8292.02, "total_tokens": 39161280}
|
|
{"current_steps": 12445, "total_steps": 15621, "loss": 0.2807, "lr": 2.4162235282427177e-07, "epoch": 0.7966839510914794, "percentage": 79.67, "elapsed_time": "1:18:43", "remaining_time": "0:20:05", "throughput": 8294.09, "total_tokens": 39176512}
|
|
{"current_steps": 12450, "total_steps": 15621, "loss": 0.353, "lr": 2.408945085212144e-07, "epoch": 0.7970040330324563, "percentage": 79.7, "elapsed_time": "1:18:44", "remaining_time": "0:20:03", "throughput": 8296.16, "total_tokens": 39191808}
|
|
{"current_steps": 12455, "total_steps": 15621, "loss": 0.2507, "lr": 2.401676119702759e-07, "epoch": 0.7973241149734333, "percentage": 79.73, "elapsed_time": "1:18:44", "remaining_time": "0:20:01", "throughput": 8298.48, "total_tokens": 39208640}
|
|
{"current_steps": 12460, "total_steps": 15621, "loss": 0.3667, "lr": 2.394416640789952e-07, "epoch": 0.7976441969144101, "percentage": 79.76, "elapsed_time": "1:18:45", "remaining_time": "0:19:58", "throughput": 8300.45, "total_tokens": 39223232}
|
|
{"current_steps": 12465, "total_steps": 15621, "loss": 0.3149, "lr": 2.3871666575372696e-07, "epoch": 0.797964278855387, "percentage": 79.8, "elapsed_time": "1:18:46", "remaining_time": "0:19:56", "throughput": 8302.54, "total_tokens": 39238656}
|
|
{"current_steps": 12470, "total_steps": 15621, "loss": 0.5348, "lr": 2.3799261789963964e-07, "epoch": 0.7982843607963639, "percentage": 79.83, "elapsed_time": "1:18:46", "remaining_time": "0:19:54", "throughput": 8304.93, "total_tokens": 39255872}
|
|
{"current_steps": 12475, "total_steps": 15621, "loss": 0.269, "lr": 2.3726952142071644e-07, "epoch": 0.7986044427373408, "percentage": 79.86, "elapsed_time": "1:18:47", "remaining_time": "0:19:52", "throughput": 8306.93, "total_tokens": 39270784}
|
|
{"current_steps": 12480, "total_steps": 15621, "loss": 0.3524, "lr": 2.365473772197508e-07, "epoch": 0.7989245246783176, "percentage": 79.89, "elapsed_time": "1:18:48", "remaining_time": "0:19:49", "throughput": 8309.02, "total_tokens": 39286080}
|
|
{"current_steps": 12485, "total_steps": 15621, "loss": 0.3557, "lr": 2.3582618619834883e-07, "epoch": 0.7992446066192945, "percentage": 79.92, "elapsed_time": "1:18:48", "remaining_time": "0:19:47", "throughput": 8311.1, "total_tokens": 39301312}
|
|
{"current_steps": 12490, "total_steps": 15621, "loss": 0.2214, "lr": 2.3510594925692528e-07, "epoch": 0.7995646885602714, "percentage": 79.96, "elapsed_time": "1:18:49", "remaining_time": "0:19:45", "throughput": 8313.19, "total_tokens": 39316736}
|
|
{"current_steps": 12495, "total_steps": 15621, "loss": 0.3518, "lr": 2.343866672947057e-07, "epoch": 0.7998847705012483, "percentage": 79.99, "elapsed_time": "1:18:50", "remaining_time": "0:19:43", "throughput": 8315.15, "total_tokens": 39331264}
|
|
{"current_steps": 12500, "total_steps": 15621, "loss": 0.2711, "lr": 2.336683412097209e-07, "epoch": 0.8002048524422252, "percentage": 80.02, "elapsed_time": "1:18:50", "remaining_time": "0:19:41", "throughput": 8317.12, "total_tokens": 39345856}
|
|
{"current_steps": 12505, "total_steps": 15621, "loss": 0.3662, "lr": 2.329509718988095e-07, "epoch": 0.800524934383202, "percentage": 80.05, "elapsed_time": "1:18:51", "remaining_time": "0:19:38", "throughput": 8319.22, "total_tokens": 39361280}
|
|
{"current_steps": 12510, "total_steps": 15621, "loss": 0.3395, "lr": 2.3223456025761645e-07, "epoch": 0.800845016324179, "percentage": 80.08, "elapsed_time": "1:18:52", "remaining_time": "0:19:36", "throughput": 8321.18, "total_tokens": 39375872}
|
|
{"current_steps": 12512, "total_steps": 15621, "eval_loss": 0.3655269742012024, "epoch": 0.8009730491005698, "percentage": 80.1, "elapsed_time": "1:19:42", "remaining_time": "0:19:48", "throughput": 8234.01, "total_tokens": 39382144}
|
|
{"current_steps": 12515, "total_steps": 15621, "loss": 0.3043, "lr": 2.315191071805892e-07, "epoch": 0.8011650982651559, "percentage": 80.12, "elapsed_time": "1:23:39", "remaining_time": "0:20:45", "throughput": 7847.49, "total_tokens": 39392320}
|
|
{"current_steps": 12520, "total_steps": 15621, "loss": 0.3619, "lr": 2.3080461356097937e-07, "epoch": 0.8014851802061328, "percentage": 80.15, "elapsed_time": "1:23:40", "remaining_time": "0:20:43", "throughput": 7849.52, "total_tokens": 39407680}
|
|
{"current_steps": 12525, "total_steps": 15621, "loss": 0.2951, "lr": 2.30091080290841e-07, "epoch": 0.8018052621471097, "percentage": 80.18, "elapsed_time": "1:23:41", "remaining_time": "0:20:41", "throughput": 7851.76, "total_tokens": 39424512}
|
|
{"current_steps": 12530, "total_steps": 15621, "loss": 0.3417, "lr": 2.29378508261029e-07, "epoch": 0.8021253440880866, "percentage": 80.21, "elapsed_time": "1:23:41", "remaining_time": "0:20:38", "throughput": 7853.69, "total_tokens": 39439296}
|
|
{"current_steps": 12535, "total_steps": 15621, "loss": 0.3672, "lr": 2.2866689836119702e-07, "epoch": 0.8024454260290634, "percentage": 80.24, "elapsed_time": "1:23:42", "remaining_time": "0:20:36", "throughput": 7856.0, "total_tokens": 39456576}
|
|
{"current_steps": 12540, "total_steps": 15621, "loss": 0.3553, "lr": 2.2795625147979913e-07, "epoch": 0.8027655079700403, "percentage": 80.28, "elapsed_time": "1:23:43", "remaining_time": "0:20:34", "throughput": 7858.13, "total_tokens": 39472512}
|
|
{"current_steps": 12545, "total_steps": 15621, "loss": 0.2351, "lr": 2.2724656850408597e-07, "epoch": 0.8030855899110172, "percentage": 80.31, "elapsed_time": "1:23:43", "remaining_time": "0:20:31", "throughput": 7860.19, "total_tokens": 39488192}
|
|
{"current_steps": 12550, "total_steps": 15621, "loss": 0.3808, "lr": 2.2653785032010532e-07, "epoch": 0.8034056718519941, "percentage": 80.34, "elapsed_time": "1:23:44", "remaining_time": "0:20:29", "throughput": 7862.2, "total_tokens": 39503552}
|
|
{"current_steps": 12555, "total_steps": 15621, "loss": 0.3368, "lr": 2.258300978126999e-07, "epoch": 0.803725753792971, "percentage": 80.37, "elapsed_time": "1:23:45", "remaining_time": "0:20:27", "throughput": 7864.35, "total_tokens": 39519744}
|
|
{"current_steps": 12560, "total_steps": 15621, "loss": 0.4903, "lr": 2.2512331186550715e-07, "epoch": 0.804045835733948, "percentage": 80.4, "elapsed_time": "1:23:45", "remaining_time": "0:20:24", "throughput": 7866.38, "total_tokens": 39535232}
|
|
{"current_steps": 12565, "total_steps": 15621, "loss": 0.3867, "lr": 2.244174933609575e-07, "epoch": 0.8043659176749248, "percentage": 80.44, "elapsed_time": "1:23:46", "remaining_time": "0:20:22", "throughput": 7868.25, "total_tokens": 39549568}
|
|
{"current_steps": 12570, "total_steps": 15621, "loss": 0.2726, "lr": 2.2371264318027383e-07, "epoch": 0.8046859996159017, "percentage": 80.47, "elapsed_time": "1:23:47", "remaining_time": "0:20:20", "throughput": 7870.44, "total_tokens": 39566016}
|
|
{"current_steps": 12575, "total_steps": 15621, "loss": 0.2337, "lr": 2.2300876220346975e-07, "epoch": 0.8050060815568786, "percentage": 80.5, "elapsed_time": "1:23:47", "remaining_time": "0:20:17", "throughput": 7872.52, "total_tokens": 39581760}
|
|
{"current_steps": 12580, "total_steps": 15621, "loss": 0.2888, "lr": 2.2230585130934897e-07, "epoch": 0.8053261634978555, "percentage": 80.53, "elapsed_time": "1:23:48", "remaining_time": "0:20:15", "throughput": 7874.65, "total_tokens": 39597888}
|
|
{"current_steps": 12585, "total_steps": 15621, "loss": 0.4469, "lr": 2.2160391137550394e-07, "epoch": 0.8056462454388323, "percentage": 80.56, "elapsed_time": "1:23:49", "remaining_time": "0:20:13", "throughput": 7876.71, "total_tokens": 39613568}
|
|
{"current_steps": 12590, "total_steps": 15621, "loss": 0.4226, "lr": 2.2090294327831494e-07, "epoch": 0.8059663273798092, "percentage": 80.6, "elapsed_time": "1:23:49", "remaining_time": "0:20:10", "throughput": 7878.61, "total_tokens": 39628096}
|
|
{"current_steps": 12595, "total_steps": 15621, "loss": 0.2881, "lr": 2.202029478929488e-07, "epoch": 0.8062864093207861, "percentage": 80.63, "elapsed_time": "1:23:50", "remaining_time": "0:20:08", "throughput": 7880.48, "total_tokens": 39642560}
|
|
{"current_steps": 12600, "total_steps": 15621, "loss": 0.2958, "lr": 2.195039260933581e-07, "epoch": 0.806606491261763, "percentage": 80.66, "elapsed_time": "1:23:51", "remaining_time": "0:20:06", "throughput": 7882.53, "total_tokens": 39658112}
|
|
{"current_steps": 12605, "total_steps": 15621, "loss": 0.2724, "lr": 2.1880587875227973e-07, "epoch": 0.8069265732027399, "percentage": 80.69, "elapsed_time": "1:23:51", "remaining_time": "0:20:03", "throughput": 7884.65, "total_tokens": 39674112}
|
|
{"current_steps": 12610, "total_steps": 15621, "loss": 0.3308, "lr": 2.18108806741234e-07, "epoch": 0.8072466551437167, "percentage": 80.72, "elapsed_time": "1:23:52", "remaining_time": "0:20:01", "throughput": 7886.81, "total_tokens": 39690432}
|
|
{"current_steps": 12615, "total_steps": 15621, "loss": 0.3547, "lr": 2.1741271093052315e-07, "epoch": 0.8075667370846937, "percentage": 80.76, "elapsed_time": "1:23:53", "remaining_time": "0:19:59", "throughput": 7888.83, "total_tokens": 39705792}
|
|
{"current_steps": 12620, "total_steps": 15621, "loss": 0.4658, "lr": 2.167175921892318e-07, "epoch": 0.8078868190256706, "percentage": 80.79, "elapsed_time": "1:23:53", "remaining_time": "0:19:57", "throughput": 7890.97, "total_tokens": 39722048}
|
|
{"current_steps": 12625, "total_steps": 15621, "loss": 0.4219, "lr": 2.1602345138522314e-07, "epoch": 0.8082069009666475, "percentage": 80.82, "elapsed_time": "1:23:54", "remaining_time": "0:19:54", "throughput": 7893.12, "total_tokens": 39738304}
|
|
{"current_steps": 12630, "total_steps": 15621, "loss": 0.3551, "lr": 2.1533028938514008e-07, "epoch": 0.8085269829076244, "percentage": 80.85, "elapsed_time": "1:23:55", "remaining_time": "0:19:52", "throughput": 7895.14, "total_tokens": 39753728}
|
|
{"current_steps": 12635, "total_steps": 15621, "loss": 0.3441, "lr": 2.1463810705440433e-07, "epoch": 0.8088470648486012, "percentage": 80.88, "elapsed_time": "1:23:55", "remaining_time": "0:19:50", "throughput": 7897.22, "total_tokens": 39769600}
|
|
{"current_steps": 12640, "total_steps": 15621, "loss": 0.3571, "lr": 2.139469052572127e-07, "epoch": 0.8091671467895781, "percentage": 80.92, "elapsed_time": "1:23:56", "remaining_time": "0:19:47", "throughput": 7899.09, "total_tokens": 39784000}
|
|
{"current_steps": 12645, "total_steps": 15621, "loss": 0.3587, "lr": 2.1325668485653891e-07, "epoch": 0.809487228730555, "percentage": 80.95, "elapsed_time": "1:23:57", "remaining_time": "0:19:45", "throughput": 7901.23, "total_tokens": 39800320}
|
|
{"current_steps": 12650, "total_steps": 15621, "loss": 0.4617, "lr": 2.1256744671413173e-07, "epoch": 0.8098073106715319, "percentage": 80.98, "elapsed_time": "1:23:57", "remaining_time": "0:19:43", "throughput": 7903.2, "total_tokens": 39815360}
|
|
{"current_steps": 12655, "total_steps": 15621, "loss": 0.3819, "lr": 2.1187919169051316e-07, "epoch": 0.8101273926125088, "percentage": 81.01, "elapsed_time": "1:23:58", "remaining_time": "0:19:40", "throughput": 7905.08, "total_tokens": 39829952}
|
|
{"current_steps": 12660, "total_steps": 15621, "loss": 0.3505, "lr": 2.111919206449767e-07, "epoch": 0.8104474745534856, "percentage": 81.04, "elapsed_time": "1:23:59", "remaining_time": "0:19:38", "throughput": 7907.1, "total_tokens": 39845376}
|
|
{"current_steps": 12665, "total_steps": 15621, "loss": 0.4955, "lr": 2.1050563443558922e-07, "epoch": 0.8107675564944626, "percentage": 81.08, "elapsed_time": "1:23:59", "remaining_time": "0:19:36", "throughput": 7909.25, "total_tokens": 39861696}
|
|
{"current_steps": 12670, "total_steps": 15621, "loss": 0.3, "lr": 2.0982033391918697e-07, "epoch": 0.8110876384354395, "percentage": 81.11, "elapsed_time": "1:24:00", "remaining_time": "0:19:34", "throughput": 7911.32, "total_tokens": 39877440}
|
|
{"current_steps": 12675, "total_steps": 15621, "loss": 0.3292, "lr": 2.0913601995137543e-07, "epoch": 0.8114077203764164, "percentage": 81.14, "elapsed_time": "1:24:01", "remaining_time": "0:19:31", "throughput": 7913.46, "total_tokens": 39893760}
|
|
{"current_steps": 12680, "total_steps": 15621, "loss": 0.2889, "lr": 2.084526933865287e-07, "epoch": 0.8117278023173933, "percentage": 81.17, "elapsed_time": "1:24:01", "remaining_time": "0:19:29", "throughput": 7915.54, "total_tokens": 39909568}
|
|
{"current_steps": 12685, "total_steps": 15621, "loss": 0.4667, "lr": 2.0777035507778817e-07, "epoch": 0.8120478842583702, "percentage": 81.2, "elapsed_time": "1:24:02", "remaining_time": "0:19:27", "throughput": 7917.35, "total_tokens": 39923648}
|
|
{"current_steps": 12690, "total_steps": 15621, "loss": 0.4268, "lr": 2.0708900587706135e-07, "epoch": 0.812367966199347, "percentage": 81.24, "elapsed_time": "1:24:03", "remaining_time": "0:19:24", "throughput": 7919.34, "total_tokens": 39939008}
|
|
{"current_steps": 12695, "total_steps": 15621, "loss": 0.3356, "lr": 2.0640864663502e-07, "epoch": 0.8126880481403239, "percentage": 81.27, "elapsed_time": "1:24:03", "remaining_time": "0:19:22", "throughput": 7921.44, "total_tokens": 39955072}
|
|
{"current_steps": 12700, "total_steps": 15621, "loss": 0.4563, "lr": 2.057292782011013e-07, "epoch": 0.8130081300813008, "percentage": 81.3, "elapsed_time": "1:24:04", "remaining_time": "0:19:20", "throughput": 7923.51, "total_tokens": 39970880}
|
|
{"current_steps": 12705, "total_steps": 15621, "loss": 0.3045, "lr": 2.0505090142350468e-07, "epoch": 0.8133282120222777, "percentage": 81.33, "elapsed_time": "1:24:05", "remaining_time": "0:19:17", "throughput": 7925.5, "total_tokens": 39986240}
|
|
{"current_steps": 12710, "total_steps": 15621, "loss": 0.3426, "lr": 2.0437351714919127e-07, "epoch": 0.8136482939632546, "percentage": 81.36, "elapsed_time": "1:24:05", "remaining_time": "0:19:15", "throughput": 7927.54, "total_tokens": 40001856}
|
|
{"current_steps": 12715, "total_steps": 15621, "loss": 0.3084, "lr": 2.0369712622388336e-07, "epoch": 0.8139683759042314, "percentage": 81.4, "elapsed_time": "1:24:06", "remaining_time": "0:19:13", "throughput": 7929.68, "total_tokens": 40018112}
|
|
{"current_steps": 12720, "total_steps": 15621, "loss": 0.2869, "lr": 2.0302172949206298e-07, "epoch": 0.8142884578452084, "percentage": 81.43, "elapsed_time": "1:24:07", "remaining_time": "0:19:11", "throughput": 7931.69, "total_tokens": 40033664}
|
|
{"current_steps": 12725, "total_steps": 15621, "loss": 0.3069, "lr": 2.0234732779697094e-07, "epoch": 0.8146085397861853, "percentage": 81.46, "elapsed_time": "1:24:07", "remaining_time": "0:19:08", "throughput": 7933.65, "total_tokens": 40048768}
|
|
{"current_steps": 12730, "total_steps": 15621, "loss": 0.3267, "lr": 2.016739219806056e-07, "epoch": 0.8149286217271622, "percentage": 81.49, "elapsed_time": "1:24:08", "remaining_time": "0:19:06", "throughput": 7935.51, "total_tokens": 40063232}
|
|
{"current_steps": 12735, "total_steps": 15621, "loss": 0.3839, "lr": 2.0100151288372215e-07, "epoch": 0.8152487036681391, "percentage": 81.52, "elapsed_time": "1:24:09", "remaining_time": "0:19:04", "throughput": 7937.61, "total_tokens": 40079296}
|
|
{"current_steps": 12740, "total_steps": 15621, "loss": 0.5609, "lr": 2.0033010134583084e-07, "epoch": 0.8155687856091159, "percentage": 81.56, "elapsed_time": "1:24:09", "remaining_time": "0:19:01", "throughput": 7939.66, "total_tokens": 40094976}
|
|
{"current_steps": 12745, "total_steps": 15621, "loss": 0.314, "lr": 1.9965968820519763e-07, "epoch": 0.8158888675500928, "percentage": 81.59, "elapsed_time": "1:24:10", "remaining_time": "0:18:59", "throughput": 7941.68, "total_tokens": 40110464}
|
|
{"current_steps": 12750, "total_steps": 15621, "loss": 0.4042, "lr": 1.9899027429884042e-07, "epoch": 0.8162089494910697, "percentage": 81.62, "elapsed_time": "1:24:11", "remaining_time": "0:18:57", "throughput": 7943.63, "total_tokens": 40125568}
|
|
{"current_steps": 12755, "total_steps": 15621, "loss": 0.4302, "lr": 1.983218604625305e-07, "epoch": 0.8165290314320466, "percentage": 81.65, "elapsed_time": "1:24:11", "remaining_time": "0:18:55", "throughput": 7945.71, "total_tokens": 40141440}
|
|
{"current_steps": 12760, "total_steps": 15621, "loss": 0.3288, "lr": 1.9765444753079096e-07, "epoch": 0.8168491133730235, "percentage": 81.68, "elapsed_time": "1:24:12", "remaining_time": "0:18:52", "throughput": 7947.64, "total_tokens": 40156416}
|
|
{"current_steps": 12765, "total_steps": 15621, "loss": 0.3985, "lr": 1.9698803633689408e-07, "epoch": 0.8171691953140003, "percentage": 81.72, "elapsed_time": "1:24:13", "remaining_time": "0:18:50", "throughput": 7949.8, "total_tokens": 40172928}
|
|
{"current_steps": 12770, "total_steps": 15621, "loss": 0.2404, "lr": 1.963226277128619e-07, "epoch": 0.8174892772549772, "percentage": 81.75, "elapsed_time": "1:24:13", "remaining_time": "0:18:48", "throughput": 7951.77, "total_tokens": 40188096}
|
|
{"current_steps": 12775, "total_steps": 15621, "loss": 0.3559, "lr": 1.956582224894655e-07, "epoch": 0.8178093591959542, "percentage": 81.78, "elapsed_time": "1:24:14", "remaining_time": "0:18:46", "throughput": 7953.85, "total_tokens": 40204032}
|
|
{"current_steps": 12780, "total_steps": 15621, "loss": 0.369, "lr": 1.949948214962227e-07, "epoch": 0.8181294411369311, "percentage": 81.81, "elapsed_time": "1:24:15", "remaining_time": "0:18:43", "throughput": 7955.78, "total_tokens": 40218944}
|
|
{"current_steps": 12785, "total_steps": 15621, "loss": 0.358, "lr": 1.943324255613964e-07, "epoch": 0.818449523077908, "percentage": 81.84, "elapsed_time": "1:24:16", "remaining_time": "0:18:41", "throughput": 7957.94, "total_tokens": 40235456}
|
|
{"current_steps": 12790, "total_steps": 15621, "loss": 0.4564, "lr": 1.936710355119967e-07, "epoch": 0.8187696050188848, "percentage": 81.88, "elapsed_time": "1:24:16", "remaining_time": "0:18:39", "throughput": 7959.83, "total_tokens": 40250176}
|
|
{"current_steps": 12795, "total_steps": 15621, "loss": 0.3312, "lr": 1.9301065217377655e-07, "epoch": 0.8190896869598617, "percentage": 81.91, "elapsed_time": "1:24:17", "remaining_time": "0:18:36", "throughput": 7961.81, "total_tokens": 40265472}
|
|
{"current_steps": 12800, "total_steps": 15621, "loss": 0.3995, "lr": 1.9235127637123249e-07, "epoch": 0.8194097689008386, "percentage": 81.94, "elapsed_time": "1:24:18", "remaining_time": "0:18:34", "throughput": 7963.93, "total_tokens": 40281728}
|
|
{"current_steps": 12805, "total_steps": 15621, "loss": 0.3221, "lr": 1.9169290892760225e-07, "epoch": 0.8197298508418155, "percentage": 81.97, "elapsed_time": "1:24:18", "remaining_time": "0:18:32", "throughput": 7965.87, "total_tokens": 40296768}
|
|
{"current_steps": 12810, "total_steps": 15621, "loss": 0.3295, "lr": 1.91035550664866e-07, "epoch": 0.8200499327827924, "percentage": 82.0, "elapsed_time": "1:24:19", "remaining_time": "0:18:30", "throughput": 7967.74, "total_tokens": 40311488}
|
|
{"current_steps": 12815, "total_steps": 15621, "loss": 0.3238, "lr": 1.903792024037433e-07, "epoch": 0.8203700147237692, "percentage": 82.04, "elapsed_time": "1:24:20", "remaining_time": "0:18:27", "throughput": 7969.78, "total_tokens": 40327232}
|
|
{"current_steps": 12820, "total_steps": 15621, "loss": 0.4338, "lr": 1.8972386496369185e-07, "epoch": 0.8206900966647461, "percentage": 82.07, "elapsed_time": "1:24:20", "remaining_time": "0:18:25", "throughput": 7971.99, "total_tokens": 40344064}
|
|
{"current_steps": 12825, "total_steps": 15621, "loss": 0.3917, "lr": 1.89069539162909e-07, "epoch": 0.8210101786057231, "percentage": 82.1, "elapsed_time": "1:24:21", "remaining_time": "0:18:23", "throughput": 7973.92, "total_tokens": 40359040}
|
|
{"current_steps": 12830, "total_steps": 15621, "loss": 0.4034, "lr": 1.8841622581832783e-07, "epoch": 0.8213302605467, "percentage": 82.13, "elapsed_time": "1:24:22", "remaining_time": "0:18:21", "throughput": 7976.21, "total_tokens": 40376384}
|
|
{"current_steps": 12835, "total_steps": 15621, "loss": 0.5928, "lr": 1.8776392574561783e-07, "epoch": 0.8216503424876769, "percentage": 82.17, "elapsed_time": "1:24:22", "remaining_time": "0:18:18", "throughput": 7978.23, "total_tokens": 40391936}
|
|
{"current_steps": 12840, "total_steps": 15621, "loss": 0.4702, "lr": 1.8711263975918322e-07, "epoch": 0.8219704244286538, "percentage": 82.2, "elapsed_time": "1:24:23", "remaining_time": "0:18:16", "throughput": 7980.46, "total_tokens": 40408832}
|
|
{"current_steps": 12845, "total_steps": 15621, "loss": 0.4516, "lr": 1.8646236867216215e-07, "epoch": 0.8222905063696306, "percentage": 82.23, "elapsed_time": "1:24:24", "remaining_time": "0:18:14", "throughput": 7982.61, "total_tokens": 40425280}
|
|
{"current_steps": 12850, "total_steps": 15621, "loss": 0.3451, "lr": 1.8581311329642591e-07, "epoch": 0.8226105883106075, "percentage": 82.26, "elapsed_time": "1:24:24", "remaining_time": "0:18:12", "throughput": 7984.62, "total_tokens": 40440832}
|
|
{"current_steps": 12855, "total_steps": 15621, "loss": 0.2711, "lr": 1.8516487444257723e-07, "epoch": 0.8229306702515844, "percentage": 82.29, "elapsed_time": "1:24:25", "remaining_time": "0:18:09", "throughput": 7986.98, "total_tokens": 40458624}
|
|
{"current_steps": 12860, "total_steps": 15621, "loss": 0.4068, "lr": 1.8451765291995004e-07, "epoch": 0.8232507521925613, "percentage": 82.33, "elapsed_time": "1:24:26", "remaining_time": "0:18:07", "throughput": 7989.08, "total_tokens": 40474688}
|
|
{"current_steps": 12865, "total_steps": 15621, "loss": 0.3591, "lr": 1.8387144953660806e-07, "epoch": 0.8235708341335382, "percentage": 82.36, "elapsed_time": "1:24:26", "remaining_time": "0:18:05", "throughput": 7991.19, "total_tokens": 40490816}
|
|
{"current_steps": 12870, "total_steps": 15621, "loss": 0.4492, "lr": 1.832262650993437e-07, "epoch": 0.823890916074515, "percentage": 82.39, "elapsed_time": "1:24:27", "remaining_time": "0:18:03", "throughput": 7993.16, "total_tokens": 40506112}
|
|
{"current_steps": 12875, "total_steps": 15621, "loss": 0.2973, "lr": 1.825821004136774e-07, "epoch": 0.8242109980154919, "percentage": 82.42, "elapsed_time": "1:24:28", "remaining_time": "0:18:00", "throughput": 7995.13, "total_tokens": 40521344}
|
|
{"current_steps": 12880, "total_steps": 15621, "loss": 0.2799, "lr": 1.819389562838559e-07, "epoch": 0.8245310799564689, "percentage": 82.45, "elapsed_time": "1:24:28", "remaining_time": "0:17:58", "throughput": 7997.17, "total_tokens": 40537024}
|
|
{"current_steps": 12885, "total_steps": 15621, "loss": 0.3058, "lr": 1.8129683351285319e-07, "epoch": 0.8248511618974458, "percentage": 82.49, "elapsed_time": "1:24:29", "remaining_time": "0:17:56", "throughput": 7999.2, "total_tokens": 40552640}
|
|
{"current_steps": 12890, "total_steps": 15621, "loss": 0.3209, "lr": 1.8065573290236626e-07, "epoch": 0.8251712438384227, "percentage": 82.52, "elapsed_time": "1:24:30", "remaining_time": "0:17:54", "throughput": 8001.18, "total_tokens": 40568000}
|
|
{"current_steps": 12895, "total_steps": 15621, "loss": 0.3806, "lr": 1.8001565525281682e-07, "epoch": 0.8254913257793995, "percentage": 82.55, "elapsed_time": "1:24:30", "remaining_time": "0:17:52", "throughput": 8003.4, "total_tokens": 40584960}
|
|
{"current_steps": 12900, "total_steps": 15621, "loss": 0.3707, "lr": 1.793766013633493e-07, "epoch": 0.8258114077203764, "percentage": 82.58, "elapsed_time": "1:24:31", "remaining_time": "0:17:49", "throughput": 8005.43, "total_tokens": 40600704}
|
|
{"current_steps": 12905, "total_steps": 15621, "loss": 0.3865, "lr": 1.7873857203183074e-07, "epoch": 0.8261314896613533, "percentage": 82.61, "elapsed_time": "1:24:32", "remaining_time": "0:17:47", "throughput": 8007.39, "total_tokens": 40615872}
|
|
{"current_steps": 12910, "total_steps": 15621, "loss": 0.4632, "lr": 1.7810156805484733e-07, "epoch": 0.8264515716023302, "percentage": 82.65, "elapsed_time": "1:24:32", "remaining_time": "0:17:45", "throughput": 8009.61, "total_tokens": 40632640}
|
|
{"current_steps": 12915, "total_steps": 15621, "loss": 0.3007, "lr": 1.7746559022770612e-07, "epoch": 0.8267716535433071, "percentage": 82.68, "elapsed_time": "1:24:33", "remaining_time": "0:17:43", "throughput": 8011.6, "total_tokens": 40648064}
|
|
{"current_steps": 12920, "total_steps": 15621, "loss": 0.3833, "lr": 1.7683063934443342e-07, "epoch": 0.8270917354842839, "percentage": 82.71, "elapsed_time": "1:24:34", "remaining_time": "0:17:40", "throughput": 8013.78, "total_tokens": 40664704}
|
|
{"current_steps": 12925, "total_steps": 15621, "loss": 0.4074, "lr": 1.7619671619777277e-07, "epoch": 0.8274118174252608, "percentage": 82.74, "elapsed_time": "1:24:35", "remaining_time": "0:17:38", "throughput": 8015.9, "total_tokens": 40681024}
|
|
{"current_steps": 12930, "total_steps": 15621, "loss": 0.4121, "lr": 1.7556382157918404e-07, "epoch": 0.8277318993662378, "percentage": 82.77, "elapsed_time": "1:24:35", "remaining_time": "0:17:36", "throughput": 8017.81, "total_tokens": 40695936}
|
|
{"current_steps": 12935, "total_steps": 15621, "loss": 0.3177, "lr": 1.7493195627884427e-07, "epoch": 0.8280519813072147, "percentage": 82.81, "elapsed_time": "1:24:36", "remaining_time": "0:17:34", "throughput": 8020.11, "total_tokens": 40713472}
|
|
{"current_steps": 12940, "total_steps": 15621, "loss": 0.3141, "lr": 1.7430112108564465e-07, "epoch": 0.8283720632481916, "percentage": 82.84, "elapsed_time": "1:24:37", "remaining_time": "0:17:31", "throughput": 8022.16, "total_tokens": 40729344}
|
|
{"current_steps": 12945, "total_steps": 15621, "loss": 0.3983, "lr": 1.736713167871896e-07, "epoch": 0.8286921451891684, "percentage": 82.87, "elapsed_time": "1:24:37", "remaining_time": "0:17:29", "throughput": 8024.3, "total_tokens": 40745856}
|
|
{"current_steps": 12950, "total_steps": 15621, "loss": 0.2973, "lr": 1.7304254416979803e-07, "epoch": 0.8290122271301453, "percentage": 82.9, "elapsed_time": "1:24:38", "remaining_time": "0:17:27", "throughput": 8026.39, "total_tokens": 40761920}
|
|
{"current_steps": 12955, "total_steps": 15621, "loss": 0.263, "lr": 1.7241480401849963e-07, "epoch": 0.8293323090711222, "percentage": 82.93, "elapsed_time": "1:24:39", "remaining_time": "0:17:25", "throughput": 8028.33, "total_tokens": 40776960}
|
|
{"current_steps": 12960, "total_steps": 15621, "loss": 0.3413, "lr": 1.7178809711703524e-07, "epoch": 0.8296523910120991, "percentage": 82.97, "elapsed_time": "1:24:39", "remaining_time": "0:17:23", "throughput": 8030.28, "total_tokens": 40792192}
|
|
{"current_steps": 12965, "total_steps": 15621, "loss": 0.36, "lr": 1.7116242424785599e-07, "epoch": 0.829972472953076, "percentage": 83.0, "elapsed_time": "1:24:40", "remaining_time": "0:17:20", "throughput": 8032.36, "total_tokens": 40808256}
|
|
{"current_steps": 12970, "total_steps": 15621, "loss": 0.4272, "lr": 1.7053778619212166e-07, "epoch": 0.8302925548940528, "percentage": 83.03, "elapsed_time": "1:24:41", "remaining_time": "0:17:18", "throughput": 8034.3, "total_tokens": 40823424}
|
|
{"current_steps": 12975, "total_steps": 15621, "loss": 0.4132, "lr": 1.6991418372970022e-07, "epoch": 0.8306126368350297, "percentage": 83.06, "elapsed_time": "1:24:41", "remaining_time": "0:17:16", "throughput": 8036.6, "total_tokens": 40840960}
|
|
{"current_steps": 12980, "total_steps": 15621, "loss": 0.3849, "lr": 1.6929161763916666e-07, "epoch": 0.8309327187760066, "percentage": 83.09, "elapsed_time": "1:24:42", "remaining_time": "0:17:14", "throughput": 8038.76, "total_tokens": 40857536}
|
|
{"current_steps": 12985, "total_steps": 15621, "loss": 0.3582, "lr": 1.686700886978021e-07, "epoch": 0.8312528007169836, "percentage": 83.13, "elapsed_time": "1:24:43", "remaining_time": "0:17:11", "throughput": 8040.94, "total_tokens": 40874240}
|
|
{"current_steps": 12990, "total_steps": 15621, "loss": 0.3579, "lr": 1.6804959768159266e-07, "epoch": 0.8315728826579605, "percentage": 83.16, "elapsed_time": "1:24:43", "remaining_time": "0:17:09", "throughput": 8042.81, "total_tokens": 40888960}
|
|
{"current_steps": 12995, "total_steps": 15621, "loss": 0.5373, "lr": 1.674301453652287e-07, "epoch": 0.8318929645989374, "percentage": 83.19, "elapsed_time": "1:24:44", "remaining_time": "0:17:07", "throughput": 8044.8, "total_tokens": 40904512}
|
|
{"current_steps": 13000, "total_steps": 15621, "loss": 0.2969, "lr": 1.6681173252210378e-07, "epoch": 0.8322130465399142, "percentage": 83.22, "elapsed_time": "1:24:45", "remaining_time": "0:17:05", "throughput": 8047.07, "total_tokens": 40921856}
|
|
{"current_steps": 13005, "total_steps": 15621, "loss": 0.3801, "lr": 1.6619435992431342e-07, "epoch": 0.8325331284808911, "percentage": 83.25, "elapsed_time": "1:24:46", "remaining_time": "0:17:03", "throughput": 8049.28, "total_tokens": 40938752}
|
|
{"current_steps": 13010, "total_steps": 15621, "loss": 0.3026, "lr": 1.6557802834265466e-07, "epoch": 0.832853210421868, "percentage": 83.29, "elapsed_time": "1:24:46", "remaining_time": "0:17:00", "throughput": 8051.23, "total_tokens": 40954048}
|
|
{"current_steps": 13015, "total_steps": 15621, "loss": 0.3634, "lr": 1.649627385466248e-07, "epoch": 0.8331732923628449, "percentage": 83.32, "elapsed_time": "1:24:47", "remaining_time": "0:16:58", "throughput": 8053.69, "total_tokens": 40972672}
|
|
{"current_steps": 13020, "total_steps": 15621, "loss": 0.2467, "lr": 1.643484913044202e-07, "epoch": 0.8334933743038218, "percentage": 83.35, "elapsed_time": "1:24:48", "remaining_time": "0:16:56", "throughput": 8055.61, "total_tokens": 40987648}
|
|
{"current_steps": 13025, "total_steps": 15621, "loss": 0.3171, "lr": 1.6373528738293564e-07, "epoch": 0.8338134562447986, "percentage": 83.38, "elapsed_time": "1:24:48", "remaining_time": "0:16:54", "throughput": 8057.64, "total_tokens": 41003328}
|
|
{"current_steps": 13030, "total_steps": 15621, "loss": 0.2939, "lr": 1.6312312754776404e-07, "epoch": 0.8341335381857755, "percentage": 83.41, "elapsed_time": "1:24:49", "remaining_time": "0:16:52", "throughput": 8059.6, "total_tokens": 41018624}
|
|
{"current_steps": 13035, "total_steps": 15621, "loss": 0.3318, "lr": 1.6251201256319357e-07, "epoch": 0.8344536201267524, "percentage": 83.45, "elapsed_time": "1:24:50", "remaining_time": "0:16:49", "throughput": 8061.66, "total_tokens": 41034624}
|
|
{"current_steps": 13040, "total_steps": 15621, "loss": 0.3699, "lr": 1.619019431922083e-07, "epoch": 0.8347737020677294, "percentage": 83.48, "elapsed_time": "1:24:50", "remaining_time": "0:16:47", "throughput": 8063.58, "total_tokens": 41049664}
|
|
{"current_steps": 13045, "total_steps": 15621, "loss": 0.3494, "lr": 1.6129292019648754e-07, "epoch": 0.8350937840087063, "percentage": 83.51, "elapsed_time": "1:24:51", "remaining_time": "0:16:45", "throughput": 8065.75, "total_tokens": 41066368}
|
|
{"current_steps": 13050, "total_steps": 15621, "loss": 0.2975, "lr": 1.606849443364038e-07, "epoch": 0.8354138659496831, "percentage": 83.54, "elapsed_time": "1:24:52", "remaining_time": "0:16:43", "throughput": 8067.76, "total_tokens": 41082048}
|
|
{"current_steps": 13055, "total_steps": 15621, "loss": 0.3425, "lr": 1.6007801637102104e-07, "epoch": 0.83573394789066, "percentage": 83.57, "elapsed_time": "1:24:52", "remaining_time": "0:16:41", "throughput": 8069.82, "total_tokens": 41098048}
|
|
{"current_steps": 13060, "total_steps": 15621, "loss": 0.3858, "lr": 1.594721370580969e-07, "epoch": 0.8360540298316369, "percentage": 83.61, "elapsed_time": "1:24:53", "remaining_time": "0:16:38", "throughput": 8071.69, "total_tokens": 41112768}
|
|
{"current_steps": 13065, "total_steps": 15621, "loss": 0.4241, "lr": 1.588673071540788e-07, "epoch": 0.8363741117726138, "percentage": 83.64, "elapsed_time": "1:24:54", "remaining_time": "0:16:36", "throughput": 8073.57, "total_tokens": 41127488}
|
|
{"current_steps": 13070, "total_steps": 15621, "loss": 0.3195, "lr": 1.5826352741410332e-07, "epoch": 0.8366941937135907, "percentage": 83.67, "elapsed_time": "1:24:54", "remaining_time": "0:16:34", "throughput": 8075.44, "total_tokens": 41142272}
|
|
{"current_steps": 13075, "total_steps": 15621, "loss": 0.2947, "lr": 1.576607985919971e-07, "epoch": 0.8370142756545675, "percentage": 83.7, "elapsed_time": "1:24:55", "remaining_time": "0:16:32", "throughput": 8077.45, "total_tokens": 41157952}
|
|
{"current_steps": 13080, "total_steps": 15621, "loss": 0.3547, "lr": 1.57059121440274e-07, "epoch": 0.8373343575955444, "percentage": 83.73, "elapsed_time": "1:24:56", "remaining_time": "0:16:29", "throughput": 8079.36, "total_tokens": 41172992}
|
|
{"current_steps": 13085, "total_steps": 15621, "loss": 0.3823, "lr": 1.56458496710135e-07, "epoch": 0.8376544395365213, "percentage": 83.77, "elapsed_time": "1:24:56", "remaining_time": "0:16:27", "throughput": 8081.22, "total_tokens": 41187776}
|
|
{"current_steps": 13090, "total_steps": 15621, "loss": 0.3403, "lr": 1.5585892515146716e-07, "epoch": 0.8379745214774983, "percentage": 83.8, "elapsed_time": "1:24:57", "remaining_time": "0:16:25", "throughput": 8083.38, "total_tokens": 41204416}
|
|
{"current_steps": 13095, "total_steps": 15621, "loss": 0.4214, "lr": 1.5526040751284253e-07, "epoch": 0.8382946034184752, "percentage": 83.83, "elapsed_time": "1:24:58", "remaining_time": "0:16:23", "throughput": 8085.4, "total_tokens": 41220032}
|
|
{"current_steps": 13100, "total_steps": 15621, "loss": 0.3168, "lr": 1.546629445415174e-07, "epoch": 0.838614685359452, "percentage": 83.86, "elapsed_time": "1:24:58", "remaining_time": "0:16:21", "throughput": 8087.42, "total_tokens": 41235776}
|
|
{"current_steps": 13105, "total_steps": 15621, "loss": 0.3724, "lr": 1.5406653698343141e-07, "epoch": 0.8389347673004289, "percentage": 83.89, "elapsed_time": "1:24:59", "remaining_time": "0:16:19", "throughput": 8089.54, "total_tokens": 41252160}
|
|
{"current_steps": 13110, "total_steps": 15621, "loss": 0.3591, "lr": 1.5347118558320637e-07, "epoch": 0.8392548492414058, "percentage": 83.93, "elapsed_time": "1:25:00", "remaining_time": "0:16:16", "throughput": 8091.74, "total_tokens": 41269056}
|
|
{"current_steps": 13115, "total_steps": 15621, "loss": 0.3632, "lr": 1.5287689108414558e-07, "epoch": 0.8395749311823827, "percentage": 83.96, "elapsed_time": "1:25:00", "remaining_time": "0:16:14", "throughput": 8093.83, "total_tokens": 41285312}
|
|
{"current_steps": 13120, "total_steps": 15621, "loss": 0.3374, "lr": 1.5228365422823242e-07, "epoch": 0.8398950131233596, "percentage": 83.99, "elapsed_time": "1:25:01", "remaining_time": "0:16:12", "throughput": 8095.84, "total_tokens": 41300992}
|
|
{"current_steps": 13125, "total_steps": 15621, "loss": 0.2637, "lr": 1.5169147575613038e-07, "epoch": 0.8402150950643364, "percentage": 84.02, "elapsed_time": "1:25:02", "remaining_time": "0:16:10", "throughput": 8098.04, "total_tokens": 41317952}
|
|
{"current_steps": 13130, "total_steps": 15621, "loss": 0.297, "lr": 1.5110035640718098e-07, "epoch": 0.8405351770053133, "percentage": 84.05, "elapsed_time": "1:25:02", "remaining_time": "0:16:08", "throughput": 8100.02, "total_tokens": 41333440}
|
|
{"current_steps": 13135, "total_steps": 15621, "loss": 0.3665, "lr": 1.5051029691940387e-07, "epoch": 0.8408552589462902, "percentage": 84.09, "elapsed_time": "1:25:03", "remaining_time": "0:16:05", "throughput": 8102.05, "total_tokens": 41349312}
|
|
{"current_steps": 13140, "total_steps": 15621, "loss": 0.356, "lr": 1.4992129802949515e-07, "epoch": 0.8411753408872671, "percentage": 84.12, "elapsed_time": "1:25:04", "remaining_time": "0:16:03", "throughput": 8103.94, "total_tokens": 41364288}
|
|
{"current_steps": 13145, "total_steps": 15621, "loss": 0.2884, "lr": 1.4933336047282696e-07, "epoch": 0.8414954228282441, "percentage": 84.15, "elapsed_time": "1:25:04", "remaining_time": "0:16:01", "throughput": 8105.93, "total_tokens": 41379904}
|
|
{"current_steps": 13150, "total_steps": 15621, "loss": 0.3481, "lr": 1.4874648498344579e-07, "epoch": 0.841815504769221, "percentage": 84.18, "elapsed_time": "1:25:05", "remaining_time": "0:15:59", "throughput": 8107.77, "total_tokens": 41394432}
|
|
{"current_steps": 13155, "total_steps": 15621, "loss": 0.3485, "lr": 1.4816067229407348e-07, "epoch": 0.8421355867101978, "percentage": 84.21, "elapsed_time": "1:25:06", "remaining_time": "0:15:57", "throughput": 8109.74, "total_tokens": 41409984}
|
|
{"current_steps": 13160, "total_steps": 15621, "loss": 0.3051, "lr": 1.4757592313610322e-07, "epoch": 0.8424556686511747, "percentage": 84.25, "elapsed_time": "1:25:06", "remaining_time": "0:15:55", "throughput": 8111.78, "total_tokens": 41425984}
|
|
{"current_steps": 13165, "total_steps": 15621, "loss": 0.3312, "lr": 1.4699223823960128e-07, "epoch": 0.8427757505921516, "percentage": 84.28, "elapsed_time": "1:25:07", "remaining_time": "0:15:52", "throughput": 8113.8, "total_tokens": 41441920}
|
|
{"current_steps": 13170, "total_steps": 15621, "loss": 0.3389, "lr": 1.4640961833330579e-07, "epoch": 0.8430958325331285, "percentage": 84.31, "elapsed_time": "1:25:08", "remaining_time": "0:15:50", "throughput": 8115.81, "total_tokens": 41457664}
|
|
{"current_steps": 13175, "total_steps": 15621, "loss": 0.2518, "lr": 1.4582806414462378e-07, "epoch": 0.8434159144741054, "percentage": 84.34, "elapsed_time": "1:25:08", "remaining_time": "0:15:48", "throughput": 8117.74, "total_tokens": 41472832}
|
|
{"current_steps": 13180, "total_steps": 15621, "loss": 0.33, "lr": 1.4524757639963258e-07, "epoch": 0.8437359964150822, "percentage": 84.37, "elapsed_time": "1:25:09", "remaining_time": "0:15:46", "throughput": 8120.03, "total_tokens": 41490368}
|
|
{"current_steps": 13185, "total_steps": 15621, "loss": 0.4397, "lr": 1.4466815582307845e-07, "epoch": 0.8440560783560591, "percentage": 84.41, "elapsed_time": "1:25:10", "remaining_time": "0:15:44", "throughput": 8122.12, "total_tokens": 41506624}
|
|
{"current_steps": 13190, "total_steps": 15621, "loss": 0.251, "lr": 1.440898031383746e-07, "epoch": 0.844376160297036, "percentage": 84.44, "elapsed_time": "1:25:11", "remaining_time": "0:15:41", "throughput": 8124.26, "total_tokens": 41523264}
|
|
{"current_steps": 13195, "total_steps": 15621, "loss": 0.3803, "lr": 1.4351251906760064e-07, "epoch": 0.844696242238013, "percentage": 84.47, "elapsed_time": "1:25:11", "remaining_time": "0:15:39", "throughput": 8126.25, "total_tokens": 41538944}
|
|
{"current_steps": 13200, "total_steps": 15621, "loss": 0.3939, "lr": 1.4293630433150317e-07, "epoch": 0.8450163241789899, "percentage": 84.5, "elapsed_time": "1:25:12", "remaining_time": "0:15:37", "throughput": 8128.29, "total_tokens": 41554880}
|
|
{"current_steps": 13205, "total_steps": 15621, "loss": 0.4367, "lr": 1.423611596494927e-07, "epoch": 0.8453364061199667, "percentage": 84.53, "elapsed_time": "1:25:13", "remaining_time": "0:15:35", "throughput": 8130.1, "total_tokens": 41569280}
|
|
{"current_steps": 13210, "total_steps": 15621, "loss": 0.3546, "lr": 1.4178708573964438e-07, "epoch": 0.8456564880609436, "percentage": 84.57, "elapsed_time": "1:25:13", "remaining_time": "0:15:33", "throughput": 8132.03, "total_tokens": 41584576}
|
|
{"current_steps": 13215, "total_steps": 15621, "loss": 0.3589, "lr": 1.4121408331869566e-07, "epoch": 0.8459765700019205, "percentage": 84.6, "elapsed_time": "1:25:14", "remaining_time": "0:15:31", "throughput": 8133.99, "total_tokens": 41600000}
|
|
{"current_steps": 13220, "total_steps": 15621, "loss": 0.3603, "lr": 1.406421531020474e-07, "epoch": 0.8462966519428974, "percentage": 84.63, "elapsed_time": "1:25:14", "remaining_time": "0:15:28", "throughput": 8135.89, "total_tokens": 41615040}
|
|
{"current_steps": 13225, "total_steps": 15621, "loss": 0.3551, "lr": 1.4007129580376097e-07, "epoch": 0.8466167338838743, "percentage": 84.66, "elapsed_time": "1:25:15", "remaining_time": "0:15:26", "throughput": 8137.81, "total_tokens": 41630208}
|
|
{"current_steps": 13230, "total_steps": 15621, "loss": 0.3672, "lr": 1.3950151213655847e-07, "epoch": 0.8469368158248511, "percentage": 84.69, "elapsed_time": "1:25:16", "remaining_time": "0:15:24", "throughput": 8139.75, "total_tokens": 41645440}
|
|
{"current_steps": 13235, "total_steps": 15621, "loss": 0.3281, "lr": 1.389328028118214e-07, "epoch": 0.847256897765828, "percentage": 84.73, "elapsed_time": "1:25:16", "remaining_time": "0:15:22", "throughput": 8141.75, "total_tokens": 41661184}
|
|
{"current_steps": 13240, "total_steps": 15621, "loss": 0.358, "lr": 1.3836516853959e-07, "epoch": 0.8475769797068049, "percentage": 84.76, "elapsed_time": "1:25:17", "remaining_time": "0:15:20", "throughput": 8143.66, "total_tokens": 41676224}
|
|
{"current_steps": 13245, "total_steps": 15621, "loss": 0.308, "lr": 1.3779861002856242e-07, "epoch": 0.8478970616477818, "percentage": 84.79, "elapsed_time": "1:25:18", "remaining_time": "0:15:18", "throughput": 8145.49, "total_tokens": 41690816}
|
|
{"current_steps": 13250, "total_steps": 15621, "loss": 0.3357, "lr": 1.3723312798609366e-07, "epoch": 0.8482171435887588, "percentage": 84.82, "elapsed_time": "1:25:18", "remaining_time": "0:15:16", "throughput": 8147.52, "total_tokens": 41706688}
|
|
{"current_steps": 13255, "total_steps": 15621, "loss": 0.349, "lr": 1.3666872311819455e-07, "epoch": 0.8485372255297357, "percentage": 84.85, "elapsed_time": "1:25:19", "remaining_time": "0:15:13", "throughput": 8149.45, "total_tokens": 41721920}
|
|
{"current_steps": 13260, "total_steps": 15621, "loss": 0.285, "lr": 1.361053961295312e-07, "epoch": 0.8488573074707125, "percentage": 84.89, "elapsed_time": "1:25:20", "remaining_time": "0:15:11", "throughput": 8151.52, "total_tokens": 41738112}
|
|
{"current_steps": 13265, "total_steps": 15621, "loss": 0.3445, "lr": 1.3554314772342412e-07, "epoch": 0.8491773894116894, "percentage": 84.92, "elapsed_time": "1:25:20", "remaining_time": "0:15:09", "throughput": 8153.51, "total_tokens": 41753792}
|
|
{"current_steps": 13270, "total_steps": 15621, "loss": 0.3294, "lr": 1.349819786018469e-07, "epoch": 0.8494974713526663, "percentage": 84.95, "elapsed_time": "1:25:21", "remaining_time": "0:15:07", "throughput": 8155.79, "total_tokens": 41771328}
|
|
{"current_steps": 13275, "total_steps": 15621, "loss": 0.3734, "lr": 1.3442188946542566e-07, "epoch": 0.8498175532936432, "percentage": 84.98, "elapsed_time": "1:25:22", "remaining_time": "0:15:05", "throughput": 8157.9, "total_tokens": 41787712}
|
|
{"current_steps": 13280, "total_steps": 15621, "loss": 0.3099, "lr": 1.338628810134388e-07, "epoch": 0.85013763523462, "percentage": 85.01, "elapsed_time": "1:25:23", "remaining_time": "0:15:03", "throughput": 8159.83, "total_tokens": 41803072}
|
|
{"current_steps": 13285, "total_steps": 15621, "loss": 0.3624, "lr": 1.3330495394381435e-07, "epoch": 0.8504577171755969, "percentage": 85.05, "elapsed_time": "1:25:23", "remaining_time": "0:15:00", "throughput": 8161.82, "total_tokens": 41818688}
|
|
{"current_steps": 13290, "total_steps": 15621, "loss": 0.2868, "lr": 1.3274810895313083e-07, "epoch": 0.8507777991165738, "percentage": 85.08, "elapsed_time": "1:25:24", "remaining_time": "0:14:58", "throughput": 8163.73, "total_tokens": 41833792}
|
|
{"current_steps": 13294, "total_steps": 15621, "eval_loss": 0.35909759998321533, "epoch": 0.8510338646693554, "percentage": 85.1, "elapsed_time": "1:26:15", "remaining_time": "0:15:05", "throughput": 8085.49, "total_tokens": 41847872}
|
|
{"current_steps": 13295, "total_steps": 15621, "loss": 0.3846, "lr": 1.321923467366164e-07, "epoch": 0.8510978810575507, "percentage": 85.11, "elapsed_time": "1:29:30", "remaining_time": "0:15:39", "throughput": 7792.44, "total_tokens": 41850880}
|
|
{"current_steps": 13300, "total_steps": 15621, "loss": 0.183, "lr": 1.3163766798814603e-07, "epoch": 0.8514179629985277, "percentage": 85.14, "elapsed_time": "1:29:31", "remaining_time": "0:15:37", "throughput": 7794.36, "total_tokens": 41866560}
|
|
{"current_steps": 13305, "total_steps": 15621, "loss": 0.3041, "lr": 1.3108407340024264e-07, "epoch": 0.8517380449395046, "percentage": 85.17, "elapsed_time": "1:29:32", "remaining_time": "0:15:35", "throughput": 7796.3, "total_tokens": 41882240}
|
|
{"current_steps": 13310, "total_steps": 15621, "loss": 0.3421, "lr": 1.3053156366407613e-07, "epoch": 0.8520581268804814, "percentage": 85.21, "elapsed_time": "1:29:32", "remaining_time": "0:15:32", "throughput": 7798.38, "total_tokens": 41898880}
|
|
{"current_steps": 13315, "total_steps": 15621, "loss": 0.2428, "lr": 1.2998013946946119e-07, "epoch": 0.8523782088214583, "percentage": 85.24, "elapsed_time": "1:29:33", "remaining_time": "0:15:30", "throughput": 7800.52, "total_tokens": 41915968}
|
|
{"current_steps": 13320, "total_steps": 15621, "loss": 0.3499, "lr": 1.2942980150485706e-07, "epoch": 0.8526982907624352, "percentage": 85.27, "elapsed_time": "1:29:34", "remaining_time": "0:15:28", "throughput": 7802.35, "total_tokens": 41930816}
|
|
{"current_steps": 13325, "total_steps": 15621, "loss": 0.3192, "lr": 1.2888055045736723e-07, "epoch": 0.8530183727034121, "percentage": 85.3, "elapsed_time": "1:29:34", "remaining_time": "0:15:26", "throughput": 7804.38, "total_tokens": 41947200}
|
|
{"current_steps": 13330, "total_steps": 15621, "loss": 0.301, "lr": 1.283323870127384e-07, "epoch": 0.853338454644389, "percentage": 85.33, "elapsed_time": "1:29:35", "remaining_time": "0:15:23", "throughput": 7806.23, "total_tokens": 41962240}
|
|
{"current_steps": 13335, "total_steps": 15621, "loss": 0.3015, "lr": 1.2778531185535911e-07, "epoch": 0.8536585365853658, "percentage": 85.37, "elapsed_time": "1:29:36", "remaining_time": "0:15:21", "throughput": 7808.28, "total_tokens": 41978752}
|
|
{"current_steps": 13340, "total_steps": 15621, "loss": 0.3288, "lr": 1.2723932566825844e-07, "epoch": 0.8539786185263427, "percentage": 85.4, "elapsed_time": "1:29:36", "remaining_time": "0:15:19", "throughput": 7810.18, "total_tokens": 41994112}
|
|
{"current_steps": 13345, "total_steps": 15621, "loss": 0.294, "lr": 1.2669442913310723e-07, "epoch": 0.8542987004673196, "percentage": 85.43, "elapsed_time": "1:29:37", "remaining_time": "0:15:17", "throughput": 7812.2, "total_tokens": 42010432}
|
|
{"current_steps": 13350, "total_steps": 15621, "loss": 0.2745, "lr": 1.2615062293021506e-07, "epoch": 0.8546187824082965, "percentage": 85.46, "elapsed_time": "1:29:38", "remaining_time": "0:15:14", "throughput": 7814.11, "total_tokens": 42025984}
|
|
{"current_steps": 13355, "total_steps": 15621, "loss": 0.3147, "lr": 1.2560790773853025e-07, "epoch": 0.8549388643492735, "percentage": 85.49, "elapsed_time": "1:29:38", "remaining_time": "0:15:12", "throughput": 7815.94, "total_tokens": 42040832}
|
|
{"current_steps": 13360, "total_steps": 15621, "loss": 0.4083, "lr": 1.2506628423563915e-07, "epoch": 0.8552589462902503, "percentage": 85.53, "elapsed_time": "1:29:39", "remaining_time": "0:15:10", "throughput": 7818.02, "total_tokens": 42057536}
|
|
{"current_steps": 13365, "total_steps": 15621, "loss": 0.2828, "lr": 1.2452575309776493e-07, "epoch": 0.8555790282312272, "percentage": 85.56, "elapsed_time": "1:29:40", "remaining_time": "0:15:08", "throughput": 7819.96, "total_tokens": 42073152}
|
|
{"current_steps": 13370, "total_steps": 15621, "loss": 0.3032, "lr": 1.2398631499976732e-07, "epoch": 0.8558991101722041, "percentage": 85.59, "elapsed_time": "1:29:40", "remaining_time": "0:15:05", "throughput": 7821.85, "total_tokens": 42088512}
|
|
{"current_steps": 13375, "total_steps": 15621, "loss": 0.4253, "lr": 1.234479706151409e-07, "epoch": 0.856219192113181, "percentage": 85.62, "elapsed_time": "1:29:41", "remaining_time": "0:15:03", "throughput": 7823.7, "total_tokens": 42103552}
|
|
{"current_steps": 13380, "total_steps": 15621, "loss": 0.3577, "lr": 1.2291072061601503e-07, "epoch": 0.8565392740541579, "percentage": 85.65, "elapsed_time": "1:29:42", "remaining_time": "0:15:01", "throughput": 7825.73, "total_tokens": 42119872}
|
|
{"current_steps": 13385, "total_steps": 15621, "loss": 0.4374, "lr": 1.2237456567315264e-07, "epoch": 0.8568593559951347, "percentage": 85.69, "elapsed_time": "1:29:42", "remaining_time": "0:14:59", "throughput": 7827.86, "total_tokens": 42136832}
|
|
{"current_steps": 13390, "total_steps": 15621, "loss": 0.3158, "lr": 1.2183950645594944e-07, "epoch": 0.8571794379361116, "percentage": 85.72, "elapsed_time": "1:29:43", "remaining_time": "0:14:57", "throughput": 7829.84, "total_tokens": 42152896}
|
|
{"current_steps": 13395, "total_steps": 15621, "loss": 0.3555, "lr": 1.2130554363243318e-07, "epoch": 0.8574995198770885, "percentage": 85.75, "elapsed_time": "1:29:44", "remaining_time": "0:14:54", "throughput": 7831.69, "total_tokens": 42168064}
|
|
{"current_steps": 13400, "total_steps": 15621, "loss": 0.3738, "lr": 1.207726778692625e-07, "epoch": 0.8578196018180654, "percentage": 85.78, "elapsed_time": "1:29:44", "remaining_time": "0:14:52", "throughput": 7833.49, "total_tokens": 42182784}
|
|
{"current_steps": 13405, "total_steps": 15621, "loss": 0.3228, "lr": 1.2024090983172718e-07, "epoch": 0.8581396837590423, "percentage": 85.81, "elapsed_time": "1:29:45", "remaining_time": "0:14:50", "throughput": 7835.6, "total_tokens": 42199744}
|
|
{"current_steps": 13410, "total_steps": 15621, "loss": 0.3631, "lr": 1.1971024018374532e-07, "epoch": 0.8584597657000193, "percentage": 85.85, "elapsed_time": "1:29:46", "remaining_time": "0:14:48", "throughput": 7837.48, "total_tokens": 42215040}
|
|
{"current_steps": 13415, "total_steps": 15621, "loss": 0.3079, "lr": 1.1918066958786432e-07, "epoch": 0.8587798476409961, "percentage": 85.88, "elapsed_time": "1:29:46", "remaining_time": "0:14:45", "throughput": 7839.32, "total_tokens": 42230144}
|
|
{"current_steps": 13420, "total_steps": 15621, "loss": 0.3677, "lr": 1.1865219870525922e-07, "epoch": 0.859099929581973, "percentage": 85.91, "elapsed_time": "1:29:47", "remaining_time": "0:14:43", "throughput": 7841.34, "total_tokens": 42246528}
|
|
{"current_steps": 13425, "total_steps": 15621, "loss": 0.4245, "lr": 1.1812482819573222e-07, "epoch": 0.8594200115229499, "percentage": 85.94, "elapsed_time": "1:29:48", "remaining_time": "0:14:41", "throughput": 7843.4, "total_tokens": 42263168}
|
|
{"current_steps": 13430, "total_steps": 15621, "loss": 0.3877, "lr": 1.1759855871771163e-07, "epoch": 0.8597400934639268, "percentage": 85.97, "elapsed_time": "1:29:49", "remaining_time": "0:14:39", "throughput": 7845.34, "total_tokens": 42278912}
|
|
{"current_steps": 13435, "total_steps": 15621, "loss": 0.387, "lr": 1.1707339092825075e-07, "epoch": 0.8600601754049036, "percentage": 86.01, "elapsed_time": "1:29:49", "remaining_time": "0:14:36", "throughput": 7847.29, "total_tokens": 42294656}
|
|
{"current_steps": 13440, "total_steps": 15621, "loss": 0.3927, "lr": 1.1654932548302842e-07, "epoch": 0.8603802573458805, "percentage": 86.04, "elapsed_time": "1:29:50", "remaining_time": "0:14:34", "throughput": 7849.38, "total_tokens": 42311552}
|
|
{"current_steps": 13445, "total_steps": 15621, "loss": 0.365, "lr": 1.1602636303634595e-07, "epoch": 0.8607003392868574, "percentage": 86.07, "elapsed_time": "1:29:51", "remaining_time": "0:14:32", "throughput": 7851.36, "total_tokens": 42327552}
|
|
{"current_steps": 13450, "total_steps": 15621, "loss": 0.3526, "lr": 1.1550450424112801e-07, "epoch": 0.8610204212278343, "percentage": 86.1, "elapsed_time": "1:29:51", "remaining_time": "0:14:30", "throughput": 7853.31, "total_tokens": 42343360}
|
|
{"current_steps": 13455, "total_steps": 15621, "loss": 0.3455, "lr": 1.1498374974892178e-07, "epoch": 0.8613405031688112, "percentage": 86.13, "elapsed_time": "1:29:52", "remaining_time": "0:14:28", "throughput": 7855.39, "total_tokens": 42360064}
|
|
{"current_steps": 13460, "total_steps": 15621, "loss": 0.4371, "lr": 1.144641002098955e-07, "epoch": 0.8616605851097882, "percentage": 86.17, "elapsed_time": "1:29:53", "remaining_time": "0:14:25", "throughput": 7857.19, "total_tokens": 42374976}
|
|
{"current_steps": 13465, "total_steps": 15621, "loss": 0.3502, "lr": 1.1394555627283697e-07, "epoch": 0.861980667050765, "percentage": 86.2, "elapsed_time": "1:29:53", "remaining_time": "0:14:23", "throughput": 7859.27, "total_tokens": 42391616}
|
|
{"current_steps": 13470, "total_steps": 15621, "loss": 0.3075, "lr": 1.134281185851551e-07, "epoch": 0.8623007489917419, "percentage": 86.23, "elapsed_time": "1:29:54", "remaining_time": "0:14:21", "throughput": 7861.08, "total_tokens": 42406528}
|
|
{"current_steps": 13475, "total_steps": 15621, "loss": 0.2948, "lr": 1.1291178779287691e-07, "epoch": 0.8626208309327188, "percentage": 86.26, "elapsed_time": "1:29:55", "remaining_time": "0:14:19", "throughput": 7863.31, "total_tokens": 42424320}
|
|
{"current_steps": 13480, "total_steps": 15621, "loss": 0.3616, "lr": 1.1239656454064683e-07, "epoch": 0.8629409128736957, "percentage": 86.29, "elapsed_time": "1:29:55", "remaining_time": "0:14:17", "throughput": 7865.37, "total_tokens": 42440960}
|
|
{"current_steps": 13485, "total_steps": 15621, "loss": 0.2464, "lr": 1.1188244947172776e-07, "epoch": 0.8632609948146726, "percentage": 86.33, "elapsed_time": "1:29:56", "remaining_time": "0:14:14", "throughput": 7867.27, "total_tokens": 42456448}
|
|
{"current_steps": 13490, "total_steps": 15621, "loss": 0.3201, "lr": 1.1136944322799812e-07, "epoch": 0.8635810767556494, "percentage": 86.36, "elapsed_time": "1:29:57", "remaining_time": "0:14:12", "throughput": 7869.23, "total_tokens": 42472448}
|
|
{"current_steps": 13495, "total_steps": 15621, "loss": 0.3177, "lr": 1.1085754644995227e-07, "epoch": 0.8639011586966263, "percentage": 86.39, "elapsed_time": "1:29:57", "remaining_time": "0:14:10", "throughput": 7871.11, "total_tokens": 42487808}
|
|
{"current_steps": 13500, "total_steps": 15621, "loss": 0.3577, "lr": 1.1034675977669938e-07, "epoch": 0.8642212406376032, "percentage": 86.42, "elapsed_time": "1:29:58", "remaining_time": "0:14:08", "throughput": 7873.06, "total_tokens": 42503744}
|
|
{"current_steps": 13505, "total_steps": 15621, "loss": 0.6111, "lr": 1.0983708384596258e-07, "epoch": 0.8645413225785801, "percentage": 86.45, "elapsed_time": "1:29:59", "remaining_time": "0:14:05", "throughput": 7875.19, "total_tokens": 42520768}
|
|
{"current_steps": 13510, "total_steps": 15621, "loss": 0.3703, "lr": 1.0932851929407827e-07, "epoch": 0.864861404519557, "percentage": 86.49, "elapsed_time": "1:30:00", "remaining_time": "0:14:03", "throughput": 7877.24, "total_tokens": 42537408}
|
|
{"current_steps": 13515, "total_steps": 15621, "loss": 0.3583, "lr": 1.0882106675599534e-07, "epoch": 0.8651814864605339, "percentage": 86.52, "elapsed_time": "1:30:00", "remaining_time": "0:14:01", "throughput": 7879.25, "total_tokens": 42553728}
|
|
{"current_steps": 13520, "total_steps": 15621, "loss": 0.3226, "lr": 1.0831472686527409e-07, "epoch": 0.8655015684015108, "percentage": 86.55, "elapsed_time": "1:30:01", "remaining_time": "0:13:59", "throughput": 7881.1, "total_tokens": 42568896}
|
|
{"current_steps": 13525, "total_steps": 15621, "loss": 0.2985, "lr": 1.0780950025408586e-07, "epoch": 0.8658216503424877, "percentage": 86.58, "elapsed_time": "1:30:02", "remaining_time": "0:13:57", "throughput": 7882.94, "total_tokens": 42584000}
|
|
{"current_steps": 13530, "total_steps": 15621, "loss": 0.3884, "lr": 1.0730538755321217e-07, "epoch": 0.8661417322834646, "percentage": 86.61, "elapsed_time": "1:30:02", "remaining_time": "0:13:54", "throughput": 7884.92, "total_tokens": 42600192}
|
|
{"current_steps": 13535, "total_steps": 15621, "loss": 0.2997, "lr": 1.0680238939204334e-07, "epoch": 0.8664618142244415, "percentage": 86.65, "elapsed_time": "1:30:03", "remaining_time": "0:13:52", "throughput": 7886.66, "total_tokens": 42614656}
|
|
{"current_steps": 13540, "total_steps": 15621, "loss": 0.402, "lr": 1.0630050639857879e-07, "epoch": 0.8667818961654183, "percentage": 86.68, "elapsed_time": "1:30:04", "remaining_time": "0:13:50", "throughput": 7888.46, "total_tokens": 42629504}
|
|
{"current_steps": 13545, "total_steps": 15621, "loss": 0.3165, "lr": 1.0579973919942508e-07, "epoch": 0.8671019781063952, "percentage": 86.71, "elapsed_time": "1:30:04", "remaining_time": "0:13:48", "throughput": 7890.24, "total_tokens": 42644224}
|
|
{"current_steps": 13550, "total_steps": 15621, "loss": 0.2452, "lr": 1.0530008841979621e-07, "epoch": 0.8674220600473721, "percentage": 86.74, "elapsed_time": "1:30:05", "remaining_time": "0:13:46", "throughput": 7892.1, "total_tokens": 42659584}
|
|
{"current_steps": 13555, "total_steps": 15621, "loss": 0.272, "lr": 1.048015546835117e-07, "epoch": 0.867742141988349, "percentage": 86.77, "elapsed_time": "1:30:06", "remaining_time": "0:13:43", "throughput": 7894.1, "total_tokens": 42675776}
|
|
{"current_steps": 13560, "total_steps": 15621, "loss": 0.388, "lr": 1.0430413861299691e-07, "epoch": 0.8680622239293259, "percentage": 86.81, "elapsed_time": "1:30:06", "remaining_time": "0:13:41", "throughput": 7896.26, "total_tokens": 42693184}
|
|
{"current_steps": 13565, "total_steps": 15621, "loss": 0.4564, "lr": 1.0380784082928196e-07, "epoch": 0.8683823058703029, "percentage": 86.84, "elapsed_time": "1:30:07", "remaining_time": "0:13:39", "throughput": 7898.45, "total_tokens": 42710784}
|
|
{"current_steps": 13570, "total_steps": 15621, "loss": 0.3905, "lr": 1.0331266195200006e-07, "epoch": 0.8687023878112797, "percentage": 86.87, "elapsed_time": "1:30:08", "remaining_time": "0:13:37", "throughput": 7900.45, "total_tokens": 42727040}
|
|
{"current_steps": 13575, "total_steps": 15621, "loss": 0.3189, "lr": 1.0281860259938779e-07, "epoch": 0.8690224697522566, "percentage": 86.9, "elapsed_time": "1:30:08", "remaining_time": "0:13:35", "throughput": 7902.3, "total_tokens": 42742208}
|
|
{"current_steps": 13580, "total_steps": 15621, "loss": 0.3634, "lr": 1.0232566338828452e-07, "epoch": 0.8693425516932335, "percentage": 86.93, "elapsed_time": "1:30:09", "remaining_time": "0:13:33", "throughput": 7904.29, "total_tokens": 42758464}
|
|
{"current_steps": 13585, "total_steps": 15621, "loss": 0.4021, "lr": 1.018338449341305e-07, "epoch": 0.8696626336342104, "percentage": 86.97, "elapsed_time": "1:30:10", "remaining_time": "0:13:30", "throughput": 7906.19, "total_tokens": 42774016}
|
|
{"current_steps": 13590, "total_steps": 15621, "loss": 0.3924, "lr": 1.0134314785096632e-07, "epoch": 0.8699827155751872, "percentage": 87.0, "elapsed_time": "1:30:10", "remaining_time": "0:13:28", "throughput": 7908.05, "total_tokens": 42789248}
|
|
{"current_steps": 13595, "total_steps": 15621, "loss": 0.3446, "lr": 1.0085357275143359e-07, "epoch": 0.8703027975161641, "percentage": 87.03, "elapsed_time": "1:30:11", "remaining_time": "0:13:26", "throughput": 7909.92, "total_tokens": 42804608}
|
|
{"current_steps": 13600, "total_steps": 15621, "loss": 0.495, "lr": 1.0036512024677268e-07, "epoch": 0.870622879457141, "percentage": 87.06, "elapsed_time": "1:30:12", "remaining_time": "0:13:24", "throughput": 7911.73, "total_tokens": 42819584}
|
|
{"current_steps": 13605, "total_steps": 15621, "loss": 0.2823, "lr": 9.98777909468217e-08, "epoch": 0.8709429613981179, "percentage": 87.09, "elapsed_time": "1:30:12", "remaining_time": "0:13:22", "throughput": 7913.63, "total_tokens": 42835200}
|
|
{"current_steps": 13610, "total_steps": 15621, "loss": 0.4072, "lr": 9.939158546001736e-08, "epoch": 0.8712630433390948, "percentage": 87.13, "elapsed_time": "1:30:13", "remaining_time": "0:13:19", "throughput": 7915.8, "total_tokens": 42852672}
|
|
{"current_steps": 13615, "total_steps": 15621, "loss": 0.3252, "lr": 9.890650439339299e-08, "epoch": 0.8715831252800716, "percentage": 87.16, "elapsed_time": "1:30:14", "remaining_time": "0:13:17", "throughput": 7917.76, "total_tokens": 42868672}
|
|
{"current_steps": 13620, "total_steps": 15621, "loss": 0.412, "lr": 9.842254835257791e-08, "epoch": 0.8719032072210486, "percentage": 87.19, "elapsed_time": "1:30:14", "remaining_time": "0:13:15", "throughput": 7919.64, "total_tokens": 42884096}
|
|
{"current_steps": 13625, "total_steps": 15621, "loss": 0.374, "lr": 9.793971794179679e-08, "epoch": 0.8722232891620255, "percentage": 87.22, "elapsed_time": "1:30:15", "remaining_time": "0:13:13", "throughput": 7921.41, "total_tokens": 42898752}
|
|
{"current_steps": 13630, "total_steps": 15621, "loss": 0.3535, "lr": 9.745801376386931e-08, "epoch": 0.8725433711030024, "percentage": 87.25, "elapsed_time": "1:30:16", "remaining_time": "0:13:11", "throughput": 7923.36, "total_tokens": 42914688}
|
|
{"current_steps": 13635, "total_steps": 15621, "loss": 0.3186, "lr": 9.697743642020861e-08, "epoch": 0.8728634530439793, "percentage": 87.29, "elapsed_time": "1:30:16", "remaining_time": "0:13:08", "throughput": 7925.33, "total_tokens": 42930688}
|
|
{"current_steps": 13640, "total_steps": 15621, "loss": 0.3329, "lr": 9.649798651082119e-08, "epoch": 0.8731835349849562, "percentage": 87.32, "elapsed_time": "1:30:17", "remaining_time": "0:13:06", "throughput": 7927.34, "total_tokens": 42947008}
|
|
{"current_steps": 13645, "total_steps": 15621, "loss": 0.3973, "lr": 9.601966463430588e-08, "epoch": 0.873503616925933, "percentage": 87.35, "elapsed_time": "1:30:18", "remaining_time": "0:13:04", "throughput": 7929.26, "total_tokens": 42962816}
|
|
{"current_steps": 13650, "total_steps": 15621, "loss": 0.3428, "lr": 9.554247138785321e-08, "epoch": 0.8738236988669099, "percentage": 87.38, "elapsed_time": "1:30:18", "remaining_time": "0:13:02", "throughput": 7931.05, "total_tokens": 42977664}
|
|
{"current_steps": 13655, "total_steps": 15621, "loss": 0.4653, "lr": 9.506640736724447e-08, "epoch": 0.8741437808078868, "percentage": 87.41, "elapsed_time": "1:30:19", "remaining_time": "0:13:00", "throughput": 7932.97, "total_tokens": 42993472}
|
|
{"current_steps": 13660, "total_steps": 15621, "loss": 0.3973, "lr": 9.459147316685123e-08, "epoch": 0.8744638627488637, "percentage": 87.45, "elapsed_time": "1:30:20", "remaining_time": "0:12:58", "throughput": 7935.11, "total_tokens": 43010688}
|
|
{"current_steps": 13665, "total_steps": 15621, "loss": 0.3411, "lr": 9.41176693796345e-08, "epoch": 0.8747839446898406, "percentage": 87.48, "elapsed_time": "1:30:20", "remaining_time": "0:12:55", "throughput": 7937.17, "total_tokens": 43027392}
|
|
{"current_steps": 13670, "total_steps": 15621, "loss": 0.4175, "lr": 9.364499659714364e-08, "epoch": 0.8751040266308175, "percentage": 87.51, "elapsed_time": "1:30:21", "remaining_time": "0:12:53", "throughput": 7939.07, "total_tokens": 43043008}
|
|
{"current_steps": 13675, "total_steps": 15621, "loss": 0.3438, "lr": 9.31734554095165e-08, "epoch": 0.8754241085717944, "percentage": 87.54, "elapsed_time": "1:30:22", "remaining_time": "0:12:51", "throughput": 7941.04, "total_tokens": 43059072}
|
|
{"current_steps": 13680, "total_steps": 15621, "loss": 0.3456, "lr": 9.270304640547744e-08, "epoch": 0.8757441905127713, "percentage": 87.57, "elapsed_time": "1:30:23", "remaining_time": "0:12:49", "throughput": 7942.93, "total_tokens": 43074624}
|
|
{"current_steps": 13685, "total_steps": 15621, "loss": 0.3922, "lr": 9.223377017233768e-08, "epoch": 0.8760642724537482, "percentage": 87.61, "elapsed_time": "1:30:23", "remaining_time": "0:12:47", "throughput": 7944.74, "total_tokens": 43089536}
|
|
{"current_steps": 13690, "total_steps": 15621, "loss": 0.361, "lr": 9.176562729599458e-08, "epoch": 0.8763843543947251, "percentage": 87.64, "elapsed_time": "1:30:24", "remaining_time": "0:12:45", "throughput": 7946.55, "total_tokens": 43104512}
|
|
{"current_steps": 13695, "total_steps": 15621, "loss": 0.3434, "lr": 9.129861836092944e-08, "epoch": 0.8767044363357019, "percentage": 87.67, "elapsed_time": "1:30:24", "remaining_time": "0:12:42", "throughput": 7948.52, "total_tokens": 43120640}
|
|
{"current_steps": 13700, "total_steps": 15621, "loss": 0.4433, "lr": 9.083274395020845e-08, "epoch": 0.8770245182766788, "percentage": 87.7, "elapsed_time": "1:30:25", "remaining_time": "0:12:40", "throughput": 7950.43, "total_tokens": 43136384}
|
|
{"current_steps": 13705, "total_steps": 15621, "loss": 0.4021, "lr": 9.036800464548156e-08, "epoch": 0.8773446002176557, "percentage": 87.73, "elapsed_time": "1:30:26", "remaining_time": "0:12:38", "throughput": 7952.51, "total_tokens": 43153216}
|
|
{"current_steps": 13710, "total_steps": 15621, "loss": 0.3506, "lr": 8.990440102698138e-08, "epoch": 0.8776646821586326, "percentage": 87.77, "elapsed_time": "1:30:27", "remaining_time": "0:12:36", "throughput": 7954.28, "total_tokens": 43167936}
|
|
{"current_steps": 13715, "total_steps": 15621, "loss": 0.2722, "lr": 8.944193367352182e-08, "epoch": 0.8779847640996095, "percentage": 87.8, "elapsed_time": "1:30:27", "remaining_time": "0:12:34", "throughput": 7956.21, "total_tokens": 43183872}
|
|
{"current_steps": 13720, "total_steps": 15621, "loss": 0.408, "lr": 8.898060316249944e-08, "epoch": 0.8783048460405863, "percentage": 87.83, "elapsed_time": "1:30:28", "remaining_time": "0:12:32", "throughput": 7958.21, "total_tokens": 43200256}
|
|
{"current_steps": 13725, "total_steps": 15621, "loss": 0.3606, "lr": 8.852041006989064e-08, "epoch": 0.8786249279815633, "percentage": 87.86, "elapsed_time": "1:30:29", "remaining_time": "0:12:29", "throughput": 7960.34, "total_tokens": 43217600}
|
|
{"current_steps": 13730, "total_steps": 15621, "loss": 0.3858, "lr": 8.80613549702518e-08, "epoch": 0.8789450099225402, "percentage": 87.89, "elapsed_time": "1:30:29", "remaining_time": "0:12:27", "throughput": 7962.26, "total_tokens": 43233344}
|
|
{"current_steps": 13735, "total_steps": 15621, "loss": 0.5397, "lr": 8.760343843671824e-08, "epoch": 0.8792650918635171, "percentage": 87.93, "elapsed_time": "1:30:30", "remaining_time": "0:12:25", "throughput": 7964.19, "total_tokens": 43249280}
|
|
{"current_steps": 13740, "total_steps": 15621, "loss": 0.4595, "lr": 8.714666104100487e-08, "epoch": 0.879585173804494, "percentage": 87.96, "elapsed_time": "1:30:31", "remaining_time": "0:12:23", "throughput": 7966.11, "total_tokens": 43265024}
|
|
{"current_steps": 13745, "total_steps": 15621, "loss": 0.3597, "lr": 8.66910233534034e-08, "epoch": 0.8799052557454708, "percentage": 87.99, "elapsed_time": "1:30:31", "remaining_time": "0:12:21", "throughput": 7968.0, "total_tokens": 43280576}
|
|
{"current_steps": 13750, "total_steps": 15621, "loss": 0.3074, "lr": 8.62365259427823e-08, "epoch": 0.8802253376864477, "percentage": 88.02, "elapsed_time": "1:30:32", "remaining_time": "0:12:19", "throughput": 7969.87, "total_tokens": 43296064}
|
|
{"current_steps": 13755, "total_steps": 15621, "loss": 0.292, "lr": 8.578316937658758e-08, "epoch": 0.8805454196274246, "percentage": 88.05, "elapsed_time": "1:30:33", "remaining_time": "0:12:17", "throughput": 7971.74, "total_tokens": 43311552}
|
|
{"current_steps": 13760, "total_steps": 15621, "loss": 0.3216, "lr": 8.533095422083992e-08, "epoch": 0.8808655015684015, "percentage": 88.09, "elapsed_time": "1:30:33", "remaining_time": "0:12:14", "throughput": 7973.52, "total_tokens": 43326272}
|
|
{"current_steps": 13765, "total_steps": 15621, "loss": 0.2926, "lr": 8.487988104013533e-08, "epoch": 0.8811855835093784, "percentage": 88.12, "elapsed_time": "1:30:34", "remaining_time": "0:12:12", "throughput": 7975.5, "total_tokens": 43342592}
|
|
{"current_steps": 13770, "total_steps": 15621, "loss": 0.3183, "lr": 8.4429950397644e-08, "epoch": 0.8815056654503552, "percentage": 88.15, "elapsed_time": "1:30:35", "remaining_time": "0:12:10", "throughput": 7977.34, "total_tokens": 43357888}
|
|
{"current_steps": 13775, "total_steps": 15621, "loss": 0.272, "lr": 8.398116285510948e-08, "epoch": 0.8818257473913321, "percentage": 88.18, "elapsed_time": "1:30:35", "remaining_time": "0:12:08", "throughput": 7979.34, "total_tokens": 43374272}
|
|
{"current_steps": 13780, "total_steps": 15621, "loss": 0.2715, "lr": 8.353351897284844e-08, "epoch": 0.8821458293323091, "percentage": 88.21, "elapsed_time": "1:30:36", "remaining_time": "0:12:06", "throughput": 7981.69, "total_tokens": 43393280}
|
|
{"current_steps": 13785, "total_steps": 15621, "loss": 0.4713, "lr": 8.308701930974949e-08, "epoch": 0.882465911273286, "percentage": 88.25, "elapsed_time": "1:30:37", "remaining_time": "0:12:04", "throughput": 7983.69, "total_tokens": 43409600}
|
|
{"current_steps": 13790, "total_steps": 15621, "loss": 0.4144, "lr": 8.264166442327269e-08, "epoch": 0.8827859932142629, "percentage": 88.28, "elapsed_time": "1:30:37", "remaining_time": "0:12:02", "throughput": 7985.47, "total_tokens": 43424384}
|
|
{"current_steps": 13795, "total_steps": 15621, "loss": 0.2591, "lr": 8.219745486944885e-08, "epoch": 0.8831060751552398, "percentage": 88.31, "elapsed_time": "1:30:38", "remaining_time": "0:11:59", "throughput": 7987.37, "total_tokens": 43440128}
|
|
{"current_steps": 13800, "total_steps": 15621, "loss": 0.4706, "lr": 8.175439120287875e-08, "epoch": 0.8834261570962166, "percentage": 88.34, "elapsed_time": "1:30:39", "remaining_time": "0:11:57", "throughput": 7989.17, "total_tokens": 43455168}
|
|
{"current_steps": 13805, "total_steps": 15621, "loss": 0.3454, "lr": 8.131247397673269e-08, "epoch": 0.8837462390371935, "percentage": 88.37, "elapsed_time": "1:30:39", "remaining_time": "0:11:55", "throughput": 7991.24, "total_tokens": 43472064}
|
|
{"current_steps": 13810, "total_steps": 15621, "loss": 0.4261, "lr": 8.087170374274921e-08, "epoch": 0.8840663209781704, "percentage": 88.41, "elapsed_time": "1:30:40", "remaining_time": "0:11:53", "throughput": 7993.17, "total_tokens": 43488000}
|
|
{"current_steps": 13815, "total_steps": 15621, "loss": 0.2942, "lr": 8.043208105123578e-08, "epoch": 0.8843864029191473, "percentage": 88.44, "elapsed_time": "1:30:41", "remaining_time": "0:11:51", "throughput": 7995.04, "total_tokens": 43503488}
|
|
{"current_steps": 13820, "total_steps": 15621, "loss": 0.3418, "lr": 7.999360645106579e-08, "epoch": 0.8847064848601242, "percentage": 88.47, "elapsed_time": "1:30:41", "remaining_time": "0:11:49", "throughput": 7996.83, "total_tokens": 43518336}
|
|
{"current_steps": 13825, "total_steps": 15621, "loss": 0.2716, "lr": 7.955628048968011e-08, "epoch": 0.885026566801101, "percentage": 88.5, "elapsed_time": "1:30:42", "remaining_time": "0:11:47", "throughput": 7998.55, "total_tokens": 43532800}
|
|
{"current_steps": 13830, "total_steps": 15621, "loss": 0.2586, "lr": 7.912010371308564e-08, "epoch": 0.885346648742078, "percentage": 88.53, "elapsed_time": "1:30:43", "remaining_time": "0:11:44", "throughput": 8000.34, "total_tokens": 43547648}
|
|
{"current_steps": 13835, "total_steps": 15621, "loss": 0.2934, "lr": 7.868507666585422e-08, "epoch": 0.8856667306830549, "percentage": 88.57, "elapsed_time": "1:30:43", "remaining_time": "0:11:42", "throughput": 8002.13, "total_tokens": 43562688}
|
|
{"current_steps": 13840, "total_steps": 15621, "loss": 0.4174, "lr": 7.825119989112172e-08, "epoch": 0.8859868126240318, "percentage": 88.6, "elapsed_time": "1:30:44", "remaining_time": "0:11:40", "throughput": 8004.01, "total_tokens": 43578176}
|
|
{"current_steps": 13845, "total_steps": 15621, "loss": 0.2904, "lr": 7.78184739305886e-08, "epoch": 0.8863068945650087, "percentage": 88.63, "elapsed_time": "1:30:45", "remaining_time": "0:11:38", "throughput": 8005.91, "total_tokens": 43593920}
|
|
{"current_steps": 13850, "total_steps": 15621, "loss": 0.3606, "lr": 7.73868993245187e-08, "epoch": 0.8866269765059855, "percentage": 88.66, "elapsed_time": "1:30:45", "remaining_time": "0:11:36", "throughput": 8007.99, "total_tokens": 43610944}
|
|
{"current_steps": 13855, "total_steps": 15621, "loss": 0.3406, "lr": 7.695647661173754e-08, "epoch": 0.8869470584469624, "percentage": 88.69, "elapsed_time": "1:30:46", "remaining_time": "0:11:34", "throughput": 8009.94, "total_tokens": 43627008}
|
|
{"current_steps": 13860, "total_steps": 15621, "loss": 0.3843, "lr": 7.652720632963284e-08, "epoch": 0.8872671403879393, "percentage": 88.73, "elapsed_time": "1:30:47", "remaining_time": "0:11:32", "throughput": 8011.84, "total_tokens": 43642752}
|
|
{"current_steps": 13865, "total_steps": 15621, "loss": 0.3506, "lr": 7.609908901415396e-08, "epoch": 0.8875872223289162, "percentage": 88.76, "elapsed_time": "1:30:47", "remaining_time": "0:11:29", "throughput": 8013.73, "total_tokens": 43658496}
|
|
{"current_steps": 13870, "total_steps": 15621, "loss": 0.3988, "lr": 7.567212519981047e-08, "epoch": 0.8879073042698931, "percentage": 88.79, "elapsed_time": "1:30:48", "remaining_time": "0:11:27", "throughput": 8015.64, "total_tokens": 43674304}
|
|
{"current_steps": 13875, "total_steps": 15621, "loss": 0.3315, "lr": 7.524631541967108e-08, "epoch": 0.8882273862108699, "percentage": 88.82, "elapsed_time": "1:30:49", "remaining_time": "0:11:25", "throughput": 8017.48, "total_tokens": 43689536}
|
|
{"current_steps": 13880, "total_steps": 15621, "loss": 0.2984, "lr": 7.482166020536485e-08, "epoch": 0.8885474681518468, "percentage": 88.85, "elapsed_time": "1:30:50", "remaining_time": "0:11:23", "throughput": 8019.54, "total_tokens": 43706496}
|
|
{"current_steps": 13885, "total_steps": 15621, "loss": 0.3097, "lr": 7.439816008707877e-08, "epoch": 0.8888675500928238, "percentage": 88.89, "elapsed_time": "1:30:50", "remaining_time": "0:11:21", "throughput": 8021.31, "total_tokens": 43721408}
|
|
{"current_steps": 13890, "total_steps": 15621, "loss": 0.3397, "lr": 7.397581559355748e-08, "epoch": 0.8891876320338007, "percentage": 88.92, "elapsed_time": "1:30:51", "remaining_time": "0:11:19", "throughput": 8023.27, "total_tokens": 43737536}
|
|
{"current_steps": 13895, "total_steps": 15621, "loss": 0.4171, "lr": 7.355462725210315e-08, "epoch": 0.8895077139747776, "percentage": 88.95, "elapsed_time": "1:30:51", "remaining_time": "0:11:17", "throughput": 8025.08, "total_tokens": 43752640}
|
|
{"current_steps": 13900, "total_steps": 15621, "loss": 0.4097, "lr": 7.313459558857438e-08, "epoch": 0.8898277959157544, "percentage": 88.98, "elapsed_time": "1:30:52", "remaining_time": "0:11:15", "throughput": 8026.97, "total_tokens": 43768384}
|
|
{"current_steps": 13905, "total_steps": 15621, "loss": 0.3141, "lr": 7.271572112738566e-08, "epoch": 0.8901478778567313, "percentage": 89.01, "elapsed_time": "1:30:53", "remaining_time": "0:11:12", "throughput": 8028.89, "total_tokens": 43784320}
|
|
{"current_steps": 13910, "total_steps": 15621, "loss": 0.3635, "lr": 7.229800439150657e-08, "epoch": 0.8904679597977082, "percentage": 89.05, "elapsed_time": "1:30:53", "remaining_time": "0:11:10", "throughput": 8030.67, "total_tokens": 43799232}
|
|
{"current_steps": 13915, "total_steps": 15621, "loss": 0.3806, "lr": 7.188144590246148e-08, "epoch": 0.8907880417386851, "percentage": 89.08, "elapsed_time": "1:30:54", "remaining_time": "0:11:08", "throughput": 8032.63, "total_tokens": 43815360}
|
|
{"current_steps": 13920, "total_steps": 15621, "loss": 0.3317, "lr": 7.146604618032848e-08, "epoch": 0.891108123679662, "percentage": 89.11, "elapsed_time": "1:30:55", "remaining_time": "0:11:06", "throughput": 8034.4, "total_tokens": 43830336}
|
|
{"current_steps": 13925, "total_steps": 15621, "loss": 0.4062, "lr": 7.105180574373904e-08, "epoch": 0.8914282056206388, "percentage": 89.14, "elapsed_time": "1:30:56", "remaining_time": "0:11:04", "throughput": 8036.38, "total_tokens": 43846656}
|
|
{"current_steps": 13930, "total_steps": 15621, "loss": 0.3279, "lr": 7.063872510987712e-08, "epoch": 0.8917482875616157, "percentage": 89.17, "elapsed_time": "1:30:56", "remaining_time": "0:11:02", "throughput": 8038.31, "total_tokens": 43862720}
|
|
{"current_steps": 13935, "total_steps": 15621, "loss": 0.3541, "lr": 7.022680479447874e-08, "epoch": 0.8920683695025927, "percentage": 89.21, "elapsed_time": "1:30:57", "remaining_time": "0:11:00", "throughput": 8039.97, "total_tokens": 43876800}
|
|
{"current_steps": 13940, "total_steps": 15621, "loss": 0.3046, "lr": 6.98160453118316e-08, "epoch": 0.8923884514435696, "percentage": 89.24, "elapsed_time": "1:30:57", "remaining_time": "0:10:58", "throughput": 8041.81, "total_tokens": 43892160}
|
|
{"current_steps": 13945, "total_steps": 15621, "loss": 0.3444, "lr": 6.940644717477328e-08, "epoch": 0.8927085333845465, "percentage": 89.27, "elapsed_time": "1:30:58", "remaining_time": "0:10:56", "throughput": 8043.77, "total_tokens": 43908416}
|
|
{"current_steps": 13950, "total_steps": 15621, "loss": 0.4553, "lr": 6.899801089469204e-08, "epoch": 0.8930286153255234, "percentage": 89.3, "elapsed_time": "1:30:59", "remaining_time": "0:10:53", "throughput": 8045.59, "total_tokens": 43923712}
|
|
{"current_steps": 13955, "total_steps": 15621, "loss": 0.3491, "lr": 6.85907369815254e-08, "epoch": 0.8933486972665002, "percentage": 89.33, "elapsed_time": "1:31:00", "remaining_time": "0:10:51", "throughput": 8047.49, "total_tokens": 43939520}
|
|
{"current_steps": 13960, "total_steps": 15621, "loss": 0.3771, "lr": 6.81846259437595e-08, "epoch": 0.8936687792074771, "percentage": 89.37, "elapsed_time": "1:31:00", "remaining_time": "0:10:49", "throughput": 8049.29, "total_tokens": 43954688}
|
|
{"current_steps": 13965, "total_steps": 15621, "loss": 0.3246, "lr": 6.77796782884289e-08, "epoch": 0.893988861148454, "percentage": 89.4, "elapsed_time": "1:31:01", "remaining_time": "0:10:47", "throughput": 8051.07, "total_tokens": 43969600}
|
|
{"current_steps": 13970, "total_steps": 15621, "loss": 0.3885, "lr": 6.737589452111526e-08, "epoch": 0.8943089430894309, "percentage": 89.43, "elapsed_time": "1:31:02", "remaining_time": "0:10:45", "throughput": 8052.97, "total_tokens": 43985472}
|
|
{"current_steps": 13975, "total_steps": 15621, "loss": 0.4012, "lr": 6.697327514594786e-08, "epoch": 0.8946290250304078, "percentage": 89.46, "elapsed_time": "1:31:02", "remaining_time": "0:10:43", "throughput": 8054.8, "total_tokens": 44000768}
|
|
{"current_steps": 13980, "total_steps": 15621, "loss": 0.4538, "lr": 6.657182066560118e-08, "epoch": 0.8949491069713846, "percentage": 89.49, "elapsed_time": "1:31:03", "remaining_time": "0:10:41", "throughput": 8056.76, "total_tokens": 44017088}
|
|
{"current_steps": 13985, "total_steps": 15621, "loss": 0.3715, "lr": 6.617153158129596e-08, "epoch": 0.8952691889123615, "percentage": 89.53, "elapsed_time": "1:31:04", "remaining_time": "0:10:39", "throughput": 8058.46, "total_tokens": 44031488}
|
|
{"current_steps": 13990, "total_steps": 15621, "loss": 0.3356, "lr": 6.577240839279807e-08, "epoch": 0.8955892708533385, "percentage": 89.56, "elapsed_time": "1:31:04", "remaining_time": "0:10:37", "throughput": 8060.36, "total_tokens": 44047296}
|
|
{"current_steps": 13995, "total_steps": 15621, "loss": 0.3162, "lr": 6.537445159841748e-08, "epoch": 0.8959093527943154, "percentage": 89.59, "elapsed_time": "1:31:05", "remaining_time": "0:10:34", "throughput": 8062.33, "total_tokens": 44063744}
|
|
{"current_steps": 14000, "total_steps": 15621, "loss": 0.3898, "lr": 6.497766169500752e-08, "epoch": 0.8962294347352923, "percentage": 89.62, "elapsed_time": "1:31:06", "remaining_time": "0:10:32", "throughput": 8064.19, "total_tokens": 44079168}
|
|
{"current_steps": 14005, "total_steps": 15621, "loss": 0.2716, "lr": 6.458203917796546e-08, "epoch": 0.8965495166762691, "percentage": 89.65, "elapsed_time": "1:31:06", "remaining_time": "0:10:30", "throughput": 8065.92, "total_tokens": 44093824}
|
|
{"current_steps": 14010, "total_steps": 15621, "loss": 0.4511, "lr": 6.418758454123041e-08, "epoch": 0.896869598617246, "percentage": 89.69, "elapsed_time": "1:31:07", "remaining_time": "0:10:28", "throughput": 8068.04, "total_tokens": 44111296}
|
|
{"current_steps": 14015, "total_steps": 15621, "loss": 0.3912, "lr": 6.379429827728377e-08, "epoch": 0.8971896805582229, "percentage": 89.72, "elapsed_time": "1:31:08", "remaining_time": "0:10:26", "throughput": 8070.07, "total_tokens": 44128000}
|
|
{"current_steps": 14020, "total_steps": 15621, "loss": 0.3795, "lr": 6.340218087714799e-08, "epoch": 0.8975097624991998, "percentage": 89.75, "elapsed_time": "1:31:08", "remaining_time": "0:10:24", "throughput": 8071.92, "total_tokens": 44143488}
|
|
{"current_steps": 14025, "total_steps": 15621, "loss": 0.347, "lr": 6.301123283038634e-08, "epoch": 0.8978298444401767, "percentage": 89.78, "elapsed_time": "1:31:09", "remaining_time": "0:10:22", "throughput": 8073.77, "total_tokens": 44158976}
|
|
{"current_steps": 14030, "total_steps": 15621, "loss": 0.3207, "lr": 6.262145462510193e-08, "epoch": 0.8981499263811535, "percentage": 89.81, "elapsed_time": "1:31:10", "remaining_time": "0:10:20", "throughput": 8075.82, "total_tokens": 44175808}
|
|
{"current_steps": 14035, "total_steps": 15621, "loss": 0.2917, "lr": 6.223284674793738e-08, "epoch": 0.8984700083221304, "percentage": 89.85, "elapsed_time": "1:31:10", "remaining_time": "0:10:18", "throughput": 8077.53, "total_tokens": 44190336}
|
|
{"current_steps": 14040, "total_steps": 15621, "loss": 0.39, "lr": 6.184540968407437e-08, "epoch": 0.8987900902631074, "percentage": 89.88, "elapsed_time": "1:31:11", "remaining_time": "0:10:16", "throughput": 8079.36, "total_tokens": 44205696}
|
|
{"current_steps": 14045, "total_steps": 15621, "loss": 0.3515, "lr": 6.145914391723239e-08, "epoch": 0.8991101722040843, "percentage": 89.91, "elapsed_time": "1:31:12", "remaining_time": "0:10:14", "throughput": 8081.33, "total_tokens": 44222016}
|
|
{"current_steps": 14050, "total_steps": 15621, "loss": 0.327, "lr": 6.107404992966902e-08, "epoch": 0.8994302541450612, "percentage": 89.94, "elapsed_time": "1:31:12", "remaining_time": "0:10:11", "throughput": 8083.32, "total_tokens": 44238592}
|
|
{"current_steps": 14055, "total_steps": 15621, "loss": 0.2489, "lr": 6.069012820217856e-08, "epoch": 0.899750336086038, "percentage": 89.98, "elapsed_time": "1:31:13", "remaining_time": "0:10:09", "throughput": 8085.16, "total_tokens": 44254016}
|
|
{"current_steps": 14060, "total_steps": 15621, "loss": 0.3843, "lr": 6.030737921409168e-08, "epoch": 0.9000704180270149, "percentage": 90.01, "elapsed_time": "1:31:14", "remaining_time": "0:10:07", "throughput": 8087.0, "total_tokens": 44269376}
|
|
{"current_steps": 14065, "total_steps": 15621, "loss": 0.4579, "lr": 5.992580344327503e-08, "epoch": 0.9003904999679918, "percentage": 90.04, "elapsed_time": "1:31:14", "remaining_time": "0:10:05", "throughput": 8088.81, "total_tokens": 44284672}
|
|
{"current_steps": 14070, "total_steps": 15621, "loss": 0.346, "lr": 5.954540136613051e-08, "epoch": 0.9007105819089687, "percentage": 90.07, "elapsed_time": "1:31:15", "remaining_time": "0:10:03", "throughput": 8090.67, "total_tokens": 44300224}
|
|
{"current_steps": 14075, "total_steps": 15621, "loss": 0.3511, "lr": 5.916617345759456e-08, "epoch": 0.9010306638499456, "percentage": 90.1, "elapsed_time": "1:31:16", "remaining_time": "0:10:01", "throughput": 8092.45, "total_tokens": 44315264}
|
|
{"current_steps": 14076, "total_steps": 15621, "eval_loss": 0.35641103982925415, "epoch": 0.901094680238141, "percentage": 90.11, "elapsed_time": "1:32:06", "remaining_time": "0:10:06", "throughput": 8018.83, "total_tokens": 44318848}
|
|
{"current_steps": 14080, "total_steps": 15621, "loss": 0.4212, "lr": 5.878812019113766e-08, "epoch": 0.9013507457909224, "percentage": 90.14, "elapsed_time": "1:32:40", "remaining_time": "0:10:08", "throughput": 7971.94, "total_tokens": 44330176}
|
|
{"current_steps": 14085, "total_steps": 15621, "loss": 0.3065, "lr": 5.84112420387638e-08, "epoch": 0.9016708277318993, "percentage": 90.17, "elapsed_time": "1:32:41", "remaining_time": "0:10:06", "throughput": 7973.7, "total_tokens": 44345152}
|
|
{"current_steps": 14090, "total_steps": 15621, "loss": 0.3625, "lr": 5.8035539471009697e-08, "epoch": 0.9019909096728762, "percentage": 90.2, "elapsed_time": "1:32:42", "remaining_time": "0:10:04", "throughput": 7975.59, "total_tokens": 44361152}
|
|
{"current_steps": 14095, "total_steps": 15621, "loss": 0.4095, "lr": 5.7661012956944253e-08, "epoch": 0.9023109916138532, "percentage": 90.23, "elapsed_time": "1:32:42", "remaining_time": "0:10:02", "throughput": 7977.36, "total_tokens": 44376128}
|
|
{"current_steps": 14100, "total_steps": 15621, "loss": 0.2917, "lr": 5.728766296416876e-08, "epoch": 0.9026310735548301, "percentage": 90.26, "elapsed_time": "1:32:43", "remaining_time": "0:10:00", "throughput": 7979.28, "total_tokens": 44392192}
|
|
{"current_steps": 14105, "total_steps": 15621, "loss": 0.4205, "lr": 5.6915489958814453e-08, "epoch": 0.902951155495807, "percentage": 90.3, "elapsed_time": "1:32:44", "remaining_time": "0:09:58", "throughput": 7981.11, "total_tokens": 44407680}
|
|
{"current_steps": 14110, "total_steps": 15621, "loss": 0.4106, "lr": 5.654449440554399e-08, "epoch": 0.9032712374367838, "percentage": 90.33, "elapsed_time": "1:32:44", "remaining_time": "0:09:55", "throughput": 7983.11, "total_tokens": 44424384}
|
|
{"current_steps": 14115, "total_steps": 15621, "loss": 0.3803, "lr": 5.617467676754972e-08, "epoch": 0.9035913193777607, "percentage": 90.36, "elapsed_time": "1:32:45", "remaining_time": "0:09:53", "throughput": 7984.91, "total_tokens": 44439744}
|
|
{"current_steps": 14120, "total_steps": 15621, "loss": 0.296, "lr": 5.580603750655344e-08, "epoch": 0.9039114013187376, "percentage": 90.39, "elapsed_time": "1:32:46", "remaining_time": "0:09:51", "throughput": 7986.61, "total_tokens": 44454272}
|
|
{"current_steps": 14125, "total_steps": 15621, "loss": 0.3739, "lr": 5.543857708280497e-08, "epoch": 0.9042314832597145, "percentage": 90.42, "elapsed_time": "1:32:46", "remaining_time": "0:09:49", "throughput": 7988.34, "total_tokens": 44468992}
|
|
{"current_steps": 14130, "total_steps": 15621, "loss": 0.4703, "lr": 5.507229595508367e-08, "epoch": 0.9045515652006914, "percentage": 90.46, "elapsed_time": "1:32:47", "remaining_time": "0:09:47", "throughput": 7990.22, "total_tokens": 44484864}
|
|
{"current_steps": 14135, "total_steps": 15621, "loss": 0.2887, "lr": 5.4707194580695504e-08, "epoch": 0.9048716471416682, "percentage": 90.49, "elapsed_time": "1:32:48", "remaining_time": "0:09:45", "throughput": 7991.99, "total_tokens": 44499968}
|
|
{"current_steps": 14140, "total_steps": 15621, "loss": 0.4279, "lr": 5.4343273415473846e-08, "epoch": 0.9051917290826451, "percentage": 90.52, "elapsed_time": "1:32:48", "remaining_time": "0:09:43", "throughput": 7994.15, "total_tokens": 44517952}
|
|
{"current_steps": 14145, "total_steps": 15621, "loss": 0.3413, "lr": 5.3980532913778576e-08, "epoch": 0.905511811023622, "percentage": 90.55, "elapsed_time": "1:32:49", "remaining_time": "0:09:41", "throughput": 7995.91, "total_tokens": 44532928}
|
|
{"current_steps": 14150, "total_steps": 15621, "loss": 0.3928, "lr": 5.361897352849554e-08, "epoch": 0.905831892964599, "percentage": 90.58, "elapsed_time": "1:32:50", "remaining_time": "0:09:39", "throughput": 7997.72, "total_tokens": 44548288}
|
|
{"current_steps": 14155, "total_steps": 15621, "loss": 0.3204, "lr": 5.325859571103586e-08, "epoch": 0.9061519749055759, "percentage": 90.62, "elapsed_time": "1:32:50", "remaining_time": "0:09:36", "throughput": 7999.53, "total_tokens": 44563712}
|
|
{"current_steps": 14160, "total_steps": 15621, "loss": 0.3376, "lr": 5.289939991133508e-08, "epoch": 0.9064720568465527, "percentage": 90.65, "elapsed_time": "1:32:51", "remaining_time": "0:09:34", "throughput": 8001.36, "total_tokens": 44579264}
|
|
{"current_steps": 14165, "total_steps": 15621, "loss": 0.2387, "lr": 5.2541386577853895e-08, "epoch": 0.9067921387875296, "percentage": 90.68, "elapsed_time": "1:32:52", "remaining_time": "0:09:32", "throughput": 8003.11, "total_tokens": 44594176}
|
|
{"current_steps": 14170, "total_steps": 15621, "loss": 0.2536, "lr": 5.2184556157576e-08, "epoch": 0.9071122207285065, "percentage": 90.71, "elapsed_time": "1:32:52", "remaining_time": "0:09:30", "throughput": 8004.93, "total_tokens": 44609664}
|
|
{"current_steps": 14175, "total_steps": 15621, "loss": 0.3807, "lr": 5.1828909096008234e-08, "epoch": 0.9074323026694834, "percentage": 90.74, "elapsed_time": "1:32:53", "remaining_time": "0:09:28", "throughput": 8007.0, "total_tokens": 44626944}
|
|
{"current_steps": 14180, "total_steps": 15621, "loss": 0.2294, "lr": 5.14744458371803e-08, "epoch": 0.9077523846104603, "percentage": 90.78, "elapsed_time": "1:32:54", "remaining_time": "0:09:26", "throughput": 8008.97, "total_tokens": 44643520}
|
|
{"current_steps": 14185, "total_steps": 15621, "loss": 0.4922, "lr": 5.1121166823643646e-08, "epoch": 0.9080724665514371, "percentage": 90.81, "elapsed_time": "1:32:54", "remaining_time": "0:09:24", "throughput": 8010.65, "total_tokens": 44657984}
|
|
{"current_steps": 14190, "total_steps": 15621, "loss": 0.3841, "lr": 5.076907249647122e-08, "epoch": 0.908392548492414, "percentage": 90.84, "elapsed_time": "1:32:55", "remaining_time": "0:09:22", "throughput": 8012.41, "total_tokens": 44673024}
|
|
{"current_steps": 14195, "total_steps": 15621, "loss": 0.4111, "lr": 5.0418163295257055e-08, "epoch": 0.9087126304333909, "percentage": 90.87, "elapsed_time": "1:32:56", "remaining_time": "0:09:20", "throughput": 8014.07, "total_tokens": 44687424}
|
|
{"current_steps": 14200, "total_steps": 15621, "loss": 0.2901, "lr": 5.006843965811536e-08, "epoch": 0.9090327123743679, "percentage": 90.9, "elapsed_time": "1:32:56", "remaining_time": "0:09:18", "throughput": 8015.9, "total_tokens": 44702976}
|
|
{"current_steps": 14205, "total_steps": 15621, "loss": 0.4813, "lr": 4.971990202168008e-08, "epoch": 0.9093527943153448, "percentage": 90.94, "elapsed_time": "1:32:57", "remaining_time": "0:09:15", "throughput": 8017.69, "total_tokens": 44718144}
|
|
{"current_steps": 14210, "total_steps": 15621, "loss": 0.3209, "lr": 4.9372550821104697e-08, "epoch": 0.9096728762563216, "percentage": 90.97, "elapsed_time": "1:32:58", "remaining_time": "0:09:13", "throughput": 8019.68, "total_tokens": 44734912}
|
|
{"current_steps": 14215, "total_steps": 15621, "loss": 0.3205, "lr": 4.902638649006119e-08, "epoch": 0.9099929581972985, "percentage": 91.0, "elapsed_time": "1:32:58", "remaining_time": "0:09:11", "throughput": 8021.44, "total_tokens": 44749888}
|
|
{"current_steps": 14220, "total_steps": 15621, "loss": 0.3289, "lr": 4.868140946073973e-08, "epoch": 0.9103130401382754, "percentage": 91.03, "elapsed_time": "1:32:59", "remaining_time": "0:09:09", "throughput": 8023.14, "total_tokens": 44764544}
|
|
{"current_steps": 14225, "total_steps": 15621, "loss": 0.3017, "lr": 4.833762016384857e-08, "epoch": 0.9106331220792523, "percentage": 91.06, "elapsed_time": "1:33:00", "remaining_time": "0:09:07", "throughput": 8025.08, "total_tokens": 44780992}
|
|
{"current_steps": 14230, "total_steps": 15621, "loss": 0.3869, "lr": 4.799501902861214e-08, "epoch": 0.9109532040202292, "percentage": 91.1, "elapsed_time": "1:33:00", "remaining_time": "0:09:05", "throughput": 8026.91, "total_tokens": 44796672}
|
|
{"current_steps": 14235, "total_steps": 15621, "loss": 0.4287, "lr": 4.765360648277217e-08, "epoch": 0.911273285961206, "percentage": 91.13, "elapsed_time": "1:33:01", "remaining_time": "0:09:03", "throughput": 8028.73, "total_tokens": 44812224}
|
|
{"current_steps": 14240, "total_steps": 15621, "loss": 0.4228, "lr": 4.7313382952586465e-08, "epoch": 0.9115933679021829, "percentage": 91.16, "elapsed_time": "1:33:02", "remaining_time": "0:09:01", "throughput": 8030.47, "total_tokens": 44827136}
|
|
{"current_steps": 14245, "total_steps": 15621, "loss": 0.3649, "lr": 4.6974348862828027e-08, "epoch": 0.9119134498431598, "percentage": 91.19, "elapsed_time": "1:33:02", "remaining_time": "0:08:59", "throughput": 8032.22, "total_tokens": 44842176}
|
|
{"current_steps": 14250, "total_steps": 15621, "loss": 0.4412, "lr": 4.663650463678448e-08, "epoch": 0.9122335317841367, "percentage": 91.22, "elapsed_time": "1:33:03", "remaining_time": "0:08:57", "throughput": 8034.19, "total_tokens": 44858880}
|
|
{"current_steps": 14255, "total_steps": 15621, "loss": 0.4434, "lr": 4.629985069625875e-08, "epoch": 0.9125536137251137, "percentage": 91.26, "elapsed_time": "1:33:04", "remaining_time": "0:08:55", "throughput": 8036.15, "total_tokens": 44875328}
|
|
{"current_steps": 14260, "total_steps": 15621, "loss": 0.3751, "lr": 4.596438746156728e-08, "epoch": 0.9128736956660906, "percentage": 91.29, "elapsed_time": "1:33:04", "remaining_time": "0:08:53", "throughput": 8038.13, "total_tokens": 44892032}
|
|
{"current_steps": 14265, "total_steps": 15621, "loss": 0.36, "lr": 4.563011535153949e-08, "epoch": 0.9131937776070674, "percentage": 91.32, "elapsed_time": "1:33:05", "remaining_time": "0:08:50", "throughput": 8039.91, "total_tokens": 44907328}
|
|
{"current_steps": 14270, "total_steps": 15621, "loss": 0.2689, "lr": 4.52970347835181e-08, "epoch": 0.9135138595480443, "percentage": 91.35, "elapsed_time": "1:33:06", "remaining_time": "0:08:48", "throughput": 8041.68, "total_tokens": 44922560}
|
|
{"current_steps": 14275, "total_steps": 15621, "loss": 0.327, "lr": 4.496514617335845e-08, "epoch": 0.9138339414890212, "percentage": 91.38, "elapsed_time": "1:33:06", "remaining_time": "0:08:46", "throughput": 8043.45, "total_tokens": 44937728}
|
|
{"current_steps": 14280, "total_steps": 15621, "loss": 0.3603, "lr": 4.4634449935427197e-08, "epoch": 0.9141540234299981, "percentage": 91.42, "elapsed_time": "1:33:07", "remaining_time": "0:08:44", "throughput": 8045.43, "total_tokens": 44954560}
|
|
{"current_steps": 14285, "total_steps": 15621, "loss": 0.3096, "lr": 4.430494648260219e-08, "epoch": 0.914474105370975, "percentage": 91.45, "elapsed_time": "1:33:08", "remaining_time": "0:08:42", "throughput": 8047.43, "total_tokens": 44971520}
|
|
{"current_steps": 14290, "total_steps": 15621, "loss": 0.4524, "lr": 4.397663622627279e-08, "epoch": 0.9147941873119518, "percentage": 91.48, "elapsed_time": "1:33:08", "remaining_time": "0:08:40", "throughput": 8049.29, "total_tokens": 44987392}
|
|
{"current_steps": 14295, "total_steps": 15621, "loss": 0.3122, "lr": 4.364951957633789e-08, "epoch": 0.9151142692529287, "percentage": 91.51, "elapsed_time": "1:33:09", "remaining_time": "0:08:38", "throughput": 8051.07, "total_tokens": 45002688}
|
|
{"current_steps": 14300, "total_steps": 15621, "loss": 0.2953, "lr": 4.332359694120669e-08, "epoch": 0.9154343511939056, "percentage": 91.54, "elapsed_time": "1:33:10", "remaining_time": "0:08:36", "throughput": 8052.82, "total_tokens": 45017792}
|
|
{"current_steps": 14305, "total_steps": 15621, "loss": 0.3571, "lr": 4.299886872779734e-08, "epoch": 0.9157544331348826, "percentage": 91.58, "elapsed_time": "1:33:10", "remaining_time": "0:08:34", "throughput": 8054.53, "total_tokens": 45032640}
|
|
{"current_steps": 14310, "total_steps": 15621, "loss": 0.2975, "lr": 4.267533534153678e-08, "epoch": 0.9160745150758595, "percentage": 91.61, "elapsed_time": "1:33:11", "remaining_time": "0:08:32", "throughput": 8056.36, "total_tokens": 45048256}
|
|
{"current_steps": 14315, "total_steps": 15621, "loss": 0.3218, "lr": 4.2352997186360316e-08, "epoch": 0.9163945970168363, "percentage": 91.64, "elapsed_time": "1:33:12", "remaining_time": "0:08:30", "throughput": 8058.23, "total_tokens": 45064192}
|
|
{"current_steps": 14320, "total_steps": 15621, "loss": 0.3243, "lr": 4.203185466471082e-08, "epoch": 0.9167146789578132, "percentage": 91.67, "elapsed_time": "1:33:12", "remaining_time": "0:08:28", "throughput": 8059.99, "total_tokens": 45079488}
|
|
{"current_steps": 14325, "total_steps": 15621, "loss": 0.3984, "lr": 4.1711908177538556e-08, "epoch": 0.9170347608987901, "percentage": 91.7, "elapsed_time": "1:33:13", "remaining_time": "0:08:26", "throughput": 8061.88, "total_tokens": 45095616}
|
|
{"current_steps": 14330, "total_steps": 15621, "loss": 0.378, "lr": 4.139315812430055e-08, "epoch": 0.917354842839767, "percentage": 91.74, "elapsed_time": "1:33:14", "remaining_time": "0:08:23", "throughput": 8063.61, "total_tokens": 45110592}
|
|
{"current_steps": 14335, "total_steps": 15621, "loss": 0.3863, "lr": 4.1075604902959915e-08, "epoch": 0.9176749247807439, "percentage": 91.77, "elapsed_time": "1:33:15", "remaining_time": "0:08:21", "throughput": 8065.56, "total_tokens": 45127168}
|
|
{"current_steps": 14340, "total_steps": 15621, "loss": 0.3137, "lr": 4.07592489099855e-08, "epoch": 0.9179950067217207, "percentage": 91.8, "elapsed_time": "1:33:15", "remaining_time": "0:08:19", "throughput": 8067.3, "total_tokens": 45142208}
|
|
{"current_steps": 14345, "total_steps": 15621, "loss": 0.3934, "lr": 4.044409054035147e-08, "epoch": 0.9183150886626976, "percentage": 91.83, "elapsed_time": "1:33:16", "remaining_time": "0:08:17", "throughput": 8069.04, "total_tokens": 45157184}
|
|
{"current_steps": 14350, "total_steps": 15621, "loss": 0.3929, "lr": 4.0130130187537195e-08, "epoch": 0.9186351706036745, "percentage": 91.86, "elapsed_time": "1:33:17", "remaining_time": "0:08:15", "throughput": 8071.1, "total_tokens": 45174464}
|
|
{"current_steps": 14355, "total_steps": 15621, "loss": 0.3149, "lr": 3.981736824352522e-08, "epoch": 0.9189552525446514, "percentage": 91.9, "elapsed_time": "1:33:17", "remaining_time": "0:08:13", "throughput": 8072.77, "total_tokens": 45188992}
|
|
{"current_steps": 14360, "total_steps": 15621, "loss": 0.4703, "lr": 3.950580509880286e-08, "epoch": 0.9192753344856284, "percentage": 91.93, "elapsed_time": "1:33:18", "remaining_time": "0:08:11", "throughput": 8074.5, "total_tokens": 45204032}
|
|
{"current_steps": 14365, "total_steps": 15621, "loss": 0.3999, "lr": 3.9195441142360066e-08, "epoch": 0.9195954164266052, "percentage": 91.96, "elapsed_time": "1:33:19", "remaining_time": "0:08:09", "throughput": 8076.26, "total_tokens": 45219328}
|
|
{"current_steps": 14370, "total_steps": 15621, "loss": 0.321, "lr": 3.888627676169043e-08, "epoch": 0.9199154983675821, "percentage": 91.99, "elapsed_time": "1:33:19", "remaining_time": "0:08:07", "throughput": 8078.17, "total_tokens": 45235584}
|
|
{"current_steps": 14375, "total_steps": 15621, "loss": 0.3666, "lr": 3.857831234278886e-08, "epoch": 0.920235580308559, "percentage": 92.02, "elapsed_time": "1:33:20", "remaining_time": "0:08:05", "throughput": 8079.94, "total_tokens": 45250880}
|
|
{"current_steps": 14380, "total_steps": 15621, "loss": 0.4145, "lr": 3.827154827015255e-08, "epoch": 0.9205556622495359, "percentage": 92.06, "elapsed_time": "1:33:21", "remaining_time": "0:08:03", "throughput": 8081.79, "total_tokens": 45266752}
|
|
{"current_steps": 14385, "total_steps": 15621, "loss": 0.285, "lr": 3.7965984926780383e-08, "epoch": 0.9208757441905128, "percentage": 92.09, "elapsed_time": "1:33:21", "remaining_time": "0:08:01", "throughput": 8083.62, "total_tokens": 45282496}
|
|
{"current_steps": 14390, "total_steps": 15621, "loss": 0.3521, "lr": 3.766162269417139e-08, "epoch": 0.9211958261314896, "percentage": 92.12, "elapsed_time": "1:33:22", "remaining_time": "0:07:59", "throughput": 8085.28, "total_tokens": 45297024}
|
|
{"current_steps": 14395, "total_steps": 15621, "loss": 0.3723, "lr": 3.73584619523255e-08, "epoch": 0.9215159080724665, "percentage": 92.15, "elapsed_time": "1:33:23", "remaining_time": "0:07:57", "throughput": 8087.3, "total_tokens": 45314176}
|
|
{"current_steps": 14400, "total_steps": 15621, "loss": 0.352, "lr": 3.7056503079742616e-08, "epoch": 0.9218359900134434, "percentage": 92.18, "elapsed_time": "1:33:23", "remaining_time": "0:07:55", "throughput": 8089.05, "total_tokens": 45329344}
|
|
{"current_steps": 14405, "total_steps": 15621, "loss": 0.3452, "lr": 3.6755746453421945e-08, "epoch": 0.9221560719544203, "percentage": 92.22, "elapsed_time": "1:33:24", "remaining_time": "0:07:53", "throughput": 8090.8, "total_tokens": 45344384}
|
|
{"current_steps": 14410, "total_steps": 15621, "loss": 0.2969, "lr": 3.645619244886145e-08, "epoch": 0.9224761538953972, "percentage": 92.25, "elapsed_time": "1:33:25", "remaining_time": "0:07:51", "throughput": 8092.63, "total_tokens": 45360192}
|
|
{"current_steps": 14415, "total_steps": 15621, "loss": 0.3147, "lr": 3.615784144005796e-08, "epoch": 0.9227962358363742, "percentage": 92.28, "elapsed_time": "1:33:25", "remaining_time": "0:07:48", "throughput": 8094.46, "total_tokens": 45376000}
|
|
{"current_steps": 14420, "total_steps": 15621, "loss": 0.4197, "lr": 3.5860693799506184e-08, "epoch": 0.923116317777351, "percentage": 92.31, "elapsed_time": "1:33:26", "remaining_time": "0:07:46", "throughput": 8096.11, "total_tokens": 45390400}
|
|
{"current_steps": 14425, "total_steps": 15621, "loss": 0.4608, "lr": 3.5564749898198466e-08, "epoch": 0.9234363997183279, "percentage": 92.34, "elapsed_time": "1:33:27", "remaining_time": "0:07:44", "throughput": 8098.06, "total_tokens": 45406976}
|
|
{"current_steps": 14430, "total_steps": 15621, "loss": 0.3533, "lr": 3.527001010562425e-08, "epoch": 0.9237564816593048, "percentage": 92.38, "elapsed_time": "1:33:27", "remaining_time": "0:07:42", "throughput": 8099.8, "total_tokens": 45422080}
|
|
{"current_steps": 14435, "total_steps": 15621, "loss": 0.3585, "lr": 3.4976474789769504e-08, "epoch": 0.9240765636002817, "percentage": 92.41, "elapsed_time": "1:33:28", "remaining_time": "0:07:40", "throughput": 8101.84, "total_tokens": 45439296}
|
|
{"current_steps": 14440, "total_steps": 15621, "loss": 0.2994, "lr": 3.4684144317116636e-08, "epoch": 0.9243966455412586, "percentage": 92.44, "elapsed_time": "1:33:29", "remaining_time": "0:07:38", "throughput": 8103.57, "total_tokens": 45454208}
|
|
{"current_steps": 14445, "total_steps": 15621, "loss": 0.3015, "lr": 3.439301905264369e-08, "epoch": 0.9247167274822354, "percentage": 92.47, "elapsed_time": "1:33:29", "remaining_time": "0:07:36", "throughput": 8105.46, "total_tokens": 45470400}
|
|
{"current_steps": 14450, "total_steps": 15621, "loss": 0.324, "lr": 3.410309935982403e-08, "epoch": 0.9250368094232123, "percentage": 92.5, "elapsed_time": "1:33:30", "remaining_time": "0:07:34", "throughput": 8107.34, "total_tokens": 45486528}
|
|
{"current_steps": 14455, "total_steps": 15621, "loss": 0.3488, "lr": 3.381438560062555e-08, "epoch": 0.9253568913641892, "percentage": 92.54, "elapsed_time": "1:33:31", "remaining_time": "0:07:32", "throughput": 8109.06, "total_tokens": 45501440}
|
|
{"current_steps": 14460, "total_steps": 15621, "loss": 0.3167, "lr": 3.3526878135511025e-08, "epoch": 0.9256769733051661, "percentage": 92.57, "elapsed_time": "1:33:31", "remaining_time": "0:07:30", "throughput": 8110.97, "total_tokens": 45517760}
|
|
{"current_steps": 14465, "total_steps": 15621, "loss": 0.3751, "lr": 3.324057732343666e-08, "epoch": 0.9259970552461431, "percentage": 92.6, "elapsed_time": "1:33:32", "remaining_time": "0:07:28", "throughput": 8112.73, "total_tokens": 45533056}
|
|
{"current_steps": 14470, "total_steps": 15621, "loss": 0.421, "lr": 3.295548352185262e-08, "epoch": 0.9263171371871199, "percentage": 92.63, "elapsed_time": "1:33:33", "remaining_time": "0:07:26", "throughput": 8114.62, "total_tokens": 45549248}
|
|
{"current_steps": 14475, "total_steps": 15621, "loss": 0.3503, "lr": 3.2671597086701753e-08, "epoch": 0.9266372191280968, "percentage": 92.66, "elapsed_time": "1:33:33", "remaining_time": "0:07:24", "throughput": 8116.56, "total_tokens": 45565760}
|
|
{"current_steps": 14480, "total_steps": 15621, "loss": 0.3294, "lr": 3.238891837241964e-08, "epoch": 0.9269573010690737, "percentage": 92.7, "elapsed_time": "1:33:34", "remaining_time": "0:07:22", "throughput": 8118.4, "total_tokens": 45581568}
|
|
{"current_steps": 14485, "total_steps": 15621, "loss": 0.4179, "lr": 3.210744773193386e-08, "epoch": 0.9272773830100506, "percentage": 92.73, "elapsed_time": "1:33:35", "remaining_time": "0:07:20", "throughput": 8120.18, "total_tokens": 45596928}
|
|
{"current_steps": 14490, "total_steps": 15621, "loss": 0.3016, "lr": 3.182718551666386e-08, "epoch": 0.9275974649510275, "percentage": 92.76, "elapsed_time": "1:33:35", "remaining_time": "0:07:18", "throughput": 8122.03, "total_tokens": 45612800}
|
|
{"current_steps": 14495, "total_steps": 15621, "loss": 0.415, "lr": 3.154813207652063e-08, "epoch": 0.9279175468920043, "percentage": 92.79, "elapsed_time": "1:33:36", "remaining_time": "0:07:16", "throughput": 8123.74, "total_tokens": 45627584}
|
|
{"current_steps": 14500, "total_steps": 15621, "loss": 0.3294, "lr": 3.1270287759905143e-08, "epoch": 0.9282376288329812, "percentage": 92.82, "elapsed_time": "1:33:37", "remaining_time": "0:07:14", "throughput": 8125.63, "total_tokens": 45643840}
|
|
{"current_steps": 14505, "total_steps": 15621, "loss": 0.2947, "lr": 3.0993652913709476e-08, "epoch": 0.9285577107739581, "percentage": 92.86, "elapsed_time": "1:33:37", "remaining_time": "0:07:12", "throughput": 8127.39, "total_tokens": 45659072}
|
|
{"current_steps": 14510, "total_steps": 15621, "loss": 0.4243, "lr": 3.0718227883315796e-08, "epoch": 0.928877792714935, "percentage": 92.89, "elapsed_time": "1:33:38", "remaining_time": "0:07:10", "throughput": 8129.29, "total_tokens": 45675328}
|
|
{"current_steps": 14515, "total_steps": 15621, "loss": 0.3658, "lr": 3.044401301259503e-08, "epoch": 0.9291978746559119, "percentage": 92.92, "elapsed_time": "1:33:39", "remaining_time": "0:07:08", "throughput": 8131.08, "total_tokens": 45690816}
|
|
{"current_steps": 14520, "total_steps": 15621, "loss": 0.3301, "lr": 3.017100864390787e-08, "epoch": 0.9295179565968889, "percentage": 92.95, "elapsed_time": "1:33:39", "remaining_time": "0:07:06", "throughput": 8132.89, "total_tokens": 45706432}
|
|
{"current_steps": 14525, "total_steps": 15621, "loss": 0.3406, "lr": 2.9899215118103446e-08, "epoch": 0.9298380385378657, "percentage": 92.98, "elapsed_time": "1:33:40", "remaining_time": "0:07:04", "throughput": 8134.68, "total_tokens": 45721920}
|
|
{"current_steps": 14530, "total_steps": 15621, "loss": 0.3547, "lr": 2.9628632774519435e-08, "epoch": 0.9301581204788426, "percentage": 93.02, "elapsed_time": "1:33:41", "remaining_time": "0:07:02", "throughput": 8136.56, "total_tokens": 45738048}
|
|
{"current_steps": 14535, "total_steps": 15621, "loss": 0.3313, "lr": 2.9359261950980485e-08, "epoch": 0.9304782024198195, "percentage": 93.05, "elapsed_time": "1:33:41", "remaining_time": "0:07:00", "throughput": 8138.39, "total_tokens": 45753856}
|
|
{"current_steps": 14540, "total_steps": 15621, "loss": 0.2998, "lr": 2.90911029837998e-08, "epoch": 0.9307982843607964, "percentage": 93.08, "elapsed_time": "1:33:42", "remaining_time": "0:06:58", "throughput": 8140.1, "total_tokens": 45768704}
|
|
{"current_steps": 14545, "total_steps": 15621, "loss": 0.2851, "lr": 2.8824156207776673e-08, "epoch": 0.9311183663017732, "percentage": 93.11, "elapsed_time": "1:33:43", "remaining_time": "0:06:55", "throughput": 8141.86, "total_tokens": 45783936}
|
|
{"current_steps": 14550, "total_steps": 15621, "loss": 0.4491, "lr": 2.8558421956197397e-08, "epoch": 0.9314384482427501, "percentage": 93.14, "elapsed_time": "1:33:43", "remaining_time": "0:06:53", "throughput": 8143.77, "total_tokens": 45800320}
|
|
{"current_steps": 14555, "total_steps": 15621, "loss": 0.3872, "lr": 2.829390056083436e-08, "epoch": 0.931758530183727, "percentage": 93.18, "elapsed_time": "1:33:44", "remaining_time": "0:06:51", "throughput": 8145.65, "total_tokens": 45816512}
|
|
{"current_steps": 14560, "total_steps": 15621, "loss": 0.3173, "lr": 2.8030592351945492e-08, "epoch": 0.9320786121247039, "percentage": 93.21, "elapsed_time": "1:33:45", "remaining_time": "0:06:49", "throughput": 8147.43, "total_tokens": 45831936}
|
|
{"current_steps": 14565, "total_steps": 15621, "loss": 0.2995, "lr": 2.776849765827427e-08, "epoch": 0.9323986940656808, "percentage": 93.24, "elapsed_time": "1:33:45", "remaining_time": "0:06:47", "throughput": 8149.14, "total_tokens": 45846784}
|
|
{"current_steps": 14570, "total_steps": 15621, "loss": 0.4281, "lr": 2.750761680704905e-08, "epoch": 0.9327187760066578, "percentage": 93.27, "elapsed_time": "1:33:46", "remaining_time": "0:06:45", "throughput": 8150.89, "total_tokens": 45862080}
|
|
{"current_steps": 14575, "total_steps": 15621, "loss": 0.3977, "lr": 2.724795012398251e-08, "epoch": 0.9330388579476346, "percentage": 93.3, "elapsed_time": "1:33:47", "remaining_time": "0:06:43", "throughput": 8152.8, "total_tokens": 45878528}
|
|
{"current_steps": 14580, "total_steps": 15621, "loss": 0.3726, "lr": 2.6989497933271543e-08, "epoch": 0.9333589398886115, "percentage": 93.34, "elapsed_time": "1:33:47", "remaining_time": "0:06:41", "throughput": 8154.59, "total_tokens": 45894016}
|
|
{"current_steps": 14585, "total_steps": 15621, "loss": 0.3228, "lr": 2.673226055759692e-08, "epoch": 0.9336790218295884, "percentage": 93.37, "elapsed_time": "1:33:48", "remaining_time": "0:06:39", "throughput": 8156.37, "total_tokens": 45909504}
|
|
{"current_steps": 14590, "total_steps": 15621, "loss": 0.341, "lr": 2.6476238318122402e-08, "epoch": 0.9339991037705653, "percentage": 93.4, "elapsed_time": "1:33:49", "remaining_time": "0:06:37", "throughput": 8158.21, "total_tokens": 45925376}
|
|
{"current_steps": 14595, "total_steps": 15621, "loss": 0.3917, "lr": 2.6221431534494742e-08, "epoch": 0.9343191857115422, "percentage": 93.43, "elapsed_time": "1:33:49", "remaining_time": "0:06:35", "throughput": 8159.9, "total_tokens": 45940224}
|
|
{"current_steps": 14600, "total_steps": 15621, "loss": 0.3508, "lr": 2.5967840524843243e-08, "epoch": 0.934639267652519, "percentage": 93.46, "elapsed_time": "1:33:50", "remaining_time": "0:06:33", "throughput": 8161.6, "total_tokens": 45955072}
|
|
{"current_steps": 14605, "total_steps": 15621, "loss": 0.4243, "lr": 2.5715465605779195e-08, "epoch": 0.9349593495934959, "percentage": 93.5, "elapsed_time": "1:33:51", "remaining_time": "0:06:31", "throughput": 8163.35, "total_tokens": 45970240}
|
|
{"current_steps": 14610, "total_steps": 15621, "loss": 0.4145, "lr": 2.5464307092395777e-08, "epoch": 0.9352794315344728, "percentage": 93.53, "elapsed_time": "1:33:51", "remaining_time": "0:06:29", "throughput": 8165.14, "total_tokens": 45985856}
|
|
{"current_steps": 14615, "total_steps": 15621, "loss": 0.345, "lr": 2.5214365298267148e-08, "epoch": 0.9355995134754497, "percentage": 93.56, "elapsed_time": "1:33:52", "remaining_time": "0:06:27", "throughput": 8166.78, "total_tokens": 46000256}
|
|
{"current_steps": 14620, "total_steps": 15621, "loss": 0.3203, "lr": 2.4965640535448917e-08, "epoch": 0.9359195954164266, "percentage": 93.59, "elapsed_time": "1:33:53", "remaining_time": "0:06:25", "throughput": 8168.55, "total_tokens": 46015616}
|
|
{"current_steps": 14625, "total_steps": 15621, "loss": 0.3659, "lr": 2.471813311447657e-08, "epoch": 0.9362396773574035, "percentage": 93.62, "elapsed_time": "1:33:53", "remaining_time": "0:06:23", "throughput": 8170.32, "total_tokens": 46031040}
|
|
{"current_steps": 14630, "total_steps": 15621, "loss": 0.3221, "lr": 2.4471843344365915e-08, "epoch": 0.9365597592983804, "percentage": 93.66, "elapsed_time": "1:33:54", "remaining_time": "0:06:21", "throughput": 8172.05, "total_tokens": 46046016}
|
|
{"current_steps": 14635, "total_steps": 15621, "loss": 0.2701, "lr": 2.42267715326131e-08, "epoch": 0.9368798412393573, "percentage": 93.69, "elapsed_time": "1:33:55", "remaining_time": "0:06:19", "throughput": 8173.97, "total_tokens": 46062528}
|
|
{"current_steps": 14640, "total_steps": 15621, "loss": 0.3421, "lr": 2.3982917985192697e-08, "epoch": 0.9371999231803342, "percentage": 93.72, "elapsed_time": "1:33:55", "remaining_time": "0:06:17", "throughput": 8175.78, "total_tokens": 46078144}
|
|
{"current_steps": 14645, "total_steps": 15621, "loss": 0.3982, "lr": 2.3740283006558838e-08, "epoch": 0.9375200051213111, "percentage": 93.75, "elapsed_time": "1:33:56", "remaining_time": "0:06:15", "throughput": 8178.0, "total_tokens": 46096896}
|
|
{"current_steps": 14650, "total_steps": 15621, "loss": 0.3756, "lr": 2.349886689964431e-08, "epoch": 0.9378400870622879, "percentage": 93.78, "elapsed_time": "1:33:57", "remaining_time": "0:06:13", "throughput": 8179.71, "total_tokens": 46111808}
|
|
{"current_steps": 14655, "total_steps": 15621, "loss": 0.2836, "lr": 2.32586699658599e-08, "epoch": 0.9381601690032648, "percentage": 93.82, "elapsed_time": "1:33:58", "remaining_time": "0:06:11", "throughput": 8181.58, "total_tokens": 46127936}
|
|
{"current_steps": 14660, "total_steps": 15621, "loss": 0.3551, "lr": 2.3019692505094056e-08, "epoch": 0.9384802509442417, "percentage": 93.85, "elapsed_time": "1:33:58", "remaining_time": "0:06:09", "throughput": 8183.29, "total_tokens": 46142848}
|
|
{"current_steps": 14665, "total_steps": 15621, "loss": 0.5477, "lr": 2.2781934815713223e-08, "epoch": 0.9388003328852186, "percentage": 93.88, "elapsed_time": "1:33:59", "remaining_time": "0:06:07", "throughput": 8185.13, "total_tokens": 46158848}
|
|
{"current_steps": 14670, "total_steps": 15621, "loss": 0.3611, "lr": 2.254539719456061e-08, "epoch": 0.9391204148261955, "percentage": 93.91, "elapsed_time": "1:34:00", "remaining_time": "0:06:05", "throughput": 8186.98, "total_tokens": 46174912}
|
|
{"current_steps": 14675, "total_steps": 15621, "loss": 0.2694, "lr": 2.231007993695633e-08, "epoch": 0.9394404967671725, "percentage": 93.94, "elapsed_time": "1:34:00", "remaining_time": "0:06:03", "throughput": 8188.6, "total_tokens": 46189248}
|
|
{"current_steps": 14680, "total_steps": 15621, "loss": 0.3136, "lr": 2.2075983336696357e-08, "epoch": 0.9397605787081493, "percentage": 93.98, "elapsed_time": "1:34:01", "remaining_time": "0:06:01", "throughput": 8190.41, "total_tokens": 46204928}
|
|
{"current_steps": 14685, "total_steps": 15621, "loss": 0.3964, "lr": 2.1843107686053353e-08, "epoch": 0.9400806606491262, "percentage": 94.01, "elapsed_time": "1:34:02", "remaining_time": "0:05:59", "throughput": 8192.15, "total_tokens": 46220160}
|
|
{"current_steps": 14690, "total_steps": 15621, "loss": 0.4228, "lr": 2.1611453275775405e-08, "epoch": 0.9404007425901031, "percentage": 94.04, "elapsed_time": "1:34:02", "remaining_time": "0:05:57", "throughput": 8193.93, "total_tokens": 46235584}
|
|
{"current_steps": 14695, "total_steps": 15621, "loss": 0.2719, "lr": 2.138102039508538e-08, "epoch": 0.94072082453108, "percentage": 94.07, "elapsed_time": "1:34:03", "remaining_time": "0:05:55", "throughput": 8195.82, "total_tokens": 46251904}
|
|
{"current_steps": 14700, "total_steps": 15621, "loss": 0.3995, "lr": 2.1151809331681703e-08, "epoch": 0.9410409064720568, "percentage": 94.1, "elapsed_time": "1:34:04", "remaining_time": "0:05:53", "throughput": 8197.67, "total_tokens": 46268032}
|
|
{"current_steps": 14705, "total_steps": 15621, "loss": 0.3371, "lr": 2.092382037173701e-08, "epoch": 0.9413609884130337, "percentage": 94.14, "elapsed_time": "1:34:04", "remaining_time": "0:05:51", "throughput": 8199.43, "total_tokens": 46283392}
|
|
{"current_steps": 14710, "total_steps": 15621, "loss": 0.3089, "lr": 2.0697053799898277e-08, "epoch": 0.9416810703540106, "percentage": 94.17, "elapsed_time": "1:34:05", "remaining_time": "0:05:49", "throughput": 8201.18, "total_tokens": 46298752}
|
|
{"current_steps": 14715, "total_steps": 15621, "loss": 0.3394, "lr": 2.0471509899286144e-08, "epoch": 0.9420011522949875, "percentage": 94.2, "elapsed_time": "1:34:06", "remaining_time": "0:05:47", "throughput": 8203.01, "total_tokens": 46314624}
|
|
{"current_steps": 14720, "total_steps": 15621, "loss": 0.3517, "lr": 2.0247188951494797e-08, "epoch": 0.9423212342359644, "percentage": 94.23, "elapsed_time": "1:34:06", "remaining_time": "0:05:45", "throughput": 8205.0, "total_tokens": 46331712}
|
|
{"current_steps": 14725, "total_steps": 15621, "loss": 0.5446, "lr": 2.0024091236591655e-08, "epoch": 0.9426413161769412, "percentage": 94.26, "elapsed_time": "1:34:07", "remaining_time": "0:05:43", "throughput": 8206.71, "total_tokens": 46347200}
|
|
{"current_steps": 14730, "total_steps": 15621, "loss": 0.3148, "lr": 1.98022170331168e-08, "epoch": 0.9429613981179182, "percentage": 94.3, "elapsed_time": "1:34:08", "remaining_time": "0:05:41", "throughput": 8208.54, "total_tokens": 46363008}
|
|
{"current_steps": 14735, "total_steps": 15621, "loss": 0.3808, "lr": 1.9581566618082744e-08, "epoch": 0.9432814800588951, "percentage": 94.33, "elapsed_time": "1:34:08", "remaining_time": "0:05:39", "throughput": 8210.36, "total_tokens": 46378816}
|
|
{"current_steps": 14740, "total_steps": 15621, "loss": 0.4079, "lr": 1.9362140266974025e-08, "epoch": 0.943601561999872, "percentage": 94.36, "elapsed_time": "1:34:09", "remaining_time": "0:05:37", "throughput": 8212.24, "total_tokens": 46395200}
|
|
{"current_steps": 14745, "total_steps": 15621, "loss": 0.3223, "lr": 1.9143938253747383e-08, "epoch": 0.9439216439408489, "percentage": 94.39, "elapsed_time": "1:34:10", "remaining_time": "0:05:35", "throughput": 8214.17, "total_tokens": 46411840}
|
|
{"current_steps": 14750, "total_steps": 15621, "loss": 0.4503, "lr": 1.892696085083023e-08, "epoch": 0.9442417258818258, "percentage": 94.42, "elapsed_time": "1:34:10", "remaining_time": "0:05:33", "throughput": 8215.99, "total_tokens": 46427776}
|
|
{"current_steps": 14755, "total_steps": 15621, "loss": 0.3146, "lr": 1.8711208329121542e-08, "epoch": 0.9445618078228026, "percentage": 94.46, "elapsed_time": "1:34:11", "remaining_time": "0:05:31", "throughput": 8217.96, "total_tokens": 46444736}
|
|
{"current_steps": 14760, "total_steps": 15621, "loss": 0.3372, "lr": 1.849668095799084e-08, "epoch": 0.9448818897637795, "percentage": 94.49, "elapsed_time": "1:34:12", "remaining_time": "0:05:29", "throughput": 8219.79, "total_tokens": 46460672}
|
|
{"current_steps": 14765, "total_steps": 15621, "loss": 0.3458, "lr": 1.8283379005278098e-08, "epoch": 0.9452019717047564, "percentage": 94.52, "elapsed_time": "1:34:12", "remaining_time": "0:05:27", "throughput": 8221.63, "total_tokens": 46476736}
|
|
{"current_steps": 14770, "total_steps": 15621, "loss": 0.3238, "lr": 1.807130273729329e-08, "epoch": 0.9455220536457333, "percentage": 94.55, "elapsed_time": "1:34:13", "remaining_time": "0:05:25", "throughput": 8223.43, "total_tokens": 46492416}
|
|
{"current_steps": 14775, "total_steps": 15621, "loss": 0.3331, "lr": 1.7860452418816173e-08, "epoch": 0.9458421355867102, "percentage": 94.58, "elapsed_time": "1:34:14", "remaining_time": "0:05:23", "throughput": 8225.12, "total_tokens": 46507264}
|
|
{"current_steps": 14780, "total_steps": 15621, "loss": 0.3365, "lr": 1.7650828313095834e-08, "epoch": 0.946162217527687, "percentage": 94.62, "elapsed_time": "1:34:15", "remaining_time": "0:05:21", "throughput": 8227.09, "total_tokens": 46524224}
|
|
{"current_steps": 14785, "total_steps": 15621, "loss": 0.3172, "lr": 1.7442430681850362e-08, "epoch": 0.946482299468664, "percentage": 94.65, "elapsed_time": "1:34:15", "remaining_time": "0:05:19", "throughput": 8228.83, "total_tokens": 46539456}
|
|
{"current_steps": 14790, "total_steps": 15621, "loss": 0.402, "lr": 1.723525978526652e-08, "epoch": 0.9468023814096409, "percentage": 94.68, "elapsed_time": "1:34:16", "remaining_time": "0:05:17", "throughput": 8230.62, "total_tokens": 46555136}
|
|
{"current_steps": 14795, "total_steps": 15621, "loss": 0.3503, "lr": 1.702931588199996e-08, "epoch": 0.9471224633506178, "percentage": 94.71, "elapsed_time": "1:34:16", "remaining_time": "0:05:15", "throughput": 8232.37, "total_tokens": 46570432}
|
|
{"current_steps": 14800, "total_steps": 15621, "loss": 0.3141, "lr": 1.6824599229173897e-08, "epoch": 0.9474425452915947, "percentage": 94.74, "elapsed_time": "1:34:17", "remaining_time": "0:05:13", "throughput": 8234.19, "total_tokens": 46586304}
|
|
{"current_steps": 14805, "total_steps": 15621, "loss": 0.2946, "lr": 1.662111008237932e-08, "epoch": 0.9477626272325715, "percentage": 94.78, "elapsed_time": "1:34:18", "remaining_time": "0:05:11", "throughput": 8236.04, "total_tokens": 46602432}
|
|
{"current_steps": 14810, "total_steps": 15621, "loss": 0.3135, "lr": 1.6418848695675003e-08, "epoch": 0.9480827091735484, "percentage": 94.81, "elapsed_time": "1:34:19", "remaining_time": "0:05:09", "throughput": 8237.74, "total_tokens": 46617472}
|
|
{"current_steps": 14815, "total_steps": 15621, "loss": 0.3713, "lr": 1.6217815321586614e-08, "epoch": 0.9484027911145253, "percentage": 94.84, "elapsed_time": "1:34:19", "remaining_time": "0:05:07", "throughput": 8239.5, "total_tokens": 46632896}
|
|
{"current_steps": 14820, "total_steps": 15621, "loss": 0.3516, "lr": 1.6018010211106602e-08, "epoch": 0.9487228730555022, "percentage": 94.87, "elapsed_time": "1:34:20", "remaining_time": "0:05:05", "throughput": 8241.4, "total_tokens": 46649408}
|
|
{"current_steps": 14825, "total_steps": 15621, "loss": 0.2899, "lr": 1.58194336136942e-08, "epoch": 0.9490429549964791, "percentage": 94.9, "elapsed_time": "1:34:21", "remaining_time": "0:05:03", "throughput": 8243.22, "total_tokens": 46665344}
|
|
{"current_steps": 14830, "total_steps": 15621, "loss": 0.4377, "lr": 1.5622085777274417e-08, "epoch": 0.9493630369374559, "percentage": 94.94, "elapsed_time": "1:34:21", "remaining_time": "0:05:01", "throughput": 8244.96, "total_tokens": 46680704}
|
|
{"current_steps": 14835, "total_steps": 15621, "loss": 0.3267, "lr": 1.542596694823839e-08, "epoch": 0.9496831188784329, "percentage": 94.97, "elapsed_time": "1:34:22", "remaining_time": "0:05:00", "throughput": 8246.68, "total_tokens": 46695936}
|
|
{"current_steps": 14840, "total_steps": 15621, "loss": 0.4208, "lr": 1.5231077371442914e-08, "epoch": 0.9500032008194098, "percentage": 95.0, "elapsed_time": "1:34:23", "remaining_time": "0:04:58", "throughput": 8248.48, "total_tokens": 46711680}
|
|
{"current_steps": 14845, "total_steps": 15621, "loss": 0.2846, "lr": 1.5037417290209685e-08, "epoch": 0.9503232827603867, "percentage": 95.03, "elapsed_time": "1:34:23", "remaining_time": "0:04:56", "throughput": 8250.23, "total_tokens": 46727040}
|
|
{"current_steps": 14850, "total_steps": 15621, "loss": 0.3933, "lr": 1.4844986946325743e-08, "epoch": 0.9506433647013636, "percentage": 95.06, "elapsed_time": "1:34:24", "remaining_time": "0:04:54", "throughput": 8252.01, "total_tokens": 46742720}
|
|
{"current_steps": 14855, "total_steps": 15621, "loss": 0.2686, "lr": 1.4653786580042681e-08, "epoch": 0.9509634466423404, "percentage": 95.1, "elapsed_time": "1:34:25", "remaining_time": "0:04:52", "throughput": 8253.78, "total_tokens": 46758336}
|
|
{"current_steps": 14858, "total_steps": 15621, "eval_loss": 0.35565948486328125, "epoch": 0.9511554958069266, "percentage": 95.12, "elapsed_time": "1:35:16", "remaining_time": "0:04:53", "throughput": 8181.71, "total_tokens": 46767552}
|
|
{"current_steps": 14860, "total_steps": 15621, "loss": 0.2999, "lr": 1.4463816430076215e-08, "epoch": 0.9512835285833173, "percentage": 95.13, "elapsed_time": "1:38:02", "remaining_time": "0:05:01", "throughput": 7950.96, "total_tokens": 46773312}
|
|
{"current_steps": 14865, "total_steps": 15621, "loss": 0.3573, "lr": 1.4275076733606395e-08, "epoch": 0.9516036105242942, "percentage": 95.16, "elapsed_time": "1:38:03", "remaining_time": "0:04:59", "throughput": 7952.58, "total_tokens": 46787968}
|
|
{"current_steps": 14870, "total_steps": 15621, "loss": 0.2955, "lr": 1.4087567726277061e-08, "epoch": 0.9519236924652711, "percentage": 95.19, "elapsed_time": "1:38:04", "remaining_time": "0:04:57", "throughput": 7954.34, "total_tokens": 46803712}
|
|
{"current_steps": 14875, "total_steps": 15621, "loss": 0.2811, "lr": 1.390128964219528e-08, "epoch": 0.952243774406248, "percentage": 95.22, "elapsed_time": "1:38:04", "remaining_time": "0:04:55", "throughput": 7956.23, "total_tokens": 46820288}
|
|
{"current_steps": 14880, "total_steps": 15621, "loss": 0.3966, "lr": 1.3716242713931348e-08, "epoch": 0.9525638563472248, "percentage": 95.26, "elapsed_time": "1:38:05", "remaining_time": "0:04:53", "throughput": 7957.97, "total_tokens": 46835904}
|
|
{"current_steps": 14885, "total_steps": 15621, "loss": 0.3738, "lr": 1.3532427172518789e-08, "epoch": 0.9528839382882017, "percentage": 95.29, "elapsed_time": "1:38:06", "remaining_time": "0:04:51", "throughput": 7959.66, "total_tokens": 46851136}
|
|
{"current_steps": 14890, "total_steps": 15621, "loss": 0.3431, "lr": 1.3349843247453252e-08, "epoch": 0.9532040202291787, "percentage": 95.32, "elapsed_time": "1:38:06", "remaining_time": "0:04:49", "throughput": 7961.49, "total_tokens": 46867456}
|
|
{"current_steps": 14895, "total_steps": 15621, "loss": 0.2796, "lr": 1.3168491166692941e-08, "epoch": 0.9535241021701556, "percentage": 95.35, "elapsed_time": "1:38:07", "remaining_time": "0:04:46", "throughput": 7963.2, "total_tokens": 46882816}
|
|
{"current_steps": 14900, "total_steps": 15621, "loss": 0.4594, "lr": 1.2988371156658073e-08, "epoch": 0.9538441841111325, "percentage": 95.38, "elapsed_time": "1:38:08", "remaining_time": "0:04:44", "throughput": 7964.97, "total_tokens": 46898624}
|
|
{"current_steps": 14905, "total_steps": 15621, "loss": 0.2959, "lr": 1.2809483442230763e-08, "epoch": 0.9541642660521094, "percentage": 95.42, "elapsed_time": "1:38:08", "remaining_time": "0:04:42", "throughput": 7966.73, "total_tokens": 46914304}
|
|
{"current_steps": 14910, "total_steps": 15621, "loss": 0.373, "lr": 1.2631828246754128e-08, "epoch": 0.9544843479930862, "percentage": 95.45, "elapsed_time": "1:38:09", "remaining_time": "0:04:40", "throughput": 7968.53, "total_tokens": 46930368}
|
|
{"current_steps": 14915, "total_steps": 15621, "loss": 0.3678, "lr": 1.2455405792032969e-08, "epoch": 0.9548044299340631, "percentage": 95.48, "elapsed_time": "1:38:10", "remaining_time": "0:04:38", "throughput": 7970.25, "total_tokens": 46945792}
|
|
{"current_steps": 14920, "total_steps": 15621, "loss": 0.3474, "lr": 1.2280216298332646e-08, "epoch": 0.95512451187504, "percentage": 95.51, "elapsed_time": "1:38:10", "remaining_time": "0:04:36", "throughput": 7972.08, "total_tokens": 46962048}
|
|
{"current_steps": 14925, "total_steps": 15621, "loss": 0.4736, "lr": 1.2106259984379642e-08, "epoch": 0.9554445938160169, "percentage": 95.54, "elapsed_time": "1:38:11", "remaining_time": "0:04:34", "throughput": 7973.71, "total_tokens": 46976768}
|
|
{"current_steps": 14930, "total_steps": 15621, "loss": 0.4153, "lr": 1.1933537067359889e-08, "epoch": 0.9557646757569938, "percentage": 95.58, "elapsed_time": "1:38:12", "remaining_time": "0:04:32", "throughput": 7975.34, "total_tokens": 46991424}
|
|
{"current_steps": 14935, "total_steps": 15621, "loss": 0.3603, "lr": 1.1762047762920446e-08, "epoch": 0.9560847576979706, "percentage": 95.61, "elapsed_time": "1:38:12", "remaining_time": "0:04:30", "throughput": 7977.03, "total_tokens": 47006656}
|
|
{"current_steps": 14940, "total_steps": 15621, "loss": 0.3643, "lr": 1.1591792285167602e-08, "epoch": 0.9564048396389476, "percentage": 95.64, "elapsed_time": "1:38:13", "remaining_time": "0:04:28", "throughput": 7978.72, "total_tokens": 47021824}
|
|
{"current_steps": 14945, "total_steps": 15621, "loss": 0.3862, "lr": 1.1422770846667206e-08, "epoch": 0.9567249215799245, "percentage": 95.67, "elapsed_time": "1:38:14", "remaining_time": "0:04:26", "throughput": 7980.47, "total_tokens": 47037440}
|
|
{"current_steps": 14950, "total_steps": 15621, "loss": 0.303, "lr": 1.1254983658444572e-08, "epoch": 0.9570450035209014, "percentage": 95.7, "elapsed_time": "1:38:14", "remaining_time": "0:04:24", "throughput": 7982.29, "total_tokens": 47053760}
|
|
{"current_steps": 14955, "total_steps": 15621, "loss": 0.3218, "lr": 1.1088430929984017e-08, "epoch": 0.9573650854618783, "percentage": 95.74, "elapsed_time": "1:38:15", "remaining_time": "0:04:22", "throughput": 7983.96, "total_tokens": 47068928}
|
|
{"current_steps": 14960, "total_steps": 15621, "loss": 0.3807, "lr": 1.0923112869228645e-08, "epoch": 0.9576851674028551, "percentage": 95.77, "elapsed_time": "1:38:16", "remaining_time": "0:04:20", "throughput": 7985.72, "total_tokens": 47084672}
|
|
{"current_steps": 14965, "total_steps": 15621, "loss": 0.3554, "lr": 1.0759029682579801e-08, "epoch": 0.958005249343832, "percentage": 95.8, "elapsed_time": "1:38:16", "remaining_time": "0:04:18", "throughput": 7987.63, "total_tokens": 47101632}
|
|
{"current_steps": 14970, "total_steps": 15621, "loss": 0.3051, "lr": 1.0596181574897389e-08, "epoch": 0.9583253312848089, "percentage": 95.83, "elapsed_time": "1:38:17", "remaining_time": "0:04:16", "throughput": 7989.27, "total_tokens": 47116480}
|
|
{"current_steps": 14975, "total_steps": 15621, "loss": 0.3227, "lr": 1.0434568749499107e-08, "epoch": 0.9586454132257858, "percentage": 95.86, "elapsed_time": "1:38:18", "remaining_time": "0:04:14", "throughput": 7991.13, "total_tokens": 47132992}
|
|
{"current_steps": 14980, "total_steps": 15621, "loss": 0.3077, "lr": 1.027419140816066e-08, "epoch": 0.9589654951667627, "percentage": 95.9, "elapsed_time": "1:38:18", "remaining_time": "0:04:12", "throughput": 7992.93, "total_tokens": 47149056}
|
|
{"current_steps": 14985, "total_steps": 15621, "loss": 0.3029, "lr": 1.0115049751114768e-08, "epoch": 0.9592855771077395, "percentage": 95.93, "elapsed_time": "1:38:19", "remaining_time": "0:04:10", "throughput": 7994.7, "total_tokens": 47164864}
|
|
{"current_steps": 14990, "total_steps": 15621, "loss": 0.3514, "lr": 9.957143977051941e-09, "epoch": 0.9596056590487164, "percentage": 95.96, "elapsed_time": "1:38:20", "remaining_time": "0:04:08", "throughput": 7996.44, "total_tokens": 47180544}
|
|
{"current_steps": 14995, "total_steps": 15621, "loss": 0.3879, "lr": 9.800474283119142e-09, "epoch": 0.9599257409896934, "percentage": 95.99, "elapsed_time": "1:38:20", "remaining_time": "0:04:06", "throughput": 7998.23, "total_tokens": 47196608}
|
|
{"current_steps": 15000, "total_steps": 15621, "loss": 0.3755, "lr": 9.645040864920462e-09, "epoch": 0.9602458229306703, "percentage": 96.02, "elapsed_time": "1:38:21", "remaining_time": "0:04:04", "throughput": 8000.12, "total_tokens": 47213504}
|
|
{"current_steps": 15005, "total_steps": 15621, "loss": 0.4015, "lr": 9.490843916516334e-09, "epoch": 0.9605659048716472, "percentage": 96.06, "elapsed_time": "1:38:22", "remaining_time": "0:04:02", "throughput": 8001.76, "total_tokens": 47228288}
|
|
{"current_steps": 15010, "total_steps": 15621, "loss": 0.452, "lr": 9.337883630423316e-09, "epoch": 0.960885986812624, "percentage": 96.09, "elapsed_time": "1:38:22", "remaining_time": "0:04:00", "throughput": 8003.47, "total_tokens": 47243712}
|
|
{"current_steps": 15015, "total_steps": 15621, "loss": 0.5173, "lr": 9.186160197614423e-09, "epoch": 0.9612060687536009, "percentage": 96.12, "elapsed_time": "1:38:23", "remaining_time": "0:03:58", "throughput": 8005.29, "total_tokens": 47259904}
|
|
{"current_steps": 15020, "total_steps": 15621, "loss": 0.4795, "lr": 9.035673807517795e-09, "epoch": 0.9615261506945778, "percentage": 96.15, "elapsed_time": "1:38:24", "remaining_time": "0:03:56", "throughput": 8006.96, "total_tokens": 47275072}
|
|
{"current_steps": 15025, "total_steps": 15621, "loss": 0.2802, "lr": 8.886424648017698e-09, "epoch": 0.9618462326355547, "percentage": 96.18, "elapsed_time": "1:38:24", "remaining_time": "0:03:54", "throughput": 8008.69, "total_tokens": 47290688}
|
|
{"current_steps": 15030, "total_steps": 15621, "loss": 0.34, "lr": 8.738412905453408e-09, "epoch": 0.9621663145765316, "percentage": 96.22, "elapsed_time": "1:38:25", "remaining_time": "0:03:52", "throughput": 8010.45, "total_tokens": 47306496}
|
|
{"current_steps": 15035, "total_steps": 15621, "loss": 0.3524, "lr": 8.591638764619324e-09, "epoch": 0.9624863965175084, "percentage": 96.25, "elapsed_time": "1:38:26", "remaining_time": "0:03:50", "throughput": 8012.08, "total_tokens": 47321280}
|
|
{"current_steps": 15040, "total_steps": 15621, "loss": 0.3707, "lr": 8.446102408764643e-09, "epoch": 0.9628064784584853, "percentage": 96.28, "elapsed_time": "1:38:26", "remaining_time": "0:03:48", "throughput": 8013.9, "total_tokens": 47337536}
|
|
{"current_steps": 15045, "total_steps": 15621, "loss": 0.2796, "lr": 8.301804019593129e-09, "epoch": 0.9631265603994623, "percentage": 96.31, "elapsed_time": "1:38:27", "remaining_time": "0:03:46", "throughput": 8015.61, "total_tokens": 47353024}
|
|
{"current_steps": 15050, "total_steps": 15621, "loss": 0.3505, "lr": 8.158743777263333e-09, "epoch": 0.9634466423404392, "percentage": 96.34, "elapsed_time": "1:38:28", "remaining_time": "0:03:44", "throughput": 8017.39, "total_tokens": 47369088}
|
|
{"current_steps": 15055, "total_steps": 15621, "loss": 0.3566, "lr": 8.016921860387272e-09, "epoch": 0.9637667242814161, "percentage": 96.38, "elapsed_time": "1:38:28", "remaining_time": "0:03:42", "throughput": 8019.08, "total_tokens": 47384320}
|
|
{"current_steps": 15060, "total_steps": 15621, "loss": 0.3949, "lr": 7.876338446031416e-09, "epoch": 0.964086806222393, "percentage": 96.41, "elapsed_time": "1:38:29", "remaining_time": "0:03:40", "throughput": 8020.92, "total_tokens": 47400896}
|
|
{"current_steps": 15065, "total_steps": 15621, "loss": 0.3234, "lr": 7.736993709716033e-09, "epoch": 0.9644068881633698, "percentage": 96.44, "elapsed_time": "1:38:30", "remaining_time": "0:03:38", "throughput": 8022.7, "total_tokens": 47416896}
|
|
{"current_steps": 15070, "total_steps": 15621, "loss": 0.4736, "lr": 7.59888782541418e-09, "epoch": 0.9647269701043467, "percentage": 96.47, "elapsed_time": "1:38:31", "remaining_time": "0:03:36", "throughput": 8024.41, "total_tokens": 47432320}
|
|
{"current_steps": 15075, "total_steps": 15621, "loss": 0.2698, "lr": 7.462020965553151e-09, "epoch": 0.9650470520453236, "percentage": 96.5, "elapsed_time": "1:38:31", "remaining_time": "0:03:34", "throughput": 8026.2, "total_tokens": 47448320}
|
|
{"current_steps": 15080, "total_steps": 15621, "loss": 0.4844, "lr": 7.32639330101259e-09, "epoch": 0.9653671339863005, "percentage": 96.54, "elapsed_time": "1:38:32", "remaining_time": "0:03:32", "throughput": 8027.88, "total_tokens": 47463488}
|
|
{"current_steps": 15085, "total_steps": 15621, "loss": 0.3884, "lr": 7.1920050011252675e-09, "epoch": 0.9656872159272774, "percentage": 96.57, "elapsed_time": "1:38:33", "remaining_time": "0:03:30", "throughput": 8029.61, "total_tokens": 47479104}
|
|
{"current_steps": 15090, "total_steps": 15621, "loss": 0.3994, "lr": 7.058856233676525e-09, "epoch": 0.9660072978682542, "percentage": 96.6, "elapsed_time": "1:38:33", "remaining_time": "0:03:28", "throughput": 8031.56, "total_tokens": 47496448}
|
|
{"current_steps": 15095, "total_steps": 15621, "loss": 0.3758, "lr": 6.926947164904162e-09, "epoch": 0.9663273798092311, "percentage": 96.63, "elapsed_time": "1:38:34", "remaining_time": "0:03:26", "throughput": 8033.28, "total_tokens": 47511936}
|
|
{"current_steps": 15100, "total_steps": 15621, "loss": 0.4048, "lr": 6.796277959498331e-09, "epoch": 0.9666474617502081, "percentage": 96.66, "elapsed_time": "1:38:35", "remaining_time": "0:03:24", "throughput": 8035.1, "total_tokens": 47528320}
|
|
{"current_steps": 15105, "total_steps": 15621, "loss": 0.2726, "lr": 6.666848780600864e-09, "epoch": 0.966967543691185, "percentage": 96.7, "elapsed_time": "1:38:35", "remaining_time": "0:03:22", "throughput": 8036.76, "total_tokens": 47543296}
|
|
{"current_steps": 15110, "total_steps": 15621, "loss": 0.2706, "lr": 6.538659789805834e-09, "epoch": 0.9672876256321619, "percentage": 96.73, "elapsed_time": "1:38:36", "remaining_time": "0:03:20", "throughput": 8038.45, "total_tokens": 47558656}
|
|
{"current_steps": 15115, "total_steps": 15621, "loss": 0.3739, "lr": 6.411711147158438e-09, "epoch": 0.9676077075731387, "percentage": 96.76, "elapsed_time": "1:38:37", "remaining_time": "0:03:18", "throughput": 8040.24, "total_tokens": 47574720}
|
|
{"current_steps": 15120, "total_steps": 15621, "loss": 0.3126, "lr": 6.286003011155783e-09, "epoch": 0.9679277895141156, "percentage": 96.79, "elapsed_time": "1:38:37", "remaining_time": "0:03:16", "throughput": 8041.96, "total_tokens": 47590272}
|
|
{"current_steps": 15125, "total_steps": 15621, "loss": 0.4041, "lr": 6.161535538745877e-09, "epoch": 0.9682478714550925, "percentage": 96.82, "elapsed_time": "1:38:38", "remaining_time": "0:03:14", "throughput": 8043.66, "total_tokens": 47605696}
|
|
{"current_steps": 15130, "total_steps": 15621, "loss": 0.3798, "lr": 6.0383088853277475e-09, "epoch": 0.9685679533960694, "percentage": 96.86, "elapsed_time": "1:38:39", "remaining_time": "0:03:12", "throughput": 8045.46, "total_tokens": 47621760}
|
|
{"current_steps": 15135, "total_steps": 15621, "loss": 0.3175, "lr": 5.916323204751439e-09, "epoch": 0.9688880353370463, "percentage": 96.89, "elapsed_time": "1:38:39", "remaining_time": "0:03:10", "throughput": 8047.44, "total_tokens": 47639296}
|
|
{"current_steps": 15140, "total_steps": 15621, "loss": 0.2636, "lr": 5.795578649317345e-09, "epoch": 0.9692081172780231, "percentage": 96.92, "elapsed_time": "1:38:40", "remaining_time": "0:03:08", "throughput": 8049.13, "total_tokens": 47654656}
|
|
{"current_steps": 15145, "total_steps": 15621, "loss": 0.3059, "lr": 5.676075369776656e-09, "epoch": 0.969528199219, "percentage": 96.95, "elapsed_time": "1:38:41", "remaining_time": "0:03:06", "throughput": 8050.96, "total_tokens": 47671168}
|
|
{"current_steps": 15150, "total_steps": 15621, "loss": 0.3451, "lr": 5.557813515330468e-09, "epoch": 0.9698482811599769, "percentage": 96.98, "elapsed_time": "1:38:41", "remaining_time": "0:03:04", "throughput": 8052.63, "total_tokens": 47686400}
|
|
{"current_steps": 15155, "total_steps": 15621, "loss": 0.3484, "lr": 5.440793233630115e-09, "epoch": 0.9701683631009539, "percentage": 97.02, "elapsed_time": "1:38:42", "remaining_time": "0:03:02", "throughput": 8054.31, "total_tokens": 47701760}
|
|
{"current_steps": 15160, "total_steps": 15621, "loss": 0.3073, "lr": 5.325014670776951e-09, "epoch": 0.9704884450419308, "percentage": 97.05, "elapsed_time": "1:38:43", "remaining_time": "0:03:00", "throughput": 8056.01, "total_tokens": 47717248}
|
|
{"current_steps": 15165, "total_steps": 15621, "loss": 0.3607, "lr": 5.21047797132157e-09, "epoch": 0.9708085269829076, "percentage": 97.08, "elapsed_time": "1:38:43", "remaining_time": "0:02:58", "throughput": 8057.93, "total_tokens": 47734336}
|
|
{"current_steps": 15170, "total_steps": 15621, "loss": 0.3428, "lr": 5.097183278264694e-09, "epoch": 0.9711286089238845, "percentage": 97.11, "elapsed_time": "1:38:44", "remaining_time": "0:02:56", "throughput": 8059.71, "total_tokens": 47750464}
|
|
{"current_steps": 15175, "total_steps": 15621, "loss": 0.4272, "lr": 4.985130733055954e-09, "epoch": 0.9714486908648614, "percentage": 97.14, "elapsed_time": "1:38:45", "remaining_time": "0:02:54", "throughput": 8061.4, "total_tokens": 47765824}
|
|
{"current_steps": 15180, "total_steps": 15621, "loss": 0.381, "lr": 4.874320475594107e-09, "epoch": 0.9717687728058383, "percentage": 97.18, "elapsed_time": "1:38:45", "remaining_time": "0:02:52", "throughput": 8063.15, "total_tokens": 47781760}
|
|
{"current_steps": 15185, "total_steps": 15621, "loss": 0.292, "lr": 4.764752644227377e-09, "epoch": 0.9720888547468152, "percentage": 97.21, "elapsed_time": "1:38:46", "remaining_time": "0:02:50", "throughput": 8064.86, "total_tokens": 47797312}
|
|
{"current_steps": 15190, "total_steps": 15621, "loss": 0.335, "lr": 4.656427375752336e-09, "epoch": 0.972408936687792, "percentage": 97.24, "elapsed_time": "1:38:47", "remaining_time": "0:02:48", "throughput": 8066.67, "total_tokens": 47813440}
|
|
{"current_steps": 15195, "total_steps": 15621, "loss": 0.343, "lr": 4.549344805414246e-09, "epoch": 0.9727290186287689, "percentage": 97.27, "elapsed_time": "1:38:47", "remaining_time": "0:02:46", "throughput": 8068.44, "total_tokens": 47829440}
|
|
{"current_steps": 15200, "total_steps": 15621, "loss": 0.4009, "lr": 4.443505066907049e-09, "epoch": 0.9730491005697458, "percentage": 97.3, "elapsed_time": "1:38:48", "remaining_time": "0:02:44", "throughput": 8070.1, "total_tokens": 47844608}
|
|
{"current_steps": 15205, "total_steps": 15621, "loss": 0.2898, "lr": 4.338908292372934e-09, "epoch": 0.9733691825107228, "percentage": 97.34, "elapsed_time": "1:38:49", "remaining_time": "0:02:42", "throughput": 8071.81, "total_tokens": 47860160}
|
|
{"current_steps": 15210, "total_steps": 15621, "loss": 0.3906, "lr": 4.235554612402214e-09, "epoch": 0.9736892644516997, "percentage": 97.37, "elapsed_time": "1:38:49", "remaining_time": "0:02:40", "throughput": 8073.52, "total_tokens": 47875648}
|
|
{"current_steps": 15215, "total_steps": 15621, "loss": 0.3799, "lr": 4.133444156033006e-09, "epoch": 0.9740093463926766, "percentage": 97.4, "elapsed_time": "1:38:50", "remaining_time": "0:02:38", "throughput": 8075.43, "total_tokens": 47892736}
|
|
{"current_steps": 15220, "total_steps": 15621, "loss": 0.3319, "lr": 4.032577050751551e-09, "epoch": 0.9743294283336534, "percentage": 97.43, "elapsed_time": "1:38:51", "remaining_time": "0:02:36", "throughput": 8077.22, "total_tokens": 47908992}
|
|
{"current_steps": 15225, "total_steps": 15621, "loss": 0.3489, "lr": 3.932953422491669e-09, "epoch": 0.9746495102746303, "percentage": 97.46, "elapsed_time": "1:38:52", "remaining_time": "0:02:34", "throughput": 8078.96, "total_tokens": 47924736}
|
|
{"current_steps": 15230, "total_steps": 15621, "loss": 0.2816, "lr": 3.8345733956345326e-09, "epoch": 0.9749695922156072, "percentage": 97.5, "elapsed_time": "1:38:52", "remaining_time": "0:02:32", "throughput": 8080.76, "total_tokens": 47941056}
|
|
{"current_steps": 15235, "total_steps": 15621, "loss": 0.3635, "lr": 3.737437093008777e-09, "epoch": 0.9752896741565841, "percentage": 97.53, "elapsed_time": "1:38:53", "remaining_time": "0:02:30", "throughput": 8082.62, "total_tokens": 47957824}
|
|
{"current_steps": 15240, "total_steps": 15621, "loss": 0.4132, "lr": 3.641544635890281e-09, "epoch": 0.975609756097561, "percentage": 97.56, "elapsed_time": "1:38:54", "remaining_time": "0:02:28", "throughput": 8084.28, "total_tokens": 47973056}
|
|
{"current_steps": 15245, "total_steps": 15621, "loss": 0.3959, "lr": 3.546896144001832e-09, "epoch": 0.9759298380385378, "percentage": 97.59, "elapsed_time": "1:38:54", "remaining_time": "0:02:26", "throughput": 8086.03, "total_tokens": 47988928}
|
|
{"current_steps": 15250, "total_steps": 15621, "loss": 0.3935, "lr": 3.4534917355132364e-09, "epoch": 0.9762499199795147, "percentage": 97.62, "elapsed_time": "1:38:55", "remaining_time": "0:02:24", "throughput": 8087.67, "total_tokens": 48004032}
|
|
{"current_steps": 15255, "total_steps": 15621, "loss": 0.4168, "lr": 3.361331527040878e-09, "epoch": 0.9765700019204916, "percentage": 97.66, "elapsed_time": "1:38:56", "remaining_time": "0:02:22", "throughput": 8089.53, "total_tokens": 48020800}
|
|
{"current_steps": 15260, "total_steps": 15621, "loss": 0.3997, "lr": 3.270415633647938e-09, "epoch": 0.9768900838614686, "percentage": 97.69, "elapsed_time": "1:38:56", "remaining_time": "0:02:20", "throughput": 8091.3, "total_tokens": 48036800}
|
|
{"current_steps": 15265, "total_steps": 15621, "loss": 0.2911, "lr": 3.180744168843952e-09, "epoch": 0.9772101658024455, "percentage": 97.72, "elapsed_time": "1:38:57", "remaining_time": "0:02:18", "throughput": 8092.87, "total_tokens": 48051264}
|
|
{"current_steps": 15270, "total_steps": 15621, "loss": 0.226, "lr": 3.0923172445849187e-09, "epoch": 0.9775302477434223, "percentage": 97.75, "elapsed_time": "1:38:58", "remaining_time": "0:02:16", "throughput": 8094.5, "total_tokens": 48066176}
|
|
{"current_steps": 15275, "total_steps": 15621, "loss": 0.3135, "lr": 3.0051349712727493e-09, "epoch": 0.9778503296843992, "percentage": 97.79, "elapsed_time": "1:38:58", "remaining_time": "0:02:14", "throughput": 8096.23, "total_tokens": 48081984}
|
|
{"current_steps": 15280, "total_steps": 15621, "loss": 0.4143, "lr": 2.9191974577555954e-09, "epoch": 0.9781704116253761, "percentage": 97.82, "elapsed_time": "1:38:59", "remaining_time": "0:02:12", "throughput": 8097.85, "total_tokens": 48096896}
|
|
{"current_steps": 15285, "total_steps": 15621, "loss": 0.2341, "lr": 2.8345048113274096e-09, "epoch": 0.978490493566353, "percentage": 97.85, "elapsed_time": "1:39:00", "remaining_time": "0:02:10", "throughput": 8099.52, "total_tokens": 48112128}
|
|
{"current_steps": 15290, "total_steps": 15621, "loss": 0.3353, "lr": 2.751057137727941e-09, "epoch": 0.9788105755073299, "percentage": 97.88, "elapsed_time": "1:39:00", "remaining_time": "0:02:08", "throughput": 8101.22, "total_tokens": 48127616}
|
|
{"current_steps": 15295, "total_steps": 15621, "loss": 0.3961, "lr": 2.66885454114274e-09, "epoch": 0.9791306574483067, "percentage": 97.91, "elapsed_time": "1:39:01", "remaining_time": "0:02:06", "throughput": 8102.8, "total_tokens": 48142144}
|
|
{"current_steps": 15300, "total_steps": 15621, "loss": 0.3685, "lr": 2.5878971242025983e-09, "epoch": 0.9794507393892836, "percentage": 97.95, "elapsed_time": "1:39:02", "remaining_time": "0:02:04", "throughput": 8104.57, "total_tokens": 48158272}
|
|
{"current_steps": 15305, "total_steps": 15621, "loss": 0.3216, "lr": 2.5081849879837746e-09, "epoch": 0.9797708213302605, "percentage": 97.98, "elapsed_time": "1:39:02", "remaining_time": "0:02:02", "throughput": 8106.19, "total_tokens": 48173120}
|
|
{"current_steps": 15310, "total_steps": 15621, "loss": 0.3423, "lr": 2.429718232007771e-09, "epoch": 0.9800909032712375, "percentage": 98.01, "elapsed_time": "1:39:03", "remaining_time": "0:02:00", "throughput": 8107.88, "total_tokens": 48188672}
|
|
{"current_steps": 15315, "total_steps": 15621, "loss": 0.2693, "lr": 2.3524969542414453e-09, "epoch": 0.9804109852122144, "percentage": 98.04, "elapsed_time": "1:39:04", "remaining_time": "0:01:58", "throughput": 8109.62, "total_tokens": 48204480}
|
|
{"current_steps": 15320, "total_steps": 15621, "loss": 0.3537, "lr": 2.2765212510963418e-09, "epoch": 0.9807310671531912, "percentage": 98.07, "elapsed_time": "1:39:04", "remaining_time": "0:01:56", "throughput": 8111.27, "total_tokens": 48219584}
|
|
{"current_steps": 15325, "total_steps": 15621, "loss": 0.2813, "lr": 2.2017912174289164e-09, "epoch": 0.9810511490941681, "percentage": 98.11, "elapsed_time": "1:39:05", "remaining_time": "0:01:54", "throughput": 8113.07, "total_tokens": 48235904}
|
|
{"current_steps": 15330, "total_steps": 15621, "loss": 0.3963, "lr": 2.128306946540648e-09, "epoch": 0.981371231035145, "percentage": 98.14, "elapsed_time": "1:39:06", "remaining_time": "0:01:52", "throughput": 8114.98, "total_tokens": 48252992}
|
|
{"current_steps": 15335, "total_steps": 15621, "loss": 0.3328, "lr": 2.0560685301774792e-09, "epoch": 0.9816913129761219, "percentage": 98.17, "elapsed_time": "1:39:06", "remaining_time": "0:01:50", "throughput": 8116.59, "total_tokens": 48267840}
|
|
{"current_steps": 15340, "total_steps": 15621, "loss": 0.3753, "lr": 1.985076058529933e-09, "epoch": 0.9820113949170988, "percentage": 98.2, "elapsed_time": "1:39:07", "remaining_time": "0:01:48", "throughput": 8118.2, "total_tokens": 48282688}
|
|
{"current_steps": 15345, "total_steps": 15621, "loss": 0.478, "lr": 1.9153296202328863e-09, "epoch": 0.9823314768580756, "percentage": 98.23, "elapsed_time": "1:39:08", "remaining_time": "0:01:46", "throughput": 8120.15, "total_tokens": 48300096}
|
|
{"current_steps": 15350, "total_steps": 15621, "loss": 0.3943, "lr": 1.8468293023656823e-09, "epoch": 0.9826515587990525, "percentage": 98.27, "elapsed_time": "1:39:08", "remaining_time": "0:01:45", "throughput": 8121.78, "total_tokens": 48315136}
|
|
{"current_steps": 15355, "total_steps": 15621, "loss": 0.4025, "lr": 1.7795751904515766e-09, "epoch": 0.9829716407400294, "percentage": 98.3, "elapsed_time": "1:39:09", "remaining_time": "0:01:43", "throughput": 8123.43, "total_tokens": 48330240}
|
|
{"current_steps": 15360, "total_steps": 15621, "loss": 0.3109, "lr": 1.7135673684584019e-09, "epoch": 0.9832917226810063, "percentage": 98.33, "elapsed_time": "1:39:10", "remaining_time": "0:01:41", "throughput": 8125.06, "total_tokens": 48345280}
|
|
{"current_steps": 15365, "total_steps": 15621, "loss": 0.403, "lr": 1.6488059187974579e-09, "epoch": 0.9836118046219833, "percentage": 98.36, "elapsed_time": "1:39:10", "remaining_time": "0:01:39", "throughput": 8126.89, "total_tokens": 48361792}
|
|
{"current_steps": 15370, "total_steps": 15621, "loss": 0.4034, "lr": 1.5852909223242894e-09, "epoch": 0.9839318865629602, "percentage": 98.39, "elapsed_time": "1:39:11", "remaining_time": "0:01:37", "throughput": 8128.59, "total_tokens": 48377408}
|
|
{"current_steps": 15375, "total_steps": 15621, "loss": 0.3679, "lr": 1.5230224583380192e-09, "epoch": 0.984251968503937, "percentage": 98.43, "elapsed_time": "1:39:12", "remaining_time": "0:01:35", "throughput": 8130.28, "total_tokens": 48392896}
|
|
{"current_steps": 15380, "total_steps": 15621, "loss": 0.4625, "lr": 1.4620006045816813e-09, "epoch": 0.9845720504449139, "percentage": 98.46, "elapsed_time": "1:39:12", "remaining_time": "0:01:33", "throughput": 8131.87, "total_tokens": 48407552}
|
|
{"current_steps": 15385, "total_steps": 15621, "loss": 0.2809, "lr": 1.4022254372417774e-09, "epoch": 0.9848921323858908, "percentage": 98.49, "elapsed_time": "1:39:13", "remaining_time": "0:01:31", "throughput": 8133.72, "total_tokens": 48424320}
|
|
{"current_steps": 15390, "total_steps": 15621, "loss": 0.5055, "lr": 1.3436970309481655e-09, "epoch": 0.9852122143268677, "percentage": 98.52, "elapsed_time": "1:39:14", "remaining_time": "0:01:29", "throughput": 8135.68, "total_tokens": 48441984}
|
|
{"current_steps": 15395, "total_steps": 15621, "loss": 0.333, "lr": 1.2864154587742815e-09, "epoch": 0.9855322962678446, "percentage": 98.55, "elapsed_time": "1:39:14", "remaining_time": "0:01:27", "throughput": 8137.29, "total_tokens": 48456832}
|
|
{"current_steps": 15400, "total_steps": 15621, "loss": 0.3719, "lr": 1.2303807922370292e-09, "epoch": 0.9858523782088214, "percentage": 98.59, "elapsed_time": "1:39:15", "remaining_time": "0:01:25", "throughput": 8138.99, "total_tokens": 48472512}
|
|
{"current_steps": 15405, "total_steps": 15621, "loss": 0.3169, "lr": 1.1755931012961128e-09, "epoch": 0.9861724601497983, "percentage": 98.62, "elapsed_time": "1:39:16", "remaining_time": "0:01:23", "throughput": 8140.77, "total_tokens": 48488832}
|
|
{"current_steps": 15410, "total_steps": 15621, "loss": 0.3615, "lr": 1.122052454354705e-09, "epoch": 0.9864925420907752, "percentage": 98.65, "elapsed_time": "1:39:16", "remaining_time": "0:01:21", "throughput": 8142.4, "total_tokens": 48503936}
|
|
{"current_steps": 15415, "total_steps": 15621, "loss": 0.4383, "lr": 1.0697589182590005e-09, "epoch": 0.9868126240317522, "percentage": 98.68, "elapsed_time": "1:39:17", "remaining_time": "0:01:19", "throughput": 8144.05, "total_tokens": 48519040}
|
|
{"current_steps": 15420, "total_steps": 15621, "loss": 0.6, "lr": 1.018712558297996e-09, "epoch": 0.9871327059727291, "percentage": 98.71, "elapsed_time": "1:39:18", "remaining_time": "0:01:17", "throughput": 8145.8, "total_tokens": 48535040}
|
|
{"current_steps": 15425, "total_steps": 15621, "loss": 0.4438, "lr": 9.689134382037113e-10, "epoch": 0.9874527879137059, "percentage": 98.75, "elapsed_time": "1:39:18", "remaining_time": "0:01:15", "throughput": 8147.65, "total_tokens": 48551808}
|
|
{"current_steps": 15430, "total_steps": 15621, "loss": 0.3976, "lr": 9.203616201508557e-10, "epoch": 0.9877728698546828, "percentage": 98.78, "elapsed_time": "1:39:19", "remaining_time": "0:01:13", "throughput": 8149.25, "total_tokens": 48566592}
|
|
{"current_steps": 15435, "total_steps": 15621, "loss": 0.3103, "lr": 8.730571647570517e-10, "epoch": 0.9880929517956597, "percentage": 98.81, "elapsed_time": "1:39:20", "remaining_time": "0:01:11", "throughput": 8151.01, "total_tokens": 48582720}
|
|
{"current_steps": 15440, "total_steps": 15621, "loss": 0.4765, "lr": 8.270001310825003e-10, "epoch": 0.9884130337366366, "percentage": 98.84, "elapsed_time": "1:39:21", "remaining_time": "0:01:09", "throughput": 8152.81, "total_tokens": 48599104}
|
|
{"current_steps": 15445, "total_steps": 15621, "loss": 0.3114, "lr": 7.821905766297599e-10, "epoch": 0.9887331156776135, "percentage": 98.87, "elapsed_time": "1:39:21", "remaining_time": "0:01:07", "throughput": 8154.55, "total_tokens": 48615040}
|
|
{"current_steps": 15450, "total_steps": 15621, "loss": 0.3971, "lr": 7.386285573441897e-10, "epoch": 0.9890531976185903, "percentage": 98.91, "elapsed_time": "1:39:22", "remaining_time": "0:01:05", "throughput": 8156.29, "total_tokens": 48630976}
|
|
{"current_steps": 15455, "total_steps": 15621, "loss": 0.283, "lr": 6.963141276136175e-10, "epoch": 0.9893732795595672, "percentage": 98.94, "elapsed_time": "1:39:23", "remaining_time": "0:01:04", "throughput": 8157.94, "total_tokens": 48646080}
|
|
{"current_steps": 15460, "total_steps": 15621, "loss": 0.2476, "lr": 6.552473402678949e-10, "epoch": 0.9896933615005441, "percentage": 98.97, "elapsed_time": "1:39:23", "remaining_time": "0:01:02", "throughput": 8159.75, "total_tokens": 48662528}
|
|
{"current_steps": 15465, "total_steps": 15621, "loss": 0.3244, "lr": 6.154282465794524e-10, "epoch": 0.990013443441521, "percentage": 99.0, "elapsed_time": "1:39:24", "remaining_time": "0:01:00", "throughput": 8161.69, "total_tokens": 48680000}
|
|
{"current_steps": 15470, "total_steps": 15621, "loss": 0.4256, "lr": 5.768568962629672e-10, "epoch": 0.990333525382498, "percentage": 99.03, "elapsed_time": "1:39:25", "remaining_time": "0:00:58", "throughput": 8163.48, "total_tokens": 48696256}
|
|
{"current_steps": 15475, "total_steps": 15621, "loss": 0.3062, "lr": 5.395333374751398e-10, "epoch": 0.9906536073234748, "percentage": 99.07, "elapsed_time": "1:39:25", "remaining_time": "0:00:56", "throughput": 8165.1, "total_tokens": 48711168}
|
|
{"current_steps": 15480, "total_steps": 15621, "loss": 0.5477, "lr": 5.034576168149174e-10, "epoch": 0.9909736892644517, "percentage": 99.1, "elapsed_time": "1:39:26", "remaining_time": "0:00:54", "throughput": 8166.81, "total_tokens": 48726848}
|
|
{"current_steps": 15485, "total_steps": 15621, "loss": 0.4838, "lr": 4.686297793231597e-10, "epoch": 0.9912937712054286, "percentage": 99.13, "elapsed_time": "1:39:27", "remaining_time": "0:00:52", "throughput": 8168.61, "total_tokens": 48743232}
|
|
{"current_steps": 15490, "total_steps": 15621, "loss": 0.4541, "lr": 4.350498684829729e-10, "epoch": 0.9916138531464055, "percentage": 99.16, "elapsed_time": "1:39:27", "remaining_time": "0:00:50", "throughput": 8170.22, "total_tokens": 48758080}
|
|
{"current_steps": 15495, "total_steps": 15621, "loss": 0.3123, "lr": 4.0271792621926483e-10, "epoch": 0.9919339350873824, "percentage": 99.19, "elapsed_time": "1:39:28", "remaining_time": "0:00:48", "throughput": 8171.85, "total_tokens": 48773120}
|
|
{"current_steps": 15500, "total_steps": 15621, "loss": 0.3749, "lr": 3.716339928987455e-10, "epoch": 0.9922540170283592, "percentage": 99.23, "elapsed_time": "1:39:29", "remaining_time": "0:00:46", "throughput": 8173.58, "total_tokens": 48789056}
|
|
{"current_steps": 15505, "total_steps": 15621, "loss": 0.4189, "lr": 3.41798107330149e-10, "epoch": 0.9925740989693361, "percentage": 99.26, "elapsed_time": "1:39:29", "remaining_time": "0:00:44", "throughput": 8175.23, "total_tokens": 48804288}
|
|
{"current_steps": 15510, "total_steps": 15621, "loss": 0.3683, "lr": 3.1321030676390027e-10, "epoch": 0.992894180910313, "percentage": 99.29, "elapsed_time": "1:39:30", "remaining_time": "0:00:42", "throughput": 8176.8, "total_tokens": 48818816}
|
|
{"current_steps": 15515, "total_steps": 15621, "loss": 0.291, "lr": 2.8587062689222617e-10, "epoch": 0.9932142628512899, "percentage": 99.32, "elapsed_time": "1:39:31", "remaining_time": "0:00:40", "throughput": 8178.64, "total_tokens": 48835520}
|
|
{"current_steps": 15520, "total_steps": 15621, "loss": 0.3139, "lr": 2.5977910184904473e-10, "epoch": 0.9935343447922668, "percentage": 99.35, "elapsed_time": "1:39:31", "remaining_time": "0:00:38", "throughput": 8180.36, "total_tokens": 48851328}
|
|
{"current_steps": 15525, "total_steps": 15621, "loss": 0.3466, "lr": 2.3493576420985373e-10, "epoch": 0.9938544267332438, "percentage": 99.39, "elapsed_time": "1:39:32", "remaining_time": "0:00:36", "throughput": 8181.97, "total_tokens": 48866304}
|
|
{"current_steps": 15530, "total_steps": 15621, "loss": 0.3311, "lr": 2.11340644991842e-10, "epoch": 0.9941745086742206, "percentage": 99.42, "elapsed_time": "1:39:33", "remaining_time": "0:00:35", "throughput": 8183.78, "total_tokens": 48882752}
|
|
{"current_steps": 15535, "total_steps": 15621, "loss": 0.3046, "lr": 1.8899377365388936e-10, "epoch": 0.9944945906151975, "percentage": 99.45, "elapsed_time": "1:39:33", "remaining_time": "0:00:33", "throughput": 8185.46, "total_tokens": 48898304}
|
|
{"current_steps": 15540, "total_steps": 15621, "loss": 0.4284, "lr": 1.6789517809634447e-10, "epoch": 0.9948146725561744, "percentage": 99.48, "elapsed_time": "1:39:34", "remaining_time": "0:00:31", "throughput": 8187.17, "total_tokens": 48914048}
|
|
{"current_steps": 15545, "total_steps": 15621, "loss": 0.3291, "lr": 1.480448846609139e-10, "epoch": 0.9951347544971513, "percentage": 99.51, "elapsed_time": "1:39:35", "remaining_time": "0:00:29", "throughput": 8188.93, "total_tokens": 48930176}
|
|
{"current_steps": 15550, "total_steps": 15621, "loss": 0.3522, "lr": 1.294429181311063e-10, "epoch": 0.9954548364381282, "percentage": 99.55, "elapsed_time": "1:39:35", "remaining_time": "0:00:27", "throughput": 8190.63, "total_tokens": 48945920}
|
|
{"current_steps": 15555, "total_steps": 15621, "loss": 0.4063, "lr": 1.1208930173145503e-10, "epoch": 0.995774918379105, "percentage": 99.58, "elapsed_time": "1:39:36", "remaining_time": "0:00:25", "throughput": 8192.24, "total_tokens": 48960832}
|
|
{"current_steps": 15560, "total_steps": 15621, "loss": 0.3278, "lr": 9.598405712840651e-11, "epoch": 0.9960950003200819, "percentage": 99.61, "elapsed_time": "1:39:37", "remaining_time": "0:00:23", "throughput": 8194.03, "total_tokens": 48977280}
|
|
{"current_steps": 15565, "total_steps": 15621, "loss": 0.3526, "lr": 8.1127204429432e-11, "epoch": 0.9964150822610588, "percentage": 99.64, "elapsed_time": "1:39:37", "remaining_time": "0:00:21", "throughput": 8195.66, "total_tokens": 48992512}
|
|
{"current_steps": 15570, "total_steps": 15621, "loss": 0.351, "lr": 6.751876218336061e-11, "epoch": 0.9967351642020357, "percentage": 99.67, "elapsed_time": "1:39:38", "remaining_time": "0:00:19", "throughput": 8197.36, "total_tokens": 49008128}
|
|
{"current_steps": 15575, "total_steps": 15621, "loss": 0.3451, "lr": 5.515874738071247e-11, "epoch": 0.9970552461430127, "percentage": 99.71, "elapsed_time": "1:39:39", "remaining_time": "0:00:17", "throughput": 8199.14, "total_tokens": 49024512}
|
|
{"current_steps": 15580, "total_steps": 15621, "loss": 0.3131, "lr": 4.404717545303249e-11, "epoch": 0.9973753280839895, "percentage": 99.74, "elapsed_time": "1:39:39", "remaining_time": "0:00:15", "throughput": 8200.83, "total_tokens": 49040128}
|
|
{"current_steps": 15585, "total_steps": 15621, "loss": 0.3111, "lr": 3.418406027322352e-11, "epoch": 0.9976954100249664, "percentage": 99.77, "elapsed_time": "1:39:40", "remaining_time": "0:00:13", "throughput": 8202.48, "total_tokens": 49055360}
|
|
{"current_steps": 15590, "total_steps": 15621, "loss": 0.3576, "lr": 2.5569414155546254e-11, "epoch": 0.9980154919659433, "percentage": 99.8, "elapsed_time": "1:39:41", "remaining_time": "0:00:11", "throughput": 8204.21, "total_tokens": 49071360}
|
|
{"current_steps": 15595, "total_steps": 15621, "loss": 0.2698, "lr": 1.8203247855397287e-11, "epoch": 0.9983355739069202, "percentage": 99.83, "elapsed_time": "1:39:41", "remaining_time": "0:00:09", "throughput": 8205.8, "total_tokens": 49086144}
|
|
{"current_steps": 15600, "total_steps": 15621, "loss": 0.3915, "lr": 1.2085570569642101e-11, "epoch": 0.9986556558478971, "percentage": 99.87, "elapsed_time": "1:39:42", "remaining_time": "0:00:08", "throughput": 8207.43, "total_tokens": 49101312}
|
|
{"current_steps": 15605, "total_steps": 15621, "loss": 0.3151, "lr": 7.216389936171019e-12, "epoch": 0.9989757377888739, "percentage": 99.9, "elapsed_time": "1:39:43", "remaining_time": "0:00:06", "throughput": 8209.08, "total_tokens": 49116672}
|
|
{"current_steps": 15610, "total_steps": 15621, "loss": 0.1751, "lr": 3.5957120342322567e-12, "epoch": 0.9992958197298508, "percentage": 99.93, "elapsed_time": "1:39:43", "remaining_time": "0:00:04", "throughput": 8210.78, "total_tokens": 49132288}
|
|
{"current_steps": 15615, "total_steps": 15621, "loss": 0.3884, "lr": 1.2235413842098807e-12, "epoch": 0.9996159016708277, "percentage": 99.96, "elapsed_time": "1:39:44", "remaining_time": "0:00:02", "throughput": 8212.48, "total_tokens": 49148096}
|
|
{"current_steps": 15620, "total_steps": 15621, "loss": 0.2485, "lr": 9.98809480678986e-14, "epoch": 0.9999359836118046, "percentage": 99.99, "elapsed_time": "1:39:45", "remaining_time": "0:00:00", "throughput": 8214.19, "total_tokens": 49163840}
|
|
{"current_steps": 15621, "total_steps": 15621, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "1:40:29", "remaining_time": "0:00:00", "throughput": 8154.18, "total_tokens": 49166912}
|