15642 lines
3.7 MiB
15642 lines
3.7 MiB
{"current_steps": 5, "total_steps": 78105, "loss": 2.1551, "lr": 2.560491614389963e-09, "epoch": 0.0003200819409768901, "percentage": 0.01, "elapsed_time": "0:00:01", "remaining_time": "4:41:54", "throughput": 14184.41, "total_tokens": 15360}
|
|
{"current_steps": 10, "total_steps": 78105, "loss": 2.3621, "lr": 5.761106132377417e-09, "epoch": 0.0006401638819537802, "percentage": 0.01, "elapsed_time": "0:00:01", "remaining_time": "3:50:57", "throughput": 17528.16, "total_tokens": 31104}
|
|
{"current_steps": 15, "total_steps": 78105, "loss": 2.1414, "lr": 8.961720650364872e-09, "epoch": 0.0009602458229306702, "percentage": 0.02, "elapsed_time": "0:00:02", "remaining_time": "3:31:14", "throughput": 18979.51, "total_tokens": 46208}
|
|
{"current_steps": 20, "total_steps": 78105, "loss": 2.7749, "lr": 1.2162335168352324e-08, "epoch": 0.0012803277639075604, "percentage": 0.03, "elapsed_time": "0:00:03", "remaining_time": "3:23:47", "throughput": 19945.06, "total_tokens": 62464}
|
|
{"current_steps": 25, "total_steps": 78105, "loss": 2.146, "lr": 1.536294968633978e-08, "epoch": 0.0016004097048844504, "percentage": 0.03, "elapsed_time": "0:00:03", "remaining_time": "3:19:50", "throughput": 20603.57, "total_tokens": 79104}
|
|
{"current_steps": 30, "total_steps": 78105, "loss": 2.2474, "lr": 1.8563564204327233e-08, "epoch": 0.0019204916458613404, "percentage": 0.04, "elapsed_time": "0:00:04", "remaining_time": "3:16:04", "throughput": 20996.81, "total_tokens": 94912}
|
|
{"current_steps": 35, "total_steps": 78105, "loss": 2.3948, "lr": 2.1764178722314687e-08, "epoch": 0.0022405735868382304, "percentage": 0.04, "elapsed_time": "0:00:05", "remaining_time": "3:13:31", "throughput": 21282.46, "total_tokens": 110784}
|
|
{"current_steps": 40, "total_steps": 78105, "loss": 2.2191, "lr": 2.496479324030214e-08, "epoch": 0.002560655527815121, "percentage": 0.05, "elapsed_time": "0:00:05", "remaining_time": "3:10:33", "throughput": 21455.77, "total_tokens": 125696}
|
|
{"current_steps": 45, "total_steps": 78105, "loss": 2.2197, "lr": 2.8165407758289592e-08, "epoch": 0.002880737468792011, "percentage": 0.06, "elapsed_time": "0:00:06", "remaining_time": "3:08:15", "throughput": 21602.64, "total_tokens": 140672}
|
|
{"current_steps": 50, "total_steps": 78105, "loss": 2.1808, "lr": 3.1366022276277046e-08, "epoch": 0.003200819409768901, "percentage": 0.06, "elapsed_time": "0:00:07", "remaining_time": "3:06:24", "throughput": 21698.56, "total_tokens": 155456}
|
|
{"current_steps": 55, "total_steps": 78105, "loss": 2.078, "lr": 3.4566636794264506e-08, "epoch": 0.003520901350745791, "percentage": 0.07, "elapsed_time": "0:00:07", "remaining_time": "3:05:24", "throughput": 21790.88, "total_tokens": 170816}
|
|
{"current_steps": 60, "total_steps": 78105, "loss": 2.3648, "lr": 3.7767251312251953e-08, "epoch": 0.003840983291722681, "percentage": 0.08, "elapsed_time": "0:00:08", "remaining_time": "3:03:44", "throughput": 21838.24, "total_tokens": 185088}
|
|
{"current_steps": 65, "total_steps": 78105, "loss": 2.0772, "lr": 4.096786583023941e-08, "epoch": 0.004161065232699571, "percentage": 0.08, "elapsed_time": "0:00:09", "remaining_time": "3:02:55", "throughput": 21919.25, "total_tokens": 200384}
|
|
{"current_steps": 70, "total_steps": 78105, "loss": 2.047, "lr": 4.416848034822686e-08, "epoch": 0.004481147173676461, "percentage": 0.09, "elapsed_time": "0:00:09", "remaining_time": "3:02:20", "throughput": 21982.49, "total_tokens": 215744}
|
|
{"current_steps": 75, "total_steps": 78105, "loss": 2.0567, "lr": 4.736909486621432e-08, "epoch": 0.004801229114653352, "percentage": 0.1, "elapsed_time": "0:00:10", "remaining_time": "3:01:32", "throughput": 22006.65, "total_tokens": 230400}
|
|
{"current_steps": 80, "total_steps": 78105, "loss": 1.8879, "lr": 5.056970938420177e-08, "epoch": 0.005121311055630242, "percentage": 0.1, "elapsed_time": "0:00:11", "remaining_time": "3:01:24", "throughput": 22096.74, "total_tokens": 246592}
|
|
{"current_steps": 85, "total_steps": 78105, "loss": 1.6707, "lr": 5.377032390218922e-08, "epoch": 0.005441392996607132, "percentage": 0.11, "elapsed_time": "0:00:11", "remaining_time": "3:01:04", "throughput": 22156.94, "total_tokens": 262272}
|
|
{"current_steps": 90, "total_steps": 78105, "loss": 1.5695, "lr": 5.6970938420176683e-08, "epoch": 0.005761474937584022, "percentage": 0.12, "elapsed_time": "0:00:12", "remaining_time": "3:00:45", "throughput": 22199.97, "total_tokens": 277760}
|
|
{"current_steps": 95, "total_steps": 78105, "loss": 1.8715, "lr": 6.017155293816413e-08, "epoch": 0.006081556878560912, "percentage": 0.12, "elapsed_time": "0:00:13", "remaining_time": "3:00:18", "throughput": 22238.3, "total_tokens": 292992}
|
|
{"current_steps": 100, "total_steps": 78105, "loss": 1.6241, "lr": 6.337216745615158e-08, "epoch": 0.006401638819537802, "percentage": 0.13, "elapsed_time": "0:00:13", "remaining_time": "2:59:39", "throughput": 22275.8, "total_tokens": 307840}
|
|
{"current_steps": 105, "total_steps": 78105, "loss": 1.6729, "lr": 6.657278197413904e-08, "epoch": 0.006721720760514692, "percentage": 0.13, "elapsed_time": "0:00:14", "remaining_time": "2:59:14", "throughput": 22311.85, "total_tokens": 323008}
|
|
{"current_steps": 110, "total_steps": 78105, "loss": 1.6035, "lr": 6.977339649212649e-08, "epoch": 0.007041802701491582, "percentage": 0.14, "elapsed_time": "0:00:15", "remaining_time": "2:59:20", "throughput": 22367.37, "total_tokens": 339456}
|
|
{"current_steps": 115, "total_steps": 78105, "loss": 1.4905, "lr": 7.297401101011395e-08, "epoch": 0.007361884642468472, "percentage": 0.15, "elapsed_time": "0:00:15", "remaining_time": "2:59:05", "throughput": 22392.76, "total_tokens": 354816}
|
|
{"current_steps": 120, "total_steps": 78105, "loss": 1.1193, "lr": 7.61746255281014e-08, "epoch": 0.007681966583445362, "percentage": 0.15, "elapsed_time": "0:00:16", "remaining_time": "2:58:35", "throughput": 22408.08, "total_tokens": 369472}
|
|
{"current_steps": 125, "total_steps": 78105, "loss": 1.2304, "lr": 7.937524004608884e-08, "epoch": 0.008002048524422252, "percentage": 0.16, "elapsed_time": "0:00:17", "remaining_time": "2:58:21", "throughput": 22429.16, "total_tokens": 384768}
|
|
{"current_steps": 130, "total_steps": 78105, "loss": 1.2864, "lr": 8.257585456407631e-08, "epoch": 0.008322130465399142, "percentage": 0.17, "elapsed_time": "0:00:17", "remaining_time": "2:58:09", "throughput": 22455.13, "total_tokens": 400192}
|
|
{"current_steps": 135, "total_steps": 78105, "loss": 1.1549, "lr": 8.577646908206376e-08, "epoch": 0.008642212406376032, "percentage": 0.17, "elapsed_time": "0:00:18", "remaining_time": "2:58:16", "throughput": 22496.2, "total_tokens": 416640}
|
|
{"current_steps": 140, "total_steps": 78105, "loss": 1.0669, "lr": 8.89770836000512e-08, "epoch": 0.008962294347352922, "percentage": 0.18, "elapsed_time": "0:00:19", "remaining_time": "2:58:15", "throughput": 22525.89, "total_tokens": 432640}
|
|
{"current_steps": 145, "total_steps": 78105, "loss": 1.1356, "lr": 9.217769811803867e-08, "epoch": 0.009282376288329812, "percentage": 0.19, "elapsed_time": "0:00:19", "remaining_time": "2:58:18", "throughput": 22546.13, "total_tokens": 448640}
|
|
{"current_steps": 150, "total_steps": 78105, "loss": 1.0674, "lr": 9.537831263602612e-08, "epoch": 0.009602458229306703, "percentage": 0.19, "elapsed_time": "0:00:20", "remaining_time": "2:58:16", "throughput": 22564.66, "total_tokens": 464448}
|
|
{"current_steps": 155, "total_steps": 78105, "loss": 1.0259, "lr": 9.857892715401356e-08, "epoch": 0.009922540170283593, "percentage": 0.2, "elapsed_time": "0:00:21", "remaining_time": "2:58:02", "throughput": 22573.14, "total_tokens": 479488}
|
|
{"current_steps": 160, "total_steps": 78105, "loss": 0.8523, "lr": 1.0177954167200103e-07, "epoch": 0.010242622111260483, "percentage": 0.2, "elapsed_time": "0:00:21", "remaining_time": "2:57:58", "throughput": 22595.57, "total_tokens": 495296}
|
|
{"current_steps": 165, "total_steps": 78105, "loss": 0.8782, "lr": 1.0498015618998849e-07, "epoch": 0.010562704052237373, "percentage": 0.21, "elapsed_time": "0:00:22", "remaining_time": "2:57:38", "throughput": 22608.06, "total_tokens": 510144}
|
|
{"current_steps": 170, "total_steps": 78105, "loss": 0.8971, "lr": 1.0818077070797593e-07, "epoch": 0.010882785993214263, "percentage": 0.22, "elapsed_time": "0:00:23", "remaining_time": "2:57:23", "throughput": 22610.26, "total_tokens": 524928}
|
|
{"current_steps": 175, "total_steps": 78105, "loss": 0.8068, "lr": 1.1138138522596339e-07, "epoch": 0.011202867934191153, "percentage": 0.22, "elapsed_time": "0:00:23", "remaining_time": "2:57:33", "throughput": 22634.73, "total_tokens": 541504}
|
|
{"current_steps": 180, "total_steps": 78105, "loss": 0.7725, "lr": 1.1458199974395085e-07, "epoch": 0.011522949875168043, "percentage": 0.23, "elapsed_time": "0:00:24", "remaining_time": "2:57:13", "throughput": 22640.0, "total_tokens": 556096}
|
|
{"current_steps": 185, "total_steps": 78105, "loss": 0.7868, "lr": 1.1778261426193829e-07, "epoch": 0.011843031816144933, "percentage": 0.24, "elapsed_time": "0:00:25", "remaining_time": "2:57:22", "throughput": 22666.58, "total_tokens": 572736}
|
|
{"current_steps": 190, "total_steps": 78105, "loss": 0.8952, "lr": 1.2098322877992575e-07, "epoch": 0.012163113757121823, "percentage": 0.24, "elapsed_time": "0:00:25", "remaining_time": "2:57:18", "throughput": 22678.75, "total_tokens": 588352}
|
|
{"current_steps": 195, "total_steps": 78105, "loss": 1.0207, "lr": 1.2418384329791322e-07, "epoch": 0.012483195698098713, "percentage": 0.25, "elapsed_time": "0:00:26", "remaining_time": "2:57:09", "throughput": 22685.57, "total_tokens": 603520}
|
|
{"current_steps": 200, "total_steps": 78105, "loss": 0.837, "lr": 1.2738445781590066e-07, "epoch": 0.012803277639075603, "percentage": 0.26, "elapsed_time": "0:00:27", "remaining_time": "2:57:08", "throughput": 22699.38, "total_tokens": 619392}
|
|
{"current_steps": 205, "total_steps": 78105, "loss": 0.8286, "lr": 1.305850723338881e-07, "epoch": 0.013123359580052493, "percentage": 0.26, "elapsed_time": "0:00:27", "remaining_time": "2:57:08", "throughput": 22720.38, "total_tokens": 635456}
|
|
{"current_steps": 210, "total_steps": 78105, "loss": 0.9468, "lr": 1.3378568685187557e-07, "epoch": 0.013443441521029383, "percentage": 0.27, "elapsed_time": "0:00:28", "remaining_time": "2:57:01", "throughput": 22729.5, "total_tokens": 650880}
|
|
{"current_steps": 215, "total_steps": 78105, "loss": 0.8357, "lr": 1.36986301369863e-07, "epoch": 0.013763523462006273, "percentage": 0.28, "elapsed_time": "0:00:29", "remaining_time": "2:57:02", "throughput": 22738.07, "total_tokens": 666688}
|
|
{"current_steps": 220, "total_steps": 78105, "loss": 0.8286, "lr": 1.4018691588785048e-07, "epoch": 0.014083605402983163, "percentage": 0.28, "elapsed_time": "0:00:29", "remaining_time": "2:56:55", "throughput": 22747.83, "total_tokens": 682112}
|
|
{"current_steps": 225, "total_steps": 78105, "loss": 0.8606, "lr": 1.4338753040583795e-07, "epoch": 0.014403687343960053, "percentage": 0.29, "elapsed_time": "0:00:30", "remaining_time": "2:56:50", "throughput": 22760.17, "total_tokens": 697728}
|
|
{"current_steps": 230, "total_steps": 78105, "loss": 0.6876, "lr": 1.4658814492382539e-07, "epoch": 0.014723769284936943, "percentage": 0.29, "elapsed_time": "0:00:31", "remaining_time": "2:56:38", "throughput": 22769.44, "total_tokens": 712704}
|
|
{"current_steps": 235, "total_steps": 78105, "loss": 1.0125, "lr": 1.4978875944181283e-07, "epoch": 0.015043851225913833, "percentage": 0.3, "elapsed_time": "0:00:32", "remaining_time": "2:56:46", "throughput": 22787.51, "total_tokens": 729408}
|
|
{"current_steps": 240, "total_steps": 78105, "loss": 0.8374, "lr": 1.529893739598003e-07, "epoch": 0.015363933166890723, "percentage": 0.31, "elapsed_time": "0:00:32", "remaining_time": "2:56:46", "throughput": 22799.39, "total_tokens": 745344}
|
|
{"current_steps": 245, "total_steps": 78105, "loss": 0.7396, "lr": 1.5618998847778773e-07, "epoch": 0.015684015107867613, "percentage": 0.31, "elapsed_time": "0:00:33", "remaining_time": "2:56:57", "throughput": 22828.65, "total_tokens": 762688}
|
|
{"current_steps": 250, "total_steps": 78105, "loss": 0.7435, "lr": 1.5939060299577523e-07, "epoch": 0.016004097048844503, "percentage": 0.32, "elapsed_time": "0:00:34", "remaining_time": "2:57:05", "throughput": 22843.38, "total_tokens": 779392}
|
|
{"current_steps": 255, "total_steps": 78105, "loss": 0.8331, "lr": 1.6259121751376267e-07, "epoch": 0.016324178989821393, "percentage": 0.33, "elapsed_time": "0:00:34", "remaining_time": "2:56:52", "throughput": 22844.02, "total_tokens": 794112}
|
|
{"current_steps": 260, "total_steps": 78105, "loss": 0.8581, "lr": 1.657918320317501e-07, "epoch": 0.016644260930798283, "percentage": 0.33, "elapsed_time": "0:00:35", "remaining_time": "2:56:52", "throughput": 22855.17, "total_tokens": 810112}
|
|
{"current_steps": 265, "total_steps": 78105, "loss": 0.8835, "lr": 1.6899244654973758e-07, "epoch": 0.016964342871775173, "percentage": 0.34, "elapsed_time": "0:00:36", "remaining_time": "2:56:46", "throughput": 22860.49, "total_tokens": 825472}
|
|
{"current_steps": 270, "total_steps": 78105, "loss": 1.0456, "lr": 1.7219306106772502e-07, "epoch": 0.017284424812752063, "percentage": 0.35, "elapsed_time": "0:00:36", "remaining_time": "2:56:34", "throughput": 22859.36, "total_tokens": 840128}
|
|
{"current_steps": 275, "total_steps": 78105, "loss": 0.8342, "lr": 1.7539367558571246e-07, "epoch": 0.017604506753728953, "percentage": 0.35, "elapsed_time": "0:00:37", "remaining_time": "2:56:26", "throughput": 22861.1, "total_tokens": 855104}
|
|
{"current_steps": 280, "total_steps": 78105, "loss": 0.8543, "lr": 1.7859429010369995e-07, "epoch": 0.017924588694705843, "percentage": 0.36, "elapsed_time": "0:00:38", "remaining_time": "2:56:25", "throughput": 22866.03, "total_tokens": 870848}
|
|
{"current_steps": 285, "total_steps": 78105, "loss": 0.7704, "lr": 1.817949046216874e-07, "epoch": 0.018244670635682733, "percentage": 0.36, "elapsed_time": "0:00:38", "remaining_time": "2:56:16", "throughput": 22868.57, "total_tokens": 885760}
|
|
{"current_steps": 290, "total_steps": 78105, "loss": 0.7688, "lr": 1.8499551913967483e-07, "epoch": 0.018564752576659623, "percentage": 0.37, "elapsed_time": "0:00:39", "remaining_time": "2:56:09", "throughput": 22870.87, "total_tokens": 900928}
|
|
{"current_steps": 295, "total_steps": 78105, "loss": 0.8229, "lr": 1.881961336576623e-07, "epoch": 0.018884834517636517, "percentage": 0.38, "elapsed_time": "0:00:40", "remaining_time": "2:56:00", "throughput": 22877.37, "total_tokens": 915968}
|
|
{"current_steps": 300, "total_steps": 78105, "loss": 1.0619, "lr": 1.9139674817564974e-07, "epoch": 0.019204916458613407, "percentage": 0.38, "elapsed_time": "0:00:40", "remaining_time": "2:56:07", "throughput": 22899.33, "total_tokens": 933056}
|
|
{"current_steps": 305, "total_steps": 78105, "loss": 0.7684, "lr": 1.9459736269363718e-07, "epoch": 0.019524998399590297, "percentage": 0.39, "elapsed_time": "0:00:41", "remaining_time": "2:56:03", "throughput": 22901.72, "total_tokens": 948416}
|
|
{"current_steps": 310, "total_steps": 78105, "loss": 0.8022, "lr": 1.9779797721162467e-07, "epoch": 0.019845080340567187, "percentage": 0.4, "elapsed_time": "0:00:42", "remaining_time": "2:55:52", "throughput": 22898.9, "total_tokens": 962880}
|
|
{"current_steps": 315, "total_steps": 78105, "loss": 0.8321, "lr": 2.0099859172961212e-07, "epoch": 0.020165162281544077, "percentage": 0.4, "elapsed_time": "0:00:42", "remaining_time": "2:56:00", "throughput": 22913.74, "total_tokens": 979904}
|
|
{"current_steps": 320, "total_steps": 78105, "loss": 0.8538, "lr": 2.0419920624759956e-07, "epoch": 0.020485244222520967, "percentage": 0.41, "elapsed_time": "0:00:43", "remaining_time": "2:55:56", "throughput": 22914.26, "total_tokens": 995136}
|
|
{"current_steps": 325, "total_steps": 78105, "loss": 0.8307, "lr": 2.0739982076558702e-07, "epoch": 0.020805326163497857, "percentage": 0.42, "elapsed_time": "0:00:44", "remaining_time": "2:55:56", "throughput": 22919.62, "total_tokens": 1011008}
|
|
{"current_steps": 330, "total_steps": 78105, "loss": 0.8699, "lr": 2.1060043528357446e-07, "epoch": 0.021125408104474747, "percentage": 0.42, "elapsed_time": "0:00:44", "remaining_time": "2:55:50", "throughput": 22913.85, "total_tokens": 1025792}
|
|
{"current_steps": 335, "total_steps": 78105, "loss": 0.7236, "lr": 2.138010498015619e-07, "epoch": 0.021445490045451637, "percentage": 0.43, "elapsed_time": "0:00:45", "remaining_time": "2:56:00", "throughput": 22927.32, "total_tokens": 1042944}
|
|
{"current_steps": 340, "total_steps": 78105, "loss": 0.8477, "lr": 2.170016643195494e-07, "epoch": 0.021765571986428527, "percentage": 0.44, "elapsed_time": "0:00:46", "remaining_time": "2:55:59", "throughput": 22930.78, "total_tokens": 1058688}
|
|
{"current_steps": 345, "total_steps": 78105, "loss": 0.7446, "lr": 2.2020227883753684e-07, "epoch": 0.022085653927405417, "percentage": 0.44, "elapsed_time": "0:00:46", "remaining_time": "2:55:59", "throughput": 22935.52, "total_tokens": 1074560}
|
|
{"current_steps": 350, "total_steps": 78105, "loss": 0.731, "lr": 2.2340289335552428e-07, "epoch": 0.022405735868382307, "percentage": 0.45, "elapsed_time": "0:00:47", "remaining_time": "2:55:55", "throughput": 22936.11, "total_tokens": 1089728}
|
|
{"current_steps": 355, "total_steps": 78105, "loss": 0.9404, "lr": 2.2660350787351175e-07, "epoch": 0.022725817809359197, "percentage": 0.45, "elapsed_time": "0:00:48", "remaining_time": "2:55:51", "throughput": 22937.5, "total_tokens": 1105024}
|
|
{"current_steps": 360, "total_steps": 78105, "loss": 0.8047, "lr": 2.298041223914992e-07, "epoch": 0.023045899750336087, "percentage": 0.46, "elapsed_time": "0:00:48", "remaining_time": "2:55:53", "throughput": 22941.73, "total_tokens": 1121088}
|
|
{"current_steps": 365, "total_steps": 78105, "loss": 0.7779, "lr": 2.3300473690948663e-07, "epoch": 0.023365981691312977, "percentage": 0.47, "elapsed_time": "0:00:49", "remaining_time": "2:55:51", "throughput": 22947.97, "total_tokens": 1136896}
|
|
{"current_steps": 370, "total_steps": 78105, "loss": 0.7289, "lr": 2.3620535142747412e-07, "epoch": 0.023686063632289867, "percentage": 0.47, "elapsed_time": "0:00:50", "remaining_time": "2:55:55", "throughput": 22955.19, "total_tokens": 1153280}
|
|
{"current_steps": 375, "total_steps": 78105, "loss": 0.9225, "lr": 2.3940596594546154e-07, "epoch": 0.024006145573266757, "percentage": 0.48, "elapsed_time": "0:00:50", "remaining_time": "2:55:57", "throughput": 22962.92, "total_tokens": 1169536}
|
|
{"current_steps": 380, "total_steps": 78105, "loss": 0.8308, "lr": 2.42606580463449e-07, "epoch": 0.024326227514243647, "percentage": 0.49, "elapsed_time": "0:00:51", "remaining_time": "2:55:55", "throughput": 22963.98, "total_tokens": 1185088}
|
|
{"current_steps": 385, "total_steps": 78105, "loss": 0.6761, "lr": 2.4580719498143647e-07, "epoch": 0.024646309455220537, "percentage": 0.49, "elapsed_time": "0:00:52", "remaining_time": "2:55:56", "throughput": 22963.43, "total_tokens": 1200832}
|
|
{"current_steps": 390, "total_steps": 78105, "loss": 0.6987, "lr": 2.4900780949942394e-07, "epoch": 0.024966391396197427, "percentage": 0.5, "elapsed_time": "0:00:52", "remaining_time": "2:55:53", "throughput": 22966.84, "total_tokens": 1216320}
|
|
{"current_steps": 395, "total_steps": 78105, "loss": 0.793, "lr": 2.5220842401741135e-07, "epoch": 0.025286473337174317, "percentage": 0.51, "elapsed_time": "0:00:53", "remaining_time": "2:55:58", "throughput": 22970.97, "total_tokens": 1232832}
|
|
{"current_steps": 400, "total_steps": 78105, "loss": 0.7453, "lr": 2.554090385353988e-07, "epoch": 0.025606555278151207, "percentage": 0.51, "elapsed_time": "0:00:54", "remaining_time": "2:55:56", "throughput": 22973.81, "total_tokens": 1248384}
|
|
{"current_steps": 405, "total_steps": 78105, "loss": 0.7423, "lr": 2.586096530533863e-07, "epoch": 0.025926637219128097, "percentage": 0.52, "elapsed_time": "0:00:55", "remaining_time": "2:55:53", "throughput": 22977.85, "total_tokens": 1263936}
|
|
{"current_steps": 410, "total_steps": 78105, "loss": 1.2019, "lr": 2.618102675713737e-07, "epoch": 0.026246719160104987, "percentage": 0.52, "elapsed_time": "0:00:56", "remaining_time": "2:57:33", "throughput": 23020.14, "total_tokens": 1294208}
|
|
{"current_steps": 415, "total_steps": 78105, "loss": 0.869, "lr": 2.650108820893612e-07, "epoch": 0.026566801101081877, "percentage": 0.53, "elapsed_time": "0:00:56", "remaining_time": "2:57:26", "throughput": 23019.84, "total_tokens": 1309120}
|
|
{"current_steps": 420, "total_steps": 78105, "loss": 0.8688, "lr": 2.6821149660734863e-07, "epoch": 0.026886883042058767, "percentage": 0.54, "elapsed_time": "0:00:57", "remaining_time": "2:57:20", "throughput": 23019.66, "total_tokens": 1324224}
|
|
{"current_steps": 425, "total_steps": 78105, "loss": 0.6996, "lr": 2.714121111253361e-07, "epoch": 0.027206964983035656, "percentage": 0.54, "elapsed_time": "0:00:58", "remaining_time": "2:57:24", "throughput": 23026.61, "total_tokens": 1341056}
|
|
{"current_steps": 430, "total_steps": 78105, "loss": 0.811, "lr": 2.7461272564332357e-07, "epoch": 0.027527046924012546, "percentage": 0.55, "elapsed_time": "0:00:58", "remaining_time": "2:57:21", "throughput": 23027.15, "total_tokens": 1356544}
|
|
{"current_steps": 435, "total_steps": 78105, "loss": 0.7142, "lr": 2.77813340161311e-07, "epoch": 0.027847128864989436, "percentage": 0.56, "elapsed_time": "0:00:59", "remaining_time": "2:57:17", "throughput": 23027.11, "total_tokens": 1371840}
|
|
{"current_steps": 440, "total_steps": 78105, "loss": 0.7129, "lr": 2.8101395467929845e-07, "epoch": 0.028167210805966326, "percentage": 0.56, "elapsed_time": "0:01:00", "remaining_time": "2:57:10", "throughput": 23026.16, "total_tokens": 1386816}
|
|
{"current_steps": 445, "total_steps": 78105, "loss": 0.7842, "lr": 2.842145691972859e-07, "epoch": 0.028487292746943216, "percentage": 0.57, "elapsed_time": "0:01:00", "remaining_time": "2:57:04", "throughput": 23025.99, "total_tokens": 1401792}
|
|
{"current_steps": 450, "total_steps": 78105, "loss": 0.6599, "lr": 2.874151837152734e-07, "epoch": 0.028807374687920106, "percentage": 0.58, "elapsed_time": "0:01:01", "remaining_time": "2:56:58", "throughput": 23025.67, "total_tokens": 1416896}
|
|
{"current_steps": 455, "total_steps": 78105, "loss": 0.8144, "lr": 2.906157982332608e-07, "epoch": 0.029127456628896996, "percentage": 0.58, "elapsed_time": "0:01:02", "remaining_time": "2:56:58", "throughput": 23025.77, "total_tokens": 1432704}
|
|
{"current_steps": 460, "total_steps": 78105, "loss": 0.7013, "lr": 2.9381641275124827e-07, "epoch": 0.029447538569873886, "percentage": 0.59, "elapsed_time": "0:01:02", "remaining_time": "2:56:57", "throughput": 23025.81, "total_tokens": 1448384}
|
|
{"current_steps": 465, "total_steps": 78105, "loss": 0.7257, "lr": 2.9701702726923573e-07, "epoch": 0.029767620510850776, "percentage": 0.6, "elapsed_time": "0:01:03", "remaining_time": "2:56:58", "throughput": 23032.6, "total_tokens": 1464832}
|
|
{"current_steps": 470, "total_steps": 78105, "loss": 0.6784, "lr": 3.0021764178722315e-07, "epoch": 0.030087702451827666, "percentage": 0.6, "elapsed_time": "0:01:04", "remaining_time": "2:56:51", "throughput": 23028.24, "total_tokens": 1479424}
|
|
{"current_steps": 475, "total_steps": 78105, "loss": 0.775, "lr": 3.034182563052106e-07, "epoch": 0.030407784392804556, "percentage": 0.61, "elapsed_time": "0:01:04", "remaining_time": "2:56:46", "throughput": 23025.94, "total_tokens": 1494336}
|
|
{"current_steps": 480, "total_steps": 78105, "loss": 0.6793, "lr": 3.066188708231981e-07, "epoch": 0.030727866333781446, "percentage": 0.61, "elapsed_time": "0:01:05", "remaining_time": "2:56:40", "throughput": 23023.81, "total_tokens": 1509184}
|
|
{"current_steps": 485, "total_steps": 78105, "loss": 0.8161, "lr": 3.0981948534118555e-07, "epoch": 0.031047948274758336, "percentage": 0.62, "elapsed_time": "0:01:06", "remaining_time": "2:56:42", "throughput": 23027.67, "total_tokens": 1525504}
|
|
{"current_steps": 490, "total_steps": 78105, "loss": 0.6406, "lr": 3.13020099859173e-07, "epoch": 0.031368030215735226, "percentage": 0.63, "elapsed_time": "0:01:06", "remaining_time": "2:56:41", "throughput": 23032.22, "total_tokens": 1541504}
|
|
{"current_steps": 495, "total_steps": 78105, "loss": 0.6751, "lr": 3.1622071437716043e-07, "epoch": 0.03168811215671212, "percentage": 0.63, "elapsed_time": "0:01:07", "remaining_time": "2:56:39", "throughput": 23034.79, "total_tokens": 1557184}
|
|
{"current_steps": 500, "total_steps": 78105, "loss": 0.7808, "lr": 3.194213288951479e-07, "epoch": 0.032008194097689006, "percentage": 0.64, "elapsed_time": "0:01:08", "remaining_time": "2:56:39", "throughput": 23039.69, "total_tokens": 1573440}
|
|
{"current_steps": 505, "total_steps": 78105, "loss": 0.8709, "lr": 3.2262194341313536e-07, "epoch": 0.0323282760386659, "percentage": 0.65, "elapsed_time": "0:01:08", "remaining_time": "2:56:35", "throughput": 23040.59, "total_tokens": 1588736}
|
|
{"current_steps": 510, "total_steps": 78105, "loss": 0.7501, "lr": 3.258225579311228e-07, "epoch": 0.032648357979642786, "percentage": 0.65, "elapsed_time": "0:01:09", "remaining_time": "2:56:33", "throughput": 23042.11, "total_tokens": 1604352}
|
|
{"current_steps": 515, "total_steps": 78105, "loss": 0.7055, "lr": 3.2902317244911025e-07, "epoch": 0.03296843992061968, "percentage": 0.66, "elapsed_time": "0:01:10", "remaining_time": "2:56:25", "throughput": 23039.05, "total_tokens": 1618816}
|
|
{"current_steps": 520, "total_steps": 78105, "loss": 0.7891, "lr": 3.322237869670977e-07, "epoch": 0.033288521861596566, "percentage": 0.67, "elapsed_time": "0:01:10", "remaining_time": "2:56:28", "throughput": 23048.55, "total_tokens": 1635648}
|
|
{"current_steps": 525, "total_steps": 78105, "loss": 0.7411, "lr": 3.3542440148508513e-07, "epoch": 0.03360860380257346, "percentage": 0.67, "elapsed_time": "0:01:11", "remaining_time": "2:56:27", "throughput": 23048.2, "total_tokens": 1651328}
|
|
{"current_steps": 530, "total_steps": 78105, "loss": 0.7586, "lr": 3.386250160030726e-07, "epoch": 0.033928685743550346, "percentage": 0.68, "elapsed_time": "0:01:12", "remaining_time": "2:56:34", "throughput": 23057.26, "total_tokens": 1668928}
|
|
{"current_steps": 535, "total_steps": 78105, "loss": 0.6686, "lr": 3.418256305210601e-07, "epoch": 0.03424876768452724, "percentage": 0.68, "elapsed_time": "0:01:13", "remaining_time": "2:56:37", "throughput": 23061.36, "total_tokens": 1685504}
|
|
{"current_steps": 540, "total_steps": 78105, "loss": 0.7246, "lr": 3.450262450390475e-07, "epoch": 0.034568849625504126, "percentage": 0.69, "elapsed_time": "0:01:13", "remaining_time": "2:56:38", "throughput": 23065.46, "total_tokens": 1701952}
|
|
{"current_steps": 545, "total_steps": 78105, "loss": 0.7636, "lr": 3.48226859557035e-07, "epoch": 0.03488893156648102, "percentage": 0.7, "elapsed_time": "0:01:14", "remaining_time": "2:56:34", "throughput": 23064.27, "total_tokens": 1716992}
|
|
{"current_steps": 550, "total_steps": 78105, "loss": 0.642, "lr": 3.5142747407502246e-07, "epoch": 0.035209013507457906, "percentage": 0.7, "elapsed_time": "0:01:15", "remaining_time": "2:56:31", "throughput": 23061.46, "total_tokens": 1732160}
|
|
{"current_steps": 555, "total_steps": 78105, "loss": 0.6954, "lr": 3.546280885930099e-07, "epoch": 0.0355290954484348, "percentage": 0.71, "elapsed_time": "0:01:15", "remaining_time": "2:56:32", "throughput": 23063.92, "total_tokens": 1748416}
|
|
{"current_steps": 560, "total_steps": 78105, "loss": 0.7845, "lr": 3.5782870311099734e-07, "epoch": 0.035849177389411686, "percentage": 0.72, "elapsed_time": "0:01:16", "remaining_time": "2:56:29", "throughput": 23063.52, "total_tokens": 1763776}
|
|
{"current_steps": 565, "total_steps": 78105, "loss": 0.7543, "lr": 3.610293176289848e-07, "epoch": 0.03616925933038858, "percentage": 0.72, "elapsed_time": "0:01:17", "remaining_time": "2:56:30", "throughput": 23068.43, "total_tokens": 1780160}
|
|
{"current_steps": 570, "total_steps": 78105, "loss": 0.6669, "lr": 3.642299321469722e-07, "epoch": 0.036489341271365466, "percentage": 0.73, "elapsed_time": "0:01:17", "remaining_time": "2:56:28", "throughput": 23071.32, "total_tokens": 1795968}
|
|
{"current_steps": 575, "total_steps": 78105, "loss": 0.667, "lr": 3.674305466649597e-07, "epoch": 0.03680942321234236, "percentage": 0.74, "elapsed_time": "0:01:18", "remaining_time": "2:56:43", "throughput": 23085.69, "total_tokens": 1815424}
|
|
{"current_steps": 580, "total_steps": 78105, "loss": 0.8525, "lr": 3.7063116118294716e-07, "epoch": 0.037129505153319246, "percentage": 0.74, "elapsed_time": "0:01:19", "remaining_time": "2:56:45", "throughput": 23088.54, "total_tokens": 1831936}
|
|
{"current_steps": 585, "total_steps": 78105, "loss": 0.7031, "lr": 3.7383177570093457e-07, "epoch": 0.03744958709429614, "percentage": 0.75, "elapsed_time": "0:01:20", "remaining_time": "2:56:43", "throughput": 23086.73, "total_tokens": 1847424}
|
|
{"current_steps": 590, "total_steps": 78105, "loss": 0.7892, "lr": 3.7703239021892204e-07, "epoch": 0.03776966903527303, "percentage": 0.76, "elapsed_time": "0:01:20", "remaining_time": "2:56:39", "throughput": 23084.32, "total_tokens": 1862400}
|
|
{"current_steps": 595, "total_steps": 78105, "loss": 0.7662, "lr": 3.8023300473690956e-07, "epoch": 0.03808975097624992, "percentage": 0.76, "elapsed_time": "0:01:21", "remaining_time": "2:56:32", "throughput": 23082.34, "total_tokens": 1876928}
|
|
{"current_steps": 600, "total_steps": 78105, "loss": 0.6976, "lr": 3.834336192548969e-07, "epoch": 0.03840983291722681, "percentage": 0.77, "elapsed_time": "0:01:21", "remaining_time": "2:56:31", "throughput": 23082.17, "total_tokens": 1892608}
|
|
{"current_steps": 605, "total_steps": 78105, "loss": 0.7447, "lr": 3.8663423377288444e-07, "epoch": 0.0387299148582037, "percentage": 0.77, "elapsed_time": "0:01:22", "remaining_time": "2:56:35", "throughput": 23088.99, "total_tokens": 1909696}
|
|
{"current_steps": 610, "total_steps": 78105, "loss": 0.8291, "lr": 3.898348482908719e-07, "epoch": 0.03904999679918059, "percentage": 0.78, "elapsed_time": "0:01:23", "remaining_time": "2:56:31", "throughput": 23087.8, "total_tokens": 1924864}
|
|
{"current_steps": 615, "total_steps": 78105, "loss": 0.5963, "lr": 3.930354628088593e-07, "epoch": 0.03937007874015748, "percentage": 0.79, "elapsed_time": "0:01:24", "remaining_time": "2:56:26", "throughput": 23088.61, "total_tokens": 1939968}
|
|
{"current_steps": 620, "total_steps": 78105, "loss": 0.7966, "lr": 3.962360773268468e-07, "epoch": 0.03969016068113437, "percentage": 0.79, "elapsed_time": "0:01:24", "remaining_time": "2:56:23", "throughput": 23087.1, "total_tokens": 1955136}
|
|
{"current_steps": 625, "total_steps": 78105, "loss": 0.7001, "lr": 3.9943669184483426e-07, "epoch": 0.04001024262211126, "percentage": 0.8, "elapsed_time": "0:01:25", "remaining_time": "2:56:22", "throughput": 23087.04, "total_tokens": 1970880}
|
|
{"current_steps": 630, "total_steps": 78105, "loss": 0.6206, "lr": 4.0263730636282167e-07, "epoch": 0.04033032456308815, "percentage": 0.81, "elapsed_time": "0:01:26", "remaining_time": "2:56:22", "throughput": 23087.89, "total_tokens": 1986752}
|
|
{"current_steps": 635, "total_steps": 78105, "loss": 0.6489, "lr": 4.0583792088080914e-07, "epoch": 0.04065040650406504, "percentage": 0.81, "elapsed_time": "0:01:26", "remaining_time": "2:56:18", "throughput": 23086.5, "total_tokens": 2001856}
|
|
{"current_steps": 640, "total_steps": 78105, "loss": 0.686, "lr": 4.090385353987966e-07, "epoch": 0.04097048844504193, "percentage": 0.82, "elapsed_time": "0:01:27", "remaining_time": "2:56:26", "throughput": 23095.84, "total_tokens": 2019968}
|
|
{"current_steps": 645, "total_steps": 78105, "loss": 0.7926, "lr": 4.12239149916784e-07, "epoch": 0.04129057038601882, "percentage": 0.83, "elapsed_time": "0:01:28", "remaining_time": "2:56:23", "throughput": 23095.83, "total_tokens": 2035328}
|
|
{"current_steps": 650, "total_steps": 78105, "loss": 0.6748, "lr": 4.154397644347715e-07, "epoch": 0.04161065232699571, "percentage": 0.83, "elapsed_time": "0:01:28", "remaining_time": "2:56:38", "throughput": 23107.27, "total_tokens": 2055168}
|
|
{"current_steps": 655, "total_steps": 78105, "loss": 0.7494, "lr": 4.18640378952759e-07, "epoch": 0.0419307342679726, "percentage": 0.84, "elapsed_time": "0:01:29", "remaining_time": "2:56:40", "throughput": 23110.34, "total_tokens": 2071808}
|
|
{"current_steps": 660, "total_steps": 78105, "loss": 0.6756, "lr": 4.2184099347074637e-07, "epoch": 0.04225081620894949, "percentage": 0.85, "elapsed_time": "0:01:30", "remaining_time": "2:56:38", "throughput": 23111.25, "total_tokens": 2087424}
|
|
{"current_steps": 665, "total_steps": 78105, "loss": 0.8031, "lr": 4.250416079887339e-07, "epoch": 0.04257089814992638, "percentage": 0.85, "elapsed_time": "0:01:30", "remaining_time": "2:56:34", "throughput": 23110.66, "total_tokens": 2102592}
|
|
{"current_steps": 670, "total_steps": 78105, "loss": 0.7141, "lr": 4.2824222250672136e-07, "epoch": 0.04289098009090327, "percentage": 0.86, "elapsed_time": "0:01:31", "remaining_time": "2:56:36", "throughput": 23116.19, "total_tokens": 2119488}
|
|
{"current_steps": 675, "total_steps": 78105, "loss": 0.6826, "lr": 4.3144283702470877e-07, "epoch": 0.04321106203188016, "percentage": 0.86, "elapsed_time": "0:01:32", "remaining_time": "2:56:37", "throughput": 23120.49, "total_tokens": 2136000}
|
|
{"current_steps": 680, "total_steps": 78105, "loss": 0.8482, "lr": 4.3464345154269624e-07, "epoch": 0.04353114397285705, "percentage": 0.87, "elapsed_time": "0:01:33", "remaining_time": "2:56:38", "throughput": 23124.22, "total_tokens": 2152448}
|
|
{"current_steps": 685, "total_steps": 78105, "loss": 0.6599, "lr": 4.378440660606837e-07, "epoch": 0.04385122591383394, "percentage": 0.88, "elapsed_time": "0:01:33", "remaining_time": "2:56:35", "throughput": 23125.32, "total_tokens": 2168000}
|
|
{"current_steps": 690, "total_steps": 78105, "loss": 0.6298, "lr": 4.410446805786711e-07, "epoch": 0.04417130785481083, "percentage": 0.88, "elapsed_time": "0:01:34", "remaining_time": "2:56:34", "throughput": 23124.0, "total_tokens": 2183552}
|
|
{"current_steps": 695, "total_steps": 78105, "loss": 0.7726, "lr": 4.442452950966586e-07, "epoch": 0.04449138979578772, "percentage": 0.89, "elapsed_time": "0:01:35", "remaining_time": "2:56:32", "throughput": 23126.79, "total_tokens": 2199488}
|
|
{"current_steps": 700, "total_steps": 78105, "loss": 0.7076, "lr": 4.4744590961464605e-07, "epoch": 0.04481147173676461, "percentage": 0.9, "elapsed_time": "0:01:35", "remaining_time": "2:56:31", "throughput": 23127.95, "total_tokens": 2215296}
|
|
{"current_steps": 705, "total_steps": 78105, "loss": 0.7781, "lr": 4.5064652413263347e-07, "epoch": 0.0451315536777415, "percentage": 0.9, "elapsed_time": "0:01:36", "remaining_time": "2:56:26", "throughput": 23127.28, "total_tokens": 2230016}
|
|
{"current_steps": 710, "total_steps": 78105, "loss": 0.7446, "lr": 4.5384713865062093e-07, "epoch": 0.04545163561871839, "percentage": 0.91, "elapsed_time": "0:01:37", "remaining_time": "2:56:22", "throughput": 23125.13, "total_tokens": 2245056}
|
|
{"current_steps": 715, "total_steps": 78105, "loss": 0.6779, "lr": 4.5704775316860845e-07, "epoch": 0.04577171755969528, "percentage": 0.92, "elapsed_time": "0:01:37", "remaining_time": "2:56:22", "throughput": 23127.25, "total_tokens": 2261248}
|
|
{"current_steps": 720, "total_steps": 78105, "loss": 0.6933, "lr": 4.602483676865958e-07, "epoch": 0.04609179950067217, "percentage": 0.92, "elapsed_time": "0:01:38", "remaining_time": "2:56:25", "throughput": 23130.21, "total_tokens": 2278016}
|
|
{"current_steps": 725, "total_steps": 78105, "loss": 0.5873, "lr": 4.6344898220458334e-07, "epoch": 0.04641188144164906, "percentage": 0.93, "elapsed_time": "0:01:39", "remaining_time": "2:56:21", "throughput": 23127.45, "total_tokens": 2292800}
|
|
{"current_steps": 730, "total_steps": 78105, "loss": 0.6548, "lr": 4.666495967225708e-07, "epoch": 0.04673196338262595, "percentage": 0.93, "elapsed_time": "0:01:39", "remaining_time": "2:56:18", "throughput": 23127.35, "total_tokens": 2308224}
|
|
{"current_steps": 735, "total_steps": 78105, "loss": 0.7381, "lr": 4.698502112405582e-07, "epoch": 0.04705204532360284, "percentage": 0.94, "elapsed_time": "0:01:40", "remaining_time": "2:56:22", "throughput": 23133.76, "total_tokens": 2325760}
|
|
{"current_steps": 740, "total_steps": 78105, "loss": 0.748, "lr": 4.730508257585457e-07, "epoch": 0.04737212726457973, "percentage": 0.95, "elapsed_time": "0:01:41", "remaining_time": "2:56:21", "throughput": 23135.66, "total_tokens": 2341632}
|
|
{"current_steps": 745, "total_steps": 78105, "loss": 0.7587, "lr": 4.7625144027653315e-07, "epoch": 0.04769220920555662, "percentage": 0.95, "elapsed_time": "0:01:41", "remaining_time": "2:56:20", "throughput": 23137.32, "total_tokens": 2357504}
|
|
{"current_steps": 750, "total_steps": 78105, "loss": 0.7956, "lr": 4.794520547945206e-07, "epoch": 0.04801229114653351, "percentage": 0.96, "elapsed_time": "0:01:42", "remaining_time": "2:56:17", "throughput": 23134.37, "total_tokens": 2372608}
|
|
{"current_steps": 755, "total_steps": 78105, "loss": 0.6417, "lr": 4.82652669312508e-07, "epoch": 0.0483323730875104, "percentage": 0.97, "elapsed_time": "0:01:43", "remaining_time": "2:56:15", "throughput": 23136.26, "total_tokens": 2388352}
|
|
{"current_steps": 760, "total_steps": 78105, "loss": 0.7503, "lr": 4.858532838304955e-07, "epoch": 0.04865245502848729, "percentage": 0.97, "elapsed_time": "0:01:43", "remaining_time": "2:56:16", "throughput": 23137.36, "total_tokens": 2404480}
|
|
{"current_steps": 765, "total_steps": 78105, "loss": 0.5662, "lr": 4.89053898348483e-07, "epoch": 0.04897253696946418, "percentage": 0.98, "elapsed_time": "0:01:44", "remaining_time": "2:56:12", "throughput": 23137.04, "total_tokens": 2419648}
|
|
{"current_steps": 770, "total_steps": 78105, "loss": 0.747, "lr": 4.922545128664704e-07, "epoch": 0.04929261891044107, "percentage": 0.99, "elapsed_time": "0:01:45", "remaining_time": "2:56:12", "throughput": 23136.79, "total_tokens": 2435584}
|
|
{"current_steps": 775, "total_steps": 78105, "loss": 0.6679, "lr": 4.954551273844579e-07, "epoch": 0.04961270085141796, "percentage": 0.99, "elapsed_time": "0:01:45", "remaining_time": "2:56:10", "throughput": 23136.8, "total_tokens": 2451072}
|
|
{"current_steps": 780, "total_steps": 78105, "loss": 0.7975, "lr": 4.986557419024453e-07, "epoch": 0.04993278279239485, "percentage": 1.0, "elapsed_time": "0:01:46", "remaining_time": "2:56:12", "throughput": 23141.31, "total_tokens": 2467968}
|
|
{"current_steps": 785, "total_steps": 78105, "loss": 0.6755, "lr": 5.018563564204327e-07, "epoch": 0.05025286473337175, "percentage": 1.01, "elapsed_time": "0:01:47", "remaining_time": "2:56:15", "throughput": 23144.91, "total_tokens": 2484928}
|
|
{"current_steps": 790, "total_steps": 78105, "loss": 0.6544, "lr": 5.050569709384202e-07, "epoch": 0.05057294667434863, "percentage": 1.01, "elapsed_time": "0:01:48", "remaining_time": "2:56:15", "throughput": 23148.79, "total_tokens": 2501504}
|
|
{"current_steps": 795, "total_steps": 78105, "loss": 0.5379, "lr": 5.082575854564077e-07, "epoch": 0.050893028615325527, "percentage": 1.02, "elapsed_time": "0:01:48", "remaining_time": "2:56:18", "throughput": 23155.4, "total_tokens": 2518848}
|
|
{"current_steps": 800, "total_steps": 78105, "loss": 0.6548, "lr": 5.114581999743951e-07, "epoch": 0.05121311055630241, "percentage": 1.02, "elapsed_time": "0:01:49", "remaining_time": "2:56:19", "throughput": 23159.58, "total_tokens": 2535680}
|
|
{"current_steps": 805, "total_steps": 78105, "loss": 0.7033, "lr": 5.146588144923826e-07, "epoch": 0.051533192497279307, "percentage": 1.03, "elapsed_time": "0:01:50", "remaining_time": "2:56:17", "throughput": 23157.77, "total_tokens": 2550976}
|
|
{"current_steps": 810, "total_steps": 78105, "loss": 0.6126, "lr": 5.1785942901037e-07, "epoch": 0.05185327443825619, "percentage": 1.04, "elapsed_time": "0:01:50", "remaining_time": "2:56:15", "throughput": 23159.09, "total_tokens": 2566656}
|
|
{"current_steps": 815, "total_steps": 78105, "loss": 0.6814, "lr": 5.210600435283575e-07, "epoch": 0.052173356379233086, "percentage": 1.04, "elapsed_time": "0:01:51", "remaining_time": "2:56:12", "throughput": 23157.0, "total_tokens": 2581568}
|
|
{"current_steps": 820, "total_steps": 78105, "loss": 0.6776, "lr": 5.24260658046345e-07, "epoch": 0.05249343832020997, "percentage": 1.05, "elapsed_time": "0:01:52", "remaining_time": "2:56:08", "throughput": 23155.87, "total_tokens": 2596608}
|
|
{"current_steps": 825, "total_steps": 78105, "loss": 0.6859, "lr": 5.274612725643324e-07, "epoch": 0.052813520261186866, "percentage": 1.06, "elapsed_time": "0:01:52", "remaining_time": "2:56:06", "throughput": 23155.0, "total_tokens": 2612032}
|
|
{"current_steps": 830, "total_steps": 78105, "loss": 0.8132, "lr": 5.306618870823198e-07, "epoch": 0.05313360220216375, "percentage": 1.06, "elapsed_time": "0:01:53", "remaining_time": "2:56:03", "throughput": 23155.46, "total_tokens": 2627264}
|
|
{"current_steps": 835, "total_steps": 78105, "loss": 0.6381, "lr": 5.338625016003073e-07, "epoch": 0.053453684143140646, "percentage": 1.07, "elapsed_time": "0:01:54", "remaining_time": "2:56:02", "throughput": 23157.17, "total_tokens": 2643264}
|
|
{"current_steps": 840, "total_steps": 78105, "loss": 0.643, "lr": 5.370631161182948e-07, "epoch": 0.05377376608411753, "percentage": 1.08, "elapsed_time": "0:01:54", "remaining_time": "2:56:01", "throughput": 23159.03, "total_tokens": 2659264}
|
|
{"current_steps": 845, "total_steps": 78105, "loss": 0.5987, "lr": 5.402637306362822e-07, "epoch": 0.054093848025094426, "percentage": 1.08, "elapsed_time": "0:01:55", "remaining_time": "2:55:57", "throughput": 23157.16, "total_tokens": 2673856}
|
|
{"current_steps": 850, "total_steps": 78105, "loss": 0.5327, "lr": 5.434643451542697e-07, "epoch": 0.05441392996607131, "percentage": 1.09, "elapsed_time": "0:01:56", "remaining_time": "2:55:53", "throughput": 23154.0, "total_tokens": 2688448}
|
|
{"current_steps": 855, "total_steps": 78105, "loss": 0.6485, "lr": 5.466649596722571e-07, "epoch": 0.054734011907048206, "percentage": 1.09, "elapsed_time": "0:01:56", "remaining_time": "2:55:51", "throughput": 23153.88, "total_tokens": 2703872}
|
|
{"current_steps": 860, "total_steps": 78105, "loss": 0.6576, "lr": 5.498655741902445e-07, "epoch": 0.05505409384802509, "percentage": 1.1, "elapsed_time": "0:01:57", "remaining_time": "2:55:48", "throughput": 23152.05, "total_tokens": 2719040}
|
|
{"current_steps": 865, "total_steps": 78105, "loss": 0.6156, "lr": 5.530661887082321e-07, "epoch": 0.055374175789001986, "percentage": 1.11, "elapsed_time": "0:01:58", "remaining_time": "2:55:47", "throughput": 23154.8, "total_tokens": 2735168}
|
|
{"current_steps": 870, "total_steps": 78105, "loss": 0.5268, "lr": 5.562668032262195e-07, "epoch": 0.05569425772997887, "percentage": 1.11, "elapsed_time": "0:01:58", "remaining_time": "2:55:45", "throughput": 23155.32, "total_tokens": 2750592}
|
|
{"current_steps": 875, "total_steps": 78105, "loss": 0.6933, "lr": 5.594674177442069e-07, "epoch": 0.056014339670955766, "percentage": 1.12, "elapsed_time": "0:01:59", "remaining_time": "2:55:46", "throughput": 23158.13, "total_tokens": 2767232}
|
|
{"current_steps": 880, "total_steps": 78105, "loss": 0.7287, "lr": 5.626680322621944e-07, "epoch": 0.05633442161193265, "percentage": 1.13, "elapsed_time": "0:02:00", "remaining_time": "2:55:50", "throughput": 23163.16, "total_tokens": 2784768}
|
|
{"current_steps": 885, "total_steps": 78105, "loss": 0.6941, "lr": 5.658686467801819e-07, "epoch": 0.056654503552909546, "percentage": 1.13, "elapsed_time": "0:02:00", "remaining_time": "2:55:47", "throughput": 23162.29, "total_tokens": 2799872}
|
|
{"current_steps": 890, "total_steps": 78105, "loss": 0.6689, "lr": 5.690692612981693e-07, "epoch": 0.05697458549388643, "percentage": 1.14, "elapsed_time": "0:02:01", "remaining_time": "2:55:47", "throughput": 23163.21, "total_tokens": 2816000}
|
|
{"current_steps": 895, "total_steps": 78105, "loss": 0.6694, "lr": 5.722698758161568e-07, "epoch": 0.057294667434863326, "percentage": 1.15, "elapsed_time": "0:02:02", "remaining_time": "2:55:46", "throughput": 23162.91, "total_tokens": 2831744}
|
|
{"current_steps": 900, "total_steps": 78105, "loss": 0.7011, "lr": 5.754704903341442e-07, "epoch": 0.05761474937584021, "percentage": 1.15, "elapsed_time": "0:02:02", "remaining_time": "2:55:45", "throughput": 23162.75, "total_tokens": 2847424}
|
|
{"current_steps": 905, "total_steps": 78105, "loss": 0.738, "lr": 5.786711048521316e-07, "epoch": 0.057934831316817106, "percentage": 1.16, "elapsed_time": "0:02:03", "remaining_time": "2:55:41", "throughput": 23162.21, "total_tokens": 2862272}
|
|
{"current_steps": 910, "total_steps": 78105, "loss": 0.524, "lr": 5.818717193701191e-07, "epoch": 0.05825491325779399, "percentage": 1.17, "elapsed_time": "0:02:04", "remaining_time": "2:55:38", "throughput": 23160.35, "total_tokens": 2877120}
|
|
{"current_steps": 915, "total_steps": 78105, "loss": 0.6219, "lr": 5.850723338881066e-07, "epoch": 0.058574995198770886, "percentage": 1.17, "elapsed_time": "0:02:04", "remaining_time": "2:55:40", "throughput": 23165.74, "total_tokens": 2894592}
|
|
{"current_steps": 920, "total_steps": 78105, "loss": 0.6786, "lr": 5.88272948406094e-07, "epoch": 0.05889507713974777, "percentage": 1.18, "elapsed_time": "0:02:05", "remaining_time": "2:55:38", "throughput": 23164.94, "total_tokens": 2909888}
|
|
{"current_steps": 925, "total_steps": 78105, "loss": 0.7083, "lr": 5.914735629240815e-07, "epoch": 0.059215159080724666, "percentage": 1.18, "elapsed_time": "0:02:06", "remaining_time": "2:55:38", "throughput": 23163.62, "total_tokens": 2925632}
|
|
{"current_steps": 930, "total_steps": 78105, "loss": 0.658, "lr": 5.946741774420689e-07, "epoch": 0.05953524102170155, "percentage": 1.19, "elapsed_time": "0:02:06", "remaining_time": "2:55:38", "throughput": 23165.25, "total_tokens": 2941760}
|
|
{"current_steps": 935, "total_steps": 78105, "loss": 0.814, "lr": 5.978747919600564e-07, "epoch": 0.059855322962678446, "percentage": 1.2, "elapsed_time": "0:02:07", "remaining_time": "2:55:35", "throughput": 23166.94, "total_tokens": 2957376}
|
|
{"current_steps": 940, "total_steps": 78105, "loss": 0.6456, "lr": 6.010754064780439e-07, "epoch": 0.06017540490365533, "percentage": 1.2, "elapsed_time": "0:02:08", "remaining_time": "2:55:34", "throughput": 23166.35, "total_tokens": 2972800}
|
|
{"current_steps": 945, "total_steps": 78105, "loss": 0.8423, "lr": 6.042760209960313e-07, "epoch": 0.060495486844632226, "percentage": 1.21, "elapsed_time": "0:02:08", "remaining_time": "2:55:32", "throughput": 23167.2, "total_tokens": 2988480}
|
|
{"current_steps": 950, "total_steps": 78105, "loss": 0.7039, "lr": 6.074766355140187e-07, "epoch": 0.06081556878560911, "percentage": 1.22, "elapsed_time": "0:02:09", "remaining_time": "2:55:32", "throughput": 23168.07, "total_tokens": 3004480}
|
|
{"current_steps": 955, "total_steps": 78105, "loss": 0.5306, "lr": 6.106772500320062e-07, "epoch": 0.061135650726586006, "percentage": 1.22, "elapsed_time": "0:02:10", "remaining_time": "2:55:30", "throughput": 23169.4, "total_tokens": 3020288}
|
|
{"current_steps": 960, "total_steps": 78105, "loss": 0.6458, "lr": 6.138778645499937e-07, "epoch": 0.06145573266756289, "percentage": 1.23, "elapsed_time": "0:02:11", "remaining_time": "2:55:30", "throughput": 23168.09, "total_tokens": 3035968}
|
|
{"current_steps": 965, "total_steps": 78105, "loss": 0.8471, "lr": 6.170784790679811e-07, "epoch": 0.061775814608539786, "percentage": 1.24, "elapsed_time": "0:02:11", "remaining_time": "2:55:29", "throughput": 23168.68, "total_tokens": 3051776}
|
|
{"current_steps": 970, "total_steps": 78105, "loss": 0.6216, "lr": 6.202790935859686e-07, "epoch": 0.06209589654951667, "percentage": 1.24, "elapsed_time": "0:02:12", "remaining_time": "2:55:26", "throughput": 23166.72, "total_tokens": 3066560}
|
|
{"current_steps": 975, "total_steps": 78105, "loss": 0.5871, "lr": 6.23479708103956e-07, "epoch": 0.062415978490493566, "percentage": 1.25, "elapsed_time": "0:02:13", "remaining_time": "2:55:24", "throughput": 23168.79, "total_tokens": 3082496}
|
|
{"current_steps": 980, "total_steps": 78105, "loss": 0.66, "lr": 6.266803226219435e-07, "epoch": 0.06273606043147045, "percentage": 1.25, "elapsed_time": "0:02:13", "remaining_time": "2:55:23", "throughput": 23168.02, "total_tokens": 3097856}
|
|
{"current_steps": 985, "total_steps": 78105, "loss": 0.537, "lr": 6.298809371399309e-07, "epoch": 0.06305614237244735, "percentage": 1.26, "elapsed_time": "0:02:14", "remaining_time": "2:55:22", "throughput": 23168.32, "total_tokens": 3113664}
|
|
{"current_steps": 990, "total_steps": 78105, "loss": 0.6638, "lr": 6.330815516579184e-07, "epoch": 0.06337622431342424, "percentage": 1.27, "elapsed_time": "0:02:15", "remaining_time": "2:55:21", "throughput": 23170.12, "total_tokens": 3129792}
|
|
{"current_steps": 995, "total_steps": 78105, "loss": 0.7097, "lr": 6.362821661759058e-07, "epoch": 0.06369630625440113, "percentage": 1.27, "elapsed_time": "0:02:15", "remaining_time": "2:55:19", "throughput": 23169.55, "total_tokens": 3145024}
|
|
{"current_steps": 1000, "total_steps": 78105, "loss": 0.7786, "lr": 6.394827806938933e-07, "epoch": 0.06401638819537801, "percentage": 1.28, "elapsed_time": "0:02:16", "remaining_time": "2:55:19", "throughput": 23170.15, "total_tokens": 3161216}
|
|
{"current_steps": 1005, "total_steps": 78105, "loss": 0.4931, "lr": 6.426833952118808e-07, "epoch": 0.0643364701363549, "percentage": 1.29, "elapsed_time": "0:02:17", "remaining_time": "2:55:18", "throughput": 23170.22, "total_tokens": 3176960}
|
|
{"current_steps": 1010, "total_steps": 78105, "loss": 0.6856, "lr": 6.458840097298682e-07, "epoch": 0.0646565520773318, "percentage": 1.29, "elapsed_time": "0:02:17", "remaining_time": "2:55:18", "throughput": 23171.61, "total_tokens": 3193088}
|
|
{"current_steps": 1015, "total_steps": 78105, "loss": 0.7215, "lr": 6.490846242478556e-07, "epoch": 0.0649766340183087, "percentage": 1.3, "elapsed_time": "0:02:18", "remaining_time": "2:55:20", "throughput": 23174.19, "total_tokens": 3210112}
|
|
{"current_steps": 1020, "total_steps": 78105, "loss": 0.5505, "lr": 6.52285238765843e-07, "epoch": 0.06529671595928557, "percentage": 1.31, "elapsed_time": "0:02:19", "remaining_time": "2:55:16", "throughput": 23172.79, "total_tokens": 3224768}
|
|
{"current_steps": 1025, "total_steps": 78105, "loss": 0.6565, "lr": 6.554858532838305e-07, "epoch": 0.06561679790026247, "percentage": 1.31, "elapsed_time": "0:02:19", "remaining_time": "2:55:14", "throughput": 23172.71, "total_tokens": 3240128}
|
|
{"current_steps": 1030, "total_steps": 78105, "loss": 0.6695, "lr": 6.58686467801818e-07, "epoch": 0.06593687984123936, "percentage": 1.32, "elapsed_time": "0:02:20", "remaining_time": "2:55:15", "throughput": 23175.36, "total_tokens": 3256576}
|
|
{"current_steps": 1035, "total_steps": 78105, "loss": 0.6371, "lr": 6.618870823198055e-07, "epoch": 0.06625696178221625, "percentage": 1.33, "elapsed_time": "0:02:21", "remaining_time": "2:55:14", "throughput": 23175.24, "total_tokens": 3272384}
|
|
{"current_steps": 1040, "total_steps": 78105, "loss": 0.4953, "lr": 6.65087696837793e-07, "epoch": 0.06657704372319313, "percentage": 1.33, "elapsed_time": "0:02:21", "remaining_time": "2:55:14", "throughput": 23176.29, "total_tokens": 3288512}
|
|
{"current_steps": 1045, "total_steps": 78105, "loss": 0.5671, "lr": 6.682883113557803e-07, "epoch": 0.06689712566417003, "percentage": 1.34, "elapsed_time": "0:02:22", "remaining_time": "2:55:17", "throughput": 23181.72, "total_tokens": 3306304}
|
|
{"current_steps": 1050, "total_steps": 78105, "loss": 0.5265, "lr": 6.714889258737677e-07, "epoch": 0.06721720760514692, "percentage": 1.34, "elapsed_time": "0:02:23", "remaining_time": "2:55:14", "throughput": 23180.38, "total_tokens": 3321344}
|
|
{"current_steps": 1055, "total_steps": 78105, "loss": 0.6922, "lr": 6.746895403917553e-07, "epoch": 0.06753728954612381, "percentage": 1.35, "elapsed_time": "0:02:24", "remaining_time": "2:55:17", "throughput": 23183.7, "total_tokens": 3338560}
|
|
{"current_steps": 1060, "total_steps": 78105, "loss": 0.5338, "lr": 6.778901549097428e-07, "epoch": 0.06785737148710069, "percentage": 1.36, "elapsed_time": "0:02:24", "remaining_time": "2:55:13", "throughput": 23181.1, "total_tokens": 3353152}
|
|
{"current_steps": 1065, "total_steps": 78105, "loss": 0.517, "lr": 6.810907694277303e-07, "epoch": 0.06817745342807759, "percentage": 1.36, "elapsed_time": "0:02:25", "remaining_time": "2:55:14", "throughput": 23182.01, "total_tokens": 3369536}
|
|
{"current_steps": 1070, "total_steps": 78105, "loss": 0.6104, "lr": 6.842913839457177e-07, "epoch": 0.06849753536905448, "percentage": 1.37, "elapsed_time": "0:02:26", "remaining_time": "2:55:12", "throughput": 23180.9, "total_tokens": 3384832}
|
|
{"current_steps": 1075, "total_steps": 78105, "loss": 0.5292, "lr": 6.874919984637051e-07, "epoch": 0.06881761731003137, "percentage": 1.38, "elapsed_time": "0:02:26", "remaining_time": "2:55:09", "throughput": 23178.49, "total_tokens": 3399424}
|
|
{"current_steps": 1080, "total_steps": 78105, "loss": 0.697, "lr": 6.906926129816925e-07, "epoch": 0.06913769925100825, "percentage": 1.38, "elapsed_time": "0:02:27", "remaining_time": "2:55:11", "throughput": 23181.67, "total_tokens": 3416704}
|
|
{"current_steps": 1085, "total_steps": 78105, "loss": 0.649, "lr": 6.9389322749968e-07, "epoch": 0.06945778119198515, "percentage": 1.39, "elapsed_time": "0:02:28", "remaining_time": "2:55:08", "throughput": 23181.15, "total_tokens": 3431552}
|
|
{"current_steps": 1090, "total_steps": 78105, "loss": 0.72, "lr": 6.970938420176675e-07, "epoch": 0.06977786313296204, "percentage": 1.4, "elapsed_time": "0:02:28", "remaining_time": "2:55:07", "throughput": 23181.29, "total_tokens": 3447488}
|
|
{"current_steps": 1095, "total_steps": 78105, "loss": 0.5332, "lr": 7.00294456535655e-07, "epoch": 0.07009794507393893, "percentage": 1.4, "elapsed_time": "0:02:29", "remaining_time": "2:55:07", "throughput": 23181.93, "total_tokens": 3463424}
|
|
{"current_steps": 1100, "total_steps": 78105, "loss": 0.6978, "lr": 7.034950710536424e-07, "epoch": 0.07041802701491581, "percentage": 1.41, "elapsed_time": "0:02:30", "remaining_time": "2:55:07", "throughput": 23182.74, "total_tokens": 3479680}
|
|
{"current_steps": 1105, "total_steps": 78105, "loss": 0.5592, "lr": 7.066956855716298e-07, "epoch": 0.0707381089558927, "percentage": 1.41, "elapsed_time": "0:02:30", "remaining_time": "2:55:06", "throughput": 23184.14, "total_tokens": 3495552}
|
|
{"current_steps": 1110, "total_steps": 78105, "loss": 0.5827, "lr": 7.098963000896172e-07, "epoch": 0.0710581908968696, "percentage": 1.42, "elapsed_time": "0:02:31", "remaining_time": "2:55:04", "throughput": 23183.57, "total_tokens": 3510976}
|
|
{"current_steps": 1115, "total_steps": 78105, "loss": 0.6853, "lr": 7.130969146076047e-07, "epoch": 0.0713782728378465, "percentage": 1.43, "elapsed_time": "0:02:32", "remaining_time": "2:55:02", "throughput": 23181.59, "total_tokens": 3526016}
|
|
{"current_steps": 1120, "total_steps": 78105, "loss": 0.591, "lr": 7.162975291255922e-07, "epoch": 0.07169835477882337, "percentage": 1.43, "elapsed_time": "0:02:32", "remaining_time": "2:54:58", "throughput": 23180.0, "total_tokens": 3540544}
|
|
{"current_steps": 1125, "total_steps": 78105, "loss": 0.618, "lr": 7.194981436435796e-07, "epoch": 0.07201843671980027, "percentage": 1.44, "elapsed_time": "0:02:33", "remaining_time": "2:54:58", "throughput": 23180.36, "total_tokens": 3556416}
|
|
{"current_steps": 1130, "total_steps": 78105, "loss": 0.5904, "lr": 7.226987581615671e-07, "epoch": 0.07233851866077716, "percentage": 1.45, "elapsed_time": "0:02:34", "remaining_time": "2:54:56", "throughput": 23181.42, "total_tokens": 3572096}
|
|
{"current_steps": 1135, "total_steps": 78105, "loss": 0.5448, "lr": 7.258993726795545e-07, "epoch": 0.07265860060175405, "percentage": 1.45, "elapsed_time": "0:02:34", "remaining_time": "2:54:55", "throughput": 23181.03, "total_tokens": 3587712}
|
|
{"current_steps": 1140, "total_steps": 78105, "loss": 0.6193, "lr": 7.290999871975419e-07, "epoch": 0.07297868254273093, "percentage": 1.46, "elapsed_time": "0:02:35", "remaining_time": "2:54:57", "throughput": 23185.21, "total_tokens": 3605056}
|
|
{"current_steps": 1145, "total_steps": 78105, "loss": 0.7339, "lr": 7.323006017155294e-07, "epoch": 0.07329876448370783, "percentage": 1.47, "elapsed_time": "0:02:36", "remaining_time": "2:54:57", "throughput": 23186.89, "total_tokens": 3621184}
|
|
{"current_steps": 1150, "total_steps": 78105, "loss": 0.6294, "lr": 7.355012162335169e-07, "epoch": 0.07361884642468472, "percentage": 1.47, "elapsed_time": "0:02:36", "remaining_time": "2:54:52", "throughput": 23184.16, "total_tokens": 3635392}
|
|
{"current_steps": 1155, "total_steps": 78105, "loss": 0.5553, "lr": 7.387018307515043e-07, "epoch": 0.07393892836566161, "percentage": 1.48, "elapsed_time": "0:02:37", "remaining_time": "2:54:49", "throughput": 23182.96, "total_tokens": 3649984}
|
|
{"current_steps": 1160, "total_steps": 78105, "loss": 0.6221, "lr": 7.419024452694919e-07, "epoch": 0.07425901030663849, "percentage": 1.49, "elapsed_time": "0:02:38", "remaining_time": "2:54:48", "throughput": 23184.34, "total_tokens": 3665920}
|
|
{"current_steps": 1165, "total_steps": 78105, "loss": 0.4922, "lr": 7.451030597874792e-07, "epoch": 0.07457909224761539, "percentage": 1.49, "elapsed_time": "0:02:38", "remaining_time": "2:54:44", "throughput": 23182.07, "total_tokens": 3680256}
|
|
{"current_steps": 1170, "total_steps": 78105, "loss": 0.5405, "lr": 7.483036743054666e-07, "epoch": 0.07489917418859228, "percentage": 1.5, "elapsed_time": "0:02:39", "remaining_time": "2:54:46", "throughput": 23185.45, "total_tokens": 3697536}
|
|
{"current_steps": 1175, "total_steps": 78105, "loss": 0.659, "lr": 7.515042888234542e-07, "epoch": 0.07521925612956917, "percentage": 1.5, "elapsed_time": "0:02:40", "remaining_time": "2:54:46", "throughput": 23183.13, "total_tokens": 3713088}
|
|
{"current_steps": 1180, "total_steps": 78105, "loss": 0.6654, "lr": 7.547049033414417e-07, "epoch": 0.07553933807054607, "percentage": 1.51, "elapsed_time": "0:02:40", "remaining_time": "2:54:47", "throughput": 23186.41, "total_tokens": 3729920}
|
|
{"current_steps": 1185, "total_steps": 78105, "loss": 0.5656, "lr": 7.579055178594291e-07, "epoch": 0.07585942001152295, "percentage": 1.52, "elapsed_time": "0:02:41", "remaining_time": "2:54:45", "throughput": 23187.39, "total_tokens": 3745664}
|
|
{"current_steps": 1190, "total_steps": 78105, "loss": 0.5935, "lr": 7.611061323774166e-07, "epoch": 0.07617950195249984, "percentage": 1.52, "elapsed_time": "0:02:42", "remaining_time": "2:54:43", "throughput": 23186.21, "total_tokens": 3760576}
|
|
{"current_steps": 1195, "total_steps": 78105, "loss": 0.5309, "lr": 7.64306746895404e-07, "epoch": 0.07649958389347673, "percentage": 1.53, "elapsed_time": "0:02:42", "remaining_time": "2:54:43", "throughput": 23185.83, "total_tokens": 3776576}
|
|
{"current_steps": 1200, "total_steps": 78105, "loss": 0.6443, "lr": 7.675073614133914e-07, "epoch": 0.07681966583445363, "percentage": 1.54, "elapsed_time": "0:02:43", "remaining_time": "2:54:42", "throughput": 23186.05, "total_tokens": 3792384}
|
|
{"current_steps": 1205, "total_steps": 78105, "loss": 0.6309, "lr": 7.707079759313789e-07, "epoch": 0.0771397477754305, "percentage": 1.54, "elapsed_time": "0:02:44", "remaining_time": "2:54:38", "throughput": 23182.52, "total_tokens": 3806592}
|
|
{"current_steps": 1210, "total_steps": 78105, "loss": 0.6041, "lr": 7.739085904493664e-07, "epoch": 0.0774598297164074, "percentage": 1.55, "elapsed_time": "0:02:44", "remaining_time": "2:54:37", "throughput": 23182.65, "total_tokens": 3822080}
|
|
{"current_steps": 1215, "total_steps": 78105, "loss": 0.5811, "lr": 7.771092049673538e-07, "epoch": 0.07777991165738429, "percentage": 1.56, "elapsed_time": "0:02:45", "remaining_time": "2:54:35", "throughput": 23181.01, "total_tokens": 3837120}
|
|
{"current_steps": 1220, "total_steps": 78105, "loss": 0.7375, "lr": 7.803098194853412e-07, "epoch": 0.07809999359836119, "percentage": 1.56, "elapsed_time": "0:02:46", "remaining_time": "2:54:34", "throughput": 23181.44, "total_tokens": 3852864}
|
|
{"current_steps": 1225, "total_steps": 78105, "loss": 0.5147, "lr": 7.835104340033287e-07, "epoch": 0.07842007553933807, "percentage": 1.57, "elapsed_time": "0:02:46", "remaining_time": "2:54:34", "throughput": 23183.18, "total_tokens": 3869184}
|
|
{"current_steps": 1230, "total_steps": 78105, "loss": 0.5631, "lr": 7.867110485213161e-07, "epoch": 0.07874015748031496, "percentage": 1.57, "elapsed_time": "0:02:47", "remaining_time": "2:54:33", "throughput": 23184.24, "total_tokens": 3885248}
|
|
{"current_steps": 1235, "total_steps": 78105, "loss": 0.4543, "lr": 7.899116630393036e-07, "epoch": 0.07906023942129185, "percentage": 1.58, "elapsed_time": "0:02:48", "remaining_time": "2:54:31", "throughput": 23183.89, "total_tokens": 3900416}
|
|
{"current_steps": 1240, "total_steps": 78105, "loss": 0.5628, "lr": 7.931122775572911e-07, "epoch": 0.07938032136226875, "percentage": 1.59, "elapsed_time": "0:02:48", "remaining_time": "2:54:30", "throughput": 23184.08, "total_tokens": 3916096}
|
|
{"current_steps": 1245, "total_steps": 78105, "loss": 0.6837, "lr": 7.963128920752785e-07, "epoch": 0.07970040330324563, "percentage": 1.59, "elapsed_time": "0:02:49", "remaining_time": "2:54:32", "throughput": 23187.2, "total_tokens": 3933312}
|
|
{"current_steps": 1250, "total_steps": 78105, "loss": 0.6801, "lr": 7.995135065932659e-07, "epoch": 0.08002048524422252, "percentage": 1.6, "elapsed_time": "0:02:50", "remaining_time": "2:54:31", "throughput": 23188.41, "total_tokens": 3949440}
|
|
{"current_steps": 1255, "total_steps": 78105, "loss": 0.6986, "lr": 8.027141211112534e-07, "epoch": 0.08034056718519941, "percentage": 1.61, "elapsed_time": "0:02:50", "remaining_time": "2:54:30", "throughput": 23189.01, "total_tokens": 3964992}
|
|
{"current_steps": 1260, "total_steps": 78105, "loss": 0.7041, "lr": 8.059147356292408e-07, "epoch": 0.0806606491261763, "percentage": 1.61, "elapsed_time": "0:02:51", "remaining_time": "2:54:30", "throughput": 23191.48, "total_tokens": 3981696}
|
|
{"current_steps": 1265, "total_steps": 78105, "loss": 0.6558, "lr": 8.091153501472283e-07, "epoch": 0.08098073106715319, "percentage": 1.62, "elapsed_time": "0:02:52", "remaining_time": "2:54:29", "throughput": 23192.03, "total_tokens": 3997248}
|
|
{"current_steps": 1270, "total_steps": 78105, "loss": 0.6925, "lr": 8.123159646652158e-07, "epoch": 0.08130081300813008, "percentage": 1.63, "elapsed_time": "0:02:52", "remaining_time": "2:54:26", "throughput": 23189.79, "total_tokens": 4011648}
|
|
{"current_steps": 1275, "total_steps": 78105, "loss": 0.5155, "lr": 8.155165791832032e-07, "epoch": 0.08162089494910697, "percentage": 1.63, "elapsed_time": "0:02:53", "remaining_time": "2:54:26", "throughput": 23191.0, "total_tokens": 4028160}
|
|
{"current_steps": 1280, "total_steps": 78105, "loss": 0.6299, "lr": 8.187171937011906e-07, "epoch": 0.08194097689008387, "percentage": 1.64, "elapsed_time": "0:02:54", "remaining_time": "2:54:25", "throughput": 23190.46, "total_tokens": 4043584}
|
|
{"current_steps": 1285, "total_steps": 78105, "loss": 0.7085, "lr": 8.219178082191781e-07, "epoch": 0.08226105883106075, "percentage": 1.65, "elapsed_time": "0:02:55", "remaining_time": "2:54:24", "throughput": 23190.67, "total_tokens": 4059456}
|
|
{"current_steps": 1290, "total_steps": 78105, "loss": 0.5905, "lr": 8.251184227371655e-07, "epoch": 0.08258114077203764, "percentage": 1.65, "elapsed_time": "0:02:55", "remaining_time": "2:54:25", "throughput": 23192.78, "total_tokens": 4076096}
|
|
{"current_steps": 1295, "total_steps": 78105, "loss": 0.6022, "lr": 8.283190372551531e-07, "epoch": 0.08290122271301453, "percentage": 1.66, "elapsed_time": "0:02:56", "remaining_time": "2:54:27", "throughput": 23196.5, "total_tokens": 4093568}
|
|
{"current_steps": 1300, "total_steps": 78105, "loss": 0.6345, "lr": 8.315196517731406e-07, "epoch": 0.08322130465399143, "percentage": 1.66, "elapsed_time": "0:02:57", "remaining_time": "2:54:25", "throughput": 23196.57, "total_tokens": 4108864}
|
|
{"current_steps": 1305, "total_steps": 78105, "loss": 0.6653, "lr": 8.34720266291128e-07, "epoch": 0.0835413865949683, "percentage": 1.67, "elapsed_time": "0:02:57", "remaining_time": "2:54:23", "throughput": 23195.9, "total_tokens": 4124224}
|
|
{"current_steps": 1310, "total_steps": 78105, "loss": 0.5601, "lr": 8.379208808091154e-07, "epoch": 0.0838614685359452, "percentage": 1.68, "elapsed_time": "0:02:58", "remaining_time": "2:54:20", "throughput": 23195.2, "total_tokens": 4139008}
|
|
{"current_steps": 1315, "total_steps": 78105, "loss": 0.5444, "lr": 8.411214953271029e-07, "epoch": 0.08418155047692209, "percentage": 1.68, "elapsed_time": "0:02:59", "remaining_time": "2:54:20", "throughput": 23196.14, "total_tokens": 4155008}
|
|
{"current_steps": 1320, "total_steps": 78105, "loss": 0.5153, "lr": 8.443221098450903e-07, "epoch": 0.08450163241789899, "percentage": 1.69, "elapsed_time": "0:02:59", "remaining_time": "2:54:21", "throughput": 23200.2, "total_tokens": 4172544}
|
|
{"current_steps": 1325, "total_steps": 78105, "loss": 0.7105, "lr": 8.475227243630778e-07, "epoch": 0.08482171435887587, "percentage": 1.7, "elapsed_time": "0:03:00", "remaining_time": "2:54:20", "throughput": 23201.21, "total_tokens": 4188416}
|
|
{"current_steps": 1330, "total_steps": 78105, "loss": 0.5091, "lr": 8.507233388810653e-07, "epoch": 0.08514179629985276, "percentage": 1.7, "elapsed_time": "0:03:01", "remaining_time": "2:54:17", "throughput": 23198.64, "total_tokens": 4202560}
|
|
{"current_steps": 1335, "total_steps": 78105, "loss": 0.5489, "lr": 8.539239533990527e-07, "epoch": 0.08546187824082965, "percentage": 1.71, "elapsed_time": "0:03:01", "remaining_time": "2:54:18", "throughput": 23200.21, "total_tokens": 4219392}
|
|
{"current_steps": 1340, "total_steps": 78105, "loss": 0.5204, "lr": 8.571245679170401e-07, "epoch": 0.08578196018180655, "percentage": 1.72, "elapsed_time": "0:03:02", "remaining_time": "2:54:18", "throughput": 23200.29, "total_tokens": 4235328}
|
|
{"current_steps": 1345, "total_steps": 78105, "loss": 0.5803, "lr": 8.603251824350276e-07, "epoch": 0.08610204212278343, "percentage": 1.72, "elapsed_time": "0:03:03", "remaining_time": "2:54:15", "throughput": 23199.72, "total_tokens": 4250368}
|
|
{"current_steps": 1350, "total_steps": 78105, "loss": 0.5222, "lr": 8.63525796953015e-07, "epoch": 0.08642212406376032, "percentage": 1.73, "elapsed_time": "0:03:03", "remaining_time": "2:54:14", "throughput": 23199.83, "total_tokens": 4265856}
|
|
{"current_steps": 1355, "total_steps": 78105, "loss": 0.6881, "lr": 8.667264114710025e-07, "epoch": 0.08674220600473721, "percentage": 1.73, "elapsed_time": "0:03:04", "remaining_time": "2:54:13", "throughput": 23199.89, "total_tokens": 4281792}
|
|
{"current_steps": 1360, "total_steps": 78105, "loss": 0.624, "lr": 8.6992702598899e-07, "epoch": 0.0870622879457141, "percentage": 1.74, "elapsed_time": "0:03:05", "remaining_time": "2:54:12", "throughput": 23199.61, "total_tokens": 4297088}
|
|
{"current_steps": 1365, "total_steps": 78105, "loss": 0.549, "lr": 8.731276405069774e-07, "epoch": 0.087382369886691, "percentage": 1.75, "elapsed_time": "0:03:05", "remaining_time": "2:54:10", "throughput": 23198.03, "total_tokens": 4312192}
|
|
{"current_steps": 1370, "total_steps": 78105, "loss": 0.5303, "lr": 8.763282550249648e-07, "epoch": 0.08770245182766788, "percentage": 1.75, "elapsed_time": "0:03:06", "remaining_time": "2:54:07", "throughput": 23196.59, "total_tokens": 4326720}
|
|
{"current_steps": 1375, "total_steps": 78105, "loss": 0.7837, "lr": 8.795288695429523e-07, "epoch": 0.08802253376864477, "percentage": 1.76, "elapsed_time": "0:03:07", "remaining_time": "2:54:05", "throughput": 23195.09, "total_tokens": 4341760}
|
|
{"current_steps": 1380, "total_steps": 78105, "loss": 0.6399, "lr": 8.827294840609397e-07, "epoch": 0.08834261570962167, "percentage": 1.77, "elapsed_time": "0:03:07", "remaining_time": "2:54:05", "throughput": 23195.78, "total_tokens": 4357760}
|
|
{"current_steps": 1385, "total_steps": 78105, "loss": 0.6436, "lr": 8.859300985789272e-07, "epoch": 0.08866269765059856, "percentage": 1.77, "elapsed_time": "0:03:08", "remaining_time": "2:54:05", "throughput": 23195.79, "total_tokens": 4373824}
|
|
{"current_steps": 1390, "total_steps": 78105, "loss": 0.4859, "lr": 8.891307130969147e-07, "epoch": 0.08898277959157544, "percentage": 1.78, "elapsed_time": "0:03:09", "remaining_time": "2:54:03", "throughput": 23195.15, "total_tokens": 4388992}
|
|
{"current_steps": 1395, "total_steps": 78105, "loss": 0.5303, "lr": 8.923313276149021e-07, "epoch": 0.08930286153255233, "percentage": 1.79, "elapsed_time": "0:03:09", "remaining_time": "2:54:01", "throughput": 23194.04, "total_tokens": 4404288}
|
|
{"current_steps": 1400, "total_steps": 78105, "loss": 0.5821, "lr": 8.955319421328895e-07, "epoch": 0.08962294347352923, "percentage": 1.79, "elapsed_time": "0:03:10", "remaining_time": "2:54:00", "throughput": 23194.94, "total_tokens": 4419840}
|
|
{"current_steps": 1405, "total_steps": 78105, "loss": 0.5764, "lr": 8.98732556650877e-07, "epoch": 0.08994302541450612, "percentage": 1.8, "elapsed_time": "0:03:11", "remaining_time": "2:53:58", "throughput": 23194.76, "total_tokens": 4435200}
|
|
{"current_steps": 1410, "total_steps": 78105, "loss": 0.6502, "lr": 9.019331711688644e-07, "epoch": 0.090263107355483, "percentage": 1.81, "elapsed_time": "0:03:11", "remaining_time": "2:53:56", "throughput": 23193.71, "total_tokens": 4450368}
|
|
{"current_steps": 1415, "total_steps": 78105, "loss": 0.494, "lr": 9.05133785686852e-07, "epoch": 0.09058318929645989, "percentage": 1.81, "elapsed_time": "0:03:12", "remaining_time": "2:53:55", "throughput": 23194.23, "total_tokens": 4466048}
|
|
{"current_steps": 1420, "total_steps": 78105, "loss": 0.5941, "lr": 9.083344002048395e-07, "epoch": 0.09090327123743679, "percentage": 1.82, "elapsed_time": "0:03:13", "remaining_time": "2:53:54", "throughput": 23195.02, "total_tokens": 4481920}
|
|
{"current_steps": 1425, "total_steps": 78105, "loss": 0.6146, "lr": 9.115350147228269e-07, "epoch": 0.09122335317841368, "percentage": 1.82, "elapsed_time": "0:03:13", "remaining_time": "2:53:54", "throughput": 23196.69, "total_tokens": 4498112}
|
|
{"current_steps": 1430, "total_steps": 78105, "loss": 0.4945, "lr": 9.147356292408143e-07, "epoch": 0.09154343511939056, "percentage": 1.83, "elapsed_time": "0:03:14", "remaining_time": "2:53:56", "throughput": 23199.93, "total_tokens": 4515648}
|
|
{"current_steps": 1435, "total_steps": 78105, "loss": 0.6525, "lr": 9.179362437588018e-07, "epoch": 0.09186351706036745, "percentage": 1.84, "elapsed_time": "0:03:15", "remaining_time": "2:53:56", "throughput": 23200.74, "total_tokens": 4531840}
|
|
{"current_steps": 1440, "total_steps": 78105, "loss": 0.5896, "lr": 9.211368582767892e-07, "epoch": 0.09218359900134435, "percentage": 1.84, "elapsed_time": "0:03:16", "remaining_time": "2:53:55", "throughput": 23200.92, "total_tokens": 4547456}
|
|
{"current_steps": 1445, "total_steps": 78105, "loss": 0.5707, "lr": 9.243374727947767e-07, "epoch": 0.09250368094232124, "percentage": 1.85, "elapsed_time": "0:03:16", "remaining_time": "2:53:54", "throughput": 23201.76, "total_tokens": 4563328}
|
|
{"current_steps": 1450, "total_steps": 78105, "loss": 0.5985, "lr": 9.275380873127642e-07, "epoch": 0.09282376288329812, "percentage": 1.86, "elapsed_time": "0:03:17", "remaining_time": "2:53:54", "throughput": 23201.8, "total_tokens": 4579392}
|
|
{"current_steps": 1455, "total_steps": 78105, "loss": 0.5964, "lr": 9.307387018307516e-07, "epoch": 0.09314384482427501, "percentage": 1.86, "elapsed_time": "0:03:18", "remaining_time": "2:53:53", "throughput": 23202.8, "total_tokens": 4595584}
|
|
{"current_steps": 1460, "total_steps": 78105, "loss": 0.5431, "lr": 9.33939316348739e-07, "epoch": 0.0934639267652519, "percentage": 1.87, "elapsed_time": "0:03:18", "remaining_time": "2:53:51", "throughput": 23200.4, "total_tokens": 4610112}
|
|
{"current_steps": 1465, "total_steps": 78105, "loss": 0.6868, "lr": 9.371399308667265e-07, "epoch": 0.0937840087062288, "percentage": 1.88, "elapsed_time": "0:03:19", "remaining_time": "2:53:51", "throughput": 23201.91, "total_tokens": 4626432}
|
|
{"current_steps": 1470, "total_steps": 78105, "loss": 0.7079, "lr": 9.403405453847139e-07, "epoch": 0.09410409064720568, "percentage": 1.88, "elapsed_time": "0:03:20", "remaining_time": "2:53:49", "throughput": 23201.6, "total_tokens": 4641792}
|
|
{"current_steps": 1475, "total_steps": 78105, "loss": 0.4159, "lr": 9.435411599027014e-07, "epoch": 0.09442417258818257, "percentage": 1.89, "elapsed_time": "0:03:20", "remaining_time": "2:53:48", "throughput": 23200.42, "total_tokens": 4656896}
|
|
{"current_steps": 1480, "total_steps": 78105, "loss": 0.6569, "lr": 9.467417744206889e-07, "epoch": 0.09474425452915947, "percentage": 1.89, "elapsed_time": "0:03:21", "remaining_time": "2:53:49", "throughput": 23200.83, "total_tokens": 4673472}
|
|
{"current_steps": 1485, "total_steps": 78105, "loss": 0.4387, "lr": 9.499423889386763e-07, "epoch": 0.09506433647013636, "percentage": 1.9, "elapsed_time": "0:03:22", "remaining_time": "2:53:47", "throughput": 23200.31, "total_tokens": 4688896}
|
|
{"current_steps": 1490, "total_steps": 78105, "loss": 0.7065, "lr": 9.531430034566637e-07, "epoch": 0.09538441841111324, "percentage": 1.91, "elapsed_time": "0:03:22", "remaining_time": "2:53:47", "throughput": 23199.98, "total_tokens": 4704576}
|
|
{"current_steps": 1495, "total_steps": 78105, "loss": 0.5807, "lr": 9.563436179746512e-07, "epoch": 0.09570450035209013, "percentage": 1.91, "elapsed_time": "0:03:23", "remaining_time": "2:53:44", "throughput": 23198.39, "total_tokens": 4719040}
|
|
{"current_steps": 1500, "total_steps": 78105, "loss": 0.5473, "lr": 9.595442324926386e-07, "epoch": 0.09602458229306703, "percentage": 1.92, "elapsed_time": "0:03:24", "remaining_time": "2:53:41", "throughput": 23197.57, "total_tokens": 4733696}
|
|
{"current_steps": 1505, "total_steps": 78105, "loss": 0.5672, "lr": 9.62744847010626e-07, "epoch": 0.09634466423404392, "percentage": 1.93, "elapsed_time": "0:03:24", "remaining_time": "2:53:39", "throughput": 23197.54, "total_tokens": 4748992}
|
|
{"current_steps": 1510, "total_steps": 78105, "loss": 0.703, "lr": 9.659454615286136e-07, "epoch": 0.0966647461750208, "percentage": 1.93, "elapsed_time": "0:03:25", "remaining_time": "2:53:39", "throughput": 23198.4, "total_tokens": 4764992}
|
|
{"current_steps": 1515, "total_steps": 78105, "loss": 0.6398, "lr": 9.69146076046601e-07, "epoch": 0.09698482811599769, "percentage": 1.94, "elapsed_time": "0:03:26", "remaining_time": "2:53:37", "throughput": 23197.81, "total_tokens": 4780352}
|
|
{"current_steps": 1520, "total_steps": 78105, "loss": 0.5853, "lr": 9.723466905645885e-07, "epoch": 0.09730491005697459, "percentage": 1.95, "elapsed_time": "0:03:26", "remaining_time": "2:53:37", "throughput": 23197.55, "total_tokens": 4796224}
|
|
{"current_steps": 1525, "total_steps": 78105, "loss": 0.5472, "lr": 9.75547305082576e-07, "epoch": 0.09762499199795148, "percentage": 1.95, "elapsed_time": "0:03:27", "remaining_time": "2:53:36", "throughput": 23197.2, "total_tokens": 4811840}
|
|
{"current_steps": 1530, "total_steps": 78105, "loss": 0.5887, "lr": 9.787479196005634e-07, "epoch": 0.09794507393892836, "percentage": 1.96, "elapsed_time": "0:03:28", "remaining_time": "2:53:34", "throughput": 23195.43, "total_tokens": 4826432}
|
|
{"current_steps": 1535, "total_steps": 78105, "loss": 0.4231, "lr": 9.81948534118551e-07, "epoch": 0.09826515587990525, "percentage": 1.97, "elapsed_time": "0:03:28", "remaining_time": "2:53:32", "throughput": 23195.28, "total_tokens": 4841920}
|
|
{"current_steps": 1540, "total_steps": 78105, "loss": 0.6139, "lr": 9.851491486365384e-07, "epoch": 0.09858523782088215, "percentage": 1.97, "elapsed_time": "0:03:29", "remaining_time": "2:53:31", "throughput": 23195.72, "total_tokens": 4857536}
|
|
{"current_steps": 1545, "total_steps": 78105, "loss": 0.7112, "lr": 9.883497631545258e-07, "epoch": 0.09890531976185904, "percentage": 1.98, "elapsed_time": "0:03:30", "remaining_time": "2:53:30", "throughput": 23196.15, "total_tokens": 4873408}
|
|
{"current_steps": 1550, "total_steps": 78105, "loss": 0.6346, "lr": 9.91550377672513e-07, "epoch": 0.09922540170283592, "percentage": 1.98, "elapsed_time": "0:03:30", "remaining_time": "2:53:30", "throughput": 23196.85, "total_tokens": 4889536}
|
|
{"current_steps": 1555, "total_steps": 78105, "loss": 0.5943, "lr": 9.947509921905006e-07, "epoch": 0.09954548364381281, "percentage": 1.99, "elapsed_time": "0:03:31", "remaining_time": "2:53:28", "throughput": 23195.86, "total_tokens": 4904448}
|
|
{"current_steps": 1560, "total_steps": 78105, "loss": 0.6641, "lr": 9.97951606708488e-07, "epoch": 0.0998655655847897, "percentage": 2.0, "elapsed_time": "0:03:32", "remaining_time": "2:53:27", "throughput": 23194.76, "total_tokens": 4919616}
|
|
{"current_steps": 1565, "total_steps": 78105, "loss": 0.569, "lr": 1.0011522212264755e-06, "epoch": 0.1001856475257666, "percentage": 2.0, "elapsed_time": "0:03:32", "remaining_time": "2:53:24", "throughput": 23192.8, "total_tokens": 4934144}
|
|
{"current_steps": 1570, "total_steps": 78105, "loss": 0.5557, "lr": 1.004352835744463e-06, "epoch": 0.1005057294667435, "percentage": 2.01, "elapsed_time": "0:03:33", "remaining_time": "2:53:24", "throughput": 23193.26, "total_tokens": 4950272}
|
|
{"current_steps": 1575, "total_steps": 78105, "loss": 0.6099, "lr": 1.0075534502624504e-06, "epoch": 0.10082581140772037, "percentage": 2.02, "elapsed_time": "0:03:34", "remaining_time": "2:53:22", "throughput": 23191.28, "total_tokens": 4965056}
|
|
{"current_steps": 1580, "total_steps": 78105, "loss": 0.5355, "lr": 1.0107540647804379e-06, "epoch": 0.10114589334869727, "percentage": 2.02, "elapsed_time": "0:03:34", "remaining_time": "2:53:21", "throughput": 23189.53, "total_tokens": 4980160}
|
|
{"current_steps": 1585, "total_steps": 78105, "loss": 0.4531, "lr": 1.0139546792984254e-06, "epoch": 0.10146597528967416, "percentage": 2.03, "elapsed_time": "0:03:35", "remaining_time": "2:53:22", "throughput": 23190.8, "total_tokens": 4996992}
|
|
{"current_steps": 1590, "total_steps": 78105, "loss": 0.5276, "lr": 1.0171552938164128e-06, "epoch": 0.10178605723065105, "percentage": 2.04, "elapsed_time": "0:03:36", "remaining_time": "2:53:21", "throughput": 23191.17, "total_tokens": 5012608}
|
|
{"current_steps": 1595, "total_steps": 78105, "loss": 0.4634, "lr": 1.0203559083344003e-06, "epoch": 0.10210613917162793, "percentage": 2.04, "elapsed_time": "0:03:36", "remaining_time": "2:53:19", "throughput": 23190.57, "total_tokens": 5027840}
|
|
{"current_steps": 1600, "total_steps": 78105, "loss": 0.6232, "lr": 1.0235565228523878e-06, "epoch": 0.10242622111260483, "percentage": 2.05, "elapsed_time": "0:03:37", "remaining_time": "2:53:17", "throughput": 23189.85, "total_tokens": 5042752}
|
|
{"current_steps": 1605, "total_steps": 78105, "loss": 0.5789, "lr": 1.0267571373703752e-06, "epoch": 0.10274630305358172, "percentage": 2.05, "elapsed_time": "0:03:38", "remaining_time": "2:53:17", "throughput": 23190.39, "total_tokens": 5058624}
|
|
{"current_steps": 1610, "total_steps": 78105, "loss": 0.6365, "lr": 1.0299577518883625e-06, "epoch": 0.10306638499455861, "percentage": 2.06, "elapsed_time": "0:03:38", "remaining_time": "2:53:16", "throughput": 23191.95, "total_tokens": 5075008}
|
|
{"current_steps": 1615, "total_steps": 78105, "loss": 0.5593, "lr": 1.0331583664063502e-06, "epoch": 0.10338646693553549, "percentage": 2.07, "elapsed_time": "0:03:39", "remaining_time": "2:53:16", "throughput": 23192.51, "total_tokens": 5090880}
|
|
{"current_steps": 1620, "total_steps": 78105, "loss": 0.5551, "lr": 1.0363589809243376e-06, "epoch": 0.10370654887651239, "percentage": 2.07, "elapsed_time": "0:03:40", "remaining_time": "2:53:15", "throughput": 23192.4, "total_tokens": 5106816}
|
|
{"current_steps": 1625, "total_steps": 78105, "loss": 0.4576, "lr": 1.039559595442325e-06, "epoch": 0.10402663081748928, "percentage": 2.08, "elapsed_time": "0:03:40", "remaining_time": "2:53:17", "throughput": 23194.06, "total_tokens": 5123904}
|
|
{"current_steps": 1630, "total_steps": 78105, "loss": 0.5034, "lr": 1.0427602099603126e-06, "epoch": 0.10434671275846617, "percentage": 2.09, "elapsed_time": "0:03:41", "remaining_time": "2:53:17", "throughput": 23194.24, "total_tokens": 5140160}
|
|
{"current_steps": 1635, "total_steps": 78105, "loss": 0.4584, "lr": 1.0459608244783e-06, "epoch": 0.10466679469944305, "percentage": 2.09, "elapsed_time": "0:03:42", "remaining_time": "2:53:17", "throughput": 23193.73, "total_tokens": 5156288}
|
|
{"current_steps": 1640, "total_steps": 78105, "loss": 0.5812, "lr": 1.0491614389962873e-06, "epoch": 0.10498687664041995, "percentage": 2.1, "elapsed_time": "0:03:42", "remaining_time": "2:53:16", "throughput": 23193.8, "total_tokens": 5171776}
|
|
{"current_steps": 1645, "total_steps": 78105, "loss": 0.7, "lr": 1.0523620535142747e-06, "epoch": 0.10530695858139684, "percentage": 2.11, "elapsed_time": "0:03:43", "remaining_time": "2:53:18", "throughput": 23196.43, "total_tokens": 5189504}
|
|
{"current_steps": 1650, "total_steps": 78105, "loss": 0.5602, "lr": 1.0555626680322622e-06, "epoch": 0.10562704052237373, "percentage": 2.11, "elapsed_time": "0:03:44", "remaining_time": "2:53:17", "throughput": 23196.81, "total_tokens": 5205056}
|
|
{"current_steps": 1655, "total_steps": 78105, "loss": 0.6155, "lr": 1.0587632825502497e-06, "epoch": 0.10594712246335061, "percentage": 2.12, "elapsed_time": "0:03:45", "remaining_time": "2:53:18", "throughput": 23199.82, "total_tokens": 5222656}
|
|
{"current_steps": 1660, "total_steps": 78105, "loss": 0.5936, "lr": 1.0619638970682371e-06, "epoch": 0.1062672044043275, "percentage": 2.13, "elapsed_time": "0:03:45", "remaining_time": "2:53:18", "throughput": 23200.34, "total_tokens": 5238848}
|
|
{"current_steps": 1665, "total_steps": 78105, "loss": 0.5589, "lr": 1.0651645115862246e-06, "epoch": 0.1065872863453044, "percentage": 2.13, "elapsed_time": "0:03:46", "remaining_time": "2:53:19", "throughput": 23201.28, "total_tokens": 5255296}
|
|
{"current_steps": 1670, "total_steps": 78105, "loss": 0.7539, "lr": 1.068365126104212e-06, "epoch": 0.10690736828628129, "percentage": 2.14, "elapsed_time": "0:03:47", "remaining_time": "2:53:17", "throughput": 23200.09, "total_tokens": 5270336}
|
|
{"current_steps": 1675, "total_steps": 78105, "loss": 0.5115, "lr": 1.0715657406221996e-06, "epoch": 0.10722745022725817, "percentage": 2.14, "elapsed_time": "0:03:47", "remaining_time": "2:53:17", "throughput": 23200.71, "total_tokens": 5286720}
|
|
{"current_steps": 1680, "total_steps": 78105, "loss": 0.621, "lr": 1.074766355140187e-06, "epoch": 0.10754753216823507, "percentage": 2.15, "elapsed_time": "0:03:48", "remaining_time": "2:53:15", "throughput": 23199.22, "total_tokens": 5301632}
|
|
{"current_steps": 1685, "total_steps": 78105, "loss": 0.3955, "lr": 1.0779669696581745e-06, "epoch": 0.10786761410921196, "percentage": 2.16, "elapsed_time": "0:03:49", "remaining_time": "2:53:14", "throughput": 23197.87, "total_tokens": 5316544}
|
|
{"current_steps": 1690, "total_steps": 78105, "loss": 0.6051, "lr": 1.081167584176162e-06, "epoch": 0.10818769605018885, "percentage": 2.16, "elapsed_time": "0:03:49", "remaining_time": "2:53:13", "throughput": 23197.77, "total_tokens": 5332544}
|
|
{"current_steps": 1695, "total_steps": 78105, "loss": 0.4972, "lr": 1.0843681986941494e-06, "epoch": 0.10850777799116573, "percentage": 2.17, "elapsed_time": "0:03:50", "remaining_time": "2:53:12", "throughput": 23197.66, "total_tokens": 5348096}
|
|
{"current_steps": 1700, "total_steps": 78105, "loss": 0.6385, "lr": 1.0875688132121367e-06, "epoch": 0.10882785993214263, "percentage": 2.18, "elapsed_time": "0:03:51", "remaining_time": "2:53:10", "throughput": 23195.72, "total_tokens": 5362368}
|
|
{"current_steps": 1705, "total_steps": 78105, "loss": 0.5557, "lr": 1.0907694277301241e-06, "epoch": 0.10914794187311952, "percentage": 2.18, "elapsed_time": "0:03:51", "remaining_time": "2:53:09", "throughput": 23196.08, "total_tokens": 5378176}
|
|
{"current_steps": 1710, "total_steps": 78105, "loss": 0.5577, "lr": 1.0939700422481116e-06, "epoch": 0.10946802381409641, "percentage": 2.19, "elapsed_time": "0:03:52", "remaining_time": "2:53:09", "throughput": 23197.53, "total_tokens": 5394752}
|
|
{"current_steps": 1715, "total_steps": 78105, "loss": 0.6704, "lr": 1.0971706567660993e-06, "epoch": 0.10978810575507329, "percentage": 2.2, "elapsed_time": "0:03:53", "remaining_time": "2:53:10", "throughput": 23200.4, "total_tokens": 5412160}
|
|
{"current_steps": 1720, "total_steps": 78105, "loss": 0.5459, "lr": 1.1003712712840868e-06, "epoch": 0.11010818769605019, "percentage": 2.2, "elapsed_time": "0:03:53", "remaining_time": "2:53:09", "throughput": 23200.4, "total_tokens": 5427840}
|
|
{"current_steps": 1725, "total_steps": 78105, "loss": 0.4625, "lr": 1.1035718858020742e-06, "epoch": 0.11042826963702708, "percentage": 2.21, "elapsed_time": "0:03:54", "remaining_time": "2:53:09", "throughput": 23201.49, "total_tokens": 5444224}
|
|
{"current_steps": 1730, "total_steps": 78105, "loss": 0.6443, "lr": 1.1067725003200615e-06, "epoch": 0.11074835157800397, "percentage": 2.21, "elapsed_time": "0:03:55", "remaining_time": "2:53:08", "throughput": 23201.22, "total_tokens": 5459648}
|
|
{"current_steps": 1735, "total_steps": 78105, "loss": 0.7315, "lr": 1.109973114838049e-06, "epoch": 0.11106843351898085, "percentage": 2.22, "elapsed_time": "0:03:55", "remaining_time": "2:53:07", "throughput": 23200.58, "total_tokens": 5475008}
|
|
{"current_steps": 1740, "total_steps": 78105, "loss": 0.5923, "lr": 1.1131737293560364e-06, "epoch": 0.11138851545995775, "percentage": 2.23, "elapsed_time": "0:03:56", "remaining_time": "2:53:07", "throughput": 23201.13, "total_tokens": 5491456}
|
|
{"current_steps": 1745, "total_steps": 78105, "loss": 0.5267, "lr": 1.1163743438740239e-06, "epoch": 0.11170859740093464, "percentage": 2.23, "elapsed_time": "0:03:57", "remaining_time": "2:53:07", "throughput": 23201.61, "total_tokens": 5507520}
|
|
{"current_steps": 1750, "total_steps": 78105, "loss": 0.6325, "lr": 1.1195749583920113e-06, "epoch": 0.11202867934191153, "percentage": 2.24, "elapsed_time": "0:03:58", "remaining_time": "2:53:06", "throughput": 23201.53, "total_tokens": 5523072}
|
|
{"current_steps": 1755, "total_steps": 78105, "loss": 0.5338, "lr": 1.1227755729099988e-06, "epoch": 0.11234876128288843, "percentage": 2.25, "elapsed_time": "0:03:58", "remaining_time": "2:53:05", "throughput": 23200.61, "total_tokens": 5538304}
|
|
{"current_steps": 1760, "total_steps": 78105, "loss": 0.4734, "lr": 1.1259761874279863e-06, "epoch": 0.1126688432238653, "percentage": 2.25, "elapsed_time": "0:03:59", "remaining_time": "2:53:04", "throughput": 23200.66, "total_tokens": 5553920}
|
|
{"current_steps": 1765, "total_steps": 78105, "loss": 0.4812, "lr": 1.1291768019459737e-06, "epoch": 0.1129889251648422, "percentage": 2.26, "elapsed_time": "0:04:00", "remaining_time": "2:53:02", "throughput": 23200.11, "total_tokens": 5569344}
|
|
{"current_steps": 1770, "total_steps": 78105, "loss": 0.5413, "lr": 1.1323774164639612e-06, "epoch": 0.11330900710581909, "percentage": 2.27, "elapsed_time": "0:04:00", "remaining_time": "2:53:03", "throughput": 23201.32, "total_tokens": 5585856}
|
|
{"current_steps": 1775, "total_steps": 78105, "loss": 0.5719, "lr": 1.1355780309819487e-06, "epoch": 0.11362908904679599, "percentage": 2.27, "elapsed_time": "0:04:01", "remaining_time": "2:53:05", "throughput": 23204.09, "total_tokens": 5603840}
|
|
{"current_steps": 1780, "total_steps": 78105, "loss": 0.5216, "lr": 1.1387786454999361e-06, "epoch": 0.11394917098777287, "percentage": 2.28, "elapsed_time": "0:04:02", "remaining_time": "2:53:05", "throughput": 23204.72, "total_tokens": 5620352}
|
|
{"current_steps": 1785, "total_steps": 78105, "loss": 0.5749, "lr": 1.1419792600179236e-06, "epoch": 0.11426925292874976, "percentage": 2.29, "elapsed_time": "0:04:02", "remaining_time": "2:53:05", "throughput": 23204.76, "total_tokens": 5636544}
|
|
{"current_steps": 1790, "total_steps": 78105, "loss": 0.5669, "lr": 1.1451798745359109e-06, "epoch": 0.11458933486972665, "percentage": 2.29, "elapsed_time": "0:04:03", "remaining_time": "2:53:05", "throughput": 23205.38, "total_tokens": 5652672}
|
|
{"current_steps": 1795, "total_steps": 78105, "loss": 0.4952, "lr": 1.1483804890538983e-06, "epoch": 0.11490941681070355, "percentage": 2.3, "elapsed_time": "0:04:04", "remaining_time": "2:53:04", "throughput": 23205.15, "total_tokens": 5668224}
|
|
{"current_steps": 1800, "total_steps": 78105, "loss": 0.4601, "lr": 1.1515811035718858e-06, "epoch": 0.11522949875168043, "percentage": 2.3, "elapsed_time": "0:04:04", "remaining_time": "2:53:03", "throughput": 23206.4, "total_tokens": 5684480}
|
|
{"current_steps": 1805, "total_steps": 78105, "loss": 0.4491, "lr": 1.1547817180898733e-06, "epoch": 0.11554958069265732, "percentage": 2.31, "elapsed_time": "0:04:05", "remaining_time": "2:53:04", "throughput": 23207.1, "total_tokens": 5700864}
|
|
{"current_steps": 1810, "total_steps": 78105, "loss": 0.5962, "lr": 1.1579823326078607e-06, "epoch": 0.11586966263363421, "percentage": 2.32, "elapsed_time": "0:04:06", "remaining_time": "2:53:02", "throughput": 23206.88, "total_tokens": 5716224}
|
|
{"current_steps": 1815, "total_steps": 78105, "loss": 0.4912, "lr": 1.1611829471258482e-06, "epoch": 0.1161897445746111, "percentage": 2.32, "elapsed_time": "0:04:07", "remaining_time": "2:53:03", "throughput": 23208.37, "total_tokens": 5733056}
|
|
{"current_steps": 1820, "total_steps": 78105, "loss": 0.6931, "lr": 1.1643835616438357e-06, "epoch": 0.11650982651558799, "percentage": 2.33, "elapsed_time": "0:04:07", "remaining_time": "2:53:02", "throughput": 23208.85, "total_tokens": 5749120}
|
|
{"current_steps": 1825, "total_steps": 78105, "loss": 0.6725, "lr": 1.1675841761618231e-06, "epoch": 0.11682990845656488, "percentage": 2.34, "elapsed_time": "0:04:08", "remaining_time": "2:53:00", "throughput": 23207.86, "total_tokens": 5763968}
|
|
{"current_steps": 1830, "total_steps": 78105, "loss": 0.602, "lr": 1.1707847906798106e-06, "epoch": 0.11714999039754177, "percentage": 2.34, "elapsed_time": "0:04:09", "remaining_time": "2:52:59", "throughput": 23207.86, "total_tokens": 5779520}
|
|
{"current_steps": 1835, "total_steps": 78105, "loss": 0.5777, "lr": 1.173985405197798e-06, "epoch": 0.11747007233851867, "percentage": 2.35, "elapsed_time": "0:04:09", "remaining_time": "2:53:03", "throughput": 23211.71, "total_tokens": 5798848}
|
|
{"current_steps": 1840, "total_steps": 78105, "loss": 0.4797, "lr": 1.1771860197157855e-06, "epoch": 0.11779015427949555, "percentage": 2.36, "elapsed_time": "0:04:10", "remaining_time": "2:53:02", "throughput": 23211.18, "total_tokens": 5813952}
|
|
{"current_steps": 1845, "total_steps": 78105, "loss": 0.427, "lr": 1.180386634233773e-06, "epoch": 0.11811023622047244, "percentage": 2.36, "elapsed_time": "0:04:11", "remaining_time": "2:53:01", "throughput": 23211.55, "total_tokens": 5830016}
|
|
{"current_steps": 1850, "total_steps": 78105, "loss": 0.5708, "lr": 1.1835872487517603e-06, "epoch": 0.11843031816144933, "percentage": 2.37, "elapsed_time": "0:04:11", "remaining_time": "2:53:00", "throughput": 23210.77, "total_tokens": 5845312}
|
|
{"current_steps": 1855, "total_steps": 78105, "loss": 0.6263, "lr": 1.186787863269748e-06, "epoch": 0.11875040010242623, "percentage": 2.38, "elapsed_time": "0:04:12", "remaining_time": "2:52:59", "throughput": 23210.29, "total_tokens": 5860672}
|
|
{"current_steps": 1860, "total_steps": 78105, "loss": 0.5924, "lr": 1.1899884777877354e-06, "epoch": 0.1190704820434031, "percentage": 2.38, "elapsed_time": "0:04:13", "remaining_time": "2:52:57", "throughput": 23209.29, "total_tokens": 5875776}
|
|
{"current_steps": 1865, "total_steps": 78105, "loss": 0.3965, "lr": 1.1931890923057229e-06, "epoch": 0.11939056398438, "percentage": 2.39, "elapsed_time": "0:04:13", "remaining_time": "2:52:56", "throughput": 23209.29, "total_tokens": 5891200}
|
|
{"current_steps": 1870, "total_steps": 78105, "loss": 0.4818, "lr": 1.1963897068237103e-06, "epoch": 0.11971064592535689, "percentage": 2.39, "elapsed_time": "0:04:14", "remaining_time": "2:52:55", "throughput": 23209.2, "total_tokens": 5907008}
|
|
{"current_steps": 1875, "total_steps": 78105, "loss": 0.4941, "lr": 1.1995903213416978e-06, "epoch": 0.12003072786633379, "percentage": 2.4, "elapsed_time": "0:04:15", "remaining_time": "2:52:55", "throughput": 23209.33, "total_tokens": 5923264}
|
|
{"current_steps": 1880, "total_steps": 78105, "loss": 0.5121, "lr": 1.202790935859685e-06, "epoch": 0.12035080980731067, "percentage": 2.41, "elapsed_time": "0:04:15", "remaining_time": "2:52:55", "throughput": 23210.3, "total_tokens": 5939648}
|
|
{"current_steps": 1885, "total_steps": 78105, "loss": 0.4495, "lr": 1.2059915503776725e-06, "epoch": 0.12067089174828756, "percentage": 2.41, "elapsed_time": "0:04:16", "remaining_time": "2:52:54", "throughput": 23211.28, "total_tokens": 5955520}
|
|
{"current_steps": 1890, "total_steps": 78105, "loss": 0.5622, "lr": 1.20919216489566e-06, "epoch": 0.12099097368926445, "percentage": 2.42, "elapsed_time": "0:04:17", "remaining_time": "2:52:53", "throughput": 23211.04, "total_tokens": 5971072}
|
|
{"current_steps": 1895, "total_steps": 78105, "loss": 0.605, "lr": 1.2123927794136475e-06, "epoch": 0.12131105563024135, "percentage": 2.43, "elapsed_time": "0:04:17", "remaining_time": "2:52:53", "throughput": 23211.07, "total_tokens": 5987264}
|
|
{"current_steps": 1900, "total_steps": 78105, "loss": 0.634, "lr": 1.215593393931635e-06, "epoch": 0.12163113757121823, "percentage": 2.43, "elapsed_time": "0:04:18", "remaining_time": "2:52:52", "throughput": 23210.89, "total_tokens": 6002880}
|
|
{"current_steps": 1905, "total_steps": 78105, "loss": 0.5494, "lr": 1.2187940084496224e-06, "epoch": 0.12195121951219512, "percentage": 2.44, "elapsed_time": "0:04:19", "remaining_time": "2:52:53", "throughput": 23212.26, "total_tokens": 6019520}
|
|
{"current_steps": 1910, "total_steps": 78105, "loss": 0.6188, "lr": 1.2219946229676099e-06, "epoch": 0.12227130145317201, "percentage": 2.45, "elapsed_time": "0:04:20", "remaining_time": "2:52:52", "throughput": 23213.01, "total_tokens": 6035904}
|
|
{"current_steps": 1915, "total_steps": 78105, "loss": 0.607, "lr": 1.2251952374855973e-06, "epoch": 0.1225913833941489, "percentage": 2.45, "elapsed_time": "0:04:20", "remaining_time": "2:52:51", "throughput": 23211.99, "total_tokens": 6050816}
|
|
{"current_steps": 1920, "total_steps": 78105, "loss": 0.4801, "lr": 1.2283958520035848e-06, "epoch": 0.12291146533512579, "percentage": 2.46, "elapsed_time": "0:04:21", "remaining_time": "2:52:49", "throughput": 23211.49, "total_tokens": 6065920}
|
|
{"current_steps": 1925, "total_steps": 78105, "loss": 0.6155, "lr": 1.2315964665215723e-06, "epoch": 0.12323154727610268, "percentage": 2.46, "elapsed_time": "0:04:22", "remaining_time": "2:52:49", "throughput": 23211.3, "total_tokens": 6081728}
|
|
{"current_steps": 1930, "total_steps": 78105, "loss": 0.5382, "lr": 1.2347970810395597e-06, "epoch": 0.12355162921707957, "percentage": 2.47, "elapsed_time": "0:04:22", "remaining_time": "2:52:47", "throughput": 23211.92, "total_tokens": 6097472}
|
|
{"current_steps": 1935, "total_steps": 78105, "loss": 0.5551, "lr": 1.2379976955575472e-06, "epoch": 0.12387171115805647, "percentage": 2.48, "elapsed_time": "0:04:23", "remaining_time": "2:52:47", "throughput": 23211.67, "total_tokens": 6113152}
|
|
{"current_steps": 1940, "total_steps": 78105, "loss": 0.6146, "lr": 1.2411983100755345e-06, "epoch": 0.12419179309903335, "percentage": 2.48, "elapsed_time": "0:04:24", "remaining_time": "2:52:46", "throughput": 23212.77, "total_tokens": 6129408}
|
|
{"current_steps": 1945, "total_steps": 78105, "loss": 0.5587, "lr": 1.244398924593522e-06, "epoch": 0.12451187504001024, "percentage": 2.49, "elapsed_time": "0:04:24", "remaining_time": "2:52:45", "throughput": 23213.04, "total_tokens": 6144896}
|
|
{"current_steps": 1950, "total_steps": 78105, "loss": 0.6047, "lr": 1.2475995391115094e-06, "epoch": 0.12483195698098713, "percentage": 2.5, "elapsed_time": "0:04:25", "remaining_time": "2:52:44", "throughput": 23212.56, "total_tokens": 6160256}
|
|
{"current_steps": 1955, "total_steps": 78105, "loss": 0.583, "lr": 1.250800153629497e-06, "epoch": 0.12515203892196403, "percentage": 2.5, "elapsed_time": "0:04:26", "remaining_time": "2:52:42", "throughput": 23211.93, "total_tokens": 6175104}
|
|
{"current_steps": 1960, "total_steps": 78105, "loss": 0.5475, "lr": 1.2540007681474845e-06, "epoch": 0.1254721208629409, "percentage": 2.51, "elapsed_time": "0:04:26", "remaining_time": "2:52:40", "throughput": 23211.09, "total_tokens": 6190272}
|
|
{"current_steps": 1965, "total_steps": 78105, "loss": 0.5469, "lr": 1.257201382665472e-06, "epoch": 0.1257922028039178, "percentage": 2.52, "elapsed_time": "0:04:27", "remaining_time": "2:52:40", "throughput": 23210.72, "total_tokens": 6205952}
|
|
{"current_steps": 1970, "total_steps": 78105, "loss": 0.5827, "lr": 1.2604019971834595e-06, "epoch": 0.1261122847448947, "percentage": 2.52, "elapsed_time": "0:04:28", "remaining_time": "2:52:40", "throughput": 23212.04, "total_tokens": 6222592}
|
|
{"current_steps": 1975, "total_steps": 78105, "loss": 0.4421, "lr": 1.263602611701447e-06, "epoch": 0.12643236668587157, "percentage": 2.53, "elapsed_time": "0:04:28", "remaining_time": "2:52:38", "throughput": 23211.62, "total_tokens": 6237696}
|
|
{"current_steps": 1980, "total_steps": 78105, "loss": 0.4855, "lr": 1.2668032262194344e-06, "epoch": 0.12675244862684848, "percentage": 2.54, "elapsed_time": "0:04:29", "remaining_time": "2:52:38", "throughput": 23212.74, "total_tokens": 6254080}
|
|
{"current_steps": 1985, "total_steps": 78105, "loss": 0.5988, "lr": 1.2700038407374215e-06, "epoch": 0.12707253056782536, "percentage": 2.54, "elapsed_time": "0:04:30", "remaining_time": "2:52:38", "throughput": 23212.71, "total_tokens": 6270016}
|
|
{"current_steps": 1990, "total_steps": 78105, "loss": 0.5707, "lr": 1.2732044552554091e-06, "epoch": 0.12739261250880227, "percentage": 2.55, "elapsed_time": "0:04:30", "remaining_time": "2:52:36", "throughput": 23212.62, "total_tokens": 6285184}
|
|
{"current_steps": 1995, "total_steps": 78105, "loss": 0.5036, "lr": 1.2764050697733966e-06, "epoch": 0.12771269444977915, "percentage": 2.55, "elapsed_time": "0:04:31", "remaining_time": "2:52:35", "throughput": 23212.35, "total_tokens": 6300992}
|
|
{"current_steps": 2000, "total_steps": 78105, "loss": 0.5292, "lr": 1.279605684291384e-06, "epoch": 0.12803277639075603, "percentage": 2.56, "elapsed_time": "0:04:32", "remaining_time": "2:52:33", "throughput": 23210.59, "total_tokens": 6315392}
|
|
{"current_steps": 2005, "total_steps": 78105, "loss": 0.4883, "lr": 1.2828062988093715e-06, "epoch": 0.12835285833173293, "percentage": 2.57, "elapsed_time": "0:04:32", "remaining_time": "2:52:33", "throughput": 23209.68, "total_tokens": 6331136}
|
|
{"current_steps": 2010, "total_steps": 78105, "loss": 0.5797, "lr": 1.286006913327359e-06, "epoch": 0.1286729402727098, "percentage": 2.57, "elapsed_time": "0:04:33", "remaining_time": "2:52:33", "throughput": 23210.18, "total_tokens": 6347584}
|
|
{"current_steps": 2015, "total_steps": 78105, "loss": 0.5378, "lr": 1.2892075278453465e-06, "epoch": 0.1289930222136867, "percentage": 2.58, "elapsed_time": "0:04:34", "remaining_time": "2:52:31", "throughput": 23209.61, "total_tokens": 6362560}
|
|
{"current_steps": 2020, "total_steps": 78105, "loss": 0.4951, "lr": 1.292408142363334e-06, "epoch": 0.1293131041546636, "percentage": 2.59, "elapsed_time": "0:04:34", "remaining_time": "2:52:30", "throughput": 23210.11, "total_tokens": 6378304}
|
|
{"current_steps": 2025, "total_steps": 78105, "loss": 0.5996, "lr": 1.2956087568813214e-06, "epoch": 0.12963318609564048, "percentage": 2.59, "elapsed_time": "0:04:35", "remaining_time": "2:52:29", "throughput": 23209.7, "total_tokens": 6393280}
|
|
{"current_steps": 2030, "total_steps": 78105, "loss": 0.5688, "lr": 1.2988093713993089e-06, "epoch": 0.1299532680366174, "percentage": 2.6, "elapsed_time": "0:04:36", "remaining_time": "2:52:28", "throughput": 23210.16, "total_tokens": 6409472}
|
|
{"current_steps": 2035, "total_steps": 78105, "loss": 0.7725, "lr": 1.3020099859172963e-06, "epoch": 0.13027334997759427, "percentage": 2.61, "elapsed_time": "0:04:36", "remaining_time": "2:52:27", "throughput": 23210.04, "total_tokens": 6424960}
|
|
{"current_steps": 2040, "total_steps": 78105, "loss": 0.5561, "lr": 1.3052106004352838e-06, "epoch": 0.13059343191857115, "percentage": 2.61, "elapsed_time": "0:04:37", "remaining_time": "2:52:26", "throughput": 23209.39, "total_tokens": 6440000}
|
|
{"current_steps": 2045, "total_steps": 78105, "loss": 0.4751, "lr": 1.308411214953271e-06, "epoch": 0.13091351385954805, "percentage": 2.62, "elapsed_time": "0:04:38", "remaining_time": "2:52:27", "throughput": 23211.56, "total_tokens": 6457600}
|
|
{"current_steps": 2050, "total_steps": 78105, "loss": 0.523, "lr": 1.3116118294712585e-06, "epoch": 0.13123359580052493, "percentage": 2.62, "elapsed_time": "0:04:38", "remaining_time": "2:52:26", "throughput": 23211.47, "total_tokens": 6473088}
|
|
{"current_steps": 2055, "total_steps": 78105, "loss": 0.6071, "lr": 1.314812443989246e-06, "epoch": 0.1315536777415018, "percentage": 2.63, "elapsed_time": "0:04:39", "remaining_time": "2:52:24", "throughput": 23210.11, "total_tokens": 6487680}
|
|
{"current_steps": 2060, "total_steps": 78105, "loss": 0.4731, "lr": 1.3180130585072335e-06, "epoch": 0.13187375968247872, "percentage": 2.64, "elapsed_time": "0:04:40", "remaining_time": "2:52:23", "throughput": 23210.11, "total_tokens": 6503296}
|
|
{"current_steps": 2065, "total_steps": 78105, "loss": 0.5632, "lr": 1.321213673025221e-06, "epoch": 0.1321938416234556, "percentage": 2.64, "elapsed_time": "0:04:40", "remaining_time": "2:52:22", "throughput": 23210.2, "total_tokens": 6518912}
|
|
{"current_steps": 2070, "total_steps": 78105, "loss": 0.5936, "lr": 1.3244142875432084e-06, "epoch": 0.1325139235644325, "percentage": 2.65, "elapsed_time": "0:04:41", "remaining_time": "2:52:21", "throughput": 23210.35, "total_tokens": 6534784}
|
|
{"current_steps": 2075, "total_steps": 78105, "loss": 0.5515, "lr": 1.3276149020611959e-06, "epoch": 0.13283400550540939, "percentage": 2.66, "elapsed_time": "0:04:42", "remaining_time": "2:52:20", "throughput": 23210.41, "total_tokens": 6550528}
|
|
{"current_steps": 2080, "total_steps": 78105, "loss": 0.5215, "lr": 1.3308155165791833e-06, "epoch": 0.13315408744638627, "percentage": 2.66, "elapsed_time": "0:04:42", "remaining_time": "2:52:20", "throughput": 23210.29, "total_tokens": 6566144}
|
|
{"current_steps": 2085, "total_steps": 78105, "loss": 0.5965, "lr": 1.3340161310971708e-06, "epoch": 0.13347416938736317, "percentage": 2.67, "elapsed_time": "0:04:43", "remaining_time": "2:52:21", "throughput": 23212.19, "total_tokens": 6583872}
|
|
{"current_steps": 2090, "total_steps": 78105, "loss": 0.4697, "lr": 1.3372167456151583e-06, "epoch": 0.13379425132834005, "percentage": 2.68, "elapsed_time": "0:04:44", "remaining_time": "2:52:23", "throughput": 23215.05, "total_tokens": 6602304}
|
|
{"current_steps": 2095, "total_steps": 78105, "loss": 0.4969, "lr": 1.3404173601331457e-06, "epoch": 0.13411433326931693, "percentage": 2.68, "elapsed_time": "0:04:45", "remaining_time": "2:52:23", "throughput": 23215.26, "total_tokens": 6618112}
|
|
{"current_steps": 2100, "total_steps": 78105, "loss": 0.6301, "lr": 1.3436179746511332e-06, "epoch": 0.13443441521029384, "percentage": 2.69, "elapsed_time": "0:04:45", "remaining_time": "2:52:24", "throughput": 23217.3, "total_tokens": 6635584}
|
|
{"current_steps": 2105, "total_steps": 78105, "loss": 0.5273, "lr": 1.3468185891691205e-06, "epoch": 0.13475449715127072, "percentage": 2.7, "elapsed_time": "0:04:46", "remaining_time": "2:52:25", "throughput": 23219.37, "total_tokens": 6653568}
|
|
{"current_steps": 2110, "total_steps": 78105, "loss": 0.4814, "lr": 1.350019203687108e-06, "epoch": 0.13507457909224763, "percentage": 2.7, "elapsed_time": "0:04:47", "remaining_time": "2:52:24", "throughput": 23218.67, "total_tokens": 6668864}
|
|
{"current_steps": 2115, "total_steps": 78105, "loss": 0.5259, "lr": 1.3532198182050954e-06, "epoch": 0.1353946610332245, "percentage": 2.71, "elapsed_time": "0:04:47", "remaining_time": "2:52:23", "throughput": 23218.36, "total_tokens": 6684416}
|
|
{"current_steps": 2120, "total_steps": 78105, "loss": 0.598, "lr": 1.3564204327230829e-06, "epoch": 0.13571474297420139, "percentage": 2.71, "elapsed_time": "0:04:48", "remaining_time": "2:52:22", "throughput": 23217.41, "total_tokens": 6699392}
|
|
{"current_steps": 2125, "total_steps": 78105, "loss": 0.5651, "lr": 1.3596210472410703e-06, "epoch": 0.1360348249151783, "percentage": 2.72, "elapsed_time": "0:04:49", "remaining_time": "2:52:20", "throughput": 23216.44, "total_tokens": 6714560}
|
|
{"current_steps": 2130, "total_steps": 78105, "loss": 0.5548, "lr": 1.3628216617590578e-06, "epoch": 0.13635490685615517, "percentage": 2.73, "elapsed_time": "0:04:49", "remaining_time": "2:52:19", "throughput": 23216.63, "total_tokens": 6729920}
|
|
{"current_steps": 2135, "total_steps": 78105, "loss": 0.5734, "lr": 1.3660222762770453e-06, "epoch": 0.13667498879713205, "percentage": 2.73, "elapsed_time": "0:04:50", "remaining_time": "2:52:17", "throughput": 23215.69, "total_tokens": 6744768}
|
|
{"current_steps": 2140, "total_steps": 78105, "loss": 0.4626, "lr": 1.3692228907950327e-06, "epoch": 0.13699507073810896, "percentage": 2.74, "elapsed_time": "0:04:51", "remaining_time": "2:52:16", "throughput": 23214.76, "total_tokens": 6759680}
|
|
{"current_steps": 2145, "total_steps": 78105, "loss": 0.5224, "lr": 1.3724235053130202e-06, "epoch": 0.13731515267908584, "percentage": 2.75, "elapsed_time": "0:04:51", "remaining_time": "2:52:14", "throughput": 23214.94, "total_tokens": 6775168}
|
|
{"current_steps": 2150, "total_steps": 78105, "loss": 0.5441, "lr": 1.3756241198310077e-06, "epoch": 0.13763523462006275, "percentage": 2.75, "elapsed_time": "0:04:52", "remaining_time": "2:52:14", "throughput": 23215.48, "total_tokens": 6791296}
|
|
{"current_steps": 2155, "total_steps": 78105, "loss": 0.446, "lr": 1.3788247343489951e-06, "epoch": 0.13795531656103963, "percentage": 2.76, "elapsed_time": "0:04:53", "remaining_time": "2:52:15", "throughput": 23217.28, "total_tokens": 6808640}
|
|
{"current_steps": 2160, "total_steps": 78105, "loss": 0.6584, "lr": 1.3820253488669826e-06, "epoch": 0.1382753985020165, "percentage": 2.77, "elapsed_time": "0:04:53", "remaining_time": "2:52:14", "throughput": 23217.21, "total_tokens": 6824064}
|
|
{"current_steps": 2165, "total_steps": 78105, "loss": 0.4944, "lr": 1.3852259633849698e-06, "epoch": 0.1385954804429934, "percentage": 2.77, "elapsed_time": "0:04:54", "remaining_time": "2:52:13", "throughput": 23218.02, "total_tokens": 6839872}
|
|
{"current_steps": 2170, "total_steps": 78105, "loss": 0.4855, "lr": 1.3884265779029573e-06, "epoch": 0.1389155623839703, "percentage": 2.78, "elapsed_time": "0:04:55", "remaining_time": "2:52:12", "throughput": 23217.71, "total_tokens": 6855744}
|
|
{"current_steps": 2175, "total_steps": 78105, "loss": 0.5269, "lr": 1.3916271924209448e-06, "epoch": 0.1392356443249472, "percentage": 2.78, "elapsed_time": "0:04:55", "remaining_time": "2:52:10", "throughput": 23215.83, "total_tokens": 6869632}
|
|
{"current_steps": 2180, "total_steps": 78105, "loss": 0.4592, "lr": 1.3948278069389322e-06, "epoch": 0.13955572626592408, "percentage": 2.79, "elapsed_time": "0:04:56", "remaining_time": "2:52:09", "throughput": 23215.67, "total_tokens": 6885696}
|
|
{"current_steps": 2185, "total_steps": 78105, "loss": 0.4235, "lr": 1.3980284214569197e-06, "epoch": 0.13987580820690096, "percentage": 2.8, "elapsed_time": "0:04:57", "remaining_time": "2:52:09", "throughput": 23215.9, "total_tokens": 6901504}
|
|
{"current_steps": 2190, "total_steps": 78105, "loss": 0.6028, "lr": 1.4012290359749072e-06, "epoch": 0.14019589014787787, "percentage": 2.8, "elapsed_time": "0:04:57", "remaining_time": "2:52:07", "throughput": 23215.61, "total_tokens": 6916480}
|
|
{"current_steps": 2195, "total_steps": 78105, "loss": 0.4976, "lr": 1.4044296504928949e-06, "epoch": 0.14051597208885475, "percentage": 2.81, "elapsed_time": "0:04:58", "remaining_time": "2:52:06", "throughput": 23215.62, "total_tokens": 6932416}
|
|
{"current_steps": 2200, "total_steps": 78105, "loss": 0.5292, "lr": 1.4076302650108823e-06, "epoch": 0.14083605402983163, "percentage": 2.82, "elapsed_time": "0:04:59", "remaining_time": "2:52:06", "throughput": 23216.71, "total_tokens": 6948992}
|
|
{"current_steps": 2205, "total_steps": 78105, "loss": 0.4567, "lr": 1.4108308795288698e-06, "epoch": 0.14115613597080853, "percentage": 2.82, "elapsed_time": "0:04:59", "remaining_time": "2:52:05", "throughput": 23216.6, "total_tokens": 6964416}
|
|
{"current_steps": 2210, "total_steps": 78105, "loss": 0.4673, "lr": 1.4140314940468573e-06, "epoch": 0.1414762179117854, "percentage": 2.83, "elapsed_time": "0:05:00", "remaining_time": "2:52:05", "throughput": 23217.18, "total_tokens": 6980544}
|
|
{"current_steps": 2215, "total_steps": 78105, "loss": 0.5736, "lr": 1.4172321085648447e-06, "epoch": 0.14179629985276232, "percentage": 2.84, "elapsed_time": "0:05:01", "remaining_time": "2:52:05", "throughput": 23218.16, "total_tokens": 6997440}
|
|
{"current_steps": 2220, "total_steps": 78105, "loss": 0.4089, "lr": 1.4204327230828322e-06, "epoch": 0.1421163817937392, "percentage": 2.84, "elapsed_time": "0:05:02", "remaining_time": "2:52:04", "throughput": 23217.47, "total_tokens": 7012672}
|
|
{"current_steps": 2225, "total_steps": 78105, "loss": 0.4398, "lr": 1.4236333376008192e-06, "epoch": 0.14243646373471608, "percentage": 2.85, "elapsed_time": "0:05:02", "remaining_time": "2:52:03", "throughput": 23216.24, "total_tokens": 7027648}
|
|
{"current_steps": 2230, "total_steps": 78105, "loss": 0.5415, "lr": 1.426833952118807e-06, "epoch": 0.142756545675693, "percentage": 2.86, "elapsed_time": "0:05:03", "remaining_time": "2:52:02", "throughput": 23216.0, "total_tokens": 7043200}
|
|
{"current_steps": 2235, "total_steps": 78105, "loss": 0.7296, "lr": 1.4300345666367944e-06, "epoch": 0.14307662761666987, "percentage": 2.86, "elapsed_time": "0:05:04", "remaining_time": "2:52:01", "throughput": 23215.44, "total_tokens": 7058688}
|
|
{"current_steps": 2240, "total_steps": 78105, "loss": 0.589, "lr": 1.4332351811547819e-06, "epoch": 0.14339670955764675, "percentage": 2.87, "elapsed_time": "0:05:04", "remaining_time": "2:52:00", "throughput": 23214.98, "total_tokens": 7074048}
|
|
{"current_steps": 2245, "total_steps": 78105, "loss": 0.5317, "lr": 1.4364357956727693e-06, "epoch": 0.14371679149862365, "percentage": 2.87, "elapsed_time": "0:05:05", "remaining_time": "2:51:59", "throughput": 23215.1, "total_tokens": 7089792}
|
|
{"current_steps": 2250, "total_steps": 78105, "loss": 0.4828, "lr": 1.4396364101907568e-06, "epoch": 0.14403687343960053, "percentage": 2.88, "elapsed_time": "0:05:06", "remaining_time": "2:51:58", "throughput": 23213.72, "total_tokens": 7104640}
|
|
{"current_steps": 2255, "total_steps": 78105, "loss": 0.5703, "lr": 1.4428370247087443e-06, "epoch": 0.14435695538057744, "percentage": 2.89, "elapsed_time": "0:05:06", "remaining_time": "2:51:57", "throughput": 23213.3, "total_tokens": 7120064}
|
|
{"current_steps": 2260, "total_steps": 78105, "loss": 0.4994, "lr": 1.4460376392267317e-06, "epoch": 0.14467703732155432, "percentage": 2.89, "elapsed_time": "0:05:07", "remaining_time": "2:51:55", "throughput": 23212.56, "total_tokens": 7135232}
|
|
{"current_steps": 2265, "total_steps": 78105, "loss": 0.5625, "lr": 1.4492382537447192e-06, "epoch": 0.1449971192625312, "percentage": 2.9, "elapsed_time": "0:05:08", "remaining_time": "2:51:53", "throughput": 23211.21, "total_tokens": 7149632}
|
|
{"current_steps": 2270, "total_steps": 78105, "loss": 0.421, "lr": 1.4524388682627067e-06, "epoch": 0.1453172012035081, "percentage": 2.91, "elapsed_time": "0:05:08", "remaining_time": "2:51:52", "throughput": 23209.98, "total_tokens": 7164352}
|
|
{"current_steps": 2275, "total_steps": 78105, "loss": 0.5397, "lr": 1.4556394827806941e-06, "epoch": 0.14563728314448499, "percentage": 2.91, "elapsed_time": "0:05:09", "remaining_time": "2:51:51", "throughput": 23209.22, "total_tokens": 7179776}
|
|
{"current_steps": 2280, "total_steps": 78105, "loss": 0.4446, "lr": 1.4588400972986816e-06, "epoch": 0.14595736508546187, "percentage": 2.92, "elapsed_time": "0:05:09", "remaining_time": "2:51:49", "throughput": 23207.64, "total_tokens": 7194240}
|
|
{"current_steps": 2285, "total_steps": 78105, "loss": 0.4157, "lr": 1.4620407118166688e-06, "epoch": 0.14627744702643877, "percentage": 2.93, "elapsed_time": "0:05:10", "remaining_time": "2:51:48", "throughput": 23207.26, "total_tokens": 7209472}
|
|
{"current_steps": 2290, "total_steps": 78105, "loss": 0.4357, "lr": 1.4652413263346563e-06, "epoch": 0.14659752896741565, "percentage": 2.93, "elapsed_time": "0:05:11", "remaining_time": "2:51:47", "throughput": 23207.33, "total_tokens": 7225088}
|
|
{"current_steps": 2295, "total_steps": 78105, "loss": 0.6335, "lr": 1.4684419408526438e-06, "epoch": 0.14691761090839256, "percentage": 2.94, "elapsed_time": "0:05:11", "remaining_time": "2:51:46", "throughput": 23207.45, "total_tokens": 7240704}
|
|
{"current_steps": 2300, "total_steps": 78105, "loss": 0.4996, "lr": 1.4716425553706312e-06, "epoch": 0.14723769284936944, "percentage": 2.94, "elapsed_time": "0:05:12", "remaining_time": "2:51:44", "throughput": 23208.03, "total_tokens": 7256000}
|
|
{"current_steps": 2305, "total_steps": 78105, "loss": 0.5397, "lr": 1.4748431698886187e-06, "epoch": 0.14755777479034632, "percentage": 2.95, "elapsed_time": "0:05:13", "remaining_time": "2:51:42", "throughput": 23206.68, "total_tokens": 7270336}
|
|
{"current_steps": 2310, "total_steps": 78105, "loss": 0.5644, "lr": 1.4780437844066062e-06, "epoch": 0.14787785673132323, "percentage": 2.96, "elapsed_time": "0:05:13", "remaining_time": "2:51:40", "throughput": 23206.44, "total_tokens": 7285440}
|
|
{"current_steps": 2315, "total_steps": 78105, "loss": 0.4965, "lr": 1.4812443989245936e-06, "epoch": 0.1481979386723001, "percentage": 2.96, "elapsed_time": "0:05:14", "remaining_time": "2:51:40", "throughput": 23206.05, "total_tokens": 7301120}
|
|
{"current_steps": 2320, "total_steps": 78105, "loss": 0.514, "lr": 1.4844450134425811e-06, "epoch": 0.14851802061327699, "percentage": 2.97, "elapsed_time": "0:05:15", "remaining_time": "2:51:39", "throughput": 23205.25, "total_tokens": 7316416}
|
|
{"current_steps": 2325, "total_steps": 78105, "loss": 0.5168, "lr": 1.4876456279605686e-06, "epoch": 0.1488381025542539, "percentage": 2.98, "elapsed_time": "0:05:15", "remaining_time": "2:51:38", "throughput": 23205.56, "total_tokens": 7332160}
|
|
{"current_steps": 2330, "total_steps": 78105, "loss": 0.6872, "lr": 1.490846242478556e-06, "epoch": 0.14915818449523077, "percentage": 2.98, "elapsed_time": "0:05:16", "remaining_time": "2:51:37", "throughput": 23205.29, "total_tokens": 7347776}
|
|
{"current_steps": 2335, "total_steps": 78105, "loss": 0.5837, "lr": 1.4940468569965435e-06, "epoch": 0.14947826643620768, "percentage": 2.99, "elapsed_time": "0:05:17", "remaining_time": "2:51:36", "throughput": 23204.91, "total_tokens": 7363200}
|
|
{"current_steps": 2340, "total_steps": 78105, "loss": 0.4732, "lr": 1.497247471514531e-06, "epoch": 0.14979834837718456, "percentage": 3.0, "elapsed_time": "0:05:17", "remaining_time": "2:51:35", "throughput": 23205.08, "total_tokens": 7378752}
|
|
{"current_steps": 2345, "total_steps": 78105, "loss": 0.4904, "lr": 1.5004480860325182e-06, "epoch": 0.15011843031816144, "percentage": 3.0, "elapsed_time": "0:05:18", "remaining_time": "2:51:34", "throughput": 23204.73, "total_tokens": 7393984}
|
|
{"current_steps": 2350, "total_steps": 78105, "loss": 0.5569, "lr": 1.5036487005505057e-06, "epoch": 0.15043851225913835, "percentage": 3.01, "elapsed_time": "0:05:19", "remaining_time": "2:51:32", "throughput": 23204.15, "total_tokens": 7408832}
|
|
{"current_steps": 2355, "total_steps": 78105, "loss": 0.4751, "lr": 1.5068493150684932e-06, "epoch": 0.15075859420011523, "percentage": 3.02, "elapsed_time": "0:05:19", "remaining_time": "2:51:30", "throughput": 23202.61, "total_tokens": 7423040}
|
|
{"current_steps": 2360, "total_steps": 78105, "loss": 0.5269, "lr": 1.5100499295864806e-06, "epoch": 0.15107867614109213, "percentage": 3.02, "elapsed_time": "0:05:20", "remaining_time": "2:51:29", "throughput": 23202.17, "total_tokens": 7438464}
|
|
{"current_steps": 2365, "total_steps": 78105, "loss": 0.5282, "lr": 1.5132505441044681e-06, "epoch": 0.151398758082069, "percentage": 3.03, "elapsed_time": "0:05:21", "remaining_time": "2:51:28", "throughput": 23201.7, "total_tokens": 7453824}
|
|
{"current_steps": 2370, "total_steps": 78105, "loss": 0.6004, "lr": 1.5164511586224556e-06, "epoch": 0.1517188400230459, "percentage": 3.03, "elapsed_time": "0:05:21", "remaining_time": "2:51:27", "throughput": 23201.32, "total_tokens": 7469312}
|
|
{"current_steps": 2375, "total_steps": 78105, "loss": 0.6537, "lr": 1.519651773140443e-06, "epoch": 0.1520389219640228, "percentage": 3.04, "elapsed_time": "0:05:22", "remaining_time": "2:51:26", "throughput": 23201.52, "total_tokens": 7485120}
|
|
{"current_steps": 2380, "total_steps": 78105, "loss": 0.5134, "lr": 1.5228523876584305e-06, "epoch": 0.15235900390499968, "percentage": 3.05, "elapsed_time": "0:05:23", "remaining_time": "2:51:25", "throughput": 23201.34, "total_tokens": 7500416}
|
|
{"current_steps": 2385, "total_steps": 78105, "loss": 0.4294, "lr": 1.526053002176418e-06, "epoch": 0.15267908584597656, "percentage": 3.05, "elapsed_time": "0:05:23", "remaining_time": "2:51:25", "throughput": 23202.0, "total_tokens": 7516672}
|
|
{"current_steps": 2390, "total_steps": 78105, "loss": 0.5174, "lr": 1.5292536166944054e-06, "epoch": 0.15299916778695347, "percentage": 3.06, "elapsed_time": "0:05:24", "remaining_time": "2:51:24", "throughput": 23202.59, "total_tokens": 7532800}
|
|
{"current_steps": 2395, "total_steps": 78105, "loss": 0.5293, "lr": 1.532454231212393e-06, "epoch": 0.15331924972793035, "percentage": 3.07, "elapsed_time": "0:05:25", "remaining_time": "2:51:23", "throughput": 23201.91, "total_tokens": 7547776}
|
|
{"current_steps": 2400, "total_steps": 78105, "loss": 0.6115, "lr": 1.5356548457303804e-06, "epoch": 0.15363933166890725, "percentage": 3.07, "elapsed_time": "0:05:25", "remaining_time": "2:51:21", "throughput": 23200.98, "total_tokens": 7562496}
|
|
{"current_steps": 2405, "total_steps": 78105, "loss": 0.5652, "lr": 1.5388554602483676e-06, "epoch": 0.15395941360988413, "percentage": 3.08, "elapsed_time": "0:05:26", "remaining_time": "2:51:20", "throughput": 23200.41, "total_tokens": 7577920}
|
|
{"current_steps": 2410, "total_steps": 78105, "loss": 0.5677, "lr": 1.542056074766355e-06, "epoch": 0.154279495550861, "percentage": 3.09, "elapsed_time": "0:05:27", "remaining_time": "2:51:19", "throughput": 23200.33, "total_tokens": 7593216}
|
|
{"current_steps": 2415, "total_steps": 78105, "loss": 0.5918, "lr": 1.5452566892843426e-06, "epoch": 0.15459957749183792, "percentage": 3.09, "elapsed_time": "0:05:27", "remaining_time": "2:51:18", "throughput": 23199.35, "total_tokens": 7608192}
|
|
{"current_steps": 2420, "total_steps": 78105, "loss": 0.4747, "lr": 1.54845730380233e-06, "epoch": 0.1549196594328148, "percentage": 3.1, "elapsed_time": "0:05:28", "remaining_time": "2:51:18", "throughput": 23199.99, "total_tokens": 7624448}
|
|
{"current_steps": 2425, "total_steps": 78105, "loss": 0.4965, "lr": 1.5516579183203175e-06, "epoch": 0.15523974137379168, "percentage": 3.1, "elapsed_time": "0:05:29", "remaining_time": "2:51:17", "throughput": 23200.06, "total_tokens": 7640192}
|
|
{"current_steps": 2430, "total_steps": 78105, "loss": 0.5224, "lr": 1.554858532838305e-06, "epoch": 0.15555982331476859, "percentage": 3.11, "elapsed_time": "0:05:29", "remaining_time": "2:51:15", "throughput": 23199.53, "total_tokens": 7654848}
|
|
{"current_steps": 2435, "total_steps": 78105, "loss": 0.6756, "lr": 1.5580591473562926e-06, "epoch": 0.15587990525574547, "percentage": 3.12, "elapsed_time": "0:05:30", "remaining_time": "2:51:15", "throughput": 23199.75, "total_tokens": 7670848}
|
|
{"current_steps": 2440, "total_steps": 78105, "loss": 0.6278, "lr": 1.5612597618742801e-06, "epoch": 0.15619998719672237, "percentage": 3.12, "elapsed_time": "0:05:31", "remaining_time": "2:51:13", "throughput": 23199.46, "total_tokens": 7686016}
|
|
{"current_steps": 2445, "total_steps": 78105, "loss": 0.549, "lr": 1.5644603763922676e-06, "epoch": 0.15652006913769925, "percentage": 3.13, "elapsed_time": "0:05:31", "remaining_time": "2:51:12", "throughput": 23199.79, "total_tokens": 7701760}
|
|
{"current_steps": 2450, "total_steps": 78105, "loss": 0.5704, "lr": 1.567660990910255e-06, "epoch": 0.15684015107867613, "percentage": 3.14, "elapsed_time": "0:05:32", "remaining_time": "2:51:12", "throughput": 23199.95, "total_tokens": 7717760}
|
|
{"current_steps": 2455, "total_steps": 78105, "loss": 0.3862, "lr": 1.5708616054282425e-06, "epoch": 0.15716023301965304, "percentage": 3.14, "elapsed_time": "0:05:33", "remaining_time": "2:51:11", "throughput": 23199.65, "total_tokens": 7733376}
|
|
{"current_steps": 2460, "total_steps": 78105, "loss": 0.5703, "lr": 1.57406221994623e-06, "epoch": 0.15748031496062992, "percentage": 3.15, "elapsed_time": "0:05:34", "remaining_time": "2:51:12", "throughput": 23200.93, "total_tokens": 7750464}
|
|
{"current_steps": 2465, "total_steps": 78105, "loss": 0.5254, "lr": 1.577262834464217e-06, "epoch": 0.1578003969016068, "percentage": 3.16, "elapsed_time": "0:05:34", "remaining_time": "2:51:13", "throughput": 23203.58, "total_tokens": 7768576}
|
|
{"current_steps": 2470, "total_steps": 78105, "loss": 0.4806, "lr": 1.5804634489822047e-06, "epoch": 0.1581204788425837, "percentage": 3.16, "elapsed_time": "0:05:35", "remaining_time": "2:51:12", "throughput": 23203.92, "total_tokens": 7784256}
|
|
{"current_steps": 2475, "total_steps": 78105, "loss": 0.3638, "lr": 1.5836640635001922e-06, "epoch": 0.15844056078356059, "percentage": 3.17, "elapsed_time": "0:05:36", "remaining_time": "2:51:11", "throughput": 23203.47, "total_tokens": 7799232}
|
|
{"current_steps": 2480, "total_steps": 78105, "loss": 0.6353, "lr": 1.5868646780181796e-06, "epoch": 0.1587606427245375, "percentage": 3.18, "elapsed_time": "0:05:36", "remaining_time": "2:51:10", "throughput": 23203.87, "total_tokens": 7815040}
|
|
{"current_steps": 2485, "total_steps": 78105, "loss": 0.4745, "lr": 1.5900652925361671e-06, "epoch": 0.15908072466551437, "percentage": 3.18, "elapsed_time": "0:05:37", "remaining_time": "2:51:09", "throughput": 23203.67, "total_tokens": 7830400}
|
|
{"current_steps": 2490, "total_steps": 78105, "loss": 0.5098, "lr": 1.5932659070541546e-06, "epoch": 0.15940080660649125, "percentage": 3.19, "elapsed_time": "0:05:38", "remaining_time": "2:51:08", "throughput": 23203.63, "total_tokens": 7846336}
|
|
{"current_steps": 2495, "total_steps": 78105, "loss": 0.6127, "lr": 1.596466521572142e-06, "epoch": 0.15972088854746816, "percentage": 3.19, "elapsed_time": "0:05:38", "remaining_time": "2:51:08", "throughput": 23204.52, "total_tokens": 7862528}
|
|
{"current_steps": 2500, "total_steps": 78105, "loss": 0.4496, "lr": 1.5996671360901295e-06, "epoch": 0.16004097048844504, "percentage": 3.2, "elapsed_time": "0:05:39", "remaining_time": "2:51:09", "throughput": 23205.78, "total_tokens": 7880000}
|
|
{"current_steps": 2505, "total_steps": 78105, "loss": 0.4487, "lr": 1.602867750608117e-06, "epoch": 0.16036105242942192, "percentage": 3.21, "elapsed_time": "0:05:40", "remaining_time": "2:51:08", "throughput": 23205.77, "total_tokens": 7895296}
|
|
{"current_steps": 2510, "total_steps": 78105, "loss": 0.636, "lr": 1.6060683651261044e-06, "epoch": 0.16068113437039883, "percentage": 3.21, "elapsed_time": "0:05:40", "remaining_time": "2:51:07", "throughput": 23205.79, "total_tokens": 7911104}
|
|
{"current_steps": 2515, "total_steps": 78105, "loss": 0.5421, "lr": 1.609268979644092e-06, "epoch": 0.1610012163113757, "percentage": 3.22, "elapsed_time": "0:05:41", "remaining_time": "2:51:05", "throughput": 23205.32, "total_tokens": 7925952}
|
|
{"current_steps": 2520, "total_steps": 78105, "loss": 0.6206, "lr": 1.6124695941620794e-06, "epoch": 0.1613212982523526, "percentage": 3.23, "elapsed_time": "0:05:42", "remaining_time": "2:51:04", "throughput": 23204.45, "total_tokens": 7940928}
|
|
{"current_steps": 2525, "total_steps": 78105, "loss": 0.5694, "lr": 1.6156702086800666e-06, "epoch": 0.1616413801933295, "percentage": 3.23, "elapsed_time": "0:05:42", "remaining_time": "2:51:02", "throughput": 23203.12, "total_tokens": 7955200}
|
|
{"current_steps": 2530, "total_steps": 78105, "loss": 0.6508, "lr": 1.618870823198054e-06, "epoch": 0.16196146213430637, "percentage": 3.24, "elapsed_time": "0:05:43", "remaining_time": "2:51:01", "throughput": 23202.87, "total_tokens": 7970944}
|
|
{"current_steps": 2535, "total_steps": 78105, "loss": 0.4962, "lr": 1.6220714377160416e-06, "epoch": 0.16228154407528328, "percentage": 3.25, "elapsed_time": "0:05:44", "remaining_time": "2:51:00", "throughput": 23202.42, "total_tokens": 7985856}
|
|
{"current_steps": 2540, "total_steps": 78105, "loss": 0.4368, "lr": 1.625272052234029e-06, "epoch": 0.16260162601626016, "percentage": 3.25, "elapsed_time": "0:05:44", "remaining_time": "2:50:59", "throughput": 23201.46, "total_tokens": 8001088}
|
|
{"current_steps": 2545, "total_steps": 78105, "loss": 0.5544, "lr": 1.6284726667520165e-06, "epoch": 0.16292170795723707, "percentage": 3.26, "elapsed_time": "0:05:45", "remaining_time": "2:50:59", "throughput": 23202.06, "total_tokens": 8017856}
|
|
{"current_steps": 2550, "total_steps": 78105, "loss": 0.5051, "lr": 1.631673281270004e-06, "epoch": 0.16324178989821395, "percentage": 3.26, "elapsed_time": "0:05:46", "remaining_time": "2:50:59", "throughput": 23202.42, "total_tokens": 8033792}
|
|
{"current_steps": 2555, "total_steps": 78105, "loss": 0.5028, "lr": 1.6348738957879914e-06, "epoch": 0.16356187183919083, "percentage": 3.27, "elapsed_time": "0:05:46", "remaining_time": "2:50:57", "throughput": 23200.95, "total_tokens": 8048256}
|
|
{"current_steps": 2560, "total_steps": 78105, "loss": 0.5597, "lr": 1.638074510305979e-06, "epoch": 0.16388195378016773, "percentage": 3.28, "elapsed_time": "0:05:47", "remaining_time": "2:50:56", "throughput": 23200.9, "total_tokens": 8063680}
|
|
{"current_steps": 2565, "total_steps": 78105, "loss": 0.4872, "lr": 1.6412751248239664e-06, "epoch": 0.1642020357211446, "percentage": 3.28, "elapsed_time": "0:05:48", "remaining_time": "2:50:55", "throughput": 23201.27, "total_tokens": 8079744}
|
|
{"current_steps": 2570, "total_steps": 78105, "loss": 0.5346, "lr": 1.6444757393419538e-06, "epoch": 0.1645221176621215, "percentage": 3.29, "elapsed_time": "0:05:48", "remaining_time": "2:50:54", "throughput": 23200.86, "total_tokens": 8094656}
|
|
{"current_steps": 2575, "total_steps": 78105, "loss": 0.5457, "lr": 1.6476763538599413e-06, "epoch": 0.1648421996030984, "percentage": 3.3, "elapsed_time": "0:05:49", "remaining_time": "2:50:53", "throughput": 23201.12, "total_tokens": 8110528}
|
|
{"current_steps": 2580, "total_steps": 78105, "loss": 0.5589, "lr": 1.6508769683779288e-06, "epoch": 0.16516228154407528, "percentage": 3.3, "elapsed_time": "0:05:50", "remaining_time": "2:50:53", "throughput": 23201.23, "total_tokens": 8126720}
|
|
{"current_steps": 2585, "total_steps": 78105, "loss": 0.5366, "lr": 1.654077582895916e-06, "epoch": 0.1654823634850522, "percentage": 3.31, "elapsed_time": "0:05:50", "remaining_time": "2:50:51", "throughput": 23200.33, "total_tokens": 8141312}
|
|
{"current_steps": 2590, "total_steps": 78105, "loss": 0.5181, "lr": 1.6572781974139035e-06, "epoch": 0.16580244542602907, "percentage": 3.32, "elapsed_time": "0:05:51", "remaining_time": "2:50:50", "throughput": 23199.87, "total_tokens": 8156736}
|
|
{"current_steps": 2595, "total_steps": 78105, "loss": 0.5483, "lr": 1.660478811931891e-06, "epoch": 0.16612252736700595, "percentage": 3.32, "elapsed_time": "0:05:52", "remaining_time": "2:50:50", "throughput": 23200.18, "total_tokens": 8172480}
|
|
{"current_steps": 2600, "total_steps": 78105, "loss": 0.6026, "lr": 1.6636794264498784e-06, "epoch": 0.16644260930798285, "percentage": 3.33, "elapsed_time": "0:05:52", "remaining_time": "2:50:49", "throughput": 23200.18, "total_tokens": 8188224}
|
|
{"current_steps": 2605, "total_steps": 78105, "loss": 0.4673, "lr": 1.666880040967866e-06, "epoch": 0.16676269124895973, "percentage": 3.34, "elapsed_time": "0:05:53", "remaining_time": "2:50:48", "throughput": 23200.92, "total_tokens": 8204352}
|
|
{"current_steps": 2610, "total_steps": 78105, "loss": 0.6196, "lr": 1.6700806554858534e-06, "epoch": 0.1670827731899366, "percentage": 3.34, "elapsed_time": "0:05:54", "remaining_time": "2:50:47", "throughput": 23200.65, "total_tokens": 8219328}
|
|
{"current_steps": 2615, "total_steps": 78105, "loss": 0.4395, "lr": 1.6732812700038408e-06, "epoch": 0.16740285513091352, "percentage": 3.35, "elapsed_time": "0:05:54", "remaining_time": "2:50:46", "throughput": 23200.05, "total_tokens": 8234560}
|
|
{"current_steps": 2620, "total_steps": 78105, "loss": 0.6626, "lr": 1.6764818845218283e-06, "epoch": 0.1677229370718904, "percentage": 3.35, "elapsed_time": "0:05:55", "remaining_time": "2:50:44", "throughput": 23199.17, "total_tokens": 8249344}
|
|
{"current_steps": 2625, "total_steps": 78105, "loss": 0.5272, "lr": 1.6796824990398158e-06, "epoch": 0.1680430190128673, "percentage": 3.36, "elapsed_time": "0:05:56", "remaining_time": "2:50:44", "throughput": 23199.33, "total_tokens": 8265600}
|
|
{"current_steps": 2630, "total_steps": 78105, "loss": 0.4894, "lr": 1.6828831135578032e-06, "epoch": 0.16836310095384419, "percentage": 3.37, "elapsed_time": "0:05:56", "remaining_time": "2:50:43", "throughput": 23198.66, "total_tokens": 8280384}
|
|
{"current_steps": 2635, "total_steps": 78105, "loss": 0.5581, "lr": 1.6860837280757907e-06, "epoch": 0.16868318289482107, "percentage": 3.37, "elapsed_time": "0:05:57", "remaining_time": "2:50:42", "throughput": 23197.98, "total_tokens": 8295744}
|
|
{"current_steps": 2640, "total_steps": 78105, "loss": 0.5212, "lr": 1.6892843425937782e-06, "epoch": 0.16900326483579797, "percentage": 3.38, "elapsed_time": "0:05:58", "remaining_time": "2:50:41", "throughput": 23197.94, "total_tokens": 8311680}
|
|
{"current_steps": 2645, "total_steps": 78105, "loss": 0.5057, "lr": 1.6924849571117654e-06, "epoch": 0.16932334677677485, "percentage": 3.39, "elapsed_time": "0:05:58", "remaining_time": "2:50:40", "throughput": 23197.4, "total_tokens": 8326592}
|
|
{"current_steps": 2650, "total_steps": 78105, "loss": 0.5038, "lr": 1.6956855716297529e-06, "epoch": 0.16964342871775173, "percentage": 3.39, "elapsed_time": "0:05:59", "remaining_time": "2:50:39", "throughput": 23196.88, "total_tokens": 8341632}
|
|
{"current_steps": 2655, "total_steps": 78105, "loss": 0.4613, "lr": 1.6988861861477404e-06, "epoch": 0.16996351065872864, "percentage": 3.4, "elapsed_time": "0:06:00", "remaining_time": "2:50:39", "throughput": 23197.32, "total_tokens": 8358400}
|
|
{"current_steps": 2660, "total_steps": 78105, "loss": 0.4923, "lr": 1.7020868006657278e-06, "epoch": 0.17028359259970552, "percentage": 3.41, "elapsed_time": "0:06:00", "remaining_time": "2:50:38", "throughput": 23197.26, "total_tokens": 8374144}
|
|
{"current_steps": 2665, "total_steps": 78105, "loss": 0.4093, "lr": 1.7052874151837153e-06, "epoch": 0.17060367454068243, "percentage": 3.41, "elapsed_time": "0:06:01", "remaining_time": "2:50:37", "throughput": 23196.78, "total_tokens": 8389440}
|
|
{"current_steps": 2670, "total_steps": 78105, "loss": 0.5168, "lr": 1.7084880297017028e-06, "epoch": 0.1709237564816593, "percentage": 3.42, "elapsed_time": "0:06:02", "remaining_time": "2:50:36", "throughput": 23196.52, "total_tokens": 8404672}
|
|
{"current_steps": 2675, "total_steps": 78105, "loss": 0.5451, "lr": 1.7116886442196904e-06, "epoch": 0.17124383842263619, "percentage": 3.42, "elapsed_time": "0:06:02", "remaining_time": "2:50:35", "throughput": 23196.18, "total_tokens": 8420096}
|
|
{"current_steps": 2680, "total_steps": 78105, "loss": 0.6514, "lr": 1.714889258737678e-06, "epoch": 0.1715639203636131, "percentage": 3.43, "elapsed_time": "0:06:03", "remaining_time": "2:50:35", "throughput": 23196.43, "total_tokens": 8436288}
|
|
{"current_steps": 2685, "total_steps": 78105, "loss": 0.5755, "lr": 1.7180898732556654e-06, "epoch": 0.17188400230458997, "percentage": 3.44, "elapsed_time": "0:06:04", "remaining_time": "2:50:35", "throughput": 23196.71, "total_tokens": 8452672}
|
|
{"current_steps": 2690, "total_steps": 78105, "loss": 0.4582, "lr": 1.7212904877736528e-06, "epoch": 0.17220408424556685, "percentage": 3.44, "elapsed_time": "0:06:05", "remaining_time": "2:50:35", "throughput": 23196.88, "total_tokens": 8469120}
|
|
{"current_steps": 2695, "total_steps": 78105, "loss": 0.4967, "lr": 1.7244911022916403e-06, "epoch": 0.17252416618654376, "percentage": 3.45, "elapsed_time": "0:06:05", "remaining_time": "2:50:34", "throughput": 23196.52, "total_tokens": 8484736}
|
|
{"current_steps": 2700, "total_steps": 78105, "loss": 0.738, "lr": 1.7276917168096278e-06, "epoch": 0.17284424812752064, "percentage": 3.46, "elapsed_time": "0:06:06", "remaining_time": "2:50:34", "throughput": 23196.74, "total_tokens": 8500480}
|
|
{"current_steps": 2705, "total_steps": 78105, "loss": 0.3984, "lr": 1.7308923313276148e-06, "epoch": 0.17316433006849755, "percentage": 3.46, "elapsed_time": "0:06:07", "remaining_time": "2:50:32", "throughput": 23195.43, "total_tokens": 8514624}
|
|
{"current_steps": 2710, "total_steps": 78105, "loss": 0.4841, "lr": 1.7340929458456025e-06, "epoch": 0.17348441200947443, "percentage": 3.47, "elapsed_time": "0:06:07", "remaining_time": "2:50:31", "throughput": 23194.49, "total_tokens": 8529664}
|
|
{"current_steps": 2715, "total_steps": 78105, "loss": 0.6277, "lr": 1.73729356036359e-06, "epoch": 0.1738044939504513, "percentage": 3.48, "elapsed_time": "0:06:08", "remaining_time": "2:50:30", "throughput": 23194.82, "total_tokens": 8545280}
|
|
{"current_steps": 2720, "total_steps": 78105, "loss": 0.5041, "lr": 1.7404941748815774e-06, "epoch": 0.1741245758914282, "percentage": 3.48, "elapsed_time": "0:06:09", "remaining_time": "2:50:28", "throughput": 23194.67, "total_tokens": 8560384}
|
|
{"current_steps": 2725, "total_steps": 78105, "loss": 0.582, "lr": 1.743694789399565e-06, "epoch": 0.1744446578324051, "percentage": 3.49, "elapsed_time": "0:06:09", "remaining_time": "2:50:28", "throughput": 23195.62, "total_tokens": 8576960}
|
|
{"current_steps": 2730, "total_steps": 78105, "loss": 0.4479, "lr": 1.7468954039175524e-06, "epoch": 0.174764739773382, "percentage": 3.5, "elapsed_time": "0:06:10", "remaining_time": "2:50:27", "throughput": 23195.1, "total_tokens": 8592384}
|
|
{"current_steps": 2735, "total_steps": 78105, "loss": 0.5252, "lr": 1.7500960184355398e-06, "epoch": 0.17508482171435888, "percentage": 3.5, "elapsed_time": "0:06:11", "remaining_time": "2:50:27", "throughput": 23195.87, "total_tokens": 8609024}
|
|
{"current_steps": 2740, "total_steps": 78105, "loss": 0.4628, "lr": 1.7532966329535273e-06, "epoch": 0.17540490365533576, "percentage": 3.51, "elapsed_time": "0:06:11", "remaining_time": "2:50:27", "throughput": 23196.16, "total_tokens": 8624768}
|
|
{"current_steps": 2745, "total_steps": 78105, "loss": 0.5124, "lr": 1.7564972474715148e-06, "epoch": 0.17572498559631267, "percentage": 3.51, "elapsed_time": "0:06:12", "remaining_time": "2:50:26", "throughput": 23195.89, "total_tokens": 8640448}
|
|
{"current_steps": 2750, "total_steps": 78105, "loss": 0.5235, "lr": 1.7596978619895022e-06, "epoch": 0.17604506753728955, "percentage": 3.52, "elapsed_time": "0:06:13", "remaining_time": "2:50:24", "throughput": 23195.44, "total_tokens": 8655296}
|
|
{"current_steps": 2755, "total_steps": 78105, "loss": 0.514, "lr": 1.7628984765074897e-06, "epoch": 0.17636514947826643, "percentage": 3.53, "elapsed_time": "0:06:13", "remaining_time": "2:50:24", "throughput": 23195.93, "total_tokens": 8671872}
|
|
{"current_steps": 2760, "total_steps": 78105, "loss": 0.5037, "lr": 1.7660990910254772e-06, "epoch": 0.17668523141924333, "percentage": 3.53, "elapsed_time": "0:06:14", "remaining_time": "2:50:24", "throughput": 23196.55, "total_tokens": 8687680}
|
|
{"current_steps": 2765, "total_steps": 78105, "loss": 0.6377, "lr": 1.7692997055434644e-06, "epoch": 0.1770053133602202, "percentage": 3.54, "elapsed_time": "0:06:15", "remaining_time": "2:50:23", "throughput": 23195.96, "total_tokens": 8702912}
|
|
{"current_steps": 2770, "total_steps": 78105, "loss": 0.4919, "lr": 1.7725003200614519e-06, "epoch": 0.17732539530119712, "percentage": 3.55, "elapsed_time": "0:06:15", "remaining_time": "2:50:21", "throughput": 23195.93, "total_tokens": 8718144}
|
|
{"current_steps": 2775, "total_steps": 78105, "loss": 0.5202, "lr": 1.7757009345794394e-06, "epoch": 0.177645477242174, "percentage": 3.55, "elapsed_time": "0:06:16", "remaining_time": "2:50:20", "throughput": 23195.7, "total_tokens": 8732992}
|
|
{"current_steps": 2780, "total_steps": 78105, "loss": 0.5555, "lr": 1.7789015490974268e-06, "epoch": 0.17796555918315088, "percentage": 3.56, "elapsed_time": "0:06:17", "remaining_time": "2:50:19", "throughput": 23195.97, "total_tokens": 8749056}
|
|
{"current_steps": 2785, "total_steps": 78105, "loss": 0.5075, "lr": 1.7821021636154143e-06, "epoch": 0.17828564112412779, "percentage": 3.57, "elapsed_time": "0:06:17", "remaining_time": "2:50:19", "throughput": 23196.31, "total_tokens": 8765184}
|
|
{"current_steps": 2790, "total_steps": 78105, "loss": 0.4381, "lr": 1.7853027781334018e-06, "epoch": 0.17860572306510467, "percentage": 3.57, "elapsed_time": "0:06:18", "remaining_time": "2:50:18", "throughput": 23196.22, "total_tokens": 8780480}
|
|
{"current_steps": 2795, "total_steps": 78105, "loss": 0.5762, "lr": 1.7885033926513892e-06, "epoch": 0.17892580500608155, "percentage": 3.58, "elapsed_time": "0:06:19", "remaining_time": "2:50:17", "throughput": 23196.52, "total_tokens": 8796352}
|
|
{"current_steps": 2800, "total_steps": 78105, "loss": 0.4723, "lr": 1.7917040071693767e-06, "epoch": 0.17924588694705845, "percentage": 3.58, "elapsed_time": "0:06:19", "remaining_time": "2:50:15", "throughput": 23195.22, "total_tokens": 8810688}
|
|
{"current_steps": 2805, "total_steps": 78105, "loss": 0.4801, "lr": 1.7949046216873642e-06, "epoch": 0.17956596888803533, "percentage": 3.59, "elapsed_time": "0:06:20", "remaining_time": "2:50:15", "throughput": 23195.17, "total_tokens": 8826304}
|
|
{"current_steps": 2810, "total_steps": 78105, "loss": 0.4333, "lr": 1.7981052362053516e-06, "epoch": 0.17988605082901224, "percentage": 3.6, "elapsed_time": "0:06:21", "remaining_time": "2:50:13", "throughput": 23194.53, "total_tokens": 8841344}
|
|
{"current_steps": 2815, "total_steps": 78105, "loss": 0.5636, "lr": 1.801305850723339e-06, "epoch": 0.18020613276998912, "percentage": 3.6, "elapsed_time": "0:06:21", "remaining_time": "2:50:13", "throughput": 23195.45, "total_tokens": 8857664}
|
|
{"current_steps": 2820, "total_steps": 78105, "loss": 0.597, "lr": 1.8045064652413266e-06, "epoch": 0.180526214710966, "percentage": 3.61, "elapsed_time": "0:06:22", "remaining_time": "2:50:12", "throughput": 23195.39, "total_tokens": 8873408}
|
|
{"current_steps": 2825, "total_steps": 78105, "loss": 0.4652, "lr": 1.8077070797593138e-06, "epoch": 0.1808462966519429, "percentage": 3.62, "elapsed_time": "0:06:23", "remaining_time": "2:50:12", "throughput": 23195.94, "total_tokens": 8889472}
|
|
{"current_steps": 2830, "total_steps": 78105, "loss": 0.6546, "lr": 1.8109076942773013e-06, "epoch": 0.18116637859291979, "percentage": 3.62, "elapsed_time": "0:06:23", "remaining_time": "2:50:11", "throughput": 23195.6, "total_tokens": 8904640}
|
|
{"current_steps": 2835, "total_steps": 78105, "loss": 0.4452, "lr": 1.8141083087952887e-06, "epoch": 0.18148646053389667, "percentage": 3.63, "elapsed_time": "0:06:24", "remaining_time": "2:50:09", "throughput": 23195.29, "total_tokens": 8919680}
|
|
{"current_steps": 2840, "total_steps": 78105, "loss": 0.4601, "lr": 1.8173089233132762e-06, "epoch": 0.18180654247487357, "percentage": 3.64, "elapsed_time": "0:06:25", "remaining_time": "2:50:09", "throughput": 23195.42, "total_tokens": 8935360}
|
|
{"current_steps": 2845, "total_steps": 78105, "loss": 0.4969, "lr": 1.8205095378312637e-06, "epoch": 0.18212662441585045, "percentage": 3.64, "elapsed_time": "0:06:25", "remaining_time": "2:50:08", "throughput": 23196.12, "total_tokens": 8951552}
|
|
{"current_steps": 2850, "total_steps": 78105, "loss": 0.6108, "lr": 1.8237101523492512e-06, "epoch": 0.18244670635682736, "percentage": 3.65, "elapsed_time": "0:06:26", "remaining_time": "2:50:08", "throughput": 23197.05, "total_tokens": 8968576}
|
|
{"current_steps": 2855, "total_steps": 78105, "loss": 0.4499, "lr": 1.8269107668672386e-06, "epoch": 0.18276678829780424, "percentage": 3.66, "elapsed_time": "0:06:27", "remaining_time": "2:50:07", "throughput": 23196.6, "total_tokens": 8983744}
|
|
{"current_steps": 2860, "total_steps": 78105, "loss": 0.4559, "lr": 1.830111381385226e-06, "epoch": 0.18308687023878112, "percentage": 3.66, "elapsed_time": "0:06:27", "remaining_time": "2:50:07", "throughput": 23195.51, "total_tokens": 8999040}
|
|
{"current_steps": 2865, "total_steps": 78105, "loss": 0.4791, "lr": 1.8333119959032136e-06, "epoch": 0.18340695217975803, "percentage": 3.67, "elapsed_time": "0:06:28", "remaining_time": "2:50:07", "throughput": 23196.52, "total_tokens": 9015872}
|
|
{"current_steps": 2870, "total_steps": 78105, "loss": 0.4226, "lr": 1.836512610421201e-06, "epoch": 0.1837270341207349, "percentage": 3.67, "elapsed_time": "0:06:29", "remaining_time": "2:50:06", "throughput": 23196.89, "total_tokens": 9031744}
|
|
{"current_steps": 2875, "total_steps": 78105, "loss": 0.6299, "lr": 1.8397132249391885e-06, "epoch": 0.18404711606171179, "percentage": 3.68, "elapsed_time": "0:06:30", "remaining_time": "2:50:06", "throughput": 23196.94, "total_tokens": 9047872}
|
|
{"current_steps": 2880, "total_steps": 78105, "loss": 0.5186, "lr": 1.842913839457176e-06, "epoch": 0.1843671980026887, "percentage": 3.69, "elapsed_time": "0:06:30", "remaining_time": "2:50:05", "throughput": 23196.44, "total_tokens": 9062976}
|
|
{"current_steps": 2885, "total_steps": 78105, "loss": 0.6009, "lr": 1.8461144539751632e-06, "epoch": 0.18468727994366557, "percentage": 3.69, "elapsed_time": "0:06:31", "remaining_time": "2:50:03", "throughput": 23196.39, "total_tokens": 9078208}
|
|
{"current_steps": 2890, "total_steps": 78105, "loss": 0.442, "lr": 1.8493150684931507e-06, "epoch": 0.18500736188464248, "percentage": 3.7, "elapsed_time": "0:06:32", "remaining_time": "2:50:04", "throughput": 23196.64, "total_tokens": 9094720}
|
|
{"current_steps": 2895, "total_steps": 78105, "loss": 0.5871, "lr": 1.8525156830111381e-06, "epoch": 0.18532744382561936, "percentage": 3.71, "elapsed_time": "0:06:32", "remaining_time": "2:50:03", "throughput": 23196.7, "total_tokens": 9110336}
|
|
{"current_steps": 2900, "total_steps": 78105, "loss": 0.5919, "lr": 1.8557162975291256e-06, "epoch": 0.18564752576659624, "percentage": 3.71, "elapsed_time": "0:06:33", "remaining_time": "2:50:03", "throughput": 23197.34, "total_tokens": 9126912}
|
|
{"current_steps": 2905, "total_steps": 78105, "loss": 0.5381, "lr": 1.858916912047113e-06, "epoch": 0.18596760770757315, "percentage": 3.72, "elapsed_time": "0:06:34", "remaining_time": "2:50:02", "throughput": 23197.44, "total_tokens": 9142400}
|
|
{"current_steps": 2910, "total_steps": 78105, "loss": 0.5612, "lr": 1.8621175265651005e-06, "epoch": 0.18628768964855003, "percentage": 3.73, "elapsed_time": "0:06:34", "remaining_time": "2:50:00", "throughput": 23196.74, "total_tokens": 9157184}
|
|
{"current_steps": 2915, "total_steps": 78105, "loss": 0.5459, "lr": 1.8653181410830882e-06, "epoch": 0.1866077715895269, "percentage": 3.73, "elapsed_time": "0:06:35", "remaining_time": "2:50:00", "throughput": 23197.13, "total_tokens": 9173696}
|
|
{"current_steps": 2920, "total_steps": 78105, "loss": 0.5249, "lr": 1.8685187556010757e-06, "epoch": 0.1869278535305038, "percentage": 3.74, "elapsed_time": "0:06:36", "remaining_time": "2:50:00", "throughput": 23197.44, "total_tokens": 9190080}
|
|
{"current_steps": 2925, "total_steps": 78105, "loss": 0.6124, "lr": 1.8717193701190632e-06, "epoch": 0.1872479354714807, "percentage": 3.74, "elapsed_time": "0:06:36", "remaining_time": "2:49:58", "throughput": 23196.1, "total_tokens": 9204352}
|
|
{"current_steps": 2930, "total_steps": 78105, "loss": 0.5798, "lr": 1.8749199846370506e-06, "epoch": 0.1875680174124576, "percentage": 3.75, "elapsed_time": "0:06:37", "remaining_time": "2:49:57", "throughput": 23195.61, "total_tokens": 9219200}
|
|
{"current_steps": 2935, "total_steps": 78105, "loss": 0.4787, "lr": 1.878120599155038e-06, "epoch": 0.18788809935343448, "percentage": 3.76, "elapsed_time": "0:06:38", "remaining_time": "2:49:55", "throughput": 23194.77, "total_tokens": 9233600}
|
|
{"current_steps": 2940, "total_steps": 78105, "loss": 0.4465, "lr": 1.8813212136730256e-06, "epoch": 0.18820818129441136, "percentage": 3.76, "elapsed_time": "0:06:38", "remaining_time": "2:49:55", "throughput": 23195.17, "total_tokens": 9249536}
|
|
{"current_steps": 2945, "total_steps": 78105, "loss": 0.6955, "lr": 1.8845218281910126e-06, "epoch": 0.18852826323538827, "percentage": 3.77, "elapsed_time": "0:06:39", "remaining_time": "2:49:53", "throughput": 23194.57, "total_tokens": 9264256}
|
|
{"current_steps": 2950, "total_steps": 78105, "loss": 0.4204, "lr": 1.8877224427090003e-06, "epoch": 0.18884834517636515, "percentage": 3.78, "elapsed_time": "0:06:40", "remaining_time": "2:49:52", "throughput": 23194.25, "total_tokens": 9279488}
|
|
{"current_steps": 2955, "total_steps": 78105, "loss": 0.5249, "lr": 1.8909230572269877e-06, "epoch": 0.18916842711734205, "percentage": 3.78, "elapsed_time": "0:06:40", "remaining_time": "2:49:51", "throughput": 23193.65, "total_tokens": 9294336}
|
|
{"current_steps": 2960, "total_steps": 78105, "loss": 0.5779, "lr": 1.8941236717449752e-06, "epoch": 0.18948850905831893, "percentage": 3.79, "elapsed_time": "0:06:41", "remaining_time": "2:49:49", "throughput": 23193.38, "total_tokens": 9309376}
|
|
{"current_steps": 2965, "total_steps": 78105, "loss": 0.4775, "lr": 1.8973242862629627e-06, "epoch": 0.1898085909992958, "percentage": 3.8, "elapsed_time": "0:06:42", "remaining_time": "2:49:48", "throughput": 23192.68, "total_tokens": 9324224}
|
|
{"current_steps": 2970, "total_steps": 78105, "loss": 0.4359, "lr": 1.9005249007809502e-06, "epoch": 0.19012867294027272, "percentage": 3.8, "elapsed_time": "0:06:42", "remaining_time": "2:49:47", "throughput": 23192.17, "total_tokens": 9339136}
|
|
{"current_steps": 2975, "total_steps": 78105, "loss": 0.4773, "lr": 1.9037255152989376e-06, "epoch": 0.1904487548812496, "percentage": 3.81, "elapsed_time": "0:06:43", "remaining_time": "2:49:46", "throughput": 23192.09, "total_tokens": 9354688}
|
|
{"current_steps": 2980, "total_steps": 78105, "loss": 0.5187, "lr": 1.906926129816925e-06, "epoch": 0.19076883682222648, "percentage": 3.82, "elapsed_time": "0:06:44", "remaining_time": "2:49:45", "throughput": 23191.4, "total_tokens": 9369664}
|
|
{"current_steps": 2985, "total_steps": 78105, "loss": 0.6318, "lr": 1.9101267443349123e-06, "epoch": 0.19108891876320339, "percentage": 3.82, "elapsed_time": "0:06:44", "remaining_time": "2:49:43", "throughput": 23190.94, "total_tokens": 9384768}
|
|
{"current_steps": 2990, "total_steps": 78105, "loss": 0.4131, "lr": 1.9133273588529e-06, "epoch": 0.19140900070418027, "percentage": 3.83, "elapsed_time": "0:06:45", "remaining_time": "2:49:43", "throughput": 23190.97, "total_tokens": 9400320}
|
|
{"current_steps": 2995, "total_steps": 78105, "loss": 0.4738, "lr": 1.9165279733708873e-06, "epoch": 0.19172908264515717, "percentage": 3.83, "elapsed_time": "0:06:46", "remaining_time": "2:49:42", "throughput": 23190.47, "total_tokens": 9415872}
|
|
{"current_steps": 3000, "total_steps": 78105, "loss": 0.5976, "lr": 1.9197285878888747e-06, "epoch": 0.19204916458613405, "percentage": 3.84, "elapsed_time": "0:06:46", "remaining_time": "2:49:42", "throughput": 23191.45, "total_tokens": 9432704}
|
|
{"current_steps": 3005, "total_steps": 78105, "loss": 0.493, "lr": 1.922929202406862e-06, "epoch": 0.19236924652711093, "percentage": 3.85, "elapsed_time": "0:06:47", "remaining_time": "2:49:41", "throughput": 23191.07, "total_tokens": 9448192}
|
|
{"current_steps": 3010, "total_steps": 78105, "loss": 0.4355, "lr": 1.9261298169248497e-06, "epoch": 0.19268932846808784, "percentage": 3.85, "elapsed_time": "0:06:48", "remaining_time": "2:49:41", "throughput": 23191.55, "total_tokens": 9464576}
|
|
{"current_steps": 3015, "total_steps": 78105, "loss": 0.5541, "lr": 1.929330431442837e-06, "epoch": 0.19300941040906472, "percentage": 3.86, "elapsed_time": "0:06:48", "remaining_time": "2:49:40", "throughput": 23191.27, "total_tokens": 9479936}
|
|
{"current_steps": 3020, "total_steps": 78105, "loss": 0.584, "lr": 1.9325310459608246e-06, "epoch": 0.1933294923500416, "percentage": 3.87, "elapsed_time": "0:06:49", "remaining_time": "2:49:41", "throughput": 23192.52, "total_tokens": 9497280}
|
|
{"current_steps": 3025, "total_steps": 78105, "loss": 0.4925, "lr": 1.935731660478812e-06, "epoch": 0.1936495742910185, "percentage": 3.87, "elapsed_time": "0:06:50", "remaining_time": "2:49:41", "throughput": 23194.08, "total_tokens": 9514496}
|
|
{"current_steps": 3030, "total_steps": 78105, "loss": 0.4291, "lr": 1.9389322749967995e-06, "epoch": 0.19396965623199539, "percentage": 3.88, "elapsed_time": "0:06:50", "remaining_time": "2:49:40", "throughput": 23193.72, "total_tokens": 9529536}
|
|
{"current_steps": 3035, "total_steps": 78105, "loss": 0.5895, "lr": 1.942132889514787e-06, "epoch": 0.1942897381729723, "percentage": 3.89, "elapsed_time": "0:06:51", "remaining_time": "2:49:39", "throughput": 23194.09, "total_tokens": 9545472}
|
|
{"current_steps": 3040, "total_steps": 78105, "loss": 0.6192, "lr": 1.9453335040327745e-06, "epoch": 0.19460982011394917, "percentage": 3.89, "elapsed_time": "0:06:52", "remaining_time": "2:49:38", "throughput": 23193.83, "total_tokens": 9560960}
|
|
{"current_steps": 3045, "total_steps": 78105, "loss": 0.5843, "lr": 1.948534118550762e-06, "epoch": 0.19492990205492605, "percentage": 3.9, "elapsed_time": "0:06:52", "remaining_time": "2:49:37", "throughput": 23193.23, "total_tokens": 9576320}
|
|
{"current_steps": 3050, "total_steps": 78105, "loss": 0.5828, "lr": 1.9517347330687494e-06, "epoch": 0.19524998399590296, "percentage": 3.9, "elapsed_time": "0:06:53", "remaining_time": "2:49:37", "throughput": 23192.86, "total_tokens": 9592192}
|
|
{"current_steps": 3055, "total_steps": 78105, "loss": 0.6384, "lr": 1.954935347586737e-06, "epoch": 0.19557006593687984, "percentage": 3.91, "elapsed_time": "0:06:54", "remaining_time": "2:49:37", "throughput": 23193.06, "total_tokens": 9608128}
|
|
{"current_steps": 3060, "total_steps": 78105, "loss": 0.5492, "lr": 1.9581359621047243e-06, "epoch": 0.19589014787785672, "percentage": 3.92, "elapsed_time": "0:06:54", "remaining_time": "2:49:36", "throughput": 23193.34, "total_tokens": 9623872}
|
|
{"current_steps": 3065, "total_steps": 78105, "loss": 0.5739, "lr": 1.9613365766227114e-06, "epoch": 0.19621022981883363, "percentage": 3.92, "elapsed_time": "0:06:55", "remaining_time": "2:49:35", "throughput": 23192.91, "total_tokens": 9639488}
|
|
{"current_steps": 3070, "total_steps": 78105, "loss": 0.514, "lr": 1.9645371911406993e-06, "epoch": 0.1965303117598105, "percentage": 3.93, "elapsed_time": "0:06:56", "remaining_time": "2:49:35", "throughput": 23192.99, "total_tokens": 9655680}
|
|
{"current_steps": 3075, "total_steps": 78105, "loss": 0.5939, "lr": 1.9677378056586867e-06, "epoch": 0.1968503937007874, "percentage": 3.94, "elapsed_time": "0:06:56", "remaining_time": "2:49:33", "throughput": 23192.45, "total_tokens": 9670400}
|
|
{"current_steps": 3080, "total_steps": 78105, "loss": 0.4051, "lr": 1.9709384201766742e-06, "epoch": 0.1971704756417643, "percentage": 3.94, "elapsed_time": "0:06:57", "remaining_time": "2:49:32", "throughput": 23191.84, "total_tokens": 9685504}
|
|
{"current_steps": 3085, "total_steps": 78105, "loss": 0.6816, "lr": 1.9741390346946617e-06, "epoch": 0.19749055758274117, "percentage": 3.95, "elapsed_time": "0:06:58", "remaining_time": "2:49:32", "throughput": 23192.25, "total_tokens": 9701952}
|
|
{"current_steps": 3090, "total_steps": 78105, "loss": 0.4583, "lr": 1.977339649212649e-06, "epoch": 0.19781063952371808, "percentage": 3.96, "elapsed_time": "0:06:58", "remaining_time": "2:49:31", "throughput": 23191.85, "total_tokens": 9716928}
|
|
{"current_steps": 3095, "total_steps": 78105, "loss": 0.6711, "lr": 1.9805402637306366e-06, "epoch": 0.19813072146469496, "percentage": 3.96, "elapsed_time": "0:06:59", "remaining_time": "2:49:30", "throughput": 23191.42, "total_tokens": 9732096}
|
|
{"current_steps": 3100, "total_steps": 78105, "loss": 0.5532, "lr": 1.983740878248624e-06, "epoch": 0.19845080340567184, "percentage": 3.97, "elapsed_time": "0:07:00", "remaining_time": "2:49:29", "throughput": 23191.0, "total_tokens": 9747648}
|
|
{"current_steps": 3105, "total_steps": 78105, "loss": 0.4788, "lr": 1.9869414927666116e-06, "epoch": 0.19877088534664875, "percentage": 3.98, "elapsed_time": "0:07:00", "remaining_time": "2:49:28", "throughput": 23190.52, "total_tokens": 9763072}
|
|
{"current_steps": 3110, "total_steps": 78105, "loss": 0.4999, "lr": 1.990142107284599e-06, "epoch": 0.19909096728762563, "percentage": 3.98, "elapsed_time": "0:07:01", "remaining_time": "2:49:27", "throughput": 23190.1, "total_tokens": 9778176}
|
|
{"current_steps": 3115, "total_steps": 78105, "loss": 0.5723, "lr": 1.9933427218025865e-06, "epoch": 0.19941104922860253, "percentage": 3.99, "elapsed_time": "0:07:02", "remaining_time": "2:49:26", "throughput": 23190.13, "total_tokens": 9793536}
|
|
{"current_steps": 3120, "total_steps": 78105, "loss": 0.51, "lr": 1.996543336320574e-06, "epoch": 0.1997311311695794, "percentage": 3.99, "elapsed_time": "0:07:02", "remaining_time": "2:49:25", "throughput": 23189.62, "total_tokens": 9808192}
|
|
{"current_steps": 3125, "total_steps": 78105, "loss": 0.615, "lr": 1.999743950838561e-06, "epoch": 0.2000512131105563, "percentage": 4.0, "elapsed_time": "0:07:03", "remaining_time": "2:49:24", "throughput": 23189.11, "total_tokens": 9823232}
|
|
{"current_steps": 3130, "total_steps": 78105, "loss": 0.6188, "lr": 2.0029445653565485e-06, "epoch": 0.2003712950515332, "percentage": 4.01, "elapsed_time": "0:07:04", "remaining_time": "2:49:23", "throughput": 23189.58, "total_tokens": 9839488}
|
|
{"current_steps": 3135, "total_steps": 78105, "loss": 0.4495, "lr": 2.006145179874536e-06, "epoch": 0.20069137699251008, "percentage": 4.01, "elapsed_time": "0:07:05", "remaining_time": "2:49:23", "throughput": 23190.62, "total_tokens": 9856192}
|
|
{"current_steps": 3140, "total_steps": 78105, "loss": 0.5418, "lr": 2.0093457943925234e-06, "epoch": 0.201011458933487, "percentage": 4.02, "elapsed_time": "0:07:05", "remaining_time": "2:49:22", "throughput": 23189.92, "total_tokens": 9871552}
|
|
{"current_steps": 3145, "total_steps": 78105, "loss": 0.5226, "lr": 2.012546408910511e-06, "epoch": 0.20133154087446387, "percentage": 4.03, "elapsed_time": "0:07:06", "remaining_time": "2:49:21", "throughput": 23189.08, "total_tokens": 9886016}
|
|
{"current_steps": 3150, "total_steps": 78105, "loss": 0.6394, "lr": 2.0157470234284983e-06, "epoch": 0.20165162281544075, "percentage": 4.03, "elapsed_time": "0:07:07", "remaining_time": "2:49:21", "throughput": 23190.43, "total_tokens": 9903552}
|
|
{"current_steps": 3155, "total_steps": 78105, "loss": 0.3662, "lr": 2.018947637946486e-06, "epoch": 0.20197170475641765, "percentage": 4.04, "elapsed_time": "0:07:07", "remaining_time": "2:49:21", "throughput": 23191.28, "total_tokens": 9920320}
|
|
{"current_steps": 3160, "total_steps": 78105, "loss": 0.6472, "lr": 2.0221482524644733e-06, "epoch": 0.20229178669739453, "percentage": 4.05, "elapsed_time": "0:07:08", "remaining_time": "2:49:20", "throughput": 23191.51, "total_tokens": 9935936}
|
|
{"current_steps": 3165, "total_steps": 78105, "loss": 0.5605, "lr": 2.0253488669824607e-06, "epoch": 0.2026118686383714, "percentage": 4.05, "elapsed_time": "0:07:09", "remaining_time": "2:49:19", "throughput": 23190.73, "total_tokens": 9950912}
|
|
{"current_steps": 3170, "total_steps": 78105, "loss": 0.5868, "lr": 2.028549481500448e-06, "epoch": 0.20293195057934832, "percentage": 4.06, "elapsed_time": "0:07:09", "remaining_time": "2:49:18", "throughput": 23190.24, "total_tokens": 9966080}
|
|
{"current_steps": 3175, "total_steps": 78105, "loss": 0.4591, "lr": 2.0317500960184357e-06, "epoch": 0.2032520325203252, "percentage": 4.07, "elapsed_time": "0:07:10", "remaining_time": "2:49:18", "throughput": 23190.4, "total_tokens": 9982080}
|
|
{"current_steps": 3180, "total_steps": 78105, "loss": 0.5424, "lr": 2.034950710536423e-06, "epoch": 0.2035721144613021, "percentage": 4.07, "elapsed_time": "0:07:11", "remaining_time": "2:49:16", "throughput": 23189.51, "total_tokens": 9996544}
|
|
{"current_steps": 3185, "total_steps": 78105, "loss": 0.546, "lr": 2.0381513250544106e-06, "epoch": 0.20389219640227899, "percentage": 4.08, "elapsed_time": "0:07:11", "remaining_time": "2:49:15", "throughput": 23189.42, "total_tokens": 10011776}
|
|
{"current_steps": 3190, "total_steps": 78105, "loss": 0.4516, "lr": 2.041351939572398e-06, "epoch": 0.20421227834325587, "percentage": 4.08, "elapsed_time": "0:07:12", "remaining_time": "2:49:14", "throughput": 23189.2, "total_tokens": 10027264}
|
|
{"current_steps": 3195, "total_steps": 78105, "loss": 0.4452, "lr": 2.0445525540903855e-06, "epoch": 0.20453236028423277, "percentage": 4.09, "elapsed_time": "0:07:13", "remaining_time": "2:49:13", "throughput": 23189.03, "total_tokens": 10042432}
|
|
{"current_steps": 3200, "total_steps": 78105, "loss": 0.4167, "lr": 2.047753168608373e-06, "epoch": 0.20485244222520965, "percentage": 4.1, "elapsed_time": "0:07:13", "remaining_time": "2:49:12", "throughput": 23188.45, "total_tokens": 10057792}
|
|
{"current_steps": 3205, "total_steps": 78105, "loss": 0.4757, "lr": 2.0509537831263605e-06, "epoch": 0.20517252416618653, "percentage": 4.1, "elapsed_time": "0:07:14", "remaining_time": "2:49:12", "throughput": 23190.0, "total_tokens": 10074752}
|
|
{"current_steps": 3210, "total_steps": 78105, "loss": 0.6621, "lr": 2.054154397644348e-06, "epoch": 0.20549260610716344, "percentage": 4.11, "elapsed_time": "0:07:15", "remaining_time": "2:49:12", "throughput": 23190.33, "total_tokens": 10090752}
|
|
{"current_steps": 3215, "total_steps": 78105, "loss": 0.5276, "lr": 2.0573550121623354e-06, "epoch": 0.20581268804814032, "percentage": 4.12, "elapsed_time": "0:07:15", "remaining_time": "2:49:11", "throughput": 23190.19, "total_tokens": 10106240}
|
|
{"current_steps": 3220, "total_steps": 78105, "loss": 0.4633, "lr": 2.060555626680323e-06, "epoch": 0.20613276998911723, "percentage": 4.12, "elapsed_time": "0:07:16", "remaining_time": "2:49:10", "throughput": 23190.1, "total_tokens": 10121856}
|
|
{"current_steps": 3225, "total_steps": 78105, "loss": 0.4951, "lr": 2.0637562411983103e-06, "epoch": 0.2064528519300941, "percentage": 4.13, "elapsed_time": "0:07:17", "remaining_time": "2:49:09", "throughput": 23190.13, "total_tokens": 10137408}
|
|
{"current_steps": 3230, "total_steps": 78105, "loss": 0.485, "lr": 2.066956855716298e-06, "epoch": 0.20677293387107099, "percentage": 4.14, "elapsed_time": "0:07:17", "remaining_time": "2:49:08", "throughput": 23189.77, "total_tokens": 10152640}
|
|
{"current_steps": 3235, "total_steps": 78105, "loss": 0.4234, "lr": 2.0701574702342853e-06, "epoch": 0.2070930158120479, "percentage": 4.14, "elapsed_time": "0:07:18", "remaining_time": "2:49:07", "throughput": 23189.35, "total_tokens": 10167936}
|
|
{"current_steps": 3240, "total_steps": 78105, "loss": 0.463, "lr": 2.0733580847522727e-06, "epoch": 0.20741309775302477, "percentage": 4.15, "elapsed_time": "0:07:19", "remaining_time": "2:49:06", "throughput": 23189.36, "total_tokens": 10183360}
|
|
{"current_steps": 3245, "total_steps": 78105, "loss": 0.5593, "lr": 2.0765586992702598e-06, "epoch": 0.20773317969400165, "percentage": 4.15, "elapsed_time": "0:07:19", "remaining_time": "2:49:05", "throughput": 23189.19, "total_tokens": 10198208}
|
|
{"current_steps": 3250, "total_steps": 78105, "loss": 0.5326, "lr": 2.0797593137882473e-06, "epoch": 0.20805326163497856, "percentage": 4.16, "elapsed_time": "0:07:20", "remaining_time": "2:49:04", "throughput": 23189.02, "total_tokens": 10213568}
|
|
{"current_steps": 3255, "total_steps": 78105, "loss": 0.4922, "lr": 2.0829599283062347e-06, "epoch": 0.20837334357595544, "percentage": 4.17, "elapsed_time": "0:07:21", "remaining_time": "2:49:04", "throughput": 23189.78, "total_tokens": 10230080}
|
|
{"current_steps": 3260, "total_steps": 78105, "loss": 0.5181, "lr": 2.086160542824222e-06, "epoch": 0.20869342551693235, "percentage": 4.17, "elapsed_time": "0:07:21", "remaining_time": "2:49:04", "throughput": 23190.46, "total_tokens": 10246912}
|
|
{"current_steps": 3265, "total_steps": 78105, "loss": 0.538, "lr": 2.0893611573422097e-06, "epoch": 0.20901350745790923, "percentage": 4.18, "elapsed_time": "0:07:22", "remaining_time": "2:49:03", "throughput": 23190.3, "total_tokens": 10262464}
|
|
{"current_steps": 3270, "total_steps": 78105, "loss": 0.6577, "lr": 2.092561771860197e-06, "epoch": 0.2093335893988861, "percentage": 4.19, "elapsed_time": "0:07:23", "remaining_time": "2:49:02", "throughput": 23190.09, "total_tokens": 10278016}
|
|
{"current_steps": 3275, "total_steps": 78105, "loss": 0.5375, "lr": 2.0957623863781846e-06, "epoch": 0.209653671339863, "percentage": 4.19, "elapsed_time": "0:07:23", "remaining_time": "2:49:02", "throughput": 23190.17, "total_tokens": 10293824}
|
|
{"current_steps": 3280, "total_steps": 78105, "loss": 0.5761, "lr": 2.0989630008961725e-06, "epoch": 0.2099737532808399, "percentage": 4.2, "elapsed_time": "0:07:24", "remaining_time": "2:49:01", "throughput": 23190.15, "total_tokens": 10309568}
|
|
{"current_steps": 3285, "total_steps": 78105, "loss": 0.439, "lr": 2.10216361541416e-06, "epoch": 0.21029383522181677, "percentage": 4.21, "elapsed_time": "0:07:25", "remaining_time": "2:49:01", "throughput": 23190.45, "total_tokens": 10325696}
|
|
{"current_steps": 3290, "total_steps": 78105, "loss": 0.5091, "lr": 2.1053642299321474e-06, "epoch": 0.21061391716279368, "percentage": 4.21, "elapsed_time": "0:07:25", "remaining_time": "2:49:00", "throughput": 23190.88, "total_tokens": 10342016}
|
|
{"current_steps": 3295, "total_steps": 78105, "loss": 0.4905, "lr": 2.108564844450135e-06, "epoch": 0.21093399910377056, "percentage": 4.22, "elapsed_time": "0:07:26", "remaining_time": "2:49:00", "throughput": 23190.53, "total_tokens": 10357632}
|
|
{"current_steps": 3300, "total_steps": 78105, "loss": 0.5472, "lr": 2.1117654589681223e-06, "epoch": 0.21125408104474747, "percentage": 4.23, "elapsed_time": "0:07:27", "remaining_time": "2:48:59", "throughput": 23190.33, "total_tokens": 10373056}
|
|
{"current_steps": 3305, "total_steps": 78105, "loss": 0.7056, "lr": 2.1149660734861094e-06, "epoch": 0.21157416298572435, "percentage": 4.23, "elapsed_time": "0:07:27", "remaining_time": "2:48:59", "throughput": 23190.78, "total_tokens": 10389248}
|
|
{"current_steps": 3310, "total_steps": 78105, "loss": 0.4812, "lr": 2.118166688004097e-06, "epoch": 0.21189424492670123, "percentage": 4.24, "elapsed_time": "0:07:28", "remaining_time": "2:48:58", "throughput": 23190.9, "total_tokens": 10404864}
|
|
{"current_steps": 3315, "total_steps": 78105, "loss": 0.4748, "lr": 2.1213673025220843e-06, "epoch": 0.21221432686767813, "percentage": 4.24, "elapsed_time": "0:07:29", "remaining_time": "2:48:57", "throughput": 23190.62, "total_tokens": 10420416}
|
|
{"current_steps": 3320, "total_steps": 78105, "loss": 0.4937, "lr": 2.124567917040072e-06, "epoch": 0.212534408808655, "percentage": 4.25, "elapsed_time": "0:07:30", "remaining_time": "2:48:56", "throughput": 23189.89, "total_tokens": 10435456}
|
|
{"current_steps": 3325, "total_steps": 78105, "loss": 0.5216, "lr": 2.1277685315580593e-06, "epoch": 0.21285449074963192, "percentage": 4.26, "elapsed_time": "0:07:30", "remaining_time": "2:48:56", "throughput": 23190.2, "total_tokens": 10451584}
|
|
{"current_steps": 3330, "total_steps": 78105, "loss": 0.3753, "lr": 2.1309691460760467e-06, "epoch": 0.2131745726906088, "percentage": 4.26, "elapsed_time": "0:07:31", "remaining_time": "2:48:55", "throughput": 23190.63, "total_tokens": 10467584}
|
|
{"current_steps": 3335, "total_steps": 78105, "loss": 0.5046, "lr": 2.134169760594034e-06, "epoch": 0.21349465463158568, "percentage": 4.27, "elapsed_time": "0:07:32", "remaining_time": "2:48:54", "throughput": 23190.5, "total_tokens": 10483264}
|
|
{"current_steps": 3340, "total_steps": 78105, "loss": 0.5896, "lr": 2.1373703751120217e-06, "epoch": 0.21381473657256259, "percentage": 4.28, "elapsed_time": "0:07:32", "remaining_time": "2:48:53", "throughput": 23190.21, "total_tokens": 10498560}
|
|
{"current_steps": 3345, "total_steps": 78105, "loss": 0.4651, "lr": 2.140570989630009e-06, "epoch": 0.21413481851353947, "percentage": 4.28, "elapsed_time": "0:07:33", "remaining_time": "2:48:53", "throughput": 23190.0, "total_tokens": 10514176}
|
|
{"current_steps": 3350, "total_steps": 78105, "loss": 0.4636, "lr": 2.1437716041479966e-06, "epoch": 0.21445490045451635, "percentage": 4.29, "elapsed_time": "0:07:34", "remaining_time": "2:48:52", "throughput": 23190.25, "total_tokens": 10529792}
|
|
{"current_steps": 3355, "total_steps": 78105, "loss": 0.5346, "lr": 2.146972218665984e-06, "epoch": 0.21477498239549325, "percentage": 4.3, "elapsed_time": "0:07:34", "remaining_time": "2:48:51", "throughput": 23190.47, "total_tokens": 10545856}
|
|
{"current_steps": 3360, "total_steps": 78105, "loss": 0.4138, "lr": 2.1501728331839715e-06, "epoch": 0.21509506433647013, "percentage": 4.3, "elapsed_time": "0:07:35", "remaining_time": "2:48:51", "throughput": 23190.86, "total_tokens": 10562368}
|
|
{"current_steps": 3365, "total_steps": 78105, "loss": 0.6783, "lr": 2.153373447701959e-06, "epoch": 0.21541514627744704, "percentage": 4.31, "elapsed_time": "0:07:36", "remaining_time": "2:48:50", "throughput": 23190.35, "total_tokens": 10577280}
|
|
{"current_steps": 3370, "total_steps": 78105, "loss": 0.537, "lr": 2.1565740622199465e-06, "epoch": 0.21573522821842392, "percentage": 4.31, "elapsed_time": "0:07:36", "remaining_time": "2:48:49", "throughput": 23190.24, "total_tokens": 10592384}
|
|
{"current_steps": 3375, "total_steps": 78105, "loss": 0.4837, "lr": 2.159774676737934e-06, "epoch": 0.2160553101594008, "percentage": 4.32, "elapsed_time": "0:07:37", "remaining_time": "2:48:49", "throughput": 23190.14, "total_tokens": 10608704}
|
|
{"current_steps": 3380, "total_steps": 78105, "loss": 0.4391, "lr": 2.1629752912559214e-06, "epoch": 0.2163753921003777, "percentage": 4.33, "elapsed_time": "0:07:38", "remaining_time": "2:48:49", "throughput": 23190.49, "total_tokens": 10625280}
|
|
{"current_steps": 3385, "total_steps": 78105, "loss": 0.5326, "lr": 2.166175905773909e-06, "epoch": 0.21669547404135459, "percentage": 4.33, "elapsed_time": "0:07:38", "remaining_time": "2:48:48", "throughput": 23190.91, "total_tokens": 10641152}
|
|
{"current_steps": 3390, "total_steps": 78105, "loss": 0.4427, "lr": 2.1693765202918963e-06, "epoch": 0.21701555598233147, "percentage": 4.34, "elapsed_time": "0:07:39", "remaining_time": "2:48:48", "throughput": 23190.96, "total_tokens": 10657472}
|
|
{"current_steps": 3395, "total_steps": 78105, "loss": 0.529, "lr": 2.172577134809884e-06, "epoch": 0.21733563792330837, "percentage": 4.35, "elapsed_time": "0:07:40", "remaining_time": "2:48:47", "throughput": 23191.4, "total_tokens": 10673600}
|
|
{"current_steps": 3400, "total_steps": 78105, "loss": 0.54, "lr": 2.1757777493278713e-06, "epoch": 0.21765571986428525, "percentage": 4.35, "elapsed_time": "0:07:40", "remaining_time": "2:48:47", "throughput": 23191.26, "total_tokens": 10689344}
|
|
{"current_steps": 3405, "total_steps": 78105, "loss": 0.5031, "lr": 2.1789783638458587e-06, "epoch": 0.21797580180526216, "percentage": 4.36, "elapsed_time": "0:07:41", "remaining_time": "2:48:46", "throughput": 23190.73, "total_tokens": 10704384}
|
|
{"current_steps": 3410, "total_steps": 78105, "loss": 0.4983, "lr": 2.182178978363846e-06, "epoch": 0.21829588374623904, "percentage": 4.37, "elapsed_time": "0:07:42", "remaining_time": "2:48:45", "throughput": 23191.29, "total_tokens": 10720512}
|
|
{"current_steps": 3415, "total_steps": 78105, "loss": 0.5506, "lr": 2.1853795928818337e-06, "epoch": 0.21861596568721592, "percentage": 4.37, "elapsed_time": "0:07:42", "remaining_time": "2:48:44", "throughput": 23190.89, "total_tokens": 10735744}
|
|
{"current_steps": 3420, "total_steps": 78105, "loss": 0.5132, "lr": 2.188580207399821e-06, "epoch": 0.21893604762819283, "percentage": 4.38, "elapsed_time": "0:07:43", "remaining_time": "2:48:43", "throughput": 23190.53, "total_tokens": 10750848}
|
|
{"current_steps": 3425, "total_steps": 78105, "loss": 0.5671, "lr": 2.191780821917808e-06, "epoch": 0.2192561295691697, "percentage": 4.39, "elapsed_time": "0:07:44", "remaining_time": "2:48:42", "throughput": 23189.64, "total_tokens": 10765632}
|
|
{"current_steps": 3430, "total_steps": 78105, "loss": 0.451, "lr": 2.1949814364357956e-06, "epoch": 0.21957621151014659, "percentage": 4.39, "elapsed_time": "0:07:44", "remaining_time": "2:48:41", "throughput": 23189.01, "total_tokens": 10780928}
|
|
{"current_steps": 3435, "total_steps": 78105, "loss": 0.4231, "lr": 2.198182050953783e-06, "epoch": 0.2198962934511235, "percentage": 4.4, "elapsed_time": "0:07:45", "remaining_time": "2:48:41", "throughput": 23189.03, "total_tokens": 10796864}
|
|
{"current_steps": 3440, "total_steps": 78105, "loss": 0.4668, "lr": 2.2013826654717706e-06, "epoch": 0.22021637539210037, "percentage": 4.4, "elapsed_time": "0:07:46", "remaining_time": "2:48:40", "throughput": 23189.57, "total_tokens": 10812672}
|
|
{"current_steps": 3445, "total_steps": 78105, "loss": 0.4731, "lr": 2.204583279989758e-06, "epoch": 0.22053645733307728, "percentage": 4.41, "elapsed_time": "0:07:46", "remaining_time": "2:48:39", "throughput": 23189.94, "total_tokens": 10828544}
|
|
{"current_steps": 3450, "total_steps": 78105, "loss": 0.5998, "lr": 2.2077838945077455e-06, "epoch": 0.22085653927405416, "percentage": 4.42, "elapsed_time": "0:07:47", "remaining_time": "2:48:38", "throughput": 23190.21, "total_tokens": 10844288}
|
|
{"current_steps": 3455, "total_steps": 78105, "loss": 0.5434, "lr": 2.210984509025733e-06, "epoch": 0.22117662121503104, "percentage": 4.42, "elapsed_time": "0:07:48", "remaining_time": "2:48:38", "throughput": 23190.37, "total_tokens": 10860160}
|
|
{"current_steps": 3460, "total_steps": 78105, "loss": 0.6148, "lr": 2.2141851235437204e-06, "epoch": 0.22149670315600795, "percentage": 4.43, "elapsed_time": "0:07:48", "remaining_time": "2:48:37", "throughput": 23190.27, "total_tokens": 10875968}
|
|
{"current_steps": 3465, "total_steps": 78105, "loss": 0.3782, "lr": 2.217385738061708e-06, "epoch": 0.22181678509698483, "percentage": 4.44, "elapsed_time": "0:07:49", "remaining_time": "2:48:37", "throughput": 23190.67, "total_tokens": 10892096}
|
|
{"current_steps": 3470, "total_steps": 78105, "loss": 0.441, "lr": 2.2205863525796954e-06, "epoch": 0.2221368670379617, "percentage": 4.44, "elapsed_time": "0:07:50", "remaining_time": "2:48:37", "throughput": 23191.08, "total_tokens": 10908544}
|
|
{"current_steps": 3475, "total_steps": 78105, "loss": 0.515, "lr": 2.223786967097683e-06, "epoch": 0.2224569489789386, "percentage": 4.45, "elapsed_time": "0:07:51", "remaining_time": "2:48:36", "throughput": 23191.43, "total_tokens": 10924544}
|
|
{"current_steps": 3480, "total_steps": 78105, "loss": 0.4791, "lr": 2.2269875816156703e-06, "epoch": 0.2227770309199155, "percentage": 4.46, "elapsed_time": "0:07:51", "remaining_time": "2:48:36", "throughput": 23192.53, "total_tokens": 10941888}
|
|
{"current_steps": 3485, "total_steps": 78105, "loss": 0.4866, "lr": 2.2301881961336578e-06, "epoch": 0.2230971128608924, "percentage": 4.46, "elapsed_time": "0:07:52", "remaining_time": "2:48:36", "throughput": 23193.35, "total_tokens": 10958592}
|
|
{"current_steps": 3490, "total_steps": 78105, "loss": 0.664, "lr": 2.2333888106516453e-06, "epoch": 0.22341719480186928, "percentage": 4.47, "elapsed_time": "0:07:53", "remaining_time": "2:48:36", "throughput": 23193.13, "total_tokens": 10974208}
|
|
{"current_steps": 3495, "total_steps": 78105, "loss": 0.4307, "lr": 2.2365894251696327e-06, "epoch": 0.22373727674284616, "percentage": 4.47, "elapsed_time": "0:07:53", "remaining_time": "2:48:34", "throughput": 23192.33, "total_tokens": 10988928}
|
|
{"current_steps": 3500, "total_steps": 78105, "loss": 0.4749, "lr": 2.23979003968762e-06, "epoch": 0.22405735868382307, "percentage": 4.48, "elapsed_time": "0:07:54", "remaining_time": "2:48:34", "throughput": 23193.52, "total_tokens": 11005952}
|
|
{"current_steps": 3505, "total_steps": 78105, "loss": 0.4606, "lr": 2.2429906542056077e-06, "epoch": 0.22437744062479995, "percentage": 4.49, "elapsed_time": "0:07:55", "remaining_time": "2:48:33", "throughput": 23192.77, "total_tokens": 11020608}
|
|
{"current_steps": 3510, "total_steps": 78105, "loss": 0.4978, "lr": 2.246191268723595e-06, "epoch": 0.22469752256577685, "percentage": 4.49, "elapsed_time": "0:07:55", "remaining_time": "2:48:33", "throughput": 23193.24, "total_tokens": 11036736}
|
|
{"current_steps": 3515, "total_steps": 78105, "loss": 0.3839, "lr": 2.2493918832415826e-06, "epoch": 0.22501760450675373, "percentage": 4.5, "elapsed_time": "0:07:56", "remaining_time": "2:48:32", "throughput": 23193.64, "total_tokens": 11052480}
|
|
{"current_steps": 3520, "total_steps": 78105, "loss": 0.5269, "lr": 2.25259249775957e-06, "epoch": 0.2253376864477306, "percentage": 4.51, "elapsed_time": "0:07:57", "remaining_time": "2:48:31", "throughput": 23193.98, "total_tokens": 11068416}
|
|
{"current_steps": 3525, "total_steps": 78105, "loss": 0.464, "lr": 2.2557931122775575e-06, "epoch": 0.22565776838870752, "percentage": 4.51, "elapsed_time": "0:07:57", "remaining_time": "2:48:32", "throughput": 23195.77, "total_tokens": 11086400}
|
|
{"current_steps": 3530, "total_steps": 78105, "loss": 0.574, "lr": 2.258993726795545e-06, "epoch": 0.2259778503296844, "percentage": 4.52, "elapsed_time": "0:07:58", "remaining_time": "2:48:32", "throughput": 23196.11, "total_tokens": 11102848}
|
|
{"current_steps": 3535, "total_steps": 78105, "loss": 0.5355, "lr": 2.2621943413135325e-06, "epoch": 0.22629793227066128, "percentage": 4.53, "elapsed_time": "0:07:59", "remaining_time": "2:48:30", "throughput": 23195.9, "total_tokens": 11118080}
|
|
{"current_steps": 3540, "total_steps": 78105, "loss": 0.5444, "lr": 2.26539495583152e-06, "epoch": 0.22661801421163819, "percentage": 4.53, "elapsed_time": "0:07:59", "remaining_time": "2:48:29", "throughput": 23195.46, "total_tokens": 11133120}
|
|
{"current_steps": 3545, "total_steps": 78105, "loss": 0.4621, "lr": 2.268595570349507e-06, "epoch": 0.22693809615261507, "percentage": 4.54, "elapsed_time": "0:08:00", "remaining_time": "2:48:30", "throughput": 23196.09, "total_tokens": 11150144}
|
|
{"current_steps": 3550, "total_steps": 78105, "loss": 0.5036, "lr": 2.271796184867495e-06, "epoch": 0.22725817809359197, "percentage": 4.55, "elapsed_time": "0:08:01", "remaining_time": "2:48:29", "throughput": 23195.83, "total_tokens": 11165760}
|
|
{"current_steps": 3555, "total_steps": 78105, "loss": 0.4894, "lr": 2.2749967993854823e-06, "epoch": 0.22757826003456885, "percentage": 4.55, "elapsed_time": "0:08:02", "remaining_time": "2:48:27", "throughput": 23195.05, "total_tokens": 11180096}
|
|
{"current_steps": 3560, "total_steps": 78105, "loss": 0.396, "lr": 2.27819741390347e-06, "epoch": 0.22789834197554573, "percentage": 4.56, "elapsed_time": "0:08:02", "remaining_time": "2:48:27", "throughput": 23194.31, "total_tokens": 11195584}
|
|
{"current_steps": 3565, "total_steps": 78105, "loss": 0.4426, "lr": 2.2813980284214573e-06, "epoch": 0.22821842391652264, "percentage": 4.56, "elapsed_time": "0:08:03", "remaining_time": "2:48:26", "throughput": 23194.12, "total_tokens": 11210944}
|
|
{"current_steps": 3570, "total_steps": 78105, "loss": 0.428, "lr": 2.2845986429394447e-06, "epoch": 0.22853850585749952, "percentage": 4.57, "elapsed_time": "0:08:04", "remaining_time": "2:48:26", "throughput": 23194.59, "total_tokens": 11227392}
|
|
{"current_steps": 3575, "total_steps": 78105, "loss": 0.6618, "lr": 2.287799257457432e-06, "epoch": 0.2288585877984764, "percentage": 4.58, "elapsed_time": "0:08:04", "remaining_time": "2:48:26", "throughput": 23195.34, "total_tokens": 11244416}
|
|
{"current_steps": 3580, "total_steps": 78105, "loss": 0.4212, "lr": 2.2909998719754197e-06, "epoch": 0.2291786697394533, "percentage": 4.58, "elapsed_time": "0:08:05", "remaining_time": "2:48:25", "throughput": 23195.58, "total_tokens": 11260672}
|
|
{"current_steps": 3585, "total_steps": 78105, "loss": 0.5228, "lr": 2.294200486493407e-06, "epoch": 0.22949875168043019, "percentage": 4.59, "elapsed_time": "0:08:06", "remaining_time": "2:48:25", "throughput": 23196.36, "total_tokens": 11277504}
|
|
{"current_steps": 3590, "total_steps": 78105, "loss": 0.5973, "lr": 2.2974011010113946e-06, "epoch": 0.2298188336214071, "percentage": 4.6, "elapsed_time": "0:08:06", "remaining_time": "2:48:25", "throughput": 23196.08, "total_tokens": 11293184}
|
|
{"current_steps": 3595, "total_steps": 78105, "loss": 0.3263, "lr": 2.300601715529382e-06, "epoch": 0.23013891556238397, "percentage": 4.6, "elapsed_time": "0:08:07", "remaining_time": "2:48:24", "throughput": 23196.08, "total_tokens": 11308480}
|
|
{"current_steps": 3600, "total_steps": 78105, "loss": 0.4499, "lr": 2.3038023300473695e-06, "epoch": 0.23045899750336085, "percentage": 4.61, "elapsed_time": "0:08:08", "remaining_time": "2:48:23", "throughput": 23195.87, "total_tokens": 11323840}
|
|
{"current_steps": 3605, "total_steps": 78105, "loss": 0.6712, "lr": 2.3070029445653566e-06, "epoch": 0.23077907944433776, "percentage": 4.62, "elapsed_time": "0:08:08", "remaining_time": "2:48:22", "throughput": 23196.03, "total_tokens": 11339456}
|
|
{"current_steps": 3610, "total_steps": 78105, "loss": 0.6056, "lr": 2.310203559083344e-06, "epoch": 0.23109916138531464, "percentage": 4.62, "elapsed_time": "0:08:09", "remaining_time": "2:48:21", "throughput": 23195.61, "total_tokens": 11354880}
|
|
{"current_steps": 3615, "total_steps": 78105, "loss": 0.5216, "lr": 2.3134041736013315e-06, "epoch": 0.23141924332629152, "percentage": 4.63, "elapsed_time": "0:08:10", "remaining_time": "2:48:20", "throughput": 23195.37, "total_tokens": 11369984}
|
|
{"current_steps": 3620, "total_steps": 78105, "loss": 0.4563, "lr": 2.316604788119319e-06, "epoch": 0.23173932526726843, "percentage": 4.63, "elapsed_time": "0:08:10", "remaining_time": "2:48:19", "throughput": 23194.93, "total_tokens": 11384640}
|
|
{"current_steps": 3625, "total_steps": 78105, "loss": 0.4994, "lr": 2.3198054026373064e-06, "epoch": 0.2320594072082453, "percentage": 4.64, "elapsed_time": "0:08:11", "remaining_time": "2:48:18", "throughput": 23195.51, "total_tokens": 11401152}
|
|
{"current_steps": 3630, "total_steps": 78105, "loss": 0.4981, "lr": 2.323006017155294e-06, "epoch": 0.2323794891492222, "percentage": 4.65, "elapsed_time": "0:08:12", "remaining_time": "2:48:18", "throughput": 23195.55, "total_tokens": 11416896}
|
|
{"current_steps": 3635, "total_steps": 78105, "loss": 0.5016, "lr": 2.3262066316732814e-06, "epoch": 0.2326995710901991, "percentage": 4.65, "elapsed_time": "0:08:12", "remaining_time": "2:48:17", "throughput": 23195.83, "total_tokens": 11432832}
|
|
{"current_steps": 3640, "total_steps": 78105, "loss": 0.5421, "lr": 2.329407246191269e-06, "epoch": 0.23301965303117597, "percentage": 4.66, "elapsed_time": "0:08:13", "remaining_time": "2:48:16", "throughput": 23195.65, "total_tokens": 11448320}
|
|
{"current_steps": 3645, "total_steps": 78105, "loss": 0.6042, "lr": 2.3326078607092563e-06, "epoch": 0.23333973497215288, "percentage": 4.67, "elapsed_time": "0:08:14", "remaining_time": "2:48:15", "throughput": 23195.16, "total_tokens": 11463488}
|
|
{"current_steps": 3650, "total_steps": 78105, "loss": 0.4562, "lr": 2.3358084752272438e-06, "epoch": 0.23365981691312976, "percentage": 4.67, "elapsed_time": "0:08:14", "remaining_time": "2:48:14", "throughput": 23194.94, "total_tokens": 11478592}
|
|
{"current_steps": 3655, "total_steps": 78105, "loss": 0.4173, "lr": 2.3390090897452312e-06, "epoch": 0.23397989885410664, "percentage": 4.68, "elapsed_time": "0:08:15", "remaining_time": "2:48:14", "throughput": 23194.77, "total_tokens": 11494720}
|
|
{"current_steps": 3660, "total_steps": 78105, "loss": 0.5285, "lr": 2.3422097042632187e-06, "epoch": 0.23429998079508355, "percentage": 4.69, "elapsed_time": "0:08:16", "remaining_time": "2:48:14", "throughput": 23194.91, "total_tokens": 11511232}
|
|
{"current_steps": 3665, "total_steps": 78105, "loss": 0.5821, "lr": 2.345410318781206e-06, "epoch": 0.23462006273606043, "percentage": 4.69, "elapsed_time": "0:08:16", "remaining_time": "2:48:13", "throughput": 23194.96, "total_tokens": 11527040}
|
|
{"current_steps": 3670, "total_steps": 78105, "loss": 0.5213, "lr": 2.3486109332991936e-06, "epoch": 0.23494014467703733, "percentage": 4.7, "elapsed_time": "0:08:17", "remaining_time": "2:48:12", "throughput": 23194.94, "total_tokens": 11542528}
|
|
{"current_steps": 3675, "total_steps": 78105, "loss": 0.4849, "lr": 2.351811547817181e-06, "epoch": 0.2352602266180142, "percentage": 4.71, "elapsed_time": "0:08:18", "remaining_time": "2:48:11", "throughput": 23194.61, "total_tokens": 11557696}
|
|
{"current_steps": 3680, "total_steps": 78105, "loss": 0.4685, "lr": 2.3550121623351686e-06, "epoch": 0.2355803085589911, "percentage": 4.71, "elapsed_time": "0:08:19", "remaining_time": "2:48:12", "throughput": 23195.44, "total_tokens": 11574848}
|
|
{"current_steps": 3685, "total_steps": 78105, "loss": 0.531, "lr": 2.358212776853156e-06, "epoch": 0.235900390499968, "percentage": 4.72, "elapsed_time": "0:08:19", "remaining_time": "2:48:10", "throughput": 23194.89, "total_tokens": 11589696}
|
|
{"current_steps": 3690, "total_steps": 78105, "loss": 0.4164, "lr": 2.3614133913711435e-06, "epoch": 0.23622047244094488, "percentage": 4.72, "elapsed_time": "0:08:20", "remaining_time": "2:48:11", "throughput": 23195.66, "total_tokens": 11606848}
|
|
{"current_steps": 3695, "total_steps": 78105, "loss": 0.7326, "lr": 2.364614005889131e-06, "epoch": 0.23654055438192176, "percentage": 4.73, "elapsed_time": "0:08:21", "remaining_time": "2:48:11", "throughput": 23196.72, "total_tokens": 11623744}
|
|
{"current_steps": 3700, "total_steps": 78105, "loss": 0.5326, "lr": 2.3678146204071184e-06, "epoch": 0.23686063632289867, "percentage": 4.74, "elapsed_time": "0:08:21", "remaining_time": "2:48:10", "throughput": 23197.16, "total_tokens": 11640256}
|
|
{"current_steps": 3705, "total_steps": 78105, "loss": 0.4121, "lr": 2.371015234925106e-06, "epoch": 0.23718071826387555, "percentage": 4.74, "elapsed_time": "0:08:22", "remaining_time": "2:48:10", "throughput": 23197.0, "total_tokens": 11655744}
|
|
{"current_steps": 3710, "total_steps": 78105, "loss": 0.3082, "lr": 2.3742158494430934e-06, "epoch": 0.23750080020485245, "percentage": 4.75, "elapsed_time": "0:08:23", "remaining_time": "2:48:09", "throughput": 23197.09, "total_tokens": 11671104}
|
|
{"current_steps": 3715, "total_steps": 78105, "loss": 0.4167, "lr": 2.377416463961081e-06, "epoch": 0.23782088214582933, "percentage": 4.76, "elapsed_time": "0:08:23", "remaining_time": "2:48:08", "throughput": 23197.43, "total_tokens": 11686848}
|
|
{"current_steps": 3720, "total_steps": 78105, "loss": 0.5506, "lr": 2.3806170784790683e-06, "epoch": 0.2381409640868062, "percentage": 4.76, "elapsed_time": "0:08:24", "remaining_time": "2:48:07", "throughput": 23197.18, "total_tokens": 11701952}
|
|
{"current_steps": 3725, "total_steps": 78105, "loss": 0.5445, "lr": 2.3838176929970554e-06, "epoch": 0.23846104602778312, "percentage": 4.77, "elapsed_time": "0:08:25", "remaining_time": "2:48:06", "throughput": 23197.81, "total_tokens": 11718592}
|
|
{"current_steps": 3730, "total_steps": 78105, "loss": 0.5111, "lr": 2.387018307515043e-06, "epoch": 0.23878112796876, "percentage": 4.78, "elapsed_time": "0:08:25", "remaining_time": "2:48:06", "throughput": 23197.87, "total_tokens": 11734208}
|
|
{"current_steps": 3735, "total_steps": 78105, "loss": 0.4335, "lr": 2.3902189220330303e-06, "epoch": 0.2391012099097369, "percentage": 4.78, "elapsed_time": "0:08:26", "remaining_time": "2:48:04", "throughput": 23197.47, "total_tokens": 11748992}
|
|
{"current_steps": 3740, "total_steps": 78105, "loss": 0.4791, "lr": 2.3934195365510178e-06, "epoch": 0.23942129185071379, "percentage": 4.79, "elapsed_time": "0:08:27", "remaining_time": "2:48:04", "throughput": 23197.47, "total_tokens": 11764608}
|
|
{"current_steps": 3745, "total_steps": 78105, "loss": 0.5135, "lr": 2.3966201510690052e-06, "epoch": 0.23974137379169067, "percentage": 4.79, "elapsed_time": "0:08:27", "remaining_time": "2:48:03", "throughput": 23197.86, "total_tokens": 11780800}
|
|
{"current_steps": 3750, "total_steps": 78105, "loss": 0.5004, "lr": 2.3998207655869927e-06, "epoch": 0.24006145573266757, "percentage": 4.8, "elapsed_time": "0:08:28", "remaining_time": "2:48:02", "throughput": 23197.5, "total_tokens": 11795776}
|
|
{"current_steps": 3755, "total_steps": 78105, "loss": 0.5629, "lr": 2.40302138010498e-06, "epoch": 0.24038153767364445, "percentage": 4.81, "elapsed_time": "0:08:29", "remaining_time": "2:48:01", "throughput": 23197.72, "total_tokens": 11811776}
|
|
{"current_steps": 3760, "total_steps": 78105, "loss": 0.5444, "lr": 2.406221994622968e-06, "epoch": 0.24070161961462133, "percentage": 4.81, "elapsed_time": "0:08:29", "remaining_time": "2:48:01", "throughput": 23198.07, "total_tokens": 11828224}
|
|
{"current_steps": 3765, "total_steps": 78105, "loss": 0.4867, "lr": 2.4094226091409555e-06, "epoch": 0.24102170155559824, "percentage": 4.82, "elapsed_time": "0:08:30", "remaining_time": "2:48:01", "throughput": 23197.63, "total_tokens": 11843904}
|
|
{"current_steps": 3770, "total_steps": 78105, "loss": 0.6049, "lr": 2.412623223658943e-06, "epoch": 0.24134178349657512, "percentage": 4.83, "elapsed_time": "0:08:31", "remaining_time": "2:48:00", "throughput": 23197.49, "total_tokens": 11859392}
|
|
{"current_steps": 3775, "total_steps": 78105, "loss": 0.5397, "lr": 2.4158238381769305e-06, "epoch": 0.24166186543755203, "percentage": 4.83, "elapsed_time": "0:08:31", "remaining_time": "2:48:00", "throughput": 23198.11, "total_tokens": 11875968}
|
|
{"current_steps": 3780, "total_steps": 78105, "loss": 0.4635, "lr": 2.419024452694918e-06, "epoch": 0.2419819473785289, "percentage": 4.84, "elapsed_time": "0:08:32", "remaining_time": "2:47:59", "throughput": 23197.73, "total_tokens": 11891584}
|
|
{"current_steps": 3785, "total_steps": 78105, "loss": 0.4962, "lr": 2.422225067212905e-06, "epoch": 0.24230202931950579, "percentage": 4.85, "elapsed_time": "0:08:33", "remaining_time": "2:47:58", "throughput": 23197.52, "total_tokens": 11906944}
|
|
{"current_steps": 3790, "total_steps": 78105, "loss": 0.5116, "lr": 2.4254256817308924e-06, "epoch": 0.2426221112604827, "percentage": 4.85, "elapsed_time": "0:08:33", "remaining_time": "2:47:57", "throughput": 23197.42, "total_tokens": 11922368}
|
|
{"current_steps": 3795, "total_steps": 78105, "loss": 0.5673, "lr": 2.42862629624888e-06, "epoch": 0.24294219320145957, "percentage": 4.86, "elapsed_time": "0:08:34", "remaining_time": "2:47:57", "throughput": 23197.81, "total_tokens": 11938432}
|
|
{"current_steps": 3800, "total_steps": 78105, "loss": 0.5269, "lr": 2.4318269107668674e-06, "epoch": 0.24326227514243645, "percentage": 4.87, "elapsed_time": "0:08:35", "remaining_time": "2:47:57", "throughput": 23199.39, "total_tokens": 11956608}
|
|
{"current_steps": 3805, "total_steps": 78105, "loss": 0.4994, "lr": 2.435027525284855e-06, "epoch": 0.24358235708341336, "percentage": 4.87, "elapsed_time": "0:08:36", "remaining_time": "2:47:57", "throughput": 23198.86, "total_tokens": 11972480}
|
|
{"current_steps": 3810, "total_steps": 78105, "loss": 0.4374, "lr": 2.4382281398028423e-06, "epoch": 0.24390243902439024, "percentage": 4.88, "elapsed_time": "0:08:36", "remaining_time": "2:47:56", "throughput": 23197.87, "total_tokens": 11987072}
|
|
{"current_steps": 3815, "total_steps": 78105, "loss": 0.4759, "lr": 2.4414287543208298e-06, "epoch": 0.24422252096536715, "percentage": 4.88, "elapsed_time": "0:08:37", "remaining_time": "2:47:55", "throughput": 23197.2, "total_tokens": 12002112}
|
|
{"current_steps": 3820, "total_steps": 78105, "loss": 0.3848, "lr": 2.4446293688388172e-06, "epoch": 0.24454260290634403, "percentage": 4.89, "elapsed_time": "0:08:38", "remaining_time": "2:47:54", "throughput": 23196.31, "total_tokens": 12016960}
|
|
{"current_steps": 3825, "total_steps": 78105, "loss": 0.4593, "lr": 2.4478299833568047e-06, "epoch": 0.2448626848473209, "percentage": 4.9, "elapsed_time": "0:08:38", "remaining_time": "2:47:53", "throughput": 23196.34, "total_tokens": 12032576}
|
|
{"current_steps": 3830, "total_steps": 78105, "loss": 0.3892, "lr": 2.451030597874792e-06, "epoch": 0.2451827667882978, "percentage": 4.9, "elapsed_time": "0:08:39", "remaining_time": "2:47:53", "throughput": 23196.58, "total_tokens": 12048768}
|
|
{"current_steps": 3835, "total_steps": 78105, "loss": 0.4523, "lr": 2.4542312123927796e-06, "epoch": 0.2455028487292747, "percentage": 4.91, "elapsed_time": "0:08:40", "remaining_time": "2:47:52", "throughput": 23197.29, "total_tokens": 12065088}
|
|
{"current_steps": 3840, "total_steps": 78105, "loss": 0.6496, "lr": 2.457431826910767e-06, "epoch": 0.24582293067025157, "percentage": 4.92, "elapsed_time": "0:08:40", "remaining_time": "2:47:51", "throughput": 23197.33, "total_tokens": 12080704}
|
|
{"current_steps": 3845, "total_steps": 78105, "loss": 0.4849, "lr": 2.4606324414287546e-06, "epoch": 0.24614301261122848, "percentage": 4.92, "elapsed_time": "0:08:41", "remaining_time": "2:47:51", "throughput": 23197.42, "total_tokens": 12096704}
|
|
{"current_steps": 3850, "total_steps": 78105, "loss": 0.5674, "lr": 2.463833055946742e-06, "epoch": 0.24646309455220536, "percentage": 4.93, "elapsed_time": "0:08:42", "remaining_time": "2:47:50", "throughput": 23196.77, "total_tokens": 12111488}
|
|
{"current_steps": 3855, "total_steps": 78105, "loss": 0.523, "lr": 2.4670336704647295e-06, "epoch": 0.24678317649318227, "percentage": 4.94, "elapsed_time": "0:08:42", "remaining_time": "2:47:49", "throughput": 23197.76, "total_tokens": 12128192}
|
|
{"current_steps": 3860, "total_steps": 78105, "loss": 0.4971, "lr": 2.470234284982717e-06, "epoch": 0.24710325843415915, "percentage": 4.94, "elapsed_time": "0:08:43", "remaining_time": "2:47:49", "throughput": 23197.61, "total_tokens": 12143808}
|
|
{"current_steps": 3865, "total_steps": 78105, "loss": 0.5054, "lr": 2.4734348995007044e-06, "epoch": 0.24742334037513602, "percentage": 4.95, "elapsed_time": "0:08:44", "remaining_time": "2:47:48", "throughput": 23197.05, "total_tokens": 12159232}
|
|
{"current_steps": 3870, "total_steps": 78105, "loss": 0.4884, "lr": 2.476635514018692e-06, "epoch": 0.24774342231611293, "percentage": 4.95, "elapsed_time": "0:08:44", "remaining_time": "2:47:48", "throughput": 23197.23, "total_tokens": 12175360}
|
|
{"current_steps": 3875, "total_steps": 78105, "loss": 0.4883, "lr": 2.4798361285366794e-06, "epoch": 0.2480635042570898, "percentage": 4.96, "elapsed_time": "0:08:45", "remaining_time": "2:47:46", "throughput": 23196.88, "total_tokens": 12190272}
|
|
{"current_steps": 3880, "total_steps": 78105, "loss": 0.5172, "lr": 2.483036743054667e-06, "epoch": 0.2483835861980667, "percentage": 4.97, "elapsed_time": "0:08:46", "remaining_time": "2:47:47", "throughput": 23198.02, "total_tokens": 12208448}
|
|
{"current_steps": 3885, "total_steps": 78105, "loss": 0.7252, "lr": 2.4862373575726543e-06, "epoch": 0.2487036681390436, "percentage": 4.97, "elapsed_time": "0:08:47", "remaining_time": "2:47:48", "throughput": 23198.68, "total_tokens": 12225984}
|
|
{"current_steps": 3890, "total_steps": 78105, "loss": 0.5371, "lr": 2.4894379720906418e-06, "epoch": 0.24902375008002048, "percentage": 4.98, "elapsed_time": "0:08:47", "remaining_time": "2:47:47", "throughput": 23198.17, "total_tokens": 12241408}
|
|
{"current_steps": 3895, "total_steps": 78105, "loss": 0.4739, "lr": 2.4926385866086292e-06, "epoch": 0.24934383202099739, "percentage": 4.99, "elapsed_time": "0:08:48", "remaining_time": "2:47:46", "throughput": 23197.33, "total_tokens": 12256064}
|
|
{"current_steps": 3900, "total_steps": 78105, "loss": 0.4953, "lr": 2.4958392011266167e-06, "epoch": 0.24966391396197427, "percentage": 4.99, "elapsed_time": "0:08:49", "remaining_time": "2:47:46", "throughput": 23197.78, "total_tokens": 12272832}
|
|
{"current_steps": 3905, "total_steps": 78105, "loss": 0.6094, "lr": 2.4990398156446038e-06, "epoch": 0.24998399590295114, "percentage": 5.0, "elapsed_time": "0:08:49", "remaining_time": "2:47:45", "throughput": 23198.11, "total_tokens": 12289024}
|
|
{"current_steps": 3906, "total_steps": 78105, "eval_loss": 0.5013530850410461, "epoch": 0.25004801229114654, "percentage": 5.0, "elapsed_time": "0:09:40", "remaining_time": "3:03:56", "throughput": 21157.41, "total_tokens": 12292032}
|
|
{"current_steps": 3910, "total_steps": 78105, "loss": 0.4658, "lr": 2.5022404301625912e-06, "epoch": 0.25030407784392805, "percentage": 5.01, "elapsed_time": "0:10:46", "remaining_time": "3:24:31", "throughput": 19025.89, "total_tokens": 12304064}
|
|
{"current_steps": 3915, "total_steps": 78105, "loss": 0.5182, "lr": 2.5054410446805787e-06, "epoch": 0.25062415978490493, "percentage": 5.01, "elapsed_time": "0:10:47", "remaining_time": "3:24:28", "throughput": 19030.42, "total_tokens": 12320256}
|
|
{"current_steps": 3920, "total_steps": 78105, "loss": 0.5085, "lr": 2.508641659198566e-06, "epoch": 0.2509442417258818, "percentage": 5.02, "elapsed_time": "0:10:48", "remaining_time": "3:24:24", "throughput": 19034.26, "total_tokens": 12335360}
|
|
{"current_steps": 3925, "total_steps": 78105, "loss": 0.5237, "lr": 2.5118422737165536e-06, "epoch": 0.2512643236668587, "percentage": 5.03, "elapsed_time": "0:10:48", "remaining_time": "3:24:20", "throughput": 19038.94, "total_tokens": 12351488}
|
|
{"current_steps": 3930, "total_steps": 78105, "loss": 0.6624, "lr": 2.515042888234541e-06, "epoch": 0.2515844056078356, "percentage": 5.03, "elapsed_time": "0:10:49", "remaining_time": "3:24:16", "throughput": 19042.34, "total_tokens": 12365952}
|
|
{"current_steps": 3935, "total_steps": 78105, "loss": 0.5619, "lr": 2.5182435027525286e-06, "epoch": 0.2519044875488125, "percentage": 5.04, "elapsed_time": "0:10:50", "remaining_time": "3:24:12", "throughput": 19046.81, "total_tokens": 12381376}
|
|
{"current_steps": 3940, "total_steps": 78105, "loss": 0.4985, "lr": 2.521444117270516e-06, "epoch": 0.2522245694897894, "percentage": 5.04, "elapsed_time": "0:10:50", "remaining_time": "3:24:08", "throughput": 19050.62, "total_tokens": 12396160}
|
|
{"current_steps": 3945, "total_steps": 78105, "loss": 0.5166, "lr": 2.5246447317885035e-06, "epoch": 0.25254465143076626, "percentage": 5.05, "elapsed_time": "0:10:51", "remaining_time": "3:24:04", "throughput": 19054.3, "total_tokens": 12410944}
|
|
{"current_steps": 3950, "total_steps": 78105, "loss": 0.3987, "lr": 2.527845346306491e-06, "epoch": 0.25286473337174314, "percentage": 5.06, "elapsed_time": "0:10:52", "remaining_time": "3:24:00", "throughput": 19058.38, "total_tokens": 12426304}
|
|
{"current_steps": 3955, "total_steps": 78105, "loss": 0.4964, "lr": 2.5310459608244784e-06, "epoch": 0.2531848153127201, "percentage": 5.06, "elapsed_time": "0:10:52", "remaining_time": "3:23:57", "throughput": 19063.0, "total_tokens": 12442432}
|
|
{"current_steps": 3960, "total_steps": 78105, "loss": 0.523, "lr": 2.534246575342466e-06, "epoch": 0.25350489725369696, "percentage": 5.07, "elapsed_time": "0:10:53", "remaining_time": "3:23:53", "throughput": 19067.28, "total_tokens": 12458368}
|
|
{"current_steps": 3965, "total_steps": 78105, "loss": 0.4316, "lr": 2.5374471898604538e-06, "epoch": 0.25382497919467384, "percentage": 5.08, "elapsed_time": "0:10:54", "remaining_time": "3:23:50", "throughput": 19071.69, "total_tokens": 12474368}
|
|
{"current_steps": 3970, "total_steps": 78105, "loss": 0.5835, "lr": 2.5406478043784412e-06, "epoch": 0.2541450611356507, "percentage": 5.08, "elapsed_time": "0:10:54", "remaining_time": "3:23:46", "throughput": 19075.96, "total_tokens": 12490176}
|
|
{"current_steps": 3975, "total_steps": 78105, "loss": 0.6054, "lr": 2.5438484188964287e-06, "epoch": 0.2544651430766276, "percentage": 5.09, "elapsed_time": "0:10:55", "remaining_time": "3:23:44", "throughput": 19081.43, "total_tokens": 12507840}
|
|
{"current_steps": 3980, "total_steps": 78105, "loss": 0.4305, "lr": 2.547049033414416e-06, "epoch": 0.25478522501760453, "percentage": 5.1, "elapsed_time": "0:10:56", "remaining_time": "3:23:40", "throughput": 19085.48, "total_tokens": 12523520}
|
|
{"current_steps": 3985, "total_steps": 78105, "loss": 0.5948, "lr": 2.5502496479324037e-06, "epoch": 0.2551053069585814, "percentage": 5.1, "elapsed_time": "0:10:56", "remaining_time": "3:23:38", "throughput": 19090.73, "total_tokens": 12540736}
|
|
{"current_steps": 3990, "total_steps": 78105, "loss": 0.5259, "lr": 2.553450262450391e-06, "epoch": 0.2554253888995583, "percentage": 5.11, "elapsed_time": "0:10:57", "remaining_time": "3:23:34", "throughput": 19095.07, "total_tokens": 12556352}
|
|
{"current_steps": 3995, "total_steps": 78105, "loss": 0.3837, "lr": 2.5566508769683777e-06, "epoch": 0.25574547084053517, "percentage": 5.11, "elapsed_time": "0:10:58", "remaining_time": "3:23:30", "throughput": 19098.36, "total_tokens": 12570688}
|
|
{"current_steps": 4000, "total_steps": 78105, "loss": 0.4606, "lr": 2.559851491486365e-06, "epoch": 0.25606555278151205, "percentage": 5.12, "elapsed_time": "0:10:58", "remaining_time": "3:23:26", "throughput": 19102.41, "total_tokens": 12585984}
|
|
{"current_steps": 4005, "total_steps": 78105, "loss": 0.5053, "lr": 2.5630521060043527e-06, "epoch": 0.25638563472248893, "percentage": 5.13, "elapsed_time": "0:10:59", "remaining_time": "3:23:22", "throughput": 19106.45, "total_tokens": 12601216}
|
|
{"current_steps": 4010, "total_steps": 78105, "loss": 0.4721, "lr": 2.56625272052234e-06, "epoch": 0.25670571666346587, "percentage": 5.13, "elapsed_time": "0:11:00", "remaining_time": "3:23:19", "throughput": 19111.81, "total_tokens": 12618624}
|
|
{"current_steps": 4015, "total_steps": 78105, "loss": 0.6074, "lr": 2.5694533350403276e-06, "epoch": 0.25702579860444275, "percentage": 5.14, "elapsed_time": "0:11:00", "remaining_time": "3:23:17", "throughput": 19116.73, "total_tokens": 12635840}
|
|
{"current_steps": 4020, "total_steps": 78105, "loss": 0.4088, "lr": 2.572653949558315e-06, "epoch": 0.2573458805454196, "percentage": 5.15, "elapsed_time": "0:11:01", "remaining_time": "3:23:14", "throughput": 19121.05, "total_tokens": 12651904}
|
|
{"current_steps": 4025, "total_steps": 78105, "loss": 0.599, "lr": 2.5758545640763025e-06, "epoch": 0.2576659624863965, "percentage": 5.15, "elapsed_time": "0:11:02", "remaining_time": "3:23:10", "throughput": 19124.88, "total_tokens": 12666944}
|
|
{"current_steps": 4030, "total_steps": 78105, "loss": 0.4438, "lr": 2.5790551785942904e-06, "epoch": 0.2579860444273734, "percentage": 5.16, "elapsed_time": "0:11:03", "remaining_time": "3:23:06", "throughput": 19128.99, "total_tokens": 12682688}
|
|
{"current_steps": 4035, "total_steps": 78105, "loss": 0.5512, "lr": 2.582255793112278e-06, "epoch": 0.2583061263683503, "percentage": 5.17, "elapsed_time": "0:11:03", "remaining_time": "3:23:03", "throughput": 19133.2, "total_tokens": 12698304}
|
|
{"current_steps": 4040, "total_steps": 78105, "loss": 0.5183, "lr": 2.5854564076302654e-06, "epoch": 0.2586262083093272, "percentage": 5.17, "elapsed_time": "0:11:04", "remaining_time": "3:22:59", "throughput": 19136.55, "total_tokens": 12712896}
|
|
{"current_steps": 4045, "total_steps": 78105, "loss": 0.3824, "lr": 2.588657022148253e-06, "epoch": 0.2589462902503041, "percentage": 5.18, "elapsed_time": "0:11:04", "remaining_time": "3:22:54", "throughput": 19139.92, "total_tokens": 12727488}
|
|
{"current_steps": 4050, "total_steps": 78105, "loss": 0.3892, "lr": 2.5918576366662403e-06, "epoch": 0.25926637219128096, "percentage": 5.19, "elapsed_time": "0:11:05", "remaining_time": "3:22:51", "throughput": 19143.74, "total_tokens": 12742720}
|
|
{"current_steps": 4055, "total_steps": 78105, "loss": 0.4508, "lr": 2.5950582511842278e-06, "epoch": 0.25958645413225784, "percentage": 5.19, "elapsed_time": "0:11:06", "remaining_time": "3:22:47", "throughput": 19147.74, "total_tokens": 12757760}
|
|
{"current_steps": 4060, "total_steps": 78105, "loss": 0.5575, "lr": 2.5982588657022152e-06, "epoch": 0.2599065360732348, "percentage": 5.2, "elapsed_time": "0:11:06", "remaining_time": "3:22:43", "throughput": 19151.84, "total_tokens": 12773312}
|
|
{"current_steps": 4065, "total_steps": 78105, "loss": 0.6196, "lr": 2.6014594802202027e-06, "epoch": 0.26022661801421165, "percentage": 5.2, "elapsed_time": "0:11:07", "remaining_time": "3:22:40", "throughput": 19156.93, "total_tokens": 12790336}
|
|
{"current_steps": 4070, "total_steps": 78105, "loss": 0.4902, "lr": 2.60466009473819e-06, "epoch": 0.26054669995518853, "percentage": 5.21, "elapsed_time": "0:11:08", "remaining_time": "3:22:37", "throughput": 19160.43, "total_tokens": 12805632}
|
|
{"current_steps": 4075, "total_steps": 78105, "loss": 0.5747, "lr": 2.6078607092561776e-06, "epoch": 0.2608667818961654, "percentage": 5.22, "elapsed_time": "0:11:09", "remaining_time": "3:22:33", "throughput": 19164.47, "total_tokens": 12821184}
|
|
{"current_steps": 4080, "total_steps": 78105, "loss": 0.5308, "lr": 2.611061323774165e-06, "epoch": 0.2611868638371423, "percentage": 5.22, "elapsed_time": "0:11:09", "remaining_time": "3:22:30", "throughput": 19168.18, "total_tokens": 12836544}
|
|
{"current_steps": 4085, "total_steps": 78105, "loss": 0.5001, "lr": 2.6142619382921526e-06, "epoch": 0.26150694577811917, "percentage": 5.23, "elapsed_time": "0:11:10", "remaining_time": "3:22:26", "throughput": 19172.05, "total_tokens": 12852032}
|
|
{"current_steps": 4090, "total_steps": 78105, "loss": 0.4556, "lr": 2.61746255281014e-06, "epoch": 0.2618270277190961, "percentage": 5.24, "elapsed_time": "0:11:11", "remaining_time": "3:22:23", "throughput": 19175.87, "total_tokens": 12867456}
|
|
{"current_steps": 4095, "total_steps": 78105, "loss": 0.3812, "lr": 2.6206631673281275e-06, "epoch": 0.262147109660073, "percentage": 5.24, "elapsed_time": "0:11:11", "remaining_time": "3:22:20", "throughput": 19181.45, "total_tokens": 12885184}
|
|
{"current_steps": 4100, "total_steps": 78105, "loss": 0.4032, "lr": 2.623863781846115e-06, "epoch": 0.26246719160104987, "percentage": 5.25, "elapsed_time": "0:11:12", "remaining_time": "3:22:16", "throughput": 19185.4, "total_tokens": 12900416}
|
|
{"current_steps": 4105, "total_steps": 78105, "loss": 0.6657, "lr": 2.6270643963641024e-06, "epoch": 0.26278727354202674, "percentage": 5.26, "elapsed_time": "0:11:13", "remaining_time": "3:22:13", "throughput": 19189.26, "total_tokens": 12915648}
|
|
{"current_steps": 4110, "total_steps": 78105, "loss": 0.4769, "lr": 2.63026501088209e-06, "epoch": 0.2631073554830036, "percentage": 5.26, "elapsed_time": "0:11:13", "remaining_time": "3:22:09", "throughput": 19192.8, "total_tokens": 12930368}
|
|
{"current_steps": 4115, "total_steps": 78105, "loss": 0.5281, "lr": 2.633465625400077e-06, "epoch": 0.26342743742398056, "percentage": 5.27, "elapsed_time": "0:11:14", "remaining_time": "3:22:05", "throughput": 19196.66, "total_tokens": 12945472}
|
|
{"current_steps": 4120, "total_steps": 78105, "loss": 0.5905, "lr": 2.6366662399180644e-06, "epoch": 0.26374751936495744, "percentage": 5.27, "elapsed_time": "0:11:15", "remaining_time": "3:22:02", "throughput": 19200.65, "total_tokens": 12961472}
|
|
{"current_steps": 4125, "total_steps": 78105, "loss": 0.6382, "lr": 2.639866854436052e-06, "epoch": 0.2640676013059343, "percentage": 5.28, "elapsed_time": "0:11:15", "remaining_time": "3:21:58", "throughput": 19203.93, "total_tokens": 12975872}
|
|
{"current_steps": 4130, "total_steps": 78105, "loss": 0.5455, "lr": 2.6430674689540394e-06, "epoch": 0.2643876832469112, "percentage": 5.29, "elapsed_time": "0:11:16", "remaining_time": "3:21:55", "throughput": 19208.31, "total_tokens": 12992128}
|
|
{"current_steps": 4135, "total_steps": 78105, "loss": 0.4478, "lr": 2.646268083472027e-06, "epoch": 0.2647077651878881, "percentage": 5.29, "elapsed_time": "0:11:17", "remaining_time": "3:22:00", "throughput": 19217.74, "total_tokens": 13021184}
|
|
{"current_steps": 4140, "total_steps": 78105, "loss": 0.4527, "lr": 2.6494686979900143e-06, "epoch": 0.265027847128865, "percentage": 5.3, "elapsed_time": "0:11:18", "remaining_time": "3:21:57", "throughput": 19222.13, "total_tokens": 13037568}
|
|
{"current_steps": 4145, "total_steps": 78105, "loss": 0.5069, "lr": 2.6526693125080018e-06, "epoch": 0.2653479290698419, "percentage": 5.31, "elapsed_time": "0:11:18", "remaining_time": "3:21:54", "throughput": 19226.19, "total_tokens": 13053440}
|
|
{"current_steps": 4150, "total_steps": 78105, "loss": 0.6118, "lr": 2.6558699270259892e-06, "epoch": 0.26566801101081877, "percentage": 5.31, "elapsed_time": "0:11:19", "remaining_time": "3:21:51", "throughput": 19230.33, "total_tokens": 13069440}
|
|
{"current_steps": 4155, "total_steps": 78105, "loss": 0.5617, "lr": 2.6590705415439767e-06, "epoch": 0.26598809295179565, "percentage": 5.32, "elapsed_time": "0:11:20", "remaining_time": "3:21:47", "throughput": 19233.72, "total_tokens": 13084224}
|
|
{"current_steps": 4160, "total_steps": 78105, "loss": 0.7227, "lr": 2.662271156061964e-06, "epoch": 0.26630817489277253, "percentage": 5.33, "elapsed_time": "0:11:20", "remaining_time": "3:21:43", "throughput": 19237.01, "total_tokens": 13098688}
|
|
{"current_steps": 4165, "total_steps": 78105, "loss": 0.4641, "lr": 2.6654717705799516e-06, "epoch": 0.26662825683374947, "percentage": 5.33, "elapsed_time": "0:11:21", "remaining_time": "3:21:39", "throughput": 19240.66, "total_tokens": 13114112}
|
|
{"current_steps": 4170, "total_steps": 78105, "loss": 0.4726, "lr": 2.668672385097939e-06, "epoch": 0.26694833877472635, "percentage": 5.34, "elapsed_time": "0:11:22", "remaining_time": "3:21:36", "throughput": 19245.0, "total_tokens": 13130240}
|
|
{"current_steps": 4175, "total_steps": 78105, "loss": 0.4111, "lr": 2.6718729996159266e-06, "epoch": 0.2672684207157032, "percentage": 5.35, "elapsed_time": "0:11:22", "remaining_time": "3:21:33", "throughput": 19248.71, "total_tokens": 13145536}
|
|
{"current_steps": 4180, "total_steps": 78105, "loss": 0.6038, "lr": 2.675073614133914e-06, "epoch": 0.2675885026566801, "percentage": 5.35, "elapsed_time": "0:11:23", "remaining_time": "3:21:29", "throughput": 19252.2, "total_tokens": 13160256}
|
|
{"current_steps": 4185, "total_steps": 78105, "loss": 0.5761, "lr": 2.6782742286519015e-06, "epoch": 0.267908584597657, "percentage": 5.36, "elapsed_time": "0:11:24", "remaining_time": "3:21:26", "throughput": 19256.92, "total_tokens": 13177088}
|
|
{"current_steps": 4190, "total_steps": 78105, "loss": 0.5292, "lr": 2.681474843169889e-06, "epoch": 0.26822866653863386, "percentage": 5.36, "elapsed_time": "0:11:24", "remaining_time": "3:21:23", "throughput": 19260.92, "total_tokens": 13192768}
|
|
{"current_steps": 4195, "total_steps": 78105, "loss": 0.5566, "lr": 2.6846754576878764e-06, "epoch": 0.2685487484796108, "percentage": 5.37, "elapsed_time": "0:11:25", "remaining_time": "3:21:19", "throughput": 19264.62, "total_tokens": 13208192}
|
|
{"current_steps": 4200, "total_steps": 78105, "loss": 0.603, "lr": 2.687876072205864e-06, "epoch": 0.2688688304205877, "percentage": 5.38, "elapsed_time": "0:11:26", "remaining_time": "3:21:16", "throughput": 19268.58, "total_tokens": 13223872}
|
|
{"current_steps": 4205, "total_steps": 78105, "loss": 0.5049, "lr": 2.6910766867238514e-06, "epoch": 0.26918891236156456, "percentage": 5.38, "elapsed_time": "0:11:26", "remaining_time": "3:21:12", "throughput": 19272.07, "total_tokens": 13239104}
|
|
{"current_steps": 4210, "total_steps": 78105, "loss": 0.3918, "lr": 2.694277301241839e-06, "epoch": 0.26950899430254144, "percentage": 5.39, "elapsed_time": "0:11:27", "remaining_time": "3:21:09", "throughput": 19275.94, "total_tokens": 13254464}
|
|
{"current_steps": 4215, "total_steps": 78105, "loss": 0.3768, "lr": 2.6974779157598263e-06, "epoch": 0.2698290762435183, "percentage": 5.4, "elapsed_time": "0:11:28", "remaining_time": "3:21:05", "throughput": 19279.69, "total_tokens": 13269824}
|
|
{"current_steps": 4220, "total_steps": 78105, "loss": 0.4682, "lr": 2.7006785302778138e-06, "epoch": 0.27014915818449525, "percentage": 5.4, "elapsed_time": "0:11:29", "remaining_time": "3:21:03", "throughput": 19285.38, "total_tokens": 13288448}
|
|
{"current_steps": 4225, "total_steps": 78105, "loss": 0.5645, "lr": 2.7038791447958012e-06, "epoch": 0.27046924012547213, "percentage": 5.41, "elapsed_time": "0:11:29", "remaining_time": "3:21:00", "throughput": 19289.27, "total_tokens": 13304320}
|
|
{"current_steps": 4230, "total_steps": 78105, "loss": 0.6966, "lr": 2.7070797593137883e-06, "epoch": 0.270789322066449, "percentage": 5.42, "elapsed_time": "0:11:30", "remaining_time": "3:20:58", "throughput": 19294.44, "total_tokens": 13321920}
|
|
{"current_steps": 4235, "total_steps": 78105, "loss": 0.4596, "lr": 2.7102803738317757e-06, "epoch": 0.2711094040074259, "percentage": 5.42, "elapsed_time": "0:11:31", "remaining_time": "3:20:55", "throughput": 19298.04, "total_tokens": 13337280}
|
|
{"current_steps": 4240, "total_steps": 78105, "loss": 0.4459, "lr": 2.713480988349763e-06, "epoch": 0.27142948594840277, "percentage": 5.43, "elapsed_time": "0:11:31", "remaining_time": "3:20:52", "throughput": 19302.42, "total_tokens": 13354112}
|
|
{"current_steps": 4245, "total_steps": 78105, "loss": 0.4562, "lr": 2.7166816028677507e-06, "epoch": 0.2717495678893797, "percentage": 5.43, "elapsed_time": "0:11:32", "remaining_time": "3:20:49", "throughput": 19305.86, "total_tokens": 13369600}
|
|
{"current_steps": 4250, "total_steps": 78105, "loss": 0.4506, "lr": 2.719882217385738e-06, "epoch": 0.2720696498303566, "percentage": 5.44, "elapsed_time": "0:11:33", "remaining_time": "3:20:46", "throughput": 19309.58, "total_tokens": 13385536}
|
|
{"current_steps": 4255, "total_steps": 78105, "loss": 0.4474, "lr": 2.7230828319037256e-06, "epoch": 0.27238973177133347, "percentage": 5.45, "elapsed_time": "0:11:33", "remaining_time": "3:20:42", "throughput": 19313.14, "total_tokens": 13400832}
|
|
{"current_steps": 4260, "total_steps": 78105, "loss": 0.3514, "lr": 2.726283446421713e-06, "epoch": 0.27270981371231034, "percentage": 5.45, "elapsed_time": "0:11:34", "remaining_time": "3:20:39", "throughput": 19316.65, "total_tokens": 13416000}
|
|
{"current_steps": 4265, "total_steps": 78105, "loss": 0.5444, "lr": 2.7294840609397005e-06, "epoch": 0.2730298956532872, "percentage": 5.46, "elapsed_time": "0:11:35", "remaining_time": "3:20:35", "throughput": 19320.31, "total_tokens": 13431360}
|
|
{"current_steps": 4270, "total_steps": 78105, "loss": 0.5426, "lr": 2.732684675457688e-06, "epoch": 0.2733499775942641, "percentage": 5.47, "elapsed_time": "0:11:35", "remaining_time": "3:20:32", "throughput": 19324.06, "total_tokens": 13446976}
|
|
{"current_steps": 4275, "total_steps": 78105, "loss": 0.4505, "lr": 2.7358852899756755e-06, "epoch": 0.27367005953524104, "percentage": 5.47, "elapsed_time": "0:11:36", "remaining_time": "3:20:29", "throughput": 19328.03, "total_tokens": 13463040}
|
|
{"current_steps": 4280, "total_steps": 78105, "loss": 0.7821, "lr": 2.739085904493663e-06, "epoch": 0.2739901414762179, "percentage": 5.48, "elapsed_time": "0:11:37", "remaining_time": "3:20:26", "throughput": 19331.7, "total_tokens": 13478656}
|
|
{"current_steps": 4285, "total_steps": 78105, "loss": 0.5213, "lr": 2.7422865190116504e-06, "epoch": 0.2743102234171948, "percentage": 5.49, "elapsed_time": "0:11:37", "remaining_time": "3:20:23", "throughput": 19335.72, "total_tokens": 13495296}
|
|
{"current_steps": 4290, "total_steps": 78105, "loss": 0.4987, "lr": 2.745487133529638e-06, "epoch": 0.2746303053581717, "percentage": 5.49, "elapsed_time": "0:11:38", "remaining_time": "3:20:20", "throughput": 19339.29, "total_tokens": 13510912}
|
|
{"current_steps": 4295, "total_steps": 78105, "loss": 0.4589, "lr": 2.7486877480476253e-06, "epoch": 0.27495038729914856, "percentage": 5.5, "elapsed_time": "0:11:39", "remaining_time": "3:20:17", "throughput": 19342.52, "total_tokens": 13525952}
|
|
{"current_steps": 4300, "total_steps": 78105, "loss": 0.5413, "lr": 2.751888362565613e-06, "epoch": 0.2752704692401255, "percentage": 5.51, "elapsed_time": "0:11:40", "remaining_time": "3:20:16", "throughput": 19349.13, "total_tokens": 13546752}
|
|
{"current_steps": 4305, "total_steps": 78105, "loss": 0.5601, "lr": 2.7550889770836003e-06, "epoch": 0.2755905511811024, "percentage": 5.51, "elapsed_time": "0:11:40", "remaining_time": "3:20:13", "throughput": 19352.35, "total_tokens": 13561728}
|
|
{"current_steps": 4310, "total_steps": 78105, "loss": 0.4906, "lr": 2.7582895916015877e-06, "epoch": 0.27591063312207925, "percentage": 5.52, "elapsed_time": "0:11:41", "remaining_time": "3:20:10", "throughput": 19355.77, "total_tokens": 13577344}
|
|
{"current_steps": 4315, "total_steps": 78105, "loss": 0.5657, "lr": 2.761490206119575e-06, "epoch": 0.27623071506305613, "percentage": 5.52, "elapsed_time": "0:11:42", "remaining_time": "3:20:06", "throughput": 19358.75, "total_tokens": 13592256}
|
|
{"current_steps": 4320, "total_steps": 78105, "loss": 0.5641, "lr": 2.7646908206375627e-06, "epoch": 0.276550797004033, "percentage": 5.53, "elapsed_time": "0:11:42", "remaining_time": "3:20:04", "throughput": 19362.93, "total_tokens": 13608832}
|
|
{"current_steps": 4325, "total_steps": 78105, "loss": 0.5632, "lr": 2.76789143515555e-06, "epoch": 0.27687087894500995, "percentage": 5.54, "elapsed_time": "0:11:43", "remaining_time": "3:20:01", "throughput": 19366.15, "total_tokens": 13624128}
|
|
{"current_steps": 4330, "total_steps": 78105, "loss": 0.5651, "lr": 2.7710920496735376e-06, "epoch": 0.2771909608859868, "percentage": 5.54, "elapsed_time": "0:11:44", "remaining_time": "3:19:58", "throughput": 19370.05, "total_tokens": 13640576}
|
|
{"current_steps": 4335, "total_steps": 78105, "loss": 0.5601, "lr": 2.774292664191525e-06, "epoch": 0.2775110428269637, "percentage": 5.55, "elapsed_time": "0:11:44", "remaining_time": "3:19:55", "throughput": 19373.26, "total_tokens": 13655872}
|
|
{"current_steps": 4340, "total_steps": 78105, "loss": 0.7039, "lr": 2.7774932787095125e-06, "epoch": 0.2778311247679406, "percentage": 5.56, "elapsed_time": "0:11:45", "remaining_time": "3:19:52", "throughput": 19377.2, "total_tokens": 13672384}
|
|
{"current_steps": 4345, "total_steps": 78105, "loss": 0.4537, "lr": 2.7806938932275e-06, "epoch": 0.27815120670891746, "percentage": 5.56, "elapsed_time": "0:11:46", "remaining_time": "3:19:49", "throughput": 19380.28, "total_tokens": 13687296}
|
|
{"current_steps": 4350, "total_steps": 78105, "loss": 0.5639, "lr": 2.783894507745487e-06, "epoch": 0.2784712886498944, "percentage": 5.57, "elapsed_time": "0:11:46", "remaining_time": "3:19:45", "throughput": 19383.55, "total_tokens": 13702528}
|
|
{"current_steps": 4355, "total_steps": 78105, "loss": 0.4969, "lr": 2.7870951222634745e-06, "epoch": 0.2787913705908713, "percentage": 5.58, "elapsed_time": "0:11:47", "remaining_time": "3:19:42", "throughput": 19386.67, "total_tokens": 13717568}
|
|
{"current_steps": 4360, "total_steps": 78105, "loss": 0.4192, "lr": 2.790295736781462e-06, "epoch": 0.27911145253184816, "percentage": 5.58, "elapsed_time": "0:11:48", "remaining_time": "3:19:39", "throughput": 19390.39, "total_tokens": 13733568}
|
|
{"current_steps": 4365, "total_steps": 78105, "loss": 0.406, "lr": 2.7934963512994495e-06, "epoch": 0.27943153447282504, "percentage": 5.59, "elapsed_time": "0:11:48", "remaining_time": "3:19:36", "throughput": 19393.97, "total_tokens": 13749312}
|
|
{"current_steps": 4370, "total_steps": 78105, "loss": 0.5638, "lr": 2.796696965817437e-06, "epoch": 0.2797516164138019, "percentage": 5.6, "elapsed_time": "0:11:49", "remaining_time": "3:19:34", "throughput": 19397.92, "total_tokens": 13765952}
|
|
{"current_steps": 4375, "total_steps": 78105, "loss": 0.555, "lr": 2.7998975803354244e-06, "epoch": 0.2800716983547788, "percentage": 5.6, "elapsed_time": "0:11:50", "remaining_time": "3:19:31", "throughput": 19401.09, "total_tokens": 13781440}
|
|
{"current_steps": 4380, "total_steps": 78105, "loss": 0.4757, "lr": 2.803098194853412e-06, "epoch": 0.28039178029575573, "percentage": 5.61, "elapsed_time": "0:11:51", "remaining_time": "3:19:28", "throughput": 19404.31, "total_tokens": 13796864}
|
|
{"current_steps": 4385, "total_steps": 78105, "loss": 0.5962, "lr": 2.8062988093713993e-06, "epoch": 0.2807118622367326, "percentage": 5.61, "elapsed_time": "0:11:51", "remaining_time": "3:19:25", "throughput": 19407.9, "total_tokens": 13812736}
|
|
{"current_steps": 4390, "total_steps": 78105, "loss": 0.4975, "lr": 2.809499423889387e-06, "epoch": 0.2810319441777095, "percentage": 5.62, "elapsed_time": "0:11:52", "remaining_time": "3:19:22", "throughput": 19411.31, "total_tokens": 13828288}
|
|
{"current_steps": 4395, "total_steps": 78105, "loss": 0.491, "lr": 2.8127000384073743e-06, "epoch": 0.28135202611868637, "percentage": 5.63, "elapsed_time": "0:11:53", "remaining_time": "3:19:19", "throughput": 19415.82, "total_tokens": 13845568}
|
|
{"current_steps": 4400, "total_steps": 78105, "loss": 0.4427, "lr": 2.8159006529253617e-06, "epoch": 0.28167210805966325, "percentage": 5.63, "elapsed_time": "0:11:53", "remaining_time": "3:19:16", "throughput": 19419.04, "total_tokens": 13860608}
|
|
{"current_steps": 4405, "total_steps": 78105, "loss": 0.5652, "lr": 2.819101267443349e-06, "epoch": 0.2819921900006402, "percentage": 5.64, "elapsed_time": "0:11:54", "remaining_time": "3:19:13", "throughput": 19422.65, "total_tokens": 13876608}
|
|
{"current_steps": 4410, "total_steps": 78105, "loss": 0.5074, "lr": 2.8223018819613367e-06, "epoch": 0.28231227194161707, "percentage": 5.65, "elapsed_time": "0:11:55", "remaining_time": "3:19:11", "throughput": 19426.74, "total_tokens": 13893632}
|
|
{"current_steps": 4415, "total_steps": 78105, "loss": 0.4899, "lr": 2.825502496479324e-06, "epoch": 0.28263235388259395, "percentage": 5.65, "elapsed_time": "0:11:55", "remaining_time": "3:19:08", "throughput": 19430.0, "total_tokens": 13909312}
|
|
{"current_steps": 4420, "total_steps": 78105, "loss": 0.6169, "lr": 2.8287031109973116e-06, "epoch": 0.2829524358235708, "percentage": 5.66, "elapsed_time": "0:11:56", "remaining_time": "3:19:05", "throughput": 19433.75, "total_tokens": 13925824}
|
|
{"current_steps": 4425, "total_steps": 78105, "loss": 0.5673, "lr": 2.831903725515299e-06, "epoch": 0.2832725177645477, "percentage": 5.67, "elapsed_time": "0:11:57", "remaining_time": "3:19:03", "throughput": 19437.81, "total_tokens": 13942336}
|
|
{"current_steps": 4430, "total_steps": 78105, "loss": 0.4293, "lr": 2.8351043400332865e-06, "epoch": 0.28359259970552464, "percentage": 5.67, "elapsed_time": "0:11:57", "remaining_time": "3:19:00", "throughput": 19441.04, "total_tokens": 13957760}
|
|
{"current_steps": 4435, "total_steps": 78105, "loss": 0.5372, "lr": 2.838304954551274e-06, "epoch": 0.2839126816465015, "percentage": 5.68, "elapsed_time": "0:11:58", "remaining_time": "3:18:56", "throughput": 19444.27, "total_tokens": 13972992}
|
|
{"current_steps": 4440, "total_steps": 78105, "loss": 0.4939, "lr": 2.8415055690692615e-06, "epoch": 0.2842327635874784, "percentage": 5.68, "elapsed_time": "0:11:59", "remaining_time": "3:18:54", "throughput": 19447.72, "total_tokens": 13988736}
|
|
{"current_steps": 4445, "total_steps": 78105, "loss": 0.6253, "lr": 2.8447061835872494e-06, "epoch": 0.2845528455284553, "percentage": 5.69, "elapsed_time": "0:11:59", "remaining_time": "3:18:50", "throughput": 19450.39, "total_tokens": 14003520}
|
|
{"current_steps": 4450, "total_steps": 78105, "loss": 0.5701, "lr": 2.847906798105237e-06, "epoch": 0.28487292746943216, "percentage": 5.7, "elapsed_time": "0:12:00", "remaining_time": "3:18:48", "throughput": 19454.59, "total_tokens": 14020608}
|
|
{"current_steps": 4455, "total_steps": 78105, "loss": 0.4201, "lr": 2.8511074126232243e-06, "epoch": 0.28519300941040904, "percentage": 5.7, "elapsed_time": "0:12:01", "remaining_time": "3:18:45", "throughput": 19457.43, "total_tokens": 14035328}
|
|
{"current_steps": 4460, "total_steps": 78105, "loss": 0.484, "lr": 2.8543080271412118e-06, "epoch": 0.285513091351386, "percentage": 5.71, "elapsed_time": "0:12:02", "remaining_time": "3:18:42", "throughput": 19460.55, "total_tokens": 14050816}
|
|
{"current_steps": 4465, "total_steps": 78105, "loss": 0.3641, "lr": 2.8575086416591992e-06, "epoch": 0.28583317329236285, "percentage": 5.72, "elapsed_time": "0:12:02", "remaining_time": "3:18:39", "throughput": 19464.16, "total_tokens": 14066880}
|
|
{"current_steps": 4470, "total_steps": 78105, "loss": 0.5482, "lr": 2.860709256177186e-06, "epoch": 0.28615325523333973, "percentage": 5.72, "elapsed_time": "0:12:03", "remaining_time": "3:18:35", "throughput": 19467.18, "total_tokens": 14081728}
|
|
{"current_steps": 4475, "total_steps": 78105, "loss": 0.5251, "lr": 2.8639098706951733e-06, "epoch": 0.2864733371743166, "percentage": 5.73, "elapsed_time": "0:12:04", "remaining_time": "3:18:33", "throughput": 19471.06, "total_tokens": 14097984}
|
|
{"current_steps": 4480, "total_steps": 78105, "loss": 0.4527, "lr": 2.8671104852131608e-06, "epoch": 0.2867934191152935, "percentage": 5.74, "elapsed_time": "0:12:04", "remaining_time": "3:18:30", "throughput": 19475.52, "total_tokens": 14115264}
|
|
{"current_steps": 4485, "total_steps": 78105, "loss": 0.3157, "lr": 2.8703110997311482e-06, "epoch": 0.2871135010562704, "percentage": 5.74, "elapsed_time": "0:12:05", "remaining_time": "3:18:27", "throughput": 19478.07, "total_tokens": 14129472}
|
|
{"current_steps": 4490, "total_steps": 78105, "loss": 0.63, "lr": 2.8735117142491357e-06, "epoch": 0.2874335829972473, "percentage": 5.75, "elapsed_time": "0:12:06", "remaining_time": "3:18:24", "throughput": 19481.39, "total_tokens": 14144896}
|
|
{"current_steps": 4495, "total_steps": 78105, "loss": 0.3895, "lr": 2.876712328767123e-06, "epoch": 0.2877536649382242, "percentage": 5.76, "elapsed_time": "0:12:06", "remaining_time": "3:18:20", "throughput": 19484.32, "total_tokens": 14159744}
|
|
{"current_steps": 4500, "total_steps": 78105, "loss": 0.4867, "lr": 2.8799129432851106e-06, "epoch": 0.28807374687920106, "percentage": 5.76, "elapsed_time": "0:12:07", "remaining_time": "3:18:18", "throughput": 19488.44, "total_tokens": 14176384}
|
|
{"current_steps": 4505, "total_steps": 78105, "loss": 0.5708, "lr": 2.883113557803098e-06, "epoch": 0.28839382882017794, "percentage": 5.77, "elapsed_time": "0:12:08", "remaining_time": "3:18:15", "throughput": 19491.96, "total_tokens": 14192320}
|
|
{"current_steps": 4510, "total_steps": 78105, "loss": 0.4018, "lr": 2.886314172321086e-06, "epoch": 0.2887139107611549, "percentage": 5.77, "elapsed_time": "0:12:08", "remaining_time": "3:18:12", "throughput": 19495.33, "total_tokens": 14208192}
|
|
{"current_steps": 4515, "total_steps": 78105, "loss": 0.6319, "lr": 2.8895147868390735e-06, "epoch": 0.28903399270213176, "percentage": 5.78, "elapsed_time": "0:12:09", "remaining_time": "3:18:09", "throughput": 19498.3, "total_tokens": 14223104}
|
|
{"current_steps": 4520, "total_steps": 78105, "loss": 0.4392, "lr": 2.892715401357061e-06, "epoch": 0.28935407464310864, "percentage": 5.79, "elapsed_time": "0:12:10", "remaining_time": "3:18:06", "throughput": 19501.62, "total_tokens": 14238464}
|
|
{"current_steps": 4525, "total_steps": 78105, "loss": 0.4296, "lr": 2.8959160158750484e-06, "epoch": 0.2896741565840855, "percentage": 5.79, "elapsed_time": "0:12:10", "remaining_time": "3:18:03", "throughput": 19505.11, "total_tokens": 14254656}
|
|
{"current_steps": 4530, "total_steps": 78105, "loss": 0.4133, "lr": 2.899116630393036e-06, "epoch": 0.2899942385250624, "percentage": 5.8, "elapsed_time": "0:12:11", "remaining_time": "3:18:00", "throughput": 19507.63, "total_tokens": 14268928}
|
|
{"current_steps": 4535, "total_steps": 78105, "loss": 0.5015, "lr": 2.9023172449110233e-06, "epoch": 0.29031432046603933, "percentage": 5.81, "elapsed_time": "0:12:12", "remaining_time": "3:17:57", "throughput": 19511.56, "total_tokens": 14285952}
|
|
{"current_steps": 4540, "total_steps": 78105, "loss": 0.4114, "lr": 2.905517859429011e-06, "epoch": 0.2906344024070162, "percentage": 5.81, "elapsed_time": "0:12:12", "remaining_time": "3:17:55", "throughput": 19515.07, "total_tokens": 14301760}
|
|
{"current_steps": 4545, "total_steps": 78105, "loss": 0.5292, "lr": 2.9087184739469983e-06, "epoch": 0.2909544843479931, "percentage": 5.82, "elapsed_time": "0:12:13", "remaining_time": "3:17:52", "throughput": 19518.7, "total_tokens": 14318208}
|
|
{"current_steps": 4550, "total_steps": 78105, "loss": 0.4741, "lr": 2.9119190884649857e-06, "epoch": 0.29127456628896997, "percentage": 5.83, "elapsed_time": "0:12:14", "remaining_time": "3:17:49", "throughput": 19522.15, "total_tokens": 14334144}
|
|
{"current_steps": 4555, "total_steps": 78105, "loss": 0.6629, "lr": 2.915119702982973e-06, "epoch": 0.29159464822994685, "percentage": 5.83, "elapsed_time": "0:12:14", "remaining_time": "3:17:46", "throughput": 19524.95, "total_tokens": 14349120}
|
|
{"current_steps": 4560, "total_steps": 78105, "loss": 0.5884, "lr": 2.9183203175009607e-06, "epoch": 0.29191473017092373, "percentage": 5.84, "elapsed_time": "0:12:15", "remaining_time": "3:17:44", "throughput": 19528.61, "total_tokens": 14365376}
|
|
{"current_steps": 4565, "total_steps": 78105, "loss": 0.5375, "lr": 2.921520932018948e-06, "epoch": 0.29223481211190067, "percentage": 5.84, "elapsed_time": "0:12:16", "remaining_time": "3:17:41", "throughput": 19532.11, "total_tokens": 14381568}
|
|
{"current_steps": 4570, "total_steps": 78105, "loss": 0.4778, "lr": 2.9247215465369356e-06, "epoch": 0.29255489405287755, "percentage": 5.85, "elapsed_time": "0:12:16", "remaining_time": "3:17:38", "throughput": 19535.3, "total_tokens": 14397312}
|
|
{"current_steps": 4575, "total_steps": 78105, "loss": 0.4894, "lr": 2.927922161054923e-06, "epoch": 0.2928749759938544, "percentage": 5.86, "elapsed_time": "0:12:17", "remaining_time": "3:17:35", "throughput": 19538.62, "total_tokens": 14412928}
|
|
{"current_steps": 4580, "total_steps": 78105, "loss": 0.5225, "lr": 2.9311227755729105e-06, "epoch": 0.2931950579348313, "percentage": 5.86, "elapsed_time": "0:12:18", "remaining_time": "3:17:32", "throughput": 19541.47, "total_tokens": 14427968}
|
|
{"current_steps": 4585, "total_steps": 78105, "loss": 0.4422, "lr": 2.934323390090898e-06, "epoch": 0.2935151398758082, "percentage": 5.87, "elapsed_time": "0:12:18", "remaining_time": "3:17:29", "throughput": 19544.43, "total_tokens": 14442944}
|
|
{"current_steps": 4590, "total_steps": 78105, "loss": 0.5032, "lr": 2.937524004608885e-06, "epoch": 0.2938352218167851, "percentage": 5.88, "elapsed_time": "0:12:19", "remaining_time": "3:17:26", "throughput": 19547.71, "total_tokens": 14458176}
|
|
{"current_steps": 4595, "total_steps": 78105, "loss": 0.4244, "lr": 2.9407246191268725e-06, "epoch": 0.294155303757762, "percentage": 5.88, "elapsed_time": "0:12:20", "remaining_time": "3:17:23", "throughput": 19550.8, "total_tokens": 14473600}
|
|
{"current_steps": 4600, "total_steps": 78105, "loss": 0.6711, "lr": 2.94392523364486e-06, "epoch": 0.2944753856987389, "percentage": 5.89, "elapsed_time": "0:12:20", "remaining_time": "3:17:20", "throughput": 19553.83, "total_tokens": 14488896}
|
|
{"current_steps": 4605, "total_steps": 78105, "loss": 0.6182, "lr": 2.9471258481628475e-06, "epoch": 0.29479546763971576, "percentage": 5.9, "elapsed_time": "0:12:21", "remaining_time": "3:17:16", "throughput": 19556.04, "total_tokens": 14502784}
|
|
{"current_steps": 4610, "total_steps": 78105, "loss": 0.4884, "lr": 2.950326462680835e-06, "epoch": 0.29511554958069264, "percentage": 5.9, "elapsed_time": "0:12:22", "remaining_time": "3:17:14", "throughput": 19560.33, "total_tokens": 14520000}
|
|
{"current_steps": 4615, "total_steps": 78105, "loss": 0.3833, "lr": 2.9535270771988224e-06, "epoch": 0.2954356315216696, "percentage": 5.91, "elapsed_time": "0:12:23", "remaining_time": "3:17:11", "throughput": 19563.9, "total_tokens": 14536320}
|
|
{"current_steps": 4620, "total_steps": 78105, "loss": 0.4793, "lr": 2.95672769171681e-06, "epoch": 0.29575571346264645, "percentage": 5.92, "elapsed_time": "0:12:23", "remaining_time": "3:17:08", "throughput": 19567.14, "total_tokens": 14551808}
|
|
{"current_steps": 4625, "total_steps": 78105, "loss": 0.3634, "lr": 2.9599283062347973e-06, "epoch": 0.29607579540362333, "percentage": 5.92, "elapsed_time": "0:12:24", "remaining_time": "3:17:05", "throughput": 19570.14, "total_tokens": 14566976}
|
|
{"current_steps": 4630, "total_steps": 78105, "loss": 0.3677, "lr": 2.963128920752785e-06, "epoch": 0.2963958773446002, "percentage": 5.93, "elapsed_time": "0:12:25", "remaining_time": "3:17:03", "throughput": 19573.6, "total_tokens": 14583104}
|
|
{"current_steps": 4635, "total_steps": 78105, "loss": 0.4969, "lr": 2.9663295352707723e-06, "epoch": 0.2967159592855771, "percentage": 5.93, "elapsed_time": "0:12:25", "remaining_time": "3:17:00", "throughput": 19576.76, "total_tokens": 14598336}
|
|
{"current_steps": 4640, "total_steps": 78105, "loss": 0.5815, "lr": 2.9695301497887597e-06, "epoch": 0.29703604122655397, "percentage": 5.94, "elapsed_time": "0:12:26", "remaining_time": "3:16:57", "throughput": 19580.81, "total_tokens": 14615232}
|
|
{"current_steps": 4645, "total_steps": 78105, "loss": 0.4829, "lr": 2.972730764306747e-06, "epoch": 0.2973561231675309, "percentage": 5.95, "elapsed_time": "0:12:27", "remaining_time": "3:16:55", "throughput": 19583.89, "total_tokens": 14630848}
|
|
{"current_steps": 4650, "total_steps": 78105, "loss": 0.5286, "lr": 2.9759313788247347e-06, "epoch": 0.2976762051085078, "percentage": 5.95, "elapsed_time": "0:12:27", "remaining_time": "3:16:52", "throughput": 19587.35, "total_tokens": 14647040}
|
|
{"current_steps": 4655, "total_steps": 78105, "loss": 0.4687, "lr": 2.979131993342722e-06, "epoch": 0.29799628704948466, "percentage": 5.96, "elapsed_time": "0:12:28", "remaining_time": "3:16:49", "throughput": 19589.99, "total_tokens": 14661888}
|
|
{"current_steps": 4660, "total_steps": 78105, "loss": 0.8269, "lr": 2.9823326078607096e-06, "epoch": 0.29831636899046154, "percentage": 5.97, "elapsed_time": "0:12:29", "remaining_time": "3:16:46", "throughput": 19593.21, "total_tokens": 14677696}
|
|
{"current_steps": 4665, "total_steps": 78105, "loss": 0.4892, "lr": 2.985533222378697e-06, "epoch": 0.2986364509314384, "percentage": 5.97, "elapsed_time": "0:12:29", "remaining_time": "3:16:43", "throughput": 19596.5, "total_tokens": 14693184}
|
|
{"current_steps": 4670, "total_steps": 78105, "loss": 0.3668, "lr": 2.9887338368966845e-06, "epoch": 0.29895653287241536, "percentage": 5.98, "elapsed_time": "0:12:30", "remaining_time": "3:16:40", "throughput": 19599.33, "total_tokens": 14707904}
|
|
{"current_steps": 4675, "total_steps": 78105, "loss": 0.4194, "lr": 2.991934451414672e-06, "epoch": 0.29927661481339224, "percentage": 5.99, "elapsed_time": "0:12:31", "remaining_time": "3:16:37", "throughput": 19602.92, "total_tokens": 14724096}
|
|
{"current_steps": 4680, "total_steps": 78105, "loss": 0.575, "lr": 2.9951350659326595e-06, "epoch": 0.2995966967543691, "percentage": 5.99, "elapsed_time": "0:12:31", "remaining_time": "3:16:34", "throughput": 19605.53, "total_tokens": 14738752}
|
|
{"current_steps": 4685, "total_steps": 78105, "loss": 0.3559, "lr": 2.998335680450647e-06, "epoch": 0.299916778695346, "percentage": 6.0, "elapsed_time": "0:12:32", "remaining_time": "3:16:31", "throughput": 19608.39, "total_tokens": 14753664}
|
|
{"current_steps": 4690, "total_steps": 78105, "loss": 0.4628, "lr": 3.0015362949686344e-06, "epoch": 0.3002368606363229, "percentage": 6.0, "elapsed_time": "0:12:33", "remaining_time": "3:16:28", "throughput": 19611.68, "total_tokens": 14769152}
|
|
{"current_steps": 4695, "total_steps": 78105, "loss": 0.5057, "lr": 3.004736909486622e-06, "epoch": 0.3005569425772998, "percentage": 6.01, "elapsed_time": "0:12:33", "remaining_time": "3:16:25", "throughput": 19614.48, "total_tokens": 14784128}
|
|
{"current_steps": 4700, "total_steps": 78105, "loss": 0.5411, "lr": 3.0079375240046093e-06, "epoch": 0.3008770245182767, "percentage": 6.02, "elapsed_time": "0:12:34", "remaining_time": "3:16:22", "throughput": 19617.94, "total_tokens": 14799936}
|
|
{"current_steps": 4705, "total_steps": 78105, "loss": 0.641, "lr": 3.011138138522597e-06, "epoch": 0.30119710645925357, "percentage": 6.02, "elapsed_time": "0:12:35", "remaining_time": "3:16:19", "throughput": 19620.31, "total_tokens": 14814208}
|
|
{"current_steps": 4710, "total_steps": 78105, "loss": 0.5401, "lr": 3.014338753040584e-06, "epoch": 0.30151718840023045, "percentage": 6.03, "elapsed_time": "0:12:35", "remaining_time": "3:16:16", "throughput": 19623.24, "total_tokens": 14829504}
|
|
{"current_steps": 4715, "total_steps": 78105, "loss": 0.4631, "lr": 3.0175393675585713e-06, "epoch": 0.30183727034120733, "percentage": 6.04, "elapsed_time": "0:12:36", "remaining_time": "3:16:12", "throughput": 19625.88, "total_tokens": 14843968}
|
|
{"current_steps": 4720, "total_steps": 78105, "loss": 0.4741, "lr": 3.0207399820765588e-06, "epoch": 0.30215735228218427, "percentage": 6.04, "elapsed_time": "0:12:37", "remaining_time": "3:16:09", "throughput": 19629.42, "total_tokens": 14859840}
|
|
{"current_steps": 4725, "total_steps": 78105, "loss": 0.5715, "lr": 3.0239405965945462e-06, "epoch": 0.30247743422316115, "percentage": 6.05, "elapsed_time": "0:12:37", "remaining_time": "3:16:07", "throughput": 19632.33, "total_tokens": 14875648}
|
|
{"current_steps": 4730, "total_steps": 78105, "loss": 0.5296, "lr": 3.0271412111125337e-06, "epoch": 0.302797516164138, "percentage": 6.06, "elapsed_time": "0:12:38", "remaining_time": "3:16:04", "throughput": 19634.85, "total_tokens": 14890560}
|
|
{"current_steps": 4735, "total_steps": 78105, "loss": 0.5479, "lr": 3.030341825630521e-06, "epoch": 0.3031175981051149, "percentage": 6.06, "elapsed_time": "0:12:39", "remaining_time": "3:16:01", "throughput": 19638.27, "total_tokens": 14906688}
|
|
{"current_steps": 4740, "total_steps": 78105, "loss": 0.625, "lr": 3.0335424401485086e-06, "epoch": 0.3034376800460918, "percentage": 6.07, "elapsed_time": "0:12:39", "remaining_time": "3:15:59", "throughput": 19641.46, "total_tokens": 14922496}
|
|
{"current_steps": 4745, "total_steps": 78105, "loss": 0.5292, "lr": 3.036743054666496e-06, "epoch": 0.30375776198706866, "percentage": 6.08, "elapsed_time": "0:12:40", "remaining_time": "3:15:56", "throughput": 19644.86, "total_tokens": 14938880}
|
|
{"current_steps": 4750, "total_steps": 78105, "loss": 0.5366, "lr": 3.0399436691844836e-06, "epoch": 0.3040778439280456, "percentage": 6.08, "elapsed_time": "0:12:41", "remaining_time": "3:15:53", "throughput": 19647.68, "total_tokens": 14954112}
|
|
{"current_steps": 4755, "total_steps": 78105, "loss": 0.4153, "lr": 3.043144283702471e-06, "epoch": 0.3043979258690225, "percentage": 6.09, "elapsed_time": "0:12:41", "remaining_time": "3:15:51", "throughput": 19650.76, "total_tokens": 14969536}
|
|
{"current_steps": 4760, "total_steps": 78105, "loss": 0.3946, "lr": 3.0463448982204585e-06, "epoch": 0.30471800780999936, "percentage": 6.09, "elapsed_time": "0:12:42", "remaining_time": "3:15:48", "throughput": 19654.21, "total_tokens": 14985728}
|
|
{"current_steps": 4765, "total_steps": 78105, "loss": 0.5285, "lr": 3.049545512738446e-06, "epoch": 0.30503808975097624, "percentage": 6.1, "elapsed_time": "0:12:43", "remaining_time": "3:15:46", "throughput": 19658.77, "total_tokens": 15003904}
|
|
{"current_steps": 4770, "total_steps": 78105, "loss": 0.6277, "lr": 3.0527461272564334e-06, "epoch": 0.3053581716919531, "percentage": 6.11, "elapsed_time": "0:12:43", "remaining_time": "3:15:44", "throughput": 19661.78, "total_tokens": 15019328}
|
|
{"current_steps": 4775, "total_steps": 78105, "loss": 0.5619, "lr": 3.055946741774421e-06, "epoch": 0.30567825363293005, "percentage": 6.11, "elapsed_time": "0:12:44", "remaining_time": "3:15:40", "throughput": 19664.22, "total_tokens": 15033856}
|
|
{"current_steps": 4780, "total_steps": 78105, "loss": 0.4167, "lr": 3.0591473562924084e-06, "epoch": 0.30599833557390693, "percentage": 6.12, "elapsed_time": "0:12:45", "remaining_time": "3:15:39", "throughput": 19668.17, "total_tokens": 15051200}
|
|
{"current_steps": 4785, "total_steps": 78105, "loss": 0.4579, "lr": 3.062347970810396e-06, "epoch": 0.3063184175148838, "percentage": 6.13, "elapsed_time": "0:12:45", "remaining_time": "3:15:36", "throughput": 19671.04, "total_tokens": 15066368}
|
|
{"current_steps": 4790, "total_steps": 78105, "loss": 0.3633, "lr": 3.0655485853283833e-06, "epoch": 0.3066384994558607, "percentage": 6.13, "elapsed_time": "0:12:46", "remaining_time": "3:15:33", "throughput": 19673.83, "total_tokens": 15081600}
|
|
{"current_steps": 4795, "total_steps": 78105, "loss": 0.5975, "lr": 3.0687491998463708e-06, "epoch": 0.30695858139683757, "percentage": 6.14, "elapsed_time": "0:12:47", "remaining_time": "3:15:30", "throughput": 19677.06, "total_tokens": 15097536}
|
|
{"current_steps": 4800, "total_steps": 78105, "loss": 0.5337, "lr": 3.0719498143643583e-06, "epoch": 0.3072786633378145, "percentage": 6.15, "elapsed_time": "0:12:47", "remaining_time": "3:15:28", "throughput": 19680.49, "total_tokens": 15113728}
|
|
{"current_steps": 4805, "total_steps": 78105, "loss": 0.6239, "lr": 3.0751504288823457e-06, "epoch": 0.3075987452787914, "percentage": 6.15, "elapsed_time": "0:12:48", "remaining_time": "3:15:25", "throughput": 19683.69, "total_tokens": 15130048}
|
|
{"current_steps": 4810, "total_steps": 78105, "loss": 0.4298, "lr": 3.078351043400333e-06, "epoch": 0.30791882721976827, "percentage": 6.16, "elapsed_time": "0:12:49", "remaining_time": "3:15:23", "throughput": 19686.74, "total_tokens": 15145600}
|
|
{"current_steps": 4815, "total_steps": 78105, "loss": 0.5121, "lr": 3.0815516579183207e-06, "epoch": 0.30823890916074514, "percentage": 6.16, "elapsed_time": "0:12:50", "remaining_time": "3:15:20", "throughput": 19690.43, "total_tokens": 15162368}
|
|
{"current_steps": 4820, "total_steps": 78105, "loss": 0.497, "lr": 3.084752272436308e-06, "epoch": 0.308558991101722, "percentage": 6.17, "elapsed_time": "0:12:50", "remaining_time": "3:15:18", "throughput": 19693.73, "total_tokens": 15178368}
|
|
{"current_steps": 4825, "total_steps": 78105, "loss": 0.5909, "lr": 3.0879528869542956e-06, "epoch": 0.3088790730426989, "percentage": 6.18, "elapsed_time": "0:12:51", "remaining_time": "3:15:15", "throughput": 19696.61, "total_tokens": 15193408}
|
|
{"current_steps": 4830, "total_steps": 78105, "loss": 0.3568, "lr": 3.0911535014722826e-06, "epoch": 0.30919915498367584, "percentage": 6.18, "elapsed_time": "0:12:52", "remaining_time": "3:15:12", "throughput": 19699.48, "total_tokens": 15208640}
|
|
{"current_steps": 4835, "total_steps": 78105, "loss": 0.532, "lr": 3.09435411599027e-06, "epoch": 0.3095192369246527, "percentage": 6.19, "elapsed_time": "0:12:52", "remaining_time": "3:15:09", "throughput": 19702.68, "total_tokens": 15224192}
|
|
{"current_steps": 4840, "total_steps": 78105, "loss": 0.591, "lr": 3.0975547305082576e-06, "epoch": 0.3098393188656296, "percentage": 6.2, "elapsed_time": "0:12:53", "remaining_time": "3:15:06", "throughput": 19705.59, "total_tokens": 15239616}
|
|
{"current_steps": 4845, "total_steps": 78105, "loss": 0.4536, "lr": 3.100755345026245e-06, "epoch": 0.3101594008066065, "percentage": 6.2, "elapsed_time": "0:12:54", "remaining_time": "3:15:04", "throughput": 19708.86, "total_tokens": 15255680}
|
|
{"current_steps": 4850, "total_steps": 78105, "loss": 0.5104, "lr": 3.1039559595442325e-06, "epoch": 0.31047948274758336, "percentage": 6.21, "elapsed_time": "0:12:54", "remaining_time": "3:15:01", "throughput": 19711.86, "total_tokens": 15271232}
|
|
{"current_steps": 4855, "total_steps": 78105, "loss": 0.4745, "lr": 3.10715657406222e-06, "epoch": 0.3107995646885603, "percentage": 6.22, "elapsed_time": "0:12:55", "remaining_time": "3:14:58", "throughput": 19714.9, "total_tokens": 15287040}
|
|
{"current_steps": 4860, "total_steps": 78105, "loss": 0.5914, "lr": 3.1103571885802074e-06, "epoch": 0.31111964662953717, "percentage": 6.22, "elapsed_time": "0:12:56", "remaining_time": "3:14:56", "throughput": 19718.83, "total_tokens": 15304256}
|
|
{"current_steps": 4865, "total_steps": 78105, "loss": 0.3885, "lr": 3.113557803098195e-06, "epoch": 0.31143972857051405, "percentage": 6.23, "elapsed_time": "0:12:56", "remaining_time": "3:14:55", "throughput": 19723.02, "total_tokens": 15322112}
|
|
{"current_steps": 4870, "total_steps": 78105, "loss": 0.5413, "lr": 3.1167584176161824e-06, "epoch": 0.31175981051149093, "percentage": 6.24, "elapsed_time": "0:12:57", "remaining_time": "3:14:52", "throughput": 19725.83, "total_tokens": 15337216}
|
|
{"current_steps": 4875, "total_steps": 78105, "loss": 0.5022, "lr": 3.11995903213417e-06, "epoch": 0.3120798924524678, "percentage": 6.24, "elapsed_time": "0:12:58", "remaining_time": "3:14:50", "throughput": 19729.42, "total_tokens": 15354048}
|
|
{"current_steps": 4880, "total_steps": 78105, "loss": 0.4865, "lr": 3.1231596466521573e-06, "epoch": 0.31239997439344475, "percentage": 6.25, "elapsed_time": "0:12:58", "remaining_time": "3:14:47", "throughput": 19732.12, "total_tokens": 15369344}
|
|
{"current_steps": 4885, "total_steps": 78105, "loss": 0.5673, "lr": 3.1263602611701448e-06, "epoch": 0.3127200563344216, "percentage": 6.25, "elapsed_time": "0:12:59", "remaining_time": "3:14:45", "throughput": 19735.71, "total_tokens": 15385920}
|
|
{"current_steps": 4890, "total_steps": 78105, "loss": 0.4472, "lr": 3.1295608756881322e-06, "epoch": 0.3130401382753985, "percentage": 6.26, "elapsed_time": "0:13:00", "remaining_time": "3:14:42", "throughput": 19738.66, "total_tokens": 15401536}
|
|
{"current_steps": 4895, "total_steps": 78105, "loss": 0.5766, "lr": 3.1327614902061197e-06, "epoch": 0.3133602202163754, "percentage": 6.27, "elapsed_time": "0:13:00", "remaining_time": "3:14:39", "throughput": 19741.56, "total_tokens": 15417152}
|
|
{"current_steps": 4900, "total_steps": 78105, "loss": 0.6036, "lr": 3.135962104724107e-06, "epoch": 0.31368030215735226, "percentage": 6.27, "elapsed_time": "0:13:01", "remaining_time": "3:14:36", "throughput": 19744.08, "total_tokens": 15431872}
|
|
{"current_steps": 4905, "total_steps": 78105, "loss": 0.4521, "lr": 3.1391627192420946e-06, "epoch": 0.3140003840983292, "percentage": 6.28, "elapsed_time": "0:13:02", "remaining_time": "3:14:34", "throughput": 19747.07, "total_tokens": 15447552}
|
|
{"current_steps": 4910, "total_steps": 78105, "loss": 0.5343, "lr": 3.142363333760082e-06, "epoch": 0.3143204660393061, "percentage": 6.29, "elapsed_time": "0:13:02", "remaining_time": "3:14:32", "throughput": 19750.49, "total_tokens": 15464384}
|
|
{"current_steps": 4915, "total_steps": 78105, "loss": 0.5376, "lr": 3.1455639482780696e-06, "epoch": 0.31464054798028296, "percentage": 6.29, "elapsed_time": "0:13:03", "remaining_time": "3:14:29", "throughput": 19752.99, "total_tokens": 15479808}
|
|
{"current_steps": 4920, "total_steps": 78105, "loss": 0.5259, "lr": 3.148764562796057e-06, "epoch": 0.31496062992125984, "percentage": 6.3, "elapsed_time": "0:13:04", "remaining_time": "3:14:27", "throughput": 19756.32, "total_tokens": 15496512}
|
|
{"current_steps": 4925, "total_steps": 78105, "loss": 0.4217, "lr": 3.151965177314045e-06, "epoch": 0.3152807118622367, "percentage": 6.31, "elapsed_time": "0:13:05", "remaining_time": "3:14:25", "throughput": 19759.69, "total_tokens": 15512896}
|
|
{"current_steps": 4930, "total_steps": 78105, "loss": 0.3742, "lr": 3.1551657918320324e-06, "epoch": 0.3156007938032136, "percentage": 6.31, "elapsed_time": "0:13:05", "remaining_time": "3:14:22", "throughput": 19762.41, "total_tokens": 15528064}
|
|
{"current_steps": 4935, "total_steps": 78105, "loss": 0.4034, "lr": 3.15836640635002e-06, "epoch": 0.31592087574419053, "percentage": 6.32, "elapsed_time": "0:13:06", "remaining_time": "3:14:19", "throughput": 19765.21, "total_tokens": 15543424}
|
|
{"current_steps": 4940, "total_steps": 78105, "loss": 0.6094, "lr": 3.1615670208680073e-06, "epoch": 0.3162409576851674, "percentage": 6.32, "elapsed_time": "0:13:07", "remaining_time": "3:14:17", "throughput": 19768.55, "total_tokens": 15560192}
|
|
{"current_steps": 4945, "total_steps": 78105, "loss": 0.5051, "lr": 3.164767635385995e-06, "epoch": 0.3165610396261443, "percentage": 6.33, "elapsed_time": "0:13:07", "remaining_time": "3:14:15", "throughput": 19771.46, "total_tokens": 15576128}
|
|
{"current_steps": 4950, "total_steps": 78105, "loss": 0.7277, "lr": 3.1679682499039814e-06, "epoch": 0.31688112156712117, "percentage": 6.34, "elapsed_time": "0:13:08", "remaining_time": "3:14:13", "throughput": 19774.78, "total_tokens": 15592576}
|
|
{"current_steps": 4955, "total_steps": 78105, "loss": 0.4515, "lr": 3.171168864421969e-06, "epoch": 0.31720120350809805, "percentage": 6.34, "elapsed_time": "0:13:09", "remaining_time": "3:14:10", "throughput": 19777.34, "total_tokens": 15607744}
|
|
{"current_steps": 4960, "total_steps": 78105, "loss": 0.6054, "lr": 3.1743694789399564e-06, "epoch": 0.317521285449075, "percentage": 6.35, "elapsed_time": "0:13:09", "remaining_time": "3:14:07", "throughput": 19780.23, "total_tokens": 15623360}
|
|
{"current_steps": 4965, "total_steps": 78105, "loss": 0.4599, "lr": 3.177570093457944e-06, "epoch": 0.31784136739005187, "percentage": 6.36, "elapsed_time": "0:13:10", "remaining_time": "3:14:05", "throughput": 19783.11, "total_tokens": 15638656}
|
|
{"current_steps": 4970, "total_steps": 78105, "loss": 0.5218, "lr": 3.1807707079759313e-06, "epoch": 0.31816144933102875, "percentage": 6.36, "elapsed_time": "0:13:11", "remaining_time": "3:14:02", "throughput": 19786.06, "total_tokens": 15654400}
|
|
{"current_steps": 4975, "total_steps": 78105, "loss": 0.4533, "lr": 3.1839713224939188e-06, "epoch": 0.3184815312720056, "percentage": 6.37, "elapsed_time": "0:13:11", "remaining_time": "3:13:59", "throughput": 19788.87, "total_tokens": 15670016}
|
|
{"current_steps": 4980, "total_steps": 78105, "loss": 0.5557, "lr": 3.1871719370119062e-06, "epoch": 0.3188016132129825, "percentage": 6.38, "elapsed_time": "0:13:12", "remaining_time": "3:13:57", "throughput": 19792.17, "total_tokens": 15686208}
|
|
{"current_steps": 4985, "total_steps": 78105, "loss": 0.4331, "lr": 3.1903725515298937e-06, "epoch": 0.31912169515395944, "percentage": 6.38, "elapsed_time": "0:13:13", "remaining_time": "3:13:55", "throughput": 19795.28, "total_tokens": 15702592}
|
|
{"current_steps": 4990, "total_steps": 78105, "loss": 0.5049, "lr": 3.193573166047881e-06, "epoch": 0.3194417770949363, "percentage": 6.39, "elapsed_time": "0:13:13", "remaining_time": "3:13:52", "throughput": 19797.96, "total_tokens": 15717696}
|
|
{"current_steps": 4995, "total_steps": 78105, "loss": 0.4435, "lr": 3.196773780565869e-06, "epoch": 0.3197618590359132, "percentage": 6.4, "elapsed_time": "0:13:14", "remaining_time": "3:13:49", "throughput": 19800.76, "total_tokens": 15733184}
|
|
{"current_steps": 5000, "total_steps": 78105, "loss": 0.5851, "lr": 3.1999743950838565e-06, "epoch": 0.3200819409768901, "percentage": 6.4, "elapsed_time": "0:13:15", "remaining_time": "3:13:47", "throughput": 19804.0, "total_tokens": 15749888}
|
|
{"current_steps": 5005, "total_steps": 78105, "loss": 0.4457, "lr": 3.203175009601844e-06, "epoch": 0.32040202291786696, "percentage": 6.41, "elapsed_time": "0:13:16", "remaining_time": "3:13:46", "throughput": 19808.21, "total_tokens": 15768384}
|
|
{"current_steps": 5010, "total_steps": 78105, "loss": 0.3612, "lr": 3.2063756241198314e-06, "epoch": 0.32072210485884384, "percentage": 6.41, "elapsed_time": "0:13:16", "remaining_time": "3:13:44", "throughput": 19811.23, "total_tokens": 15784448}
|
|
{"current_steps": 5015, "total_steps": 78105, "loss": 0.4858, "lr": 3.209576238637819e-06, "epoch": 0.3210421867998208, "percentage": 6.42, "elapsed_time": "0:13:17", "remaining_time": "3:13:42", "throughput": 19814.2, "total_tokens": 15800512}
|
|
{"current_steps": 5020, "total_steps": 78105, "loss": 0.5056, "lr": 3.2127768531558064e-06, "epoch": 0.32136226874079765, "percentage": 6.43, "elapsed_time": "0:13:18", "remaining_time": "3:13:39", "throughput": 19816.7, "total_tokens": 15815680}
|
|
{"current_steps": 5025, "total_steps": 78105, "loss": 0.5082, "lr": 3.215977467673794e-06, "epoch": 0.32168235068177453, "percentage": 6.43, "elapsed_time": "0:13:18", "remaining_time": "3:13:36", "throughput": 19819.08, "total_tokens": 15830528}
|
|
{"current_steps": 5030, "total_steps": 78105, "loss": 0.5727, "lr": 3.2191780821917813e-06, "epoch": 0.3220024326227514, "percentage": 6.44, "elapsed_time": "0:13:19", "remaining_time": "3:13:33", "throughput": 19821.51, "total_tokens": 15845568}
|
|
{"current_steps": 5035, "total_steps": 78105, "loss": 0.5703, "lr": 3.2223786967097688e-06, "epoch": 0.3223225145637283, "percentage": 6.45, "elapsed_time": "0:13:20", "remaining_time": "3:13:31", "throughput": 19824.66, "total_tokens": 15861632}
|
|
{"current_steps": 5040, "total_steps": 78105, "loss": 0.6083, "lr": 3.2255793112277563e-06, "epoch": 0.3226425965047052, "percentage": 6.45, "elapsed_time": "0:13:20", "remaining_time": "3:13:28", "throughput": 19827.62, "total_tokens": 15877632}
|
|
{"current_steps": 5045, "total_steps": 78105, "loss": 0.4118, "lr": 3.2287799257457437e-06, "epoch": 0.3229626784456821, "percentage": 6.46, "elapsed_time": "0:13:21", "remaining_time": "3:13:26", "throughput": 19830.47, "total_tokens": 15893184}
|
|
{"current_steps": 5050, "total_steps": 78105, "loss": 0.4441, "lr": 3.231980540263731e-06, "epoch": 0.323282760386659, "percentage": 6.47, "elapsed_time": "0:13:22", "remaining_time": "3:13:23", "throughput": 19833.03, "total_tokens": 15908416}
|
|
{"current_steps": 5055, "total_steps": 78105, "loss": 0.4126, "lr": 3.2351811547817187e-06, "epoch": 0.32360284232763586, "percentage": 6.47, "elapsed_time": "0:13:22", "remaining_time": "3:13:22", "throughput": 19833.95, "total_tokens": 15924224}
|
|
{"current_steps": 5060, "total_steps": 78105, "loss": 0.341, "lr": 3.238381769299706e-06, "epoch": 0.32392292426861274, "percentage": 6.48, "elapsed_time": "0:13:23", "remaining_time": "3:13:19", "throughput": 19836.83, "total_tokens": 15940032}
|
|
{"current_steps": 5065, "total_steps": 78105, "loss": 0.5604, "lr": 3.2415823838176936e-06, "epoch": 0.3242430062095897, "percentage": 6.48, "elapsed_time": "0:13:24", "remaining_time": "3:13:17", "throughput": 19839.25, "total_tokens": 15954944}
|
|
{"current_steps": 5070, "total_steps": 78105, "loss": 0.5718, "lr": 3.2447829983356806e-06, "epoch": 0.32456308815056656, "percentage": 6.49, "elapsed_time": "0:13:24", "remaining_time": "3:13:14", "throughput": 19842.21, "total_tokens": 15970624}
|
|
{"current_steps": 5075, "total_steps": 78105, "loss": 0.5341, "lr": 3.247983612853668e-06, "epoch": 0.32488317009154344, "percentage": 6.5, "elapsed_time": "0:13:25", "remaining_time": "3:13:12", "throughput": 19845.2, "total_tokens": 15986688}
|
|
{"current_steps": 5080, "total_steps": 78105, "loss": 0.4263, "lr": 3.2511842273716556e-06, "epoch": 0.3252032520325203, "percentage": 6.5, "elapsed_time": "0:13:26", "remaining_time": "3:13:09", "throughput": 19848.17, "total_tokens": 16002752}
|
|
{"current_steps": 5085, "total_steps": 78105, "loss": 0.535, "lr": 3.254384841889643e-06, "epoch": 0.3255233339734972, "percentage": 6.51, "elapsed_time": "0:13:26", "remaining_time": "3:13:07", "throughput": 19850.96, "total_tokens": 16018112}
|
|
{"current_steps": 5090, "total_steps": 78105, "loss": 0.481, "lr": 3.2575854564076305e-06, "epoch": 0.32584341591447413, "percentage": 6.52, "elapsed_time": "0:13:27", "remaining_time": "3:13:04", "throughput": 19853.84, "total_tokens": 16033984}
|
|
{"current_steps": 5095, "total_steps": 78105, "loss": 0.3992, "lr": 3.260786070925618e-06, "epoch": 0.326163497855451, "percentage": 6.52, "elapsed_time": "0:13:28", "remaining_time": "3:13:02", "throughput": 19856.52, "total_tokens": 16049536}
|
|
{"current_steps": 5100, "total_steps": 78105, "loss": 0.4262, "lr": 3.2639866854436054e-06, "epoch": 0.3264835797964279, "percentage": 6.53, "elapsed_time": "0:13:28", "remaining_time": "3:13:00", "throughput": 19859.92, "total_tokens": 16066304}
|
|
{"current_steps": 5105, "total_steps": 78105, "loss": 0.5625, "lr": 3.267187299961593e-06, "epoch": 0.32680366173740477, "percentage": 6.54, "elapsed_time": "0:13:29", "remaining_time": "3:12:57", "throughput": 19862.4, "total_tokens": 16081536}
|
|
{"current_steps": 5110, "total_steps": 78105, "loss": 0.708, "lr": 3.2703879144795804e-06, "epoch": 0.32712374367838165, "percentage": 6.54, "elapsed_time": "0:13:30", "remaining_time": "3:12:55", "throughput": 19865.09, "total_tokens": 16096896}
|
|
{"current_steps": 5115, "total_steps": 78105, "loss": 0.5131, "lr": 3.273588528997568e-06, "epoch": 0.32744382561935853, "percentage": 6.55, "elapsed_time": "0:13:30", "remaining_time": "3:12:52", "throughput": 19868.15, "total_tokens": 16112960}
|
|
{"current_steps": 5120, "total_steps": 78105, "loss": 0.467, "lr": 3.2767891435155553e-06, "epoch": 0.32776390756033547, "percentage": 6.56, "elapsed_time": "0:13:31", "remaining_time": "3:12:50", "throughput": 19871.39, "total_tokens": 16129344}
|
|
{"current_steps": 5125, "total_steps": 78105, "loss": 0.6375, "lr": 3.2799897580335428e-06, "epoch": 0.32808398950131235, "percentage": 6.56, "elapsed_time": "0:13:32", "remaining_time": "3:12:49", "throughput": 19875.61, "total_tokens": 16147776}
|
|
{"current_steps": 5130, "total_steps": 78105, "loss": 0.4597, "lr": 3.2831903725515302e-06, "epoch": 0.3284040714422892, "percentage": 6.57, "elapsed_time": "0:13:33", "remaining_time": "3:12:46", "throughput": 19877.97, "total_tokens": 16162880}
|
|
{"current_steps": 5135, "total_steps": 78105, "loss": 0.2809, "lr": 3.2863909870695177e-06, "epoch": 0.3287241533832661, "percentage": 6.57, "elapsed_time": "0:13:33", "remaining_time": "3:12:44", "throughput": 19881.21, "total_tokens": 16179392}
|
|
{"current_steps": 5140, "total_steps": 78105, "loss": 0.6244, "lr": 3.289591601587505e-06, "epoch": 0.329044235324243, "percentage": 6.58, "elapsed_time": "0:13:34", "remaining_time": "3:12:41", "throughput": 19883.63, "total_tokens": 16194560}
|
|
{"current_steps": 5145, "total_steps": 78105, "loss": 0.5175, "lr": 3.2927922161054926e-06, "epoch": 0.3293643172652199, "percentage": 6.59, "elapsed_time": "0:13:35", "remaining_time": "3:12:39", "throughput": 19886.42, "total_tokens": 16210112}
|
|
{"current_steps": 5150, "total_steps": 78105, "loss": 0.5084, "lr": 3.29599283062348e-06, "epoch": 0.3296843992061968, "percentage": 6.59, "elapsed_time": "0:13:35", "remaining_time": "3:12:36", "throughput": 19889.29, "total_tokens": 16225856}
|
|
{"current_steps": 5155, "total_steps": 78105, "loss": 0.5103, "lr": 3.2991934451414676e-06, "epoch": 0.3300044811471737, "percentage": 6.6, "elapsed_time": "0:13:36", "remaining_time": "3:12:34", "throughput": 19891.84, "total_tokens": 16241536}
|
|
{"current_steps": 5160, "total_steps": 78105, "loss": 0.4566, "lr": 3.302394059659455e-06, "epoch": 0.33032456308815056, "percentage": 6.61, "elapsed_time": "0:13:37", "remaining_time": "3:12:31", "throughput": 19894.15, "total_tokens": 16256384}
|
|
{"current_steps": 5165, "total_steps": 78105, "loss": 0.4815, "lr": 3.3055946741774425e-06, "epoch": 0.33064464502912744, "percentage": 6.61, "elapsed_time": "0:13:37", "remaining_time": "3:12:28", "throughput": 19896.58, "total_tokens": 16271360}
|
|
{"current_steps": 5170, "total_steps": 78105, "loss": 0.4515, "lr": 3.30879528869543e-06, "epoch": 0.3309647269701044, "percentage": 6.62, "elapsed_time": "0:13:38", "remaining_time": "3:12:26", "throughput": 19899.07, "total_tokens": 16286656}
|
|
{"current_steps": 5175, "total_steps": 78105, "loss": 0.4787, "lr": 3.3119959032134174e-06, "epoch": 0.33128480891108125, "percentage": 6.63, "elapsed_time": "0:13:39", "remaining_time": "3:12:24", "throughput": 19901.89, "total_tokens": 16302592}
|
|
{"current_steps": 5180, "total_steps": 78105, "loss": 0.6982, "lr": 3.315196517731405e-06, "epoch": 0.33160489085205813, "percentage": 6.63, "elapsed_time": "0:13:39", "remaining_time": "3:12:21", "throughput": 19904.7, "total_tokens": 16318656}
|
|
{"current_steps": 5185, "total_steps": 78105, "loss": 0.5162, "lr": 3.3183971322493924e-06, "epoch": 0.331924972793035, "percentage": 6.64, "elapsed_time": "0:13:40", "remaining_time": "3:12:19", "throughput": 19907.43, "total_tokens": 16334016}
|
|
{"current_steps": 5190, "total_steps": 78105, "loss": 0.5793, "lr": 3.3215977467673794e-06, "epoch": 0.3322450547340119, "percentage": 6.64, "elapsed_time": "0:13:41", "remaining_time": "3:12:16", "throughput": 19910.33, "total_tokens": 16349888}
|
|
{"current_steps": 5195, "total_steps": 78105, "loss": 0.5796, "lr": 3.324798361285367e-06, "epoch": 0.33256513667498877, "percentage": 6.65, "elapsed_time": "0:13:41", "remaining_time": "3:12:14", "throughput": 19912.81, "total_tokens": 16365056}
|
|
{"current_steps": 5200, "total_steps": 78105, "loss": 0.4358, "lr": 3.3279989758033544e-06, "epoch": 0.3328852186159657, "percentage": 6.66, "elapsed_time": "0:13:42", "remaining_time": "3:12:11", "throughput": 19915.09, "total_tokens": 16380032}
|
|
{"current_steps": 5205, "total_steps": 78105, "loss": 0.6268, "lr": 3.331199590321342e-06, "epoch": 0.3332053005569426, "percentage": 6.66, "elapsed_time": "0:13:43", "remaining_time": "3:12:08", "throughput": 19917.28, "total_tokens": 16394752}
|
|
{"current_steps": 5210, "total_steps": 78105, "loss": 0.5224, "lr": 3.3344002048393293e-06, "epoch": 0.33352538249791946, "percentage": 6.67, "elapsed_time": "0:13:43", "remaining_time": "3:12:06", "throughput": 19919.88, "total_tokens": 16410368}
|
|
{"current_steps": 5215, "total_steps": 78105, "loss": 0.4788, "lr": 3.3376008193573168e-06, "epoch": 0.33384546443889634, "percentage": 6.68, "elapsed_time": "0:13:44", "remaining_time": "3:12:03", "throughput": 19921.99, "total_tokens": 16425088}
|
|
{"current_steps": 5220, "total_steps": 78105, "loss": 0.4521, "lr": 3.3408014338753042e-06, "epoch": 0.3341655463798732, "percentage": 6.68, "elapsed_time": "0:13:45", "remaining_time": "3:12:01", "throughput": 19924.95, "total_tokens": 16441152}
|
|
{"current_steps": 5225, "total_steps": 78105, "loss": 0.5389, "lr": 3.3440020483932917e-06, "epoch": 0.33448562832085016, "percentage": 6.69, "elapsed_time": "0:13:45", "remaining_time": "3:11:59", "throughput": 19928.35, "total_tokens": 16458240}
|
|
{"current_steps": 5230, "total_steps": 78105, "loss": 0.575, "lr": 3.347202662911279e-06, "epoch": 0.33480571026182704, "percentage": 6.7, "elapsed_time": "0:13:46", "remaining_time": "3:11:57", "throughput": 19931.19, "total_tokens": 16474112}
|
|
{"current_steps": 5235, "total_steps": 78105, "loss": 0.4176, "lr": 3.3504032774292666e-06, "epoch": 0.3351257922028039, "percentage": 6.7, "elapsed_time": "0:13:47", "remaining_time": "3:11:55", "throughput": 19934.33, "total_tokens": 16490560}
|
|
{"current_steps": 5240, "total_steps": 78105, "loss": 0.7561, "lr": 3.353603891947254e-06, "epoch": 0.3354458741437808, "percentage": 6.71, "elapsed_time": "0:13:47", "remaining_time": "3:11:52", "throughput": 19936.69, "total_tokens": 16505728}
|
|
{"current_steps": 5245, "total_steps": 78105, "loss": 0.491, "lr": 3.3568045064652416e-06, "epoch": 0.3357659560847577, "percentage": 6.72, "elapsed_time": "0:13:48", "remaining_time": "3:11:50", "throughput": 19939.65, "total_tokens": 16521856}
|
|
{"current_steps": 5250, "total_steps": 78105, "loss": 0.4595, "lr": 3.360005120983229e-06, "epoch": 0.3360860380257346, "percentage": 6.72, "elapsed_time": "0:13:49", "remaining_time": "3:11:47", "throughput": 19942.53, "total_tokens": 16537792}
|
|
{"current_steps": 5255, "total_steps": 78105, "loss": 0.4857, "lr": 3.3632057355012165e-06, "epoch": 0.3364061199667115, "percentage": 6.73, "elapsed_time": "0:13:49", "remaining_time": "3:11:45", "throughput": 19945.14, "total_tokens": 16553408}
|
|
{"current_steps": 5260, "total_steps": 78105, "loss": 0.4904, "lr": 3.366406350019204e-06, "epoch": 0.33672620190768837, "percentage": 6.73, "elapsed_time": "0:13:50", "remaining_time": "3:11:43", "throughput": 19948.25, "total_tokens": 16570112}
|
|
{"current_steps": 5265, "total_steps": 78105, "loss": 0.5798, "lr": 3.3696069645371914e-06, "epoch": 0.33704628384866525, "percentage": 6.74, "elapsed_time": "0:13:51", "remaining_time": "3:11:41", "throughput": 19951.03, "total_tokens": 16586304}
|
|
{"current_steps": 5270, "total_steps": 78105, "loss": 0.5247, "lr": 3.372807579055179e-06, "epoch": 0.33736636578964213, "percentage": 6.75, "elapsed_time": "0:13:52", "remaining_time": "3:11:39", "throughput": 19953.31, "total_tokens": 16601920}
|
|
{"current_steps": 5275, "total_steps": 78105, "loss": 0.5074, "lr": 3.3760081935731664e-06, "epoch": 0.33768644773061907, "percentage": 6.75, "elapsed_time": "0:13:52", "remaining_time": "3:11:37", "throughput": 19956.33, "total_tokens": 16618240}
|
|
{"current_steps": 5280, "total_steps": 78105, "loss": 0.6338, "lr": 3.379208808091154e-06, "epoch": 0.33800652967159595, "percentage": 6.76, "elapsed_time": "0:13:53", "remaining_time": "3:11:34", "throughput": 19958.75, "total_tokens": 16633408}
|
|
{"current_steps": 5285, "total_steps": 78105, "loss": 0.4926, "lr": 3.3824094226091413e-06, "epoch": 0.3383266116125728, "percentage": 6.77, "elapsed_time": "0:13:54", "remaining_time": "3:11:32", "throughput": 19961.61, "total_tokens": 16649664}
|
|
{"current_steps": 5290, "total_steps": 78105, "loss": 0.4736, "lr": 3.3856100371271288e-06, "epoch": 0.3386466935535497, "percentage": 6.77, "elapsed_time": "0:13:54", "remaining_time": "3:11:30", "throughput": 19963.97, "total_tokens": 16664896}
|
|
{"current_steps": 5295, "total_steps": 78105, "loss": 0.5782, "lr": 3.3888106516451162e-06, "epoch": 0.3389667754945266, "percentage": 6.78, "elapsed_time": "0:13:55", "remaining_time": "3:11:27", "throughput": 19966.38, "total_tokens": 16680384}
|
|
{"current_steps": 5300, "total_steps": 78105, "loss": 0.4507, "lr": 3.3920112661631037e-06, "epoch": 0.33928685743550346, "percentage": 6.79, "elapsed_time": "0:13:56", "remaining_time": "3:11:25", "throughput": 19969.2, "total_tokens": 16696192}
|
|
{"current_steps": 5305, "total_steps": 78105, "loss": 0.63, "lr": 3.395211880681091e-06, "epoch": 0.3396069393764804, "percentage": 6.79, "elapsed_time": "0:13:57", "remaining_time": "3:11:30", "throughput": 19976.64, "total_tokens": 16727104}
|
|
{"current_steps": 5310, "total_steps": 78105, "loss": 0.5304, "lr": 3.398412495199078e-06, "epoch": 0.3399270213174573, "percentage": 6.8, "elapsed_time": "0:13:57", "remaining_time": "3:11:27", "throughput": 19978.78, "total_tokens": 16741696}
|
|
{"current_steps": 5315, "total_steps": 78105, "loss": 0.589, "lr": 3.4016131097170657e-06, "epoch": 0.34024710325843416, "percentage": 6.8, "elapsed_time": "0:13:58", "remaining_time": "3:11:25", "throughput": 19981.23, "total_tokens": 16757120}
|
|
{"current_steps": 5320, "total_steps": 78105, "loss": 0.5482, "lr": 3.404813724235053e-06, "epoch": 0.34056718519941104, "percentage": 6.81, "elapsed_time": "0:13:59", "remaining_time": "3:11:22", "throughput": 19983.48, "total_tokens": 16772416}
|
|
{"current_steps": 5325, "total_steps": 78105, "loss": 0.3581, "lr": 3.4080143387530406e-06, "epoch": 0.3408872671403879, "percentage": 6.82, "elapsed_time": "0:13:59", "remaining_time": "3:11:20", "throughput": 19986.34, "total_tokens": 16788352}
|
|
{"current_steps": 5330, "total_steps": 78105, "loss": 0.4131, "lr": 3.411214953271028e-06, "epoch": 0.34120734908136485, "percentage": 6.82, "elapsed_time": "0:14:00", "remaining_time": "3:11:18", "throughput": 19988.71, "total_tokens": 16803648}
|
|
{"current_steps": 5335, "total_steps": 78105, "loss": 0.5274, "lr": 3.4144155677890155e-06, "epoch": 0.34152743102234173, "percentage": 6.83, "elapsed_time": "0:14:01", "remaining_time": "3:11:15", "throughput": 19991.11, "total_tokens": 16818944}
|
|
{"current_steps": 5340, "total_steps": 78105, "loss": 0.4915, "lr": 3.417616182307003e-06, "epoch": 0.3418475129633186, "percentage": 6.84, "elapsed_time": "0:14:01", "remaining_time": "3:11:13", "throughput": 19993.36, "total_tokens": 16833984}
|
|
{"current_steps": 5345, "total_steps": 78105, "loss": 0.6687, "lr": 3.4208167968249905e-06, "epoch": 0.3421675949042955, "percentage": 6.84, "elapsed_time": "0:14:02", "remaining_time": "3:11:10", "throughput": 19995.94, "total_tokens": 16849280}
|
|
{"current_steps": 5350, "total_steps": 78105, "loss": 0.4588, "lr": 3.424017411342978e-06, "epoch": 0.34248767684527237, "percentage": 6.85, "elapsed_time": "0:14:03", "remaining_time": "3:11:08", "throughput": 19998.72, "total_tokens": 16865664}
|
|
{"current_steps": 5355, "total_steps": 78105, "loss": 0.6085, "lr": 3.4272180258609654e-06, "epoch": 0.3428077587862493, "percentage": 6.86, "elapsed_time": "0:14:04", "remaining_time": "3:11:06", "throughput": 20001.15, "total_tokens": 16881536}
|
|
{"current_steps": 5360, "total_steps": 78105, "loss": 0.4435, "lr": 3.430418640378953e-06, "epoch": 0.3431278407272262, "percentage": 6.86, "elapsed_time": "0:14:04", "remaining_time": "3:11:04", "throughput": 20004.47, "total_tokens": 16898816}
|
|
{"current_steps": 5365, "total_steps": 78105, "loss": 0.5517, "lr": 3.4336192548969403e-06, "epoch": 0.34344792266820307, "percentage": 6.87, "elapsed_time": "0:14:05", "remaining_time": "3:11:02", "throughput": 20007.31, "total_tokens": 16915136}
|
|
{"current_steps": 5370, "total_steps": 78105, "loss": 0.59, "lr": 3.436819869414928e-06, "epoch": 0.34376800460917994, "percentage": 6.88, "elapsed_time": "0:14:06", "remaining_time": "3:11:00", "throughput": 20009.89, "total_tokens": 16931200}
|
|
{"current_steps": 5375, "total_steps": 78105, "loss": 0.5054, "lr": 3.4400204839329153e-06, "epoch": 0.3440880865501568, "percentage": 6.88, "elapsed_time": "0:14:06", "remaining_time": "3:10:58", "throughput": 20012.26, "total_tokens": 16946816}
|
|
{"current_steps": 5380, "total_steps": 78105, "loss": 0.4416, "lr": 3.4432210984509027e-06, "epoch": 0.3444081684911337, "percentage": 6.89, "elapsed_time": "0:14:07", "remaining_time": "3:10:56", "throughput": 20014.89, "total_tokens": 16962880}
|
|
{"current_steps": 5385, "total_steps": 78105, "loss": 0.5929, "lr": 3.4464217129688902e-06, "epoch": 0.34472825043211064, "percentage": 6.89, "elapsed_time": "0:14:08", "remaining_time": "3:10:54", "throughput": 20017.19, "total_tokens": 16978240}
|
|
{"current_steps": 5390, "total_steps": 78105, "loss": 0.4171, "lr": 3.4496223274868777e-06, "epoch": 0.3450483323730875, "percentage": 6.9, "elapsed_time": "0:14:08", "remaining_time": "3:10:51", "throughput": 20019.69, "total_tokens": 16993728}
|
|
{"current_steps": 5395, "total_steps": 78105, "loss": 0.6317, "lr": 3.452822942004865e-06, "epoch": 0.3453684143140644, "percentage": 6.91, "elapsed_time": "0:14:09", "remaining_time": "3:10:49", "throughput": 20022.24, "total_tokens": 17009728}
|
|
{"current_steps": 5400, "total_steps": 78105, "loss": 0.4645, "lr": 3.4560235565228526e-06, "epoch": 0.3456884962550413, "percentage": 6.91, "elapsed_time": "0:14:10", "remaining_time": "3:10:46", "throughput": 20024.39, "total_tokens": 17024640}
|
|
{"current_steps": 5405, "total_steps": 78105, "loss": 0.4109, "lr": 3.4592241710408405e-06, "epoch": 0.34600857819601816, "percentage": 6.92, "elapsed_time": "0:14:10", "remaining_time": "3:10:44", "throughput": 20026.91, "total_tokens": 17040512}
|
|
{"current_steps": 5410, "total_steps": 78105, "loss": 0.5849, "lr": 3.462424785558828e-06, "epoch": 0.3463286601369951, "percentage": 6.93, "elapsed_time": "0:14:11", "remaining_time": "3:10:42", "throughput": 20029.4, "total_tokens": 17056064}
|
|
{"current_steps": 5415, "total_steps": 78105, "loss": 0.492, "lr": 3.4656254000768154e-06, "epoch": 0.34664874207797197, "percentage": 6.93, "elapsed_time": "0:14:12", "remaining_time": "3:10:40", "throughput": 20032.3, "total_tokens": 17072448}
|
|
{"current_steps": 5420, "total_steps": 78105, "loss": 0.626, "lr": 3.468826014594803e-06, "epoch": 0.34696882401894885, "percentage": 6.94, "elapsed_time": "0:14:12", "remaining_time": "3:10:38", "throughput": 20034.79, "total_tokens": 17088320}
|
|
{"current_steps": 5425, "total_steps": 78105, "loss": 0.5002, "lr": 3.4720266291127904e-06, "epoch": 0.34728890595992573, "percentage": 6.95, "elapsed_time": "0:14:13", "remaining_time": "3:10:36", "throughput": 20037.55, "total_tokens": 17104448}
|
|
{"current_steps": 5430, "total_steps": 78105, "loss": 0.4201, "lr": 3.475227243630777e-06, "epoch": 0.3476089879009026, "percentage": 6.95, "elapsed_time": "0:14:14", "remaining_time": "3:10:34", "throughput": 20040.99, "total_tokens": 17121856}
|
|
{"current_steps": 5435, "total_steps": 78105, "loss": 0.6419, "lr": 3.4784278581487645e-06, "epoch": 0.34792906984187955, "percentage": 6.96, "elapsed_time": "0:14:14", "remaining_time": "3:10:31", "throughput": 20043.07, "total_tokens": 17136512}
|
|
{"current_steps": 5440, "total_steps": 78105, "loss": 0.5514, "lr": 3.481628472666752e-06, "epoch": 0.3482491517828564, "percentage": 6.96, "elapsed_time": "0:14:15", "remaining_time": "3:10:29", "throughput": 20045.86, "total_tokens": 17152640}
|
|
{"current_steps": 5445, "total_steps": 78105, "loss": 0.3865, "lr": 3.4848290871847394e-06, "epoch": 0.3485692337238333, "percentage": 6.97, "elapsed_time": "0:14:16", "remaining_time": "3:10:27", "throughput": 20047.99, "total_tokens": 17167680}
|
|
{"current_steps": 5450, "total_steps": 78105, "loss": 0.7384, "lr": 3.488029701702727e-06, "epoch": 0.3488893156648102, "percentage": 6.98, "elapsed_time": "0:14:17", "remaining_time": "3:10:24", "throughput": 20050.3, "total_tokens": 17183296}
|
|
{"current_steps": 5455, "total_steps": 78105, "loss": 0.6669, "lr": 3.4912303162207143e-06, "epoch": 0.34920939760578706, "percentage": 6.98, "elapsed_time": "0:14:17", "remaining_time": "3:10:22", "throughput": 20052.45, "total_tokens": 17198272}
|
|
{"current_steps": 5460, "total_steps": 78105, "loss": 0.5908, "lr": 3.494430930738702e-06, "epoch": 0.349529479546764, "percentage": 6.99, "elapsed_time": "0:14:18", "remaining_time": "3:10:19", "throughput": 20054.81, "total_tokens": 17213376}
|
|
{"current_steps": 5465, "total_steps": 78105, "loss": 0.5236, "lr": 3.4976315452566893e-06, "epoch": 0.3498495614877409, "percentage": 7.0, "elapsed_time": "0:14:19", "remaining_time": "3:10:17", "throughput": 20057.7, "total_tokens": 17229632}
|
|
{"current_steps": 5470, "total_steps": 78105, "loss": 0.6976, "lr": 3.5008321597746767e-06, "epoch": 0.35016964342871776, "percentage": 7.0, "elapsed_time": "0:14:19", "remaining_time": "3:10:15", "throughput": 20060.37, "total_tokens": 17245696}
|
|
{"current_steps": 5475, "total_steps": 78105, "loss": 0.4405, "lr": 3.5040327742926646e-06, "epoch": 0.35048972536969464, "percentage": 7.01, "elapsed_time": "0:14:20", "remaining_time": "3:10:13", "throughput": 20063.66, "total_tokens": 17262976}
|
|
{"current_steps": 5480, "total_steps": 78105, "loss": 0.48, "lr": 3.507233388810652e-06, "epoch": 0.3508098073106715, "percentage": 7.02, "elapsed_time": "0:14:21", "remaining_time": "3:10:11", "throughput": 20066.0, "total_tokens": 17278336}
|
|
{"current_steps": 5485, "total_steps": 78105, "loss": 0.4554, "lr": 3.5104340033286396e-06, "epoch": 0.3511298892516484, "percentage": 7.02, "elapsed_time": "0:14:21", "remaining_time": "3:10:09", "throughput": 20068.15, "total_tokens": 17293248}
|
|
{"current_steps": 5490, "total_steps": 78105, "loss": 0.4985, "lr": 3.513634617846627e-06, "epoch": 0.35144997119262533, "percentage": 7.03, "elapsed_time": "0:14:22", "remaining_time": "3:10:06", "throughput": 20070.62, "total_tokens": 17309184}
|
|
{"current_steps": 5495, "total_steps": 78105, "loss": 0.527, "lr": 3.5168352323646145e-06, "epoch": 0.3517700531336022, "percentage": 7.04, "elapsed_time": "0:14:23", "remaining_time": "3:10:04", "throughput": 20073.23, "total_tokens": 17325248}
|
|
{"current_steps": 5500, "total_steps": 78105, "loss": 0.5258, "lr": 3.520035846882602e-06, "epoch": 0.3520901350745791, "percentage": 7.04, "elapsed_time": "0:14:23", "remaining_time": "3:10:02", "throughput": 20075.6, "total_tokens": 17340736}
|
|
{"current_steps": 5505, "total_steps": 78105, "loss": 0.474, "lr": 3.5232364614005894e-06, "epoch": 0.35241021701555597, "percentage": 7.05, "elapsed_time": "0:14:24", "remaining_time": "3:10:00", "throughput": 20077.91, "total_tokens": 17355904}
|
|
{"current_steps": 5510, "total_steps": 78105, "loss": 0.5218, "lr": 3.526437075918577e-06, "epoch": 0.35273029895653285, "percentage": 7.05, "elapsed_time": "0:14:25", "remaining_time": "3:09:57", "throughput": 20080.21, "total_tokens": 17371264}
|
|
{"current_steps": 5515, "total_steps": 78105, "loss": 0.4929, "lr": 3.5296376904365644e-06, "epoch": 0.3530503808975098, "percentage": 7.06, "elapsed_time": "0:14:25", "remaining_time": "3:09:56", "throughput": 20083.24, "total_tokens": 17388608}
|
|
{"current_steps": 5520, "total_steps": 78105, "loss": 0.4981, "lr": 3.532838304954552e-06, "epoch": 0.35337046283848667, "percentage": 7.07, "elapsed_time": "0:14:26", "remaining_time": "3:09:53", "throughput": 20085.53, "total_tokens": 17403712}
|
|
{"current_steps": 5525, "total_steps": 78105, "loss": 0.5438, "lr": 3.5360389194725393e-06, "epoch": 0.35369054477946354, "percentage": 7.07, "elapsed_time": "0:14:27", "remaining_time": "3:09:52", "throughput": 20088.84, "total_tokens": 17421056}
|
|
{"current_steps": 5530, "total_steps": 78105, "loss": 0.6835, "lr": 3.5392395339905268e-06, "epoch": 0.3540106267204404, "percentage": 7.08, "elapsed_time": "0:14:27", "remaining_time": "3:09:49", "throughput": 20090.96, "total_tokens": 17436096}
|
|
{"current_steps": 5535, "total_steps": 78105, "loss": 0.4615, "lr": 3.5424401485085142e-06, "epoch": 0.3543307086614173, "percentage": 7.09, "elapsed_time": "0:14:28", "remaining_time": "3:09:47", "throughput": 20093.62, "total_tokens": 17452480}
|
|
{"current_steps": 5540, "total_steps": 78105, "loss": 0.454, "lr": 3.5456407630265017e-06, "epoch": 0.35465079060239424, "percentage": 7.09, "elapsed_time": "0:14:29", "remaining_time": "3:09:45", "throughput": 20095.9, "total_tokens": 17467776}
|
|
{"current_steps": 5545, "total_steps": 78105, "loss": 0.5886, "lr": 3.548841377544489e-06, "epoch": 0.3549708725433711, "percentage": 7.1, "elapsed_time": "0:14:29", "remaining_time": "3:09:42", "throughput": 20098.06, "total_tokens": 17482752}
|
|
{"current_steps": 5550, "total_steps": 78105, "loss": 0.4031, "lr": 3.552041992062476e-06, "epoch": 0.355290954484348, "percentage": 7.11, "elapsed_time": "0:14:30", "remaining_time": "3:09:40", "throughput": 20100.25, "total_tokens": 17497792}
|
|
{"current_steps": 5555, "total_steps": 78105, "loss": 0.6044, "lr": 3.5552426065804637e-06, "epoch": 0.3556110364253249, "percentage": 7.11, "elapsed_time": "0:14:31", "remaining_time": "3:09:37", "throughput": 20102.49, "total_tokens": 17513024}
|
|
{"current_steps": 5560, "total_steps": 78105, "loss": 0.4138, "lr": 3.558443221098451e-06, "epoch": 0.35593111836630176, "percentage": 7.12, "elapsed_time": "0:14:31", "remaining_time": "3:09:36", "throughput": 20104.77, "total_tokens": 17529024}
|
|
{"current_steps": 5565, "total_steps": 78105, "loss": 0.5137, "lr": 3.5616438356164386e-06, "epoch": 0.35625120030727864, "percentage": 7.13, "elapsed_time": "0:14:32", "remaining_time": "3:09:33", "throughput": 20106.7, "total_tokens": 17543936}
|
|
{"current_steps": 5570, "total_steps": 78105, "loss": 0.6699, "lr": 3.564844450134426e-06, "epoch": 0.35657128224825557, "percentage": 7.13, "elapsed_time": "0:14:33", "remaining_time": "3:09:31", "throughput": 20108.88, "total_tokens": 17559168}
|
|
{"current_steps": 5575, "total_steps": 78105, "loss": 0.4501, "lr": 3.5680450646524135e-06, "epoch": 0.35689136418923245, "percentage": 7.14, "elapsed_time": "0:14:33", "remaining_time": "3:09:29", "throughput": 20111.32, "total_tokens": 17574912}
|
|
{"current_steps": 5580, "total_steps": 78105, "loss": 0.4311, "lr": 3.571245679170401e-06, "epoch": 0.35721144613020933, "percentage": 7.14, "elapsed_time": "0:14:34", "remaining_time": "3:09:26", "throughput": 20113.6, "total_tokens": 17590272}
|
|
{"current_steps": 5585, "total_steps": 78105, "loss": 0.5236, "lr": 3.5744462936883885e-06, "epoch": 0.3575315280711862, "percentage": 7.15, "elapsed_time": "0:14:35", "remaining_time": "3:09:24", "throughput": 20115.96, "total_tokens": 17605952}
|
|
{"current_steps": 5590, "total_steps": 78105, "loss": 0.4981, "lr": 3.577646908206376e-06, "epoch": 0.3578516100121631, "percentage": 7.16, "elapsed_time": "0:14:35", "remaining_time": "3:09:22", "throughput": 20118.11, "total_tokens": 17621120}
|
|
{"current_steps": 5595, "total_steps": 78105, "loss": 0.6862, "lr": 3.5808475227243634e-06, "epoch": 0.35817169195314, "percentage": 7.16, "elapsed_time": "0:14:36", "remaining_time": "3:09:19", "throughput": 20120.21, "total_tokens": 17636096}
|
|
{"current_steps": 5600, "total_steps": 78105, "loss": 0.5436, "lr": 3.584048137242351e-06, "epoch": 0.3584917738941169, "percentage": 7.17, "elapsed_time": "0:14:37", "remaining_time": "3:09:17", "throughput": 20123.04, "total_tokens": 17652672}
|
|
{"current_steps": 5605, "total_steps": 78105, "loss": 0.6614, "lr": 3.5872487517603383e-06, "epoch": 0.3588118558350938, "percentage": 7.18, "elapsed_time": "0:14:37", "remaining_time": "3:09:15", "throughput": 20125.14, "total_tokens": 17668032}
|
|
{"current_steps": 5610, "total_steps": 78105, "loss": 0.5959, "lr": 3.590449366278326e-06, "epoch": 0.35913193777607066, "percentage": 7.18, "elapsed_time": "0:14:38", "remaining_time": "3:09:13", "throughput": 20127.28, "total_tokens": 17683264}
|
|
{"current_steps": 5615, "total_steps": 78105, "loss": 0.4085, "lr": 3.5936499807963133e-06, "epoch": 0.35945201971704754, "percentage": 7.19, "elapsed_time": "0:14:39", "remaining_time": "3:09:11", "throughput": 20129.77, "total_tokens": 17699200}
|
|
{"current_steps": 5620, "total_steps": 78105, "loss": 0.3946, "lr": 3.5968505953143007e-06, "epoch": 0.3597721016580245, "percentage": 7.2, "elapsed_time": "0:14:39", "remaining_time": "3:09:09", "throughput": 20132.11, "total_tokens": 17714816}
|
|
{"current_steps": 5625, "total_steps": 78105, "loss": 0.4728, "lr": 3.6000512098322882e-06, "epoch": 0.36009218359900136, "percentage": 7.2, "elapsed_time": "0:14:40", "remaining_time": "3:09:07", "throughput": 20135.36, "total_tokens": 17732352}
|
|
{"current_steps": 5630, "total_steps": 78105, "loss": 0.6305, "lr": 3.6032518243502757e-06, "epoch": 0.36041226553997824, "percentage": 7.21, "elapsed_time": "0:14:41", "remaining_time": "3:09:05", "throughput": 20137.87, "total_tokens": 17748288}
|
|
{"current_steps": 5635, "total_steps": 78105, "loss": 0.4213, "lr": 3.606452438868263e-06, "epoch": 0.3607323474809551, "percentage": 7.21, "elapsed_time": "0:14:42", "remaining_time": "3:09:03", "throughput": 20139.87, "total_tokens": 17763520}
|
|
{"current_steps": 5640, "total_steps": 78105, "loss": 0.5041, "lr": 3.6096530533862506e-06, "epoch": 0.361052429421932, "percentage": 7.22, "elapsed_time": "0:14:42", "remaining_time": "3:09:01", "throughput": 20142.27, "total_tokens": 17779328}
|
|
{"current_steps": 5645, "total_steps": 78105, "loss": 0.4788, "lr": 3.612853667904238e-06, "epoch": 0.3613725113629089, "percentage": 7.23, "elapsed_time": "0:14:43", "remaining_time": "3:08:59", "throughput": 20144.83, "total_tokens": 17795584}
|
|
{"current_steps": 5650, "total_steps": 78105, "loss": 0.463, "lr": 3.6160542824222255e-06, "epoch": 0.3616925933038858, "percentage": 7.23, "elapsed_time": "0:14:44", "remaining_time": "3:08:56", "throughput": 20146.7, "total_tokens": 17810368}
|
|
{"current_steps": 5655, "total_steps": 78105, "loss": 0.4958, "lr": 3.619254896940213e-06, "epoch": 0.3620126752448627, "percentage": 7.24, "elapsed_time": "0:14:44", "remaining_time": "3:08:54", "throughput": 20149.15, "total_tokens": 17826496}
|
|
{"current_steps": 5660, "total_steps": 78105, "loss": 0.5192, "lr": 3.6224555114582005e-06, "epoch": 0.36233275718583957, "percentage": 7.25, "elapsed_time": "0:14:45", "remaining_time": "3:08:52", "throughput": 20151.06, "total_tokens": 17841344}
|
|
{"current_steps": 5665, "total_steps": 78105, "loss": 0.5501, "lr": 3.625656125976188e-06, "epoch": 0.36265283912681645, "percentage": 7.25, "elapsed_time": "0:14:46", "remaining_time": "3:08:50", "throughput": 20153.41, "total_tokens": 17857408}
|
|
{"current_steps": 5670, "total_steps": 78105, "loss": 0.5163, "lr": 3.628856740494175e-06, "epoch": 0.36297292106779333, "percentage": 7.26, "elapsed_time": "0:14:46", "remaining_time": "3:08:48", "throughput": 20155.78, "total_tokens": 17873280}
|
|
{"current_steps": 5675, "total_steps": 78105, "loss": 0.5723, "lr": 3.6320573550121625e-06, "epoch": 0.36329300300877027, "percentage": 7.27, "elapsed_time": "0:14:47", "remaining_time": "3:08:46", "throughput": 20157.89, "total_tokens": 17888448}
|
|
{"current_steps": 5680, "total_steps": 78105, "loss": 0.5371, "lr": 3.63525796953015e-06, "epoch": 0.36361308494974715, "percentage": 7.27, "elapsed_time": "0:14:48", "remaining_time": "3:08:44", "throughput": 20160.2, "total_tokens": 17904320}
|
|
{"current_steps": 5685, "total_steps": 78105, "loss": 0.3783, "lr": 3.6384585840481374e-06, "epoch": 0.363933166890724, "percentage": 7.28, "elapsed_time": "0:14:48", "remaining_time": "3:08:41", "throughput": 20162.37, "total_tokens": 17919744}
|
|
{"current_steps": 5690, "total_steps": 78105, "loss": 0.3927, "lr": 3.641659198566125e-06, "epoch": 0.3642532488317009, "percentage": 7.29, "elapsed_time": "0:14:49", "remaining_time": "3:08:39", "throughput": 20164.49, "total_tokens": 17934656}
|
|
{"current_steps": 5695, "total_steps": 78105, "loss": 0.6628, "lr": 3.6448598130841123e-06, "epoch": 0.3645733307726778, "percentage": 7.29, "elapsed_time": "0:14:50", "remaining_time": "3:08:37", "throughput": 20167.06, "total_tokens": 17950976}
|
|
{"current_steps": 5700, "total_steps": 78105, "loss": 0.6469, "lr": 3.6480604276021e-06, "epoch": 0.3648934127136547, "percentage": 7.3, "elapsed_time": "0:14:50", "remaining_time": "3:08:35", "throughput": 20169.28, "total_tokens": 17966400}
|
|
{"current_steps": 5705, "total_steps": 78105, "loss": 0.6176, "lr": 3.6512610421200873e-06, "epoch": 0.3652134946546316, "percentage": 7.3, "elapsed_time": "0:14:51", "remaining_time": "3:08:33", "throughput": 20172.31, "total_tokens": 17983872}
|
|
{"current_steps": 5710, "total_steps": 78105, "loss": 0.548, "lr": 3.6544616566380747e-06, "epoch": 0.3655335765956085, "percentage": 7.31, "elapsed_time": "0:14:52", "remaining_time": "3:08:31", "throughput": 20174.83, "total_tokens": 17999680}
|
|
{"current_steps": 5715, "total_steps": 78105, "loss": 0.4054, "lr": 3.657662271156062e-06, "epoch": 0.36585365853658536, "percentage": 7.32, "elapsed_time": "0:14:52", "remaining_time": "3:08:29", "throughput": 20176.91, "total_tokens": 18014784}
|
|
{"current_steps": 5720, "total_steps": 78105, "loss": 0.5321, "lr": 3.6608628856740497e-06, "epoch": 0.36617374047756224, "percentage": 7.32, "elapsed_time": "0:14:53", "remaining_time": "3:08:26", "throughput": 20178.94, "total_tokens": 18029888}
|
|
{"current_steps": 5725, "total_steps": 78105, "loss": 0.4562, "lr": 3.664063500192037e-06, "epoch": 0.3664938224185392, "percentage": 7.33, "elapsed_time": "0:14:54", "remaining_time": "3:08:24", "throughput": 20181.26, "total_tokens": 18045632}
|
|
{"current_steps": 5730, "total_steps": 78105, "loss": 0.6684, "lr": 3.6672641147100246e-06, "epoch": 0.36681390435951605, "percentage": 7.34, "elapsed_time": "0:14:54", "remaining_time": "3:08:22", "throughput": 20183.3, "total_tokens": 18060928}
|
|
{"current_steps": 5735, "total_steps": 78105, "loss": 0.4735, "lr": 3.670464729228012e-06, "epoch": 0.36713398630049293, "percentage": 7.34, "elapsed_time": "0:14:55", "remaining_time": "3:08:20", "throughput": 20185.41, "total_tokens": 18076352}
|
|
{"current_steps": 5740, "total_steps": 78105, "loss": 0.5248, "lr": 3.6736653437459995e-06, "epoch": 0.3674540682414698, "percentage": 7.35, "elapsed_time": "0:14:56", "remaining_time": "3:08:18", "throughput": 20187.75, "total_tokens": 18092096}
|
|
{"current_steps": 5745, "total_steps": 78105, "loss": 0.7352, "lr": 3.676865958263987e-06, "epoch": 0.3677741501824467, "percentage": 7.36, "elapsed_time": "0:14:56", "remaining_time": "3:08:16", "throughput": 20190.03, "total_tokens": 18107648}
|
|
{"current_steps": 5750, "total_steps": 78105, "loss": 0.489, "lr": 3.6800665727819745e-06, "epoch": 0.36809423212342357, "percentage": 7.36, "elapsed_time": "0:14:57", "remaining_time": "3:08:14", "throughput": 20192.29, "total_tokens": 18123264}
|
|
{"current_steps": 5755, "total_steps": 78105, "loss": 0.6408, "lr": 3.683267187299962e-06, "epoch": 0.3684143140644005, "percentage": 7.37, "elapsed_time": "0:14:58", "remaining_time": "3:08:11", "throughput": 20194.42, "total_tokens": 18138368}
|
|
{"current_steps": 5760, "total_steps": 78105, "loss": 0.3899, "lr": 3.6864678018179494e-06, "epoch": 0.3687343960053774, "percentage": 7.37, "elapsed_time": "0:14:58", "remaining_time": "3:08:09", "throughput": 20196.84, "total_tokens": 18154432}
|
|
{"current_steps": 5765, "total_steps": 78105, "loss": 0.4416, "lr": 3.689668416335937e-06, "epoch": 0.36905447794635426, "percentage": 7.38, "elapsed_time": "0:14:59", "remaining_time": "3:08:08", "throughput": 20199.56, "total_tokens": 18171200}
|
|
{"current_steps": 5770, "total_steps": 78105, "loss": 0.4342, "lr": 3.6928690308539243e-06, "epoch": 0.36937455988733114, "percentage": 7.39, "elapsed_time": "0:15:00", "remaining_time": "3:08:05", "throughput": 20202.15, "total_tokens": 18187008}
|
|
{"current_steps": 5775, "total_steps": 78105, "loss": 0.4311, "lr": 3.696069645371912e-06, "epoch": 0.369694641828308, "percentage": 7.39, "elapsed_time": "0:15:00", "remaining_time": "3:08:03", "throughput": 20204.49, "total_tokens": 18202944}
|
|
{"current_steps": 5780, "total_steps": 78105, "loss": 0.6343, "lr": 3.6992702598898993e-06, "epoch": 0.37001472376928496, "percentage": 7.4, "elapsed_time": "0:15:01", "remaining_time": "3:08:01", "throughput": 20206.43, "total_tokens": 18217984}
|
|
{"current_steps": 5785, "total_steps": 78105, "loss": 0.6752, "lr": 3.7024708744078867e-06, "epoch": 0.37033480571026184, "percentage": 7.41, "elapsed_time": "0:15:02", "remaining_time": "3:07:59", "throughput": 20208.66, "total_tokens": 18233984}
|
|
{"current_steps": 5790, "total_steps": 78105, "loss": 0.5054, "lr": 3.7056714889258738e-06, "epoch": 0.3706548876512387, "percentage": 7.41, "elapsed_time": "0:15:02", "remaining_time": "3:07:57", "throughput": 20210.57, "total_tokens": 18249024}
|
|
{"current_steps": 5795, "total_steps": 78105, "loss": 0.4128, "lr": 3.7088721034438612e-06, "epoch": 0.3709749695922156, "percentage": 7.42, "elapsed_time": "0:15:03", "remaining_time": "3:07:55", "throughput": 20212.52, "total_tokens": 18264256}
|
|
{"current_steps": 5800, "total_steps": 78105, "loss": 0.6297, "lr": 3.7120727179618487e-06, "epoch": 0.3712950515331925, "percentage": 7.43, "elapsed_time": "0:15:04", "remaining_time": "3:07:53", "throughput": 20214.62, "total_tokens": 18279936}
|
|
{"current_steps": 5805, "total_steps": 78105, "loss": 0.4761, "lr": 3.715273332479836e-06, "epoch": 0.3716151334741694, "percentage": 7.43, "elapsed_time": "0:15:04", "remaining_time": "3:07:51", "throughput": 20216.98, "total_tokens": 18295488}
|
|
{"current_steps": 5810, "total_steps": 78105, "loss": 0.6025, "lr": 3.7184739469978236e-06, "epoch": 0.3719352154151463, "percentage": 7.44, "elapsed_time": "0:15:05", "remaining_time": "3:07:49", "throughput": 20219.3, "total_tokens": 18311360}
|
|
{"current_steps": 5815, "total_steps": 78105, "loss": 0.4959, "lr": 3.721674561515811e-06, "epoch": 0.37225529735612317, "percentage": 7.45, "elapsed_time": "0:15:06", "remaining_time": "3:07:46", "throughput": 20221.36, "total_tokens": 18326592}
|
|
{"current_steps": 5820, "total_steps": 78105, "loss": 0.3994, "lr": 3.7248751760337986e-06, "epoch": 0.37257537929710005, "percentage": 7.45, "elapsed_time": "0:15:07", "remaining_time": "3:07:45", "throughput": 20224.22, "total_tokens": 18344000}
|
|
{"current_steps": 5825, "total_steps": 78105, "loss": 0.6157, "lr": 3.728075790551786e-06, "epoch": 0.37289546123807693, "percentage": 7.46, "elapsed_time": "0:15:07", "remaining_time": "3:07:43", "throughput": 20226.62, "total_tokens": 18359872}
|
|
{"current_steps": 5830, "total_steps": 78105, "loss": 0.4682, "lr": 3.7312764050697735e-06, "epoch": 0.3732155431790538, "percentage": 7.46, "elapsed_time": "0:15:08", "remaining_time": "3:07:40", "throughput": 20228.25, "total_tokens": 18374400}
|
|
{"current_steps": 5835, "total_steps": 78105, "loss": 0.3252, "lr": 3.734477019587761e-06, "epoch": 0.37353562512003075, "percentage": 7.47, "elapsed_time": "0:15:09", "remaining_time": "3:07:39", "throughput": 20230.71, "total_tokens": 18390656}
|
|
{"current_steps": 5840, "total_steps": 78105, "loss": 0.5131, "lr": 3.7376776341057485e-06, "epoch": 0.3738557070610076, "percentage": 7.48, "elapsed_time": "0:15:09", "remaining_time": "3:07:37", "throughput": 20232.75, "total_tokens": 18406400}
|
|
{"current_steps": 5845, "total_steps": 78105, "loss": 0.7071, "lr": 3.740878248623736e-06, "epoch": 0.3741757890019845, "percentage": 7.48, "elapsed_time": "0:15:10", "remaining_time": "3:07:35", "throughput": 20235.22, "total_tokens": 18422848}
|
|
{"current_steps": 5850, "total_steps": 78105, "loss": 0.6232, "lr": 3.7440788631417234e-06, "epoch": 0.3744958709429614, "percentage": 7.49, "elapsed_time": "0:15:11", "remaining_time": "3:07:33", "throughput": 20237.54, "total_tokens": 18438912}
|
|
{"current_steps": 5855, "total_steps": 78105, "loss": 0.5506, "lr": 3.747279477659711e-06, "epoch": 0.37481595288393826, "percentage": 7.5, "elapsed_time": "0:15:11", "remaining_time": "3:07:31", "throughput": 20240.08, "total_tokens": 18455488}
|
|
{"current_steps": 5860, "total_steps": 78105, "loss": 0.451, "lr": 3.7504800921776983e-06, "epoch": 0.3751360348249152, "percentage": 7.5, "elapsed_time": "0:15:12", "remaining_time": "3:07:29", "throughput": 20241.64, "total_tokens": 18470080}
|
|
{"current_steps": 5865, "total_steps": 78105, "loss": 0.5892, "lr": 3.7536807066956858e-06, "epoch": 0.3754561167658921, "percentage": 7.51, "elapsed_time": "0:15:13", "remaining_time": "3:07:27", "throughput": 20243.24, "total_tokens": 18484480}
|
|
{"current_steps": 5870, "total_steps": 78105, "loss": 0.5427, "lr": 3.7568813212136733e-06, "epoch": 0.37577619870686896, "percentage": 7.52, "elapsed_time": "0:15:13", "remaining_time": "3:07:25", "throughput": 20246.03, "total_tokens": 18501312}
|
|
{"current_steps": 5875, "total_steps": 78105, "loss": 0.5258, "lr": 3.7600819357316607e-06, "epoch": 0.37609628064784584, "percentage": 7.52, "elapsed_time": "0:15:14", "remaining_time": "3:07:22", "throughput": 20247.8, "total_tokens": 18515968}
|
|
{"current_steps": 5880, "total_steps": 78105, "loss": 0.4716, "lr": 3.763282550249648e-06, "epoch": 0.3764163625888227, "percentage": 7.53, "elapsed_time": "0:15:15", "remaining_time": "3:07:20", "throughput": 20249.68, "total_tokens": 18531200}
|
|
{"current_steps": 5885, "total_steps": 78105, "loss": 0.4008, "lr": 3.766483164767636e-06, "epoch": 0.37673644452979965, "percentage": 7.53, "elapsed_time": "0:15:15", "remaining_time": "3:07:19", "throughput": 20252.16, "total_tokens": 18547712}
|
|
{"current_steps": 5890, "total_steps": 78105, "loss": 0.4438, "lr": 3.7696837792856235e-06, "epoch": 0.37705652647077653, "percentage": 7.54, "elapsed_time": "0:15:16", "remaining_time": "3:07:16", "throughput": 20254.03, "total_tokens": 18562752}
|
|
{"current_steps": 5895, "total_steps": 78105, "loss": 0.4763, "lr": 3.772884393803611e-06, "epoch": 0.3773766084117534, "percentage": 7.55, "elapsed_time": "0:15:17", "remaining_time": "3:07:14", "throughput": 20256.07, "total_tokens": 18578368}
|
|
{"current_steps": 5900, "total_steps": 78105, "loss": 0.4597, "lr": 3.7760850083215985e-06, "epoch": 0.3776966903527303, "percentage": 7.55, "elapsed_time": "0:15:17", "remaining_time": "3:07:12", "throughput": 20257.94, "total_tokens": 18593408}
|
|
{"current_steps": 5905, "total_steps": 78105, "loss": 0.4968, "lr": 3.779285622839586e-06, "epoch": 0.37801677229370717, "percentage": 7.56, "elapsed_time": "0:15:18", "remaining_time": "3:07:10", "throughput": 20259.67, "total_tokens": 18607872}
|
|
{"current_steps": 5910, "total_steps": 78105, "loss": 0.414, "lr": 3.7824862373575726e-06, "epoch": 0.3783368542346841, "percentage": 7.57, "elapsed_time": "0:15:19", "remaining_time": "3:07:08", "throughput": 20262.02, "total_tokens": 18624000}
|
|
{"current_steps": 5915, "total_steps": 78105, "loss": 0.6451, "lr": 3.78568685187556e-06, "epoch": 0.378656936175661, "percentage": 7.57, "elapsed_time": "0:15:19", "remaining_time": "3:07:06", "throughput": 20264.78, "total_tokens": 18640704}
|
|
{"current_steps": 5920, "total_steps": 78105, "loss": 0.501, "lr": 3.7888874663935475e-06, "epoch": 0.37897701811663786, "percentage": 7.58, "elapsed_time": "0:15:20", "remaining_time": "3:07:05", "throughput": 20267.91, "total_tokens": 18658368}
|
|
{"current_steps": 5925, "total_steps": 78105, "loss": 0.5381, "lr": 3.792088080911535e-06, "epoch": 0.37929710005761474, "percentage": 7.59, "elapsed_time": "0:15:21", "remaining_time": "3:07:03", "throughput": 20270.81, "total_tokens": 18675584}
|
|
{"current_steps": 5930, "total_steps": 78105, "loss": 0.6799, "lr": 3.7952886954295224e-06, "epoch": 0.3796171819985916, "percentage": 7.59, "elapsed_time": "0:15:21", "remaining_time": "3:07:01", "throughput": 20273.23, "total_tokens": 18691904}
|
|
{"current_steps": 5935, "total_steps": 78105, "loss": 0.4492, "lr": 3.79848930994751e-06, "epoch": 0.3799372639395685, "percentage": 7.6, "elapsed_time": "0:15:22", "remaining_time": "3:07:00", "throughput": 20275.91, "total_tokens": 18708736}
|
|
{"current_steps": 5940, "total_steps": 78105, "loss": 0.4253, "lr": 3.8016899244654974e-06, "epoch": 0.38025734588054544, "percentage": 7.61, "elapsed_time": "0:15:23", "remaining_time": "3:06:58", "throughput": 20277.8, "total_tokens": 18724032}
|
|
{"current_steps": 5945, "total_steps": 78105, "loss": 0.5151, "lr": 3.804890538983485e-06, "epoch": 0.3805774278215223, "percentage": 7.61, "elapsed_time": "0:15:24", "remaining_time": "3:06:56", "throughput": 20280.18, "total_tokens": 18740352}
|
|
{"current_steps": 5950, "total_steps": 78105, "loss": 0.4548, "lr": 3.8080911535014723e-06, "epoch": 0.3808975097624992, "percentage": 7.62, "elapsed_time": "0:15:24", "remaining_time": "3:06:53", "throughput": 20281.72, "total_tokens": 18754560}
|
|
{"current_steps": 5955, "total_steps": 78105, "loss": 0.7133, "lr": 3.81129176801946e-06, "epoch": 0.3812175917034761, "percentage": 7.62, "elapsed_time": "0:15:25", "remaining_time": "3:06:51", "throughput": 20283.59, "total_tokens": 18769280}
|
|
{"current_steps": 5960, "total_steps": 78105, "loss": 0.4603, "lr": 3.8144923825374477e-06, "epoch": 0.38153767364445296, "percentage": 7.63, "elapsed_time": "0:15:26", "remaining_time": "3:06:49", "throughput": 20285.55, "total_tokens": 18784512}
|
|
{"current_steps": 5965, "total_steps": 78105, "loss": 0.4459, "lr": 3.817692997055435e-06, "epoch": 0.3818577555854299, "percentage": 7.64, "elapsed_time": "0:15:26", "remaining_time": "3:06:47", "throughput": 20287.75, "total_tokens": 18800384}
|
|
{"current_steps": 5970, "total_steps": 78105, "loss": 0.4875, "lr": 3.820893611573423e-06, "epoch": 0.38217783752640677, "percentage": 7.64, "elapsed_time": "0:15:27", "remaining_time": "3:06:44", "throughput": 20289.77, "total_tokens": 18815552}
|
|
{"current_steps": 5975, "total_steps": 78105, "loss": 0.4904, "lr": 3.82409422609141e-06, "epoch": 0.38249791946738365, "percentage": 7.65, "elapsed_time": "0:15:27", "remaining_time": "3:06:42", "throughput": 20291.48, "total_tokens": 18830080}
|
|
{"current_steps": 5980, "total_steps": 78105, "loss": 0.5087, "lr": 3.8272948406093975e-06, "epoch": 0.38281800140836053, "percentage": 7.66, "elapsed_time": "0:15:28", "remaining_time": "3:06:44", "throughput": 20287.83, "total_tokens": 18846272}
|
|
{"current_steps": 5985, "total_steps": 78105, "loss": 0.4675, "lr": 3.8304954551273846e-06, "epoch": 0.3831380833493374, "percentage": 7.66, "elapsed_time": "0:15:29", "remaining_time": "3:06:41", "throughput": 20289.49, "total_tokens": 18860928}
|
|
{"current_steps": 5990, "total_steps": 78105, "loss": 0.4753, "lr": 3.8336960696453725e-06, "epoch": 0.38345816529031435, "percentage": 7.67, "elapsed_time": "0:15:30", "remaining_time": "3:06:39", "throughput": 20291.63, "total_tokens": 18877120}
|
|
{"current_steps": 5995, "total_steps": 78105, "loss": 0.6253, "lr": 3.8368966841633595e-06, "epoch": 0.3837782472312912, "percentage": 7.68, "elapsed_time": "0:15:30", "remaining_time": "3:06:37", "throughput": 20293.57, "total_tokens": 18892224}
|
|
{"current_steps": 6000, "total_steps": 78105, "loss": 0.5498, "lr": 3.840097298681347e-06, "epoch": 0.3840983291722681, "percentage": 7.68, "elapsed_time": "0:15:31", "remaining_time": "3:06:35", "throughput": 20295.28, "total_tokens": 18907008}
|
|
{"current_steps": 6005, "total_steps": 78105, "loss": 0.3618, "lr": 3.8432979131993344e-06, "epoch": 0.384418411113245, "percentage": 7.69, "elapsed_time": "0:15:32", "remaining_time": "3:06:33", "throughput": 20297.2, "total_tokens": 18922368}
|
|
{"current_steps": 6010, "total_steps": 78105, "loss": 0.4821, "lr": 3.846498527717322e-06, "epoch": 0.38473849305422186, "percentage": 7.69, "elapsed_time": "0:15:32", "remaining_time": "3:06:31", "throughput": 20299.05, "total_tokens": 18937536}
|
|
{"current_steps": 6015, "total_steps": 78105, "loss": 0.5246, "lr": 3.849699142235309e-06, "epoch": 0.38505857499519874, "percentage": 7.7, "elapsed_time": "0:15:33", "remaining_time": "3:06:29", "throughput": 20301.55, "total_tokens": 18954048}
|
|
{"current_steps": 6020, "total_steps": 78105, "loss": 0.5951, "lr": 3.852899756753297e-06, "epoch": 0.3853786569361757, "percentage": 7.71, "elapsed_time": "0:15:34", "remaining_time": "3:06:27", "throughput": 20303.45, "total_tokens": 18969472}
|
|
{"current_steps": 6025, "total_steps": 78105, "loss": 0.4919, "lr": 3.856100371271284e-06, "epoch": 0.38569873887715256, "percentage": 7.71, "elapsed_time": "0:15:34", "remaining_time": "3:06:25", "throughput": 20305.06, "total_tokens": 18983872}
|
|
{"current_steps": 6030, "total_steps": 78105, "loss": 0.4294, "lr": 3.859300985789271e-06, "epoch": 0.38601882081812944, "percentage": 7.72, "elapsed_time": "0:15:35", "remaining_time": "3:06:23", "throughput": 20306.95, "total_tokens": 18999360}
|
|
{"current_steps": 6035, "total_steps": 78105, "loss": 0.5733, "lr": 3.862501600307259e-06, "epoch": 0.3863389027591063, "percentage": 7.73, "elapsed_time": "0:15:36", "remaining_time": "3:06:21", "throughput": 20309.08, "total_tokens": 19015424}
|
|
{"current_steps": 6040, "total_steps": 78105, "loss": 0.4235, "lr": 3.865702214825246e-06, "epoch": 0.3866589847000832, "percentage": 7.73, "elapsed_time": "0:15:36", "remaining_time": "3:06:19", "throughput": 20311.28, "total_tokens": 19031616}
|
|
{"current_steps": 6045, "total_steps": 78105, "loss": 0.5539, "lr": 3.868902829343234e-06, "epoch": 0.38697906664106013, "percentage": 7.74, "elapsed_time": "0:15:37", "remaining_time": "3:06:17", "throughput": 20313.24, "total_tokens": 19047040}
|
|
{"current_steps": 6050, "total_steps": 78105, "loss": 0.551, "lr": 3.872103443861221e-06, "epoch": 0.387299148582037, "percentage": 7.75, "elapsed_time": "0:15:38", "remaining_time": "3:06:15", "throughput": 20315.4, "total_tokens": 19062848}
|
|
{"current_steps": 6055, "total_steps": 78105, "loss": 0.7136, "lr": 3.875304058379209e-06, "epoch": 0.3876192305230139, "percentage": 7.75, "elapsed_time": "0:15:39", "remaining_time": "3:06:13", "throughput": 20317.76, "total_tokens": 19078976}
|
|
{"current_steps": 6060, "total_steps": 78105, "loss": 0.4144, "lr": 3.878504672897196e-06, "epoch": 0.38793931246399077, "percentage": 7.76, "elapsed_time": "0:15:39", "remaining_time": "3:06:11", "throughput": 20319.45, "total_tokens": 19093632}
|
|
{"current_steps": 6065, "total_steps": 78105, "loss": 0.3784, "lr": 3.881705287415184e-06, "epoch": 0.38825939440496765, "percentage": 7.77, "elapsed_time": "0:15:40", "remaining_time": "3:06:09", "throughput": 20321.4, "total_tokens": 19109056}
|
|
{"current_steps": 6070, "total_steps": 78105, "loss": 0.3711, "lr": 3.884905901933171e-06, "epoch": 0.3885794763459446, "percentage": 7.77, "elapsed_time": "0:15:41", "remaining_time": "3:06:07", "throughput": 20323.32, "total_tokens": 19124544}
|
|
{"current_steps": 6075, "total_steps": 78105, "loss": 0.4527, "lr": 3.888106516451159e-06, "epoch": 0.38889955828692147, "percentage": 7.78, "elapsed_time": "0:15:41", "remaining_time": "3:06:05", "throughput": 20325.46, "total_tokens": 19140480}
|
|
{"current_steps": 6080, "total_steps": 78105, "loss": 0.46, "lr": 3.891307130969146e-06, "epoch": 0.38921964022789834, "percentage": 7.78, "elapsed_time": "0:15:42", "remaining_time": "3:06:03", "throughput": 20327.66, "total_tokens": 19156736}
|
|
{"current_steps": 6085, "total_steps": 78105, "loss": 0.5563, "lr": 3.894507745487134e-06, "epoch": 0.3895397221688752, "percentage": 7.79, "elapsed_time": "0:15:43", "remaining_time": "3:06:01", "throughput": 20329.37, "total_tokens": 19171520}
|
|
{"current_steps": 6090, "total_steps": 78105, "loss": 0.6042, "lr": 3.897708360005121e-06, "epoch": 0.3898598041098521, "percentage": 7.8, "elapsed_time": "0:15:43", "remaining_time": "3:05:59", "throughput": 20331.45, "total_tokens": 19187136}
|
|
{"current_steps": 6095, "total_steps": 78105, "loss": 0.5486, "lr": 3.900908974523109e-06, "epoch": 0.39017988605082904, "percentage": 7.8, "elapsed_time": "0:15:44", "remaining_time": "3:05:57", "throughput": 20333.12, "total_tokens": 19201856}
|
|
{"current_steps": 6100, "total_steps": 78105, "loss": 0.4568, "lr": 3.904109589041096e-06, "epoch": 0.3904999679918059, "percentage": 7.81, "elapsed_time": "0:15:45", "remaining_time": "3:05:55", "throughput": 20334.72, "total_tokens": 19216704}
|
|
{"current_steps": 6105, "total_steps": 78105, "loss": 0.6902, "lr": 3.907310203559084e-06, "epoch": 0.3908200499327828, "percentage": 7.82, "elapsed_time": "0:15:45", "remaining_time": "3:05:53", "throughput": 20337.36, "total_tokens": 19233408}
|
|
{"current_steps": 6110, "total_steps": 78105, "loss": 0.4412, "lr": 3.910510818077071e-06, "epoch": 0.3911401318737597, "percentage": 7.82, "elapsed_time": "0:15:46", "remaining_time": "3:05:51", "throughput": 20339.88, "total_tokens": 19250048}
|
|
{"current_steps": 6115, "total_steps": 78105, "loss": 0.5017, "lr": 3.913711432595059e-06, "epoch": 0.39146021381473656, "percentage": 7.83, "elapsed_time": "0:15:47", "remaining_time": "3:05:50", "throughput": 20342.31, "total_tokens": 19266560}
|
|
{"current_steps": 6120, "total_steps": 78105, "loss": 0.4863, "lr": 3.916912047113046e-06, "epoch": 0.39178029575571344, "percentage": 7.84, "elapsed_time": "0:15:47", "remaining_time": "3:05:47", "throughput": 20344.02, "total_tokens": 19281408}
|
|
{"current_steps": 6125, "total_steps": 78105, "loss": 0.4705, "lr": 3.920112661631034e-06, "epoch": 0.39210037769669037, "percentage": 7.84, "elapsed_time": "0:15:48", "remaining_time": "3:05:45", "throughput": 20345.74, "total_tokens": 19296384}
|
|
{"current_steps": 6130, "total_steps": 78105, "loss": 0.4233, "lr": 3.9233132761490215e-06, "epoch": 0.39242045963766725, "percentage": 7.85, "elapsed_time": "0:15:49", "remaining_time": "3:05:43", "throughput": 20347.74, "total_tokens": 19312000}
|
|
{"current_steps": 6135, "total_steps": 78105, "loss": 0.4467, "lr": 3.926513890667009e-06, "epoch": 0.39274054157864413, "percentage": 7.85, "elapsed_time": "0:15:49", "remaining_time": "3:05:41", "throughput": 20349.71, "total_tokens": 19327488}
|
|
{"current_steps": 6140, "total_steps": 78105, "loss": 0.6074, "lr": 3.9297145051849965e-06, "epoch": 0.393060623519621, "percentage": 7.86, "elapsed_time": "0:15:50", "remaining_time": "3:05:39", "throughput": 20351.37, "total_tokens": 19342080}
|
|
{"current_steps": 6145, "total_steps": 78105, "loss": 0.4601, "lr": 3.9329151197029835e-06, "epoch": 0.3933807054605979, "percentage": 7.87, "elapsed_time": "0:15:51", "remaining_time": "3:05:37", "throughput": 20353.6, "total_tokens": 19358336}
|
|
{"current_steps": 6150, "total_steps": 78105, "loss": 0.6262, "lr": 3.9361157342209706e-06, "epoch": 0.3937007874015748, "percentage": 7.87, "elapsed_time": "0:15:51", "remaining_time": "3:05:35", "throughput": 20355.41, "total_tokens": 19373376}
|
|
{"current_steps": 6155, "total_steps": 78105, "loss": 0.5754, "lr": 3.939316348738958e-06, "epoch": 0.3940208693425517, "percentage": 7.88, "elapsed_time": "0:15:52", "remaining_time": "3:05:33", "throughput": 20357.28, "total_tokens": 19388608}
|
|
{"current_steps": 6160, "total_steps": 78105, "loss": 0.6771, "lr": 3.9425169632569455e-06, "epoch": 0.3943409512835286, "percentage": 7.89, "elapsed_time": "0:15:53", "remaining_time": "3:05:31", "throughput": 20358.86, "total_tokens": 19403264}
|
|
{"current_steps": 6165, "total_steps": 78105, "loss": 0.602, "lr": 3.9457175777749325e-06, "epoch": 0.39466103322450546, "percentage": 7.89, "elapsed_time": "0:15:53", "remaining_time": "3:05:29", "throughput": 20360.7, "total_tokens": 19418816}
|
|
{"current_steps": 6170, "total_steps": 78105, "loss": 0.5712, "lr": 3.9489181922929204e-06, "epoch": 0.39498111516548234, "percentage": 7.9, "elapsed_time": "0:15:54", "remaining_time": "3:05:27", "throughput": 20363.04, "total_tokens": 19435392}
|
|
{"current_steps": 6175, "total_steps": 78105, "loss": 0.5541, "lr": 3.9521188068109075e-06, "epoch": 0.3953011971064593, "percentage": 7.91, "elapsed_time": "0:15:55", "remaining_time": "3:05:25", "throughput": 20364.66, "total_tokens": 19450048}
|
|
{"current_steps": 6180, "total_steps": 78105, "loss": 0.529, "lr": 3.955319421328895e-06, "epoch": 0.39562127904743616, "percentage": 7.91, "elapsed_time": "0:15:55", "remaining_time": "3:05:23", "throughput": 20367.03, "total_tokens": 19466368}
|
|
{"current_steps": 6185, "total_steps": 78105, "loss": 0.385, "lr": 3.958520035846882e-06, "epoch": 0.39594136098841304, "percentage": 7.92, "elapsed_time": "0:15:56", "remaining_time": "3:05:22", "throughput": 20369.13, "total_tokens": 19482624}
|
|
{"current_steps": 6190, "total_steps": 78105, "loss": 0.5878, "lr": 3.96172065036487e-06, "epoch": 0.3962614429293899, "percentage": 7.93, "elapsed_time": "0:15:57", "remaining_time": "3:05:20", "throughput": 20371.1, "total_tokens": 19498048}
|
|
{"current_steps": 6195, "total_steps": 78105, "loss": 0.5465, "lr": 3.964921264882858e-06, "epoch": 0.3965815248703668, "percentage": 7.93, "elapsed_time": "0:15:57", "remaining_time": "3:05:18", "throughput": 20373.57, "total_tokens": 19515264}
|
|
{"current_steps": 6200, "total_steps": 78105, "loss": 0.484, "lr": 3.968121879400845e-06, "epoch": 0.3969016068113437, "percentage": 7.94, "elapsed_time": "0:15:58", "remaining_time": "3:05:17", "throughput": 20376.37, "total_tokens": 19533056}
|
|
{"current_steps": 6205, "total_steps": 78105, "loss": 0.4691, "lr": 3.971322493918833e-06, "epoch": 0.3972216887523206, "percentage": 7.94, "elapsed_time": "0:15:59", "remaining_time": "3:05:15", "throughput": 20378.18, "total_tokens": 19548480}
|
|
{"current_steps": 6210, "total_steps": 78105, "loss": 0.3764, "lr": 3.97452310843682e-06, "epoch": 0.3975417706932975, "percentage": 7.95, "elapsed_time": "0:15:59", "remaining_time": "3:05:13", "throughput": 20379.99, "total_tokens": 19563712}
|
|
{"current_steps": 6215, "total_steps": 78105, "loss": 0.4834, "lr": 3.977723722954808e-06, "epoch": 0.39786185263427437, "percentage": 7.96, "elapsed_time": "0:16:00", "remaining_time": "3:05:11", "throughput": 20381.73, "total_tokens": 19578624}
|
|
{"current_steps": 6220, "total_steps": 78105, "loss": 0.4655, "lr": 3.980924337472795e-06, "epoch": 0.39818193457525125, "percentage": 7.96, "elapsed_time": "0:16:01", "remaining_time": "3:05:09", "throughput": 20383.3, "total_tokens": 19593408}
|
|
{"current_steps": 6225, "total_steps": 78105, "loss": 0.6619, "lr": 3.984124951990783e-06, "epoch": 0.39850201651622813, "percentage": 7.97, "elapsed_time": "0:16:01", "remaining_time": "3:05:07", "throughput": 20385.45, "total_tokens": 19609216}
|
|
{"current_steps": 6230, "total_steps": 78105, "loss": 0.578, "lr": 3.98732556650877e-06, "epoch": 0.39882209845720507, "percentage": 7.98, "elapsed_time": "0:16:02", "remaining_time": "3:05:05", "throughput": 20387.27, "total_tokens": 19624896}
|
|
{"current_steps": 6235, "total_steps": 78105, "loss": 0.5431, "lr": 3.990526181026758e-06, "epoch": 0.39914218039818194, "percentage": 7.98, "elapsed_time": "0:16:03", "remaining_time": "3:05:03", "throughput": 20389.12, "total_tokens": 19640128}
|
|
{"current_steps": 6240, "total_steps": 78105, "loss": 0.3773, "lr": 3.993726795544745e-06, "epoch": 0.3994622623391588, "percentage": 7.99, "elapsed_time": "0:16:03", "remaining_time": "3:05:01", "throughput": 20391.19, "total_tokens": 19655680}
|
|
{"current_steps": 6245, "total_steps": 78105, "loss": 0.6672, "lr": 3.996927410062733e-06, "epoch": 0.3997823442801357, "percentage": 8.0, "elapsed_time": "0:16:04", "remaining_time": "3:04:59", "throughput": 20393.41, "total_tokens": 19672192}
|
|
{"current_steps": 6250, "total_steps": 78105, "loss": 0.6138, "lr": 4.00012802458072e-06, "epoch": 0.4001024262211126, "percentage": 8.0, "elapsed_time": "0:16:05", "remaining_time": "3:04:58", "throughput": 20395.76, "total_tokens": 19688896}
|
|
{"current_steps": 6255, "total_steps": 78105, "loss": 0.4616, "lr": 4.003328639098708e-06, "epoch": 0.4004225081620895, "percentage": 8.01, "elapsed_time": "0:16:06", "remaining_time": "3:04:56", "throughput": 20397.62, "total_tokens": 19704128}
|
|
{"current_steps": 6260, "total_steps": 78105, "loss": 0.5449, "lr": 4.006529253616695e-06, "epoch": 0.4007425901030664, "percentage": 8.01, "elapsed_time": "0:16:06", "remaining_time": "3:04:54", "throughput": 20399.6, "total_tokens": 19719744}
|
|
{"current_steps": 6265, "total_steps": 78105, "loss": 0.5009, "lr": 4.009729868134683e-06, "epoch": 0.4010626720440433, "percentage": 8.02, "elapsed_time": "0:16:07", "remaining_time": "3:04:52", "throughput": 20401.92, "total_tokens": 19736128}
|
|
{"current_steps": 6270, "total_steps": 78105, "loss": 0.5497, "lr": 4.01293048265267e-06, "epoch": 0.40138275398502016, "percentage": 8.03, "elapsed_time": "0:16:08", "remaining_time": "3:04:50", "throughput": 20403.59, "total_tokens": 19751296}
|
|
{"current_steps": 6275, "total_steps": 78105, "loss": 0.5978, "lr": 4.016131097170657e-06, "epoch": 0.40170283592599704, "percentage": 8.03, "elapsed_time": "0:16:08", "remaining_time": "3:04:48", "throughput": 20405.07, "total_tokens": 19765888}
|
|
{"current_steps": 6280, "total_steps": 78105, "loss": 0.5703, "lr": 4.019331711688645e-06, "epoch": 0.402022917866974, "percentage": 8.04, "elapsed_time": "0:16:09", "remaining_time": "3:04:46", "throughput": 20406.63, "total_tokens": 19780864}
|
|
{"current_steps": 6285, "total_steps": 78105, "loss": 0.491, "lr": 4.022532326206632e-06, "epoch": 0.40234299980795085, "percentage": 8.05, "elapsed_time": "0:16:10", "remaining_time": "3:04:44", "throughput": 20408.75, "total_tokens": 19796864}
|
|
{"current_steps": 6290, "total_steps": 78105, "loss": 0.5399, "lr": 4.02573294072462e-06, "epoch": 0.40266308174892773, "percentage": 8.05, "elapsed_time": "0:16:10", "remaining_time": "3:04:42", "throughput": 20410.49, "total_tokens": 19811904}
|
|
{"current_steps": 6295, "total_steps": 78105, "loss": 0.5094, "lr": 4.028933555242607e-06, "epoch": 0.4029831636899046, "percentage": 8.06, "elapsed_time": "0:16:11", "remaining_time": "3:04:40", "throughput": 20412.33, "total_tokens": 19827456}
|
|
{"current_steps": 6300, "total_steps": 78105, "loss": 0.4796, "lr": 4.032134169760595e-06, "epoch": 0.4033032456308815, "percentage": 8.07, "elapsed_time": "0:16:12", "remaining_time": "3:04:38", "throughput": 20414.03, "total_tokens": 19842496}
|
|
{"current_steps": 6305, "total_steps": 78105, "loss": 0.5461, "lr": 4.035334784278582e-06, "epoch": 0.40362332757185837, "percentage": 8.07, "elapsed_time": "0:16:12", "remaining_time": "3:04:36", "throughput": 20415.94, "total_tokens": 19858240}
|
|
{"current_steps": 6310, "total_steps": 78105, "loss": 0.4699, "lr": 4.0385353987965695e-06, "epoch": 0.4039434095128353, "percentage": 8.08, "elapsed_time": "0:16:13", "remaining_time": "3:04:34", "throughput": 20418.23, "total_tokens": 19874368}
|
|
{"current_steps": 6315, "total_steps": 78105, "loss": 0.5295, "lr": 4.0417360133145566e-06, "epoch": 0.4042634914538122, "percentage": 8.09, "elapsed_time": "0:16:14", "remaining_time": "3:04:33", "throughput": 20420.75, "total_tokens": 19891904}
|
|
{"current_steps": 6320, "total_steps": 78105, "loss": 0.4789, "lr": 4.0449366278325445e-06, "epoch": 0.40458357339478906, "percentage": 8.09, "elapsed_time": "0:16:14", "remaining_time": "3:04:31", "throughput": 20422.51, "total_tokens": 19907392}
|
|
{"current_steps": 6325, "total_steps": 78105, "loss": 0.5229, "lr": 4.0481372423505315e-06, "epoch": 0.40490365533576594, "percentage": 8.1, "elapsed_time": "0:16:15", "remaining_time": "3:04:29", "throughput": 20424.34, "total_tokens": 19922624}
|
|
{"current_steps": 6330, "total_steps": 78105, "loss": 0.5257, "lr": 4.051337856868519e-06, "epoch": 0.4052237372767428, "percentage": 8.1, "elapsed_time": "0:16:16", "remaining_time": "3:04:28", "throughput": 20426.66, "total_tokens": 19939328}
|
|
{"current_steps": 6335, "total_steps": 78105, "loss": 0.4435, "lr": 4.0545384713865064e-06, "epoch": 0.40554381921771976, "percentage": 8.11, "elapsed_time": "0:16:16", "remaining_time": "3:04:26", "throughput": 20428.7, "total_tokens": 19955392}
|
|
{"current_steps": 6340, "total_steps": 78105, "loss": 0.5389, "lr": 4.057739085904494e-06, "epoch": 0.40586390115869664, "percentage": 8.12, "elapsed_time": "0:16:17", "remaining_time": "3:04:24", "throughput": 20430.85, "total_tokens": 19971520}
|
|
{"current_steps": 6345, "total_steps": 78105, "loss": 0.621, "lr": 4.060939700422481e-06, "epoch": 0.4061839830996735, "percentage": 8.12, "elapsed_time": "0:16:18", "remaining_time": "3:04:23", "throughput": 20433.24, "total_tokens": 19988352}
|
|
{"current_steps": 6350, "total_steps": 78105, "loss": 0.5246, "lr": 4.064140314940469e-06, "epoch": 0.4065040650406504, "percentage": 8.13, "elapsed_time": "0:16:18", "remaining_time": "3:04:22", "throughput": 20435.54, "total_tokens": 20005184}
|
|
{"current_steps": 6355, "total_steps": 78105, "loss": 0.4853, "lr": 4.067340929458456e-06, "epoch": 0.4068241469816273, "percentage": 8.14, "elapsed_time": "0:16:19", "remaining_time": "3:04:20", "throughput": 20437.42, "total_tokens": 20020800}
|
|
{"current_steps": 6360, "total_steps": 78105, "loss": 0.5291, "lr": 4.070541543976444e-06, "epoch": 0.4071442289226042, "percentage": 8.14, "elapsed_time": "0:16:20", "remaining_time": "3:04:18", "throughput": 20439.31, "total_tokens": 20036416}
|
|
{"current_steps": 6365, "total_steps": 78105, "loss": 0.5981, "lr": 4.073742158494431e-06, "epoch": 0.4074643108635811, "percentage": 8.15, "elapsed_time": "0:16:20", "remaining_time": "3:04:16", "throughput": 20441.64, "total_tokens": 20053184}
|
|
{"current_steps": 6370, "total_steps": 78105, "loss": 0.4081, "lr": 4.076942773012419e-06, "epoch": 0.40778439280455797, "percentage": 8.16, "elapsed_time": "0:16:21", "remaining_time": "3:04:15", "throughput": 20443.93, "total_tokens": 20069888}
|
|
{"current_steps": 6375, "total_steps": 78105, "loss": 0.5855, "lr": 4.080143387530406e-06, "epoch": 0.40810447474553485, "percentage": 8.16, "elapsed_time": "0:16:22", "remaining_time": "3:04:13", "throughput": 20445.83, "total_tokens": 20085760}
|
|
{"current_steps": 6380, "total_steps": 78105, "loss": 0.4317, "lr": 4.083344002048394e-06, "epoch": 0.40842455668651173, "percentage": 8.17, "elapsed_time": "0:16:23", "remaining_time": "3:04:11", "throughput": 20447.67, "total_tokens": 20101056}
|
|
{"current_steps": 6385, "total_steps": 78105, "loss": 0.5518, "lr": 4.086544616566381e-06, "epoch": 0.4087446386274886, "percentage": 8.17, "elapsed_time": "0:16:23", "remaining_time": "3:04:09", "throughput": 20449.22, "total_tokens": 20116352}
|
|
{"current_steps": 6390, "total_steps": 78105, "loss": 0.4846, "lr": 4.089745231084368e-06, "epoch": 0.40906472056846555, "percentage": 8.18, "elapsed_time": "0:16:24", "remaining_time": "3:04:07", "throughput": 20450.93, "total_tokens": 20131712}
|
|
{"current_steps": 6395, "total_steps": 78105, "loss": 0.4319, "lr": 4.092945845602356e-06, "epoch": 0.4093848025094424, "percentage": 8.19, "elapsed_time": "0:16:25", "remaining_time": "3:04:05", "throughput": 20452.63, "total_tokens": 20146880}
|
|
{"current_steps": 6400, "total_steps": 78105, "loss": 0.3733, "lr": 4.096146460120343e-06, "epoch": 0.4097048844504193, "percentage": 8.19, "elapsed_time": "0:16:25", "remaining_time": "3:04:04", "throughput": 20454.89, "total_tokens": 20163520}
|
|
{"current_steps": 6405, "total_steps": 78105, "loss": 0.4392, "lr": 4.099347074638331e-06, "epoch": 0.4100249663913962, "percentage": 8.2, "elapsed_time": "0:16:26", "remaining_time": "3:04:02", "throughput": 20456.26, "total_tokens": 20177984}
|
|
{"current_steps": 6410, "total_steps": 78105, "loss": 0.4381, "lr": 4.102547689156318e-06, "epoch": 0.41034504833237306, "percentage": 8.21, "elapsed_time": "0:16:27", "remaining_time": "3:04:00", "throughput": 20458.08, "total_tokens": 20193472}
|
|
{"current_steps": 6415, "total_steps": 78105, "loss": 0.5462, "lr": 4.105748303674306e-06, "epoch": 0.41066513027335, "percentage": 8.21, "elapsed_time": "0:16:27", "remaining_time": "3:03:58", "throughput": 20460.68, "total_tokens": 20210624}
|
|
{"current_steps": 6420, "total_steps": 78105, "loss": 0.5894, "lr": 4.108948918192293e-06, "epoch": 0.4109852122143269, "percentage": 8.22, "elapsed_time": "0:16:28", "remaining_time": "3:03:57", "throughput": 20462.71, "total_tokens": 20226688}
|
|
{"current_steps": 6425, "total_steps": 78105, "loss": 0.4931, "lr": 4.112149532710281e-06, "epoch": 0.41130529415530376, "percentage": 8.23, "elapsed_time": "0:16:29", "remaining_time": "3:03:55", "throughput": 20465.55, "total_tokens": 20244608}
|
|
{"current_steps": 6430, "total_steps": 78105, "loss": 0.4032, "lr": 4.115350147228268e-06, "epoch": 0.41162537609628064, "percentage": 8.23, "elapsed_time": "0:16:29", "remaining_time": "3:03:54", "throughput": 20467.67, "total_tokens": 20261184}
|
|
{"current_steps": 6435, "total_steps": 78105, "loss": 0.4591, "lr": 4.118550761746256e-06, "epoch": 0.4119454580372575, "percentage": 8.24, "elapsed_time": "0:16:30", "remaining_time": "3:03:52", "throughput": 20469.4, "total_tokens": 20276736}
|
|
{"current_steps": 6440, "total_steps": 78105, "loss": 0.5466, "lr": 4.121751376264243e-06, "epoch": 0.41226553997823445, "percentage": 8.25, "elapsed_time": "0:16:31", "remaining_time": "3:03:51", "throughput": 20471.74, "total_tokens": 20293888}
|
|
{"current_steps": 6445, "total_steps": 78105, "loss": 0.6605, "lr": 4.124951990782231e-06, "epoch": 0.41258562191921133, "percentage": 8.25, "elapsed_time": "0:16:31", "remaining_time": "3:03:49", "throughput": 20473.59, "total_tokens": 20309696}
|
|
{"current_steps": 6450, "total_steps": 78105, "loss": 0.5178, "lr": 4.128152605300218e-06, "epoch": 0.4129057038601882, "percentage": 8.26, "elapsed_time": "0:16:32", "remaining_time": "3:03:47", "throughput": 20475.49, "total_tokens": 20325632}
|
|
{"current_steps": 6455, "total_steps": 78105, "loss": 0.4484, "lr": 4.131353219818206e-06, "epoch": 0.4132257858011651, "percentage": 8.26, "elapsed_time": "0:16:33", "remaining_time": "3:03:46", "throughput": 20477.56, "total_tokens": 20341888}
|
|
{"current_steps": 6460, "total_steps": 78105, "loss": 0.5741, "lr": 4.134553834336193e-06, "epoch": 0.41354586774214197, "percentage": 8.27, "elapsed_time": "0:16:34", "remaining_time": "3:03:44", "throughput": 20479.93, "total_tokens": 20358656}
|
|
{"current_steps": 6465, "total_steps": 78105, "loss": 0.4933, "lr": 4.1377544488541806e-06, "epoch": 0.4138659496831189, "percentage": 8.28, "elapsed_time": "0:16:34", "remaining_time": "3:03:42", "throughput": 20481.21, "total_tokens": 20372864}
|
|
{"current_steps": 6470, "total_steps": 78105, "loss": 0.5224, "lr": 4.140955063372168e-06, "epoch": 0.4141860316240958, "percentage": 8.28, "elapsed_time": "0:16:35", "remaining_time": "3:03:40", "throughput": 20482.68, "total_tokens": 20387840}
|
|
{"current_steps": 6475, "total_steps": 78105, "loss": 0.4308, "lr": 4.1441556778901555e-06, "epoch": 0.41450611356507266, "percentage": 8.29, "elapsed_time": "0:16:36", "remaining_time": "3:03:39", "throughput": 20484.96, "total_tokens": 20404736}
|
|
{"current_steps": 6480, "total_steps": 78105, "loss": 0.6879, "lr": 4.1473562924081426e-06, "epoch": 0.41482619550604954, "percentage": 8.3, "elapsed_time": "0:16:36", "remaining_time": "3:03:38", "throughput": 20487.83, "total_tokens": 20422848}
|
|
{"current_steps": 6485, "total_steps": 78105, "loss": 0.5472, "lr": 4.1505569069261304e-06, "epoch": 0.4151462774470264, "percentage": 8.3, "elapsed_time": "0:16:37", "remaining_time": "3:03:36", "throughput": 20489.25, "total_tokens": 20438016}
|
|
{"current_steps": 6490, "total_steps": 78105, "loss": 0.6798, "lr": 4.1537575214441175e-06, "epoch": 0.4154663593880033, "percentage": 8.31, "elapsed_time": "0:16:38", "remaining_time": "3:03:34", "throughput": 20491.39, "total_tokens": 20454464}
|
|
{"current_steps": 6495, "total_steps": 78105, "loss": 0.4954, "lr": 4.156958135962105e-06, "epoch": 0.41578644132898024, "percentage": 8.32, "elapsed_time": "0:16:38", "remaining_time": "3:03:33", "throughput": 20493.4, "total_tokens": 20470464}
|
|
{"current_steps": 6500, "total_steps": 78105, "loss": 0.5747, "lr": 4.160158750480092e-06, "epoch": 0.4161065232699571, "percentage": 8.32, "elapsed_time": "0:16:39", "remaining_time": "3:03:31", "throughput": 20495.14, "total_tokens": 20485696}
|
|
{"current_steps": 6505, "total_steps": 78105, "loss": 0.4195, "lr": 4.16335936499808e-06, "epoch": 0.416426605210934, "percentage": 8.33, "elapsed_time": "0:16:40", "remaining_time": "3:03:29", "throughput": 20496.66, "total_tokens": 20500608}
|
|
{"current_steps": 6510, "total_steps": 78105, "loss": 0.5438, "lr": 4.166559979516067e-06, "epoch": 0.4167466871519109, "percentage": 8.33, "elapsed_time": "0:16:40", "remaining_time": "3:03:27", "throughput": 20498.28, "total_tokens": 20515776}
|
|
{"current_steps": 6515, "total_steps": 78105, "loss": 0.5452, "lr": 4.169760594034054e-06, "epoch": 0.41706676909288776, "percentage": 8.34, "elapsed_time": "0:16:41", "remaining_time": "3:03:25", "throughput": 20500.13, "total_tokens": 20531456}
|
|
{"current_steps": 6520, "total_steps": 78105, "loss": 0.5425, "lr": 4.172961208552042e-06, "epoch": 0.4173868510338647, "percentage": 8.35, "elapsed_time": "0:16:42", "remaining_time": "3:03:23", "throughput": 20501.88, "total_tokens": 20547200}
|
|
{"current_steps": 6525, "total_steps": 78105, "loss": 0.5122, "lr": 4.176161823070029e-06, "epoch": 0.41770693297484157, "percentage": 8.35, "elapsed_time": "0:16:42", "remaining_time": "3:03:21", "throughput": 20503.52, "total_tokens": 20562560}
|
|
{"current_steps": 6530, "total_steps": 78105, "loss": 0.4084, "lr": 4.179362437588017e-06, "epoch": 0.41802701491581845, "percentage": 8.36, "elapsed_time": "0:16:43", "remaining_time": "3:03:19", "throughput": 20504.97, "total_tokens": 20577344}
|
|
{"current_steps": 6535, "total_steps": 78105, "loss": 0.4662, "lr": 4.182563052106004e-06, "epoch": 0.41834709685679533, "percentage": 8.37, "elapsed_time": "0:16:44", "remaining_time": "3:03:17", "throughput": 20506.72, "total_tokens": 20593088}
|
|
{"current_steps": 6540, "total_steps": 78105, "loss": 0.4794, "lr": 4.185763666623992e-06, "epoch": 0.4186671787977722, "percentage": 8.37, "elapsed_time": "0:16:44", "remaining_time": "3:03:16", "throughput": 20508.98, "total_tokens": 20609792}
|
|
{"current_steps": 6545, "total_steps": 78105, "loss": 0.4879, "lr": 4.188964281141979e-06, "epoch": 0.41898726073874915, "percentage": 8.38, "elapsed_time": "0:16:45", "remaining_time": "3:03:14", "throughput": 20510.72, "total_tokens": 20625280}
|
|
{"current_steps": 6550, "total_steps": 78105, "loss": 0.5136, "lr": 4.192164895659967e-06, "epoch": 0.419307342679726, "percentage": 8.39, "elapsed_time": "0:16:46", "remaining_time": "3:03:13", "throughput": 20512.98, "total_tokens": 20641984}
|
|
{"current_steps": 6555, "total_steps": 78105, "loss": 0.6363, "lr": 4.195365510177954e-06, "epoch": 0.4196274246207029, "percentage": 8.39, "elapsed_time": "0:16:46", "remaining_time": "3:03:11", "throughput": 20514.98, "total_tokens": 20658240}
|
|
{"current_steps": 6560, "total_steps": 78105, "loss": 0.5114, "lr": 4.198566124695942e-06, "epoch": 0.4199475065616798, "percentage": 8.4, "elapsed_time": "0:16:47", "remaining_time": "3:03:09", "throughput": 20516.69, "total_tokens": 20673408}
|
|
{"current_steps": 6565, "total_steps": 78105, "loss": 0.4934, "lr": 4.201766739213929e-06, "epoch": 0.42026758850265666, "percentage": 8.41, "elapsed_time": "0:16:48", "remaining_time": "3:03:07", "throughput": 20518.45, "total_tokens": 20688960}
|
|
{"current_steps": 6570, "total_steps": 78105, "loss": 0.4829, "lr": 4.204967353731917e-06, "epoch": 0.42058767044363354, "percentage": 8.41, "elapsed_time": "0:16:48", "remaining_time": "3:03:06", "throughput": 20520.21, "total_tokens": 20704768}
|
|
{"current_steps": 6575, "total_steps": 78105, "loss": 0.4683, "lr": 4.208167968249904e-06, "epoch": 0.4209077523846105, "percentage": 8.42, "elapsed_time": "0:16:49", "remaining_time": "3:03:04", "throughput": 20521.78, "total_tokens": 20719808}
|
|
{"current_steps": 6580, "total_steps": 78105, "loss": 0.6623, "lr": 4.211368582767892e-06, "epoch": 0.42122783432558736, "percentage": 8.42, "elapsed_time": "0:16:50", "remaining_time": "3:03:02", "throughput": 20523.67, "total_tokens": 20735680}
|
|
{"current_steps": 6585, "total_steps": 78105, "loss": 0.4296, "lr": 4.214569197285879e-06, "epoch": 0.42154791626656424, "percentage": 8.43, "elapsed_time": "0:16:50", "remaining_time": "3:03:00", "throughput": 20525.04, "total_tokens": 20750144}
|
|
{"current_steps": 6590, "total_steps": 78105, "loss": 0.473, "lr": 4.217769811803867e-06, "epoch": 0.4218679982075411, "percentage": 8.44, "elapsed_time": "0:16:51", "remaining_time": "3:02:58", "throughput": 20526.92, "total_tokens": 20766016}
|
|
{"current_steps": 6595, "total_steps": 78105, "loss": 0.6441, "lr": 4.220970426321854e-06, "epoch": 0.422188080148518, "percentage": 8.44, "elapsed_time": "0:16:52", "remaining_time": "3:02:57", "throughput": 20529.1, "total_tokens": 20782720}
|
|
{"current_steps": 6600, "total_steps": 78105, "loss": 0.4494, "lr": 4.224171040839842e-06, "epoch": 0.42250816208949493, "percentage": 8.45, "elapsed_time": "0:16:53", "remaining_time": "3:02:55", "throughput": 20530.83, "total_tokens": 20798208}
|
|
{"current_steps": 6605, "total_steps": 78105, "loss": 0.763, "lr": 4.227371655357829e-06, "epoch": 0.4228282440304718, "percentage": 8.46, "elapsed_time": "0:16:53", "remaining_time": "3:02:53", "throughput": 20532.35, "total_tokens": 20813056}
|
|
{"current_steps": 6610, "total_steps": 78105, "loss": 0.483, "lr": 4.230572269875817e-06, "epoch": 0.4231483259714487, "percentage": 8.46, "elapsed_time": "0:16:54", "remaining_time": "3:02:51", "throughput": 20534.21, "total_tokens": 20828800}
|
|
{"current_steps": 6615, "total_steps": 78105, "loss": 0.5142, "lr": 4.233772884393805e-06, "epoch": 0.42346840791242557, "percentage": 8.47, "elapsed_time": "0:16:55", "remaining_time": "3:02:49", "throughput": 20536.03, "total_tokens": 20844736}
|
|
{"current_steps": 6620, "total_steps": 78105, "loss": 0.4001, "lr": 4.236973498911792e-06, "epoch": 0.42378848985340245, "percentage": 8.48, "elapsed_time": "0:16:55", "remaining_time": "3:02:48", "throughput": 20537.94, "total_tokens": 20860672}
|
|
{"current_steps": 6625, "total_steps": 78105, "loss": 0.4318, "lr": 4.2401741134297795e-06, "epoch": 0.4241085717943794, "percentage": 8.48, "elapsed_time": "0:16:56", "remaining_time": "3:02:46", "throughput": 20539.93, "total_tokens": 20877184}
|
|
{"current_steps": 6630, "total_steps": 78105, "loss": 0.4935, "lr": 4.243374727947766e-06, "epoch": 0.42442865373535626, "percentage": 8.49, "elapsed_time": "0:16:57", "remaining_time": "3:02:45", "throughput": 20542.16, "total_tokens": 20894208}
|
|
{"current_steps": 6635, "total_steps": 78105, "loss": 0.5097, "lr": 4.246575342465754e-06, "epoch": 0.42474873567633314, "percentage": 8.49, "elapsed_time": "0:16:57", "remaining_time": "3:02:43", "throughput": 20543.85, "total_tokens": 20909696}
|
|
{"current_steps": 6640, "total_steps": 78105, "loss": 0.4985, "lr": 4.249775956983741e-06, "epoch": 0.42506881761731, "percentage": 8.5, "elapsed_time": "0:16:58", "remaining_time": "3:02:41", "throughput": 20545.88, "total_tokens": 20926016}
|
|
{"current_steps": 6645, "total_steps": 78105, "loss": 0.4899, "lr": 4.2529765715017285e-06, "epoch": 0.4253888995582869, "percentage": 8.51, "elapsed_time": "0:16:59", "remaining_time": "3:02:40", "throughput": 20547.49, "total_tokens": 20941440}
|
|
{"current_steps": 6650, "total_steps": 78105, "loss": 0.5684, "lr": 4.256177186019716e-06, "epoch": 0.42570898149926384, "percentage": 8.51, "elapsed_time": "0:16:59", "remaining_time": "3:02:38", "throughput": 20548.99, "total_tokens": 20956352}
|
|
{"current_steps": 6655, "total_steps": 78105, "loss": 0.541, "lr": 4.2593778005377035e-06, "epoch": 0.4260290634402407, "percentage": 8.52, "elapsed_time": "0:17:00", "remaining_time": "3:02:36", "throughput": 20551.18, "total_tokens": 20973056}
|
|
{"current_steps": 6660, "total_steps": 78105, "loss": 0.5376, "lr": 4.2625784150556905e-06, "epoch": 0.4263491453812176, "percentage": 8.53, "elapsed_time": "0:17:01", "remaining_time": "3:02:35", "throughput": 20553.02, "total_tokens": 20989248}
|
|
{"current_steps": 6665, "total_steps": 78105, "loss": 0.5234, "lr": 4.265779029573678e-06, "epoch": 0.4266692273221945, "percentage": 8.53, "elapsed_time": "0:17:01", "remaining_time": "3:02:33", "throughput": 20554.59, "total_tokens": 21004096}
|
|
{"current_steps": 6670, "total_steps": 78105, "loss": 0.5485, "lr": 4.2689796440916655e-06, "epoch": 0.42698930926317136, "percentage": 8.54, "elapsed_time": "0:17:02", "remaining_time": "3:02:31", "throughput": 20556.6, "total_tokens": 21020224}
|
|
{"current_steps": 6675, "total_steps": 78105, "loss": 0.4845, "lr": 4.272180258609653e-06, "epoch": 0.42730939120414824, "percentage": 8.55, "elapsed_time": "0:17:03", "remaining_time": "3:02:29", "throughput": 20558.21, "total_tokens": 21035264}
|
|
{"current_steps": 6680, "total_steps": 78105, "loss": 0.4489, "lr": 4.275380873127641e-06, "epoch": 0.42762947314512517, "percentage": 8.55, "elapsed_time": "0:17:03", "remaining_time": "3:02:28", "throughput": 20560.22, "total_tokens": 21051904}
|
|
{"current_steps": 6685, "total_steps": 78105, "loss": 0.4542, "lr": 4.278581487645628e-06, "epoch": 0.42794955508610205, "percentage": 8.56, "elapsed_time": "0:17:04", "remaining_time": "3:02:26", "throughput": 20562.63, "total_tokens": 21069504}
|
|
{"current_steps": 6690, "total_steps": 78105, "loss": 0.569, "lr": 4.281782102163616e-06, "epoch": 0.42826963702707893, "percentage": 8.57, "elapsed_time": "0:17:05", "remaining_time": "3:02:24", "throughput": 20564.13, "total_tokens": 21084288}
|
|
{"current_steps": 6695, "total_steps": 78105, "loss": 0.4573, "lr": 4.284982716681603e-06, "epoch": 0.4285897189680558, "percentage": 8.57, "elapsed_time": "0:17:05", "remaining_time": "3:02:23", "throughput": 20566.13, "total_tokens": 21100544}
|
|
{"current_steps": 6700, "total_steps": 78105, "loss": 0.52, "lr": 4.288183331199591e-06, "epoch": 0.4289098009090327, "percentage": 8.58, "elapsed_time": "0:17:06", "remaining_time": "3:02:21", "throughput": 20567.93, "total_tokens": 21116608}
|
|
{"current_steps": 6705, "total_steps": 78105, "loss": 0.734, "lr": 4.291383945717578e-06, "epoch": 0.4292298828500096, "percentage": 8.58, "elapsed_time": "0:17:07", "remaining_time": "3:02:19", "throughput": 20569.53, "total_tokens": 21131776}
|
|
{"current_steps": 6710, "total_steps": 78105, "loss": 0.4443, "lr": 4.294584560235566e-06, "epoch": 0.4295499647909865, "percentage": 8.59, "elapsed_time": "0:17:07", "remaining_time": "3:02:17", "throughput": 20570.75, "total_tokens": 21146368}
|
|
{"current_steps": 6715, "total_steps": 78105, "loss": 0.6303, "lr": 4.297785174753553e-06, "epoch": 0.4298700467319634, "percentage": 8.6, "elapsed_time": "0:17:08", "remaining_time": "3:02:16", "throughput": 20572.58, "total_tokens": 21162368}
|
|
{"current_steps": 6720, "total_steps": 78105, "loss": 0.5002, "lr": 4.300985789271541e-06, "epoch": 0.43019012867294026, "percentage": 8.6, "elapsed_time": "0:17:09", "remaining_time": "3:02:14", "throughput": 20574.75, "total_tokens": 21179008}
|
|
{"current_steps": 6725, "total_steps": 78105, "loss": 0.777, "lr": 4.304186403789528e-06, "epoch": 0.43051021061391714, "percentage": 8.61, "elapsed_time": "0:17:10", "remaining_time": "3:02:13", "throughput": 20576.9, "total_tokens": 21196160}
|
|
{"current_steps": 6730, "total_steps": 78105, "loss": 0.5936, "lr": 4.307387018307516e-06, "epoch": 0.4308302925548941, "percentage": 8.62, "elapsed_time": "0:17:10", "remaining_time": "3:02:11", "throughput": 20578.73, "total_tokens": 21212032}
|
|
{"current_steps": 6735, "total_steps": 78105, "loss": 0.4063, "lr": 4.310587632825503e-06, "epoch": 0.43115037449587096, "percentage": 8.62, "elapsed_time": "0:17:11", "remaining_time": "3:02:10", "throughput": 20581.5, "total_tokens": 21230272}
|
|
{"current_steps": 6740, "total_steps": 78105, "loss": 0.469, "lr": 4.313788247343491e-06, "epoch": 0.43147045643684784, "percentage": 8.63, "elapsed_time": "0:17:12", "remaining_time": "3:02:09", "throughput": 20583.13, "total_tokens": 21245760}
|
|
{"current_steps": 6745, "total_steps": 78105, "loss": 0.415, "lr": 4.316988861861478e-06, "epoch": 0.4317905383778247, "percentage": 8.64, "elapsed_time": "0:17:12", "remaining_time": "3:02:07", "throughput": 20584.64, "total_tokens": 21260992}
|
|
{"current_steps": 6750, "total_steps": 78105, "loss": 0.3589, "lr": 4.320189476379465e-06, "epoch": 0.4321106203188016, "percentage": 8.64, "elapsed_time": "0:17:13", "remaining_time": "3:02:05", "throughput": 20585.75, "total_tokens": 21275136}
|
|
{"current_steps": 6755, "total_steps": 78105, "loss": 0.6596, "lr": 4.323390090897453e-06, "epoch": 0.4324307022597785, "percentage": 8.65, "elapsed_time": "0:17:14", "remaining_time": "3:02:03", "throughput": 20587.39, "total_tokens": 21290560}
|
|
{"current_steps": 6760, "total_steps": 78105, "loss": 0.5297, "lr": 4.32659070541544e-06, "epoch": 0.4327507842007554, "percentage": 8.66, "elapsed_time": "0:17:14", "remaining_time": "3:02:01", "throughput": 20588.9, "total_tokens": 21305536}
|
|
{"current_steps": 6765, "total_steps": 78105, "loss": 0.4501, "lr": 4.329791319933428e-06, "epoch": 0.4330708661417323, "percentage": 8.66, "elapsed_time": "0:17:15", "remaining_time": "3:01:59", "throughput": 20590.43, "total_tokens": 21320576}
|
|
{"current_steps": 6770, "total_steps": 78105, "loss": 0.54, "lr": 4.332991934451415e-06, "epoch": 0.43339094808270917, "percentage": 8.67, "elapsed_time": "0:17:16", "remaining_time": "3:01:57", "throughput": 20591.87, "total_tokens": 21335680}
|
|
{"current_steps": 6775, "total_steps": 78105, "loss": 0.4078, "lr": 4.336192548969403e-06, "epoch": 0.43371103002368605, "percentage": 8.67, "elapsed_time": "0:17:16", "remaining_time": "3:01:56", "throughput": 20593.59, "total_tokens": 21351808}
|
|
{"current_steps": 6780, "total_steps": 78105, "loss": 0.6034, "lr": 4.33939316348739e-06, "epoch": 0.43403111196466293, "percentage": 8.68, "elapsed_time": "0:17:17", "remaining_time": "3:01:53", "throughput": 20595.03, "total_tokens": 21366464}
|
|
{"current_steps": 6785, "total_steps": 78105, "loss": 0.3572, "lr": 4.342593778005378e-06, "epoch": 0.43435119390563987, "percentage": 8.69, "elapsed_time": "0:17:18", "remaining_time": "3:01:52", "throughput": 20597.02, "total_tokens": 21383296}
|
|
{"current_steps": 6790, "total_steps": 78105, "loss": 0.5126, "lr": 4.345794392523365e-06, "epoch": 0.43467127584661674, "percentage": 8.69, "elapsed_time": "0:17:18", "remaining_time": "3:01:51", "throughput": 20598.96, "total_tokens": 21399616}
|
|
{"current_steps": 6795, "total_steps": 78105, "loss": 0.5213, "lr": 4.3489950070413526e-06, "epoch": 0.4349913577875936, "percentage": 8.7, "elapsed_time": "0:17:19", "remaining_time": "3:01:49", "throughput": 20600.45, "total_tokens": 21414848}
|
|
{"current_steps": 6800, "total_steps": 78105, "loss": 0.3968, "lr": 4.35219562155934e-06, "epoch": 0.4353114397285705, "percentage": 8.71, "elapsed_time": "0:17:20", "remaining_time": "3:01:47", "throughput": 20601.99, "total_tokens": 21430144}
|
|
{"current_steps": 6805, "total_steps": 78105, "loss": 0.4789, "lr": 4.3553962360773275e-06, "epoch": 0.4356315216695474, "percentage": 8.71, "elapsed_time": "0:17:20", "remaining_time": "3:01:45", "throughput": 20603.4, "total_tokens": 21445248}
|
|
{"current_steps": 6810, "total_steps": 78105, "loss": 0.7234, "lr": 4.3585968505953145e-06, "epoch": 0.4359516036105243, "percentage": 8.72, "elapsed_time": "0:17:21", "remaining_time": "3:01:43", "throughput": 20604.94, "total_tokens": 21460544}
|
|
{"current_steps": 6815, "total_steps": 78105, "loss": 0.5245, "lr": 4.3617974651133024e-06, "epoch": 0.4362716855515012, "percentage": 8.73, "elapsed_time": "0:17:22", "remaining_time": "3:01:42", "throughput": 20606.69, "total_tokens": 21476224}
|
|
{"current_steps": 6820, "total_steps": 78105, "loss": 0.6448, "lr": 4.3649980796312895e-06, "epoch": 0.4365917674924781, "percentage": 8.73, "elapsed_time": "0:17:22", "remaining_time": "3:01:40", "throughput": 20608.43, "total_tokens": 21491712}
|
|
{"current_steps": 6825, "total_steps": 78105, "loss": 0.6611, "lr": 4.368198694149277e-06, "epoch": 0.43691184943345496, "percentage": 8.74, "elapsed_time": "0:17:23", "remaining_time": "3:01:39", "throughput": 20610.63, "total_tokens": 21508928}
|
|
{"current_steps": 6830, "total_steps": 78105, "loss": 0.5358, "lr": 4.371399308667264e-06, "epoch": 0.43723193137443184, "percentage": 8.74, "elapsed_time": "0:17:24", "remaining_time": "3:01:37", "throughput": 20611.94, "total_tokens": 21523648}
|
|
{"current_steps": 6835, "total_steps": 78105, "loss": 0.5569, "lr": 4.374599923185252e-06, "epoch": 0.43755201331540877, "percentage": 8.75, "elapsed_time": "0:17:24", "remaining_time": "3:01:35", "throughput": 20613.55, "total_tokens": 21539264}
|
|
{"current_steps": 6840, "total_steps": 78105, "loss": 0.4689, "lr": 4.377800537703239e-06, "epoch": 0.43787209525638565, "percentage": 8.76, "elapsed_time": "0:17:25", "remaining_time": "3:01:34", "throughput": 20615.26, "total_tokens": 21555520}
|
|
{"current_steps": 6845, "total_steps": 78105, "loss": 0.6048, "lr": 4.381001152221227e-06, "epoch": 0.43819217719736253, "percentage": 8.76, "elapsed_time": "0:17:26", "remaining_time": "3:01:31", "throughput": 20616.62, "total_tokens": 21570112}
|
|
{"current_steps": 6850, "total_steps": 78105, "loss": 0.4128, "lr": 4.384201766739214e-06, "epoch": 0.4385122591383394, "percentage": 8.77, "elapsed_time": "0:17:26", "remaining_time": "3:01:30", "throughput": 20617.89, "total_tokens": 21584768}
|
|
{"current_steps": 6855, "total_steps": 78105, "loss": 0.367, "lr": 4.387402381257202e-06, "epoch": 0.4388323410793163, "percentage": 8.78, "elapsed_time": "0:17:27", "remaining_time": "3:01:28", "throughput": 20619.39, "total_tokens": 21599872}
|
|
{"current_steps": 6860, "total_steps": 78105, "loss": 0.5798, "lr": 4.390602995775189e-06, "epoch": 0.43915242302029317, "percentage": 8.78, "elapsed_time": "0:17:28", "remaining_time": "3:01:26", "throughput": 20621.08, "total_tokens": 21615808}
|
|
{"current_steps": 6865, "total_steps": 78105, "loss": 0.6669, "lr": 4.393803610293177e-06, "epoch": 0.4394725049612701, "percentage": 8.79, "elapsed_time": "0:17:28", "remaining_time": "3:01:24", "throughput": 20622.45, "total_tokens": 21630656}
|
|
{"current_steps": 6870, "total_steps": 78105, "loss": 0.5259, "lr": 4.397004224811164e-06, "epoch": 0.439792586902247, "percentage": 8.8, "elapsed_time": "0:17:29", "remaining_time": "3:01:23", "throughput": 20624.35, "total_tokens": 21646976}
|
|
{"current_steps": 6875, "total_steps": 78105, "loss": 0.6549, "lr": 4.400204839329151e-06, "epoch": 0.44011266884322386, "percentage": 8.8, "elapsed_time": "0:17:30", "remaining_time": "3:01:22", "throughput": 20626.92, "total_tokens": 21665024}
|
|
{"current_steps": 6880, "total_steps": 78105, "loss": 0.5625, "lr": 4.403405453847139e-06, "epoch": 0.44043275078420074, "percentage": 8.81, "elapsed_time": "0:17:30", "remaining_time": "3:01:20", "throughput": 20628.39, "total_tokens": 21680128}
|
|
{"current_steps": 6885, "total_steps": 78105, "loss": 0.3949, "lr": 4.406606068365126e-06, "epoch": 0.4407528327251776, "percentage": 8.82, "elapsed_time": "0:17:31", "remaining_time": "3:01:18", "throughput": 20629.96, "total_tokens": 21695808}
|
|
{"current_steps": 6890, "total_steps": 78105, "loss": 0.4253, "lr": 4.409806682883114e-06, "epoch": 0.44107291466615456, "percentage": 8.82, "elapsed_time": "0:17:32", "remaining_time": "3:01:17", "throughput": 20631.64, "total_tokens": 21711936}
|
|
{"current_steps": 6895, "total_steps": 78105, "loss": 0.5217, "lr": 4.413007297401101e-06, "epoch": 0.44139299660713144, "percentage": 8.83, "elapsed_time": "0:17:33", "remaining_time": "3:01:15", "throughput": 20633.15, "total_tokens": 21727488}
|
|
{"current_steps": 6900, "total_steps": 78105, "loss": 0.6068, "lr": 4.416207911919089e-06, "epoch": 0.4417130785481083, "percentage": 8.83, "elapsed_time": "0:17:33", "remaining_time": "3:01:14", "throughput": 20635.61, "total_tokens": 21745280}
|
|
{"current_steps": 6905, "total_steps": 78105, "loss": 0.5917, "lr": 4.419408526437076e-06, "epoch": 0.4420331604890852, "percentage": 8.84, "elapsed_time": "0:17:34", "remaining_time": "3:01:12", "throughput": 20636.93, "total_tokens": 21760000}
|
|
{"current_steps": 6910, "total_steps": 78105, "loss": 0.4796, "lr": 4.422609140955064e-06, "epoch": 0.4423532424300621, "percentage": 8.85, "elapsed_time": "0:17:35", "remaining_time": "3:01:10", "throughput": 20638.48, "total_tokens": 21775232}
|
|
{"current_steps": 6915, "total_steps": 78105, "loss": 0.5064, "lr": 4.425809755473051e-06, "epoch": 0.442673324371039, "percentage": 8.85, "elapsed_time": "0:17:35", "remaining_time": "3:01:08", "throughput": 20640.17, "total_tokens": 21790656}
|
|
{"current_steps": 6920, "total_steps": 78105, "loss": 0.5419, "lr": 4.429010369991039e-06, "epoch": 0.4429934063120159, "percentage": 8.86, "elapsed_time": "0:17:36", "remaining_time": "3:01:07", "throughput": 20641.71, "total_tokens": 21806336}
|
|
{"current_steps": 6925, "total_steps": 78105, "loss": 0.4058, "lr": 4.432210984509026e-06, "epoch": 0.44331348825299277, "percentage": 8.87, "elapsed_time": "0:17:37", "remaining_time": "3:01:06", "throughput": 20644.06, "total_tokens": 21823744}
|
|
{"current_steps": 6930, "total_steps": 78105, "loss": 0.645, "lr": 4.435411599027014e-06, "epoch": 0.44363357019396965, "percentage": 8.87, "elapsed_time": "0:17:37", "remaining_time": "3:01:04", "throughput": 20645.52, "total_tokens": 21839104}
|
|
{"current_steps": 6935, "total_steps": 78105, "loss": 0.4493, "lr": 4.438612213545001e-06, "epoch": 0.44395365213494653, "percentage": 8.88, "elapsed_time": "0:17:38", "remaining_time": "3:01:02", "throughput": 20647.09, "total_tokens": 21854528}
|
|
{"current_steps": 6940, "total_steps": 78105, "loss": 0.7441, "lr": 4.441812828062989e-06, "epoch": 0.4442737340759234, "percentage": 8.89, "elapsed_time": "0:17:39", "remaining_time": "3:01:01", "throughput": 20648.87, "total_tokens": 21870592}
|
|
{"current_steps": 6945, "total_steps": 78105, "loss": 0.5566, "lr": 4.445013442580976e-06, "epoch": 0.44459381601690035, "percentage": 8.89, "elapsed_time": "0:17:39", "remaining_time": "3:00:59", "throughput": 20650.22, "total_tokens": 21885696}
|
|
{"current_steps": 6950, "total_steps": 78105, "loss": 0.4665, "lr": 4.448214057098964e-06, "epoch": 0.4449138979578772, "percentage": 8.9, "elapsed_time": "0:17:40", "remaining_time": "3:00:57", "throughput": 20651.8, "total_tokens": 21901248}
|
|
{"current_steps": 6955, "total_steps": 78105, "loss": 0.6195, "lr": 4.451414671616951e-06, "epoch": 0.4452339798988541, "percentage": 8.9, "elapsed_time": "0:17:41", "remaining_time": "3:00:55", "throughput": 20653.09, "total_tokens": 21916352}
|
|
{"current_steps": 6960, "total_steps": 78105, "loss": 0.4685, "lr": 4.4546152861349385e-06, "epoch": 0.445554061839831, "percentage": 8.91, "elapsed_time": "0:17:41", "remaining_time": "3:00:53", "throughput": 20654.3, "total_tokens": 21931072}
|
|
{"current_steps": 6965, "total_steps": 78105, "loss": 0.515, "lr": 4.457815900652926e-06, "epoch": 0.44587414378080786, "percentage": 8.92, "elapsed_time": "0:17:42", "remaining_time": "3:00:52", "throughput": 20655.91, "total_tokens": 21946752}
|
|
{"current_steps": 6970, "total_steps": 78105, "loss": 0.5767, "lr": 4.4610165151709135e-06, "epoch": 0.4461942257217848, "percentage": 8.92, "elapsed_time": "0:17:43", "remaining_time": "3:00:50", "throughput": 20657.43, "total_tokens": 21962176}
|
|
{"current_steps": 6975, "total_steps": 78105, "loss": 0.4379, "lr": 4.4642171296889005e-06, "epoch": 0.4465143076627617, "percentage": 8.93, "elapsed_time": "0:17:43", "remaining_time": "3:00:49", "throughput": 20659.19, "total_tokens": 21978496}
|
|
{"current_steps": 6980, "total_steps": 78105, "loss": 0.6503, "lr": 4.467417744206888e-06, "epoch": 0.44683438960373856, "percentage": 8.94, "elapsed_time": "0:17:44", "remaining_time": "3:00:47", "throughput": 20660.14, "total_tokens": 21992576}
|
|
{"current_steps": 6985, "total_steps": 78105, "loss": 0.5465, "lr": 4.4706183587248755e-06, "epoch": 0.44715447154471544, "percentage": 8.94, "elapsed_time": "0:17:45", "remaining_time": "3:00:45", "throughput": 20661.58, "total_tokens": 22008000}
|
|
{"current_steps": 6990, "total_steps": 78105, "loss": 0.7151, "lr": 4.4738189732428625e-06, "epoch": 0.4474745534856923, "percentage": 8.95, "elapsed_time": "0:17:45", "remaining_time": "3:00:43", "throughput": 20662.95, "total_tokens": 22022976}
|
|
{"current_steps": 6995, "total_steps": 78105, "loss": 0.5282, "lr": 4.47701958776085e-06, "epoch": 0.44779463542666925, "percentage": 8.96, "elapsed_time": "0:17:46", "remaining_time": "3:00:41", "throughput": 20664.46, "total_tokens": 22038144}
|
|
{"current_steps": 7000, "total_steps": 78105, "loss": 0.5655, "lr": 4.4802202022788374e-06, "epoch": 0.44811471736764613, "percentage": 8.96, "elapsed_time": "0:17:47", "remaining_time": "3:00:40", "throughput": 20666.19, "total_tokens": 22054016}
|
|
{"current_steps": 7005, "total_steps": 78105, "loss": 0.4632, "lr": 4.483420816796825e-06, "epoch": 0.448434799308623, "percentage": 8.97, "elapsed_time": "0:17:47", "remaining_time": "3:00:39", "throughput": 20669.04, "total_tokens": 22073920}
|
|
{"current_steps": 7010, "total_steps": 78105, "loss": 0.4887, "lr": 4.486621431314812e-06, "epoch": 0.4487548812495999, "percentage": 8.98, "elapsed_time": "0:17:48", "remaining_time": "3:00:39", "throughput": 20667.87, "total_tokens": 22089728}
|
|
{"current_steps": 7015, "total_steps": 78105, "loss": 0.6692, "lr": 4.4898220458328e-06, "epoch": 0.44907496319057677, "percentage": 8.98, "elapsed_time": "0:17:49", "remaining_time": "3:00:38", "throughput": 20669.19, "total_tokens": 22105344}
|
|
{"current_steps": 7020, "total_steps": 78105, "loss": 0.5347, "lr": 4.493022660350787e-06, "epoch": 0.4493950451315537, "percentage": 8.99, "elapsed_time": "0:17:50", "remaining_time": "3:00:36", "throughput": 20670.97, "total_tokens": 22121792}
|
|
{"current_steps": 7025, "total_steps": 78105, "loss": 0.4648, "lr": 4.496223274868775e-06, "epoch": 0.4497151270725306, "percentage": 8.99, "elapsed_time": "0:17:50", "remaining_time": "3:00:34", "throughput": 20672.01, "total_tokens": 22136128}
|
|
{"current_steps": 7030, "total_steps": 78105, "loss": 0.4841, "lr": 4.499423889386762e-06, "epoch": 0.45003520901350746, "percentage": 9.0, "elapsed_time": "0:17:51", "remaining_time": "3:00:33", "throughput": 20673.67, "total_tokens": 22151936}
|
|
{"current_steps": 7035, "total_steps": 78105, "loss": 0.3882, "lr": 4.50262450390475e-06, "epoch": 0.45035529095448434, "percentage": 9.01, "elapsed_time": "0:17:52", "remaining_time": "3:00:31", "throughput": 20675.37, "total_tokens": 22168128}
|
|
{"current_steps": 7040, "total_steps": 78105, "loss": 0.4943, "lr": 4.505825118422737e-06, "epoch": 0.4506753728954612, "percentage": 9.01, "elapsed_time": "0:17:52", "remaining_time": "3:00:30", "throughput": 20677.12, "total_tokens": 22184512}
|
|
{"current_steps": 7045, "total_steps": 78105, "loss": 0.6257, "lr": 4.509025732940725e-06, "epoch": 0.4509954548364381, "percentage": 9.02, "elapsed_time": "0:17:53", "remaining_time": "3:00:28", "throughput": 20678.59, "total_tokens": 22199872}
|
|
{"current_steps": 7050, "total_steps": 78105, "loss": 0.5604, "lr": 4.512226347458712e-06, "epoch": 0.45131553677741504, "percentage": 9.03, "elapsed_time": "0:17:54", "remaining_time": "3:00:27", "throughput": 20680.54, "total_tokens": 22216960}
|
|
{"current_steps": 7055, "total_steps": 78105, "loss": 0.3448, "lr": 4.5154269619767e-06, "epoch": 0.4516356187183919, "percentage": 9.03, "elapsed_time": "0:17:54", "remaining_time": "3:00:25", "throughput": 20681.71, "total_tokens": 22231488}
|
|
{"current_steps": 7060, "total_steps": 78105, "loss": 0.5967, "lr": 4.518627576494687e-06, "epoch": 0.4519557006593688, "percentage": 9.04, "elapsed_time": "0:17:55", "remaining_time": "3:00:23", "throughput": 20683.13, "total_tokens": 22246976}
|
|
{"current_steps": 7065, "total_steps": 78105, "loss": 0.4857, "lr": 4.521828191012675e-06, "epoch": 0.4522757826003457, "percentage": 9.05, "elapsed_time": "0:17:56", "remaining_time": "3:00:22", "throughput": 20684.81, "total_tokens": 22263680}
|
|
{"current_steps": 7070, "total_steps": 78105, "loss": 0.4252, "lr": 4.525028805530662e-06, "epoch": 0.45259586454132256, "percentage": 9.05, "elapsed_time": "0:17:57", "remaining_time": "3:00:21", "throughput": 20686.39, "total_tokens": 22279552}
|
|
{"current_steps": 7075, "total_steps": 78105, "loss": 0.5059, "lr": 4.52822942004865e-06, "epoch": 0.4529159464822995, "percentage": 9.06, "elapsed_time": "0:17:57", "remaining_time": "3:00:19", "throughput": 20688.06, "total_tokens": 22295296}
|
|
{"current_steps": 7080, "total_steps": 78105, "loss": 0.5148, "lr": 4.531430034566637e-06, "epoch": 0.45323602842327637, "percentage": 9.06, "elapsed_time": "0:17:58", "remaining_time": "3:00:17", "throughput": 20689.34, "total_tokens": 22310400}
|
|
{"current_steps": 7085, "total_steps": 78105, "loss": 0.5518, "lr": 4.534630649084625e-06, "epoch": 0.45355611036425325, "percentage": 9.07, "elapsed_time": "0:17:59", "remaining_time": "3:00:16", "throughput": 20691.28, "total_tokens": 22326848}
|
|
{"current_steps": 7090, "total_steps": 78105, "loss": 0.5397, "lr": 4.537831263602613e-06, "epoch": 0.45387619230523013, "percentage": 9.08, "elapsed_time": "0:17:59", "remaining_time": "3:00:14", "throughput": 20692.88, "total_tokens": 22342592}
|
|
{"current_steps": 7095, "total_steps": 78105, "loss": 0.5794, "lr": 4.5410318781206e-06, "epoch": 0.454196274246207, "percentage": 9.08, "elapsed_time": "0:18:00", "remaining_time": "3:00:13", "throughput": 20694.71, "total_tokens": 22358912}
|
|
{"current_steps": 7100, "total_steps": 78105, "loss": 0.4531, "lr": 4.544232492638588e-06, "epoch": 0.45451635618718395, "percentage": 9.09, "elapsed_time": "0:18:01", "remaining_time": "3:00:11", "throughput": 20696.31, "total_tokens": 22374528}
|
|
{"current_steps": 7105, "total_steps": 78105, "loss": 0.4472, "lr": 4.547433107156575e-06, "epoch": 0.4548364381281608, "percentage": 9.1, "elapsed_time": "0:18:01", "remaining_time": "3:00:09", "throughput": 20697.77, "total_tokens": 22389824}
|
|
{"current_steps": 7110, "total_steps": 78105, "loss": 0.4667, "lr": 4.550633721674562e-06, "epoch": 0.4551565200691377, "percentage": 9.1, "elapsed_time": "0:18:02", "remaining_time": "3:00:08", "throughput": 20699.23, "total_tokens": 22405376}
|
|
{"current_steps": 7115, "total_steps": 78105, "loss": 0.4732, "lr": 4.553834336192549e-06, "epoch": 0.4554766020101146, "percentage": 9.11, "elapsed_time": "0:18:03", "remaining_time": "3:00:06", "throughput": 20700.54, "total_tokens": 22420032}
|
|
{"current_steps": 7120, "total_steps": 78105, "loss": 0.5929, "lr": 4.557034950710537e-06, "epoch": 0.45579668395109146, "percentage": 9.12, "elapsed_time": "0:18:03", "remaining_time": "3:00:04", "throughput": 20701.8, "total_tokens": 22434688}
|
|
{"current_steps": 7125, "total_steps": 78105, "loss": 0.4998, "lr": 4.560235565228524e-06, "epoch": 0.45611676589206834, "percentage": 9.12, "elapsed_time": "0:18:04", "remaining_time": "3:00:02", "throughput": 20703.57, "total_tokens": 22451072}
|
|
{"current_steps": 7130, "total_steps": 78105, "loss": 0.4491, "lr": 4.563436179746512e-06, "epoch": 0.4564368478330453, "percentage": 9.13, "elapsed_time": "0:18:05", "remaining_time": "3:00:01", "throughput": 20705.02, "total_tokens": 22466688}
|
|
{"current_steps": 7135, "total_steps": 78105, "loss": 0.5812, "lr": 4.566636794264499e-06, "epoch": 0.45675692977402216, "percentage": 9.14, "elapsed_time": "0:18:05", "remaining_time": "2:59:59", "throughput": 20706.56, "total_tokens": 22482432}
|
|
{"current_steps": 7140, "total_steps": 78105, "loss": 0.4904, "lr": 4.5698374087824865e-06, "epoch": 0.45707701171499904, "percentage": 9.14, "elapsed_time": "0:18:06", "remaining_time": "2:59:57", "throughput": 20707.74, "total_tokens": 22496960}
|
|
{"current_steps": 7145, "total_steps": 78105, "loss": 0.5087, "lr": 4.5730380233004736e-06, "epoch": 0.4573970936559759, "percentage": 9.15, "elapsed_time": "0:18:07", "remaining_time": "2:59:56", "throughput": 20709.2, "total_tokens": 22512256}
|
|
{"current_steps": 7150, "total_steps": 78105, "loss": 0.5976, "lr": 4.5762386378184615e-06, "epoch": 0.4577171755969528, "percentage": 9.15, "elapsed_time": "0:18:07", "remaining_time": "2:59:54", "throughput": 20710.91, "total_tokens": 22528192}
|
|
{"current_steps": 7155, "total_steps": 78105, "loss": 0.5641, "lr": 4.579439252336449e-06, "epoch": 0.45803725753792973, "percentage": 9.16, "elapsed_time": "0:18:08", "remaining_time": "2:59:53", "throughput": 20712.94, "total_tokens": 22545408}
|
|
{"current_steps": 7160, "total_steps": 78105, "loss": 0.6556, "lr": 4.582639866854436e-06, "epoch": 0.4583573394789066, "percentage": 9.17, "elapsed_time": "0:18:09", "remaining_time": "2:59:51", "throughput": 20714.39, "total_tokens": 22560832}
|
|
{"current_steps": 7165, "total_steps": 78105, "loss": 0.494, "lr": 4.585840481372424e-06, "epoch": 0.4586774214198835, "percentage": 9.17, "elapsed_time": "0:18:09", "remaining_time": "2:59:50", "throughput": 20716.44, "total_tokens": 22577728}
|
|
{"current_steps": 7170, "total_steps": 78105, "loss": 0.5734, "lr": 4.589041095890411e-06, "epoch": 0.45899750336086037, "percentage": 9.18, "elapsed_time": "0:18:10", "remaining_time": "2:59:49", "throughput": 20718.23, "total_tokens": 22594112}
|
|
{"current_steps": 7175, "total_steps": 78105, "loss": 0.5078, "lr": 4.592241710408399e-06, "epoch": 0.45931758530183725, "percentage": 9.19, "elapsed_time": "0:18:11", "remaining_time": "2:59:47", "throughput": 20719.62, "total_tokens": 22609728}
|
|
{"current_steps": 7180, "total_steps": 78105, "loss": 0.5578, "lr": 4.595442324926386e-06, "epoch": 0.4596376672428142, "percentage": 9.19, "elapsed_time": "0:18:11", "remaining_time": "2:59:45", "throughput": 20720.95, "total_tokens": 22624704}
|
|
{"current_steps": 7185, "total_steps": 78105, "loss": 0.4481, "lr": 4.598642939444374e-06, "epoch": 0.45995774918379106, "percentage": 9.2, "elapsed_time": "0:18:12", "remaining_time": "2:59:44", "throughput": 20722.46, "total_tokens": 22640448}
|
|
{"current_steps": 7190, "total_steps": 78105, "loss": 0.3506, "lr": 4.601843553962361e-06, "epoch": 0.46027783112476794, "percentage": 9.21, "elapsed_time": "0:18:13", "remaining_time": "2:59:42", "throughput": 20723.86, "total_tokens": 22655680}
|
|
{"current_steps": 7195, "total_steps": 78105, "loss": 0.5025, "lr": 4.605044168480349e-06, "epoch": 0.4605979130657448, "percentage": 9.21, "elapsed_time": "0:18:13", "remaining_time": "2:59:40", "throughput": 20725.82, "total_tokens": 22672128}
|
|
{"current_steps": 7200, "total_steps": 78105, "loss": 0.5757, "lr": 4.608244782998336e-06, "epoch": 0.4609179950067217, "percentage": 9.22, "elapsed_time": "0:18:14", "remaining_time": "2:59:39", "throughput": 20727.08, "total_tokens": 22687104}
|
|
{"current_steps": 7205, "total_steps": 78105, "loss": 0.5052, "lr": 4.611445397516324e-06, "epoch": 0.4612380769476986, "percentage": 9.22, "elapsed_time": "0:18:15", "remaining_time": "2:59:37", "throughput": 20728.69, "total_tokens": 22702976}
|
|
{"current_steps": 7210, "total_steps": 78105, "loss": 0.4947, "lr": 4.614646012034311e-06, "epoch": 0.4615581588886755, "percentage": 9.23, "elapsed_time": "0:18:15", "remaining_time": "2:59:35", "throughput": 20730.17, "total_tokens": 22718464}
|
|
{"current_steps": 7215, "total_steps": 78105, "loss": 0.4546, "lr": 4.617846626552299e-06, "epoch": 0.4618782408296524, "percentage": 9.24, "elapsed_time": "0:18:16", "remaining_time": "2:59:34", "throughput": 20731.9, "total_tokens": 22734656}
|
|
{"current_steps": 7220, "total_steps": 78105, "loss": 0.449, "lr": 4.621047241070286e-06, "epoch": 0.4621983227706293, "percentage": 9.24, "elapsed_time": "0:18:17", "remaining_time": "2:59:32", "throughput": 20733.43, "total_tokens": 22750016}
|
|
{"current_steps": 7225, "total_steps": 78105, "loss": 0.4982, "lr": 4.624247855588273e-06, "epoch": 0.46251840471160616, "percentage": 9.25, "elapsed_time": "0:18:17", "remaining_time": "2:59:31", "throughput": 20734.94, "total_tokens": 22765632}
|
|
{"current_steps": 7230, "total_steps": 78105, "loss": 0.5168, "lr": 4.627448470106261e-06, "epoch": 0.46283848665258304, "percentage": 9.26, "elapsed_time": "0:18:18", "remaining_time": "2:59:29", "throughput": 20736.44, "total_tokens": 22781184}
|
|
{"current_steps": 7235, "total_steps": 78105, "loss": 0.6873, "lr": 4.630649084624248e-06, "epoch": 0.46315856859355997, "percentage": 9.26, "elapsed_time": "0:18:19", "remaining_time": "2:59:28", "throughput": 20738.0, "total_tokens": 22797248}
|
|
{"current_steps": 7240, "total_steps": 78105, "loss": 0.4643, "lr": 4.633849699142236e-06, "epoch": 0.46347865053453685, "percentage": 9.27, "elapsed_time": "0:18:19", "remaining_time": "2:59:26", "throughput": 20739.41, "total_tokens": 22812800}
|
|
{"current_steps": 7245, "total_steps": 78105, "loss": 0.5374, "lr": 4.637050313660223e-06, "epoch": 0.46379873247551373, "percentage": 9.28, "elapsed_time": "0:18:20", "remaining_time": "2:59:24", "throughput": 20740.51, "total_tokens": 22827200}
|
|
{"current_steps": 7250, "total_steps": 78105, "loss": 0.6252, "lr": 4.640250928178211e-06, "epoch": 0.4641188144164906, "percentage": 9.28, "elapsed_time": "0:18:21", "remaining_time": "2:59:23", "throughput": 20742.12, "total_tokens": 22843264}
|
|
{"current_steps": 7255, "total_steps": 78105, "loss": 0.5602, "lr": 4.643451542696198e-06, "epoch": 0.4644388963574675, "percentage": 9.29, "elapsed_time": "0:18:21", "remaining_time": "2:59:21", "throughput": 20743.55, "total_tokens": 22858624}
|
|
{"current_steps": 7260, "total_steps": 78105, "loss": 0.5698, "lr": 4.646652157214186e-06, "epoch": 0.4647589782984444, "percentage": 9.3, "elapsed_time": "0:18:22", "remaining_time": "2:59:19", "throughput": 20745.02, "total_tokens": 22873856}
|
|
{"current_steps": 7265, "total_steps": 78105, "loss": 0.4438, "lr": 4.649852771732173e-06, "epoch": 0.4650790602394213, "percentage": 9.3, "elapsed_time": "0:18:23", "remaining_time": "2:59:18", "throughput": 20746.49, "total_tokens": 22889600}
|
|
{"current_steps": 7270, "total_steps": 78105, "loss": 0.5235, "lr": 4.653053386250161e-06, "epoch": 0.4653991421803982, "percentage": 9.31, "elapsed_time": "0:18:23", "remaining_time": "2:59:16", "throughput": 20747.81, "total_tokens": 22904704}
|
|
{"current_steps": 7275, "total_steps": 78105, "loss": 0.4328, "lr": 4.656254000768148e-06, "epoch": 0.46571922412137506, "percentage": 9.31, "elapsed_time": "0:18:24", "remaining_time": "2:59:14", "throughput": 20749.21, "total_tokens": 22920384}
|
|
{"current_steps": 7280, "total_steps": 78105, "loss": 0.5346, "lr": 4.659454615286136e-06, "epoch": 0.46603930606235194, "percentage": 9.32, "elapsed_time": "0:18:25", "remaining_time": "2:59:13", "throughput": 20750.97, "total_tokens": 22937024}
|
|
{"current_steps": 7285, "total_steps": 78105, "loss": 0.5461, "lr": 4.662655229804123e-06, "epoch": 0.4663593880033289, "percentage": 9.33, "elapsed_time": "0:18:26", "remaining_time": "2:59:12", "throughput": 20753.19, "total_tokens": 22954816}
|
|
{"current_steps": 7290, "total_steps": 78105, "loss": 0.3186, "lr": 4.6658558443221105e-06, "epoch": 0.46667946994430576, "percentage": 9.33, "elapsed_time": "0:18:26", "remaining_time": "2:59:10", "throughput": 20754.41, "total_tokens": 22969472}
|
|
{"current_steps": 7295, "total_steps": 78105, "loss": 0.493, "lr": 4.669056458840098e-06, "epoch": 0.46699955188528264, "percentage": 9.34, "elapsed_time": "0:18:27", "remaining_time": "2:59:08", "throughput": 20755.61, "total_tokens": 22984192}
|
|
{"current_steps": 7300, "total_steps": 78105, "loss": 0.4327, "lr": 4.6722570733580855e-06, "epoch": 0.4673196338262595, "percentage": 9.35, "elapsed_time": "0:18:28", "remaining_time": "2:59:07", "throughput": 20757.09, "total_tokens": 22999680}
|
|
{"current_steps": 7305, "total_steps": 78105, "loss": 0.5308, "lr": 4.6754576878760725e-06, "epoch": 0.4676397157672364, "percentage": 9.35, "elapsed_time": "0:18:28", "remaining_time": "2:59:05", "throughput": 20758.33, "total_tokens": 23014720}
|
|
{"current_steps": 7310, "total_steps": 78105, "loss": 0.5436, "lr": 4.67865830239406e-06, "epoch": 0.4679597977082133, "percentage": 9.36, "elapsed_time": "0:18:29", "remaining_time": "2:59:03", "throughput": 20759.95, "total_tokens": 23030528}
|
|
{"current_steps": 7315, "total_steps": 78105, "loss": 0.5756, "lr": 4.6818589169120474e-06, "epoch": 0.4682798796491902, "percentage": 9.37, "elapsed_time": "0:18:30", "remaining_time": "2:59:02", "throughput": 20761.66, "total_tokens": 23046784}
|
|
{"current_steps": 7320, "total_steps": 78105, "loss": 0.4716, "lr": 4.685059531430035e-06, "epoch": 0.4685999615901671, "percentage": 9.37, "elapsed_time": "0:18:30", "remaining_time": "2:59:00", "throughput": 20762.84, "total_tokens": 23061632}
|
|
{"current_steps": 7325, "total_steps": 78105, "loss": 0.4359, "lr": 4.688260145948022e-06, "epoch": 0.46892004353114397, "percentage": 9.38, "elapsed_time": "0:18:31", "remaining_time": "2:58:59", "throughput": 20764.49, "total_tokens": 23077888}
|
|
{"current_steps": 7330, "total_steps": 78105, "loss": 0.5006, "lr": 4.69146076046601e-06, "epoch": 0.46924012547212085, "percentage": 9.38, "elapsed_time": "0:18:32", "remaining_time": "2:58:57", "throughput": 20765.85, "total_tokens": 23093568}
|
|
{"current_steps": 7335, "total_steps": 78105, "loss": 0.6237, "lr": 4.694661374983997e-06, "epoch": 0.46956020741309773, "percentage": 9.39, "elapsed_time": "0:18:32", "remaining_time": "2:58:56", "throughput": 20767.18, "total_tokens": 23108992}
|
|
{"current_steps": 7340, "total_steps": 78105, "loss": 0.468, "lr": 4.697861989501985e-06, "epoch": 0.46988028935407467, "percentage": 9.4, "elapsed_time": "0:18:33", "remaining_time": "2:58:54", "throughput": 20768.77, "total_tokens": 23124992}
|
|
{"current_steps": 7345, "total_steps": 78105, "loss": 0.4068, "lr": 4.701062604019972e-06, "epoch": 0.47020037129505154, "percentage": 9.4, "elapsed_time": "0:18:34", "remaining_time": "2:58:53", "throughput": 20770.63, "total_tokens": 23141888}
|
|
{"current_steps": 7350, "total_steps": 78105, "loss": 0.4744, "lr": 4.704263218537959e-06, "epoch": 0.4705204532360284, "percentage": 9.41, "elapsed_time": "0:18:34", "remaining_time": "2:58:52", "throughput": 20772.2, "total_tokens": 23157888}
|
|
{"current_steps": 7355, "total_steps": 78105, "loss": 0.3915, "lr": 4.707463833055947e-06, "epoch": 0.4708405351770053, "percentage": 9.42, "elapsed_time": "0:18:35", "remaining_time": "2:58:50", "throughput": 20773.45, "total_tokens": 23172864}
|
|
{"current_steps": 7360, "total_steps": 78105, "loss": 0.558, "lr": 4.710664447573934e-06, "epoch": 0.4711606171179822, "percentage": 9.42, "elapsed_time": "0:18:36", "remaining_time": "2:58:48", "throughput": 20774.99, "total_tokens": 23188352}
|
|
{"current_steps": 7365, "total_steps": 78105, "loss": 0.4398, "lr": 4.713865062091922e-06, "epoch": 0.4714806990589591, "percentage": 9.43, "elapsed_time": "0:18:36", "remaining_time": "2:58:47", "throughput": 20776.32, "total_tokens": 23204096}
|
|
{"current_steps": 7370, "total_steps": 78105, "loss": 0.4179, "lr": 4.717065676609909e-06, "epoch": 0.471800780999936, "percentage": 9.44, "elapsed_time": "0:18:37", "remaining_time": "2:58:45", "throughput": 20777.51, "total_tokens": 23219072}
|
|
{"current_steps": 7375, "total_steps": 78105, "loss": 0.5466, "lr": 4.720266291127897e-06, "epoch": 0.4721208629409129, "percentage": 9.44, "elapsed_time": "0:18:38", "remaining_time": "2:58:43", "throughput": 20779.07, "total_tokens": 23234560}
|
|
{"current_steps": 7380, "total_steps": 78105, "loss": 0.4228, "lr": 4.723466905645884e-06, "epoch": 0.47244094488188976, "percentage": 9.45, "elapsed_time": "0:18:38", "remaining_time": "2:58:42", "throughput": 20780.7, "total_tokens": 23250304}
|
|
{"current_steps": 7385, "total_steps": 78105, "loss": 0.8351, "lr": 4.726667520163872e-06, "epoch": 0.47276102682286664, "percentage": 9.46, "elapsed_time": "0:18:39", "remaining_time": "2:58:40", "throughput": 20782.02, "total_tokens": 23265664}
|
|
{"current_steps": 7390, "total_steps": 78105, "loss": 0.6303, "lr": 4.729868134681859e-06, "epoch": 0.4730811087638435, "percentage": 9.46, "elapsed_time": "0:18:40", "remaining_time": "2:58:39", "throughput": 20783.59, "total_tokens": 23281472}
|
|
{"current_steps": 7395, "total_steps": 78105, "loss": 0.5959, "lr": 4.733068749199847e-06, "epoch": 0.47340119070482045, "percentage": 9.47, "elapsed_time": "0:18:40", "remaining_time": "2:58:37", "throughput": 20784.94, "total_tokens": 23297024}
|
|
{"current_steps": 7400, "total_steps": 78105, "loss": 0.5036, "lr": 4.736269363717834e-06, "epoch": 0.47372127264579733, "percentage": 9.47, "elapsed_time": "0:18:41", "remaining_time": "2:58:36", "throughput": 20786.49, "total_tokens": 23313344}
|
|
{"current_steps": 7405, "total_steps": 78105, "loss": 0.5866, "lr": 4.739469978235822e-06, "epoch": 0.4740413545867742, "percentage": 9.48, "elapsed_time": "0:18:42", "remaining_time": "2:58:34", "throughput": 20787.71, "total_tokens": 23328512}
|
|
{"current_steps": 7410, "total_steps": 78105, "loss": 0.544, "lr": 4.742670592753809e-06, "epoch": 0.4743614365277511, "percentage": 9.49, "elapsed_time": "0:18:42", "remaining_time": "2:58:32", "throughput": 20788.48, "total_tokens": 23342400}
|
|
{"current_steps": 7415, "total_steps": 78105, "loss": 0.5637, "lr": 4.745871207271797e-06, "epoch": 0.47468151846872797, "percentage": 9.49, "elapsed_time": "0:18:43", "remaining_time": "2:58:31", "throughput": 20790.03, "total_tokens": 23358400}
|
|
{"current_steps": 7420, "total_steps": 78105, "loss": 0.5588, "lr": 4.749071821789784e-06, "epoch": 0.4750016004097049, "percentage": 9.5, "elapsed_time": "0:18:44", "remaining_time": "2:58:29", "throughput": 20791.27, "total_tokens": 23373376}
|
|
{"current_steps": 7425, "total_steps": 78105, "loss": 0.6253, "lr": 4.752272436307772e-06, "epoch": 0.4753216823506818, "percentage": 9.51, "elapsed_time": "0:18:44", "remaining_time": "2:58:28", "throughput": 20793.22, "total_tokens": 23390400}
|
|
{"current_steps": 7430, "total_steps": 78105, "loss": 0.4955, "lr": 4.755473050825759e-06, "epoch": 0.47564176429165866, "percentage": 9.51, "elapsed_time": "0:18:45", "remaining_time": "2:58:26", "throughput": 20794.7, "total_tokens": 23406400}
|
|
{"current_steps": 7435, "total_steps": 78105, "loss": 0.3974, "lr": 4.758673665343747e-06, "epoch": 0.47596184623263554, "percentage": 9.52, "elapsed_time": "0:18:46", "remaining_time": "2:58:25", "throughput": 20796.05, "total_tokens": 23421824}
|
|
{"current_steps": 7440, "total_steps": 78105, "loss": 0.6372, "lr": 4.761874279861734e-06, "epoch": 0.4762819281736124, "percentage": 9.53, "elapsed_time": "0:18:46", "remaining_time": "2:58:23", "throughput": 20797.86, "total_tokens": 23438272}
|
|
{"current_steps": 7445, "total_steps": 78105, "loss": 0.576, "lr": 4.765074894379722e-06, "epoch": 0.47660201011458936, "percentage": 9.53, "elapsed_time": "0:18:47", "remaining_time": "2:58:22", "throughput": 20799.48, "total_tokens": 23454976}
|
|
{"current_steps": 7450, "total_steps": 78105, "loss": 0.5894, "lr": 4.768275508897709e-06, "epoch": 0.47692209205556624, "percentage": 9.54, "elapsed_time": "0:18:48", "remaining_time": "2:58:21", "throughput": 20801.22, "total_tokens": 23471168}
|
|
{"current_steps": 7455, "total_steps": 78105, "loss": 0.5048, "lr": 4.7714761234156965e-06, "epoch": 0.4772421739965431, "percentage": 9.54, "elapsed_time": "0:18:49", "remaining_time": "2:58:19", "throughput": 20802.68, "total_tokens": 23486720}
|
|
{"current_steps": 7460, "total_steps": 78105, "loss": 0.3781, "lr": 4.7746767379336836e-06, "epoch": 0.47756225593752, "percentage": 9.55, "elapsed_time": "0:18:49", "remaining_time": "2:58:18", "throughput": 20804.29, "total_tokens": 23502912}
|
|
{"current_steps": 7465, "total_steps": 78105, "loss": 0.4494, "lr": 4.777877352451671e-06, "epoch": 0.4778823378784969, "percentage": 9.56, "elapsed_time": "0:18:50", "remaining_time": "2:58:16", "throughput": 20806.23, "total_tokens": 23519552}
|
|
{"current_steps": 7470, "total_steps": 78105, "loss": 0.5935, "lr": 4.7810779669696585e-06, "epoch": 0.4782024198194738, "percentage": 9.56, "elapsed_time": "0:18:51", "remaining_time": "2:58:15", "throughput": 20807.87, "total_tokens": 23535936}
|
|
{"current_steps": 7475, "total_steps": 78105, "loss": 0.5767, "lr": 4.7842785814876455e-06, "epoch": 0.4785225017604507, "percentage": 9.57, "elapsed_time": "0:18:51", "remaining_time": "2:58:13", "throughput": 20809.07, "total_tokens": 23551040}
|
|
{"current_steps": 7480, "total_steps": 78105, "loss": 0.3451, "lr": 4.7874791960056334e-06, "epoch": 0.47884258370142757, "percentage": 9.58, "elapsed_time": "0:18:52", "remaining_time": "2:58:12", "throughput": 20810.25, "total_tokens": 23566208}
|
|
{"current_steps": 7485, "total_steps": 78105, "loss": 0.5582, "lr": 4.7906798105236205e-06, "epoch": 0.47916266564240445, "percentage": 9.58, "elapsed_time": "0:18:53", "remaining_time": "2:58:10", "throughput": 20811.68, "total_tokens": 23581696}
|
|
{"current_steps": 7490, "total_steps": 78105, "loss": 0.4705, "lr": 4.793880425041608e-06, "epoch": 0.47948274758338133, "percentage": 9.59, "elapsed_time": "0:18:53", "remaining_time": "2:58:09", "throughput": 20813.07, "total_tokens": 23597248}
|
|
{"current_steps": 7495, "total_steps": 78105, "loss": 0.7067, "lr": 4.797081039559595e-06, "epoch": 0.4798028295243582, "percentage": 9.6, "elapsed_time": "0:18:54", "remaining_time": "2:58:07", "throughput": 20814.35, "total_tokens": 23612800}
|
|
{"current_steps": 7500, "total_steps": 78105, "loss": 0.5679, "lr": 4.800281654077583e-06, "epoch": 0.48012291146533514, "percentage": 9.6, "elapsed_time": "0:18:55", "remaining_time": "2:58:06", "throughput": 20815.93, "total_tokens": 23629056}
|
|
{"current_steps": 7505, "total_steps": 78105, "loss": 0.5621, "lr": 4.80348226859557e-06, "epoch": 0.480442993406312, "percentage": 9.61, "elapsed_time": "0:18:55", "remaining_time": "2:58:04", "throughput": 20817.2, "total_tokens": 23644352}
|
|
{"current_steps": 7510, "total_steps": 78105, "loss": 0.5127, "lr": 4.806682883113558e-06, "epoch": 0.4807630753472889, "percentage": 9.62, "elapsed_time": "0:18:56", "remaining_time": "2:58:03", "throughput": 20819.0, "total_tokens": 23661120}
|
|
{"current_steps": 7515, "total_steps": 78105, "loss": 0.4116, "lr": 4.809883497631545e-06, "epoch": 0.4810831572882658, "percentage": 9.62, "elapsed_time": "0:18:57", "remaining_time": "2:58:02", "throughput": 20820.6, "total_tokens": 23677632}
|
|
{"current_steps": 7520, "total_steps": 78105, "loss": 0.4893, "lr": 4.813084112149533e-06, "epoch": 0.48140323922924266, "percentage": 9.63, "elapsed_time": "0:18:57", "remaining_time": "2:58:00", "throughput": 20822.3, "total_tokens": 23693952}
|
|
{"current_steps": 7525, "total_steps": 78105, "loss": 0.4928, "lr": 4.81628472666752e-06, "epoch": 0.4817233211702196, "percentage": 9.63, "elapsed_time": "0:18:58", "remaining_time": "2:57:58", "throughput": 20823.52, "total_tokens": 23708736}
|
|
{"current_steps": 7530, "total_steps": 78105, "loss": 0.5116, "lr": 4.819485341185508e-06, "epoch": 0.4820434031111965, "percentage": 9.64, "elapsed_time": "0:18:59", "remaining_time": "2:57:57", "throughput": 20825.13, "total_tokens": 23724864}
|
|
{"current_steps": 7535, "total_steps": 78105, "loss": 0.4219, "lr": 4.822685955703495e-06, "epoch": 0.48236348505217336, "percentage": 9.65, "elapsed_time": "0:18:59", "remaining_time": "2:57:55", "throughput": 20826.4, "total_tokens": 23740160}
|
|
{"current_steps": 7540, "total_steps": 78105, "loss": 0.6203, "lr": 4.825886570221483e-06, "epoch": 0.48268356699315024, "percentage": 9.65, "elapsed_time": "0:19:00", "remaining_time": "2:57:54", "throughput": 20827.62, "total_tokens": 23755264}
|
|
{"current_steps": 7545, "total_steps": 78105, "loss": 0.4138, "lr": 4.82908718473947e-06, "epoch": 0.4830036489341271, "percentage": 9.66, "elapsed_time": "0:19:01", "remaining_time": "2:57:52", "throughput": 20828.62, "total_tokens": 23770112}
|
|
{"current_steps": 7550, "total_steps": 78105, "loss": 0.5278, "lr": 4.832287799257458e-06, "epoch": 0.48332373087510405, "percentage": 9.67, "elapsed_time": "0:19:01", "remaining_time": "2:57:50", "throughput": 20830.05, "total_tokens": 23785536}
|
|
{"current_steps": 7555, "total_steps": 78105, "loss": 0.4705, "lr": 4.835488413775445e-06, "epoch": 0.48364381281608093, "percentage": 9.67, "elapsed_time": "0:19:02", "remaining_time": "2:57:49", "throughput": 20831.55, "total_tokens": 23801280}
|
|
{"current_steps": 7560, "total_steps": 78105, "loss": 0.5496, "lr": 4.838689028293433e-06, "epoch": 0.4839638947570578, "percentage": 9.68, "elapsed_time": "0:19:03", "remaining_time": "2:57:47", "throughput": 20832.75, "total_tokens": 23816448}
|
|
{"current_steps": 7565, "total_steps": 78105, "loss": 0.6405, "lr": 4.84188964281142e-06, "epoch": 0.4842839766980347, "percentage": 9.69, "elapsed_time": "0:19:03", "remaining_time": "2:57:46", "throughput": 20834.27, "total_tokens": 23832512}
|
|
{"current_steps": 7570, "total_steps": 78105, "loss": 0.4612, "lr": 4.845090257329408e-06, "epoch": 0.48460405863901157, "percentage": 9.69, "elapsed_time": "0:19:04", "remaining_time": "2:57:44", "throughput": 20835.67, "total_tokens": 23848192}
|
|
{"current_steps": 7575, "total_steps": 78105, "loss": 0.5829, "lr": 4.848290871847396e-06, "epoch": 0.48492414057998845, "percentage": 9.7, "elapsed_time": "0:19:05", "remaining_time": "2:57:43", "throughput": 20837.07, "total_tokens": 23863808}
|
|
{"current_steps": 7580, "total_steps": 78105, "loss": 0.4312, "lr": 4.851491486365383e-06, "epoch": 0.4852442225209654, "percentage": 9.7, "elapsed_time": "0:19:05", "remaining_time": "2:57:42", "throughput": 20838.91, "total_tokens": 23880960}
|
|
{"current_steps": 7585, "total_steps": 78105, "loss": 0.4587, "lr": 4.85469210088337e-06, "epoch": 0.48556430446194226, "percentage": 9.71, "elapsed_time": "0:19:06", "remaining_time": "2:57:40", "throughput": 20840.17, "total_tokens": 23896256}
|
|
{"current_steps": 7590, "total_steps": 78105, "loss": 0.596, "lr": 4.857892715401357e-06, "epoch": 0.48588438640291914, "percentage": 9.72, "elapsed_time": "0:19:07", "remaining_time": "2:57:39", "throughput": 20841.89, "total_tokens": 23912832}
|
|
{"current_steps": 7595, "total_steps": 78105, "loss": 0.5466, "lr": 4.861093329919345e-06, "epoch": 0.486204468343896, "percentage": 9.72, "elapsed_time": "0:19:08", "remaining_time": "2:57:37", "throughput": 20843.42, "total_tokens": 23928768}
|
|
{"current_steps": 7600, "total_steps": 78105, "loss": 0.5774, "lr": 4.864293944437332e-06, "epoch": 0.4865245502848729, "percentage": 9.73, "elapsed_time": "0:19:08", "remaining_time": "2:57:36", "throughput": 20845.06, "total_tokens": 23945088}
|
|
{"current_steps": 7605, "total_steps": 78105, "loss": 0.5805, "lr": 4.86749455895532e-06, "epoch": 0.48684463222584984, "percentage": 9.74, "elapsed_time": "0:19:09", "remaining_time": "2:57:34", "throughput": 20846.22, "total_tokens": 23960192}
|
|
{"current_steps": 7610, "total_steps": 78105, "loss": 0.4313, "lr": 4.870695173473307e-06, "epoch": 0.4871647141668267, "percentage": 9.74, "elapsed_time": "0:19:10", "remaining_time": "2:57:33", "throughput": 20847.98, "total_tokens": 23977152}
|
|
{"current_steps": 7615, "total_steps": 78105, "loss": 0.457, "lr": 4.873895787991295e-06, "epoch": 0.4874847961078036, "percentage": 9.75, "elapsed_time": "0:19:10", "remaining_time": "2:57:32", "throughput": 20849.11, "total_tokens": 23992448}
|
|
{"current_steps": 7620, "total_steps": 78105, "loss": 0.4882, "lr": 4.877096402509282e-06, "epoch": 0.4878048780487805, "percentage": 9.76, "elapsed_time": "0:19:11", "remaining_time": "2:57:30", "throughput": 20850.5, "total_tokens": 24008384}
|
|
{"current_steps": 7625, "total_steps": 78105, "loss": 0.6539, "lr": 4.8802970170272696e-06, "epoch": 0.48812495998975736, "percentage": 9.76, "elapsed_time": "0:19:12", "remaining_time": "2:57:29", "throughput": 20852.36, "total_tokens": 24025728}
|
|
{"current_steps": 7630, "total_steps": 78105, "loss": 0.5459, "lr": 4.883497631545257e-06, "epoch": 0.4884450419307343, "percentage": 9.77, "elapsed_time": "0:19:12", "remaining_time": "2:57:28", "throughput": 20853.41, "total_tokens": 24040448}
|
|
{"current_steps": 7635, "total_steps": 78105, "loss": 0.5034, "lr": 4.8866982460632445e-06, "epoch": 0.48876512387171117, "percentage": 9.78, "elapsed_time": "0:19:13", "remaining_time": "2:57:26", "throughput": 20854.44, "total_tokens": 24055168}
|
|
{"current_steps": 7640, "total_steps": 78105, "loss": 0.6462, "lr": 4.889898860581232e-06, "epoch": 0.48908520581268805, "percentage": 9.78, "elapsed_time": "0:19:14", "remaining_time": "2:57:24", "throughput": 20855.51, "total_tokens": 24070016}
|
|
{"current_steps": 7645, "total_steps": 78105, "loss": 0.363, "lr": 4.8930994750992194e-06, "epoch": 0.48940528775366493, "percentage": 9.79, "elapsed_time": "0:19:14", "remaining_time": "2:57:23", "throughput": 20856.58, "total_tokens": 24084800}
|
|
{"current_steps": 7650, "total_steps": 78105, "loss": 0.5898, "lr": 4.896300089617207e-06, "epoch": 0.4897253696946418, "percentage": 9.79, "elapsed_time": "0:19:15", "remaining_time": "2:57:21", "throughput": 20857.97, "total_tokens": 24100416}
|
|
{"current_steps": 7655, "total_steps": 78105, "loss": 0.42, "lr": 4.899500704135194e-06, "epoch": 0.49004545163561875, "percentage": 9.8, "elapsed_time": "0:19:16", "remaining_time": "2:57:20", "throughput": 20859.6, "total_tokens": 24117120}
|
|
{"current_steps": 7660, "total_steps": 78105, "loss": 0.5558, "lr": 4.902701318653182e-06, "epoch": 0.4903655335765956, "percentage": 9.81, "elapsed_time": "0:19:16", "remaining_time": "2:57:18", "throughput": 20860.78, "total_tokens": 24132224}
|
|
{"current_steps": 7665, "total_steps": 78105, "loss": 0.472, "lr": 4.905901933171169e-06, "epoch": 0.4906856155175725, "percentage": 9.81, "elapsed_time": "0:19:17", "remaining_time": "2:57:17", "throughput": 20861.98, "total_tokens": 24147712}
|
|
{"current_steps": 7670, "total_steps": 78105, "loss": 0.4881, "lr": 4.909102547689157e-06, "epoch": 0.4910056974585494, "percentage": 9.82, "elapsed_time": "0:19:18", "remaining_time": "2:57:15", "throughput": 20863.66, "total_tokens": 24164288}
|
|
{"current_steps": 7675, "total_steps": 78105, "loss": 0.6297, "lr": 4.912303162207144e-06, "epoch": 0.49132577939952626, "percentage": 9.83, "elapsed_time": "0:19:19", "remaining_time": "2:57:19", "throughput": 20867.69, "total_tokens": 24193728}
|
|
{"current_steps": 7680, "total_steps": 78105, "loss": 0.5525, "lr": 4.915503776725132e-06, "epoch": 0.49164586134050314, "percentage": 9.83, "elapsed_time": "0:19:20", "remaining_time": "2:57:17", "throughput": 20868.9, "total_tokens": 24208832}
|
|
{"current_steps": 7685, "total_steps": 78105, "loss": 0.617, "lr": 4.918704391243119e-06, "epoch": 0.4919659432814801, "percentage": 9.84, "elapsed_time": "0:19:20", "remaining_time": "2:57:15", "throughput": 20869.98, "total_tokens": 24223424}
|
|
{"current_steps": 7690, "total_steps": 78105, "loss": 0.5242, "lr": 4.921905005761107e-06, "epoch": 0.49228602522245696, "percentage": 9.85, "elapsed_time": "0:19:21", "remaining_time": "2:57:14", "throughput": 20871.21, "total_tokens": 24238976}
|
|
{"current_steps": 7695, "total_steps": 78105, "loss": 0.4704, "lr": 4.925105620279094e-06, "epoch": 0.49260610716343384, "percentage": 9.85, "elapsed_time": "0:19:22", "remaining_time": "2:57:12", "throughput": 20872.27, "total_tokens": 24253632}
|
|
{"current_steps": 7700, "total_steps": 78105, "loss": 0.4639, "lr": 4.928306234797082e-06, "epoch": 0.4929261891044107, "percentage": 9.86, "elapsed_time": "0:19:22", "remaining_time": "2:57:11", "throughput": 20874.23, "total_tokens": 24270720}
|
|
{"current_steps": 7705, "total_steps": 78105, "loss": 0.533, "lr": 4.931506849315069e-06, "epoch": 0.4932462710453876, "percentage": 9.86, "elapsed_time": "0:19:23", "remaining_time": "2:57:09", "throughput": 20875.17, "total_tokens": 24285312}
|
|
{"current_steps": 7710, "total_steps": 78105, "loss": 0.5732, "lr": 4.934707463833056e-06, "epoch": 0.49356635298636453, "percentage": 9.87, "elapsed_time": "0:19:24", "remaining_time": "2:57:07", "throughput": 20876.3, "total_tokens": 24300352}
|
|
{"current_steps": 7715, "total_steps": 78105, "loss": 0.5093, "lr": 4.937908078351044e-06, "epoch": 0.4938864349273414, "percentage": 9.88, "elapsed_time": "0:19:24", "remaining_time": "2:57:06", "throughput": 20877.88, "total_tokens": 24317056}
|
|
{"current_steps": 7720, "total_steps": 78105, "loss": 0.4937, "lr": 4.941108692869031e-06, "epoch": 0.4942065168683183, "percentage": 9.88, "elapsed_time": "0:19:25", "remaining_time": "2:57:05", "throughput": 20879.33, "total_tokens": 24333184}
|
|
{"current_steps": 7725, "total_steps": 78105, "loss": 0.6299, "lr": 4.944309307387019e-06, "epoch": 0.49452659880929517, "percentage": 9.89, "elapsed_time": "0:19:26", "remaining_time": "2:57:03", "throughput": 20880.41, "total_tokens": 24348672}
|
|
{"current_steps": 7730, "total_steps": 78105, "loss": 0.5236, "lr": 4.947509921905006e-06, "epoch": 0.49484668075027205, "percentage": 9.9, "elapsed_time": "0:19:26", "remaining_time": "2:57:02", "throughput": 20881.44, "total_tokens": 24363648}
|
|
{"current_steps": 7735, "total_steps": 78105, "loss": 0.4852, "lr": 4.950710536422994e-06, "epoch": 0.495166762691249, "percentage": 9.9, "elapsed_time": "0:19:27", "remaining_time": "2:57:01", "throughput": 20883.37, "total_tokens": 24380992}
|
|
{"current_steps": 7740, "total_steps": 78105, "loss": 0.5179, "lr": 4.953911150940981e-06, "epoch": 0.49548684463222586, "percentage": 9.91, "elapsed_time": "0:19:28", "remaining_time": "2:56:59", "throughput": 20884.4, "total_tokens": 24395776}
|
|
{"current_steps": 7745, "total_steps": 78105, "loss": 0.4997, "lr": 4.957111765458969e-06, "epoch": 0.49580692657320274, "percentage": 9.92, "elapsed_time": "0:19:28", "remaining_time": "2:56:58", "throughput": 20885.83, "total_tokens": 24411584}
|
|
{"current_steps": 7750, "total_steps": 78105, "loss": 0.7475, "lr": 4.960312379976956e-06, "epoch": 0.4961270085141796, "percentage": 9.92, "elapsed_time": "0:19:29", "remaining_time": "2:56:56", "throughput": 20886.95, "total_tokens": 24426560}
|
|
{"current_steps": 7755, "total_steps": 78105, "loss": 0.6749, "lr": 4.963512994494944e-06, "epoch": 0.4964470904551565, "percentage": 9.93, "elapsed_time": "0:19:30", "remaining_time": "2:56:54", "throughput": 20888.2, "total_tokens": 24441856}
|
|
{"current_steps": 7760, "total_steps": 78105, "loss": 0.4507, "lr": 4.966713609012931e-06, "epoch": 0.4967671723961334, "percentage": 9.94, "elapsed_time": "0:19:30", "remaining_time": "2:56:53", "throughput": 20889.55, "total_tokens": 24457472}
|
|
{"current_steps": 7765, "total_steps": 78105, "loss": 0.6491, "lr": 4.969914223530919e-06, "epoch": 0.4970872543371103, "percentage": 9.94, "elapsed_time": "0:19:31", "remaining_time": "2:56:51", "throughput": 20890.56, "total_tokens": 24471936}
|
|
{"current_steps": 7770, "total_steps": 78105, "loss": 0.4794, "lr": 4.973114838048906e-06, "epoch": 0.4974073362780872, "percentage": 9.95, "elapsed_time": "0:19:32", "remaining_time": "2:56:50", "throughput": 20891.95, "total_tokens": 24487616}
|
|
{"current_steps": 7775, "total_steps": 78105, "loss": 0.6671, "lr": 4.9763154525668936e-06, "epoch": 0.4977274182190641, "percentage": 9.95, "elapsed_time": "0:19:32", "remaining_time": "2:56:48", "throughput": 20893.35, "total_tokens": 24503936}
|
|
{"current_steps": 7780, "total_steps": 78105, "loss": 0.6155, "lr": 4.979516067084881e-06, "epoch": 0.49804750016004096, "percentage": 9.96, "elapsed_time": "0:19:33", "remaining_time": "2:56:47", "throughput": 20894.35, "total_tokens": 24518464}
|
|
{"current_steps": 7785, "total_steps": 78105, "loss": 0.3628, "lr": 4.9827166816028685e-06, "epoch": 0.49836758210101784, "percentage": 9.97, "elapsed_time": "0:19:34", "remaining_time": "2:56:45", "throughput": 20895.34, "total_tokens": 24533184}
|
|
{"current_steps": 7790, "total_steps": 78105, "loss": 0.5781, "lr": 4.9859172961208556e-06, "epoch": 0.49868766404199477, "percentage": 9.97, "elapsed_time": "0:19:34", "remaining_time": "2:56:43", "throughput": 20896.74, "total_tokens": 24548992}
|
|
{"current_steps": 7795, "total_steps": 78105, "loss": 0.5001, "lr": 4.9891179106388434e-06, "epoch": 0.49900774598297165, "percentage": 9.98, "elapsed_time": "0:19:35", "remaining_time": "2:56:43", "throughput": 20898.55, "total_tokens": 24566592}
|
|
{"current_steps": 7800, "total_steps": 78105, "loss": 0.6452, "lr": 4.9923185251568305e-06, "epoch": 0.49932782792394853, "percentage": 9.99, "elapsed_time": "0:19:36", "remaining_time": "2:56:41", "throughput": 20900.33, "total_tokens": 24583552}
|
|
{"current_steps": 7805, "total_steps": 78105, "loss": 0.5224, "lr": 4.995519139674818e-06, "epoch": 0.4996479098649254, "percentage": 9.99, "elapsed_time": "0:19:36", "remaining_time": "2:56:40", "throughput": 20901.7, "total_tokens": 24599552}
|
|
{"current_steps": 7810, "total_steps": 78105, "loss": 0.4689, "lr": 4.998719754192805e-06, "epoch": 0.4999679918059023, "percentage": 10.0, "elapsed_time": "0:19:37", "remaining_time": "2:56:39", "throughput": 20902.86, "total_tokens": 24614912}
|
|
{"current_steps": 7812, "total_steps": 78105, "eval_loss": 0.5265359282493591, "epoch": 0.5000960245822931, "percentage": 10.0, "elapsed_time": "0:20:28", "remaining_time": "3:04:17", "throughput": 20034.66, "total_tokens": 24620672}
|
|
{"current_steps": 7815, "total_steps": 78105, "loss": 0.4975, "lr": 4.999999977529344e-06, "epoch": 0.5002880737468792, "percentage": 10.01, "elapsed_time": "0:21:19", "remaining_time": "3:11:50", "throughput": 19245.69, "total_tokens": 24629696}
|
|
{"current_steps": 7820, "total_steps": 78105, "loss": 0.5334, "lr": 4.99999984020867e-06, "epoch": 0.5006081556878561, "percentage": 10.01, "elapsed_time": "0:21:20", "remaining_time": "3:11:48", "throughput": 19247.95, "total_tokens": 24646208}
|
|
{"current_steps": 7825, "total_steps": 78105, "loss": 0.6436, "lr": 4.999999578051025e-06, "epoch": 0.500928237628833, "percentage": 10.02, "elapsed_time": "0:21:21", "remaining_time": "3:11:46", "throughput": 19249.79, "total_tokens": 24661120}
|
|
{"current_steps": 7830, "total_steps": 78105, "loss": 0.5722, "lr": 4.999999191056423e-06, "epoch": 0.5012483195698099, "percentage": 10.02, "elapsed_time": "0:21:21", "remaining_time": "3:11:44", "throughput": 19252.47, "total_tokens": 24678336}
|
|
{"current_steps": 7835, "total_steps": 78105, "loss": 0.6323, "lr": 4.999998679224884e-06, "epoch": 0.5015684015107867, "percentage": 10.03, "elapsed_time": "0:21:22", "remaining_time": "3:11:42", "throughput": 19254.6, "total_tokens": 24694336}
|
|
{"current_steps": 7840, "total_steps": 78105, "loss": 0.4604, "lr": 4.999998042556433e-06, "epoch": 0.5018884834517636, "percentage": 10.04, "elapsed_time": "0:21:23", "remaining_time": "3:11:40", "throughput": 19256.24, "total_tokens": 24708608}
|
|
{"current_steps": 7845, "total_steps": 78105, "loss": 0.5815, "lr": 4.999997281051102e-06, "epoch": 0.5022085653927405, "percentage": 10.04, "elapsed_time": "0:21:23", "remaining_time": "3:11:38", "throughput": 19258.35, "total_tokens": 24724672}
|
|
{"current_steps": 7850, "total_steps": 78105, "loss": 0.5677, "lr": 4.999996394708928e-06, "epoch": 0.5025286473337174, "percentage": 10.05, "elapsed_time": "0:21:24", "remaining_time": "3:11:36", "throughput": 19260.37, "total_tokens": 24740608}
|
|
{"current_steps": 7855, "total_steps": 78105, "loss": 0.5456, "lr": 4.999995383529957e-06, "epoch": 0.5028487292746944, "percentage": 10.06, "elapsed_time": "0:21:25", "remaining_time": "3:11:34", "throughput": 19263.13, "total_tokens": 24758528}
|
|
{"current_steps": 7860, "total_steps": 78105, "loss": 0.4849, "lr": 4.9999942475142375e-06, "epoch": 0.5031688112156713, "percentage": 10.06, "elapsed_time": "0:21:25", "remaining_time": "3:11:32", "throughput": 19264.82, "total_tokens": 24772992}
|
|
{"current_steps": 7865, "total_steps": 78105, "loss": 0.5574, "lr": 4.999992986661828e-06, "epoch": 0.5034888931566481, "percentage": 10.07, "elapsed_time": "0:21:26", "remaining_time": "3:11:30", "throughput": 19266.67, "total_tokens": 24788160}
|
|
{"current_steps": 7870, "total_steps": 78105, "loss": 0.5406, "lr": 4.99999160097279e-06, "epoch": 0.503808975097625, "percentage": 10.08, "elapsed_time": "0:21:27", "remaining_time": "3:11:28", "throughput": 19268.85, "total_tokens": 24804224}
|
|
{"current_steps": 7875, "total_steps": 78105, "loss": 0.6051, "lr": 4.999990090447194e-06, "epoch": 0.5041290570386019, "percentage": 10.08, "elapsed_time": "0:21:27", "remaining_time": "3:11:26", "throughput": 19271.21, "total_tokens": 24820928}
|
|
{"current_steps": 7880, "total_steps": 78105, "loss": 0.5021, "lr": 4.9999884550851155e-06, "epoch": 0.5044491389795788, "percentage": 10.09, "elapsed_time": "0:21:28", "remaining_time": "3:11:24", "throughput": 19272.97, "total_tokens": 24835776}
|
|
{"current_steps": 7885, "total_steps": 78105, "loss": 0.4682, "lr": 4.999986694886634e-06, "epoch": 0.5047692209205557, "percentage": 10.1, "elapsed_time": "0:21:29", "remaining_time": "3:11:22", "throughput": 19275.05, "total_tokens": 24851648}
|
|
{"current_steps": 7890, "total_steps": 78105, "loss": 0.5255, "lr": 4.99998480985184e-06, "epoch": 0.5050893028615325, "percentage": 10.1, "elapsed_time": "0:21:29", "remaining_time": "3:11:19", "throughput": 19277.01, "total_tokens": 24866880}
|
|
{"current_steps": 7895, "total_steps": 78105, "loss": 0.4494, "lr": 4.9999827999808256e-06, "epoch": 0.5054093848025094, "percentage": 10.11, "elapsed_time": "0:21:30", "remaining_time": "3:11:17", "throughput": 19278.84, "total_tokens": 24881856}
|
|
{"current_steps": 7900, "total_steps": 78105, "loss": 0.5709, "lr": 4.999980665273693e-06, "epoch": 0.5057294667434863, "percentage": 10.11, "elapsed_time": "0:21:31", "remaining_time": "3:11:15", "throughput": 19281.31, "total_tokens": 24898560}
|
|
{"current_steps": 7905, "total_steps": 78105, "loss": 0.4448, "lr": 4.999978405730548e-06, "epoch": 0.5060495486844632, "percentage": 10.12, "elapsed_time": "0:21:31", "remaining_time": "3:11:13", "throughput": 19283.05, "total_tokens": 24913216}
|
|
{"current_steps": 7910, "total_steps": 78105, "loss": 0.6069, "lr": 4.999976021351503e-06, "epoch": 0.5063696306254402, "percentage": 10.13, "elapsed_time": "0:21:32", "remaining_time": "3:11:11", "throughput": 19285.03, "total_tokens": 24928832}
|
|
{"current_steps": 7915, "total_steps": 78105, "loss": 0.6528, "lr": 4.999973512136677e-06, "epoch": 0.506689712566417, "percentage": 10.13, "elapsed_time": "0:21:33", "remaining_time": "3:11:09", "throughput": 19287.07, "total_tokens": 24944512}
|
|
{"current_steps": 7920, "total_steps": 78105, "loss": 0.6276, "lr": 4.999970878086197e-06, "epoch": 0.5070097945073939, "percentage": 10.14, "elapsed_time": "0:21:33", "remaining_time": "3:11:07", "throughput": 19288.91, "total_tokens": 24959744}
|
|
{"current_steps": 7925, "total_steps": 78105, "loss": 0.5793, "lr": 4.999968119200192e-06, "epoch": 0.5073298764483708, "percentage": 10.15, "elapsed_time": "0:21:34", "remaining_time": "3:11:05", "throughput": 19290.94, "total_tokens": 24975488}
|
|
{"current_steps": 7930, "total_steps": 78105, "loss": 0.5143, "lr": 4.999965235478801e-06, "epoch": 0.5076499583893477, "percentage": 10.15, "elapsed_time": "0:21:35", "remaining_time": "3:11:02", "throughput": 19292.68, "total_tokens": 24990272}
|
|
{"current_steps": 7935, "total_steps": 78105, "loss": 0.3795, "lr": 4.9999622269221685e-06, "epoch": 0.5079700403303246, "percentage": 10.16, "elapsed_time": "0:21:36", "remaining_time": "3:11:00", "throughput": 19294.88, "total_tokens": 25006272}
|
|
{"current_steps": 7940, "total_steps": 78105, "loss": 0.4828, "lr": 4.9999590935304435e-06, "epoch": 0.5082901222713014, "percentage": 10.17, "elapsed_time": "0:21:36", "remaining_time": "3:10:58", "throughput": 19297.05, "total_tokens": 25022464}
|
|
{"current_steps": 7945, "total_steps": 78105, "loss": 0.4114, "lr": 4.999955835303784e-06, "epoch": 0.5086102042122783, "percentage": 10.17, "elapsed_time": "0:21:37", "remaining_time": "3:10:56", "throughput": 19298.83, "total_tokens": 25037376}
|
|
{"current_steps": 7950, "total_steps": 78105, "loss": 0.5628, "lr": 4.999952452242352e-06, "epoch": 0.5089302861532552, "percentage": 10.18, "elapsed_time": "0:21:38", "remaining_time": "3:10:54", "throughput": 19300.96, "total_tokens": 25053440}
|
|
{"current_steps": 7955, "total_steps": 78105, "loss": 0.6121, "lr": 4.999948944346315e-06, "epoch": 0.5092503680942321, "percentage": 10.19, "elapsed_time": "0:21:38", "remaining_time": "3:10:52", "throughput": 19302.87, "total_tokens": 25069120}
|
|
{"current_steps": 7960, "total_steps": 78105, "loss": 0.4008, "lr": 4.9999453116158505e-06, "epoch": 0.5095704500352091, "percentage": 10.19, "elapsed_time": "0:21:39", "remaining_time": "3:10:50", "throughput": 19304.69, "total_tokens": 25084224}
|
|
{"current_steps": 7965, "total_steps": 78105, "loss": 0.6273, "lr": 4.999941554051139e-06, "epoch": 0.509890531976186, "percentage": 10.2, "elapsed_time": "0:21:40", "remaining_time": "3:10:48", "throughput": 19306.51, "total_tokens": 25099520}
|
|
{"current_steps": 7970, "total_steps": 78105, "loss": 0.5022, "lr": 4.999937671652367e-06, "epoch": 0.5102106139171628, "percentage": 10.2, "elapsed_time": "0:21:40", "remaining_time": "3:10:46", "throughput": 19308.42, "total_tokens": 25115200}
|
|
{"current_steps": 7975, "total_steps": 78105, "loss": 0.4842, "lr": 4.9999336644197306e-06, "epoch": 0.5105306958581397, "percentage": 10.21, "elapsed_time": "0:21:41", "remaining_time": "3:10:44", "throughput": 19310.55, "total_tokens": 25131520}
|
|
{"current_steps": 7980, "total_steps": 78105, "loss": 0.6849, "lr": 4.999929532353428e-06, "epoch": 0.5108507777991166, "percentage": 10.22, "elapsed_time": "0:21:42", "remaining_time": "3:10:42", "throughput": 19312.59, "total_tokens": 25147776}
|
|
{"current_steps": 7985, "total_steps": 78105, "loss": 0.5415, "lr": 4.999925275453666e-06, "epoch": 0.5111708597400935, "percentage": 10.22, "elapsed_time": "0:21:42", "remaining_time": "3:10:40", "throughput": 19314.33, "total_tokens": 25162816}
|
|
{"current_steps": 7990, "total_steps": 78105, "loss": 0.4387, "lr": 4.999920893720659e-06, "epoch": 0.5114909416810703, "percentage": 10.23, "elapsed_time": "0:21:43", "remaining_time": "3:10:38", "throughput": 19316.44, "total_tokens": 25178752}
|
|
{"current_steps": 7995, "total_steps": 78105, "loss": 0.5952, "lr": 4.999916387154622e-06, "epoch": 0.5118110236220472, "percentage": 10.24, "elapsed_time": "0:21:44", "remaining_time": "3:10:36", "throughput": 19318.27, "total_tokens": 25194048}
|
|
{"current_steps": 8000, "total_steps": 78105, "loss": 0.4733, "lr": 4.999911755755783e-06, "epoch": 0.5121311055630241, "percentage": 10.24, "elapsed_time": "0:21:44", "remaining_time": "3:10:34", "throughput": 19320.18, "total_tokens": 25209216}
|
|
{"current_steps": 8005, "total_steps": 78105, "loss": 0.6132, "lr": 4.999906999524373e-06, "epoch": 0.512451187504001, "percentage": 10.25, "elapsed_time": "0:21:45", "remaining_time": "3:10:32", "throughput": 19322.09, "total_tokens": 25224640}
|
|
{"current_steps": 8010, "total_steps": 78105, "loss": 0.3993, "lr": 4.999902118460629e-06, "epoch": 0.5127712694449779, "percentage": 10.26, "elapsed_time": "0:21:46", "remaining_time": "3:10:30", "throughput": 19323.87, "total_tokens": 25239808}
|
|
{"current_steps": 8015, "total_steps": 78105, "loss": 0.4945, "lr": 4.999897112564795e-06, "epoch": 0.5130913513859549, "percentage": 10.26, "elapsed_time": "0:21:46", "remaining_time": "3:10:28", "throughput": 19326.51, "total_tokens": 25257344}
|
|
{"current_steps": 8020, "total_steps": 78105, "loss": 0.4241, "lr": 4.999891981837119e-06, "epoch": 0.5134114333269317, "percentage": 10.27, "elapsed_time": "0:21:47", "remaining_time": "3:10:26", "throughput": 19328.19, "total_tokens": 25272064}
|
|
{"current_steps": 8025, "total_steps": 78105, "loss": 0.4864, "lr": 4.99988672627786e-06, "epoch": 0.5137315152679086, "percentage": 10.27, "elapsed_time": "0:21:48", "remaining_time": "3:10:24", "throughput": 19330.28, "total_tokens": 25287936}
|
|
{"current_steps": 8030, "total_steps": 78105, "loss": 0.4549, "lr": 4.9998813458872795e-06, "epoch": 0.5140515972088855, "percentage": 10.28, "elapsed_time": "0:21:48", "remaining_time": "3:10:22", "throughput": 19332.12, "total_tokens": 25303232}
|
|
{"current_steps": 8035, "total_steps": 78105, "loss": 0.4298, "lr": 4.999875840665646e-06, "epoch": 0.5143716791498624, "percentage": 10.29, "elapsed_time": "0:21:49", "remaining_time": "3:10:19", "throughput": 19333.99, "total_tokens": 25318656}
|
|
{"current_steps": 8040, "total_steps": 78105, "loss": 0.486, "lr": 4.9998702106132336e-06, "epoch": 0.5146917610908393, "percentage": 10.29, "elapsed_time": "0:21:50", "remaining_time": "3:10:17", "throughput": 19335.74, "total_tokens": 25333760}
|
|
{"current_steps": 8045, "total_steps": 78105, "loss": 0.5555, "lr": 4.999864455730324e-06, "epoch": 0.5150118430318161, "percentage": 10.3, "elapsed_time": "0:21:50", "remaining_time": "3:10:15", "throughput": 19337.84, "total_tokens": 25349824}
|
|
{"current_steps": 8050, "total_steps": 78105, "loss": 0.4811, "lr": 4.9998585760172065e-06, "epoch": 0.515331924972793, "percentage": 10.31, "elapsed_time": "0:21:51", "remaining_time": "3:10:13", "throughput": 19339.78, "total_tokens": 25365376}
|
|
{"current_steps": 8055, "total_steps": 78105, "loss": 0.5462, "lr": 4.999852571474172e-06, "epoch": 0.5156520069137699, "percentage": 10.31, "elapsed_time": "0:21:52", "remaining_time": "3:10:11", "throughput": 19341.76, "total_tokens": 25380928}
|
|
{"current_steps": 8060, "total_steps": 78105, "loss": 0.4673, "lr": 4.999846442101522e-06, "epoch": 0.5159720888547468, "percentage": 10.32, "elapsed_time": "0:21:52", "remaining_time": "3:10:09", "throughput": 19343.63, "total_tokens": 25396352}
|
|
{"current_steps": 8065, "total_steps": 78105, "loss": 0.5058, "lr": 4.999840187899561e-06, "epoch": 0.5162921707957238, "percentage": 10.33, "elapsed_time": "0:21:53", "remaining_time": "3:10:07", "throughput": 19345.45, "total_tokens": 25411456}
|
|
{"current_steps": 8070, "total_steps": 78105, "loss": 0.4942, "lr": 4.9998338088686024e-06, "epoch": 0.5166122527367006, "percentage": 10.33, "elapsed_time": "0:21:54", "remaining_time": "3:10:05", "throughput": 19347.95, "total_tokens": 25428544}
|
|
{"current_steps": 8075, "total_steps": 78105, "loss": 0.5725, "lr": 4.999827305008964e-06, "epoch": 0.5169323346776775, "percentage": 10.34, "elapsed_time": "0:21:54", "remaining_time": "3:10:03", "throughput": 19349.89, "total_tokens": 25443968}
|
|
{"current_steps": 8080, "total_steps": 78105, "loss": 0.4588, "lr": 4.999820676320972e-06, "epoch": 0.5172524166186544, "percentage": 10.35, "elapsed_time": "0:21:55", "remaining_time": "3:10:01", "throughput": 19351.68, "total_tokens": 25458880}
|
|
{"current_steps": 8085, "total_steps": 78105, "loss": 0.3874, "lr": 4.999813922804956e-06, "epoch": 0.5175724985596313, "percentage": 10.35, "elapsed_time": "0:21:56", "remaining_time": "3:09:59", "throughput": 19353.56, "total_tokens": 25474304}
|
|
{"current_steps": 8090, "total_steps": 78105, "loss": 0.6068, "lr": 4.999807044461255e-06, "epoch": 0.5178925805006082, "percentage": 10.36, "elapsed_time": "0:21:56", "remaining_time": "3:09:57", "throughput": 19355.34, "total_tokens": 25489472}
|
|
{"current_steps": 8095, "total_steps": 78105, "loss": 0.488, "lr": 4.99980004129021e-06, "epoch": 0.518212662441585, "percentage": 10.36, "elapsed_time": "0:21:57", "remaining_time": "3:09:55", "throughput": 19357.18, "total_tokens": 25504896}
|
|
{"current_steps": 8100, "total_steps": 78105, "loss": 0.5093, "lr": 4.999792913292172e-06, "epoch": 0.5185327443825619, "percentage": 10.37, "elapsed_time": "0:21:58", "remaining_time": "3:09:52", "throughput": 19358.77, "total_tokens": 25519360}
|
|
{"current_steps": 8105, "total_steps": 78105, "loss": 0.6216, "lr": 4.999785660467496e-06, "epoch": 0.5188528263235388, "percentage": 10.38, "elapsed_time": "0:21:58", "remaining_time": "3:09:51", "throughput": 19360.93, "total_tokens": 25535680}
|
|
{"current_steps": 8110, "total_steps": 78105, "loss": 0.5825, "lr": 4.999778282816547e-06, "epoch": 0.5191729082645157, "percentage": 10.38, "elapsed_time": "0:21:59", "remaining_time": "3:09:48", "throughput": 19362.65, "total_tokens": 25550592}
|
|
{"current_steps": 8115, "total_steps": 78105, "loss": 0.5011, "lr": 4.99977078033969e-06, "epoch": 0.5194929902054926, "percentage": 10.39, "elapsed_time": "0:22:00", "remaining_time": "3:09:46", "throughput": 19364.38, "total_tokens": 25565696}
|
|
{"current_steps": 8120, "total_steps": 78105, "loss": 0.4527, "lr": 4.999763153037302e-06, "epoch": 0.5198130721464695, "percentage": 10.4, "elapsed_time": "0:22:00", "remaining_time": "3:09:44", "throughput": 19366.48, "total_tokens": 25581568}
|
|
{"current_steps": 8125, "total_steps": 78105, "loss": 0.6174, "lr": 4.9997554009097625e-06, "epoch": 0.5201331540874464, "percentage": 10.4, "elapsed_time": "0:22:01", "remaining_time": "3:09:42", "throughput": 19368.53, "total_tokens": 25597760}
|
|
{"current_steps": 8130, "total_steps": 78105, "loss": 0.534, "lr": 4.999747523957459e-06, "epoch": 0.5204532360284233, "percentage": 10.41, "elapsed_time": "0:22:02", "remaining_time": "3:09:41", "throughput": 19371.09, "total_tokens": 25615424}
|
|
{"current_steps": 8135, "total_steps": 78105, "loss": 0.5229, "lr": 4.9997395221807845e-06, "epoch": 0.5207733179694002, "percentage": 10.42, "elapsed_time": "0:22:03", "remaining_time": "3:09:39", "throughput": 19372.97, "total_tokens": 25630720}
|
|
{"current_steps": 8140, "total_steps": 78105, "loss": 0.5855, "lr": 4.99973139558014e-06, "epoch": 0.5210933999103771, "percentage": 10.42, "elapsed_time": "0:22:03", "remaining_time": "3:09:38", "throughput": 19375.75, "total_tokens": 25649280}
|
|
{"current_steps": 8145, "total_steps": 78105, "loss": 0.6326, "lr": 4.999723144155929e-06, "epoch": 0.5214134818513539, "percentage": 10.43, "elapsed_time": "0:22:04", "remaining_time": "3:09:36", "throughput": 19377.45, "total_tokens": 25664576}
|
|
{"current_steps": 8150, "total_steps": 78105, "loss": 0.5045, "lr": 4.999714767908565e-06, "epoch": 0.5217335637923308, "percentage": 10.43, "elapsed_time": "0:22:05", "remaining_time": "3:09:34", "throughput": 19379.43, "total_tokens": 25680384}
|
|
{"current_steps": 8155, "total_steps": 78105, "loss": 0.466, "lr": 4.999706266838466e-06, "epoch": 0.5220536457333077, "percentage": 10.44, "elapsed_time": "0:22:05", "remaining_time": "3:09:32", "throughput": 19381.39, "total_tokens": 25696128}
|
|
{"current_steps": 8160, "total_steps": 78105, "loss": 0.4224, "lr": 4.9996976409460575e-06, "epoch": 0.5223737276742846, "percentage": 10.45, "elapsed_time": "0:22:06", "remaining_time": "3:09:30", "throughput": 19383.6, "total_tokens": 25712768}
|
|
{"current_steps": 8165, "total_steps": 78105, "loss": 0.607, "lr": 4.9996888902317684e-06, "epoch": 0.5226938096152615, "percentage": 10.45, "elapsed_time": "0:22:07", "remaining_time": "3:09:28", "throughput": 19385.28, "total_tokens": 25727616}
|
|
{"current_steps": 8170, "total_steps": 78105, "loss": 0.4967, "lr": 4.999680014696037e-06, "epoch": 0.5230138915562383, "percentage": 10.46, "elapsed_time": "0:22:07", "remaining_time": "3:09:26", "throughput": 19387.63, "total_tokens": 25744384}
|
|
{"current_steps": 8175, "total_steps": 78105, "loss": 0.434, "lr": 4.999671014339306e-06, "epoch": 0.5233339734972153, "percentage": 10.47, "elapsed_time": "0:22:08", "remaining_time": "3:09:24", "throughput": 19389.64, "total_tokens": 25760640}
|
|
{"current_steps": 8180, "total_steps": 78105, "loss": 0.4633, "lr": 4.999661889162024e-06, "epoch": 0.5236540554381922, "percentage": 10.47, "elapsed_time": "0:22:09", "remaining_time": "3:09:22", "throughput": 19391.64, "total_tokens": 25776640}
|
|
{"current_steps": 8185, "total_steps": 78105, "loss": 0.4204, "lr": 4.999652639164648e-06, "epoch": 0.5239741373791691, "percentage": 10.48, "elapsed_time": "0:22:09", "remaining_time": "3:09:20", "throughput": 19393.41, "total_tokens": 25791744}
|
|
{"current_steps": 8190, "total_steps": 78105, "loss": 0.7351, "lr": 4.99964326434764e-06, "epoch": 0.524294219320146, "percentage": 10.49, "elapsed_time": "0:22:10", "remaining_time": "3:09:19", "throughput": 19395.53, "total_tokens": 25808256}
|
|
{"current_steps": 8195, "total_steps": 78105, "loss": 0.4311, "lr": 4.9996337647114675e-06, "epoch": 0.5246143012611229, "percentage": 10.49, "elapsed_time": "0:22:11", "remaining_time": "3:09:17", "throughput": 19397.41, "total_tokens": 25824128}
|
|
{"current_steps": 8200, "total_steps": 78105, "loss": 0.4729, "lr": 4.999624140256605e-06, "epoch": 0.5249343832020997, "percentage": 10.5, "elapsed_time": "0:22:12", "remaining_time": "3:09:15", "throughput": 19399.59, "total_tokens": 25840576}
|
|
{"current_steps": 8205, "total_steps": 78105, "loss": 0.3891, "lr": 4.999614390983532e-06, "epoch": 0.5252544651430766, "percentage": 10.51, "elapsed_time": "0:22:12", "remaining_time": "3:09:13", "throughput": 19401.47, "total_tokens": 25856256}
|
|
{"current_steps": 8210, "total_steps": 78105, "loss": 0.4825, "lr": 4.9996045168927365e-06, "epoch": 0.5255745470840535, "percentage": 10.51, "elapsed_time": "0:22:13", "remaining_time": "3:09:11", "throughput": 19403.22, "total_tokens": 25871424}
|
|
{"current_steps": 8215, "total_steps": 78105, "loss": 0.619, "lr": 4.999594517984711e-06, "epoch": 0.5258946290250304, "percentage": 10.52, "elapsed_time": "0:22:14", "remaining_time": "3:09:09", "throughput": 19404.81, "total_tokens": 25886272}
|
|
{"current_steps": 8220, "total_steps": 78105, "loss": 0.7025, "lr": 4.999584394259956e-06, "epoch": 0.5262147109660072, "percentage": 10.52, "elapsed_time": "0:22:14", "remaining_time": "3:09:07", "throughput": 19406.69, "total_tokens": 25901952}
|
|
{"current_steps": 8225, "total_steps": 78105, "loss": 0.4938, "lr": 4.999574145718977e-06, "epoch": 0.5265347929069842, "percentage": 10.53, "elapsed_time": "0:22:15", "remaining_time": "3:09:05", "throughput": 19408.51, "total_tokens": 25917888}
|
|
{"current_steps": 8230, "total_steps": 78105, "loss": 0.4671, "lr": 4.9995637723622834e-06, "epoch": 0.5268548748479611, "percentage": 10.54, "elapsed_time": "0:22:16", "remaining_time": "3:09:03", "throughput": 19410.39, "total_tokens": 25933824}
|
|
{"current_steps": 8235, "total_steps": 78105, "loss": 0.5235, "lr": 4.999553274190396e-06, "epoch": 0.527174956788938, "percentage": 10.54, "elapsed_time": "0:22:16", "remaining_time": "3:09:02", "throughput": 19412.8, "total_tokens": 25951360}
|
|
{"current_steps": 8240, "total_steps": 78105, "loss": 0.4596, "lr": 4.999542651203837e-06, "epoch": 0.5274950387299149, "percentage": 10.55, "elapsed_time": "0:22:17", "remaining_time": "3:09:00", "throughput": 19414.71, "total_tokens": 25967232}
|
|
{"current_steps": 8245, "total_steps": 78105, "loss": 0.5693, "lr": 4.999531903403138e-06, "epoch": 0.5278151206708918, "percentage": 10.56, "elapsed_time": "0:22:18", "remaining_time": "3:08:58", "throughput": 19416.44, "total_tokens": 25982592}
|
|
{"current_steps": 8250, "total_steps": 78105, "loss": 0.5243, "lr": 4.9995210307888354e-06, "epoch": 0.5281352026118686, "percentage": 10.56, "elapsed_time": "0:22:18", "remaining_time": "3:08:56", "throughput": 19418.13, "total_tokens": 25997824}
|
|
{"current_steps": 8255, "total_steps": 78105, "loss": 0.5829, "lr": 4.999510033361472e-06, "epoch": 0.5284552845528455, "percentage": 10.57, "elapsed_time": "0:22:19", "remaining_time": "3:08:54", "throughput": 19420.05, "total_tokens": 26013632}
|
|
{"current_steps": 8260, "total_steps": 78105, "loss": 0.4653, "lr": 4.999498911121596e-06, "epoch": 0.5287753664938224, "percentage": 10.58, "elapsed_time": "0:22:20", "remaining_time": "3:08:52", "throughput": 19421.66, "total_tokens": 26028800}
|
|
{"current_steps": 8265, "total_steps": 78105, "loss": 0.455, "lr": 4.999487664069765e-06, "epoch": 0.5290954484347993, "percentage": 10.58, "elapsed_time": "0:22:20", "remaining_time": "3:08:50", "throughput": 19423.41, "total_tokens": 26044672}
|
|
{"current_steps": 8270, "total_steps": 78105, "loss": 0.5519, "lr": 4.999476292206539e-06, "epoch": 0.5294155303757762, "percentage": 10.59, "elapsed_time": "0:22:21", "remaining_time": "3:08:48", "throughput": 19425.22, "total_tokens": 26060544}
|
|
{"current_steps": 8275, "total_steps": 78105, "loss": 0.5508, "lr": 4.999464795532487e-06, "epoch": 0.529735612316753, "percentage": 10.59, "elapsed_time": "0:22:22", "remaining_time": "3:08:46", "throughput": 19426.98, "total_tokens": 26076160}
|
|
{"current_steps": 8280, "total_steps": 78105, "loss": 0.512, "lr": 4.999453174048181e-06, "epoch": 0.53005569425773, "percentage": 10.6, "elapsed_time": "0:22:22", "remaining_time": "3:08:45", "throughput": 19428.96, "total_tokens": 26092352}
|
|
{"current_steps": 8285, "total_steps": 78105, "loss": 0.4312, "lr": 4.999441427754204e-06, "epoch": 0.5303757761987069, "percentage": 10.61, "elapsed_time": "0:22:23", "remaining_time": "3:08:43", "throughput": 19430.54, "total_tokens": 26107776}
|
|
{"current_steps": 8290, "total_steps": 78105, "loss": 0.4546, "lr": 4.9994295566511405e-06, "epoch": 0.5306958581396838, "percentage": 10.61, "elapsed_time": "0:22:24", "remaining_time": "3:08:41", "throughput": 19432.49, "total_tokens": 26123712}
|
|
{"current_steps": 8295, "total_steps": 78105, "loss": 0.534, "lr": 4.9994175607395835e-06, "epoch": 0.5310159400806607, "percentage": 10.62, "elapsed_time": "0:22:25", "remaining_time": "3:08:39", "throughput": 19434.31, "total_tokens": 26139200}
|
|
{"current_steps": 8300, "total_steps": 78105, "loss": 0.5576, "lr": 4.999405440020133e-06, "epoch": 0.5313360220216375, "percentage": 10.63, "elapsed_time": "0:22:25", "remaining_time": "3:08:37", "throughput": 19435.74, "total_tokens": 26153600}
|
|
{"current_steps": 8305, "total_steps": 78105, "loss": 0.6323, "lr": 4.999393194493395e-06, "epoch": 0.5316561039626144, "percentage": 10.63, "elapsed_time": "0:22:26", "remaining_time": "3:08:35", "throughput": 19437.67, "total_tokens": 26169600}
|
|
{"current_steps": 8310, "total_steps": 78105, "loss": 0.4154, "lr": 4.999380824159978e-06, "epoch": 0.5319761859035913, "percentage": 10.64, "elapsed_time": "0:22:27", "remaining_time": "3:08:33", "throughput": 19439.69, "total_tokens": 26185536}
|
|
{"current_steps": 8315, "total_steps": 78105, "loss": 0.4978, "lr": 4.999368329020502e-06, "epoch": 0.5322962678445682, "percentage": 10.65, "elapsed_time": "0:22:27", "remaining_time": "3:08:31", "throughput": 19441.52, "total_tokens": 26201536}
|
|
{"current_steps": 8320, "total_steps": 78105, "loss": 0.4387, "lr": 4.99935570907559e-06, "epoch": 0.5326163497855451, "percentage": 10.65, "elapsed_time": "0:22:28", "remaining_time": "3:08:29", "throughput": 19443.11, "total_tokens": 26216576}
|
|
{"current_steps": 8325, "total_steps": 78105, "loss": 0.3684, "lr": 4.999342964325873e-06, "epoch": 0.5329364317265219, "percentage": 10.66, "elapsed_time": "0:22:29", "remaining_time": "3:08:27", "throughput": 19444.96, "total_tokens": 26232640}
|
|
{"current_steps": 8330, "total_steps": 78105, "loss": 0.5969, "lr": 4.999330094771987e-06, "epoch": 0.5332565136674989, "percentage": 10.67, "elapsed_time": "0:22:29", "remaining_time": "3:08:26", "throughput": 19446.72, "total_tokens": 26248448}
|
|
{"current_steps": 8335, "total_steps": 78105, "loss": 0.5528, "lr": 4.999317100414575e-06, "epoch": 0.5335765956084758, "percentage": 10.67, "elapsed_time": "0:22:30", "remaining_time": "3:08:24", "throughput": 19448.38, "total_tokens": 26263872}
|
|
{"current_steps": 8340, "total_steps": 78105, "loss": 0.594, "lr": 4.999303981254284e-06, "epoch": 0.5338966775494527, "percentage": 10.68, "elapsed_time": "0:22:31", "remaining_time": "3:08:22", "throughput": 19450.33, "total_tokens": 26279872}
|
|
{"current_steps": 8345, "total_steps": 78105, "loss": 0.4841, "lr": 4.999290737291772e-06, "epoch": 0.5342167594904296, "percentage": 10.68, "elapsed_time": "0:22:31", "remaining_time": "3:08:20", "throughput": 19452.32, "total_tokens": 26295680}
|
|
{"current_steps": 8350, "total_steps": 78105, "loss": 0.5733, "lr": 4.999277368527699e-06, "epoch": 0.5345368414314065, "percentage": 10.69, "elapsed_time": "0:22:32", "remaining_time": "3:08:18", "throughput": 19454.01, "total_tokens": 26311040}
|
|
{"current_steps": 8355, "total_steps": 78105, "loss": 0.8566, "lr": 4.9992638749627315e-06, "epoch": 0.5348569233723833, "percentage": 10.7, "elapsed_time": "0:22:33", "remaining_time": "3:08:16", "throughput": 19455.72, "total_tokens": 26326144}
|
|
{"current_steps": 8360, "total_steps": 78105, "loss": 0.433, "lr": 4.999250256597544e-06, "epoch": 0.5351770053133602, "percentage": 10.7, "elapsed_time": "0:22:33", "remaining_time": "3:08:14", "throughput": 19457.44, "total_tokens": 26341952}
|
|
{"current_steps": 8365, "total_steps": 78105, "loss": 0.3803, "lr": 4.999236513432818e-06, "epoch": 0.5354970872543371, "percentage": 10.71, "elapsed_time": "0:22:34", "remaining_time": "3:08:12", "throughput": 19458.92, "total_tokens": 26356672}
|
|
{"current_steps": 8370, "total_steps": 78105, "loss": 0.572, "lr": 4.999222645469239e-06, "epoch": 0.535817169195314, "percentage": 10.72, "elapsed_time": "0:22:35", "remaining_time": "3:08:10", "throughput": 19460.87, "total_tokens": 26373056}
|
|
{"current_steps": 8375, "total_steps": 78105, "loss": 0.48, "lr": 4.999208652707497e-06, "epoch": 0.5361372511362908, "percentage": 10.72, "elapsed_time": "0:22:35", "remaining_time": "3:08:08", "throughput": 19462.45, "total_tokens": 26388032}
|
|
{"current_steps": 8380, "total_steps": 78105, "loss": 0.5506, "lr": 4.999194535148294e-06, "epoch": 0.5364573330772677, "percentage": 10.73, "elapsed_time": "0:22:36", "remaining_time": "3:08:06", "throughput": 19464.01, "total_tokens": 26402880}
|
|
{"current_steps": 8385, "total_steps": 78105, "loss": 0.6615, "lr": 4.999180292792334e-06, "epoch": 0.5367774150182447, "percentage": 10.74, "elapsed_time": "0:22:37", "remaining_time": "3:08:04", "throughput": 19466.0, "total_tokens": 26419328}
|
|
{"current_steps": 8390, "total_steps": 78105, "loss": 0.5452, "lr": 4.9991659256403276e-06, "epoch": 0.5370974969592216, "percentage": 10.74, "elapsed_time": "0:22:37", "remaining_time": "3:08:02", "throughput": 19467.62, "total_tokens": 26434112}
|
|
{"current_steps": 8395, "total_steps": 78105, "loss": 0.49, "lr": 4.999151433692992e-06, "epoch": 0.5374175789001985, "percentage": 10.75, "elapsed_time": "0:22:38", "remaining_time": "3:08:01", "throughput": 19469.77, "total_tokens": 26450688}
|
|
{"current_steps": 8400, "total_steps": 78105, "loss": 0.4958, "lr": 4.999136816951053e-06, "epoch": 0.5377376608411754, "percentage": 10.75, "elapsed_time": "0:22:39", "remaining_time": "3:07:59", "throughput": 19471.57, "total_tokens": 26466368}
|
|
{"current_steps": 8405, "total_steps": 78105, "loss": 0.4806, "lr": 4.9991220754152366e-06, "epoch": 0.5380577427821522, "percentage": 10.76, "elapsed_time": "0:22:39", "remaining_time": "3:07:57", "throughput": 19473.57, "total_tokens": 26482624}
|
|
{"current_steps": 8410, "total_steps": 78105, "loss": 0.49, "lr": 4.999107209086283e-06, "epoch": 0.5383778247231291, "percentage": 10.77, "elapsed_time": "0:22:40", "remaining_time": "3:07:55", "throughput": 19475.63, "total_tokens": 26499200}
|
|
{"current_steps": 8415, "total_steps": 78105, "loss": 0.5364, "lr": 4.9990922179649325e-06, "epoch": 0.538697906664106, "percentage": 10.77, "elapsed_time": "0:22:41", "remaining_time": "3:07:53", "throughput": 19477.2, "total_tokens": 26514048}
|
|
{"current_steps": 8420, "total_steps": 78105, "loss": 0.4705, "lr": 4.9990771020519336e-06, "epoch": 0.5390179886050829, "percentage": 10.78, "elapsed_time": "0:22:41", "remaining_time": "3:07:51", "throughput": 19478.89, "total_tokens": 26529216}
|
|
{"current_steps": 8425, "total_steps": 78105, "loss": 0.497, "lr": 4.999061861348041e-06, "epoch": 0.5393380705460598, "percentage": 10.79, "elapsed_time": "0:22:42", "remaining_time": "3:07:49", "throughput": 19480.8, "total_tokens": 26545152}
|
|
{"current_steps": 8430, "total_steps": 78105, "loss": 0.6447, "lr": 4.999046495854017e-06, "epoch": 0.5396581524870366, "percentage": 10.79, "elapsed_time": "0:22:43", "remaining_time": "3:07:47", "throughput": 19482.55, "total_tokens": 26560512}
|
|
{"current_steps": 8435, "total_steps": 78105, "loss": 0.4271, "lr": 4.999031005570628e-06, "epoch": 0.5399782344280136, "percentage": 10.8, "elapsed_time": "0:22:43", "remaining_time": "3:07:45", "throughput": 19484.16, "total_tokens": 26575488}
|
|
{"current_steps": 8440, "total_steps": 78105, "loss": 0.4814, "lr": 4.9990153904986475e-06, "epoch": 0.5402983163689905, "percentage": 10.81, "elapsed_time": "0:22:44", "remaining_time": "3:07:43", "throughput": 19486.04, "total_tokens": 26591040}
|
|
{"current_steps": 8445, "total_steps": 78105, "loss": 0.4528, "lr": 4.998999650638855e-06, "epoch": 0.5406183983099674, "percentage": 10.81, "elapsed_time": "0:22:45", "remaining_time": "3:07:41", "throughput": 19488.04, "total_tokens": 26606976}
|
|
{"current_steps": 8450, "total_steps": 78105, "loss": 0.483, "lr": 4.998983785992036e-06, "epoch": 0.5409384802509443, "percentage": 10.82, "elapsed_time": "0:22:45", "remaining_time": "3:07:40", "throughput": 19490.12, "total_tokens": 26623296}
|
|
{"current_steps": 8455, "total_steps": 78105, "loss": 0.4677, "lr": 4.9989677965589846e-06, "epoch": 0.5412585621919211, "percentage": 10.83, "elapsed_time": "0:22:46", "remaining_time": "3:07:38", "throughput": 19491.77, "total_tokens": 26638336}
|
|
{"current_steps": 8460, "total_steps": 78105, "loss": 0.3693, "lr": 4.9989516823404985e-06, "epoch": 0.541578644132898, "percentage": 10.83, "elapsed_time": "0:22:47", "remaining_time": "3:07:36", "throughput": 19493.86, "total_tokens": 26654400}
|
|
{"current_steps": 8465, "total_steps": 78105, "loss": 0.4609, "lr": 4.9989354433373805e-06, "epoch": 0.5418987260738749, "percentage": 10.84, "elapsed_time": "0:22:47", "remaining_time": "3:07:34", "throughput": 19495.53, "total_tokens": 26669568}
|
|
{"current_steps": 8470, "total_steps": 78105, "loss": 0.6815, "lr": 4.9989190795504435e-06, "epoch": 0.5422188080148518, "percentage": 10.84, "elapsed_time": "0:22:48", "remaining_time": "3:07:32", "throughput": 19497.56, "total_tokens": 26686016}
|
|
{"current_steps": 8475, "total_steps": 78105, "loss": 0.5297, "lr": 4.998902590980504e-06, "epoch": 0.5425388899558287, "percentage": 10.85, "elapsed_time": "0:22:49", "remaining_time": "3:07:30", "throughput": 19499.34, "total_tokens": 26701504}
|
|
{"current_steps": 8480, "total_steps": 78105, "loss": 0.8467, "lr": 4.998885977628386e-06, "epoch": 0.5428589718968055, "percentage": 10.86, "elapsed_time": "0:22:50", "remaining_time": "3:07:28", "throughput": 19501.16, "total_tokens": 26717120}
|
|
{"current_steps": 8485, "total_steps": 78105, "loss": 0.6439, "lr": 4.998869239494918e-06, "epoch": 0.5431790538377824, "percentage": 10.86, "elapsed_time": "0:22:50", "remaining_time": "3:07:26", "throughput": 19502.99, "total_tokens": 26732736}
|
|
{"current_steps": 8490, "total_steps": 78105, "loss": 0.5143, "lr": 4.998852376580937e-06, "epoch": 0.5434991357787594, "percentage": 10.87, "elapsed_time": "0:22:51", "remaining_time": "3:07:24", "throughput": 19504.42, "total_tokens": 26747392}
|
|
{"current_steps": 8495, "total_steps": 78105, "loss": 0.5548, "lr": 4.9988353888872835e-06, "epoch": 0.5438192177197363, "percentage": 10.88, "elapsed_time": "0:22:51", "remaining_time": "3:07:22", "throughput": 19505.94, "total_tokens": 26762112}
|
|
{"current_steps": 8500, "total_steps": 78105, "loss": 0.5419, "lr": 4.998818276414807e-06, "epoch": 0.5441392996607132, "percentage": 10.88, "elapsed_time": "0:22:52", "remaining_time": "3:07:20", "throughput": 19507.73, "total_tokens": 26777856}
|
|
{"current_steps": 8505, "total_steps": 78105, "loss": 0.5263, "lr": 4.998801039164361e-06, "epoch": 0.54445938160169, "percentage": 10.89, "elapsed_time": "0:22:53", "remaining_time": "3:07:18", "throughput": 19509.71, "total_tokens": 26794048}
|
|
{"current_steps": 8510, "total_steps": 78105, "loss": 0.3765, "lr": 4.998783677136808e-06, "epoch": 0.5447794635426669, "percentage": 10.9, "elapsed_time": "0:22:54", "remaining_time": "3:07:16", "throughput": 19511.6, "total_tokens": 26809792}
|
|
{"current_steps": 8515, "total_steps": 78105, "loss": 0.5347, "lr": 4.998766190333013e-06, "epoch": 0.5450995454836438, "percentage": 10.9, "elapsed_time": "0:22:54", "remaining_time": "3:07:14", "throughput": 19513.3, "total_tokens": 26825024}
|
|
{"current_steps": 8520, "total_steps": 78105, "loss": 0.5222, "lr": 4.998748578753851e-06, "epoch": 0.5454196274246207, "percentage": 10.91, "elapsed_time": "0:22:55", "remaining_time": "3:07:13", "throughput": 19515.52, "total_tokens": 26841536}
|
|
{"current_steps": 8525, "total_steps": 78105, "loss": 0.4838, "lr": 4.9987308424002e-06, "epoch": 0.5457397093655976, "percentage": 10.91, "elapsed_time": "0:22:56", "remaining_time": "3:07:11", "throughput": 19517.47, "total_tokens": 26857600}
|
|
{"current_steps": 8530, "total_steps": 78105, "loss": 0.4069, "lr": 4.998712981272946e-06, "epoch": 0.5460597913065744, "percentage": 10.92, "elapsed_time": "0:22:56", "remaining_time": "3:07:09", "throughput": 19519.69, "total_tokens": 26874176}
|
|
{"current_steps": 8535, "total_steps": 78105, "loss": 0.5914, "lr": 4.9986949953729815e-06, "epoch": 0.5463798732475513, "percentage": 10.93, "elapsed_time": "0:22:57", "remaining_time": "3:07:07", "throughput": 19521.63, "total_tokens": 26890176}
|
|
{"current_steps": 8540, "total_steps": 78105, "loss": 0.3921, "lr": 4.998676884701203e-06, "epoch": 0.5466999551885282, "percentage": 10.93, "elapsed_time": "0:22:58", "remaining_time": "3:07:05", "throughput": 19523.32, "total_tokens": 26905728}
|
|
{"current_steps": 8545, "total_steps": 78105, "loss": 0.5498, "lr": 4.998658649258517e-06, "epoch": 0.5470200371295052, "percentage": 10.94, "elapsed_time": "0:22:58", "remaining_time": "3:07:04", "throughput": 19525.0, "total_tokens": 26921280}
|
|
{"current_steps": 8550, "total_steps": 78105, "loss": 0.5778, "lr": 4.998640289045833e-06, "epoch": 0.5473401190704821, "percentage": 10.95, "elapsed_time": "0:22:59", "remaining_time": "3:07:02", "throughput": 19526.66, "total_tokens": 26936704}
|
|
{"current_steps": 8555, "total_steps": 78105, "loss": 0.4431, "lr": 4.998621804064068e-06, "epoch": 0.547660201011459, "percentage": 10.95, "elapsed_time": "0:23:00", "remaining_time": "3:07:00", "throughput": 19528.52, "total_tokens": 26952448}
|
|
{"current_steps": 8560, "total_steps": 78105, "loss": 0.4593, "lr": 4.9986031943141444e-06, "epoch": 0.5479802829524358, "percentage": 10.96, "elapsed_time": "0:23:00", "remaining_time": "3:06:58", "throughput": 19530.16, "total_tokens": 26967680}
|
|
{"current_steps": 8565, "total_steps": 78105, "loss": 0.4198, "lr": 4.998584459796992e-06, "epoch": 0.5483003648934127, "percentage": 10.97, "elapsed_time": "0:23:01", "remaining_time": "3:06:56", "throughput": 19531.63, "total_tokens": 26982400}
|
|
{"current_steps": 8570, "total_steps": 78105, "loss": 0.4486, "lr": 4.998565600513546e-06, "epoch": 0.5486204468343896, "percentage": 10.97, "elapsed_time": "0:23:02", "remaining_time": "3:06:54", "throughput": 19533.58, "total_tokens": 26998400}
|
|
{"current_steps": 8575, "total_steps": 78105, "loss": 0.4721, "lr": 4.99854661646475e-06, "epoch": 0.5489405287753665, "percentage": 10.98, "elapsed_time": "0:23:02", "remaining_time": "3:06:53", "throughput": 19535.84, "total_tokens": 27015936}
|
|
{"current_steps": 8580, "total_steps": 78105, "loss": 0.3714, "lr": 4.998527507651549e-06, "epoch": 0.5492606107163434, "percentage": 10.99, "elapsed_time": "0:23:03", "remaining_time": "3:06:50", "throughput": 19537.33, "total_tokens": 27030528}
|
|
{"current_steps": 8585, "total_steps": 78105, "loss": 0.5324, "lr": 4.9985082740748996e-06, "epoch": 0.5495806926573202, "percentage": 10.99, "elapsed_time": "0:23:04", "remaining_time": "3:06:49", "throughput": 19539.08, "total_tokens": 27046080}
|
|
{"current_steps": 8590, "total_steps": 78105, "loss": 0.545, "lr": 4.998488915735761e-06, "epoch": 0.5499007745982971, "percentage": 11.0, "elapsed_time": "0:23:04", "remaining_time": "3:06:47", "throughput": 19540.76, "total_tokens": 27061504}
|
|
{"current_steps": 8595, "total_steps": 78105, "loss": 0.4747, "lr": 4.9984694326351e-06, "epoch": 0.5502208565392741, "percentage": 11.0, "elapsed_time": "0:23:05", "remaining_time": "3:06:45", "throughput": 19542.49, "total_tokens": 27077056}
|
|
{"current_steps": 8600, "total_steps": 78105, "loss": 0.4366, "lr": 4.998449824773889e-06, "epoch": 0.550540938480251, "percentage": 11.01, "elapsed_time": "0:23:06", "remaining_time": "3:06:43", "throughput": 19545.06, "total_tokens": 27095168}
|
|
{"current_steps": 8605, "total_steps": 78105, "loss": 0.4849, "lr": 4.998430092153108e-06, "epoch": 0.5508610204212279, "percentage": 11.02, "elapsed_time": "0:23:06", "remaining_time": "3:06:41", "throughput": 19546.64, "total_tokens": 27110144}
|
|
{"current_steps": 8610, "total_steps": 78105, "loss": 0.6933, "lr": 4.9984102347737425e-06, "epoch": 0.5511811023622047, "percentage": 11.02, "elapsed_time": "0:23:07", "remaining_time": "3:06:39", "throughput": 19548.13, "total_tokens": 27124864}
|
|
{"current_steps": 8615, "total_steps": 78105, "loss": 0.4974, "lr": 4.998390252636783e-06, "epoch": 0.5515011843031816, "percentage": 11.03, "elapsed_time": "0:23:08", "remaining_time": "3:06:37", "throughput": 19549.76, "total_tokens": 27140160}
|
|
{"current_steps": 8620, "total_steps": 78105, "loss": 0.5157, "lr": 4.998370145743229e-06, "epoch": 0.5518212662441585, "percentage": 11.04, "elapsed_time": "0:23:08", "remaining_time": "3:06:36", "throughput": 19551.42, "total_tokens": 27155712}
|
|
{"current_steps": 8625, "total_steps": 78105, "loss": 0.5579, "lr": 4.998349914094082e-06, "epoch": 0.5521413481851354, "percentage": 11.04, "elapsed_time": "0:23:09", "remaining_time": "3:06:34", "throughput": 19553.31, "total_tokens": 27171520}
|
|
{"current_steps": 8630, "total_steps": 78105, "loss": 0.4922, "lr": 4.998329557690354e-06, "epoch": 0.5524614301261123, "percentage": 11.05, "elapsed_time": "0:23:10", "remaining_time": "3:06:32", "throughput": 19554.99, "total_tokens": 27186752}
|
|
{"current_steps": 8635, "total_steps": 78105, "loss": 0.4771, "lr": 4.998309076533062e-06, "epoch": 0.5527815120670891, "percentage": 11.06, "elapsed_time": "0:23:10", "remaining_time": "3:06:30", "throughput": 19556.55, "total_tokens": 27201664}
|
|
{"current_steps": 8640, "total_steps": 78105, "loss": 0.6043, "lr": 4.998288470623228e-06, "epoch": 0.553101594008066, "percentage": 11.06, "elapsed_time": "0:23:11", "remaining_time": "3:06:28", "throughput": 19558.78, "total_tokens": 27218880}
|
|
{"current_steps": 8645, "total_steps": 78105, "loss": 0.4912, "lr": 4.9982677399618805e-06, "epoch": 0.5534216759490429, "percentage": 11.07, "elapsed_time": "0:23:12", "remaining_time": "3:06:27", "throughput": 19560.82, "total_tokens": 27235648}
|
|
{"current_steps": 8650, "total_steps": 78105, "loss": 0.4957, "lr": 4.998246884550056e-06, "epoch": 0.5537417578900199, "percentage": 11.07, "elapsed_time": "0:23:13", "remaining_time": "3:06:25", "throughput": 19562.43, "total_tokens": 27250880}
|
|
{"current_steps": 8655, "total_steps": 78105, "loss": 0.3812, "lr": 4.998225904388793e-06, "epoch": 0.5540618398309968, "percentage": 11.08, "elapsed_time": "0:23:13", "remaining_time": "3:06:23", "throughput": 19564.27, "total_tokens": 27266880}
|
|
{"current_steps": 8660, "total_steps": 78105, "loss": 0.6054, "lr": 4.998204799479142e-06, "epoch": 0.5543819217719737, "percentage": 11.09, "elapsed_time": "0:23:14", "remaining_time": "3:06:21", "throughput": 19566.09, "total_tokens": 27282816}
|
|
{"current_steps": 8665, "total_steps": 78105, "loss": 0.5391, "lr": 4.998183569822156e-06, "epoch": 0.5547020037129505, "percentage": 11.09, "elapsed_time": "0:23:15", "remaining_time": "3:06:19", "throughput": 19567.82, "total_tokens": 27298496}
|
|
{"current_steps": 8670, "total_steps": 78105, "loss": 0.4191, "lr": 4.998162215418896e-06, "epoch": 0.5550220856539274, "percentage": 11.1, "elapsed_time": "0:23:15", "remaining_time": "3:06:17", "throughput": 19569.48, "total_tokens": 27313856}
|
|
{"current_steps": 8675, "total_steps": 78105, "loss": 0.3481, "lr": 4.998140736270426e-06, "epoch": 0.5553421675949043, "percentage": 11.11, "elapsed_time": "0:23:16", "remaining_time": "3:06:15", "throughput": 19570.97, "total_tokens": 27328448}
|
|
{"current_steps": 8680, "total_steps": 78105, "loss": 0.4723, "lr": 4.998119132377821e-06, "epoch": 0.5556622495358812, "percentage": 11.11, "elapsed_time": "0:23:17", "remaining_time": "3:06:13", "throughput": 19572.66, "total_tokens": 27343872}
|
|
{"current_steps": 8685, "total_steps": 78105, "loss": 0.6518, "lr": 4.998097403742158e-06, "epoch": 0.555982331476858, "percentage": 11.12, "elapsed_time": "0:23:17", "remaining_time": "3:06:11", "throughput": 19574.17, "total_tokens": 27358720}
|
|
{"current_steps": 8690, "total_steps": 78105, "loss": 0.4923, "lr": 4.998075550364523e-06, "epoch": 0.5563024134178349, "percentage": 11.13, "elapsed_time": "0:23:18", "remaining_time": "3:06:09", "throughput": 19575.58, "total_tokens": 27373312}
|
|
{"current_steps": 8695, "total_steps": 78105, "loss": 0.438, "lr": 4.998053572246007e-06, "epoch": 0.5566224953588118, "percentage": 11.13, "elapsed_time": "0:23:19", "remaining_time": "3:06:07", "throughput": 19577.19, "total_tokens": 27388544}
|
|
{"current_steps": 8700, "total_steps": 78105, "loss": 0.4999, "lr": 4.998031469387709e-06, "epoch": 0.5569425772997888, "percentage": 11.14, "elapsed_time": "0:23:19", "remaining_time": "3:06:06", "throughput": 19579.19, "total_tokens": 27405120}
|
|
{"current_steps": 8705, "total_steps": 78105, "loss": 0.4881, "lr": 4.998009241790729e-06, "epoch": 0.5572626592407657, "percentage": 11.15, "elapsed_time": "0:23:20", "remaining_time": "3:06:04", "throughput": 19580.8, "total_tokens": 27420544}
|
|
{"current_steps": 8710, "total_steps": 78105, "loss": 0.4088, "lr": 4.99798688945618e-06, "epoch": 0.5575827411817426, "percentage": 11.15, "elapsed_time": "0:23:21", "remaining_time": "3:06:02", "throughput": 19582.41, "total_tokens": 27435456}
|
|
{"current_steps": 8715, "total_steps": 78105, "loss": 0.3932, "lr": 4.9979644123851776e-06, "epoch": 0.5579028231227194, "percentage": 11.16, "elapsed_time": "0:23:21", "remaining_time": "3:06:00", "throughput": 19584.38, "total_tokens": 27452032}
|
|
{"current_steps": 8720, "total_steps": 78105, "loss": 0.6223, "lr": 4.997941810578843e-06, "epoch": 0.5582229050636963, "percentage": 11.16, "elapsed_time": "0:23:22", "remaining_time": "3:05:58", "throughput": 19586.15, "total_tokens": 27467520}
|
|
{"current_steps": 8725, "total_steps": 78105, "loss": 0.3775, "lr": 4.9979190840383065e-06, "epoch": 0.5585429870046732, "percentage": 11.17, "elapsed_time": "0:23:23", "remaining_time": "3:05:56", "throughput": 19587.78, "total_tokens": 27482816}
|
|
{"current_steps": 8730, "total_steps": 78105, "loss": 0.4589, "lr": 4.997896232764702e-06, "epoch": 0.5588630689456501, "percentage": 11.18, "elapsed_time": "0:23:23", "remaining_time": "3:05:54", "throughput": 19589.27, "total_tokens": 27497600}
|
|
{"current_steps": 8735, "total_steps": 78105, "loss": 0.4818, "lr": 4.997873256759171e-06, "epoch": 0.559183150886627, "percentage": 11.18, "elapsed_time": "0:23:24", "remaining_time": "3:05:53", "throughput": 19590.95, "total_tokens": 27513344}
|
|
{"current_steps": 8740, "total_steps": 78105, "loss": 0.4708, "lr": 4.997850156022859e-06, "epoch": 0.5595032328276038, "percentage": 11.19, "elapsed_time": "0:23:25", "remaining_time": "3:05:51", "throughput": 19593.09, "total_tokens": 27530304}
|
|
{"current_steps": 8745, "total_steps": 78105, "loss": 0.5141, "lr": 4.997826930556922e-06, "epoch": 0.5598233147685807, "percentage": 11.2, "elapsed_time": "0:23:25", "remaining_time": "3:05:49", "throughput": 19594.66, "total_tokens": 27545664}
|
|
{"current_steps": 8750, "total_steps": 78105, "loss": 0.3639, "lr": 4.997803580362519e-06, "epoch": 0.5601433967095576, "percentage": 11.2, "elapsed_time": "0:23:26", "remaining_time": "3:05:47", "throughput": 19596.23, "total_tokens": 27560704}
|
|
{"current_steps": 8755, "total_steps": 78105, "loss": 0.6498, "lr": 4.997780105440816e-06, "epoch": 0.5604634786505346, "percentage": 11.21, "elapsed_time": "0:23:27", "remaining_time": "3:05:46", "throughput": 19598.17, "total_tokens": 27577472}
|
|
{"current_steps": 8760, "total_steps": 78105, "loss": 0.4584, "lr": 4.9977565057929845e-06, "epoch": 0.5607835605915115, "percentage": 11.22, "elapsed_time": "0:23:27", "remaining_time": "3:05:44", "throughput": 19599.89, "total_tokens": 27592960}
|
|
{"current_steps": 8765, "total_steps": 78105, "loss": 0.4441, "lr": 4.9977327814202036e-06, "epoch": 0.5611036425324883, "percentage": 11.22, "elapsed_time": "0:23:28", "remaining_time": "3:05:42", "throughput": 19602.14, "total_tokens": 27610304}
|
|
{"current_steps": 8770, "total_steps": 78105, "loss": 0.7925, "lr": 4.9977089323236575e-06, "epoch": 0.5614237244734652, "percentage": 11.23, "elapsed_time": "0:23:29", "remaining_time": "3:05:41", "throughput": 19604.59, "total_tokens": 27628288}
|
|
{"current_steps": 8775, "total_steps": 78105, "loss": 0.5821, "lr": 4.997684958504537e-06, "epoch": 0.5617438064144421, "percentage": 11.23, "elapsed_time": "0:23:29", "remaining_time": "3:05:39", "throughput": 19606.33, "total_tokens": 27643904}
|
|
{"current_steps": 8780, "total_steps": 78105, "loss": 0.5352, "lr": 4.99766085996404e-06, "epoch": 0.562063888355419, "percentage": 11.24, "elapsed_time": "0:23:30", "remaining_time": "3:05:38", "throughput": 19608.22, "total_tokens": 27660224}
|
|
{"current_steps": 8785, "total_steps": 78105, "loss": 0.4474, "lr": 4.997636636703371e-06, "epoch": 0.5623839702963959, "percentage": 11.25, "elapsed_time": "0:23:31", "remaining_time": "3:05:36", "throughput": 19609.77, "total_tokens": 27675136}
|
|
{"current_steps": 8790, "total_steps": 78105, "loss": 0.4169, "lr": 4.997612288723736e-06, "epoch": 0.5627040522373727, "percentage": 11.25, "elapsed_time": "0:23:31", "remaining_time": "3:05:34", "throughput": 19611.32, "total_tokens": 27690176}
|
|
{"current_steps": 8795, "total_steps": 78105, "loss": 0.5149, "lr": 4.997587816026355e-06, "epoch": 0.5630241341783496, "percentage": 11.26, "elapsed_time": "0:23:32", "remaining_time": "3:05:32", "throughput": 19612.67, "total_tokens": 27704512}
|
|
{"current_steps": 8800, "total_steps": 78105, "loss": 0.6121, "lr": 4.997563218612446e-06, "epoch": 0.5633442161193265, "percentage": 11.27, "elapsed_time": "0:23:33", "remaining_time": "3:05:30", "throughput": 19614.48, "total_tokens": 27720448}
|
|
{"current_steps": 8805, "total_steps": 78105, "loss": 0.6136, "lr": 4.99753849648324e-06, "epoch": 0.5636642980603035, "percentage": 11.27, "elapsed_time": "0:23:33", "remaining_time": "3:05:28", "throughput": 19616.2, "total_tokens": 27735808}
|
|
{"current_steps": 8810, "total_steps": 78105, "loss": 0.4028, "lr": 4.9975136496399705e-06, "epoch": 0.5639843800012804, "percentage": 11.28, "elapsed_time": "0:23:34", "remaining_time": "3:05:26", "throughput": 19617.62, "total_tokens": 27750464}
|
|
{"current_steps": 8815, "total_steps": 78105, "loss": 0.4814, "lr": 4.997488678083879e-06, "epoch": 0.5643044619422573, "percentage": 11.29, "elapsed_time": "0:23:35", "remaining_time": "3:05:24", "throughput": 19618.83, "total_tokens": 27764352}
|
|
{"current_steps": 8820, "total_steps": 78105, "loss": 0.4174, "lr": 4.997463581816211e-06, "epoch": 0.5646245438832341, "percentage": 11.29, "elapsed_time": "0:23:35", "remaining_time": "3:05:22", "throughput": 19620.39, "total_tokens": 27779840}
|
|
{"current_steps": 8825, "total_steps": 78105, "loss": 0.3541, "lr": 4.997438360838221e-06, "epoch": 0.564944625824211, "percentage": 11.3, "elapsed_time": "0:23:36", "remaining_time": "3:05:20", "throughput": 19622.34, "total_tokens": 27796352}
|
|
{"current_steps": 8830, "total_steps": 78105, "loss": 0.5327, "lr": 4.9974130151511675e-06, "epoch": 0.5652647077651879, "percentage": 11.31, "elapsed_time": "0:23:37", "remaining_time": "3:05:18", "throughput": 19623.66, "total_tokens": 27810624}
|
|
{"current_steps": 8835, "total_steps": 78105, "loss": 0.4414, "lr": 4.997387544756317e-06, "epoch": 0.5655847897061648, "percentage": 11.31, "elapsed_time": "0:23:37", "remaining_time": "3:05:16", "throughput": 19625.09, "total_tokens": 27825152}
|
|
{"current_steps": 8840, "total_steps": 78105, "loss": 0.6232, "lr": 4.997361949654941e-06, "epoch": 0.5659048716471416, "percentage": 11.32, "elapsed_time": "0:23:38", "remaining_time": "3:05:14", "throughput": 19626.74, "total_tokens": 27840384}
|
|
{"current_steps": 8845, "total_steps": 78105, "loss": 0.4049, "lr": 4.9973362298483175e-06, "epoch": 0.5662249535881185, "percentage": 11.32, "elapsed_time": "0:23:39", "remaining_time": "3:05:12", "throughput": 19628.55, "total_tokens": 27856640}
|
|
{"current_steps": 8850, "total_steps": 78105, "loss": 0.3466, "lr": 4.99731038533773e-06, "epoch": 0.5665450355290954, "percentage": 11.33, "elapsed_time": "0:23:39", "remaining_time": "3:05:11", "throughput": 19630.38, "total_tokens": 27872768}
|
|
{"current_steps": 8855, "total_steps": 78105, "loss": 0.597, "lr": 4.997284416124471e-06, "epoch": 0.5668651174700723, "percentage": 11.34, "elapsed_time": "0:23:40", "remaining_time": "3:05:09", "throughput": 19631.76, "total_tokens": 27887488}
|
|
{"current_steps": 8860, "total_steps": 78105, "loss": 0.4015, "lr": 4.997258322209836e-06, "epoch": 0.5671851994110493, "percentage": 11.34, "elapsed_time": "0:23:41", "remaining_time": "3:05:07", "throughput": 19633.25, "total_tokens": 27902720}
|
|
{"current_steps": 8865, "total_steps": 78105, "loss": 0.4278, "lr": 4.997232103595127e-06, "epoch": 0.5675052813520262, "percentage": 11.35, "elapsed_time": "0:23:41", "remaining_time": "3:05:05", "throughput": 19634.87, "total_tokens": 27918080}
|
|
{"current_steps": 8870, "total_steps": 78105, "loss": 0.5455, "lr": 4.997205760281656e-06, "epoch": 0.567825363293003, "percentage": 11.36, "elapsed_time": "0:23:42", "remaining_time": "3:05:03", "throughput": 19636.7, "total_tokens": 27933760}
|
|
{"current_steps": 8875, "total_steps": 78105, "loss": 0.5835, "lr": 4.997179292270736e-06, "epoch": 0.5681454452339799, "percentage": 11.36, "elapsed_time": "0:23:43", "remaining_time": "3:05:01", "throughput": 19638.22, "total_tokens": 27948992}
|
|
{"current_steps": 8880, "total_steps": 78105, "loss": 0.5552, "lr": 4.997152699563689e-06, "epoch": 0.5684655271749568, "percentage": 11.37, "elapsed_time": "0:23:43", "remaining_time": "3:05:00", "throughput": 19640.46, "total_tokens": 27966848}
|
|
{"current_steps": 8885, "total_steps": 78105, "loss": 0.4773, "lr": 4.997125982161845e-06, "epoch": 0.5687856091159337, "percentage": 11.38, "elapsed_time": "0:23:44", "remaining_time": "3:04:58", "throughput": 19641.83, "total_tokens": 27981696}
|
|
{"current_steps": 8890, "total_steps": 78105, "loss": 0.4981, "lr": 4.997099140066535e-06, "epoch": 0.5691056910569106, "percentage": 11.38, "elapsed_time": "0:23:45", "remaining_time": "3:04:56", "throughput": 19643.5, "total_tokens": 27997376}
|
|
{"current_steps": 8895, "total_steps": 78105, "loss": 0.4624, "lr": 4.9970721732791005e-06, "epoch": 0.5694257729978874, "percentage": 11.39, "elapsed_time": "0:23:45", "remaining_time": "3:04:55", "throughput": 19645.46, "total_tokens": 28014016}
|
|
{"current_steps": 8900, "total_steps": 78105, "loss": 0.5496, "lr": 4.99704508180089e-06, "epoch": 0.5697458549388643, "percentage": 11.39, "elapsed_time": "0:23:46", "remaining_time": "3:04:53", "throughput": 19647.3, "total_tokens": 28030400}
|
|
{"current_steps": 8905, "total_steps": 78105, "loss": 0.456, "lr": 4.997017865633253e-06, "epoch": 0.5700659368798412, "percentage": 11.4, "elapsed_time": "0:23:47", "remaining_time": "3:04:52", "throughput": 19649.39, "total_tokens": 28047104}
|
|
{"current_steps": 8910, "total_steps": 78105, "loss": 0.5433, "lr": 4.996990524777552e-06, "epoch": 0.5703860188208181, "percentage": 11.41, "elapsed_time": "0:23:48", "remaining_time": "3:04:50", "throughput": 19651.24, "total_tokens": 28063168}
|
|
{"current_steps": 8915, "total_steps": 78105, "loss": 0.593, "lr": 4.99696305923515e-06, "epoch": 0.5707061007617951, "percentage": 11.41, "elapsed_time": "0:23:48", "remaining_time": "3:04:48", "throughput": 19652.93, "total_tokens": 28079104}
|
|
{"current_steps": 8920, "total_steps": 78105, "loss": 0.5787, "lr": 4.996935469007418e-06, "epoch": 0.571026182702772, "percentage": 11.42, "elapsed_time": "0:23:49", "remaining_time": "3:04:46", "throughput": 19654.62, "total_tokens": 28094720}
|
|
{"current_steps": 8925, "total_steps": 78105, "loss": 0.4108, "lr": 4.996907754095735e-06, "epoch": 0.5713462646437488, "percentage": 11.43, "elapsed_time": "0:23:50", "remaining_time": "3:04:45", "throughput": 19656.42, "total_tokens": 28110848}
|
|
{"current_steps": 8930, "total_steps": 78105, "loss": 0.5035, "lr": 4.996879914501486e-06, "epoch": 0.5716663465847257, "percentage": 11.43, "elapsed_time": "0:23:50", "remaining_time": "3:04:43", "throughput": 19658.24, "total_tokens": 28127040}
|
|
{"current_steps": 8935, "total_steps": 78105, "loss": 0.4915, "lr": 4.996851950226059e-06, "epoch": 0.5719864285257026, "percentage": 11.44, "elapsed_time": "0:23:51", "remaining_time": "3:04:41", "throughput": 19660.18, "total_tokens": 28143424}
|
|
{"current_steps": 8940, "total_steps": 78105, "loss": 0.7354, "lr": 4.996823861270851e-06, "epoch": 0.5723065104666795, "percentage": 11.45, "elapsed_time": "0:23:52", "remaining_time": "3:04:39", "throughput": 19661.69, "total_tokens": 28158528}
|
|
{"current_steps": 8945, "total_steps": 78105, "loss": 0.5087, "lr": 4.9967956476372645e-06, "epoch": 0.5726265924076563, "percentage": 11.45, "elapsed_time": "0:23:52", "remaining_time": "3:04:38", "throughput": 19663.26, "total_tokens": 28173888}
|
|
{"current_steps": 8950, "total_steps": 78105, "loss": 0.5592, "lr": 4.99676730932671e-06, "epoch": 0.5729466743486332, "percentage": 11.46, "elapsed_time": "0:23:53", "remaining_time": "3:04:36", "throughput": 19665.53, "total_tokens": 28191872}
|
|
{"current_steps": 8955, "total_steps": 78105, "loss": 0.6421, "lr": 4.996738846340601e-06, "epoch": 0.5732667562896101, "percentage": 11.47, "elapsed_time": "0:23:54", "remaining_time": "3:04:34", "throughput": 19667.06, "total_tokens": 28206912}
|
|
{"current_steps": 8960, "total_steps": 78105, "loss": 0.618, "lr": 4.996710258680358e-06, "epoch": 0.573586838230587, "percentage": 11.47, "elapsed_time": "0:23:54", "remaining_time": "3:04:33", "throughput": 19668.7, "total_tokens": 28222336}
|
|
{"current_steps": 8965, "total_steps": 78105, "loss": 0.4461, "lr": 4.99668154634741e-06, "epoch": 0.573906920171564, "percentage": 11.48, "elapsed_time": "0:23:55", "remaining_time": "3:04:31", "throughput": 19670.35, "total_tokens": 28238144}
|
|
{"current_steps": 8970, "total_steps": 78105, "loss": 0.3794, "lr": 4.996652709343191e-06, "epoch": 0.5742270021125409, "percentage": 11.48, "elapsed_time": "0:23:56", "remaining_time": "3:04:29", "throughput": 19671.87, "total_tokens": 28253376}
|
|
{"current_steps": 8975, "total_steps": 78105, "loss": 0.5317, "lr": 4.9966237476691395e-06, "epoch": 0.5745470840535177, "percentage": 11.49, "elapsed_time": "0:23:56", "remaining_time": "3:04:27", "throughput": 19673.69, "total_tokens": 28269248}
|
|
{"current_steps": 8980, "total_steps": 78105, "loss": 0.4716, "lr": 4.996594661326702e-06, "epoch": 0.5748671659944946, "percentage": 11.5, "elapsed_time": "0:23:57", "remaining_time": "3:04:26", "throughput": 19675.53, "total_tokens": 28285440}
|
|
{"current_steps": 8985, "total_steps": 78105, "loss": 0.3745, "lr": 4.996565450317333e-06, "epoch": 0.5751872479354715, "percentage": 11.5, "elapsed_time": "0:23:58", "remaining_time": "3:04:24", "throughput": 19677.04, "total_tokens": 28300736}
|
|
{"current_steps": 8990, "total_steps": 78105, "loss": 0.4753, "lr": 4.996536114642489e-06, "epoch": 0.5755073298764484, "percentage": 11.51, "elapsed_time": "0:23:58", "remaining_time": "3:04:22", "throughput": 19678.51, "total_tokens": 28315712}
|
|
{"current_steps": 8995, "total_steps": 78105, "loss": 0.5826, "lr": 4.996506654303634e-06, "epoch": 0.5758274118174252, "percentage": 11.52, "elapsed_time": "0:23:59", "remaining_time": "3:04:20", "throughput": 19680.27, "total_tokens": 28332032}
|
|
{"current_steps": 9000, "total_steps": 78105, "loss": 0.5248, "lr": 4.996477069302243e-06, "epoch": 0.5761474937584021, "percentage": 11.52, "elapsed_time": "0:24:00", "remaining_time": "3:04:19", "throughput": 19681.97, "total_tokens": 28347968}
|
|
{"current_steps": 9005, "total_steps": 78105, "loss": 0.4764, "lr": 4.996447359639789e-06, "epoch": 0.576467575699379, "percentage": 11.53, "elapsed_time": "0:24:00", "remaining_time": "3:04:17", "throughput": 19683.44, "total_tokens": 28362944}
|
|
{"current_steps": 9010, "total_steps": 78105, "loss": 0.4941, "lr": 4.996417525317757e-06, "epoch": 0.5767876576403559, "percentage": 11.54, "elapsed_time": "0:24:01", "remaining_time": "3:04:15", "throughput": 19685.11, "total_tokens": 28378624}
|
|
{"current_steps": 9015, "total_steps": 78105, "loss": 0.5076, "lr": 4.9963875663376385e-06, "epoch": 0.5771077395813328, "percentage": 11.54, "elapsed_time": "0:24:02", "remaining_time": "3:04:13", "throughput": 19686.67, "total_tokens": 28394048}
|
|
{"current_steps": 9020, "total_steps": 78105, "loss": 0.537, "lr": 4.996357482700927e-06, "epoch": 0.5774278215223098, "percentage": 11.55, "elapsed_time": "0:24:02", "remaining_time": "3:04:11", "throughput": 19688.21, "total_tokens": 28409664}
|
|
{"current_steps": 9025, "total_steps": 78105, "loss": 0.4628, "lr": 4.9963272744091275e-06, "epoch": 0.5777479034632866, "percentage": 11.55, "elapsed_time": "0:24:03", "remaining_time": "3:04:10", "throughput": 19689.66, "total_tokens": 28424768}
|
|
{"current_steps": 9030, "total_steps": 78105, "loss": 0.4314, "lr": 4.996296941463745e-06, "epoch": 0.5780679854042635, "percentage": 11.56, "elapsed_time": "0:24:04", "remaining_time": "3:04:08", "throughput": 19690.97, "total_tokens": 28439424}
|
|
{"current_steps": 9035, "total_steps": 78105, "loss": 0.6351, "lr": 4.9962664838662974e-06, "epoch": 0.5783880673452404, "percentage": 11.57, "elapsed_time": "0:24:04", "remaining_time": "3:04:06", "throughput": 19692.92, "total_tokens": 28456064}
|
|
{"current_steps": 9040, "total_steps": 78105, "loss": 0.5251, "lr": 4.996235901618304e-06, "epoch": 0.5787081492862173, "percentage": 11.57, "elapsed_time": "0:24:05", "remaining_time": "3:04:05", "throughput": 19694.85, "total_tokens": 28472768}
|
|
{"current_steps": 9045, "total_steps": 78105, "loss": 0.4512, "lr": 4.996205194721291e-06, "epoch": 0.5790282312271942, "percentage": 11.58, "elapsed_time": "0:24:06", "remaining_time": "3:04:03", "throughput": 19696.47, "total_tokens": 28488704}
|
|
{"current_steps": 9050, "total_steps": 78105, "loss": 0.5944, "lr": 4.9961743631767935e-06, "epoch": 0.579348313168171, "percentage": 11.59, "elapsed_time": "0:24:07", "remaining_time": "3:04:01", "throughput": 19697.91, "total_tokens": 28503808}
|
|
{"current_steps": 9055, "total_steps": 78105, "loss": 0.5982, "lr": 4.99614340698635e-06, "epoch": 0.5796683951091479, "percentage": 11.59, "elapsed_time": "0:24:07", "remaining_time": "3:03:59", "throughput": 19699.78, "total_tokens": 28520320}
|
|
{"current_steps": 9060, "total_steps": 78105, "loss": 0.5961, "lr": 4.9961123261515074e-06, "epoch": 0.5799884770501248, "percentage": 11.6, "elapsed_time": "0:24:08", "remaining_time": "3:03:58", "throughput": 19701.35, "total_tokens": 28535680}
|
|
{"current_steps": 9065, "total_steps": 78105, "loss": 0.4588, "lr": 4.996081120673817e-06, "epoch": 0.5803085589911017, "percentage": 11.61, "elapsed_time": "0:24:09", "remaining_time": "3:03:56", "throughput": 19702.96, "total_tokens": 28551488}
|
|
{"current_steps": 9070, "total_steps": 78105, "loss": 0.5326, "lr": 4.996049790554837e-06, "epoch": 0.5806286409320787, "percentage": 11.61, "elapsed_time": "0:24:09", "remaining_time": "3:03:54", "throughput": 19704.57, "total_tokens": 28567040}
|
|
{"current_steps": 9075, "total_steps": 78105, "loss": 0.4566, "lr": 4.996018335796131e-06, "epoch": 0.5809487228730555, "percentage": 11.62, "elapsed_time": "0:24:10", "remaining_time": "3:03:52", "throughput": 19705.75, "total_tokens": 28581568}
|
|
{"current_steps": 9080, "total_steps": 78105, "loss": 0.5191, "lr": 4.995986756399272e-06, "epoch": 0.5812688048140324, "percentage": 11.63, "elapsed_time": "0:24:11", "remaining_time": "3:03:50", "throughput": 19707.4, "total_tokens": 28596992}
|
|
{"current_steps": 9085, "total_steps": 78105, "loss": 0.7651, "lr": 4.995955052365834e-06, "epoch": 0.5815888867550093, "percentage": 11.63, "elapsed_time": "0:24:11", "remaining_time": "3:03:49", "throughput": 19709.43, "total_tokens": 28613952}
|
|
{"current_steps": 9090, "total_steps": 78105, "loss": 0.6473, "lr": 4.995923223697403e-06, "epoch": 0.5819089686959862, "percentage": 11.64, "elapsed_time": "0:24:12", "remaining_time": "3:03:47", "throughput": 19710.84, "total_tokens": 28628736}
|
|
{"current_steps": 9095, "total_steps": 78105, "loss": 0.5631, "lr": 4.995891270395566e-06, "epoch": 0.5822290506369631, "percentage": 11.64, "elapsed_time": "0:24:13", "remaining_time": "3:03:45", "throughput": 19712.56, "total_tokens": 28644928}
|
|
{"current_steps": 9100, "total_steps": 78105, "loss": 0.4256, "lr": 4.99585919246192e-06, "epoch": 0.5825491325779399, "percentage": 11.65, "elapsed_time": "0:24:13", "remaining_time": "3:03:44", "throughput": 19714.28, "total_tokens": 28661184}
|
|
{"current_steps": 9105, "total_steps": 78105, "loss": 0.3767, "lr": 4.995826989898066e-06, "epoch": 0.5828692145189168, "percentage": 11.66, "elapsed_time": "0:24:14", "remaining_time": "3:03:42", "throughput": 19715.64, "total_tokens": 28675776}
|
|
{"current_steps": 9110, "total_steps": 78105, "loss": 0.49, "lr": 4.995794662705613e-06, "epoch": 0.5831892964598937, "percentage": 11.66, "elapsed_time": "0:24:15", "remaining_time": "3:03:40", "throughput": 19717.38, "total_tokens": 28692160}
|
|
{"current_steps": 9115, "total_steps": 78105, "loss": 0.361, "lr": 4.995762210886175e-06, "epoch": 0.5835093784008706, "percentage": 11.67, "elapsed_time": "0:24:15", "remaining_time": "3:03:38", "throughput": 19718.81, "total_tokens": 28707328}
|
|
{"current_steps": 9120, "total_steps": 78105, "loss": 0.4557, "lr": 4.995729634441371e-06, "epoch": 0.5838294603418475, "percentage": 11.68, "elapsed_time": "0:24:16", "remaining_time": "3:03:37", "throughput": 19720.28, "total_tokens": 28722624}
|
|
{"current_steps": 9125, "total_steps": 78105, "loss": 0.4721, "lr": 4.995696933372829e-06, "epoch": 0.5841495422828245, "percentage": 11.68, "elapsed_time": "0:24:17", "remaining_time": "3:03:35", "throughput": 19721.72, "total_tokens": 28737856}
|
|
{"current_steps": 9130, "total_steps": 78105, "loss": 0.5019, "lr": 4.995664107682182e-06, "epoch": 0.5844696242238013, "percentage": 11.69, "elapsed_time": "0:24:17", "remaining_time": "3:03:33", "throughput": 19723.09, "total_tokens": 28752448}
|
|
{"current_steps": 9135, "total_steps": 78105, "loss": 0.4909, "lr": 4.995631157371069e-06, "epoch": 0.5847897061647782, "percentage": 11.7, "elapsed_time": "0:24:18", "remaining_time": "3:03:31", "throughput": 19724.6, "total_tokens": 28767616}
|
|
{"current_steps": 9140, "total_steps": 78105, "loss": 0.5935, "lr": 4.995598082441136e-06, "epoch": 0.5851097881057551, "percentage": 11.7, "elapsed_time": "0:24:19", "remaining_time": "3:03:29", "throughput": 19725.92, "total_tokens": 28782400}
|
|
{"current_steps": 9145, "total_steps": 78105, "loss": 0.6664, "lr": 4.995564882894033e-06, "epoch": 0.585429870046732, "percentage": 11.71, "elapsed_time": "0:24:19", "remaining_time": "3:03:27", "throughput": 19727.57, "total_tokens": 28798336}
|
|
{"current_steps": 9150, "total_steps": 78105, "loss": 0.4209, "lr": 4.995531558731419e-06, "epoch": 0.5857499519877089, "percentage": 11.71, "elapsed_time": "0:24:20", "remaining_time": "3:03:26", "throughput": 19729.26, "total_tokens": 28814336}
|
|
{"current_steps": 9155, "total_steps": 78105, "loss": 0.7192, "lr": 4.995498109954957e-06, "epoch": 0.5860700339286857, "percentage": 11.72, "elapsed_time": "0:24:21", "remaining_time": "3:03:24", "throughput": 19731.14, "total_tokens": 28830656}
|
|
{"current_steps": 9160, "total_steps": 78105, "loss": 0.6507, "lr": 4.995464536566319e-06, "epoch": 0.5863901158696626, "percentage": 11.73, "elapsed_time": "0:24:21", "remaining_time": "3:03:23", "throughput": 19732.76, "total_tokens": 28846656}
|
|
{"current_steps": 9165, "total_steps": 78105, "loss": 0.5404, "lr": 4.99543083856718e-06, "epoch": 0.5867101978106395, "percentage": 11.73, "elapsed_time": "0:24:22", "remaining_time": "3:03:21", "throughput": 19734.56, "total_tokens": 28862848}
|
|
{"current_steps": 9170, "total_steps": 78105, "loss": 0.5889, "lr": 4.995397015959223e-06, "epoch": 0.5870302797516164, "percentage": 11.74, "elapsed_time": "0:24:23", "remaining_time": "3:03:19", "throughput": 19736.12, "total_tokens": 28878400}
|
|
{"current_steps": 9175, "total_steps": 78105, "loss": 0.7242, "lr": 4.995363068744137e-06, "epoch": 0.5873503616925934, "percentage": 11.75, "elapsed_time": "0:24:23", "remaining_time": "3:03:17", "throughput": 19737.56, "total_tokens": 28893568}
|
|
{"current_steps": 9180, "total_steps": 78105, "loss": 0.6035, "lr": 4.9953289969236174e-06, "epoch": 0.5876704436335702, "percentage": 11.75, "elapsed_time": "0:24:24", "remaining_time": "3:03:16", "throughput": 19739.33, "total_tokens": 28909824}
|
|
{"current_steps": 9185, "total_steps": 78105, "loss": 0.4631, "lr": 4.995294800499366e-06, "epoch": 0.5879905255745471, "percentage": 11.76, "elapsed_time": "0:24:25", "remaining_time": "3:03:14", "throughput": 19740.94, "total_tokens": 28925568}
|
|
{"current_steps": 9190, "total_steps": 78105, "loss": 0.4697, "lr": 4.995260479473089e-06, "epoch": 0.588310607515524, "percentage": 11.77, "elapsed_time": "0:24:25", "remaining_time": "3:03:12", "throughput": 19742.44, "total_tokens": 28941056}
|
|
{"current_steps": 9195, "total_steps": 78105, "loss": 0.5873, "lr": 4.995226033846501e-06, "epoch": 0.5886306894565009, "percentage": 11.77, "elapsed_time": "0:24:26", "remaining_time": "3:03:10", "throughput": 19743.63, "total_tokens": 28955456}
|
|
{"current_steps": 9200, "total_steps": 78105, "loss": 0.4355, "lr": 4.9951914636213225e-06, "epoch": 0.5889507713974778, "percentage": 11.78, "elapsed_time": "0:24:27", "remaining_time": "3:03:09", "throughput": 19745.16, "total_tokens": 28971136}
|
|
{"current_steps": 9205, "total_steps": 78105, "loss": 0.5336, "lr": 4.995156768799279e-06, "epoch": 0.5892708533384546, "percentage": 11.79, "elapsed_time": "0:24:27", "remaining_time": "3:03:07", "throughput": 19747.17, "total_tokens": 28988480}
|
|
{"current_steps": 9210, "total_steps": 78105, "loss": 0.4822, "lr": 4.995121949382103e-06, "epoch": 0.5895909352794315, "percentage": 11.79, "elapsed_time": "0:24:28", "remaining_time": "3:03:06", "throughput": 19748.84, "total_tokens": 29004544}
|
|
{"current_steps": 9215, "total_steps": 78105, "loss": 0.5036, "lr": 4.995087005371534e-06, "epoch": 0.5899110172204084, "percentage": 11.8, "elapsed_time": "0:24:29", "remaining_time": "3:03:04", "throughput": 19750.17, "total_tokens": 29019328}
|
|
{"current_steps": 9220, "total_steps": 78105, "loss": 0.5889, "lr": 4.995051936769316e-06, "epoch": 0.5902310991613853, "percentage": 11.8, "elapsed_time": "0:24:29", "remaining_time": "3:03:02", "throughput": 19751.67, "total_tokens": 29034752}
|
|
{"current_steps": 9225, "total_steps": 78105, "loss": 0.6516, "lr": 4.9950167435772e-06, "epoch": 0.5905511811023622, "percentage": 11.81, "elapsed_time": "0:24:30", "remaining_time": "3:03:01", "throughput": 19753.26, "total_tokens": 29050816}
|
|
{"current_steps": 9230, "total_steps": 78105, "loss": 0.3739, "lr": 4.994981425796945e-06, "epoch": 0.5908712630433391, "percentage": 11.82, "elapsed_time": "0:24:31", "remaining_time": "3:02:59", "throughput": 19754.68, "total_tokens": 29065920}
|
|
{"current_steps": 9235, "total_steps": 78105, "loss": 0.6625, "lr": 4.994945983430313e-06, "epoch": 0.591191344984316, "percentage": 11.82, "elapsed_time": "0:24:31", "remaining_time": "3:02:57", "throughput": 19756.17, "total_tokens": 29080960}
|
|
{"current_steps": 9240, "total_steps": 78105, "loss": 0.611, "lr": 4.994910416479074e-06, "epoch": 0.5915114269252929, "percentage": 11.83, "elapsed_time": "0:24:32", "remaining_time": "3:02:55", "throughput": 19757.79, "total_tokens": 29096768}
|
|
{"current_steps": 9245, "total_steps": 78105, "loss": 0.6683, "lr": 4.994874724945005e-06, "epoch": 0.5918315088662698, "percentage": 11.84, "elapsed_time": "0:24:33", "remaining_time": "3:02:54", "throughput": 19759.4, "total_tokens": 29112448}
|
|
{"current_steps": 9250, "total_steps": 78105, "loss": 0.6542, "lr": 4.994838908829887e-06, "epoch": 0.5921515908072467, "percentage": 11.84, "elapsed_time": "0:24:34", "remaining_time": "3:02:52", "throughput": 19760.83, "total_tokens": 29127808}
|
|
{"current_steps": 9255, "total_steps": 78105, "loss": 0.4267, "lr": 4.994802968135509e-06, "epoch": 0.5924716727482235, "percentage": 11.85, "elapsed_time": "0:24:34", "remaining_time": "3:02:50", "throughput": 19762.21, "total_tokens": 29142912}
|
|
{"current_steps": 9260, "total_steps": 78105, "loss": 0.4546, "lr": 4.994766902863666e-06, "epoch": 0.5927917546892004, "percentage": 11.86, "elapsed_time": "0:24:35", "remaining_time": "3:02:48", "throughput": 19763.83, "total_tokens": 29159168}
|
|
{"current_steps": 9265, "total_steps": 78105, "loss": 0.5397, "lr": 4.9947307130161586e-06, "epoch": 0.5931118366301773, "percentage": 11.86, "elapsed_time": "0:24:36", "remaining_time": "3:02:47", "throughput": 19765.58, "total_tokens": 29175488}
|
|
{"current_steps": 9270, "total_steps": 78105, "loss": 0.5568, "lr": 4.994694398594794e-06, "epoch": 0.5934319185711542, "percentage": 11.87, "elapsed_time": "0:24:36", "remaining_time": "3:02:45", "throughput": 19767.11, "total_tokens": 29191104}
|
|
{"current_steps": 9275, "total_steps": 78105, "loss": 0.5317, "lr": 4.994657959601385e-06, "epoch": 0.5937520005121311, "percentage": 11.88, "elapsed_time": "0:24:37", "remaining_time": "3:02:43", "throughput": 19768.51, "total_tokens": 29206208}
|
|
{"current_steps": 9280, "total_steps": 78105, "loss": 0.5157, "lr": 4.994621396037752e-06, "epoch": 0.5940720824531079, "percentage": 11.88, "elapsed_time": "0:24:38", "remaining_time": "3:02:42", "throughput": 19769.84, "total_tokens": 29221312}
|
|
{"current_steps": 9285, "total_steps": 78105, "loss": 0.5074, "lr": 4.994584707905721e-06, "epoch": 0.5943921643940849, "percentage": 11.89, "elapsed_time": "0:24:38", "remaining_time": "3:02:40", "throughput": 19771.35, "total_tokens": 29236800}
|
|
{"current_steps": 9290, "total_steps": 78105, "loss": 0.6807, "lr": 4.994547895207123e-06, "epoch": 0.5947122463350618, "percentage": 11.89, "elapsed_time": "0:24:39", "remaining_time": "3:02:38", "throughput": 19772.98, "total_tokens": 29252480}
|
|
{"current_steps": 9295, "total_steps": 78105, "loss": 0.6824, "lr": 4.994510957943797e-06, "epoch": 0.5950323282760387, "percentage": 11.9, "elapsed_time": "0:24:40", "remaining_time": "3:02:36", "throughput": 19774.25, "total_tokens": 29266944}
|
|
{"current_steps": 9300, "total_steps": 78105, "loss": 0.501, "lr": 4.994473896117587e-06, "epoch": 0.5953524102170156, "percentage": 11.91, "elapsed_time": "0:24:40", "remaining_time": "3:02:35", "throughput": 19775.77, "total_tokens": 29282816}
|
|
{"current_steps": 9305, "total_steps": 78105, "loss": 0.3954, "lr": 4.994436709730344e-06, "epoch": 0.5956724921579925, "percentage": 11.91, "elapsed_time": "0:24:41", "remaining_time": "3:02:33", "throughput": 19777.14, "total_tokens": 29297984}
|
|
{"current_steps": 9310, "total_steps": 78105, "loss": 0.4635, "lr": 4.9943993987839245e-06, "epoch": 0.5959925740989693, "percentage": 11.92, "elapsed_time": "0:24:42", "remaining_time": "3:02:31", "throughput": 19778.76, "total_tokens": 29313664}
|
|
{"current_steps": 9315, "total_steps": 78105, "loss": 0.5479, "lr": 4.994361963280192e-06, "epoch": 0.5963126560399462, "percentage": 11.93, "elapsed_time": "0:24:42", "remaining_time": "3:02:29", "throughput": 19780.38, "total_tokens": 29329472}
|
|
{"current_steps": 9320, "total_steps": 78105, "loss": 0.5641, "lr": 4.994324403221017e-06, "epoch": 0.5966327379809231, "percentage": 11.93, "elapsed_time": "0:24:43", "remaining_time": "3:02:28", "throughput": 19782.28, "total_tokens": 29346048}
|
|
{"current_steps": 9325, "total_steps": 78105, "loss": 0.4047, "lr": 4.994286718608272e-06, "epoch": 0.5969528199219, "percentage": 11.94, "elapsed_time": "0:24:44", "remaining_time": "3:02:26", "throughput": 19783.73, "total_tokens": 29361664}
|
|
{"current_steps": 9330, "total_steps": 78105, "loss": 0.5308, "lr": 4.994248909443841e-06, "epoch": 0.5972729018628768, "percentage": 11.95, "elapsed_time": "0:24:44", "remaining_time": "3:02:25", "throughput": 19785.07, "total_tokens": 29376896}
|
|
{"current_steps": 9335, "total_steps": 78105, "loss": 0.54, "lr": 4.994210975729611e-06, "epoch": 0.5975929838038538, "percentage": 11.95, "elapsed_time": "0:24:45", "remaining_time": "3:02:23", "throughput": 19786.57, "total_tokens": 29392640}
|
|
{"current_steps": 9340, "total_steps": 78105, "loss": 0.5542, "lr": 4.994172917467478e-06, "epoch": 0.5979130657448307, "percentage": 11.96, "elapsed_time": "0:24:46", "remaining_time": "3:02:21", "throughput": 19787.87, "total_tokens": 29407808}
|
|
{"current_steps": 9345, "total_steps": 78105, "loss": 0.317, "lr": 4.994134734659341e-06, "epoch": 0.5982331476858076, "percentage": 11.96, "elapsed_time": "0:24:46", "remaining_time": "3:02:19", "throughput": 19789.1, "total_tokens": 29422528}
|
|
{"current_steps": 9350, "total_steps": 78105, "loss": 0.5401, "lr": 4.994096427307105e-06, "epoch": 0.5985532296267845, "percentage": 11.97, "elapsed_time": "0:24:47", "remaining_time": "3:02:18", "throughput": 19790.62, "total_tokens": 29438272}
|
|
{"current_steps": 9355, "total_steps": 78105, "loss": 0.6292, "lr": 4.994057995412687e-06, "epoch": 0.5988733115677614, "percentage": 11.98, "elapsed_time": "0:24:48", "remaining_time": "3:02:16", "throughput": 19792.01, "total_tokens": 29453376}
|
|
{"current_steps": 9360, "total_steps": 78105, "loss": 0.5249, "lr": 4.994019438978002e-06, "epoch": 0.5991933935087382, "percentage": 11.98, "elapsed_time": "0:24:48", "remaining_time": "3:02:14", "throughput": 19793.68, "total_tokens": 29469248}
|
|
{"current_steps": 9365, "total_steps": 78105, "loss": 0.4756, "lr": 4.993980758004979e-06, "epoch": 0.5995134754497151, "percentage": 11.99, "elapsed_time": "0:24:49", "remaining_time": "3:02:13", "throughput": 19795.43, "total_tokens": 29485888}
|
|
{"current_steps": 9370, "total_steps": 78105, "loss": 0.5408, "lr": 4.993941952495546e-06, "epoch": 0.599833557390692, "percentage": 12.0, "elapsed_time": "0:24:50", "remaining_time": "3:02:11", "throughput": 19796.72, "total_tokens": 29500864}
|
|
{"current_steps": 9375, "total_steps": 78105, "loss": 0.3278, "lr": 4.993903022451643e-06, "epoch": 0.6001536393316689, "percentage": 12.0, "elapsed_time": "0:24:50", "remaining_time": "3:02:09", "throughput": 19798.22, "total_tokens": 29516288}
|
|
{"current_steps": 9380, "total_steps": 78105, "loss": 0.4373, "lr": 4.993863967875213e-06, "epoch": 0.6004737212726458, "percentage": 12.01, "elapsed_time": "0:24:51", "remaining_time": "3:02:07", "throughput": 19799.39, "total_tokens": 29530880}
|
|
{"current_steps": 9385, "total_steps": 78105, "loss": 0.4586, "lr": 4.993824788768207e-06, "epoch": 0.6007938032136226, "percentage": 12.02, "elapsed_time": "0:24:52", "remaining_time": "3:02:06", "throughput": 19801.27, "total_tokens": 29547840}
|
|
{"current_steps": 9390, "total_steps": 78105, "loss": 0.4992, "lr": 4.99378548513258e-06, "epoch": 0.6011138851545996, "percentage": 12.02, "elapsed_time": "0:24:52", "remaining_time": "3:02:04", "throughput": 19802.93, "total_tokens": 29564096}
|
|
{"current_steps": 9395, "total_steps": 78105, "loss": 0.6268, "lr": 4.993746056970297e-06, "epoch": 0.6014339670955765, "percentage": 12.03, "elapsed_time": "0:24:53", "remaining_time": "3:02:03", "throughput": 19804.36, "total_tokens": 29579520}
|
|
{"current_steps": 9400, "total_steps": 78105, "loss": 0.5077, "lr": 4.993706504283324e-06, "epoch": 0.6017540490365534, "percentage": 12.04, "elapsed_time": "0:24:54", "remaining_time": "3:02:01", "throughput": 19805.64, "total_tokens": 29594048}
|
|
{"current_steps": 9405, "total_steps": 78105, "loss": 0.4874, "lr": 4.993666827073639e-06, "epoch": 0.6020741309775303, "percentage": 12.04, "elapsed_time": "0:24:54", "remaining_time": "3:01:59", "throughput": 19807.34, "total_tokens": 29610368}
|
|
{"current_steps": 9410, "total_steps": 78105, "loss": 0.5101, "lr": 4.99362702534322e-06, "epoch": 0.6023942129185071, "percentage": 12.05, "elapsed_time": "0:24:55", "remaining_time": "3:01:58", "throughput": 19808.79, "total_tokens": 29625792}
|
|
{"current_steps": 9415, "total_steps": 78105, "loss": 0.4477, "lr": 4.993587099094057e-06, "epoch": 0.602714294859484, "percentage": 12.05, "elapsed_time": "0:24:56", "remaining_time": "3:01:56", "throughput": 19810.44, "total_tokens": 29642240}
|
|
{"current_steps": 9420, "total_steps": 78105, "loss": 0.484, "lr": 4.993547048328143e-06, "epoch": 0.6030343768004609, "percentage": 12.06, "elapsed_time": "0:24:56", "remaining_time": "3:01:55", "throughput": 19812.01, "total_tokens": 29658176}
|
|
{"current_steps": 9425, "total_steps": 78105, "loss": 0.4888, "lr": 4.9935068730474774e-06, "epoch": 0.6033544587414378, "percentage": 12.07, "elapsed_time": "0:24:57", "remaining_time": "3:01:53", "throughput": 19814.01, "total_tokens": 29675392}
|
|
{"current_steps": 9430, "total_steps": 78105, "loss": 0.6311, "lr": 4.993466573254067e-06, "epoch": 0.6036745406824147, "percentage": 12.07, "elapsed_time": "0:24:58", "remaining_time": "3:01:51", "throughput": 19815.32, "total_tokens": 29690048}
|
|
{"current_steps": 9435, "total_steps": 78105, "loss": 0.4616, "lr": 4.993426148949924e-06, "epoch": 0.6039946226233915, "percentage": 12.08, "elapsed_time": "0:24:58", "remaining_time": "3:01:49", "throughput": 19816.71, "total_tokens": 29705088}
|
|
{"current_steps": 9440, "total_steps": 78105, "loss": 0.5847, "lr": 4.993385600137066e-06, "epoch": 0.6043147045643685, "percentage": 12.09, "elapsed_time": "0:24:59", "remaining_time": "3:01:48", "throughput": 19818.18, "total_tokens": 29720576}
|
|
{"current_steps": 9445, "total_steps": 78105, "loss": 0.5055, "lr": 4.99334492681752e-06, "epoch": 0.6046347865053454, "percentage": 12.09, "elapsed_time": "0:25:00", "remaining_time": "3:01:46", "throughput": 19819.89, "total_tokens": 29736896}
|
|
{"current_steps": 9450, "total_steps": 78105, "loss": 0.4512, "lr": 4.9933041289933145e-06, "epoch": 0.6049548684463223, "percentage": 12.1, "elapsed_time": "0:25:01", "remaining_time": "3:01:45", "throughput": 19821.41, "total_tokens": 29752768}
|
|
{"current_steps": 9455, "total_steps": 78105, "loss": 0.5214, "lr": 4.993263206666489e-06, "epoch": 0.6052749503872992, "percentage": 12.11, "elapsed_time": "0:25:01", "remaining_time": "3:01:43", "throughput": 19822.97, "total_tokens": 29768640}
|
|
{"current_steps": 9460, "total_steps": 78105, "loss": 0.4837, "lr": 4.993222159839086e-06, "epoch": 0.605595032328276, "percentage": 12.11, "elapsed_time": "0:25:02", "remaining_time": "3:01:42", "throughput": 19824.9, "total_tokens": 29786240}
|
|
{"current_steps": 9465, "total_steps": 78105, "loss": 0.6349, "lr": 4.9931809885131545e-06, "epoch": 0.6059151142692529, "percentage": 12.12, "elapsed_time": "0:25:03", "remaining_time": "3:01:40", "throughput": 19826.44, "total_tokens": 29802176}
|
|
{"current_steps": 9470, "total_steps": 78105, "loss": 0.4686, "lr": 4.99313969269075e-06, "epoch": 0.6062351962102298, "percentage": 12.12, "elapsed_time": "0:25:03", "remaining_time": "3:01:39", "throughput": 19827.89, "total_tokens": 29817600}
|
|
{"current_steps": 9475, "total_steps": 78105, "loss": 0.5119, "lr": 4.993098272373937e-06, "epoch": 0.6065552781512067, "percentage": 12.13, "elapsed_time": "0:25:04", "remaining_time": "3:01:37", "throughput": 19829.34, "total_tokens": 29832960}
|
|
{"current_steps": 9480, "total_steps": 78105, "loss": 0.3367, "lr": 4.993056727564782e-06, "epoch": 0.6068753600921836, "percentage": 12.14, "elapsed_time": "0:25:05", "remaining_time": "3:01:35", "throughput": 19830.73, "total_tokens": 29848448}
|
|
{"current_steps": 9485, "total_steps": 78105, "loss": 0.4875, "lr": 4.99301505826536e-06, "epoch": 0.6071954420331604, "percentage": 12.14, "elapsed_time": "0:25:05", "remaining_time": "3:01:33", "throughput": 19832.07, "total_tokens": 29863296}
|
|
{"current_steps": 9490, "total_steps": 78105, "loss": 0.4788, "lr": 4.992973264477752e-06, "epoch": 0.6075155239741373, "percentage": 12.15, "elapsed_time": "0:25:06", "remaining_time": "3:01:32", "throughput": 19833.88, "total_tokens": 29880128}
|
|
{"current_steps": 9495, "total_steps": 78105, "loss": 0.4658, "lr": 4.9929313462040435e-06, "epoch": 0.6078356059151143, "percentage": 12.16, "elapsed_time": "0:25:07", "remaining_time": "3:01:30", "throughput": 19835.34, "total_tokens": 29895296}
|
|
{"current_steps": 9500, "total_steps": 78105, "loss": 0.5559, "lr": 4.992889303446329e-06, "epoch": 0.6081556878560912, "percentage": 12.16, "elapsed_time": "0:25:07", "remaining_time": "3:01:29", "throughput": 19836.94, "total_tokens": 29911296}
|
|
{"current_steps": 9505, "total_steps": 78105, "loss": 0.598, "lr": 4.992847136206708e-06, "epoch": 0.6084757697970681, "percentage": 12.17, "elapsed_time": "0:25:08", "remaining_time": "3:01:27", "throughput": 19838.5, "total_tokens": 29926912}
|
|
{"current_steps": 9510, "total_steps": 78105, "loss": 0.6065, "lr": 4.9928048444872854e-06, "epoch": 0.608795851738045, "percentage": 12.18, "elapsed_time": "0:25:09", "remaining_time": "3:01:25", "throughput": 19839.96, "total_tokens": 29942272}
|
|
{"current_steps": 9515, "total_steps": 78105, "loss": 0.5619, "lr": 4.992762428290174e-06, "epoch": 0.6091159336790218, "percentage": 12.18, "elapsed_time": "0:25:09", "remaining_time": "3:01:24", "throughput": 19841.35, "total_tokens": 29958016}
|
|
{"current_steps": 9520, "total_steps": 78105, "loss": 0.595, "lr": 4.992719887617491e-06, "epoch": 0.6094360156199987, "percentage": 12.19, "elapsed_time": "0:25:10", "remaining_time": "3:01:22", "throughput": 19843.12, "total_tokens": 29974720}
|
|
{"current_steps": 9525, "total_steps": 78105, "loss": 0.5034, "lr": 4.992677222471361e-06, "epoch": 0.6097560975609756, "percentage": 12.2, "elapsed_time": "0:25:11", "remaining_time": "3:01:21", "throughput": 19844.92, "total_tokens": 29991680}
|
|
{"current_steps": 9530, "total_steps": 78105, "loss": 0.3773, "lr": 4.992634432853914e-06, "epoch": 0.6100761795019525, "percentage": 12.2, "elapsed_time": "0:25:11", "remaining_time": "3:01:19", "throughput": 19846.39, "total_tokens": 30007168}
|
|
{"current_steps": 9535, "total_steps": 78105, "loss": 0.4996, "lr": 4.992591518767288e-06, "epoch": 0.6103962614429294, "percentage": 12.21, "elapsed_time": "0:25:12", "remaining_time": "3:01:18", "throughput": 19847.79, "total_tokens": 30022656}
|
|
{"current_steps": 9540, "total_steps": 78105, "loss": 0.4819, "lr": 4.992548480213624e-06, "epoch": 0.6107163433839062, "percentage": 12.21, "elapsed_time": "0:25:13", "remaining_time": "3:01:16", "throughput": 19849.26, "total_tokens": 30038144}
|
|
{"current_steps": 9545, "total_steps": 78105, "loss": 0.4784, "lr": 4.992505317195072e-06, "epoch": 0.6110364253248832, "percentage": 12.22, "elapsed_time": "0:25:14", "remaining_time": "3:01:15", "throughput": 19851.24, "total_tokens": 30055488}
|
|
{"current_steps": 9550, "total_steps": 78105, "loss": 0.4976, "lr": 4.992462029713789e-06, "epoch": 0.6113565072658601, "percentage": 12.23, "elapsed_time": "0:25:14", "remaining_time": "3:01:13", "throughput": 19852.75, "total_tokens": 30071040}
|
|
{"current_steps": 9555, "total_steps": 78105, "loss": 0.4546, "lr": 4.992418617771933e-06, "epoch": 0.611676589206837, "percentage": 12.23, "elapsed_time": "0:25:15", "remaining_time": "3:01:12", "throughput": 19854.56, "total_tokens": 30088000}
|
|
{"current_steps": 9560, "total_steps": 78105, "loss": 0.5951, "lr": 4.992375081371675e-06, "epoch": 0.6119966711478139, "percentage": 12.24, "elapsed_time": "0:25:16", "remaining_time": "3:01:10", "throughput": 19855.83, "total_tokens": 30103040}
|
|
{"current_steps": 9565, "total_steps": 78105, "loss": 0.5744, "lr": 4.992331420515187e-06, "epoch": 0.6123167530887907, "percentage": 12.25, "elapsed_time": "0:25:16", "remaining_time": "3:01:08", "throughput": 19857.15, "total_tokens": 30117760}
|
|
{"current_steps": 9570, "total_steps": 78105, "loss": 0.572, "lr": 4.992287635204651e-06, "epoch": 0.6126368350297676, "percentage": 12.25, "elapsed_time": "0:25:17", "remaining_time": "3:01:06", "throughput": 19858.79, "total_tokens": 30133888}
|
|
{"current_steps": 9575, "total_steps": 78105, "loss": 0.5694, "lr": 4.9922437254422515e-06, "epoch": 0.6129569169707445, "percentage": 12.26, "elapsed_time": "0:25:18", "remaining_time": "3:01:05", "throughput": 19860.16, "total_tokens": 30149312}
|
|
{"current_steps": 9580, "total_steps": 78105, "loss": 0.4877, "lr": 4.992199691230183e-06, "epoch": 0.6132769989117214, "percentage": 12.27, "elapsed_time": "0:25:18", "remaining_time": "3:01:03", "throughput": 19861.46, "total_tokens": 30163840}
|
|
{"current_steps": 9585, "total_steps": 78105, "loss": 0.6384, "lr": 4.992155532570641e-06, "epoch": 0.6135970808526983, "percentage": 12.27, "elapsed_time": "0:25:19", "remaining_time": "3:01:01", "throughput": 19863.28, "total_tokens": 30180544}
|
|
{"current_steps": 9590, "total_steps": 78105, "loss": 0.4069, "lr": 4.992111249465836e-06, "epoch": 0.6139171627936751, "percentage": 12.28, "elapsed_time": "0:25:20", "remaining_time": "3:01:00", "throughput": 19864.94, "total_tokens": 30197120}
|
|
{"current_steps": 9595, "total_steps": 78105, "loss": 0.5439, "lr": 4.992066841917974e-06, "epoch": 0.614237244734652, "percentage": 12.28, "elapsed_time": "0:25:20", "remaining_time": "3:00:58", "throughput": 19866.52, "total_tokens": 30213376}
|
|
{"current_steps": 9600, "total_steps": 78105, "loss": 0.5553, "lr": 4.992022309929275e-06, "epoch": 0.614557326675629, "percentage": 12.29, "elapsed_time": "0:25:21", "remaining_time": "3:00:57", "throughput": 19868.1, "total_tokens": 30229120}
|
|
{"current_steps": 9605, "total_steps": 78105, "loss": 0.5806, "lr": 4.991977653501963e-06, "epoch": 0.6148774086166059, "percentage": 12.3, "elapsed_time": "0:25:22", "remaining_time": "3:00:55", "throughput": 19869.55, "total_tokens": 30244544}
|
|
{"current_steps": 9610, "total_steps": 78105, "loss": 0.5126, "lr": 4.991932872638267e-06, "epoch": 0.6151974905575828, "percentage": 12.3, "elapsed_time": "0:25:22", "remaining_time": "3:00:54", "throughput": 19871.21, "total_tokens": 30260864}
|
|
{"current_steps": 9615, "total_steps": 78105, "loss": 0.5995, "lr": 4.991887967340422e-06, "epoch": 0.6155175724985597, "percentage": 12.31, "elapsed_time": "0:25:23", "remaining_time": "3:00:52", "throughput": 19872.4, "total_tokens": 30275456}
|
|
{"current_steps": 9620, "total_steps": 78105, "loss": 0.6657, "lr": 4.991842937610673e-06, "epoch": 0.6158376544395365, "percentage": 12.32, "elapsed_time": "0:25:24", "remaining_time": "3:00:50", "throughput": 19873.87, "total_tokens": 30291008}
|
|
{"current_steps": 9625, "total_steps": 78105, "loss": 0.588, "lr": 4.991797783451267e-06, "epoch": 0.6161577363805134, "percentage": 12.32, "elapsed_time": "0:25:24", "remaining_time": "3:00:48", "throughput": 19875.42, "total_tokens": 30306816}
|
|
{"current_steps": 9630, "total_steps": 78105, "loss": 0.4988, "lr": 4.991752504864459e-06, "epoch": 0.6164778183214903, "percentage": 12.33, "elapsed_time": "0:25:25", "remaining_time": "3:00:47", "throughput": 19876.77, "total_tokens": 30322240}
|
|
{"current_steps": 9635, "total_steps": 78105, "loss": 0.4139, "lr": 4.99170710185251e-06, "epoch": 0.6167979002624672, "percentage": 12.34, "elapsed_time": "0:25:26", "remaining_time": "3:00:45", "throughput": 19878.24, "total_tokens": 30338048}
|
|
{"current_steps": 9640, "total_steps": 78105, "loss": 0.4087, "lr": 4.991661574417687e-06, "epoch": 0.617117982203444, "percentage": 12.34, "elapsed_time": "0:25:26", "remaining_time": "3:00:44", "throughput": 19879.8, "total_tokens": 30353984}
|
|
{"current_steps": 9645, "total_steps": 78105, "loss": 0.6605, "lr": 4.991615922562264e-06, "epoch": 0.6174380641444209, "percentage": 12.35, "elapsed_time": "0:25:27", "remaining_time": "3:00:42", "throughput": 19881.3, "total_tokens": 30370048}
|
|
{"current_steps": 9650, "total_steps": 78105, "loss": 0.4528, "lr": 4.9915701462885194e-06, "epoch": 0.6177581460853978, "percentage": 12.36, "elapsed_time": "0:25:28", "remaining_time": "3:00:40", "throughput": 19882.65, "total_tokens": 30385344}
|
|
{"current_steps": 9655, "total_steps": 78105, "loss": 0.578, "lr": 4.99152424559874e-06, "epoch": 0.6180782280263748, "percentage": 12.36, "elapsed_time": "0:25:28", "remaining_time": "3:00:39", "throughput": 19883.98, "total_tokens": 30400576}
|
|
{"current_steps": 9660, "total_steps": 78105, "loss": 0.4926, "lr": 4.991478220495218e-06, "epoch": 0.6183983099673517, "percentage": 12.37, "elapsed_time": "0:25:29", "remaining_time": "3:00:37", "throughput": 19885.32, "total_tokens": 30415424}
|
|
{"current_steps": 9665, "total_steps": 78105, "loss": 0.5363, "lr": 4.991432070980251e-06, "epoch": 0.6187183919083286, "percentage": 12.37, "elapsed_time": "0:25:30", "remaining_time": "3:00:35", "throughput": 19886.68, "total_tokens": 30430592}
|
|
{"current_steps": 9670, "total_steps": 78105, "loss": 0.5223, "lr": 4.991385797056144e-06, "epoch": 0.6190384738493054, "percentage": 12.38, "elapsed_time": "0:25:30", "remaining_time": "3:00:34", "throughput": 19888.04, "total_tokens": 30445952}
|
|
{"current_steps": 9675, "total_steps": 78105, "loss": 0.4267, "lr": 4.9913393987252075e-06, "epoch": 0.6193585557902823, "percentage": 12.39, "elapsed_time": "0:25:31", "remaining_time": "3:00:32", "throughput": 19889.52, "total_tokens": 30461568}
|
|
{"current_steps": 9680, "total_steps": 78105, "loss": 0.4994, "lr": 4.9912928759897585e-06, "epoch": 0.6196786377312592, "percentage": 12.39, "elapsed_time": "0:25:32", "remaining_time": "3:00:30", "throughput": 19890.82, "total_tokens": 30476672}
|
|
{"current_steps": 9685, "total_steps": 78105, "loss": 0.446, "lr": 4.991246228852119e-06, "epoch": 0.6199987196722361, "percentage": 12.4, "elapsed_time": "0:25:32", "remaining_time": "3:00:29", "throughput": 19892.29, "total_tokens": 30492672}
|
|
{"current_steps": 9690, "total_steps": 78105, "loss": 0.5466, "lr": 4.991199457314621e-06, "epoch": 0.620318801613213, "percentage": 12.41, "elapsed_time": "0:25:33", "remaining_time": "3:00:27", "throughput": 19893.53, "total_tokens": 30507712}
|
|
{"current_steps": 9695, "total_steps": 78105, "loss": 0.7476, "lr": 4.991152561379596e-06, "epoch": 0.6206388835541898, "percentage": 12.41, "elapsed_time": "0:25:34", "remaining_time": "3:00:25", "throughput": 19895.2, "total_tokens": 30524032}
|
|
{"current_steps": 9700, "total_steps": 78105, "loss": 0.4074, "lr": 4.9911055410493905e-06, "epoch": 0.6209589654951667, "percentage": 12.42, "elapsed_time": "0:25:34", "remaining_time": "3:00:24", "throughput": 19896.61, "total_tokens": 30539776}
|
|
{"current_steps": 9705, "total_steps": 78105, "loss": 0.4596, "lr": 4.991058396326348e-06, "epoch": 0.6212790474361437, "percentage": 12.43, "elapsed_time": "0:25:35", "remaining_time": "3:00:22", "throughput": 19897.88, "total_tokens": 30554752}
|
|
{"current_steps": 9710, "total_steps": 78105, "loss": 0.4597, "lr": 4.991011127212826e-06, "epoch": 0.6215991293771206, "percentage": 12.43, "elapsed_time": "0:25:36", "remaining_time": "3:00:21", "throughput": 19899.74, "total_tokens": 30571968}
|
|
{"current_steps": 9715, "total_steps": 78105, "loss": 0.5344, "lr": 4.990963733711183e-06, "epoch": 0.6219192113180975, "percentage": 12.44, "elapsed_time": "0:25:36", "remaining_time": "3:00:19", "throughput": 19901.06, "total_tokens": 30587264}
|
|
{"current_steps": 9720, "total_steps": 78105, "loss": 0.6401, "lr": 4.990916215823785e-06, "epoch": 0.6222392932590743, "percentage": 12.44, "elapsed_time": "0:25:37", "remaining_time": "3:00:18", "throughput": 19902.53, "total_tokens": 30603200}
|
|
{"current_steps": 9725, "total_steps": 78105, "loss": 0.4959, "lr": 4.990868573553007e-06, "epoch": 0.6225593752000512, "percentage": 12.45, "elapsed_time": "0:25:38", "remaining_time": "3:00:16", "throughput": 19903.68, "total_tokens": 30617856}
|
|
{"current_steps": 9730, "total_steps": 78105, "loss": 0.6265, "lr": 4.990820806901227e-06, "epoch": 0.6228794571410281, "percentage": 12.46, "elapsed_time": "0:25:38", "remaining_time": "3:00:14", "throughput": 19904.81, "total_tokens": 30632448}
|
|
{"current_steps": 9735, "total_steps": 78105, "loss": 0.5136, "lr": 4.990772915870829e-06, "epoch": 0.623199539082005, "percentage": 12.46, "elapsed_time": "0:25:39", "remaining_time": "3:00:12", "throughput": 19906.12, "total_tokens": 30647680}
|
|
{"current_steps": 9740, "total_steps": 78105, "loss": 0.6384, "lr": 4.9907249004642076e-06, "epoch": 0.6235196210229819, "percentage": 12.47, "elapsed_time": "0:25:40", "remaining_time": "3:00:11", "throughput": 19907.58, "total_tokens": 30663616}
|
|
{"current_steps": 9745, "total_steps": 78105, "loss": 0.6294, "lr": 4.990676760683757e-06, "epoch": 0.6238397029639587, "percentage": 12.48, "elapsed_time": "0:25:41", "remaining_time": "3:00:10", "throughput": 19909.9, "total_tokens": 30682496}
|
|
{"current_steps": 9750, "total_steps": 78105, "loss": 0.5362, "lr": 4.990628496531883e-06, "epoch": 0.6241597849049356, "percentage": 12.48, "elapsed_time": "0:25:41", "remaining_time": "3:00:08", "throughput": 19911.18, "total_tokens": 30697664}
|
|
{"current_steps": 9755, "total_steps": 78105, "loss": 0.5101, "lr": 4.990580108010994e-06, "epoch": 0.6244798668459125, "percentage": 12.49, "elapsed_time": "0:25:42", "remaining_time": "3:00:07", "throughput": 19912.62, "total_tokens": 30713408}
|
|
{"current_steps": 9760, "total_steps": 78105, "loss": 0.4221, "lr": 4.990531595123509e-06, "epoch": 0.6247999487868895, "percentage": 12.5, "elapsed_time": "0:25:43", "remaining_time": "3:00:05", "throughput": 19914.47, "total_tokens": 30730496}
|
|
{"current_steps": 9765, "total_steps": 78105, "loss": 0.4812, "lr": 4.9904829578718474e-06, "epoch": 0.6251200307278664, "percentage": 12.5, "elapsed_time": "0:25:43", "remaining_time": "3:00:04", "throughput": 19915.81, "total_tokens": 30745664}
|
|
{"current_steps": 9770, "total_steps": 78105, "loss": 0.4585, "lr": 4.9904341962584396e-06, "epoch": 0.6254401126688433, "percentage": 12.51, "elapsed_time": "0:25:44", "remaining_time": "3:00:02", "throughput": 19917.18, "total_tokens": 30760960}
|
|
{"current_steps": 9775, "total_steps": 78105, "loss": 0.5699, "lr": 4.99038531028572e-06, "epoch": 0.6257601946098201, "percentage": 12.52, "elapsed_time": "0:25:45", "remaining_time": "3:00:00", "throughput": 19918.32, "total_tokens": 30775872}
|
|
{"current_steps": 9780, "total_steps": 78105, "loss": 0.4519, "lr": 4.990336299956131e-06, "epoch": 0.626080276550797, "percentage": 12.52, "elapsed_time": "0:25:45", "remaining_time": "2:59:59", "throughput": 19919.77, "total_tokens": 30791424}
|
|
{"current_steps": 9785, "total_steps": 78105, "loss": 0.5148, "lr": 4.990287165272119e-06, "epoch": 0.6264003584917739, "percentage": 12.53, "elapsed_time": "0:25:46", "remaining_time": "2:59:57", "throughput": 19921.26, "total_tokens": 30807680}
|
|
{"current_steps": 9790, "total_steps": 78105, "loss": 0.5699, "lr": 4.990237906236136e-06, "epoch": 0.6267204404327508, "percentage": 12.53, "elapsed_time": "0:25:47", "remaining_time": "2:59:56", "throughput": 19922.74, "total_tokens": 30823680}
|
|
{"current_steps": 9795, "total_steps": 78105, "loss": 0.6064, "lr": 4.9901885228506444e-06, "epoch": 0.6270405223737276, "percentage": 12.54, "elapsed_time": "0:25:47", "remaining_time": "2:59:54", "throughput": 19924.1, "total_tokens": 30839360}
|
|
{"current_steps": 9800, "total_steps": 78105, "loss": 0.4785, "lr": 4.990139015118108e-06, "epoch": 0.6273606043147045, "percentage": 12.55, "elapsed_time": "0:25:48", "remaining_time": "2:59:52", "throughput": 19925.44, "total_tokens": 30854784}
|
|
{"current_steps": 9805, "total_steps": 78105, "loss": 0.5806, "lr": 4.990089383041e-06, "epoch": 0.6276806862556814, "percentage": 12.55, "elapsed_time": "0:25:49", "remaining_time": "2:59:51", "throughput": 19926.87, "total_tokens": 30870592}
|
|
{"current_steps": 9810, "total_steps": 78105, "loss": 0.5823, "lr": 4.9900396266217985e-06, "epoch": 0.6280007681966584, "percentage": 12.56, "elapsed_time": "0:25:49", "remaining_time": "2:59:49", "throughput": 19928.2, "total_tokens": 30885952}
|
|
{"current_steps": 9815, "total_steps": 78105, "loss": 0.4227, "lr": 4.9899897458629886e-06, "epoch": 0.6283208501376353, "percentage": 12.57, "elapsed_time": "0:25:50", "remaining_time": "2:59:48", "throughput": 19929.56, "total_tokens": 30902080}
|
|
{"current_steps": 9820, "total_steps": 78105, "loss": 0.519, "lr": 4.989939740767061e-06, "epoch": 0.6286409320786122, "percentage": 12.57, "elapsed_time": "0:25:51", "remaining_time": "2:59:47", "throughput": 19931.36, "total_tokens": 30919232}
|
|
{"current_steps": 9825, "total_steps": 78105, "loss": 0.4413, "lr": 4.989889611336512e-06, "epoch": 0.628961014019589, "percentage": 12.58, "elapsed_time": "0:25:52", "remaining_time": "2:59:45", "throughput": 19933.19, "total_tokens": 30936448}
|
|
{"current_steps": 9830, "total_steps": 78105, "loss": 0.3904, "lr": 4.989839357573845e-06, "epoch": 0.6292810959605659, "percentage": 12.59, "elapsed_time": "0:25:52", "remaining_time": "2:59:44", "throughput": 19934.64, "total_tokens": 30952256}
|
|
{"current_steps": 9835, "total_steps": 78105, "loss": 0.527, "lr": 4.98978897948157e-06, "epoch": 0.6296011779015428, "percentage": 12.59, "elapsed_time": "0:25:53", "remaining_time": "2:59:42", "throughput": 19935.86, "total_tokens": 30967360}
|
|
{"current_steps": 9840, "total_steps": 78105, "loss": 0.5358, "lr": 4.989738477062201e-06, "epoch": 0.6299212598425197, "percentage": 12.6, "elapsed_time": "0:25:54", "remaining_time": "2:59:40", "throughput": 19936.92, "total_tokens": 30982016}
|
|
{"current_steps": 9845, "total_steps": 78105, "loss": 0.4595, "lr": 4.989687850318262e-06, "epoch": 0.6302413417834966, "percentage": 12.6, "elapsed_time": "0:25:54", "remaining_time": "2:59:39", "throughput": 19938.39, "total_tokens": 30998080}
|
|
{"current_steps": 9850, "total_steps": 78105, "loss": 0.4813, "lr": 4.98963709925228e-06, "epoch": 0.6305614237244734, "percentage": 12.61, "elapsed_time": "0:25:55", "remaining_time": "2:59:37", "throughput": 19939.62, "total_tokens": 31013184}
|
|
{"current_steps": 9855, "total_steps": 78105, "loss": 0.5403, "lr": 4.98958622386679e-06, "epoch": 0.6308815056654503, "percentage": 12.62, "elapsed_time": "0:25:56", "remaining_time": "2:59:36", "throughput": 19941.02, "total_tokens": 31028800}
|
|
{"current_steps": 9860, "total_steps": 78105, "loss": 0.5813, "lr": 4.989535224164331e-06, "epoch": 0.6312015876064272, "percentage": 12.62, "elapsed_time": "0:25:56", "remaining_time": "2:59:34", "throughput": 19942.53, "total_tokens": 31044736}
|
|
{"current_steps": 9865, "total_steps": 78105, "loss": 0.4064, "lr": 4.98948410014745e-06, "epoch": 0.6315216695474042, "percentage": 12.63, "elapsed_time": "0:25:57", "remaining_time": "2:59:32", "throughput": 19943.84, "total_tokens": 31059968}
|
|
{"current_steps": 9870, "total_steps": 78105, "loss": 0.4707, "lr": 4.989432851818701e-06, "epoch": 0.6318417514883811, "percentage": 12.64, "elapsed_time": "0:25:58", "remaining_time": "2:59:31", "throughput": 19945.03, "total_tokens": 31074880}
|
|
{"current_steps": 9875, "total_steps": 78105, "loss": 0.4296, "lr": 4.989381479180643e-06, "epoch": 0.6321618334293579, "percentage": 12.64, "elapsed_time": "0:25:58", "remaining_time": "2:59:29", "throughput": 19946.41, "total_tokens": 31090432}
|
|
{"current_steps": 9880, "total_steps": 78105, "loss": 0.6233, "lr": 4.989329982235839e-06, "epoch": 0.6324819153703348, "percentage": 12.65, "elapsed_time": "0:25:59", "remaining_time": "2:59:28", "throughput": 19948.01, "total_tokens": 31107264}
|
|
{"current_steps": 9885, "total_steps": 78105, "loss": 0.4683, "lr": 4.989278360986864e-06, "epoch": 0.6328019973113117, "percentage": 12.66, "elapsed_time": "0:26:00", "remaining_time": "2:59:27", "throughput": 19949.68, "total_tokens": 31124096}
|
|
{"current_steps": 9890, "total_steps": 78105, "loss": 0.4671, "lr": 4.9892266154362915e-06, "epoch": 0.6331220792522886, "percentage": 12.66, "elapsed_time": "0:26:00", "remaining_time": "2:59:25", "throughput": 19950.87, "total_tokens": 31138752}
|
|
{"current_steps": 9895, "total_steps": 78105, "loss": 0.6549, "lr": 4.989174745586709e-06, "epoch": 0.6334421611932655, "percentage": 12.67, "elapsed_time": "0:26:01", "remaining_time": "2:59:23", "throughput": 19952.16, "total_tokens": 31154048}
|
|
{"current_steps": 9900, "total_steps": 78105, "loss": 0.7086, "lr": 4.989122751440706e-06, "epoch": 0.6337622431342423, "percentage": 12.68, "elapsed_time": "0:26:02", "remaining_time": "2:59:22", "throughput": 19953.79, "total_tokens": 31170304}
|
|
{"current_steps": 9905, "total_steps": 78105, "loss": 0.4503, "lr": 4.989070633000877e-06, "epoch": 0.6340823250752192, "percentage": 12.68, "elapsed_time": "0:26:02", "remaining_time": "2:59:20", "throughput": 19955.26, "total_tokens": 31186368}
|
|
{"current_steps": 9910, "total_steps": 78105, "loss": 0.5373, "lr": 4.989018390269827e-06, "epoch": 0.6344024070161961, "percentage": 12.69, "elapsed_time": "0:26:03", "remaining_time": "2:59:18", "throughput": 19956.61, "total_tokens": 31201664}
|
|
{"current_steps": 9915, "total_steps": 78105, "loss": 0.6288, "lr": 4.988966023250162e-06, "epoch": 0.634722488957173, "percentage": 12.69, "elapsed_time": "0:26:04", "remaining_time": "2:59:17", "throughput": 19957.87, "total_tokens": 31217216}
|
|
{"current_steps": 9920, "total_steps": 78105, "loss": 0.4829, "lr": 4.988913531944498e-06, "epoch": 0.63504257089815, "percentage": 12.7, "elapsed_time": "0:26:04", "remaining_time": "2:59:16", "throughput": 19959.46, "total_tokens": 31233728}
|
|
{"current_steps": 9925, "total_steps": 78105, "loss": 0.5572, "lr": 4.988860916355458e-06, "epoch": 0.6353626528391269, "percentage": 12.71, "elapsed_time": "0:26:05", "remaining_time": "2:59:14", "throughput": 19960.89, "total_tokens": 31249664}
|
|
{"current_steps": 9930, "total_steps": 78105, "loss": 0.4233, "lr": 4.988808176485668e-06, "epoch": 0.6356827347801037, "percentage": 12.71, "elapsed_time": "0:26:06", "remaining_time": "2:59:13", "throughput": 19962.29, "total_tokens": 31265408}
|
|
{"current_steps": 9935, "total_steps": 78105, "loss": 0.3763, "lr": 4.98875531233776e-06, "epoch": 0.6360028167210806, "percentage": 12.72, "elapsed_time": "0:26:06", "remaining_time": "2:59:11", "throughput": 19963.66, "total_tokens": 31281088}
|
|
{"current_steps": 9940, "total_steps": 78105, "loss": 0.5096, "lr": 4.9887023239143766e-06, "epoch": 0.6363228986620575, "percentage": 12.73, "elapsed_time": "0:26:07", "remaining_time": "2:59:09", "throughput": 19964.95, "total_tokens": 31296512}
|
|
{"current_steps": 9945, "total_steps": 78105, "loss": 0.5026, "lr": 4.988649211218161e-06, "epoch": 0.6366429806030344, "percentage": 12.73, "elapsed_time": "0:26:08", "remaining_time": "2:59:08", "throughput": 19966.38, "total_tokens": 31312000}
|
|
{"current_steps": 9950, "total_steps": 78105, "loss": 0.4883, "lr": 4.988595974251768e-06, "epoch": 0.6369630625440112, "percentage": 12.74, "elapsed_time": "0:26:08", "remaining_time": "2:59:06", "throughput": 19967.62, "total_tokens": 31327488}
|
|
{"current_steps": 9955, "total_steps": 78105, "loss": 0.4126, "lr": 4.988542613017853e-06, "epoch": 0.6372831444849881, "percentage": 12.75, "elapsed_time": "0:26:09", "remaining_time": "2:59:05", "throughput": 19969.19, "total_tokens": 31343936}
|
|
{"current_steps": 9960, "total_steps": 78105, "loss": 0.4868, "lr": 4.988489127519084e-06, "epoch": 0.637603226425965, "percentage": 12.75, "elapsed_time": "0:26:10", "remaining_time": "2:59:03", "throughput": 19970.45, "total_tokens": 31359232}
|
|
{"current_steps": 9965, "total_steps": 78105, "loss": 0.4822, "lr": 4.988435517758129e-06, "epoch": 0.6379233083669419, "percentage": 12.76, "elapsed_time": "0:26:10", "remaining_time": "2:59:02", "throughput": 19972.02, "total_tokens": 31375616}
|
|
{"current_steps": 9970, "total_steps": 78105, "loss": 0.5156, "lr": 4.988381783737666e-06, "epoch": 0.6382433903079189, "percentage": 12.76, "elapsed_time": "0:26:11", "remaining_time": "2:59:00", "throughput": 19973.5, "total_tokens": 31391680}
|
|
{"current_steps": 9975, "total_steps": 78105, "loss": 0.503, "lr": 4.988327925460378e-06, "epoch": 0.6385634722488958, "percentage": 12.77, "elapsed_time": "0:26:12", "remaining_time": "2:58:59", "throughput": 19974.91, "total_tokens": 31407424}
|
|
{"current_steps": 9980, "total_steps": 78105, "loss": 0.4856, "lr": 4.988273942928955e-06, "epoch": 0.6388835541898726, "percentage": 12.78, "elapsed_time": "0:26:13", "remaining_time": "2:58:57", "throughput": 19976.23, "total_tokens": 31422912}
|
|
{"current_steps": 9985, "total_steps": 78105, "loss": 0.5529, "lr": 4.988219836146092e-06, "epoch": 0.6392036361308495, "percentage": 12.78, "elapsed_time": "0:26:13", "remaining_time": "2:58:56", "throughput": 19977.74, "total_tokens": 31438912}
|
|
{"current_steps": 9990, "total_steps": 78105, "loss": 0.404, "lr": 4.988165605114492e-06, "epoch": 0.6395237180718264, "percentage": 12.79, "elapsed_time": "0:26:14", "remaining_time": "2:58:54", "throughput": 19979.31, "total_tokens": 31455168}
|
|
{"current_steps": 9995, "total_steps": 78105, "loss": 0.4475, "lr": 4.988111249836861e-06, "epoch": 0.6398438000128033, "percentage": 12.8, "elapsed_time": "0:26:15", "remaining_time": "2:58:53", "throughput": 19980.68, "total_tokens": 31470592}
|
|
{"current_steps": 10000, "total_steps": 78105, "loss": 0.4538, "lr": 4.988056770315915e-06, "epoch": 0.6401638819537802, "percentage": 12.8, "elapsed_time": "0:26:15", "remaining_time": "2:58:51", "throughput": 19981.77, "total_tokens": 31485632}
|
|
{"current_steps": 10005, "total_steps": 78105, "loss": 0.4255, "lr": 4.988002166554373e-06, "epoch": 0.640483963894757, "percentage": 12.81, "elapsed_time": "0:26:16", "remaining_time": "2:58:49", "throughput": 19982.87, "total_tokens": 31500352}
|
|
{"current_steps": 10010, "total_steps": 78105, "loss": 0.5204, "lr": 4.987947438554963e-06, "epoch": 0.6408040458357339, "percentage": 12.82, "elapsed_time": "0:26:17", "remaining_time": "2:58:48", "throughput": 19984.37, "total_tokens": 31516928}
|
|
{"current_steps": 10015, "total_steps": 78105, "loss": 0.4247, "lr": 4.987892586320417e-06, "epoch": 0.6411241277767108, "percentage": 12.82, "elapsed_time": "0:26:17", "remaining_time": "2:58:46", "throughput": 19985.61, "total_tokens": 31532608}
|
|
{"current_steps": 10020, "total_steps": 78105, "loss": 0.6965, "lr": 4.987837609853474e-06, "epoch": 0.6414442097176877, "percentage": 12.83, "elapsed_time": "0:26:18", "remaining_time": "2:58:45", "throughput": 19987.14, "total_tokens": 31548608}
|
|
{"current_steps": 10025, "total_steps": 78105, "loss": 0.6434, "lr": 4.98778250915688e-06, "epoch": 0.6417642916586647, "percentage": 12.84, "elapsed_time": "0:26:19", "remaining_time": "2:58:43", "throughput": 19988.47, "total_tokens": 31564224}
|
|
{"current_steps": 10030, "total_steps": 78105, "loss": 0.5233, "lr": 4.9877272842333855e-06, "epoch": 0.6420843735996415, "percentage": 12.84, "elapsed_time": "0:26:19", "remaining_time": "2:58:42", "throughput": 19989.64, "total_tokens": 31579200}
|
|
{"current_steps": 10035, "total_steps": 78105, "loss": 0.5963, "lr": 4.987671935085749e-06, "epoch": 0.6424044555406184, "percentage": 12.85, "elapsed_time": "0:26:20", "remaining_time": "2:58:40", "throughput": 19990.97, "total_tokens": 31594816}
|
|
{"current_steps": 10040, "total_steps": 78105, "loss": 0.4608, "lr": 4.987616461716732e-06, "epoch": 0.6427245374815953, "percentage": 12.85, "elapsed_time": "0:26:21", "remaining_time": "2:58:39", "throughput": 19992.35, "total_tokens": 31610432}
|
|
{"current_steps": 10045, "total_steps": 78105, "loss": 0.5298, "lr": 4.987560864129109e-06, "epoch": 0.6430446194225722, "percentage": 12.86, "elapsed_time": "0:26:21", "remaining_time": "2:58:37", "throughput": 19994.11, "total_tokens": 31627520}
|
|
{"current_steps": 10050, "total_steps": 78105, "loss": 0.4993, "lr": 4.9875051423256515e-06, "epoch": 0.6433647013635491, "percentage": 12.87, "elapsed_time": "0:26:22", "remaining_time": "2:58:36", "throughput": 19995.76, "total_tokens": 31644352}
|
|
{"current_steps": 10055, "total_steps": 78105, "loss": 0.6124, "lr": 4.987449296309145e-06, "epoch": 0.6436847833045259, "percentage": 12.87, "elapsed_time": "0:26:23", "remaining_time": "2:58:35", "throughput": 19997.02, "total_tokens": 31660160}
|
|
{"current_steps": 10060, "total_steps": 78105, "loss": 0.597, "lr": 4.987393326082377e-06, "epoch": 0.6440048652455028, "percentage": 12.88, "elapsed_time": "0:26:23", "remaining_time": "2:58:33", "throughput": 19998.54, "total_tokens": 31676352}
|
|
{"current_steps": 10065, "total_steps": 78105, "loss": 0.4995, "lr": 4.987337231648143e-06, "epoch": 0.6443249471864797, "percentage": 12.89, "elapsed_time": "0:26:24", "remaining_time": "2:58:32", "throughput": 19999.98, "total_tokens": 31692160}
|
|
{"current_steps": 10070, "total_steps": 78105, "loss": 0.475, "lr": 4.987281013009244e-06, "epoch": 0.6446450291274566, "percentage": 12.89, "elapsed_time": "0:26:25", "remaining_time": "2:58:30", "throughput": 20001.29, "total_tokens": 31707712}
|
|
{"current_steps": 10075, "total_steps": 78105, "loss": 0.385, "lr": 4.987224670168487e-06, "epoch": 0.6449651110684336, "percentage": 12.9, "elapsed_time": "0:26:25", "remaining_time": "2:58:29", "throughput": 20002.81, "total_tokens": 31723776}
|
|
{"current_steps": 10080, "total_steps": 78105, "loss": 0.5022, "lr": 4.987168203128685e-06, "epoch": 0.6452851930094105, "percentage": 12.91, "elapsed_time": "0:26:26", "remaining_time": "2:58:27", "throughput": 20004.51, "total_tokens": 31740672}
|
|
{"current_steps": 10085, "total_steps": 78105, "loss": 0.3749, "lr": 4.987111611892658e-06, "epoch": 0.6456052749503873, "percentage": 12.91, "elapsed_time": "0:26:27", "remaining_time": "2:58:26", "throughput": 20005.73, "total_tokens": 31755648}
|
|
{"current_steps": 10090, "total_steps": 78105, "loss": 0.6307, "lr": 4.987054896463233e-06, "epoch": 0.6459253568913642, "percentage": 12.92, "elapsed_time": "0:26:27", "remaining_time": "2:58:24", "throughput": 20006.98, "total_tokens": 31770624}
|
|
{"current_steps": 10095, "total_steps": 78105, "loss": 0.5458, "lr": 4.9869980568432395e-06, "epoch": 0.6462454388323411, "percentage": 12.92, "elapsed_time": "0:26:28", "remaining_time": "2:58:22", "throughput": 20008.41, "total_tokens": 31786816}
|
|
{"current_steps": 10100, "total_steps": 78105, "loss": 0.4628, "lr": 4.986941093035519e-06, "epoch": 0.646565520773318, "percentage": 12.93, "elapsed_time": "0:26:29", "remaining_time": "2:58:21", "throughput": 20009.62, "total_tokens": 31801792}
|
|
{"current_steps": 10105, "total_steps": 78105, "loss": 0.5302, "lr": 4.986884005042913e-06, "epoch": 0.6468856027142948, "percentage": 12.94, "elapsed_time": "0:26:30", "remaining_time": "2:58:19", "throughput": 20011.33, "total_tokens": 31818944}
|
|
{"current_steps": 10110, "total_steps": 78105, "loss": 0.5139, "lr": 4.986826792868274e-06, "epoch": 0.6472056846552717, "percentage": 12.94, "elapsed_time": "0:26:30", "remaining_time": "2:58:18", "throughput": 20012.55, "total_tokens": 31834176}
|
|
{"current_steps": 10115, "total_steps": 78105, "loss": 0.4676, "lr": 4.986769456514459e-06, "epoch": 0.6475257665962486, "percentage": 12.95, "elapsed_time": "0:26:31", "remaining_time": "2:58:16", "throughput": 20013.97, "total_tokens": 31850176}
|
|
{"current_steps": 10120, "total_steps": 78105, "loss": 0.5846, "lr": 4.986711995984329e-06, "epoch": 0.6478458485372255, "percentage": 12.96, "elapsed_time": "0:26:32", "remaining_time": "2:58:15", "throughput": 20015.36, "total_tokens": 31865600}
|
|
{"current_steps": 10125, "total_steps": 78105, "loss": 0.5411, "lr": 4.986654411280756e-06, "epoch": 0.6481659304782024, "percentage": 12.96, "elapsed_time": "0:26:32", "remaining_time": "2:58:13", "throughput": 20016.52, "total_tokens": 31880512}
|
|
{"current_steps": 10130, "total_steps": 78105, "loss": 0.5552, "lr": 4.986596702406613e-06, "epoch": 0.6484860124191794, "percentage": 12.97, "elapsed_time": "0:26:33", "remaining_time": "2:58:12", "throughput": 20017.91, "total_tokens": 31896512}
|
|
{"current_steps": 10135, "total_steps": 78105, "loss": 0.5344, "lr": 4.986538869364784e-06, "epoch": 0.6488060943601562, "percentage": 12.98, "elapsed_time": "0:26:34", "remaining_time": "2:58:10", "throughput": 20019.34, "total_tokens": 31912768}
|
|
{"current_steps": 10140, "total_steps": 78105, "loss": 0.5518, "lr": 4.986480912158156e-06, "epoch": 0.6491261763011331, "percentage": 12.98, "elapsed_time": "0:26:34", "remaining_time": "2:58:09", "throughput": 20020.43, "total_tokens": 31927488}
|
|
{"current_steps": 10145, "total_steps": 78105, "loss": 0.448, "lr": 4.986422830789622e-06, "epoch": 0.64944625824211, "percentage": 12.99, "elapsed_time": "0:26:35", "remaining_time": "2:58:07", "throughput": 20021.69, "total_tokens": 31942784}
|
|
{"current_steps": 10150, "total_steps": 78105, "loss": 0.4547, "lr": 4.986364625262083e-06, "epoch": 0.6497663401830869, "percentage": 13.0, "elapsed_time": "0:26:36", "remaining_time": "2:58:05", "throughput": 20023.02, "total_tokens": 31958528}
|
|
{"current_steps": 10155, "total_steps": 78105, "loss": 0.4755, "lr": 4.986306295578446e-06, "epoch": 0.6500864221240638, "percentage": 13.0, "elapsed_time": "0:26:36", "remaining_time": "2:58:04", "throughput": 20024.25, "total_tokens": 31974016}
|
|
{"current_steps": 10160, "total_steps": 78105, "loss": 0.4424, "lr": 4.986247841741624e-06, "epoch": 0.6504065040650406, "percentage": 13.01, "elapsed_time": "0:26:37", "remaining_time": "2:58:03", "throughput": 20025.86, "total_tokens": 31990720}
|
|
{"current_steps": 10165, "total_steps": 78105, "loss": 0.4666, "lr": 4.986189263754534e-06, "epoch": 0.6507265860060175, "percentage": 13.01, "elapsed_time": "0:26:38", "remaining_time": "2:58:01", "throughput": 20027.46, "total_tokens": 32007168}
|
|
{"current_steps": 10170, "total_steps": 78105, "loss": 0.4939, "lr": 4.9861305616201024e-06, "epoch": 0.6510466679469944, "percentage": 13.02, "elapsed_time": "0:26:38", "remaining_time": "2:58:00", "throughput": 20028.66, "total_tokens": 32022208}
|
|
{"current_steps": 10175, "total_steps": 78105, "loss": 0.6421, "lr": 4.986071735341261e-06, "epoch": 0.6513667498879713, "percentage": 13.03, "elapsed_time": "0:26:39", "remaining_time": "2:57:58", "throughput": 20030.05, "total_tokens": 32037760}
|
|
{"current_steps": 10180, "total_steps": 78105, "loss": 0.4327, "lr": 4.986012784920946e-06, "epoch": 0.6516868318289483, "percentage": 13.03, "elapsed_time": "0:26:40", "remaining_time": "2:57:56", "throughput": 20031.27, "total_tokens": 32052800}
|
|
{"current_steps": 10185, "total_steps": 78105, "loss": 0.6118, "lr": 4.9859537103621026e-06, "epoch": 0.6520069137699251, "percentage": 13.04, "elapsed_time": "0:26:40", "remaining_time": "2:57:55", "throughput": 20032.52, "total_tokens": 32068288}
|
|
{"current_steps": 10190, "total_steps": 78105, "loss": 0.4767, "lr": 4.985894511667679e-06, "epoch": 0.652326995710902, "percentage": 13.05, "elapsed_time": "0:26:41", "remaining_time": "2:57:53", "throughput": 20033.59, "total_tokens": 32083200}
|
|
{"current_steps": 10195, "total_steps": 78105, "loss": 0.4958, "lr": 4.985835188840632e-06, "epoch": 0.6526470776518789, "percentage": 13.05, "elapsed_time": "0:26:42", "remaining_time": "2:57:51", "throughput": 20034.88, "total_tokens": 32098432}
|
|
{"current_steps": 10200, "total_steps": 78105, "loss": 0.4854, "lr": 4.985775741883924e-06, "epoch": 0.6529671595928558, "percentage": 13.06, "elapsed_time": "0:26:42", "remaining_time": "2:57:50", "throughput": 20036.46, "total_tokens": 32114944}
|
|
{"current_steps": 10205, "total_steps": 78105, "loss": 0.404, "lr": 4.985716170800524e-06, "epoch": 0.6532872415338327, "percentage": 13.07, "elapsed_time": "0:26:43", "remaining_time": "2:57:49", "throughput": 20038.05, "total_tokens": 32131520}
|
|
{"current_steps": 10210, "total_steps": 78105, "loss": 0.4456, "lr": 4.985656475593405e-06, "epoch": 0.6536073234748095, "percentage": 13.07, "elapsed_time": "0:26:44", "remaining_time": "2:57:47", "throughput": 20039.39, "total_tokens": 32147008}
|
|
{"current_steps": 10215, "total_steps": 78105, "loss": 0.5132, "lr": 4.98559665626555e-06, "epoch": 0.6539274054157864, "percentage": 13.08, "elapsed_time": "0:26:44", "remaining_time": "2:57:46", "throughput": 20040.66, "total_tokens": 32162560}
|
|
{"current_steps": 10220, "total_steps": 78105, "loss": 0.4752, "lr": 4.985536712819944e-06, "epoch": 0.6542474873567633, "percentage": 13.08, "elapsed_time": "0:26:45", "remaining_time": "2:57:44", "throughput": 20041.66, "total_tokens": 32177024}
|
|
{"current_steps": 10225, "total_steps": 78105, "loss": 0.5257, "lr": 4.9854766452595815e-06, "epoch": 0.6545675692977402, "percentage": 13.09, "elapsed_time": "0:26:46", "remaining_time": "2:57:42", "throughput": 20043.02, "total_tokens": 32193024}
|
|
{"current_steps": 10230, "total_steps": 78105, "loss": 0.4498, "lr": 4.985416453587462e-06, "epoch": 0.6548876512387171, "percentage": 13.1, "elapsed_time": "0:26:46", "remaining_time": "2:57:41", "throughput": 20044.33, "total_tokens": 32208896}
|
|
{"current_steps": 10235, "total_steps": 78105, "loss": 0.4368, "lr": 4.985356137806591e-06, "epoch": 0.655207733179694, "percentage": 13.1, "elapsed_time": "0:26:47", "remaining_time": "2:57:40", "throughput": 20045.77, "total_tokens": 32225280}
|
|
{"current_steps": 10240, "total_steps": 78105, "loss": 0.4657, "lr": 4.98529569791998e-06, "epoch": 0.6555278151206709, "percentage": 13.11, "elapsed_time": "0:26:48", "remaining_time": "2:57:38", "throughput": 20047.0, "total_tokens": 32240768}
|
|
{"current_steps": 10245, "total_steps": 78105, "loss": 0.5121, "lr": 4.985235133930647e-06, "epoch": 0.6558478970616478, "percentage": 13.12, "elapsed_time": "0:26:48", "remaining_time": "2:57:37", "throughput": 20048.21, "total_tokens": 32255872}
|
|
{"current_steps": 10250, "total_steps": 78105, "loss": 0.5042, "lr": 4.9851744458416175e-06, "epoch": 0.6561679790026247, "percentage": 13.12, "elapsed_time": "0:26:49", "remaining_time": "2:57:35", "throughput": 20049.76, "total_tokens": 32272512}
|
|
{"current_steps": 10255, "total_steps": 78105, "loss": 0.5399, "lr": 4.98511363365592e-06, "epoch": 0.6564880609436016, "percentage": 13.13, "elapsed_time": "0:26:50", "remaining_time": "2:57:33", "throughput": 20050.62, "total_tokens": 32286272}
|
|
{"current_steps": 10260, "total_steps": 78105, "loss": 0.5322, "lr": 4.985052697376592e-06, "epoch": 0.6568081428845784, "percentage": 13.14, "elapsed_time": "0:26:50", "remaining_time": "2:57:32", "throughput": 20051.98, "total_tokens": 32301952}
|
|
{"current_steps": 10265, "total_steps": 78105, "loss": 0.6091, "lr": 4.984991637006677e-06, "epoch": 0.6571282248255553, "percentage": 13.14, "elapsed_time": "0:26:51", "remaining_time": "2:57:30", "throughput": 20053.37, "total_tokens": 32318208}
|
|
{"current_steps": 10270, "total_steps": 78105, "loss": 0.5593, "lr": 4.984930452549224e-06, "epoch": 0.6574483067665322, "percentage": 13.15, "elapsed_time": "0:26:52", "remaining_time": "2:57:29", "throughput": 20054.57, "total_tokens": 32333504}
|
|
{"current_steps": 10275, "total_steps": 78105, "loss": 0.5094, "lr": 4.984869144007286e-06, "epoch": 0.6577683887075091, "percentage": 13.16, "elapsed_time": "0:26:52", "remaining_time": "2:57:28", "throughput": 20056.22, "total_tokens": 32350208}
|
|
{"current_steps": 10280, "total_steps": 78105, "loss": 0.5346, "lr": 4.984807711383928e-06, "epoch": 0.658088470648486, "percentage": 13.16, "elapsed_time": "0:26:53", "remaining_time": "2:57:26", "throughput": 20057.45, "total_tokens": 32365376}
|
|
{"current_steps": 10285, "total_steps": 78105, "loss": 0.4705, "lr": 4.984746154682215e-06, "epoch": 0.6584085525894628, "percentage": 13.17, "elapsed_time": "0:26:54", "remaining_time": "2:57:24", "throughput": 20058.84, "total_tokens": 32381248}
|
|
{"current_steps": 10290, "total_steps": 78105, "loss": 0.5344, "lr": 4.984684473905221e-06, "epoch": 0.6587286345304398, "percentage": 13.17, "elapsed_time": "0:26:54", "remaining_time": "2:57:23", "throughput": 20059.89, "total_tokens": 32395968}
|
|
{"current_steps": 10295, "total_steps": 78105, "loss": 0.459, "lr": 4.9846226690560275e-06, "epoch": 0.6590487164714167, "percentage": 13.18, "elapsed_time": "0:26:55", "remaining_time": "2:57:21", "throughput": 20061.22, "total_tokens": 32411456}
|
|
{"current_steps": 10300, "total_steps": 78105, "loss": 0.4806, "lr": 4.984560740137719e-06, "epoch": 0.6593687984123936, "percentage": 13.19, "elapsed_time": "0:26:56", "remaining_time": "2:57:20", "throughput": 20062.51, "total_tokens": 32426880}
|
|
{"current_steps": 10305, "total_steps": 78105, "loss": 0.5004, "lr": 4.98449868715339e-06, "epoch": 0.6596888803533705, "percentage": 13.19, "elapsed_time": "0:26:56", "remaining_time": "2:57:18", "throughput": 20063.84, "total_tokens": 32442368}
|
|
{"current_steps": 10310, "total_steps": 78105, "loss": 0.5125, "lr": 4.984436510106137e-06, "epoch": 0.6600089622943474, "percentage": 13.2, "elapsed_time": "0:26:57", "remaining_time": "2:57:16", "throughput": 20065.09, "total_tokens": 32457920}
|
|
{"current_steps": 10315, "total_steps": 78105, "loss": 0.4846, "lr": 4.984374208999066e-06, "epoch": 0.6603290442353242, "percentage": 13.21, "elapsed_time": "0:26:58", "remaining_time": "2:57:15", "throughput": 20066.45, "total_tokens": 32473536}
|
|
{"current_steps": 10320, "total_steps": 78105, "loss": 0.4919, "lr": 4.984311783835287e-06, "epoch": 0.6606491261763011, "percentage": 13.21, "elapsed_time": "0:26:59", "remaining_time": "2:57:14", "throughput": 20067.95, "total_tokens": 32490240}
|
|
{"current_steps": 10325, "total_steps": 78105, "loss": 0.4298, "lr": 4.984249234617919e-06, "epoch": 0.660969208117278, "percentage": 13.22, "elapsed_time": "0:26:59", "remaining_time": "2:57:12", "throughput": 20069.16, "total_tokens": 32505728}
|
|
{"current_steps": 10330, "total_steps": 78105, "loss": 0.5663, "lr": 4.984186561350083e-06, "epoch": 0.6612892900582549, "percentage": 13.23, "elapsed_time": "0:27:00", "remaining_time": "2:57:11", "throughput": 20070.23, "total_tokens": 32520960}
|
|
{"current_steps": 10335, "total_steps": 78105, "loss": 0.5126, "lr": 4.98412376403491e-06, "epoch": 0.6616093719992318, "percentage": 13.23, "elapsed_time": "0:27:01", "remaining_time": "2:57:09", "throughput": 20071.38, "total_tokens": 32535872}
|
|
{"current_steps": 10340, "total_steps": 78105, "loss": 0.4722, "lr": 4.984060842675536e-06, "epoch": 0.6619294539402087, "percentage": 13.24, "elapsed_time": "0:27:01", "remaining_time": "2:57:08", "throughput": 20072.87, "total_tokens": 32552448}
|
|
{"current_steps": 10345, "total_steps": 78105, "loss": 0.4801, "lr": 4.983997797275103e-06, "epoch": 0.6622495358811856, "percentage": 13.24, "elapsed_time": "0:27:02", "remaining_time": "2:57:06", "throughput": 20073.92, "total_tokens": 32567232}
|
|
{"current_steps": 10350, "total_steps": 78105, "loss": 0.4348, "lr": 4.983934627836758e-06, "epoch": 0.6625696178221625, "percentage": 13.25, "elapsed_time": "0:27:03", "remaining_time": "2:57:05", "throughput": 20075.41, "total_tokens": 32583360}
|
|
{"current_steps": 10355, "total_steps": 78105, "loss": 0.4362, "lr": 4.983871334363656e-06, "epoch": 0.6628896997631394, "percentage": 13.26, "elapsed_time": "0:27:03", "remaining_time": "2:57:03", "throughput": 20076.68, "total_tokens": 32598656}
|
|
{"current_steps": 10360, "total_steps": 78105, "loss": 0.4203, "lr": 4.9838079168589574e-06, "epoch": 0.6632097817041163, "percentage": 13.26, "elapsed_time": "0:27:04", "remaining_time": "2:57:01", "throughput": 20077.9, "total_tokens": 32614144}
|
|
{"current_steps": 10365, "total_steps": 78105, "loss": 0.5201, "lr": 4.98374437532583e-06, "epoch": 0.6635298636450931, "percentage": 13.27, "elapsed_time": "0:27:05", "remaining_time": "2:57:00", "throughput": 20079.03, "total_tokens": 32629120}
|
|
{"current_steps": 10370, "total_steps": 78105, "loss": 0.6147, "lr": 4.9836807097674445e-06, "epoch": 0.66384994558607, "percentage": 13.28, "elapsed_time": "0:27:05", "remaining_time": "2:56:59", "throughput": 20080.62, "total_tokens": 32645952}
|
|
{"current_steps": 10375, "total_steps": 78105, "loss": 0.6087, "lr": 4.983616920186982e-06, "epoch": 0.6641700275270469, "percentage": 13.28, "elapsed_time": "0:27:06", "remaining_time": "2:56:57", "throughput": 20081.97, "total_tokens": 32661696}
|
|
{"current_steps": 10380, "total_steps": 78105, "loss": 0.5026, "lr": 4.983553006587627e-06, "epoch": 0.6644901094680238, "percentage": 13.29, "elapsed_time": "0:27:07", "remaining_time": "2:56:56", "throughput": 20083.39, "total_tokens": 32677760}
|
|
{"current_steps": 10385, "total_steps": 78105, "loss": 0.4457, "lr": 4.983488968972572e-06, "epoch": 0.6648101914090007, "percentage": 13.3, "elapsed_time": "0:27:07", "remaining_time": "2:56:54", "throughput": 20084.7, "total_tokens": 32693568}
|
|
{"current_steps": 10390, "total_steps": 78105, "loss": 0.4913, "lr": 4.983424807345013e-06, "epoch": 0.6651302733499775, "percentage": 13.3, "elapsed_time": "0:27:08", "remaining_time": "2:56:53", "throughput": 20085.93, "total_tokens": 32708736}
|
|
{"current_steps": 10395, "total_steps": 78105, "loss": 0.4128, "lr": 4.983360521708156e-06, "epoch": 0.6654503552909545, "percentage": 13.31, "elapsed_time": "0:27:09", "remaining_time": "2:56:51", "throughput": 20087.46, "total_tokens": 32725440}
|
|
{"current_steps": 10400, "total_steps": 78105, "loss": 0.4205, "lr": 4.9832961120652085e-06, "epoch": 0.6657704372319314, "percentage": 13.32, "elapsed_time": "0:27:09", "remaining_time": "2:56:50", "throughput": 20088.66, "total_tokens": 32740608}
|
|
{"current_steps": 10405, "total_steps": 78105, "loss": 0.6104, "lr": 4.983231578419389e-06, "epoch": 0.6660905191729083, "percentage": 13.32, "elapsed_time": "0:27:10", "remaining_time": "2:56:48", "throughput": 20089.93, "total_tokens": 32756416}
|
|
{"current_steps": 10410, "total_steps": 78105, "loss": 0.3898, "lr": 4.983166920773919e-06, "epoch": 0.6664106011138852, "percentage": 13.33, "elapsed_time": "0:27:11", "remaining_time": "2:56:47", "throughput": 20091.26, "total_tokens": 32771904}
|
|
{"current_steps": 10415, "total_steps": 78105, "loss": 0.6124, "lr": 4.983102139132027e-06, "epoch": 0.666730683054862, "percentage": 13.33, "elapsed_time": "0:27:11", "remaining_time": "2:56:45", "throughput": 20092.54, "total_tokens": 32787392}
|
|
{"current_steps": 10420, "total_steps": 78105, "loss": 0.4642, "lr": 4.983037233496948e-06, "epoch": 0.6670507649958389, "percentage": 13.34, "elapsed_time": "0:27:12", "remaining_time": "2:56:44", "throughput": 20093.9, "total_tokens": 32803136}
|
|
{"current_steps": 10425, "total_steps": 78105, "loss": 0.4817, "lr": 4.982972203871924e-06, "epoch": 0.6673708469368158, "percentage": 13.35, "elapsed_time": "0:27:13", "remaining_time": "2:56:42", "throughput": 20095.09, "total_tokens": 32818624}
|
|
{"current_steps": 10430, "total_steps": 78105, "loss": 0.4955, "lr": 4.9829070502602e-06, "epoch": 0.6676909288777927, "percentage": 13.35, "elapsed_time": "0:27:13", "remaining_time": "2:56:41", "throughput": 20096.36, "total_tokens": 32833920}
|
|
{"current_steps": 10435, "total_steps": 78105, "loss": 0.4401, "lr": 4.982841772665033e-06, "epoch": 0.6680110108187696, "percentage": 13.36, "elapsed_time": "0:27:14", "remaining_time": "2:56:39", "throughput": 20097.43, "total_tokens": 32848512}
|
|
{"current_steps": 10440, "total_steps": 78105, "loss": 0.4935, "lr": 4.9827763710896795e-06, "epoch": 0.6683310927597464, "percentage": 13.37, "elapsed_time": "0:27:15", "remaining_time": "2:56:38", "throughput": 20098.96, "total_tokens": 32865088}
|
|
{"current_steps": 10445, "total_steps": 78105, "loss": 0.6524, "lr": 4.982710845537406e-06, "epoch": 0.6686511747007234, "percentage": 13.37, "elapsed_time": "0:27:15", "remaining_time": "2:56:36", "throughput": 20100.58, "total_tokens": 32882048}
|
|
{"current_steps": 10450, "total_steps": 78105, "loss": 0.4655, "lr": 4.982645196011485e-06, "epoch": 0.6689712566417003, "percentage": 13.38, "elapsed_time": "0:27:16", "remaining_time": "2:56:35", "throughput": 20101.97, "total_tokens": 32898624}
|
|
{"current_steps": 10455, "total_steps": 78105, "loss": 0.6318, "lr": 4.9825794225151956e-06, "epoch": 0.6692913385826772, "percentage": 13.39, "elapsed_time": "0:27:17", "remaining_time": "2:56:34", "throughput": 20103.16, "total_tokens": 32914176}
|
|
{"current_steps": 10460, "total_steps": 78105, "loss": 0.4735, "lr": 4.98251352505182e-06, "epoch": 0.6696114205236541, "percentage": 13.39, "elapsed_time": "0:27:17", "remaining_time": "2:56:32", "throughput": 20104.74, "total_tokens": 32931136}
|
|
{"current_steps": 10465, "total_steps": 78105, "loss": 0.4761, "lr": 4.98244750362465e-06, "epoch": 0.669931502464631, "percentage": 13.4, "elapsed_time": "0:27:18", "remaining_time": "2:56:31", "throughput": 20106.17, "total_tokens": 32947648}
|
|
{"current_steps": 10470, "total_steps": 78105, "loss": 0.4036, "lr": 4.982381358236983e-06, "epoch": 0.6702515844056078, "percentage": 13.41, "elapsed_time": "0:27:19", "remaining_time": "2:56:30", "throughput": 20107.62, "total_tokens": 32964224}
|
|
{"current_steps": 10475, "total_steps": 78105, "loss": 0.6034, "lr": 4.98231508889212e-06, "epoch": 0.6705716663465847, "percentage": 13.41, "elapsed_time": "0:27:20", "remaining_time": "2:56:29", "throughput": 20109.45, "total_tokens": 32982080}
|
|
{"current_steps": 10480, "total_steps": 78105, "loss": 0.6726, "lr": 4.9822486955933725e-06, "epoch": 0.6708917482875616, "percentage": 13.42, "elapsed_time": "0:27:20", "remaining_time": "2:56:27", "throughput": 20110.63, "total_tokens": 32997440}
|
|
{"current_steps": 10485, "total_steps": 78105, "loss": 0.5825, "lr": 4.982182178344054e-06, "epoch": 0.6712118302285385, "percentage": 13.42, "elapsed_time": "0:27:21", "remaining_time": "2:56:26", "throughput": 20111.9, "total_tokens": 33013056}
|
|
{"current_steps": 10490, "total_steps": 78105, "loss": 0.5856, "lr": 4.982115537147487e-06, "epoch": 0.6715319121695154, "percentage": 13.43, "elapsed_time": "0:27:22", "remaining_time": "2:56:24", "throughput": 20113.03, "total_tokens": 33028416}
|
|
{"current_steps": 10495, "total_steps": 78105, "loss": 0.5513, "lr": 4.982048772006998e-06, "epoch": 0.6718519941104922, "percentage": 13.44, "elapsed_time": "0:27:22", "remaining_time": "2:56:22", "throughput": 20114.02, "total_tokens": 33042880}
|
|
{"current_steps": 10500, "total_steps": 78105, "loss": 0.408, "lr": 4.981981882925924e-06, "epoch": 0.6721720760514692, "percentage": 13.44, "elapsed_time": "0:27:23", "remaining_time": "2:56:21", "throughput": 20115.19, "total_tokens": 33058240}
|
|
{"current_steps": 10505, "total_steps": 78105, "loss": 0.5072, "lr": 4.981914869907601e-06, "epoch": 0.6724921579924461, "percentage": 13.45, "elapsed_time": "0:27:24", "remaining_time": "2:56:19", "throughput": 20116.46, "total_tokens": 33073920}
|
|
{"current_steps": 10510, "total_steps": 78105, "loss": 0.4809, "lr": 4.981847732955378e-06, "epoch": 0.672812239933423, "percentage": 13.46, "elapsed_time": "0:27:24", "remaining_time": "2:56:18", "throughput": 20117.65, "total_tokens": 33089344}
|
|
{"current_steps": 10515, "total_steps": 78105, "loss": 0.477, "lr": 4.9817804720726074e-06, "epoch": 0.6731323218743999, "percentage": 13.46, "elapsed_time": "0:27:25", "remaining_time": "2:56:16", "throughput": 20118.74, "total_tokens": 33104384}
|
|
{"current_steps": 10520, "total_steps": 78105, "loss": 0.5688, "lr": 4.981713087262646e-06, "epoch": 0.6734524038153767, "percentage": 13.47, "elapsed_time": "0:27:26", "remaining_time": "2:56:15", "throughput": 20119.98, "total_tokens": 33120064}
|
|
{"current_steps": 10525, "total_steps": 78105, "loss": 0.5787, "lr": 4.98164557852886e-06, "epoch": 0.6737724857563536, "percentage": 13.48, "elapsed_time": "0:27:26", "remaining_time": "2:56:14", "throughput": 20121.27, "total_tokens": 33136128}
|
|
{"current_steps": 10530, "total_steps": 78105, "loss": 0.4445, "lr": 4.981577945874621e-06, "epoch": 0.6740925676973305, "percentage": 13.48, "elapsed_time": "0:27:27", "remaining_time": "2:56:13", "throughput": 20123.19, "total_tokens": 33154816}
|
|
{"current_steps": 10535, "total_steps": 78105, "loss": 0.6317, "lr": 4.981510189303305e-06, "epoch": 0.6744126496383074, "percentage": 13.49, "elapsed_time": "0:27:28", "remaining_time": "2:56:11", "throughput": 20124.35, "total_tokens": 33169920}
|
|
{"current_steps": 10540, "total_steps": 78105, "loss": 0.7115, "lr": 4.981442308818295e-06, "epoch": 0.6747327315792843, "percentage": 13.49, "elapsed_time": "0:27:28", "remaining_time": "2:56:10", "throughput": 20126.13, "total_tokens": 33187712}
|
|
{"current_steps": 10545, "total_steps": 78105, "loss": 0.6101, "lr": 4.981374304422982e-06, "epoch": 0.6750528135202611, "percentage": 13.5, "elapsed_time": "0:27:29", "remaining_time": "2:56:09", "throughput": 20127.24, "total_tokens": 33202880}
|
|
{"current_steps": 10550, "total_steps": 78105, "loss": 0.4095, "lr": 4.981306176120761e-06, "epoch": 0.6753728954612381, "percentage": 13.51, "elapsed_time": "0:27:30", "remaining_time": "2:56:07", "throughput": 20128.62, "total_tokens": 33218944}
|
|
{"current_steps": 10555, "total_steps": 78105, "loss": 0.5573, "lr": 4.981237923915036e-06, "epoch": 0.675692977402215, "percentage": 13.51, "elapsed_time": "0:27:31", "remaining_time": "2:56:06", "throughput": 20129.85, "total_tokens": 33234688}
|
|
{"current_steps": 10560, "total_steps": 78105, "loss": 0.4964, "lr": 4.981169547809212e-06, "epoch": 0.6760130593431919, "percentage": 13.52, "elapsed_time": "0:27:31", "remaining_time": "2:56:04", "throughput": 20131.04, "total_tokens": 33249920}
|
|
{"current_steps": 10565, "total_steps": 78105, "loss": 0.4489, "lr": 4.981101047806704e-06, "epoch": 0.6763331412841688, "percentage": 13.53, "elapsed_time": "0:27:32", "remaining_time": "2:56:03", "throughput": 20132.35, "total_tokens": 33265792}
|
|
{"current_steps": 10570, "total_steps": 78105, "loss": 0.5544, "lr": 4.981032423910934e-06, "epoch": 0.6766532232251457, "percentage": 13.53, "elapsed_time": "0:27:33", "remaining_time": "2:56:01", "throughput": 20133.56, "total_tokens": 33280832}
|
|
{"current_steps": 10575, "total_steps": 78105, "loss": 0.4573, "lr": 4.980963676125328e-06, "epoch": 0.6769733051661225, "percentage": 13.54, "elapsed_time": "0:27:33", "remaining_time": "2:56:00", "throughput": 20134.88, "total_tokens": 33296576}
|
|
{"current_steps": 10580, "total_steps": 78105, "loss": 0.4879, "lr": 4.980894804453319e-06, "epoch": 0.6772933871070994, "percentage": 13.55, "elapsed_time": "0:27:34", "remaining_time": "2:55:59", "throughput": 20137.09, "total_tokens": 33316672}
|
|
{"current_steps": 10585, "total_steps": 78105, "loss": 0.5271, "lr": 4.980825808898346e-06, "epoch": 0.6776134690480763, "percentage": 13.55, "elapsed_time": "0:27:35", "remaining_time": "2:55:57", "throughput": 20138.12, "total_tokens": 33331456}
|
|
{"current_steps": 10590, "total_steps": 78105, "loss": 0.4008, "lr": 4.980756689463854e-06, "epoch": 0.6779335509890532, "percentage": 13.56, "elapsed_time": "0:27:35", "remaining_time": "2:55:56", "throughput": 20139.26, "total_tokens": 33346752}
|
|
{"current_steps": 10595, "total_steps": 78105, "loss": 0.5284, "lr": 4.980687446153294e-06, "epoch": 0.67825363293003, "percentage": 13.57, "elapsed_time": "0:27:36", "remaining_time": "2:55:54", "throughput": 20140.49, "total_tokens": 33362688}
|
|
{"current_steps": 10600, "total_steps": 78105, "loss": 0.4703, "lr": 4.980618078970125e-06, "epoch": 0.6785737148710069, "percentage": 13.57, "elapsed_time": "0:27:37", "remaining_time": "2:55:53", "throughput": 20141.51, "total_tokens": 33377600}
|
|
{"current_steps": 10605, "total_steps": 78105, "loss": 0.6389, "lr": 4.98054858791781e-06, "epoch": 0.6788937968119839, "percentage": 13.58, "elapsed_time": "0:27:37", "remaining_time": "2:55:51", "throughput": 20142.69, "total_tokens": 33393280}
|
|
{"current_steps": 10610, "total_steps": 78105, "loss": 0.5023, "lr": 4.98047897299982e-06, "epoch": 0.6792138787529608, "percentage": 13.58, "elapsed_time": "0:27:38", "remaining_time": "2:55:50", "throughput": 20144.25, "total_tokens": 33410112}
|
|
{"current_steps": 10615, "total_steps": 78105, "loss": 0.494, "lr": 4.98040923421963e-06, "epoch": 0.6795339606939377, "percentage": 13.59, "elapsed_time": "0:27:39", "remaining_time": "2:55:49", "throughput": 20145.75, "total_tokens": 33426624}
|
|
{"current_steps": 10620, "total_steps": 78105, "loss": 0.5107, "lr": 4.980339371580723e-06, "epoch": 0.6798540426349146, "percentage": 13.6, "elapsed_time": "0:27:39", "remaining_time": "2:55:47", "throughput": 20147.03, "total_tokens": 33442240}
|
|
{"current_steps": 10625, "total_steps": 78105, "loss": 0.6039, "lr": 4.980269385086587e-06, "epoch": 0.6801741245758914, "percentage": 13.6, "elapsed_time": "0:27:40", "remaining_time": "2:55:46", "throughput": 20148.36, "total_tokens": 33458432}
|
|
{"current_steps": 10630, "total_steps": 78105, "loss": 0.6173, "lr": 4.9801992747407175e-06, "epoch": 0.6804942065168683, "percentage": 13.61, "elapsed_time": "0:27:41", "remaining_time": "2:55:45", "throughput": 20149.58, "total_tokens": 33473856}
|
|
{"current_steps": 10635, "total_steps": 78105, "loss": 0.5634, "lr": 4.980129040546615e-06, "epoch": 0.6808142884578452, "percentage": 13.62, "elapsed_time": "0:27:41", "remaining_time": "2:55:43", "throughput": 20150.73, "total_tokens": 33489216}
|
|
{"current_steps": 10640, "total_steps": 78105, "loss": 0.627, "lr": 4.980058682507787e-06, "epoch": 0.6811343703988221, "percentage": 13.62, "elapsed_time": "0:27:42", "remaining_time": "2:55:42", "throughput": 20152.07, "total_tokens": 33505280}
|
|
{"current_steps": 10645, "total_steps": 78105, "loss": 0.4155, "lr": 4.9799882006277464e-06, "epoch": 0.681454452339799, "percentage": 13.63, "elapsed_time": "0:27:43", "remaining_time": "2:55:40", "throughput": 20153.2, "total_tokens": 33520512}
|
|
{"current_steps": 10650, "total_steps": 78105, "loss": 0.5237, "lr": 4.979917594910012e-06, "epoch": 0.6817745342807758, "percentage": 13.64, "elapsed_time": "0:27:43", "remaining_time": "2:55:39", "throughput": 20154.68, "total_tokens": 33537344}
|
|
{"current_steps": 10655, "total_steps": 78105, "loss": 0.4356, "lr": 4.979846865358111e-06, "epoch": 0.6820946162217527, "percentage": 13.64, "elapsed_time": "0:27:44", "remaining_time": "2:55:37", "throughput": 20155.7, "total_tokens": 33552128}
|
|
{"current_steps": 10660, "total_steps": 78105, "loss": 0.425, "lr": 4.979776011975575e-06, "epoch": 0.6824146981627297, "percentage": 13.65, "elapsed_time": "0:27:45", "remaining_time": "2:55:36", "throughput": 20156.76, "total_tokens": 33566784}
|
|
{"current_steps": 10665, "total_steps": 78105, "loss": 0.4357, "lr": 4.979705034765942e-06, "epoch": 0.6827347801037066, "percentage": 13.65, "elapsed_time": "0:27:45", "remaining_time": "2:55:34", "throughput": 20157.97, "total_tokens": 33582080}
|
|
{"current_steps": 10670, "total_steps": 78105, "loss": 0.5631, "lr": 4.9796339337327545e-06, "epoch": 0.6830548620446835, "percentage": 13.66, "elapsed_time": "0:27:46", "remaining_time": "2:55:32", "throughput": 20159.0, "total_tokens": 33596864}
|
|
{"current_steps": 10675, "total_steps": 78105, "loss": 0.4965, "lr": 4.979562708879565e-06, "epoch": 0.6833749439856603, "percentage": 13.67, "elapsed_time": "0:27:47", "remaining_time": "2:55:31", "throughput": 20160.31, "total_tokens": 33613056}
|
|
{"current_steps": 10680, "total_steps": 78105, "loss": 0.582, "lr": 4.979491360209929e-06, "epoch": 0.6836950259266372, "percentage": 13.67, "elapsed_time": "0:27:47", "remaining_time": "2:55:30", "throughput": 20161.4, "total_tokens": 33628160}
|
|
{"current_steps": 10685, "total_steps": 78105, "loss": 0.4595, "lr": 4.97941988772741e-06, "epoch": 0.6840151078676141, "percentage": 13.68, "elapsed_time": "0:27:48", "remaining_time": "2:55:28", "throughput": 20163.01, "total_tokens": 33645184}
|
|
{"current_steps": 10690, "total_steps": 78105, "loss": 0.5501, "lr": 4.979348291435577e-06, "epoch": 0.684335189808591, "percentage": 13.69, "elapsed_time": "0:27:49", "remaining_time": "2:55:27", "throughput": 20164.17, "total_tokens": 33660800}
|
|
{"current_steps": 10695, "total_steps": 78105, "loss": 0.4411, "lr": 4.979276571338004e-06, "epoch": 0.6846552717495679, "percentage": 13.69, "elapsed_time": "0:27:50", "remaining_time": "2:55:26", "throughput": 20165.5, "total_tokens": 33676736}
|
|
{"current_steps": 10700, "total_steps": 78105, "loss": 0.6194, "lr": 4.979204727438274e-06, "epoch": 0.6849753536905447, "percentage": 13.7, "elapsed_time": "0:27:50", "remaining_time": "2:55:24", "throughput": 20166.79, "total_tokens": 33692928}
|
|
{"current_steps": 10705, "total_steps": 78105, "loss": 0.5011, "lr": 4.979132759739973e-06, "epoch": 0.6852954356315216, "percentage": 13.71, "elapsed_time": "0:27:51", "remaining_time": "2:55:23", "throughput": 20167.98, "total_tokens": 33708352}
|
|
{"current_steps": 10710, "total_steps": 78105, "loss": 0.5775, "lr": 4.979060668246696e-06, "epoch": 0.6856155175724986, "percentage": 13.71, "elapsed_time": "0:27:52", "remaining_time": "2:55:21", "throughput": 20169.21, "total_tokens": 33723968}
|
|
{"current_steps": 10715, "total_steps": 78105, "loss": 0.5769, "lr": 4.9789884529620415e-06, "epoch": 0.6859355995134755, "percentage": 13.72, "elapsed_time": "0:27:52", "remaining_time": "2:55:20", "throughput": 20170.34, "total_tokens": 33739200}
|
|
{"current_steps": 10720, "total_steps": 78105, "loss": 0.5149, "lr": 4.978916113889616e-06, "epoch": 0.6862556814544524, "percentage": 13.73, "elapsed_time": "0:27:53", "remaining_time": "2:55:18", "throughput": 20171.57, "total_tokens": 33754944}
|
|
{"current_steps": 10725, "total_steps": 78105, "loss": 0.4469, "lr": 4.978843651033032e-06, "epoch": 0.6865757633954293, "percentage": 13.73, "elapsed_time": "0:27:54", "remaining_time": "2:55:17", "throughput": 20172.54, "total_tokens": 33769472}
|
|
{"current_steps": 10730, "total_steps": 78105, "loss": 0.55, "lr": 4.978771064395909e-06, "epoch": 0.6868958453364061, "percentage": 13.74, "elapsed_time": "0:27:54", "remaining_time": "2:55:15", "throughput": 20173.69, "total_tokens": 33784896}
|
|
{"current_steps": 10735, "total_steps": 78105, "loss": 0.4415, "lr": 4.978698353981869e-06, "epoch": 0.687215927277383, "percentage": 13.74, "elapsed_time": "0:27:55", "remaining_time": "2:55:14", "throughput": 20174.79, "total_tokens": 33800320}
|
|
{"current_steps": 10740, "total_steps": 78105, "loss": 0.5544, "lr": 4.978625519794546e-06, "epoch": 0.6875360092183599, "percentage": 13.75, "elapsed_time": "0:27:56", "remaining_time": "2:55:12", "throughput": 20176.25, "total_tokens": 33816896}
|
|
{"current_steps": 10745, "total_steps": 78105, "loss": 0.6134, "lr": 4.9785525618375735e-06, "epoch": 0.6878560911593368, "percentage": 13.76, "elapsed_time": "0:27:56", "remaining_time": "2:55:11", "throughput": 20177.64, "total_tokens": 33833344}
|
|
{"current_steps": 10750, "total_steps": 78105, "loss": 0.5297, "lr": 4.978479480114598e-06, "epoch": 0.6881761731003136, "percentage": 13.76, "elapsed_time": "0:27:57", "remaining_time": "2:55:10", "throughput": 20179.05, "total_tokens": 33849856}
|
|
{"current_steps": 10755, "total_steps": 78105, "loss": 0.5039, "lr": 4.978406274629265e-06, "epoch": 0.6884962550412905, "percentage": 13.77, "elapsed_time": "0:27:58", "remaining_time": "2:55:08", "throughput": 20180.24, "total_tokens": 33865344}
|
|
{"current_steps": 10760, "total_steps": 78105, "loss": 0.5573, "lr": 4.978332945385234e-06, "epoch": 0.6888163369822674, "percentage": 13.78, "elapsed_time": "0:27:58", "remaining_time": "2:55:07", "throughput": 20181.74, "total_tokens": 33881792}
|
|
{"current_steps": 10765, "total_steps": 78105, "loss": 0.6204, "lr": 4.978259492386164e-06, "epoch": 0.6891364189232444, "percentage": 13.78, "elapsed_time": "0:27:59", "remaining_time": "2:55:06", "throughput": 20183.19, "total_tokens": 33898368}
|
|
{"current_steps": 10770, "total_steps": 78105, "loss": 0.5, "lr": 4.978185915635725e-06, "epoch": 0.6894565008642213, "percentage": 13.79, "elapsed_time": "0:28:00", "remaining_time": "2:55:04", "throughput": 20184.47, "total_tokens": 33914368}
|
|
{"current_steps": 10775, "total_steps": 78105, "loss": 0.4288, "lr": 4.978112215137588e-06, "epoch": 0.6897765828051982, "percentage": 13.8, "elapsed_time": "0:28:00", "remaining_time": "2:55:03", "throughput": 20185.61, "total_tokens": 33929728}
|
|
{"current_steps": 10780, "total_steps": 78105, "loss": 0.4813, "lr": 4.978038390895437e-06, "epoch": 0.690096664746175, "percentage": 13.8, "elapsed_time": "0:28:01", "remaining_time": "2:55:01", "throughput": 20186.63, "total_tokens": 33944704}
|
|
{"current_steps": 10785, "total_steps": 78105, "loss": 0.7686, "lr": 4.977964442912955e-06, "epoch": 0.6904167466871519, "percentage": 13.81, "elapsed_time": "0:28:02", "remaining_time": "2:55:00", "throughput": 20187.81, "total_tokens": 33960128}
|
|
{"current_steps": 10790, "total_steps": 78105, "loss": 0.6656, "lr": 4.977890371193836e-06, "epoch": 0.6907368286281288, "percentage": 13.81, "elapsed_time": "0:28:02", "remaining_time": "2:54:58", "throughput": 20188.79, "total_tokens": 33975104}
|
|
{"current_steps": 10795, "total_steps": 78105, "loss": 0.5917, "lr": 4.977816175741779e-06, "epoch": 0.6910569105691057, "percentage": 13.82, "elapsed_time": "0:28:03", "remaining_time": "2:54:57", "throughput": 20190.01, "total_tokens": 33990720}
|
|
{"current_steps": 10800, "total_steps": 78105, "loss": 0.5768, "lr": 4.9777418565604885e-06, "epoch": 0.6913769925100826, "percentage": 13.83, "elapsed_time": "0:28:04", "remaining_time": "2:54:55", "throughput": 20191.14, "total_tokens": 34006272}
|
|
{"current_steps": 10805, "total_steps": 78105, "loss": 0.4049, "lr": 4.977667413653676e-06, "epoch": 0.6916970744510594, "percentage": 13.83, "elapsed_time": "0:28:04", "remaining_time": "2:54:54", "throughput": 20192.46, "total_tokens": 34022400}
|
|
{"current_steps": 10810, "total_steps": 78105, "loss": 0.4865, "lr": 4.977592847025058e-06, "epoch": 0.6920171563920363, "percentage": 13.84, "elapsed_time": "0:28:05", "remaining_time": "2:54:53", "throughput": 20193.82, "total_tokens": 34038720}
|
|
{"current_steps": 10815, "total_steps": 78105, "loss": 0.5691, "lr": 4.9775181566783595e-06, "epoch": 0.6923372383330133, "percentage": 13.85, "elapsed_time": "0:28:06", "remaining_time": "2:54:51", "throughput": 20194.94, "total_tokens": 34053760}
|
|
{"current_steps": 10820, "total_steps": 78105, "loss": 0.396, "lr": 4.977443342617308e-06, "epoch": 0.6926573202739902, "percentage": 13.85, "elapsed_time": "0:28:06", "remaining_time": "2:54:50", "throughput": 20196.19, "total_tokens": 34069568}
|
|
{"current_steps": 10825, "total_steps": 78105, "loss": 0.4147, "lr": 4.977368404845641e-06, "epoch": 0.6929774022149671, "percentage": 13.86, "elapsed_time": "0:28:07", "remaining_time": "2:54:49", "throughput": 20197.76, "total_tokens": 34086912}
|
|
{"current_steps": 10830, "total_steps": 78105, "loss": 0.4735, "lr": 4.9772933433671006e-06, "epoch": 0.6932974841559439, "percentage": 13.87, "elapsed_time": "0:28:08", "remaining_time": "2:54:47", "throughput": 20198.79, "total_tokens": 34101696}
|
|
{"current_steps": 10835, "total_steps": 78105, "loss": 0.4479, "lr": 4.977218158185434e-06, "epoch": 0.6936175660969208, "percentage": 13.87, "elapsed_time": "0:28:08", "remaining_time": "2:54:45", "throughput": 20199.65, "total_tokens": 34115904}
|
|
{"current_steps": 10840, "total_steps": 78105, "loss": 0.6332, "lr": 4.977142849304396e-06, "epoch": 0.6939376480378977, "percentage": 13.88, "elapsed_time": "0:28:09", "remaining_time": "2:54:44", "throughput": 20200.83, "total_tokens": 34131328}
|
|
{"current_steps": 10845, "total_steps": 78105, "loss": 0.5561, "lr": 4.977067416727747e-06, "epoch": 0.6942577299788746, "percentage": 13.89, "elapsed_time": "0:28:10", "remaining_time": "2:54:43", "throughput": 20202.34, "total_tokens": 34148544}
|
|
{"current_steps": 10850, "total_steps": 78105, "loss": 0.5371, "lr": 4.976991860459254e-06, "epoch": 0.6945778119198515, "percentage": 13.89, "elapsed_time": "0:28:10", "remaining_time": "2:54:41", "throughput": 20203.43, "total_tokens": 34163520}
|
|
{"current_steps": 10855, "total_steps": 78105, "loss": 0.4885, "lr": 4.976916180502689e-06, "epoch": 0.6948978938608283, "percentage": 13.9, "elapsed_time": "0:28:11", "remaining_time": "2:54:40", "throughput": 20204.5, "total_tokens": 34178368}
|
|
{"current_steps": 10860, "total_steps": 78105, "loss": 0.6393, "lr": 4.976840376861832e-06, "epoch": 0.6952179758018052, "percentage": 13.9, "elapsed_time": "0:28:12", "remaining_time": "2:54:38", "throughput": 20205.69, "total_tokens": 34193920}
|
|
{"current_steps": 10865, "total_steps": 78105, "loss": 0.6128, "lr": 4.976764449540468e-06, "epoch": 0.6955380577427821, "percentage": 13.91, "elapsed_time": "0:28:12", "remaining_time": "2:54:37", "throughput": 20207.04, "total_tokens": 34210176}
|
|
{"current_steps": 10870, "total_steps": 78105, "loss": 0.5699, "lr": 4.976688398542389e-06, "epoch": 0.6958581396837591, "percentage": 13.92, "elapsed_time": "0:28:13", "remaining_time": "2:54:36", "throughput": 20208.6, "total_tokens": 34227328}
|
|
{"current_steps": 10875, "total_steps": 78105, "loss": 0.4597, "lr": 4.976612223871391e-06, "epoch": 0.696178221624736, "percentage": 13.92, "elapsed_time": "0:28:14", "remaining_time": "2:54:34", "throughput": 20209.46, "total_tokens": 34241984}
|
|
{"current_steps": 10880, "total_steps": 78105, "loss": 0.4676, "lr": 4.976535925531279e-06, "epoch": 0.6964983035657129, "percentage": 13.93, "elapsed_time": "0:28:15", "remaining_time": "2:54:33", "throughput": 20210.4, "total_tokens": 34256896}
|
|
{"current_steps": 10885, "total_steps": 78105, "loss": 0.5493, "lr": 4.9764595035258625e-06, "epoch": 0.6968183855066897, "percentage": 13.94, "elapsed_time": "0:28:15", "remaining_time": "2:54:31", "throughput": 20211.71, "total_tokens": 34273792}
|
|
{"current_steps": 10890, "total_steps": 78105, "loss": 0.5085, "lr": 4.9763829578589586e-06, "epoch": 0.6971384674476666, "percentage": 13.94, "elapsed_time": "0:28:16", "remaining_time": "2:54:30", "throughput": 20213.07, "total_tokens": 34290752}
|
|
{"current_steps": 10895, "total_steps": 78105, "loss": 0.5571, "lr": 4.976306288534388e-06, "epoch": 0.6974585493886435, "percentage": 13.95, "elapsed_time": "0:28:17", "remaining_time": "2:54:29", "throughput": 20214.67, "total_tokens": 34308416}
|
|
{"current_steps": 10900, "total_steps": 78105, "loss": 0.7076, "lr": 4.976229495555981e-06, "epoch": 0.6977786313296204, "percentage": 13.96, "elapsed_time": "0:28:17", "remaining_time": "2:54:28", "throughput": 20215.75, "total_tokens": 34323776}
|
|
{"current_steps": 10905, "total_steps": 78105, "loss": 0.4328, "lr": 4.97615257892757e-06, "epoch": 0.6980987132705972, "percentage": 13.96, "elapsed_time": "0:28:18", "remaining_time": "2:54:26", "throughput": 20216.78, "total_tokens": 34338944}
|
|
{"current_steps": 10910, "total_steps": 78105, "loss": 0.4736, "lr": 4.976075538652998e-06, "epoch": 0.6984187952115741, "percentage": 13.97, "elapsed_time": "0:28:19", "remaining_time": "2:54:25", "throughput": 20217.9, "total_tokens": 34354432}
|
|
{"current_steps": 10915, "total_steps": 78105, "loss": 0.4492, "lr": 4.97599837473611e-06, "epoch": 0.698738877152551, "percentage": 13.97, "elapsed_time": "0:28:19", "remaining_time": "2:54:24", "throughput": 20219.04, "total_tokens": 34370432}
|
|
{"current_steps": 10920, "total_steps": 78105, "loss": 0.5545, "lr": 4.975921087180761e-06, "epoch": 0.699058959093528, "percentage": 13.98, "elapsed_time": "0:28:20", "remaining_time": "2:54:22", "throughput": 20220.21, "total_tokens": 34386496}
|
|
{"current_steps": 10925, "total_steps": 78105, "loss": 0.5357, "lr": 4.975843675990809e-06, "epoch": 0.6993790410345049, "percentage": 13.99, "elapsed_time": "0:28:21", "remaining_time": "2:54:21", "throughput": 20221.52, "total_tokens": 34402560}
|
|
{"current_steps": 10930, "total_steps": 78105, "loss": 0.4802, "lr": 4.97576614117012e-06, "epoch": 0.6996991229754818, "percentage": 13.99, "elapsed_time": "0:28:21", "remaining_time": "2:54:20", "throughput": 20222.74, "total_tokens": 34418496}
|
|
{"current_steps": 10935, "total_steps": 78105, "loss": 0.494, "lr": 4.9756884827225665e-06, "epoch": 0.7000192049164586, "percentage": 14.0, "elapsed_time": "0:28:22", "remaining_time": "2:54:18", "throughput": 20223.96, "total_tokens": 34434752}
|
|
{"current_steps": 10940, "total_steps": 78105, "loss": 0.5727, "lr": 4.9756107006520245e-06, "epoch": 0.7003392868574355, "percentage": 14.01, "elapsed_time": "0:28:23", "remaining_time": "2:54:17", "throughput": 20225.08, "total_tokens": 34450176}
|
|
{"current_steps": 10945, "total_steps": 78105, "loss": 0.5875, "lr": 4.9755327949623796e-06, "epoch": 0.7006593687984124, "percentage": 14.01, "elapsed_time": "0:28:24", "remaining_time": "2:54:15", "throughput": 20226.27, "total_tokens": 34465600}
|
|
{"current_steps": 10950, "total_steps": 78105, "loss": 0.4471, "lr": 4.975454765657522e-06, "epoch": 0.7009794507393893, "percentage": 14.02, "elapsed_time": "0:28:24", "remaining_time": "2:54:14", "throughput": 20227.46, "total_tokens": 34481600}
|
|
{"current_steps": 10955, "total_steps": 78105, "loss": 0.4347, "lr": 4.975376612741347e-06, "epoch": 0.7012995326803662, "percentage": 14.03, "elapsed_time": "0:28:25", "remaining_time": "2:54:13", "throughput": 20229.02, "total_tokens": 34498752}
|
|
{"current_steps": 10960, "total_steps": 78105, "loss": 0.5944, "lr": 4.975298336217757e-06, "epoch": 0.701619614621343, "percentage": 14.03, "elapsed_time": "0:28:26", "remaining_time": "2:54:12", "throughput": 20230.2, "total_tokens": 34514240}
|
|
{"current_steps": 10965, "total_steps": 78105, "loss": 0.5639, "lr": 4.975219936090664e-06, "epoch": 0.7019396965623199, "percentage": 14.04, "elapsed_time": "0:28:26", "remaining_time": "2:54:10", "throughput": 20231.11, "total_tokens": 34528704}
|
|
{"current_steps": 10970, "total_steps": 78105, "loss": 0.5451, "lr": 4.9751414123639776e-06, "epoch": 0.7022597785032968, "percentage": 14.05, "elapsed_time": "0:28:27", "remaining_time": "2:54:09", "throughput": 20232.38, "total_tokens": 34544704}
|
|
{"current_steps": 10975, "total_steps": 78105, "loss": 0.5939, "lr": 4.975062765041623e-06, "epoch": 0.7025798604442738, "percentage": 14.05, "elapsed_time": "0:28:28", "remaining_time": "2:54:07", "throughput": 20233.39, "total_tokens": 34560000}
|
|
{"current_steps": 10980, "total_steps": 78105, "loss": 0.4441, "lr": 4.974983994127526e-06, "epoch": 0.7028999423852507, "percentage": 14.06, "elapsed_time": "0:28:28", "remaining_time": "2:54:06", "throughput": 20234.52, "total_tokens": 34575296}
|
|
{"current_steps": 10985, "total_steps": 78105, "loss": 0.493, "lr": 4.974905099625619e-06, "epoch": 0.7032200243262275, "percentage": 14.06, "elapsed_time": "0:28:29", "remaining_time": "2:54:04", "throughput": 20235.54, "total_tokens": 34590528}
|
|
{"current_steps": 10990, "total_steps": 78105, "loss": 0.5594, "lr": 4.974826081539843e-06, "epoch": 0.7035401062672044, "percentage": 14.07, "elapsed_time": "0:28:30", "remaining_time": "2:54:03", "throughput": 20236.53, "total_tokens": 34605312}
|
|
{"current_steps": 10995, "total_steps": 78105, "loss": 0.5058, "lr": 4.974746939874144e-06, "epoch": 0.7038601882081813, "percentage": 14.08, "elapsed_time": "0:28:30", "remaining_time": "2:54:01", "throughput": 20237.65, "total_tokens": 34620736}
|
|
{"current_steps": 11000, "total_steps": 78105, "loss": 0.5211, "lr": 4.974667674632474e-06, "epoch": 0.7041802701491582, "percentage": 14.08, "elapsed_time": "0:28:31", "remaining_time": "2:54:00", "throughput": 20238.88, "total_tokens": 34636736}
|
|
{"current_steps": 11005, "total_steps": 78105, "loss": 0.5566, "lr": 4.974588285818789e-06, "epoch": 0.7045003520901351, "percentage": 14.09, "elapsed_time": "0:28:32", "remaining_time": "2:53:58", "throughput": 20239.91, "total_tokens": 34651776}
|
|
{"current_steps": 11010, "total_steps": 78105, "loss": 0.4289, "lr": 4.9745087734370545e-06, "epoch": 0.7048204340311119, "percentage": 14.1, "elapsed_time": "0:28:32", "remaining_time": "2:53:57", "throughput": 20240.8, "total_tokens": 34666176}
|
|
{"current_steps": 11015, "total_steps": 78105, "loss": 0.3934, "lr": 4.974429137491242e-06, "epoch": 0.7051405159720888, "percentage": 14.1, "elapsed_time": "0:28:33", "remaining_time": "2:53:55", "throughput": 20242.09, "total_tokens": 34682112}
|
|
{"current_steps": 11020, "total_steps": 78105, "loss": 0.5075, "lr": 4.974349377985327e-06, "epoch": 0.7054605979130657, "percentage": 14.11, "elapsed_time": "0:28:34", "remaining_time": "2:53:54", "throughput": 20243.09, "total_tokens": 34697024}
|
|
{"current_steps": 11025, "total_steps": 78105, "loss": 0.5785, "lr": 4.974269494923293e-06, "epoch": 0.7057806798540426, "percentage": 14.12, "elapsed_time": "0:28:34", "remaining_time": "2:53:52", "throughput": 20244.23, "total_tokens": 34712704}
|
|
{"current_steps": 11030, "total_steps": 78105, "loss": 0.3854, "lr": 4.974189488309128e-06, "epoch": 0.7061007617950196, "percentage": 14.12, "elapsed_time": "0:28:35", "remaining_time": "2:53:51", "throughput": 20245.33, "total_tokens": 34728256}
|
|
{"current_steps": 11035, "total_steps": 78105, "loss": 0.4843, "lr": 4.974109358146827e-06, "epoch": 0.7064208437359965, "percentage": 14.13, "elapsed_time": "0:28:36", "remaining_time": "2:53:50", "throughput": 20246.61, "total_tokens": 34744512}
|
|
{"current_steps": 11040, "total_steps": 78105, "loss": 0.5917, "lr": 4.974029104440392e-06, "epoch": 0.7067409256769733, "percentage": 14.13, "elapsed_time": "0:28:36", "remaining_time": "2:53:48", "throughput": 20247.5, "total_tokens": 34758912}
|
|
{"current_steps": 11045, "total_steps": 78105, "loss": 0.4637, "lr": 4.97394872719383e-06, "epoch": 0.7070610076179502, "percentage": 14.14, "elapsed_time": "0:28:37", "remaining_time": "2:53:47", "throughput": 20248.78, "total_tokens": 34774848}
|
|
{"current_steps": 11050, "total_steps": 78105, "loss": 0.5963, "lr": 4.973868226411156e-06, "epoch": 0.7073810895589271, "percentage": 14.15, "elapsed_time": "0:28:38", "remaining_time": "2:53:45", "throughput": 20249.87, "total_tokens": 34790016}
|
|
{"current_steps": 11055, "total_steps": 78105, "loss": 0.5213, "lr": 4.9737876020963875e-06, "epoch": 0.707701171499904, "percentage": 14.15, "elapsed_time": "0:28:38", "remaining_time": "2:53:44", "throughput": 20250.9, "total_tokens": 34804992}
|
|
{"current_steps": 11060, "total_steps": 78105, "loss": 0.3757, "lr": 4.9737068542535525e-06, "epoch": 0.7080212534408808, "percentage": 14.16, "elapsed_time": "0:28:39", "remaining_time": "2:53:42", "throughput": 20252.0, "total_tokens": 34820288}
|
|
{"current_steps": 11065, "total_steps": 78105, "loss": 0.4366, "lr": 4.9736259828866814e-06, "epoch": 0.7083413353818577, "percentage": 14.17, "elapsed_time": "0:28:40", "remaining_time": "2:53:41", "throughput": 20253.19, "total_tokens": 34836096}
|
|
{"current_steps": 11070, "total_steps": 78105, "loss": 0.6283, "lr": 4.9735449879998145e-06, "epoch": 0.7086614173228346, "percentage": 14.17, "elapsed_time": "0:28:40", "remaining_time": "2:53:39", "throughput": 20254.32, "total_tokens": 34851584}
|
|
{"current_steps": 11075, "total_steps": 78105, "loss": 0.4686, "lr": 4.973463869596994e-06, "epoch": 0.7089814992638115, "percentage": 14.18, "elapsed_time": "0:28:41", "remaining_time": "2:53:38", "throughput": 20255.66, "total_tokens": 34868032}
|
|
{"current_steps": 11080, "total_steps": 78105, "loss": 0.5039, "lr": 4.973382627682273e-06, "epoch": 0.7093015812047885, "percentage": 14.19, "elapsed_time": "0:28:42", "remaining_time": "2:53:37", "throughput": 20256.87, "total_tokens": 34884032}
|
|
{"current_steps": 11085, "total_steps": 78105, "loss": 0.4791, "lr": 4.973301262259705e-06, "epoch": 0.7096216631457654, "percentage": 14.19, "elapsed_time": "0:28:42", "remaining_time": "2:53:35", "throughput": 20257.92, "total_tokens": 34899456}
|
|
{"current_steps": 11090, "total_steps": 78105, "loss": 0.4094, "lr": 4.973219773333356e-06, "epoch": 0.7099417450867422, "percentage": 14.2, "elapsed_time": "0:28:43", "remaining_time": "2:53:34", "throughput": 20259.18, "total_tokens": 34915456}
|
|
{"current_steps": 11095, "total_steps": 78105, "loss": 0.4623, "lr": 4.9731381609072945e-06, "epoch": 0.7102618270277191, "percentage": 14.21, "elapsed_time": "0:28:44", "remaining_time": "2:53:33", "throughput": 20260.36, "total_tokens": 34931328}
|
|
{"current_steps": 11100, "total_steps": 78105, "loss": 0.5094, "lr": 4.973056424985595e-06, "epoch": 0.710581908968696, "percentage": 14.21, "elapsed_time": "0:28:44", "remaining_time": "2:53:31", "throughput": 20261.4, "total_tokens": 34946816}
|
|
{"current_steps": 11105, "total_steps": 78105, "loss": 0.4377, "lr": 4.97297456557234e-06, "epoch": 0.7109019909096729, "percentage": 14.22, "elapsed_time": "0:28:45", "remaining_time": "2:53:30", "throughput": 20263.01, "total_tokens": 34964544}
|
|
{"current_steps": 11110, "total_steps": 78105, "loss": 0.4891, "lr": 4.972892582671615e-06, "epoch": 0.7112220728506498, "percentage": 14.22, "elapsed_time": "0:28:46", "remaining_time": "2:53:29", "throughput": 20264.6, "total_tokens": 34982208}
|
|
{"current_steps": 11115, "total_steps": 78105, "loss": 0.6064, "lr": 4.972810476287516e-06, "epoch": 0.7115421547916266, "percentage": 14.23, "elapsed_time": "0:28:47", "remaining_time": "2:53:28", "throughput": 20266.36, "total_tokens": 35000576}
|
|
{"current_steps": 11120, "total_steps": 78105, "loss": 0.3861, "lr": 4.9727282464241425e-06, "epoch": 0.7118622367326035, "percentage": 14.24, "elapsed_time": "0:28:47", "remaining_time": "2:53:27", "throughput": 20267.21, "total_tokens": 35014912}
|
|
{"current_steps": 11125, "total_steps": 78105, "loss": 0.4301, "lr": 4.9726458930855994e-06, "epoch": 0.7121823186735804, "percentage": 14.24, "elapsed_time": "0:28:48", "remaining_time": "2:53:25", "throughput": 20268.14, "total_tokens": 35029632}
|
|
{"current_steps": 11130, "total_steps": 78105, "loss": 0.487, "lr": 4.9725634162760005e-06, "epoch": 0.7125024006145573, "percentage": 14.25, "elapsed_time": "0:28:48", "remaining_time": "2:53:24", "throughput": 20269.21, "total_tokens": 35044992}
|
|
{"current_steps": 11135, "total_steps": 78105, "loss": 0.6388, "lr": 4.972480815999464e-06, "epoch": 0.7128224825555343, "percentage": 14.26, "elapsed_time": "0:28:49", "remaining_time": "2:53:22", "throughput": 20270.28, "total_tokens": 35060544}
|
|
{"current_steps": 11140, "total_steps": 78105, "loss": 0.5645, "lr": 4.972398092260115e-06, "epoch": 0.7131425644965111, "percentage": 14.26, "elapsed_time": "0:28:50", "remaining_time": "2:53:21", "throughput": 20271.26, "total_tokens": 35075648}
|
|
{"current_steps": 11145, "total_steps": 78105, "loss": 0.4202, "lr": 4.9723152450620815e-06, "epoch": 0.713462646437488, "percentage": 14.27, "elapsed_time": "0:28:50", "remaining_time": "2:53:19", "throughput": 20272.3, "total_tokens": 35090880}
|
|
{"current_steps": 11150, "total_steps": 78105, "loss": 0.6373, "lr": 4.9722322744095036e-06, "epoch": 0.7137827283784649, "percentage": 14.28, "elapsed_time": "0:28:51", "remaining_time": "2:53:18", "throughput": 20273.4, "total_tokens": 35106432}
|
|
{"current_steps": 11155, "total_steps": 78105, "loss": 0.5008, "lr": 4.972149180306524e-06, "epoch": 0.7141028103194418, "percentage": 14.28, "elapsed_time": "0:28:52", "remaining_time": "2:53:16", "throughput": 20274.48, "total_tokens": 35121664}
|
|
{"current_steps": 11160, "total_steps": 78105, "loss": 0.6063, "lr": 4.97206596275729e-06, "epoch": 0.7144228922604187, "percentage": 14.29, "elapsed_time": "0:28:52", "remaining_time": "2:53:15", "throughput": 20275.54, "total_tokens": 35137152}
|
|
{"current_steps": 11165, "total_steps": 78105, "loss": 0.5116, "lr": 4.971982621765959e-06, "epoch": 0.7147429742013955, "percentage": 14.29, "elapsed_time": "0:28:53", "remaining_time": "2:53:14", "throughput": 20276.73, "total_tokens": 35153088}
|
|
{"current_steps": 11170, "total_steps": 78105, "loss": 0.4851, "lr": 4.971899157336691e-06, "epoch": 0.7150630561423724, "percentage": 14.3, "elapsed_time": "0:28:54", "remaining_time": "2:53:12", "throughput": 20277.72, "total_tokens": 35168192}
|
|
{"current_steps": 11175, "total_steps": 78105, "loss": 0.5449, "lr": 4.971815569473656e-06, "epoch": 0.7153831380833493, "percentage": 14.31, "elapsed_time": "0:28:55", "remaining_time": "2:53:11", "throughput": 20278.91, "total_tokens": 35184192}
|
|
{"current_steps": 11180, "total_steps": 78105, "loss": 0.5011, "lr": 4.971731858181026e-06, "epoch": 0.7157032200243262, "percentage": 14.31, "elapsed_time": "0:28:55", "remaining_time": "2:53:10", "throughput": 20280.14, "total_tokens": 35200256}
|
|
{"current_steps": 11185, "total_steps": 78105, "loss": 0.4837, "lr": 4.9716480234629825e-06, "epoch": 0.7160233019653032, "percentage": 14.32, "elapsed_time": "0:28:56", "remaining_time": "2:53:08", "throughput": 20281.04, "total_tokens": 35214720}
|
|
{"current_steps": 11190, "total_steps": 78105, "loss": 0.4837, "lr": 4.97156406532371e-06, "epoch": 0.71634338390628, "percentage": 14.33, "elapsed_time": "0:28:57", "remaining_time": "2:53:07", "throughput": 20282.18, "total_tokens": 35230720}
|
|
{"current_steps": 11195, "total_steps": 78105, "loss": 0.4826, "lr": 4.971479983767403e-06, "epoch": 0.7166634658472569, "percentage": 14.33, "elapsed_time": "0:28:57", "remaining_time": "2:53:05", "throughput": 20283.3, "total_tokens": 35246400}
|
|
{"current_steps": 11200, "total_steps": 78105, "loss": 0.4933, "lr": 4.971395778798258e-06, "epoch": 0.7169835477882338, "percentage": 14.34, "elapsed_time": "0:28:58", "remaining_time": "2:53:04", "throughput": 20284.58, "total_tokens": 35262784}
|
|
{"current_steps": 11205, "total_steps": 78105, "loss": 0.4372, "lr": 4.9713114504204815e-06, "epoch": 0.7173036297292107, "percentage": 14.35, "elapsed_time": "0:28:59", "remaining_time": "2:53:03", "throughput": 20285.58, "total_tokens": 35277632}
|
|
{"current_steps": 11210, "total_steps": 78105, "loss": 0.4651, "lr": 4.971226998638284e-06, "epoch": 0.7176237116701876, "percentage": 14.35, "elapsed_time": "0:28:59", "remaining_time": "2:53:01", "throughput": 20286.55, "total_tokens": 35292544}
|
|
{"current_steps": 11215, "total_steps": 78105, "loss": 0.4884, "lr": 4.971142423455882e-06, "epoch": 0.7179437936111644, "percentage": 14.36, "elapsed_time": "0:29:00", "remaining_time": "2:53:00", "throughput": 20287.77, "total_tokens": 35308352}
|
|
{"current_steps": 11220, "total_steps": 78105, "loss": 0.5077, "lr": 4.971057724877499e-06, "epoch": 0.7182638755521413, "percentage": 14.37, "elapsed_time": "0:29:01", "remaining_time": "2:52:58", "throughput": 20288.93, "total_tokens": 35323904}
|
|
{"current_steps": 11225, "total_steps": 78105, "loss": 0.5591, "lr": 4.970972902907365e-06, "epoch": 0.7185839574931182, "percentage": 14.37, "elapsed_time": "0:29:01", "remaining_time": "2:52:57", "throughput": 20290.18, "total_tokens": 35340224}
|
|
{"current_steps": 11230, "total_steps": 78105, "loss": 0.5306, "lr": 4.970887957549715e-06, "epoch": 0.7189040394340951, "percentage": 14.38, "elapsed_time": "0:29:02", "remaining_time": "2:52:56", "throughput": 20291.25, "total_tokens": 35355776}
|
|
{"current_steps": 11235, "total_steps": 78105, "loss": 0.5181, "lr": 4.97080288880879e-06, "epoch": 0.719224121375072, "percentage": 14.38, "elapsed_time": "0:29:03", "remaining_time": "2:52:54", "throughput": 20292.28, "total_tokens": 35371072}
|
|
{"current_steps": 11240, "total_steps": 78105, "loss": 0.4511, "lr": 4.970717696688839e-06, "epoch": 0.719544203316049, "percentage": 14.39, "elapsed_time": "0:29:03", "remaining_time": "2:52:53", "throughput": 20293.53, "total_tokens": 35387520}
|
|
{"current_steps": 11245, "total_steps": 78105, "loss": 0.56, "lr": 4.970632381194116e-06, "epoch": 0.7198642852570258, "percentage": 14.4, "elapsed_time": "0:29:04", "remaining_time": "2:52:52", "throughput": 20294.81, "total_tokens": 35403840}
|
|
{"current_steps": 11250, "total_steps": 78105, "loss": 0.4955, "lr": 4.970546942328881e-06, "epoch": 0.7201843671980027, "percentage": 14.4, "elapsed_time": "0:29:05", "remaining_time": "2:52:50", "throughput": 20296.01, "total_tokens": 35420032}
|
|
{"current_steps": 11255, "total_steps": 78105, "loss": 0.412, "lr": 4.9704613800974e-06, "epoch": 0.7205044491389796, "percentage": 14.41, "elapsed_time": "0:29:05", "remaining_time": "2:52:49", "throughput": 20297.22, "total_tokens": 35436032}
|
|
{"current_steps": 11260, "total_steps": 78105, "loss": 0.4155, "lr": 4.9703756945039465e-06, "epoch": 0.7208245310799565, "percentage": 14.42, "elapsed_time": "0:29:06", "remaining_time": "2:52:48", "throughput": 20298.15, "total_tokens": 35451136}
|
|
{"current_steps": 11265, "total_steps": 78105, "loss": 0.5428, "lr": 4.970289885552797e-06, "epoch": 0.7211446130209334, "percentage": 14.42, "elapsed_time": "0:29:07", "remaining_time": "2:52:46", "throughput": 20299.25, "total_tokens": 35466816}
|
|
{"current_steps": 11270, "total_steps": 78105, "loss": 0.4465, "lr": 4.97020395324824e-06, "epoch": 0.7214646949619102, "percentage": 14.43, "elapsed_time": "0:29:07", "remaining_time": "2:52:45", "throughput": 20300.64, "total_tokens": 35483456}
|
|
{"current_steps": 11275, "total_steps": 78105, "loss": 0.6662, "lr": 4.970117897594562e-06, "epoch": 0.7217847769028871, "percentage": 14.44, "elapsed_time": "0:29:08", "remaining_time": "2:52:44", "throughput": 20301.92, "total_tokens": 35499584}
|
|
{"current_steps": 11280, "total_steps": 78105, "loss": 0.577, "lr": 4.970031718596065e-06, "epoch": 0.722104858843864, "percentage": 14.44, "elapsed_time": "0:29:09", "remaining_time": "2:52:42", "throughput": 20303.03, "total_tokens": 35515328}
|
|
{"current_steps": 11285, "total_steps": 78105, "loss": 0.5868, "lr": 4.969945416257048e-06, "epoch": 0.7224249407848409, "percentage": 14.45, "elapsed_time": "0:29:09", "remaining_time": "2:52:41", "throughput": 20303.88, "total_tokens": 35530048}
|
|
{"current_steps": 11290, "total_steps": 78105, "loss": 0.4074, "lr": 4.969858990581823e-06, "epoch": 0.7227450227258178, "percentage": 14.45, "elapsed_time": "0:29:10", "remaining_time": "2:52:40", "throughput": 20305.07, "total_tokens": 35545792}
|
|
{"current_steps": 11295, "total_steps": 78105, "loss": 0.4847, "lr": 4.969772441574705e-06, "epoch": 0.7230651046667947, "percentage": 14.46, "elapsed_time": "0:29:11", "remaining_time": "2:52:38", "throughput": 20306.22, "total_tokens": 35561344}
|
|
{"current_steps": 11300, "total_steps": 78105, "loss": 0.4858, "lr": 4.9696857692400165e-06, "epoch": 0.7233851866077716, "percentage": 14.47, "elapsed_time": "0:29:11", "remaining_time": "2:52:37", "throughput": 20307.37, "total_tokens": 35577024}
|
|
{"current_steps": 11305, "total_steps": 78105, "loss": 0.5038, "lr": 4.969598973582084e-06, "epoch": 0.7237052685487485, "percentage": 14.47, "elapsed_time": "0:29:12", "remaining_time": "2:52:35", "throughput": 20308.36, "total_tokens": 35592192}
|
|
{"current_steps": 11310, "total_steps": 78105, "loss": 0.4985, "lr": 4.969512054605243e-06, "epoch": 0.7240253504897254, "percentage": 14.48, "elapsed_time": "0:29:13", "remaining_time": "2:52:34", "throughput": 20309.54, "total_tokens": 35607872}
|
|
{"current_steps": 11315, "total_steps": 78105, "loss": 0.469, "lr": 4.969425012313833e-06, "epoch": 0.7243454324307023, "percentage": 14.49, "elapsed_time": "0:29:13", "remaining_time": "2:52:33", "throughput": 20310.77, "total_tokens": 35623872}
|
|
{"current_steps": 11320, "total_steps": 78105, "loss": 0.4882, "lr": 4.969337846712201e-06, "epoch": 0.7246655143716791, "percentage": 14.49, "elapsed_time": "0:29:14", "remaining_time": "2:52:31", "throughput": 20311.61, "total_tokens": 35638720}
|
|
{"current_steps": 11325, "total_steps": 78105, "loss": 0.6997, "lr": 4.9692505578046975e-06, "epoch": 0.724985596312656, "percentage": 14.5, "elapsed_time": "0:29:15", "remaining_time": "2:52:30", "throughput": 20313.07, "total_tokens": 35655680}
|
|
{"current_steps": 11330, "total_steps": 78105, "loss": 0.4621, "lr": 4.9691631455956855e-06, "epoch": 0.7253056782536329, "percentage": 14.51, "elapsed_time": "0:29:15", "remaining_time": "2:52:29", "throughput": 20314.14, "total_tokens": 35671168}
|
|
{"current_steps": 11335, "total_steps": 78105, "loss": 0.4241, "lr": 4.969075610089526e-06, "epoch": 0.7256257601946098, "percentage": 14.51, "elapsed_time": "0:29:16", "remaining_time": "2:52:27", "throughput": 20315.29, "total_tokens": 35686848}
|
|
{"current_steps": 11340, "total_steps": 78105, "loss": 0.401, "lr": 4.968987951290592e-06, "epoch": 0.7259458421355867, "percentage": 14.52, "elapsed_time": "0:29:17", "remaining_time": "2:52:26", "throughput": 20316.47, "total_tokens": 35702720}
|
|
{"current_steps": 11345, "total_steps": 78105, "loss": 0.6738, "lr": 4.968900169203259e-06, "epoch": 0.7262659240765637, "percentage": 14.53, "elapsed_time": "0:29:18", "remaining_time": "2:52:25", "throughput": 20318.14, "total_tokens": 35720640}
|
|
{"current_steps": 11350, "total_steps": 78105, "loss": 0.4632, "lr": 4.968812263831913e-06, "epoch": 0.7265860060175405, "percentage": 14.53, "elapsed_time": "0:29:18", "remaining_time": "2:52:23", "throughput": 20319.02, "total_tokens": 35735424}
|
|
{"current_steps": 11355, "total_steps": 78105, "loss": 0.5915, "lr": 4.968724235180942e-06, "epoch": 0.7269060879585174, "percentage": 14.54, "elapsed_time": "0:29:19", "remaining_time": "2:52:22", "throughput": 20320.16, "total_tokens": 35751168}
|
|
{"current_steps": 11360, "total_steps": 78105, "loss": 0.5772, "lr": 4.968636083254741e-06, "epoch": 0.7272261698994943, "percentage": 14.54, "elapsed_time": "0:29:20", "remaining_time": "2:52:21", "throughput": 20321.23, "total_tokens": 35767168}
|
|
{"current_steps": 11365, "total_steps": 78105, "loss": 0.6086, "lr": 4.968547808057713e-06, "epoch": 0.7275462518404712, "percentage": 14.55, "elapsed_time": "0:29:20", "remaining_time": "2:52:19", "throughput": 20322.25, "total_tokens": 35782464}
|
|
{"current_steps": 11370, "total_steps": 78105, "loss": 0.5673, "lr": 4.968459409594266e-06, "epoch": 0.727866333781448, "percentage": 14.56, "elapsed_time": "0:29:21", "remaining_time": "2:52:18", "throughput": 20323.36, "total_tokens": 35798592}
|
|
{"current_steps": 11375, "total_steps": 78105, "loss": 0.5758, "lr": 4.968370887868813e-06, "epoch": 0.7281864157224249, "percentage": 14.56, "elapsed_time": "0:29:22", "remaining_time": "2:52:17", "throughput": 20324.58, "total_tokens": 35814720}
|
|
{"current_steps": 11380, "total_steps": 78105, "loss": 0.3895, "lr": 4.968282242885776e-06, "epoch": 0.7285064976634018, "percentage": 14.57, "elapsed_time": "0:29:22", "remaining_time": "2:52:15", "throughput": 20325.58, "total_tokens": 35830016}
|
|
{"current_steps": 11385, "total_steps": 78105, "loss": 0.45, "lr": 4.968193474649581e-06, "epoch": 0.7288265796043787, "percentage": 14.58, "elapsed_time": "0:29:23", "remaining_time": "2:52:14", "throughput": 20326.62, "total_tokens": 35845632}
|
|
{"current_steps": 11390, "total_steps": 78105, "loss": 0.5233, "lr": 4.968104583164659e-06, "epoch": 0.7291466615453556, "percentage": 14.58, "elapsed_time": "0:29:24", "remaining_time": "2:52:13", "throughput": 20327.71, "total_tokens": 35861184}
|
|
{"current_steps": 11395, "total_steps": 78105, "loss": 0.687, "lr": 4.968015568435451e-06, "epoch": 0.7294667434863324, "percentage": 14.59, "elapsed_time": "0:29:24", "remaining_time": "2:52:11", "throughput": 20328.94, "total_tokens": 35876864}
|
|
{"current_steps": 11400, "total_steps": 78105, "loss": 0.4534, "lr": 4.967926430466401e-06, "epoch": 0.7297868254273094, "percentage": 14.6, "elapsed_time": "0:29:25", "remaining_time": "2:52:10", "throughput": 20330.32, "total_tokens": 35893568}
|
|
{"current_steps": 11405, "total_steps": 78105, "loss": 0.4689, "lr": 4.967837169261959e-06, "epoch": 0.7301069073682863, "percentage": 14.6, "elapsed_time": "0:29:26", "remaining_time": "2:52:09", "throughput": 20331.47, "total_tokens": 35909696}
|
|
{"current_steps": 11410, "total_steps": 78105, "loss": 0.4996, "lr": 4.9677477848265835e-06, "epoch": 0.7304269893092632, "percentage": 14.61, "elapsed_time": "0:29:26", "remaining_time": "2:52:07", "throughput": 20332.51, "total_tokens": 35925120}
|
|
{"current_steps": 11415, "total_steps": 78105, "loss": 0.597, "lr": 4.967658277164738e-06, "epoch": 0.7307470712502401, "percentage": 14.61, "elapsed_time": "0:29:27", "remaining_time": "2:52:06", "throughput": 20333.66, "total_tokens": 35940800}
|
|
{"current_steps": 11420, "total_steps": 78105, "loss": 0.455, "lr": 4.967568646280891e-06, "epoch": 0.731067153191217, "percentage": 14.62, "elapsed_time": "0:29:28", "remaining_time": "2:52:05", "throughput": 20334.69, "total_tokens": 35955968}
|
|
{"current_steps": 11425, "total_steps": 78105, "loss": 0.4821, "lr": 4.9674788921795205e-06, "epoch": 0.7313872351321938, "percentage": 14.63, "elapsed_time": "0:29:28", "remaining_time": "2:52:03", "throughput": 20335.87, "total_tokens": 35972096}
|
|
{"current_steps": 11430, "total_steps": 78105, "loss": 0.4694, "lr": 4.9673890148651055e-06, "epoch": 0.7317073170731707, "percentage": 14.63, "elapsed_time": "0:29:29", "remaining_time": "2:52:02", "throughput": 20336.77, "total_tokens": 35986624}
|
|
{"current_steps": 11435, "total_steps": 78105, "loss": 0.4582, "lr": 4.967299014342136e-06, "epoch": 0.7320273990141476, "percentage": 14.64, "elapsed_time": "0:29:30", "remaining_time": "2:52:01", "throughput": 20338.03, "total_tokens": 36003008}
|
|
{"current_steps": 11440, "total_steps": 78105, "loss": 0.6141, "lr": 4.967208890615105e-06, "epoch": 0.7323474809551245, "percentage": 14.65, "elapsed_time": "0:29:30", "remaining_time": "2:51:59", "throughput": 20338.74, "total_tokens": 36017152}
|
|
{"current_steps": 11445, "total_steps": 78105, "loss": 0.36, "lr": 4.967118643688513e-06, "epoch": 0.7326675628961014, "percentage": 14.65, "elapsed_time": "0:29:31", "remaining_time": "2:51:57", "throughput": 20339.63, "total_tokens": 36031936}
|
|
{"current_steps": 11450, "total_steps": 78105, "loss": 0.5462, "lr": 4.967028273566866e-06, "epoch": 0.7329876448370783, "percentage": 14.66, "elapsed_time": "0:29:32", "remaining_time": "2:51:56", "throughput": 20340.83, "total_tokens": 36048064}
|
|
{"current_steps": 11455, "total_steps": 78105, "loss": 0.8065, "lr": 4.966937780254679e-06, "epoch": 0.7333077267780552, "percentage": 14.67, "elapsed_time": "0:29:32", "remaining_time": "2:51:55", "throughput": 20342.05, "total_tokens": 36064256}
|
|
{"current_steps": 11460, "total_steps": 78105, "loss": 0.8037, "lr": 4.966847163756468e-06, "epoch": 0.7336278087190321, "percentage": 14.67, "elapsed_time": "0:29:33", "remaining_time": "2:51:54", "throughput": 20342.99, "total_tokens": 36079424}
|
|
{"current_steps": 11465, "total_steps": 78105, "loss": 0.4751, "lr": 4.96675642407676e-06, "epoch": 0.733947890660009, "percentage": 14.68, "elapsed_time": "0:29:34", "remaining_time": "2:51:52", "throughput": 20344.17, "total_tokens": 36095424}
|
|
{"current_steps": 11470, "total_steps": 78105, "loss": 0.4355, "lr": 4.966665561220083e-06, "epoch": 0.7342679726009859, "percentage": 14.69, "elapsed_time": "0:29:34", "remaining_time": "2:51:51", "throughput": 20345.33, "total_tokens": 36111360}
|
|
{"current_steps": 11475, "total_steps": 78105, "loss": 0.4282, "lr": 4.966574575190978e-06, "epoch": 0.7345880545419627, "percentage": 14.69, "elapsed_time": "0:29:35", "remaining_time": "2:51:50", "throughput": 20346.43, "total_tokens": 36127232}
|
|
{"current_steps": 11480, "total_steps": 78105, "loss": 0.4187, "lr": 4.966483465993987e-06, "epoch": 0.7349081364829396, "percentage": 14.7, "elapsed_time": "0:29:36", "remaining_time": "2:51:48", "throughput": 20347.52, "total_tokens": 36143168}
|
|
{"current_steps": 11485, "total_steps": 78105, "loss": 0.3868, "lr": 4.966392233633658e-06, "epoch": 0.7352282184239165, "percentage": 14.7, "elapsed_time": "0:29:36", "remaining_time": "2:51:47", "throughput": 20348.56, "total_tokens": 36158272}
|
|
{"current_steps": 11490, "total_steps": 78105, "loss": 0.4495, "lr": 4.966300878114548e-06, "epoch": 0.7355483003648934, "percentage": 14.71, "elapsed_time": "0:29:37", "remaining_time": "2:51:45", "throughput": 20349.46, "total_tokens": 36173056}
|
|
{"current_steps": 11495, "total_steps": 78105, "loss": 0.4979, "lr": 4.9662093994412185e-06, "epoch": 0.7358683823058703, "percentage": 14.72, "elapsed_time": "0:29:38", "remaining_time": "2:51:44", "throughput": 20350.37, "total_tokens": 36187904}
|
|
{"current_steps": 11500, "total_steps": 78105, "loss": 0.5348, "lr": 4.966117797618238e-06, "epoch": 0.7361884642468471, "percentage": 14.72, "elapsed_time": "0:29:38", "remaining_time": "2:51:43", "throughput": 20351.91, "total_tokens": 36205632}
|
|
{"current_steps": 11505, "total_steps": 78105, "loss": 0.4951, "lr": 4.96602607265018e-06, "epoch": 0.7365085461878241, "percentage": 14.73, "elapsed_time": "0:29:39", "remaining_time": "2:51:41", "throughput": 20352.78, "total_tokens": 36220160}
|
|
{"current_steps": 11510, "total_steps": 78105, "loss": 0.4787, "lr": 4.965934224541626e-06, "epoch": 0.736828628128801, "percentage": 14.74, "elapsed_time": "0:29:40", "remaining_time": "2:51:40", "throughput": 20354.06, "total_tokens": 36236416}
|
|
{"current_steps": 11515, "total_steps": 78105, "loss": 0.5312, "lr": 4.965842253297159e-06, "epoch": 0.7371487100697779, "percentage": 14.74, "elapsed_time": "0:29:40", "remaining_time": "2:51:39", "throughput": 20355.01, "total_tokens": 36251584}
|
|
{"current_steps": 11520, "total_steps": 78105, "loss": 0.4703, "lr": 4.965750158921376e-06, "epoch": 0.7374687920107548, "percentage": 14.75, "elapsed_time": "0:29:41", "remaining_time": "2:51:37", "throughput": 20355.75, "total_tokens": 36265856}
|
|
{"current_steps": 11525, "total_steps": 78105, "loss": 0.5367, "lr": 4.965657941418873e-06, "epoch": 0.7377888739517316, "percentage": 14.76, "elapsed_time": "0:29:42", "remaining_time": "2:51:36", "throughput": 20357.39, "total_tokens": 36283904}
|
|
{"current_steps": 11530, "total_steps": 78105, "loss": 0.5535, "lr": 4.965565600794256e-06, "epoch": 0.7381089558927085, "percentage": 14.76, "elapsed_time": "0:29:42", "remaining_time": "2:51:35", "throughput": 20358.18, "total_tokens": 36298432}
|
|
{"current_steps": 11535, "total_steps": 78105, "loss": 0.3427, "lr": 4.965473137052135e-06, "epoch": 0.7384290378336854, "percentage": 14.77, "elapsed_time": "0:29:43", "remaining_time": "2:51:33", "throughput": 20359.23, "total_tokens": 36313728}
|
|
{"current_steps": 11540, "total_steps": 78105, "loss": 0.4738, "lr": 4.965380550197129e-06, "epoch": 0.7387491197746623, "percentage": 14.77, "elapsed_time": "0:29:44", "remaining_time": "2:51:32", "throughput": 20360.22, "total_tokens": 36329088}
|
|
{"current_steps": 11545, "total_steps": 78105, "loss": 0.52, "lr": 4.965287840233859e-06, "epoch": 0.7390692017156392, "percentage": 14.78, "elapsed_time": "0:29:44", "remaining_time": "2:51:30", "throughput": 20361.39, "total_tokens": 36345024}
|
|
{"current_steps": 11550, "total_steps": 78105, "loss": 0.4575, "lr": 4.965195007166955e-06, "epoch": 0.739389283656616, "percentage": 14.79, "elapsed_time": "0:29:45", "remaining_time": "2:51:29", "throughput": 20362.81, "total_tokens": 36361792}
|
|
{"current_steps": 11555, "total_steps": 78105, "loss": 0.4301, "lr": 4.965102051001054e-06, "epoch": 0.739709365597593, "percentage": 14.79, "elapsed_time": "0:29:46", "remaining_time": "2:51:28", "throughput": 20363.84, "total_tokens": 36377152}
|
|
{"current_steps": 11560, "total_steps": 78105, "loss": 0.509, "lr": 4.965008971740796e-06, "epoch": 0.7400294475385699, "percentage": 14.8, "elapsed_time": "0:29:47", "remaining_time": "2:51:27", "throughput": 20365.14, "total_tokens": 36394048}
|
|
{"current_steps": 11565, "total_steps": 78105, "loss": 0.3984, "lr": 4.96491576939083e-06, "epoch": 0.7403495294795468, "percentage": 14.81, "elapsed_time": "0:29:47", "remaining_time": "2:51:25", "throughput": 20366.03, "total_tokens": 36409152}
|
|
{"current_steps": 11570, "total_steps": 78105, "loss": 0.5386, "lr": 4.9648224439558094e-06, "epoch": 0.7406696114205237, "percentage": 14.81, "elapsed_time": "0:29:48", "remaining_time": "2:51:24", "throughput": 20367.57, "total_tokens": 36427072}
|
|
{"current_steps": 11575, "total_steps": 78105, "loss": 0.5373, "lr": 4.964728995440396e-06, "epoch": 0.7409896933615006, "percentage": 14.82, "elapsed_time": "0:29:49", "remaining_time": "2:51:23", "throughput": 20368.87, "total_tokens": 36444288}
|
|
{"current_steps": 11580, "total_steps": 78105, "loss": 0.6004, "lr": 4.964635423849253e-06, "epoch": 0.7413097753024774, "percentage": 14.83, "elapsed_time": "0:29:49", "remaining_time": "2:51:22", "throughput": 20369.98, "total_tokens": 36460096}
|
|
{"current_steps": 11585, "total_steps": 78105, "loss": 0.4622, "lr": 4.964541729187056e-06, "epoch": 0.7416298572434543, "percentage": 14.83, "elapsed_time": "0:29:50", "remaining_time": "2:51:21", "throughput": 20370.96, "total_tokens": 36475264}
|
|
{"current_steps": 11590, "total_steps": 78105, "loss": 0.5301, "lr": 4.964447911458483e-06, "epoch": 0.7419499391844312, "percentage": 14.84, "elapsed_time": "0:29:51", "remaining_time": "2:51:19", "throughput": 20372.12, "total_tokens": 36491264}
|
|
{"current_steps": 11595, "total_steps": 78105, "loss": 0.4956, "lr": 4.964353970668217e-06, "epoch": 0.7422700211254081, "percentage": 14.85, "elapsed_time": "0:29:51", "remaining_time": "2:51:18", "throughput": 20373.03, "total_tokens": 36506368}
|
|
{"current_steps": 11600, "total_steps": 78105, "loss": 0.488, "lr": 4.96425990682095e-06, "epoch": 0.742590103066385, "percentage": 14.85, "elapsed_time": "0:29:52", "remaining_time": "2:51:17", "throughput": 20374.3, "total_tokens": 36522432}
|
|
{"current_steps": 11605, "total_steps": 78105, "loss": 0.5611, "lr": 4.96416571992138e-06, "epoch": 0.7429101850073618, "percentage": 14.86, "elapsed_time": "0:29:53", "remaining_time": "2:51:15", "throughput": 20375.61, "total_tokens": 36539008}
|
|
{"current_steps": 11610, "total_steps": 78105, "loss": 0.4755, "lr": 4.9640714099742086e-06, "epoch": 0.7432302669483388, "percentage": 14.86, "elapsed_time": "0:29:53", "remaining_time": "2:51:14", "throughput": 20376.6, "total_tokens": 36554240}
|
|
{"current_steps": 11615, "total_steps": 78105, "loss": 0.3484, "lr": 4.963976976984146e-06, "epoch": 0.7435503488893157, "percentage": 14.87, "elapsed_time": "0:29:54", "remaining_time": "2:51:13", "throughput": 20377.62, "total_tokens": 36569920}
|
|
{"current_steps": 11620, "total_steps": 78105, "loss": 0.6534, "lr": 4.963882420955907e-06, "epoch": 0.7438704308302926, "percentage": 14.88, "elapsed_time": "0:29:55", "remaining_time": "2:51:11", "throughput": 20378.7, "total_tokens": 36585792}
|
|
{"current_steps": 11625, "total_steps": 78105, "loss": 0.4732, "lr": 4.9637877418942145e-06, "epoch": 0.7441905127712695, "percentage": 14.88, "elapsed_time": "0:29:55", "remaining_time": "2:51:10", "throughput": 20379.72, "total_tokens": 36601280}
|
|
{"current_steps": 11630, "total_steps": 78105, "loss": 0.7017, "lr": 4.963692939803796e-06, "epoch": 0.7445105947122463, "percentage": 14.89, "elapsed_time": "0:29:56", "remaining_time": "2:51:09", "throughput": 20380.8, "total_tokens": 36617152}
|
|
{"current_steps": 11635, "total_steps": 78105, "loss": 0.553, "lr": 4.963598014689385e-06, "epoch": 0.7448306766532232, "percentage": 14.9, "elapsed_time": "0:29:57", "remaining_time": "2:51:07", "throughput": 20381.73, "total_tokens": 36632512}
|
|
{"current_steps": 11640, "total_steps": 78105, "loss": 0.4394, "lr": 4.963502966555721e-06, "epoch": 0.7451507585942001, "percentage": 14.9, "elapsed_time": "0:29:57", "remaining_time": "2:51:06", "throughput": 20382.74, "total_tokens": 36647744}
|
|
{"current_steps": 11645, "total_steps": 78105, "loss": 0.5441, "lr": 4.96340779540755e-06, "epoch": 0.745470840535177, "percentage": 14.91, "elapsed_time": "0:29:58", "remaining_time": "2:51:05", "throughput": 20383.72, "total_tokens": 36663360}
|
|
{"current_steps": 11650, "total_steps": 78105, "loss": 0.5831, "lr": 4.9633125012496264e-06, "epoch": 0.7457909224761539, "percentage": 14.92, "elapsed_time": "0:29:59", "remaining_time": "2:51:03", "throughput": 20384.67, "total_tokens": 36678656}
|
|
{"current_steps": 11655, "total_steps": 78105, "loss": 0.6294, "lr": 4.963217084086707e-06, "epoch": 0.7461110044171307, "percentage": 14.92, "elapsed_time": "0:29:59", "remaining_time": "2:51:02", "throughput": 20385.71, "total_tokens": 36693952}
|
|
{"current_steps": 11660, "total_steps": 78105, "loss": 0.4533, "lr": 4.963121543923557e-06, "epoch": 0.7464310863581076, "percentage": 14.93, "elapsed_time": "0:30:00", "remaining_time": "2:51:01", "throughput": 20386.81, "total_tokens": 36709888}
|
|
{"current_steps": 11665, "total_steps": 78105, "loss": 0.4112, "lr": 4.963025880764947e-06, "epoch": 0.7467511682990846, "percentage": 14.94, "elapsed_time": "0:30:01", "remaining_time": "2:50:59", "throughput": 20387.83, "total_tokens": 36725504}
|
|
{"current_steps": 11670, "total_steps": 78105, "loss": 0.4568, "lr": 4.962930094615654e-06, "epoch": 0.7470712502400615, "percentage": 14.94, "elapsed_time": "0:30:02", "remaining_time": "2:50:58", "throughput": 20388.98, "total_tokens": 36741376}
|
|
{"current_steps": 11675, "total_steps": 78105, "loss": 0.5701, "lr": 4.962834185480461e-06, "epoch": 0.7473913321810384, "percentage": 14.95, "elapsed_time": "0:30:02", "remaining_time": "2:50:57", "throughput": 20390.0, "total_tokens": 36756864}
|
|
{"current_steps": 11680, "total_steps": 78105, "loss": 0.598, "lr": 4.962738153364156e-06, "epoch": 0.7477114141220152, "percentage": 14.95, "elapsed_time": "0:30:03", "remaining_time": "2:50:55", "throughput": 20391.07, "total_tokens": 36772608}
|
|
{"current_steps": 11685, "total_steps": 78105, "loss": 0.432, "lr": 4.9626419982715366e-06, "epoch": 0.7480314960629921, "percentage": 14.96, "elapsed_time": "0:30:04", "remaining_time": "2:50:54", "throughput": 20392.13, "total_tokens": 36788352}
|
|
{"current_steps": 11690, "total_steps": 78105, "loss": 0.5105, "lr": 4.962545720207404e-06, "epoch": 0.748351578003969, "percentage": 14.97, "elapsed_time": "0:30:04", "remaining_time": "2:50:53", "throughput": 20393.17, "total_tokens": 36804096}
|
|
{"current_steps": 11695, "total_steps": 78105, "loss": 0.4935, "lr": 4.962449319176564e-06, "epoch": 0.7486716599449459, "percentage": 14.97, "elapsed_time": "0:30:05", "remaining_time": "2:50:51", "throughput": 20394.25, "total_tokens": 36819776}
|
|
{"current_steps": 11700, "total_steps": 78105, "loss": 0.7523, "lr": 4.962352795183832e-06, "epoch": 0.7489917418859228, "percentage": 14.98, "elapsed_time": "0:30:06", "remaining_time": "2:50:50", "throughput": 20395.27, "total_tokens": 36835072}
|
|
{"current_steps": 11705, "total_steps": 78105, "loss": 0.4038, "lr": 4.962256148234027e-06, "epoch": 0.7493118238268996, "percentage": 14.99, "elapsed_time": "0:30:06", "remaining_time": "2:50:49", "throughput": 20396.32, "total_tokens": 36850816}
|
|
{"current_steps": 11710, "total_steps": 78105, "loss": 0.5105, "lr": 4.9621593783319754e-06, "epoch": 0.7496319057678765, "percentage": 14.99, "elapsed_time": "0:30:07", "remaining_time": "2:50:47", "throughput": 20397.31, "total_tokens": 36866368}
|
|
{"current_steps": 11715, "total_steps": 78105, "loss": 0.5124, "lr": 4.9620624854825094e-06, "epoch": 0.7499519877088535, "percentage": 15.0, "elapsed_time": "0:30:08", "remaining_time": "2:50:47", "throughput": 20399.02, "total_tokens": 36885376}
|
|
{"current_steps": 11718, "total_steps": 78105, "eval_loss": 0.49851447343826294, "epoch": 0.7501440368734396, "percentage": 15.0, "elapsed_time": "0:30:59", "remaining_time": "2:55:35", "throughput": 19840.19, "total_tokens": 36894016}
|
|
{"current_steps": 11720, "total_steps": 78105, "loss": 0.4566, "lr": 4.9619654696904675e-06, "epoch": 0.7502720696498304, "percentage": 15.01, "elapsed_time": "0:31:32", "remaining_time": "2:58:40", "throughput": 19496.53, "total_tokens": 36900288}
|
|
{"current_steps": 11725, "total_steps": 78105, "loss": 0.5913, "lr": 4.961868330960693e-06, "epoch": 0.7505921515908073, "percentage": 15.01, "elapsed_time": "0:31:33", "remaining_time": "2:58:38", "throughput": 19497.93, "total_tokens": 36916224}
|
|
{"current_steps": 11730, "total_steps": 78105, "loss": 0.4233, "lr": 4.961771069298038e-06, "epoch": 0.7509122335317842, "percentage": 15.02, "elapsed_time": "0:31:34", "remaining_time": "2:58:37", "throughput": 19499.21, "total_tokens": 36931648}
|
|
{"current_steps": 11735, "total_steps": 78105, "loss": 0.5752, "lr": 4.961673684707359e-06, "epoch": 0.751232315472761, "percentage": 15.02, "elapsed_time": "0:31:34", "remaining_time": "2:58:35", "throughput": 19500.2, "total_tokens": 36945856}
|
|
{"current_steps": 11740, "total_steps": 78105, "loss": 0.5424, "lr": 4.961576177193519e-06, "epoch": 0.7515523974137379, "percentage": 15.03, "elapsed_time": "0:31:35", "remaining_time": "2:58:34", "throughput": 19501.56, "total_tokens": 36961792}
|
|
{"current_steps": 11745, "total_steps": 78105, "loss": 0.4873, "lr": 4.961478546761386e-06, "epoch": 0.7518724793547148, "percentage": 15.04, "elapsed_time": "0:31:35", "remaining_time": "2:58:32", "throughput": 19502.81, "total_tokens": 36977152}
|
|
{"current_steps": 11750, "total_steps": 78105, "loss": 0.5303, "lr": 4.961380793415835e-06, "epoch": 0.7521925612956917, "percentage": 15.04, "elapsed_time": "0:31:36", "remaining_time": "2:58:30", "throughput": 19503.94, "total_tokens": 36991936}
|
|
{"current_steps": 11755, "total_steps": 78105, "loss": 0.605, "lr": 4.9612829171617494e-06, "epoch": 0.7525126432366686, "percentage": 15.05, "elapsed_time": "0:31:37", "remaining_time": "2:58:29", "throughput": 19505.08, "total_tokens": 37006784}
|
|
{"current_steps": 11760, "total_steps": 78105, "loss": 0.4821, "lr": 4.961184918004015e-06, "epoch": 0.7528327251776454, "percentage": 15.06, "elapsed_time": "0:31:37", "remaining_time": "2:58:27", "throughput": 19506.37, "total_tokens": 37022208}
|
|
{"current_steps": 11765, "total_steps": 78105, "loss": 0.4572, "lr": 4.961086795947525e-06, "epoch": 0.7531528071186223, "percentage": 15.06, "elapsed_time": "0:31:38", "remaining_time": "2:58:26", "throughput": 19508.06, "total_tokens": 37039488}
|
|
{"current_steps": 11770, "total_steps": 78105, "loss": 0.5268, "lr": 4.96098855099718e-06, "epoch": 0.7534728890595993, "percentage": 15.07, "elapsed_time": "0:31:39", "remaining_time": "2:58:24", "throughput": 19509.6, "total_tokens": 37056064}
|
|
{"current_steps": 11775, "total_steps": 78105, "loss": 0.3497, "lr": 4.960890183157886e-06, "epoch": 0.7537929710005762, "percentage": 15.08, "elapsed_time": "0:31:40", "remaining_time": "2:58:23", "throughput": 19511.04, "total_tokens": 37072256}
|
|
{"current_steps": 11780, "total_steps": 78105, "loss": 0.6945, "lr": 4.960791692434554e-06, "epoch": 0.7541130529415531, "percentage": 15.08, "elapsed_time": "0:31:40", "remaining_time": "2:58:21", "throughput": 19512.17, "total_tokens": 37087360}
|
|
{"current_steps": 11785, "total_steps": 78105, "loss": 0.4554, "lr": 4.960693078832103e-06, "epoch": 0.7544331348825299, "percentage": 15.09, "elapsed_time": "0:31:41", "remaining_time": "2:58:20", "throughput": 19513.26, "total_tokens": 37102144}
|
|
{"current_steps": 11790, "total_steps": 78105, "loss": 0.4506, "lr": 4.960594342355457e-06, "epoch": 0.7547532168235068, "percentage": 15.1, "elapsed_time": "0:31:42", "remaining_time": "2:58:18", "throughput": 19514.52, "total_tokens": 37117568}
|
|
{"current_steps": 11795, "total_steps": 78105, "loss": 0.5544, "lr": 4.960495483009546e-06, "epoch": 0.7550732987644837, "percentage": 15.1, "elapsed_time": "0:31:42", "remaining_time": "2:58:16", "throughput": 19515.71, "total_tokens": 37132800}
|
|
{"current_steps": 11800, "total_steps": 78105, "loss": 0.7038, "lr": 4.960396500799307e-06, "epoch": 0.7553933807054606, "percentage": 15.11, "elapsed_time": "0:31:43", "remaining_time": "2:58:15", "throughput": 19516.79, "total_tokens": 37147776}
|
|
{"current_steps": 11805, "total_steps": 78105, "loss": 0.4506, "lr": 4.960297395729683e-06, "epoch": 0.7557134626464375, "percentage": 15.11, "elapsed_time": "0:31:44", "remaining_time": "2:58:13", "throughput": 19518.26, "total_tokens": 37163904}
|
|
{"current_steps": 11810, "total_steps": 78105, "loss": 0.5447, "lr": 4.960198167805621e-06, "epoch": 0.7560335445874143, "percentage": 15.12, "elapsed_time": "0:31:44", "remaining_time": "2:58:12", "throughput": 19519.76, "total_tokens": 37180416}
|
|
{"current_steps": 11815, "total_steps": 78105, "loss": 0.4104, "lr": 4.960098817032078e-06, "epoch": 0.7563536265283912, "percentage": 15.13, "elapsed_time": "0:31:45", "remaining_time": "2:58:10", "throughput": 19521.12, "total_tokens": 37196480}
|
|
{"current_steps": 11820, "total_steps": 78105, "loss": 0.5251, "lr": 4.959999343414015e-06, "epoch": 0.7566737084693682, "percentage": 15.13, "elapsed_time": "0:31:46", "remaining_time": "2:58:09", "throughput": 19522.33, "total_tokens": 37211648}
|
|
{"current_steps": 11825, "total_steps": 78105, "loss": 0.4863, "lr": 4.959899746956397e-06, "epoch": 0.7569937904103451, "percentage": 15.14, "elapsed_time": "0:31:46", "remaining_time": "2:58:07", "throughput": 19523.62, "total_tokens": 37227072}
|
|
{"current_steps": 11830, "total_steps": 78105, "loss": 0.4256, "lr": 4.9598000276642e-06, "epoch": 0.757313872351322, "percentage": 15.15, "elapsed_time": "0:31:47", "remaining_time": "2:58:06", "throughput": 19525.22, "total_tokens": 37243968}
|
|
{"current_steps": 11835, "total_steps": 78105, "loss": 0.5758, "lr": 4.959700185542401e-06, "epoch": 0.7576339542922989, "percentage": 15.15, "elapsed_time": "0:31:48", "remaining_time": "2:58:04", "throughput": 19526.4, "total_tokens": 37259200}
|
|
{"current_steps": 11840, "total_steps": 78105, "loss": 0.3839, "lr": 4.959600220595988e-06, "epoch": 0.7579540362332757, "percentage": 15.16, "elapsed_time": "0:31:48", "remaining_time": "2:58:03", "throughput": 19527.75, "total_tokens": 37275072}
|
|
{"current_steps": 11845, "total_steps": 78105, "loss": 0.4547, "lr": 4.959500132829951e-06, "epoch": 0.7582741181742526, "percentage": 15.17, "elapsed_time": "0:31:49", "remaining_time": "2:58:01", "throughput": 19529.18, "total_tokens": 37291392}
|
|
{"current_steps": 11850, "total_steps": 78105, "loss": 0.4876, "lr": 4.959399922249289e-06, "epoch": 0.7585942001152295, "percentage": 15.17, "elapsed_time": "0:31:50", "remaining_time": "2:58:00", "throughput": 19530.63, "total_tokens": 37307648}
|
|
{"current_steps": 11855, "total_steps": 78105, "loss": 0.3464, "lr": 4.959299588859005e-06, "epoch": 0.7589142820562064, "percentage": 15.18, "elapsed_time": "0:31:50", "remaining_time": "2:57:58", "throughput": 19531.86, "total_tokens": 37323136}
|
|
{"current_steps": 11860, "total_steps": 78105, "loss": 0.4314, "lr": 4.959199132664109e-06, "epoch": 0.7592343639971832, "percentage": 15.18, "elapsed_time": "0:31:51", "remaining_time": "2:57:57", "throughput": 19533.1, "total_tokens": 37338496}
|
|
{"current_steps": 11865, "total_steps": 78105, "loss": 0.445, "lr": 4.959098553669619e-06, "epoch": 0.7595544459381601, "percentage": 15.19, "elapsed_time": "0:31:52", "remaining_time": "2:57:56", "throughput": 19534.88, "total_tokens": 37356800}
|
|
{"current_steps": 11870, "total_steps": 78105, "loss": 0.4906, "lr": 4.958997851880555e-06, "epoch": 0.759874527879137, "percentage": 15.2, "elapsed_time": "0:31:53", "remaining_time": "2:57:54", "throughput": 19536.18, "total_tokens": 37372800}
|
|
{"current_steps": 11875, "total_steps": 78105, "loss": 0.4387, "lr": 4.958897027301947e-06, "epoch": 0.760194609820114, "percentage": 15.2, "elapsed_time": "0:31:53", "remaining_time": "2:57:53", "throughput": 19537.47, "total_tokens": 37388608}
|
|
{"current_steps": 11880, "total_steps": 78105, "loss": 0.5326, "lr": 4.95879607993883e-06, "epoch": 0.7605146917610909, "percentage": 15.21, "elapsed_time": "0:31:54", "remaining_time": "2:57:51", "throughput": 19538.97, "total_tokens": 37405184}
|
|
{"current_steps": 11885, "total_steps": 78105, "loss": 0.3786, "lr": 4.958695009796244e-06, "epoch": 0.7608347737020678, "percentage": 15.22, "elapsed_time": "0:31:55", "remaining_time": "2:57:50", "throughput": 19540.08, "total_tokens": 37420096}
|
|
{"current_steps": 11890, "total_steps": 78105, "loss": 0.6187, "lr": 4.958593816879236e-06, "epoch": 0.7611548556430446, "percentage": 15.22, "elapsed_time": "0:31:55", "remaining_time": "2:57:48", "throughput": 19541.02, "total_tokens": 37434368}
|
|
{"current_steps": 11895, "total_steps": 78105, "loss": 0.4273, "lr": 4.958492501192859e-06, "epoch": 0.7614749375840215, "percentage": 15.23, "elapsed_time": "0:31:56", "remaining_time": "2:57:46", "throughput": 19542.05, "total_tokens": 37449344}
|
|
{"current_steps": 11900, "total_steps": 78105, "loss": 0.3901, "lr": 4.958391062742173e-06, "epoch": 0.7617950195249984, "percentage": 15.24, "elapsed_time": "0:31:57", "remaining_time": "2:57:45", "throughput": 19543.21, "total_tokens": 37464448}
|
|
{"current_steps": 11905, "total_steps": 78105, "loss": 0.4547, "lr": 4.958289501532242e-06, "epoch": 0.7621151014659753, "percentage": 15.24, "elapsed_time": "0:31:57", "remaining_time": "2:57:43", "throughput": 19544.86, "total_tokens": 37481856}
|
|
{"current_steps": 11910, "total_steps": 78105, "loss": 0.463, "lr": 4.958187817568138e-06, "epoch": 0.7624351834069522, "percentage": 15.25, "elapsed_time": "0:31:58", "remaining_time": "2:57:42", "throughput": 19546.14, "total_tokens": 37497856}
|
|
{"current_steps": 11915, "total_steps": 78105, "loss": 0.5234, "lr": 4.958086010854938e-06, "epoch": 0.762755265347929, "percentage": 15.26, "elapsed_time": "0:31:59", "remaining_time": "2:57:41", "throughput": 19547.36, "total_tokens": 37513600}
|
|
{"current_steps": 11920, "total_steps": 78105, "loss": 0.5496, "lr": 4.957984081397728e-06, "epoch": 0.7630753472889059, "percentage": 15.26, "elapsed_time": "0:31:59", "remaining_time": "2:57:39", "throughput": 19548.61, "total_tokens": 37529408}
|
|
{"current_steps": 11925, "total_steps": 78105, "loss": 0.4377, "lr": 4.957882029201595e-06, "epoch": 0.7633954292298829, "percentage": 15.27, "elapsed_time": "0:32:00", "remaining_time": "2:57:38", "throughput": 19550.11, "total_tokens": 37545984}
|
|
{"current_steps": 11930, "total_steps": 78105, "loss": 0.5404, "lr": 4.957779854271636e-06, "epoch": 0.7637155111708598, "percentage": 15.27, "elapsed_time": "0:32:01", "remaining_time": "2:57:36", "throughput": 19551.42, "total_tokens": 37561856}
|
|
{"current_steps": 11935, "total_steps": 78105, "loss": 0.4171, "lr": 4.957677556612953e-06, "epoch": 0.7640355931118367, "percentage": 15.28, "elapsed_time": "0:32:01", "remaining_time": "2:57:35", "throughput": 19552.88, "total_tokens": 37578624}
|
|
{"current_steps": 11940, "total_steps": 78105, "loss": 0.6082, "lr": 4.957575136230655e-06, "epoch": 0.7643556750528135, "percentage": 15.29, "elapsed_time": "0:32:02", "remaining_time": "2:57:33", "throughput": 19554.06, "total_tokens": 37593920}
|
|
{"current_steps": 11945, "total_steps": 78105, "loss": 0.8119, "lr": 4.9574725931298565e-06, "epoch": 0.7646757569937904, "percentage": 15.29, "elapsed_time": "0:32:03", "remaining_time": "2:57:32", "throughput": 19555.26, "total_tokens": 37609664}
|
|
{"current_steps": 11950, "total_steps": 78105, "loss": 0.6045, "lr": 4.957369927315676e-06, "epoch": 0.7649958389347673, "percentage": 15.3, "elapsed_time": "0:32:03", "remaining_time": "2:57:30", "throughput": 19556.58, "total_tokens": 37626048}
|
|
{"current_steps": 11955, "total_steps": 78105, "loss": 0.6714, "lr": 4.957267138793241e-06, "epoch": 0.7653159208757442, "percentage": 15.31, "elapsed_time": "0:32:04", "remaining_time": "2:57:29", "throughput": 19557.84, "total_tokens": 37641792}
|
|
{"current_steps": 11960, "total_steps": 78105, "loss": 0.5358, "lr": 4.957164227567685e-06, "epoch": 0.7656360028167211, "percentage": 15.31, "elapsed_time": "0:32:05", "remaining_time": "2:57:27", "throughput": 19558.85, "total_tokens": 37656512}
|
|
{"current_steps": 11965, "total_steps": 78105, "loss": 0.4715, "lr": 4.957061193644147e-06, "epoch": 0.7659560847576979, "percentage": 15.32, "elapsed_time": "0:32:05", "remaining_time": "2:57:26", "throughput": 19560.0, "total_tokens": 37672000}
|
|
{"current_steps": 11970, "total_steps": 78105, "loss": 0.4264, "lr": 4.95695803702777e-06, "epoch": 0.7662761666986748, "percentage": 15.33, "elapsed_time": "0:32:06", "remaining_time": "2:57:24", "throughput": 19561.18, "total_tokens": 37687232}
|
|
{"current_steps": 11975, "total_steps": 78105, "loss": 0.4438, "lr": 4.956854757723708e-06, "epoch": 0.7665962486396517, "percentage": 15.33, "elapsed_time": "0:32:07", "remaining_time": "2:57:23", "throughput": 19562.38, "total_tokens": 37702656}
|
|
{"current_steps": 11980, "total_steps": 78105, "loss": 0.5411, "lr": 4.956751355737116e-06, "epoch": 0.7669163305806287, "percentage": 15.34, "elapsed_time": "0:32:07", "remaining_time": "2:57:21", "throughput": 19563.54, "total_tokens": 37718080}
|
|
{"current_steps": 11985, "total_steps": 78105, "loss": 0.3796, "lr": 4.956647831073158e-06, "epoch": 0.7672364125216056, "percentage": 15.34, "elapsed_time": "0:32:08", "remaining_time": "2:57:20", "throughput": 19565.16, "total_tokens": 37735680}
|
|
{"current_steps": 11990, "total_steps": 78105, "loss": 0.4634, "lr": 4.956544183737003e-06, "epoch": 0.7675564944625825, "percentage": 15.35, "elapsed_time": "0:32:09", "remaining_time": "2:57:18", "throughput": 19566.13, "total_tokens": 37749952}
|
|
{"current_steps": 11995, "total_steps": 78105, "loss": 0.7163, "lr": 4.956440413733828e-06, "epoch": 0.7678765764035593, "percentage": 15.36, "elapsed_time": "0:32:10", "remaining_time": "2:57:21", "throughput": 19559.58, "total_tokens": 37765312}
|
|
{"current_steps": 12000, "total_steps": 78105, "loss": 0.5274, "lr": 4.956336521068814e-06, "epoch": 0.7681966583445362, "percentage": 15.36, "elapsed_time": "0:32:11", "remaining_time": "2:57:19", "throughput": 19560.65, "total_tokens": 37780160}
|
|
{"current_steps": 12005, "total_steps": 78105, "loss": 0.3818, "lr": 4.956232505747148e-06, "epoch": 0.7685167402855131, "percentage": 15.37, "elapsed_time": "0:32:12", "remaining_time": "2:57:18", "throughput": 19561.98, "total_tokens": 37796352}
|
|
{"current_steps": 12010, "total_steps": 78105, "loss": 0.4416, "lr": 4.9561283677740254e-06, "epoch": 0.76883682222649, "percentage": 15.38, "elapsed_time": "0:32:12", "remaining_time": "2:57:17", "throughput": 19563.45, "total_tokens": 37812864}
|
|
{"current_steps": 12015, "total_steps": 78105, "loss": 0.5501, "lr": 4.956024107154645e-06, "epoch": 0.7691569041674668, "percentage": 15.38, "elapsed_time": "0:32:13", "remaining_time": "2:57:15", "throughput": 19564.54, "total_tokens": 37827840}
|
|
{"current_steps": 12020, "total_steps": 78105, "loss": 0.5076, "lr": 4.9559197238942135e-06, "epoch": 0.7694769861084437, "percentage": 15.39, "elapsed_time": "0:32:14", "remaining_time": "2:57:14", "throughput": 19566.59, "total_tokens": 37847040}
|
|
{"current_steps": 12025, "total_steps": 78105, "loss": 0.4629, "lr": 4.955815217997944e-06, "epoch": 0.7697970680494206, "percentage": 15.4, "elapsed_time": "0:32:14", "remaining_time": "2:57:12", "throughput": 19567.82, "total_tokens": 37862464}
|
|
{"current_steps": 12030, "total_steps": 78105, "loss": 0.5349, "lr": 4.9557105894710545e-06, "epoch": 0.7701171499903975, "percentage": 15.4, "elapsed_time": "0:32:15", "remaining_time": "2:57:11", "throughput": 19568.85, "total_tokens": 37877760}
|
|
{"current_steps": 12035, "total_steps": 78105, "loss": 0.4056, "lr": 4.955605838318769e-06, "epoch": 0.7704372319313745, "percentage": 15.41, "elapsed_time": "0:32:16", "remaining_time": "2:57:09", "throughput": 19569.95, "total_tokens": 37892800}
|
|
{"current_steps": 12040, "total_steps": 78105, "loss": 0.7183, "lr": 4.955500964546319e-06, "epoch": 0.7707573138723514, "percentage": 15.42, "elapsed_time": "0:32:16", "remaining_time": "2:57:08", "throughput": 19571.36, "total_tokens": 37909056}
|
|
{"current_steps": 12045, "total_steps": 78105, "loss": 0.4416, "lr": 4.9553959681589404e-06, "epoch": 0.7710773958133282, "percentage": 15.42, "elapsed_time": "0:32:17", "remaining_time": "2:57:06", "throughput": 19572.83, "total_tokens": 37925376}
|
|
{"current_steps": 12050, "total_steps": 78105, "loss": 0.494, "lr": 4.955290849161878e-06, "epoch": 0.7713974777543051, "percentage": 15.43, "elapsed_time": "0:32:18", "remaining_time": "2:57:05", "throughput": 19574.23, "total_tokens": 37941632}
|
|
{"current_steps": 12055, "total_steps": 78105, "loss": 0.4919, "lr": 4.955185607560379e-06, "epoch": 0.771717559695282, "percentage": 15.43, "elapsed_time": "0:32:19", "remaining_time": "2:57:03", "throughput": 19575.42, "total_tokens": 37957056}
|
|
{"current_steps": 12060, "total_steps": 78105, "loss": 0.5699, "lr": 4.955080243359699e-06, "epoch": 0.7720376416362589, "percentage": 15.44, "elapsed_time": "0:32:19", "remaining_time": "2:57:02", "throughput": 19576.7, "total_tokens": 37973184}
|
|
{"current_steps": 12065, "total_steps": 78105, "loss": 0.4621, "lr": 4.954974756565101e-06, "epoch": 0.7723577235772358, "percentage": 15.45, "elapsed_time": "0:32:20", "remaining_time": "2:57:00", "throughput": 19577.82, "total_tokens": 37988288}
|
|
{"current_steps": 12070, "total_steps": 78105, "loss": 0.5975, "lr": 4.95486914718185e-06, "epoch": 0.7726778055182126, "percentage": 15.45, "elapsed_time": "0:32:21", "remaining_time": "2:56:59", "throughput": 19579.07, "total_tokens": 38004224}
|
|
{"current_steps": 12075, "total_steps": 78105, "loss": 0.6549, "lr": 4.954763415215221e-06, "epoch": 0.7729978874591895, "percentage": 15.46, "elapsed_time": "0:32:21", "remaining_time": "2:56:57", "throughput": 19580.28, "total_tokens": 38019456}
|
|
{"current_steps": 12080, "total_steps": 78105, "loss": 0.584, "lr": 4.954657560670494e-06, "epoch": 0.7733179694001664, "percentage": 15.47, "elapsed_time": "0:32:22", "remaining_time": "2:56:56", "throughput": 19581.49, "total_tokens": 38035264}
|
|
{"current_steps": 12085, "total_steps": 78105, "loss": 0.522, "lr": 4.9545515835529535e-06, "epoch": 0.7736380513411434, "percentage": 15.47, "elapsed_time": "0:32:23", "remaining_time": "2:56:55", "throughput": 19582.81, "total_tokens": 38051072}
|
|
{"current_steps": 12090, "total_steps": 78105, "loss": 0.5556, "lr": 4.954445483867892e-06, "epoch": 0.7739581332821203, "percentage": 15.48, "elapsed_time": "0:32:23", "remaining_time": "2:56:53", "throughput": 19583.97, "total_tokens": 38066688}
|
|
{"current_steps": 12095, "total_steps": 78105, "loss": 0.4577, "lr": 4.9543392616206085e-06, "epoch": 0.7742782152230971, "percentage": 15.49, "elapsed_time": "0:32:24", "remaining_time": "2:56:51", "throughput": 19585.18, "total_tokens": 38082048}
|
|
{"current_steps": 12100, "total_steps": 78105, "loss": 0.4157, "lr": 4.954232916816406e-06, "epoch": 0.774598297164074, "percentage": 15.49, "elapsed_time": "0:32:25", "remaining_time": "2:56:50", "throughput": 19586.33, "total_tokens": 38097344}
|
|
{"current_steps": 12105, "total_steps": 78105, "loss": 0.6133, "lr": 4.954126449460596e-06, "epoch": 0.7749183791050509, "percentage": 15.5, "elapsed_time": "0:32:25", "remaining_time": "2:56:48", "throughput": 19587.39, "total_tokens": 38112320}
|
|
{"current_steps": 12110, "total_steps": 78105, "loss": 0.4779, "lr": 4.954019859558493e-06, "epoch": 0.7752384610460278, "percentage": 15.5, "elapsed_time": "0:32:26", "remaining_time": "2:56:47", "throughput": 19588.65, "total_tokens": 38128000}
|
|
{"current_steps": 12115, "total_steps": 78105, "loss": 0.4682, "lr": 4.953913147115422e-06, "epoch": 0.7755585429870047, "percentage": 15.51, "elapsed_time": "0:32:27", "remaining_time": "2:56:45", "throughput": 19589.76, "total_tokens": 38143168}
|
|
{"current_steps": 12120, "total_steps": 78105, "loss": 0.3558, "lr": 4.953806312136709e-06, "epoch": 0.7758786249279815, "percentage": 15.52, "elapsed_time": "0:32:27", "remaining_time": "2:56:44", "throughput": 19590.77, "total_tokens": 38157824}
|
|
{"current_steps": 12125, "total_steps": 78105, "loss": 0.4056, "lr": 4.953699354627692e-06, "epoch": 0.7761987068689584, "percentage": 15.52, "elapsed_time": "0:32:28", "remaining_time": "2:56:42", "throughput": 19591.91, "total_tokens": 38172864}
|
|
{"current_steps": 12130, "total_steps": 78105, "loss": 0.6804, "lr": 4.953592274593709e-06, "epoch": 0.7765187888099353, "percentage": 15.53, "elapsed_time": "0:32:29", "remaining_time": "2:56:40", "throughput": 19592.89, "total_tokens": 38187264}
|
|
{"current_steps": 12135, "total_steps": 78105, "loss": 0.4035, "lr": 4.953485072040108e-06, "epoch": 0.7768388707509122, "percentage": 15.54, "elapsed_time": "0:32:29", "remaining_time": "2:56:39", "throughput": 19594.5, "total_tokens": 38204288}
|
|
{"current_steps": 12140, "total_steps": 78105, "loss": 0.3822, "lr": 4.9533777469722425e-06, "epoch": 0.7771589526918892, "percentage": 15.54, "elapsed_time": "0:32:30", "remaining_time": "2:56:37", "throughput": 19595.57, "total_tokens": 38219264}
|
|
{"current_steps": 12145, "total_steps": 78105, "loss": 0.6172, "lr": 4.953270299395472e-06, "epoch": 0.777479034632866, "percentage": 15.55, "elapsed_time": "0:32:31", "remaining_time": "2:56:36", "throughput": 19596.74, "total_tokens": 38234496}
|
|
{"current_steps": 12150, "total_steps": 78105, "loss": 0.3488, "lr": 4.953162729315161e-06, "epoch": 0.7777991165738429, "percentage": 15.56, "elapsed_time": "0:32:31", "remaining_time": "2:56:34", "throughput": 19598.17, "total_tokens": 38251072}
|
|
{"current_steps": 12155, "total_steps": 78105, "loss": 0.5489, "lr": 4.953055036736681e-06, "epoch": 0.7781191985148198, "percentage": 15.56, "elapsed_time": "0:32:32", "remaining_time": "2:56:33", "throughput": 19599.35, "total_tokens": 38266560}
|
|
{"current_steps": 12160, "total_steps": 78105, "loss": 0.4762, "lr": 4.952947221665409e-06, "epoch": 0.7784392804557967, "percentage": 15.57, "elapsed_time": "0:32:33", "remaining_time": "2:56:32", "throughput": 19600.79, "total_tokens": 38282944}
|
|
{"current_steps": 12165, "total_steps": 78105, "loss": 0.4529, "lr": 4.952839284106731e-06, "epoch": 0.7787593623967736, "percentage": 15.58, "elapsed_time": "0:32:33", "remaining_time": "2:56:30", "throughput": 19602.21, "total_tokens": 38299264}
|
|
{"current_steps": 12170, "total_steps": 78105, "loss": 0.5542, "lr": 4.952731224066036e-06, "epoch": 0.7790794443377504, "percentage": 15.58, "elapsed_time": "0:32:34", "remaining_time": "2:56:29", "throughput": 19603.36, "total_tokens": 38314368}
|
|
{"current_steps": 12175, "total_steps": 78105, "loss": 0.5084, "lr": 4.952623041548719e-06, "epoch": 0.7793995262787273, "percentage": 15.59, "elapsed_time": "0:32:35", "remaining_time": "2:56:27", "throughput": 19604.52, "total_tokens": 38329664}
|
|
{"current_steps": 12180, "total_steps": 78105, "loss": 0.4042, "lr": 4.952514736560182e-06, "epoch": 0.7797196082197042, "percentage": 15.59, "elapsed_time": "0:32:35", "remaining_time": "2:56:26", "throughput": 19605.73, "total_tokens": 38345408}
|
|
{"current_steps": 12185, "total_steps": 78105, "loss": 0.4182, "lr": 4.9524063091058335e-06, "epoch": 0.7800396901606811, "percentage": 15.6, "elapsed_time": "0:32:36", "remaining_time": "2:56:24", "throughput": 19607.32, "total_tokens": 38362688}
|
|
{"current_steps": 12190, "total_steps": 78105, "loss": 0.3383, "lr": 4.952297759191089e-06, "epoch": 0.7803597721016581, "percentage": 15.61, "elapsed_time": "0:32:37", "remaining_time": "2:56:23", "throughput": 19608.4, "total_tokens": 38377600}
|
|
{"current_steps": 12195, "total_steps": 78105, "loss": 0.5621, "lr": 4.952189086821367e-06, "epoch": 0.780679854042635, "percentage": 15.61, "elapsed_time": "0:32:37", "remaining_time": "2:56:21", "throughput": 19609.49, "total_tokens": 38392640}
|
|
{"current_steps": 12200, "total_steps": 78105, "loss": 0.6388, "lr": 4.952080292002096e-06, "epoch": 0.7809999359836118, "percentage": 15.62, "elapsed_time": "0:32:38", "remaining_time": "2:56:20", "throughput": 19610.68, "total_tokens": 38408448}
|
|
{"current_steps": 12205, "total_steps": 78105, "loss": 0.5865, "lr": 4.9519713747387075e-06, "epoch": 0.7813200179245887, "percentage": 15.63, "elapsed_time": "0:32:39", "remaining_time": "2:56:18", "throughput": 19611.95, "total_tokens": 38424512}
|
|
{"current_steps": 12210, "total_steps": 78105, "loss": 0.6003, "lr": 4.95186233503664e-06, "epoch": 0.7816400998655656, "percentage": 15.63, "elapsed_time": "0:32:39", "remaining_time": "2:56:17", "throughput": 19612.98, "total_tokens": 38439296}
|
|
{"current_steps": 12215, "total_steps": 78105, "loss": 0.3559, "lr": 4.951753172901339e-06, "epoch": 0.7819601818065425, "percentage": 15.64, "elapsed_time": "0:32:40", "remaining_time": "2:56:15", "throughput": 19614.31, "total_tokens": 38455424}
|
|
{"current_steps": 12220, "total_steps": 78105, "loss": 0.5281, "lr": 4.951643888338256e-06, "epoch": 0.7822802637475194, "percentage": 15.65, "elapsed_time": "0:32:41", "remaining_time": "2:56:14", "throughput": 19615.45, "total_tokens": 38470976}
|
|
{"current_steps": 12225, "total_steps": 78105, "loss": 0.663, "lr": 4.951534481352845e-06, "epoch": 0.7826003456884962, "percentage": 15.65, "elapsed_time": "0:32:41", "remaining_time": "2:56:12", "throughput": 19616.57, "total_tokens": 38486016}
|
|
{"current_steps": 12230, "total_steps": 78105, "loss": 0.4021, "lr": 4.951424951950574e-06, "epoch": 0.7829204276294731, "percentage": 15.66, "elapsed_time": "0:32:42", "remaining_time": "2:56:11", "throughput": 19617.69, "total_tokens": 38501248}
|
|
{"current_steps": 12235, "total_steps": 78105, "loss": 0.4555, "lr": 4.951315300136909e-06, "epoch": 0.78324050957045, "percentage": 15.66, "elapsed_time": "0:32:43", "remaining_time": "2:56:09", "throughput": 19618.9, "total_tokens": 38516992}
|
|
{"current_steps": 12240, "total_steps": 78105, "loss": 0.4991, "lr": 4.951205525917326e-06, "epoch": 0.7835605915114269, "percentage": 15.67, "elapsed_time": "0:32:43", "remaining_time": "2:56:07", "throughput": 19619.89, "total_tokens": 38531456}
|
|
{"current_steps": 12245, "total_steps": 78105, "loss": 0.4745, "lr": 4.951095629297308e-06, "epoch": 0.7838806734524039, "percentage": 15.68, "elapsed_time": "0:32:44", "remaining_time": "2:56:06", "throughput": 19621.09, "total_tokens": 38546880}
|
|
{"current_steps": 12250, "total_steps": 78105, "loss": 0.4795, "lr": 4.9509856102823404e-06, "epoch": 0.7842007553933807, "percentage": 15.68, "elapsed_time": "0:32:45", "remaining_time": "2:56:04", "throughput": 19622.19, "total_tokens": 38562176}
|
|
{"current_steps": 12255, "total_steps": 78105, "loss": 0.53, "lr": 4.950875468877918e-06, "epoch": 0.7845208373343576, "percentage": 15.69, "elapsed_time": "0:32:45", "remaining_time": "2:56:03", "throughput": 19623.36, "total_tokens": 38577472}
|
|
{"current_steps": 12260, "total_steps": 78105, "loss": 0.4954, "lr": 4.9507652050895415e-06, "epoch": 0.7848409192753345, "percentage": 15.7, "elapsed_time": "0:32:46", "remaining_time": "2:56:01", "throughput": 19624.52, "total_tokens": 38593088}
|
|
{"current_steps": 12265, "total_steps": 78105, "loss": 0.6472, "lr": 4.950654818922716e-06, "epoch": 0.7851610012163114, "percentage": 15.7, "elapsed_time": "0:32:47", "remaining_time": "2:56:00", "throughput": 19625.58, "total_tokens": 38607936}
|
|
{"current_steps": 12270, "total_steps": 78105, "loss": 0.3883, "lr": 4.950544310382954e-06, "epoch": 0.7854810831572883, "percentage": 15.71, "elapsed_time": "0:32:47", "remaining_time": "2:55:59", "throughput": 19627.05, "total_tokens": 38625024}
|
|
{"current_steps": 12275, "total_steps": 78105, "loss": 0.4866, "lr": 4.950433679475774e-06, "epoch": 0.7858011650982651, "percentage": 15.72, "elapsed_time": "0:32:48", "remaining_time": "2:55:57", "throughput": 19628.49, "total_tokens": 38641792}
|
|
{"current_steps": 12280, "total_steps": 78105, "loss": 0.5882, "lr": 4.950322926206699e-06, "epoch": 0.786121247039242, "percentage": 15.72, "elapsed_time": "0:32:49", "remaining_time": "2:55:56", "throughput": 19629.62, "total_tokens": 38656896}
|
|
{"current_steps": 12285, "total_steps": 78105, "loss": 0.4555, "lr": 4.950212050581261e-06, "epoch": 0.7864413289802189, "percentage": 15.73, "elapsed_time": "0:32:49", "remaining_time": "2:55:54", "throughput": 19630.91, "total_tokens": 38672448}
|
|
{"current_steps": 12290, "total_steps": 78105, "loss": 0.4815, "lr": 4.950101052604995e-06, "epoch": 0.7867614109211958, "percentage": 15.74, "elapsed_time": "0:32:50", "remaining_time": "2:55:53", "throughput": 19632.24, "total_tokens": 38688576}
|
|
{"current_steps": 12295, "total_steps": 78105, "loss": 0.6147, "lr": 4.9499899322834455e-06, "epoch": 0.7870814928621728, "percentage": 15.74, "elapsed_time": "0:32:51", "remaining_time": "2:55:51", "throughput": 19633.43, "total_tokens": 38704064}
|
|
{"current_steps": 12300, "total_steps": 78105, "loss": 0.4661, "lr": 4.949878689622161e-06, "epoch": 0.7874015748031497, "percentage": 15.75, "elapsed_time": "0:32:51", "remaining_time": "2:55:50", "throughput": 19634.47, "total_tokens": 38718976}
|
|
{"current_steps": 12305, "total_steps": 78105, "loss": 0.5098, "lr": 4.949767324626694e-06, "epoch": 0.7877216567441265, "percentage": 15.75, "elapsed_time": "0:32:52", "remaining_time": "2:55:48", "throughput": 19635.59, "total_tokens": 38734272}
|
|
{"current_steps": 12310, "total_steps": 78105, "loss": 0.4012, "lr": 4.949655837302608e-06, "epoch": 0.7880417386851034, "percentage": 15.76, "elapsed_time": "0:32:53", "remaining_time": "2:55:47", "throughput": 19636.71, "total_tokens": 38749504}
|
|
{"current_steps": 12315, "total_steps": 78105, "loss": 0.6175, "lr": 4.949544227655469e-06, "epoch": 0.7883618206260803, "percentage": 15.77, "elapsed_time": "0:32:53", "remaining_time": "2:55:45", "throughput": 19637.78, "total_tokens": 38764352}
|
|
{"current_steps": 12320, "total_steps": 78105, "loss": 0.4962, "lr": 4.9494324956908505e-06, "epoch": 0.7886819025670572, "percentage": 15.77, "elapsed_time": "0:32:54", "remaining_time": "2:55:43", "throughput": 19639.05, "total_tokens": 38780096}
|
|
{"current_steps": 12325, "total_steps": 78105, "loss": 0.4824, "lr": 4.949320641414332e-06, "epoch": 0.789001984508034, "percentage": 15.78, "elapsed_time": "0:32:55", "remaining_time": "2:55:42", "throughput": 19640.24, "total_tokens": 38795712}
|
|
{"current_steps": 12330, "total_steps": 78105, "loss": 0.4731, "lr": 4.9492086648314984e-06, "epoch": 0.7893220664490109, "percentage": 15.79, "elapsed_time": "0:32:55", "remaining_time": "2:55:41", "throughput": 19641.47, "total_tokens": 38811328}
|
|
{"current_steps": 12335, "total_steps": 78105, "loss": 0.4184, "lr": 4.949096565947942e-06, "epoch": 0.7896421483899878, "percentage": 15.79, "elapsed_time": "0:32:56", "remaining_time": "2:55:39", "throughput": 19642.58, "total_tokens": 38826240}
|
|
{"current_steps": 12340, "total_steps": 78105, "loss": 0.4814, "lr": 4.94898434476926e-06, "epoch": 0.7899622303309647, "percentage": 15.8, "elapsed_time": "0:32:57", "remaining_time": "2:55:38", "throughput": 19643.91, "total_tokens": 38842624}
|
|
{"current_steps": 12345, "total_steps": 78105, "loss": 0.394, "lr": 4.9488720013010556e-06, "epoch": 0.7902823122719416, "percentage": 15.81, "elapsed_time": "0:32:57", "remaining_time": "2:55:36", "throughput": 19644.92, "total_tokens": 38857472}
|
|
{"current_steps": 12350, "total_steps": 78105, "loss": 0.586, "lr": 4.948759535548939e-06, "epoch": 0.7906023942129186, "percentage": 15.81, "elapsed_time": "0:32:58", "remaining_time": "2:55:34", "throughput": 19645.95, "total_tokens": 38872256}
|
|
{"current_steps": 12355, "total_steps": 78105, "loss": 0.5738, "lr": 4.948646947518527e-06, "epoch": 0.7909224761538954, "percentage": 15.82, "elapsed_time": "0:32:59", "remaining_time": "2:55:33", "throughput": 19647.64, "total_tokens": 38890048}
|
|
{"current_steps": 12360, "total_steps": 78105, "loss": 0.3512, "lr": 4.948534237215441e-06, "epoch": 0.7912425580948723, "percentage": 15.82, "elapsed_time": "0:33:00", "remaining_time": "2:55:32", "throughput": 19648.88, "total_tokens": 38905664}
|
|
{"current_steps": 12365, "total_steps": 78105, "loss": 0.4868, "lr": 4.948421404645308e-06, "epoch": 0.7915626400358492, "percentage": 15.83, "elapsed_time": "0:33:00", "remaining_time": "2:55:31", "throughput": 19651.01, "total_tokens": 38926144}
|
|
{"current_steps": 12370, "total_steps": 78105, "loss": 0.62, "lr": 4.948308449813764e-06, "epoch": 0.7918827219768261, "percentage": 15.84, "elapsed_time": "0:33:01", "remaining_time": "2:55:30", "throughput": 19652.34, "total_tokens": 38942720}
|
|
{"current_steps": 12375, "total_steps": 78105, "loss": 0.4897, "lr": 4.948195372726449e-06, "epoch": 0.792202803917803, "percentage": 15.84, "elapsed_time": "0:33:02", "remaining_time": "2:55:28", "throughput": 19653.48, "total_tokens": 38958144}
|
|
{"current_steps": 12380, "total_steps": 78105, "loss": 0.3435, "lr": 4.948082173389011e-06, "epoch": 0.7925228858587798, "percentage": 15.85, "elapsed_time": "0:33:02", "remaining_time": "2:55:27", "throughput": 19654.66, "total_tokens": 38973376}
|
|
{"current_steps": 12385, "total_steps": 78105, "loss": 0.4778, "lr": 4.947968851807098e-06, "epoch": 0.7928429677997567, "percentage": 15.86, "elapsed_time": "0:33:03", "remaining_time": "2:55:25", "throughput": 19655.98, "total_tokens": 38989504}
|
|
{"current_steps": 12390, "total_steps": 78105, "loss": 0.5404, "lr": 4.947855407986373e-06, "epoch": 0.7931630497407336, "percentage": 15.86, "elapsed_time": "0:33:04", "remaining_time": "2:55:24", "throughput": 19657.22, "total_tokens": 39005056}
|
|
{"current_steps": 12395, "total_steps": 78105, "loss": 0.5248, "lr": 4.947741841932499e-06, "epoch": 0.7934831316817105, "percentage": 15.87, "elapsed_time": "0:33:04", "remaining_time": "2:55:22", "throughput": 19658.41, "total_tokens": 39020608}
|
|
{"current_steps": 12400, "total_steps": 78105, "loss": 0.6509, "lr": 4.947628153651147e-06, "epoch": 0.7938032136226874, "percentage": 15.88, "elapsed_time": "0:33:05", "remaining_time": "2:55:21", "throughput": 19659.52, "total_tokens": 39035712}
|
|
{"current_steps": 12405, "total_steps": 78105, "loss": 0.4714, "lr": 4.947514343147995e-06, "epoch": 0.7941232955636643, "percentage": 15.88, "elapsed_time": "0:33:06", "remaining_time": "2:55:19", "throughput": 19660.61, "total_tokens": 39050880}
|
|
{"current_steps": 12410, "total_steps": 78105, "loss": 0.5796, "lr": 4.947400410428725e-06, "epoch": 0.7944433775046412, "percentage": 15.89, "elapsed_time": "0:33:06", "remaining_time": "2:55:18", "throughput": 19661.74, "total_tokens": 39065792}
|
|
{"current_steps": 12415, "total_steps": 78105, "loss": 0.4938, "lr": 4.947286355499026e-06, "epoch": 0.7947634594456181, "percentage": 15.9, "elapsed_time": "0:33:07", "remaining_time": "2:55:16", "throughput": 19662.9, "total_tokens": 39080960}
|
|
{"current_steps": 12420, "total_steps": 78105, "loss": 0.3604, "lr": 4.947172178364595e-06, "epoch": 0.795083541386595, "percentage": 15.9, "elapsed_time": "0:33:08", "remaining_time": "2:55:15", "throughput": 19664.2, "total_tokens": 39097216}
|
|
{"current_steps": 12425, "total_steps": 78105, "loss": 0.321, "lr": 4.947057879031131e-06, "epoch": 0.7954036233275719, "percentage": 15.91, "elapsed_time": "0:33:08", "remaining_time": "2:55:13", "throughput": 19665.48, "total_tokens": 39113152}
|
|
{"current_steps": 12430, "total_steps": 78105, "loss": 0.618, "lr": 4.946943457504343e-06, "epoch": 0.7957237052685487, "percentage": 15.91, "elapsed_time": "0:33:09", "remaining_time": "2:55:12", "throughput": 19666.97, "total_tokens": 39130176}
|
|
{"current_steps": 12435, "total_steps": 78105, "loss": 0.3819, "lr": 4.946828913789945e-06, "epoch": 0.7960437872095256, "percentage": 15.92, "elapsed_time": "0:33:10", "remaining_time": "2:55:10", "throughput": 19668.15, "total_tokens": 39145792}
|
|
{"current_steps": 12440, "total_steps": 78105, "loss": 0.593, "lr": 4.9467142478936555e-06, "epoch": 0.7963638691505025, "percentage": 15.93, "elapsed_time": "0:33:10", "remaining_time": "2:55:09", "throughput": 19669.32, "total_tokens": 39161280}
|
|
{"current_steps": 12445, "total_steps": 78105, "loss": 0.3727, "lr": 4.946599459821202e-06, "epoch": 0.7966839510914794, "percentage": 15.93, "elapsed_time": "0:33:11", "remaining_time": "2:55:07", "throughput": 19670.41, "total_tokens": 39176512}
|
|
{"current_steps": 12450, "total_steps": 78105, "loss": 0.5835, "lr": 4.946484549578314e-06, "epoch": 0.7970040330324563, "percentage": 15.94, "elapsed_time": "0:33:12", "remaining_time": "2:55:06", "throughput": 19671.51, "total_tokens": 39191808}
|
|
{"current_steps": 12455, "total_steps": 78105, "loss": 0.3811, "lr": 4.946369517170731e-06, "epoch": 0.7973241149734333, "percentage": 15.95, "elapsed_time": "0:33:13", "remaining_time": "2:55:05", "throughput": 19672.92, "total_tokens": 39208640}
|
|
{"current_steps": 12460, "total_steps": 78105, "loss": 0.4684, "lr": 4.946254362604198e-06, "epoch": 0.7976441969144101, "percentage": 15.95, "elapsed_time": "0:33:13", "remaining_time": "2:55:03", "throughput": 19673.9, "total_tokens": 39223232}
|
|
{"current_steps": 12465, "total_steps": 78105, "loss": 0.4334, "lr": 4.9461390858844635e-06, "epoch": 0.797964278855387, "percentage": 15.96, "elapsed_time": "0:33:14", "remaining_time": "2:55:02", "throughput": 19675.0, "total_tokens": 39238656}
|
|
{"current_steps": 12470, "total_steps": 78105, "loss": 0.553, "lr": 4.946023687017285e-06, "epoch": 0.7982843607963639, "percentage": 15.97, "elapsed_time": "0:33:15", "remaining_time": "2:55:00", "throughput": 19676.52, "total_tokens": 39255872}
|
|
{"current_steps": 12475, "total_steps": 78105, "loss": 0.4521, "lr": 4.945908166008424e-06, "epoch": 0.7986044427373408, "percentage": 15.97, "elapsed_time": "0:33:15", "remaining_time": "2:54:59", "throughput": 19677.48, "total_tokens": 39270784}
|
|
{"current_steps": 12480, "total_steps": 78105, "loss": 0.4391, "lr": 4.945792522863649e-06, "epoch": 0.7989245246783176, "percentage": 15.98, "elapsed_time": "0:33:16", "remaining_time": "2:54:57", "throughput": 19678.67, "total_tokens": 39286080}
|
|
{"current_steps": 12485, "total_steps": 78105, "loss": 0.5249, "lr": 4.945676757588736e-06, "epoch": 0.7992446066192945, "percentage": 15.98, "elapsed_time": "0:33:17", "remaining_time": "2:54:56", "throughput": 19679.82, "total_tokens": 39301312}
|
|
{"current_steps": 12490, "total_steps": 78105, "loss": 0.4352, "lr": 4.945560870189465e-06, "epoch": 0.7995646885602714, "percentage": 15.99, "elapsed_time": "0:33:17", "remaining_time": "2:54:54", "throughput": 19680.94, "total_tokens": 39316736}
|
|
{"current_steps": 12495, "total_steps": 78105, "loss": 0.4878, "lr": 4.945444860671622e-06, "epoch": 0.7998847705012483, "percentage": 16.0, "elapsed_time": "0:33:18", "remaining_time": "2:54:53", "throughput": 19681.93, "total_tokens": 39331264}
|
|
{"current_steps": 12500, "total_steps": 78105, "loss": 0.4433, "lr": 4.945328729041e-06, "epoch": 0.8002048524422252, "percentage": 16.0, "elapsed_time": "0:33:18", "remaining_time": "2:54:51", "throughput": 19682.93, "total_tokens": 39345856}
|
|
{"current_steps": 12505, "total_steps": 78105, "loss": 0.5399, "lr": 4.945212475303399e-06, "epoch": 0.800524934383202, "percentage": 16.01, "elapsed_time": "0:33:19", "remaining_time": "2:54:49", "throughput": 19684.13, "total_tokens": 39361280}
|
|
{"current_steps": 12510, "total_steps": 78105, "loss": 0.4515, "lr": 4.9450960994646245e-06, "epoch": 0.800845016324179, "percentage": 16.02, "elapsed_time": "0:33:20", "remaining_time": "2:54:48", "throughput": 19685.08, "total_tokens": 39375872}
|
|
{"current_steps": 12515, "total_steps": 78105, "loss": 0.3918, "lr": 4.944979601530486e-06, "epoch": 0.8011650982651559, "percentage": 16.02, "elapsed_time": "0:33:20", "remaining_time": "2:54:47", "throughput": 19686.43, "total_tokens": 39392320}
|
|
{"current_steps": 12520, "total_steps": 78105, "loss": 0.4591, "lr": 4.944862981506802e-06, "epoch": 0.8014851802061328, "percentage": 16.03, "elapsed_time": "0:33:21", "remaining_time": "2:54:45", "throughput": 19687.56, "total_tokens": 39407680}
|
|
{"current_steps": 12525, "total_steps": 78105, "loss": 0.4409, "lr": 4.944746239399395e-06, "epoch": 0.8018052621471097, "percentage": 16.04, "elapsed_time": "0:33:22", "remaining_time": "2:54:44", "throughput": 19688.95, "total_tokens": 39424512}
|
|
{"current_steps": 12530, "total_steps": 78105, "loss": 0.505, "lr": 4.944629375214095e-06, "epoch": 0.8021253440880866, "percentage": 16.04, "elapsed_time": "0:33:23", "remaining_time": "2:54:42", "throughput": 19689.88, "total_tokens": 39439296}
|
|
{"current_steps": 12535, "total_steps": 78105, "loss": 0.577, "lr": 4.944512388956738e-06, "epoch": 0.8024454260290634, "percentage": 16.05, "elapsed_time": "0:33:23", "remaining_time": "2:54:41", "throughput": 19691.4, "total_tokens": 39456576}
|
|
{"current_steps": 12540, "total_steps": 78105, "loss": 0.3687, "lr": 4.944395280633165e-06, "epoch": 0.8027655079700403, "percentage": 16.06, "elapsed_time": "0:33:24", "remaining_time": "2:54:40", "throughput": 19692.74, "total_tokens": 39472512}
|
|
{"current_steps": 12545, "total_steps": 78105, "loss": 0.4122, "lr": 4.944278050249224e-06, "epoch": 0.8030855899110172, "percentage": 16.06, "elapsed_time": "0:33:25", "remaining_time": "2:54:38", "throughput": 19693.9, "total_tokens": 39488192}
|
|
{"current_steps": 12550, "total_steps": 78105, "loss": 0.5375, "lr": 4.944160697810769e-06, "epoch": 0.8034056718519941, "percentage": 16.07, "elapsed_time": "0:33:25", "remaining_time": "2:54:37", "throughput": 19694.93, "total_tokens": 39503552}
|
|
{"current_steps": 12555, "total_steps": 78105, "loss": 0.5382, "lr": 4.94404322332366e-06, "epoch": 0.803725753792971, "percentage": 16.07, "elapsed_time": "0:33:26", "remaining_time": "2:54:35", "throughput": 19696.21, "total_tokens": 39519744}
|
|
{"current_steps": 12560, "total_steps": 78105, "loss": 0.6759, "lr": 4.943925626793764e-06, "epoch": 0.804045835733948, "percentage": 16.08, "elapsed_time": "0:33:27", "remaining_time": "2:54:34", "throughput": 19697.32, "total_tokens": 39535232}
|
|
{"current_steps": 12565, "total_steps": 78105, "loss": 0.5241, "lr": 4.943807908226951e-06, "epoch": 0.8043659176749248, "percentage": 16.09, "elapsed_time": "0:33:27", "remaining_time": "2:54:32", "throughput": 19698.26, "total_tokens": 39549568}
|
|
{"current_steps": 12570, "total_steps": 78105, "loss": 0.4066, "lr": 4.9436900676291e-06, "epoch": 0.8046859996159017, "percentage": 16.09, "elapsed_time": "0:33:28", "remaining_time": "2:54:31", "throughput": 19699.65, "total_tokens": 39566016}
|
|
{"current_steps": 12575, "total_steps": 78105, "loss": 0.4023, "lr": 4.943572105006097e-06, "epoch": 0.8050060815568786, "percentage": 16.1, "elapsed_time": "0:33:29", "remaining_time": "2:54:29", "throughput": 19700.85, "total_tokens": 39581760}
|
|
{"current_steps": 12580, "total_steps": 78105, "loss": 0.5066, "lr": 4.94345402036383e-06, "epoch": 0.8053261634978555, "percentage": 16.11, "elapsed_time": "0:33:29", "remaining_time": "2:54:28", "throughput": 19702.09, "total_tokens": 39597888}
|
|
{"current_steps": 12585, "total_steps": 78105, "loss": 0.5907, "lr": 4.9433358137081974e-06, "epoch": 0.8056462454388323, "percentage": 16.11, "elapsed_time": "0:33:30", "remaining_time": "2:54:27", "throughput": 19703.24, "total_tokens": 39613568}
|
|
{"current_steps": 12590, "total_steps": 78105, "loss": 0.4664, "lr": 4.943217485045101e-06, "epoch": 0.8059663273798092, "percentage": 16.12, "elapsed_time": "0:33:31", "remaining_time": "2:54:25", "throughput": 19704.22, "total_tokens": 39628096}
|
|
{"current_steps": 12595, "total_steps": 78105, "loss": 0.3563, "lr": 4.94309903438045e-06, "epoch": 0.8062864093207861, "percentage": 16.13, "elapsed_time": "0:33:31", "remaining_time": "2:54:23", "throughput": 19705.11, "total_tokens": 39642560}
|
|
{"current_steps": 12600, "total_steps": 78105, "loss": 0.4185, "lr": 4.9429804617201585e-06, "epoch": 0.806606491261763, "percentage": 16.13, "elapsed_time": "0:33:32", "remaining_time": "2:54:22", "throughput": 19706.29, "total_tokens": 39658112}
|
|
{"current_steps": 12605, "total_steps": 78105, "loss": 0.5237, "lr": 4.9428617670701475e-06, "epoch": 0.8069265732027399, "percentage": 16.14, "elapsed_time": "0:33:33", "remaining_time": "2:54:20", "throughput": 19707.57, "total_tokens": 39674112}
|
|
{"current_steps": 12610, "total_steps": 78105, "loss": 0.4363, "lr": 4.942742950436344e-06, "epoch": 0.8072466551437167, "percentage": 16.14, "elapsed_time": "0:33:33", "remaining_time": "2:54:19", "throughput": 19708.93, "total_tokens": 39690432}
|
|
{"current_steps": 12615, "total_steps": 78105, "loss": 0.4683, "lr": 4.942624011824683e-06, "epoch": 0.8075667370846937, "percentage": 16.15, "elapsed_time": "0:33:34", "remaining_time": "2:54:18", "throughput": 19710.04, "total_tokens": 39705792}
|
|
{"current_steps": 12620, "total_steps": 78105, "loss": 0.5312, "lr": 4.942504951241101e-06, "epoch": 0.8078868190256706, "percentage": 16.16, "elapsed_time": "0:33:35", "remaining_time": "2:54:16", "throughput": 19711.27, "total_tokens": 39722048}
|
|
{"current_steps": 12625, "total_steps": 78105, "loss": 0.4972, "lr": 4.9423857686915435e-06, "epoch": 0.8082069009666475, "percentage": 16.16, "elapsed_time": "0:33:35", "remaining_time": "2:54:15", "throughput": 19712.56, "total_tokens": 39738304}
|
|
{"current_steps": 12630, "total_steps": 78105, "loss": 0.4825, "lr": 4.9422664641819634e-06, "epoch": 0.8085269829076244, "percentage": 16.17, "elapsed_time": "0:33:36", "remaining_time": "2:54:13", "throughput": 19713.69, "total_tokens": 39753728}
|
|
{"current_steps": 12635, "total_steps": 78105, "loss": 0.4945, "lr": 4.942147037718317e-06, "epoch": 0.8088470648486012, "percentage": 16.18, "elapsed_time": "0:33:37", "remaining_time": "2:54:12", "throughput": 19714.84, "total_tokens": 39769600}
|
|
{"current_steps": 12640, "total_steps": 78105, "loss": 0.4703, "lr": 4.942027489306569e-06, "epoch": 0.8091671467895781, "percentage": 16.18, "elapsed_time": "0:33:37", "remaining_time": "2:54:10", "throughput": 19715.76, "total_tokens": 39784000}
|
|
{"current_steps": 12645, "total_steps": 78105, "loss": 0.4305, "lr": 4.941907818952687e-06, "epoch": 0.809487228730555, "percentage": 16.19, "elapsed_time": "0:33:38", "remaining_time": "2:54:09", "throughput": 19717.02, "total_tokens": 39800320}
|
|
{"current_steps": 12650, "total_steps": 78105, "loss": 0.4837, "lr": 4.941788026662649e-06, "epoch": 0.8098073106715319, "percentage": 16.2, "elapsed_time": "0:33:39", "remaining_time": "2:54:08", "throughput": 19718.07, "total_tokens": 39815360}
|
|
{"current_steps": 12655, "total_steps": 78105, "loss": 0.4893, "lr": 4.941668112442436e-06, "epoch": 0.8101273926125088, "percentage": 16.2, "elapsed_time": "0:33:39", "remaining_time": "2:54:06", "throughput": 19718.98, "total_tokens": 39829952}
|
|
{"current_steps": 12660, "total_steps": 78105, "loss": 0.473, "lr": 4.941548076298035e-06, "epoch": 0.8104474745534856, "percentage": 16.21, "elapsed_time": "0:33:40", "remaining_time": "2:54:05", "throughput": 19720.1, "total_tokens": 39845376}
|
|
{"current_steps": 12665, "total_steps": 78105, "loss": 0.5856, "lr": 4.94142791823544e-06, "epoch": 0.8107675564944626, "percentage": 16.22, "elapsed_time": "0:33:41", "remaining_time": "2:54:03", "throughput": 19721.41, "total_tokens": 39861696}
|
|
{"current_steps": 12670, "total_steps": 78105, "loss": 0.48, "lr": 4.941307638260653e-06, "epoch": 0.8110876384354395, "percentage": 16.22, "elapsed_time": "0:33:41", "remaining_time": "2:54:02", "throughput": 19722.61, "total_tokens": 39877440}
|
|
{"current_steps": 12675, "total_steps": 78105, "loss": 0.4398, "lr": 4.941187236379678e-06, "epoch": 0.8114077203764164, "percentage": 16.23, "elapsed_time": "0:33:42", "remaining_time": "2:54:00", "throughput": 19723.86, "total_tokens": 39893760}
|
|
{"current_steps": 12680, "total_steps": 78105, "loss": 0.3593, "lr": 4.941066712598528e-06, "epoch": 0.8117278023173933, "percentage": 16.23, "elapsed_time": "0:33:43", "remaining_time": "2:53:59", "throughput": 19725.12, "total_tokens": 39909568}
|
|
{"current_steps": 12685, "total_steps": 78105, "loss": 0.5778, "lr": 4.940946066923222e-06, "epoch": 0.8120478842583702, "percentage": 16.24, "elapsed_time": "0:33:43", "remaining_time": "2:53:57", "throughput": 19725.99, "total_tokens": 39923648}
|
|
{"current_steps": 12690, "total_steps": 78105, "loss": 0.5954, "lr": 4.940825299359784e-06, "epoch": 0.812367966199347, "percentage": 16.25, "elapsed_time": "0:33:44", "remaining_time": "2:53:56", "throughput": 19727.02, "total_tokens": 39939008}
|
|
{"current_steps": 12695, "total_steps": 78105, "loss": 0.5824, "lr": 4.9407044099142435e-06, "epoch": 0.8126880481403239, "percentage": 16.25, "elapsed_time": "0:33:45", "remaining_time": "2:53:55", "throughput": 19728.21, "total_tokens": 39955072}
|
|
{"current_steps": 12700, "total_steps": 78105, "loss": 0.5653, "lr": 4.940583398592639e-06, "epoch": 0.8130081300813008, "percentage": 16.26, "elapsed_time": "0:33:45", "remaining_time": "2:53:53", "throughput": 19729.44, "total_tokens": 39970880}
|
|
{"current_steps": 12705, "total_steps": 78105, "loss": 0.5909, "lr": 4.940462265401012e-06, "epoch": 0.8133282120222777, "percentage": 16.27, "elapsed_time": "0:33:46", "remaining_time": "2:53:52", "throughput": 19730.52, "total_tokens": 39986240}
|
|
{"current_steps": 12710, "total_steps": 78105, "loss": 0.4893, "lr": 4.940341010345409e-06, "epoch": 0.8136482939632546, "percentage": 16.27, "elapsed_time": "0:33:47", "remaining_time": "2:53:50", "throughput": 19731.7, "total_tokens": 40001856}
|
|
{"current_steps": 12715, "total_steps": 78105, "loss": 0.5328, "lr": 4.94021963343189e-06, "epoch": 0.8139683759042314, "percentage": 16.28, "elapsed_time": "0:33:47", "remaining_time": "2:53:49", "throughput": 19732.98, "total_tokens": 40018112}
|
|
{"current_steps": 12720, "total_steps": 78105, "loss": 0.4697, "lr": 4.940098134666512e-06, "epoch": 0.8142884578452084, "percentage": 16.29, "elapsed_time": "0:33:48", "remaining_time": "2:53:47", "throughput": 19734.03, "total_tokens": 40033664}
|
|
{"current_steps": 12725, "total_steps": 78105, "loss": 0.4335, "lr": 4.939976514055344e-06, "epoch": 0.8146085397861853, "percentage": 16.29, "elapsed_time": "0:33:49", "remaining_time": "2:53:46", "throughput": 19735.07, "total_tokens": 40048768}
|
|
{"current_steps": 12730, "total_steps": 78105, "loss": 0.4576, "lr": 4.939854771604457e-06, "epoch": 0.8149286217271622, "percentage": 16.3, "elapsed_time": "0:33:49", "remaining_time": "2:53:44", "throughput": 19735.98, "total_tokens": 40063232}
|
|
{"current_steps": 12735, "total_steps": 78105, "loss": 0.5915, "lr": 4.939732907319933e-06, "epoch": 0.8152487036681391, "percentage": 16.3, "elapsed_time": "0:33:50", "remaining_time": "2:53:43", "throughput": 19737.2, "total_tokens": 40079296}
|
|
{"current_steps": 12740, "total_steps": 78105, "loss": 0.57, "lr": 4.9396109212078545e-06, "epoch": 0.8155687856091159, "percentage": 16.31, "elapsed_time": "0:33:51", "remaining_time": "2:53:42", "throughput": 19738.39, "total_tokens": 40094976}
|
|
{"current_steps": 12745, "total_steps": 78105, "loss": 0.4082, "lr": 4.939488813274315e-06, "epoch": 0.8158888675500928, "percentage": 16.32, "elapsed_time": "0:33:51", "remaining_time": "2:53:40", "throughput": 19739.53, "total_tokens": 40110464}
|
|
{"current_steps": 12750, "total_steps": 78105, "loss": 0.6115, "lr": 4.93936658352541e-06, "epoch": 0.8162089494910697, "percentage": 16.32, "elapsed_time": "0:33:52", "remaining_time": "2:53:39", "throughput": 19740.59, "total_tokens": 40125568}
|
|
{"current_steps": 12755, "total_steps": 78105, "loss": 0.4509, "lr": 4.939244231967244e-06, "epoch": 0.8165290314320466, "percentage": 16.33, "elapsed_time": "0:33:53", "remaining_time": "2:53:37", "throughput": 19741.83, "total_tokens": 40141440}
|
|
{"current_steps": 12760, "total_steps": 78105, "loss": 0.5462, "lr": 4.939121758605927e-06, "epoch": 0.8168491133730235, "percentage": 16.34, "elapsed_time": "0:33:53", "remaining_time": "2:53:36", "throughput": 19742.79, "total_tokens": 40156416}
|
|
{"current_steps": 12765, "total_steps": 78105, "loss": 0.5609, "lr": 4.938999163447574e-06, "epoch": 0.8171691953140003, "percentage": 16.34, "elapsed_time": "0:33:54", "remaining_time": "2:53:34", "throughput": 19744.07, "total_tokens": 40172928}
|
|
{"current_steps": 12770, "total_steps": 78105, "loss": 0.3699, "lr": 4.9388764464983075e-06, "epoch": 0.8174892772549772, "percentage": 16.35, "elapsed_time": "0:33:55", "remaining_time": "2:53:33", "throughput": 19745.16, "total_tokens": 40188096}
|
|
{"current_steps": 12775, "total_steps": 78105, "loss": 0.5429, "lr": 4.938753607764255e-06, "epoch": 0.8178093591959542, "percentage": 16.36, "elapsed_time": "0:33:56", "remaining_time": "2:53:32", "throughput": 19746.35, "total_tokens": 40204032}
|
|
{"current_steps": 12780, "total_steps": 78105, "loss": 0.4195, "lr": 4.93863064725155e-06, "epoch": 0.8181294411369311, "percentage": 16.36, "elapsed_time": "0:33:56", "remaining_time": "2:53:30", "throughput": 19747.39, "total_tokens": 40218944}
|
|
{"current_steps": 12785, "total_steps": 78105, "loss": 0.7037, "lr": 4.9385075649663334e-06, "epoch": 0.818449523077908, "percentage": 16.37, "elapsed_time": "0:33:57", "remaining_time": "2:53:29", "throughput": 19748.71, "total_tokens": 40235456}
|
|
{"current_steps": 12790, "total_steps": 78105, "loss": 0.5339, "lr": 4.93838436091475e-06, "epoch": 0.8187696050188848, "percentage": 16.38, "elapsed_time": "0:33:58", "remaining_time": "2:53:27", "throughput": 19749.62, "total_tokens": 40250176}
|
|
{"current_steps": 12795, "total_steps": 78105, "loss": 0.4056, "lr": 4.9382610351029535e-06, "epoch": 0.8190896869598617, "percentage": 16.38, "elapsed_time": "0:33:58", "remaining_time": "2:53:26", "throughput": 19750.68, "total_tokens": 40265472}
|
|
{"current_steps": 12800, "total_steps": 78105, "loss": 0.4793, "lr": 4.9381375875371005e-06, "epoch": 0.8194097689008386, "percentage": 16.39, "elapsed_time": "0:33:59", "remaining_time": "2:53:24", "throughput": 19751.92, "total_tokens": 40281728}
|
|
{"current_steps": 12805, "total_steps": 78105, "loss": 0.4221, "lr": 4.938014018223356e-06, "epoch": 0.8197298508418155, "percentage": 16.39, "elapsed_time": "0:34:00", "remaining_time": "2:53:24", "throughput": 19750.58, "total_tokens": 40296768}
|
|
{"current_steps": 12810, "total_steps": 78105, "loss": 0.6157, "lr": 4.937890327167891e-06, "epoch": 0.8200499327827924, "percentage": 16.4, "elapsed_time": "0:34:00", "remaining_time": "2:53:23", "throughput": 19751.46, "total_tokens": 40311488}
|
|
{"current_steps": 12815, "total_steps": 78105, "loss": 0.3821, "lr": 4.937766514376882e-06, "epoch": 0.8203700147237692, "percentage": 16.41, "elapsed_time": "0:34:01", "remaining_time": "2:53:21", "throughput": 19752.58, "total_tokens": 40327232}
|
|
{"current_steps": 12820, "total_steps": 78105, "loss": 0.6138, "lr": 4.937642579856511e-06, "epoch": 0.8206900966647461, "percentage": 16.41, "elapsed_time": "0:34:02", "remaining_time": "2:53:20", "throughput": 19753.89, "total_tokens": 40344064}
|
|
{"current_steps": 12825, "total_steps": 78105, "loss": 0.5867, "lr": 4.9375185236129665e-06, "epoch": 0.8210101786057231, "percentage": 16.42, "elapsed_time": "0:34:02", "remaining_time": "2:53:18", "throughput": 19754.89, "total_tokens": 40359040}
|
|
{"current_steps": 12830, "total_steps": 78105, "loss": 0.4853, "lr": 4.937394345652444e-06, "epoch": 0.8213302605467, "percentage": 16.43, "elapsed_time": "0:34:03", "remaining_time": "2:53:17", "throughput": 19756.33, "total_tokens": 40376384}
|
|
{"current_steps": 12835, "total_steps": 78105, "loss": 0.701, "lr": 4.937270045981143e-06, "epoch": 0.8216503424876769, "percentage": 16.43, "elapsed_time": "0:34:04", "remaining_time": "2:53:16", "throughput": 19757.43, "total_tokens": 40391936}
|
|
{"current_steps": 12840, "total_steps": 78105, "loss": 0.3718, "lr": 4.937145624605272e-06, "epoch": 0.8219704244286538, "percentage": 16.44, "elapsed_time": "0:34:05", "remaining_time": "2:53:15", "throughput": 19758.87, "total_tokens": 40408832}
|
|
{"current_steps": 12845, "total_steps": 78105, "loss": 0.5003, "lr": 4.937021081531042e-06, "epoch": 0.8222905063696306, "percentage": 16.45, "elapsed_time": "0:34:05", "remaining_time": "2:53:13", "throughput": 19760.16, "total_tokens": 40425280}
|
|
{"current_steps": 12850, "total_steps": 78105, "loss": 0.4998, "lr": 4.936896416764674e-06, "epoch": 0.8226105883106075, "percentage": 16.45, "elapsed_time": "0:34:06", "remaining_time": "2:53:12", "throughput": 19761.24, "total_tokens": 40440832}
|
|
{"current_steps": 12855, "total_steps": 78105, "loss": 0.3566, "lr": 4.936771630312392e-06, "epoch": 0.8229306702515844, "percentage": 16.46, "elapsed_time": "0:34:07", "remaining_time": "2:53:11", "throughput": 19762.78, "total_tokens": 40458624}
|
|
{"current_steps": 12860, "total_steps": 78105, "loss": 0.5574, "lr": 4.936646722180428e-06, "epoch": 0.8232507521925613, "percentage": 16.47, "elapsed_time": "0:34:07", "remaining_time": "2:53:09", "throughput": 19764.04, "total_tokens": 40474688}
|
|
{"current_steps": 12865, "total_steps": 78105, "loss": 0.4617, "lr": 4.936521692375018e-06, "epoch": 0.8235708341335382, "percentage": 16.47, "elapsed_time": "0:34:08", "remaining_time": "2:53:08", "throughput": 19765.3, "total_tokens": 40490816}
|
|
{"current_steps": 12870, "total_steps": 78105, "loss": 0.6127, "lr": 4.9363965409024065e-06, "epoch": 0.823890916074515, "percentage": 16.48, "elapsed_time": "0:34:09", "remaining_time": "2:53:07", "throughput": 19766.34, "total_tokens": 40506112}
|
|
{"current_steps": 12875, "total_steps": 78105, "loss": 0.5227, "lr": 4.936271267768842e-06, "epoch": 0.8242109980154919, "percentage": 16.48, "elapsed_time": "0:34:09", "remaining_time": "2:53:05", "throughput": 19767.37, "total_tokens": 40521344}
|
|
{"current_steps": 12880, "total_steps": 78105, "loss": 0.3535, "lr": 4.936145872980581e-06, "epoch": 0.8245310799564689, "percentage": 16.49, "elapsed_time": "0:34:10", "remaining_time": "2:53:04", "throughput": 19768.56, "total_tokens": 40537024}
|
|
{"current_steps": 12885, "total_steps": 78105, "loss": 0.5369, "lr": 4.9360203565438845e-06, "epoch": 0.8248511618974458, "percentage": 16.5, "elapsed_time": "0:34:11", "remaining_time": "2:53:02", "throughput": 19769.69, "total_tokens": 40552640}
|
|
{"current_steps": 12890, "total_steps": 78105, "loss": 0.4837, "lr": 4.9358947184650195e-06, "epoch": 0.8251712438384227, "percentage": 16.5, "elapsed_time": "0:34:11", "remaining_time": "2:53:01", "throughput": 19770.75, "total_tokens": 40568000}
|
|
{"current_steps": 12895, "total_steps": 78105, "loss": 0.4927, "lr": 4.935768958750261e-06, "epoch": 0.8254913257793995, "percentage": 16.51, "elapsed_time": "0:34:12", "remaining_time": "2:53:00", "throughput": 19772.11, "total_tokens": 40584960}
|
|
{"current_steps": 12900, "total_steps": 78105, "loss": 0.5667, "lr": 4.935643077405888e-06, "epoch": 0.8258114077203764, "percentage": 16.52, "elapsed_time": "0:34:13", "remaining_time": "2:52:58", "throughput": 19773.2, "total_tokens": 40600704}
|
|
{"current_steps": 12905, "total_steps": 78105, "loss": 0.4383, "lr": 4.9355170744381866e-06, "epoch": 0.8261314896613533, "percentage": 16.52, "elapsed_time": "0:34:13", "remaining_time": "2:52:57", "throughput": 19774.25, "total_tokens": 40615872}
|
|
{"current_steps": 12910, "total_steps": 78105, "loss": 0.5866, "lr": 4.9353909498534495e-06, "epoch": 0.8264515716023302, "percentage": 16.53, "elapsed_time": "0:34:14", "remaining_time": "2:52:56", "throughput": 19775.72, "total_tokens": 40632640}
|
|
{"current_steps": 12915, "total_steps": 78105, "loss": 0.4792, "lr": 4.935264703657972e-06, "epoch": 0.8267716535433071, "percentage": 16.54, "elapsed_time": "0:34:15", "remaining_time": "2:52:54", "throughput": 19776.79, "total_tokens": 40648064}
|
|
{"current_steps": 12920, "total_steps": 78105, "loss": 0.5778, "lr": 4.9351383358580615e-06, "epoch": 0.8270917354842839, "percentage": 16.54, "elapsed_time": "0:34:16", "remaining_time": "2:52:53", "throughput": 19778.16, "total_tokens": 40664704}
|
|
{"current_steps": 12925, "total_steps": 78105, "loss": 0.6154, "lr": 4.935011846460026e-06, "epoch": 0.8274118174252608, "percentage": 16.55, "elapsed_time": "0:34:16", "remaining_time": "2:52:52", "throughput": 19779.37, "total_tokens": 40681024}
|
|
{"current_steps": 12930, "total_steps": 78105, "loss": 0.5939, "lr": 4.934885235470183e-06, "epoch": 0.8277318993662378, "percentage": 16.55, "elapsed_time": "0:34:17", "remaining_time": "2:52:50", "throughput": 19780.34, "total_tokens": 40695936}
|
|
{"current_steps": 12935, "total_steps": 78105, "loss": 0.4226, "lr": 4.934758502894853e-06, "epoch": 0.8280519813072147, "percentage": 16.56, "elapsed_time": "0:34:18", "remaining_time": "2:52:49", "throughput": 19781.81, "total_tokens": 40713472}
|
|
{"current_steps": 12940, "total_steps": 78105, "loss": 0.508, "lr": 4.934631648740367e-06, "epoch": 0.8283720632481916, "percentage": 16.57, "elapsed_time": "0:34:18", "remaining_time": "2:52:48", "throughput": 19782.9, "total_tokens": 40729344}
|
|
{"current_steps": 12945, "total_steps": 78105, "loss": 0.4391, "lr": 4.934504673013057e-06, "epoch": 0.8286921451891684, "percentage": 16.57, "elapsed_time": "0:34:19", "remaining_time": "2:52:46", "throughput": 19784.15, "total_tokens": 40745856}
|
|
{"current_steps": 12950, "total_steps": 78105, "loss": 0.4285, "lr": 4.934377575719265e-06, "epoch": 0.8290122271301453, "percentage": 16.58, "elapsed_time": "0:34:20", "remaining_time": "2:52:45", "throughput": 19785.36, "total_tokens": 40761920}
|
|
{"current_steps": 12955, "total_steps": 78105, "loss": 0.3555, "lr": 4.934250356865337e-06, "epoch": 0.8293323090711222, "percentage": 16.59, "elapsed_time": "0:34:20", "remaining_time": "2:52:43", "throughput": 19786.38, "total_tokens": 40776960}
|
|
{"current_steps": 12960, "total_steps": 78105, "loss": 0.5046, "lr": 4.934123016457625e-06, "epoch": 0.8296523910120991, "percentage": 16.59, "elapsed_time": "0:34:21", "remaining_time": "2:52:42", "throughput": 19787.39, "total_tokens": 40792192}
|
|
{"current_steps": 12965, "total_steps": 78105, "loss": 0.5857, "lr": 4.933995554502489e-06, "epoch": 0.829972472953076, "percentage": 16.6, "elapsed_time": "0:34:22", "remaining_time": "2:52:41", "throughput": 19788.56, "total_tokens": 40808256}
|
|
{"current_steps": 12970, "total_steps": 78105, "loss": 0.5344, "lr": 4.933867971006294e-06, "epoch": 0.8302925548940528, "percentage": 16.61, "elapsed_time": "0:34:22", "remaining_time": "2:52:39", "throughput": 19789.57, "total_tokens": 40823424}
|
|
{"current_steps": 12975, "total_steps": 78105, "loss": 0.4892, "lr": 4.933740265975409e-06, "epoch": 0.8306126368350297, "percentage": 16.61, "elapsed_time": "0:34:23", "remaining_time": "2:52:38", "throughput": 19791.05, "total_tokens": 40840960}
|
|
{"current_steps": 12980, "total_steps": 78105, "loss": 0.5096, "lr": 4.933612439416212e-06, "epoch": 0.8309327187760066, "percentage": 16.62, "elapsed_time": "0:34:24", "remaining_time": "2:52:37", "throughput": 19792.32, "total_tokens": 40857536}
|
|
{"current_steps": 12985, "total_steps": 78105, "loss": 0.4609, "lr": 4.933484491335086e-06, "epoch": 0.8312528007169836, "percentage": 16.63, "elapsed_time": "0:34:25", "remaining_time": "2:52:36", "throughput": 19793.68, "total_tokens": 40874240}
|
|
{"current_steps": 12990, "total_steps": 78105, "loss": 0.5848, "lr": 4.933356421738421e-06, "epoch": 0.8315728826579605, "percentage": 16.63, "elapsed_time": "0:34:25", "remaining_time": "2:52:34", "throughput": 19794.54, "total_tokens": 40888960}
|
|
{"current_steps": 12995, "total_steps": 78105, "loss": 0.6522, "lr": 4.9332282306326105e-06, "epoch": 0.8318929645989374, "percentage": 16.64, "elapsed_time": "0:34:26", "remaining_time": "2:52:33", "throughput": 19795.59, "total_tokens": 40904512}
|
|
{"current_steps": 13000, "total_steps": 78105, "loss": 0.4739, "lr": 4.933099918024057e-06, "epoch": 0.8322130465399142, "percentage": 16.64, "elapsed_time": "0:34:27", "remaining_time": "2:52:32", "throughput": 19797.03, "total_tokens": 40921856}
|
|
{"current_steps": 13005, "total_steps": 78105, "loss": 0.472, "lr": 4.932971483919166e-06, "epoch": 0.8325331284808911, "percentage": 16.65, "elapsed_time": "0:34:27", "remaining_time": "2:52:30", "throughput": 19798.4, "total_tokens": 40938752}
|
|
{"current_steps": 13010, "total_steps": 78105, "loss": 0.4399, "lr": 4.932842928324353e-06, "epoch": 0.832853210421868, "percentage": 16.66, "elapsed_time": "0:34:28", "remaining_time": "2:52:29", "throughput": 19799.4, "total_tokens": 40954048}
|
|
{"current_steps": 13015, "total_steps": 78105, "loss": 0.511, "lr": 4.9327142512460355e-06, "epoch": 0.8331732923628449, "percentage": 16.66, "elapsed_time": "0:34:29", "remaining_time": "2:52:28", "throughput": 19801.09, "total_tokens": 40972672}
|
|
{"current_steps": 13020, "total_steps": 78105, "loss": 0.3619, "lr": 4.932585452690641e-06, "epoch": 0.8334933743038218, "percentage": 16.67, "elapsed_time": "0:34:29", "remaining_time": "2:52:26", "throughput": 19802.08, "total_tokens": 40987648}
|
|
{"current_steps": 13025, "total_steps": 78105, "loss": 0.4629, "lr": 4.932456532664599e-06, "epoch": 0.8338134562447986, "percentage": 16.68, "elapsed_time": "0:34:30", "remaining_time": "2:52:25", "throughput": 19803.26, "total_tokens": 41003328}
|
|
{"current_steps": 13030, "total_steps": 78105, "loss": 0.4555, "lr": 4.932327491174348e-06, "epoch": 0.8341335381857755, "percentage": 16.68, "elapsed_time": "0:34:31", "remaining_time": "2:52:24", "throughput": 19804.3, "total_tokens": 41018624}
|
|
{"current_steps": 13035, "total_steps": 78105, "loss": 0.4254, "lr": 4.932198328226332e-06, "epoch": 0.8344536201267524, "percentage": 16.69, "elapsed_time": "0:34:31", "remaining_time": "2:52:22", "throughput": 19805.45, "total_tokens": 41034624}
|
|
{"current_steps": 13040, "total_steps": 78105, "loss": 0.554, "lr": 4.932069043827e-06, "epoch": 0.8347737020677294, "percentage": 16.7, "elapsed_time": "0:34:32", "remaining_time": "2:52:21", "throughput": 19806.46, "total_tokens": 41049664}
|
|
{"current_steps": 13045, "total_steps": 78105, "loss": 0.5138, "lr": 4.931939637982809e-06, "epoch": 0.8350937840087063, "percentage": 16.7, "elapsed_time": "0:34:33", "remaining_time": "2:52:19", "throughput": 19807.78, "total_tokens": 41066368}
|
|
{"current_steps": 13050, "total_steps": 78105, "loss": 0.4383, "lr": 4.931810110700219e-06, "epoch": 0.8354138659496831, "percentage": 16.71, "elapsed_time": "0:34:33", "remaining_time": "2:52:18", "throughput": 19808.87, "total_tokens": 41082048}
|
|
{"current_steps": 13055, "total_steps": 78105, "loss": 0.5354, "lr": 4.931680461985699e-06, "epoch": 0.83573394789066, "percentage": 16.71, "elapsed_time": "0:34:34", "remaining_time": "2:52:17", "throughput": 19810.02, "total_tokens": 41098048}
|
|
{"current_steps": 13060, "total_steps": 78105, "loss": 0.4701, "lr": 4.931550691845724e-06, "epoch": 0.8360540298316369, "percentage": 16.72, "elapsed_time": "0:34:35", "remaining_time": "2:52:15", "throughput": 19810.92, "total_tokens": 41112768}
|
|
{"current_steps": 13065, "total_steps": 78105, "loss": 0.5058, "lr": 4.9314208002867715e-06, "epoch": 0.8363741117726138, "percentage": 16.73, "elapsed_time": "0:34:35", "remaining_time": "2:52:14", "throughput": 19811.9, "total_tokens": 41127488}
|
|
{"current_steps": 13070, "total_steps": 78105, "loss": 0.3496, "lr": 4.9312907873153295e-06, "epoch": 0.8366941937135907, "percentage": 16.73, "elapsed_time": "0:34:36", "remaining_time": "2:52:12", "throughput": 19812.79, "total_tokens": 41142272}
|
|
{"current_steps": 13075, "total_steps": 78105, "loss": 0.3812, "lr": 4.931160652937889e-06, "epoch": 0.8370142756545675, "percentage": 16.74, "elapsed_time": "0:34:37", "remaining_time": "2:52:11", "throughput": 19813.86, "total_tokens": 41157952}
|
|
{"current_steps": 13080, "total_steps": 78105, "loss": 0.6256, "lr": 4.93103039716095e-06, "epoch": 0.8373343575955444, "percentage": 16.75, "elapsed_time": "0:34:37", "remaining_time": "2:52:09", "throughput": 19814.78, "total_tokens": 41172992}
|
|
{"current_steps": 13085, "total_steps": 78105, "loss": 0.5281, "lr": 4.930900019991016e-06, "epoch": 0.8376544395365213, "percentage": 16.75, "elapsed_time": "0:34:38", "remaining_time": "2:52:08", "throughput": 19815.63, "total_tokens": 41187776}
|
|
{"current_steps": 13090, "total_steps": 78105, "loss": 0.4531, "lr": 4.930769521434597e-06, "epoch": 0.8379745214774983, "percentage": 16.76, "elapsed_time": "0:34:39", "remaining_time": "2:52:07", "throughput": 19816.98, "total_tokens": 41204416}
|
|
{"current_steps": 13095, "total_steps": 78105, "loss": 0.5544, "lr": 4.930638901498208e-06, "epoch": 0.8382946034184752, "percentage": 16.77, "elapsed_time": "0:34:39", "remaining_time": "2:52:05", "throughput": 19818.16, "total_tokens": 41220032}
|
|
{"current_steps": 13100, "total_steps": 78105, "loss": 0.5094, "lr": 4.930508160188374e-06, "epoch": 0.838614685359452, "percentage": 16.77, "elapsed_time": "0:34:40", "remaining_time": "2:52:04", "throughput": 19819.27, "total_tokens": 41235776}
|
|
{"current_steps": 13105, "total_steps": 78105, "loss": 0.4823, "lr": 4.930377297511623e-06, "epoch": 0.8389347673004289, "percentage": 16.78, "elapsed_time": "0:34:41", "remaining_time": "2:52:03", "throughput": 19820.53, "total_tokens": 41252160}
|
|
{"current_steps": 13110, "total_steps": 78105, "loss": 0.4357, "lr": 4.930246313474488e-06, "epoch": 0.8392548492414058, "percentage": 16.79, "elapsed_time": "0:34:41", "remaining_time": "2:52:01", "throughput": 19821.9, "total_tokens": 41269056}
|
|
{"current_steps": 13115, "total_steps": 78105, "loss": 0.4334, "lr": 4.930115208083512e-06, "epoch": 0.8395749311823827, "percentage": 16.79, "elapsed_time": "0:34:42", "remaining_time": "2:52:00", "throughput": 19823.14, "total_tokens": 41285312}
|
|
{"current_steps": 13120, "total_steps": 78105, "loss": 0.4662, "lr": 4.92998398134524e-06, "epoch": 0.8398950131233596, "percentage": 16.8, "elapsed_time": "0:34:43", "remaining_time": "2:51:59", "throughput": 19824.25, "total_tokens": 41300992}
|
|
{"current_steps": 13125, "total_steps": 78105, "loss": 0.3655, "lr": 4.9298526332662255e-06, "epoch": 0.8402150950643364, "percentage": 16.8, "elapsed_time": "0:34:44", "remaining_time": "2:51:57", "throughput": 19825.61, "total_tokens": 41317952}
|
|
{"current_steps": 13130, "total_steps": 78105, "loss": 0.4765, "lr": 4.929721163853028e-06, "epoch": 0.8405351770053133, "percentage": 16.81, "elapsed_time": "0:34:44", "remaining_time": "2:51:56", "throughput": 19826.64, "total_tokens": 41333440}
|
|
{"current_steps": 13135, "total_steps": 78105, "loss": 0.5691, "lr": 4.929589573112209e-06, "epoch": 0.8408552589462902, "percentage": 16.82, "elapsed_time": "0:34:45", "remaining_time": "2:51:55", "throughput": 19827.78, "total_tokens": 41349312}
|
|
{"current_steps": 13140, "total_steps": 78105, "loss": 0.4671, "lr": 4.929457861050344e-06, "epoch": 0.8411753408872671, "percentage": 16.82, "elapsed_time": "0:34:46", "remaining_time": "2:51:53", "throughput": 19828.68, "total_tokens": 41364288}
|
|
{"current_steps": 13145, "total_steps": 78105, "loss": 0.4784, "lr": 4.929326027674007e-06, "epoch": 0.8414954228282441, "percentage": 16.83, "elapsed_time": "0:34:46", "remaining_time": "2:51:52", "throughput": 19829.7, "total_tokens": 41379904}
|
|
{"current_steps": 13150, "total_steps": 78105, "loss": 0.4815, "lr": 4.929194072989783e-06, "epoch": 0.841815504769221, "percentage": 16.84, "elapsed_time": "0:34:47", "remaining_time": "2:51:50", "throughput": 19830.6, "total_tokens": 41394432}
|
|
{"current_steps": 13155, "total_steps": 78105, "loss": 0.405, "lr": 4.92906199700426e-06, "epoch": 0.8421355867101978, "percentage": 16.84, "elapsed_time": "0:34:48", "remaining_time": "2:51:49", "throughput": 19831.6, "total_tokens": 41409984}
|
|
{"current_steps": 13160, "total_steps": 78105, "loss": 0.5116, "lr": 4.9289297997240325e-06, "epoch": 0.8424556686511747, "percentage": 16.85, "elapsed_time": "0:34:48", "remaining_time": "2:51:48", "throughput": 19832.67, "total_tokens": 41425984}
|
|
{"current_steps": 13165, "total_steps": 78105, "loss": 0.5143, "lr": 4.928797481155704e-06, "epoch": 0.8427757505921516, "percentage": 16.86, "elapsed_time": "0:34:49", "remaining_time": "2:51:46", "throughput": 19833.71, "total_tokens": 41441920}
|
|
{"current_steps": 13170, "total_steps": 78105, "loss": 0.43, "lr": 4.928665041305879e-06, "epoch": 0.8430958325331285, "percentage": 16.86, "elapsed_time": "0:34:50", "remaining_time": "2:51:45", "throughput": 19834.82, "total_tokens": 41457664}
|
|
{"current_steps": 13175, "total_steps": 78105, "loss": 0.4053, "lr": 4.928532480181173e-06, "epoch": 0.8434159144741054, "percentage": 16.87, "elapsed_time": "0:34:50", "remaining_time": "2:51:44", "throughput": 19835.79, "total_tokens": 41472832}
|
|
{"current_steps": 13180, "total_steps": 78105, "loss": 0.4232, "lr": 4.928399797788205e-06, "epoch": 0.8437359964150822, "percentage": 16.87, "elapsed_time": "0:34:51", "remaining_time": "2:51:42", "throughput": 19837.31, "total_tokens": 41490368}
|
|
{"current_steps": 13185, "total_steps": 78105, "loss": 0.5054, "lr": 4.9282669941336e-06, "epoch": 0.8440560783560591, "percentage": 16.88, "elapsed_time": "0:34:52", "remaining_time": "2:51:41", "throughput": 19838.52, "total_tokens": 41506624}
|
|
{"current_steps": 13190, "total_steps": 78105, "loss": 0.3315, "lr": 4.9281340692239895e-06, "epoch": 0.844376160297036, "percentage": 16.89, "elapsed_time": "0:34:52", "remaining_time": "2:51:40", "throughput": 19839.79, "total_tokens": 41523264}
|
|
{"current_steps": 13195, "total_steps": 78105, "loss": 0.4889, "lr": 4.928001023066011e-06, "epoch": 0.844696242238013, "percentage": 16.89, "elapsed_time": "0:34:53", "remaining_time": "2:51:39", "throughput": 19840.81, "total_tokens": 41538944}
|
|
{"current_steps": 13200, "total_steps": 78105, "loss": 0.4197, "lr": 4.92786785566631e-06, "epoch": 0.8450163241789899, "percentage": 16.9, "elapsed_time": "0:34:54", "remaining_time": "2:51:37", "throughput": 19841.98, "total_tokens": 41554880}
|
|
{"current_steps": 13205, "total_steps": 78105, "loss": 0.6719, "lr": 4.927734567031533e-06, "epoch": 0.8453364061199667, "percentage": 16.91, "elapsed_time": "0:34:54", "remaining_time": "2:51:36", "throughput": 19842.83, "total_tokens": 41569280}
|
|
{"current_steps": 13210, "total_steps": 78105, "loss": 0.3513, "lr": 4.9276011571683375e-06, "epoch": 0.8456564880609436, "percentage": 16.91, "elapsed_time": "0:34:55", "remaining_time": "2:51:34", "throughput": 19843.82, "total_tokens": 41584576}
|
|
{"current_steps": 13215, "total_steps": 78105, "loss": 0.4335, "lr": 4.927467626083385e-06, "epoch": 0.8459765700019205, "percentage": 16.92, "elapsed_time": "0:34:56", "remaining_time": "2:51:33", "throughput": 19844.84, "total_tokens": 41600000}
|
|
{"current_steps": 13220, "total_steps": 78105, "loss": 0.477, "lr": 4.927333973783344e-06, "epoch": 0.8462966519428974, "percentage": 16.93, "elapsed_time": "0:34:56", "remaining_time": "2:51:31", "throughput": 19845.78, "total_tokens": 41615040}
|
|
{"current_steps": 13225, "total_steps": 78105, "loss": 0.4326, "lr": 4.9272002002748875e-06, "epoch": 0.8466167338838743, "percentage": 16.93, "elapsed_time": "0:34:57", "remaining_time": "2:51:30", "throughput": 19846.8, "total_tokens": 41630208}
|
|
{"current_steps": 13230, "total_steps": 78105, "loss": 0.5416, "lr": 4.927066305564696e-06, "epoch": 0.8469368158248511, "percentage": 16.94, "elapsed_time": "0:34:58", "remaining_time": "2:51:28", "throughput": 19847.83, "total_tokens": 41645440}
|
|
{"current_steps": 13235, "total_steps": 78105, "loss": 0.4848, "lr": 4.926932289659456e-06, "epoch": 0.847256897765828, "percentage": 16.95, "elapsed_time": "0:34:58", "remaining_time": "2:51:27", "throughput": 19848.91, "total_tokens": 41661184}
|
|
{"current_steps": 13240, "total_steps": 78105, "loss": 0.5978, "lr": 4.926798152565858e-06, "epoch": 0.8475769797068049, "percentage": 16.95, "elapsed_time": "0:34:59", "remaining_time": "2:51:26", "throughput": 19849.89, "total_tokens": 41676224}
|
|
{"current_steps": 13245, "total_steps": 78105, "loss": 0.4259, "lr": 4.926663894290601e-06, "epoch": 0.8478970616477818, "percentage": 16.96, "elapsed_time": "0:35:00", "remaining_time": "2:51:25", "throughput": 19849.43, "total_tokens": 41690816}
|
|
{"current_steps": 13250, "total_steps": 78105, "loss": 0.4539, "lr": 4.926529514840389e-06, "epoch": 0.8482171435887588, "percentage": 16.96, "elapsed_time": "0:35:01", "remaining_time": "2:51:23", "throughput": 19850.55, "total_tokens": 41706688}
|
|
{"current_steps": 13255, "total_steps": 78105, "loss": 0.4773, "lr": 4.926395014221933e-06, "epoch": 0.8485372255297357, "percentage": 16.97, "elapsed_time": "0:35:01", "remaining_time": "2:51:22", "throughput": 19851.59, "total_tokens": 41721920}
|
|
{"current_steps": 13260, "total_steps": 78105, "loss": 0.482, "lr": 4.926260392441948e-06, "epoch": 0.8488573074707125, "percentage": 16.98, "elapsed_time": "0:35:02", "remaining_time": "2:51:21", "throughput": 19852.79, "total_tokens": 41738112}
|
|
{"current_steps": 13265, "total_steps": 78105, "loss": 0.4383, "lr": 4.926125649507157e-06, "epoch": 0.8491773894116894, "percentage": 16.98, "elapsed_time": "0:35:03", "remaining_time": "2:51:19", "throughput": 19853.85, "total_tokens": 41753792}
|
|
{"current_steps": 13270, "total_steps": 78105, "loss": 0.3857, "lr": 4.92599078542429e-06, "epoch": 0.8494974713526663, "percentage": 16.99, "elapsed_time": "0:35:03", "remaining_time": "2:51:18", "throughput": 19855.39, "total_tokens": 41771328}
|
|
{"current_steps": 13275, "total_steps": 78105, "loss": 0.5227, "lr": 4.925855800200078e-06, "epoch": 0.8498175532936432, "percentage": 17.0, "elapsed_time": "0:35:04", "remaining_time": "2:51:17", "throughput": 19856.68, "total_tokens": 41787712}
|
|
{"current_steps": 13280, "total_steps": 78105, "loss": 0.3605, "lr": 4.925720693841263e-06, "epoch": 0.85013763523462, "percentage": 17.0, "elapsed_time": "0:35:05", "remaining_time": "2:51:16", "throughput": 19857.63, "total_tokens": 41803072}
|
|
{"current_steps": 13285, "total_steps": 78105, "loss": 0.4722, "lr": 4.925585466354592e-06, "epoch": 0.8504577171755969, "percentage": 17.01, "elapsed_time": "0:35:05", "remaining_time": "2:51:14", "throughput": 19858.74, "total_tokens": 41818688}
|
|
{"current_steps": 13290, "total_steps": 78105, "loss": 0.4006, "lr": 4.925450117746817e-06, "epoch": 0.8507777991165738, "percentage": 17.02, "elapsed_time": "0:35:06", "remaining_time": "2:51:13", "throughput": 19859.73, "total_tokens": 41833792}
|
|
{"current_steps": 13295, "total_steps": 78105, "loss": 0.5135, "lr": 4.925314648024697e-06, "epoch": 0.8510978810575507, "percentage": 17.02, "elapsed_time": "0:35:07", "remaining_time": "2:51:11", "throughput": 19861.11, "total_tokens": 41850880}
|
|
{"current_steps": 13300, "total_steps": 78105, "loss": 0.3769, "lr": 4.925179057194997e-06, "epoch": 0.8514179629985277, "percentage": 17.03, "elapsed_time": "0:35:07", "remaining_time": "2:51:10", "throughput": 19862.13, "total_tokens": 41866560}
|
|
{"current_steps": 13305, "total_steps": 78105, "loss": 0.5361, "lr": 4.925043345264486e-06, "epoch": 0.8517380449395046, "percentage": 17.03, "elapsed_time": "0:35:08", "remaining_time": "2:51:09", "throughput": 19863.2, "total_tokens": 41882240}
|
|
{"current_steps": 13310, "total_steps": 78105, "loss": 0.6143, "lr": 4.9249075122399435e-06, "epoch": 0.8520581268804814, "percentage": 17.04, "elapsed_time": "0:35:09", "remaining_time": "2:51:08", "throughput": 19864.49, "total_tokens": 41898880}
|
|
{"current_steps": 13315, "total_steps": 78105, "loss": 0.3941, "lr": 4.9247715581281495e-06, "epoch": 0.8523782088214583, "percentage": 17.05, "elapsed_time": "0:35:09", "remaining_time": "2:51:06", "throughput": 19865.84, "total_tokens": 41915968}
|
|
{"current_steps": 13320, "total_steps": 78105, "loss": 0.4157, "lr": 4.924635482935895e-06, "epoch": 0.8526982907624352, "percentage": 17.05, "elapsed_time": "0:35:10", "remaining_time": "2:51:05", "throughput": 19866.81, "total_tokens": 41930816}
|
|
{"current_steps": 13325, "total_steps": 78105, "loss": 0.6425, "lr": 4.924499286669974e-06, "epoch": 0.8530183727034121, "percentage": 17.06, "elapsed_time": "0:35:11", "remaining_time": "2:51:04", "throughput": 19868.02, "total_tokens": 41947200}
|
|
{"current_steps": 13330, "total_steps": 78105, "loss": 0.3953, "lr": 4.9243629693371865e-06, "epoch": 0.853338454644389, "percentage": 17.07, "elapsed_time": "0:35:11", "remaining_time": "2:51:02", "throughput": 19868.96, "total_tokens": 41962240}
|
|
{"current_steps": 13335, "total_steps": 78105, "loss": 0.6252, "lr": 4.924226530944341e-06, "epoch": 0.8536585365853658, "percentage": 17.07, "elapsed_time": "0:35:12", "remaining_time": "2:51:01", "throughput": 19870.16, "total_tokens": 41978752}
|
|
{"current_steps": 13340, "total_steps": 78105, "loss": 0.4485, "lr": 4.924089971498249e-06, "epoch": 0.8539786185263427, "percentage": 17.08, "elapsed_time": "0:35:13", "remaining_time": "2:51:00", "throughput": 19871.21, "total_tokens": 41994112}
|
|
{"current_steps": 13345, "total_steps": 78105, "loss": 0.4313, "lr": 4.923953291005731e-06, "epoch": 0.8542987004673196, "percentage": 17.09, "elapsed_time": "0:35:14", "remaining_time": "2:50:58", "throughput": 19872.36, "total_tokens": 42010432}
|
|
{"current_steps": 13350, "total_steps": 78105, "loss": 0.4504, "lr": 4.923816489473612e-06, "epoch": 0.8546187824082965, "percentage": 17.09, "elapsed_time": "0:35:14", "remaining_time": "2:50:57", "throughput": 19873.37, "total_tokens": 42025984}
|
|
{"current_steps": 13355, "total_steps": 78105, "loss": 0.4746, "lr": 4.923679566908722e-06, "epoch": 0.8549388643492735, "percentage": 17.1, "elapsed_time": "0:35:15", "remaining_time": "2:50:55", "throughput": 19874.3, "total_tokens": 42040832}
|
|
{"current_steps": 13360, "total_steps": 78105, "loss": 0.5126, "lr": 4.923542523317899e-06, "epoch": 0.8552589462902503, "percentage": 17.11, "elapsed_time": "0:35:16", "remaining_time": "2:50:54", "throughput": 19875.57, "total_tokens": 42057536}
|
|
{"current_steps": 13365, "total_steps": 78105, "loss": 0.4442, "lr": 4.923405358707986e-06, "epoch": 0.8555790282312272, "percentage": 17.11, "elapsed_time": "0:35:16", "remaining_time": "2:50:53", "throughput": 19876.65, "total_tokens": 42073152}
|
|
{"current_steps": 13370, "total_steps": 78105, "loss": 0.4596, "lr": 4.923268073085833e-06, "epoch": 0.8558991101722041, "percentage": 17.12, "elapsed_time": "0:35:17", "remaining_time": "2:50:51", "throughput": 19877.67, "total_tokens": 42088512}
|
|
{"current_steps": 13375, "total_steps": 78105, "loss": 0.507, "lr": 4.923130666458294e-06, "epoch": 0.856219192113181, "percentage": 17.12, "elapsed_time": "0:35:18", "remaining_time": "2:50:50", "throughput": 19878.59, "total_tokens": 42103552}
|
|
{"current_steps": 13380, "total_steps": 78105, "loss": 0.4841, "lr": 4.922993138832232e-06, "epoch": 0.8565392740541579, "percentage": 17.13, "elapsed_time": "0:35:18", "remaining_time": "2:50:49", "throughput": 19879.81, "total_tokens": 42119872}
|
|
{"current_steps": 13385, "total_steps": 78105, "loss": 0.5768, "lr": 4.922855490214514e-06, "epoch": 0.8568593559951347, "percentage": 17.14, "elapsed_time": "0:35:19", "remaining_time": "2:50:48", "throughput": 19881.15, "total_tokens": 42136832}
|
|
{"current_steps": 13390, "total_steps": 78105, "loss": 0.4577, "lr": 4.922717720612012e-06, "epoch": 0.8571794379361116, "percentage": 17.14, "elapsed_time": "0:35:20", "remaining_time": "2:50:46", "throughput": 19882.26, "total_tokens": 42152896}
|
|
{"current_steps": 13395, "total_steps": 78105, "loss": 0.6398, "lr": 4.922579830031606e-06, "epoch": 0.8574995198770885, "percentage": 17.15, "elapsed_time": "0:35:20", "remaining_time": "2:50:45", "throughput": 19883.17, "total_tokens": 42168064}
|
|
{"current_steps": 13400, "total_steps": 78105, "loss": 0.4283, "lr": 4.922441818480183e-06, "epoch": 0.8578196018180654, "percentage": 17.16, "elapsed_time": "0:35:21", "remaining_time": "2:50:43", "throughput": 19884.05, "total_tokens": 42182784}
|
|
{"current_steps": 13405, "total_steps": 78105, "loss": 0.4372, "lr": 4.922303685964634e-06, "epoch": 0.8581396837590423, "percentage": 17.16, "elapsed_time": "0:35:22", "remaining_time": "2:50:42", "throughput": 19885.3, "total_tokens": 42199744}
|
|
{"current_steps": 13410, "total_steps": 78105, "loss": 0.3618, "lr": 4.922165432491855e-06, "epoch": 0.8584597657000193, "percentage": 17.17, "elapsed_time": "0:35:22", "remaining_time": "2:50:41", "throughput": 19886.3, "total_tokens": 42215040}
|
|
{"current_steps": 13415, "total_steps": 78105, "loss": 0.4587, "lr": 4.922027058068752e-06, "epoch": 0.8587798476409961, "percentage": 17.18, "elapsed_time": "0:35:23", "remaining_time": "2:50:39", "throughput": 19887.2, "total_tokens": 42230144}
|
|
{"current_steps": 13420, "total_steps": 78105, "loss": 0.5418, "lr": 4.9218885627022336e-06, "epoch": 0.859099929581973, "percentage": 17.18, "elapsed_time": "0:35:24", "remaining_time": "2:50:38", "throughput": 19888.32, "total_tokens": 42246528}
|
|
{"current_steps": 13425, "total_steps": 78105, "loss": 0.5169, "lr": 4.921749946399215e-06, "epoch": 0.8594200115229499, "percentage": 17.19, "elapsed_time": "0:35:24", "remaining_time": "2:50:37", "throughput": 19889.51, "total_tokens": 42263168}
|
|
{"current_steps": 13430, "total_steps": 78105, "loss": 0.5094, "lr": 4.921611209166618e-06, "epoch": 0.8597400934639268, "percentage": 17.19, "elapsed_time": "0:35:25", "remaining_time": "2:50:36", "throughput": 19890.56, "total_tokens": 42278912}
|
|
{"current_steps": 13435, "total_steps": 78105, "loss": 0.495, "lr": 4.921472351011372e-06, "epoch": 0.8600601754049036, "percentage": 17.2, "elapsed_time": "0:35:26", "remaining_time": "2:50:34", "throughput": 19891.69, "total_tokens": 42294656}
|
|
{"current_steps": 13440, "total_steps": 78105, "loss": 0.7637, "lr": 4.92133337194041e-06, "epoch": 0.8603802573458805, "percentage": 17.21, "elapsed_time": "0:35:26", "remaining_time": "2:50:33", "throughput": 19892.93, "total_tokens": 42311552}
|
|
{"current_steps": 13445, "total_steps": 78105, "loss": 0.5743, "lr": 4.9211942719606705e-06, "epoch": 0.8607003392868574, "percentage": 17.21, "elapsed_time": "0:35:27", "remaining_time": "2:50:32", "throughput": 19894.08, "total_tokens": 42327552}
|
|
{"current_steps": 13450, "total_steps": 78105, "loss": 0.4584, "lr": 4.921055051079101e-06, "epoch": 0.8610204212278343, "percentage": 17.22, "elapsed_time": "0:35:28", "remaining_time": "2:50:31", "throughput": 19895.14, "total_tokens": 42343360}
|
|
{"current_steps": 13455, "total_steps": 78105, "loss": 0.5159, "lr": 4.920915709302653e-06, "epoch": 0.8613405031688112, "percentage": 17.23, "elapsed_time": "0:35:29", "remaining_time": "2:50:29", "throughput": 19896.42, "total_tokens": 42360064}
|
|
{"current_steps": 13460, "total_steps": 78105, "loss": 0.4687, "lr": 4.920776246638285e-06, "epoch": 0.8616605851097882, "percentage": 17.23, "elapsed_time": "0:35:29", "remaining_time": "2:50:28", "throughput": 19897.27, "total_tokens": 42374976}
|
|
{"current_steps": 13465, "total_steps": 78105, "loss": 0.4043, "lr": 4.920636663092961e-06, "epoch": 0.861980667050765, "percentage": 17.24, "elapsed_time": "0:35:30", "remaining_time": "2:50:27", "throughput": 19898.56, "total_tokens": 42391616}
|
|
{"current_steps": 13470, "total_steps": 78105, "loss": 0.4318, "lr": 4.9204969586736495e-06, "epoch": 0.8623007489917419, "percentage": 17.25, "elapsed_time": "0:35:31", "remaining_time": "2:50:25", "throughput": 19899.43, "total_tokens": 42406528}
|
|
{"current_steps": 13475, "total_steps": 78105, "loss": 0.5153, "lr": 4.9203571333873284e-06, "epoch": 0.8626208309327188, "percentage": 17.25, "elapsed_time": "0:35:31", "remaining_time": "2:50:24", "throughput": 19900.93, "total_tokens": 42424320}
|
|
{"current_steps": 13480, "total_steps": 78105, "loss": 0.611, "lr": 4.92021718724098e-06, "epoch": 0.8629409128736957, "percentage": 17.26, "elapsed_time": "0:35:32", "remaining_time": "2:50:23", "throughput": 19902.19, "total_tokens": 42440960}
|
|
{"current_steps": 13485, "total_steps": 78105, "loss": 0.3195, "lr": 4.92007712024159e-06, "epoch": 0.8632609948146726, "percentage": 17.27, "elapsed_time": "0:35:33", "remaining_time": "2:50:22", "throughput": 19903.2, "total_tokens": 42456448}
|
|
{"current_steps": 13490, "total_steps": 78105, "loss": 0.4619, "lr": 4.9199369323961554e-06, "epoch": 0.8635810767556494, "percentage": 17.27, "elapsed_time": "0:35:33", "remaining_time": "2:50:20", "throughput": 19904.28, "total_tokens": 42472448}
|
|
{"current_steps": 13495, "total_steps": 78105, "loss": 0.6213, "lr": 4.919796623711675e-06, "epoch": 0.8639011586966263, "percentage": 17.28, "elapsed_time": "0:35:34", "remaining_time": "2:50:19", "throughput": 19905.25, "total_tokens": 42487808}
|
|
{"current_steps": 13500, "total_steps": 78105, "loss": 0.4896, "lr": 4.919656194195156e-06, "epoch": 0.8642212406376032, "percentage": 17.28, "elapsed_time": "0:35:35", "remaining_time": "2:50:18", "throughput": 19906.35, "total_tokens": 42503744}
|
|
{"current_steps": 13505, "total_steps": 78105, "loss": 0.6244, "lr": 4.9195156438536095e-06, "epoch": 0.8645413225785801, "percentage": 17.29, "elapsed_time": "0:35:35", "remaining_time": "2:50:16", "throughput": 19907.73, "total_tokens": 42520768}
|
|
{"current_steps": 13510, "total_steps": 78105, "loss": 0.5117, "lr": 4.9193749726940555e-06, "epoch": 0.864861404519557, "percentage": 17.3, "elapsed_time": "0:35:36", "remaining_time": "2:50:15", "throughput": 19908.93, "total_tokens": 42537408}
|
|
{"current_steps": 13515, "total_steps": 78105, "loss": 0.4307, "lr": 4.919234180723517e-06, "epoch": 0.8651814864605339, "percentage": 17.3, "elapsed_time": "0:35:37", "remaining_time": "2:50:14", "throughput": 19910.09, "total_tokens": 42553728}
|
|
{"current_steps": 13520, "total_steps": 78105, "loss": 0.4898, "lr": 4.9190932679490245e-06, "epoch": 0.8655015684015108, "percentage": 17.31, "elapsed_time": "0:35:37", "remaining_time": "2:50:13", "throughput": 19911.04, "total_tokens": 42568896}
|
|
{"current_steps": 13525, "total_steps": 78105, "loss": 0.4423, "lr": 4.918952234377615e-06, "epoch": 0.8658216503424877, "percentage": 17.32, "elapsed_time": "0:35:38", "remaining_time": "2:50:11", "throughput": 19911.95, "total_tokens": 42584000}
|
|
{"current_steps": 13530, "total_steps": 78105, "loss": 0.4808, "lr": 4.9188110800163306e-06, "epoch": 0.8661417322834646, "percentage": 17.32, "elapsed_time": "0:35:39", "remaining_time": "2:50:10", "throughput": 19913.06, "total_tokens": 42600192}
|
|
{"current_steps": 13535, "total_steps": 78105, "loss": 0.3951, "lr": 4.9186698048722205e-06, "epoch": 0.8664618142244415, "percentage": 17.33, "elapsed_time": "0:35:39", "remaining_time": "2:50:08", "throughput": 19913.81, "total_tokens": 42614656}
|
|
{"current_steps": 13540, "total_steps": 78105, "loss": 0.4872, "lr": 4.918528408952338e-06, "epoch": 0.8667818961654183, "percentage": 17.34, "elapsed_time": "0:35:40", "remaining_time": "2:50:07", "throughput": 19914.69, "total_tokens": 42629504}
|
|
{"current_steps": 13545, "total_steps": 78105, "loss": 0.5433, "lr": 4.9183868922637446e-06, "epoch": 0.8671019781063952, "percentage": 17.34, "elapsed_time": "0:35:41", "remaining_time": "2:50:05", "throughput": 19915.54, "total_tokens": 42644224}
|
|
{"current_steps": 13550, "total_steps": 78105, "loss": 0.359, "lr": 4.918245254813507e-06, "epoch": 0.8674220600473721, "percentage": 17.35, "elapsed_time": "0:35:41", "remaining_time": "2:50:04", "throughput": 19916.49, "total_tokens": 42659584}
|
|
{"current_steps": 13555, "total_steps": 78105, "loss": 0.4165, "lr": 4.918103496608698e-06, "epoch": 0.867742141988349, "percentage": 17.35, "elapsed_time": "0:35:42", "remaining_time": "2:50:03", "throughput": 19917.65, "total_tokens": 42675776}
|
|
{"current_steps": 13560, "total_steps": 78105, "loss": 0.5199, "lr": 4.917961617656395e-06, "epoch": 0.8680622239293259, "percentage": 17.36, "elapsed_time": "0:35:43", "remaining_time": "2:50:02", "throughput": 19919.01, "total_tokens": 42693184}
|
|
{"current_steps": 13565, "total_steps": 78105, "loss": 0.5467, "lr": 4.917819617963684e-06, "epoch": 0.8683823058703029, "percentage": 17.37, "elapsed_time": "0:35:44", "remaining_time": "2:50:01", "throughput": 19920.44, "total_tokens": 42710784}
|
|
{"current_steps": 13570, "total_steps": 78105, "loss": 0.5478, "lr": 4.917677497537655e-06, "epoch": 0.8687023878112797, "percentage": 17.37, "elapsed_time": "0:35:44", "remaining_time": "2:49:59", "throughput": 19921.6, "total_tokens": 42727040}
|
|
{"current_steps": 13575, "total_steps": 78105, "loss": 0.4198, "lr": 4.917535256385405e-06, "epoch": 0.8690224697522566, "percentage": 17.38, "elapsed_time": "0:35:45", "remaining_time": "2:49:58", "throughput": 19922.55, "total_tokens": 42742208}
|
|
{"current_steps": 13580, "total_steps": 78105, "loss": 0.464, "lr": 4.917392894514037e-06, "epoch": 0.8693425516932335, "percentage": 17.39, "elapsed_time": "0:35:46", "remaining_time": "2:49:57", "throughput": 19923.71, "total_tokens": 42758464}
|
|
{"current_steps": 13585, "total_steps": 78105, "loss": 0.5945, "lr": 4.917250411930659e-06, "epoch": 0.8696626336342104, "percentage": 17.39, "elapsed_time": "0:35:46", "remaining_time": "2:49:55", "throughput": 19924.73, "total_tokens": 42774016}
|
|
{"current_steps": 13590, "total_steps": 78105, "loss": 0.4659, "lr": 4.917107808642387e-06, "epoch": 0.8699827155751872, "percentage": 17.4, "elapsed_time": "0:35:47", "remaining_time": "2:49:54", "throughput": 19925.7, "total_tokens": 42789248}
|
|
{"current_steps": 13595, "total_steps": 78105, "loss": 0.4435, "lr": 4.916965084656341e-06, "epoch": 0.8703027975161641, "percentage": 17.41, "elapsed_time": "0:35:48", "remaining_time": "2:49:53", "throughput": 19926.68, "total_tokens": 42804608}
|
|
{"current_steps": 13600, "total_steps": 78105, "loss": 0.5812, "lr": 4.916822239979649e-06, "epoch": 0.870622879457141, "percentage": 17.41, "elapsed_time": "0:35:48", "remaining_time": "2:49:51", "throughput": 19927.55, "total_tokens": 42819584}
|
|
{"current_steps": 13605, "total_steps": 78105, "loss": 0.4933, "lr": 4.916679274619442e-06, "epoch": 0.8709429613981179, "percentage": 17.42, "elapsed_time": "0:35:49", "remaining_time": "2:49:50", "throughput": 19928.53, "total_tokens": 42835200}
|
|
{"current_steps": 13610, "total_steps": 78105, "loss": 0.4593, "lr": 4.91653618858286e-06, "epoch": 0.8712630433390948, "percentage": 17.43, "elapsed_time": "0:35:50", "remaining_time": "2:49:49", "throughput": 19929.86, "total_tokens": 42852672}
|
|
{"current_steps": 13615, "total_steps": 78105, "loss": 0.3882, "lr": 4.916392981877048e-06, "epoch": 0.8715831252800716, "percentage": 17.43, "elapsed_time": "0:35:50", "remaining_time": "2:49:47", "throughput": 19930.95, "total_tokens": 42868672}
|
|
{"current_steps": 13620, "total_steps": 78105, "loss": 0.5423, "lr": 4.916249654509159e-06, "epoch": 0.8719032072210486, "percentage": 17.44, "elapsed_time": "0:35:51", "remaining_time": "2:49:46", "throughput": 19931.92, "total_tokens": 42884096}
|
|
{"current_steps": 13625, "total_steps": 78105, "loss": 0.5687, "lr": 4.916106206486346e-06, "epoch": 0.8722232891620255, "percentage": 17.44, "elapsed_time": "0:35:52", "remaining_time": "2:49:45", "throughput": 19932.81, "total_tokens": 42898752}
|
|
{"current_steps": 13630, "total_steps": 78105, "loss": 0.632, "lr": 4.915962637815774e-06, "epoch": 0.8725433711030024, "percentage": 17.45, "elapsed_time": "0:35:52", "remaining_time": "2:49:43", "throughput": 19933.9, "total_tokens": 42914688}
|
|
{"current_steps": 13635, "total_steps": 78105, "loss": 0.4514, "lr": 4.915818948504614e-06, "epoch": 0.8728634530439793, "percentage": 17.46, "elapsed_time": "0:35:53", "remaining_time": "2:49:42", "throughput": 19935.06, "total_tokens": 42930688}
|
|
{"current_steps": 13640, "total_steps": 78105, "loss": 0.5396, "lr": 4.9156751385600375e-06, "epoch": 0.8731835349849562, "percentage": 17.46, "elapsed_time": "0:35:54", "remaining_time": "2:49:41", "throughput": 19936.24, "total_tokens": 42947008}
|
|
{"current_steps": 13645, "total_steps": 78105, "loss": 0.6046, "lr": 4.915531207989228e-06, "epoch": 0.873503616925933, "percentage": 17.47, "elapsed_time": "0:35:54", "remaining_time": "2:49:39", "throughput": 19937.25, "total_tokens": 42962816}
|
|
{"current_steps": 13650, "total_steps": 78105, "loss": 0.4804, "lr": 4.915387156799371e-06, "epoch": 0.8738236988669099, "percentage": 17.48, "elapsed_time": "0:35:55", "remaining_time": "2:49:38", "throughput": 19938.06, "total_tokens": 42977664}
|
|
{"current_steps": 13655, "total_steps": 78105, "loss": 0.5684, "lr": 4.9152429849976625e-06, "epoch": 0.8741437808078868, "percentage": 17.48, "elapsed_time": "0:35:56", "remaining_time": "2:49:37", "throughput": 19939.02, "total_tokens": 42993472}
|
|
{"current_steps": 13660, "total_steps": 78105, "loss": 0.581, "lr": 4.915098692591299e-06, "epoch": 0.8744638627488637, "percentage": 17.49, "elapsed_time": "0:35:56", "remaining_time": "2:49:36", "throughput": 19940.33, "total_tokens": 43010688}
|
|
{"current_steps": 13665, "total_steps": 78105, "loss": 0.4333, "lr": 4.914954279587486e-06, "epoch": 0.8747839446898406, "percentage": 17.5, "elapsed_time": "0:35:57", "remaining_time": "2:49:34", "throughput": 19941.55, "total_tokens": 43027392}
|
|
{"current_steps": 13670, "total_steps": 78105, "loss": 0.5035, "lr": 4.914809745993437e-06, "epoch": 0.8751040266308175, "percentage": 17.5, "elapsed_time": "0:35:58", "remaining_time": "2:49:33", "throughput": 19942.52, "total_tokens": 43043008}
|
|
{"current_steps": 13675, "total_steps": 78105, "loss": 0.5147, "lr": 4.9146650918163655e-06, "epoch": 0.8754241085717944, "percentage": 17.51, "elapsed_time": "0:35:59", "remaining_time": "2:49:32", "throughput": 19943.6, "total_tokens": 43059072}
|
|
{"current_steps": 13680, "total_steps": 78105, "loss": 0.4494, "lr": 4.914520317063498e-06, "epoch": 0.8757441905127713, "percentage": 17.51, "elapsed_time": "0:35:59", "remaining_time": "2:49:31", "throughput": 19944.6, "total_tokens": 43074624}
|
|
{"current_steps": 13685, "total_steps": 78105, "loss": 0.5185, "lr": 4.914375421742062e-06, "epoch": 0.8760642724537482, "percentage": 17.52, "elapsed_time": "0:36:00", "remaining_time": "2:49:29", "throughput": 19945.47, "total_tokens": 43089536}
|
|
{"current_steps": 13690, "total_steps": 78105, "loss": 0.5262, "lr": 4.914230405859294e-06, "epoch": 0.8763843543947251, "percentage": 17.53, "elapsed_time": "0:36:01", "remaining_time": "2:49:28", "throughput": 19946.33, "total_tokens": 43104512}
|
|
{"current_steps": 13695, "total_steps": 78105, "loss": 0.4122, "lr": 4.914085269422435e-06, "epoch": 0.8767044363357019, "percentage": 17.53, "elapsed_time": "0:36:01", "remaining_time": "2:49:26", "throughput": 19947.42, "total_tokens": 43120640}
|
|
{"current_steps": 13700, "total_steps": 78105, "loss": 0.5458, "lr": 4.913940012438732e-06, "epoch": 0.8770245182766788, "percentage": 17.54, "elapsed_time": "0:36:02", "remaining_time": "2:49:25", "throughput": 19948.41, "total_tokens": 43136384}
|
|
{"current_steps": 13705, "total_steps": 78105, "loss": 0.4664, "lr": 4.9137946349154386e-06, "epoch": 0.8773446002176557, "percentage": 17.55, "elapsed_time": "0:36:03", "remaining_time": "2:49:24", "throughput": 19949.65, "total_tokens": 43153216}
|
|
{"current_steps": 13710, "total_steps": 78105, "loss": 0.4954, "lr": 4.913649136859814e-06, "epoch": 0.8776646821586326, "percentage": 17.55, "elapsed_time": "0:36:03", "remaining_time": "2:49:23", "throughput": 19950.44, "total_tokens": 43167936}
|
|
{"current_steps": 13715, "total_steps": 78105, "loss": 0.4044, "lr": 4.9135035182791235e-06, "epoch": 0.8779847640996095, "percentage": 17.56, "elapsed_time": "0:36:04", "remaining_time": "2:49:21", "throughput": 19951.41, "total_tokens": 43183872}
|
|
{"current_steps": 13720, "total_steps": 78105, "loss": 0.6712, "lr": 4.913357779180639e-06, "epoch": 0.8783048460405863, "percentage": 17.57, "elapsed_time": "0:36:05", "remaining_time": "2:49:20", "throughput": 19952.45, "total_tokens": 43200256}
|
|
{"current_steps": 13725, "total_steps": 78105, "loss": 0.5041, "lr": 4.9132119195716375e-06, "epoch": 0.8786249279815633, "percentage": 17.57, "elapsed_time": "0:36:05", "remaining_time": "2:49:19", "throughput": 19953.73, "total_tokens": 43217600}
|
|
{"current_steps": 13730, "total_steps": 78105, "loss": 0.4631, "lr": 4.9130659394594025e-06, "epoch": 0.8789450099225402, "percentage": 17.58, "elapsed_time": "0:36:06", "remaining_time": "2:49:18", "throughput": 19954.75, "total_tokens": 43233344}
|
|
{"current_steps": 13735, "total_steps": 78105, "loss": 0.7038, "lr": 4.912919838851224e-06, "epoch": 0.8792650918635171, "percentage": 17.59, "elapsed_time": "0:36:07", "remaining_time": "2:49:17", "throughput": 19955.72, "total_tokens": 43249280}
|
|
{"current_steps": 13740, "total_steps": 78105, "loss": 0.508, "lr": 4.912773617754398e-06, "epoch": 0.879585173804494, "percentage": 17.59, "elapsed_time": "0:36:07", "remaining_time": "2:49:15", "throughput": 19956.73, "total_tokens": 43265024}
|
|
{"current_steps": 13745, "total_steps": 78105, "loss": 0.5256, "lr": 4.912627276176224e-06, "epoch": 0.8799052557454708, "percentage": 17.6, "elapsed_time": "0:36:08", "remaining_time": "2:49:14", "throughput": 19957.69, "total_tokens": 43280576}
|
|
{"current_steps": 13750, "total_steps": 78105, "loss": 0.451, "lr": 4.912480814124011e-06, "epoch": 0.8802253376864477, "percentage": 17.6, "elapsed_time": "0:36:09", "remaining_time": "2:49:13", "throughput": 19958.6, "total_tokens": 43296064}
|
|
{"current_steps": 13755, "total_steps": 78105, "loss": 0.5008, "lr": 4.912334231605073e-06, "epoch": 0.8805454196274246, "percentage": 17.61, "elapsed_time": "0:36:09", "remaining_time": "2:49:11", "throughput": 19959.55, "total_tokens": 43311552}
|
|
{"current_steps": 13760, "total_steps": 78105, "loss": 0.5462, "lr": 4.912187528626729e-06, "epoch": 0.8808655015684015, "percentage": 17.62, "elapsed_time": "0:36:10", "remaining_time": "2:49:10", "throughput": 19960.43, "total_tokens": 43326272}
|
|
{"current_steps": 13765, "total_steps": 78105, "loss": 0.452, "lr": 4.912040705196303e-06, "epoch": 0.8811855835093784, "percentage": 17.62, "elapsed_time": "0:36:11", "remaining_time": "2:49:09", "throughput": 19961.54, "total_tokens": 43342592}
|
|
{"current_steps": 13770, "total_steps": 78105, "loss": 0.3922, "lr": 4.911893761321129e-06, "epoch": 0.8815056654503552, "percentage": 17.63, "elapsed_time": "0:36:11", "remaining_time": "2:49:07", "throughput": 19962.47, "total_tokens": 43357888}
|
|
{"current_steps": 13775, "total_steps": 78105, "loss": 0.3391, "lr": 4.911746697008543e-06, "epoch": 0.8818257473913321, "percentage": 17.64, "elapsed_time": "0:36:12", "remaining_time": "2:49:06", "throughput": 19963.56, "total_tokens": 43374272}
|
|
{"current_steps": 13780, "total_steps": 78105, "loss": 0.3914, "lr": 4.91159951226589e-06, "epoch": 0.8821458293323091, "percentage": 17.64, "elapsed_time": "0:36:13", "remaining_time": "2:49:05", "throughput": 19965.14, "total_tokens": 43393280}
|
|
{"current_steps": 13785, "total_steps": 78105, "loss": 0.4902, "lr": 4.9114522071005185e-06, "epoch": 0.882465911273286, "percentage": 17.65, "elapsed_time": "0:36:14", "remaining_time": "2:49:04", "throughput": 19966.33, "total_tokens": 43409600}
|
|
{"current_steps": 13790, "total_steps": 78105, "loss": 0.5714, "lr": 4.911304781519785e-06, "epoch": 0.8827859932142629, "percentage": 17.66, "elapsed_time": "0:36:14", "remaining_time": "2:49:02", "throughput": 19967.21, "total_tokens": 43424384}
|
|
{"current_steps": 13795, "total_steps": 78105, "loss": 0.4055, "lr": 4.911157235531051e-06, "epoch": 0.8831060751552398, "percentage": 17.66, "elapsed_time": "0:36:15", "remaining_time": "2:49:01", "throughput": 19968.19, "total_tokens": 43440128}
|
|
{"current_steps": 13800, "total_steps": 78105, "loss": 0.578, "lr": 4.911009569141683e-06, "epoch": 0.8834261570962166, "percentage": 17.67, "elapsed_time": "0:36:16", "remaining_time": "2:49:00", "throughput": 19969.07, "total_tokens": 43455168}
|
|
{"current_steps": 13805, "total_steps": 78105, "loss": 0.4172, "lr": 4.910861782359057e-06, "epoch": 0.8837462390371935, "percentage": 17.67, "elapsed_time": "0:36:16", "remaining_time": "2:48:59", "throughput": 19970.33, "total_tokens": 43472064}
|
|
{"current_steps": 13810, "total_steps": 78105, "loss": 0.6041, "lr": 4.910713875190549e-06, "epoch": 0.8840663209781704, "percentage": 17.68, "elapsed_time": "0:36:17", "remaining_time": "2:48:57", "throughput": 19971.36, "total_tokens": 43488000}
|
|
{"current_steps": 13815, "total_steps": 78105, "loss": 0.4479, "lr": 4.910565847643549e-06, "epoch": 0.8843864029191473, "percentage": 17.69, "elapsed_time": "0:36:18", "remaining_time": "2:48:56", "throughput": 19972.37, "total_tokens": 43503488}
|
|
{"current_steps": 13820, "total_steps": 78105, "loss": 0.5161, "lr": 4.910417699725446e-06, "epoch": 0.8847064848601242, "percentage": 17.69, "elapsed_time": "0:36:18", "remaining_time": "2:48:55", "throughput": 19973.26, "total_tokens": 43518336}
|
|
{"current_steps": 13825, "total_steps": 78105, "loss": 0.42, "lr": 4.910269431443639e-06, "epoch": 0.885026566801101, "percentage": 17.7, "elapsed_time": "0:36:19", "remaining_time": "2:48:53", "throughput": 19974.04, "total_tokens": 43532800}
|
|
{"current_steps": 13830, "total_steps": 78105, "loss": 0.3951, "lr": 4.9101210428055306e-06, "epoch": 0.885346648742078, "percentage": 17.71, "elapsed_time": "0:36:20", "remaining_time": "2:48:52", "throughput": 19974.93, "total_tokens": 43547648}
|
|
{"current_steps": 13835, "total_steps": 78105, "loss": 0.4051, "lr": 4.909972533818531e-06, "epoch": 0.8856667306830549, "percentage": 17.71, "elapsed_time": "0:36:20", "remaining_time": "2:48:50", "throughput": 19975.78, "total_tokens": 43562688}
|
|
{"current_steps": 13840, "total_steps": 78105, "loss": 0.4491, "lr": 4.909823904490057e-06, "epoch": 0.8859868126240318, "percentage": 17.72, "elapsed_time": "0:36:21", "remaining_time": "2:48:49", "throughput": 19976.78, "total_tokens": 43578176}
|
|
{"current_steps": 13845, "total_steps": 78105, "loss": 0.4316, "lr": 4.909675154827529e-06, "epoch": 0.8863068945650087, "percentage": 17.73, "elapsed_time": "0:36:22", "remaining_time": "2:48:48", "throughput": 19977.83, "total_tokens": 43593920}
|
|
{"current_steps": 13850, "total_steps": 78105, "loss": 0.5023, "lr": 4.909526284838375e-06, "epoch": 0.8866269765059855, "percentage": 17.73, "elapsed_time": "0:36:22", "remaining_time": "2:48:46", "throughput": 19979.09, "total_tokens": 43610944}
|
|
{"current_steps": 13855, "total_steps": 78105, "loss": 0.5231, "lr": 4.90937729453003e-06, "epoch": 0.8869470584469624, "percentage": 17.74, "elapsed_time": "0:36:23", "remaining_time": "2:48:45", "throughput": 19980.15, "total_tokens": 43627008}
|
|
{"current_steps": 13860, "total_steps": 78105, "loss": 0.6215, "lr": 4.909228183909932e-06, "epoch": 0.8872671403879393, "percentage": 17.75, "elapsed_time": "0:36:24", "remaining_time": "2:48:44", "throughput": 19981.16, "total_tokens": 43642752}
|
|
{"current_steps": 13865, "total_steps": 78105, "loss": 0.416, "lr": 4.909078952985529e-06, "epoch": 0.8875872223289162, "percentage": 17.75, "elapsed_time": "0:36:24", "remaining_time": "2:48:43", "throughput": 19982.13, "total_tokens": 43658496}
|
|
{"current_steps": 13870, "total_steps": 78105, "loss": 0.5573, "lr": 4.90892960176427e-06, "epoch": 0.8879073042698931, "percentage": 17.76, "elapsed_time": "0:36:25", "remaining_time": "2:48:41", "throughput": 19983.2, "total_tokens": 43674304}
|
|
{"current_steps": 13875, "total_steps": 78105, "loss": 0.446, "lr": 4.908780130253615e-06, "epoch": 0.8882273862108699, "percentage": 17.76, "elapsed_time": "0:36:26", "remaining_time": "2:48:40", "throughput": 19984.16, "total_tokens": 43689536}
|
|
{"current_steps": 13880, "total_steps": 78105, "loss": 0.4856, "lr": 4.908630538461027e-06, "epoch": 0.8885474681518468, "percentage": 17.77, "elapsed_time": "0:36:26", "remaining_time": "2:48:39", "throughput": 19985.34, "total_tokens": 43706496}
|
|
{"current_steps": 13885, "total_steps": 78105, "loss": 0.378, "lr": 4.908480826393976e-06, "epoch": 0.8888675500928238, "percentage": 17.78, "elapsed_time": "0:36:27", "remaining_time": "2:48:37", "throughput": 19986.17, "total_tokens": 43721408}
|
|
{"current_steps": 13890, "total_steps": 78105, "loss": 0.5503, "lr": 4.908330994059939e-06, "epoch": 0.8891876320338007, "percentage": 17.78, "elapsed_time": "0:36:28", "remaining_time": "2:48:36", "throughput": 19987.26, "total_tokens": 43737536}
|
|
{"current_steps": 13895, "total_steps": 78105, "loss": 0.5514, "lr": 4.908181041466396e-06, "epoch": 0.8895077139747776, "percentage": 17.79, "elapsed_time": "0:36:28", "remaining_time": "2:48:35", "throughput": 19988.17, "total_tokens": 43752640}
|
|
{"current_steps": 13900, "total_steps": 78105, "loss": 0.5013, "lr": 4.9080309686208344e-06, "epoch": 0.8898277959157544, "percentage": 17.8, "elapsed_time": "0:36:29", "remaining_time": "2:48:33", "throughput": 19989.14, "total_tokens": 43768384}
|
|
{"current_steps": 13905, "total_steps": 78105, "loss": 0.3755, "lr": 4.90788077553075e-06, "epoch": 0.8901478778567313, "percentage": 17.8, "elapsed_time": "0:36:30", "remaining_time": "2:48:32", "throughput": 19990.18, "total_tokens": 43784320}
|
|
{"current_steps": 13910, "total_steps": 78105, "loss": 0.5875, "lr": 4.907730462203642e-06, "epoch": 0.8904679597977082, "percentage": 17.81, "elapsed_time": "0:36:30", "remaining_time": "2:48:31", "throughput": 19991.05, "total_tokens": 43799232}
|
|
{"current_steps": 13915, "total_steps": 78105, "loss": 0.5377, "lr": 4.907580028647016e-06, "epoch": 0.8907880417386851, "percentage": 17.82, "elapsed_time": "0:36:31", "remaining_time": "2:48:29", "throughput": 19992.18, "total_tokens": 43815360}
|
|
{"current_steps": 13920, "total_steps": 78105, "loss": 0.4908, "lr": 4.907429474868384e-06, "epoch": 0.891108123679662, "percentage": 17.82, "elapsed_time": "0:36:32", "remaining_time": "2:48:28", "throughput": 19992.99, "total_tokens": 43830336}
|
|
{"current_steps": 13925, "total_steps": 78105, "loss": 0.484, "lr": 4.9072788008752635e-06, "epoch": 0.8914282056206388, "percentage": 17.83, "elapsed_time": "0:36:32", "remaining_time": "2:48:27", "throughput": 19994.08, "total_tokens": 43846656}
|
|
{"current_steps": 13930, "total_steps": 78105, "loss": 0.4801, "lr": 4.9071280066751794e-06, "epoch": 0.8917482875616157, "percentage": 17.83, "elapsed_time": "0:36:33", "remaining_time": "2:48:26", "throughput": 19995.12, "total_tokens": 43862720}
|
|
{"current_steps": 13935, "total_steps": 78105, "loss": 0.4507, "lr": 4.906977092275661e-06, "epoch": 0.8920683695025927, "percentage": 17.84, "elapsed_time": "0:36:34", "remaining_time": "2:48:24", "throughput": 19995.81, "total_tokens": 43876800}
|
|
{"current_steps": 13940, "total_steps": 78105, "loss": 0.3272, "lr": 4.9068260576842435e-06, "epoch": 0.8923884514435696, "percentage": 17.85, "elapsed_time": "0:36:34", "remaining_time": "2:48:23", "throughput": 19996.74, "total_tokens": 43892160}
|
|
{"current_steps": 13945, "total_steps": 78105, "loss": 0.4564, "lr": 4.90667490290847e-06, "epoch": 0.8927085333845465, "percentage": 17.85, "elapsed_time": "0:36:35", "remaining_time": "2:48:22", "throughput": 19997.83, "total_tokens": 43908416}
|
|
{"current_steps": 13950, "total_steps": 78105, "loss": 0.4872, "lr": 4.906523627955887e-06, "epoch": 0.8930286153255234, "percentage": 17.86, "elapsed_time": "0:36:36", "remaining_time": "2:48:20", "throughput": 19998.71, "total_tokens": 43923712}
|
|
{"current_steps": 13955, "total_steps": 78105, "loss": 0.5561, "lr": 4.90637223283405e-06, "epoch": 0.8933486972665002, "percentage": 17.87, "elapsed_time": "0:36:37", "remaining_time": "2:48:19", "throughput": 19999.68, "total_tokens": 43939520}
|
|
{"current_steps": 13960, "total_steps": 78105, "loss": 0.5394, "lr": 4.9062207175505174e-06, "epoch": 0.8936687792074771, "percentage": 17.87, "elapsed_time": "0:36:37", "remaining_time": "2:48:18", "throughput": 20000.55, "total_tokens": 43954688}
|
|
{"current_steps": 13965, "total_steps": 78105, "loss": 0.4483, "lr": 4.906069082112856e-06, "epoch": 0.893988861148454, "percentage": 17.88, "elapsed_time": "0:36:38", "remaining_time": "2:48:16", "throughput": 20001.41, "total_tokens": 43969600}
|
|
{"current_steps": 13970, "total_steps": 78105, "loss": 0.4856, "lr": 4.905917326528638e-06, "epoch": 0.8943089430894309, "percentage": 17.89, "elapsed_time": "0:36:39", "remaining_time": "2:48:15", "throughput": 20002.43, "total_tokens": 43985472}
|
|
{"current_steps": 13975, "total_steps": 78105, "loss": 0.5622, "lr": 4.905765450805439e-06, "epoch": 0.8946290250304078, "percentage": 17.89, "elapsed_time": "0:36:39", "remaining_time": "2:48:14", "throughput": 20003.37, "total_tokens": 44000768}
|
|
{"current_steps": 13980, "total_steps": 78105, "loss": 0.689, "lr": 4.905613454950846e-06, "epoch": 0.8949491069713846, "percentage": 17.9, "elapsed_time": "0:36:40", "remaining_time": "2:48:12", "throughput": 20004.39, "total_tokens": 44017088}
|
|
{"current_steps": 13985, "total_steps": 78105, "loss": 0.493, "lr": 4.905461338972447e-06, "epoch": 0.8952691889123615, "percentage": 17.91, "elapsed_time": "0:36:41", "remaining_time": "2:48:11", "throughput": 20005.16, "total_tokens": 44031488}
|
|
{"current_steps": 13990, "total_steps": 78105, "loss": 0.4007, "lr": 4.9053091028778375e-06, "epoch": 0.8955892708533385, "percentage": 17.91, "elapsed_time": "0:36:41", "remaining_time": "2:48:10", "throughput": 20006.14, "total_tokens": 44047296}
|
|
{"current_steps": 13995, "total_steps": 78105, "loss": 0.4141, "lr": 4.905156746674622e-06, "epoch": 0.8959093527943154, "percentage": 17.92, "elapsed_time": "0:36:42", "remaining_time": "2:48:08", "throughput": 20007.23, "total_tokens": 44063744}
|
|
{"current_steps": 14000, "total_steps": 78105, "loss": 0.5588, "lr": 4.905004270370405e-06, "epoch": 0.8962294347352923, "percentage": 17.92, "elapsed_time": "0:36:43", "remaining_time": "2:48:11", "throughput": 20001.2, "total_tokens": 44079168}
|
|
{"current_steps": 14005, "total_steps": 78105, "loss": 0.4681, "lr": 4.904851673972803e-06, "epoch": 0.8965495166762691, "percentage": 17.93, "elapsed_time": "0:36:44", "remaining_time": "2:48:09", "throughput": 20001.98, "total_tokens": 44093824}
|
|
{"current_steps": 14010, "total_steps": 78105, "loss": 0.6702, "lr": 4.904698957489434e-06, "epoch": 0.896869598617246, "percentage": 17.94, "elapsed_time": "0:36:45", "remaining_time": "2:48:08", "throughput": 20003.24, "total_tokens": 44111296}
|
|
{"current_steps": 14015, "total_steps": 78105, "loss": 0.5934, "lr": 4.904546120927925e-06, "epoch": 0.8971896805582229, "percentage": 17.94, "elapsed_time": "0:36:45", "remaining_time": "2:48:07", "throughput": 20004.49, "total_tokens": 44128000}
|
|
{"current_steps": 14020, "total_steps": 78105, "loss": 0.438, "lr": 4.904393164295908e-06, "epoch": 0.8975097624991998, "percentage": 17.95, "elapsed_time": "0:36:46", "remaining_time": "2:48:06", "throughput": 20005.42, "total_tokens": 44143488}
|
|
{"current_steps": 14025, "total_steps": 78105, "loss": 0.4309, "lr": 4.90424008760102e-06, "epoch": 0.8978298444401767, "percentage": 17.96, "elapsed_time": "0:36:47", "remaining_time": "2:48:04", "throughput": 20006.41, "total_tokens": 44158976}
|
|
{"current_steps": 14030, "total_steps": 78105, "loss": 0.6214, "lr": 4.904086890850905e-06, "epoch": 0.8981499263811535, "percentage": 17.96, "elapsed_time": "0:36:47", "remaining_time": "2:48:03", "throughput": 20007.66, "total_tokens": 44175808}
|
|
{"current_steps": 14035, "total_steps": 78105, "loss": 0.4426, "lr": 4.903933574053213e-06, "epoch": 0.8984700083221304, "percentage": 17.97, "elapsed_time": "0:36:48", "remaining_time": "2:48:02", "throughput": 20008.45, "total_tokens": 44190336}
|
|
{"current_steps": 14040, "total_steps": 78105, "loss": 0.4952, "lr": 4.9037801372156e-06, "epoch": 0.8987900902631074, "percentage": 17.98, "elapsed_time": "0:36:49", "remaining_time": "2:48:02", "throughput": 20006.55, "total_tokens": 44205696}
|
|
{"current_steps": 14045, "total_steps": 78105, "loss": 0.5449, "lr": 4.903626580345729e-06, "epoch": 0.8991101722040843, "percentage": 17.98, "elapsed_time": "0:36:50", "remaining_time": "2:48:01", "throughput": 20007.65, "total_tokens": 44222016}
|
|
{"current_steps": 14050, "total_steps": 78105, "loss": 0.494, "lr": 4.9034729034512655e-06, "epoch": 0.8994302541450612, "percentage": 17.99, "elapsed_time": "0:36:50", "remaining_time": "2:47:59", "throughput": 20008.79, "total_tokens": 44238592}
|
|
{"current_steps": 14055, "total_steps": 78105, "loss": 0.3467, "lr": 4.903319106539884e-06, "epoch": 0.899750336086038, "percentage": 18.0, "elapsed_time": "0:36:51", "remaining_time": "2:47:58", "throughput": 20009.71, "total_tokens": 44254016}
|
|
{"current_steps": 14060, "total_steps": 78105, "loss": 0.5265, "lr": 4.9031651896192655e-06, "epoch": 0.9000704180270149, "percentage": 18.0, "elapsed_time": "0:36:52", "remaining_time": "2:47:57", "throughput": 20010.68, "total_tokens": 44269376}
|
|
{"current_steps": 14065, "total_steps": 78105, "loss": 0.6117, "lr": 4.903011152697095e-06, "epoch": 0.9003904999679918, "percentage": 18.01, "elapsed_time": "0:36:52", "remaining_time": "2:47:55", "throughput": 20011.55, "total_tokens": 44284672}
|
|
{"current_steps": 14070, "total_steps": 78105, "loss": 0.5628, "lr": 4.902856995781064e-06, "epoch": 0.9007105819089687, "percentage": 18.01, "elapsed_time": "0:36:53", "remaining_time": "2:47:54", "throughput": 20012.55, "total_tokens": 44300224}
|
|
{"current_steps": 14075, "total_steps": 78105, "loss": 0.6203, "lr": 4.902702718878871e-06, "epoch": 0.9010306638499456, "percentage": 18.02, "elapsed_time": "0:36:54", "remaining_time": "2:47:53", "throughput": 20013.45, "total_tokens": 44315264}
|
|
{"current_steps": 14080, "total_steps": 78105, "loss": 0.5193, "lr": 4.902548321998219e-06, "epoch": 0.9013507457909224, "percentage": 18.03, "elapsed_time": "0:36:54", "remaining_time": "2:47:51", "throughput": 20014.34, "total_tokens": 44330176}
|
|
{"current_steps": 14085, "total_steps": 78105, "loss": 0.5402, "lr": 4.902393805146819e-06, "epoch": 0.9016708277318993, "percentage": 18.03, "elapsed_time": "0:36:55", "remaining_time": "2:47:50", "throughput": 20015.2, "total_tokens": 44345152}
|
|
{"current_steps": 14090, "total_steps": 78105, "loss": 0.485, "lr": 4.902239168332386e-06, "epoch": 0.9019909096728762, "percentage": 18.04, "elapsed_time": "0:36:56", "remaining_time": "2:47:49", "throughput": 20016.19, "total_tokens": 44361152}
|
|
{"current_steps": 14095, "total_steps": 78105, "loss": 0.5202, "lr": 4.902084411562641e-06, "epoch": 0.9023109916138532, "percentage": 18.05, "elapsed_time": "0:36:56", "remaining_time": "2:47:47", "throughput": 20017.1, "total_tokens": 44376128}
|
|
{"current_steps": 14100, "total_steps": 78105, "loss": 0.4646, "lr": 4.901929534845313e-06, "epoch": 0.9026310735548301, "percentage": 18.05, "elapsed_time": "0:36:57", "remaining_time": "2:47:46", "throughput": 20018.21, "total_tokens": 44392192}
|
|
{"current_steps": 14105, "total_steps": 78105, "loss": 0.6618, "lr": 4.9017745381881345e-06, "epoch": 0.902951155495807, "percentage": 18.06, "elapsed_time": "0:36:58", "remaining_time": "2:47:45", "throughput": 20019.2, "total_tokens": 44407680}
|
|
{"current_steps": 14110, "total_steps": 78105, "loss": 0.5255, "lr": 4.901619421598847e-06, "epoch": 0.9032712374367838, "percentage": 18.07, "elapsed_time": "0:36:58", "remaining_time": "2:47:43", "throughput": 20020.37, "total_tokens": 44424384}
|
|
{"current_steps": 14115, "total_steps": 78105, "loss": 0.485, "lr": 4.901464185085194e-06, "epoch": 0.9035913193777607, "percentage": 18.07, "elapsed_time": "0:36:59", "remaining_time": "2:47:42", "throughput": 20021.27, "total_tokens": 44439744}
|
|
{"current_steps": 14120, "total_steps": 78105, "loss": 0.515, "lr": 4.9013088286549295e-06, "epoch": 0.9039114013187376, "percentage": 18.08, "elapsed_time": "0:37:00", "remaining_time": "2:47:41", "throughput": 20022.07, "total_tokens": 44454272}
|
|
{"current_steps": 14125, "total_steps": 78105, "loss": 0.4794, "lr": 4.901153352315809e-06, "epoch": 0.9042314832597145, "percentage": 18.08, "elapsed_time": "0:37:00", "remaining_time": "2:47:39", "throughput": 20022.89, "total_tokens": 44468992}
|
|
{"current_steps": 14130, "total_steps": 78105, "loss": 0.4356, "lr": 4.900997756075599e-06, "epoch": 0.9045515652006914, "percentage": 18.09, "elapsed_time": "0:37:01", "remaining_time": "2:47:38", "throughput": 20023.91, "total_tokens": 44484864}
|
|
{"current_steps": 14135, "total_steps": 78105, "loss": 0.4222, "lr": 4.900842039942065e-06, "epoch": 0.9048716471416682, "percentage": 18.1, "elapsed_time": "0:37:02", "remaining_time": "2:47:37", "throughput": 20024.79, "total_tokens": 44499968}
|
|
{"current_steps": 14140, "total_steps": 78105, "loss": 0.5553, "lr": 4.900686203922986e-06, "epoch": 0.9051917290826451, "percentage": 18.1, "elapsed_time": "0:37:02", "remaining_time": "2:47:36", "throughput": 20026.17, "total_tokens": 44517952}
|
|
{"current_steps": 14145, "total_steps": 78105, "loss": 0.399, "lr": 4.900530248026143e-06, "epoch": 0.905511811023622, "percentage": 18.11, "elapsed_time": "0:37:03", "remaining_time": "2:47:34", "throughput": 20027.06, "total_tokens": 44532928}
|
|
{"current_steps": 14150, "total_steps": 78105, "loss": 0.4205, "lr": 4.900374172259324e-06, "epoch": 0.905831892964599, "percentage": 18.12, "elapsed_time": "0:37:04", "remaining_time": "2:47:33", "throughput": 20027.97, "total_tokens": 44548288}
|
|
{"current_steps": 14155, "total_steps": 78105, "loss": 0.4555, "lr": 4.900217976630321e-06, "epoch": 0.9061519749055759, "percentage": 18.12, "elapsed_time": "0:37:04", "remaining_time": "2:47:32", "throughput": 20028.89, "total_tokens": 44563712}
|
|
{"current_steps": 14160, "total_steps": 78105, "loss": 0.4886, "lr": 4.9000616611469344e-06, "epoch": 0.9064720568465527, "percentage": 18.13, "elapsed_time": "0:37:05", "remaining_time": "2:47:30", "throughput": 20029.83, "total_tokens": 44579264}
|
|
{"current_steps": 14165, "total_steps": 78105, "loss": 0.3468, "lr": 4.89990522581697e-06, "epoch": 0.9067921387875296, "percentage": 18.14, "elapsed_time": "0:37:06", "remaining_time": "2:47:29", "throughput": 20030.69, "total_tokens": 44594176}
|
|
{"current_steps": 14170, "total_steps": 78105, "loss": 0.516, "lr": 4.89974867064824e-06, "epoch": 0.9071122207285065, "percentage": 18.14, "elapsed_time": "0:37:06", "remaining_time": "2:47:28", "throughput": 20031.65, "total_tokens": 44609664}
|
|
{"current_steps": 14175, "total_steps": 78105, "loss": 0.5396, "lr": 4.899591995648561e-06, "epoch": 0.9074323026694834, "percentage": 18.15, "elapsed_time": "0:37:07", "remaining_time": "2:47:26", "throughput": 20032.93, "total_tokens": 44626944}
|
|
{"current_steps": 14180, "total_steps": 78105, "loss": 0.3364, "lr": 4.899435200825756e-06, "epoch": 0.9077523846104603, "percentage": 18.16, "elapsed_time": "0:37:08", "remaining_time": "2:47:25", "throughput": 20034.08, "total_tokens": 44643520}
|
|
{"current_steps": 14185, "total_steps": 78105, "loss": 0.5203, "lr": 4.899278286187656e-06, "epoch": 0.9080724665514371, "percentage": 18.16, "elapsed_time": "0:37:09", "remaining_time": "2:47:24", "throughput": 20034.81, "total_tokens": 44657984}
|
|
{"current_steps": 14190, "total_steps": 78105, "loss": 0.5656, "lr": 4.899121251742095e-06, "epoch": 0.908392548492414, "percentage": 18.17, "elapsed_time": "0:37:09", "remaining_time": "2:47:22", "throughput": 20035.7, "total_tokens": 44673024}
|
|
{"current_steps": 14195, "total_steps": 78105, "loss": 0.5394, "lr": 4.898964097496917e-06, "epoch": 0.9087126304333909, "percentage": 18.17, "elapsed_time": "0:37:10", "remaining_time": "2:47:21", "throughput": 20036.39, "total_tokens": 44687424}
|
|
{"current_steps": 14200, "total_steps": 78105, "loss": 0.4064, "lr": 4.898806823459966e-06, "epoch": 0.9090327123743679, "percentage": 18.18, "elapsed_time": "0:37:10", "remaining_time": "2:47:20", "throughput": 20037.35, "total_tokens": 44702976}
|
|
{"current_steps": 14205, "total_steps": 78105, "loss": 0.5029, "lr": 4.898649429639097e-06, "epoch": 0.9093527943153448, "percentage": 18.19, "elapsed_time": "0:37:11", "remaining_time": "2:47:18", "throughput": 20038.26, "total_tokens": 44718144}
|
|
{"current_steps": 14210, "total_steps": 78105, "loss": 0.4381, "lr": 4.898491916042171e-06, "epoch": 0.9096728762563216, "percentage": 18.19, "elapsed_time": "0:37:12", "remaining_time": "2:47:17", "throughput": 20039.47, "total_tokens": 44734912}
|
|
{"current_steps": 14215, "total_steps": 78105, "loss": 0.4233, "lr": 4.898334282677051e-06, "epoch": 0.9099929581972985, "percentage": 18.2, "elapsed_time": "0:37:12", "remaining_time": "2:47:16", "throughput": 20040.34, "total_tokens": 44749888}
|
|
{"current_steps": 14220, "total_steps": 78105, "loss": 0.5408, "lr": 4.898176529551609e-06, "epoch": 0.9103130401382754, "percentage": 18.21, "elapsed_time": "0:37:13", "remaining_time": "2:47:14", "throughput": 20041.1, "total_tokens": 44764544}
|
|
{"current_steps": 14225, "total_steps": 78105, "loss": 0.3792, "lr": 4.898018656673724e-06, "epoch": 0.9106331220792523, "percentage": 18.21, "elapsed_time": "0:37:14", "remaining_time": "2:47:13", "throughput": 20042.2, "total_tokens": 44780992}
|
|
{"current_steps": 14230, "total_steps": 78105, "loss": 0.589, "lr": 4.8978606640512774e-06, "epoch": 0.9109532040202292, "percentage": 18.22, "elapsed_time": "0:37:15", "remaining_time": "2:47:12", "throughput": 20043.1, "total_tokens": 44796672}
|
|
{"current_steps": 14235, "total_steps": 78105, "loss": 0.5243, "lr": 4.897702551692159e-06, "epoch": 0.911273285961206, "percentage": 18.23, "elapsed_time": "0:37:15", "remaining_time": "2:47:11", "throughput": 20044.06, "total_tokens": 44812224}
|
|
{"current_steps": 14240, "total_steps": 78105, "loss": 0.4864, "lr": 4.897544319604265e-06, "epoch": 0.9115933679021829, "percentage": 18.23, "elapsed_time": "0:37:16", "remaining_time": "2:47:09", "throughput": 20044.89, "total_tokens": 44827136}
|
|
{"current_steps": 14245, "total_steps": 78105, "loss": 0.5151, "lr": 4.897385967795496e-06, "epoch": 0.9119134498431598, "percentage": 18.24, "elapsed_time": "0:37:16", "remaining_time": "2:47:08", "throughput": 20045.72, "total_tokens": 44842176}
|
|
{"current_steps": 14250, "total_steps": 78105, "loss": 0.6346, "lr": 4.897227496273759e-06, "epoch": 0.9122335317841367, "percentage": 18.24, "elapsed_time": "0:37:17", "remaining_time": "2:47:07", "throughput": 20046.8, "total_tokens": 44858880}
|
|
{"current_steps": 14255, "total_steps": 78105, "loss": 0.562, "lr": 4.897068905046967e-06, "epoch": 0.9125536137251137, "percentage": 18.25, "elapsed_time": "0:37:18", "remaining_time": "2:47:06", "throughput": 20047.96, "total_tokens": 44875328}
|
|
{"current_steps": 14260, "total_steps": 78105, "loss": 0.4998, "lr": 4.896910194123041e-06, "epoch": 0.9128736956660906, "percentage": 18.26, "elapsed_time": "0:37:19", "remaining_time": "2:47:04", "throughput": 20049.15, "total_tokens": 44892032}
|
|
{"current_steps": 14265, "total_steps": 78105, "loss": 0.4398, "lr": 4.896751363509904e-06, "epoch": 0.9131937776070674, "percentage": 18.26, "elapsed_time": "0:37:19", "remaining_time": "2:47:03", "throughput": 20050.04, "total_tokens": 44907328}
|
|
{"current_steps": 14270, "total_steps": 78105, "loss": 0.377, "lr": 4.896592413215489e-06, "epoch": 0.9135138595480443, "percentage": 18.27, "elapsed_time": "0:37:20", "remaining_time": "2:47:02", "throughput": 20050.9, "total_tokens": 44922560}
|
|
{"current_steps": 14275, "total_steps": 78105, "loss": 0.5012, "lr": 4.8964333432477315e-06, "epoch": 0.9138339414890212, "percentage": 18.28, "elapsed_time": "0:37:21", "remaining_time": "2:47:00", "throughput": 20051.73, "total_tokens": 44937728}
|
|
{"current_steps": 14280, "total_steps": 78105, "loss": 0.5723, "lr": 4.8962741536145755e-06, "epoch": 0.9141540234299981, "percentage": 18.28, "elapsed_time": "0:37:21", "remaining_time": "2:46:59", "throughput": 20052.83, "total_tokens": 44954560}
|
|
{"current_steps": 14285, "total_steps": 78105, "loss": 0.5088, "lr": 4.89611484432397e-06, "epoch": 0.914474105370975, "percentage": 18.29, "elapsed_time": "0:37:22", "remaining_time": "2:46:58", "throughput": 20054.0, "total_tokens": 44971520}
|
|
{"current_steps": 14290, "total_steps": 78105, "loss": 0.6032, "lr": 4.89595541538387e-06, "epoch": 0.9147941873119518, "percentage": 18.3, "elapsed_time": "0:37:23", "remaining_time": "2:46:57", "throughput": 20054.97, "total_tokens": 44987392}
|
|
{"current_steps": 14295, "total_steps": 78105, "loss": 0.4007, "lr": 4.8957958668022374e-06, "epoch": 0.9151142692529287, "percentage": 18.3, "elapsed_time": "0:37:23", "remaining_time": "2:46:56", "throughput": 20055.89, "total_tokens": 45002688}
|
|
{"current_steps": 14300, "total_steps": 78105, "loss": 0.4718, "lr": 4.8956361985870374e-06, "epoch": 0.9154343511939056, "percentage": 18.31, "elapsed_time": "0:37:24", "remaining_time": "2:46:54", "throughput": 20056.74, "total_tokens": 45017792}
|
|
{"current_steps": 14305, "total_steps": 78105, "loss": 0.5366, "lr": 4.895476410746245e-06, "epoch": 0.9157544331348826, "percentage": 18.32, "elapsed_time": "0:37:25", "remaining_time": "2:46:53", "throughput": 20057.52, "total_tokens": 45032640}
|
|
{"current_steps": 14310, "total_steps": 78105, "loss": 0.4542, "lr": 4.895316503287837e-06, "epoch": 0.9160745150758595, "percentage": 18.32, "elapsed_time": "0:37:25", "remaining_time": "2:46:52", "throughput": 20058.5, "total_tokens": 45048256}
|
|
{"current_steps": 14315, "total_steps": 78105, "loss": 0.536, "lr": 4.8951564762198e-06, "epoch": 0.9163945970168363, "percentage": 18.33, "elapsed_time": "0:37:26", "remaining_time": "2:46:50", "throughput": 20059.53, "total_tokens": 45064192}
|
|
{"current_steps": 14320, "total_steps": 78105, "loss": 0.447, "lr": 4.894996329550125e-06, "epoch": 0.9167146789578132, "percentage": 18.33, "elapsed_time": "0:37:27", "remaining_time": "2:46:49", "throughput": 20060.35, "total_tokens": 45079488}
|
|
{"current_steps": 14325, "total_steps": 78105, "loss": 0.5554, "lr": 4.894836063286809e-06, "epoch": 0.9170347608987901, "percentage": 18.34, "elapsed_time": "0:37:27", "remaining_time": "2:46:48", "throughput": 20061.32, "total_tokens": 45095616}
|
|
{"current_steps": 14330, "total_steps": 78105, "loss": 0.5817, "lr": 4.894675677437853e-06, "epoch": 0.917354842839767, "percentage": 18.35, "elapsed_time": "0:37:28", "remaining_time": "2:46:47", "throughput": 20062.18, "total_tokens": 45110592}
|
|
{"current_steps": 14335, "total_steps": 78105, "loss": 0.5129, "lr": 4.894515172011268e-06, "epoch": 0.9176749247807439, "percentage": 18.35, "elapsed_time": "0:37:29", "remaining_time": "2:46:45", "throughput": 20063.26, "total_tokens": 45127168}
|
|
{"current_steps": 14340, "total_steps": 78105, "loss": 0.4606, "lr": 4.894354547015067e-06, "epoch": 0.9179950067217207, "percentage": 18.36, "elapsed_time": "0:37:29", "remaining_time": "2:46:44", "throughput": 20064.11, "total_tokens": 45142208}
|
|
{"current_steps": 14345, "total_steps": 78105, "loss": 0.5549, "lr": 4.894193802457272e-06, "epoch": 0.9183150886626976, "percentage": 18.37, "elapsed_time": "0:37:30", "remaining_time": "2:46:43", "throughput": 20064.95, "total_tokens": 45157184}
|
|
{"current_steps": 14350, "total_steps": 78105, "loss": 0.5885, "lr": 4.8940329383459095e-06, "epoch": 0.9186351706036745, "percentage": 18.37, "elapsed_time": "0:37:31", "remaining_time": "2:46:42", "throughput": 20066.24, "total_tokens": 45174464}
|
|
{"current_steps": 14355, "total_steps": 78105, "loss": 0.4462, "lr": 4.8938719546890126e-06, "epoch": 0.9189552525446514, "percentage": 18.38, "elapsed_time": "0:37:31", "remaining_time": "2:46:40", "throughput": 20066.96, "total_tokens": 45188992}
|
|
{"current_steps": 14360, "total_steps": 78105, "loss": 0.6097, "lr": 4.893710851494619e-06, "epoch": 0.9192753344856284, "percentage": 18.39, "elapsed_time": "0:37:32", "remaining_time": "2:46:39", "throughput": 20067.77, "total_tokens": 45204032}
|
|
{"current_steps": 14365, "total_steps": 78105, "loss": 0.6409, "lr": 4.8935496287707736e-06, "epoch": 0.9195954164266052, "percentage": 18.39, "elapsed_time": "0:37:33", "remaining_time": "2:46:38", "throughput": 20068.59, "total_tokens": 45219328}
|
|
{"current_steps": 14370, "total_steps": 78105, "loss": 0.5678, "lr": 4.893388286525528e-06, "epoch": 0.9199154983675821, "percentage": 18.4, "elapsed_time": "0:37:33", "remaining_time": "2:46:36", "throughput": 20069.66, "total_tokens": 45235584}
|
|
{"current_steps": 14375, "total_steps": 78105, "loss": 0.4068, "lr": 4.8932268247669366e-06, "epoch": 0.920235580308559, "percentage": 18.4, "elapsed_time": "0:37:34", "remaining_time": "2:46:35", "throughput": 20070.53, "total_tokens": 45250880}
|
|
{"current_steps": 14380, "total_steps": 78105, "loss": 0.549, "lr": 4.893065243503065e-06, "epoch": 0.9205556622495359, "percentage": 18.41, "elapsed_time": "0:37:35", "remaining_time": "2:46:34", "throughput": 20071.52, "total_tokens": 45266752}
|
|
{"current_steps": 14385, "total_steps": 78105, "loss": 0.4711, "lr": 4.892903542741979e-06, "epoch": 0.9208757441905128, "percentage": 18.42, "elapsed_time": "0:37:35", "remaining_time": "2:46:32", "throughput": 20072.47, "total_tokens": 45282496}
|
|
{"current_steps": 14390, "total_steps": 78105, "loss": 0.4715, "lr": 4.892741722491755e-06, "epoch": 0.9211958261314896, "percentage": 18.42, "elapsed_time": "0:37:36", "remaining_time": "2:46:31", "throughput": 20073.18, "total_tokens": 45297024}
|
|
{"current_steps": 14395, "total_steps": 78105, "loss": 0.4262, "lr": 4.892579782760472e-06, "epoch": 0.9215159080724665, "percentage": 18.43, "elapsed_time": "0:37:37", "remaining_time": "2:46:30", "throughput": 20074.36, "total_tokens": 45314176}
|
|
{"current_steps": 14400, "total_steps": 78105, "loss": 0.4715, "lr": 4.8924177235562186e-06, "epoch": 0.9218359900134434, "percentage": 18.44, "elapsed_time": "0:37:37", "remaining_time": "2:46:29", "throughput": 20075.19, "total_tokens": 45329344}
|
|
{"current_steps": 14405, "total_steps": 78105, "loss": 0.4856, "lr": 4.892255544887084e-06, "epoch": 0.9221560719544203, "percentage": 18.44, "elapsed_time": "0:37:38", "remaining_time": "2:46:27", "throughput": 20076.07, "total_tokens": 45344384}
|
|
{"current_steps": 14410, "total_steps": 78105, "loss": 0.4845, "lr": 4.8920932467611705e-06, "epoch": 0.9224761538953972, "percentage": 18.45, "elapsed_time": "0:37:39", "remaining_time": "2:46:26", "throughput": 20076.97, "total_tokens": 45360192}
|
|
{"current_steps": 14415, "total_steps": 78105, "loss": 0.455, "lr": 4.891930829186579e-06, "epoch": 0.9227962358363742, "percentage": 18.46, "elapsed_time": "0:37:39", "remaining_time": "2:46:25", "throughput": 20077.9, "total_tokens": 45376000}
|
|
{"current_steps": 14420, "total_steps": 78105, "loss": 0.6976, "lr": 4.89176829217142e-06, "epoch": 0.923116317777351, "percentage": 18.46, "elapsed_time": "0:37:40", "remaining_time": "2:46:23", "throughput": 20078.61, "total_tokens": 45390400}
|
|
{"current_steps": 14425, "total_steps": 78105, "loss": 0.597, "lr": 4.891605635723812e-06, "epoch": 0.9234363997183279, "percentage": 18.47, "elapsed_time": "0:37:41", "remaining_time": "2:46:22", "throughput": 20079.68, "total_tokens": 45406976}
|
|
{"current_steps": 14430, "total_steps": 78105, "loss": 0.4891, "lr": 4.891442859851876e-06, "epoch": 0.9237564816593048, "percentage": 18.48, "elapsed_time": "0:37:41", "remaining_time": "2:46:21", "throughput": 20080.52, "total_tokens": 45422080}
|
|
{"current_steps": 14435, "total_steps": 78105, "loss": 0.4595, "lr": 4.891279964563739e-06, "epoch": 0.9240765636002817, "percentage": 18.48, "elapsed_time": "0:37:42", "remaining_time": "2:46:20", "throughput": 20081.77, "total_tokens": 45439296}
|
|
{"current_steps": 14440, "total_steps": 78105, "loss": 0.3903, "lr": 4.891116949867537e-06, "epoch": 0.9243966455412586, "percentage": 18.49, "elapsed_time": "0:37:43", "remaining_time": "2:46:19", "throughput": 20082.62, "total_tokens": 45454208}
|
|
{"current_steps": 14445, "total_steps": 78105, "loss": 0.4345, "lr": 4.89095381577141e-06, "epoch": 0.9247167274822354, "percentage": 18.49, "elapsed_time": "0:37:44", "remaining_time": "2:46:17", "throughput": 20083.63, "total_tokens": 45470400}
|
|
{"current_steps": 14450, "total_steps": 78105, "loss": 0.5197, "lr": 4.890790562283503e-06, "epoch": 0.9250368094232123, "percentage": 18.5, "elapsed_time": "0:37:44", "remaining_time": "2:46:16", "throughput": 20084.6, "total_tokens": 45486528}
|
|
{"current_steps": 14455, "total_steps": 78105, "loss": 0.4466, "lr": 4.8906271894119685e-06, "epoch": 0.9253568913641892, "percentage": 18.51, "elapsed_time": "0:37:45", "remaining_time": "2:46:15", "throughput": 20085.44, "total_tokens": 45501440}
|
|
{"current_steps": 14460, "total_steps": 78105, "loss": 0.4898, "lr": 4.890463697164965e-06, "epoch": 0.9256769733051661, "percentage": 18.51, "elapsed_time": "0:37:46", "remaining_time": "2:46:14", "throughput": 20086.49, "total_tokens": 45517760}
|
|
{"current_steps": 14465, "total_steps": 78105, "loss": 0.4846, "lr": 4.890300085550654e-06, "epoch": 0.9259970552461431, "percentage": 18.52, "elapsed_time": "0:37:46", "remaining_time": "2:46:12", "throughput": 20087.31, "total_tokens": 45533056}
|
|
{"current_steps": 14470, "total_steps": 78105, "loss": 0.477, "lr": 4.890136354577209e-06, "epoch": 0.9263171371871199, "percentage": 18.53, "elapsed_time": "0:37:47", "remaining_time": "2:46:11", "throughput": 20088.3, "total_tokens": 45549248}
|
|
{"current_steps": 14475, "total_steps": 78105, "loss": 0.3856, "lr": 4.889972504252804e-06, "epoch": 0.9266372191280968, "percentage": 18.53, "elapsed_time": "0:37:48", "remaining_time": "2:46:10", "throughput": 20089.44, "total_tokens": 45565760}
|
|
{"current_steps": 14480, "total_steps": 78105, "loss": 0.519, "lr": 4.889808534585621e-06, "epoch": 0.9269573010690737, "percentage": 18.54, "elapsed_time": "0:37:48", "remaining_time": "2:46:09", "throughput": 20090.37, "total_tokens": 45581568}
|
|
{"current_steps": 14485, "total_steps": 78105, "loss": 0.6125, "lr": 4.889644445583848e-06, "epoch": 0.9272773830100506, "percentage": 18.55, "elapsed_time": "0:37:49", "remaining_time": "2:46:07", "throughput": 20091.28, "total_tokens": 45596928}
|
|
{"current_steps": 14490, "total_steps": 78105, "loss": 0.486, "lr": 4.889480237255678e-06, "epoch": 0.9275974649510275, "percentage": 18.55, "elapsed_time": "0:37:50", "remaining_time": "2:46:06", "throughput": 20092.25, "total_tokens": 45612800}
|
|
{"current_steps": 14495, "total_steps": 78105, "loss": 0.5381, "lr": 4.889315909609311e-06, "epoch": 0.9279175468920043, "percentage": 18.56, "elapsed_time": "0:37:50", "remaining_time": "2:46:05", "throughput": 20093.06, "total_tokens": 45627584}
|
|
{"current_steps": 14500, "total_steps": 78105, "loss": 0.4355, "lr": 4.889151462652955e-06, "epoch": 0.9282376288329812, "percentage": 18.56, "elapsed_time": "0:37:51", "remaining_time": "2:46:04", "throughput": 20094.06, "total_tokens": 45643840}
|
|
{"current_steps": 14505, "total_steps": 78105, "loss": 0.4318, "lr": 4.888986896394817e-06, "epoch": 0.9285577107739581, "percentage": 18.57, "elapsed_time": "0:37:52", "remaining_time": "2:46:02", "throughput": 20094.94, "total_tokens": 45659072}
|
|
{"current_steps": 14510, "total_steps": 78105, "loss": 0.7709, "lr": 4.888822210843119e-06, "epoch": 0.928877792714935, "percentage": 18.58, "elapsed_time": "0:37:52", "remaining_time": "2:46:01", "throughput": 20095.99, "total_tokens": 45675328}
|
|
{"current_steps": 14515, "total_steps": 78105, "loss": 0.5242, "lr": 4.888657406006082e-06, "epoch": 0.9291978746559119, "percentage": 18.58, "elapsed_time": "0:37:53", "remaining_time": "2:46:00", "throughput": 20096.88, "total_tokens": 45690816}
|
|
{"current_steps": 14520, "total_steps": 78105, "loss": 0.3906, "lr": 4.888492481891937e-06, "epoch": 0.9295179565968889, "percentage": 18.59, "elapsed_time": "0:37:54", "remaining_time": "2:45:59", "throughput": 20097.75, "total_tokens": 45706432}
|
|
{"current_steps": 14525, "total_steps": 78105, "loss": 0.4153, "lr": 4.888327438508918e-06, "epoch": 0.9298380385378657, "percentage": 18.6, "elapsed_time": "0:37:54", "remaining_time": "2:45:57", "throughput": 20098.65, "total_tokens": 45721920}
|
|
{"current_steps": 14530, "total_steps": 78105, "loss": 0.402, "lr": 4.888162275865267e-06, "epoch": 0.9301581204788426, "percentage": 18.6, "elapsed_time": "0:37:55", "remaining_time": "2:45:56", "throughput": 20099.66, "total_tokens": 45738048}
|
|
{"current_steps": 14535, "total_steps": 78105, "loss": 0.3587, "lr": 4.887996993969231e-06, "epoch": 0.9304782024198195, "percentage": 18.61, "elapsed_time": "0:37:56", "remaining_time": "2:45:55", "throughput": 20100.63, "total_tokens": 45753856}
|
|
{"current_steps": 14540, "total_steps": 78105, "loss": 0.5039, "lr": 4.887831592829064e-06, "epoch": 0.9307982843607964, "percentage": 18.62, "elapsed_time": "0:37:56", "remaining_time": "2:45:53", "throughput": 20101.43, "total_tokens": 45768704}
|
|
{"current_steps": 14545, "total_steps": 78105, "loss": 0.3943, "lr": 4.887666072453026e-06, "epoch": 0.9311183663017732, "percentage": 18.62, "elapsed_time": "0:37:57", "remaining_time": "2:45:52", "throughput": 20102.32, "total_tokens": 45783936}
|
|
{"current_steps": 14550, "total_steps": 78105, "loss": 0.6048, "lr": 4.887500432849379e-06, "epoch": 0.9314384482427501, "percentage": 18.63, "elapsed_time": "0:37:58", "remaining_time": "2:45:51", "throughput": 20103.34, "total_tokens": 45800320}
|
|
{"current_steps": 14555, "total_steps": 78105, "loss": 0.4135, "lr": 4.887334674026399e-06, "epoch": 0.931758530183727, "percentage": 18.64, "elapsed_time": "0:37:58", "remaining_time": "2:45:50", "throughput": 20104.35, "total_tokens": 45816512}
|
|
{"current_steps": 14560, "total_steps": 78105, "loss": 0.4462, "lr": 4.887168795992359e-06, "epoch": 0.9320786121247039, "percentage": 18.64, "elapsed_time": "0:37:59", "remaining_time": "2:45:48", "throughput": 20105.25, "total_tokens": 45831936}
|
|
{"current_steps": 14565, "total_steps": 78105, "loss": 0.503, "lr": 4.887002798755544e-06, "epoch": 0.9323986940656808, "percentage": 18.65, "elapsed_time": "0:38:00", "remaining_time": "2:45:47", "throughput": 20106.01, "total_tokens": 45846784}
|
|
{"current_steps": 14570, "total_steps": 78105, "loss": 0.5613, "lr": 4.886836682324243e-06, "epoch": 0.9327187760066578, "percentage": 18.65, "elapsed_time": "0:38:00", "remaining_time": "2:45:46", "throughput": 20106.84, "total_tokens": 45862080}
|
|
{"current_steps": 14575, "total_steps": 78105, "loss": 0.5538, "lr": 4.886670446706751e-06, "epoch": 0.9330388579476346, "percentage": 18.66, "elapsed_time": "0:38:01", "remaining_time": "2:45:45", "throughput": 20107.83, "total_tokens": 45878528}
|
|
{"current_steps": 14580, "total_steps": 78105, "loss": 0.5014, "lr": 4.886504091911367e-06, "epoch": 0.9333589398886115, "percentage": 18.67, "elapsed_time": "0:38:02", "remaining_time": "2:45:43", "throughput": 20108.73, "total_tokens": 45894016}
|
|
{"current_steps": 14585, "total_steps": 78105, "loss": 0.5506, "lr": 4.886337617946401e-06, "epoch": 0.9336790218295884, "percentage": 18.67, "elapsed_time": "0:38:02", "remaining_time": "2:45:42", "throughput": 20109.56, "total_tokens": 45909504}
|
|
{"current_steps": 14590, "total_steps": 78105, "loss": 0.5049, "lr": 4.886171024820163e-06, "epoch": 0.9339991037705653, "percentage": 18.68, "elapsed_time": "0:38:03", "remaining_time": "2:45:41", "throughput": 20110.55, "total_tokens": 45925376}
|
|
{"current_steps": 14595, "total_steps": 78105, "loss": 0.6034, "lr": 4.886004312540974e-06, "epoch": 0.9343191857115422, "percentage": 18.69, "elapsed_time": "0:38:04", "remaining_time": "2:45:40", "throughput": 20111.27, "total_tokens": 45940224}
|
|
{"current_steps": 14600, "total_steps": 78105, "loss": 0.4263, "lr": 4.885837481117158e-06, "epoch": 0.934639267652519, "percentage": 18.69, "elapsed_time": "0:38:04", "remaining_time": "2:45:38", "throughput": 20112.05, "total_tokens": 45955072}
|
|
{"current_steps": 14605, "total_steps": 78105, "loss": 0.4194, "lr": 4.885670530557046e-06, "epoch": 0.9349593495934959, "percentage": 18.7, "elapsed_time": "0:38:05", "remaining_time": "2:45:37", "throughput": 20112.89, "total_tokens": 45970240}
|
|
{"current_steps": 14610, "total_steps": 78105, "loss": 0.8094, "lr": 4.885503460868973e-06, "epoch": 0.9352794315344728, "percentage": 18.71, "elapsed_time": "0:38:06", "remaining_time": "2:45:36", "throughput": 20113.79, "total_tokens": 45985856}
|
|
{"current_steps": 14615, "total_steps": 78105, "loss": 0.4448, "lr": 4.8853362720612844e-06, "epoch": 0.9355995134754497, "percentage": 18.71, "elapsed_time": "0:38:06", "remaining_time": "2:45:34", "throughput": 20114.5, "total_tokens": 46000256}
|
|
{"current_steps": 14620, "total_steps": 78105, "loss": 0.4281, "lr": 4.885168964142326e-06, "epoch": 0.9359195954164266, "percentage": 18.72, "elapsed_time": "0:38:07", "remaining_time": "2:45:33", "throughput": 20115.37, "total_tokens": 46015616}
|
|
{"current_steps": 14625, "total_steps": 78105, "loss": 0.4017, "lr": 4.885001537120454e-06, "epoch": 0.9362396773574035, "percentage": 18.72, "elapsed_time": "0:38:08", "remaining_time": "2:45:32", "throughput": 20116.2, "total_tokens": 46031040}
|
|
{"current_steps": 14630, "total_steps": 78105, "loss": 0.4725, "lr": 4.884833991004027e-06, "epoch": 0.9365597592983804, "percentage": 18.73, "elapsed_time": "0:38:08", "remaining_time": "2:45:30", "throughput": 20117.07, "total_tokens": 46046016}
|
|
{"current_steps": 14635, "total_steps": 78105, "loss": 0.4231, "lr": 4.884666325801414e-06, "epoch": 0.9368798412393573, "percentage": 18.74, "elapsed_time": "0:38:09", "remaining_time": "2:45:29", "throughput": 20118.15, "total_tokens": 46062528}
|
|
{"current_steps": 14640, "total_steps": 78105, "loss": 0.4505, "lr": 4.884498541520985e-06, "epoch": 0.9371999231803342, "percentage": 18.74, "elapsed_time": "0:38:10", "remaining_time": "2:45:28", "throughput": 20119.1, "total_tokens": 46078144}
|
|
{"current_steps": 14645, "total_steps": 78105, "loss": 0.6019, "lr": 4.88433063817112e-06, "epoch": 0.9375200051213111, "percentage": 18.75, "elapsed_time": "0:38:11", "remaining_time": "2:45:27", "throughput": 20120.55, "total_tokens": 46096896}
|
|
{"current_steps": 14650, "total_steps": 78105, "loss": 0.4503, "lr": 4.884162615760202e-06, "epoch": 0.9378400870622879, "percentage": 18.76, "elapsed_time": "0:38:11", "remaining_time": "2:45:26", "throughput": 20121.4, "total_tokens": 46111808}
|
|
{"current_steps": 14655, "total_steps": 78105, "loss": 0.4946, "lr": 4.883994474296622e-06, "epoch": 0.9381601690032648, "percentage": 18.76, "elapsed_time": "0:38:12", "remaining_time": "2:45:24", "throughput": 20122.42, "total_tokens": 46127936}
|
|
{"current_steps": 14660, "total_steps": 78105, "loss": 0.4874, "lr": 4.883826213788775e-06, "epoch": 0.9384802509442417, "percentage": 18.77, "elapsed_time": "0:38:13", "remaining_time": "2:45:23", "throughput": 20123.21, "total_tokens": 46142848}
|
|
{"current_steps": 14665, "total_steps": 78105, "loss": 0.7442, "lr": 4.883657834245065e-06, "epoch": 0.9388003328852186, "percentage": 18.78, "elapsed_time": "0:38:13", "remaining_time": "2:45:22", "throughput": 20124.11, "total_tokens": 46158848}
|
|
{"current_steps": 14670, "total_steps": 78105, "loss": 0.5611, "lr": 4.883489335673898e-06, "epoch": 0.9391204148261955, "percentage": 18.78, "elapsed_time": "0:38:14", "remaining_time": "2:45:21", "throughput": 20125.05, "total_tokens": 46174912}
|
|
{"current_steps": 14675, "total_steps": 78105, "loss": 0.4503, "lr": 4.883320718083688e-06, "epoch": 0.9394404967671725, "percentage": 18.79, "elapsed_time": "0:38:15", "remaining_time": "2:45:19", "throughput": 20125.74, "total_tokens": 46189248}
|
|
{"current_steps": 14680, "total_steps": 78105, "loss": 0.4028, "lr": 4.8831519814828565e-06, "epoch": 0.9397605787081493, "percentage": 18.8, "elapsed_time": "0:38:15", "remaining_time": "2:45:18", "throughput": 20126.7, "total_tokens": 46204928}
|
|
{"current_steps": 14685, "total_steps": 78105, "loss": 0.4413, "lr": 4.8829831258798285e-06, "epoch": 0.9400806606491262, "percentage": 18.8, "elapsed_time": "0:38:16", "remaining_time": "2:45:17", "throughput": 20127.5, "total_tokens": 46220160}
|
|
{"current_steps": 14690, "total_steps": 78105, "loss": 0.5157, "lr": 4.8828141512830355e-06, "epoch": 0.9404007425901031, "percentage": 18.81, "elapsed_time": "0:38:17", "remaining_time": "2:45:16", "throughput": 20128.43, "total_tokens": 46235584}
|
|
{"current_steps": 14695, "total_steps": 78105, "loss": 0.3311, "lr": 4.8826450577009154e-06, "epoch": 0.94072082453108, "percentage": 18.81, "elapsed_time": "0:38:17", "remaining_time": "2:45:14", "throughput": 20129.41, "total_tokens": 46251904}
|
|
{"current_steps": 14700, "total_steps": 78105, "loss": 0.498, "lr": 4.882475845141912e-06, "epoch": 0.9410409064720568, "percentage": 18.82, "elapsed_time": "0:38:18", "remaining_time": "2:45:13", "throughput": 20130.33, "total_tokens": 46268032}
|
|
{"current_steps": 14705, "total_steps": 78105, "loss": 0.4591, "lr": 4.882306513614474e-06, "epoch": 0.9413609884130337, "percentage": 18.83, "elapsed_time": "0:38:19", "remaining_time": "2:45:12", "throughput": 20131.19, "total_tokens": 46283392}
|
|
{"current_steps": 14710, "total_steps": 78105, "loss": 0.4144, "lr": 4.882137063127059e-06, "epoch": 0.9416810703540106, "percentage": 18.83, "elapsed_time": "0:38:19", "remaining_time": "2:45:11", "throughput": 20132.01, "total_tokens": 46298752}
|
|
{"current_steps": 14715, "total_steps": 78105, "loss": 0.3803, "lr": 4.881967493688126e-06, "epoch": 0.9420011522949875, "percentage": 18.84, "elapsed_time": "0:38:20", "remaining_time": "2:45:09", "throughput": 20132.96, "total_tokens": 46314624}
|
|
{"current_steps": 14720, "total_steps": 78105, "loss": 0.4852, "lr": 4.881797805306144e-06, "epoch": 0.9423212342359644, "percentage": 18.85, "elapsed_time": "0:38:21", "remaining_time": "2:45:08", "throughput": 20134.1, "total_tokens": 46331712}
|
|
{"current_steps": 14725, "total_steps": 78105, "loss": 0.5661, "lr": 4.881627997989587e-06, "epoch": 0.9426413161769412, "percentage": 18.85, "elapsed_time": "0:38:21", "remaining_time": "2:45:07", "throughput": 20134.92, "total_tokens": 46347200}
|
|
{"current_steps": 14730, "total_steps": 78105, "loss": 0.4971, "lr": 4.881458071746932e-06, "epoch": 0.9429613981179182, "percentage": 18.86, "elapsed_time": "0:38:22", "remaining_time": "2:45:06", "throughput": 20135.92, "total_tokens": 46363008}
|
|
{"current_steps": 14735, "total_steps": 78105, "loss": 0.4977, "lr": 4.881288026586665e-06, "epoch": 0.9432814800588951, "percentage": 18.87, "elapsed_time": "0:38:23", "remaining_time": "2:45:05", "throughput": 20136.85, "total_tokens": 46378816}
|
|
{"current_steps": 14740, "total_steps": 78105, "loss": 0.3572, "lr": 4.881117862517278e-06, "epoch": 0.943601561999872, "percentage": 18.87, "elapsed_time": "0:38:23", "remaining_time": "2:45:04", "throughput": 20137.81, "total_tokens": 46395200}
|
|
{"current_steps": 14745, "total_steps": 78105, "loss": 0.4358, "lr": 4.880947579547268e-06, "epoch": 0.9439216439408489, "percentage": 18.88, "elapsed_time": "0:38:24", "remaining_time": "2:45:02", "throughput": 20138.87, "total_tokens": 46411840}
|
|
{"current_steps": 14750, "total_steps": 78105, "loss": 0.6732, "lr": 4.880777177685138e-06, "epoch": 0.9442417258818258, "percentage": 18.88, "elapsed_time": "0:38:25", "remaining_time": "2:45:01", "throughput": 20139.74, "total_tokens": 46427776}
|
|
{"current_steps": 14755, "total_steps": 78105, "loss": 0.58, "lr": 4.880606656939397e-06, "epoch": 0.9445618078228026, "percentage": 18.89, "elapsed_time": "0:38:25", "remaining_time": "2:45:00", "throughput": 20140.88, "total_tokens": 46444736}
|
|
{"current_steps": 14760, "total_steps": 78105, "loss": 0.5092, "lr": 4.880436017318559e-06, "epoch": 0.9448818897637795, "percentage": 18.9, "elapsed_time": "0:38:26", "remaining_time": "2:44:59", "throughput": 20141.84, "total_tokens": 46460672}
|
|
{"current_steps": 14765, "total_steps": 78105, "loss": 0.4399, "lr": 4.8802652588311465e-06, "epoch": 0.9452019717047564, "percentage": 18.9, "elapsed_time": "0:38:27", "remaining_time": "2:44:58", "throughput": 20142.75, "total_tokens": 46476736}
|
|
{"current_steps": 14770, "total_steps": 78105, "loss": 0.5088, "lr": 4.8800943814856835e-06, "epoch": 0.9455220536457333, "percentage": 18.91, "elapsed_time": "0:38:28", "remaining_time": "2:44:57", "throughput": 20143.68, "total_tokens": 46492416}
|
|
{"current_steps": 14775, "total_steps": 78105, "loss": 0.3932, "lr": 4.879923385290706e-06, "epoch": 0.9458421355867102, "percentage": 18.92, "elapsed_time": "0:38:28", "remaining_time": "2:44:55", "throughput": 20144.45, "total_tokens": 46507264}
|
|
{"current_steps": 14780, "total_steps": 78105, "loss": 0.4766, "lr": 4.879752270254751e-06, "epoch": 0.946162217527687, "percentage": 18.92, "elapsed_time": "0:38:29", "remaining_time": "2:44:54", "throughput": 20145.57, "total_tokens": 46524224}
|
|
{"current_steps": 14785, "total_steps": 78105, "loss": 0.6264, "lr": 4.879581036386363e-06, "epoch": 0.946482299468664, "percentage": 18.93, "elapsed_time": "0:38:30", "remaining_time": "2:44:53", "throughput": 20146.38, "total_tokens": 46539456}
|
|
{"current_steps": 14790, "total_steps": 78105, "loss": 0.4624, "lr": 4.8794096836940926e-06, "epoch": 0.9468023814096409, "percentage": 18.94, "elapsed_time": "0:38:30", "remaining_time": "2:44:52", "throughput": 20147.24, "total_tokens": 46555136}
|
|
{"current_steps": 14795, "total_steps": 78105, "loss": 0.4513, "lr": 4.879238212186498e-06, "epoch": 0.9471224633506178, "percentage": 18.94, "elapsed_time": "0:38:31", "remaining_time": "2:44:50", "throughput": 20148.09, "total_tokens": 46570432}
|
|
{"current_steps": 14800, "total_steps": 78105, "loss": 0.4042, "lr": 4.8790666218721385e-06, "epoch": 0.9474425452915947, "percentage": 18.95, "elapsed_time": "0:38:32", "remaining_time": "2:44:49", "throughput": 20149.03, "total_tokens": 46586304}
|
|
{"current_steps": 14805, "total_steps": 78105, "loss": 0.4828, "lr": 4.878894912759584e-06, "epoch": 0.9477626272325715, "percentage": 18.96, "elapsed_time": "0:38:32", "remaining_time": "2:44:48", "throughput": 20150.02, "total_tokens": 46602432}
|
|
{"current_steps": 14810, "total_steps": 78105, "loss": 0.4343, "lr": 4.87872308485741e-06, "epoch": 0.9480827091735484, "percentage": 18.96, "elapsed_time": "0:38:33", "remaining_time": "2:44:47", "throughput": 20150.75, "total_tokens": 46617472}
|
|
{"current_steps": 14815, "total_steps": 78105, "loss": 0.5112, "lr": 4.878551138174195e-06, "epoch": 0.9484027911145253, "percentage": 18.97, "elapsed_time": "0:38:34", "remaining_time": "2:44:45", "throughput": 20151.59, "total_tokens": 46632896}
|
|
{"current_steps": 14820, "total_steps": 78105, "loss": 0.3885, "lr": 4.8783790727185246e-06, "epoch": 0.9487228730555022, "percentage": 18.97, "elapsed_time": "0:38:34", "remaining_time": "2:44:44", "throughput": 20152.59, "total_tokens": 46649408}
|
|
{"current_steps": 14825, "total_steps": 78105, "loss": 0.4176, "lr": 4.878206888498993e-06, "epoch": 0.9490429549964791, "percentage": 18.98, "elapsed_time": "0:38:35", "remaining_time": "2:44:43", "throughput": 20153.5, "total_tokens": 46665344}
|
|
{"current_steps": 14830, "total_steps": 78105, "loss": 0.5009, "lr": 4.878034585524196e-06, "epoch": 0.9493630369374559, "percentage": 18.99, "elapsed_time": "0:38:36", "remaining_time": "2:44:42", "throughput": 20154.23, "total_tokens": 46680704}
|
|
{"current_steps": 14835, "total_steps": 78105, "loss": 0.5474, "lr": 4.877862163802739e-06, "epoch": 0.9496831188784329, "percentage": 18.99, "elapsed_time": "0:38:36", "remaining_time": "2:44:41", "throughput": 20155.01, "total_tokens": 46695936}
|
|
{"current_steps": 14840, "total_steps": 78105, "loss": 0.4303, "lr": 4.877689623343232e-06, "epoch": 0.9500032008194098, "percentage": 19.0, "elapsed_time": "0:38:37", "remaining_time": "2:44:39", "throughput": 20155.9, "total_tokens": 46711680}
|
|
{"current_steps": 14845, "total_steps": 78105, "loss": 0.4389, "lr": 4.87751696415429e-06, "epoch": 0.9503232827603867, "percentage": 19.01, "elapsed_time": "0:38:38", "remaining_time": "2:44:38", "throughput": 20156.7, "total_tokens": 46727040}
|
|
{"current_steps": 14850, "total_steps": 78105, "loss": 0.5531, "lr": 4.877344186244534e-06, "epoch": 0.9506433647013636, "percentage": 19.01, "elapsed_time": "0:38:38", "remaining_time": "2:44:37", "throughput": 20157.55, "total_tokens": 46742720}
|
|
{"current_steps": 14855, "total_steps": 78105, "loss": 0.462, "lr": 4.877171289622593e-06, "epoch": 0.9509634466423404, "percentage": 19.02, "elapsed_time": "0:38:39", "remaining_time": "2:44:36", "throughput": 20158.37, "total_tokens": 46758336}
|
|
{"current_steps": 14860, "total_steps": 78105, "loss": 0.5603, "lr": 4.8769982742971e-06, "epoch": 0.9512835285833173, "percentage": 19.03, "elapsed_time": "0:38:40", "remaining_time": "2:44:34", "throughput": 20159.15, "total_tokens": 46773312}
|
|
{"current_steps": 14865, "total_steps": 78105, "loss": 0.3841, "lr": 4.876825140276694e-06, "epoch": 0.9516036105242942, "percentage": 19.03, "elapsed_time": "0:38:40", "remaining_time": "2:44:33", "throughput": 20159.85, "total_tokens": 46787968}
|
|
{"current_steps": 14870, "total_steps": 78105, "loss": 0.3905, "lr": 4.876651887570022e-06, "epoch": 0.9519236924652711, "percentage": 19.04, "elapsed_time": "0:38:41", "remaining_time": "2:44:32", "throughput": 20160.74, "total_tokens": 46803712}
|
|
{"current_steps": 14875, "total_steps": 78105, "loss": 0.4292, "lr": 4.876478516185733e-06, "epoch": 0.952243774406248, "percentage": 19.04, "elapsed_time": "0:38:42", "remaining_time": "2:44:31", "throughput": 20161.85, "total_tokens": 46820288}
|
|
{"current_steps": 14880, "total_steps": 78105, "loss": 0.4137, "lr": 4.8763050261324866e-06, "epoch": 0.9525638563472248, "percentage": 19.05, "elapsed_time": "0:38:42", "remaining_time": "2:44:29", "throughput": 20162.69, "total_tokens": 46835904}
|
|
{"current_steps": 14885, "total_steps": 78105, "loss": 0.5489, "lr": 4.876131417418945e-06, "epoch": 0.9528839382882017, "percentage": 19.06, "elapsed_time": "0:38:43", "remaining_time": "2:44:28", "throughput": 20163.47, "total_tokens": 46851136}
|
|
{"current_steps": 14890, "total_steps": 78105, "loss": 0.5037, "lr": 4.8759576900537765e-06, "epoch": 0.9532040202291787, "percentage": 19.06, "elapsed_time": "0:38:44", "remaining_time": "2:44:27", "throughput": 20164.45, "total_tokens": 46867456}
|
|
{"current_steps": 14895, "total_steps": 78105, "loss": 0.4738, "lr": 4.875783844045656e-06, "epoch": 0.9535241021701556, "percentage": 19.07, "elapsed_time": "0:38:44", "remaining_time": "2:44:26", "throughput": 20165.25, "total_tokens": 46882816}
|
|
{"current_steps": 14900, "total_steps": 78105, "loss": 0.5471, "lr": 4.875609879403267e-06, "epoch": 0.9538441841111325, "percentage": 19.08, "elapsed_time": "0:38:45", "remaining_time": "2:44:25", "throughput": 20166.16, "total_tokens": 46898624}
|
|
{"current_steps": 14905, "total_steps": 78105, "loss": 0.5049, "lr": 4.875435796135294e-06, "epoch": 0.9541642660521094, "percentage": 19.08, "elapsed_time": "0:38:46", "remaining_time": "2:44:23", "throughput": 20167.03, "total_tokens": 46914304}
|
|
{"current_steps": 14910, "total_steps": 78105, "loss": 0.535, "lr": 4.875261594250431e-06, "epoch": 0.9544843479930862, "percentage": 19.09, "elapsed_time": "0:38:46", "remaining_time": "2:44:22", "throughput": 20167.97, "total_tokens": 46930368}
|
|
{"current_steps": 14915, "total_steps": 78105, "loss": 0.4771, "lr": 4.875087273757375e-06, "epoch": 0.9548044299340631, "percentage": 19.1, "elapsed_time": "0:38:47", "remaining_time": "2:44:21", "throughput": 20168.83, "total_tokens": 46945792}
|
|
{"current_steps": 14920, "total_steps": 78105, "loss": 0.4557, "lr": 4.874912834664833e-06, "epoch": 0.95512451187504, "percentage": 19.1, "elapsed_time": "0:38:48", "remaining_time": "2:44:20", "throughput": 20169.8, "total_tokens": 46962048}
|
|
{"current_steps": 14925, "total_steps": 78105, "loss": 0.5768, "lr": 4.874738276981515e-06, "epoch": 0.9554445938160169, "percentage": 19.11, "elapsed_time": "0:38:48", "remaining_time": "2:44:18", "throughput": 20170.58, "total_tokens": 46976768}
|
|
{"current_steps": 14930, "total_steps": 78105, "loss": 0.5047, "lr": 4.874563600716137e-06, "epoch": 0.9557646757569938, "percentage": 19.12, "elapsed_time": "0:38:49", "remaining_time": "2:44:17", "throughput": 20171.33, "total_tokens": 46991424}
|
|
{"current_steps": 14935, "total_steps": 78105, "loss": 0.5943, "lr": 4.8743888058774205e-06, "epoch": 0.9560847576979706, "percentage": 19.12, "elapsed_time": "0:38:50", "remaining_time": "2:44:16", "throughput": 20172.14, "total_tokens": 47006656}
|
|
{"current_steps": 14940, "total_steps": 78105, "loss": 0.4124, "lr": 4.874213892474094e-06, "epoch": 0.9564048396389476, "percentage": 19.13, "elapsed_time": "0:38:50", "remaining_time": "2:44:14", "throughput": 20172.94, "total_tokens": 47021824}
|
|
{"current_steps": 14945, "total_steps": 78105, "loss": 0.5393, "lr": 4.874038860514895e-06, "epoch": 0.9567249215799245, "percentage": 19.13, "elapsed_time": "0:38:51", "remaining_time": "2:44:13", "throughput": 20173.87, "total_tokens": 47037440}
|
|
{"current_steps": 14950, "total_steps": 78105, "loss": 0.4314, "lr": 4.873863710008559e-06, "epoch": 0.9570450035209014, "percentage": 19.14, "elapsed_time": "0:38:52", "remaining_time": "2:44:12", "throughput": 20174.78, "total_tokens": 47053760}
|
|
{"current_steps": 14955, "total_steps": 78105, "loss": 0.4475, "lr": 4.873688440963835e-06, "epoch": 0.9573650854618783, "percentage": 19.15, "elapsed_time": "0:38:52", "remaining_time": "2:44:11", "throughput": 20175.55, "total_tokens": 47068928}
|
|
{"current_steps": 14960, "total_steps": 78105, "loss": 0.4712, "lr": 4.873513053389475e-06, "epoch": 0.9576851674028551, "percentage": 19.15, "elapsed_time": "0:38:53", "remaining_time": "2:44:10", "throughput": 20176.45, "total_tokens": 47084672}
|
|
{"current_steps": 14965, "total_steps": 78105, "loss": 0.5139, "lr": 4.873337547294236e-06, "epoch": 0.958005249343832, "percentage": 19.16, "elapsed_time": "0:38:54", "remaining_time": "2:44:09", "throughput": 20177.52, "total_tokens": 47101632}
|
|
{"current_steps": 14970, "total_steps": 78105, "loss": 0.4392, "lr": 4.873161922686882e-06, "epoch": 0.9583253312848089, "percentage": 19.17, "elapsed_time": "0:38:55", "remaining_time": "2:44:07", "throughput": 20178.25, "total_tokens": 47116480}
|
|
{"current_steps": 14975, "total_steps": 78105, "loss": 0.6716, "lr": 4.872986179576182e-06, "epoch": 0.9586454132257858, "percentage": 19.17, "elapsed_time": "0:38:55", "remaining_time": "2:44:06", "throughput": 20179.29, "total_tokens": 47132992}
|
|
{"current_steps": 14980, "total_steps": 78105, "loss": 0.4487, "lr": 4.872810317970914e-06, "epoch": 0.9589654951667627, "percentage": 19.18, "elapsed_time": "0:38:56", "remaining_time": "2:44:05", "throughput": 20180.27, "total_tokens": 47149056}
|
|
{"current_steps": 14985, "total_steps": 78105, "loss": 0.408, "lr": 4.872634337879858e-06, "epoch": 0.9592855771077395, "percentage": 19.19, "elapsed_time": "0:38:57", "remaining_time": "2:44:04", "throughput": 20181.17, "total_tokens": 47164864}
|
|
{"current_steps": 14990, "total_steps": 78105, "loss": 0.5247, "lr": 4.8724582393118015e-06, "epoch": 0.9596056590487164, "percentage": 19.19, "elapsed_time": "0:38:57", "remaining_time": "2:44:03", "throughput": 20181.96, "total_tokens": 47180544}
|
|
{"current_steps": 14995, "total_steps": 78105, "loss": 0.4967, "lr": 4.872282022275538e-06, "epoch": 0.9599257409896934, "percentage": 19.2, "elapsed_time": "0:38:58", "remaining_time": "2:44:01", "throughput": 20182.9, "total_tokens": 47196608}
|
|
{"current_steps": 15000, "total_steps": 78105, "loss": 0.4217, "lr": 4.8721056867798675e-06, "epoch": 0.9602458229306703, "percentage": 19.2, "elapsed_time": "0:38:59", "remaining_time": "2:44:00", "throughput": 20183.89, "total_tokens": 47213504}
|
|
{"current_steps": 15005, "total_steps": 78105, "loss": 0.3406, "lr": 4.871929232833595e-06, "epoch": 0.9605659048716472, "percentage": 19.21, "elapsed_time": "0:38:59", "remaining_time": "2:43:59", "throughput": 20184.67, "total_tokens": 47228288}
|
|
{"current_steps": 15010, "total_steps": 78105, "loss": 0.5522, "lr": 4.871752660445531e-06, "epoch": 0.960885986812624, "percentage": 19.22, "elapsed_time": "0:39:00", "remaining_time": "2:43:58", "throughput": 20185.5, "total_tokens": 47243712}
|
|
{"current_steps": 15015, "total_steps": 78105, "loss": 0.6114, "lr": 4.871575969624493e-06, "epoch": 0.9612060687536009, "percentage": 19.22, "elapsed_time": "0:39:01", "remaining_time": "2:43:57", "throughput": 20186.47, "total_tokens": 47259904}
|
|
{"current_steps": 15020, "total_steps": 78105, "loss": 0.574, "lr": 4.871399160379305e-06, "epoch": 0.9615261506945778, "percentage": 19.23, "elapsed_time": "0:39:01", "remaining_time": "2:43:55", "throughput": 20187.21, "total_tokens": 47275072}
|
|
{"current_steps": 15025, "total_steps": 78105, "loss": 0.5103, "lr": 4.871222232718795e-06, "epoch": 0.9618462326355547, "percentage": 19.24, "elapsed_time": "0:39:02", "remaining_time": "2:43:54", "throughput": 20188.06, "total_tokens": 47290688}
|
|
{"current_steps": 15030, "total_steps": 78105, "loss": 0.4575, "lr": 4.871045186651797e-06, "epoch": 0.9621663145765316, "percentage": 19.24, "elapsed_time": "0:39:03", "remaining_time": "2:43:53", "throughput": 20188.96, "total_tokens": 47306496}
|
|
{"current_steps": 15035, "total_steps": 78105, "loss": 0.3715, "lr": 4.870868022187153e-06, "epoch": 0.9624863965175084, "percentage": 19.25, "elapsed_time": "0:39:03", "remaining_time": "2:43:52", "throughput": 20189.71, "total_tokens": 47321280}
|
|
{"current_steps": 15040, "total_steps": 78105, "loss": 0.5453, "lr": 4.87069073933371e-06, "epoch": 0.9628064784584853, "percentage": 19.26, "elapsed_time": "0:39:04", "remaining_time": "2:43:50", "throughput": 20190.69, "total_tokens": 47337536}
|
|
{"current_steps": 15045, "total_steps": 78105, "loss": 0.447, "lr": 4.87051333810032e-06, "epoch": 0.9631265603994623, "percentage": 19.26, "elapsed_time": "0:39:05", "remaining_time": "2:43:49", "throughput": 20191.46, "total_tokens": 47353024}
|
|
{"current_steps": 15050, "total_steps": 78105, "loss": 0.4071, "lr": 4.87033581849584e-06, "epoch": 0.9634466423404392, "percentage": 19.27, "elapsed_time": "0:39:05", "remaining_time": "2:43:48", "throughput": 20192.35, "total_tokens": 47369088}
|
|
{"current_steps": 15055, "total_steps": 78105, "loss": 0.7118, "lr": 4.870158180529138e-06, "epoch": 0.9637667242814161, "percentage": 19.28, "elapsed_time": "0:39:06", "remaining_time": "2:43:47", "throughput": 20193.15, "total_tokens": 47384320}
|
|
{"current_steps": 15060, "total_steps": 78105, "loss": 0.5317, "lr": 4.869980424209081e-06, "epoch": 0.964086806222393, "percentage": 19.28, "elapsed_time": "0:39:07", "remaining_time": "2:43:46", "throughput": 20194.13, "total_tokens": 47400896}
|
|
{"current_steps": 15065, "total_steps": 78105, "loss": 0.385, "lr": 4.869802549544546e-06, "epoch": 0.9644068881633698, "percentage": 19.29, "elapsed_time": "0:39:07", "remaining_time": "2:43:45", "throughput": 20195.06, "total_tokens": 47416896}
|
|
{"current_steps": 15070, "total_steps": 78105, "loss": 0.732, "lr": 4.8696245565444165e-06, "epoch": 0.9647269701043467, "percentage": 19.29, "elapsed_time": "0:39:08", "remaining_time": "2:43:43", "throughput": 20195.93, "total_tokens": 47432320}
|
|
{"current_steps": 15075, "total_steps": 78105, "loss": 0.4796, "lr": 4.869446445217578e-06, "epoch": 0.9650470520453236, "percentage": 19.3, "elapsed_time": "0:39:09", "remaining_time": "2:43:42", "throughput": 20196.9, "total_tokens": 47448320}
|
|
{"current_steps": 15080, "total_steps": 78105, "loss": 0.5214, "lr": 4.869268215572928e-06, "epoch": 0.9653671339863005, "percentage": 19.31, "elapsed_time": "0:39:09", "remaining_time": "2:43:41", "throughput": 20197.64, "total_tokens": 47463488}
|
|
{"current_steps": 15085, "total_steps": 78105, "loss": 0.3804, "lr": 4.8690898676193635e-06, "epoch": 0.9656872159272774, "percentage": 19.31, "elapsed_time": "0:39:10", "remaining_time": "2:43:40", "throughput": 20198.48, "total_tokens": 47479104}
|
|
{"current_steps": 15090, "total_steps": 78105, "loss": 0.3869, "lr": 4.8689114013657914e-06, "epoch": 0.9660072978682542, "percentage": 19.32, "elapsed_time": "0:39:11", "remaining_time": "2:43:39", "throughput": 20199.65, "total_tokens": 47496448}
|
|
{"current_steps": 15095, "total_steps": 78105, "loss": 0.5534, "lr": 4.868732816821122e-06, "epoch": 0.9663273798092311, "percentage": 19.33, "elapsed_time": "0:39:12", "remaining_time": "2:43:37", "throughput": 20200.49, "total_tokens": 47511936}
|
|
{"current_steps": 15100, "total_steps": 78105, "loss": 0.5165, "lr": 4.8685541139942745e-06, "epoch": 0.9666474617502081, "percentage": 19.33, "elapsed_time": "0:39:12", "remaining_time": "2:43:36", "throughput": 20201.47, "total_tokens": 47528320}
|
|
{"current_steps": 15105, "total_steps": 78105, "loss": 0.4627, "lr": 4.868375292894173e-06, "epoch": 0.966967543691185, "percentage": 19.34, "elapsed_time": "0:39:13", "remaining_time": "2:43:35", "throughput": 20202.26, "total_tokens": 47543296}
|
|
{"current_steps": 15110, "total_steps": 78105, "loss": 0.4229, "lr": 4.868196353529745e-06, "epoch": 0.9672876256321619, "percentage": 19.35, "elapsed_time": "0:39:14", "remaining_time": "2:43:34", "throughput": 20203.06, "total_tokens": 47558656}
|
|
{"current_steps": 15115, "total_steps": 78105, "loss": 0.5188, "lr": 4.868017295909926e-06, "epoch": 0.9676077075731387, "percentage": 19.35, "elapsed_time": "0:39:14", "remaining_time": "2:43:33", "throughput": 20203.98, "total_tokens": 47574720}
|
|
{"current_steps": 15120, "total_steps": 78105, "loss": 0.4448, "lr": 4.867838120043659e-06, "epoch": 0.9679277895141156, "percentage": 19.36, "elapsed_time": "0:39:15", "remaining_time": "2:43:31", "throughput": 20204.81, "total_tokens": 47590272}
|
|
{"current_steps": 15125, "total_steps": 78105, "loss": 0.4908, "lr": 4.867658825939889e-06, "epoch": 0.9682478714550925, "percentage": 19.36, "elapsed_time": "0:39:16", "remaining_time": "2:43:30", "throughput": 20205.59, "total_tokens": 47605696}
|
|
{"current_steps": 15130, "total_steps": 78105, "loss": 0.4739, "lr": 4.86747941360757e-06, "epoch": 0.9685679533960694, "percentage": 19.37, "elapsed_time": "0:39:16", "remaining_time": "2:43:29", "throughput": 20206.62, "total_tokens": 47621760}
|
|
{"current_steps": 15135, "total_steps": 78105, "loss": 0.3881, "lr": 4.8672998830556616e-06, "epoch": 0.9688880353370463, "percentage": 19.38, "elapsed_time": "0:39:17", "remaining_time": "2:43:28", "throughput": 20207.83, "total_tokens": 47639296}
|
|
{"current_steps": 15140, "total_steps": 78105, "loss": 0.2948, "lr": 4.8671202342931275e-06, "epoch": 0.9692081172780231, "percentage": 19.38, "elapsed_time": "0:39:18", "remaining_time": "2:43:27", "throughput": 20208.63, "total_tokens": 47654656}
|
|
{"current_steps": 15145, "total_steps": 78105, "loss": 0.5355, "lr": 4.866940467328938e-06, "epoch": 0.969528199219, "percentage": 19.39, "elapsed_time": "0:39:18", "remaining_time": "2:43:26", "throughput": 20209.61, "total_tokens": 47671168}
|
|
{"current_steps": 15150, "total_steps": 78105, "loss": 0.468, "lr": 4.8667605821720714e-06, "epoch": 0.9698482811599769, "percentage": 19.4, "elapsed_time": "0:39:19", "remaining_time": "2:43:24", "throughput": 20210.38, "total_tokens": 47686400}
|
|
{"current_steps": 15155, "total_steps": 78105, "loss": 0.4837, "lr": 4.866580578831509e-06, "epoch": 0.9701683631009539, "percentage": 19.4, "elapsed_time": "0:39:20", "remaining_time": "2:43:23", "throughput": 20211.16, "total_tokens": 47701760}
|
|
{"current_steps": 15160, "total_steps": 78105, "loss": 0.5242, "lr": 4.86640045731624e-06, "epoch": 0.9704884450419308, "percentage": 19.41, "elapsed_time": "0:39:20", "remaining_time": "2:43:22", "throughput": 20211.95, "total_tokens": 47717248}
|
|
{"current_steps": 15165, "total_steps": 78105, "loss": 0.5109, "lr": 4.866220217635258e-06, "epoch": 0.9708085269829076, "percentage": 19.42, "elapsed_time": "0:39:21", "remaining_time": "2:43:21", "throughput": 20213.05, "total_tokens": 47734336}
|
|
{"current_steps": 15170, "total_steps": 78105, "loss": 0.5269, "lr": 4.866039859797563e-06, "epoch": 0.9711286089238845, "percentage": 19.42, "elapsed_time": "0:39:22", "remaining_time": "2:43:20", "throughput": 20213.96, "total_tokens": 47750464}
|
|
{"current_steps": 15175, "total_steps": 78105, "loss": 0.8597, "lr": 4.865859383812162e-06, "epoch": 0.9714486908648614, "percentage": 19.43, "elapsed_time": "0:39:22", "remaining_time": "2:43:18", "throughput": 20214.76, "total_tokens": 47765824}
|
|
{"current_steps": 15180, "total_steps": 78105, "loss": 0.434, "lr": 4.865678789688067e-06, "epoch": 0.9717687728058383, "percentage": 19.44, "elapsed_time": "0:39:23", "remaining_time": "2:43:17", "throughput": 20215.62, "total_tokens": 47781760}
|
|
{"current_steps": 15185, "total_steps": 78105, "loss": 0.4185, "lr": 4.865498077434295e-06, "epoch": 0.9720888547468152, "percentage": 19.44, "elapsed_time": "0:39:24", "remaining_time": "2:43:16", "throughput": 20216.43, "total_tokens": 47797312}
|
|
{"current_steps": 15190, "total_steps": 78105, "loss": 0.4009, "lr": 4.865317247059871e-06, "epoch": 0.972408936687792, "percentage": 19.45, "elapsed_time": "0:39:24", "remaining_time": "2:43:15", "throughput": 20217.43, "total_tokens": 47813440}
|
|
{"current_steps": 15195, "total_steps": 78105, "loss": 0.4948, "lr": 4.865136298573824e-06, "epoch": 0.9727290186287689, "percentage": 19.45, "elapsed_time": "0:39:25", "remaining_time": "2:43:14", "throughput": 20218.33, "total_tokens": 47829440}
|
|
{"current_steps": 15200, "total_steps": 78105, "loss": 0.5519, "lr": 4.86495523198519e-06, "epoch": 0.9730491005697458, "percentage": 19.46, "elapsed_time": "0:39:26", "remaining_time": "2:43:12", "throughput": 20219.05, "total_tokens": 47844608}
|
|
{"current_steps": 15205, "total_steps": 78105, "loss": 0.465, "lr": 4.86477404730301e-06, "epoch": 0.9733691825107228, "percentage": 19.47, "elapsed_time": "0:39:26", "remaining_time": "2:43:11", "throughput": 20219.88, "total_tokens": 47860160}
|
|
{"current_steps": 15210, "total_steps": 78105, "loss": 0.4305, "lr": 4.864592744536332e-06, "epoch": 0.9736892644516997, "percentage": 19.47, "elapsed_time": "0:39:27", "remaining_time": "2:43:10", "throughput": 20220.68, "total_tokens": 47875648}
|
|
{"current_steps": 15215, "total_steps": 78105, "loss": 0.4308, "lr": 4.864411323694208e-06, "epoch": 0.9740093463926766, "percentage": 19.48, "elapsed_time": "0:39:28", "remaining_time": "2:43:09", "throughput": 20221.74, "total_tokens": 47892736}
|
|
{"current_steps": 15220, "total_steps": 78105, "loss": 0.5133, "lr": 4.8642297847857e-06, "epoch": 0.9743294283336534, "percentage": 19.49, "elapsed_time": "0:39:29", "remaining_time": "2:43:08", "throughput": 20222.57, "total_tokens": 47908992}
|
|
{"current_steps": 15225, "total_steps": 78105, "loss": 0.5394, "lr": 4.8640481278198704e-06, "epoch": 0.9746495102746303, "percentage": 19.49, "elapsed_time": "0:39:29", "remaining_time": "2:43:07", "throughput": 20223.42, "total_tokens": 47924736}
|
|
{"current_steps": 15230, "total_steps": 78105, "loss": 0.5153, "lr": 4.863866352805791e-06, "epoch": 0.9749695922156072, "percentage": 19.5, "elapsed_time": "0:39:30", "remaining_time": "2:43:06", "throughput": 20224.31, "total_tokens": 47941056}
|
|
{"current_steps": 15235, "total_steps": 78105, "loss": 0.476, "lr": 4.863684459752539e-06, "epoch": 0.9752896741565841, "percentage": 19.51, "elapsed_time": "0:39:31", "remaining_time": "2:43:05", "throughput": 20225.28, "total_tokens": 47957824}
|
|
{"current_steps": 15240, "total_steps": 78105, "loss": 0.7246, "lr": 4.863502448669197e-06, "epoch": 0.975609756097561, "percentage": 19.51, "elapsed_time": "0:39:31", "remaining_time": "2:43:03", "throughput": 20225.98, "total_tokens": 47973056}
|
|
{"current_steps": 15245, "total_steps": 78105, "loss": 0.5526, "lr": 4.863320319564854e-06, "epoch": 0.9759298380385378, "percentage": 19.52, "elapsed_time": "0:39:32", "remaining_time": "2:43:02", "throughput": 20226.87, "total_tokens": 47988928}
|
|
{"current_steps": 15250, "total_steps": 78105, "loss": 0.5295, "lr": 4.863138072448604e-06, "epoch": 0.9762499199795147, "percentage": 19.52, "elapsed_time": "0:39:33", "remaining_time": "2:43:01", "throughput": 20227.58, "total_tokens": 48004032}
|
|
{"current_steps": 15255, "total_steps": 78105, "loss": 0.5599, "lr": 4.8629557073295486e-06, "epoch": 0.9765700019204916, "percentage": 19.53, "elapsed_time": "0:39:33", "remaining_time": "2:43:00", "throughput": 20228.54, "total_tokens": 48020800}
|
|
{"current_steps": 15260, "total_steps": 78105, "loss": 0.6066, "lr": 4.862773224216793e-06, "epoch": 0.9768900838614686, "percentage": 19.54, "elapsed_time": "0:39:34", "remaining_time": "2:42:59", "throughput": 20229.49, "total_tokens": 48036800}
|
|
{"current_steps": 15265, "total_steps": 78105, "loss": 0.3402, "lr": 4.86259062311945e-06, "epoch": 0.9772101658024455, "percentage": 19.54, "elapsed_time": "0:39:35", "remaining_time": "2:42:57", "throughput": 20230.13, "total_tokens": 48051264}
|
|
{"current_steps": 15270, "total_steps": 78105, "loss": 0.3295, "lr": 4.862407904046637e-06, "epoch": 0.9775302477434223, "percentage": 19.55, "elapsed_time": "0:39:35", "remaining_time": "2:42:56", "throughput": 20230.87, "total_tokens": 48066176}
|
|
{"current_steps": 15275, "total_steps": 78105, "loss": 0.3461, "lr": 4.862225067007479e-06, "epoch": 0.9778503296843992, "percentage": 19.56, "elapsed_time": "0:39:36", "remaining_time": "2:42:55", "throughput": 20231.73, "total_tokens": 48081984}
|
|
{"current_steps": 15280, "total_steps": 78105, "loss": 0.569, "lr": 4.862042112011105e-06, "epoch": 0.9781704116253761, "percentage": 19.56, "elapsed_time": "0:39:37", "remaining_time": "2:42:54", "throughput": 20232.42, "total_tokens": 48096896}
|
|
{"current_steps": 15285, "total_steps": 78105, "loss": 0.396, "lr": 4.861859039066652e-06, "epoch": 0.978490493566353, "percentage": 19.57, "elapsed_time": "0:39:37", "remaining_time": "2:42:52", "throughput": 20233.19, "total_tokens": 48112128}
|
|
{"current_steps": 15290, "total_steps": 78105, "loss": 0.449, "lr": 4.861675848183261e-06, "epoch": 0.9788105755073299, "percentage": 19.58, "elapsed_time": "0:39:38", "remaining_time": "2:42:51", "throughput": 20234.03, "total_tokens": 48127616}
|
|
{"current_steps": 15295, "total_steps": 78105, "loss": 0.5732, "lr": 4.861492539370079e-06, "epoch": 0.9791306574483067, "percentage": 19.58, "elapsed_time": "0:39:39", "remaining_time": "2:42:50", "throughput": 20234.7, "total_tokens": 48142144}
|
|
{"current_steps": 15300, "total_steps": 78105, "loss": 0.4656, "lr": 4.861309112636261e-06, "epoch": 0.9794507393892836, "percentage": 19.59, "elapsed_time": "0:39:39", "remaining_time": "2:42:49", "throughput": 20235.59, "total_tokens": 48158272}
|
|
{"current_steps": 15305, "total_steps": 78105, "loss": 0.5226, "lr": 4.861125567990965e-06, "epoch": 0.9797708213302605, "percentage": 19.6, "elapsed_time": "0:39:40", "remaining_time": "2:42:47", "throughput": 20236.29, "total_tokens": 48173120}
|
|
{"current_steps": 15310, "total_steps": 78105, "loss": 0.4565, "lr": 4.860941905443357e-06, "epoch": 0.9800909032712375, "percentage": 19.6, "elapsed_time": "0:39:41", "remaining_time": "2:42:46", "throughput": 20237.07, "total_tokens": 48188672}
|
|
{"current_steps": 15315, "total_steps": 78105, "loss": 0.4428, "lr": 4.860758125002608e-06, "epoch": 0.9804109852122144, "percentage": 19.61, "elapsed_time": "0:39:41", "remaining_time": "2:42:45", "throughput": 20237.9, "total_tokens": 48204480}
|
|
{"current_steps": 15320, "total_steps": 78105, "loss": 0.4556, "lr": 4.860574226677894e-06, "epoch": 0.9807310671531912, "percentage": 19.61, "elapsed_time": "0:39:42", "remaining_time": "2:42:44", "throughput": 20238.67, "total_tokens": 48219584}
|
|
{"current_steps": 15325, "total_steps": 78105, "loss": 0.39, "lr": 4.860390210478401e-06, "epoch": 0.9810511490941681, "percentage": 19.62, "elapsed_time": "0:39:43", "remaining_time": "2:42:43", "throughput": 20239.65, "total_tokens": 48235904}
|
|
{"current_steps": 15330, "total_steps": 78105, "loss": 0.4555, "lr": 4.860206076413314e-06, "epoch": 0.981371231035145, "percentage": 19.63, "elapsed_time": "0:39:43", "remaining_time": "2:42:42", "throughput": 20240.76, "total_tokens": 48252992}
|
|
{"current_steps": 15335, "total_steps": 78105, "loss": 0.47, "lr": 4.860021824491829e-06, "epoch": 0.9816913129761219, "percentage": 19.63, "elapsed_time": "0:39:44", "remaining_time": "2:42:40", "throughput": 20241.44, "total_tokens": 48267840}
|
|
{"current_steps": 15340, "total_steps": 78105, "loss": 0.6372, "lr": 4.859837454723149e-06, "epoch": 0.9820113949170988, "percentage": 19.64, "elapsed_time": "0:39:45", "remaining_time": "2:42:39", "throughput": 20242.14, "total_tokens": 48282688}
|
|
{"current_steps": 15345, "total_steps": 78105, "loss": 0.6663, "lr": 4.859652967116477e-06, "epoch": 0.9823314768580756, "percentage": 19.65, "elapsed_time": "0:39:45", "remaining_time": "2:42:38", "throughput": 20243.31, "total_tokens": 48300096}
|
|
{"current_steps": 15350, "total_steps": 78105, "loss": 0.5205, "lr": 4.859468361681027e-06, "epoch": 0.9826515587990525, "percentage": 19.65, "elapsed_time": "0:39:46", "remaining_time": "2:42:37", "throughput": 20244.05, "total_tokens": 48315136}
|
|
{"current_steps": 15355, "total_steps": 78105, "loss": 0.4813, "lr": 4.859283638426017e-06, "epoch": 0.9829716407400294, "percentage": 19.66, "elapsed_time": "0:39:47", "remaining_time": "2:42:35", "throughput": 20244.81, "total_tokens": 48330240}
|
|
{"current_steps": 15360, "total_steps": 78105, "loss": 0.3946, "lr": 4.859098797360672e-06, "epoch": 0.9832917226810063, "percentage": 19.67, "elapsed_time": "0:39:47", "remaining_time": "2:42:34", "throughput": 20245.45, "total_tokens": 48345280}
|
|
{"current_steps": 15365, "total_steps": 78105, "loss": 0.4358, "lr": 4.858913838494221e-06, "epoch": 0.9836118046219833, "percentage": 19.67, "elapsed_time": "0:39:48", "remaining_time": "2:42:33", "throughput": 20246.39, "total_tokens": 48361792}
|
|
{"current_steps": 15370, "total_steps": 78105, "loss": 0.4608, "lr": 4.858728761835898e-06, "epoch": 0.9839318865629602, "percentage": 19.68, "elapsed_time": "0:39:49", "remaining_time": "2:42:32", "throughput": 20247.13, "total_tokens": 48377408}
|
|
{"current_steps": 15375, "total_steps": 78105, "loss": 0.4886, "lr": 4.858543567394949e-06, "epoch": 0.984251968503937, "percentage": 19.69, "elapsed_time": "0:39:50", "remaining_time": "2:42:31", "throughput": 20247.92, "total_tokens": 48392896}
|
|
{"current_steps": 15380, "total_steps": 78105, "loss": 0.7478, "lr": 4.8583582551806186e-06, "epoch": 0.9845720504449139, "percentage": 19.69, "elapsed_time": "0:39:50", "remaining_time": "2:42:29", "throughput": 20248.54, "total_tokens": 48407552}
|
|
{"current_steps": 15385, "total_steps": 78105, "loss": 0.3833, "lr": 4.858172825202161e-06, "epoch": 0.9848921323858908, "percentage": 19.7, "elapsed_time": "0:39:51", "remaining_time": "2:42:28", "throughput": 20249.53, "total_tokens": 48424320}
|
|
{"current_steps": 15390, "total_steps": 78105, "loss": 0.6789, "lr": 4.857987277468836e-06, "epoch": 0.9852122143268677, "percentage": 19.7, "elapsed_time": "0:39:52", "remaining_time": "2:42:28", "throughput": 20250.64, "total_tokens": 48441984}
|
|
{"current_steps": 15395, "total_steps": 78105, "loss": 0.5358, "lr": 4.857801611989909e-06, "epoch": 0.9855322962678446, "percentage": 19.71, "elapsed_time": "0:39:52", "remaining_time": "2:42:26", "throughput": 20251.31, "total_tokens": 48456832}
|
|
{"current_steps": 15400, "total_steps": 78105, "loss": 0.399, "lr": 4.857615828774651e-06, "epoch": 0.9858523782088214, "percentage": 19.72, "elapsed_time": "0:39:53", "remaining_time": "2:42:25", "throughput": 20252.05, "total_tokens": 48472512}
|
|
{"current_steps": 15405, "total_steps": 78105, "loss": 0.3159, "lr": 4.857429927832339e-06, "epoch": 0.9861724601497983, "percentage": 19.72, "elapsed_time": "0:39:54", "remaining_time": "2:42:24", "throughput": 20252.91, "total_tokens": 48488832}
|
|
{"current_steps": 15410, "total_steps": 78105, "loss": 0.524, "lr": 4.857243909172255e-06, "epoch": 0.9864925420907752, "percentage": 19.73, "elapsed_time": "0:39:54", "remaining_time": "2:42:23", "throughput": 20253.62, "total_tokens": 48503936}
|
|
{"current_steps": 15415, "total_steps": 78105, "loss": 0.5741, "lr": 4.857057772803688e-06, "epoch": 0.9868126240317522, "percentage": 19.74, "elapsed_time": "0:39:55", "remaining_time": "2:42:21", "throughput": 20254.39, "total_tokens": 48519040}
|
|
{"current_steps": 15420, "total_steps": 78105, "loss": 0.6914, "lr": 4.856871518735935e-06, "epoch": 0.9871327059727291, "percentage": 19.74, "elapsed_time": "0:39:56", "remaining_time": "2:42:20", "throughput": 20255.31, "total_tokens": 48535040}
|
|
{"current_steps": 15425, "total_steps": 78105, "loss": 0.425, "lr": 4.856685146978294e-06, "epoch": 0.9874527879137059, "percentage": 19.75, "elapsed_time": "0:39:56", "remaining_time": "2:42:19", "throughput": 20256.3, "total_tokens": 48551808}
|
|
{"current_steps": 15430, "total_steps": 78105, "loss": 0.5954, "lr": 4.856498657540072e-06, "epoch": 0.9877728698546828, "percentage": 19.76, "elapsed_time": "0:39:57", "remaining_time": "2:42:18", "throughput": 20257.03, "total_tokens": 48566592}
|
|
{"current_steps": 15435, "total_steps": 78105, "loss": 0.3705, "lr": 4.856312050430582e-06, "epoch": 0.9880929517956597, "percentage": 19.76, "elapsed_time": "0:39:58", "remaining_time": "2:42:17", "throughput": 20257.95, "total_tokens": 48582720}
|
|
{"current_steps": 15440, "total_steps": 78105, "loss": 0.5104, "lr": 4.856125325659143e-06, "epoch": 0.9884130337366366, "percentage": 19.77, "elapsed_time": "0:39:58", "remaining_time": "2:42:16", "throughput": 20258.88, "total_tokens": 48599104}
|
|
{"current_steps": 15445, "total_steps": 78105, "loss": 0.4091, "lr": 4.855938483235076e-06, "epoch": 0.9887331156776135, "percentage": 19.77, "elapsed_time": "0:39:59", "remaining_time": "2:42:15", "throughput": 20259.77, "total_tokens": 48615040}
|
|
{"current_steps": 15450, "total_steps": 78105, "loss": 0.4871, "lr": 4.855751523167713e-06, "epoch": 0.9890531976185903, "percentage": 19.78, "elapsed_time": "0:40:00", "remaining_time": "2:42:13", "throughput": 20260.64, "total_tokens": 48630976}
|
|
{"current_steps": 15455, "total_steps": 78105, "loss": 0.4397, "lr": 4.85556444546639e-06, "epoch": 0.9893732795595672, "percentage": 19.79, "elapsed_time": "0:40:00", "remaining_time": "2:42:12", "throughput": 20261.44, "total_tokens": 48646080}
|
|
{"current_steps": 15460, "total_steps": 78105, "loss": 0.4126, "lr": 4.855377250140449e-06, "epoch": 0.9896933615005441, "percentage": 19.79, "elapsed_time": "0:40:01", "remaining_time": "2:42:11", "throughput": 20262.37, "total_tokens": 48662528}
|
|
{"current_steps": 15465, "total_steps": 78105, "loss": 0.4326, "lr": 4.855189937199235e-06, "epoch": 0.990013443441521, "percentage": 19.8, "elapsed_time": "0:40:02", "remaining_time": "2:42:10", "throughput": 20263.47, "total_tokens": 48680000}
|
|
{"current_steps": 15470, "total_steps": 78105, "loss": 0.4272, "lr": 4.855002506652104e-06, "epoch": 0.990333525382498, "percentage": 19.81, "elapsed_time": "0:40:03", "remaining_time": "2:42:09", "throughput": 20264.39, "total_tokens": 48696256}
|
|
{"current_steps": 15475, "total_steps": 78105, "loss": 0.3104, "lr": 4.854814958508416e-06, "epoch": 0.9906536073234748, "percentage": 19.81, "elapsed_time": "0:40:03", "remaining_time": "2:42:08", "throughput": 20265.09, "total_tokens": 48711168}
|
|
{"current_steps": 15480, "total_steps": 78105, "loss": 0.8846, "lr": 4.854627292777533e-06, "epoch": 0.9909736892644517, "percentage": 19.82, "elapsed_time": "0:40:04", "remaining_time": "2:42:07", "throughput": 20265.91, "total_tokens": 48726848}
|
|
{"current_steps": 15485, "total_steps": 78105, "loss": 0.5502, "lr": 4.854439509468829e-06, "epoch": 0.9912937712054286, "percentage": 19.83, "elapsed_time": "0:40:05", "remaining_time": "2:42:05", "throughput": 20266.84, "total_tokens": 48743232}
|
|
{"current_steps": 15490, "total_steps": 78105, "loss": 0.5215, "lr": 4.854251608591679e-06, "epoch": 0.9916138531464055, "percentage": 19.83, "elapsed_time": "0:40:05", "remaining_time": "2:42:04", "throughput": 20267.52, "total_tokens": 48758080}
|
|
{"current_steps": 15495, "total_steps": 78105, "loss": 0.4002, "lr": 4.854063590155467e-06, "epoch": 0.9919339350873824, "percentage": 19.84, "elapsed_time": "0:40:06", "remaining_time": "2:42:03", "throughput": 20268.22, "total_tokens": 48773120}
|
|
{"current_steps": 15500, "total_steps": 78105, "loss": 0.5051, "lr": 4.8538754541695806e-06, "epoch": 0.9922540170283592, "percentage": 19.85, "elapsed_time": "0:40:07", "remaining_time": "2:42:02", "throughput": 20269.03, "total_tokens": 48789056}
|
|
{"current_steps": 15505, "total_steps": 78105, "loss": 0.5882, "lr": 4.8536872006434155e-06, "epoch": 0.9925740989693361, "percentage": 19.85, "elapsed_time": "0:40:07", "remaining_time": "2:42:01", "throughput": 20269.72, "total_tokens": 48804288}
|
|
{"current_steps": 15510, "total_steps": 78105, "loss": 0.4437, "lr": 4.853498829586371e-06, "epoch": 0.992894180910313, "percentage": 19.86, "elapsed_time": "0:40:08", "remaining_time": "2:41:59", "throughput": 20270.33, "total_tokens": 48818816}
|
|
{"current_steps": 15515, "total_steps": 78105, "loss": 0.5251, "lr": 4.8533103410078544e-06, "epoch": 0.9932142628512899, "percentage": 19.86, "elapsed_time": "0:40:09", "remaining_time": "2:41:58", "throughput": 20271.31, "total_tokens": 48835520}
|
|
{"current_steps": 15520, "total_steps": 78105, "loss": 0.3338, "lr": 4.853121734917276e-06, "epoch": 0.9935343447922668, "percentage": 19.87, "elapsed_time": "0:40:09", "remaining_time": "2:41:57", "throughput": 20272.17, "total_tokens": 48851328}
|
|
{"current_steps": 15525, "total_steps": 78105, "loss": 0.4314, "lr": 4.852933011324057e-06, "epoch": 0.9938544267332438, "percentage": 19.88, "elapsed_time": "0:40:10", "remaining_time": "2:41:56", "throughput": 20272.87, "total_tokens": 48866304}
|
|
{"current_steps": 15530, "total_steps": 78105, "loss": 0.4162, "lr": 4.852744170237619e-06, "epoch": 0.9941745086742206, "percentage": 19.88, "elapsed_time": "0:40:11", "remaining_time": "2:41:55", "throughput": 20273.82, "total_tokens": 48882752}
|
|
{"current_steps": 15535, "total_steps": 78105, "loss": 0.5135, "lr": 4.852555211667391e-06, "epoch": 0.9944945906151975, "percentage": 19.89, "elapsed_time": "0:40:11", "remaining_time": "2:41:53", "throughput": 20274.6, "total_tokens": 48898304}
|
|
{"current_steps": 15540, "total_steps": 78105, "loss": 0.6086, "lr": 4.852366135622811e-06, "epoch": 0.9948146725561744, "percentage": 19.9, "elapsed_time": "0:40:12", "remaining_time": "2:41:52", "throughput": 20275.45, "total_tokens": 48914048}
|
|
{"current_steps": 15545, "total_steps": 78105, "loss": 0.4028, "lr": 4.852176942113318e-06, "epoch": 0.9951347544971513, "percentage": 19.9, "elapsed_time": "0:40:13", "remaining_time": "2:41:51", "throughput": 20276.3, "total_tokens": 48930176}
|
|
{"current_steps": 15550, "total_steps": 78105, "loss": 0.5957, "lr": 4.851987631148361e-06, "epoch": 0.9954548364381282, "percentage": 19.91, "elapsed_time": "0:40:13", "remaining_time": "2:41:50", "throughput": 20277.09, "total_tokens": 48945920}
|
|
{"current_steps": 15555, "total_steps": 78105, "loss": 0.5758, "lr": 4.851798202737393e-06, "epoch": 0.995774918379105, "percentage": 19.92, "elapsed_time": "0:40:14", "remaining_time": "2:41:49", "throughput": 20277.83, "total_tokens": 48960832}
|
|
{"current_steps": 15560, "total_steps": 78105, "loss": 0.3928, "lr": 4.851608656889874e-06, "epoch": 0.9960950003200819, "percentage": 19.92, "elapsed_time": "0:40:15", "remaining_time": "2:41:48", "throughput": 20278.7, "total_tokens": 48977280}
|
|
{"current_steps": 15565, "total_steps": 78105, "loss": 0.3878, "lr": 4.851418993615266e-06, "epoch": 0.9964150822610588, "percentage": 19.93, "elapsed_time": "0:40:15", "remaining_time": "2:41:46", "throughput": 20279.35, "total_tokens": 48992512}
|
|
{"current_steps": 15570, "total_steps": 78105, "loss": 0.4631, "lr": 4.851229212923042e-06, "epoch": 0.9967351642020357, "percentage": 19.93, "elapsed_time": "0:40:16", "remaining_time": "2:41:45", "throughput": 20280.16, "total_tokens": 49008128}
|
|
{"current_steps": 15575, "total_steps": 78105, "loss": 0.4514, "lr": 4.851039314822678e-06, "epoch": 0.9970552461430127, "percentage": 19.94, "elapsed_time": "0:40:17", "remaining_time": "2:41:44", "throughput": 20281.09, "total_tokens": 49024512}
|
|
{"current_steps": 15580, "total_steps": 78105, "loss": 0.3798, "lr": 4.850849299323657e-06, "epoch": 0.9973753280839895, "percentage": 19.95, "elapsed_time": "0:40:17", "remaining_time": "2:41:43", "throughput": 20281.87, "total_tokens": 49040128}
|
|
{"current_steps": 15585, "total_steps": 78105, "loss": 0.4323, "lr": 4.8506591664354676e-06, "epoch": 0.9976954100249664, "percentage": 19.95, "elapsed_time": "0:40:18", "remaining_time": "2:41:42", "throughput": 20282.63, "total_tokens": 49055360}
|
|
{"current_steps": 15590, "total_steps": 78105, "loss": 0.4633, "lr": 4.850468916167603e-06, "epoch": 0.9980154919659433, "percentage": 19.96, "elapsed_time": "0:40:19", "remaining_time": "2:41:41", "throughput": 20283.48, "total_tokens": 49071360}
|
|
{"current_steps": 15595, "total_steps": 78105, "loss": 0.3504, "lr": 4.850278548529563e-06, "epoch": 0.9983355739069202, "percentage": 19.97, "elapsed_time": "0:40:19", "remaining_time": "2:41:39", "throughput": 20284.14, "total_tokens": 49086144}
|
|
{"current_steps": 15600, "total_steps": 78105, "loss": 0.5339, "lr": 4.850088063530856e-06, "epoch": 0.9986556558478971, "percentage": 19.97, "elapsed_time": "0:40:20", "remaining_time": "2:41:38", "throughput": 20284.87, "total_tokens": 49101312}
|
|
{"current_steps": 15605, "total_steps": 78105, "loss": 0.3432, "lr": 4.849897461180991e-06, "epoch": 0.9989757377888739, "percentage": 19.98, "elapsed_time": "0:40:21", "remaining_time": "2:41:37", "throughput": 20285.61, "total_tokens": 49116672}
|
|
{"current_steps": 15610, "total_steps": 78105, "loss": 0.3068, "lr": 4.849706741489489e-06, "epoch": 0.9992958197298508, "percentage": 19.99, "elapsed_time": "0:40:21", "remaining_time": "2:41:36", "throughput": 20286.45, "total_tokens": 49132288}
|
|
{"current_steps": 15615, "total_steps": 78105, "loss": 0.5035, "lr": 4.84951590446587e-06, "epoch": 0.9996159016708277, "percentage": 19.99, "elapsed_time": "0:40:22", "remaining_time": "2:41:35", "throughput": 20287.21, "total_tokens": 49148096}
|
|
{"current_steps": 15620, "total_steps": 78105, "loss": 0.343, "lr": 4.849324950119665e-06, "epoch": 0.9999359836118046, "percentage": 20.0, "elapsed_time": "0:40:23", "remaining_time": "2:41:33", "throughput": 20288.09, "total_tokens": 49163840}
|
|
{"current_steps": 15624, "total_steps": 78105, "eval_loss": 0.4853726923465729, "epoch": 1.0001920491645861, "percentage": 20.0, "elapsed_time": "0:41:14", "remaining_time": "2:44:57", "throughput": 19870.23, "total_tokens": 49176512}
|
|
{"current_steps": 15625, "total_steps": 78105, "loss": 0.3136, "lr": 4.849133878460409e-06, "epoch": 1.0002560655527815, "percentage": 20.01, "elapsed_time": "0:41:50", "remaining_time": "2:47:18", "throughput": 19591.19, "total_tokens": 49180096}
|
|
{"current_steps": 15630, "total_steps": 78105, "loss": 0.267, "lr": 4.848942689497643e-06, "epoch": 1.0005761474937584, "percentage": 20.01, "elapsed_time": "0:41:50", "remaining_time": "2:47:16", "throughput": 19592.11, "total_tokens": 49195648}
|
|
{"current_steps": 15635, "total_steps": 78105, "loss": 0.4077, "lr": 4.848751383240915e-06, "epoch": 1.0008962294347352, "percentage": 20.02, "elapsed_time": "0:41:51", "remaining_time": "2:47:15", "throughput": 19593.27, "total_tokens": 49212544}
|
|
{"current_steps": 15640, "total_steps": 78105, "loss": 0.3645, "lr": 4.848559959699778e-06, "epoch": 1.0012163113757122, "percentage": 20.02, "elapsed_time": "0:41:52", "remaining_time": "2:47:14", "throughput": 19594.29, "total_tokens": 49228672}
|
|
{"current_steps": 15645, "total_steps": 78105, "loss": 0.3051, "lr": 4.848368418883788e-06, "epoch": 1.001536393316689, "percentage": 20.03, "elapsed_time": "0:41:53", "remaining_time": "2:47:12", "throughput": 19595.01, "total_tokens": 49243072}
|
|
{"current_steps": 15650, "total_steps": 78105, "loss": 0.3817, "lr": 4.848176760802512e-06, "epoch": 1.001856475257666, "percentage": 20.04, "elapsed_time": "0:41:53", "remaining_time": "2:47:11", "throughput": 19596.08, "total_tokens": 49259520}
|
|
{"current_steps": 15655, "total_steps": 78105, "loss": 0.2813, "lr": 4.84798498546552e-06, "epoch": 1.002176557198643, "percentage": 20.04, "elapsed_time": "0:41:54", "remaining_time": "2:47:10", "throughput": 19596.96, "total_tokens": 49274752}
|
|
{"current_steps": 15660, "total_steps": 78105, "loss": 0.3692, "lr": 4.847793092882388e-06, "epoch": 1.0024966391396197, "percentage": 20.05, "elapsed_time": "0:41:55", "remaining_time": "2:47:08", "throughput": 19597.8, "total_tokens": 49289728}
|
|
{"current_steps": 15665, "total_steps": 78105, "loss": 0.4069, "lr": 4.847601083062699e-06, "epoch": 1.0028167210805967, "percentage": 20.06, "elapsed_time": "0:41:55", "remaining_time": "2:47:07", "throughput": 19598.72, "total_tokens": 49304960}
|
|
{"current_steps": 15670, "total_steps": 78105, "loss": 0.4501, "lr": 4.847408956016039e-06, "epoch": 1.0031368030215735, "percentage": 20.06, "elapsed_time": "0:41:56", "remaining_time": "2:47:06", "throughput": 19599.81, "total_tokens": 49321152}
|
|
{"current_steps": 15675, "total_steps": 78105, "loss": 0.2486, "lr": 4.847216711752003e-06, "epoch": 1.0034568849625505, "percentage": 20.07, "elapsed_time": "0:41:57", "remaining_time": "2:47:05", "throughput": 19600.92, "total_tokens": 49337984}
|
|
{"current_steps": 15680, "total_steps": 78105, "loss": 0.2594, "lr": 4.847024350280191e-06, "epoch": 1.0037769669035272, "percentage": 20.08, "elapsed_time": "0:41:57", "remaining_time": "2:47:03", "throughput": 19601.73, "total_tokens": 49352768}
|
|
{"current_steps": 15685, "total_steps": 78105, "loss": 0.2567, "lr": 4.846831871610207e-06, "epoch": 1.0040970488445042, "percentage": 20.08, "elapsed_time": "0:41:58", "remaining_time": "2:47:02", "throughput": 19602.79, "total_tokens": 49368832}
|
|
{"current_steps": 15690, "total_steps": 78105, "loss": 0.3306, "lr": 4.846639275751664e-06, "epoch": 1.004417130785481, "percentage": 20.09, "elapsed_time": "0:41:59", "remaining_time": "2:47:01", "throughput": 19603.77, "total_tokens": 49384640}
|
|
{"current_steps": 15695, "total_steps": 78105, "loss": 0.3599, "lr": 4.84644656271418e-06, "epoch": 1.004737212726458, "percentage": 20.09, "elapsed_time": "0:41:59", "remaining_time": "2:46:59", "throughput": 19604.68, "total_tokens": 49399936}
|
|
{"current_steps": 15700, "total_steps": 78105, "loss": 0.2855, "lr": 4.846253732507376e-06, "epoch": 1.0050572946674348, "percentage": 20.1, "elapsed_time": "0:42:00", "remaining_time": "2:46:58", "throughput": 19605.61, "total_tokens": 49415680}
|
|
{"current_steps": 15705, "total_steps": 78105, "loss": 0.3478, "lr": 4.846060785140882e-06, "epoch": 1.0053773766084118, "percentage": 20.11, "elapsed_time": "0:42:01", "remaining_time": "2:46:57", "throughput": 19606.44, "total_tokens": 49430592}
|
|
{"current_steps": 15710, "total_steps": 78105, "loss": 0.3396, "lr": 4.845867720624332e-06, "epoch": 1.0056974585493887, "percentage": 20.11, "elapsed_time": "0:42:01", "remaining_time": "2:46:55", "throughput": 19607.28, "total_tokens": 49445312}
|
|
{"current_steps": 15715, "total_steps": 78105, "loss": 0.3172, "lr": 4.845674538967367e-06, "epoch": 1.0060175404903655, "percentage": 20.12, "elapsed_time": "0:42:02", "remaining_time": "2:46:54", "throughput": 19608.16, "total_tokens": 49460352}
|
|
{"current_steps": 15720, "total_steps": 78105, "loss": 0.2856, "lr": 4.8454812401796355e-06, "epoch": 1.0063376224313425, "percentage": 20.13, "elapsed_time": "0:42:03", "remaining_time": "2:46:53", "throughput": 19609.17, "total_tokens": 49476352}
|
|
{"current_steps": 15725, "total_steps": 78105, "loss": 0.2585, "lr": 4.845287824270787e-06, "epoch": 1.0066577043723193, "percentage": 20.13, "elapsed_time": "0:42:03", "remaining_time": "2:46:51", "throughput": 19610.13, "total_tokens": 49492096}
|
|
{"current_steps": 15730, "total_steps": 78105, "loss": 0.2834, "lr": 4.8450942912504805e-06, "epoch": 1.0069777863132963, "percentage": 20.14, "elapsed_time": "0:42:04", "remaining_time": "2:46:50", "throughput": 19610.97, "total_tokens": 49506944}
|
|
{"current_steps": 15735, "total_steps": 78105, "loss": 0.3926, "lr": 4.844900641128381e-06, "epoch": 1.007297868254273, "percentage": 20.15, "elapsed_time": "0:42:05", "remaining_time": "2:46:49", "throughput": 19612.14, "total_tokens": 49523904}
|
|
{"current_steps": 15740, "total_steps": 78105, "loss": 0.4609, "lr": 4.844706873914156e-06, "epoch": 1.00761795019525, "percentage": 20.15, "elapsed_time": "0:42:05", "remaining_time": "2:46:47", "throughput": 19612.92, "total_tokens": 49538624}
|
|
{"current_steps": 15745, "total_steps": 78105, "loss": 0.4485, "lr": 4.844512989617484e-06, "epoch": 1.0079380321362268, "percentage": 20.16, "elapsed_time": "0:42:06", "remaining_time": "2:46:46", "throughput": 19613.93, "total_tokens": 49554816}
|
|
{"current_steps": 15750, "total_steps": 78105, "loss": 0.2031, "lr": 4.844318988248045e-06, "epoch": 1.0082581140772038, "percentage": 20.17, "elapsed_time": "0:42:07", "remaining_time": "2:46:45", "throughput": 19614.92, "total_tokens": 49570496}
|
|
{"current_steps": 15755, "total_steps": 78105, "loss": 0.5065, "lr": 4.844124869815528e-06, "epoch": 1.0085781960181806, "percentage": 20.17, "elapsed_time": "0:42:07", "remaining_time": "2:46:43", "throughput": 19615.94, "total_tokens": 49586432}
|
|
{"current_steps": 15760, "total_steps": 78105, "loss": 0.3074, "lr": 4.843930634329623e-06, "epoch": 1.0088982779591575, "percentage": 20.18, "elapsed_time": "0:42:08", "remaining_time": "2:46:42", "throughput": 19617.07, "total_tokens": 49603264}
|
|
{"current_steps": 15765, "total_steps": 78105, "loss": 0.2569, "lr": 4.8437362818000325e-06, "epoch": 1.0092183599001345, "percentage": 20.18, "elapsed_time": "0:42:09", "remaining_time": "2:46:41", "throughput": 19617.96, "total_tokens": 49619008}
|
|
{"current_steps": 15770, "total_steps": 78105, "loss": 0.3842, "lr": 4.843541812236461e-06, "epoch": 1.0095384418411113, "percentage": 20.19, "elapsed_time": "0:42:09", "remaining_time": "2:46:40", "throughput": 19619.05, "total_tokens": 49635456}
|
|
{"current_steps": 15775, "total_steps": 78105, "loss": 0.379, "lr": 4.843347225648617e-06, "epoch": 1.0098585237820883, "percentage": 20.2, "elapsed_time": "0:42:10", "remaining_time": "2:46:39", "throughput": 19620.11, "total_tokens": 49651776}
|
|
{"current_steps": 15780, "total_steps": 78105, "loss": 0.3251, "lr": 4.843152522046219e-06, "epoch": 1.010178605723065, "percentage": 20.2, "elapsed_time": "0:42:11", "remaining_time": "2:46:37", "throughput": 19621.07, "total_tokens": 49667264}
|
|
{"current_steps": 15785, "total_steps": 78105, "loss": 0.2912, "lr": 4.84295770143899e-06, "epoch": 1.010498687664042, "percentage": 20.21, "elapsed_time": "0:42:12", "remaining_time": "2:46:36", "throughput": 19622.04, "total_tokens": 49683392}
|
|
{"current_steps": 15790, "total_steps": 78105, "loss": 0.2347, "lr": 4.842762763836656e-06, "epoch": 1.0108187696050188, "percentage": 20.22, "elapsed_time": "0:42:12", "remaining_time": "2:46:35", "throughput": 19622.99, "total_tokens": 49698560}
|
|
{"current_steps": 15795, "total_steps": 78105, "loss": 0.4577, "lr": 4.842567709248953e-06, "epoch": 1.0111388515459958, "percentage": 20.22, "elapsed_time": "0:42:13", "remaining_time": "2:46:33", "throughput": 19623.91, "total_tokens": 49713984}
|
|
{"current_steps": 15800, "total_steps": 78105, "loss": 0.2336, "lr": 4.842372537685621e-06, "epoch": 1.0114589334869726, "percentage": 20.23, "elapsed_time": "0:42:14", "remaining_time": "2:46:32", "throughput": 19624.89, "total_tokens": 49729920}
|
|
{"current_steps": 15805, "total_steps": 78105, "loss": 0.2783, "lr": 4.842177249156405e-06, "epoch": 1.0117790154279496, "percentage": 20.24, "elapsed_time": "0:42:14", "remaining_time": "2:46:31", "throughput": 19626.12, "total_tokens": 49747008}
|
|
{"current_steps": 15810, "total_steps": 78105, "loss": 0.3888, "lr": 4.8419818436710575e-06, "epoch": 1.0120990973689263, "percentage": 20.24, "elapsed_time": "0:42:15", "remaining_time": "2:46:30", "throughput": 19627.29, "total_tokens": 49763584}
|
|
{"current_steps": 15815, "total_steps": 78105, "loss": 0.4723, "lr": 4.841786321239336e-06, "epoch": 1.0124191793099033, "percentage": 20.25, "elapsed_time": "0:42:16", "remaining_time": "2:46:28", "throughput": 19628.12, "total_tokens": 49778368}
|
|
{"current_steps": 15820, "total_steps": 78105, "loss": 0.325, "lr": 4.841590681871002e-06, "epoch": 1.0127392612508803, "percentage": 20.25, "elapsed_time": "0:42:16", "remaining_time": "2:46:27", "throughput": 19629.12, "total_tokens": 49794368}
|
|
{"current_steps": 15825, "total_steps": 78105, "loss": 0.3768, "lr": 4.8413949255758274e-06, "epoch": 1.013059343191857, "percentage": 20.26, "elapsed_time": "0:42:17", "remaining_time": "2:46:26", "throughput": 19630.17, "total_tokens": 49810688}
|
|
{"current_steps": 15830, "total_steps": 78105, "loss": 0.4759, "lr": 4.841199052363586e-06, "epoch": 1.013379425132834, "percentage": 20.27, "elapsed_time": "0:42:18", "remaining_time": "2:46:24", "throughput": 19631.02, "total_tokens": 49825920}
|
|
{"current_steps": 15835, "total_steps": 78105, "loss": 0.5284, "lr": 4.841003062244059e-06, "epoch": 1.0136995070738108, "percentage": 20.27, "elapsed_time": "0:42:18", "remaining_time": "2:46:23", "throughput": 19632.02, "total_tokens": 49841856}
|
|
{"current_steps": 15840, "total_steps": 78105, "loss": 0.3258, "lr": 4.840806955227033e-06, "epoch": 1.0140195890147878, "percentage": 20.28, "elapsed_time": "0:42:19", "remaining_time": "2:46:22", "throughput": 19632.94, "total_tokens": 49857088}
|
|
{"current_steps": 15845, "total_steps": 78105, "loss": 0.3578, "lr": 4.8406107313223e-06, "epoch": 1.0143396709557646, "percentage": 20.29, "elapsed_time": "0:42:20", "remaining_time": "2:46:21", "throughput": 19633.87, "total_tokens": 49872832}
|
|
{"current_steps": 15850, "total_steps": 78105, "loss": 0.3216, "lr": 4.840414390539659e-06, "epoch": 1.0146597528967416, "percentage": 20.29, "elapsed_time": "0:42:20", "remaining_time": "2:46:19", "throughput": 19634.68, "total_tokens": 49887936}
|
|
{"current_steps": 15855, "total_steps": 78105, "loss": 0.2599, "lr": 4.840217932888915e-06, "epoch": 1.0149798348377184, "percentage": 20.3, "elapsed_time": "0:42:21", "remaining_time": "2:46:18", "throughput": 19635.44, "total_tokens": 49902656}
|
|
{"current_steps": 15860, "total_steps": 78105, "loss": 0.3158, "lr": 4.840021358379876e-06, "epoch": 1.0152999167786954, "percentage": 20.31, "elapsed_time": "0:42:22", "remaining_time": "2:46:17", "throughput": 19636.4, "total_tokens": 49918464}
|
|
{"current_steps": 15865, "total_steps": 78105, "loss": 0.2552, "lr": 4.83982466702236e-06, "epoch": 1.0156199987196723, "percentage": 20.31, "elapsed_time": "0:42:22", "remaining_time": "2:46:15", "throughput": 19637.23, "total_tokens": 49933696}
|
|
{"current_steps": 15870, "total_steps": 78105, "loss": 0.3292, "lr": 4.839627858826186e-06, "epoch": 1.0159400806606491, "percentage": 20.32, "elapsed_time": "0:42:23", "remaining_time": "2:46:14", "throughput": 19638.22, "total_tokens": 49949760}
|
|
{"current_steps": 15875, "total_steps": 78105, "loss": 0.3322, "lr": 4.839430933801185e-06, "epoch": 1.016260162601626, "percentage": 20.33, "elapsed_time": "0:42:24", "remaining_time": "2:46:13", "throughput": 19639.24, "total_tokens": 49966080}
|
|
{"current_steps": 15880, "total_steps": 78105, "loss": 0.2365, "lr": 4.839233891957188e-06, "epoch": 1.0165802445426029, "percentage": 20.33, "elapsed_time": "0:42:24", "remaining_time": "2:46:11", "throughput": 19640.0, "total_tokens": 49980928}
|
|
{"current_steps": 15885, "total_steps": 78105, "loss": 0.3634, "lr": 4.839036733304036e-06, "epoch": 1.0169003264835799, "percentage": 20.34, "elapsed_time": "0:42:25", "remaining_time": "2:46:10", "throughput": 19640.87, "total_tokens": 49996160}
|
|
{"current_steps": 15890, "total_steps": 78105, "loss": 0.2996, "lr": 4.838839457851573e-06, "epoch": 1.0172204084245566, "percentage": 20.34, "elapsed_time": "0:42:26", "remaining_time": "2:46:09", "throughput": 19641.66, "total_tokens": 50011008}
|
|
{"current_steps": 15895, "total_steps": 78105, "loss": 0.2945, "lr": 4.83864206560965e-06, "epoch": 1.0175404903655336, "percentage": 20.35, "elapsed_time": "0:42:26", "remaining_time": "2:46:07", "throughput": 19642.5, "total_tokens": 50026304}
|
|
{"current_steps": 15900, "total_steps": 78105, "loss": 0.2465, "lr": 4.838444556588123e-06, "epoch": 1.0178605723065104, "percentage": 20.36, "elapsed_time": "0:42:27", "remaining_time": "2:46:06", "throughput": 19643.46, "total_tokens": 50041856}
|
|
{"current_steps": 15905, "total_steps": 78105, "loss": 0.4178, "lr": 4.838246930796856e-06, "epoch": 1.0181806542474874, "percentage": 20.36, "elapsed_time": "0:42:28", "remaining_time": "2:46:05", "throughput": 19644.28, "total_tokens": 50057152}
|
|
{"current_steps": 15910, "total_steps": 78105, "loss": 0.2823, "lr": 4.838049188245717e-06, "epoch": 1.0185007361884642, "percentage": 20.37, "elapsed_time": "0:42:28", "remaining_time": "2:46:04", "throughput": 19645.64, "total_tokens": 50075392}
|
|
{"current_steps": 15915, "total_steps": 78105, "loss": 0.3396, "lr": 4.8378513289445785e-06, "epoch": 1.0188208181294411, "percentage": 20.38, "elapsed_time": "0:42:29", "remaining_time": "2:46:02", "throughput": 19646.53, "total_tokens": 50090816}
|
|
{"current_steps": 15920, "total_steps": 78105, "loss": 0.2388, "lr": 4.837653352903324e-06, "epoch": 1.0191409000704181, "percentage": 20.38, "elapsed_time": "0:42:30", "remaining_time": "2:46:01", "throughput": 19647.37, "total_tokens": 50106112}
|
|
{"current_steps": 15925, "total_steps": 78105, "loss": 0.4008, "lr": 4.837455260131836e-06, "epoch": 1.019460982011395, "percentage": 20.39, "elapsed_time": "0:42:30", "remaining_time": "2:46:00", "throughput": 19648.3, "total_tokens": 50121664}
|
|
{"current_steps": 15930, "total_steps": 78105, "loss": 0.3061, "lr": 4.837257050640009e-06, "epoch": 1.019781063952372, "percentage": 20.4, "elapsed_time": "0:42:31", "remaining_time": "2:45:59", "throughput": 19649.34, "total_tokens": 50137856}
|
|
{"current_steps": 15935, "total_steps": 78105, "loss": 0.295, "lr": 4.837058724437738e-06, "epoch": 1.0201011458933487, "percentage": 20.4, "elapsed_time": "0:42:32", "remaining_time": "2:45:57", "throughput": 19650.32, "total_tokens": 50153984}
|
|
{"current_steps": 15940, "total_steps": 78105, "loss": 0.3458, "lr": 4.836860281534928e-06, "epoch": 1.0204212278343257, "percentage": 20.41, "elapsed_time": "0:42:33", "remaining_time": "2:45:56", "throughput": 19651.28, "total_tokens": 50170112}
|
|
{"current_steps": 15945, "total_steps": 78105, "loss": 0.2094, "lr": 4.836661721941488e-06, "epoch": 1.0207413097753024, "percentage": 20.41, "elapsed_time": "0:42:33", "remaining_time": "2:45:55", "throughput": 19652.12, "total_tokens": 50185216}
|
|
{"current_steps": 15950, "total_steps": 78105, "loss": 0.3635, "lr": 4.836463045667333e-06, "epoch": 1.0210613917162794, "percentage": 20.42, "elapsed_time": "0:42:34", "remaining_time": "2:45:54", "throughput": 19653.39, "total_tokens": 50203200}
|
|
{"current_steps": 15955, "total_steps": 78105, "loss": 0.3291, "lr": 4.836264252722384e-06, "epoch": 1.0213814736572562, "percentage": 20.43, "elapsed_time": "0:42:35", "remaining_time": "2:45:52", "throughput": 19654.17, "total_tokens": 50218304}
|
|
{"current_steps": 15960, "total_steps": 78105, "loss": 0.3045, "lr": 4.8360653431165665e-06, "epoch": 1.0217015555982332, "percentage": 20.43, "elapsed_time": "0:42:35", "remaining_time": "2:45:51", "throughput": 19655.08, "total_tokens": 50234048}
|
|
{"current_steps": 15965, "total_steps": 78105, "loss": 0.289, "lr": 4.835866316859814e-06, "epoch": 1.02202163753921, "percentage": 20.44, "elapsed_time": "0:42:36", "remaining_time": "2:45:50", "throughput": 19656.0, "total_tokens": 50250112}
|
|
{"current_steps": 15970, "total_steps": 78105, "loss": 0.1814, "lr": 4.835667173962065e-06, "epoch": 1.022341719480187, "percentage": 20.45, "elapsed_time": "0:42:37", "remaining_time": "2:45:49", "throughput": 19657.0, "total_tokens": 50266752}
|
|
{"current_steps": 15975, "total_steps": 78105, "loss": 0.3621, "lr": 4.835467914433262e-06, "epoch": 1.022661801421164, "percentage": 20.45, "elapsed_time": "0:42:37", "remaining_time": "2:45:48", "throughput": 19657.79, "total_tokens": 50281792}
|
|
{"current_steps": 15980, "total_steps": 78105, "loss": 0.3635, "lr": 4.835268538283359e-06, "epoch": 1.0229818833621407, "percentage": 20.46, "elapsed_time": "0:42:38", "remaining_time": "2:45:46", "throughput": 19658.61, "total_tokens": 50296896}
|
|
{"current_steps": 15985, "total_steps": 78105, "loss": 0.2286, "lr": 4.835069045522307e-06, "epoch": 1.0233019653031177, "percentage": 20.47, "elapsed_time": "0:42:39", "remaining_time": "2:45:45", "throughput": 19659.63, "total_tokens": 50313280}
|
|
{"current_steps": 15990, "total_steps": 78105, "loss": 0.3084, "lr": 4.834869436160071e-06, "epoch": 1.0236220472440944, "percentage": 20.47, "elapsed_time": "0:42:39", "remaining_time": "2:45:44", "throughput": 19660.52, "total_tokens": 50329088}
|
|
{"current_steps": 15995, "total_steps": 78105, "loss": 0.4138, "lr": 4.834669710206617e-06, "epoch": 1.0239421291850714, "percentage": 20.48, "elapsed_time": "0:42:40", "remaining_time": "2:45:43", "throughput": 19661.54, "total_tokens": 50345472}
|
|
{"current_steps": 16000, "total_steps": 78105, "loss": 0.2707, "lr": 4.834469867671918e-06, "epoch": 1.0242622111260482, "percentage": 20.49, "elapsed_time": "0:42:41", "remaining_time": "2:45:41", "throughput": 19662.3, "total_tokens": 50360128}
|
|
{"current_steps": 16005, "total_steps": 78105, "loss": 0.3587, "lr": 4.8342699085659545e-06, "epoch": 1.0245822930670252, "percentage": 20.49, "elapsed_time": "0:42:41", "remaining_time": "2:45:40", "throughput": 19663.14, "total_tokens": 50375424}
|
|
{"current_steps": 16010, "total_steps": 78105, "loss": 0.3576, "lr": 4.83406983289871e-06, "epoch": 1.024902375008002, "percentage": 20.5, "elapsed_time": "0:42:42", "remaining_time": "2:45:39", "throughput": 19664.03, "total_tokens": 50390656}
|
|
{"current_steps": 16015, "total_steps": 78105, "loss": 0.268, "lr": 4.8338696406801755e-06, "epoch": 1.025222456948979, "percentage": 20.5, "elapsed_time": "0:42:43", "remaining_time": "2:45:37", "throughput": 19665.08, "total_tokens": 50407296}
|
|
{"current_steps": 16020, "total_steps": 78105, "loss": 0.3322, "lr": 4.8336693319203485e-06, "epoch": 1.0255425388899557, "percentage": 20.51, "elapsed_time": "0:42:43", "remaining_time": "2:45:36", "throughput": 19665.98, "total_tokens": 50422912}
|
|
{"current_steps": 16025, "total_steps": 78105, "loss": 0.3111, "lr": 4.83346890662923e-06, "epoch": 1.0258626208309327, "percentage": 20.52, "elapsed_time": "0:42:44", "remaining_time": "2:45:35", "throughput": 19666.84, "total_tokens": 50438208}
|
|
{"current_steps": 16030, "total_steps": 78105, "loss": 0.3233, "lr": 4.833268364816829e-06, "epoch": 1.0261827027719097, "percentage": 20.52, "elapsed_time": "0:42:45", "remaining_time": "2:45:33", "throughput": 19667.73, "total_tokens": 50453760}
|
|
{"current_steps": 16035, "total_steps": 78105, "loss": 0.296, "lr": 4.833067706493159e-06, "epoch": 1.0265027847128865, "percentage": 20.53, "elapsed_time": "0:42:45", "remaining_time": "2:45:32", "throughput": 19668.66, "total_tokens": 50469504}
|
|
{"current_steps": 16040, "total_steps": 78105, "loss": 0.31, "lr": 4.832866931668242e-06, "epoch": 1.0268228666538635, "percentage": 20.54, "elapsed_time": "0:42:46", "remaining_time": "2:45:31", "throughput": 19669.64, "total_tokens": 50485568}
|
|
{"current_steps": 16045, "total_steps": 78105, "loss": 0.2915, "lr": 4.8326660403521005e-06, "epoch": 1.0271429485948402, "percentage": 20.54, "elapsed_time": "0:42:47", "remaining_time": "2:45:30", "throughput": 19670.42, "total_tokens": 50500480}
|
|
{"current_steps": 16050, "total_steps": 78105, "loss": 0.3051, "lr": 4.832465032554768e-06, "epoch": 1.0274630305358172, "percentage": 20.55, "elapsed_time": "0:42:48", "remaining_time": "2:45:28", "throughput": 19671.46, "total_tokens": 50516352}
|
|
{"current_steps": 16055, "total_steps": 78105, "loss": 0.3903, "lr": 4.832263908286281e-06, "epoch": 1.027783112476794, "percentage": 20.56, "elapsed_time": "0:42:48", "remaining_time": "2:45:27", "throughput": 19672.3, "total_tokens": 50531456}
|
|
{"current_steps": 16060, "total_steps": 78105, "loss": 0.3955, "lr": 4.832062667556682e-06, "epoch": 1.028103194417771, "percentage": 20.56, "elapsed_time": "0:42:49", "remaining_time": "2:45:26", "throughput": 19673.13, "total_tokens": 50546240}
|
|
{"current_steps": 16065, "total_steps": 78105, "loss": 0.3919, "lr": 4.8318613103760215e-06, "epoch": 1.0284232763587478, "percentage": 20.57, "elapsed_time": "0:42:49", "remaining_time": "2:45:24", "throughput": 19673.9, "total_tokens": 50560768}
|
|
{"current_steps": 16070, "total_steps": 78105, "loss": 0.4741, "lr": 4.831659836754353e-06, "epoch": 1.0287433582997247, "percentage": 20.57, "elapsed_time": "0:42:50", "remaining_time": "2:45:23", "throughput": 19675.22, "total_tokens": 50578816}
|
|
{"current_steps": 16075, "total_steps": 78105, "loss": 0.389, "lr": 4.831458246701738e-06, "epoch": 1.0290634402407015, "percentage": 20.58, "elapsed_time": "0:42:51", "remaining_time": "2:45:22", "throughput": 19676.11, "total_tokens": 50594048}
|
|
{"current_steps": 16080, "total_steps": 78105, "loss": 0.2532, "lr": 4.831256540228242e-06, "epoch": 1.0293835221816785, "percentage": 20.59, "elapsed_time": "0:42:52", "remaining_time": "2:45:20", "throughput": 19676.95, "total_tokens": 50609344}
|
|
{"current_steps": 16085, "total_steps": 78105, "loss": 0.3798, "lr": 4.831054717343936e-06, "epoch": 1.0297036041226555, "percentage": 20.59, "elapsed_time": "0:42:52", "remaining_time": "2:45:19", "throughput": 19677.88, "total_tokens": 50624832}
|
|
{"current_steps": 16090, "total_steps": 78105, "loss": 0.2135, "lr": 4.830852778058901e-06, "epoch": 1.0300236860636323, "percentage": 20.6, "elapsed_time": "0:42:53", "remaining_time": "2:45:18", "throughput": 19678.84, "total_tokens": 50641152}
|
|
{"current_steps": 16095, "total_steps": 78105, "loss": 0.3473, "lr": 4.830650722383219e-06, "epoch": 1.0303437680046093, "percentage": 20.61, "elapsed_time": "0:42:54", "remaining_time": "2:45:17", "throughput": 19679.87, "total_tokens": 50657472}
|
|
{"current_steps": 16100, "total_steps": 78105, "loss": 0.3126, "lr": 4.83044855032698e-06, "epoch": 1.030663849945586, "percentage": 20.61, "elapsed_time": "0:42:54", "remaining_time": "2:45:16", "throughput": 19680.79, "total_tokens": 50673536}
|
|
{"current_steps": 16105, "total_steps": 78105, "loss": 0.282, "lr": 4.830246261900279e-06, "epoch": 1.030983931886563, "percentage": 20.62, "elapsed_time": "0:42:55", "remaining_time": "2:45:14", "throughput": 19681.69, "total_tokens": 50688832}
|
|
{"current_steps": 16110, "total_steps": 78105, "loss": 0.2764, "lr": 4.830043857113217e-06, "epoch": 1.0313040138275398, "percentage": 20.63, "elapsed_time": "0:42:56", "remaining_time": "2:45:13", "throughput": 19682.84, "total_tokens": 50705728}
|
|
{"current_steps": 16115, "total_steps": 78105, "loss": 0.3087, "lr": 4.829841335975902e-06, "epoch": 1.0316240957685168, "percentage": 20.63, "elapsed_time": "0:42:56", "remaining_time": "2:45:12", "throughput": 19683.88, "total_tokens": 50722112}
|
|
{"current_steps": 16120, "total_steps": 78105, "loss": 0.326, "lr": 4.8296386984984466e-06, "epoch": 1.0319441777094935, "percentage": 20.64, "elapsed_time": "0:42:57", "remaining_time": "2:45:11", "throughput": 19684.73, "total_tokens": 50737408}
|
|
{"current_steps": 16125, "total_steps": 78105, "loss": 0.277, "lr": 4.829435944690969e-06, "epoch": 1.0322642596504705, "percentage": 20.65, "elapsed_time": "0:42:58", "remaining_time": "2:45:09", "throughput": 19685.63, "total_tokens": 50753152}
|
|
{"current_steps": 16130, "total_steps": 78105, "loss": 0.4376, "lr": 4.829233074563594e-06, "epoch": 1.0325843415914475, "percentage": 20.65, "elapsed_time": "0:42:58", "remaining_time": "2:45:08", "throughput": 19686.52, "total_tokens": 50768704}
|
|
{"current_steps": 16135, "total_steps": 78105, "loss": 0.2066, "lr": 4.8290300881264516e-06, "epoch": 1.0329044235324243, "percentage": 20.66, "elapsed_time": "0:42:59", "remaining_time": "2:45:07", "throughput": 19687.39, "total_tokens": 50783936}
|
|
{"current_steps": 16140, "total_steps": 78105, "loss": 0.3182, "lr": 4.8288269853896786e-06, "epoch": 1.0332245054734013, "percentage": 20.66, "elapsed_time": "0:43:00", "remaining_time": "2:45:05", "throughput": 19688.29, "total_tokens": 50799488}
|
|
{"current_steps": 16145, "total_steps": 78105, "loss": 0.4688, "lr": 4.828623766363416e-06, "epoch": 1.033544587414378, "percentage": 20.67, "elapsed_time": "0:43:00", "remaining_time": "2:45:04", "throughput": 19689.19, "total_tokens": 50814720}
|
|
{"current_steps": 16150, "total_steps": 78105, "loss": 0.2703, "lr": 4.828420431057812e-06, "epoch": 1.033864669355355, "percentage": 20.68, "elapsed_time": "0:43:01", "remaining_time": "2:45:03", "throughput": 19689.94, "total_tokens": 50829632}
|
|
{"current_steps": 16155, "total_steps": 78105, "loss": 0.2409, "lr": 4.828216979483019e-06, "epoch": 1.0341847512963318, "percentage": 20.68, "elapsed_time": "0:43:02", "remaining_time": "2:45:01", "throughput": 19690.88, "total_tokens": 50845376}
|
|
{"current_steps": 16160, "total_steps": 78105, "loss": 0.411, "lr": 4.828013411649198e-06, "epoch": 1.0345048332373088, "percentage": 20.69, "elapsed_time": "0:43:02", "remaining_time": "2:45:00", "throughput": 19691.91, "total_tokens": 50861888}
|
|
{"current_steps": 16165, "total_steps": 78105, "loss": 0.3307, "lr": 4.827809727566514e-06, "epoch": 1.0348249151782856, "percentage": 20.7, "elapsed_time": "0:43:03", "remaining_time": "2:44:59", "throughput": 19692.89, "total_tokens": 50877952}
|
|
{"current_steps": 16170, "total_steps": 78105, "loss": 0.3777, "lr": 4.827605927245137e-06, "epoch": 1.0351449971192626, "percentage": 20.7, "elapsed_time": "0:43:04", "remaining_time": "2:44:58", "throughput": 19693.62, "total_tokens": 50892608}
|
|
{"current_steps": 16175, "total_steps": 78105, "loss": 0.3058, "lr": 4.827402010695244e-06, "epoch": 1.0354650790602393, "percentage": 20.71, "elapsed_time": "0:43:04", "remaining_time": "2:44:57", "throughput": 19694.78, "total_tokens": 50909568}
|
|
{"current_steps": 16180, "total_steps": 78105, "loss": 0.326, "lr": 4.827197977927017e-06, "epoch": 1.0357851610012163, "percentage": 20.72, "elapsed_time": "0:43:05", "remaining_time": "2:44:55", "throughput": 19695.86, "total_tokens": 50926144}
|
|
{"current_steps": 16185, "total_steps": 78105, "loss": 0.2551, "lr": 4.826993828950646e-06, "epoch": 1.0361052429421933, "percentage": 20.72, "elapsed_time": "0:43:06", "remaining_time": "2:44:54", "throughput": 19696.81, "total_tokens": 50942080}
|
|
{"current_steps": 16190, "total_steps": 78105, "loss": 0.2769, "lr": 4.826789563776323e-06, "epoch": 1.03642532488317, "percentage": 20.73, "elapsed_time": "0:43:07", "remaining_time": "2:44:53", "throughput": 19697.83, "total_tokens": 50958592}
|
|
{"current_steps": 16195, "total_steps": 78105, "loss": 0.2245, "lr": 4.826585182414249e-06, "epoch": 1.036745406824147, "percentage": 20.73, "elapsed_time": "0:43:07", "remaining_time": "2:44:52", "throughput": 19698.83, "total_tokens": 50974464}
|
|
{"current_steps": 16200, "total_steps": 78105, "loss": 0.4052, "lr": 4.826380684874631e-06, "epoch": 1.0370654887651238, "percentage": 20.74, "elapsed_time": "0:43:08", "remaining_time": "2:44:50", "throughput": 19699.76, "total_tokens": 50990016}
|
|
{"current_steps": 16205, "total_steps": 78105, "loss": 0.4216, "lr": 4.826176071167678e-06, "epoch": 1.0373855707061008, "percentage": 20.75, "elapsed_time": "0:43:09", "remaining_time": "2:44:49", "throughput": 19700.65, "total_tokens": 51005376}
|
|
{"current_steps": 16210, "total_steps": 78105, "loss": 0.4809, "lr": 4.825971341303609e-06, "epoch": 1.0377056526470776, "percentage": 20.75, "elapsed_time": "0:43:09", "remaining_time": "2:44:48", "throughput": 19701.47, "total_tokens": 51020416}
|
|
{"current_steps": 16215, "total_steps": 78105, "loss": 0.3424, "lr": 4.825766495292647e-06, "epoch": 1.0380257345880546, "percentage": 20.76, "elapsed_time": "0:43:10", "remaining_time": "2:44:46", "throughput": 19702.37, "total_tokens": 51036096}
|
|
{"current_steps": 16220, "total_steps": 78105, "loss": 0.3262, "lr": 4.825561533145021e-06, "epoch": 1.0383458165290314, "percentage": 20.77, "elapsed_time": "0:43:11", "remaining_time": "2:44:45", "throughput": 19703.2, "total_tokens": 51051328}
|
|
{"current_steps": 16225, "total_steps": 78105, "loss": 0.4197, "lr": 4.825356454870964e-06, "epoch": 1.0386658984700083, "percentage": 20.77, "elapsed_time": "0:43:11", "remaining_time": "2:44:44", "throughput": 19704.21, "total_tokens": 51067776}
|
|
{"current_steps": 16230, "total_steps": 78105, "loss": 0.3289, "lr": 4.825151260480719e-06, "epoch": 1.038985980410985, "percentage": 20.78, "elapsed_time": "0:43:12", "remaining_time": "2:44:43", "throughput": 19705.36, "total_tokens": 51084736}
|
|
{"current_steps": 16235, "total_steps": 78105, "loss": 0.2553, "lr": 4.824945949984532e-06, "epoch": 1.039306062351962, "percentage": 20.79, "elapsed_time": "0:43:13", "remaining_time": "2:44:42", "throughput": 19706.28, "total_tokens": 51100352}
|
|
{"current_steps": 16240, "total_steps": 78105, "loss": 0.2415, "lr": 4.8247405233926534e-06, "epoch": 1.039626144292939, "percentage": 20.79, "elapsed_time": "0:43:13", "remaining_time": "2:44:40", "throughput": 19707.21, "total_tokens": 51116352}
|
|
{"current_steps": 16245, "total_steps": 78105, "loss": 0.3407, "lr": 4.824534980715343e-06, "epoch": 1.0399462262339159, "percentage": 20.8, "elapsed_time": "0:43:14", "remaining_time": "2:44:39", "throughput": 19708.23, "total_tokens": 51132736}
|
|
{"current_steps": 16250, "total_steps": 78105, "loss": 0.3125, "lr": 4.824329321962863e-06, "epoch": 1.0402663081748929, "percentage": 20.81, "elapsed_time": "0:43:15", "remaining_time": "2:44:38", "throughput": 19709.35, "total_tokens": 51149696}
|
|
{"current_steps": 16255, "total_steps": 78105, "loss": 0.2986, "lr": 4.824123547145484e-06, "epoch": 1.0405863901158696, "percentage": 20.81, "elapsed_time": "0:43:15", "remaining_time": "2:44:37", "throughput": 19710.26, "total_tokens": 51165248}
|
|
{"current_steps": 16260, "total_steps": 78105, "loss": 0.3958, "lr": 4.82391765627348e-06, "epoch": 1.0409064720568466, "percentage": 20.82, "elapsed_time": "0:43:16", "remaining_time": "2:44:36", "throughput": 19711.28, "total_tokens": 51181632}
|
|
{"current_steps": 16265, "total_steps": 78105, "loss": 0.3475, "lr": 4.823711649357135e-06, "epoch": 1.0412265539978234, "percentage": 20.82, "elapsed_time": "0:43:17", "remaining_time": "2:44:34", "throughput": 19712.12, "total_tokens": 51196864}
|
|
{"current_steps": 16270, "total_steps": 78105, "loss": 0.2704, "lr": 4.823505526406732e-06, "epoch": 1.0415466359388004, "percentage": 20.83, "elapsed_time": "0:43:17", "remaining_time": "2:44:33", "throughput": 19713.12, "total_tokens": 51213184}
|
|
{"current_steps": 16275, "total_steps": 78105, "loss": 0.2751, "lr": 4.823299287432567e-06, "epoch": 1.0418667178797771, "percentage": 20.84, "elapsed_time": "0:43:18", "remaining_time": "2:44:32", "throughput": 19714.13, "total_tokens": 51229504}
|
|
{"current_steps": 16280, "total_steps": 78105, "loss": 0.264, "lr": 4.823092932444937e-06, "epoch": 1.0421867998207541, "percentage": 20.84, "elapsed_time": "0:43:19", "remaining_time": "2:44:31", "throughput": 19714.97, "total_tokens": 51244800}
|
|
{"current_steps": 16285, "total_steps": 78105, "loss": 0.2822, "lr": 4.822886461454146e-06, "epoch": 1.042506881761731, "percentage": 20.85, "elapsed_time": "0:43:19", "remaining_time": "2:44:29", "throughput": 19715.79, "total_tokens": 51260032}
|
|
{"current_steps": 16290, "total_steps": 78105, "loss": 0.3556, "lr": 4.822679874470505e-06, "epoch": 1.0428269637027079, "percentage": 20.86, "elapsed_time": "0:43:20", "remaining_time": "2:44:28", "throughput": 19716.64, "total_tokens": 51275392}
|
|
{"current_steps": 16295, "total_steps": 78105, "loss": 0.3518, "lr": 4.8224731715043295e-06, "epoch": 1.0431470456436849, "percentage": 20.86, "elapsed_time": "0:43:21", "remaining_time": "2:44:27", "throughput": 19717.53, "total_tokens": 51290944}
|
|
{"current_steps": 16300, "total_steps": 78105, "loss": 0.2881, "lr": 4.822266352565941e-06, "epoch": 1.0434671275846616, "percentage": 20.87, "elapsed_time": "0:43:21", "remaining_time": "2:44:25", "throughput": 19718.51, "total_tokens": 51307200}
|
|
{"current_steps": 16305, "total_steps": 78105, "loss": 0.3328, "lr": 4.822059417665667e-06, "epoch": 1.0437872095256386, "percentage": 20.88, "elapsed_time": "0:43:22", "remaining_time": "2:44:24", "throughput": 19719.24, "total_tokens": 51321920}
|
|
{"current_steps": 16310, "total_steps": 78105, "loss": 0.233, "lr": 4.821852366813841e-06, "epoch": 1.0441072914666154, "percentage": 20.88, "elapsed_time": "0:43:23", "remaining_time": "2:44:23", "throughput": 19720.12, "total_tokens": 51337344}
|
|
{"current_steps": 16315, "total_steps": 78105, "loss": 0.2586, "lr": 4.821645200020802e-06, "epoch": 1.0444273734075924, "percentage": 20.89, "elapsed_time": "0:43:23", "remaining_time": "2:44:21", "throughput": 19720.91, "total_tokens": 51352256}
|
|
{"current_steps": 16320, "total_steps": 78105, "loss": 0.3016, "lr": 4.821437917296895e-06, "epoch": 1.0447474553485692, "percentage": 20.89, "elapsed_time": "0:43:24", "remaining_time": "2:44:20", "throughput": 19721.83, "total_tokens": 51367936}
|
|
{"current_steps": 16325, "total_steps": 78105, "loss": 0.4314, "lr": 4.82123051865247e-06, "epoch": 1.0450675372895462, "percentage": 20.9, "elapsed_time": "0:43:25", "remaining_time": "2:44:19", "throughput": 19722.62, "total_tokens": 51382592}
|
|
{"current_steps": 16330, "total_steps": 78105, "loss": 0.4091, "lr": 4.821023004097883e-06, "epoch": 1.045387619230523, "percentage": 20.91, "elapsed_time": "0:43:25", "remaining_time": "2:44:18", "throughput": 19723.63, "total_tokens": 51398528}
|
|
{"current_steps": 16335, "total_steps": 78105, "loss": 0.3303, "lr": 4.820815373643498e-06, "epoch": 1.0457077011715, "percentage": 20.91, "elapsed_time": "0:43:26", "remaining_time": "2:44:16", "throughput": 19724.4, "total_tokens": 51413376}
|
|
{"current_steps": 16340, "total_steps": 78105, "loss": 0.3399, "lr": 4.820607627299682e-06, "epoch": 1.0460277831124767, "percentage": 20.92, "elapsed_time": "0:43:27", "remaining_time": "2:44:15", "throughput": 19725.7, "total_tokens": 51432064}
|
|
{"current_steps": 16345, "total_steps": 78105, "loss": 0.3488, "lr": 4.820399765076808e-06, "epoch": 1.0463478650534537, "percentage": 20.93, "elapsed_time": "0:43:28", "remaining_time": "2:44:14", "throughput": 19726.48, "total_tokens": 51447232}
|
|
{"current_steps": 16350, "total_steps": 78105, "loss": 0.3695, "lr": 4.8201917869852565e-06, "epoch": 1.0466679469944307, "percentage": 20.93, "elapsed_time": "0:43:28", "remaining_time": "2:44:13", "throughput": 19727.4, "total_tokens": 51463168}
|
|
{"current_steps": 16355, "total_steps": 78105, "loss": 0.1727, "lr": 4.819983693035412e-06, "epoch": 1.0469880289354074, "percentage": 20.94, "elapsed_time": "0:43:29", "remaining_time": "2:44:12", "throughput": 19728.75, "total_tokens": 51481216}
|
|
{"current_steps": 16360, "total_steps": 78105, "loss": 0.2678, "lr": 4.819775483237667e-06, "epoch": 1.0473081108763844, "percentage": 20.95, "elapsed_time": "0:43:30", "remaining_time": "2:44:11", "throughput": 19729.78, "total_tokens": 51497856}
|
|
{"current_steps": 16365, "total_steps": 78105, "loss": 0.3309, "lr": 4.819567157602418e-06, "epoch": 1.0476281928173612, "percentage": 20.95, "elapsed_time": "0:43:30", "remaining_time": "2:44:09", "throughput": 19730.63, "total_tokens": 51512896}
|
|
{"current_steps": 16370, "total_steps": 78105, "loss": 0.3148, "lr": 4.819358716140067e-06, "epoch": 1.0479482747583382, "percentage": 20.96, "elapsed_time": "0:43:31", "remaining_time": "2:44:08", "throughput": 19731.54, "total_tokens": 51528448}
|
|
{"current_steps": 16375, "total_steps": 78105, "loss": 0.3235, "lr": 4.819150158861023e-06, "epoch": 1.048268356699315, "percentage": 20.97, "elapsed_time": "0:43:32", "remaining_time": "2:44:07", "throughput": 19732.5, "total_tokens": 51544768}
|
|
{"current_steps": 16380, "total_steps": 78105, "loss": 0.285, "lr": 4.8189414857756985e-06, "epoch": 1.048588438640292, "percentage": 20.97, "elapsed_time": "0:43:32", "remaining_time": "2:44:06", "throughput": 19733.38, "total_tokens": 51560448}
|
|
{"current_steps": 16385, "total_steps": 78105, "loss": 0.3436, "lr": 4.818732696894517e-06, "epoch": 1.0489085205812687, "percentage": 20.98, "elapsed_time": "0:43:33", "remaining_time": "2:44:04", "throughput": 19734.24, "total_tokens": 51575616}
|
|
{"current_steps": 16390, "total_steps": 78105, "loss": 0.2012, "lr": 4.8185237922279014e-06, "epoch": 1.0492286025222457, "percentage": 20.98, "elapsed_time": "0:43:34", "remaining_time": "2:44:03", "throughput": 19735.15, "total_tokens": 51591680}
|
|
{"current_steps": 16395, "total_steps": 78105, "loss": 0.2593, "lr": 4.818314771786284e-06, "epoch": 1.0495486844632227, "percentage": 20.99, "elapsed_time": "0:43:34", "remaining_time": "2:44:02", "throughput": 19736.1, "total_tokens": 51607680}
|
|
{"current_steps": 16400, "total_steps": 78105, "loss": 0.2339, "lr": 4.818105635580104e-06, "epoch": 1.0498687664041995, "percentage": 21.0, "elapsed_time": "0:43:35", "remaining_time": "2:44:01", "throughput": 19736.86, "total_tokens": 51622912}
|
|
{"current_steps": 16405, "total_steps": 78105, "loss": 0.4946, "lr": 4.817896383619802e-06, "epoch": 1.0501888483451765, "percentage": 21.0, "elapsed_time": "0:43:36", "remaining_time": "2:43:59", "throughput": 19737.72, "total_tokens": 51638144}
|
|
{"current_steps": 16410, "total_steps": 78105, "loss": 0.2627, "lr": 4.817687015915829e-06, "epoch": 1.0505089302861532, "percentage": 21.01, "elapsed_time": "0:43:36", "remaining_time": "2:43:58", "throughput": 19738.67, "total_tokens": 51654336}
|
|
{"current_steps": 16415, "total_steps": 78105, "loss": 0.3434, "lr": 4.817477532478638e-06, "epoch": 1.0508290122271302, "percentage": 21.02, "elapsed_time": "0:43:37", "remaining_time": "2:43:57", "throughput": 19739.53, "total_tokens": 51669760}
|
|
{"current_steps": 16420, "total_steps": 78105, "loss": 0.3096, "lr": 4.817267933318691e-06, "epoch": 1.051149094168107, "percentage": 21.02, "elapsed_time": "0:43:38", "remaining_time": "2:43:56", "throughput": 19740.39, "total_tokens": 51685440}
|
|
{"current_steps": 16425, "total_steps": 78105, "loss": 0.3697, "lr": 4.817058218446453e-06, "epoch": 1.051469176109084, "percentage": 21.03, "elapsed_time": "0:43:38", "remaining_time": "2:43:54", "throughput": 19741.21, "total_tokens": 51700544}
|
|
{"current_steps": 16430, "total_steps": 78105, "loss": 0.2282, "lr": 4.816848387872397e-06, "epoch": 1.0517892580500607, "percentage": 21.04, "elapsed_time": "0:43:39", "remaining_time": "2:43:53", "throughput": 19742.07, "total_tokens": 51716096}
|
|
{"current_steps": 16435, "total_steps": 78105, "loss": 0.3054, "lr": 4.8166384416070005e-06, "epoch": 1.0521093399910377, "percentage": 21.04, "elapsed_time": "0:43:40", "remaining_time": "2:43:52", "throughput": 19743.21, "total_tokens": 51733312}
|
|
{"current_steps": 16440, "total_steps": 78105, "loss": 0.3531, "lr": 4.8164283796607466e-06, "epoch": 1.0524294219320145, "percentage": 21.05, "elapsed_time": "0:43:40", "remaining_time": "2:43:51", "throughput": 19744.1, "total_tokens": 51748992}
|
|
{"current_steps": 16445, "total_steps": 78105, "loss": 0.4066, "lr": 4.8162182020441265e-06, "epoch": 1.0527495038729915, "percentage": 21.05, "elapsed_time": "0:43:41", "remaining_time": "2:43:49", "throughput": 19744.93, "total_tokens": 51764096}
|
|
{"current_steps": 16450, "total_steps": 78105, "loss": 0.3169, "lr": 4.816007908767633e-06, "epoch": 1.0530695858139685, "percentage": 21.06, "elapsed_time": "0:43:42", "remaining_time": "2:43:48", "throughput": 19745.8, "total_tokens": 51779584}
|
|
{"current_steps": 16455, "total_steps": 78105, "loss": 0.3345, "lr": 4.815797499841769e-06, "epoch": 1.0533896677549452, "percentage": 21.07, "elapsed_time": "0:43:42", "remaining_time": "2:43:47", "throughput": 19746.55, "total_tokens": 51794368}
|
|
{"current_steps": 16460, "total_steps": 78105, "loss": 0.2701, "lr": 4.81558697527704e-06, "epoch": 1.0537097496959222, "percentage": 21.07, "elapsed_time": "0:43:43", "remaining_time": "2:43:45", "throughput": 19747.36, "total_tokens": 51809664}
|
|
{"current_steps": 16465, "total_steps": 78105, "loss": 0.332, "lr": 4.815376335083959e-06, "epoch": 1.054029831636899, "percentage": 21.08, "elapsed_time": "0:43:44", "remaining_time": "2:43:44", "throughput": 19748.16, "total_tokens": 51824832}
|
|
{"current_steps": 16470, "total_steps": 78105, "loss": 0.5044, "lr": 4.815165579273044e-06, "epoch": 1.054349913577876, "percentage": 21.09, "elapsed_time": "0:43:44", "remaining_time": "2:43:43", "throughput": 19749.24, "total_tokens": 51841408}
|
|
{"current_steps": 16475, "total_steps": 78105, "loss": 0.2284, "lr": 4.8149547078548195e-06, "epoch": 1.0546699955188528, "percentage": 21.09, "elapsed_time": "0:43:45", "remaining_time": "2:43:42", "throughput": 19750.13, "total_tokens": 51857024}
|
|
{"current_steps": 16480, "total_steps": 78105, "loss": 0.2576, "lr": 4.814743720839815e-06, "epoch": 1.0549900774598298, "percentage": 21.1, "elapsed_time": "0:43:46", "remaining_time": "2:43:40", "throughput": 19751.02, "total_tokens": 51873088}
|
|
{"current_steps": 16485, "total_steps": 78105, "loss": 0.3573, "lr": 4.814532618238566e-06, "epoch": 1.0553101594008065, "percentage": 21.11, "elapsed_time": "0:43:47", "remaining_time": "2:43:39", "throughput": 19751.92, "total_tokens": 51888832}
|
|
{"current_steps": 16490, "total_steps": 78105, "loss": 0.2721, "lr": 4.814321400061614e-06, "epoch": 1.0556302413417835, "percentage": 21.11, "elapsed_time": "0:43:47", "remaining_time": "2:43:38", "throughput": 19752.9, "total_tokens": 51905088}
|
|
{"current_steps": 16495, "total_steps": 78105, "loss": 0.3373, "lr": 4.814110066319506e-06, "epoch": 1.0559503232827603, "percentage": 21.12, "elapsed_time": "0:43:48", "remaining_time": "2:43:37", "throughput": 19753.79, "total_tokens": 51920448}
|
|
{"current_steps": 16500, "total_steps": 78105, "loss": 0.2952, "lr": 4.8138986170227955e-06, "epoch": 1.0562704052237373, "percentage": 21.13, "elapsed_time": "0:43:49", "remaining_time": "2:43:36", "throughput": 19754.85, "total_tokens": 51936960}
|
|
{"current_steps": 16505, "total_steps": 78105, "loss": 0.3214, "lr": 4.81368705218204e-06, "epoch": 1.0565904871647143, "percentage": 21.13, "elapsed_time": "0:43:49", "remaining_time": "2:43:34", "throughput": 19755.87, "total_tokens": 51953536}
|
|
{"current_steps": 16510, "total_steps": 78105, "loss": 0.3257, "lr": 4.8134753718078054e-06, "epoch": 1.056910569105691, "percentage": 21.14, "elapsed_time": "0:43:50", "remaining_time": "2:43:33", "throughput": 19756.71, "total_tokens": 51968704}
|
|
{"current_steps": 16515, "total_steps": 78105, "loss": 0.314, "lr": 4.813263575910661e-06, "epoch": 1.057230651046668, "percentage": 21.14, "elapsed_time": "0:43:51", "remaining_time": "2:43:32", "throughput": 19757.52, "total_tokens": 51983808}
|
|
{"current_steps": 16520, "total_steps": 78105, "loss": 0.2979, "lr": 4.813051664501182e-06, "epoch": 1.0575507329876448, "percentage": 21.15, "elapsed_time": "0:43:51", "remaining_time": "2:43:30", "throughput": 19758.34, "total_tokens": 51999424}
|
|
{"current_steps": 16525, "total_steps": 78105, "loss": 0.3579, "lr": 4.812839637589953e-06, "epoch": 1.0578708149286218, "percentage": 21.16, "elapsed_time": "0:43:52", "remaining_time": "2:43:29", "throughput": 19759.13, "total_tokens": 52014784}
|
|
{"current_steps": 16530, "total_steps": 78105, "loss": 0.3112, "lr": 4.812627495187558e-06, "epoch": 1.0581908968695986, "percentage": 21.16, "elapsed_time": "0:43:53", "remaining_time": "2:43:28", "throughput": 19760.05, "total_tokens": 52030656}
|
|
{"current_steps": 16535, "total_steps": 78105, "loss": 0.3321, "lr": 4.812415237304593e-06, "epoch": 1.0585109788105755, "percentage": 21.17, "elapsed_time": "0:43:53", "remaining_time": "2:43:27", "throughput": 19760.87, "total_tokens": 52045888}
|
|
{"current_steps": 16540, "total_steps": 78105, "loss": 0.2757, "lr": 4.812202863951655e-06, "epoch": 1.0588310607515523, "percentage": 21.18, "elapsed_time": "0:43:54", "remaining_time": "2:43:25", "throughput": 19761.66, "total_tokens": 52060672}
|
|
{"current_steps": 16545, "total_steps": 78105, "loss": 0.2529, "lr": 4.811990375139351e-06, "epoch": 1.0591511426925293, "percentage": 21.18, "elapsed_time": "0:43:55", "remaining_time": "2:43:24", "throughput": 19762.39, "total_tokens": 52075456}
|
|
{"current_steps": 16550, "total_steps": 78105, "loss": 0.3074, "lr": 4.81177777087829e-06, "epoch": 1.059471224633506, "percentage": 21.19, "elapsed_time": "0:43:55", "remaining_time": "2:43:23", "throughput": 19763.04, "total_tokens": 52089600}
|
|
{"current_steps": 16555, "total_steps": 78105, "loss": 0.3159, "lr": 4.8115650511790875e-06, "epoch": 1.059791306574483, "percentage": 21.2, "elapsed_time": "0:43:56", "remaining_time": "2:43:21", "throughput": 19763.85, "total_tokens": 52104768}
|
|
{"current_steps": 16560, "total_steps": 78105, "loss": 0.2456, "lr": 4.811352216052368e-06, "epoch": 1.06011138851546, "percentage": 21.2, "elapsed_time": "0:43:57", "remaining_time": "2:43:20", "throughput": 19764.68, "total_tokens": 52120576}
|
|
{"current_steps": 16565, "total_steps": 78105, "loss": 0.2515, "lr": 4.8111392655087585e-06, "epoch": 1.0604314704564368, "percentage": 21.21, "elapsed_time": "0:43:57", "remaining_time": "2:43:19", "throughput": 19765.57, "total_tokens": 52136256}
|
|
{"current_steps": 16570, "total_steps": 78105, "loss": 0.3077, "lr": 4.810926199558892e-06, "epoch": 1.0607515523974138, "percentage": 21.22, "elapsed_time": "0:43:58", "remaining_time": "2:43:18", "throughput": 19766.47, "total_tokens": 52152256}
|
|
{"current_steps": 16575, "total_steps": 78105, "loss": 0.364, "lr": 4.810713018213407e-06, "epoch": 1.0610716343383906, "percentage": 21.22, "elapsed_time": "0:43:59", "remaining_time": "2:43:16", "throughput": 19767.27, "total_tokens": 52167488}
|
|
{"current_steps": 16580, "total_steps": 78105, "loss": 0.4094, "lr": 4.810499721482952e-06, "epoch": 1.0613917162793676, "percentage": 21.23, "elapsed_time": "0:43:59", "remaining_time": "2:43:15", "throughput": 19768.04, "total_tokens": 52182208}
|
|
{"current_steps": 16585, "total_steps": 78105, "loss": 0.2917, "lr": 4.8102863093781745e-06, "epoch": 1.0617117982203443, "percentage": 21.23, "elapsed_time": "0:44:00", "remaining_time": "2:43:14", "throughput": 19769.06, "total_tokens": 52198720}
|
|
{"current_steps": 16590, "total_steps": 78105, "loss": 0.5487, "lr": 4.810072781909734e-06, "epoch": 1.0620318801613213, "percentage": 21.24, "elapsed_time": "0:44:01", "remaining_time": "2:43:13", "throughput": 19770.15, "total_tokens": 52215552}
|
|
{"current_steps": 16595, "total_steps": 78105, "loss": 0.2942, "lr": 4.80985913908829e-06, "epoch": 1.062351962102298, "percentage": 21.25, "elapsed_time": "0:44:01", "remaining_time": "2:43:12", "throughput": 19771.2, "total_tokens": 52232064}
|
|
{"current_steps": 16600, "total_steps": 78105, "loss": 0.4301, "lr": 4.809645380924512e-06, "epoch": 1.062672044043275, "percentage": 21.25, "elapsed_time": "0:44:02", "remaining_time": "2:43:10", "throughput": 19772.15, "total_tokens": 52248064}
|
|
{"current_steps": 16605, "total_steps": 78105, "loss": 0.438, "lr": 4.809431507429076e-06, "epoch": 1.0629921259842519, "percentage": 21.26, "elapsed_time": "0:44:03", "remaining_time": "2:43:09", "throughput": 19773.23, "total_tokens": 52265024}
|
|
{"current_steps": 16610, "total_steps": 78105, "loss": 0.1769, "lr": 4.809217518612659e-06, "epoch": 1.0633122079252288, "percentage": 21.27, "elapsed_time": "0:44:03", "remaining_time": "2:43:08", "throughput": 19774.09, "total_tokens": 52280320}
|
|
{"current_steps": 16615, "total_steps": 78105, "loss": 0.2284, "lr": 4.809003414485947e-06, "epoch": 1.0636322898662058, "percentage": 21.27, "elapsed_time": "0:44:04", "remaining_time": "2:43:07", "throughput": 19774.89, "total_tokens": 52295616}
|
|
{"current_steps": 16620, "total_steps": 78105, "loss": 0.3144, "lr": 4.8087891950596314e-06, "epoch": 1.0639523718071826, "percentage": 21.28, "elapsed_time": "0:44:05", "remaining_time": "2:43:05", "throughput": 19775.85, "total_tokens": 52311680}
|
|
{"current_steps": 16625, "total_steps": 78105, "loss": 0.3209, "lr": 4.80857486034441e-06, "epoch": 1.0642724537481596, "percentage": 21.29, "elapsed_time": "0:44:05", "remaining_time": "2:43:04", "throughput": 19776.59, "total_tokens": 52326464}
|
|
{"current_steps": 16630, "total_steps": 78105, "loss": 0.3622, "lr": 4.808360410350985e-06, "epoch": 1.0645925356891364, "percentage": 21.29, "elapsed_time": "0:44:06", "remaining_time": "2:43:03", "throughput": 19777.44, "total_tokens": 52341696}
|
|
{"current_steps": 16635, "total_steps": 78105, "loss": 0.3042, "lr": 4.808145845090065e-06, "epoch": 1.0649126176301134, "percentage": 21.3, "elapsed_time": "0:44:07", "remaining_time": "2:43:02", "throughput": 19778.31, "total_tokens": 52357504}
|
|
{"current_steps": 16640, "total_steps": 78105, "loss": 0.3643, "lr": 4.807931164572364e-06, "epoch": 1.0652326995710901, "percentage": 21.3, "elapsed_time": "0:44:07", "remaining_time": "2:43:00", "throughput": 19779.32, "total_tokens": 52373888}
|
|
{"current_steps": 16645, "total_steps": 78105, "loss": 0.2879, "lr": 4.807716368808602e-06, "epoch": 1.0655527815120671, "percentage": 21.31, "elapsed_time": "0:44:08", "remaining_time": "2:42:59", "throughput": 19780.18, "total_tokens": 52389440}
|
|
{"current_steps": 16650, "total_steps": 78105, "loss": 0.2488, "lr": 4.807501457809505e-06, "epoch": 1.0658728634530439, "percentage": 21.32, "elapsed_time": "0:44:09", "remaining_time": "2:42:58", "throughput": 19781.0, "total_tokens": 52404608}
|
|
{"current_steps": 16655, "total_steps": 78105, "loss": 0.3124, "lr": 4.807286431585805e-06, "epoch": 1.0661929453940209, "percentage": 21.32, "elapsed_time": "0:44:09", "remaining_time": "2:42:56", "throughput": 19781.76, "total_tokens": 52419520}
|
|
{"current_steps": 16660, "total_steps": 78105, "loss": 0.2219, "lr": 4.807071290148238e-06, "epoch": 1.0665130273349979, "percentage": 21.33, "elapsed_time": "0:44:10", "remaining_time": "2:42:55", "throughput": 19782.57, "total_tokens": 52434944}
|
|
{"current_steps": 16665, "total_steps": 78105, "loss": 0.2712, "lr": 4.806856033507549e-06, "epoch": 1.0668331092759746, "percentage": 21.34, "elapsed_time": "0:44:11", "remaining_time": "2:42:54", "throughput": 19783.48, "total_tokens": 52451008}
|
|
{"current_steps": 16670, "total_steps": 78105, "loss": 0.3997, "lr": 4.806640661674485e-06, "epoch": 1.0671531912169516, "percentage": 21.34, "elapsed_time": "0:44:11", "remaining_time": "2:42:53", "throughput": 19784.5, "total_tokens": 52467392}
|
|
{"current_steps": 16675, "total_steps": 78105, "loss": 0.2614, "lr": 4.806425174659801e-06, "epoch": 1.0674732731579284, "percentage": 21.35, "elapsed_time": "0:44:12", "remaining_time": "2:42:52", "throughput": 19785.3, "total_tokens": 52482432}
|
|
{"current_steps": 16680, "total_steps": 78105, "loss": 0.3247, "lr": 4.806209572474258e-06, "epoch": 1.0677933550989054, "percentage": 21.36, "elapsed_time": "0:44:13", "remaining_time": "2:42:50", "throughput": 19786.03, "total_tokens": 52497472}
|
|
{"current_steps": 16685, "total_steps": 78105, "loss": 0.1651, "lr": 4.805993855128622e-06, "epoch": 1.0681134370398822, "percentage": 21.36, "elapsed_time": "0:44:13", "remaining_time": "2:42:49", "throughput": 19786.85, "total_tokens": 52512960}
|
|
{"current_steps": 16690, "total_steps": 78105, "loss": 0.3146, "lr": 4.8057780226336636e-06, "epoch": 1.0684335189808591, "percentage": 21.37, "elapsed_time": "0:44:14", "remaining_time": "2:42:48", "throughput": 19787.69, "total_tokens": 52528000}
|
|
{"current_steps": 16695, "total_steps": 78105, "loss": 0.4474, "lr": 4.805562075000161e-06, "epoch": 1.068753600921836, "percentage": 21.38, "elapsed_time": "0:44:15", "remaining_time": "2:42:46", "throughput": 19788.46, "total_tokens": 52542976}
|
|
{"current_steps": 16700, "total_steps": 78105, "loss": 0.3965, "lr": 4.805346012238898e-06, "epoch": 1.069073682862813, "percentage": 21.38, "elapsed_time": "0:44:15", "remaining_time": "2:42:45", "throughput": 19789.3, "total_tokens": 52558208}
|
|
{"current_steps": 16705, "total_steps": 78105, "loss": 0.3391, "lr": 4.805129834360664e-06, "epoch": 1.0693937648037897, "percentage": 21.39, "elapsed_time": "0:44:16", "remaining_time": "2:42:44", "throughput": 19790.38, "total_tokens": 52574976}
|
|
{"current_steps": 16710, "total_steps": 78105, "loss": 0.2333, "lr": 4.8049135413762514e-06, "epoch": 1.0697138467447667, "percentage": 21.39, "elapsed_time": "0:44:17", "remaining_time": "2:42:43", "throughput": 19791.17, "total_tokens": 52590208}
|
|
{"current_steps": 16715, "total_steps": 78105, "loss": 0.3121, "lr": 4.804697133296463e-06, "epoch": 1.0700339286857437, "percentage": 21.4, "elapsed_time": "0:44:17", "remaining_time": "2:42:42", "throughput": 19792.16, "total_tokens": 52606720}
|
|
{"current_steps": 16720, "total_steps": 78105, "loss": 0.3195, "lr": 4.804480610132104e-06, "epoch": 1.0703540106267204, "percentage": 21.41, "elapsed_time": "0:44:18", "remaining_time": "2:42:40", "throughput": 19793.29, "total_tokens": 52623872}
|
|
{"current_steps": 16725, "total_steps": 78105, "loss": 0.223, "lr": 4.804263971893988e-06, "epoch": 1.0706740925676974, "percentage": 21.41, "elapsed_time": "0:44:19", "remaining_time": "2:42:39", "throughput": 19794.11, "total_tokens": 52639040}
|
|
{"current_steps": 16730, "total_steps": 78105, "loss": 0.301, "lr": 4.804047218592932e-06, "epoch": 1.0709941745086742, "percentage": 21.42, "elapsed_time": "0:44:20", "remaining_time": "2:42:38", "throughput": 19795.1, "total_tokens": 52655680}
|
|
{"current_steps": 16735, "total_steps": 78105, "loss": 0.3257, "lr": 4.8038303502397584e-06, "epoch": 1.0713142564496512, "percentage": 21.43, "elapsed_time": "0:44:20", "remaining_time": "2:42:37", "throughput": 19795.93, "total_tokens": 52670912}
|
|
{"current_steps": 16740, "total_steps": 78105, "loss": 0.3093, "lr": 4.803613366845297e-06, "epoch": 1.071634338390628, "percentage": 21.43, "elapsed_time": "0:44:21", "remaining_time": "2:42:35", "throughput": 19796.8, "total_tokens": 52686656}
|
|
{"current_steps": 16745, "total_steps": 78105, "loss": 0.259, "lr": 4.803396268420384e-06, "epoch": 1.071954420331605, "percentage": 21.44, "elapsed_time": "0:44:21", "remaining_time": "2:42:34", "throughput": 19797.43, "total_tokens": 52700736}
|
|
{"current_steps": 16750, "total_steps": 78105, "loss": 0.3664, "lr": 4.803179054975859e-06, "epoch": 1.0722745022725817, "percentage": 21.45, "elapsed_time": "0:44:22", "remaining_time": "2:42:33", "throughput": 19798.26, "total_tokens": 52716352}
|
|
{"current_steps": 16755, "total_steps": 78105, "loss": 0.3951, "lr": 4.802961726522568e-06, "epoch": 1.0725945842135587, "percentage": 21.45, "elapsed_time": "0:44:23", "remaining_time": "2:42:31", "throughput": 19798.99, "total_tokens": 52730944}
|
|
{"current_steps": 16760, "total_steps": 78105, "loss": 0.2282, "lr": 4.8027442830713655e-06, "epoch": 1.0729146661545355, "percentage": 21.46, "elapsed_time": "0:44:23", "remaining_time": "2:42:30", "throughput": 19799.82, "total_tokens": 52746368}
|
|
{"current_steps": 16765, "total_steps": 78105, "loss": 0.3642, "lr": 4.802526724633107e-06, "epoch": 1.0732347480955124, "percentage": 21.46, "elapsed_time": "0:44:24", "remaining_time": "2:42:29", "throughput": 19800.79, "total_tokens": 52762624}
|
|
{"current_steps": 16770, "total_steps": 78105, "loss": 0.3688, "lr": 4.802309051218658e-06, "epoch": 1.0735548300364894, "percentage": 21.47, "elapsed_time": "0:44:25", "remaining_time": "2:42:28", "throughput": 19801.53, "total_tokens": 52777728}
|
|
{"current_steps": 16775, "total_steps": 78105, "loss": 0.3491, "lr": 4.802091262838886e-06, "epoch": 1.0738749119774662, "percentage": 21.48, "elapsed_time": "0:44:26", "remaining_time": "2:42:27", "throughput": 19802.48, "total_tokens": 52793920}
|
|
{"current_steps": 16780, "total_steps": 78105, "loss": 0.3184, "lr": 4.801873359504669e-06, "epoch": 1.0741949939184432, "percentage": 21.48, "elapsed_time": "0:44:26", "remaining_time": "2:42:25", "throughput": 19803.36, "total_tokens": 52809792}
|
|
{"current_steps": 16785, "total_steps": 78105, "loss": 0.4376, "lr": 4.801655341226886e-06, "epoch": 1.07451507585942, "percentage": 21.49, "elapsed_time": "0:44:27", "remaining_time": "2:42:24", "throughput": 19804.17, "total_tokens": 52824960}
|
|
{"current_steps": 16790, "total_steps": 78105, "loss": 0.3442, "lr": 4.801437208016424e-06, "epoch": 1.074835157800397, "percentage": 21.5, "elapsed_time": "0:44:28", "remaining_time": "2:42:23", "throughput": 19804.86, "total_tokens": 52839616}
|
|
{"current_steps": 16795, "total_steps": 78105, "loss": 0.2801, "lr": 4.801218959884176e-06, "epoch": 1.0751552397413737, "percentage": 21.5, "elapsed_time": "0:44:28", "remaining_time": "2:42:22", "throughput": 19805.76, "total_tokens": 52855616}
|
|
{"current_steps": 16800, "total_steps": 78105, "loss": 0.4209, "lr": 4.801000596841039e-06, "epoch": 1.0754753216823507, "percentage": 21.51, "elapsed_time": "0:44:29", "remaining_time": "2:42:21", "throughput": 19806.93, "total_tokens": 52873088}
|
|
{"current_steps": 16805, "total_steps": 78105, "loss": 0.2287, "lr": 4.800782118897917e-06, "epoch": 1.0757954036233275, "percentage": 21.52, "elapsed_time": "0:44:30", "remaining_time": "2:42:19", "throughput": 19807.83, "total_tokens": 52889280}
|
|
{"current_steps": 16810, "total_steps": 78105, "loss": 0.3968, "lr": 4.8005635260657224e-06, "epoch": 1.0761154855643045, "percentage": 21.52, "elapsed_time": "0:44:30", "remaining_time": "2:42:18", "throughput": 19808.68, "total_tokens": 52904896}
|
|
{"current_steps": 16815, "total_steps": 78105, "loss": 0.2281, "lr": 4.800344818355368e-06, "epoch": 1.0764355675052815, "percentage": 21.53, "elapsed_time": "0:44:31", "remaining_time": "2:42:17", "throughput": 19809.52, "total_tokens": 52920640}
|
|
{"current_steps": 16820, "total_steps": 78105, "loss": 0.2776, "lr": 4.800125995777775e-06, "epoch": 1.0767556494462582, "percentage": 21.54, "elapsed_time": "0:44:32", "remaining_time": "2:42:16", "throughput": 19810.3, "total_tokens": 52935680}
|
|
{"current_steps": 16825, "total_steps": 78105, "loss": 0.3489, "lr": 4.79990705834387e-06, "epoch": 1.0770757313872352, "percentage": 21.54, "elapsed_time": "0:44:32", "remaining_time": "2:42:14", "throughput": 19811.06, "total_tokens": 52950784}
|
|
{"current_steps": 16830, "total_steps": 78105, "loss": 0.2919, "lr": 4.799688006064587e-06, "epoch": 1.077395813328212, "percentage": 21.55, "elapsed_time": "0:44:33", "remaining_time": "2:42:13", "throughput": 19811.81, "total_tokens": 52965824}
|
|
{"current_steps": 16835, "total_steps": 78105, "loss": 0.2484, "lr": 4.799468838950863e-06, "epoch": 1.077715895269189, "percentage": 21.55, "elapsed_time": "0:44:34", "remaining_time": "2:42:12", "throughput": 19812.68, "total_tokens": 52981568}
|
|
{"current_steps": 16840, "total_steps": 78105, "loss": 0.3151, "lr": 4.799249557013643e-06, "epoch": 1.0780359772101658, "percentage": 21.56, "elapsed_time": "0:44:34", "remaining_time": "2:42:11", "throughput": 19813.58, "total_tokens": 52997632}
|
|
{"current_steps": 16845, "total_steps": 78105, "loss": 0.2957, "lr": 4.7990301602638764e-06, "epoch": 1.0783560591511427, "percentage": 21.57, "elapsed_time": "0:44:35", "remaining_time": "2:42:09", "throughput": 19814.41, "total_tokens": 53013056}
|
|
{"current_steps": 16850, "total_steps": 78105, "loss": 0.3658, "lr": 4.79881064871252e-06, "epoch": 1.0786761410921195, "percentage": 21.57, "elapsed_time": "0:44:36", "remaining_time": "2:42:08", "throughput": 19815.18, "total_tokens": 53028096}
|
|
{"current_steps": 16855, "total_steps": 78105, "loss": 0.2365, "lr": 4.798591022370532e-06, "epoch": 1.0789962230330965, "percentage": 21.58, "elapsed_time": "0:44:36", "remaining_time": "2:42:07", "throughput": 19816.06, "total_tokens": 53043968}
|
|
{"current_steps": 16860, "total_steps": 78105, "loss": 0.3099, "lr": 4.798371281248883e-06, "epoch": 1.0793163049740733, "percentage": 21.59, "elapsed_time": "0:44:37", "remaining_time": "2:42:06", "throughput": 19816.98, "total_tokens": 53060224}
|
|
{"current_steps": 16865, "total_steps": 78105, "loss": 0.3508, "lr": 4.798151425358543e-06, "epoch": 1.0796363869150503, "percentage": 21.59, "elapsed_time": "0:44:38", "remaining_time": "2:42:05", "throughput": 19817.98, "total_tokens": 53076544}
|
|
{"current_steps": 16870, "total_steps": 78105, "loss": 0.2995, "lr": 4.7979314547104915e-06, "epoch": 1.079956468856027, "percentage": 21.6, "elapsed_time": "0:44:38", "remaining_time": "2:42:03", "throughput": 19818.86, "total_tokens": 53092224}
|
|
{"current_steps": 16875, "total_steps": 78105, "loss": 0.3381, "lr": 4.797711369315713e-06, "epoch": 1.080276550797004, "percentage": 21.61, "elapsed_time": "0:44:39", "remaining_time": "2:42:02", "throughput": 19819.57, "total_tokens": 53106752}
|
|
{"current_steps": 16880, "total_steps": 78105, "loss": 0.3309, "lr": 4.797491169185197e-06, "epoch": 1.080596632737981, "percentage": 21.61, "elapsed_time": "0:44:40", "remaining_time": "2:42:01", "throughput": 19820.3, "total_tokens": 53121600}
|
|
{"current_steps": 16885, "total_steps": 78105, "loss": 0.2545, "lr": 4.797270854329938e-06, "epoch": 1.0809167146789578, "percentage": 21.62, "elapsed_time": "0:44:40", "remaining_time": "2:41:59", "throughput": 19820.91, "total_tokens": 53135936}
|
|
{"current_steps": 16890, "total_steps": 78105, "loss": 0.3368, "lr": 4.7970504247609405e-06, "epoch": 1.0812367966199348, "percentage": 21.62, "elapsed_time": "0:44:41", "remaining_time": "2:41:58", "throughput": 19821.79, "total_tokens": 53152000}
|
|
{"current_steps": 16895, "total_steps": 78105, "loss": 0.4705, "lr": 4.7968298804892085e-06, "epoch": 1.0815568785609115, "percentage": 21.63, "elapsed_time": "0:44:42", "remaining_time": "2:41:57", "throughput": 19822.63, "total_tokens": 53167424}
|
|
{"current_steps": 16900, "total_steps": 78105, "loss": 0.1764, "lr": 4.796609221525756e-06, "epoch": 1.0818769605018885, "percentage": 21.64, "elapsed_time": "0:44:42", "remaining_time": "2:41:56", "throughput": 19823.32, "total_tokens": 53182272}
|
|
{"current_steps": 16905, "total_steps": 78105, "loss": 0.3175, "lr": 4.7963884478816025e-06, "epoch": 1.0821970424428653, "percentage": 21.64, "elapsed_time": "0:44:43", "remaining_time": "2:41:55", "throughput": 19824.37, "total_tokens": 53199360}
|
|
{"current_steps": 16910, "total_steps": 78105, "loss": 0.3394, "lr": 4.79616755956777e-06, "epoch": 1.0825171243838423, "percentage": 21.65, "elapsed_time": "0:44:44", "remaining_time": "2:41:53", "throughput": 19825.16, "total_tokens": 53214912}
|
|
{"current_steps": 16915, "total_steps": 78105, "loss": 0.312, "lr": 4.795946556595291e-06, "epoch": 1.082837206324819, "percentage": 21.66, "elapsed_time": "0:44:44", "remaining_time": "2:41:52", "throughput": 19825.93, "total_tokens": 53230208}
|
|
{"current_steps": 16920, "total_steps": 78105, "loss": 0.298, "lr": 4.7957254389752e-06, "epoch": 1.083157288265796, "percentage": 21.66, "elapsed_time": "0:44:45", "remaining_time": "2:41:51", "throughput": 19826.72, "total_tokens": 53245312}
|
|
{"current_steps": 16925, "total_steps": 78105, "loss": 0.4834, "lr": 4.795504206718538e-06, "epoch": 1.083477370206773, "percentage": 21.67, "elapsed_time": "0:44:46", "remaining_time": "2:41:49", "throughput": 19827.49, "total_tokens": 53260352}
|
|
{"current_steps": 16930, "total_steps": 78105, "loss": 0.3298, "lr": 4.795282859836353e-06, "epoch": 1.0837974521477498, "percentage": 21.68, "elapsed_time": "0:44:46", "remaining_time": "2:41:48", "throughput": 19828.48, "total_tokens": 53276608}
|
|
{"current_steps": 16935, "total_steps": 78105, "loss": 0.2241, "lr": 4.7950613983396975e-06, "epoch": 1.0841175340887268, "percentage": 21.68, "elapsed_time": "0:44:47", "remaining_time": "2:41:47", "throughput": 19829.45, "total_tokens": 53292864}
|
|
{"current_steps": 16940, "total_steps": 78105, "loss": 0.3922, "lr": 4.794839822239631e-06, "epoch": 1.0844376160297036, "percentage": 21.69, "elapsed_time": "0:44:48", "remaining_time": "2:41:46", "throughput": 19830.26, "total_tokens": 53308288}
|
|
{"current_steps": 16945, "total_steps": 78105, "loss": 0.3077, "lr": 4.794618131547217e-06, "epoch": 1.0847576979706806, "percentage": 21.7, "elapsed_time": "0:44:48", "remaining_time": "2:41:45", "throughput": 19831.11, "total_tokens": 53324096}
|
|
{"current_steps": 16950, "total_steps": 78105, "loss": 0.3316, "lr": 4.7943963262735245e-06, "epoch": 1.0850777799116573, "percentage": 21.7, "elapsed_time": "0:44:49", "remaining_time": "2:41:44", "throughput": 19832.17, "total_tokens": 53341248}
|
|
{"current_steps": 16955, "total_steps": 78105, "loss": 0.3751, "lr": 4.794174406429632e-06, "epoch": 1.0853978618526343, "percentage": 21.71, "elapsed_time": "0:44:50", "remaining_time": "2:41:42", "throughput": 19833.08, "total_tokens": 53357184}
|
|
{"current_steps": 16960, "total_steps": 78105, "loss": 0.3232, "lr": 4.793952372026619e-06, "epoch": 1.085717943793611, "percentage": 21.71, "elapsed_time": "0:44:50", "remaining_time": "2:41:41", "throughput": 19833.7, "total_tokens": 53371392}
|
|
{"current_steps": 16965, "total_steps": 78105, "loss": 0.2409, "lr": 4.7937302230755735e-06, "epoch": 1.086038025734588, "percentage": 21.72, "elapsed_time": "0:44:51", "remaining_time": "2:41:40", "throughput": 19834.7, "total_tokens": 53387904}
|
|
{"current_steps": 16970, "total_steps": 78105, "loss": 0.3087, "lr": 4.793507959587588e-06, "epoch": 1.0863581076755648, "percentage": 21.73, "elapsed_time": "0:44:52", "remaining_time": "2:41:39", "throughput": 19835.6, "total_tokens": 53403968}
|
|
{"current_steps": 16975, "total_steps": 78105, "loss": 0.2574, "lr": 4.793285581573762e-06, "epoch": 1.0866781896165418, "percentage": 21.73, "elapsed_time": "0:44:53", "remaining_time": "2:41:38", "throughput": 19836.44, "total_tokens": 53419776}
|
|
{"current_steps": 16980, "total_steps": 78105, "loss": 0.3332, "lr": 4.793063089045199e-06, "epoch": 1.0869982715575188, "percentage": 21.74, "elapsed_time": "0:44:53", "remaining_time": "2:41:36", "throughput": 19837.19, "total_tokens": 53434816}
|
|
{"current_steps": 16985, "total_steps": 78105, "loss": 0.3752, "lr": 4.792840482013009e-06, "epoch": 1.0873183534984956, "percentage": 21.75, "elapsed_time": "0:44:54", "remaining_time": "2:41:35", "throughput": 19838.05, "total_tokens": 53450688}
|
|
{"current_steps": 16990, "total_steps": 78105, "loss": 0.5535, "lr": 4.792617760488308e-06, "epoch": 1.0876384354394726, "percentage": 21.75, "elapsed_time": "0:44:55", "remaining_time": "2:41:34", "throughput": 19838.92, "total_tokens": 53466240}
|
|
{"current_steps": 16995, "total_steps": 78105, "loss": 0.3152, "lr": 4.792394924482219e-06, "epoch": 1.0879585173804494, "percentage": 21.76, "elapsed_time": "0:44:55", "remaining_time": "2:41:33", "throughput": 19839.74, "total_tokens": 53481728}
|
|
{"current_steps": 17000, "total_steps": 78105, "loss": 0.4126, "lr": 4.792171974005868e-06, "epoch": 1.0882785993214263, "percentage": 21.77, "elapsed_time": "0:44:56", "remaining_time": "2:41:31", "throughput": 19840.48, "total_tokens": 53496896}
|
|
{"current_steps": 17005, "total_steps": 78105, "loss": 0.3726, "lr": 4.791948909070388e-06, "epoch": 1.0885986812624031, "percentage": 21.77, "elapsed_time": "0:44:57", "remaining_time": "2:41:30", "throughput": 19841.23, "total_tokens": 53512192}
|
|
{"current_steps": 17010, "total_steps": 78105, "loss": 0.3589, "lr": 4.7917257296869165e-06, "epoch": 1.08891876320338, "percentage": 21.78, "elapsed_time": "0:44:57", "remaining_time": "2:41:29", "throughput": 19841.99, "total_tokens": 53527424}
|
|
{"current_steps": 17015, "total_steps": 78105, "loss": 0.2884, "lr": 4.7915024358666005e-06, "epoch": 1.0892388451443569, "percentage": 21.78, "elapsed_time": "0:44:58", "remaining_time": "2:41:27", "throughput": 19842.68, "total_tokens": 53542016}
|
|
{"current_steps": 17020, "total_steps": 78105, "loss": 0.2984, "lr": 4.791279027620589e-06, "epoch": 1.0895589270853339, "percentage": 21.79, "elapsed_time": "0:44:59", "remaining_time": "2:41:26", "throughput": 19843.55, "total_tokens": 53557824}
|
|
{"current_steps": 17025, "total_steps": 78105, "loss": 0.2232, "lr": 4.7910555049600374e-06, "epoch": 1.0898790090263106, "percentage": 21.8, "elapsed_time": "0:44:59", "remaining_time": "2:41:25", "throughput": 19844.14, "total_tokens": 53571904}
|
|
{"current_steps": 17030, "total_steps": 78105, "loss": 0.3296, "lr": 4.790831867896107e-06, "epoch": 1.0901990909672876, "percentage": 21.8, "elapsed_time": "0:45:00", "remaining_time": "2:41:24", "throughput": 19845.14, "total_tokens": 53588800}
|
|
{"current_steps": 17035, "total_steps": 78105, "loss": 0.2592, "lr": 4.790608116439966e-06, "epoch": 1.0905191729082646, "percentage": 21.81, "elapsed_time": "0:45:01", "remaining_time": "2:41:23", "throughput": 19846.21, "total_tokens": 53605824}
|
|
{"current_steps": 17040, "total_steps": 78105, "loss": 0.3961, "lr": 4.790384250602788e-06, "epoch": 1.0908392548492414, "percentage": 21.82, "elapsed_time": "0:45:01", "remaining_time": "2:41:22", "throughput": 19847.09, "total_tokens": 53622016}
|
|
{"current_steps": 17045, "total_steps": 78105, "loss": 0.2924, "lr": 4.7901602703957494e-06, "epoch": 1.0911593367902184, "percentage": 21.82, "elapsed_time": "0:45:02", "remaining_time": "2:41:20", "throughput": 19847.92, "total_tokens": 53637888}
|
|
{"current_steps": 17050, "total_steps": 78105, "loss": 0.3298, "lr": 4.7899361758300365e-06, "epoch": 1.0914794187311951, "percentage": 21.83, "elapsed_time": "0:45:03", "remaining_time": "2:41:19", "throughput": 19848.74, "total_tokens": 53653760}
|
|
{"current_steps": 17055, "total_steps": 78105, "loss": 0.2407, "lr": 4.789711966916839e-06, "epoch": 1.0917995006721721, "percentage": 21.84, "elapsed_time": "0:45:03", "remaining_time": "2:41:18", "throughput": 19849.4, "total_tokens": 53668160}
|
|
{"current_steps": 17060, "total_steps": 78105, "loss": 0.3244, "lr": 4.7894876436673525e-06, "epoch": 1.092119582613149, "percentage": 21.84, "elapsed_time": "0:45:04", "remaining_time": "2:41:17", "throughput": 19850.14, "total_tokens": 53683136}
|
|
{"current_steps": 17065, "total_steps": 78105, "loss": 0.3165, "lr": 4.789263206092778e-06, "epoch": 1.0924396645541259, "percentage": 21.85, "elapsed_time": "0:45:05", "remaining_time": "2:41:15", "throughput": 19850.97, "total_tokens": 53698752}
|
|
{"current_steps": 17070, "total_steps": 78105, "loss": 0.2705, "lr": 4.789038654204323e-06, "epoch": 1.0927597464951027, "percentage": 21.86, "elapsed_time": "0:45:05", "remaining_time": "2:41:14", "throughput": 19851.79, "total_tokens": 53714496}
|
|
{"current_steps": 17075, "total_steps": 78105, "loss": 0.2991, "lr": 4.788813988013202e-06, "epoch": 1.0930798284360796, "percentage": 21.86, "elapsed_time": "0:45:06", "remaining_time": "2:41:13", "throughput": 19852.57, "total_tokens": 53729984}
|
|
{"current_steps": 17080, "total_steps": 78105, "loss": 0.3435, "lr": 4.788589207530632e-06, "epoch": 1.0933999103770566, "percentage": 21.87, "elapsed_time": "0:45:07", "remaining_time": "2:41:12", "throughput": 19853.43, "total_tokens": 53746048}
|
|
{"current_steps": 17085, "total_steps": 78105, "loss": 0.2946, "lr": 4.788364312767837e-06, "epoch": 1.0937199923180334, "percentage": 21.87, "elapsed_time": "0:45:07", "remaining_time": "2:41:11", "throughput": 19854.23, "total_tokens": 53761728}
|
|
{"current_steps": 17090, "total_steps": 78105, "loss": 0.3548, "lr": 4.788139303736049e-06, "epoch": 1.0940400742590104, "percentage": 21.88, "elapsed_time": "0:45:08", "remaining_time": "2:41:09", "throughput": 19854.98, "total_tokens": 53776704}
|
|
{"current_steps": 17095, "total_steps": 78105, "loss": 0.2661, "lr": 4.787914180446502e-06, "epoch": 1.0943601561999872, "percentage": 21.89, "elapsed_time": "0:45:09", "remaining_time": "2:41:08", "throughput": 19856.07, "total_tokens": 53794048}
|
|
{"current_steps": 17100, "total_steps": 78105, "loss": 0.3857, "lr": 4.7876889429104374e-06, "epoch": 1.0946802381409642, "percentage": 21.89, "elapsed_time": "0:45:09", "remaining_time": "2:41:07", "throughput": 19856.75, "total_tokens": 53808576}
|
|
{"current_steps": 17105, "total_steps": 78105, "loss": 0.3105, "lr": 4.787463591139104e-06, "epoch": 1.095000320081941, "percentage": 21.9, "elapsed_time": "0:45:10", "remaining_time": "2:41:06", "throughput": 19857.45, "total_tokens": 53823360}
|
|
{"current_steps": 17110, "total_steps": 78105, "loss": 0.4111, "lr": 4.787238125143754e-06, "epoch": 1.095320402022918, "percentage": 21.91, "elapsed_time": "0:45:11", "remaining_time": "2:41:04", "throughput": 19858.25, "total_tokens": 53838912}
|
|
{"current_steps": 17115, "total_steps": 78105, "loss": 0.4905, "lr": 4.787012544935646e-06, "epoch": 1.0956404839638947, "percentage": 21.91, "elapsed_time": "0:45:11", "remaining_time": "2:41:03", "throughput": 19859.03, "total_tokens": 53854336}
|
|
{"current_steps": 17120, "total_steps": 78105, "loss": 0.2755, "lr": 4.786786850526044e-06, "epoch": 1.0959605659048717, "percentage": 21.92, "elapsed_time": "0:45:12", "remaining_time": "2:41:02", "throughput": 19860.16, "total_tokens": 53871680}
|
|
{"current_steps": 17125, "total_steps": 78105, "loss": 0.2438, "lr": 4.7865610419262175e-06, "epoch": 1.0962806478458484, "percentage": 21.93, "elapsed_time": "0:45:13", "remaining_time": "2:41:01", "throughput": 19860.99, "total_tokens": 53887360}
|
|
{"current_steps": 17130, "total_steps": 78105, "loss": 0.2652, "lr": 4.786335119147443e-06, "epoch": 1.0966007297868254, "percentage": 21.93, "elapsed_time": "0:45:13", "remaining_time": "2:41:00", "throughput": 19861.67, "total_tokens": 53902144}
|
|
{"current_steps": 17135, "total_steps": 78105, "loss": 0.5099, "lr": 4.786109082201003e-06, "epoch": 1.0969208117278022, "percentage": 21.94, "elapsed_time": "0:45:14", "remaining_time": "2:40:59", "throughput": 19862.75, "total_tokens": 53919488}
|
|
{"current_steps": 17140, "total_steps": 78105, "loss": 0.4236, "lr": 4.7858829310981815e-06, "epoch": 1.0972408936687792, "percentage": 21.94, "elapsed_time": "0:45:15", "remaining_time": "2:40:58", "throughput": 19863.68, "total_tokens": 53936000}
|
|
{"current_steps": 17145, "total_steps": 78105, "loss": 0.3126, "lr": 4.785656665850274e-06, "epoch": 1.0975609756097562, "percentage": 21.95, "elapsed_time": "0:45:16", "remaining_time": "2:40:57", "throughput": 19864.82, "total_tokens": 53953920}
|
|
{"current_steps": 17150, "total_steps": 78105, "loss": 0.401, "lr": 4.7854302864685785e-06, "epoch": 1.097881057550733, "percentage": 21.96, "elapsed_time": "0:45:16", "remaining_time": "2:40:55", "throughput": 19865.73, "total_tokens": 53970240}
|
|
{"current_steps": 17155, "total_steps": 78105, "loss": 0.3472, "lr": 4.7852037929643976e-06, "epoch": 1.09820113949171, "percentage": 21.96, "elapsed_time": "0:45:17", "remaining_time": "2:40:54", "throughput": 19866.52, "total_tokens": 53985984}
|
|
{"current_steps": 17160, "total_steps": 78105, "loss": 0.2382, "lr": 4.784977185349044e-06, "epoch": 1.0985212214326867, "percentage": 21.97, "elapsed_time": "0:45:18", "remaining_time": "2:40:53", "throughput": 19867.5, "total_tokens": 54002432}
|
|
{"current_steps": 17165, "total_steps": 78105, "loss": 0.2789, "lr": 4.784750463633831e-06, "epoch": 1.0988413033736637, "percentage": 21.98, "elapsed_time": "0:45:18", "remaining_time": "2:40:52", "throughput": 19868.35, "total_tokens": 54018304}
|
|
{"current_steps": 17170, "total_steps": 78105, "loss": 0.2712, "lr": 4.784523627830081e-06, "epoch": 1.0991613853146405, "percentage": 21.98, "elapsed_time": "0:45:19", "remaining_time": "2:40:51", "throughput": 19868.93, "total_tokens": 54032384}
|
|
{"current_steps": 17175, "total_steps": 78105, "loss": 0.3503, "lr": 4.7842966779491205e-06, "epoch": 1.0994814672556175, "percentage": 21.99, "elapsed_time": "0:45:20", "remaining_time": "2:40:49", "throughput": 19869.71, "total_tokens": 54048128}
|
|
{"current_steps": 17180, "total_steps": 78105, "loss": 0.3664, "lr": 4.7840696140022825e-06, "epoch": 1.0998015491965942, "percentage": 22.0, "elapsed_time": "0:45:20", "remaining_time": "2:40:48", "throughput": 19870.42, "total_tokens": 54063040}
|
|
{"current_steps": 17185, "total_steps": 78105, "loss": 0.2494, "lr": 4.783842436000905e-06, "epoch": 1.1001216311375712, "percentage": 22.0, "elapsed_time": "0:45:21", "remaining_time": "2:40:47", "throughput": 19871.28, "total_tokens": 54078720}
|
|
{"current_steps": 17190, "total_steps": 78105, "loss": 0.3827, "lr": 4.783615143956332e-06, "epoch": 1.1004417130785482, "percentage": 22.01, "elapsed_time": "0:45:22", "remaining_time": "2:40:46", "throughput": 19871.91, "total_tokens": 54093248}
|
|
{"current_steps": 17195, "total_steps": 78105, "loss": 0.2817, "lr": 4.7833877378799145e-06, "epoch": 1.100761795019525, "percentage": 22.02, "elapsed_time": "0:45:22", "remaining_time": "2:40:44", "throughput": 19872.76, "total_tokens": 54109120}
|
|
{"current_steps": 17200, "total_steps": 78105, "loss": 0.3777, "lr": 4.783160217783007e-06, "epoch": 1.101081876960502, "percentage": 22.02, "elapsed_time": "0:45:23", "remaining_time": "2:40:43", "throughput": 19873.58, "total_tokens": 54124544}
|
|
{"current_steps": 17205, "total_steps": 78105, "loss": 0.2661, "lr": 4.7829325836769705e-06, "epoch": 1.1014019589014787, "percentage": 22.03, "elapsed_time": "0:45:24", "remaining_time": "2:40:42", "throughput": 19874.3, "total_tokens": 54139456}
|
|
{"current_steps": 17210, "total_steps": 78105, "loss": 0.3123, "lr": 4.782704835573172e-06, "epoch": 1.1017220408424557, "percentage": 22.03, "elapsed_time": "0:45:24", "remaining_time": "2:40:41", "throughput": 19874.96, "total_tokens": 54154112}
|
|
{"current_steps": 17215, "total_steps": 78105, "loss": 0.3453, "lr": 4.782476973482984e-06, "epoch": 1.1020421227834325, "percentage": 22.04, "elapsed_time": "0:45:25", "remaining_time": "2:40:39", "throughput": 19875.72, "total_tokens": 54169664}
|
|
{"current_steps": 17220, "total_steps": 78105, "loss": 0.2779, "lr": 4.782248997417785e-06, "epoch": 1.1023622047244095, "percentage": 22.05, "elapsed_time": "0:45:26", "remaining_time": "2:40:38", "throughput": 19876.73, "total_tokens": 54186752}
|
|
{"current_steps": 17225, "total_steps": 78105, "loss": 0.2646, "lr": 4.782020907388959e-06, "epoch": 1.1026822866653863, "percentage": 22.05, "elapsed_time": "0:45:26", "remaining_time": "2:40:37", "throughput": 19877.91, "total_tokens": 54204928}
|
|
{"current_steps": 17230, "total_steps": 78105, "loss": 0.3441, "lr": 4.781792703407896e-06, "epoch": 1.1030023686063632, "percentage": 22.06, "elapsed_time": "0:45:27", "remaining_time": "2:40:36", "throughput": 19878.64, "total_tokens": 54220160}
|
|
{"current_steps": 17235, "total_steps": 78105, "loss": 0.4479, "lr": 4.78156438548599e-06, "epoch": 1.10332245054734, "percentage": 22.07, "elapsed_time": "0:45:28", "remaining_time": "2:40:35", "throughput": 19879.41, "total_tokens": 54235392}
|
|
{"current_steps": 17240, "total_steps": 78105, "loss": 0.3998, "lr": 4.781335953634643e-06, "epoch": 1.103642532488317, "percentage": 22.07, "elapsed_time": "0:45:28", "remaining_time": "2:40:34", "throughput": 19880.2, "total_tokens": 54250560}
|
|
{"current_steps": 17245, "total_steps": 78105, "loss": 0.3057, "lr": 4.781107407865262e-06, "epoch": 1.103962614429294, "percentage": 22.08, "elapsed_time": "0:45:29", "remaining_time": "2:40:33", "throughput": 19881.11, "total_tokens": 54267072}
|
|
{"current_steps": 17250, "total_steps": 78105, "loss": 0.3702, "lr": 4.780878748189259e-06, "epoch": 1.1042826963702708, "percentage": 22.09, "elapsed_time": "0:45:30", "remaining_time": "2:40:31", "throughput": 19882.03, "total_tokens": 54283584}
|
|
{"current_steps": 17255, "total_steps": 78105, "loss": 0.2723, "lr": 4.780649974618051e-06, "epoch": 1.1046027783112478, "percentage": 22.09, "elapsed_time": "0:45:30", "remaining_time": "2:40:30", "throughput": 19882.89, "total_tokens": 54299584}
|
|
{"current_steps": 17260, "total_steps": 78105, "loss": 0.3932, "lr": 4.780421087163064e-06, "epoch": 1.1049228602522245, "percentage": 22.1, "elapsed_time": "0:45:31", "remaining_time": "2:40:29", "throughput": 19883.77, "total_tokens": 54315968}
|
|
{"current_steps": 17265, "total_steps": 78105, "loss": 0.4483, "lr": 4.7801920858357255e-06, "epoch": 1.1052429421932015, "percentage": 22.1, "elapsed_time": "0:45:32", "remaining_time": "2:40:28", "throughput": 19884.52, "total_tokens": 54331392}
|
|
{"current_steps": 17270, "total_steps": 78105, "loss": 0.3162, "lr": 4.779962970647471e-06, "epoch": 1.1055630241341783, "percentage": 22.11, "elapsed_time": "0:45:33", "remaining_time": "2:40:27", "throughput": 19885.27, "total_tokens": 54346624}
|
|
{"current_steps": 17275, "total_steps": 78105, "loss": 0.3045, "lr": 4.779733741609742e-06, "epoch": 1.1058831060751553, "percentage": 22.12, "elapsed_time": "0:45:33", "remaining_time": "2:40:26", "throughput": 19886.28, "total_tokens": 54363904}
|
|
{"current_steps": 17280, "total_steps": 78105, "loss": 0.2935, "lr": 4.779504398733985e-06, "epoch": 1.106203188016132, "percentage": 22.12, "elapsed_time": "0:45:34", "remaining_time": "2:40:24", "throughput": 19886.87, "total_tokens": 54378240}
|
|
{"current_steps": 17285, "total_steps": 78105, "loss": 0.3299, "lr": 4.779274942031651e-06, "epoch": 1.106523269957109, "percentage": 22.13, "elapsed_time": "0:45:35", "remaining_time": "2:40:23", "throughput": 19887.7, "total_tokens": 54393856}
|
|
{"current_steps": 17290, "total_steps": 78105, "loss": 0.3736, "lr": 4.7790453715142e-06, "epoch": 1.1068433518980858, "percentage": 22.14, "elapsed_time": "0:45:35", "remaining_time": "2:40:22", "throughput": 19888.57, "total_tokens": 54409792}
|
|
{"current_steps": 17295, "total_steps": 78105, "loss": 0.3891, "lr": 4.778815687193093e-06, "epoch": 1.1071634338390628, "percentage": 22.14, "elapsed_time": "0:45:36", "remaining_time": "2:40:21", "throughput": 19889.34, "total_tokens": 54425024}
|
|
{"current_steps": 17300, "total_steps": 78105, "loss": 0.3918, "lr": 4.778585889079801e-06, "epoch": 1.1074835157800398, "percentage": 22.15, "elapsed_time": "0:45:37", "remaining_time": "2:40:20", "throughput": 19890.1, "total_tokens": 54440384}
|
|
{"current_steps": 17305, "total_steps": 78105, "loss": 0.4742, "lr": 4.778355977185798e-06, "epoch": 1.1078035977210166, "percentage": 22.16, "elapsed_time": "0:45:37", "remaining_time": "2:40:18", "throughput": 19890.94, "total_tokens": 54456064}
|
|
{"current_steps": 17310, "total_steps": 78105, "loss": 0.4649, "lr": 4.778125951522565e-06, "epoch": 1.1081236796619935, "percentage": 22.16, "elapsed_time": "0:45:38", "remaining_time": "2:40:17", "throughput": 19891.65, "total_tokens": 54470976}
|
|
{"current_steps": 17315, "total_steps": 78105, "loss": 0.4483, "lr": 4.77789581210159e-06, "epoch": 1.1084437616029703, "percentage": 22.17, "elapsed_time": "0:45:39", "remaining_time": "2:40:16", "throughput": 19892.51, "total_tokens": 54486720}
|
|
{"current_steps": 17320, "total_steps": 78105, "loss": 0.2347, "lr": 4.777665558934361e-06, "epoch": 1.1087638435439473, "percentage": 22.18, "elapsed_time": "0:45:39", "remaining_time": "2:40:15", "throughput": 19893.33, "total_tokens": 54502592}
|
|
{"current_steps": 17325, "total_steps": 78105, "loss": 0.3618, "lr": 4.7774351920323795e-06, "epoch": 1.109083925484924, "percentage": 22.18, "elapsed_time": "0:45:40", "remaining_time": "2:40:13", "throughput": 19893.99, "total_tokens": 54517376}
|
|
{"current_steps": 17330, "total_steps": 78105, "loss": 0.3037, "lr": 4.777204711407146e-06, "epoch": 1.109404007425901, "percentage": 22.19, "elapsed_time": "0:45:41", "remaining_time": "2:40:12", "throughput": 19894.83, "total_tokens": 54533248}
|
|
{"current_steps": 17335, "total_steps": 78105, "loss": 0.3779, "lr": 4.776974117070172e-06, "epoch": 1.1097240893668778, "percentage": 22.19, "elapsed_time": "0:45:41", "remaining_time": "2:40:11", "throughput": 19895.62, "total_tokens": 54548800}
|
|
{"current_steps": 17340, "total_steps": 78105, "loss": 0.3112, "lr": 4.77674340903297e-06, "epoch": 1.1100441713078548, "percentage": 22.2, "elapsed_time": "0:45:42", "remaining_time": "2:40:10", "throughput": 19896.4, "total_tokens": 54564352}
|
|
{"current_steps": 17345, "total_steps": 78105, "loss": 0.3762, "lr": 4.776512587307062e-06, "epoch": 1.1103642532488318, "percentage": 22.21, "elapsed_time": "0:45:43", "remaining_time": "2:40:09", "throughput": 19897.29, "total_tokens": 54580480}
|
|
{"current_steps": 17350, "total_steps": 78105, "loss": 0.2193, "lr": 4.776281651903972e-06, "epoch": 1.1106843351898086, "percentage": 22.21, "elapsed_time": "0:45:43", "remaining_time": "2:40:07", "throughput": 19897.8, "total_tokens": 54594368}
|
|
{"current_steps": 17355, "total_steps": 78105, "loss": 0.3551, "lr": 4.776050602835234e-06, "epoch": 1.1110044171307856, "percentage": 22.22, "elapsed_time": "0:45:44", "remaining_time": "2:40:06", "throughput": 19898.46, "total_tokens": 54609216}
|
|
{"current_steps": 17360, "total_steps": 78105, "loss": 0.3503, "lr": 4.775819440112385e-06, "epoch": 1.1113244990717623, "percentage": 22.23, "elapsed_time": "0:45:45", "remaining_time": "2:40:05", "throughput": 19899.23, "total_tokens": 54624640}
|
|
{"current_steps": 17365, "total_steps": 78105, "loss": 0.3011, "lr": 4.775588163746966e-06, "epoch": 1.1116445810127393, "percentage": 22.23, "elapsed_time": "0:45:45", "remaining_time": "2:40:04", "throughput": 19900.08, "total_tokens": 54640384}
|
|
{"current_steps": 17370, "total_steps": 78105, "loss": 0.3279, "lr": 4.775356773750528e-06, "epoch": 1.111964662953716, "percentage": 22.24, "elapsed_time": "0:45:46", "remaining_time": "2:40:02", "throughput": 19900.87, "total_tokens": 54655616}
|
|
{"current_steps": 17375, "total_steps": 78105, "loss": 0.3209, "lr": 4.7751252701346255e-06, "epoch": 1.112284744894693, "percentage": 22.25, "elapsed_time": "0:45:47", "remaining_time": "2:40:01", "throughput": 19901.64, "total_tokens": 54670656}
|
|
{"current_steps": 17380, "total_steps": 78105, "loss": 0.3393, "lr": 4.7748936529108154e-06, "epoch": 1.1126048268356699, "percentage": 22.25, "elapsed_time": "0:45:47", "remaining_time": "2:40:00", "throughput": 19902.46, "total_tokens": 54686464}
|
|
{"current_steps": 17385, "total_steps": 78105, "loss": 0.4474, "lr": 4.774661922090667e-06, "epoch": 1.1129249087766468, "percentage": 22.26, "elapsed_time": "0:45:48", "remaining_time": "2:39:59", "throughput": 19903.4, "total_tokens": 54703232}
|
|
{"current_steps": 17390, "total_steps": 78105, "loss": 0.2873, "lr": 4.774430077685751e-06, "epoch": 1.1132449907176236, "percentage": 22.26, "elapsed_time": "0:45:49", "remaining_time": "2:39:58", "throughput": 19904.49, "total_tokens": 54720512}
|
|
{"current_steps": 17395, "total_steps": 78105, "loss": 0.3057, "lr": 4.774198119707644e-06, "epoch": 1.1135650726586006, "percentage": 22.27, "elapsed_time": "0:45:49", "remaining_time": "2:39:57", "throughput": 19905.51, "total_tokens": 54737216}
|
|
{"current_steps": 17400, "total_steps": 78105, "loss": 0.2909, "lr": 4.7739660481679285e-06, "epoch": 1.1138851545995774, "percentage": 22.28, "elapsed_time": "0:45:50", "remaining_time": "2:39:55", "throughput": 19906.29, "total_tokens": 54752640}
|
|
{"current_steps": 17405, "total_steps": 78105, "loss": 0.3757, "lr": 4.773733863078193e-06, "epoch": 1.1142052365405544, "percentage": 22.28, "elapsed_time": "0:45:51", "remaining_time": "2:39:54", "throughput": 19907.09, "total_tokens": 54767936}
|
|
{"current_steps": 17410, "total_steps": 78105, "loss": 0.2134, "lr": 4.773501564450032e-06, "epoch": 1.1145253184815314, "percentage": 22.29, "elapsed_time": "0:45:51", "remaining_time": "2:39:53", "throughput": 19907.86, "total_tokens": 54783488}
|
|
{"current_steps": 17415, "total_steps": 78105, "loss": 0.2501, "lr": 4.773269152295045e-06, "epoch": 1.1148454004225081, "percentage": 22.3, "elapsed_time": "0:45:52", "remaining_time": "2:39:52", "throughput": 19908.82, "total_tokens": 54800192}
|
|
{"current_steps": 17420, "total_steps": 78105, "loss": 0.3489, "lr": 4.773036626624838e-06, "epoch": 1.1151654823634851, "percentage": 22.3, "elapsed_time": "0:45:53", "remaining_time": "2:39:51", "throughput": 19909.69, "total_tokens": 54815936}
|
|
{"current_steps": 17425, "total_steps": 78105, "loss": 0.277, "lr": 4.772803987451021e-06, "epoch": 1.1154855643044619, "percentage": 22.31, "elapsed_time": "0:45:53", "remaining_time": "2:39:50", "throughput": 19910.51, "total_tokens": 54831488}
|
|
{"current_steps": 17430, "total_steps": 78105, "loss": 0.2835, "lr": 4.772571234785212e-06, "epoch": 1.1158056462454389, "percentage": 22.32, "elapsed_time": "0:45:54", "remaining_time": "2:39:48", "throughput": 19911.3, "total_tokens": 54846784}
|
|
{"current_steps": 17435, "total_steps": 78105, "loss": 0.3408, "lr": 4.7723383686390324e-06, "epoch": 1.1161257281864156, "percentage": 22.32, "elapsed_time": "0:45:55", "remaining_time": "2:39:47", "throughput": 19912.21, "total_tokens": 54863040}
|
|
{"current_steps": 17440, "total_steps": 78105, "loss": 0.3138, "lr": 4.772105389024111e-06, "epoch": 1.1164458101273926, "percentage": 22.33, "elapsed_time": "0:45:55", "remaining_time": "2:39:46", "throughput": 19912.88, "total_tokens": 54877696}
|
|
{"current_steps": 17445, "total_steps": 78105, "loss": 0.3693, "lr": 4.771872295952082e-06, "epoch": 1.1167658920683694, "percentage": 22.34, "elapsed_time": "0:45:56", "remaining_time": "2:39:45", "throughput": 19913.6, "total_tokens": 54892672}
|
|
{"current_steps": 17450, "total_steps": 78105, "loss": 0.3598, "lr": 4.771639089434584e-06, "epoch": 1.1170859740093464, "percentage": 22.34, "elapsed_time": "0:45:57", "remaining_time": "2:39:43", "throughput": 19914.33, "total_tokens": 54907712}
|
|
{"current_steps": 17455, "total_steps": 78105, "loss": 0.2294, "lr": 4.771405769483262e-06, "epoch": 1.1174060559503234, "percentage": 22.35, "elapsed_time": "0:45:57", "remaining_time": "2:39:42", "throughput": 19915.08, "total_tokens": 54923072}
|
|
{"current_steps": 17460, "total_steps": 78105, "loss": 0.2831, "lr": 4.771172336109768e-06, "epoch": 1.1177261378913002, "percentage": 22.35, "elapsed_time": "0:45:58", "remaining_time": "2:39:41", "throughput": 19915.84, "total_tokens": 54938240}
|
|
{"current_steps": 17465, "total_steps": 78105, "loss": 0.4827, "lr": 4.7709387893257565e-06, "epoch": 1.1180462198322771, "percentage": 22.36, "elapsed_time": "0:45:59", "remaining_time": "2:39:40", "throughput": 19916.64, "total_tokens": 54953984}
|
|
{"current_steps": 17470, "total_steps": 78105, "loss": 0.3294, "lr": 4.770705129142892e-06, "epoch": 1.118366301773254, "percentage": 22.37, "elapsed_time": "0:45:59", "remaining_time": "2:39:38", "throughput": 19917.47, "total_tokens": 54969600}
|
|
{"current_steps": 17475, "total_steps": 78105, "loss": 0.4694, "lr": 4.77047135557284e-06, "epoch": 1.118686383714231, "percentage": 22.37, "elapsed_time": "0:46:00", "remaining_time": "2:39:37", "throughput": 19918.34, "total_tokens": 54985216}
|
|
{"current_steps": 17480, "total_steps": 78105, "loss": 0.3939, "lr": 4.770237468627276e-06, "epoch": 1.1190064656552077, "percentage": 22.38, "elapsed_time": "0:46:01", "remaining_time": "2:39:36", "throughput": 19919.06, "total_tokens": 55000064}
|
|
{"current_steps": 17485, "total_steps": 78105, "loss": 0.5221, "lr": 4.770003468317877e-06, "epoch": 1.1193265475961847, "percentage": 22.39, "elapsed_time": "0:46:01", "remaining_time": "2:39:35", "throughput": 19919.89, "total_tokens": 55015808}
|
|
{"current_steps": 17490, "total_steps": 78105, "loss": 0.287, "lr": 4.769769354656329e-06, "epoch": 1.1196466295371614, "percentage": 22.39, "elapsed_time": "0:46:02", "remaining_time": "2:39:34", "throughput": 19920.58, "total_tokens": 55030848}
|
|
{"current_steps": 17495, "total_steps": 78105, "loss": 0.258, "lr": 4.769535127654323e-06, "epoch": 1.1199667114781384, "percentage": 22.4, "elapsed_time": "0:46:03", "remaining_time": "2:39:32", "throughput": 19921.34, "total_tokens": 55046080}
|
|
{"current_steps": 17500, "total_steps": 78105, "loss": 0.2615, "lr": 4.769300787323553e-06, "epoch": 1.1202867934191152, "percentage": 22.41, "elapsed_time": "0:46:03", "remaining_time": "2:39:31", "throughput": 19922.21, "total_tokens": 55062208}
|
|
{"current_steps": 17505, "total_steps": 78105, "loss": 0.3679, "lr": 4.769066333675723e-06, "epoch": 1.1206068753600922, "percentage": 22.41, "elapsed_time": "0:46:04", "remaining_time": "2:39:30", "throughput": 19922.99, "total_tokens": 55077696}
|
|
{"current_steps": 17510, "total_steps": 78105, "loss": 0.2117, "lr": 4.768831766722539e-06, "epoch": 1.1209269573010692, "percentage": 22.42, "elapsed_time": "0:46:05", "remaining_time": "2:39:29", "throughput": 19923.72, "total_tokens": 55093184}
|
|
{"current_steps": 17515, "total_steps": 78105, "loss": 0.2977, "lr": 4.768597086475715e-06, "epoch": 1.121247039242046, "percentage": 22.42, "elapsed_time": "0:46:05", "remaining_time": "2:39:28", "throughput": 19924.63, "total_tokens": 55109440}
|
|
{"current_steps": 17520, "total_steps": 78105, "loss": 0.2157, "lr": 4.768362292946968e-06, "epoch": 1.121567121183023, "percentage": 22.43, "elapsed_time": "0:46:06", "remaining_time": "2:39:26", "throughput": 19925.24, "total_tokens": 55123968}
|
|
{"current_steps": 17525, "total_steps": 78105, "loss": 0.245, "lr": 4.768127386148025e-06, "epoch": 1.1218872031239997, "percentage": 22.44, "elapsed_time": "0:46:07", "remaining_time": "2:39:25", "throughput": 19926.19, "total_tokens": 55140416}
|
|
{"current_steps": 17530, "total_steps": 78105, "loss": 0.3131, "lr": 4.767892366090614e-06, "epoch": 1.1222072850649767, "percentage": 22.44, "elapsed_time": "0:46:07", "remaining_time": "2:39:24", "throughput": 19927.02, "total_tokens": 55156224}
|
|
{"current_steps": 17535, "total_steps": 78105, "loss": 0.4164, "lr": 4.767657232786471e-06, "epoch": 1.1225273670059535, "percentage": 22.45, "elapsed_time": "0:46:08", "remaining_time": "2:39:23", "throughput": 19927.79, "total_tokens": 55171584}
|
|
{"current_steps": 17540, "total_steps": 78105, "loss": 0.4182, "lr": 4.767421986247338e-06, "epoch": 1.1228474489469304, "percentage": 22.46, "elapsed_time": "0:46:09", "remaining_time": "2:39:22", "throughput": 19928.48, "total_tokens": 55186624}
|
|
{"current_steps": 17545, "total_steps": 78105, "loss": 0.5068, "lr": 4.767186626484962e-06, "epoch": 1.1231675308879072, "percentage": 22.46, "elapsed_time": "0:46:09", "remaining_time": "2:39:20", "throughput": 19929.36, "total_tokens": 55202944}
|
|
{"current_steps": 17550, "total_steps": 78105, "loss": 0.3982, "lr": 4.766951153511095e-06, "epoch": 1.1234876128288842, "percentage": 22.47, "elapsed_time": "0:46:10", "remaining_time": "2:39:19", "throughput": 19930.31, "total_tokens": 55219520}
|
|
{"current_steps": 17555, "total_steps": 78105, "loss": 0.3083, "lr": 4.766715567337494e-06, "epoch": 1.123807694769861, "percentage": 22.48, "elapsed_time": "0:46:11", "remaining_time": "2:39:18", "throughput": 19931.08, "total_tokens": 55235008}
|
|
{"current_steps": 17560, "total_steps": 78105, "loss": 0.2455, "lr": 4.766479867975926e-06, "epoch": 1.124127776710838, "percentage": 22.48, "elapsed_time": "0:46:11", "remaining_time": "2:39:17", "throughput": 19931.9, "total_tokens": 55251008}
|
|
{"current_steps": 17565, "total_steps": 78105, "loss": 0.4543, "lr": 4.766244055438159e-06, "epoch": 1.124447858651815, "percentage": 22.49, "elapsed_time": "0:46:12", "remaining_time": "2:39:16", "throughput": 19932.61, "total_tokens": 55265728}
|
|
{"current_steps": 17570, "total_steps": 78105, "loss": 0.3649, "lr": 4.766008129735968e-06, "epoch": 1.1247679405927917, "percentage": 22.5, "elapsed_time": "0:46:13", "remaining_time": "2:39:15", "throughput": 19933.51, "total_tokens": 55281984}
|
|
{"current_steps": 17575, "total_steps": 78105, "loss": 0.3141, "lr": 4.765772090881135e-06, "epoch": 1.1250880225337687, "percentage": 22.5, "elapsed_time": "0:46:13", "remaining_time": "2:39:13", "throughput": 19934.2, "total_tokens": 55297152}
|
|
{"current_steps": 17580, "total_steps": 78105, "loss": 0.2546, "lr": 4.765535938885444e-06, "epoch": 1.1254081044747455, "percentage": 22.51, "elapsed_time": "0:46:14", "remaining_time": "2:39:12", "throughput": 19935.0, "total_tokens": 55312768}
|
|
{"current_steps": 17585, "total_steps": 78105, "loss": 0.3107, "lr": 4.76529967376069e-06, "epoch": 1.1257281864157225, "percentage": 22.51, "elapsed_time": "0:46:15", "remaining_time": "2:39:11", "throughput": 19935.85, "total_tokens": 55328832}
|
|
{"current_steps": 17590, "total_steps": 78105, "loss": 0.3397, "lr": 4.7650632955186695e-06, "epoch": 1.1260482683566992, "percentage": 22.52, "elapsed_time": "0:46:16", "remaining_time": "2:39:10", "throughput": 19936.53, "total_tokens": 55343808}
|
|
{"current_steps": 17595, "total_steps": 78105, "loss": 0.2761, "lr": 4.764826804171186e-06, "epoch": 1.1263683502976762, "percentage": 22.53, "elapsed_time": "0:46:16", "remaining_time": "2:39:09", "throughput": 19937.41, "total_tokens": 55359872}
|
|
{"current_steps": 17600, "total_steps": 78105, "loss": 0.3031, "lr": 4.764590199730051e-06, "epoch": 1.126688432238653, "percentage": 22.53, "elapsed_time": "0:46:17", "remaining_time": "2:39:07", "throughput": 19938.14, "total_tokens": 55374784}
|
|
{"current_steps": 17605, "total_steps": 78105, "loss": 0.3103, "lr": 4.764353482207075e-06, "epoch": 1.12700851417963, "percentage": 22.54, "elapsed_time": "0:46:18", "remaining_time": "2:39:06", "throughput": 19939.17, "total_tokens": 55391872}
|
|
{"current_steps": 17610, "total_steps": 78105, "loss": 0.4341, "lr": 4.764116651614081e-06, "epoch": 1.127328596120607, "percentage": 22.55, "elapsed_time": "0:46:18", "remaining_time": "2:39:05", "throughput": 19939.9, "total_tokens": 55407232}
|
|
{"current_steps": 17615, "total_steps": 78105, "loss": 0.3801, "lr": 4.763879707962895e-06, "epoch": 1.1276486780615838, "percentage": 22.55, "elapsed_time": "0:46:19", "remaining_time": "2:39:04", "throughput": 19940.64, "total_tokens": 55422336}
|
|
{"current_steps": 17620, "total_steps": 78105, "loss": 0.2858, "lr": 4.7636426512653486e-06, "epoch": 1.1279687600025607, "percentage": 22.56, "elapsed_time": "0:46:20", "remaining_time": "2:39:03", "throughput": 19941.46, "total_tokens": 55438528}
|
|
{"current_steps": 17625, "total_steps": 78105, "loss": 0.4079, "lr": 4.76340548153328e-06, "epoch": 1.1282888419435375, "percentage": 22.57, "elapsed_time": "0:46:20", "remaining_time": "2:39:02", "throughput": 19942.72, "total_tokens": 55457152}
|
|
{"current_steps": 17630, "total_steps": 78105, "loss": 0.2035, "lr": 4.7631681987785305e-06, "epoch": 1.1286089238845145, "percentage": 22.57, "elapsed_time": "0:46:21", "remaining_time": "2:39:01", "throughput": 19943.5, "total_tokens": 55473024}
|
|
{"current_steps": 17635, "total_steps": 78105, "loss": 0.4639, "lr": 4.762930803012949e-06, "epoch": 1.1289290058254913, "percentage": 22.58, "elapsed_time": "0:46:22", "remaining_time": "2:39:00", "throughput": 19944.35, "total_tokens": 55489024}
|
|
{"current_steps": 17640, "total_steps": 78105, "loss": 0.3218, "lr": 4.762693294248391e-06, "epoch": 1.1292490877664683, "percentage": 22.58, "elapsed_time": "0:46:22", "remaining_time": "2:38:58", "throughput": 19945.14, "total_tokens": 55504832}
|
|
{"current_steps": 17645, "total_steps": 78105, "loss": 0.2533, "lr": 4.762455672496716e-06, "epoch": 1.129569169707445, "percentage": 22.59, "elapsed_time": "0:46:23", "remaining_time": "2:38:57", "throughput": 19945.97, "total_tokens": 55520320}
|
|
{"current_steps": 17650, "total_steps": 78105, "loss": 0.3186, "lr": 4.7622179377697895e-06, "epoch": 1.129889251648422, "percentage": 22.6, "elapsed_time": "0:46:24", "remaining_time": "2:38:56", "throughput": 19946.68, "total_tokens": 55535360}
|
|
{"current_steps": 17655, "total_steps": 78105, "loss": 0.4017, "lr": 4.761980090079482e-06, "epoch": 1.1302093335893988, "percentage": 22.6, "elapsed_time": "0:46:24", "remaining_time": "2:38:55", "throughput": 19947.38, "total_tokens": 55550464}
|
|
{"current_steps": 17660, "total_steps": 78105, "loss": 0.395, "lr": 4.761742129437671e-06, "epoch": 1.1305294155303758, "percentage": 22.61, "elapsed_time": "0:46:25", "remaining_time": "2:38:53", "throughput": 19948.03, "total_tokens": 55565248}
|
|
{"current_steps": 17665, "total_steps": 78105, "loss": 0.3577, "lr": 4.761504055856239e-06, "epoch": 1.1308494974713525, "percentage": 22.62, "elapsed_time": "0:46:26", "remaining_time": "2:38:52", "throughput": 19948.76, "total_tokens": 55580672}
|
|
{"current_steps": 17670, "total_steps": 78105, "loss": 0.562, "lr": 4.7612658693470745e-06, "epoch": 1.1311695794123295, "percentage": 22.62, "elapsed_time": "0:46:26", "remaining_time": "2:38:51", "throughput": 19949.62, "total_tokens": 55596608}
|
|
{"current_steps": 17675, "total_steps": 78105, "loss": 0.3617, "lr": 4.761027569922072e-06, "epoch": 1.1314896613533065, "percentage": 22.63, "elapsed_time": "0:46:27", "remaining_time": "2:38:50", "throughput": 19950.32, "total_tokens": 55611904}
|
|
{"current_steps": 17680, "total_steps": 78105, "loss": 0.3807, "lr": 4.760789157593129e-06, "epoch": 1.1318097432942833, "percentage": 22.64, "elapsed_time": "0:46:28", "remaining_time": "2:38:49", "throughput": 19950.05, "total_tokens": 55627968}
|
|
{"current_steps": 17685, "total_steps": 78105, "loss": 0.356, "lr": 4.760550632372151e-06, "epoch": 1.1321298252352603, "percentage": 22.64, "elapsed_time": "0:46:29", "remaining_time": "2:38:48", "throughput": 19951.03, "total_tokens": 55644928}
|
|
{"current_steps": 17690, "total_steps": 78105, "loss": 0.4617, "lr": 4.76031199427105e-06, "epoch": 1.132449907176237, "percentage": 22.65, "elapsed_time": "0:46:29", "remaining_time": "2:38:47", "throughput": 19951.8, "total_tokens": 55660352}
|
|
{"current_steps": 17695, "total_steps": 78105, "loss": 0.2799, "lr": 4.760073243301741e-06, "epoch": 1.132769989117214, "percentage": 22.66, "elapsed_time": "0:46:30", "remaining_time": "2:38:46", "throughput": 19952.68, "total_tokens": 55676672}
|
|
{"current_steps": 17700, "total_steps": 78105, "loss": 0.3001, "lr": 4.759834379476147e-06, "epoch": 1.1330900710581908, "percentage": 22.66, "elapsed_time": "0:46:31", "remaining_time": "2:38:45", "throughput": 19953.31, "total_tokens": 55691328}
|
|
{"current_steps": 17705, "total_steps": 78105, "loss": 0.5102, "lr": 4.759595402806194e-06, "epoch": 1.1334101529991678, "percentage": 22.67, "elapsed_time": "0:46:31", "remaining_time": "2:38:43", "throughput": 19954.04, "total_tokens": 55706752}
|
|
{"current_steps": 17710, "total_steps": 78105, "loss": 0.3296, "lr": 4.759356313303818e-06, "epoch": 1.1337302349401446, "percentage": 22.67, "elapsed_time": "0:46:32", "remaining_time": "2:38:42", "throughput": 19954.94, "total_tokens": 55722944}
|
|
{"current_steps": 17715, "total_steps": 78105, "loss": 0.2977, "lr": 4.759117110980955e-06, "epoch": 1.1340503168811216, "percentage": 22.68, "elapsed_time": "0:46:33", "remaining_time": "2:38:41", "throughput": 19955.88, "total_tokens": 55739776}
|
|
{"current_steps": 17720, "total_steps": 78105, "loss": 0.4228, "lr": 4.7588777958495515e-06, "epoch": 1.1343703988220986, "percentage": 22.69, "elapsed_time": "0:46:33", "remaining_time": "2:38:40", "throughput": 19956.67, "total_tokens": 55755712}
|
|
{"current_steps": 17725, "total_steps": 78105, "loss": 0.3189, "lr": 4.758638367921556e-06, "epoch": 1.1346904807630753, "percentage": 22.69, "elapsed_time": "0:46:34", "remaining_time": "2:38:39", "throughput": 19957.6, "total_tokens": 55772032}
|
|
{"current_steps": 17730, "total_steps": 78105, "loss": 0.3287, "lr": 4.758398827208927e-06, "epoch": 1.1350105627040523, "percentage": 22.7, "elapsed_time": "0:46:35", "remaining_time": "2:38:38", "throughput": 19958.62, "total_tokens": 55789248}
|
|
{"current_steps": 17735, "total_steps": 78105, "loss": 0.3426, "lr": 4.758159173723623e-06, "epoch": 1.135330644645029, "percentage": 22.71, "elapsed_time": "0:46:35", "remaining_time": "2:38:37", "throughput": 19959.32, "total_tokens": 55804224}
|
|
{"current_steps": 17740, "total_steps": 78105, "loss": 0.4571, "lr": 4.757919407477613e-06, "epoch": 1.135650726586006, "percentage": 22.71, "elapsed_time": "0:46:36", "remaining_time": "2:38:35", "throughput": 19959.93, "total_tokens": 55818816}
|
|
{"current_steps": 17745, "total_steps": 78105, "loss": 0.3233, "lr": 4.757679528482867e-06, "epoch": 1.1359708085269828, "percentage": 22.72, "elapsed_time": "0:46:37", "remaining_time": "2:38:34", "throughput": 19960.73, "total_tokens": 55834496}
|
|
{"current_steps": 17750, "total_steps": 78105, "loss": 0.3372, "lr": 4.757439536751367e-06, "epoch": 1.1362908904679598, "percentage": 22.73, "elapsed_time": "0:46:37", "remaining_time": "2:38:33", "throughput": 19961.63, "total_tokens": 55851200}
|
|
{"current_steps": 17755, "total_steps": 78105, "loss": 0.2493, "lr": 4.757199432295094e-06, "epoch": 1.1366109724089366, "percentage": 22.73, "elapsed_time": "0:46:38", "remaining_time": "2:38:32", "throughput": 19962.52, "total_tokens": 55867712}
|
|
{"current_steps": 17760, "total_steps": 78105, "loss": 0.2704, "lr": 4.756959215126039e-06, "epoch": 1.1369310543499136, "percentage": 22.74, "elapsed_time": "0:46:39", "remaining_time": "2:38:31", "throughput": 19963.17, "total_tokens": 55882496}
|
|
{"current_steps": 17765, "total_steps": 78105, "loss": 0.4733, "lr": 4.756718885256197e-06, "epoch": 1.1372511362908906, "percentage": 22.75, "elapsed_time": "0:46:39", "remaining_time": "2:38:30", "throughput": 19963.9, "total_tokens": 55897984}
|
|
{"current_steps": 17770, "total_steps": 78105, "loss": 0.3585, "lr": 4.7564784426975685e-06, "epoch": 1.1375712182318674, "percentage": 22.75, "elapsed_time": "0:46:40", "remaining_time": "2:38:29", "throughput": 19964.63, "total_tokens": 55913216}
|
|
{"current_steps": 17775, "total_steps": 78105, "loss": 0.2183, "lr": 4.756237887462161e-06, "epoch": 1.1378913001728443, "percentage": 22.76, "elapsed_time": "0:46:41", "remaining_time": "2:38:27", "throughput": 19965.31, "total_tokens": 55928320}
|
|
{"current_steps": 17780, "total_steps": 78105, "loss": 0.31, "lr": 4.755997219561984e-06, "epoch": 1.1382113821138211, "percentage": 22.76, "elapsed_time": "0:46:41", "remaining_time": "2:38:26", "throughput": 19966.14, "total_tokens": 55944576}
|
|
{"current_steps": 17785, "total_steps": 78105, "loss": 0.2849, "lr": 4.7557564390090585e-06, "epoch": 1.138531464054798, "percentage": 22.77, "elapsed_time": "0:46:42", "remaining_time": "2:38:25", "throughput": 19966.98, "total_tokens": 55960256}
|
|
{"current_steps": 17790, "total_steps": 78105, "loss": 0.3296, "lr": 4.755515545815406e-06, "epoch": 1.1388515459957749, "percentage": 22.78, "elapsed_time": "0:46:43", "remaining_time": "2:38:24", "throughput": 19967.77, "total_tokens": 55975872}
|
|
{"current_steps": 17795, "total_steps": 78105, "loss": 0.3347, "lr": 4.755274539993056e-06, "epoch": 1.1391716279367519, "percentage": 22.78, "elapsed_time": "0:46:44", "remaining_time": "2:38:23", "throughput": 19967.72, "total_tokens": 55992192}
|
|
{"current_steps": 17800, "total_steps": 78105, "loss": 0.3867, "lr": 4.755033421554042e-06, "epoch": 1.1394917098777286, "percentage": 22.79, "elapsed_time": "0:46:44", "remaining_time": "2:38:22", "throughput": 19968.52, "total_tokens": 56008064}
|
|
{"current_steps": 17805, "total_steps": 78105, "loss": 0.4431, "lr": 4.754792190510405e-06, "epoch": 1.1398117918187056, "percentage": 22.8, "elapsed_time": "0:46:45", "remaining_time": "2:38:21", "throughput": 19969.36, "total_tokens": 56024320}
|
|
{"current_steps": 17810, "total_steps": 78105, "loss": 0.4699, "lr": 4.7545508468741905e-06, "epoch": 1.1401318737596824, "percentage": 22.8, "elapsed_time": "0:46:46", "remaining_time": "2:38:20", "throughput": 19970.51, "total_tokens": 56042688}
|
|
{"current_steps": 17815, "total_steps": 78105, "loss": 0.3477, "lr": 4.754309390657451e-06, "epoch": 1.1404519557006594, "percentage": 22.81, "elapsed_time": "0:46:46", "remaining_time": "2:38:19", "throughput": 19971.12, "total_tokens": 56057280}
|
|
{"current_steps": 17820, "total_steps": 78105, "loss": 0.2841, "lr": 4.754067821872242e-06, "epoch": 1.1407720376416361, "percentage": 22.82, "elapsed_time": "0:46:47", "remaining_time": "2:38:18", "throughput": 19972.0, "total_tokens": 56073600}
|
|
{"current_steps": 17825, "total_steps": 78105, "loss": 0.3666, "lr": 4.753826140530628e-06, "epoch": 1.1410921195826131, "percentage": 22.82, "elapsed_time": "0:46:48", "remaining_time": "2:38:16", "throughput": 19972.74, "total_tokens": 56089344}
|
|
{"current_steps": 17830, "total_steps": 78105, "loss": 0.344, "lr": 4.753584346644675e-06, "epoch": 1.1414122015235901, "percentage": 22.83, "elapsed_time": "0:46:48", "remaining_time": "2:38:15", "throughput": 19973.66, "total_tokens": 56105856}
|
|
{"current_steps": 17835, "total_steps": 78105, "loss": 0.4252, "lr": 4.753342440226459e-06, "epoch": 1.141732283464567, "percentage": 22.83, "elapsed_time": "0:46:49", "remaining_time": "2:38:14", "throughput": 19974.43, "total_tokens": 56121344}
|
|
{"current_steps": 17840, "total_steps": 78105, "loss": 0.3486, "lr": 4.753100421288059e-06, "epoch": 1.142052365405544, "percentage": 22.84, "elapsed_time": "0:46:50", "remaining_time": "2:38:13", "throughput": 19975.3, "total_tokens": 56137984}
|
|
{"current_steps": 17845, "total_steps": 78105, "loss": 0.3367, "lr": 4.752858289841559e-06, "epoch": 1.1423724473465207, "percentage": 22.85, "elapsed_time": "0:46:51", "remaining_time": "2:38:12", "throughput": 19975.96, "total_tokens": 56153024}
|
|
{"current_steps": 17850, "total_steps": 78105, "loss": 0.2596, "lr": 4.7526160458990515e-06, "epoch": 1.1426925292874976, "percentage": 22.85, "elapsed_time": "0:46:51", "remaining_time": "2:38:11", "throughput": 19976.8, "total_tokens": 56169088}
|
|
{"current_steps": 17855, "total_steps": 78105, "loss": 0.2969, "lr": 4.752373689472633e-06, "epoch": 1.1430126112284744, "percentage": 22.86, "elapsed_time": "0:46:52", "remaining_time": "2:38:10", "throughput": 19977.56, "total_tokens": 56184576}
|
|
{"current_steps": 17860, "total_steps": 78105, "loss": 0.33, "lr": 4.752131220574403e-06, "epoch": 1.1433326931694514, "percentage": 22.87, "elapsed_time": "0:46:53", "remaining_time": "2:38:08", "throughput": 19978.38, "total_tokens": 56200320}
|
|
{"current_steps": 17865, "total_steps": 78105, "loss": 0.3765, "lr": 4.751888639216471e-06, "epoch": 1.1436527751104282, "percentage": 22.87, "elapsed_time": "0:46:53", "remaining_time": "2:38:07", "throughput": 19979.13, "total_tokens": 56215360}
|
|
{"current_steps": 17870, "total_steps": 78105, "loss": 0.3007, "lr": 4.751645945410951e-06, "epoch": 1.1439728570514052, "percentage": 22.88, "elapsed_time": "0:46:54", "remaining_time": "2:38:06", "throughput": 19979.97, "total_tokens": 56231360}
|
|
{"current_steps": 17875, "total_steps": 78105, "loss": 0.2715, "lr": 4.75140313916996e-06, "epoch": 1.1442929389923822, "percentage": 22.89, "elapsed_time": "0:46:55", "remaining_time": "2:38:05", "throughput": 19980.87, "total_tokens": 56247488}
|
|
{"current_steps": 17880, "total_steps": 78105, "loss": 0.4135, "lr": 4.751160220505623e-06, "epoch": 1.144613020933359, "percentage": 22.89, "elapsed_time": "0:46:55", "remaining_time": "2:38:04", "throughput": 19981.6, "total_tokens": 56262912}
|
|
{"current_steps": 17885, "total_steps": 78105, "loss": 0.3799, "lr": 4.750917189430072e-06, "epoch": 1.144933102874336, "percentage": 22.9, "elapsed_time": "0:46:56", "remaining_time": "2:38:03", "throughput": 19982.39, "total_tokens": 56278784}
|
|
{"current_steps": 17890, "total_steps": 78105, "loss": 0.3399, "lr": 4.75067404595544e-06, "epoch": 1.1452531848153127, "percentage": 22.91, "elapsed_time": "0:46:57", "remaining_time": "2:38:01", "throughput": 19983.02, "total_tokens": 56293312}
|
|
{"current_steps": 17895, "total_steps": 78105, "loss": 0.4924, "lr": 4.7504307900938694e-06, "epoch": 1.1455732667562897, "percentage": 22.91, "elapsed_time": "0:46:57", "remaining_time": "2:38:00", "throughput": 19983.69, "total_tokens": 56308352}
|
|
{"current_steps": 17900, "total_steps": 78105, "loss": 0.2666, "lr": 4.750187421857507e-06, "epoch": 1.1458933486972664, "percentage": 22.92, "elapsed_time": "0:46:58", "remaining_time": "2:37:59", "throughput": 19984.44, "total_tokens": 56323904}
|
|
{"current_steps": 17905, "total_steps": 78105, "loss": 0.3543, "lr": 4.749943941258507e-06, "epoch": 1.1462134306382434, "percentage": 22.92, "elapsed_time": "0:46:59", "remaining_time": "2:37:58", "throughput": 19985.17, "total_tokens": 56339328}
|
|
{"current_steps": 17910, "total_steps": 78105, "loss": 0.3061, "lr": 4.749700348309025e-06, "epoch": 1.1465335125792202, "percentage": 22.93, "elapsed_time": "0:46:59", "remaining_time": "2:37:57", "throughput": 19986.11, "total_tokens": 56355968}
|
|
{"current_steps": 17915, "total_steps": 78105, "loss": 0.2885, "lr": 4.749456643021226e-06, "epoch": 1.1468535945201972, "percentage": 22.94, "elapsed_time": "0:47:00", "remaining_time": "2:37:56", "throughput": 19986.99, "total_tokens": 56372544}
|
|
{"current_steps": 17920, "total_steps": 78105, "loss": 0.2448, "lr": 4.749212825407279e-06, "epoch": 1.1471736764611742, "percentage": 22.94, "elapsed_time": "0:47:01", "remaining_time": "2:37:54", "throughput": 19987.79, "total_tokens": 56388288}
|
|
{"current_steps": 17925, "total_steps": 78105, "loss": 0.3832, "lr": 4.74896889547936e-06, "epoch": 1.147493758402151, "percentage": 22.95, "elapsed_time": "0:47:01", "remaining_time": "2:37:53", "throughput": 19988.6, "total_tokens": 56404160}
|
|
{"current_steps": 17930, "total_steps": 78105, "loss": 0.2836, "lr": 4.748724853249648e-06, "epoch": 1.1478138403431277, "percentage": 22.96, "elapsed_time": "0:47:02", "remaining_time": "2:37:52", "throughput": 19989.34, "total_tokens": 56419264}
|
|
{"current_steps": 17935, "total_steps": 78105, "loss": 0.3568, "lr": 4.74848069873033e-06, "epoch": 1.1481339222841047, "percentage": 22.96, "elapsed_time": "0:47:03", "remaining_time": "2:37:51", "throughput": 19990.18, "total_tokens": 56435520}
|
|
{"current_steps": 17940, "total_steps": 78105, "loss": 0.3754, "lr": 4.748236431933598e-06, "epoch": 1.1484540042250817, "percentage": 22.97, "elapsed_time": "0:47:03", "remaining_time": "2:37:50", "throughput": 19990.91, "total_tokens": 56451264}
|
|
{"current_steps": 17945, "total_steps": 78105, "loss": 0.2532, "lr": 4.74799205287165e-06, "epoch": 1.1487740861660585, "percentage": 22.98, "elapsed_time": "0:47:04", "remaining_time": "2:37:49", "throughput": 19991.6, "total_tokens": 56466624}
|
|
{"current_steps": 17950, "total_steps": 78105, "loss": 0.3551, "lr": 4.747747561556687e-06, "epoch": 1.1490941681070355, "percentage": 22.98, "elapsed_time": "0:47:05", "remaining_time": "2:37:47", "throughput": 19992.31, "total_tokens": 56481856}
|
|
{"current_steps": 17955, "total_steps": 78105, "loss": 0.3701, "lr": 4.7475029580009205e-06, "epoch": 1.1494142500480122, "percentage": 22.99, "elapsed_time": "0:47:05", "remaining_time": "2:37:46", "throughput": 19993.15, "total_tokens": 56498048}
|
|
{"current_steps": 17960, "total_steps": 78105, "loss": 0.329, "lr": 4.747258242216561e-06, "epoch": 1.1497343319889892, "percentage": 22.99, "elapsed_time": "0:47:06", "remaining_time": "2:37:45", "throughput": 19993.83, "total_tokens": 56512960}
|
|
{"current_steps": 17965, "total_steps": 78105, "loss": 0.3293, "lr": 4.747013414215832e-06, "epoch": 1.150054413929966, "percentage": 23.0, "elapsed_time": "0:47:07", "remaining_time": "2:37:44", "throughput": 19994.51, "total_tokens": 56528256}
|
|
{"current_steps": 17970, "total_steps": 78105, "loss": 0.3477, "lr": 4.746768474010957e-06, "epoch": 1.150374495870943, "percentage": 23.01, "elapsed_time": "0:47:07", "remaining_time": "2:37:43", "throughput": 19995.27, "total_tokens": 56543552}
|
|
{"current_steps": 17975, "total_steps": 78105, "loss": 0.4648, "lr": 4.746523421614168e-06, "epoch": 1.1506945778119197, "percentage": 23.01, "elapsed_time": "0:47:08", "remaining_time": "2:37:42", "throughput": 19996.11, "total_tokens": 56559936}
|
|
{"current_steps": 17980, "total_steps": 78105, "loss": 0.3169, "lr": 4.7462782570377e-06, "epoch": 1.1510146597528967, "percentage": 23.02, "elapsed_time": "0:47:09", "remaining_time": "2:37:40", "throughput": 19996.87, "total_tokens": 56575872}
|
|
{"current_steps": 17985, "total_steps": 78105, "loss": 0.2964, "lr": 4.746032980293796e-06, "epoch": 1.1513347416938737, "percentage": 23.03, "elapsed_time": "0:47:09", "remaining_time": "2:37:39", "throughput": 19997.65, "total_tokens": 56591936}
|
|
{"current_steps": 17990, "total_steps": 78105, "loss": 0.2924, "lr": 4.745787591394705e-06, "epoch": 1.1516548236348505, "percentage": 23.03, "elapsed_time": "0:47:10", "remaining_time": "2:37:38", "throughput": 19998.36, "total_tokens": 56607040}
|
|
{"current_steps": 17995, "total_steps": 78105, "loss": 0.2968, "lr": 4.74554209035268e-06, "epoch": 1.1519749055758275, "percentage": 23.04, "elapsed_time": "0:47:11", "remaining_time": "2:37:37", "throughput": 19999.31, "total_tokens": 56623872}
|
|
{"current_steps": 18000, "total_steps": 78105, "loss": 0.3764, "lr": 4.745296477179978e-06, "epoch": 1.1522949875168043, "percentage": 23.05, "elapsed_time": "0:47:12", "remaining_time": "2:37:36", "throughput": 20000.34, "total_tokens": 56641280}
|
|
{"current_steps": 18005, "total_steps": 78105, "loss": 0.4219, "lr": 4.7450507518888665e-06, "epoch": 1.1526150694577812, "percentage": 23.05, "elapsed_time": "0:47:12", "remaining_time": "2:37:35", "throughput": 20001.17, "total_tokens": 56657408}
|
|
{"current_steps": 18010, "total_steps": 78105, "loss": 0.4024, "lr": 4.744804914491613e-06, "epoch": 1.152935151398758, "percentage": 23.06, "elapsed_time": "0:47:13", "remaining_time": "2:37:34", "throughput": 20001.95, "total_tokens": 56672896}
|
|
{"current_steps": 18015, "total_steps": 78105, "loss": 0.3147, "lr": 4.744558965000497e-06, "epoch": 1.153255233339735, "percentage": 23.07, "elapsed_time": "0:47:14", "remaining_time": "2:37:33", "throughput": 20002.78, "total_tokens": 56688960}
|
|
{"current_steps": 18020, "total_steps": 78105, "loss": 0.3024, "lr": 4.744312903427797e-06, "epoch": 1.1535753152807118, "percentage": 23.07, "elapsed_time": "0:47:14", "remaining_time": "2:37:32", "throughput": 20003.53, "total_tokens": 56704768}
|
|
{"current_steps": 18025, "total_steps": 78105, "loss": 0.4075, "lr": 4.7440667297858e-06, "epoch": 1.1538953972216888, "percentage": 23.08, "elapsed_time": "0:47:15", "remaining_time": "2:37:30", "throughput": 20004.3, "total_tokens": 56720576}
|
|
{"current_steps": 18030, "total_steps": 78105, "loss": 0.311, "lr": 4.7438204440868e-06, "epoch": 1.1542154791626658, "percentage": 23.08, "elapsed_time": "0:47:16", "remaining_time": "2:37:29", "throughput": 20005.18, "total_tokens": 56737344}
|
|
{"current_steps": 18035, "total_steps": 78105, "loss": 0.3828, "lr": 4.743574046343095e-06, "epoch": 1.1545355611036425, "percentage": 23.09, "elapsed_time": "0:47:16", "remaining_time": "2:37:28", "throughput": 20005.81, "total_tokens": 56752128}
|
|
{"current_steps": 18040, "total_steps": 78105, "loss": 0.1886, "lr": 4.743327536566988e-06, "epoch": 1.1548556430446195, "percentage": 23.1, "elapsed_time": "0:47:17", "remaining_time": "2:37:27", "throughput": 20006.47, "total_tokens": 56767424}
|
|
{"current_steps": 18045, "total_steps": 78105, "loss": 0.2718, "lr": 4.7430809147707885e-06, "epoch": 1.1551757249855963, "percentage": 23.1, "elapsed_time": "0:47:18", "remaining_time": "2:37:26", "throughput": 20007.3, "total_tokens": 56783552}
|
|
{"current_steps": 18050, "total_steps": 78105, "loss": 0.4257, "lr": 4.742834180966812e-06, "epoch": 1.1554958069265733, "percentage": 23.11, "elapsed_time": "0:47:18", "remaining_time": "2:37:25", "throughput": 20007.95, "total_tokens": 56798528}
|
|
{"current_steps": 18055, "total_steps": 78105, "loss": 0.3652, "lr": 4.74258733516738e-06, "epoch": 1.15581588886755, "percentage": 23.12, "elapsed_time": "0:47:19", "remaining_time": "2:37:24", "throughput": 20008.96, "total_tokens": 56815808}
|
|
{"current_steps": 18060, "total_steps": 78105, "loss": 0.2541, "lr": 4.742340377384816e-06, "epoch": 1.156135970808527, "percentage": 23.12, "elapsed_time": "0:47:20", "remaining_time": "2:37:22", "throughput": 20009.72, "total_tokens": 56831552}
|
|
{"current_steps": 18065, "total_steps": 78105, "loss": 0.2047, "lr": 4.742093307631456e-06, "epoch": 1.1564560527495038, "percentage": 23.13, "elapsed_time": "0:47:20", "remaining_time": "2:37:21", "throughput": 20010.39, "total_tokens": 56846656}
|
|
{"current_steps": 18070, "total_steps": 78105, "loss": 0.1776, "lr": 4.7418461259196326e-06, "epoch": 1.1567761346904808, "percentage": 23.14, "elapsed_time": "0:47:21", "remaining_time": "2:37:20", "throughput": 20011.21, "total_tokens": 56862656}
|
|
{"current_steps": 18075, "total_steps": 78105, "loss": 0.3047, "lr": 4.741598832261692e-06, "epoch": 1.1570962166314576, "percentage": 23.14, "elapsed_time": "0:47:22", "remaining_time": "2:37:19", "throughput": 20011.94, "total_tokens": 56878144}
|
|
{"current_steps": 18080, "total_steps": 78105, "loss": 0.2693, "lr": 4.741351426669982e-06, "epoch": 1.1574162985724346, "percentage": 23.15, "elapsed_time": "0:47:22", "remaining_time": "2:37:18", "throughput": 20013.01, "total_tokens": 56895808}
|
|
{"current_steps": 18085, "total_steps": 78105, "loss": 0.3429, "lr": 4.741103909156856e-06, "epoch": 1.1577363805134113, "percentage": 23.15, "elapsed_time": "0:47:23", "remaining_time": "2:37:17", "throughput": 20013.67, "total_tokens": 56910784}
|
|
{"current_steps": 18090, "total_steps": 78105, "loss": 0.2965, "lr": 4.740856279734674e-06, "epoch": 1.1580564624543883, "percentage": 23.16, "elapsed_time": "0:47:24", "remaining_time": "2:37:16", "throughput": 20014.65, "total_tokens": 56928000}
|
|
{"current_steps": 18095, "total_steps": 78105, "loss": 0.2712, "lr": 4.740608538415802e-06, "epoch": 1.1583765443953653, "percentage": 23.17, "elapsed_time": "0:47:24", "remaining_time": "2:37:15", "throughput": 20015.41, "total_tokens": 56943616}
|
|
{"current_steps": 18100, "total_steps": 78105, "loss": 0.4541, "lr": 4.74036068521261e-06, "epoch": 1.158696626336342, "percentage": 23.17, "elapsed_time": "0:47:25", "remaining_time": "2:37:13", "throughput": 20016.21, "total_tokens": 56959424}
|
|
{"current_steps": 18105, "total_steps": 78105, "loss": 0.4573, "lr": 4.740112720137476e-06, "epoch": 1.159016708277319, "percentage": 23.18, "elapsed_time": "0:47:26", "remaining_time": "2:37:12", "throughput": 20017.25, "total_tokens": 56977152}
|
|
{"current_steps": 18110, "total_steps": 78105, "loss": 0.3981, "lr": 4.73986464320278e-06, "epoch": 1.1593367902182958, "percentage": 23.19, "elapsed_time": "0:47:27", "remaining_time": "2:37:11", "throughput": 20018.03, "total_tokens": 56993024}
|
|
{"current_steps": 18115, "total_steps": 78105, "loss": 0.2842, "lr": 4.739616454420912e-06, "epoch": 1.1596568721592728, "percentage": 23.19, "elapsed_time": "0:47:27", "remaining_time": "2:37:10", "throughput": 20018.71, "total_tokens": 57008320}
|
|
{"current_steps": 18120, "total_steps": 78105, "loss": 0.3857, "lr": 4.739368153804264e-06, "epoch": 1.1599769541002496, "percentage": 23.2, "elapsed_time": "0:47:28", "remaining_time": "2:37:09", "throughput": 20019.48, "total_tokens": 57024256}
|
|
{"current_steps": 18125, "total_steps": 78105, "loss": 0.222, "lr": 4.739119741365233e-06, "epoch": 1.1602970360412266, "percentage": 23.21, "elapsed_time": "0:47:29", "remaining_time": "2:37:08", "throughput": 20020.17, "total_tokens": 57039616}
|
|
{"current_steps": 18130, "total_steps": 78105, "loss": 0.3577, "lr": 4.738871217116226e-06, "epoch": 1.1606171179822034, "percentage": 23.21, "elapsed_time": "0:47:29", "remaining_time": "2:37:07", "throughput": 20020.99, "total_tokens": 57055744}
|
|
{"current_steps": 18135, "total_steps": 78105, "loss": 0.3976, "lr": 4.7386225810696535e-06, "epoch": 1.1609371999231803, "percentage": 23.22, "elapsed_time": "0:47:30", "remaining_time": "2:37:06", "throughput": 20021.65, "total_tokens": 57070720}
|
|
{"current_steps": 18140, "total_steps": 78105, "loss": 0.3639, "lr": 4.7383738332379295e-06, "epoch": 1.1612572818641573, "percentage": 23.23, "elapsed_time": "0:47:31", "remaining_time": "2:37:04", "throughput": 20022.34, "total_tokens": 57086080}
|
|
{"current_steps": 18145, "total_steps": 78105, "loss": 0.3494, "lr": 4.738124973633474e-06, "epoch": 1.161577363805134, "percentage": 23.23, "elapsed_time": "0:47:31", "remaining_time": "2:37:03", "throughput": 20023.03, "total_tokens": 57101184}
|
|
{"current_steps": 18150, "total_steps": 78105, "loss": 0.3558, "lr": 4.737876002268716e-06, "epoch": 1.161897445746111, "percentage": 23.24, "elapsed_time": "0:47:32", "remaining_time": "2:37:02", "throughput": 20024.2, "total_tokens": 57119680}
|
|
{"current_steps": 18155, "total_steps": 78105, "loss": 0.2029, "lr": 4.737626919156089e-06, "epoch": 1.1622175276870879, "percentage": 23.24, "elapsed_time": "0:47:33", "remaining_time": "2:37:01", "throughput": 20025.05, "total_tokens": 57135808}
|
|
{"current_steps": 18160, "total_steps": 78105, "loss": 0.4155, "lr": 4.737377724308027e-06, "epoch": 1.1625376096280648, "percentage": 23.25, "elapsed_time": "0:47:33", "remaining_time": "2:37:00", "throughput": 20025.73, "total_tokens": 57151296}
|
|
{"current_steps": 18165, "total_steps": 78105, "loss": 0.3585, "lr": 4.737128417736976e-06, "epoch": 1.1628576915690416, "percentage": 23.26, "elapsed_time": "0:47:34", "remaining_time": "2:36:59", "throughput": 20026.46, "total_tokens": 57166656}
|
|
{"current_steps": 18170, "total_steps": 78105, "loss": 0.343, "lr": 4.7368789994553845e-06, "epoch": 1.1631777735100186, "percentage": 23.26, "elapsed_time": "0:47:35", "remaining_time": "2:36:58", "throughput": 20027.56, "total_tokens": 57184320}
|
|
{"current_steps": 18175, "total_steps": 78105, "loss": 0.2874, "lr": 4.736629469475708e-06, "epoch": 1.1634978554509954, "percentage": 23.27, "elapsed_time": "0:47:35", "remaining_time": "2:36:57", "throughput": 20028.2, "total_tokens": 57199360}
|
|
{"current_steps": 18180, "total_steps": 78105, "loss": 0.2837, "lr": 4.7363798278104046e-06, "epoch": 1.1638179373919724, "percentage": 23.28, "elapsed_time": "0:47:36", "remaining_time": "2:36:55", "throughput": 20028.85, "total_tokens": 57214528}
|
|
{"current_steps": 18185, "total_steps": 78105, "loss": 0.4724, "lr": 4.736130074471942e-06, "epoch": 1.1641380193329494, "percentage": 23.28, "elapsed_time": "0:47:37", "remaining_time": "2:36:54", "throughput": 20029.72, "total_tokens": 57230784}
|
|
{"current_steps": 18190, "total_steps": 78105, "loss": 0.253, "lr": 4.735880209472792e-06, "epoch": 1.1644581012739261, "percentage": 23.29, "elapsed_time": "0:47:37", "remaining_time": "2:36:53", "throughput": 20030.52, "total_tokens": 57246656}
|
|
{"current_steps": 18195, "total_steps": 78105, "loss": 0.282, "lr": 4.73563023282543e-06, "epoch": 1.164778183214903, "percentage": 23.3, "elapsed_time": "0:47:38", "remaining_time": "2:36:52", "throughput": 20031.17, "total_tokens": 57261696}
|
|
{"current_steps": 18200, "total_steps": 78105, "loss": 0.4472, "lr": 4.735380144542339e-06, "epoch": 1.1650982651558799, "percentage": 23.3, "elapsed_time": "0:47:39", "remaining_time": "2:36:51", "throughput": 20031.99, "total_tokens": 57277824}
|
|
{"current_steps": 18205, "total_steps": 78105, "loss": 0.3102, "lr": 4.7351299446360065e-06, "epoch": 1.1654183470968569, "percentage": 23.31, "elapsed_time": "0:47:39", "remaining_time": "2:36:50", "throughput": 20032.74, "total_tokens": 57293440}
|
|
{"current_steps": 18210, "total_steps": 78105, "loss": 0.2573, "lr": 4.734879633118928e-06, "epoch": 1.1657384290378336, "percentage": 23.31, "elapsed_time": "0:47:40", "remaining_time": "2:36:49", "throughput": 20033.46, "total_tokens": 57308928}
|
|
{"current_steps": 18215, "total_steps": 78105, "loss": 0.2893, "lr": 4.734629210003601e-06, "epoch": 1.1660585109788106, "percentage": 23.32, "elapsed_time": "0:47:41", "remaining_time": "2:36:47", "throughput": 20034.3, "total_tokens": 57325056}
|
|
{"current_steps": 18220, "total_steps": 78105, "loss": 0.3903, "lr": 4.734378675302532e-06, "epoch": 1.1663785929197874, "percentage": 23.33, "elapsed_time": "0:47:42", "remaining_time": "2:36:46", "throughput": 20035.02, "total_tokens": 57340608}
|
|
{"current_steps": 18225, "total_steps": 78105, "loss": 0.2948, "lr": 4.73412802902823e-06, "epoch": 1.1666986748607644, "percentage": 23.33, "elapsed_time": "0:47:42", "remaining_time": "2:36:45", "throughput": 20035.66, "total_tokens": 57355520}
|
|
{"current_steps": 18230, "total_steps": 78105, "loss": 0.2962, "lr": 4.733877271193211e-06, "epoch": 1.1670187568017412, "percentage": 23.34, "elapsed_time": "0:47:43", "remaining_time": "2:36:44", "throughput": 20036.41, "total_tokens": 57371328}
|
|
{"current_steps": 18235, "total_steps": 78105, "loss": 0.3113, "lr": 4.733626401809997e-06, "epoch": 1.1673388387427182, "percentage": 23.35, "elapsed_time": "0:47:44", "remaining_time": "2:36:43", "throughput": 20037.45, "total_tokens": 57389056}
|
|
{"current_steps": 18240, "total_steps": 78105, "loss": 0.195, "lr": 4.733375420891115e-06, "epoch": 1.167658920683695, "percentage": 23.35, "elapsed_time": "0:47:44", "remaining_time": "2:36:42", "throughput": 20038.01, "total_tokens": 57403456}
|
|
{"current_steps": 18245, "total_steps": 78105, "loss": 0.3968, "lr": 4.733124328449098e-06, "epoch": 1.167979002624672, "percentage": 23.36, "elapsed_time": "0:47:45", "remaining_time": "2:36:41", "throughput": 20038.76, "total_tokens": 57419072}
|
|
{"current_steps": 18250, "total_steps": 78105, "loss": 0.2418, "lr": 4.732873124496483e-06, "epoch": 1.168299084565649, "percentage": 23.37, "elapsed_time": "0:47:46", "remaining_time": "2:36:39", "throughput": 20039.5, "total_tokens": 57434624}
|
|
{"current_steps": 18255, "total_steps": 78105, "loss": 0.3917, "lr": 4.732621809045817e-06, "epoch": 1.1686191665066257, "percentage": 23.37, "elapsed_time": "0:47:46", "remaining_time": "2:36:38", "throughput": 20040.44, "total_tokens": 57451008}
|
|
{"current_steps": 18260, "total_steps": 78105, "loss": 0.4322, "lr": 4.732370382109644e-06, "epoch": 1.1689392484476027, "percentage": 23.38, "elapsed_time": "0:47:47", "remaining_time": "2:36:37", "throughput": 20041.25, "total_tokens": 57467264}
|
|
{"current_steps": 18265, "total_steps": 78105, "loss": 0.465, "lr": 4.732118843700525e-06, "epoch": 1.1692593303885794, "percentage": 23.39, "elapsed_time": "0:47:48", "remaining_time": "2:36:36", "throughput": 20042.01, "total_tokens": 57483072}
|
|
{"current_steps": 18270, "total_steps": 78105, "loss": 0.2412, "lr": 4.731867193831016e-06, "epoch": 1.1695794123295564, "percentage": 23.39, "elapsed_time": "0:47:48", "remaining_time": "2:36:35", "throughput": 20042.64, "total_tokens": 57498240}
|
|
{"current_steps": 18275, "total_steps": 78105, "loss": 0.3972, "lr": 4.7316154325136855e-06, "epoch": 1.1698994942705332, "percentage": 23.4, "elapsed_time": "0:47:49", "remaining_time": "2:36:34", "throughput": 20043.42, "total_tokens": 57514112}
|
|
{"current_steps": 18280, "total_steps": 78105, "loss": 0.3429, "lr": 4.731363559761104e-06, "epoch": 1.1702195762115102, "percentage": 23.4, "elapsed_time": "0:47:50", "remaining_time": "2:36:33", "throughput": 20044.25, "total_tokens": 57530496}
|
|
{"current_steps": 18285, "total_steps": 78105, "loss": 0.3387, "lr": 4.731111575585849e-06, "epoch": 1.170539658152487, "percentage": 23.41, "elapsed_time": "0:47:50", "remaining_time": "2:36:32", "throughput": 20045.17, "total_tokens": 57547648}
|
|
{"current_steps": 18290, "total_steps": 78105, "loss": 0.2618, "lr": 4.730859480000503e-06, "epoch": 1.170859740093464, "percentage": 23.42, "elapsed_time": "0:47:51", "remaining_time": "2:36:31", "throughput": 20045.87, "total_tokens": 57563136}
|
|
{"current_steps": 18295, "total_steps": 78105, "loss": 0.2651, "lr": 4.730607273017655e-06, "epoch": 1.171179822034441, "percentage": 23.42, "elapsed_time": "0:47:52", "remaining_time": "2:36:29", "throughput": 20046.62, "total_tokens": 57578880}
|
|
{"current_steps": 18300, "total_steps": 78105, "loss": 0.3716, "lr": 4.730354954649899e-06, "epoch": 1.1714999039754177, "percentage": 23.43, "elapsed_time": "0:47:52", "remaining_time": "2:36:28", "throughput": 20047.39, "total_tokens": 57594624}
|
|
{"current_steps": 18305, "total_steps": 78105, "loss": 0.2422, "lr": 4.730102524909835e-06, "epoch": 1.1718199859163947, "percentage": 23.44, "elapsed_time": "0:47:53", "remaining_time": "2:36:27", "throughput": 20048.1, "total_tokens": 57609856}
|
|
{"current_steps": 18310, "total_steps": 78105, "loss": 0.2457, "lr": 4.729849983810066e-06, "epoch": 1.1721400678573715, "percentage": 23.44, "elapsed_time": "0:47:54", "remaining_time": "2:36:26", "throughput": 20048.8, "total_tokens": 57624896}
|
|
{"current_steps": 18315, "total_steps": 78105, "loss": 0.396, "lr": 4.729597331363203e-06, "epoch": 1.1724601497983484, "percentage": 23.45, "elapsed_time": "0:47:54", "remaining_time": "2:36:25", "throughput": 20049.42, "total_tokens": 57639744}
|
|
{"current_steps": 18320, "total_steps": 78105, "loss": 0.2308, "lr": 4.7293445675818635e-06, "epoch": 1.1727802317393252, "percentage": 23.46, "elapsed_time": "0:47:55", "remaining_time": "2:36:23", "throughput": 20050.1, "total_tokens": 57654912}
|
|
{"current_steps": 18325, "total_steps": 78105, "loss": 0.2958, "lr": 4.729091692478668e-06, "epoch": 1.1731003136803022, "percentage": 23.46, "elapsed_time": "0:47:56", "remaining_time": "2:36:22", "throughput": 20050.73, "total_tokens": 57669760}
|
|
{"current_steps": 18330, "total_steps": 78105, "loss": 0.3218, "lr": 4.728838706066245e-06, "epoch": 1.173420395621279, "percentage": 23.47, "elapsed_time": "0:47:56", "remaining_time": "2:36:21", "throughput": 20051.53, "total_tokens": 57685888}
|
|
{"current_steps": 18335, "total_steps": 78105, "loss": 0.2418, "lr": 4.728585608357226e-06, "epoch": 1.173740477562256, "percentage": 23.47, "elapsed_time": "0:47:57", "remaining_time": "2:36:20", "throughput": 20052.36, "total_tokens": 57702080}
|
|
{"current_steps": 18340, "total_steps": 78105, "loss": 0.3131, "lr": 4.728332399364251e-06, "epoch": 1.1740605595032327, "percentage": 23.48, "elapsed_time": "0:47:58", "remaining_time": "2:36:19", "throughput": 20053.05, "total_tokens": 57716992}
|
|
{"current_steps": 18345, "total_steps": 78105, "loss": 0.2745, "lr": 4.728079079099962e-06, "epoch": 1.1743806414442097, "percentage": 23.49, "elapsed_time": "0:47:58", "remaining_time": "2:36:18", "throughput": 20053.91, "total_tokens": 57733248}
|
|
{"current_steps": 18350, "total_steps": 78105, "loss": 0.2247, "lr": 4.727825647577009e-06, "epoch": 1.1747007233851865, "percentage": 23.49, "elapsed_time": "0:47:59", "remaining_time": "2:36:17", "throughput": 20054.55, "total_tokens": 57748288}
|
|
{"current_steps": 18355, "total_steps": 78105, "loss": 0.3643, "lr": 4.727572104808049e-06, "epoch": 1.1750208053261635, "percentage": 23.5, "elapsed_time": "0:48:00", "remaining_time": "2:36:15", "throughput": 20055.39, "total_tokens": 57764480}
|
|
{"current_steps": 18360, "total_steps": 78105, "loss": 0.536, "lr": 4.72731845080574e-06, "epoch": 1.1753408872671405, "percentage": 23.51, "elapsed_time": "0:48:00", "remaining_time": "2:36:14", "throughput": 20056.15, "total_tokens": 57780160}
|
|
{"current_steps": 18365, "total_steps": 78105, "loss": 0.331, "lr": 4.72706468558275e-06, "epoch": 1.1756609692081172, "percentage": 23.51, "elapsed_time": "0:48:01", "remaining_time": "2:36:13", "throughput": 20056.7, "total_tokens": 57794560}
|
|
{"current_steps": 18370, "total_steps": 78105, "loss": 0.4336, "lr": 4.726810809151749e-06, "epoch": 1.1759810511490942, "percentage": 23.52, "elapsed_time": "0:48:02", "remaining_time": "2:36:12", "throughput": 20057.44, "total_tokens": 57810176}
|
|
{"current_steps": 18375, "total_steps": 78105, "loss": 0.4832, "lr": 4.726556821525415e-06, "epoch": 1.176301133090071, "percentage": 23.53, "elapsed_time": "0:48:02", "remaining_time": "2:36:11", "throughput": 20058.22, "total_tokens": 57826240}
|
|
{"current_steps": 18380, "total_steps": 78105, "loss": 0.274, "lr": 4.726302722716433e-06, "epoch": 1.176621215031048, "percentage": 23.53, "elapsed_time": "0:48:03", "remaining_time": "2:36:10", "throughput": 20059.05, "total_tokens": 57842560}
|
|
{"current_steps": 18385, "total_steps": 78105, "loss": 0.3844, "lr": 4.726048512737488e-06, "epoch": 1.1769412969720248, "percentage": 23.54, "elapsed_time": "0:48:04", "remaining_time": "2:36:09", "throughput": 20059.78, "total_tokens": 57858176}
|
|
{"current_steps": 18390, "total_steps": 78105, "loss": 0.2736, "lr": 4.725794191601275e-06, "epoch": 1.1772613789130018, "percentage": 23.55, "elapsed_time": "0:48:04", "remaining_time": "2:36:07", "throughput": 20060.57, "total_tokens": 57874432}
|
|
{"current_steps": 18395, "total_steps": 78105, "loss": 0.3454, "lr": 4.725539759320494e-06, "epoch": 1.1775814608539785, "percentage": 23.55, "elapsed_time": "0:48:05", "remaining_time": "2:36:06", "throughput": 20061.36, "total_tokens": 57890496}
|
|
{"current_steps": 18400, "total_steps": 78105, "loss": 0.2779, "lr": 4.72528521590785e-06, "epoch": 1.1779015427949555, "percentage": 23.56, "elapsed_time": "0:48:06", "remaining_time": "2:36:05", "throughput": 20062.03, "total_tokens": 57905856}
|
|
{"current_steps": 18405, "total_steps": 78105, "loss": 0.4571, "lr": 4.725030561376054e-06, "epoch": 1.1782216247359325, "percentage": 23.56, "elapsed_time": "0:48:07", "remaining_time": "2:36:04", "throughput": 20062.84, "total_tokens": 57922112}
|
|
{"current_steps": 18410, "total_steps": 78105, "loss": 0.3497, "lr": 4.724775795737821e-06, "epoch": 1.1785417066769093, "percentage": 23.57, "elapsed_time": "0:48:07", "remaining_time": "2:36:03", "throughput": 20063.47, "total_tokens": 57937152}
|
|
{"current_steps": 18415, "total_steps": 78105, "loss": 0.2677, "lr": 4.724520919005873e-06, "epoch": 1.1788617886178863, "percentage": 23.58, "elapsed_time": "0:48:08", "remaining_time": "2:36:02", "throughput": 20064.23, "total_tokens": 57952832}
|
|
{"current_steps": 18420, "total_steps": 78105, "loss": 0.2739, "lr": 4.724265931192938e-06, "epoch": 1.179181870558863, "percentage": 23.58, "elapsed_time": "0:48:09", "remaining_time": "2:36:01", "throughput": 20065.0, "total_tokens": 57968448}
|
|
{"current_steps": 18425, "total_steps": 78105, "loss": 0.333, "lr": 4.724010832311747e-06, "epoch": 1.17950195249984, "percentage": 23.59, "elapsed_time": "0:48:09", "remaining_time": "2:35:59", "throughput": 20065.75, "total_tokens": 57984192}
|
|
{"current_steps": 18430, "total_steps": 78105, "loss": 0.2662, "lr": 4.723755622375041e-06, "epoch": 1.1798220344408168, "percentage": 23.6, "elapsed_time": "0:48:10", "remaining_time": "2:35:58", "throughput": 20066.49, "total_tokens": 57999744}
|
|
{"current_steps": 18435, "total_steps": 78105, "loss": 0.3882, "lr": 4.723500301395562e-06, "epoch": 1.1801421163817938, "percentage": 23.6, "elapsed_time": "0:48:11", "remaining_time": "2:35:57", "throughput": 20067.17, "total_tokens": 58014976}
|
|
{"current_steps": 18440, "total_steps": 78105, "loss": 0.3419, "lr": 4.723244869386059e-06, "epoch": 1.1804621983227706, "percentage": 23.61, "elapsed_time": "0:48:11", "remaining_time": "2:35:56", "throughput": 20067.91, "total_tokens": 58030464}
|
|
{"current_steps": 18445, "total_steps": 78105, "loss": 0.3195, "lr": 4.7229893263592885e-06, "epoch": 1.1807822802637475, "percentage": 23.62, "elapsed_time": "0:48:12", "remaining_time": "2:35:55", "throughput": 20068.61, "total_tokens": 58045760}
|
|
{"current_steps": 18450, "total_steps": 78105, "loss": 0.276, "lr": 4.722733672328009e-06, "epoch": 1.1811023622047245, "percentage": 23.62, "elapsed_time": "0:48:13", "remaining_time": "2:35:54", "throughput": 20069.28, "total_tokens": 58061184}
|
|
{"current_steps": 18455, "total_steps": 78105, "loss": 0.2063, "lr": 4.722477907304988e-06, "epoch": 1.1814224441457013, "percentage": 23.63, "elapsed_time": "0:48:13", "remaining_time": "2:35:52", "throughput": 20069.95, "total_tokens": 58076288}
|
|
{"current_steps": 18460, "total_steps": 78105, "loss": 0.338, "lr": 4.722222031302998e-06, "epoch": 1.181742526086678, "percentage": 23.63, "elapsed_time": "0:48:14", "remaining_time": "2:35:51", "throughput": 20070.61, "total_tokens": 58091072}
|
|
{"current_steps": 18465, "total_steps": 78105, "loss": 0.415, "lr": 4.721966044334814e-06, "epoch": 1.182062608027655, "percentage": 23.64, "elapsed_time": "0:48:14", "remaining_time": "2:35:50", "throughput": 20071.24, "total_tokens": 58105984}
|
|
{"current_steps": 18470, "total_steps": 78105, "loss": 0.3179, "lr": 4.72170994641322e-06, "epoch": 1.182382689968632, "percentage": 23.65, "elapsed_time": "0:48:15", "remaining_time": "2:35:49", "throughput": 20072.07, "total_tokens": 58122176}
|
|
{"current_steps": 18475, "total_steps": 78105, "loss": 0.3703, "lr": 4.721453737551003e-06, "epoch": 1.1827027719096088, "percentage": 23.65, "elapsed_time": "0:48:16", "remaining_time": "2:35:48", "throughput": 20072.75, "total_tokens": 58137280}
|
|
{"current_steps": 18480, "total_steps": 78105, "loss": 0.2988, "lr": 4.721197417760958e-06, "epoch": 1.1830228538505858, "percentage": 23.66, "elapsed_time": "0:48:17", "remaining_time": "2:35:47", "throughput": 20073.5, "total_tokens": 58153216}
|
|
{"current_steps": 18485, "total_steps": 78105, "loss": 0.3089, "lr": 4.720940987055884e-06, "epoch": 1.1833429357915626, "percentage": 23.67, "elapsed_time": "0:48:17", "remaining_time": "2:35:46", "throughput": 20074.42, "total_tokens": 58170304}
|
|
{"current_steps": 18490, "total_steps": 78105, "loss": 0.2705, "lr": 4.720684445448585e-06, "epoch": 1.1836630177325396, "percentage": 23.67, "elapsed_time": "0:48:18", "remaining_time": "2:35:45", "throughput": 20075.31, "total_tokens": 58187328}
|
|
{"current_steps": 18495, "total_steps": 78105, "loss": 0.3306, "lr": 4.7204277929518725e-06, "epoch": 1.1839830996735163, "percentage": 23.68, "elapsed_time": "0:48:19", "remaining_time": "2:35:43", "throughput": 20076.05, "total_tokens": 58203008}
|
|
{"current_steps": 18500, "total_steps": 78105, "loss": 0.2484, "lr": 4.720171029578561e-06, "epoch": 1.1843031816144933, "percentage": 23.69, "elapsed_time": "0:48:19", "remaining_time": "2:35:43", "throughput": 20076.98, "total_tokens": 58220288}
|
|
{"current_steps": 18505, "total_steps": 78105, "loss": 0.4562, "lr": 4.719914155341473e-06, "epoch": 1.18462326355547, "percentage": 23.69, "elapsed_time": "0:48:20", "remaining_time": "2:35:41", "throughput": 20077.61, "total_tokens": 58235264}
|
|
{"current_steps": 18510, "total_steps": 78105, "loss": 0.367, "lr": 4.719657170253436e-06, "epoch": 1.184943345496447, "percentage": 23.7, "elapsed_time": "0:48:21", "remaining_time": "2:35:40", "throughput": 20078.24, "total_tokens": 58250176}
|
|
{"current_steps": 18515, "total_steps": 78105, "loss": 0.3158, "lr": 4.71940007432728e-06, "epoch": 1.185263427437424, "percentage": 23.71, "elapsed_time": "0:48:21", "remaining_time": "2:35:39", "throughput": 20079.05, "total_tokens": 58266432}
|
|
{"current_steps": 18520, "total_steps": 78105, "loss": 0.347, "lr": 4.719142867575847e-06, "epoch": 1.1855835093784008, "percentage": 23.71, "elapsed_time": "0:48:22", "remaining_time": "2:35:38", "throughput": 20079.83, "total_tokens": 58282368}
|
|
{"current_steps": 18525, "total_steps": 78105, "loss": 0.1971, "lr": 4.718885550011977e-06, "epoch": 1.1859035913193778, "percentage": 23.72, "elapsed_time": "0:48:23", "remaining_time": "2:35:37", "throughput": 20080.58, "total_tokens": 58297856}
|
|
{"current_steps": 18530, "total_steps": 78105, "loss": 0.372, "lr": 4.71862812164852e-06, "epoch": 1.1862236732603546, "percentage": 23.72, "elapsed_time": "0:48:23", "remaining_time": "2:35:36", "throughput": 20081.32, "total_tokens": 58313344}
|
|
{"current_steps": 18535, "total_steps": 78105, "loss": 0.38, "lr": 4.718370582498331e-06, "epoch": 1.1865437552013316, "percentage": 23.73, "elapsed_time": "0:48:24", "remaining_time": "2:35:34", "throughput": 20082.09, "total_tokens": 58329024}
|
|
{"current_steps": 18540, "total_steps": 78105, "loss": 0.5988, "lr": 4.718112932574271e-06, "epoch": 1.1868638371423084, "percentage": 23.74, "elapsed_time": "0:48:25", "remaining_time": "2:35:33", "throughput": 20082.79, "total_tokens": 58344320}
|
|
{"current_steps": 18545, "total_steps": 78105, "loss": 0.3548, "lr": 4.7178551718892045e-06, "epoch": 1.1871839190832854, "percentage": 23.74, "elapsed_time": "0:48:25", "remaining_time": "2:35:32", "throughput": 20083.87, "total_tokens": 58362432}
|
|
{"current_steps": 18550, "total_steps": 78105, "loss": 0.3206, "lr": 4.717597300456003e-06, "epoch": 1.1875040010242621, "percentage": 23.75, "elapsed_time": "0:48:26", "remaining_time": "2:35:31", "throughput": 20084.52, "total_tokens": 58377536}
|
|
{"current_steps": 18555, "total_steps": 78105, "loss": 0.4159, "lr": 4.717339318287543e-06, "epoch": 1.1878240829652391, "percentage": 23.76, "elapsed_time": "0:48:27", "remaining_time": "2:35:30", "throughput": 20085.27, "total_tokens": 58393088}
|
|
{"current_steps": 18560, "total_steps": 78105, "loss": 0.5395, "lr": 4.717081225396708e-06, "epoch": 1.188144164906216, "percentage": 23.76, "elapsed_time": "0:48:27", "remaining_time": "2:35:29", "throughput": 20085.9, "total_tokens": 58408000}
|
|
{"current_steps": 18565, "total_steps": 78105, "loss": 0.2805, "lr": 4.716823021796385e-06, "epoch": 1.1884642468471929, "percentage": 23.77, "elapsed_time": "0:48:28", "remaining_time": "2:35:28", "throughput": 20086.56, "total_tokens": 58423168}
|
|
{"current_steps": 18570, "total_steps": 78105, "loss": 0.4687, "lr": 4.716564707499467e-06, "epoch": 1.1887843287881699, "percentage": 23.78, "elapsed_time": "0:48:29", "remaining_time": "2:35:26", "throughput": 20087.17, "total_tokens": 58437952}
|
|
{"current_steps": 18575, "total_steps": 78105, "loss": 0.2759, "lr": 4.716306282518852e-06, "epoch": 1.1891044107291466, "percentage": 23.78, "elapsed_time": "0:48:29", "remaining_time": "2:35:25", "throughput": 20088.1, "total_tokens": 58454784}
|
|
{"current_steps": 18580, "total_steps": 78105, "loss": 0.4194, "lr": 4.716047746867447e-06, "epoch": 1.1894244926701236, "percentage": 23.79, "elapsed_time": "0:48:30", "remaining_time": "2:35:24", "throughput": 20088.84, "total_tokens": 58470592}
|
|
{"current_steps": 18585, "total_steps": 78105, "loss": 0.2592, "lr": 4.7157891005581605e-06, "epoch": 1.1897445746111004, "percentage": 23.79, "elapsed_time": "0:48:31", "remaining_time": "2:35:23", "throughput": 20089.51, "total_tokens": 58486080}
|
|
{"current_steps": 18590, "total_steps": 78105, "loss": 0.4028, "lr": 4.715530343603907e-06, "epoch": 1.1900646565520774, "percentage": 23.8, "elapsed_time": "0:48:31", "remaining_time": "2:35:22", "throughput": 20090.22, "total_tokens": 58502016}
|
|
{"current_steps": 18595, "total_steps": 78105, "loss": 0.372, "lr": 4.71527147601761e-06, "epoch": 1.1903847384930542, "percentage": 23.81, "elapsed_time": "0:48:32", "remaining_time": "2:35:21", "throughput": 20091.42, "total_tokens": 58521280}
|
|
{"current_steps": 18600, "total_steps": 78105, "loss": 0.5035, "lr": 4.7150124978121924e-06, "epoch": 1.1907048204340311, "percentage": 23.81, "elapsed_time": "0:48:33", "remaining_time": "2:35:20", "throughput": 20092.12, "total_tokens": 58536960}
|
|
{"current_steps": 18605, "total_steps": 78105, "loss": 0.3701, "lr": 4.7147534090005896e-06, "epoch": 1.191024902375008, "percentage": 23.82, "elapsed_time": "0:48:34", "remaining_time": "2:35:19", "throughput": 20092.8, "total_tokens": 58552448}
|
|
{"current_steps": 18610, "total_steps": 78105, "loss": 0.4091, "lr": 4.714494209595738e-06, "epoch": 1.191344984315985, "percentage": 23.83, "elapsed_time": "0:48:34", "remaining_time": "2:35:18", "throughput": 20093.42, "total_tokens": 58567424}
|
|
{"current_steps": 18615, "total_steps": 78105, "loss": 0.2487, "lr": 4.714234899610579e-06, "epoch": 1.1916650662569617, "percentage": 23.83, "elapsed_time": "0:48:35", "remaining_time": "2:35:17", "throughput": 20094.13, "total_tokens": 58583296}
|
|
{"current_steps": 18620, "total_steps": 78105, "loss": 0.2477, "lr": 4.713975479058064e-06, "epoch": 1.1919851481979387, "percentage": 23.84, "elapsed_time": "0:48:36", "remaining_time": "2:35:16", "throughput": 20094.87, "total_tokens": 58598976}
|
|
{"current_steps": 18625, "total_steps": 78105, "loss": 0.3458, "lr": 4.713715947951145e-06, "epoch": 1.1923052301389157, "percentage": 23.85, "elapsed_time": "0:48:36", "remaining_time": "2:35:15", "throughput": 20095.79, "total_tokens": 58615872}
|
|
{"current_steps": 18630, "total_steps": 78105, "loss": 0.2194, "lr": 4.7134563063027825e-06, "epoch": 1.1926253120798924, "percentage": 23.85, "elapsed_time": "0:48:37", "remaining_time": "2:35:13", "throughput": 20096.43, "total_tokens": 58630912}
|
|
{"current_steps": 18635, "total_steps": 78105, "loss": 0.4626, "lr": 4.713196554125942e-06, "epoch": 1.1929453940208694, "percentage": 23.86, "elapsed_time": "0:48:38", "remaining_time": "2:35:12", "throughput": 20097.08, "total_tokens": 58646144}
|
|
{"current_steps": 18640, "total_steps": 78105, "loss": 0.3525, "lr": 4.712936691433593e-06, "epoch": 1.1932654759618462, "percentage": 23.87, "elapsed_time": "0:48:38", "remaining_time": "2:35:11", "throughput": 20097.7, "total_tokens": 58661248}
|
|
{"current_steps": 18645, "total_steps": 78105, "loss": 0.3118, "lr": 4.712676718238714e-06, "epoch": 1.1935855579028232, "percentage": 23.87, "elapsed_time": "0:48:39", "remaining_time": "2:35:10", "throughput": 20098.53, "total_tokens": 58677824}
|
|
{"current_steps": 18650, "total_steps": 78105, "loss": 0.253, "lr": 4.712416634554283e-06, "epoch": 1.1939056398438, "percentage": 23.88, "elapsed_time": "0:48:40", "remaining_time": "2:35:09", "throughput": 20099.13, "total_tokens": 58692544}
|
|
{"current_steps": 18655, "total_steps": 78105, "loss": 0.311, "lr": 4.71215644039329e-06, "epoch": 1.194225721784777, "percentage": 23.88, "elapsed_time": "0:48:40", "remaining_time": "2:35:08", "throughput": 20100.0, "total_tokens": 58709248}
|
|
{"current_steps": 18660, "total_steps": 78105, "loss": 0.306, "lr": 4.711896135768728e-06, "epoch": 1.1945458037257537, "percentage": 23.89, "elapsed_time": "0:48:41", "remaining_time": "2:35:07", "throughput": 20100.65, "total_tokens": 58724416}
|
|
{"current_steps": 18665, "total_steps": 78105, "loss": 0.326, "lr": 4.711635720693593e-06, "epoch": 1.1948658856667307, "percentage": 23.9, "elapsed_time": "0:48:42", "remaining_time": "2:35:05", "throughput": 20101.28, "total_tokens": 58739392}
|
|
{"current_steps": 18670, "total_steps": 78105, "loss": 0.298, "lr": 4.711375195180891e-06, "epoch": 1.1951859676077077, "percentage": 23.9, "elapsed_time": "0:48:42", "remaining_time": "2:35:04", "throughput": 20102.01, "total_tokens": 58755072}
|
|
{"current_steps": 18675, "total_steps": 78105, "loss": 0.415, "lr": 4.71111455924363e-06, "epoch": 1.1955060495486844, "percentage": 23.91, "elapsed_time": "0:48:43", "remaining_time": "2:35:03", "throughput": 20102.89, "total_tokens": 58771840}
|
|
{"current_steps": 18680, "total_steps": 78105, "loss": 0.3134, "lr": 4.710853812894825e-06, "epoch": 1.1958261314896614, "percentage": 23.92, "elapsed_time": "0:48:44", "remaining_time": "2:35:02", "throughput": 20103.66, "total_tokens": 58787648}
|
|
{"current_steps": 18685, "total_steps": 78105, "loss": 0.1993, "lr": 4.710592956147497e-06, "epoch": 1.1961462134306382, "percentage": 23.92, "elapsed_time": "0:48:44", "remaining_time": "2:35:01", "throughput": 20104.41, "total_tokens": 58803520}
|
|
{"current_steps": 18690, "total_steps": 78105, "loss": 0.3489, "lr": 4.710331989014671e-06, "epoch": 1.1964662953716152, "percentage": 23.93, "elapsed_time": "0:48:45", "remaining_time": "2:35:00", "throughput": 20105.3, "total_tokens": 58820352}
|
|
{"current_steps": 18695, "total_steps": 78105, "loss": 0.3614, "lr": 4.710070911509379e-06, "epoch": 1.196786377312592, "percentage": 23.94, "elapsed_time": "0:48:46", "remaining_time": "2:34:59", "throughput": 20106.04, "total_tokens": 58836032}
|
|
{"current_steps": 18700, "total_steps": 78105, "loss": 0.3157, "lr": 4.709809723644657e-06, "epoch": 1.197106459253569, "percentage": 23.94, "elapsed_time": "0:48:46", "remaining_time": "2:34:58", "throughput": 20106.82, "total_tokens": 58852096}
|
|
{"current_steps": 18705, "total_steps": 78105, "loss": 0.3826, "lr": 4.709548425433548e-06, "epoch": 1.1974265411945457, "percentage": 23.95, "elapsed_time": "0:48:47", "remaining_time": "2:34:57", "throughput": 20107.48, "total_tokens": 58867776}
|
|
{"current_steps": 18710, "total_steps": 78105, "loss": 0.3949, "lr": 4.7092870168891e-06, "epoch": 1.1977466231355227, "percentage": 23.95, "elapsed_time": "0:48:48", "remaining_time": "2:34:55", "throughput": 20108.02, "total_tokens": 58882112}
|
|
{"current_steps": 18715, "total_steps": 78105, "loss": 0.3101, "lr": 4.709025498024367e-06, "epoch": 1.1980667050764997, "percentage": 23.96, "elapsed_time": "0:48:48", "remaining_time": "2:34:54", "throughput": 20108.76, "total_tokens": 58897792}
|
|
{"current_steps": 18720, "total_steps": 78105, "loss": 0.3016, "lr": 4.708763868852405e-06, "epoch": 1.1983867870174765, "percentage": 23.97, "elapsed_time": "0:48:49", "remaining_time": "2:34:53", "throughput": 20109.34, "total_tokens": 58912576}
|
|
{"current_steps": 18725, "total_steps": 78105, "loss": 0.2631, "lr": 4.708502129386282e-06, "epoch": 1.1987068689584532, "percentage": 23.97, "elapsed_time": "0:48:50", "remaining_time": "2:34:52", "throughput": 20110.09, "total_tokens": 58928704}
|
|
{"current_steps": 18730, "total_steps": 78105, "loss": 0.3297, "lr": 4.708240279639066e-06, "epoch": 1.1990269508994302, "percentage": 23.98, "elapsed_time": "0:48:51", "remaining_time": "2:34:51", "throughput": 20111.12, "total_tokens": 58946624}
|
|
{"current_steps": 18735, "total_steps": 78105, "loss": 0.3274, "lr": 4.7079783196238324e-06, "epoch": 1.1993470328404072, "percentage": 23.99, "elapsed_time": "0:48:51", "remaining_time": "2:34:50", "throughput": 20111.9, "total_tokens": 58962496}
|
|
{"current_steps": 18740, "total_steps": 78105, "loss": 0.3828, "lr": 4.707716249353662e-06, "epoch": 1.199667114781384, "percentage": 23.99, "elapsed_time": "0:48:52", "remaining_time": "2:34:49", "throughput": 20112.6, "total_tokens": 58978048}
|
|
{"current_steps": 18745, "total_steps": 78105, "loss": 0.2367, "lr": 4.7074540688416425e-06, "epoch": 1.199987196722361, "percentage": 24.0, "elapsed_time": "0:48:53", "remaining_time": "2:34:48", "throughput": 20113.47, "total_tokens": 58994688}
|
|
{"current_steps": 18750, "total_steps": 78105, "loss": 0.2651, "lr": 4.707191778100865e-06, "epoch": 1.2003072786633378, "percentage": 24.01, "elapsed_time": "0:48:53", "remaining_time": "2:34:47", "throughput": 20114.17, "total_tokens": 59010240}
|
|
{"current_steps": 18755, "total_steps": 78105, "loss": 0.3063, "lr": 4.706929377144427e-06, "epoch": 1.2006273606043147, "percentage": 24.01, "elapsed_time": "0:48:54", "remaining_time": "2:34:46", "throughput": 20114.93, "total_tokens": 59026368}
|
|
{"current_steps": 18760, "total_steps": 78105, "loss": 0.3671, "lr": 4.706666865985431e-06, "epoch": 1.2009474425452915, "percentage": 24.02, "elapsed_time": "0:48:55", "remaining_time": "2:34:44", "throughput": 20115.64, "total_tokens": 59041920}
|
|
{"current_steps": 18765, "total_steps": 78105, "loss": 0.3089, "lr": 4.706404244636986e-06, "epoch": 1.2012675244862685, "percentage": 24.03, "elapsed_time": "0:48:55", "remaining_time": "2:34:43", "throughput": 20116.37, "total_tokens": 59057600}
|
|
{"current_steps": 18770, "total_steps": 78105, "loss": 0.2474, "lr": 4.7061415131122055e-06, "epoch": 1.2015876064272453, "percentage": 24.03, "elapsed_time": "0:48:56", "remaining_time": "2:34:42", "throughput": 20117.24, "total_tokens": 59074304}
|
|
{"current_steps": 18775, "total_steps": 78105, "loss": 0.4899, "lr": 4.70587867142421e-06, "epoch": 1.2019076883682223, "percentage": 24.04, "elapsed_time": "0:48:57", "remaining_time": "2:34:41", "throughput": 20117.92, "total_tokens": 59089472}
|
|
{"current_steps": 18780, "total_steps": 78105, "loss": 0.2991, "lr": 4.705615719586123e-06, "epoch": 1.2022277703091993, "percentage": 24.04, "elapsed_time": "0:48:57", "remaining_time": "2:34:40", "throughput": 20118.71, "total_tokens": 59105664}
|
|
{"current_steps": 18785, "total_steps": 78105, "loss": 0.2843, "lr": 4.705352657611075e-06, "epoch": 1.202547852250176, "percentage": 24.05, "elapsed_time": "0:48:58", "remaining_time": "2:34:39", "throughput": 20119.46, "total_tokens": 59121408}
|
|
{"current_steps": 18790, "total_steps": 78105, "loss": 0.331, "lr": 4.705089485512203e-06, "epoch": 1.202867934191153, "percentage": 24.06, "elapsed_time": "0:48:59", "remaining_time": "2:34:38", "throughput": 20120.17, "total_tokens": 59137088}
|
|
{"current_steps": 18795, "total_steps": 78105, "loss": 0.3433, "lr": 4.704826203302649e-06, "epoch": 1.2031880161321298, "percentage": 24.06, "elapsed_time": "0:48:59", "remaining_time": "2:34:37", "throughput": 20121.01, "total_tokens": 59153344}
|
|
{"current_steps": 18800, "total_steps": 78105, "loss": 0.3083, "lr": 4.704562810995558e-06, "epoch": 1.2035080980731068, "percentage": 24.07, "elapsed_time": "0:49:00", "remaining_time": "2:34:36", "throughput": 20121.8, "total_tokens": 59169536}
|
|
{"current_steps": 18805, "total_steps": 78105, "loss": 0.4914, "lr": 4.704299308604083e-06, "epoch": 1.2038281800140835, "percentage": 24.08, "elapsed_time": "0:49:01", "remaining_time": "2:34:34", "throughput": 20122.49, "total_tokens": 59185216}
|
|
{"current_steps": 18810, "total_steps": 78105, "loss": 0.3957, "lr": 4.704035696141383e-06, "epoch": 1.2041482619550605, "percentage": 24.08, "elapsed_time": "0:49:01", "remaining_time": "2:34:33", "throughput": 20123.2, "total_tokens": 59200960}
|
|
{"current_steps": 18815, "total_steps": 78105, "loss": 0.3012, "lr": 4.703771973620621e-06, "epoch": 1.2044683438960373, "percentage": 24.09, "elapsed_time": "0:49:02", "remaining_time": "2:34:32", "throughput": 20123.85, "total_tokens": 59216192}
|
|
{"current_steps": 18820, "total_steps": 78105, "loss": 0.3132, "lr": 4.7035081410549645e-06, "epoch": 1.2047884258370143, "percentage": 24.1, "elapsed_time": "0:49:03", "remaining_time": "2:34:31", "throughput": 20124.59, "total_tokens": 59232576}
|
|
{"current_steps": 18825, "total_steps": 78105, "loss": 0.2981, "lr": 4.703244198457591e-06, "epoch": 1.2051085077779913, "percentage": 24.1, "elapsed_time": "0:49:03", "remaining_time": "2:34:30", "throughput": 20125.2, "total_tokens": 59247424}
|
|
{"current_steps": 18830, "total_steps": 78105, "loss": 0.297, "lr": 4.702980145841677e-06, "epoch": 1.205428589718968, "percentage": 24.11, "elapsed_time": "0:49:04", "remaining_time": "2:34:29", "throughput": 20125.87, "total_tokens": 59262848}
|
|
{"current_steps": 18835, "total_steps": 78105, "loss": 0.2949, "lr": 4.70271598322041e-06, "epoch": 1.205748671659945, "percentage": 24.11, "elapsed_time": "0:49:05", "remaining_time": "2:34:28", "throughput": 20126.49, "total_tokens": 59278208}
|
|
{"current_steps": 18840, "total_steps": 78105, "loss": 0.3256, "lr": 4.7024517106069785e-06, "epoch": 1.2060687536009218, "percentage": 24.12, "elapsed_time": "0:49:05", "remaining_time": "2:34:27", "throughput": 20127.13, "total_tokens": 59293504}
|
|
{"current_steps": 18845, "total_steps": 78105, "loss": 0.4049, "lr": 4.702187328014583e-06, "epoch": 1.2063888355418988, "percentage": 24.13, "elapsed_time": "0:49:06", "remaining_time": "2:34:25", "throughput": 20127.73, "total_tokens": 59308416}
|
|
{"current_steps": 18850, "total_steps": 78105, "loss": 0.3353, "lr": 4.701922835456423e-06, "epoch": 1.2067089174828756, "percentage": 24.13, "elapsed_time": "0:49:07", "remaining_time": "2:34:24", "throughput": 20128.34, "total_tokens": 59323264}
|
|
{"current_steps": 18855, "total_steps": 78105, "loss": 0.2967, "lr": 4.701658232945705e-06, "epoch": 1.2070289994238526, "percentage": 24.14, "elapsed_time": "0:49:07", "remaining_time": "2:34:23", "throughput": 20129.2, "total_tokens": 59339584}
|
|
{"current_steps": 18860, "total_steps": 78105, "loss": 0.3586, "lr": 4.701393520495643e-06, "epoch": 1.2073490813648293, "percentage": 24.15, "elapsed_time": "0:49:08", "remaining_time": "2:34:22", "throughput": 20129.87, "total_tokens": 59354880}
|
|
{"current_steps": 18865, "total_steps": 78105, "loss": 0.4013, "lr": 4.701128698119456e-06, "epoch": 1.2076691633058063, "percentage": 24.15, "elapsed_time": "0:49:09", "remaining_time": "2:34:21", "throughput": 20131.07, "total_tokens": 59374464}
|
|
{"current_steps": 18870, "total_steps": 78105, "loss": 0.3701, "lr": 4.700863765830366e-06, "epoch": 1.207989245246783, "percentage": 24.16, "elapsed_time": "0:49:10", "remaining_time": "2:34:20", "throughput": 20131.72, "total_tokens": 59389760}
|
|
{"current_steps": 18875, "total_steps": 78105, "loss": 0.2765, "lr": 4.700598723641604e-06, "epoch": 1.20830932718776, "percentage": 24.17, "elapsed_time": "0:49:10", "remaining_time": "2:34:19", "throughput": 20132.21, "total_tokens": 59403968}
|
|
{"current_steps": 18880, "total_steps": 78105, "loss": 0.3123, "lr": 4.700333571566405e-06, "epoch": 1.2086294091287368, "percentage": 24.17, "elapsed_time": "0:49:11", "remaining_time": "2:34:18", "throughput": 20132.9, "total_tokens": 59419520}
|
|
{"current_steps": 18885, "total_steps": 78105, "loss": 0.3644, "lr": 4.7000683096180065e-06, "epoch": 1.2089494910697138, "percentage": 24.18, "elapsed_time": "0:49:12", "remaining_time": "2:34:17", "throughput": 20133.55, "total_tokens": 59435136}
|
|
{"current_steps": 18890, "total_steps": 78105, "loss": 0.3423, "lr": 4.699802937809658e-06, "epoch": 1.2092695730106908, "percentage": 24.19, "elapsed_time": "0:49:12", "remaining_time": "2:34:15", "throughput": 20134.3, "total_tokens": 59450944}
|
|
{"current_steps": 18895, "total_steps": 78105, "loss": 0.2624, "lr": 4.699537456154609e-06, "epoch": 1.2095896549516676, "percentage": 24.19, "elapsed_time": "0:49:13", "remaining_time": "2:34:14", "throughput": 20135.17, "total_tokens": 59467840}
|
|
{"current_steps": 18900, "total_steps": 78105, "loss": 0.2928, "lr": 4.6992718646661165e-06, "epoch": 1.2099097368926446, "percentage": 24.2, "elapsed_time": "0:49:14", "remaining_time": "2:34:13", "throughput": 20135.92, "total_tokens": 59483584}
|
|
{"current_steps": 18905, "total_steps": 78105, "loss": 0.4202, "lr": 4.699006163357443e-06, "epoch": 1.2102298188336214, "percentage": 24.2, "elapsed_time": "0:49:14", "remaining_time": "2:34:12", "throughput": 20136.62, "total_tokens": 59499072}
|
|
{"current_steps": 18910, "total_steps": 78105, "loss": 0.2843, "lr": 4.698740352241854e-06, "epoch": 1.2105499007745983, "percentage": 24.21, "elapsed_time": "0:49:15", "remaining_time": "2:34:11", "throughput": 20137.23, "total_tokens": 59514176}
|
|
{"current_steps": 18915, "total_steps": 78105, "loss": 0.3067, "lr": 4.698474431332626e-06, "epoch": 1.210869982715575, "percentage": 24.22, "elapsed_time": "0:49:16", "remaining_time": "2:34:10", "throughput": 20138.03, "total_tokens": 59530560}
|
|
{"current_steps": 18920, "total_steps": 78105, "loss": 0.2967, "lr": 4.698208400643036e-06, "epoch": 1.211190064656552, "percentage": 24.22, "elapsed_time": "0:49:16", "remaining_time": "2:34:09", "throughput": 20138.76, "total_tokens": 59546304}
|
|
{"current_steps": 18925, "total_steps": 78105, "loss": 0.3578, "lr": 4.697942260186369e-06, "epoch": 1.2115101465975289, "percentage": 24.23, "elapsed_time": "0:49:17", "remaining_time": "2:34:08", "throughput": 20139.48, "total_tokens": 59562048}
|
|
{"current_steps": 18930, "total_steps": 78105, "loss": 0.3102, "lr": 4.697676009975914e-06, "epoch": 1.2118302285385059, "percentage": 24.24, "elapsed_time": "0:49:18", "remaining_time": "2:34:07", "throughput": 20140.25, "total_tokens": 59578304}
|
|
{"current_steps": 18935, "total_steps": 78105, "loss": 0.3362, "lr": 4.6974096500249665e-06, "epoch": 1.2121503104794829, "percentage": 24.24, "elapsed_time": "0:49:18", "remaining_time": "2:34:06", "throughput": 20141.1, "total_tokens": 59595200}
|
|
{"current_steps": 18940, "total_steps": 78105, "loss": 0.2218, "lr": 4.697143180346827e-06, "epoch": 1.2124703924204596, "percentage": 24.25, "elapsed_time": "0:49:19", "remaining_time": "2:34:05", "throughput": 20141.95, "total_tokens": 59611968}
|
|
{"current_steps": 18945, "total_steps": 78105, "loss": 0.3227, "lr": 4.696876600954801e-06, "epoch": 1.2127904743614366, "percentage": 24.26, "elapsed_time": "0:49:20", "remaining_time": "2:34:04", "throughput": 20142.66, "total_tokens": 59627648}
|
|
{"current_steps": 18950, "total_steps": 78105, "loss": 0.3606, "lr": 4.696609911862201e-06, "epoch": 1.2131105563024134, "percentage": 24.26, "elapsed_time": "0:49:20", "remaining_time": "2:34:03", "throughput": 20143.4, "total_tokens": 59643712}
|
|
{"current_steps": 18955, "total_steps": 78105, "loss": 0.4345, "lr": 4.696343113082344e-06, "epoch": 1.2134306382433904, "percentage": 24.27, "elapsed_time": "0:49:21", "remaining_time": "2:34:01", "throughput": 20144.0, "total_tokens": 59658560}
|
|
{"current_steps": 18960, "total_steps": 78105, "loss": 0.3038, "lr": 4.696076204628551e-06, "epoch": 1.2137507201843671, "percentage": 24.28, "elapsed_time": "0:49:22", "remaining_time": "2:34:00", "throughput": 20144.72, "total_tokens": 59674176}
|
|
{"current_steps": 18965, "total_steps": 78105, "loss": 0.3547, "lr": 4.6958091865141525e-06, "epoch": 1.2140708021253441, "percentage": 24.28, "elapsed_time": "0:49:22", "remaining_time": "2:33:59", "throughput": 20145.25, "total_tokens": 59688704}
|
|
{"current_steps": 18970, "total_steps": 78105, "loss": 0.438, "lr": 4.6955420587524804e-06, "epoch": 1.214390884066321, "percentage": 24.29, "elapsed_time": "0:49:23", "remaining_time": "2:33:58", "throughput": 20146.06, "total_tokens": 59705600}
|
|
{"current_steps": 18975, "total_steps": 78105, "loss": 0.2542, "lr": 4.695274821356874e-06, "epoch": 1.2147109660072979, "percentage": 24.29, "elapsed_time": "0:49:24", "remaining_time": "2:33:57", "throughput": 20146.7, "total_tokens": 59720896}
|
|
{"current_steps": 18980, "total_steps": 78105, "loss": 0.3752, "lr": 4.695007474340678e-06, "epoch": 1.2150310479482749, "percentage": 24.3, "elapsed_time": "0:49:24", "remaining_time": "2:33:56", "throughput": 20147.33, "total_tokens": 59736000}
|
|
{"current_steps": 18985, "total_steps": 78105, "loss": 0.3307, "lr": 4.694740017717243e-06, "epoch": 1.2153511298892516, "percentage": 24.31, "elapsed_time": "0:49:25", "remaining_time": "2:33:55", "throughput": 20147.96, "total_tokens": 59751168}
|
|
{"current_steps": 18990, "total_steps": 78105, "loss": 0.3134, "lr": 4.694472451499922e-06, "epoch": 1.2156712118302284, "percentage": 24.31, "elapsed_time": "0:49:26", "remaining_time": "2:33:53", "throughput": 20148.62, "total_tokens": 59766144}
|
|
{"current_steps": 18995, "total_steps": 78105, "loss": 0.248, "lr": 4.6942047757020784e-06, "epoch": 1.2159912937712054, "percentage": 24.32, "elapsed_time": "0:49:26", "remaining_time": "2:33:52", "throughput": 20149.31, "total_tokens": 59781632}
|
|
{"current_steps": 19000, "total_steps": 78105, "loss": 0.3223, "lr": 4.6939369903370765e-06, "epoch": 1.2163113757121824, "percentage": 24.33, "elapsed_time": "0:49:27", "remaining_time": "2:33:51", "throughput": 20150.04, "total_tokens": 59797632}
|
|
{"current_steps": 19005, "total_steps": 78105, "loss": 0.2876, "lr": 4.693669095418289e-06, "epoch": 1.2166314576531592, "percentage": 24.33, "elapsed_time": "0:49:28", "remaining_time": "2:33:50", "throughput": 20150.81, "total_tokens": 59813632}
|
|
{"current_steps": 19010, "total_steps": 78105, "loss": 0.4526, "lr": 4.693401090959095e-06, "epoch": 1.2169515395941362, "percentage": 24.34, "elapsed_time": "0:49:28", "remaining_time": "2:33:49", "throughput": 20151.49, "total_tokens": 59829056}
|
|
{"current_steps": 19015, "total_steps": 78105, "loss": 0.327, "lr": 4.693132976972874e-06, "epoch": 1.217271621535113, "percentage": 24.35, "elapsed_time": "0:49:29", "remaining_time": "2:33:48", "throughput": 20152.12, "total_tokens": 59844224}
|
|
{"current_steps": 19020, "total_steps": 78105, "loss": 0.3835, "lr": 4.692864753473016e-06, "epoch": 1.21759170347609, "percentage": 24.35, "elapsed_time": "0:49:30", "remaining_time": "2:33:47", "throughput": 20152.83, "total_tokens": 59859776}
|
|
{"current_steps": 19025, "total_steps": 78105, "loss": 0.455, "lr": 4.6925964204729145e-06, "epoch": 1.2179117854170667, "percentage": 24.36, "elapsed_time": "0:49:30", "remaining_time": "2:33:45", "throughput": 20153.55, "total_tokens": 59875456}
|
|
{"current_steps": 19030, "total_steps": 78105, "loss": 0.3682, "lr": 4.6923279779859685e-06, "epoch": 1.2182318673580437, "percentage": 24.36, "elapsed_time": "0:49:31", "remaining_time": "2:33:44", "throughput": 20154.31, "total_tokens": 59891840}
|
|
{"current_steps": 19035, "total_steps": 78105, "loss": 0.3199, "lr": 4.692059426025583e-06, "epoch": 1.2185519492990204, "percentage": 24.37, "elapsed_time": "0:49:32", "remaining_time": "2:33:43", "throughput": 20155.0, "total_tokens": 59907904}
|
|
{"current_steps": 19040, "total_steps": 78105, "loss": 0.3169, "lr": 4.691790764605168e-06, "epoch": 1.2188720312399974, "percentage": 24.38, "elapsed_time": "0:49:33", "remaining_time": "2:33:42", "throughput": 20155.85, "total_tokens": 59924672}
|
|
{"current_steps": 19045, "total_steps": 78105, "loss": 0.3917, "lr": 4.691521993738139e-06, "epoch": 1.2191921131809744, "percentage": 24.38, "elapsed_time": "0:49:33", "remaining_time": "2:33:41", "throughput": 20156.6, "total_tokens": 59940736}
|
|
{"current_steps": 19050, "total_steps": 78105, "loss": 0.2466, "lr": 4.691253113437916e-06, "epoch": 1.2195121951219512, "percentage": 24.39, "elapsed_time": "0:49:34", "remaining_time": "2:33:40", "throughput": 20157.43, "total_tokens": 59956992}
|
|
{"current_steps": 19055, "total_steps": 78105, "loss": 0.5251, "lr": 4.690984123717926e-06, "epoch": 1.2198322770629282, "percentage": 24.4, "elapsed_time": "0:49:35", "remaining_time": "2:33:39", "throughput": 20158.12, "total_tokens": 59972800}
|
|
{"current_steps": 19060, "total_steps": 78105, "loss": 0.3085, "lr": 4.690715024591603e-06, "epoch": 1.220152359003905, "percentage": 24.4, "elapsed_time": "0:49:35", "remaining_time": "2:33:38", "throughput": 20158.86, "total_tokens": 59989120}
|
|
{"current_steps": 19065, "total_steps": 78105, "loss": 0.3967, "lr": 4.690445816072381e-06, "epoch": 1.220472440944882, "percentage": 24.41, "elapsed_time": "0:49:36", "remaining_time": "2:33:37", "throughput": 20159.61, "total_tokens": 60004928}
|
|
{"current_steps": 19070, "total_steps": 78105, "loss": 0.27, "lr": 4.690176498173705e-06, "epoch": 1.2207925228858587, "percentage": 24.42, "elapsed_time": "0:49:37", "remaining_time": "2:33:36", "throughput": 20160.26, "total_tokens": 60020352}
|
|
{"current_steps": 19075, "total_steps": 78105, "loss": 0.4531, "lr": 4.689907070909024e-06, "epoch": 1.2211126048268357, "percentage": 24.42, "elapsed_time": "0:49:37", "remaining_time": "2:33:35", "throughput": 20160.9, "total_tokens": 60035456}
|
|
{"current_steps": 19080, "total_steps": 78105, "loss": 0.3457, "lr": 4.68963753429179e-06, "epoch": 1.2214326867678125, "percentage": 24.43, "elapsed_time": "0:49:38", "remaining_time": "2:33:34", "throughput": 20161.53, "total_tokens": 60050752}
|
|
{"current_steps": 19085, "total_steps": 78105, "loss": 0.298, "lr": 4.689367888335462e-06, "epoch": 1.2217527687087895, "percentage": 24.44, "elapsed_time": "0:49:39", "remaining_time": "2:33:32", "throughput": 20162.26, "total_tokens": 60066496}
|
|
{"current_steps": 19090, "total_steps": 78105, "loss": 0.3365, "lr": 4.689098133053507e-06, "epoch": 1.2220728506497665, "percentage": 24.44, "elapsed_time": "0:49:39", "remaining_time": "2:33:31", "throughput": 20162.92, "total_tokens": 60081728}
|
|
{"current_steps": 19095, "total_steps": 78105, "loss": 0.2386, "lr": 4.688828268459393e-06, "epoch": 1.2223929325907432, "percentage": 24.45, "elapsed_time": "0:49:40", "remaining_time": "2:33:30", "throughput": 20163.64, "total_tokens": 60098048}
|
|
{"current_steps": 19100, "total_steps": 78105, "loss": 0.4498, "lr": 4.6885582945665964e-06, "epoch": 1.2227130145317202, "percentage": 24.45, "elapsed_time": "0:49:41", "remaining_time": "2:33:29", "throughput": 20164.19, "total_tokens": 60112640}
|
|
{"current_steps": 19105, "total_steps": 78105, "loss": 0.2333, "lr": 4.688288211388599e-06, "epoch": 1.223033096472697, "percentage": 24.46, "elapsed_time": "0:49:41", "remaining_time": "2:33:28", "throughput": 20164.88, "total_tokens": 60128384}
|
|
{"current_steps": 19110, "total_steps": 78105, "loss": 0.3721, "lr": 4.688018018938887e-06, "epoch": 1.223353178413674, "percentage": 24.47, "elapsed_time": "0:49:42", "remaining_time": "2:33:27", "throughput": 20165.65, "total_tokens": 60144448}
|
|
{"current_steps": 19115, "total_steps": 78105, "loss": 0.4246, "lr": 4.6877477172309505e-06, "epoch": 1.2236732603546507, "percentage": 24.47, "elapsed_time": "0:49:43", "remaining_time": "2:33:26", "throughput": 20166.34, "total_tokens": 60159936}
|
|
{"current_steps": 19120, "total_steps": 78105, "loss": 0.4009, "lr": 4.687477306278289e-06, "epoch": 1.2239933422956277, "percentage": 24.48, "elapsed_time": "0:49:43", "remaining_time": "2:33:25", "throughput": 20167.09, "total_tokens": 60175808}
|
|
{"current_steps": 19125, "total_steps": 78105, "loss": 0.3246, "lr": 4.687206786094406e-06, "epoch": 1.2243134242366045, "percentage": 24.49, "elapsed_time": "0:49:44", "remaining_time": "2:33:24", "throughput": 20167.77, "total_tokens": 60191808}
|
|
{"current_steps": 19130, "total_steps": 78105, "loss": 0.3218, "lr": 4.686936156692809e-06, "epoch": 1.2246335061775815, "percentage": 24.49, "elapsed_time": "0:49:45", "remaining_time": "2:33:23", "throughput": 20168.55, "total_tokens": 60208192}
|
|
{"current_steps": 19135, "total_steps": 78105, "loss": 0.3329, "lr": 4.686665418087011e-06, "epoch": 1.2249535881185583, "percentage": 24.5, "elapsed_time": "0:49:45", "remaining_time": "2:33:21", "throughput": 20169.07, "total_tokens": 60222848}
|
|
{"current_steps": 19140, "total_steps": 78105, "loss": 0.3869, "lr": 4.686394570290532e-06, "epoch": 1.2252736700595352, "percentage": 24.51, "elapsed_time": "0:49:46", "remaining_time": "2:33:20", "throughput": 20169.72, "total_tokens": 60238336}
|
|
{"current_steps": 19145, "total_steps": 78105, "loss": 0.2233, "lr": 4.686123613316896e-06, "epoch": 1.225593752000512, "percentage": 24.51, "elapsed_time": "0:49:47", "remaining_time": "2:33:19", "throughput": 20170.33, "total_tokens": 60253376}
|
|
{"current_steps": 19150, "total_steps": 78105, "loss": 0.4171, "lr": 4.685852547179635e-06, "epoch": 1.225913833941489, "percentage": 24.52, "elapsed_time": "0:49:47", "remaining_time": "2:33:18", "throughput": 20171.07, "total_tokens": 60269056}
|
|
{"current_steps": 19155, "total_steps": 78105, "loss": 0.1714, "lr": 4.685581371892283e-06, "epoch": 1.226233915882466, "percentage": 24.52, "elapsed_time": "0:49:48", "remaining_time": "2:33:17", "throughput": 20172.23, "total_tokens": 60288704}
|
|
{"current_steps": 19160, "total_steps": 78105, "loss": 0.317, "lr": 4.685310087468381e-06, "epoch": 1.2265539978234428, "percentage": 24.53, "elapsed_time": "0:49:49", "remaining_time": "2:33:16", "throughput": 20173.03, "total_tokens": 60305152}
|
|
{"current_steps": 19165, "total_steps": 78105, "loss": 0.3379, "lr": 4.685038693921477e-06, "epoch": 1.2268740797644198, "percentage": 24.54, "elapsed_time": "0:49:50", "remaining_time": "2:33:15", "throughput": 20173.61, "total_tokens": 60319680}
|
|
{"current_steps": 19170, "total_steps": 78105, "loss": 0.243, "lr": 4.684767191265122e-06, "epoch": 1.2271941617053965, "percentage": 24.54, "elapsed_time": "0:49:50", "remaining_time": "2:33:14", "throughput": 20174.18, "total_tokens": 60334528}
|
|
{"current_steps": 19175, "total_steps": 78105, "loss": 0.2693, "lr": 4.684495579512873e-06, "epoch": 1.2275142436463735, "percentage": 24.55, "elapsed_time": "0:49:51", "remaining_time": "2:33:13", "throughput": 20174.85, "total_tokens": 60349824}
|
|
{"current_steps": 19180, "total_steps": 78105, "loss": 0.4104, "lr": 4.684223858678294e-06, "epoch": 1.2278343255873503, "percentage": 24.56, "elapsed_time": "0:49:51", "remaining_time": "2:33:12", "throughput": 20175.41, "total_tokens": 60364544}
|
|
{"current_steps": 19185, "total_steps": 78105, "loss": 0.443, "lr": 4.683952028774952e-06, "epoch": 1.2281544075283273, "percentage": 24.56, "elapsed_time": "0:49:52", "remaining_time": "2:33:10", "throughput": 20176.16, "total_tokens": 60380672}
|
|
{"current_steps": 19190, "total_steps": 78105, "loss": 0.3621, "lr": 4.683680089816422e-06, "epoch": 1.228474489469304, "percentage": 24.57, "elapsed_time": "0:49:53", "remaining_time": "2:33:09", "throughput": 20176.77, "total_tokens": 60396096}
|
|
{"current_steps": 19195, "total_steps": 78105, "loss": 0.336, "lr": 4.683408041816282e-06, "epoch": 1.228794571410281, "percentage": 24.58, "elapsed_time": "0:49:54", "remaining_time": "2:33:08", "throughput": 20177.53, "total_tokens": 60412224}
|
|
{"current_steps": 19200, "total_steps": 78105, "loss": 0.2713, "lr": 4.683135884788118e-06, "epoch": 1.229114653351258, "percentage": 24.58, "elapsed_time": "0:49:54", "remaining_time": "2:33:07", "throughput": 20178.17, "total_tokens": 60428160}
|
|
{"current_steps": 19205, "total_steps": 78105, "loss": 0.2679, "lr": 4.682863618745519e-06, "epoch": 1.2294347352922348, "percentage": 24.59, "elapsed_time": "0:49:55", "remaining_time": "2:33:06", "throughput": 20178.85, "total_tokens": 60444288}
|
|
{"current_steps": 19210, "total_steps": 78105, "loss": 0.2873, "lr": 4.6825912437020814e-06, "epoch": 1.2297548172332118, "percentage": 24.6, "elapsed_time": "0:49:56", "remaining_time": "2:33:05", "throughput": 20179.49, "total_tokens": 60459840}
|
|
{"current_steps": 19215, "total_steps": 78105, "loss": 0.404, "lr": 4.682318759671406e-06, "epoch": 1.2300748991741886, "percentage": 24.6, "elapsed_time": "0:49:56", "remaining_time": "2:33:04", "throughput": 20180.02, "total_tokens": 60474752}
|
|
{"current_steps": 19220, "total_steps": 78105, "loss": 0.1633, "lr": 4.682046166667098e-06, "epoch": 1.2303949811151655, "percentage": 24.61, "elapsed_time": "0:49:57", "remaining_time": "2:33:03", "throughput": 20180.54, "total_tokens": 60489408}
|
|
{"current_steps": 19225, "total_steps": 78105, "loss": 0.363, "lr": 4.6817734647027715e-06, "epoch": 1.2307150630561423, "percentage": 24.61, "elapsed_time": "0:49:58", "remaining_time": "2:33:02", "throughput": 20181.15, "total_tokens": 60504704}
|
|
{"current_steps": 19230, "total_steps": 78105, "loss": 0.2782, "lr": 4.6815006537920424e-06, "epoch": 1.2310351449971193, "percentage": 24.62, "elapsed_time": "0:49:58", "remaining_time": "2:33:01", "throughput": 20181.91, "total_tokens": 60520960}
|
|
{"current_steps": 19235, "total_steps": 78105, "loss": 0.4007, "lr": 4.681227733948533e-06, "epoch": 1.231355226938096, "percentage": 24.63, "elapsed_time": "0:49:59", "remaining_time": "2:32:59", "throughput": 20182.45, "total_tokens": 60535424}
|
|
{"current_steps": 19240, "total_steps": 78105, "loss": 0.2642, "lr": 4.680954705185873e-06, "epoch": 1.231675308879073, "percentage": 24.63, "elapsed_time": "0:50:00", "remaining_time": "2:32:58", "throughput": 20183.11, "total_tokens": 60550976}
|
|
{"current_steps": 19245, "total_steps": 78105, "loss": 0.2975, "lr": 4.680681567517694e-06, "epoch": 1.23199539082005, "percentage": 24.64, "elapsed_time": "0:50:00", "remaining_time": "2:32:57", "throughput": 20183.76, "total_tokens": 60566784}
|
|
{"current_steps": 19250, "total_steps": 78105, "loss": 0.3432, "lr": 4.6804083209576375e-06, "epoch": 1.2323154727610268, "percentage": 24.65, "elapsed_time": "0:50:01", "remaining_time": "2:32:56", "throughput": 20184.46, "total_tokens": 60582400}
|
|
{"current_steps": 19255, "total_steps": 78105, "loss": 0.3691, "lr": 4.680134965519346e-06, "epoch": 1.2326355547020036, "percentage": 24.65, "elapsed_time": "0:50:02", "remaining_time": "2:32:55", "throughput": 20184.95, "total_tokens": 60597056}
|
|
{"current_steps": 19260, "total_steps": 78105, "loss": 0.2932, "lr": 4.67986150121647e-06, "epoch": 1.2329556366429806, "percentage": 24.66, "elapsed_time": "0:50:02", "remaining_time": "2:32:54", "throughput": 20185.67, "total_tokens": 60612864}
|
|
{"current_steps": 19265, "total_steps": 78105, "loss": 0.326, "lr": 4.679587928062666e-06, "epoch": 1.2332757185839576, "percentage": 24.67, "elapsed_time": "0:50:03", "remaining_time": "2:32:53", "throughput": 20186.24, "total_tokens": 60627968}
|
|
{"current_steps": 19270, "total_steps": 78105, "loss": 0.4103, "lr": 4.679314246071594e-06, "epoch": 1.2335958005249343, "percentage": 24.67, "elapsed_time": "0:50:04", "remaining_time": "2:32:52", "throughput": 20187.0, "total_tokens": 60644160}
|
|
{"current_steps": 19275, "total_steps": 78105, "loss": 0.3367, "lr": 4.67904045525692e-06, "epoch": 1.2339158824659113, "percentage": 24.68, "elapsed_time": "0:50:04", "remaining_time": "2:32:51", "throughput": 20187.66, "total_tokens": 60659648}
|
|
{"current_steps": 19280, "total_steps": 78105, "loss": 0.4156, "lr": 4.678766555632315e-06, "epoch": 1.234235964406888, "percentage": 24.68, "elapsed_time": "0:50:05", "remaining_time": "2:32:49", "throughput": 20188.41, "total_tokens": 60675712}
|
|
{"current_steps": 19285, "total_steps": 78105, "loss": 0.2761, "lr": 4.678492547211459e-06, "epoch": 1.234556046347865, "percentage": 24.69, "elapsed_time": "0:50:06", "remaining_time": "2:32:48", "throughput": 20189.11, "total_tokens": 60691456}
|
|
{"current_steps": 19290, "total_steps": 78105, "loss": 0.3125, "lr": 4.67821843000803e-06, "epoch": 1.2348761282888419, "percentage": 24.7, "elapsed_time": "0:50:06", "remaining_time": "2:32:47", "throughput": 20189.95, "total_tokens": 60708096}
|
|
{"current_steps": 19295, "total_steps": 78105, "loss": 0.5368, "lr": 4.67794420403572e-06, "epoch": 1.2351962102298188, "percentage": 24.7, "elapsed_time": "0:50:07", "remaining_time": "2:32:46", "throughput": 20190.68, "total_tokens": 60724352}
|
|
{"current_steps": 19300, "total_steps": 78105, "loss": 0.3617, "lr": 4.677669869308221e-06, "epoch": 1.2355162921707956, "percentage": 24.71, "elapsed_time": "0:50:08", "remaining_time": "2:32:45", "throughput": 20191.31, "total_tokens": 60739968}
|
|
{"current_steps": 19305, "total_steps": 78105, "loss": 0.3916, "lr": 4.677395425839231e-06, "epoch": 1.2358363741117726, "percentage": 24.72, "elapsed_time": "0:50:08", "remaining_time": "2:32:44", "throughput": 20191.81, "total_tokens": 60754240}
|
|
{"current_steps": 19310, "total_steps": 78105, "loss": 0.5291, "lr": 4.677120873642455e-06, "epoch": 1.2361564560527496, "percentage": 24.72, "elapsed_time": "0:50:09", "remaining_time": "2:32:43", "throughput": 20192.46, "total_tokens": 60769728}
|
|
{"current_steps": 19315, "total_steps": 78105, "loss": 0.3187, "lr": 4.676846212731604e-06, "epoch": 1.2364765379937264, "percentage": 24.73, "elapsed_time": "0:50:10", "remaining_time": "2:32:42", "throughput": 20193.17, "total_tokens": 60785408}
|
|
{"current_steps": 19320, "total_steps": 78105, "loss": 0.3089, "lr": 4.6765714431203914e-06, "epoch": 1.2367966199347034, "percentage": 24.74, "elapsed_time": "0:50:10", "remaining_time": "2:32:41", "throughput": 20193.85, "total_tokens": 60801152}
|
|
{"current_steps": 19325, "total_steps": 78105, "loss": 0.2893, "lr": 4.676296564822538e-06, "epoch": 1.2371167018756801, "percentage": 24.74, "elapsed_time": "0:50:11", "remaining_time": "2:32:40", "throughput": 20194.57, "total_tokens": 60817088}
|
|
{"current_steps": 19330, "total_steps": 78105, "loss": 0.3399, "lr": 4.67602157785177e-06, "epoch": 1.2374367838166571, "percentage": 24.75, "elapsed_time": "0:50:12", "remaining_time": "2:32:39", "throughput": 20195.31, "total_tokens": 60833088}
|
|
{"current_steps": 19335, "total_steps": 78105, "loss": 0.2608, "lr": 4.675746482221819e-06, "epoch": 1.2377568657576339, "percentage": 24.76, "elapsed_time": "0:50:12", "remaining_time": "2:32:38", "throughput": 20196.18, "total_tokens": 60850496}
|
|
{"current_steps": 19340, "total_steps": 78105, "loss": 0.3079, "lr": 4.6754712779464215e-06, "epoch": 1.2380769476986109, "percentage": 24.76, "elapsed_time": "0:50:13", "remaining_time": "2:32:37", "throughput": 20196.99, "total_tokens": 60867200}
|
|
{"current_steps": 19345, "total_steps": 78105, "loss": 0.4516, "lr": 4.675195965039321e-06, "epoch": 1.2383970296395876, "percentage": 24.77, "elapsed_time": "0:50:14", "remaining_time": "2:32:36", "throughput": 20197.68, "total_tokens": 60883072}
|
|
{"current_steps": 19350, "total_steps": 78105, "loss": 0.3426, "lr": 4.674920543514263e-06, "epoch": 1.2387171115805646, "percentage": 24.77, "elapsed_time": "0:50:15", "remaining_time": "2:32:34", "throughput": 20198.47, "total_tokens": 60899392}
|
|
{"current_steps": 19355, "total_steps": 78105, "loss": 0.2972, "lr": 4.674645013385002e-06, "epoch": 1.2390371935215416, "percentage": 24.78, "elapsed_time": "0:50:15", "remaining_time": "2:32:33", "throughput": 20199.04, "total_tokens": 60914560}
|
|
{"current_steps": 19360, "total_steps": 78105, "loss": 0.4091, "lr": 4.674369374665297e-06, "epoch": 1.2393572754625184, "percentage": 24.79, "elapsed_time": "0:50:16", "remaining_time": "2:32:32", "throughput": 20199.56, "total_tokens": 60929088}
|
|
{"current_steps": 19365, "total_steps": 78105, "loss": 0.3021, "lr": 4.6740936273689105e-06, "epoch": 1.2396773574034954, "percentage": 24.79, "elapsed_time": "0:50:17", "remaining_time": "2:32:31", "throughput": 20200.18, "total_tokens": 60944576}
|
|
{"current_steps": 19370, "total_steps": 78105, "loss": 0.4317, "lr": 4.673817771509613e-06, "epoch": 1.2399974393444722, "percentage": 24.8, "elapsed_time": "0:50:17", "remaining_time": "2:32:30", "throughput": 20200.84, "total_tokens": 60959808}
|
|
{"current_steps": 19375, "total_steps": 78105, "loss": 0.2599, "lr": 4.67354180710118e-06, "epoch": 1.2403175212854491, "percentage": 24.81, "elapsed_time": "0:50:18", "remaining_time": "2:32:29", "throughput": 20201.55, "total_tokens": 60975488}
|
|
{"current_steps": 19380, "total_steps": 78105, "loss": 0.2838, "lr": 4.673265734157389e-06, "epoch": 1.240637603226426, "percentage": 24.81, "elapsed_time": "0:50:19", "remaining_time": "2:32:28", "throughput": 20202.22, "total_tokens": 60991168}
|
|
{"current_steps": 19385, "total_steps": 78105, "loss": 0.3576, "lr": 4.672989552692029e-06, "epoch": 1.240957685167403, "percentage": 24.82, "elapsed_time": "0:50:19", "remaining_time": "2:32:27", "throughput": 20202.92, "total_tokens": 61006912}
|
|
{"current_steps": 19390, "total_steps": 78105, "loss": 0.3117, "lr": 4.672713262718888e-06, "epoch": 1.2412777671083797, "percentage": 24.83, "elapsed_time": "0:50:20", "remaining_time": "2:32:26", "throughput": 20203.59, "total_tokens": 61022528}
|
|
{"current_steps": 19395, "total_steps": 78105, "loss": 0.4646, "lr": 4.672436864251765e-06, "epoch": 1.2415978490493567, "percentage": 24.83, "elapsed_time": "0:50:21", "remaining_time": "2:32:24", "throughput": 20204.14, "total_tokens": 61037056}
|
|
{"current_steps": 19400, "total_steps": 78105, "loss": 0.2904, "lr": 4.67216035730446e-06, "epoch": 1.2419179309903334, "percentage": 24.84, "elapsed_time": "0:50:21", "remaining_time": "2:32:23", "throughput": 20204.74, "total_tokens": 61052288}
|
|
{"current_steps": 19405, "total_steps": 78105, "loss": 0.3429, "lr": 4.671883741890782e-06, "epoch": 1.2422380129313104, "percentage": 24.84, "elapsed_time": "0:50:22", "remaining_time": "2:32:22", "throughput": 20205.6, "total_tokens": 61069248}
|
|
{"current_steps": 19410, "total_steps": 78105, "loss": 0.3064, "lr": 4.671607018024541e-06, "epoch": 1.2425580948722872, "percentage": 24.85, "elapsed_time": "0:50:23", "remaining_time": "2:32:21", "throughput": 20206.14, "total_tokens": 61084096}
|
|
{"current_steps": 19415, "total_steps": 78105, "loss": 0.2906, "lr": 4.671330185719559e-06, "epoch": 1.2428781768132642, "percentage": 24.86, "elapsed_time": "0:50:23", "remaining_time": "2:32:20", "throughput": 20206.72, "total_tokens": 61098752}
|
|
{"current_steps": 19420, "total_steps": 78105, "loss": 0.2736, "lr": 4.6710532449896575e-06, "epoch": 1.2431982587542412, "percentage": 24.86, "elapsed_time": "0:50:24", "remaining_time": "2:32:19", "throughput": 20207.41, "total_tokens": 61114560}
|
|
{"current_steps": 19425, "total_steps": 78105, "loss": 0.2584, "lr": 4.670776195848664e-06, "epoch": 1.243518340695218, "percentage": 24.87, "elapsed_time": "0:50:25", "remaining_time": "2:32:18", "throughput": 20208.03, "total_tokens": 61130176}
|
|
{"current_steps": 19430, "total_steps": 78105, "loss": 0.3111, "lr": 4.670499038310416e-06, "epoch": 1.243838422636195, "percentage": 24.88, "elapsed_time": "0:50:25", "remaining_time": "2:32:17", "throughput": 20208.62, "total_tokens": 61145472}
|
|
{"current_steps": 19435, "total_steps": 78105, "loss": 0.3217, "lr": 4.670221772388751e-06, "epoch": 1.2441585045771717, "percentage": 24.88, "elapsed_time": "0:50:26", "remaining_time": "2:32:16", "throughput": 20209.33, "total_tokens": 61161408}
|
|
{"current_steps": 19440, "total_steps": 78105, "loss": 0.2143, "lr": 4.669944398097515e-06, "epoch": 1.2444785865181487, "percentage": 24.89, "elapsed_time": "0:50:27", "remaining_time": "2:32:14", "throughput": 20209.94, "total_tokens": 61176576}
|
|
{"current_steps": 19445, "total_steps": 78105, "loss": 0.471, "lr": 4.669666915450559e-06, "epoch": 1.2447986684591255, "percentage": 24.9, "elapsed_time": "0:50:27", "remaining_time": "2:32:13", "throughput": 20210.57, "total_tokens": 61191616}
|
|
{"current_steps": 19450, "total_steps": 78105, "loss": 0.3311, "lr": 4.669389324461738e-06, "epoch": 1.2451187504001024, "percentage": 24.9, "elapsed_time": "0:50:28", "remaining_time": "2:32:12", "throughput": 20211.22, "total_tokens": 61207168}
|
|
{"current_steps": 19455, "total_steps": 78105, "loss": 0.4144, "lr": 4.669111625144914e-06, "epoch": 1.2454388323410792, "percentage": 24.91, "elapsed_time": "0:50:29", "remaining_time": "2:32:11", "throughput": 20212.01, "total_tokens": 61223872}
|
|
{"current_steps": 19460, "total_steps": 78105, "loss": 0.2513, "lr": 4.6688338175139544e-06, "epoch": 1.2457589142820562, "percentage": 24.92, "elapsed_time": "0:50:29", "remaining_time": "2:32:10", "throughput": 20212.65, "total_tokens": 61239680}
|
|
{"current_steps": 19465, "total_steps": 78105, "loss": 0.285, "lr": 4.668555901582731e-06, "epoch": 1.2460789962230332, "percentage": 24.92, "elapsed_time": "0:50:30", "remaining_time": "2:32:09", "throughput": 20213.36, "total_tokens": 61255616}
|
|
{"current_steps": 19470, "total_steps": 78105, "loss": 0.4098, "lr": 4.668277877365122e-06, "epoch": 1.24639907816401, "percentage": 24.93, "elapsed_time": "0:50:31", "remaining_time": "2:32:08", "throughput": 20214.0, "total_tokens": 61270976}
|
|
{"current_steps": 19475, "total_steps": 78105, "loss": 0.2872, "lr": 4.6679997448750095e-06, "epoch": 1.246719160104987, "percentage": 24.93, "elapsed_time": "0:50:31", "remaining_time": "2:32:07", "throughput": 20214.53, "total_tokens": 61285824}
|
|
{"current_steps": 19480, "total_steps": 78105, "loss": 0.4148, "lr": 4.667721504126281e-06, "epoch": 1.2470392420459637, "percentage": 24.94, "elapsed_time": "0:50:32", "remaining_time": "2:32:06", "throughput": 20215.05, "total_tokens": 61300672}
|
|
{"current_steps": 19485, "total_steps": 78105, "loss": 0.3055, "lr": 4.667443155132834e-06, "epoch": 1.2473593239869407, "percentage": 24.95, "elapsed_time": "0:50:33", "remaining_time": "2:32:05", "throughput": 20215.89, "total_tokens": 61317760}
|
|
{"current_steps": 19490, "total_steps": 78105, "loss": 0.4127, "lr": 4.667164697908564e-06, "epoch": 1.2476794059279175, "percentage": 24.95, "elapsed_time": "0:50:33", "remaining_time": "2:32:04", "throughput": 20216.59, "total_tokens": 61333888}
|
|
{"current_steps": 19495, "total_steps": 78105, "loss": 0.4217, "lr": 4.666886132467379e-06, "epoch": 1.2479994878688945, "percentage": 24.96, "elapsed_time": "0:50:34", "remaining_time": "2:32:03", "throughput": 20217.3, "total_tokens": 61350080}
|
|
{"current_steps": 19500, "total_steps": 78105, "loss": 0.4635, "lr": 4.666607458823186e-06, "epoch": 1.2483195698098712, "percentage": 24.97, "elapsed_time": "0:50:35", "remaining_time": "2:32:02", "throughput": 20218.08, "total_tokens": 61366912}
|
|
{"current_steps": 19505, "total_steps": 78105, "loss": 0.322, "lr": 4.666328676989903e-06, "epoch": 1.2486396517508482, "percentage": 24.97, "elapsed_time": "0:50:35", "remaining_time": "2:32:01", "throughput": 20218.92, "total_tokens": 61384256}
|
|
{"current_steps": 19510, "total_steps": 78105, "loss": 0.2763, "lr": 4.666049786981449e-06, "epoch": 1.2489597336918252, "percentage": 24.98, "elapsed_time": "0:50:36", "remaining_time": "2:31:59", "throughput": 20219.48, "total_tokens": 61399040}
|
|
{"current_steps": 19515, "total_steps": 78105, "loss": 0.2761, "lr": 4.665770788811751e-06, "epoch": 1.249279815632802, "percentage": 24.99, "elapsed_time": "0:50:37", "remaining_time": "2:31:59", "throughput": 20220.43, "total_tokens": 61416832}
|
|
{"current_steps": 19520, "total_steps": 78105, "loss": 0.3629, "lr": 4.6654916824947415e-06, "epoch": 1.249599897573779, "percentage": 24.99, "elapsed_time": "0:50:38", "remaining_time": "2:31:58", "throughput": 20221.22, "total_tokens": 61433152}
|
|
{"current_steps": 19525, "total_steps": 78105, "loss": 0.3424, "lr": 4.665212468044356e-06, "epoch": 1.2499199795147558, "percentage": 25.0, "elapsed_time": "0:50:38", "remaining_time": "2:31:57", "throughput": 20222.04, "total_tokens": 61450176}
|
|
{"current_steps": 19530, "total_steps": 78105, "loss": 0.265, "lr": 4.66493314547454e-06, "epoch": 1.2502400614557327, "percentage": 25.0, "elapsed_time": "0:50:39", "remaining_time": "2:31:55", "throughput": 20222.61, "total_tokens": 61465280}
|
|
{"current_steps": 19530, "total_steps": 78105, "eval_loss": 0.5115891098976135, "epoch": 1.2502400614557327, "percentage": 25.0, "elapsed_time": "0:51:30", "remaining_time": "2:34:29", "throughput": 19887.85, "total_tokens": 61465280}
|
|
{"current_steps": 19535, "total_steps": 78105, "loss": 0.3263, "lr": 4.664653714799239e-06, "epoch": 1.2505601433967095, "percentage": 25.01, "elapsed_time": "0:52:03", "remaining_time": "2:36:06", "throughput": 19680.97, "total_tokens": 61480960}
|
|
{"current_steps": 19540, "total_steps": 78105, "loss": 0.3483, "lr": 4.664374176032406e-06, "epoch": 1.2508802253376865, "percentage": 25.02, "elapsed_time": "0:52:04", "remaining_time": "2:36:04", "throughput": 19681.62, "total_tokens": 61496064}
|
|
{"current_steps": 19545, "total_steps": 78105, "loss": 0.3428, "lr": 4.6640945291880006e-06, "epoch": 1.2512003072786633, "percentage": 25.02, "elapsed_time": "0:52:05", "remaining_time": "2:36:03", "throughput": 19682.42, "total_tokens": 61512000}
|
|
{"current_steps": 19550, "total_steps": 78105, "loss": 0.3861, "lr": 4.6638147742799865e-06, "epoch": 1.2515203892196403, "percentage": 25.03, "elapsed_time": "0:52:05", "remaining_time": "2:36:02", "throughput": 19683.13, "total_tokens": 61527552}
|
|
{"current_steps": 19555, "total_steps": 78105, "loss": 0.3321, "lr": 4.663534911322334e-06, "epoch": 1.2518404711606173, "percentage": 25.04, "elapsed_time": "0:52:06", "remaining_time": "2:36:01", "throughput": 19683.81, "total_tokens": 61542656}
|
|
{"current_steps": 19560, "total_steps": 78105, "loss": 0.3228, "lr": 4.663254940329017e-06, "epoch": 1.252160553101594, "percentage": 25.04, "elapsed_time": "0:52:07", "remaining_time": "2:36:00", "throughput": 19684.45, "total_tokens": 61557568}
|
|
{"current_steps": 19565, "total_steps": 78105, "loss": 0.3364, "lr": 4.662974861314016e-06, "epoch": 1.2524806350425708, "percentage": 25.05, "elapsed_time": "0:52:07", "remaining_time": "2:35:58", "throughput": 19685.4, "total_tokens": 61574464}
|
|
{"current_steps": 19570, "total_steps": 78105, "loss": 0.294, "lr": 4.662694674291318e-06, "epoch": 1.2528007169835478, "percentage": 25.06, "elapsed_time": "0:52:08", "remaining_time": "2:35:58", "throughput": 19684.5, "total_tokens": 61589568}
|
|
{"current_steps": 19575, "total_steps": 78105, "loss": 0.4253, "lr": 4.662414379274912e-06, "epoch": 1.2531207989245248, "percentage": 25.06, "elapsed_time": "0:52:09", "remaining_time": "2:35:57", "throughput": 19685.26, "total_tokens": 61605440}
|
|
{"current_steps": 19580, "total_steps": 78105, "loss": 0.3526, "lr": 4.662133976278796e-06, "epoch": 1.2534408808655015, "percentage": 25.07, "elapsed_time": "0:52:10", "remaining_time": "2:35:56", "throughput": 19686.04, "total_tokens": 61621056}
|
|
{"current_steps": 19585, "total_steps": 78105, "loss": 0.2846, "lr": 4.661853465316971e-06, "epoch": 1.2537609628064785, "percentage": 25.08, "elapsed_time": "0:52:10", "remaining_time": "2:35:55", "throughput": 19686.77, "total_tokens": 61636736}
|
|
{"current_steps": 19590, "total_steps": 78105, "loss": 0.2533, "lr": 4.6615728464034455e-06, "epoch": 1.2540810447474553, "percentage": 25.08, "elapsed_time": "0:52:11", "remaining_time": "2:35:54", "throughput": 19687.75, "total_tokens": 61654208}
|
|
{"current_steps": 19595, "total_steps": 78105, "loss": 0.2326, "lr": 4.66129211955223e-06, "epoch": 1.2544011266884323, "percentage": 25.09, "elapsed_time": "0:52:12", "remaining_time": "2:35:52", "throughput": 19688.57, "total_tokens": 61670464}
|
|
{"current_steps": 19600, "total_steps": 78105, "loss": 0.3779, "lr": 4.661011284777345e-06, "epoch": 1.254721208629409, "percentage": 25.09, "elapsed_time": "0:52:12", "remaining_time": "2:35:51", "throughput": 19689.33, "total_tokens": 61686208}
|
|
{"current_steps": 19605, "total_steps": 78105, "loss": 0.275, "lr": 4.660730342092812e-06, "epoch": 1.255041290570386, "percentage": 25.1, "elapsed_time": "0:52:13", "remaining_time": "2:35:50", "throughput": 19690.27, "total_tokens": 61703232}
|
|
{"current_steps": 19610, "total_steps": 78105, "loss": 0.2649, "lr": 4.660449291512661e-06, "epoch": 1.2553613725113628, "percentage": 25.11, "elapsed_time": "0:52:14", "remaining_time": "2:35:49", "throughput": 19691.13, "total_tokens": 61719680}
|
|
{"current_steps": 19615, "total_steps": 78105, "loss": 0.2393, "lr": 4.6601681330509255e-06, "epoch": 1.2556814544523398, "percentage": 25.11, "elapsed_time": "0:52:15", "remaining_time": "2:35:48", "throughput": 19691.78, "total_tokens": 61734336}
|
|
{"current_steps": 19620, "total_steps": 78105, "loss": 0.4221, "lr": 4.6598868667216454e-06, "epoch": 1.2560015363933168, "percentage": 25.12, "elapsed_time": "0:52:15", "remaining_time": "2:35:47", "throughput": 19692.39, "total_tokens": 61749120}
|
|
{"current_steps": 19625, "total_steps": 78105, "loss": 0.3938, "lr": 4.659605492538866e-06, "epoch": 1.2563216183342936, "percentage": 25.13, "elapsed_time": "0:52:16", "remaining_time": "2:35:45", "throughput": 19692.9, "total_tokens": 61763072}
|
|
{"current_steps": 19630, "total_steps": 78105, "loss": 0.524, "lr": 4.659324010516637e-06, "epoch": 1.2566417002752703, "percentage": 25.13, "elapsed_time": "0:52:16", "remaining_time": "2:35:44", "throughput": 19693.58, "total_tokens": 61778432}
|
|
{"current_steps": 19635, "total_steps": 78105, "loss": 0.3252, "lr": 4.6590424206690145e-06, "epoch": 1.2569617822162473, "percentage": 25.14, "elapsed_time": "0:52:17", "remaining_time": "2:35:43", "throughput": 19694.24, "total_tokens": 61793088}
|
|
{"current_steps": 19640, "total_steps": 78105, "loss": 0.4921, "lr": 4.658760723010059e-06, "epoch": 1.2572818641572243, "percentage": 25.15, "elapsed_time": "0:52:18", "remaining_time": "2:35:42", "throughput": 19695.26, "total_tokens": 61810496}
|
|
{"current_steps": 19645, "total_steps": 78105, "loss": 0.2689, "lr": 4.6584789175538384e-06, "epoch": 1.257601946098201, "percentage": 25.15, "elapsed_time": "0:52:19", "remaining_time": "2:35:41", "throughput": 19696.01, "total_tokens": 61826240}
|
|
{"current_steps": 19650, "total_steps": 78105, "loss": 0.3473, "lr": 4.658197004314423e-06, "epoch": 1.257922028039178, "percentage": 25.16, "elapsed_time": "0:52:19", "remaining_time": "2:35:40", "throughput": 19696.86, "total_tokens": 61842624}
|
|
{"current_steps": 19655, "total_steps": 78105, "loss": 0.347, "lr": 4.657914983305891e-06, "epoch": 1.2582421099801548, "percentage": 25.16, "elapsed_time": "0:52:20", "remaining_time": "2:35:38", "throughput": 19697.65, "total_tokens": 61858240}
|
|
{"current_steps": 19660, "total_steps": 78105, "loss": 0.3243, "lr": 4.657632854542325e-06, "epoch": 1.2585621919211318, "percentage": 25.17, "elapsed_time": "0:52:21", "remaining_time": "2:35:37", "throughput": 19698.38, "total_tokens": 61873856}
|
|
{"current_steps": 19665, "total_steps": 78105, "loss": 0.3672, "lr": 4.657350618037813e-06, "epoch": 1.2588822738621088, "percentage": 25.18, "elapsed_time": "0:52:21", "remaining_time": "2:35:36", "throughput": 19699.38, "total_tokens": 61891008}
|
|
{"current_steps": 19670, "total_steps": 78105, "loss": 0.2677, "lr": 4.6570682738064485e-06, "epoch": 1.2592023558030856, "percentage": 25.18, "elapsed_time": "0:52:22", "remaining_time": "2:35:35", "throughput": 19700.15, "total_tokens": 61907200}
|
|
{"current_steps": 19675, "total_steps": 78105, "loss": 0.1919, "lr": 4.65678582186233e-06, "epoch": 1.2595224377440624, "percentage": 25.19, "elapsed_time": "0:52:23", "remaining_time": "2:35:34", "throughput": 19700.82, "total_tokens": 61922432}
|
|
{"current_steps": 19680, "total_steps": 78105, "loss": 0.4322, "lr": 4.6565032622195615e-06, "epoch": 1.2598425196850394, "percentage": 25.2, "elapsed_time": "0:52:23", "remaining_time": "2:35:33", "throughput": 19701.38, "total_tokens": 61936576}
|
|
{"current_steps": 19685, "total_steps": 78105, "loss": 0.3127, "lr": 4.656220594892253e-06, "epoch": 1.2601626016260163, "percentage": 25.2, "elapsed_time": "0:52:24", "remaining_time": "2:35:31", "throughput": 19702.12, "total_tokens": 61952064}
|
|
{"current_steps": 19690, "total_steps": 78105, "loss": 0.4121, "lr": 4.655937819894521e-06, "epoch": 1.2604826835669931, "percentage": 25.21, "elapsed_time": "0:52:25", "remaining_time": "2:35:30", "throughput": 19702.94, "total_tokens": 61968256}
|
|
{"current_steps": 19695, "total_steps": 78105, "loss": 0.2803, "lr": 4.655654937240482e-06, "epoch": 1.26080276550797, "percentage": 25.22, "elapsed_time": "0:52:25", "remaining_time": "2:35:29", "throughput": 19703.7, "total_tokens": 61984192}
|
|
{"current_steps": 19700, "total_steps": 78105, "loss": 0.3284, "lr": 4.655371946944265e-06, "epoch": 1.2611228474489469, "percentage": 25.22, "elapsed_time": "0:52:26", "remaining_time": "2:35:28", "throughput": 19704.41, "total_tokens": 61999872}
|
|
{"current_steps": 19705, "total_steps": 78105, "loss": 0.3631, "lr": 4.655088849019999e-06, "epoch": 1.2614429293899239, "percentage": 25.23, "elapsed_time": "0:52:27", "remaining_time": "2:35:27", "throughput": 19705.1, "total_tokens": 62015424}
|
|
{"current_steps": 19710, "total_steps": 78105, "loss": 0.5244, "lr": 4.654805643481822e-06, "epoch": 1.2617630113309006, "percentage": 25.24, "elapsed_time": "0:52:27", "remaining_time": "2:35:26", "throughput": 19705.76, "total_tokens": 62030912}
|
|
{"current_steps": 19715, "total_steps": 78105, "loss": 0.273, "lr": 4.654522330343875e-06, "epoch": 1.2620830932718776, "percentage": 25.24, "elapsed_time": "0:52:28", "remaining_time": "2:35:24", "throughput": 19706.38, "total_tokens": 62045696}
|
|
{"current_steps": 19720, "total_steps": 78105, "loss": 0.3918, "lr": 4.654238909620305e-06, "epoch": 1.2624031752128544, "percentage": 25.25, "elapsed_time": "0:52:29", "remaining_time": "2:35:23", "throughput": 19706.97, "total_tokens": 62060288}
|
|
{"current_steps": 19725, "total_steps": 78105, "loss": 0.2186, "lr": 4.6539553813252645e-06, "epoch": 1.2627232571538314, "percentage": 25.25, "elapsed_time": "0:52:29", "remaining_time": "2:35:22", "throughput": 19707.94, "total_tokens": 62077888}
|
|
{"current_steps": 19730, "total_steps": 78105, "loss": 0.3008, "lr": 4.653671745472913e-06, "epoch": 1.2630433390948084, "percentage": 25.26, "elapsed_time": "0:52:30", "remaining_time": "2:35:21", "throughput": 19708.73, "total_tokens": 62094336}
|
|
{"current_steps": 19735, "total_steps": 78105, "loss": 0.3185, "lr": 4.6533880020774126e-06, "epoch": 1.2633634210357851, "percentage": 25.27, "elapsed_time": "0:52:31", "remaining_time": "2:35:20", "throughput": 19709.49, "total_tokens": 62110400}
|
|
{"current_steps": 19740, "total_steps": 78105, "loss": 0.3232, "lr": 4.653104151152931e-06, "epoch": 1.2636835029767621, "percentage": 25.27, "elapsed_time": "0:52:31", "remaining_time": "2:35:19", "throughput": 19710.33, "total_tokens": 62126720}
|
|
{"current_steps": 19745, "total_steps": 78105, "loss": 0.3325, "lr": 4.652820192713643e-06, "epoch": 1.264003584917739, "percentage": 25.28, "elapsed_time": "0:52:32", "remaining_time": "2:35:18", "throughput": 19711.14, "total_tokens": 62142976}
|
|
{"current_steps": 19750, "total_steps": 78105, "loss": 0.2464, "lr": 4.652536126773729e-06, "epoch": 1.2643236668587159, "percentage": 25.29, "elapsed_time": "0:52:33", "remaining_time": "2:35:17", "throughput": 19711.79, "total_tokens": 62158080}
|
|
{"current_steps": 19755, "total_steps": 78105, "loss": 0.2626, "lr": 4.652251953347373e-06, "epoch": 1.2646437487996927, "percentage": 25.29, "elapsed_time": "0:52:34", "remaining_time": "2:35:16", "throughput": 19712.49, "total_tokens": 62173824}
|
|
{"current_steps": 19760, "total_steps": 78105, "loss": 0.4023, "lr": 4.651967672448765e-06, "epoch": 1.2649638307406696, "percentage": 25.3, "elapsed_time": "0:52:34", "remaining_time": "2:35:14", "throughput": 19713.22, "total_tokens": 62189248}
|
|
{"current_steps": 19765, "total_steps": 78105, "loss": 0.4716, "lr": 4.6516832840921e-06, "epoch": 1.2652839126816464, "percentage": 25.31, "elapsed_time": "0:52:35", "remaining_time": "2:35:13", "throughput": 19714.01, "total_tokens": 62205568}
|
|
{"current_steps": 19770, "total_steps": 78105, "loss": 0.4309, "lr": 4.65139878829158e-06, "epoch": 1.2656039946226234, "percentage": 25.31, "elapsed_time": "0:52:36", "remaining_time": "2:35:12", "throughput": 19714.69, "total_tokens": 62220992}
|
|
{"current_steps": 19775, "total_steps": 78105, "loss": 0.3571, "lr": 4.651114185061412e-06, "epoch": 1.2659240765636004, "percentage": 25.32, "elapsed_time": "0:52:36", "remaining_time": "2:35:11", "throughput": 19715.31, "total_tokens": 62235968}
|
|
{"current_steps": 19780, "total_steps": 78105, "loss": 0.3261, "lr": 4.650829474415805e-06, "epoch": 1.2662441585045772, "percentage": 25.32, "elapsed_time": "0:52:37", "remaining_time": "2:35:10", "throughput": 19716.01, "total_tokens": 62251136}
|
|
{"current_steps": 19785, "total_steps": 78105, "loss": 0.2417, "lr": 4.650544656368977e-06, "epoch": 1.266564240445554, "percentage": 25.33, "elapsed_time": "0:52:38", "remaining_time": "2:35:09", "throughput": 19716.77, "total_tokens": 62267264}
|
|
{"current_steps": 19790, "total_steps": 78105, "loss": 0.3685, "lr": 4.65025973093515e-06, "epoch": 1.266884322386531, "percentage": 25.34, "elapsed_time": "0:52:38", "remaining_time": "2:35:07", "throughput": 19717.39, "total_tokens": 62282112}
|
|
{"current_steps": 19795, "total_steps": 78105, "loss": 0.2891, "lr": 4.649974698128554e-06, "epoch": 1.267204404327508, "percentage": 25.34, "elapsed_time": "0:52:39", "remaining_time": "2:35:06", "throughput": 19718.01, "total_tokens": 62297408}
|
|
{"current_steps": 19800, "total_steps": 78105, "loss": 0.3548, "lr": 4.64968955796342e-06, "epoch": 1.2675244862684847, "percentage": 25.35, "elapsed_time": "0:52:40", "remaining_time": "2:35:05", "throughput": 19718.85, "total_tokens": 62314304}
|
|
{"current_steps": 19805, "total_steps": 78105, "loss": 0.3924, "lr": 4.6494043104539864e-06, "epoch": 1.2678445682094617, "percentage": 25.36, "elapsed_time": "0:52:40", "remaining_time": "2:35:04", "throughput": 19719.52, "total_tokens": 62329600}
|
|
{"current_steps": 19810, "total_steps": 78105, "loss": 0.2952, "lr": 4.649118955614498e-06, "epoch": 1.2681646501504384, "percentage": 25.36, "elapsed_time": "0:52:41", "remaining_time": "2:35:03", "throughput": 19720.45, "total_tokens": 62346880}
|
|
{"current_steps": 19815, "total_steps": 78105, "loss": 0.2824, "lr": 4.6488334934592025e-06, "epoch": 1.2684847320914154, "percentage": 25.37, "elapsed_time": "0:52:42", "remaining_time": "2:35:02", "throughput": 19721.31, "total_tokens": 62363392}
|
|
{"current_steps": 19820, "total_steps": 78105, "loss": 0.3602, "lr": 4.648547924002356e-06, "epoch": 1.2688048140323924, "percentage": 25.38, "elapsed_time": "0:52:42", "remaining_time": "2:35:01", "throughput": 19722.06, "total_tokens": 62379584}
|
|
{"current_steps": 19825, "total_steps": 78105, "loss": 0.3378, "lr": 4.648262247258217e-06, "epoch": 1.2691248959733692, "percentage": 25.38, "elapsed_time": "0:52:43", "remaining_time": "2:35:00", "throughput": 19722.75, "total_tokens": 62394880}
|
|
{"current_steps": 19830, "total_steps": 78105, "loss": 0.2125, "lr": 4.647976463241051e-06, "epoch": 1.269444977914346, "percentage": 25.39, "elapsed_time": "0:52:44", "remaining_time": "2:34:58", "throughput": 19723.33, "total_tokens": 62409600}
|
|
{"current_steps": 19835, "total_steps": 78105, "loss": 0.351, "lr": 4.64769057196513e-06, "epoch": 1.269765059855323, "percentage": 25.4, "elapsed_time": "0:52:44", "remaining_time": "2:34:57", "throughput": 19724.02, "total_tokens": 62424960}
|
|
{"current_steps": 19840, "total_steps": 78105, "loss": 0.4485, "lr": 4.647404573444727e-06, "epoch": 1.2700851417963, "percentage": 25.4, "elapsed_time": "0:52:45", "remaining_time": "2:34:56", "throughput": 19724.63, "total_tokens": 62439936}
|
|
{"current_steps": 19845, "total_steps": 78105, "loss": 0.4064, "lr": 4.647118467694126e-06, "epoch": 1.2704052237372767, "percentage": 25.41, "elapsed_time": "0:52:46", "remaining_time": "2:34:55", "throughput": 19725.31, "total_tokens": 62455360}
|
|
{"current_steps": 19850, "total_steps": 78105, "loss": 0.4465, "lr": 4.646832254727614e-06, "epoch": 1.2707253056782537, "percentage": 25.41, "elapsed_time": "0:52:46", "remaining_time": "2:34:54", "throughput": 19726.16, "total_tokens": 62472128}
|
|
{"current_steps": 19855, "total_steps": 78105, "loss": 0.4335, "lr": 4.646545934559479e-06, "epoch": 1.2710453876192305, "percentage": 25.42, "elapsed_time": "0:52:47", "remaining_time": "2:34:53", "throughput": 19726.88, "total_tokens": 62487936}
|
|
{"current_steps": 19860, "total_steps": 78105, "loss": 0.3685, "lr": 4.646259507204023e-06, "epoch": 1.2713654695602075, "percentage": 25.43, "elapsed_time": "0:52:48", "remaining_time": "2:34:51", "throughput": 19727.53, "total_tokens": 62502848}
|
|
{"current_steps": 19865, "total_steps": 78105, "loss": 0.5468, "lr": 4.645972972675545e-06, "epoch": 1.2716855515011842, "percentage": 25.43, "elapsed_time": "0:52:48", "remaining_time": "2:34:50", "throughput": 19728.19, "total_tokens": 62517888}
|
|
{"current_steps": 19870, "total_steps": 78105, "loss": 0.319, "lr": 4.645686330988356e-06, "epoch": 1.2720056334421612, "percentage": 25.44, "elapsed_time": "0:52:49", "remaining_time": "2:34:49", "throughput": 19728.89, "total_tokens": 62533312}
|
|
{"current_steps": 19875, "total_steps": 78105, "loss": 0.328, "lr": 4.645399582156768e-06, "epoch": 1.272325715383138, "percentage": 25.45, "elapsed_time": "0:52:50", "remaining_time": "2:34:48", "throughput": 19729.53, "total_tokens": 62548160}
|
|
{"current_steps": 19880, "total_steps": 78105, "loss": 0.2244, "lr": 4.645112726195099e-06, "epoch": 1.272645797324115, "percentage": 25.45, "elapsed_time": "0:52:50", "remaining_time": "2:34:47", "throughput": 19730.34, "total_tokens": 62564480}
|
|
{"current_steps": 19885, "total_steps": 78105, "loss": 0.2637, "lr": 4.644825763117675e-06, "epoch": 1.272965879265092, "percentage": 25.46, "elapsed_time": "0:52:51", "remaining_time": "2:34:46", "throughput": 19731.05, "total_tokens": 62580288}
|
|
{"current_steps": 19890, "total_steps": 78105, "loss": 0.3284, "lr": 4.644538692938824e-06, "epoch": 1.2732859612060687, "percentage": 25.47, "elapsed_time": "0:52:52", "remaining_time": "2:34:44", "throughput": 19731.71, "total_tokens": 62595392}
|
|
{"current_steps": 19895, "total_steps": 78105, "loss": 0.3854, "lr": 4.6442515156728806e-06, "epoch": 1.2736060431470455, "percentage": 25.47, "elapsed_time": "0:52:52", "remaining_time": "2:34:43", "throughput": 19732.31, "total_tokens": 62609792}
|
|
{"current_steps": 19900, "total_steps": 78105, "loss": 0.2897, "lr": 4.643964231334186e-06, "epoch": 1.2739261250880225, "percentage": 25.48, "elapsed_time": "0:52:53", "remaining_time": "2:34:42", "throughput": 19732.96, "total_tokens": 62624576}
|
|
{"current_steps": 19905, "total_steps": 78105, "loss": 0.2879, "lr": 4.643676839937086e-06, "epoch": 1.2742462070289995, "percentage": 25.48, "elapsed_time": "0:52:54", "remaining_time": "2:34:41", "throughput": 19733.87, "total_tokens": 62641664}
|
|
{"current_steps": 19910, "total_steps": 78105, "loss": 0.3125, "lr": 4.643389341495929e-06, "epoch": 1.2745662889699763, "percentage": 25.49, "elapsed_time": "0:52:54", "remaining_time": "2:34:40", "throughput": 19734.49, "total_tokens": 62656448}
|
|
{"current_steps": 19915, "total_steps": 78105, "loss": 0.3907, "lr": 4.643101736025074e-06, "epoch": 1.2748863709109532, "percentage": 25.5, "elapsed_time": "0:52:55", "remaining_time": "2:34:38", "throughput": 19735.26, "total_tokens": 62671936}
|
|
{"current_steps": 19920, "total_steps": 78105, "loss": 0.2957, "lr": 4.642814023538881e-06, "epoch": 1.27520645285193, "percentage": 25.5, "elapsed_time": "0:52:56", "remaining_time": "2:34:37", "throughput": 19735.93, "total_tokens": 62687168}
|
|
{"current_steps": 19925, "total_steps": 78105, "loss": 0.2726, "lr": 4.642526204051717e-06, "epoch": 1.275526534792907, "percentage": 25.51, "elapsed_time": "0:52:56", "remaining_time": "2:34:36", "throughput": 19736.65, "total_tokens": 62702400}
|
|
{"current_steps": 19930, "total_steps": 78105, "loss": 0.3796, "lr": 4.642238277577954e-06, "epoch": 1.275846616733884, "percentage": 25.52, "elapsed_time": "0:52:57", "remaining_time": "2:34:35", "throughput": 19737.37, "total_tokens": 62717824}
|
|
{"current_steps": 19935, "total_steps": 78105, "loss": 0.309, "lr": 4.64195024413197e-06, "epoch": 1.2761666986748608, "percentage": 25.52, "elapsed_time": "0:52:58", "remaining_time": "2:34:34", "throughput": 19738.29, "total_tokens": 62735040}
|
|
{"current_steps": 19940, "total_steps": 78105, "loss": 0.3371, "lr": 4.641662103728149e-06, "epoch": 1.2764867806158375, "percentage": 25.53, "elapsed_time": "0:52:59", "remaining_time": "2:34:33", "throughput": 19738.93, "total_tokens": 62750144}
|
|
{"current_steps": 19945, "total_steps": 78105, "loss": 0.3251, "lr": 4.641373856380878e-06, "epoch": 1.2768068625568145, "percentage": 25.54, "elapsed_time": "0:52:59", "remaining_time": "2:34:32", "throughput": 19739.64, "total_tokens": 62765824}
|
|
{"current_steps": 19950, "total_steps": 78105, "loss": 0.3829, "lr": 4.641085502104549e-06, "epoch": 1.2771269444977915, "percentage": 25.54, "elapsed_time": "0:53:00", "remaining_time": "2:34:30", "throughput": 19740.49, "total_tokens": 62782464}
|
|
{"current_steps": 19955, "total_steps": 78105, "loss": 0.3555, "lr": 4.640797040913563e-06, "epoch": 1.2774470264387683, "percentage": 25.55, "elapsed_time": "0:53:01", "remaining_time": "2:34:29", "throughput": 19741.38, "total_tokens": 62799040}
|
|
{"current_steps": 19960, "total_steps": 78105, "loss": 0.3871, "lr": 4.640508472822324e-06, "epoch": 1.2777671083797453, "percentage": 25.56, "elapsed_time": "0:53:01", "remaining_time": "2:34:28", "throughput": 19742.0, "total_tokens": 62813952}
|
|
{"current_steps": 19965, "total_steps": 78105, "loss": 0.2359, "lr": 4.640219797845242e-06, "epoch": 1.278087190320722, "percentage": 25.56, "elapsed_time": "0:53:02", "remaining_time": "2:34:27", "throughput": 19742.66, "total_tokens": 62829056}
|
|
{"current_steps": 19970, "total_steps": 78105, "loss": 0.315, "lr": 4.6399310159967306e-06, "epoch": 1.278407272261699, "percentage": 25.57, "elapsed_time": "0:53:03", "remaining_time": "2:34:26", "throughput": 19743.36, "total_tokens": 62844288}
|
|
{"current_steps": 19975, "total_steps": 78105, "loss": 0.3672, "lr": 4.639642127291211e-06, "epoch": 1.2787273542026758, "percentage": 25.57, "elapsed_time": "0:53:03", "remaining_time": "2:34:25", "throughput": 19744.01, "total_tokens": 62859584}
|
|
{"current_steps": 19980, "total_steps": 78105, "loss": 0.4307, "lr": 4.639353131743108e-06, "epoch": 1.2790474361436528, "percentage": 25.58, "elapsed_time": "0:53:04", "remaining_time": "2:34:23", "throughput": 19744.7, "total_tokens": 62874432}
|
|
{"current_steps": 19985, "total_steps": 78105, "loss": 0.2447, "lr": 4.639064029366854e-06, "epoch": 1.2793675180846296, "percentage": 25.59, "elapsed_time": "0:53:05", "remaining_time": "2:34:22", "throughput": 19745.42, "total_tokens": 62890112}
|
|
{"current_steps": 19990, "total_steps": 78105, "loss": 0.2688, "lr": 4.638774820176884e-06, "epoch": 1.2796876000256066, "percentage": 25.59, "elapsed_time": "0:53:05", "remaining_time": "2:34:21", "throughput": 19746.23, "total_tokens": 62906432}
|
|
{"current_steps": 19995, "total_steps": 78105, "loss": 0.3082, "lr": 4.63848550418764e-06, "epoch": 1.2800076819665835, "percentage": 25.6, "elapsed_time": "0:53:06", "remaining_time": "2:34:20", "throughput": 19747.25, "total_tokens": 62924288}
|
|
{"current_steps": 20000, "total_steps": 78105, "loss": 0.3916, "lr": 4.638196081413569e-06, "epoch": 1.2803277639075603, "percentage": 25.61, "elapsed_time": "0:53:07", "remaining_time": "2:34:19", "throughput": 19747.9, "total_tokens": 62939712}
|
|
{"current_steps": 20005, "total_steps": 78105, "loss": 0.3147, "lr": 4.637906551869123e-06, "epoch": 1.2806478458485373, "percentage": 25.61, "elapsed_time": "0:53:07", "remaining_time": "2:34:18", "throughput": 19748.56, "total_tokens": 62954816}
|
|
{"current_steps": 20010, "total_steps": 78105, "loss": 0.4312, "lr": 4.63761691556876e-06, "epoch": 1.280967927789514, "percentage": 25.62, "elapsed_time": "0:53:08", "remaining_time": "2:34:17", "throughput": 19749.39, "total_tokens": 62971008}
|
|
{"current_steps": 20015, "total_steps": 78105, "loss": 0.3879, "lr": 4.637327172526942e-06, "epoch": 1.281288009730491, "percentage": 25.63, "elapsed_time": "0:53:09", "remaining_time": "2:34:15", "throughput": 19750.1, "total_tokens": 62986240}
|
|
{"current_steps": 20020, "total_steps": 78105, "loss": 0.4595, "lr": 4.637037322758139e-06, "epoch": 1.2816080916714678, "percentage": 25.63, "elapsed_time": "0:53:09", "remaining_time": "2:34:14", "throughput": 19750.71, "total_tokens": 63000768}
|
|
{"current_steps": 20025, "total_steps": 78105, "loss": 0.287, "lr": 4.636747366276822e-06, "epoch": 1.2819281736124448, "percentage": 25.64, "elapsed_time": "0:53:10", "remaining_time": "2:34:13", "throughput": 19751.55, "total_tokens": 63017344}
|
|
{"current_steps": 20030, "total_steps": 78105, "loss": 0.3764, "lr": 4.6364573030974725e-06, "epoch": 1.2822482555534216, "percentage": 25.64, "elapsed_time": "0:53:11", "remaining_time": "2:34:12", "throughput": 19752.16, "total_tokens": 63031872}
|
|
{"current_steps": 20035, "total_steps": 78105, "loss": 0.3154, "lr": 4.636167133234574e-06, "epoch": 1.2825683374943986, "percentage": 25.65, "elapsed_time": "0:53:11", "remaining_time": "2:34:11", "throughput": 19752.89, "total_tokens": 63047936}
|
|
{"current_steps": 20040, "total_steps": 78105, "loss": 0.5253, "lr": 4.6358768567026145e-06, "epoch": 1.2828884194353756, "percentage": 25.66, "elapsed_time": "0:53:12", "remaining_time": "2:34:10", "throughput": 19753.44, "total_tokens": 63062144}
|
|
{"current_steps": 20045, "total_steps": 78105, "loss": 0.3548, "lr": 4.635586473516092e-06, "epoch": 1.2832085013763523, "percentage": 25.66, "elapsed_time": "0:53:13", "remaining_time": "2:34:08", "throughput": 19754.13, "total_tokens": 63077376}
|
|
{"current_steps": 20050, "total_steps": 78105, "loss": 0.3181, "lr": 4.6352959836895024e-06, "epoch": 1.283528583317329, "percentage": 25.67, "elapsed_time": "0:53:13", "remaining_time": "2:34:07", "throughput": 19754.88, "total_tokens": 63092928}
|
|
{"current_steps": 20055, "total_steps": 78105, "loss": 0.4008, "lr": 4.635005387237355e-06, "epoch": 1.283848665258306, "percentage": 25.68, "elapsed_time": "0:53:14", "remaining_time": "2:34:06", "throughput": 19755.59, "total_tokens": 63108224}
|
|
{"current_steps": 20060, "total_steps": 78105, "loss": 0.4035, "lr": 4.63471468417416e-06, "epoch": 1.284168747199283, "percentage": 25.68, "elapsed_time": "0:53:15", "remaining_time": "2:34:05", "throughput": 19756.29, "total_tokens": 63123584}
|
|
{"current_steps": 20065, "total_steps": 78105, "loss": 0.3448, "lr": 4.634423874514433e-06, "epoch": 1.2844888291402599, "percentage": 25.69, "elapsed_time": "0:53:15", "remaining_time": "2:34:04", "throughput": 19756.92, "total_tokens": 63138432}
|
|
{"current_steps": 20070, "total_steps": 78105, "loss": 0.3615, "lr": 4.634132958272694e-06, "epoch": 1.2848089110812368, "percentage": 25.7, "elapsed_time": "0:53:16", "remaining_time": "2:34:02", "throughput": 19757.59, "total_tokens": 63153600}
|
|
{"current_steps": 20075, "total_steps": 78105, "loss": 0.3817, "lr": 4.633841935463473e-06, "epoch": 1.2851289930222136, "percentage": 25.7, "elapsed_time": "0:53:17", "remaining_time": "2:34:01", "throughput": 19758.15, "total_tokens": 63167936}
|
|
{"current_steps": 20080, "total_steps": 78105, "loss": 0.2778, "lr": 4.6335508061013005e-06, "epoch": 1.2854490749631906, "percentage": 25.71, "elapsed_time": "0:53:17", "remaining_time": "2:34:00", "throughput": 19758.87, "total_tokens": 63183680}
|
|
{"current_steps": 20085, "total_steps": 78105, "loss": 0.39, "lr": 4.633259570200713e-06, "epoch": 1.2857691569041676, "percentage": 25.72, "elapsed_time": "0:53:18", "remaining_time": "2:33:59", "throughput": 19759.63, "total_tokens": 63199552}
|
|
{"current_steps": 20090, "total_steps": 78105, "loss": 0.397, "lr": 4.6329682277762544e-06, "epoch": 1.2860892388451444, "percentage": 25.72, "elapsed_time": "0:53:19", "remaining_time": "2:33:58", "throughput": 19760.32, "total_tokens": 63214848}
|
|
{"current_steps": 20095, "total_steps": 78105, "loss": 0.3408, "lr": 4.632676778842473e-06, "epoch": 1.2864093207861211, "percentage": 25.73, "elapsed_time": "0:53:19", "remaining_time": "2:33:56", "throughput": 19761.0, "total_tokens": 63230272}
|
|
{"current_steps": 20100, "total_steps": 78105, "loss": 0.2205, "lr": 4.632385223413922e-06, "epoch": 1.2867294027270981, "percentage": 25.73, "elapsed_time": "0:53:20", "remaining_time": "2:33:55", "throughput": 19761.92, "total_tokens": 63247424}
|
|
{"current_steps": 20105, "total_steps": 78105, "loss": 0.3853, "lr": 4.63209356150516e-06, "epoch": 1.2870494846680751, "percentage": 25.74, "elapsed_time": "0:53:21", "remaining_time": "2:33:54", "throughput": 19762.68, "total_tokens": 63263360}
|
|
{"current_steps": 20110, "total_steps": 78105, "loss": 0.2982, "lr": 4.631801793130751e-06, "epoch": 1.2873695666090519, "percentage": 25.75, "elapsed_time": "0:53:21", "remaining_time": "2:33:53", "throughput": 19763.38, "total_tokens": 63278784}
|
|
{"current_steps": 20115, "total_steps": 78105, "loss": 0.2606, "lr": 4.631509918305266e-06, "epoch": 1.2876896485500289, "percentage": 25.75, "elapsed_time": "0:53:22", "remaining_time": "2:33:52", "throughput": 19764.1, "total_tokens": 63294784}
|
|
{"current_steps": 20120, "total_steps": 78105, "loss": 0.3204, "lr": 4.6312179370432766e-06, "epoch": 1.2880097304910056, "percentage": 25.76, "elapsed_time": "0:53:23", "remaining_time": "2:33:51", "throughput": 19764.73, "total_tokens": 63309760}
|
|
{"current_steps": 20125, "total_steps": 78105, "loss": 0.4828, "lr": 4.630925849359366e-06, "epoch": 1.2883298124319826, "percentage": 25.77, "elapsed_time": "0:53:23", "remaining_time": "2:33:50", "throughput": 19765.49, "total_tokens": 63325312}
|
|
{"current_steps": 20130, "total_steps": 78105, "loss": 0.2336, "lr": 4.630633655268116e-06, "epoch": 1.2886498943729594, "percentage": 25.77, "elapsed_time": "0:53:24", "remaining_time": "2:33:49", "throughput": 19766.16, "total_tokens": 63340416}
|
|
{"current_steps": 20135, "total_steps": 78105, "loss": 0.4746, "lr": 4.6303413547841215e-06, "epoch": 1.2889699763139364, "percentage": 25.78, "elapsed_time": "0:53:25", "remaining_time": "2:33:47", "throughput": 19766.93, "total_tokens": 63356352}
|
|
{"current_steps": 20140, "total_steps": 78105, "loss": 0.3209, "lr": 4.6300489479219745e-06, "epoch": 1.2892900582549132, "percentage": 25.79, "elapsed_time": "0:53:25", "remaining_time": "2:33:46", "throughput": 19767.65, "total_tokens": 63371904}
|
|
{"current_steps": 20145, "total_steps": 78105, "loss": 0.2791, "lr": 4.62975643469628e-06, "epoch": 1.2896101401958902, "percentage": 25.79, "elapsed_time": "0:53:26", "remaining_time": "2:33:45", "throughput": 19768.4, "total_tokens": 63387712}
|
|
{"current_steps": 20150, "total_steps": 78105, "loss": 0.2382, "lr": 4.629463815121641e-06, "epoch": 1.2899302221368671, "percentage": 25.8, "elapsed_time": "0:53:27", "remaining_time": "2:33:44", "throughput": 19769.14, "total_tokens": 63403392}
|
|
{"current_steps": 20155, "total_steps": 78105, "loss": 0.4451, "lr": 4.6291710892126715e-06, "epoch": 1.290250304077844, "percentage": 25.81, "elapsed_time": "0:53:27", "remaining_time": "2:33:43", "throughput": 19769.82, "total_tokens": 63418496}
|
|
{"current_steps": 20160, "total_steps": 78105, "loss": 0.4002, "lr": 4.628878256983989e-06, "epoch": 1.2905703860188207, "percentage": 25.81, "elapsed_time": "0:53:28", "remaining_time": "2:33:42", "throughput": 19770.57, "total_tokens": 63434432}
|
|
{"current_steps": 20165, "total_steps": 78105, "loss": 0.3181, "lr": 4.628585318450214e-06, "epoch": 1.2908904679597977, "percentage": 25.82, "elapsed_time": "0:53:29", "remaining_time": "2:33:41", "throughput": 19771.37, "total_tokens": 63450752}
|
|
{"current_steps": 20170, "total_steps": 78105, "loss": 0.4293, "lr": 4.6282922736259764e-06, "epoch": 1.2912105499007747, "percentage": 25.82, "elapsed_time": "0:53:29", "remaining_time": "2:33:39", "throughput": 19772.01, "total_tokens": 63466176}
|
|
{"current_steps": 20175, "total_steps": 78105, "loss": 0.3507, "lr": 4.627999122525908e-06, "epoch": 1.2915306318417514, "percentage": 25.83, "elapsed_time": "0:53:30", "remaining_time": "2:33:38", "throughput": 19772.66, "total_tokens": 63481280}
|
|
{"current_steps": 20180, "total_steps": 78105, "loss": 0.4395, "lr": 4.627705865164648e-06, "epoch": 1.2918507137827284, "percentage": 25.84, "elapsed_time": "0:53:31", "remaining_time": "2:33:37", "throughput": 19773.32, "total_tokens": 63496640}
|
|
{"current_steps": 20185, "total_steps": 78105, "loss": 0.2385, "lr": 4.627412501556841e-06, "epoch": 1.2921707957237052, "percentage": 25.84, "elapsed_time": "0:53:31", "remaining_time": "2:33:36", "throughput": 19774.11, "total_tokens": 63513280}
|
|
{"current_steps": 20190, "total_steps": 78105, "loss": 0.2578, "lr": 4.627119031717133e-06, "epoch": 1.2924908776646822, "percentage": 25.85, "elapsed_time": "0:53:32", "remaining_time": "2:33:35", "throughput": 19774.89, "total_tokens": 63529472}
|
|
{"current_steps": 20195, "total_steps": 78105, "loss": 0.2907, "lr": 4.626825455660182e-06, "epoch": 1.2928109596056592, "percentage": 25.86, "elapsed_time": "0:53:33", "remaining_time": "2:33:34", "throughput": 19775.6, "total_tokens": 63545088}
|
|
{"current_steps": 20200, "total_steps": 78105, "loss": 0.4634, "lr": 4.6265317734006445e-06, "epoch": 1.293131041546636, "percentage": 25.86, "elapsed_time": "0:53:33", "remaining_time": "2:33:33", "throughput": 19776.23, "total_tokens": 63560064}
|
|
{"current_steps": 20205, "total_steps": 78105, "loss": 0.463, "lr": 4.626237984953188e-06, "epoch": 1.2934511234876127, "percentage": 25.87, "elapsed_time": "0:53:34", "remaining_time": "2:33:31", "throughput": 19777.03, "total_tokens": 63576064}
|
|
{"current_steps": 20210, "total_steps": 78105, "loss": 0.4143, "lr": 4.625944090332481e-06, "epoch": 1.2937712054285897, "percentage": 25.88, "elapsed_time": "0:53:35", "remaining_time": "2:33:30", "throughput": 19777.66, "total_tokens": 63590976}
|
|
{"current_steps": 20215, "total_steps": 78105, "loss": 0.3367, "lr": 4.625650089553201e-06, "epoch": 1.2940912873695667, "percentage": 25.88, "elapsed_time": "0:53:35", "remaining_time": "2:33:29", "throughput": 19778.49, "total_tokens": 63607552}
|
|
{"current_steps": 20220, "total_steps": 78105, "loss": 0.3755, "lr": 4.625355982630027e-06, "epoch": 1.2944113693105435, "percentage": 25.89, "elapsed_time": "0:53:36", "remaining_time": "2:33:28", "throughput": 19779.41, "total_tokens": 63624896}
|
|
{"current_steps": 20225, "total_steps": 78105, "loss": 0.2755, "lr": 4.625061769577645e-06, "epoch": 1.2947314512515204, "percentage": 25.89, "elapsed_time": "0:53:37", "remaining_time": "2:33:27", "throughput": 19780.16, "total_tokens": 63640576}
|
|
{"current_steps": 20230, "total_steps": 78105, "loss": 0.3411, "lr": 4.624767450410748e-06, "epoch": 1.2950515331924972, "percentage": 25.9, "elapsed_time": "0:53:38", "remaining_time": "2:33:26", "throughput": 19780.75, "total_tokens": 63655552}
|
|
{"current_steps": 20235, "total_steps": 78105, "loss": 0.4274, "lr": 4.624473025144031e-06, "epoch": 1.2953716151334742, "percentage": 25.91, "elapsed_time": "0:53:38", "remaining_time": "2:33:25", "throughput": 19781.48, "total_tokens": 63671296}
|
|
{"current_steps": 20240, "total_steps": 78105, "loss": 0.2648, "lr": 4.624178493792199e-06, "epoch": 1.295691697074451, "percentage": 25.91, "elapsed_time": "0:53:39", "remaining_time": "2:33:23", "throughput": 19782.04, "total_tokens": 63685632}
|
|
{"current_steps": 20245, "total_steps": 78105, "loss": 0.3499, "lr": 4.623883856369956e-06, "epoch": 1.296011779015428, "percentage": 25.92, "elapsed_time": "0:53:40", "remaining_time": "2:33:22", "throughput": 19782.77, "total_tokens": 63701184}
|
|
{"current_steps": 20250, "total_steps": 78105, "loss": 0.334, "lr": 4.623589112892017e-06, "epoch": 1.2963318609564047, "percentage": 25.93, "elapsed_time": "0:53:40", "remaining_time": "2:33:21", "throughput": 19783.48, "total_tokens": 63716992}
|
|
{"current_steps": 20255, "total_steps": 78105, "loss": 0.3291, "lr": 4.623294263373098e-06, "epoch": 1.2966519428973817, "percentage": 25.93, "elapsed_time": "0:53:41", "remaining_time": "2:33:20", "throughput": 19784.24, "total_tokens": 63733184}
|
|
{"current_steps": 20260, "total_steps": 78105, "loss": 0.3076, "lr": 4.6229993078279245e-06, "epoch": 1.2969720248383587, "percentage": 25.94, "elapsed_time": "0:53:42", "remaining_time": "2:33:19", "throughput": 19784.85, "total_tokens": 63747904}
|
|
{"current_steps": 20265, "total_steps": 78105, "loss": 0.2991, "lr": 4.6227042462712235e-06, "epoch": 1.2972921067793355, "percentage": 25.95, "elapsed_time": "0:53:42", "remaining_time": "2:33:18", "throughput": 19785.6, "total_tokens": 63763904}
|
|
{"current_steps": 20270, "total_steps": 78105, "loss": 0.272, "lr": 4.622409078717729e-06, "epoch": 1.2976121887203125, "percentage": 25.95, "elapsed_time": "0:53:43", "remaining_time": "2:33:17", "throughput": 19786.33, "total_tokens": 63779648}
|
|
{"current_steps": 20275, "total_steps": 78105, "loss": 0.4574, "lr": 4.62211380518218e-06, "epoch": 1.2979322706612892, "percentage": 25.96, "elapsed_time": "0:53:44", "remaining_time": "2:33:16", "throughput": 19786.96, "total_tokens": 63795008}
|
|
{"current_steps": 20280, "total_steps": 78105, "loss": 0.2077, "lr": 4.621818425679322e-06, "epoch": 1.2982523526022662, "percentage": 25.97, "elapsed_time": "0:53:44", "remaining_time": "2:33:14", "throughput": 19787.6, "total_tokens": 63810112}
|
|
{"current_steps": 20285, "total_steps": 78105, "loss": 0.3095, "lr": 4.621522940223903e-06, "epoch": 1.298572434543243, "percentage": 25.97, "elapsed_time": "0:53:45", "remaining_time": "2:33:13", "throughput": 19788.53, "total_tokens": 63827264}
|
|
{"current_steps": 20290, "total_steps": 78105, "loss": 0.5271, "lr": 4.6212273488306806e-06, "epoch": 1.29889251648422, "percentage": 25.98, "elapsed_time": "0:53:46", "remaining_time": "2:33:12", "throughput": 19789.28, "total_tokens": 63843520}
|
|
{"current_steps": 20295, "total_steps": 78105, "loss": 0.6525, "lr": 4.620931651514412e-06, "epoch": 1.2992125984251968, "percentage": 25.98, "elapsed_time": "0:53:46", "remaining_time": "2:33:11", "throughput": 19789.98, "total_tokens": 63859264}
|
|
{"current_steps": 20300, "total_steps": 78105, "loss": 0.4714, "lr": 4.620635848289865e-06, "epoch": 1.2995326803661738, "percentage": 25.99, "elapsed_time": "0:53:47", "remaining_time": "2:33:10", "throughput": 19790.69, "total_tokens": 63874816}
|
|
{"current_steps": 20305, "total_steps": 78105, "loss": 0.2975, "lr": 4.620339939171809e-06, "epoch": 1.2998527623071507, "percentage": 26.0, "elapsed_time": "0:53:48", "remaining_time": "2:33:09", "throughput": 19791.32, "total_tokens": 63889728}
|
|
{"current_steps": 20310, "total_steps": 78105, "loss": 0.3383, "lr": 4.620043924175022e-06, "epoch": 1.3001728442481275, "percentage": 26.0, "elapsed_time": "0:53:48", "remaining_time": "2:33:08", "throughput": 19792.05, "total_tokens": 63905472}
|
|
{"current_steps": 20315, "total_steps": 78105, "loss": 0.2967, "lr": 4.619747803314284e-06, "epoch": 1.3004929261891043, "percentage": 26.01, "elapsed_time": "0:53:49", "remaining_time": "2:33:06", "throughput": 19792.66, "total_tokens": 63920320}
|
|
{"current_steps": 20320, "total_steps": 78105, "loss": 0.293, "lr": 4.619451576604381e-06, "epoch": 1.3008130081300813, "percentage": 26.02, "elapsed_time": "0:53:50", "remaining_time": "2:33:05", "throughput": 19793.29, "total_tokens": 63935488}
|
|
{"current_steps": 20325, "total_steps": 78105, "loss": 0.2501, "lr": 4.619155244060107e-06, "epoch": 1.3011330900710583, "percentage": 26.02, "elapsed_time": "0:53:50", "remaining_time": "2:33:04", "throughput": 19794.05, "total_tokens": 63951616}
|
|
{"current_steps": 20330, "total_steps": 78105, "loss": 0.4311, "lr": 4.618858805696258e-06, "epoch": 1.301453172012035, "percentage": 26.03, "elapsed_time": "0:53:51", "remaining_time": "2:33:03", "throughput": 19794.61, "total_tokens": 63965952}
|
|
{"current_steps": 20335, "total_steps": 78105, "loss": 0.3449, "lr": 4.6185622615276375e-06, "epoch": 1.301773253953012, "percentage": 26.04, "elapsed_time": "0:53:52", "remaining_time": "2:33:02", "throughput": 19795.24, "total_tokens": 63981312}
|
|
{"current_steps": 20340, "total_steps": 78105, "loss": 0.4485, "lr": 4.618265611569052e-06, "epoch": 1.3020933358939888, "percentage": 26.04, "elapsed_time": "0:53:52", "remaining_time": "2:33:01", "throughput": 19795.81, "total_tokens": 63995584}
|
|
{"current_steps": 20345, "total_steps": 78105, "loss": 0.3986, "lr": 4.617968855835317e-06, "epoch": 1.3024134178349658, "percentage": 26.05, "elapsed_time": "0:53:53", "remaining_time": "2:32:59", "throughput": 19796.71, "total_tokens": 64012608}
|
|
{"current_steps": 20350, "total_steps": 78105, "loss": 0.2306, "lr": 4.617671994341248e-06, "epoch": 1.3027334997759428, "percentage": 26.05, "elapsed_time": "0:53:54", "remaining_time": "2:32:58", "throughput": 19797.51, "total_tokens": 64028992}
|
|
{"current_steps": 20355, "total_steps": 78105, "loss": 0.3203, "lr": 4.61737502710167e-06, "epoch": 1.3030535817169195, "percentage": 26.06, "elapsed_time": "0:53:54", "remaining_time": "2:32:57", "throughput": 19798.27, "total_tokens": 64045056}
|
|
{"current_steps": 20360, "total_steps": 78105, "loss": 0.4393, "lr": 4.6170779541314134e-06, "epoch": 1.3033736636578963, "percentage": 26.07, "elapsed_time": "0:53:55", "remaining_time": "2:32:56", "throughput": 19798.89, "total_tokens": 64059840}
|
|
{"current_steps": 20365, "total_steps": 78105, "loss": 0.3302, "lr": 4.616780775445311e-06, "epoch": 1.3036937455988733, "percentage": 26.07, "elapsed_time": "0:53:56", "remaining_time": "2:32:55", "throughput": 19799.72, "total_tokens": 64076224}
|
|
{"current_steps": 20370, "total_steps": 78105, "loss": 0.3779, "lr": 4.6164834910582014e-06, "epoch": 1.3040138275398503, "percentage": 26.08, "elapsed_time": "0:53:56", "remaining_time": "2:32:54", "throughput": 19800.38, "total_tokens": 64091328}
|
|
{"current_steps": 20375, "total_steps": 78105, "loss": 0.4574, "lr": 4.6161861009849315e-06, "epoch": 1.304333909480827, "percentage": 26.09, "elapsed_time": "0:53:57", "remaining_time": "2:32:53", "throughput": 19800.9, "total_tokens": 64105472}
|
|
{"current_steps": 20380, "total_steps": 78105, "loss": 0.3478, "lr": 4.615888605240351e-06, "epoch": 1.304653991421804, "percentage": 26.09, "elapsed_time": "0:53:58", "remaining_time": "2:32:51", "throughput": 19801.52, "total_tokens": 64120256}
|
|
{"current_steps": 20385, "total_steps": 78105, "loss": 0.3416, "lr": 4.615591003839314e-06, "epoch": 1.3049740733627808, "percentage": 26.1, "elapsed_time": "0:53:58", "remaining_time": "2:32:50", "throughput": 19802.2, "total_tokens": 64135680}
|
|
{"current_steps": 20390, "total_steps": 78105, "loss": 0.3507, "lr": 4.615293296796683e-06, "epoch": 1.3052941553037578, "percentage": 26.11, "elapsed_time": "0:53:59", "remaining_time": "2:32:49", "throughput": 19802.8, "total_tokens": 64150592}
|
|
{"current_steps": 20395, "total_steps": 78105, "loss": 0.3035, "lr": 4.614995484127322e-06, "epoch": 1.3056142372447346, "percentage": 26.11, "elapsed_time": "0:54:00", "remaining_time": "2:32:48", "throughput": 19803.5, "total_tokens": 64166080}
|
|
{"current_steps": 20400, "total_steps": 78105, "loss": 0.3247, "lr": 4.6146975658461025e-06, "epoch": 1.3059343191857116, "percentage": 26.12, "elapsed_time": "0:54:00", "remaining_time": "2:32:47", "throughput": 19804.19, "total_tokens": 64181632}
|
|
{"current_steps": 20405, "total_steps": 78105, "loss": 0.2535, "lr": 4.614399541967902e-06, "epoch": 1.3062544011266883, "percentage": 26.13, "elapsed_time": "0:54:01", "remaining_time": "2:32:46", "throughput": 19804.96, "total_tokens": 64197760}
|
|
{"current_steps": 20410, "total_steps": 78105, "loss": 0.3006, "lr": 4.614101412507602e-06, "epoch": 1.3065744830676653, "percentage": 26.13, "elapsed_time": "0:54:02", "remaining_time": "2:32:44", "throughput": 19805.58, "total_tokens": 64212416}
|
|
{"current_steps": 20415, "total_steps": 78105, "loss": 0.2959, "lr": 4.61380317748009e-06, "epoch": 1.3068945650086423, "percentage": 26.14, "elapsed_time": "0:54:02", "remaining_time": "2:32:43", "throughput": 19806.26, "total_tokens": 64228032}
|
|
{"current_steps": 20420, "total_steps": 78105, "loss": 0.2968, "lr": 4.613504836900258e-06, "epoch": 1.307214646949619, "percentage": 26.14, "elapsed_time": "0:54:03", "remaining_time": "2:32:42", "throughput": 19806.85, "total_tokens": 64243008}
|
|
{"current_steps": 20425, "total_steps": 78105, "loss": 0.4151, "lr": 4.613206390783003e-06, "epoch": 1.3075347288905959, "percentage": 26.15, "elapsed_time": "0:54:04", "remaining_time": "2:32:41", "throughput": 19807.53, "total_tokens": 64257920}
|
|
{"current_steps": 20430, "total_steps": 78105, "loss": 0.3256, "lr": 4.612907839143227e-06, "epoch": 1.3078548108315728, "percentage": 26.16, "elapsed_time": "0:54:04", "remaining_time": "2:32:40", "throughput": 19808.25, "total_tokens": 64273664}
|
|
{"current_steps": 20435, "total_steps": 78105, "loss": 0.2772, "lr": 4.61260918199584e-06, "epoch": 1.3081748927725498, "percentage": 26.16, "elapsed_time": "0:54:05", "remaining_time": "2:32:39", "throughput": 19808.86, "total_tokens": 64288704}
|
|
{"current_steps": 20440, "total_steps": 78105, "loss": 0.3003, "lr": 4.612310419355754e-06, "epoch": 1.3084949747135266, "percentage": 26.17, "elapsed_time": "0:54:06", "remaining_time": "2:32:37", "throughput": 19809.65, "total_tokens": 64304896}
|
|
{"current_steps": 20445, "total_steps": 78105, "loss": 0.2842, "lr": 4.61201155123789e-06, "epoch": 1.3088150566545036, "percentage": 26.18, "elapsed_time": "0:54:06", "remaining_time": "2:32:36", "throughput": 19810.47, "total_tokens": 64321664}
|
|
{"current_steps": 20450, "total_steps": 78105, "loss": 0.3828, "lr": 4.611712577657169e-06, "epoch": 1.3091351385954804, "percentage": 26.18, "elapsed_time": "0:54:07", "remaining_time": "2:32:35", "throughput": 19811.15, "total_tokens": 64337152}
|
|
{"current_steps": 20455, "total_steps": 78105, "loss": 0.3349, "lr": 4.611413498628521e-06, "epoch": 1.3094552205364574, "percentage": 26.19, "elapsed_time": "0:54:08", "remaining_time": "2:32:34", "throughput": 19811.82, "total_tokens": 64352320}
|
|
{"current_steps": 20460, "total_steps": 78105, "loss": 0.2901, "lr": 4.611114314166881e-06, "epoch": 1.3097753024774343, "percentage": 26.2, "elapsed_time": "0:54:08", "remaining_time": "2:32:33", "throughput": 19812.55, "total_tokens": 64368256}
|
|
{"current_steps": 20465, "total_steps": 78105, "loss": 0.297, "lr": 4.610815024287189e-06, "epoch": 1.3100953844184111, "percentage": 26.2, "elapsed_time": "0:54:09", "remaining_time": "2:32:32", "throughput": 19813.1, "total_tokens": 64382848}
|
|
{"current_steps": 20470, "total_steps": 78105, "loss": 0.3665, "lr": 4.610515629004391e-06, "epoch": 1.3104154663593879, "percentage": 26.21, "elapsed_time": "0:54:10", "remaining_time": "2:32:31", "throughput": 19813.75, "total_tokens": 64398336}
|
|
{"current_steps": 20475, "total_steps": 78105, "loss": 0.3125, "lr": 4.610216128333433e-06, "epoch": 1.3107355483003649, "percentage": 26.21, "elapsed_time": "0:54:10", "remaining_time": "2:32:30", "throughput": 19814.41, "total_tokens": 64413632}
|
|
{"current_steps": 20480, "total_steps": 78105, "loss": 0.3254, "lr": 4.6099165222892746e-06, "epoch": 1.3110556302413419, "percentage": 26.22, "elapsed_time": "0:54:11", "remaining_time": "2:32:28", "throughput": 19815.01, "total_tokens": 64428480}
|
|
{"current_steps": 20485, "total_steps": 78105, "loss": 0.382, "lr": 4.609616810886875e-06, "epoch": 1.3113757121823186, "percentage": 26.23, "elapsed_time": "0:54:12", "remaining_time": "2:32:27", "throughput": 19815.66, "total_tokens": 64443904}
|
|
{"current_steps": 20490, "total_steps": 78105, "loss": 0.3723, "lr": 4.609316994141201e-06, "epoch": 1.3116957941232956, "percentage": 26.23, "elapsed_time": "0:54:12", "remaining_time": "2:32:26", "throughput": 19816.54, "total_tokens": 64461056}
|
|
{"current_steps": 20495, "total_steps": 78105, "loss": 0.3374, "lr": 4.6090170720672234e-06, "epoch": 1.3120158760642724, "percentage": 26.24, "elapsed_time": "0:54:13", "remaining_time": "2:32:25", "throughput": 19817.25, "total_tokens": 64476416}
|
|
{"current_steps": 20500, "total_steps": 78105, "loss": 0.2786, "lr": 4.608717044679918e-06, "epoch": 1.3123359580052494, "percentage": 26.25, "elapsed_time": "0:54:14", "remaining_time": "2:32:24", "throughput": 19818.0, "total_tokens": 64492544}
|
|
{"current_steps": 20505, "total_steps": 78105, "loss": 0.3677, "lr": 4.6084169119942674e-06, "epoch": 1.3126560399462264, "percentage": 26.25, "elapsed_time": "0:54:14", "remaining_time": "2:32:23", "throughput": 19818.62, "total_tokens": 64507456}
|
|
{"current_steps": 20510, "total_steps": 78105, "loss": 0.3362, "lr": 4.608116674025259e-06, "epoch": 1.3129761218872031, "percentage": 26.26, "elapsed_time": "0:54:15", "remaining_time": "2:32:22", "throughput": 19819.33, "total_tokens": 64523392}
|
|
{"current_steps": 20515, "total_steps": 78105, "loss": 0.2361, "lr": 4.6078163307878845e-06, "epoch": 1.31329620382818, "percentage": 26.27, "elapsed_time": "0:54:16", "remaining_time": "2:32:21", "throughput": 19820.12, "total_tokens": 64539968}
|
|
{"current_steps": 20520, "total_steps": 78105, "loss": 0.2773, "lr": 4.607515882297141e-06, "epoch": 1.313616285769157, "percentage": 26.27, "elapsed_time": "0:54:16", "remaining_time": "2:32:19", "throughput": 19820.78, "total_tokens": 64555200}
|
|
{"current_steps": 20525, "total_steps": 78105, "loss": 0.4187, "lr": 4.607215328568032e-06, "epoch": 1.313936367710134, "percentage": 26.28, "elapsed_time": "0:54:17", "remaining_time": "2:32:18", "throughput": 19821.48, "total_tokens": 64570944}
|
|
{"current_steps": 20530, "total_steps": 78105, "loss": 0.3009, "lr": 4.6069146696155664e-06, "epoch": 1.3142564496511107, "percentage": 26.29, "elapsed_time": "0:54:18", "remaining_time": "2:32:17", "throughput": 19822.13, "total_tokens": 64585984}
|
|
{"current_steps": 20535, "total_steps": 78105, "loss": 0.4433, "lr": 4.606613905454757e-06, "epoch": 1.3145765315920876, "percentage": 26.29, "elapsed_time": "0:54:18", "remaining_time": "2:32:16", "throughput": 19822.77, "total_tokens": 64601216}
|
|
{"current_steps": 20540, "total_steps": 78105, "loss": 0.4154, "lr": 4.606313036100621e-06, "epoch": 1.3148966135330644, "percentage": 26.3, "elapsed_time": "0:54:19", "remaining_time": "2:32:15", "throughput": 19823.52, "total_tokens": 64617344}
|
|
{"current_steps": 20545, "total_steps": 78105, "loss": 0.3504, "lr": 4.606012061568184e-06, "epoch": 1.3152166954740414, "percentage": 26.3, "elapsed_time": "0:54:20", "remaining_time": "2:32:14", "throughput": 19824.16, "total_tokens": 64632576}
|
|
{"current_steps": 20550, "total_steps": 78105, "loss": 0.3389, "lr": 4.605710981872474e-06, "epoch": 1.3155367774150182, "percentage": 26.31, "elapsed_time": "0:54:20", "remaining_time": "2:32:13", "throughput": 19824.79, "total_tokens": 64647808}
|
|
{"current_steps": 20555, "total_steps": 78105, "loss": 0.4269, "lr": 4.605409797028526e-06, "epoch": 1.3158568593559952, "percentage": 26.32, "elapsed_time": "0:54:21", "remaining_time": "2:32:12", "throughput": 19825.69, "total_tokens": 64664960}
|
|
{"current_steps": 20560, "total_steps": 78105, "loss": 0.3313, "lr": 4.60510850705138e-06, "epoch": 1.316176941296972, "percentage": 26.32, "elapsed_time": "0:54:22", "remaining_time": "2:32:10", "throughput": 19826.48, "total_tokens": 64681152}
|
|
{"current_steps": 20565, "total_steps": 78105, "loss": 0.2658, "lr": 4.60480711195608e-06, "epoch": 1.316497023237949, "percentage": 26.33, "elapsed_time": "0:54:23", "remaining_time": "2:32:09", "throughput": 19827.19, "total_tokens": 64696576}
|
|
{"current_steps": 20570, "total_steps": 78105, "loss": 0.2836, "lr": 4.604505611757676e-06, "epoch": 1.316817105178926, "percentage": 26.34, "elapsed_time": "0:54:23", "remaining_time": "2:32:08", "throughput": 19827.84, "total_tokens": 64711552}
|
|
{"current_steps": 20575, "total_steps": 78105, "loss": 0.527, "lr": 4.604204006471223e-06, "epoch": 1.3171371871199027, "percentage": 26.34, "elapsed_time": "0:54:24", "remaining_time": "2:32:07", "throughput": 19828.49, "total_tokens": 64726720}
|
|
{"current_steps": 20580, "total_steps": 78105, "loss": 0.3877, "lr": 4.603902296111784e-06, "epoch": 1.3174572690608795, "percentage": 26.35, "elapsed_time": "0:54:24", "remaining_time": "2:32:06", "throughput": 19829.09, "total_tokens": 64741760}
|
|
{"current_steps": 20585, "total_steps": 78105, "loss": 0.366, "lr": 4.6036004806944225e-06, "epoch": 1.3177773510018564, "percentage": 26.36, "elapsed_time": "0:54:25", "remaining_time": "2:32:05", "throughput": 19829.79, "total_tokens": 64757696}
|
|
{"current_steps": 20590, "total_steps": 78105, "loss": 0.4845, "lr": 4.603298560234209e-06, "epoch": 1.3180974329428334, "percentage": 26.36, "elapsed_time": "0:54:26", "remaining_time": "2:32:04", "throughput": 19830.44, "total_tokens": 64773056}
|
|
{"current_steps": 20595, "total_steps": 78105, "loss": 0.2912, "lr": 4.602996534746223e-06, "epoch": 1.3184175148838102, "percentage": 26.37, "elapsed_time": "0:54:26", "remaining_time": "2:32:02", "throughput": 19831.05, "total_tokens": 64787904}
|
|
{"current_steps": 20600, "total_steps": 78105, "loss": 0.3269, "lr": 4.6026944042455434e-06, "epoch": 1.3187375968247872, "percentage": 26.37, "elapsed_time": "0:54:27", "remaining_time": "2:32:01", "throughput": 19831.75, "total_tokens": 64803712}
|
|
{"current_steps": 20605, "total_steps": 78105, "loss": 0.2514, "lr": 4.602392168747258e-06, "epoch": 1.319057678765764, "percentage": 26.38, "elapsed_time": "0:54:28", "remaining_time": "2:32:00", "throughput": 19832.45, "total_tokens": 64819264}
|
|
{"current_steps": 20610, "total_steps": 78105, "loss": 0.4147, "lr": 4.602089828266458e-06, "epoch": 1.319377760706741, "percentage": 26.39, "elapsed_time": "0:54:29", "remaining_time": "2:31:59", "throughput": 19833.19, "total_tokens": 64835136}
|
|
{"current_steps": 20615, "total_steps": 78105, "loss": 0.2751, "lr": 4.601787382818242e-06, "epoch": 1.319697842647718, "percentage": 26.39, "elapsed_time": "0:54:29", "remaining_time": "2:31:58", "throughput": 19833.87, "total_tokens": 64850560}
|
|
{"current_steps": 20620, "total_steps": 78105, "loss": 0.2697, "lr": 4.601484832417712e-06, "epoch": 1.3200179245886947, "percentage": 26.4, "elapsed_time": "0:54:30", "remaining_time": "2:31:57", "throughput": 19834.52, "total_tokens": 64865920}
|
|
{"current_steps": 20625, "total_steps": 78105, "loss": 0.3781, "lr": 4.6011821770799744e-06, "epoch": 1.3203380065296715, "percentage": 26.41, "elapsed_time": "0:54:31", "remaining_time": "2:31:56", "throughput": 19835.45, "total_tokens": 64883520}
|
|
{"current_steps": 20630, "total_steps": 78105, "loss": 0.2386, "lr": 4.600879416820146e-06, "epoch": 1.3206580884706485, "percentage": 26.41, "elapsed_time": "0:54:31", "remaining_time": "2:31:55", "throughput": 19836.14, "total_tokens": 64898816}
|
|
{"current_steps": 20635, "total_steps": 78105, "loss": 0.3511, "lr": 4.60057655165334e-06, "epoch": 1.3209781704116255, "percentage": 26.42, "elapsed_time": "0:54:32", "remaining_time": "2:31:54", "throughput": 19836.94, "total_tokens": 64915392}
|
|
{"current_steps": 20640, "total_steps": 78105, "loss": 0.3353, "lr": 4.6002735815946845e-06, "epoch": 1.3212982523526022, "percentage": 26.43, "elapsed_time": "0:54:33", "remaining_time": "2:31:52", "throughput": 19837.53, "total_tokens": 64930176}
|
|
{"current_steps": 20645, "total_steps": 78105, "loss": 0.3782, "lr": 4.5999705066593046e-06, "epoch": 1.3216183342935792, "percentage": 26.43, "elapsed_time": "0:54:33", "remaining_time": "2:31:51", "throughput": 19838.23, "total_tokens": 64945600}
|
|
{"current_steps": 20650, "total_steps": 78105, "loss": 0.2715, "lr": 4.599667326862337e-06, "epoch": 1.321938416234556, "percentage": 26.44, "elapsed_time": "0:54:34", "remaining_time": "2:31:50", "throughput": 19838.88, "total_tokens": 64960704}
|
|
{"current_steps": 20655, "total_steps": 78105, "loss": 0.3153, "lr": 4.599364042218919e-06, "epoch": 1.322258498175533, "percentage": 26.45, "elapsed_time": "0:54:35", "remaining_time": "2:31:49", "throughput": 19839.65, "total_tokens": 64976832}
|
|
{"current_steps": 20660, "total_steps": 78105, "loss": 0.2554, "lr": 4.599060652744197e-06, "epoch": 1.3225785801165097, "percentage": 26.45, "elapsed_time": "0:54:35", "remaining_time": "2:31:48", "throughput": 19840.32, "total_tokens": 64992256}
|
|
{"current_steps": 20665, "total_steps": 78105, "loss": 0.2524, "lr": 4.598757158453319e-06, "epoch": 1.3228986620574867, "percentage": 26.46, "elapsed_time": "0:54:36", "remaining_time": "2:31:47", "throughput": 19841.08, "total_tokens": 65008256}
|
|
{"current_steps": 20670, "total_steps": 78105, "loss": 0.2878, "lr": 4.598453559361441e-06, "epoch": 1.3232187439984635, "percentage": 26.46, "elapsed_time": "0:54:37", "remaining_time": "2:31:45", "throughput": 19841.61, "total_tokens": 65022720}
|
|
{"current_steps": 20675, "total_steps": 78105, "loss": 0.1734, "lr": 4.598149855483722e-06, "epoch": 1.3235388259394405, "percentage": 26.47, "elapsed_time": "0:54:37", "remaining_time": "2:31:44", "throughput": 19842.3, "total_tokens": 65038400}
|
|
{"current_steps": 20680, "total_steps": 78105, "loss": 0.411, "lr": 4.597846046835329e-06, "epoch": 1.3238589078804175, "percentage": 26.48, "elapsed_time": "0:54:38", "remaining_time": "2:31:43", "throughput": 19843.04, "total_tokens": 65054272}
|
|
{"current_steps": 20685, "total_steps": 78105, "loss": 0.4788, "lr": 4.597542133431431e-06, "epoch": 1.3241789898213943, "percentage": 26.48, "elapsed_time": "0:54:39", "remaining_time": "2:31:42", "throughput": 19843.93, "total_tokens": 65071488}
|
|
{"current_steps": 20690, "total_steps": 78105, "loss": 0.3476, "lr": 4.597238115287204e-06, "epoch": 1.3244990717623712, "percentage": 26.49, "elapsed_time": "0:54:39", "remaining_time": "2:31:41", "throughput": 19844.79, "total_tokens": 65088512}
|
|
{"current_steps": 20695, "total_steps": 78105, "loss": 0.4639, "lr": 4.596933992417831e-06, "epoch": 1.324819153703348, "percentage": 26.5, "elapsed_time": "0:54:40", "remaining_time": "2:31:40", "throughput": 19845.51, "total_tokens": 65104512}
|
|
{"current_steps": 20700, "total_steps": 78105, "loss": 0.2843, "lr": 4.596629764838496e-06, "epoch": 1.325139235644325, "percentage": 26.5, "elapsed_time": "0:54:41", "remaining_time": "2:31:39", "throughput": 19846.24, "total_tokens": 65120256}
|
|
{"current_steps": 20705, "total_steps": 78105, "loss": 0.4256, "lr": 4.596325432564392e-06, "epoch": 1.3254593175853018, "percentage": 26.51, "elapsed_time": "0:54:41", "remaining_time": "2:31:38", "throughput": 19846.8, "total_tokens": 65134720}
|
|
{"current_steps": 20710, "total_steps": 78105, "loss": 0.3155, "lr": 4.596020995610715e-06, "epoch": 1.3257793995262788, "percentage": 26.52, "elapsed_time": "0:54:42", "remaining_time": "2:31:37", "throughput": 19847.87, "total_tokens": 65153344}
|
|
{"current_steps": 20715, "total_steps": 78105, "loss": 0.3079, "lr": 4.595716453992668e-06, "epoch": 1.3260994814672555, "percentage": 26.52, "elapsed_time": "0:54:43", "remaining_time": "2:31:36", "throughput": 19848.61, "total_tokens": 65169280}
|
|
{"current_steps": 20720, "total_steps": 78105, "loss": 0.4038, "lr": 4.595411807725456e-06, "epoch": 1.3264195634082325, "percentage": 26.53, "elapsed_time": "0:54:43", "remaining_time": "2:31:35", "throughput": 19849.29, "total_tokens": 65184640}
|
|
{"current_steps": 20725, "total_steps": 78105, "loss": 0.4733, "lr": 4.595107056824294e-06, "epoch": 1.3267396453492095, "percentage": 26.53, "elapsed_time": "0:54:44", "remaining_time": "2:31:33", "throughput": 19850.01, "total_tokens": 65200128}
|
|
{"current_steps": 20730, "total_steps": 78105, "loss": 0.3263, "lr": 4.594802201304398e-06, "epoch": 1.3270597272901863, "percentage": 26.54, "elapsed_time": "0:54:45", "remaining_time": "2:31:32", "throughput": 19850.76, "total_tokens": 65216256}
|
|
{"current_steps": 20735, "total_steps": 78105, "loss": 0.4066, "lr": 4.594497241180992e-06, "epoch": 1.327379809231163, "percentage": 26.55, "elapsed_time": "0:54:46", "remaining_time": "2:31:31", "throughput": 19851.43, "total_tokens": 65231936}
|
|
{"current_steps": 20740, "total_steps": 78105, "loss": 0.318, "lr": 4.594192176469303e-06, "epoch": 1.32769989117214, "percentage": 26.55, "elapsed_time": "0:54:46", "remaining_time": "2:31:30", "throughput": 19852.12, "total_tokens": 65247872}
|
|
{"current_steps": 20745, "total_steps": 78105, "loss": 0.4635, "lr": 4.593887007184565e-06, "epoch": 1.328019973113117, "percentage": 26.56, "elapsed_time": "0:54:47", "remaining_time": "2:31:29", "throughput": 19852.76, "total_tokens": 65262976}
|
|
{"current_steps": 20750, "total_steps": 78105, "loss": 0.2729, "lr": 4.5935817333420176e-06, "epoch": 1.3283400550540938, "percentage": 26.57, "elapsed_time": "0:54:48", "remaining_time": "2:31:28", "throughput": 19853.42, "total_tokens": 65278336}
|
|
{"current_steps": 20755, "total_steps": 78105, "loss": 0.3281, "lr": 4.5932763549569026e-06, "epoch": 1.3286601369950708, "percentage": 26.57, "elapsed_time": "0:54:48", "remaining_time": "2:31:27", "throughput": 19854.2, "total_tokens": 65294912}
|
|
{"current_steps": 20760, "total_steps": 78105, "loss": 0.2953, "lr": 4.59297087204447e-06, "epoch": 1.3289802189360476, "percentage": 26.58, "elapsed_time": "0:54:49", "remaining_time": "2:31:26", "throughput": 19855.04, "total_tokens": 65312000}
|
|
{"current_steps": 20765, "total_steps": 78105, "loss": 0.2865, "lr": 4.592665284619975e-06, "epoch": 1.3293003008770246, "percentage": 26.59, "elapsed_time": "0:54:50", "remaining_time": "2:31:25", "throughput": 19855.68, "total_tokens": 65327360}
|
|
{"current_steps": 20770, "total_steps": 78105, "loss": 0.387, "lr": 4.5923595926986745e-06, "epoch": 1.3296203828180015, "percentage": 26.59, "elapsed_time": "0:54:50", "remaining_time": "2:31:24", "throughput": 19856.63, "total_tokens": 65344832}
|
|
{"current_steps": 20775, "total_steps": 78105, "loss": 0.344, "lr": 4.592053796295835e-06, "epoch": 1.3299404647589783, "percentage": 26.6, "elapsed_time": "0:54:51", "remaining_time": "2:31:23", "throughput": 19857.51, "total_tokens": 65362304}
|
|
{"current_steps": 20780, "total_steps": 78105, "loss": 0.3077, "lr": 4.591747895426726e-06, "epoch": 1.330260546699955, "percentage": 26.61, "elapsed_time": "0:54:52", "remaining_time": "2:31:22", "throughput": 19858.21, "total_tokens": 65378304}
|
|
{"current_steps": 20785, "total_steps": 78105, "loss": 0.2716, "lr": 4.591441890106623e-06, "epoch": 1.330580628640932, "percentage": 26.61, "elapsed_time": "0:54:52", "remaining_time": "2:31:21", "throughput": 19858.83, "total_tokens": 65393344}
|
|
{"current_steps": 20790, "total_steps": 78105, "loss": 0.315, "lr": 4.591135780350805e-06, "epoch": 1.330900710581909, "percentage": 26.62, "elapsed_time": "0:54:53", "remaining_time": "2:31:19", "throughput": 19859.53, "total_tokens": 65409408}
|
|
{"current_steps": 20795, "total_steps": 78105, "loss": 0.4502, "lr": 4.590829566174559e-06, "epoch": 1.3312207925228858, "percentage": 26.62, "elapsed_time": "0:54:54", "remaining_time": "2:31:18", "throughput": 19860.2, "total_tokens": 65424832}
|
|
{"current_steps": 20800, "total_steps": 78105, "loss": 0.3672, "lr": 4.590523247593176e-06, "epoch": 1.3315408744638628, "percentage": 26.63, "elapsed_time": "0:54:54", "remaining_time": "2:31:17", "throughput": 19861.01, "total_tokens": 65441216}
|
|
{"current_steps": 20805, "total_steps": 78105, "loss": 0.4188, "lr": 4.590216824621951e-06, "epoch": 1.3318609564048396, "percentage": 26.64, "elapsed_time": "0:54:55", "remaining_time": "2:31:16", "throughput": 19861.78, "total_tokens": 65457600}
|
|
{"current_steps": 20810, "total_steps": 78105, "loss": 0.4023, "lr": 4.589910297276185e-06, "epoch": 1.3321810383458166, "percentage": 26.64, "elapsed_time": "0:54:56", "remaining_time": "2:31:15", "throughput": 19862.51, "total_tokens": 65473728}
|
|
{"current_steps": 20815, "total_steps": 78105, "loss": 0.2281, "lr": 4.589603665571184e-06, "epoch": 1.3325011202867934, "percentage": 26.65, "elapsed_time": "0:54:57", "remaining_time": "2:31:14", "throughput": 19863.23, "total_tokens": 65489728}
|
|
{"current_steps": 20820, "total_steps": 78105, "loss": 0.2818, "lr": 4.5892969295222615e-06, "epoch": 1.3328212022277703, "percentage": 26.66, "elapsed_time": "0:54:57", "remaining_time": "2:31:13", "throughput": 19864.04, "total_tokens": 65506176}
|
|
{"current_steps": 20825, "total_steps": 78105, "loss": 0.3487, "lr": 4.588990089144731e-06, "epoch": 1.333141284168747, "percentage": 26.66, "elapsed_time": "0:54:58", "remaining_time": "2:31:12", "throughput": 19864.72, "total_tokens": 65521728}
|
|
{"current_steps": 20830, "total_steps": 78105, "loss": 0.2838, "lr": 4.5886831444539195e-06, "epoch": 1.333461366109724, "percentage": 26.67, "elapsed_time": "0:54:59", "remaining_time": "2:31:11", "throughput": 19865.4, "total_tokens": 65536832}
|
|
{"current_steps": 20835, "total_steps": 78105, "loss": 0.3904, "lr": 4.588376095465149e-06, "epoch": 1.333781448050701, "percentage": 26.68, "elapsed_time": "0:54:59", "remaining_time": "2:31:10", "throughput": 19866.46, "total_tokens": 65555776}
|
|
{"current_steps": 20840, "total_steps": 78105, "loss": 0.3607, "lr": 4.588068942193755e-06, "epoch": 1.3341015299916779, "percentage": 26.68, "elapsed_time": "0:55:00", "remaining_time": "2:31:09", "throughput": 19867.04, "total_tokens": 65570944}
|
|
{"current_steps": 20845, "total_steps": 78105, "loss": 0.284, "lr": 4.587761684655075e-06, "epoch": 1.3344216119326546, "percentage": 26.69, "elapsed_time": "0:55:01", "remaining_time": "2:31:08", "throughput": 19867.63, "total_tokens": 65586048}
|
|
{"current_steps": 20850, "total_steps": 78105, "loss": 0.4453, "lr": 4.587454322864451e-06, "epoch": 1.3347416938736316, "percentage": 26.69, "elapsed_time": "0:55:01", "remaining_time": "2:31:06", "throughput": 19868.32, "total_tokens": 65601408}
|
|
{"current_steps": 20855, "total_steps": 78105, "loss": 0.2531, "lr": 4.5871468568372305e-06, "epoch": 1.3350617758146086, "percentage": 26.7, "elapsed_time": "0:55:02", "remaining_time": "2:31:05", "throughput": 19868.9, "total_tokens": 65616256}
|
|
{"current_steps": 20860, "total_steps": 78105, "loss": 0.2526, "lr": 4.586839286588768e-06, "epoch": 1.3353818577555854, "percentage": 26.71, "elapsed_time": "0:55:03", "remaining_time": "2:31:04", "throughput": 19869.58, "total_tokens": 65631936}
|
|
{"current_steps": 20865, "total_steps": 78105, "loss": 0.3054, "lr": 4.5865316121344215e-06, "epoch": 1.3357019396965624, "percentage": 26.71, "elapsed_time": "0:55:03", "remaining_time": "2:31:03", "throughput": 19870.26, "total_tokens": 65647488}
|
|
{"current_steps": 20870, "total_steps": 78105, "loss": 0.2884, "lr": 4.586223833489555e-06, "epoch": 1.3360220216375391, "percentage": 26.72, "elapsed_time": "0:55:04", "remaining_time": "2:31:02", "throughput": 19871.14, "total_tokens": 65664704}
|
|
{"current_steps": 20875, "total_steps": 78105, "loss": 0.4319, "lr": 4.585915950669536e-06, "epoch": 1.3363421035785161, "percentage": 26.73, "elapsed_time": "0:55:05", "remaining_time": "2:31:01", "throughput": 19871.8, "total_tokens": 65680256}
|
|
{"current_steps": 20880, "total_steps": 78105, "loss": 0.1895, "lr": 4.585607963689741e-06, "epoch": 1.3366621855194931, "percentage": 26.73, "elapsed_time": "0:55:05", "remaining_time": "2:31:00", "throughput": 19872.5, "total_tokens": 65696000}
|
|
{"current_steps": 20885, "total_steps": 78105, "loss": 0.2608, "lr": 4.5852998725655465e-06, "epoch": 1.3369822674604699, "percentage": 26.74, "elapsed_time": "0:55:06", "remaining_time": "2:30:59", "throughput": 19873.27, "total_tokens": 65712640}
|
|
{"current_steps": 20890, "total_steps": 78105, "loss": 0.4189, "lr": 4.584991677312339e-06, "epoch": 1.3373023494014467, "percentage": 26.75, "elapsed_time": "0:55:07", "remaining_time": "2:30:58", "throughput": 19873.92, "total_tokens": 65728064}
|
|
{"current_steps": 20895, "total_steps": 78105, "loss": 0.2957, "lr": 4.584683377945507e-06, "epoch": 1.3376224313424236, "percentage": 26.75, "elapsed_time": "0:55:08", "remaining_time": "2:30:57", "throughput": 19874.84, "total_tokens": 65746048}
|
|
{"current_steps": 20900, "total_steps": 78105, "loss": 0.2619, "lr": 4.584374974480445e-06, "epoch": 1.3379425132834006, "percentage": 26.76, "elapsed_time": "0:55:08", "remaining_time": "2:30:56", "throughput": 19875.47, "total_tokens": 65761344}
|
|
{"current_steps": 20905, "total_steps": 78105, "loss": 0.2377, "lr": 4.584066466932556e-06, "epoch": 1.3382625952243774, "percentage": 26.77, "elapsed_time": "0:55:09", "remaining_time": "2:30:54", "throughput": 19876.16, "total_tokens": 65777088}
|
|
{"current_steps": 20910, "total_steps": 78105, "loss": 0.322, "lr": 4.583757855317242e-06, "epoch": 1.3385826771653544, "percentage": 26.77, "elapsed_time": "0:55:10", "remaining_time": "2:30:53", "throughput": 19876.87, "total_tokens": 65793024}
|
|
{"current_steps": 20915, "total_steps": 78105, "loss": 0.3514, "lr": 4.5834491396499145e-06, "epoch": 1.3389027591063312, "percentage": 26.78, "elapsed_time": "0:55:10", "remaining_time": "2:30:52", "throughput": 19877.41, "total_tokens": 65807744}
|
|
{"current_steps": 20920, "total_steps": 78105, "loss": 0.3269, "lr": 4.58314031994599e-06, "epoch": 1.3392228410473082, "percentage": 26.78, "elapsed_time": "0:55:11", "remaining_time": "2:30:51", "throughput": 19878.15, "total_tokens": 65824000}
|
|
{"current_steps": 20925, "total_steps": 78105, "loss": 0.3618, "lr": 4.582831396220888e-06, "epoch": 1.339542922988285, "percentage": 26.79, "elapsed_time": "0:55:12", "remaining_time": "2:30:50", "throughput": 19878.87, "total_tokens": 65840064}
|
|
{"current_steps": 20930, "total_steps": 78105, "loss": 0.3187, "lr": 4.582522368490035e-06, "epoch": 1.339863004929262, "percentage": 26.8, "elapsed_time": "0:55:12", "remaining_time": "2:30:49", "throughput": 19879.52, "total_tokens": 65855552}
|
|
{"current_steps": 20935, "total_steps": 78105, "loss": 0.2784, "lr": 4.582213236768863e-06, "epoch": 1.3401830868702387, "percentage": 26.8, "elapsed_time": "0:55:13", "remaining_time": "2:30:48", "throughput": 19880.39, "total_tokens": 65872448}
|
|
{"current_steps": 20940, "total_steps": 78105, "loss": 0.3654, "lr": 4.5819040010728064e-06, "epoch": 1.3405031688112157, "percentage": 26.81, "elapsed_time": "0:55:14", "remaining_time": "2:30:47", "throughput": 19881.14, "total_tokens": 65888448}
|
|
{"current_steps": 20945, "total_steps": 78105, "loss": 0.3229, "lr": 4.58159466141731e-06, "epoch": 1.3408232507521927, "percentage": 26.82, "elapsed_time": "0:55:14", "remaining_time": "2:30:46", "throughput": 19881.68, "total_tokens": 65902912}
|
|
{"current_steps": 20950, "total_steps": 78105, "loss": 0.4098, "lr": 4.581285217817818e-06, "epoch": 1.3411433326931694, "percentage": 26.82, "elapsed_time": "0:55:15", "remaining_time": "2:30:44", "throughput": 19882.28, "total_tokens": 65918016}
|
|
{"current_steps": 20955, "total_steps": 78105, "loss": 0.3622, "lr": 4.580975670289783e-06, "epoch": 1.3414634146341464, "percentage": 26.83, "elapsed_time": "0:55:16", "remaining_time": "2:30:43", "throughput": 19883.1, "total_tokens": 65934656}
|
|
{"current_steps": 20960, "total_steps": 78105, "loss": 0.3221, "lr": 4.580666018848663e-06, "epoch": 1.3417834965751232, "percentage": 26.84, "elapsed_time": "0:55:16", "remaining_time": "2:30:42", "throughput": 19883.74, "total_tokens": 65950080}
|
|
{"current_steps": 20965, "total_steps": 78105, "loss": 0.3835, "lr": 4.580356263509919e-06, "epoch": 1.3421035785161002, "percentage": 26.84, "elapsed_time": "0:55:17", "remaining_time": "2:30:41", "throughput": 19884.38, "total_tokens": 65965568}
|
|
{"current_steps": 20970, "total_steps": 78105, "loss": 0.406, "lr": 4.58004640428902e-06, "epoch": 1.342423660457077, "percentage": 26.85, "elapsed_time": "0:55:18", "remaining_time": "2:30:40", "throughput": 19884.94, "total_tokens": 65980672}
|
|
{"current_steps": 20975, "total_steps": 78105, "loss": 0.4468, "lr": 4.579736441201439e-06, "epoch": 1.342743742398054, "percentage": 26.85, "elapsed_time": "0:55:18", "remaining_time": "2:30:39", "throughput": 19885.57, "total_tokens": 65996096}
|
|
{"current_steps": 20980, "total_steps": 78105, "loss": 0.388, "lr": 4.579426374262652e-06, "epoch": 1.3430638243390307, "percentage": 26.86, "elapsed_time": "0:55:19", "remaining_time": "2:30:38", "throughput": 19886.45, "total_tokens": 66013120}
|
|
{"current_steps": 20985, "total_steps": 78105, "loss": 0.4042, "lr": 4.579116203488143e-06, "epoch": 1.3433839062800077, "percentage": 26.87, "elapsed_time": "0:55:20", "remaining_time": "2:30:37", "throughput": 19887.06, "total_tokens": 66028160}
|
|
{"current_steps": 20990, "total_steps": 78105, "loss": 0.4532, "lr": 4.578805928893401e-06, "epoch": 1.3437039882209847, "percentage": 26.87, "elapsed_time": "0:55:20", "remaining_time": "2:30:36", "throughput": 19887.72, "total_tokens": 66043904}
|
|
{"current_steps": 20995, "total_steps": 78105, "loss": 0.2902, "lr": 4.578495550493919e-06, "epoch": 1.3440240701619615, "percentage": 26.88, "elapsed_time": "0:55:21", "remaining_time": "2:30:35", "throughput": 19888.27, "total_tokens": 66058624}
|
|
{"current_steps": 21000, "total_steps": 78105, "loss": 0.3911, "lr": 4.578185068305195e-06, "epoch": 1.3443441521029382, "percentage": 26.89, "elapsed_time": "0:55:22", "remaining_time": "2:30:33", "throughput": 19888.97, "total_tokens": 66074816}
|
|
{"current_steps": 21005, "total_steps": 78105, "loss": 0.2529, "lr": 4.577874482342734e-06, "epoch": 1.3446642340439152, "percentage": 26.89, "elapsed_time": "0:55:22", "remaining_time": "2:30:32", "throughput": 19889.78, "total_tokens": 66091584}
|
|
{"current_steps": 21010, "total_steps": 78105, "loss": 0.3473, "lr": 4.577563792622046e-06, "epoch": 1.3449843159848922, "percentage": 26.9, "elapsed_time": "0:55:23", "remaining_time": "2:30:31", "throughput": 19890.41, "total_tokens": 66106752}
|
|
{"current_steps": 21015, "total_steps": 78105, "loss": 0.3127, "lr": 4.577252999158642e-06, "epoch": 1.345304397925869, "percentage": 26.91, "elapsed_time": "0:55:24", "remaining_time": "2:30:30", "throughput": 19891.13, "total_tokens": 66122880}
|
|
{"current_steps": 21020, "total_steps": 78105, "loss": 0.3402, "lr": 4.576942101968044e-06, "epoch": 1.345624479866846, "percentage": 26.91, "elapsed_time": "0:55:24", "remaining_time": "2:30:29", "throughput": 19891.74, "total_tokens": 66138368}
|
|
{"current_steps": 21025, "total_steps": 78105, "loss": 0.3136, "lr": 4.576631101065776e-06, "epoch": 1.3459445618078227, "percentage": 26.92, "elapsed_time": "0:55:25", "remaining_time": "2:30:28", "throughput": 19892.41, "total_tokens": 66154112}
|
|
{"current_steps": 21030, "total_steps": 78105, "loss": 0.2815, "lr": 4.576319996467366e-06, "epoch": 1.3462646437487997, "percentage": 26.93, "elapsed_time": "0:55:26", "remaining_time": "2:30:27", "throughput": 19893.01, "total_tokens": 66169408}
|
|
{"current_steps": 21035, "total_steps": 78105, "loss": 0.3331, "lr": 4.576008788188353e-06, "epoch": 1.3465847256897767, "percentage": 26.93, "elapsed_time": "0:55:26", "remaining_time": "2:30:26", "throughput": 19893.59, "total_tokens": 66184512}
|
|
{"current_steps": 21040, "total_steps": 78105, "loss": 0.3088, "lr": 4.575697476244273e-06, "epoch": 1.3469048076307535, "percentage": 26.94, "elapsed_time": "0:55:27", "remaining_time": "2:30:25", "throughput": 19894.31, "total_tokens": 66200512}
|
|
{"current_steps": 21045, "total_steps": 78105, "loss": 0.2739, "lr": 4.575386060650673e-06, "epoch": 1.3472248895717303, "percentage": 26.94, "elapsed_time": "0:55:28", "remaining_time": "2:30:24", "throughput": 19895.06, "total_tokens": 66216512}
|
|
{"current_steps": 21050, "total_steps": 78105, "loss": 0.2862, "lr": 4.575074541423104e-06, "epoch": 1.3475449715127072, "percentage": 26.95, "elapsed_time": "0:55:28", "remaining_time": "2:30:23", "throughput": 19895.75, "total_tokens": 66232576}
|
|
{"current_steps": 21055, "total_steps": 78105, "loss": 0.2462, "lr": 4.574762918577121e-06, "epoch": 1.3478650534536842, "percentage": 26.96, "elapsed_time": "0:55:29", "remaining_time": "2:30:21", "throughput": 19896.28, "total_tokens": 66247232}
|
|
{"current_steps": 21060, "total_steps": 78105, "loss": 0.449, "lr": 4.574451192128286e-06, "epoch": 1.348185135394661, "percentage": 26.96, "elapsed_time": "0:55:30", "remaining_time": "2:30:20", "throughput": 19896.94, "total_tokens": 66262784}
|
|
{"current_steps": 21065, "total_steps": 78105, "loss": 0.3133, "lr": 4.574139362092162e-06, "epoch": 1.348505217335638, "percentage": 26.97, "elapsed_time": "0:55:30", "remaining_time": "2:30:19", "throughput": 19897.58, "total_tokens": 66278272}
|
|
{"current_steps": 21070, "total_steps": 78105, "loss": 0.3309, "lr": 4.573827428484322e-06, "epoch": 1.3488252992766148, "percentage": 26.98, "elapsed_time": "0:55:31", "remaining_time": "2:30:18", "throughput": 19898.32, "total_tokens": 66294528}
|
|
{"current_steps": 21075, "total_steps": 78105, "loss": 0.4058, "lr": 4.5735153913203436e-06, "epoch": 1.3491453812175918, "percentage": 26.98, "elapsed_time": "0:55:32", "remaining_time": "2:30:17", "throughput": 19898.94, "total_tokens": 66309760}
|
|
{"current_steps": 21080, "total_steps": 78105, "loss": 0.3035, "lr": 4.573203250615807e-06, "epoch": 1.3494654631585685, "percentage": 26.99, "elapsed_time": "0:55:32", "remaining_time": "2:30:16", "throughput": 19899.53, "total_tokens": 66324800}
|
|
{"current_steps": 21085, "total_steps": 78105, "loss": 0.2789, "lr": 4.572891006386298e-06, "epoch": 1.3497855450995455, "percentage": 27.0, "elapsed_time": "0:55:33", "remaining_time": "2:30:15", "throughput": 19900.13, "total_tokens": 66340160}
|
|
{"current_steps": 21090, "total_steps": 78105, "loss": 0.339, "lr": 4.5725786586474105e-06, "epoch": 1.3501056270405223, "percentage": 27.0, "elapsed_time": "0:55:34", "remaining_time": "2:30:14", "throughput": 19900.78, "total_tokens": 66355584}
|
|
{"current_steps": 21095, "total_steps": 78105, "loss": 0.4497, "lr": 4.57226620741474e-06, "epoch": 1.3504257089814993, "percentage": 27.01, "elapsed_time": "0:55:34", "remaining_time": "2:30:12", "throughput": 19901.37, "total_tokens": 66370624}
|
|
{"current_steps": 21100, "total_steps": 78105, "loss": 0.3005, "lr": 4.571953652703889e-06, "epoch": 1.3507457909224763, "percentage": 27.01, "elapsed_time": "0:55:35", "remaining_time": "2:30:11", "throughput": 19901.99, "total_tokens": 66385728}
|
|
{"current_steps": 21105, "total_steps": 78105, "loss": 0.3838, "lr": 4.571640994530465e-06, "epoch": 1.351065872863453, "percentage": 27.02, "elapsed_time": "0:55:36", "remaining_time": "2:30:10", "throughput": 19902.65, "total_tokens": 66401600}
|
|
{"current_steps": 21110, "total_steps": 78105, "loss": 0.3371, "lr": 4.571328232910081e-06, "epoch": 1.3513859548044298, "percentage": 27.03, "elapsed_time": "0:55:36", "remaining_time": "2:30:09", "throughput": 19903.26, "total_tokens": 66416896}
|
|
{"current_steps": 21115, "total_steps": 78105, "loss": 0.4107, "lr": 4.571015367858354e-06, "epoch": 1.3517060367454068, "percentage": 27.03, "elapsed_time": "0:55:37", "remaining_time": "2:30:08", "throughput": 19903.86, "total_tokens": 66432000}
|
|
{"current_steps": 21120, "total_steps": 78105, "loss": 0.3327, "lr": 4.570702399390906e-06, "epoch": 1.3520261186863838, "percentage": 27.04, "elapsed_time": "0:55:38", "remaining_time": "2:30:07", "throughput": 19904.43, "total_tokens": 66447104}
|
|
{"current_steps": 21125, "total_steps": 78105, "loss": 0.4573, "lr": 4.570389327523367e-06, "epoch": 1.3523462006273606, "percentage": 27.05, "elapsed_time": "0:55:38", "remaining_time": "2:30:06", "throughput": 19904.98, "total_tokens": 66462080}
|
|
{"current_steps": 21130, "total_steps": 78105, "loss": 0.3239, "lr": 4.5700761522713694e-06, "epoch": 1.3526662825683375, "percentage": 27.05, "elapsed_time": "0:55:39", "remaining_time": "2:30:05", "throughput": 19905.76, "total_tokens": 66478720}
|
|
{"current_steps": 21135, "total_steps": 78105, "loss": 0.2951, "lr": 4.569762873650551e-06, "epoch": 1.3529863645093143, "percentage": 27.06, "elapsed_time": "0:55:40", "remaining_time": "2:30:03", "throughput": 19906.27, "total_tokens": 66492992}
|
|
{"current_steps": 21140, "total_steps": 78105, "loss": 0.2879, "lr": 4.5694494916765564e-06, "epoch": 1.3533064464502913, "percentage": 27.07, "elapsed_time": "0:55:40", "remaining_time": "2:30:02", "throughput": 19906.93, "total_tokens": 66508608}
|
|
{"current_steps": 21145, "total_steps": 78105, "loss": 0.3293, "lr": 4.569136006365033e-06, "epoch": 1.3536265283912683, "percentage": 27.07, "elapsed_time": "0:55:41", "remaining_time": "2:30:01", "throughput": 19907.65, "total_tokens": 66524608}
|
|
{"current_steps": 21150, "total_steps": 78105, "loss": 0.2751, "lr": 4.568822417731635e-06, "epoch": 1.353946610332245, "percentage": 27.08, "elapsed_time": "0:55:42", "remaining_time": "2:30:00", "throughput": 19908.21, "total_tokens": 66539328}
|
|
{"current_steps": 21155, "total_steps": 78105, "loss": 0.2727, "lr": 4.568508725792022e-06, "epoch": 1.3542666922732218, "percentage": 27.09, "elapsed_time": "0:55:43", "remaining_time": "2:29:59", "throughput": 19908.98, "total_tokens": 66555776}
|
|
{"current_steps": 21160, "total_steps": 78105, "loss": 0.3236, "lr": 4.568194930561857e-06, "epoch": 1.3545867742141988, "percentage": 27.09, "elapsed_time": "0:55:43", "remaining_time": "2:29:58", "throughput": 19909.54, "total_tokens": 66570816}
|
|
{"current_steps": 21165, "total_steps": 78105, "loss": 0.3128, "lr": 4.567881032056811e-06, "epoch": 1.3549068561551758, "percentage": 27.1, "elapsed_time": "0:55:44", "remaining_time": "2:29:57", "throughput": 19910.23, "total_tokens": 66586560}
|
|
{"current_steps": 21170, "total_steps": 78105, "loss": 0.4138, "lr": 4.567567030292557e-06, "epoch": 1.3552269380961526, "percentage": 27.1, "elapsed_time": "0:55:45", "remaining_time": "2:29:56", "throughput": 19911.14, "total_tokens": 66603840}
|
|
{"current_steps": 21175, "total_steps": 78105, "loss": 0.3511, "lr": 4.567252925284775e-06, "epoch": 1.3555470200371296, "percentage": 27.11, "elapsed_time": "0:55:45", "remaining_time": "2:29:55", "throughput": 19911.78, "total_tokens": 66619072}
|
|
{"current_steps": 21180, "total_steps": 78105, "loss": 0.3001, "lr": 4.56693871704915e-06, "epoch": 1.3558671019781063, "percentage": 27.12, "elapsed_time": "0:55:46", "remaining_time": "2:29:54", "throughput": 19912.4, "total_tokens": 66634560}
|
|
{"current_steps": 21185, "total_steps": 78105, "loss": 0.3181, "lr": 4.5666244056013705e-06, "epoch": 1.3561871839190833, "percentage": 27.12, "elapsed_time": "0:55:47", "remaining_time": "2:29:52", "throughput": 19913.05, "total_tokens": 66650112}
|
|
{"current_steps": 21190, "total_steps": 78105, "loss": 0.3988, "lr": 4.566309990957134e-06, "epoch": 1.35650726586006, "percentage": 27.13, "elapsed_time": "0:55:47", "remaining_time": "2:29:51", "throughput": 19913.58, "total_tokens": 66664832}
|
|
{"current_steps": 21195, "total_steps": 78105, "loss": 0.3962, "lr": 4.5659954731321405e-06, "epoch": 1.356827347801037, "percentage": 27.14, "elapsed_time": "0:55:48", "remaining_time": "2:29:50", "throughput": 19914.18, "total_tokens": 66680000}
|
|
{"current_steps": 21200, "total_steps": 78105, "loss": 0.3252, "lr": 4.565680852142093e-06, "epoch": 1.3571474297420139, "percentage": 27.14, "elapsed_time": "0:55:49", "remaining_time": "2:29:49", "throughput": 19914.79, "total_tokens": 66695296}
|
|
{"current_steps": 21205, "total_steps": 78105, "loss": 0.4093, "lr": 4.565366128002704e-06, "epoch": 1.3574675116829908, "percentage": 27.15, "elapsed_time": "0:55:49", "remaining_time": "2:29:48", "throughput": 19915.48, "total_tokens": 66711040}
|
|
{"current_steps": 21210, "total_steps": 78105, "loss": 0.3274, "lr": 4.565051300729689e-06, "epoch": 1.3577875936239678, "percentage": 27.16, "elapsed_time": "0:55:50", "remaining_time": "2:29:47", "throughput": 19916.12, "total_tokens": 66726720}
|
|
{"current_steps": 21215, "total_steps": 78105, "loss": 0.3752, "lr": 4.564736370338768e-06, "epoch": 1.3581076755649446, "percentage": 27.16, "elapsed_time": "0:55:51", "remaining_time": "2:29:46", "throughput": 19916.92, "total_tokens": 66743360}
|
|
{"current_steps": 21220, "total_steps": 78105, "loss": 0.2026, "lr": 4.564421336845668e-06, "epoch": 1.3584277575059216, "percentage": 27.17, "elapsed_time": "0:55:51", "remaining_time": "2:29:45", "throughput": 19917.54, "total_tokens": 66758912}
|
|
{"current_steps": 21225, "total_steps": 78105, "loss": 0.3119, "lr": 4.564106200266119e-06, "epoch": 1.3587478394468984, "percentage": 27.17, "elapsed_time": "0:55:52", "remaining_time": "2:29:44", "throughput": 19918.18, "total_tokens": 66774144}
|
|
{"current_steps": 21230, "total_steps": 78105, "loss": 0.3391, "lr": 4.563790960615858e-06, "epoch": 1.3590679213878754, "percentage": 27.18, "elapsed_time": "0:55:53", "remaining_time": "2:29:42", "throughput": 19918.83, "total_tokens": 66790080}
|
|
{"current_steps": 21235, "total_steps": 78105, "loss": 0.3171, "lr": 4.563475617910627e-06, "epoch": 1.3593880033288521, "percentage": 27.19, "elapsed_time": "0:55:53", "remaining_time": "2:29:41", "throughput": 19919.47, "total_tokens": 66805376}
|
|
{"current_steps": 21240, "total_steps": 78105, "loss": 0.2565, "lr": 4.5631601721661715e-06, "epoch": 1.3597080852698291, "percentage": 27.19, "elapsed_time": "0:55:54", "remaining_time": "2:29:40", "throughput": 19920.37, "total_tokens": 66823040}
|
|
{"current_steps": 21245, "total_steps": 78105, "loss": 0.3125, "lr": 4.562844623398242e-06, "epoch": 1.3600281672108059, "percentage": 27.2, "elapsed_time": "0:55:55", "remaining_time": "2:29:39", "throughput": 19920.94, "total_tokens": 66838144}
|
|
{"current_steps": 21250, "total_steps": 78105, "loss": 0.4704, "lr": 4.5625289716226e-06, "epoch": 1.3603482491517829, "percentage": 27.21, "elapsed_time": "0:55:55", "remaining_time": "2:29:38", "throughput": 19921.63, "total_tokens": 66854144}
|
|
{"current_steps": 21255, "total_steps": 78105, "loss": 0.4002, "lr": 4.5622132168550025e-06, "epoch": 1.3606683310927599, "percentage": 27.21, "elapsed_time": "0:55:56", "remaining_time": "2:29:37", "throughput": 19922.29, "total_tokens": 66869568}
|
|
{"current_steps": 21260, "total_steps": 78105, "loss": 0.3586, "lr": 4.5618973591112186e-06, "epoch": 1.3609884130337366, "percentage": 27.22, "elapsed_time": "0:55:57", "remaining_time": "2:29:36", "throughput": 19922.98, "total_tokens": 66885184}
|
|
{"current_steps": 21265, "total_steps": 78105, "loss": 0.393, "lr": 4.5615813984070215e-06, "epoch": 1.3613084949747134, "percentage": 27.23, "elapsed_time": "0:55:57", "remaining_time": "2:29:35", "throughput": 19923.63, "total_tokens": 66900480}
|
|
{"current_steps": 21270, "total_steps": 78105, "loss": 0.2858, "lr": 4.561265334758187e-06, "epoch": 1.3616285769156904, "percentage": 27.23, "elapsed_time": "0:55:58", "remaining_time": "2:29:34", "throughput": 19924.56, "total_tokens": 66918144}
|
|
{"current_steps": 21275, "total_steps": 78105, "loss": 0.3018, "lr": 4.560949168180499e-06, "epoch": 1.3619486588566674, "percentage": 27.24, "elapsed_time": "0:55:59", "remaining_time": "2:29:33", "throughput": 19925.2, "total_tokens": 66933376}
|
|
{"current_steps": 21280, "total_steps": 78105, "loss": 0.3694, "lr": 4.560632898689744e-06, "epoch": 1.3622687407976442, "percentage": 27.25, "elapsed_time": "0:55:59", "remaining_time": "2:29:32", "throughput": 19925.88, "total_tokens": 66949248}
|
|
{"current_steps": 21285, "total_steps": 78105, "loss": 0.4479, "lr": 4.560316526301716e-06, "epoch": 1.3625888227386211, "percentage": 27.25, "elapsed_time": "0:56:00", "remaining_time": "2:29:30", "throughput": 19926.47, "total_tokens": 66964224}
|
|
{"current_steps": 21290, "total_steps": 78105, "loss": 0.3103, "lr": 4.560000051032212e-06, "epoch": 1.362908904679598, "percentage": 27.26, "elapsed_time": "0:56:01", "remaining_time": "2:29:29", "throughput": 19927.09, "total_tokens": 66979264}
|
|
{"current_steps": 21295, "total_steps": 78105, "loss": 0.4098, "lr": 4.559683472897037e-06, "epoch": 1.363228986620575, "percentage": 27.26, "elapsed_time": "0:56:01", "remaining_time": "2:29:28", "throughput": 19927.68, "total_tokens": 66994176}
|
|
{"current_steps": 21300, "total_steps": 78105, "loss": 0.3061, "lr": 4.5593667919119956e-06, "epoch": 1.363549068561552, "percentage": 27.27, "elapsed_time": "0:56:02", "remaining_time": "2:29:27", "throughput": 19928.33, "total_tokens": 67009856}
|
|
{"current_steps": 21305, "total_steps": 78105, "loss": 0.2881, "lr": 4.559050008092904e-06, "epoch": 1.3638691505025287, "percentage": 27.28, "elapsed_time": "0:56:03", "remaining_time": "2:29:26", "throughput": 19929.09, "total_tokens": 67026304}
|
|
{"current_steps": 21310, "total_steps": 78105, "loss": 0.3375, "lr": 4.558733121455582e-06, "epoch": 1.3641892324435054, "percentage": 27.28, "elapsed_time": "0:56:03", "remaining_time": "2:29:25", "throughput": 19929.73, "total_tokens": 67041984}
|
|
{"current_steps": 21315, "total_steps": 78105, "loss": 0.4684, "lr": 4.55841613201585e-06, "epoch": 1.3645093143844824, "percentage": 27.29, "elapsed_time": "0:56:04", "remaining_time": "2:29:24", "throughput": 19930.34, "total_tokens": 67057344}
|
|
{"current_steps": 21320, "total_steps": 78105, "loss": 0.3709, "lr": 4.558099039789539e-06, "epoch": 1.3648293963254594, "percentage": 27.3, "elapsed_time": "0:56:05", "remaining_time": "2:29:23", "throughput": 19930.94, "total_tokens": 67072512}
|
|
{"current_steps": 21325, "total_steps": 78105, "loss": 0.3174, "lr": 4.557781844792482e-06, "epoch": 1.3651494782664362, "percentage": 27.3, "elapsed_time": "0:56:05", "remaining_time": "2:29:22", "throughput": 19931.65, "total_tokens": 67088384}
|
|
{"current_steps": 21330, "total_steps": 78105, "loss": 0.3135, "lr": 4.557464547040517e-06, "epoch": 1.3654695602074132, "percentage": 27.31, "elapsed_time": "0:56:06", "remaining_time": "2:29:21", "throughput": 19932.43, "total_tokens": 67105024}
|
|
{"current_steps": 21335, "total_steps": 78105, "loss": 0.4033, "lr": 4.557147146549491e-06, "epoch": 1.36578964214839, "percentage": 27.32, "elapsed_time": "0:56:07", "remaining_time": "2:29:21", "throughput": 19934.17, "total_tokens": 67134528}
|
|
{"current_steps": 21340, "total_steps": 78105, "loss": 0.3901, "lr": 4.556829643335251e-06, "epoch": 1.366109724089367, "percentage": 27.32, "elapsed_time": "0:56:08", "remaining_time": "2:29:20", "throughput": 19934.76, "total_tokens": 67149760}
|
|
{"current_steps": 21345, "total_steps": 78105, "loss": 0.2852, "lr": 4.556512037413653e-06, "epoch": 1.3664298060303437, "percentage": 27.33, "elapsed_time": "0:56:09", "remaining_time": "2:29:19", "throughput": 19935.31, "total_tokens": 67164800}
|
|
{"current_steps": 21350, "total_steps": 78105, "loss": 0.3968, "lr": 4.556194328800555e-06, "epoch": 1.3667498879713207, "percentage": 27.33, "elapsed_time": "0:56:09", "remaining_time": "2:29:17", "throughput": 19936.01, "total_tokens": 67180416}
|
|
{"current_steps": 21355, "total_steps": 78105, "loss": 0.3061, "lr": 4.555876517511822e-06, "epoch": 1.3670699699122975, "percentage": 27.34, "elapsed_time": "0:56:10", "remaining_time": "2:29:16", "throughput": 19936.62, "total_tokens": 67195840}
|
|
{"current_steps": 21360, "total_steps": 78105, "loss": 0.2871, "lr": 4.555558603563326e-06, "epoch": 1.3673900518532744, "percentage": 27.35, "elapsed_time": "0:56:11", "remaining_time": "2:29:15", "throughput": 19937.24, "total_tokens": 67211328}
|
|
{"current_steps": 21365, "total_steps": 78105, "loss": 0.3657, "lr": 4.55524058697094e-06, "epoch": 1.3677101337942514, "percentage": 27.35, "elapsed_time": "0:56:11", "remaining_time": "2:29:14", "throughput": 19937.79, "total_tokens": 67225920}
|
|
{"current_steps": 21370, "total_steps": 78105, "loss": 0.3366, "lr": 4.554922467750544e-06, "epoch": 1.3680302157352282, "percentage": 27.36, "elapsed_time": "0:56:12", "remaining_time": "2:29:13", "throughput": 19938.31, "total_tokens": 67240832}
|
|
{"current_steps": 21375, "total_steps": 78105, "loss": 0.4125, "lr": 4.554604245918024e-06, "epoch": 1.368350297676205, "percentage": 27.37, "elapsed_time": "0:56:13", "remaining_time": "2:29:12", "throughput": 19938.92, "total_tokens": 67256128}
|
|
{"current_steps": 21380, "total_steps": 78105, "loss": 0.3582, "lr": 4.55428592148927e-06, "epoch": 1.368670379617182, "percentage": 27.37, "elapsed_time": "0:56:13", "remaining_time": "2:29:11", "throughput": 19939.66, "total_tokens": 67272512}
|
|
{"current_steps": 21385, "total_steps": 78105, "loss": 0.2941, "lr": 4.553967494480177e-06, "epoch": 1.368990461558159, "percentage": 27.38, "elapsed_time": "0:56:14", "remaining_time": "2:29:10", "throughput": 19940.3, "total_tokens": 67288064}
|
|
{"current_steps": 21390, "total_steps": 78105, "loss": 0.2946, "lr": 4.553648964906646e-06, "epoch": 1.3693105434991357, "percentage": 27.39, "elapsed_time": "0:56:15", "remaining_time": "2:29:09", "throughput": 19941.0, "total_tokens": 67304128}
|
|
{"current_steps": 21395, "total_steps": 78105, "loss": 0.352, "lr": 4.553330332784583e-06, "epoch": 1.3696306254401127, "percentage": 27.39, "elapsed_time": "0:56:15", "remaining_time": "2:29:08", "throughput": 19941.58, "total_tokens": 67319232}
|
|
{"current_steps": 21400, "total_steps": 78105, "loss": 0.3625, "lr": 4.553011598129899e-06, "epoch": 1.3699507073810895, "percentage": 27.4, "elapsed_time": "0:56:16", "remaining_time": "2:29:06", "throughput": 19942.3, "total_tokens": 67335296}
|
|
{"current_steps": 21405, "total_steps": 78105, "loss": 0.3375, "lr": 4.552692760958509e-06, "epoch": 1.3702707893220665, "percentage": 27.41, "elapsed_time": "0:56:17", "remaining_time": "2:29:05", "throughput": 19942.77, "total_tokens": 67349760}
|
|
{"current_steps": 21410, "total_steps": 78105, "loss": 0.3049, "lr": 4.552373821286334e-06, "epoch": 1.3705908712630435, "percentage": 27.41, "elapsed_time": "0:56:17", "remaining_time": "2:29:04", "throughput": 19943.36, "total_tokens": 67364672}
|
|
{"current_steps": 21415, "total_steps": 78105, "loss": 0.3813, "lr": 4.5520547791293025e-06, "epoch": 1.3709109532040202, "percentage": 27.42, "elapsed_time": "0:56:18", "remaining_time": "2:29:03", "throughput": 19943.96, "total_tokens": 67379648}
|
|
{"current_steps": 21420, "total_steps": 78105, "loss": 0.2384, "lr": 4.551735634503343e-06, "epoch": 1.371231035144997, "percentage": 27.42, "elapsed_time": "0:56:19", "remaining_time": "2:29:02", "throughput": 19944.54, "total_tokens": 67394432}
|
|
{"current_steps": 21425, "total_steps": 78105, "loss": 0.3446, "lr": 4.551416387424393e-06, "epoch": 1.371551117085974, "percentage": 27.43, "elapsed_time": "0:56:19", "remaining_time": "2:29:01", "throughput": 19945.17, "total_tokens": 67409536}
|
|
{"current_steps": 21430, "total_steps": 78105, "loss": 0.3889, "lr": 4.551097037908394e-06, "epoch": 1.371871199026951, "percentage": 27.44, "elapsed_time": "0:56:20", "remaining_time": "2:29:00", "throughput": 19945.86, "total_tokens": 67425664}
|
|
{"current_steps": 21435, "total_steps": 78105, "loss": 0.3429, "lr": 4.5507775859712935e-06, "epoch": 1.3721912809679278, "percentage": 27.44, "elapsed_time": "0:56:21", "remaining_time": "2:28:58", "throughput": 19946.45, "total_tokens": 67440960}
|
|
{"current_steps": 21440, "total_steps": 78105, "loss": 0.3046, "lr": 4.550458031629041e-06, "epoch": 1.3725113629089047, "percentage": 27.45, "elapsed_time": "0:56:21", "remaining_time": "2:28:57", "throughput": 19946.94, "total_tokens": 67455744}
|
|
{"current_steps": 21445, "total_steps": 78105, "loss": 0.3499, "lr": 4.550138374897596e-06, "epoch": 1.3728314448498815, "percentage": 27.46, "elapsed_time": "0:56:22", "remaining_time": "2:28:56", "throughput": 19947.53, "total_tokens": 67471104}
|
|
{"current_steps": 21450, "total_steps": 78105, "loss": 0.4008, "lr": 4.549818615792919e-06, "epoch": 1.3731515267908585, "percentage": 27.46, "elapsed_time": "0:56:23", "remaining_time": "2:28:55", "throughput": 19948.13, "total_tokens": 67486272}
|
|
{"current_steps": 21455, "total_steps": 78105, "loss": 0.3321, "lr": 4.549498754330978e-06, "epoch": 1.3734716087318353, "percentage": 27.47, "elapsed_time": "0:56:23", "remaining_time": "2:28:54", "throughput": 19948.78, "total_tokens": 67502144}
|
|
{"current_steps": 21460, "total_steps": 78105, "loss": 0.285, "lr": 4.549178790527744e-06, "epoch": 1.3737916906728123, "percentage": 27.48, "elapsed_time": "0:56:24", "remaining_time": "2:28:53", "throughput": 19949.34, "total_tokens": 67517248}
|
|
{"current_steps": 21465, "total_steps": 78105, "loss": 0.3678, "lr": 4.548858724399195e-06, "epoch": 1.374111772613789, "percentage": 27.48, "elapsed_time": "0:56:25", "remaining_time": "2:28:52", "throughput": 19949.98, "total_tokens": 67532864}
|
|
{"current_steps": 21470, "total_steps": 78105, "loss": 0.2855, "lr": 4.548538555961314e-06, "epoch": 1.374431854554766, "percentage": 27.49, "elapsed_time": "0:56:25", "remaining_time": "2:28:51", "throughput": 19950.66, "total_tokens": 67548608}
|
|
{"current_steps": 21475, "total_steps": 78105, "loss": 0.3305, "lr": 4.548218285230087e-06, "epoch": 1.374751936495743, "percentage": 27.5, "elapsed_time": "0:56:26", "remaining_time": "2:28:50", "throughput": 19951.33, "total_tokens": 67564544}
|
|
{"current_steps": 21480, "total_steps": 78105, "loss": 0.3284, "lr": 4.547897912221509e-06, "epoch": 1.3750720184367198, "percentage": 27.5, "elapsed_time": "0:56:27", "remaining_time": "2:28:49", "throughput": 19951.92, "total_tokens": 67579776}
|
|
{"current_steps": 21485, "total_steps": 78105, "loss": 0.4116, "lr": 4.547577436951575e-06, "epoch": 1.3753921003776968, "percentage": 27.51, "elapsed_time": "0:56:27", "remaining_time": "2:28:48", "throughput": 19952.62, "total_tokens": 67596096}
|
|
{"current_steps": 21490, "total_steps": 78105, "loss": 0.3156, "lr": 4.547256859436289e-06, "epoch": 1.3757121823186735, "percentage": 27.51, "elapsed_time": "0:56:28", "remaining_time": "2:28:46", "throughput": 19953.2, "total_tokens": 67611136}
|
|
{"current_steps": 21495, "total_steps": 78105, "loss": 0.3529, "lr": 4.54693617969166e-06, "epoch": 1.3760322642596505, "percentage": 27.52, "elapsed_time": "0:56:29", "remaining_time": "2:28:45", "throughput": 19953.96, "total_tokens": 67627840}
|
|
{"current_steps": 21500, "total_steps": 78105, "loss": 0.348, "lr": 4.5466153977337004e-06, "epoch": 1.3763523462006273, "percentage": 27.53, "elapsed_time": "0:56:29", "remaining_time": "2:28:44", "throughput": 19954.6, "total_tokens": 67643456}
|
|
{"current_steps": 21505, "total_steps": 78105, "loss": 0.4476, "lr": 4.5462945135784285e-06, "epoch": 1.3766724281416043, "percentage": 27.53, "elapsed_time": "0:56:30", "remaining_time": "2:28:43", "throughput": 19955.33, "total_tokens": 67659840}
|
|
{"current_steps": 21510, "total_steps": 78105, "loss": 0.357, "lr": 4.545973527241867e-06, "epoch": 1.376992510082581, "percentage": 27.54, "elapsed_time": "0:56:31", "remaining_time": "2:28:42", "throughput": 19955.86, "total_tokens": 67674624}
|
|
{"current_steps": 21515, "total_steps": 78105, "loss": 0.4121, "lr": 4.545652438740045e-06, "epoch": 1.377312592023558, "percentage": 27.55, "elapsed_time": "0:56:31", "remaining_time": "2:28:41", "throughput": 19956.43, "total_tokens": 67689664}
|
|
{"current_steps": 21520, "total_steps": 78105, "loss": 0.3684, "lr": 4.5453312480889946e-06, "epoch": 1.377632673964535, "percentage": 27.55, "elapsed_time": "0:56:32", "remaining_time": "2:28:40", "throughput": 19957.04, "total_tokens": 67705280}
|
|
{"current_steps": 21525, "total_steps": 78105, "loss": 0.2759, "lr": 4.545009955304756e-06, "epoch": 1.3779527559055118, "percentage": 27.56, "elapsed_time": "0:56:33", "remaining_time": "2:28:39", "throughput": 19957.64, "total_tokens": 67720640}
|
|
{"current_steps": 21530, "total_steps": 78105, "loss": 0.3539, "lr": 4.544688560403372e-06, "epoch": 1.3782728378464886, "percentage": 27.57, "elapsed_time": "0:56:33", "remaining_time": "2:28:38", "throughput": 19958.17, "total_tokens": 67735424}
|
|
{"current_steps": 21535, "total_steps": 78105, "loss": 0.287, "lr": 4.544367063400893e-06, "epoch": 1.3785929197874656, "percentage": 27.57, "elapsed_time": "0:56:34", "remaining_time": "2:28:37", "throughput": 19958.8, "total_tokens": 67750528}
|
|
{"current_steps": 21540, "total_steps": 78105, "loss": 0.2853, "lr": 4.544045464313371e-06, "epoch": 1.3789130017284426, "percentage": 27.58, "elapsed_time": "0:56:35", "remaining_time": "2:28:35", "throughput": 19959.47, "total_tokens": 67766464}
|
|
{"current_steps": 21545, "total_steps": 78105, "loss": 0.2795, "lr": 4.543723763156864e-06, "epoch": 1.3792330836694193, "percentage": 27.58, "elapsed_time": "0:56:35", "remaining_time": "2:28:34", "throughput": 19960.08, "total_tokens": 67782144}
|
|
{"current_steps": 21550, "total_steps": 78105, "loss": 0.3607, "lr": 4.543401959947439e-06, "epoch": 1.3795531656103963, "percentage": 27.59, "elapsed_time": "0:56:36", "remaining_time": "2:28:33", "throughput": 19960.74, "total_tokens": 67798208}
|
|
{"current_steps": 21555, "total_steps": 78105, "loss": 0.278, "lr": 4.543080054701164e-06, "epoch": 1.379873247551373, "percentage": 27.6, "elapsed_time": "0:56:37", "remaining_time": "2:28:32", "throughput": 19961.37, "total_tokens": 67813504}
|
|
{"current_steps": 21560, "total_steps": 78105, "loss": 0.3043, "lr": 4.542758047434113e-06, "epoch": 1.38019332949235, "percentage": 27.6, "elapsed_time": "0:56:37", "remaining_time": "2:28:31", "throughput": 19962.0, "total_tokens": 67829248}
|
|
{"current_steps": 21565, "total_steps": 78105, "loss": 0.3572, "lr": 4.542435938162365e-06, "epoch": 1.380513411433327, "percentage": 27.61, "elapsed_time": "0:56:38", "remaining_time": "2:28:30", "throughput": 19962.6, "total_tokens": 67844544}
|
|
{"current_steps": 21570, "total_steps": 78105, "loss": 0.2984, "lr": 4.542113726902005e-06, "epoch": 1.3808334933743038, "percentage": 27.62, "elapsed_time": "0:56:39", "remaining_time": "2:28:29", "throughput": 19963.22, "total_tokens": 67860032}
|
|
{"current_steps": 21575, "total_steps": 78105, "loss": 0.4087, "lr": 4.5417914136691225e-06, "epoch": 1.3811535753152806, "percentage": 27.62, "elapsed_time": "0:56:39", "remaining_time": "2:28:28", "throughput": 19963.81, "total_tokens": 67875584}
|
|
{"current_steps": 21580, "total_steps": 78105, "loss": 0.3223, "lr": 4.541468998479812e-06, "epoch": 1.3814736572562576, "percentage": 27.63, "elapsed_time": "0:56:40", "remaining_time": "2:28:27", "throughput": 19964.51, "total_tokens": 67891584}
|
|
{"current_steps": 21585, "total_steps": 78105, "loss": 0.296, "lr": 4.5411464813501736e-06, "epoch": 1.3817937391972346, "percentage": 27.64, "elapsed_time": "0:56:41", "remaining_time": "2:28:26", "throughput": 19965.39, "total_tokens": 67909120}
|
|
{"current_steps": 21590, "total_steps": 78105, "loss": 0.2323, "lr": 4.540823862296311e-06, "epoch": 1.3821138211382114, "percentage": 27.64, "elapsed_time": "0:56:42", "remaining_time": "2:28:25", "throughput": 19966.16, "total_tokens": 67925952}
|
|
{"current_steps": 21595, "total_steps": 78105, "loss": 0.3601, "lr": 4.5405011413343355e-06, "epoch": 1.3824339030791883, "percentage": 27.65, "elapsed_time": "0:56:42", "remaining_time": "2:28:24", "throughput": 19966.76, "total_tokens": 67941120}
|
|
{"current_steps": 21600, "total_steps": 78105, "loss": 0.3833, "lr": 4.540178318480362e-06, "epoch": 1.382753985020165, "percentage": 27.66, "elapsed_time": "0:56:43", "remaining_time": "2:28:23", "throughput": 19967.46, "total_tokens": 67957120}
|
|
{"current_steps": 21605, "total_steps": 78105, "loss": 0.3215, "lr": 4.53985539375051e-06, "epoch": 1.383074066961142, "percentage": 27.66, "elapsed_time": "0:56:44", "remaining_time": "2:28:22", "throughput": 19968.06, "total_tokens": 67972288}
|
|
{"current_steps": 21610, "total_steps": 78105, "loss": 0.4772, "lr": 4.539532367160905e-06, "epoch": 1.3833941489021189, "percentage": 27.67, "elapsed_time": "0:56:44", "remaining_time": "2:28:21", "throughput": 19968.75, "total_tokens": 67988544}
|
|
{"current_steps": 21615, "total_steps": 78105, "loss": 0.2616, "lr": 4.539209238727677e-06, "epoch": 1.3837142308430959, "percentage": 27.67, "elapsed_time": "0:56:45", "remaining_time": "2:28:19", "throughput": 19969.29, "total_tokens": 68003200}
|
|
{"current_steps": 21620, "total_steps": 78105, "loss": 0.2397, "lr": 4.5388860084669616e-06, "epoch": 1.3840343127840726, "percentage": 27.68, "elapsed_time": "0:56:46", "remaining_time": "2:28:18", "throughput": 19969.96, "total_tokens": 68019392}
|
|
{"current_steps": 21625, "total_steps": 78105, "loss": 0.2829, "lr": 4.538562676394898e-06, "epoch": 1.3843543947250496, "percentage": 27.69, "elapsed_time": "0:56:46", "remaining_time": "2:28:17", "throughput": 19970.58, "total_tokens": 68035328}
|
|
{"current_steps": 21630, "total_steps": 78105, "loss": 0.3202, "lr": 4.5382392425276345e-06, "epoch": 1.3846744766660266, "percentage": 27.69, "elapsed_time": "0:56:47", "remaining_time": "2:28:16", "throughput": 19971.23, "total_tokens": 68051136}
|
|
{"current_steps": 21635, "total_steps": 78105, "loss": 0.3868, "lr": 4.537915706881319e-06, "epoch": 1.3849945586070034, "percentage": 27.7, "elapsed_time": "0:56:48", "remaining_time": "2:28:15", "throughput": 19971.77, "total_tokens": 68066176}
|
|
{"current_steps": 21640, "total_steps": 78105, "loss": 0.2949, "lr": 4.5375920694721085e-06, "epoch": 1.3853146405479801, "percentage": 27.71, "elapsed_time": "0:56:48", "remaining_time": "2:28:14", "throughput": 19972.37, "total_tokens": 68081536}
|
|
{"current_steps": 21645, "total_steps": 78105, "loss": 0.3576, "lr": 4.537268330316163e-06, "epoch": 1.3856347224889571, "percentage": 27.71, "elapsed_time": "0:56:49", "remaining_time": "2:28:13", "throughput": 19973.04, "total_tokens": 68097472}
|
|
{"current_steps": 21650, "total_steps": 78105, "loss": 0.4295, "lr": 4.53694448942965e-06, "epoch": 1.3859548044299341, "percentage": 27.72, "elapsed_time": "0:56:50", "remaining_time": "2:28:12", "throughput": 19973.73, "total_tokens": 68113728}
|
|
{"current_steps": 21655, "total_steps": 78105, "loss": 0.3789, "lr": 4.536620546828738e-06, "epoch": 1.386274886370911, "percentage": 27.73, "elapsed_time": "0:56:50", "remaining_time": "2:28:11", "throughput": 19974.41, "total_tokens": 68129920}
|
|
{"current_steps": 21660, "total_steps": 78105, "loss": 0.3784, "lr": 4.536296502529605e-06, "epoch": 1.3865949683118879, "percentage": 27.73, "elapsed_time": "0:56:51", "remaining_time": "2:28:10", "throughput": 19974.96, "total_tokens": 68144832}
|
|
{"current_steps": 21665, "total_steps": 78105, "loss": 0.2829, "lr": 4.535972356548431e-06, "epoch": 1.3869150502528647, "percentage": 27.74, "elapsed_time": "0:56:52", "remaining_time": "2:28:09", "throughput": 19975.73, "total_tokens": 68161728}
|
|
{"current_steps": 21670, "total_steps": 78105, "loss": 0.3346, "lr": 4.535648108901403e-06, "epoch": 1.3872351321938416, "percentage": 27.74, "elapsed_time": "0:56:52", "remaining_time": "2:28:08", "throughput": 19976.38, "total_tokens": 68177280}
|
|
{"current_steps": 21675, "total_steps": 78105, "loss": 0.3269, "lr": 4.535323759604712e-06, "epoch": 1.3875552141348186, "percentage": 27.75, "elapsed_time": "0:56:53", "remaining_time": "2:28:07", "throughput": 19977.02, "total_tokens": 68192640}
|
|
{"current_steps": 21680, "total_steps": 78105, "loss": 0.2403, "lr": 4.534999308674553e-06, "epoch": 1.3878752960757954, "percentage": 27.76, "elapsed_time": "0:56:54", "remaining_time": "2:28:06", "throughput": 19977.75, "total_tokens": 68208960}
|
|
{"current_steps": 21685, "total_steps": 78105, "loss": 0.3585, "lr": 4.534674756127129e-06, "epoch": 1.3881953780167722, "percentage": 27.76, "elapsed_time": "0:56:54", "remaining_time": "2:28:04", "throughput": 19978.31, "total_tokens": 68224192}
|
|
{"current_steps": 21690, "total_steps": 78105, "loss": 0.231, "lr": 4.534350101978646e-06, "epoch": 1.3885154599577492, "percentage": 27.77, "elapsed_time": "0:56:55", "remaining_time": "2:28:03", "throughput": 19978.99, "total_tokens": 68240128}
|
|
{"current_steps": 21695, "total_steps": 78105, "loss": 0.2199, "lr": 4.5340253462453155e-06, "epoch": 1.3888355418987262, "percentage": 27.78, "elapsed_time": "0:56:56", "remaining_time": "2:28:02", "throughput": 19979.81, "total_tokens": 68257408}
|
|
{"current_steps": 21700, "total_steps": 78105, "loss": 0.2658, "lr": 4.533700488943354e-06, "epoch": 1.389155623839703, "percentage": 27.78, "elapsed_time": "0:56:57", "remaining_time": "2:28:01", "throughput": 19980.46, "total_tokens": 68273408}
|
|
{"current_steps": 21705, "total_steps": 78105, "loss": 0.3838, "lr": 4.533375530088983e-06, "epoch": 1.38947570578068, "percentage": 27.79, "elapsed_time": "0:56:57", "remaining_time": "2:28:00", "throughput": 19981.28, "total_tokens": 68290688}
|
|
{"current_steps": 21710, "total_steps": 78105, "loss": 0.2756, "lr": 4.533050469698429e-06, "epoch": 1.3897957877216567, "percentage": 27.8, "elapsed_time": "0:56:58", "remaining_time": "2:27:59", "throughput": 19981.95, "total_tokens": 68306560}
|
|
{"current_steps": 21715, "total_steps": 78105, "loss": 0.2777, "lr": 4.5327253077879255e-06, "epoch": 1.3901158696626337, "percentage": 27.8, "elapsed_time": "0:56:59", "remaining_time": "2:27:58", "throughput": 19982.47, "total_tokens": 68321472}
|
|
{"current_steps": 21720, "total_steps": 78105, "loss": 0.3467, "lr": 4.532400044373707e-06, "epoch": 1.3904359516036104, "percentage": 27.81, "elapsed_time": "0:56:59", "remaining_time": "2:27:57", "throughput": 19983.04, "total_tokens": 68336640}
|
|
{"current_steps": 21725, "total_steps": 78105, "loss": 0.4255, "lr": 4.532074679472017e-06, "epoch": 1.3907560335445874, "percentage": 27.82, "elapsed_time": "0:57:00", "remaining_time": "2:27:56", "throughput": 19983.94, "total_tokens": 68354880}
|
|
{"current_steps": 21730, "total_steps": 78105, "loss": 0.2911, "lr": 4.531749213099103e-06, "epoch": 1.3910761154855642, "percentage": 27.82, "elapsed_time": "0:57:01", "remaining_time": "2:27:55", "throughput": 19984.65, "total_tokens": 68370944}
|
|
{"current_steps": 21735, "total_steps": 78105, "loss": 0.414, "lr": 4.531423645271215e-06, "epoch": 1.3913961974265412, "percentage": 27.83, "elapsed_time": "0:57:01", "remaining_time": "2:27:54", "throughput": 19985.37, "total_tokens": 68387264}
|
|
{"current_steps": 21740, "total_steps": 78105, "loss": 0.3035, "lr": 4.531097976004613e-06, "epoch": 1.3917162793675182, "percentage": 27.83, "elapsed_time": "0:57:02", "remaining_time": "2:27:53", "throughput": 19986.14, "total_tokens": 68403968}
|
|
{"current_steps": 21745, "total_steps": 78105, "loss": 0.4344, "lr": 4.530772205315557e-06, "epoch": 1.392036361308495, "percentage": 27.84, "elapsed_time": "0:57:03", "remaining_time": "2:27:52", "throughput": 19986.73, "total_tokens": 68419200}
|
|
{"current_steps": 21750, "total_steps": 78105, "loss": 0.2462, "lr": 4.530446333220314e-06, "epoch": 1.392356443249472, "percentage": 27.85, "elapsed_time": "0:57:03", "remaining_time": "2:27:51", "throughput": 19987.38, "total_tokens": 68434816}
|
|
{"current_steps": 21755, "total_steps": 78105, "loss": 0.3194, "lr": 4.530120359735158e-06, "epoch": 1.3926765251904487, "percentage": 27.85, "elapsed_time": "0:57:04", "remaining_time": "2:27:50", "throughput": 19988.08, "total_tokens": 68451200}
|
|
{"current_steps": 21760, "total_steps": 78105, "loss": 0.2332, "lr": 4.529794284876367e-06, "epoch": 1.3929966071314257, "percentage": 27.86, "elapsed_time": "0:57:05", "remaining_time": "2:27:49", "throughput": 19988.88, "total_tokens": 68468480}
|
|
{"current_steps": 21765, "total_steps": 78105, "loss": 0.3941, "lr": 4.529468108660221e-06, "epoch": 1.3933166890724025, "percentage": 27.87, "elapsed_time": "0:57:06", "remaining_time": "2:27:48", "throughput": 19989.57, "total_tokens": 68484928}
|
|
{"current_steps": 21770, "total_steps": 78105, "loss": 0.4212, "lr": 4.529141831103009e-06, "epoch": 1.3936367710133795, "percentage": 27.87, "elapsed_time": "0:57:06", "remaining_time": "2:27:47", "throughput": 19990.08, "total_tokens": 68499648}
|
|
{"current_steps": 21775, "total_steps": 78105, "loss": 0.2763, "lr": 4.528815452221023e-06, "epoch": 1.3939568529543562, "percentage": 27.88, "elapsed_time": "0:57:07", "remaining_time": "2:27:46", "throughput": 19990.72, "total_tokens": 68515072}
|
|
{"current_steps": 21780, "total_steps": 78105, "loss": 0.3863, "lr": 4.528488972030561e-06, "epoch": 1.3942769348953332, "percentage": 27.89, "elapsed_time": "0:57:08", "remaining_time": "2:27:45", "throughput": 19991.36, "total_tokens": 68530688}
|
|
{"current_steps": 21785, "total_steps": 78105, "loss": 0.3287, "lr": 4.528162390547926e-06, "epoch": 1.3945970168363102, "percentage": 27.89, "elapsed_time": "0:57:08", "remaining_time": "2:27:44", "throughput": 19991.9, "total_tokens": 68545536}
|
|
{"current_steps": 21790, "total_steps": 78105, "loss": 0.3477, "lr": 4.527835707789426e-06, "epoch": 1.394917098777287, "percentage": 27.9, "elapsed_time": "0:57:09", "remaining_time": "2:27:42", "throughput": 19992.36, "total_tokens": 68559680}
|
|
{"current_steps": 21795, "total_steps": 78105, "loss": 0.3721, "lr": 4.527508923771373e-06, "epoch": 1.3952371807182637, "percentage": 27.9, "elapsed_time": "0:57:09", "remaining_time": "2:27:41", "throughput": 19992.93, "total_tokens": 68574784}
|
|
{"current_steps": 21800, "total_steps": 78105, "loss": 0.3482, "lr": 4.527182038510085e-06, "epoch": 1.3955572626592407, "percentage": 27.91, "elapsed_time": "0:57:10", "remaining_time": "2:27:40", "throughput": 19993.56, "total_tokens": 68590336}
|
|
{"current_steps": 21805, "total_steps": 78105, "loss": 0.3791, "lr": 4.5268550520218854e-06, "epoch": 1.3958773446002177, "percentage": 27.92, "elapsed_time": "0:57:11", "remaining_time": "2:27:39", "throughput": 19994.26, "total_tokens": 68606592}
|
|
{"current_steps": 21810, "total_steps": 78105, "loss": 0.2334, "lr": 4.526527964323102e-06, "epoch": 1.3961974265411945, "percentage": 27.92, "elapsed_time": "0:57:11", "remaining_time": "2:27:38", "throughput": 19994.77, "total_tokens": 68621376}
|
|
{"current_steps": 21815, "total_steps": 78105, "loss": 0.494, "lr": 4.526200775430068e-06, "epoch": 1.3965175084821715, "percentage": 27.93, "elapsed_time": "0:57:12", "remaining_time": "2:27:37", "throughput": 19995.57, "total_tokens": 68638464}
|
|
{"current_steps": 21820, "total_steps": 78105, "loss": 0.3872, "lr": 4.525873485359121e-06, "epoch": 1.3968375904231483, "percentage": 27.94, "elapsed_time": "0:57:13", "remaining_time": "2:27:36", "throughput": 19996.2, "total_tokens": 68653824}
|
|
{"current_steps": 21825, "total_steps": 78105, "loss": 0.3141, "lr": 4.525546094126606e-06, "epoch": 1.3971576723641252, "percentage": 27.94, "elapsed_time": "0:57:14", "remaining_time": "2:27:35", "throughput": 19996.92, "total_tokens": 68670144}
|
|
{"current_steps": 21830, "total_steps": 78105, "loss": 0.2614, "lr": 4.525218601748867e-06, "epoch": 1.3974777543051022, "percentage": 27.95, "elapsed_time": "0:57:14", "remaining_time": "2:27:34", "throughput": 19997.59, "total_tokens": 68685824}
|
|
{"current_steps": 21835, "total_steps": 78105, "loss": 0.3664, "lr": 4.5248910082422625e-06, "epoch": 1.397797836246079, "percentage": 27.96, "elapsed_time": "0:57:15", "remaining_time": "2:27:33", "throughput": 19998.19, "total_tokens": 68701120}
|
|
{"current_steps": 21840, "total_steps": 78105, "loss": 0.3722, "lr": 4.524563313623147e-06, "epoch": 1.3981179181870558, "percentage": 27.96, "elapsed_time": "0:57:16", "remaining_time": "2:27:32", "throughput": 19998.78, "total_tokens": 68716416}
|
|
{"current_steps": 21845, "total_steps": 78105, "loss": 0.3258, "lr": 4.524235517907885e-06, "epoch": 1.3984380001280328, "percentage": 27.97, "elapsed_time": "0:57:16", "remaining_time": "2:27:30", "throughput": 19999.35, "total_tokens": 68731648}
|
|
{"current_steps": 21850, "total_steps": 78105, "loss": 0.2361, "lr": 4.523907621112846e-06, "epoch": 1.3987580820690098, "percentage": 27.98, "elapsed_time": "0:57:17", "remaining_time": "2:27:29", "throughput": 19999.91, "total_tokens": 68746560}
|
|
{"current_steps": 21855, "total_steps": 78105, "loss": 0.2422, "lr": 4.523579623254403e-06, "epoch": 1.3990781640099865, "percentage": 27.98, "elapsed_time": "0:57:18", "remaining_time": "2:27:28", "throughput": 20000.85, "total_tokens": 68764352}
|
|
{"current_steps": 21860, "total_steps": 78105, "loss": 0.3823, "lr": 4.523251524348933e-06, "epoch": 1.3993982459509635, "percentage": 27.99, "elapsed_time": "0:57:18", "remaining_time": "2:27:27", "throughput": 20001.45, "total_tokens": 68779584}
|
|
{"current_steps": 21865, "total_steps": 78105, "loss": 0.5307, "lr": 4.522923324412821e-06, "epoch": 1.3997183278919403, "percentage": 27.99, "elapsed_time": "0:57:19", "remaining_time": "2:27:26", "throughput": 20002.08, "total_tokens": 68795584}
|
|
{"current_steps": 21870, "total_steps": 78105, "loss": 0.4641, "lr": 4.5225950234624545e-06, "epoch": 1.4000384098329173, "percentage": 28.0, "elapsed_time": "0:57:20", "remaining_time": "2:27:25", "throughput": 20002.76, "total_tokens": 68811648}
|
|
{"current_steps": 21875, "total_steps": 78105, "loss": 0.3552, "lr": 4.5222666215142284e-06, "epoch": 1.400358491773894, "percentage": 28.01, "elapsed_time": "0:57:20", "remaining_time": "2:27:24", "throughput": 20003.3, "total_tokens": 68826816}
|
|
{"current_steps": 21880, "total_steps": 78105, "loss": 0.4762, "lr": 4.521938118584541e-06, "epoch": 1.400678573714871, "percentage": 28.01, "elapsed_time": "0:57:21", "remaining_time": "2:27:23", "throughput": 20003.96, "total_tokens": 68842432}
|
|
{"current_steps": 21885, "total_steps": 78105, "loss": 0.3016, "lr": 4.521609514689796e-06, "epoch": 1.4009986556558478, "percentage": 28.02, "elapsed_time": "0:57:22", "remaining_time": "2:27:22", "throughput": 20004.75, "total_tokens": 68859456}
|
|
{"current_steps": 21890, "total_steps": 78105, "loss": 0.416, "lr": 4.5212808098464015e-06, "epoch": 1.4013187375968248, "percentage": 28.03, "elapsed_time": "0:57:22", "remaining_time": "2:27:21", "throughput": 20005.35, "total_tokens": 68874816}
|
|
{"current_steps": 21895, "total_steps": 78105, "loss": 0.2954, "lr": 4.5209520040707725e-06, "epoch": 1.4016388195378018, "percentage": 28.03, "elapsed_time": "0:57:23", "remaining_time": "2:27:20", "throughput": 20005.95, "total_tokens": 68890752}
|
|
{"current_steps": 21900, "total_steps": 78105, "loss": 0.428, "lr": 4.5206230973793266e-06, "epoch": 1.4019589014787786, "percentage": 28.04, "elapsed_time": "0:57:24", "remaining_time": "2:27:19", "throughput": 20006.57, "total_tokens": 68906304}
|
|
{"current_steps": 21905, "total_steps": 78105, "loss": 0.3513, "lr": 4.520294089788488e-06, "epoch": 1.4022789834197553, "percentage": 28.05, "elapsed_time": "0:57:24", "remaining_time": "2:27:18", "throughput": 20007.28, "total_tokens": 68922752}
|
|
{"current_steps": 21910, "total_steps": 78105, "loss": 0.3209, "lr": 4.519964981314686e-06, "epoch": 1.4025990653607323, "percentage": 28.05, "elapsed_time": "0:57:25", "remaining_time": "2:27:17", "throughput": 20007.92, "total_tokens": 68938624}
|
|
{"current_steps": 21915, "total_steps": 78105, "loss": 0.364, "lr": 4.519635771974355e-06, "epoch": 1.4029191473017093, "percentage": 28.06, "elapsed_time": "0:57:26", "remaining_time": "2:27:16", "throughput": 20008.63, "total_tokens": 68955008}
|
|
{"current_steps": 21920, "total_steps": 78105, "loss": 0.3084, "lr": 4.519306461783933e-06, "epoch": 1.403239229242686, "percentage": 28.06, "elapsed_time": "0:57:26", "remaining_time": "2:27:15", "throughput": 20009.39, "total_tokens": 68971840}
|
|
{"current_steps": 21925, "total_steps": 78105, "loss": 0.3228, "lr": 4.5189770507598655e-06, "epoch": 1.403559311183663, "percentage": 28.07, "elapsed_time": "0:57:27", "remaining_time": "2:27:14", "throughput": 20010.07, "total_tokens": 68987968}
|
|
{"current_steps": 21930, "total_steps": 78105, "loss": 0.484, "lr": 4.518647538918599e-06, "epoch": 1.4038793931246398, "percentage": 28.08, "elapsed_time": "0:57:28", "remaining_time": "2:27:13", "throughput": 20010.66, "total_tokens": 69003136}
|
|
{"current_steps": 21935, "total_steps": 78105, "loss": 0.4617, "lr": 4.51831792627659e-06, "epoch": 1.4041994750656168, "percentage": 28.08, "elapsed_time": "0:57:28", "remaining_time": "2:27:12", "throughput": 20011.34, "total_tokens": 69019072}
|
|
{"current_steps": 21940, "total_steps": 78105, "loss": 0.2423, "lr": 4.517988212850296e-06, "epoch": 1.4045195570065938, "percentage": 28.09, "elapsed_time": "0:57:29", "remaining_time": "2:27:10", "throughput": 20011.85, "total_tokens": 69033728}
|
|
{"current_steps": 21945, "total_steps": 78105, "loss": 0.294, "lr": 4.517658398656183e-06, "epoch": 1.4048396389475706, "percentage": 28.1, "elapsed_time": "0:57:30", "remaining_time": "2:27:09", "throughput": 20012.57, "total_tokens": 69049920}
|
|
{"current_steps": 21950, "total_steps": 78105, "loss": 0.3204, "lr": 4.517328483710719e-06, "epoch": 1.4051597208885473, "percentage": 28.1, "elapsed_time": "0:57:30", "remaining_time": "2:27:08", "throughput": 20013.12, "total_tokens": 69065280}
|
|
{"current_steps": 21955, "total_steps": 78105, "loss": 0.2705, "lr": 4.516998468030378e-06, "epoch": 1.4054798028295243, "percentage": 28.11, "elapsed_time": "0:57:31", "remaining_time": "2:27:07", "throughput": 20013.77, "total_tokens": 69080704}
|
|
{"current_steps": 21960, "total_steps": 78105, "loss": 0.3338, "lr": 4.5166683516316405e-06, "epoch": 1.4057998847705013, "percentage": 28.12, "elapsed_time": "0:57:32", "remaining_time": "2:27:06", "throughput": 20014.38, "total_tokens": 69096128}
|
|
{"current_steps": 21965, "total_steps": 78105, "loss": 0.3733, "lr": 4.516338134530989e-06, "epoch": 1.406119966711478, "percentage": 28.12, "elapsed_time": "0:57:32", "remaining_time": "2:27:05", "throughput": 20014.93, "total_tokens": 69110976}
|
|
{"current_steps": 21970, "total_steps": 78105, "loss": 0.3231, "lr": 4.516007816744914e-06, "epoch": 1.406440048652455, "percentage": 28.13, "elapsed_time": "0:57:33", "remaining_time": "2:27:04", "throughput": 20015.5, "total_tokens": 69126080}
|
|
{"current_steps": 21975, "total_steps": 78105, "loss": 0.2929, "lr": 4.515677398289909e-06, "epoch": 1.4067601305934319, "percentage": 28.14, "elapsed_time": "0:57:34", "remaining_time": "2:27:03", "throughput": 20016.32, "total_tokens": 69143552}
|
|
{"current_steps": 21980, "total_steps": 78105, "loss": 0.2825, "lr": 4.515346879182475e-06, "epoch": 1.4070802125344088, "percentage": 28.14, "elapsed_time": "0:57:35", "remaining_time": "2:27:02", "throughput": 20016.96, "total_tokens": 69159424}
|
|
{"current_steps": 21985, "total_steps": 78105, "loss": 0.348, "lr": 4.515016259439115e-06, "epoch": 1.4074002944753856, "percentage": 28.15, "elapsed_time": "0:57:35", "remaining_time": "2:27:01", "throughput": 20017.61, "total_tokens": 69175744}
|
|
{"current_steps": 21990, "total_steps": 78105, "loss": 0.376, "lr": 4.514685539076339e-06, "epoch": 1.4077203764163626, "percentage": 28.15, "elapsed_time": "0:57:36", "remaining_time": "2:27:00", "throughput": 20018.19, "total_tokens": 69190784}
|
|
{"current_steps": 21995, "total_steps": 78105, "loss": 0.3178, "lr": 4.514354718110662e-06, "epoch": 1.4080404583573394, "percentage": 28.16, "elapsed_time": "0:57:37", "remaining_time": "2:26:59", "throughput": 20018.71, "total_tokens": 69205888}
|
|
{"current_steps": 22000, "total_steps": 78105, "loss": 0.4727, "lr": 4.514023796558601e-06, "epoch": 1.4083605402983164, "percentage": 28.17, "elapsed_time": "0:57:37", "remaining_time": "2:26:57", "throughput": 20019.21, "total_tokens": 69220352}
|
|
{"current_steps": 22005, "total_steps": 78105, "loss": 0.2668, "lr": 4.5136927744366835e-06, "epoch": 1.4086806222392934, "percentage": 28.17, "elapsed_time": "0:57:38", "remaining_time": "2:26:56", "throughput": 20019.94, "total_tokens": 69236992}
|
|
{"current_steps": 22010, "total_steps": 78105, "loss": 0.2625, "lr": 4.5133616517614375e-06, "epoch": 1.4090007041802701, "percentage": 28.18, "elapsed_time": "0:57:39", "remaining_time": "2:26:55", "throughput": 20020.65, "total_tokens": 69253376}
|
|
{"current_steps": 22015, "total_steps": 78105, "loss": 0.3506, "lr": 4.513030428549398e-06, "epoch": 1.4093207861212471, "percentage": 28.19, "elapsed_time": "0:57:39", "remaining_time": "2:26:54", "throughput": 20021.18, "total_tokens": 69268480}
|
|
{"current_steps": 22020, "total_steps": 78105, "loss": 0.3796, "lr": 4.512699104817104e-06, "epoch": 1.4096408680622239, "percentage": 28.19, "elapsed_time": "0:57:40", "remaining_time": "2:26:53", "throughput": 20021.73, "total_tokens": 69283456}
|
|
{"current_steps": 22025, "total_steps": 78105, "loss": 0.5586, "lr": 4.512367680581101e-06, "epoch": 1.4099609500032009, "percentage": 28.2, "elapsed_time": "0:57:41", "remaining_time": "2:26:52", "throughput": 20022.28, "total_tokens": 69298432}
|
|
{"current_steps": 22030, "total_steps": 78105, "loss": 0.4369, "lr": 4.512036155857939e-06, "epoch": 1.4102810319441776, "percentage": 28.21, "elapsed_time": "0:57:41", "remaining_time": "2:26:51", "throughput": 20022.77, "total_tokens": 69312960}
|
|
{"current_steps": 22035, "total_steps": 78105, "loss": 0.248, "lr": 4.511704530664169e-06, "epoch": 1.4106011138851546, "percentage": 28.21, "elapsed_time": "0:57:42", "remaining_time": "2:26:50", "throughput": 20023.27, "total_tokens": 69327488}
|
|
{"current_steps": 22040, "total_steps": 78105, "loss": 0.227, "lr": 4.511372805016355e-06, "epoch": 1.4109211958261314, "percentage": 28.22, "elapsed_time": "0:57:43", "remaining_time": "2:26:49", "throughput": 20023.85, "total_tokens": 69342912}
|
|
{"current_steps": 22045, "total_steps": 78105, "loss": 0.2542, "lr": 4.51104097893106e-06, "epoch": 1.4112412777671084, "percentage": 28.22, "elapsed_time": "0:57:43", "remaining_time": "2:26:48", "throughput": 20024.44, "total_tokens": 69358592}
|
|
{"current_steps": 22050, "total_steps": 78105, "loss": 0.336, "lr": 4.510709052424854e-06, "epoch": 1.4115613597080854, "percentage": 28.23, "elapsed_time": "0:57:44", "remaining_time": "2:26:46", "throughput": 20025.0, "total_tokens": 69373632}
|
|
{"current_steps": 22055, "total_steps": 78105, "loss": 0.2679, "lr": 4.510377025514311e-06, "epoch": 1.4118814416490622, "percentage": 28.24, "elapsed_time": "0:57:45", "remaining_time": "2:26:46", "throughput": 20025.73, "total_tokens": 69390720}
|
|
{"current_steps": 22060, "total_steps": 78105, "loss": 0.3682, "lr": 4.51004489821601e-06, "epoch": 1.412201523590039, "percentage": 28.24, "elapsed_time": "0:57:45", "remaining_time": "2:26:45", "throughput": 20026.36, "total_tokens": 69406528}
|
|
{"current_steps": 22065, "total_steps": 78105, "loss": 0.4413, "lr": 4.509712670546538e-06, "epoch": 1.412521605531016, "percentage": 28.25, "elapsed_time": "0:57:46", "remaining_time": "2:26:43", "throughput": 20027.08, "total_tokens": 69422976}
|
|
{"current_steps": 22070, "total_steps": 78105, "loss": 0.2929, "lr": 4.509380342522483e-06, "epoch": 1.412841687471993, "percentage": 28.26, "elapsed_time": "0:57:47", "remaining_time": "2:26:43", "throughput": 20027.9, "total_tokens": 69440448}
|
|
{"current_steps": 22075, "total_steps": 78105, "loss": 0.4309, "lr": 4.50904791416044e-06, "epoch": 1.4131617694129697, "percentage": 28.26, "elapsed_time": "0:57:47", "remaining_time": "2:26:41", "throughput": 20028.48, "total_tokens": 69455744}
|
|
{"current_steps": 22080, "total_steps": 78105, "loss": 0.2869, "lr": 4.508715385477009e-06, "epoch": 1.4134818513539467, "percentage": 28.27, "elapsed_time": "0:57:48", "remaining_time": "2:26:40", "throughput": 20029.07, "total_tokens": 69471168}
|
|
{"current_steps": 22085, "total_steps": 78105, "loss": 0.4174, "lr": 4.508382756488795e-06, "epoch": 1.4138019332949234, "percentage": 28.28, "elapsed_time": "0:57:49", "remaining_time": "2:26:39", "throughput": 20029.72, "total_tokens": 69486976}
|
|
{"current_steps": 22090, "total_steps": 78105, "loss": 0.4435, "lr": 4.508050027212407e-06, "epoch": 1.4141220152359004, "percentage": 28.28, "elapsed_time": "0:57:49", "remaining_time": "2:26:38", "throughput": 20030.43, "total_tokens": 69503616}
|
|
{"current_steps": 22095, "total_steps": 78105, "loss": 0.2814, "lr": 4.507717197664461e-06, "epoch": 1.4144420971768774, "percentage": 28.29, "elapsed_time": "0:57:50", "remaining_time": "2:26:37", "throughput": 20031.08, "total_tokens": 69519744}
|
|
{"current_steps": 22100, "total_steps": 78105, "loss": 0.3086, "lr": 4.507384267861576e-06, "epoch": 1.4147621791178542, "percentage": 28.3, "elapsed_time": "0:57:51", "remaining_time": "2:26:36", "throughput": 20031.81, "total_tokens": 69536256}
|
|
{"current_steps": 22105, "total_steps": 78105, "loss": 0.4418, "lr": 4.507051237820375e-06, "epoch": 1.415082261058831, "percentage": 28.3, "elapsed_time": "0:57:51", "remaining_time": "2:26:35", "throughput": 20032.49, "total_tokens": 69552384}
|
|
{"current_steps": 22110, "total_steps": 78105, "loss": 0.2333, "lr": 4.506718107557491e-06, "epoch": 1.415402342999808, "percentage": 28.31, "elapsed_time": "0:57:52", "remaining_time": "2:26:34", "throughput": 20033.12, "total_tokens": 69568064}
|
|
{"current_steps": 22115, "total_steps": 78105, "loss": 0.4221, "lr": 4.506384877089557e-06, "epoch": 1.415722424940785, "percentage": 28.31, "elapsed_time": "0:57:53", "remaining_time": "2:26:33", "throughput": 20033.66, "total_tokens": 69583040}
|
|
{"current_steps": 22120, "total_steps": 78105, "loss": 0.5593, "lr": 4.506051546433213e-06, "epoch": 1.4160425068817617, "percentage": 28.32, "elapsed_time": "0:57:53", "remaining_time": "2:26:32", "throughput": 20034.17, "total_tokens": 69598016}
|
|
{"current_steps": 22125, "total_steps": 78105, "loss": 0.2703, "lr": 4.505718115605103e-06, "epoch": 1.4163625888227387, "percentage": 28.33, "elapsed_time": "0:57:54", "remaining_time": "2:26:31", "throughput": 20034.65, "total_tokens": 69612736}
|
|
{"current_steps": 22130, "total_steps": 78105, "loss": 0.2639, "lr": 4.505384584621879e-06, "epoch": 1.4166826707637155, "percentage": 28.33, "elapsed_time": "0:57:55", "remaining_time": "2:26:30", "throughput": 20035.27, "total_tokens": 69628544}
|
|
{"current_steps": 22135, "total_steps": 78105, "loss": 0.312, "lr": 4.505050953500194e-06, "epoch": 1.4170027527046924, "percentage": 28.34, "elapsed_time": "0:57:55", "remaining_time": "2:26:29", "throughput": 20035.86, "total_tokens": 69643904}
|
|
{"current_steps": 22140, "total_steps": 78105, "loss": 0.3547, "lr": 4.50471722225671e-06, "epoch": 1.4173228346456692, "percentage": 28.35, "elapsed_time": "0:57:56", "remaining_time": "2:26:28", "throughput": 20036.43, "total_tokens": 69659200}
|
|
{"current_steps": 22145, "total_steps": 78105, "loss": 0.3342, "lr": 4.504383390908088e-06, "epoch": 1.4176429165866462, "percentage": 28.35, "elapsed_time": "0:57:57", "remaining_time": "2:26:27", "throughput": 20037.26, "total_tokens": 69676800}
|
|
{"current_steps": 22150, "total_steps": 78105, "loss": 0.3124, "lr": 4.504049459471e-06, "epoch": 1.417962998527623, "percentage": 28.36, "elapsed_time": "0:57:58", "remaining_time": "2:26:26", "throughput": 20037.83, "total_tokens": 69692544}
|
|
{"current_steps": 22155, "total_steps": 78105, "loss": 0.2454, "lr": 4.503715427962121e-06, "epoch": 1.4182830804686, "percentage": 28.37, "elapsed_time": "0:57:58", "remaining_time": "2:26:25", "throughput": 20038.46, "total_tokens": 69708096}
|
|
{"current_steps": 22160, "total_steps": 78105, "loss": 0.361, "lr": 4.503381296398131e-06, "epoch": 1.418603162409577, "percentage": 28.37, "elapsed_time": "0:57:59", "remaining_time": "2:26:23", "throughput": 20038.97, "total_tokens": 69722944}
|
|
{"current_steps": 22165, "total_steps": 78105, "loss": 0.2242, "lr": 4.503047064795713e-06, "epoch": 1.4189232443505537, "percentage": 28.38, "elapsed_time": "0:58:00", "remaining_time": "2:26:22", "throughput": 20039.53, "total_tokens": 69738048}
|
|
{"current_steps": 22170, "total_steps": 78105, "loss": 0.4147, "lr": 4.50271273317156e-06, "epoch": 1.4192433262915305, "percentage": 28.38, "elapsed_time": "0:58:00", "remaining_time": "2:26:21", "throughput": 20040.16, "total_tokens": 69753728}
|
|
{"current_steps": 22175, "total_steps": 78105, "loss": 0.3756, "lr": 4.502378301542364e-06, "epoch": 1.4195634082325075, "percentage": 28.39, "elapsed_time": "0:58:01", "remaining_time": "2:26:20", "throughput": 20040.85, "total_tokens": 69769728}
|
|
{"current_steps": 22180, "total_steps": 78105, "loss": 0.3585, "lr": 4.502043769924825e-06, "epoch": 1.4198834901734845, "percentage": 28.4, "elapsed_time": "0:58:02", "remaining_time": "2:26:19", "throughput": 20041.34, "total_tokens": 69784448}
|
|
{"current_steps": 22185, "total_steps": 78105, "loss": 0.4064, "lr": 4.501709138335649e-06, "epoch": 1.4202035721144612, "percentage": 28.4, "elapsed_time": "0:58:02", "remaining_time": "2:26:18", "throughput": 20042.01, "total_tokens": 69800704}
|
|
{"current_steps": 22190, "total_steps": 78105, "loss": 0.4838, "lr": 4.501374406791546e-06, "epoch": 1.4205236540554382, "percentage": 28.41, "elapsed_time": "0:58:03", "remaining_time": "2:26:17", "throughput": 20042.68, "total_tokens": 69817152}
|
|
{"current_steps": 22195, "total_steps": 78105, "loss": 0.3046, "lr": 4.501039575309229e-06, "epoch": 1.420843735996415, "percentage": 28.42, "elapsed_time": "0:58:04", "remaining_time": "2:26:16", "throughput": 20043.32, "total_tokens": 69833088}
|
|
{"current_steps": 22200, "total_steps": 78105, "loss": 0.3752, "lr": 4.500704643905418e-06, "epoch": 1.421163817937392, "percentage": 28.42, "elapsed_time": "0:58:04", "remaining_time": "2:26:15", "throughput": 20043.82, "total_tokens": 69847936}
|
|
{"current_steps": 22205, "total_steps": 78105, "loss": 0.4038, "lr": 4.500369612596839e-06, "epoch": 1.421483899878369, "percentage": 28.43, "elapsed_time": "0:58:05", "remaining_time": "2:26:14", "throughput": 20044.64, "total_tokens": 69865408}
|
|
{"current_steps": 22210, "total_steps": 78105, "loss": 0.3607, "lr": 4.5000344814002215e-06, "epoch": 1.4218039818193458, "percentage": 28.44, "elapsed_time": "0:58:06", "remaining_time": "2:26:13", "throughput": 20045.27, "total_tokens": 69881216}
|
|
{"current_steps": 22215, "total_steps": 78105, "loss": 0.4551, "lr": 4.499699250332299e-06, "epoch": 1.4221240637603225, "percentage": 28.44, "elapsed_time": "0:58:06", "remaining_time": "2:26:12", "throughput": 20046.21, "total_tokens": 69899520}
|
|
{"current_steps": 22220, "total_steps": 78105, "loss": 0.3379, "lr": 4.499363919409813e-06, "epoch": 1.4224441457012995, "percentage": 28.45, "elapsed_time": "0:58:07", "remaining_time": "2:26:11", "throughput": 20046.89, "total_tokens": 69915904}
|
|
{"current_steps": 22225, "total_steps": 78105, "loss": 0.2545, "lr": 4.499028488649506e-06, "epoch": 1.4227642276422765, "percentage": 28.46, "elapsed_time": "0:58:08", "remaining_time": "2:26:10", "throughput": 20047.6, "total_tokens": 69932416}
|
|
{"current_steps": 22230, "total_steps": 78105, "loss": 0.3255, "lr": 4.49869295806813e-06, "epoch": 1.4230843095832533, "percentage": 28.46, "elapsed_time": "0:58:08", "remaining_time": "2:26:09", "throughput": 20048.11, "total_tokens": 69947328}
|
|
{"current_steps": 22235, "total_steps": 78105, "loss": 0.4659, "lr": 4.498357327682437e-06, "epoch": 1.4234043915242303, "percentage": 28.47, "elapsed_time": "0:58:09", "remaining_time": "2:26:08", "throughput": 20048.63, "total_tokens": 69962048}
|
|
{"current_steps": 22240, "total_steps": 78105, "loss": 0.346, "lr": 4.49802159750919e-06, "epoch": 1.423724473465207, "percentage": 28.47, "elapsed_time": "0:58:10", "remaining_time": "2:26:07", "throughput": 20049.09, "total_tokens": 69976320}
|
|
{"current_steps": 22245, "total_steps": 78105, "loss": 0.2972, "lr": 4.497685767565151e-06, "epoch": 1.424044555406184, "percentage": 28.48, "elapsed_time": "0:58:10", "remaining_time": "2:26:06", "throughput": 20049.67, "total_tokens": 69991744}
|
|
{"current_steps": 22250, "total_steps": 78105, "loss": 0.3883, "lr": 4.49734983786709e-06, "epoch": 1.424364637347161, "percentage": 28.49, "elapsed_time": "0:58:11", "remaining_time": "2:26:05", "throughput": 20050.24, "total_tokens": 70006912}
|
|
{"current_steps": 22255, "total_steps": 78105, "loss": 0.3055, "lr": 4.497013808431781e-06, "epoch": 1.4246847192881378, "percentage": 28.49, "elapsed_time": "0:58:12", "remaining_time": "2:26:04", "throughput": 20050.93, "total_tokens": 70023360}
|
|
{"current_steps": 22260, "total_steps": 78105, "loss": 0.272, "lr": 4.496677679276006e-06, "epoch": 1.4250048012291145, "percentage": 28.5, "elapsed_time": "0:58:12", "remaining_time": "2:26:02", "throughput": 20051.55, "total_tokens": 70039232}
|
|
{"current_steps": 22265, "total_steps": 78105, "loss": 0.3403, "lr": 4.496341450416548e-06, "epoch": 1.4253248831700915, "percentage": 28.51, "elapsed_time": "0:58:13", "remaining_time": "2:26:01", "throughput": 20052.2, "total_tokens": 70055552}
|
|
{"current_steps": 22270, "total_steps": 78105, "loss": 0.4385, "lr": 4.496005121870196e-06, "epoch": 1.4256449651110685, "percentage": 28.51, "elapsed_time": "0:58:14", "remaining_time": "2:26:00", "throughput": 20052.82, "total_tokens": 70071360}
|
|
{"current_steps": 22275, "total_steps": 78105, "loss": 0.3796, "lr": 4.495668693653745e-06, "epoch": 1.4259650470520453, "percentage": 28.52, "elapsed_time": "0:58:15", "remaining_time": "2:25:59", "throughput": 20053.36, "total_tokens": 70086720}
|
|
{"current_steps": 22280, "total_steps": 78105, "loss": 0.3569, "lr": 4.495332165783995e-06, "epoch": 1.4262851289930223, "percentage": 28.53, "elapsed_time": "0:58:15", "remaining_time": "2:25:58", "throughput": 20053.96, "total_tokens": 70102336}
|
|
{"current_steps": 22285, "total_steps": 78105, "loss": 0.4547, "lr": 4.494995538277749e-06, "epoch": 1.426605210933999, "percentage": 28.53, "elapsed_time": "0:58:16", "remaining_time": "2:25:57", "throughput": 20054.5, "total_tokens": 70117504}
|
|
{"current_steps": 22290, "total_steps": 78105, "loss": 0.258, "lr": 4.494658811151817e-06, "epoch": 1.426925292874976, "percentage": 28.54, "elapsed_time": "0:58:17", "remaining_time": "2:25:56", "throughput": 20055.05, "total_tokens": 70133120}
|
|
{"current_steps": 22295, "total_steps": 78105, "loss": 0.2478, "lr": 4.4943219844230155e-06, "epoch": 1.4272453748159528, "percentage": 28.54, "elapsed_time": "0:58:17", "remaining_time": "2:25:55", "throughput": 20055.69, "total_tokens": 70149120}
|
|
{"current_steps": 22300, "total_steps": 78105, "loss": 0.3174, "lr": 4.493985058108161e-06, "epoch": 1.4275654567569298, "percentage": 28.55, "elapsed_time": "0:58:18", "remaining_time": "2:25:54", "throughput": 20056.34, "total_tokens": 70165056}
|
|
{"current_steps": 22305, "total_steps": 78105, "loss": 0.2791, "lr": 4.493648032224079e-06, "epoch": 1.4278855386979066, "percentage": 28.56, "elapsed_time": "0:58:19", "remaining_time": "2:25:53", "throughput": 20056.86, "total_tokens": 70180032}
|
|
{"current_steps": 22310, "total_steps": 78105, "loss": 0.341, "lr": 4.493310906787599e-06, "epoch": 1.4282056206388836, "percentage": 28.56, "elapsed_time": "0:58:19", "remaining_time": "2:25:52", "throughput": 20057.45, "total_tokens": 70195456}
|
|
{"current_steps": 22315, "total_steps": 78105, "loss": 0.2998, "lr": 4.492973681815555e-06, "epoch": 1.4285257025798606, "percentage": 28.57, "elapsed_time": "0:58:20", "remaining_time": "2:25:51", "throughput": 20058.07, "total_tokens": 70211264}
|
|
{"current_steps": 22320, "total_steps": 78105, "loss": 0.3334, "lr": 4.492636357324787e-06, "epoch": 1.4288457845208373, "percentage": 28.58, "elapsed_time": "0:58:21", "remaining_time": "2:25:50", "throughput": 20058.61, "total_tokens": 70226176}
|
|
{"current_steps": 22325, "total_steps": 78105, "loss": 0.2868, "lr": 4.492298933332138e-06, "epoch": 1.429165866461814, "percentage": 28.58, "elapsed_time": "0:58:21", "remaining_time": "2:25:49", "throughput": 20059.27, "total_tokens": 70242432}
|
|
{"current_steps": 22330, "total_steps": 78105, "loss": 0.3106, "lr": 4.4919614098544585e-06, "epoch": 1.429485948402791, "percentage": 28.59, "elapsed_time": "0:58:22", "remaining_time": "2:25:48", "throughput": 20059.92, "total_tokens": 70258560}
|
|
{"current_steps": 22335, "total_steps": 78105, "loss": 0.333, "lr": 4.491623786908601e-06, "epoch": 1.429806030343768, "percentage": 28.6, "elapsed_time": "0:58:23", "remaining_time": "2:25:47", "throughput": 20060.63, "total_tokens": 70274944}
|
|
{"current_steps": 22340, "total_steps": 78105, "loss": 0.3755, "lr": 4.491286064511426e-06, "epoch": 1.4301261122847448, "percentage": 28.6, "elapsed_time": "0:58:23", "remaining_time": "2:25:46", "throughput": 20061.21, "total_tokens": 70290560}
|
|
{"current_steps": 22345, "total_steps": 78105, "loss": 0.283, "lr": 4.490948242679796e-06, "epoch": 1.4304461942257218, "percentage": 28.61, "elapsed_time": "0:58:24", "remaining_time": "2:25:45", "throughput": 20061.89, "total_tokens": 70306880}
|
|
{"current_steps": 22350, "total_steps": 78105, "loss": 0.2891, "lr": 4.490610321430583e-06, "epoch": 1.4307662761666986, "percentage": 28.62, "elapsed_time": "0:58:25", "remaining_time": "2:25:44", "throughput": 20062.52, "total_tokens": 70322496}
|
|
{"current_steps": 22355, "total_steps": 78105, "loss": 0.2829, "lr": 4.4902723007806585e-06, "epoch": 1.4310863581076756, "percentage": 28.62, "elapsed_time": "0:58:25", "remaining_time": "2:25:43", "throughput": 20063.12, "total_tokens": 70338496}
|
|
{"current_steps": 22360, "total_steps": 78105, "loss": 0.2031, "lr": 4.4899341807469025e-06, "epoch": 1.4314064400486526, "percentage": 28.63, "elapsed_time": "0:58:26", "remaining_time": "2:25:42", "throughput": 20063.77, "total_tokens": 70354176}
|
|
{"current_steps": 22365, "total_steps": 78105, "loss": 0.4652, "lr": 4.489595961346198e-06, "epoch": 1.4317265219896294, "percentage": 28.63, "elapsed_time": "0:58:27", "remaining_time": "2:25:40", "throughput": 20064.46, "total_tokens": 70370304}
|
|
{"current_steps": 22370, "total_steps": 78105, "loss": 0.257, "lr": 4.489257642595436e-06, "epoch": 1.4320466039306061, "percentage": 28.64, "elapsed_time": "0:58:27", "remaining_time": "2:25:39", "throughput": 20065.06, "total_tokens": 70385856}
|
|
{"current_steps": 22375, "total_steps": 78105, "loss": 0.299, "lr": 4.488919224511508e-06, "epoch": 1.4323666858715831, "percentage": 28.65, "elapsed_time": "0:58:28", "remaining_time": "2:25:38", "throughput": 20065.64, "total_tokens": 70401344}
|
|
{"current_steps": 22380, "total_steps": 78105, "loss": 0.3748, "lr": 4.488580707111314e-06, "epoch": 1.43268676781256, "percentage": 28.65, "elapsed_time": "0:58:29", "remaining_time": "2:25:37", "throughput": 20066.19, "total_tokens": 70416384}
|
|
{"current_steps": 22385, "total_steps": 78105, "loss": 0.3671, "lr": 4.488242090411758e-06, "epoch": 1.4330068497535369, "percentage": 28.66, "elapsed_time": "0:58:29", "remaining_time": "2:25:36", "throughput": 20066.76, "total_tokens": 70431552}
|
|
{"current_steps": 22390, "total_steps": 78105, "loss": 0.3749, "lr": 4.487903374429748e-06, "epoch": 1.4333269316945139, "percentage": 28.67, "elapsed_time": "0:58:30", "remaining_time": "2:25:35", "throughput": 20067.49, "total_tokens": 70448192}
|
|
{"current_steps": 22395, "total_steps": 78105, "loss": 0.3445, "lr": 4.487564559182197e-06, "epoch": 1.4336470136354906, "percentage": 28.67, "elapsed_time": "0:58:31", "remaining_time": "2:25:34", "throughput": 20068.14, "total_tokens": 70464128}
|
|
{"current_steps": 22400, "total_steps": 78105, "loss": 0.3425, "lr": 4.487225644686027e-06, "epoch": 1.4339670955764676, "percentage": 28.68, "elapsed_time": "0:58:31", "remaining_time": "2:25:33", "throughput": 20068.8, "total_tokens": 70480192}
|
|
{"current_steps": 22405, "total_steps": 78105, "loss": 0.3538, "lr": 4.486886630958157e-06, "epoch": 1.4342871775174444, "percentage": 28.69, "elapsed_time": "0:58:32", "remaining_time": "2:25:32", "throughput": 20069.35, "total_tokens": 70495360}
|
|
{"current_steps": 22410, "total_steps": 78105, "loss": 0.3122, "lr": 4.486547518015519e-06, "epoch": 1.4346072594584214, "percentage": 28.69, "elapsed_time": "0:58:33", "remaining_time": "2:25:31", "throughput": 20069.95, "total_tokens": 70511232}
|
|
{"current_steps": 22415, "total_steps": 78105, "loss": 0.2743, "lr": 4.486208305875046e-06, "epoch": 1.4349273413993981, "percentage": 28.7, "elapsed_time": "0:58:33", "remaining_time": "2:25:30", "throughput": 20070.65, "total_tokens": 70527488}
|
|
{"current_steps": 22420, "total_steps": 78105, "loss": 0.3068, "lr": 4.4858689945536755e-06, "epoch": 1.4352474233403751, "percentage": 28.7, "elapsed_time": "0:58:34", "remaining_time": "2:25:29", "throughput": 20071.23, "total_tokens": 70542656}
|
|
{"current_steps": 22425, "total_steps": 78105, "loss": 0.2907, "lr": 4.4855295840683516e-06, "epoch": 1.4355675052813521, "percentage": 28.71, "elapsed_time": "0:58:35", "remaining_time": "2:25:28", "throughput": 20071.75, "total_tokens": 70557568}
|
|
{"current_steps": 22430, "total_steps": 78105, "loss": 0.2776, "lr": 4.485190074436022e-06, "epoch": 1.435887587222329, "percentage": 28.72, "elapsed_time": "0:58:35", "remaining_time": "2:25:27", "throughput": 20072.53, "total_tokens": 70574592}
|
|
{"current_steps": 22435, "total_steps": 78105, "loss": 0.2727, "lr": 4.484850465673641e-06, "epoch": 1.4362076691633059, "percentage": 28.72, "elapsed_time": "0:58:36", "remaining_time": "2:25:26", "throughput": 20073.2, "total_tokens": 70590784}
|
|
{"current_steps": 22440, "total_steps": 78105, "loss": 0.2752, "lr": 4.4845107577981676e-06, "epoch": 1.4365277511042827, "percentage": 28.73, "elapsed_time": "0:58:37", "remaining_time": "2:25:25", "throughput": 20073.63, "total_tokens": 70605248}
|
|
{"current_steps": 22445, "total_steps": 78105, "loss": 0.4739, "lr": 4.484170950826562e-06, "epoch": 1.4368478330452596, "percentage": 28.74, "elapsed_time": "0:58:38", "remaining_time": "2:25:24", "throughput": 20074.38, "total_tokens": 70622208}
|
|
{"current_steps": 22450, "total_steps": 78105, "loss": 0.3522, "lr": 4.483831044775795e-06, "epoch": 1.4371679149862364, "percentage": 28.74, "elapsed_time": "0:58:38", "remaining_time": "2:25:23", "throughput": 20075.18, "total_tokens": 70639680}
|
|
{"current_steps": 22455, "total_steps": 78105, "loss": 0.3858, "lr": 4.4834910396628394e-06, "epoch": 1.4374879969272134, "percentage": 28.75, "elapsed_time": "0:58:39", "remaining_time": "2:25:22", "throughput": 20075.8, "total_tokens": 70655296}
|
|
{"current_steps": 22460, "total_steps": 78105, "loss": 0.3428, "lr": 4.483150935504672e-06, "epoch": 1.4378080788681902, "percentage": 28.76, "elapsed_time": "0:58:40", "remaining_time": "2:25:21", "throughput": 20076.46, "total_tokens": 70671424}
|
|
{"current_steps": 22465, "total_steps": 78105, "loss": 0.3458, "lr": 4.482810732318278e-06, "epoch": 1.4381281608091672, "percentage": 28.76, "elapsed_time": "0:58:40", "remaining_time": "2:25:20", "throughput": 20077.02, "total_tokens": 70686528}
|
|
{"current_steps": 22470, "total_steps": 78105, "loss": 0.4523, "lr": 4.482470430120643e-06, "epoch": 1.4384482427501442, "percentage": 28.77, "elapsed_time": "0:58:41", "remaining_time": "2:25:19", "throughput": 20077.74, "total_tokens": 70703360}
|
|
{"current_steps": 22475, "total_steps": 78105, "loss": 0.4637, "lr": 4.482130028928761e-06, "epoch": 1.438768324691121, "percentage": 28.78, "elapsed_time": "0:58:42", "remaining_time": "2:25:17", "throughput": 20078.34, "total_tokens": 70718784}
|
|
{"current_steps": 22480, "total_steps": 78105, "loss": 0.3014, "lr": 4.481789528759631e-06, "epoch": 1.4390884066320977, "percentage": 28.78, "elapsed_time": "0:58:42", "remaining_time": "2:25:16", "throughput": 20078.98, "total_tokens": 70734976}
|
|
{"current_steps": 22485, "total_steps": 78105, "loss": 0.2595, "lr": 4.481448929630254e-06, "epoch": 1.4394084885730747, "percentage": 28.79, "elapsed_time": "0:58:43", "remaining_time": "2:25:16", "throughput": 20079.75, "total_tokens": 70752064}
|
|
{"current_steps": 22490, "total_steps": 78105, "loss": 0.3057, "lr": 4.481108231557639e-06, "epoch": 1.4397285705140517, "percentage": 28.79, "elapsed_time": "0:58:44", "remaining_time": "2:25:14", "throughput": 20080.26, "total_tokens": 70767040}
|
|
{"current_steps": 22495, "total_steps": 78105, "loss": 0.3347, "lr": 4.480767434558797e-06, "epoch": 1.4400486524550284, "percentage": 28.8, "elapsed_time": "0:58:44", "remaining_time": "2:25:13", "throughput": 20080.8, "total_tokens": 70782272}
|
|
{"current_steps": 22500, "total_steps": 78105, "loss": 0.3547, "lr": 4.480426538650747e-06, "epoch": 1.4403687343960054, "percentage": 28.81, "elapsed_time": "0:58:45", "remaining_time": "2:25:12", "throughput": 20081.4, "total_tokens": 70797760}
|
|
{"current_steps": 22505, "total_steps": 78105, "loss": 0.1942, "lr": 4.480085543850512e-06, "epoch": 1.4406888163369822, "percentage": 28.81, "elapsed_time": "0:58:46", "remaining_time": "2:25:11", "throughput": 20082.05, "total_tokens": 70813440}
|
|
{"current_steps": 22510, "total_steps": 78105, "loss": 0.3961, "lr": 4.479744450175119e-06, "epoch": 1.4410088982779592, "percentage": 28.82, "elapsed_time": "0:58:46", "remaining_time": "2:25:10", "throughput": 20082.64, "total_tokens": 70829248}
|
|
{"current_steps": 22515, "total_steps": 78105, "loss": 0.4574, "lr": 4.4794032576416005e-06, "epoch": 1.4413289802189362, "percentage": 28.83, "elapsed_time": "0:58:47", "remaining_time": "2:25:09", "throughput": 20083.22, "total_tokens": 70844800}
|
|
{"current_steps": 22520, "total_steps": 78105, "loss": 0.3219, "lr": 4.479061966266992e-06, "epoch": 1.441649062159913, "percentage": 28.83, "elapsed_time": "0:58:48", "remaining_time": "2:25:08", "throughput": 20083.89, "total_tokens": 70861056}
|
|
{"current_steps": 22525, "total_steps": 78105, "loss": 0.3847, "lr": 4.478720576068339e-06, "epoch": 1.4419691441008897, "percentage": 28.84, "elapsed_time": "0:58:48", "remaining_time": "2:25:07", "throughput": 20084.45, "total_tokens": 70876480}
|
|
{"current_steps": 22530, "total_steps": 78105, "loss": 0.2795, "lr": 4.478379087062687e-06, "epoch": 1.4422892260418667, "percentage": 28.85, "elapsed_time": "0:58:49", "remaining_time": "2:25:06", "throughput": 20085.0, "total_tokens": 70891840}
|
|
{"current_steps": 22535, "total_steps": 78105, "loss": 0.4677, "lr": 4.478037499267087e-06, "epoch": 1.4426093079828437, "percentage": 28.85, "elapsed_time": "0:58:50", "remaining_time": "2:25:05", "throughput": 20085.61, "total_tokens": 70907584}
|
|
{"current_steps": 22540, "total_steps": 78105, "loss": 0.3079, "lr": 4.477695812698599e-06, "epoch": 1.4429293899238205, "percentage": 28.86, "elapsed_time": "0:58:50", "remaining_time": "2:25:04", "throughput": 20086.1, "total_tokens": 70922496}
|
|
{"current_steps": 22545, "total_steps": 78105, "loss": 0.3876, "lr": 4.477354027374283e-06, "epoch": 1.4432494718647975, "percentage": 28.86, "elapsed_time": "0:58:51", "remaining_time": "2:25:03", "throughput": 20086.74, "total_tokens": 70938304}
|
|
{"current_steps": 22550, "total_steps": 78105, "loss": 0.423, "lr": 4.477012143311207e-06, "epoch": 1.4435695538057742, "percentage": 28.87, "elapsed_time": "0:58:52", "remaining_time": "2:25:02", "throughput": 20087.3, "total_tokens": 70953600}
|
|
{"current_steps": 22555, "total_steps": 78105, "loss": 0.3551, "lr": 4.476670160526442e-06, "epoch": 1.4438896357467512, "percentage": 28.88, "elapsed_time": "0:58:52", "remaining_time": "2:25:01", "throughput": 20088.0, "total_tokens": 70970240}
|
|
{"current_steps": 22560, "total_steps": 78105, "loss": 0.3449, "lr": 4.476328079037065e-06, "epoch": 1.444209717687728, "percentage": 28.88, "elapsed_time": "0:58:53", "remaining_time": "2:25:00", "throughput": 20088.44, "total_tokens": 70984512}
|
|
{"current_steps": 22565, "total_steps": 78105, "loss": 0.3704, "lr": 4.475985898860158e-06, "epoch": 1.444529799628705, "percentage": 28.89, "elapsed_time": "0:58:54", "remaining_time": "2:24:58", "throughput": 20088.93, "total_tokens": 70998976}
|
|
{"current_steps": 22570, "total_steps": 78105, "loss": 0.3395, "lr": 4.475643620012808e-06, "epoch": 1.4448498815696817, "percentage": 28.9, "elapsed_time": "0:58:54", "remaining_time": "2:24:57", "throughput": 20089.56, "total_tokens": 71014976}
|
|
{"current_steps": 22575, "total_steps": 78105, "loss": 0.3526, "lr": 4.475301242512107e-06, "epoch": 1.4451699635106587, "percentage": 28.9, "elapsed_time": "0:58:55", "remaining_time": "2:24:56", "throughput": 20090.14, "total_tokens": 71030464}
|
|
{"current_steps": 22580, "total_steps": 78105, "loss": 0.3768, "lr": 4.47495876637515e-06, "epoch": 1.4454900454516357, "percentage": 28.91, "elapsed_time": "0:58:56", "remaining_time": "2:24:55", "throughput": 20090.66, "total_tokens": 71045888}
|
|
{"current_steps": 22585, "total_steps": 78105, "loss": 0.4105, "lr": 4.474616191619041e-06, "epoch": 1.4458101273926125, "percentage": 28.92, "elapsed_time": "0:58:56", "remaining_time": "2:24:54", "throughput": 20091.21, "total_tokens": 71061248}
|
|
{"current_steps": 22590, "total_steps": 78105, "loss": 0.3056, "lr": 4.474273518260883e-06, "epoch": 1.4461302093335893, "percentage": 28.92, "elapsed_time": "0:58:57", "remaining_time": "2:24:53", "throughput": 20091.71, "total_tokens": 71076096}
|
|
{"current_steps": 22595, "total_steps": 78105, "loss": 0.2822, "lr": 4.47393074631779e-06, "epoch": 1.4464502912745663, "percentage": 28.93, "elapsed_time": "0:58:58", "remaining_time": "2:24:52", "throughput": 20092.29, "total_tokens": 71091776}
|
|
{"current_steps": 22600, "total_steps": 78105, "loss": 0.3365, "lr": 4.473587875806878e-06, "epoch": 1.4467703732155432, "percentage": 28.94, "elapsed_time": "0:58:58", "remaining_time": "2:24:51", "throughput": 20092.97, "total_tokens": 71108096}
|
|
{"current_steps": 22605, "total_steps": 78105, "loss": 0.3015, "lr": 4.473244906745265e-06, "epoch": 1.44709045515652, "percentage": 28.94, "elapsed_time": "0:58:59", "remaining_time": "2:24:50", "throughput": 20093.55, "total_tokens": 71123584}
|
|
{"current_steps": 22610, "total_steps": 78105, "loss": 0.3567, "lr": 4.472901839150082e-06, "epoch": 1.447410537097497, "percentage": 28.95, "elapsed_time": "0:59:00", "remaining_time": "2:24:49", "throughput": 20094.29, "total_tokens": 71140352}
|
|
{"current_steps": 22615, "total_steps": 78105, "loss": 0.4354, "lr": 4.472558673038457e-06, "epoch": 1.4477306190384738, "percentage": 28.95, "elapsed_time": "0:59:01", "remaining_time": "2:24:48", "throughput": 20095.02, "total_tokens": 71157376}
|
|
{"current_steps": 22620, "total_steps": 78105, "loss": 0.3941, "lr": 4.472215408427526e-06, "epoch": 1.4480507009794508, "percentage": 28.96, "elapsed_time": "0:59:01", "remaining_time": "2:24:47", "throughput": 20095.6, "total_tokens": 71173120}
|
|
{"current_steps": 22625, "total_steps": 78105, "loss": 0.4154, "lr": 4.471872045334431e-06, "epoch": 1.4483707829204278, "percentage": 28.97, "elapsed_time": "0:59:02", "remaining_time": "2:24:46", "throughput": 20096.28, "total_tokens": 71189632}
|
|
{"current_steps": 22630, "total_steps": 78105, "loss": 0.4497, "lr": 4.471528583776317e-06, "epoch": 1.4486908648614045, "percentage": 28.97, "elapsed_time": "0:59:03", "remaining_time": "2:24:45", "throughput": 20096.92, "total_tokens": 71205952}
|
|
{"current_steps": 22635, "total_steps": 78105, "loss": 0.5939, "lr": 4.471185023770335e-06, "epoch": 1.4490109468023813, "percentage": 28.98, "elapsed_time": "0:59:03", "remaining_time": "2:24:44", "throughput": 20097.51, "total_tokens": 71221696}
|
|
{"current_steps": 22640, "total_steps": 78105, "loss": 0.3606, "lr": 4.47084136533364e-06, "epoch": 1.4493310287433583, "percentage": 28.99, "elapsed_time": "0:59:04", "remaining_time": "2:24:43", "throughput": 20098.02, "total_tokens": 71236352}
|
|
{"current_steps": 22645, "total_steps": 78105, "loss": 0.3271, "lr": 4.470497608483393e-06, "epoch": 1.4496511106843353, "percentage": 28.99, "elapsed_time": "0:59:05", "remaining_time": "2:24:42", "throughput": 20098.51, "total_tokens": 71251136}
|
|
{"current_steps": 22650, "total_steps": 78105, "loss": 0.3462, "lr": 4.47015375323676e-06, "epoch": 1.449971192625312, "percentage": 29.0, "elapsed_time": "0:59:05", "remaining_time": "2:24:41", "throughput": 20099.13, "total_tokens": 71267008}
|
|
{"current_steps": 22655, "total_steps": 78105, "loss": 0.5195, "lr": 4.46980979961091e-06, "epoch": 1.450291274566289, "percentage": 29.01, "elapsed_time": "0:59:06", "remaining_time": "2:24:40", "throughput": 20099.59, "total_tokens": 71281536}
|
|
{"current_steps": 22660, "total_steps": 78105, "loss": 0.4385, "lr": 4.4694657476230195e-06, "epoch": 1.4506113565072658, "percentage": 29.01, "elapsed_time": "0:59:07", "remaining_time": "2:24:39", "throughput": 20100.17, "total_tokens": 71296768}
|
|
{"current_steps": 22665, "total_steps": 78105, "loss": 0.346, "lr": 4.469121597290268e-06, "epoch": 1.4509314384482428, "percentage": 29.02, "elapsed_time": "0:59:07", "remaining_time": "2:24:38", "throughput": 20100.83, "total_tokens": 71313216}
|
|
{"current_steps": 22670, "total_steps": 78105, "loss": 0.3773, "lr": 4.46877734862984e-06, "epoch": 1.4512515203892196, "percentage": 29.03, "elapsed_time": "0:59:08", "remaining_time": "2:24:37", "throughput": 20101.39, "total_tokens": 71328640}
|
|
{"current_steps": 22675, "total_steps": 78105, "loss": 0.4061, "lr": 4.468433001658927e-06, "epoch": 1.4515716023301966, "percentage": 29.03, "elapsed_time": "0:59:09", "remaining_time": "2:24:35", "throughput": 20101.95, "total_tokens": 71344000}
|
|
{"current_steps": 22680, "total_steps": 78105, "loss": 0.2994, "lr": 4.468088556394723e-06, "epoch": 1.4518916842711733, "percentage": 29.04, "elapsed_time": "0:59:09", "remaining_time": "2:24:34", "throughput": 20102.64, "total_tokens": 71360768}
|
|
{"current_steps": 22685, "total_steps": 78105, "loss": 0.31, "lr": 4.467744012854428e-06, "epoch": 1.4522117662121503, "percentage": 29.04, "elapsed_time": "0:59:10", "remaining_time": "2:24:33", "throughput": 20103.23, "total_tokens": 71376576}
|
|
{"current_steps": 22690, "total_steps": 78105, "loss": 0.4804, "lr": 4.467399371055246e-06, "epoch": 1.4525318481531273, "percentage": 29.05, "elapsed_time": "0:59:11", "remaining_time": "2:24:32", "throughput": 20103.69, "total_tokens": 71391488}
|
|
{"current_steps": 22695, "total_steps": 78105, "loss": 0.4239, "lr": 4.467054631014388e-06, "epoch": 1.452851930094104, "percentage": 29.06, "elapsed_time": "0:59:11", "remaining_time": "2:24:31", "throughput": 20104.25, "total_tokens": 71406912}
|
|
{"current_steps": 22700, "total_steps": 78105, "loss": 0.3706, "lr": 4.4667097927490675e-06, "epoch": 1.453172012035081, "percentage": 29.06, "elapsed_time": "0:59:12", "remaining_time": "2:24:30", "throughput": 20104.8, "total_tokens": 71422144}
|
|
{"current_steps": 22705, "total_steps": 78105, "loss": 0.4073, "lr": 4.4663648562765034e-06, "epoch": 1.4534920939760578, "percentage": 29.07, "elapsed_time": "0:59:13", "remaining_time": "2:24:29", "throughput": 20105.42, "total_tokens": 71438080}
|
|
{"current_steps": 22710, "total_steps": 78105, "loss": 0.3904, "lr": 4.466019821613923e-06, "epoch": 1.4538121759170348, "percentage": 29.08, "elapsed_time": "0:59:13", "remaining_time": "2:24:28", "throughput": 20105.95, "total_tokens": 71453312}
|
|
{"current_steps": 22715, "total_steps": 78105, "loss": 0.302, "lr": 4.46567468877855e-06, "epoch": 1.4541322578580116, "percentage": 29.08, "elapsed_time": "0:59:14", "remaining_time": "2:24:27", "throughput": 20106.43, "total_tokens": 71467776}
|
|
{"current_steps": 22720, "total_steps": 78105, "loss": 0.2381, "lr": 4.4653294577876245e-06, "epoch": 1.4544523397989886, "percentage": 29.09, "elapsed_time": "0:59:15", "remaining_time": "2:24:26", "throughput": 20106.89, "total_tokens": 71482496}
|
|
{"current_steps": 22725, "total_steps": 78105, "loss": 0.3746, "lr": 4.464984128658382e-06, "epoch": 1.4547724217399653, "percentage": 29.1, "elapsed_time": "0:59:15", "remaining_time": "2:24:25", "throughput": 20107.35, "total_tokens": 71496960}
|
|
{"current_steps": 22730, "total_steps": 78105, "loss": 0.4394, "lr": 4.4646387014080675e-06, "epoch": 1.4550925036809423, "percentage": 29.1, "elapsed_time": "0:59:16", "remaining_time": "2:24:24", "throughput": 20107.93, "total_tokens": 71512768}
|
|
{"current_steps": 22735, "total_steps": 78105, "loss": 0.3005, "lr": 4.46429317605393e-06, "epoch": 1.4554125856219193, "percentage": 29.11, "elapsed_time": "0:59:17", "remaining_time": "2:24:23", "throughput": 20108.63, "total_tokens": 71529344}
|
|
{"current_steps": 22740, "total_steps": 78105, "loss": 0.3661, "lr": 4.463947552613222e-06, "epoch": 1.455732667562896, "percentage": 29.11, "elapsed_time": "0:59:17", "remaining_time": "2:24:22", "throughput": 20109.16, "total_tokens": 71544576}
|
|
{"current_steps": 22745, "total_steps": 78105, "loss": 0.2361, "lr": 4.463601831103204e-06, "epoch": 1.4560527495038729, "percentage": 29.12, "elapsed_time": "0:59:18", "remaining_time": "2:24:21", "throughput": 20109.67, "total_tokens": 71559488}
|
|
{"current_steps": 22750, "total_steps": 78105, "loss": 0.3353, "lr": 4.463256011541138e-06, "epoch": 1.4563728314448499, "percentage": 29.13, "elapsed_time": "0:59:19", "remaining_time": "2:24:20", "throughput": 20110.27, "total_tokens": 71575424}
|
|
{"current_steps": 22755, "total_steps": 78105, "loss": 0.2346, "lr": 4.462910093944293e-06, "epoch": 1.4566929133858268, "percentage": 29.13, "elapsed_time": "0:59:19", "remaining_time": "2:24:19", "throughput": 20110.82, "total_tokens": 71590720}
|
|
{"current_steps": 22760, "total_steps": 78105, "loss": 0.6359, "lr": 4.462564078329942e-06, "epoch": 1.4570129953268036, "percentage": 29.14, "elapsed_time": "0:59:20", "remaining_time": "2:24:17", "throughput": 20111.34, "total_tokens": 71605696}
|
|
{"current_steps": 22765, "total_steps": 78105, "loss": 0.4651, "lr": 4.462217964715364e-06, "epoch": 1.4573330772677806, "percentage": 29.15, "elapsed_time": "0:59:21", "remaining_time": "2:24:16", "throughput": 20111.88, "total_tokens": 71620672}
|
|
{"current_steps": 22770, "total_steps": 78105, "loss": 0.3109, "lr": 4.4618717531178425e-06, "epoch": 1.4576531592087574, "percentage": 29.15, "elapsed_time": "0:59:21", "remaining_time": "2:24:15", "throughput": 20112.57, "total_tokens": 71637184}
|
|
{"current_steps": 22775, "total_steps": 78105, "loss": 0.2394, "lr": 4.461525443554663e-06, "epoch": 1.4579732411497344, "percentage": 29.16, "elapsed_time": "0:59:22", "remaining_time": "2:24:14", "throughput": 20113.11, "total_tokens": 71652480}
|
|
{"current_steps": 22780, "total_steps": 78105, "loss": 0.2355, "lr": 4.461179036043122e-06, "epoch": 1.4582933230907114, "percentage": 29.17, "elapsed_time": "0:59:23", "remaining_time": "2:24:13", "throughput": 20113.65, "total_tokens": 71668096}
|
|
{"current_steps": 22785, "total_steps": 78105, "loss": 0.4601, "lr": 4.460832530600514e-06, "epoch": 1.4586134050316881, "percentage": 29.17, "elapsed_time": "0:59:23", "remaining_time": "2:24:12", "throughput": 20114.08, "total_tokens": 71682560}
|
|
{"current_steps": 22790, "total_steps": 78105, "loss": 0.2997, "lr": 4.460485927244142e-06, "epoch": 1.458933486972665, "percentage": 29.18, "elapsed_time": "0:59:24", "remaining_time": "2:24:11", "throughput": 20114.6, "total_tokens": 71697920}
|
|
{"current_steps": 22795, "total_steps": 78105, "loss": 0.2865, "lr": 4.460139225991317e-06, "epoch": 1.4592535689136419, "percentage": 29.19, "elapsed_time": "0:59:25", "remaining_time": "2:24:10", "throughput": 20115.12, "total_tokens": 71713280}
|
|
{"current_steps": 22800, "total_steps": 78105, "loss": 0.3617, "lr": 4.4597924268593474e-06, "epoch": 1.4595736508546189, "percentage": 29.19, "elapsed_time": "0:59:25", "remaining_time": "2:24:09", "throughput": 20115.74, "total_tokens": 71729088}
|
|
{"current_steps": 22805, "total_steps": 78105, "loss": 0.3602, "lr": 4.459445529865553e-06, "epoch": 1.4598937327955956, "percentage": 29.2, "elapsed_time": "0:59:26", "remaining_time": "2:24:08", "throughput": 20116.34, "total_tokens": 71744896}
|
|
{"current_steps": 22810, "total_steps": 78105, "loss": 0.329, "lr": 4.4590985350272555e-06, "epoch": 1.4602138147365726, "percentage": 29.2, "elapsed_time": "0:59:27", "remaining_time": "2:24:07", "throughput": 20116.89, "total_tokens": 71759936}
|
|
{"current_steps": 22815, "total_steps": 78105, "loss": 0.354, "lr": 4.458751442361781e-06, "epoch": 1.4605338966775494, "percentage": 29.21, "elapsed_time": "0:59:27", "remaining_time": "2:24:06", "throughput": 20117.45, "total_tokens": 71775552}
|
|
{"current_steps": 22820, "total_steps": 78105, "loss": 0.2664, "lr": 4.458404251886462e-06, "epoch": 1.4608539786185264, "percentage": 29.22, "elapsed_time": "0:59:28", "remaining_time": "2:24:05", "throughput": 20118.09, "total_tokens": 71791936}
|
|
{"current_steps": 22825, "total_steps": 78105, "loss": 0.4964, "lr": 4.458056963618636e-06, "epoch": 1.4611740605595032, "percentage": 29.22, "elapsed_time": "0:59:29", "remaining_time": "2:24:04", "throughput": 20118.68, "total_tokens": 71807552}
|
|
{"current_steps": 22830, "total_steps": 78105, "loss": 0.3495, "lr": 4.4577095775756445e-06, "epoch": 1.4614941425004802, "percentage": 29.23, "elapsed_time": "0:59:29", "remaining_time": "2:24:03", "throughput": 20119.22, "total_tokens": 71823360}
|
|
{"current_steps": 22835, "total_steps": 78105, "loss": 0.2641, "lr": 4.457362093774834e-06, "epoch": 1.461814224441457, "percentage": 29.24, "elapsed_time": "0:59:30", "remaining_time": "2:24:02", "throughput": 20119.82, "total_tokens": 71839104}
|
|
{"current_steps": 22840, "total_steps": 78105, "loss": 0.2915, "lr": 4.457014512233557e-06, "epoch": 1.462134306382434, "percentage": 29.24, "elapsed_time": "0:59:31", "remaining_time": "2:24:01", "throughput": 20120.38, "total_tokens": 71854592}
|
|
{"current_steps": 22845, "total_steps": 78105, "loss": 0.2737, "lr": 4.456666832969167e-06, "epoch": 1.462454388323411, "percentage": 29.25, "elapsed_time": "0:59:31", "remaining_time": "2:24:00", "throughput": 20121.02, "total_tokens": 71870592}
|
|
{"current_steps": 22850, "total_steps": 78105, "loss": 0.4396, "lr": 4.456319055999027e-06, "epoch": 1.4627744702643877, "percentage": 29.26, "elapsed_time": "0:59:32", "remaining_time": "2:23:59", "throughput": 20121.93, "total_tokens": 71888832}
|
|
{"current_steps": 22855, "total_steps": 78105, "loss": 0.3411, "lr": 4.455971181340506e-06, "epoch": 1.4630945522053644, "percentage": 29.26, "elapsed_time": "0:59:33", "remaining_time": "2:23:58", "throughput": 20122.44, "total_tokens": 71903808}
|
|
{"current_steps": 22860, "total_steps": 78105, "loss": 0.4472, "lr": 4.455623209010971e-06, "epoch": 1.4634146341463414, "percentage": 29.27, "elapsed_time": "0:59:34", "remaining_time": "2:23:57", "throughput": 20123.3, "total_tokens": 71921792}
|
|
{"current_steps": 22865, "total_steps": 78105, "loss": 0.4584, "lr": 4.4552751390278e-06, "epoch": 1.4637347160873184, "percentage": 29.27, "elapsed_time": "0:59:34", "remaining_time": "2:23:56", "throughput": 20123.85, "total_tokens": 71937088}
|
|
{"current_steps": 22870, "total_steps": 78105, "loss": 0.2597, "lr": 4.454926971408372e-06, "epoch": 1.4640547980282952, "percentage": 29.28, "elapsed_time": "0:59:35", "remaining_time": "2:23:55", "throughput": 20124.4, "total_tokens": 71952256}
|
|
{"current_steps": 22875, "total_steps": 78105, "loss": 0.3663, "lr": 4.454578706170075e-06, "epoch": 1.4643748799692722, "percentage": 29.29, "elapsed_time": "0:59:36", "remaining_time": "2:23:54", "throughput": 20124.98, "total_tokens": 71967872}
|
|
{"current_steps": 22880, "total_steps": 78105, "loss": 0.231, "lr": 4.454230343330298e-06, "epoch": 1.464694961910249, "percentage": 29.29, "elapsed_time": "0:59:36", "remaining_time": "2:23:53", "throughput": 20125.46, "total_tokens": 71982912}
|
|
{"current_steps": 22885, "total_steps": 78105, "loss": 0.3808, "lr": 4.453881882906437e-06, "epoch": 1.465015043851226, "percentage": 29.3, "elapsed_time": "0:59:37", "remaining_time": "2:23:52", "throughput": 20126.16, "total_tokens": 71999424}
|
|
{"current_steps": 22890, "total_steps": 78105, "loss": 0.3723, "lr": 4.453533324915892e-06, "epoch": 1.465335125792203, "percentage": 29.31, "elapsed_time": "0:59:38", "remaining_time": "2:23:51", "throughput": 20126.91, "total_tokens": 72016512}
|
|
{"current_steps": 22895, "total_steps": 78105, "loss": 0.3125, "lr": 4.453184669376068e-06, "epoch": 1.4656552077331797, "percentage": 29.31, "elapsed_time": "0:59:38", "remaining_time": "2:23:50", "throughput": 20127.43, "total_tokens": 72031744}
|
|
{"current_steps": 22900, "total_steps": 78105, "loss": 0.2324, "lr": 4.4528359163043764e-06, "epoch": 1.4659752896741565, "percentage": 29.32, "elapsed_time": "0:59:39", "remaining_time": "2:23:48", "throughput": 20128.06, "total_tokens": 72047488}
|
|
{"current_steps": 22905, "total_steps": 78105, "loss": 0.3736, "lr": 4.452487065718231e-06, "epoch": 1.4662953716151335, "percentage": 29.33, "elapsed_time": "0:59:40", "remaining_time": "2:23:47", "throughput": 20128.65, "total_tokens": 72063296}
|
|
{"current_steps": 22910, "total_steps": 78105, "loss": 0.438, "lr": 4.452138117635051e-06, "epoch": 1.4666154535561104, "percentage": 29.33, "elapsed_time": "0:59:40", "remaining_time": "2:23:46", "throughput": 20129.23, "total_tokens": 72079104}
|
|
{"current_steps": 22915, "total_steps": 78105, "loss": 0.3682, "lr": 4.4517890720722625e-06, "epoch": 1.4669355354970872, "percentage": 29.34, "elapsed_time": "0:59:41", "remaining_time": "2:23:45", "throughput": 20129.79, "total_tokens": 72094400}
|
|
{"current_steps": 22920, "total_steps": 78105, "loss": 0.4157, "lr": 4.451439929047293e-06, "epoch": 1.4672556174380642, "percentage": 29.35, "elapsed_time": "0:59:42", "remaining_time": "2:23:44", "throughput": 20130.32, "total_tokens": 72109760}
|
|
{"current_steps": 22925, "total_steps": 78105, "loss": 0.4526, "lr": 4.451090688577579e-06, "epoch": 1.467575699379041, "percentage": 29.35, "elapsed_time": "0:59:42", "remaining_time": "2:23:43", "throughput": 20131.06, "total_tokens": 72126784}
|
|
{"current_steps": 22930, "total_steps": 78105, "loss": 0.2457, "lr": 4.450741350680559e-06, "epoch": 1.467895781320018, "percentage": 29.36, "elapsed_time": "0:59:43", "remaining_time": "2:23:42", "throughput": 20131.61, "total_tokens": 72142272}
|
|
{"current_steps": 22935, "total_steps": 78105, "loss": 0.4563, "lr": 4.450391915373676e-06, "epoch": 1.4682158632609947, "percentage": 29.36, "elapsed_time": "0:59:44", "remaining_time": "2:23:41", "throughput": 20132.21, "total_tokens": 72158208}
|
|
{"current_steps": 22940, "total_steps": 78105, "loss": 0.3258, "lr": 4.450042382674381e-06, "epoch": 1.4685359452019717, "percentage": 29.37, "elapsed_time": "0:59:44", "remaining_time": "2:23:40", "throughput": 20132.71, "total_tokens": 72173376}
|
|
{"current_steps": 22945, "total_steps": 78105, "loss": 0.2837, "lr": 4.449692752600127e-06, "epoch": 1.4688560271429485, "percentage": 29.38, "elapsed_time": "0:59:45", "remaining_time": "2:23:39", "throughput": 20133.24, "total_tokens": 72188480}
|
|
{"current_steps": 22950, "total_steps": 78105, "loss": 0.3976, "lr": 4.449343025168371e-06, "epoch": 1.4691761090839255, "percentage": 29.38, "elapsed_time": "0:59:46", "remaining_time": "2:23:38", "throughput": 20133.89, "total_tokens": 72205056}
|
|
{"current_steps": 22955, "total_steps": 78105, "loss": 0.3797, "lr": 4.44899320039658e-06, "epoch": 1.4694961910249025, "percentage": 29.39, "elapsed_time": "0:59:46", "remaining_time": "2:23:37", "throughput": 20134.37, "total_tokens": 72220160}
|
|
{"current_steps": 22960, "total_steps": 78105, "loss": 0.3993, "lr": 4.448643278302219e-06, "epoch": 1.4698162729658792, "percentage": 29.4, "elapsed_time": "0:59:47", "remaining_time": "2:23:36", "throughput": 20134.81, "total_tokens": 72234688}
|
|
{"current_steps": 22965, "total_steps": 78105, "loss": 0.34, "lr": 4.448293258902764e-06, "epoch": 1.4701363549068562, "percentage": 29.4, "elapsed_time": "0:59:48", "remaining_time": "2:23:35", "throughput": 20135.35, "total_tokens": 72249856}
|
|
{"current_steps": 22970, "total_steps": 78105, "loss": 0.2964, "lr": 4.44794314221569e-06, "epoch": 1.470456436847833, "percentage": 29.41, "elapsed_time": "0:59:48", "remaining_time": "2:23:34", "throughput": 20135.97, "total_tokens": 72266048}
|
|
{"current_steps": 22975, "total_steps": 78105, "loss": 0.3189, "lr": 4.447592928258484e-06, "epoch": 1.47077651878881, "percentage": 29.42, "elapsed_time": "0:59:49", "remaining_time": "2:23:33", "throughput": 20136.45, "total_tokens": 72280960}
|
|
{"current_steps": 22980, "total_steps": 78105, "loss": 0.3027, "lr": 4.44724261704863e-06, "epoch": 1.4710966007297868, "percentage": 29.42, "elapsed_time": "0:59:50", "remaining_time": "2:23:32", "throughput": 20137.19, "total_tokens": 72298304}
|
|
{"current_steps": 22985, "total_steps": 78105, "loss": 0.4437, "lr": 4.446892208603625e-06, "epoch": 1.4714166826707638, "percentage": 29.43, "elapsed_time": "0:59:50", "remaining_time": "2:23:31", "throughput": 20137.89, "total_tokens": 72314944}
|
|
{"current_steps": 22990, "total_steps": 78105, "loss": 0.3845, "lr": 4.446541702940962e-06, "epoch": 1.4717367646117405, "percentage": 29.43, "elapsed_time": "0:59:51", "remaining_time": "2:23:30", "throughput": 20138.45, "total_tokens": 72330880}
|
|
{"current_steps": 22995, "total_steps": 78105, "loss": 0.3411, "lr": 4.446191100078147e-06, "epoch": 1.4720568465527175, "percentage": 29.44, "elapsed_time": "0:59:52", "remaining_time": "2:23:29", "throughput": 20138.94, "total_tokens": 72345536}
|
|
{"current_steps": 23000, "total_steps": 78105, "loss": 0.2296, "lr": 4.445840400032684e-06, "epoch": 1.4723769284936945, "percentage": 29.45, "elapsed_time": "0:59:53", "remaining_time": "2:23:28", "throughput": 20139.54, "total_tokens": 72361408}
|
|
{"current_steps": 23005, "total_steps": 78105, "loss": 0.4182, "lr": 4.445489602822088e-06, "epoch": 1.4726970104346713, "percentage": 29.45, "elapsed_time": "0:59:53", "remaining_time": "2:23:27", "throughput": 20140.1, "total_tokens": 72377216}
|
|
{"current_steps": 23010, "total_steps": 78105, "loss": 0.3062, "lr": 4.445138708463875e-06, "epoch": 1.473017092375648, "percentage": 29.46, "elapsed_time": "0:59:54", "remaining_time": "2:23:26", "throughput": 20140.78, "total_tokens": 72393536}
|
|
{"current_steps": 23015, "total_steps": 78105, "loss": 0.2834, "lr": 4.444787716975567e-06, "epoch": 1.473337174316625, "percentage": 29.47, "elapsed_time": "0:59:55", "remaining_time": "2:23:25", "throughput": 20141.34, "total_tokens": 72408896}
|
|
{"current_steps": 23020, "total_steps": 78105, "loss": 0.4575, "lr": 4.44443662837469e-06, "epoch": 1.473657256257602, "percentage": 29.47, "elapsed_time": "0:59:55", "remaining_time": "2:23:24", "throughput": 20141.95, "total_tokens": 72424896}
|
|
{"current_steps": 23025, "total_steps": 78105, "loss": 0.3044, "lr": 4.444085442678776e-06, "epoch": 1.4739773381985788, "percentage": 29.48, "elapsed_time": "0:59:56", "remaining_time": "2:23:23", "throughput": 20142.52, "total_tokens": 72440384}
|
|
{"current_steps": 23030, "total_steps": 78105, "loss": 0.3461, "lr": 4.443734159905361e-06, "epoch": 1.4742974201395558, "percentage": 29.49, "elapsed_time": "0:59:57", "remaining_time": "2:23:22", "throughput": 20142.98, "total_tokens": 72455040}
|
|
{"current_steps": 23035, "total_steps": 78105, "loss": 0.3132, "lr": 4.443382780071987e-06, "epoch": 1.4746175020805325, "percentage": 29.49, "elapsed_time": "0:59:57", "remaining_time": "2:23:21", "throughput": 20143.58, "total_tokens": 72470912}
|
|
{"current_steps": 23040, "total_steps": 78105, "loss": 0.4875, "lr": 4.443031303196199e-06, "epoch": 1.4749375840215095, "percentage": 29.5, "elapsed_time": "0:59:58", "remaining_time": "2:23:20", "throughput": 20144.13, "total_tokens": 72486464}
|
|
{"current_steps": 23045, "total_steps": 78105, "loss": 0.3707, "lr": 4.4426797292955504e-06, "epoch": 1.4752576659624865, "percentage": 29.51, "elapsed_time": "0:59:59", "remaining_time": "2:23:18", "throughput": 20144.64, "total_tokens": 72501440}
|
|
{"current_steps": 23050, "total_steps": 78105, "loss": 0.2885, "lr": 4.4423280583875935e-06, "epoch": 1.4755777479034633, "percentage": 29.51, "elapsed_time": "0:59:59", "remaining_time": "2:23:17", "throughput": 20145.1, "total_tokens": 72516480}
|
|
{"current_steps": 23055, "total_steps": 78105, "loss": 0.42, "lr": 4.441976290489891e-06, "epoch": 1.47589782984444, "percentage": 29.52, "elapsed_time": "1:00:00", "remaining_time": "2:23:16", "throughput": 20145.69, "total_tokens": 72532608}
|
|
{"current_steps": 23060, "total_steps": 78105, "loss": 0.3192, "lr": 4.441624425620008e-06, "epoch": 1.476217911785417, "percentage": 29.52, "elapsed_time": "1:00:01", "remaining_time": "2:23:15", "throughput": 20146.3, "total_tokens": 72548480}
|
|
{"current_steps": 23065, "total_steps": 78105, "loss": 0.3426, "lr": 4.4412724637955154e-06, "epoch": 1.476537993726394, "percentage": 29.53, "elapsed_time": "1:00:01", "remaining_time": "2:23:14", "throughput": 20146.98, "total_tokens": 72564800}
|
|
{"current_steps": 23070, "total_steps": 78105, "loss": 0.3251, "lr": 4.440920405033987e-06, "epoch": 1.4768580756673708, "percentage": 29.54, "elapsed_time": "1:00:02", "remaining_time": "2:23:13", "throughput": 20147.54, "total_tokens": 72580672}
|
|
{"current_steps": 23075, "total_steps": 78105, "loss": 0.2693, "lr": 4.440568249353003e-06, "epoch": 1.4771781576083478, "percentage": 29.54, "elapsed_time": "1:00:03", "remaining_time": "2:23:12", "throughput": 20148.11, "total_tokens": 72596736}
|
|
{"current_steps": 23080, "total_steps": 78105, "loss": 0.2994, "lr": 4.44021599677015e-06, "epoch": 1.4774982395493246, "percentage": 29.55, "elapsed_time": "1:00:03", "remaining_time": "2:23:12", "throughput": 20149.05, "total_tokens": 72615296}
|
|
{"current_steps": 23085, "total_steps": 78105, "loss": 0.3802, "lr": 4.439863647303015e-06, "epoch": 1.4778183214903016, "percentage": 29.56, "elapsed_time": "1:00:04", "remaining_time": "2:23:10", "throughput": 20149.55, "total_tokens": 72630144}
|
|
{"current_steps": 23090, "total_steps": 78105, "loss": 0.2283, "lr": 4.439511200969194e-06, "epoch": 1.4781384034312783, "percentage": 29.56, "elapsed_time": "1:00:05", "remaining_time": "2:23:09", "throughput": 20150.08, "total_tokens": 72645440}
|
|
{"current_steps": 23095, "total_steps": 78105, "loss": 0.4254, "lr": 4.439158657786286e-06, "epoch": 1.4784584853722553, "percentage": 29.57, "elapsed_time": "1:00:05", "remaining_time": "2:23:08", "throughput": 20150.61, "total_tokens": 72660480}
|
|
{"current_steps": 23100, "total_steps": 78105, "loss": 0.3099, "lr": 4.438806017771897e-06, "epoch": 1.478778567313232, "percentage": 29.58, "elapsed_time": "1:00:06", "remaining_time": "2:23:07", "throughput": 20151.36, "total_tokens": 72677696}
|
|
{"current_steps": 23105, "total_steps": 78105, "loss": 0.3439, "lr": 4.438453280943633e-06, "epoch": 1.479098649254209, "percentage": 29.58, "elapsed_time": "1:00:07", "remaining_time": "2:23:06", "throughput": 20152.09, "total_tokens": 72694912}
|
|
{"current_steps": 23110, "total_steps": 78105, "loss": 0.4268, "lr": 4.438100447319109e-06, "epoch": 1.479418731195186, "percentage": 29.59, "elapsed_time": "1:00:07", "remaining_time": "2:23:05", "throughput": 20152.56, "total_tokens": 72709440}
|
|
{"current_steps": 23115, "total_steps": 78105, "loss": 0.3321, "lr": 4.437747516915945e-06, "epoch": 1.4797388131361628, "percentage": 29.59, "elapsed_time": "1:00:08", "remaining_time": "2:23:04", "throughput": 20153.04, "total_tokens": 72724416}
|
|
{"current_steps": 23120, "total_steps": 78105, "loss": 0.3163, "lr": 4.437394489751762e-06, "epoch": 1.4800588950771396, "percentage": 29.6, "elapsed_time": "1:00:09", "remaining_time": "2:23:03", "throughput": 20153.44, "total_tokens": 72738560}
|
|
{"current_steps": 23125, "total_steps": 78105, "loss": 0.29, "lr": 4.43704136584419e-06, "epoch": 1.4803789770181166, "percentage": 29.61, "elapsed_time": "1:00:09", "remaining_time": "2:23:02", "throughput": 20153.95, "total_tokens": 72753536}
|
|
{"current_steps": 23130, "total_steps": 78105, "loss": 0.4311, "lr": 4.436688145210862e-06, "epoch": 1.4806990589590936, "percentage": 29.61, "elapsed_time": "1:00:10", "remaining_time": "2:23:01", "throughput": 20154.55, "total_tokens": 72769728}
|
|
{"current_steps": 23135, "total_steps": 78105, "loss": 0.2633, "lr": 4.436334827869416e-06, "epoch": 1.4810191409000704, "percentage": 29.62, "elapsed_time": "1:00:11", "remaining_time": "2:23:00", "throughput": 20155.03, "total_tokens": 72784448}
|
|
{"current_steps": 23140, "total_steps": 78105, "loss": 0.3225, "lr": 4.435981413837495e-06, "epoch": 1.4813392228410474, "percentage": 29.63, "elapsed_time": "1:00:11", "remaining_time": "2:22:59", "throughput": 20155.62, "total_tokens": 72800384}
|
|
{"current_steps": 23145, "total_steps": 78105, "loss": 0.3205, "lr": 4.435627903132747e-06, "epoch": 1.4816593047820241, "percentage": 29.63, "elapsed_time": "1:00:12", "remaining_time": "2:22:58", "throughput": 20156.19, "total_tokens": 72816064}
|
|
{"current_steps": 23150, "total_steps": 78105, "loss": 0.3254, "lr": 4.435274295772822e-06, "epoch": 1.4819793867230011, "percentage": 29.64, "elapsed_time": "1:00:13", "remaining_time": "2:22:57", "throughput": 20156.88, "total_tokens": 72832960}
|
|
{"current_steps": 23155, "total_steps": 78105, "loss": 0.4106, "lr": 4.43492059177538e-06, "epoch": 1.482299468663978, "percentage": 29.65, "elapsed_time": "1:00:13", "remaining_time": "2:22:56", "throughput": 20157.34, "total_tokens": 72847424}
|
|
{"current_steps": 23160, "total_steps": 78105, "loss": 0.4817, "lr": 4.434566791158083e-06, "epoch": 1.4826195506049549, "percentage": 29.65, "elapsed_time": "1:00:14", "remaining_time": "2:22:55", "throughput": 20157.86, "total_tokens": 72863168}
|
|
{"current_steps": 23165, "total_steps": 78105, "loss": 0.2532, "lr": 4.434212893938596e-06, "epoch": 1.4829396325459316, "percentage": 29.66, "elapsed_time": "1:00:15", "remaining_time": "2:22:54", "throughput": 20158.41, "total_tokens": 72878592}
|
|
{"current_steps": 23170, "total_steps": 78105, "loss": 0.3743, "lr": 4.433858900134593e-06, "epoch": 1.4832597144869086, "percentage": 29.67, "elapsed_time": "1:00:15", "remaining_time": "2:22:53", "throughput": 20158.95, "total_tokens": 72894016}
|
|
{"current_steps": 23175, "total_steps": 78105, "loss": 0.4044, "lr": 4.433504809763749e-06, "epoch": 1.4835797964278856, "percentage": 29.67, "elapsed_time": "1:00:17", "remaining_time": "2:22:53", "throughput": 20158.01, "total_tokens": 72912000}
|
|
{"current_steps": 23180, "total_steps": 78105, "loss": 0.2907, "lr": 4.4331506228437465e-06, "epoch": 1.4838998783688624, "percentage": 29.68, "elapsed_time": "1:00:17", "remaining_time": "2:22:52", "throughput": 20158.6, "total_tokens": 72928128}
|
|
{"current_steps": 23185, "total_steps": 78105, "loss": 0.3626, "lr": 4.432796339392272e-06, "epoch": 1.4842199603098394, "percentage": 29.68, "elapsed_time": "1:00:18", "remaining_time": "2:22:51", "throughput": 20159.31, "total_tokens": 72945024}
|
|
{"current_steps": 23190, "total_steps": 78105, "loss": 0.3614, "lr": 4.432441959427014e-06, "epoch": 1.4845400422508161, "percentage": 29.69, "elapsed_time": "1:00:19", "remaining_time": "2:22:50", "throughput": 20159.93, "total_tokens": 72961216}
|
|
{"current_steps": 23195, "total_steps": 78105, "loss": 0.333, "lr": 4.432087482965671e-06, "epoch": 1.4848601241917931, "percentage": 29.7, "elapsed_time": "1:00:19", "remaining_time": "2:22:49", "throughput": 20160.46, "total_tokens": 72976640}
|
|
{"current_steps": 23200, "total_steps": 78105, "loss": 0.2421, "lr": 4.431732910025943e-06, "epoch": 1.48518020613277, "percentage": 29.7, "elapsed_time": "1:00:20", "remaining_time": "2:22:48", "throughput": 20161.01, "total_tokens": 72992256}
|
|
{"current_steps": 23205, "total_steps": 78105, "loss": 0.3324, "lr": 4.431378240625537e-06, "epoch": 1.485500288073747, "percentage": 29.71, "elapsed_time": "1:00:21", "remaining_time": "2:22:47", "throughput": 20161.57, "total_tokens": 73007936}
|
|
{"current_steps": 23210, "total_steps": 78105, "loss": 0.3451, "lr": 4.43102347478216e-06, "epoch": 1.4858203700147237, "percentage": 29.72, "elapsed_time": "1:00:21", "remaining_time": "2:22:46", "throughput": 20162.18, "total_tokens": 73023808}
|
|
{"current_steps": 23215, "total_steps": 78105, "loss": 0.2459, "lr": 4.4306686125135305e-06, "epoch": 1.4861404519557007, "percentage": 29.72, "elapsed_time": "1:00:22", "remaining_time": "2:22:45", "throughput": 20162.65, "total_tokens": 73038464}
|
|
{"current_steps": 23220, "total_steps": 78105, "loss": 0.2915, "lr": 4.430313653837365e-06, "epoch": 1.4864605338966776, "percentage": 29.73, "elapsed_time": "1:00:23", "remaining_time": "2:22:44", "throughput": 20163.3, "total_tokens": 73055104}
|
|
{"current_steps": 23225, "total_steps": 78105, "loss": 0.3593, "lr": 4.429958598771392e-06, "epoch": 1.4867806158376544, "percentage": 29.74, "elapsed_time": "1:00:23", "remaining_time": "2:22:43", "throughput": 20163.8, "total_tokens": 73070336}
|
|
{"current_steps": 23230, "total_steps": 78105, "loss": 0.2923, "lr": 4.429603447333338e-06, "epoch": 1.4871006977786314, "percentage": 29.74, "elapsed_time": "1:00:24", "remaining_time": "2:22:42", "throughput": 20164.46, "total_tokens": 73086912}
|
|
{"current_steps": 23235, "total_steps": 78105, "loss": 0.3545, "lr": 4.42924819954094e-06, "epoch": 1.4874207797196082, "percentage": 29.75, "elapsed_time": "1:00:25", "remaining_time": "2:22:41", "throughput": 20165.12, "total_tokens": 73103616}
|
|
{"current_steps": 23240, "total_steps": 78105, "loss": 0.3, "lr": 4.428892855411935e-06, "epoch": 1.4877408616605852, "percentage": 29.75, "elapsed_time": "1:00:25", "remaining_time": "2:22:40", "throughput": 20165.75, "total_tokens": 73119872}
|
|
{"current_steps": 23245, "total_steps": 78105, "loss": 0.3219, "lr": 4.428537414964069e-06, "epoch": 1.488060943601562, "percentage": 29.76, "elapsed_time": "1:00:26", "remaining_time": "2:22:39", "throughput": 20166.37, "total_tokens": 73136192}
|
|
{"current_steps": 23250, "total_steps": 78105, "loss": 0.3172, "lr": 4.4281818782150895e-06, "epoch": 1.488381025542539, "percentage": 29.77, "elapsed_time": "1:00:27", "remaining_time": "2:22:38", "throughput": 20166.89, "total_tokens": 73151744}
|
|
{"current_steps": 23255, "total_steps": 78105, "loss": 0.3102, "lr": 4.42782624518275e-06, "epoch": 1.4887011074835157, "percentage": 29.77, "elapsed_time": "1:00:27", "remaining_time": "2:22:37", "throughput": 20167.42, "total_tokens": 73166656}
|
|
{"current_steps": 23260, "total_steps": 78105, "loss": 0.2958, "lr": 4.42747051588481e-06, "epoch": 1.4890211894244927, "percentage": 29.78, "elapsed_time": "1:00:28", "remaining_time": "2:22:35", "throughput": 20167.85, "total_tokens": 73181248}
|
|
{"current_steps": 23265, "total_steps": 78105, "loss": 0.3511, "lr": 4.427114690339032e-06, "epoch": 1.4893412713654697, "percentage": 29.79, "elapsed_time": "1:00:29", "remaining_time": "2:22:34", "throughput": 20168.39, "total_tokens": 73196608}
|
|
{"current_steps": 23270, "total_steps": 78105, "loss": 0.3076, "lr": 4.4267587685631855e-06, "epoch": 1.4896613533064464, "percentage": 29.79, "elapsed_time": "1:00:29", "remaining_time": "2:22:33", "throughput": 20168.94, "total_tokens": 73212096}
|
|
{"current_steps": 23275, "total_steps": 78105, "loss": 0.3852, "lr": 4.426402750575041e-06, "epoch": 1.4899814352474232, "percentage": 29.8, "elapsed_time": "1:00:30", "remaining_time": "2:22:32", "throughput": 20169.64, "total_tokens": 73229376}
|
|
{"current_steps": 23280, "total_steps": 78105, "loss": 0.292, "lr": 4.426046636392377e-06, "epoch": 1.4903015171884002, "percentage": 29.81, "elapsed_time": "1:00:31", "remaining_time": "2:22:31", "throughput": 20170.16, "total_tokens": 73244608}
|
|
{"current_steps": 23285, "total_steps": 78105, "loss": 0.3003, "lr": 4.425690426032977e-06, "epoch": 1.4906215991293772, "percentage": 29.81, "elapsed_time": "1:00:32", "remaining_time": "2:22:30", "throughput": 20170.75, "total_tokens": 73260864}
|
|
{"current_steps": 23290, "total_steps": 78105, "loss": 0.3578, "lr": 4.425334119514628e-06, "epoch": 1.490941681070354, "percentage": 29.82, "elapsed_time": "1:00:32", "remaining_time": "2:22:29", "throughput": 20171.46, "total_tokens": 73277568}
|
|
{"current_steps": 23295, "total_steps": 78105, "loss": 0.3293, "lr": 4.424977716855121e-06, "epoch": 1.491261763011331, "percentage": 29.83, "elapsed_time": "1:00:33", "remaining_time": "2:22:28", "throughput": 20172.12, "total_tokens": 73294208}
|
|
{"current_steps": 23300, "total_steps": 78105, "loss": 0.42, "lr": 4.424621218072255e-06, "epoch": 1.4915818449523077, "percentage": 29.83, "elapsed_time": "1:00:34", "remaining_time": "2:22:27", "throughput": 20172.65, "total_tokens": 73309120}
|
|
{"current_steps": 23305, "total_steps": 78105, "loss": 0.4557, "lr": 4.424264623183829e-06, "epoch": 1.4919019268932847, "percentage": 29.84, "elapsed_time": "1:00:34", "remaining_time": "2:22:26", "throughput": 20173.18, "total_tokens": 73324288}
|
|
{"current_steps": 23310, "total_steps": 78105, "loss": 0.3435, "lr": 4.4239079322076515e-06, "epoch": 1.4922220088342617, "percentage": 29.84, "elapsed_time": "1:00:35", "remaining_time": "2:22:25", "throughput": 20173.85, "total_tokens": 73340736}
|
|
{"current_steps": 23315, "total_steps": 78105, "loss": 0.2707, "lr": 4.423551145161533e-06, "epoch": 1.4925420907752385, "percentage": 29.85, "elapsed_time": "1:00:36", "remaining_time": "2:22:24", "throughput": 20174.29, "total_tokens": 73355456}
|
|
{"current_steps": 23320, "total_steps": 78105, "loss": 0.2235, "lr": 4.4231942620632895e-06, "epoch": 1.4928621727162152, "percentage": 29.86, "elapsed_time": "1:00:36", "remaining_time": "2:22:23", "throughput": 20174.86, "total_tokens": 73370816}
|
|
{"current_steps": 23325, "total_steps": 78105, "loss": 0.2683, "lr": 4.422837282930743e-06, "epoch": 1.4931822546571922, "percentage": 29.86, "elapsed_time": "1:00:37", "remaining_time": "2:22:22", "throughput": 20175.34, "total_tokens": 73385536}
|
|
{"current_steps": 23330, "total_steps": 78105, "loss": 0.3443, "lr": 4.422480207781718e-06, "epoch": 1.4935023365981692, "percentage": 29.87, "elapsed_time": "1:00:38", "remaining_time": "2:22:21", "throughput": 20175.87, "total_tokens": 73400576}
|
|
{"current_steps": 23335, "total_steps": 78105, "loss": 0.3525, "lr": 4.422123036634044e-06, "epoch": 1.493822418539146, "percentage": 29.88, "elapsed_time": "1:00:38", "remaining_time": "2:22:20", "throughput": 20176.34, "total_tokens": 73415296}
|
|
{"current_steps": 23340, "total_steps": 78105, "loss": 0.3324, "lr": 4.421765769505559e-06, "epoch": 1.494142500480123, "percentage": 29.88, "elapsed_time": "1:00:39", "remaining_time": "2:22:19", "throughput": 20176.94, "total_tokens": 73431232}
|
|
{"current_steps": 23345, "total_steps": 78105, "loss": 0.3936, "lr": 4.421408406414101e-06, "epoch": 1.4944625824210998, "percentage": 29.89, "elapsed_time": "1:00:40", "remaining_time": "2:22:18", "throughput": 20177.45, "total_tokens": 73446336}
|
|
{"current_steps": 23350, "total_steps": 78105, "loss": 0.4062, "lr": 4.421050947377515e-06, "epoch": 1.4947826643620767, "percentage": 29.9, "elapsed_time": "1:00:40", "remaining_time": "2:22:17", "throughput": 20177.96, "total_tokens": 73461440}
|
|
{"current_steps": 23355, "total_steps": 78105, "loss": 0.3053, "lr": 4.4206933924136515e-06, "epoch": 1.4951027463030535, "percentage": 29.9, "elapsed_time": "1:00:41", "remaining_time": "2:22:16", "throughput": 20178.52, "total_tokens": 73476992}
|
|
{"current_steps": 23360, "total_steps": 78105, "loss": 0.395, "lr": 4.4203357415403645e-06, "epoch": 1.4954228282440305, "percentage": 29.91, "elapsed_time": "1:00:42", "remaining_time": "2:22:15", "throughput": 20179.27, "total_tokens": 73494208}
|
|
{"current_steps": 23365, "total_steps": 78105, "loss": 0.3168, "lr": 4.419977994775513e-06, "epoch": 1.4957429101850073, "percentage": 29.91, "elapsed_time": "1:00:42", "remaining_time": "2:22:14", "throughput": 20179.81, "total_tokens": 73509760}
|
|
{"current_steps": 23370, "total_steps": 78105, "loss": 0.211, "lr": 4.419620152136962e-06, "epoch": 1.4960629921259843, "percentage": 29.92, "elapsed_time": "1:00:43", "remaining_time": "2:22:13", "throughput": 20180.39, "total_tokens": 73525568}
|
|
{"current_steps": 23375, "total_steps": 78105, "loss": 0.3046, "lr": 4.419262213642579e-06, "epoch": 1.4963830740669612, "percentage": 29.93, "elapsed_time": "1:00:44", "remaining_time": "2:22:12", "throughput": 20181.01, "total_tokens": 73541824}
|
|
{"current_steps": 23380, "total_steps": 78105, "loss": 0.299, "lr": 4.418904179310238e-06, "epoch": 1.496703156007938, "percentage": 29.93, "elapsed_time": "1:00:44", "remaining_time": "2:22:11", "throughput": 20181.62, "total_tokens": 73558080}
|
|
{"current_steps": 23385, "total_steps": 78105, "loss": 0.3257, "lr": 4.418546049157817e-06, "epoch": 1.4970232379489148, "percentage": 29.94, "elapsed_time": "1:00:45", "remaining_time": "2:22:10", "throughput": 20182.36, "total_tokens": 73575232}
|
|
{"current_steps": 23390, "total_steps": 78105, "loss": 0.3447, "lr": 4.4181878232032005e-06, "epoch": 1.4973433198898918, "percentage": 29.95, "elapsed_time": "1:00:46", "remaining_time": "2:22:09", "throughput": 20182.92, "total_tokens": 73590720}
|
|
{"current_steps": 23395, "total_steps": 78105, "loss": 0.513, "lr": 4.417829501464276e-06, "epoch": 1.4976634018308688, "percentage": 29.95, "elapsed_time": "1:00:46", "remaining_time": "2:22:08", "throughput": 20183.48, "total_tokens": 73606336}
|
|
{"current_steps": 23400, "total_steps": 78105, "loss": 0.4223, "lr": 4.417471083958935e-06, "epoch": 1.4979834837718455, "percentage": 29.96, "elapsed_time": "1:00:47", "remaining_time": "2:22:07", "throughput": 20184.09, "total_tokens": 73622336}
|
|
{"current_steps": 23405, "total_steps": 78105, "loss": 0.471, "lr": 4.417112570705075e-06, "epoch": 1.4983035657128225, "percentage": 29.97, "elapsed_time": "1:00:48", "remaining_time": "2:22:06", "throughput": 20184.7, "total_tokens": 73640320}
|
|
{"current_steps": 23410, "total_steps": 78105, "loss": 0.4929, "lr": 4.4167539617206005e-06, "epoch": 1.4986236476537993, "percentage": 29.97, "elapsed_time": "1:00:49", "remaining_time": "2:22:05", "throughput": 20185.34, "total_tokens": 73656832}
|
|
{"current_steps": 23415, "total_steps": 78105, "loss": 0.3796, "lr": 4.416395257023417e-06, "epoch": 1.4989437295947763, "percentage": 29.98, "elapsed_time": "1:00:49", "remaining_time": "2:22:04", "throughput": 20185.91, "total_tokens": 73672448}
|
|
{"current_steps": 23420, "total_steps": 78105, "loss": 0.3427, "lr": 4.416036456631436e-06, "epoch": 1.4992638115357533, "percentage": 29.99, "elapsed_time": "1:00:50", "remaining_time": "2:22:03", "throughput": 20186.64, "total_tokens": 73689600}
|
|
{"current_steps": 23425, "total_steps": 78105, "loss": 0.3204, "lr": 4.415677560562575e-06, "epoch": 1.49958389347673, "percentage": 29.99, "elapsed_time": "1:00:51", "remaining_time": "2:22:02", "throughput": 20187.15, "total_tokens": 73705152}
|
|
{"current_steps": 23430, "total_steps": 78105, "loss": 0.3287, "lr": 4.4153185688347555e-06, "epoch": 1.4999039754177068, "percentage": 30.0, "elapsed_time": "1:00:51", "remaining_time": "2:22:01", "throughput": 20187.71, "total_tokens": 73720512}
|
|
{"current_steps": 23435, "total_steps": 78105, "loss": 0.2897, "lr": 4.414959481465902e-06, "epoch": 1.5002240573586838, "percentage": 30.0, "elapsed_time": "1:00:52", "remaining_time": "2:22:00", "throughput": 20188.29, "total_tokens": 73736704}
|
|
{"current_steps": 23436, "total_steps": 78105, "eval_loss": 0.4805953800678253, "epoch": 1.5002880737468791, "percentage": 30.01, "elapsed_time": "1:01:43", "remaining_time": "2:23:59", "throughput": 19910.54, "total_tokens": 73739776}
|
|
{"current_steps": 23440, "total_steps": 78105, "loss": 0.4865, "lr": 4.414600298473947e-06, "epoch": 1.5005441392996608, "percentage": 30.01, "elapsed_time": "1:02:16", "remaining_time": "2:25:13", "throughput": 19739.82, "total_tokens": 73752704}
|
|
{"current_steps": 23445, "total_steps": 78105, "loss": 0.3438, "lr": 4.414241019876826e-06, "epoch": 1.5008642212406376, "percentage": 30.02, "elapsed_time": "1:02:16", "remaining_time": "2:25:12", "throughput": 19740.51, "total_tokens": 73768896}
|
|
{"current_steps": 23450, "total_steps": 78105, "loss": 0.3562, "lr": 4.4138816456924795e-06, "epoch": 1.5011843031816143, "percentage": 30.02, "elapsed_time": "1:02:17", "remaining_time": "2:25:11", "throughput": 19741.05, "total_tokens": 73783744}
|
|
{"current_steps": 23455, "total_steps": 78105, "loss": 0.2767, "lr": 4.413522175938854e-06, "epoch": 1.5015043851225913, "percentage": 30.03, "elapsed_time": "1:02:18", "remaining_time": "2:25:10", "throughput": 19741.65, "total_tokens": 73798784}
|
|
{"current_steps": 23460, "total_steps": 78105, "loss": 0.2411, "lr": 4.413162610633896e-06, "epoch": 1.5018244670635683, "percentage": 30.04, "elapsed_time": "1:02:18", "remaining_time": "2:25:09", "throughput": 19742.42, "total_tokens": 73815424}
|
|
{"current_steps": 23465, "total_steps": 78105, "loss": 0.2666, "lr": 4.412802949795563e-06, "epoch": 1.5021445490045453, "percentage": 30.04, "elapsed_time": "1:02:19", "remaining_time": "2:25:07", "throughput": 19743.12, "total_tokens": 73831488}
|
|
{"current_steps": 23470, "total_steps": 78105, "loss": 0.3185, "lr": 4.412443193441815e-06, "epoch": 1.502464630945522, "percentage": 30.05, "elapsed_time": "1:02:20", "remaining_time": "2:25:06", "throughput": 19743.68, "total_tokens": 73846976}
|
|
{"current_steps": 23475, "total_steps": 78105, "loss": 0.1718, "lr": 4.412083341590615e-06, "epoch": 1.5027847128864988, "percentage": 30.06, "elapsed_time": "1:02:20", "remaining_time": "2:25:05", "throughput": 19744.38, "total_tokens": 73863168}
|
|
{"current_steps": 23480, "total_steps": 78105, "loss": 0.2745, "lr": 4.411723394259933e-06, "epoch": 1.5031047948274758, "percentage": 30.06, "elapsed_time": "1:02:21", "remaining_time": "2:25:04", "throughput": 19744.98, "total_tokens": 73878912}
|
|
{"current_steps": 23485, "total_steps": 78105, "loss": 0.3519, "lr": 4.411363351467742e-06, "epoch": 1.5034248767684528, "percentage": 30.07, "elapsed_time": "1:02:22", "remaining_time": "2:25:03", "throughput": 19745.69, "total_tokens": 73895104}
|
|
{"current_steps": 23490, "total_steps": 78105, "loss": 0.2996, "lr": 4.411003213232021e-06, "epoch": 1.5037449587094296, "percentage": 30.07, "elapsed_time": "1:02:22", "remaining_time": "2:25:02", "throughput": 19746.19, "total_tokens": 73909568}
|
|
{"current_steps": 23495, "total_steps": 78105, "loss": 0.3208, "lr": 4.410642979570754e-06, "epoch": 1.5040650406504064, "percentage": 30.08, "elapsed_time": "1:02:23", "remaining_time": "2:25:01", "throughput": 19747.1, "total_tokens": 73927744}
|
|
{"current_steps": 23500, "total_steps": 78105, "loss": 0.4484, "lr": 4.410282650501928e-06, "epoch": 1.5043851225913834, "percentage": 30.09, "elapsed_time": "1:02:24", "remaining_time": "2:25:00", "throughput": 19747.69, "total_tokens": 73943040}
|
|
{"current_steps": 23505, "total_steps": 78105, "loss": 0.3276, "lr": 4.409922226043537e-06, "epoch": 1.5047052045323603, "percentage": 30.09, "elapsed_time": "1:02:25", "remaining_time": "2:24:59", "throughput": 19748.35, "total_tokens": 73959360}
|
|
{"current_steps": 23510, "total_steps": 78105, "loss": 0.2826, "lr": 4.40956170621358e-06, "epoch": 1.5050252864733373, "percentage": 30.1, "elapsed_time": "1:02:25", "remaining_time": "2:24:58", "throughput": 19748.91, "total_tokens": 73974144}
|
|
{"current_steps": 23515, "total_steps": 78105, "loss": 0.3449, "lr": 4.409201091030057e-06, "epoch": 1.505345368414314, "percentage": 30.11, "elapsed_time": "1:02:26", "remaining_time": "2:24:57", "throughput": 19749.42, "total_tokens": 73988736}
|
|
{"current_steps": 23520, "total_steps": 78105, "loss": 0.2782, "lr": 4.408840380510975e-06, "epoch": 1.5056654503552909, "percentage": 30.11, "elapsed_time": "1:02:27", "remaining_time": "2:24:56", "throughput": 19750.1, "total_tokens": 74004928}
|
|
{"current_steps": 23525, "total_steps": 78105, "loss": 0.2831, "lr": 4.408479574674348e-06, "epoch": 1.5059855322962679, "percentage": 30.12, "elapsed_time": "1:02:27", "remaining_time": "2:24:55", "throughput": 19750.73, "total_tokens": 74020544}
|
|
{"current_steps": 23530, "total_steps": 78105, "loss": 0.3303, "lr": 4.408118673538192e-06, "epoch": 1.5063056142372448, "percentage": 30.13, "elapsed_time": "1:02:28", "remaining_time": "2:24:53", "throughput": 19751.27, "total_tokens": 74035648}
|
|
{"current_steps": 23535, "total_steps": 78105, "loss": 0.375, "lr": 4.407757677120529e-06, "epoch": 1.5066256961782216, "percentage": 30.13, "elapsed_time": "1:02:29", "remaining_time": "2:24:52", "throughput": 19751.83, "total_tokens": 74051008}
|
|
{"current_steps": 23540, "total_steps": 78105, "loss": 0.4906, "lr": 4.407396585439384e-06, "epoch": 1.5069457781191984, "percentage": 30.14, "elapsed_time": "1:02:29", "remaining_time": "2:24:51", "throughput": 19752.43, "total_tokens": 74066368}
|
|
{"current_steps": 23545, "total_steps": 78105, "loss": 0.3316, "lr": 4.407035398512789e-06, "epoch": 1.5072658600601754, "percentage": 30.15, "elapsed_time": "1:02:30", "remaining_time": "2:24:50", "throughput": 19752.99, "total_tokens": 74081344}
|
|
{"current_steps": 23550, "total_steps": 78105, "loss": 0.4379, "lr": 4.40667411635878e-06, "epoch": 1.5075859420011524, "percentage": 30.15, "elapsed_time": "1:02:31", "remaining_time": "2:24:49", "throughput": 19753.78, "total_tokens": 74098624}
|
|
{"current_steps": 23555, "total_steps": 78105, "loss": 0.2507, "lr": 4.406312738995397e-06, "epoch": 1.5079060239421291, "percentage": 30.16, "elapsed_time": "1:02:31", "remaining_time": "2:24:48", "throughput": 19754.48, "total_tokens": 74115072}
|
|
{"current_steps": 23560, "total_steps": 78105, "loss": 0.4069, "lr": 4.405951266440685e-06, "epoch": 1.5082261058831061, "percentage": 30.16, "elapsed_time": "1:02:32", "remaining_time": "2:24:47", "throughput": 19755.03, "total_tokens": 74130112}
|
|
{"current_steps": 23565, "total_steps": 78105, "loss": 0.5056, "lr": 4.405589698712695e-06, "epoch": 1.508546187824083, "percentage": 30.17, "elapsed_time": "1:02:33", "remaining_time": "2:24:46", "throughput": 19755.71, "total_tokens": 74146432}
|
|
{"current_steps": 23570, "total_steps": 78105, "loss": 0.3095, "lr": 4.40522803582948e-06, "epoch": 1.5088662697650599, "percentage": 30.18, "elapsed_time": "1:02:33", "remaining_time": "2:24:45", "throughput": 19756.27, "total_tokens": 74161664}
|
|
{"current_steps": 23575, "total_steps": 78105, "loss": 0.3228, "lr": 4.404866277809102e-06, "epoch": 1.5091863517060369, "percentage": 30.18, "elapsed_time": "1:02:34", "remaining_time": "2:24:44", "throughput": 19756.89, "total_tokens": 74177408}
|
|
{"current_steps": 23580, "total_steps": 78105, "loss": 0.3015, "lr": 4.404504424669624e-06, "epoch": 1.5095064336470136, "percentage": 30.19, "elapsed_time": "1:02:35", "remaining_time": "2:24:43", "throughput": 19757.49, "total_tokens": 74193088}
|
|
{"current_steps": 23585, "total_steps": 78105, "loss": 0.4241, "lr": 4.404142476429116e-06, "epoch": 1.5098265155879904, "percentage": 30.2, "elapsed_time": "1:02:35", "remaining_time": "2:24:42", "throughput": 19758.08, "total_tokens": 74208640}
|
|
{"current_steps": 23590, "total_steps": 78105, "loss": 0.2578, "lr": 4.40378043310565e-06, "epoch": 1.5101465975289674, "percentage": 30.2, "elapsed_time": "1:02:36", "remaining_time": "2:24:41", "throughput": 19758.73, "total_tokens": 74224320}
|
|
{"current_steps": 23595, "total_steps": 78105, "loss": 0.4185, "lr": 4.403418294717305e-06, "epoch": 1.5104666794699444, "percentage": 30.21, "elapsed_time": "1:02:37", "remaining_time": "2:24:40", "throughput": 19759.35, "total_tokens": 74240192}
|
|
{"current_steps": 23600, "total_steps": 78105, "loss": 0.3921, "lr": 4.403056061282166e-06, "epoch": 1.5107867614109212, "percentage": 30.22, "elapsed_time": "1:02:37", "remaining_time": "2:24:38", "throughput": 19759.82, "total_tokens": 74254976}
|
|
{"current_steps": 23605, "total_steps": 78105, "loss": 0.2923, "lr": 4.40269373281832e-06, "epoch": 1.511106843351898, "percentage": 30.22, "elapsed_time": "1:02:38", "remaining_time": "2:24:37", "throughput": 19760.38, "total_tokens": 74270208}
|
|
{"current_steps": 23610, "total_steps": 78105, "loss": 0.3144, "lr": 4.40233130934386e-06, "epoch": 1.511426925292875, "percentage": 30.23, "elapsed_time": "1:02:39", "remaining_time": "2:24:36", "throughput": 19760.92, "total_tokens": 74285184}
|
|
{"current_steps": 23615, "total_steps": 78105, "loss": 0.3244, "lr": 4.401968790876882e-06, "epoch": 1.511747007233852, "percentage": 30.23, "elapsed_time": "1:02:39", "remaining_time": "2:24:35", "throughput": 19761.55, "total_tokens": 74300992}
|
|
{"current_steps": 23620, "total_steps": 78105, "loss": 0.5305, "lr": 4.40160617743549e-06, "epoch": 1.512067089174829, "percentage": 30.24, "elapsed_time": "1:02:40", "remaining_time": "2:24:34", "throughput": 19762.23, "total_tokens": 74317504}
|
|
{"current_steps": 23625, "total_steps": 78105, "loss": 0.2176, "lr": 4.4012434690377916e-06, "epoch": 1.5123871711158057, "percentage": 30.25, "elapsed_time": "1:02:41", "remaining_time": "2:24:33", "throughput": 19762.79, "total_tokens": 74332928}
|
|
{"current_steps": 23630, "total_steps": 78105, "loss": 0.3203, "lr": 4.400880665701897e-06, "epoch": 1.5127072530567824, "percentage": 30.25, "elapsed_time": "1:02:41", "remaining_time": "2:24:32", "throughput": 19763.49, "total_tokens": 74349760}
|
|
{"current_steps": 23635, "total_steps": 78105, "loss": 0.2868, "lr": 4.400517767445923e-06, "epoch": 1.5130273349977594, "percentage": 30.26, "elapsed_time": "1:02:42", "remaining_time": "2:24:31", "throughput": 19764.0, "total_tokens": 74364544}
|
|
{"current_steps": 23640, "total_steps": 78105, "loss": 0.2648, "lr": 4.4001547742879915e-06, "epoch": 1.5133474169387364, "percentage": 30.27, "elapsed_time": "1:02:43", "remaining_time": "2:24:30", "throughput": 19764.64, "total_tokens": 74380736}
|
|
{"current_steps": 23645, "total_steps": 78105, "loss": 0.3199, "lr": 4.3997916862462286e-06, "epoch": 1.5136674988797132, "percentage": 30.27, "elapsed_time": "1:02:43", "remaining_time": "2:24:29", "throughput": 19765.23, "total_tokens": 74396032}
|
|
{"current_steps": 23650, "total_steps": 78105, "loss": 0.3758, "lr": 4.3994285033387635e-06, "epoch": 1.51398758082069, "percentage": 30.28, "elapsed_time": "1:02:44", "remaining_time": "2:24:28", "throughput": 19765.77, "total_tokens": 74411264}
|
|
{"current_steps": 23655, "total_steps": 78105, "loss": 0.2341, "lr": 4.399065225583733e-06, "epoch": 1.514307662761667, "percentage": 30.29, "elapsed_time": "1:02:45", "remaining_time": "2:24:27", "throughput": 19766.25, "total_tokens": 74425984}
|
|
{"current_steps": 23660, "total_steps": 78105, "loss": 0.3427, "lr": 4.398701852999277e-06, "epoch": 1.514627744702644, "percentage": 30.29, "elapsed_time": "1:02:45", "remaining_time": "2:24:26", "throughput": 19766.91, "total_tokens": 74441984}
|
|
{"current_steps": 23665, "total_steps": 78105, "loss": 0.2755, "lr": 4.398338385603541e-06, "epoch": 1.514947826643621, "percentage": 30.3, "elapsed_time": "1:02:46", "remaining_time": "2:24:25", "throughput": 19767.68, "total_tokens": 74459200}
|
|
{"current_steps": 23670, "total_steps": 78105, "loss": 0.2266, "lr": 4.397974823414673e-06, "epoch": 1.5152679085845977, "percentage": 30.31, "elapsed_time": "1:02:47", "remaining_time": "2:24:24", "throughput": 19768.4, "total_tokens": 74475776}
|
|
{"current_steps": 23675, "total_steps": 78105, "loss": 0.303, "lr": 4.39761116645083e-06, "epoch": 1.5155879905255745, "percentage": 30.31, "elapsed_time": "1:02:48", "remaining_time": "2:24:23", "throughput": 19769.04, "total_tokens": 74491712}
|
|
{"current_steps": 23680, "total_steps": 78105, "loss": 0.333, "lr": 4.397247414730169e-06, "epoch": 1.5159080724665515, "percentage": 30.32, "elapsed_time": "1:02:48", "remaining_time": "2:24:22", "throughput": 19769.74, "total_tokens": 74508224}
|
|
{"current_steps": 23685, "total_steps": 78105, "loss": 0.363, "lr": 4.396883568270855e-06, "epoch": 1.5162281544075285, "percentage": 30.32, "elapsed_time": "1:02:49", "remaining_time": "2:24:20", "throughput": 19770.33, "total_tokens": 74523968}
|
|
{"current_steps": 23690, "total_steps": 78105, "loss": 0.211, "lr": 4.396519627091055e-06, "epoch": 1.5165482363485052, "percentage": 30.33, "elapsed_time": "1:02:50", "remaining_time": "2:24:19", "throughput": 19771.04, "total_tokens": 74540416}
|
|
{"current_steps": 23695, "total_steps": 78105, "loss": 0.2272, "lr": 4.3961555912089434e-06, "epoch": 1.516868318289482, "percentage": 30.34, "elapsed_time": "1:02:50", "remaining_time": "2:24:18", "throughput": 19771.5, "total_tokens": 74555072}
|
|
{"current_steps": 23700, "total_steps": 78105, "loss": 0.2657, "lr": 4.395791460642698e-06, "epoch": 1.517188400230459, "percentage": 30.34, "elapsed_time": "1:02:51", "remaining_time": "2:24:17", "throughput": 19772.03, "total_tokens": 74570240}
|
|
{"current_steps": 23705, "total_steps": 78105, "loss": 0.3457, "lr": 4.395427235410504e-06, "epoch": 1.517508482171436, "percentage": 30.35, "elapsed_time": "1:02:52", "remaining_time": "2:24:16", "throughput": 19772.56, "total_tokens": 74585280}
|
|
{"current_steps": 23710, "total_steps": 78105, "loss": 0.4364, "lr": 4.395062915530545e-06, "epoch": 1.5178285641124127, "percentage": 30.36, "elapsed_time": "1:02:52", "remaining_time": "2:24:15", "throughput": 19773.12, "total_tokens": 74600576}
|
|
{"current_steps": 23715, "total_steps": 78105, "loss": 0.2814, "lr": 4.394698501021015e-06, "epoch": 1.5181486460533895, "percentage": 30.36, "elapsed_time": "1:02:53", "remaining_time": "2:24:14", "throughput": 19773.69, "total_tokens": 74616000}
|
|
{"current_steps": 23720, "total_steps": 78105, "loss": 0.2533, "lr": 4.394333991900111e-06, "epoch": 1.5184687279943665, "percentage": 30.37, "elapsed_time": "1:02:54", "remaining_time": "2:24:13", "throughput": 19774.22, "total_tokens": 74631360}
|
|
{"current_steps": 23725, "total_steps": 78105, "loss": 0.4002, "lr": 4.393969388186036e-06, "epoch": 1.5187888099353435, "percentage": 30.38, "elapsed_time": "1:02:54", "remaining_time": "2:24:12", "throughput": 19774.86, "total_tokens": 74647680}
|
|
{"current_steps": 23730, "total_steps": 78105, "loss": 0.507, "lr": 4.3936046898969945e-06, "epoch": 1.5191088918763205, "percentage": 30.38, "elapsed_time": "1:02:55", "remaining_time": "2:24:11", "throughput": 19775.35, "total_tokens": 74662912}
|
|
{"current_steps": 23735, "total_steps": 78105, "loss": 0.3759, "lr": 4.393239897051197e-06, "epoch": 1.5194289738172972, "percentage": 30.39, "elapsed_time": "1:02:56", "remaining_time": "2:24:10", "throughput": 19776.05, "total_tokens": 74679744}
|
|
{"current_steps": 23740, "total_steps": 78105, "loss": 0.4432, "lr": 4.392875009666862e-06, "epoch": 1.519749055758274, "percentage": 30.39, "elapsed_time": "1:02:56", "remaining_time": "2:24:09", "throughput": 19776.8, "total_tokens": 74696768}
|
|
{"current_steps": 23745, "total_steps": 78105, "loss": 0.463, "lr": 4.392510027762208e-06, "epoch": 1.520069137699251, "percentage": 30.4, "elapsed_time": "1:02:57", "remaining_time": "2:24:08", "throughput": 19777.31, "total_tokens": 74711936}
|
|
{"current_steps": 23750, "total_steps": 78105, "loss": 0.3643, "lr": 4.392144951355461e-06, "epoch": 1.520389219640228, "percentage": 30.41, "elapsed_time": "1:02:58", "remaining_time": "2:24:07", "throughput": 19777.87, "total_tokens": 74727552}
|
|
{"current_steps": 23755, "total_steps": 78105, "loss": 0.3228, "lr": 4.391779780464851e-06, "epoch": 1.5207093015812048, "percentage": 30.41, "elapsed_time": "1:02:59", "remaining_time": "2:24:06", "throughput": 19778.4, "total_tokens": 74742592}
|
|
{"current_steps": 23760, "total_steps": 78105, "loss": 0.2362, "lr": 4.391414515108613e-06, "epoch": 1.5210293835221815, "percentage": 30.42, "elapsed_time": "1:02:59", "remaining_time": "2:24:05", "throughput": 19778.98, "total_tokens": 74758400}
|
|
{"current_steps": 23765, "total_steps": 78105, "loss": 0.3775, "lr": 4.391049155304986e-06, "epoch": 1.5213494654631585, "percentage": 30.43, "elapsed_time": "1:03:00", "remaining_time": "2:24:04", "throughput": 19779.67, "total_tokens": 74774784}
|
|
{"current_steps": 23770, "total_steps": 78105, "loss": 0.4361, "lr": 4.390683701072214e-06, "epoch": 1.5216695474041355, "percentage": 30.43, "elapsed_time": "1:03:01", "remaining_time": "2:24:02", "throughput": 19780.21, "total_tokens": 74789888}
|
|
{"current_steps": 23775, "total_steps": 78105, "loss": 0.3499, "lr": 4.390318152428546e-06, "epoch": 1.5219896293451125, "percentage": 30.44, "elapsed_time": "1:03:01", "remaining_time": "2:24:01", "throughput": 19780.83, "total_tokens": 74805696}
|
|
{"current_steps": 23780, "total_steps": 78105, "loss": 0.3708, "lr": 4.3899525093922354e-06, "epoch": 1.5223097112860893, "percentage": 30.45, "elapsed_time": "1:03:02", "remaining_time": "2:24:00", "throughput": 19781.36, "total_tokens": 74820864}
|
|
{"current_steps": 23785, "total_steps": 78105, "loss": 0.2826, "lr": 4.3895867719815425e-06, "epoch": 1.522629793227066, "percentage": 30.45, "elapsed_time": "1:03:03", "remaining_time": "2:23:59", "throughput": 19782.08, "total_tokens": 74837312}
|
|
{"current_steps": 23790, "total_steps": 78105, "loss": 0.2652, "lr": 4.3892209402147275e-06, "epoch": 1.522949875168043, "percentage": 30.46, "elapsed_time": "1:03:03", "remaining_time": "2:23:58", "throughput": 19782.63, "total_tokens": 74852736}
|
|
{"current_steps": 23795, "total_steps": 78105, "loss": 0.231, "lr": 4.388855014110059e-06, "epoch": 1.52326995710902, "percentage": 30.47, "elapsed_time": "1:03:04", "remaining_time": "2:23:57", "throughput": 19783.15, "total_tokens": 74867840}
|
|
{"current_steps": 23800, "total_steps": 78105, "loss": 0.2718, "lr": 4.388488993685811e-06, "epoch": 1.5235900390499968, "percentage": 30.47, "elapsed_time": "1:03:05", "remaining_time": "2:23:56", "throughput": 19783.61, "total_tokens": 74882368}
|
|
{"current_steps": 23805, "total_steps": 78105, "loss": 0.3443, "lr": 4.388122878960259e-06, "epoch": 1.5239101209909736, "percentage": 30.48, "elapsed_time": "1:03:05", "remaining_time": "2:23:55", "throughput": 19784.06, "total_tokens": 74896576}
|
|
{"current_steps": 23810, "total_steps": 78105, "loss": 0.2606, "lr": 4.387756669951685e-06, "epoch": 1.5242302029319506, "percentage": 30.48, "elapsed_time": "1:03:06", "remaining_time": "2:23:54", "throughput": 19784.86, "total_tokens": 74913792}
|
|
{"current_steps": 23815, "total_steps": 78105, "loss": 0.3876, "lr": 4.387390366678376e-06, "epoch": 1.5245502848729275, "percentage": 30.49, "elapsed_time": "1:03:07", "remaining_time": "2:23:53", "throughput": 19785.42, "total_tokens": 74929152}
|
|
{"current_steps": 23820, "total_steps": 78105, "loss": 0.2947, "lr": 4.387023969158623e-06, "epoch": 1.5248703668139043, "percentage": 30.5, "elapsed_time": "1:03:07", "remaining_time": "2:23:52", "throughput": 19786.13, "total_tokens": 74945792}
|
|
{"current_steps": 23825, "total_steps": 78105, "loss": 0.423, "lr": 4.3866574774107226e-06, "epoch": 1.5251904487548813, "percentage": 30.5, "elapsed_time": "1:03:08", "remaining_time": "2:23:51", "throughput": 19786.57, "total_tokens": 74959872}
|
|
{"current_steps": 23830, "total_steps": 78105, "loss": 0.321, "lr": 4.3862908914529745e-06, "epoch": 1.525510530695858, "percentage": 30.51, "elapsed_time": "1:03:09", "remaining_time": "2:23:50", "throughput": 19787.21, "total_tokens": 74976064}
|
|
{"current_steps": 23835, "total_steps": 78105, "loss": 0.3477, "lr": 4.385924211303685e-06, "epoch": 1.525830612636835, "percentage": 30.52, "elapsed_time": "1:03:09", "remaining_time": "2:23:48", "throughput": 19787.75, "total_tokens": 74990848}
|
|
{"current_steps": 23840, "total_steps": 78105, "loss": 0.5634, "lr": 4.385557436981164e-06, "epoch": 1.526150694577812, "percentage": 30.52, "elapsed_time": "1:03:10", "remaining_time": "2:23:47", "throughput": 19788.44, "total_tokens": 75007232}
|
|
{"current_steps": 23845, "total_steps": 78105, "loss": 0.3472, "lr": 4.385190568503725e-06, "epoch": 1.5264707765187888, "percentage": 30.53, "elapsed_time": "1:03:11", "remaining_time": "2:23:46", "throughput": 19789.21, "total_tokens": 75024192}
|
|
{"current_steps": 23850, "total_steps": 78105, "loss": 0.244, "lr": 4.3848236058896894e-06, "epoch": 1.5267908584597656, "percentage": 30.54, "elapsed_time": "1:03:11", "remaining_time": "2:23:45", "throughput": 19789.72, "total_tokens": 75039104}
|
|
{"current_steps": 23855, "total_steps": 78105, "loss": 0.2927, "lr": 4.38445654915738e-06, "epoch": 1.5271109404007426, "percentage": 30.54, "elapsed_time": "1:03:12", "remaining_time": "2:23:44", "throughput": 19790.28, "total_tokens": 75054464}
|
|
{"current_steps": 23860, "total_steps": 78105, "loss": 0.2484, "lr": 4.384089398325128e-06, "epoch": 1.5274310223417196, "percentage": 30.55, "elapsed_time": "1:03:13", "remaining_time": "2:23:43", "throughput": 19790.92, "total_tokens": 75070400}
|
|
{"current_steps": 23865, "total_steps": 78105, "loss": 0.3555, "lr": 4.3837221534112636e-06, "epoch": 1.5277511042826963, "percentage": 30.56, "elapsed_time": "1:03:13", "remaining_time": "2:23:42", "throughput": 19791.56, "total_tokens": 75086528}
|
|
{"current_steps": 23870, "total_steps": 78105, "loss": 0.2417, "lr": 4.383354814434127e-06, "epoch": 1.528071186223673, "percentage": 30.56, "elapsed_time": "1:03:14", "remaining_time": "2:23:41", "throughput": 19792.14, "total_tokens": 75101760}
|
|
{"current_steps": 23875, "total_steps": 78105, "loss": 0.2892, "lr": 4.382987381412061e-06, "epoch": 1.52839126816465, "percentage": 30.57, "elapsed_time": "1:03:15", "remaining_time": "2:23:40", "throughput": 19792.88, "total_tokens": 75118400}
|
|
{"current_steps": 23880, "total_steps": 78105, "loss": 0.4187, "lr": 4.382619854363414e-06, "epoch": 1.528711350105627, "percentage": 30.57, "elapsed_time": "1:03:15", "remaining_time": "2:23:39", "throughput": 19793.52, "total_tokens": 75134336}
|
|
{"current_steps": 23885, "total_steps": 78105, "loss": 0.2502, "lr": 4.382252233306537e-06, "epoch": 1.529031432046604, "percentage": 30.58, "elapsed_time": "1:03:16", "remaining_time": "2:23:38", "throughput": 19794.47, "total_tokens": 75153280}
|
|
{"current_steps": 23890, "total_steps": 78105, "loss": 0.4253, "lr": 4.381884518259789e-06, "epoch": 1.5293515139875808, "percentage": 30.59, "elapsed_time": "1:03:17", "remaining_time": "2:23:37", "throughput": 19794.93, "total_tokens": 75167808}
|
|
{"current_steps": 23895, "total_steps": 78105, "loss": 0.2872, "lr": 4.381516709241529e-06, "epoch": 1.5296715959285576, "percentage": 30.59, "elapsed_time": "1:03:17", "remaining_time": "2:23:36", "throughput": 19795.49, "total_tokens": 75183104}
|
|
{"current_steps": 23900, "total_steps": 78105, "loss": 0.2429, "lr": 4.381148806270126e-06, "epoch": 1.5299916778695346, "percentage": 30.6, "elapsed_time": "1:03:18", "remaining_time": "2:23:35", "throughput": 19796.12, "total_tokens": 75198912}
|
|
{"current_steps": 23905, "total_steps": 78105, "loss": 0.3455, "lr": 4.3807808093639495e-06, "epoch": 1.5303117598105116, "percentage": 30.61, "elapsed_time": "1:03:19", "remaining_time": "2:23:34", "throughput": 19796.73, "total_tokens": 75214784}
|
|
{"current_steps": 23910, "total_steps": 78105, "loss": 0.2963, "lr": 4.380412718541376e-06, "epoch": 1.5306318417514884, "percentage": 30.61, "elapsed_time": "1:03:20", "remaining_time": "2:23:33", "throughput": 19797.35, "total_tokens": 75230592}
|
|
{"current_steps": 23915, "total_steps": 78105, "loss": 0.3013, "lr": 4.380044533820787e-06, "epoch": 1.5309519236924651, "percentage": 30.62, "elapsed_time": "1:03:20", "remaining_time": "2:23:32", "throughput": 19798.12, "total_tokens": 75247872}
|
|
{"current_steps": 23920, "total_steps": 78105, "loss": 0.3078, "lr": 4.379676255220566e-06, "epoch": 1.5312720056334421, "percentage": 30.63, "elapsed_time": "1:03:21", "remaining_time": "2:23:31", "throughput": 19798.79, "total_tokens": 75264064}
|
|
{"current_steps": 23925, "total_steps": 78105, "loss": 0.286, "lr": 4.379307882759104e-06, "epoch": 1.5315920875744191, "percentage": 30.63, "elapsed_time": "1:03:22", "remaining_time": "2:23:30", "throughput": 19799.34, "total_tokens": 75279424}
|
|
{"current_steps": 23930, "total_steps": 78105, "loss": 0.405, "lr": 4.378939416454795e-06, "epoch": 1.531912169515396, "percentage": 30.64, "elapsed_time": "1:03:22", "remaining_time": "2:23:29", "throughput": 19800.17, "total_tokens": 75297024}
|
|
{"current_steps": 23935, "total_steps": 78105, "loss": 0.3025, "lr": 4.378570856326039e-06, "epoch": 1.5322322514563729, "percentage": 30.64, "elapsed_time": "1:03:23", "remaining_time": "2:23:28", "throughput": 19800.8, "total_tokens": 75312768}
|
|
{"current_steps": 23940, "total_steps": 78105, "loss": 0.3311, "lr": 4.3782022023912394e-06, "epoch": 1.5325523333973496, "percentage": 30.65, "elapsed_time": "1:03:24", "remaining_time": "2:23:27", "throughput": 19801.43, "total_tokens": 75328896}
|
|
{"current_steps": 23945, "total_steps": 78105, "loss": 0.3668, "lr": 4.377833454668805e-06, "epoch": 1.5328724153383266, "percentage": 30.66, "elapsed_time": "1:03:24", "remaining_time": "2:23:26", "throughput": 19802.06, "total_tokens": 75344640}
|
|
{"current_steps": 23950, "total_steps": 78105, "loss": 0.2484, "lr": 4.377464613177148e-06, "epoch": 1.5331924972793036, "percentage": 30.66, "elapsed_time": "1:03:25", "remaining_time": "2:23:24", "throughput": 19802.57, "total_tokens": 75359232}
|
|
{"current_steps": 23955, "total_steps": 78105, "loss": 0.3305, "lr": 4.377095677934689e-06, "epoch": 1.5335125792202804, "percentage": 30.67, "elapsed_time": "1:03:26", "remaining_time": "2:23:23", "throughput": 19803.24, "total_tokens": 75375488}
|
|
{"current_steps": 23960, "total_steps": 78105, "loss": 0.3971, "lr": 4.376726648959849e-06, "epoch": 1.5338326611612572, "percentage": 30.68, "elapsed_time": "1:03:26", "remaining_time": "2:23:22", "throughput": 19803.87, "total_tokens": 75391168}
|
|
{"current_steps": 23965, "total_steps": 78105, "loss": 0.3744, "lr": 4.376357526271055e-06, "epoch": 1.5341527431022342, "percentage": 30.68, "elapsed_time": "1:03:27", "remaining_time": "2:23:21", "throughput": 19804.5, "total_tokens": 75407232}
|
|
{"current_steps": 23970, "total_steps": 78105, "loss": 0.2905, "lr": 4.375988309886741e-06, "epoch": 1.5344728250432111, "percentage": 30.69, "elapsed_time": "1:03:28", "remaining_time": "2:23:20", "throughput": 19805.07, "total_tokens": 75422656}
|
|
{"current_steps": 23975, "total_steps": 78105, "loss": 0.3688, "lr": 4.375618999825341e-06, "epoch": 1.534792906984188, "percentage": 30.7, "elapsed_time": "1:03:28", "remaining_time": "2:23:19", "throughput": 19805.6, "total_tokens": 75437632}
|
|
{"current_steps": 23980, "total_steps": 78105, "loss": 0.3125, "lr": 4.375249596105299e-06, "epoch": 1.5351129889251647, "percentage": 30.7, "elapsed_time": "1:03:29", "remaining_time": "2:23:18", "throughput": 19806.31, "total_tokens": 75454464}
|
|
{"current_steps": 23985, "total_steps": 78105, "loss": 0.3745, "lr": 4.37488009874506e-06, "epoch": 1.5354330708661417, "percentage": 30.71, "elapsed_time": "1:03:30", "remaining_time": "2:23:17", "throughput": 19806.85, "total_tokens": 75469696}
|
|
{"current_steps": 23990, "total_steps": 78105, "loss": 0.4366, "lr": 4.3745105077630745e-06, "epoch": 1.5357531528071187, "percentage": 30.72, "elapsed_time": "1:03:30", "remaining_time": "2:23:16", "throughput": 19807.48, "total_tokens": 75485696}
|
|
{"current_steps": 23995, "total_steps": 78105, "loss": 0.4604, "lr": 4.374140823177798e-06, "epoch": 1.5360732347480957, "percentage": 30.72, "elapsed_time": "1:03:31", "remaining_time": "2:23:15", "throughput": 19808.12, "total_tokens": 75501632}
|
|
{"current_steps": 24000, "total_steps": 78105, "loss": 0.3418, "lr": 4.3737710450076905e-06, "epoch": 1.5363933166890724, "percentage": 30.73, "elapsed_time": "1:03:32", "remaining_time": "2:23:14", "throughput": 19808.76, "total_tokens": 75517696}
|
|
{"current_steps": 24005, "total_steps": 78105, "loss": 0.3233, "lr": 4.373401173271218e-06, "epoch": 1.5367133986300492, "percentage": 30.73, "elapsed_time": "1:03:33", "remaining_time": "2:23:13", "throughput": 19809.34, "total_tokens": 75533056}
|
|
{"current_steps": 24010, "total_steps": 78105, "loss": 0.4352, "lr": 4.373031207986849e-06, "epoch": 1.5370334805710262, "percentage": 30.74, "elapsed_time": "1:03:33", "remaining_time": "2:23:12", "throughput": 19809.87, "total_tokens": 75547904}
|
|
{"current_steps": 24015, "total_steps": 78105, "loss": 0.5884, "lr": 4.3726611491730565e-06, "epoch": 1.5373535625120032, "percentage": 30.75, "elapsed_time": "1:03:34", "remaining_time": "2:23:11", "throughput": 19810.46, "total_tokens": 75563584}
|
|
{"current_steps": 24020, "total_steps": 78105, "loss": 0.3698, "lr": 4.372290996848322e-06, "epoch": 1.53767364445298, "percentage": 30.75, "elapsed_time": "1:03:34", "remaining_time": "2:23:10", "throughput": 19811.01, "total_tokens": 75578688}
|
|
{"current_steps": 24025, "total_steps": 78105, "loss": 0.3471, "lr": 4.371920751031127e-06, "epoch": 1.5379937263939567, "percentage": 30.76, "elapsed_time": "1:03:35", "remaining_time": "2:23:09", "throughput": 19811.88, "total_tokens": 75596672}
|
|
{"current_steps": 24030, "total_steps": 78105, "loss": 0.2921, "lr": 4.37155041173996e-06, "epoch": 1.5383138083349337, "percentage": 30.77, "elapsed_time": "1:03:36", "remaining_time": "2:23:08", "throughput": 19812.5, "total_tokens": 75612544}
|
|
{"current_steps": 24035, "total_steps": 78105, "loss": 0.2571, "lr": 4.3711799789933144e-06, "epoch": 1.5386338902759107, "percentage": 30.77, "elapsed_time": "1:03:37", "remaining_time": "2:23:07", "throughput": 19813.22, "total_tokens": 75629504}
|
|
{"current_steps": 24040, "total_steps": 78105, "loss": 0.2932, "lr": 4.370809452809687e-06, "epoch": 1.5389539722168877, "percentage": 30.78, "elapsed_time": "1:03:37", "remaining_time": "2:23:06", "throughput": 19814.05, "total_tokens": 75647424}
|
|
{"current_steps": 24045, "total_steps": 78105, "loss": 0.4218, "lr": 4.37043883320758e-06, "epoch": 1.5392740541578644, "percentage": 30.79, "elapsed_time": "1:03:38", "remaining_time": "2:23:05", "throughput": 19814.59, "total_tokens": 75662528}
|
|
{"current_steps": 24050, "total_steps": 78105, "loss": 0.4146, "lr": 4.3700681202055e-06, "epoch": 1.5395941360988412, "percentage": 30.79, "elapsed_time": "1:03:39", "remaining_time": "2:23:04", "throughput": 19815.15, "total_tokens": 75677824}
|
|
{"current_steps": 24055, "total_steps": 78105, "loss": 0.2612, "lr": 4.36969731382196e-06, "epoch": 1.5399142180398182, "percentage": 30.8, "elapsed_time": "1:03:39", "remaining_time": "2:23:02", "throughput": 19815.71, "total_tokens": 75693056}
|
|
{"current_steps": 24060, "total_steps": 78105, "loss": 0.2937, "lr": 4.369326414075474e-06, "epoch": 1.5402342999807952, "percentage": 30.8, "elapsed_time": "1:03:40", "remaining_time": "2:23:01", "throughput": 19816.35, "total_tokens": 75709184}
|
|
{"current_steps": 24065, "total_steps": 78105, "loss": 0.3962, "lr": 4.3689554209845645e-06, "epoch": 1.540554381921772, "percentage": 30.81, "elapsed_time": "1:03:41", "remaining_time": "2:23:00", "throughput": 19816.95, "total_tokens": 75724928}
|
|
{"current_steps": 24070, "total_steps": 78105, "loss": 0.3195, "lr": 4.368584334567757e-06, "epoch": 1.5408744638627487, "percentage": 30.82, "elapsed_time": "1:03:41", "remaining_time": "2:22:59", "throughput": 19817.58, "total_tokens": 75740800}
|
|
{"current_steps": 24075, "total_steps": 78105, "loss": 0.3436, "lr": 4.36821315484358e-06, "epoch": 1.5411945458037257, "percentage": 30.82, "elapsed_time": "1:03:42", "remaining_time": "2:22:58", "throughput": 19818.22, "total_tokens": 75756672}
|
|
{"current_steps": 24080, "total_steps": 78105, "loss": 0.2751, "lr": 4.367841881830569e-06, "epoch": 1.5415146277447027, "percentage": 30.83, "elapsed_time": "1:03:43", "remaining_time": "2:22:57", "throughput": 19818.94, "total_tokens": 75773440}
|
|
{"current_steps": 24085, "total_steps": 78105, "loss": 0.2876, "lr": 4.367470515547264e-06, "epoch": 1.5418347096856795, "percentage": 30.84, "elapsed_time": "1:03:44", "remaining_time": "2:22:57", "throughput": 19819.95, "total_tokens": 75793280}
|
|
{"current_steps": 24090, "total_steps": 78105, "loss": 0.1805, "lr": 4.367099056012209e-06, "epoch": 1.5421547916266565, "percentage": 30.84, "elapsed_time": "1:03:44", "remaining_time": "2:22:55", "throughput": 19820.47, "total_tokens": 75808384}
|
|
{"current_steps": 24095, "total_steps": 78105, "loss": 0.2682, "lr": 4.366727503243953e-06, "epoch": 1.5424748735676332, "percentage": 30.85, "elapsed_time": "1:03:45", "remaining_time": "2:22:54", "throughput": 19821.15, "total_tokens": 75824768}
|
|
{"current_steps": 24100, "total_steps": 78105, "loss": 0.3056, "lr": 4.366355857261048e-06, "epoch": 1.5427949555086102, "percentage": 30.86, "elapsed_time": "1:03:46", "remaining_time": "2:22:53", "throughput": 19821.94, "total_tokens": 75842240}
|
|
{"current_steps": 24105, "total_steps": 78105, "loss": 0.1769, "lr": 4.3659841180820525e-06, "epoch": 1.5431150374495872, "percentage": 30.86, "elapsed_time": "1:03:46", "remaining_time": "2:22:52", "throughput": 19822.47, "total_tokens": 75857344}
|
|
{"current_steps": 24110, "total_steps": 78105, "loss": 0.2045, "lr": 4.365612285725531e-06, "epoch": 1.543435119390564, "percentage": 30.87, "elapsed_time": "1:03:47", "remaining_time": "2:22:51", "throughput": 19823.04, "total_tokens": 75873024}
|
|
{"current_steps": 24115, "total_steps": 78105, "loss": 0.2491, "lr": 4.365240360210048e-06, "epoch": 1.5437552013315408, "percentage": 30.88, "elapsed_time": "1:03:48", "remaining_time": "2:22:50", "throughput": 19823.62, "total_tokens": 75888256}
|
|
{"current_steps": 24120, "total_steps": 78105, "loss": 0.3277, "lr": 4.364868341554179e-06, "epoch": 1.5440752832725178, "percentage": 30.88, "elapsed_time": "1:03:48", "remaining_time": "2:22:49", "throughput": 19824.39, "total_tokens": 75905728}
|
|
{"current_steps": 24125, "total_steps": 78105, "loss": 0.2696, "lr": 4.364496229776497e-06, "epoch": 1.5443953652134947, "percentage": 30.89, "elapsed_time": "1:03:49", "remaining_time": "2:22:48", "throughput": 19824.92, "total_tokens": 75921024}
|
|
{"current_steps": 24130, "total_steps": 78105, "loss": 0.283, "lr": 4.364124024895586e-06, "epoch": 1.5447154471544715, "percentage": 30.89, "elapsed_time": "1:03:50", "remaining_time": "2:22:47", "throughput": 19825.37, "total_tokens": 75935616}
|
|
{"current_steps": 24135, "total_steps": 78105, "loss": 0.2721, "lr": 4.363751726930031e-06, "epoch": 1.5450355290954483, "percentage": 30.9, "elapsed_time": "1:03:50", "remaining_time": "2:22:46", "throughput": 19825.97, "total_tokens": 75951296}
|
|
{"current_steps": 24140, "total_steps": 78105, "loss": 0.3857, "lr": 4.363379335898423e-06, "epoch": 1.5453556110364253, "percentage": 30.91, "elapsed_time": "1:03:51", "remaining_time": "2:22:45", "throughput": 19826.47, "total_tokens": 75966272}
|
|
{"current_steps": 24145, "total_steps": 78105, "loss": 0.4448, "lr": 4.363006851819357e-06, "epoch": 1.5456756929774023, "percentage": 30.91, "elapsed_time": "1:03:52", "remaining_time": "2:22:44", "throughput": 19827.06, "total_tokens": 75982080}
|
|
{"current_steps": 24150, "total_steps": 78105, "loss": 0.3493, "lr": 4.362634274711432e-06, "epoch": 1.5459957749183793, "percentage": 30.92, "elapsed_time": "1:03:52", "remaining_time": "2:22:43", "throughput": 19827.65, "total_tokens": 75997760}
|
|
{"current_steps": 24155, "total_steps": 78105, "loss": 0.372, "lr": 4.362261604593254e-06, "epoch": 1.546315856859356, "percentage": 30.93, "elapsed_time": "1:03:53", "remaining_time": "2:22:42", "throughput": 19828.35, "total_tokens": 76014528}
|
|
{"current_steps": 24160, "total_steps": 78105, "loss": 0.4029, "lr": 4.3618888414834315e-06, "epoch": 1.5466359388003328, "percentage": 30.93, "elapsed_time": "1:03:54", "remaining_time": "2:22:41", "throughput": 19829.0, "total_tokens": 76030656}
|
|
{"current_steps": 24165, "total_steps": 78105, "loss": 0.328, "lr": 4.361515985400578e-06, "epoch": 1.5469560207413098, "percentage": 30.94, "elapsed_time": "1:03:54", "remaining_time": "2:22:40", "throughput": 19829.46, "total_tokens": 76045248}
|
|
{"current_steps": 24170, "total_steps": 78105, "loss": 0.3907, "lr": 4.361143036363313e-06, "epoch": 1.5472761026822868, "percentage": 30.95, "elapsed_time": "1:03:55", "remaining_time": "2:22:39", "throughput": 19830.1, "total_tokens": 76061440}
|
|
{"current_steps": 24175, "total_steps": 78105, "loss": 0.3093, "lr": 4.3607699943902594e-06, "epoch": 1.5475961846232635, "percentage": 30.95, "elapsed_time": "1:03:56", "remaining_time": "2:22:38", "throughput": 19830.71, "total_tokens": 76077504}
|
|
{"current_steps": 24180, "total_steps": 78105, "loss": 0.3813, "lr": 4.3603968595000446e-06, "epoch": 1.5479162665642403, "percentage": 30.96, "elapsed_time": "1:03:56", "remaining_time": "2:22:37", "throughput": 19831.2, "total_tokens": 76092160}
|
|
{"current_steps": 24185, "total_steps": 78105, "loss": 0.5046, "lr": 4.360023631711301e-06, "epoch": 1.5482363485052173, "percentage": 30.96, "elapsed_time": "1:03:57", "remaining_time": "2:22:36", "throughput": 19831.84, "total_tokens": 76108352}
|
|
{"current_steps": 24190, "total_steps": 78105, "loss": 0.3662, "lr": 4.359650311042666e-06, "epoch": 1.5485564304461943, "percentage": 30.97, "elapsed_time": "1:03:58", "remaining_time": "2:22:35", "throughput": 19832.52, "total_tokens": 76125184}
|
|
{"current_steps": 24195, "total_steps": 78105, "loss": 0.3409, "lr": 4.3592768975127805e-06, "epoch": 1.5488765123871713, "percentage": 30.98, "elapsed_time": "1:03:59", "remaining_time": "2:22:34", "throughput": 19833.22, "total_tokens": 76141504}
|
|
{"current_steps": 24200, "total_steps": 78105, "loss": 0.27, "lr": 4.3589033911402915e-06, "epoch": 1.549196594328148, "percentage": 30.98, "elapsed_time": "1:03:59", "remaining_time": "2:22:33", "throughput": 19833.82, "total_tokens": 76157440}
|
|
{"current_steps": 24205, "total_steps": 78105, "loss": 0.3269, "lr": 4.35852979194385e-06, "epoch": 1.5495166762691248, "percentage": 30.99, "elapsed_time": "1:04:00", "remaining_time": "2:22:31", "throughput": 19834.41, "total_tokens": 76173120}
|
|
{"current_steps": 24210, "total_steps": 78105, "loss": 0.4056, "lr": 4.358156099942112e-06, "epoch": 1.5498367582101018, "percentage": 31.0, "elapsed_time": "1:04:01", "remaining_time": "2:22:30", "throughput": 19834.94, "total_tokens": 76188096}
|
|
{"current_steps": 24215, "total_steps": 78105, "loss": 0.3998, "lr": 4.357782315153736e-06, "epoch": 1.5501568401510788, "percentage": 31.0, "elapsed_time": "1:04:01", "remaining_time": "2:22:29", "throughput": 19835.72, "total_tokens": 76205312}
|
|
{"current_steps": 24220, "total_steps": 78105, "loss": 0.3335, "lr": 4.357408437597388e-06, "epoch": 1.5504769220920556, "percentage": 31.01, "elapsed_time": "1:04:02", "remaining_time": "2:22:28", "throughput": 19836.3, "total_tokens": 76220672}
|
|
{"current_steps": 24225, "total_steps": 78105, "loss": 0.3525, "lr": 4.357034467291737e-06, "epoch": 1.5507970040330323, "percentage": 31.02, "elapsed_time": "1:04:03", "remaining_time": "2:22:27", "throughput": 19836.82, "total_tokens": 76235392}
|
|
{"current_steps": 24230, "total_steps": 78105, "loss": 0.3526, "lr": 4.356660404255459e-06, "epoch": 1.5511170859740093, "percentage": 31.02, "elapsed_time": "1:04:03", "remaining_time": "2:22:26", "throughput": 19837.42, "total_tokens": 76251520}
|
|
{"current_steps": 24235, "total_steps": 78105, "loss": 0.3537, "lr": 4.356286248507231e-06, "epoch": 1.5514371679149863, "percentage": 31.03, "elapsed_time": "1:04:04", "remaining_time": "2:22:25", "throughput": 19838.1, "total_tokens": 76267904}
|
|
{"current_steps": 24240, "total_steps": 78105, "loss": 0.4103, "lr": 4.355912000065737e-06, "epoch": 1.551757249855963, "percentage": 31.04, "elapsed_time": "1:04:05", "remaining_time": "2:22:24", "throughput": 19838.65, "total_tokens": 76283008}
|
|
{"current_steps": 24245, "total_steps": 78105, "loss": 0.3751, "lr": 4.355537658949665e-06, "epoch": 1.5520773317969399, "percentage": 31.04, "elapsed_time": "1:04:05", "remaining_time": "2:22:23", "throughput": 19839.17, "total_tokens": 76297728}
|
|
{"current_steps": 24250, "total_steps": 78105, "loss": 0.3512, "lr": 4.355163225177707e-06, "epoch": 1.5523974137379168, "percentage": 31.05, "elapsed_time": "1:04:06", "remaining_time": "2:22:22", "throughput": 19839.64, "total_tokens": 76312384}
|
|
{"current_steps": 24255, "total_steps": 78105, "loss": 0.2696, "lr": 4.354788698768561e-06, "epoch": 1.5527174956788938, "percentage": 31.05, "elapsed_time": "1:04:07", "remaining_time": "2:22:21", "throughput": 19840.29, "total_tokens": 76328320}
|
|
{"current_steps": 24260, "total_steps": 78105, "loss": 0.3207, "lr": 4.354414079740928e-06, "epoch": 1.5530375776198708, "percentage": 31.06, "elapsed_time": "1:04:07", "remaining_time": "2:22:20", "throughput": 19840.9, "total_tokens": 76344320}
|
|
{"current_steps": 24265, "total_steps": 78105, "loss": 0.3513, "lr": 4.354039368113515e-06, "epoch": 1.5533576595608476, "percentage": 31.07, "elapsed_time": "1:04:08", "remaining_time": "2:22:19", "throughput": 19841.55, "total_tokens": 76360256}
|
|
{"current_steps": 24270, "total_steps": 78105, "loss": 0.3803, "lr": 4.353664563905034e-06, "epoch": 1.5536777415018244, "percentage": 31.07, "elapsed_time": "1:04:09", "remaining_time": "2:22:18", "throughput": 19842.03, "total_tokens": 76374848}
|
|
{"current_steps": 24275, "total_steps": 78105, "loss": 0.41, "lr": 4.353289667134201e-06, "epoch": 1.5539978234428014, "percentage": 31.08, "elapsed_time": "1:04:09", "remaining_time": "2:22:17", "throughput": 19842.65, "total_tokens": 76390912}
|
|
{"current_steps": 24280, "total_steps": 78105, "loss": 0.2854, "lr": 4.352914677819733e-06, "epoch": 1.5543179053837783, "percentage": 31.09, "elapsed_time": "1:04:10", "remaining_time": "2:22:15", "throughput": 19843.23, "total_tokens": 76406528}
|
|
{"current_steps": 24285, "total_steps": 78105, "loss": 0.3075, "lr": 4.352539595980359e-06, "epoch": 1.554637987324755, "percentage": 31.09, "elapsed_time": "1:04:11", "remaining_time": "2:22:14", "throughput": 19843.78, "total_tokens": 76421760}
|
|
{"current_steps": 24290, "total_steps": 78105, "loss": 0.3739, "lr": 4.352164421634807e-06, "epoch": 1.5549580692657319, "percentage": 31.1, "elapsed_time": "1:04:11", "remaining_time": "2:22:13", "throughput": 19844.23, "total_tokens": 76436224}
|
|
{"current_steps": 24295, "total_steps": 78105, "loss": 0.341, "lr": 4.351789154801811e-06, "epoch": 1.5552781512067089, "percentage": 31.11, "elapsed_time": "1:04:12", "remaining_time": "2:22:12", "throughput": 19844.82, "total_tokens": 76452224}
|
|
{"current_steps": 24300, "total_steps": 78105, "loss": 0.3821, "lr": 4.351413795500111e-06, "epoch": 1.5555982331476859, "percentage": 31.11, "elapsed_time": "1:04:13", "remaining_time": "2:22:11", "throughput": 19845.27, "total_tokens": 76466496}
|
|
{"current_steps": 24305, "total_steps": 78105, "loss": 0.3833, "lr": 4.3510383437484495e-06, "epoch": 1.5559183150886629, "percentage": 31.12, "elapsed_time": "1:04:13", "remaining_time": "2:22:10", "throughput": 19845.82, "total_tokens": 76481408}
|
|
{"current_steps": 24310, "total_steps": 78105, "loss": 0.2321, "lr": 4.350662799565574e-06, "epoch": 1.5562383970296396, "percentage": 31.12, "elapsed_time": "1:04:14", "remaining_time": "2:22:09", "throughput": 19846.61, "total_tokens": 76499136}
|
|
{"current_steps": 24315, "total_steps": 78105, "loss": 0.5105, "lr": 4.350287162970239e-06, "epoch": 1.5565584789706164, "percentage": 31.13, "elapsed_time": "1:04:15", "remaining_time": "2:22:08", "throughput": 19847.3, "total_tokens": 76515968}
|
|
{"current_steps": 24320, "total_steps": 78105, "loss": 0.4383, "lr": 4.3499114339812e-06, "epoch": 1.5568785609115934, "percentage": 31.14, "elapsed_time": "1:04:15", "remaining_time": "2:22:07", "throughput": 19848.0, "total_tokens": 76532928}
|
|
{"current_steps": 24325, "total_steps": 78105, "loss": 0.4107, "lr": 4.349535612617221e-06, "epoch": 1.5571986428525704, "percentage": 31.14, "elapsed_time": "1:04:16", "remaining_time": "2:22:06", "throughput": 19848.51, "total_tokens": 76547776}
|
|
{"current_steps": 24330, "total_steps": 78105, "loss": 0.1995, "lr": 4.349159698897066e-06, "epoch": 1.5575187247935471, "percentage": 31.15, "elapsed_time": "1:04:17", "remaining_time": "2:22:05", "throughput": 19848.97, "total_tokens": 76562304}
|
|
{"current_steps": 24335, "total_steps": 78105, "loss": 0.3399, "lr": 4.348783692839509e-06, "epoch": 1.557838806734524, "percentage": 31.16, "elapsed_time": "1:04:17", "remaining_time": "2:22:04", "throughput": 19849.64, "total_tokens": 76578816}
|
|
{"current_steps": 24340, "total_steps": 78105, "loss": 0.3594, "lr": 4.348407594463323e-06, "epoch": 1.558158888675501, "percentage": 31.16, "elapsed_time": "1:04:18", "remaining_time": "2:22:03", "throughput": 19850.32, "total_tokens": 76595136}
|
|
{"current_steps": 24345, "total_steps": 78105, "loss": 0.382, "lr": 4.34803140378729e-06, "epoch": 1.5584789706164779, "percentage": 31.17, "elapsed_time": "1:04:19", "remaining_time": "2:22:02", "throughput": 19850.79, "total_tokens": 76609792}
|
|
{"current_steps": 24350, "total_steps": 78105, "loss": 0.4403, "lr": 4.347655120830195e-06, "epoch": 1.5587990525574547, "percentage": 31.18, "elapsed_time": "1:04:19", "remaining_time": "2:22:01", "throughput": 19851.29, "total_tokens": 76624512}
|
|
{"current_steps": 24355, "total_steps": 78105, "loss": 0.4182, "lr": 4.347278745610828e-06, "epoch": 1.5591191344984316, "percentage": 31.18, "elapsed_time": "1:04:20", "remaining_time": "2:22:00", "throughput": 19851.89, "total_tokens": 76640256}
|
|
{"current_steps": 24360, "total_steps": 78105, "loss": 0.3146, "lr": 4.346902278147982e-06, "epoch": 1.5594392164394084, "percentage": 31.19, "elapsed_time": "1:04:21", "remaining_time": "2:21:59", "throughput": 19852.54, "total_tokens": 76656640}
|
|
{"current_steps": 24365, "total_steps": 78105, "loss": 0.3663, "lr": 4.346525718460456e-06, "epoch": 1.5597592983803854, "percentage": 31.2, "elapsed_time": "1:04:21", "remaining_time": "2:21:58", "throughput": 19853.15, "total_tokens": 76672192}
|
|
{"current_steps": 24370, "total_steps": 78105, "loss": 0.3104, "lr": 4.346149066567054e-06, "epoch": 1.5600793803213624, "percentage": 31.2, "elapsed_time": "1:04:22", "remaining_time": "2:21:56", "throughput": 19853.78, "total_tokens": 76688064}
|
|
{"current_steps": 24375, "total_steps": 78105, "loss": 0.3194, "lr": 4.345772322486584e-06, "epoch": 1.5603994622623392, "percentage": 31.21, "elapsed_time": "1:04:23", "remaining_time": "2:21:55", "throughput": 19854.2, "total_tokens": 76702208}
|
|
{"current_steps": 24380, "total_steps": 78105, "loss": 0.3413, "lr": 4.3453954862378586e-06, "epoch": 1.560719544203316, "percentage": 31.21, "elapsed_time": "1:04:23", "remaining_time": "2:21:54", "throughput": 19854.87, "total_tokens": 76718464}
|
|
{"current_steps": 24385, "total_steps": 78105, "loss": 0.3397, "lr": 4.345018557839695e-06, "epoch": 1.561039626144293, "percentage": 31.22, "elapsed_time": "1:04:24", "remaining_time": "2:21:53", "throughput": 19855.5, "total_tokens": 76734400}
|
|
{"current_steps": 24390, "total_steps": 78105, "loss": 0.3075, "lr": 4.344641537310915e-06, "epoch": 1.56135970808527, "percentage": 31.23, "elapsed_time": "1:04:25", "remaining_time": "2:21:52", "throughput": 19856.19, "total_tokens": 76750720}
|
|
{"current_steps": 24395, "total_steps": 78105, "loss": 0.2896, "lr": 4.344264424670345e-06, "epoch": 1.5616797900262467, "percentage": 31.23, "elapsed_time": "1:04:26", "remaining_time": "2:21:51", "throughput": 19857.0, "total_tokens": 76768320}
|
|
{"current_steps": 24400, "total_steps": 78105, "loss": 0.3499, "lr": 4.343887219936815e-06, "epoch": 1.5619998719672235, "percentage": 31.24, "elapsed_time": "1:04:26", "remaining_time": "2:21:50", "throughput": 19857.56, "total_tokens": 76783936}
|
|
{"current_steps": 24405, "total_steps": 78105, "loss": 0.246, "lr": 4.343509923129163e-06, "epoch": 1.5623199539082004, "percentage": 31.25, "elapsed_time": "1:04:27", "remaining_time": "2:21:49", "throughput": 19858.21, "total_tokens": 76800256}
|
|
{"current_steps": 24410, "total_steps": 78105, "loss": 0.4125, "lr": 4.343132534266228e-06, "epoch": 1.5626400358491774, "percentage": 31.25, "elapsed_time": "1:04:28", "remaining_time": "2:21:48", "throughput": 19858.77, "total_tokens": 76815616}
|
|
{"current_steps": 24415, "total_steps": 78105, "loss": 0.2967, "lr": 4.342755053366854e-06, "epoch": 1.5629601177901544, "percentage": 31.26, "elapsed_time": "1:04:28", "remaining_time": "2:21:47", "throughput": 19859.25, "total_tokens": 76830080}
|
|
{"current_steps": 24420, "total_steps": 78105, "loss": 0.4358, "lr": 4.342377480449892e-06, "epoch": 1.5632801997311312, "percentage": 31.27, "elapsed_time": "1:04:29", "remaining_time": "2:21:46", "throughput": 19859.96, "total_tokens": 76846656}
|
|
{"current_steps": 24425, "total_steps": 78105, "loss": 0.3601, "lr": 4.341999815534194e-06, "epoch": 1.563600281672108, "percentage": 31.27, "elapsed_time": "1:04:30", "remaining_time": "2:21:45", "throughput": 19860.5, "total_tokens": 76861888}
|
|
{"current_steps": 24430, "total_steps": 78105, "loss": 0.3554, "lr": 4.341622058638621e-06, "epoch": 1.563920363613085, "percentage": 31.28, "elapsed_time": "1:04:30", "remaining_time": "2:21:44", "throughput": 19861.26, "total_tokens": 76879360}
|
|
{"current_steps": 24435, "total_steps": 78105, "loss": 0.4199, "lr": 4.341244209782035e-06, "epoch": 1.564240445554062, "percentage": 31.28, "elapsed_time": "1:04:31", "remaining_time": "2:21:43", "throughput": 19861.75, "total_tokens": 76894208}
|
|
{"current_steps": 24440, "total_steps": 78105, "loss": 0.382, "lr": 4.340866268983303e-06, "epoch": 1.5645605274950387, "percentage": 31.29, "elapsed_time": "1:04:32", "remaining_time": "2:21:42", "throughput": 19862.36, "total_tokens": 76910016}
|
|
{"current_steps": 24445, "total_steps": 78105, "loss": 0.2945, "lr": 4.340488236261299e-06, "epoch": 1.5648806094360155, "percentage": 31.3, "elapsed_time": "1:04:32", "remaining_time": "2:21:41", "throughput": 19863.06, "total_tokens": 76926592}
|
|
{"current_steps": 24450, "total_steps": 78105, "loss": 0.3547, "lr": 4.340110111634899e-06, "epoch": 1.5652006913769925, "percentage": 31.3, "elapsed_time": "1:04:33", "remaining_time": "2:21:40", "throughput": 19863.68, "total_tokens": 76942464}
|
|
{"current_steps": 24455, "total_steps": 78105, "loss": 0.4236, "lr": 4.339731895122984e-06, "epoch": 1.5655207733179695, "percentage": 31.31, "elapsed_time": "1:04:34", "remaining_time": "2:21:39", "throughput": 19864.22, "total_tokens": 76957568}
|
|
{"current_steps": 24460, "total_steps": 78105, "loss": 0.2503, "lr": 4.339353586744442e-06, "epoch": 1.5658408552589465, "percentage": 31.32, "elapsed_time": "1:04:34", "remaining_time": "2:21:38", "throughput": 19864.93, "total_tokens": 76974336}
|
|
{"current_steps": 24465, "total_steps": 78105, "loss": 0.2646, "lr": 4.338975186518162e-06, "epoch": 1.5661609371999232, "percentage": 31.32, "elapsed_time": "1:04:35", "remaining_time": "2:21:37", "throughput": 19865.53, "total_tokens": 76990208}
|
|
{"current_steps": 24470, "total_steps": 78105, "loss": 0.2416, "lr": 4.338596694463041e-06, "epoch": 1.5664810191409, "percentage": 31.33, "elapsed_time": "1:04:36", "remaining_time": "2:21:36", "throughput": 19865.99, "total_tokens": 77005120}
|
|
{"current_steps": 24475, "total_steps": 78105, "loss": 0.3828, "lr": 4.338218110597977e-06, "epoch": 1.566801101081877, "percentage": 31.34, "elapsed_time": "1:04:36", "remaining_time": "2:21:35", "throughput": 19866.48, "total_tokens": 77019968}
|
|
{"current_steps": 24480, "total_steps": 78105, "loss": 0.4541, "lr": 4.337839434941876e-06, "epoch": 1.567121183022854, "percentage": 31.34, "elapsed_time": "1:04:37", "remaining_time": "2:21:34", "throughput": 19867.34, "total_tokens": 77038336}
|
|
{"current_steps": 24485, "total_steps": 78105, "loss": 0.2851, "lr": 4.337460667513647e-06, "epoch": 1.5674412649638307, "percentage": 31.35, "elapsed_time": "1:04:38", "remaining_time": "2:21:33", "throughput": 19867.89, "total_tokens": 77053760}
|
|
{"current_steps": 24490, "total_steps": 78105, "loss": 0.2864, "lr": 4.337081808332203e-06, "epoch": 1.5677613469048075, "percentage": 31.36, "elapsed_time": "1:04:38", "remaining_time": "2:21:32", "throughput": 19868.52, "total_tokens": 77069632}
|
|
{"current_steps": 24495, "total_steps": 78105, "loss": 0.2691, "lr": 4.3367028574164615e-06, "epoch": 1.5680814288457845, "percentage": 31.36, "elapsed_time": "1:04:39", "remaining_time": "2:21:31", "throughput": 19869.1, "total_tokens": 77084992}
|
|
{"current_steps": 24500, "total_steps": 78105, "loss": 0.2951, "lr": 4.3363238147853474e-06, "epoch": 1.5684015107867615, "percentage": 31.37, "elapsed_time": "1:04:40", "remaining_time": "2:21:29", "throughput": 19869.6, "total_tokens": 77099840}
|
|
{"current_steps": 24505, "total_steps": 78105, "loss": 0.291, "lr": 4.335944680457787e-06, "epoch": 1.5687215927277383, "percentage": 31.37, "elapsed_time": "1:04:40", "remaining_time": "2:21:28", "throughput": 19870.21, "total_tokens": 77115712}
|
|
{"current_steps": 24510, "total_steps": 78105, "loss": 0.3405, "lr": 4.335565454452713e-06, "epoch": 1.569041674668715, "percentage": 31.38, "elapsed_time": "1:04:41", "remaining_time": "2:21:27", "throughput": 19870.79, "total_tokens": 77131392}
|
|
{"current_steps": 24515, "total_steps": 78105, "loss": 0.294, "lr": 4.33518613678906e-06, "epoch": 1.569361756609692, "percentage": 31.39, "elapsed_time": "1:04:42", "remaining_time": "2:21:26", "throughput": 19871.37, "total_tokens": 77146944}
|
|
{"current_steps": 24520, "total_steps": 78105, "loss": 0.3917, "lr": 4.334806727485771e-06, "epoch": 1.569681838550669, "percentage": 31.39, "elapsed_time": "1:04:42", "remaining_time": "2:21:25", "throughput": 19871.9, "total_tokens": 77162240}
|
|
{"current_steps": 24525, "total_steps": 78105, "loss": 0.387, "lr": 4.33442722656179e-06, "epoch": 1.570001920491646, "percentage": 31.4, "elapsed_time": "1:04:43", "remaining_time": "2:21:24", "throughput": 19872.51, "total_tokens": 77178304}
|
|
{"current_steps": 24530, "total_steps": 78105, "loss": 0.3978, "lr": 4.33404763403607e-06, "epoch": 1.5703220024326228, "percentage": 31.41, "elapsed_time": "1:04:44", "remaining_time": "2:21:23", "throughput": 19873.38, "total_tokens": 77197056}
|
|
{"current_steps": 24535, "total_steps": 78105, "loss": 0.3741, "lr": 4.333667949927564e-06, "epoch": 1.5706420843735995, "percentage": 31.41, "elapsed_time": "1:04:45", "remaining_time": "2:21:22", "throughput": 19874.01, "total_tokens": 77213120}
|
|
{"current_steps": 24540, "total_steps": 78105, "loss": 0.2273, "lr": 4.3332881742552315e-06, "epoch": 1.5709621663145765, "percentage": 31.42, "elapsed_time": "1:04:45", "remaining_time": "2:21:21", "throughput": 19874.66, "total_tokens": 77229376}
|
|
{"current_steps": 24545, "total_steps": 78105, "loss": 0.2782, "lr": 4.332908307038037e-06, "epoch": 1.5712822482555535, "percentage": 31.43, "elapsed_time": "1:04:46", "remaining_time": "2:21:20", "throughput": 19875.23, "total_tokens": 77244800}
|
|
{"current_steps": 24550, "total_steps": 78105, "loss": 0.435, "lr": 4.33252834829495e-06, "epoch": 1.5716023301965303, "percentage": 31.43, "elapsed_time": "1:04:47", "remaining_time": "2:21:19", "throughput": 19875.83, "total_tokens": 77260864}
|
|
{"current_steps": 24555, "total_steps": 78105, "loss": 0.378, "lr": 4.332148298044941e-06, "epoch": 1.571922412137507, "percentage": 31.44, "elapsed_time": "1:04:47", "remaining_time": "2:21:18", "throughput": 19876.4, "total_tokens": 77276480}
|
|
{"current_steps": 24560, "total_steps": 78105, "loss": 0.3058, "lr": 4.33176815630699e-06, "epoch": 1.572242494078484, "percentage": 31.44, "elapsed_time": "1:04:48", "remaining_time": "2:21:17", "throughput": 19876.93, "total_tokens": 77291328}
|
|
{"current_steps": 24565, "total_steps": 78105, "loss": 0.2797, "lr": 4.331387923100078e-06, "epoch": 1.572562576019461, "percentage": 31.45, "elapsed_time": "1:04:49", "remaining_time": "2:21:16", "throughput": 19877.43, "total_tokens": 77306240}
|
|
{"current_steps": 24570, "total_steps": 78105, "loss": 0.3244, "lr": 4.331007598443193e-06, "epoch": 1.572882657960438, "percentage": 31.46, "elapsed_time": "1:04:49", "remaining_time": "2:21:15", "throughput": 19877.97, "total_tokens": 77321856}
|
|
{"current_steps": 24575, "total_steps": 78105, "loss": 0.3209, "lr": 4.3306271823553255e-06, "epoch": 1.5732027399014148, "percentage": 31.46, "elapsed_time": "1:04:50", "remaining_time": "2:21:14", "throughput": 19878.5, "total_tokens": 77337344}
|
|
{"current_steps": 24580, "total_steps": 78105, "loss": 0.2657, "lr": 4.330246674855473e-06, "epoch": 1.5735228218423916, "percentage": 31.47, "elapsed_time": "1:04:51", "remaining_time": "2:21:13", "throughput": 19879.09, "total_tokens": 77353216}
|
|
{"current_steps": 24585, "total_steps": 78105, "loss": 0.3113, "lr": 4.329866075962634e-06, "epoch": 1.5738429037833686, "percentage": 31.48, "elapsed_time": "1:04:51", "remaining_time": "2:21:12", "throughput": 19879.71, "total_tokens": 77369152}
|
|
{"current_steps": 24590, "total_steps": 78105, "loss": 0.4957, "lr": 4.329485385695815e-06, "epoch": 1.5741629857243455, "percentage": 31.48, "elapsed_time": "1:04:52", "remaining_time": "2:21:11", "throughput": 19880.21, "total_tokens": 77384192}
|
|
{"current_steps": 24595, "total_steps": 78105, "loss": 0.3878, "lr": 4.329104604074025e-06, "epoch": 1.5744830676653223, "percentage": 31.49, "elapsed_time": "1:04:53", "remaining_time": "2:21:10", "throughput": 19880.81, "total_tokens": 77400192}
|
|
{"current_steps": 24600, "total_steps": 78105, "loss": 0.3286, "lr": 4.3287237311162785e-06, "epoch": 1.574803149606299, "percentage": 31.5, "elapsed_time": "1:04:53", "remaining_time": "2:21:09", "throughput": 19881.56, "total_tokens": 77417472}
|
|
{"current_steps": 24605, "total_steps": 78105, "loss": 0.3442, "lr": 4.328342766841594e-06, "epoch": 1.575123231547276, "percentage": 31.5, "elapsed_time": "1:04:54", "remaining_time": "2:21:08", "throughput": 19882.12, "total_tokens": 77432896}
|
|
{"current_steps": 24610, "total_steps": 78105, "loss": 0.3132, "lr": 4.327961711268996e-06, "epoch": 1.575443313488253, "percentage": 31.51, "elapsed_time": "1:04:55", "remaining_time": "2:21:07", "throughput": 19882.65, "total_tokens": 77448064}
|
|
{"current_steps": 24615, "total_steps": 78105, "loss": 0.3277, "lr": 4.327580564417512e-06, "epoch": 1.5757633954292298, "percentage": 31.52, "elapsed_time": "1:04:55", "remaining_time": "2:21:06", "throughput": 19883.32, "total_tokens": 77464448}
|
|
{"current_steps": 24620, "total_steps": 78105, "loss": 0.2517, "lr": 4.327199326306173e-06, "epoch": 1.5760834773702068, "percentage": 31.52, "elapsed_time": "1:04:56", "remaining_time": "2:21:05", "throughput": 19883.89, "total_tokens": 77479936}
|
|
{"current_steps": 24625, "total_steps": 78105, "loss": 0.3847, "lr": 4.326817996954018e-06, "epoch": 1.5764035593111836, "percentage": 31.53, "elapsed_time": "1:04:57", "remaining_time": "2:21:04", "throughput": 19884.48, "total_tokens": 77495552}
|
|
{"current_steps": 24630, "total_steps": 78105, "loss": 0.4093, "lr": 4.326436576380087e-06, "epoch": 1.5767236412521606, "percentage": 31.53, "elapsed_time": "1:04:57", "remaining_time": "2:21:03", "throughput": 19885.07, "total_tokens": 77511552}
|
|
{"current_steps": 24635, "total_steps": 78105, "loss": 0.3874, "lr": 4.3260550646034276e-06, "epoch": 1.5770437231931376, "percentage": 31.54, "elapsed_time": "1:04:58", "remaining_time": "2:21:01", "throughput": 19885.66, "total_tokens": 77527232}
|
|
{"current_steps": 24640, "total_steps": 78105, "loss": 0.2757, "lr": 4.3256734616430896e-06, "epoch": 1.5773638051341143, "percentage": 31.55, "elapsed_time": "1:04:59", "remaining_time": "2:21:00", "throughput": 19886.18, "total_tokens": 77542208}
|
|
{"current_steps": 24645, "total_steps": 78105, "loss": 0.3221, "lr": 4.3252917675181286e-06, "epoch": 1.577683887075091, "percentage": 31.55, "elapsed_time": "1:04:59", "remaining_time": "2:20:59", "throughput": 19886.74, "total_tokens": 77557888}
|
|
{"current_steps": 24650, "total_steps": 78105, "loss": 0.3624, "lr": 4.324909982247604e-06, "epoch": 1.578003969016068, "percentage": 31.56, "elapsed_time": "1:05:00", "remaining_time": "2:20:58", "throughput": 19887.23, "total_tokens": 77573056}
|
|
{"current_steps": 24655, "total_steps": 78105, "loss": 0.3257, "lr": 4.324528105850581e-06, "epoch": 1.578324050957045, "percentage": 31.57, "elapsed_time": "1:05:01", "remaining_time": "2:20:57", "throughput": 19887.82, "total_tokens": 77589056}
|
|
{"current_steps": 24660, "total_steps": 78105, "loss": 0.2694, "lr": 4.324146138346127e-06, "epoch": 1.5786441328980219, "percentage": 31.57, "elapsed_time": "1:05:02", "remaining_time": "2:20:56", "throughput": 19888.52, "total_tokens": 77606016}
|
|
{"current_steps": 24665, "total_steps": 78105, "loss": 0.4408, "lr": 4.323764079753318e-06, "epoch": 1.5789642148389986, "percentage": 31.58, "elapsed_time": "1:05:02", "remaining_time": "2:20:55", "throughput": 19889.07, "total_tokens": 77621440}
|
|
{"current_steps": 24670, "total_steps": 78105, "loss": 0.3611, "lr": 4.323381930091229e-06, "epoch": 1.5792842967799756, "percentage": 31.59, "elapsed_time": "1:05:03", "remaining_time": "2:20:54", "throughput": 19889.63, "total_tokens": 77637184}
|
|
{"current_steps": 24675, "total_steps": 78105, "loss": 0.4302, "lr": 4.322999689378945e-06, "epoch": 1.5796043787209526, "percentage": 31.59, "elapsed_time": "1:05:04", "remaining_time": "2:20:54", "throughput": 19889.69, "total_tokens": 77653760}
|
|
{"current_steps": 24680, "total_steps": 78105, "loss": 0.5172, "lr": 4.322617357635553e-06, "epoch": 1.5799244606619296, "percentage": 31.6, "elapsed_time": "1:05:04", "remaining_time": "2:20:52", "throughput": 19890.17, "total_tokens": 77668480}
|
|
{"current_steps": 24685, "total_steps": 78105, "loss": 0.2008, "lr": 4.3222349348801415e-06, "epoch": 1.5802445426029064, "percentage": 31.6, "elapsed_time": "1:05:05", "remaining_time": "2:20:51", "throughput": 19890.94, "total_tokens": 77685632}
|
|
{"current_steps": 24690, "total_steps": 78105, "loss": 0.3615, "lr": 4.321852421131811e-06, "epoch": 1.5805646245438831, "percentage": 31.61, "elapsed_time": "1:05:06", "remaining_time": "2:20:50", "throughput": 19891.57, "total_tokens": 77701952}
|
|
{"current_steps": 24695, "total_steps": 78105, "loss": 0.1978, "lr": 4.321469816409659e-06, "epoch": 1.5808847064848601, "percentage": 31.62, "elapsed_time": "1:05:06", "remaining_time": "2:20:49", "throughput": 19892.11, "total_tokens": 77717056}
|
|
{"current_steps": 24700, "total_steps": 78105, "loss": 0.4455, "lr": 4.3210871207327935e-06, "epoch": 1.5812047884258371, "percentage": 31.62, "elapsed_time": "1:05:07", "remaining_time": "2:20:48", "throughput": 19892.7, "total_tokens": 77732800}
|
|
{"current_steps": 24705, "total_steps": 78105, "loss": 0.3804, "lr": 4.320704334120322e-06, "epoch": 1.5815248703668139, "percentage": 31.63, "elapsed_time": "1:05:08", "remaining_time": "2:20:47", "throughput": 19893.23, "total_tokens": 77748224}
|
|
{"current_steps": 24710, "total_steps": 78105, "loss": 0.3229, "lr": 4.320321456591359e-06, "epoch": 1.5818449523077907, "percentage": 31.64, "elapsed_time": "1:05:08", "remaining_time": "2:20:46", "throughput": 19893.91, "total_tokens": 77764928}
|
|
{"current_steps": 24715, "total_steps": 78105, "loss": 0.3176, "lr": 4.3199384881650255e-06, "epoch": 1.5821650342487676, "percentage": 31.64, "elapsed_time": "1:05:09", "remaining_time": "2:20:45", "throughput": 19894.57, "total_tokens": 77781184}
|
|
{"current_steps": 24720, "total_steps": 78105, "loss": 0.216, "lr": 4.319555428860443e-06, "epoch": 1.5824851161897446, "percentage": 31.65, "elapsed_time": "1:05:10", "remaining_time": "2:20:44", "throughput": 19895.23, "total_tokens": 77797888}
|
|
{"current_steps": 24725, "total_steps": 78105, "loss": 0.2957, "lr": 4.31917227869674e-06, "epoch": 1.5828051981307216, "percentage": 31.66, "elapsed_time": "1:05:11", "remaining_time": "2:20:43", "throughput": 19895.83, "total_tokens": 77813888}
|
|
{"current_steps": 24730, "total_steps": 78105, "loss": 0.2514, "lr": 4.31878903769305e-06, "epoch": 1.5831252800716984, "percentage": 31.66, "elapsed_time": "1:05:11", "remaining_time": "2:20:42", "throughput": 19896.37, "total_tokens": 77829632}
|
|
{"current_steps": 24735, "total_steps": 78105, "loss": 0.5077, "lr": 4.318405705868508e-06, "epoch": 1.5834453620126752, "percentage": 31.67, "elapsed_time": "1:05:12", "remaining_time": "2:20:41", "throughput": 19896.98, "total_tokens": 77845504}
|
|
{"current_steps": 24740, "total_steps": 78105, "loss": 0.285, "lr": 4.318022283242257e-06, "epoch": 1.5837654439536522, "percentage": 31.68, "elapsed_time": "1:05:13", "remaining_time": "2:20:40", "throughput": 19897.49, "total_tokens": 77860608}
|
|
{"current_steps": 24745, "total_steps": 78105, "loss": 0.3475, "lr": 4.3176387698334435e-06, "epoch": 1.5840855258946291, "percentage": 31.68, "elapsed_time": "1:05:13", "remaining_time": "2:20:39", "throughput": 19898.14, "total_tokens": 77876736}
|
|
{"current_steps": 24750, "total_steps": 78105, "loss": 0.3145, "lr": 4.317255165661217e-06, "epoch": 1.584405607835606, "percentage": 31.69, "elapsed_time": "1:05:14", "remaining_time": "2:20:38", "throughput": 19898.83, "total_tokens": 77894016}
|
|
{"current_steps": 24755, "total_steps": 78105, "loss": 0.2956, "lr": 4.3168714707447336e-06, "epoch": 1.5847256897765827, "percentage": 31.69, "elapsed_time": "1:05:15", "remaining_time": "2:20:37", "throughput": 19899.4, "total_tokens": 77909504}
|
|
{"current_steps": 24760, "total_steps": 78105, "loss": 0.3287, "lr": 4.316487685103153e-06, "epoch": 1.5850457717175597, "percentage": 31.7, "elapsed_time": "1:05:15", "remaining_time": "2:20:36", "throughput": 19900.02, "total_tokens": 77925568}
|
|
{"current_steps": 24765, "total_steps": 78105, "loss": 0.4362, "lr": 4.316103808755638e-06, "epoch": 1.5853658536585367, "percentage": 31.71, "elapsed_time": "1:05:16", "remaining_time": "2:20:35", "throughput": 19900.64, "total_tokens": 77941632}
|
|
{"current_steps": 24770, "total_steps": 78105, "loss": 0.2431, "lr": 4.31571984172136e-06, "epoch": 1.5856859355995134, "percentage": 31.71, "elapsed_time": "1:05:17", "remaining_time": "2:20:34", "throughput": 19901.28, "total_tokens": 77958208}
|
|
{"current_steps": 24775, "total_steps": 78105, "loss": 0.45, "lr": 4.31533578401949e-06, "epoch": 1.5860060175404904, "percentage": 31.72, "elapsed_time": "1:05:17", "remaining_time": "2:20:33", "throughput": 19901.79, "total_tokens": 77973312}
|
|
{"current_steps": 24780, "total_steps": 78105, "loss": 0.377, "lr": 4.314951635669207e-06, "epoch": 1.5863260994814672, "percentage": 31.73, "elapsed_time": "1:05:18", "remaining_time": "2:20:32", "throughput": 19902.4, "total_tokens": 77989120}
|
|
{"current_steps": 24785, "total_steps": 78105, "loss": 0.4339, "lr": 4.314567396689692e-06, "epoch": 1.5866461814224442, "percentage": 31.73, "elapsed_time": "1:05:19", "remaining_time": "2:20:31", "throughput": 19902.97, "total_tokens": 78004672}
|
|
{"current_steps": 24790, "total_steps": 78105, "loss": 0.3368, "lr": 4.3141830671001335e-06, "epoch": 1.5869662633634212, "percentage": 31.74, "elapsed_time": "1:05:19", "remaining_time": "2:20:30", "throughput": 19903.72, "total_tokens": 78022208}
|
|
{"current_steps": 24795, "total_steps": 78105, "loss": 0.4114, "lr": 4.313798646919723e-06, "epoch": 1.587286345304398, "percentage": 31.75, "elapsed_time": "1:05:20", "remaining_time": "2:20:29", "throughput": 19904.35, "total_tokens": 78038400}
|
|
{"current_steps": 24800, "total_steps": 78105, "loss": 0.3207, "lr": 4.3134141361676555e-06, "epoch": 1.5876064272453747, "percentage": 31.75, "elapsed_time": "1:05:21", "remaining_time": "2:20:28", "throughput": 19904.85, "total_tokens": 78053440}
|
|
{"current_steps": 24805, "total_steps": 78105, "loss": 0.276, "lr": 4.3130295348631315e-06, "epoch": 1.5879265091863517, "percentage": 31.76, "elapsed_time": "1:05:22", "remaining_time": "2:20:27", "throughput": 19905.45, "total_tokens": 78069312}
|
|
{"current_steps": 24810, "total_steps": 78105, "loss": 0.3519, "lr": 4.312644843025356e-06, "epoch": 1.5882465911273287, "percentage": 31.76, "elapsed_time": "1:05:22", "remaining_time": "2:20:26", "throughput": 19906.08, "total_tokens": 78085504}
|
|
{"current_steps": 24815, "total_steps": 78105, "loss": 0.2728, "lr": 4.31226006067354e-06, "epoch": 1.5885666730683055, "percentage": 31.77, "elapsed_time": "1:05:23", "remaining_time": "2:20:25", "throughput": 19906.65, "total_tokens": 78101312}
|
|
{"current_steps": 24820, "total_steps": 78105, "loss": 0.2978, "lr": 4.3118751878268965e-06, "epoch": 1.5888867550092822, "percentage": 31.78, "elapsed_time": "1:05:24", "remaining_time": "2:20:24", "throughput": 19907.32, "total_tokens": 78118144}
|
|
{"current_steps": 24825, "total_steps": 78105, "loss": 0.5495, "lr": 4.311490224504642e-06, "epoch": 1.5892068369502592, "percentage": 31.78, "elapsed_time": "1:05:24", "remaining_time": "2:20:23", "throughput": 19907.79, "total_tokens": 78133312}
|
|
{"current_steps": 24830, "total_steps": 78105, "loss": 0.315, "lr": 4.311105170726002e-06, "epoch": 1.5895269188912362, "percentage": 31.79, "elapsed_time": "1:05:25", "remaining_time": "2:20:22", "throughput": 19908.36, "total_tokens": 78149120}
|
|
{"current_steps": 24835, "total_steps": 78105, "loss": 0.3602, "lr": 4.310720026510204e-06, "epoch": 1.5898470008322132, "percentage": 31.8, "elapsed_time": "1:05:26", "remaining_time": "2:20:21", "throughput": 19908.9, "total_tokens": 78164480}
|
|
{"current_steps": 24840, "total_steps": 78105, "loss": 0.3107, "lr": 4.310334791876479e-06, "epoch": 1.59016708277319, "percentage": 31.8, "elapsed_time": "1:05:26", "remaining_time": "2:20:20", "throughput": 19909.35, "total_tokens": 78179200}
|
|
{"current_steps": 24845, "total_steps": 78105, "loss": 0.3701, "lr": 4.309949466844065e-06, "epoch": 1.5904871647141667, "percentage": 31.81, "elapsed_time": "1:05:27", "remaining_time": "2:20:19", "throughput": 19909.85, "total_tokens": 78193920}
|
|
{"current_steps": 24850, "total_steps": 78105, "loss": 0.3104, "lr": 4.3095640514322015e-06, "epoch": 1.5908072466551437, "percentage": 31.82, "elapsed_time": "1:05:28", "remaining_time": "2:20:18", "throughput": 19910.48, "total_tokens": 78210112}
|
|
{"current_steps": 24855, "total_steps": 78105, "loss": 0.3044, "lr": 4.309178545660136e-06, "epoch": 1.5911273285961207, "percentage": 31.82, "elapsed_time": "1:05:28", "remaining_time": "2:20:17", "throughput": 19911.01, "total_tokens": 78225664}
|
|
{"current_steps": 24860, "total_steps": 78105, "loss": 0.3001, "lr": 4.308792949547116e-06, "epoch": 1.5914474105370975, "percentage": 31.83, "elapsed_time": "1:05:29", "remaining_time": "2:20:16", "throughput": 19911.61, "total_tokens": 78241728}
|
|
{"current_steps": 24865, "total_steps": 78105, "loss": 0.21, "lr": 4.308407263112399e-06, "epoch": 1.5917674924780743, "percentage": 31.84, "elapsed_time": "1:05:30", "remaining_time": "2:20:15", "throughput": 19912.2, "total_tokens": 78258048}
|
|
{"current_steps": 24870, "total_steps": 78105, "loss": 0.3303, "lr": 4.308021486375243e-06, "epoch": 1.5920875744190512, "percentage": 31.84, "elapsed_time": "1:05:30", "remaining_time": "2:20:14", "throughput": 19912.69, "total_tokens": 78273088}
|
|
{"current_steps": 24875, "total_steps": 78105, "loss": 0.3772, "lr": 4.307635619354911e-06, "epoch": 1.5924076563600282, "percentage": 31.85, "elapsed_time": "1:05:31", "remaining_time": "2:20:13", "throughput": 19913.36, "total_tokens": 78289472}
|
|
{"current_steps": 24880, "total_steps": 78105, "loss": 0.4343, "lr": 4.307249662070671e-06, "epoch": 1.592727738301005, "percentage": 31.85, "elapsed_time": "1:05:32", "remaining_time": "2:20:11", "throughput": 19913.76, "total_tokens": 78303552}
|
|
{"current_steps": 24885, "total_steps": 78105, "loss": 0.255, "lr": 4.3068636145417984e-06, "epoch": 1.593047820241982, "percentage": 31.86, "elapsed_time": "1:05:32", "remaining_time": "2:20:10", "throughput": 19914.36, "total_tokens": 78319488}
|
|
{"current_steps": 24890, "total_steps": 78105, "loss": 0.5656, "lr": 4.306477476787567e-06, "epoch": 1.5933679021829588, "percentage": 31.87, "elapsed_time": "1:05:33", "remaining_time": "2:20:09", "throughput": 19914.91, "total_tokens": 78335296}
|
|
{"current_steps": 24895, "total_steps": 78105, "loss": 0.331, "lr": 4.30609124882726e-06, "epoch": 1.5936879841239358, "percentage": 31.87, "elapsed_time": "1:05:34", "remaining_time": "2:20:08", "throughput": 19915.36, "total_tokens": 78349888}
|
|
{"current_steps": 24900, "total_steps": 78105, "loss": 0.4926, "lr": 4.305704930680165e-06, "epoch": 1.5940080660649127, "percentage": 31.88, "elapsed_time": "1:05:34", "remaining_time": "2:20:07", "throughput": 19915.92, "total_tokens": 78365120}
|
|
{"current_steps": 24905, "total_steps": 78105, "loss": 0.3521, "lr": 4.30531852236557e-06, "epoch": 1.5943281480058895, "percentage": 31.89, "elapsed_time": "1:05:35", "remaining_time": "2:20:06", "throughput": 19916.35, "total_tokens": 78379456}
|
|
{"current_steps": 24910, "total_steps": 78105, "loss": 0.3758, "lr": 4.304932023902773e-06, "epoch": 1.5946482299468663, "percentage": 31.89, "elapsed_time": "1:05:36", "remaining_time": "2:20:05", "throughput": 19917.0, "total_tokens": 78395520}
|
|
{"current_steps": 24915, "total_steps": 78105, "loss": 0.3558, "lr": 4.30454543531107e-06, "epoch": 1.5949683118878433, "percentage": 31.9, "elapsed_time": "1:05:36", "remaining_time": "2:20:04", "throughput": 19917.64, "total_tokens": 78412224}
|
|
{"current_steps": 24920, "total_steps": 78105, "loss": 0.4059, "lr": 4.30415875660977e-06, "epoch": 1.5952883938288203, "percentage": 31.91, "elapsed_time": "1:05:37", "remaining_time": "2:20:03", "throughput": 19918.26, "total_tokens": 78428224}
|
|
{"current_steps": 24925, "total_steps": 78105, "loss": 0.3417, "lr": 4.303771987818177e-06, "epoch": 1.595608475769797, "percentage": 31.91, "elapsed_time": "1:05:38", "remaining_time": "2:20:02", "throughput": 19919.04, "total_tokens": 78445632}
|
|
{"current_steps": 24930, "total_steps": 78105, "loss": 0.3598, "lr": 4.303385128955609e-06, "epoch": 1.5959285577107738, "percentage": 31.92, "elapsed_time": "1:05:39", "remaining_time": "2:20:01", "throughput": 19919.27, "total_tokens": 78462464}
|
|
{"current_steps": 24935, "total_steps": 78105, "loss": 0.262, "lr": 4.302998180041379e-06, "epoch": 1.5962486396517508, "percentage": 31.92, "elapsed_time": "1:05:39", "remaining_time": "2:20:00", "throughput": 19919.5, "total_tokens": 78476864}
|
|
{"current_steps": 24940, "total_steps": 78105, "loss": 0.3377, "lr": 4.3026111410948115e-06, "epoch": 1.5965687215927278, "percentage": 31.93, "elapsed_time": "1:05:40", "remaining_time": "2:19:59", "throughput": 19920.09, "total_tokens": 78492800}
|
|
{"current_steps": 24945, "total_steps": 78105, "loss": 0.3009, "lr": 4.302224012135233e-06, "epoch": 1.5968888035337048, "percentage": 31.94, "elapsed_time": "1:05:41", "remaining_time": "2:19:58", "throughput": 19920.63, "total_tokens": 78508288}
|
|
{"current_steps": 24950, "total_steps": 78105, "loss": 0.4052, "lr": 4.301836793181976e-06, "epoch": 1.5972088854746815, "percentage": 31.94, "elapsed_time": "1:05:41", "remaining_time": "2:19:57", "throughput": 19921.28, "total_tokens": 78524544}
|
|
{"current_steps": 24955, "total_steps": 78105, "loss": 0.265, "lr": 4.3014494842543745e-06, "epoch": 1.5975289674156583, "percentage": 31.95, "elapsed_time": "1:05:42", "remaining_time": "2:19:56", "throughput": 19921.88, "total_tokens": 78540416}
|
|
{"current_steps": 24960, "total_steps": 78105, "loss": 0.4615, "lr": 4.3010620853717685e-06, "epoch": 1.5978490493566353, "percentage": 31.96, "elapsed_time": "1:05:43", "remaining_time": "2:19:55", "throughput": 19922.4, "total_tokens": 78555712}
|
|
{"current_steps": 24965, "total_steps": 78105, "loss": 0.3191, "lr": 4.300674596553504e-06, "epoch": 1.5981691312976123, "percentage": 31.96, "elapsed_time": "1:05:43", "remaining_time": "2:19:54", "throughput": 19922.94, "total_tokens": 78571200}
|
|
{"current_steps": 24970, "total_steps": 78105, "loss": 0.3562, "lr": 4.300287017818929e-06, "epoch": 1.598489213238589, "percentage": 31.97, "elapsed_time": "1:05:44", "remaining_time": "2:19:53", "throughput": 19923.53, "total_tokens": 78586944}
|
|
{"current_steps": 24975, "total_steps": 78105, "loss": 0.235, "lr": 4.299899349187399e-06, "epoch": 1.5988092951795658, "percentage": 31.98, "elapsed_time": "1:05:45", "remaining_time": "2:19:52", "throughput": 19924.05, "total_tokens": 78601984}
|
|
{"current_steps": 24980, "total_steps": 78105, "loss": 0.31, "lr": 4.299511590678269e-06, "epoch": 1.5991293771205428, "percentage": 31.98, "elapsed_time": "1:05:45", "remaining_time": "2:19:51", "throughput": 19924.64, "total_tokens": 78617920}
|
|
{"current_steps": 24985, "total_steps": 78105, "loss": 0.3823, "lr": 4.299123742310904e-06, "epoch": 1.5994494590615198, "percentage": 31.99, "elapsed_time": "1:05:46", "remaining_time": "2:19:50", "throughput": 19925.3, "total_tokens": 78634496}
|
|
{"current_steps": 24990, "total_steps": 78105, "loss": 0.2959, "lr": 4.298735804104672e-06, "epoch": 1.5997695410024968, "percentage": 32.0, "elapsed_time": "1:05:47", "remaining_time": "2:19:49", "throughput": 19925.65, "total_tokens": 78648512}
|
|
{"current_steps": 24995, "total_steps": 78105, "loss": 0.3299, "lr": 4.2983477760789414e-06, "epoch": 1.6000896229434736, "percentage": 32.0, "elapsed_time": "1:05:47", "remaining_time": "2:19:48", "throughput": 19926.36, "total_tokens": 78665472}
|
|
{"current_steps": 25000, "total_steps": 78105, "loss": 0.2835, "lr": 4.297959658253091e-06, "epoch": 1.6004097048844503, "percentage": 32.01, "elapsed_time": "1:05:48", "remaining_time": "2:19:47", "throughput": 19926.85, "total_tokens": 78680512}
|
|
{"current_steps": 25005, "total_steps": 78105, "loss": 0.349, "lr": 4.2975714506465e-06, "epoch": 1.6007297868254273, "percentage": 32.01, "elapsed_time": "1:05:49", "remaining_time": "2:19:46", "throughput": 19927.24, "total_tokens": 78694784}
|
|
{"current_steps": 25010, "total_steps": 78105, "loss": 0.3617, "lr": 4.297183153278555e-06, "epoch": 1.6010498687664043, "percentage": 32.02, "elapsed_time": "1:05:49", "remaining_time": "2:19:45", "throughput": 19927.79, "total_tokens": 78710464}
|
|
{"current_steps": 25015, "total_steps": 78105, "loss": 0.2122, "lr": 4.296794766168643e-06, "epoch": 1.601369950707381, "percentage": 32.03, "elapsed_time": "1:05:50", "remaining_time": "2:19:44", "throughput": 19928.36, "total_tokens": 78726336}
|
|
{"current_steps": 25020, "total_steps": 78105, "loss": 0.3485, "lr": 4.2964062893361614e-06, "epoch": 1.6016900326483579, "percentage": 32.03, "elapsed_time": "1:05:51", "remaining_time": "2:19:43", "throughput": 19928.84, "total_tokens": 78741440}
|
|
{"current_steps": 25025, "total_steps": 78105, "loss": 0.2863, "lr": 4.296017722800505e-06, "epoch": 1.6020101145893348, "percentage": 32.04, "elapsed_time": "1:05:51", "remaining_time": "2:19:42", "throughput": 19929.35, "total_tokens": 78756480}
|
|
{"current_steps": 25030, "total_steps": 78105, "loss": 0.2454, "lr": 4.29562906658108e-06, "epoch": 1.6023301965303118, "percentage": 32.05, "elapsed_time": "1:05:52", "remaining_time": "2:19:40", "throughput": 19929.86, "total_tokens": 78771520}
|
|
{"current_steps": 25035, "total_steps": 78105, "loss": 0.41, "lr": 4.295240320697292e-06, "epoch": 1.6026502784712886, "percentage": 32.05, "elapsed_time": "1:05:53", "remaining_time": "2:19:39", "throughput": 19930.41, "total_tokens": 78787136}
|
|
{"current_steps": 25040, "total_steps": 78105, "loss": 0.2647, "lr": 4.294851485168553e-06, "epoch": 1.6029703604122656, "percentage": 32.06, "elapsed_time": "1:05:53", "remaining_time": "2:19:38", "throughput": 19930.93, "total_tokens": 78802688}
|
|
{"current_steps": 25045, "total_steps": 78105, "loss": 0.282, "lr": 4.294462560014281e-06, "epoch": 1.6032904423532424, "percentage": 32.07, "elapsed_time": "1:05:54", "remaining_time": "2:19:37", "throughput": 19931.55, "total_tokens": 78819072}
|
|
{"current_steps": 25050, "total_steps": 78105, "loss": 0.1755, "lr": 4.294073545253895e-06, "epoch": 1.6036105242942194, "percentage": 32.07, "elapsed_time": "1:05:55", "remaining_time": "2:19:36", "throughput": 19932.09, "total_tokens": 78834624}
|
|
{"current_steps": 25055, "total_steps": 78105, "loss": 0.3974, "lr": 4.293684440906821e-06, "epoch": 1.6039306062351963, "percentage": 32.08, "elapsed_time": "1:05:55", "remaining_time": "2:19:35", "throughput": 19932.7, "total_tokens": 78850688}
|
|
{"current_steps": 25060, "total_steps": 78105, "loss": 0.3474, "lr": 4.29329524699249e-06, "epoch": 1.6042506881761731, "percentage": 32.09, "elapsed_time": "1:05:56", "remaining_time": "2:19:34", "throughput": 19933.25, "total_tokens": 78866816}
|
|
{"current_steps": 25065, "total_steps": 78105, "loss": 0.2823, "lr": 4.292905963530334e-06, "epoch": 1.6045707701171499, "percentage": 32.09, "elapsed_time": "1:05:57", "remaining_time": "2:19:33", "throughput": 19933.66, "total_tokens": 78881472}
|
|
{"current_steps": 25070, "total_steps": 78105, "loss": 0.2369, "lr": 4.292516590539793e-06, "epoch": 1.6048908520581269, "percentage": 32.1, "elapsed_time": "1:05:57", "remaining_time": "2:19:32", "throughput": 19934.16, "total_tokens": 78896704}
|
|
{"current_steps": 25075, "total_steps": 78105, "loss": 0.2956, "lr": 4.292127128040311e-06, "epoch": 1.6052109339991039, "percentage": 32.1, "elapsed_time": "1:05:58", "remaining_time": "2:19:31", "throughput": 19934.67, "total_tokens": 78912000}
|
|
{"current_steps": 25080, "total_steps": 78105, "loss": 0.2587, "lr": 4.291737576051335e-06, "epoch": 1.6055310159400806, "percentage": 32.11, "elapsed_time": "1:05:59", "remaining_time": "2:19:30", "throughput": 19935.22, "total_tokens": 78927488}
|
|
{"current_steps": 25085, "total_steps": 78105, "loss": 0.2982, "lr": 4.291347934592317e-06, "epoch": 1.6058510978810574, "percentage": 32.12, "elapsed_time": "1:05:59", "remaining_time": "2:19:29", "throughput": 19935.79, "total_tokens": 78943424}
|
|
{"current_steps": 25090, "total_steps": 78105, "loss": 0.3259, "lr": 4.290958203682715e-06, "epoch": 1.6061711798220344, "percentage": 32.12, "elapsed_time": "1:06:00", "remaining_time": "2:19:28", "throughput": 19936.3, "total_tokens": 78958784}
|
|
{"current_steps": 25095, "total_steps": 78105, "loss": 0.3878, "lr": 4.290568383341987e-06, "epoch": 1.6064912617630114, "percentage": 32.13, "elapsed_time": "1:06:01", "remaining_time": "2:19:27", "throughput": 19936.83, "total_tokens": 78974272}
|
|
{"current_steps": 25100, "total_steps": 78105, "loss": 0.3547, "lr": 4.290178473589602e-06, "epoch": 1.6068113437039884, "percentage": 32.14, "elapsed_time": "1:06:01", "remaining_time": "2:19:26", "throughput": 19937.34, "total_tokens": 78989440}
|
|
{"current_steps": 25105, "total_steps": 78105, "loss": 0.335, "lr": 4.289788474445029e-06, "epoch": 1.6071314256449651, "percentage": 32.14, "elapsed_time": "1:06:02", "remaining_time": "2:19:25", "throughput": 19937.88, "total_tokens": 79005056}
|
|
{"current_steps": 25110, "total_steps": 78105, "loss": 0.3209, "lr": 4.289398385927742e-06, "epoch": 1.607451507585942, "percentage": 32.15, "elapsed_time": "1:06:03", "remaining_time": "2:19:24", "throughput": 19938.43, "total_tokens": 79020864}
|
|
{"current_steps": 25115, "total_steps": 78105, "loss": 0.3117, "lr": 4.2890082080572205e-06, "epoch": 1.607771589526919, "percentage": 32.16, "elapsed_time": "1:06:03", "remaining_time": "2:19:23", "throughput": 19939.05, "total_tokens": 79037056}
|
|
{"current_steps": 25120, "total_steps": 78105, "loss": 0.3472, "lr": 4.288617940852947e-06, "epoch": 1.608091671467896, "percentage": 32.16, "elapsed_time": "1:06:04", "remaining_time": "2:19:22", "throughput": 19939.48, "total_tokens": 79051648}
|
|
{"current_steps": 25125, "total_steps": 78105, "loss": 0.3045, "lr": 4.28822758433441e-06, "epoch": 1.6084117534088727, "percentage": 32.17, "elapsed_time": "1:06:05", "remaining_time": "2:19:21", "throughput": 19940.21, "total_tokens": 79069120}
|
|
{"current_steps": 25130, "total_steps": 78105, "loss": 0.2427, "lr": 4.287837138521103e-06, "epoch": 1.6087318353498494, "percentage": 32.17, "elapsed_time": "1:06:06", "remaining_time": "2:19:20", "throughput": 19940.83, "total_tokens": 79085440}
|
|
{"current_steps": 25135, "total_steps": 78105, "loss": 0.4127, "lr": 4.287446603432522e-06, "epoch": 1.6090519172908264, "percentage": 32.18, "elapsed_time": "1:06:06", "remaining_time": "2:19:19", "throughput": 19941.34, "total_tokens": 79100672}
|
|
{"current_steps": 25140, "total_steps": 78105, "loss": 0.4773, "lr": 4.2870559790881665e-06, "epoch": 1.6093719992318034, "percentage": 32.19, "elapsed_time": "1:06:07", "remaining_time": "2:19:18", "throughput": 19941.85, "total_tokens": 79115904}
|
|
{"current_steps": 25145, "total_steps": 78105, "loss": 0.4103, "lr": 4.286665265507544e-06, "epoch": 1.6096920811727804, "percentage": 32.19, "elapsed_time": "1:06:07", "remaining_time": "2:19:17", "throughput": 19942.31, "total_tokens": 79130816}
|
|
{"current_steps": 25150, "total_steps": 78105, "loss": 0.2558, "lr": 4.286274462710166e-06, "epoch": 1.6100121631137572, "percentage": 32.2, "elapsed_time": "1:06:08", "remaining_time": "2:19:16", "throughput": 19942.8, "total_tokens": 79145792}
|
|
{"current_steps": 25155, "total_steps": 78105, "loss": 0.2833, "lr": 4.285883570715545e-06, "epoch": 1.610332245054734, "percentage": 32.21, "elapsed_time": "1:06:09", "remaining_time": "2:19:15", "throughput": 19943.4, "total_tokens": 79161920}
|
|
{"current_steps": 25160, "total_steps": 78105, "loss": 0.3262, "lr": 4.2854925895432005e-06, "epoch": 1.610652326995711, "percentage": 32.21, "elapsed_time": "1:06:10", "remaining_time": "2:19:14", "throughput": 19943.95, "total_tokens": 79177536}
|
|
{"current_steps": 25165, "total_steps": 78105, "loss": 0.4285, "lr": 4.285101519212657e-06, "epoch": 1.610972408936688, "percentage": 32.22, "elapsed_time": "1:06:10", "remaining_time": "2:19:13", "throughput": 19944.49, "total_tokens": 79193088}
|
|
{"current_steps": 25170, "total_steps": 78105, "loss": 0.3316, "lr": 4.2847103597434415e-06, "epoch": 1.6112924908776647, "percentage": 32.23, "elapsed_time": "1:06:11", "remaining_time": "2:19:12", "throughput": 19945.13, "total_tokens": 79209536}
|
|
{"current_steps": 25175, "total_steps": 78105, "loss": 0.2112, "lr": 4.284319111155086e-06, "epoch": 1.6116125728186415, "percentage": 32.23, "elapsed_time": "1:06:12", "remaining_time": "2:19:11", "throughput": 19945.69, "total_tokens": 79225344}
|
|
{"current_steps": 25180, "total_steps": 78105, "loss": 0.4177, "lr": 4.28392777346713e-06, "epoch": 1.6119326547596184, "percentage": 32.24, "elapsed_time": "1:06:12", "remaining_time": "2:19:10", "throughput": 19946.36, "total_tokens": 79242304}
|
|
{"current_steps": 25185, "total_steps": 78105, "loss": 0.2604, "lr": 4.283536346699112e-06, "epoch": 1.6122527367005954, "percentage": 32.25, "elapsed_time": "1:06:13", "remaining_time": "2:19:09", "throughput": 19946.78, "total_tokens": 79256832}
|
|
{"current_steps": 25190, "total_steps": 78105, "loss": 0.4069, "lr": 4.2831448308705795e-06, "epoch": 1.6125728186415722, "percentage": 32.25, "elapsed_time": "1:06:14", "remaining_time": "2:19:08", "throughput": 19947.34, "total_tokens": 79272448}
|
|
{"current_steps": 25195, "total_steps": 78105, "loss": 0.2348, "lr": 4.282753226001082e-06, "epoch": 1.612892900582549, "percentage": 32.26, "elapsed_time": "1:06:14", "remaining_time": "2:19:07", "throughput": 19947.94, "total_tokens": 79288832}
|
|
{"current_steps": 25200, "total_steps": 78105, "loss": 0.4387, "lr": 4.282361532110174e-06, "epoch": 1.613212982523526, "percentage": 32.26, "elapsed_time": "1:06:15", "remaining_time": "2:19:06", "throughput": 19948.58, "total_tokens": 79305408}
|
|
{"current_steps": 25205, "total_steps": 78105, "loss": 0.3998, "lr": 4.281969749217415e-06, "epoch": 1.613533064464503, "percentage": 32.27, "elapsed_time": "1:06:16", "remaining_time": "2:19:05", "throughput": 19949.12, "total_tokens": 79320960}
|
|
{"current_steps": 25210, "total_steps": 78105, "loss": 0.3832, "lr": 4.281577877342369e-06, "epoch": 1.61385314640548, "percentage": 32.28, "elapsed_time": "1:06:16", "remaining_time": "2:19:04", "throughput": 19949.6, "total_tokens": 79336064}
|
|
{"current_steps": 25215, "total_steps": 78105, "loss": 0.3635, "lr": 4.281185916504604e-06, "epoch": 1.6141732283464567, "percentage": 32.28, "elapsed_time": "1:06:17", "remaining_time": "2:19:03", "throughput": 19950.16, "total_tokens": 79351808}
|
|
{"current_steps": 25220, "total_steps": 78105, "loss": 0.3179, "lr": 4.2807938667236915e-06, "epoch": 1.6144933102874335, "percentage": 32.29, "elapsed_time": "1:06:18", "remaining_time": "2:19:01", "throughput": 19950.56, "total_tokens": 79366144}
|
|
{"current_steps": 25225, "total_steps": 78105, "loss": 0.3024, "lr": 4.28040172801921e-06, "epoch": 1.6148133922284105, "percentage": 32.3, "elapsed_time": "1:06:18", "remaining_time": "2:19:00", "throughput": 19951.08, "total_tokens": 79381632}
|
|
{"current_steps": 25230, "total_steps": 78105, "loss": 0.2843, "lr": 4.28000950041074e-06, "epoch": 1.6151334741693875, "percentage": 32.3, "elapsed_time": "1:06:19", "remaining_time": "2:18:59", "throughput": 19951.57, "total_tokens": 79397248}
|
|
{"current_steps": 25235, "total_steps": 78105, "loss": 0.2988, "lr": 4.279617183917866e-06, "epoch": 1.6154535561103642, "percentage": 32.31, "elapsed_time": "1:06:20", "remaining_time": "2:18:58", "throughput": 19952.08, "total_tokens": 79412992}
|
|
{"current_steps": 25240, "total_steps": 78105, "loss": 0.3663, "lr": 4.2792247785601805e-06, "epoch": 1.615773638051341, "percentage": 32.32, "elapsed_time": "1:06:20", "remaining_time": "2:18:57", "throughput": 19952.6, "total_tokens": 79428736}
|
|
{"current_steps": 25245, "total_steps": 78105, "loss": 0.2814, "lr": 4.278832284357277e-06, "epoch": 1.616093719992318, "percentage": 32.32, "elapsed_time": "1:06:21", "remaining_time": "2:18:56", "throughput": 19953.08, "total_tokens": 79444032}
|
|
{"current_steps": 25250, "total_steps": 78105, "loss": 0.2383, "lr": 4.278439701328755e-06, "epoch": 1.616413801933295, "percentage": 32.33, "elapsed_time": "1:06:22", "remaining_time": "2:18:55", "throughput": 19953.64, "total_tokens": 79459904}
|
|
{"current_steps": 25255, "total_steps": 78105, "loss": 0.2475, "lr": 4.278047029494218e-06, "epoch": 1.616733883874272, "percentage": 32.33, "elapsed_time": "1:06:22", "remaining_time": "2:18:54", "throughput": 19954.25, "total_tokens": 79476480}
|
|
{"current_steps": 25260, "total_steps": 78105, "loss": 0.2758, "lr": 4.2776542688732734e-06, "epoch": 1.6170539658152487, "percentage": 32.34, "elapsed_time": "1:06:23", "remaining_time": "2:18:53", "throughput": 19954.83, "total_tokens": 79492864}
|
|
{"current_steps": 25265, "total_steps": 78105, "loss": 0.4661, "lr": 4.277261419485534e-06, "epoch": 1.6173740477562255, "percentage": 32.35, "elapsed_time": "1:06:24", "remaining_time": "2:18:52", "throughput": 19955.4, "total_tokens": 79509056}
|
|
{"current_steps": 25270, "total_steps": 78105, "loss": 0.1872, "lr": 4.2768684813506166e-06, "epoch": 1.6176941296972025, "percentage": 32.35, "elapsed_time": "1:06:25", "remaining_time": "2:18:52", "throughput": 19956.04, "total_tokens": 79526016}
|
|
{"current_steps": 25275, "total_steps": 78105, "loss": 0.2831, "lr": 4.276475454488143e-06, "epoch": 1.6180142116381795, "percentage": 32.36, "elapsed_time": "1:06:25", "remaining_time": "2:18:50", "throughput": 19956.53, "total_tokens": 79541248}
|
|
{"current_steps": 25280, "total_steps": 78105, "loss": 0.3685, "lr": 4.276082338917739e-06, "epoch": 1.6183342935791563, "percentage": 32.37, "elapsed_time": "1:06:26", "remaining_time": "2:18:49", "throughput": 19957.07, "total_tokens": 79556736}
|
|
{"current_steps": 25285, "total_steps": 78105, "loss": 0.3198, "lr": 4.275689134659033e-06, "epoch": 1.618654375520133, "percentage": 32.37, "elapsed_time": "1:06:27", "remaining_time": "2:18:48", "throughput": 19957.56, "total_tokens": 79571648}
|
|
{"current_steps": 25290, "total_steps": 78105, "loss": 0.2793, "lr": 4.275295841731663e-06, "epoch": 1.61897445746111, "percentage": 32.38, "elapsed_time": "1:06:27", "remaining_time": "2:18:47", "throughput": 19957.98, "total_tokens": 79586304}
|
|
{"current_steps": 25295, "total_steps": 78105, "loss": 0.2783, "lr": 4.274902460155263e-06, "epoch": 1.619294539402087, "percentage": 32.39, "elapsed_time": "1:06:28", "remaining_time": "2:18:46", "throughput": 19958.45, "total_tokens": 79601408}
|
|
{"current_steps": 25300, "total_steps": 78105, "loss": 0.3915, "lr": 4.274508989949482e-06, "epoch": 1.6196146213430638, "percentage": 32.39, "elapsed_time": "1:06:29", "remaining_time": "2:18:45", "throughput": 19958.95, "total_tokens": 79616768}
|
|
{"current_steps": 25305, "total_steps": 78105, "loss": 0.3715, "lr": 4.274115431133963e-06, "epoch": 1.6199347032840408, "percentage": 32.4, "elapsed_time": "1:06:29", "remaining_time": "2:18:44", "throughput": 19959.45, "total_tokens": 79632128}
|
|
{"current_steps": 25310, "total_steps": 78105, "loss": 0.2483, "lr": 4.273721783728362e-06, "epoch": 1.6202547852250175, "percentage": 32.41, "elapsed_time": "1:06:30", "remaining_time": "2:18:43", "throughput": 19959.87, "total_tokens": 79646592}
|
|
{"current_steps": 25315, "total_steps": 78105, "loss": 0.3975, "lr": 4.273328047752333e-06, "epoch": 1.6205748671659945, "percentage": 32.41, "elapsed_time": "1:06:31", "remaining_time": "2:18:42", "throughput": 19960.34, "total_tokens": 79662080}
|
|
{"current_steps": 25320, "total_steps": 78105, "loss": 0.2074, "lr": 4.272934223225539e-06, "epoch": 1.6208949491069715, "percentage": 32.42, "elapsed_time": "1:06:31", "remaining_time": "2:18:41", "throughput": 19960.78, "total_tokens": 79676992}
|
|
{"current_steps": 25325, "total_steps": 78105, "loss": 0.2971, "lr": 4.272540310167644e-06, "epoch": 1.6212150310479483, "percentage": 32.42, "elapsed_time": "1:06:32", "remaining_time": "2:18:40", "throughput": 19961.26, "total_tokens": 79692224}
|
|
{"current_steps": 25330, "total_steps": 78105, "loss": 0.3212, "lr": 4.272146308598319e-06, "epoch": 1.621535112988925, "percentage": 32.43, "elapsed_time": "1:06:33", "remaining_time": "2:18:39", "throughput": 19961.83, "total_tokens": 79708672}
|
|
{"current_steps": 25335, "total_steps": 78105, "loss": 0.3313, "lr": 4.271752218537239e-06, "epoch": 1.621855194929902, "percentage": 32.44, "elapsed_time": "1:06:33", "remaining_time": "2:18:38", "throughput": 19962.36, "total_tokens": 79724544}
|
|
{"current_steps": 25340, "total_steps": 78105, "loss": 0.1934, "lr": 4.27135804000408e-06, "epoch": 1.622175276870879, "percentage": 32.44, "elapsed_time": "1:06:34", "remaining_time": "2:18:37", "throughput": 19962.82, "total_tokens": 79739456}
|
|
{"current_steps": 25345, "total_steps": 78105, "loss": 0.297, "lr": 4.270963773018528e-06, "epoch": 1.6224953588118558, "percentage": 32.45, "elapsed_time": "1:06:35", "remaining_time": "2:18:36", "throughput": 19963.4, "total_tokens": 79755648}
|
|
{"current_steps": 25350, "total_steps": 78105, "loss": 0.4037, "lr": 4.270569417600271e-06, "epoch": 1.6228154407528326, "percentage": 32.46, "elapsed_time": "1:06:35", "remaining_time": "2:18:35", "throughput": 19963.94, "total_tokens": 79771456}
|
|
{"current_steps": 25355, "total_steps": 78105, "loss": 0.3551, "lr": 4.270174973768998e-06, "epoch": 1.6231355226938096, "percentage": 32.46, "elapsed_time": "1:06:36", "remaining_time": "2:18:34", "throughput": 19964.45, "total_tokens": 79786624}
|
|
{"current_steps": 25360, "total_steps": 78105, "loss": 0.2503, "lr": 4.269780441544409e-06, "epoch": 1.6234556046347866, "percentage": 32.47, "elapsed_time": "1:06:37", "remaining_time": "2:18:33", "throughput": 19964.87, "total_tokens": 79801536}
|
|
{"current_steps": 25365, "total_steps": 78105, "loss": 0.4006, "lr": 4.269385820946203e-06, "epoch": 1.6237756865757635, "percentage": 32.48, "elapsed_time": "1:06:37", "remaining_time": "2:18:32", "throughput": 19965.55, "total_tokens": 79818368}
|
|
{"current_steps": 25370, "total_steps": 78105, "loss": 0.2708, "lr": 4.268991111994084e-06, "epoch": 1.6240957685167403, "percentage": 32.48, "elapsed_time": "1:06:38", "remaining_time": "2:18:31", "throughput": 19966.14, "total_tokens": 79834240}
|
|
{"current_steps": 25375, "total_steps": 78105, "loss": 0.369, "lr": 4.268596314707764e-06, "epoch": 1.624415850457717, "percentage": 32.49, "elapsed_time": "1:06:39", "remaining_time": "2:18:30", "throughput": 19966.67, "total_tokens": 79849792}
|
|
{"current_steps": 25380, "total_steps": 78105, "loss": 0.4254, "lr": 4.268201429106957e-06, "epoch": 1.624735932398694, "percentage": 32.49, "elapsed_time": "1:06:39", "remaining_time": "2:18:29", "throughput": 19967.21, "total_tokens": 79865472}
|
|
{"current_steps": 25385, "total_steps": 78105, "loss": 0.3663, "lr": 4.26780645521138e-06, "epoch": 1.625056014339671, "percentage": 32.5, "elapsed_time": "1:06:40", "remaining_time": "2:18:28", "throughput": 19967.79, "total_tokens": 79881792}
|
|
{"current_steps": 25390, "total_steps": 78105, "loss": 0.3271, "lr": 4.267411393040757e-06, "epoch": 1.6253760962806478, "percentage": 32.51, "elapsed_time": "1:06:41", "remaining_time": "2:18:27", "throughput": 19968.41, "total_tokens": 79897920}
|
|
{"current_steps": 25395, "total_steps": 78105, "loss": 0.3354, "lr": 4.267016242614816e-06, "epoch": 1.6256961782216246, "percentage": 32.51, "elapsed_time": "1:06:41", "remaining_time": "2:18:26", "throughput": 19968.86, "total_tokens": 79912640}
|
|
{"current_steps": 25400, "total_steps": 78105, "loss": 0.2257, "lr": 4.2666210039532865e-06, "epoch": 1.6260162601626016, "percentage": 32.52, "elapsed_time": "1:06:42", "remaining_time": "2:18:25", "throughput": 19969.37, "total_tokens": 79927936}
|
|
{"current_steps": 25405, "total_steps": 78105, "loss": 0.3853, "lr": 4.266225677075907e-06, "epoch": 1.6263363421035786, "percentage": 32.53, "elapsed_time": "1:06:43", "remaining_time": "2:18:24", "throughput": 19969.9, "total_tokens": 79943744}
|
|
{"current_steps": 25410, "total_steps": 78105, "loss": 0.2559, "lr": 4.265830262002416e-06, "epoch": 1.6266564240445556, "percentage": 32.53, "elapsed_time": "1:06:43", "remaining_time": "2:18:23", "throughput": 19970.35, "total_tokens": 79958720}
|
|
{"current_steps": 25415, "total_steps": 78105, "loss": 0.3398, "lr": 4.265434758752561e-06, "epoch": 1.6269765059855323, "percentage": 32.54, "elapsed_time": "1:06:44", "remaining_time": "2:18:22", "throughput": 19970.95, "total_tokens": 79974912}
|
|
{"current_steps": 25420, "total_steps": 78105, "loss": 0.3683, "lr": 4.265039167346089e-06, "epoch": 1.627296587926509, "percentage": 32.55, "elapsed_time": "1:06:45", "remaining_time": "2:18:21", "throughput": 19971.5, "total_tokens": 79990976}
|
|
{"current_steps": 25425, "total_steps": 78105, "loss": 0.3442, "lr": 4.264643487802756e-06, "epoch": 1.627616669867486, "percentage": 32.55, "elapsed_time": "1:06:45", "remaining_time": "2:18:20", "throughput": 19972.07, "total_tokens": 80007040}
|
|
{"current_steps": 25430, "total_steps": 78105, "loss": 0.3008, "lr": 4.264247720142317e-06, "epoch": 1.627936751808463, "percentage": 32.56, "elapsed_time": "1:06:46", "remaining_time": "2:18:19", "throughput": 19972.54, "total_tokens": 80022144}
|
|
{"current_steps": 25435, "total_steps": 78105, "loss": 0.3936, "lr": 4.2638518643845375e-06, "epoch": 1.6282568337494399, "percentage": 32.57, "elapsed_time": "1:06:47", "remaining_time": "2:18:18", "throughput": 19973.12, "total_tokens": 80038272}
|
|
{"current_steps": 25440, "total_steps": 78105, "loss": 0.3108, "lr": 4.263455920549184e-06, "epoch": 1.6285769156904166, "percentage": 32.57, "elapsed_time": "1:06:47", "remaining_time": "2:18:17", "throughput": 19973.73, "total_tokens": 80054592}
|
|
{"current_steps": 25445, "total_steps": 78105, "loss": 0.3196, "lr": 4.263059888656026e-06, "epoch": 1.6288969976313936, "percentage": 32.58, "elapsed_time": "1:06:48", "remaining_time": "2:18:16", "throughput": 19974.23, "total_tokens": 80069824}
|
|
{"current_steps": 25450, "total_steps": 78105, "loss": 0.5387, "lr": 4.262663768724841e-06, "epoch": 1.6292170795723706, "percentage": 32.58, "elapsed_time": "1:06:49", "remaining_time": "2:18:15", "throughput": 19974.77, "total_tokens": 80085568}
|
|
{"current_steps": 25455, "total_steps": 78105, "loss": 0.3174, "lr": 4.26226756077541e-06, "epoch": 1.6295371615133474, "percentage": 32.59, "elapsed_time": "1:06:49", "remaining_time": "2:18:14", "throughput": 19975.21, "total_tokens": 80100096}
|
|
{"current_steps": 25460, "total_steps": 78105, "loss": 0.3078, "lr": 4.261871264827515e-06, "epoch": 1.6298572434543241, "percentage": 32.6, "elapsed_time": "1:06:50", "remaining_time": "2:18:13", "throughput": 19975.74, "total_tokens": 80115712}
|
|
{"current_steps": 25465, "total_steps": 78105, "loss": 0.436, "lr": 4.2614748809009476e-06, "epoch": 1.6301773253953011, "percentage": 32.6, "elapsed_time": "1:06:51", "remaining_time": "2:18:11", "throughput": 19976.31, "total_tokens": 80131264}
|
|
{"current_steps": 25470, "total_steps": 78105, "loss": 0.4129, "lr": 4.261078409015499e-06, "epoch": 1.6304974073362781, "percentage": 32.61, "elapsed_time": "1:06:51", "remaining_time": "2:18:10", "throughput": 19976.79, "total_tokens": 80146432}
|
|
{"current_steps": 25475, "total_steps": 78105, "loss": 0.3428, "lr": 4.260681849190967e-06, "epoch": 1.6308174892772551, "percentage": 32.62, "elapsed_time": "1:06:52", "remaining_time": "2:18:09", "throughput": 19977.34, "total_tokens": 80162112}
|
|
{"current_steps": 25480, "total_steps": 78105, "loss": 0.4165, "lr": 4.260285201447156e-06, "epoch": 1.6311375712182319, "percentage": 32.62, "elapsed_time": "1:06:53", "remaining_time": "2:18:08", "throughput": 19977.85, "total_tokens": 80177472}
|
|
{"current_steps": 25485, "total_steps": 78105, "loss": 0.3656, "lr": 4.25988846580387e-06, "epoch": 1.6314576531592087, "percentage": 32.63, "elapsed_time": "1:06:54", "remaining_time": "2:18:07", "throughput": 19978.47, "total_tokens": 80193984}
|
|
{"current_steps": 25490, "total_steps": 78105, "loss": 0.2376, "lr": 4.2594916422809214e-06, "epoch": 1.6317777351001856, "percentage": 32.64, "elapsed_time": "1:06:54", "remaining_time": "2:18:06", "throughput": 19978.99, "total_tokens": 80209344}
|
|
{"current_steps": 25495, "total_steps": 78105, "loss": 0.2847, "lr": 4.259094730898125e-06, "epoch": 1.6320978170411626, "percentage": 32.64, "elapsed_time": "1:06:55", "remaining_time": "2:18:05", "throughput": 19979.46, "total_tokens": 80224448}
|
|
{"current_steps": 25500, "total_steps": 78105, "loss": 0.4157, "lr": 4.2586977316753e-06, "epoch": 1.6324178989821394, "percentage": 32.65, "elapsed_time": "1:06:56", "remaining_time": "2:18:04", "throughput": 19980.0, "total_tokens": 80240064}
|
|
{"current_steps": 25505, "total_steps": 78105, "loss": 0.3217, "lr": 4.258300644632272e-06, "epoch": 1.6327379809231162, "percentage": 32.65, "elapsed_time": "1:06:56", "remaining_time": "2:18:03", "throughput": 19980.49, "total_tokens": 80255104}
|
|
{"current_steps": 25510, "total_steps": 78105, "loss": 0.2862, "lr": 4.257903469788867e-06, "epoch": 1.6330580628640932, "percentage": 32.66, "elapsed_time": "1:06:57", "remaining_time": "2:18:02", "throughput": 19981.05, "total_tokens": 80270912}
|
|
{"current_steps": 25515, "total_steps": 78105, "loss": 0.2993, "lr": 4.257506207164921e-06, "epoch": 1.6333781448050702, "percentage": 32.67, "elapsed_time": "1:06:58", "remaining_time": "2:18:01", "throughput": 19981.51, "total_tokens": 80285760}
|
|
{"current_steps": 25520, "total_steps": 78105, "loss": 0.4181, "lr": 4.257108856780268e-06, "epoch": 1.6336982267460471, "percentage": 32.67, "elapsed_time": "1:06:58", "remaining_time": "2:18:00", "throughput": 19982.07, "total_tokens": 80301376}
|
|
{"current_steps": 25525, "total_steps": 78105, "loss": 0.3956, "lr": 4.256711418654753e-06, "epoch": 1.634018308687024, "percentage": 32.68, "elapsed_time": "1:06:59", "remaining_time": "2:17:59", "throughput": 19982.6, "total_tokens": 80316992}
|
|
{"current_steps": 25530, "total_steps": 78105, "loss": 0.247, "lr": 4.256313892808219e-06, "epoch": 1.6343383906280007, "percentage": 32.69, "elapsed_time": "1:06:59", "remaining_time": "2:17:58", "throughput": 19983.07, "total_tokens": 80331904}
|
|
{"current_steps": 25535, "total_steps": 78105, "loss": 0.2341, "lr": 4.255916279260517e-06, "epoch": 1.6346584725689777, "percentage": 32.69, "elapsed_time": "1:07:00", "remaining_time": "2:17:57", "throughput": 19983.55, "total_tokens": 80346944}
|
|
{"current_steps": 25540, "total_steps": 78105, "loss": 0.2443, "lr": 4.255518578031503e-06, "epoch": 1.6349785545099547, "percentage": 32.7, "elapsed_time": "1:07:01", "remaining_time": "2:17:56", "throughput": 19984.17, "total_tokens": 80363648}
|
|
{"current_steps": 25545, "total_steps": 78105, "loss": 0.303, "lr": 4.255120789141035e-06, "epoch": 1.6352986364509314, "percentage": 32.71, "elapsed_time": "1:07:02", "remaining_time": "2:17:55", "throughput": 19984.6, "total_tokens": 80378496}
|
|
{"current_steps": 25550, "total_steps": 78105, "loss": 0.3314, "lr": 4.254722912608977e-06, "epoch": 1.6356187183919082, "percentage": 32.71, "elapsed_time": "1:07:02", "remaining_time": "2:17:54", "throughput": 19985.12, "total_tokens": 80394112}
|
|
{"current_steps": 25555, "total_steps": 78105, "loss": 0.4088, "lr": 4.2543249484551976e-06, "epoch": 1.6359388003328852, "percentage": 32.72, "elapsed_time": "1:07:03", "remaining_time": "2:17:53", "throughput": 19985.69, "total_tokens": 80409984}
|
|
{"current_steps": 25560, "total_steps": 78105, "loss": 0.3809, "lr": 4.253926896699566e-06, "epoch": 1.6362588822738622, "percentage": 32.73, "elapsed_time": "1:07:04", "remaining_time": "2:17:52", "throughput": 19986.26, "total_tokens": 80426048}
|
|
{"current_steps": 25565, "total_steps": 78105, "loss": 0.3026, "lr": 4.2535287573619635e-06, "epoch": 1.636578964214839, "percentage": 32.73, "elapsed_time": "1:07:04", "remaining_time": "2:17:51", "throughput": 19986.81, "total_tokens": 80442112}
|
|
{"current_steps": 25570, "total_steps": 78105, "loss": 0.2828, "lr": 4.253130530462267e-06, "epoch": 1.636899046155816, "percentage": 32.74, "elapsed_time": "1:07:05", "remaining_time": "2:17:50", "throughput": 19987.29, "total_tokens": 80457216}
|
|
{"current_steps": 25575, "total_steps": 78105, "loss": 0.3377, "lr": 4.252732216020364e-06, "epoch": 1.6372191280967927, "percentage": 32.74, "elapsed_time": "1:07:06", "remaining_time": "2:17:49", "throughput": 19987.89, "total_tokens": 80473280}
|
|
{"current_steps": 25580, "total_steps": 78105, "loss": 0.4095, "lr": 4.252333814056145e-06, "epoch": 1.6375392100377697, "percentage": 32.75, "elapsed_time": "1:07:06", "remaining_time": "2:17:48", "throughput": 19988.36, "total_tokens": 80488320}
|
|
{"current_steps": 25585, "total_steps": 78105, "loss": 0.3433, "lr": 4.251935324589502e-06, "epoch": 1.6378592919787467, "percentage": 32.76, "elapsed_time": "1:07:07", "remaining_time": "2:17:47", "throughput": 19988.95, "total_tokens": 80504640}
|
|
{"current_steps": 25590, "total_steps": 78105, "loss": 0.4683, "lr": 4.2515367476403335e-06, "epoch": 1.6381793739197235, "percentage": 32.76, "elapsed_time": "1:07:08", "remaining_time": "2:17:46", "throughput": 19989.37, "total_tokens": 80519232}
|
|
{"current_steps": 25595, "total_steps": 78105, "loss": 0.2173, "lr": 4.251138083228544e-06, "epoch": 1.6384994558607002, "percentage": 32.77, "elapsed_time": "1:07:08", "remaining_time": "2:17:45", "throughput": 19989.87, "total_tokens": 80534720}
|
|
{"current_steps": 25600, "total_steps": 78105, "loss": 0.4885, "lr": 4.25073933137404e-06, "epoch": 1.6388195378016772, "percentage": 32.78, "elapsed_time": "1:07:09", "remaining_time": "2:17:44", "throughput": 19990.49, "total_tokens": 80551040}
|
|
{"current_steps": 25605, "total_steps": 78105, "loss": 0.2751, "lr": 4.2503404920967326e-06, "epoch": 1.6391396197426542, "percentage": 32.78, "elapsed_time": "1:07:10", "remaining_time": "2:17:43", "throughput": 19991.06, "total_tokens": 80566848}
|
|
{"current_steps": 25610, "total_steps": 78105, "loss": 0.3141, "lr": 4.249941565416539e-06, "epoch": 1.639459701683631, "percentage": 32.79, "elapsed_time": "1:07:10", "remaining_time": "2:17:42", "throughput": 19991.65, "total_tokens": 80583040}
|
|
{"current_steps": 25615, "total_steps": 78105, "loss": 0.3434, "lr": 4.249542551353378e-06, "epoch": 1.6397797836246077, "percentage": 32.8, "elapsed_time": "1:07:11", "remaining_time": "2:17:41", "throughput": 19992.15, "total_tokens": 80598144}
|
|
{"current_steps": 25620, "total_steps": 78105, "loss": 0.2803, "lr": 4.249143449927176e-06, "epoch": 1.6400998655655847, "percentage": 32.8, "elapsed_time": "1:07:12", "remaining_time": "2:17:40", "throughput": 19992.7, "total_tokens": 80614080}
|
|
{"current_steps": 25625, "total_steps": 78105, "loss": 0.4398, "lr": 4.24874426115786e-06, "epoch": 1.6404199475065617, "percentage": 32.81, "elapsed_time": "1:07:12", "remaining_time": "2:17:39", "throughput": 19993.28, "total_tokens": 80630144}
|
|
{"current_steps": 25630, "total_steps": 78105, "loss": 0.2719, "lr": 4.248344985065364e-06, "epoch": 1.6407400294475387, "percentage": 32.81, "elapsed_time": "1:07:13", "remaining_time": "2:17:38", "throughput": 19993.77, "total_tokens": 80645568}
|
|
{"current_steps": 25635, "total_steps": 78105, "loss": 0.3412, "lr": 4.247945621669628e-06, "epoch": 1.6410601113885155, "percentage": 32.82, "elapsed_time": "1:07:14", "remaining_time": "2:17:37", "throughput": 19994.58, "total_tokens": 80663872}
|
|
{"current_steps": 25640, "total_steps": 78105, "loss": 0.3643, "lr": 4.2475461709905915e-06, "epoch": 1.6413801933294923, "percentage": 32.83, "elapsed_time": "1:07:14", "remaining_time": "2:17:36", "throughput": 19995.13, "total_tokens": 80679872}
|
|
{"current_steps": 25645, "total_steps": 78105, "loss": 0.3802, "lr": 4.247146633048202e-06, "epoch": 1.6417002752704692, "percentage": 32.83, "elapsed_time": "1:07:15", "remaining_time": "2:17:35", "throughput": 19995.66, "total_tokens": 80695488}
|
|
{"current_steps": 25650, "total_steps": 78105, "loss": 0.2717, "lr": 4.24674700786241e-06, "epoch": 1.6420203572114462, "percentage": 32.84, "elapsed_time": "1:07:16", "remaining_time": "2:17:34", "throughput": 19996.24, "total_tokens": 80711360}
|
|
{"current_steps": 25655, "total_steps": 78105, "loss": 0.3598, "lr": 4.246347295453171e-06, "epoch": 1.642340439152423, "percentage": 32.85, "elapsed_time": "1:07:17", "remaining_time": "2:17:33", "throughput": 19996.79, "total_tokens": 80727808}
|
|
{"current_steps": 25660, "total_steps": 78105, "loss": 0.2513, "lr": 4.245947495840444e-06, "epoch": 1.6426605210933998, "percentage": 32.85, "elapsed_time": "1:07:17", "remaining_time": "2:17:32", "throughput": 19997.44, "total_tokens": 80744512}
|
|
{"current_steps": 25665, "total_steps": 78105, "loss": 0.408, "lr": 4.245547609044194e-06, "epoch": 1.6429806030343768, "percentage": 32.86, "elapsed_time": "1:07:18", "remaining_time": "2:17:31", "throughput": 19998.08, "total_tokens": 80761280}
|
|
{"current_steps": 25670, "total_steps": 78105, "loss": 0.4443, "lr": 4.2451476350843885e-06, "epoch": 1.6433006849753538, "percentage": 32.87, "elapsed_time": "1:07:19", "remaining_time": "2:17:30", "throughput": 19998.61, "total_tokens": 80776320}
|
|
{"current_steps": 25675, "total_steps": 78105, "loss": 0.3911, "lr": 4.244747573981e-06, "epoch": 1.6436207669163307, "percentage": 32.87, "elapsed_time": "1:07:19", "remaining_time": "2:17:29", "throughput": 19999.18, "total_tokens": 80792256}
|
|
{"current_steps": 25680, "total_steps": 78105, "loss": 0.3401, "lr": 4.2443474257540064e-06, "epoch": 1.6439408488573075, "percentage": 32.88, "elapsed_time": "1:07:20", "remaining_time": "2:17:28", "throughput": 19999.67, "total_tokens": 80807744}
|
|
{"current_steps": 25685, "total_steps": 78105, "loss": 0.2824, "lr": 4.243947190423387e-06, "epoch": 1.6442609307982843, "percentage": 32.89, "elapsed_time": "1:07:21", "remaining_time": "2:17:27", "throughput": 20000.21, "total_tokens": 80823360}
|
|
{"current_steps": 25690, "total_steps": 78105, "loss": 0.4377, "lr": 4.2435468680091305e-06, "epoch": 1.6445810127392613, "percentage": 32.89, "elapsed_time": "1:07:21", "remaining_time": "2:17:26", "throughput": 20000.82, "total_tokens": 80839424}
|
|
{"current_steps": 25695, "total_steps": 78105, "loss": 0.4255, "lr": 4.243146458531224e-06, "epoch": 1.6449010946802383, "percentage": 32.9, "elapsed_time": "1:07:22", "remaining_time": "2:17:25", "throughput": 20001.37, "total_tokens": 80855232}
|
|
{"current_steps": 25700, "total_steps": 78105, "loss": 0.3378, "lr": 4.2427459620096635e-06, "epoch": 1.645221176621215, "percentage": 32.9, "elapsed_time": "1:07:23", "remaining_time": "2:17:24", "throughput": 20001.88, "total_tokens": 80870592}
|
|
{"current_steps": 25705, "total_steps": 78105, "loss": 0.3124, "lr": 4.242345378464448e-06, "epoch": 1.6455412585621918, "percentage": 32.91, "elapsed_time": "1:07:23", "remaining_time": "2:17:23", "throughput": 20002.26, "total_tokens": 80884544}
|
|
{"current_steps": 25710, "total_steps": 78105, "loss": 0.4361, "lr": 4.241944707915579e-06, "epoch": 1.6458613405031688, "percentage": 32.92, "elapsed_time": "1:07:24", "remaining_time": "2:17:22", "throughput": 20002.84, "total_tokens": 80900416}
|
|
{"current_steps": 25715, "total_steps": 78105, "loss": 0.3588, "lr": 4.241543950383063e-06, "epoch": 1.6461814224441458, "percentage": 32.92, "elapsed_time": "1:07:25", "remaining_time": "2:17:21", "throughput": 20003.34, "total_tokens": 80915648}
|
|
{"current_steps": 25720, "total_steps": 78105, "loss": 0.2621, "lr": 4.241143105886916e-06, "epoch": 1.6465015043851225, "percentage": 32.93, "elapsed_time": "1:07:25", "remaining_time": "2:17:20", "throughput": 20003.84, "total_tokens": 80930944}
|
|
{"current_steps": 25725, "total_steps": 78105, "loss": 0.4127, "lr": 4.240742174447151e-06, "epoch": 1.6468215863260993, "percentage": 32.94, "elapsed_time": "1:07:26", "remaining_time": "2:17:19", "throughput": 20004.29, "total_tokens": 80945600}
|
|
{"current_steps": 25730, "total_steps": 78105, "loss": 0.3735, "lr": 4.240341156083789e-06, "epoch": 1.6471416682670763, "percentage": 32.94, "elapsed_time": "1:07:27", "remaining_time": "2:17:18", "throughput": 20004.88, "total_tokens": 80961856}
|
|
{"current_steps": 25735, "total_steps": 78105, "loss": 0.2615, "lr": 4.239940050816854e-06, "epoch": 1.6474617502080533, "percentage": 32.95, "elapsed_time": "1:07:27", "remaining_time": "2:17:17", "throughput": 20005.32, "total_tokens": 80976640}
|
|
{"current_steps": 25740, "total_steps": 78105, "loss": 0.3408, "lr": 4.239538858666377e-06, "epoch": 1.6477818321490303, "percentage": 32.96, "elapsed_time": "1:07:28", "remaining_time": "2:17:16", "throughput": 20005.79, "total_tokens": 80991552}
|
|
{"current_steps": 25745, "total_steps": 78105, "loss": 0.2641, "lr": 4.23913757965239e-06, "epoch": 1.648101914090007, "percentage": 32.96, "elapsed_time": "1:07:29", "remaining_time": "2:17:14", "throughput": 20006.35, "total_tokens": 81007168}
|
|
{"current_steps": 25750, "total_steps": 78105, "loss": 0.4058, "lr": 4.238736213794931e-06, "epoch": 1.6484219960309838, "percentage": 32.97, "elapsed_time": "1:07:29", "remaining_time": "2:17:13", "throughput": 20006.85, "total_tokens": 81022400}
|
|
{"current_steps": 25755, "total_steps": 78105, "loss": 0.2596, "lr": 4.238334761114042e-06, "epoch": 1.6487420779719608, "percentage": 32.97, "elapsed_time": "1:07:30", "remaining_time": "2:17:12", "throughput": 20007.37, "total_tokens": 81037888}
|
|
{"current_steps": 25760, "total_steps": 78105, "loss": 0.347, "lr": 4.23793322162977e-06, "epoch": 1.6490621599129378, "percentage": 32.98, "elapsed_time": "1:07:31", "remaining_time": "2:17:11", "throughput": 20007.76, "total_tokens": 81052352}
|
|
{"current_steps": 25765, "total_steps": 78105, "loss": 0.4247, "lr": 4.237531595362165e-06, "epoch": 1.6493822418539146, "percentage": 32.99, "elapsed_time": "1:07:31", "remaining_time": "2:17:10", "throughput": 20008.32, "total_tokens": 81068032}
|
|
{"current_steps": 25770, "total_steps": 78105, "loss": 0.407, "lr": 4.237129882331283e-06, "epoch": 1.6497023237948913, "percentage": 32.99, "elapsed_time": "1:07:32", "remaining_time": "2:17:09", "throughput": 20008.82, "total_tokens": 81083328}
|
|
{"current_steps": 25775, "total_steps": 78105, "loss": 0.2753, "lr": 4.236728082557183e-06, "epoch": 1.6500224057358683, "percentage": 33.0, "elapsed_time": "1:07:33", "remaining_time": "2:17:08", "throughput": 20009.43, "total_tokens": 81099712}
|
|
{"current_steps": 25780, "total_steps": 78105, "loss": 0.404, "lr": 4.236326196059929e-06, "epoch": 1.6503424876768453, "percentage": 33.01, "elapsed_time": "1:07:33", "remaining_time": "2:17:07", "throughput": 20010.02, "total_tokens": 81115968}
|
|
{"current_steps": 25785, "total_steps": 78105, "loss": 0.4052, "lr": 4.235924222859589e-06, "epoch": 1.6506625696178223, "percentage": 33.01, "elapsed_time": "1:07:34", "remaining_time": "2:17:06", "throughput": 20010.51, "total_tokens": 81130880}
|
|
{"current_steps": 25790, "total_steps": 78105, "loss": 0.3187, "lr": 4.235522162976234e-06, "epoch": 1.650982651558799, "percentage": 33.02, "elapsed_time": "1:07:35", "remaining_time": "2:17:05", "throughput": 20011.0, "total_tokens": 81146176}
|
|
{"current_steps": 25795, "total_steps": 78105, "loss": 0.2374, "lr": 4.235120016429945e-06, "epoch": 1.6513027334997759, "percentage": 33.03, "elapsed_time": "1:07:35", "remaining_time": "2:17:04", "throughput": 20011.51, "total_tokens": 81161472}
|
|
{"current_steps": 25800, "total_steps": 78105, "loss": 0.3276, "lr": 4.234717783240798e-06, "epoch": 1.6516228154407528, "percentage": 33.03, "elapsed_time": "1:07:36", "remaining_time": "2:17:03", "throughput": 20012.03, "total_tokens": 81177152}
|
|
{"current_steps": 25805, "total_steps": 78105, "loss": 0.2702, "lr": 4.234315463428881e-06, "epoch": 1.6519428973817298, "percentage": 33.04, "elapsed_time": "1:07:37", "remaining_time": "2:17:02", "throughput": 20012.76, "total_tokens": 81194688}
|
|
{"current_steps": 25810, "total_steps": 78105, "loss": 0.2992, "lr": 4.233913057014284e-06, "epoch": 1.6522629793227066, "percentage": 33.05, "elapsed_time": "1:07:37", "remaining_time": "2:17:01", "throughput": 20013.35, "total_tokens": 81210944}
|
|
{"current_steps": 25815, "total_steps": 78105, "loss": 0.3213, "lr": 4.233510564017101e-06, "epoch": 1.6525830612636834, "percentage": 33.05, "elapsed_time": "1:07:38", "remaining_time": "2:17:00", "throughput": 20013.9, "total_tokens": 81226624}
|
|
{"current_steps": 25820, "total_steps": 78105, "loss": 0.3132, "lr": 4.2331079844574295e-06, "epoch": 1.6529031432046604, "percentage": 33.06, "elapsed_time": "1:07:39", "remaining_time": "2:16:59", "throughput": 20014.59, "total_tokens": 81244224}
|
|
{"current_steps": 25825, "total_steps": 78105, "loss": 0.2685, "lr": 4.232705318355372e-06, "epoch": 1.6532232251456374, "percentage": 33.06, "elapsed_time": "1:07:39", "remaining_time": "2:16:58", "throughput": 20015.08, "total_tokens": 81259072}
|
|
{"current_steps": 25830, "total_steps": 78105, "loss": 0.241, "lr": 4.232302565731037e-06, "epoch": 1.6535433070866141, "percentage": 33.07, "elapsed_time": "1:07:40", "remaining_time": "2:16:57", "throughput": 20015.56, "total_tokens": 81273856}
|
|
{"current_steps": 25835, "total_steps": 78105, "loss": 0.39, "lr": 4.231899726604534e-06, "epoch": 1.6538633890275911, "percentage": 33.08, "elapsed_time": "1:07:41", "remaining_time": "2:16:56", "throughput": 20016.01, "total_tokens": 81288896}
|
|
{"current_steps": 25840, "total_steps": 78105, "loss": 0.3298, "lr": 4.2314968009959815e-06, "epoch": 1.6541834709685679, "percentage": 33.08, "elapsed_time": "1:07:41", "remaining_time": "2:16:55", "throughput": 20016.55, "total_tokens": 81304448}
|
|
{"current_steps": 25845, "total_steps": 78105, "loss": 0.3232, "lr": 4.231093788925497e-06, "epoch": 1.6545035529095449, "percentage": 33.09, "elapsed_time": "1:07:42", "remaining_time": "2:16:54", "throughput": 20017.11, "total_tokens": 81320576}
|
|
{"current_steps": 25850, "total_steps": 78105, "loss": 0.4215, "lr": 4.2306906904132054e-06, "epoch": 1.6548236348505219, "percentage": 33.1, "elapsed_time": "1:07:43", "remaining_time": "2:16:53", "throughput": 20017.69, "total_tokens": 81337152}
|
|
{"current_steps": 25855, "total_steps": 78105, "loss": 0.3967, "lr": 4.230287505479236e-06, "epoch": 1.6551437167914986, "percentage": 33.1, "elapsed_time": "1:07:43", "remaining_time": "2:16:52", "throughput": 20018.24, "total_tokens": 81352896}
|
|
{"current_steps": 25860, "total_steps": 78105, "loss": 0.3677, "lr": 4.2298842341437204e-06, "epoch": 1.6554637987324754, "percentage": 33.11, "elapsed_time": "1:07:44", "remaining_time": "2:16:51", "throughput": 20018.65, "total_tokens": 81367488}
|
|
{"current_steps": 25865, "total_steps": 78105, "loss": 0.3216, "lr": 4.229480876426798e-06, "epoch": 1.6557838806734524, "percentage": 33.12, "elapsed_time": "1:07:45", "remaining_time": "2:16:50", "throughput": 20019.11, "total_tokens": 81382528}
|
|
{"current_steps": 25870, "total_steps": 78105, "loss": 0.325, "lr": 4.2290774323486084e-06, "epoch": 1.6561039626144294, "percentage": 33.12, "elapsed_time": "1:07:45", "remaining_time": "2:16:49", "throughput": 20019.64, "total_tokens": 81398272}
|
|
{"current_steps": 25875, "total_steps": 78105, "loss": 0.2839, "lr": 4.228673901929298e-06, "epoch": 1.6564240445554061, "percentage": 33.13, "elapsed_time": "1:07:46", "remaining_time": "2:16:48", "throughput": 20020.01, "total_tokens": 81412544}
|
|
{"current_steps": 25880, "total_steps": 78105, "loss": 0.2743, "lr": 4.228270285189019e-06, "epoch": 1.656744126496383, "percentage": 33.13, "elapsed_time": "1:07:47", "remaining_time": "2:16:47", "throughput": 20020.49, "total_tokens": 81427968}
|
|
{"current_steps": 25885, "total_steps": 78105, "loss": 0.4283, "lr": 4.227866582147922e-06, "epoch": 1.65706420843736, "percentage": 33.14, "elapsed_time": "1:07:47", "remaining_time": "2:16:46", "throughput": 20020.87, "total_tokens": 81442368}
|
|
{"current_steps": 25890, "total_steps": 78105, "loss": 0.3572, "lr": 4.22746279282617e-06, "epoch": 1.657384290378337, "percentage": 33.15, "elapsed_time": "1:07:48", "remaining_time": "2:16:45", "throughput": 20021.34, "total_tokens": 81457280}
|
|
{"current_steps": 25895, "total_steps": 78105, "loss": 0.5254, "lr": 4.227058917243922e-06, "epoch": 1.657704372319314, "percentage": 33.15, "elapsed_time": "1:07:49", "remaining_time": "2:16:44", "throughput": 20021.89, "total_tokens": 81473280}
|
|
{"current_steps": 25900, "total_steps": 78105, "loss": 0.3787, "lr": 4.226654955421349e-06, "epoch": 1.6580244542602907, "percentage": 33.16, "elapsed_time": "1:07:49", "remaining_time": "2:16:43", "throughput": 20022.51, "total_tokens": 81489600}
|
|
{"current_steps": 25905, "total_steps": 78105, "loss": 0.4178, "lr": 4.226250907378622e-06, "epoch": 1.6583445362012674, "percentage": 33.17, "elapsed_time": "1:07:50", "remaining_time": "2:16:42", "throughput": 20023.1, "total_tokens": 81505792}
|
|
{"current_steps": 25910, "total_steps": 78105, "loss": 0.3319, "lr": 4.2258467731359145e-06, "epoch": 1.6586646181422444, "percentage": 33.17, "elapsed_time": "1:07:51", "remaining_time": "2:16:41", "throughput": 20023.65, "total_tokens": 81521536}
|
|
{"current_steps": 25915, "total_steps": 78105, "loss": 0.3064, "lr": 4.2254425527134105e-06, "epoch": 1.6589847000832214, "percentage": 33.18, "elapsed_time": "1:07:51", "remaining_time": "2:16:40", "throughput": 20024.17, "total_tokens": 81537024}
|
|
{"current_steps": 25920, "total_steps": 78105, "loss": 0.4137, "lr": 4.225038246131292e-06, "epoch": 1.6593047820241982, "percentage": 33.19, "elapsed_time": "1:07:52", "remaining_time": "2:16:39", "throughput": 20024.7, "total_tokens": 81552448}
|
|
{"current_steps": 25925, "total_steps": 78105, "loss": 0.2729, "lr": 4.224633853409749e-06, "epoch": 1.659624863965175, "percentage": 33.19, "elapsed_time": "1:07:53", "remaining_time": "2:16:38", "throughput": 20025.2, "total_tokens": 81567808}
|
|
{"current_steps": 25930, "total_steps": 78105, "loss": 0.4086, "lr": 4.224229374568974e-06, "epoch": 1.659944945906152, "percentage": 33.2, "elapsed_time": "1:07:53", "remaining_time": "2:16:37", "throughput": 20025.81, "total_tokens": 81584320}
|
|
{"current_steps": 25935, "total_steps": 78105, "loss": 0.3573, "lr": 4.2238248096291666e-06, "epoch": 1.660265027847129, "percentage": 33.21, "elapsed_time": "1:07:54", "remaining_time": "2:16:36", "throughput": 20026.26, "total_tokens": 81599168}
|
|
{"current_steps": 25940, "total_steps": 78105, "loss": 0.2321, "lr": 4.223420158610526e-06, "epoch": 1.660585109788106, "percentage": 33.21, "elapsed_time": "1:07:55", "remaining_time": "2:16:35", "throughput": 20026.78, "total_tokens": 81614720}
|
|
{"current_steps": 25945, "total_steps": 78105, "loss": 0.4207, "lr": 4.223015421533261e-06, "epoch": 1.6609051917290827, "percentage": 33.22, "elapsed_time": "1:07:55", "remaining_time": "2:16:34", "throughput": 20027.25, "total_tokens": 81629888}
|
|
{"current_steps": 25950, "total_steps": 78105, "loss": 0.526, "lr": 4.222610598417579e-06, "epoch": 1.6612252736700595, "percentage": 33.22, "elapsed_time": "1:07:56", "remaining_time": "2:16:33", "throughput": 20027.73, "total_tokens": 81645184}
|
|
{"current_steps": 25955, "total_steps": 78105, "loss": 0.289, "lr": 4.222205689283698e-06, "epoch": 1.6615453556110364, "percentage": 33.23, "elapsed_time": "1:07:57", "remaining_time": "2:16:32", "throughput": 20028.26, "total_tokens": 81660928}
|
|
{"current_steps": 25960, "total_steps": 78105, "loss": 0.3843, "lr": 4.221800694151835e-06, "epoch": 1.6618654375520134, "percentage": 33.24, "elapsed_time": "1:07:57", "remaining_time": "2:16:31", "throughput": 20028.88, "total_tokens": 81677312}
|
|
{"current_steps": 25965, "total_steps": 78105, "loss": 0.4712, "lr": 4.221395613042214e-06, "epoch": 1.6621855194929902, "percentage": 33.24, "elapsed_time": "1:07:58", "remaining_time": "2:16:30", "throughput": 20029.41, "total_tokens": 81692928}
|
|
{"current_steps": 25970, "total_steps": 78105, "loss": 0.3219, "lr": 4.220990445975062e-06, "epoch": 1.662505601433967, "percentage": 33.25, "elapsed_time": "1:07:59", "remaining_time": "2:16:29", "throughput": 20029.96, "total_tokens": 81708800}
|
|
{"current_steps": 25975, "total_steps": 78105, "loss": 0.3933, "lr": 4.220585192970611e-06, "epoch": 1.662825683374944, "percentage": 33.26, "elapsed_time": "1:07:59", "remaining_time": "2:16:28", "throughput": 20030.46, "total_tokens": 81724224}
|
|
{"current_steps": 25980, "total_steps": 78105, "loss": 0.4215, "lr": 4.220179854049099e-06, "epoch": 1.663145765315921, "percentage": 33.26, "elapsed_time": "1:08:00", "remaining_time": "2:16:27", "throughput": 20031.04, "total_tokens": 81740160}
|
|
{"current_steps": 25985, "total_steps": 78105, "loss": 0.3602, "lr": 4.219774429230765e-06, "epoch": 1.6634658472568977, "percentage": 33.27, "elapsed_time": "1:08:01", "remaining_time": "2:16:26", "throughput": 20031.45, "total_tokens": 81754560}
|
|
{"current_steps": 25990, "total_steps": 78105, "loss": 0.3129, "lr": 4.219368918535853e-06, "epoch": 1.6637859291978745, "percentage": 33.28, "elapsed_time": "1:08:01", "remaining_time": "2:16:25", "throughput": 20031.94, "total_tokens": 81769856}
|
|
{"current_steps": 25995, "total_steps": 78105, "loss": 0.2341, "lr": 4.218963321984614e-06, "epoch": 1.6641060111388515, "percentage": 33.28, "elapsed_time": "1:08:02", "remaining_time": "2:16:24", "throughput": 20032.48, "total_tokens": 81785344}
|
|
{"current_steps": 26000, "total_steps": 78105, "loss": 0.4718, "lr": 4.2185576395973e-06, "epoch": 1.6644260930798285, "percentage": 33.29, "elapsed_time": "1:08:03", "remaining_time": "2:16:23", "throughput": 20033.1, "total_tokens": 81801920}
|
|
{"current_steps": 26005, "total_steps": 78105, "loss": 0.2502, "lr": 4.2181518713941694e-06, "epoch": 1.6647461750208055, "percentage": 33.29, "elapsed_time": "1:08:04", "remaining_time": "2:16:22", "throughput": 20033.7, "total_tokens": 81818368}
|
|
{"current_steps": 26010, "total_steps": 78105, "loss": 0.3281, "lr": 4.217746017395483e-06, "epoch": 1.6650662569617822, "percentage": 33.3, "elapsed_time": "1:08:04", "remaining_time": "2:16:21", "throughput": 20034.17, "total_tokens": 81833728}
|
|
{"current_steps": 26015, "total_steps": 78105, "loss": 0.2023, "lr": 4.217340077621509e-06, "epoch": 1.665386338902759, "percentage": 33.31, "elapsed_time": "1:08:05", "remaining_time": "2:16:20", "throughput": 20034.67, "total_tokens": 81848960}
|
|
{"current_steps": 26020, "total_steps": 78105, "loss": 0.348, "lr": 4.216934052092516e-06, "epoch": 1.665706420843736, "percentage": 33.31, "elapsed_time": "1:08:06", "remaining_time": "2:16:19", "throughput": 20035.24, "total_tokens": 81865280}
|
|
{"current_steps": 26025, "total_steps": 78105, "loss": 0.3051, "lr": 4.2165279408287806e-06, "epoch": 1.666026502784713, "percentage": 33.32, "elapsed_time": "1:08:06", "remaining_time": "2:16:18", "throughput": 20035.78, "total_tokens": 81880896}
|
|
{"current_steps": 26030, "total_steps": 78105, "loss": 0.3356, "lr": 4.2161217438505794e-06, "epoch": 1.6663465847256898, "percentage": 33.33, "elapsed_time": "1:08:07", "remaining_time": "2:16:17", "throughput": 20036.3, "total_tokens": 81896320}
|
|
{"current_steps": 26035, "total_steps": 78105, "loss": 0.3459, "lr": 4.215715461178199e-06, "epoch": 1.6666666666666665, "percentage": 33.33, "elapsed_time": "1:08:08", "remaining_time": "2:16:16", "throughput": 20036.84, "total_tokens": 81912320}
|
|
{"current_steps": 26040, "total_steps": 78105, "loss": 0.2861, "lr": 4.215309092831925e-06, "epoch": 1.6669867486076435, "percentage": 33.34, "elapsed_time": "1:08:08", "remaining_time": "2:16:15", "throughput": 20037.4, "total_tokens": 81928448}
|
|
{"current_steps": 26045, "total_steps": 78105, "loss": 0.3841, "lr": 4.214902638832049e-06, "epoch": 1.6673068305486205, "percentage": 33.35, "elapsed_time": "1:08:09", "remaining_time": "2:16:14", "throughput": 20037.91, "total_tokens": 81944000}
|
|
{"current_steps": 26050, "total_steps": 78105, "loss": 0.277, "lr": 4.214496099198868e-06, "epoch": 1.6676269124895975, "percentage": 33.35, "elapsed_time": "1:08:10", "remaining_time": "2:16:13", "throughput": 20038.55, "total_tokens": 81961216}
|
|
{"current_steps": 26055, "total_steps": 78105, "loss": 0.3328, "lr": 4.214089473952682e-06, "epoch": 1.6679469944305743, "percentage": 33.36, "elapsed_time": "1:08:10", "remaining_time": "2:16:12", "throughput": 20039.07, "total_tokens": 81977024}
|
|
{"current_steps": 26060, "total_steps": 78105, "loss": 0.2846, "lr": 4.213682763113796e-06, "epoch": 1.668267076371551, "percentage": 33.37, "elapsed_time": "1:08:11", "remaining_time": "2:16:11", "throughput": 20039.61, "total_tokens": 81992768}
|
|
{"current_steps": 26065, "total_steps": 78105, "loss": 0.2839, "lr": 4.213275966702519e-06, "epoch": 1.668587158312528, "percentage": 33.37, "elapsed_time": "1:08:12", "remaining_time": "2:16:10", "throughput": 20040.28, "total_tokens": 82009856}
|
|
{"current_steps": 26070, "total_steps": 78105, "loss": 0.3912, "lr": 4.212869084739165e-06, "epoch": 1.668907240253505, "percentage": 33.38, "elapsed_time": "1:08:12", "remaining_time": "2:16:09", "throughput": 20040.73, "total_tokens": 82024960}
|
|
{"current_steps": 26075, "total_steps": 78105, "loss": 0.3148, "lr": 4.21246211724405e-06, "epoch": 1.6692273221944818, "percentage": 33.38, "elapsed_time": "1:08:13", "remaining_time": "2:16:08", "throughput": 20041.27, "total_tokens": 82040960}
|
|
{"current_steps": 26080, "total_steps": 78105, "loss": 0.3879, "lr": 4.212055064237498e-06, "epoch": 1.6695474041354585, "percentage": 33.39, "elapsed_time": "1:08:14", "remaining_time": "2:16:07", "throughput": 20041.83, "total_tokens": 82056896}
|
|
{"current_steps": 26085, "total_steps": 78105, "loss": 0.3053, "lr": 4.211647925739834e-06, "epoch": 1.6698674860764355, "percentage": 33.4, "elapsed_time": "1:08:14", "remaining_time": "2:16:06", "throughput": 20042.44, "total_tokens": 82073600}
|
|
{"current_steps": 26090, "total_steps": 78105, "loss": 0.2234, "lr": 4.211240701771387e-06, "epoch": 1.6701875680174125, "percentage": 33.4, "elapsed_time": "1:08:15", "remaining_time": "2:16:05", "throughput": 20042.95, "total_tokens": 82088640}
|
|
{"current_steps": 26095, "total_steps": 78105, "loss": 0.3423, "lr": 4.210833392352493e-06, "epoch": 1.6705076499583893, "percentage": 33.41, "elapsed_time": "1:08:16", "remaining_time": "2:16:04", "throughput": 20043.4, "total_tokens": 82103744}
|
|
{"current_steps": 26100, "total_steps": 78105, "loss": 0.3086, "lr": 4.210425997503491e-06, "epoch": 1.6708277318993663, "percentage": 33.42, "elapsed_time": "1:08:16", "remaining_time": "2:16:03", "throughput": 20043.9, "total_tokens": 82119040}
|
|
{"current_steps": 26105, "total_steps": 78105, "loss": 0.3197, "lr": 4.2100185172447236e-06, "epoch": 1.671147813840343, "percentage": 33.42, "elapsed_time": "1:08:17", "remaining_time": "2:16:02", "throughput": 20044.69, "total_tokens": 82137408}
|
|
{"current_steps": 26110, "total_steps": 78105, "loss": 0.4827, "lr": 4.20961095159654e-06, "epoch": 1.67146789578132, "percentage": 33.43, "elapsed_time": "1:08:18", "remaining_time": "2:16:01", "throughput": 20045.22, "total_tokens": 82153152}
|
|
{"current_steps": 26115, "total_steps": 78105, "loss": 0.2823, "lr": 4.209203300579289e-06, "epoch": 1.671787977722297, "percentage": 33.44, "elapsed_time": "1:08:19", "remaining_time": "2:16:00", "throughput": 20045.75, "total_tokens": 82169024}
|
|
{"current_steps": 26120, "total_steps": 78105, "loss": 0.4166, "lr": 4.208795564213329e-06, "epoch": 1.6721080596632738, "percentage": 33.44, "elapsed_time": "1:08:19", "remaining_time": "2:15:59", "throughput": 20046.26, "total_tokens": 82184576}
|
|
{"current_steps": 26125, "total_steps": 78105, "loss": 0.3562, "lr": 4.208387742519019e-06, "epoch": 1.6724281416042506, "percentage": 33.45, "elapsed_time": "1:08:20", "remaining_time": "2:15:58", "throughput": 20046.71, "total_tokens": 82199232}
|
|
{"current_steps": 26130, "total_steps": 78105, "loss": 0.4136, "lr": 4.207979835516724e-06, "epoch": 1.6727482235452276, "percentage": 33.45, "elapsed_time": "1:08:21", "remaining_time": "2:15:57", "throughput": 20047.3, "total_tokens": 82215744}
|
|
{"current_steps": 26135, "total_steps": 78105, "loss": 0.3964, "lr": 4.207571843226812e-06, "epoch": 1.6730683054862046, "percentage": 33.46, "elapsed_time": "1:08:21", "remaining_time": "2:15:56", "throughput": 20047.75, "total_tokens": 82230720}
|
|
{"current_steps": 26140, "total_steps": 78105, "loss": 0.2845, "lr": 4.207163765669657e-06, "epoch": 1.6733883874271813, "percentage": 33.47, "elapsed_time": "1:08:22", "remaining_time": "2:15:55", "throughput": 20048.44, "total_tokens": 82248256}
|
|
{"current_steps": 26145, "total_steps": 78105, "loss": 0.3653, "lr": 4.206755602865635e-06, "epoch": 1.673708469368158, "percentage": 33.47, "elapsed_time": "1:08:23", "remaining_time": "2:15:54", "throughput": 20048.9, "total_tokens": 82263488}
|
|
{"current_steps": 26150, "total_steps": 78105, "loss": 0.3548, "lr": 4.20634735483513e-06, "epoch": 1.674028551309135, "percentage": 33.48, "elapsed_time": "1:08:23", "remaining_time": "2:15:53", "throughput": 20049.45, "total_tokens": 82279360}
|
|
{"current_steps": 26155, "total_steps": 78105, "loss": 0.295, "lr": 4.205939021598525e-06, "epoch": 1.674348633250112, "percentage": 33.49, "elapsed_time": "1:08:24", "remaining_time": "2:15:52", "throughput": 20049.99, "total_tokens": 82295104}
|
|
{"current_steps": 26160, "total_steps": 78105, "loss": 0.2918, "lr": 4.205530603176212e-06, "epoch": 1.674668715191089, "percentage": 33.49, "elapsed_time": "1:08:25", "remaining_time": "2:15:51", "throughput": 20050.52, "total_tokens": 82311104}
|
|
{"current_steps": 26165, "total_steps": 78105, "loss": 0.313, "lr": 4.205122099588583e-06, "epoch": 1.6749887971320658, "percentage": 33.5, "elapsed_time": "1:08:25", "remaining_time": "2:15:50", "throughput": 20051.03, "total_tokens": 82326656}
|
|
{"current_steps": 26170, "total_steps": 78105, "loss": 0.2993, "lr": 4.204713510856039e-06, "epoch": 1.6753088790730426, "percentage": 33.51, "elapsed_time": "1:08:26", "remaining_time": "2:15:49", "throughput": 20051.63, "total_tokens": 82343168}
|
|
{"current_steps": 26175, "total_steps": 78105, "loss": 0.3222, "lr": 4.204304836998983e-06, "epoch": 1.6756289610140196, "percentage": 33.51, "elapsed_time": "1:08:27", "remaining_time": "2:15:48", "throughput": 20052.1, "total_tokens": 82358208}
|
|
{"current_steps": 26180, "total_steps": 78105, "loss": 0.3878, "lr": 4.203896078037819e-06, "epoch": 1.6759490429549966, "percentage": 33.52, "elapsed_time": "1:08:27", "remaining_time": "2:15:47", "throughput": 20052.53, "total_tokens": 82372928}
|
|
{"current_steps": 26185, "total_steps": 78105, "loss": 0.3604, "lr": 4.203487233992961e-06, "epoch": 1.6762691248959734, "percentage": 33.53, "elapsed_time": "1:08:28", "remaining_time": "2:15:46", "throughput": 20053.05, "total_tokens": 82388992}
|
|
{"current_steps": 26190, "total_steps": 78105, "loss": 0.2632, "lr": 4.203078304884823e-06, "epoch": 1.6765892068369501, "percentage": 33.53, "elapsed_time": "1:08:29", "remaining_time": "2:15:45", "throughput": 20053.54, "total_tokens": 82404416}
|
|
{"current_steps": 26195, "total_steps": 78105, "loss": 0.3213, "lr": 4.202669290733825e-06, "epoch": 1.676909288777927, "percentage": 33.54, "elapsed_time": "1:08:29", "remaining_time": "2:15:44", "throughput": 20053.98, "total_tokens": 82419072}
|
|
{"current_steps": 26200, "total_steps": 78105, "loss": 0.4798, "lr": 4.202260191560393e-06, "epoch": 1.677229370718904, "percentage": 33.54, "elapsed_time": "1:08:30", "remaining_time": "2:15:43", "throughput": 20054.54, "total_tokens": 82435200}
|
|
{"current_steps": 26205, "total_steps": 78105, "loss": 0.4395, "lr": 4.201851007384953e-06, "epoch": 1.677549452659881, "percentage": 33.55, "elapsed_time": "1:08:31", "remaining_time": "2:15:42", "throughput": 20055.05, "total_tokens": 82450752}
|
|
{"current_steps": 26210, "total_steps": 78105, "loss": 0.3904, "lr": 4.201441738227938e-06, "epoch": 1.6778695346008579, "percentage": 33.56, "elapsed_time": "1:08:31", "remaining_time": "2:15:41", "throughput": 20055.51, "total_tokens": 82465792}
|
|
{"current_steps": 26215, "total_steps": 78105, "loss": 0.2622, "lr": 4.2010323841097855e-06, "epoch": 1.6781896165418346, "percentage": 33.56, "elapsed_time": "1:08:32", "remaining_time": "2:15:40", "throughput": 20056.01, "total_tokens": 82481216}
|
|
{"current_steps": 26220, "total_steps": 78105, "loss": 0.3196, "lr": 4.200622945050936e-06, "epoch": 1.6785096984828116, "percentage": 33.57, "elapsed_time": "1:08:33", "remaining_time": "2:15:39", "throughput": 20056.51, "total_tokens": 82496640}
|
|
{"current_steps": 26225, "total_steps": 78105, "loss": 0.3211, "lr": 4.200213421071836e-06, "epoch": 1.6788297804237886, "percentage": 33.58, "elapsed_time": "1:08:33", "remaining_time": "2:15:38", "throughput": 20057.17, "total_tokens": 82513600}
|
|
{"current_steps": 26230, "total_steps": 78105, "loss": 0.313, "lr": 4.199803812192932e-06, "epoch": 1.6791498623647654, "percentage": 33.58, "elapsed_time": "1:08:34", "remaining_time": "2:15:37", "throughput": 20057.59, "total_tokens": 82528384}
|
|
{"current_steps": 26235, "total_steps": 78105, "loss": 0.3087, "lr": 4.199394118434681e-06, "epoch": 1.6794699443057421, "percentage": 33.59, "elapsed_time": "1:08:35", "remaining_time": "2:15:36", "throughput": 20058.08, "total_tokens": 82543552}
|
|
{"current_steps": 26240, "total_steps": 78105, "loss": 0.4927, "lr": 4.198984339817538e-06, "epoch": 1.6797900262467191, "percentage": 33.6, "elapsed_time": "1:08:35", "remaining_time": "2:15:35", "throughput": 20058.69, "total_tokens": 82560256}
|
|
{"current_steps": 26245, "total_steps": 78105, "loss": 0.3095, "lr": 4.198574476361969e-06, "epoch": 1.6801101081876961, "percentage": 33.6, "elapsed_time": "1:08:36", "remaining_time": "2:15:34", "throughput": 20059.27, "total_tokens": 82576320}
|
|
{"current_steps": 26250, "total_steps": 78105, "loss": 0.3584, "lr": 4.198164528088436e-06, "epoch": 1.680430190128673, "percentage": 33.61, "elapsed_time": "1:08:37", "remaining_time": "2:15:33", "throughput": 20059.92, "total_tokens": 82593280}
|
|
{"current_steps": 26255, "total_steps": 78105, "loss": 0.2621, "lr": 4.197754495017413e-06, "epoch": 1.6807502720696497, "percentage": 33.62, "elapsed_time": "1:08:38", "remaining_time": "2:15:32", "throughput": 20060.42, "total_tokens": 82608832}
|
|
{"current_steps": 26260, "total_steps": 78105, "loss": 0.2594, "lr": 4.1973443771693735e-06, "epoch": 1.6810703540106267, "percentage": 33.62, "elapsed_time": "1:08:38", "remaining_time": "2:15:31", "throughput": 20060.98, "total_tokens": 82625024}
|
|
{"current_steps": 26265, "total_steps": 78105, "loss": 0.3151, "lr": 4.1969341745647966e-06, "epoch": 1.6813904359516036, "percentage": 33.63, "elapsed_time": "1:08:39", "remaining_time": "2:15:30", "throughput": 20061.49, "total_tokens": 82640896}
|
|
{"current_steps": 26270, "total_steps": 78105, "loss": 0.4024, "lr": 4.196523887224167e-06, "epoch": 1.6817105178925806, "percentage": 33.63, "elapsed_time": "1:08:40", "remaining_time": "2:15:29", "throughput": 20062.07, "total_tokens": 82656960}
|
|
{"current_steps": 26275, "total_steps": 78105, "loss": 0.3083, "lr": 4.1961135151679695e-06, "epoch": 1.6820305998335574, "percentage": 33.64, "elapsed_time": "1:08:40", "remaining_time": "2:15:28", "throughput": 20062.63, "total_tokens": 82673024}
|
|
{"current_steps": 26280, "total_steps": 78105, "loss": 0.4133, "lr": 4.195703058416701e-06, "epoch": 1.6823506817745342, "percentage": 33.65, "elapsed_time": "1:08:41", "remaining_time": "2:15:27", "throughput": 20063.29, "total_tokens": 82689920}
|
|
{"current_steps": 26285, "total_steps": 78105, "loss": 0.4182, "lr": 4.195292516990851e-06, "epoch": 1.6826707637155112, "percentage": 33.65, "elapsed_time": "1:08:42", "remaining_time": "2:15:26", "throughput": 20063.79, "total_tokens": 82705152}
|
|
{"current_steps": 26290, "total_steps": 78105, "loss": 0.3226, "lr": 4.194881890910924e-06, "epoch": 1.6829908456564882, "percentage": 33.66, "elapsed_time": "1:08:42", "remaining_time": "2:15:25", "throughput": 20064.28, "total_tokens": 82720448}
|
|
{"current_steps": 26295, "total_steps": 78105, "loss": 0.3272, "lr": 4.194471180197425e-06, "epoch": 1.683310927597465, "percentage": 33.67, "elapsed_time": "1:08:43", "remaining_time": "2:15:24", "throughput": 20064.95, "total_tokens": 82737536}
|
|
{"current_steps": 26300, "total_steps": 78105, "loss": 0.5357, "lr": 4.1940603848708605e-06, "epoch": 1.6836310095384417, "percentage": 33.67, "elapsed_time": "1:08:44", "remaining_time": "2:15:23", "throughput": 20065.42, "total_tokens": 82752896}
|
|
{"current_steps": 26305, "total_steps": 78105, "loss": 0.3715, "lr": 4.193649504951745e-06, "epoch": 1.6839510914794187, "percentage": 33.68, "elapsed_time": "1:08:44", "remaining_time": "2:15:22", "throughput": 20066.11, "total_tokens": 82769920}
|
|
{"current_steps": 26310, "total_steps": 78105, "loss": 0.3821, "lr": 4.193238540460595e-06, "epoch": 1.6842711734203957, "percentage": 33.69, "elapsed_time": "1:08:45", "remaining_time": "2:15:21", "throughput": 20066.66, "total_tokens": 82785728}
|
|
{"current_steps": 26315, "total_steps": 78105, "loss": 0.306, "lr": 4.192827491417931e-06, "epoch": 1.6845912553613727, "percentage": 33.69, "elapsed_time": "1:08:46", "remaining_time": "2:15:20", "throughput": 20067.25, "total_tokens": 82801920}
|
|
{"current_steps": 26320, "total_steps": 78105, "loss": 0.3343, "lr": 4.192416357844281e-06, "epoch": 1.6849113373023494, "percentage": 33.7, "elapsed_time": "1:08:46", "remaining_time": "2:15:19", "throughput": 20067.72, "total_tokens": 82817216}
|
|
{"current_steps": 26325, "total_steps": 78105, "loss": 0.2941, "lr": 4.192005139760172e-06, "epoch": 1.6852314192433262, "percentage": 33.7, "elapsed_time": "1:08:47", "remaining_time": "2:15:18", "throughput": 20068.19, "total_tokens": 82832576}
|
|
{"current_steps": 26330, "total_steps": 78105, "loss": 0.3261, "lr": 4.191593837186142e-06, "epoch": 1.6855515011843032, "percentage": 33.71, "elapsed_time": "1:08:48", "remaining_time": "2:15:17", "throughput": 20068.72, "total_tokens": 82848448}
|
|
{"current_steps": 26335, "total_steps": 78105, "loss": 0.3725, "lr": 4.191182450142725e-06, "epoch": 1.6858715831252802, "percentage": 33.72, "elapsed_time": "1:08:48", "remaining_time": "2:15:16", "throughput": 20069.19, "total_tokens": 82863936}
|
|
{"current_steps": 26340, "total_steps": 78105, "loss": 0.3422, "lr": 4.190770978650466e-06, "epoch": 1.686191665066257, "percentage": 33.72, "elapsed_time": "1:08:49", "remaining_time": "2:15:15", "throughput": 20069.74, "total_tokens": 82880192}
|
|
{"current_steps": 26345, "total_steps": 78105, "loss": 0.2793, "lr": 4.190359422729911e-06, "epoch": 1.6865117470072337, "percentage": 33.73, "elapsed_time": "1:08:50", "remaining_time": "2:15:14", "throughput": 20070.17, "total_tokens": 82894656}
|
|
{"current_steps": 26350, "total_steps": 78105, "loss": 0.2528, "lr": 4.189947782401612e-06, "epoch": 1.6868318289482107, "percentage": 33.74, "elapsed_time": "1:08:50", "remaining_time": "2:15:13", "throughput": 20070.62, "total_tokens": 82910016}
|
|
{"current_steps": 26355, "total_steps": 78105, "loss": 0.3624, "lr": 4.189536057686123e-06, "epoch": 1.6871519108891877, "percentage": 33.74, "elapsed_time": "1:08:51", "remaining_time": "2:15:12", "throughput": 20071.07, "total_tokens": 82925120}
|
|
{"current_steps": 26360, "total_steps": 78105, "loss": 0.3493, "lr": 4.189124248604004e-06, "epoch": 1.6874719928301645, "percentage": 33.75, "elapsed_time": "1:08:52", "remaining_time": "2:15:11", "throughput": 20071.62, "total_tokens": 82941184}
|
|
{"current_steps": 26365, "total_steps": 78105, "loss": 0.3695, "lr": 4.188712355175818e-06, "epoch": 1.6877920747711415, "percentage": 33.76, "elapsed_time": "1:08:52", "remaining_time": "2:15:10", "throughput": 20072.13, "total_tokens": 82956736}
|
|
{"current_steps": 26370, "total_steps": 78105, "loss": 0.2668, "lr": 4.1883003774221335e-06, "epoch": 1.6881121567121182, "percentage": 33.76, "elapsed_time": "1:08:53", "remaining_time": "2:15:09", "throughput": 20072.52, "total_tokens": 82971200}
|
|
{"current_steps": 26375, "total_steps": 78105, "loss": 0.3116, "lr": 4.1878883153635205e-06, "epoch": 1.6884322386530952, "percentage": 33.77, "elapsed_time": "1:08:54", "remaining_time": "2:15:08", "throughput": 20073.05, "total_tokens": 82987136}
|
|
{"current_steps": 26380, "total_steps": 78105, "loss": 0.3113, "lr": 4.187476169020559e-06, "epoch": 1.6887523205940722, "percentage": 33.78, "elapsed_time": "1:08:54", "remaining_time": "2:15:07", "throughput": 20073.61, "total_tokens": 83003392}
|
|
{"current_steps": 26385, "total_steps": 78105, "loss": 0.4604, "lr": 4.187063938413827e-06, "epoch": 1.689072402535049, "percentage": 33.78, "elapsed_time": "1:08:55", "remaining_time": "2:15:06", "throughput": 20074.05, "total_tokens": 83018624}
|
|
{"current_steps": 26390, "total_steps": 78105, "loss": 0.3172, "lr": 4.186651623563908e-06, "epoch": 1.6893924844760257, "percentage": 33.79, "elapsed_time": "1:08:56", "remaining_time": "2:15:05", "throughput": 20074.5, "total_tokens": 83033408}
|
|
{"current_steps": 26395, "total_steps": 78105, "loss": 0.3299, "lr": 4.186239224491394e-06, "epoch": 1.6897125664170027, "percentage": 33.79, "elapsed_time": "1:08:57", "remaining_time": "2:15:04", "throughput": 20075.27, "total_tokens": 83051520}
|
|
{"current_steps": 26400, "total_steps": 78105, "loss": 0.2201, "lr": 4.185826741216876e-06, "epoch": 1.6900326483579797, "percentage": 33.8, "elapsed_time": "1:08:57", "remaining_time": "2:15:03", "throughput": 20075.77, "total_tokens": 83066816}
|
|
{"current_steps": 26405, "total_steps": 78105, "loss": 0.3626, "lr": 4.185414173760952e-06, "epoch": 1.6903527302989565, "percentage": 33.81, "elapsed_time": "1:08:58", "remaining_time": "2:15:02", "throughput": 20076.34, "total_tokens": 83083136}
|
|
{"current_steps": 26410, "total_steps": 78105, "loss": 0.3798, "lr": 4.185001522144223e-06, "epoch": 1.6906728122399333, "percentage": 33.81, "elapsed_time": "1:08:58", "remaining_time": "2:15:01", "throughput": 20076.77, "total_tokens": 83097728}
|
|
{"current_steps": 26415, "total_steps": 78105, "loss": 0.3527, "lr": 4.184588786387295e-06, "epoch": 1.6909928941809103, "percentage": 33.82, "elapsed_time": "1:08:59", "remaining_time": "2:15:00", "throughput": 20077.23, "total_tokens": 83112704}
|
|
{"current_steps": 26420, "total_steps": 78105, "loss": 0.2633, "lr": 4.184175966510777e-06, "epoch": 1.6913129761218872, "percentage": 33.83, "elapsed_time": "1:09:00", "remaining_time": "2:14:59", "throughput": 20077.74, "total_tokens": 83128064}
|
|
{"current_steps": 26425, "total_steps": 78105, "loss": 0.5295, "lr": 4.183763062535285e-06, "epoch": 1.6916330580628642, "percentage": 33.83, "elapsed_time": "1:09:01", "remaining_time": "2:14:59", "throughput": 20077.12, "total_tokens": 83144384}
|
|
{"current_steps": 26430, "total_steps": 78105, "loss": 0.4544, "lr": 4.183350074481434e-06, "epoch": 1.691953140003841, "percentage": 33.84, "elapsed_time": "1:09:01", "remaining_time": "2:14:58", "throughput": 20077.52, "total_tokens": 83159168}
|
|
{"current_steps": 26435, "total_steps": 78105, "loss": 0.292, "lr": 4.18293700236985e-06, "epoch": 1.6922732219448178, "percentage": 33.85, "elapsed_time": "1:09:02", "remaining_time": "2:14:57", "throughput": 20078.04, "total_tokens": 83174720}
|
|
{"current_steps": 26440, "total_steps": 78105, "loss": 0.3355, "lr": 4.182523846221158e-06, "epoch": 1.6925933038857948, "percentage": 33.85, "elapsed_time": "1:09:03", "remaining_time": "2:14:56", "throughput": 20078.48, "total_tokens": 83189504}
|
|
{"current_steps": 26445, "total_steps": 78105, "loss": 0.444, "lr": 4.182110606055989e-06, "epoch": 1.6929133858267718, "percentage": 33.86, "elapsed_time": "1:09:03", "remaining_time": "2:14:55", "throughput": 20078.94, "total_tokens": 83204992}
|
|
{"current_steps": 26450, "total_steps": 78105, "loss": 0.3017, "lr": 4.181697281894978e-06, "epoch": 1.6932334677677485, "percentage": 33.86, "elapsed_time": "1:09:04", "remaining_time": "2:14:54", "throughput": 20079.46, "total_tokens": 83221056}
|
|
{"current_steps": 26455, "total_steps": 78105, "loss": 0.4095, "lr": 4.181283873758765e-06, "epoch": 1.6935535497087253, "percentage": 33.87, "elapsed_time": "1:09:05", "remaining_time": "2:14:53", "throughput": 20080.08, "total_tokens": 83237568}
|
|
{"current_steps": 26460, "total_steps": 78105, "loss": 0.3218, "lr": 4.1808703816679915e-06, "epoch": 1.6938736316497023, "percentage": 33.88, "elapsed_time": "1:09:05", "remaining_time": "2:14:52", "throughput": 20080.69, "total_tokens": 83254144}
|
|
{"current_steps": 26465, "total_steps": 78105, "loss": 0.3484, "lr": 4.180456805643308e-06, "epoch": 1.6941937135906793, "percentage": 33.88, "elapsed_time": "1:09:06", "remaining_time": "2:14:51", "throughput": 20081.16, "total_tokens": 83269056}
|
|
{"current_steps": 26470, "total_steps": 78105, "loss": 0.4254, "lr": 4.180043145705363e-06, "epoch": 1.6945137955316563, "percentage": 33.89, "elapsed_time": "1:09:07", "remaining_time": "2:14:50", "throughput": 20081.72, "total_tokens": 83285504}
|
|
{"current_steps": 26475, "total_steps": 78105, "loss": 0.3334, "lr": 4.1796294018748165e-06, "epoch": 1.694833877472633, "percentage": 33.9, "elapsed_time": "1:09:07", "remaining_time": "2:14:49", "throughput": 20082.27, "total_tokens": 83301248}
|
|
{"current_steps": 26480, "total_steps": 78105, "loss": 0.4201, "lr": 4.179215574172325e-06, "epoch": 1.6951539594136098, "percentage": 33.9, "elapsed_time": "1:09:08", "remaining_time": "2:14:48", "throughput": 20082.77, "total_tokens": 83316864}
|
|
{"current_steps": 26485, "total_steps": 78105, "loss": 0.3501, "lr": 4.178801662618555e-06, "epoch": 1.6954740413545868, "percentage": 33.91, "elapsed_time": "1:09:09", "remaining_time": "2:14:47", "throughput": 20083.26, "total_tokens": 83332480}
|
|
{"current_steps": 26490, "total_steps": 78105, "loss": 0.311, "lr": 4.178387667234174e-06, "epoch": 1.6957941232955638, "percentage": 33.92, "elapsed_time": "1:09:10", "remaining_time": "2:14:46", "throughput": 20083.75, "total_tokens": 83348224}
|
|
{"current_steps": 26495, "total_steps": 78105, "loss": 0.4381, "lr": 4.177973588039856e-06, "epoch": 1.6961142052365406, "percentage": 33.92, "elapsed_time": "1:09:10", "remaining_time": "2:14:45", "throughput": 20084.2, "total_tokens": 83363776}
|
|
{"current_steps": 26500, "total_steps": 78105, "loss": 0.3925, "lr": 4.177559425056277e-06, "epoch": 1.6964342871775173, "percentage": 33.93, "elapsed_time": "1:09:11", "remaining_time": "2:14:44", "throughput": 20084.85, "total_tokens": 83380800}
|
|
{"current_steps": 26505, "total_steps": 78105, "loss": 0.4818, "lr": 4.177145178304118e-06, "epoch": 1.6967543691184943, "percentage": 33.94, "elapsed_time": "1:09:12", "remaining_time": "2:14:43", "throughput": 20085.49, "total_tokens": 83397632}
|
|
{"current_steps": 26510, "total_steps": 78105, "loss": 0.4123, "lr": 4.1767308478040655e-06, "epoch": 1.6970744510594713, "percentage": 33.94, "elapsed_time": "1:09:12", "remaining_time": "2:14:42", "throughput": 20085.94, "total_tokens": 83412352}
|
|
{"current_steps": 26515, "total_steps": 78105, "loss": 0.3334, "lr": 4.176316433576808e-06, "epoch": 1.697394533000448, "percentage": 33.95, "elapsed_time": "1:09:13", "remaining_time": "2:14:41", "throughput": 20086.41, "total_tokens": 83427584}
|
|
{"current_steps": 26520, "total_steps": 78105, "loss": 0.2709, "lr": 4.1759019356430395e-06, "epoch": 1.697714614941425, "percentage": 33.95, "elapsed_time": "1:09:14", "remaining_time": "2:14:40", "throughput": 20086.81, "total_tokens": 83442240}
|
|
{"current_steps": 26525, "total_steps": 78105, "loss": 0.355, "lr": 4.1754873540234575e-06, "epoch": 1.6980346968824018, "percentage": 33.96, "elapsed_time": "1:09:14", "remaining_time": "2:14:39", "throughput": 20087.19, "total_tokens": 83456640}
|
|
{"current_steps": 26530, "total_steps": 78105, "loss": 0.2774, "lr": 4.175072688738765e-06, "epoch": 1.6983547788233788, "percentage": 33.97, "elapsed_time": "1:09:15", "remaining_time": "2:14:38", "throughput": 20087.71, "total_tokens": 83472512}
|
|
{"current_steps": 26535, "total_steps": 78105, "loss": 0.3193, "lr": 4.174657939809666e-06, "epoch": 1.6986748607643558, "percentage": 33.97, "elapsed_time": "1:09:16", "remaining_time": "2:14:37", "throughput": 20088.29, "total_tokens": 83488768}
|
|
{"current_steps": 26540, "total_steps": 78105, "loss": 0.3276, "lr": 4.174243107256874e-06, "epoch": 1.6989949427053326, "percentage": 33.98, "elapsed_time": "1:09:16", "remaining_time": "2:14:36", "throughput": 20088.8, "total_tokens": 83504512}
|
|
{"current_steps": 26545, "total_steps": 78105, "loss": 0.3254, "lr": 4.173828191101101e-06, "epoch": 1.6993150246463093, "percentage": 33.99, "elapsed_time": "1:09:17", "remaining_time": "2:14:35", "throughput": 20089.24, "total_tokens": 83519808}
|
|
{"current_steps": 26550, "total_steps": 78105, "loss": 0.4312, "lr": 4.173413191363068e-06, "epoch": 1.6996351065872863, "percentage": 33.99, "elapsed_time": "1:09:18", "remaining_time": "2:14:34", "throughput": 20089.71, "total_tokens": 83535104}
|
|
{"current_steps": 26555, "total_steps": 78105, "loss": 0.3073, "lr": 4.172998108063495e-06, "epoch": 1.6999551885282633, "percentage": 34.0, "elapsed_time": "1:09:18", "remaining_time": "2:14:33", "throughput": 20090.22, "total_tokens": 83550784}
|
|
{"current_steps": 26560, "total_steps": 78105, "loss": 0.3819, "lr": 4.172582941223112e-06, "epoch": 1.70027527046924, "percentage": 34.01, "elapsed_time": "1:09:19", "remaining_time": "2:14:32", "throughput": 20090.82, "total_tokens": 83567296}
|
|
{"current_steps": 26565, "total_steps": 78105, "loss": 0.35, "lr": 4.17216769086265e-06, "epoch": 1.7005953524102169, "percentage": 34.01, "elapsed_time": "1:09:20", "remaining_time": "2:14:31", "throughput": 20091.23, "total_tokens": 83582144}
|
|
{"current_steps": 26570, "total_steps": 78105, "loss": 0.2856, "lr": 4.171752357002842e-06, "epoch": 1.7009154343511939, "percentage": 34.02, "elapsed_time": "1:09:20", "remaining_time": "2:14:30", "throughput": 20091.74, "total_tokens": 83597952}
|
|
{"current_steps": 26575, "total_steps": 78105, "loss": 0.4236, "lr": 4.171336939664429e-06, "epoch": 1.7012355162921708, "percentage": 34.02, "elapsed_time": "1:09:21", "remaining_time": "2:14:29", "throughput": 20092.28, "total_tokens": 83614080}
|
|
{"current_steps": 26580, "total_steps": 78105, "loss": 0.2606, "lr": 4.1709214388681565e-06, "epoch": 1.7015555982331478, "percentage": 34.03, "elapsed_time": "1:09:22", "remaining_time": "2:14:28", "throughput": 20092.79, "total_tokens": 83629568}
|
|
{"current_steps": 26585, "total_steps": 78105, "loss": 0.4471, "lr": 4.1705058546347694e-06, "epoch": 1.7018756801741246, "percentage": 34.04, "elapsed_time": "1:09:22", "remaining_time": "2:14:27", "throughput": 20093.31, "total_tokens": 83645440}
|
|
{"current_steps": 26590, "total_steps": 78105, "loss": 0.4335, "lr": 4.170090186985022e-06, "epoch": 1.7021957621151014, "percentage": 34.04, "elapsed_time": "1:09:23", "remaining_time": "2:14:26", "throughput": 20093.82, "total_tokens": 83660864}
|
|
{"current_steps": 26595, "total_steps": 78105, "loss": 0.3545, "lr": 4.169674435939669e-06, "epoch": 1.7025158440560784, "percentage": 34.05, "elapsed_time": "1:09:24", "remaining_time": "2:14:25", "throughput": 20094.24, "total_tokens": 83675840}
|
|
{"current_steps": 26600, "total_steps": 78105, "loss": 0.43, "lr": 4.169258601519473e-06, "epoch": 1.7028359259970554, "percentage": 34.06, "elapsed_time": "1:09:24", "remaining_time": "2:14:24", "throughput": 20094.85, "total_tokens": 83692672}
|
|
{"current_steps": 26605, "total_steps": 78105, "loss": 0.2911, "lr": 4.168842683745196e-06, "epoch": 1.7031560079380321, "percentage": 34.06, "elapsed_time": "1:09:25", "remaining_time": "2:14:23", "throughput": 20095.48, "total_tokens": 83709376}
|
|
{"current_steps": 26610, "total_steps": 78105, "loss": 0.3119, "lr": 4.168426682637609e-06, "epoch": 1.703476089879009, "percentage": 34.07, "elapsed_time": "1:09:26", "remaining_time": "2:14:22", "throughput": 20096.17, "total_tokens": 83726976}
|
|
{"current_steps": 26615, "total_steps": 78105, "loss": 0.2443, "lr": 4.168010598217482e-06, "epoch": 1.7037961718199859, "percentage": 34.08, "elapsed_time": "1:09:26", "remaining_time": "2:14:21", "throughput": 20096.59, "total_tokens": 83741888}
|
|
{"current_steps": 26620, "total_steps": 78105, "loss": 0.4303, "lr": 4.167594430505596e-06, "epoch": 1.7041162537609629, "percentage": 34.08, "elapsed_time": "1:09:27", "remaining_time": "2:14:20", "throughput": 20097.03, "total_tokens": 83757056}
|
|
{"current_steps": 26625, "total_steps": 78105, "loss": 0.3313, "lr": 4.1671781795227305e-06, "epoch": 1.7044363357019396, "percentage": 34.09, "elapsed_time": "1:09:28", "remaining_time": "2:14:19", "throughput": 20097.65, "total_tokens": 83773632}
|
|
{"current_steps": 26630, "total_steps": 78105, "loss": 0.3153, "lr": 4.16676184528967e-06, "epoch": 1.7047564176429166, "percentage": 34.1, "elapsed_time": "1:09:29", "remaining_time": "2:14:18", "throughput": 20098.13, "total_tokens": 83789312}
|
|
{"current_steps": 26635, "total_steps": 78105, "loss": 0.4139, "lr": 4.1663454278272056e-06, "epoch": 1.7050764995838934, "percentage": 34.1, "elapsed_time": "1:09:29", "remaining_time": "2:14:17", "throughput": 20098.66, "total_tokens": 83805312}
|
|
{"current_steps": 26640, "total_steps": 78105, "loss": 0.3763, "lr": 4.165928927156129e-06, "epoch": 1.7053965815248704, "percentage": 34.11, "elapsed_time": "1:09:30", "remaining_time": "2:14:16", "throughput": 20099.22, "total_tokens": 83821504}
|
|
{"current_steps": 26645, "total_steps": 78105, "loss": 0.3256, "lr": 4.16551234329724e-06, "epoch": 1.7057166634658474, "percentage": 34.11, "elapsed_time": "1:09:31", "remaining_time": "2:14:15", "throughput": 20099.73, "total_tokens": 83837440}
|
|
{"current_steps": 26650, "total_steps": 78105, "loss": 0.3067, "lr": 4.1650956762713395e-06, "epoch": 1.7060367454068242, "percentage": 34.12, "elapsed_time": "1:09:31", "remaining_time": "2:14:14", "throughput": 20100.3, "total_tokens": 83853760}
|
|
{"current_steps": 26655, "total_steps": 78105, "loss": 0.3456, "lr": 4.164678926099234e-06, "epoch": 1.706356827347801, "percentage": 34.13, "elapsed_time": "1:09:32", "remaining_time": "2:14:13", "throughput": 20100.88, "total_tokens": 83870080}
|
|
{"current_steps": 26660, "total_steps": 78105, "loss": 0.4066, "lr": 4.164262092801734e-06, "epoch": 1.706676909288778, "percentage": 34.13, "elapsed_time": "1:09:33", "remaining_time": "2:14:12", "throughput": 20101.38, "total_tokens": 83885632}
|
|
{"current_steps": 26665, "total_steps": 78105, "loss": 0.3023, "lr": 4.163845176399655e-06, "epoch": 1.706996991229755, "percentage": 34.14, "elapsed_time": "1:09:33", "remaining_time": "2:14:11", "throughput": 20101.86, "total_tokens": 83900800}
|
|
{"current_steps": 26670, "total_steps": 78105, "loss": 0.1733, "lr": 4.163428176913813e-06, "epoch": 1.7073170731707317, "percentage": 34.15, "elapsed_time": "1:09:34", "remaining_time": "2:14:10", "throughput": 20102.32, "total_tokens": 83915840}
|
|
{"current_steps": 26675, "total_steps": 78105, "loss": 0.4431, "lr": 4.1630110943650335e-06, "epoch": 1.7076371551117084, "percentage": 34.15, "elapsed_time": "1:09:35", "remaining_time": "2:14:09", "throughput": 20102.78, "total_tokens": 83931008}
|
|
{"current_steps": 26680, "total_steps": 78105, "loss": 0.2354, "lr": 4.162593928774142e-06, "epoch": 1.7079572370526854, "percentage": 34.16, "elapsed_time": "1:09:36", "remaining_time": "2:14:09", "throughput": 20102.06, "total_tokens": 83947648}
|
|
{"current_steps": 26685, "total_steps": 78105, "loss": 0.3764, "lr": 4.1621766801619695e-06, "epoch": 1.7082773189936624, "percentage": 34.17, "elapsed_time": "1:09:36", "remaining_time": "2:14:08", "throughput": 20102.53, "total_tokens": 83963008}
|
|
{"current_steps": 26690, "total_steps": 78105, "loss": 0.2491, "lr": 4.161759348549352e-06, "epoch": 1.7085974009346394, "percentage": 34.17, "elapsed_time": "1:09:37", "remaining_time": "2:14:07", "throughput": 20103.21, "total_tokens": 83980672}
|
|
{"current_steps": 26695, "total_steps": 78105, "loss": 0.3438, "lr": 4.1613419339571284e-06, "epoch": 1.7089174828756162, "percentage": 34.18, "elapsed_time": "1:09:38", "remaining_time": "2:14:06", "throughput": 20103.73, "total_tokens": 83996160}
|
|
{"current_steps": 26700, "total_steps": 78105, "loss": 0.3834, "lr": 4.160924436406142e-06, "epoch": 1.709237564816593, "percentage": 34.18, "elapsed_time": "1:09:38", "remaining_time": "2:14:05", "throughput": 20104.18, "total_tokens": 84013824}
|
|
{"current_steps": 26705, "total_steps": 78105, "loss": 0.3901, "lr": 4.16050685591724e-06, "epoch": 1.70955764675757, "percentage": 34.19, "elapsed_time": "1:09:39", "remaining_time": "2:14:05", "throughput": 20103.44, "total_tokens": 84030336}
|
|
{"current_steps": 26710, "total_steps": 78105, "loss": 0.3067, "lr": 4.160089192511276e-06, "epoch": 1.709877728698547, "percentage": 34.2, "elapsed_time": "1:09:40", "remaining_time": "2:14:04", "throughput": 20103.92, "total_tokens": 84045824}
|
|
{"current_steps": 26715, "total_steps": 78105, "loss": 0.2405, "lr": 4.159671446209104e-06, "epoch": 1.7101978106395237, "percentage": 34.2, "elapsed_time": "1:09:41", "remaining_time": "2:14:03", "throughput": 20104.44, "total_tokens": 84061568}
|
|
{"current_steps": 26720, "total_steps": 78105, "loss": 0.3842, "lr": 4.159253617031585e-06, "epoch": 1.7105178925805005, "percentage": 34.21, "elapsed_time": "1:09:41", "remaining_time": "2:14:02", "throughput": 20105.01, "total_tokens": 84078080}
|
|
{"current_steps": 26725, "total_steps": 78105, "loss": 0.3621, "lr": 4.158835704999583e-06, "epoch": 1.7108379745214775, "percentage": 34.22, "elapsed_time": "1:09:42", "remaining_time": "2:14:01", "throughput": 20105.48, "total_tokens": 84093248}
|
|
{"current_steps": 26730, "total_steps": 78105, "loss": 0.2942, "lr": 4.1584177101339675e-06, "epoch": 1.7111580564624544, "percentage": 34.22, "elapsed_time": "1:09:43", "remaining_time": "2:14:00", "throughput": 20106.11, "total_tokens": 84110080}
|
|
{"current_steps": 26735, "total_steps": 78105, "loss": 0.3206, "lr": 4.157999632455609e-06, "epoch": 1.7114781384034314, "percentage": 34.23, "elapsed_time": "1:09:43", "remaining_time": "2:13:59", "throughput": 20106.54, "total_tokens": 84125248}
|
|
{"current_steps": 26740, "total_steps": 78105, "loss": 0.4036, "lr": 4.157581471985386e-06, "epoch": 1.7117982203444082, "percentage": 34.24, "elapsed_time": "1:09:44", "remaining_time": "2:13:58", "throughput": 20107.14, "total_tokens": 84141824}
|
|
{"current_steps": 26745, "total_steps": 78105, "loss": 0.3949, "lr": 4.157163228744177e-06, "epoch": 1.712118302285385, "percentage": 34.24, "elapsed_time": "1:09:45", "remaining_time": "2:13:57", "throughput": 20107.59, "total_tokens": 84157056}
|
|
{"current_steps": 26750, "total_steps": 78105, "loss": 0.2899, "lr": 4.156744902752868e-06, "epoch": 1.712438384226362, "percentage": 34.25, "elapsed_time": "1:09:45", "remaining_time": "2:13:56", "throughput": 20107.98, "total_tokens": 84171648}
|
|
{"current_steps": 26755, "total_steps": 78105, "loss": 0.3951, "lr": 4.15632649403235e-06, "epoch": 1.712758466167339, "percentage": 34.26, "elapsed_time": "1:09:46", "remaining_time": "2:13:55", "throughput": 20108.47, "total_tokens": 84187136}
|
|
{"current_steps": 26760, "total_steps": 78105, "loss": 0.4079, "lr": 4.155908002603513e-06, "epoch": 1.7130785481083157, "percentage": 34.26, "elapsed_time": "1:09:47", "remaining_time": "2:13:54", "throughput": 20109.02, "total_tokens": 84203392}
|
|
{"current_steps": 26765, "total_steps": 78105, "loss": 0.4703, "lr": 4.155489428487256e-06, "epoch": 1.7133986300492925, "percentage": 34.27, "elapsed_time": "1:09:48", "remaining_time": "2:13:53", "throughput": 20109.51, "total_tokens": 84219136}
|
|
{"current_steps": 26770, "total_steps": 78105, "loss": 0.4732, "lr": 4.15507077170448e-06, "epoch": 1.7137187119902695, "percentage": 34.27, "elapsed_time": "1:09:48", "remaining_time": "2:13:52", "throughput": 20109.97, "total_tokens": 84234304}
|
|
{"current_steps": 26775, "total_steps": 78105, "loss": 0.3154, "lr": 4.154652032276091e-06, "epoch": 1.7140387939312465, "percentage": 34.28, "elapsed_time": "1:09:49", "remaining_time": "2:13:51", "throughput": 20110.47, "total_tokens": 84249920}
|
|
{"current_steps": 26780, "total_steps": 78105, "loss": 0.2278, "lr": 4.1542332102229975e-06, "epoch": 1.7143588758722232, "percentage": 34.29, "elapsed_time": "1:09:50", "remaining_time": "2:13:50", "throughput": 20110.97, "total_tokens": 84265600}
|
|
{"current_steps": 26785, "total_steps": 78105, "loss": 0.3779, "lr": 4.153814305566115e-06, "epoch": 1.7146789578132002, "percentage": 34.29, "elapsed_time": "1:09:50", "remaining_time": "2:13:49", "throughput": 20111.66, "total_tokens": 84283072}
|
|
{"current_steps": 26790, "total_steps": 78105, "loss": 0.4423, "lr": 4.15339531832636e-06, "epoch": 1.714999039754177, "percentage": 34.3, "elapsed_time": "1:09:51", "remaining_time": "2:13:48", "throughput": 20112.23, "total_tokens": 84299200}
|
|
{"current_steps": 26795, "total_steps": 78105, "loss": 0.329, "lr": 4.152976248524655e-06, "epoch": 1.715319121695154, "percentage": 34.31, "elapsed_time": "1:09:52", "remaining_time": "2:13:47", "throughput": 20112.79, "total_tokens": 84315840}
|
|
{"current_steps": 26800, "total_steps": 78105, "loss": 0.3311, "lr": 4.152557096181927e-06, "epoch": 1.715639203636131, "percentage": 34.31, "elapsed_time": "1:09:52", "remaining_time": "2:13:46", "throughput": 20113.19, "total_tokens": 84330240}
|
|
{"current_steps": 26805, "total_steps": 78105, "loss": 0.4372, "lr": 4.152137861319104e-06, "epoch": 1.7159592855771078, "percentage": 34.32, "elapsed_time": "1:09:53", "remaining_time": "2:13:45", "throughput": 20113.66, "total_tokens": 84345344}
|
|
{"current_steps": 26810, "total_steps": 78105, "loss": 0.4629, "lr": 4.151718543957123e-06, "epoch": 1.7162793675180845, "percentage": 34.33, "elapsed_time": "1:09:54", "remaining_time": "2:13:44", "throughput": 20114.11, "total_tokens": 84360448}
|
|
{"current_steps": 26815, "total_steps": 78105, "loss": 0.3464, "lr": 4.151299144116922e-06, "epoch": 1.7165994494590615, "percentage": 34.33, "elapsed_time": "1:09:54", "remaining_time": "2:13:43", "throughput": 20114.69, "total_tokens": 84376256}
|
|
{"current_steps": 26820, "total_steps": 78105, "loss": 0.3006, "lr": 4.150879661819441e-06, "epoch": 1.7169195314000385, "percentage": 34.34, "elapsed_time": "1:09:55", "remaining_time": "2:13:42", "throughput": 20115.12, "total_tokens": 84391168}
|
|
{"current_steps": 26825, "total_steps": 78105, "loss": 0.3412, "lr": 4.15046009708563e-06, "epoch": 1.7172396133410153, "percentage": 34.34, "elapsed_time": "1:09:56", "remaining_time": "2:13:41", "throughput": 20115.55, "total_tokens": 84406336}
|
|
{"current_steps": 26830, "total_steps": 78105, "loss": 0.3759, "lr": 4.150040449936439e-06, "epoch": 1.717559695281992, "percentage": 34.35, "elapsed_time": "1:09:56", "remaining_time": "2:13:40", "throughput": 20116.09, "total_tokens": 84422272}
|
|
{"current_steps": 26835, "total_steps": 78105, "loss": 0.2669, "lr": 4.1496207203928215e-06, "epoch": 1.717879777222969, "percentage": 34.36, "elapsed_time": "1:09:57", "remaining_time": "2:13:39", "throughput": 20116.52, "total_tokens": 84436928}
|
|
{"current_steps": 26840, "total_steps": 78105, "loss": 0.3133, "lr": 4.149200908475739e-06, "epoch": 1.718199859163946, "percentage": 34.36, "elapsed_time": "1:09:58", "remaining_time": "2:13:38", "throughput": 20116.99, "total_tokens": 84452608}
|
|
{"current_steps": 26845, "total_steps": 78105, "loss": 0.37, "lr": 4.1487810142061525e-06, "epoch": 1.718519941104923, "percentage": 34.37, "elapsed_time": "1:09:58", "remaining_time": "2:13:37", "throughput": 20117.52, "total_tokens": 84468736}
|
|
{"current_steps": 26850, "total_steps": 78105, "loss": 0.3054, "lr": 4.148361037605031e-06, "epoch": 1.7188400230458998, "percentage": 34.38, "elapsed_time": "1:09:59", "remaining_time": "2:13:36", "throughput": 20117.96, "total_tokens": 84483840}
|
|
{"current_steps": 26855, "total_steps": 78105, "loss": 0.3229, "lr": 4.1479409786933455e-06, "epoch": 1.7191601049868765, "percentage": 34.38, "elapsed_time": "1:10:00", "remaining_time": "2:13:35", "throughput": 20118.46, "total_tokens": 84499648}
|
|
{"current_steps": 26860, "total_steps": 78105, "loss": 0.383, "lr": 4.147520837492071e-06, "epoch": 1.7194801869278535, "percentage": 34.39, "elapsed_time": "1:10:00", "remaining_time": "2:13:34", "throughput": 20119.05, "total_tokens": 84516224}
|
|
{"current_steps": 26865, "total_steps": 78105, "loss": 0.328, "lr": 4.1471006140221876e-06, "epoch": 1.7198002688688305, "percentage": 34.4, "elapsed_time": "1:10:01", "remaining_time": "2:13:33", "throughput": 20119.6, "total_tokens": 84532288}
|
|
{"current_steps": 26870, "total_steps": 78105, "loss": 0.2931, "lr": 4.146680308304679e-06, "epoch": 1.7201203508098073, "percentage": 34.4, "elapsed_time": "1:10:02", "remaining_time": "2:13:32", "throughput": 20120.08, "total_tokens": 84547584}
|
|
{"current_steps": 26875, "total_steps": 78105, "loss": 0.342, "lr": 4.146259920360533e-06, "epoch": 1.720440432750784, "percentage": 34.41, "elapsed_time": "1:10:02", "remaining_time": "2:13:31", "throughput": 20120.5, "total_tokens": 84562432}
|
|
{"current_steps": 26880, "total_steps": 78105, "loss": 0.3926, "lr": 4.145839450210741e-06, "epoch": 1.720760514691761, "percentage": 34.42, "elapsed_time": "1:10:03", "remaining_time": "2:13:30", "throughput": 20121.01, "total_tokens": 84578176}
|
|
{"current_steps": 26885, "total_steps": 78105, "loss": 0.2463, "lr": 4.145418897876301e-06, "epoch": 1.721080596632738, "percentage": 34.42, "elapsed_time": "1:10:04", "remaining_time": "2:13:29", "throughput": 20121.38, "total_tokens": 84592448}
|
|
{"current_steps": 26890, "total_steps": 78105, "loss": 0.4445, "lr": 4.144998263378211e-06, "epoch": 1.721400678573715, "percentage": 34.43, "elapsed_time": "1:10:04", "remaining_time": "2:13:28", "throughput": 20121.81, "total_tokens": 84607360}
|
|
{"current_steps": 26895, "total_steps": 78105, "loss": 0.2856, "lr": 4.144577546737476e-06, "epoch": 1.7217207605146918, "percentage": 34.43, "elapsed_time": "1:10:05", "remaining_time": "2:13:27", "throughput": 20122.26, "total_tokens": 84622528}
|
|
{"current_steps": 26900, "total_steps": 78105, "loss": 0.2865, "lr": 4.144156747975105e-06, "epoch": 1.7220408424556686, "percentage": 34.44, "elapsed_time": "1:10:06", "remaining_time": "2:13:26", "throughput": 20122.67, "total_tokens": 84637504}
|
|
{"current_steps": 26905, "total_steps": 78105, "loss": 0.2745, "lr": 4.1437358671121095e-06, "epoch": 1.7223609243966456, "percentage": 34.45, "elapsed_time": "1:10:06", "remaining_time": "2:13:25", "throughput": 20123.17, "total_tokens": 84653120}
|
|
{"current_steps": 26910, "total_steps": 78105, "loss": 0.2456, "lr": 4.143314904169508e-06, "epoch": 1.7226810063376226, "percentage": 34.45, "elapsed_time": "1:10:07", "remaining_time": "2:13:24", "throughput": 20123.62, "total_tokens": 84668416}
|
|
{"current_steps": 26915, "total_steps": 78105, "loss": 0.2846, "lr": 4.142893859168319e-06, "epoch": 1.7230010882785993, "percentage": 34.46, "elapsed_time": "1:10:08", "remaining_time": "2:13:23", "throughput": 20124.1, "total_tokens": 84683584}
|
|
{"current_steps": 26920, "total_steps": 78105, "loss": 0.4768, "lr": 4.1424727321295684e-06, "epoch": 1.723321170219576, "percentage": 34.47, "elapsed_time": "1:10:08", "remaining_time": "2:13:22", "throughput": 20124.59, "total_tokens": 84699328}
|
|
{"current_steps": 26925, "total_steps": 78105, "loss": 0.3569, "lr": 4.142051523074285e-06, "epoch": 1.723641252160553, "percentage": 34.47, "elapsed_time": "1:10:09", "remaining_time": "2:13:21", "throughput": 20125.04, "total_tokens": 84714688}
|
|
{"current_steps": 26930, "total_steps": 78105, "loss": 0.4428, "lr": 4.141630232023502e-06, "epoch": 1.72396133410153, "percentage": 34.48, "elapsed_time": "1:10:10", "remaining_time": "2:13:20", "throughput": 20125.56, "total_tokens": 84730432}
|
|
{"current_steps": 26935, "total_steps": 78105, "loss": 0.2943, "lr": 4.1412088589982554e-06, "epoch": 1.7242814160425068, "percentage": 34.49, "elapsed_time": "1:10:10", "remaining_time": "2:13:19", "throughput": 20126.1, "total_tokens": 84746496}
|
|
{"current_steps": 26940, "total_steps": 78105, "loss": 0.3809, "lr": 4.1407874040195875e-06, "epoch": 1.7246014979834836, "percentage": 34.49, "elapsed_time": "1:10:11", "remaining_time": "2:13:18", "throughput": 20126.66, "total_tokens": 84762880}
|
|
{"current_steps": 26945, "total_steps": 78105, "loss": 0.2378, "lr": 4.140365867108543e-06, "epoch": 1.7249215799244606, "percentage": 34.5, "elapsed_time": "1:10:12", "remaining_time": "2:13:17", "throughput": 20127.13, "total_tokens": 84777984}
|
|
{"current_steps": 26950, "total_steps": 78105, "loss": 0.3827, "lr": 4.139944248286172e-06, "epoch": 1.7252416618654376, "percentage": 34.5, "elapsed_time": "1:10:12", "remaining_time": "2:13:16", "throughput": 20127.55, "total_tokens": 84793216}
|
|
{"current_steps": 26955, "total_steps": 78105, "loss": 0.4733, "lr": 4.1395225475735265e-06, "epoch": 1.7255617438064146, "percentage": 34.51, "elapsed_time": "1:10:13", "remaining_time": "2:13:15", "throughput": 20128.04, "total_tokens": 84808512}
|
|
{"current_steps": 26960, "total_steps": 78105, "loss": 0.3325, "lr": 4.139100764991665e-06, "epoch": 1.7258818257473914, "percentage": 34.52, "elapsed_time": "1:10:14", "remaining_time": "2:13:14", "throughput": 20128.66, "total_tokens": 84825216}
|
|
{"current_steps": 26965, "total_steps": 78105, "loss": 0.269, "lr": 4.13867890056165e-06, "epoch": 1.7262019076883681, "percentage": 34.52, "elapsed_time": "1:10:14", "remaining_time": "2:13:13", "throughput": 20129.17, "total_tokens": 84841216}
|
|
{"current_steps": 26970, "total_steps": 78105, "loss": 0.3949, "lr": 4.138256954304546e-06, "epoch": 1.726521989629345, "percentage": 34.53, "elapsed_time": "1:10:15", "remaining_time": "2:13:12", "throughput": 20129.66, "total_tokens": 84857024}
|
|
{"current_steps": 26975, "total_steps": 78105, "loss": 0.3604, "lr": 4.137834926241423e-06, "epoch": 1.726842071570322, "percentage": 34.54, "elapsed_time": "1:10:16", "remaining_time": "2:13:11", "throughput": 20130.13, "total_tokens": 84872448}
|
|
{"current_steps": 26980, "total_steps": 78105, "loss": 0.2039, "lr": 4.137412816393355e-06, "epoch": 1.7271621535112989, "percentage": 34.54, "elapsed_time": "1:10:16", "remaining_time": "2:13:10", "throughput": 20130.54, "total_tokens": 84886976}
|
|
{"current_steps": 26985, "total_steps": 78105, "loss": 0.3646, "lr": 4.1369906247814195e-06, "epoch": 1.7274822354522756, "percentage": 34.55, "elapsed_time": "1:10:17", "remaining_time": "2:13:09", "throughput": 20131.0, "total_tokens": 84902144}
|
|
{"current_steps": 26990, "total_steps": 78105, "loss": 0.3085, "lr": 4.136568351426698e-06, "epoch": 1.7278023173932526, "percentage": 34.56, "elapsed_time": "1:10:18", "remaining_time": "2:13:08", "throughput": 20131.38, "total_tokens": 84916800}
|
|
{"current_steps": 26995, "total_steps": 78105, "loss": 0.3572, "lr": 4.13614599635028e-06, "epoch": 1.7281223993342296, "percentage": 34.56, "elapsed_time": "1:10:18", "remaining_time": "2:13:07", "throughput": 20131.81, "total_tokens": 84931904}
|
|
{"current_steps": 27000, "total_steps": 78105, "loss": 0.2276, "lr": 4.135723559573251e-06, "epoch": 1.7284424812752066, "percentage": 34.57, "elapsed_time": "1:10:19", "remaining_time": "2:13:06", "throughput": 20132.22, "total_tokens": 84946880}
|
|
{"current_steps": 27005, "total_steps": 78105, "loss": 0.385, "lr": 4.135301041116709e-06, "epoch": 1.7287625632161834, "percentage": 34.58, "elapsed_time": "1:10:20", "remaining_time": "2:13:05", "throughput": 20132.72, "total_tokens": 84962752}
|
|
{"current_steps": 27010, "total_steps": 78105, "loss": 0.28, "lr": 4.134878441001751e-06, "epoch": 1.7290826451571601, "percentage": 34.58, "elapsed_time": "1:10:20", "remaining_time": "2:13:04", "throughput": 20133.18, "total_tokens": 84977920}
|
|
{"current_steps": 27015, "total_steps": 78105, "loss": 0.3346, "lr": 4.134455759249479e-06, "epoch": 1.7294027270981371, "percentage": 34.59, "elapsed_time": "1:10:21", "remaining_time": "2:13:03", "throughput": 20133.7, "total_tokens": 84994048}
|
|
{"current_steps": 27020, "total_steps": 78105, "loss": 0.3173, "lr": 4.134032995881e-06, "epoch": 1.7297228090391141, "percentage": 34.59, "elapsed_time": "1:10:22", "remaining_time": "2:13:02", "throughput": 20134.39, "total_tokens": 85011648}
|
|
{"current_steps": 27025, "total_steps": 78105, "loss": 0.3429, "lr": 4.133610150917425e-06, "epoch": 1.730042890980091, "percentage": 34.6, "elapsed_time": "1:10:22", "remaining_time": "2:13:01", "throughput": 20134.91, "total_tokens": 85028096}
|
|
{"current_steps": 27030, "total_steps": 78105, "loss": 0.1905, "lr": 4.133187224379868e-06, "epoch": 1.7303629729210677, "percentage": 34.61, "elapsed_time": "1:10:23", "remaining_time": "2:13:00", "throughput": 20135.34, "total_tokens": 85043200}
|
|
{"current_steps": 27035, "total_steps": 78105, "loss": 0.3828, "lr": 4.1327642162894475e-06, "epoch": 1.7306830548620447, "percentage": 34.61, "elapsed_time": "1:10:24", "remaining_time": "2:12:59", "throughput": 20135.87, "total_tokens": 85059264}
|
|
{"current_steps": 27040, "total_steps": 78105, "loss": 0.2137, "lr": 4.132341126667287e-06, "epoch": 1.7310031368030216, "percentage": 34.62, "elapsed_time": "1:10:24", "remaining_time": "2:12:58", "throughput": 20136.35, "total_tokens": 85074624}
|
|
{"current_steps": 27045, "total_steps": 78105, "loss": 0.2352, "lr": 4.131917955534514e-06, "epoch": 1.7313232187439984, "percentage": 34.63, "elapsed_time": "1:10:25", "remaining_time": "2:12:57", "throughput": 20136.89, "total_tokens": 85090432}
|
|
{"current_steps": 27050, "total_steps": 78105, "loss": 0.4471, "lr": 4.131494702912258e-06, "epoch": 1.7316433006849754, "percentage": 34.63, "elapsed_time": "1:10:26", "remaining_time": "2:12:56", "throughput": 20137.32, "total_tokens": 85105472}
|
|
{"current_steps": 27055, "total_steps": 78105, "loss": 0.346, "lr": 4.131071368821654e-06, "epoch": 1.7319633826259522, "percentage": 34.64, "elapsed_time": "1:10:26", "remaining_time": "2:12:55", "throughput": 20137.8, "total_tokens": 85120832}
|
|
{"current_steps": 27060, "total_steps": 78105, "loss": 0.3021, "lr": 4.130647953283843e-06, "epoch": 1.7322834645669292, "percentage": 34.65, "elapsed_time": "1:10:27", "remaining_time": "2:12:54", "throughput": 20138.25, "total_tokens": 85136128}
|
|
{"current_steps": 27065, "total_steps": 78105, "loss": 0.3896, "lr": 4.130224456319966e-06, "epoch": 1.7326035465079062, "percentage": 34.65, "elapsed_time": "1:10:28", "remaining_time": "2:12:53", "throughput": 20138.74, "total_tokens": 85151744}
|
|
{"current_steps": 27070, "total_steps": 78105, "loss": 0.4597, "lr": 4.129800877951171e-06, "epoch": 1.732923628448883, "percentage": 34.66, "elapsed_time": "1:10:28", "remaining_time": "2:12:52", "throughput": 20139.21, "total_tokens": 85167104}
|
|
{"current_steps": 27075, "total_steps": 78105, "loss": 0.3382, "lr": 4.1293772181986105e-06, "epoch": 1.7332437103898597, "percentage": 34.66, "elapsed_time": "1:10:29", "remaining_time": "2:12:51", "throughput": 20139.68, "total_tokens": 85182592}
|
|
{"current_steps": 27080, "total_steps": 78105, "loss": 0.4818, "lr": 4.128953477083438e-06, "epoch": 1.7335637923308367, "percentage": 34.67, "elapsed_time": "1:10:30", "remaining_time": "2:12:50", "throughput": 20140.12, "total_tokens": 85197440}
|
|
{"current_steps": 27085, "total_steps": 78105, "loss": 0.4165, "lr": 4.128529654626814e-06, "epoch": 1.7338838742718137, "percentage": 34.68, "elapsed_time": "1:10:30", "remaining_time": "2:12:49", "throughput": 20140.61, "total_tokens": 85212992}
|
|
{"current_steps": 27090, "total_steps": 78105, "loss": 0.3688, "lr": 4.128105750849902e-06, "epoch": 1.7342039562127904, "percentage": 34.68, "elapsed_time": "1:10:31", "remaining_time": "2:12:48", "throughput": 20141.04, "total_tokens": 85227776}
|
|
{"current_steps": 27095, "total_steps": 78105, "loss": 0.3282, "lr": 4.12768176577387e-06, "epoch": 1.7345240381537672, "percentage": 34.69, "elapsed_time": "1:10:32", "remaining_time": "2:12:47", "throughput": 20141.56, "total_tokens": 85243648}
|
|
{"current_steps": 27100, "total_steps": 78105, "loss": 0.4574, "lr": 4.127257699419888e-06, "epoch": 1.7348441200947442, "percentage": 34.7, "elapsed_time": "1:10:32", "remaining_time": "2:12:46", "throughput": 20142.07, "total_tokens": 85259584}
|
|
{"current_steps": 27105, "total_steps": 78105, "loss": 0.366, "lr": 4.126833551809132e-06, "epoch": 1.7351642020357212, "percentage": 34.7, "elapsed_time": "1:10:33", "remaining_time": "2:12:45", "throughput": 20142.56, "total_tokens": 85275392}
|
|
{"current_steps": 27110, "total_steps": 78105, "loss": 0.3598, "lr": 4.126409322962783e-06, "epoch": 1.7354842839766982, "percentage": 34.71, "elapsed_time": "1:10:34", "remaining_time": "2:12:44", "throughput": 20142.96, "total_tokens": 85290240}
|
|
{"current_steps": 27115, "total_steps": 78105, "loss": 0.3588, "lr": 4.125985012902024e-06, "epoch": 1.735804365917675, "percentage": 34.72, "elapsed_time": "1:10:34", "remaining_time": "2:12:43", "throughput": 20143.45, "total_tokens": 85305536}
|
|
{"current_steps": 27120, "total_steps": 78105, "loss": 0.3226, "lr": 4.125560621648043e-06, "epoch": 1.7361244478586517, "percentage": 34.72, "elapsed_time": "1:10:35", "remaining_time": "2:12:42", "throughput": 20143.91, "total_tokens": 85321216}
|
|
{"current_steps": 27125, "total_steps": 78105, "loss": 0.3335, "lr": 4.125136149222032e-06, "epoch": 1.7364445297996287, "percentage": 34.73, "elapsed_time": "1:10:36", "remaining_time": "2:12:41", "throughput": 20144.41, "total_tokens": 85336640}
|
|
{"current_steps": 27130, "total_steps": 78105, "loss": 0.3804, "lr": 4.124711595645186e-06, "epoch": 1.7367646117406057, "percentage": 34.74, "elapsed_time": "1:10:36", "remaining_time": "2:12:40", "throughput": 20144.93, "total_tokens": 85352896}
|
|
{"current_steps": 27135, "total_steps": 78105, "loss": 0.3106, "lr": 4.124286960938706e-06, "epoch": 1.7370846936815825, "percentage": 34.74, "elapsed_time": "1:10:37", "remaining_time": "2:12:39", "throughput": 20145.32, "total_tokens": 85367488}
|
|
{"current_steps": 27140, "total_steps": 78105, "loss": 0.305, "lr": 4.123862245123796e-06, "epoch": 1.7374047756225592, "percentage": 34.75, "elapsed_time": "1:10:38", "remaining_time": "2:12:38", "throughput": 20145.95, "total_tokens": 85384768}
|
|
{"current_steps": 27145, "total_steps": 78105, "loss": 0.404, "lr": 4.123437448221664e-06, "epoch": 1.7377248575635362, "percentage": 34.75, "elapsed_time": "1:10:38", "remaining_time": "2:12:37", "throughput": 20146.36, "total_tokens": 85399552}
|
|
{"current_steps": 27150, "total_steps": 78105, "loss": 0.273, "lr": 4.123012570253522e-06, "epoch": 1.7380449395045132, "percentage": 34.76, "elapsed_time": "1:10:39", "remaining_time": "2:12:36", "throughput": 20146.79, "total_tokens": 85414272}
|
|
{"current_steps": 27155, "total_steps": 78105, "loss": 0.1753, "lr": 4.122587611240586e-06, "epoch": 1.7383650214454902, "percentage": 34.77, "elapsed_time": "1:10:40", "remaining_time": "2:12:35", "throughput": 20147.25, "total_tokens": 85429376}
|
|
{"current_steps": 27160, "total_steps": 78105, "loss": 0.3394, "lr": 4.122162571204076e-06, "epoch": 1.738685103386467, "percentage": 34.77, "elapsed_time": "1:10:40", "remaining_time": "2:12:34", "throughput": 20147.72, "total_tokens": 85444800}
|
|
{"current_steps": 27165, "total_steps": 78105, "loss": 0.3662, "lr": 4.121737450165217e-06, "epoch": 1.7390051853274437, "percentage": 34.78, "elapsed_time": "1:10:41", "remaining_time": "2:12:33", "throughput": 20148.28, "total_tokens": 85460928}
|
|
{"current_steps": 27170, "total_steps": 78105, "loss": 0.3124, "lr": 4.1213122481452376e-06, "epoch": 1.7393252672684207, "percentage": 34.79, "elapsed_time": "1:10:42", "remaining_time": "2:12:32", "throughput": 20148.66, "total_tokens": 85475328}
|
|
{"current_steps": 27175, "total_steps": 78105, "loss": 0.305, "lr": 4.1208869651653695e-06, "epoch": 1.7396453492093977, "percentage": 34.79, "elapsed_time": "1:10:42", "remaining_time": "2:12:31", "throughput": 20149.13, "total_tokens": 85491136}
|
|
{"current_steps": 27180, "total_steps": 78105, "loss": 0.4092, "lr": 4.120461601246849e-06, "epoch": 1.7399654311503745, "percentage": 34.8, "elapsed_time": "1:10:43", "remaining_time": "2:12:30", "throughput": 20149.68, "total_tokens": 85507520}
|
|
{"current_steps": 27185, "total_steps": 78105, "loss": 0.456, "lr": 4.120036156410917e-06, "epoch": 1.7402855130913513, "percentage": 34.81, "elapsed_time": "1:10:44", "remaining_time": "2:12:29", "throughput": 20150.1, "total_tokens": 85522368}
|
|
{"current_steps": 27190, "total_steps": 78105, "loss": 0.3396, "lr": 4.1196106306788174e-06, "epoch": 1.7406055950323283, "percentage": 34.81, "elapsed_time": "1:10:44", "remaining_time": "2:12:28", "throughput": 20150.68, "total_tokens": 85538944}
|
|
{"current_steps": 27195, "total_steps": 78105, "loss": 0.2535, "lr": 4.119185024071799e-06, "epoch": 1.7409256769733052, "percentage": 34.82, "elapsed_time": "1:10:45", "remaining_time": "2:12:27", "throughput": 20151.13, "total_tokens": 85554368}
|
|
{"current_steps": 27200, "total_steps": 78105, "loss": 0.361, "lr": 4.118759336611116e-06, "epoch": 1.741245758914282, "percentage": 34.82, "elapsed_time": "1:10:46", "remaining_time": "2:12:26", "throughput": 20151.56, "total_tokens": 85569408}
|
|
{"current_steps": 27205, "total_steps": 78105, "loss": 0.4347, "lr": 4.118333568318022e-06, "epoch": 1.7415658408552588, "percentage": 34.83, "elapsed_time": "1:10:46", "remaining_time": "2:12:26", "throughput": 20152.11, "total_tokens": 85585792}
|
|
{"current_steps": 27210, "total_steps": 78105, "loss": 0.462, "lr": 4.11790771921378e-06, "epoch": 1.7418859227962358, "percentage": 34.84, "elapsed_time": "1:10:47", "remaining_time": "2:12:25", "throughput": 20152.75, "total_tokens": 85603200}
|
|
{"current_steps": 27215, "total_steps": 78105, "loss": 0.3331, "lr": 4.117481789319653e-06, "epoch": 1.7422060047372128, "percentage": 34.84, "elapsed_time": "1:10:48", "remaining_time": "2:12:24", "throughput": 20153.14, "total_tokens": 85617984}
|
|
{"current_steps": 27220, "total_steps": 78105, "loss": 0.2692, "lr": 4.117055778656911e-06, "epoch": 1.7425260866781898, "percentage": 34.85, "elapsed_time": "1:10:49", "remaining_time": "2:12:23", "throughput": 20153.65, "total_tokens": 85633664}
|
|
{"current_steps": 27225, "total_steps": 78105, "loss": 0.247, "lr": 4.116629687246827e-06, "epoch": 1.7428461686191665, "percentage": 34.86, "elapsed_time": "1:10:49", "remaining_time": "2:12:22", "throughput": 20154.24, "total_tokens": 85650496}
|
|
{"current_steps": 27230, "total_steps": 78105, "loss": 0.3757, "lr": 4.116203515110676e-06, "epoch": 1.7431662505601433, "percentage": 34.86, "elapsed_time": "1:10:50", "remaining_time": "2:12:21", "throughput": 20154.65, "total_tokens": 85665408}
|
|
{"current_steps": 27235, "total_steps": 78105, "loss": 0.1952, "lr": 4.115777262269741e-06, "epoch": 1.7434863325011203, "percentage": 34.87, "elapsed_time": "1:10:51", "remaining_time": "2:12:20", "throughput": 20155.18, "total_tokens": 85681408}
|
|
{"current_steps": 27240, "total_steps": 78105, "loss": 0.3594, "lr": 4.1153509287453054e-06, "epoch": 1.7438064144420973, "percentage": 34.88, "elapsed_time": "1:10:51", "remaining_time": "2:12:19", "throughput": 20155.72, "total_tokens": 85697472}
|
|
{"current_steps": 27245, "total_steps": 78105, "loss": 0.6312, "lr": 4.114924514558658e-06, "epoch": 1.744126496383074, "percentage": 34.88, "elapsed_time": "1:10:52", "remaining_time": "2:12:18", "throughput": 20156.28, "total_tokens": 85714048}
|
|
{"current_steps": 27250, "total_steps": 78105, "loss": 0.3488, "lr": 4.114498019731093e-06, "epoch": 1.7444465783240508, "percentage": 34.89, "elapsed_time": "1:10:53", "remaining_time": "2:12:17", "throughput": 20156.69, "total_tokens": 85728896}
|
|
{"current_steps": 27255, "total_steps": 78105, "loss": 0.3511, "lr": 4.114071444283905e-06, "epoch": 1.7447666602650278, "percentage": 34.9, "elapsed_time": "1:10:53", "remaining_time": "2:12:16", "throughput": 20157.14, "total_tokens": 85744128}
|
|
{"current_steps": 27260, "total_steps": 78105, "loss": 0.2834, "lr": 4.113644788238398e-06, "epoch": 1.7450867422060048, "percentage": 34.9, "elapsed_time": "1:10:54", "remaining_time": "2:12:15", "throughput": 20157.72, "total_tokens": 85760896}
|
|
{"current_steps": 27265, "total_steps": 78105, "loss": 0.348, "lr": 4.113218051615875e-06, "epoch": 1.7454068241469818, "percentage": 34.91, "elapsed_time": "1:10:55", "remaining_time": "2:12:14", "throughput": 20158.31, "total_tokens": 85777600}
|
|
{"current_steps": 27270, "total_steps": 78105, "loss": 0.3729, "lr": 4.112791234437647e-06, "epoch": 1.7457269060879586, "percentage": 34.91, "elapsed_time": "1:10:55", "remaining_time": "2:12:13", "throughput": 20158.76, "total_tokens": 85792832}
|
|
{"current_steps": 27275, "total_steps": 78105, "loss": 0.4245, "lr": 4.112364336725023e-06, "epoch": 1.7460469880289353, "percentage": 34.92, "elapsed_time": "1:10:56", "remaining_time": "2:12:12", "throughput": 20159.15, "total_tokens": 85807424}
|
|
{"current_steps": 27280, "total_steps": 78105, "loss": 0.4008, "lr": 4.111937358499324e-06, "epoch": 1.7463670699699123, "percentage": 34.93, "elapsed_time": "1:10:57", "remaining_time": "2:12:11", "throughput": 20159.59, "total_tokens": 85822656}
|
|
{"current_steps": 27285, "total_steps": 78105, "loss": 0.2852, "lr": 4.111510299781869e-06, "epoch": 1.7466871519108893, "percentage": 34.93, "elapsed_time": "1:10:57", "remaining_time": "2:12:10", "throughput": 20160.03, "total_tokens": 85837632}
|
|
{"current_steps": 27290, "total_steps": 78105, "loss": 0.4014, "lr": 4.111083160593983e-06, "epoch": 1.747007233851866, "percentage": 34.94, "elapsed_time": "1:10:58", "remaining_time": "2:12:09", "throughput": 20160.5, "total_tokens": 85853376}
|
|
{"current_steps": 27295, "total_steps": 78105, "loss": 0.2782, "lr": 4.110655940956997e-06, "epoch": 1.7473273157928428, "percentage": 34.95, "elapsed_time": "1:10:59", "remaining_time": "2:12:08", "throughput": 20160.93, "total_tokens": 85868480}
|
|
{"current_steps": 27300, "total_steps": 78105, "loss": 0.2557, "lr": 4.110228640892242e-06, "epoch": 1.7476473977338198, "percentage": 34.95, "elapsed_time": "1:10:59", "remaining_time": "2:12:07", "throughput": 20161.48, "total_tokens": 85884608}
|
|
{"current_steps": 27305, "total_steps": 78105, "loss": 0.2501, "lr": 4.109801260421057e-06, "epoch": 1.7479674796747968, "percentage": 34.96, "elapsed_time": "1:11:00", "remaining_time": "2:12:06", "throughput": 20161.9, "total_tokens": 85899776}
|
|
{"current_steps": 27310, "total_steps": 78105, "loss": 0.2386, "lr": 4.109373799564782e-06, "epoch": 1.7482875616157736, "percentage": 34.97, "elapsed_time": "1:11:01", "remaining_time": "2:12:05", "throughput": 20162.47, "total_tokens": 85915904}
|
|
{"current_steps": 27315, "total_steps": 78105, "loss": 0.4077, "lr": 4.1089462583447615e-06, "epoch": 1.7486076435567506, "percentage": 34.97, "elapsed_time": "1:11:01", "remaining_time": "2:12:04", "throughput": 20162.9, "total_tokens": 85930688}
|
|
{"current_steps": 27320, "total_steps": 78105, "loss": 0.3356, "lr": 4.108518636782346e-06, "epoch": 1.7489277254977273, "percentage": 34.98, "elapsed_time": "1:11:02", "remaining_time": "2:12:03", "throughput": 20163.39, "total_tokens": 85946304}
|
|
{"current_steps": 27325, "total_steps": 78105, "loss": 0.34, "lr": 4.108090934898888e-06, "epoch": 1.7492478074387043, "percentage": 34.98, "elapsed_time": "1:11:03", "remaining_time": "2:12:02", "throughput": 20163.84, "total_tokens": 85961856}
|
|
{"current_steps": 27330, "total_steps": 78105, "loss": 0.4829, "lr": 4.107663152715746e-06, "epoch": 1.7495678893796813, "percentage": 34.99, "elapsed_time": "1:11:03", "remaining_time": "2:12:01", "throughput": 20164.36, "total_tokens": 85978048}
|
|
{"current_steps": 27335, "total_steps": 78105, "loss": 0.3376, "lr": 4.107235290254279e-06, "epoch": 1.749887971320658, "percentage": 35.0, "elapsed_time": "1:11:04", "remaining_time": "2:12:00", "throughput": 20164.86, "total_tokens": 85994176}
|
|
{"current_steps": 27340, "total_steps": 78105, "loss": 0.2995, "lr": 4.106807347535854e-06, "epoch": 1.7502080532616349, "percentage": 35.0, "elapsed_time": "1:11:05", "remaining_time": "2:11:59", "throughput": 20165.36, "total_tokens": 86010048}
|
|
{"current_steps": 27342, "total_steps": 78105, "eval_loss": 0.477387011051178, "epoch": 1.7503360860380257, "percentage": 35.01, "elapsed_time": "1:11:56", "remaining_time": "2:13:33", "throughput": 19927.31, "total_tokens": 86015936}
|
|
{"current_steps": 27345, "total_steps": 78105, "loss": 0.5011, "lr": 4.10637932458184e-06, "epoch": 1.7505281352026119, "percentage": 35.01, "elapsed_time": "1:12:33", "remaining_time": "2:14:41", "throughput": 19758.84, "total_tokens": 86026496}
|
|
{"current_steps": 27350, "total_steps": 78105, "loss": 0.4033, "lr": 4.105951221413609e-06, "epoch": 1.7508482171435888, "percentage": 35.02, "elapsed_time": "1:12:34", "remaining_time": "2:14:40", "throughput": 19759.35, "total_tokens": 86041792}
|
|
{"current_steps": 27355, "total_steps": 78105, "loss": 0.4054, "lr": 4.10552303805254e-06, "epoch": 1.7511682990845656, "percentage": 35.02, "elapsed_time": "1:12:35", "remaining_time": "2:14:39", "throughput": 19759.94, "total_tokens": 86058112}
|
|
{"current_steps": 27360, "total_steps": 78105, "loss": 0.5139, "lr": 4.105094774520012e-06, "epoch": 1.7514883810255424, "percentage": 35.03, "elapsed_time": "1:12:35", "remaining_time": "2:14:39", "throughput": 19760.75, "total_tokens": 86076608}
|
|
{"current_steps": 27365, "total_steps": 78105, "loss": 0.2469, "lr": 4.104666430837413e-06, "epoch": 1.7518084629665194, "percentage": 35.04, "elapsed_time": "1:12:36", "remaining_time": "2:14:38", "throughput": 19761.38, "total_tokens": 86092992}
|
|
{"current_steps": 27370, "total_steps": 78105, "loss": 0.3058, "lr": 4.10423800702613e-06, "epoch": 1.7521285449074964, "percentage": 35.04, "elapsed_time": "1:12:37", "remaining_time": "2:14:36", "throughput": 19761.82, "total_tokens": 86107904}
|
|
{"current_steps": 27375, "total_steps": 78105, "loss": 0.3834, "lr": 4.103809503107557e-06, "epoch": 1.7524486268484734, "percentage": 35.05, "elapsed_time": "1:12:37", "remaining_time": "2:14:36", "throughput": 19762.48, "total_tokens": 86124800}
|
|
{"current_steps": 27380, "total_steps": 78105, "loss": 0.2621, "lr": 4.103380919103092e-06, "epoch": 1.7527687087894501, "percentage": 35.06, "elapsed_time": "1:12:38", "remaining_time": "2:14:34", "throughput": 19762.96, "total_tokens": 86140096}
|
|
{"current_steps": 27385, "total_steps": 78105, "loss": 0.3195, "lr": 4.102952255034135e-06, "epoch": 1.753088790730427, "percentage": 35.06, "elapsed_time": "1:12:39", "remaining_time": "2:14:33", "throughput": 19763.42, "total_tokens": 86155008}
|
|
{"current_steps": 27390, "total_steps": 78105, "loss": 0.5096, "lr": 4.102523510922092e-06, "epoch": 1.7534088726714039, "percentage": 35.07, "elapsed_time": "1:12:39", "remaining_time": "2:14:32", "throughput": 19763.96, "total_tokens": 86170496}
|
|
{"current_steps": 27395, "total_steps": 78105, "loss": 0.3476, "lr": 4.102094686788373e-06, "epoch": 1.7537289546123809, "percentage": 35.07, "elapsed_time": "1:12:40", "remaining_time": "2:14:31", "throughput": 19764.41, "total_tokens": 86185472}
|
|
{"current_steps": 27400, "total_steps": 78105, "loss": 0.4326, "lr": 4.1016657826543895e-06, "epoch": 1.7540490365533576, "percentage": 35.08, "elapsed_time": "1:12:41", "remaining_time": "2:14:30", "throughput": 19764.96, "total_tokens": 86201216}
|
|
{"current_steps": 27405, "total_steps": 78105, "loss": 0.2136, "lr": 4.1012367985415595e-06, "epoch": 1.7543691184943344, "percentage": 35.09, "elapsed_time": "1:12:41", "remaining_time": "2:14:29", "throughput": 19765.33, "total_tokens": 86215040}
|
|
{"current_steps": 27410, "total_steps": 78105, "loss": 0.4943, "lr": 4.1008077344713045e-06, "epoch": 1.7546892004353114, "percentage": 35.09, "elapsed_time": "1:12:42", "remaining_time": "2:14:28", "throughput": 19765.75, "total_tokens": 86229632}
|
|
{"current_steps": 27415, "total_steps": 78105, "loss": 0.3653, "lr": 4.100378590465049e-06, "epoch": 1.7550092823762884, "percentage": 35.1, "elapsed_time": "1:12:43", "remaining_time": "2:14:27", "throughput": 19766.14, "total_tokens": 86243840}
|
|
{"current_steps": 27420, "total_steps": 78105, "loss": 0.2443, "lr": 4.099949366544224e-06, "epoch": 1.7553293643172654, "percentage": 35.11, "elapsed_time": "1:12:43", "remaining_time": "2:14:26", "throughput": 19766.61, "total_tokens": 86258368}
|
|
{"current_steps": 27425, "total_steps": 78105, "loss": 0.4385, "lr": 4.09952006273026e-06, "epoch": 1.7556494462582422, "percentage": 35.11, "elapsed_time": "1:12:44", "remaining_time": "2:14:25", "throughput": 19767.18, "total_tokens": 86274304}
|
|
{"current_steps": 27430, "total_steps": 78105, "loss": 0.4115, "lr": 4.099090679044597e-06, "epoch": 1.755969528199219, "percentage": 35.12, "elapsed_time": "1:12:45", "remaining_time": "2:14:24", "throughput": 19767.74, "total_tokens": 86290304}
|
|
{"current_steps": 27435, "total_steps": 78105, "loss": 0.3621, "lr": 4.0986612155086745e-06, "epoch": 1.756289610140196, "percentage": 35.13, "elapsed_time": "1:12:45", "remaining_time": "2:14:23", "throughput": 19768.22, "total_tokens": 86305408}
|
|
{"current_steps": 27440, "total_steps": 78105, "loss": 0.6266, "lr": 4.098231672143938e-06, "epoch": 1.756609692081173, "percentage": 35.13, "elapsed_time": "1:12:47", "remaining_time": "2:14:23", "throughput": 19769.53, "total_tokens": 86334336}
|
|
{"current_steps": 27445, "total_steps": 78105, "loss": 0.3024, "lr": 4.097802048971836e-06, "epoch": 1.7569297740221497, "percentage": 35.14, "elapsed_time": "1:12:47", "remaining_time": "2:14:22", "throughput": 19770.2, "total_tokens": 86351296}
|
|
{"current_steps": 27450, "total_steps": 78105, "loss": 0.357, "lr": 4.097372346013823e-06, "epoch": 1.7572498559631264, "percentage": 35.14, "elapsed_time": "1:12:48", "remaining_time": "2:14:21", "throughput": 19771.04, "total_tokens": 86369920}
|
|
{"current_steps": 27455, "total_steps": 78105, "loss": 0.3462, "lr": 4.096942563291355e-06, "epoch": 1.7575699379041034, "percentage": 35.15, "elapsed_time": "1:12:49", "remaining_time": "2:14:20", "throughput": 19771.51, "total_tokens": 86384960}
|
|
{"current_steps": 27460, "total_steps": 78105, "loss": 0.358, "lr": 4.096512700825894e-06, "epoch": 1.7578900198450804, "percentage": 35.16, "elapsed_time": "1:12:49", "remaining_time": "2:14:19", "throughput": 19771.98, "total_tokens": 86399808}
|
|
{"current_steps": 27465, "total_steps": 78105, "loss": 0.4086, "lr": 4.096082758638904e-06, "epoch": 1.7582101017860572, "percentage": 35.16, "elapsed_time": "1:12:50", "remaining_time": "2:14:18", "throughput": 19772.51, "total_tokens": 86415616}
|
|
{"current_steps": 27470, "total_steps": 78105, "loss": 0.3551, "lr": 4.095652736751856e-06, "epoch": 1.758530183727034, "percentage": 35.17, "elapsed_time": "1:12:51", "remaining_time": "2:14:17", "throughput": 19773.1, "total_tokens": 86432128}
|
|
{"current_steps": 27475, "total_steps": 78105, "loss": 0.415, "lr": 4.0952226351862204e-06, "epoch": 1.758850265668011, "percentage": 35.18, "elapsed_time": "1:12:51", "remaining_time": "2:14:16", "throughput": 19773.5, "total_tokens": 86446336}
|
|
{"current_steps": 27480, "total_steps": 78105, "loss": 0.2837, "lr": 4.094792453963476e-06, "epoch": 1.759170347608988, "percentage": 35.18, "elapsed_time": "1:12:52", "remaining_time": "2:14:15", "throughput": 19773.91, "total_tokens": 86461056}
|
|
{"current_steps": 27485, "total_steps": 78105, "loss": 0.4098, "lr": 4.094362193105104e-06, "epoch": 1.759490429549965, "percentage": 35.19, "elapsed_time": "1:12:53", "remaining_time": "2:14:14", "throughput": 19774.44, "total_tokens": 86476800}
|
|
{"current_steps": 27490, "total_steps": 78105, "loss": 0.3081, "lr": 4.093931852632588e-06, "epoch": 1.7598105114909417, "percentage": 35.2, "elapsed_time": "1:12:53", "remaining_time": "2:14:13", "throughput": 19775.04, "total_tokens": 86493376}
|
|
{"current_steps": 27495, "total_steps": 78105, "loss": 0.3491, "lr": 4.093501432567418e-06, "epoch": 1.7601305934319185, "percentage": 35.2, "elapsed_time": "1:12:54", "remaining_time": "2:14:12", "throughput": 19775.5, "total_tokens": 86508544}
|
|
{"current_steps": 27500, "total_steps": 78105, "loss": 0.3194, "lr": 4.093070932931087e-06, "epoch": 1.7604506753728955, "percentage": 35.21, "elapsed_time": "1:12:55", "remaining_time": "2:14:11", "throughput": 19776.07, "total_tokens": 86524480}
|
|
{"current_steps": 27505, "total_steps": 78105, "loss": 0.2945, "lr": 4.092640353745092e-06, "epoch": 1.7607707573138724, "percentage": 35.22, "elapsed_time": "1:12:55", "remaining_time": "2:14:10", "throughput": 19776.49, "total_tokens": 86539136}
|
|
{"current_steps": 27510, "total_steps": 78105, "loss": 0.3528, "lr": 4.092209695030933e-06, "epoch": 1.7610908392548492, "percentage": 35.22, "elapsed_time": "1:12:56", "remaining_time": "2:14:09", "throughput": 19777.04, "total_tokens": 86555328}
|
|
{"current_steps": 27515, "total_steps": 78105, "loss": 0.2385, "lr": 4.091778956810115e-06, "epoch": 1.761410921195826, "percentage": 35.23, "elapsed_time": "1:12:57", "remaining_time": "2:14:08", "throughput": 19777.5, "total_tokens": 86570624}
|
|
{"current_steps": 27520, "total_steps": 78105, "loss": 0.3079, "lr": 4.091348139104147e-06, "epoch": 1.761731003136803, "percentage": 35.23, "elapsed_time": "1:12:57", "remaining_time": "2:14:07", "throughput": 19777.97, "total_tokens": 86585600}
|
|
{"current_steps": 27525, "total_steps": 78105, "loss": 0.4022, "lr": 4.0909172419345436e-06, "epoch": 1.76205108507778, "percentage": 35.24, "elapsed_time": "1:12:58", "remaining_time": "2:14:06", "throughput": 19778.47, "total_tokens": 86601216}
|
|
{"current_steps": 27530, "total_steps": 78105, "loss": 0.2999, "lr": 4.090486265322818e-06, "epoch": 1.762371167018757, "percentage": 35.25, "elapsed_time": "1:12:59", "remaining_time": "2:14:05", "throughput": 19779.03, "total_tokens": 86617216}
|
|
{"current_steps": 27535, "total_steps": 78105, "loss": 0.2846, "lr": 4.090055209290494e-06, "epoch": 1.7626912489597337, "percentage": 35.25, "elapsed_time": "1:12:59", "remaining_time": "2:14:04", "throughput": 19779.53, "total_tokens": 86632960}
|
|
{"current_steps": 27540, "total_steps": 78105, "loss": 0.3345, "lr": 4.089624073859095e-06, "epoch": 1.7630113309007105, "percentage": 35.26, "elapsed_time": "1:13:00", "remaining_time": "2:14:03", "throughput": 19780.05, "total_tokens": 86648576}
|
|
{"current_steps": 27545, "total_steps": 78105, "loss": 0.2538, "lr": 4.08919285905015e-06, "epoch": 1.7633314128416875, "percentage": 35.27, "elapsed_time": "1:13:01", "remaining_time": "2:14:01", "throughput": 19780.47, "total_tokens": 86663232}
|
|
{"current_steps": 27550, "total_steps": 78105, "loss": 0.2743, "lr": 4.0887615648851906e-06, "epoch": 1.7636514947826645, "percentage": 35.27, "elapsed_time": "1:13:01", "remaining_time": "2:14:00", "throughput": 19781.02, "total_tokens": 86679104}
|
|
{"current_steps": 27555, "total_steps": 78105, "loss": 0.3117, "lr": 4.088330191385754e-06, "epoch": 1.7639715767236412, "percentage": 35.28, "elapsed_time": "1:13:02", "remaining_time": "2:14:00", "throughput": 19781.63, "total_tokens": 86695872}
|
|
{"current_steps": 27560, "total_steps": 78105, "loss": 0.4026, "lr": 4.087898738573382e-06, "epoch": 1.764291658664618, "percentage": 35.29, "elapsed_time": "1:13:03", "remaining_time": "2:13:59", "throughput": 19782.21, "total_tokens": 86712064}
|
|
{"current_steps": 27565, "total_steps": 78105, "loss": 0.3008, "lr": 4.087467206469617e-06, "epoch": 1.764611740605595, "percentage": 35.29, "elapsed_time": "1:13:04", "remaining_time": "2:13:58", "throughput": 19782.84, "total_tokens": 86728832}
|
|
{"current_steps": 27570, "total_steps": 78105, "loss": 0.3186, "lr": 4.087035595096009e-06, "epoch": 1.764931822546572, "percentage": 35.3, "elapsed_time": "1:13:04", "remaining_time": "2:13:57", "throughput": 19783.25, "total_tokens": 86743872}
|
|
{"current_steps": 27575, "total_steps": 78105, "loss": 0.2668, "lr": 4.08660390447411e-06, "epoch": 1.7652519044875488, "percentage": 35.31, "elapsed_time": "1:13:05", "remaining_time": "2:13:56", "throughput": 19783.67, "total_tokens": 86758720}
|
|
{"current_steps": 27580, "total_steps": 78105, "loss": 0.4228, "lr": 4.086172134625477e-06, "epoch": 1.7655719864285258, "percentage": 35.31, "elapsed_time": "1:13:06", "remaining_time": "2:13:55", "throughput": 19784.17, "total_tokens": 86774464}
|
|
{"current_steps": 27585, "total_steps": 78105, "loss": 0.2753, "lr": 4.08574028557167e-06, "epoch": 1.7658920683695025, "percentage": 35.32, "elapsed_time": "1:13:06", "remaining_time": "2:13:54", "throughput": 19784.71, "total_tokens": 86790528}
|
|
{"current_steps": 27590, "total_steps": 78105, "loss": 0.2646, "lr": 4.085308357334251e-06, "epoch": 1.7662121503104795, "percentage": 35.32, "elapsed_time": "1:13:07", "remaining_time": "2:13:53", "throughput": 19785.18, "total_tokens": 86806016}
|
|
{"current_steps": 27595, "total_steps": 78105, "loss": 0.258, "lr": 4.084876349934792e-06, "epoch": 1.7665322322514565, "percentage": 35.33, "elapsed_time": "1:13:08", "remaining_time": "2:13:51", "throughput": 19785.62, "total_tokens": 86821312}
|
|
{"current_steps": 27600, "total_steps": 78105, "loss": 0.3119, "lr": 4.084444263394863e-06, "epoch": 1.7668523141924333, "percentage": 35.34, "elapsed_time": "1:13:08", "remaining_time": "2:13:50", "throughput": 19786.11, "total_tokens": 86836800}
|
|
{"current_steps": 27605, "total_steps": 78105, "loss": 0.4817, "lr": 4.084012097736039e-06, "epoch": 1.76717239613341, "percentage": 35.34, "elapsed_time": "1:13:09", "remaining_time": "2:13:49", "throughput": 19786.55, "total_tokens": 86852160}
|
|
{"current_steps": 27610, "total_steps": 78105, "loss": 0.3468, "lr": 4.083579852979903e-06, "epoch": 1.767492478074387, "percentage": 35.35, "elapsed_time": "1:13:10", "remaining_time": "2:13:48", "throughput": 19787.09, "total_tokens": 86868032}
|
|
{"current_steps": 27615, "total_steps": 78105, "loss": 0.4362, "lr": 4.083147529148038e-06, "epoch": 1.767812560015364, "percentage": 35.36, "elapsed_time": "1:13:10", "remaining_time": "2:13:47", "throughput": 19787.59, "total_tokens": 86883456}
|
|
{"current_steps": 27620, "total_steps": 78105, "loss": 0.3842, "lr": 4.082715126262031e-06, "epoch": 1.7681326419563408, "percentage": 35.36, "elapsed_time": "1:13:11", "remaining_time": "2:13:46", "throughput": 19788.07, "total_tokens": 86898688}
|
|
{"current_steps": 27625, "total_steps": 78105, "loss": 0.2753, "lr": 4.082282644343475e-06, "epoch": 1.7684527238973176, "percentage": 35.37, "elapsed_time": "1:13:12", "remaining_time": "2:13:45", "throughput": 19788.65, "total_tokens": 86915392}
|
|
{"current_steps": 27630, "total_steps": 78105, "loss": 0.3564, "lr": 4.0818500834139655e-06, "epoch": 1.7687728058382945, "percentage": 35.38, "elapsed_time": "1:13:12", "remaining_time": "2:13:44", "throughput": 19789.19, "total_tokens": 86931520}
|
|
{"current_steps": 27635, "total_steps": 78105, "loss": 0.3086, "lr": 4.081417443495103e-06, "epoch": 1.7690928877792715, "percentage": 35.38, "elapsed_time": "1:13:13", "remaining_time": "2:13:44", "throughput": 19789.85, "total_tokens": 86948608}
|
|
{"current_steps": 27640, "total_steps": 78105, "loss": 0.246, "lr": 4.08098472460849e-06, "epoch": 1.7694129697202485, "percentage": 35.39, "elapsed_time": "1:13:14", "remaining_time": "2:13:43", "throughput": 19790.42, "total_tokens": 86964928}
|
|
{"current_steps": 27645, "total_steps": 78105, "loss": 0.3752, "lr": 4.080551926775735e-06, "epoch": 1.7697330516612253, "percentage": 35.39, "elapsed_time": "1:13:14", "remaining_time": "2:13:42", "throughput": 19791.01, "total_tokens": 86981440}
|
|
{"current_steps": 27650, "total_steps": 78105, "loss": 0.3438, "lr": 4.08011905001845e-06, "epoch": 1.770053133602202, "percentage": 35.4, "elapsed_time": "1:13:15", "remaining_time": "2:13:41", "throughput": 19791.56, "total_tokens": 86997888}
|
|
{"current_steps": 27655, "total_steps": 78105, "loss": 0.3245, "lr": 4.079686094358248e-06, "epoch": 1.770373215543179, "percentage": 35.41, "elapsed_time": "1:13:16", "remaining_time": "2:13:40", "throughput": 19791.96, "total_tokens": 87012288}
|
|
{"current_steps": 27660, "total_steps": 78105, "loss": 0.2806, "lr": 4.079253059816753e-06, "epoch": 1.770693297484156, "percentage": 35.41, "elapsed_time": "1:13:17", "remaining_time": "2:13:39", "throughput": 19792.51, "total_tokens": 87028160}
|
|
{"current_steps": 27665, "total_steps": 78105, "loss": 0.4045, "lr": 4.078819946415586e-06, "epoch": 1.7710133794251328, "percentage": 35.42, "elapsed_time": "1:13:17", "remaining_time": "2:13:38", "throughput": 19793.04, "total_tokens": 87043968}
|
|
{"current_steps": 27670, "total_steps": 78105, "loss": 0.4153, "lr": 4.078386754176374e-06, "epoch": 1.7713334613661096, "percentage": 35.43, "elapsed_time": "1:13:18", "remaining_time": "2:13:37", "throughput": 19793.57, "total_tokens": 87059712}
|
|
{"current_steps": 27675, "total_steps": 78105, "loss": 0.3821, "lr": 4.077953483120749e-06, "epoch": 1.7716535433070866, "percentage": 35.43, "elapsed_time": "1:13:19", "remaining_time": "2:13:36", "throughput": 19794.33, "total_tokens": 87077504}
|
|
{"current_steps": 27680, "total_steps": 78105, "loss": 0.449, "lr": 4.077520133270347e-06, "epoch": 1.7719736252480636, "percentage": 35.44, "elapsed_time": "1:13:19", "remaining_time": "2:13:35", "throughput": 19794.81, "total_tokens": 87092544}
|
|
{"current_steps": 27685, "total_steps": 78105, "loss": 0.4026, "lr": 4.077086704646807e-06, "epoch": 1.7722937071890406, "percentage": 35.45, "elapsed_time": "1:13:20", "remaining_time": "2:13:34", "throughput": 19795.39, "total_tokens": 87108800}
|
|
{"current_steps": 27690, "total_steps": 78105, "loss": 0.486, "lr": 4.07665319727177e-06, "epoch": 1.7726137891300173, "percentage": 35.45, "elapsed_time": "1:13:21", "remaining_time": "2:13:33", "throughput": 19795.88, "total_tokens": 87123776}
|
|
{"current_steps": 27695, "total_steps": 78105, "loss": 0.4676, "lr": 4.076219611166886e-06, "epoch": 1.772933871070994, "percentage": 35.46, "elapsed_time": "1:13:21", "remaining_time": "2:13:32", "throughput": 19796.51, "total_tokens": 87140544}
|
|
{"current_steps": 27700, "total_steps": 78105, "loss": 0.4814, "lr": 4.075785946353805e-06, "epoch": 1.773253953011971, "percentage": 35.47, "elapsed_time": "1:13:22", "remaining_time": "2:13:31", "throughput": 19797.15, "total_tokens": 87157440}
|
|
{"current_steps": 27705, "total_steps": 78105, "loss": 0.3255, "lr": 4.075352202854181e-06, "epoch": 1.773574034952948, "percentage": 35.47, "elapsed_time": "1:13:23", "remaining_time": "2:13:30", "throughput": 19797.65, "total_tokens": 87172608}
|
|
{"current_steps": 27710, "total_steps": 78105, "loss": 0.296, "lr": 4.0749183806896745e-06, "epoch": 1.7738941168939248, "percentage": 35.48, "elapsed_time": "1:13:23", "remaining_time": "2:13:29", "throughput": 19798.33, "total_tokens": 87189888}
|
|
{"current_steps": 27715, "total_steps": 78105, "loss": 0.3075, "lr": 4.074484479881947e-06, "epoch": 1.7742141988349016, "percentage": 35.48, "elapsed_time": "1:13:24", "remaining_time": "2:13:28", "throughput": 19799.07, "total_tokens": 87207680}
|
|
{"current_steps": 27720, "total_steps": 78105, "loss": 0.2536, "lr": 4.0740505004526655e-06, "epoch": 1.7745342807758786, "percentage": 35.49, "elapsed_time": "1:13:25", "remaining_time": "2:13:27", "throughput": 19799.65, "total_tokens": 87223936}
|
|
{"current_steps": 27725, "total_steps": 78105, "loss": 0.3287, "lr": 4.073616442423502e-06, "epoch": 1.7748543627168556, "percentage": 35.5, "elapsed_time": "1:13:25", "remaining_time": "2:13:26", "throughput": 19800.08, "total_tokens": 87238656}
|
|
{"current_steps": 27730, "total_steps": 78105, "loss": 0.2594, "lr": 4.073182305816129e-06, "epoch": 1.7751744446578324, "percentage": 35.5, "elapsed_time": "1:13:26", "remaining_time": "2:13:25", "throughput": 19800.62, "total_tokens": 87254784}
|
|
{"current_steps": 27735, "total_steps": 78105, "loss": 0.2666, "lr": 4.072748090652226e-06, "epoch": 1.7754945265988091, "percentage": 35.51, "elapsed_time": "1:13:27", "remaining_time": "2:13:24", "throughput": 19801.21, "total_tokens": 87271424}
|
|
{"current_steps": 27740, "total_steps": 78105, "loss": 0.4311, "lr": 4.072313796953476e-06, "epoch": 1.7758146085397861, "percentage": 35.52, "elapsed_time": "1:13:28", "remaining_time": "2:13:23", "throughput": 19801.77, "total_tokens": 87287488}
|
|
{"current_steps": 27745, "total_steps": 78105, "loss": 0.2449, "lr": 4.071879424741565e-06, "epoch": 1.7761346904807631, "percentage": 35.52, "elapsed_time": "1:13:28", "remaining_time": "2:13:22", "throughput": 19802.26, "total_tokens": 87302784}
|
|
{"current_steps": 27750, "total_steps": 78105, "loss": 0.2954, "lr": 4.071444974038182e-06, "epoch": 1.77645477242174, "percentage": 35.53, "elapsed_time": "1:13:29", "remaining_time": "2:13:21", "throughput": 19802.8, "total_tokens": 87318848}
|
|
{"current_steps": 27755, "total_steps": 78105, "loss": 0.3946, "lr": 4.071010444865024e-06, "epoch": 1.7767748543627169, "percentage": 35.54, "elapsed_time": "1:13:30", "remaining_time": "2:13:20", "throughput": 19803.28, "total_tokens": 87334080}
|
|
{"current_steps": 27760, "total_steps": 78105, "loss": 0.3141, "lr": 4.070575837243786e-06, "epoch": 1.7770949363036936, "percentage": 35.54, "elapsed_time": "1:13:30", "remaining_time": "2:13:19", "throughput": 19803.78, "total_tokens": 87349504}
|
|
{"current_steps": 27765, "total_steps": 78105, "loss": 0.3426, "lr": 4.070141151196173e-06, "epoch": 1.7774150182446706, "percentage": 35.55, "elapsed_time": "1:13:31", "remaining_time": "2:13:18", "throughput": 19804.34, "total_tokens": 87365632}
|
|
{"current_steps": 27770, "total_steps": 78105, "loss": 0.391, "lr": 4.069706386743888e-06, "epoch": 1.7777351001856476, "percentage": 35.55, "elapsed_time": "1:13:32", "remaining_time": "2:13:17", "throughput": 19804.84, "total_tokens": 87381248}
|
|
{"current_steps": 27775, "total_steps": 78105, "loss": 0.3247, "lr": 4.0692715439086435e-06, "epoch": 1.7780551821266244, "percentage": 35.56, "elapsed_time": "1:13:32", "remaining_time": "2:13:16", "throughput": 19805.34, "total_tokens": 87396608}
|
|
{"current_steps": 27780, "total_steps": 78105, "loss": 0.3074, "lr": 4.068836622712151e-06, "epoch": 1.7783752640676012, "percentage": 35.57, "elapsed_time": "1:13:33", "remaining_time": "2:13:15", "throughput": 19805.83, "total_tokens": 87411840}
|
|
{"current_steps": 27785, "total_steps": 78105, "loss": 0.3801, "lr": 4.068401623176131e-06, "epoch": 1.7786953460085781, "percentage": 35.57, "elapsed_time": "1:13:34", "remaining_time": "2:13:14", "throughput": 19806.45, "total_tokens": 87428416}
|
|
{"current_steps": 27790, "total_steps": 78105, "loss": 0.2938, "lr": 4.067966545322302e-06, "epoch": 1.7790154279495551, "percentage": 35.58, "elapsed_time": "1:13:34", "remaining_time": "2:13:13", "throughput": 19806.9, "total_tokens": 87443776}
|
|
{"current_steps": 27795, "total_steps": 78105, "loss": 0.4359, "lr": 4.067531389172392e-06, "epoch": 1.7793355098905321, "percentage": 35.59, "elapsed_time": "1:13:35", "remaining_time": "2:13:12", "throughput": 19807.63, "total_tokens": 87461056}
|
|
{"current_steps": 27800, "total_steps": 78105, "loss": 0.2891, "lr": 4.067096154748129e-06, "epoch": 1.779655591831509, "percentage": 35.59, "elapsed_time": "1:13:36", "remaining_time": "2:13:11", "throughput": 19808.23, "total_tokens": 87477248}
|
|
{"current_steps": 27805, "total_steps": 78105, "loss": 0.3999, "lr": 4.066660842071246e-06, "epoch": 1.7799756737724857, "percentage": 35.6, "elapsed_time": "1:13:36", "remaining_time": "2:13:10", "throughput": 19808.91, "total_tokens": 87494912}
|
|
{"current_steps": 27810, "total_steps": 78105, "loss": 0.3158, "lr": 4.066225451163482e-06, "epoch": 1.7802957557134627, "percentage": 35.61, "elapsed_time": "1:13:37", "remaining_time": "2:13:09", "throughput": 19809.55, "total_tokens": 87511872}
|
|
{"current_steps": 27815, "total_steps": 78105, "loss": 0.4262, "lr": 4.065789982046576e-06, "epoch": 1.7806158376544396, "percentage": 35.61, "elapsed_time": "1:13:38", "remaining_time": "2:13:08", "throughput": 19810.08, "total_tokens": 87527488}
|
|
{"current_steps": 27820, "total_steps": 78105, "loss": 0.4085, "lr": 4.065354434742276e-06, "epoch": 1.7809359195954164, "percentage": 35.62, "elapsed_time": "1:13:39", "remaining_time": "2:13:07", "throughput": 19810.59, "total_tokens": 87543168}
|
|
{"current_steps": 27825, "total_steps": 78105, "loss": 0.2856, "lr": 4.064918809272328e-06, "epoch": 1.7812560015363932, "percentage": 35.63, "elapsed_time": "1:13:39", "remaining_time": "2:13:06", "throughput": 19811.16, "total_tokens": 87559424}
|
|
{"current_steps": 27830, "total_steps": 78105, "loss": 0.4188, "lr": 4.064483105658486e-06, "epoch": 1.7815760834773702, "percentage": 35.63, "elapsed_time": "1:13:40", "remaining_time": "2:13:05", "throughput": 19811.72, "total_tokens": 87575360}
|
|
{"current_steps": 27835, "total_steps": 78105, "loss": 0.3875, "lr": 4.064047323922506e-06, "epoch": 1.7818961654183472, "percentage": 35.64, "elapsed_time": "1:13:41", "remaining_time": "2:13:04", "throughput": 19812.3, "total_tokens": 87591680}
|
|
{"current_steps": 27840, "total_steps": 78105, "loss": 0.4363, "lr": 4.063611464086151e-06, "epoch": 1.782216247359324, "percentage": 35.64, "elapsed_time": "1:13:41", "remaining_time": "2:13:03", "throughput": 19812.78, "total_tokens": 87607168}
|
|
{"current_steps": 27845, "total_steps": 78105, "loss": 0.3222, "lr": 4.0631755261711835e-06, "epoch": 1.782536329300301, "percentage": 35.65, "elapsed_time": "1:13:42", "remaining_time": "2:13:02", "throughput": 19813.24, "total_tokens": 87622080}
|
|
{"current_steps": 27850, "total_steps": 78105, "loss": 0.4569, "lr": 4.0627395101993725e-06, "epoch": 1.7828564112412777, "percentage": 35.66, "elapsed_time": "1:13:43", "remaining_time": "2:13:01", "throughput": 19813.81, "total_tokens": 87638464}
|
|
{"current_steps": 27855, "total_steps": 78105, "loss": 0.2934, "lr": 4.0623034161924905e-06, "epoch": 1.7831764931822547, "percentage": 35.66, "elapsed_time": "1:13:43", "remaining_time": "2:13:00", "throughput": 19814.4, "total_tokens": 87654784}
|
|
{"current_steps": 27860, "total_steps": 78105, "loss": 0.4392, "lr": 4.061867244172313e-06, "epoch": 1.7834965751232317, "percentage": 35.67, "elapsed_time": "1:13:44", "remaining_time": "2:12:59", "throughput": 19814.83, "total_tokens": 87669248}
|
|
{"current_steps": 27865, "total_steps": 78105, "loss": 0.3091, "lr": 4.061430994160621e-06, "epoch": 1.7838166570642084, "percentage": 35.68, "elapsed_time": "1:13:45", "remaining_time": "2:12:58", "throughput": 19815.34, "total_tokens": 87684864}
|
|
{"current_steps": 27870, "total_steps": 78105, "loss": 0.3996, "lr": 4.060994666179199e-06, "epoch": 1.7841367390051852, "percentage": 35.68, "elapsed_time": "1:13:45", "remaining_time": "2:12:57", "throughput": 19815.83, "total_tokens": 87700480}
|
|
{"current_steps": 27875, "total_steps": 78105, "loss": 0.4457, "lr": 4.0605582602498336e-06, "epoch": 1.7844568209461622, "percentage": 35.69, "elapsed_time": "1:13:46", "remaining_time": "2:12:56", "throughput": 19816.34, "total_tokens": 87716160}
|
|
{"current_steps": 27880, "total_steps": 78105, "loss": 0.2791, "lr": 4.060121776394318e-06, "epoch": 1.7847769028871392, "percentage": 35.7, "elapsed_time": "1:13:47", "remaining_time": "2:12:55", "throughput": 19816.88, "total_tokens": 87732160}
|
|
{"current_steps": 27885, "total_steps": 78105, "loss": 0.4283, "lr": 4.0596852146344465e-06, "epoch": 1.785096984828116, "percentage": 35.7, "elapsed_time": "1:13:47", "remaining_time": "2:12:54", "throughput": 19817.38, "total_tokens": 87747968}
|
|
{"current_steps": 27890, "total_steps": 78105, "loss": 0.3028, "lr": 4.05924857499202e-06, "epoch": 1.7854170667690927, "percentage": 35.71, "elapsed_time": "1:13:48", "remaining_time": "2:12:53", "throughput": 19817.88, "total_tokens": 87763456}
|
|
{"current_steps": 27895, "total_steps": 78105, "loss": 0.226, "lr": 4.0588118574888415e-06, "epoch": 1.7857371487100697, "percentage": 35.71, "elapsed_time": "1:13:49", "remaining_time": "2:12:52", "throughput": 19818.39, "total_tokens": 87778752}
|
|
{"current_steps": 27900, "total_steps": 78105, "loss": 0.2787, "lr": 4.0583750621467175e-06, "epoch": 1.7860572306510467, "percentage": 35.72, "elapsed_time": "1:13:49", "remaining_time": "2:12:51", "throughput": 19818.9, "total_tokens": 87794496}
|
|
{"current_steps": 27905, "total_steps": 78105, "loss": 0.243, "lr": 4.0579381889874615e-06, "epoch": 1.7863773125920237, "percentage": 35.73, "elapsed_time": "1:13:50", "remaining_time": "2:12:50", "throughput": 19819.42, "total_tokens": 87810368}
|
|
{"current_steps": 27910, "total_steps": 78105, "loss": 0.3199, "lr": 4.057501238032886e-06, "epoch": 1.7866973945330005, "percentage": 35.73, "elapsed_time": "1:13:51", "remaining_time": "2:12:49", "throughput": 19820.03, "total_tokens": 87827136}
|
|
{"current_steps": 27915, "total_steps": 78105, "loss": 0.3267, "lr": 4.057064209304813e-06, "epoch": 1.7870174764739772, "percentage": 35.74, "elapsed_time": "1:13:51", "remaining_time": "2:12:48", "throughput": 19820.76, "total_tokens": 87844800}
|
|
{"current_steps": 27920, "total_steps": 78105, "loss": 0.2677, "lr": 4.056627102825062e-06, "epoch": 1.7873375584149542, "percentage": 35.75, "elapsed_time": "1:13:53", "remaining_time": "2:12:48", "throughput": 19822.02, "total_tokens": 87873472}
|
|
{"current_steps": 27925, "total_steps": 78105, "loss": 0.2775, "lr": 4.056189918615464e-06, "epoch": 1.7876576403559312, "percentage": 35.75, "elapsed_time": "1:13:53", "remaining_time": "2:12:47", "throughput": 19822.46, "total_tokens": 87888448}
|
|
{"current_steps": 27930, "total_steps": 78105, "loss": 0.3773, "lr": 4.0557526566978454e-06, "epoch": 1.787977722296908, "percentage": 35.76, "elapsed_time": "1:13:54", "remaining_time": "2:12:46", "throughput": 19822.99, "total_tokens": 87904128}
|
|
{"current_steps": 27935, "total_steps": 78105, "loss": 0.3589, "lr": 4.055315317094044e-06, "epoch": 1.7882978042378848, "percentage": 35.77, "elapsed_time": "1:13:55", "remaining_time": "2:12:45", "throughput": 19823.57, "total_tokens": 87920384}
|
|
{"current_steps": 27940, "total_steps": 78105, "loss": 0.2476, "lr": 4.054877899825896e-06, "epoch": 1.7886178861788617, "percentage": 35.77, "elapsed_time": "1:13:55", "remaining_time": "2:12:44", "throughput": 19824.04, "total_tokens": 87935616}
|
|
{"current_steps": 27945, "total_steps": 78105, "loss": 0.3289, "lr": 4.054440404915245e-06, "epoch": 1.7889379681198387, "percentage": 35.78, "elapsed_time": "1:13:56", "remaining_time": "2:12:43", "throughput": 19824.53, "total_tokens": 87951424}
|
|
{"current_steps": 27950, "total_steps": 78105, "loss": 0.2972, "lr": 4.054002832383936e-06, "epoch": 1.7892580500608157, "percentage": 35.79, "elapsed_time": "1:13:57", "remaining_time": "2:12:42", "throughput": 19825.06, "total_tokens": 87967296}
|
|
{"current_steps": 27955, "total_steps": 78105, "loss": 0.5008, "lr": 4.053565182253822e-06, "epoch": 1.7895781320017925, "percentage": 35.79, "elapsed_time": "1:13:57", "remaining_time": "2:12:41", "throughput": 19825.47, "total_tokens": 87982144}
|
|
{"current_steps": 27960, "total_steps": 78105, "loss": 0.4279, "lr": 4.053127454546755e-06, "epoch": 1.7898982139427693, "percentage": 35.8, "elapsed_time": "1:13:58", "remaining_time": "2:12:40", "throughput": 19825.91, "total_tokens": 87997120}
|
|
{"current_steps": 27965, "total_steps": 78105, "loss": 0.3353, "lr": 4.0526896492845914e-06, "epoch": 1.7902182958837463, "percentage": 35.8, "elapsed_time": "1:13:59", "remaining_time": "2:12:39", "throughput": 19826.37, "total_tokens": 88012288}
|
|
{"current_steps": 27970, "total_steps": 78105, "loss": 0.3642, "lr": 4.0522517664891944e-06, "epoch": 1.7905383778247232, "percentage": 35.81, "elapsed_time": "1:13:59", "remaining_time": "2:12:38", "throughput": 19826.85, "total_tokens": 88027776}
|
|
{"current_steps": 27975, "total_steps": 78105, "loss": 0.2299, "lr": 4.051813806182431e-06, "epoch": 1.7908584597657, "percentage": 35.82, "elapsed_time": "1:14:00", "remaining_time": "2:12:37", "throughput": 19827.35, "total_tokens": 88042816}
|
|
{"current_steps": 27980, "total_steps": 78105, "loss": 0.2514, "lr": 4.051375768386168e-06, "epoch": 1.7911785417066768, "percentage": 35.82, "elapsed_time": "1:14:01", "remaining_time": "2:12:36", "throughput": 19827.8, "total_tokens": 88058112}
|
|
{"current_steps": 27985, "total_steps": 78105, "loss": 0.323, "lr": 4.050937653122281e-06, "epoch": 1.7914986236476538, "percentage": 35.83, "elapsed_time": "1:14:01", "remaining_time": "2:12:35", "throughput": 19828.39, "total_tokens": 88074496}
|
|
{"current_steps": 27990, "total_steps": 78105, "loss": 0.3806, "lr": 4.0504994604126446e-06, "epoch": 1.7918187055886308, "percentage": 35.84, "elapsed_time": "1:14:02", "remaining_time": "2:12:34", "throughput": 19828.93, "total_tokens": 88090496}
|
|
{"current_steps": 27995, "total_steps": 78105, "loss": 0.4742, "lr": 4.0500611902791435e-06, "epoch": 1.7921387875296075, "percentage": 35.84, "elapsed_time": "1:14:03", "remaining_time": "2:12:33", "throughput": 19829.41, "total_tokens": 88105920}
|
|
{"current_steps": 28000, "total_steps": 78105, "loss": 0.5227, "lr": 4.049622842743659e-06, "epoch": 1.7924588694705843, "percentage": 35.85, "elapsed_time": "1:14:03", "remaining_time": "2:12:32", "throughput": 19829.92, "total_tokens": 88121600}
|
|
{"current_steps": 28005, "total_steps": 78105, "loss": 0.3019, "lr": 4.049184417828081e-06, "epoch": 1.7927789514115613, "percentage": 35.86, "elapsed_time": "1:14:04", "remaining_time": "2:12:31", "throughput": 19830.44, "total_tokens": 88137216}
|
|
{"current_steps": 28010, "total_steps": 78105, "loss": 0.346, "lr": 4.048745915554303e-06, "epoch": 1.7930990333525383, "percentage": 35.86, "elapsed_time": "1:14:05", "remaining_time": "2:12:30", "throughput": 19830.94, "total_tokens": 88152832}
|
|
{"current_steps": 28015, "total_steps": 78105, "loss": 0.4016, "lr": 4.048307335944221e-06, "epoch": 1.7934191152935153, "percentage": 35.87, "elapsed_time": "1:14:05", "remaining_time": "2:12:29", "throughput": 19831.5, "total_tokens": 88169024}
|
|
{"current_steps": 28020, "total_steps": 78105, "loss": 0.2495, "lr": 4.047868679019736e-06, "epoch": 1.793739197234492, "percentage": 35.87, "elapsed_time": "1:14:06", "remaining_time": "2:12:28", "throughput": 19831.97, "total_tokens": 88184064}
|
|
{"current_steps": 28025, "total_steps": 78105, "loss": 0.4761, "lr": 4.047429944802752e-06, "epoch": 1.7940592791754688, "percentage": 35.88, "elapsed_time": "1:14:07", "remaining_time": "2:12:27", "throughput": 19832.39, "total_tokens": 88198848}
|
|
{"current_steps": 28030, "total_steps": 78105, "loss": 0.4764, "lr": 4.046991133315177e-06, "epoch": 1.7943793611164458, "percentage": 35.89, "elapsed_time": "1:14:07", "remaining_time": "2:12:26", "throughput": 19832.88, "total_tokens": 88214144}
|
|
{"current_steps": 28035, "total_steps": 78105, "loss": 0.316, "lr": 4.046552244578922e-06, "epoch": 1.7946994430574228, "percentage": 35.89, "elapsed_time": "1:14:08", "remaining_time": "2:12:25", "throughput": 19833.46, "total_tokens": 88230208}
|
|
{"current_steps": 28040, "total_steps": 78105, "loss": 0.2648, "lr": 4.046113278615904e-06, "epoch": 1.7950195249983996, "percentage": 35.9, "elapsed_time": "1:14:09", "remaining_time": "2:12:24", "throughput": 19833.94, "total_tokens": 88245440}
|
|
{"current_steps": 28045, "total_steps": 78105, "loss": 0.298, "lr": 4.045674235448042e-06, "epoch": 1.7953396069393763, "percentage": 35.91, "elapsed_time": "1:14:09", "remaining_time": "2:12:22", "throughput": 19834.38, "total_tokens": 88260224}
|
|
{"current_steps": 28050, "total_steps": 78105, "loss": 0.3008, "lr": 4.04523511509726e-06, "epoch": 1.7956596888803533, "percentage": 35.91, "elapsed_time": "1:14:10", "remaining_time": "2:12:21", "throughput": 19834.93, "total_tokens": 88276032}
|
|
{"current_steps": 28055, "total_steps": 78105, "loss": 0.2457, "lr": 4.044795917585485e-06, "epoch": 1.7959797708213303, "percentage": 35.92, "elapsed_time": "1:14:11", "remaining_time": "2:12:20", "throughput": 19835.39, "total_tokens": 88290816}
|
|
{"current_steps": 28060, "total_steps": 78105, "loss": 0.3286, "lr": 4.044356642934649e-06, "epoch": 1.7962998527623073, "percentage": 35.93, "elapsed_time": "1:14:12", "remaining_time": "2:12:20", "throughput": 19833.56, "total_tokens": 88306112}
|
|
{"current_steps": 28065, "total_steps": 78105, "loss": 0.2843, "lr": 4.043917291166686e-06, "epoch": 1.796619934703284, "percentage": 35.93, "elapsed_time": "1:14:13", "remaining_time": "2:12:19", "throughput": 19833.61, "total_tokens": 88322112}
|
|
{"current_steps": 28070, "total_steps": 78105, "loss": 0.4775, "lr": 4.043477862303535e-06, "epoch": 1.7969400166442608, "percentage": 35.94, "elapsed_time": "1:14:13", "remaining_time": "2:12:19", "throughput": 19834.19, "total_tokens": 88338688}
|
|
{"current_steps": 28075, "total_steps": 78105, "loss": 0.3853, "lr": 4.0430383563671395e-06, "epoch": 1.7972600985852378, "percentage": 35.95, "elapsed_time": "1:14:14", "remaining_time": "2:12:18", "throughput": 19834.82, "total_tokens": 88355584}
|
|
{"current_steps": 28080, "total_steps": 78105, "loss": 0.4064, "lr": 4.042598773379447e-06, "epoch": 1.7975801805262148, "percentage": 35.95, "elapsed_time": "1:14:15", "remaining_time": "2:12:17", "throughput": 19835.32, "total_tokens": 88371328}
|
|
{"current_steps": 28085, "total_steps": 78105, "loss": 0.358, "lr": 4.042159113362406e-06, "epoch": 1.7979002624671916, "percentage": 35.96, "elapsed_time": "1:14:15", "remaining_time": "2:12:16", "throughput": 19835.96, "total_tokens": 88388608}
|
|
{"current_steps": 28090, "total_steps": 78105, "loss": 0.369, "lr": 4.041719376337971e-06, "epoch": 1.7982203444081684, "percentage": 35.96, "elapsed_time": "1:14:16", "remaining_time": "2:12:15", "throughput": 19836.43, "total_tokens": 88404096}
|
|
{"current_steps": 28095, "total_steps": 78105, "loss": 0.276, "lr": 4.041279562328102e-06, "epoch": 1.7985404263491453, "percentage": 35.97, "elapsed_time": "1:14:17", "remaining_time": "2:12:14", "throughput": 19836.87, "total_tokens": 88418752}
|
|
{"current_steps": 28100, "total_steps": 78105, "loss": 0.4765, "lr": 4.040839671354759e-06, "epoch": 1.7988605082901223, "percentage": 35.98, "elapsed_time": "1:14:17", "remaining_time": "2:12:13", "throughput": 19837.4, "total_tokens": 88434752}
|
|
{"current_steps": 28105, "total_steps": 78105, "loss": 0.3977, "lr": 4.04039970343991e-06, "epoch": 1.799180590231099, "percentage": 35.98, "elapsed_time": "1:14:18", "remaining_time": "2:12:12", "throughput": 19837.89, "total_tokens": 88450304}
|
|
{"current_steps": 28110, "total_steps": 78105, "loss": 0.3332, "lr": 4.039959658605522e-06, "epoch": 1.799500672172076, "percentage": 35.99, "elapsed_time": "1:14:19", "remaining_time": "2:12:11", "throughput": 19838.4, "total_tokens": 88465984}
|
|
{"current_steps": 28115, "total_steps": 78105, "loss": 0.264, "lr": 4.039519536873571e-06, "epoch": 1.7998207541130529, "percentage": 36.0, "elapsed_time": "1:14:19", "remaining_time": "2:12:10", "throughput": 19838.83, "total_tokens": 88480576}
|
|
{"current_steps": 28120, "total_steps": 78105, "loss": 0.2865, "lr": 4.039079338266033e-06, "epoch": 1.8001408360540299, "percentage": 36.0, "elapsed_time": "1:14:20", "remaining_time": "2:12:09", "throughput": 19839.38, "total_tokens": 88497024}
|
|
{"current_steps": 28125, "total_steps": 78105, "loss": 0.3221, "lr": 4.038639062804889e-06, "epoch": 1.8004609179950068, "percentage": 36.01, "elapsed_time": "1:14:21", "remaining_time": "2:12:08", "throughput": 19839.86, "total_tokens": 88512448}
|
|
{"current_steps": 28130, "total_steps": 78105, "loss": 0.2948, "lr": 4.038198710512126e-06, "epoch": 1.8007809999359836, "percentage": 36.02, "elapsed_time": "1:14:22", "remaining_time": "2:12:07", "throughput": 19840.37, "total_tokens": 88528192}
|
|
{"current_steps": 28135, "total_steps": 78105, "loss": 0.2623, "lr": 4.0377582814097305e-06, "epoch": 1.8011010818769604, "percentage": 36.02, "elapsed_time": "1:14:22", "remaining_time": "2:12:06", "throughput": 19840.83, "total_tokens": 88543424}
|
|
{"current_steps": 28140, "total_steps": 78105, "loss": 0.2644, "lr": 4.037317775519697e-06, "epoch": 1.8014211638179374, "percentage": 36.03, "elapsed_time": "1:14:23", "remaining_time": "2:12:05", "throughput": 19841.31, "total_tokens": 88558784}
|
|
{"current_steps": 28145, "total_steps": 78105, "loss": 0.2883, "lr": 4.036877192864021e-06, "epoch": 1.8017412457589144, "percentage": 36.03, "elapsed_time": "1:14:23", "remaining_time": "2:12:04", "throughput": 19841.71, "total_tokens": 88573376}
|
|
{"current_steps": 28150, "total_steps": 78105, "loss": 0.3588, "lr": 4.036436533464703e-06, "epoch": 1.8020613276998911, "percentage": 36.04, "elapsed_time": "1:14:24", "remaining_time": "2:12:03", "throughput": 19842.24, "total_tokens": 88589504}
|
|
{"current_steps": 28155, "total_steps": 78105, "loss": 0.4704, "lr": 4.035995797343748e-06, "epoch": 1.802381409640868, "percentage": 36.05, "elapsed_time": "1:14:25", "remaining_time": "2:12:02", "throughput": 19842.74, "total_tokens": 88605184}
|
|
{"current_steps": 28160, "total_steps": 78105, "loss": 0.3376, "lr": 4.035554984523163e-06, "epoch": 1.802701491581845, "percentage": 36.05, "elapsed_time": "1:14:26", "remaining_time": "2:12:01", "throughput": 19843.27, "total_tokens": 88620928}
|
|
{"current_steps": 28165, "total_steps": 78105, "loss": 0.4225, "lr": 4.035114095024963e-06, "epoch": 1.8030215735228219, "percentage": 36.06, "elapsed_time": "1:14:26", "remaining_time": "2:12:00", "throughput": 19843.91, "total_tokens": 88637568}
|
|
{"current_steps": 28170, "total_steps": 78105, "loss": 0.257, "lr": 4.034673128871159e-06, "epoch": 1.8033416554637989, "percentage": 36.07, "elapsed_time": "1:14:27", "remaining_time": "2:11:59", "throughput": 19844.7, "total_tokens": 88656128}
|
|
{"current_steps": 28175, "total_steps": 78105, "loss": 0.4035, "lr": 4.034232086083772e-06, "epoch": 1.8036617374047756, "percentage": 36.07, "elapsed_time": "1:14:28", "remaining_time": "2:11:58", "throughput": 19845.19, "total_tokens": 88671360}
|
|
{"current_steps": 28180, "total_steps": 78105, "loss": 0.3485, "lr": 4.033790966684828e-06, "epoch": 1.8039818193457524, "percentage": 36.08, "elapsed_time": "1:14:28", "remaining_time": "2:11:57", "throughput": 19845.67, "total_tokens": 88686784}
|
|
{"current_steps": 28185, "total_steps": 78105, "loss": 0.3891, "lr": 4.033349770696351e-06, "epoch": 1.8043019012867294, "percentage": 36.09, "elapsed_time": "1:14:29", "remaining_time": "2:11:56", "throughput": 19846.26, "total_tokens": 88703616}
|
|
{"current_steps": 28190, "total_steps": 78105, "loss": 0.451, "lr": 4.032908498140373e-06, "epoch": 1.8046219832277064, "percentage": 36.09, "elapsed_time": "1:14:30", "remaining_time": "2:11:55", "throughput": 19846.74, "total_tokens": 88719168}
|
|
{"current_steps": 28195, "total_steps": 78105, "loss": 0.3382, "lr": 4.0324671490389306e-06, "epoch": 1.8049420651686832, "percentage": 36.1, "elapsed_time": "1:14:30", "remaining_time": "2:11:54", "throughput": 19847.17, "total_tokens": 88734080}
|
|
{"current_steps": 28200, "total_steps": 78105, "loss": 0.2028, "lr": 4.03202572341406e-06, "epoch": 1.80526214710966, "percentage": 36.11, "elapsed_time": "1:14:31", "remaining_time": "2:11:53", "throughput": 19847.7, "total_tokens": 88750464}
|
|
{"current_steps": 28205, "total_steps": 78105, "loss": 0.4104, "lr": 4.031584221287806e-06, "epoch": 1.805582229050637, "percentage": 36.11, "elapsed_time": "1:14:32", "remaining_time": "2:11:52", "throughput": 19848.32, "total_tokens": 88767616}
|
|
{"current_steps": 28210, "total_steps": 78105, "loss": 0.2934, "lr": 4.031142642682213e-06, "epoch": 1.805902310991614, "percentage": 36.12, "elapsed_time": "1:14:32", "remaining_time": "2:11:51", "throughput": 19848.84, "total_tokens": 88783552}
|
|
{"current_steps": 28215, "total_steps": 78105, "loss": 0.4233, "lr": 4.030700987619332e-06, "epoch": 1.806222392932591, "percentage": 36.12, "elapsed_time": "1:14:33", "remaining_time": "2:11:50", "throughput": 19849.27, "total_tokens": 88798464}
|
|
{"current_steps": 28220, "total_steps": 78105, "loss": 0.2509, "lr": 4.030259256121217e-06, "epoch": 1.8065424748735677, "percentage": 36.13, "elapsed_time": "1:14:34", "remaining_time": "2:11:49", "throughput": 19849.68, "total_tokens": 88813376}
|
|
{"current_steps": 28225, "total_steps": 78105, "loss": 0.3985, "lr": 4.029817448209926e-06, "epoch": 1.8068625568145444, "percentage": 36.14, "elapsed_time": "1:14:34", "remaining_time": "2:11:48", "throughput": 19850.19, "total_tokens": 88829184}
|
|
{"current_steps": 28230, "total_steps": 78105, "loss": 0.2694, "lr": 4.029375563907519e-06, "epoch": 1.8071826387555214, "percentage": 36.14, "elapsed_time": "1:14:35", "remaining_time": "2:11:47", "throughput": 19850.64, "total_tokens": 88844160}
|
|
{"current_steps": 28235, "total_steps": 78105, "loss": 0.2542, "lr": 4.0289336032360635e-06, "epoch": 1.8075027206964984, "percentage": 36.15, "elapsed_time": "1:14:36", "remaining_time": "2:11:46", "throughput": 19851.21, "total_tokens": 88860288}
|
|
{"current_steps": 28240, "total_steps": 78105, "loss": 0.2856, "lr": 4.028491566217626e-06, "epoch": 1.8078228026374752, "percentage": 36.16, "elapsed_time": "1:14:36", "remaining_time": "2:11:45", "throughput": 19851.65, "total_tokens": 88875264}
|
|
{"current_steps": 28245, "total_steps": 78105, "loss": 0.4487, "lr": 4.028049452874283e-06, "epoch": 1.808142884578452, "percentage": 36.16, "elapsed_time": "1:14:37", "remaining_time": "2:11:44", "throughput": 19852.12, "total_tokens": 88890816}
|
|
{"current_steps": 28250, "total_steps": 78105, "loss": 0.2714, "lr": 4.027607263228109e-06, "epoch": 1.808462966519429, "percentage": 36.17, "elapsed_time": "1:14:38", "remaining_time": "2:11:43", "throughput": 19852.65, "total_tokens": 88906816}
|
|
{"current_steps": 28255, "total_steps": 78105, "loss": 0.2811, "lr": 4.0271649973011864e-06, "epoch": 1.808783048460406, "percentage": 36.18, "elapsed_time": "1:14:39", "remaining_time": "2:11:42", "throughput": 19852.96, "total_tokens": 88922496}
|
|
{"current_steps": 28260, "total_steps": 78105, "loss": 0.4157, "lr": 4.026722655115598e-06, "epoch": 1.8091031304013827, "percentage": 36.18, "elapsed_time": "1:14:39", "remaining_time": "2:11:41", "throughput": 19853.54, "total_tokens": 88939072}
|
|
{"current_steps": 28265, "total_steps": 78105, "loss": 0.2673, "lr": 4.026280236693433e-06, "epoch": 1.8094232123423597, "percentage": 36.19, "elapsed_time": "1:14:40", "remaining_time": "2:11:40", "throughput": 19854.02, "total_tokens": 88954432}
|
|
{"current_steps": 28270, "total_steps": 78105, "loss": 0.3804, "lr": 4.025837742056782e-06, "epoch": 1.8097432942833365, "percentage": 36.19, "elapsed_time": "1:14:41", "remaining_time": "2:11:39", "throughput": 19854.46, "total_tokens": 88969408}
|
|
{"current_steps": 28275, "total_steps": 78105, "loss": 0.3138, "lr": 4.025395171227742e-06, "epoch": 1.8100633762243135, "percentage": 36.2, "elapsed_time": "1:14:41", "remaining_time": "2:11:38", "throughput": 19854.98, "total_tokens": 88985280}
|
|
{"current_steps": 28280, "total_steps": 78105, "loss": 0.1757, "lr": 4.024952524228413e-06, "epoch": 1.8103834581652904, "percentage": 36.21, "elapsed_time": "1:14:42", "remaining_time": "2:11:37", "throughput": 19855.58, "total_tokens": 89001664}
|
|
{"current_steps": 28285, "total_steps": 78105, "loss": 0.2732, "lr": 4.024509801080899e-06, "epoch": 1.8107035401062672, "percentage": 36.21, "elapsed_time": "1:14:43", "remaining_time": "2:11:36", "throughput": 19855.98, "total_tokens": 89016320}
|
|
{"current_steps": 28290, "total_steps": 78105, "loss": 0.384, "lr": 4.024067001807305e-06, "epoch": 1.811023622047244, "percentage": 36.22, "elapsed_time": "1:14:43", "remaining_time": "2:11:35", "throughput": 19856.48, "total_tokens": 89032000}
|
|
{"current_steps": 28295, "total_steps": 78105, "loss": 0.2746, "lr": 4.023624126429743e-06, "epoch": 1.811343703988221, "percentage": 36.23, "elapsed_time": "1:14:44", "remaining_time": "2:11:34", "throughput": 19856.93, "total_tokens": 89047104}
|
|
{"current_steps": 28300, "total_steps": 78105, "loss": 0.3309, "lr": 4.02318117497033e-06, "epoch": 1.811663785929198, "percentage": 36.23, "elapsed_time": "1:14:45", "remaining_time": "2:11:33", "throughput": 19857.49, "total_tokens": 89063552}
|
|
{"current_steps": 28305, "total_steps": 78105, "loss": 0.2325, "lr": 4.022738147451183e-06, "epoch": 1.8119838678701747, "percentage": 36.24, "elapsed_time": "1:14:45", "remaining_time": "2:11:32", "throughput": 19858.08, "total_tokens": 89080320}
|
|
{"current_steps": 28310, "total_steps": 78105, "loss": 0.453, "lr": 4.022295043894424e-06, "epoch": 1.8123039498111515, "percentage": 36.25, "elapsed_time": "1:14:46", "remaining_time": "2:11:31", "throughput": 19858.77, "total_tokens": 89098112}
|
|
{"current_steps": 28315, "total_steps": 78105, "loss": 0.263, "lr": 4.02185186432218e-06, "epoch": 1.8126240317521285, "percentage": 36.25, "elapsed_time": "1:14:47", "remaining_time": "2:11:30", "throughput": 19859.2, "total_tokens": 89113088}
|
|
{"current_steps": 28320, "total_steps": 78105, "loss": 0.3333, "lr": 4.021408608756581e-06, "epoch": 1.8129441136931055, "percentage": 36.26, "elapsed_time": "1:14:47", "remaining_time": "2:11:29", "throughput": 19859.62, "total_tokens": 89128192}
|
|
{"current_steps": 28325, "total_steps": 78105, "loss": 0.3234, "lr": 4.020965277219761e-06, "epoch": 1.8132641956340825, "percentage": 36.27, "elapsed_time": "1:14:48", "remaining_time": "2:11:28", "throughput": 19860.15, "total_tokens": 89144256}
|
|
{"current_steps": 28330, "total_steps": 78105, "loss": 0.3372, "lr": 4.020521869733858e-06, "epoch": 1.8135842775750592, "percentage": 36.27, "elapsed_time": "1:14:49", "remaining_time": "2:11:27", "throughput": 19860.63, "total_tokens": 89159744}
|
|
{"current_steps": 28335, "total_steps": 78105, "loss": 0.3835, "lr": 4.020078386321011e-06, "epoch": 1.813904359516036, "percentage": 36.28, "elapsed_time": "1:14:49", "remaining_time": "2:11:26", "throughput": 19861.1, "total_tokens": 89175424}
|
|
{"current_steps": 28340, "total_steps": 78105, "loss": 0.4467, "lr": 4.019634827003369e-06, "epoch": 1.814224441457013, "percentage": 36.28, "elapsed_time": "1:14:50", "remaining_time": "2:11:25", "throughput": 19861.65, "total_tokens": 89191552}
|
|
{"current_steps": 28345, "total_steps": 78105, "loss": 0.3148, "lr": 4.019191191803078e-06, "epoch": 1.81454452339799, "percentage": 36.29, "elapsed_time": "1:14:51", "remaining_time": "2:11:24", "throughput": 19862.25, "total_tokens": 89208384}
|
|
{"current_steps": 28350, "total_steps": 78105, "loss": 0.3173, "lr": 4.0187474807422935e-06, "epoch": 1.8148646053389668, "percentage": 36.3, "elapsed_time": "1:14:52", "remaining_time": "2:11:23", "throughput": 19862.71, "total_tokens": 89223488}
|
|
{"current_steps": 28355, "total_steps": 78105, "loss": 0.2967, "lr": 4.0183036938431695e-06, "epoch": 1.8151846872799435, "percentage": 36.3, "elapsed_time": "1:14:52", "remaining_time": "2:11:22", "throughput": 19863.17, "total_tokens": 89238528}
|
|
{"current_steps": 28360, "total_steps": 78105, "loss": 0.3506, "lr": 4.017859831127868e-06, "epoch": 1.8155047692209205, "percentage": 36.31, "elapsed_time": "1:14:53", "remaining_time": "2:11:21", "throughput": 19863.7, "total_tokens": 89254592}
|
|
{"current_steps": 28365, "total_steps": 78105, "loss": 0.2924, "lr": 4.017415892618552e-06, "epoch": 1.8158248511618975, "percentage": 36.32, "elapsed_time": "1:14:54", "remaining_time": "2:11:20", "throughput": 19864.19, "total_tokens": 89269824}
|
|
{"current_steps": 28370, "total_steps": 78105, "loss": 0.3246, "lr": 4.0169718783373915e-06, "epoch": 1.8161449331028743, "percentage": 36.32, "elapsed_time": "1:14:54", "remaining_time": "2:11:19", "throughput": 19864.74, "total_tokens": 89285632}
|
|
{"current_steps": 28375, "total_steps": 78105, "loss": 0.3985, "lr": 4.0165277883065565e-06, "epoch": 1.8164650150438513, "percentage": 36.33, "elapsed_time": "1:14:55", "remaining_time": "2:11:18", "throughput": 19865.24, "total_tokens": 89301248}
|
|
{"current_steps": 28380, "total_steps": 78105, "loss": 0.4508, "lr": 4.016083622548222e-06, "epoch": 1.816785096984828, "percentage": 36.34, "elapsed_time": "1:14:56", "remaining_time": "2:11:17", "throughput": 19865.81, "total_tokens": 89317504}
|
|
{"current_steps": 28385, "total_steps": 78105, "loss": 0.457, "lr": 4.01563938108457e-06, "epoch": 1.817105178925805, "percentage": 36.34, "elapsed_time": "1:14:56", "remaining_time": "2:11:16", "throughput": 19866.34, "total_tokens": 89333632}
|
|
{"current_steps": 28390, "total_steps": 78105, "loss": 0.2177, "lr": 4.015195063937781e-06, "epoch": 1.817425260866782, "percentage": 36.35, "elapsed_time": "1:14:57", "remaining_time": "2:11:15", "throughput": 19866.8, "total_tokens": 89349184}
|
|
{"current_steps": 28395, "total_steps": 78105, "loss": 0.3487, "lr": 4.014750671130044e-06, "epoch": 1.8177453428077588, "percentage": 36.35, "elapsed_time": "1:14:58", "remaining_time": "2:11:14", "throughput": 19867.35, "total_tokens": 89365248}
|
|
{"current_steps": 28400, "total_steps": 78105, "loss": 0.2793, "lr": 4.014306202683548e-06, "epoch": 1.8180654247487356, "percentage": 36.36, "elapsed_time": "1:14:58", "remaining_time": "2:11:13", "throughput": 19867.87, "total_tokens": 89381056}
|
|
{"current_steps": 28405, "total_steps": 78105, "loss": 0.2725, "lr": 4.013861658620487e-06, "epoch": 1.8183855066897125, "percentage": 36.37, "elapsed_time": "1:14:59", "remaining_time": "2:11:12", "throughput": 19868.39, "total_tokens": 89396800}
|
|
{"current_steps": 28410, "total_steps": 78105, "loss": 0.403, "lr": 4.013417038963061e-06, "epoch": 1.8187055886306895, "percentage": 36.37, "elapsed_time": "1:15:00", "remaining_time": "2:11:11", "throughput": 19868.91, "total_tokens": 89412992}
|
|
{"current_steps": 28415, "total_steps": 78105, "loss": 0.2654, "lr": 4.012972343733471e-06, "epoch": 1.8190256705716663, "percentage": 36.38, "elapsed_time": "1:15:00", "remaining_time": "2:11:10", "throughput": 19869.45, "total_tokens": 89429184}
|
|
{"current_steps": 28420, "total_steps": 78105, "loss": 0.2509, "lr": 4.012527572953923e-06, "epoch": 1.819345752512643, "percentage": 36.39, "elapsed_time": "1:15:01", "remaining_time": "2:11:09", "throughput": 19870.0, "total_tokens": 89445312}
|
|
{"current_steps": 28425, "total_steps": 78105, "loss": 0.3218, "lr": 4.012082726646627e-06, "epoch": 1.81966583445362, "percentage": 36.39, "elapsed_time": "1:15:02", "remaining_time": "2:11:08", "throughput": 19870.46, "total_tokens": 89460160}
|
|
{"current_steps": 28430, "total_steps": 78105, "loss": 0.333, "lr": 4.011637804833795e-06, "epoch": 1.819985916394597, "percentage": 36.4, "elapsed_time": "1:15:02", "remaining_time": "2:11:07", "throughput": 19870.9, "total_tokens": 89475200}
|
|
{"current_steps": 28435, "total_steps": 78105, "loss": 0.4607, "lr": 4.011192807537645e-06, "epoch": 1.820305998335574, "percentage": 36.41, "elapsed_time": "1:15:03", "remaining_time": "2:11:06", "throughput": 19871.39, "total_tokens": 89490944}
|
|
{"current_steps": 28440, "total_steps": 78105, "loss": 0.3291, "lr": 4.010747734780398e-06, "epoch": 1.8206260802765508, "percentage": 36.41, "elapsed_time": "1:15:04", "remaining_time": "2:11:05", "throughput": 19871.8, "total_tokens": 89505600}
|
|
{"current_steps": 28445, "total_steps": 78105, "loss": 0.348, "lr": 4.0103025865842785e-06, "epoch": 1.8209461622175276, "percentage": 36.42, "elapsed_time": "1:15:04", "remaining_time": "2:11:04", "throughput": 19872.23, "total_tokens": 89520704}
|
|
{"current_steps": 28450, "total_steps": 78105, "loss": 0.3568, "lr": 4.009857362971514e-06, "epoch": 1.8212662441585046, "percentage": 36.43, "elapsed_time": "1:15:05", "remaining_time": "2:11:03", "throughput": 19872.78, "total_tokens": 89537152}
|
|
{"current_steps": 28455, "total_steps": 78105, "loss": 0.31, "lr": 4.009412063964338e-06, "epoch": 1.8215863260994816, "percentage": 36.43, "elapsed_time": "1:15:06", "remaining_time": "2:11:02", "throughput": 19873.55, "total_tokens": 89556032}
|
|
{"current_steps": 28460, "total_steps": 78105, "loss": 0.2255, "lr": 4.008966689584985e-06, "epoch": 1.8219064080404583, "percentage": 36.44, "elapsed_time": "1:15:06", "remaining_time": "2:11:01", "throughput": 19873.96, "total_tokens": 89570880}
|
|
{"current_steps": 28465, "total_steps": 78105, "loss": 0.3559, "lr": 4.008521239855697e-06, "epoch": 1.822226489981435, "percentage": 36.44, "elapsed_time": "1:15:07", "remaining_time": "2:11:00", "throughput": 19874.43, "total_tokens": 89586048}
|
|
{"current_steps": 28470, "total_steps": 78105, "loss": 0.277, "lr": 4.0080757147987135e-06, "epoch": 1.822546571922412, "percentage": 36.45, "elapsed_time": "1:15:08", "remaining_time": "2:10:59", "throughput": 19874.98, "total_tokens": 89602240}
|
|
{"current_steps": 28475, "total_steps": 78105, "loss": 0.3515, "lr": 4.0076301144362865e-06, "epoch": 1.822866653863389, "percentage": 36.46, "elapsed_time": "1:15:08", "remaining_time": "2:10:58", "throughput": 19875.45, "total_tokens": 89617600}
|
|
{"current_steps": 28480, "total_steps": 78105, "loss": 0.371, "lr": 4.007184438790663e-06, "epoch": 1.823186735804366, "percentage": 36.46, "elapsed_time": "1:15:09", "remaining_time": "2:10:57", "throughput": 19875.95, "total_tokens": 89633344}
|
|
{"current_steps": 28485, "total_steps": 78105, "loss": 0.2706, "lr": 4.0067386878840995e-06, "epoch": 1.8235068177453428, "percentage": 36.47, "elapsed_time": "1:15:10", "remaining_time": "2:10:56", "throughput": 19876.43, "total_tokens": 89648832}
|
|
{"current_steps": 28490, "total_steps": 78105, "loss": 0.3214, "lr": 4.006292861738855e-06, "epoch": 1.8238268996863196, "percentage": 36.48, "elapsed_time": "1:15:10", "remaining_time": "2:10:55", "throughput": 19876.92, "total_tokens": 89664256}
|
|
{"current_steps": 28495, "total_steps": 78105, "loss": 0.3532, "lr": 4.00584696037719e-06, "epoch": 1.8241469816272966, "percentage": 36.48, "elapsed_time": "1:15:11", "remaining_time": "2:10:54", "throughput": 19877.38, "total_tokens": 89679488}
|
|
{"current_steps": 28500, "total_steps": 78105, "loss": 0.4889, "lr": 4.005400983821373e-06, "epoch": 1.8244670635682736, "percentage": 36.49, "elapsed_time": "1:15:12", "remaining_time": "2:10:53", "throughput": 19877.87, "total_tokens": 89695488}
|
|
{"current_steps": 28505, "total_steps": 78105, "loss": 0.3353, "lr": 4.00495493209367e-06, "epoch": 1.8247871455092504, "percentage": 36.5, "elapsed_time": "1:15:12", "remaining_time": "2:10:52", "throughput": 19878.29, "total_tokens": 89710592}
|
|
{"current_steps": 28510, "total_steps": 78105, "loss": 0.2648, "lr": 4.0045088052163585e-06, "epoch": 1.8251072274502271, "percentage": 36.5, "elapsed_time": "1:15:13", "remaining_time": "2:10:51", "throughput": 19878.74, "total_tokens": 89725760}
|
|
{"current_steps": 28515, "total_steps": 78105, "loss": 0.3814, "lr": 4.004062603211714e-06, "epoch": 1.8254273093912041, "percentage": 36.51, "elapsed_time": "1:15:14", "remaining_time": "2:10:50", "throughput": 19879.29, "total_tokens": 89741696}
|
|
{"current_steps": 28520, "total_steps": 78105, "loss": 0.2767, "lr": 4.003616326102015e-06, "epoch": 1.8257473913321811, "percentage": 36.51, "elapsed_time": "1:15:15", "remaining_time": "2:10:49", "throughput": 19879.79, "total_tokens": 89757376}
|
|
{"current_steps": 28525, "total_steps": 78105, "loss": 0.4397, "lr": 4.003169973909551e-06, "epoch": 1.8260674732731579, "percentage": 36.52, "elapsed_time": "1:15:15", "remaining_time": "2:10:48", "throughput": 19880.44, "total_tokens": 89774592}
|
|
{"current_steps": 28530, "total_steps": 78105, "loss": 0.2701, "lr": 4.002723546656608e-06, "epoch": 1.8263875552141349, "percentage": 36.53, "elapsed_time": "1:15:16", "remaining_time": "2:10:47", "throughput": 19880.97, "total_tokens": 89790784}
|
|
{"current_steps": 28535, "total_steps": 78105, "loss": 0.2188, "lr": 4.002277044365478e-06, "epoch": 1.8267076371551116, "percentage": 36.53, "elapsed_time": "1:15:17", "remaining_time": "2:10:46", "throughput": 19881.45, "total_tokens": 89806208}
|
|
{"current_steps": 28540, "total_steps": 78105, "loss": 0.2906, "lr": 4.001830467058458e-06, "epoch": 1.8270277190960886, "percentage": 36.54, "elapsed_time": "1:15:17", "remaining_time": "2:10:45", "throughput": 19882.03, "total_tokens": 89822592}
|
|
{"current_steps": 28545, "total_steps": 78105, "loss": 0.494, "lr": 4.001383814757847e-06, "epoch": 1.8273478010370656, "percentage": 36.55, "elapsed_time": "1:15:18", "remaining_time": "2:10:44", "throughput": 19882.58, "total_tokens": 89838656}
|
|
{"current_steps": 28550, "total_steps": 78105, "loss": 0.2168, "lr": 4.000937087485948e-06, "epoch": 1.8276678829780424, "percentage": 36.55, "elapsed_time": "1:15:19", "remaining_time": "2:10:43", "throughput": 19883.08, "total_tokens": 89854272}
|
|
{"current_steps": 28555, "total_steps": 78105, "loss": 0.3996, "lr": 4.00049028526507e-06, "epoch": 1.8279879649190192, "percentage": 36.56, "elapsed_time": "1:15:19", "remaining_time": "2:10:42", "throughput": 19883.53, "total_tokens": 89869504}
|
|
{"current_steps": 28560, "total_steps": 78105, "loss": 0.3058, "lr": 4.000043408117523e-06, "epoch": 1.8283080468599961, "percentage": 36.57, "elapsed_time": "1:15:20", "remaining_time": "2:10:41", "throughput": 19884.03, "total_tokens": 89884928}
|
|
{"current_steps": 28565, "total_steps": 78105, "loss": 0.3295, "lr": 3.999596456065621e-06, "epoch": 1.8286281288009731, "percentage": 36.57, "elapsed_time": "1:15:21", "remaining_time": "2:10:40", "throughput": 19884.54, "total_tokens": 89901056}
|
|
{"current_steps": 28570, "total_steps": 78105, "loss": 0.371, "lr": 3.999149429131683e-06, "epoch": 1.82894821074195, "percentage": 36.58, "elapsed_time": "1:15:21", "remaining_time": "2:10:40", "throughput": 19885.16, "total_tokens": 89917952}
|
|
{"current_steps": 28575, "total_steps": 78105, "loss": 0.3834, "lr": 3.998702327338031e-06, "epoch": 1.8292682926829267, "percentage": 36.59, "elapsed_time": "1:15:22", "remaining_time": "2:10:39", "throughput": 19885.64, "total_tokens": 89932992}
|
|
{"current_steps": 28580, "total_steps": 78105, "loss": 0.31, "lr": 3.998255150706993e-06, "epoch": 1.8295883746239037, "percentage": 36.59, "elapsed_time": "1:15:23", "remaining_time": "2:10:37", "throughput": 19886.09, "total_tokens": 89948096}
|
|
{"current_steps": 28585, "total_steps": 78105, "loss": 0.2606, "lr": 3.9978078992608955e-06, "epoch": 1.8299084565648807, "percentage": 36.6, "elapsed_time": "1:15:23", "remaining_time": "2:10:36", "throughput": 19886.62, "total_tokens": 89963776}
|
|
{"current_steps": 28590, "total_steps": 78105, "loss": 0.3278, "lr": 3.997360573022073e-06, "epoch": 1.8302285385058576, "percentage": 36.6, "elapsed_time": "1:15:24", "remaining_time": "2:10:35", "throughput": 19887.03, "total_tokens": 89978368}
|
|
{"current_steps": 28595, "total_steps": 78105, "loss": 0.2976, "lr": 3.996913172012864e-06, "epoch": 1.8305486204468344, "percentage": 36.61, "elapsed_time": "1:15:25", "remaining_time": "2:10:34", "throughput": 19887.48, "total_tokens": 89993792}
|
|
{"current_steps": 28600, "total_steps": 78105, "loss": 0.3669, "lr": 3.996465696255607e-06, "epoch": 1.8308687023878112, "percentage": 36.62, "elapsed_time": "1:15:25", "remaining_time": "2:10:33", "throughput": 19887.96, "total_tokens": 90009088}
|
|
{"current_steps": 28605, "total_steps": 78105, "loss": 0.3186, "lr": 3.996018145772649e-06, "epoch": 1.8311887843287882, "percentage": 36.62, "elapsed_time": "1:15:26", "remaining_time": "2:10:32", "throughput": 19888.44, "total_tokens": 90024576}
|
|
{"current_steps": 28610, "total_steps": 78105, "loss": 0.385, "lr": 3.9955705205863375e-06, "epoch": 1.8315088662697652, "percentage": 36.63, "elapsed_time": "1:15:27", "remaining_time": "2:10:31", "throughput": 19888.96, "total_tokens": 90040064}
|
|
{"current_steps": 28615, "total_steps": 78105, "loss": 0.3319, "lr": 3.995122820719023e-06, "epoch": 1.831828948210742, "percentage": 36.64, "elapsed_time": "1:15:27", "remaining_time": "2:10:30", "throughput": 19889.5, "total_tokens": 90056384}
|
|
{"current_steps": 28620, "total_steps": 78105, "loss": 0.334, "lr": 3.994675046193064e-06, "epoch": 1.8321490301517187, "percentage": 36.64, "elapsed_time": "1:15:28", "remaining_time": "2:10:29", "throughput": 19890.02, "total_tokens": 90072192}
|
|
{"current_steps": 28625, "total_steps": 78105, "loss": 0.3245, "lr": 3.994227197030818e-06, "epoch": 1.8324691120926957, "percentage": 36.65, "elapsed_time": "1:15:29", "remaining_time": "2:10:28", "throughput": 19890.53, "total_tokens": 90087936}
|
|
{"current_steps": 28630, "total_steps": 78105, "loss": 0.3607, "lr": 3.993779273254649e-06, "epoch": 1.8327891940336727, "percentage": 36.66, "elapsed_time": "1:15:29", "remaining_time": "2:10:27", "throughput": 19891.04, "total_tokens": 90103616}
|
|
{"current_steps": 28635, "total_steps": 78105, "loss": 0.2726, "lr": 3.993331274886923e-06, "epoch": 1.8331092759746497, "percentage": 36.66, "elapsed_time": "1:15:30", "remaining_time": "2:10:26", "throughput": 19891.5, "total_tokens": 90118656}
|
|
{"current_steps": 28640, "total_steps": 78105, "loss": 0.2995, "lr": 3.992883201950013e-06, "epoch": 1.8334293579156264, "percentage": 36.67, "elapsed_time": "1:15:31", "remaining_time": "2:10:26", "throughput": 19892.2, "total_tokens": 90136384}
|
|
{"current_steps": 28645, "total_steps": 78105, "loss": 0.3335, "lr": 3.992435054466291e-06, "epoch": 1.8337494398566032, "percentage": 36.67, "elapsed_time": "1:15:31", "remaining_time": "2:10:25", "throughput": 19892.62, "total_tokens": 90151424}
|
|
{"current_steps": 28650, "total_steps": 78105, "loss": 0.4017, "lr": 3.991986832458138e-06, "epoch": 1.8340695217975802, "percentage": 36.68, "elapsed_time": "1:15:32", "remaining_time": "2:10:24", "throughput": 19893.14, "total_tokens": 90167360}
|
|
{"current_steps": 28655, "total_steps": 78105, "loss": 0.4695, "lr": 3.991538535947933e-06, "epoch": 1.8343896037385572, "percentage": 36.69, "elapsed_time": "1:15:33", "remaining_time": "2:10:23", "throughput": 19893.67, "total_tokens": 90183296}
|
|
{"current_steps": 28660, "total_steps": 78105, "loss": 0.2858, "lr": 3.991090164958062e-06, "epoch": 1.834709685679534, "percentage": 36.69, "elapsed_time": "1:15:33", "remaining_time": "2:10:22", "throughput": 19894.16, "total_tokens": 90199040}
|
|
{"current_steps": 28665, "total_steps": 78105, "loss": 0.4025, "lr": 3.990641719510916e-06, "epoch": 1.8350297676205107, "percentage": 36.7, "elapsed_time": "1:15:34", "remaining_time": "2:10:21", "throughput": 19894.64, "total_tokens": 90214656}
|
|
{"current_steps": 28670, "total_steps": 78105, "loss": 0.3675, "lr": 3.990193199628886e-06, "epoch": 1.8353498495614877, "percentage": 36.71, "elapsed_time": "1:15:35", "remaining_time": "2:10:20", "throughput": 19895.13, "total_tokens": 90230336}
|
|
{"current_steps": 28675, "total_steps": 78105, "loss": 0.2516, "lr": 3.9897446053343705e-06, "epoch": 1.8356699315024647, "percentage": 36.71, "elapsed_time": "1:15:35", "remaining_time": "2:10:19", "throughput": 19895.58, "total_tokens": 90245248}
|
|
{"current_steps": 28680, "total_steps": 78105, "loss": 0.3023, "lr": 3.989295936649769e-06, "epoch": 1.8359900134434415, "percentage": 36.72, "elapsed_time": "1:15:36", "remaining_time": "2:10:18", "throughput": 19896.06, "total_tokens": 90260608}
|
|
{"current_steps": 28685, "total_steps": 78105, "loss": 0.3535, "lr": 3.988847193597486e-06, "epoch": 1.8363100953844183, "percentage": 36.73, "elapsed_time": "1:15:37", "remaining_time": "2:10:17", "throughput": 19896.59, "total_tokens": 90276672}
|
|
{"current_steps": 28690, "total_steps": 78105, "loss": 0.5439, "lr": 3.988398376199929e-06, "epoch": 1.8366301773253952, "percentage": 36.73, "elapsed_time": "1:15:37", "remaining_time": "2:10:16", "throughput": 19897.03, "total_tokens": 90291840}
|
|
{"current_steps": 28695, "total_steps": 78105, "loss": 0.2631, "lr": 3.9879494844795095e-06, "epoch": 1.8369502592663722, "percentage": 36.74, "elapsed_time": "1:15:38", "remaining_time": "2:10:15", "throughput": 19897.59, "total_tokens": 90308352}
|
|
{"current_steps": 28700, "total_steps": 78105, "loss": 0.2444, "lr": 3.987500518458644e-06, "epoch": 1.8372703412073492, "percentage": 36.75, "elapsed_time": "1:15:39", "remaining_time": "2:10:14", "throughput": 19898.07, "total_tokens": 90323840}
|
|
{"current_steps": 28705, "total_steps": 78105, "loss": 0.3744, "lr": 3.98705147815975e-06, "epoch": 1.837590423148326, "percentage": 36.75, "elapsed_time": "1:15:40", "remaining_time": "2:10:13", "throughput": 19898.62, "total_tokens": 90339968}
|
|
{"current_steps": 28710, "total_steps": 78105, "loss": 0.359, "lr": 3.986602363605251e-06, "epoch": 1.8379105050893028, "percentage": 36.76, "elapsed_time": "1:15:40", "remaining_time": "2:10:12", "throughput": 19899.12, "total_tokens": 90355328}
|
|
{"current_steps": 28715, "total_steps": 78105, "loss": 0.2048, "lr": 3.986153174817574e-06, "epoch": 1.8382305870302798, "percentage": 36.76, "elapsed_time": "1:15:41", "remaining_time": "2:10:11", "throughput": 19899.56, "total_tokens": 90370240}
|
|
{"current_steps": 28720, "total_steps": 78105, "loss": 0.3128, "lr": 3.985703911819149e-06, "epoch": 1.8385506689712567, "percentage": 36.77, "elapsed_time": "1:15:42", "remaining_time": "2:10:10", "throughput": 19900.37, "total_tokens": 90389696}
|
|
{"current_steps": 28725, "total_steps": 78105, "loss": 0.3714, "lr": 3.985254574632408e-06, "epoch": 1.8388707509122335, "percentage": 36.78, "elapsed_time": "1:15:42", "remaining_time": "2:10:09", "throughput": 19900.82, "total_tokens": 90405184}
|
|
{"current_steps": 28730, "total_steps": 78105, "loss": 0.3571, "lr": 3.984805163279791e-06, "epoch": 1.8391908328532103, "percentage": 36.78, "elapsed_time": "1:15:43", "remaining_time": "2:10:08", "throughput": 19901.48, "total_tokens": 90422528}
|
|
{"current_steps": 28735, "total_steps": 78105, "loss": 0.2416, "lr": 3.984355677783738e-06, "epoch": 1.8395109147941873, "percentage": 36.79, "elapsed_time": "1:15:44", "remaining_time": "2:10:07", "throughput": 19902.06, "total_tokens": 90439552}
|
|
{"current_steps": 28740, "total_steps": 78105, "loss": 0.4798, "lr": 3.983906118166694e-06, "epoch": 1.8398309967351643, "percentage": 36.8, "elapsed_time": "1:15:44", "remaining_time": "2:10:06", "throughput": 19902.47, "total_tokens": 90454336}
|
|
{"current_steps": 28745, "total_steps": 78105, "loss": 0.2643, "lr": 3.983456484451108e-06, "epoch": 1.8401510786761412, "percentage": 36.8, "elapsed_time": "1:15:45", "remaining_time": "2:10:05", "throughput": 19902.99, "total_tokens": 90470336}
|
|
{"current_steps": 28750, "total_steps": 78105, "loss": 0.3677, "lr": 3.983006776659432e-06, "epoch": 1.840471160617118, "percentage": 36.81, "elapsed_time": "1:15:46", "remaining_time": "2:10:04", "throughput": 19903.49, "total_tokens": 90486144}
|
|
{"current_steps": 28755, "total_steps": 78105, "loss": 0.3843, "lr": 3.982556994814122e-06, "epoch": 1.8407912425580948, "percentage": 36.82, "elapsed_time": "1:15:46", "remaining_time": "2:10:03", "throughput": 19903.96, "total_tokens": 90501504}
|
|
{"current_steps": 28760, "total_steps": 78105, "loss": 0.366, "lr": 3.982107138937638e-06, "epoch": 1.8411113244990718, "percentage": 36.82, "elapsed_time": "1:15:47", "remaining_time": "2:10:02", "throughput": 19904.45, "total_tokens": 90517632}
|
|
{"current_steps": 28765, "total_steps": 78105, "loss": 0.4233, "lr": 3.9816572090524445e-06, "epoch": 1.8414314064400488, "percentage": 36.83, "elapsed_time": "1:15:48", "remaining_time": "2:10:01", "throughput": 19904.97, "total_tokens": 90533440}
|
|
{"current_steps": 28770, "total_steps": 78105, "loss": 0.4106, "lr": 3.981207205181006e-06, "epoch": 1.8417514883810255, "percentage": 36.84, "elapsed_time": "1:15:48", "remaining_time": "2:10:00", "throughput": 19905.36, "total_tokens": 90547840}
|
|
{"current_steps": 28775, "total_steps": 78105, "loss": 0.3703, "lr": 3.980757127345796e-06, "epoch": 1.8420715703220023, "percentage": 36.84, "elapsed_time": "1:15:49", "remaining_time": "2:09:59", "throughput": 19905.77, "total_tokens": 90562560}
|
|
{"current_steps": 28780, "total_steps": 78105, "loss": 0.2585, "lr": 3.980306975569288e-06, "epoch": 1.8423916522629793, "percentage": 36.85, "elapsed_time": "1:15:50", "remaining_time": "2:09:58", "throughput": 19906.22, "total_tokens": 90577664}
|
|
{"current_steps": 28785, "total_steps": 78105, "loss": 0.3285, "lr": 3.9798567498739605e-06, "epoch": 1.8427117342039563, "percentage": 36.85, "elapsed_time": "1:15:50", "remaining_time": "2:09:57", "throughput": 19906.68, "total_tokens": 90593024}
|
|
{"current_steps": 28790, "total_steps": 78105, "loss": 0.3128, "lr": 3.979406450282295e-06, "epoch": 1.843031816144933, "percentage": 36.86, "elapsed_time": "1:15:51", "remaining_time": "2:09:56", "throughput": 19907.1, "total_tokens": 90607616}
|
|
{"current_steps": 28795, "total_steps": 78105, "loss": 0.2937, "lr": 3.978956076816778e-06, "epoch": 1.84335189808591, "percentage": 36.87, "elapsed_time": "1:15:52", "remaining_time": "2:09:55", "throughput": 19907.5, "total_tokens": 90622528}
|
|
{"current_steps": 28800, "total_steps": 78105, "loss": 0.5434, "lr": 3.978505629499897e-06, "epoch": 1.8436719800268868, "percentage": 36.87, "elapsed_time": "1:15:52", "remaining_time": "2:09:54", "throughput": 19907.98, "total_tokens": 90638272}
|
|
{"current_steps": 28805, "total_steps": 78105, "loss": 0.2492, "lr": 3.9780551083541465e-06, "epoch": 1.8439920619678638, "percentage": 36.88, "elapsed_time": "1:15:53", "remaining_time": "2:09:53", "throughput": 19908.49, "total_tokens": 90654080}
|
|
{"current_steps": 28810, "total_steps": 78105, "loss": 0.3155, "lr": 3.9776045134020234e-06, "epoch": 1.8443121439088408, "percentage": 36.89, "elapsed_time": "1:15:54", "remaining_time": "2:09:52", "throughput": 19908.93, "total_tokens": 90668992}
|
|
{"current_steps": 28815, "total_steps": 78105, "loss": 0.3145, "lr": 3.977153844666026e-06, "epoch": 1.8446322258498176, "percentage": 36.89, "elapsed_time": "1:15:54", "remaining_time": "2:09:51", "throughput": 19909.42, "total_tokens": 90684736}
|
|
{"current_steps": 28820, "total_steps": 78105, "loss": 0.3716, "lr": 3.976703102168661e-06, "epoch": 1.8449523077907943, "percentage": 36.9, "elapsed_time": "1:15:55", "remaining_time": "2:09:50", "throughput": 19909.91, "total_tokens": 90700416}
|
|
{"current_steps": 28825, "total_steps": 78105, "loss": 0.3012, "lr": 3.976252285932435e-06, "epoch": 1.8452723897317713, "percentage": 36.91, "elapsed_time": "1:15:56", "remaining_time": "2:09:49", "throughput": 19910.38, "total_tokens": 90715648}
|
|
{"current_steps": 28830, "total_steps": 78105, "loss": 0.3135, "lr": 3.975801395979859e-06, "epoch": 1.8455924716727483, "percentage": 36.91, "elapsed_time": "1:15:56", "remaining_time": "2:09:48", "throughput": 19910.81, "total_tokens": 90730432}
|
|
{"current_steps": 28835, "total_steps": 78105, "loss": 0.4524, "lr": 3.975350432333449e-06, "epoch": 1.845912553613725, "percentage": 36.92, "elapsed_time": "1:15:57", "remaining_time": "2:09:47", "throughput": 19911.3, "total_tokens": 90746240}
|
|
{"current_steps": 28840, "total_steps": 78105, "loss": 0.301, "lr": 3.974899395015722e-06, "epoch": 1.8462326355547019, "percentage": 36.92, "elapsed_time": "1:15:58", "remaining_time": "2:09:46", "throughput": 19911.71, "total_tokens": 90760768}
|
|
{"current_steps": 28845, "total_steps": 78105, "loss": 0.3463, "lr": 3.974448284049202e-06, "epoch": 1.8465527174956788, "percentage": 36.93, "elapsed_time": "1:15:58", "remaining_time": "2:09:45", "throughput": 19912.23, "total_tokens": 90776896}
|
|
{"current_steps": 28850, "total_steps": 78105, "loss": 0.2872, "lr": 3.973997099456416e-06, "epoch": 1.8468727994366558, "percentage": 36.94, "elapsed_time": "1:15:59", "remaining_time": "2:09:44", "throughput": 19912.71, "total_tokens": 90792512}
|
|
{"current_steps": 28855, "total_steps": 78105, "loss": 0.272, "lr": 3.973545841259892e-06, "epoch": 1.8471928813776328, "percentage": 36.94, "elapsed_time": "1:16:00", "remaining_time": "2:09:43", "throughput": 19913.29, "total_tokens": 90808960}
|
|
{"current_steps": 28860, "total_steps": 78105, "loss": 0.2062, "lr": 3.973094509482164e-06, "epoch": 1.8475129633186096, "percentage": 36.95, "elapsed_time": "1:16:00", "remaining_time": "2:09:42", "throughput": 19913.7, "total_tokens": 90823744}
|
|
{"current_steps": 28865, "total_steps": 78105, "loss": 0.3953, "lr": 3.972643104145769e-06, "epoch": 1.8478330452595864, "percentage": 36.96, "elapsed_time": "1:16:01", "remaining_time": "2:09:41", "throughput": 19914.3, "total_tokens": 90840576}
|
|
{"current_steps": 28870, "total_steps": 78105, "loss": 0.2605, "lr": 3.972191625273248e-06, "epoch": 1.8481531272005634, "percentage": 36.96, "elapsed_time": "1:16:02", "remaining_time": "2:09:40", "throughput": 19914.75, "total_tokens": 90855872}
|
|
{"current_steps": 28875, "total_steps": 78105, "loss": 0.3846, "lr": 3.971740072887146e-06, "epoch": 1.8484732091415403, "percentage": 36.97, "elapsed_time": "1:16:02", "remaining_time": "2:09:39", "throughput": 19915.18, "total_tokens": 90871040}
|
|
{"current_steps": 28880, "total_steps": 78105, "loss": 0.239, "lr": 3.971288447010011e-06, "epoch": 1.848793291082517, "percentage": 36.98, "elapsed_time": "1:16:03", "remaining_time": "2:09:38", "throughput": 19915.71, "total_tokens": 90887360}
|
|
{"current_steps": 28885, "total_steps": 78105, "loss": 0.3381, "lr": 3.970836747664394e-06, "epoch": 1.8491133730234939, "percentage": 36.98, "elapsed_time": "1:16:04", "remaining_time": "2:09:37", "throughput": 19916.14, "total_tokens": 90902464}
|
|
{"current_steps": 28890, "total_steps": 78105, "loss": 0.33, "lr": 3.970384974872851e-06, "epoch": 1.8494334549644709, "percentage": 36.99, "elapsed_time": "1:16:04", "remaining_time": "2:09:36", "throughput": 19916.52, "total_tokens": 90917056}
|
|
{"current_steps": 28895, "total_steps": 78105, "loss": 0.4705, "lr": 3.969933128657942e-06, "epoch": 1.8497535369054479, "percentage": 37.0, "elapsed_time": "1:16:05", "remaining_time": "2:09:35", "throughput": 19916.97, "total_tokens": 90932160}
|
|
{"current_steps": 28900, "total_steps": 78105, "loss": 0.3002, "lr": 3.9694812090422294e-06, "epoch": 1.8500736188464248, "percentage": 37.0, "elapsed_time": "1:16:06", "remaining_time": "2:09:34", "throughput": 19917.43, "total_tokens": 90947328}
|
|
{"current_steps": 28905, "total_steps": 78105, "loss": 0.3778, "lr": 3.969029216048279e-06, "epoch": 1.8503937007874016, "percentage": 37.01, "elapsed_time": "1:16:06", "remaining_time": "2:09:33", "throughput": 19917.93, "total_tokens": 90962880}
|
|
{"current_steps": 28910, "total_steps": 78105, "loss": 0.313, "lr": 3.968577149698661e-06, "epoch": 1.8507137827283784, "percentage": 37.01, "elapsed_time": "1:16:07", "remaining_time": "2:09:32", "throughput": 19918.44, "total_tokens": 90978944}
|
|
{"current_steps": 28915, "total_steps": 78105, "loss": 0.3712, "lr": 3.9681250100159505e-06, "epoch": 1.8510338646693554, "percentage": 37.02, "elapsed_time": "1:16:08", "remaining_time": "2:09:31", "throughput": 19918.89, "total_tokens": 90994368}
|
|
{"current_steps": 28920, "total_steps": 78105, "loss": 0.4533, "lr": 3.967672797022724e-06, "epoch": 1.8513539466103324, "percentage": 37.03, "elapsed_time": "1:16:08", "remaining_time": "2:09:30", "throughput": 19919.48, "total_tokens": 91011072}
|
|
{"current_steps": 28925, "total_steps": 78105, "loss": 0.2506, "lr": 3.967220510741562e-06, "epoch": 1.8516740285513091, "percentage": 37.03, "elapsed_time": "1:16:09", "remaining_time": "2:09:29", "throughput": 19919.96, "total_tokens": 91026688}
|
|
{"current_steps": 28930, "total_steps": 78105, "loss": 0.3414, "lr": 3.966768151195051e-06, "epoch": 1.851994110492286, "percentage": 37.04, "elapsed_time": "1:16:10", "remaining_time": "2:09:28", "throughput": 19920.41, "total_tokens": 91042176}
|
|
{"current_steps": 28935, "total_steps": 78105, "loss": 0.2907, "lr": 3.966315718405779e-06, "epoch": 1.852314192433263, "percentage": 37.05, "elapsed_time": "1:16:10", "remaining_time": "2:09:27", "throughput": 19920.88, "total_tokens": 91057600}
|
|
{"current_steps": 28940, "total_steps": 78105, "loss": 0.3664, "lr": 3.965863212396337e-06, "epoch": 1.8526342743742399, "percentage": 37.05, "elapsed_time": "1:16:11", "remaining_time": "2:09:26", "throughput": 19921.38, "total_tokens": 91073152}
|
|
{"current_steps": 28945, "total_steps": 78105, "loss": 0.3997, "lr": 3.965410633189321e-06, "epoch": 1.8529543563152167, "percentage": 37.06, "elapsed_time": "1:16:12", "remaining_time": "2:09:25", "throughput": 19921.85, "total_tokens": 91088896}
|
|
{"current_steps": 28950, "total_steps": 78105, "loss": 0.4065, "lr": 3.964957980807332e-06, "epoch": 1.8532744382561934, "percentage": 37.07, "elapsed_time": "1:16:12", "remaining_time": "2:09:24", "throughput": 19922.33, "total_tokens": 91104256}
|
|
{"current_steps": 28955, "total_steps": 78105, "loss": 0.3937, "lr": 3.9645052552729725e-06, "epoch": 1.8535945201971704, "percentage": 37.07, "elapsed_time": "1:16:13", "remaining_time": "2:09:23", "throughput": 19922.75, "total_tokens": 91119488}
|
|
{"current_steps": 28960, "total_steps": 78105, "loss": 0.3684, "lr": 3.964052456608848e-06, "epoch": 1.8539146021381474, "percentage": 37.08, "elapsed_time": "1:16:14", "remaining_time": "2:09:22", "throughput": 19923.2, "total_tokens": 91134848}
|
|
{"current_steps": 28965, "total_steps": 78105, "loss": 0.2936, "lr": 3.9635995848375706e-06, "epoch": 1.8542346840791244, "percentage": 37.08, "elapsed_time": "1:16:15", "remaining_time": "2:09:21", "throughput": 19923.76, "total_tokens": 91151488}
|
|
{"current_steps": 28970, "total_steps": 78105, "loss": 0.3816, "lr": 3.963146639981753e-06, "epoch": 1.8545547660201012, "percentage": 37.09, "elapsed_time": "1:16:15", "remaining_time": "2:09:20", "throughput": 19924.2, "total_tokens": 91166912}
|
|
{"current_steps": 28975, "total_steps": 78105, "loss": 0.3236, "lr": 3.962693622064013e-06, "epoch": 1.854874847961078, "percentage": 37.1, "elapsed_time": "1:16:16", "remaining_time": "2:09:19", "throughput": 19924.72, "total_tokens": 91183360}
|
|
{"current_steps": 28980, "total_steps": 78105, "loss": 0.4398, "lr": 3.962240531106973e-06, "epoch": 1.855194929902055, "percentage": 37.1, "elapsed_time": "1:16:17", "remaining_time": "2:09:18", "throughput": 19925.17, "total_tokens": 91198592}
|
|
{"current_steps": 28985, "total_steps": 78105, "loss": 0.2922, "lr": 3.961787367133258e-06, "epoch": 1.855515011843032, "percentage": 37.11, "elapsed_time": "1:16:17", "remaining_time": "2:09:17", "throughput": 19925.59, "total_tokens": 91213696}
|
|
{"current_steps": 28990, "total_steps": 78105, "loss": 0.2227, "lr": 3.9613341301654954e-06, "epoch": 1.8558350937840087, "percentage": 37.12, "elapsed_time": "1:16:18", "remaining_time": "2:09:16", "throughput": 19926.0, "total_tokens": 91228672}
|
|
{"current_steps": 28995, "total_steps": 78105, "loss": 0.3278, "lr": 3.960880820226318e-06, "epoch": 1.8561551757249855, "percentage": 37.12, "elapsed_time": "1:16:19", "remaining_time": "2:09:15", "throughput": 19926.47, "total_tokens": 91244160}
|
|
{"current_steps": 29000, "total_steps": 78105, "loss": 0.2866, "lr": 3.960427437338362e-06, "epoch": 1.8564752576659624, "percentage": 37.13, "elapsed_time": "1:16:19", "remaining_time": "2:09:14", "throughput": 19926.94, "total_tokens": 91259904}
|
|
{"current_steps": 29005, "total_steps": 78105, "loss": 0.2409, "lr": 3.9599739815242665e-06, "epoch": 1.8567953396069394, "percentage": 37.14, "elapsed_time": "1:16:20", "remaining_time": "2:09:13", "throughput": 19927.36, "total_tokens": 91274816}
|
|
{"current_steps": 29010, "total_steps": 78105, "loss": 0.3317, "lr": 3.959520452806675e-06, "epoch": 1.8571154215479164, "percentage": 37.14, "elapsed_time": "1:16:21", "remaining_time": "2:09:12", "throughput": 19927.87, "total_tokens": 91290624}
|
|
{"current_steps": 29015, "total_steps": 78105, "loss": 0.2801, "lr": 3.9590668512082355e-06, "epoch": 1.8574355034888932, "percentage": 37.15, "elapsed_time": "1:16:21", "remaining_time": "2:09:11", "throughput": 19928.45, "total_tokens": 91307264}
|
|
{"current_steps": 29020, "total_steps": 78105, "loss": 0.3701, "lr": 3.958613176751597e-06, "epoch": 1.85775558542987, "percentage": 37.16, "elapsed_time": "1:16:22", "remaining_time": "2:09:10", "throughput": 19928.86, "total_tokens": 91322112}
|
|
{"current_steps": 29025, "total_steps": 78105, "loss": 0.3043, "lr": 3.9581594294594135e-06, "epoch": 1.858075667370847, "percentage": 37.16, "elapsed_time": "1:16:23", "remaining_time": "2:09:09", "throughput": 19929.41, "total_tokens": 91338112}
|
|
{"current_steps": 29030, "total_steps": 78105, "loss": 0.4275, "lr": 3.957705609354345e-06, "epoch": 1.858395749311824, "percentage": 37.17, "elapsed_time": "1:16:23", "remaining_time": "2:09:08", "throughput": 19929.87, "total_tokens": 91353792}
|
|
{"current_steps": 29035, "total_steps": 78105, "loss": 0.5796, "lr": 3.9572517164590495e-06, "epoch": 1.8587158312528007, "percentage": 37.17, "elapsed_time": "1:16:24", "remaining_time": "2:09:07", "throughput": 19930.33, "total_tokens": 91369216}
|
|
{"current_steps": 29040, "total_steps": 78105, "loss": 0.3269, "lr": 3.956797750796195e-06, "epoch": 1.8590359131937775, "percentage": 37.18, "elapsed_time": "1:16:25", "remaining_time": "2:09:06", "throughput": 19930.73, "total_tokens": 91384000}
|
|
{"current_steps": 29045, "total_steps": 78105, "loss": 0.3778, "lr": 3.956343712388448e-06, "epoch": 1.8593559951347545, "percentage": 37.19, "elapsed_time": "1:16:25", "remaining_time": "2:09:05", "throughput": 19931.27, "total_tokens": 91400064}
|
|
{"current_steps": 29050, "total_steps": 78105, "loss": 0.3251, "lr": 3.955889601258483e-06, "epoch": 1.8596760770757315, "percentage": 37.19, "elapsed_time": "1:16:26", "remaining_time": "2:09:04", "throughput": 19931.71, "total_tokens": 91415616}
|
|
{"current_steps": 29055, "total_steps": 78105, "loss": 0.3249, "lr": 3.955435417428973e-06, "epoch": 1.8599961590167082, "percentage": 37.2, "elapsed_time": "1:16:27", "remaining_time": "2:09:03", "throughput": 19932.28, "total_tokens": 91432192}
|
|
{"current_steps": 29060, "total_steps": 78105, "loss": 0.483, "lr": 3.954981160922601e-06, "epoch": 1.8603162409576852, "percentage": 37.21, "elapsed_time": "1:16:27", "remaining_time": "2:09:02", "throughput": 19932.75, "total_tokens": 91447808}
|
|
{"current_steps": 29065, "total_steps": 78105, "loss": 0.2738, "lr": 3.954526831762048e-06, "epoch": 1.860636322898662, "percentage": 37.21, "elapsed_time": "1:16:28", "remaining_time": "2:09:01", "throughput": 19933.19, "total_tokens": 91462912}
|
|
{"current_steps": 29070, "total_steps": 78105, "loss": 0.3437, "lr": 3.954072429970002e-06, "epoch": 1.860956404839639, "percentage": 37.22, "elapsed_time": "1:16:29", "remaining_time": "2:09:00", "throughput": 19933.66, "total_tokens": 91478656}
|
|
{"current_steps": 29075, "total_steps": 78105, "loss": 0.2974, "lr": 3.953617955569151e-06, "epoch": 1.861276486780616, "percentage": 37.23, "elapsed_time": "1:16:29", "remaining_time": "2:08:59", "throughput": 19934.11, "total_tokens": 91493824}
|
|
{"current_steps": 29080, "total_steps": 78105, "loss": 0.5277, "lr": 3.953163408582193e-06, "epoch": 1.8615965687215927, "percentage": 37.23, "elapsed_time": "1:16:30", "remaining_time": "2:08:58", "throughput": 19934.57, "total_tokens": 91509184}
|
|
{"current_steps": 29085, "total_steps": 78105, "loss": 0.2569, "lr": 3.952708789031822e-06, "epoch": 1.8619166506625695, "percentage": 37.24, "elapsed_time": "1:16:31", "remaining_time": "2:08:57", "throughput": 19935.05, "total_tokens": 91524992}
|
|
{"current_steps": 29090, "total_steps": 78105, "loss": 0.3735, "lr": 3.952254096940742e-06, "epoch": 1.8622367326035465, "percentage": 37.24, "elapsed_time": "1:16:31", "remaining_time": "2:08:56", "throughput": 19935.51, "total_tokens": 91540800}
|
|
{"current_steps": 29095, "total_steps": 78105, "loss": 0.2988, "lr": 3.951799332331656e-06, "epoch": 1.8625568145445235, "percentage": 37.25, "elapsed_time": "1:16:32", "remaining_time": "2:08:55", "throughput": 19935.95, "total_tokens": 91555904}
|
|
{"current_steps": 29100, "total_steps": 78105, "loss": 0.3661, "lr": 3.951344495227275e-06, "epoch": 1.8628768964855003, "percentage": 37.26, "elapsed_time": "1:16:33", "remaining_time": "2:08:54", "throughput": 19936.37, "total_tokens": 91570944}
|
|
{"current_steps": 29105, "total_steps": 78105, "loss": 0.3106, "lr": 3.950889585650308e-06, "epoch": 1.863196978426477, "percentage": 37.26, "elapsed_time": "1:16:33", "remaining_time": "2:08:53", "throughput": 19936.82, "total_tokens": 91586432}
|
|
{"current_steps": 29110, "total_steps": 78105, "loss": 0.3557, "lr": 3.950434603623474e-06, "epoch": 1.863517060367454, "percentage": 37.27, "elapsed_time": "1:16:34", "remaining_time": "2:08:52", "throughput": 19937.23, "total_tokens": 91601216}
|
|
{"current_steps": 29115, "total_steps": 78105, "loss": 0.3512, "lr": 3.949979549169489e-06, "epoch": 1.863837142308431, "percentage": 37.28, "elapsed_time": "1:16:35", "remaining_time": "2:08:52", "throughput": 19937.78, "total_tokens": 91617792}
|
|
{"current_steps": 29120, "total_steps": 78105, "loss": 0.2518, "lr": 3.9495244223110795e-06, "epoch": 1.864157224249408, "percentage": 37.28, "elapsed_time": "1:16:35", "remaining_time": "2:08:51", "throughput": 19938.28, "total_tokens": 91633600}
|
|
{"current_steps": 29125, "total_steps": 78105, "loss": 0.2783, "lr": 3.94906922307097e-06, "epoch": 1.8644773061903848, "percentage": 37.29, "elapsed_time": "1:16:36", "remaining_time": "2:08:50", "throughput": 19938.75, "total_tokens": 91649408}
|
|
{"current_steps": 29130, "total_steps": 78105, "loss": 0.2948, "lr": 3.948613951471892e-06, "epoch": 1.8647973881313615, "percentage": 37.3, "elapsed_time": "1:16:37", "remaining_time": "2:08:49", "throughput": 19939.25, "total_tokens": 91665280}
|
|
{"current_steps": 29135, "total_steps": 78105, "loss": 0.2225, "lr": 3.948158607536579e-06, "epoch": 1.8651174700723385, "percentage": 37.3, "elapsed_time": "1:16:37", "remaining_time": "2:08:48", "throughput": 19939.74, "total_tokens": 91681152}
|
|
{"current_steps": 29140, "total_steps": 78105, "loss": 0.3353, "lr": 3.947703191287768e-06, "epoch": 1.8654375520133155, "percentage": 37.31, "elapsed_time": "1:16:38", "remaining_time": "2:08:47", "throughput": 19940.22, "total_tokens": 91696768}
|
|
{"current_steps": 29145, "total_steps": 78105, "loss": 0.4083, "lr": 3.9472477027482e-06, "epoch": 1.8657576339542923, "percentage": 37.32, "elapsed_time": "1:16:39", "remaining_time": "2:08:46", "throughput": 19940.79, "total_tokens": 91713344}
|
|
{"current_steps": 29150, "total_steps": 78105, "loss": 0.3764, "lr": 3.946792141940621e-06, "epoch": 1.866077715895269, "percentage": 37.32, "elapsed_time": "1:16:39", "remaining_time": "2:08:45", "throughput": 19941.28, "total_tokens": 91728768}
|
|
{"current_steps": 29155, "total_steps": 78105, "loss": 0.1963, "lr": 3.946336508887778e-06, "epoch": 1.866397797836246, "percentage": 37.33, "elapsed_time": "1:16:40", "remaining_time": "2:08:44", "throughput": 19941.77, "total_tokens": 91744320}
|
|
{"current_steps": 29160, "total_steps": 78105, "loss": 0.4213, "lr": 3.945880803612423e-06, "epoch": 1.866717879777223, "percentage": 37.33, "elapsed_time": "1:16:41", "remaining_time": "2:08:43", "throughput": 19942.21, "total_tokens": 91759616}
|
|
{"current_steps": 29165, "total_steps": 78105, "loss": 0.3482, "lr": 3.945425026137313e-06, "epoch": 1.8670379617182, "percentage": 37.34, "elapsed_time": "1:16:41", "remaining_time": "2:08:42", "throughput": 19942.7, "total_tokens": 91775040}
|
|
{"current_steps": 29170, "total_steps": 78105, "loss": 0.3791, "lr": 3.9449691764852045e-06, "epoch": 1.8673580436591768, "percentage": 37.35, "elapsed_time": "1:16:42", "remaining_time": "2:08:41", "throughput": 19943.06, "total_tokens": 91789568}
|
|
{"current_steps": 29175, "total_steps": 78105, "loss": 0.3363, "lr": 3.944513254678863e-06, "epoch": 1.8676781256001536, "percentage": 37.35, "elapsed_time": "1:16:43", "remaining_time": "2:08:40", "throughput": 19943.69, "total_tokens": 91807232}
|
|
{"current_steps": 29180, "total_steps": 78105, "loss": 0.3427, "lr": 3.944057260741054e-06, "epoch": 1.8679982075411306, "percentage": 37.36, "elapsed_time": "1:16:43", "remaining_time": "2:08:39", "throughput": 19944.17, "total_tokens": 91822848}
|
|
{"current_steps": 29185, "total_steps": 78105, "loss": 0.3246, "lr": 3.943601194694547e-06, "epoch": 1.8683182894821075, "percentage": 37.37, "elapsed_time": "1:16:44", "remaining_time": "2:08:38", "throughput": 19944.66, "total_tokens": 91838080}
|
|
{"current_steps": 29190, "total_steps": 78105, "loss": 0.3411, "lr": 3.943145056562116e-06, "epoch": 1.8686383714230843, "percentage": 37.37, "elapsed_time": "1:16:45", "remaining_time": "2:08:37", "throughput": 19945.06, "total_tokens": 91852672}
|
|
{"current_steps": 29195, "total_steps": 78105, "loss": 0.2123, "lr": 3.942688846366537e-06, "epoch": 1.868958453364061, "percentage": 37.38, "elapsed_time": "1:16:45", "remaining_time": "2:08:36", "throughput": 19945.49, "total_tokens": 91867520}
|
|
{"current_steps": 29200, "total_steps": 78105, "loss": 0.411, "lr": 3.942232564130592e-06, "epoch": 1.869278535305038, "percentage": 37.39, "elapsed_time": "1:16:46", "remaining_time": "2:08:35", "throughput": 19945.88, "total_tokens": 91882368}
|
|
{"current_steps": 29205, "total_steps": 78105, "loss": 0.2532, "lr": 3.941776209877066e-06, "epoch": 1.869598617246015, "percentage": 37.39, "elapsed_time": "1:16:47", "remaining_time": "2:08:34", "throughput": 19946.28, "total_tokens": 91897216}
|
|
{"current_steps": 29210, "total_steps": 78105, "loss": 0.2938, "lr": 3.941319783628745e-06, "epoch": 1.8699186991869918, "percentage": 37.4, "elapsed_time": "1:16:47", "remaining_time": "2:08:33", "throughput": 19946.8, "total_tokens": 91913088}
|
|
{"current_steps": 29215, "total_steps": 78105, "loss": 0.3412, "lr": 3.940863285408423e-06, "epoch": 1.8702387811279686, "percentage": 37.4, "elapsed_time": "1:16:48", "remaining_time": "2:08:32", "throughput": 19947.35, "total_tokens": 91929152}
|
|
{"current_steps": 29220, "total_steps": 78105, "loss": 0.2966, "lr": 3.940406715238892e-06, "epoch": 1.8705588630689456, "percentage": 37.41, "elapsed_time": "1:16:49", "remaining_time": "2:08:31", "throughput": 19947.79, "total_tokens": 91944256}
|
|
{"current_steps": 29225, "total_steps": 78105, "loss": 0.2381, "lr": 3.939950073142954e-06, "epoch": 1.8708789450099226, "percentage": 37.42, "elapsed_time": "1:16:49", "remaining_time": "2:08:30", "throughput": 19948.39, "total_tokens": 91961216}
|
|
{"current_steps": 29230, "total_steps": 78105, "loss": 0.4507, "lr": 3.939493359143408e-06, "epoch": 1.8711990269508996, "percentage": 37.42, "elapsed_time": "1:16:50", "remaining_time": "2:08:29", "throughput": 19948.8, "total_tokens": 91976384}
|
|
{"current_steps": 29235, "total_steps": 78105, "loss": 0.2847, "lr": 3.939036573263063e-06, "epoch": 1.8715191088918763, "percentage": 37.43, "elapsed_time": "1:16:51", "remaining_time": "2:08:28", "throughput": 19949.32, "total_tokens": 91992512}
|
|
{"current_steps": 29240, "total_steps": 78105, "loss": 0.2979, "lr": 3.938579715524726e-06, "epoch": 1.871839190832853, "percentage": 37.44, "elapsed_time": "1:16:51", "remaining_time": "2:08:27", "throughput": 19949.72, "total_tokens": 92007360}
|
|
{"current_steps": 29245, "total_steps": 78105, "loss": 0.2641, "lr": 3.9381227859512125e-06, "epoch": 1.87215927277383, "percentage": 37.44, "elapsed_time": "1:16:52", "remaining_time": "2:08:26", "throughput": 19950.2, "total_tokens": 92023040}
|
|
{"current_steps": 29250, "total_steps": 78105, "loss": 0.3321, "lr": 3.9376657845653386e-06, "epoch": 1.872479354714807, "percentage": 37.45, "elapsed_time": "1:16:53", "remaining_time": "2:08:25", "throughput": 19950.7, "total_tokens": 92038912}
|
|
{"current_steps": 29255, "total_steps": 78105, "loss": 0.2838, "lr": 3.9372087113899224e-06, "epoch": 1.8727994366557839, "percentage": 37.46, "elapsed_time": "1:16:53", "remaining_time": "2:08:24", "throughput": 19951.14, "total_tokens": 92054080}
|
|
{"current_steps": 29260, "total_steps": 78105, "loss": 0.2504, "lr": 3.93675156644779e-06, "epoch": 1.8731195185967606, "percentage": 37.46, "elapsed_time": "1:16:54", "remaining_time": "2:08:23", "throughput": 19951.58, "total_tokens": 92069312}
|
|
{"current_steps": 29265, "total_steps": 78105, "loss": 0.3596, "lr": 3.936294349761768e-06, "epoch": 1.8734396005377376, "percentage": 37.47, "elapsed_time": "1:16:55", "remaining_time": "2:08:22", "throughput": 19952.22, "total_tokens": 92086528}
|
|
{"current_steps": 29270, "total_steps": 78105, "loss": 0.4362, "lr": 3.935837061354687e-06, "epoch": 1.8737596824787146, "percentage": 37.48, "elapsed_time": "1:16:56", "remaining_time": "2:08:21", "throughput": 19952.71, "total_tokens": 92102336}
|
|
{"current_steps": 29275, "total_steps": 78105, "loss": 0.5054, "lr": 3.935379701249383e-06, "epoch": 1.8740797644196916, "percentage": 37.48, "elapsed_time": "1:16:56", "remaining_time": "2:08:20", "throughput": 19953.49, "total_tokens": 92121344}
|
|
{"current_steps": 29280, "total_steps": 78105, "loss": 0.2932, "lr": 3.9349222694686926e-06, "epoch": 1.8743998463606684, "percentage": 37.49, "elapsed_time": "1:16:57", "remaining_time": "2:08:19", "throughput": 19953.98, "total_tokens": 92136896}
|
|
{"current_steps": 29285, "total_steps": 78105, "loss": 0.3226, "lr": 3.93446476603546e-06, "epoch": 1.8747199283016451, "percentage": 37.49, "elapsed_time": "1:16:58", "remaining_time": "2:08:18", "throughput": 19954.33, "total_tokens": 92151168}
|
|
{"current_steps": 29290, "total_steps": 78105, "loss": 0.2129, "lr": 3.934007190972527e-06, "epoch": 1.8750400102426221, "percentage": 37.5, "elapsed_time": "1:16:58", "remaining_time": "2:08:17", "throughput": 19954.79, "total_tokens": 92166848}
|
|
{"current_steps": 29295, "total_steps": 78105, "loss": 0.3391, "lr": 3.933549544302745e-06, "epoch": 1.8753600921835991, "percentage": 37.51, "elapsed_time": "1:16:59", "remaining_time": "2:08:16", "throughput": 19955.28, "total_tokens": 92182400}
|
|
{"current_steps": 29300, "total_steps": 78105, "loss": 0.3735, "lr": 3.9330918260489656e-06, "epoch": 1.8756801741245759, "percentage": 37.51, "elapsed_time": "1:17:00", "remaining_time": "2:08:15", "throughput": 19955.72, "total_tokens": 92197824}
|
|
{"current_steps": 29305, "total_steps": 78105, "loss": 0.3093, "lr": 3.932634036234045e-06, "epoch": 1.8760002560655527, "percentage": 37.52, "elapsed_time": "1:17:00", "remaining_time": "2:08:14", "throughput": 19956.14, "total_tokens": 92212992}
|
|
{"current_steps": 29310, "total_steps": 78105, "loss": 0.3101, "lr": 3.932176174880843e-06, "epoch": 1.8763203380065296, "percentage": 37.53, "elapsed_time": "1:17:01", "remaining_time": "2:08:13", "throughput": 19956.64, "total_tokens": 92228864}
|
|
{"current_steps": 29315, "total_steps": 78105, "loss": 0.3471, "lr": 3.931718242012223e-06, "epoch": 1.8766404199475066, "percentage": 37.53, "elapsed_time": "1:17:02", "remaining_time": "2:08:12", "throughput": 19957.17, "total_tokens": 92244800}
|
|
{"current_steps": 29320, "total_steps": 78105, "loss": 0.2309, "lr": 3.931260237651052e-06, "epoch": 1.8769605018884834, "percentage": 37.54, "elapsed_time": "1:17:02", "remaining_time": "2:08:11", "throughput": 19957.57, "total_tokens": 92259648}
|
|
{"current_steps": 29325, "total_steps": 78105, "loss": 0.332, "lr": 3.930802161820199e-06, "epoch": 1.8772805838294604, "percentage": 37.55, "elapsed_time": "1:17:03", "remaining_time": "2:08:10", "throughput": 19958.02, "total_tokens": 92275136}
|
|
{"current_steps": 29330, "total_steps": 78105, "loss": 0.4056, "lr": 3.93034401454254e-06, "epoch": 1.8776006657704372, "percentage": 37.55, "elapsed_time": "1:17:04", "remaining_time": "2:08:09", "throughput": 19958.5, "total_tokens": 92290752}
|
|
{"current_steps": 29335, "total_steps": 78105, "loss": 0.3318, "lr": 3.92988579584095e-06, "epoch": 1.8779207477114142, "percentage": 37.56, "elapsed_time": "1:17:04", "remaining_time": "2:08:08", "throughput": 19958.92, "total_tokens": 92305728}
|
|
{"current_steps": 29340, "total_steps": 78105, "loss": 0.338, "lr": 3.929427505738312e-06, "epoch": 1.8782408296523911, "percentage": 37.56, "elapsed_time": "1:17:05", "remaining_time": "2:08:07", "throughput": 19959.37, "total_tokens": 92321088}
|
|
{"current_steps": 29345, "total_steps": 78105, "loss": 0.359, "lr": 3.92896914425751e-06, "epoch": 1.878560911593368, "percentage": 37.57, "elapsed_time": "1:17:06", "remaining_time": "2:08:06", "throughput": 19959.85, "total_tokens": 92336512}
|
|
{"current_steps": 29350, "total_steps": 78105, "loss": 0.437, "lr": 3.928510711421431e-06, "epoch": 1.8788809935343447, "percentage": 37.58, "elapsed_time": "1:17:06", "remaining_time": "2:08:05", "throughput": 19960.31, "total_tokens": 92352128}
|
|
{"current_steps": 29355, "total_steps": 78105, "loss": 0.3809, "lr": 3.9280522072529694e-06, "epoch": 1.8792010754753217, "percentage": 37.58, "elapsed_time": "1:17:07", "remaining_time": "2:08:04", "throughput": 19960.77, "total_tokens": 92367616}
|
|
{"current_steps": 29360, "total_steps": 78105, "loss": 0.3917, "lr": 3.927593631775019e-06, "epoch": 1.8795211574162987, "percentage": 37.59, "elapsed_time": "1:17:08", "remaining_time": "2:08:03", "throughput": 19961.23, "total_tokens": 92383104}
|
|
{"current_steps": 29365, "total_steps": 78105, "loss": 0.3139, "lr": 3.927134985010478e-06, "epoch": 1.8798412393572754, "percentage": 37.6, "elapsed_time": "1:17:08", "remaining_time": "2:08:02", "throughput": 19961.79, "total_tokens": 92399552}
|
|
{"current_steps": 29370, "total_steps": 78105, "loss": 0.292, "lr": 3.92667626698225e-06, "epoch": 1.8801613212982522, "percentage": 37.6, "elapsed_time": "1:17:09", "remaining_time": "2:08:01", "throughput": 19962.2, "total_tokens": 92414272}
|
|
{"current_steps": 29375, "total_steps": 78105, "loss": 0.4266, "lr": 3.92621747771324e-06, "epoch": 1.8804814032392292, "percentage": 37.61, "elapsed_time": "1:17:10", "remaining_time": "2:08:00", "throughput": 19962.69, "total_tokens": 92430464}
|
|
{"current_steps": 29380, "total_steps": 78105, "loss": 0.2903, "lr": 3.925758617226358e-06, "epoch": 1.8808014851802062, "percentage": 37.62, "elapsed_time": "1:17:10", "remaining_time": "2:07:59", "throughput": 19963.09, "total_tokens": 92445248}
|
|
{"current_steps": 29385, "total_steps": 78105, "loss": 0.3035, "lr": 3.925299685544518e-06, "epoch": 1.8811215671211832, "percentage": 37.62, "elapsed_time": "1:17:11", "remaining_time": "2:07:58", "throughput": 19963.6, "total_tokens": 92461376}
|
|
{"current_steps": 29390, "total_steps": 78105, "loss": 0.2456, "lr": 3.924840682690636e-06, "epoch": 1.88144164906216, "percentage": 37.63, "elapsed_time": "1:17:12", "remaining_time": "2:07:57", "throughput": 19964.05, "total_tokens": 92476736}
|
|
{"current_steps": 29395, "total_steps": 78105, "loss": 0.3066, "lr": 3.924381608687631e-06, "epoch": 1.8817617310031367, "percentage": 37.64, "elapsed_time": "1:17:12", "remaining_time": "2:07:56", "throughput": 19964.43, "total_tokens": 92491392}
|
|
{"current_steps": 29400, "total_steps": 78105, "loss": 0.4081, "lr": 3.923922463558428e-06, "epoch": 1.8820818129441137, "percentage": 37.64, "elapsed_time": "1:17:13", "remaining_time": "2:07:55", "throughput": 19964.82, "total_tokens": 92506240}
|
|
{"current_steps": 29405, "total_steps": 78105, "loss": 0.3408, "lr": 3.9234632473259535e-06, "epoch": 1.8824018948850907, "percentage": 37.65, "elapsed_time": "1:17:14", "remaining_time": "2:07:55", "throughput": 19965.28, "total_tokens": 92522240}
|
|
{"current_steps": 29410, "total_steps": 78105, "loss": 0.3953, "lr": 3.92300396001314e-06, "epoch": 1.8827219768260675, "percentage": 37.65, "elapsed_time": "1:17:14", "remaining_time": "2:07:54", "throughput": 19965.85, "total_tokens": 92538688}
|
|
{"current_steps": 29415, "total_steps": 78105, "loss": 0.2775, "lr": 3.922544601642921e-06, "epoch": 1.8830420587670442, "percentage": 37.66, "elapsed_time": "1:17:15", "remaining_time": "2:07:53", "throughput": 19966.23, "total_tokens": 92553664}
|
|
{"current_steps": 29420, "total_steps": 78105, "loss": 0.3444, "lr": 3.922085172238234e-06, "epoch": 1.8833621407080212, "percentage": 37.67, "elapsed_time": "1:17:16", "remaining_time": "2:07:52", "throughput": 19966.68, "total_tokens": 92569152}
|
|
{"current_steps": 29425, "total_steps": 78105, "loss": 0.3184, "lr": 3.921625671822021e-06, "epoch": 1.8836822226489982, "percentage": 37.67, "elapsed_time": "1:17:16", "remaining_time": "2:07:51", "throughput": 19967.12, "total_tokens": 92584320}
|
|
{"current_steps": 29430, "total_steps": 78105, "loss": 0.3453, "lr": 3.9211661004172265e-06, "epoch": 1.8840023045899752, "percentage": 37.68, "elapsed_time": "1:17:17", "remaining_time": "2:07:50", "throughput": 19967.64, "total_tokens": 92600576}
|
|
{"current_steps": 29435, "total_steps": 78105, "loss": 0.3237, "lr": 3.9207064580468e-06, "epoch": 1.884322386530952, "percentage": 37.69, "elapsed_time": "1:17:18", "remaining_time": "2:07:49", "throughput": 19968.12, "total_tokens": 92616384}
|
|
{"current_steps": 29440, "total_steps": 78105, "loss": 0.3214, "lr": 3.9202467447336935e-06, "epoch": 1.8846424684719287, "percentage": 37.69, "elapsed_time": "1:17:18", "remaining_time": "2:07:48", "throughput": 19968.72, "total_tokens": 92633216}
|
|
{"current_steps": 29445, "total_steps": 78105, "loss": 0.4061, "lr": 3.919786960500862e-06, "epoch": 1.8849625504129057, "percentage": 37.7, "elapsed_time": "1:17:19", "remaining_time": "2:07:47", "throughput": 19969.33, "total_tokens": 92650368}
|
|
{"current_steps": 29450, "total_steps": 78105, "loss": 0.3836, "lr": 3.919327105371264e-06, "epoch": 1.8852826323538827, "percentage": 37.71, "elapsed_time": "1:17:20", "remaining_time": "2:07:46", "throughput": 19969.95, "total_tokens": 92667520}
|
|
{"current_steps": 29455, "total_steps": 78105, "loss": 0.3477, "lr": 3.918867179367865e-06, "epoch": 1.8856027142948595, "percentage": 37.71, "elapsed_time": "1:17:21", "remaining_time": "2:07:45", "throughput": 19970.44, "total_tokens": 92683520}
|
|
{"current_steps": 29460, "total_steps": 78105, "loss": 0.3697, "lr": 3.918407182513629e-06, "epoch": 1.8859227962358363, "percentage": 37.72, "elapsed_time": "1:17:21", "remaining_time": "2:07:44", "throughput": 19971.09, "total_tokens": 92700928}
|
|
{"current_steps": 29465, "total_steps": 78105, "loss": 0.3807, "lr": 3.917947114831526e-06, "epoch": 1.8862428781768132, "percentage": 37.72, "elapsed_time": "1:17:22", "remaining_time": "2:07:43", "throughput": 19971.56, "total_tokens": 92716672}
|
|
{"current_steps": 29470, "total_steps": 78105, "loss": 0.2563, "lr": 3.91748697634453e-06, "epoch": 1.8865629601177902, "percentage": 37.73, "elapsed_time": "1:17:23", "remaining_time": "2:07:42", "throughput": 19972.11, "total_tokens": 92732928}
|
|
{"current_steps": 29475, "total_steps": 78105, "loss": 0.3888, "lr": 3.917026767075619e-06, "epoch": 1.886883042058767, "percentage": 37.74, "elapsed_time": "1:17:23", "remaining_time": "2:07:41", "throughput": 19972.54, "total_tokens": 92748224}
|
|
{"current_steps": 29480, "total_steps": 78105, "loss": 0.3139, "lr": 3.916566487047772e-06, "epoch": 1.8872031239997438, "percentage": 37.74, "elapsed_time": "1:17:24", "remaining_time": "2:07:40", "throughput": 19973.04, "total_tokens": 92764224}
|
|
{"current_steps": 29485, "total_steps": 78105, "loss": 0.3486, "lr": 3.916106136283973e-06, "epoch": 1.8875232059407208, "percentage": 37.75, "elapsed_time": "1:17:25", "remaining_time": "2:07:39", "throughput": 19973.5, "total_tokens": 92779776}
|
|
{"current_steps": 29490, "total_steps": 78105, "loss": 0.3538, "lr": 3.915645714807209e-06, "epoch": 1.8878432878816978, "percentage": 37.76, "elapsed_time": "1:17:25", "remaining_time": "2:07:38", "throughput": 19973.91, "total_tokens": 92795072}
|
|
{"current_steps": 29495, "total_steps": 78105, "loss": 0.1942, "lr": 3.9151852226404714e-06, "epoch": 1.8881633698226747, "percentage": 37.76, "elapsed_time": "1:17:26", "remaining_time": "2:07:37", "throughput": 19974.38, "total_tokens": 92810432}
|
|
{"current_steps": 29500, "total_steps": 78105, "loss": 0.3684, "lr": 3.914724659806757e-06, "epoch": 1.8884834517636515, "percentage": 37.77, "elapsed_time": "1:17:27", "remaining_time": "2:07:36", "throughput": 19974.86, "total_tokens": 92826048}
|
|
{"current_steps": 29505, "total_steps": 78105, "loss": 0.3988, "lr": 3.914264026329062e-06, "epoch": 1.8888035337046283, "percentage": 37.78, "elapsed_time": "1:17:27", "remaining_time": "2:07:35", "throughput": 19975.41, "total_tokens": 92842624}
|
|
{"current_steps": 29510, "total_steps": 78105, "loss": 0.4192, "lr": 3.9138033222303876e-06, "epoch": 1.8891236156456053, "percentage": 37.78, "elapsed_time": "1:17:28", "remaining_time": "2:07:34", "throughput": 19975.91, "total_tokens": 92859008}
|
|
{"current_steps": 29515, "total_steps": 78105, "loss": 0.2912, "lr": 3.913342547533739e-06, "epoch": 1.8894436975865823, "percentage": 37.79, "elapsed_time": "1:17:29", "remaining_time": "2:07:33", "throughput": 19976.34, "total_tokens": 92874560}
|
|
{"current_steps": 29520, "total_steps": 78105, "loss": 0.3068, "lr": 3.9128817022621255e-06, "epoch": 1.889763779527559, "percentage": 37.8, "elapsed_time": "1:17:29", "remaining_time": "2:07:32", "throughput": 19976.75, "total_tokens": 92889472}
|
|
{"current_steps": 29525, "total_steps": 78105, "loss": 0.2897, "lr": 3.91242078643856e-06, "epoch": 1.8900838614685358, "percentage": 37.8, "elapsed_time": "1:17:30", "remaining_time": "2:07:31", "throughput": 19977.19, "total_tokens": 92904640}
|
|
{"current_steps": 29530, "total_steps": 78105, "loss": 0.5306, "lr": 3.911959800086057e-06, "epoch": 1.8904039434095128, "percentage": 37.81, "elapsed_time": "1:17:31", "remaining_time": "2:07:30", "throughput": 19977.57, "total_tokens": 92919296}
|
|
{"current_steps": 29535, "total_steps": 78105, "loss": 0.3709, "lr": 3.911498743227636e-06, "epoch": 1.8907240253504898, "percentage": 37.81, "elapsed_time": "1:17:31", "remaining_time": "2:07:29", "throughput": 19978.0, "total_tokens": 92934656}
|
|
{"current_steps": 29540, "total_steps": 78105, "loss": 0.3673, "lr": 3.911037615886319e-06, "epoch": 1.8910441072914668, "percentage": 37.82, "elapsed_time": "1:17:32", "remaining_time": "2:07:28", "throughput": 19978.48, "total_tokens": 92950336}
|
|
{"current_steps": 29545, "total_steps": 78105, "loss": 0.3713, "lr": 3.910576418085135e-06, "epoch": 1.8913641892324435, "percentage": 37.83, "elapsed_time": "1:17:33", "remaining_time": "2:07:27", "throughput": 19978.94, "total_tokens": 92965696}
|
|
{"current_steps": 29550, "total_steps": 78105, "loss": 0.2579, "lr": 3.910115149847112e-06, "epoch": 1.8916842711734203, "percentage": 37.83, "elapsed_time": "1:17:33", "remaining_time": "2:07:26", "throughput": 19979.39, "total_tokens": 92981184}
|
|
{"current_steps": 29555, "total_steps": 78105, "loss": 0.2935, "lr": 3.909653811195283e-06, "epoch": 1.8920043531143973, "percentage": 37.84, "elapsed_time": "1:17:34", "remaining_time": "2:07:25", "throughput": 19979.9, "total_tokens": 92997056}
|
|
{"current_steps": 29560, "total_steps": 78105, "loss": 0.4026, "lr": 3.909192402152685e-06, "epoch": 1.8923244350553743, "percentage": 37.85, "elapsed_time": "1:17:35", "remaining_time": "2:07:25", "throughput": 19980.36, "total_tokens": 93012992}
|
|
{"current_steps": 29565, "total_steps": 78105, "loss": 0.3713, "lr": 3.90873092274236e-06, "epoch": 1.892644516996351, "percentage": 37.85, "elapsed_time": "1:17:35", "remaining_time": "2:07:24", "throughput": 19980.95, "total_tokens": 93030016}
|
|
{"current_steps": 29570, "total_steps": 78105, "loss": 0.4132, "lr": 3.908269372987349e-06, "epoch": 1.8929645989373278, "percentage": 37.86, "elapsed_time": "1:17:36", "remaining_time": "2:07:23", "throughput": 19981.51, "total_tokens": 93046592}
|
|
{"current_steps": 29575, "total_steps": 78105, "loss": 0.3479, "lr": 3.907807752910701e-06, "epoch": 1.8932846808783048, "percentage": 37.87, "elapsed_time": "1:17:37", "remaining_time": "2:07:22", "throughput": 19982.02, "total_tokens": 93062528}
|
|
{"current_steps": 29580, "total_steps": 78105, "loss": 0.3162, "lr": 3.907346062535467e-06, "epoch": 1.8936047628192818, "percentage": 37.87, "elapsed_time": "1:17:37", "remaining_time": "2:07:21", "throughput": 19982.51, "total_tokens": 93078464}
|
|
{"current_steps": 29585, "total_steps": 78105, "loss": 0.2174, "lr": 3.906884301884702e-06, "epoch": 1.8939248447602586, "percentage": 37.88, "elapsed_time": "1:17:38", "remaining_time": "2:07:20", "throughput": 19982.86, "total_tokens": 93092928}
|
|
{"current_steps": 29590, "total_steps": 78105, "loss": 0.2567, "lr": 3.906422470981462e-06, "epoch": 1.8942449267012356, "percentage": 37.88, "elapsed_time": "1:17:39", "remaining_time": "2:07:19", "throughput": 19983.28, "total_tokens": 93107648}
|
|
{"current_steps": 29595, "total_steps": 78105, "loss": 0.3699, "lr": 3.905960569848811e-06, "epoch": 1.8945650086422123, "percentage": 37.89, "elapsed_time": "1:17:39", "remaining_time": "2:07:18", "throughput": 19983.78, "total_tokens": 93123584}
|
|
{"current_steps": 29600, "total_steps": 78105, "loss": 0.3734, "lr": 3.905498598509812e-06, "epoch": 1.8948850905831893, "percentage": 37.9, "elapsed_time": "1:17:40", "remaining_time": "2:07:17", "throughput": 19984.25, "total_tokens": 93139328}
|
|
{"current_steps": 29605, "total_steps": 78105, "loss": 0.2258, "lr": 3.905036556987534e-06, "epoch": 1.8952051725241663, "percentage": 37.9, "elapsed_time": "1:17:41", "remaining_time": "2:07:16", "throughput": 19984.69, "total_tokens": 93154304}
|
|
{"current_steps": 29610, "total_steps": 78105, "loss": 0.3358, "lr": 3.904574445305048e-06, "epoch": 1.895525254465143, "percentage": 37.91, "elapsed_time": "1:17:41", "remaining_time": "2:07:15", "throughput": 19985.12, "total_tokens": 93169344}
|
|
{"current_steps": 29615, "total_steps": 78105, "loss": 0.2756, "lr": 3.904112263485431e-06, "epoch": 1.8958453364061199, "percentage": 37.92, "elapsed_time": "1:17:42", "remaining_time": "2:07:14", "throughput": 19985.57, "total_tokens": 93184704}
|
|
{"current_steps": 29620, "total_steps": 78105, "loss": 0.3054, "lr": 3.903650011551762e-06, "epoch": 1.8961654183470968, "percentage": 37.92, "elapsed_time": "1:17:43", "remaining_time": "2:07:13", "throughput": 19985.93, "total_tokens": 93199232}
|
|
{"current_steps": 29625, "total_steps": 78105, "loss": 0.2537, "lr": 3.903187689527121e-06, "epoch": 1.8964855002880738, "percentage": 37.93, "elapsed_time": "1:17:43", "remaining_time": "2:07:12", "throughput": 19986.43, "total_tokens": 93215552}
|
|
{"current_steps": 29630, "total_steps": 78105, "loss": 0.3309, "lr": 3.902725297434598e-06, "epoch": 1.8968055822290506, "percentage": 37.94, "elapsed_time": "1:17:44", "remaining_time": "2:07:11", "throughput": 19986.91, "total_tokens": 93231232}
|
|
{"current_steps": 29635, "total_steps": 78105, "loss": 0.3343, "lr": 3.902262835297278e-06, "epoch": 1.8971256641700274, "percentage": 37.94, "elapsed_time": "1:17:45", "remaining_time": "2:07:10", "throughput": 19987.33, "total_tokens": 93246464}
|
|
{"current_steps": 29640, "total_steps": 78105, "loss": 0.4265, "lr": 3.9018003031382565e-06, "epoch": 1.8974457461110044, "percentage": 37.95, "elapsed_time": "1:17:45", "remaining_time": "2:07:09", "throughput": 19987.87, "total_tokens": 93262976}
|
|
{"current_steps": 29645, "total_steps": 78105, "loss": 0.41, "lr": 3.9013377009806306e-06, "epoch": 1.8977658280519814, "percentage": 37.96, "elapsed_time": "1:17:46", "remaining_time": "2:07:08", "throughput": 19988.28, "total_tokens": 93278208}
|
|
{"current_steps": 29650, "total_steps": 78105, "loss": 0.3079, "lr": 3.900875028847499e-06, "epoch": 1.8980859099929583, "percentage": 37.96, "elapsed_time": "1:17:47", "remaining_time": "2:07:07", "throughput": 19988.91, "total_tokens": 93295744}
|
|
{"current_steps": 29655, "total_steps": 78105, "loss": 0.2987, "lr": 3.900412286761965e-06, "epoch": 1.898405991933935, "percentage": 37.97, "elapsed_time": "1:17:48", "remaining_time": "2:07:06", "throughput": 19989.25, "total_tokens": 93310016}
|
|
{"current_steps": 29660, "total_steps": 78105, "loss": 0.2464, "lr": 3.899949474747136e-06, "epoch": 1.8987260738749119, "percentage": 37.97, "elapsed_time": "1:17:48", "remaining_time": "2:07:05", "throughput": 19989.73, "total_tokens": 93326016}
|
|
{"current_steps": 29665, "total_steps": 78105, "loss": 0.3691, "lr": 3.899486592826122e-06, "epoch": 1.8990461558158889, "percentage": 37.98, "elapsed_time": "1:17:49", "remaining_time": "2:07:04", "throughput": 19990.26, "total_tokens": 93342272}
|
|
{"current_steps": 29670, "total_steps": 78105, "loss": 0.2123, "lr": 3.899023641022038e-06, "epoch": 1.8993662377568659, "percentage": 37.99, "elapsed_time": "1:17:50", "remaining_time": "2:07:03", "throughput": 19990.69, "total_tokens": 93357248}
|
|
{"current_steps": 29675, "total_steps": 78105, "loss": 0.2229, "lr": 3.898560619358001e-06, "epoch": 1.8996863196978426, "percentage": 37.99, "elapsed_time": "1:17:50", "remaining_time": "2:07:02", "throughput": 19991.14, "total_tokens": 93372480}
|
|
{"current_steps": 29680, "total_steps": 78105, "loss": 0.3734, "lr": 3.89809752785713e-06, "epoch": 1.9000064016388194, "percentage": 38.0, "elapsed_time": "1:17:51", "remaining_time": "2:07:01", "throughput": 19991.72, "total_tokens": 93389376}
|
|
{"current_steps": 29685, "total_steps": 78105, "loss": 0.2921, "lr": 3.897634366542552e-06, "epoch": 1.9003264835797964, "percentage": 38.01, "elapsed_time": "1:17:52", "remaining_time": "2:07:00", "throughput": 19992.24, "total_tokens": 93405568}
|
|
{"current_steps": 29690, "total_steps": 78105, "loss": 0.2887, "lr": 3.897171135437393e-06, "epoch": 1.9006465655207734, "percentage": 38.01, "elapsed_time": "1:17:52", "remaining_time": "2:06:59", "throughput": 19992.64, "total_tokens": 93420224}
|
|
{"current_steps": 29695, "total_steps": 78105, "loss": 0.3296, "lr": 3.896707834564785e-06, "epoch": 1.9009666474617504, "percentage": 38.02, "elapsed_time": "1:17:53", "remaining_time": "2:06:58", "throughput": 19993.08, "total_tokens": 93435904}
|
|
{"current_steps": 29700, "total_steps": 78105, "loss": 0.3328, "lr": 3.896244463947863e-06, "epoch": 1.9012867294027271, "percentage": 38.03, "elapsed_time": "1:17:54", "remaining_time": "2:06:57", "throughput": 19993.44, "total_tokens": 93450240}
|
|
{"current_steps": 29705, "total_steps": 78105, "loss": 0.2082, "lr": 3.895781023609766e-06, "epoch": 1.901606811343704, "percentage": 38.03, "elapsed_time": "1:17:54", "remaining_time": "2:06:56", "throughput": 19993.87, "total_tokens": 93465536}
|
|
{"current_steps": 29710, "total_steps": 78105, "loss": 0.3424, "lr": 3.8953175135736345e-06, "epoch": 1.901926893284681, "percentage": 38.04, "elapsed_time": "1:17:55", "remaining_time": "2:06:55", "throughput": 19994.26, "total_tokens": 93480704}
|
|
{"current_steps": 29715, "total_steps": 78105, "loss": 0.4787, "lr": 3.894853933862614e-06, "epoch": 1.9022469752256579, "percentage": 38.04, "elapsed_time": "1:17:56", "remaining_time": "2:06:54", "throughput": 19994.72, "total_tokens": 93496128}
|
|
{"current_steps": 29720, "total_steps": 78105, "loss": 0.2776, "lr": 3.894390284499855e-06, "epoch": 1.9025670571666347, "percentage": 38.05, "elapsed_time": "1:17:56", "remaining_time": "2:06:53", "throughput": 19995.14, "total_tokens": 93511232}
|
|
{"current_steps": 29725, "total_steps": 78105, "loss": 0.4588, "lr": 3.893926565508507e-06, "epoch": 1.9028871391076114, "percentage": 38.06, "elapsed_time": "1:17:57", "remaining_time": "2:06:52", "throughput": 19995.63, "total_tokens": 93526848}
|
|
{"current_steps": 29730, "total_steps": 78105, "loss": 0.3266, "lr": 3.893462776911727e-06, "epoch": 1.9032072210485884, "percentage": 38.06, "elapsed_time": "1:17:58", "remaining_time": "2:06:51", "throughput": 19996.02, "total_tokens": 93541504}
|
|
{"current_steps": 29735, "total_steps": 78105, "loss": 0.3062, "lr": 3.8929989187326745e-06, "epoch": 1.9035273029895654, "percentage": 38.07, "elapsed_time": "1:17:58", "remaining_time": "2:06:50", "throughput": 19996.49, "total_tokens": 93557056}
|
|
{"current_steps": 29740, "total_steps": 78105, "loss": 0.4412, "lr": 3.892534990994512e-06, "epoch": 1.9038473849305422, "percentage": 38.08, "elapsed_time": "1:17:59", "remaining_time": "2:06:49", "throughput": 19997.08, "total_tokens": 93574016}
|
|
{"current_steps": 29745, "total_steps": 78105, "loss": 0.3652, "lr": 3.892070993720405e-06, "epoch": 1.904167466871519, "percentage": 38.08, "elapsed_time": "1:18:00", "remaining_time": "2:06:48", "throughput": 19997.59, "total_tokens": 93590208}
|
|
{"current_steps": 29750, "total_steps": 78105, "loss": 0.3777, "lr": 3.891606926933524e-06, "epoch": 1.904487548812496, "percentage": 38.09, "elapsed_time": "1:18:00", "remaining_time": "2:06:48", "throughput": 19998.11, "total_tokens": 93606592}
|
|
{"current_steps": 29755, "total_steps": 78105, "loss": 0.3252, "lr": 3.891142790657041e-06, "epoch": 1.904807630753473, "percentage": 38.1, "elapsed_time": "1:18:01", "remaining_time": "2:06:47", "throughput": 19998.54, "total_tokens": 93621632}
|
|
{"current_steps": 29760, "total_steps": 78105, "loss": 0.2774, "lr": 3.890678584914134e-06, "epoch": 1.90512771269445, "percentage": 38.1, "elapsed_time": "1:18:02", "remaining_time": "2:06:46", "throughput": 19999.16, "total_tokens": 93638912}
|
|
{"current_steps": 29765, "total_steps": 78105, "loss": 0.4059, "lr": 3.890214309727983e-06, "epoch": 1.9054477946354267, "percentage": 38.11, "elapsed_time": "1:18:02", "remaining_time": "2:06:45", "throughput": 19999.71, "total_tokens": 93655552}
|
|
{"current_steps": 29770, "total_steps": 78105, "loss": 0.3385, "lr": 3.8897499651217695e-06, "epoch": 1.9057678765764035, "percentage": 38.12, "elapsed_time": "1:18:03", "remaining_time": "2:06:44", "throughput": 20000.11, "total_tokens": 93670400}
|
|
{"current_steps": 29775, "total_steps": 78105, "loss": 0.3391, "lr": 3.889285551118682e-06, "epoch": 1.9060879585173804, "percentage": 38.12, "elapsed_time": "1:18:04", "remaining_time": "2:06:43", "throughput": 20000.52, "total_tokens": 93686016}
|
|
{"current_steps": 29780, "total_steps": 78105, "loss": 0.2084, "lr": 3.8888210677419105e-06, "epoch": 1.9064080404583574, "percentage": 38.13, "elapsed_time": "1:18:04", "remaining_time": "2:06:42", "throughput": 20000.81, "total_tokens": 93699840}
|
|
{"current_steps": 29785, "total_steps": 78105, "loss": 0.2909, "lr": 3.888356515014649e-06, "epoch": 1.9067281223993342, "percentage": 38.13, "elapsed_time": "1:18:05", "remaining_time": "2:06:41", "throughput": 20001.26, "total_tokens": 93715456}
|
|
{"current_steps": 29790, "total_steps": 78105, "loss": 0.436, "lr": 3.887891892960095e-06, "epoch": 1.907048204340311, "percentage": 38.14, "elapsed_time": "1:18:06", "remaining_time": "2:06:40", "throughput": 20001.68, "total_tokens": 93730368}
|
|
{"current_steps": 29795, "total_steps": 78105, "loss": 0.3454, "lr": 3.8874272016014494e-06, "epoch": 1.907368286281288, "percentage": 38.15, "elapsed_time": "1:18:06", "remaining_time": "2:06:39", "throughput": 20002.09, "total_tokens": 93745536}
|
|
{"current_steps": 29800, "total_steps": 78105, "loss": 0.3915, "lr": 3.886962440961916e-06, "epoch": 1.907688368222265, "percentage": 38.15, "elapsed_time": "1:18:07", "remaining_time": "2:06:38", "throughput": 20002.6, "total_tokens": 93761664}
|
|
{"current_steps": 29805, "total_steps": 78105, "loss": 0.2676, "lr": 3.886497611064703e-06, "epoch": 1.908008450163242, "percentage": 38.16, "elapsed_time": "1:18:08", "remaining_time": "2:06:37", "throughput": 20003.22, "total_tokens": 93778880}
|
|
{"current_steps": 29810, "total_steps": 78105, "loss": 0.313, "lr": 3.886032711933021e-06, "epoch": 1.9083285321042187, "percentage": 38.17, "elapsed_time": "1:18:08", "remaining_time": "2:06:36", "throughput": 20003.75, "total_tokens": 93795456}
|
|
{"current_steps": 29815, "total_steps": 78105, "loss": 0.3767, "lr": 3.885567743590084e-06, "epoch": 1.9086486140451955, "percentage": 38.17, "elapsed_time": "1:18:09", "remaining_time": "2:06:35", "throughput": 20004.2, "total_tokens": 93810944}
|
|
{"current_steps": 29820, "total_steps": 78105, "loss": 0.3712, "lr": 3.885102706059112e-06, "epoch": 1.9089686959861725, "percentage": 38.18, "elapsed_time": "1:18:10", "remaining_time": "2:06:34", "throughput": 20004.65, "total_tokens": 93826432}
|
|
{"current_steps": 29825, "total_steps": 78105, "loss": 0.4187, "lr": 3.884637599363325e-06, "epoch": 1.9092887779271495, "percentage": 38.19, "elapsed_time": "1:18:10", "remaining_time": "2:06:33", "throughput": 20005.19, "total_tokens": 93843200}
|
|
{"current_steps": 29830, "total_steps": 78105, "loss": 0.4812, "lr": 3.8841724235259495e-06, "epoch": 1.9096088598681262, "percentage": 38.19, "elapsed_time": "1:18:11", "remaining_time": "2:06:32", "throughput": 20005.9, "total_tokens": 93861760}
|
|
{"current_steps": 29835, "total_steps": 78105, "loss": 0.3444, "lr": 3.883707178570213e-06, "epoch": 1.909928941809103, "percentage": 38.2, "elapsed_time": "1:18:12", "remaining_time": "2:06:31", "throughput": 20006.33, "total_tokens": 93877440}
|
|
{"current_steps": 29840, "total_steps": 78105, "loss": 0.3317, "lr": 3.883241864519347e-06, "epoch": 1.91024902375008, "percentage": 38.2, "elapsed_time": "1:18:13", "remaining_time": "2:06:30", "throughput": 20006.76, "total_tokens": 93892672}
|
|
{"current_steps": 29845, "total_steps": 78105, "loss": 0.2844, "lr": 3.882776481396588e-06, "epoch": 1.910569105691057, "percentage": 38.21, "elapsed_time": "1:18:13", "remaining_time": "2:06:29", "throughput": 20007.17, "total_tokens": 93907776}
|
|
{"current_steps": 29850, "total_steps": 78105, "loss": 0.4965, "lr": 3.882311029225173e-06, "epoch": 1.9108891876320337, "percentage": 38.22, "elapsed_time": "1:18:14", "remaining_time": "2:06:28", "throughput": 20007.59, "total_tokens": 93922880}
|
|
{"current_steps": 29855, "total_steps": 78105, "loss": 0.4514, "lr": 3.881845508028346e-06, "epoch": 1.9112092695730107, "percentage": 38.22, "elapsed_time": "1:18:15", "remaining_time": "2:06:27", "throughput": 20008.06, "total_tokens": 93938752}
|
|
{"current_steps": 29860, "total_steps": 78105, "loss": 0.4591, "lr": 3.8813799178293525e-06, "epoch": 1.9115293515139875, "percentage": 38.23, "elapsed_time": "1:18:15", "remaining_time": "2:06:26", "throughput": 20008.55, "total_tokens": 93954752}
|
|
{"current_steps": 29865, "total_steps": 78105, "loss": 0.3295, "lr": 3.8809142586514406e-06, "epoch": 1.9118494334549645, "percentage": 38.24, "elapsed_time": "1:18:16", "remaining_time": "2:06:25", "throughput": 20008.99, "total_tokens": 93970240}
|
|
{"current_steps": 29870, "total_steps": 78105, "loss": 0.2836, "lr": 3.880448530517864e-06, "epoch": 1.9121695153959415, "percentage": 38.24, "elapsed_time": "1:18:17", "remaining_time": "2:06:24", "throughput": 20009.4, "total_tokens": 93985152}
|
|
{"current_steps": 29875, "total_steps": 78105, "loss": 0.5027, "lr": 3.879982733451878e-06, "epoch": 1.9124895973369183, "percentage": 38.25, "elapsed_time": "1:18:17", "remaining_time": "2:06:23", "throughput": 20009.82, "total_tokens": 94000832}
|
|
{"current_steps": 29880, "total_steps": 78105, "loss": 0.2764, "lr": 3.8795168674767425e-06, "epoch": 1.912809679277895, "percentage": 38.26, "elapsed_time": "1:18:18", "remaining_time": "2:06:23", "throughput": 20010.28, "total_tokens": 94016640}
|
|
{"current_steps": 29885, "total_steps": 78105, "loss": 0.3356, "lr": 3.87905093261572e-06, "epoch": 1.913129761218872, "percentage": 38.26, "elapsed_time": "1:18:19", "remaining_time": "2:06:22", "throughput": 20010.86, "total_tokens": 94033472}
|
|
{"current_steps": 29890, "total_steps": 78105, "loss": 0.3717, "lr": 3.878584928892077e-06, "epoch": 1.913449843159849, "percentage": 38.27, "elapsed_time": "1:18:19", "remaining_time": "2:06:21", "throughput": 20011.46, "total_tokens": 94050432}
|
|
{"current_steps": 29895, "total_steps": 78105, "loss": 0.3477, "lr": 3.878118856329084e-06, "epoch": 1.9137699251008258, "percentage": 38.28, "elapsed_time": "1:18:20", "remaining_time": "2:06:20", "throughput": 20011.87, "total_tokens": 94065536}
|
|
{"current_steps": 29900, "total_steps": 78105, "loss": 0.3671, "lr": 3.877652714950014e-06, "epoch": 1.9140900070418025, "percentage": 38.28, "elapsed_time": "1:18:21", "remaining_time": "2:06:19", "throughput": 20012.26, "total_tokens": 94080256}
|
|
{"current_steps": 29905, "total_steps": 78105, "loss": 0.2348, "lr": 3.877186504778143e-06, "epoch": 1.9144100889827795, "percentage": 38.29, "elapsed_time": "1:18:21", "remaining_time": "2:06:18", "throughput": 20012.69, "total_tokens": 94095424}
|
|
{"current_steps": 29910, "total_steps": 78105, "loss": 0.2735, "lr": 3.876720225836751e-06, "epoch": 1.9147301709237565, "percentage": 38.29, "elapsed_time": "1:18:22", "remaining_time": "2:06:17", "throughput": 20013.11, "total_tokens": 94110464}
|
|
{"current_steps": 29915, "total_steps": 78105, "loss": 0.2435, "lr": 3.876253878149122e-06, "epoch": 1.9150502528647335, "percentage": 38.3, "elapsed_time": "1:18:23", "remaining_time": "2:06:16", "throughput": 20013.63, "total_tokens": 94126976}
|
|
{"current_steps": 29920, "total_steps": 78105, "loss": 0.2175, "lr": 3.875787461738544e-06, "epoch": 1.9153703348057103, "percentage": 38.31, "elapsed_time": "1:18:23", "remaining_time": "2:06:15", "throughput": 20014.17, "total_tokens": 94143616}
|
|
{"current_steps": 29925, "total_steps": 78105, "loss": 0.2355, "lr": 3.875320976628305e-06, "epoch": 1.915690416746687, "percentage": 38.31, "elapsed_time": "1:18:24", "remaining_time": "2:06:14", "throughput": 20014.67, "total_tokens": 94159744}
|
|
{"current_steps": 29930, "total_steps": 78105, "loss": 0.3534, "lr": 3.874854422841701e-06, "epoch": 1.916010498687664, "percentage": 38.32, "elapsed_time": "1:18:25", "remaining_time": "2:06:13", "throughput": 20015.19, "total_tokens": 94176064}
|
|
{"current_steps": 29935, "total_steps": 78105, "loss": 0.3364, "lr": 3.874387800402028e-06, "epoch": 1.916330580628641, "percentage": 38.33, "elapsed_time": "1:18:25", "remaining_time": "2:06:12", "throughput": 20015.62, "total_tokens": 94191424}
|
|
{"current_steps": 29940, "total_steps": 78105, "loss": 0.3102, "lr": 3.873921109332587e-06, "epoch": 1.9166506625696178, "percentage": 38.33, "elapsed_time": "1:18:26", "remaining_time": "2:06:11", "throughput": 20016.12, "total_tokens": 94207552}
|
|
{"current_steps": 29945, "total_steps": 78105, "loss": 0.3507, "lr": 3.873454349656683e-06, "epoch": 1.9169707445105946, "percentage": 38.34, "elapsed_time": "1:18:27", "remaining_time": "2:06:10", "throughput": 20016.53, "total_tokens": 94222784}
|
|
{"current_steps": 29950, "total_steps": 78105, "loss": 0.3056, "lr": 3.8729875213976235e-06, "epoch": 1.9172908264515716, "percentage": 38.35, "elapsed_time": "1:18:27", "remaining_time": "2:06:09", "throughput": 20017.01, "total_tokens": 94238784}
|
|
{"current_steps": 29955, "total_steps": 78105, "loss": 0.2813, "lr": 3.8725206245787175e-06, "epoch": 1.9176109083925486, "percentage": 38.35, "elapsed_time": "1:18:28", "remaining_time": "2:06:08", "throughput": 20017.5, "total_tokens": 94254912}
|
|
{"current_steps": 29960, "total_steps": 78105, "loss": 0.3429, "lr": 3.872053659223281e-06, "epoch": 1.9179309903335255, "percentage": 38.36, "elapsed_time": "1:18:29", "remaining_time": "2:06:07", "throughput": 20018.0, "total_tokens": 94270912}
|
|
{"current_steps": 29965, "total_steps": 78105, "loss": 0.2434, "lr": 3.871586625354632e-06, "epoch": 1.9182510722745023, "percentage": 38.37, "elapsed_time": "1:18:30", "remaining_time": "2:06:06", "throughput": 20018.51, "total_tokens": 94287296}
|
|
{"current_steps": 29970, "total_steps": 78105, "loss": 0.3745, "lr": 3.871119522996091e-06, "epoch": 1.918571154215479, "percentage": 38.37, "elapsed_time": "1:18:30", "remaining_time": "2:06:05", "throughput": 20019.06, "total_tokens": 94303744}
|
|
{"current_steps": 29975, "total_steps": 78105, "loss": 0.3355, "lr": 3.870652352170983e-06, "epoch": 1.918891236156456, "percentage": 38.38, "elapsed_time": "1:18:31", "remaining_time": "2:06:04", "throughput": 20019.5, "total_tokens": 94319552}
|
|
{"current_steps": 29980, "total_steps": 78105, "loss": 0.3076, "lr": 3.870185112902636e-06, "epoch": 1.919211318097433, "percentage": 38.38, "elapsed_time": "1:18:32", "remaining_time": "2:06:03", "throughput": 20020.0, "total_tokens": 94335616}
|
|
{"current_steps": 29985, "total_steps": 78105, "loss": 0.2829, "lr": 3.869717805214381e-06, "epoch": 1.9195314000384098, "percentage": 38.39, "elapsed_time": "1:18:32", "remaining_time": "2:06:03", "throughput": 20020.42, "total_tokens": 94351168}
|
|
{"current_steps": 29990, "total_steps": 78105, "loss": 0.3977, "lr": 3.869250429129553e-06, "epoch": 1.9198514819793866, "percentage": 38.4, "elapsed_time": "1:18:33", "remaining_time": "2:06:02", "throughput": 20020.9, "total_tokens": 94367104}
|
|
{"current_steps": 29995, "total_steps": 78105, "loss": 0.2625, "lr": 3.8687829846714915e-06, "epoch": 1.9201715639203636, "percentage": 38.4, "elapsed_time": "1:18:34", "remaining_time": "2:06:01", "throughput": 20021.3, "total_tokens": 94382720}
|
|
{"current_steps": 30000, "total_steps": 78105, "loss": 0.3499, "lr": 3.868315471863537e-06, "epoch": 1.9204916458613406, "percentage": 38.41, "elapsed_time": "1:18:34", "remaining_time": "2:06:00", "throughput": 20021.77, "total_tokens": 94398528}
|
|
{"current_steps": 30005, "total_steps": 78105, "loss": 0.353, "lr": 3.8678478907290365e-06, "epoch": 1.9208117278023173, "percentage": 38.42, "elapsed_time": "1:18:35", "remaining_time": "2:05:59", "throughput": 20022.19, "total_tokens": 94413568}
|
|
{"current_steps": 30010, "total_steps": 78105, "loss": 0.295, "lr": 3.8673802412913355e-06, "epoch": 1.9211318097432943, "percentage": 38.42, "elapsed_time": "1:18:36", "remaining_time": "2:05:58", "throughput": 20022.67, "total_tokens": 94429376}
|
|
{"current_steps": 30015, "total_steps": 78105, "loss": 0.3622, "lr": 3.8669125235737894e-06, "epoch": 1.921451891684271, "percentage": 38.43, "elapsed_time": "1:18:36", "remaining_time": "2:05:57", "throughput": 20023.11, "total_tokens": 94444608}
|
|
{"current_steps": 30020, "total_steps": 78105, "loss": 0.2796, "lr": 3.86644473759975e-06, "epoch": 1.921771973625248, "percentage": 38.44, "elapsed_time": "1:18:37", "remaining_time": "2:05:56", "throughput": 20023.55, "total_tokens": 94460224}
|
|
{"current_steps": 30025, "total_steps": 78105, "loss": 0.3065, "lr": 3.8659768833925795e-06, "epoch": 1.922092055566225, "percentage": 38.44, "elapsed_time": "1:18:38", "remaining_time": "2:05:55", "throughput": 20023.99, "total_tokens": 94476032}
|
|
{"current_steps": 30030, "total_steps": 78105, "loss": 0.2967, "lr": 3.865508960975638e-06, "epoch": 1.9224121375072019, "percentage": 38.45, "elapsed_time": "1:18:38", "remaining_time": "2:05:54", "throughput": 20024.24, "total_tokens": 94491264}
|
|
{"current_steps": 30035, "total_steps": 78105, "loss": 0.4575, "lr": 3.865040970372291e-06, "epoch": 1.9227322194481786, "percentage": 38.45, "elapsed_time": "1:18:39", "remaining_time": "2:05:53", "throughput": 20024.62, "total_tokens": 94505920}
|
|
{"current_steps": 30040, "total_steps": 78105, "loss": 0.3527, "lr": 3.86457291160591e-06, "epoch": 1.9230523013891556, "percentage": 38.46, "elapsed_time": "1:18:40", "remaining_time": "2:05:52", "throughput": 20025.05, "total_tokens": 94521216}
|
|
{"current_steps": 30045, "total_steps": 78105, "loss": 0.4377, "lr": 3.864104784699864e-06, "epoch": 1.9233723833301326, "percentage": 38.47, "elapsed_time": "1:18:40", "remaining_time": "2:05:51", "throughput": 20025.44, "total_tokens": 94536064}
|
|
{"current_steps": 30050, "total_steps": 78105, "loss": 0.3294, "lr": 3.86363658967753e-06, "epoch": 1.9236924652711094, "percentage": 38.47, "elapsed_time": "1:18:41", "remaining_time": "2:05:50", "throughput": 20025.88, "total_tokens": 94551808}
|
|
{"current_steps": 30055, "total_steps": 78105, "loss": 0.2252, "lr": 3.863168326562289e-06, "epoch": 1.9240125472120861, "percentage": 38.48, "elapsed_time": "1:18:42", "remaining_time": "2:05:49", "throughput": 20026.29, "total_tokens": 94567104}
|
|
{"current_steps": 30060, "total_steps": 78105, "loss": 0.3768, "lr": 3.862699995377521e-06, "epoch": 1.9243326291530631, "percentage": 38.49, "elapsed_time": "1:18:42", "remaining_time": "2:05:48", "throughput": 20027.0, "total_tokens": 94585536}
|
|
{"current_steps": 30065, "total_steps": 78105, "loss": 0.296, "lr": 3.862231596146614e-06, "epoch": 1.9246527110940401, "percentage": 38.49, "elapsed_time": "1:18:43", "remaining_time": "2:05:47", "throughput": 20027.36, "total_tokens": 94600448}
|
|
{"current_steps": 30070, "total_steps": 78105, "loss": 0.42, "lr": 3.861763128892957e-06, "epoch": 1.9249727930350171, "percentage": 38.5, "elapsed_time": "1:18:44", "remaining_time": "2:05:46", "throughput": 20027.8, "total_tokens": 94615872}
|
|
{"current_steps": 30075, "total_steps": 78105, "loss": 0.4731, "lr": 3.861294593639942e-06, "epoch": 1.9252928749759939, "percentage": 38.51, "elapsed_time": "1:18:44", "remaining_time": "2:05:45", "throughput": 20028.31, "total_tokens": 94631744}
|
|
{"current_steps": 30080, "total_steps": 78105, "loss": 0.3117, "lr": 3.860825990410966e-06, "epoch": 1.9256129569169707, "percentage": 38.51, "elapsed_time": "1:18:45", "remaining_time": "2:05:44", "throughput": 20028.77, "total_tokens": 94647552}
|
|
{"current_steps": 30085, "total_steps": 78105, "loss": 0.2447, "lr": 3.860357319229428e-06, "epoch": 1.9259330388579476, "percentage": 38.52, "elapsed_time": "1:18:46", "remaining_time": "2:05:43", "throughput": 20029.32, "total_tokens": 94664384}
|
|
{"current_steps": 30090, "total_steps": 78105, "loss": 0.3645, "lr": 3.859888580118732e-06, "epoch": 1.9262531207989246, "percentage": 38.53, "elapsed_time": "1:18:46", "remaining_time": "2:05:42", "throughput": 20029.68, "total_tokens": 94679296}
|
|
{"current_steps": 30095, "total_steps": 78105, "loss": 0.303, "lr": 3.859419773102283e-06, "epoch": 1.9265732027399014, "percentage": 38.53, "elapsed_time": "1:18:47", "remaining_time": "2:05:41", "throughput": 20030.19, "total_tokens": 94695424}
|
|
{"current_steps": 30100, "total_steps": 78105, "loss": 0.3463, "lr": 3.858950898203491e-06, "epoch": 1.9268932846808782, "percentage": 38.54, "elapsed_time": "1:18:48", "remaining_time": "2:05:40", "throughput": 20030.7, "total_tokens": 94711872}
|
|
{"current_steps": 30105, "total_steps": 78105, "loss": 0.2347, "lr": 3.858481955445771e-06, "epoch": 1.9272133666218552, "percentage": 38.54, "elapsed_time": "1:18:49", "remaining_time": "2:05:40", "throughput": 20031.21, "total_tokens": 94728320}
|
|
{"current_steps": 30110, "total_steps": 78105, "loss": 0.4611, "lr": 3.8580129448525375e-06, "epoch": 1.9275334485628322, "percentage": 38.55, "elapsed_time": "1:18:49", "remaining_time": "2:05:39", "throughput": 20031.6, "total_tokens": 94743488}
|
|
{"current_steps": 30115, "total_steps": 78105, "loss": 0.2032, "lr": 3.857543866447211e-06, "epoch": 1.927853530503809, "percentage": 38.56, "elapsed_time": "1:18:50", "remaining_time": "2:05:38", "throughput": 20032.03, "total_tokens": 94758912}
|
|
{"current_steps": 30120, "total_steps": 78105, "loss": 0.267, "lr": 3.857074720253215e-06, "epoch": 1.928173612444786, "percentage": 38.56, "elapsed_time": "1:18:51", "remaining_time": "2:05:37", "throughput": 20032.47, "total_tokens": 94774720}
|
|
{"current_steps": 30125, "total_steps": 78105, "loss": 0.2482, "lr": 3.856605506293977e-06, "epoch": 1.9284936943857627, "percentage": 38.57, "elapsed_time": "1:18:51", "remaining_time": "2:05:36", "throughput": 20032.99, "total_tokens": 94791296}
|
|
{"current_steps": 30130, "total_steps": 78105, "loss": 0.4299, "lr": 3.856136224592926e-06, "epoch": 1.9288137763267397, "percentage": 38.58, "elapsed_time": "1:18:52", "remaining_time": "2:05:35", "throughput": 20033.56, "total_tokens": 94808320}
|
|
{"current_steps": 30135, "total_steps": 78105, "loss": 0.3139, "lr": 3.855666875173496e-06, "epoch": 1.9291338582677167, "percentage": 38.58, "elapsed_time": "1:18:53", "remaining_time": "2:05:34", "throughput": 20034.02, "total_tokens": 94824192}
|
|
{"current_steps": 30140, "total_steps": 78105, "loss": 0.3475, "lr": 3.855197458059122e-06, "epoch": 1.9294539402086934, "percentage": 38.59, "elapsed_time": "1:18:53", "remaining_time": "2:05:33", "throughput": 20034.51, "total_tokens": 94839808}
|
|
{"current_steps": 30145, "total_steps": 78105, "loss": 0.2858, "lr": 3.854727973273248e-06, "epoch": 1.9297740221496702, "percentage": 38.6, "elapsed_time": "1:18:54", "remaining_time": "2:05:32", "throughput": 20034.96, "total_tokens": 94855424}
|
|
{"current_steps": 30150, "total_steps": 78105, "loss": 0.2617, "lr": 3.854258420839315e-06, "epoch": 1.9300941040906472, "percentage": 38.6, "elapsed_time": "1:18:57", "remaining_time": "2:05:34", "throughput": 20026.69, "total_tokens": 94872000}
|
|
{"current_steps": 30155, "total_steps": 78105, "loss": 0.3139, "lr": 3.85378880078077e-06, "epoch": 1.9304141860316242, "percentage": 38.61, "elapsed_time": "1:18:57", "remaining_time": "2:05:33", "throughput": 20027.23, "total_tokens": 94888512}
|
|
{"current_steps": 30160, "total_steps": 78105, "loss": 0.3184, "lr": 3.853319113121064e-06, "epoch": 1.930734267972601, "percentage": 38.61, "elapsed_time": "1:18:58", "remaining_time": "2:05:33", "throughput": 20027.68, "total_tokens": 94904640}
|
|
{"current_steps": 30165, "total_steps": 78105, "loss": 0.3605, "lr": 3.85284935788365e-06, "epoch": 1.9310543499135777, "percentage": 38.62, "elapsed_time": "1:18:59", "remaining_time": "2:05:32", "throughput": 20028.19, "total_tokens": 94920384}
|
|
{"current_steps": 30170, "total_steps": 78105, "loss": 0.3403, "lr": 3.852379535091987e-06, "epoch": 1.9313744318545547, "percentage": 38.63, "elapsed_time": "1:18:59", "remaining_time": "2:05:31", "throughput": 20028.6, "total_tokens": 94935424}
|
|
{"current_steps": 30175, "total_steps": 78105, "loss": 0.4861, "lr": 3.851909644769534e-06, "epoch": 1.9316945137955317, "percentage": 38.63, "elapsed_time": "1:19:00", "remaining_time": "2:05:30", "throughput": 20029.02, "total_tokens": 94950656}
|
|
{"current_steps": 30180, "total_steps": 78105, "loss": 0.2875, "lr": 3.851439686939755e-06, "epoch": 1.9320145957365087, "percentage": 38.64, "elapsed_time": "1:19:01", "remaining_time": "2:05:29", "throughput": 20029.51, "total_tokens": 94966976}
|
|
{"current_steps": 30185, "total_steps": 78105, "loss": 0.273, "lr": 3.8509696616261175e-06, "epoch": 1.9323346776774855, "percentage": 38.65, "elapsed_time": "1:19:02", "remaining_time": "2:05:28", "throughput": 20029.96, "total_tokens": 94982336}
|
|
{"current_steps": 30190, "total_steps": 78105, "loss": 0.4421, "lr": 3.850499568852092e-06, "epoch": 1.9326547596184622, "percentage": 38.65, "elapsed_time": "1:19:02", "remaining_time": "2:05:27", "throughput": 20030.39, "total_tokens": 94997568}
|
|
{"current_steps": 30195, "total_steps": 78105, "loss": 0.384, "lr": 3.850029408641153e-06, "epoch": 1.9329748415594392, "percentage": 38.66, "elapsed_time": "1:19:03", "remaining_time": "2:05:26", "throughput": 20030.8, "total_tokens": 95012608}
|
|
{"current_steps": 30200, "total_steps": 78105, "loss": 0.423, "lr": 3.849559181016777e-06, "epoch": 1.9332949235004162, "percentage": 38.67, "elapsed_time": "1:19:04", "remaining_time": "2:05:25", "throughput": 20031.26, "total_tokens": 95028544}
|
|
{"current_steps": 30205, "total_steps": 78105, "loss": 0.3546, "lr": 3.849088886002445e-06, "epoch": 1.933615005441393, "percentage": 38.67, "elapsed_time": "1:19:04", "remaining_time": "2:05:24", "throughput": 20031.66, "total_tokens": 95043648}
|
|
{"current_steps": 30210, "total_steps": 78105, "loss": 0.4762, "lr": 3.848618523621642e-06, "epoch": 1.9339350873823697, "percentage": 38.68, "elapsed_time": "1:19:05", "remaining_time": "2:05:23", "throughput": 20032.06, "total_tokens": 95059136}
|
|
{"current_steps": 30215, "total_steps": 78105, "loss": 0.3033, "lr": 3.8481480938978536e-06, "epoch": 1.9342551693233467, "percentage": 38.69, "elapsed_time": "1:19:06", "remaining_time": "2:05:22", "throughput": 20032.54, "total_tokens": 95075456}
|
|
{"current_steps": 30220, "total_steps": 78105, "loss": 0.2361, "lr": 3.847677596854572e-06, "epoch": 1.9345752512643237, "percentage": 38.69, "elapsed_time": "1:19:06", "remaining_time": "2:05:21", "throughput": 20032.98, "total_tokens": 95090944}
|
|
{"current_steps": 30225, "total_steps": 78105, "loss": 0.4519, "lr": 3.84720703251529e-06, "epoch": 1.9348953332053007, "percentage": 38.7, "elapsed_time": "1:19:08", "remaining_time": "2:05:21", "throughput": 20030.46, "total_tokens": 95106560}
|
|
{"current_steps": 30230, "total_steps": 78105, "loss": 0.2382, "lr": 3.846736400903507e-06, "epoch": 1.9352154151462775, "percentage": 38.7, "elapsed_time": "1:19:09", "remaining_time": "2:05:21", "throughput": 20027.5, "total_tokens": 95122560}
|
|
{"current_steps": 30235, "total_steps": 78105, "loss": 0.2277, "lr": 3.846265702042724e-06, "epoch": 1.9355354970872543, "percentage": 38.71, "elapsed_time": "1:19:10", "remaining_time": "2:05:20", "throughput": 20027.87, "total_tokens": 95137344}
|
|
{"current_steps": 30240, "total_steps": 78105, "loss": 0.3356, "lr": 3.845794935956443e-06, "epoch": 1.9358555790282312, "percentage": 38.72, "elapsed_time": "1:19:10", "remaining_time": "2:05:19", "throughput": 20028.35, "total_tokens": 95153216}
|
|
{"current_steps": 30245, "total_steps": 78105, "loss": 0.2712, "lr": 3.845324102668173e-06, "epoch": 1.9361756609692082, "percentage": 38.72, "elapsed_time": "1:19:11", "remaining_time": "2:05:19", "throughput": 20028.79, "total_tokens": 95169088}
|
|
{"current_steps": 30250, "total_steps": 78105, "loss": 0.3282, "lr": 3.844853202201425e-06, "epoch": 1.936495742910185, "percentage": 38.73, "elapsed_time": "1:19:13", "remaining_time": "2:05:20", "throughput": 20023.49, "total_tokens": 95184256}
|
|
{"current_steps": 30255, "total_steps": 78105, "loss": 0.3786, "lr": 3.844382234579714e-06, "epoch": 1.9368158248511618, "percentage": 38.74, "elapsed_time": "1:19:14", "remaining_time": "2:05:19", "throughput": 20023.98, "total_tokens": 95200640}
|
|
{"current_steps": 30260, "total_steps": 78105, "loss": 0.2359, "lr": 3.843911199826556e-06, "epoch": 1.9371359067921388, "percentage": 38.74, "elapsed_time": "1:19:14", "remaining_time": "2:05:18", "throughput": 20024.32, "total_tokens": 95215232}
|
|
{"current_steps": 30265, "total_steps": 78105, "loss": 0.3188, "lr": 3.843440097965473e-06, "epoch": 1.9374559887331158, "percentage": 38.75, "elapsed_time": "1:19:15", "remaining_time": "2:05:17", "throughput": 20024.72, "total_tokens": 95230208}
|
|
{"current_steps": 30270, "total_steps": 78105, "loss": 0.4323, "lr": 3.84296892901999e-06, "epoch": 1.9377760706740925, "percentage": 38.76, "elapsed_time": "1:19:16", "remaining_time": "2:05:16", "throughput": 20025.1, "total_tokens": 95245056}
|
|
{"current_steps": 30275, "total_steps": 78105, "loss": 0.278, "lr": 3.842497693013632e-06, "epoch": 1.9380961526150695, "percentage": 38.76, "elapsed_time": "1:19:16", "remaining_time": "2:05:15", "throughput": 20025.55, "total_tokens": 95260992}
|
|
{"current_steps": 30280, "total_steps": 78105, "loss": 0.2226, "lr": 3.842026389969933e-06, "epoch": 1.9384162345560463, "percentage": 38.77, "elapsed_time": "1:19:17", "remaining_time": "2:05:14", "throughput": 20026.08, "total_tokens": 95277696}
|
|
{"current_steps": 30285, "total_steps": 78105, "loss": 0.2805, "lr": 3.841555019912427e-06, "epoch": 1.9387363164970233, "percentage": 38.77, "elapsed_time": "1:19:18", "remaining_time": "2:05:13", "throughput": 20026.45, "total_tokens": 95292352}
|
|
{"current_steps": 30290, "total_steps": 78105, "loss": 0.4224, "lr": 3.841083582864651e-06, "epoch": 1.9390563984380003, "percentage": 38.78, "elapsed_time": "1:19:19", "remaining_time": "2:05:12", "throughput": 20026.94, "total_tokens": 95308224}
|
|
{"current_steps": 30295, "total_steps": 78105, "loss": 0.2287, "lr": 3.840612078850146e-06, "epoch": 1.939376480378977, "percentage": 38.79, "elapsed_time": "1:19:19", "remaining_time": "2:05:11", "throughput": 20027.39, "total_tokens": 95323840}
|
|
{"current_steps": 30300, "total_steps": 78105, "loss": 0.3425, "lr": 3.840140507892457e-06, "epoch": 1.9396965623199538, "percentage": 38.79, "elapsed_time": "1:19:20", "remaining_time": "2:05:10", "throughput": 20027.76, "total_tokens": 95338624}
|
|
{"current_steps": 30305, "total_steps": 78105, "loss": 0.2687, "lr": 3.839668870015131e-06, "epoch": 1.9400166442609308, "percentage": 38.8, "elapsed_time": "1:19:20", "remaining_time": "2:05:09", "throughput": 20028.17, "total_tokens": 95353920}
|
|
{"current_steps": 30310, "total_steps": 78105, "loss": 0.2283, "lr": 3.8391971652417206e-06, "epoch": 1.9403367262019078, "percentage": 38.81, "elapsed_time": "1:19:21", "remaining_time": "2:05:08", "throughput": 20028.54, "total_tokens": 95368704}
|
|
{"current_steps": 30315, "total_steps": 78105, "loss": 0.2701, "lr": 3.838725393595779e-06, "epoch": 1.9406568081428845, "percentage": 38.81, "elapsed_time": "1:19:22", "remaining_time": "2:05:07", "throughput": 20028.93, "total_tokens": 95384192}
|
|
{"current_steps": 30320, "total_steps": 78105, "loss": 0.3183, "lr": 3.838253555100865e-06, "epoch": 1.9409768900838613, "percentage": 38.82, "elapsed_time": "1:19:22", "remaining_time": "2:05:06", "throughput": 20029.28, "total_tokens": 95399040}
|
|
{"current_steps": 30325, "total_steps": 78105, "loss": 0.353, "lr": 3.837781649780539e-06, "epoch": 1.9412969720248383, "percentage": 38.83, "elapsed_time": "1:19:23", "remaining_time": "2:05:05", "throughput": 20029.74, "total_tokens": 95414784}
|
|
{"current_steps": 30330, "total_steps": 78105, "loss": 0.3793, "lr": 3.837309677658365e-06, "epoch": 1.9416170539658153, "percentage": 38.83, "elapsed_time": "1:19:24", "remaining_time": "2:05:04", "throughput": 20030.26, "total_tokens": 95431552}
|
|
{"current_steps": 30335, "total_steps": 78105, "loss": 0.3414, "lr": 3.836837638757911e-06, "epoch": 1.9419371359067923, "percentage": 38.84, "elapsed_time": "1:19:25", "remaining_time": "2:05:03", "throughput": 20030.96, "total_tokens": 95450112}
|
|
{"current_steps": 30340, "total_steps": 78105, "loss": 0.4655, "lr": 3.83636553310275e-06, "epoch": 1.942257217847769, "percentage": 38.85, "elapsed_time": "1:19:25", "remaining_time": "2:05:02", "throughput": 20031.37, "total_tokens": 95465344}
|
|
{"current_steps": 30345, "total_steps": 78105, "loss": 0.269, "lr": 3.835893360716454e-06, "epoch": 1.9425772997887458, "percentage": 38.85, "elapsed_time": "1:19:26", "remaining_time": "2:05:01", "throughput": 20031.75, "total_tokens": 95480256}
|
|
{"current_steps": 30350, "total_steps": 78105, "loss": 0.2237, "lr": 3.835421121622603e-06, "epoch": 1.9428973817297228, "percentage": 38.86, "elapsed_time": "1:19:27", "remaining_time": "2:05:00", "throughput": 20032.2, "total_tokens": 95495744}
|
|
{"current_steps": 30355, "total_steps": 78105, "loss": 0.3931, "lr": 3.8349488158447765e-06, "epoch": 1.9432174636706998, "percentage": 38.86, "elapsed_time": "1:19:27", "remaining_time": "2:05:00", "throughput": 20032.74, "total_tokens": 95512704}
|
|
{"current_steps": 30360, "total_steps": 78105, "loss": 0.3496, "lr": 3.83447644340656e-06, "epoch": 1.9435375456116766, "percentage": 38.87, "elapsed_time": "1:19:28", "remaining_time": "2:04:59", "throughput": 20033.22, "total_tokens": 95528832}
|
|
{"current_steps": 30365, "total_steps": 78105, "loss": 0.3195, "lr": 3.834004004331541e-06, "epoch": 1.9438576275526533, "percentage": 38.88, "elapsed_time": "1:19:29", "remaining_time": "2:04:58", "throughput": 20033.55, "total_tokens": 95543360}
|
|
{"current_steps": 30370, "total_steps": 78105, "loss": 0.3097, "lr": 3.83353149864331e-06, "epoch": 1.9441777094936303, "percentage": 38.88, "elapsed_time": "1:19:29", "remaining_time": "2:04:57", "throughput": 20033.95, "total_tokens": 95558720}
|
|
{"current_steps": 30375, "total_steps": 78105, "loss": 0.3635, "lr": 3.833058926365463e-06, "epoch": 1.9444977914346073, "percentage": 38.89, "elapsed_time": "1:19:30", "remaining_time": "2:04:56", "throughput": 20034.4, "total_tokens": 95574592}
|
|
{"current_steps": 30380, "total_steps": 78105, "loss": 0.3275, "lr": 3.8325862875215956e-06, "epoch": 1.944817873375584, "percentage": 38.9, "elapsed_time": "1:19:31", "remaining_time": "2:04:55", "throughput": 20034.82, "total_tokens": 95589952}
|
|
{"current_steps": 30385, "total_steps": 78105, "loss": 0.3504, "lr": 3.8321135821353114e-06, "epoch": 1.945137955316561, "percentage": 38.9, "elapsed_time": "1:19:31", "remaining_time": "2:04:54", "throughput": 20035.35, "total_tokens": 95606784}
|
|
{"current_steps": 30390, "total_steps": 78105, "loss": 0.4305, "lr": 3.831640810230213e-06, "epoch": 1.9454580372575379, "percentage": 38.91, "elapsed_time": "1:19:32", "remaining_time": "2:04:53", "throughput": 20035.78, "total_tokens": 95622464}
|
|
{"current_steps": 30395, "total_steps": 78105, "loss": 0.2161, "lr": 3.831167971829909e-06, "epoch": 1.9457781191985148, "percentage": 38.92, "elapsed_time": "1:19:33", "remaining_time": "2:04:52", "throughput": 20036.34, "total_tokens": 95639744}
|
|
{"current_steps": 30400, "total_steps": 78105, "loss": 0.3016, "lr": 3.8306950669580105e-06, "epoch": 1.9460982011394918, "percentage": 38.92, "elapsed_time": "1:19:34", "remaining_time": "2:04:51", "throughput": 20036.84, "total_tokens": 95656320}
|
|
{"current_steps": 30405, "total_steps": 78105, "loss": 0.2999, "lr": 3.8302220956381305e-06, "epoch": 1.9464182830804686, "percentage": 38.93, "elapsed_time": "1:19:34", "remaining_time": "2:04:50", "throughput": 20037.23, "total_tokens": 95671552}
|
|
{"current_steps": 30410, "total_steps": 78105, "loss": 0.3643, "lr": 3.829749057893889e-06, "epoch": 1.9467383650214454, "percentage": 38.93, "elapsed_time": "1:19:35", "remaining_time": "2:04:49", "throughput": 20037.68, "total_tokens": 95687360}
|
|
{"current_steps": 30415, "total_steps": 78105, "loss": 0.3055, "lr": 3.829275953748906e-06, "epoch": 1.9470584469624224, "percentage": 38.94, "elapsed_time": "1:19:36", "remaining_time": "2:04:48", "throughput": 20038.11, "total_tokens": 95702848}
|
|
{"current_steps": 30420, "total_steps": 78105, "loss": 0.2595, "lr": 3.828802783226805e-06, "epoch": 1.9473785289033994, "percentage": 38.95, "elapsed_time": "1:19:36", "remaining_time": "2:04:47", "throughput": 20038.61, "total_tokens": 95719040}
|
|
{"current_steps": 30425, "total_steps": 78105, "loss": 0.3429, "lr": 3.828329546351216e-06, "epoch": 1.9476986108443761, "percentage": 38.95, "elapsed_time": "1:19:37", "remaining_time": "2:04:46", "throughput": 20039.1, "total_tokens": 95735360}
|
|
{"current_steps": 30430, "total_steps": 78105, "loss": 0.3531, "lr": 3.827856243145768e-06, "epoch": 1.948018692785353, "percentage": 38.96, "elapsed_time": "1:19:38", "remaining_time": "2:04:45", "throughput": 20039.6, "total_tokens": 95751616}
|
|
{"current_steps": 30435, "total_steps": 78105, "loss": 0.5013, "lr": 3.827382873634095e-06, "epoch": 1.9483387747263299, "percentage": 38.97, "elapsed_time": "1:19:38", "remaining_time": "2:04:44", "throughput": 20040.02, "total_tokens": 95767168}
|
|
{"current_steps": 30440, "total_steps": 78105, "loss": 0.372, "lr": 3.826909437839836e-06, "epoch": 1.9486588566673069, "percentage": 38.97, "elapsed_time": "1:19:39", "remaining_time": "2:04:44", "throughput": 20040.4, "total_tokens": 95782464}
|
|
{"current_steps": 30445, "total_steps": 78105, "loss": 0.399, "lr": 3.82643593578663e-06, "epoch": 1.9489789386082839, "percentage": 38.98, "elapsed_time": "1:19:40", "remaining_time": "2:04:43", "throughput": 20040.83, "total_tokens": 95798400}
|
|
{"current_steps": 30450, "total_steps": 78105, "loss": 0.3005, "lr": 3.825962367498124e-06, "epoch": 1.9492990205492606, "percentage": 38.99, "elapsed_time": "1:19:40", "remaining_time": "2:04:42", "throughput": 20041.33, "total_tokens": 95814912}
|
|
{"current_steps": 30455, "total_steps": 78105, "loss": 0.2929, "lr": 3.825488732997963e-06, "epoch": 1.9496191024902374, "percentage": 38.99, "elapsed_time": "1:19:41", "remaining_time": "2:04:41", "throughput": 20041.8, "total_tokens": 95831232}
|
|
{"current_steps": 30460, "total_steps": 78105, "loss": 0.3082, "lr": 3.8250150323098e-06, "epoch": 1.9499391844312144, "percentage": 39.0, "elapsed_time": "1:19:42", "remaining_time": "2:04:40", "throughput": 20042.26, "total_tokens": 95847232}
|
|
{"current_steps": 30465, "total_steps": 78105, "loss": 0.223, "lr": 3.824541265457287e-06, "epoch": 1.9502592663721914, "percentage": 39.01, "elapsed_time": "1:19:42", "remaining_time": "2:04:39", "throughput": 20042.72, "total_tokens": 95863040}
|
|
{"current_steps": 30470, "total_steps": 78105, "loss": 0.3478, "lr": 3.824067432464083e-06, "epoch": 1.9505793483131681, "percentage": 39.01, "elapsed_time": "1:19:43", "remaining_time": "2:04:38", "throughput": 20043.13, "total_tokens": 95879040}
|
|
{"current_steps": 30475, "total_steps": 78105, "loss": 0.3021, "lr": 3.823593533353849e-06, "epoch": 1.950899430254145, "percentage": 39.02, "elapsed_time": "1:19:44", "remaining_time": "2:04:37", "throughput": 20043.61, "total_tokens": 95895360}
|
|
{"current_steps": 30480, "total_steps": 78105, "loss": 0.3454, "lr": 3.823119568150247e-06, "epoch": 1.951219512195122, "percentage": 39.02, "elapsed_time": "1:19:45", "remaining_time": "2:04:36", "throughput": 20044.11, "total_tokens": 95911872}
|
|
{"current_steps": 30485, "total_steps": 78105, "loss": 0.4653, "lr": 3.822645536876946e-06, "epoch": 1.951539594136099, "percentage": 39.03, "elapsed_time": "1:19:45", "remaining_time": "2:04:35", "throughput": 20044.59, "total_tokens": 95928256}
|
|
{"current_steps": 30490, "total_steps": 78105, "loss": 0.3173, "lr": 3.822171439557617e-06, "epoch": 1.951859676077076, "percentage": 39.04, "elapsed_time": "1:19:46", "remaining_time": "2:04:34", "throughput": 20045.23, "total_tokens": 95946560}
|
|
{"current_steps": 30495, "total_steps": 78105, "loss": 0.3329, "lr": 3.8216972762159325e-06, "epoch": 1.9521797580180527, "percentage": 39.04, "elapsed_time": "1:19:47", "remaining_time": "2:04:34", "throughput": 20045.76, "total_tokens": 95963520}
|
|
{"current_steps": 30500, "total_steps": 78105, "loss": 0.2395, "lr": 3.821223046875571e-06, "epoch": 1.9524998399590294, "percentage": 39.05, "elapsed_time": "1:19:47", "remaining_time": "2:04:33", "throughput": 20046.21, "total_tokens": 95979200}
|
|
{"current_steps": 30505, "total_steps": 78105, "loss": 0.228, "lr": 3.8207487515602115e-06, "epoch": 1.9528199219000064, "percentage": 39.06, "elapsed_time": "1:19:48", "remaining_time": "2:04:32", "throughput": 20046.61, "total_tokens": 95994368}
|
|
{"current_steps": 30510, "total_steps": 78105, "loss": 0.3221, "lr": 3.82027439029354e-06, "epoch": 1.9531400038409834, "percentage": 39.06, "elapsed_time": "1:19:49", "remaining_time": "2:04:31", "throughput": 20047.27, "total_tokens": 96012992}
|
|
{"current_steps": 30515, "total_steps": 78105, "loss": 0.4123, "lr": 3.819799963099242e-06, "epoch": 1.9534600857819602, "percentage": 39.07, "elapsed_time": "1:19:49", "remaining_time": "2:04:30", "throughput": 20047.67, "total_tokens": 96028288}
|
|
{"current_steps": 30520, "total_steps": 78105, "loss": 0.4322, "lr": 3.819325470001008e-06, "epoch": 1.953780167722937, "percentage": 39.08, "elapsed_time": "1:19:50", "remaining_time": "2:04:29", "throughput": 20048.18, "total_tokens": 96044992}
|
|
{"current_steps": 30525, "total_steps": 78105, "loss": 0.291, "lr": 3.818850911022534e-06, "epoch": 1.954100249663914, "percentage": 39.08, "elapsed_time": "1:19:51", "remaining_time": "2:04:28", "throughput": 20048.59, "total_tokens": 96060544}
|
|
{"current_steps": 30530, "total_steps": 78105, "loss": 0.3012, "lr": 3.818376286187513e-06, "epoch": 1.954420331604891, "percentage": 39.09, "elapsed_time": "1:19:52", "remaining_time": "2:04:27", "throughput": 20049.01, "total_tokens": 96076416}
|
|
{"current_steps": 30535, "total_steps": 78105, "loss": 0.3354, "lr": 3.817901595519648e-06, "epoch": 1.9547404135458677, "percentage": 39.09, "elapsed_time": "1:19:52", "remaining_time": "2:04:26", "throughput": 20049.35, "total_tokens": 96091136}
|
|
{"current_steps": 30540, "total_steps": 78105, "loss": 0.2579, "lr": 3.817426839042643e-06, "epoch": 1.9550604954868447, "percentage": 39.1, "elapsed_time": "1:19:53", "remaining_time": "2:04:25", "throughput": 20049.73, "total_tokens": 96106304}
|
|
{"current_steps": 30545, "total_steps": 78105, "loss": 0.2388, "lr": 3.816952016780203e-06, "epoch": 1.9553805774278215, "percentage": 39.11, "elapsed_time": "1:19:54", "remaining_time": "2:04:24", "throughput": 20050.18, "total_tokens": 96122624}
|
|
{"current_steps": 30550, "total_steps": 78105, "loss": 0.3569, "lr": 3.8164771287560394e-06, "epoch": 1.9557006593687984, "percentage": 39.11, "elapsed_time": "1:19:54", "remaining_time": "2:04:23", "throughput": 20050.64, "total_tokens": 96138688}
|
|
{"current_steps": 30555, "total_steps": 78105, "loss": 0.3053, "lr": 3.816002174993865e-06, "epoch": 1.9560207413097754, "percentage": 39.12, "elapsed_time": "1:19:55", "remaining_time": "2:04:22", "throughput": 20051.08, "total_tokens": 96154560}
|
|
{"current_steps": 30560, "total_steps": 78105, "loss": 0.2488, "lr": 3.815527155517397e-06, "epoch": 1.9563408232507522, "percentage": 39.13, "elapsed_time": "1:19:56", "remaining_time": "2:04:21", "throughput": 20051.52, "total_tokens": 96170624}
|
|
{"current_steps": 30565, "total_steps": 78105, "loss": 0.4322, "lr": 3.815052070350355e-06, "epoch": 1.956660905191729, "percentage": 39.13, "elapsed_time": "1:19:56", "remaining_time": "2:04:20", "throughput": 20051.94, "total_tokens": 96185984}
|
|
{"current_steps": 30570, "total_steps": 78105, "loss": 0.3333, "lr": 3.8145769195164628e-06, "epoch": 1.956980987132706, "percentage": 39.14, "elapsed_time": "1:19:57", "remaining_time": "2:04:19", "throughput": 20052.44, "total_tokens": 96202368}
|
|
{"current_steps": 30575, "total_steps": 78105, "loss": 0.2573, "lr": 3.8141017030394454e-06, "epoch": 1.957301069073683, "percentage": 39.15, "elapsed_time": "1:19:58", "remaining_time": "2:04:19", "throughput": 20052.94, "total_tokens": 96218816}
|
|
{"current_steps": 30580, "total_steps": 78105, "loss": 0.3146, "lr": 3.813626420943035e-06, "epoch": 1.9576211510146597, "percentage": 39.15, "elapsed_time": "1:19:58", "remaining_time": "2:04:18", "throughput": 20053.28, "total_tokens": 96233472}
|
|
{"current_steps": 30585, "total_steps": 78105, "loss": 0.3778, "lr": 3.8131510732509636e-06, "epoch": 1.9579412329556365, "percentage": 39.16, "elapsed_time": "1:19:59", "remaining_time": "2:04:17", "throughput": 20053.62, "total_tokens": 96248128}
|
|
{"current_steps": 30590, "total_steps": 78105, "loss": 0.3476, "lr": 3.8126756599869673e-06, "epoch": 1.9582613148966135, "percentage": 39.17, "elapsed_time": "1:20:00", "remaining_time": "2:04:16", "throughput": 20053.99, "total_tokens": 96263296}
|
|
{"current_steps": 30595, "total_steps": 78105, "loss": 0.2856, "lr": 3.812200181174786e-06, "epoch": 1.9585813968375905, "percentage": 39.17, "elapsed_time": "1:20:00", "remaining_time": "2:04:15", "throughput": 20054.38, "total_tokens": 96278400}
|
|
{"current_steps": 30600, "total_steps": 78105, "loss": 0.3761, "lr": 3.8117246368381627e-06, "epoch": 1.9589014787785675, "percentage": 39.18, "elapsed_time": "1:20:01", "remaining_time": "2:04:14", "throughput": 20054.8, "total_tokens": 96294080}
|
|
{"current_steps": 30605, "total_steps": 78105, "loss": 0.271, "lr": 3.811249027000844e-06, "epoch": 1.9592215607195442, "percentage": 39.18, "elapsed_time": "1:20:02", "remaining_time": "2:04:13", "throughput": 20055.11, "total_tokens": 96308416}
|
|
{"current_steps": 30610, "total_steps": 78105, "loss": 0.2707, "lr": 3.8107733516865787e-06, "epoch": 1.959541642660521, "percentage": 39.19, "elapsed_time": "1:20:02", "remaining_time": "2:04:12", "throughput": 20055.57, "total_tokens": 96324672}
|
|
{"current_steps": 30615, "total_steps": 78105, "loss": 0.5187, "lr": 3.810297610919119e-06, "epoch": 1.959861724601498, "percentage": 39.2, "elapsed_time": "1:20:03", "remaining_time": "2:04:11", "throughput": 20056.0, "total_tokens": 96340416}
|
|
{"current_steps": 30620, "total_steps": 78105, "loss": 0.3842, "lr": 3.8098218047222225e-06, "epoch": 1.960181806542475, "percentage": 39.2, "elapsed_time": "1:20:04", "remaining_time": "2:04:10", "throughput": 20056.4, "total_tokens": 96355584}
|
|
{"current_steps": 30625, "total_steps": 78105, "loss": 0.4289, "lr": 3.8093459331196474e-06, "epoch": 1.9605018884834517, "percentage": 39.21, "elapsed_time": "1:20:04", "remaining_time": "2:04:09", "throughput": 20056.79, "total_tokens": 96370752}
|
|
{"current_steps": 30630, "total_steps": 78105, "loss": 0.3577, "lr": 3.8088699961351573e-06, "epoch": 1.9608219704244285, "percentage": 39.22, "elapsed_time": "1:20:05", "remaining_time": "2:04:08", "throughput": 20057.32, "total_tokens": 96387136}
|
|
{"current_steps": 30635, "total_steps": 78105, "loss": 0.3082, "lr": 3.8083939937925157e-06, "epoch": 1.9611420523654055, "percentage": 39.22, "elapsed_time": "1:20:06", "remaining_time": "2:04:07", "throughput": 20057.73, "total_tokens": 96402560}
|
|
{"current_steps": 30640, "total_steps": 78105, "loss": 0.3179, "lr": 3.8079179261154942e-06, "epoch": 1.9614621343063825, "percentage": 39.23, "elapsed_time": "1:20:06", "remaining_time": "2:04:06", "throughput": 20058.32, "total_tokens": 96420288}
|
|
{"current_steps": 30645, "total_steps": 78105, "loss": 0.2698, "lr": 3.807441793127864e-06, "epoch": 1.9617822162473595, "percentage": 39.24, "elapsed_time": "1:20:07", "remaining_time": "2:04:05", "throughput": 20058.83, "total_tokens": 96436800}
|
|
{"current_steps": 30650, "total_steps": 78105, "loss": 0.2348, "lr": 3.8069655948534006e-06, "epoch": 1.9621022981883363, "percentage": 39.24, "elapsed_time": "1:20:08", "remaining_time": "2:04:04", "throughput": 20059.24, "total_tokens": 96451968}
|
|
{"current_steps": 30655, "total_steps": 78105, "loss": 0.4277, "lr": 3.8064893313158834e-06, "epoch": 1.962422380129313, "percentage": 39.25, "elapsed_time": "1:20:09", "remaining_time": "2:04:03", "throughput": 20059.69, "total_tokens": 96467840}
|
|
{"current_steps": 30660, "total_steps": 78105, "loss": 0.5217, "lr": 3.8060130025390946e-06, "epoch": 1.96274246207029, "percentage": 39.25, "elapsed_time": "1:20:09", "remaining_time": "2:04:02", "throughput": 20060.15, "total_tokens": 96483392}
|
|
{"current_steps": 30665, "total_steps": 78105, "loss": 0.2882, "lr": 3.805536608546819e-06, "epoch": 1.963062544011267, "percentage": 39.26, "elapsed_time": "1:20:10", "remaining_time": "2:04:01", "throughput": 20060.67, "total_tokens": 96500032}
|
|
{"current_steps": 30670, "total_steps": 78105, "loss": 0.5469, "lr": 3.805060149362846e-06, "epoch": 1.9633826259522438, "percentage": 39.27, "elapsed_time": "1:20:11", "remaining_time": "2:04:00", "throughput": 20061.13, "total_tokens": 96515904}
|
|
{"current_steps": 30675, "total_steps": 78105, "loss": 0.2334, "lr": 3.804583625010966e-06, "epoch": 1.9637027078932205, "percentage": 39.27, "elapsed_time": "1:20:11", "remaining_time": "2:03:59", "throughput": 20061.53, "total_tokens": 96531264}
|
|
{"current_steps": 30680, "total_steps": 78105, "loss": 0.3117, "lr": 3.804107035514975e-06, "epoch": 1.9640227898341975, "percentage": 39.28, "elapsed_time": "1:20:12", "remaining_time": "2:03:59", "throughput": 20061.94, "total_tokens": 96546816}
|
|
{"current_steps": 30685, "total_steps": 78105, "loss": 0.3487, "lr": 3.803630380898672e-06, "epoch": 1.9643428717751745, "percentage": 39.29, "elapsed_time": "1:20:13", "remaining_time": "2:03:58", "throughput": 20062.3, "total_tokens": 96561408}
|
|
{"current_steps": 30690, "total_steps": 78105, "loss": 0.3256, "lr": 3.803153661185858e-06, "epoch": 1.9646629537161513, "percentage": 39.29, "elapsed_time": "1:20:13", "remaining_time": "2:03:57", "throughput": 20062.78, "total_tokens": 96577152}
|
|
{"current_steps": 30695, "total_steps": 78105, "loss": 0.3654, "lr": 3.802676876400338e-06, "epoch": 1.964983035657128, "percentage": 39.3, "elapsed_time": "1:20:14", "remaining_time": "2:03:56", "throughput": 20063.23, "total_tokens": 96592960}
|
|
{"current_steps": 30700, "total_steps": 78105, "loss": 0.5017, "lr": 3.802200026565921e-06, "epoch": 1.965303117598105, "percentage": 39.31, "elapsed_time": "1:20:15", "remaining_time": "2:03:55", "throughput": 20063.62, "total_tokens": 96608192}
|
|
{"current_steps": 30705, "total_steps": 78105, "loss": 0.284, "lr": 3.8017231117064166e-06, "epoch": 1.965623199539082, "percentage": 39.31, "elapsed_time": "1:20:15", "remaining_time": "2:03:54", "throughput": 20064.08, "total_tokens": 96623680}
|
|
{"current_steps": 30710, "total_steps": 78105, "loss": 0.3915, "lr": 3.8012461318456407e-06, "epoch": 1.965943281480059, "percentage": 39.32, "elapsed_time": "1:20:16", "remaining_time": "2:03:53", "throughput": 20064.48, "total_tokens": 96638912}
|
|
{"current_steps": 30715, "total_steps": 78105, "loss": 0.3317, "lr": 3.800769087007411e-06, "epoch": 1.9662633634210358, "percentage": 39.33, "elapsed_time": "1:20:17", "remaining_time": "2:03:52", "throughput": 20065.02, "total_tokens": 96655744}
|
|
{"current_steps": 30720, "total_steps": 78105, "loss": 0.2268, "lr": 3.8002919772155477e-06, "epoch": 1.9665834453620126, "percentage": 39.33, "elapsed_time": "1:20:17", "remaining_time": "2:03:51", "throughput": 20065.47, "total_tokens": 96671616}
|
|
{"current_steps": 30725, "total_steps": 78105, "loss": 0.3062, "lr": 3.7998148024938775e-06, "epoch": 1.9669035273029896, "percentage": 39.34, "elapsed_time": "1:20:18", "remaining_time": "2:03:50", "throughput": 20065.86, "total_tokens": 96686976}
|
|
{"current_steps": 30730, "total_steps": 78105, "loss": 0.3562, "lr": 3.799337562866226e-06, "epoch": 1.9672236092439666, "percentage": 39.34, "elapsed_time": "1:20:19", "remaining_time": "2:03:49", "throughput": 20066.39, "total_tokens": 96703744}
|
|
{"current_steps": 30735, "total_steps": 78105, "loss": 0.4155, "lr": 3.7988602583564236e-06, "epoch": 1.9675436911849433, "percentage": 39.35, "elapsed_time": "1:20:19", "remaining_time": "2:03:48", "throughput": 20066.81, "total_tokens": 96719488}
|
|
{"current_steps": 30740, "total_steps": 78105, "loss": 0.3041, "lr": 3.7983828889883067e-06, "epoch": 1.96786377312592, "percentage": 39.36, "elapsed_time": "1:20:20", "remaining_time": "2:03:47", "throughput": 20067.37, "total_tokens": 96736448}
|
|
{"current_steps": 30745, "total_steps": 78105, "loss": 0.3438, "lr": 3.797905454785711e-06, "epoch": 1.968183855066897, "percentage": 39.36, "elapsed_time": "1:20:21", "remaining_time": "2:03:46", "throughput": 20067.85, "total_tokens": 96752960}
|
|
{"current_steps": 30750, "total_steps": 78105, "loss": 0.4202, "lr": 3.797427955772477e-06, "epoch": 1.968503937007874, "percentage": 39.37, "elapsed_time": "1:20:21", "remaining_time": "2:03:45", "throughput": 20068.22, "total_tokens": 96768192}
|
|
{"current_steps": 30755, "total_steps": 78105, "loss": 0.2469, "lr": 3.7969503919724493e-06, "epoch": 1.968824018948851, "percentage": 39.38, "elapsed_time": "1:20:22", "remaining_time": "2:03:44", "throughput": 20068.6, "total_tokens": 96783296}
|
|
{"current_steps": 30760, "total_steps": 78105, "loss": 0.3581, "lr": 3.796472763409475e-06, "epoch": 1.9691441008898278, "percentage": 39.38, "elapsed_time": "1:20:23", "remaining_time": "2:03:43", "throughput": 20069.05, "total_tokens": 96798976}
|
|
{"current_steps": 30765, "total_steps": 78105, "loss": 0.2204, "lr": 3.795995070107403e-06, "epoch": 1.9694641828308046, "percentage": 39.39, "elapsed_time": "1:20:23", "remaining_time": "2:03:42", "throughput": 20069.46, "total_tokens": 96814464}
|
|
{"current_steps": 30770, "total_steps": 78105, "loss": 0.2773, "lr": 3.795517312090089e-06, "epoch": 1.9697842647717816, "percentage": 39.4, "elapsed_time": "1:20:24", "remaining_time": "2:03:41", "throughput": 20069.82, "total_tokens": 96829120}
|
|
{"current_steps": 30775, "total_steps": 78105, "loss": 0.3547, "lr": 3.795039489381388e-06, "epoch": 1.9701043467127586, "percentage": 39.4, "elapsed_time": "1:20:25", "remaining_time": "2:03:40", "throughput": 20070.27, "total_tokens": 96844672}
|
|
{"current_steps": 30780, "total_steps": 78105, "loss": 0.3898, "lr": 3.79456160200516e-06, "epoch": 1.9704244286537353, "percentage": 39.41, "elapsed_time": "1:20:25", "remaining_time": "2:03:40", "throughput": 20070.73, "total_tokens": 96860544}
|
|
{"current_steps": 30785, "total_steps": 78105, "loss": 0.4881, "lr": 3.7940836499852697e-06, "epoch": 1.9707445105947121, "percentage": 39.41, "elapsed_time": "1:20:26", "remaining_time": "2:03:39", "throughput": 20071.15, "total_tokens": 96875968}
|
|
{"current_steps": 30790, "total_steps": 78105, "loss": 0.4323, "lr": 3.793605633345582e-06, "epoch": 1.971064592535689, "percentage": 39.42, "elapsed_time": "1:20:27", "remaining_time": "2:03:38", "throughput": 20071.62, "total_tokens": 96891904}
|
|
{"current_steps": 30795, "total_steps": 78105, "loss": 0.2951, "lr": 3.793127552109966e-06, "epoch": 1.971384674476666, "percentage": 39.43, "elapsed_time": "1:20:27", "remaining_time": "2:03:37", "throughput": 20072.05, "total_tokens": 96907840}
|
|
{"current_steps": 30800, "total_steps": 78105, "loss": 0.5639, "lr": 3.7926494063022978e-06, "epoch": 1.9717047564176429, "percentage": 39.43, "elapsed_time": "1:20:28", "remaining_time": "2:03:36", "throughput": 20072.45, "total_tokens": 96923072}
|
|
{"current_steps": 30805, "total_steps": 78105, "loss": 0.441, "lr": 3.79217119594645e-06, "epoch": 1.9720248383586199, "percentage": 39.44, "elapsed_time": "1:20:29", "remaining_time": "2:03:35", "throughput": 20072.83, "total_tokens": 96937792}
|
|
{"current_steps": 30810, "total_steps": 78105, "loss": 0.2818, "lr": 3.7916929210663037e-06, "epoch": 1.9723449202995966, "percentage": 39.45, "elapsed_time": "1:20:29", "remaining_time": "2:03:34", "throughput": 20073.21, "total_tokens": 96952960}
|
|
{"current_steps": 30815, "total_steps": 78105, "loss": 0.3657, "lr": 3.7912145816857414e-06, "epoch": 1.9726650022405736, "percentage": 39.45, "elapsed_time": "1:20:30", "remaining_time": "2:03:33", "throughput": 20073.72, "total_tokens": 96969792}
|
|
{"current_steps": 30820, "total_steps": 78105, "loss": 0.269, "lr": 3.790736177828648e-06, "epoch": 1.9729850841815506, "percentage": 39.46, "elapsed_time": "1:20:31", "remaining_time": "2:03:32", "throughput": 20074.3, "total_tokens": 96987072}
|
|
{"current_steps": 30825, "total_steps": 78105, "loss": 0.2769, "lr": 3.7902577095189137e-06, "epoch": 1.9733051661225274, "percentage": 39.47, "elapsed_time": "1:20:32", "remaining_time": "2:03:31", "throughput": 20074.83, "total_tokens": 97003968}
|
|
{"current_steps": 30830, "total_steps": 78105, "loss": 0.213, "lr": 3.78977917678043e-06, "epoch": 1.9736252480635041, "percentage": 39.47, "elapsed_time": "1:20:32", "remaining_time": "2:03:30", "throughput": 20075.25, "total_tokens": 97019456}
|
|
{"current_steps": 30835, "total_steps": 78105, "loss": 0.2732, "lr": 3.7893005796370924e-06, "epoch": 1.9739453300044811, "percentage": 39.48, "elapsed_time": "1:20:33", "remaining_time": "2:03:29", "throughput": 20075.64, "total_tokens": 97034432}
|
|
{"current_steps": 30840, "total_steps": 78105, "loss": 0.2402, "lr": 3.7888219181127995e-06, "epoch": 1.9742654119454581, "percentage": 39.49, "elapsed_time": "1:20:34", "remaining_time": "2:03:28", "throughput": 20076.17, "total_tokens": 97051392}
|
|
{"current_steps": 30845, "total_steps": 78105, "loss": 0.3779, "lr": 3.7883431922314532e-06, "epoch": 1.974585493886435, "percentage": 39.49, "elapsed_time": "1:20:34", "remaining_time": "2:03:27", "throughput": 20076.64, "total_tokens": 97067648}
|
|
{"current_steps": 30850, "total_steps": 78105, "loss": 0.4301, "lr": 3.7878644020169585e-06, "epoch": 1.9749055758274117, "percentage": 39.5, "elapsed_time": "1:20:35", "remaining_time": "2:03:26", "throughput": 20077.03, "total_tokens": 97082624}
|
|
{"current_steps": 30855, "total_steps": 78105, "loss": 0.423, "lr": 3.787385547493224e-06, "epoch": 1.9752256577683887, "percentage": 39.5, "elapsed_time": "1:20:36", "remaining_time": "2:03:25", "throughput": 20077.45, "total_tokens": 97098240}
|
|
{"current_steps": 30860, "total_steps": 78105, "loss": 0.4389, "lr": 3.7869066286841612e-06, "epoch": 1.9755457397093656, "percentage": 39.51, "elapsed_time": "1:20:36", "remaining_time": "2:03:25", "throughput": 20078.05, "total_tokens": 97115584}
|
|
{"current_steps": 30865, "total_steps": 78105, "loss": 0.2542, "lr": 3.786427645613684e-06, "epoch": 1.9758658216503426, "percentage": 39.52, "elapsed_time": "1:20:37", "remaining_time": "2:03:24", "throughput": 20078.45, "total_tokens": 97130432}
|
|
{"current_steps": 30870, "total_steps": 78105, "loss": 0.3285, "lr": 3.785948598305711e-06, "epoch": 1.9761859035913194, "percentage": 39.52, "elapsed_time": "1:20:38", "remaining_time": "2:03:23", "throughput": 20078.87, "total_tokens": 97145728}
|
|
{"current_steps": 30875, "total_steps": 78105, "loss": 0.3806, "lr": 3.7854694867841637e-06, "epoch": 1.9765059855322962, "percentage": 39.53, "elapsed_time": "1:20:38", "remaining_time": "2:03:22", "throughput": 20079.28, "total_tokens": 97161152}
|
|
{"current_steps": 30880, "total_steps": 78105, "loss": 0.2684, "lr": 3.784990311072966e-06, "epoch": 1.9768260674732732, "percentage": 39.54, "elapsed_time": "1:20:39", "remaining_time": "2:03:21", "throughput": 20079.66, "total_tokens": 97176128}
|
|
{"current_steps": 30885, "total_steps": 78105, "loss": 0.3563, "lr": 3.784511071196045e-06, "epoch": 1.9771461494142502, "percentage": 39.54, "elapsed_time": "1:20:40", "remaining_time": "2:03:20", "throughput": 20080.01, "total_tokens": 97190912}
|
|
{"current_steps": 30890, "total_steps": 78105, "loss": 0.2885, "lr": 3.784031767177332e-06, "epoch": 1.977466231355227, "percentage": 39.55, "elapsed_time": "1:20:40", "remaining_time": "2:03:19", "throughput": 20080.57, "total_tokens": 97208128}
|
|
{"current_steps": 30895, "total_steps": 78105, "loss": 0.3819, "lr": 3.783552399040761e-06, "epoch": 1.9777863132962037, "percentage": 39.56, "elapsed_time": "1:20:41", "remaining_time": "2:03:18", "throughput": 20080.97, "total_tokens": 97223744}
|
|
{"current_steps": 30900, "total_steps": 78105, "loss": 0.3393, "lr": 3.7830729668102694e-06, "epoch": 1.9781063952371807, "percentage": 39.56, "elapsed_time": "1:20:42", "remaining_time": "2:03:17", "throughput": 20081.44, "total_tokens": 97239872}
|
|
{"current_steps": 30905, "total_steps": 78105, "loss": 0.5342, "lr": 3.782593470509796e-06, "epoch": 1.9784264771781577, "percentage": 39.57, "elapsed_time": "1:20:42", "remaining_time": "2:03:16", "throughput": 20081.91, "total_tokens": 97255936}
|
|
{"current_steps": 30910, "total_steps": 78105, "loss": 0.2545, "lr": 3.7821139101632864e-06, "epoch": 1.9787465591191347, "percentage": 39.57, "elapsed_time": "1:20:43", "remaining_time": "2:03:15", "throughput": 20082.33, "total_tokens": 97271424}
|
|
{"current_steps": 30915, "total_steps": 78105, "loss": 0.4111, "lr": 3.7816342857946864e-06, "epoch": 1.9790666410601114, "percentage": 39.58, "elapsed_time": "1:20:44", "remaining_time": "2:03:14", "throughput": 20082.69, "total_tokens": 97286144}
|
|
{"current_steps": 30920, "total_steps": 78105, "loss": 0.4032, "lr": 3.7811545974279454e-06, "epoch": 1.9793867230010882, "percentage": 39.59, "elapsed_time": "1:20:44", "remaining_time": "2:03:13", "throughput": 20082.97, "total_tokens": 97300160}
|
|
{"current_steps": 30925, "total_steps": 78105, "loss": 0.3373, "lr": 3.780674845087017e-06, "epoch": 1.9797068049420652, "percentage": 39.59, "elapsed_time": "1:20:45", "remaining_time": "2:03:12", "throughput": 20083.31, "total_tokens": 97314816}
|
|
{"current_steps": 30930, "total_steps": 78105, "loss": 0.3002, "lr": 3.780195028795858e-06, "epoch": 1.9800268868830422, "percentage": 39.6, "elapsed_time": "1:20:46", "remaining_time": "2:03:11", "throughput": 20083.71, "total_tokens": 97329984}
|
|
{"current_steps": 30935, "total_steps": 78105, "loss": 0.311, "lr": 3.7797151485784277e-06, "epoch": 1.980346968824019, "percentage": 39.61, "elapsed_time": "1:20:46", "remaining_time": "2:03:10", "throughput": 20084.15, "total_tokens": 97345792}
|
|
{"current_steps": 30940, "total_steps": 78105, "loss": 0.322, "lr": 3.779235204458689e-06, "epoch": 1.9806670507649957, "percentage": 39.61, "elapsed_time": "1:20:47", "remaining_time": "2:03:09", "throughput": 20084.57, "total_tokens": 97361536}
|
|
{"current_steps": 30945, "total_steps": 78105, "loss": 0.2427, "lr": 3.7787551964606066e-06, "epoch": 1.9809871327059727, "percentage": 39.62, "elapsed_time": "1:20:48", "remaining_time": "2:03:08", "throughput": 20085.02, "total_tokens": 97377472}
|
|
{"current_steps": 30950, "total_steps": 78105, "loss": 0.4856, "lr": 3.7782751246081513e-06, "epoch": 1.9813072146469497, "percentage": 39.63, "elapsed_time": "1:20:48", "remaining_time": "2:03:07", "throughput": 20085.64, "total_tokens": 97395200}
|
|
{"current_steps": 30955, "total_steps": 78105, "loss": 0.3891, "lr": 3.7777949889252942e-06, "epoch": 1.9816272965879265, "percentage": 39.63, "elapsed_time": "1:20:49", "remaining_time": "2:03:06", "throughput": 20086.04, "total_tokens": 97410560}
|
|
{"current_steps": 30960, "total_steps": 78105, "loss": 0.2833, "lr": 3.7773147894360106e-06, "epoch": 1.9819473785289032, "percentage": 39.64, "elapsed_time": "1:20:50", "remaining_time": "2:03:05", "throughput": 20086.35, "total_tokens": 97424960}
|
|
{"current_steps": 30965, "total_steps": 78105, "loss": 0.3072, "lr": 3.7768345261642804e-06, "epoch": 1.9822674604698802, "percentage": 39.65, "elapsed_time": "1:20:50", "remaining_time": "2:03:04", "throughput": 20086.78, "total_tokens": 97440768}
|
|
{"current_steps": 30970, "total_steps": 78105, "loss": 0.3888, "lr": 3.776354199134085e-06, "epoch": 1.9825875424108572, "percentage": 39.65, "elapsed_time": "1:20:51", "remaining_time": "2:03:04", "throughput": 20087.25, "total_tokens": 97456960}
|
|
{"current_steps": 30975, "total_steps": 78105, "loss": 0.2775, "lr": 3.7758738083694084e-06, "epoch": 1.9829076243518342, "percentage": 39.66, "elapsed_time": "1:20:52", "remaining_time": "2:03:03", "throughput": 20087.59, "total_tokens": 97471424}
|
|
{"current_steps": 30980, "total_steps": 78105, "loss": 0.3476, "lr": 3.77539335389424e-06, "epoch": 1.983227706292811, "percentage": 39.66, "elapsed_time": "1:20:52", "remaining_time": "2:03:02", "throughput": 20087.99, "total_tokens": 97486784}
|
|
{"current_steps": 30985, "total_steps": 78105, "loss": 0.2606, "lr": 3.7749128357325706e-06, "epoch": 1.9835477882337877, "percentage": 39.67, "elapsed_time": "1:20:53", "remaining_time": "2:03:01", "throughput": 20088.46, "total_tokens": 97503104}
|
|
{"current_steps": 30990, "total_steps": 78105, "loss": 0.4286, "lr": 3.7744322539083956e-06, "epoch": 1.9838678701747647, "percentage": 39.68, "elapsed_time": "1:20:54", "remaining_time": "2:03:00", "throughput": 20088.92, "total_tokens": 97519296}
|
|
{"current_steps": 30995, "total_steps": 78105, "loss": 0.3691, "lr": 3.7739516084457104e-06, "epoch": 1.9841879521157417, "percentage": 39.68, "elapsed_time": "1:20:55", "remaining_time": "2:02:59", "throughput": 20089.34, "total_tokens": 97534656}
|
|
{"current_steps": 31000, "total_steps": 78105, "loss": 0.3158, "lr": 3.7734708993685194e-06, "epoch": 1.9845080340567185, "percentage": 39.69, "elapsed_time": "1:20:56", "remaining_time": "2:02:59", "throughput": 20090.54, "total_tokens": 97564672}
|
|
{"current_steps": 31005, "total_steps": 78105, "loss": 0.3386, "lr": 3.7729901267008246e-06, "epoch": 1.9848281159976953, "percentage": 39.7, "elapsed_time": "1:20:56", "remaining_time": "2:02:58", "throughput": 20090.96, "total_tokens": 97579904}
|
|
{"current_steps": 31010, "total_steps": 78105, "loss": 0.2765, "lr": 3.7725092904666337e-06, "epoch": 1.9851481979386723, "percentage": 39.7, "elapsed_time": "1:20:57", "remaining_time": "2:02:57", "throughput": 20091.37, "total_tokens": 97595520}
|
|
{"current_steps": 31015, "total_steps": 78105, "loss": 0.2711, "lr": 3.772028390689957e-06, "epoch": 1.9854682798796492, "percentage": 39.71, "elapsed_time": "1:20:58", "remaining_time": "2:02:56", "throughput": 20091.82, "total_tokens": 97611392}
|
|
{"current_steps": 31020, "total_steps": 78105, "loss": 0.4288, "lr": 3.771547427394807e-06, "epoch": 1.9857883618206262, "percentage": 39.72, "elapsed_time": "1:20:58", "remaining_time": "2:02:55", "throughput": 20092.28, "total_tokens": 97627328}
|
|
{"current_steps": 31025, "total_steps": 78105, "loss": 0.3723, "lr": 3.771066400605203e-06, "epoch": 1.986108443761603, "percentage": 39.72, "elapsed_time": "1:20:59", "remaining_time": "2:02:54", "throughput": 20092.63, "total_tokens": 97641984}
|
|
{"current_steps": 31030, "total_steps": 78105, "loss": 0.3042, "lr": 3.770585310345164e-06, "epoch": 1.9864285257025798, "percentage": 39.73, "elapsed_time": "1:21:00", "remaining_time": "2:02:53", "throughput": 20093.07, "total_tokens": 97657856}
|
|
{"current_steps": 31035, "total_steps": 78105, "loss": 0.2281, "lr": 3.7701041566387123e-06, "epoch": 1.9867486076435568, "percentage": 39.73, "elapsed_time": "1:21:00", "remaining_time": "2:02:52", "throughput": 20093.52, "total_tokens": 97673792}
|
|
{"current_steps": 31040, "total_steps": 78105, "loss": 0.3068, "lr": 3.769622939509875e-06, "epoch": 1.9870686895845338, "percentage": 39.74, "elapsed_time": "1:21:02", "remaining_time": "2:02:52", "throughput": 20091.69, "total_tokens": 97688768}
|
|
{"current_steps": 31045, "total_steps": 78105, "loss": 0.2365, "lr": 3.769141658982681e-06, "epoch": 1.9873887715255105, "percentage": 39.75, "elapsed_time": "1:21:02", "remaining_time": "2:02:51", "throughput": 20092.14, "total_tokens": 97704768}
|
|
{"current_steps": 31050, "total_steps": 78105, "loss": 0.3298, "lr": 3.768660315081163e-06, "epoch": 1.9877088534664873, "percentage": 39.75, "elapsed_time": "1:21:03", "remaining_time": "2:02:50", "throughput": 20092.49, "total_tokens": 97719360}
|
|
{"current_steps": 31055, "total_steps": 78105, "loss": 0.3928, "lr": 3.7681789078293575e-06, "epoch": 1.9880289354074643, "percentage": 39.76, "elapsed_time": "1:21:04", "remaining_time": "2:02:49", "throughput": 20092.88, "total_tokens": 97734528}
|
|
{"current_steps": 31060, "total_steps": 78105, "loss": 0.3649, "lr": 3.767697437251303e-06, "epoch": 1.9883490173484413, "percentage": 39.77, "elapsed_time": "1:21:04", "remaining_time": "2:02:48", "throughput": 20093.29, "total_tokens": 97749760}
|
|
{"current_steps": 31065, "total_steps": 78105, "loss": 0.4059, "lr": 3.7672159033710422e-06, "epoch": 1.988669099289418, "percentage": 39.77, "elapsed_time": "1:21:05", "remaining_time": "2:02:47", "throughput": 20093.79, "total_tokens": 97766144}
|
|
{"current_steps": 31070, "total_steps": 78105, "loss": 0.2611, "lr": 3.7667343062126193e-06, "epoch": 1.988989181230395, "percentage": 39.78, "elapsed_time": "1:21:06", "remaining_time": "2:02:46", "throughput": 20094.34, "total_tokens": 97783104}
|
|
{"current_steps": 31075, "total_steps": 78105, "loss": 0.3792, "lr": 3.766252645800083e-06, "epoch": 1.9893092631713718, "percentage": 39.79, "elapsed_time": "1:21:06", "remaining_time": "2:02:45", "throughput": 20094.75, "total_tokens": 97798528}
|
|
{"current_steps": 31080, "total_steps": 78105, "loss": 0.2394, "lr": 3.7657709221574856e-06, "epoch": 1.9896293451123488, "percentage": 39.79, "elapsed_time": "1:21:07", "remaining_time": "2:02:44", "throughput": 20095.13, "total_tokens": 97813376}
|
|
{"current_steps": 31085, "total_steps": 78105, "loss": 0.2987, "lr": 3.7652891353088814e-06, "epoch": 1.9899494270533258, "percentage": 39.8, "elapsed_time": "1:21:08", "remaining_time": "2:02:43", "throughput": 20095.49, "total_tokens": 97828480}
|
|
{"current_steps": 31090, "total_steps": 78105, "loss": 0.3523, "lr": 3.764807285278329e-06, "epoch": 1.9902695089943025, "percentage": 39.81, "elapsed_time": "1:21:08", "remaining_time": "2:02:42", "throughput": 20095.98, "total_tokens": 97844864}
|
|
{"current_steps": 31095, "total_steps": 78105, "loss": 0.3876, "lr": 3.7643253720898882e-06, "epoch": 1.9905895909352793, "percentage": 39.81, "elapsed_time": "1:21:09", "remaining_time": "2:02:41", "throughput": 20096.37, "total_tokens": 97859648}
|
|
{"current_steps": 31100, "total_steps": 78105, "loss": 0.4626, "lr": 3.763843395767624e-06, "epoch": 1.9909096728762563, "percentage": 39.82, "elapsed_time": "1:21:10", "remaining_time": "2:02:40", "throughput": 20096.79, "total_tokens": 97875136}
|
|
{"current_steps": 31105, "total_steps": 78105, "loss": 0.2729, "lr": 3.763361356335604e-06, "epoch": 1.9912297548172333, "percentage": 39.82, "elapsed_time": "1:21:10", "remaining_time": "2:02:39", "throughput": 20097.37, "total_tokens": 97892352}
|
|
{"current_steps": 31110, "total_steps": 78105, "loss": 0.286, "lr": 3.762879253817898e-06, "epoch": 1.99154983675821, "percentage": 39.83, "elapsed_time": "1:21:11", "remaining_time": "2:02:39", "throughput": 20097.8, "total_tokens": 97907712}
|
|
{"current_steps": 31115, "total_steps": 78105, "loss": 0.3811, "lr": 3.7623970882385806e-06, "epoch": 1.9918699186991868, "percentage": 39.84, "elapsed_time": "1:21:12", "remaining_time": "2:02:38", "throughput": 20098.24, "total_tokens": 97923456}
|
|
{"current_steps": 31120, "total_steps": 78105, "loss": 0.4266, "lr": 3.7619148596217283e-06, "epoch": 1.9921900006401638, "percentage": 39.84, "elapsed_time": "1:21:12", "remaining_time": "2:02:37", "throughput": 20098.68, "total_tokens": 97939136}
|
|
{"current_steps": 31125, "total_steps": 78105, "loss": 0.227, "lr": 3.761432567991421e-06, "epoch": 1.9925100825811408, "percentage": 39.85, "elapsed_time": "1:21:13", "remaining_time": "2:02:36", "throughput": 20099.07, "total_tokens": 97954368}
|
|
{"current_steps": 31130, "total_steps": 78105, "loss": 0.2321, "lr": 3.760950213371742e-06, "epoch": 1.9928301645221178, "percentage": 39.86, "elapsed_time": "1:21:14", "remaining_time": "2:02:35", "throughput": 20099.52, "total_tokens": 97970240}
|
|
{"current_steps": 31135, "total_steps": 78105, "loss": 0.3824, "lr": 3.7604677957867763e-06, "epoch": 1.9931502464630946, "percentage": 39.86, "elapsed_time": "1:21:14", "remaining_time": "2:02:34", "throughput": 20099.95, "total_tokens": 97986304}
|
|
{"current_steps": 31140, "total_steps": 78105, "loss": 0.3487, "lr": 3.7599853152606157e-06, "epoch": 1.9934703284040713, "percentage": 39.87, "elapsed_time": "1:21:15", "remaining_time": "2:02:33", "throughput": 20100.41, "total_tokens": 98002880}
|
|
{"current_steps": 31145, "total_steps": 78105, "loss": 0.245, "lr": 3.759502771817351e-06, "epoch": 1.9937904103450483, "percentage": 39.88, "elapsed_time": "1:21:16", "remaining_time": "2:02:32", "throughput": 20100.79, "total_tokens": 98017920}
|
|
{"current_steps": 31150, "total_steps": 78105, "loss": 0.3172, "lr": 3.759020165481079e-06, "epoch": 1.9941104922860253, "percentage": 39.88, "elapsed_time": "1:21:17", "remaining_time": "2:02:31", "throughput": 20101.24, "total_tokens": 98033792}
|
|
{"current_steps": 31155, "total_steps": 78105, "loss": 0.5168, "lr": 3.758537496275897e-06, "epoch": 1.994430574227002, "percentage": 39.89, "elapsed_time": "1:21:17", "remaining_time": "2:02:30", "throughput": 20101.71, "total_tokens": 98049920}
|
|
{"current_steps": 31160, "total_steps": 78105, "loss": 0.4145, "lr": 3.758054764225908e-06, "epoch": 1.9947506561679789, "percentage": 39.9, "elapsed_time": "1:21:18", "remaining_time": "2:02:29", "throughput": 20102.15, "total_tokens": 98065536}
|
|
{"current_steps": 31165, "total_steps": 78105, "loss": 0.3843, "lr": 3.7575719693552165e-06, "epoch": 1.9950707381089559, "percentage": 39.9, "elapsed_time": "1:21:19", "remaining_time": "2:02:28", "throughput": 20102.57, "total_tokens": 98080832}
|
|
{"current_steps": 31170, "total_steps": 78105, "loss": 0.3953, "lr": 3.7570891116879326e-06, "epoch": 1.9953908200499328, "percentage": 39.91, "elapsed_time": "1:21:19", "remaining_time": "2:02:27", "throughput": 20102.97, "total_tokens": 98096192}
|
|
{"current_steps": 31175, "total_steps": 78105, "loss": 0.2898, "lr": 3.7566061912481657e-06, "epoch": 1.9957109019909098, "percentage": 39.91, "elapsed_time": "1:21:20", "remaining_time": "2:02:26", "throughput": 20103.35, "total_tokens": 98111232}
|
|
{"current_steps": 31180, "total_steps": 78105, "loss": 0.2436, "lr": 3.756123208060031e-06, "epoch": 1.9960309839318866, "percentage": 39.92, "elapsed_time": "1:21:20", "remaining_time": "2:02:25", "throughput": 20103.73, "total_tokens": 98126208}
|
|
{"current_steps": 31185, "total_steps": 78105, "loss": 0.2302, "lr": 3.7556401621476466e-06, "epoch": 1.9963510658728634, "percentage": 39.93, "elapsed_time": "1:21:21", "remaining_time": "2:02:24", "throughput": 20104.17, "total_tokens": 98142272}
|
|
{"current_steps": 31190, "total_steps": 78105, "loss": 0.2753, "lr": 3.7551570535351334e-06, "epoch": 1.9966711478138404, "percentage": 39.93, "elapsed_time": "1:21:22", "remaining_time": "2:02:23", "throughput": 20104.56, "total_tokens": 98157120}
|
|
{"current_steps": 31195, "total_steps": 78105, "loss": 0.2842, "lr": 3.7546738822466134e-06, "epoch": 1.9969912297548174, "percentage": 39.94, "elapsed_time": "1:21:23", "remaining_time": "2:02:22", "throughput": 20105.02, "total_tokens": 98173248}
|
|
{"current_steps": 31200, "total_steps": 78105, "loss": 0.2516, "lr": 3.754190648306216e-06, "epoch": 1.9973113116957941, "percentage": 39.95, "elapsed_time": "1:21:23", "remaining_time": "2:02:22", "throughput": 20105.53, "total_tokens": 98190080}
|
|
{"current_steps": 31205, "total_steps": 78105, "loss": 0.3013, "lr": 3.75370735173807e-06, "epoch": 1.997631393636771, "percentage": 39.95, "elapsed_time": "1:21:24", "remaining_time": "2:02:21", "throughput": 20105.99, "total_tokens": 98206400}
|
|
{"current_steps": 31210, "total_steps": 78105, "loss": 0.3755, "lr": 3.7532239925663094e-06, "epoch": 1.9979514755777479, "percentage": 39.96, "elapsed_time": "1:21:25", "remaining_time": "2:02:20", "throughput": 20106.39, "total_tokens": 98221312}
|
|
{"current_steps": 31215, "total_steps": 78105, "loss": 0.3245, "lr": 3.7527405708150707e-06, "epoch": 1.9982715575187249, "percentage": 39.97, "elapsed_time": "1:21:25", "remaining_time": "2:02:19", "throughput": 20106.79, "total_tokens": 98236672}
|
|
{"current_steps": 31220, "total_steps": 78105, "loss": 0.3046, "lr": 3.752257086508493e-06, "epoch": 1.9985916394597016, "percentage": 39.97, "elapsed_time": "1:21:26", "remaining_time": "2:02:18", "throughput": 20107.22, "total_tokens": 98252224}
|
|
{"current_steps": 31225, "total_steps": 78105, "loss": 0.3517, "lr": 3.7517735396707184e-06, "epoch": 1.9989117214006784, "percentage": 39.98, "elapsed_time": "1:21:27", "remaining_time": "2:02:17", "throughput": 20107.62, "total_tokens": 98267776}
|
|
{"current_steps": 31230, "total_steps": 78105, "loss": 0.2957, "lr": 3.7512899303258943e-06, "epoch": 1.9992318033416554, "percentage": 39.98, "elapsed_time": "1:21:27", "remaining_time": "2:02:16", "throughput": 20108.16, "total_tokens": 98284672}
|
|
{"current_steps": 31235, "total_steps": 78105, "loss": 0.3742, "lr": 3.7508062584981686e-06, "epoch": 1.9995518852826324, "percentage": 39.99, "elapsed_time": "1:21:28", "remaining_time": "2:02:15", "throughput": 20108.69, "total_tokens": 98301696}
|
|
{"current_steps": 31240, "total_steps": 78105, "loss": 0.3667, "lr": 3.7503225242116937e-06, "epoch": 1.9998719672236094, "percentage": 40.0, "elapsed_time": "1:21:29", "remaining_time": "2:02:14", "throughput": 20109.1, "total_tokens": 98317056}
|
|
{"current_steps": 31245, "total_steps": 78105, "loss": 0.2658, "lr": 3.7498387274906253e-06, "epoch": 2.000192049164586, "percentage": 40.0, "elapsed_time": "1:21:29", "remaining_time": "2:02:13", "throughput": 20109.11, "total_tokens": 98332416}
|
|
{"current_steps": 31248, "total_steps": 78105, "eval_loss": 0.4481422007083893, "epoch": 2.0003840983291723, "percentage": 40.01, "elapsed_time": "1:22:21", "remaining_time": "2:03:29", "throughput": 19901.81, "total_tokens": 98341056}
|
|
{"current_steps": 31250, "total_steps": 78105, "loss": 0.1909, "lr": 3.7493548683591198e-06, "epoch": 2.000512131105563, "percentage": 40.01, "elapsed_time": "1:22:53", "remaining_time": "2:04:17", "throughput": 19772.43, "total_tokens": 98346752}
|
|
{"current_steps": 31255, "total_steps": 78105, "loss": 0.2203, "lr": 3.7488709468413405e-06, "epoch": 2.00083221304654, "percentage": 40.02, "elapsed_time": "1:22:54", "remaining_time": "2:04:16", "throughput": 19772.94, "total_tokens": 98362944}
|
|
{"current_steps": 31260, "total_steps": 78105, "loss": 0.2375, "lr": 3.748386962961451e-06, "epoch": 2.001152294987517, "percentage": 40.02, "elapsed_time": "1:22:55", "remaining_time": "2:04:15", "throughput": 19773.38, "total_tokens": 98378112}
|
|
{"current_steps": 31265, "total_steps": 78105, "loss": 0.1562, "lr": 3.7479029167436193e-06, "epoch": 2.001472376928494, "percentage": 40.03, "elapsed_time": "1:22:55", "remaining_time": "2:04:14", "throughput": 19773.82, "total_tokens": 98393536}
|
|
{"current_steps": 31270, "total_steps": 78105, "loss": 0.2236, "lr": 3.747418808212016e-06, "epoch": 2.0017924588694704, "percentage": 40.04, "elapsed_time": "1:22:56", "remaining_time": "2:04:13", "throughput": 19774.26, "total_tokens": 98409088}
|
|
{"current_steps": 31275, "total_steps": 78105, "loss": 0.1775, "lr": 3.7469346373908145e-06, "epoch": 2.0021125408104474, "percentage": 40.04, "elapsed_time": "1:22:57", "remaining_time": "2:04:12", "throughput": 19774.73, "total_tokens": 98424768}
|
|
{"current_steps": 31280, "total_steps": 78105, "loss": 0.1839, "lr": 3.7464504043041925e-06, "epoch": 2.0024326227514244, "percentage": 40.05, "elapsed_time": "1:22:57", "remaining_time": "2:04:11", "throughput": 19775.15, "total_tokens": 98439872}
|
|
{"current_steps": 31285, "total_steps": 78105, "loss": 0.167, "lr": 3.74596610897633e-06, "epoch": 2.0027527046924014, "percentage": 40.06, "elapsed_time": "1:22:58", "remaining_time": "2:04:10", "throughput": 19775.6, "total_tokens": 98455296}
|
|
{"current_steps": 31290, "total_steps": 78105, "loss": 0.211, "lr": 3.74548175143141e-06, "epoch": 2.003072786633378, "percentage": 40.06, "elapsed_time": "1:22:59", "remaining_time": "2:04:09", "throughput": 19776.09, "total_tokens": 98471360}
|
|
{"current_steps": 31295, "total_steps": 78105, "loss": 0.1803, "lr": 3.7449973316936195e-06, "epoch": 2.003392868574355, "percentage": 40.07, "elapsed_time": "1:23:00", "remaining_time": "2:04:08", "throughput": 19776.58, "total_tokens": 98487360}
|
|
{"current_steps": 31300, "total_steps": 78105, "loss": 0.2111, "lr": 3.7445128497871463e-06, "epoch": 2.003712950515332, "percentage": 40.07, "elapsed_time": "1:23:00", "remaining_time": "2:04:07", "throughput": 19777.02, "total_tokens": 98503104}
|
|
{"current_steps": 31305, "total_steps": 78105, "loss": 0.1779, "lr": 3.7440283057361844e-06, "epoch": 2.004033032456309, "percentage": 40.08, "elapsed_time": "1:23:01", "remaining_time": "2:04:07", "throughput": 19777.54, "total_tokens": 98519744}
|
|
{"current_steps": 31310, "total_steps": 78105, "loss": 0.176, "lr": 3.7435436995649282e-06, "epoch": 2.004353114397286, "percentage": 40.09, "elapsed_time": "1:23:02", "remaining_time": "2:04:06", "throughput": 19778.16, "total_tokens": 98537408}
|
|
{"current_steps": 31315, "total_steps": 78105, "loss": 0.1705, "lr": 3.7430590312975774e-06, "epoch": 2.0046731963382625, "percentage": 40.09, "elapsed_time": "1:23:02", "remaining_time": "2:04:05", "throughput": 19778.68, "total_tokens": 98553728}
|
|
{"current_steps": 31320, "total_steps": 78105, "loss": 0.2459, "lr": 3.742574300958334e-06, "epoch": 2.0049932782792395, "percentage": 40.1, "elapsed_time": "1:23:03", "remaining_time": "2:04:04", "throughput": 19779.23, "total_tokens": 98570432}
|
|
{"current_steps": 31325, "total_steps": 78105, "loss": 0.2012, "lr": 3.7420895085714014e-06, "epoch": 2.0053133602202164, "percentage": 40.11, "elapsed_time": "1:23:04", "remaining_time": "2:04:03", "throughput": 19779.77, "total_tokens": 98586816}
|
|
{"current_steps": 31330, "total_steps": 78105, "loss": 0.2318, "lr": 3.7416046541609892e-06, "epoch": 2.0056334421611934, "percentage": 40.11, "elapsed_time": "1:23:04", "remaining_time": "2:04:02", "throughput": 19780.23, "total_tokens": 98602432}
|
|
{"current_steps": 31335, "total_steps": 78105, "loss": 0.1981, "lr": 3.741119737751307e-06, "epoch": 2.00595352410217, "percentage": 40.12, "elapsed_time": "1:23:05", "remaining_time": "2:04:01", "throughput": 19780.66, "total_tokens": 98617664}
|
|
{"current_steps": 31340, "total_steps": 78105, "loss": 0.2127, "lr": 3.74063475936657e-06, "epoch": 2.006273606043147, "percentage": 40.13, "elapsed_time": "1:23:06", "remaining_time": "2:04:00", "throughput": 19781.1, "total_tokens": 98632896}
|
|
{"current_steps": 31345, "total_steps": 78105, "loss": 0.1451, "lr": 3.7401497190309955e-06, "epoch": 2.006593687984124, "percentage": 40.13, "elapsed_time": "1:23:06", "remaining_time": "2:03:59", "throughput": 19781.56, "total_tokens": 98648768}
|
|
{"current_steps": 31350, "total_steps": 78105, "loss": 0.144, "lr": 3.7396646167688043e-06, "epoch": 2.006913769925101, "percentage": 40.14, "elapsed_time": "1:23:07", "remaining_time": "2:03:58", "throughput": 19781.98, "total_tokens": 98664000}
|
|
{"current_steps": 31355, "total_steps": 78105, "loss": 0.3766, "lr": 3.7391794526042192e-06, "epoch": 2.0072338518660775, "percentage": 40.14, "elapsed_time": "1:23:08", "remaining_time": "2:03:57", "throughput": 19782.38, "total_tokens": 98678912}
|
|
{"current_steps": 31360, "total_steps": 78105, "loss": 0.2094, "lr": 3.7386942265614667e-06, "epoch": 2.0075539338070545, "percentage": 40.15, "elapsed_time": "1:23:08", "remaining_time": "2:03:56", "throughput": 19782.83, "total_tokens": 98694144}
|
|
{"current_steps": 31365, "total_steps": 78105, "loss": 0.2508, "lr": 3.738208938664776e-06, "epoch": 2.0078740157480315, "percentage": 40.16, "elapsed_time": "1:23:09", "remaining_time": "2:03:55", "throughput": 19783.25, "total_tokens": 98709376}
|
|
{"current_steps": 31370, "total_steps": 78105, "loss": 0.1977, "lr": 3.737723588938381e-06, "epoch": 2.0081940976890085, "percentage": 40.16, "elapsed_time": "1:23:10", "remaining_time": "2:03:54", "throughput": 19783.7, "total_tokens": 98724864}
|
|
{"current_steps": 31375, "total_steps": 78105, "loss": 0.1377, "lr": 3.737238177406518e-06, "epoch": 2.0085141796299855, "percentage": 40.17, "elapsed_time": "1:23:10", "remaining_time": "2:03:53", "throughput": 19784.13, "total_tokens": 98740160}
|
|
{"current_steps": 31380, "total_steps": 78105, "loss": 0.1949, "lr": 3.736752704093424e-06, "epoch": 2.008834261570962, "percentage": 40.18, "elapsed_time": "1:23:11", "remaining_time": "2:03:52", "throughput": 19784.54, "total_tokens": 98755456}
|
|
{"current_steps": 31385, "total_steps": 78105, "loss": 0.2044, "lr": 3.736267169023342e-06, "epoch": 2.009154343511939, "percentage": 40.18, "elapsed_time": "1:23:12", "remaining_time": "2:03:51", "throughput": 19785.19, "total_tokens": 98773504}
|
|
{"current_steps": 31390, "total_steps": 78105, "loss": 0.1996, "lr": 3.735781572220517e-06, "epoch": 2.009474425452916, "percentage": 40.19, "elapsed_time": "1:23:12", "remaining_time": "2:03:50", "throughput": 19785.65, "total_tokens": 98789120}
|
|
{"current_steps": 31395, "total_steps": 78105, "loss": 0.2712, "lr": 3.735295913709197e-06, "epoch": 2.009794507393893, "percentage": 40.2, "elapsed_time": "1:23:13", "remaining_time": "2:03:49", "throughput": 19786.19, "total_tokens": 98805440}
|
|
{"current_steps": 31400, "total_steps": 78105, "loss": 0.1854, "lr": 3.734810193513634e-06, "epoch": 2.0101145893348695, "percentage": 40.2, "elapsed_time": "1:23:14", "remaining_time": "2:03:48", "throughput": 19786.68, "total_tokens": 98821824}
|
|
{"current_steps": 31405, "total_steps": 78105, "loss": 0.2738, "lr": 3.7343244116580816e-06, "epoch": 2.0104346712758465, "percentage": 40.21, "elapsed_time": "1:23:15", "remaining_time": "2:03:47", "throughput": 19787.12, "total_tokens": 98837184}
|
|
{"current_steps": 31410, "total_steps": 78105, "loss": 0.2292, "lr": 3.7338385681667976e-06, "epoch": 2.0107547532168235, "percentage": 40.22, "elapsed_time": "1:23:15", "remaining_time": "2:03:46", "throughput": 19787.57, "total_tokens": 98852800}
|
|
{"current_steps": 31415, "total_steps": 78105, "loss": 0.2003, "lr": 3.7333526630640425e-06, "epoch": 2.0110748351578005, "percentage": 40.22, "elapsed_time": "1:23:16", "remaining_time": "2:03:45", "throughput": 19788.01, "total_tokens": 98868480}
|
|
{"current_steps": 31420, "total_steps": 78105, "loss": 0.1576, "lr": 3.732866696374079e-06, "epoch": 2.0113949170987775, "percentage": 40.23, "elapsed_time": "1:23:17", "remaining_time": "2:03:44", "throughput": 19788.38, "total_tokens": 98883072}
|
|
{"current_steps": 31425, "total_steps": 78105, "loss": 0.1872, "lr": 3.732380668121175e-06, "epoch": 2.011714999039754, "percentage": 40.23, "elapsed_time": "1:23:17", "remaining_time": "2:03:43", "throughput": 19788.8, "total_tokens": 98898560}
|
|
{"current_steps": 31430, "total_steps": 78105, "loss": 0.2457, "lr": 3.7318945783296002e-06, "epoch": 2.012035080980731, "percentage": 40.24, "elapsed_time": "1:23:18", "remaining_time": "2:03:42", "throughput": 19789.22, "total_tokens": 98914112}
|
|
{"current_steps": 31435, "total_steps": 78105, "loss": 0.1851, "lr": 3.731408427023626e-06, "epoch": 2.012355162921708, "percentage": 40.25, "elapsed_time": "1:23:19", "remaining_time": "2:03:41", "throughput": 19789.75, "total_tokens": 98930752}
|
|
{"current_steps": 31440, "total_steps": 78105, "loss": 0.1586, "lr": 3.730922214227529e-06, "epoch": 2.012675244862685, "percentage": 40.25, "elapsed_time": "1:23:19", "remaining_time": "2:03:40", "throughput": 19790.18, "total_tokens": 98945920}
|
|
{"current_steps": 31445, "total_steps": 78105, "loss": 0.1758, "lr": 3.730435939965589e-06, "epoch": 2.0129953268036616, "percentage": 40.26, "elapsed_time": "1:23:20", "remaining_time": "2:03:39", "throughput": 19790.71, "total_tokens": 98962560}
|
|
{"current_steps": 31450, "total_steps": 78105, "loss": 0.1546, "lr": 3.729949604262087e-06, "epoch": 2.0133154087446385, "percentage": 40.27, "elapsed_time": "1:23:21", "remaining_time": "2:03:39", "throughput": 19791.2, "total_tokens": 98978560}
|
|
{"current_steps": 31455, "total_steps": 78105, "loss": 0.1061, "lr": 3.7294632071413076e-06, "epoch": 2.0136354906856155, "percentage": 40.27, "elapsed_time": "1:23:21", "remaining_time": "2:03:38", "throughput": 19791.57, "total_tokens": 98993408}
|
|
{"current_steps": 31460, "total_steps": 78105, "loss": 0.123, "lr": 3.7289767486275406e-06, "epoch": 2.0139555726265925, "percentage": 40.28, "elapsed_time": "1:23:22", "remaining_time": "2:03:37", "throughput": 19792.12, "total_tokens": 99010112}
|
|
{"current_steps": 31465, "total_steps": 78105, "loss": 0.2356, "lr": 3.7284902287450765e-06, "epoch": 2.0142756545675695, "percentage": 40.29, "elapsed_time": "1:23:23", "remaining_time": "2:03:36", "throughput": 19792.65, "total_tokens": 99026880}
|
|
{"current_steps": 31470, "total_steps": 78105, "loss": 0.1587, "lr": 3.7280036475182083e-06, "epoch": 2.014595736508546, "percentage": 40.29, "elapsed_time": "1:23:23", "remaining_time": "2:03:35", "throughput": 19793.17, "total_tokens": 99043328}
|
|
{"current_steps": 31475, "total_steps": 78105, "loss": 0.2923, "lr": 3.7275170049712352e-06, "epoch": 2.014915818449523, "percentage": 40.3, "elapsed_time": "1:23:24", "remaining_time": "2:03:34", "throughput": 19793.75, "total_tokens": 99060544}
|
|
{"current_steps": 31480, "total_steps": 78105, "loss": 0.2537, "lr": 3.7270303011284557e-06, "epoch": 2.0152359003905, "percentage": 40.3, "elapsed_time": "1:23:25", "remaining_time": "2:03:33", "throughput": 19794.14, "total_tokens": 99075136}
|
|
{"current_steps": 31485, "total_steps": 78105, "loss": 0.2203, "lr": 3.7265435360141757e-06, "epoch": 2.015555982331477, "percentage": 40.31, "elapsed_time": "1:23:25", "remaining_time": "2:03:32", "throughput": 19794.61, "total_tokens": 99091200}
|
|
{"current_steps": 31490, "total_steps": 78105, "loss": 0.0922, "lr": 3.7260567096527e-06, "epoch": 2.0158760642724536, "percentage": 40.32, "elapsed_time": "1:23:26", "remaining_time": "2:03:31", "throughput": 19795.05, "total_tokens": 99106880}
|
|
{"current_steps": 31495, "total_steps": 78105, "loss": 0.2122, "lr": 3.7255698220683386e-06, "epoch": 2.0161961462134306, "percentage": 40.32, "elapsed_time": "1:23:27", "remaining_time": "2:03:30", "throughput": 19795.51, "total_tokens": 99122624}
|
|
{"current_steps": 31500, "total_steps": 78105, "loss": 0.2027, "lr": 3.7250828732854037e-06, "epoch": 2.0165162281544076, "percentage": 40.33, "elapsed_time": "1:23:27", "remaining_time": "2:03:29", "throughput": 19795.87, "total_tokens": 99137088}
|
|
{"current_steps": 31505, "total_steps": 78105, "loss": 0.2087, "lr": 3.7245958633282107e-06, "epoch": 2.0168363100953846, "percentage": 40.34, "elapsed_time": "1:23:28", "remaining_time": "2:03:28", "throughput": 19796.28, "total_tokens": 99152448}
|
|
{"current_steps": 31510, "total_steps": 78105, "loss": 0.2042, "lr": 3.7241087922210796e-06, "epoch": 2.017156392036361, "percentage": 40.34, "elapsed_time": "1:23:29", "remaining_time": "2:03:27", "throughput": 19796.71, "total_tokens": 99168128}
|
|
{"current_steps": 31515, "total_steps": 78105, "loss": 0.218, "lr": 3.7236216599883317e-06, "epoch": 2.017476473977338, "percentage": 40.35, "elapsed_time": "1:23:30", "remaining_time": "2:03:26", "throughput": 19797.13, "total_tokens": 99183808}
|
|
{"current_steps": 31520, "total_steps": 78105, "loss": 0.1838, "lr": 3.7231344666542917e-06, "epoch": 2.017796555918315, "percentage": 40.36, "elapsed_time": "1:23:30", "remaining_time": "2:03:25", "throughput": 19797.49, "total_tokens": 99198528}
|
|
{"current_steps": 31525, "total_steps": 78105, "loss": 0.2434, "lr": 3.7226472122432877e-06, "epoch": 2.018116637859292, "percentage": 40.36, "elapsed_time": "1:23:31", "remaining_time": "2:03:24", "throughput": 19797.81, "total_tokens": 99212864}
|
|
{"current_steps": 31530, "total_steps": 78105, "loss": 0.209, "lr": 3.72215989677965e-06, "epoch": 2.018436719800269, "percentage": 40.37, "elapsed_time": "1:23:31", "remaining_time": "2:03:23", "throughput": 19798.21, "total_tokens": 99228224}
|
|
{"current_steps": 31535, "total_steps": 78105, "loss": 0.193, "lr": 3.721672520287713e-06, "epoch": 2.0187568017412456, "percentage": 40.38, "elapsed_time": "1:23:32", "remaining_time": "2:03:22", "throughput": 19798.69, "total_tokens": 99244736}
|
|
{"current_steps": 31540, "total_steps": 78105, "loss": 0.264, "lr": 3.721185082791814e-06, "epoch": 2.0190768836822226, "percentage": 40.38, "elapsed_time": "1:23:33", "remaining_time": "2:03:21", "throughput": 19799.13, "total_tokens": 99260224}
|
|
{"current_steps": 31545, "total_steps": 78105, "loss": 0.1281, "lr": 3.720697584316293e-06, "epoch": 2.0193969656231996, "percentage": 40.39, "elapsed_time": "1:23:34", "remaining_time": "2:03:20", "throughput": 19799.56, "total_tokens": 99275968}
|
|
{"current_steps": 31550, "total_steps": 78105, "loss": 0.2161, "lr": 3.7202100248854932e-06, "epoch": 2.0197170475641766, "percentage": 40.39, "elapsed_time": "1:23:34", "remaining_time": "2:03:19", "throughput": 19800.03, "total_tokens": 99292032}
|
|
{"current_steps": 31555, "total_steps": 78105, "loss": 0.1193, "lr": 3.719722404523761e-06, "epoch": 2.020037129505153, "percentage": 40.4, "elapsed_time": "1:23:35", "remaining_time": "2:03:18", "throughput": 19800.51, "total_tokens": 99308288}
|
|
{"current_steps": 31560, "total_steps": 78105, "loss": 0.1895, "lr": 3.719234723255444e-06, "epoch": 2.02035721144613, "percentage": 40.41, "elapsed_time": "1:23:36", "remaining_time": "2:03:17", "throughput": 19800.99, "total_tokens": 99324480}
|
|
{"current_steps": 31565, "total_steps": 78105, "loss": 0.1933, "lr": 3.718746981104896e-06, "epoch": 2.020677293387107, "percentage": 40.41, "elapsed_time": "1:23:36", "remaining_time": "2:03:16", "throughput": 19801.56, "total_tokens": 99341696}
|
|
{"current_steps": 31570, "total_steps": 78105, "loss": 0.1834, "lr": 3.7182591780964715e-06, "epoch": 2.020997375328084, "percentage": 40.42, "elapsed_time": "1:23:37", "remaining_time": "2:03:15", "throughput": 19802.0, "total_tokens": 99356992}
|
|
{"current_steps": 31575, "total_steps": 78105, "loss": 0.1159, "lr": 3.7177713142545306e-06, "epoch": 2.021317457269061, "percentage": 40.43, "elapsed_time": "1:23:38", "remaining_time": "2:03:15", "throughput": 19802.51, "total_tokens": 99373248}
|
|
{"current_steps": 31580, "total_steps": 78105, "loss": 0.1897, "lr": 3.7172833896034332e-06, "epoch": 2.0216375392100376, "percentage": 40.43, "elapsed_time": "1:23:38", "remaining_time": "2:03:14", "throughput": 19802.9, "total_tokens": 99388160}
|
|
{"current_steps": 31585, "total_steps": 78105, "loss": 0.1918, "lr": 3.7167954041675435e-06, "epoch": 2.0219576211510146, "percentage": 40.44, "elapsed_time": "1:23:39", "remaining_time": "2:03:13", "throughput": 19803.43, "total_tokens": 99404800}
|
|
{"current_steps": 31590, "total_steps": 78105, "loss": 0.2138, "lr": 3.7163073579712294e-06, "epoch": 2.0222777030919916, "percentage": 40.45, "elapsed_time": "1:23:40", "remaining_time": "2:03:12", "throughput": 19803.9, "total_tokens": 99420736}
|
|
{"current_steps": 31595, "total_steps": 78105, "loss": 0.247, "lr": 3.715819251038861e-06, "epoch": 2.0225977850329686, "percentage": 40.45, "elapsed_time": "1:23:40", "remaining_time": "2:03:11", "throughput": 19804.33, "total_tokens": 99436288}
|
|
{"current_steps": 31600, "total_steps": 78105, "loss": 0.229, "lr": 3.715331083394813e-06, "epoch": 2.022917866973945, "percentage": 40.46, "elapsed_time": "1:23:41", "remaining_time": "2:03:10", "throughput": 19804.77, "total_tokens": 99451968}
|
|
{"current_steps": 31605, "total_steps": 78105, "loss": 0.219, "lr": 3.7148428550634607e-06, "epoch": 2.023237948914922, "percentage": 40.46, "elapsed_time": "1:23:42", "remaining_time": "2:03:09", "throughput": 19805.22, "total_tokens": 99467648}
|
|
{"current_steps": 31610, "total_steps": 78105, "loss": 0.174, "lr": 3.7143545660691845e-06, "epoch": 2.023558030855899, "percentage": 40.47, "elapsed_time": "1:23:42", "remaining_time": "2:03:08", "throughput": 19805.63, "total_tokens": 99482624}
|
|
{"current_steps": 31615, "total_steps": 78105, "loss": 0.316, "lr": 3.713866216436366e-06, "epoch": 2.023878112796876, "percentage": 40.48, "elapsed_time": "1:23:43", "remaining_time": "2:03:07", "throughput": 19806.06, "total_tokens": 99498240}
|
|
{"current_steps": 31620, "total_steps": 78105, "loss": 0.2307, "lr": 3.7133778061893915e-06, "epoch": 2.0241981947378527, "percentage": 40.48, "elapsed_time": "1:23:44", "remaining_time": "2:03:06", "throughput": 19806.5, "total_tokens": 99513536}
|
|
{"current_steps": 31625, "total_steps": 78105, "loss": 0.193, "lr": 3.7128893353526495e-06, "epoch": 2.0245182766788297, "percentage": 40.49, "elapsed_time": "1:23:45", "remaining_time": "2:03:05", "throughput": 19807.09, "total_tokens": 99530816}
|
|
{"current_steps": 31630, "total_steps": 78105, "loss": 0.224, "lr": 3.712400803950532e-06, "epoch": 2.0248383586198067, "percentage": 40.5, "elapsed_time": "1:23:45", "remaining_time": "2:03:04", "throughput": 19807.55, "total_tokens": 99546688}
|
|
{"current_steps": 31635, "total_steps": 78105, "loss": 0.1821, "lr": 3.7119122120074334e-06, "epoch": 2.0251584405607836, "percentage": 40.5, "elapsed_time": "1:23:46", "remaining_time": "2:03:03", "throughput": 19808.09, "total_tokens": 99563072}
|
|
{"current_steps": 31640, "total_steps": 78105, "loss": 0.172, "lr": 3.7114235595477522e-06, "epoch": 2.0254785225017606, "percentage": 40.51, "elapsed_time": "1:23:47", "remaining_time": "2:03:02", "throughput": 19808.55, "total_tokens": 99578432}
|
|
{"current_steps": 31645, "total_steps": 78105, "loss": 0.2526, "lr": 3.7109348465958884e-06, "epoch": 2.025798604442737, "percentage": 40.52, "elapsed_time": "1:23:47", "remaining_time": "2:03:01", "throughput": 19809.02, "total_tokens": 99594048}
|
|
{"current_steps": 31650, "total_steps": 78105, "loss": 0.1652, "lr": 3.710446073176245e-06, "epoch": 2.026118686383714, "percentage": 40.52, "elapsed_time": "1:23:48", "remaining_time": "2:03:00", "throughput": 19809.56, "total_tokens": 99610496}
|
|
{"current_steps": 31655, "total_steps": 78105, "loss": 0.2609, "lr": 3.7099572393132303e-06, "epoch": 2.026438768324691, "percentage": 40.53, "elapsed_time": "1:23:49", "remaining_time": "2:02:59", "throughput": 19810.06, "total_tokens": 99626432}
|
|
{"current_steps": 31660, "total_steps": 78105, "loss": 0.1763, "lr": 3.709468345031254e-06, "epoch": 2.026758850265668, "percentage": 40.54, "elapsed_time": "1:23:49", "remaining_time": "2:02:58", "throughput": 19810.41, "total_tokens": 99641088}
|
|
{"current_steps": 31665, "total_steps": 78105, "loss": 0.2441, "lr": 3.7089793903547276e-06, "epoch": 2.0270789322066447, "percentage": 40.54, "elapsed_time": "1:23:50", "remaining_time": "2:02:57", "throughput": 19810.85, "total_tokens": 99656448}
|
|
{"current_steps": 31670, "total_steps": 78105, "loss": 0.2303, "lr": 3.708490375308068e-06, "epoch": 2.0273990141476217, "percentage": 40.55, "elapsed_time": "1:23:51", "remaining_time": "2:02:56", "throughput": 19811.26, "total_tokens": 99671424}
|
|
{"current_steps": 31675, "total_steps": 78105, "loss": 0.2227, "lr": 3.7080012999156943e-06, "epoch": 2.0277190960885987, "percentage": 40.55, "elapsed_time": "1:23:51", "remaining_time": "2:02:55", "throughput": 19811.64, "total_tokens": 99685952}
|
|
{"current_steps": 31680, "total_steps": 78105, "loss": 0.1612, "lr": 3.7075121642020273e-06, "epoch": 2.0280391780295757, "percentage": 40.56, "elapsed_time": "1:23:52", "remaining_time": "2:02:54", "throughput": 19812.1, "total_tokens": 99701760}
|
|
{"current_steps": 31685, "total_steps": 78105, "loss": 0.1307, "lr": 3.7070229681914927e-06, "epoch": 2.0283592599705527, "percentage": 40.57, "elapsed_time": "1:23:53", "remaining_time": "2:02:53", "throughput": 19812.51, "total_tokens": 99717056}
|
|
{"current_steps": 31690, "total_steps": 78105, "loss": 0.2032, "lr": 3.7065337119085182e-06, "epoch": 2.028679341911529, "percentage": 40.57, "elapsed_time": "1:23:53", "remaining_time": "2:02:52", "throughput": 19813.0, "total_tokens": 99733184}
|
|
{"current_steps": 31695, "total_steps": 78105, "loss": 0.2394, "lr": 3.706044395377535e-06, "epoch": 2.028999423852506, "percentage": 40.58, "elapsed_time": "1:23:54", "remaining_time": "2:02:51", "throughput": 19813.57, "total_tokens": 99750464}
|
|
{"current_steps": 31700, "total_steps": 78105, "loss": 0.2861, "lr": 3.7055550186229765e-06, "epoch": 2.029319505793483, "percentage": 40.59, "elapsed_time": "1:23:55", "remaining_time": "2:02:50", "throughput": 19813.99, "total_tokens": 99765504}
|
|
{"current_steps": 31705, "total_steps": 78105, "loss": 0.26, "lr": 3.7050655816692804e-06, "epoch": 2.02963958773446, "percentage": 40.59, "elapsed_time": "1:23:55", "remaining_time": "2:02:49", "throughput": 19814.46, "total_tokens": 99781376}
|
|
{"current_steps": 31710, "total_steps": 78105, "loss": 0.2029, "lr": 3.7045760845408853e-06, "epoch": 2.0299596696754367, "percentage": 40.6, "elapsed_time": "1:23:56", "remaining_time": "2:02:48", "throughput": 19815.06, "total_tokens": 99798528}
|
|
{"current_steps": 31715, "total_steps": 78105, "loss": 0.2184, "lr": 3.7040865272622352e-06, "epoch": 2.0302797516164137, "percentage": 40.61, "elapsed_time": "1:23:57", "remaining_time": "2:02:47", "throughput": 19815.5, "total_tokens": 99813952}
|
|
{"current_steps": 31720, "total_steps": 78105, "loss": 0.2437, "lr": 3.7035969098577764e-06, "epoch": 2.0305998335573907, "percentage": 40.61, "elapsed_time": "1:23:57", "remaining_time": "2:02:46", "throughput": 19815.85, "total_tokens": 99828480}
|
|
{"current_steps": 31725, "total_steps": 78105, "loss": 0.2714, "lr": 3.7031072323519567e-06, "epoch": 2.0309199154983677, "percentage": 40.62, "elapsed_time": "1:23:58", "remaining_time": "2:02:45", "throughput": 19816.35, "total_tokens": 99844608}
|
|
{"current_steps": 31730, "total_steps": 78105, "loss": 0.2792, "lr": 3.7026174947692284e-06, "epoch": 2.0312399974393447, "percentage": 40.62, "elapsed_time": "1:23:59", "remaining_time": "2:02:45", "throughput": 19816.81, "total_tokens": 99860608}
|
|
{"current_steps": 31735, "total_steps": 78105, "loss": 0.2103, "lr": 3.702127697134047e-06, "epoch": 2.0315600793803212, "percentage": 40.63, "elapsed_time": "1:23:59", "remaining_time": "2:02:44", "throughput": 19817.34, "total_tokens": 99877248}
|
|
{"current_steps": 31740, "total_steps": 78105, "loss": 0.2149, "lr": 3.7016378394708694e-06, "epoch": 2.0318801613212982, "percentage": 40.64, "elapsed_time": "1:24:00", "remaining_time": "2:02:43", "throughput": 19817.91, "total_tokens": 99894016}
|
|
{"current_steps": 31745, "total_steps": 78105, "loss": 0.2266, "lr": 3.701147921804158e-06, "epoch": 2.032200243262275, "percentage": 40.64, "elapsed_time": "1:24:01", "remaining_time": "2:02:42", "throughput": 19818.39, "total_tokens": 99909888}
|
|
{"current_steps": 31750, "total_steps": 78105, "loss": 0.1853, "lr": 3.7006579441583756e-06, "epoch": 2.032520325203252, "percentage": 40.65, "elapsed_time": "1:24:01", "remaining_time": "2:02:41", "throughput": 19818.9, "total_tokens": 99926272}
|
|
{"current_steps": 31755, "total_steps": 78105, "loss": 0.2258, "lr": 3.7001679065579893e-06, "epoch": 2.0328404071442288, "percentage": 40.66, "elapsed_time": "1:24:02", "remaining_time": "2:02:40", "throughput": 19819.39, "total_tokens": 99942464}
|
|
{"current_steps": 31760, "total_steps": 78105, "loss": 0.2136, "lr": 3.69967780902747e-06, "epoch": 2.0331604890852057, "percentage": 40.66, "elapsed_time": "1:24:03", "remaining_time": "2:02:39", "throughput": 19819.73, "total_tokens": 99956544}
|
|
{"current_steps": 31765, "total_steps": 78105, "loss": 0.1381, "lr": 3.699187651591288e-06, "epoch": 2.0334805710261827, "percentage": 40.67, "elapsed_time": "1:24:03", "remaining_time": "2:02:38", "throughput": 19820.17, "total_tokens": 99972032}
|
|
{"current_steps": 31770, "total_steps": 78105, "loss": 0.2557, "lr": 3.698697434273923e-06, "epoch": 2.0338006529671597, "percentage": 40.68, "elapsed_time": "1:24:04", "remaining_time": "2:02:37", "throughput": 19820.61, "total_tokens": 99987520}
|
|
{"current_steps": 31775, "total_steps": 78105, "loss": 0.2953, "lr": 3.6982071570998512e-06, "epoch": 2.0341207349081363, "percentage": 40.68, "elapsed_time": "1:24:05", "remaining_time": "2:02:36", "throughput": 19821.03, "total_tokens": 100003008}
|
|
{"current_steps": 31780, "total_steps": 78105, "loss": 0.1572, "lr": 3.6977168200935554e-06, "epoch": 2.0344408168491133, "percentage": 40.69, "elapsed_time": "1:24:06", "remaining_time": "2:02:36", "throughput": 19819.64, "total_tokens": 100019136}
|
|
{"current_steps": 31785, "total_steps": 78105, "loss": 0.2309, "lr": 3.6972264232795198e-06, "epoch": 2.0347608987900903, "percentage": 40.7, "elapsed_time": "1:24:07", "remaining_time": "2:02:35", "throughput": 19820.1, "total_tokens": 100034688}
|
|
{"current_steps": 31790, "total_steps": 78105, "loss": 0.2005, "lr": 3.6967359666822333e-06, "epoch": 2.0350809807310672, "percentage": 40.7, "elapsed_time": "1:24:07", "remaining_time": "2:02:34", "throughput": 19820.47, "total_tokens": 100049536}
|
|
{"current_steps": 31795, "total_steps": 78105, "loss": 0.1672, "lr": 3.696245450326186e-06, "epoch": 2.0354010626720442, "percentage": 40.71, "elapsed_time": "1:24:08", "remaining_time": "2:02:33", "throughput": 19820.91, "total_tokens": 100064768}
|
|
{"current_steps": 31800, "total_steps": 78105, "loss": 0.1763, "lr": 3.6957548742358706e-06, "epoch": 2.035721144613021, "percentage": 40.71, "elapsed_time": "1:24:09", "remaining_time": "2:02:32", "throughput": 19821.35, "total_tokens": 100080448}
|
|
{"current_steps": 31805, "total_steps": 78105, "loss": 0.1021, "lr": 3.6952642384357867e-06, "epoch": 2.0360412265539978, "percentage": 40.72, "elapsed_time": "1:24:09", "remaining_time": "2:02:31", "throughput": 19821.74, "total_tokens": 100095296}
|
|
{"current_steps": 31810, "total_steps": 78105, "loss": 0.1704, "lr": 3.694773542950433e-06, "epoch": 2.0363613084949748, "percentage": 40.73, "elapsed_time": "1:24:10", "remaining_time": "2:02:30", "throughput": 19822.27, "total_tokens": 100111936}
|
|
{"current_steps": 31815, "total_steps": 78105, "loss": 0.1592, "lr": 3.694282787804311e-06, "epoch": 2.0366813904359518, "percentage": 40.73, "elapsed_time": "1:24:11", "remaining_time": "2:02:29", "throughput": 19822.72, "total_tokens": 100127296}
|
|
{"current_steps": 31820, "total_steps": 78105, "loss": 0.256, "lr": 3.693791973021928e-06, "epoch": 2.0370014723769283, "percentage": 40.74, "elapsed_time": "1:24:11", "remaining_time": "2:02:28", "throughput": 19823.15, "total_tokens": 100142464}
|
|
{"current_steps": 31825, "total_steps": 78105, "loss": 0.2572, "lr": 3.693301098627791e-06, "epoch": 2.0373215543179053, "percentage": 40.75, "elapsed_time": "1:24:12", "remaining_time": "2:02:27", "throughput": 19823.59, "total_tokens": 100157952}
|
|
{"current_steps": 31830, "total_steps": 78105, "loss": 0.1787, "lr": 3.692810164646414e-06, "epoch": 2.0376416362588823, "percentage": 40.75, "elapsed_time": "1:24:13", "remaining_time": "2:02:26", "throughput": 19824.02, "total_tokens": 100173312}
|
|
{"current_steps": 31835, "total_steps": 78105, "loss": 0.2623, "lr": 3.6923191711023097e-06, "epoch": 2.0379617181998593, "percentage": 40.76, "elapsed_time": "1:24:14", "remaining_time": "2:02:25", "throughput": 19823.1, "total_tokens": 100188352}
|
|
{"current_steps": 31840, "total_steps": 78105, "loss": 0.2514, "lr": 3.6918281180199977e-06, "epoch": 2.0382818001408363, "percentage": 40.77, "elapsed_time": "1:24:14", "remaining_time": "2:02:24", "throughput": 19823.54, "total_tokens": 100204032}
|
|
{"current_steps": 31845, "total_steps": 78105, "loss": 0.1957, "lr": 3.6913370054239967e-06, "epoch": 2.038601882081813, "percentage": 40.77, "elapsed_time": "1:24:15", "remaining_time": "2:02:23", "throughput": 19823.96, "total_tokens": 100219008}
|
|
{"current_steps": 31850, "total_steps": 78105, "loss": 0.2899, "lr": 3.690845833338831e-06, "epoch": 2.03892196402279, "percentage": 40.78, "elapsed_time": "1:24:16", "remaining_time": "2:02:22", "throughput": 19824.35, "total_tokens": 100233856}
|
|
{"current_steps": 31855, "total_steps": 78105, "loss": 0.2437, "lr": 3.6903546017890275e-06, "epoch": 2.039242045963767, "percentage": 40.78, "elapsed_time": "1:24:16", "remaining_time": "2:02:21", "throughput": 19824.7, "total_tokens": 100248256}
|
|
{"current_steps": 31860, "total_steps": 78105, "loss": 0.1458, "lr": 3.6898633107991143e-06, "epoch": 2.039562127904744, "percentage": 40.79, "elapsed_time": "1:24:17", "remaining_time": "2:02:20", "throughput": 19825.12, "total_tokens": 100263616}
|
|
{"current_steps": 31865, "total_steps": 78105, "loss": 0.1635, "lr": 3.689371960393627e-06, "epoch": 2.0398822098457203, "percentage": 40.8, "elapsed_time": "1:24:18", "remaining_time": "2:02:19", "throughput": 19825.6, "total_tokens": 100279744}
|
|
{"current_steps": 31870, "total_steps": 78105, "loss": 0.1918, "lr": 3.688880550597098e-06, "epoch": 2.0402022917866973, "percentage": 40.8, "elapsed_time": "1:24:18", "remaining_time": "2:02:18", "throughput": 19826.07, "total_tokens": 100295552}
|
|
{"current_steps": 31875, "total_steps": 78105, "loss": 0.2051, "lr": 3.6883890814340678e-06, "epoch": 2.0405223737276743, "percentage": 40.81, "elapsed_time": "1:24:19", "remaining_time": "2:02:17", "throughput": 19826.49, "total_tokens": 100310912}
|
|
{"current_steps": 31880, "total_steps": 78105, "loss": 0.2058, "lr": 3.687897552929076e-06, "epoch": 2.0408424556686513, "percentage": 40.82, "elapsed_time": "1:24:20", "remaining_time": "2:02:17", "throughput": 19826.91, "total_tokens": 100326464}
|
|
{"current_steps": 31885, "total_steps": 78105, "loss": 0.2004, "lr": 3.687405965106669e-06, "epoch": 2.041162537609628, "percentage": 40.82, "elapsed_time": "1:24:20", "remaining_time": "2:02:16", "throughput": 19827.3, "total_tokens": 100341568}
|
|
{"current_steps": 31890, "total_steps": 78105, "loss": 0.1228, "lr": 3.6869143179913923e-06, "epoch": 2.041482619550605, "percentage": 40.83, "elapsed_time": "1:24:21", "remaining_time": "2:02:15", "throughput": 19827.8, "total_tokens": 100357696}
|
|
{"current_steps": 31895, "total_steps": 78105, "loss": 0.2019, "lr": 3.6864226116077977e-06, "epoch": 2.041802701491582, "percentage": 40.84, "elapsed_time": "1:24:22", "remaining_time": "2:02:14", "throughput": 19828.25, "total_tokens": 100373248}
|
|
{"current_steps": 31900, "total_steps": 78105, "loss": 0.1433, "lr": 3.685930845980438e-06, "epoch": 2.042122783432559, "percentage": 40.84, "elapsed_time": "1:24:22", "remaining_time": "2:02:13", "throughput": 19828.7, "total_tokens": 100388672}
|
|
{"current_steps": 31905, "total_steps": 78105, "loss": 0.2607, "lr": 3.685439021133868e-06, "epoch": 2.042442865373536, "percentage": 40.85, "elapsed_time": "1:24:23", "remaining_time": "2:02:12", "throughput": 19829.25, "total_tokens": 100405376}
|
|
{"current_steps": 31910, "total_steps": 78105, "loss": 0.248, "lr": 3.6849471370926483e-06, "epoch": 2.0427629473145124, "percentage": 40.86, "elapsed_time": "1:24:24", "remaining_time": "2:02:11", "throughput": 19829.71, "total_tokens": 100421376}
|
|
{"current_steps": 31915, "total_steps": 78105, "loss": 0.1678, "lr": 3.68445519388134e-06, "epoch": 2.0430830292554893, "percentage": 40.86, "elapsed_time": "1:24:24", "remaining_time": "2:02:10", "throughput": 19830.08, "total_tokens": 100436096}
|
|
{"current_steps": 31920, "total_steps": 78105, "loss": 0.2534, "lr": 3.68396319152451e-06, "epoch": 2.0434031111964663, "percentage": 40.87, "elapsed_time": "1:24:25", "remaining_time": "2:02:09", "throughput": 19830.45, "total_tokens": 100450752}
|
|
{"current_steps": 31925, "total_steps": 78105, "loss": 0.302, "lr": 3.6834711300467254e-06, "epoch": 2.0437231931374433, "percentage": 40.87, "elapsed_time": "1:24:26", "remaining_time": "2:02:08", "throughput": 19830.99, "total_tokens": 100467392}
|
|
{"current_steps": 31930, "total_steps": 78105, "loss": 0.1205, "lr": 3.682979009472557e-06, "epoch": 2.04404327507842, "percentage": 40.88, "elapsed_time": "1:24:26", "remaining_time": "2:02:07", "throughput": 19831.48, "total_tokens": 100483712}
|
|
{"current_steps": 31935, "total_steps": 78105, "loss": 0.269, "lr": 3.6824868298265782e-06, "epoch": 2.044363357019397, "percentage": 40.89, "elapsed_time": "1:24:27", "remaining_time": "2:02:06", "throughput": 19831.97, "total_tokens": 100499648}
|
|
{"current_steps": 31940, "total_steps": 78105, "loss": 0.2446, "lr": 3.6819945911333666e-06, "epoch": 2.044683438960374, "percentage": 40.89, "elapsed_time": "1:24:28", "remaining_time": "2:02:05", "throughput": 19832.4, "total_tokens": 100515008}
|
|
{"current_steps": 31945, "total_steps": 78105, "loss": 0.1921, "lr": 3.681502293417502e-06, "epoch": 2.045003520901351, "percentage": 40.9, "elapsed_time": "1:24:28", "remaining_time": "2:02:04", "throughput": 19832.78, "total_tokens": 100529664}
|
|
{"current_steps": 31950, "total_steps": 78105, "loss": 0.2747, "lr": 3.681009936703567e-06, "epoch": 2.045323602842328, "percentage": 40.91, "elapsed_time": "1:24:29", "remaining_time": "2:02:03", "throughput": 19833.21, "total_tokens": 100545088}
|
|
{"current_steps": 31955, "total_steps": 78105, "loss": 0.209, "lr": 3.680517521016148e-06, "epoch": 2.0456436847833044, "percentage": 40.91, "elapsed_time": "1:24:30", "remaining_time": "2:02:02", "throughput": 19833.64, "total_tokens": 100560576}
|
|
{"current_steps": 31960, "total_steps": 78105, "loss": 0.2114, "lr": 3.680025046379833e-06, "epoch": 2.0459637667242814, "percentage": 40.92, "elapsed_time": "1:24:30", "remaining_time": "2:02:01", "throughput": 19834.13, "total_tokens": 100576640}
|
|
{"current_steps": 31965, "total_steps": 78105, "loss": 0.2232, "lr": 3.679532512819213e-06, "epoch": 2.0462838486652584, "percentage": 40.93, "elapsed_time": "1:24:31", "remaining_time": "2:02:00", "throughput": 19834.5, "total_tokens": 100591360}
|
|
{"current_steps": 31970, "total_steps": 78105, "loss": 0.1649, "lr": 3.6790399203588834e-06, "epoch": 2.0466039306062354, "percentage": 40.93, "elapsed_time": "1:24:32", "remaining_time": "2:01:59", "throughput": 19834.93, "total_tokens": 100606912}
|
|
{"current_steps": 31975, "total_steps": 78105, "loss": 0.2078, "lr": 3.6785472690234415e-06, "epoch": 2.046924012547212, "percentage": 40.94, "elapsed_time": "1:24:32", "remaining_time": "2:01:58", "throughput": 19835.45, "total_tokens": 100623552}
|
|
{"current_steps": 31980, "total_steps": 78105, "loss": 0.2281, "lr": 3.6780545588374883e-06, "epoch": 2.047244094488189, "percentage": 40.94, "elapsed_time": "1:24:33", "remaining_time": "2:01:57", "throughput": 19835.86, "total_tokens": 100638720}
|
|
{"current_steps": 31985, "total_steps": 78105, "loss": 0.2779, "lr": 3.6775617898256267e-06, "epoch": 2.047564176429166, "percentage": 40.95, "elapsed_time": "1:24:34", "remaining_time": "2:01:56", "throughput": 19836.32, "total_tokens": 100654592}
|
|
{"current_steps": 31990, "total_steps": 78105, "loss": 0.2415, "lr": 3.6770689620124626e-06, "epoch": 2.047884258370143, "percentage": 40.96, "elapsed_time": "1:24:34", "remaining_time": "2:01:55", "throughput": 19836.76, "total_tokens": 100670336}
|
|
{"current_steps": 31995, "total_steps": 78105, "loss": 0.1341, "lr": 3.6765760754226053e-06, "epoch": 2.04820434031112, "percentage": 40.96, "elapsed_time": "1:24:35", "remaining_time": "2:01:54", "throughput": 19837.18, "total_tokens": 100685696}
|
|
{"current_steps": 32000, "total_steps": 78105, "loss": 0.1803, "lr": 3.6760831300806687e-06, "epoch": 2.0485244222520964, "percentage": 40.97, "elapsed_time": "1:24:36", "remaining_time": "2:01:53", "throughput": 19837.53, "total_tokens": 100700352}
|
|
{"current_steps": 32005, "total_steps": 78105, "loss": 0.2749, "lr": 3.675590126011266e-06, "epoch": 2.0488445041930734, "percentage": 40.98, "elapsed_time": "1:24:36", "remaining_time": "2:01:52", "throughput": 19838.04, "total_tokens": 100716544}
|
|
{"current_steps": 32010, "total_steps": 78105, "loss": 0.2017, "lr": 3.6750970632390156e-06, "epoch": 2.0491645861340504, "percentage": 40.98, "elapsed_time": "1:24:37", "remaining_time": "2:01:51", "throughput": 19838.48, "total_tokens": 100732096}
|
|
{"current_steps": 32015, "total_steps": 78105, "loss": 0.2598, "lr": 3.6746039417885387e-06, "epoch": 2.0494846680750274, "percentage": 40.99, "elapsed_time": "1:24:38", "remaining_time": "2:01:50", "throughput": 19838.96, "total_tokens": 100747968}
|
|
{"current_steps": 32020, "total_steps": 78105, "loss": 0.1603, "lr": 3.674110761684459e-06, "epoch": 2.049804750016004, "percentage": 41.0, "elapsed_time": "1:24:38", "remaining_time": "2:01:49", "throughput": 19839.41, "total_tokens": 100763648}
|
|
{"current_steps": 32025, "total_steps": 78105, "loss": 0.2008, "lr": 3.6736175229514036e-06, "epoch": 2.050124831956981, "percentage": 41.0, "elapsed_time": "1:24:39", "remaining_time": "2:01:49", "throughput": 19840.2, "total_tokens": 100783488}
|
|
{"current_steps": 32030, "total_steps": 78105, "loss": 0.2693, "lr": 3.6731242256140033e-06, "epoch": 2.050444913897958, "percentage": 41.01, "elapsed_time": "1:24:40", "remaining_time": "2:01:48", "throughput": 19840.7, "total_tokens": 100799552}
|
|
{"current_steps": 32035, "total_steps": 78105, "loss": 0.273, "lr": 3.672630869696888e-06, "epoch": 2.050764995838935, "percentage": 41.02, "elapsed_time": "1:24:41", "remaining_time": "2:01:47", "throughput": 19841.23, "total_tokens": 100816192}
|
|
{"current_steps": 32040, "total_steps": 78105, "loss": 0.1789, "lr": 3.6721374552246962e-06, "epoch": 2.0510850777799114, "percentage": 41.02, "elapsed_time": "1:24:41", "remaining_time": "2:01:46", "throughput": 19841.68, "total_tokens": 100831680}
|
|
{"current_steps": 32045, "total_steps": 78105, "loss": 0.2565, "lr": 3.6716439822220656e-06, "epoch": 2.0514051597208884, "percentage": 41.03, "elapsed_time": "1:24:42", "remaining_time": "2:01:45", "throughput": 19842.04, "total_tokens": 100846400}
|
|
{"current_steps": 32050, "total_steps": 78105, "loss": 0.1367, "lr": 3.671150450713637e-06, "epoch": 2.0517252416618654, "percentage": 41.03, "elapsed_time": "1:24:43", "remaining_time": "2:01:44", "throughput": 19842.49, "total_tokens": 100862400}
|
|
{"current_steps": 32055, "total_steps": 78105, "loss": 0.2691, "lr": 3.670656860724055e-06, "epoch": 2.0520453236028424, "percentage": 41.04, "elapsed_time": "1:24:43", "remaining_time": "2:01:43", "throughput": 19842.88, "total_tokens": 100877312}
|
|
{"current_steps": 32060, "total_steps": 78105, "loss": 0.1851, "lr": 3.6701632122779674e-06, "epoch": 2.0523654055438194, "percentage": 41.05, "elapsed_time": "1:24:44", "remaining_time": "2:01:42", "throughput": 19843.29, "total_tokens": 100892416}
|
|
{"current_steps": 32065, "total_steps": 78105, "loss": 0.1442, "lr": 3.669669505400024e-06, "epoch": 2.052685487484796, "percentage": 41.05, "elapsed_time": "1:24:45", "remaining_time": "2:01:41", "throughput": 19843.73, "total_tokens": 100908032}
|
|
{"current_steps": 32070, "total_steps": 78105, "loss": 0.2362, "lr": 3.6691757401148785e-06, "epoch": 2.053005569425773, "percentage": 41.06, "elapsed_time": "1:24:45", "remaining_time": "2:01:40", "throughput": 19844.24, "total_tokens": 100924352}
|
|
{"current_steps": 32075, "total_steps": 78105, "loss": 0.1598, "lr": 3.668681916447186e-06, "epoch": 2.05332565136675, "percentage": 41.07, "elapsed_time": "1:24:46", "remaining_time": "2:01:39", "throughput": 19844.72, "total_tokens": 100940032}
|
|
{"current_steps": 32080, "total_steps": 78105, "loss": 0.2386, "lr": 3.668188034421606e-06, "epoch": 2.053645733307727, "percentage": 41.07, "elapsed_time": "1:24:47", "remaining_time": "2:01:38", "throughput": 19845.29, "total_tokens": 100957376}
|
|
{"current_steps": 32085, "total_steps": 78105, "loss": 0.2209, "lr": 3.6676940940628007e-06, "epoch": 2.0539658152487035, "percentage": 41.08, "elapsed_time": "1:24:47", "remaining_time": "2:01:37", "throughput": 19845.68, "total_tokens": 100972544}
|
|
{"current_steps": 32090, "total_steps": 78105, "loss": 0.1616, "lr": 3.6672000953954346e-06, "epoch": 2.0542858971896805, "percentage": 41.09, "elapsed_time": "1:24:48", "remaining_time": "2:01:36", "throughput": 19846.15, "total_tokens": 100988224}
|
|
{"current_steps": 32095, "total_steps": 78105, "loss": 0.1884, "lr": 3.666706038444175e-06, "epoch": 2.0546059791306575, "percentage": 41.09, "elapsed_time": "1:24:49", "remaining_time": "2:01:35", "throughput": 19846.6, "total_tokens": 101003968}
|
|
{"current_steps": 32100, "total_steps": 78105, "loss": 0.3679, "lr": 3.6662119232336933e-06, "epoch": 2.0549260610716344, "percentage": 41.1, "elapsed_time": "1:24:49", "remaining_time": "2:01:34", "throughput": 19846.94, "total_tokens": 101018368}
|
|
{"current_steps": 32105, "total_steps": 78105, "loss": 0.1868, "lr": 3.6657177497886633e-06, "epoch": 2.0552461430126114, "percentage": 41.1, "elapsed_time": "1:24:50", "remaining_time": "2:01:33", "throughput": 19847.3, "total_tokens": 101032896}
|
|
{"current_steps": 32110, "total_steps": 78105, "loss": 0.1823, "lr": 3.6652235181337605e-06, "epoch": 2.055566224953588, "percentage": 41.11, "elapsed_time": "1:24:51", "remaining_time": "2:01:32", "throughput": 19847.85, "total_tokens": 101049792}
|
|
{"current_steps": 32115, "total_steps": 78105, "loss": 0.1792, "lr": 3.664729228293664e-06, "epoch": 2.055886306894565, "percentage": 41.12, "elapsed_time": "1:24:51", "remaining_time": "2:01:31", "throughput": 19848.31, "total_tokens": 101065600}
|
|
{"current_steps": 32120, "total_steps": 78105, "loss": 0.1921, "lr": 3.6642348802930576e-06, "epoch": 2.056206388835542, "percentage": 41.12, "elapsed_time": "1:24:52", "remaining_time": "2:01:30", "throughput": 19848.74, "total_tokens": 101081088}
|
|
{"current_steps": 32125, "total_steps": 78105, "loss": 0.209, "lr": 3.663740474156625e-06, "epoch": 2.056526470776519, "percentage": 41.13, "elapsed_time": "1:24:53", "remaining_time": "2:01:29", "throughput": 19849.15, "total_tokens": 101096704}
|
|
{"current_steps": 32130, "total_steps": 78105, "loss": 0.1708, "lr": 3.6632460099090555e-06, "epoch": 2.0568465527174955, "percentage": 41.14, "elapsed_time": "1:24:53", "remaining_time": "2:01:28", "throughput": 19849.58, "total_tokens": 101111936}
|
|
{"current_steps": 32135, "total_steps": 78105, "loss": 0.3031, "lr": 3.6627514875750386e-06, "epoch": 2.0571666346584725, "percentage": 41.14, "elapsed_time": "1:24:54", "remaining_time": "2:01:27", "throughput": 19850.07, "total_tokens": 101127744}
|
|
{"current_steps": 32140, "total_steps": 78105, "loss": 0.1409, "lr": 3.662256907179269e-06, "epoch": 2.0574867165994495, "percentage": 41.15, "elapsed_time": "1:24:55", "remaining_time": "2:01:26", "throughput": 19850.59, "total_tokens": 101143872}
|
|
{"current_steps": 32145, "total_steps": 78105, "loss": 0.2047, "lr": 3.6617622687464446e-06, "epoch": 2.0578067985404265, "percentage": 41.16, "elapsed_time": "1:24:55", "remaining_time": "2:01:26", "throughput": 19851.07, "total_tokens": 101160000}
|
|
{"current_steps": 32150, "total_steps": 78105, "loss": 0.3062, "lr": 3.6612675723012626e-06, "epoch": 2.058126880481403, "percentage": 41.16, "elapsed_time": "1:24:56", "remaining_time": "2:01:25", "throughput": 19851.55, "total_tokens": 101176256}
|
|
{"current_steps": 32155, "total_steps": 78105, "loss": 0.1899, "lr": 3.660772817868427e-06, "epoch": 2.05844696242238, "percentage": 41.17, "elapsed_time": "1:24:57", "remaining_time": "2:01:24", "throughput": 19852.02, "total_tokens": 101192192}
|
|
{"current_steps": 32160, "total_steps": 78105, "loss": 0.173, "lr": 3.660278005472643e-06, "epoch": 2.058767044363357, "percentage": 41.18, "elapsed_time": "1:24:58", "remaining_time": "2:01:23", "throughput": 19852.49, "total_tokens": 101208512}
|
|
{"current_steps": 32165, "total_steps": 78105, "loss": 0.1725, "lr": 3.6597831351386203e-06, "epoch": 2.059087126304334, "percentage": 41.18, "elapsed_time": "1:24:58", "remaining_time": "2:01:22", "throughput": 19852.96, "total_tokens": 101224384}
|
|
{"current_steps": 32170, "total_steps": 78105, "loss": 0.1945, "lr": 3.6592882068910673e-06, "epoch": 2.059407208245311, "percentage": 41.19, "elapsed_time": "1:24:59", "remaining_time": "2:01:21", "throughput": 19853.36, "total_tokens": 101239360}
|
|
{"current_steps": 32175, "total_steps": 78105, "loss": 0.1768, "lr": 3.6587932207547004e-06, "epoch": 2.0597272901862875, "percentage": 41.19, "elapsed_time": "1:25:00", "remaining_time": "2:01:20", "throughput": 19853.78, "total_tokens": 101254528}
|
|
{"current_steps": 32180, "total_steps": 78105, "loss": 0.2859, "lr": 3.658298176754237e-06, "epoch": 2.0600473721272645, "percentage": 41.2, "elapsed_time": "1:25:00", "remaining_time": "2:01:19", "throughput": 19854.22, "total_tokens": 101270144}
|
|
{"current_steps": 32185, "total_steps": 78105, "loss": 0.2116, "lr": 3.657803074914395e-06, "epoch": 2.0603674540682415, "percentage": 41.21, "elapsed_time": "1:25:01", "remaining_time": "2:01:18", "throughput": 19854.63, "total_tokens": 101284992}
|
|
{"current_steps": 32190, "total_steps": 78105, "loss": 0.2396, "lr": 3.6573079152598982e-06, "epoch": 2.0606875360092185, "percentage": 41.21, "elapsed_time": "1:25:02", "remaining_time": "2:01:17", "throughput": 19855.21, "total_tokens": 101302784}
|
|
{"current_steps": 32195, "total_steps": 78105, "loss": 0.2634, "lr": 3.656812697815472e-06, "epoch": 2.061007617950195, "percentage": 41.22, "elapsed_time": "1:25:02", "remaining_time": "2:01:16", "throughput": 19855.73, "total_tokens": 101319552}
|
|
{"current_steps": 32200, "total_steps": 78105, "loss": 0.1979, "lr": 3.656317422605846e-06, "epoch": 2.061327699891172, "percentage": 41.23, "elapsed_time": "1:25:03", "remaining_time": "2:01:15", "throughput": 19856.12, "total_tokens": 101334272}
|
|
{"current_steps": 32205, "total_steps": 78105, "loss": 0.1825, "lr": 3.655822089655751e-06, "epoch": 2.061647781832149, "percentage": 41.23, "elapsed_time": "1:25:04", "remaining_time": "2:01:14", "throughput": 19856.59, "total_tokens": 101350208}
|
|
{"current_steps": 32210, "total_steps": 78105, "loss": 0.1583, "lr": 3.6553266989899207e-06, "epoch": 2.061967863773126, "percentage": 41.24, "elapsed_time": "1:25:04", "remaining_time": "2:01:13", "throughput": 19856.95, "total_tokens": 101364928}
|
|
{"current_steps": 32215, "total_steps": 78105, "loss": 0.1841, "lr": 3.6548312506330934e-06, "epoch": 2.062287945714103, "percentage": 41.25, "elapsed_time": "1:25:05", "remaining_time": "2:01:12", "throughput": 19857.54, "total_tokens": 101382208}
|
|
{"current_steps": 32220, "total_steps": 78105, "loss": 0.2441, "lr": 3.6543357446100085e-06, "epoch": 2.0626080276550796, "percentage": 41.25, "elapsed_time": "1:25:06", "remaining_time": "2:01:11", "throughput": 19858.02, "total_tokens": 101398400}
|
|
{"current_steps": 32225, "total_steps": 78105, "loss": 0.2626, "lr": 3.653840180945409e-06, "epoch": 2.0629281095960565, "percentage": 41.26, "elapsed_time": "1:25:06", "remaining_time": "2:01:10", "throughput": 19858.55, "total_tokens": 101414720}
|
|
{"current_steps": 32230, "total_steps": 78105, "loss": 0.1868, "lr": 3.6533445596640414e-06, "epoch": 2.0632481915370335, "percentage": 41.26, "elapsed_time": "1:25:07", "remaining_time": "2:01:09", "throughput": 19858.93, "total_tokens": 101429440}
|
|
{"current_steps": 32235, "total_steps": 78105, "loss": 0.2584, "lr": 3.6528488807906543e-06, "epoch": 2.0635682734780105, "percentage": 41.27, "elapsed_time": "1:25:08", "remaining_time": "2:01:08", "throughput": 19859.34, "total_tokens": 101444608}
|
|
{"current_steps": 32240, "total_steps": 78105, "loss": 0.2329, "lr": 3.652353144349999e-06, "epoch": 2.063888355418987, "percentage": 41.28, "elapsed_time": "1:25:08", "remaining_time": "2:01:07", "throughput": 19859.84, "total_tokens": 101460800}
|
|
{"current_steps": 32245, "total_steps": 78105, "loss": 0.2626, "lr": 3.65185735036683e-06, "epoch": 2.064208437359964, "percentage": 41.28, "elapsed_time": "1:25:09", "remaining_time": "2:01:07", "throughput": 19860.38, "total_tokens": 101477888}
|
|
{"current_steps": 32250, "total_steps": 78105, "loss": 0.1905, "lr": 3.651361498865904e-06, "epoch": 2.064528519300941, "percentage": 41.29, "elapsed_time": "1:25:10", "remaining_time": "2:01:06", "throughput": 19860.82, "total_tokens": 101493632}
|
|
{"current_steps": 32255, "total_steps": 78105, "loss": 0.1524, "lr": 3.6508655898719824e-06, "epoch": 2.064848601241918, "percentage": 41.3, "elapsed_time": "1:25:10", "remaining_time": "2:01:05", "throughput": 19861.28, "total_tokens": 101509760}
|
|
{"current_steps": 32260, "total_steps": 78105, "loss": 0.2483, "lr": 3.650369623409828e-06, "epoch": 2.065168683182895, "percentage": 41.3, "elapsed_time": "1:25:11", "remaining_time": "2:01:04", "throughput": 19861.87, "total_tokens": 101527104}
|
|
{"current_steps": 32265, "total_steps": 78105, "loss": 0.1585, "lr": 3.6498735995042066e-06, "epoch": 2.0654887651238716, "percentage": 41.31, "elapsed_time": "1:25:12", "remaining_time": "2:01:03", "throughput": 19862.31, "total_tokens": 101543040}
|
|
{"current_steps": 32270, "total_steps": 78105, "loss": 0.2534, "lr": 3.6493775181798866e-06, "epoch": 2.0658088470648486, "percentage": 41.32, "elapsed_time": "1:25:13", "remaining_time": "2:01:02", "throughput": 19862.92, "total_tokens": 101561024}
|
|
{"current_steps": 32275, "total_steps": 78105, "loss": 0.2004, "lr": 3.6488813794616412e-06, "epoch": 2.0661289290058256, "percentage": 41.32, "elapsed_time": "1:25:13", "remaining_time": "2:01:01", "throughput": 19863.35, "total_tokens": 101576448}
|
|
{"current_steps": 32280, "total_steps": 78105, "loss": 0.2001, "lr": 3.6483851833742435e-06, "epoch": 2.0664490109468026, "percentage": 41.33, "elapsed_time": "1:25:14", "remaining_time": "2:01:00", "throughput": 19863.79, "total_tokens": 101592000}
|
|
{"current_steps": 32285, "total_steps": 78105, "loss": 0.2706, "lr": 3.6478889299424713e-06, "epoch": 2.066769092887779, "percentage": 41.34, "elapsed_time": "1:25:15", "remaining_time": "2:00:59", "throughput": 19864.29, "total_tokens": 101608192}
|
|
{"current_steps": 32290, "total_steps": 78105, "loss": 0.2167, "lr": 3.647392619191106e-06, "epoch": 2.067089174828756, "percentage": 41.34, "elapsed_time": "1:25:15", "remaining_time": "2:00:58", "throughput": 19864.78, "total_tokens": 101624320}
|
|
{"current_steps": 32295, "total_steps": 78105, "loss": 0.2007, "lr": 3.6468962511449293e-06, "epoch": 2.067409256769733, "percentage": 41.35, "elapsed_time": "1:25:16", "remaining_time": "2:00:57", "throughput": 19865.24, "total_tokens": 101640128}
|
|
{"current_steps": 32300, "total_steps": 78105, "loss": 0.2372, "lr": 3.646399825828727e-06, "epoch": 2.06772933871071, "percentage": 41.35, "elapsed_time": "1:25:17", "remaining_time": "2:00:56", "throughput": 19865.66, "total_tokens": 101655808}
|
|
{"current_steps": 32305, "total_steps": 78105, "loss": 0.2353, "lr": 3.6459033432672898e-06, "epoch": 2.0680494206516866, "percentage": 41.36, "elapsed_time": "1:25:17", "remaining_time": "2:00:55", "throughput": 19866.0, "total_tokens": 101670080}
|
|
{"current_steps": 32310, "total_steps": 78105, "loss": 0.2576, "lr": 3.645406803485407e-06, "epoch": 2.0683695025926636, "percentage": 41.37, "elapsed_time": "1:25:18", "remaining_time": "2:00:54", "throughput": 19866.43, "total_tokens": 101685184}
|
|
{"current_steps": 32315, "total_steps": 78105, "loss": 0.2226, "lr": 3.6449102065078767e-06, "epoch": 2.0686895845336406, "percentage": 41.37, "elapsed_time": "1:25:19", "remaining_time": "2:00:53", "throughput": 19866.9, "total_tokens": 101701184}
|
|
{"current_steps": 32320, "total_steps": 78105, "loss": 0.3052, "lr": 3.6444135523594933e-06, "epoch": 2.0690096664746176, "percentage": 41.38, "elapsed_time": "1:25:19", "remaining_time": "2:00:52", "throughput": 19867.47, "total_tokens": 101718592}
|
|
{"current_steps": 32325, "total_steps": 78105, "loss": 0.2081, "lr": 3.6439168410650576e-06, "epoch": 2.0693297484155946, "percentage": 41.39, "elapsed_time": "1:25:20", "remaining_time": "2:00:51", "throughput": 19867.89, "total_tokens": 101733632}
|
|
{"current_steps": 32330, "total_steps": 78105, "loss": 0.2453, "lr": 3.643420072649374e-06, "epoch": 2.069649830356571, "percentage": 41.39, "elapsed_time": "1:25:21", "remaining_time": "2:00:50", "throughput": 19868.42, "total_tokens": 101750400}
|
|
{"current_steps": 32335, "total_steps": 78105, "loss": 0.1596, "lr": 3.6429232471372478e-06, "epoch": 2.069969912297548, "percentage": 41.4, "elapsed_time": "1:25:21", "remaining_time": "2:00:49", "throughput": 19868.87, "total_tokens": 101765888}
|
|
{"current_steps": 32340, "total_steps": 78105, "loss": 0.1673, "lr": 3.6424263645534873e-06, "epoch": 2.070289994238525, "percentage": 41.41, "elapsed_time": "1:25:22", "remaining_time": "2:00:49", "throughput": 19869.29, "total_tokens": 101781376}
|
|
{"current_steps": 32345, "total_steps": 78105, "loss": 0.1531, "lr": 3.6419294249229053e-06, "epoch": 2.070610076179502, "percentage": 41.41, "elapsed_time": "1:25:23", "remaining_time": "2:00:48", "throughput": 19869.79, "total_tokens": 101797760}
|
|
{"current_steps": 32350, "total_steps": 78105, "loss": 0.1952, "lr": 3.641432428270316e-06, "epoch": 2.0709301581204786, "percentage": 41.42, "elapsed_time": "1:25:23", "remaining_time": "2:00:47", "throughput": 19870.25, "total_tokens": 101813760}
|
|
{"current_steps": 32355, "total_steps": 78105, "loss": 0.3066, "lr": 3.6409353746205367e-06, "epoch": 2.0712502400614556, "percentage": 41.43, "elapsed_time": "1:25:24", "remaining_time": "2:00:46", "throughput": 19870.76, "total_tokens": 101830144}
|
|
{"current_steps": 32360, "total_steps": 78105, "loss": 0.4124, "lr": 3.6404382639983883e-06, "epoch": 2.0715703220024326, "percentage": 41.43, "elapsed_time": "1:25:25", "remaining_time": "2:00:45", "throughput": 19871.31, "total_tokens": 101846848}
|
|
{"current_steps": 32365, "total_steps": 78105, "loss": 0.2141, "lr": 3.639941096428692e-06, "epoch": 2.0718904039434096, "percentage": 41.44, "elapsed_time": "1:25:25", "remaining_time": "2:00:44", "throughput": 19871.74, "total_tokens": 101861888}
|
|
{"current_steps": 32370, "total_steps": 78105, "loss": 0.2322, "lr": 3.6394438719362758e-06, "epoch": 2.0722104858843866, "percentage": 41.44, "elapsed_time": "1:25:26", "remaining_time": "2:00:43", "throughput": 19872.2, "total_tokens": 101877696}
|
|
{"current_steps": 32375, "total_steps": 78105, "loss": 0.1493, "lr": 3.638946590545968e-06, "epoch": 2.072530567825363, "percentage": 41.45, "elapsed_time": "1:25:27", "remaining_time": "2:00:42", "throughput": 19872.57, "total_tokens": 101892544}
|
|
{"current_steps": 32380, "total_steps": 78105, "loss": 0.2721, "lr": 3.6384492522826e-06, "epoch": 2.07285064976634, "percentage": 41.46, "elapsed_time": "1:25:27", "remaining_time": "2:00:41", "throughput": 19873.02, "total_tokens": 101908416}
|
|
{"current_steps": 32385, "total_steps": 78105, "loss": 0.2413, "lr": 3.6379518571710053e-06, "epoch": 2.073170731707317, "percentage": 41.46, "elapsed_time": "1:25:28", "remaining_time": "2:00:40", "throughput": 19873.44, "total_tokens": 101923712}
|
|
{"current_steps": 32390, "total_steps": 78105, "loss": 0.1763, "lr": 3.637454405236023e-06, "epoch": 2.073490813648294, "percentage": 41.47, "elapsed_time": "1:25:29", "remaining_time": "2:00:39", "throughput": 19873.93, "total_tokens": 101939968}
|
|
{"current_steps": 32395, "total_steps": 78105, "loss": 0.1755, "lr": 3.636956896502493e-06, "epoch": 2.0738108955892707, "percentage": 41.48, "elapsed_time": "1:25:29", "remaining_time": "2:00:38", "throughput": 19874.29, "total_tokens": 101954816}
|
|
{"current_steps": 32400, "total_steps": 78105, "loss": 0.2567, "lr": 3.636459330995257e-06, "epoch": 2.0741309775302477, "percentage": 41.48, "elapsed_time": "1:25:30", "remaining_time": "2:00:37", "throughput": 19874.69, "total_tokens": 101970048}
|
|
{"current_steps": 32405, "total_steps": 78105, "loss": 0.2265, "lr": 3.635961708739162e-06, "epoch": 2.0744510594712247, "percentage": 41.49, "elapsed_time": "1:25:31", "remaining_time": "2:00:36", "throughput": 19875.09, "total_tokens": 101985344}
|
|
{"current_steps": 32410, "total_steps": 78105, "loss": 0.2665, "lr": 3.635464029759056e-06, "epoch": 2.0747711414122016, "percentage": 41.5, "elapsed_time": "1:25:32", "remaining_time": "2:00:35", "throughput": 19875.49, "total_tokens": 102001088}
|
|
{"current_steps": 32415, "total_steps": 78105, "loss": 0.3026, "lr": 3.634966294079791e-06, "epoch": 2.075091223353178, "percentage": 41.5, "elapsed_time": "1:25:32", "remaining_time": "2:00:34", "throughput": 19876.0, "total_tokens": 102018048}
|
|
{"current_steps": 32420, "total_steps": 78105, "loss": 0.2463, "lr": 3.6344685017262216e-06, "epoch": 2.075411305294155, "percentage": 41.51, "elapsed_time": "1:25:33", "remaining_time": "2:00:33", "throughput": 19876.43, "total_tokens": 102033344}
|
|
{"current_steps": 32425, "total_steps": 78105, "loss": 0.249, "lr": 3.633970652723203e-06, "epoch": 2.075731387235132, "percentage": 41.51, "elapsed_time": "1:25:34", "remaining_time": "2:00:32", "throughput": 19876.8, "total_tokens": 102048448}
|
|
{"current_steps": 32430, "total_steps": 78105, "loss": 0.2439, "lr": 3.6334727470955978e-06, "epoch": 2.076051469176109, "percentage": 41.52, "elapsed_time": "1:25:34", "remaining_time": "2:00:31", "throughput": 19877.37, "total_tokens": 102065792}
|
|
{"current_steps": 32435, "total_steps": 78105, "loss": 0.1378, "lr": 3.6329747848682674e-06, "epoch": 2.076371551117086, "percentage": 41.53, "elapsed_time": "1:25:35", "remaining_time": "2:00:30", "throughput": 19877.74, "total_tokens": 102080704}
|
|
{"current_steps": 32440, "total_steps": 78105, "loss": 0.3276, "lr": 3.632476766066078e-06, "epoch": 2.0766916330580627, "percentage": 41.53, "elapsed_time": "1:25:36", "remaining_time": "2:00:29", "throughput": 19878.19, "total_tokens": 102096512}
|
|
{"current_steps": 32445, "total_steps": 78105, "loss": 0.2661, "lr": 3.6319786907138973e-06, "epoch": 2.0770117149990397, "percentage": 41.54, "elapsed_time": "1:25:36", "remaining_time": "2:00:29", "throughput": 19878.62, "total_tokens": 102112320}
|
|
{"current_steps": 32450, "total_steps": 78105, "loss": 0.1498, "lr": 3.631480558836597e-06, "epoch": 2.0773317969400167, "percentage": 41.55, "elapsed_time": "1:25:37", "remaining_time": "2:00:28", "throughput": 19879.05, "total_tokens": 102127936}
|
|
{"current_steps": 32455, "total_steps": 78105, "loss": 0.2003, "lr": 3.6309823704590506e-06, "epoch": 2.0776518788809937, "percentage": 41.55, "elapsed_time": "1:25:38", "remaining_time": "2:00:27", "throughput": 19879.47, "total_tokens": 102143552}
|
|
{"current_steps": 32460, "total_steps": 78105, "loss": 0.2259, "lr": 3.6304841256061373e-06, "epoch": 2.07797196082197, "percentage": 41.56, "elapsed_time": "1:25:38", "remaining_time": "2:00:26", "throughput": 19879.91, "total_tokens": 102159104}
|
|
{"current_steps": 32465, "total_steps": 78105, "loss": 0.2576, "lr": 3.629985824302734e-06, "epoch": 2.078292042762947, "percentage": 41.57, "elapsed_time": "1:25:39", "remaining_time": "2:00:25", "throughput": 19880.37, "total_tokens": 102174848}
|
|
{"current_steps": 32470, "total_steps": 78105, "loss": 0.1689, "lr": 3.6294874665737257e-06, "epoch": 2.078612124703924, "percentage": 41.57, "elapsed_time": "1:25:40", "remaining_time": "2:00:24", "throughput": 19880.9, "total_tokens": 102191360}
|
|
{"current_steps": 32475, "total_steps": 78105, "loss": 0.216, "lr": 3.6289890524439956e-06, "epoch": 2.078932206644901, "percentage": 41.58, "elapsed_time": "1:25:40", "remaining_time": "2:00:23", "throughput": 19881.27, "total_tokens": 102206016}
|
|
{"current_steps": 32480, "total_steps": 78105, "loss": 0.2358, "lr": 3.628490581938433e-06, "epoch": 2.079252288585878, "percentage": 41.59, "elapsed_time": "1:25:41", "remaining_time": "2:00:22", "throughput": 19881.68, "total_tokens": 102221312}
|
|
{"current_steps": 32485, "total_steps": 78105, "loss": 0.1419, "lr": 3.627992055081929e-06, "epoch": 2.0795723705268547, "percentage": 41.59, "elapsed_time": "1:25:42", "remaining_time": "2:00:21", "throughput": 19882.1, "total_tokens": 102236928}
|
|
{"current_steps": 32490, "total_steps": 78105, "loss": 0.2382, "lr": 3.6274934718993776e-06, "epoch": 2.0798924524678317, "percentage": 41.6, "elapsed_time": "1:25:42", "remaining_time": "2:00:20", "throughput": 19882.5, "total_tokens": 102252160}
|
|
{"current_steps": 32495, "total_steps": 78105, "loss": 0.2546, "lr": 3.626994832415675e-06, "epoch": 2.0802125344088087, "percentage": 41.6, "elapsed_time": "1:25:43", "remaining_time": "2:00:19", "throughput": 19882.93, "total_tokens": 102267328}
|
|
{"current_steps": 32500, "total_steps": 78105, "loss": 0.2345, "lr": 3.62649613665572e-06, "epoch": 2.0805326163497857, "percentage": 41.61, "elapsed_time": "1:25:44", "remaining_time": "2:00:18", "throughput": 19883.38, "total_tokens": 102283136}
|
|
{"current_steps": 32505, "total_steps": 78105, "loss": 0.2405, "lr": 3.6259973846444162e-06, "epoch": 2.0808526982907622, "percentage": 41.62, "elapsed_time": "1:25:44", "remaining_time": "2:00:17", "throughput": 19883.79, "total_tokens": 102298560}
|
|
{"current_steps": 32510, "total_steps": 78105, "loss": 0.171, "lr": 3.6254985764066688e-06, "epoch": 2.0811727802317392, "percentage": 41.62, "elapsed_time": "1:25:45", "remaining_time": "2:00:16", "throughput": 19884.2, "total_tokens": 102313728}
|
|
{"current_steps": 32515, "total_steps": 78105, "loss": 0.2459, "lr": 3.6249997119673835e-06, "epoch": 2.0814928621727162, "percentage": 41.63, "elapsed_time": "1:25:46", "remaining_time": "2:00:15", "throughput": 19884.59, "total_tokens": 102329088}
|
|
{"current_steps": 32520, "total_steps": 78105, "loss": 0.2137, "lr": 3.6245007913514736e-06, "epoch": 2.081812944113693, "percentage": 41.64, "elapsed_time": "1:25:46", "remaining_time": "2:00:14", "throughput": 19885.03, "total_tokens": 102344768}
|
|
{"current_steps": 32525, "total_steps": 78105, "loss": 0.3477, "lr": 3.6240018145838513e-06, "epoch": 2.08213302605467, "percentage": 41.64, "elapsed_time": "1:25:47", "remaining_time": "2:00:13", "throughput": 19885.47, "total_tokens": 102360256}
|
|
{"current_steps": 32530, "total_steps": 78105, "loss": 0.1191, "lr": 3.623502781689433e-06, "epoch": 2.0824531079956468, "percentage": 41.65, "elapsed_time": "1:25:48", "remaining_time": "2:00:12", "throughput": 19885.96, "total_tokens": 102376640}
|
|
{"current_steps": 32535, "total_steps": 78105, "loss": 0.1723, "lr": 3.623003692693138e-06, "epoch": 2.0827731899366237, "percentage": 41.66, "elapsed_time": "1:25:48", "remaining_time": "2:00:11", "throughput": 19886.36, "total_tokens": 102391744}
|
|
{"current_steps": 32540, "total_steps": 78105, "loss": 0.2269, "lr": 3.6225045476198876e-06, "epoch": 2.0830932718776007, "percentage": 41.66, "elapsed_time": "1:25:49", "remaining_time": "2:00:10", "throughput": 19886.82, "total_tokens": 102407936}
|
|
{"current_steps": 32545, "total_steps": 78105, "loss": 0.1461, "lr": 3.6220053464946074e-06, "epoch": 2.0834133538185777, "percentage": 41.67, "elapsed_time": "1:25:50", "remaining_time": "2:00:09", "throughput": 19887.27, "total_tokens": 102423744}
|
|
{"current_steps": 32550, "total_steps": 78105, "loss": 0.2772, "lr": 3.621506089342225e-06, "epoch": 2.0837334357595543, "percentage": 41.67, "elapsed_time": "1:25:50", "remaining_time": "2:00:08", "throughput": 19887.79, "total_tokens": 102440384}
|
|
{"current_steps": 32555, "total_steps": 78105, "loss": 0.2889, "lr": 3.62100677618767e-06, "epoch": 2.0840535177005313, "percentage": 41.68, "elapsed_time": "1:25:51", "remaining_time": "2:00:07", "throughput": 19888.14, "total_tokens": 102454784}
|
|
{"current_steps": 32560, "total_steps": 78105, "loss": 0.1941, "lr": 3.6205074070558747e-06, "epoch": 2.0843735996415083, "percentage": 41.69, "elapsed_time": "1:25:52", "remaining_time": "2:00:06", "throughput": 19888.63, "total_tokens": 102471232}
|
|
{"current_steps": 32565, "total_steps": 78105, "loss": 0.1543, "lr": 3.620007981971777e-06, "epoch": 2.0846936815824852, "percentage": 41.69, "elapsed_time": "1:25:52", "remaining_time": "2:00:05", "throughput": 19888.99, "total_tokens": 102485824}
|
|
{"current_steps": 32570, "total_steps": 78105, "loss": 0.1995, "lr": 3.6195085009603136e-06, "epoch": 2.085013763523462, "percentage": 41.7, "elapsed_time": "1:25:53", "remaining_time": "2:00:05", "throughput": 19889.34, "total_tokens": 102500928}
|
|
{"current_steps": 32575, "total_steps": 78105, "loss": 0.2033, "lr": 3.6190089640464283e-06, "epoch": 2.085333845464439, "percentage": 41.71, "elapsed_time": "1:25:54", "remaining_time": "2:00:04", "throughput": 19889.77, "total_tokens": 102516608}
|
|
{"current_steps": 32580, "total_steps": 78105, "loss": 0.2386, "lr": 3.6185093712550636e-06, "epoch": 2.0856539274054158, "percentage": 41.71, "elapsed_time": "1:25:54", "remaining_time": "2:00:03", "throughput": 19890.3, "total_tokens": 102533440}
|
|
{"current_steps": 32585, "total_steps": 78105, "loss": 0.1653, "lr": 3.6180097226111667e-06, "epoch": 2.0859740093463928, "percentage": 41.72, "elapsed_time": "1:25:55", "remaining_time": "2:00:02", "throughput": 19890.86, "total_tokens": 102550464}
|
|
{"current_steps": 32590, "total_steps": 78105, "loss": 0.1673, "lr": 3.6175100181396882e-06, "epoch": 2.0862940912873698, "percentage": 41.73, "elapsed_time": "1:25:56", "remaining_time": "2:00:01", "throughput": 19891.33, "total_tokens": 102566848}
|
|
{"current_steps": 32595, "total_steps": 78105, "loss": 0.1834, "lr": 3.6170102578655797e-06, "epoch": 2.0866141732283463, "percentage": 41.73, "elapsed_time": "1:25:57", "remaining_time": "2:00:00", "throughput": 19891.87, "total_tokens": 102583744}
|
|
{"current_steps": 32600, "total_steps": 78105, "loss": 0.167, "lr": 3.6165104418137976e-06, "epoch": 2.0869342551693233, "percentage": 41.74, "elapsed_time": "1:25:57", "remaining_time": "1:59:59", "throughput": 19892.26, "total_tokens": 102598848}
|
|
{"current_steps": 32605, "total_steps": 78105, "loss": 0.1778, "lr": 3.6160105700092994e-06, "epoch": 2.0872543371103003, "percentage": 41.75, "elapsed_time": "1:25:58", "remaining_time": "1:59:58", "throughput": 19892.7, "total_tokens": 102614464}
|
|
{"current_steps": 32610, "total_steps": 78105, "loss": 0.316, "lr": 3.615510642477047e-06, "epoch": 2.0875744190512773, "percentage": 41.75, "elapsed_time": "1:25:59", "remaining_time": "1:59:57", "throughput": 19893.08, "total_tokens": 102629632}
|
|
{"current_steps": 32615, "total_steps": 78105, "loss": 0.205, "lr": 3.615010659242003e-06, "epoch": 2.087894500992254, "percentage": 41.76, "elapsed_time": "1:25:59", "remaining_time": "1:59:56", "throughput": 19893.59, "total_tokens": 102646528}
|
|
{"current_steps": 32620, "total_steps": 78105, "loss": 0.2306, "lr": 3.614510620329135e-06, "epoch": 2.088214582933231, "percentage": 41.76, "elapsed_time": "1:26:00", "remaining_time": "1:59:55", "throughput": 19894.01, "total_tokens": 102662144}
|
|
{"current_steps": 32625, "total_steps": 78105, "loss": 0.254, "lr": 3.6140105257634117e-06, "epoch": 2.088534664874208, "percentage": 41.77, "elapsed_time": "1:26:01", "remaining_time": "1:59:54", "throughput": 19894.45, "total_tokens": 102677760}
|
|
{"current_steps": 32630, "total_steps": 78105, "loss": 0.2597, "lr": 3.613510375569805e-06, "epoch": 2.088854746815185, "percentage": 41.78, "elapsed_time": "1:26:01", "remaining_time": "1:59:53", "throughput": 19895.02, "total_tokens": 102695232}
|
|
{"current_steps": 32635, "total_steps": 78105, "loss": 0.1964, "lr": 3.6130101697732906e-06, "epoch": 2.089174828756162, "percentage": 41.78, "elapsed_time": "1:26:02", "remaining_time": "1:59:52", "throughput": 19895.43, "total_tokens": 102710464}
|
|
{"current_steps": 32640, "total_steps": 78105, "loss": 0.262, "lr": 3.612509908398846e-06, "epoch": 2.0894949106971383, "percentage": 41.79, "elapsed_time": "1:26:03", "remaining_time": "1:59:51", "throughput": 19895.88, "total_tokens": 102726528}
|
|
{"current_steps": 32645, "total_steps": 78105, "loss": 0.228, "lr": 3.6120095914714514e-06, "epoch": 2.0898149926381153, "percentage": 41.8, "elapsed_time": "1:26:03", "remaining_time": "1:59:50", "throughput": 19896.33, "total_tokens": 102742208}
|
|
{"current_steps": 32650, "total_steps": 78105, "loss": 0.2535, "lr": 3.61150921901609e-06, "epoch": 2.0901350745790923, "percentage": 41.8, "elapsed_time": "1:26:04", "remaining_time": "1:59:50", "throughput": 19896.79, "total_tokens": 102758144}
|
|
{"current_steps": 32655, "total_steps": 78105, "loss": 0.337, "lr": 3.6110087910577474e-06, "epoch": 2.0904551565200693, "percentage": 41.81, "elapsed_time": "1:26:05", "remaining_time": "1:59:49", "throughput": 19897.19, "total_tokens": 102773440}
|
|
{"current_steps": 32660, "total_steps": 78105, "loss": 0.3579, "lr": 3.610508307621413e-06, "epoch": 2.090775238461046, "percentage": 41.82, "elapsed_time": "1:26:05", "remaining_time": "1:59:48", "throughput": 19897.68, "total_tokens": 102789824}
|
|
{"current_steps": 32665, "total_steps": 78105, "loss": 0.3105, "lr": 3.6100077687320783e-06, "epoch": 2.091095320402023, "percentage": 41.82, "elapsed_time": "1:26:06", "remaining_time": "1:59:47", "throughput": 19898.33, "total_tokens": 102808000}
|
|
{"current_steps": 32670, "total_steps": 78105, "loss": 0.2647, "lr": 3.6095071744147377e-06, "epoch": 2.091415402343, "percentage": 41.83, "elapsed_time": "1:26:07", "remaining_time": "1:59:46", "throughput": 19898.79, "total_tokens": 102824064}
|
|
{"current_steps": 32675, "total_steps": 78105, "loss": 0.2256, "lr": 3.6090065246943875e-06, "epoch": 2.091735484283977, "percentage": 41.83, "elapsed_time": "1:26:08", "remaining_time": "1:59:45", "throughput": 19899.35, "total_tokens": 102841408}
|
|
{"current_steps": 32680, "total_steps": 78105, "loss": 0.2509, "lr": 3.608505819596028e-06, "epoch": 2.0920555662249534, "percentage": 41.84, "elapsed_time": "1:26:08", "remaining_time": "1:59:44", "throughput": 19899.76, "total_tokens": 102856768}
|
|
{"current_steps": 32685, "total_steps": 78105, "loss": 0.2774, "lr": 3.608005059144662e-06, "epoch": 2.0923756481659304, "percentage": 41.85, "elapsed_time": "1:26:09", "remaining_time": "1:59:43", "throughput": 19900.22, "total_tokens": 102872640}
|
|
{"current_steps": 32690, "total_steps": 78105, "loss": 0.1289, "lr": 3.6075042433652953e-06, "epoch": 2.0926957301069073, "percentage": 41.85, "elapsed_time": "1:26:10", "remaining_time": "1:59:42", "throughput": 19900.68, "total_tokens": 102888640}
|
|
{"current_steps": 32695, "total_steps": 78105, "loss": 0.2728, "lr": 3.607003372282935e-06, "epoch": 2.0930158120478843, "percentage": 41.86, "elapsed_time": "1:26:10", "remaining_time": "1:59:41", "throughput": 19900.97, "total_tokens": 102902400}
|
|
{"current_steps": 32700, "total_steps": 78105, "loss": 0.2277, "lr": 3.606502445922593e-06, "epoch": 2.0933358939888613, "percentage": 41.87, "elapsed_time": "1:26:11", "remaining_time": "1:59:40", "throughput": 19901.3, "total_tokens": 102916992}
|
|
{"current_steps": 32705, "total_steps": 78105, "loss": 0.1895, "lr": 3.6060014643092815e-06, "epoch": 2.093655975929838, "percentage": 41.87, "elapsed_time": "1:26:12", "remaining_time": "1:59:39", "throughput": 19901.7, "total_tokens": 102932352}
|
|
{"current_steps": 32710, "total_steps": 78105, "loss": 0.1889, "lr": 3.6055004274680183e-06, "epoch": 2.093976057870815, "percentage": 41.88, "elapsed_time": "1:26:12", "remaining_time": "1:59:38", "throughput": 19902.09, "total_tokens": 102947392}
|
|
{"current_steps": 32715, "total_steps": 78105, "loss": 0.2022, "lr": 3.6049993354238223e-06, "epoch": 2.094296139811792, "percentage": 41.89, "elapsed_time": "1:26:13", "remaining_time": "1:59:37", "throughput": 19902.59, "total_tokens": 102963520}
|
|
{"current_steps": 32720, "total_steps": 78105, "loss": 0.2681, "lr": 3.604498188201715e-06, "epoch": 2.094616221752769, "percentage": 41.89, "elapsed_time": "1:26:14", "remaining_time": "1:59:36", "throughput": 19902.96, "total_tokens": 102978432}
|
|
{"current_steps": 32725, "total_steps": 78105, "loss": 0.2638, "lr": 3.6039969858267208e-06, "epoch": 2.0949363036937454, "percentage": 41.9, "elapsed_time": "1:26:14", "remaining_time": "1:59:35", "throughput": 19903.38, "total_tokens": 102993856}
|
|
{"current_steps": 32730, "total_steps": 78105, "loss": 0.174, "lr": 3.6034957283238686e-06, "epoch": 2.0952563856347224, "percentage": 41.91, "elapsed_time": "1:26:15", "remaining_time": "1:59:34", "throughput": 19903.77, "total_tokens": 103008768}
|
|
{"current_steps": 32735, "total_steps": 78105, "loss": 0.2459, "lr": 3.6029944157181868e-06, "epoch": 2.0955764675756994, "percentage": 41.91, "elapsed_time": "1:26:16", "remaining_time": "1:59:33", "throughput": 19904.27, "total_tokens": 103025408}
|
|
{"current_steps": 32740, "total_steps": 78105, "loss": 0.1956, "lr": 3.6024930480347097e-06, "epoch": 2.0958965495166764, "percentage": 41.92, "elapsed_time": "1:26:16", "remaining_time": "1:59:32", "throughput": 19904.68, "total_tokens": 103040640}
|
|
{"current_steps": 32745, "total_steps": 78105, "loss": 0.1956, "lr": 3.6019916252984715e-06, "epoch": 2.0962166314576534, "percentage": 41.92, "elapsed_time": "1:26:17", "remaining_time": "1:59:31", "throughput": 19905.12, "total_tokens": 103056192}
|
|
{"current_steps": 32750, "total_steps": 78105, "loss": 0.1755, "lr": 3.6014901475345126e-06, "epoch": 2.09653671339863, "percentage": 41.93, "elapsed_time": "1:26:18", "remaining_time": "1:59:30", "throughput": 19905.51, "total_tokens": 103071552}
|
|
{"current_steps": 32755, "total_steps": 78105, "loss": 0.1854, "lr": 3.600988614767873e-06, "epoch": 2.096856795339607, "percentage": 41.94, "elapsed_time": "1:26:18", "remaining_time": "1:59:30", "throughput": 19905.98, "total_tokens": 103087936}
|
|
{"current_steps": 32760, "total_steps": 78105, "loss": 0.2523, "lr": 3.6004870270235968e-06, "epoch": 2.097176877280584, "percentage": 41.94, "elapsed_time": "1:26:19", "remaining_time": "1:59:29", "throughput": 19906.43, "total_tokens": 103104384}
|
|
{"current_steps": 32765, "total_steps": 78105, "loss": 0.1531, "lr": 3.599985384326731e-06, "epoch": 2.097496959221561, "percentage": 41.95, "elapsed_time": "1:26:20", "remaining_time": "1:59:28", "throughput": 19906.87, "total_tokens": 103119936}
|
|
{"current_steps": 32770, "total_steps": 78105, "loss": 0.1785, "lr": 3.599483686702324e-06, "epoch": 2.0978170411625374, "percentage": 41.96, "elapsed_time": "1:26:20", "remaining_time": "1:59:27", "throughput": 19907.31, "total_tokens": 103135744}
|
|
{"current_steps": 32775, "total_steps": 78105, "loss": 0.2357, "lr": 3.5989819341754295e-06, "epoch": 2.0981371231035144, "percentage": 41.96, "elapsed_time": "1:26:21", "remaining_time": "1:59:26", "throughput": 19907.84, "total_tokens": 103152704}
|
|
{"current_steps": 32780, "total_steps": 78105, "loss": 0.2085, "lr": 3.598480126771101e-06, "epoch": 2.0984572050444914, "percentage": 41.97, "elapsed_time": "1:26:22", "remaining_time": "1:59:25", "throughput": 19908.19, "total_tokens": 103167360}
|
|
{"current_steps": 32785, "total_steps": 78105, "loss": 0.2125, "lr": 3.597978264514397e-06, "epoch": 2.0987772869854684, "percentage": 41.98, "elapsed_time": "1:26:22", "remaining_time": "1:59:24", "throughput": 19908.57, "total_tokens": 103182336}
|
|
{"current_steps": 32790, "total_steps": 78105, "loss": 0.2493, "lr": 3.5974763474303776e-06, "epoch": 2.0990973689264454, "percentage": 41.98, "elapsed_time": "1:26:23", "remaining_time": "1:59:23", "throughput": 19909.01, "total_tokens": 103198336}
|
|
{"current_steps": 32795, "total_steps": 78105, "loss": 0.1539, "lr": 3.5969743755441055e-06, "epoch": 2.099417450867422, "percentage": 41.99, "elapsed_time": "1:26:24", "remaining_time": "1:59:22", "throughput": 19909.49, "total_tokens": 103214272}
|
|
{"current_steps": 32800, "total_steps": 78105, "loss": 0.1957, "lr": 3.5964723488806473e-06, "epoch": 2.099737532808399, "percentage": 41.99, "elapsed_time": "1:26:24", "remaining_time": "1:59:21", "throughput": 19910.21, "total_tokens": 103233792}
|
|
{"current_steps": 32805, "total_steps": 78105, "loss": 0.2853, "lr": 3.5959702674650717e-06, "epoch": 2.100057614749376, "percentage": 42.0, "elapsed_time": "1:26:25", "remaining_time": "1:59:20", "throughput": 19910.72, "total_tokens": 103250368}
|
|
{"current_steps": 32810, "total_steps": 78105, "loss": 0.2134, "lr": 3.595468131322449e-06, "epoch": 2.100377696690353, "percentage": 42.01, "elapsed_time": "1:26:26", "remaining_time": "1:59:19", "throughput": 19911.23, "total_tokens": 103266560}
|
|
{"current_steps": 32815, "total_steps": 78105, "loss": 0.1418, "lr": 3.594965940477855e-06, "epoch": 2.1006977786313294, "percentage": 42.01, "elapsed_time": "1:26:27", "remaining_time": "1:59:18", "throughput": 19911.79, "total_tokens": 103283456}
|
|
{"current_steps": 32820, "total_steps": 78105, "loss": 0.2374, "lr": 3.5944636949563644e-06, "epoch": 2.1010178605723064, "percentage": 42.02, "elapsed_time": "1:26:27", "remaining_time": "1:59:18", "throughput": 19912.36, "total_tokens": 103300864}
|
|
{"current_steps": 32825, "total_steps": 78105, "loss": 0.1968, "lr": 3.5939613947830576e-06, "epoch": 2.1013379425132834, "percentage": 42.03, "elapsed_time": "1:26:28", "remaining_time": "1:59:17", "throughput": 19912.73, "total_tokens": 103315648}
|
|
{"current_steps": 32830, "total_steps": 78105, "loss": 0.2184, "lr": 3.593459039983017e-06, "epoch": 2.1016580244542604, "percentage": 42.03, "elapsed_time": "1:26:29", "remaining_time": "1:59:16", "throughput": 19913.12, "total_tokens": 103331136}
|
|
{"current_steps": 32835, "total_steps": 78105, "loss": 0.2968, "lr": 3.5929566305813286e-06, "epoch": 2.101978106395237, "percentage": 42.04, "elapsed_time": "1:26:29", "remaining_time": "1:59:15", "throughput": 19913.5, "total_tokens": 103346304}
|
|
{"current_steps": 32840, "total_steps": 78105, "loss": 0.1904, "lr": 3.592454166603079e-06, "epoch": 2.102298188336214, "percentage": 42.05, "elapsed_time": "1:26:30", "remaining_time": "1:59:14", "throughput": 19913.88, "total_tokens": 103361600}
|
|
{"current_steps": 32845, "total_steps": 78105, "loss": 0.1331, "lr": 3.5919516480733573e-06, "epoch": 2.102618270277191, "percentage": 42.05, "elapsed_time": "1:26:31", "remaining_time": "1:59:13", "throughput": 19914.26, "total_tokens": 103376768}
|
|
{"current_steps": 32850, "total_steps": 78105, "loss": 0.2789, "lr": 3.5914490750172594e-06, "epoch": 2.102938352218168, "percentage": 42.06, "elapsed_time": "1:26:31", "remaining_time": "1:59:12", "throughput": 19914.7, "total_tokens": 103392384}
|
|
{"current_steps": 32855, "total_steps": 78105, "loss": 0.2005, "lr": 3.5909464474598786e-06, "epoch": 2.103258434159145, "percentage": 42.07, "elapsed_time": "1:26:32", "remaining_time": "1:59:11", "throughput": 19915.13, "total_tokens": 103408320}
|
|
{"current_steps": 32860, "total_steps": 78105, "loss": 0.3303, "lr": 3.5904437654263157e-06, "epoch": 2.1035785161001215, "percentage": 42.07, "elapsed_time": "1:26:33", "remaining_time": "1:59:10", "throughput": 19915.52, "total_tokens": 103423360}
|
|
{"current_steps": 32865, "total_steps": 78105, "loss": 0.2978, "lr": 3.589941028941671e-06, "epoch": 2.1038985980410985, "percentage": 42.08, "elapsed_time": "1:26:33", "remaining_time": "1:59:09", "throughput": 19915.96, "total_tokens": 103439424}
|
|
{"current_steps": 32870, "total_steps": 78105, "loss": 0.1799, "lr": 3.5894382380310483e-06, "epoch": 2.1042186799820755, "percentage": 42.08, "elapsed_time": "1:26:34", "remaining_time": "1:59:08", "throughput": 19916.35, "total_tokens": 103454848}
|
|
{"current_steps": 32875, "total_steps": 78105, "loss": 0.2245, "lr": 3.5889353927195546e-06, "epoch": 2.1045387619230524, "percentage": 42.09, "elapsed_time": "1:26:35", "remaining_time": "1:59:07", "throughput": 19916.81, "total_tokens": 103470656}
|
|
{"current_steps": 32880, "total_steps": 78105, "loss": 0.2617, "lr": 3.5884324930323e-06, "epoch": 2.104858843864029, "percentage": 42.1, "elapsed_time": "1:26:35", "remaining_time": "1:59:06", "throughput": 19917.25, "total_tokens": 103486464}
|
|
{"current_steps": 32885, "total_steps": 78105, "loss": 0.2073, "lr": 3.5879295389943957e-06, "epoch": 2.105178925805006, "percentage": 42.1, "elapsed_time": "1:26:36", "remaining_time": "1:59:05", "throughput": 19917.64, "total_tokens": 103501312}
|
|
{"current_steps": 32890, "total_steps": 78105, "loss": 0.2908, "lr": 3.5874265306309574e-06, "epoch": 2.105499007745983, "percentage": 42.11, "elapsed_time": "1:26:37", "remaining_time": "1:59:04", "throughput": 19918.1, "total_tokens": 103517376}
|
|
{"current_steps": 32895, "total_steps": 78105, "loss": 0.1572, "lr": 3.5869234679671024e-06, "epoch": 2.10581908968696, "percentage": 42.12, "elapsed_time": "1:26:37", "remaining_time": "1:59:03", "throughput": 19918.55, "total_tokens": 103532928}
|
|
{"current_steps": 32900, "total_steps": 78105, "loss": 0.2208, "lr": 3.5864203510279503e-06, "epoch": 2.106139171627937, "percentage": 42.12, "elapsed_time": "1:26:38", "remaining_time": "1:59:02", "throughput": 19918.94, "total_tokens": 103548096}
|
|
{"current_steps": 32905, "total_steps": 78105, "loss": 0.1649, "lr": 3.5859171798386257e-06, "epoch": 2.1064592535689135, "percentage": 42.13, "elapsed_time": "1:26:39", "remaining_time": "1:59:01", "throughput": 19919.48, "total_tokens": 103564736}
|
|
{"current_steps": 32910, "total_steps": 78105, "loss": 0.1681, "lr": 3.585413954424252e-06, "epoch": 2.1067793355098905, "percentage": 42.14, "elapsed_time": "1:26:39", "remaining_time": "1:59:00", "throughput": 19919.92, "total_tokens": 103580800}
|
|
{"current_steps": 32915, "total_steps": 78105, "loss": 0.2048, "lr": 3.5849106748099606e-06, "epoch": 2.1070994174508675, "percentage": 42.14, "elapsed_time": "1:26:40", "remaining_time": "1:58:59", "throughput": 19920.33, "total_tokens": 103596480}
|
|
{"current_steps": 32920, "total_steps": 78105, "loss": 0.1806, "lr": 3.58440734102088e-06, "epoch": 2.1074194993918445, "percentage": 42.15, "elapsed_time": "1:26:41", "remaining_time": "1:58:59", "throughput": 19920.77, "total_tokens": 103612416}
|
|
{"current_steps": 32925, "total_steps": 78105, "loss": 0.1449, "lr": 3.583903953082146e-06, "epoch": 2.107739581332821, "percentage": 42.15, "elapsed_time": "1:26:41", "remaining_time": "1:58:58", "throughput": 19921.13, "total_tokens": 103627648}
|
|
{"current_steps": 32930, "total_steps": 78105, "loss": 0.2911, "lr": 3.5834005110188953e-06, "epoch": 2.108059663273798, "percentage": 42.16, "elapsed_time": "1:26:42", "remaining_time": "1:58:57", "throughput": 19921.67, "total_tokens": 103644736}
|
|
{"current_steps": 32935, "total_steps": 78105, "loss": 0.3524, "lr": 3.582897014856265e-06, "epoch": 2.108379745214775, "percentage": 42.17, "elapsed_time": "1:26:43", "remaining_time": "1:58:56", "throughput": 19922.21, "total_tokens": 103661760}
|
|
{"current_steps": 32940, "total_steps": 78105, "loss": 0.3149, "lr": 3.5823934646193986e-06, "epoch": 2.108699827155752, "percentage": 42.17, "elapsed_time": "1:26:43", "remaining_time": "1:58:55", "throughput": 19922.62, "total_tokens": 103677184}
|
|
{"current_steps": 32945, "total_steps": 78105, "loss": 0.1633, "lr": 3.5818898603334413e-06, "epoch": 2.1090199090967285, "percentage": 42.18, "elapsed_time": "1:26:44", "remaining_time": "1:58:54", "throughput": 19923.1, "total_tokens": 103693632}
|
|
{"current_steps": 32950, "total_steps": 78105, "loss": 0.12, "lr": 3.581386202023539e-06, "epoch": 2.1093399910377055, "percentage": 42.19, "elapsed_time": "1:26:45", "remaining_time": "1:58:53", "throughput": 19923.47, "total_tokens": 103708800}
|
|
{"current_steps": 32955, "total_steps": 78105, "loss": 0.1988, "lr": 3.580882489714843e-06, "epoch": 2.1096600729786825, "percentage": 42.19, "elapsed_time": "1:26:46", "remaining_time": "1:58:52", "throughput": 19924.1, "total_tokens": 103726656}
|
|
{"current_steps": 32960, "total_steps": 78105, "loss": 0.2024, "lr": 3.5803787234325057e-06, "epoch": 2.1099801549196595, "percentage": 42.2, "elapsed_time": "1:26:46", "remaining_time": "1:58:51", "throughput": 19924.56, "total_tokens": 103742848}
|
|
{"current_steps": 32965, "total_steps": 78105, "loss": 0.2063, "lr": 3.579874903201681e-06, "epoch": 2.1103002368606365, "percentage": 42.21, "elapsed_time": "1:26:47", "remaining_time": "1:58:50", "throughput": 19924.96, "total_tokens": 103758464}
|
|
{"current_steps": 32970, "total_steps": 78105, "loss": 0.2494, "lr": 3.57937102904753e-06, "epoch": 2.110620318801613, "percentage": 42.21, "elapsed_time": "1:26:48", "remaining_time": "1:58:49", "throughput": 19925.56, "total_tokens": 103776128}
|
|
{"current_steps": 32975, "total_steps": 78105, "loss": 0.26, "lr": 3.578867100995211e-06, "epoch": 2.11094040074259, "percentage": 42.22, "elapsed_time": "1:26:48", "remaining_time": "1:58:48", "throughput": 19925.96, "total_tokens": 103791168}
|
|
{"current_steps": 32980, "total_steps": 78105, "loss": 0.1586, "lr": 3.578363119069889e-06, "epoch": 2.111260482683567, "percentage": 42.23, "elapsed_time": "1:26:49", "remaining_time": "1:58:47", "throughput": 19926.45, "total_tokens": 103807808}
|
|
{"current_steps": 32985, "total_steps": 78105, "loss": 0.2688, "lr": 3.5778590832967305e-06, "epoch": 2.111580564624544, "percentage": 42.23, "elapsed_time": "1:26:50", "remaining_time": "1:58:47", "throughput": 19926.84, "total_tokens": 103823040}
|
|
{"current_steps": 32990, "total_steps": 78105, "loss": 0.2284, "lr": 3.577354993700903e-06, "epoch": 2.1119006465655206, "percentage": 42.24, "elapsed_time": "1:26:50", "remaining_time": "1:58:46", "throughput": 19927.17, "total_tokens": 103837632}
|
|
{"current_steps": 32995, "total_steps": 78105, "loss": 0.2555, "lr": 3.5768508503075795e-06, "epoch": 2.1122207285064976, "percentage": 42.24, "elapsed_time": "1:26:51", "remaining_time": "1:58:45", "throughput": 19927.59, "total_tokens": 103853440}
|
|
{"current_steps": 33000, "total_steps": 78105, "loss": 0.2679, "lr": 3.5763466531419328e-06, "epoch": 2.1125408104474745, "percentage": 42.25, "elapsed_time": "1:26:52", "remaining_time": "1:58:44", "throughput": 19928.01, "total_tokens": 103869120}
|
|
{"current_steps": 33005, "total_steps": 78105, "loss": 0.315, "lr": 3.5758424022291415e-06, "epoch": 2.1128608923884515, "percentage": 42.26, "elapsed_time": "1:26:52", "remaining_time": "1:58:43", "throughput": 19928.41, "total_tokens": 103884608}
|
|
{"current_steps": 33010, "total_steps": 78105, "loss": 0.1961, "lr": 3.575338097594384e-06, "epoch": 2.1131809743294285, "percentage": 42.26, "elapsed_time": "1:26:53", "remaining_time": "1:58:42", "throughput": 19928.92, "total_tokens": 103901056}
|
|
{"current_steps": 33015, "total_steps": 78105, "loss": 0.2865, "lr": 3.5748337392628434e-06, "epoch": 2.113501056270405, "percentage": 42.27, "elapsed_time": "1:26:54", "remaining_time": "1:58:41", "throughput": 19929.31, "total_tokens": 103916160}
|
|
{"current_steps": 33020, "total_steps": 78105, "loss": 0.1648, "lr": 3.5743293272597045e-06, "epoch": 2.113821138211382, "percentage": 42.28, "elapsed_time": "1:26:54", "remaining_time": "1:58:40", "throughput": 19929.71, "total_tokens": 103931328}
|
|
{"current_steps": 33025, "total_steps": 78105, "loss": 0.1693, "lr": 3.5738248616101545e-06, "epoch": 2.114141220152359, "percentage": 42.28, "elapsed_time": "1:26:55", "remaining_time": "1:58:39", "throughput": 19930.15, "total_tokens": 103947392}
|
|
{"current_steps": 33030, "total_steps": 78105, "loss": 0.1568, "lr": 3.573320342339384e-06, "epoch": 2.114461302093336, "percentage": 42.29, "elapsed_time": "1:26:56", "remaining_time": "1:58:38", "throughput": 19930.53, "total_tokens": 103962496}
|
|
{"current_steps": 33035, "total_steps": 78105, "loss": 0.1916, "lr": 3.5728157694725875e-06, "epoch": 2.1147813840343126, "percentage": 42.3, "elapsed_time": "1:26:56", "remaining_time": "1:58:37", "throughput": 19931.0, "total_tokens": 103978688}
|
|
{"current_steps": 33040, "total_steps": 78105, "loss": 0.194, "lr": 3.5723111430349587e-06, "epoch": 2.1151014659752896, "percentage": 42.3, "elapsed_time": "1:26:57", "remaining_time": "1:58:36", "throughput": 19931.49, "total_tokens": 103995392}
|
|
{"current_steps": 33045, "total_steps": 78105, "loss": 0.3004, "lr": 3.5718064630516975e-06, "epoch": 2.1154215479162666, "percentage": 42.31, "elapsed_time": "1:26:58", "remaining_time": "1:58:35", "throughput": 19931.96, "total_tokens": 104011648}
|
|
{"current_steps": 33050, "total_steps": 78105, "loss": 0.2046, "lr": 3.571301729548004e-06, "epoch": 2.1157416298572436, "percentage": 42.31, "elapsed_time": "1:26:59", "remaining_time": "1:58:34", "throughput": 19932.41, "total_tokens": 104027392}
|
|
{"current_steps": 33055, "total_steps": 78105, "loss": 0.1911, "lr": 3.570796942549082e-06, "epoch": 2.1160617117982206, "percentage": 42.32, "elapsed_time": "1:26:59", "remaining_time": "1:58:33", "throughput": 19932.84, "total_tokens": 104043008}
|
|
{"current_steps": 33060, "total_steps": 78105, "loss": 0.2238, "lr": 3.5702921020801385e-06, "epoch": 2.116381793739197, "percentage": 42.33, "elapsed_time": "1:27:00", "remaining_time": "1:58:32", "throughput": 19933.36, "total_tokens": 104059968}
|
|
{"current_steps": 33065, "total_steps": 78105, "loss": 0.2035, "lr": 3.5697872081663826e-06, "epoch": 2.116701875680174, "percentage": 42.33, "elapsed_time": "1:27:01", "remaining_time": "1:58:31", "throughput": 19933.76, "total_tokens": 104075456}
|
|
{"current_steps": 33070, "total_steps": 78105, "loss": 0.2817, "lr": 3.569282260833026e-06, "epoch": 2.117021957621151, "percentage": 42.34, "elapsed_time": "1:27:01", "remaining_time": "1:58:31", "throughput": 19934.2, "total_tokens": 104091264}
|
|
{"current_steps": 33075, "total_steps": 78105, "loss": 0.15, "lr": 3.568777260105283e-06, "epoch": 2.117342039562128, "percentage": 42.35, "elapsed_time": "1:27:02", "remaining_time": "1:58:30", "throughput": 19934.63, "total_tokens": 104107008}
|
|
{"current_steps": 33080, "total_steps": 78105, "loss": 0.2618, "lr": 3.5682722060083695e-06, "epoch": 2.1176621215031046, "percentage": 42.35, "elapsed_time": "1:27:03", "remaining_time": "1:58:29", "throughput": 19935.01, "total_tokens": 104122304}
|
|
{"current_steps": 33085, "total_steps": 78105, "loss": 0.1627, "lr": 3.5677670985675072e-06, "epoch": 2.1179822034440816, "percentage": 42.36, "elapsed_time": "1:27:03", "remaining_time": "1:58:28", "throughput": 19935.42, "total_tokens": 104137600}
|
|
{"current_steps": 33090, "total_steps": 78105, "loss": 0.3528, "lr": 3.5672619378079183e-06, "epoch": 2.1183022853850586, "percentage": 42.37, "elapsed_time": "1:27:04", "remaining_time": "1:58:27", "throughput": 19935.87, "total_tokens": 104153344}
|
|
{"current_steps": 33095, "total_steps": 78105, "loss": 0.2339, "lr": 3.566756723754827e-06, "epoch": 2.1186223673260356, "percentage": 42.37, "elapsed_time": "1:27:05", "remaining_time": "1:58:26", "throughput": 19936.17, "total_tokens": 104167680}
|
|
{"current_steps": 33100, "total_steps": 78105, "loss": 0.23, "lr": 3.5662514564334616e-06, "epoch": 2.118942449267012, "percentage": 42.38, "elapsed_time": "1:27:05", "remaining_time": "1:58:25", "throughput": 19936.52, "total_tokens": 104182464}
|
|
{"current_steps": 33105, "total_steps": 78105, "loss": 0.2296, "lr": 3.5657461358690526e-06, "epoch": 2.119262531207989, "percentage": 42.39, "elapsed_time": "1:27:06", "remaining_time": "1:58:24", "throughput": 19936.93, "total_tokens": 104197760}
|
|
{"current_steps": 33110, "total_steps": 78105, "loss": 0.2549, "lr": 3.5652407620868323e-06, "epoch": 2.119582613148966, "percentage": 42.39, "elapsed_time": "1:27:07", "remaining_time": "1:58:23", "throughput": 19937.48, "total_tokens": 104215168}
|
|
{"current_steps": 33115, "total_steps": 78105, "loss": 0.187, "lr": 3.5647353351120374e-06, "epoch": 2.119902695089943, "percentage": 42.4, "elapsed_time": "1:27:07", "remaining_time": "1:58:22", "throughput": 19937.91, "total_tokens": 104230720}
|
|
{"current_steps": 33120, "total_steps": 78105, "loss": 0.1922, "lr": 3.5642298549699054e-06, "epoch": 2.12022277703092, "percentage": 42.4, "elapsed_time": "1:27:08", "remaining_time": "1:58:21", "throughput": 19938.32, "total_tokens": 104246144}
|
|
{"current_steps": 33125, "total_steps": 78105, "loss": 0.2326, "lr": 3.563724321685678e-06, "epoch": 2.1205428589718966, "percentage": 42.41, "elapsed_time": "1:27:09", "remaining_time": "1:58:20", "throughput": 19938.75, "total_tokens": 104261568}
|
|
{"current_steps": 33130, "total_steps": 78105, "loss": 0.1748, "lr": 3.563218735284599e-06, "epoch": 2.1208629409128736, "percentage": 42.42, "elapsed_time": "1:27:09", "remaining_time": "1:58:19", "throughput": 19939.28, "total_tokens": 104278528}
|
|
{"current_steps": 33135, "total_steps": 78105, "loss": 0.1412, "lr": 3.5627130957919144e-06, "epoch": 2.1211830228538506, "percentage": 42.42, "elapsed_time": "1:27:10", "remaining_time": "1:58:18", "throughput": 19939.68, "total_tokens": 104293760}
|
|
{"current_steps": 33140, "total_steps": 78105, "loss": 0.2032, "lr": 3.562207403232873e-06, "epoch": 2.1215031047948276, "percentage": 42.43, "elapsed_time": "1:27:11", "remaining_time": "1:58:17", "throughput": 19940.06, "total_tokens": 104308928}
|
|
{"current_steps": 33145, "total_steps": 78105, "loss": 0.223, "lr": 3.561701657632726e-06, "epoch": 2.121823186735804, "percentage": 42.44, "elapsed_time": "1:27:12", "remaining_time": "1:58:17", "throughput": 19939.55, "total_tokens": 104324032}
|
|
{"current_steps": 33150, "total_steps": 78105, "loss": 0.3532, "lr": 3.5611958590167295e-06, "epoch": 2.122143268676781, "percentage": 42.44, "elapsed_time": "1:27:12", "remaining_time": "1:58:16", "throughput": 19939.88, "total_tokens": 104338816}
|
|
{"current_steps": 33155, "total_steps": 78105, "loss": 0.334, "lr": 3.560690007410139e-06, "epoch": 2.122463350617758, "percentage": 42.45, "elapsed_time": "1:27:13", "remaining_time": "1:58:15", "throughput": 19940.27, "total_tokens": 104353984}
|
|
{"current_steps": 33160, "total_steps": 78105, "loss": 0.2286, "lr": 3.560184102838215e-06, "epoch": 2.122783432558735, "percentage": 42.46, "elapsed_time": "1:27:13", "remaining_time": "1:58:14", "throughput": 19940.67, "total_tokens": 104369408}
|
|
{"current_steps": 33165, "total_steps": 78105, "loss": 0.2311, "lr": 3.5596781453262193e-06, "epoch": 2.123103514499712, "percentage": 42.46, "elapsed_time": "1:27:14", "remaining_time": "1:58:13", "throughput": 19941.08, "total_tokens": 104384832}
|
|
{"current_steps": 33170, "total_steps": 78105, "loss": 0.2808, "lr": 3.5591721348994157e-06, "epoch": 2.1234235964406887, "percentage": 42.47, "elapsed_time": "1:27:15", "remaining_time": "1:58:12", "throughput": 19941.44, "total_tokens": 104400000}
|
|
{"current_steps": 33175, "total_steps": 78105, "loss": 0.273, "lr": 3.5586660715830727e-06, "epoch": 2.1237436783816657, "percentage": 42.47, "elapsed_time": "1:27:16", "remaining_time": "1:58:11", "throughput": 19941.95, "total_tokens": 104416704}
|
|
{"current_steps": 33180, "total_steps": 78105, "loss": 0.1704, "lr": 3.5581599554024615e-06, "epoch": 2.1240637603226427, "percentage": 42.48, "elapsed_time": "1:27:16", "remaining_time": "1:58:10", "throughput": 19942.4, "total_tokens": 104432640}
|
|
{"current_steps": 33185, "total_steps": 78105, "loss": 0.2523, "lr": 3.557653786382853e-06, "epoch": 2.1243838422636196, "percentage": 42.49, "elapsed_time": "1:27:17", "remaining_time": "1:58:09", "throughput": 19943.04, "total_tokens": 104450880}
|
|
{"current_steps": 33190, "total_steps": 78105, "loss": 0.2722, "lr": 3.5571475645495244e-06, "epoch": 2.124703924204596, "percentage": 42.49, "elapsed_time": "1:27:18", "remaining_time": "1:58:08", "throughput": 19943.44, "total_tokens": 104466240}
|
|
{"current_steps": 33195, "total_steps": 78105, "loss": 0.2074, "lr": 3.556641289927752e-06, "epoch": 2.125024006145573, "percentage": 42.5, "elapsed_time": "1:27:18", "remaining_time": "1:58:07", "throughput": 19943.78, "total_tokens": 104481024}
|
|
{"current_steps": 33200, "total_steps": 78105, "loss": 0.1555, "lr": 3.556134962542818e-06, "epoch": 2.12534408808655, "percentage": 42.51, "elapsed_time": "1:27:19", "remaining_time": "1:58:06", "throughput": 19944.4, "total_tokens": 104498880}
|
|
{"current_steps": 33205, "total_steps": 78105, "loss": 0.2333, "lr": 3.5556285824200043e-06, "epoch": 2.125664170027527, "percentage": 42.51, "elapsed_time": "1:27:20", "remaining_time": "1:58:05", "throughput": 19944.85, "total_tokens": 104515008}
|
|
{"current_steps": 33210, "total_steps": 78105, "loss": 0.2938, "lr": 3.5551221495845987e-06, "epoch": 2.1259842519685037, "percentage": 42.52, "elapsed_time": "1:27:20", "remaining_time": "1:58:04", "throughput": 19945.26, "total_tokens": 104530368}
|
|
{"current_steps": 33215, "total_steps": 78105, "loss": 0.2368, "lr": 3.5546156640618888e-06, "epoch": 2.1263043339094807, "percentage": 42.53, "elapsed_time": "1:27:21", "remaining_time": "1:58:03", "throughput": 19945.76, "total_tokens": 104546944}
|
|
{"current_steps": 33220, "total_steps": 78105, "loss": 0.2845, "lr": 3.5541091258771652e-06, "epoch": 2.1266244158504577, "percentage": 42.53, "elapsed_time": "1:27:22", "remaining_time": "1:58:02", "throughput": 19946.09, "total_tokens": 104561344}
|
|
{"current_steps": 33225, "total_steps": 78105, "loss": 0.273, "lr": 3.5536025350557235e-06, "epoch": 2.1269444977914347, "percentage": 42.54, "elapsed_time": "1:27:22", "remaining_time": "1:58:01", "throughput": 19946.49, "total_tokens": 104576512}
|
|
{"current_steps": 33230, "total_steps": 78105, "loss": 0.2434, "lr": 3.5530958916228584e-06, "epoch": 2.1272645797324117, "percentage": 42.55, "elapsed_time": "1:27:23", "remaining_time": "1:58:01", "throughput": 19946.84, "total_tokens": 104591744}
|
|
{"current_steps": 33235, "total_steps": 78105, "loss": 0.1655, "lr": 3.5525891956038705e-06, "epoch": 2.1275846616733882, "percentage": 42.55, "elapsed_time": "1:27:24", "remaining_time": "1:58:00", "throughput": 19947.33, "total_tokens": 104608256}
|
|
{"current_steps": 33240, "total_steps": 78105, "loss": 0.1644, "lr": 3.5520824470240606e-06, "epoch": 2.127904743614365, "percentage": 42.56, "elapsed_time": "1:27:24", "remaining_time": "1:57:59", "throughput": 19947.89, "total_tokens": 104625536}
|
|
{"current_steps": 33245, "total_steps": 78105, "loss": 0.147, "lr": 3.5515756459087334e-06, "epoch": 2.128224825555342, "percentage": 42.56, "elapsed_time": "1:27:25", "remaining_time": "1:57:58", "throughput": 19948.19, "total_tokens": 104639680}
|
|
{"current_steps": 33250, "total_steps": 78105, "loss": 0.2699, "lr": 3.5510687922831966e-06, "epoch": 2.128544907496319, "percentage": 42.57, "elapsed_time": "1:27:26", "remaining_time": "1:57:57", "throughput": 19948.66, "total_tokens": 104656000}
|
|
{"current_steps": 33255, "total_steps": 78105, "loss": 0.1568, "lr": 3.5505618861727574e-06, "epoch": 2.1288649894372957, "percentage": 42.58, "elapsed_time": "1:27:26", "remaining_time": "1:57:56", "throughput": 19949.08, "total_tokens": 104671744}
|
|
{"current_steps": 33260, "total_steps": 78105, "loss": 0.1426, "lr": 3.5500549276027306e-06, "epoch": 2.1291850713782727, "percentage": 42.58, "elapsed_time": "1:27:27", "remaining_time": "1:57:55", "throughput": 19949.43, "total_tokens": 104686144}
|
|
{"current_steps": 33265, "total_steps": 78105, "loss": 0.1269, "lr": 3.5495479165984307e-06, "epoch": 2.1295051533192497, "percentage": 42.59, "elapsed_time": "1:27:28", "remaining_time": "1:57:54", "throughput": 19949.88, "total_tokens": 104702208}
|
|
{"current_steps": 33270, "total_steps": 78105, "loss": 0.2102, "lr": 3.549040853185175e-06, "epoch": 2.1298252352602267, "percentage": 42.6, "elapsed_time": "1:27:28", "remaining_time": "1:57:53", "throughput": 19950.37, "total_tokens": 104718784}
|
|
{"current_steps": 33275, "total_steps": 78105, "loss": 0.2664, "lr": 3.5485337373882827e-06, "epoch": 2.1301453172012037, "percentage": 42.6, "elapsed_time": "1:27:29", "remaining_time": "1:57:52", "throughput": 19950.84, "total_tokens": 104734848}
|
|
{"current_steps": 33280, "total_steps": 78105, "loss": 0.1545, "lr": 3.5480265692330775e-06, "epoch": 2.1304653991421802, "percentage": 42.61, "elapsed_time": "1:27:30", "remaining_time": "1:57:51", "throughput": 19951.19, "total_tokens": 104749696}
|
|
{"current_steps": 33285, "total_steps": 78105, "loss": 0.1183, "lr": 3.547519348744884e-06, "epoch": 2.1307854810831572, "percentage": 42.62, "elapsed_time": "1:27:30", "remaining_time": "1:57:50", "throughput": 19951.6, "total_tokens": 104765376}
|
|
{"current_steps": 33290, "total_steps": 78105, "loss": 0.1367, "lr": 3.5470120759490305e-06, "epoch": 2.1311055630241342, "percentage": 42.62, "elapsed_time": "1:27:31", "remaining_time": "1:57:49", "throughput": 19952.03, "total_tokens": 104781056}
|
|
{"current_steps": 33295, "total_steps": 78105, "loss": 0.1776, "lr": 3.5465047508708485e-06, "epoch": 2.131425644965111, "percentage": 42.63, "elapsed_time": "1:27:32", "remaining_time": "1:57:48", "throughput": 19952.46, "total_tokens": 104796864}
|
|
{"current_steps": 33300, "total_steps": 78105, "loss": 0.1945, "lr": 3.5459973735356697e-06, "epoch": 2.1317457269060878, "percentage": 42.63, "elapsed_time": "1:27:33", "remaining_time": "1:57:47", "throughput": 19952.99, "total_tokens": 104813888}
|
|
{"current_steps": 33305, "total_steps": 78105, "loss": 0.1501, "lr": 3.5454899439688306e-06, "epoch": 2.1320658088470648, "percentage": 42.64, "elapsed_time": "1:27:33", "remaining_time": "1:57:46", "throughput": 19953.43, "total_tokens": 104829504}
|
|
{"current_steps": 33310, "total_steps": 78105, "loss": 0.2541, "lr": 3.5449824621956685e-06, "epoch": 2.1323858907880417, "percentage": 42.65, "elapsed_time": "1:27:34", "remaining_time": "1:57:46", "throughput": 19953.84, "total_tokens": 104845312}
|
|
{"current_steps": 33315, "total_steps": 78105, "loss": 0.2544, "lr": 3.5444749282415266e-06, "epoch": 2.1327059727290187, "percentage": 42.65, "elapsed_time": "1:27:35", "remaining_time": "1:57:45", "throughput": 19954.34, "total_tokens": 104861888}
|
|
{"current_steps": 33320, "total_steps": 78105, "loss": 0.1929, "lr": 3.543967342131747e-06, "epoch": 2.1330260546699957, "percentage": 42.66, "elapsed_time": "1:27:35", "remaining_time": "1:57:44", "throughput": 19954.78, "total_tokens": 104877888}
|
|
{"current_steps": 33325, "total_steps": 78105, "loss": 0.1913, "lr": 3.543459703891675e-06, "epoch": 2.1333461366109723, "percentage": 42.67, "elapsed_time": "1:27:36", "remaining_time": "1:57:43", "throughput": 19955.24, "total_tokens": 104893888}
|
|
{"current_steps": 33330, "total_steps": 78105, "loss": 0.2014, "lr": 3.542952013546662e-06, "epoch": 2.1336662185519493, "percentage": 42.67, "elapsed_time": "1:27:37", "remaining_time": "1:57:42", "throughput": 19955.68, "total_tokens": 104909952}
|
|
{"current_steps": 33335, "total_steps": 78105, "loss": 0.2067, "lr": 3.5424442711220582e-06, "epoch": 2.1339863004929263, "percentage": 42.68, "elapsed_time": "1:27:37", "remaining_time": "1:57:41", "throughput": 19956.1, "total_tokens": 104925120}
|
|
{"current_steps": 33340, "total_steps": 78105, "loss": 0.2051, "lr": 3.541936476643216e-06, "epoch": 2.1343063824339032, "percentage": 42.69, "elapsed_time": "1:27:38", "remaining_time": "1:57:40", "throughput": 19956.53, "total_tokens": 104941056}
|
|
{"current_steps": 33345, "total_steps": 78105, "loss": 0.2205, "lr": 3.5414286301354943e-06, "epoch": 2.13462646437488, "percentage": 42.69, "elapsed_time": "1:27:39", "remaining_time": "1:57:39", "throughput": 19956.85, "total_tokens": 104955392}
|
|
{"current_steps": 33350, "total_steps": 78105, "loss": 0.3438, "lr": 3.540920731624251e-06, "epoch": 2.134946546315857, "percentage": 42.7, "elapsed_time": "1:27:39", "remaining_time": "1:57:38", "throughput": 19957.27, "total_tokens": 104971072}
|
|
{"current_steps": 33355, "total_steps": 78105, "loss": 0.2247, "lr": 3.5404127811348486e-06, "epoch": 2.1352666282568338, "percentage": 42.71, "elapsed_time": "1:27:40", "remaining_time": "1:57:37", "throughput": 19957.71, "total_tokens": 104987072}
|
|
{"current_steps": 33360, "total_steps": 78105, "loss": 0.3341, "lr": 3.5399047786926514e-06, "epoch": 2.1355867101978108, "percentage": 42.71, "elapsed_time": "1:27:41", "remaining_time": "1:57:36", "throughput": 19958.09, "total_tokens": 105002176}
|
|
{"current_steps": 33365, "total_steps": 78105, "loss": 0.239, "lr": 3.5393967243230265e-06, "epoch": 2.1359067921387878, "percentage": 42.72, "elapsed_time": "1:27:41", "remaining_time": "1:57:35", "throughput": 19958.53, "total_tokens": 105018048}
|
|
{"current_steps": 33370, "total_steps": 78105, "loss": 0.1765, "lr": 3.538888618051342e-06, "epoch": 2.1362268740797643, "percentage": 42.72, "elapsed_time": "1:27:42", "remaining_time": "1:57:34", "throughput": 19958.9, "total_tokens": 105032960}
|
|
{"current_steps": 33375, "total_steps": 78105, "loss": 0.2707, "lr": 3.5383804599029725e-06, "epoch": 2.1365469560207413, "percentage": 42.73, "elapsed_time": "1:27:43", "remaining_time": "1:57:33", "throughput": 19959.44, "total_tokens": 105050176}
|
|
{"current_steps": 33380, "total_steps": 78105, "loss": 0.154, "lr": 3.5378722499032904e-06, "epoch": 2.1368670379617183, "percentage": 42.74, "elapsed_time": "1:27:43", "remaining_time": "1:57:32", "throughput": 19959.83, "total_tokens": 105065280}
|
|
{"current_steps": 33385, "total_steps": 78105, "loss": 0.2655, "lr": 3.537363988077675e-06, "epoch": 2.1371871199026953, "percentage": 42.74, "elapsed_time": "1:27:44", "remaining_time": "1:57:31", "throughput": 19960.21, "total_tokens": 105080448}
|
|
{"current_steps": 33390, "total_steps": 78105, "loss": 0.2169, "lr": 3.5368556744515057e-06, "epoch": 2.137507201843672, "percentage": 42.75, "elapsed_time": "1:27:45", "remaining_time": "1:57:30", "throughput": 19960.65, "total_tokens": 105096256}
|
|
{"current_steps": 33395, "total_steps": 78105, "loss": 0.2677, "lr": 3.536347309050164e-06, "epoch": 2.137827283784649, "percentage": 42.76, "elapsed_time": "1:27:45", "remaining_time": "1:57:30", "throughput": 19961.11, "total_tokens": 105112704}
|
|
{"current_steps": 33400, "total_steps": 78105, "loss": 0.1674, "lr": 3.535838891899036e-06, "epoch": 2.138147365725626, "percentage": 42.76, "elapsed_time": "1:27:46", "remaining_time": "1:57:29", "throughput": 19961.58, "total_tokens": 105129024}
|
|
{"current_steps": 33405, "total_steps": 78105, "loss": 0.2576, "lr": 3.5353304230235096e-06, "epoch": 2.138467447666603, "percentage": 42.77, "elapsed_time": "1:27:47", "remaining_time": "1:57:28", "throughput": 19962.02, "total_tokens": 105144832}
|
|
{"current_steps": 33410, "total_steps": 78105, "loss": 0.1551, "lr": 3.534821902448975e-06, "epoch": 2.1387875296075793, "percentage": 42.78, "elapsed_time": "1:27:47", "remaining_time": "1:57:27", "throughput": 19962.4, "total_tokens": 105160256}
|
|
{"current_steps": 33415, "total_steps": 78105, "loss": 0.174, "lr": 3.534313330200825e-06, "epoch": 2.1391076115485563, "percentage": 42.78, "elapsed_time": "1:27:48", "remaining_time": "1:57:26", "throughput": 19962.83, "total_tokens": 105175424}
|
|
{"current_steps": 33420, "total_steps": 78105, "loss": 0.3091, "lr": 3.533804706304454e-06, "epoch": 2.1394276934895333, "percentage": 42.79, "elapsed_time": "1:27:49", "remaining_time": "1:57:25", "throughput": 19963.22, "total_tokens": 105190464}
|
|
{"current_steps": 33425, "total_steps": 78105, "loss": 0.2082, "lr": 3.5332960307852604e-06, "epoch": 2.1397477754305103, "percentage": 42.79, "elapsed_time": "1:27:49", "remaining_time": "1:57:24", "throughput": 19963.69, "total_tokens": 105206912}
|
|
{"current_steps": 33430, "total_steps": 78105, "loss": 0.0997, "lr": 3.5327873036686466e-06, "epoch": 2.1400678573714873, "percentage": 42.8, "elapsed_time": "1:27:50", "remaining_time": "1:57:23", "throughput": 19964.08, "total_tokens": 105222336}
|
|
{"current_steps": 33435, "total_steps": 78105, "loss": 0.14, "lr": 3.532278524980013e-06, "epoch": 2.140387939312464, "percentage": 42.81, "elapsed_time": "1:27:51", "remaining_time": "1:57:22", "throughput": 19964.4, "total_tokens": 105236544}
|
|
{"current_steps": 33440, "total_steps": 78105, "loss": 0.2127, "lr": 3.531769694744768e-06, "epoch": 2.140708021253441, "percentage": 42.81, "elapsed_time": "1:27:51", "remaining_time": "1:57:21", "throughput": 19964.82, "total_tokens": 105252096}
|
|
{"current_steps": 33445, "total_steps": 78105, "loss": 0.18, "lr": 3.5312608129883186e-06, "epoch": 2.141028103194418, "percentage": 42.82, "elapsed_time": "1:27:52", "remaining_time": "1:57:20", "throughput": 19965.27, "total_tokens": 105268224}
|
|
{"current_steps": 33450, "total_steps": 78105, "loss": 0.2038, "lr": 3.5307518797360756e-06, "epoch": 2.141348185135395, "percentage": 42.83, "elapsed_time": "1:27:53", "remaining_time": "1:57:19", "throughput": 19965.86, "total_tokens": 105285824}
|
|
{"current_steps": 33455, "total_steps": 78105, "loss": 0.2503, "lr": 3.5302428950134525e-06, "epoch": 2.1416682670763714, "percentage": 42.83, "elapsed_time": "1:27:53", "remaining_time": "1:57:18", "throughput": 19966.28, "total_tokens": 105301504}
|
|
{"current_steps": 33460, "total_steps": 78105, "loss": 0.2744, "lr": 3.529733858845866e-06, "epoch": 2.1419883490173484, "percentage": 42.84, "elapsed_time": "1:27:54", "remaining_time": "1:57:17", "throughput": 19966.75, "total_tokens": 105317952}
|
|
{"current_steps": 33465, "total_steps": 78105, "loss": 0.1624, "lr": 3.5292247712587336e-06, "epoch": 2.1423084309583253, "percentage": 42.85, "elapsed_time": "1:27:55", "remaining_time": "1:57:16", "throughput": 19967.17, "total_tokens": 105333440}
|
|
{"current_steps": 33470, "total_steps": 78105, "loss": 0.2341, "lr": 3.528715632277478e-06, "epoch": 2.1426285128993023, "percentage": 42.85, "elapsed_time": "1:27:56", "remaining_time": "1:57:16", "throughput": 19967.71, "total_tokens": 105350720}
|
|
{"current_steps": 33475, "total_steps": 78105, "loss": 0.2384, "lr": 3.5282064419275217e-06, "epoch": 2.142948594840279, "percentage": 42.86, "elapsed_time": "1:27:56", "remaining_time": "1:57:15", "throughput": 19968.09, "total_tokens": 105366144}
|
|
{"current_steps": 33480, "total_steps": 78105, "loss": 0.2245, "lr": 3.527697200234291e-06, "epoch": 2.143268676781256, "percentage": 42.87, "elapsed_time": "1:27:57", "remaining_time": "1:57:14", "throughput": 19968.54, "total_tokens": 105382656}
|
|
{"current_steps": 33485, "total_steps": 78105, "loss": 0.2075, "lr": 3.5271879072232158e-06, "epoch": 2.143588758722233, "percentage": 42.87, "elapsed_time": "1:27:58", "remaining_time": "1:57:13", "throughput": 19968.92, "total_tokens": 105398016}
|
|
{"current_steps": 33490, "total_steps": 78105, "loss": 0.1965, "lr": 3.526678562919727e-06, "epoch": 2.14390884066321, "percentage": 42.88, "elapsed_time": "1:27:58", "remaining_time": "1:57:12", "throughput": 19969.34, "total_tokens": 105413696}
|
|
{"current_steps": 33495, "total_steps": 78105, "loss": 0.2447, "lr": 3.526169167349258e-06, "epoch": 2.144228922604187, "percentage": 42.88, "elapsed_time": "1:27:59", "remaining_time": "1:57:11", "throughput": 19969.8, "total_tokens": 105429760}
|
|
{"current_steps": 33500, "total_steps": 78105, "loss": 0.1897, "lr": 3.5256597205372463e-06, "epoch": 2.1445490045451634, "percentage": 42.89, "elapsed_time": "1:28:00", "remaining_time": "1:57:10", "throughput": 19970.27, "total_tokens": 105446272}
|
|
{"current_steps": 33505, "total_steps": 78105, "loss": 0.1563, "lr": 3.5251502225091305e-06, "epoch": 2.1448690864861404, "percentage": 42.9, "elapsed_time": "1:28:00", "remaining_time": "1:57:09", "throughput": 19970.75, "total_tokens": 105462400}
|
|
{"current_steps": 33510, "total_steps": 78105, "loss": 0.1698, "lr": 3.5246406732903525e-06, "epoch": 2.1451891684271174, "percentage": 42.9, "elapsed_time": "1:28:01", "remaining_time": "1:57:08", "throughput": 19971.23, "total_tokens": 105478912}
|
|
{"current_steps": 33515, "total_steps": 78105, "loss": 0.1394, "lr": 3.524131072906356e-06, "epoch": 2.1455092503680944, "percentage": 42.91, "elapsed_time": "1:28:02", "remaining_time": "1:57:07", "throughput": 19971.75, "total_tokens": 105495936}
|
|
{"current_steps": 33520, "total_steps": 78105, "loss": 0.1046, "lr": 3.523621421382589e-06, "epoch": 2.145829332309071, "percentage": 42.92, "elapsed_time": "1:28:02", "remaining_time": "1:57:06", "throughput": 19972.14, "total_tokens": 105511296}
|
|
{"current_steps": 33525, "total_steps": 78105, "loss": 0.1298, "lr": 3.5231117187444998e-06, "epoch": 2.146149414250048, "percentage": 42.92, "elapsed_time": "1:28:03", "remaining_time": "1:57:05", "throughput": 19972.51, "total_tokens": 105526592}
|
|
{"current_steps": 33530, "total_steps": 78105, "loss": 0.2161, "lr": 3.52260196501754e-06, "epoch": 2.146469496191025, "percentage": 42.93, "elapsed_time": "1:28:04", "remaining_time": "1:57:04", "throughput": 19972.94, "total_tokens": 105542464}
|
|
{"current_steps": 33535, "total_steps": 78105, "loss": 0.2425, "lr": 3.522092160227165e-06, "epoch": 2.146789578132002, "percentage": 42.94, "elapsed_time": "1:28:04", "remaining_time": "1:57:03", "throughput": 19973.25, "total_tokens": 105557056}
|
|
{"current_steps": 33540, "total_steps": 78105, "loss": 0.2174, "lr": 3.521582304398832e-06, "epoch": 2.147109660072979, "percentage": 42.94, "elapsed_time": "1:28:05", "remaining_time": "1:57:03", "throughput": 19973.65, "total_tokens": 105572224}
|
|
{"current_steps": 33545, "total_steps": 78105, "loss": 0.2682, "lr": 3.5210723975579992e-06, "epoch": 2.1474297420139554, "percentage": 42.95, "elapsed_time": "1:28:06", "remaining_time": "1:57:02", "throughput": 19974.24, "total_tokens": 105590016}
|
|
{"current_steps": 33550, "total_steps": 78105, "loss": 0.1951, "lr": 3.5205624397301297e-06, "epoch": 2.1477498239549324, "percentage": 42.95, "elapsed_time": "1:28:07", "remaining_time": "1:57:01", "throughput": 19974.75, "total_tokens": 105606656}
|
|
{"current_steps": 33555, "total_steps": 78105, "loss": 0.2774, "lr": 3.520052430940687e-06, "epoch": 2.1480699058959094, "percentage": 42.96, "elapsed_time": "1:28:07", "remaining_time": "1:57:00", "throughput": 19975.17, "total_tokens": 105622464}
|
|
{"current_steps": 33560, "total_steps": 78105, "loss": 0.1945, "lr": 3.51954237121514e-06, "epoch": 2.1483899878368864, "percentage": 42.97, "elapsed_time": "1:28:08", "remaining_time": "1:56:59", "throughput": 19975.55, "total_tokens": 105637504}
|
|
{"current_steps": 33565, "total_steps": 78105, "loss": 0.2432, "lr": 3.519032260578958e-06, "epoch": 2.148710069777863, "percentage": 42.97, "elapsed_time": "1:28:08", "remaining_time": "1:56:58", "throughput": 19975.89, "total_tokens": 105652160}
|
|
{"current_steps": 33570, "total_steps": 78105, "loss": 0.1757, "lr": 3.5185220990576123e-06, "epoch": 2.14903015171884, "percentage": 42.98, "elapsed_time": "1:28:09", "remaining_time": "1:56:57", "throughput": 19976.29, "total_tokens": 105668160}
|
|
{"current_steps": 33575, "total_steps": 78105, "loss": 0.209, "lr": 3.518011886676578e-06, "epoch": 2.149350233659817, "percentage": 42.99, "elapsed_time": "1:28:10", "remaining_time": "1:56:56", "throughput": 19976.66, "total_tokens": 105682944}
|
|
{"current_steps": 33580, "total_steps": 78105, "loss": 0.2531, "lr": 3.5175016234613334e-06, "epoch": 2.149670315600794, "percentage": 42.99, "elapsed_time": "1:28:11", "remaining_time": "1:56:55", "throughput": 19977.09, "total_tokens": 105698816}
|
|
{"current_steps": 33585, "total_steps": 78105, "loss": 0.1416, "lr": 3.5169913094373575e-06, "epoch": 2.149990397541771, "percentage": 43.0, "elapsed_time": "1:28:11", "remaining_time": "1:56:54", "throughput": 19977.44, "total_tokens": 105713728}
|
|
{"current_steps": 33590, "total_steps": 78105, "loss": 0.2043, "lr": 3.516480944630133e-06, "epoch": 2.1503104794827474, "percentage": 43.01, "elapsed_time": "1:28:12", "remaining_time": "1:56:53", "throughput": 19977.84, "total_tokens": 105729344}
|
|
{"current_steps": 33595, "total_steps": 78105, "loss": 0.1679, "lr": 3.515970529065145e-06, "epoch": 2.1506305614237244, "percentage": 43.01, "elapsed_time": "1:28:13", "remaining_time": "1:56:52", "throughput": 19978.4, "total_tokens": 105746560}
|
|
{"current_steps": 33600, "total_steps": 78105, "loss": 0.2972, "lr": 3.5154600627678792e-06, "epoch": 2.1509506433647014, "percentage": 43.02, "elapsed_time": "1:28:13", "remaining_time": "1:56:51", "throughput": 19978.97, "total_tokens": 105764480}
|
|
{"current_steps": 33605, "total_steps": 78105, "loss": 0.1771, "lr": 3.514949545763828e-06, "epoch": 2.1512707253056784, "percentage": 43.03, "elapsed_time": "1:28:14", "remaining_time": "1:56:51", "throughput": 19979.45, "total_tokens": 105780992}
|
|
{"current_steps": 33610, "total_steps": 78105, "loss": 0.3079, "lr": 3.514438978078484e-06, "epoch": 2.151590807246655, "percentage": 43.03, "elapsed_time": "1:28:15", "remaining_time": "1:56:50", "throughput": 19979.87, "total_tokens": 105796928}
|
|
{"current_steps": 33615, "total_steps": 78105, "loss": 0.2153, "lr": 3.513928359737341e-06, "epoch": 2.151910889187632, "percentage": 43.04, "elapsed_time": "1:28:15", "remaining_time": "1:56:49", "throughput": 19980.25, "total_tokens": 105811712}
|
|
{"current_steps": 33620, "total_steps": 78105, "loss": 0.1891, "lr": 3.5134176907658966e-06, "epoch": 2.152230971128609, "percentage": 43.04, "elapsed_time": "1:28:16", "remaining_time": "1:56:48", "throughput": 19980.79, "total_tokens": 105828800}
|
|
{"current_steps": 33625, "total_steps": 78105, "loss": 0.2186, "lr": 3.512906971189652e-06, "epoch": 2.152551053069586, "percentage": 43.05, "elapsed_time": "1:28:17", "remaining_time": "1:56:47", "throughput": 19981.34, "total_tokens": 105846272}
|
|
{"current_steps": 33630, "total_steps": 78105, "loss": 0.2392, "lr": 3.512396201034109e-06, "epoch": 2.152871135010563, "percentage": 43.06, "elapsed_time": "1:28:17", "remaining_time": "1:56:46", "throughput": 19981.82, "total_tokens": 105862848}
|
|
{"current_steps": 33635, "total_steps": 78105, "loss": 0.2205, "lr": 3.5118853803247733e-06, "epoch": 2.1531912169515395, "percentage": 43.06, "elapsed_time": "1:28:18", "remaining_time": "1:56:45", "throughput": 19982.15, "total_tokens": 105877440}
|
|
{"current_steps": 33640, "total_steps": 78105, "loss": 0.3131, "lr": 3.5113745090871526e-06, "epoch": 2.1535112988925165, "percentage": 43.07, "elapsed_time": "1:28:19", "remaining_time": "1:56:44", "throughput": 19982.56, "total_tokens": 105892928}
|
|
{"current_steps": 33645, "total_steps": 78105, "loss": 0.1858, "lr": 3.5108635873467565e-06, "epoch": 2.1538313808334935, "percentage": 43.08, "elapsed_time": "1:28:19", "remaining_time": "1:56:43", "throughput": 19982.93, "total_tokens": 105908160}
|
|
{"current_steps": 33650, "total_steps": 78105, "loss": 0.2167, "lr": 3.510352615129099e-06, "epoch": 2.1541514627744704, "percentage": 43.08, "elapsed_time": "1:28:20", "remaining_time": "1:56:42", "throughput": 19983.32, "total_tokens": 105923392}
|
|
{"current_steps": 33655, "total_steps": 78105, "loss": 0.1931, "lr": 3.509841592459694e-06, "epoch": 2.154471544715447, "percentage": 43.09, "elapsed_time": "1:28:21", "remaining_time": "1:56:41", "throughput": 19983.75, "total_tokens": 105939136}
|
|
{"current_steps": 33660, "total_steps": 78105, "loss": 0.1777, "lr": 3.5093305193640597e-06, "epoch": 2.154791626656424, "percentage": 43.1, "elapsed_time": "1:28:21", "remaining_time": "1:56:40", "throughput": 19984.2, "total_tokens": 105955584}
|
|
{"current_steps": 33665, "total_steps": 78105, "loss": 0.3062, "lr": 3.5088193958677174e-06, "epoch": 2.155111708597401, "percentage": 43.1, "elapsed_time": "1:28:22", "remaining_time": "1:56:39", "throughput": 19984.61, "total_tokens": 105971200}
|
|
{"current_steps": 33670, "total_steps": 78105, "loss": 0.1523, "lr": 3.5083082219961883e-06, "epoch": 2.155431790538378, "percentage": 43.11, "elapsed_time": "1:28:23", "remaining_time": "1:56:38", "throughput": 19985.02, "total_tokens": 105986816}
|
|
{"current_steps": 33675, "total_steps": 78105, "loss": 0.1629, "lr": 3.5077969977749993e-06, "epoch": 2.1557518724793545, "percentage": 43.12, "elapsed_time": "1:28:24", "remaining_time": "1:56:37", "throughput": 19985.47, "total_tokens": 106003072}
|
|
{"current_steps": 33680, "total_steps": 78105, "loss": 0.2281, "lr": 3.507285723229678e-06, "epoch": 2.1560719544203315, "percentage": 43.12, "elapsed_time": "1:28:24", "remaining_time": "1:56:37", "throughput": 19985.9, "total_tokens": 106019136}
|
|
{"current_steps": 33685, "total_steps": 78105, "loss": 0.1908, "lr": 3.5067743983857538e-06, "epoch": 2.1563920363613085, "percentage": 43.13, "elapsed_time": "1:28:25", "remaining_time": "1:56:36", "throughput": 19986.28, "total_tokens": 106034176}
|
|
{"current_steps": 33690, "total_steps": 78105, "loss": 0.2205, "lr": 3.5062630232687606e-06, "epoch": 2.1567121183022855, "percentage": 43.13, "elapsed_time": "1:28:26", "remaining_time": "1:56:35", "throughput": 19986.69, "total_tokens": 106050176}
|
|
{"current_steps": 33695, "total_steps": 78105, "loss": 0.2417, "lr": 3.5057515979042335e-06, "epoch": 2.1570322002432625, "percentage": 43.14, "elapsed_time": "1:28:26", "remaining_time": "1:56:34", "throughput": 19987.14, "total_tokens": 106066432}
|
|
{"current_steps": 33700, "total_steps": 78105, "loss": 0.2782, "lr": 3.50524012231771e-06, "epoch": 2.157352282184239, "percentage": 43.15, "elapsed_time": "1:28:27", "remaining_time": "1:56:33", "throughput": 19987.55, "total_tokens": 106082240}
|
|
{"current_steps": 33705, "total_steps": 78105, "loss": 0.2555, "lr": 3.504728596534731e-06, "epoch": 2.157672364125216, "percentage": 43.15, "elapsed_time": "1:28:28", "remaining_time": "1:56:32", "throughput": 19987.88, "total_tokens": 106097088}
|
|
{"current_steps": 33710, "total_steps": 78105, "loss": 0.1407, "lr": 3.5042170205808403e-06, "epoch": 2.157992446066193, "percentage": 43.16, "elapsed_time": "1:28:28", "remaining_time": "1:56:31", "throughput": 19988.24, "total_tokens": 106112064}
|
|
{"current_steps": 33715, "total_steps": 78105, "loss": 0.2465, "lr": 3.503705394481581e-06, "epoch": 2.15831252800717, "percentage": 43.17, "elapsed_time": "1:28:29", "remaining_time": "1:56:30", "throughput": 19988.66, "total_tokens": 106128000}
|
|
{"current_steps": 33720, "total_steps": 78105, "loss": 0.1645, "lr": 3.5031937182625023e-06, "epoch": 2.1586326099481465, "percentage": 43.17, "elapsed_time": "1:28:30", "remaining_time": "1:56:29", "throughput": 19988.99, "total_tokens": 106142720}
|
|
{"current_steps": 33725, "total_steps": 78105, "loss": 0.2709, "lr": 3.5026819919491563e-06, "epoch": 2.1589526918891235, "percentage": 43.18, "elapsed_time": "1:28:30", "remaining_time": "1:56:28", "throughput": 19989.36, "total_tokens": 106158144}
|
|
{"current_steps": 33730, "total_steps": 78105, "loss": 0.1932, "lr": 3.502170215567092e-06, "epoch": 2.1592727738301005, "percentage": 43.19, "elapsed_time": "1:28:31", "remaining_time": "1:56:27", "throughput": 19990.06, "total_tokens": 106177984}
|
|
{"current_steps": 33735, "total_steps": 78105, "loss": 0.2208, "lr": 3.501658389141869e-06, "epoch": 2.1595928557710775, "percentage": 43.19, "elapsed_time": "1:28:32", "remaining_time": "1:56:26", "throughput": 19990.46, "total_tokens": 106193472}
|
|
{"current_steps": 33740, "total_steps": 78105, "loss": 0.1614, "lr": 3.501146512699044e-06, "epoch": 2.159912937712054, "percentage": 43.2, "elapsed_time": "1:28:32", "remaining_time": "1:56:25", "throughput": 19990.82, "total_tokens": 106208640}
|
|
{"current_steps": 33745, "total_steps": 78105, "loss": 0.2378, "lr": 3.5006345862641755e-06, "epoch": 2.160233019653031, "percentage": 43.2, "elapsed_time": "1:28:33", "remaining_time": "1:56:25", "throughput": 19991.26, "total_tokens": 106224448}
|
|
{"current_steps": 33750, "total_steps": 78105, "loss": 0.1367, "lr": 3.5001226098628288e-06, "epoch": 2.160553101594008, "percentage": 43.21, "elapsed_time": "1:28:34", "remaining_time": "1:56:24", "throughput": 19991.69, "total_tokens": 106240448}
|
|
{"current_steps": 33755, "total_steps": 78105, "loss": 0.2617, "lr": 3.499610583520568e-06, "epoch": 2.160873183534985, "percentage": 43.22, "elapsed_time": "1:28:34", "remaining_time": "1:56:23", "throughput": 19992.07, "total_tokens": 106256128}
|
|
{"current_steps": 33760, "total_steps": 78105, "loss": 0.1985, "lr": 3.4990985072629617e-06, "epoch": 2.161193265475962, "percentage": 43.22, "elapsed_time": "1:28:35", "remaining_time": "1:56:22", "throughput": 19992.4, "total_tokens": 106270848}
|
|
{"current_steps": 33765, "total_steps": 78105, "loss": 0.216, "lr": 3.49858638111558e-06, "epoch": 2.1615133474169386, "percentage": 43.23, "elapsed_time": "1:28:36", "remaining_time": "1:56:21", "throughput": 19992.8, "total_tokens": 106286528}
|
|
{"current_steps": 33770, "total_steps": 78105, "loss": 0.2746, "lr": 3.4980742051039957e-06, "epoch": 2.1618334293579156, "percentage": 43.24, "elapsed_time": "1:28:36", "remaining_time": "1:56:20", "throughput": 19993.17, "total_tokens": 106301568}
|
|
{"current_steps": 33775, "total_steps": 78105, "loss": 0.1873, "lr": 3.4975619792537846e-06, "epoch": 2.1621535112988925, "percentage": 43.24, "elapsed_time": "1:28:37", "remaining_time": "1:56:19", "throughput": 19993.53, "total_tokens": 106316416}
|
|
{"current_steps": 33780, "total_steps": 78105, "loss": 0.3015, "lr": 3.4970497035905244e-06, "epoch": 2.1624735932398695, "percentage": 43.25, "elapsed_time": "1:28:38", "remaining_time": "1:56:18", "throughput": 19993.93, "total_tokens": 106331968}
|
|
{"current_steps": 33785, "total_steps": 78105, "loss": 0.2405, "lr": 3.496537378139795e-06, "epoch": 2.162793675180846, "percentage": 43.26, "elapsed_time": "1:28:38", "remaining_time": "1:56:17", "throughput": 19994.35, "total_tokens": 106347840}
|
|
{"current_steps": 33790, "total_steps": 78105, "loss": 0.296, "lr": 3.4960250029271795e-06, "epoch": 2.163113757121823, "percentage": 43.26, "elapsed_time": "1:28:39", "remaining_time": "1:56:16", "throughput": 19994.31, "total_tokens": 106364096}
|
|
{"current_steps": 33795, "total_steps": 78105, "loss": 0.2345, "lr": 3.4955125779782646e-06, "epoch": 2.1634338390628, "percentage": 43.27, "elapsed_time": "1:28:40", "remaining_time": "1:56:15", "throughput": 19994.65, "total_tokens": 106379520}
|
|
{"current_steps": 33800, "total_steps": 78105, "loss": 0.1912, "lr": 3.495000103318636e-06, "epoch": 2.163753921003777, "percentage": 43.28, "elapsed_time": "1:28:41", "remaining_time": "1:56:14", "throughput": 19995.01, "total_tokens": 106394240}
|
|
{"current_steps": 33805, "total_steps": 78105, "loss": 0.1327, "lr": 3.4944875789738852e-06, "epoch": 2.164074002944754, "percentage": 43.28, "elapsed_time": "1:28:41", "remaining_time": "1:56:13", "throughput": 19995.45, "total_tokens": 106410304}
|
|
{"current_steps": 33810, "total_steps": 78105, "loss": 0.1797, "lr": 3.4939750049696053e-06, "epoch": 2.1643940848857306, "percentage": 43.29, "elapsed_time": "1:28:42", "remaining_time": "1:56:12", "throughput": 19995.79, "total_tokens": 106425088}
|
|
{"current_steps": 33815, "total_steps": 78105, "loss": 0.1659, "lr": 3.4934623813313905e-06, "epoch": 2.1647141668267076, "percentage": 43.29, "elapsed_time": "1:28:43", "remaining_time": "1:56:12", "throughput": 19996.1, "total_tokens": 106440512}
|
|
{"current_steps": 33820, "total_steps": 78105, "loss": 0.2589, "lr": 3.4929497080848396e-06, "epoch": 2.1650342487676846, "percentage": 43.3, "elapsed_time": "1:28:43", "remaining_time": "1:56:11", "throughput": 19996.55, "total_tokens": 106456704}
|
|
{"current_steps": 33825, "total_steps": 78105, "loss": 0.2504, "lr": 3.4924369852555524e-06, "epoch": 2.1653543307086616, "percentage": 43.31, "elapsed_time": "1:28:44", "remaining_time": "1:56:10", "throughput": 19996.9, "total_tokens": 106471488}
|
|
{"current_steps": 33830, "total_steps": 78105, "loss": 0.1923, "lr": 3.491924212869131e-06, "epoch": 2.165674412649638, "percentage": 43.31, "elapsed_time": "1:28:45", "remaining_time": "1:56:09", "throughput": 19997.32, "total_tokens": 106487488}
|
|
{"current_steps": 33835, "total_steps": 78105, "loss": 0.1552, "lr": 3.4914113909511813e-06, "epoch": 2.165994494590615, "percentage": 43.32, "elapsed_time": "1:28:45", "remaining_time": "1:56:08", "throughput": 19997.74, "total_tokens": 106503232}
|
|
{"current_steps": 33840, "total_steps": 78105, "loss": 0.2846, "lr": 3.4908985195273116e-06, "epoch": 2.166314576531592, "percentage": 43.33, "elapsed_time": "1:28:46", "remaining_time": "1:56:07", "throughput": 19998.11, "total_tokens": 106518464}
|
|
{"current_steps": 33845, "total_steps": 78105, "loss": 0.1956, "lr": 3.49038559862313e-06, "epoch": 2.166634658472569, "percentage": 43.33, "elapsed_time": "1:28:47", "remaining_time": "1:56:06", "throughput": 19998.56, "total_tokens": 106534720}
|
|
{"current_steps": 33850, "total_steps": 78105, "loss": 0.2281, "lr": 3.489872628264251e-06, "epoch": 2.166954740413546, "percentage": 43.34, "elapsed_time": "1:28:47", "remaining_time": "1:56:05", "throughput": 19999.12, "total_tokens": 106552128}
|
|
{"current_steps": 33855, "total_steps": 78105, "loss": 0.383, "lr": 3.489359608476289e-06, "epoch": 2.1672748223545226, "percentage": 43.35, "elapsed_time": "1:28:48", "remaining_time": "1:56:04", "throughput": 19999.37, "total_tokens": 106568192}
|
|
{"current_steps": 33860, "total_steps": 78105, "loss": 0.1301, "lr": 3.4888465392848613e-06, "epoch": 2.1675949042954996, "percentage": 43.35, "elapsed_time": "1:28:49", "remaining_time": "1:56:03", "throughput": 19999.74, "total_tokens": 106583296}
|
|
{"current_steps": 33865, "total_steps": 78105, "loss": 0.2674, "lr": 3.4883334207155885e-06, "epoch": 2.1679149862364766, "percentage": 43.36, "elapsed_time": "1:28:49", "remaining_time": "1:56:02", "throughput": 20000.11, "total_tokens": 106598464}
|
|
{"current_steps": 33870, "total_steps": 78105, "loss": 0.1899, "lr": 3.4878202527940926e-06, "epoch": 2.1682350681774536, "percentage": 43.36, "elapsed_time": "1:28:50", "remaining_time": "1:56:01", "throughput": 20000.58, "total_tokens": 106614464}
|
|
{"current_steps": 33875, "total_steps": 78105, "loss": 0.1777, "lr": 3.4873070355459986e-06, "epoch": 2.16855515011843, "percentage": 43.37, "elapsed_time": "1:28:51", "remaining_time": "1:56:00", "throughput": 20000.97, "total_tokens": 106630016}
|
|
{"current_steps": 33880, "total_steps": 78105, "loss": 0.1854, "lr": 3.4867937689969337e-06, "epoch": 2.168875232059407, "percentage": 43.38, "elapsed_time": "1:28:51", "remaining_time": "1:55:59", "throughput": 20001.39, "total_tokens": 106645952}
|
|
{"current_steps": 33885, "total_steps": 78105, "loss": 0.2888, "lr": 3.486280453172528e-06, "epoch": 2.169195314000384, "percentage": 43.38, "elapsed_time": "1:28:52", "remaining_time": "1:55:59", "throughput": 20001.9, "total_tokens": 106662784}
|
|
{"current_steps": 33890, "total_steps": 78105, "loss": 0.2578, "lr": 3.485767088098414e-06, "epoch": 2.169515395941361, "percentage": 43.39, "elapsed_time": "1:28:53", "remaining_time": "1:55:58", "throughput": 20002.34, "total_tokens": 106679104}
|
|
{"current_steps": 33895, "total_steps": 78105, "loss": 0.1939, "lr": 3.4852536738002266e-06, "epoch": 2.169835477882338, "percentage": 43.4, "elapsed_time": "1:28:54", "remaining_time": "1:55:57", "throughput": 20002.73, "total_tokens": 106694592}
|
|
{"current_steps": 33900, "total_steps": 78105, "loss": 0.1252, "lr": 3.4847402103036027e-06, "epoch": 2.1701555598233147, "percentage": 43.4, "elapsed_time": "1:28:54", "remaining_time": "1:55:56", "throughput": 20003.11, "total_tokens": 106710272}
|
|
{"current_steps": 33905, "total_steps": 78105, "loss": 0.1469, "lr": 3.4842266976341814e-06, "epoch": 2.1704756417642916, "percentage": 43.41, "elapsed_time": "1:28:55", "remaining_time": "1:55:55", "throughput": 20003.56, "total_tokens": 106726272}
|
|
{"current_steps": 33910, "total_steps": 78105, "loss": 0.2561, "lr": 3.483713135817606e-06, "epoch": 2.1707957237052686, "percentage": 43.42, "elapsed_time": "1:28:55", "remaining_time": "1:55:54", "throughput": 20003.84, "total_tokens": 106740352}
|
|
{"current_steps": 33915, "total_steps": 78105, "loss": 0.2273, "lr": 3.4831995248795204e-06, "epoch": 2.1711158056462456, "percentage": 43.42, "elapsed_time": "1:28:56", "remaining_time": "1:55:53", "throughput": 20004.29, "total_tokens": 106756416}
|
|
{"current_steps": 33920, "total_steps": 78105, "loss": 0.2186, "lr": 3.482685864845572e-06, "epoch": 2.171435887587222, "percentage": 43.43, "elapsed_time": "1:28:57", "remaining_time": "1:55:52", "throughput": 20004.62, "total_tokens": 106771072}
|
|
{"current_steps": 33925, "total_steps": 78105, "loss": 0.2672, "lr": 3.4821721557414094e-06, "epoch": 2.171755969528199, "percentage": 43.44, "elapsed_time": "1:28:57", "remaining_time": "1:55:51", "throughput": 20005.06, "total_tokens": 106786816}
|
|
{"current_steps": 33930, "total_steps": 78105, "loss": 0.354, "lr": 3.4816583975926865e-06, "epoch": 2.172076051469176, "percentage": 43.44, "elapsed_time": "1:28:58", "remaining_time": "1:55:50", "throughput": 20005.43, "total_tokens": 106802048}
|
|
{"current_steps": 33935, "total_steps": 78105, "loss": 0.1862, "lr": 3.4811445904250557e-06, "epoch": 2.172396133410153, "percentage": 43.45, "elapsed_time": "1:28:59", "remaining_time": "1:55:49", "throughput": 20005.83, "total_tokens": 106817344}
|
|
{"current_steps": 33940, "total_steps": 78105, "loss": 0.1708, "lr": 3.480630734264175e-06, "epoch": 2.1727162153511297, "percentage": 43.45, "elapsed_time": "1:28:59", "remaining_time": "1:55:48", "throughput": 20006.26, "total_tokens": 106833408}
|
|
{"current_steps": 33945, "total_steps": 78105, "loss": 0.2142, "lr": 3.480116829135703e-06, "epoch": 2.1730362972921067, "percentage": 43.46, "elapsed_time": "1:29:00", "remaining_time": "1:55:47", "throughput": 20006.69, "total_tokens": 106849344}
|
|
{"current_steps": 33950, "total_steps": 78105, "loss": 0.1494, "lr": 3.479602875065302e-06, "epoch": 2.1733563792330837, "percentage": 43.47, "elapsed_time": "1:29:01", "remaining_time": "1:55:46", "throughput": 20007.1, "total_tokens": 106865088}
|
|
{"current_steps": 33955, "total_steps": 78105, "loss": 0.1881, "lr": 3.4790888720786357e-06, "epoch": 2.1736764611740607, "percentage": 43.47, "elapsed_time": "1:29:02", "remaining_time": "1:55:45", "throughput": 20007.5, "total_tokens": 106880576}
|
|
{"current_steps": 33960, "total_steps": 78105, "loss": 0.2463, "lr": 3.4785748202013712e-06, "epoch": 2.1739965431150376, "percentage": 43.48, "elapsed_time": "1:29:02", "remaining_time": "1:55:45", "throughput": 20007.94, "total_tokens": 106896768}
|
|
{"current_steps": 33965, "total_steps": 78105, "loss": 0.3227, "lr": 3.4780607194591775e-06, "epoch": 2.174316625056014, "percentage": 43.49, "elapsed_time": "1:29:03", "remaining_time": "1:55:44", "throughput": 20008.27, "total_tokens": 106911360}
|
|
{"current_steps": 33970, "total_steps": 78105, "loss": 0.1808, "lr": 3.477546569877726e-06, "epoch": 2.174636706996991, "percentage": 43.49, "elapsed_time": "1:29:04", "remaining_time": "1:55:43", "throughput": 20008.69, "total_tokens": 106927424}
|
|
{"current_steps": 33975, "total_steps": 78105, "loss": 0.2348, "lr": 3.4770323714826903e-06, "epoch": 2.174956788937968, "percentage": 43.5, "elapsed_time": "1:29:04", "remaining_time": "1:55:42", "throughput": 20009.05, "total_tokens": 106942720}
|
|
{"current_steps": 33980, "total_steps": 78105, "loss": 0.2581, "lr": 3.4765181242997474e-06, "epoch": 2.175276870878945, "percentage": 43.51, "elapsed_time": "1:29:05", "remaining_time": "1:55:41", "throughput": 20009.53, "total_tokens": 106959680}
|
|
{"current_steps": 33985, "total_steps": 78105, "loss": 0.1787, "lr": 3.476003828354576e-06, "epoch": 2.1755969528199217, "percentage": 43.51, "elapsed_time": "1:29:06", "remaining_time": "1:55:40", "throughput": 20009.9, "total_tokens": 106974464}
|
|
{"current_steps": 33990, "total_steps": 78105, "loss": 0.1541, "lr": 3.4754894836728576e-06, "epoch": 2.1759170347608987, "percentage": 43.52, "elapsed_time": "1:29:06", "remaining_time": "1:55:39", "throughput": 20010.27, "total_tokens": 106989312}
|
|
{"current_steps": 33995, "total_steps": 78105, "loss": 0.1986, "lr": 3.474975090280275e-06, "epoch": 2.1762371167018757, "percentage": 43.52, "elapsed_time": "1:29:07", "remaining_time": "1:55:38", "throughput": 20010.65, "total_tokens": 107004480}
|
|
{"current_steps": 34000, "total_steps": 78105, "loss": 0.1989, "lr": 3.4744606482025145e-06, "epoch": 2.1765571986428527, "percentage": 43.53, "elapsed_time": "1:29:08", "remaining_time": "1:55:37", "throughput": 20010.99, "total_tokens": 107019456}
|
|
{"current_steps": 34005, "total_steps": 78105, "loss": 0.2142, "lr": 3.4739461574652654e-06, "epoch": 2.1768772805838292, "percentage": 43.54, "elapsed_time": "1:29:08", "remaining_time": "1:55:36", "throughput": 20011.4, "total_tokens": 107035200}
|
|
{"current_steps": 34010, "total_steps": 78105, "loss": 0.3719, "lr": 3.4734316180942186e-06, "epoch": 2.1771973625248062, "percentage": 43.54, "elapsed_time": "1:29:09", "remaining_time": "1:55:35", "throughput": 20011.78, "total_tokens": 107050624}
|
|
{"current_steps": 34015, "total_steps": 78105, "loss": 0.1449, "lr": 3.4729170301150673e-06, "epoch": 2.177517444465783, "percentage": 43.55, "elapsed_time": "1:29:10", "remaining_time": "1:55:34", "throughput": 20012.2, "total_tokens": 107066752}
|
|
{"current_steps": 34020, "total_steps": 78105, "loss": 0.2014, "lr": 3.472402393553507e-06, "epoch": 2.17783752640676, "percentage": 43.56, "elapsed_time": "1:29:10", "remaining_time": "1:55:33", "throughput": 20012.47, "total_tokens": 107081088}
|
|
{"current_steps": 34025, "total_steps": 78105, "loss": 0.2346, "lr": 3.4718877084352352e-06, "epoch": 2.178157608347737, "percentage": 43.56, "elapsed_time": "1:29:11", "remaining_time": "1:55:32", "throughput": 20012.84, "total_tokens": 107096512}
|
|
{"current_steps": 34030, "total_steps": 78105, "loss": 0.242, "lr": 3.471372974785955e-06, "epoch": 2.1784776902887137, "percentage": 43.57, "elapsed_time": "1:29:12", "remaining_time": "1:55:31", "throughput": 20013.26, "total_tokens": 107112512}
|
|
{"current_steps": 34035, "total_steps": 78105, "loss": 0.2467, "lr": 3.470858192631368e-06, "epoch": 2.1787977722296907, "percentage": 43.58, "elapsed_time": "1:29:12", "remaining_time": "1:55:30", "throughput": 20013.65, "total_tokens": 107127872}
|
|
{"current_steps": 34040, "total_steps": 78105, "loss": 0.1246, "lr": 3.4703433619971794e-06, "epoch": 2.1791178541706677, "percentage": 43.58, "elapsed_time": "1:29:13", "remaining_time": "1:55:30", "throughput": 20014.04, "total_tokens": 107143104}
|
|
{"current_steps": 34045, "total_steps": 78105, "loss": 0.2645, "lr": 3.469828482909098e-06, "epoch": 2.1794379361116447, "percentage": 43.59, "elapsed_time": "1:29:14", "remaining_time": "1:55:29", "throughput": 20014.42, "total_tokens": 107158272}
|
|
{"current_steps": 34050, "total_steps": 78105, "loss": 0.2499, "lr": 3.4693135553928337e-06, "epoch": 2.1797580180526213, "percentage": 43.6, "elapsed_time": "1:29:14", "remaining_time": "1:55:28", "throughput": 20014.81, "total_tokens": 107173696}
|
|
{"current_steps": 34055, "total_steps": 78105, "loss": 0.2194, "lr": 3.4687985794740993e-06, "epoch": 2.1800780999935983, "percentage": 43.6, "elapsed_time": "1:29:15", "remaining_time": "1:55:27", "throughput": 20015.23, "total_tokens": 107189696}
|
|
{"current_steps": 34060, "total_steps": 78105, "loss": 0.2478, "lr": 3.4682835551786097e-06, "epoch": 2.1803981819345752, "percentage": 43.61, "elapsed_time": "1:29:16", "remaining_time": "1:55:26", "throughput": 20015.65, "total_tokens": 107205184}
|
|
{"current_steps": 34065, "total_steps": 78105, "loss": 0.2267, "lr": 3.4677684825320835e-06, "epoch": 2.1807182638755522, "percentage": 43.61, "elapsed_time": "1:29:16", "remaining_time": "1:55:25", "throughput": 20016.11, "total_tokens": 107221824}
|
|
{"current_steps": 34070, "total_steps": 78105, "loss": 0.2264, "lr": 3.46725336156024e-06, "epoch": 2.1810383458165292, "percentage": 43.62, "elapsed_time": "1:29:17", "remaining_time": "1:55:24", "throughput": 20016.63, "total_tokens": 107238976}
|
|
{"current_steps": 34075, "total_steps": 78105, "loss": 0.2012, "lr": 3.466738192288802e-06, "epoch": 2.1813584277575058, "percentage": 43.63, "elapsed_time": "1:29:18", "remaining_time": "1:55:23", "throughput": 20017.15, "total_tokens": 107256128}
|
|
{"current_steps": 34080, "total_steps": 78105, "loss": 0.23, "lr": 3.466222974743494e-06, "epoch": 2.1816785096984828, "percentage": 43.63, "elapsed_time": "1:29:18", "remaining_time": "1:55:22", "throughput": 20017.56, "total_tokens": 107271680}
|
|
{"current_steps": 34085, "total_steps": 78105, "loss": 0.1558, "lr": 3.465707708950044e-06, "epoch": 2.1819985916394598, "percentage": 43.64, "elapsed_time": "1:29:19", "remaining_time": "1:55:21", "throughput": 20018.08, "total_tokens": 107288960}
|
|
{"current_steps": 34090, "total_steps": 78105, "loss": 0.1799, "lr": 3.4651923949341804e-06, "epoch": 2.1823186735804367, "percentage": 43.65, "elapsed_time": "1:29:20", "remaining_time": "1:55:20", "throughput": 20018.54, "total_tokens": 107305280}
|
|
{"current_steps": 34095, "total_steps": 78105, "loss": 0.3084, "lr": 3.4646770327216367e-06, "epoch": 2.1826387555214133, "percentage": 43.65, "elapsed_time": "1:29:20", "remaining_time": "1:55:19", "throughput": 20018.91, "total_tokens": 107320704}
|
|
{"current_steps": 34100, "total_steps": 78105, "loss": 0.2897, "lr": 3.4641616223381467e-06, "epoch": 2.1829588374623903, "percentage": 43.66, "elapsed_time": "1:29:21", "remaining_time": "1:55:19", "throughput": 20019.3, "total_tokens": 107336256}
|
|
{"current_steps": 34105, "total_steps": 78105, "loss": 0.1803, "lr": 3.4636461638094476e-06, "epoch": 2.1832789194033673, "percentage": 43.67, "elapsed_time": "1:29:22", "remaining_time": "1:55:18", "throughput": 20019.67, "total_tokens": 107351872}
|
|
{"current_steps": 34110, "total_steps": 78105, "loss": 0.1921, "lr": 3.463130657161279e-06, "epoch": 2.1835990013443443, "percentage": 43.67, "elapsed_time": "1:29:22", "remaining_time": "1:55:17", "throughput": 20020.04, "total_tokens": 107367040}
|
|
{"current_steps": 34115, "total_steps": 78105, "loss": 0.2382, "lr": 3.462615102419381e-06, "epoch": 2.1839190832853212, "percentage": 43.68, "elapsed_time": "1:29:23", "remaining_time": "1:55:16", "throughput": 20020.44, "total_tokens": 107382656}
|
|
{"current_steps": 34120, "total_steps": 78105, "loss": 0.2082, "lr": 3.4620994996094993e-06, "epoch": 2.184239165226298, "percentage": 43.68, "elapsed_time": "1:29:24", "remaining_time": "1:55:15", "throughput": 20020.86, "total_tokens": 107398720}
|
|
{"current_steps": 34125, "total_steps": 78105, "loss": 0.1904, "lr": 3.4615838487573803e-06, "epoch": 2.184559247167275, "percentage": 43.69, "elapsed_time": "1:29:24", "remaining_time": "1:55:14", "throughput": 20021.17, "total_tokens": 107412864}
|
|
{"current_steps": 34130, "total_steps": 78105, "loss": 0.1471, "lr": 3.461068149888773e-06, "epoch": 2.1848793291082518, "percentage": 43.7, "elapsed_time": "1:29:25", "remaining_time": "1:55:13", "throughput": 20021.52, "total_tokens": 107427776}
|
|
{"current_steps": 34135, "total_steps": 78105, "loss": 0.249, "lr": 3.460552403029427e-06, "epoch": 2.1851994110492288, "percentage": 43.7, "elapsed_time": "1:29:26", "remaining_time": "1:55:12", "throughput": 20021.88, "total_tokens": 107443072}
|
|
{"current_steps": 34140, "total_steps": 78105, "loss": 0.2319, "lr": 3.4600366082050977e-06, "epoch": 2.1855194929902053, "percentage": 43.71, "elapsed_time": "1:29:26", "remaining_time": "1:55:11", "throughput": 20022.24, "total_tokens": 107457920}
|
|
{"current_steps": 34145, "total_steps": 78105, "loss": 0.351, "lr": 3.459520765441541e-06, "epoch": 2.1858395749311823, "percentage": 43.72, "elapsed_time": "1:29:27", "remaining_time": "1:55:10", "throughput": 20022.65, "total_tokens": 107473600}
|
|
{"current_steps": 34150, "total_steps": 78105, "loss": 0.3121, "lr": 3.4590048747645154e-06, "epoch": 2.1861596568721593, "percentage": 43.72, "elapsed_time": "1:29:28", "remaining_time": "1:55:09", "throughput": 20023.03, "total_tokens": 107488832}
|
|
{"current_steps": 34155, "total_steps": 78105, "loss": 0.2794, "lr": 3.458488936199782e-06, "epoch": 2.1864797388131363, "percentage": 43.73, "elapsed_time": "1:29:28", "remaining_time": "1:55:08", "throughput": 20023.52, "total_tokens": 107505920}
|
|
{"current_steps": 34160, "total_steps": 78105, "loss": 0.1562, "lr": 3.4579729497731037e-06, "epoch": 2.1867998207541133, "percentage": 43.74, "elapsed_time": "1:29:29", "remaining_time": "1:55:07", "throughput": 20023.97, "total_tokens": 107522624}
|
|
{"current_steps": 34165, "total_steps": 78105, "loss": 0.1315, "lr": 3.4574569155102455e-06, "epoch": 2.18711990269509, "percentage": 43.74, "elapsed_time": "1:29:30", "remaining_time": "1:55:06", "throughput": 20024.46, "total_tokens": 107539136}
|
|
{"current_steps": 34170, "total_steps": 78105, "loss": 0.1624, "lr": 3.4569408334369768e-06, "epoch": 2.187439984636067, "percentage": 43.75, "elapsed_time": "1:29:31", "remaining_time": "1:55:06", "throughput": 20024.85, "total_tokens": 107554944}
|
|
{"current_steps": 34175, "total_steps": 78105, "loss": 0.2167, "lr": 3.4564247035790664e-06, "epoch": 2.187760066577044, "percentage": 43.76, "elapsed_time": "1:29:31", "remaining_time": "1:55:05", "throughput": 20025.31, "total_tokens": 107571456}
|
|
{"current_steps": 34180, "total_steps": 78105, "loss": 0.2237, "lr": 3.4559085259622893e-06, "epoch": 2.188080148518021, "percentage": 43.76, "elapsed_time": "1:29:32", "remaining_time": "1:55:04", "throughput": 20025.69, "total_tokens": 107586880}
|
|
{"current_steps": 34185, "total_steps": 78105, "loss": 0.3577, "lr": 3.4553923006124192e-06, "epoch": 2.1884002304589973, "percentage": 43.77, "elapsed_time": "1:29:33", "remaining_time": "1:55:03", "throughput": 20026.07, "total_tokens": 107602560}
|
|
{"current_steps": 34190, "total_steps": 78105, "loss": 0.2601, "lr": 3.454876027555234e-06, "epoch": 2.1887203123999743, "percentage": 43.77, "elapsed_time": "1:29:33", "remaining_time": "1:55:02", "throughput": 20026.46, "total_tokens": 107618112}
|
|
{"current_steps": 34195, "total_steps": 78105, "loss": 0.1799, "lr": 3.454359706816513e-06, "epoch": 2.1890403943409513, "percentage": 43.78, "elapsed_time": "1:29:34", "remaining_time": "1:55:01", "throughput": 20027.07, "total_tokens": 107636288}
|
|
{"current_steps": 34200, "total_steps": 78105, "loss": 0.2103, "lr": 3.4538433384220403e-06, "epoch": 2.1893604762819283, "percentage": 43.79, "elapsed_time": "1:29:35", "remaining_time": "1:55:00", "throughput": 20027.48, "total_tokens": 107652032}
|
|
{"current_steps": 34205, "total_steps": 78105, "loss": 0.2011, "lr": 3.453326922397599e-06, "epoch": 2.189680558222905, "percentage": 43.79, "elapsed_time": "1:29:35", "remaining_time": "1:54:59", "throughput": 20027.87, "total_tokens": 107667328}
|
|
{"current_steps": 34210, "total_steps": 78105, "loss": 0.1632, "lr": 3.452810458768977e-06, "epoch": 2.190000640163882, "percentage": 43.8, "elapsed_time": "1:29:36", "remaining_time": "1:54:58", "throughput": 20028.32, "total_tokens": 107683904}
|
|
{"current_steps": 34215, "total_steps": 78105, "loss": 0.1922, "lr": 3.452293947561964e-06, "epoch": 2.190320722104859, "percentage": 43.81, "elapsed_time": "1:29:37", "remaining_time": "1:54:57", "throughput": 20028.71, "total_tokens": 107699520}
|
|
{"current_steps": 34220, "total_steps": 78105, "loss": 0.2186, "lr": 3.4517773888023522e-06, "epoch": 2.190640804045836, "percentage": 43.81, "elapsed_time": "1:29:37", "remaining_time": "1:54:56", "throughput": 20029.09, "total_tokens": 107715008}
|
|
{"current_steps": 34225, "total_steps": 78105, "loss": 0.1719, "lr": 3.4512607825159343e-06, "epoch": 2.190960885986813, "percentage": 43.82, "elapsed_time": "1:29:38", "remaining_time": "1:54:55", "throughput": 20029.43, "total_tokens": 107729984}
|
|
{"current_steps": 34230, "total_steps": 78105, "loss": 0.1818, "lr": 3.4507441287285083e-06, "epoch": 2.1912809679277894, "percentage": 43.83, "elapsed_time": "1:29:39", "remaining_time": "1:54:55", "throughput": 20029.85, "total_tokens": 107746176}
|
|
{"current_steps": 34235, "total_steps": 78105, "loss": 0.2136, "lr": 3.4502274274658724e-06, "epoch": 2.1916010498687664, "percentage": 43.83, "elapsed_time": "1:29:39", "remaining_time": "1:54:54", "throughput": 20030.27, "total_tokens": 107761664}
|
|
{"current_steps": 34240, "total_steps": 78105, "loss": 0.2131, "lr": 3.4497106787538286e-06, "epoch": 2.1919211318097434, "percentage": 43.84, "elapsed_time": "1:29:40", "remaining_time": "1:54:53", "throughput": 20030.76, "total_tokens": 107778240}
|
|
{"current_steps": 34245, "total_steps": 78105, "loss": 0.1925, "lr": 3.4491938826181803e-06, "epoch": 2.1922412137507203, "percentage": 43.84, "elapsed_time": "1:29:41", "remaining_time": "1:54:52", "throughput": 20031.11, "total_tokens": 107793216}
|
|
{"current_steps": 34250, "total_steps": 78105, "loss": 0.1419, "lr": 3.448677039084734e-06, "epoch": 2.192561295691697, "percentage": 43.85, "elapsed_time": "1:29:41", "remaining_time": "1:54:51", "throughput": 20031.5, "total_tokens": 107808768}
|
|
{"current_steps": 34255, "total_steps": 78105, "loss": 0.156, "lr": 3.448160148179297e-06, "epoch": 2.192881377632674, "percentage": 43.86, "elapsed_time": "1:29:42", "remaining_time": "1:54:50", "throughput": 20031.82, "total_tokens": 107823808}
|
|
{"current_steps": 34260, "total_steps": 78105, "loss": 0.5304, "lr": 3.4476432099276813e-06, "epoch": 2.193201459573651, "percentage": 43.86, "elapsed_time": "1:29:43", "remaining_time": "1:54:49", "throughput": 20032.16, "total_tokens": 107838912}
|
|
{"current_steps": 34265, "total_steps": 78105, "loss": 0.323, "lr": 3.4471262243556995e-06, "epoch": 2.193521541514628, "percentage": 43.87, "elapsed_time": "1:29:43", "remaining_time": "1:54:48", "throughput": 20032.58, "total_tokens": 107854784}
|
|
{"current_steps": 34270, "total_steps": 78105, "loss": 0.1791, "lr": 3.4466091914891685e-06, "epoch": 2.1938416234556044, "percentage": 43.88, "elapsed_time": "1:29:44", "remaining_time": "1:54:47", "throughput": 20032.96, "total_tokens": 107870208}
|
|
{"current_steps": 34275, "total_steps": 78105, "loss": 0.1536, "lr": 3.4460921113539046e-06, "epoch": 2.1941617053965814, "percentage": 43.88, "elapsed_time": "1:29:45", "remaining_time": "1:54:46", "throughput": 20033.35, "total_tokens": 107886144}
|
|
{"current_steps": 34280, "total_steps": 78105, "loss": 0.2229, "lr": 3.445574983975729e-06, "epoch": 2.1944817873375584, "percentage": 43.89, "elapsed_time": "1:29:46", "remaining_time": "1:54:45", "throughput": 20033.8, "total_tokens": 107902720}
|
|
{"current_steps": 34285, "total_steps": 78105, "loss": 0.2088, "lr": 3.445057809380463e-06, "epoch": 2.1948018692785354, "percentage": 43.9, "elapsed_time": "1:29:46", "remaining_time": "1:54:44", "throughput": 20034.08, "total_tokens": 107917056}
|
|
{"current_steps": 34290, "total_steps": 78105, "loss": 0.1265, "lr": 3.4445405875939337e-06, "epoch": 2.1951219512195124, "percentage": 43.9, "elapsed_time": "1:29:47", "remaining_time": "1:54:43", "throughput": 20034.42, "total_tokens": 107931968}
|
|
{"current_steps": 34295, "total_steps": 78105, "loss": 0.1757, "lr": 3.4440233186419668e-06, "epoch": 2.195442033160489, "percentage": 43.91, "elapsed_time": "1:29:48", "remaining_time": "1:54:42", "throughput": 20034.86, "total_tokens": 107948288}
|
|
{"current_steps": 34300, "total_steps": 78105, "loss": 0.1888, "lr": 3.4435060025503932e-06, "epoch": 2.195762115101466, "percentage": 43.92, "elapsed_time": "1:29:48", "remaining_time": "1:54:41", "throughput": 20035.23, "total_tokens": 107963584}
|
|
{"current_steps": 34305, "total_steps": 78105, "loss": 0.2399, "lr": 3.442988639345044e-06, "epoch": 2.196082197042443, "percentage": 43.92, "elapsed_time": "1:29:49", "remaining_time": "1:54:41", "throughput": 20035.69, "total_tokens": 107980096}
|
|
{"current_steps": 34310, "total_steps": 78105, "loss": 0.2176, "lr": 3.4424712290517535e-06, "epoch": 2.19640227898342, "percentage": 43.93, "elapsed_time": "1:29:50", "remaining_time": "1:54:40", "throughput": 20035.99, "total_tokens": 107994816}
|
|
{"current_steps": 34315, "total_steps": 78105, "loss": 0.2193, "lr": 3.4419537716963597e-06, "epoch": 2.1967223609243964, "percentage": 43.93, "elapsed_time": "1:29:50", "remaining_time": "1:54:39", "throughput": 20036.32, "total_tokens": 108010048}
|
|
{"current_steps": 34320, "total_steps": 78105, "loss": 0.2667, "lr": 3.4414362673047008e-06, "epoch": 2.1970424428653734, "percentage": 43.94, "elapsed_time": "1:29:51", "remaining_time": "1:54:38", "throughput": 20036.72, "total_tokens": 108025984}
|
|
{"current_steps": 34325, "total_steps": 78105, "loss": 0.1844, "lr": 3.440918715902618e-06, "epoch": 2.1973625248063504, "percentage": 43.95, "elapsed_time": "1:29:52", "remaining_time": "1:54:37", "throughput": 20037.08, "total_tokens": 108041344}
|
|
{"current_steps": 34330, "total_steps": 78105, "loss": 0.2452, "lr": 3.4404011175159564e-06, "epoch": 2.1976826067473274, "percentage": 43.95, "elapsed_time": "1:29:52", "remaining_time": "1:54:36", "throughput": 20037.57, "total_tokens": 108057984}
|
|
{"current_steps": 34335, "total_steps": 78105, "loss": 0.2462, "lr": 3.439883472170561e-06, "epoch": 2.1980026886883044, "percentage": 43.96, "elapsed_time": "1:29:53", "remaining_time": "1:54:35", "throughput": 20037.91, "total_tokens": 108073024}
|
|
{"current_steps": 34340, "total_steps": 78105, "loss": 0.1926, "lr": 3.4393657798922807e-06, "epoch": 2.198322770629281, "percentage": 43.97, "elapsed_time": "1:29:54", "remaining_time": "1:54:34", "throughput": 20038.36, "total_tokens": 108089536}
|
|
{"current_steps": 34345, "total_steps": 78105, "loss": 0.1936, "lr": 3.4388480407069665e-06, "epoch": 2.198642852570258, "percentage": 43.97, "elapsed_time": "1:29:54", "remaining_time": "1:54:33", "throughput": 20038.77, "total_tokens": 108105536}
|
|
{"current_steps": 34350, "total_steps": 78105, "loss": 0.1931, "lr": 3.4383302546404713e-06, "epoch": 2.198962934511235, "percentage": 43.98, "elapsed_time": "1:29:55", "remaining_time": "1:54:32", "throughput": 20039.31, "total_tokens": 108123200}
|
|
{"current_steps": 34355, "total_steps": 78105, "loss": 0.2237, "lr": 3.4378124217186503e-06, "epoch": 2.199283016452212, "percentage": 43.99, "elapsed_time": "1:29:56", "remaining_time": "1:54:31", "throughput": 20039.75, "total_tokens": 108139456}
|
|
{"current_steps": 34360, "total_steps": 78105, "loss": 0.2067, "lr": 3.4372945419673626e-06, "epoch": 2.1996030983931885, "percentage": 43.99, "elapsed_time": "1:29:56", "remaining_time": "1:54:31", "throughput": 20040.19, "total_tokens": 108155648}
|
|
{"current_steps": 34365, "total_steps": 78105, "loss": 0.1792, "lr": 3.436776615412467e-06, "epoch": 2.1999231803341655, "percentage": 44.0, "elapsed_time": "1:29:57", "remaining_time": "1:54:30", "throughput": 20040.61, "total_tokens": 108171712}
|
|
{"current_steps": 34370, "total_steps": 78105, "loss": 0.2654, "lr": 3.436258642079827e-06, "epoch": 2.2002432622751424, "percentage": 44.0, "elapsed_time": "1:29:58", "remaining_time": "1:54:29", "throughput": 20041.04, "total_tokens": 108188032}
|
|
{"current_steps": 34375, "total_steps": 78105, "loss": 0.2347, "lr": 3.435740621995307e-06, "epoch": 2.2005633442161194, "percentage": 44.01, "elapsed_time": "1:29:58", "remaining_time": "1:54:28", "throughput": 20041.35, "total_tokens": 108202432}
|
|
{"current_steps": 34380, "total_steps": 78105, "loss": 0.1676, "lr": 3.4352225551847747e-06, "epoch": 2.2008834261570964, "percentage": 44.02, "elapsed_time": "1:29:59", "remaining_time": "1:54:27", "throughput": 20041.68, "total_tokens": 108217536}
|
|
{"current_steps": 34385, "total_steps": 78105, "loss": 0.2502, "lr": 3.434704441674099e-06, "epoch": 2.201203508098073, "percentage": 44.02, "elapsed_time": "1:30:00", "remaining_time": "1:54:26", "throughput": 20042.26, "total_tokens": 108235072}
|
|
{"current_steps": 34390, "total_steps": 78105, "loss": 0.2983, "lr": 3.434186281489153e-06, "epoch": 2.20152359003905, "percentage": 44.03, "elapsed_time": "1:30:01", "remaining_time": "1:54:25", "throughput": 20042.68, "total_tokens": 108250752}
|
|
{"current_steps": 34395, "total_steps": 78105, "loss": 0.1947, "lr": 3.43366807465581e-06, "epoch": 2.201843671980027, "percentage": 44.04, "elapsed_time": "1:30:01", "remaining_time": "1:54:24", "throughput": 20043.08, "total_tokens": 108266304}
|
|
{"current_steps": 34400, "total_steps": 78105, "loss": 0.1995, "lr": 3.433149821199946e-06, "epoch": 2.202163753921004, "percentage": 44.04, "elapsed_time": "1:30:02", "remaining_time": "1:54:23", "throughput": 20043.51, "total_tokens": 108282304}
|
|
{"current_steps": 34405, "total_steps": 78105, "loss": 0.1841, "lr": 3.4326315211474408e-06, "epoch": 2.2024838358619805, "percentage": 44.05, "elapsed_time": "1:30:03", "remaining_time": "1:54:22", "throughput": 20043.93, "total_tokens": 108298176}
|
|
{"current_steps": 34410, "total_steps": 78105, "loss": 0.1742, "lr": 3.432113174524175e-06, "epoch": 2.2028039178029575, "percentage": 44.06, "elapsed_time": "1:30:03", "remaining_time": "1:54:21", "throughput": 20044.28, "total_tokens": 108313152}
|
|
{"current_steps": 34415, "total_steps": 78105, "loss": 0.1972, "lr": 3.4315947813560337e-06, "epoch": 2.2031239997439345, "percentage": 44.06, "elapsed_time": "1:30:04", "remaining_time": "1:54:20", "throughput": 20044.63, "total_tokens": 108328448}
|
|
{"current_steps": 34420, "total_steps": 78105, "loss": 0.1578, "lr": 3.4310763416689003e-06, "epoch": 2.2034440816849115, "percentage": 44.07, "elapsed_time": "1:30:05", "remaining_time": "1:54:19", "throughput": 20045.04, "total_tokens": 108344128}
|
|
{"current_steps": 34425, "total_steps": 78105, "loss": 0.2849, "lr": 3.4305578554886647e-06, "epoch": 2.2037641636258885, "percentage": 44.08, "elapsed_time": "1:30:05", "remaining_time": "1:54:19", "throughput": 20045.47, "total_tokens": 108360192}
|
|
{"current_steps": 34430, "total_steps": 78105, "loss": 0.187, "lr": 3.4300393228412167e-06, "epoch": 2.204084245566865, "percentage": 44.08, "elapsed_time": "1:30:06", "remaining_time": "1:54:18", "throughput": 20045.8, "total_tokens": 108374784}
|
|
{"current_steps": 34435, "total_steps": 78105, "loss": 0.1217, "lr": 3.4295207437524485e-06, "epoch": 2.204404327507842, "percentage": 44.09, "elapsed_time": "1:30:07", "remaining_time": "1:54:17", "throughput": 20046.17, "total_tokens": 108390144}
|
|
{"current_steps": 34440, "total_steps": 78105, "loss": 0.2001, "lr": 3.429002118248257e-06, "epoch": 2.204724409448819, "percentage": 44.09, "elapsed_time": "1:30:07", "remaining_time": "1:54:16", "throughput": 20046.5, "total_tokens": 108405056}
|
|
{"current_steps": 34445, "total_steps": 78105, "loss": 0.2279, "lr": 3.428483446354539e-06, "epoch": 2.205044491389796, "percentage": 44.1, "elapsed_time": "1:30:08", "remaining_time": "1:54:15", "throughput": 20046.86, "total_tokens": 108420160}
|
|
{"current_steps": 34450, "total_steps": 78105, "loss": 0.1814, "lr": 3.427964728097194e-06, "epoch": 2.2053645733307725, "percentage": 44.11, "elapsed_time": "1:30:08", "remaining_time": "1:54:14", "throughput": 20047.17, "total_tokens": 108434560}
|
|
{"current_steps": 34455, "total_steps": 78105, "loss": 0.2064, "lr": 3.4274459635021236e-06, "epoch": 2.2056846552717495, "percentage": 44.11, "elapsed_time": "1:30:09", "remaining_time": "1:54:13", "throughput": 20047.48, "total_tokens": 108449216}
|
|
{"current_steps": 34460, "total_steps": 78105, "loss": 0.3005, "lr": 3.426927152595232e-06, "epoch": 2.2060047372127265, "percentage": 44.12, "elapsed_time": "1:30:10", "remaining_time": "1:54:12", "throughput": 20047.84, "total_tokens": 108464768}
|
|
{"current_steps": 34465, "total_steps": 78105, "loss": 0.2335, "lr": 3.4264082954024275e-06, "epoch": 2.2063248191537035, "percentage": 44.13, "elapsed_time": "1:30:10", "remaining_time": "1:54:11", "throughput": 20048.15, "total_tokens": 108479296}
|
|
{"current_steps": 34470, "total_steps": 78105, "loss": 0.3266, "lr": 3.4258893919496182e-06, "epoch": 2.20664490109468, "percentage": 44.13, "elapsed_time": "1:30:11", "remaining_time": "1:54:10", "throughput": 20048.48, "total_tokens": 108493888}
|
|
{"current_steps": 34475, "total_steps": 78105, "loss": 0.1955, "lr": 3.4253704422627153e-06, "epoch": 2.206964983035657, "percentage": 44.14, "elapsed_time": "1:30:12", "remaining_time": "1:54:09", "throughput": 20048.85, "total_tokens": 108509248}
|
|
{"current_steps": 34480, "total_steps": 78105, "loss": 0.2876, "lr": 3.424851446367633e-06, "epoch": 2.207285064976634, "percentage": 44.15, "elapsed_time": "1:30:12", "remaining_time": "1:54:08", "throughput": 20049.38, "total_tokens": 108526464}
|
|
{"current_steps": 34485, "total_steps": 78105, "loss": 0.2008, "lr": 3.424332404290286e-06, "epoch": 2.207605146917611, "percentage": 44.15, "elapsed_time": "1:30:13", "remaining_time": "1:54:07", "throughput": 20049.8, "total_tokens": 108542208}
|
|
{"current_steps": 34490, "total_steps": 78105, "loss": 0.232, "lr": 3.4238133160565934e-06, "epoch": 2.207925228858588, "percentage": 44.16, "elapsed_time": "1:30:14", "remaining_time": "1:54:06", "throughput": 20050.16, "total_tokens": 108557056}
|
|
{"current_steps": 34495, "total_steps": 78105, "loss": 0.1912, "lr": 3.4232941816924764e-06, "epoch": 2.2082453107995645, "percentage": 44.16, "elapsed_time": "1:30:14", "remaining_time": "1:54:05", "throughput": 20050.55, "total_tokens": 108572608}
|
|
{"current_steps": 34500, "total_steps": 78105, "loss": 0.2744, "lr": 3.422775001223857e-06, "epoch": 2.2085653927405415, "percentage": 44.17, "elapsed_time": "1:30:15", "remaining_time": "1:54:04", "throughput": 20050.96, "total_tokens": 108588544}
|
|
{"current_steps": 34505, "total_steps": 78105, "loss": 0.1612, "lr": 3.4222557746766604e-06, "epoch": 2.2088854746815185, "percentage": 44.18, "elapsed_time": "1:30:16", "remaining_time": "1:54:03", "throughput": 20051.42, "total_tokens": 108604800}
|
|
{"current_steps": 34510, "total_steps": 78105, "loss": 0.1753, "lr": 3.421736502076815e-06, "epoch": 2.2092055566224955, "percentage": 44.18, "elapsed_time": "1:30:16", "remaining_time": "1:54:03", "throughput": 20051.76, "total_tokens": 108619776}
|
|
{"current_steps": 34515, "total_steps": 78105, "loss": 0.153, "lr": 3.421217183450249e-06, "epoch": 2.209525638563472, "percentage": 44.19, "elapsed_time": "1:30:17", "remaining_time": "1:54:02", "throughput": 20052.18, "total_tokens": 108636032}
|
|
{"current_steps": 34520, "total_steps": 78105, "loss": 0.2578, "lr": 3.4206978188228955e-06, "epoch": 2.209845720504449, "percentage": 44.2, "elapsed_time": "1:30:18", "remaining_time": "1:54:01", "throughput": 20052.54, "total_tokens": 108651520}
|
|
{"current_steps": 34525, "total_steps": 78105, "loss": 0.1968, "lr": 3.420178408220689e-06, "epoch": 2.210165802445426, "percentage": 44.2, "elapsed_time": "1:30:19", "remaining_time": "1:54:00", "throughput": 20053.11, "total_tokens": 108669440}
|
|
{"current_steps": 34530, "total_steps": 78105, "loss": 0.2407, "lr": 3.4196589516695655e-06, "epoch": 2.210485884386403, "percentage": 44.21, "elapsed_time": "1:30:19", "remaining_time": "1:53:59", "throughput": 20053.57, "total_tokens": 108685952}
|
|
{"current_steps": 34535, "total_steps": 78105, "loss": 0.2009, "lr": 3.419139449195465e-06, "epoch": 2.2108059663273796, "percentage": 44.22, "elapsed_time": "1:30:20", "remaining_time": "1:53:58", "throughput": 20053.92, "total_tokens": 108701440}
|
|
{"current_steps": 34540, "total_steps": 78105, "loss": 0.1482, "lr": 3.418619900824327e-06, "epoch": 2.2111260482683566, "percentage": 44.22, "elapsed_time": "1:30:21", "remaining_time": "1:53:57", "throughput": 20054.42, "total_tokens": 108718656}
|
|
{"current_steps": 34545, "total_steps": 78105, "loss": 0.3677, "lr": 3.4181003065820963e-06, "epoch": 2.2114461302093336, "percentage": 44.23, "elapsed_time": "1:30:21", "remaining_time": "1:53:56", "throughput": 20054.93, "total_tokens": 108735680}
|
|
{"current_steps": 34550, "total_steps": 78105, "loss": 0.1548, "lr": 3.4175806664947186e-06, "epoch": 2.2117662121503106, "percentage": 44.24, "elapsed_time": "1:30:22", "remaining_time": "1:53:55", "throughput": 20055.33, "total_tokens": 108751488}
|
|
{"current_steps": 34555, "total_steps": 78105, "loss": 0.2066, "lr": 3.417060980588142e-06, "epoch": 2.2120862940912875, "percentage": 44.24, "elapsed_time": "1:30:23", "remaining_time": "1:53:54", "throughput": 20055.67, "total_tokens": 108766272}
|
|
{"current_steps": 34560, "total_steps": 78105, "loss": 0.2848, "lr": 3.4165412488883173e-06, "epoch": 2.212406376032264, "percentage": 44.25, "elapsed_time": "1:30:23", "remaining_time": "1:53:54", "throughput": 20056.04, "total_tokens": 108781632}
|
|
{"current_steps": 34565, "total_steps": 78105, "loss": 0.1604, "lr": 3.416021471421196e-06, "epoch": 2.212726457973241, "percentage": 44.25, "elapsed_time": "1:30:24", "remaining_time": "1:53:53", "throughput": 20056.45, "total_tokens": 108797376}
|
|
{"current_steps": 34570, "total_steps": 78105, "loss": 0.1568, "lr": 3.415501648212734e-06, "epoch": 2.213046539914218, "percentage": 44.26, "elapsed_time": "1:30:25", "remaining_time": "1:53:52", "throughput": 20056.97, "total_tokens": 108814656}
|
|
{"current_steps": 34575, "total_steps": 78105, "loss": 0.2778, "lr": 3.414981779288888e-06, "epoch": 2.213366621855195, "percentage": 44.27, "elapsed_time": "1:30:25", "remaining_time": "1:53:51", "throughput": 20057.34, "total_tokens": 108829888}
|
|
{"current_steps": 34580, "total_steps": 78105, "loss": 0.2578, "lr": 3.4144618646756187e-06, "epoch": 2.2136867037961716, "percentage": 44.27, "elapsed_time": "1:30:26", "remaining_time": "1:53:50", "throughput": 20057.72, "total_tokens": 108845376}
|
|
{"current_steps": 34585, "total_steps": 78105, "loss": 0.1357, "lr": 3.413941904398887e-06, "epoch": 2.2140067857371486, "percentage": 44.28, "elapsed_time": "1:30:27", "remaining_time": "1:53:49", "throughput": 20058.11, "total_tokens": 108861056}
|
|
{"current_steps": 34590, "total_steps": 78105, "loss": 0.1876, "lr": 3.413421898484657e-06, "epoch": 2.2143268676781256, "percentage": 44.29, "elapsed_time": "1:30:27", "remaining_time": "1:53:48", "throughput": 20058.46, "total_tokens": 108876096}
|
|
{"current_steps": 34595, "total_steps": 78105, "loss": 0.2488, "lr": 3.412901846958895e-06, "epoch": 2.2146469496191026, "percentage": 44.29, "elapsed_time": "1:30:28", "remaining_time": "1:53:47", "throughput": 20058.9, "total_tokens": 108892288}
|
|
{"current_steps": 34600, "total_steps": 78105, "loss": 0.3183, "lr": 3.41238174984757e-06, "epoch": 2.2149670315600796, "percentage": 44.3, "elapsed_time": "1:30:29", "remaining_time": "1:53:46", "throughput": 20059.25, "total_tokens": 108907456}
|
|
{"current_steps": 34605, "total_steps": 78105, "loss": 0.1888, "lr": 3.4118616071766526e-06, "epoch": 2.215287113501056, "percentage": 44.31, "elapsed_time": "1:30:29", "remaining_time": "1:53:45", "throughput": 20059.72, "total_tokens": 108924224}
|
|
{"current_steps": 34610, "total_steps": 78105, "loss": 0.1472, "lr": 3.411341418972116e-06, "epoch": 2.215607195442033, "percentage": 44.31, "elapsed_time": "1:30:30", "remaining_time": "1:53:44", "throughput": 20060.12, "total_tokens": 108939776}
|
|
{"current_steps": 34615, "total_steps": 78105, "loss": 0.2511, "lr": 3.410821185259937e-06, "epoch": 2.21592727738301, "percentage": 44.32, "elapsed_time": "1:30:31", "remaining_time": "1:53:43", "throughput": 20060.44, "total_tokens": 108954560}
|
|
{"current_steps": 34620, "total_steps": 78105, "loss": 0.1248, "lr": 3.4103009060660917e-06, "epoch": 2.216247359323987, "percentage": 44.32, "elapsed_time": "1:30:31", "remaining_time": "1:53:42", "throughput": 20060.85, "total_tokens": 108970304}
|
|
{"current_steps": 34625, "total_steps": 78105, "loss": 0.2162, "lr": 3.4097805814165608e-06, "epoch": 2.2165674412649636, "percentage": 44.33, "elapsed_time": "1:30:32", "remaining_time": "1:53:41", "throughput": 20061.17, "total_tokens": 108984832}
|
|
{"current_steps": 34630, "total_steps": 78105, "loss": 0.1608, "lr": 3.4092602113373265e-06, "epoch": 2.2168875232059406, "percentage": 44.34, "elapsed_time": "1:30:33", "remaining_time": "1:53:41", "throughput": 20061.57, "total_tokens": 109000576}
|
|
{"current_steps": 34635, "total_steps": 78105, "loss": 0.2242, "lr": 3.4087397958543735e-06, "epoch": 2.2172076051469176, "percentage": 44.34, "elapsed_time": "1:30:34", "remaining_time": "1:53:40", "throughput": 20062.01, "total_tokens": 109017152}
|
|
{"current_steps": 34640, "total_steps": 78105, "loss": 0.2537, "lr": 3.4082193349936885e-06, "epoch": 2.2175276870878946, "percentage": 44.35, "elapsed_time": "1:30:34", "remaining_time": "1:53:39", "throughput": 20062.36, "total_tokens": 109032448}
|
|
{"current_steps": 34645, "total_steps": 78105, "loss": 0.2881, "lr": 3.407698828781261e-06, "epoch": 2.2178477690288716, "percentage": 44.36, "elapsed_time": "1:30:35", "remaining_time": "1:53:38", "throughput": 20062.69, "total_tokens": 109047296}
|
|
{"current_steps": 34650, "total_steps": 78105, "loss": 0.2061, "lr": 3.4071782772430816e-06, "epoch": 2.218167850969848, "percentage": 44.36, "elapsed_time": "1:30:35", "remaining_time": "1:53:37", "throughput": 20063.04, "total_tokens": 109062528}
|
|
{"current_steps": 34655, "total_steps": 78105, "loss": 0.2075, "lr": 3.4066576804051445e-06, "epoch": 2.218487932910825, "percentage": 44.37, "elapsed_time": "1:30:36", "remaining_time": "1:53:36", "throughput": 20063.37, "total_tokens": 109077376}
|
|
{"current_steps": 34660, "total_steps": 78105, "loss": 0.1857, "lr": 3.406137038293445e-06, "epoch": 2.218808014851802, "percentage": 44.38, "elapsed_time": "1:30:37", "remaining_time": "1:53:35", "throughput": 20063.79, "total_tokens": 109093824}
|
|
{"current_steps": 34665, "total_steps": 78105, "loss": 0.2564, "lr": 3.4056163509339816e-06, "epoch": 2.219128096792779, "percentage": 44.38, "elapsed_time": "1:30:38", "remaining_time": "1:53:34", "throughput": 20064.3, "total_tokens": 109111104}
|
|
{"current_steps": 34670, "total_steps": 78105, "loss": 0.1952, "lr": 3.4050956183527556e-06, "epoch": 2.2194481787337557, "percentage": 44.39, "elapsed_time": "1:30:38", "remaining_time": "1:53:33", "throughput": 20064.66, "total_tokens": 109126528}
|
|
{"current_steps": 34675, "total_steps": 78105, "loss": 0.1603, "lr": 3.4045748405757683e-06, "epoch": 2.2197682606747327, "percentage": 44.4, "elapsed_time": "1:30:39", "remaining_time": "1:53:32", "throughput": 20065.07, "total_tokens": 109142464}
|
|
{"current_steps": 34680, "total_steps": 78105, "loss": 0.2068, "lr": 3.404054017629026e-06, "epoch": 2.2200883426157096, "percentage": 44.4, "elapsed_time": "1:30:40", "remaining_time": "1:53:31", "throughput": 20065.45, "total_tokens": 109158144}
|
|
{"current_steps": 34685, "total_steps": 78105, "loss": 0.146, "lr": 3.403533149538535e-06, "epoch": 2.2204084245566866, "percentage": 44.41, "elapsed_time": "1:30:40", "remaining_time": "1:53:30", "throughput": 20065.87, "total_tokens": 109173952}
|
|
{"current_steps": 34690, "total_steps": 78105, "loss": 0.1901, "lr": 3.4030122363303037e-06, "epoch": 2.2207285064976636, "percentage": 44.41, "elapsed_time": "1:30:41", "remaining_time": "1:53:30", "throughput": 20066.3, "total_tokens": 109190144}
|
|
{"current_steps": 34695, "total_steps": 78105, "loss": 0.3357, "lr": 3.4024912780303457e-06, "epoch": 2.22104858843864, "percentage": 44.42, "elapsed_time": "1:30:42", "remaining_time": "1:53:29", "throughput": 20066.71, "total_tokens": 109206080}
|
|
{"current_steps": 34700, "total_steps": 78105, "loss": 0.1982, "lr": 3.4019702746646745e-06, "epoch": 2.221368670379617, "percentage": 44.43, "elapsed_time": "1:30:42", "remaining_time": "1:53:28", "throughput": 20067.13, "total_tokens": 109222528}
|
|
{"current_steps": 34705, "total_steps": 78105, "loss": 0.2625, "lr": 3.401449226259306e-06, "epoch": 2.221688752320594, "percentage": 44.43, "elapsed_time": "1:30:43", "remaining_time": "1:53:27", "throughput": 20067.46, "total_tokens": 109237376}
|
|
{"current_steps": 34710, "total_steps": 78105, "loss": 0.2085, "lr": 3.4009281328402583e-06, "epoch": 2.222008834261571, "percentage": 44.44, "elapsed_time": "1:30:44", "remaining_time": "1:53:26", "throughput": 20067.85, "total_tokens": 109253120}
|
|
{"current_steps": 34715, "total_steps": 78105, "loss": 0.2405, "lr": 3.4004069944335515e-06, "epoch": 2.2223289162025477, "percentage": 44.45, "elapsed_time": "1:30:44", "remaining_time": "1:53:25", "throughput": 20068.27, "total_tokens": 109269184}
|
|
{"current_steps": 34720, "total_steps": 78105, "loss": 0.213, "lr": 3.39988581106521e-06, "epoch": 2.2226489981435247, "percentage": 44.45, "elapsed_time": "1:30:45", "remaining_time": "1:53:24", "throughput": 20068.59, "total_tokens": 109284096}
|
|
{"current_steps": 34725, "total_steps": 78105, "loss": 0.3387, "lr": 3.399364582761259e-06, "epoch": 2.2229690800845017, "percentage": 44.46, "elapsed_time": "1:30:46", "remaining_time": "1:53:23", "throughput": 20068.93, "total_tokens": 109299200}
|
|
{"current_steps": 34730, "total_steps": 78105, "loss": 0.1595, "lr": 3.3988433095477252e-06, "epoch": 2.2232891620254787, "percentage": 44.47, "elapsed_time": "1:30:46", "remaining_time": "1:53:22", "throughput": 20069.29, "total_tokens": 109314176}
|
|
{"current_steps": 34735, "total_steps": 78105, "loss": 0.3225, "lr": 3.398321991450639e-06, "epoch": 2.223609243966455, "percentage": 44.47, "elapsed_time": "1:30:47", "remaining_time": "1:53:21", "throughput": 20069.62, "total_tokens": 109329344}
|
|
{"current_steps": 34740, "total_steps": 78105, "loss": 0.2605, "lr": 3.3978006284960306e-06, "epoch": 2.223929325907432, "percentage": 44.48, "elapsed_time": "1:30:48", "remaining_time": "1:53:20", "throughput": 20070.01, "total_tokens": 109345280}
|
|
{"current_steps": 34745, "total_steps": 78105, "loss": 0.1708, "lr": 3.397279220709936e-06, "epoch": 2.224249407848409, "percentage": 44.48, "elapsed_time": "1:30:48", "remaining_time": "1:53:19", "throughput": 20070.45, "total_tokens": 109361664}
|
|
{"current_steps": 34750, "total_steps": 78105, "loss": 0.2029, "lr": 3.3967577681183906e-06, "epoch": 2.224569489789386, "percentage": 44.49, "elapsed_time": "1:30:49", "remaining_time": "1:53:19", "throughput": 20070.84, "total_tokens": 109377216}
|
|
{"current_steps": 34755, "total_steps": 78105, "loss": 0.304, "lr": 3.396236270747433e-06, "epoch": 2.224889571730363, "percentage": 44.5, "elapsed_time": "1:30:50", "remaining_time": "1:53:18", "throughput": 20071.26, "total_tokens": 109393216}
|
|
{"current_steps": 34760, "total_steps": 78105, "loss": 0.213, "lr": 3.395714728623105e-06, "epoch": 2.2252096536713397, "percentage": 44.5, "elapsed_time": "1:30:50", "remaining_time": "1:53:17", "throughput": 20071.68, "total_tokens": 109409536}
|
|
{"current_steps": 34765, "total_steps": 78105, "loss": 0.1936, "lr": 3.395193141771449e-06, "epoch": 2.2255297356123167, "percentage": 44.51, "elapsed_time": "1:30:51", "remaining_time": "1:53:16", "throughput": 20072.2, "total_tokens": 109426624}
|
|
{"current_steps": 34770, "total_steps": 78105, "loss": 0.2805, "lr": 3.39467151021851e-06, "epoch": 2.2258498175532937, "percentage": 44.52, "elapsed_time": "1:30:52", "remaining_time": "1:53:15", "throughput": 20072.66, "total_tokens": 109443328}
|
|
{"current_steps": 34775, "total_steps": 78105, "loss": 0.1941, "lr": 3.3941498339903357e-06, "epoch": 2.2261698994942707, "percentage": 44.52, "elapsed_time": "1:30:53", "remaining_time": "1:53:14", "throughput": 20073.06, "total_tokens": 109459456}
|
|
{"current_steps": 34780, "total_steps": 78105, "loss": 0.1622, "lr": 3.3936281131129762e-06, "epoch": 2.2264899814352472, "percentage": 44.53, "elapsed_time": "1:30:53", "remaining_time": "1:53:13", "throughput": 20073.49, "total_tokens": 109475968}
|
|
{"current_steps": 34785, "total_steps": 78105, "loss": 0.1761, "lr": 3.3931063476124845e-06, "epoch": 2.2268100633762242, "percentage": 44.54, "elapsed_time": "1:30:54", "remaining_time": "1:53:12", "throughput": 20073.92, "total_tokens": 109492160}
|
|
{"current_steps": 34790, "total_steps": 78105, "loss": 0.2776, "lr": 3.392584537514913e-06, "epoch": 2.227130145317201, "percentage": 44.54, "elapsed_time": "1:30:55", "remaining_time": "1:53:11", "throughput": 20074.2, "total_tokens": 109506560}
|
|
{"current_steps": 34795, "total_steps": 78105, "loss": 0.1537, "lr": 3.392062682846319e-06, "epoch": 2.227450227258178, "percentage": 44.55, "elapsed_time": "1:30:55", "remaining_time": "1:53:10", "throughput": 20074.56, "total_tokens": 109521600}
|
|
{"current_steps": 34800, "total_steps": 78105, "loss": 0.2741, "lr": 3.391540783632762e-06, "epoch": 2.2277703091991548, "percentage": 44.56, "elapsed_time": "1:30:56", "remaining_time": "1:53:09", "throughput": 20074.83, "total_tokens": 109535616}
|
|
{"current_steps": 34805, "total_steps": 78105, "loss": 0.2724, "lr": 3.391018839900301e-06, "epoch": 2.2280903911401317, "percentage": 44.56, "elapsed_time": "1:30:57", "remaining_time": "1:53:08", "throughput": 20075.25, "total_tokens": 109551488}
|
|
{"current_steps": 34810, "total_steps": 78105, "loss": 0.2132, "lr": 3.3904968516750002e-06, "epoch": 2.2284104730811087, "percentage": 44.57, "elapsed_time": "1:30:57", "remaining_time": "1:53:08", "throughput": 20075.58, "total_tokens": 109566592}
|
|
{"current_steps": 34815, "total_steps": 78105, "loss": 0.283, "lr": 3.3899748189829263e-06, "epoch": 2.2287305550220857, "percentage": 44.57, "elapsed_time": "1:30:58", "remaining_time": "1:53:07", "throughput": 20075.97, "total_tokens": 109581952}
|
|
{"current_steps": 34820, "total_steps": 78105, "loss": 0.1845, "lr": 3.389452741850145e-06, "epoch": 2.2290506369630627, "percentage": 44.58, "elapsed_time": "1:30:59", "remaining_time": "1:53:06", "throughput": 20076.35, "total_tokens": 109597632}
|
|
{"current_steps": 34825, "total_steps": 78105, "loss": 0.1912, "lr": 3.3889306203027266e-06, "epoch": 2.2293707189040393, "percentage": 44.59, "elapsed_time": "1:30:59", "remaining_time": "1:53:05", "throughput": 20076.74, "total_tokens": 109613248}
|
|
{"current_steps": 34830, "total_steps": 78105, "loss": 0.2176, "lr": 3.388408454366743e-06, "epoch": 2.2296908008450163, "percentage": 44.59, "elapsed_time": "1:31:00", "remaining_time": "1:53:04", "throughput": 20077.15, "total_tokens": 109628992}
|
|
{"current_steps": 34835, "total_steps": 78105, "loss": 0.1843, "lr": 3.387886244068269e-06, "epoch": 2.2300108827859932, "percentage": 44.6, "elapsed_time": "1:31:01", "remaining_time": "1:53:03", "throughput": 20077.52, "total_tokens": 109644352}
|
|
{"current_steps": 34840, "total_steps": 78105, "loss": 0.2576, "lr": 3.387363989433381e-06, "epoch": 2.2303309647269702, "percentage": 44.61, "elapsed_time": "1:31:01", "remaining_time": "1:53:02", "throughput": 20077.97, "total_tokens": 109660736}
|
|
{"current_steps": 34845, "total_steps": 78105, "loss": 0.2127, "lr": 3.3868416904881575e-06, "epoch": 2.230651046667947, "percentage": 44.61, "elapsed_time": "1:31:02", "remaining_time": "1:53:01", "throughput": 20078.28, "total_tokens": 109675648}
|
|
{"current_steps": 34850, "total_steps": 78105, "loss": 0.3412, "lr": 3.3863193472586797e-06, "epoch": 2.2309711286089238, "percentage": 44.62, "elapsed_time": "1:31:03", "remaining_time": "1:53:00", "throughput": 20078.66, "total_tokens": 109691264}
|
|
{"current_steps": 34855, "total_steps": 78105, "loss": 0.2122, "lr": 3.38579695977103e-06, "epoch": 2.2312912105499008, "percentage": 44.63, "elapsed_time": "1:31:03", "remaining_time": "1:52:59", "throughput": 20079.15, "total_tokens": 109708544}
|
|
{"current_steps": 34860, "total_steps": 78105, "loss": 0.2122, "lr": 3.3852745280512933e-06, "epoch": 2.2316112924908778, "percentage": 44.63, "elapsed_time": "1:31:04", "remaining_time": "1:52:58", "throughput": 20079.52, "total_tokens": 109723968}
|
|
{"current_steps": 34865, "total_steps": 78105, "loss": 0.3039, "lr": 3.384752052125559e-06, "epoch": 2.2319313744318547, "percentage": 44.64, "elapsed_time": "1:31:05", "remaining_time": "1:52:57", "throughput": 20079.9, "total_tokens": 109739456}
|
|
{"current_steps": 34870, "total_steps": 78105, "loss": 0.2535, "lr": 3.384229532019915e-06, "epoch": 2.2322514563728313, "percentage": 44.65, "elapsed_time": "1:31:05", "remaining_time": "1:52:57", "throughput": 20080.29, "total_tokens": 109755200}
|
|
{"current_steps": 34875, "total_steps": 78105, "loss": 0.176, "lr": 3.383706967760455e-06, "epoch": 2.2325715383138083, "percentage": 44.65, "elapsed_time": "1:31:06", "remaining_time": "1:52:56", "throughput": 20080.66, "total_tokens": 109770944}
|
|
{"current_steps": 34880, "total_steps": 78105, "loss": 0.1979, "lr": 3.3831843593732715e-06, "epoch": 2.2328916202547853, "percentage": 44.66, "elapsed_time": "1:31:07", "remaining_time": "1:52:55", "throughput": 20081.07, "total_tokens": 109786624}
|
|
{"current_steps": 34885, "total_steps": 78105, "loss": 0.2653, "lr": 3.382661706884461e-06, "epoch": 2.2332117021957623, "percentage": 44.66, "elapsed_time": "1:31:07", "remaining_time": "1:52:54", "throughput": 20081.41, "total_tokens": 109801344}
|
|
{"current_steps": 34890, "total_steps": 78105, "loss": 0.3099, "lr": 3.382139010320123e-06, "epoch": 2.233531784136739, "percentage": 44.67, "elapsed_time": "1:31:08", "remaining_time": "1:52:53", "throughput": 20081.87, "total_tokens": 109818048}
|
|
{"current_steps": 34895, "total_steps": 78105, "loss": 0.1537, "lr": 3.381616269706357e-06, "epoch": 2.233851866077716, "percentage": 44.68, "elapsed_time": "1:31:09", "remaining_time": "1:52:52", "throughput": 20082.38, "total_tokens": 109835328}
|
|
{"current_steps": 34900, "total_steps": 78105, "loss": 0.1815, "lr": 3.381093485069267e-06, "epoch": 2.234171948018693, "percentage": 44.68, "elapsed_time": "1:31:09", "remaining_time": "1:52:51", "throughput": 20082.85, "total_tokens": 109852160}
|
|
{"current_steps": 34905, "total_steps": 78105, "loss": 0.2513, "lr": 3.3805706564349583e-06, "epoch": 2.23449202995967, "percentage": 44.69, "elapsed_time": "1:31:10", "remaining_time": "1:52:50", "throughput": 20083.29, "total_tokens": 109868352}
|
|
{"current_steps": 34910, "total_steps": 78105, "loss": 0.2768, "lr": 3.3800477838295375e-06, "epoch": 2.2348121119006468, "percentage": 44.7, "elapsed_time": "1:31:11", "remaining_time": "1:52:49", "throughput": 20083.62, "total_tokens": 109883200}
|
|
{"current_steps": 34915, "total_steps": 78105, "loss": 0.1918, "lr": 3.379524867279115e-06, "epoch": 2.2351321938416233, "percentage": 44.7, "elapsed_time": "1:31:11", "remaining_time": "1:52:48", "throughput": 20084.01, "total_tokens": 109898944}
|
|
{"current_steps": 34920, "total_steps": 78105, "loss": 0.2025, "lr": 3.3790019068098e-06, "epoch": 2.2354522757826003, "percentage": 44.71, "elapsed_time": "1:31:12", "remaining_time": "1:52:47", "throughput": 20084.36, "total_tokens": 109913920}
|
|
{"current_steps": 34925, "total_steps": 78105, "loss": 0.2429, "lr": 3.37847890244771e-06, "epoch": 2.2357723577235773, "percentage": 44.72, "elapsed_time": "1:31:13", "remaining_time": "1:52:47", "throughput": 20084.85, "total_tokens": 109931008}
|
|
{"current_steps": 34930, "total_steps": 78105, "loss": 0.1842, "lr": 3.3779558542189595e-06, "epoch": 2.2360924396645543, "percentage": 44.72, "elapsed_time": "1:31:13", "remaining_time": "1:52:46", "throughput": 20085.22, "total_tokens": 109946368}
|
|
{"current_steps": 34935, "total_steps": 78105, "loss": 0.2256, "lr": 3.377432762149666e-06, "epoch": 2.236412521605531, "percentage": 44.73, "elapsed_time": "1:31:14", "remaining_time": "1:52:45", "throughput": 20085.53, "total_tokens": 109960960}
|
|
{"current_steps": 34940, "total_steps": 78105, "loss": 0.1823, "lr": 3.376909626265951e-06, "epoch": 2.236732603546508, "percentage": 44.73, "elapsed_time": "1:31:15", "remaining_time": "1:52:44", "throughput": 20086.05, "total_tokens": 109978368}
|
|
{"current_steps": 34945, "total_steps": 78105, "loss": 0.215, "lr": 3.3763864465939367e-06, "epoch": 2.237052685487485, "percentage": 44.74, "elapsed_time": "1:31:16", "remaining_time": "1:52:43", "throughput": 20086.47, "total_tokens": 109994304}
|
|
{"current_steps": 34950, "total_steps": 78105, "loss": 0.234, "lr": 3.375863223159749e-06, "epoch": 2.237372767428462, "percentage": 44.75, "elapsed_time": "1:31:16", "remaining_time": "1:52:42", "throughput": 20086.8, "total_tokens": 110009280}
|
|
{"current_steps": 34955, "total_steps": 78105, "loss": 0.2539, "lr": 3.3753399559895127e-06, "epoch": 2.237692849369439, "percentage": 44.75, "elapsed_time": "1:31:17", "remaining_time": "1:52:41", "throughput": 20087.34, "total_tokens": 110026816}
|
|
{"current_steps": 34960, "total_steps": 78105, "loss": 0.2392, "lr": 3.3748166451093596e-06, "epoch": 2.2380129313104153, "percentage": 44.76, "elapsed_time": "1:31:18", "remaining_time": "1:52:40", "throughput": 20087.7, "total_tokens": 110042240}
|
|
{"current_steps": 34965, "total_steps": 78105, "loss": 0.259, "lr": 3.3742932905454194e-06, "epoch": 2.2383330132513923, "percentage": 44.77, "elapsed_time": "1:31:18", "remaining_time": "1:52:39", "throughput": 20088.04, "total_tokens": 110057600}
|
|
{"current_steps": 34970, "total_steps": 78105, "loss": 0.1604, "lr": 3.3737698923238273e-06, "epoch": 2.2386530951923693, "percentage": 44.77, "elapsed_time": "1:31:19", "remaining_time": "1:52:38", "throughput": 20088.46, "total_tokens": 110073856}
|
|
{"current_steps": 34975, "total_steps": 78105, "loss": 0.2358, "lr": 3.3732464504707164e-06, "epoch": 2.2389731771333463, "percentage": 44.78, "elapsed_time": "1:31:20", "remaining_time": "1:52:37", "throughput": 20088.83, "total_tokens": 110089536}
|
|
{"current_steps": 34980, "total_steps": 78105, "loss": 0.3496, "lr": 3.3727229650122274e-06, "epoch": 2.239293259074323, "percentage": 44.79, "elapsed_time": "1:31:20", "remaining_time": "1:52:37", "throughput": 20089.24, "total_tokens": 110105536}
|
|
{"current_steps": 34985, "total_steps": 78105, "loss": 0.2086, "lr": 3.3721994359745e-06, "epoch": 2.2396133410153, "percentage": 44.79, "elapsed_time": "1:31:21", "remaining_time": "1:52:36", "throughput": 20089.68, "total_tokens": 110121920}
|
|
{"current_steps": 34990, "total_steps": 78105, "loss": 0.2684, "lr": 3.371675863383675e-06, "epoch": 2.239933422956277, "percentage": 44.8, "elapsed_time": "1:31:22", "remaining_time": "1:52:35", "throughput": 20090.01, "total_tokens": 110136704}
|
|
{"current_steps": 34995, "total_steps": 78105, "loss": 0.204, "lr": 3.371152247265898e-06, "epoch": 2.240253504897254, "percentage": 44.81, "elapsed_time": "1:31:22", "remaining_time": "1:52:34", "throughput": 20090.46, "total_tokens": 110153472}
|
|
{"current_steps": 35000, "total_steps": 78105, "loss": 0.184, "lr": 3.370628587647315e-06, "epoch": 2.2405735868382304, "percentage": 44.81, "elapsed_time": "1:31:23", "remaining_time": "1:52:33", "throughput": 20090.82, "total_tokens": 110168768}
|
|
{"current_steps": 35005, "total_steps": 78105, "loss": 0.2724, "lr": 3.3701048845540757e-06, "epoch": 2.2408936687792074, "percentage": 44.82, "elapsed_time": "1:31:24", "remaining_time": "1:52:32", "throughput": 20091.27, "total_tokens": 110185216}
|
|
{"current_steps": 35010, "total_steps": 78105, "loss": 0.219, "lr": 3.369581138012331e-06, "epoch": 2.2412137507201844, "percentage": 44.82, "elapsed_time": "1:31:24", "remaining_time": "1:52:31", "throughput": 20091.65, "total_tokens": 110200576}
|
|
{"current_steps": 35015, "total_steps": 78105, "loss": 0.2744, "lr": 3.369057348048233e-06, "epoch": 2.2415338326611614, "percentage": 44.83, "elapsed_time": "1:31:25", "remaining_time": "1:52:30", "throughput": 20092.02, "total_tokens": 110216192}
|
|
{"current_steps": 35020, "total_steps": 78105, "loss": 0.1438, "lr": 3.3685335146879384e-06, "epoch": 2.2418539146021383, "percentage": 44.84, "elapsed_time": "1:31:26", "remaining_time": "1:52:29", "throughput": 20092.37, "total_tokens": 110231296}
|
|
{"current_steps": 35025, "total_steps": 78105, "loss": 0.1885, "lr": 3.3680096379576037e-06, "epoch": 2.242173996543115, "percentage": 44.84, "elapsed_time": "1:31:26", "remaining_time": "1:52:28", "throughput": 20092.7, "total_tokens": 110246144}
|
|
{"current_steps": 35030, "total_steps": 78105, "loss": 0.1477, "lr": 3.367485717883389e-06, "epoch": 2.242494078484092, "percentage": 44.85, "elapsed_time": "1:31:27", "remaining_time": "1:52:27", "throughput": 20093.06, "total_tokens": 110261440}
|
|
{"current_steps": 35035, "total_steps": 78105, "loss": 0.2131, "lr": 3.3669617544914562e-06, "epoch": 2.242814160425069, "percentage": 44.86, "elapsed_time": "1:31:28", "remaining_time": "1:52:26", "throughput": 20093.39, "total_tokens": 110276416}
|
|
{"current_steps": 35040, "total_steps": 78105, "loss": 0.1741, "lr": 3.36643774780797e-06, "epoch": 2.243134242366046, "percentage": 44.86, "elapsed_time": "1:31:28", "remaining_time": "1:52:25", "throughput": 20093.69, "total_tokens": 110290944}
|
|
{"current_steps": 35045, "total_steps": 78105, "loss": 0.263, "lr": 3.3659136978590946e-06, "epoch": 2.2434543243070224, "percentage": 44.87, "elapsed_time": "1:31:29", "remaining_time": "1:52:24", "throughput": 20094.05, "total_tokens": 110306048}
|
|
{"current_steps": 35050, "total_steps": 78105, "loss": 0.2051, "lr": 3.365389604671e-06, "epoch": 2.2437744062479994, "percentage": 44.88, "elapsed_time": "1:31:30", "remaining_time": "1:52:24", "throughput": 20094.44, "total_tokens": 110322240}
|
|
{"current_steps": 35055, "total_steps": 78105, "loss": 0.2442, "lr": 3.364865468269857e-06, "epoch": 2.2440944881889764, "percentage": 44.88, "elapsed_time": "1:31:30", "remaining_time": "1:52:23", "throughput": 20094.89, "total_tokens": 110338560}
|
|
{"current_steps": 35060, "total_steps": 78105, "loss": 0.2658, "lr": 3.364341288681836e-06, "epoch": 2.2444145701299534, "percentage": 44.89, "elapsed_time": "1:31:31", "remaining_time": "1:52:22", "throughput": 20095.24, "total_tokens": 110353728}
|
|
{"current_steps": 35065, "total_steps": 78105, "loss": 0.3488, "lr": 3.3638170659331138e-06, "epoch": 2.24473465207093, "percentage": 44.89, "elapsed_time": "1:31:32", "remaining_time": "1:52:21", "throughput": 20095.65, "total_tokens": 110369472}
|
|
{"current_steps": 35070, "total_steps": 78105, "loss": 0.2753, "lr": 3.3632928000498667e-06, "epoch": 2.245054734011907, "percentage": 44.9, "elapsed_time": "1:31:32", "remaining_time": "1:52:20", "throughput": 20096.0, "total_tokens": 110384832}
|
|
{"current_steps": 35075, "total_steps": 78105, "loss": 0.2681, "lr": 3.3627684910582737e-06, "epoch": 2.245374815952884, "percentage": 44.91, "elapsed_time": "1:31:33", "remaining_time": "1:52:19", "throughput": 20096.34, "total_tokens": 110399872}
|
|
{"current_steps": 35080, "total_steps": 78105, "loss": 0.2491, "lr": 3.3622441389845166e-06, "epoch": 2.245694897893861, "percentage": 44.91, "elapsed_time": "1:31:34", "remaining_time": "1:52:18", "throughput": 20096.77, "total_tokens": 110416064}
|
|
{"current_steps": 35085, "total_steps": 78105, "loss": 0.1607, "lr": 3.3617197438547787e-06, "epoch": 2.246014979834838, "percentage": 44.92, "elapsed_time": "1:31:34", "remaining_time": "1:52:17", "throughput": 20097.18, "total_tokens": 110432064}
|
|
{"current_steps": 35090, "total_steps": 78105, "loss": 0.2389, "lr": 3.361195305695245e-06, "epoch": 2.2463350617758144, "percentage": 44.93, "elapsed_time": "1:31:35", "remaining_time": "1:52:16", "throughput": 20097.59, "total_tokens": 110448256}
|
|
{"current_steps": 35095, "total_steps": 78105, "loss": 0.2655, "lr": 3.3606708245321035e-06, "epoch": 2.2466551437167914, "percentage": 44.93, "elapsed_time": "1:31:36", "remaining_time": "1:52:15", "throughput": 20098.01, "total_tokens": 110464576}
|
|
{"current_steps": 35100, "total_steps": 78105, "loss": 0.2452, "lr": 3.360146300391545e-06, "epoch": 2.2469752256577684, "percentage": 44.94, "elapsed_time": "1:31:36", "remaining_time": "1:52:14", "throughput": 20098.34, "total_tokens": 110479680}
|
|
{"current_steps": 35105, "total_steps": 78105, "loss": 0.3262, "lr": 3.3596217332997594e-06, "epoch": 2.2472953075987454, "percentage": 44.95, "elapsed_time": "1:31:37", "remaining_time": "1:52:14", "throughput": 20098.72, "total_tokens": 110495296}
|
|
{"current_steps": 35110, "total_steps": 78105, "loss": 0.2149, "lr": 3.359097123282943e-06, "epoch": 2.247615389539722, "percentage": 44.95, "elapsed_time": "1:31:38", "remaining_time": "1:52:13", "throughput": 20099.16, "total_tokens": 110511232}
|
|
{"current_steps": 35115, "total_steps": 78105, "loss": 0.241, "lr": 3.3585724703672894e-06, "epoch": 2.247935471480699, "percentage": 44.96, "elapsed_time": "1:31:38", "remaining_time": "1:52:12", "throughput": 20099.56, "total_tokens": 110527232}
|
|
{"current_steps": 35120, "total_steps": 78105, "loss": 0.2237, "lr": 3.358047774579e-06, "epoch": 2.248255553421676, "percentage": 44.97, "elapsed_time": "1:31:39", "remaining_time": "1:52:11", "throughput": 20099.9, "total_tokens": 110542464}
|
|
{"current_steps": 35125, "total_steps": 78105, "loss": 0.2935, "lr": 3.3575230359442747e-06, "epoch": 2.248575635362653, "percentage": 44.97, "elapsed_time": "1:31:40", "remaining_time": "1:52:10", "throughput": 20100.3, "total_tokens": 110558336}
|
|
{"current_steps": 35130, "total_steps": 78105, "loss": 0.3091, "lr": 3.3569982544893144e-06, "epoch": 2.24889571730363, "percentage": 44.98, "elapsed_time": "1:31:41", "remaining_time": "1:52:09", "throughput": 20100.64, "total_tokens": 110573632}
|
|
{"current_steps": 35135, "total_steps": 78105, "loss": 0.2674, "lr": 3.356473430240326e-06, "epoch": 2.2492157992446065, "percentage": 44.98, "elapsed_time": "1:31:41", "remaining_time": "1:52:08", "throughput": 20101.05, "total_tokens": 110590080}
|
|
{"current_steps": 35140, "total_steps": 78105, "loss": 0.2304, "lr": 3.3559485632235155e-06, "epoch": 2.2495358811855835, "percentage": 44.99, "elapsed_time": "1:31:42", "remaining_time": "1:52:07", "throughput": 20101.52, "total_tokens": 110606464}
|
|
{"current_steps": 35145, "total_steps": 78105, "loss": 0.1817, "lr": 3.355423653465093e-06, "epoch": 2.2498559631265604, "percentage": 45.0, "elapsed_time": "1:31:43", "remaining_time": "1:52:06", "throughput": 20101.85, "total_tokens": 110621376}
|
|
{"current_steps": 35150, "total_steps": 78105, "loss": 0.2663, "lr": 3.3548987009912676e-06, "epoch": 2.2501760450675374, "percentage": 45.0, "elapsed_time": "1:31:43", "remaining_time": "1:52:05", "throughput": 20102.23, "total_tokens": 110637184}
|
|
{"current_steps": 35154, "total_steps": 78105, "eval_loss": 0.5257287621498108, "epoch": 2.2504321106203187, "percentage": 45.01, "elapsed_time": "1:32:35", "remaining_time": "1:53:07", "throughput": 19918.1, "total_tokens": 110649216}
|
|
{"current_steps": 35155, "total_steps": 78105, "loss": 0.2498, "lr": 3.354373705828255e-06, "epoch": 2.250496127008514, "percentage": 45.01, "elapsed_time": "1:33:17", "remaining_time": "1:53:58", "throughput": 19767.82, "total_tokens": 110652352}
|
|
{"current_steps": 35160, "total_steps": 78105, "loss": 0.2259, "lr": 3.3538486680022695e-06, "epoch": 2.250816208949491, "percentage": 45.02, "elapsed_time": "1:33:18", "remaining_time": "1:53:57", "throughput": 19768.15, "total_tokens": 110667328}
|
|
{"current_steps": 35165, "total_steps": 78105, "loss": 0.2664, "lr": 3.3533235875395293e-06, "epoch": 2.251136290890468, "percentage": 45.02, "elapsed_time": "1:33:18", "remaining_time": "1:53:56", "throughput": 19768.5, "total_tokens": 110682432}
|
|
{"current_steps": 35170, "total_steps": 78105, "loss": 0.1614, "lr": 3.3527984644662538e-06, "epoch": 2.251456372831445, "percentage": 45.03, "elapsed_time": "1:33:19", "remaining_time": "1:53:55", "throughput": 19768.88, "total_tokens": 110697856}
|
|
{"current_steps": 35175, "total_steps": 78105, "loss": 0.2603, "lr": 3.352273298808665e-06, "epoch": 2.251776454772422, "percentage": 45.04, "elapsed_time": "1:33:20", "remaining_time": "1:53:54", "throughput": 19769.24, "total_tokens": 110712832}
|
|
{"current_steps": 35180, "total_steps": 78105, "loss": 0.1891, "lr": 3.351748090592987e-06, "epoch": 2.2520965367133985, "percentage": 45.04, "elapsed_time": "1:33:20", "remaining_time": "1:53:53", "throughput": 19769.57, "total_tokens": 110727552}
|
|
{"current_steps": 35185, "total_steps": 78105, "loss": 0.2633, "lr": 3.351222839845446e-06, "epoch": 2.2524166186543755, "percentage": 45.05, "elapsed_time": "1:33:21", "remaining_time": "1:53:53", "throughput": 19770.01, "total_tokens": 110743680}
|
|
{"current_steps": 35190, "total_steps": 78105, "loss": 0.1996, "lr": 3.3506975465922697e-06, "epoch": 2.2527367005953525, "percentage": 45.05, "elapsed_time": "1:33:22", "remaining_time": "1:53:52", "throughput": 19770.54, "total_tokens": 110760512}
|
|
{"current_steps": 35195, "total_steps": 78105, "loss": 0.2406, "lr": 3.3501722108596896e-06, "epoch": 2.2530567825363295, "percentage": 45.06, "elapsed_time": "1:33:22", "remaining_time": "1:53:51", "throughput": 19770.93, "total_tokens": 110775616}
|
|
{"current_steps": 35200, "total_steps": 78105, "loss": 0.1835, "lr": 3.3496468326739377e-06, "epoch": 2.253376864477306, "percentage": 45.07, "elapsed_time": "1:33:23", "remaining_time": "1:53:50", "throughput": 19771.33, "total_tokens": 110790912}
|
|
{"current_steps": 35205, "total_steps": 78105, "loss": 0.2762, "lr": 3.349121412061248e-06, "epoch": 2.253696946418283, "percentage": 45.07, "elapsed_time": "1:33:24", "remaining_time": "1:53:49", "throughput": 19771.73, "total_tokens": 110806208}
|
|
{"current_steps": 35210, "total_steps": 78105, "loss": 0.2097, "lr": 3.3485959490478587e-06, "epoch": 2.25401702835926, "percentage": 45.08, "elapsed_time": "1:33:24", "remaining_time": "1:53:48", "throughput": 19772.14, "total_tokens": 110822080}
|
|
{"current_steps": 35215, "total_steps": 78105, "loss": 0.3238, "lr": 3.3480704436600083e-06, "epoch": 2.254337110300237, "percentage": 45.09, "elapsed_time": "1:33:25", "remaining_time": "1:53:47", "throughput": 19772.63, "total_tokens": 110838720}
|
|
{"current_steps": 35220, "total_steps": 78105, "loss": 0.2346, "lr": 3.3475448959239364e-06, "epoch": 2.254657192241214, "percentage": 45.09, "elapsed_time": "1:33:26", "remaining_time": "1:53:46", "throughput": 19773.13, "total_tokens": 110855488}
|
|
{"current_steps": 35225, "total_steps": 78105, "loss": 0.321, "lr": 3.3470193058658874e-06, "epoch": 2.2549772741821905, "percentage": 45.1, "elapsed_time": "1:33:27", "remaining_time": "1:53:45", "throughput": 19773.52, "total_tokens": 110870784}
|
|
{"current_steps": 35230, "total_steps": 78105, "loss": 0.2382, "lr": 3.346493673512106e-06, "epoch": 2.2552973561231675, "percentage": 45.11, "elapsed_time": "1:33:27", "remaining_time": "1:53:44", "throughput": 19773.9, "total_tokens": 110886016}
|
|
{"current_steps": 35235, "total_steps": 78105, "loss": 0.1633, "lr": 3.34596799888884e-06, "epoch": 2.2556174380641445, "percentage": 45.11, "elapsed_time": "1:33:28", "remaining_time": "1:53:43", "throughput": 19774.3, "total_tokens": 110901568}
|
|
{"current_steps": 35240, "total_steps": 78105, "loss": 0.1878, "lr": 3.345442282022339e-06, "epoch": 2.2559375200051215, "percentage": 45.12, "elapsed_time": "1:33:29", "remaining_time": "1:53:42", "throughput": 19774.72, "total_tokens": 110917056}
|
|
{"current_steps": 35245, "total_steps": 78105, "loss": 0.1487, "lr": 3.344916522938853e-06, "epoch": 2.256257601946098, "percentage": 45.13, "elapsed_time": "1:33:29", "remaining_time": "1:53:41", "throughput": 19775.16, "total_tokens": 110932992}
|
|
{"current_steps": 35250, "total_steps": 78105, "loss": 0.2015, "lr": 3.3443907216646378e-06, "epoch": 2.256577683887075, "percentage": 45.13, "elapsed_time": "1:33:30", "remaining_time": "1:53:40", "throughput": 19775.55, "total_tokens": 110947904}
|
|
{"current_steps": 35255, "total_steps": 78105, "loss": 0.1884, "lr": 3.3438648782259487e-06, "epoch": 2.256897765828052, "percentage": 45.14, "elapsed_time": "1:33:31", "remaining_time": "1:53:39", "throughput": 19775.95, "total_tokens": 110963392}
|
|
{"current_steps": 35260, "total_steps": 78105, "loss": 0.2113, "lr": 3.343338992649042e-06, "epoch": 2.257217847769029, "percentage": 45.14, "elapsed_time": "1:33:31", "remaining_time": "1:53:38", "throughput": 19776.35, "total_tokens": 110979200}
|
|
{"current_steps": 35265, "total_steps": 78105, "loss": 0.2611, "lr": 3.342813064960179e-06, "epoch": 2.257537929710006, "percentage": 45.15, "elapsed_time": "1:33:32", "remaining_time": "1:53:38", "throughput": 19776.85, "total_tokens": 110996544}
|
|
{"current_steps": 35270, "total_steps": 78105, "loss": 0.1967, "lr": 3.3422870951856222e-06, "epoch": 2.2578580116509825, "percentage": 45.16, "elapsed_time": "1:33:33", "remaining_time": "1:53:37", "throughput": 19777.34, "total_tokens": 111013376}
|
|
{"current_steps": 35275, "total_steps": 78105, "loss": 0.1391, "lr": 3.3417610833516346e-06, "epoch": 2.2581780935919595, "percentage": 45.16, "elapsed_time": "1:33:33", "remaining_time": "1:53:36", "throughput": 19777.68, "total_tokens": 111028608}
|
|
{"current_steps": 35280, "total_steps": 78105, "loss": 0.214, "lr": 3.3412350294844836e-06, "epoch": 2.2584981755329365, "percentage": 45.17, "elapsed_time": "1:33:34", "remaining_time": "1:53:35", "throughput": 19778.06, "total_tokens": 111044480}
|
|
{"current_steps": 35285, "total_steps": 78105, "loss": 0.1921, "lr": 3.340708933610437e-06, "epoch": 2.2588182574739135, "percentage": 45.18, "elapsed_time": "1:33:35", "remaining_time": "1:53:34", "throughput": 19778.43, "total_tokens": 111059840}
|
|
{"current_steps": 35290, "total_steps": 78105, "loss": 0.1358, "lr": 3.340182795755765e-06, "epoch": 2.25913833941489, "percentage": 45.18, "elapsed_time": "1:33:35", "remaining_time": "1:53:33", "throughput": 19778.73, "total_tokens": 111074368}
|
|
{"current_steps": 35295, "total_steps": 78105, "loss": 0.3094, "lr": 3.3396566159467404e-06, "epoch": 2.259458421355867, "percentage": 45.19, "elapsed_time": "1:33:36", "remaining_time": "1:53:32", "throughput": 19779.18, "total_tokens": 111090432}
|
|
{"current_steps": 35300, "total_steps": 78105, "loss": 0.1856, "lr": 3.339130394209639e-06, "epoch": 2.259778503296844, "percentage": 45.2, "elapsed_time": "1:33:37", "remaining_time": "1:53:31", "throughput": 19779.59, "total_tokens": 111105920}
|
|
{"current_steps": 35305, "total_steps": 78105, "loss": 0.3488, "lr": 3.338604130570735e-06, "epoch": 2.260098585237821, "percentage": 45.2, "elapsed_time": "1:33:37", "remaining_time": "1:53:30", "throughput": 19779.97, "total_tokens": 111121216}
|
|
{"current_steps": 35310, "total_steps": 78105, "loss": 0.1892, "lr": 3.3380778250563097e-06, "epoch": 2.2604186671787976, "percentage": 45.21, "elapsed_time": "1:33:38", "remaining_time": "1:53:29", "throughput": 19780.38, "total_tokens": 111136896}
|
|
{"current_steps": 35315, "total_steps": 78105, "loss": 0.1966, "lr": 3.337551477692643e-06, "epoch": 2.2607387491197746, "percentage": 45.21, "elapsed_time": "1:33:39", "remaining_time": "1:53:28", "throughput": 19780.84, "total_tokens": 111153152}
|
|
{"current_steps": 35320, "total_steps": 78105, "loss": 0.3031, "lr": 3.3370250885060183e-06, "epoch": 2.2610588310607516, "percentage": 45.22, "elapsed_time": "1:33:39", "remaining_time": "1:53:27", "throughput": 19781.31, "total_tokens": 111169728}
|
|
{"current_steps": 35325, "total_steps": 78105, "loss": 0.1913, "lr": 3.336498657522721e-06, "epoch": 2.2613789130017286, "percentage": 45.23, "elapsed_time": "1:33:40", "remaining_time": "1:53:26", "throughput": 19781.81, "total_tokens": 111186816}
|
|
{"current_steps": 35330, "total_steps": 78105, "loss": 0.3084, "lr": 3.3359721847690375e-06, "epoch": 2.261698994942705, "percentage": 45.23, "elapsed_time": "1:33:41", "remaining_time": "1:53:25", "throughput": 19782.3, "total_tokens": 111203136}
|
|
{"current_steps": 35335, "total_steps": 78105, "loss": 0.2074, "lr": 3.335445670271258e-06, "epoch": 2.262019076883682, "percentage": 45.24, "elapsed_time": "1:33:42", "remaining_time": "1:53:24", "throughput": 19782.69, "total_tokens": 111218880}
|
|
{"current_steps": 35340, "total_steps": 78105, "loss": 0.2325, "lr": 3.334919114055673e-06, "epoch": 2.262339158824659, "percentage": 45.25, "elapsed_time": "1:33:42", "remaining_time": "1:53:24", "throughput": 19783.11, "total_tokens": 111234624}
|
|
{"current_steps": 35345, "total_steps": 78105, "loss": 0.2509, "lr": 3.3343925161485757e-06, "epoch": 2.262659240765636, "percentage": 45.25, "elapsed_time": "1:33:43", "remaining_time": "1:53:23", "throughput": 19783.62, "total_tokens": 111251712}
|
|
{"current_steps": 35350, "total_steps": 78105, "loss": 0.1662, "lr": 3.3338658765762635e-06, "epoch": 2.262979322706613, "percentage": 45.26, "elapsed_time": "1:33:44", "remaining_time": "1:53:22", "throughput": 19783.92, "total_tokens": 111266176}
|
|
{"current_steps": 35355, "total_steps": 78105, "loss": 0.1905, "lr": 3.3333391953650326e-06, "epoch": 2.2632994046475896, "percentage": 45.27, "elapsed_time": "1:33:44", "remaining_time": "1:53:21", "throughput": 19784.31, "total_tokens": 111281472}
|
|
{"current_steps": 35360, "total_steps": 78105, "loss": 0.1882, "lr": 3.3328124725411825e-06, "epoch": 2.2636194865885666, "percentage": 45.27, "elapsed_time": "1:33:45", "remaining_time": "1:53:20", "throughput": 19784.71, "total_tokens": 111297024}
|
|
{"current_steps": 35365, "total_steps": 78105, "loss": 0.3215, "lr": 3.3322857081310158e-06, "epoch": 2.2639395685295436, "percentage": 45.28, "elapsed_time": "1:33:46", "remaining_time": "1:53:19", "throughput": 19785.07, "total_tokens": 111311936}
|
|
{"current_steps": 35370, "total_steps": 78105, "loss": 0.2659, "lr": 3.3317589021608353e-06, "epoch": 2.2642596504705206, "percentage": 45.29, "elapsed_time": "1:33:46", "remaining_time": "1:53:18", "throughput": 19785.55, "total_tokens": 111328576}
|
|
{"current_steps": 35375, "total_steps": 78105, "loss": 0.1803, "lr": 3.3312320546569486e-06, "epoch": 2.264579732411497, "percentage": 45.29, "elapsed_time": "1:33:47", "remaining_time": "1:53:17", "throughput": 19785.95, "total_tokens": 111344000}
|
|
{"current_steps": 35380, "total_steps": 78105, "loss": 0.2751, "lr": 3.3307051656456624e-06, "epoch": 2.264899814352474, "percentage": 45.3, "elapsed_time": "1:33:48", "remaining_time": "1:53:16", "throughput": 19786.38, "total_tokens": 111359936}
|
|
{"current_steps": 35385, "total_steps": 78105, "loss": 0.1414, "lr": 3.3301782351532874e-06, "epoch": 2.265219896293451, "percentage": 45.3, "elapsed_time": "1:33:48", "remaining_time": "1:53:15", "throughput": 19786.76, "total_tokens": 111375488}
|
|
{"current_steps": 35390, "total_steps": 78105, "loss": 0.2949, "lr": 3.329651263206136e-06, "epoch": 2.265539978234428, "percentage": 45.31, "elapsed_time": "1:33:49", "remaining_time": "1:53:14", "throughput": 19787.19, "total_tokens": 111391552}
|
|
{"current_steps": 35395, "total_steps": 78105, "loss": 0.2348, "lr": 3.3291242498305215e-06, "epoch": 2.265860060175405, "percentage": 45.32, "elapsed_time": "1:33:50", "remaining_time": "1:53:13", "throughput": 19787.55, "total_tokens": 111406976}
|
|
{"current_steps": 35400, "total_steps": 78105, "loss": 0.2822, "lr": 3.328597195052761e-06, "epoch": 2.2661801421163816, "percentage": 45.32, "elapsed_time": "1:33:50", "remaining_time": "1:53:12", "throughput": 19788.0, "total_tokens": 111423360}
|
|
{"current_steps": 35405, "total_steps": 78105, "loss": 0.2325, "lr": 3.328070098899172e-06, "epoch": 2.2665002240573586, "percentage": 45.33, "elapsed_time": "1:33:51", "remaining_time": "1:53:11", "throughput": 19788.38, "total_tokens": 111438272}
|
|
{"current_steps": 35410, "total_steps": 78105, "loss": 0.2491, "lr": 3.327542961396076e-06, "epoch": 2.2668203059983356, "percentage": 45.34, "elapsed_time": "1:33:52", "remaining_time": "1:53:10", "throughput": 19788.71, "total_tokens": 111453120}
|
|
{"current_steps": 35415, "total_steps": 78105, "loss": 0.2223, "lr": 3.327015782569795e-06, "epoch": 2.2671403879393126, "percentage": 45.34, "elapsed_time": "1:33:52", "remaining_time": "1:53:09", "throughput": 19789.1, "total_tokens": 111468608}
|
|
{"current_steps": 35420, "total_steps": 78105, "loss": 0.1699, "lr": 3.3264885624466537e-06, "epoch": 2.267460469880289, "percentage": 45.35, "elapsed_time": "1:33:53", "remaining_time": "1:53:08", "throughput": 19789.43, "total_tokens": 111483200}
|
|
{"current_steps": 35425, "total_steps": 78105, "loss": 0.2718, "lr": 3.325961301052978e-06, "epoch": 2.267780551821266, "percentage": 45.36, "elapsed_time": "1:33:54", "remaining_time": "1:53:08", "throughput": 19789.82, "total_tokens": 111499328}
|
|
{"current_steps": 35430, "total_steps": 78105, "loss": 0.1949, "lr": 3.325433998415098e-06, "epoch": 2.268100633762243, "percentage": 45.36, "elapsed_time": "1:33:54", "remaining_time": "1:53:07", "throughput": 19790.21, "total_tokens": 111514880}
|
|
{"current_steps": 35435, "total_steps": 78105, "loss": 0.2217, "lr": 3.3249066545593428e-06, "epoch": 2.26842071570322, "percentage": 45.37, "elapsed_time": "1:33:55", "remaining_time": "1:53:06", "throughput": 19790.54, "total_tokens": 111529920}
|
|
{"current_steps": 35440, "total_steps": 78105, "loss": 0.231, "lr": 3.324379269512047e-06, "epoch": 2.268740797644197, "percentage": 45.37, "elapsed_time": "1:33:56", "remaining_time": "1:53:05", "throughput": 19791.17, "total_tokens": 111548736}
|
|
{"current_steps": 35445, "total_steps": 78105, "loss": 0.171, "lr": 3.323851843299544e-06, "epoch": 2.2690608795851737, "percentage": 45.38, "elapsed_time": "1:33:56", "remaining_time": "1:53:04", "throughput": 19791.55, "total_tokens": 111564032}
|
|
{"current_steps": 35450, "total_steps": 78105, "loss": 0.1846, "lr": 3.3233243759481717e-06, "epoch": 2.2693809615261507, "percentage": 45.39, "elapsed_time": "1:33:57", "remaining_time": "1:53:03", "throughput": 19792.07, "total_tokens": 111581184}
|
|
{"current_steps": 35455, "total_steps": 78105, "loss": 0.1711, "lr": 3.322796867484268e-06, "epoch": 2.2697010434671276, "percentage": 45.39, "elapsed_time": "1:33:58", "remaining_time": "1:53:02", "throughput": 19792.42, "total_tokens": 111596416}
|
|
{"current_steps": 35460, "total_steps": 78105, "loss": 0.2638, "lr": 3.3222693179341743e-06, "epoch": 2.2700211254081046, "percentage": 45.4, "elapsed_time": "1:33:58", "remaining_time": "1:53:01", "throughput": 19792.75, "total_tokens": 111611136}
|
|
{"current_steps": 35465, "total_steps": 78105, "loss": 0.2347, "lr": 3.3217417273242346e-06, "epoch": 2.270341207349081, "percentage": 45.41, "elapsed_time": "1:33:59", "remaining_time": "1:53:00", "throughput": 19793.08, "total_tokens": 111625920}
|
|
{"current_steps": 35470, "total_steps": 78105, "loss": 0.2005, "lr": 3.321214095680793e-06, "epoch": 2.270661289290058, "percentage": 45.41, "elapsed_time": "1:34:00", "remaining_time": "1:52:59", "throughput": 19793.47, "total_tokens": 111641536}
|
|
{"current_steps": 35475, "total_steps": 78105, "loss": 0.2397, "lr": 3.3206864230301974e-06, "epoch": 2.270981371231035, "percentage": 45.42, "elapsed_time": "1:34:01", "remaining_time": "1:52:58", "throughput": 19793.86, "total_tokens": 111657216}
|
|
{"current_steps": 35480, "total_steps": 78105, "loss": 0.213, "lr": 3.320158709398796e-06, "epoch": 2.271301453172012, "percentage": 45.43, "elapsed_time": "1:34:01", "remaining_time": "1:52:57", "throughput": 19794.21, "total_tokens": 111672192}
|
|
{"current_steps": 35485, "total_steps": 78105, "loss": 0.2734, "lr": 3.319630954812941e-06, "epoch": 2.271621535112989, "percentage": 45.43, "elapsed_time": "1:34:02", "remaining_time": "1:52:56", "throughput": 19794.58, "total_tokens": 111687488}
|
|
{"current_steps": 35490, "total_steps": 78105, "loss": 0.2439, "lr": 3.319103159298985e-06, "epoch": 2.2719416170539657, "percentage": 45.44, "elapsed_time": "1:34:02", "remaining_time": "1:52:55", "throughput": 19794.96, "total_tokens": 111702976}
|
|
{"current_steps": 35495, "total_steps": 78105, "loss": 0.2182, "lr": 3.318575322883284e-06, "epoch": 2.2722616989949427, "percentage": 45.45, "elapsed_time": "1:34:03", "remaining_time": "1:52:54", "throughput": 19795.37, "total_tokens": 111718592}
|
|
{"current_steps": 35500, "total_steps": 78105, "loss": 0.181, "lr": 3.3180474455921958e-06, "epoch": 2.2725817809359197, "percentage": 45.45, "elapsed_time": "1:34:04", "remaining_time": "1:52:54", "throughput": 19795.76, "total_tokens": 111734592}
|
|
{"current_steps": 35505, "total_steps": 78105, "loss": 0.2401, "lr": 3.3175195274520786e-06, "epoch": 2.2729018628768967, "percentage": 45.46, "elapsed_time": "1:34:05", "remaining_time": "1:52:53", "throughput": 19796.28, "total_tokens": 111752064}
|
|
{"current_steps": 35510, "total_steps": 78105, "loss": 0.2504, "lr": 3.316991568489295e-06, "epoch": 2.273221944817873, "percentage": 45.46, "elapsed_time": "1:34:05", "remaining_time": "1:52:52", "throughput": 19796.62, "total_tokens": 111766976}
|
|
{"current_steps": 35515, "total_steps": 78105, "loss": 0.1993, "lr": 3.3164635687302084e-06, "epoch": 2.27354202675885, "percentage": 45.47, "elapsed_time": "1:34:06", "remaining_time": "1:52:51", "throughput": 19797.02, "total_tokens": 111782848}
|
|
{"current_steps": 35520, "total_steps": 78105, "loss": 0.2055, "lr": 3.315935528201183e-06, "epoch": 2.273862108699827, "percentage": 45.48, "elapsed_time": "1:34:07", "remaining_time": "1:52:50", "throughput": 19797.46, "total_tokens": 111799296}
|
|
{"current_steps": 35525, "total_steps": 78105, "loss": 0.1978, "lr": 3.3154074469285876e-06, "epoch": 2.274182190640804, "percentage": 45.48, "elapsed_time": "1:34:07", "remaining_time": "1:52:49", "throughput": 19797.87, "total_tokens": 111815424}
|
|
{"current_steps": 35530, "total_steps": 78105, "loss": 0.236, "lr": 3.314879324938792e-06, "epoch": 2.274502272581781, "percentage": 45.49, "elapsed_time": "1:34:08", "remaining_time": "1:52:48", "throughput": 19798.33, "total_tokens": 111831936}
|
|
{"current_steps": 35535, "total_steps": 78105, "loss": 0.2359, "lr": 3.314351162258167e-06, "epoch": 2.2748223545227577, "percentage": 45.5, "elapsed_time": "1:34:09", "remaining_time": "1:52:47", "throughput": 19798.64, "total_tokens": 111846848}
|
|
{"current_steps": 35540, "total_steps": 78105, "loss": 0.1994, "lr": 3.3138229589130867e-06, "epoch": 2.2751424364637347, "percentage": 45.5, "elapsed_time": "1:34:09", "remaining_time": "1:52:46", "throughput": 19799.05, "total_tokens": 111862784}
|
|
{"current_steps": 35545, "total_steps": 78105, "loss": 0.2478, "lr": 3.313294714929927e-06, "epoch": 2.2754625184047117, "percentage": 45.51, "elapsed_time": "1:34:10", "remaining_time": "1:52:45", "throughput": 19799.55, "total_tokens": 111879872}
|
|
{"current_steps": 35550, "total_steps": 78105, "loss": 0.2394, "lr": 3.3127664303350653e-06, "epoch": 2.2757826003456887, "percentage": 45.52, "elapsed_time": "1:34:11", "remaining_time": "1:52:44", "throughput": 19799.93, "total_tokens": 111895360}
|
|
{"current_steps": 35555, "total_steps": 78105, "loss": 0.1685, "lr": 3.312238105154882e-06, "epoch": 2.2761026822866652, "percentage": 45.52, "elapsed_time": "1:34:11", "remaining_time": "1:52:43", "throughput": 19800.36, "total_tokens": 111911296}
|
|
{"current_steps": 35560, "total_steps": 78105, "loss": 0.184, "lr": 3.3117097394157583e-06, "epoch": 2.2764227642276422, "percentage": 45.53, "elapsed_time": "1:34:12", "remaining_time": "1:52:43", "throughput": 19800.94, "total_tokens": 111929024}
|
|
{"current_steps": 35565, "total_steps": 78105, "loss": 0.2587, "lr": 3.311181333144079e-06, "epoch": 2.276742846168619, "percentage": 45.53, "elapsed_time": "1:34:13", "remaining_time": "1:52:42", "throughput": 19801.35, "total_tokens": 111944704}
|
|
{"current_steps": 35570, "total_steps": 78105, "loss": 0.2375, "lr": 3.3106528863662286e-06, "epoch": 2.277062928109596, "percentage": 45.54, "elapsed_time": "1:34:14", "remaining_time": "1:52:41", "throughput": 19801.84, "total_tokens": 111961600}
|
|
{"current_steps": 35575, "total_steps": 78105, "loss": 0.1761, "lr": 3.310124399108595e-06, "epoch": 2.2773830100505728, "percentage": 45.55, "elapsed_time": "1:34:14", "remaining_time": "1:52:40", "throughput": 19802.25, "total_tokens": 111976960}
|
|
{"current_steps": 35580, "total_steps": 78105, "loss": 0.1755, "lr": 3.3095958713975697e-06, "epoch": 2.2777030919915497, "percentage": 45.55, "elapsed_time": "1:34:15", "remaining_time": "1:52:39", "throughput": 19802.68, "total_tokens": 111992576}
|
|
{"current_steps": 35585, "total_steps": 78105, "loss": 0.1489, "lr": 3.3090673032595435e-06, "epoch": 2.2780231739325267, "percentage": 45.56, "elapsed_time": "1:34:16", "remaining_time": "1:52:38", "throughput": 19803.09, "total_tokens": 112008128}
|
|
{"current_steps": 35590, "total_steps": 78105, "loss": 0.176, "lr": 3.3085386947209103e-06, "epoch": 2.2783432558735037, "percentage": 45.57, "elapsed_time": "1:34:16", "remaining_time": "1:52:37", "throughput": 19803.47, "total_tokens": 112023424}
|
|
{"current_steps": 35595, "total_steps": 78105, "loss": 0.1759, "lr": 3.3080100458080655e-06, "epoch": 2.2786633378144803, "percentage": 45.57, "elapsed_time": "1:34:17", "remaining_time": "1:52:36", "throughput": 19803.99, "total_tokens": 112040768}
|
|
{"current_steps": 35600, "total_steps": 78105, "loss": 0.1716, "lr": 3.3074813565474084e-06, "epoch": 2.2789834197554573, "percentage": 45.58, "elapsed_time": "1:34:18", "remaining_time": "1:52:35", "throughput": 19804.39, "total_tokens": 112056448}
|
|
{"current_steps": 35605, "total_steps": 78105, "loss": 0.1691, "lr": 3.3069526269653386e-06, "epoch": 2.2793035016964343, "percentage": 45.59, "elapsed_time": "1:34:18", "remaining_time": "1:52:34", "throughput": 19804.76, "total_tokens": 112071808}
|
|
{"current_steps": 35610, "total_steps": 78105, "loss": 0.2409, "lr": 3.3064238570882573e-06, "epoch": 2.2796235836374112, "percentage": 45.59, "elapsed_time": "1:34:19", "remaining_time": "1:52:33", "throughput": 19805.2, "total_tokens": 112087680}
|
|
{"current_steps": 35615, "total_steps": 78105, "loss": 0.1783, "lr": 3.3058950469425694e-06, "epoch": 2.2799436655783882, "percentage": 45.6, "elapsed_time": "1:34:20", "remaining_time": "1:52:32", "throughput": 19805.67, "total_tokens": 112104384}
|
|
{"current_steps": 35620, "total_steps": 78105, "loss": 0.163, "lr": 3.3053661965546807e-06, "epoch": 2.280263747519365, "percentage": 45.61, "elapsed_time": "1:34:20", "remaining_time": "1:52:31", "throughput": 19806.13, "total_tokens": 112120768}
|
|
{"current_steps": 35625, "total_steps": 78105, "loss": 0.2691, "lr": 3.3048373059509993e-06, "epoch": 2.2805838294603418, "percentage": 45.61, "elapsed_time": "1:34:21", "remaining_time": "1:52:30", "throughput": 19806.53, "total_tokens": 112136384}
|
|
{"current_steps": 35630, "total_steps": 78105, "loss": 0.2316, "lr": 3.304308375157935e-06, "epoch": 2.2809039114013188, "percentage": 45.62, "elapsed_time": "1:34:22", "remaining_time": "1:52:30", "throughput": 19806.91, "total_tokens": 112151552}
|
|
{"current_steps": 35635, "total_steps": 78105, "loss": 0.2818, "lr": 3.3037794042018995e-06, "epoch": 2.2812239933422958, "percentage": 45.62, "elapsed_time": "1:34:22", "remaining_time": "1:52:29", "throughput": 19807.28, "total_tokens": 112166848}
|
|
{"current_steps": 35640, "total_steps": 78105, "loss": 0.1925, "lr": 3.303250393109307e-06, "epoch": 2.2815440752832723, "percentage": 45.63, "elapsed_time": "1:34:23", "remaining_time": "1:52:28", "throughput": 19807.66, "total_tokens": 112182336}
|
|
{"current_steps": 35645, "total_steps": 78105, "loss": 0.2369, "lr": 3.302721341906575e-06, "epoch": 2.2818641572242493, "percentage": 45.64, "elapsed_time": "1:34:24", "remaining_time": "1:52:27", "throughput": 19808.05, "total_tokens": 112198272}
|
|
{"current_steps": 35650, "total_steps": 78105, "loss": 0.2024, "lr": 3.30219225062012e-06, "epoch": 2.2821842391652263, "percentage": 45.64, "elapsed_time": "1:34:24", "remaining_time": "1:52:26", "throughput": 19808.41, "total_tokens": 112213376}
|
|
{"current_steps": 35655, "total_steps": 78105, "loss": 0.2214, "lr": 3.301663119276362e-06, "epoch": 2.2825043211062033, "percentage": 45.65, "elapsed_time": "1:34:25", "remaining_time": "1:52:25", "throughput": 19808.73, "total_tokens": 112228032}
|
|
{"current_steps": 35660, "total_steps": 78105, "loss": 0.2142, "lr": 3.3011339479017242e-06, "epoch": 2.2828244030471803, "percentage": 45.66, "elapsed_time": "1:34:26", "remaining_time": "1:52:24", "throughput": 19809.09, "total_tokens": 112242752}
|
|
{"current_steps": 35665, "total_steps": 78105, "loss": 0.2561, "lr": 3.300604736522629e-06, "epoch": 2.283144484988157, "percentage": 45.66, "elapsed_time": "1:34:26", "remaining_time": "1:52:23", "throughput": 19809.53, "total_tokens": 112258880}
|
|
{"current_steps": 35670, "total_steps": 78105, "loss": 0.2027, "lr": 3.300075485165504e-06, "epoch": 2.283464566929134, "percentage": 45.67, "elapsed_time": "1:34:27", "remaining_time": "1:52:22", "throughput": 19810.0, "total_tokens": 112275136}
|
|
{"current_steps": 35675, "total_steps": 78105, "loss": 0.3039, "lr": 3.2995461938567773e-06, "epoch": 2.283784648870111, "percentage": 45.68, "elapsed_time": "1:34:28", "remaining_time": "1:52:21", "throughput": 19810.39, "total_tokens": 112290752}
|
|
{"current_steps": 35680, "total_steps": 78105, "loss": 0.1981, "lr": 3.299016862622878e-06, "epoch": 2.284104730811088, "percentage": 45.68, "elapsed_time": "1:34:28", "remaining_time": "1:52:20", "throughput": 19810.76, "total_tokens": 112305856}
|
|
{"current_steps": 35685, "total_steps": 78105, "loss": 0.1854, "lr": 3.298487491490238e-06, "epoch": 2.2844248127520643, "percentage": 45.69, "elapsed_time": "1:34:29", "remaining_time": "1:52:19", "throughput": 19811.2, "total_tokens": 112321984}
|
|
{"current_steps": 35690, "total_steps": 78105, "loss": 0.1716, "lr": 3.2979580804852923e-06, "epoch": 2.2847448946930413, "percentage": 45.69, "elapsed_time": "1:34:30", "remaining_time": "1:52:18", "throughput": 19811.6, "total_tokens": 112337280}
|
|
{"current_steps": 35695, "total_steps": 78105, "loss": 0.229, "lr": 3.2974286296344763e-06, "epoch": 2.2850649766340183, "percentage": 45.7, "elapsed_time": "1:34:31", "remaining_time": "1:52:17", "throughput": 19812.13, "total_tokens": 112354688}
|
|
{"current_steps": 35700, "total_steps": 78105, "loss": 0.1905, "lr": 3.2968991389642286e-06, "epoch": 2.2853850585749953, "percentage": 45.71, "elapsed_time": "1:34:31", "remaining_time": "1:52:16", "throughput": 19812.47, "total_tokens": 112369472}
|
|
{"current_steps": 35705, "total_steps": 78105, "loss": 0.217, "lr": 3.2963696085009884e-06, "epoch": 2.2857051405159723, "percentage": 45.71, "elapsed_time": "1:34:32", "remaining_time": "1:52:15", "throughput": 19812.92, "total_tokens": 112386048}
|
|
{"current_steps": 35710, "total_steps": 78105, "loss": 0.2465, "lr": 3.2958400382711984e-06, "epoch": 2.286025222456949, "percentage": 45.72, "elapsed_time": "1:34:33", "remaining_time": "1:52:15", "throughput": 19813.29, "total_tokens": 112401536}
|
|
{"current_steps": 35715, "total_steps": 78105, "loss": 0.1618, "lr": 3.2953104283013016e-06, "epoch": 2.286345304397926, "percentage": 45.73, "elapsed_time": "1:34:33", "remaining_time": "1:52:14", "throughput": 19813.67, "total_tokens": 112416896}
|
|
{"current_steps": 35720, "total_steps": 78105, "loss": 0.1625, "lr": 3.294780778617745e-06, "epoch": 2.286665386338903, "percentage": 45.73, "elapsed_time": "1:34:34", "remaining_time": "1:52:13", "throughput": 19814.09, "total_tokens": 112432704}
|
|
{"current_steps": 35725, "total_steps": 78105, "loss": 0.1489, "lr": 3.294251089246976e-06, "epoch": 2.28698546827988, "percentage": 45.74, "elapsed_time": "1:34:35", "remaining_time": "1:52:12", "throughput": 19814.47, "total_tokens": 112448320}
|
|
{"current_steps": 35730, "total_steps": 78105, "loss": 0.248, "lr": 3.293721360215445e-06, "epoch": 2.2873055502208564, "percentage": 45.75, "elapsed_time": "1:34:35", "remaining_time": "1:52:11", "throughput": 19814.9, "total_tokens": 112464448}
|
|
{"current_steps": 35735, "total_steps": 78105, "loss": 0.1548, "lr": 3.293191591549604e-06, "epoch": 2.2876256321618333, "percentage": 45.75, "elapsed_time": "1:34:36", "remaining_time": "1:52:10", "throughput": 19815.32, "total_tokens": 112480384}
|
|
{"current_steps": 35740, "total_steps": 78105, "loss": 0.2323, "lr": 3.2926617832759055e-06, "epoch": 2.2879457141028103, "percentage": 45.76, "elapsed_time": "1:34:37", "remaining_time": "1:52:09", "throughput": 19815.62, "total_tokens": 112494592}
|
|
{"current_steps": 35745, "total_steps": 78105, "loss": 0.2985, "lr": 3.2921319354208074e-06, "epoch": 2.2882657960437873, "percentage": 45.77, "elapsed_time": "1:34:37", "remaining_time": "1:52:08", "throughput": 19815.99, "total_tokens": 112509568}
|
|
{"current_steps": 35750, "total_steps": 78105, "loss": 0.232, "lr": 3.291602048010766e-06, "epoch": 2.2885858779847643, "percentage": 45.77, "elapsed_time": "1:34:38", "remaining_time": "1:52:07", "throughput": 19816.38, "total_tokens": 112525568}
|
|
{"current_steps": 35755, "total_steps": 78105, "loss": 0.3065, "lr": 3.291072121072242e-06, "epoch": 2.288905959925741, "percentage": 45.78, "elapsed_time": "1:34:39", "remaining_time": "1:52:06", "throughput": 19816.79, "total_tokens": 112541376}
|
|
{"current_steps": 35760, "total_steps": 78105, "loss": 0.1949, "lr": 3.2905421546316963e-06, "epoch": 2.289226041866718, "percentage": 45.78, "elapsed_time": "1:34:39", "remaining_time": "1:52:05", "throughput": 19817.33, "total_tokens": 112558848}
|
|
{"current_steps": 35765, "total_steps": 78105, "loss": 0.2198, "lr": 3.290012148715593e-06, "epoch": 2.289546123807695, "percentage": 45.79, "elapsed_time": "1:34:40", "remaining_time": "1:52:04", "throughput": 19817.74, "total_tokens": 112574656}
|
|
{"current_steps": 35770, "total_steps": 78105, "loss": 0.3517, "lr": 3.289482103350398e-06, "epoch": 2.289866205748672, "percentage": 45.8, "elapsed_time": "1:34:41", "remaining_time": "1:52:03", "throughput": 19818.05, "total_tokens": 112588928}
|
|
{"current_steps": 35775, "total_steps": 78105, "loss": 0.1504, "lr": 3.2889520185625802e-06, "epoch": 2.2901862876896484, "percentage": 45.8, "elapsed_time": "1:34:41", "remaining_time": "1:52:02", "throughput": 19818.48, "total_tokens": 112604864}
|
|
{"current_steps": 35780, "total_steps": 78105, "loss": 0.2392, "lr": 3.288421894378606e-06, "epoch": 2.2905063696306254, "percentage": 45.81, "elapsed_time": "1:34:42", "remaining_time": "1:52:01", "throughput": 19818.82, "total_tokens": 112619840}
|
|
{"current_steps": 35785, "total_steps": 78105, "loss": 0.2417, "lr": 3.2878917308249514e-06, "epoch": 2.2908264515716024, "percentage": 45.82, "elapsed_time": "1:34:43", "remaining_time": "1:52:00", "throughput": 19819.17, "total_tokens": 112634624}
|
|
{"current_steps": 35790, "total_steps": 78105, "loss": 0.3495, "lr": 3.2873615279280868e-06, "epoch": 2.2911465335125794, "percentage": 45.82, "elapsed_time": "1:34:43", "remaining_time": "1:52:00", "throughput": 19819.55, "total_tokens": 112650112}
|
|
{"current_steps": 35795, "total_steps": 78105, "loss": 0.2281, "lr": 3.2868312857144887e-06, "epoch": 2.2914666154535563, "percentage": 45.83, "elapsed_time": "1:34:44", "remaining_time": "1:51:59", "throughput": 19819.86, "total_tokens": 112664576}
|
|
{"current_steps": 35800, "total_steps": 78105, "loss": 0.2212, "lr": 3.2863010042106347e-06, "epoch": 2.291786697394533, "percentage": 45.84, "elapsed_time": "1:34:45", "remaining_time": "1:51:58", "throughput": 19820.25, "total_tokens": 112680064}
|
|
{"current_steps": 35805, "total_steps": 78105, "loss": 0.2435, "lr": 3.2857706834430037e-06, "epoch": 2.29210677933551, "percentage": 45.84, "elapsed_time": "1:34:45", "remaining_time": "1:51:57", "throughput": 19820.69, "total_tokens": 112696576}
|
|
{"current_steps": 35810, "total_steps": 78105, "loss": 0.1967, "lr": 3.285240323438078e-06, "epoch": 2.292426861276487, "percentage": 45.85, "elapsed_time": "1:34:46", "remaining_time": "1:51:56", "throughput": 19821.01, "total_tokens": 112711232}
|
|
{"current_steps": 35815, "total_steps": 78105, "loss": 0.2464, "lr": 3.284709924222341e-06, "epoch": 2.292746943217464, "percentage": 45.85, "elapsed_time": "1:34:47", "remaining_time": "1:51:55", "throughput": 19821.45, "total_tokens": 112727424}
|
|
{"current_steps": 35820, "total_steps": 78105, "loss": 0.2852, "lr": 3.2841794858222783e-06, "epoch": 2.2930670251584404, "percentage": 45.86, "elapsed_time": "1:34:47", "remaining_time": "1:51:54", "throughput": 19821.96, "total_tokens": 112744576}
|
|
{"current_steps": 35825, "total_steps": 78105, "loss": 0.1911, "lr": 3.2836490082643756e-06, "epoch": 2.2933871070994174, "percentage": 45.87, "elapsed_time": "1:34:48", "remaining_time": "1:51:53", "throughput": 19822.36, "total_tokens": 112760192}
|
|
{"current_steps": 35830, "total_steps": 78105, "loss": 0.2908, "lr": 3.2831184915751233e-06, "epoch": 2.2937071890403944, "percentage": 45.87, "elapsed_time": "1:34:49", "remaining_time": "1:51:52", "throughput": 19822.72, "total_tokens": 112775168}
|
|
{"current_steps": 35835, "total_steps": 78105, "loss": 0.1631, "lr": 3.2825879357810136e-06, "epoch": 2.2940272709813714, "percentage": 45.88, "elapsed_time": "1:34:49", "remaining_time": "1:51:51", "throughput": 19823.09, "total_tokens": 112790912}
|
|
{"current_steps": 35840, "total_steps": 78105, "loss": 0.1283, "lr": 3.2820573409085383e-06, "epoch": 2.2943473529223484, "percentage": 45.89, "elapsed_time": "1:34:50", "remaining_time": "1:51:50", "throughput": 19823.48, "total_tokens": 112806144}
|
|
{"current_steps": 35845, "total_steps": 78105, "loss": 0.2341, "lr": 3.281526706984193e-06, "epoch": 2.294667434863325, "percentage": 45.89, "elapsed_time": "1:34:51", "remaining_time": "1:51:49", "throughput": 19823.93, "total_tokens": 112822592}
|
|
{"current_steps": 35850, "total_steps": 78105, "loss": 0.1831, "lr": 3.280996034034475e-06, "epoch": 2.294987516804302, "percentage": 45.9, "elapsed_time": "1:34:51", "remaining_time": "1:51:48", "throughput": 19824.34, "total_tokens": 112838592}
|
|
{"current_steps": 35855, "total_steps": 78105, "loss": 0.1915, "lr": 3.2804653220858828e-06, "epoch": 2.295307598745279, "percentage": 45.91, "elapsed_time": "1:34:52", "remaining_time": "1:51:47", "throughput": 19824.75, "total_tokens": 112854336}
|
|
{"current_steps": 35860, "total_steps": 78105, "loss": 0.2168, "lr": 3.2799345711649176e-06, "epoch": 2.2956276806862554, "percentage": 45.91, "elapsed_time": "1:34:53", "remaining_time": "1:51:46", "throughput": 19825.12, "total_tokens": 112869568}
|
|
{"current_steps": 35865, "total_steps": 78105, "loss": 0.2558, "lr": 3.2794037812980823e-06, "epoch": 2.2959477626272324, "percentage": 45.92, "elapsed_time": "1:34:53", "remaining_time": "1:51:46", "throughput": 19825.53, "total_tokens": 112885440}
|
|
{"current_steps": 35870, "total_steps": 78105, "loss": 0.1851, "lr": 3.278872952511882e-06, "epoch": 2.2962678445682094, "percentage": 45.93, "elapsed_time": "1:34:54", "remaining_time": "1:51:45", "throughput": 19825.84, "total_tokens": 112899968}
|
|
{"current_steps": 35875, "total_steps": 78105, "loss": 0.2553, "lr": 3.278342084832824e-06, "epoch": 2.2965879265091864, "percentage": 45.93, "elapsed_time": "1:34:55", "remaining_time": "1:51:44", "throughput": 19826.26, "total_tokens": 112915904}
|
|
{"current_steps": 35880, "total_steps": 78105, "loss": 0.2898, "lr": 3.2778111782874168e-06, "epoch": 2.2969080084501634, "percentage": 45.94, "elapsed_time": "1:34:55", "remaining_time": "1:51:43", "throughput": 19826.67, "total_tokens": 112932288}
|
|
{"current_steps": 35885, "total_steps": 78105, "loss": 0.2115, "lr": 3.2772802329021704e-06, "epoch": 2.29722809039114, "percentage": 45.94, "elapsed_time": "1:34:56", "remaining_time": "1:51:42", "throughput": 19827.14, "total_tokens": 112948672}
|
|
{"current_steps": 35890, "total_steps": 78105, "loss": 0.2639, "lr": 3.2767492487035975e-06, "epoch": 2.297548172332117, "percentage": 45.95, "elapsed_time": "1:34:57", "remaining_time": "1:51:41", "throughput": 19827.52, "total_tokens": 112964544}
|
|
{"current_steps": 35895, "total_steps": 78105, "loss": 0.1864, "lr": 3.276218225718213e-06, "epoch": 2.297868254273094, "percentage": 45.96, "elapsed_time": "1:34:58", "remaining_time": "1:51:40", "throughput": 19827.91, "total_tokens": 112980032}
|
|
{"current_steps": 35900, "total_steps": 78105, "loss": 0.2037, "lr": 3.2756871639725335e-06, "epoch": 2.298188336214071, "percentage": 45.96, "elapsed_time": "1:34:58", "remaining_time": "1:51:39", "throughput": 19828.34, "total_tokens": 112996224}
|
|
{"current_steps": 35905, "total_steps": 78105, "loss": 0.237, "lr": 3.2751560634930777e-06, "epoch": 2.2985084181550475, "percentage": 45.97, "elapsed_time": "1:34:59", "remaining_time": "1:51:38", "throughput": 19828.7, "total_tokens": 113011328}
|
|
{"current_steps": 35910, "total_steps": 78105, "loss": 0.2893, "lr": 3.274624924306366e-06, "epoch": 2.2988285000960245, "percentage": 45.98, "elapsed_time": "1:35:00", "remaining_time": "1:51:37", "throughput": 19829.14, "total_tokens": 113027392}
|
|
{"current_steps": 35915, "total_steps": 78105, "loss": 0.2912, "lr": 3.27409374643892e-06, "epoch": 2.2991485820370015, "percentage": 45.98, "elapsed_time": "1:35:00", "remaining_time": "1:51:36", "throughput": 19829.62, "total_tokens": 113044032}
|
|
{"current_steps": 35920, "total_steps": 78105, "loss": 0.2751, "lr": 3.2735625299172645e-06, "epoch": 2.2994686639779784, "percentage": 45.99, "elapsed_time": "1:35:01", "remaining_time": "1:51:35", "throughput": 19829.98, "total_tokens": 113059072}
|
|
{"current_steps": 35925, "total_steps": 78105, "loss": 0.2352, "lr": 3.273031274767926e-06, "epoch": 2.2997887459189554, "percentage": 46.0, "elapsed_time": "1:35:02", "remaining_time": "1:51:34", "throughput": 19830.37, "total_tokens": 113074880}
|
|
{"current_steps": 35930, "total_steps": 78105, "loss": 0.1902, "lr": 3.2724999810174318e-06, "epoch": 2.300108827859932, "percentage": 46.0, "elapsed_time": "1:35:02", "remaining_time": "1:51:33", "throughput": 19830.76, "total_tokens": 113090624}
|
|
{"current_steps": 35935, "total_steps": 78105, "loss": 0.2567, "lr": 3.2719686486923126e-06, "epoch": 2.300428909800909, "percentage": 46.01, "elapsed_time": "1:35:03", "remaining_time": "1:51:33", "throughput": 19831.17, "total_tokens": 113106368}
|
|
{"current_steps": 35940, "total_steps": 78105, "loss": 0.196, "lr": 3.2714372778191e-06, "epoch": 2.300748991741886, "percentage": 46.01, "elapsed_time": "1:35:04", "remaining_time": "1:51:32", "throughput": 19831.53, "total_tokens": 113121472}
|
|
{"current_steps": 35945, "total_steps": 78105, "loss": 0.3752, "lr": 3.270905868424328e-06, "epoch": 2.301069073682863, "percentage": 46.02, "elapsed_time": "1:35:04", "remaining_time": "1:51:31", "throughput": 19831.88, "total_tokens": 113136448}
|
|
{"current_steps": 35950, "total_steps": 78105, "loss": 0.206, "lr": 3.2703744205345316e-06, "epoch": 2.3013891556238395, "percentage": 46.03, "elapsed_time": "1:35:05", "remaining_time": "1:51:30", "throughput": 19832.3, "total_tokens": 113152512}
|
|
{"current_steps": 35955, "total_steps": 78105, "loss": 0.1289, "lr": 3.2698429341762507e-06, "epoch": 2.3017092375648165, "percentage": 46.03, "elapsed_time": "1:35:06", "remaining_time": "1:51:29", "throughput": 19832.64, "total_tokens": 113167296}
|
|
{"current_steps": 35960, "total_steps": 78105, "loss": 0.2085, "lr": 3.2693114093760233e-06, "epoch": 2.3020293195057935, "percentage": 46.04, "elapsed_time": "1:35:06", "remaining_time": "1:51:28", "throughput": 19833.07, "total_tokens": 113183552}
|
|
{"current_steps": 35965, "total_steps": 78105, "loss": 0.2432, "lr": 3.268779846160391e-06, "epoch": 2.3023494014467705, "percentage": 46.05, "elapsed_time": "1:35:07", "remaining_time": "1:51:27", "throughput": 19833.49, "total_tokens": 113199424}
|
|
{"current_steps": 35970, "total_steps": 78105, "loss": 0.1899, "lr": 3.2682482445558975e-06, "epoch": 2.3026694833877475, "percentage": 46.05, "elapsed_time": "1:35:08", "remaining_time": "1:51:26", "throughput": 19833.81, "total_tokens": 113214336}
|
|
{"current_steps": 35975, "total_steps": 78105, "loss": 0.2147, "lr": 3.267716604589089e-06, "epoch": 2.302989565328724, "percentage": 46.06, "elapsed_time": "1:35:08", "remaining_time": "1:51:25", "throughput": 19834.2, "total_tokens": 113229824}
|
|
{"current_steps": 35980, "total_steps": 78105, "loss": 0.1596, "lr": 3.2671849262865114e-06, "epoch": 2.303309647269701, "percentage": 46.07, "elapsed_time": "1:35:09", "remaining_time": "1:51:24", "throughput": 19834.61, "total_tokens": 113246016}
|
|
{"current_steps": 35985, "total_steps": 78105, "loss": 0.1786, "lr": 3.266653209674715e-06, "epoch": 2.303629729210678, "percentage": 46.07, "elapsed_time": "1:35:10", "remaining_time": "1:51:23", "throughput": 19834.98, "total_tokens": 113261376}
|
|
{"current_steps": 35990, "total_steps": 78105, "loss": 0.2039, "lr": 3.2661214547802512e-06, "epoch": 2.303949811151655, "percentage": 46.08, "elapsed_time": "1:35:10", "remaining_time": "1:51:22", "throughput": 19835.33, "total_tokens": 113276480}
|
|
{"current_steps": 35995, "total_steps": 78105, "loss": 0.1284, "lr": 3.2655896616296722e-06, "epoch": 2.3042698930926315, "percentage": 46.09, "elapsed_time": "1:35:11", "remaining_time": "1:51:21", "throughput": 19835.72, "total_tokens": 113292352}
|
|
{"current_steps": 36000, "total_steps": 78105, "loss": 0.1803, "lr": 3.265057830249533e-06, "epoch": 2.3045899750336085, "percentage": 46.09, "elapsed_time": "1:35:12", "remaining_time": "1:51:20", "throughput": 19836.13, "total_tokens": 113308160}
|
|
{"current_steps": 36005, "total_steps": 78105, "loss": 0.2459, "lr": 3.2645259606663904e-06, "epoch": 2.3049100569745855, "percentage": 46.1, "elapsed_time": "1:35:12", "remaining_time": "1:51:19", "throughput": 19836.52, "total_tokens": 113323584}
|
|
{"current_steps": 36010, "total_steps": 78105, "loss": 0.1979, "lr": 3.263994052906804e-06, "epoch": 2.3052301389155625, "percentage": 46.1, "elapsed_time": "1:35:13", "remaining_time": "1:51:19", "throughput": 19836.9, "total_tokens": 113338880}
|
|
{"current_steps": 36015, "total_steps": 78105, "loss": 0.3052, "lr": 3.2634621069973344e-06, "epoch": 2.3055502208565395, "percentage": 46.11, "elapsed_time": "1:35:14", "remaining_time": "1:51:18", "throughput": 19837.31, "total_tokens": 113354944}
|
|
{"current_steps": 36020, "total_steps": 78105, "loss": 0.1532, "lr": 3.262930122964544e-06, "epoch": 2.305870302797516, "percentage": 46.12, "elapsed_time": "1:35:14", "remaining_time": "1:51:17", "throughput": 19837.71, "total_tokens": 113370432}
|
|
{"current_steps": 36025, "total_steps": 78105, "loss": 0.1774, "lr": 3.262398100834997e-06, "epoch": 2.306190384738493, "percentage": 46.12, "elapsed_time": "1:35:15", "remaining_time": "1:51:16", "throughput": 19838.04, "total_tokens": 113385408}
|
|
{"current_steps": 36030, "total_steps": 78105, "loss": 0.2313, "lr": 3.26186604063526e-06, "epoch": 2.30651046667947, "percentage": 46.13, "elapsed_time": "1:35:16", "remaining_time": "1:51:15", "throughput": 19838.51, "total_tokens": 113401920}
|
|
{"current_steps": 36035, "total_steps": 78105, "loss": 0.2411, "lr": 3.261333942391901e-06, "epoch": 2.306830548620447, "percentage": 46.14, "elapsed_time": "1:35:16", "remaining_time": "1:51:14", "throughput": 19838.85, "total_tokens": 113416768}
|
|
{"current_steps": 36040, "total_steps": 78105, "loss": 0.1825, "lr": 3.2608018061314906e-06, "epoch": 2.3071506305614236, "percentage": 46.14, "elapsed_time": "1:35:17", "remaining_time": "1:51:13", "throughput": 19839.26, "total_tokens": 113432896}
|
|
{"current_steps": 36045, "total_steps": 78105, "loss": 0.1812, "lr": 3.2602696318806017e-06, "epoch": 2.3074707125024005, "percentage": 46.15, "elapsed_time": "1:35:18", "remaining_time": "1:51:12", "throughput": 19839.72, "total_tokens": 113449472}
|
|
{"current_steps": 36050, "total_steps": 78105, "loss": 0.129, "lr": 3.259737419665807e-06, "epoch": 2.3077907944433775, "percentage": 46.16, "elapsed_time": "1:35:18", "remaining_time": "1:51:11", "throughput": 19840.17, "total_tokens": 113465728}
|
|
{"current_steps": 36055, "total_steps": 78105, "loss": 0.2606, "lr": 3.2592051695136828e-06, "epoch": 2.3081108763843545, "percentage": 46.16, "elapsed_time": "1:35:19", "remaining_time": "1:51:10", "throughput": 19840.58, "total_tokens": 113481216}
|
|
{"current_steps": 36060, "total_steps": 78105, "loss": 0.1819, "lr": 3.2586728814508066e-06, "epoch": 2.3084309583253315, "percentage": 46.17, "elapsed_time": "1:35:20", "remaining_time": "1:51:09", "throughput": 19840.95, "total_tokens": 113496640}
|
|
{"current_steps": 36065, "total_steps": 78105, "loss": 0.2935, "lr": 3.258140555503759e-06, "epoch": 2.308751040266308, "percentage": 46.18, "elapsed_time": "1:35:20", "remaining_time": "1:51:08", "throughput": 19841.27, "total_tokens": 113511296}
|
|
{"current_steps": 36070, "total_steps": 78105, "loss": 0.2085, "lr": 3.2576081916991208e-06, "epoch": 2.309071122207285, "percentage": 46.18, "elapsed_time": "1:35:21", "remaining_time": "1:51:07", "throughput": 19841.63, "total_tokens": 113526400}
|
|
{"current_steps": 36075, "total_steps": 78105, "loss": 0.1803, "lr": 3.257075790063476e-06, "epoch": 2.309391204148262, "percentage": 46.19, "elapsed_time": "1:35:22", "remaining_time": "1:51:06", "throughput": 19842.0, "total_tokens": 113541824}
|
|
{"current_steps": 36080, "total_steps": 78105, "loss": 0.3065, "lr": 3.2565433506234095e-06, "epoch": 2.309711286089239, "percentage": 46.19, "elapsed_time": "1:35:22", "remaining_time": "1:51:05", "throughput": 19842.42, "total_tokens": 113557952}
|
|
{"current_steps": 36085, "total_steps": 78105, "loss": 0.1975, "lr": 3.256010873405509e-06, "epoch": 2.3100313680302156, "percentage": 46.2, "elapsed_time": "1:35:23", "remaining_time": "1:51:05", "throughput": 19842.79, "total_tokens": 113573312}
|
|
{"current_steps": 36090, "total_steps": 78105, "loss": 0.1953, "lr": 3.2554783584363635e-06, "epoch": 2.3103514499711926, "percentage": 46.21, "elapsed_time": "1:35:24", "remaining_time": "1:51:04", "throughput": 19843.19, "total_tokens": 113589120}
|
|
{"current_steps": 36095, "total_steps": 78105, "loss": 0.2128, "lr": 3.2549458057425633e-06, "epoch": 2.3106715319121696, "percentage": 46.21, "elapsed_time": "1:35:24", "remaining_time": "1:51:03", "throughput": 19843.56, "total_tokens": 113604224}
|
|
{"current_steps": 36100, "total_steps": 78105, "loss": 0.1691, "lr": 3.2544132153507024e-06, "epoch": 2.3109916138531466, "percentage": 46.22, "elapsed_time": "1:35:25", "remaining_time": "1:51:02", "throughput": 19843.93, "total_tokens": 113619520}
|
|
{"current_steps": 36105, "total_steps": 78105, "loss": 0.2388, "lr": 3.2538805872873753e-06, "epoch": 2.3113116957941235, "percentage": 46.23, "elapsed_time": "1:35:26", "remaining_time": "1:51:01", "throughput": 19844.28, "total_tokens": 113634560}
|
|
{"current_steps": 36110, "total_steps": 78105, "loss": 0.2767, "lr": 3.2533479215791786e-06, "epoch": 2.3116317777351, "percentage": 46.23, "elapsed_time": "1:35:26", "remaining_time": "1:51:00", "throughput": 19844.68, "total_tokens": 113650176}
|
|
{"current_steps": 36115, "total_steps": 78105, "loss": 0.2197, "lr": 3.2528152182527105e-06, "epoch": 2.311951859676077, "percentage": 46.24, "elapsed_time": "1:35:27", "remaining_time": "1:50:59", "throughput": 19845.08, "total_tokens": 113665856}
|
|
{"current_steps": 36120, "total_steps": 78105, "loss": 0.2403, "lr": 3.2522824773345713e-06, "epoch": 2.312271941617054, "percentage": 46.25, "elapsed_time": "1:35:28", "remaining_time": "1:50:58", "throughput": 19845.45, "total_tokens": 113680896}
|
|
{"current_steps": 36125, "total_steps": 78105, "loss": 0.2539, "lr": 3.251749698851363e-06, "epoch": 2.3125920235580306, "percentage": 46.25, "elapsed_time": "1:35:28", "remaining_time": "1:50:57", "throughput": 19845.76, "total_tokens": 113695680}
|
|
{"current_steps": 36130, "total_steps": 78105, "loss": 0.234, "lr": 3.251216882829692e-06, "epoch": 2.3129121054990076, "percentage": 46.26, "elapsed_time": "1:35:29", "remaining_time": "1:50:56", "throughput": 19846.2, "total_tokens": 113711872}
|
|
{"current_steps": 36135, "total_steps": 78105, "loss": 0.2084, "lr": 3.250684029296162e-06, "epoch": 2.3132321874399846, "percentage": 46.26, "elapsed_time": "1:35:30", "remaining_time": "1:50:55", "throughput": 19846.63, "total_tokens": 113728000}
|
|
{"current_steps": 36140, "total_steps": 78105, "loss": 0.136, "lr": 3.250151138277382e-06, "epoch": 2.3135522693809616, "percentage": 46.27, "elapsed_time": "1:35:31", "remaining_time": "1:50:54", "throughput": 19847.02, "total_tokens": 113743488}
|
|
{"current_steps": 36145, "total_steps": 78105, "loss": 0.2399, "lr": 3.2496182097999617e-06, "epoch": 2.3138723513219386, "percentage": 46.28, "elapsed_time": "1:35:31", "remaining_time": "1:50:53", "throughput": 19847.39, "total_tokens": 113758784}
|
|
{"current_steps": 36150, "total_steps": 78105, "loss": 0.2673, "lr": 3.2490852438905123e-06, "epoch": 2.314192433262915, "percentage": 46.28, "elapsed_time": "1:35:32", "remaining_time": "1:50:52", "throughput": 19847.82, "total_tokens": 113775040}
|
|
{"current_steps": 36155, "total_steps": 78105, "loss": 0.2352, "lr": 3.2485522405756475e-06, "epoch": 2.314512515203892, "percentage": 46.29, "elapsed_time": "1:35:33", "remaining_time": "1:50:51", "throughput": 19848.26, "total_tokens": 113791168}
|
|
{"current_steps": 36160, "total_steps": 78105, "loss": 0.1866, "lr": 3.2480191998819836e-06, "epoch": 2.314832597144869, "percentage": 46.3, "elapsed_time": "1:35:33", "remaining_time": "1:50:51", "throughput": 19848.68, "total_tokens": 113807104}
|
|
{"current_steps": 36165, "total_steps": 78105, "loss": 0.2246, "lr": 3.247486121836137e-06, "epoch": 2.315152679085846, "percentage": 46.3, "elapsed_time": "1:35:34", "remaining_time": "1:50:50", "throughput": 19849.1, "total_tokens": 113823232}
|
|
{"current_steps": 36170, "total_steps": 78105, "loss": 0.2279, "lr": 3.246953006464727e-06, "epoch": 2.3154727610268226, "percentage": 46.31, "elapsed_time": "1:35:35", "remaining_time": "1:50:49", "throughput": 19849.44, "total_tokens": 113837888}
|
|
{"current_steps": 36175, "total_steps": 78105, "loss": 0.2344, "lr": 3.2464198537943736e-06, "epoch": 2.3157928429677996, "percentage": 46.32, "elapsed_time": "1:35:35", "remaining_time": "1:50:48", "throughput": 19849.82, "total_tokens": 113853184}
|
|
{"current_steps": 36180, "total_steps": 78105, "loss": 0.2208, "lr": 3.245886663851701e-06, "epoch": 2.3161129249087766, "percentage": 46.32, "elapsed_time": "1:35:36", "remaining_time": "1:50:47", "throughput": 19850.23, "total_tokens": 113868864}
|
|
{"current_steps": 36185, "total_steps": 78105, "loss": 0.3125, "lr": 3.2453534366633343e-06, "epoch": 2.3164330068497536, "percentage": 46.33, "elapsed_time": "1:35:37", "remaining_time": "1:50:46", "throughput": 19850.57, "total_tokens": 113883904}
|
|
{"current_steps": 36190, "total_steps": 78105, "loss": 0.2061, "lr": 3.244820172255899e-06, "epoch": 2.3167530887907306, "percentage": 46.34, "elapsed_time": "1:35:37", "remaining_time": "1:50:45", "throughput": 19850.97, "total_tokens": 113899584}
|
|
{"current_steps": 36195, "total_steps": 78105, "loss": 0.2645, "lr": 3.2442868706560237e-06, "epoch": 2.317073170731707, "percentage": 46.34, "elapsed_time": "1:35:38", "remaining_time": "1:50:44", "throughput": 19851.29, "total_tokens": 113914432}
|
|
{"current_steps": 36200, "total_steps": 78105, "loss": 0.2573, "lr": 3.2437535318903395e-06, "epoch": 2.317393252672684, "percentage": 46.35, "elapsed_time": "1:35:39", "remaining_time": "1:50:43", "throughput": 19851.66, "total_tokens": 113929856}
|
|
{"current_steps": 36205, "total_steps": 78105, "loss": 0.1858, "lr": 3.243220155985478e-06, "epoch": 2.317713334613661, "percentage": 46.35, "elapsed_time": "1:35:39", "remaining_time": "1:50:42", "throughput": 19852.04, "total_tokens": 113945280}
|
|
{"current_steps": 36210, "total_steps": 78105, "loss": 0.1597, "lr": 3.2426867429680725e-06, "epoch": 2.318033416554638, "percentage": 46.36, "elapsed_time": "1:35:40", "remaining_time": "1:50:41", "throughput": 19852.46, "total_tokens": 113961408}
|
|
{"current_steps": 36215, "total_steps": 78105, "loss": 0.1967, "lr": 3.2421532928647605e-06, "epoch": 2.3183534984956147, "percentage": 46.37, "elapsed_time": "1:35:41", "remaining_time": "1:50:40", "throughput": 19852.81, "total_tokens": 113976384}
|
|
{"current_steps": 36220, "total_steps": 78105, "loss": 0.2269, "lr": 3.241619805702178e-06, "epoch": 2.3186735804365917, "percentage": 46.37, "elapsed_time": "1:35:41", "remaining_time": "1:50:39", "throughput": 19853.16, "total_tokens": 113991680}
|
|
{"current_steps": 36225, "total_steps": 78105, "loss": 0.2183, "lr": 3.2410862815069656e-06, "epoch": 2.3189936623775687, "percentage": 46.38, "elapsed_time": "1:35:42", "remaining_time": "1:50:38", "throughput": 19853.64, "total_tokens": 114008512}
|
|
{"current_steps": 36230, "total_steps": 78105, "loss": 0.2233, "lr": 3.2405527203057645e-06, "epoch": 2.3193137443185456, "percentage": 46.39, "elapsed_time": "1:35:43", "remaining_time": "1:50:38", "throughput": 19854.33, "total_tokens": 114028032}
|
|
{"current_steps": 36235, "total_steps": 78105, "loss": 0.2322, "lr": 3.2400191221252173e-06, "epoch": 2.3196338262595226, "percentage": 46.39, "elapsed_time": "1:35:43", "remaining_time": "1:50:37", "throughput": 19854.73, "total_tokens": 114044032}
|
|
{"current_steps": 36240, "total_steps": 78105, "loss": 0.184, "lr": 3.23948548699197e-06, "epoch": 2.319953908200499, "percentage": 46.4, "elapsed_time": "1:35:44", "remaining_time": "1:50:36", "throughput": 19855.06, "total_tokens": 114058816}
|
|
{"current_steps": 36245, "total_steps": 78105, "loss": 0.2354, "lr": 3.2389518149326702e-06, "epoch": 2.320273990141476, "percentage": 46.41, "elapsed_time": "1:35:45", "remaining_time": "1:50:35", "throughput": 19855.44, "total_tokens": 114074368}
|
|
{"current_steps": 36250, "total_steps": 78105, "loss": 0.1831, "lr": 3.2384181059739656e-06, "epoch": 2.320594072082453, "percentage": 46.41, "elapsed_time": "1:35:45", "remaining_time": "1:50:34", "throughput": 19855.8, "total_tokens": 114089728}
|
|
{"current_steps": 36255, "total_steps": 78105, "loss": 0.1526, "lr": 3.237884360142507e-06, "epoch": 2.32091415402343, "percentage": 46.42, "elapsed_time": "1:35:46", "remaining_time": "1:50:33", "throughput": 19856.22, "total_tokens": 114105664}
|
|
{"current_steps": 36260, "total_steps": 78105, "loss": 0.1939, "lr": 3.2373505774649464e-06, "epoch": 2.3212342359644067, "percentage": 46.42, "elapsed_time": "1:35:47", "remaining_time": "1:50:32", "throughput": 19856.62, "total_tokens": 114121408}
|
|
{"current_steps": 36265, "total_steps": 78105, "loss": 0.1495, "lr": 3.2368167579679386e-06, "epoch": 2.3215543179053837, "percentage": 46.43, "elapsed_time": "1:35:47", "remaining_time": "1:50:31", "throughput": 19857.01, "total_tokens": 114136960}
|
|
{"current_steps": 36270, "total_steps": 78105, "loss": 0.231, "lr": 3.2362829016781405e-06, "epoch": 2.3218743998463607, "percentage": 46.44, "elapsed_time": "1:35:48", "remaining_time": "1:50:30", "throughput": 19857.47, "total_tokens": 114153216}
|
|
{"current_steps": 36275, "total_steps": 78105, "loss": 0.2567, "lr": 3.2357490086222093e-06, "epoch": 2.3221944817873377, "percentage": 46.44, "elapsed_time": "1:35:49", "remaining_time": "1:50:29", "throughput": 19857.9, "total_tokens": 114169280}
|
|
{"current_steps": 36280, "total_steps": 78105, "loss": 0.2084, "lr": 3.2352150788268056e-06, "epoch": 2.3225145637283147, "percentage": 46.45, "elapsed_time": "1:35:50", "remaining_time": "1:50:28", "throughput": 19858.37, "total_tokens": 114185728}
|
|
{"current_steps": 36285, "total_steps": 78105, "loss": 0.1672, "lr": 3.2346811123185896e-06, "epoch": 2.322834645669291, "percentage": 46.46, "elapsed_time": "1:35:50", "remaining_time": "1:50:27", "throughput": 19858.79, "total_tokens": 114201920}
|
|
{"current_steps": 36290, "total_steps": 78105, "loss": 0.2174, "lr": 3.2341471091242256e-06, "epoch": 2.323154727610268, "percentage": 46.46, "elapsed_time": "1:35:51", "remaining_time": "1:50:27", "throughput": 19859.26, "total_tokens": 114218688}
|
|
{"current_steps": 36295, "total_steps": 78105, "loss": 0.2327, "lr": 3.2336130692703794e-06, "epoch": 2.323474809551245, "percentage": 46.47, "elapsed_time": "1:35:52", "remaining_time": "1:50:26", "throughput": 19859.74, "total_tokens": 114235520}
|
|
{"current_steps": 36300, "total_steps": 78105, "loss": 0.2098, "lr": 3.233078992783717e-06, "epoch": 2.323794891492222, "percentage": 46.48, "elapsed_time": "1:35:52", "remaining_time": "1:50:25", "throughput": 19860.09, "total_tokens": 114250304}
|
|
{"current_steps": 36305, "total_steps": 78105, "loss": 0.2274, "lr": 3.2325448796909085e-06, "epoch": 2.3241149734331987, "percentage": 46.48, "elapsed_time": "1:35:53", "remaining_time": "1:50:24", "throughput": 19860.44, "total_tokens": 114265536}
|
|
{"current_steps": 36310, "total_steps": 78105, "loss": 0.2361, "lr": 3.232010730018625e-06, "epoch": 2.3244350553741757, "percentage": 46.49, "elapsed_time": "1:35:54", "remaining_time": "1:50:23", "throughput": 19860.9, "total_tokens": 114281984}
|
|
{"current_steps": 36315, "total_steps": 78105, "loss": 0.245, "lr": 3.2314765437935386e-06, "epoch": 2.3247551373151527, "percentage": 46.5, "elapsed_time": "1:35:54", "remaining_time": "1:50:22", "throughput": 19861.37, "total_tokens": 114299008}
|
|
{"current_steps": 36320, "total_steps": 78105, "loss": 0.3888, "lr": 3.230942321042323e-06, "epoch": 2.3250752192561297, "percentage": 46.5, "elapsed_time": "1:35:55", "remaining_time": "1:50:21", "throughput": 19861.75, "total_tokens": 114314560}
|
|
{"current_steps": 36325, "total_steps": 78105, "loss": 0.1697, "lr": 3.2304080617916556e-06, "epoch": 2.3253953011971067, "percentage": 46.51, "elapsed_time": "1:35:56", "remaining_time": "1:50:20", "throughput": 19862.18, "total_tokens": 114330752}
|
|
{"current_steps": 36330, "total_steps": 78105, "loss": 0.1558, "lr": 3.229873766068214e-06, "epoch": 2.3257153831380832, "percentage": 46.51, "elapsed_time": "1:35:56", "remaining_time": "1:50:19", "throughput": 19862.6, "total_tokens": 114346880}
|
|
{"current_steps": 36335, "total_steps": 78105, "loss": 0.198, "lr": 3.229339433898678e-06, "epoch": 2.3260354650790602, "percentage": 46.52, "elapsed_time": "1:35:57", "remaining_time": "1:50:18", "throughput": 19863.03, "total_tokens": 114363200}
|
|
{"current_steps": 36340, "total_steps": 78105, "loss": 0.2137, "lr": 3.2288050653097307e-06, "epoch": 2.326355547020037, "percentage": 46.53, "elapsed_time": "1:35:58", "remaining_time": "1:50:17", "throughput": 19863.46, "total_tokens": 114379392}
|
|
{"current_steps": 36345, "total_steps": 78105, "loss": 0.1978, "lr": 3.2282706603280545e-06, "epoch": 2.326675628961014, "percentage": 46.53, "elapsed_time": "1:35:58", "remaining_time": "1:50:16", "throughput": 19863.81, "total_tokens": 114394752}
|
|
{"current_steps": 36350, "total_steps": 78105, "loss": 0.2727, "lr": 3.2277362189803342e-06, "epoch": 2.3269957109019908, "percentage": 46.54, "elapsed_time": "1:35:59", "remaining_time": "1:50:16", "throughput": 19864.19, "total_tokens": 114410560}
|
|
{"current_steps": 36355, "total_steps": 78105, "loss": 0.303, "lr": 3.2272017412932573e-06, "epoch": 2.3273157928429677, "percentage": 46.55, "elapsed_time": "1:36:00", "remaining_time": "1:50:15", "throughput": 19864.57, "total_tokens": 114426432}
|
|
{"current_steps": 36360, "total_steps": 78105, "loss": 0.191, "lr": 3.226667227293514e-06, "epoch": 2.3276358747839447, "percentage": 46.55, "elapsed_time": "1:36:00", "remaining_time": "1:50:14", "throughput": 19864.96, "total_tokens": 114441792}
|
|
{"current_steps": 36365, "total_steps": 78105, "loss": 0.2371, "lr": 3.226132677007795e-06, "epoch": 2.3279559567249217, "percentage": 46.56, "elapsed_time": "1:36:01", "remaining_time": "1:50:13", "throughput": 19865.45, "total_tokens": 114458816}
|
|
{"current_steps": 36370, "total_steps": 78105, "loss": 0.2465, "lr": 3.2255980904627914e-06, "epoch": 2.3282760386658987, "percentage": 46.57, "elapsed_time": "1:36:02", "remaining_time": "1:50:12", "throughput": 19865.84, "total_tokens": 114474240}
|
|
{"current_steps": 36375, "total_steps": 78105, "loss": 0.1747, "lr": 3.2250634676851992e-06, "epoch": 2.3285961206068753, "percentage": 46.57, "elapsed_time": "1:36:03", "remaining_time": "1:50:11", "throughput": 19866.36, "total_tokens": 114491456}
|
|
{"current_steps": 36380, "total_steps": 78105, "loss": 0.2339, "lr": 3.224528808701714e-06, "epoch": 2.3289162025478523, "percentage": 46.58, "elapsed_time": "1:36:03", "remaining_time": "1:50:10", "throughput": 19866.74, "total_tokens": 114507200}
|
|
{"current_steps": 36385, "total_steps": 78105, "loss": 0.2379, "lr": 3.223994113539034e-06, "epoch": 2.3292362844888292, "percentage": 46.58, "elapsed_time": "1:36:04", "remaining_time": "1:50:09", "throughput": 19867.21, "total_tokens": 114523904}
|
|
{"current_steps": 36390, "total_steps": 78105, "loss": 0.1407, "lr": 3.2234593822238598e-06, "epoch": 2.329556366429806, "percentage": 46.59, "elapsed_time": "1:36:05", "remaining_time": "1:50:08", "throughput": 19867.63, "total_tokens": 114539840}
|
|
{"current_steps": 36395, "total_steps": 78105, "loss": 0.1382, "lr": 3.222924614782892e-06, "epoch": 2.329876448370783, "percentage": 46.6, "elapsed_time": "1:36:05", "remaining_time": "1:50:07", "throughput": 19867.99, "total_tokens": 114555072}
|
|
{"current_steps": 36400, "total_steps": 78105, "loss": 0.213, "lr": 3.2223898112428344e-06, "epoch": 2.3301965303117598, "percentage": 46.6, "elapsed_time": "1:36:06", "remaining_time": "1:50:06", "throughput": 19868.34, "total_tokens": 114570176}
|
|
{"current_steps": 36405, "total_steps": 78105, "loss": 0.2016, "lr": 3.2218549716303914e-06, "epoch": 2.3305166122527368, "percentage": 46.61, "elapsed_time": "1:36:07", "remaining_time": "1:50:05", "throughput": 19868.73, "total_tokens": 114585728}
|
|
{"current_steps": 36410, "total_steps": 78105, "loss": 0.2206, "lr": 3.2213200959722724e-06, "epoch": 2.3308366941937138, "percentage": 46.62, "elapsed_time": "1:36:07", "remaining_time": "1:50:05", "throughput": 19869.15, "total_tokens": 114601792}
|
|
{"current_steps": 36415, "total_steps": 78105, "loss": 0.1338, "lr": 3.2207851842951844e-06, "epoch": 2.3311567761346903, "percentage": 46.62, "elapsed_time": "1:36:08", "remaining_time": "1:50:04", "throughput": 19869.57, "total_tokens": 114617664}
|
|
{"current_steps": 36420, "total_steps": 78105, "loss": 0.2276, "lr": 3.2202502366258394e-06, "epoch": 2.3314768580756673, "percentage": 46.63, "elapsed_time": "1:36:09", "remaining_time": "1:50:03", "throughput": 19869.87, "total_tokens": 114632000}
|
|
{"current_steps": 36425, "total_steps": 78105, "loss": 0.2008, "lr": 3.2197152529909486e-06, "epoch": 2.3317969400166443, "percentage": 46.64, "elapsed_time": "1:36:09", "remaining_time": "1:50:02", "throughput": 19870.27, "total_tokens": 114647872}
|
|
{"current_steps": 36430, "total_steps": 78105, "loss": 0.2457, "lr": 3.2191802334172275e-06, "epoch": 2.3321170219576213, "percentage": 46.64, "elapsed_time": "1:36:10", "remaining_time": "1:50:01", "throughput": 19870.63, "total_tokens": 114663168}
|
|
{"current_steps": 36435, "total_steps": 78105, "loss": 0.1498, "lr": 3.2186451779313914e-06, "epoch": 2.332437103898598, "percentage": 46.65, "elapsed_time": "1:36:11", "remaining_time": "1:50:00", "throughput": 19870.95, "total_tokens": 114677824}
|
|
{"current_steps": 36440, "total_steps": 78105, "loss": 0.2776, "lr": 3.218110086560158e-06, "epoch": 2.332757185839575, "percentage": 46.66, "elapsed_time": "1:36:11", "remaining_time": "1:49:59", "throughput": 19871.26, "total_tokens": 114692672}
|
|
{"current_steps": 36445, "total_steps": 78105, "loss": 0.3512, "lr": 3.217574959330248e-06, "epoch": 2.333077267780552, "percentage": 46.66, "elapsed_time": "1:36:12", "remaining_time": "1:49:58", "throughput": 19871.61, "total_tokens": 114707968}
|
|
{"current_steps": 36450, "total_steps": 78105, "loss": 0.1954, "lr": 3.2170397962683818e-06, "epoch": 2.333397349721529, "percentage": 46.67, "elapsed_time": "1:36:13", "remaining_time": "1:49:57", "throughput": 19871.99, "total_tokens": 114723712}
|
|
{"current_steps": 36455, "total_steps": 78105, "loss": 0.272, "lr": 3.2165045974012832e-06, "epoch": 2.333717431662506, "percentage": 46.67, "elapsed_time": "1:36:13", "remaining_time": "1:49:56", "throughput": 19872.33, "total_tokens": 114738880}
|
|
{"current_steps": 36460, "total_steps": 78105, "loss": 0.2548, "lr": 3.2159693627556777e-06, "epoch": 2.3340375136034823, "percentage": 46.68, "elapsed_time": "1:36:14", "remaining_time": "1:49:55", "throughput": 19872.64, "total_tokens": 114753472}
|
|
{"current_steps": 36465, "total_steps": 78105, "loss": 0.1387, "lr": 3.2154340923582904e-06, "epoch": 2.3343575955444593, "percentage": 46.69, "elapsed_time": "1:36:15", "remaining_time": "1:49:54", "throughput": 19872.95, "total_tokens": 114768000}
|
|
{"current_steps": 36470, "total_steps": 78105, "loss": 0.3848, "lr": 3.214898786235852e-06, "epoch": 2.3346776774854363, "percentage": 46.69, "elapsed_time": "1:36:15", "remaining_time": "1:49:53", "throughput": 19872.78, "total_tokens": 114784000}
|
|
{"current_steps": 36475, "total_steps": 78105, "loss": 0.2352, "lr": 3.214363444415091e-06, "epoch": 2.3349977594264133, "percentage": 46.7, "elapsed_time": "1:36:16", "remaining_time": "1:49:52", "throughput": 19873.13, "total_tokens": 114798848}
|
|
{"current_steps": 36480, "total_steps": 78105, "loss": 0.259, "lr": 3.2138280669227416e-06, "epoch": 2.33531784136739, "percentage": 46.71, "elapsed_time": "1:36:17", "remaining_time": "1:49:52", "throughput": 19873.44, "total_tokens": 114813632}
|
|
{"current_steps": 36485, "total_steps": 78105, "loss": 0.2253, "lr": 3.213292653785537e-06, "epoch": 2.335637923308367, "percentage": 46.71, "elapsed_time": "1:36:17", "remaining_time": "1:49:51", "throughput": 19873.86, "total_tokens": 114830208}
|
|
{"current_steps": 36490, "total_steps": 78105, "loss": 0.2324, "lr": 3.212757205030212e-06, "epoch": 2.335958005249344, "percentage": 46.72, "elapsed_time": "1:36:18", "remaining_time": "1:49:50", "throughput": 19874.31, "total_tokens": 114846784}
|
|
{"current_steps": 36495, "total_steps": 78105, "loss": 0.2594, "lr": 3.2122217206835043e-06, "epoch": 2.336278087190321, "percentage": 46.73, "elapsed_time": "1:36:19", "remaining_time": "1:49:49", "throughput": 19874.66, "total_tokens": 114861888}
|
|
{"current_steps": 36500, "total_steps": 78105, "loss": 0.1692, "lr": 3.2116862007721544e-06, "epoch": 2.336598169131298, "percentage": 46.73, "elapsed_time": "1:36:19", "remaining_time": "1:49:48", "throughput": 19875.04, "total_tokens": 114877504}
|
|
{"current_steps": 36505, "total_steps": 78105, "loss": 0.2188, "lr": 3.2111506453229024e-06, "epoch": 2.3369182510722744, "percentage": 46.74, "elapsed_time": "1:36:20", "remaining_time": "1:49:47", "throughput": 19875.33, "total_tokens": 114891840}
|
|
{"current_steps": 36510, "total_steps": 78105, "loss": 0.3845, "lr": 3.210615054362492e-06, "epoch": 2.3372383330132513, "percentage": 46.74, "elapsed_time": "1:36:21", "remaining_time": "1:49:46", "throughput": 19875.68, "total_tokens": 114906944}
|
|
{"current_steps": 36515, "total_steps": 78105, "loss": 0.1854, "lr": 3.2100794279176674e-06, "epoch": 2.3375584149542283, "percentage": 46.75, "elapsed_time": "1:36:21", "remaining_time": "1:49:45", "throughput": 19876.03, "total_tokens": 114922176}
|
|
{"current_steps": 36520, "total_steps": 78105, "loss": 0.2246, "lr": 3.2095437660151734e-06, "epoch": 2.3378784968952053, "percentage": 46.76, "elapsed_time": "1:36:22", "remaining_time": "1:49:44", "throughput": 19876.45, "total_tokens": 114938432}
|
|
{"current_steps": 36525, "total_steps": 78105, "loss": 0.318, "lr": 3.2090080686817605e-06, "epoch": 2.338198578836182, "percentage": 46.76, "elapsed_time": "1:36:23", "remaining_time": "1:49:43", "throughput": 19876.76, "total_tokens": 114953152}
|
|
{"current_steps": 36530, "total_steps": 78105, "loss": 0.221, "lr": 3.208472335944177e-06, "epoch": 2.338518660777159, "percentage": 46.77, "elapsed_time": "1:36:23", "remaining_time": "1:49:42", "throughput": 19877.2, "total_tokens": 114969344}
|
|
{"current_steps": 36535, "total_steps": 78105, "loss": 0.1831, "lr": 3.2079365678291767e-06, "epoch": 2.338838742718136, "percentage": 46.78, "elapsed_time": "1:36:24", "remaining_time": "1:49:41", "throughput": 19877.58, "total_tokens": 114984960}
|
|
{"current_steps": 36540, "total_steps": 78105, "loss": 0.2345, "lr": 3.207400764363511e-06, "epoch": 2.339158824659113, "percentage": 46.78, "elapsed_time": "1:36:25", "remaining_time": "1:49:40", "throughput": 19877.96, "total_tokens": 115000576}
|
|
{"current_steps": 36545, "total_steps": 78105, "loss": 0.1756, "lr": 3.206864925573936e-06, "epoch": 2.33947890660009, "percentage": 46.79, "elapsed_time": "1:36:25", "remaining_time": "1:49:39", "throughput": 19878.3, "total_tokens": 115015552}
|
|
{"current_steps": 36550, "total_steps": 78105, "loss": 0.1685, "lr": 3.206329051487208e-06, "epoch": 2.3397989885410664, "percentage": 46.8, "elapsed_time": "1:36:26", "remaining_time": "1:49:39", "throughput": 19878.61, "total_tokens": 115030016}
|
|
{"current_steps": 36555, "total_steps": 78105, "loss": 0.1446, "lr": 3.205793142130087e-06, "epoch": 2.3401190704820434, "percentage": 46.8, "elapsed_time": "1:36:27", "remaining_time": "1:49:38", "throughput": 19878.9, "total_tokens": 115044416}
|
|
{"current_steps": 36560, "total_steps": 78105, "loss": 0.3622, "lr": 3.205257197529333e-06, "epoch": 2.3404391524230204, "percentage": 46.81, "elapsed_time": "1:36:27", "remaining_time": "1:49:37", "throughput": 19879.28, "total_tokens": 115059840}
|
|
{"current_steps": 36565, "total_steps": 78105, "loss": 0.1435, "lr": 3.2047212177117078e-06, "epoch": 2.3407592343639974, "percentage": 46.82, "elapsed_time": "1:36:28", "remaining_time": "1:49:36", "throughput": 19879.69, "total_tokens": 115075840}
|
|
{"current_steps": 36570, "total_steps": 78105, "loss": 0.22, "lr": 3.2041852027039755e-06, "epoch": 2.341079316304974, "percentage": 46.82, "elapsed_time": "1:36:29", "remaining_time": "1:49:35", "throughput": 19880.08, "total_tokens": 115091648}
|
|
{"current_steps": 36575, "total_steps": 78105, "loss": 0.2323, "lr": 3.203649152532902e-06, "epoch": 2.341399398245951, "percentage": 46.83, "elapsed_time": "1:36:30", "remaining_time": "1:49:34", "throughput": 19880.56, "total_tokens": 115108608}
|
|
{"current_steps": 36580, "total_steps": 78105, "loss": 0.1537, "lr": 3.2031130672252552e-06, "epoch": 2.341719480186928, "percentage": 46.83, "elapsed_time": "1:36:30", "remaining_time": "1:49:33", "throughput": 19880.98, "total_tokens": 115124800}
|
|
{"current_steps": 36585, "total_steps": 78105, "loss": 0.2829, "lr": 3.2025769468078044e-06, "epoch": 2.342039562127905, "percentage": 46.84, "elapsed_time": "1:36:31", "remaining_time": "1:49:32", "throughput": 19881.33, "total_tokens": 115140160}
|
|
{"current_steps": 36590, "total_steps": 78105, "loss": 0.227, "lr": 3.20204079130732e-06, "epoch": 2.342359644068882, "percentage": 46.85, "elapsed_time": "1:36:32", "remaining_time": "1:49:31", "throughput": 19881.71, "total_tokens": 115156096}
|
|
{"current_steps": 36595, "total_steps": 78105, "loss": 0.2746, "lr": 3.2015046007505757e-06, "epoch": 2.3426797260098584, "percentage": 46.85, "elapsed_time": "1:36:32", "remaining_time": "1:49:30", "throughput": 19882.02, "total_tokens": 115170816}
|
|
{"current_steps": 36600, "total_steps": 78105, "loss": 0.2314, "lr": 3.2009683751643457e-06, "epoch": 2.3429998079508354, "percentage": 46.86, "elapsed_time": "1:36:33", "remaining_time": "1:49:29", "throughput": 19882.36, "total_tokens": 115185536}
|
|
{"current_steps": 36605, "total_steps": 78105, "loss": 0.1881, "lr": 3.2004321145754065e-06, "epoch": 2.3433198898918124, "percentage": 46.87, "elapsed_time": "1:36:34", "remaining_time": "1:49:28", "throughput": 19882.72, "total_tokens": 115201024}
|
|
{"current_steps": 36610, "total_steps": 78105, "loss": 0.2162, "lr": 3.1998958190105352e-06, "epoch": 2.3436399718327894, "percentage": 46.87, "elapsed_time": "1:36:34", "remaining_time": "1:49:27", "throughput": 19883.05, "total_tokens": 115215744}
|
|
{"current_steps": 36615, "total_steps": 78105, "loss": 0.254, "lr": 3.1993594884965123e-06, "epoch": 2.343960053773766, "percentage": 46.88, "elapsed_time": "1:36:35", "remaining_time": "1:49:27", "throughput": 19883.5, "total_tokens": 115232704}
|
|
{"current_steps": 36620, "total_steps": 78105, "loss": 0.2069, "lr": 3.1988231230601198e-06, "epoch": 2.344280135714743, "percentage": 46.89, "elapsed_time": "1:36:36", "remaining_time": "1:49:26", "throughput": 19883.88, "total_tokens": 115248064}
|
|
{"current_steps": 36625, "total_steps": 78105, "loss": 0.223, "lr": 3.198286722728141e-06, "epoch": 2.34460021765572, "percentage": 46.89, "elapsed_time": "1:36:36", "remaining_time": "1:49:25", "throughput": 19884.26, "total_tokens": 115263296}
|
|
{"current_steps": 36630, "total_steps": 78105, "loss": 0.2543, "lr": 3.1977502875273605e-06, "epoch": 2.344920299596697, "percentage": 46.9, "elapsed_time": "1:36:37", "remaining_time": "1:49:24", "throughput": 19884.71, "total_tokens": 115279872}
|
|
{"current_steps": 36635, "total_steps": 78105, "loss": 0.1653, "lr": 3.1972138174845635e-06, "epoch": 2.345240381537674, "percentage": 46.9, "elapsed_time": "1:36:38", "remaining_time": "1:49:23", "throughput": 19885.08, "total_tokens": 115294976}
|
|
{"current_steps": 36640, "total_steps": 78105, "loss": 0.1667, "lr": 3.1966773126265415e-06, "epoch": 2.3455604634786504, "percentage": 46.91, "elapsed_time": "1:36:38", "remaining_time": "1:49:22", "throughput": 19885.46, "total_tokens": 115310784}
|
|
{"current_steps": 36645, "total_steps": 78105, "loss": 0.2562, "lr": 3.196140772980083e-06, "epoch": 2.3458805454196274, "percentage": 46.92, "elapsed_time": "1:36:39", "remaining_time": "1:49:21", "throughput": 19885.84, "total_tokens": 115326592}
|
|
{"current_steps": 36650, "total_steps": 78105, "loss": 0.2537, "lr": 3.19560419857198e-06, "epoch": 2.3462006273606044, "percentage": 46.92, "elapsed_time": "1:36:40", "remaining_time": "1:49:20", "throughput": 19886.34, "total_tokens": 115343616}
|
|
{"current_steps": 36655, "total_steps": 78105, "loss": 0.1709, "lr": 3.1950675894290275e-06, "epoch": 2.346520709301581, "percentage": 46.93, "elapsed_time": "1:36:40", "remaining_time": "1:49:19", "throughput": 19886.71, "total_tokens": 115358656}
|
|
{"current_steps": 36660, "total_steps": 78105, "loss": 0.3394, "lr": 3.1945309455780195e-06, "epoch": 2.346840791242558, "percentage": 46.94, "elapsed_time": "1:36:41", "remaining_time": "1:49:18", "throughput": 19887.17, "total_tokens": 115375296}
|
|
{"current_steps": 36665, "total_steps": 78105, "loss": 0.2417, "lr": 3.1939942670457536e-06, "epoch": 2.347160873183535, "percentage": 46.94, "elapsed_time": "1:36:42", "remaining_time": "1:49:17", "throughput": 19887.57, "total_tokens": 115390976}
|
|
{"current_steps": 36670, "total_steps": 78105, "loss": 0.2483, "lr": 3.193457553859029e-06, "epoch": 2.347480955124512, "percentage": 46.95, "elapsed_time": "1:36:42", "remaining_time": "1:49:16", "throughput": 19887.97, "total_tokens": 115406592}
|
|
{"current_steps": 36675, "total_steps": 78105, "loss": 0.2299, "lr": 3.1929208060446465e-06, "epoch": 2.347801037065489, "percentage": 46.96, "elapsed_time": "1:36:43", "remaining_time": "1:49:15", "throughput": 19888.29, "total_tokens": 115421376}
|
|
{"current_steps": 36680, "total_steps": 78105, "loss": 0.2541, "lr": 3.1923840236294085e-06, "epoch": 2.3481211190064655, "percentage": 46.96, "elapsed_time": "1:36:44", "remaining_time": "1:49:14", "throughput": 19888.61, "total_tokens": 115436096}
|
|
{"current_steps": 36685, "total_steps": 78105, "loss": 0.2223, "lr": 3.1918472066401185e-06, "epoch": 2.3484412009474425, "percentage": 46.97, "elapsed_time": "1:36:44", "remaining_time": "1:49:14", "throughput": 19889.04, "total_tokens": 115452672}
|
|
{"current_steps": 36690, "total_steps": 78105, "loss": 0.2102, "lr": 3.191310355103583e-06, "epoch": 2.3487612828884195, "percentage": 46.98, "elapsed_time": "1:36:45", "remaining_time": "1:49:13", "throughput": 19889.45, "total_tokens": 115468544}
|
|
{"current_steps": 36695, "total_steps": 78105, "loss": 0.3369, "lr": 3.1907734690466086e-06, "epoch": 2.3490813648293964, "percentage": 46.98, "elapsed_time": "1:36:46", "remaining_time": "1:49:12", "throughput": 19889.82, "total_tokens": 115483904}
|
|
{"current_steps": 36700, "total_steps": 78105, "loss": 0.2031, "lr": 3.190236548496005e-06, "epoch": 2.349401446770373, "percentage": 46.99, "elapsed_time": "1:36:46", "remaining_time": "1:49:11", "throughput": 19890.24, "total_tokens": 115500032}
|
|
{"current_steps": 36705, "total_steps": 78105, "loss": 0.2442, "lr": 3.189699593478584e-06, "epoch": 2.34972152871135, "percentage": 46.99, "elapsed_time": "1:36:47", "remaining_time": "1:49:10", "throughput": 19890.62, "total_tokens": 115515712}
|
|
{"current_steps": 36710, "total_steps": 78105, "loss": 0.2571, "lr": 3.1891626040211582e-06, "epoch": 2.350041610652327, "percentage": 47.0, "elapsed_time": "1:36:48", "remaining_time": "1:49:09", "throughput": 19891.01, "total_tokens": 115531200}
|
|
{"current_steps": 36715, "total_steps": 78105, "loss": 0.1583, "lr": 3.1886255801505418e-06, "epoch": 2.350361692593304, "percentage": 47.01, "elapsed_time": "1:36:48", "remaining_time": "1:49:08", "throughput": 19891.4, "total_tokens": 115547072}
|
|
{"current_steps": 36720, "total_steps": 78105, "loss": 0.2909, "lr": 3.1880885218935505e-06, "epoch": 2.350681774534281, "percentage": 47.01, "elapsed_time": "1:36:49", "remaining_time": "1:49:07", "throughput": 19891.78, "total_tokens": 115562560}
|
|
{"current_steps": 36725, "total_steps": 78105, "loss": 0.1459, "lr": 3.1875514292770015e-06, "epoch": 2.3510018564752575, "percentage": 47.02, "elapsed_time": "1:36:50", "remaining_time": "1:49:06", "throughput": 19892.14, "total_tokens": 115577856}
|
|
{"current_steps": 36730, "total_steps": 78105, "loss": 0.2942, "lr": 3.1870143023277168e-06, "epoch": 2.3513219384162345, "percentage": 47.03, "elapsed_time": "1:36:50", "remaining_time": "1:49:05", "throughput": 19892.5, "total_tokens": 115593152}
|
|
{"current_steps": 36735, "total_steps": 78105, "loss": 0.2979, "lr": 3.186477141072516e-06, "epoch": 2.3516420203572115, "percentage": 47.03, "elapsed_time": "1:36:51", "remaining_time": "1:49:04", "throughput": 19892.88, "total_tokens": 115609024}
|
|
{"current_steps": 36740, "total_steps": 78105, "loss": 0.2339, "lr": 3.1859399455382228e-06, "epoch": 2.3519621022981885, "percentage": 47.04, "elapsed_time": "1:36:52", "remaining_time": "1:49:03", "throughput": 19893.27, "total_tokens": 115625024}
|
|
{"current_steps": 36745, "total_steps": 78105, "loss": 0.2626, "lr": 3.1854027157516615e-06, "epoch": 2.352282184239165, "percentage": 47.05, "elapsed_time": "1:36:52", "remaining_time": "1:49:03", "throughput": 19893.64, "total_tokens": 115640512}
|
|
{"current_steps": 36750, "total_steps": 78105, "loss": 0.1688, "lr": 3.1848654517396583e-06, "epoch": 2.352602266180142, "percentage": 47.05, "elapsed_time": "1:36:53", "remaining_time": "1:49:02", "throughput": 19894.12, "total_tokens": 115657920}
|
|
{"current_steps": 36755, "total_steps": 78105, "loss": 0.278, "lr": 3.184328153529042e-06, "epoch": 2.352922348121119, "percentage": 47.06, "elapsed_time": "1:36:54", "remaining_time": "1:49:01", "throughput": 19894.47, "total_tokens": 115672896}
|
|
{"current_steps": 36760, "total_steps": 78105, "loss": 0.2092, "lr": 3.183790821146642e-06, "epoch": 2.353242430062096, "percentage": 47.06, "elapsed_time": "1:36:55", "remaining_time": "1:49:00", "throughput": 19895.0, "total_tokens": 115690624}
|
|
{"current_steps": 36765, "total_steps": 78105, "loss": 0.2352, "lr": 3.1832534546192908e-06, "epoch": 2.353562512003073, "percentage": 47.07, "elapsed_time": "1:36:55", "remaining_time": "1:48:59", "throughput": 19895.38, "total_tokens": 115706624}
|
|
{"current_steps": 36770, "total_steps": 78105, "loss": 0.1923, "lr": 3.18271605397382e-06, "epoch": 2.3538825939440495, "percentage": 47.08, "elapsed_time": "1:36:56", "remaining_time": "1:48:58", "throughput": 19895.69, "total_tokens": 115721344}
|
|
{"current_steps": 36775, "total_steps": 78105, "loss": 0.1588, "lr": 3.182178619237066e-06, "epoch": 2.3542026758850265, "percentage": 47.08, "elapsed_time": "1:36:57", "remaining_time": "1:48:57", "throughput": 19896.04, "total_tokens": 115736576}
|
|
{"current_steps": 36780, "total_steps": 78105, "loss": 0.224, "lr": 3.181641150435866e-06, "epoch": 2.3545227578260035, "percentage": 47.09, "elapsed_time": "1:36:57", "remaining_time": "1:48:56", "throughput": 19896.47, "total_tokens": 115752896}
|
|
{"current_steps": 36785, "total_steps": 78105, "loss": 0.1854, "lr": 3.1811036475970563e-06, "epoch": 2.3548428397669805, "percentage": 47.1, "elapsed_time": "1:36:58", "remaining_time": "1:48:55", "throughput": 19896.85, "total_tokens": 115768320}
|
|
{"current_steps": 36790, "total_steps": 78105, "loss": 0.241, "lr": 3.180566110747479e-06, "epoch": 2.355162921707957, "percentage": 47.1, "elapsed_time": "1:36:59", "remaining_time": "1:48:54", "throughput": 19897.29, "total_tokens": 115784640}
|
|
{"current_steps": 36795, "total_steps": 78105, "loss": 0.2999, "lr": 3.1800285399139747e-06, "epoch": 2.355483003648934, "percentage": 47.11, "elapsed_time": "1:36:59", "remaining_time": "1:48:53", "throughput": 19897.6, "total_tokens": 115799424}
|
|
{"current_steps": 36800, "total_steps": 78105, "loss": 0.1685, "lr": 3.1794909351233873e-06, "epoch": 2.355803085589911, "percentage": 47.12, "elapsed_time": "1:37:00", "remaining_time": "1:48:53", "throughput": 19898.08, "total_tokens": 115816384}
|
|
{"current_steps": 36805, "total_steps": 78105, "loss": 0.2579, "lr": 3.178953296402562e-06, "epoch": 2.356123167530888, "percentage": 47.12, "elapsed_time": "1:37:01", "remaining_time": "1:48:52", "throughput": 19898.45, "total_tokens": 115831808}
|
|
{"current_steps": 36810, "total_steps": 78105, "loss": 0.2363, "lr": 3.1784156237783453e-06, "epoch": 2.356443249471865, "percentage": 47.13, "elapsed_time": "1:37:01", "remaining_time": "1:48:51", "throughput": 19898.84, "total_tokens": 115847552}
|
|
{"current_steps": 36815, "total_steps": 78105, "loss": 0.2009, "lr": 3.1778779172775866e-06, "epoch": 2.3567633314128416, "percentage": 47.14, "elapsed_time": "1:37:02", "remaining_time": "1:48:50", "throughput": 19899.27, "total_tokens": 115863808}
|
|
{"current_steps": 36820, "total_steps": 78105, "loss": 0.2429, "lr": 3.1773401769271357e-06, "epoch": 2.3570834133538185, "percentage": 47.14, "elapsed_time": "1:37:03", "remaining_time": "1:48:49", "throughput": 19899.72, "total_tokens": 115879936}
|
|
{"current_steps": 36825, "total_steps": 78105, "loss": 0.2146, "lr": 3.1768024027538435e-06, "epoch": 2.3574034952947955, "percentage": 47.15, "elapsed_time": "1:37:03", "remaining_time": "1:48:48", "throughput": 19900.07, "total_tokens": 115894912}
|
|
{"current_steps": 36830, "total_steps": 78105, "loss": 0.154, "lr": 3.1762645947845656e-06, "epoch": 2.3577235772357725, "percentage": 47.15, "elapsed_time": "1:37:04", "remaining_time": "1:48:47", "throughput": 19900.47, "total_tokens": 115910784}
|
|
{"current_steps": 36835, "total_steps": 78105, "loss": 0.1963, "lr": 3.175726753046156e-06, "epoch": 2.358043659176749, "percentage": 47.16, "elapsed_time": "1:37:05", "remaining_time": "1:48:46", "throughput": 19900.94, "total_tokens": 115927424}
|
|
{"current_steps": 36840, "total_steps": 78105, "loss": 0.2708, "lr": 3.1751888775654715e-06, "epoch": 2.358363741117726, "percentage": 47.17, "elapsed_time": "1:37:05", "remaining_time": "1:48:45", "throughput": 19901.39, "total_tokens": 115943936}
|
|
{"current_steps": 36845, "total_steps": 78105, "loss": 0.2317, "lr": 3.1746509683693727e-06, "epoch": 2.358683823058703, "percentage": 47.17, "elapsed_time": "1:37:06", "remaining_time": "1:48:44", "throughput": 19901.89, "total_tokens": 115961152}
|
|
{"current_steps": 36850, "total_steps": 78105, "loss": 0.2335, "lr": 3.174113025484718e-06, "epoch": 2.35900390499968, "percentage": 47.18, "elapsed_time": "1:37:07", "remaining_time": "1:48:43", "throughput": 19902.29, "total_tokens": 115977344}
|
|
{"current_steps": 36855, "total_steps": 78105, "loss": 0.1963, "lr": 3.1735750489383703e-06, "epoch": 2.359323986940657, "percentage": 47.19, "elapsed_time": "1:37:08", "remaining_time": "1:48:43", "throughput": 19902.7, "total_tokens": 115993472}
|
|
{"current_steps": 36860, "total_steps": 78105, "loss": 0.1885, "lr": 3.173037038757193e-06, "epoch": 2.3596440688816336, "percentage": 47.19, "elapsed_time": "1:37:08", "remaining_time": "1:48:42", "throughput": 19903.04, "total_tokens": 116008640}
|
|
{"current_steps": 36865, "total_steps": 78105, "loss": 0.1878, "lr": 3.172498994968052e-06, "epoch": 2.3599641508226106, "percentage": 47.2, "elapsed_time": "1:37:09", "remaining_time": "1:48:41", "throughput": 19903.51, "total_tokens": 116025280}
|
|
{"current_steps": 36870, "total_steps": 78105, "loss": 0.1902, "lr": 3.1719609175978133e-06, "epoch": 2.3602842327635876, "percentage": 47.21, "elapsed_time": "1:37:10", "remaining_time": "1:48:40", "throughput": 19903.83, "total_tokens": 116040064}
|
|
{"current_steps": 36875, "total_steps": 78105, "loss": 0.1829, "lr": 3.1714228066733477e-06, "epoch": 2.3606043147045646, "percentage": 47.21, "elapsed_time": "1:37:10", "remaining_time": "1:48:39", "throughput": 19904.3, "total_tokens": 116057088}
|
|
{"current_steps": 36880, "total_steps": 78105, "loss": 0.2213, "lr": 3.170884662221523e-06, "epoch": 2.360924396645541, "percentage": 47.22, "elapsed_time": "1:37:11", "remaining_time": "1:48:38", "throughput": 19904.69, "total_tokens": 116072960}
|
|
{"current_steps": 36885, "total_steps": 78105, "loss": 0.272, "lr": 3.1703464842692138e-06, "epoch": 2.361244478586518, "percentage": 47.22, "elapsed_time": "1:37:12", "remaining_time": "1:48:37", "throughput": 19905.07, "total_tokens": 116088704}
|
|
{"current_steps": 36890, "total_steps": 78105, "loss": 0.1445, "lr": 3.169808272843293e-06, "epoch": 2.361564560527495, "percentage": 47.23, "elapsed_time": "1:37:12", "remaining_time": "1:48:36", "throughput": 19905.54, "total_tokens": 116105664}
|
|
{"current_steps": 36895, "total_steps": 78105, "loss": 0.2431, "lr": 3.169270027970635e-06, "epoch": 2.361884642468472, "percentage": 47.24, "elapsed_time": "1:37:13", "remaining_time": "1:48:35", "throughput": 19905.89, "total_tokens": 116121024}
|
|
{"current_steps": 36900, "total_steps": 78105, "loss": 0.2349, "lr": 3.168731749678119e-06, "epoch": 2.362204724409449, "percentage": 47.24, "elapsed_time": "1:37:14", "remaining_time": "1:48:34", "throughput": 19906.21, "total_tokens": 116135808}
|
|
{"current_steps": 36905, "total_steps": 78105, "loss": 0.2027, "lr": 3.168193437992622e-06, "epoch": 2.3625248063504256, "percentage": 47.25, "elapsed_time": "1:37:14", "remaining_time": "1:48:33", "throughput": 19906.55, "total_tokens": 116150528}
|
|
{"current_steps": 36910, "total_steps": 78105, "loss": 0.192, "lr": 3.1676550929410256e-06, "epoch": 2.3628448882914026, "percentage": 47.26, "elapsed_time": "1:37:15", "remaining_time": "1:48:32", "throughput": 19906.87, "total_tokens": 116165312}
|
|
{"current_steps": 36915, "total_steps": 78105, "loss": 0.1879, "lr": 3.1671167145502114e-06, "epoch": 2.3631649702323796, "percentage": 47.26, "elapsed_time": "1:37:16", "remaining_time": "1:48:31", "throughput": 19907.21, "total_tokens": 116180416}
|
|
{"current_steps": 36920, "total_steps": 78105, "loss": 0.2415, "lr": 3.1665783028470636e-06, "epoch": 2.363485052173356, "percentage": 47.27, "elapsed_time": "1:37:16", "remaining_time": "1:48:31", "throughput": 19907.6, "total_tokens": 116196288}
|
|
{"current_steps": 36925, "total_steps": 78105, "loss": 0.2012, "lr": 3.1660398578584665e-06, "epoch": 2.363805134114333, "percentage": 47.28, "elapsed_time": "1:37:17", "remaining_time": "1:48:30", "throughput": 19907.94, "total_tokens": 116211072}
|
|
{"current_steps": 36930, "total_steps": 78105, "loss": 0.2828, "lr": 3.1655013796113092e-06, "epoch": 2.36412521605531, "percentage": 47.28, "elapsed_time": "1:37:18", "remaining_time": "1:48:29", "throughput": 19908.26, "total_tokens": 116225856}
|
|
{"current_steps": 36935, "total_steps": 78105, "loss": 0.2462, "lr": 3.1649628681324795e-06, "epoch": 2.364445297996287, "percentage": 47.29, "elapsed_time": "1:37:18", "remaining_time": "1:48:28", "throughput": 19908.58, "total_tokens": 116240448}
|
|
{"current_steps": 36940, "total_steps": 78105, "loss": 0.273, "lr": 3.164424323448867e-06, "epoch": 2.364765379937264, "percentage": 47.3, "elapsed_time": "1:37:19", "remaining_time": "1:48:27", "throughput": 19908.99, "total_tokens": 116256448}
|
|
{"current_steps": 36945, "total_steps": 78105, "loss": 0.2831, "lr": 3.163885745587365e-06, "epoch": 2.3650854618782406, "percentage": 47.3, "elapsed_time": "1:37:20", "remaining_time": "1:48:26", "throughput": 19909.39, "total_tokens": 116272640}
|
|
{"current_steps": 36950, "total_steps": 78105, "loss": 0.2958, "lr": 3.1633471345748672e-06, "epoch": 2.3654055438192176, "percentage": 47.31, "elapsed_time": "1:37:20", "remaining_time": "1:48:25", "throughput": 19909.83, "total_tokens": 116288896}
|
|
{"current_steps": 36955, "total_steps": 78105, "loss": 0.1791, "lr": 3.1628084904382685e-06, "epoch": 2.3657256257601946, "percentage": 47.31, "elapsed_time": "1:37:21", "remaining_time": "1:48:24", "throughput": 19910.18, "total_tokens": 116304000}
|
|
{"current_steps": 36960, "total_steps": 78105, "loss": 0.2435, "lr": 3.162269813204466e-06, "epoch": 2.3660457077011716, "percentage": 47.32, "elapsed_time": "1:37:22", "remaining_time": "1:48:23", "throughput": 19910.64, "total_tokens": 116320896}
|
|
{"current_steps": 36965, "total_steps": 78105, "loss": 0.1587, "lr": 3.1617311029003593e-06, "epoch": 2.366365789642148, "percentage": 47.33, "elapsed_time": "1:37:22", "remaining_time": "1:48:22", "throughput": 19911.06, "total_tokens": 116337088}
|
|
{"current_steps": 36970, "total_steps": 78105, "loss": 0.2713, "lr": 3.161192359552848e-06, "epoch": 2.366685871583125, "percentage": 47.33, "elapsed_time": "1:37:23", "remaining_time": "1:48:21", "throughput": 19911.36, "total_tokens": 116351488}
|
|
{"current_steps": 36975, "total_steps": 78105, "loss": 0.3221, "lr": 3.160653583188834e-06, "epoch": 2.367005953524102, "percentage": 47.34, "elapsed_time": "1:37:24", "remaining_time": "1:48:20", "throughput": 19911.73, "total_tokens": 116366976}
|
|
{"current_steps": 36980, "total_steps": 78105, "loss": 0.1425, "lr": 3.1601147738352212e-06, "epoch": 2.367326035465079, "percentage": 47.35, "elapsed_time": "1:37:24", "remaining_time": "1:48:20", "throughput": 19912.23, "total_tokens": 116384384}
|
|
{"current_steps": 36985, "total_steps": 78105, "loss": 0.2174, "lr": 3.1595759315189155e-06, "epoch": 2.367646117406056, "percentage": 47.35, "elapsed_time": "1:37:25", "remaining_time": "1:48:19", "throughput": 19912.61, "total_tokens": 116400384}
|
|
{"current_steps": 36990, "total_steps": 78105, "loss": 0.2824, "lr": 3.1590370562668233e-06, "epoch": 2.3679661993470327, "percentage": 47.36, "elapsed_time": "1:37:26", "remaining_time": "1:48:18", "throughput": 19912.92, "total_tokens": 116414976}
|
|
{"current_steps": 36995, "total_steps": 78105, "loss": 0.2902, "lr": 3.158498148105853e-06, "epoch": 2.3682862812880097, "percentage": 47.37, "elapsed_time": "1:37:26", "remaining_time": "1:48:17", "throughput": 19913.26, "total_tokens": 116430336}
|
|
{"current_steps": 37000, "total_steps": 78105, "loss": 0.1279, "lr": 3.157959207062915e-06, "epoch": 2.3686063632289867, "percentage": 47.37, "elapsed_time": "1:37:27", "remaining_time": "1:48:16", "throughput": 19913.65, "total_tokens": 116445888}
|
|
{"current_steps": 37005, "total_steps": 78105, "loss": 0.2083, "lr": 3.1574202331649216e-06, "epoch": 2.3689264451699636, "percentage": 47.38, "elapsed_time": "1:37:28", "remaining_time": "1:48:15", "throughput": 19913.98, "total_tokens": 116460736}
|
|
{"current_steps": 37010, "total_steps": 78105, "loss": 0.2319, "lr": 3.156881226438786e-06, "epoch": 2.36924652711094, "percentage": 47.38, "elapsed_time": "1:37:28", "remaining_time": "1:48:14", "throughput": 19914.37, "total_tokens": 116475904}
|
|
{"current_steps": 37015, "total_steps": 78105, "loss": 0.1774, "lr": 3.1563421869114236e-06, "epoch": 2.369566609051917, "percentage": 47.39, "elapsed_time": "1:37:29", "remaining_time": "1:48:13", "throughput": 19914.8, "total_tokens": 116492352}
|
|
{"current_steps": 37020, "total_steps": 78105, "loss": 0.1879, "lr": 3.155803114609751e-06, "epoch": 2.369886690992894, "percentage": 47.4, "elapsed_time": "1:37:30", "remaining_time": "1:48:12", "throughput": 19915.14, "total_tokens": 116507520}
|
|
{"current_steps": 37025, "total_steps": 78105, "loss": 0.1764, "lr": 3.155264009560688e-06, "epoch": 2.370206772933871, "percentage": 47.4, "elapsed_time": "1:37:30", "remaining_time": "1:48:11", "throughput": 19915.54, "total_tokens": 116523712}
|
|
{"current_steps": 37030, "total_steps": 78105, "loss": 0.229, "lr": 3.1547248717911522e-06, "epoch": 2.370526854874848, "percentage": 47.41, "elapsed_time": "1:37:31", "remaining_time": "1:48:10", "throughput": 19915.87, "total_tokens": 116538624}
|
|
{"current_steps": 37035, "total_steps": 78105, "loss": 0.216, "lr": 3.1541857013280676e-06, "epoch": 2.3708469368158247, "percentage": 47.42, "elapsed_time": "1:37:32", "remaining_time": "1:48:09", "throughput": 19916.28, "total_tokens": 116554688}
|
|
{"current_steps": 37040, "total_steps": 78105, "loss": 0.2766, "lr": 3.153646498198355e-06, "epoch": 2.3711670187568017, "percentage": 47.42, "elapsed_time": "1:37:32", "remaining_time": "1:48:08", "throughput": 19916.7, "total_tokens": 116570944}
|
|
{"current_steps": 37045, "total_steps": 78105, "loss": 0.2411, "lr": 3.153107262428943e-06, "epoch": 2.3714871006977787, "percentage": 47.43, "elapsed_time": "1:37:33", "remaining_time": "1:48:08", "throughput": 19917.07, "total_tokens": 116586496}
|
|
{"current_steps": 37050, "total_steps": 78105, "loss": 0.284, "lr": 3.1525679940467548e-06, "epoch": 2.3718071826387557, "percentage": 47.44, "elapsed_time": "1:37:34", "remaining_time": "1:48:07", "throughput": 19917.42, "total_tokens": 116601792}
|
|
{"current_steps": 37055, "total_steps": 78105, "loss": 0.1597, "lr": 3.15202869307872e-06, "epoch": 2.372127264579732, "percentage": 47.44, "elapsed_time": "1:37:34", "remaining_time": "1:48:06", "throughput": 19917.75, "total_tokens": 116616768}
|
|
{"current_steps": 37060, "total_steps": 78105, "loss": 0.3351, "lr": 3.1514893595517694e-06, "epoch": 2.372447346520709, "percentage": 47.45, "elapsed_time": "1:37:35", "remaining_time": "1:48:05", "throughput": 19918.18, "total_tokens": 116632960}
|
|
{"current_steps": 37065, "total_steps": 78105, "loss": 0.1726, "lr": 3.150949993492833e-06, "epoch": 2.372767428461686, "percentage": 47.46, "elapsed_time": "1:37:36", "remaining_time": "1:48:04", "throughput": 19918.57, "total_tokens": 116648640}
|
|
{"current_steps": 37070, "total_steps": 78105, "loss": 0.3061, "lr": 3.150410594928845e-06, "epoch": 2.373087510402663, "percentage": 47.46, "elapsed_time": "1:37:36", "remaining_time": "1:48:03", "throughput": 19919.0, "total_tokens": 116665088}
|
|
{"current_steps": 37075, "total_steps": 78105, "loss": 0.1316, "lr": 3.1498711638867395e-06, "epoch": 2.37340759234364, "percentage": 47.47, "elapsed_time": "1:37:37", "remaining_time": "1:48:02", "throughput": 19919.4, "total_tokens": 116681088}
|
|
{"current_steps": 37080, "total_steps": 78105, "loss": 0.2597, "lr": 3.149331700393454e-06, "epoch": 2.3737276742846167, "percentage": 47.47, "elapsed_time": "1:37:38", "remaining_time": "1:48:01", "throughput": 19919.73, "total_tokens": 116695936}
|
|
{"current_steps": 37085, "total_steps": 78105, "loss": 0.2362, "lr": 3.1487922044759244e-06, "epoch": 2.3740477562255937, "percentage": 47.48, "elapsed_time": "1:37:38", "remaining_time": "1:48:00", "throughput": 19920.12, "total_tokens": 116711488}
|
|
{"current_steps": 37090, "total_steps": 78105, "loss": 0.1329, "lr": 3.148252676161093e-06, "epoch": 2.3743678381665707, "percentage": 47.49, "elapsed_time": "1:37:39", "remaining_time": "1:47:59", "throughput": 19920.53, "total_tokens": 116727360}
|
|
{"current_steps": 37095, "total_steps": 78105, "loss": 0.2023, "lr": 3.1477131154758976e-06, "epoch": 2.3746879201075477, "percentage": 47.49, "elapsed_time": "1:37:40", "remaining_time": "1:47:58", "throughput": 19920.94, "total_tokens": 116743616}
|
|
{"current_steps": 37100, "total_steps": 78105, "loss": 0.3928, "lr": 3.1471735224472842e-06, "epoch": 2.3750080020485242, "percentage": 47.5, "elapsed_time": "1:37:41", "remaining_time": "1:47:57", "throughput": 19921.28, "total_tokens": 116758784}
|
|
{"current_steps": 37105, "total_steps": 78105, "loss": 0.2498, "lr": 3.1466338971021963e-06, "epoch": 2.3753280839895012, "percentage": 47.51, "elapsed_time": "1:37:41", "remaining_time": "1:47:56", "throughput": 19921.65, "total_tokens": 116774144}
|
|
{"current_steps": 37110, "total_steps": 78105, "loss": 0.2343, "lr": 3.1460942394675798e-06, "epoch": 2.3756481659304782, "percentage": 47.51, "elapsed_time": "1:37:42", "remaining_time": "1:47:56", "throughput": 19922.04, "total_tokens": 116790336}
|
|
{"current_steps": 37115, "total_steps": 78105, "loss": 0.1273, "lr": 3.1455545495703816e-06, "epoch": 2.375968247871455, "percentage": 47.52, "elapsed_time": "1:37:43", "remaining_time": "1:47:55", "throughput": 19922.5, "total_tokens": 116806976}
|
|
{"current_steps": 37120, "total_steps": 78105, "loss": 0.312, "lr": 3.1450148274375523e-06, "epoch": 2.376288329812432, "percentage": 47.53, "elapsed_time": "1:37:43", "remaining_time": "1:47:54", "throughput": 19922.88, "total_tokens": 116822912}
|
|
{"current_steps": 37125, "total_steps": 78105, "loss": 0.2116, "lr": 3.1444750730960415e-06, "epoch": 2.3766084117534088, "percentage": 47.53, "elapsed_time": "1:37:44", "remaining_time": "1:47:53", "throughput": 19923.24, "total_tokens": 116838272}
|
|
{"current_steps": 37130, "total_steps": 78105, "loss": 0.1827, "lr": 3.1439352865728035e-06, "epoch": 2.3769284936943857, "percentage": 47.54, "elapsed_time": "1:37:45", "remaining_time": "1:47:52", "throughput": 19923.69, "total_tokens": 116855040}
|
|
{"current_steps": 37135, "total_steps": 78105, "loss": 0.2978, "lr": 3.1433954678947916e-06, "epoch": 2.3772485756353627, "percentage": 47.54, "elapsed_time": "1:37:45", "remaining_time": "1:47:51", "throughput": 19924.01, "total_tokens": 116869888}
|
|
{"current_steps": 37140, "total_steps": 78105, "loss": 0.2941, "lr": 3.1428556170889607e-06, "epoch": 2.3775686575763397, "percentage": 47.55, "elapsed_time": "1:37:46", "remaining_time": "1:47:50", "throughput": 19924.33, "total_tokens": 116884736}
|
|
{"current_steps": 37145, "total_steps": 78105, "loss": 0.2497, "lr": 3.1423157341822693e-06, "epoch": 2.3778887395173163, "percentage": 47.56, "elapsed_time": "1:37:47", "remaining_time": "1:47:49", "throughput": 19924.68, "total_tokens": 116900032}
|
|
{"current_steps": 37150, "total_steps": 78105, "loss": 0.1966, "lr": 3.141775819201675e-06, "epoch": 2.3782088214582933, "percentage": 47.56, "elapsed_time": "1:37:47", "remaining_time": "1:47:48", "throughput": 19925.06, "total_tokens": 116915712}
|
|
{"current_steps": 37155, "total_steps": 78105, "loss": 0.3116, "lr": 3.14123587217414e-06, "epoch": 2.3785289033992703, "percentage": 47.57, "elapsed_time": "1:37:48", "remaining_time": "1:47:47", "throughput": 19925.38, "total_tokens": 116930560}
|
|
{"current_steps": 37160, "total_steps": 78105, "loss": 0.181, "lr": 3.140695893126625e-06, "epoch": 2.3788489853402472, "percentage": 47.58, "elapsed_time": "1:37:49", "remaining_time": "1:47:46", "throughput": 19925.76, "total_tokens": 116946496}
|
|
{"current_steps": 37165, "total_steps": 78105, "loss": 0.1874, "lr": 3.140155882086095e-06, "epoch": 2.3791690672812242, "percentage": 47.58, "elapsed_time": "1:37:49", "remaining_time": "1:47:46", "throughput": 19926.13, "total_tokens": 116962112}
|
|
{"current_steps": 37170, "total_steps": 78105, "loss": 0.2267, "lr": 3.1396158390795143e-06, "epoch": 2.379489149222201, "percentage": 47.59, "elapsed_time": "1:37:50", "remaining_time": "1:47:45", "throughput": 19926.61, "total_tokens": 116978880}
|
|
{"current_steps": 37175, "total_steps": 78105, "loss": 0.3445, "lr": 3.1390757641338497e-06, "epoch": 2.3798092311631778, "percentage": 47.6, "elapsed_time": "1:37:51", "remaining_time": "1:47:44", "throughput": 19926.95, "total_tokens": 116993920}
|
|
{"current_steps": 37180, "total_steps": 78105, "loss": 0.3064, "lr": 3.138535657276071e-06, "epoch": 2.3801293131041548, "percentage": 47.6, "elapsed_time": "1:37:51", "remaining_time": "1:47:43", "throughput": 19927.29, "total_tokens": 117009280}
|
|
{"current_steps": 37185, "total_steps": 78105, "loss": 0.1395, "lr": 3.1379955185331474e-06, "epoch": 2.3804493950451313, "percentage": 47.61, "elapsed_time": "1:37:52", "remaining_time": "1:47:42", "throughput": 19927.62, "total_tokens": 117024128}
|
|
{"current_steps": 37190, "total_steps": 78105, "loss": 0.175, "lr": 3.137455347932051e-06, "epoch": 2.3807694769861083, "percentage": 47.62, "elapsed_time": "1:37:53", "remaining_time": "1:47:41", "throughput": 19927.93, "total_tokens": 117039104}
|
|
{"current_steps": 37195, "total_steps": 78105, "loss": 0.2272, "lr": 3.1369151454997547e-06, "epoch": 2.3810895589270853, "percentage": 47.62, "elapsed_time": "1:37:53", "remaining_time": "1:47:40", "throughput": 19928.28, "total_tokens": 117054016}
|
|
{"current_steps": 37200, "total_steps": 78105, "loss": 0.3674, "lr": 3.1363749112632336e-06, "epoch": 2.3814096408680623, "percentage": 47.63, "elapsed_time": "1:37:54", "remaining_time": "1:47:39", "throughput": 19928.62, "total_tokens": 117068992}
|
|
{"current_steps": 37205, "total_steps": 78105, "loss": 0.1852, "lr": 3.1358346452494647e-06, "epoch": 2.3817297228090393, "percentage": 47.63, "elapsed_time": "1:37:55", "remaining_time": "1:47:38", "throughput": 19929.05, "total_tokens": 117085312}
|
|
{"current_steps": 37210, "total_steps": 78105, "loss": 0.1488, "lr": 3.1352943474854246e-06, "epoch": 2.382049804750016, "percentage": 47.64, "elapsed_time": "1:37:55", "remaining_time": "1:47:37", "throughput": 19929.38, "total_tokens": 117100288}
|
|
{"current_steps": 37215, "total_steps": 78105, "loss": 0.262, "lr": 3.1347540179980945e-06, "epoch": 2.382369886690993, "percentage": 47.65, "elapsed_time": "1:37:56", "remaining_time": "1:47:36", "throughput": 19929.77, "total_tokens": 117116352}
|
|
{"current_steps": 37220, "total_steps": 78105, "loss": 0.1892, "lr": 3.1342136568144556e-06, "epoch": 2.38268996863197, "percentage": 47.65, "elapsed_time": "1:37:57", "remaining_time": "1:47:35", "throughput": 19930.17, "total_tokens": 117132160}
|
|
{"current_steps": 37225, "total_steps": 78105, "loss": 0.2432, "lr": 3.1336732639614894e-06, "epoch": 2.383010050572947, "percentage": 47.66, "elapsed_time": "1:37:57", "remaining_time": "1:47:34", "throughput": 19930.51, "total_tokens": 117147456}
|
|
{"current_steps": 37230, "total_steps": 78105, "loss": 0.319, "lr": 3.133132839466181e-06, "epoch": 2.3833301325139233, "percentage": 47.67, "elapsed_time": "1:37:58", "remaining_time": "1:47:34", "throughput": 19930.87, "total_tokens": 117163008}
|
|
{"current_steps": 37235, "total_steps": 78105, "loss": 0.1343, "lr": 3.1325923833555165e-06, "epoch": 2.3836502144549003, "percentage": 47.67, "elapsed_time": "1:37:59", "remaining_time": "1:47:33", "throughput": 19931.25, "total_tokens": 117178752}
|
|
{"current_steps": 37240, "total_steps": 78105, "loss": 0.1841, "lr": 3.1320518956564833e-06, "epoch": 2.3839702963958773, "percentage": 47.68, "elapsed_time": "1:37:59", "remaining_time": "1:47:32", "throughput": 19931.63, "total_tokens": 117194304}
|
|
{"current_steps": 37245, "total_steps": 78105, "loss": 0.2999, "lr": 3.1315113763960707e-06, "epoch": 2.3842903783368543, "percentage": 47.69, "elapsed_time": "1:38:00", "remaining_time": "1:47:31", "throughput": 19931.94, "total_tokens": 117209408}
|
|
{"current_steps": 37250, "total_steps": 78105, "loss": 0.1687, "lr": 3.13097082560127e-06, "epoch": 2.3846104602778313, "percentage": 47.69, "elapsed_time": "1:38:01", "remaining_time": "1:47:30", "throughput": 19932.36, "total_tokens": 117225408}
|
|
{"current_steps": 37255, "total_steps": 78105, "loss": 0.3064, "lr": 3.1304302432990728e-06, "epoch": 2.384930542218808, "percentage": 47.7, "elapsed_time": "1:38:01", "remaining_time": "1:47:29", "throughput": 19932.66, "total_tokens": 117240128}
|
|
{"current_steps": 37260, "total_steps": 78105, "loss": 0.2303, "lr": 3.1298896295164727e-06, "epoch": 2.385250624159785, "percentage": 47.71, "elapsed_time": "1:38:02", "remaining_time": "1:47:28", "throughput": 19933.04, "total_tokens": 117255808}
|
|
{"current_steps": 37265, "total_steps": 78105, "loss": 0.1998, "lr": 3.1293489842804654e-06, "epoch": 2.385570706100762, "percentage": 47.71, "elapsed_time": "1:38:03", "remaining_time": "1:47:27", "throughput": 19933.57, "total_tokens": 117273600}
|
|
{"current_steps": 37270, "total_steps": 78105, "loss": 0.2555, "lr": 3.128808307618048e-06, "epoch": 2.385890788041739, "percentage": 47.72, "elapsed_time": "1:38:04", "remaining_time": "1:47:27", "throughput": 19932.79, "total_tokens": 117291136}
|
|
{"current_steps": 37275, "total_steps": 78105, "loss": 0.262, "lr": 3.1282675995562195e-06, "epoch": 2.3862108699827154, "percentage": 47.72, "elapsed_time": "1:38:05", "remaining_time": "1:47:26", "throughput": 19933.12, "total_tokens": 117306560}
|
|
{"current_steps": 37280, "total_steps": 78105, "loss": 0.3019, "lr": 3.1277268601219797e-06, "epoch": 2.3865309519236924, "percentage": 47.73, "elapsed_time": "1:38:05", "remaining_time": "1:47:25", "throughput": 19933.5, "total_tokens": 117322176}
|
|
{"current_steps": 37285, "total_steps": 78105, "loss": 0.3054, "lr": 3.12718608934233e-06, "epoch": 2.3868510338646693, "percentage": 47.74, "elapsed_time": "1:38:06", "remaining_time": "1:47:24", "throughput": 19933.96, "total_tokens": 117339072}
|
|
{"current_steps": 37290, "total_steps": 78105, "loss": 0.1775, "lr": 3.126645287244273e-06, "epoch": 2.3871711158056463, "percentage": 47.74, "elapsed_time": "1:38:07", "remaining_time": "1:47:24", "throughput": 19934.87, "total_tokens": 117367936}
|
|
{"current_steps": 37295, "total_steps": 78105, "loss": 0.2964, "lr": 3.126104453854815e-06, "epoch": 2.3874911977466233, "percentage": 47.75, "elapsed_time": "1:38:08", "remaining_time": "1:47:23", "throughput": 19935.26, "total_tokens": 117383680}
|
|
{"current_steps": 37300, "total_steps": 78105, "loss": 0.2318, "lr": 3.1255635892009617e-06, "epoch": 2.3878112796876, "percentage": 47.76, "elapsed_time": "1:38:08", "remaining_time": "1:47:22", "throughput": 19935.68, "total_tokens": 117399744}
|
|
{"current_steps": 37305, "total_steps": 78105, "loss": 0.2093, "lr": 3.1250226933097216e-06, "epoch": 2.388131361628577, "percentage": 47.76, "elapsed_time": "1:38:10", "remaining_time": "1:47:21", "throughput": 19934.58, "total_tokens": 117415872}
|
|
{"current_steps": 37310, "total_steps": 78105, "loss": 0.2072, "lr": 3.1244817662081038e-06, "epoch": 2.388451443569554, "percentage": 47.77, "elapsed_time": "1:38:10", "remaining_time": "1:47:20", "throughput": 19934.95, "total_tokens": 117430976}
|
|
{"current_steps": 37315, "total_steps": 78105, "loss": 0.2696, "lr": 3.12394080792312e-06, "epoch": 2.388771525510531, "percentage": 47.78, "elapsed_time": "1:38:11", "remaining_time": "1:47:20", "throughput": 19935.35, "total_tokens": 117447040}
|
|
{"current_steps": 37320, "total_steps": 78105, "loss": 0.3026, "lr": 3.1233998184817813e-06, "epoch": 2.3890916074515074, "percentage": 47.78, "elapsed_time": "1:38:12", "remaining_time": "1:47:19", "throughput": 19935.69, "total_tokens": 117462144}
|
|
{"current_steps": 37325, "total_steps": 78105, "loss": 0.2848, "lr": 3.1228587979111027e-06, "epoch": 2.3894116893924844, "percentage": 47.79, "elapsed_time": "1:38:12", "remaining_time": "1:47:18", "throughput": 19936.02, "total_tokens": 117477056}
|
|
{"current_steps": 37330, "total_steps": 78105, "loss": 0.26, "lr": 3.1223177462381005e-06, "epoch": 2.3897317713334614, "percentage": 47.79, "elapsed_time": "1:38:13", "remaining_time": "1:47:17", "throughput": 19936.36, "total_tokens": 117492224}
|
|
{"current_steps": 37335, "total_steps": 78105, "loss": 0.3487, "lr": 3.1217766634897918e-06, "epoch": 2.3900518532744384, "percentage": 47.8, "elapsed_time": "1:38:14", "remaining_time": "1:47:16", "throughput": 19936.69, "total_tokens": 117507456}
|
|
{"current_steps": 37340, "total_steps": 78105, "loss": 0.2269, "lr": 3.121235549693195e-06, "epoch": 2.3903719352154154, "percentage": 47.81, "elapsed_time": "1:38:14", "remaining_time": "1:47:15", "throughput": 19936.98, "total_tokens": 117521728}
|
|
{"current_steps": 37345, "total_steps": 78105, "loss": 0.2479, "lr": 3.1206944048753307e-06, "epoch": 2.390692017156392, "percentage": 47.81, "elapsed_time": "1:38:15", "remaining_time": "1:47:14", "throughput": 19937.3, "total_tokens": 117536640}
|
|
{"current_steps": 37350, "total_steps": 78105, "loss": 0.1789, "lr": 3.1201532290632207e-06, "epoch": 2.391012099097369, "percentage": 47.82, "elapsed_time": "1:38:15", "remaining_time": "1:47:13", "throughput": 19937.68, "total_tokens": 117552512}
|
|
{"current_steps": 37355, "total_steps": 78105, "loss": 0.2537, "lr": 3.119612022283889e-06, "epoch": 2.391332181038346, "percentage": 47.83, "elapsed_time": "1:38:16", "remaining_time": "1:47:12", "throughput": 19938.0, "total_tokens": 117567424}
|
|
{"current_steps": 37360, "total_steps": 78105, "loss": 0.28, "lr": 3.1190707845643605e-06, "epoch": 2.391652262979323, "percentage": 47.83, "elapsed_time": "1:38:17", "remaining_time": "1:47:11", "throughput": 19938.37, "total_tokens": 117583424}
|
|
{"current_steps": 37365, "total_steps": 78105, "loss": 0.1772, "lr": 3.1185295159316608e-06, "epoch": 2.3919723449202994, "percentage": 47.84, "elapsed_time": "1:38:18", "remaining_time": "1:47:10", "throughput": 19938.76, "total_tokens": 117599424}
|
|
{"current_steps": 37370, "total_steps": 78105, "loss": 0.2561, "lr": 3.1179882164128202e-06, "epoch": 2.3922924268612764, "percentage": 47.85, "elapsed_time": "1:38:18", "remaining_time": "1:47:09", "throughput": 19939.11, "total_tokens": 117614720}
|
|
{"current_steps": 37375, "total_steps": 78105, "loss": 0.177, "lr": 3.117446886034866e-06, "epoch": 2.3926125088022534, "percentage": 47.85, "elapsed_time": "1:38:19", "remaining_time": "1:47:08", "throughput": 19939.42, "total_tokens": 117629504}
|
|
{"current_steps": 37380, "total_steps": 78105, "loss": 0.1672, "lr": 3.116905524824831e-06, "epoch": 2.3929325907432304, "percentage": 47.86, "elapsed_time": "1:38:20", "remaining_time": "1:47:08", "throughput": 19939.8, "total_tokens": 117645760}
|
|
{"current_steps": 37385, "total_steps": 78105, "loss": 0.2088, "lr": 3.1163641328097472e-06, "epoch": 2.3932526726842074, "percentage": 47.87, "elapsed_time": "1:38:20", "remaining_time": "1:47:07", "throughput": 19940.13, "total_tokens": 117660480}
|
|
{"current_steps": 37390, "total_steps": 78105, "loss": 0.265, "lr": 3.115822710016649e-06, "epoch": 2.393572754625184, "percentage": 47.87, "elapsed_time": "1:38:21", "remaining_time": "1:47:06", "throughput": 19940.5, "total_tokens": 117676224}
|
|
{"current_steps": 37395, "total_steps": 78105, "loss": 0.3185, "lr": 3.115281256472573e-06, "epoch": 2.393892836566161, "percentage": 47.88, "elapsed_time": "1:38:22", "remaining_time": "1:47:05", "throughput": 19940.82, "total_tokens": 117690880}
|
|
{"current_steps": 37400, "total_steps": 78105, "loss": 0.2079, "lr": 3.1147397722045558e-06, "epoch": 2.394212918507138, "percentage": 47.88, "elapsed_time": "1:38:22", "remaining_time": "1:47:04", "throughput": 19941.16, "total_tokens": 117706240}
|
|
{"current_steps": 37405, "total_steps": 78105, "loss": 0.2328, "lr": 3.1141982572396355e-06, "epoch": 2.394533000448115, "percentage": 47.89, "elapsed_time": "1:38:23", "remaining_time": "1:47:03", "throughput": 19941.49, "total_tokens": 117721536}
|
|
{"current_steps": 37410, "total_steps": 78105, "loss": 0.273, "lr": 3.1136567116048545e-06, "epoch": 2.3948530823890914, "percentage": 47.9, "elapsed_time": "1:38:23", "remaining_time": "1:47:02", "throughput": 19941.78, "total_tokens": 117735936}
|
|
{"current_steps": 37415, "total_steps": 78105, "loss": 0.2035, "lr": 3.1131151353272527e-06, "epoch": 2.3951731643300684, "percentage": 47.9, "elapsed_time": "1:38:24", "remaining_time": "1:47:01", "throughput": 19942.23, "total_tokens": 117752384}
|
|
{"current_steps": 37420, "total_steps": 78105, "loss": 0.157, "lr": 3.1125735284338754e-06, "epoch": 2.3954932462710454, "percentage": 47.91, "elapsed_time": "1:38:25", "remaining_time": "1:47:00", "throughput": 19942.62, "total_tokens": 117768128}
|
|
{"current_steps": 37425, "total_steps": 78105, "loss": 0.2292, "lr": 3.112031890951767e-06, "epoch": 2.3958133282120224, "percentage": 47.92, "elapsed_time": "1:38:26", "remaining_time": "1:46:59", "throughput": 19943.02, "total_tokens": 117784128}
|
|
{"current_steps": 37430, "total_steps": 78105, "loss": 0.1662, "lr": 3.1114902229079737e-06, "epoch": 2.3961334101529994, "percentage": 47.92, "elapsed_time": "1:38:26", "remaining_time": "1:46:58", "throughput": 19943.32, "total_tokens": 117798848}
|
|
{"current_steps": 37435, "total_steps": 78105, "loss": 0.195, "lr": 3.110948524329544e-06, "epoch": 2.396453492093976, "percentage": 47.93, "elapsed_time": "1:38:27", "remaining_time": "1:46:57", "throughput": 19943.68, "total_tokens": 117814720}
|
|
{"current_steps": 37440, "total_steps": 78105, "loss": 0.225, "lr": 3.1104067952435267e-06, "epoch": 2.396773574034953, "percentage": 47.94, "elapsed_time": "1:38:28", "remaining_time": "1:46:56", "throughput": 19944.02, "total_tokens": 117830016}
|
|
{"current_steps": 37445, "total_steps": 78105, "loss": 0.1745, "lr": 3.1098650356769745e-06, "epoch": 2.39709365597593, "percentage": 47.94, "elapsed_time": "1:38:28", "remaining_time": "1:46:56", "throughput": 19944.44, "total_tokens": 117846272}
|
|
{"current_steps": 37450, "total_steps": 78105, "loss": 0.1957, "lr": 3.1093232456569384e-06, "epoch": 2.3974137379169065, "percentage": 47.95, "elapsed_time": "1:38:29", "remaining_time": "1:46:55", "throughput": 19944.74, "total_tokens": 117860736}
|
|
{"current_steps": 37455, "total_steps": 78105, "loss": 0.2226, "lr": 3.108781425210473e-06, "epoch": 2.3977338198578835, "percentage": 47.95, "elapsed_time": "1:38:29", "remaining_time": "1:46:54", "throughput": 19945.02, "total_tokens": 117875008}
|
|
{"current_steps": 37460, "total_steps": 78105, "loss": 0.2238, "lr": 3.1082395743646353e-06, "epoch": 2.3980539017988605, "percentage": 47.96, "elapsed_time": "1:38:30", "remaining_time": "1:46:53", "throughput": 19945.55, "total_tokens": 117892736}
|
|
{"current_steps": 37465, "total_steps": 78105, "loss": 0.2795, "lr": 3.1076976931464803e-06, "epoch": 2.3983739837398375, "percentage": 47.97, "elapsed_time": "1:38:31", "remaining_time": "1:46:52", "throughput": 19945.93, "total_tokens": 117908352}
|
|
{"current_steps": 37470, "total_steps": 78105, "loss": 0.1588, "lr": 3.107155781583068e-06, "epoch": 2.3986940656808144, "percentage": 47.97, "elapsed_time": "1:38:32", "remaining_time": "1:46:51", "throughput": 19946.34, "total_tokens": 117924672}
|
|
{"current_steps": 37475, "total_steps": 78105, "loss": 0.1354, "lr": 3.106613839701459e-06, "epoch": 2.399014147621791, "percentage": 47.98, "elapsed_time": "1:38:32", "remaining_time": "1:46:50", "throughput": 19946.7, "total_tokens": 117940160}
|
|
{"current_steps": 37480, "total_steps": 78105, "loss": 0.2088, "lr": 3.106071867528715e-06, "epoch": 2.399334229562768, "percentage": 47.99, "elapsed_time": "1:38:33", "remaining_time": "1:46:49", "throughput": 19947.04, "total_tokens": 117955136}
|
|
{"current_steps": 37485, "total_steps": 78105, "loss": 0.2431, "lr": 3.105529865091898e-06, "epoch": 2.399654311503745, "percentage": 47.99, "elapsed_time": "1:38:34", "remaining_time": "1:46:48", "throughput": 19947.36, "total_tokens": 117969856}
|
|
{"current_steps": 37490, "total_steps": 78105, "loss": 0.1642, "lr": 3.1049878324180745e-06, "epoch": 2.399974393444722, "percentage": 48.0, "elapsed_time": "1:38:34", "remaining_time": "1:46:47", "throughput": 19947.66, "total_tokens": 117984704}
|
|
{"current_steps": 37495, "total_steps": 78105, "loss": 0.2134, "lr": 3.104445769534309e-06, "epoch": 2.4002944753856985, "percentage": 48.01, "elapsed_time": "1:38:35", "remaining_time": "1:46:46", "throughput": 19948.02, "total_tokens": 118000128}
|
|
{"current_steps": 37500, "total_steps": 78105, "loss": 0.1757, "lr": 3.1039036764676704e-06, "epoch": 2.4006145573266755, "percentage": 48.01, "elapsed_time": "1:38:36", "remaining_time": "1:46:45", "throughput": 19948.45, "total_tokens": 118016640}
|
|
{"current_steps": 37505, "total_steps": 78105, "loss": 0.3412, "lr": 3.1033615532452283e-06, "epoch": 2.4009346392676525, "percentage": 48.02, "elapsed_time": "1:38:36", "remaining_time": "1:46:45", "throughput": 19948.79, "total_tokens": 118031872}
|
|
{"current_steps": 37510, "total_steps": 78105, "loss": 0.2326, "lr": 3.1028193998940525e-06, "epoch": 2.4012547212086295, "percentage": 48.03, "elapsed_time": "1:38:37", "remaining_time": "1:46:44", "throughput": 19949.11, "total_tokens": 118046848}
|
|
{"current_steps": 37515, "total_steps": 78105, "loss": 0.2026, "lr": 3.1022772164412168e-06, "epoch": 2.4015748031496065, "percentage": 48.03, "elapsed_time": "1:38:38", "remaining_time": "1:46:43", "throughput": 19949.51, "total_tokens": 118062912}
|
|
{"current_steps": 37520, "total_steps": 78105, "loss": 0.214, "lr": 3.101735002913793e-06, "epoch": 2.401894885090583, "percentage": 48.04, "elapsed_time": "1:38:38", "remaining_time": "1:46:42", "throughput": 19949.99, "total_tokens": 118080128}
|
|
{"current_steps": 37525, "total_steps": 78105, "loss": 0.2351, "lr": 3.101192759338858e-06, "epoch": 2.40221496703156, "percentage": 48.04, "elapsed_time": "1:38:39", "remaining_time": "1:46:41", "throughput": 19950.36, "total_tokens": 118096064}
|
|
{"current_steps": 37530, "total_steps": 78105, "loss": 0.1285, "lr": 3.100650485743487e-06, "epoch": 2.402535048972537, "percentage": 48.05, "elapsed_time": "1:38:40", "remaining_time": "1:46:40", "throughput": 19950.84, "total_tokens": 118113152}
|
|
{"current_steps": 37535, "total_steps": 78105, "loss": 0.3104, "lr": 3.1001081821547605e-06, "epoch": 2.402855130913514, "percentage": 48.06, "elapsed_time": "1:38:40", "remaining_time": "1:46:39", "throughput": 19951.16, "total_tokens": 118128000}
|
|
{"current_steps": 37540, "total_steps": 78105, "loss": 0.2372, "lr": 3.0995658485997575e-06, "epoch": 2.4031752128544905, "percentage": 48.06, "elapsed_time": "1:38:41", "remaining_time": "1:46:38", "throughput": 19951.7, "total_tokens": 118145728}
|
|
{"current_steps": 37545, "total_steps": 78105, "loss": 0.1751, "lr": 3.099023485105559e-06, "epoch": 2.4034952947954675, "percentage": 48.07, "elapsed_time": "1:38:42", "remaining_time": "1:46:37", "throughput": 19952.06, "total_tokens": 118161408}
|
|
{"current_steps": 37550, "total_steps": 78105, "loss": 0.239, "lr": 3.0984810916992475e-06, "epoch": 2.4038153767364445, "percentage": 48.08, "elapsed_time": "1:38:42", "remaining_time": "1:46:36", "throughput": 19952.42, "total_tokens": 118176896}
|
|
{"current_steps": 37555, "total_steps": 78105, "loss": 0.2408, "lr": 3.097938668407907e-06, "epoch": 2.4041354586774215, "percentage": 48.08, "elapsed_time": "1:38:43", "remaining_time": "1:46:35", "throughput": 19952.71, "total_tokens": 118191360}
|
|
{"current_steps": 37560, "total_steps": 78105, "loss": 0.2011, "lr": 3.0973962152586247e-06, "epoch": 2.4044555406183985, "percentage": 48.09, "elapsed_time": "1:38:44", "remaining_time": "1:46:35", "throughput": 19953.14, "total_tokens": 118207936}
|
|
{"current_steps": 37565, "total_steps": 78105, "loss": 0.2181, "lr": 3.096853732278487e-06, "epoch": 2.404775622559375, "percentage": 48.1, "elapsed_time": "1:38:44", "remaining_time": "1:46:34", "throughput": 19953.51, "total_tokens": 118223744}
|
|
{"current_steps": 37570, "total_steps": 78105, "loss": 0.1763, "lr": 3.0963112194945834e-06, "epoch": 2.405095704500352, "percentage": 48.1, "elapsed_time": "1:38:45", "remaining_time": "1:46:33", "throughput": 19953.91, "total_tokens": 118240064}
|
|
{"current_steps": 37575, "total_steps": 78105, "loss": 0.1374, "lr": 3.095768676934003e-06, "epoch": 2.405415786441329, "percentage": 48.11, "elapsed_time": "1:38:46", "remaining_time": "1:46:32", "throughput": 19954.25, "total_tokens": 118255424}
|
|
{"current_steps": 37580, "total_steps": 78105, "loss": 0.1365, "lr": 3.0952261046238375e-06, "epoch": 2.405735868382306, "percentage": 48.11, "elapsed_time": "1:38:46", "remaining_time": "1:46:31", "throughput": 19954.59, "total_tokens": 118270464}
|
|
{"current_steps": 37585, "total_steps": 78105, "loss": 0.178, "lr": 3.094683502591181e-06, "epoch": 2.4060559503232826, "percentage": 48.12, "elapsed_time": "1:38:47", "remaining_time": "1:46:30", "throughput": 19954.95, "total_tokens": 118285888}
|
|
{"current_steps": 37590, "total_steps": 78105, "loss": 0.2249, "lr": 3.0941408708631282e-06, "epoch": 2.4063760322642596, "percentage": 48.13, "elapsed_time": "1:38:48", "remaining_time": "1:46:29", "throughput": 19955.46, "total_tokens": 118303616}
|
|
{"current_steps": 37595, "total_steps": 78105, "loss": 0.1757, "lr": 3.0935982094667754e-06, "epoch": 2.4066961142052365, "percentage": 48.13, "elapsed_time": "1:38:49", "remaining_time": "1:46:28", "throughput": 19955.86, "total_tokens": 118319936}
|
|
{"current_steps": 37600, "total_steps": 78105, "loss": 0.2439, "lr": 3.09305551842922e-06, "epoch": 2.4070161961462135, "percentage": 48.14, "elapsed_time": "1:38:49", "remaining_time": "1:46:27", "throughput": 19956.16, "total_tokens": 118334848}
|
|
{"current_steps": 37605, "total_steps": 78105, "loss": 0.2724, "lr": 3.0925127977775608e-06, "epoch": 2.4073362780871905, "percentage": 48.15, "elapsed_time": "1:38:50", "remaining_time": "1:46:26", "throughput": 19956.51, "total_tokens": 118350592}
|
|
{"current_steps": 37610, "total_steps": 78105, "loss": 0.2555, "lr": 3.0919700475388985e-06, "epoch": 2.407656360028167, "percentage": 48.15, "elapsed_time": "1:38:51", "remaining_time": "1:46:26", "throughput": 19956.97, "total_tokens": 118367488}
|
|
{"current_steps": 37615, "total_steps": 78105, "loss": 0.1574, "lr": 3.091427267740336e-06, "epoch": 2.407976441969144, "percentage": 48.16, "elapsed_time": "1:38:51", "remaining_time": "1:46:25", "throughput": 19957.32, "total_tokens": 118382720}
|
|
{"current_steps": 37620, "total_steps": 78105, "loss": 0.2141, "lr": 3.0908844584089764e-06, "epoch": 2.408296523910121, "percentage": 48.17, "elapsed_time": "1:38:52", "remaining_time": "1:46:24", "throughput": 19957.71, "total_tokens": 118399040}
|
|
{"current_steps": 37625, "total_steps": 78105, "loss": 0.1975, "lr": 3.090341619571925e-06, "epoch": 2.408616605851098, "percentage": 48.17, "elapsed_time": "1:38:53", "remaining_time": "1:46:23", "throughput": 19958.01, "total_tokens": 118413824}
|
|
{"current_steps": 37630, "total_steps": 78105, "loss": 0.2963, "lr": 3.089798751256288e-06, "epoch": 2.4089366877920746, "percentage": 48.18, "elapsed_time": "1:38:53", "remaining_time": "1:46:22", "throughput": 19958.42, "total_tokens": 118430208}
|
|
{"current_steps": 37635, "total_steps": 78105, "loss": 0.1646, "lr": 3.089255853489173e-06, "epoch": 2.4092567697330516, "percentage": 48.19, "elapsed_time": "1:38:54", "remaining_time": "1:46:21", "throughput": 19958.75, "total_tokens": 118445248}
|
|
{"current_steps": 37640, "total_steps": 78105, "loss": 0.1595, "lr": 3.08871292629769e-06, "epoch": 2.4095768516740286, "percentage": 48.19, "elapsed_time": "1:38:55", "remaining_time": "1:46:20", "throughput": 19959.37, "total_tokens": 118464896}
|
|
{"current_steps": 37645, "total_steps": 78105, "loss": 0.1514, "lr": 3.0881699697089494e-06, "epoch": 2.4098969336150056, "percentage": 48.2, "elapsed_time": "1:38:55", "remaining_time": "1:46:19", "throughput": 19959.74, "total_tokens": 118480768}
|
|
{"current_steps": 37650, "total_steps": 78105, "loss": 0.2708, "lr": 3.087626983750065e-06, "epoch": 2.4102170155559826, "percentage": 48.2, "elapsed_time": "1:38:56", "remaining_time": "1:46:18", "throughput": 19960.02, "total_tokens": 118495360}
|
|
{"current_steps": 37655, "total_steps": 78105, "loss": 0.2412, "lr": 3.0870839684481502e-06, "epoch": 2.410537097496959, "percentage": 48.21, "elapsed_time": "1:38:57", "remaining_time": "1:46:18", "throughput": 19960.36, "total_tokens": 118510848}
|
|
{"current_steps": 37660, "total_steps": 78105, "loss": 0.2465, "lr": 3.0865409238303196e-06, "epoch": 2.410857179437936, "percentage": 48.22, "elapsed_time": "1:38:57", "remaining_time": "1:46:17", "throughput": 19960.71, "total_tokens": 118526336}
|
|
{"current_steps": 37665, "total_steps": 78105, "loss": 0.1733, "lr": 3.085997849923691e-06, "epoch": 2.411177261378913, "percentage": 48.22, "elapsed_time": "1:38:58", "remaining_time": "1:46:16", "throughput": 19961.03, "total_tokens": 118541312}
|
|
{"current_steps": 37670, "total_steps": 78105, "loss": 0.2339, "lr": 3.085454746755381e-06, "epoch": 2.41149734331989, "percentage": 48.23, "elapsed_time": "1:38:59", "remaining_time": "1:46:15", "throughput": 19961.55, "total_tokens": 118558912}
|
|
{"current_steps": 37675, "total_steps": 78105, "loss": 0.3016, "lr": 3.084911614352511e-06, "epoch": 2.4118174252608666, "percentage": 48.24, "elapsed_time": "1:39:00", "remaining_time": "1:46:14", "throughput": 19961.87, "total_tokens": 118573952}
|
|
{"current_steps": 37680, "total_steps": 78105, "loss": 0.1975, "lr": 3.084368452742202e-06, "epoch": 2.4121375072018436, "percentage": 48.24, "elapsed_time": "1:39:00", "remaining_time": "1:46:13", "throughput": 19962.24, "total_tokens": 118589376}
|
|
{"current_steps": 37685, "total_steps": 78105, "loss": 0.1254, "lr": 3.0838252619515764e-06, "epoch": 2.4124575891428206, "percentage": 48.25, "elapsed_time": "1:39:01", "remaining_time": "1:46:12", "throughput": 19962.57, "total_tokens": 118604608}
|
|
{"current_steps": 37690, "total_steps": 78105, "loss": 0.2196, "lr": 3.0832820420077576e-06, "epoch": 2.4127776710837976, "percentage": 48.26, "elapsed_time": "1:39:01", "remaining_time": "1:46:11", "throughput": 19962.87, "total_tokens": 118619200}
|
|
{"current_steps": 37695, "total_steps": 78105, "loss": 0.1882, "lr": 3.0827387929378716e-06, "epoch": 2.4130977530247746, "percentage": 48.26, "elapsed_time": "1:39:02", "remaining_time": "1:46:10", "throughput": 19963.28, "total_tokens": 118635264}
|
|
{"current_steps": 37700, "total_steps": 78105, "loss": 0.3483, "lr": 3.082195514769046e-06, "epoch": 2.413417834965751, "percentage": 48.27, "elapsed_time": "1:39:03", "remaining_time": "1:46:09", "throughput": 19963.63, "total_tokens": 118651008}
|
|
{"current_steps": 37705, "total_steps": 78105, "loss": 0.2173, "lr": 3.0816522075284093e-06, "epoch": 2.413737916906728, "percentage": 48.27, "elapsed_time": "1:39:04", "remaining_time": "1:46:08", "throughput": 19964.05, "total_tokens": 118667328}
|
|
{"current_steps": 37710, "total_steps": 78105, "loss": 0.2104, "lr": 3.081108871243091e-06, "epoch": 2.414057998847705, "percentage": 48.28, "elapsed_time": "1:39:04", "remaining_time": "1:46:07", "throughput": 19964.39, "total_tokens": 118682560}
|
|
{"current_steps": 37715, "total_steps": 78105, "loss": 0.1763, "lr": 3.080565505940223e-06, "epoch": 2.4143780807886817, "percentage": 48.29, "elapsed_time": "1:39:05", "remaining_time": "1:46:07", "throughput": 19964.82, "total_tokens": 118699136}
|
|
{"current_steps": 37720, "total_steps": 78105, "loss": 0.2768, "lr": 3.080022111646937e-06, "epoch": 2.4146981627296586, "percentage": 48.29, "elapsed_time": "1:39:06", "remaining_time": "1:46:06", "throughput": 19965.25, "total_tokens": 118715712}
|
|
{"current_steps": 37725, "total_steps": 78105, "loss": 0.2486, "lr": 3.0794786883903675e-06, "epoch": 2.4150182446706356, "percentage": 48.3, "elapsed_time": "1:39:06", "remaining_time": "1:46:05", "throughput": 19965.59, "total_tokens": 118731072}
|
|
{"current_steps": 37730, "total_steps": 78105, "loss": 0.3058, "lr": 3.0789352361976515e-06, "epoch": 2.4153383266116126, "percentage": 48.31, "elapsed_time": "1:39:07", "remaining_time": "1:46:04", "throughput": 19965.99, "total_tokens": 118747008}
|
|
{"current_steps": 37735, "total_steps": 78105, "loss": 0.3504, "lr": 3.078391755095925e-06, "epoch": 2.4156584085525896, "percentage": 48.31, "elapsed_time": "1:39:08", "remaining_time": "1:46:03", "throughput": 19966.33, "total_tokens": 118762368}
|
|
{"current_steps": 37740, "total_steps": 78105, "loss": 0.1283, "lr": 3.0778482451123276e-06, "epoch": 2.415978490493566, "percentage": 48.32, "elapsed_time": "1:39:08", "remaining_time": "1:46:02", "throughput": 19966.74, "total_tokens": 118778688}
|
|
{"current_steps": 37745, "total_steps": 78105, "loss": 0.2192, "lr": 3.0773047062739987e-06, "epoch": 2.416298572434543, "percentage": 48.33, "elapsed_time": "1:39:09", "remaining_time": "1:46:01", "throughput": 19967.09, "total_tokens": 118794496}
|
|
{"current_steps": 37750, "total_steps": 78105, "loss": 0.3876, "lr": 3.076761138608079e-06, "epoch": 2.41661865437552, "percentage": 48.33, "elapsed_time": "1:39:10", "remaining_time": "1:46:00", "throughput": 19967.39, "total_tokens": 118809088}
|
|
{"current_steps": 37755, "total_steps": 78105, "loss": 0.1543, "lr": 3.076217542141713e-06, "epoch": 2.416938736316497, "percentage": 48.34, "elapsed_time": "1:39:10", "remaining_time": "1:45:59", "throughput": 19967.76, "total_tokens": 118824704}
|
|
{"current_steps": 37760, "total_steps": 78105, "loss": 0.21, "lr": 3.0756739169020434e-06, "epoch": 2.4172588182574737, "percentage": 48.35, "elapsed_time": "1:39:11", "remaining_time": "1:45:58", "throughput": 19968.15, "total_tokens": 118840704}
|
|
{"current_steps": 37765, "total_steps": 78105, "loss": 0.3376, "lr": 3.0751302629162176e-06, "epoch": 2.4175789001984507, "percentage": 48.35, "elapsed_time": "1:39:12", "remaining_time": "1:45:58", "throughput": 19968.57, "total_tokens": 118857088}
|
|
{"current_steps": 37770, "total_steps": 78105, "loss": 0.206, "lr": 3.0745865802113828e-06, "epoch": 2.4178989821394277, "percentage": 48.36, "elapsed_time": "1:39:12", "remaining_time": "1:45:57", "throughput": 19968.98, "total_tokens": 118873344}
|
|
{"current_steps": 37775, "total_steps": 78105, "loss": 0.2648, "lr": 3.0740428688146862e-06, "epoch": 2.4182190640804047, "percentage": 48.36, "elapsed_time": "1:39:13", "remaining_time": "1:45:56", "throughput": 19969.38, "total_tokens": 118889600}
|
|
{"current_steps": 37780, "total_steps": 78105, "loss": 0.2679, "lr": 3.0734991287532796e-06, "epoch": 2.4185391460213816, "percentage": 48.37, "elapsed_time": "1:39:14", "remaining_time": "1:45:55", "throughput": 19969.75, "total_tokens": 118905408}
|
|
{"current_steps": 37785, "total_steps": 78105, "loss": 0.233, "lr": 3.072955360054313e-06, "epoch": 2.418859227962358, "percentage": 48.38, "elapsed_time": "1:39:14", "remaining_time": "1:45:54", "throughput": 19970.14, "total_tokens": 118921408}
|
|
{"current_steps": 37790, "total_steps": 78105, "loss": 0.2814, "lr": 3.0724115627449403e-06, "epoch": 2.419179309903335, "percentage": 48.38, "elapsed_time": "1:39:15", "remaining_time": "1:45:53", "throughput": 19970.65, "total_tokens": 118939136}
|
|
{"current_steps": 37795, "total_steps": 78105, "loss": 0.2321, "lr": 3.0718677368523164e-06, "epoch": 2.419499391844312, "percentage": 48.39, "elapsed_time": "1:39:16", "remaining_time": "1:45:52", "throughput": 19971.05, "total_tokens": 118955200}
|
|
{"current_steps": 37800, "total_steps": 78105, "loss": 0.196, "lr": 3.0713238824035957e-06, "epoch": 2.419819473785289, "percentage": 48.4, "elapsed_time": "1:39:17", "remaining_time": "1:45:51", "throughput": 19971.43, "total_tokens": 118970624}
|
|
{"current_steps": 37805, "total_steps": 78105, "loss": 0.1438, "lr": 3.0707799994259367e-06, "epoch": 2.4201395557262657, "percentage": 48.4, "elapsed_time": "1:39:17", "remaining_time": "1:45:50", "throughput": 19971.84, "total_tokens": 118987328}
|
|
{"current_steps": 37810, "total_steps": 78105, "loss": 0.1631, "lr": 3.070236087946497e-06, "epoch": 2.4204596376672427, "percentage": 48.41, "elapsed_time": "1:39:18", "remaining_time": "1:45:50", "throughput": 19972.18, "total_tokens": 119002624}
|
|
{"current_steps": 37815, "total_steps": 78105, "loss": 0.3156, "lr": 3.0696921479924376e-06, "epoch": 2.4207797196082197, "percentage": 48.42, "elapsed_time": "1:39:19", "remaining_time": "1:45:49", "throughput": 19972.54, "total_tokens": 119018240}
|
|
{"current_steps": 37820, "total_steps": 78105, "loss": 0.1863, "lr": 3.06914817959092e-06, "epoch": 2.4210998015491967, "percentage": 48.42, "elapsed_time": "1:39:19", "remaining_time": "1:45:48", "throughput": 19972.96, "total_tokens": 119034560}
|
|
{"current_steps": 37825, "total_steps": 78105, "loss": 0.2396, "lr": 3.0686041827691066e-06, "epoch": 2.4214198834901737, "percentage": 48.43, "elapsed_time": "1:39:20", "remaining_time": "1:45:47", "throughput": 19973.43, "total_tokens": 119051712}
|
|
{"current_steps": 37830, "total_steps": 78105, "loss": 0.1676, "lr": 3.0680601575541625e-06, "epoch": 2.42173996543115, "percentage": 48.43, "elapsed_time": "1:39:21", "remaining_time": "1:45:46", "throughput": 19973.86, "total_tokens": 119067968}
|
|
{"current_steps": 37835, "total_steps": 78105, "loss": 0.2777, "lr": 3.0675161039732526e-06, "epoch": 2.422060047372127, "percentage": 48.44, "elapsed_time": "1:39:21", "remaining_time": "1:45:45", "throughput": 19974.2, "total_tokens": 119083200}
|
|
{"current_steps": 37840, "total_steps": 78105, "loss": 0.1946, "lr": 3.0669720220535447e-06, "epoch": 2.422380129313104, "percentage": 48.45, "elapsed_time": "1:39:22", "remaining_time": "1:45:44", "throughput": 19974.45, "total_tokens": 119097408}
|
|
{"current_steps": 37845, "total_steps": 78105, "loss": 0.2237, "lr": 3.0664279118222074e-06, "epoch": 2.422700211254081, "percentage": 48.45, "elapsed_time": "1:39:23", "remaining_time": "1:45:43", "throughput": 19974.76, "total_tokens": 119112448}
|
|
{"current_steps": 37850, "total_steps": 78105, "loss": 0.1967, "lr": 3.065883773306411e-06, "epoch": 2.4230202931950577, "percentage": 48.46, "elapsed_time": "1:39:23", "remaining_time": "1:45:42", "throughput": 19975.12, "total_tokens": 119127680}
|
|
{"current_steps": 37855, "total_steps": 78105, "loss": 0.1469, "lr": 3.065339606533326e-06, "epoch": 2.4233403751360347, "percentage": 48.47, "elapsed_time": "1:39:24", "remaining_time": "1:45:41", "throughput": 19975.49, "total_tokens": 119143232}
|
|
{"current_steps": 37860, "total_steps": 78105, "loss": 0.2169, "lr": 3.064795411530126e-06, "epoch": 2.4236604570770117, "percentage": 48.47, "elapsed_time": "1:39:25", "remaining_time": "1:45:40", "throughput": 19975.82, "total_tokens": 119158400}
|
|
{"current_steps": 37865, "total_steps": 78105, "loss": 0.164, "lr": 3.0642511883239846e-06, "epoch": 2.4239805390179887, "percentage": 48.48, "elapsed_time": "1:39:25", "remaining_time": "1:45:39", "throughput": 19976.16, "total_tokens": 119173696}
|
|
{"current_steps": 37870, "total_steps": 78105, "loss": 0.3497, "lr": 3.063706936942078e-06, "epoch": 2.4243006209589657, "percentage": 48.49, "elapsed_time": "1:39:26", "remaining_time": "1:45:39", "throughput": 19976.55, "total_tokens": 119189568}
|
|
{"current_steps": 37875, "total_steps": 78105, "loss": 0.2508, "lr": 3.063162657411583e-06, "epoch": 2.4246207028999422, "percentage": 48.49, "elapsed_time": "1:39:27", "remaining_time": "1:45:38", "throughput": 19976.89, "total_tokens": 119204992}
|
|
{"current_steps": 37880, "total_steps": 78105, "loss": 0.2146, "lr": 3.062618349759679e-06, "epoch": 2.4249407848409192, "percentage": 48.5, "elapsed_time": "1:39:27", "remaining_time": "1:45:37", "throughput": 19977.24, "total_tokens": 119220224}
|
|
{"current_steps": 37885, "total_steps": 78105, "loss": 0.2724, "lr": 3.062074014013545e-06, "epoch": 2.4252608667818962, "percentage": 48.51, "elapsed_time": "1:39:28", "remaining_time": "1:45:36", "throughput": 19977.51, "total_tokens": 119234432}
|
|
{"current_steps": 37890, "total_steps": 78105, "loss": 0.1117, "lr": 3.0615296502003627e-06, "epoch": 2.425580948722873, "percentage": 48.51, "elapsed_time": "1:39:29", "remaining_time": "1:45:35", "throughput": 19977.82, "total_tokens": 119249088}
|
|
{"current_steps": 37895, "total_steps": 78105, "loss": 0.1979, "lr": 3.060985258347314e-06, "epoch": 2.4259010306638498, "percentage": 48.52, "elapsed_time": "1:39:29", "remaining_time": "1:45:34", "throughput": 19978.15, "total_tokens": 119264064}
|
|
{"current_steps": 37900, "total_steps": 78105, "loss": 0.2217, "lr": 3.0604408384815836e-06, "epoch": 2.4262211126048268, "percentage": 48.52, "elapsed_time": "1:39:30", "remaining_time": "1:45:33", "throughput": 19978.45, "total_tokens": 119278592}
|
|
{"current_steps": 37905, "total_steps": 78105, "loss": 0.2541, "lr": 3.0598963906303576e-06, "epoch": 2.4265411945458037, "percentage": 48.53, "elapsed_time": "1:39:31", "remaining_time": "1:45:32", "throughput": 19978.74, "total_tokens": 119293440}
|
|
{"current_steps": 37910, "total_steps": 78105, "loss": 0.2013, "lr": 3.0593519148208217e-06, "epoch": 2.4268612764867807, "percentage": 48.54, "elapsed_time": "1:39:31", "remaining_time": "1:45:31", "throughput": 19979.07, "total_tokens": 119308544}
|
|
{"current_steps": 37915, "total_steps": 78105, "loss": 0.2045, "lr": 3.058807411080165e-06, "epoch": 2.4271813584277577, "percentage": 48.54, "elapsed_time": "1:39:32", "remaining_time": "1:45:30", "throughput": 19979.49, "total_tokens": 119324928}
|
|
{"current_steps": 37920, "total_steps": 78105, "loss": 0.3166, "lr": 3.0582628794355775e-06, "epoch": 2.4275014403687343, "percentage": 48.55, "elapsed_time": "1:39:33", "remaining_time": "1:45:29", "throughput": 19979.8, "total_tokens": 119339712}
|
|
{"current_steps": 37925, "total_steps": 78105, "loss": 0.2321, "lr": 3.057718319914248e-06, "epoch": 2.4278215223097113, "percentage": 48.56, "elapsed_time": "1:39:33", "remaining_time": "1:45:28", "throughput": 19980.21, "total_tokens": 119356096}
|
|
{"current_steps": 37930, "total_steps": 78105, "loss": 0.1764, "lr": 3.057173732543372e-06, "epoch": 2.4281416042506883, "percentage": 48.56, "elapsed_time": "1:39:34", "remaining_time": "1:45:28", "throughput": 19980.67, "total_tokens": 119373056}
|
|
{"current_steps": 37935, "total_steps": 78105, "loss": 0.2173, "lr": 3.056629117350141e-06, "epoch": 2.4284616861916652, "percentage": 48.57, "elapsed_time": "1:39:35", "remaining_time": "1:45:27", "throughput": 19981.04, "total_tokens": 119388672}
|
|
{"current_steps": 37940, "total_steps": 78105, "loss": 0.3087, "lr": 3.056084474361752e-06, "epoch": 2.428781768132642, "percentage": 48.58, "elapsed_time": "1:39:35", "remaining_time": "1:45:26", "throughput": 19981.37, "total_tokens": 119404032}
|
|
{"current_steps": 37945, "total_steps": 78105, "loss": 0.1567, "lr": 3.0555398036054006e-06, "epoch": 2.429101850073619, "percentage": 48.58, "elapsed_time": "1:39:36", "remaining_time": "1:45:25", "throughput": 19981.63, "total_tokens": 119418368}
|
|
{"current_steps": 37950, "total_steps": 78105, "loss": 0.2722, "lr": 3.054995105108286e-06, "epoch": 2.4294219320145958, "percentage": 48.59, "elapsed_time": "1:39:37", "remaining_time": "1:45:24", "throughput": 19981.94, "total_tokens": 119433088}
|
|
{"current_steps": 37955, "total_steps": 78105, "loss": 0.2747, "lr": 3.0544503788976054e-06, "epoch": 2.4297420139555728, "percentage": 48.59, "elapsed_time": "1:39:37", "remaining_time": "1:45:23", "throughput": 19982.28, "total_tokens": 119448320}
|
|
{"current_steps": 37960, "total_steps": 78105, "loss": 0.2972, "lr": 3.0539056250005617e-06, "epoch": 2.4300620958965498, "percentage": 48.6, "elapsed_time": "1:39:38", "remaining_time": "1:45:22", "throughput": 19982.64, "total_tokens": 119463872}
|
|
{"current_steps": 37965, "total_steps": 78105, "loss": 0.1852, "lr": 3.0533608434443562e-06, "epoch": 2.4303821778375263, "percentage": 48.61, "elapsed_time": "1:39:39", "remaining_time": "1:45:21", "throughput": 19983.05, "total_tokens": 119480064}
|
|
{"current_steps": 37970, "total_steps": 78105, "loss": 0.1684, "lr": 3.0528160342561925e-06, "epoch": 2.4307022597785033, "percentage": 48.61, "elapsed_time": "1:39:39", "remaining_time": "1:45:20", "throughput": 19983.36, "total_tokens": 119495168}
|
|
{"current_steps": 37975, "total_steps": 78105, "loss": 0.2119, "lr": 3.0522711974632758e-06, "epoch": 2.4310223417194803, "percentage": 48.62, "elapsed_time": "1:39:40", "remaining_time": "1:45:19", "throughput": 19983.71, "total_tokens": 119510720}
|
|
{"current_steps": 37980, "total_steps": 78105, "loss": 0.2815, "lr": 3.0517263330928116e-06, "epoch": 2.431342423660457, "percentage": 48.63, "elapsed_time": "1:39:41", "remaining_time": "1:45:18", "throughput": 19984.08, "total_tokens": 119526336}
|
|
{"current_steps": 37985, "total_steps": 78105, "loss": 0.2965, "lr": 3.0511814411720083e-06, "epoch": 2.431662505601434, "percentage": 48.63, "elapsed_time": "1:39:41", "remaining_time": "1:45:17", "throughput": 19984.44, "total_tokens": 119541952}
|
|
{"current_steps": 37990, "total_steps": 78105, "loss": 0.2018, "lr": 3.0506365217280742e-06, "epoch": 2.431982587542411, "percentage": 48.64, "elapsed_time": "1:39:42", "remaining_time": "1:45:17", "throughput": 19984.77, "total_tokens": 119557248}
|
|
{"current_steps": 37995, "total_steps": 78105, "loss": 0.2062, "lr": 3.0500915747882213e-06, "epoch": 2.432302669483388, "percentage": 48.65, "elapsed_time": "1:39:43", "remaining_time": "1:45:16", "throughput": 19985.02, "total_tokens": 119571456}
|
|
{"current_steps": 38000, "total_steps": 78105, "loss": 0.1656, "lr": 3.0495466003796603e-06, "epoch": 2.432622751424365, "percentage": 48.65, "elapsed_time": "1:39:43", "remaining_time": "1:45:15", "throughput": 19985.38, "total_tokens": 119587456}
|
|
{"current_steps": 38005, "total_steps": 78105, "loss": 0.354, "lr": 3.0490015985296053e-06, "epoch": 2.4329428333653413, "percentage": 48.66, "elapsed_time": "1:39:44", "remaining_time": "1:45:14", "throughput": 19985.74, "total_tokens": 119603264}
|
|
{"current_steps": 38010, "total_steps": 78105, "loss": 0.1856, "lr": 3.048456569265269e-06, "epoch": 2.4332629153063183, "percentage": 48.67, "elapsed_time": "1:39:45", "remaining_time": "1:45:13", "throughput": 19986.04, "total_tokens": 119618048}
|
|
{"current_steps": 38015, "total_steps": 78105, "loss": 0.3026, "lr": 3.047911512613869e-06, "epoch": 2.4335829972472953, "percentage": 48.67, "elapsed_time": "1:39:45", "remaining_time": "1:45:12", "throughput": 19986.44, "total_tokens": 119634432}
|
|
{"current_steps": 38020, "total_steps": 78105, "loss": 0.2709, "lr": 3.0473664286026216e-06, "epoch": 2.4339030791882723, "percentage": 48.68, "elapsed_time": "1:39:46", "remaining_time": "1:45:11", "throughput": 19986.79, "total_tokens": 119649984}
|
|
{"current_steps": 38025, "total_steps": 78105, "loss": 0.233, "lr": 3.0468213172587465e-06, "epoch": 2.434223161129249, "percentage": 48.68, "elapsed_time": "1:39:47", "remaining_time": "1:45:10", "throughput": 19987.17, "total_tokens": 119666176}
|
|
{"current_steps": 38030, "total_steps": 78105, "loss": 0.2561, "lr": 3.0462761786094624e-06, "epoch": 2.434543243070226, "percentage": 48.69, "elapsed_time": "1:39:47", "remaining_time": "1:45:09", "throughput": 19987.43, "total_tokens": 119680384}
|
|
{"current_steps": 38035, "total_steps": 78105, "loss": 0.2369, "lr": 3.0457310126819917e-06, "epoch": 2.434863325011203, "percentage": 48.7, "elapsed_time": "1:39:48", "remaining_time": "1:45:08", "throughput": 19987.77, "total_tokens": 119696000}
|
|
{"current_steps": 38040, "total_steps": 78105, "loss": 0.4032, "lr": 3.0451858195035564e-06, "epoch": 2.43518340695218, "percentage": 48.7, "elapsed_time": "1:39:49", "remaining_time": "1:45:07", "throughput": 19988.18, "total_tokens": 119712192}
|
|
{"current_steps": 38045, "total_steps": 78105, "loss": 0.1391, "lr": 3.044640599101382e-06, "epoch": 2.435503488893157, "percentage": 48.71, "elapsed_time": "1:39:49", "remaining_time": "1:45:07", "throughput": 19988.7, "total_tokens": 119730368}
|
|
{"current_steps": 38050, "total_steps": 78105, "loss": 0.224, "lr": 3.0440953515026916e-06, "epoch": 2.4358235708341334, "percentage": 48.72, "elapsed_time": "1:39:50", "remaining_time": "1:45:06", "throughput": 19989.13, "total_tokens": 119747136}
|
|
{"current_steps": 38055, "total_steps": 78105, "loss": 0.2364, "lr": 3.043550076734715e-06, "epoch": 2.4361436527751104, "percentage": 48.72, "elapsed_time": "1:39:51", "remaining_time": "1:45:05", "throughput": 19989.53, "total_tokens": 119763392}
|
|
{"current_steps": 38060, "total_steps": 78105, "loss": 0.2292, "lr": 3.043004774824678e-06, "epoch": 2.4364637347160873, "percentage": 48.73, "elapsed_time": "1:39:51", "remaining_time": "1:45:04", "throughput": 19989.87, "total_tokens": 119778944}
|
|
{"current_steps": 38065, "total_steps": 78105, "loss": 0.159, "lr": 3.0424594457998115e-06, "epoch": 2.4367838166570643, "percentage": 48.74, "elapsed_time": "1:39:52", "remaining_time": "1:45:03", "throughput": 19990.27, "total_tokens": 119795520}
|
|
{"current_steps": 38070, "total_steps": 78105, "loss": 0.1652, "lr": 3.0419140896873446e-06, "epoch": 2.437103898598041, "percentage": 48.74, "elapsed_time": "1:39:53", "remaining_time": "1:45:02", "throughput": 19990.59, "total_tokens": 119810688}
|
|
{"current_steps": 38075, "total_steps": 78105, "loss": 0.3335, "lr": 3.041368706514512e-06, "epoch": 2.437423980539018, "percentage": 48.75, "elapsed_time": "1:39:53", "remaining_time": "1:45:01", "throughput": 19990.88, "total_tokens": 119825280}
|
|
{"current_steps": 38080, "total_steps": 78105, "loss": 0.3407, "lr": 3.040823296308546e-06, "epoch": 2.437744062479995, "percentage": 48.75, "elapsed_time": "1:39:54", "remaining_time": "1:45:00", "throughput": 19991.2, "total_tokens": 119840384}
|
|
{"current_steps": 38085, "total_steps": 78105, "loss": 0.2037, "lr": 3.0402778590966814e-06, "epoch": 2.438064144420972, "percentage": 48.76, "elapsed_time": "1:39:55", "remaining_time": "1:44:59", "throughput": 19991.5, "total_tokens": 119855168}
|
|
{"current_steps": 38090, "total_steps": 78105, "loss": 0.2166, "lr": 3.039732394906155e-06, "epoch": 2.438384226361949, "percentage": 48.77, "elapsed_time": "1:39:55", "remaining_time": "1:44:58", "throughput": 19991.84, "total_tokens": 119870464}
|
|
{"current_steps": 38095, "total_steps": 78105, "loss": 0.1906, "lr": 3.039186903764203e-06, "epoch": 2.4387043083029254, "percentage": 48.77, "elapsed_time": "1:39:56", "remaining_time": "1:44:58", "throughput": 19992.15, "total_tokens": 119885696}
|
|
{"current_steps": 38100, "total_steps": 78105, "loss": 0.2295, "lr": 3.0386413856980666e-06, "epoch": 2.4390243902439024, "percentage": 48.78, "elapsed_time": "1:39:57", "remaining_time": "1:44:57", "throughput": 19992.53, "total_tokens": 119901312}
|
|
{"current_steps": 38105, "total_steps": 78105, "loss": 0.249, "lr": 3.038095840734985e-06, "epoch": 2.4393444721848794, "percentage": 48.79, "elapsed_time": "1:39:57", "remaining_time": "1:44:56", "throughput": 19992.89, "total_tokens": 119916992}
|
|
{"current_steps": 38110, "total_steps": 78105, "loss": 0.2908, "lr": 3.037550268902199e-06, "epoch": 2.4396645541258564, "percentage": 48.79, "elapsed_time": "1:39:58", "remaining_time": "1:44:55", "throughput": 19993.28, "total_tokens": 119932928}
|
|
{"current_steps": 38115, "total_steps": 78105, "loss": 0.2406, "lr": 3.037004670226954e-06, "epoch": 2.439984636066833, "percentage": 48.8, "elapsed_time": "1:39:59", "remaining_time": "1:44:54", "throughput": 19993.67, "total_tokens": 119948992}
|
|
{"current_steps": 38120, "total_steps": 78105, "loss": 0.2393, "lr": 3.0364590447364923e-06, "epoch": 2.44030471800781, "percentage": 48.81, "elapsed_time": "1:39:59", "remaining_time": "1:44:53", "throughput": 19993.96, "total_tokens": 119963712}
|
|
{"current_steps": 38125, "total_steps": 78105, "loss": 0.1609, "lr": 3.03591339245806e-06, "epoch": 2.440624799948787, "percentage": 48.81, "elapsed_time": "1:40:00", "remaining_time": "1:44:52", "throughput": 19994.27, "total_tokens": 119978752}
|
|
{"current_steps": 38130, "total_steps": 78105, "loss": 0.2331, "lr": 3.0353677134189042e-06, "epoch": 2.440944881889764, "percentage": 48.82, "elapsed_time": "1:40:01", "remaining_time": "1:44:51", "throughput": 19994.62, "total_tokens": 119994432}
|
|
{"current_steps": 38135, "total_steps": 78105, "loss": 0.2935, "lr": 3.0348220076462743e-06, "epoch": 2.441264963830741, "percentage": 48.83, "elapsed_time": "1:40:02", "remaining_time": "1:44:50", "throughput": 19995.03, "total_tokens": 120010816}
|
|
{"current_steps": 38140, "total_steps": 78105, "loss": 0.1801, "lr": 3.034276275167419e-06, "epoch": 2.4415850457717174, "percentage": 48.83, "elapsed_time": "1:40:02", "remaining_time": "1:44:49", "throughput": 19995.39, "total_tokens": 120026368}
|
|
{"current_steps": 38145, "total_steps": 78105, "loss": 0.1317, "lr": 3.033730516009589e-06, "epoch": 2.4419051277126944, "percentage": 48.84, "elapsed_time": "1:40:03", "remaining_time": "1:44:49", "throughput": 19995.73, "total_tokens": 120041920}
|
|
{"current_steps": 38150, "total_steps": 78105, "loss": 0.2711, "lr": 3.0331847302000373e-06, "epoch": 2.4422252096536714, "percentage": 48.84, "elapsed_time": "1:40:04", "remaining_time": "1:44:48", "throughput": 19996.15, "total_tokens": 120057984}
|
|
{"current_steps": 38155, "total_steps": 78105, "loss": 0.2212, "lr": 3.0326389177660164e-06, "epoch": 2.4425452915946484, "percentage": 48.85, "elapsed_time": "1:40:04", "remaining_time": "1:44:47", "throughput": 19996.59, "total_tokens": 120075456}
|
|
{"current_steps": 38160, "total_steps": 78105, "loss": 0.2365, "lr": 3.0320930787347835e-06, "epoch": 2.442865373535625, "percentage": 48.86, "elapsed_time": "1:40:05", "remaining_time": "1:44:46", "throughput": 19996.95, "total_tokens": 120091328}
|
|
{"current_steps": 38165, "total_steps": 78105, "loss": 0.2407, "lr": 3.0315472131335934e-06, "epoch": 2.443185455476602, "percentage": 48.86, "elapsed_time": "1:40:06", "remaining_time": "1:44:45", "throughput": 19997.35, "total_tokens": 120107392}
|
|
{"current_steps": 38170, "total_steps": 78105, "loss": 0.2258, "lr": 3.031001320989705e-06, "epoch": 2.443505537417579, "percentage": 48.87, "elapsed_time": "1:40:06", "remaining_time": "1:44:44", "throughput": 19997.79, "total_tokens": 120124096}
|
|
{"current_steps": 38175, "total_steps": 78105, "loss": 0.2009, "lr": 3.0304554023303757e-06, "epoch": 2.443825619358556, "percentage": 48.88, "elapsed_time": "1:40:07", "remaining_time": "1:44:43", "throughput": 19998.19, "total_tokens": 120140224}
|
|
{"current_steps": 38180, "total_steps": 78105, "loss": 0.2672, "lr": 3.0299094571828667e-06, "epoch": 2.444145701299533, "percentage": 48.88, "elapsed_time": "1:40:08", "remaining_time": "1:44:42", "throughput": 19998.66, "total_tokens": 120157504}
|
|
{"current_steps": 38185, "total_steps": 78105, "loss": 0.2614, "lr": 3.0293634855744397e-06, "epoch": 2.4444657832405094, "percentage": 48.89, "elapsed_time": "1:40:08", "remaining_time": "1:44:41", "throughput": 19999.03, "total_tokens": 120173184}
|
|
{"current_steps": 38190, "total_steps": 78105, "loss": 0.2464, "lr": 3.028817487532358e-06, "epoch": 2.4447858651814864, "percentage": 48.9, "elapsed_time": "1:40:09", "remaining_time": "1:44:41", "throughput": 19999.33, "total_tokens": 120187776}
|
|
{"current_steps": 38195, "total_steps": 78105, "loss": 0.3033, "lr": 3.028271463083885e-06, "epoch": 2.4451059471224634, "percentage": 48.9, "elapsed_time": "1:40:10", "remaining_time": "1:44:40", "throughput": 19999.64, "total_tokens": 120202368}
|
|
{"current_steps": 38200, "total_steps": 78105, "loss": 0.223, "lr": 3.0277254122562873e-06, "epoch": 2.4454260290634404, "percentage": 48.91, "elapsed_time": "1:40:10", "remaining_time": "1:44:39", "throughput": 20000.02, "total_tokens": 120218624}
|
|
{"current_steps": 38205, "total_steps": 78105, "loss": 0.2247, "lr": 3.0271793350768307e-06, "epoch": 2.445746111004417, "percentage": 48.91, "elapsed_time": "1:40:11", "remaining_time": "1:44:38", "throughput": 20000.41, "total_tokens": 120234432}
|
|
{"current_steps": 38210, "total_steps": 78105, "loss": 0.2486, "lr": 3.0266332315727843e-06, "epoch": 2.446066192945394, "percentage": 48.92, "elapsed_time": "1:40:12", "remaining_time": "1:44:37", "throughput": 20000.75, "total_tokens": 120249984}
|
|
{"current_steps": 38215, "total_steps": 78105, "loss": 0.1844, "lr": 3.0260871017714177e-06, "epoch": 2.446386274886371, "percentage": 48.93, "elapsed_time": "1:40:12", "remaining_time": "1:44:36", "throughput": 20001.04, "total_tokens": 120264448}
|
|
{"current_steps": 38220, "total_steps": 78105, "loss": 0.3329, "lr": 3.0255409457000014e-06, "epoch": 2.446706356827348, "percentage": 48.93, "elapsed_time": "1:40:13", "remaining_time": "1:44:35", "throughput": 20001.38, "total_tokens": 120279872}
|
|
{"current_steps": 38225, "total_steps": 78105, "loss": 0.1865, "lr": 3.0249947633858074e-06, "epoch": 2.447026438768325, "percentage": 48.94, "elapsed_time": "1:40:14", "remaining_time": "1:44:34", "throughput": 20001.71, "total_tokens": 120294976}
|
|
{"current_steps": 38230, "total_steps": 78105, "loss": 0.1945, "lr": 3.0244485548561097e-06, "epoch": 2.4473465207093015, "percentage": 48.95, "elapsed_time": "1:40:14", "remaining_time": "1:44:33", "throughput": 20002.09, "total_tokens": 120311040}
|
|
{"current_steps": 38235, "total_steps": 78105, "loss": 0.3044, "lr": 3.0239023201381836e-06, "epoch": 2.4476666026502785, "percentage": 48.95, "elapsed_time": "1:40:15", "remaining_time": "1:44:32", "throughput": 20002.44, "total_tokens": 120327040}
|
|
{"current_steps": 38240, "total_steps": 78105, "loss": 0.1835, "lr": 3.0233560592593036e-06, "epoch": 2.4479866845912555, "percentage": 48.96, "elapsed_time": "1:40:16", "remaining_time": "1:44:31", "throughput": 20002.77, "total_tokens": 120342208}
|
|
{"current_steps": 38245, "total_steps": 78105, "loss": 0.2948, "lr": 3.0228097722467483e-06, "epoch": 2.448306766532232, "percentage": 48.97, "elapsed_time": "1:40:16", "remaining_time": "1:44:31", "throughput": 20003.13, "total_tokens": 120357568}
|
|
{"current_steps": 38250, "total_steps": 78105, "loss": 0.1664, "lr": 3.022263459127796e-06, "epoch": 2.448626848473209, "percentage": 48.97, "elapsed_time": "1:40:17", "remaining_time": "1:44:30", "throughput": 20003.42, "total_tokens": 120372032}
|
|
{"current_steps": 38255, "total_steps": 78105, "loss": 0.1201, "lr": 3.0217171199297273e-06, "epoch": 2.448946930414186, "percentage": 48.98, "elapsed_time": "1:40:18", "remaining_time": "1:44:29", "throughput": 20003.75, "total_tokens": 120387520}
|
|
{"current_steps": 38260, "total_steps": 78105, "loss": 0.2732, "lr": 3.0211707546798235e-06, "epoch": 2.449267012355163, "percentage": 48.99, "elapsed_time": "1:40:18", "remaining_time": "1:44:28", "throughput": 20004.05, "total_tokens": 120402176}
|
|
{"current_steps": 38265, "total_steps": 78105, "loss": 0.1979, "lr": 3.0206243634053666e-06, "epoch": 2.44958709429614, "percentage": 48.99, "elapsed_time": "1:40:19", "remaining_time": "1:44:27", "throughput": 20004.42, "total_tokens": 120418176}
|
|
{"current_steps": 38270, "total_steps": 78105, "loss": 0.2882, "lr": 3.0200779461336397e-06, "epoch": 2.4499071762371165, "percentage": 49.0, "elapsed_time": "1:40:20", "remaining_time": "1:44:26", "throughput": 20004.9, "total_tokens": 120435328}
|
|
{"current_steps": 38275, "total_steps": 78105, "loss": 0.3605, "lr": 3.019531502891931e-06, "epoch": 2.4502272581780935, "percentage": 49.0, "elapsed_time": "1:40:20", "remaining_time": "1:44:25", "throughput": 20005.25, "total_tokens": 120450752}
|
|
{"current_steps": 38280, "total_steps": 78105, "loss": 0.1623, "lr": 3.0189850337075245e-06, "epoch": 2.4505473401190705, "percentage": 49.01, "elapsed_time": "1:40:21", "remaining_time": "1:44:24", "throughput": 20005.66, "total_tokens": 120467072}
|
|
{"current_steps": 38285, "total_steps": 78105, "loss": 0.2415, "lr": 3.0184385386077076e-06, "epoch": 2.4508674220600475, "percentage": 49.02, "elapsed_time": "1:40:22", "remaining_time": "1:44:23", "throughput": 20006.01, "total_tokens": 120482496}
|
|
{"current_steps": 38290, "total_steps": 78105, "loss": 0.2557, "lr": 3.017892017619772e-06, "epoch": 2.451187504001024, "percentage": 49.02, "elapsed_time": "1:40:22", "remaining_time": "1:44:22", "throughput": 20006.39, "total_tokens": 120498368}
|
|
{"current_steps": 38295, "total_steps": 78105, "loss": 0.2319, "lr": 3.017345470771006e-06, "epoch": 2.451507585942001, "percentage": 49.03, "elapsed_time": "1:40:23", "remaining_time": "1:44:21", "throughput": 20006.77, "total_tokens": 120514432}
|
|
{"current_steps": 38300, "total_steps": 78105, "loss": 0.225, "lr": 3.016798898088702e-06, "epoch": 2.451827667882978, "percentage": 49.04, "elapsed_time": "1:40:24", "remaining_time": "1:44:21", "throughput": 20007.06, "total_tokens": 120528960}
|
|
{"current_steps": 38305, "total_steps": 78105, "loss": 0.2271, "lr": 3.016252299600153e-06, "epoch": 2.452147749823955, "percentage": 49.04, "elapsed_time": "1:40:24", "remaining_time": "1:44:20", "throughput": 20007.45, "total_tokens": 120544896}
|
|
{"current_steps": 38310, "total_steps": 78105, "loss": 0.3316, "lr": 3.015705675332653e-06, "epoch": 2.452467831764932, "percentage": 49.05, "elapsed_time": "1:40:25", "remaining_time": "1:44:19", "throughput": 20007.78, "total_tokens": 120559936}
|
|
{"current_steps": 38315, "total_steps": 78105, "loss": 0.2252, "lr": 3.015159025313498e-06, "epoch": 2.4527879137059085, "percentage": 49.06, "elapsed_time": "1:40:26", "remaining_time": "1:44:18", "throughput": 20008.08, "total_tokens": 120574528}
|
|
{"current_steps": 38320, "total_steps": 78105, "loss": 0.2107, "lr": 3.014612349569985e-06, "epoch": 2.4531079956468855, "percentage": 49.06, "elapsed_time": "1:40:26", "remaining_time": "1:44:17", "throughput": 20008.37, "total_tokens": 120589056}
|
|
{"current_steps": 38325, "total_steps": 78105, "loss": 0.157, "lr": 3.0140656481294114e-06, "epoch": 2.4534280775878625, "percentage": 49.07, "elapsed_time": "1:40:27", "remaining_time": "1:44:16", "throughput": 20008.79, "total_tokens": 120605760}
|
|
{"current_steps": 38330, "total_steps": 78105, "loss": 0.2334, "lr": 3.013518921019077e-06, "epoch": 2.4537481595288395, "percentage": 49.07, "elapsed_time": "1:40:28", "remaining_time": "1:44:15", "throughput": 20009.13, "total_tokens": 120621184}
|
|
{"current_steps": 38335, "total_steps": 78105, "loss": 0.1959, "lr": 3.0129721682662826e-06, "epoch": 2.454068241469816, "percentage": 49.08, "elapsed_time": "1:40:28", "remaining_time": "1:44:14", "throughput": 20009.5, "total_tokens": 120636992}
|
|
{"current_steps": 38340, "total_steps": 78105, "loss": 0.1609, "lr": 3.0124253898983295e-06, "epoch": 2.454388323410793, "percentage": 49.09, "elapsed_time": "1:40:29", "remaining_time": "1:44:13", "throughput": 20009.83, "total_tokens": 120652224}
|
|
{"current_steps": 38345, "total_steps": 78105, "loss": 0.2677, "lr": 3.011878585942522e-06, "epoch": 2.45470840535177, "percentage": 49.09, "elapsed_time": "1:40:30", "remaining_time": "1:44:12", "throughput": 20010.15, "total_tokens": 120667328}
|
|
{"current_steps": 38350, "total_steps": 78105, "loss": 0.1615, "lr": 3.0113317564261646e-06, "epoch": 2.455028487292747, "percentage": 49.1, "elapsed_time": "1:40:30", "remaining_time": "1:44:11", "throughput": 20010.45, "total_tokens": 120682112}
|
|
{"current_steps": 38355, "total_steps": 78105, "loss": 0.2636, "lr": 3.0107849013765623e-06, "epoch": 2.455348569233724, "percentage": 49.11, "elapsed_time": "1:40:31", "remaining_time": "1:44:10", "throughput": 20010.81, "total_tokens": 120697728}
|
|
{"current_steps": 38360, "total_steps": 78105, "loss": 0.245, "lr": 3.0102380208210225e-06, "epoch": 2.4556686511747006, "percentage": 49.11, "elapsed_time": "1:40:32", "remaining_time": "1:44:10", "throughput": 20011.12, "total_tokens": 120712576}
|
|
{"current_steps": 38365, "total_steps": 78105, "loss": 0.1978, "lr": 3.009691114786854e-06, "epoch": 2.4559887331156776, "percentage": 49.12, "elapsed_time": "1:40:32", "remaining_time": "1:44:09", "throughput": 20011.48, "total_tokens": 120728384}
|
|
{"current_steps": 38370, "total_steps": 78105, "loss": 0.1854, "lr": 3.0091441833013663e-06, "epoch": 2.4563088150566545, "percentage": 49.13, "elapsed_time": "1:40:33", "remaining_time": "1:44:08", "throughput": 20012.02, "total_tokens": 120746880}
|
|
{"current_steps": 38375, "total_steps": 78105, "loss": 0.2427, "lr": 3.0085972263918694e-06, "epoch": 2.4566288969976315, "percentage": 49.13, "elapsed_time": "1:40:34", "remaining_time": "1:44:07", "throughput": 20012.4, "total_tokens": 120763072}
|
|
{"current_steps": 38380, "total_steps": 78105, "loss": 0.256, "lr": 3.008050244085677e-06, "epoch": 2.456948978938608, "percentage": 49.14, "elapsed_time": "1:40:35", "remaining_time": "1:44:06", "throughput": 20012.72, "total_tokens": 120778368}
|
|
{"current_steps": 38385, "total_steps": 78105, "loss": 0.2559, "lr": 3.007503236410101e-06, "epoch": 2.457269060879585, "percentage": 49.15, "elapsed_time": "1:40:35", "remaining_time": "1:44:05", "throughput": 20013.08, "total_tokens": 120794048}
|
|
{"current_steps": 38390, "total_steps": 78105, "loss": 0.2848, "lr": 3.0069562033924573e-06, "epoch": 2.457589142820562, "percentage": 49.15, "elapsed_time": "1:40:36", "remaining_time": "1:44:04", "throughput": 20013.4, "total_tokens": 120809088}
|
|
{"current_steps": 38395, "total_steps": 78105, "loss": 0.2871, "lr": 3.006409145060061e-06, "epoch": 2.457909224761539, "percentage": 49.16, "elapsed_time": "1:40:37", "remaining_time": "1:44:03", "throughput": 20013.82, "total_tokens": 120826048}
|
|
{"current_steps": 38400, "total_steps": 78105, "loss": 0.1771, "lr": 3.00586206144023e-06, "epoch": 2.458229306702516, "percentage": 49.16, "elapsed_time": "1:40:37", "remaining_time": "1:44:02", "throughput": 20014.17, "total_tokens": 120841344}
|
|
{"current_steps": 38405, "total_steps": 78105, "loss": 0.2119, "lr": 3.0053149525602834e-06, "epoch": 2.4585493886434926, "percentage": 49.17, "elapsed_time": "1:40:38", "remaining_time": "1:44:02", "throughput": 20013.41, "total_tokens": 120857856}
|
|
{"current_steps": 38410, "total_steps": 78105, "loss": 0.2947, "lr": 3.004767818447539e-06, "epoch": 2.4588694705844696, "percentage": 49.18, "elapsed_time": "1:40:39", "remaining_time": "1:44:01", "throughput": 20013.75, "total_tokens": 120873408}
|
|
{"current_steps": 38415, "total_steps": 78105, "loss": 0.1922, "lr": 3.0042206591293195e-06, "epoch": 2.4591895525254466, "percentage": 49.18, "elapsed_time": "1:40:40", "remaining_time": "1:44:00", "throughput": 20014.06, "total_tokens": 120888512}
|
|
{"current_steps": 38420, "total_steps": 78105, "loss": 0.2306, "lr": 3.0036734746329465e-06, "epoch": 2.4595096344664236, "percentage": 49.19, "elapsed_time": "1:40:40", "remaining_time": "1:43:59", "throughput": 20014.4, "total_tokens": 120904064}
|
|
{"current_steps": 38425, "total_steps": 78105, "loss": 0.1751, "lr": 3.003126264985744e-06, "epoch": 2.4598297164074, "percentage": 49.2, "elapsed_time": "1:40:41", "remaining_time": "1:43:58", "throughput": 20014.72, "total_tokens": 120919296}
|
|
{"current_steps": 38430, "total_steps": 78105, "loss": 0.2463, "lr": 3.0025790302150365e-06, "epoch": 2.460149798348377, "percentage": 49.2, "elapsed_time": "1:40:42", "remaining_time": "1:43:57", "throughput": 20015.11, "total_tokens": 120935296}
|
|
{"current_steps": 38435, "total_steps": 78105, "loss": 0.338, "lr": 3.00203177034815e-06, "epoch": 2.460469880289354, "percentage": 49.21, "elapsed_time": "1:40:42", "remaining_time": "1:43:57", "throughput": 20015.54, "total_tokens": 120952000}
|
|
{"current_steps": 38440, "total_steps": 78105, "loss": 0.231, "lr": 3.0014844854124114e-06, "epoch": 2.460789962230331, "percentage": 49.22, "elapsed_time": "1:40:43", "remaining_time": "1:43:56", "throughput": 20015.85, "total_tokens": 120967104}
|
|
{"current_steps": 38445, "total_steps": 78105, "loss": 0.3679, "lr": 3.0009371754351503e-06, "epoch": 2.461110044171308, "percentage": 49.22, "elapsed_time": "1:40:44", "remaining_time": "1:43:55", "throughput": 20016.2, "total_tokens": 120982848}
|
|
{"current_steps": 38450, "total_steps": 78105, "loss": 0.1984, "lr": 3.000389840443696e-06, "epoch": 2.4614301261122846, "percentage": 49.23, "elapsed_time": "1:40:44", "remaining_time": "1:43:54", "throughput": 20016.63, "total_tokens": 120999424}
|
|
{"current_steps": 38455, "total_steps": 78105, "loss": 0.239, "lr": 2.9998424804653794e-06, "epoch": 2.4617502080532616, "percentage": 49.24, "elapsed_time": "1:40:45", "remaining_time": "1:43:53", "throughput": 20016.98, "total_tokens": 121015360}
|
|
{"current_steps": 38460, "total_steps": 78105, "loss": 0.2238, "lr": 2.9992950955275325e-06, "epoch": 2.4620702899942386, "percentage": 49.24, "elapsed_time": "1:40:46", "remaining_time": "1:43:52", "throughput": 20017.41, "total_tokens": 121032320}
|
|
{"current_steps": 38465, "total_steps": 78105, "loss": 0.1826, "lr": 2.9987476856574897e-06, "epoch": 2.4623903719352156, "percentage": 49.25, "elapsed_time": "1:40:47", "remaining_time": "1:43:51", "throughput": 20017.9, "total_tokens": 121049856}
|
|
{"current_steps": 38470, "total_steps": 78105, "loss": 0.1803, "lr": 2.9982002508825853e-06, "epoch": 2.462710453876192, "percentage": 49.25, "elapsed_time": "1:40:47", "remaining_time": "1:43:50", "throughput": 20018.25, "total_tokens": 121065856}
|
|
{"current_steps": 38475, "total_steps": 78105, "loss": 0.1846, "lr": 2.997652791230155e-06, "epoch": 2.463030535817169, "percentage": 49.26, "elapsed_time": "1:40:48", "remaining_time": "1:43:50", "throughput": 20018.62, "total_tokens": 121081664}
|
|
{"current_steps": 38480, "total_steps": 78105, "loss": 0.3221, "lr": 2.997105306727537e-06, "epoch": 2.463350617758146, "percentage": 49.27, "elapsed_time": "1:40:49", "remaining_time": "1:43:49", "throughput": 20019.0, "total_tokens": 121097856}
|
|
{"current_steps": 38485, "total_steps": 78105, "loss": 0.1719, "lr": 2.996557797402069e-06, "epoch": 2.463670699699123, "percentage": 49.27, "elapsed_time": "1:40:49", "remaining_time": "1:43:48", "throughput": 20019.38, "total_tokens": 121113536}
|
|
{"current_steps": 38490, "total_steps": 78105, "loss": 0.2739, "lr": 2.9960102632810915e-06, "epoch": 2.4639907816401, "percentage": 49.28, "elapsed_time": "1:40:50", "remaining_time": "1:43:47", "throughput": 20019.74, "total_tokens": 121129152}
|
|
{"current_steps": 38495, "total_steps": 78105, "loss": 0.182, "lr": 2.9954627043919448e-06, "epoch": 2.4643108635810766, "percentage": 49.29, "elapsed_time": "1:40:51", "remaining_time": "1:43:46", "throughput": 20020.09, "total_tokens": 121144704}
|
|
{"current_steps": 38500, "total_steps": 78105, "loss": 0.2309, "lr": 2.994915120761971e-06, "epoch": 2.4646309455220536, "percentage": 49.29, "elapsed_time": "1:40:51", "remaining_time": "1:43:45", "throughput": 20020.45, "total_tokens": 121160704}
|
|
{"current_steps": 38505, "total_steps": 78105, "loss": 0.2265, "lr": 2.9943675124185147e-06, "epoch": 2.4649510274630306, "percentage": 49.3, "elapsed_time": "1:40:52", "remaining_time": "1:43:44", "throughput": 20020.78, "total_tokens": 121175808}
|
|
{"current_steps": 38510, "total_steps": 78105, "loss": 0.2576, "lr": 2.9938198793889197e-06, "epoch": 2.465271109404007, "percentage": 49.31, "elapsed_time": "1:40:53", "remaining_time": "1:43:43", "throughput": 20021.14, "total_tokens": 121191616}
|
|
{"current_steps": 38515, "total_steps": 78105, "loss": 0.2165, "lr": 2.9932722217005315e-06, "epoch": 2.465591191344984, "percentage": 49.31, "elapsed_time": "1:40:53", "remaining_time": "1:43:42", "throughput": 20021.49, "total_tokens": 121207168}
|
|
{"current_steps": 38520, "total_steps": 78105, "loss": 0.1199, "lr": 2.9927245393806985e-06, "epoch": 2.465911273285961, "percentage": 49.32, "elapsed_time": "1:40:54", "remaining_time": "1:43:41", "throughput": 20021.82, "total_tokens": 121222656}
|
|
{"current_steps": 38525, "total_steps": 78105, "loss": 0.2052, "lr": 2.9921768324567686e-06, "epoch": 2.466231355226938, "percentage": 49.32, "elapsed_time": "1:40:55", "remaining_time": "1:43:41", "throughput": 20022.34, "total_tokens": 121240832}
|
|
{"current_steps": 38530, "total_steps": 78105, "loss": 0.3025, "lr": 2.991629100956091e-06, "epoch": 2.466551437167915, "percentage": 49.33, "elapsed_time": "1:40:55", "remaining_time": "1:43:40", "throughput": 20022.66, "total_tokens": 121256320}
|
|
{"current_steps": 38535, "total_steps": 78105, "loss": 0.1865, "lr": 2.991081344906017e-06, "epoch": 2.4668715191088917, "percentage": 49.34, "elapsed_time": "1:40:56", "remaining_time": "1:43:39", "throughput": 20022.98, "total_tokens": 121271616}
|
|
{"current_steps": 38540, "total_steps": 78105, "loss": 0.2554, "lr": 2.990533564333899e-06, "epoch": 2.4671916010498687, "percentage": 49.34, "elapsed_time": "1:40:57", "remaining_time": "1:43:38", "throughput": 20023.27, "total_tokens": 121286336}
|
|
{"current_steps": 38545, "total_steps": 78105, "loss": 0.1651, "lr": 2.9899857592670893e-06, "epoch": 2.4675116829908457, "percentage": 49.35, "elapsed_time": "1:40:57", "remaining_time": "1:43:37", "throughput": 20023.61, "total_tokens": 121301952}
|
|
{"current_steps": 38550, "total_steps": 78105, "loss": 0.3387, "lr": 2.9894379297329435e-06, "epoch": 2.4678317649318227, "percentage": 49.36, "elapsed_time": "1:40:58", "remaining_time": "1:43:36", "throughput": 20024.0, "total_tokens": 121318080}
|
|
{"current_steps": 38555, "total_steps": 78105, "loss": 0.1882, "lr": 2.988890075758816e-06, "epoch": 2.468151846872799, "percentage": 49.36, "elapsed_time": "1:40:59", "remaining_time": "1:43:35", "throughput": 20024.38, "total_tokens": 121334080}
|
|
{"current_steps": 38560, "total_steps": 78105, "loss": 0.242, "lr": 2.988342197372066e-06, "epoch": 2.468471928813776, "percentage": 49.37, "elapsed_time": "1:40:59", "remaining_time": "1:43:34", "throughput": 20024.71, "total_tokens": 121349248}
|
|
{"current_steps": 38565, "total_steps": 78105, "loss": 0.2495, "lr": 2.9877942946000494e-06, "epoch": 2.468792010754753, "percentage": 49.38, "elapsed_time": "1:41:00", "remaining_time": "1:43:33", "throughput": 20025.0, "total_tokens": 121363840}
|
|
{"current_steps": 38570, "total_steps": 78105, "loss": 0.2575, "lr": 2.9872463674701267e-06, "epoch": 2.46911209269573, "percentage": 49.38, "elapsed_time": "1:41:01", "remaining_time": "1:43:32", "throughput": 20025.3, "total_tokens": 121379136}
|
|
{"current_steps": 38575, "total_steps": 78105, "loss": 0.3895, "lr": 2.9866984160096586e-06, "epoch": 2.469432174636707, "percentage": 49.39, "elapsed_time": "1:41:02", "remaining_time": "1:43:32", "throughput": 20025.81, "total_tokens": 121397376}
|
|
{"current_steps": 38580, "total_steps": 78105, "loss": 0.1979, "lr": 2.986150440246007e-06, "epoch": 2.4697522565776837, "percentage": 49.4, "elapsed_time": "1:41:02", "remaining_time": "1:43:31", "throughput": 20026.21, "total_tokens": 121413632}
|
|
{"current_steps": 38585, "total_steps": 78105, "loss": 0.1206, "lr": 2.9856024402065344e-06, "epoch": 2.4700723385186607, "percentage": 49.4, "elapsed_time": "1:41:03", "remaining_time": "1:43:30", "throughput": 20026.52, "total_tokens": 121428544}
|
|
{"current_steps": 38590, "total_steps": 78105, "loss": 0.2129, "lr": 2.9850544159186046e-06, "epoch": 2.4703924204596377, "percentage": 49.41, "elapsed_time": "1:41:04", "remaining_time": "1:43:29", "throughput": 20026.78, "total_tokens": 121442752}
|
|
{"current_steps": 38595, "total_steps": 78105, "loss": 0.2264, "lr": 2.9845063674095844e-06, "epoch": 2.4707125024006147, "percentage": 49.41, "elapsed_time": "1:41:04", "remaining_time": "1:43:28", "throughput": 20027.14, "total_tokens": 121458368}
|
|
{"current_steps": 38600, "total_steps": 78105, "loss": 0.1578, "lr": 2.9839582947068404e-06, "epoch": 2.4710325843415912, "percentage": 49.42, "elapsed_time": "1:41:05", "remaining_time": "1:43:27", "throughput": 20027.53, "total_tokens": 121474432}
|
|
{"current_steps": 38605, "total_steps": 78105, "loss": 0.3306, "lr": 2.9834101978377394e-06, "epoch": 2.4713526662825682, "percentage": 49.43, "elapsed_time": "1:41:06", "remaining_time": "1:43:26", "throughput": 20027.88, "total_tokens": 121490240}
|
|
{"current_steps": 38610, "total_steps": 78105, "loss": 0.1765, "lr": 2.9828620768296518e-06, "epoch": 2.471672748223545, "percentage": 49.43, "elapsed_time": "1:41:06", "remaining_time": "1:43:25", "throughput": 20028.25, "total_tokens": 121506496}
|
|
{"current_steps": 38615, "total_steps": 78105, "loss": 0.1379, "lr": 2.982313931709946e-06, "epoch": 2.471992830164522, "percentage": 49.44, "elapsed_time": "1:41:07", "remaining_time": "1:43:24", "throughput": 20028.7, "total_tokens": 121523392}
|
|
{"current_steps": 38620, "total_steps": 78105, "loss": 0.2563, "lr": 2.981765762505996e-06, "epoch": 2.472312912105499, "percentage": 49.45, "elapsed_time": "1:41:08", "remaining_time": "1:43:24", "throughput": 20029.04, "total_tokens": 121538624}
|
|
{"current_steps": 38625, "total_steps": 78105, "loss": 0.2458, "lr": 2.981217569245173e-06, "epoch": 2.4726329940464757, "percentage": 49.45, "elapsed_time": "1:41:08", "remaining_time": "1:43:23", "throughput": 20029.37, "total_tokens": 121553728}
|
|
{"current_steps": 38630, "total_steps": 78105, "loss": 0.1847, "lr": 2.980669351954851e-06, "epoch": 2.4729530759874527, "percentage": 49.46, "elapsed_time": "1:41:09", "remaining_time": "1:43:22", "throughput": 20029.73, "total_tokens": 121569472}
|
|
{"current_steps": 38635, "total_steps": 78105, "loss": 0.1923, "lr": 2.980121110662404e-06, "epoch": 2.4732731579284297, "percentage": 49.47, "elapsed_time": "1:41:10", "remaining_time": "1:43:21", "throughput": 20030.16, "total_tokens": 121586048}
|
|
{"current_steps": 38640, "total_steps": 78105, "loss": 0.3104, "lr": 2.979572845395211e-06, "epoch": 2.4735932398694067, "percentage": 49.47, "elapsed_time": "1:41:10", "remaining_time": "1:43:20", "throughput": 20030.43, "total_tokens": 121600576}
|
|
{"current_steps": 38645, "total_steps": 78105, "loss": 0.1758, "lr": 2.9790245561806475e-06, "epoch": 2.4739133218103833, "percentage": 49.48, "elapsed_time": "1:41:11", "remaining_time": "1:43:19", "throughput": 20030.77, "total_tokens": 121616000}
|
|
{"current_steps": 38650, "total_steps": 78105, "loss": 0.3138, "lr": 2.9784762430460933e-06, "epoch": 2.4742334037513602, "percentage": 49.48, "elapsed_time": "1:41:12", "remaining_time": "1:43:18", "throughput": 20031.13, "total_tokens": 121631936}
|
|
{"current_steps": 38655, "total_steps": 78105, "loss": 0.2557, "lr": 2.977927906018928e-06, "epoch": 2.4745534856923372, "percentage": 49.49, "elapsed_time": "1:41:12", "remaining_time": "1:43:17", "throughput": 20031.49, "total_tokens": 121647808}
|
|
{"current_steps": 38660, "total_steps": 78105, "loss": 0.1177, "lr": 2.9773795451265318e-06, "epoch": 2.4748735676333142, "percentage": 49.5, "elapsed_time": "1:41:13", "remaining_time": "1:43:16", "throughput": 20031.82, "total_tokens": 121663104}
|
|
{"current_steps": 38665, "total_steps": 78105, "loss": 0.2509, "lr": 2.9768311603962886e-06, "epoch": 2.475193649574291, "percentage": 49.5, "elapsed_time": "1:41:14", "remaining_time": "1:43:15", "throughput": 20032.2, "total_tokens": 121679232}
|
|
{"current_steps": 38670, "total_steps": 78105, "loss": 0.2307, "lr": 2.97628275185558e-06, "epoch": 2.4755137315152678, "percentage": 49.51, "elapsed_time": "1:41:14", "remaining_time": "1:43:15", "throughput": 20032.51, "total_tokens": 121694208}
|
|
{"current_steps": 38675, "total_steps": 78105, "loss": 0.3046, "lr": 2.975734319531792e-06, "epoch": 2.4758338134562448, "percentage": 49.52, "elapsed_time": "1:41:15", "remaining_time": "1:43:14", "throughput": 20032.88, "total_tokens": 121710272}
|
|
{"current_steps": 38680, "total_steps": 78105, "loss": 0.1934, "lr": 2.9751858634523105e-06, "epoch": 2.4761538953972217, "percentage": 49.52, "elapsed_time": "1:41:16", "remaining_time": "1:43:13", "throughput": 20033.23, "total_tokens": 121726144}
|
|
{"current_steps": 38685, "total_steps": 78105, "loss": 0.2431, "lr": 2.974637383644522e-06, "epoch": 2.4764739773381987, "percentage": 49.53, "elapsed_time": "1:41:16", "remaining_time": "1:43:12", "throughput": 20033.6, "total_tokens": 121741888}
|
|
{"current_steps": 38690, "total_steps": 78105, "loss": 0.221, "lr": 2.974088880135814e-06, "epoch": 2.4767940592791753, "percentage": 49.54, "elapsed_time": "1:41:17", "remaining_time": "1:43:11", "throughput": 20033.95, "total_tokens": 121757312}
|
|
{"current_steps": 38695, "total_steps": 78105, "loss": 0.1129, "lr": 2.9735403529535777e-06, "epoch": 2.4771141412201523, "percentage": 49.54, "elapsed_time": "1:41:18", "remaining_time": "1:43:10", "throughput": 20034.25, "total_tokens": 121772224}
|
|
{"current_steps": 38700, "total_steps": 78105, "loss": 0.2844, "lr": 2.9729918021252023e-06, "epoch": 2.4774342231611293, "percentage": 49.55, "elapsed_time": "1:41:18", "remaining_time": "1:43:09", "throughput": 20034.65, "total_tokens": 121788672}
|
|
{"current_steps": 38705, "total_steps": 78105, "loss": 0.239, "lr": 2.9724432276780803e-06, "epoch": 2.4777543051021063, "percentage": 49.56, "elapsed_time": "1:41:19", "remaining_time": "1:43:08", "throughput": 20035.04, "total_tokens": 121804928}
|
|
{"current_steps": 38710, "total_steps": 78105, "loss": 0.3126, "lr": 2.9718946296396045e-06, "epoch": 2.4780743870430832, "percentage": 49.56, "elapsed_time": "1:41:20", "remaining_time": "1:43:07", "throughput": 20035.35, "total_tokens": 121819840}
|
|
{"current_steps": 38715, "total_steps": 78105, "loss": 0.3743, "lr": 2.9713460080371685e-06, "epoch": 2.47839446898406, "percentage": 49.57, "elapsed_time": "1:41:20", "remaining_time": "1:43:06", "throughput": 20035.64, "total_tokens": 121834624}
|
|
{"current_steps": 38720, "total_steps": 78105, "loss": 0.2391, "lr": 2.9707973628981683e-06, "epoch": 2.478714550925037, "percentage": 49.57, "elapsed_time": "1:41:21", "remaining_time": "1:43:06", "throughput": 20035.99, "total_tokens": 121850176}
|
|
{"current_steps": 38725, "total_steps": 78105, "loss": 0.2137, "lr": 2.9702486942500003e-06, "epoch": 2.4790346328660138, "percentage": 49.58, "elapsed_time": "1:41:22", "remaining_time": "1:43:05", "throughput": 20036.38, "total_tokens": 121865920}
|
|
{"current_steps": 38730, "total_steps": 78105, "loss": 0.218, "lr": 2.9697000021200613e-06, "epoch": 2.4793547148069908, "percentage": 49.59, "elapsed_time": "1:41:22", "remaining_time": "1:43:04", "throughput": 20036.77, "total_tokens": 121882112}
|
|
{"current_steps": 38735, "total_steps": 78105, "loss": 0.227, "lr": 2.9691512865357513e-06, "epoch": 2.4796747967479673, "percentage": 49.59, "elapsed_time": "1:41:23", "remaining_time": "1:43:03", "throughput": 20037.14, "total_tokens": 121897920}
|
|
{"current_steps": 38740, "total_steps": 78105, "loss": 0.2508, "lr": 2.9686025475244695e-06, "epoch": 2.4799948786889443, "percentage": 49.6, "elapsed_time": "1:41:24", "remaining_time": "1:43:02", "throughput": 20037.48, "total_tokens": 121913472}
|
|
{"current_steps": 38745, "total_steps": 78105, "loss": 0.2228, "lr": 2.9680537851136176e-06, "epoch": 2.4803149606299213, "percentage": 49.61, "elapsed_time": "1:41:24", "remaining_time": "1:43:01", "throughput": 20037.87, "total_tokens": 121929920}
|
|
{"current_steps": 38750, "total_steps": 78105, "loss": 0.1358, "lr": 2.967504999330597e-06, "epoch": 2.4806350425708983, "percentage": 49.61, "elapsed_time": "1:41:25", "remaining_time": "1:43:00", "throughput": 20038.21, "total_tokens": 121945472}
|
|
{"current_steps": 38755, "total_steps": 78105, "loss": 0.2594, "lr": 2.9669561902028125e-06, "epoch": 2.4809551245118753, "percentage": 49.62, "elapsed_time": "1:41:26", "remaining_time": "1:42:59", "throughput": 20038.54, "total_tokens": 121960512}
|
|
{"current_steps": 38760, "total_steps": 78105, "loss": 0.1662, "lr": 2.966407357757668e-06, "epoch": 2.481275206452852, "percentage": 49.63, "elapsed_time": "1:41:27", "remaining_time": "1:42:58", "throughput": 20039.02, "total_tokens": 121978304}
|
|
{"current_steps": 38765, "total_steps": 78105, "loss": 0.302, "lr": 2.9658585020225695e-06, "epoch": 2.481595288393829, "percentage": 49.63, "elapsed_time": "1:41:27", "remaining_time": "1:42:58", "throughput": 20039.43, "total_tokens": 121994880}
|
|
{"current_steps": 38770, "total_steps": 78105, "loss": 0.1458, "lr": 2.965309623024924e-06, "epoch": 2.481915370334806, "percentage": 49.64, "elapsed_time": "1:41:28", "remaining_time": "1:42:57", "throughput": 20039.7, "total_tokens": 122009600}
|
|
{"current_steps": 38775, "total_steps": 78105, "loss": 0.2354, "lr": 2.96476072079214e-06, "epoch": 2.482235452275783, "percentage": 49.64, "elapsed_time": "1:41:29", "remaining_time": "1:42:56", "throughput": 20040.09, "total_tokens": 122026304}
|
|
{"current_steps": 38780, "total_steps": 78105, "loss": 0.2299, "lr": 2.964211795351626e-06, "epoch": 2.4825555342167593, "percentage": 49.65, "elapsed_time": "1:41:29", "remaining_time": "1:42:55", "throughput": 20040.44, "total_tokens": 122041664}
|
|
{"current_steps": 38785, "total_steps": 78105, "loss": 0.2043, "lr": 2.9636628467307927e-06, "epoch": 2.4828756161577363, "percentage": 49.66, "elapsed_time": "1:41:30", "remaining_time": "1:42:54", "throughput": 20040.76, "total_tokens": 122056896}
|
|
{"current_steps": 38790, "total_steps": 78105, "loss": 0.2083, "lr": 2.963113874957052e-06, "epoch": 2.4831956980987133, "percentage": 49.66, "elapsed_time": "1:41:31", "remaining_time": "1:42:53", "throughput": 20041.06, "total_tokens": 122071488}
|
|
{"current_steps": 38795, "total_steps": 78105, "loss": 0.2005, "lr": 2.962564880057817e-06, "epoch": 2.4835157800396903, "percentage": 49.67, "elapsed_time": "1:41:31", "remaining_time": "1:42:52", "throughput": 20041.37, "total_tokens": 122086400}
|
|
{"current_steps": 38800, "total_steps": 78105, "loss": 0.1575, "lr": 2.9620158620605018e-06, "epoch": 2.483835861980667, "percentage": 49.68, "elapsed_time": "1:41:32", "remaining_time": "1:42:51", "throughput": 20041.79, "total_tokens": 122102784}
|
|
{"current_steps": 38805, "total_steps": 78105, "loss": 0.2453, "lr": 2.96146682099252e-06, "epoch": 2.484155943921644, "percentage": 49.68, "elapsed_time": "1:41:33", "remaining_time": "1:42:50", "throughput": 20042.18, "total_tokens": 122118848}
|
|
{"current_steps": 38810, "total_steps": 78105, "loss": 0.1629, "lr": 2.960917756881289e-06, "epoch": 2.484476025862621, "percentage": 49.69, "elapsed_time": "1:41:33", "remaining_time": "1:42:49", "throughput": 20042.51, "total_tokens": 122134080}
|
|
{"current_steps": 38815, "total_steps": 78105, "loss": 0.4773, "lr": 2.960368669754226e-06, "epoch": 2.484796107803598, "percentage": 49.7, "elapsed_time": "1:41:34", "remaining_time": "1:42:48", "throughput": 20042.83, "total_tokens": 122148992}
|
|
{"current_steps": 38820, "total_steps": 78105, "loss": 0.1538, "lr": 2.9598195596387504e-06, "epoch": 2.4851161897445744, "percentage": 49.7, "elapsed_time": "1:41:35", "remaining_time": "1:42:48", "throughput": 20043.2, "total_tokens": 122164608}
|
|
{"current_steps": 38825, "total_steps": 78105, "loss": 0.232, "lr": 2.9592704265622807e-06, "epoch": 2.4854362716855514, "percentage": 49.71, "elapsed_time": "1:41:35", "remaining_time": "1:42:47", "throughput": 20043.59, "total_tokens": 122180608}
|
|
{"current_steps": 38830, "total_steps": 78105, "loss": 0.2174, "lr": 2.9587212705522384e-06, "epoch": 2.4857563536265284, "percentage": 49.72, "elapsed_time": "1:41:36", "remaining_time": "1:42:46", "throughput": 20043.89, "total_tokens": 122195712}
|
|
{"current_steps": 38835, "total_steps": 78105, "loss": 0.3016, "lr": 2.9581720916360447e-06, "epoch": 2.4860764355675053, "percentage": 49.72, "elapsed_time": "1:41:37", "remaining_time": "1:42:45", "throughput": 20044.27, "total_tokens": 122211904}
|
|
{"current_steps": 38840, "total_steps": 78105, "loss": 0.2784, "lr": 2.9576228898411234e-06, "epoch": 2.4863965175084823, "percentage": 49.73, "elapsed_time": "1:41:37", "remaining_time": "1:42:44", "throughput": 20044.58, "total_tokens": 122226688}
|
|
{"current_steps": 38845, "total_steps": 78105, "loss": 0.1753, "lr": 2.957073665194898e-06, "epoch": 2.486716599449459, "percentage": 49.73, "elapsed_time": "1:41:38", "remaining_time": "1:42:43", "throughput": 20044.87, "total_tokens": 122241408}
|
|
{"current_steps": 38850, "total_steps": 78105, "loss": 0.194, "lr": 2.956524417724796e-06, "epoch": 2.487036681390436, "percentage": 49.74, "elapsed_time": "1:41:39", "remaining_time": "1:42:42", "throughput": 20045.24, "total_tokens": 122257216}
|
|
{"current_steps": 38855, "total_steps": 78105, "loss": 0.1745, "lr": 2.955975147458242e-06, "epoch": 2.487356763331413, "percentage": 49.75, "elapsed_time": "1:41:39", "remaining_time": "1:42:41", "throughput": 20045.56, "total_tokens": 122272512}
|
|
{"current_steps": 38860, "total_steps": 78105, "loss": 0.3614, "lr": 2.9554258544226644e-06, "epoch": 2.48767684527239, "percentage": 49.75, "elapsed_time": "1:41:40", "remaining_time": "1:42:40", "throughput": 20045.87, "total_tokens": 122287360}
|
|
{"current_steps": 38865, "total_steps": 78105, "loss": 0.2425, "lr": 2.954876538645491e-06, "epoch": 2.4879969272133664, "percentage": 49.76, "elapsed_time": "1:41:41", "remaining_time": "1:42:39", "throughput": 20046.24, "total_tokens": 122303488}
|
|
{"current_steps": 38870, "total_steps": 78105, "loss": 0.2122, "lr": 2.9543272001541535e-06, "epoch": 2.4883170091543434, "percentage": 49.77, "elapsed_time": "1:41:41", "remaining_time": "1:42:39", "throughput": 20046.59, "total_tokens": 122318976}
|
|
{"current_steps": 38875, "total_steps": 78105, "loss": 0.2268, "lr": 2.9537778389760812e-06, "epoch": 2.4886370910953204, "percentage": 49.77, "elapsed_time": "1:41:42", "remaining_time": "1:42:38", "throughput": 20046.91, "total_tokens": 122334272}
|
|
{"current_steps": 38880, "total_steps": 78105, "loss": 0.1778, "lr": 2.953228455138708e-06, "epoch": 2.4889571730362974, "percentage": 49.78, "elapsed_time": "1:41:43", "remaining_time": "1:42:37", "throughput": 20047.27, "total_tokens": 122350016}
|
|
{"current_steps": 38885, "total_steps": 78105, "loss": 0.1932, "lr": 2.952679048669467e-06, "epoch": 2.4892772549772744, "percentage": 49.79, "elapsed_time": "1:41:43", "remaining_time": "1:42:36", "throughput": 20047.66, "total_tokens": 122366336}
|
|
{"current_steps": 38890, "total_steps": 78105, "loss": 0.2486, "lr": 2.952129619595791e-06, "epoch": 2.489597336918251, "percentage": 49.79, "elapsed_time": "1:41:44", "remaining_time": "1:42:35", "throughput": 20048.01, "total_tokens": 122382016}
|
|
{"current_steps": 38895, "total_steps": 78105, "loss": 0.2206, "lr": 2.9515801679451177e-06, "epoch": 2.489917418859228, "percentage": 49.8, "elapsed_time": "1:41:45", "remaining_time": "1:42:34", "throughput": 20048.32, "total_tokens": 122397568}
|
|
{"current_steps": 38900, "total_steps": 78105, "loss": 0.2299, "lr": 2.9510306937448825e-06, "epoch": 2.490237500800205, "percentage": 49.8, "elapsed_time": "1:41:45", "remaining_time": "1:42:33", "throughput": 20048.69, "total_tokens": 122413568}
|
|
{"current_steps": 38905, "total_steps": 78105, "loss": 0.1943, "lr": 2.9504811970225246e-06, "epoch": 2.490557582741182, "percentage": 49.81, "elapsed_time": "1:41:46", "remaining_time": "1:42:32", "throughput": 20049.05, "total_tokens": 122429184}
|
|
{"current_steps": 38910, "total_steps": 78105, "loss": 0.2332, "lr": 2.9499316778054817e-06, "epoch": 2.4908776646821584, "percentage": 49.82, "elapsed_time": "1:41:47", "remaining_time": "1:42:31", "throughput": 20049.4, "total_tokens": 122444672}
|
|
{"current_steps": 38915, "total_steps": 78105, "loss": 0.2755, "lr": 2.9493821361211944e-06, "epoch": 2.4911977466231354, "percentage": 49.82, "elapsed_time": "1:41:47", "remaining_time": "1:42:30", "throughput": 20049.7, "total_tokens": 122459648}
|
|
{"current_steps": 38920, "total_steps": 78105, "loss": 0.1965, "lr": 2.948832571997104e-06, "epoch": 2.4915178285641124, "percentage": 49.83, "elapsed_time": "1:41:48", "remaining_time": "1:42:30", "throughput": 20050.0, "total_tokens": 122474688}
|
|
{"current_steps": 38925, "total_steps": 78105, "loss": 0.2573, "lr": 2.948282985460652e-06, "epoch": 2.4918379105050894, "percentage": 49.84, "elapsed_time": "1:41:49", "remaining_time": "1:42:29", "throughput": 20050.39, "total_tokens": 122490752}
|
|
{"current_steps": 38930, "total_steps": 78105, "loss": 0.207, "lr": 2.9477333765392835e-06, "epoch": 2.4921579924460664, "percentage": 49.84, "elapsed_time": "1:41:49", "remaining_time": "1:42:28", "throughput": 20050.74, "total_tokens": 122505984}
|
|
{"current_steps": 38935, "total_steps": 78105, "loss": 0.2417, "lr": 2.9471837452604424e-06, "epoch": 2.492478074387043, "percentage": 49.85, "elapsed_time": "1:41:50", "remaining_time": "1:42:27", "throughput": 20051.07, "total_tokens": 122521024}
|
|
{"current_steps": 38940, "total_steps": 78105, "loss": 0.2529, "lr": 2.9466340916515746e-06, "epoch": 2.49279815632802, "percentage": 49.86, "elapsed_time": "1:41:51", "remaining_time": "1:42:26", "throughput": 20051.38, "total_tokens": 122536448}
|
|
{"current_steps": 38945, "total_steps": 78105, "loss": 0.2426, "lr": 2.9460844157401263e-06, "epoch": 2.493118238268997, "percentage": 49.86, "elapsed_time": "1:41:51", "remaining_time": "1:42:25", "throughput": 20051.7, "total_tokens": 122551872}
|
|
{"current_steps": 38950, "total_steps": 78105, "loss": 0.1973, "lr": 2.945534717553546e-06, "epoch": 2.493438320209974, "percentage": 49.87, "elapsed_time": "1:41:52", "remaining_time": "1:42:24", "throughput": 20052.06, "total_tokens": 122568192}
|
|
{"current_steps": 38955, "total_steps": 78105, "loss": 0.2229, "lr": 2.944984997119283e-06, "epoch": 2.4937584021509505, "percentage": 49.88, "elapsed_time": "1:41:53", "remaining_time": "1:42:23", "throughput": 20052.5, "total_tokens": 122585344}
|
|
{"current_steps": 38960, "total_steps": 78105, "loss": 0.2131, "lr": 2.944435254464786e-06, "epoch": 2.4940784840919275, "percentage": 49.88, "elapsed_time": "1:41:53", "remaining_time": "1:42:22", "throughput": 20052.8, "total_tokens": 122600640}
|
|
{"current_steps": 38965, "total_steps": 78105, "loss": 0.1986, "lr": 2.943885489617508e-06, "epoch": 2.4943985660329044, "percentage": 49.89, "elapsed_time": "1:41:54", "remaining_time": "1:42:22", "throughput": 20053.14, "total_tokens": 122616064}
|
|
{"current_steps": 38970, "total_steps": 78105, "loss": 0.306, "lr": 2.943335702604901e-06, "epoch": 2.4947186479738814, "percentage": 49.89, "elapsed_time": "1:41:55", "remaining_time": "1:42:21", "throughput": 20053.46, "total_tokens": 122631232}
|
|
{"current_steps": 38975, "total_steps": 78105, "loss": 0.3408, "lr": 2.9427858934544183e-06, "epoch": 2.4950387299148584, "percentage": 49.9, "elapsed_time": "1:41:55", "remaining_time": "1:42:20", "throughput": 20053.74, "total_tokens": 122645824}
|
|
{"current_steps": 38980, "total_steps": 78105, "loss": 0.2662, "lr": 2.942236062193514e-06, "epoch": 2.495358811855835, "percentage": 49.91, "elapsed_time": "1:41:56", "remaining_time": "1:42:19", "throughput": 20054.07, "total_tokens": 122661184}
|
|
{"current_steps": 38985, "total_steps": 78105, "loss": 0.1876, "lr": 2.9416862088496444e-06, "epoch": 2.495678893796812, "percentage": 49.91, "elapsed_time": "1:41:57", "remaining_time": "1:42:18", "throughput": 20054.35, "total_tokens": 122675840}
|
|
{"current_steps": 38990, "total_steps": 78105, "loss": 0.2183, "lr": 2.941136333450266e-06, "epoch": 2.495998975737789, "percentage": 49.92, "elapsed_time": "1:41:57", "remaining_time": "1:42:17", "throughput": 20054.74, "total_tokens": 122692096}
|
|
{"current_steps": 38995, "total_steps": 78105, "loss": 0.2081, "lr": 2.940586436022837e-06, "epoch": 2.496319057678766, "percentage": 49.93, "elapsed_time": "1:41:58", "remaining_time": "1:42:16", "throughput": 20055.01, "total_tokens": 122706560}
|
|
{"current_steps": 39000, "total_steps": 78105, "loss": 0.2781, "lr": 2.9400365165948163e-06, "epoch": 2.4966391396197425, "percentage": 49.93, "elapsed_time": "1:41:59", "remaining_time": "1:42:15", "throughput": 20055.35, "total_tokens": 122722048}
|
|
{"current_steps": 39005, "total_steps": 78105, "loss": 0.2418, "lr": 2.939486575193665e-06, "epoch": 2.4969592215607195, "percentage": 49.94, "elapsed_time": "1:41:59", "remaining_time": "1:42:14", "throughput": 20055.66, "total_tokens": 122737152}
|
|
{"current_steps": 39010, "total_steps": 78105, "loss": 0.42, "lr": 2.9389366118468422e-06, "epoch": 2.4972793035016965, "percentage": 49.95, "elapsed_time": "1:42:00", "remaining_time": "1:42:13", "throughput": 20055.97, "total_tokens": 122752192}
|
|
{"current_steps": 39015, "total_steps": 78105, "loss": 0.1836, "lr": 2.9383866265818118e-06, "epoch": 2.4975993854426735, "percentage": 49.95, "elapsed_time": "1:42:01", "remaining_time": "1:42:12", "throughput": 20056.35, "total_tokens": 122768320}
|
|
{"current_steps": 39020, "total_steps": 78105, "loss": 0.2326, "lr": 2.9378366194260364e-06, "epoch": 2.4979194673836504, "percentage": 49.96, "elapsed_time": "1:42:01", "remaining_time": "1:42:12", "throughput": 20056.74, "total_tokens": 122784640}
|
|
{"current_steps": 39025, "total_steps": 78105, "loss": 0.2853, "lr": 2.937286590406981e-06, "epoch": 2.498239549324627, "percentage": 49.96, "elapsed_time": "1:42:02", "remaining_time": "1:42:11", "throughput": 20057.16, "total_tokens": 122801408}
|
|
{"current_steps": 39030, "total_steps": 78105, "loss": 0.2938, "lr": 2.936736539552111e-06, "epoch": 2.498559631265604, "percentage": 49.97, "elapsed_time": "1:42:03", "remaining_time": "1:42:10", "throughput": 20057.55, "total_tokens": 122817600}
|
|
{"current_steps": 39035, "total_steps": 78105, "loss": 0.1987, "lr": 2.936186466888894e-06, "epoch": 2.498879713206581, "percentage": 49.98, "elapsed_time": "1:42:03", "remaining_time": "1:42:09", "throughput": 20057.83, "total_tokens": 122832896}
|
|
{"current_steps": 39040, "total_steps": 78105, "loss": 0.1753, "lr": 2.9356363724447958e-06, "epoch": 2.499199795147558, "percentage": 49.98, "elapsed_time": "1:42:04", "remaining_time": "1:42:08", "throughput": 20058.09, "total_tokens": 122846976}
|
|
{"current_steps": 39045, "total_steps": 78105, "loss": 0.1784, "lr": 2.935086256247286e-06, "epoch": 2.4995198770885345, "percentage": 49.99, "elapsed_time": "1:42:05", "remaining_time": "1:42:07", "throughput": 20058.45, "total_tokens": 122862912}
|
|
{"current_steps": 39050, "total_steps": 78105, "loss": 0.199, "lr": 2.934536118323835e-06, "epoch": 2.4998399590295115, "percentage": 50.0, "elapsed_time": "1:42:05", "remaining_time": "1:42:06", "throughput": 20058.8, "total_tokens": 122878720}
|
|
{"current_steps": 39055, "total_steps": 78105, "loss": 0.2244, "lr": 2.9339859587019145e-06, "epoch": 2.5001600409704885, "percentage": 50.0, "elapsed_time": "1:42:06", "remaining_time": "1:42:05", "throughput": 20059.2, "total_tokens": 122895104}
|
|
{"current_steps": 39060, "total_steps": 78105, "loss": 0.1792, "lr": 2.9334357774089956e-06, "epoch": 2.5004801229114655, "percentage": 50.01, "elapsed_time": "1:42:07", "remaining_time": "1:42:04", "throughput": 20059.54, "total_tokens": 122910592}
|
|
{"current_steps": 39060, "total_steps": 78105, "eval_loss": 0.5070953965187073, "epoch": 2.5004801229114655, "percentage": 50.01, "elapsed_time": "1:42:58", "remaining_time": "1:42:56", "throughput": 19893.63, "total_tokens": 122910592}
|
|
{"current_steps": 39065, "total_steps": 78105, "loss": 0.2447, "lr": 2.9328855744725515e-06, "epoch": 2.5008002048524425, "percentage": 50.02, "elapsed_time": "1:43:33", "remaining_time": "1:43:29", "throughput": 19784.14, "total_tokens": 122925952}
|
|
{"current_steps": 39070, "total_steps": 78105, "loss": 0.2185, "lr": 2.932335349920056e-06, "epoch": 2.501120286793419, "percentage": 50.02, "elapsed_time": "1:43:34", "remaining_time": "1:43:28", "throughput": 19784.43, "total_tokens": 122940736}
|
|
{"current_steps": 39075, "total_steps": 78105, "loss": 0.2521, "lr": 2.9317851037789856e-06, "epoch": 2.501440368734396, "percentage": 50.03, "elapsed_time": "1:43:34", "remaining_time": "1:43:27", "throughput": 19784.76, "total_tokens": 122956288}
|
|
{"current_steps": 39080, "total_steps": 78105, "loss": 0.2858, "lr": 2.9312348360768163e-06, "epoch": 2.501760450675373, "percentage": 50.04, "elapsed_time": "1:43:35", "remaining_time": "1:43:26", "throughput": 19785.2, "total_tokens": 122973056}
|
|
{"current_steps": 39085, "total_steps": 78105, "loss": 0.2661, "lr": 2.9306845468410256e-06, "epoch": 2.5020805326163496, "percentage": 50.04, "elapsed_time": "1:43:36", "remaining_time": "1:43:25", "throughput": 19785.55, "total_tokens": 122988672}
|
|
{"current_steps": 39090, "total_steps": 78105, "loss": 0.2316, "lr": 2.930134236099092e-06, "epoch": 2.5024006145573265, "percentage": 50.05, "elapsed_time": "1:43:36", "remaining_time": "1:43:24", "throughput": 19785.93, "total_tokens": 123004480}
|
|
{"current_steps": 39095, "total_steps": 78105, "loss": 0.2113, "lr": 2.929583903878495e-06, "epoch": 2.5027206964983035, "percentage": 50.05, "elapsed_time": "1:43:37", "remaining_time": "1:43:23", "throughput": 19786.27, "total_tokens": 123020224}
|
|
{"current_steps": 39100, "total_steps": 78105, "loss": 0.2862, "lr": 2.929033550206715e-06, "epoch": 2.5030407784392805, "percentage": 50.06, "elapsed_time": "1:43:38", "remaining_time": "1:43:23", "throughput": 19786.59, "total_tokens": 123035200}
|
|
{"current_steps": 39105, "total_steps": 78105, "loss": 0.2525, "lr": 2.928483175111234e-06, "epoch": 2.5033608603802575, "percentage": 50.07, "elapsed_time": "1:43:38", "remaining_time": "1:43:22", "throughput": 19787.02, "total_tokens": 123051968}
|
|
{"current_steps": 39110, "total_steps": 78105, "loss": 0.2098, "lr": 2.927932778619536e-06, "epoch": 2.5036809423212345, "percentage": 50.07, "elapsed_time": "1:43:39", "remaining_time": "1:43:21", "throughput": 19787.4, "total_tokens": 123068096}
|
|
{"current_steps": 39115, "total_steps": 78105, "loss": 0.1985, "lr": 2.927382360759104e-06, "epoch": 2.504001024262211, "percentage": 50.08, "elapsed_time": "1:43:40", "remaining_time": "1:43:20", "throughput": 19787.76, "total_tokens": 123083584}
|
|
{"current_steps": 39120, "total_steps": 78105, "loss": 0.222, "lr": 2.9268319215574232e-06, "epoch": 2.504321106203188, "percentage": 50.09, "elapsed_time": "1:43:40", "remaining_time": "1:43:19", "throughput": 19788.2, "total_tokens": 123099968}
|
|
{"current_steps": 39125, "total_steps": 78105, "loss": 0.1681, "lr": 2.926281461041979e-06, "epoch": 2.504641188144165, "percentage": 50.09, "elapsed_time": "1:43:41", "remaining_time": "1:43:18", "throughput": 19788.56, "total_tokens": 123115776}
|
|
{"current_steps": 39130, "total_steps": 78105, "loss": 0.2601, "lr": 2.925730979240259e-06, "epoch": 2.5049612700851416, "percentage": 50.1, "elapsed_time": "1:43:42", "remaining_time": "1:43:17", "throughput": 19788.94, "total_tokens": 123131904}
|
|
{"current_steps": 39135, "total_steps": 78105, "loss": 0.197, "lr": 2.925180476179752e-06, "epoch": 2.5052813520261186, "percentage": 50.11, "elapsed_time": "1:43:42", "remaining_time": "1:43:16", "throughput": 19789.34, "total_tokens": 123147968}
|
|
{"current_steps": 39140, "total_steps": 78105, "loss": 0.4018, "lr": 2.924629951887946e-06, "epoch": 2.5056014339670956, "percentage": 50.11, "elapsed_time": "1:43:43", "remaining_time": "1:43:15", "throughput": 19789.72, "total_tokens": 123163840}
|
|
{"current_steps": 39145, "total_steps": 78105, "loss": 0.2582, "lr": 2.9240794063923327e-06, "epoch": 2.5059215159080725, "percentage": 50.12, "elapsed_time": "1:43:44", "remaining_time": "1:43:14", "throughput": 19790.13, "total_tokens": 123180352}
|
|
{"current_steps": 39150, "total_steps": 78105, "loss": 0.1756, "lr": 2.923528839720403e-06, "epoch": 2.5062415978490495, "percentage": 50.12, "elapsed_time": "1:43:45", "remaining_time": "1:43:14", "throughput": 19790.55, "total_tokens": 123196608}
|
|
{"current_steps": 39155, "total_steps": 78105, "loss": 0.2078, "lr": 2.922978251899648e-06, "epoch": 2.506561679790026, "percentage": 50.13, "elapsed_time": "1:43:45", "remaining_time": "1:43:13", "throughput": 19790.93, "total_tokens": 123212480}
|
|
{"current_steps": 39160, "total_steps": 78105, "loss": 0.1229, "lr": 2.9224276429575626e-06, "epoch": 2.506881761731003, "percentage": 50.14, "elapsed_time": "1:43:46", "remaining_time": "1:43:12", "throughput": 19791.44, "total_tokens": 123230016}
|
|
{"current_steps": 39165, "total_steps": 78105, "loss": 0.1453, "lr": 2.9218770129216417e-06, "epoch": 2.50720184367198, "percentage": 50.14, "elapsed_time": "1:43:47", "remaining_time": "1:43:11", "throughput": 19791.79, "total_tokens": 123245632}
|
|
{"current_steps": 39170, "total_steps": 78105, "loss": 0.3071, "lr": 2.92132636181938e-06, "epoch": 2.507521925612957, "percentage": 50.15, "elapsed_time": "1:43:47", "remaining_time": "1:43:10", "throughput": 19792.23, "total_tokens": 123262272}
|
|
{"current_steps": 39175, "total_steps": 78105, "loss": 0.2318, "lr": 2.9207756896782746e-06, "epoch": 2.5078420075539336, "percentage": 50.16, "elapsed_time": "1:43:48", "remaining_time": "1:43:09", "throughput": 19792.59, "total_tokens": 123277888}
|
|
{"current_steps": 39180, "total_steps": 78105, "loss": 0.1895, "lr": 2.920224996525823e-06, "epoch": 2.5081620894949106, "percentage": 50.16, "elapsed_time": "1:43:49", "remaining_time": "1:43:08", "throughput": 19792.97, "total_tokens": 123293824}
|
|
{"current_steps": 39185, "total_steps": 78105, "loss": 0.2957, "lr": 2.919674282389524e-06, "epoch": 2.5084821714358876, "percentage": 50.17, "elapsed_time": "1:43:49", "remaining_time": "1:43:07", "throughput": 19793.32, "total_tokens": 123309248}
|
|
{"current_steps": 39190, "total_steps": 78105, "loss": 0.2751, "lr": 2.9191235472968767e-06, "epoch": 2.5088022533768646, "percentage": 50.18, "elapsed_time": "1:43:50", "remaining_time": "1:43:06", "throughput": 19793.68, "total_tokens": 123325120}
|
|
{"current_steps": 39195, "total_steps": 78105, "loss": 0.2485, "lr": 2.9185727912753827e-06, "epoch": 2.5091223353178416, "percentage": 50.18, "elapsed_time": "1:43:51", "remaining_time": "1:43:05", "throughput": 19793.99, "total_tokens": 123339776}
|
|
{"current_steps": 39200, "total_steps": 78105, "loss": 0.2622, "lr": 2.918022014352544e-06, "epoch": 2.509442417258818, "percentage": 50.19, "elapsed_time": "1:43:51", "remaining_time": "1:43:04", "throughput": 19794.34, "total_tokens": 123354944}
|
|
{"current_steps": 39205, "total_steps": 78105, "loss": 0.2675, "lr": 2.917471216555864e-06, "epoch": 2.509762499199795, "percentage": 50.2, "elapsed_time": "1:43:52", "remaining_time": "1:43:04", "throughput": 19794.76, "total_tokens": 123371520}
|
|
{"current_steps": 39210, "total_steps": 78105, "loss": 0.2917, "lr": 2.916920397912845e-06, "epoch": 2.510082581140772, "percentage": 50.2, "elapsed_time": "1:43:53", "remaining_time": "1:43:03", "throughput": 19795.13, "total_tokens": 123387072}
|
|
{"current_steps": 39215, "total_steps": 78105, "loss": 0.2082, "lr": 2.9163695584509934e-06, "epoch": 2.510402663081749, "percentage": 50.21, "elapsed_time": "1:43:53", "remaining_time": "1:43:02", "throughput": 19795.5, "total_tokens": 123402624}
|
|
{"current_steps": 39220, "total_steps": 78105, "loss": 0.3232, "lr": 2.915818698197814e-06, "epoch": 2.5107227450227256, "percentage": 50.21, "elapsed_time": "1:43:54", "remaining_time": "1:43:01", "throughput": 19795.92, "total_tokens": 123418880}
|
|
{"current_steps": 39225, "total_steps": 78105, "loss": 0.1872, "lr": 2.9152678171808156e-06, "epoch": 2.5110428269637026, "percentage": 50.22, "elapsed_time": "1:43:55", "remaining_time": "1:43:00", "throughput": 19796.35, "total_tokens": 123435584}
|
|
{"current_steps": 39230, "total_steps": 78105, "loss": 0.1625, "lr": 2.914716915427505e-06, "epoch": 2.5113629089046796, "percentage": 50.23, "elapsed_time": "1:43:55", "remaining_time": "1:42:59", "throughput": 19796.7, "total_tokens": 123450880}
|
|
{"current_steps": 39235, "total_steps": 78105, "loss": 0.3469, "lr": 2.9141659929653925e-06, "epoch": 2.5116829908456566, "percentage": 50.23, "elapsed_time": "1:43:56", "remaining_time": "1:42:58", "throughput": 19797.0, "total_tokens": 123465600}
|
|
{"current_steps": 39240, "total_steps": 78105, "loss": 0.2009, "lr": 2.9136150498219872e-06, "epoch": 2.5120030727866336, "percentage": 50.24, "elapsed_time": "1:43:57", "remaining_time": "1:42:57", "throughput": 19797.35, "total_tokens": 123481472}
|
|
{"current_steps": 39245, "total_steps": 78105, "loss": 0.2325, "lr": 2.9130640860248003e-06, "epoch": 2.51232315472761, "percentage": 50.25, "elapsed_time": "1:43:57", "remaining_time": "1:42:56", "throughput": 19797.68, "total_tokens": 123496512}
|
|
{"current_steps": 39250, "total_steps": 78105, "loss": 0.2488, "lr": 2.912513101601345e-06, "epoch": 2.512643236668587, "percentage": 50.25, "elapsed_time": "1:43:58", "remaining_time": "1:42:55", "throughput": 19798.08, "total_tokens": 123512640}
|
|
{"current_steps": 39255, "total_steps": 78105, "loss": 0.1831, "lr": 2.9119620965791346e-06, "epoch": 2.512963318609564, "percentage": 50.26, "elapsed_time": "1:43:59", "remaining_time": "1:42:54", "throughput": 19798.43, "total_tokens": 123528192}
|
|
{"current_steps": 39260, "total_steps": 78105, "loss": 0.2414, "lr": 2.911411070985683e-06, "epoch": 2.5132834005505407, "percentage": 50.27, "elapsed_time": "1:43:59", "remaining_time": "1:42:54", "throughput": 19798.78, "total_tokens": 123543680}
|
|
{"current_steps": 39265, "total_steps": 78105, "loss": 0.1765, "lr": 2.910860024848505e-06, "epoch": 2.5136034824915177, "percentage": 50.27, "elapsed_time": "1:44:00", "remaining_time": "1:42:53", "throughput": 19799.25, "total_tokens": 123560896}
|
|
{"current_steps": 39270, "total_steps": 78105, "loss": 0.1817, "lr": 2.9103089581951166e-06, "epoch": 2.5139235644324947, "percentage": 50.28, "elapsed_time": "1:44:01", "remaining_time": "1:42:52", "throughput": 19799.56, "total_tokens": 123575808}
|
|
{"current_steps": 39275, "total_steps": 78105, "loss": 0.2243, "lr": 2.909757871053037e-06, "epoch": 2.5142436463734716, "percentage": 50.28, "elapsed_time": "1:44:02", "remaining_time": "1:42:51", "throughput": 19799.93, "total_tokens": 123591552}
|
|
{"current_steps": 39280, "total_steps": 78105, "loss": 0.2163, "lr": 2.9092067634497835e-06, "epoch": 2.5145637283144486, "percentage": 50.29, "elapsed_time": "1:44:02", "remaining_time": "1:42:50", "throughput": 19800.32, "total_tokens": 123607488}
|
|
{"current_steps": 39285, "total_steps": 78105, "loss": 0.1746, "lr": 2.908655635412876e-06, "epoch": 2.5148838102554256, "percentage": 50.3, "elapsed_time": "1:44:03", "remaining_time": "1:42:49", "throughput": 19800.62, "total_tokens": 123622144}
|
|
{"current_steps": 39290, "total_steps": 78105, "loss": 0.2954, "lr": 2.9081044869698342e-06, "epoch": 2.515203892196402, "percentage": 50.3, "elapsed_time": "1:44:04", "remaining_time": "1:42:48", "throughput": 19801.0, "total_tokens": 123638016}
|
|
{"current_steps": 39295, "total_steps": 78105, "loss": 0.2129, "lr": 2.90755331814818e-06, "epoch": 2.515523974137379, "percentage": 50.31, "elapsed_time": "1:44:04", "remaining_time": "1:42:47", "throughput": 19801.43, "total_tokens": 123654976}
|
|
{"current_steps": 39300, "total_steps": 78105, "loss": 0.3048, "lr": 2.9070021289754368e-06, "epoch": 2.515844056078356, "percentage": 50.32, "elapsed_time": "1:44:05", "remaining_time": "1:42:46", "throughput": 19801.78, "total_tokens": 123670528}
|
|
{"current_steps": 39305, "total_steps": 78105, "loss": 0.3119, "lr": 2.9064509194791263e-06, "epoch": 2.5161641380193327, "percentage": 50.32, "elapsed_time": "1:44:06", "remaining_time": "1:42:45", "throughput": 19802.17, "total_tokens": 123686720}
|
|
{"current_steps": 39310, "total_steps": 78105, "loss": 0.3004, "lr": 2.9058996896867743e-06, "epoch": 2.5164842199603097, "percentage": 50.33, "elapsed_time": "1:44:06", "remaining_time": "1:42:44", "throughput": 19802.51, "total_tokens": 123702016}
|
|
{"current_steps": 39315, "total_steps": 78105, "loss": 0.1985, "lr": 2.905348439625906e-06, "epoch": 2.5168043019012867, "percentage": 50.34, "elapsed_time": "1:44:07", "remaining_time": "1:42:43", "throughput": 19802.74, "total_tokens": 123715968}
|
|
{"current_steps": 39320, "total_steps": 78105, "loss": 0.2372, "lr": 2.9047971693240485e-06, "epoch": 2.5171243838422637, "percentage": 50.34, "elapsed_time": "1:44:08", "remaining_time": "1:42:43", "throughput": 19803.03, "total_tokens": 123730880}
|
|
{"current_steps": 39325, "total_steps": 78105, "loss": 0.2031, "lr": 2.904245878808729e-06, "epoch": 2.5174444657832407, "percentage": 50.35, "elapsed_time": "1:44:08", "remaining_time": "1:42:42", "throughput": 19803.36, "total_tokens": 123745920}
|
|
{"current_steps": 39330, "total_steps": 78105, "loss": 0.2428, "lr": 2.9036945681074745e-06, "epoch": 2.5177645477242176, "percentage": 50.36, "elapsed_time": "1:44:09", "remaining_time": "1:42:41", "throughput": 19803.71, "total_tokens": 123761536}
|
|
{"current_steps": 39335, "total_steps": 78105, "loss": 0.2125, "lr": 2.9031432372478167e-06, "epoch": 2.518084629665194, "percentage": 50.36, "elapsed_time": "1:44:10", "remaining_time": "1:42:40", "throughput": 19804.03, "total_tokens": 123776832}
|
|
{"current_steps": 39340, "total_steps": 78105, "loss": 0.3436, "lr": 2.9025918862572866e-06, "epoch": 2.518404711606171, "percentage": 50.37, "elapsed_time": "1:44:10", "remaining_time": "1:42:39", "throughput": 19804.49, "total_tokens": 123794688}
|
|
{"current_steps": 39345, "total_steps": 78105, "loss": 0.2761, "lr": 2.902040515163414e-06, "epoch": 2.518724793547148, "percentage": 50.37, "elapsed_time": "1:44:11", "remaining_time": "1:42:38", "throughput": 19804.81, "total_tokens": 123809792}
|
|
{"current_steps": 39350, "total_steps": 78105, "loss": 0.2474, "lr": 2.901489123993732e-06, "epoch": 2.5190448754881247, "percentage": 50.38, "elapsed_time": "1:44:12", "remaining_time": "1:42:37", "throughput": 19805.16, "total_tokens": 123825664}
|
|
{"current_steps": 39355, "total_steps": 78105, "loss": 0.2624, "lr": 2.9009377127757747e-06, "epoch": 2.5193649574291017, "percentage": 50.39, "elapsed_time": "1:44:12", "remaining_time": "1:42:36", "throughput": 19805.57, "total_tokens": 123842304}
|
|
{"current_steps": 39360, "total_steps": 78105, "loss": 0.2393, "lr": 2.9003862815370764e-06, "epoch": 2.5196850393700787, "percentage": 50.39, "elapsed_time": "1:44:13", "remaining_time": "1:42:35", "throughput": 19805.85, "total_tokens": 123856960}
|
|
{"current_steps": 39365, "total_steps": 78105, "loss": 0.2521, "lr": 2.8998348303051725e-06, "epoch": 2.5200051213110557, "percentage": 50.4, "elapsed_time": "1:44:14", "remaining_time": "1:42:34", "throughput": 19806.24, "total_tokens": 123873344}
|
|
{"current_steps": 39370, "total_steps": 78105, "loss": 0.1666, "lr": 2.8992833591076e-06, "epoch": 2.5203252032520327, "percentage": 50.41, "elapsed_time": "1:44:14", "remaining_time": "1:42:34", "throughput": 19806.65, "total_tokens": 123889984}
|
|
{"current_steps": 39375, "total_steps": 78105, "loss": 0.2193, "lr": 2.8987318679718966e-06, "epoch": 2.5206452851930097, "percentage": 50.41, "elapsed_time": "1:44:15", "remaining_time": "1:42:33", "throughput": 19807.07, "total_tokens": 123906240}
|
|
{"current_steps": 39380, "total_steps": 78105, "loss": 0.3724, "lr": 2.8981803569256007e-06, "epoch": 2.5209653671339862, "percentage": 50.42, "elapsed_time": "1:44:16", "remaining_time": "1:42:32", "throughput": 19807.42, "total_tokens": 123921920}
|
|
{"current_steps": 39385, "total_steps": 78105, "loss": 0.2095, "lr": 2.8976288259962508e-06, "epoch": 2.521285449074963, "percentage": 50.43, "elapsed_time": "1:44:16", "remaining_time": "1:42:31", "throughput": 19807.72, "total_tokens": 123936832}
|
|
{"current_steps": 39390, "total_steps": 78105, "loss": 0.2137, "lr": 2.89707727521139e-06, "epoch": 2.52160553101594, "percentage": 50.43, "elapsed_time": "1:44:17", "remaining_time": "1:42:30", "throughput": 19808.03, "total_tokens": 123951616}
|
|
{"current_steps": 39395, "total_steps": 78105, "loss": 0.1472, "lr": 2.896525704598557e-06, "epoch": 2.5219256129569168, "percentage": 50.44, "elapsed_time": "1:44:18", "remaining_time": "1:42:29", "throughput": 19808.28, "total_tokens": 123968384}
|
|
{"current_steps": 39400, "total_steps": 78105, "loss": 0.1806, "lr": 2.8959741141852963e-06, "epoch": 2.5222456948978937, "percentage": 50.44, "elapsed_time": "1:44:19", "remaining_time": "1:42:28", "throughput": 19808.69, "total_tokens": 123984640}
|
|
{"current_steps": 39405, "total_steps": 78105, "loss": 0.1203, "lr": 2.8954225039991514e-06, "epoch": 2.5225657768388707, "percentage": 50.45, "elapsed_time": "1:44:19", "remaining_time": "1:42:27", "throughput": 19809.12, "total_tokens": 124001280}
|
|
{"current_steps": 39410, "total_steps": 78105, "loss": 0.2126, "lr": 2.8948708740676657e-06, "epoch": 2.5228858587798477, "percentage": 50.46, "elapsed_time": "1:44:20", "remaining_time": "1:42:26", "throughput": 19809.48, "total_tokens": 124016640}
|
|
{"current_steps": 39415, "total_steps": 78105, "loss": 0.2419, "lr": 2.894319224418386e-06, "epoch": 2.5232059407208247, "percentage": 50.46, "elapsed_time": "1:44:21", "remaining_time": "1:42:25", "throughput": 19809.86, "total_tokens": 124032384}
|
|
{"current_steps": 39420, "total_steps": 78105, "loss": 0.1775, "lr": 2.893767555078857e-06, "epoch": 2.5235260226618013, "percentage": 50.47, "elapsed_time": "1:44:21", "remaining_time": "1:42:25", "throughput": 19810.17, "total_tokens": 124047232}
|
|
{"current_steps": 39425, "total_steps": 78105, "loss": 0.2091, "lr": 2.893215866076628e-06, "epoch": 2.5238461046027783, "percentage": 50.48, "elapsed_time": "1:44:22", "remaining_time": "1:42:24", "throughput": 19810.61, "total_tokens": 124063744}
|
|
{"current_steps": 39430, "total_steps": 78105, "loss": 0.1578, "lr": 2.8926641574392473e-06, "epoch": 2.5241661865437552, "percentage": 50.48, "elapsed_time": "1:44:23", "remaining_time": "1:42:23", "throughput": 19810.92, "total_tokens": 124078336}
|
|
{"current_steps": 39435, "total_steps": 78105, "loss": 0.126, "lr": 2.8921124291942635e-06, "epoch": 2.5244862684847322, "percentage": 50.49, "elapsed_time": "1:44:23", "remaining_time": "1:42:22", "throughput": 19811.22, "total_tokens": 124093056}
|
|
{"current_steps": 39440, "total_steps": 78105, "loss": 0.2111, "lr": 2.8915606813692276e-06, "epoch": 2.524806350425709, "percentage": 50.5, "elapsed_time": "1:44:24", "remaining_time": "1:42:21", "throughput": 19811.51, "total_tokens": 124107392}
|
|
{"current_steps": 39445, "total_steps": 78105, "loss": 0.1535, "lr": 2.89100891399169e-06, "epoch": 2.5251264323666858, "percentage": 50.5, "elapsed_time": "1:44:25", "remaining_time": "1:42:20", "throughput": 19811.85, "total_tokens": 124122496}
|
|
{"current_steps": 39450, "total_steps": 78105, "loss": 0.1801, "lr": 2.8904571270892046e-06, "epoch": 2.5254465143076628, "percentage": 50.51, "elapsed_time": "1:44:25", "remaining_time": "1:42:19", "throughput": 19812.23, "total_tokens": 124138176}
|
|
{"current_steps": 39455, "total_steps": 78105, "loss": 0.2439, "lr": 2.889905320689324e-06, "epoch": 2.5257665962486398, "percentage": 50.52, "elapsed_time": "1:44:26", "remaining_time": "1:42:18", "throughput": 19812.53, "total_tokens": 124152640}
|
|
{"current_steps": 39460, "total_steps": 78105, "loss": 0.2297, "lr": 2.889353494819603e-06, "epoch": 2.5260866781896167, "percentage": 50.52, "elapsed_time": "1:44:27", "remaining_time": "1:42:17", "throughput": 19813.04, "total_tokens": 124170432}
|
|
{"current_steps": 39465, "total_steps": 78105, "loss": 0.2656, "lr": 2.888801649507596e-06, "epoch": 2.5264067601305933, "percentage": 50.53, "elapsed_time": "1:44:27", "remaining_time": "1:42:16", "throughput": 19813.36, "total_tokens": 124186048}
|
|
{"current_steps": 39470, "total_steps": 78105, "loss": 0.3109, "lr": 2.8882497847808605e-06, "epoch": 2.5267268420715703, "percentage": 50.53, "elapsed_time": "1:44:28", "remaining_time": "1:42:15", "throughput": 19813.72, "total_tokens": 124202048}
|
|
{"current_steps": 39475, "total_steps": 78105, "loss": 0.2268, "lr": 2.887697900666952e-06, "epoch": 2.5270469240125473, "percentage": 50.54, "elapsed_time": "1:44:29", "remaining_time": "1:42:14", "throughput": 19814.14, "total_tokens": 124218688}
|
|
{"current_steps": 39480, "total_steps": 78105, "loss": 0.2231, "lr": 2.8871459971934307e-06, "epoch": 2.5273670059535243, "percentage": 50.55, "elapsed_time": "1:44:29", "remaining_time": "1:42:14", "throughput": 19814.47, "total_tokens": 124234048}
|
|
{"current_steps": 39485, "total_steps": 78105, "loss": 0.1872, "lr": 2.8865940743878546e-06, "epoch": 2.527687087894501, "percentage": 50.55, "elapsed_time": "1:44:30", "remaining_time": "1:42:13", "throughput": 19814.81, "total_tokens": 124249280}
|
|
{"current_steps": 39490, "total_steps": 78105, "loss": 0.1659, "lr": 2.8860421322777844e-06, "epoch": 2.528007169835478, "percentage": 50.56, "elapsed_time": "1:44:31", "remaining_time": "1:42:12", "throughput": 19815.17, "total_tokens": 124264832}
|
|
{"current_steps": 39495, "total_steps": 78105, "loss": 0.2652, "lr": 2.885490170890781e-06, "epoch": 2.528327251776455, "percentage": 50.57, "elapsed_time": "1:44:31", "remaining_time": "1:42:11", "throughput": 19815.52, "total_tokens": 124280448}
|
|
{"current_steps": 39500, "total_steps": 78105, "loss": 0.2381, "lr": 2.884938190254405e-06, "epoch": 2.5286473337174318, "percentage": 50.57, "elapsed_time": "1:44:32", "remaining_time": "1:42:10", "throughput": 19815.95, "total_tokens": 124296896}
|
|
{"current_steps": 39505, "total_steps": 78105, "loss": 0.1968, "lr": 2.884386190396222e-06, "epoch": 2.5289674156584088, "percentage": 50.58, "elapsed_time": "1:44:33", "remaining_time": "1:42:09", "throughput": 19816.23, "total_tokens": 124311296}
|
|
{"current_steps": 39510, "total_steps": 78105, "loss": 0.2005, "lr": 2.8838341713437944e-06, "epoch": 2.5292874975993853, "percentage": 50.59, "elapsed_time": "1:44:33", "remaining_time": "1:42:08", "throughput": 19816.56, "total_tokens": 124326272}
|
|
{"current_steps": 39515, "total_steps": 78105, "loss": 0.1836, "lr": 2.8832821331246878e-06, "epoch": 2.5296075795403623, "percentage": 50.59, "elapsed_time": "1:44:34", "remaining_time": "1:42:07", "throughput": 19816.89, "total_tokens": 124341248}
|
|
{"current_steps": 39520, "total_steps": 78105, "loss": 0.1753, "lr": 2.8827300757664686e-06, "epoch": 2.5299276614813393, "percentage": 50.6, "elapsed_time": "1:44:35", "remaining_time": "1:42:06", "throughput": 19817.38, "total_tokens": 124359040}
|
|
{"current_steps": 39525, "total_steps": 78105, "loss": 0.3078, "lr": 2.8821779992967026e-06, "epoch": 2.530247743422316, "percentage": 50.6, "elapsed_time": "1:44:35", "remaining_time": "1:42:05", "throughput": 19817.75, "total_tokens": 124374720}
|
|
{"current_steps": 39530, "total_steps": 78105, "loss": 0.208, "lr": 2.8816259037429573e-06, "epoch": 2.530567825363293, "percentage": 50.61, "elapsed_time": "1:44:36", "remaining_time": "1:42:04", "throughput": 19818.13, "total_tokens": 124391040}
|
|
{"current_steps": 39535, "total_steps": 78105, "loss": 0.2192, "lr": 2.881073789132803e-06, "epoch": 2.53088790730427, "percentage": 50.62, "elapsed_time": "1:44:37", "remaining_time": "1:42:04", "throughput": 19818.57, "total_tokens": 124407680}
|
|
{"current_steps": 39540, "total_steps": 78105, "loss": 0.2628, "lr": 2.880521655493808e-06, "epoch": 2.531207989245247, "percentage": 50.62, "elapsed_time": "1:44:38", "remaining_time": "1:42:03", "throughput": 19818.94, "total_tokens": 124423744}
|
|
{"current_steps": 39545, "total_steps": 78105, "loss": 0.2605, "lr": 2.879969502853544e-06, "epoch": 2.531528071186224, "percentage": 50.63, "elapsed_time": "1:44:38", "remaining_time": "1:42:02", "throughput": 19819.26, "total_tokens": 124438720}
|
|
{"current_steps": 39550, "total_steps": 78105, "loss": 0.177, "lr": 2.879417331239582e-06, "epoch": 2.531848153127201, "percentage": 50.64, "elapsed_time": "1:44:39", "remaining_time": "1:42:01", "throughput": 19819.67, "total_tokens": 124455104}
|
|
{"current_steps": 39555, "total_steps": 78105, "loss": 0.2341, "lr": 2.8788651406794956e-06, "epoch": 2.5321682350681773, "percentage": 50.64, "elapsed_time": "1:44:40", "remaining_time": "1:42:00", "throughput": 19820.08, "total_tokens": 124471488}
|
|
{"current_steps": 39560, "total_steps": 78105, "loss": 0.1969, "lr": 2.878312931200856e-06, "epoch": 2.5324883170091543, "percentage": 50.65, "elapsed_time": "1:44:40", "remaining_time": "1:41:59", "throughput": 19820.43, "total_tokens": 124486976}
|
|
{"current_steps": 39565, "total_steps": 78105, "loss": 0.2184, "lr": 2.8777607028312406e-06, "epoch": 2.5328083989501313, "percentage": 50.66, "elapsed_time": "1:44:41", "remaining_time": "1:41:58", "throughput": 19820.86, "total_tokens": 124503616}
|
|
{"current_steps": 39570, "total_steps": 78105, "loss": 0.1877, "lr": 2.877208455598222e-06, "epoch": 2.533128480891108, "percentage": 50.66, "elapsed_time": "1:44:42", "remaining_time": "1:41:57", "throughput": 19821.26, "total_tokens": 124520000}
|
|
{"current_steps": 39575, "total_steps": 78105, "loss": 0.5476, "lr": 2.876656189529379e-06, "epoch": 2.533448562832085, "percentage": 50.67, "elapsed_time": "1:44:42", "remaining_time": "1:41:56", "throughput": 19821.57, "total_tokens": 124535296}
|
|
{"current_steps": 39580, "total_steps": 78105, "loss": 0.2581, "lr": 2.8761039046522874e-06, "epoch": 2.533768644773062, "percentage": 50.68, "elapsed_time": "1:44:43", "remaining_time": "1:41:56", "throughput": 19822.03, "total_tokens": 124552064}
|
|
{"current_steps": 39585, "total_steps": 78105, "loss": 0.1697, "lr": 2.8755516009945266e-06, "epoch": 2.534088726714039, "percentage": 50.68, "elapsed_time": "1:44:44", "remaining_time": "1:41:55", "throughput": 19822.37, "total_tokens": 124567552}
|
|
{"current_steps": 39590, "total_steps": 78105, "loss": 0.235, "lr": 2.874999278583674e-06, "epoch": 2.534408808655016, "percentage": 50.69, "elapsed_time": "1:44:44", "remaining_time": "1:41:54", "throughput": 19822.75, "total_tokens": 124583360}
|
|
{"current_steps": 39595, "total_steps": 78105, "loss": 0.2169, "lr": 2.874446937447311e-06, "epoch": 2.534728890595993, "percentage": 50.69, "elapsed_time": "1:44:45", "remaining_time": "1:41:53", "throughput": 19823.06, "total_tokens": 124598080}
|
|
{"current_steps": 39600, "total_steps": 78105, "loss": 0.2462, "lr": 2.8738945776130188e-06, "epoch": 2.5350489725369694, "percentage": 50.7, "elapsed_time": "1:44:46", "remaining_time": "1:41:52", "throughput": 19823.39, "total_tokens": 124612992}
|
|
{"current_steps": 39605, "total_steps": 78105, "loss": 0.2994, "lr": 2.8733421991083787e-06, "epoch": 2.5353690544779464, "percentage": 50.71, "elapsed_time": "1:44:46", "remaining_time": "1:41:51", "throughput": 19823.72, "total_tokens": 124628544}
|
|
{"current_steps": 39610, "total_steps": 78105, "loss": 0.186, "lr": 2.872789801960974e-06, "epoch": 2.5356891364189234, "percentage": 50.71, "elapsed_time": "1:44:47", "remaining_time": "1:41:50", "throughput": 19824.15, "total_tokens": 124645120}
|
|
{"current_steps": 39615, "total_steps": 78105, "loss": 0.2054, "lr": 2.8722373861983875e-06, "epoch": 2.5360092183599, "percentage": 50.72, "elapsed_time": "1:44:48", "remaining_time": "1:41:49", "throughput": 19824.53, "total_tokens": 124661056}
|
|
{"current_steps": 39620, "total_steps": 78105, "loss": 0.1903, "lr": 2.8716849518482058e-06, "epoch": 2.536329300300877, "percentage": 50.73, "elapsed_time": "1:44:48", "remaining_time": "1:41:48", "throughput": 19824.93, "total_tokens": 124677120}
|
|
{"current_steps": 39625, "total_steps": 78105, "loss": 0.1526, "lr": 2.8711324989380135e-06, "epoch": 2.536649382241854, "percentage": 50.73, "elapsed_time": "1:44:49", "remaining_time": "1:41:47", "throughput": 19825.26, "total_tokens": 124692224}
|
|
{"current_steps": 39630, "total_steps": 78105, "loss": 0.1917, "lr": 2.870580027495397e-06, "epoch": 2.536969464182831, "percentage": 50.74, "elapsed_time": "1:44:50", "remaining_time": "1:41:46", "throughput": 19825.59, "total_tokens": 124707904}
|
|
{"current_steps": 39635, "total_steps": 78105, "loss": 0.2173, "lr": 2.8700275375479447e-06, "epoch": 2.537289546123808, "percentage": 50.75, "elapsed_time": "1:44:50", "remaining_time": "1:41:45", "throughput": 19825.91, "total_tokens": 124722944}
|
|
{"current_steps": 39640, "total_steps": 78105, "loss": 0.1564, "lr": 2.8694750291232443e-06, "epoch": 2.537609628064785, "percentage": 50.75, "elapsed_time": "1:44:51", "remaining_time": "1:41:45", "throughput": 19826.24, "total_tokens": 124738368}
|
|
{"current_steps": 39645, "total_steps": 78105, "loss": 0.2893, "lr": 2.8689225022488852e-06, "epoch": 2.5379297100057614, "percentage": 50.76, "elapsed_time": "1:44:52", "remaining_time": "1:41:44", "throughput": 19826.57, "total_tokens": 124753792}
|
|
{"current_steps": 39650, "total_steps": 78105, "loss": 0.2568, "lr": 2.8683699569524587e-06, "epoch": 2.5382497919467384, "percentage": 50.76, "elapsed_time": "1:44:52", "remaining_time": "1:41:43", "throughput": 19827.01, "total_tokens": 124770304}
|
|
{"current_steps": 39655, "total_steps": 78105, "loss": 0.271, "lr": 2.867817393261555e-06, "epoch": 2.5385698738877154, "percentage": 50.77, "elapsed_time": "1:44:53", "remaining_time": "1:41:42", "throughput": 19827.52, "total_tokens": 124788480}
|
|
{"current_steps": 39660, "total_steps": 78105, "loss": 0.1103, "lr": 2.8672648112037665e-06, "epoch": 2.538889955828692, "percentage": 50.78, "elapsed_time": "1:44:54", "remaining_time": "1:41:41", "throughput": 19827.86, "total_tokens": 124803904}
|
|
{"current_steps": 39665, "total_steps": 78105, "loss": 0.2727, "lr": 2.8667122108066863e-06, "epoch": 2.539210037769669, "percentage": 50.78, "elapsed_time": "1:44:55", "remaining_time": "1:41:40", "throughput": 19828.21, "total_tokens": 124819392}
|
|
{"current_steps": 39670, "total_steps": 78105, "loss": 0.2652, "lr": 2.8661595920979087e-06, "epoch": 2.539530119710646, "percentage": 50.79, "elapsed_time": "1:44:55", "remaining_time": "1:41:39", "throughput": 19828.67, "total_tokens": 124836544}
|
|
{"current_steps": 39675, "total_steps": 78105, "loss": 0.3355, "lr": 2.8656069551050275e-06, "epoch": 2.539850201651623, "percentage": 50.8, "elapsed_time": "1:44:56", "remaining_time": "1:41:38", "throughput": 19828.99, "total_tokens": 124851648}
|
|
{"current_steps": 39680, "total_steps": 78105, "loss": 0.2043, "lr": 2.8650542998556403e-06, "epoch": 2.5401702835926, "percentage": 50.8, "elapsed_time": "1:44:57", "remaining_time": "1:41:37", "throughput": 19829.38, "total_tokens": 124867840}
|
|
{"current_steps": 39685, "total_steps": 78105, "loss": 0.2482, "lr": 2.8645016263773418e-06, "epoch": 2.5404903655335764, "percentage": 50.81, "elapsed_time": "1:44:57", "remaining_time": "1:41:37", "throughput": 19829.86, "total_tokens": 124884928}
|
|
{"current_steps": 39690, "total_steps": 78105, "loss": 0.3038, "lr": 2.8639489346977316e-06, "epoch": 2.5408104474745534, "percentage": 50.82, "elapsed_time": "1:44:58", "remaining_time": "1:41:36", "throughput": 19830.24, "total_tokens": 124900992}
|
|
{"current_steps": 39695, "total_steps": 78105, "loss": 0.2139, "lr": 2.8633962248444074e-06, "epoch": 2.5411305294155304, "percentage": 50.82, "elapsed_time": "1:44:59", "remaining_time": "1:41:35", "throughput": 19830.65, "total_tokens": 124917248}
|
|
{"current_steps": 39700, "total_steps": 78105, "loss": 0.2057, "lr": 2.862843496844968e-06, "epoch": 2.5414506113565074, "percentage": 50.83, "elapsed_time": "1:44:59", "remaining_time": "1:41:34", "throughput": 19830.94, "total_tokens": 124931648}
|
|
{"current_steps": 39705, "total_steps": 78105, "loss": 0.2156, "lr": 2.862290750727015e-06, "epoch": 2.541770693297484, "percentage": 50.84, "elapsed_time": "1:45:00", "remaining_time": "1:41:33", "throughput": 19831.23, "total_tokens": 124946048}
|
|
{"current_steps": 39710, "total_steps": 78105, "loss": 0.3019, "lr": 2.8617379865181483e-06, "epoch": 2.542090775238461, "percentage": 50.84, "elapsed_time": "1:45:01", "remaining_time": "1:41:32", "throughput": 19831.59, "total_tokens": 124961600}
|
|
{"current_steps": 39715, "total_steps": 78105, "loss": 0.2215, "lr": 2.8611852042459715e-06, "epoch": 2.542410857179438, "percentage": 50.85, "elapsed_time": "1:45:01", "remaining_time": "1:41:31", "throughput": 19832.1, "total_tokens": 124979648}
|
|
{"current_steps": 39720, "total_steps": 78105, "loss": 0.3225, "lr": 2.8606324039380865e-06, "epoch": 2.542730939120415, "percentage": 50.85, "elapsed_time": "1:45:02", "remaining_time": "1:41:30", "throughput": 19832.46, "total_tokens": 124995008}
|
|
{"current_steps": 39725, "total_steps": 78105, "loss": 0.3222, "lr": 2.860079585622098e-06, "epoch": 2.543051021061392, "percentage": 50.86, "elapsed_time": "1:45:03", "remaining_time": "1:41:29", "throughput": 19832.82, "total_tokens": 125010560}
|
|
{"current_steps": 39730, "total_steps": 78105, "loss": 0.3629, "lr": 2.8595267493256103e-06, "epoch": 2.5433711030023685, "percentage": 50.87, "elapsed_time": "1:45:03", "remaining_time": "1:41:28", "throughput": 19833.18, "total_tokens": 125026112}
|
|
{"current_steps": 39735, "total_steps": 78105, "loss": 0.1813, "lr": 2.858973895076229e-06, "epoch": 2.5436911849433455, "percentage": 50.87, "elapsed_time": "1:45:04", "remaining_time": "1:41:28", "throughput": 19833.56, "total_tokens": 125042304}
|
|
{"current_steps": 39740, "total_steps": 78105, "loss": 0.2903, "lr": 2.8584210229015622e-06, "epoch": 2.5440112668843224, "percentage": 50.88, "elapsed_time": "1:45:05", "remaining_time": "1:41:27", "throughput": 19833.94, "total_tokens": 125058368}
|
|
{"current_steps": 39745, "total_steps": 78105, "loss": 0.3299, "lr": 2.857868132829216e-06, "epoch": 2.5443313488252994, "percentage": 50.89, "elapsed_time": "1:45:05", "remaining_time": "1:41:26", "throughput": 19834.32, "total_tokens": 125074496}
|
|
{"current_steps": 39750, "total_steps": 78105, "loss": 0.1938, "lr": 2.857315224886799e-06, "epoch": 2.544651430766276, "percentage": 50.89, "elapsed_time": "1:45:06", "remaining_time": "1:41:25", "throughput": 19834.66, "total_tokens": 125089792}
|
|
{"current_steps": 39755, "total_steps": 78105, "loss": 0.2714, "lr": 2.856762299101921e-06, "epoch": 2.544971512707253, "percentage": 50.9, "elapsed_time": "1:45:07", "remaining_time": "1:41:24", "throughput": 19834.97, "total_tokens": 125104704}
|
|
{"current_steps": 39760, "total_steps": 78105, "loss": 0.2626, "lr": 2.856209355502192e-06, "epoch": 2.54529159464823, "percentage": 50.91, "elapsed_time": "1:45:07", "remaining_time": "1:41:23", "throughput": 19835.38, "total_tokens": 125120896}
|
|
{"current_steps": 39765, "total_steps": 78105, "loss": 0.2318, "lr": 2.8556563941152236e-06, "epoch": 2.545611676589207, "percentage": 50.91, "elapsed_time": "1:45:08", "remaining_time": "1:41:22", "throughput": 19835.7, "total_tokens": 125136192}
|
|
{"current_steps": 39770, "total_steps": 78105, "loss": 0.2387, "lr": 2.8551034149686274e-06, "epoch": 2.545931758530184, "percentage": 50.92, "elapsed_time": "1:45:09", "remaining_time": "1:41:21", "throughput": 19835.99, "total_tokens": 125150592}
|
|
{"current_steps": 39775, "total_steps": 78105, "loss": 0.191, "lr": 2.8545504180900164e-06, "epoch": 2.5462518404711605, "percentage": 50.93, "elapsed_time": "1:45:09", "remaining_time": "1:41:20", "throughput": 19836.4, "total_tokens": 125167360}
|
|
{"current_steps": 39780, "total_steps": 78105, "loss": 0.2008, "lr": 2.8539974035070044e-06, "epoch": 2.5465719224121375, "percentage": 50.93, "elapsed_time": "1:45:10", "remaining_time": "1:41:19", "throughput": 19836.82, "total_tokens": 125184128}
|
|
{"current_steps": 39785, "total_steps": 78105, "loss": 0.2601, "lr": 2.853444371247206e-06, "epoch": 2.5468920043531145, "percentage": 50.94, "elapsed_time": "1:45:11", "remaining_time": "1:41:19", "throughput": 19837.35, "total_tokens": 125202560}
|
|
{"current_steps": 39790, "total_steps": 78105, "loss": 0.243, "lr": 2.852891321338236e-06, "epoch": 2.547212086294091, "percentage": 50.94, "elapsed_time": "1:45:12", "remaining_time": "1:41:18", "throughput": 19837.7, "total_tokens": 125218176}
|
|
{"current_steps": 39795, "total_steps": 78105, "loss": 0.2192, "lr": 2.852338253807712e-06, "epoch": 2.547532168235068, "percentage": 50.95, "elapsed_time": "1:45:12", "remaining_time": "1:41:17", "throughput": 19838.1, "total_tokens": 125234496}
|
|
{"current_steps": 39800, "total_steps": 78105, "loss": 0.2298, "lr": 2.8517851686832506e-06, "epoch": 2.547852250176045, "percentage": 50.96, "elapsed_time": "1:45:13", "remaining_time": "1:41:16", "throughput": 19838.43, "total_tokens": 125249664}
|
|
{"current_steps": 39805, "total_steps": 78105, "loss": 0.327, "lr": 2.8512320659924703e-06, "epoch": 2.548172332117022, "percentage": 50.96, "elapsed_time": "1:45:14", "remaining_time": "1:41:15", "throughput": 19838.92, "total_tokens": 125267008}
|
|
{"current_steps": 39810, "total_steps": 78105, "loss": 0.235, "lr": 2.8506789457629906e-06, "epoch": 2.548492414057999, "percentage": 50.97, "elapsed_time": "1:45:14", "remaining_time": "1:41:14", "throughput": 19839.26, "total_tokens": 125282496}
|
|
{"current_steps": 39815, "total_steps": 78105, "loss": 0.1675, "lr": 2.8501258080224308e-06, "epoch": 2.548812495998976, "percentage": 50.98, "elapsed_time": "1:45:15", "remaining_time": "1:41:13", "throughput": 19839.56, "total_tokens": 125297408}
|
|
{"current_steps": 39820, "total_steps": 78105, "loss": 0.1942, "lr": 2.8495726527984112e-06, "epoch": 2.5491325779399525, "percentage": 50.98, "elapsed_time": "1:45:16", "remaining_time": "1:41:12", "throughput": 19839.92, "total_tokens": 125313088}
|
|
{"current_steps": 39825, "total_steps": 78105, "loss": 0.2023, "lr": 2.849019480118555e-06, "epoch": 2.5494526598809295, "percentage": 50.99, "elapsed_time": "1:45:16", "remaining_time": "1:41:11", "throughput": 19840.27, "total_tokens": 125328448}
|
|
{"current_steps": 39830, "total_steps": 78105, "loss": 0.3413, "lr": 2.8484662900104835e-06, "epoch": 2.5497727418219065, "percentage": 51.0, "elapsed_time": "1:45:17", "remaining_time": "1:41:10", "throughput": 19840.63, "total_tokens": 125343744}
|
|
{"current_steps": 39835, "total_steps": 78105, "loss": 0.28, "lr": 2.8479130825018214e-06, "epoch": 2.550092823762883, "percentage": 51.0, "elapsed_time": "1:45:18", "remaining_time": "1:41:09", "throughput": 19841.02, "total_tokens": 125359936}
|
|
{"current_steps": 39840, "total_steps": 78105, "loss": 0.2827, "lr": 2.8473598576201916e-06, "epoch": 2.55041290570386, "percentage": 51.01, "elapsed_time": "1:45:18", "remaining_time": "1:41:09", "throughput": 19841.42, "total_tokens": 125376128}
|
|
{"current_steps": 39845, "total_steps": 78105, "loss": 0.287, "lr": 2.8468066153932193e-06, "epoch": 2.550732987644837, "percentage": 51.01, "elapsed_time": "1:45:19", "remaining_time": "1:41:08", "throughput": 19841.77, "total_tokens": 125392000}
|
|
{"current_steps": 39850, "total_steps": 78105, "loss": 0.1688, "lr": 2.846253355848532e-06, "epoch": 2.551053069585814, "percentage": 51.02, "elapsed_time": "1:45:20", "remaining_time": "1:41:07", "throughput": 19842.17, "total_tokens": 125408512}
|
|
{"current_steps": 39855, "total_steps": 78105, "loss": 0.2146, "lr": 2.845700079013755e-06, "epoch": 2.551373151526791, "percentage": 51.03, "elapsed_time": "1:45:21", "remaining_time": "1:41:06", "throughput": 19842.61, "total_tokens": 125425408}
|
|
{"current_steps": 39860, "total_steps": 78105, "loss": 0.1933, "lr": 2.8451467849165167e-06, "epoch": 2.551693233467768, "percentage": 51.03, "elapsed_time": "1:45:21", "remaining_time": "1:41:05", "throughput": 19842.92, "total_tokens": 125440704}
|
|
{"current_steps": 39865, "total_steps": 78105, "loss": 0.1679, "lr": 2.844593473584446e-06, "epoch": 2.5520133154087445, "percentage": 51.04, "elapsed_time": "1:45:22", "remaining_time": "1:41:04", "throughput": 19843.28, "total_tokens": 125456448}
|
|
{"current_steps": 39870, "total_steps": 78105, "loss": 0.2809, "lr": 2.8440401450451727e-06, "epoch": 2.5523333973497215, "percentage": 51.05, "elapsed_time": "1:45:23", "remaining_time": "1:41:03", "throughput": 19843.6, "total_tokens": 125471552}
|
|
{"current_steps": 39875, "total_steps": 78105, "loss": 0.2265, "lr": 2.8434867993263254e-06, "epoch": 2.5526534792906985, "percentage": 51.05, "elapsed_time": "1:45:23", "remaining_time": "1:41:02", "throughput": 19843.9, "total_tokens": 125486400}
|
|
{"current_steps": 39880, "total_steps": 78105, "loss": 0.2882, "lr": 2.8429334364555374e-06, "epoch": 2.552973561231675, "percentage": 51.06, "elapsed_time": "1:45:24", "remaining_time": "1:41:01", "throughput": 19844.25, "total_tokens": 125501760}
|
|
{"current_steps": 39885, "total_steps": 78105, "loss": 0.1316, "lr": 2.8423800564604397e-06, "epoch": 2.553293643172652, "percentage": 51.07, "elapsed_time": "1:45:25", "remaining_time": "1:41:00", "throughput": 19844.68, "total_tokens": 125518336}
|
|
{"current_steps": 39890, "total_steps": 78105, "loss": 0.2625, "lr": 2.8418266593686655e-06, "epoch": 2.553613725113629, "percentage": 51.07, "elapsed_time": "1:45:25", "remaining_time": "1:41:00", "throughput": 19844.98, "total_tokens": 125533312}
|
|
{"current_steps": 39895, "total_steps": 78105, "loss": 0.2524, "lr": 2.8412732452078484e-06, "epoch": 2.553933807054606, "percentage": 51.08, "elapsed_time": "1:45:26", "remaining_time": "1:40:59", "throughput": 19845.39, "total_tokens": 125549632}
|
|
{"current_steps": 39900, "total_steps": 78105, "loss": 0.171, "lr": 2.840719814005623e-06, "epoch": 2.554253888995583, "percentage": 51.09, "elapsed_time": "1:45:27", "remaining_time": "1:40:58", "throughput": 19845.67, "total_tokens": 125564224}
|
|
{"current_steps": 39905, "total_steps": 78105, "loss": 0.2254, "lr": 2.8401663657896247e-06, "epoch": 2.55457397093656, "percentage": 51.09, "elapsed_time": "1:45:27", "remaining_time": "1:40:57", "throughput": 19846.11, "total_tokens": 125580928}
|
|
{"current_steps": 39910, "total_steps": 78105, "loss": 0.3978, "lr": 2.83961290058749e-06, "epoch": 2.5548940528775366, "percentage": 51.1, "elapsed_time": "1:45:28", "remaining_time": "1:40:56", "throughput": 19846.68, "total_tokens": 125600128}
|
|
{"current_steps": 39915, "total_steps": 78105, "loss": 0.3144, "lr": 2.839059418426856e-06, "epoch": 2.5552141348185136, "percentage": 51.1, "elapsed_time": "1:45:29", "remaining_time": "1:40:55", "throughput": 19847.02, "total_tokens": 125615744}
|
|
{"current_steps": 39920, "total_steps": 78105, "loss": 0.2625, "lr": 2.8385059193353602e-06, "epoch": 2.5555342167594906, "percentage": 51.11, "elapsed_time": "1:45:29", "remaining_time": "1:40:54", "throughput": 19847.35, "total_tokens": 125631040}
|
|
{"current_steps": 39925, "total_steps": 78105, "loss": 0.2009, "lr": 2.837952403340643e-06, "epoch": 2.555854298700467, "percentage": 51.12, "elapsed_time": "1:45:30", "remaining_time": "1:40:53", "throughput": 19847.82, "total_tokens": 125648448}
|
|
{"current_steps": 39930, "total_steps": 78105, "loss": 0.1901, "lr": 2.8373988704703426e-06, "epoch": 2.556174380641444, "percentage": 51.12, "elapsed_time": "1:45:31", "remaining_time": "1:40:53", "throughput": 19848.16, "total_tokens": 125664128}
|
|
{"current_steps": 39935, "total_steps": 78105, "loss": 0.2775, "lr": 2.8368453207520998e-06, "epoch": 2.556494462582421, "percentage": 51.13, "elapsed_time": "1:45:31", "remaining_time": "1:40:52", "throughput": 19848.5, "total_tokens": 125679360}
|
|
{"current_steps": 39940, "total_steps": 78105, "loss": 0.2407, "lr": 2.836291754213557e-06, "epoch": 2.556814544523398, "percentage": 51.14, "elapsed_time": "1:45:32", "remaining_time": "1:40:51", "throughput": 19848.8, "total_tokens": 125694144}
|
|
{"current_steps": 39945, "total_steps": 78105, "loss": 0.1919, "lr": 2.835738170882355e-06, "epoch": 2.557134626464375, "percentage": 51.14, "elapsed_time": "1:45:33", "remaining_time": "1:40:50", "throughput": 19849.18, "total_tokens": 125710080}
|
|
{"current_steps": 39950, "total_steps": 78105, "loss": 0.1835, "lr": 2.835184570786138e-06, "epoch": 2.5574547084053516, "percentage": 51.15, "elapsed_time": "1:45:33", "remaining_time": "1:40:49", "throughput": 19849.48, "total_tokens": 125724544}
|
|
{"current_steps": 39955, "total_steps": 78105, "loss": 0.1916, "lr": 2.8346309539525494e-06, "epoch": 2.5577747903463286, "percentage": 51.16, "elapsed_time": "1:45:34", "remaining_time": "1:40:48", "throughput": 19849.84, "total_tokens": 125740224}
|
|
{"current_steps": 39960, "total_steps": 78105, "loss": 0.2222, "lr": 2.834077320409234e-06, "epoch": 2.5580948722873056, "percentage": 51.16, "elapsed_time": "1:45:35", "remaining_time": "1:40:47", "throughput": 19849.46, "total_tokens": 125756672}
|
|
{"current_steps": 39965, "total_steps": 78105, "loss": 0.1859, "lr": 2.833523670183837e-06, "epoch": 2.5584149542282826, "percentage": 51.17, "elapsed_time": "1:45:36", "remaining_time": "1:40:46", "throughput": 19849.83, "total_tokens": 125772480}
|
|
{"current_steps": 39970, "total_steps": 78105, "loss": 0.239, "lr": 2.832970003304005e-06, "epoch": 2.558735036169259, "percentage": 51.17, "elapsed_time": "1:45:36", "remaining_time": "1:40:45", "throughput": 19850.18, "total_tokens": 125788096}
|
|
{"current_steps": 39975, "total_steps": 78105, "loss": 0.2278, "lr": 2.832416319797386e-06, "epoch": 2.559055118110236, "percentage": 51.18, "elapsed_time": "1:45:37", "remaining_time": "1:40:45", "throughput": 19850.49, "total_tokens": 125802944}
|
|
{"current_steps": 39980, "total_steps": 78105, "loss": 0.1894, "lr": 2.831862619691627e-06, "epoch": 2.559375200051213, "percentage": 51.19, "elapsed_time": "1:45:38", "remaining_time": "1:40:44", "throughput": 19850.84, "total_tokens": 125818432}
|
|
{"current_steps": 39985, "total_steps": 78105, "loss": 0.2023, "lr": 2.831308903014378e-06, "epoch": 2.55969528199219, "percentage": 51.19, "elapsed_time": "1:45:38", "remaining_time": "1:40:43", "throughput": 19851.26, "total_tokens": 125835264}
|
|
{"current_steps": 39990, "total_steps": 78105, "loss": 0.3285, "lr": 2.8307551697932874e-06, "epoch": 2.560015363933167, "percentage": 51.2, "elapsed_time": "1:45:39", "remaining_time": "1:40:42", "throughput": 19851.67, "total_tokens": 125851712}
|
|
{"current_steps": 39995, "total_steps": 78105, "loss": 0.2648, "lr": 2.8302014200560068e-06, "epoch": 2.5603354458741436, "percentage": 51.21, "elapsed_time": "1:45:40", "remaining_time": "1:40:41", "throughput": 19852.26, "total_tokens": 125871104}
|
|
{"current_steps": 40000, "total_steps": 78105, "loss": 0.2592, "lr": 2.8296476538301875e-06, "epoch": 2.5606555278151206, "percentage": 51.21, "elapsed_time": "1:45:41", "remaining_time": "1:40:40", "throughput": 19852.63, "total_tokens": 125886976}
|
|
{"current_steps": 40005, "total_steps": 78105, "loss": 0.1919, "lr": 2.829093871143481e-06, "epoch": 2.5609756097560976, "percentage": 51.22, "elapsed_time": "1:45:41", "remaining_time": "1:40:39", "throughput": 19852.92, "total_tokens": 125901248}
|
|
{"current_steps": 40010, "total_steps": 78105, "loss": 0.2342, "lr": 2.8285400720235414e-06, "epoch": 2.5612956916970746, "percentage": 51.23, "elapsed_time": "1:45:42", "remaining_time": "1:40:38", "throughput": 19853.2, "total_tokens": 125915584}
|
|
{"current_steps": 40015, "total_steps": 78105, "loss": 0.2611, "lr": 2.8279862564980215e-06, "epoch": 2.561615773638051, "percentage": 51.23, "elapsed_time": "1:45:43", "remaining_time": "1:40:37", "throughput": 19853.55, "total_tokens": 125931328}
|
|
{"current_steps": 40020, "total_steps": 78105, "loss": 0.1823, "lr": 2.827432424594576e-06, "epoch": 2.561935855579028, "percentage": 51.24, "elapsed_time": "1:45:43", "remaining_time": "1:40:36", "throughput": 19853.93, "total_tokens": 125947392}
|
|
{"current_steps": 40025, "total_steps": 78105, "loss": 0.1831, "lr": 2.826878576340861e-06, "epoch": 2.562255937520005, "percentage": 51.25, "elapsed_time": "1:45:44", "remaining_time": "1:40:36", "throughput": 19854.3, "total_tokens": 125963264}
|
|
{"current_steps": 40030, "total_steps": 78105, "loss": 0.2589, "lr": 2.826324711764533e-06, "epoch": 2.562576019460982, "percentage": 51.25, "elapsed_time": "1:45:45", "remaining_time": "1:40:35", "throughput": 19854.65, "total_tokens": 125978944}
|
|
{"current_steps": 40035, "total_steps": 78105, "loss": 0.2792, "lr": 2.825770830893248e-06, "epoch": 2.562896101401959, "percentage": 51.26, "elapsed_time": "1:45:45", "remaining_time": "1:40:34", "throughput": 19855.01, "total_tokens": 125994880}
|
|
{"current_steps": 40040, "total_steps": 78105, "loss": 0.1957, "lr": 2.825216933754665e-06, "epoch": 2.5632161833429357, "percentage": 51.26, "elapsed_time": "1:45:46", "remaining_time": "1:40:33", "throughput": 19855.34, "total_tokens": 126010368}
|
|
{"current_steps": 40045, "total_steps": 78105, "loss": 0.1953, "lr": 2.8246630203764426e-06, "epoch": 2.5635362652839127, "percentage": 51.27, "elapsed_time": "1:45:47", "remaining_time": "1:40:32", "throughput": 19855.91, "total_tokens": 126029504}
|
|
{"current_steps": 40050, "total_steps": 78105, "loss": 0.1655, "lr": 2.8241090907862388e-06, "epoch": 2.5638563472248896, "percentage": 51.28, "elapsed_time": "1:45:47", "remaining_time": "1:40:31", "throughput": 19856.29, "total_tokens": 126045696}
|
|
{"current_steps": 40055, "total_steps": 78105, "loss": 0.24, "lr": 2.823555145011716e-06, "epoch": 2.564176429165866, "percentage": 51.28, "elapsed_time": "1:45:48", "remaining_time": "1:40:30", "throughput": 19856.69, "total_tokens": 126061888}
|
|
{"current_steps": 40060, "total_steps": 78105, "loss": 0.194, "lr": 2.8230011830805347e-06, "epoch": 2.564496511106843, "percentage": 51.29, "elapsed_time": "1:45:49", "remaining_time": "1:40:29", "throughput": 19857.01, "total_tokens": 126077184}
|
|
{"current_steps": 40065, "total_steps": 78105, "loss": 0.2946, "lr": 2.822447205020357e-06, "epoch": 2.56481659304782, "percentage": 51.3, "elapsed_time": "1:45:49", "remaining_time": "1:40:28", "throughput": 19857.33, "total_tokens": 126092352}
|
|
{"current_steps": 40070, "total_steps": 78105, "loss": 0.2528, "lr": 2.821893210858846e-06, "epoch": 2.565136674988797, "percentage": 51.3, "elapsed_time": "1:45:50", "remaining_time": "1:40:28", "throughput": 19857.66, "total_tokens": 126107456}
|
|
{"current_steps": 40075, "total_steps": 78105, "loss": 0.2257, "lr": 2.8213392006236635e-06, "epoch": 2.565456756929774, "percentage": 51.31, "elapsed_time": "1:45:51", "remaining_time": "1:40:27", "throughput": 19857.99, "total_tokens": 126122688}
|
|
{"current_steps": 40080, "total_steps": 78105, "loss": 0.2635, "lr": 2.8207851743424753e-06, "epoch": 2.565776838870751, "percentage": 51.32, "elapsed_time": "1:45:51", "remaining_time": "1:40:26", "throughput": 19858.29, "total_tokens": 126137472}
|
|
{"current_steps": 40085, "total_steps": 78105, "loss": 0.2849, "lr": 2.820231132042947e-06, "epoch": 2.5660969208117277, "percentage": 51.32, "elapsed_time": "1:45:52", "remaining_time": "1:40:25", "throughput": 19858.67, "total_tokens": 126153280}
|
|
{"current_steps": 40090, "total_steps": 78105, "loss": 0.1777, "lr": 2.8196770737527434e-06, "epoch": 2.5664170027527047, "percentage": 51.33, "elapsed_time": "1:45:53", "remaining_time": "1:40:24", "throughput": 19859.04, "total_tokens": 126169152}
|
|
{"current_steps": 40095, "total_steps": 78105, "loss": 0.2273, "lr": 2.8191229994995317e-06, "epoch": 2.5667370846936817, "percentage": 51.33, "elapsed_time": "1:45:53", "remaining_time": "1:40:23", "throughput": 19859.45, "total_tokens": 126185600}
|
|
{"current_steps": 40100, "total_steps": 78105, "loss": 0.2582, "lr": 2.8185689093109803e-06, "epoch": 2.567057166634658, "percentage": 51.34, "elapsed_time": "1:45:54", "remaining_time": "1:40:22", "throughput": 19859.88, "total_tokens": 126202368}
|
|
{"current_steps": 40105, "total_steps": 78105, "loss": 0.2239, "lr": 2.8180148032147563e-06, "epoch": 2.567377248575635, "percentage": 51.35, "elapsed_time": "1:45:55", "remaining_time": "1:40:21", "throughput": 19860.23, "total_tokens": 126217856}
|
|
{"current_steps": 40110, "total_steps": 78105, "loss": 0.2796, "lr": 2.81746068123853e-06, "epoch": 2.567697330516612, "percentage": 51.35, "elapsed_time": "1:45:55", "remaining_time": "1:40:20", "throughput": 19860.55, "total_tokens": 126232768}
|
|
{"current_steps": 40115, "total_steps": 78105, "loss": 0.2338, "lr": 2.8169065434099714e-06, "epoch": 2.568017412457589, "percentage": 51.36, "elapsed_time": "1:45:56", "remaining_time": "1:40:19", "throughput": 19860.9, "total_tokens": 126248320}
|
|
{"current_steps": 40120, "total_steps": 78105, "loss": 0.3427, "lr": 2.81635238975675e-06, "epoch": 2.568337494398566, "percentage": 51.37, "elapsed_time": "1:45:57", "remaining_time": "1:40:18", "throughput": 19861.25, "total_tokens": 126263872}
|
|
{"current_steps": 40125, "total_steps": 78105, "loss": 0.1515, "lr": 2.815798220306538e-06, "epoch": 2.568657576339543, "percentage": 51.37, "elapsed_time": "1:45:57", "remaining_time": "1:40:18", "throughput": 19861.57, "total_tokens": 126278720}
|
|
{"current_steps": 40130, "total_steps": 78105, "loss": 0.2018, "lr": 2.815244035087008e-06, "epoch": 2.5689776582805197, "percentage": 51.38, "elapsed_time": "1:45:58", "remaining_time": "1:40:17", "throughput": 19861.83, "total_tokens": 126293248}
|
|
{"current_steps": 40135, "total_steps": 78105, "loss": 0.2389, "lr": 2.8146898341258328e-06, "epoch": 2.5692977402214967, "percentage": 51.39, "elapsed_time": "1:45:59", "remaining_time": "1:40:16", "throughput": 19862.2, "total_tokens": 126308992}
|
|
{"current_steps": 40140, "total_steps": 78105, "loss": 0.2551, "lr": 2.8141356174506865e-06, "epoch": 2.5696178221624737, "percentage": 51.39, "elapsed_time": "1:45:59", "remaining_time": "1:40:15", "throughput": 19862.56, "total_tokens": 126324480}
|
|
{"current_steps": 40145, "total_steps": 78105, "loss": 0.2702, "lr": 2.813581385089244e-06, "epoch": 2.5699379041034502, "percentage": 51.4, "elapsed_time": "1:46:00", "remaining_time": "1:40:14", "throughput": 19862.88, "total_tokens": 126339392}
|
|
{"current_steps": 40150, "total_steps": 78105, "loss": 0.1983, "lr": 2.8130271370691802e-06, "epoch": 2.5702579860444272, "percentage": 51.41, "elapsed_time": "1:46:01", "remaining_time": "1:40:13", "throughput": 19863.2, "total_tokens": 126354304}
|
|
{"current_steps": 40155, "total_steps": 78105, "loss": 0.2871, "lr": 2.8124728734181716e-06, "epoch": 2.5705780679854042, "percentage": 51.41, "elapsed_time": "1:46:01", "remaining_time": "1:40:12", "throughput": 19863.56, "total_tokens": 126370240}
|
|
{"current_steps": 40160, "total_steps": 78105, "loss": 0.3206, "lr": 2.811918594163896e-06, "epoch": 2.570898149926381, "percentage": 51.42, "elapsed_time": "1:46:02", "remaining_time": "1:40:11", "throughput": 19863.95, "total_tokens": 126386560}
|
|
{"current_steps": 40165, "total_steps": 78105, "loss": 0.1632, "lr": 2.8113642993340296e-06, "epoch": 2.571218231867358, "percentage": 51.42, "elapsed_time": "1:46:03", "remaining_time": "1:40:10", "throughput": 19864.37, "total_tokens": 126403136}
|
|
{"current_steps": 40170, "total_steps": 78105, "loss": 0.2099, "lr": 2.8108099889562524e-06, "epoch": 2.571538313808335, "percentage": 51.43, "elapsed_time": "1:46:03", "remaining_time": "1:40:09", "throughput": 19864.68, "total_tokens": 126417856}
|
|
{"current_steps": 40175, "total_steps": 78105, "loss": 0.2497, "lr": 2.8102556630582435e-06, "epoch": 2.5718583957493117, "percentage": 51.44, "elapsed_time": "1:46:04", "remaining_time": "1:40:08", "throughput": 19865.03, "total_tokens": 126433408}
|
|
{"current_steps": 40180, "total_steps": 78105, "loss": 0.2417, "lr": 2.8097013216676826e-06, "epoch": 2.5721784776902887, "percentage": 51.44, "elapsed_time": "1:46:05", "remaining_time": "1:40:08", "throughput": 19865.32, "total_tokens": 126448064}
|
|
{"current_steps": 40185, "total_steps": 78105, "loss": 0.1375, "lr": 2.809146964812251e-06, "epoch": 2.5724985596312657, "percentage": 51.45, "elapsed_time": "1:46:05", "remaining_time": "1:40:07", "throughput": 19865.61, "total_tokens": 126462784}
|
|
{"current_steps": 40190, "total_steps": 78105, "loss": 0.2106, "lr": 2.80859259251963e-06, "epoch": 2.5728186415722423, "percentage": 51.46, "elapsed_time": "1:46:06", "remaining_time": "1:40:06", "throughput": 19865.96, "total_tokens": 126478336}
|
|
{"current_steps": 40195, "total_steps": 78105, "loss": 0.1834, "lr": 2.8080382048175026e-06, "epoch": 2.5731387235132193, "percentage": 51.46, "elapsed_time": "1:46:07", "remaining_time": "1:40:05", "throughput": 19866.27, "total_tokens": 126493184}
|
|
{"current_steps": 40200, "total_steps": 78105, "loss": 0.2348, "lr": 2.8074838017335515e-06, "epoch": 2.5734588054541963, "percentage": 51.47, "elapsed_time": "1:46:07", "remaining_time": "1:40:04", "throughput": 19866.7, "total_tokens": 126509824}
|
|
{"current_steps": 40205, "total_steps": 78105, "loss": 0.2725, "lr": 2.8069293832954613e-06, "epoch": 2.5737788873951732, "percentage": 51.48, "elapsed_time": "1:46:08", "remaining_time": "1:40:03", "throughput": 19867.1, "total_tokens": 126526080}
|
|
{"current_steps": 40210, "total_steps": 78105, "loss": 0.2202, "lr": 2.806374949530916e-06, "epoch": 2.5740989693361502, "percentage": 51.48, "elapsed_time": "1:46:09", "remaining_time": "1:40:02", "throughput": 19867.45, "total_tokens": 126541568}
|
|
{"current_steps": 40215, "total_steps": 78105, "loss": 0.2271, "lr": 2.805820500467602e-06, "epoch": 2.574419051277127, "percentage": 51.49, "elapsed_time": "1:46:09", "remaining_time": "1:40:01", "throughput": 19867.78, "total_tokens": 126556864}
|
|
{"current_steps": 40220, "total_steps": 78105, "loss": 0.2008, "lr": 2.8052660361332045e-06, "epoch": 2.5747391332181038, "percentage": 51.49, "elapsed_time": "1:46:10", "remaining_time": "1:40:00", "throughput": 19868.19, "total_tokens": 126573376}
|
|
{"current_steps": 40225, "total_steps": 78105, "loss": 0.1942, "lr": 2.804711556555412e-06, "epoch": 2.5750592151590808, "percentage": 51.5, "elapsed_time": "1:46:11", "remaining_time": "1:39:59", "throughput": 19868.69, "total_tokens": 126591168}
|
|
{"current_steps": 40230, "total_steps": 78105, "loss": 0.2544, "lr": 2.8041570617619114e-06, "epoch": 2.5753792971000578, "percentage": 51.51, "elapsed_time": "1:46:12", "remaining_time": "1:39:59", "throughput": 19869.04, "total_tokens": 126606784}
|
|
{"current_steps": 40235, "total_steps": 78105, "loss": 0.1715, "lr": 2.8036025517803915e-06, "epoch": 2.5756993790410343, "percentage": 51.51, "elapsed_time": "1:46:12", "remaining_time": "1:39:58", "throughput": 19869.46, "total_tokens": 126622976}
|
|
{"current_steps": 40240, "total_steps": 78105, "loss": 0.2383, "lr": 2.8030480266385413e-06, "epoch": 2.5760194609820113, "percentage": 51.52, "elapsed_time": "1:46:13", "remaining_time": "1:39:57", "throughput": 19869.82, "total_tokens": 126638720}
|
|
{"current_steps": 40245, "total_steps": 78105, "loss": 0.1301, "lr": 2.8024934863640507e-06, "epoch": 2.5763395429229883, "percentage": 51.53, "elapsed_time": "1:46:14", "remaining_time": "1:39:56", "throughput": 19870.17, "total_tokens": 126654144}
|
|
{"current_steps": 40250, "total_steps": 78105, "loss": 0.2135, "lr": 2.8019389309846116e-06, "epoch": 2.5766596248639653, "percentage": 51.53, "elapsed_time": "1:46:14", "remaining_time": "1:39:55", "throughput": 19870.49, "total_tokens": 126669056}
|
|
{"current_steps": 40255, "total_steps": 78105, "loss": 0.2559, "lr": 2.801384360527916e-06, "epoch": 2.5769797068049423, "percentage": 51.54, "elapsed_time": "1:46:15", "remaining_time": "1:39:54", "throughput": 19870.78, "total_tokens": 126683904}
|
|
{"current_steps": 40260, "total_steps": 78105, "loss": 0.1696, "lr": 2.800829775021654e-06, "epoch": 2.577299788745919, "percentage": 51.55, "elapsed_time": "1:46:16", "remaining_time": "1:39:53", "throughput": 19871.14, "total_tokens": 126699648}
|
|
{"current_steps": 40265, "total_steps": 78105, "loss": 0.2481, "lr": 2.8002751744935204e-06, "epoch": 2.577619870686896, "percentage": 51.55, "elapsed_time": "1:46:16", "remaining_time": "1:39:52", "throughput": 19871.47, "total_tokens": 126715072}
|
|
{"current_steps": 40270, "total_steps": 78105, "loss": 0.1637, "lr": 2.799720558971208e-06, "epoch": 2.577939952627873, "percentage": 51.56, "elapsed_time": "1:46:17", "remaining_time": "1:39:51", "throughput": 19871.85, "total_tokens": 126731072}
|
|
{"current_steps": 40275, "total_steps": 78105, "loss": 0.332, "lr": 2.799165928482413e-06, "epoch": 2.57826003456885, "percentage": 51.57, "elapsed_time": "1:46:18", "remaining_time": "1:39:50", "throughput": 19872.23, "total_tokens": 126747072}
|
|
{"current_steps": 40280, "total_steps": 78105, "loss": 0.1858, "lr": 2.798611283054829e-06, "epoch": 2.5785801165098263, "percentage": 51.57, "elapsed_time": "1:46:18", "remaining_time": "1:39:49", "throughput": 19872.58, "total_tokens": 126762624}
|
|
{"current_steps": 40285, "total_steps": 78105, "loss": 0.219, "lr": 2.798056622716153e-06, "epoch": 2.5789001984508033, "percentage": 51.58, "elapsed_time": "1:46:19", "remaining_time": "1:39:49", "throughput": 19872.92, "total_tokens": 126778112}
|
|
{"current_steps": 40290, "total_steps": 78105, "loss": 0.264, "lr": 2.797501947494083e-06, "epoch": 2.5792202803917803, "percentage": 51.58, "elapsed_time": "1:46:20", "remaining_time": "1:39:48", "throughput": 19873.27, "total_tokens": 126794112}
|
|
{"current_steps": 40295, "total_steps": 78105, "loss": 0.2105, "lr": 2.7969472574163142e-06, "epoch": 2.5795403623327573, "percentage": 51.59, "elapsed_time": "1:46:20", "remaining_time": "1:39:47", "throughput": 19873.58, "total_tokens": 126808832}
|
|
{"current_steps": 40300, "total_steps": 78105, "loss": 0.205, "lr": 2.7963925525105467e-06, "epoch": 2.5798604442737343, "percentage": 51.6, "elapsed_time": "1:46:21", "remaining_time": "1:39:46", "throughput": 19873.98, "total_tokens": 126824960}
|
|
{"current_steps": 40305, "total_steps": 78105, "loss": 0.1652, "lr": 2.7958378328044784e-06, "epoch": 2.580180526214711, "percentage": 51.6, "elapsed_time": "1:46:22", "remaining_time": "1:39:45", "throughput": 19874.26, "total_tokens": 126839552}
|
|
{"current_steps": 40310, "total_steps": 78105, "loss": 0.2387, "lr": 2.79528309832581e-06, "epoch": 2.580500608155688, "percentage": 51.61, "elapsed_time": "1:46:22", "remaining_time": "1:39:44", "throughput": 19874.62, "total_tokens": 126855488}
|
|
{"current_steps": 40315, "total_steps": 78105, "loss": 0.1717, "lr": 2.794728349102242e-06, "epoch": 2.580820690096665, "percentage": 51.62, "elapsed_time": "1:46:23", "remaining_time": "1:39:43", "throughput": 19875.07, "total_tokens": 126872320}
|
|
{"current_steps": 40320, "total_steps": 78105, "loss": 0.357, "lr": 2.7941735851614755e-06, "epoch": 2.5811407720376414, "percentage": 51.62, "elapsed_time": "1:46:24", "remaining_time": "1:39:42", "throughput": 19875.41, "total_tokens": 126887680}
|
|
{"current_steps": 40325, "total_steps": 78105, "loss": 0.1238, "lr": 2.793618806531212e-06, "epoch": 2.5814608539786184, "percentage": 51.63, "elapsed_time": "1:46:24", "remaining_time": "1:39:41", "throughput": 19875.81, "total_tokens": 126904064}
|
|
{"current_steps": 40330, "total_steps": 78105, "loss": 0.2239, "lr": 2.7930640132391545e-06, "epoch": 2.5817809359195953, "percentage": 51.64, "elapsed_time": "1:46:25", "remaining_time": "1:39:40", "throughput": 19876.13, "total_tokens": 126919232}
|
|
{"current_steps": 40335, "total_steps": 78105, "loss": 0.2197, "lr": 2.7925092053130064e-06, "epoch": 2.5821010178605723, "percentage": 51.64, "elapsed_time": "1:46:26", "remaining_time": "1:39:40", "throughput": 19876.47, "total_tokens": 126934784}
|
|
{"current_steps": 40340, "total_steps": 78105, "loss": 0.3262, "lr": 2.7919543827804735e-06, "epoch": 2.5824210998015493, "percentage": 51.65, "elapsed_time": "1:46:26", "remaining_time": "1:39:39", "throughput": 19876.84, "total_tokens": 126950464}
|
|
{"current_steps": 40345, "total_steps": 78105, "loss": 0.2917, "lr": 2.791399545669259e-06, "epoch": 2.5827411817425263, "percentage": 51.65, "elapsed_time": "1:46:28", "remaining_time": "1:39:38", "throughput": 19877.72, "total_tokens": 126979456}
|
|
{"current_steps": 40350, "total_steps": 78105, "loss": 0.2101, "lr": 2.7908446940070687e-06, "epoch": 2.583061263683503, "percentage": 51.66, "elapsed_time": "1:46:28", "remaining_time": "1:39:37", "throughput": 19878.07, "total_tokens": 126995136}
|
|
{"current_steps": 40355, "total_steps": 78105, "loss": 0.2366, "lr": 2.7902898278216095e-06, "epoch": 2.58338134562448, "percentage": 51.67, "elapsed_time": "1:46:29", "remaining_time": "1:39:36", "throughput": 19878.41, "total_tokens": 127010816}
|
|
{"current_steps": 40360, "total_steps": 78105, "loss": 0.2774, "lr": 2.7897349471405884e-06, "epoch": 2.583701427565457, "percentage": 51.67, "elapsed_time": "1:46:30", "remaining_time": "1:39:36", "throughput": 19878.76, "total_tokens": 127026432}
|
|
{"current_steps": 40365, "total_steps": 78105, "loss": 0.178, "lr": 2.789180051991713e-06, "epoch": 2.5840215095064334, "percentage": 51.68, "elapsed_time": "1:46:30", "remaining_time": "1:39:35", "throughput": 19879.14, "total_tokens": 127042496}
|
|
{"current_steps": 40370, "total_steps": 78105, "loss": 0.2812, "lr": 2.7886251424026924e-06, "epoch": 2.5843415914474104, "percentage": 51.69, "elapsed_time": "1:46:31", "remaining_time": "1:39:34", "throughput": 19879.47, "total_tokens": 127058112}
|
|
{"current_steps": 40375, "total_steps": 78105, "loss": 0.1885, "lr": 2.7880702184012355e-06, "epoch": 2.5846616733883874, "percentage": 51.69, "elapsed_time": "1:46:32", "remaining_time": "1:39:33", "throughput": 19879.81, "total_tokens": 127073856}
|
|
{"current_steps": 40380, "total_steps": 78105, "loss": 0.207, "lr": 2.787515280015052e-06, "epoch": 2.5849817553293644, "percentage": 51.7, "elapsed_time": "1:46:32", "remaining_time": "1:39:32", "throughput": 19880.17, "total_tokens": 127089856}
|
|
{"current_steps": 40385, "total_steps": 78105, "loss": 0.176, "lr": 2.786960327271853e-06, "epoch": 2.5853018372703414, "percentage": 51.71, "elapsed_time": "1:46:33", "remaining_time": "1:39:31", "throughput": 19880.48, "total_tokens": 127104448}
|
|
{"current_steps": 40390, "total_steps": 78105, "loss": 0.2844, "lr": 2.7864053601993506e-06, "epoch": 2.5856219192113183, "percentage": 51.71, "elapsed_time": "1:46:34", "remaining_time": "1:39:30", "throughput": 19880.83, "total_tokens": 127120000}
|
|
{"current_steps": 40395, "total_steps": 78105, "loss": 0.3363, "lr": 2.7858503788252562e-06, "epoch": 2.585942001152295, "percentage": 51.72, "elapsed_time": "1:46:34", "remaining_time": "1:39:29", "throughput": 19881.27, "total_tokens": 127137024}
|
|
{"current_steps": 40400, "total_steps": 78105, "loss": 0.2154, "lr": 2.785295383177283e-06, "epoch": 2.586262083093272, "percentage": 51.73, "elapsed_time": "1:46:35", "remaining_time": "1:39:28", "throughput": 19881.62, "total_tokens": 127152384}
|
|
{"current_steps": 40405, "total_steps": 78105, "loss": 0.1823, "lr": 2.784740373283144e-06, "epoch": 2.586582165034249, "percentage": 51.73, "elapsed_time": "1:46:36", "remaining_time": "1:39:27", "throughput": 19881.94, "total_tokens": 127168192}
|
|
{"current_steps": 40410, "total_steps": 78105, "loss": 0.2541, "lr": 2.784185349170554e-06, "epoch": 2.5869022469752254, "percentage": 51.74, "elapsed_time": "1:46:36", "remaining_time": "1:39:27", "throughput": 19882.31, "total_tokens": 127184192}
|
|
{"current_steps": 40415, "total_steps": 78105, "loss": 0.2417, "lr": 2.783630310867228e-06, "epoch": 2.5872223289162024, "percentage": 51.74, "elapsed_time": "1:46:37", "remaining_time": "1:39:26", "throughput": 19882.64, "total_tokens": 127199360}
|
|
{"current_steps": 40420, "total_steps": 78105, "loss": 0.2424, "lr": 2.783075258400881e-06, "epoch": 2.5875424108571794, "percentage": 51.75, "elapsed_time": "1:46:38", "remaining_time": "1:39:25", "throughput": 19882.92, "total_tokens": 127214016}
|
|
{"current_steps": 40425, "total_steps": 78105, "loss": 0.2632, "lr": 2.782520191799231e-06, "epoch": 2.5878624927981564, "percentage": 51.76, "elapsed_time": "1:46:38", "remaining_time": "1:39:24", "throughput": 19883.43, "total_tokens": 127232000}
|
|
{"current_steps": 40430, "total_steps": 78105, "loss": 0.2933, "lr": 2.7819651110899943e-06, "epoch": 2.5881825747391334, "percentage": 51.76, "elapsed_time": "1:46:39", "remaining_time": "1:39:23", "throughput": 19883.78, "total_tokens": 127247872}
|
|
{"current_steps": 40435, "total_steps": 78105, "loss": 0.3145, "lr": 2.781410016300888e-06, "epoch": 2.5885026566801104, "percentage": 51.77, "elapsed_time": "1:46:40", "remaining_time": "1:39:22", "throughput": 19884.17, "total_tokens": 127264128}
|
|
{"current_steps": 40440, "total_steps": 78105, "loss": 0.1872, "lr": 2.7808549074596318e-06, "epoch": 2.588822738621087, "percentage": 51.78, "elapsed_time": "1:46:40", "remaining_time": "1:39:21", "throughput": 19884.48, "total_tokens": 127279296}
|
|
{"current_steps": 40445, "total_steps": 78105, "loss": 0.1627, "lr": 2.780299784593944e-06, "epoch": 2.589142820562064, "percentage": 51.78, "elapsed_time": "1:46:41", "remaining_time": "1:39:20", "throughput": 19884.86, "total_tokens": 127295232}
|
|
{"current_steps": 40450, "total_steps": 78105, "loss": 0.2176, "lr": 2.7797446477315448e-06, "epoch": 2.589462902503041, "percentage": 51.79, "elapsed_time": "1:46:42", "remaining_time": "1:39:19", "throughput": 19885.23, "total_tokens": 127311488}
|
|
{"current_steps": 40455, "total_steps": 78105, "loss": 0.2127, "lr": 2.7791894969001555e-06, "epoch": 2.5897829844440174, "percentage": 51.8, "elapsed_time": "1:46:42", "remaining_time": "1:39:19", "throughput": 19885.57, "total_tokens": 127326912}
|
|
{"current_steps": 40460, "total_steps": 78105, "loss": 0.2832, "lr": 2.7786343321274967e-06, "epoch": 2.5901030663849944, "percentage": 51.8, "elapsed_time": "1:46:43", "remaining_time": "1:39:18", "throughput": 19885.88, "total_tokens": 127341888}
|
|
{"current_steps": 40465, "total_steps": 78105, "loss": 0.1455, "lr": 2.778079153441291e-06, "epoch": 2.5904231483259714, "percentage": 51.81, "elapsed_time": "1:46:44", "remaining_time": "1:39:17", "throughput": 19886.23, "total_tokens": 127357120}
|
|
{"current_steps": 40470, "total_steps": 78105, "loss": 0.3235, "lr": 2.777523960869261e-06, "epoch": 2.5907432302669484, "percentage": 51.81, "elapsed_time": "1:46:44", "remaining_time": "1:39:16", "throughput": 19886.64, "total_tokens": 127373696}
|
|
{"current_steps": 40475, "total_steps": 78105, "loss": 0.2108, "lr": 2.776968754439129e-06, "epoch": 2.5910633122079254, "percentage": 51.82, "elapsed_time": "1:46:45", "remaining_time": "1:39:15", "throughput": 19886.99, "total_tokens": 127389440}
|
|
{"current_steps": 40480, "total_steps": 78105, "loss": 0.2055, "lr": 2.7764135341786214e-06, "epoch": 2.591383394148902, "percentage": 51.83, "elapsed_time": "1:46:46", "remaining_time": "1:39:14", "throughput": 19887.31, "total_tokens": 127404800}
|
|
{"current_steps": 40485, "total_steps": 78105, "loss": 0.2051, "lr": 2.775858300115461e-06, "epoch": 2.591703476089879, "percentage": 51.83, "elapsed_time": "1:46:46", "remaining_time": "1:39:13", "throughput": 19887.62, "total_tokens": 127419904}
|
|
{"current_steps": 40490, "total_steps": 78105, "loss": 0.3448, "lr": 2.7753030522773745e-06, "epoch": 2.592023558030856, "percentage": 51.84, "elapsed_time": "1:46:47", "remaining_time": "1:39:12", "throughput": 19887.92, "total_tokens": 127435200}
|
|
{"current_steps": 40495, "total_steps": 78105, "loss": 0.2214, "lr": 2.774747790692087e-06, "epoch": 2.592343639971833, "percentage": 51.85, "elapsed_time": "1:46:48", "remaining_time": "1:39:11", "throughput": 19888.23, "total_tokens": 127450496}
|
|
{"current_steps": 40500, "total_steps": 78105, "loss": 0.1552, "lr": 2.7741925153873263e-06, "epoch": 2.5926637219128095, "percentage": 51.85, "elapsed_time": "1:46:49", "remaining_time": "1:39:10", "throughput": 19888.64, "total_tokens": 127467328}
|
|
{"current_steps": 40505, "total_steps": 78105, "loss": 0.2552, "lr": 2.773637226390819e-06, "epoch": 2.5929838038537865, "percentage": 51.86, "elapsed_time": "1:46:49", "remaining_time": "1:39:10", "throughput": 19888.99, "total_tokens": 127483072}
|
|
{"current_steps": 40510, "total_steps": 78105, "loss": 0.2069, "lr": 2.7730819237302935e-06, "epoch": 2.5933038857947635, "percentage": 51.87, "elapsed_time": "1:46:50", "remaining_time": "1:39:09", "throughput": 19889.42, "total_tokens": 127499840}
|
|
{"current_steps": 40515, "total_steps": 78105, "loss": 0.2342, "lr": 2.7725266074334807e-06, "epoch": 2.5936239677357404, "percentage": 51.87, "elapsed_time": "1:46:51", "remaining_time": "1:39:08", "throughput": 19889.71, "total_tokens": 127514816}
|
|
{"current_steps": 40520, "total_steps": 78105, "loss": 0.2927, "lr": 2.771971277528108e-06, "epoch": 2.5939440496767174, "percentage": 51.88, "elapsed_time": "1:46:51", "remaining_time": "1:39:07", "throughput": 19890.09, "total_tokens": 127531072}
|
|
{"current_steps": 40525, "total_steps": 78105, "loss": 0.2973, "lr": 2.771415934041906e-06, "epoch": 2.594264131617694, "percentage": 51.89, "elapsed_time": "1:46:52", "remaining_time": "1:39:06", "throughput": 19890.61, "total_tokens": 127549952}
|
|
{"current_steps": 40530, "total_steps": 78105, "loss": 0.1886, "lr": 2.7708605770026064e-06, "epoch": 2.594584213558671, "percentage": 51.89, "elapsed_time": "1:46:53", "remaining_time": "1:39:05", "throughput": 19891.1, "total_tokens": 127567872}
|
|
{"current_steps": 40535, "total_steps": 78105, "loss": 0.15, "lr": 2.7703052064379398e-06, "epoch": 2.594904295499648, "percentage": 51.9, "elapsed_time": "1:46:53", "remaining_time": "1:39:04", "throughput": 19891.38, "total_tokens": 127582528}
|
|
{"current_steps": 40540, "total_steps": 78105, "loss": 0.1754, "lr": 2.76974982237564e-06, "epoch": 2.595224377440625, "percentage": 51.9, "elapsed_time": "1:46:54", "remaining_time": "1:39:03", "throughput": 19891.73, "total_tokens": 127598144}
|
|
{"current_steps": 40545, "total_steps": 78105, "loss": 0.2159, "lr": 2.7691944248434388e-06, "epoch": 2.5955444593816015, "percentage": 51.91, "elapsed_time": "1:46:55", "remaining_time": "1:39:03", "throughput": 19892.16, "total_tokens": 127615360}
|
|
{"current_steps": 40550, "total_steps": 78105, "loss": 0.2137, "lr": 2.7686390138690706e-06, "epoch": 2.5958645413225785, "percentage": 51.92, "elapsed_time": "1:46:56", "remaining_time": "1:39:02", "throughput": 19892.44, "total_tokens": 127629888}
|
|
{"current_steps": 40555, "total_steps": 78105, "loss": 0.2748, "lr": 2.7680835894802693e-06, "epoch": 2.5961846232635555, "percentage": 51.92, "elapsed_time": "1:46:56", "remaining_time": "1:39:01", "throughput": 19892.69, "total_tokens": 127644224}
|
|
{"current_steps": 40560, "total_steps": 78105, "loss": 0.2182, "lr": 2.767528151704769e-06, "epoch": 2.5965047052045325, "percentage": 51.93, "elapsed_time": "1:46:57", "remaining_time": "1:39:00", "throughput": 19893.07, "total_tokens": 127659904}
|
|
{"current_steps": 40565, "total_steps": 78105, "loss": 0.2189, "lr": 2.766972700570306e-06, "epoch": 2.5968247871455095, "percentage": 51.94, "elapsed_time": "1:46:57", "remaining_time": "1:38:59", "throughput": 19893.43, "total_tokens": 127675904}
|
|
{"current_steps": 40570, "total_steps": 78105, "loss": 0.2486, "lr": 2.766417236104618e-06, "epoch": 2.597144869086486, "percentage": 51.94, "elapsed_time": "1:46:58", "remaining_time": "1:38:58", "throughput": 19893.78, "total_tokens": 127691904}
|
|
{"current_steps": 40575, "total_steps": 78105, "loss": 0.3322, "lr": 2.765861758335441e-06, "epoch": 2.597464951027463, "percentage": 51.95, "elapsed_time": "1:46:59", "remaining_time": "1:38:57", "throughput": 19894.11, "total_tokens": 127707264}
|
|
{"current_steps": 40580, "total_steps": 78105, "loss": 0.2168, "lr": 2.765306267290513e-06, "epoch": 2.59778503296844, "percentage": 51.96, "elapsed_time": "1:47:00", "remaining_time": "1:38:56", "throughput": 19894.56, "total_tokens": 127724480}
|
|
{"current_steps": 40585, "total_steps": 78105, "loss": 0.163, "lr": 2.764750762997571e-06, "epoch": 2.5981051149094165, "percentage": 51.96, "elapsed_time": "1:47:00", "remaining_time": "1:38:55", "throughput": 19894.97, "total_tokens": 127740736}
|
|
{"current_steps": 40590, "total_steps": 78105, "loss": 0.1926, "lr": 2.764195245484355e-06, "epoch": 2.5984251968503935, "percentage": 51.97, "elapsed_time": "1:47:01", "remaining_time": "1:38:54", "throughput": 19895.35, "total_tokens": 127756480}
|
|
{"current_steps": 40595, "total_steps": 78105, "loss": 0.174, "lr": 2.7636397147786053e-06, "epoch": 2.5987452787913705, "percentage": 51.97, "elapsed_time": "1:47:02", "remaining_time": "1:38:54", "throughput": 19895.71, "total_tokens": 127772352}
|
|
{"current_steps": 40600, "total_steps": 78105, "loss": 0.2995, "lr": 2.7630841709080615e-06, "epoch": 2.5990653607323475, "percentage": 51.98, "elapsed_time": "1:47:02", "remaining_time": "1:38:53", "throughput": 19896.0, "total_tokens": 127787136}
|
|
{"current_steps": 40605, "total_steps": 78105, "loss": 0.249, "lr": 2.7625286139004635e-06, "epoch": 2.5993854426733245, "percentage": 51.99, "elapsed_time": "1:47:03", "remaining_time": "1:38:52", "throughput": 19896.44, "total_tokens": 127803840}
|
|
{"current_steps": 40610, "total_steps": 78105, "loss": 0.1906, "lr": 2.761973043783555e-06, "epoch": 2.5997055246143015, "percentage": 51.99, "elapsed_time": "1:47:04", "remaining_time": "1:38:51", "throughput": 19896.8, "total_tokens": 127819712}
|
|
{"current_steps": 40615, "total_steps": 78105, "loss": 0.3053, "lr": 2.7614174605850776e-06, "epoch": 2.600025606555278, "percentage": 52.0, "elapsed_time": "1:47:04", "remaining_time": "1:38:50", "throughput": 19897.16, "total_tokens": 127835328}
|
|
{"current_steps": 40620, "total_steps": 78105, "loss": 0.3161, "lr": 2.7608618643327724e-06, "epoch": 2.600345688496255, "percentage": 52.01, "elapsed_time": "1:47:05", "remaining_time": "1:38:49", "throughput": 19897.57, "total_tokens": 127851904}
|
|
{"current_steps": 40625, "total_steps": 78105, "loss": 0.1776, "lr": 2.7603062550543846e-06, "epoch": 2.600665770437232, "percentage": 52.01, "elapsed_time": "1:47:06", "remaining_time": "1:38:48", "throughput": 19897.88, "total_tokens": 127866624}
|
|
{"current_steps": 40630, "total_steps": 78105, "loss": 0.3068, "lr": 2.759750632777659e-06, "epoch": 2.6009858523782086, "percentage": 52.02, "elapsed_time": "1:47:06", "remaining_time": "1:38:47", "throughput": 19898.19, "total_tokens": 127881664}
|
|
{"current_steps": 40635, "total_steps": 78105, "loss": 0.1777, "lr": 2.75919499753034e-06, "epoch": 2.6013059343191856, "percentage": 52.03, "elapsed_time": "1:47:07", "remaining_time": "1:38:46", "throughput": 19898.5, "total_tokens": 127896704}
|
|
{"current_steps": 40640, "total_steps": 78105, "loss": 0.2716, "lr": 2.758639349340173e-06, "epoch": 2.6016260162601625, "percentage": 52.03, "elapsed_time": "1:47:08", "remaining_time": "1:38:46", "throughput": 19899.03, "total_tokens": 127915264}
|
|
{"current_steps": 40645, "total_steps": 78105, "loss": 0.2362, "lr": 2.758083688234904e-06, "epoch": 2.6019460982011395, "percentage": 52.04, "elapsed_time": "1:47:08", "remaining_time": "1:38:45", "throughput": 19899.4, "total_tokens": 127931264}
|
|
{"current_steps": 40650, "total_steps": 78105, "loss": 0.1817, "lr": 2.757528014242279e-06, "epoch": 2.6022661801421165, "percentage": 52.05, "elapsed_time": "1:47:09", "remaining_time": "1:38:44", "throughput": 19899.81, "total_tokens": 127948032}
|
|
{"current_steps": 40655, "total_steps": 78105, "loss": 0.2391, "lr": 2.7569723273900474e-06, "epoch": 2.6025862620830935, "percentage": 52.05, "elapsed_time": "1:47:10", "remaining_time": "1:38:43", "throughput": 19900.28, "total_tokens": 127965312}
|
|
{"current_steps": 40660, "total_steps": 78105, "loss": 0.1652, "lr": 2.756416627705956e-06, "epoch": 2.60290634402407, "percentage": 52.06, "elapsed_time": "1:47:10", "remaining_time": "1:38:42", "throughput": 19900.6, "total_tokens": 127980544}
|
|
{"current_steps": 40665, "total_steps": 78105, "loss": 0.1585, "lr": 2.755860915217754e-06, "epoch": 2.603226425965047, "percentage": 52.06, "elapsed_time": "1:47:11", "remaining_time": "1:38:41", "throughput": 19900.96, "total_tokens": 127996224}
|
|
{"current_steps": 40670, "total_steps": 78105, "loss": 0.163, "lr": 2.75530518995319e-06, "epoch": 2.603546507906024, "percentage": 52.07, "elapsed_time": "1:47:12", "remaining_time": "1:38:40", "throughput": 19901.35, "total_tokens": 128012608}
|
|
{"current_steps": 40675, "total_steps": 78105, "loss": 0.3127, "lr": 2.754749451940015e-06, "epoch": 2.6038665898470006, "percentage": 52.08, "elapsed_time": "1:47:13", "remaining_time": "1:38:39", "throughput": 19901.66, "total_tokens": 128027584}
|
|
{"current_steps": 40680, "total_steps": 78105, "loss": 0.2108, "lr": 2.7541937012059787e-06, "epoch": 2.6041866717879776, "percentage": 52.08, "elapsed_time": "1:47:13", "remaining_time": "1:38:38", "throughput": 19902.0, "total_tokens": 128043648}
|
|
{"current_steps": 40685, "total_steps": 78105, "loss": 0.2278, "lr": 2.753637937778834e-06, "epoch": 2.6045067537289546, "percentage": 52.09, "elapsed_time": "1:47:14", "remaining_time": "1:38:38", "throughput": 19902.34, "total_tokens": 128058880}
|
|
{"current_steps": 40690, "total_steps": 78105, "loss": 0.2063, "lr": 2.7530821616863317e-06, "epoch": 2.6048268356699316, "percentage": 52.1, "elapsed_time": "1:47:15", "remaining_time": "1:38:37", "throughput": 19902.73, "total_tokens": 128075264}
|
|
{"current_steps": 40695, "total_steps": 78105, "loss": 0.2888, "lr": 2.7525263729562247e-06, "epoch": 2.6051469176109086, "percentage": 52.1, "elapsed_time": "1:47:15", "remaining_time": "1:38:36", "throughput": 19903.13, "total_tokens": 128091904}
|
|
{"current_steps": 40700, "total_steps": 78105, "loss": 0.2072, "lr": 2.7519705716162653e-06, "epoch": 2.6054669995518855, "percentage": 52.11, "elapsed_time": "1:47:16", "remaining_time": "1:38:35", "throughput": 19903.41, "total_tokens": 128106560}
|
|
{"current_steps": 40705, "total_steps": 78105, "loss": 0.1759, "lr": 2.751414757694208e-06, "epoch": 2.605787081492862, "percentage": 52.12, "elapsed_time": "1:47:17", "remaining_time": "1:38:34", "throughput": 19903.76, "total_tokens": 128122496}
|
|
{"current_steps": 40710, "total_steps": 78105, "loss": 0.2457, "lr": 2.750858931217808e-06, "epoch": 2.606107163433839, "percentage": 52.12, "elapsed_time": "1:47:17", "remaining_time": "1:38:33", "throughput": 19904.06, "total_tokens": 128136960}
|
|
{"current_steps": 40715, "total_steps": 78105, "loss": 0.1815, "lr": 2.7503030922148194e-06, "epoch": 2.606427245374816, "percentage": 52.13, "elapsed_time": "1:47:18", "remaining_time": "1:38:32", "throughput": 19904.45, "total_tokens": 128153472}
|
|
{"current_steps": 40720, "total_steps": 78105, "loss": 0.2602, "lr": 2.7497472407129984e-06, "epoch": 2.6067473273157926, "percentage": 52.13, "elapsed_time": "1:47:19", "remaining_time": "1:38:31", "throughput": 19904.83, "total_tokens": 128169664}
|
|
{"current_steps": 40725, "total_steps": 78105, "loss": 0.1712, "lr": 2.749191376740101e-06, "epoch": 2.6070674092567696, "percentage": 52.14, "elapsed_time": "1:47:19", "remaining_time": "1:38:30", "throughput": 19905.23, "total_tokens": 128186240}
|
|
{"current_steps": 40730, "total_steps": 78105, "loss": 0.2503, "lr": 2.748635500323883e-06, "epoch": 2.6073874911977466, "percentage": 52.15, "elapsed_time": "1:47:20", "remaining_time": "1:38:29", "throughput": 19905.53, "total_tokens": 128201216}
|
|
{"current_steps": 40735, "total_steps": 78105, "loss": 0.2093, "lr": 2.7480796114921044e-06, "epoch": 2.6077075731387236, "percentage": 52.15, "elapsed_time": "1:47:21", "remaining_time": "1:38:29", "throughput": 19905.87, "total_tokens": 128217152}
|
|
{"current_steps": 40740, "total_steps": 78105, "loss": 0.2199, "lr": 2.7475237102725216e-06, "epoch": 2.6080276550797006, "percentage": 52.16, "elapsed_time": "1:47:21", "remaining_time": "1:38:28", "throughput": 19906.2, "total_tokens": 128232576}
|
|
{"current_steps": 40745, "total_steps": 78105, "loss": 0.2027, "lr": 2.7469677966928943e-06, "epoch": 2.6083477370206776, "percentage": 52.17, "elapsed_time": "1:47:22", "remaining_time": "1:38:27", "throughput": 19906.49, "total_tokens": 128247360}
|
|
{"current_steps": 40750, "total_steps": 78105, "loss": 0.3396, "lr": 2.746411870780982e-06, "epoch": 2.608667818961654, "percentage": 52.17, "elapsed_time": "1:47:23", "remaining_time": "1:38:26", "throughput": 19906.82, "total_tokens": 128262976}
|
|
{"current_steps": 40755, "total_steps": 78105, "loss": 0.2575, "lr": 2.745855932564544e-06, "epoch": 2.608987900902631, "percentage": 52.18, "elapsed_time": "1:47:23", "remaining_time": "1:38:25", "throughput": 19907.16, "total_tokens": 128278528}
|
|
{"current_steps": 40760, "total_steps": 78105, "loss": 0.3572, "lr": 2.7452999820713417e-06, "epoch": 2.609307982843608, "percentage": 52.19, "elapsed_time": "1:47:24", "remaining_time": "1:38:24", "throughput": 19907.49, "total_tokens": 128293952}
|
|
{"current_steps": 40765, "total_steps": 78105, "loss": 0.2125, "lr": 2.7447440193291353e-06, "epoch": 2.6096280647845846, "percentage": 52.19, "elapsed_time": "1:47:25", "remaining_time": "1:38:23", "throughput": 19907.87, "total_tokens": 128310208}
|
|
{"current_steps": 40770, "total_steps": 78105, "loss": 0.219, "lr": 2.744188044365688e-06, "epoch": 2.6099481467255616, "percentage": 52.2, "elapsed_time": "1:47:25", "remaining_time": "1:38:22", "throughput": 19908.2, "total_tokens": 128325376}
|
|
{"current_steps": 40775, "total_steps": 78105, "loss": 0.1755, "lr": 2.7436320572087614e-06, "epoch": 2.6102682286665386, "percentage": 52.21, "elapsed_time": "1:47:26", "remaining_time": "1:38:21", "throughput": 19908.57, "total_tokens": 128341952}
|
|
{"current_steps": 40780, "total_steps": 78105, "loss": 0.2195, "lr": 2.743076057886119e-06, "epoch": 2.6105883106075156, "percentage": 52.21, "elapsed_time": "1:47:27", "remaining_time": "1:38:21", "throughput": 19908.98, "total_tokens": 128358528}
|
|
{"current_steps": 40785, "total_steps": 78105, "loss": 0.2159, "lr": 2.7425200464255253e-06, "epoch": 2.6109083925484926, "percentage": 52.22, "elapsed_time": "1:47:27", "remaining_time": "1:38:20", "throughput": 19909.27, "total_tokens": 128373568}
|
|
{"current_steps": 40790, "total_steps": 78105, "loss": 0.2201, "lr": 2.741964022854742e-06, "epoch": 2.611228474489469, "percentage": 52.22, "elapsed_time": "1:47:28", "remaining_time": "1:38:19", "throughput": 19909.58, "total_tokens": 128389056}
|
|
{"current_steps": 40795, "total_steps": 78105, "loss": 0.2496, "lr": 2.7414079872015367e-06, "epoch": 2.611548556430446, "percentage": 52.23, "elapsed_time": "1:47:29", "remaining_time": "1:38:18", "throughput": 19909.86, "total_tokens": 128404160}
|
|
{"current_steps": 40800, "total_steps": 78105, "loss": 0.2125, "lr": 2.740851939493674e-06, "epoch": 2.611868638371423, "percentage": 52.24, "elapsed_time": "1:47:29", "remaining_time": "1:38:17", "throughput": 19910.21, "total_tokens": 128419904}
|
|
{"current_steps": 40805, "total_steps": 78105, "loss": 0.2583, "lr": 2.7402958797589197e-06, "epoch": 2.6121887203124, "percentage": 52.24, "elapsed_time": "1:47:30", "remaining_time": "1:38:16", "throughput": 19910.56, "total_tokens": 128435392}
|
|
{"current_steps": 40810, "total_steps": 78105, "loss": 0.2339, "lr": 2.739739808025041e-06, "epoch": 2.6125088022533767, "percentage": 52.25, "elapsed_time": "1:47:31", "remaining_time": "1:38:15", "throughput": 19910.89, "total_tokens": 128450880}
|
|
{"current_steps": 40815, "total_steps": 78105, "loss": 0.3018, "lr": 2.7391837243198056e-06, "epoch": 2.6128288841943537, "percentage": 52.26, "elapsed_time": "1:47:32", "remaining_time": "1:38:14", "throughput": 19911.34, "total_tokens": 128468096}
|
|
{"current_steps": 40820, "total_steps": 78105, "loss": 0.2648, "lr": 2.7386276286709795e-06, "epoch": 2.6131489661353307, "percentage": 52.26, "elapsed_time": "1:47:32", "remaining_time": "1:38:13", "throughput": 19911.71, "total_tokens": 128483904}
|
|
{"current_steps": 40825, "total_steps": 78105, "loss": 0.304, "lr": 2.7380715211063336e-06, "epoch": 2.6134690480763076, "percentage": 52.27, "elapsed_time": "1:47:33", "remaining_time": "1:38:12", "throughput": 19912.05, "total_tokens": 128499648}
|
|
{"current_steps": 40830, "total_steps": 78105, "loss": 0.196, "lr": 2.7375154016536357e-06, "epoch": 2.6137891300172846, "percentage": 52.28, "elapsed_time": "1:47:34", "remaining_time": "1:38:12", "throughput": 19912.37, "total_tokens": 128515136}
|
|
{"current_steps": 40835, "total_steps": 78105, "loss": 0.24, "lr": 2.736959270340656e-06, "epoch": 2.614109211958261, "percentage": 52.28, "elapsed_time": "1:47:34", "remaining_time": "1:38:11", "throughput": 19912.64, "total_tokens": 128529728}
|
|
{"current_steps": 40840, "total_steps": 78105, "loss": 0.2053, "lr": 2.736403127195165e-06, "epoch": 2.614429293899238, "percentage": 52.29, "elapsed_time": "1:47:35", "remaining_time": "1:38:10", "throughput": 19912.97, "total_tokens": 128545472}
|
|
{"current_steps": 40845, "total_steps": 78105, "loss": 0.1457, "lr": 2.735846972244932e-06, "epoch": 2.614749375840215, "percentage": 52.29, "elapsed_time": "1:47:36", "remaining_time": "1:38:09", "throughput": 19913.27, "total_tokens": 128560384}
|
|
{"current_steps": 40850, "total_steps": 78105, "loss": 0.2336, "lr": 2.7352908055177306e-06, "epoch": 2.6150694577811917, "percentage": 52.3, "elapsed_time": "1:47:36", "remaining_time": "1:38:08", "throughput": 19913.64, "total_tokens": 128576448}
|
|
{"current_steps": 40855, "total_steps": 78105, "loss": 0.278, "lr": 2.7347346270413316e-06, "epoch": 2.6153895397221687, "percentage": 52.31, "elapsed_time": "1:47:37", "remaining_time": "1:38:07", "throughput": 19913.97, "total_tokens": 128591616}
|
|
{"current_steps": 40860, "total_steps": 78105, "loss": 0.1925, "lr": 2.7341784368435086e-06, "epoch": 2.6157096216631457, "percentage": 52.31, "elapsed_time": "1:47:38", "remaining_time": "1:38:06", "throughput": 19914.28, "total_tokens": 128606720}
|
|
{"current_steps": 40865, "total_steps": 78105, "loss": 0.2862, "lr": 2.7336222349520336e-06, "epoch": 2.6160297036041227, "percentage": 52.32, "elapsed_time": "1:47:38", "remaining_time": "1:38:05", "throughput": 19914.62, "total_tokens": 128622336}
|
|
{"current_steps": 40870, "total_steps": 78105, "loss": 0.3135, "lr": 2.733066021394682e-06, "epoch": 2.6163497855450997, "percentage": 52.33, "elapsed_time": "1:47:39", "remaining_time": "1:38:04", "throughput": 19914.94, "total_tokens": 128637568}
|
|
{"current_steps": 40875, "total_steps": 78105, "loss": 0.1468, "lr": 2.7325097961992264e-06, "epoch": 2.6166698674860767, "percentage": 52.33, "elapsed_time": "1:47:40", "remaining_time": "1:38:03", "throughput": 19915.21, "total_tokens": 128652672}
|
|
{"current_steps": 40880, "total_steps": 78105, "loss": 0.196, "lr": 2.7319535593934426e-06, "epoch": 2.616989949427053, "percentage": 52.34, "elapsed_time": "1:47:40", "remaining_time": "1:38:03", "throughput": 19915.53, "total_tokens": 128667968}
|
|
{"current_steps": 40885, "total_steps": 78105, "loss": 0.268, "lr": 2.7313973110051067e-06, "epoch": 2.61731003136803, "percentage": 52.35, "elapsed_time": "1:47:41", "remaining_time": "1:38:02", "throughput": 19915.89, "total_tokens": 128684288}
|
|
{"current_steps": 40890, "total_steps": 78105, "loss": 0.2837, "lr": 2.7308410510619945e-06, "epoch": 2.617630113309007, "percentage": 52.35, "elapsed_time": "1:47:42", "remaining_time": "1:38:01", "throughput": 19916.17, "total_tokens": 128698816}
|
|
{"current_steps": 40895, "total_steps": 78105, "loss": 0.2696, "lr": 2.7302847795918824e-06, "epoch": 2.6179501952499837, "percentage": 52.36, "elapsed_time": "1:47:42", "remaining_time": "1:38:00", "throughput": 19916.54, "total_tokens": 128714816}
|
|
{"current_steps": 40900, "total_steps": 78105, "loss": 0.2631, "lr": 2.729728496622548e-06, "epoch": 2.6182702771909607, "percentage": 52.37, "elapsed_time": "1:47:43", "remaining_time": "1:37:59", "throughput": 19916.85, "total_tokens": 128729536}
|
|
{"current_steps": 40905, "total_steps": 78105, "loss": 0.2565, "lr": 2.7291722021817684e-06, "epoch": 2.6185903591319377, "percentage": 52.37, "elapsed_time": "1:47:44", "remaining_time": "1:37:58", "throughput": 19917.13, "total_tokens": 128744576}
|
|
{"current_steps": 40910, "total_steps": 78105, "loss": 0.209, "lr": 2.728615896297323e-06, "epoch": 2.6189104410729147, "percentage": 52.38, "elapsed_time": "1:47:44", "remaining_time": "1:37:57", "throughput": 19917.49, "total_tokens": 128760192}
|
|
{"current_steps": 40915, "total_steps": 78105, "loss": 0.1708, "lr": 2.728059578996991e-06, "epoch": 2.6192305230138917, "percentage": 52.38, "elapsed_time": "1:47:45", "remaining_time": "1:37:56", "throughput": 19917.77, "total_tokens": 128774656}
|
|
{"current_steps": 40920, "total_steps": 78105, "loss": 0.1691, "lr": 2.7275032503085515e-06, "epoch": 2.6195506049548687, "percentage": 52.39, "elapsed_time": "1:47:46", "remaining_time": "1:37:55", "throughput": 19918.18, "total_tokens": 128791360}
|
|
{"current_steps": 40925, "total_steps": 78105, "loss": 0.2619, "lr": 2.7269469102597844e-06, "epoch": 2.6198706868958452, "percentage": 52.4, "elapsed_time": "1:47:46", "remaining_time": "1:37:54", "throughput": 19918.51, "total_tokens": 128806912}
|
|
{"current_steps": 40930, "total_steps": 78105, "loss": 0.1432, "lr": 2.726390558878471e-06, "epoch": 2.6201907688368222, "percentage": 52.4, "elapsed_time": "1:47:47", "remaining_time": "1:37:54", "throughput": 19918.95, "total_tokens": 128824128}
|
|
{"current_steps": 40935, "total_steps": 78105, "loss": 0.2237, "lr": 2.7258341961923923e-06, "epoch": 2.620510850777799, "percentage": 52.41, "elapsed_time": "1:47:48", "remaining_time": "1:37:53", "throughput": 19919.34, "total_tokens": 128840384}
|
|
{"current_steps": 40940, "total_steps": 78105, "loss": 0.1387, "lr": 2.72527782222933e-06, "epoch": 2.6208309327187758, "percentage": 52.42, "elapsed_time": "1:47:48", "remaining_time": "1:37:52", "throughput": 19919.78, "total_tokens": 128857792}
|
|
{"current_steps": 40945, "total_steps": 78105, "loss": 0.2324, "lr": 2.724721437017067e-06, "epoch": 2.6211510146597528, "percentage": 52.42, "elapsed_time": "1:47:49", "remaining_time": "1:37:51", "throughput": 19920.17, "total_tokens": 128874176}
|
|
{"current_steps": 40950, "total_steps": 78105, "loss": 0.203, "lr": 2.724165040583386e-06, "epoch": 2.6214710966007297, "percentage": 52.43, "elapsed_time": "1:47:50", "remaining_time": "1:37:50", "throughput": 19920.54, "total_tokens": 128889920}
|
|
{"current_steps": 40955, "total_steps": 78105, "loss": 0.2569, "lr": 2.7236086329560703e-06, "epoch": 2.6217911785417067, "percentage": 52.44, "elapsed_time": "1:47:50", "remaining_time": "1:37:49", "throughput": 19920.85, "total_tokens": 128905088}
|
|
{"current_steps": 40960, "total_steps": 78105, "loss": 0.2041, "lr": 2.723052214162904e-06, "epoch": 2.6221112604826837, "percentage": 52.44, "elapsed_time": "1:47:51", "remaining_time": "1:37:48", "throughput": 19921.23, "total_tokens": 128920960}
|
|
{"current_steps": 40965, "total_steps": 78105, "loss": 0.1669, "lr": 2.7224957842316717e-06, "epoch": 2.6224313424236607, "percentage": 52.45, "elapsed_time": "1:47:52", "remaining_time": "1:37:47", "throughput": 19921.53, "total_tokens": 128935616}
|
|
{"current_steps": 40970, "total_steps": 78105, "loss": 0.2564, "lr": 2.721939343190159e-06, "epoch": 2.6227514243646373, "percentage": 52.46, "elapsed_time": "1:47:52", "remaining_time": "1:37:46", "throughput": 19921.94, "total_tokens": 128952448}
|
|
{"current_steps": 40975, "total_steps": 78105, "loss": 0.1982, "lr": 2.721382891066152e-06, "epoch": 2.6230715063056143, "percentage": 52.46, "elapsed_time": "1:47:53", "remaining_time": "1:37:46", "throughput": 19922.25, "total_tokens": 128967744}
|
|
{"current_steps": 40980, "total_steps": 78105, "loss": 0.2127, "lr": 2.720826427887437e-06, "epoch": 2.6233915882465912, "percentage": 52.47, "elapsed_time": "1:47:54", "remaining_time": "1:37:45", "throughput": 19922.6, "total_tokens": 128983168}
|
|
{"current_steps": 40985, "total_steps": 78105, "loss": 0.2716, "lr": 2.7202699536818e-06, "epoch": 2.623711670187568, "percentage": 52.47, "elapsed_time": "1:47:54", "remaining_time": "1:37:44", "throughput": 19922.9, "total_tokens": 128998272}
|
|
{"current_steps": 40990, "total_steps": 78105, "loss": 0.2002, "lr": 2.7197134684770288e-06, "epoch": 2.624031752128545, "percentage": 52.48, "elapsed_time": "1:47:55", "remaining_time": "1:37:43", "throughput": 19923.22, "total_tokens": 129013888}
|
|
{"current_steps": 40995, "total_steps": 78105, "loss": 0.2078, "lr": 2.719156972300911e-06, "epoch": 2.6243518340695218, "percentage": 52.49, "elapsed_time": "1:47:56", "remaining_time": "1:37:42", "throughput": 19923.57, "total_tokens": 129029376}
|
|
{"current_steps": 41000, "total_steps": 78105, "loss": 0.2925, "lr": 2.718600465181236e-06, "epoch": 2.6246719160104988, "percentage": 52.49, "elapsed_time": "1:47:56", "remaining_time": "1:37:41", "throughput": 19923.98, "total_tokens": 129045888}
|
|
{"current_steps": 41005, "total_steps": 78105, "loss": 0.1804, "lr": 2.718043947145792e-06, "epoch": 2.6249919979514758, "percentage": 52.5, "elapsed_time": "1:47:57", "remaining_time": "1:37:40", "throughput": 19924.27, "total_tokens": 129060928}
|
|
{"current_steps": 41010, "total_steps": 78105, "loss": 0.3181, "lr": 2.71748741822237e-06, "epoch": 2.6253120798924527, "percentage": 52.51, "elapsed_time": "1:47:58", "remaining_time": "1:37:39", "throughput": 19924.54, "total_tokens": 129075776}
|
|
{"current_steps": 41015, "total_steps": 78105, "loss": 0.1856, "lr": 2.716930878438758e-06, "epoch": 2.6256321618334293, "percentage": 52.51, "elapsed_time": "1:47:58", "remaining_time": "1:37:38", "throughput": 19924.97, "total_tokens": 129092928}
|
|
{"current_steps": 41020, "total_steps": 78105, "loss": 0.1736, "lr": 2.716374327822748e-06, "epoch": 2.6259522437744063, "percentage": 52.52, "elapsed_time": "1:47:59", "remaining_time": "1:37:38", "throughput": 19925.29, "total_tokens": 129107776}
|
|
{"current_steps": 41025, "total_steps": 78105, "loss": 0.224, "lr": 2.7158177664021316e-06, "epoch": 2.6262723257153833, "percentage": 52.53, "elapsed_time": "1:48:00", "remaining_time": "1:37:37", "throughput": 19925.63, "total_tokens": 129123392}
|
|
{"current_steps": 41030, "total_steps": 78105, "loss": 0.2394, "lr": 2.7152611942047e-06, "epoch": 2.62659240765636, "percentage": 52.53, "elapsed_time": "1:48:00", "remaining_time": "1:37:36", "throughput": 19925.98, "total_tokens": 129139072}
|
|
{"current_steps": 41035, "total_steps": 78105, "loss": 0.1845, "lr": 2.714704611258246e-06, "epoch": 2.626912489597337, "percentage": 52.54, "elapsed_time": "1:48:01", "remaining_time": "1:37:35", "throughput": 19926.26, "total_tokens": 129153856}
|
|
{"current_steps": 41040, "total_steps": 78105, "loss": 0.2173, "lr": 2.7141480175905615e-06, "epoch": 2.627232571538314, "percentage": 52.54, "elapsed_time": "1:48:02", "remaining_time": "1:37:34", "throughput": 19926.65, "total_tokens": 129169920}
|
|
{"current_steps": 41045, "total_steps": 78105, "loss": 0.1824, "lr": 2.713591413229441e-06, "epoch": 2.627552653479291, "percentage": 52.55, "elapsed_time": "1:48:02", "remaining_time": "1:37:33", "throughput": 19926.97, "total_tokens": 129185344}
|
|
{"current_steps": 41050, "total_steps": 78105, "loss": 0.1922, "lr": 2.713034798202677e-06, "epoch": 2.627872735420268, "percentage": 52.56, "elapsed_time": "1:48:03", "remaining_time": "1:37:32", "throughput": 19927.3, "total_tokens": 129201152}
|
|
{"current_steps": 41055, "total_steps": 78105, "loss": 0.2191, "lr": 2.7124781725380657e-06, "epoch": 2.6281928173612443, "percentage": 52.56, "elapsed_time": "1:48:04", "remaining_time": "1:37:31", "throughput": 19927.61, "total_tokens": 129216384}
|
|
{"current_steps": 41060, "total_steps": 78105, "loss": 0.1356, "lr": 2.711921536263401e-06, "epoch": 2.6285128993022213, "percentage": 52.57, "elapsed_time": "1:48:04", "remaining_time": "1:37:30", "throughput": 19927.96, "total_tokens": 129232128}
|
|
{"current_steps": 41065, "total_steps": 78105, "loss": 0.2034, "lr": 2.711364889406479e-06, "epoch": 2.6288329812431983, "percentage": 52.58, "elapsed_time": "1:48:05", "remaining_time": "1:37:29", "throughput": 19928.34, "total_tokens": 129248512}
|
|
{"current_steps": 41070, "total_steps": 78105, "loss": 0.1895, "lr": 2.7108082319950946e-06, "epoch": 2.6291530631841753, "percentage": 52.58, "elapsed_time": "1:48:06", "remaining_time": "1:37:29", "throughput": 19928.69, "total_tokens": 129264448}
|
|
{"current_steps": 41075, "total_steps": 78105, "loss": 0.2898, "lr": 2.710251564057045e-06, "epoch": 2.629473145125152, "percentage": 52.59, "elapsed_time": "1:48:06", "remaining_time": "1:37:28", "throughput": 19928.98, "total_tokens": 129279296}
|
|
{"current_steps": 41080, "total_steps": 78105, "loss": 0.1662, "lr": 2.709694885620128e-06, "epoch": 2.629793227066129, "percentage": 52.6, "elapsed_time": "1:48:07", "remaining_time": "1:37:27", "throughput": 19929.36, "total_tokens": 129295552}
|
|
{"current_steps": 41085, "total_steps": 78105, "loss": 0.2313, "lr": 2.7091381967121405e-06, "epoch": 2.630113309007106, "percentage": 52.6, "elapsed_time": "1:48:08", "remaining_time": "1:37:26", "throughput": 19929.65, "total_tokens": 129310720}
|
|
{"current_steps": 41090, "total_steps": 78105, "loss": 0.3258, "lr": 2.708581497360881e-06, "epoch": 2.630433390948083, "percentage": 52.61, "elapsed_time": "1:48:09", "remaining_time": "1:37:25", "throughput": 19930.0, "total_tokens": 129326400}
|
|
{"current_steps": 41095, "total_steps": 78105, "loss": 0.1606, "lr": 2.708024787594148e-06, "epoch": 2.63075347288906, "percentage": 52.62, "elapsed_time": "1:48:09", "remaining_time": "1:37:24", "throughput": 19930.44, "total_tokens": 129343232}
|
|
{"current_steps": 41100, "total_steps": 78105, "loss": 0.2334, "lr": 2.7074680674397408e-06, "epoch": 2.6310735548300364, "percentage": 52.62, "elapsed_time": "1:48:10", "remaining_time": "1:37:23", "throughput": 19930.73, "total_tokens": 129358080}
|
|
{"current_steps": 41105, "total_steps": 78105, "loss": 0.2308, "lr": 2.7069113369254584e-06, "epoch": 2.6313936367710133, "percentage": 52.63, "elapsed_time": "1:48:11", "remaining_time": "1:37:22", "throughput": 19931.01, "total_tokens": 129373120}
|
|
{"current_steps": 41110, "total_steps": 78105, "loss": 0.2637, "lr": 2.7063545960791015e-06, "epoch": 2.6317137187119903, "percentage": 52.63, "elapsed_time": "1:48:11", "remaining_time": "1:37:21", "throughput": 19931.36, "total_tokens": 129388800}
|
|
{"current_steps": 41115, "total_steps": 78105, "loss": 0.254, "lr": 2.7057978449284716e-06, "epoch": 2.6320338006529673, "percentage": 52.64, "elapsed_time": "1:48:12", "remaining_time": "1:37:21", "throughput": 19931.77, "total_tokens": 129405248}
|
|
{"current_steps": 41120, "total_steps": 78105, "loss": 0.4129, "lr": 2.7052410835013694e-06, "epoch": 2.632353882593944, "percentage": 52.65, "elapsed_time": "1:48:13", "remaining_time": "1:37:20", "throughput": 19932.24, "total_tokens": 129422784}
|
|
{"current_steps": 41125, "total_steps": 78105, "loss": 0.2204, "lr": 2.7046843118255963e-06, "epoch": 2.632673964534921, "percentage": 52.65, "elapsed_time": "1:48:13", "remaining_time": "1:37:19", "throughput": 19932.57, "total_tokens": 129438528}
|
|
{"current_steps": 41130, "total_steps": 78105, "loss": 0.2269, "lr": 2.704127529928955e-06, "epoch": 2.632994046475898, "percentage": 52.66, "elapsed_time": "1:48:14", "remaining_time": "1:37:18", "throughput": 19932.89, "total_tokens": 129453632}
|
|
{"current_steps": 41135, "total_steps": 78105, "loss": 0.16, "lr": 2.7035707378392473e-06, "epoch": 2.633314128416875, "percentage": 52.67, "elapsed_time": "1:48:15", "remaining_time": "1:37:17", "throughput": 19933.25, "total_tokens": 129469376}
|
|
{"current_steps": 41140, "total_steps": 78105, "loss": 0.2356, "lr": 2.7030139355842783e-06, "epoch": 2.633634210357852, "percentage": 52.67, "elapsed_time": "1:48:15", "remaining_time": "1:37:16", "throughput": 19933.59, "total_tokens": 129484928}
|
|
{"current_steps": 41145, "total_steps": 78105, "loss": 0.2529, "lr": 2.7024571231918495e-06, "epoch": 2.6339542922988284, "percentage": 52.68, "elapsed_time": "1:48:16", "remaining_time": "1:37:15", "throughput": 19933.92, "total_tokens": 129500544}
|
|
{"current_steps": 41150, "total_steps": 78105, "loss": 0.2412, "lr": 2.7019003006897675e-06, "epoch": 2.6342743742398054, "percentage": 52.69, "elapsed_time": "1:48:17", "remaining_time": "1:37:14", "throughput": 19934.24, "total_tokens": 129515904}
|
|
{"current_steps": 41155, "total_steps": 78105, "loss": 0.1943, "lr": 2.7013434681058363e-06, "epoch": 2.6345944561807824, "percentage": 52.69, "elapsed_time": "1:48:17", "remaining_time": "1:37:13", "throughput": 19934.57, "total_tokens": 129531328}
|
|
{"current_steps": 41160, "total_steps": 78105, "loss": 0.2205, "lr": 2.700786625467861e-06, "epoch": 2.634914538121759, "percentage": 52.7, "elapsed_time": "1:48:18", "remaining_time": "1:37:13", "throughput": 19934.92, "total_tokens": 129546752}
|
|
{"current_steps": 41165, "total_steps": 78105, "loss": 0.2483, "lr": 2.700229772803647e-06, "epoch": 2.635234620062736, "percentage": 52.7, "elapsed_time": "1:48:19", "remaining_time": "1:37:12", "throughput": 19935.36, "total_tokens": 129564096}
|
|
{"current_steps": 41170, "total_steps": 78105, "loss": 0.2401, "lr": 2.6996729101410013e-06, "epoch": 2.635554702003713, "percentage": 52.71, "elapsed_time": "1:48:19", "remaining_time": "1:37:11", "throughput": 19935.72, "total_tokens": 129580032}
|
|
{"current_steps": 41175, "total_steps": 78105, "loss": 0.2484, "lr": 2.6991160375077308e-06, "epoch": 2.63587478394469, "percentage": 52.72, "elapsed_time": "1:48:20", "remaining_time": "1:37:10", "throughput": 19936.03, "total_tokens": 129594944}
|
|
{"current_steps": 41180, "total_steps": 78105, "loss": 0.229, "lr": 2.6985591549316424e-06, "epoch": 2.636194865885667, "percentage": 52.72, "elapsed_time": "1:48:21", "remaining_time": "1:37:09", "throughput": 19936.39, "total_tokens": 129610752}
|
|
{"current_steps": 41185, "total_steps": 78105, "loss": 0.1819, "lr": 2.6980022624405446e-06, "epoch": 2.636514947826644, "percentage": 52.73, "elapsed_time": "1:48:21", "remaining_time": "1:37:08", "throughput": 19936.72, "total_tokens": 129626112}
|
|
{"current_steps": 41190, "total_steps": 78105, "loss": 0.1814, "lr": 2.6974453600622442e-06, "epoch": 2.6368350297676204, "percentage": 52.74, "elapsed_time": "1:48:22", "remaining_time": "1:37:07", "throughput": 19937.05, "total_tokens": 129641408}
|
|
{"current_steps": 41195, "total_steps": 78105, "loss": 0.2203, "lr": 2.6968884478245515e-06, "epoch": 2.6371551117085974, "percentage": 52.74, "elapsed_time": "1:48:23", "remaining_time": "1:37:06", "throughput": 19937.37, "total_tokens": 129656448}
|
|
{"current_steps": 41200, "total_steps": 78105, "loss": 0.1885, "lr": 2.6963315257552754e-06, "epoch": 2.6374751936495744, "percentage": 52.75, "elapsed_time": "1:48:23", "remaining_time": "1:37:05", "throughput": 19937.66, "total_tokens": 129671488}
|
|
{"current_steps": 41205, "total_steps": 78105, "loss": 0.3412, "lr": 2.6957745938822248e-06, "epoch": 2.637795275590551, "percentage": 52.76, "elapsed_time": "1:48:24", "remaining_time": "1:37:04", "throughput": 19938.07, "total_tokens": 129688192}
|
|
{"current_steps": 41210, "total_steps": 78105, "loss": 0.2104, "lr": 2.695217652233211e-06, "epoch": 2.638115357531528, "percentage": 52.76, "elapsed_time": "1:48:25", "remaining_time": "1:37:04", "throughput": 19938.49, "total_tokens": 129705024}
|
|
{"current_steps": 41215, "total_steps": 78105, "loss": 0.2396, "lr": 2.694660700836045e-06, "epoch": 2.638435439472505, "percentage": 52.77, "elapsed_time": "1:48:25", "remaining_time": "1:37:03", "throughput": 19938.77, "total_tokens": 129719616}
|
|
{"current_steps": 41220, "total_steps": 78105, "loss": 0.2503, "lr": 2.6941037397185375e-06, "epoch": 2.638755521413482, "percentage": 52.78, "elapsed_time": "1:48:26", "remaining_time": "1:37:02", "throughput": 19939.07, "total_tokens": 129734848}
|
|
{"current_steps": 41225, "total_steps": 78105, "loss": 0.3266, "lr": 2.6935467689085e-06, "epoch": 2.639075603354459, "percentage": 52.78, "elapsed_time": "1:48:27", "remaining_time": "1:37:01", "throughput": 19939.43, "total_tokens": 129750784}
|
|
{"current_steps": 41230, "total_steps": 78105, "loss": 0.2465, "lr": 2.6929897884337454e-06, "epoch": 2.639395685295436, "percentage": 52.79, "elapsed_time": "1:48:27", "remaining_time": "1:37:00", "throughput": 19939.77, "total_tokens": 129766720}
|
|
{"current_steps": 41235, "total_steps": 78105, "loss": 0.2098, "lr": 2.6924327983220856e-06, "epoch": 2.6397157672364124, "percentage": 52.79, "elapsed_time": "1:48:28", "remaining_time": "1:36:59", "throughput": 19940.22, "total_tokens": 129784192}
|
|
{"current_steps": 41240, "total_steps": 78105, "loss": 0.2653, "lr": 2.6918757986013346e-06, "epoch": 2.6400358491773894, "percentage": 52.8, "elapsed_time": "1:48:29", "remaining_time": "1:36:58", "throughput": 19940.52, "total_tokens": 129799424}
|
|
{"current_steps": 41245, "total_steps": 78105, "loss": 0.2272, "lr": 2.6913187892993053e-06, "epoch": 2.6403559311183664, "percentage": 52.81, "elapsed_time": "1:48:29", "remaining_time": "1:36:57", "throughput": 19940.83, "total_tokens": 129814720}
|
|
{"current_steps": 41250, "total_steps": 78105, "loss": 0.1359, "lr": 2.6907617704438115e-06, "epoch": 2.640676013059343, "percentage": 52.81, "elapsed_time": "1:48:30", "remaining_time": "1:36:57", "throughput": 19941.22, "total_tokens": 129831040}
|
|
{"current_steps": 41255, "total_steps": 78105, "loss": 0.3712, "lr": 2.69020474206267e-06, "epoch": 2.64099609500032, "percentage": 52.82, "elapsed_time": "1:48:31", "remaining_time": "1:36:56", "throughput": 19941.49, "total_tokens": 129845440}
|
|
{"current_steps": 41260, "total_steps": 78105, "loss": 0.2336, "lr": 2.6896477041836934e-06, "epoch": 2.641316176941297, "percentage": 52.83, "elapsed_time": "1:48:32", "remaining_time": "1:36:55", "throughput": 19941.88, "total_tokens": 129861568}
|
|
{"current_steps": 41265, "total_steps": 78105, "loss": 0.2963, "lr": 2.6890906568346987e-06, "epoch": 2.641636258882274, "percentage": 52.83, "elapsed_time": "1:48:32", "remaining_time": "1:36:54", "throughput": 19942.2, "total_tokens": 129877056}
|
|
{"current_steps": 41270, "total_steps": 78105, "loss": 0.2921, "lr": 2.6885336000435016e-06, "epoch": 2.641956340823251, "percentage": 52.84, "elapsed_time": "1:48:33", "remaining_time": "1:36:53", "throughput": 19942.48, "total_tokens": 129891776}
|
|
{"current_steps": 41275, "total_steps": 78105, "loss": 0.1619, "lr": 2.6879765338379183e-06, "epoch": 2.642276422764228, "percentage": 52.85, "elapsed_time": "1:48:33", "remaining_time": "1:36:52", "throughput": 19942.81, "total_tokens": 129906816}
|
|
{"current_steps": 41280, "total_steps": 78105, "loss": 0.1129, "lr": 2.6874194582457657e-06, "epoch": 2.6425965047052045, "percentage": 52.85, "elapsed_time": "1:48:34", "remaining_time": "1:36:51", "throughput": 19943.12, "total_tokens": 129922368}
|
|
{"current_steps": 41285, "total_steps": 78105, "loss": 0.2401, "lr": 2.6868623732948617e-06, "epoch": 2.6429165866461815, "percentage": 52.86, "elapsed_time": "1:48:35", "remaining_time": "1:36:50", "throughput": 19943.48, "total_tokens": 129938624}
|
|
{"current_steps": 41290, "total_steps": 78105, "loss": 0.1611, "lr": 2.686305279013025e-06, "epoch": 2.6432366685871584, "percentage": 52.86, "elapsed_time": "1:48:36", "remaining_time": "1:36:49", "throughput": 19943.8, "total_tokens": 129953984}
|
|
{"current_steps": 41295, "total_steps": 78105, "loss": 0.2679, "lr": 2.685748175428072e-06, "epoch": 2.643556750528135, "percentage": 52.87, "elapsed_time": "1:48:36", "remaining_time": "1:36:48", "throughput": 19944.12, "total_tokens": 129969408}
|
|
{"current_steps": 41300, "total_steps": 78105, "loss": 0.1871, "lr": 2.685191062567824e-06, "epoch": 2.643876832469112, "percentage": 52.88, "elapsed_time": "1:48:37", "remaining_time": "1:36:48", "throughput": 19944.44, "total_tokens": 129984704}
|
|
{"current_steps": 41305, "total_steps": 78105, "loss": 0.2274, "lr": 2.6846339404600973e-06, "epoch": 2.644196914410089, "percentage": 52.88, "elapsed_time": "1:48:38", "remaining_time": "1:36:47", "throughput": 19944.81, "total_tokens": 130000704}
|
|
{"current_steps": 41310, "total_steps": 78105, "loss": 0.2339, "lr": 2.6840768091327147e-06, "epoch": 2.644516996351066, "percentage": 52.89, "elapsed_time": "1:48:38", "remaining_time": "1:36:46", "throughput": 19945.08, "total_tokens": 130015232}
|
|
{"current_steps": 41315, "total_steps": 78105, "loss": 0.2078, "lr": 2.6835196686134945e-06, "epoch": 2.644837078292043, "percentage": 52.9, "elapsed_time": "1:48:39", "remaining_time": "1:36:45", "throughput": 19945.41, "total_tokens": 130030720}
|
|
{"current_steps": 41320, "total_steps": 78105, "loss": 0.1687, "lr": 2.682962518930258e-06, "epoch": 2.6451571602330195, "percentage": 52.9, "elapsed_time": "1:48:40", "remaining_time": "1:36:44", "throughput": 19945.74, "total_tokens": 130046272}
|
|
{"current_steps": 41325, "total_steps": 78105, "loss": 0.2655, "lr": 2.682405360110826e-06, "epoch": 2.6454772421739965, "percentage": 52.91, "elapsed_time": "1:48:40", "remaining_time": "1:36:43", "throughput": 19946.05, "total_tokens": 130061760}
|
|
{"current_steps": 41330, "total_steps": 78105, "loss": 0.1826, "lr": 2.6818481921830207e-06, "epoch": 2.6457973241149735, "percentage": 52.92, "elapsed_time": "1:48:41", "remaining_time": "1:36:42", "throughput": 19946.37, "total_tokens": 130077312}
|
|
{"current_steps": 41335, "total_steps": 78105, "loss": 0.1732, "lr": 2.681291015174664e-06, "epoch": 2.6461174060559505, "percentage": 52.92, "elapsed_time": "1:48:42", "remaining_time": "1:36:41", "throughput": 19946.88, "total_tokens": 130095808}
|
|
{"current_steps": 41340, "total_steps": 78105, "loss": 0.2907, "lr": 2.680733829113578e-06, "epoch": 2.646437487996927, "percentage": 52.93, "elapsed_time": "1:48:42", "remaining_time": "1:36:40", "throughput": 19947.16, "total_tokens": 130110784}
|
|
{"current_steps": 41345, "total_steps": 78105, "loss": 0.2794, "lr": 2.6801766340275865e-06, "epoch": 2.646757569937904, "percentage": 52.94, "elapsed_time": "1:48:43", "remaining_time": "1:36:40", "throughput": 19947.51, "total_tokens": 130126400}
|
|
{"current_steps": 41350, "total_steps": 78105, "loss": 0.2194, "lr": 2.6796194299445123e-06, "epoch": 2.647077651878881, "percentage": 52.94, "elapsed_time": "1:48:44", "remaining_time": "1:36:39", "throughput": 19947.84, "total_tokens": 130142208}
|
|
{"current_steps": 41355, "total_steps": 78105, "loss": 0.2653, "lr": 2.6790622168921794e-06, "epoch": 2.647397733819858, "percentage": 52.95, "elapsed_time": "1:48:44", "remaining_time": "1:36:38", "throughput": 19948.17, "total_tokens": 130157504}
|
|
{"current_steps": 41360, "total_steps": 78105, "loss": 0.158, "lr": 2.6785049948984122e-06, "epoch": 2.647717815760835, "percentage": 52.95, "elapsed_time": "1:48:45", "remaining_time": "1:36:37", "throughput": 19948.53, "total_tokens": 130173376}
|
|
{"current_steps": 41365, "total_steps": 78105, "loss": 0.1953, "lr": 2.6779477639910344e-06, "epoch": 2.6480378977018115, "percentage": 52.96, "elapsed_time": "1:48:46", "remaining_time": "1:36:36", "throughput": 19948.85, "total_tokens": 130188800}
|
|
{"current_steps": 41370, "total_steps": 78105, "loss": 0.1425, "lr": 2.677390524197873e-06, "epoch": 2.6483579796427885, "percentage": 52.97, "elapsed_time": "1:48:46", "remaining_time": "1:36:35", "throughput": 19949.32, "total_tokens": 130206592}
|
|
{"current_steps": 41375, "total_steps": 78105, "loss": 0.1956, "lr": 2.676833275546753e-06, "epoch": 2.6486780615837655, "percentage": 52.97, "elapsed_time": "1:48:47", "remaining_time": "1:36:34", "throughput": 19949.78, "total_tokens": 130224576}
|
|
{"current_steps": 41380, "total_steps": 78105, "loss": 0.1839, "lr": 2.6762760180654995e-06, "epoch": 2.6489981435247425, "percentage": 52.98, "elapsed_time": "1:48:48", "remaining_time": "1:36:33", "throughput": 19950.21, "total_tokens": 130241920}
|
|
{"current_steps": 41385, "total_steps": 78105, "loss": 0.2494, "lr": 2.6757187517819406e-06, "epoch": 2.649318225465719, "percentage": 52.99, "elapsed_time": "1:48:49", "remaining_time": "1:36:33", "throughput": 19950.62, "total_tokens": 130258624}
|
|
{"current_steps": 41390, "total_steps": 78105, "loss": 0.211, "lr": 2.675161476723902e-06, "epoch": 2.649638307406696, "percentage": 52.99, "elapsed_time": "1:48:49", "remaining_time": "1:36:32", "throughput": 19950.93, "total_tokens": 130273536}
|
|
{"current_steps": 41395, "total_steps": 78105, "loss": 0.158, "lr": 2.6746041929192113e-06, "epoch": 2.649958389347673, "percentage": 53.0, "elapsed_time": "1:48:50", "remaining_time": "1:36:31", "throughput": 19951.22, "total_tokens": 130288384}
|
|
{"current_steps": 41400, "total_steps": 78105, "loss": 0.3643, "lr": 2.6740469003956974e-06, "epoch": 2.65027847128865, "percentage": 53.01, "elapsed_time": "1:48:51", "remaining_time": "1:36:30", "throughput": 19951.58, "total_tokens": 130304192}
|
|
{"current_steps": 41405, "total_steps": 78105, "loss": 0.1569, "lr": 2.6734895991811884e-06, "epoch": 2.650598553229627, "percentage": 53.01, "elapsed_time": "1:48:51", "remaining_time": "1:36:29", "throughput": 19951.92, "total_tokens": 130319680}
|
|
{"current_steps": 41410, "total_steps": 78105, "loss": 0.1823, "lr": 2.6729322893035114e-06, "epoch": 2.6509186351706036, "percentage": 53.02, "elapsed_time": "1:48:52", "remaining_time": "1:36:28", "throughput": 19952.21, "total_tokens": 130334592}
|
|
{"current_steps": 41415, "total_steps": 78105, "loss": 0.1812, "lr": 2.6723749707904974e-06, "epoch": 2.6512387171115805, "percentage": 53.02, "elapsed_time": "1:48:53", "remaining_time": "1:36:27", "throughput": 19952.55, "total_tokens": 130350272}
|
|
{"current_steps": 41420, "total_steps": 78105, "loss": 0.2267, "lr": 2.6718176436699744e-06, "epoch": 2.6515587990525575, "percentage": 53.03, "elapsed_time": "1:48:53", "remaining_time": "1:36:26", "throughput": 19952.83, "total_tokens": 130364992}
|
|
{"current_steps": 41425, "total_steps": 78105, "loss": 0.301, "lr": 2.6712603079697742e-06, "epoch": 2.651878880993534, "percentage": 53.04, "elapsed_time": "1:48:54", "remaining_time": "1:36:25", "throughput": 19953.24, "total_tokens": 130381376}
|
|
{"current_steps": 41430, "total_steps": 78105, "loss": 0.2851, "lr": 2.6707029637177257e-06, "epoch": 2.652198962934511, "percentage": 53.04, "elapsed_time": "1:48:55", "remaining_time": "1:36:25", "throughput": 19953.65, "total_tokens": 130398016}
|
|
{"current_steps": 41435, "total_steps": 78105, "loss": 0.1436, "lr": 2.6701456109416602e-06, "epoch": 2.652519044875488, "percentage": 53.05, "elapsed_time": "1:48:55", "remaining_time": "1:36:24", "throughput": 19953.97, "total_tokens": 130413248}
|
|
{"current_steps": 41440, "total_steps": 78105, "loss": 0.2448, "lr": 2.669588249669409e-06, "epoch": 2.652839126816465, "percentage": 53.06, "elapsed_time": "1:48:56", "remaining_time": "1:36:23", "throughput": 19954.24, "total_tokens": 130427776}
|
|
{"current_steps": 41445, "total_steps": 78105, "loss": 0.2274, "lr": 2.6690308799288043e-06, "epoch": 2.653159208757442, "percentage": 53.06, "elapsed_time": "1:48:57", "remaining_time": "1:36:22", "throughput": 19954.63, "total_tokens": 130444288}
|
|
{"current_steps": 41450, "total_steps": 78105, "loss": 0.2413, "lr": 2.668473501747679e-06, "epoch": 2.653479290698419, "percentage": 53.07, "elapsed_time": "1:48:57", "remaining_time": "1:36:21", "throughput": 19954.92, "total_tokens": 130459456}
|
|
{"current_steps": 41455, "total_steps": 78105, "loss": 0.2521, "lr": 2.6679161151538623e-06, "epoch": 2.6537993726393956, "percentage": 53.08, "elapsed_time": "1:48:58", "remaining_time": "1:36:20", "throughput": 19955.24, "total_tokens": 130474880}
|
|
{"current_steps": 41460, "total_steps": 78105, "loss": 0.2512, "lr": 2.6673587201751912e-06, "epoch": 2.6541194545803726, "percentage": 53.08, "elapsed_time": "1:48:59", "remaining_time": "1:36:19", "throughput": 19955.63, "total_tokens": 130491136}
|
|
{"current_steps": 41465, "total_steps": 78105, "loss": 0.1614, "lr": 2.666801316839497e-06, "epoch": 2.6544395365213496, "percentage": 53.09, "elapsed_time": "1:48:59", "remaining_time": "1:36:18", "throughput": 19955.98, "total_tokens": 130507072}
|
|
{"current_steps": 41470, "total_steps": 78105, "loss": 0.237, "lr": 2.6662439051746136e-06, "epoch": 2.654759618462326, "percentage": 53.1, "elapsed_time": "1:49:00", "remaining_time": "1:36:17", "throughput": 19956.3, "total_tokens": 130522624}
|
|
{"current_steps": 41475, "total_steps": 78105, "loss": 0.1732, "lr": 2.6656864852083758e-06, "epoch": 2.655079700403303, "percentage": 53.1, "elapsed_time": "1:49:01", "remaining_time": "1:36:16", "throughput": 19956.59, "total_tokens": 130537536}
|
|
{"current_steps": 41480, "total_steps": 78105, "loss": 0.3565, "lr": 2.6651290569686174e-06, "epoch": 2.65539978234428, "percentage": 53.11, "elapsed_time": "1:49:01", "remaining_time": "1:36:16", "throughput": 19956.91, "total_tokens": 130552960}
|
|
{"current_steps": 41485, "total_steps": 78105, "loss": 0.152, "lr": 2.6645716204831745e-06, "epoch": 2.655719864285257, "percentage": 53.11, "elapsed_time": "1:49:02", "remaining_time": "1:36:15", "throughput": 19957.32, "total_tokens": 130569856}
|
|
{"current_steps": 41490, "total_steps": 78105, "loss": 0.2822, "lr": 2.664014175779882e-06, "epoch": 2.656039946226234, "percentage": 53.12, "elapsed_time": "1:49:03", "remaining_time": "1:36:14", "throughput": 19957.61, "total_tokens": 130585024}
|
|
{"current_steps": 41495, "total_steps": 78105, "loss": 0.2037, "lr": 2.663456722886576e-06, "epoch": 2.656360028167211, "percentage": 53.13, "elapsed_time": "1:49:03", "remaining_time": "1:36:13", "throughput": 19957.92, "total_tokens": 130600384}
|
|
{"current_steps": 41500, "total_steps": 78105, "loss": 0.3533, "lr": 2.6628992618310923e-06, "epoch": 2.6566801101081876, "percentage": 53.13, "elapsed_time": "1:49:04", "remaining_time": "1:36:12", "throughput": 19958.3, "total_tokens": 130616768}
|
|
{"current_steps": 41505, "total_steps": 78105, "loss": 0.204, "lr": 2.6623417926412686e-06, "epoch": 2.6570001920491646, "percentage": 53.14, "elapsed_time": "1:49:05", "remaining_time": "1:36:11", "throughput": 19958.7, "total_tokens": 130633536}
|
|
{"current_steps": 41510, "total_steps": 78105, "loss": 0.2205, "lr": 2.6617843153449407e-06, "epoch": 2.6573202739901416, "percentage": 53.15, "elapsed_time": "1:49:05", "remaining_time": "1:36:10", "throughput": 19959.03, "total_tokens": 130649152}
|
|
{"current_steps": 41515, "total_steps": 78105, "loss": 0.199, "lr": 2.6612268299699474e-06, "epoch": 2.657640355931118, "percentage": 53.15, "elapsed_time": "1:49:06", "remaining_time": "1:36:09", "throughput": 19959.41, "total_tokens": 130665792}
|
|
{"current_steps": 41520, "total_steps": 78105, "loss": 0.2726, "lr": 2.6606693365441254e-06, "epoch": 2.657960437872095, "percentage": 53.16, "elapsed_time": "1:49:07", "remaining_time": "1:36:09", "throughput": 19959.72, "total_tokens": 130681472}
|
|
{"current_steps": 41525, "total_steps": 78105, "loss": 0.232, "lr": 2.660111835095314e-06, "epoch": 2.658280519813072, "percentage": 53.17, "elapsed_time": "1:49:07", "remaining_time": "1:36:08", "throughput": 19960.06, "total_tokens": 130697408}
|
|
{"current_steps": 41530, "total_steps": 78105, "loss": 0.2411, "lr": 2.6595543256513515e-06, "epoch": 2.658600601754049, "percentage": 53.17, "elapsed_time": "1:49:08", "remaining_time": "1:36:07", "throughput": 19960.34, "total_tokens": 130712512}
|
|
{"current_steps": 41535, "total_steps": 78105, "loss": 0.2569, "lr": 2.658996808240077e-06, "epoch": 2.658920683695026, "percentage": 53.18, "elapsed_time": "1:49:09", "remaining_time": "1:36:06", "throughput": 19960.7, "total_tokens": 130728704}
|
|
{"current_steps": 41540, "total_steps": 78105, "loss": 0.2186, "lr": 2.65843928288933e-06, "epoch": 2.659240765636003, "percentage": 53.18, "elapsed_time": "1:49:09", "remaining_time": "1:36:05", "throughput": 19961.02, "total_tokens": 130744064}
|
|
{"current_steps": 41545, "total_steps": 78105, "loss": 0.1895, "lr": 2.657881749626951e-06, "epoch": 2.6595608475769796, "percentage": 53.19, "elapsed_time": "1:49:10", "remaining_time": "1:36:04", "throughput": 19961.39, "total_tokens": 130760384}
|
|
{"current_steps": 41550, "total_steps": 78105, "loss": 0.2509, "lr": 2.6573242084807794e-06, "epoch": 2.6598809295179566, "percentage": 53.2, "elapsed_time": "1:49:11", "remaining_time": "1:36:03", "throughput": 19961.74, "total_tokens": 130776256}
|
|
{"current_steps": 41555, "total_steps": 78105, "loss": 0.2283, "lr": 2.656766659478656e-06, "epoch": 2.6602010114589336, "percentage": 53.2, "elapsed_time": "1:49:12", "remaining_time": "1:36:02", "throughput": 19962.15, "total_tokens": 130793024}
|
|
{"current_steps": 41560, "total_steps": 78105, "loss": 0.1837, "lr": 2.6562091026484226e-06, "epoch": 2.66052109339991, "percentage": 53.21, "elapsed_time": "1:49:12", "remaining_time": "1:36:02", "throughput": 19962.45, "total_tokens": 130808384}
|
|
{"current_steps": 41565, "total_steps": 78105, "loss": 0.248, "lr": 2.6556515380179203e-06, "epoch": 2.660841175340887, "percentage": 53.22, "elapsed_time": "1:49:13", "remaining_time": "1:36:01", "throughput": 19962.71, "total_tokens": 130822848}
|
|
{"current_steps": 41570, "total_steps": 78105, "loss": 0.1602, "lr": 2.6550939656149906e-06, "epoch": 2.661161257281864, "percentage": 53.22, "elapsed_time": "1:49:14", "remaining_time": "1:36:00", "throughput": 19963.05, "total_tokens": 130838784}
|
|
{"current_steps": 41575, "total_steps": 78105, "loss": 0.2316, "lr": 2.6545363854674765e-06, "epoch": 2.661481339222841, "percentage": 53.23, "elapsed_time": "1:49:14", "remaining_time": "1:35:59", "throughput": 19963.36, "total_tokens": 130854208}
|
|
{"current_steps": 41580, "total_steps": 78105, "loss": 0.2155, "lr": 2.6539787976032204e-06, "epoch": 2.661801421163818, "percentage": 53.24, "elapsed_time": "1:49:15", "remaining_time": "1:35:58", "throughput": 19963.68, "total_tokens": 130869248}
|
|
{"current_steps": 41585, "total_steps": 78105, "loss": 0.2503, "lr": 2.653421202050065e-06, "epoch": 2.6621215031047947, "percentage": 53.24, "elapsed_time": "1:49:16", "remaining_time": "1:35:57", "throughput": 19964.0, "total_tokens": 130884480}
|
|
{"current_steps": 41590, "total_steps": 78105, "loss": 0.248, "lr": 2.6528635988358537e-06, "epoch": 2.6624415850457717, "percentage": 53.25, "elapsed_time": "1:49:16", "remaining_time": "1:35:56", "throughput": 19964.29, "total_tokens": 130899200}
|
|
{"current_steps": 41595, "total_steps": 78105, "loss": 0.1598, "lr": 2.6523059879884305e-06, "epoch": 2.6627616669867487, "percentage": 53.26, "elapsed_time": "1:49:17", "remaining_time": "1:35:55", "throughput": 19964.6, "total_tokens": 130914752}
|
|
{"current_steps": 41600, "total_steps": 78105, "loss": 0.2021, "lr": 2.65174836953564e-06, "epoch": 2.6630817489277256, "percentage": 53.26, "elapsed_time": "1:49:18", "remaining_time": "1:35:54", "throughput": 19964.92, "total_tokens": 130930176}
|
|
{"current_steps": 41605, "total_steps": 78105, "loss": 0.1484, "lr": 2.651190743505326e-06, "epoch": 2.663401830868702, "percentage": 53.27, "elapsed_time": "1:49:18", "remaining_time": "1:35:53", "throughput": 19965.25, "total_tokens": 130945472}
|
|
{"current_steps": 41610, "total_steps": 78105, "loss": 0.2114, "lr": 2.6506331099253333e-06, "epoch": 2.663721912809679, "percentage": 53.27, "elapsed_time": "1:49:19", "remaining_time": "1:35:52", "throughput": 19965.53, "total_tokens": 130960128}
|
|
{"current_steps": 41615, "total_steps": 78105, "loss": 0.2003, "lr": 2.6500754688235085e-06, "epoch": 2.664041994750656, "percentage": 53.28, "elapsed_time": "1:49:19", "remaining_time": "1:35:52", "throughput": 19965.85, "total_tokens": 130975424}
|
|
{"current_steps": 41620, "total_steps": 78105, "loss": 0.2115, "lr": 2.6495178202276965e-06, "epoch": 2.664362076691633, "percentage": 53.29, "elapsed_time": "1:49:20", "remaining_time": "1:35:51", "throughput": 19966.14, "total_tokens": 130990016}
|
|
{"current_steps": 41625, "total_steps": 78105, "loss": 0.255, "lr": 2.6489601641657426e-06, "epoch": 2.66468215863261, "percentage": 53.29, "elapsed_time": "1:49:21", "remaining_time": "1:35:50", "throughput": 19966.5, "total_tokens": 131006272}
|
|
{"current_steps": 41630, "total_steps": 78105, "loss": 0.1549, "lr": 2.6484025006654952e-06, "epoch": 2.6650022405735867, "percentage": 53.3, "elapsed_time": "1:49:22", "remaining_time": "1:35:49", "throughput": 19966.87, "total_tokens": 131022720}
|
|
{"current_steps": 41635, "total_steps": 78105, "loss": 0.1778, "lr": 2.6478448297547993e-06, "epoch": 2.6653223225145637, "percentage": 53.31, "elapsed_time": "1:49:22", "remaining_time": "1:35:48", "throughput": 19967.15, "total_tokens": 131037312}
|
|
{"current_steps": 41640, "total_steps": 78105, "loss": 0.1708, "lr": 2.6472871514615026e-06, "epoch": 2.6656424044555407, "percentage": 53.31, "elapsed_time": "1:49:23", "remaining_time": "1:35:47", "throughput": 19967.49, "total_tokens": 131052928}
|
|
{"current_steps": 41645, "total_steps": 78105, "loss": 0.2943, "lr": 2.646729465813453e-06, "epoch": 2.6659624863965177, "percentage": 53.32, "elapsed_time": "1:49:23", "remaining_time": "1:35:46", "throughput": 19967.8, "total_tokens": 131068544}
|
|
{"current_steps": 41650, "total_steps": 78105, "loss": 0.2628, "lr": 2.6461717728384972e-06, "epoch": 2.666282568337494, "percentage": 53.33, "elapsed_time": "1:49:24", "remaining_time": "1:35:45", "throughput": 19968.12, "total_tokens": 131083968}
|
|
{"current_steps": 41655, "total_steps": 78105, "loss": 0.3024, "lr": 2.6456140725644856e-06, "epoch": 2.666602650278471, "percentage": 53.33, "elapsed_time": "1:49:25", "remaining_time": "1:35:44", "throughput": 19968.46, "total_tokens": 131099648}
|
|
{"current_steps": 41660, "total_steps": 78105, "loss": 0.2815, "lr": 2.6450563650192657e-06, "epoch": 2.666922732219448, "percentage": 53.34, "elapsed_time": "1:49:25", "remaining_time": "1:35:44", "throughput": 19968.76, "total_tokens": 131114496}
|
|
{"current_steps": 41665, "total_steps": 78105, "loss": 0.2378, "lr": 2.6444986502306863e-06, "epoch": 2.667242814160425, "percentage": 53.34, "elapsed_time": "1:49:26", "remaining_time": "1:35:43", "throughput": 19969.06, "total_tokens": 131129472}
|
|
{"current_steps": 41670, "total_steps": 78105, "loss": 0.2157, "lr": 2.643940928226596e-06, "epoch": 2.667562896101402, "percentage": 53.35, "elapsed_time": "1:49:27", "remaining_time": "1:35:42", "throughput": 19969.33, "total_tokens": 131144128}
|
|
{"current_steps": 41675, "total_steps": 78105, "loss": 0.2313, "lr": 2.643383199034847e-06, "epoch": 2.6678829780423787, "percentage": 53.36, "elapsed_time": "1:49:27", "remaining_time": "1:35:41", "throughput": 19969.68, "total_tokens": 131160192}
|
|
{"current_steps": 41680, "total_steps": 78105, "loss": 0.2001, "lr": 2.6428254626832872e-06, "epoch": 2.6682030599833557, "percentage": 53.36, "elapsed_time": "1:49:28", "remaining_time": "1:35:40", "throughput": 19970.03, "total_tokens": 131176128}
|
|
{"current_steps": 41685, "total_steps": 78105, "loss": 0.3001, "lr": 2.6422677191997668e-06, "epoch": 2.6685231419243327, "percentage": 53.37, "elapsed_time": "1:49:29", "remaining_time": "1:35:39", "throughput": 19970.46, "total_tokens": 131193536}
|
|
{"current_steps": 41690, "total_steps": 78105, "loss": 0.1506, "lr": 2.641709968612139e-06, "epoch": 2.6688432238653093, "percentage": 53.38, "elapsed_time": "1:49:30", "remaining_time": "1:35:38", "throughput": 19970.78, "total_tokens": 131209152}
|
|
{"current_steps": 41695, "total_steps": 78105, "loss": 0.4037, "lr": 2.641152210948253e-06, "epoch": 2.6691633058062862, "percentage": 53.38, "elapsed_time": "1:49:30", "remaining_time": "1:35:37", "throughput": 19971.11, "total_tokens": 131224704}
|
|
{"current_steps": 41700, "total_steps": 78105, "loss": 0.1781, "lr": 2.640594446235961e-06, "epoch": 2.6694833877472632, "percentage": 53.39, "elapsed_time": "1:49:31", "remaining_time": "1:35:36", "throughput": 19971.48, "total_tokens": 131240896}
|
|
{"current_steps": 41705, "total_steps": 78105, "loss": 0.2807, "lr": 2.6400366745031147e-06, "epoch": 2.6698034696882402, "percentage": 53.4, "elapsed_time": "1:49:32", "remaining_time": "1:35:36", "throughput": 19971.83, "total_tokens": 131256768}
|
|
{"current_steps": 41710, "total_steps": 78105, "loss": 0.236, "lr": 2.6394788957775657e-06, "epoch": 2.670123551629217, "percentage": 53.4, "elapsed_time": "1:49:32", "remaining_time": "1:35:35", "throughput": 19972.13, "total_tokens": 131271552}
|
|
{"current_steps": 41715, "total_steps": 78105, "loss": 0.2688, "lr": 2.6389211100871677e-06, "epoch": 2.670443633570194, "percentage": 53.41, "elapsed_time": "1:49:33", "remaining_time": "1:35:34", "throughput": 19972.42, "total_tokens": 131286784}
|
|
{"current_steps": 41720, "total_steps": 78105, "loss": 0.1437, "lr": 2.6383633174597735e-06, "epoch": 2.6707637155111708, "percentage": 53.42, "elapsed_time": "1:49:34", "remaining_time": "1:35:33", "throughput": 19972.75, "total_tokens": 131302272}
|
|
{"current_steps": 41725, "total_steps": 78105, "loss": 0.2011, "lr": 2.6378055179232354e-06, "epoch": 2.6710837974521477, "percentage": 53.42, "elapsed_time": "1:49:34", "remaining_time": "1:35:32", "throughput": 19973.2, "total_tokens": 131319360}
|
|
{"current_steps": 41730, "total_steps": 78105, "loss": 0.2875, "lr": 2.6372477115054067e-06, "epoch": 2.6714038793931247, "percentage": 53.43, "elapsed_time": "1:49:35", "remaining_time": "1:35:31", "throughput": 19973.54, "total_tokens": 131335232}
|
|
{"current_steps": 41735, "total_steps": 78105, "loss": 0.3015, "lr": 2.6366898982341433e-06, "epoch": 2.6717239613341013, "percentage": 53.43, "elapsed_time": "1:49:36", "remaining_time": "1:35:30", "throughput": 19973.82, "total_tokens": 131350272}
|
|
{"current_steps": 41740, "total_steps": 78105, "loss": 0.205, "lr": 2.636132078137298e-06, "epoch": 2.6720440432750783, "percentage": 53.44, "elapsed_time": "1:49:36", "remaining_time": "1:35:29", "throughput": 19974.24, "total_tokens": 131367296}
|
|
{"current_steps": 41745, "total_steps": 78105, "loss": 0.1604, "lr": 2.635574251242726e-06, "epoch": 2.6723641252160553, "percentage": 53.45, "elapsed_time": "1:49:37", "remaining_time": "1:35:29", "throughput": 19974.52, "total_tokens": 131382080}
|
|
{"current_steps": 41750, "total_steps": 78105, "loss": 0.1906, "lr": 2.6350164175782816e-06, "epoch": 2.6726842071570323, "percentage": 53.45, "elapsed_time": "1:49:38", "remaining_time": "1:35:28", "throughput": 19974.84, "total_tokens": 131397632}
|
|
{"current_steps": 41755, "total_steps": 78105, "loss": 0.26, "lr": 2.6344585771718214e-06, "epoch": 2.6730042890980092, "percentage": 53.46, "elapsed_time": "1:49:38", "remaining_time": "1:35:27", "throughput": 19975.28, "total_tokens": 131414720}
|
|
{"current_steps": 41760, "total_steps": 78105, "loss": 0.1476, "lr": 2.6339007300512e-06, "epoch": 2.6733243710389862, "percentage": 53.47, "elapsed_time": "1:49:39", "remaining_time": "1:35:26", "throughput": 19975.68, "total_tokens": 131431104}
|
|
{"current_steps": 41765, "total_steps": 78105, "loss": 0.2659, "lr": 2.6333428762442724e-06, "epoch": 2.673644452979963, "percentage": 53.47, "elapsed_time": "1:49:40", "remaining_time": "1:35:25", "throughput": 19975.99, "total_tokens": 131446208}
|
|
{"current_steps": 41770, "total_steps": 78105, "loss": 0.2564, "lr": 2.6327850157788976e-06, "epoch": 2.6739645349209398, "percentage": 53.48, "elapsed_time": "1:49:40", "remaining_time": "1:35:24", "throughput": 19976.34, "total_tokens": 131462080}
|
|
{"current_steps": 41775, "total_steps": 78105, "loss": 0.2235, "lr": 2.6322271486829303e-06, "epoch": 2.6742846168619168, "percentage": 53.49, "elapsed_time": "1:49:41", "remaining_time": "1:35:23", "throughput": 19976.67, "total_tokens": 131477696}
|
|
{"current_steps": 41780, "total_steps": 78105, "loss": 0.2871, "lr": 2.631669274984228e-06, "epoch": 2.6746046988028933, "percentage": 53.49, "elapsed_time": "1:49:42", "remaining_time": "1:35:22", "throughput": 19976.98, "total_tokens": 131492672}
|
|
{"current_steps": 41785, "total_steps": 78105, "loss": 0.1942, "lr": 2.631111394710648e-06, "epoch": 2.6749247807438703, "percentage": 53.5, "elapsed_time": "1:49:42", "remaining_time": "1:35:21", "throughput": 19977.37, "total_tokens": 131509184}
|
|
{"current_steps": 41790, "total_steps": 78105, "loss": 0.2044, "lr": 2.6305535078900473e-06, "epoch": 2.6752448626848473, "percentage": 53.5, "elapsed_time": "1:49:43", "remaining_time": "1:35:21", "throughput": 19977.69, "total_tokens": 131524672}
|
|
{"current_steps": 41795, "total_steps": 78105, "loss": 0.2911, "lr": 2.629995614550285e-06, "epoch": 2.6755649446258243, "percentage": 53.51, "elapsed_time": "1:49:44", "remaining_time": "1:35:20", "throughput": 19978.03, "total_tokens": 131540800}
|
|
{"current_steps": 41800, "total_steps": 78105, "loss": 0.1487, "lr": 2.6294377147192186e-06, "epoch": 2.6758850265668013, "percentage": 53.52, "elapsed_time": "1:49:44", "remaining_time": "1:35:19", "throughput": 19978.32, "total_tokens": 131555520}
|
|
{"current_steps": 41805, "total_steps": 78105, "loss": 0.2589, "lr": 2.628879808424707e-06, "epoch": 2.6762051085077783, "percentage": 53.52, "elapsed_time": "1:49:45", "remaining_time": "1:35:18", "throughput": 19978.66, "total_tokens": 131571520}
|
|
{"current_steps": 41810, "total_steps": 78105, "loss": 0.2274, "lr": 2.6283218956946096e-06, "epoch": 2.676525190448755, "percentage": 53.53, "elapsed_time": "1:49:46", "remaining_time": "1:35:17", "throughput": 19978.94, "total_tokens": 131586304}
|
|
{"current_steps": 41815, "total_steps": 78105, "loss": 0.2239, "lr": 2.6277639765567842e-06, "epoch": 2.676845272389732, "percentage": 53.54, "elapsed_time": "1:49:46", "remaining_time": "1:35:16", "throughput": 19979.23, "total_tokens": 131601472}
|
|
{"current_steps": 41820, "total_steps": 78105, "loss": 0.2437, "lr": 2.627206051039092e-06, "epoch": 2.677165354330709, "percentage": 53.54, "elapsed_time": "1:49:47", "remaining_time": "1:35:15", "throughput": 19979.61, "total_tokens": 131618176}
|
|
{"current_steps": 41825, "total_steps": 78105, "loss": 0.1405, "lr": 2.6266481191693915e-06, "epoch": 2.6774854362716853, "percentage": 53.55, "elapsed_time": "1:49:48", "remaining_time": "1:35:14", "throughput": 19979.97, "total_tokens": 131634368}
|
|
{"current_steps": 41830, "total_steps": 78105, "loss": 0.3287, "lr": 2.626090180975544e-06, "epoch": 2.6778055182126623, "percentage": 53.56, "elapsed_time": "1:49:48", "remaining_time": "1:35:13", "throughput": 19980.27, "total_tokens": 131649472}
|
|
{"current_steps": 41835, "total_steps": 78105, "loss": 0.1763, "lr": 2.62553223648541e-06, "epoch": 2.6781256001536393, "percentage": 53.56, "elapsed_time": "1:49:49", "remaining_time": "1:35:13", "throughput": 19980.52, "total_tokens": 131663552}
|
|
{"current_steps": 41840, "total_steps": 78105, "loss": 0.2578, "lr": 2.62497428572685e-06, "epoch": 2.6784456820946163, "percentage": 53.57, "elapsed_time": "1:49:50", "remaining_time": "1:35:12", "throughput": 19980.76, "total_tokens": 131677760}
|
|
{"current_steps": 41845, "total_steps": 78105, "loss": 0.3469, "lr": 2.6244163287277247e-06, "epoch": 2.6787657640355933, "percentage": 53.58, "elapsed_time": "1:49:50", "remaining_time": "1:35:11", "throughput": 19981.08, "total_tokens": 131693504}
|
|
{"current_steps": 41850, "total_steps": 78105, "loss": 0.163, "lr": 2.623858365515896e-06, "epoch": 2.67908584597657, "percentage": 53.58, "elapsed_time": "1:49:51", "remaining_time": "1:35:10", "throughput": 19981.4, "total_tokens": 131708992}
|
|
{"current_steps": 41855, "total_steps": 78105, "loss": 0.2484, "lr": 2.6233003961192255e-06, "epoch": 2.679405927917547, "percentage": 53.59, "elapsed_time": "1:49:52", "remaining_time": "1:35:09", "throughput": 19981.72, "total_tokens": 131724416}
|
|
{"current_steps": 41860, "total_steps": 78105, "loss": 0.1891, "lr": 2.6227424205655762e-06, "epoch": 2.679726009858524, "percentage": 53.59, "elapsed_time": "1:49:52", "remaining_time": "1:35:08", "throughput": 19982.02, "total_tokens": 131739264}
|
|
{"current_steps": 41865, "total_steps": 78105, "loss": 0.2357, "lr": 2.6221844388828092e-06, "epoch": 2.680046091799501, "percentage": 53.6, "elapsed_time": "1:49:53", "remaining_time": "1:35:07", "throughput": 19982.38, "total_tokens": 131755520}
|
|
{"current_steps": 41870, "total_steps": 78105, "loss": 0.1739, "lr": 2.621626451098788e-06, "epoch": 2.6803661737404774, "percentage": 53.61, "elapsed_time": "1:49:54", "remaining_time": "1:35:06", "throughput": 19982.74, "total_tokens": 131771840}
|
|
{"current_steps": 41875, "total_steps": 78105, "loss": 0.3005, "lr": 2.621068457241375e-06, "epoch": 2.6806862556814544, "percentage": 53.61, "elapsed_time": "1:49:54", "remaining_time": "1:35:05", "throughput": 19983.11, "total_tokens": 131788032}
|
|
{"current_steps": 41880, "total_steps": 78105, "loss": 0.2631, "lr": 2.6205104573384344e-06, "epoch": 2.6810063376224313, "percentage": 53.62, "elapsed_time": "1:49:55", "remaining_time": "1:35:05", "throughput": 19983.49, "total_tokens": 131804416}
|
|
{"current_steps": 41885, "total_steps": 78105, "loss": 0.1928, "lr": 2.6199524514178285e-06, "epoch": 2.6813264195634083, "percentage": 53.63, "elapsed_time": "1:49:56", "remaining_time": "1:35:04", "throughput": 19983.84, "total_tokens": 131820096}
|
|
{"current_steps": 41890, "total_steps": 78105, "loss": 0.1743, "lr": 2.619394439507423e-06, "epoch": 2.6816465015043853, "percentage": 53.63, "elapsed_time": "1:49:57", "remaining_time": "1:35:03", "throughput": 19984.18, "total_tokens": 131836032}
|
|
{"current_steps": 41895, "total_steps": 78105, "loss": 0.2193, "lr": 2.618836421635081e-06, "epoch": 2.681966583445362, "percentage": 53.64, "elapsed_time": "1:49:57", "remaining_time": "1:35:02", "throughput": 19984.48, "total_tokens": 131850816}
|
|
{"current_steps": 41900, "total_steps": 78105, "loss": 0.2378, "lr": 2.618278397828666e-06, "epoch": 2.682286665386339, "percentage": 53.65, "elapsed_time": "1:49:58", "remaining_time": "1:35:01", "throughput": 19984.8, "total_tokens": 131866112}
|
|
{"current_steps": 41905, "total_steps": 78105, "loss": 0.2459, "lr": 2.6177203681160445e-06, "epoch": 2.682606747327316, "percentage": 53.65, "elapsed_time": "1:49:58", "remaining_time": "1:35:00", "throughput": 19985.1, "total_tokens": 131881344}
|
|
{"current_steps": 41910, "total_steps": 78105, "loss": 0.2885, "lr": 2.617162332525081e-06, "epoch": 2.682926829268293, "percentage": 53.66, "elapsed_time": "1:49:59", "remaining_time": "1:34:59", "throughput": 19985.38, "total_tokens": 131896064}
|
|
{"current_steps": 41915, "total_steps": 78105, "loss": 0.1519, "lr": 2.6166042910836408e-06, "epoch": 2.6832469112092694, "percentage": 53.66, "elapsed_time": "1:50:00", "remaining_time": "1:34:58", "throughput": 19985.93, "total_tokens": 131915648}
|
|
{"current_steps": 41920, "total_steps": 78105, "loss": 0.2115, "lr": 2.6160462438195905e-06, "epoch": 2.6835669931502464, "percentage": 53.67, "elapsed_time": "1:50:01", "remaining_time": "1:34:58", "throughput": 19986.21, "total_tokens": 131930688}
|
|
{"current_steps": 41925, "total_steps": 78105, "loss": 0.2692, "lr": 2.6154881907607943e-06, "epoch": 2.6838870750912234, "percentage": 53.68, "elapsed_time": "1:50:01", "remaining_time": "1:34:57", "throughput": 19986.54, "total_tokens": 131946112}
|
|
{"current_steps": 41930, "total_steps": 78105, "loss": 0.2683, "lr": 2.61493013193512e-06, "epoch": 2.6842071570322004, "percentage": 53.68, "elapsed_time": "1:50:02", "remaining_time": "1:34:56", "throughput": 19986.91, "total_tokens": 131962304}
|
|
{"current_steps": 41935, "total_steps": 78105, "loss": 0.2109, "lr": 2.6143720673704337e-06, "epoch": 2.6845272389731774, "percentage": 53.69, "elapsed_time": "1:50:03", "remaining_time": "1:34:55", "throughput": 19987.22, "total_tokens": 131977664}
|
|
{"current_steps": 41940, "total_steps": 78105, "loss": 0.2304, "lr": 2.613813997094601e-06, "epoch": 2.684847320914154, "percentage": 53.7, "elapsed_time": "1:50:03", "remaining_time": "1:34:54", "throughput": 19987.54, "total_tokens": 131993024}
|
|
{"current_steps": 41945, "total_steps": 78105, "loss": 0.2856, "lr": 2.6132559211354907e-06, "epoch": 2.685167402855131, "percentage": 53.7, "elapsed_time": "1:50:04", "remaining_time": "1:34:53", "throughput": 19987.83, "total_tokens": 132008192}
|
|
{"current_steps": 41950, "total_steps": 78105, "loss": 0.2237, "lr": 2.61269783952097e-06, "epoch": 2.685487484796108, "percentage": 53.71, "elapsed_time": "1:50:05", "remaining_time": "1:34:52", "throughput": 19988.11, "total_tokens": 132023232}
|
|
{"current_steps": 41955, "total_steps": 78105, "loss": 0.3079, "lr": 2.612139752278906e-06, "epoch": 2.6858075667370844, "percentage": 53.72, "elapsed_time": "1:50:05", "remaining_time": "1:34:51", "throughput": 19988.46, "total_tokens": 132039552}
|
|
{"current_steps": 41960, "total_steps": 78105, "loss": 0.1774, "lr": 2.6115816594371674e-06, "epoch": 2.6861276486780614, "percentage": 53.72, "elapsed_time": "1:50:06", "remaining_time": "1:34:50", "throughput": 19988.79, "total_tokens": 132055168}
|
|
{"current_steps": 41965, "total_steps": 78105, "loss": 0.1585, "lr": 2.6110235610236207e-06, "epoch": 2.6864477306190384, "percentage": 53.73, "elapsed_time": "1:50:07", "remaining_time": "1:34:50", "throughput": 19989.09, "total_tokens": 132070464}
|
|
{"current_steps": 41970, "total_steps": 78105, "loss": 0.275, "lr": 2.6104654570661357e-06, "epoch": 2.6867678125600154, "percentage": 53.74, "elapsed_time": "1:50:07", "remaining_time": "1:34:49", "throughput": 19989.45, "total_tokens": 132086848}
|
|
{"current_steps": 41975, "total_steps": 78105, "loss": 0.3131, "lr": 2.6099073475925824e-06, "epoch": 2.6870878945009924, "percentage": 53.74, "elapsed_time": "1:50:08", "remaining_time": "1:34:48", "throughput": 19989.67, "total_tokens": 132100864}
|
|
{"current_steps": 41980, "total_steps": 78105, "loss": 0.2236, "lr": 2.609349232630828e-06, "epoch": 2.6874079764419694, "percentage": 53.75, "elapsed_time": "1:50:09", "remaining_time": "1:34:47", "throughput": 19990.0, "total_tokens": 132116672}
|
|
{"current_steps": 41985, "total_steps": 78105, "loss": 0.287, "lr": 2.6087911122087427e-06, "epoch": 2.687728058382946, "percentage": 53.75, "elapsed_time": "1:50:09", "remaining_time": "1:34:46", "throughput": 19990.41, "total_tokens": 132133632}
|
|
{"current_steps": 41990, "total_steps": 78105, "loss": 0.2718, "lr": 2.6082329863541956e-06, "epoch": 2.688048140323923, "percentage": 53.76, "elapsed_time": "1:50:10", "remaining_time": "1:34:45", "throughput": 19990.73, "total_tokens": 132149376}
|
|
{"current_steps": 41995, "total_steps": 78105, "loss": 0.1877, "lr": 2.607674855095057e-06, "epoch": 2.6883682222649, "percentage": 53.77, "elapsed_time": "1:50:11", "remaining_time": "1:34:44", "throughput": 19991.03, "total_tokens": 132164736}
|
|
{"current_steps": 42000, "total_steps": 78105, "loss": 0.2317, "lr": 2.607116718459197e-06, "epoch": 2.6886883042058765, "percentage": 53.77, "elapsed_time": "1:50:11", "remaining_time": "1:34:43", "throughput": 19991.32, "total_tokens": 132179840}
|
|
{"current_steps": 42005, "total_steps": 78105, "loss": 0.3007, "lr": 2.606558576474486e-06, "epoch": 2.6890083861468534, "percentage": 53.78, "elapsed_time": "1:50:12", "remaining_time": "1:34:42", "throughput": 19991.59, "total_tokens": 132194624}
|
|
{"current_steps": 42010, "total_steps": 78105, "loss": 0.1507, "lr": 2.6060004291687953e-06, "epoch": 2.6893284680878304, "percentage": 53.79, "elapsed_time": "1:50:13", "remaining_time": "1:34:42", "throughput": 19991.88, "total_tokens": 132209728}
|
|
{"current_steps": 42015, "total_steps": 78105, "loss": 0.1296, "lr": 2.605442276569995e-06, "epoch": 2.6896485500288074, "percentage": 53.79, "elapsed_time": "1:50:13", "remaining_time": "1:34:41", "throughput": 19992.22, "total_tokens": 132225536}
|
|
{"current_steps": 42020, "total_steps": 78105, "loss": 0.186, "lr": 2.604884118705956e-06, "epoch": 2.6899686319697844, "percentage": 53.8, "elapsed_time": "1:50:14", "remaining_time": "1:34:40", "throughput": 19992.53, "total_tokens": 132240832}
|
|
{"current_steps": 42025, "total_steps": 78105, "loss": 0.1699, "lr": 2.6043259556045507e-06, "epoch": 2.6902887139107614, "percentage": 53.81, "elapsed_time": "1:50:15", "remaining_time": "1:34:39", "throughput": 19992.88, "total_tokens": 132256512}
|
|
{"current_steps": 42030, "total_steps": 78105, "loss": 0.2736, "lr": 2.6037677872936508e-06, "epoch": 2.690608795851738, "percentage": 53.81, "elapsed_time": "1:50:15", "remaining_time": "1:34:38", "throughput": 19993.2, "total_tokens": 132272000}
|
|
{"current_steps": 42035, "total_steps": 78105, "loss": 0.2222, "lr": 2.603209613801128e-06, "epoch": 2.690928877792715, "percentage": 53.82, "elapsed_time": "1:50:16", "remaining_time": "1:34:37", "throughput": 19993.55, "total_tokens": 132288000}
|
|
{"current_steps": 42040, "total_steps": 78105, "loss": 0.2271, "lr": 2.6026514351548556e-06, "epoch": 2.691248959733692, "percentage": 53.82, "elapsed_time": "1:50:17", "remaining_time": "1:34:36", "throughput": 19993.98, "total_tokens": 132305152}
|
|
{"current_steps": 42045, "total_steps": 78105, "loss": 0.2093, "lr": 2.602093251382704e-06, "epoch": 2.6915690416746685, "percentage": 53.83, "elapsed_time": "1:50:17", "remaining_time": "1:34:35", "throughput": 19994.31, "total_tokens": 132320768}
|
|
{"current_steps": 42050, "total_steps": 78105, "loss": 0.1654, "lr": 2.601535062512548e-06, "epoch": 2.6918891236156455, "percentage": 53.84, "elapsed_time": "1:50:18", "remaining_time": "1:34:34", "throughput": 19994.62, "total_tokens": 132336256}
|
|
{"current_steps": 42055, "total_steps": 78105, "loss": 0.1773, "lr": 2.600976868572259e-06, "epoch": 2.6922092055566225, "percentage": 53.84, "elapsed_time": "1:50:19", "remaining_time": "1:34:34", "throughput": 19994.9, "total_tokens": 132351040}
|
|
{"current_steps": 42060, "total_steps": 78105, "loss": 0.2258, "lr": 2.600418669589712e-06, "epoch": 2.6925292874975995, "percentage": 53.85, "elapsed_time": "1:50:19", "remaining_time": "1:34:33", "throughput": 19995.21, "total_tokens": 132366336}
|
|
{"current_steps": 42065, "total_steps": 78105, "loss": 0.1983, "lr": 2.5998604655927794e-06, "epoch": 2.6928493694385764, "percentage": 53.86, "elapsed_time": "1:50:20", "remaining_time": "1:34:32", "throughput": 19995.48, "total_tokens": 132380864}
|
|
{"current_steps": 42070, "total_steps": 78105, "loss": 0.2047, "lr": 2.599302256609335e-06, "epoch": 2.6931694513795534, "percentage": 53.86, "elapsed_time": "1:50:21", "remaining_time": "1:34:31", "throughput": 19995.84, "total_tokens": 132397248}
|
|
{"current_steps": 42075, "total_steps": 78105, "loss": 0.1606, "lr": 2.5987440426672537e-06, "epoch": 2.69348953332053, "percentage": 53.87, "elapsed_time": "1:50:21", "remaining_time": "1:34:30", "throughput": 19996.18, "total_tokens": 132412928}
|
|
{"current_steps": 42080, "total_steps": 78105, "loss": 0.293, "lr": 2.5981858237944084e-06, "epoch": 2.693809615261507, "percentage": 53.88, "elapsed_time": "1:50:22", "remaining_time": "1:34:29", "throughput": 19996.5, "total_tokens": 132428416}
|
|
{"current_steps": 42085, "total_steps": 78105, "loss": 0.2504, "lr": 2.597627600018675e-06, "epoch": 2.694129697202484, "percentage": 53.88, "elapsed_time": "1:50:23", "remaining_time": "1:34:28", "throughput": 19996.85, "total_tokens": 132444352}
|
|
{"current_steps": 42090, "total_steps": 78105, "loss": 0.2006, "lr": 2.597069371367928e-06, "epoch": 2.6944497791434605, "percentage": 53.89, "elapsed_time": "1:50:23", "remaining_time": "1:34:27", "throughput": 19997.17, "total_tokens": 132459712}
|
|
{"current_steps": 42095, "total_steps": 78105, "loss": 0.18, "lr": 2.596511137870042e-06, "epoch": 2.6947698610844375, "percentage": 53.9, "elapsed_time": "1:50:24", "remaining_time": "1:34:27", "throughput": 19997.58, "total_tokens": 132476672}
|
|
{"current_steps": 42100, "total_steps": 78105, "loss": 0.2087, "lr": 2.595952899552893e-06, "epoch": 2.6950899430254145, "percentage": 53.9, "elapsed_time": "1:50:25", "remaining_time": "1:34:26", "throughput": 19997.91, "total_tokens": 132492224}
|
|
{"current_steps": 42105, "total_steps": 78105, "loss": 0.2058, "lr": 2.5953946564443554e-06, "epoch": 2.6954100249663915, "percentage": 53.91, "elapsed_time": "1:50:25", "remaining_time": "1:34:25", "throughput": 19998.21, "total_tokens": 132507456}
|
|
{"current_steps": 42110, "total_steps": 78105, "loss": 0.2725, "lr": 2.5948364085723055e-06, "epoch": 2.6957301069073685, "percentage": 53.91, "elapsed_time": "1:50:26", "remaining_time": "1:34:24", "throughput": 19998.54, "total_tokens": 132523392}
|
|
{"current_steps": 42115, "total_steps": 78105, "loss": 0.2302, "lr": 2.5942781559646197e-06, "epoch": 2.696050188848345, "percentage": 53.92, "elapsed_time": "1:50:27", "remaining_time": "1:34:23", "throughput": 19998.87, "total_tokens": 132538816}
|
|
{"current_steps": 42120, "total_steps": 78105, "loss": 0.1905, "lr": 2.5937198986491735e-06, "epoch": 2.696370270789322, "percentage": 53.93, "elapsed_time": "1:50:27", "remaining_time": "1:34:22", "throughput": 19999.19, "total_tokens": 132554432}
|
|
{"current_steps": 42125, "total_steps": 78105, "loss": 0.1991, "lr": 2.5931616366538444e-06, "epoch": 2.696690352730299, "percentage": 53.93, "elapsed_time": "1:50:28", "remaining_time": "1:34:21", "throughput": 19999.54, "total_tokens": 132570368}
|
|
{"current_steps": 42130, "total_steps": 78105, "loss": 0.2057, "lr": 2.592603370006508e-06, "epoch": 2.697010434671276, "percentage": 53.94, "elapsed_time": "1:50:29", "remaining_time": "1:34:20", "throughput": 19999.83, "total_tokens": 132586048}
|
|
{"current_steps": 42135, "total_steps": 78105, "loss": 0.292, "lr": 2.592045098735041e-06, "epoch": 2.6973305166122525, "percentage": 53.95, "elapsed_time": "1:50:30", "remaining_time": "1:34:19", "throughput": 20000.12, "total_tokens": 132601088}
|
|
{"current_steps": 42140, "total_steps": 78105, "loss": 0.1806, "lr": 2.5914868228673214e-06, "epoch": 2.6976505985532295, "percentage": 53.95, "elapsed_time": "1:50:30", "remaining_time": "1:34:19", "throughput": 20000.43, "total_tokens": 132616448}
|
|
{"current_steps": 42145, "total_steps": 78105, "loss": 0.2344, "lr": 2.590928542431227e-06, "epoch": 2.6979706804942065, "percentage": 53.96, "elapsed_time": "1:50:31", "remaining_time": "1:34:18", "throughput": 20000.72, "total_tokens": 132631296}
|
|
{"current_steps": 42150, "total_steps": 78105, "loss": 0.1493, "lr": 2.5903702574546346e-06, "epoch": 2.6982907624351835, "percentage": 53.97, "elapsed_time": "1:50:32", "remaining_time": "1:34:17", "throughput": 20001.13, "total_tokens": 132648256}
|
|
{"current_steps": 42155, "total_steps": 78105, "loss": 0.2154, "lr": 2.589811967965423e-06, "epoch": 2.6986108443761605, "percentage": 53.97, "elapsed_time": "1:50:32", "remaining_time": "1:34:16", "throughput": 20001.47, "total_tokens": 132664448}
|
|
{"current_steps": 42160, "total_steps": 78105, "loss": 0.2631, "lr": 2.5892536739914685e-06, "epoch": 2.698930926317137, "percentage": 53.98, "elapsed_time": "1:50:33", "remaining_time": "1:34:15", "throughput": 20001.78, "total_tokens": 132679872}
|
|
{"current_steps": 42165, "total_steps": 78105, "loss": 0.2164, "lr": 2.588695375560651e-06, "epoch": 2.699251008258114, "percentage": 53.99, "elapsed_time": "1:50:34", "remaining_time": "1:34:14", "throughput": 20002.05, "total_tokens": 132694464}
|
|
{"current_steps": 42170, "total_steps": 78105, "loss": 0.2184, "lr": 2.5881370727008474e-06, "epoch": 2.699571090199091, "percentage": 53.99, "elapsed_time": "1:50:34", "remaining_time": "1:34:13", "throughput": 20002.42, "total_tokens": 132710784}
|
|
{"current_steps": 42175, "total_steps": 78105, "loss": 0.1727, "lr": 2.5875787654399387e-06, "epoch": 2.699891172140068, "percentage": 54.0, "elapsed_time": "1:50:35", "remaining_time": "1:34:12", "throughput": 20002.78, "total_tokens": 132726976}
|
|
{"current_steps": 42180, "total_steps": 78105, "loss": 0.3153, "lr": 2.5870204538058023e-06, "epoch": 2.7002112540810446, "percentage": 54.0, "elapsed_time": "1:50:36", "remaining_time": "1:34:11", "throughput": 20003.09, "total_tokens": 132742144}
|
|
{"current_steps": 42185, "total_steps": 78105, "loss": 0.1987, "lr": 2.586462137826318e-06, "epoch": 2.7005313360220216, "percentage": 54.01, "elapsed_time": "1:50:36", "remaining_time": "1:34:11", "throughput": 20003.37, "total_tokens": 132757056}
|
|
{"current_steps": 42190, "total_steps": 78105, "loss": 0.2785, "lr": 2.5859038175293645e-06, "epoch": 2.7008514179629985, "percentage": 54.02, "elapsed_time": "1:50:37", "remaining_time": "1:34:10", "throughput": 20003.66, "total_tokens": 132772160}
|
|
{"current_steps": 42195, "total_steps": 78105, "loss": 0.2969, "lr": 2.5853454929428213e-06, "epoch": 2.7011714999039755, "percentage": 54.02, "elapsed_time": "1:50:38", "remaining_time": "1:34:09", "throughput": 20003.93, "total_tokens": 132786880}
|
|
{"current_steps": 42200, "total_steps": 78105, "loss": 0.1321, "lr": 2.584787164094569e-06, "epoch": 2.7014915818449525, "percentage": 54.03, "elapsed_time": "1:50:38", "remaining_time": "1:34:08", "throughput": 20004.44, "total_tokens": 132805760}
|
|
{"current_steps": 42205, "total_steps": 78105, "loss": 0.2687, "lr": 2.5842288310124876e-06, "epoch": 2.701811663785929, "percentage": 54.04, "elapsed_time": "1:50:39", "remaining_time": "1:34:07", "throughput": 20004.81, "total_tokens": 132822272}
|
|
{"current_steps": 42210, "total_steps": 78105, "loss": 0.1725, "lr": 2.583670493724457e-06, "epoch": 2.702131745726906, "percentage": 54.04, "elapsed_time": "1:50:40", "remaining_time": "1:34:06", "throughput": 20005.16, "total_tokens": 132837888}
|
|
{"current_steps": 42215, "total_steps": 78105, "loss": 0.187, "lr": 2.583112152258358e-06, "epoch": 2.702451827667883, "percentage": 54.05, "elapsed_time": "1:50:40", "remaining_time": "1:34:05", "throughput": 20005.41, "total_tokens": 132852544}
|
|
{"current_steps": 42220, "total_steps": 78105, "loss": 0.3097, "lr": 2.5825538066420713e-06, "epoch": 2.7027719096088596, "percentage": 54.06, "elapsed_time": "1:50:41", "remaining_time": "1:34:04", "throughput": 20005.78, "total_tokens": 132868800}
|
|
{"current_steps": 42225, "total_steps": 78105, "loss": 0.2885, "lr": 2.5819954569034767e-06, "epoch": 2.7030919915498366, "percentage": 54.06, "elapsed_time": "1:50:42", "remaining_time": "1:34:04", "throughput": 20006.07, "total_tokens": 132884160}
|
|
{"current_steps": 42230, "total_steps": 78105, "loss": 0.2297, "lr": 2.5814371030704566e-06, "epoch": 2.7034120734908136, "percentage": 54.07, "elapsed_time": "1:50:42", "remaining_time": "1:34:03", "throughput": 20006.44, "total_tokens": 132900096}
|
|
{"current_steps": 42235, "total_steps": 78105, "loss": 0.1881, "lr": 2.5808787451708915e-06, "epoch": 2.7037321554317906, "percentage": 54.07, "elapsed_time": "1:50:43", "remaining_time": "1:34:02", "throughput": 20006.77, "total_tokens": 132915392}
|
|
{"current_steps": 42240, "total_steps": 78105, "loss": 0.1998, "lr": 2.580320383232664e-06, "epoch": 2.7040522373727676, "percentage": 54.08, "elapsed_time": "1:50:44", "remaining_time": "1:34:01", "throughput": 20007.17, "total_tokens": 132932096}
|
|
{"current_steps": 42245, "total_steps": 78105, "loss": 0.1977, "lr": 2.579762017283654e-06, "epoch": 2.7043723193137446, "percentage": 54.09, "elapsed_time": "1:50:44", "remaining_time": "1:34:00", "throughput": 20007.52, "total_tokens": 132948224}
|
|
{"current_steps": 42250, "total_steps": 78105, "loss": 0.2544, "lr": 2.5792036473517444e-06, "epoch": 2.704692401254721, "percentage": 54.09, "elapsed_time": "1:50:45", "remaining_time": "1:33:59", "throughput": 20007.86, "total_tokens": 132964096}
|
|
{"current_steps": 42255, "total_steps": 78105, "loss": 0.2317, "lr": 2.578645273464817e-06, "epoch": 2.705012483195698, "percentage": 54.1, "elapsed_time": "1:50:46", "remaining_time": "1:33:58", "throughput": 20008.16, "total_tokens": 132979392}
|
|
{"current_steps": 42260, "total_steps": 78105, "loss": 0.3085, "lr": 2.578086895650755e-06, "epoch": 2.705332565136675, "percentage": 54.11, "elapsed_time": "1:50:46", "remaining_time": "1:33:57", "throughput": 20008.53, "total_tokens": 132995840}
|
|
{"current_steps": 42265, "total_steps": 78105, "loss": 0.148, "lr": 2.5775285139374396e-06, "epoch": 2.7056526470776516, "percentage": 54.11, "elapsed_time": "1:50:47", "remaining_time": "1:33:57", "throughput": 20008.85, "total_tokens": 133011840}
|
|
{"current_steps": 42270, "total_steps": 78105, "loss": 0.2201, "lr": 2.576970128352755e-06, "epoch": 2.7059727290186286, "percentage": 54.12, "elapsed_time": "1:50:48", "remaining_time": "1:33:56", "throughput": 20009.13, "total_tokens": 133027200}
|
|
{"current_steps": 42275, "total_steps": 78105, "loss": 0.1732, "lr": 2.5764117389245822e-06, "epoch": 2.7062928109596056, "percentage": 54.13, "elapsed_time": "1:50:48", "remaining_time": "1:33:55", "throughput": 20009.36, "total_tokens": 133041280}
|
|
{"current_steps": 42280, "total_steps": 78105, "loss": 0.2119, "lr": 2.5758533456808055e-06, "epoch": 2.7066128929005826, "percentage": 54.13, "elapsed_time": "1:50:49", "remaining_time": "1:33:54", "throughput": 20009.63, "total_tokens": 133056128}
|
|
{"current_steps": 42285, "total_steps": 78105, "loss": 0.2754, "lr": 2.575294948649307e-06, "epoch": 2.7069329748415596, "percentage": 54.14, "elapsed_time": "1:50:50", "remaining_time": "1:33:53", "throughput": 20009.99, "total_tokens": 133072128}
|
|
{"current_steps": 42290, "total_steps": 78105, "loss": 0.1819, "lr": 2.574736547857972e-06, "epoch": 2.7072530567825366, "percentage": 54.15, "elapsed_time": "1:50:50", "remaining_time": "1:33:52", "throughput": 20010.26, "total_tokens": 133086784}
|
|
{"current_steps": 42295, "total_steps": 78105, "loss": 0.1896, "lr": 2.574178143334683e-06, "epoch": 2.707573138723513, "percentage": 54.15, "elapsed_time": "1:50:51", "remaining_time": "1:33:51", "throughput": 20010.64, "total_tokens": 133103488}
|
|
{"current_steps": 42300, "total_steps": 78105, "loss": 0.2065, "lr": 2.573619735107324e-06, "epoch": 2.70789322066449, "percentage": 54.16, "elapsed_time": "1:50:52", "remaining_time": "1:33:50", "throughput": 20010.92, "total_tokens": 133118528}
|
|
{"current_steps": 42305, "total_steps": 78105, "loss": 0.1918, "lr": 2.5730613232037787e-06, "epoch": 2.708213302605467, "percentage": 54.16, "elapsed_time": "1:50:52", "remaining_time": "1:33:50", "throughput": 20011.33, "total_tokens": 133135360}
|
|
{"current_steps": 42310, "total_steps": 78105, "loss": 0.3044, "lr": 2.5725029076519308e-06, "epoch": 2.7085333845464437, "percentage": 54.17, "elapsed_time": "1:50:53", "remaining_time": "1:33:49", "throughput": 20011.67, "total_tokens": 133151168}
|
|
{"current_steps": 42315, "total_steps": 78105, "loss": 0.3606, "lr": 2.5719444884796653e-06, "epoch": 2.7088534664874206, "percentage": 54.18, "elapsed_time": "1:50:54", "remaining_time": "1:33:48", "throughput": 20012.01, "total_tokens": 133167232}
|
|
{"current_steps": 42320, "total_steps": 78105, "loss": 0.177, "lr": 2.5713860657148677e-06, "epoch": 2.7091735484283976, "percentage": 54.18, "elapsed_time": "1:50:55", "remaining_time": "1:33:47", "throughput": 20012.36, "total_tokens": 133183616}
|
|
{"current_steps": 42325, "total_steps": 78105, "loss": 0.2269, "lr": 2.5708276393854216e-06, "epoch": 2.7094936303693746, "percentage": 54.19, "elapsed_time": "1:50:55", "remaining_time": "1:33:46", "throughput": 20012.64, "total_tokens": 133198528}
|
|
{"current_steps": 42330, "total_steps": 78105, "loss": 0.2223, "lr": 2.5702692095192123e-06, "epoch": 2.7098137123103516, "percentage": 54.2, "elapsed_time": "1:50:56", "remaining_time": "1:33:45", "throughput": 20012.93, "total_tokens": 133213696}
|
|
{"current_steps": 42335, "total_steps": 78105, "loss": 0.2247, "lr": 2.569710776144124e-06, "epoch": 2.7101337942513286, "percentage": 54.2, "elapsed_time": "1:50:57", "remaining_time": "1:33:44", "throughput": 20013.26, "total_tokens": 133229248}
|
|
{"current_steps": 42340, "total_steps": 78105, "loss": 0.3229, "lr": 2.5691523392880428e-06, "epoch": 2.710453876192305, "percentage": 54.21, "elapsed_time": "1:50:57", "remaining_time": "1:33:43", "throughput": 20013.62, "total_tokens": 133245568}
|
|
{"current_steps": 42345, "total_steps": 78105, "loss": 0.2048, "lr": 2.5685938989788545e-06, "epoch": 2.710773958133282, "percentage": 54.22, "elapsed_time": "1:50:58", "remaining_time": "1:33:42", "throughput": 20013.94, "total_tokens": 133260992}
|
|
{"current_steps": 42350, "total_steps": 78105, "loss": 0.2256, "lr": 2.568035455244444e-06, "epoch": 2.711094040074259, "percentage": 54.22, "elapsed_time": "1:50:59", "remaining_time": "1:33:42", "throughput": 20014.27, "total_tokens": 133276544}
|
|
{"current_steps": 42355, "total_steps": 78105, "loss": 0.3146, "lr": 2.5674770081126977e-06, "epoch": 2.7114141220152357, "percentage": 54.23, "elapsed_time": "1:50:59", "remaining_time": "1:33:41", "throughput": 20014.65, "total_tokens": 133293184}
|
|
{"current_steps": 42360, "total_steps": 78105, "loss": 0.1427, "lr": 2.5669185576115004e-06, "epoch": 2.7117342039562127, "percentage": 54.23, "elapsed_time": "1:51:00", "remaining_time": "1:33:40", "throughput": 20014.96, "total_tokens": 133308352}
|
|
{"current_steps": 42365, "total_steps": 78105, "loss": 0.2569, "lr": 2.5663601037687386e-06, "epoch": 2.7120542858971897, "percentage": 54.24, "elapsed_time": "1:51:01", "remaining_time": "1:33:39", "throughput": 20015.28, "total_tokens": 133323968}
|
|
{"current_steps": 42370, "total_steps": 78105, "loss": 0.2647, "lr": 2.565801646612299e-06, "epoch": 2.7123743678381667, "percentage": 54.25, "elapsed_time": "1:51:01", "remaining_time": "1:33:38", "throughput": 20015.58, "total_tokens": 133339584}
|
|
{"current_steps": 42375, "total_steps": 78105, "loss": 0.334, "lr": 2.5652431861700683e-06, "epoch": 2.7126944497791436, "percentage": 54.25, "elapsed_time": "1:51:02", "remaining_time": "1:33:37", "throughput": 20015.93, "total_tokens": 133355584}
|
|
{"current_steps": 42380, "total_steps": 78105, "loss": 0.3551, "lr": 2.5646847224699327e-06, "epoch": 2.71301453172012, "percentage": 54.26, "elapsed_time": "1:51:03", "remaining_time": "1:33:36", "throughput": 20016.23, "total_tokens": 133370944}
|
|
{"current_steps": 42385, "total_steps": 78105, "loss": 0.2646, "lr": 2.5641262555397787e-06, "epoch": 2.713334613661097, "percentage": 54.27, "elapsed_time": "1:51:03", "remaining_time": "1:33:35", "throughput": 20016.61, "total_tokens": 133387456}
|
|
{"current_steps": 42390, "total_steps": 78105, "loss": 0.1974, "lr": 2.5635677854074942e-06, "epoch": 2.713654695602074, "percentage": 54.27, "elapsed_time": "1:51:04", "remaining_time": "1:33:35", "throughput": 20016.98, "total_tokens": 133403904}
|
|
{"current_steps": 42395, "total_steps": 78105, "loss": 0.1941, "lr": 2.563009312100965e-06, "epoch": 2.713974777543051, "percentage": 54.28, "elapsed_time": "1:51:05", "remaining_time": "1:33:34", "throughput": 20017.39, "total_tokens": 133420736}
|
|
{"current_steps": 42400, "total_steps": 78105, "loss": 0.2239, "lr": 2.5624508356480794e-06, "epoch": 2.7142948594840277, "percentage": 54.29, "elapsed_time": "1:51:05", "remaining_time": "1:33:33", "throughput": 20017.73, "total_tokens": 133436608}
|
|
{"current_steps": 42405, "total_steps": 78105, "loss": 0.2457, "lr": 2.561892356076724e-06, "epoch": 2.7146149414250047, "percentage": 54.29, "elapsed_time": "1:51:06", "remaining_time": "1:33:32", "throughput": 20018.04, "total_tokens": 133451840}
|
|
{"current_steps": 42410, "total_steps": 78105, "loss": 0.1412, "lr": 2.561333873414787e-06, "epoch": 2.7149350233659817, "percentage": 54.3, "elapsed_time": "1:51:07", "remaining_time": "1:33:31", "throughput": 20018.33, "total_tokens": 133467328}
|
|
{"current_steps": 42415, "total_steps": 78105, "loss": 0.2157, "lr": 2.560775387690156e-06, "epoch": 2.7152551053069587, "percentage": 54.31, "elapsed_time": "1:51:07", "remaining_time": "1:33:30", "throughput": 20018.65, "total_tokens": 133483264}
|
|
{"current_steps": 42420, "total_steps": 78105, "loss": 0.1953, "lr": 2.5602168989307187e-06, "epoch": 2.7155751872479357, "percentage": 54.31, "elapsed_time": "1:51:08", "remaining_time": "1:33:29", "throughput": 20019.01, "total_tokens": 133499584}
|
|
{"current_steps": 42425, "total_steps": 78105, "loss": 0.3926, "lr": 2.559658407164362e-06, "epoch": 2.715895269188912, "percentage": 54.32, "elapsed_time": "1:51:09", "remaining_time": "1:33:29", "throughput": 20019.41, "total_tokens": 133516480}
|
|
{"current_steps": 42430, "total_steps": 78105, "loss": 0.2246, "lr": 2.5590999124189754e-06, "epoch": 2.716215351129889, "percentage": 54.32, "elapsed_time": "1:51:10", "remaining_time": "1:33:28", "throughput": 20019.73, "total_tokens": 133532480}
|
|
{"current_steps": 42435, "total_steps": 78105, "loss": 0.2274, "lr": 2.558541414722448e-06, "epoch": 2.716535433070866, "percentage": 54.33, "elapsed_time": "1:51:10", "remaining_time": "1:33:27", "throughput": 20020.0, "total_tokens": 133547136}
|
|
{"current_steps": 42440, "total_steps": 78105, "loss": 0.2198, "lr": 2.5579829141026667e-06, "epoch": 2.716855515011843, "percentage": 54.34, "elapsed_time": "1:51:11", "remaining_time": "1:33:26", "throughput": 20020.31, "total_tokens": 133562880}
|
|
{"current_steps": 42445, "total_steps": 78105, "loss": 0.233, "lr": 2.5574244105875212e-06, "epoch": 2.7171755969528197, "percentage": 54.34, "elapsed_time": "1:51:12", "remaining_time": "1:33:25", "throughput": 20020.61, "total_tokens": 133578048}
|
|
{"current_steps": 42450, "total_steps": 78105, "loss": 0.2013, "lr": 2.5568659042048994e-06, "epoch": 2.7174956788937967, "percentage": 54.35, "elapsed_time": "1:51:12", "remaining_time": "1:33:24", "throughput": 20020.93, "total_tokens": 133593920}
|
|
{"current_steps": 42455, "total_steps": 78105, "loss": 0.223, "lr": 2.5563073949826907e-06, "epoch": 2.7178157608347737, "percentage": 54.36, "elapsed_time": "1:51:13", "remaining_time": "1:33:23", "throughput": 20021.49, "total_tokens": 133613376}
|
|
{"current_steps": 42460, "total_steps": 78105, "loss": 0.1625, "lr": 2.5557488829487836e-06, "epoch": 2.7181358427757507, "percentage": 54.36, "elapsed_time": "1:51:14", "remaining_time": "1:33:22", "throughput": 20021.79, "total_tokens": 133628480}
|
|
{"current_steps": 42465, "total_steps": 78105, "loss": 0.1886, "lr": 2.555190368131068e-06, "epoch": 2.7184559247167277, "percentage": 54.37, "elapsed_time": "1:51:14", "remaining_time": "1:33:22", "throughput": 20022.09, "total_tokens": 133643904}
|
|
{"current_steps": 42470, "total_steps": 78105, "loss": 0.269, "lr": 2.554631850557433e-06, "epoch": 2.7187760066577042, "percentage": 54.38, "elapsed_time": "1:51:15", "remaining_time": "1:33:21", "throughput": 20022.4, "total_tokens": 133659584}
|
|
{"current_steps": 42475, "total_steps": 78105, "loss": 0.241, "lr": 2.554073330255768e-06, "epoch": 2.7190960885986812, "percentage": 54.38, "elapsed_time": "1:51:16", "remaining_time": "1:33:20", "throughput": 20022.68, "total_tokens": 133674880}
|
|
{"current_steps": 42480, "total_steps": 78105, "loss": 0.1952, "lr": 2.553514807253962e-06, "epoch": 2.7194161705396582, "percentage": 54.39, "elapsed_time": "1:51:16", "remaining_time": "1:33:19", "throughput": 20022.99, "total_tokens": 133690368}
|
|
{"current_steps": 42485, "total_steps": 78105, "loss": 0.1502, "lr": 2.552956281579906e-06, "epoch": 2.7197362524806348, "percentage": 54.39, "elapsed_time": "1:51:17", "remaining_time": "1:33:18", "throughput": 20023.27, "total_tokens": 133705664}
|
|
{"current_steps": 42490, "total_steps": 78105, "loss": 0.27, "lr": 2.552397753261488e-06, "epoch": 2.7200563344216118, "percentage": 54.4, "elapsed_time": "1:51:18", "remaining_time": "1:33:17", "throughput": 20023.54, "total_tokens": 133720768}
|
|
{"current_steps": 42495, "total_steps": 78105, "loss": 0.1744, "lr": 2.5518392223266e-06, "epoch": 2.7203764163625888, "percentage": 54.41, "elapsed_time": "1:51:18", "remaining_time": "1:33:16", "throughput": 20023.79, "total_tokens": 133735232}
|
|
{"current_steps": 42500, "total_steps": 78105, "loss": 0.3103, "lr": 2.551280688803131e-06, "epoch": 2.7206964983035657, "percentage": 54.41, "elapsed_time": "1:51:19", "remaining_time": "1:33:15", "throughput": 20024.06, "total_tokens": 133750080}
|
|
{"current_steps": 42505, "total_steps": 78105, "loss": 0.2276, "lr": 2.550722152718972e-06, "epoch": 2.7210165802445427, "percentage": 54.42, "elapsed_time": "1:51:20", "remaining_time": "1:33:14", "throughput": 20024.44, "total_tokens": 133766464}
|
|
{"current_steps": 42510, "total_steps": 78105, "loss": 0.2621, "lr": 2.550163614102012e-06, "epoch": 2.7213366621855197, "percentage": 54.43, "elapsed_time": "1:51:20", "remaining_time": "1:33:14", "throughput": 20024.8, "total_tokens": 133782848}
|
|
{"current_steps": 42515, "total_steps": 78105, "loss": 0.1367, "lr": 2.549605072980143e-06, "epoch": 2.7216567441264963, "percentage": 54.43, "elapsed_time": "1:51:21", "remaining_time": "1:33:13", "throughput": 20025.07, "total_tokens": 133797696}
|
|
{"current_steps": 42520, "total_steps": 78105, "loss": 0.1574, "lr": 2.549046529381255e-06, "epoch": 2.7219768260674733, "percentage": 54.44, "elapsed_time": "1:51:22", "remaining_time": "1:33:12", "throughput": 20025.34, "total_tokens": 133813120}
|
|
{"current_steps": 42525, "total_steps": 78105, "loss": 0.1621, "lr": 2.5484879833332392e-06, "epoch": 2.7222969080084503, "percentage": 54.45, "elapsed_time": "1:51:22", "remaining_time": "1:33:11", "throughput": 20025.63, "total_tokens": 133828608}
|
|
{"current_steps": 42530, "total_steps": 78105, "loss": 0.2898, "lr": 2.5479294348639855e-06, "epoch": 2.722616989949427, "percentage": 54.45, "elapsed_time": "1:51:23", "remaining_time": "1:33:10", "throughput": 20025.93, "total_tokens": 133844224}
|
|
{"current_steps": 42535, "total_steps": 78105, "loss": 0.274, "lr": 2.547370884001386e-06, "epoch": 2.722937071890404, "percentage": 54.46, "elapsed_time": "1:51:24", "remaining_time": "1:33:09", "throughput": 20026.3, "total_tokens": 133860416}
|
|
{"current_steps": 42540, "total_steps": 78105, "loss": 0.2179, "lr": 2.5468123307733304e-06, "epoch": 2.723257153831381, "percentage": 54.47, "elapsed_time": "1:51:24", "remaining_time": "1:33:08", "throughput": 20026.71, "total_tokens": 133877568}
|
|
{"current_steps": 42545, "total_steps": 78105, "loss": 0.2668, "lr": 2.546253775207711e-06, "epoch": 2.7235772357723578, "percentage": 54.47, "elapsed_time": "1:51:25", "remaining_time": "1:33:08", "throughput": 20027.01, "total_tokens": 133893568}
|
|
{"current_steps": 42550, "total_steps": 78105, "loss": 0.185, "lr": 2.5456952173324197e-06, "epoch": 2.7238973177133348, "percentage": 54.48, "elapsed_time": "1:51:26", "remaining_time": "1:33:07", "throughput": 20027.32, "total_tokens": 133909120}
|
|
{"current_steps": 42555, "total_steps": 78105, "loss": 0.3923, "lr": 2.5451366571753467e-06, "epoch": 2.7242173996543118, "percentage": 54.48, "elapsed_time": "1:51:26", "remaining_time": "1:33:06", "throughput": 20027.66, "total_tokens": 133924864}
|
|
{"current_steps": 42560, "total_steps": 78105, "loss": 0.2699, "lr": 2.5445780947643842e-06, "epoch": 2.7245374815952883, "percentage": 54.49, "elapsed_time": "1:51:27", "remaining_time": "1:33:05", "throughput": 20028.01, "total_tokens": 133940672}
|
|
{"current_steps": 42565, "total_steps": 78105, "loss": 0.292, "lr": 2.5440195301274247e-06, "epoch": 2.7248575635362653, "percentage": 54.5, "elapsed_time": "1:51:28", "remaining_time": "1:33:04", "throughput": 20028.34, "total_tokens": 133956672}
|
|
{"current_steps": 42570, "total_steps": 78105, "loss": 0.2636, "lr": 2.543460963292358e-06, "epoch": 2.7251776454772423, "percentage": 54.5, "elapsed_time": "1:51:29", "remaining_time": "1:33:03", "throughput": 20028.66, "total_tokens": 133972416}
|
|
{"current_steps": 42575, "total_steps": 78105, "loss": 0.2519, "lr": 2.542902394287078e-06, "epoch": 2.725497727418219, "percentage": 54.51, "elapsed_time": "1:51:29", "remaining_time": "1:33:02", "throughput": 20029.12, "total_tokens": 133990144}
|
|
{"current_steps": 42580, "total_steps": 78105, "loss": 0.2507, "lr": 2.5423438231394754e-06, "epoch": 2.725817809359196, "percentage": 54.52, "elapsed_time": "1:51:30", "remaining_time": "1:33:02", "throughput": 20027.82, "total_tokens": 134005632}
|
|
{"current_steps": 42585, "total_steps": 78105, "loss": 0.1917, "lr": 2.5417852498774433e-06, "epoch": 2.726137891300173, "percentage": 54.52, "elapsed_time": "1:51:31", "remaining_time": "1:33:01", "throughput": 20028.13, "total_tokens": 134020928}
|
|
{"current_steps": 42590, "total_steps": 78105, "loss": 0.2271, "lr": 2.541226674528873e-06, "epoch": 2.72645797324115, "percentage": 54.53, "elapsed_time": "1:51:32", "remaining_time": "1:33:00", "throughput": 20028.45, "total_tokens": 134036928}
|
|
{"current_steps": 42595, "total_steps": 78105, "loss": 0.2658, "lr": 2.5406680971216567e-06, "epoch": 2.726778055182127, "percentage": 54.54, "elapsed_time": "1:51:33", "remaining_time": "1:32:59", "throughput": 20028.8, "total_tokens": 134053056}
|
|
{"current_steps": 42600, "total_steps": 78105, "loss": 0.2035, "lr": 2.5401095176836887e-06, "epoch": 2.727098137123104, "percentage": 54.54, "elapsed_time": "1:51:33", "remaining_time": "1:32:58", "throughput": 20029.12, "total_tokens": 134068800}
|
|
{"current_steps": 42605, "total_steps": 78105, "loss": 0.224, "lr": 2.5395509362428593e-06, "epoch": 2.7274182190640803, "percentage": 54.55, "elapsed_time": "1:51:34", "remaining_time": "1:32:57", "throughput": 20029.48, "total_tokens": 134084736}
|
|
{"current_steps": 42610, "total_steps": 78105, "loss": 0.2952, "lr": 2.538992352827063e-06, "epoch": 2.7277383010050573, "percentage": 54.55, "elapsed_time": "1:51:35", "remaining_time": "1:32:57", "throughput": 20029.83, "total_tokens": 134101056}
|
|
{"current_steps": 42615, "total_steps": 78105, "loss": 0.2335, "lr": 2.5384337674641913e-06, "epoch": 2.7280583829460343, "percentage": 54.56, "elapsed_time": "1:51:35", "remaining_time": "1:32:56", "throughput": 20030.1, "total_tokens": 134115648}
|
|
{"current_steps": 42620, "total_steps": 78105, "loss": 0.1952, "lr": 2.5378751801821375e-06, "epoch": 2.728378464887011, "percentage": 54.57, "elapsed_time": "1:51:36", "remaining_time": "1:32:55", "throughput": 20029.95, "total_tokens": 134130880}
|
|
{"current_steps": 42625, "total_steps": 78105, "loss": 0.2794, "lr": 2.5373165910087943e-06, "epoch": 2.728698546827988, "percentage": 54.57, "elapsed_time": "1:51:37", "remaining_time": "1:32:54", "throughput": 20030.29, "total_tokens": 134147072}
|
|
{"current_steps": 42630, "total_steps": 78105, "loss": 0.1391, "lr": 2.5367579999720552e-06, "epoch": 2.729018628768965, "percentage": 54.58, "elapsed_time": "1:51:37", "remaining_time": "1:32:53", "throughput": 20030.65, "total_tokens": 134163520}
|
|
{"current_steps": 42635, "total_steps": 78105, "loss": 0.1248, "lr": 2.5361994070998135e-06, "epoch": 2.729338710709942, "percentage": 54.59, "elapsed_time": "1:51:38", "remaining_time": "1:32:52", "throughput": 20031.03, "total_tokens": 134179968}
|
|
{"current_steps": 42640, "total_steps": 78105, "loss": 0.2446, "lr": 2.5356408124199616e-06, "epoch": 2.729658792650919, "percentage": 54.59, "elapsed_time": "1:51:39", "remaining_time": "1:32:51", "throughput": 20031.33, "total_tokens": 134195072}
|
|
{"current_steps": 42645, "total_steps": 78105, "loss": 0.2583, "lr": 2.5350822159603927e-06, "epoch": 2.7299788745918954, "percentage": 54.6, "elapsed_time": "1:51:39", "remaining_time": "1:32:51", "throughput": 20031.66, "total_tokens": 134210816}
|
|
{"current_steps": 42650, "total_steps": 78105, "loss": 0.1995, "lr": 2.5345236177490012e-06, "epoch": 2.7302989565328724, "percentage": 54.61, "elapsed_time": "1:51:40", "remaining_time": "1:32:50", "throughput": 20031.98, "total_tokens": 134226432}
|
|
{"current_steps": 42655, "total_steps": 78105, "loss": 0.2268, "lr": 2.53396501781368e-06, "epoch": 2.7306190384738493, "percentage": 54.61, "elapsed_time": "1:51:41", "remaining_time": "1:32:49", "throughput": 20032.28, "total_tokens": 134241536}
|
|
{"current_steps": 42660, "total_steps": 78105, "loss": 0.2936, "lr": 2.5334064161823223e-06, "epoch": 2.7309391204148263, "percentage": 54.62, "elapsed_time": "1:51:41", "remaining_time": "1:32:48", "throughput": 20032.57, "total_tokens": 134256896}
|
|
{"current_steps": 42665, "total_steps": 78105, "loss": 0.1958, "lr": 2.532847812882822e-06, "epoch": 2.731259202355803, "percentage": 54.63, "elapsed_time": "1:51:42", "remaining_time": "1:32:47", "throughput": 20032.88, "total_tokens": 134272704}
|
|
{"current_steps": 42670, "total_steps": 78105, "loss": 0.2381, "lr": 2.532289207943074e-06, "epoch": 2.73157928429678, "percentage": 54.63, "elapsed_time": "1:51:43", "remaining_time": "1:32:46", "throughput": 20033.15, "total_tokens": 134287616}
|
|
{"current_steps": 42675, "total_steps": 78105, "loss": 0.2003, "lr": 2.5317306013909703e-06, "epoch": 2.731899366237757, "percentage": 54.64, "elapsed_time": "1:51:43", "remaining_time": "1:32:45", "throughput": 20033.49, "total_tokens": 134303552}
|
|
{"current_steps": 42680, "total_steps": 78105, "loss": 0.2278, "lr": 2.5311719932544067e-06, "epoch": 2.732219448178734, "percentage": 54.64, "elapsed_time": "1:51:44", "remaining_time": "1:32:44", "throughput": 20033.85, "total_tokens": 134319936}
|
|
{"current_steps": 42685, "total_steps": 78105, "loss": 0.2232, "lr": 2.5306133835612744e-06, "epoch": 2.732539530119711, "percentage": 54.65, "elapsed_time": "1:51:45", "remaining_time": "1:32:44", "throughput": 20034.16, "total_tokens": 134335616}
|
|
{"current_steps": 42690, "total_steps": 78105, "loss": 0.2385, "lr": 2.530054772339471e-06, "epoch": 2.7328596120606874, "percentage": 54.66, "elapsed_time": "1:51:46", "remaining_time": "1:32:43", "throughput": 20034.52, "total_tokens": 134352320}
|
|
{"current_steps": 42695, "total_steps": 78105, "loss": 0.1843, "lr": 2.5294961596168876e-06, "epoch": 2.7331796940016644, "percentage": 54.66, "elapsed_time": "1:51:46", "remaining_time": "1:32:42", "throughput": 20034.8, "total_tokens": 134367104}
|
|
{"current_steps": 42700, "total_steps": 78105, "loss": 0.1649, "lr": 2.5289375454214203e-06, "epoch": 2.7334997759426414, "percentage": 54.67, "elapsed_time": "1:51:47", "remaining_time": "1:32:41", "throughput": 20035.14, "total_tokens": 134382976}
|
|
{"current_steps": 42705, "total_steps": 78105, "loss": 0.2481, "lr": 2.5283789297809626e-06, "epoch": 2.7338198578836184, "percentage": 54.68, "elapsed_time": "1:51:48", "remaining_time": "1:32:40", "throughput": 20035.45, "total_tokens": 134398528}
|
|
{"current_steps": 42710, "total_steps": 78105, "loss": 0.2089, "lr": 2.5278203127234087e-06, "epoch": 2.734139939824595, "percentage": 54.68, "elapsed_time": "1:51:48", "remaining_time": "1:32:39", "throughput": 20035.76, "total_tokens": 134414080}
|
|
{"current_steps": 42715, "total_steps": 78105, "loss": 0.2383, "lr": 2.5272616942766533e-06, "epoch": 2.734460021765572, "percentage": 54.69, "elapsed_time": "1:51:49", "remaining_time": "1:32:38", "throughput": 20036.1, "total_tokens": 134429824}
|
|
{"current_steps": 42720, "total_steps": 78105, "loss": 0.2569, "lr": 2.5267030744685907e-06, "epoch": 2.734780103706549, "percentage": 54.7, "elapsed_time": "1:51:50", "remaining_time": "1:32:37", "throughput": 20036.45, "total_tokens": 134445824}
|
|
{"current_steps": 42725, "total_steps": 78105, "loss": 0.2395, "lr": 2.5261444533271157e-06, "epoch": 2.735100185647526, "percentage": 54.7, "elapsed_time": "1:51:50", "remaining_time": "1:32:37", "throughput": 20036.78, "total_tokens": 134461376}
|
|
{"current_steps": 42730, "total_steps": 78105, "loss": 0.215, "lr": 2.5255858308801236e-06, "epoch": 2.735420267588503, "percentage": 54.71, "elapsed_time": "1:51:51", "remaining_time": "1:32:36", "throughput": 20037.08, "total_tokens": 134476992}
|
|
{"current_steps": 42735, "total_steps": 78105, "loss": 0.2526, "lr": 2.525027207155508e-06, "epoch": 2.7357403495294794, "percentage": 54.71, "elapsed_time": "1:51:52", "remaining_time": "1:32:35", "throughput": 20037.48, "total_tokens": 134493632}
|
|
{"current_steps": 42740, "total_steps": 78105, "loss": 0.2965, "lr": 2.5244685821811633e-06, "epoch": 2.7360604314704564, "percentage": 54.72, "elapsed_time": "1:51:52", "remaining_time": "1:32:34", "throughput": 20037.77, "total_tokens": 134508992}
|
|
{"current_steps": 42745, "total_steps": 78105, "loss": 0.1246, "lr": 2.523909955984986e-06, "epoch": 2.7363805134114334, "percentage": 54.73, "elapsed_time": "1:51:53", "remaining_time": "1:32:33", "throughput": 20038.09, "total_tokens": 134524608}
|
|
{"current_steps": 42750, "total_steps": 78105, "loss": 0.2433, "lr": 2.5233513285948694e-06, "epoch": 2.73670059535241, "percentage": 54.73, "elapsed_time": "1:51:54", "remaining_time": "1:32:32", "throughput": 20038.32, "total_tokens": 134538880}
|
|
{"current_steps": 42755, "total_steps": 78105, "loss": 0.2578, "lr": 2.52279270003871e-06, "epoch": 2.737020677293387, "percentage": 54.74, "elapsed_time": "1:51:54", "remaining_time": "1:32:31", "throughput": 20038.66, "total_tokens": 134554752}
|
|
{"current_steps": 42760, "total_steps": 78105, "loss": 0.3722, "lr": 2.5222340703444014e-06, "epoch": 2.737340759234364, "percentage": 54.75, "elapsed_time": "1:51:55", "remaining_time": "1:32:30", "throughput": 20038.94, "total_tokens": 134569728}
|
|
{"current_steps": 42765, "total_steps": 78105, "loss": 0.2052, "lr": 2.521675439539839e-06, "epoch": 2.737660841175341, "percentage": 54.75, "elapsed_time": "1:51:56", "remaining_time": "1:32:30", "throughput": 20039.24, "total_tokens": 134585088}
|
|
{"current_steps": 42770, "total_steps": 78105, "loss": 0.3977, "lr": 2.521116807652918e-06, "epoch": 2.737980923116318, "percentage": 54.76, "elapsed_time": "1:51:56", "remaining_time": "1:32:29", "throughput": 20039.47, "total_tokens": 134599296}
|
|
{"current_steps": 42775, "total_steps": 78105, "loss": 0.1524, "lr": 2.5205581747115337e-06, "epoch": 2.738301005057295, "percentage": 54.77, "elapsed_time": "1:51:57", "remaining_time": "1:32:28", "throughput": 20039.76, "total_tokens": 134614528}
|
|
{"current_steps": 42780, "total_steps": 78105, "loss": 0.2973, "lr": 2.519999540743581e-06, "epoch": 2.7386210869982714, "percentage": 54.77, "elapsed_time": "1:51:58", "remaining_time": "1:32:27", "throughput": 20040.01, "total_tokens": 134629312}
|
|
{"current_steps": 42785, "total_steps": 78105, "loss": 0.1537, "lr": 2.519440905776956e-06, "epoch": 2.7389411689392484, "percentage": 54.78, "elapsed_time": "1:51:58", "remaining_time": "1:32:26", "throughput": 20040.33, "total_tokens": 134644736}
|
|
{"current_steps": 42790, "total_steps": 78105, "loss": 0.2517, "lr": 2.5188822698395533e-06, "epoch": 2.7392612508802254, "percentage": 54.79, "elapsed_time": "1:51:59", "remaining_time": "1:32:25", "throughput": 20040.61, "total_tokens": 134660224}
|
|
{"current_steps": 42795, "total_steps": 78105, "loss": 0.3474, "lr": 2.518323632959268e-06, "epoch": 2.739581332821202, "percentage": 54.79, "elapsed_time": "1:52:00", "remaining_time": "1:32:24", "throughput": 20040.9, "total_tokens": 134675584}
|
|
{"current_steps": 42800, "total_steps": 78105, "loss": 0.2745, "lr": 2.5177649951639958e-06, "epoch": 2.739901414762179, "percentage": 54.8, "elapsed_time": "1:52:00", "remaining_time": "1:32:23", "throughput": 20041.3, "total_tokens": 134692416}
|
|
{"current_steps": 42805, "total_steps": 78105, "loss": 0.2483, "lr": 2.517206356481633e-06, "epoch": 2.740221496703156, "percentage": 54.8, "elapsed_time": "1:52:01", "remaining_time": "1:32:22", "throughput": 20041.59, "total_tokens": 134707392}
|
|
{"current_steps": 42810, "total_steps": 78105, "loss": 0.2671, "lr": 2.5166477169400736e-06, "epoch": 2.740541578644133, "percentage": 54.81, "elapsed_time": "1:52:02", "remaining_time": "1:32:22", "throughput": 20041.88, "total_tokens": 134722368}
|
|
{"current_steps": 42815, "total_steps": 78105, "loss": 0.2706, "lr": 2.516089076567215e-06, "epoch": 2.74086166058511, "percentage": 54.82, "elapsed_time": "1:52:02", "remaining_time": "1:32:21", "throughput": 20042.15, "total_tokens": 134737472}
|
|
{"current_steps": 42820, "total_steps": 78105, "loss": 0.2192, "lr": 2.515530435390951e-06, "epoch": 2.741181742526087, "percentage": 54.82, "elapsed_time": "1:52:03", "remaining_time": "1:32:20", "throughput": 20042.43, "total_tokens": 134752448}
|
|
{"current_steps": 42825, "total_steps": 78105, "loss": 0.2388, "lr": 2.514971793439178e-06, "epoch": 2.7415018244670635, "percentage": 54.83, "elapsed_time": "1:52:04", "remaining_time": "1:32:19", "throughput": 20042.95, "total_tokens": 134771520}
|
|
{"current_steps": 42830, "total_steps": 78105, "loss": 0.2213, "lr": 2.514413150739791e-06, "epoch": 2.7418219064080405, "percentage": 54.84, "elapsed_time": "1:52:04", "remaining_time": "1:32:18", "throughput": 20043.24, "total_tokens": 134787264}
|
|
{"current_steps": 42835, "total_steps": 78105, "loss": 0.2271, "lr": 2.513854507320687e-06, "epoch": 2.7421419883490175, "percentage": 54.84, "elapsed_time": "1:52:05", "remaining_time": "1:32:17", "throughput": 20043.59, "total_tokens": 134803584}
|
|
{"current_steps": 42840, "total_steps": 78105, "loss": 0.2145, "lr": 2.513295863209761e-06, "epoch": 2.742462070289994, "percentage": 54.85, "elapsed_time": "1:52:06", "remaining_time": "1:32:16", "throughput": 20043.96, "total_tokens": 134819904}
|
|
{"current_steps": 42845, "total_steps": 78105, "loss": 0.1872, "lr": 2.5127372184349084e-06, "epoch": 2.742782152230971, "percentage": 54.86, "elapsed_time": "1:52:06", "remaining_time": "1:32:16", "throughput": 20044.34, "total_tokens": 134836736}
|
|
{"current_steps": 42850, "total_steps": 78105, "loss": 0.1589, "lr": 2.512178573024026e-06, "epoch": 2.743102234171948, "percentage": 54.86, "elapsed_time": "1:52:07", "remaining_time": "1:32:15", "throughput": 20044.61, "total_tokens": 134851584}
|
|
{"current_steps": 42855, "total_steps": 78105, "loss": 0.148, "lr": 2.511619927005009e-06, "epoch": 2.743422316112925, "percentage": 54.87, "elapsed_time": "1:52:08", "remaining_time": "1:32:14", "throughput": 20044.92, "total_tokens": 134867136}
|
|
{"current_steps": 42860, "total_steps": 78105, "loss": 0.1865, "lr": 2.511061280405753e-06, "epoch": 2.743742398053902, "percentage": 54.87, "elapsed_time": "1:52:08", "remaining_time": "1:32:13", "throughput": 20045.31, "total_tokens": 134884288}
|
|
{"current_steps": 42865, "total_steps": 78105, "loss": 0.3706, "lr": 2.5105026332541553e-06, "epoch": 2.744062479994879, "percentage": 54.88, "elapsed_time": "1:52:09", "remaining_time": "1:32:12", "throughput": 20045.69, "total_tokens": 134901120}
|
|
{"current_steps": 42870, "total_steps": 78105, "loss": 0.2, "lr": 2.5099439855781107e-06, "epoch": 2.7443825619358555, "percentage": 54.89, "elapsed_time": "1:52:10", "remaining_time": "1:32:11", "throughput": 20046.0, "total_tokens": 134916736}
|
|
{"current_steps": 42875, "total_steps": 78105, "loss": 0.3615, "lr": 2.509385337405514e-06, "epoch": 2.7447026438768325, "percentage": 54.89, "elapsed_time": "1:52:11", "remaining_time": "1:32:10", "throughput": 20046.4, "total_tokens": 134933312}
|
|
{"current_steps": 42880, "total_steps": 78105, "loss": 0.2678, "lr": 2.5088266887642637e-06, "epoch": 2.7450227258178095, "percentage": 54.9, "elapsed_time": "1:52:11", "remaining_time": "1:32:09", "throughput": 20046.65, "total_tokens": 134948224}
|
|
{"current_steps": 42885, "total_steps": 78105, "loss": 0.2857, "lr": 2.5082680396822533e-06, "epoch": 2.745342807758786, "percentage": 54.91, "elapsed_time": "1:52:12", "remaining_time": "1:32:09", "throughput": 20047.02, "total_tokens": 134964672}
|
|
{"current_steps": 42890, "total_steps": 78105, "loss": 0.2464, "lr": 2.507709390187381e-06, "epoch": 2.745662889699763, "percentage": 54.91, "elapsed_time": "1:52:13", "remaining_time": "1:32:08", "throughput": 20047.3, "total_tokens": 134979840}
|
|
{"current_steps": 42895, "total_steps": 78105, "loss": 0.2792, "lr": 2.507150740307541e-06, "epoch": 2.74598297164074, "percentage": 54.92, "elapsed_time": "1:52:13", "remaining_time": "1:32:07", "throughput": 20047.61, "total_tokens": 134995648}
|
|
{"current_steps": 42900, "total_steps": 78105, "loss": 0.2634, "lr": 2.5065920900706313e-06, "epoch": 2.746303053581717, "percentage": 54.93, "elapsed_time": "1:52:14", "remaining_time": "1:32:06", "throughput": 20047.95, "total_tokens": 135011904}
|
|
{"current_steps": 42905, "total_steps": 78105, "loss": 0.2425, "lr": 2.5060334395045467e-06, "epoch": 2.746623135522694, "percentage": 54.93, "elapsed_time": "1:52:15", "remaining_time": "1:32:05", "throughput": 20048.36, "total_tokens": 135029440}
|
|
{"current_steps": 42910, "total_steps": 78105, "loss": 0.3343, "lr": 2.505474788637184e-06, "epoch": 2.7469432174636705, "percentage": 54.94, "elapsed_time": "1:52:15", "remaining_time": "1:32:04", "throughput": 20048.65, "total_tokens": 135044992}
|
|
{"current_steps": 42915, "total_steps": 78105, "loss": 0.1481, "lr": 2.5049161374964375e-06, "epoch": 2.7472632994046475, "percentage": 54.95, "elapsed_time": "1:52:16", "remaining_time": "1:32:03", "throughput": 20049.02, "total_tokens": 135061824}
|
|
{"current_steps": 42920, "total_steps": 78105, "loss": 0.2286, "lr": 2.5043574861102056e-06, "epoch": 2.7475833813456245, "percentage": 54.95, "elapsed_time": "1:52:17", "remaining_time": "1:32:03", "throughput": 20049.3, "total_tokens": 135076736}
|
|
{"current_steps": 42925, "total_steps": 78105, "loss": 0.4465, "lr": 2.5037988345063837e-06, "epoch": 2.7479034632866015, "percentage": 54.96, "elapsed_time": "1:52:17", "remaining_time": "1:32:02", "throughput": 20049.73, "total_tokens": 135094528}
|
|
{"current_steps": 42930, "total_steps": 78105, "loss": 0.2589, "lr": 2.503240182712868e-06, "epoch": 2.748223545227578, "percentage": 54.96, "elapsed_time": "1:52:18", "remaining_time": "1:32:01", "throughput": 20050.09, "total_tokens": 135111104}
|
|
{"current_steps": 42935, "total_steps": 78105, "loss": 0.1621, "lr": 2.5026815307575536e-06, "epoch": 2.748543627168555, "percentage": 54.97, "elapsed_time": "1:52:19", "remaining_time": "1:32:00", "throughput": 20050.39, "total_tokens": 135126336}
|
|
{"current_steps": 42940, "total_steps": 78105, "loss": 0.2015, "lr": 2.5021228786683377e-06, "epoch": 2.748863709109532, "percentage": 54.98, "elapsed_time": "1:52:19", "remaining_time": "1:31:59", "throughput": 20050.64, "total_tokens": 135140928}
|
|
{"current_steps": 42945, "total_steps": 78105, "loss": 0.1642, "lr": 2.5015642264731165e-06, "epoch": 2.749183791050509, "percentage": 54.98, "elapsed_time": "1:52:20", "remaining_time": "1:31:58", "throughput": 20050.96, "total_tokens": 135156736}
|
|
{"current_steps": 42950, "total_steps": 78105, "loss": 0.2276, "lr": 2.5010055741997862e-06, "epoch": 2.749503872991486, "percentage": 54.99, "elapsed_time": "1:52:21", "remaining_time": "1:31:57", "throughput": 20051.22, "total_tokens": 135171264}
|
|
{"current_steps": 42955, "total_steps": 78105, "loss": 0.2371, "lr": 2.500446921876242e-06, "epoch": 2.7498239549324626, "percentage": 55.0, "elapsed_time": "1:52:22", "remaining_time": "1:31:56", "throughput": 20051.62, "total_tokens": 135188096}
|
|
{"current_steps": 42960, "total_steps": 78105, "loss": 0.252, "lr": 2.4998882695303825e-06, "epoch": 2.7501440368734396, "percentage": 55.0, "elapsed_time": "1:52:22", "remaining_time": "1:31:56", "throughput": 20051.95, "total_tokens": 135203840}
|
|
{"current_steps": 42965, "total_steps": 78105, "loss": 0.2395, "lr": 2.4993296171901006e-06, "epoch": 2.7504641188144165, "percentage": 55.01, "elapsed_time": "1:52:23", "remaining_time": "1:31:55", "throughput": 20052.27, "total_tokens": 135219840}
|
|
{"current_steps": 42966, "total_steps": 78105, "eval_loss": 0.5056279301643372, "epoch": 2.750528135202612, "percentage": 55.01, "elapsed_time": "1:53:14", "remaining_time": "1:32:36", "throughput": 19901.93, "total_tokens": 135222656}
|
|
{"current_steps": 42970, "total_steps": 78105, "loss": 0.208, "lr": 2.4987709648832947e-06, "epoch": 2.7507842007553935, "percentage": 55.02, "elapsed_time": "1:53:47", "remaining_time": "1:33:02", "throughput": 19806.45, "total_tokens": 135235328}
|
|
{"current_steps": 42975, "total_steps": 78105, "loss": 0.3785, "lr": 2.49821231263786e-06, "epoch": 2.75110428269637, "percentage": 55.02, "elapsed_time": "1:53:48", "remaining_time": "1:33:01", "throughput": 19806.74, "total_tokens": 135249984}
|
|
{"current_steps": 42980, "total_steps": 78105, "loss": 0.3224, "lr": 2.4976536604816944e-06, "epoch": 2.751424364637347, "percentage": 55.03, "elapsed_time": "1:53:49", "remaining_time": "1:33:01", "throughput": 19807.04, "total_tokens": 135265024}
|
|
{"current_steps": 42985, "total_steps": 78105, "loss": 0.1517, "lr": 2.497095008442691e-06, "epoch": 2.751744446578324, "percentage": 55.03, "elapsed_time": "1:53:49", "remaining_time": "1:33:00", "throughput": 19807.48, "total_tokens": 135282368}
|
|
{"current_steps": 42990, "total_steps": 78105, "loss": 0.1871, "lr": 2.4965363565487485e-06, "epoch": 2.752064528519301, "percentage": 55.04, "elapsed_time": "1:53:50", "remaining_time": "1:32:59", "throughput": 19807.85, "total_tokens": 135298560}
|
|
{"current_steps": 42995, "total_steps": 78105, "loss": 0.2182, "lr": 2.4959777048277624e-06, "epoch": 2.752384610460278, "percentage": 55.05, "elapsed_time": "1:53:51", "remaining_time": "1:32:58", "throughput": 19808.17, "total_tokens": 135314112}
|
|
{"current_steps": 43000, "total_steps": 78105, "loss": 0.2873, "lr": 2.495419053307629e-06, "epoch": 2.7527046924012546, "percentage": 55.05, "elapsed_time": "1:53:51", "remaining_time": "1:32:57", "throughput": 19808.46, "total_tokens": 135328832}
|
|
{"current_steps": 43005, "total_steps": 78105, "loss": 0.2026, "lr": 2.494860402016244e-06, "epoch": 2.7530247743422316, "percentage": 55.06, "elapsed_time": "1:53:52", "remaining_time": "1:32:56", "throughput": 19808.74, "total_tokens": 135343360}
|
|
{"current_steps": 43010, "total_steps": 78105, "loss": 0.2787, "lr": 2.4943017509815034e-06, "epoch": 2.7533448562832086, "percentage": 55.07, "elapsed_time": "1:53:53", "remaining_time": "1:32:55", "throughput": 19809.06, "total_tokens": 135358720}
|
|
{"current_steps": 43015, "total_steps": 78105, "loss": 0.261, "lr": 2.493743100231305e-06, "epoch": 2.753664938224185, "percentage": 55.07, "elapsed_time": "1:53:53", "remaining_time": "1:32:54", "throughput": 19809.32, "total_tokens": 135373248}
|
|
{"current_steps": 43020, "total_steps": 78105, "loss": 0.2126, "lr": 2.493184449793542e-06, "epoch": 2.753985020165162, "percentage": 55.08, "elapsed_time": "1:53:54", "remaining_time": "1:32:53", "throughput": 19809.66, "total_tokens": 135388800}
|
|
{"current_steps": 43025, "total_steps": 78105, "loss": 0.3461, "lr": 2.492625799696113e-06, "epoch": 2.754305102106139, "percentage": 55.09, "elapsed_time": "1:53:55", "remaining_time": "1:32:52", "throughput": 19810.02, "total_tokens": 135404736}
|
|
{"current_steps": 43030, "total_steps": 78105, "loss": 0.155, "lr": 2.4920671499669122e-06, "epoch": 2.754625184047116, "percentage": 55.09, "elapsed_time": "1:53:55", "remaining_time": "1:32:52", "throughput": 19810.32, "total_tokens": 135419840}
|
|
{"current_steps": 43035, "total_steps": 78105, "loss": 0.2702, "lr": 2.491508500633838e-06, "epoch": 2.754945265988093, "percentage": 55.1, "elapsed_time": "1:53:56", "remaining_time": "1:32:51", "throughput": 19810.68, "total_tokens": 135435840}
|
|
{"current_steps": 43040, "total_steps": 78105, "loss": 0.2132, "lr": 2.490949851724784e-06, "epoch": 2.75526534792907, "percentage": 55.11, "elapsed_time": "1:53:57", "remaining_time": "1:32:50", "throughput": 19811.05, "total_tokens": 135451840}
|
|
{"current_steps": 43045, "total_steps": 78105, "loss": 0.2534, "lr": 2.490391203267647e-06, "epoch": 2.7555854298700466, "percentage": 55.11, "elapsed_time": "1:53:57", "remaining_time": "1:32:49", "throughput": 19811.38, "total_tokens": 135467392}
|
|
{"current_steps": 43050, "total_steps": 78105, "loss": 0.2481, "lr": 2.4898325552903247e-06, "epoch": 2.7559055118110236, "percentage": 55.12, "elapsed_time": "1:53:58", "remaining_time": "1:32:48", "throughput": 19811.65, "total_tokens": 135482048}
|
|
{"current_steps": 43055, "total_steps": 78105, "loss": 0.4394, "lr": 2.48927390782071e-06, "epoch": 2.7562255937520006, "percentage": 55.12, "elapsed_time": "1:53:59", "remaining_time": "1:32:47", "throughput": 19811.98, "total_tokens": 135497792}
|
|
{"current_steps": 43060, "total_steps": 78105, "loss": 0.2531, "lr": 2.4887152608867015e-06, "epoch": 2.756545675692977, "percentage": 55.13, "elapsed_time": "1:53:59", "remaining_time": "1:32:46", "throughput": 19812.35, "total_tokens": 135514176}
|
|
{"current_steps": 43065, "total_steps": 78105, "loss": 0.2848, "lr": 2.4881566145161934e-06, "epoch": 2.756865757633954, "percentage": 55.14, "elapsed_time": "1:54:00", "remaining_time": "1:32:45", "throughput": 19812.68, "total_tokens": 135529792}
|
|
{"current_steps": 43070, "total_steps": 78105, "loss": 0.3337, "lr": 2.4875979687370834e-06, "epoch": 2.757185839574931, "percentage": 55.14, "elapsed_time": "1:54:01", "remaining_time": "1:32:44", "throughput": 19813.03, "total_tokens": 135545664}
|
|
{"current_steps": 43075, "total_steps": 78105, "loss": 0.2029, "lr": 2.4870393235772653e-06, "epoch": 2.757505921515908, "percentage": 55.15, "elapsed_time": "1:54:01", "remaining_time": "1:32:44", "throughput": 19813.37, "total_tokens": 135561472}
|
|
{"current_steps": 43080, "total_steps": 78105, "loss": 0.2714, "lr": 2.4864806790646364e-06, "epoch": 2.757826003456885, "percentage": 55.16, "elapsed_time": "1:54:02", "remaining_time": "1:32:43", "throughput": 19813.7, "total_tokens": 135577024}
|
|
{"current_steps": 43085, "total_steps": 78105, "loss": 0.2499, "lr": 2.485922035227091e-06, "epoch": 2.758146085397862, "percentage": 55.16, "elapsed_time": "1:54:03", "remaining_time": "1:32:42", "throughput": 19814.02, "total_tokens": 135592320}
|
|
{"current_steps": 43090, "total_steps": 78105, "loss": 0.171, "lr": 2.485363392092527e-06, "epoch": 2.7584661673388386, "percentage": 55.17, "elapsed_time": "1:54:03", "remaining_time": "1:32:41", "throughput": 19814.35, "total_tokens": 135608000}
|
|
{"current_steps": 43095, "total_steps": 78105, "loss": 0.2985, "lr": 2.484804749688838e-06, "epoch": 2.7587862492798156, "percentage": 55.18, "elapsed_time": "1:54:04", "remaining_time": "1:32:40", "throughput": 19814.65, "total_tokens": 135623232}
|
|
{"current_steps": 43100, "total_steps": 78105, "loss": 0.1919, "lr": 2.484246108043921e-06, "epoch": 2.7591063312207926, "percentage": 55.18, "elapsed_time": "1:54:05", "remaining_time": "1:32:39", "throughput": 19815.02, "total_tokens": 135639744}
|
|
{"current_steps": 43105, "total_steps": 78105, "loss": 0.2325, "lr": 2.483687467185672e-06, "epoch": 2.759426413161769, "percentage": 55.19, "elapsed_time": "1:54:05", "remaining_time": "1:32:38", "throughput": 19815.33, "total_tokens": 135654976}
|
|
{"current_steps": 43110, "total_steps": 78105, "loss": 0.2432, "lr": 2.483128827141985e-06, "epoch": 2.759746495102746, "percentage": 55.19, "elapsed_time": "1:54:06", "remaining_time": "1:32:37", "throughput": 19815.77, "total_tokens": 135672192}
|
|
{"current_steps": 43115, "total_steps": 78105, "loss": 0.2071, "lr": 2.482570187940757e-06, "epoch": 2.760066577043723, "percentage": 55.2, "elapsed_time": "1:54:07", "remaining_time": "1:32:36", "throughput": 19816.08, "total_tokens": 135687808}
|
|
{"current_steps": 43120, "total_steps": 78105, "loss": 0.2047, "lr": 2.4820115496098823e-06, "epoch": 2.7603866589847, "percentage": 55.21, "elapsed_time": "1:54:08", "remaining_time": "1:32:36", "throughput": 19816.34, "total_tokens": 135702336}
|
|
{"current_steps": 43125, "total_steps": 78105, "loss": 0.1747, "lr": 2.4814529121772583e-06, "epoch": 2.760706740925677, "percentage": 55.21, "elapsed_time": "1:54:08", "remaining_time": "1:32:35", "throughput": 19816.65, "total_tokens": 135718016}
|
|
{"current_steps": 43130, "total_steps": 78105, "loss": 0.311, "lr": 2.4808942756707784e-06, "epoch": 2.761026822866654, "percentage": 55.22, "elapsed_time": "1:54:09", "remaining_time": "1:32:34", "throughput": 19816.92, "total_tokens": 135732672}
|
|
{"current_steps": 43135, "total_steps": 78105, "loss": 0.1994, "lr": 2.4803356401183397e-06, "epoch": 2.7613469048076307, "percentage": 55.23, "elapsed_time": "1:54:09", "remaining_time": "1:32:33", "throughput": 19817.2, "total_tokens": 135747648}
|
|
{"current_steps": 43140, "total_steps": 78105, "loss": 0.27, "lr": 2.479777005547836e-06, "epoch": 2.7616669867486077, "percentage": 55.23, "elapsed_time": "1:54:10", "remaining_time": "1:32:32", "throughput": 19817.49, "total_tokens": 135762944}
|
|
{"current_steps": 43145, "total_steps": 78105, "loss": 0.2364, "lr": 2.4792183719871633e-06, "epoch": 2.7619870686895847, "percentage": 55.24, "elapsed_time": "1:54:11", "remaining_time": "1:32:31", "throughput": 19817.83, "total_tokens": 135779008}
|
|
{"current_steps": 43150, "total_steps": 78105, "loss": 0.1714, "lr": 2.4786597394642162e-06, "epoch": 2.762307150630561, "percentage": 55.25, "elapsed_time": "1:54:12", "remaining_time": "1:32:30", "throughput": 19818.14, "total_tokens": 135794560}
|
|
{"current_steps": 43155, "total_steps": 78105, "loss": 0.1911, "lr": 2.478101108006892e-06, "epoch": 2.762627232571538, "percentage": 55.25, "elapsed_time": "1:54:12", "remaining_time": "1:32:29", "throughput": 19818.43, "total_tokens": 135809280}
|
|
{"current_steps": 43160, "total_steps": 78105, "loss": 0.2429, "lr": 2.4775424776430833e-06, "epoch": 2.762947314512515, "percentage": 55.26, "elapsed_time": "1:54:13", "remaining_time": "1:32:28", "throughput": 19818.71, "total_tokens": 135824192}
|
|
{"current_steps": 43165, "total_steps": 78105, "loss": 0.2434, "lr": 2.4769838484006864e-06, "epoch": 2.763267396453492, "percentage": 55.27, "elapsed_time": "1:54:14", "remaining_time": "1:32:28", "throughput": 19819.06, "total_tokens": 135840576}
|
|
{"current_steps": 43170, "total_steps": 78105, "loss": 0.2945, "lr": 2.4764252203075968e-06, "epoch": 2.763587478394469, "percentage": 55.27, "elapsed_time": "1:54:14", "remaining_time": "1:32:27", "throughput": 19819.42, "total_tokens": 135856640}
|
|
{"current_steps": 43175, "total_steps": 78105, "loss": 0.1681, "lr": 2.475866593391708e-06, "epoch": 2.7639075603354457, "percentage": 55.28, "elapsed_time": "1:54:15", "remaining_time": "1:32:26", "throughput": 19819.74, "total_tokens": 135872576}
|
|
{"current_steps": 43180, "total_steps": 78105, "loss": 0.3037, "lr": 2.475307967680917e-06, "epoch": 2.7642276422764227, "percentage": 55.28, "elapsed_time": "1:54:16", "remaining_time": "1:32:25", "throughput": 19820.03, "total_tokens": 135887680}
|
|
{"current_steps": 43185, "total_steps": 78105, "loss": 0.3184, "lr": 2.474749343203117e-06, "epoch": 2.7645477242173997, "percentage": 55.29, "elapsed_time": "1:54:16", "remaining_time": "1:32:24", "throughput": 19820.35, "total_tokens": 135903488}
|
|
{"current_steps": 43190, "total_steps": 78105, "loss": 0.1819, "lr": 2.4741907199862037e-06, "epoch": 2.7648678061583767, "percentage": 55.3, "elapsed_time": "1:54:17", "remaining_time": "1:32:23", "throughput": 19820.75, "total_tokens": 135920192}
|
|
{"current_steps": 43195, "total_steps": 78105, "loss": 0.2257, "lr": 2.4736320980580706e-06, "epoch": 2.7651878880993532, "percentage": 55.3, "elapsed_time": "1:54:18", "remaining_time": "1:32:22", "throughput": 19821.07, "total_tokens": 135935552}
|
|
{"current_steps": 43200, "total_steps": 78105, "loss": 0.1637, "lr": 2.4730734774466137e-06, "epoch": 2.76550797004033, "percentage": 55.31, "elapsed_time": "1:54:18", "remaining_time": "1:32:21", "throughput": 19821.43, "total_tokens": 135951936}
|
|
{"current_steps": 43205, "total_steps": 78105, "loss": 0.1806, "lr": 2.4725148581797264e-06, "epoch": 2.765828051981307, "percentage": 55.32, "elapsed_time": "1:54:19", "remaining_time": "1:32:20", "throughput": 19821.67, "total_tokens": 135966144}
|
|
{"current_steps": 43210, "total_steps": 78105, "loss": 0.2165, "lr": 2.4719562402853047e-06, "epoch": 2.766148133922284, "percentage": 55.32, "elapsed_time": "1:54:20", "remaining_time": "1:32:20", "throughput": 19821.97, "total_tokens": 135981568}
|
|
{"current_steps": 43215, "total_steps": 78105, "loss": 0.264, "lr": 2.471397623791242e-06, "epoch": 2.766468215863261, "percentage": 55.33, "elapsed_time": "1:54:20", "remaining_time": "1:32:19", "throughput": 19822.28, "total_tokens": 135997248}
|
|
{"current_steps": 43220, "total_steps": 78105, "loss": 0.1761, "lr": 2.4708390087254322e-06, "epoch": 2.7667882978042377, "percentage": 55.34, "elapsed_time": "1:54:21", "remaining_time": "1:32:18", "throughput": 19822.58, "total_tokens": 136012416}
|
|
{"current_steps": 43225, "total_steps": 78105, "loss": 0.2544, "lr": 2.470280395115771e-06, "epoch": 2.7671083797452147, "percentage": 55.34, "elapsed_time": "1:54:22", "remaining_time": "1:32:17", "throughput": 19822.84, "total_tokens": 136027264}
|
|
{"current_steps": 43230, "total_steps": 78105, "loss": 0.2152, "lr": 2.4697217829901526e-06, "epoch": 2.7674284616861917, "percentage": 55.35, "elapsed_time": "1:54:22", "remaining_time": "1:32:16", "throughput": 19823.1, "total_tokens": 136041600}
|
|
{"current_steps": 43235, "total_steps": 78105, "loss": 0.1551, "lr": 2.4691631723764704e-06, "epoch": 2.7677485436271687, "percentage": 55.35, "elapsed_time": "1:54:23", "remaining_time": "1:32:15", "throughput": 19823.43, "total_tokens": 136057600}
|
|
{"current_steps": 43240, "total_steps": 78105, "loss": 0.2603, "lr": 2.4686045633026183e-06, "epoch": 2.7680686255681453, "percentage": 55.36, "elapsed_time": "1:54:24", "remaining_time": "1:32:14", "throughput": 19823.79, "total_tokens": 136073856}
|
|
{"current_steps": 43245, "total_steps": 78105, "loss": 0.1746, "lr": 2.4680459557964916e-06, "epoch": 2.7683887075091222, "percentage": 55.37, "elapsed_time": "1:54:24", "remaining_time": "1:32:13", "throughput": 19824.1, "total_tokens": 136089600}
|
|
{"current_steps": 43250, "total_steps": 78105, "loss": 0.2231, "lr": 2.4674873498859827e-06, "epoch": 2.7687087894500992, "percentage": 55.37, "elapsed_time": "1:54:25", "remaining_time": "1:32:12", "throughput": 19824.6, "total_tokens": 136108224}
|
|
{"current_steps": 43255, "total_steps": 78105, "loss": 0.2962, "lr": 2.4669287455989866e-06, "epoch": 2.7690288713910762, "percentage": 55.38, "elapsed_time": "1:54:26", "remaining_time": "1:32:12", "throughput": 19824.87, "total_tokens": 136123648}
|
|
{"current_steps": 43260, "total_steps": 78105, "loss": 0.1811, "lr": 2.466370142963396e-06, "epoch": 2.769348953332053, "percentage": 55.39, "elapsed_time": "1:54:27", "remaining_time": "1:32:11", "throughput": 19825.26, "total_tokens": 136140224}
|
|
{"current_steps": 43265, "total_steps": 78105, "loss": 0.1884, "lr": 2.4658115420071065e-06, "epoch": 2.7696690352730298, "percentage": 55.39, "elapsed_time": "1:54:27", "remaining_time": "1:32:10", "throughput": 19825.63, "total_tokens": 136157056}
|
|
{"current_steps": 43270, "total_steps": 78105, "loss": 0.2015, "lr": 2.4652529427580095e-06, "epoch": 2.7699891172140068, "percentage": 55.4, "elapsed_time": "1:54:28", "remaining_time": "1:32:09", "throughput": 19825.97, "total_tokens": 136173056}
|
|
{"current_steps": 43275, "total_steps": 78105, "loss": 0.2896, "lr": 2.4646943452439993e-06, "epoch": 2.7703091991549837, "percentage": 55.41, "elapsed_time": "1:54:29", "remaining_time": "1:32:08", "throughput": 19826.24, "total_tokens": 136187648}
|
|
{"current_steps": 43280, "total_steps": 78105, "loss": 0.2189, "lr": 2.464135749492971e-06, "epoch": 2.7706292810959603, "percentage": 55.41, "elapsed_time": "1:54:29", "remaining_time": "1:32:07", "throughput": 19826.53, "total_tokens": 136202880}
|
|
{"current_steps": 43285, "total_steps": 78105, "loss": 0.1735, "lr": 2.463577155532815e-06, "epoch": 2.7709493630369373, "percentage": 55.42, "elapsed_time": "1:54:30", "remaining_time": "1:32:06", "throughput": 19826.81, "total_tokens": 136218048}
|
|
{"current_steps": 43290, "total_steps": 78105, "loss": 0.2068, "lr": 2.463018563391427e-06, "epoch": 2.7712694449779143, "percentage": 55.43, "elapsed_time": "1:54:31", "remaining_time": "1:32:05", "throughput": 19827.09, "total_tokens": 136232960}
|
|
{"current_steps": 43295, "total_steps": 78105, "loss": 0.151, "lr": 2.462459973096699e-06, "epoch": 2.7715895269188913, "percentage": 55.43, "elapsed_time": "1:54:31", "remaining_time": "1:32:04", "throughput": 19827.46, "total_tokens": 136248896}
|
|
{"current_steps": 43300, "total_steps": 78105, "loss": 0.2065, "lr": 2.461901384676525e-06, "epoch": 2.7719096088598683, "percentage": 55.44, "elapsed_time": "1:54:32", "remaining_time": "1:32:04", "throughput": 19827.78, "total_tokens": 136264128}
|
|
{"current_steps": 43305, "total_steps": 78105, "loss": 0.228, "lr": 2.4613427981587967e-06, "epoch": 2.7722296908008452, "percentage": 55.44, "elapsed_time": "1:54:33", "remaining_time": "1:32:03", "throughput": 19828.09, "total_tokens": 136279232}
|
|
{"current_steps": 43310, "total_steps": 78105, "loss": 0.1915, "lr": 2.460784213571408e-06, "epoch": 2.772549772741822, "percentage": 55.45, "elapsed_time": "1:54:33", "remaining_time": "1:32:02", "throughput": 19828.48, "total_tokens": 136295872}
|
|
{"current_steps": 43315, "total_steps": 78105, "loss": 0.2784, "lr": 2.460225630942251e-06, "epoch": 2.772869854682799, "percentage": 55.46, "elapsed_time": "1:54:34", "remaining_time": "1:32:01", "throughput": 19828.8, "total_tokens": 136311360}
|
|
{"current_steps": 43320, "total_steps": 78105, "loss": 0.2536, "lr": 2.45966705029922e-06, "epoch": 2.7731899366237758, "percentage": 55.46, "elapsed_time": "1:54:35", "remaining_time": "1:32:00", "throughput": 19829.13, "total_tokens": 136326848}
|
|
{"current_steps": 43325, "total_steps": 78105, "loss": 0.2467, "lr": 2.459108471670205e-06, "epoch": 2.7735100185647523, "percentage": 55.47, "elapsed_time": "1:54:35", "remaining_time": "1:31:59", "throughput": 19829.46, "total_tokens": 136342400}
|
|
{"current_steps": 43330, "total_steps": 78105, "loss": 0.2912, "lr": 2.4585498950831012e-06, "epoch": 2.7738301005057293, "percentage": 55.48, "elapsed_time": "1:54:36", "remaining_time": "1:31:58", "throughput": 19829.77, "total_tokens": 136358016}
|
|
{"current_steps": 43335, "total_steps": 78105, "loss": 0.2382, "lr": 2.457991320565799e-06, "epoch": 2.7741501824467063, "percentage": 55.48, "elapsed_time": "1:54:37", "remaining_time": "1:31:57", "throughput": 19830.12, "total_tokens": 136373824}
|
|
{"current_steps": 43340, "total_steps": 78105, "loss": 0.2367, "lr": 2.457432748146191e-06, "epoch": 2.7744702643876833, "percentage": 55.49, "elapsed_time": "1:54:37", "remaining_time": "1:31:57", "throughput": 19830.56, "total_tokens": 136390976}
|
|
{"current_steps": 43345, "total_steps": 78105, "loss": 0.3068, "lr": 2.45687417785217e-06, "epoch": 2.7747903463286603, "percentage": 55.5, "elapsed_time": "1:54:38", "remaining_time": "1:31:56", "throughput": 19830.87, "total_tokens": 136406336}
|
|
{"current_steps": 43350, "total_steps": 78105, "loss": 0.3203, "lr": 2.4563156097116282e-06, "epoch": 2.7751104282696373, "percentage": 55.5, "elapsed_time": "1:54:39", "remaining_time": "1:31:55", "throughput": 19831.19, "total_tokens": 136421888}
|
|
{"current_steps": 43355, "total_steps": 78105, "loss": 0.1842, "lr": 2.455757043752458e-06, "epoch": 2.775430510210614, "percentage": 55.51, "elapsed_time": "1:54:39", "remaining_time": "1:31:54", "throughput": 19831.5, "total_tokens": 136437120}
|
|
{"current_steps": 43360, "total_steps": 78105, "loss": 0.2768, "lr": 2.4551984800025494e-06, "epoch": 2.775750592151591, "percentage": 55.52, "elapsed_time": "1:54:40", "remaining_time": "1:31:53", "throughput": 19831.82, "total_tokens": 136452608}
|
|
{"current_steps": 43365, "total_steps": 78105, "loss": 0.1828, "lr": 2.4546399184897963e-06, "epoch": 2.776070674092568, "percentage": 55.52, "elapsed_time": "1:54:41", "remaining_time": "1:31:52", "throughput": 19832.1, "total_tokens": 136467584}
|
|
{"current_steps": 43370, "total_steps": 78105, "loss": 0.2901, "lr": 2.4540813592420885e-06, "epoch": 2.7763907560335443, "percentage": 55.53, "elapsed_time": "1:54:41", "remaining_time": "1:31:51", "throughput": 19832.44, "total_tokens": 136483584}
|
|
{"current_steps": 43375, "total_steps": 78105, "loss": 0.2584, "lr": 2.4535228022873193e-06, "epoch": 2.7767108379745213, "percentage": 55.53, "elapsed_time": "1:54:42", "remaining_time": "1:31:50", "throughput": 19832.76, "total_tokens": 136498816}
|
|
{"current_steps": 43380, "total_steps": 78105, "loss": 0.3089, "lr": 2.4529642476533784e-06, "epoch": 2.7770309199154983, "percentage": 55.54, "elapsed_time": "1:54:43", "remaining_time": "1:31:49", "throughput": 19833.09, "total_tokens": 136514368}
|
|
{"current_steps": 43385, "total_steps": 78105, "loss": 0.2567, "lr": 2.4524056953681595e-06, "epoch": 2.7773510018564753, "percentage": 55.55, "elapsed_time": "1:54:43", "remaining_time": "1:31:48", "throughput": 19833.41, "total_tokens": 136530240}
|
|
{"current_steps": 43390, "total_steps": 78105, "loss": 0.2025, "lr": 2.4518471454595503e-06, "epoch": 2.7776710837974523, "percentage": 55.55, "elapsed_time": "1:54:44", "remaining_time": "1:31:48", "throughput": 19833.7, "total_tokens": 136545216}
|
|
{"current_steps": 43395, "total_steps": 78105, "loss": 0.2924, "lr": 2.4512885979554445e-06, "epoch": 2.7779911657384293, "percentage": 55.56, "elapsed_time": "1:54:45", "remaining_time": "1:31:47", "throughput": 19834.01, "total_tokens": 136560576}
|
|
{"current_steps": 43400, "total_steps": 78105, "loss": 0.3014, "lr": 2.450730052883733e-06, "epoch": 2.778311247679406, "percentage": 55.57, "elapsed_time": "1:54:45", "remaining_time": "1:31:46", "throughput": 19834.27, "total_tokens": 136574912}
|
|
{"current_steps": 43405, "total_steps": 78105, "loss": 0.1743, "lr": 2.450171510272305e-06, "epoch": 2.778631329620383, "percentage": 55.57, "elapsed_time": "1:54:46", "remaining_time": "1:31:45", "throughput": 19834.58, "total_tokens": 136590336}
|
|
{"current_steps": 43410, "total_steps": 78105, "loss": 0.2105, "lr": 2.4496129701490537e-06, "epoch": 2.77895141156136, "percentage": 55.58, "elapsed_time": "1:54:47", "remaining_time": "1:31:44", "throughput": 19835.02, "total_tokens": 136607616}
|
|
{"current_steps": 43415, "total_steps": 78105, "loss": 0.1322, "lr": 2.4490544325418667e-06, "epoch": 2.7792714935023364, "percentage": 55.59, "elapsed_time": "1:54:47", "remaining_time": "1:31:43", "throughput": 19835.45, "total_tokens": 136624768}
|
|
{"current_steps": 43420, "total_steps": 78105, "loss": 0.1722, "lr": 2.4484958974786378e-06, "epoch": 2.7795915754433134, "percentage": 55.59, "elapsed_time": "1:54:48", "remaining_time": "1:31:42", "throughput": 19835.81, "total_tokens": 136641280}
|
|
{"current_steps": 43425, "total_steps": 78105, "loss": 0.2395, "lr": 2.447937364987254e-06, "epoch": 2.7799116573842904, "percentage": 55.6, "elapsed_time": "1:54:49", "remaining_time": "1:31:41", "throughput": 19836.09, "total_tokens": 136656000}
|
|
{"current_steps": 43430, "total_steps": 78105, "loss": 0.2012, "lr": 2.447378835095607e-06, "epoch": 2.7802317393252673, "percentage": 55.6, "elapsed_time": "1:54:49", "remaining_time": "1:31:41", "throughput": 19836.43, "total_tokens": 136671680}
|
|
{"current_steps": 43435, "total_steps": 78105, "loss": 0.2567, "lr": 2.4468203078315874e-06, "epoch": 2.7805518212662443, "percentage": 55.61, "elapsed_time": "1:54:50", "remaining_time": "1:31:40", "throughput": 19836.7, "total_tokens": 136686400}
|
|
{"current_steps": 43440, "total_steps": 78105, "loss": 0.2767, "lr": 2.446261783223085e-06, "epoch": 2.780871903207221, "percentage": 55.62, "elapsed_time": "1:54:51", "remaining_time": "1:31:39", "throughput": 19837.04, "total_tokens": 136702272}
|
|
{"current_steps": 43445, "total_steps": 78105, "loss": 0.3365, "lr": 2.4457032612979883e-06, "epoch": 2.781191985148198, "percentage": 55.62, "elapsed_time": "1:54:51", "remaining_time": "1:31:38", "throughput": 19837.38, "total_tokens": 136718144}
|
|
{"current_steps": 43450, "total_steps": 78105, "loss": 0.201, "lr": 2.4451447420841883e-06, "epoch": 2.781512067089175, "percentage": 55.63, "elapsed_time": "1:54:52", "remaining_time": "1:31:37", "throughput": 19837.73, "total_tokens": 136734016}
|
|
{"current_steps": 43455, "total_steps": 78105, "loss": 0.2646, "lr": 2.444586225609574e-06, "epoch": 2.781832149030152, "percentage": 55.64, "elapsed_time": "1:54:53", "remaining_time": "1:31:36", "throughput": 19838.11, "total_tokens": 136750656}
|
|
{"current_steps": 43460, "total_steps": 78105, "loss": 0.1631, "lr": 2.4440277119020347e-06, "epoch": 2.7821522309711284, "percentage": 55.64, "elapsed_time": "1:54:53", "remaining_time": "1:31:35", "throughput": 19838.38, "total_tokens": 136765504}
|
|
{"current_steps": 43465, "total_steps": 78105, "loss": 0.2374, "lr": 2.443469200989461e-06, "epoch": 2.7824723129121054, "percentage": 55.65, "elapsed_time": "1:54:54", "remaining_time": "1:31:34", "throughput": 19838.66, "total_tokens": 136780416}
|
|
{"current_steps": 43470, "total_steps": 78105, "loss": 0.3309, "lr": 2.44291069289974e-06, "epoch": 2.7827923948530824, "percentage": 55.66, "elapsed_time": "1:54:55", "remaining_time": "1:31:33", "throughput": 19839.01, "total_tokens": 136796544}
|
|
{"current_steps": 43475, "total_steps": 78105, "loss": 0.2476, "lr": 2.4423521876607628e-06, "epoch": 2.7831124767940594, "percentage": 55.66, "elapsed_time": "1:54:56", "remaining_time": "1:31:33", "throughput": 19839.36, "total_tokens": 136812672}
|
|
{"current_steps": 43480, "total_steps": 78105, "loss": 0.2369, "lr": 2.441793685300416e-06, "epoch": 2.7834325587350364, "percentage": 55.67, "elapsed_time": "1:54:56", "remaining_time": "1:31:32", "throughput": 19839.68, "total_tokens": 136828224}
|
|
{"current_steps": 43485, "total_steps": 78105, "loss": 0.2058, "lr": 2.441235185846589e-06, "epoch": 2.783752640676013, "percentage": 55.68, "elapsed_time": "1:54:57", "remaining_time": "1:31:31", "throughput": 19839.93, "total_tokens": 136842624}
|
|
{"current_steps": 43490, "total_steps": 78105, "loss": 0.1918, "lr": 2.4406766893271714e-06, "epoch": 2.78407272261699, "percentage": 55.68, "elapsed_time": "1:54:58", "remaining_time": "1:31:30", "throughput": 19840.25, "total_tokens": 136858368}
|
|
{"current_steps": 43495, "total_steps": 78105, "loss": 0.2121, "lr": 2.4401181957700514e-06, "epoch": 2.784392804557967, "percentage": 55.69, "elapsed_time": "1:54:58", "remaining_time": "1:31:29", "throughput": 19840.51, "total_tokens": 136872704}
|
|
{"current_steps": 43500, "total_steps": 78105, "loss": 0.2706, "lr": 2.4395597052031157e-06, "epoch": 2.784712886498944, "percentage": 55.69, "elapsed_time": "1:54:59", "remaining_time": "1:31:28", "throughput": 19840.88, "total_tokens": 136889088}
|
|
{"current_steps": 43505, "total_steps": 78105, "loss": 0.2903, "lr": 2.439001217654255e-06, "epoch": 2.7850329684399204, "percentage": 55.7, "elapsed_time": "1:55:00", "remaining_time": "1:31:27", "throughput": 19841.24, "total_tokens": 136904960}
|
|
{"current_steps": 43510, "total_steps": 78105, "loss": 0.1893, "lr": 2.438442733151354e-06, "epoch": 2.7853530503808974, "percentage": 55.71, "elapsed_time": "1:55:00", "remaining_time": "1:31:26", "throughput": 19841.5, "total_tokens": 136919360}
|
|
{"current_steps": 43515, "total_steps": 78105, "loss": 0.1027, "lr": 2.437884251722303e-06, "epoch": 2.7856731323218744, "percentage": 55.71, "elapsed_time": "1:55:01", "remaining_time": "1:31:25", "throughput": 19841.77, "total_tokens": 136934208}
|
|
{"current_steps": 43520, "total_steps": 78105, "loss": 0.1721, "lr": 2.437325773394989e-06, "epoch": 2.7859932142628514, "percentage": 55.72, "elapsed_time": "1:55:01", "remaining_time": "1:31:24", "throughput": 19842.12, "total_tokens": 136950208}
|
|
{"current_steps": 43525, "total_steps": 78105, "loss": 0.2443, "lr": 2.436767298197299e-06, "epoch": 2.7863132962038284, "percentage": 55.73, "elapsed_time": "1:55:02", "remaining_time": "1:31:24", "throughput": 19842.47, "total_tokens": 136966208}
|
|
{"current_steps": 43530, "total_steps": 78105, "loss": 0.2573, "lr": 2.4362088261571218e-06, "epoch": 2.786633378144805, "percentage": 55.73, "elapsed_time": "1:55:03", "remaining_time": "1:31:23", "throughput": 19842.84, "total_tokens": 136982528}
|
|
{"current_steps": 43535, "total_steps": 78105, "loss": 0.2591, "lr": 2.4356503573023425e-06, "epoch": 2.786953460085782, "percentage": 55.74, "elapsed_time": "1:55:04", "remaining_time": "1:31:22", "throughput": 19843.07, "total_tokens": 136996928}
|
|
{"current_steps": 43540, "total_steps": 78105, "loss": 0.2069, "lr": 2.4350918916608497e-06, "epoch": 2.787273542026759, "percentage": 55.75, "elapsed_time": "1:55:04", "remaining_time": "1:31:21", "throughput": 19843.36, "total_tokens": 137011840}
|
|
{"current_steps": 43545, "total_steps": 78105, "loss": 0.2047, "lr": 2.4345334292605293e-06, "epoch": 2.7875936239677355, "percentage": 55.75, "elapsed_time": "1:55:05", "remaining_time": "1:31:20", "throughput": 19843.67, "total_tokens": 137027072}
|
|
{"current_steps": 43550, "total_steps": 78105, "loss": 0.2738, "lr": 2.433974970129269e-06, "epoch": 2.7879137059087125, "percentage": 55.76, "elapsed_time": "1:55:05", "remaining_time": "1:31:19", "throughput": 19843.96, "total_tokens": 137041856}
|
|
{"current_steps": 43555, "total_steps": 78105, "loss": 0.197, "lr": 2.4334165142949545e-06, "epoch": 2.7882337878496894, "percentage": 55.76, "elapsed_time": "1:55:06", "remaining_time": "1:31:18", "throughput": 19844.31, "total_tokens": 137058176}
|
|
{"current_steps": 43560, "total_steps": 78105, "loss": 0.2104, "lr": 2.432858061785473e-06, "epoch": 2.7885538697906664, "percentage": 55.77, "elapsed_time": "1:55:07", "remaining_time": "1:31:17", "throughput": 19844.64, "total_tokens": 137073920}
|
|
{"current_steps": 43565, "total_steps": 78105, "loss": 0.3731, "lr": 2.432299612628709e-06, "epoch": 2.7888739517316434, "percentage": 55.78, "elapsed_time": "1:55:08", "remaining_time": "1:31:16", "throughput": 19845.05, "total_tokens": 137091008}
|
|
{"current_steps": 43570, "total_steps": 78105, "loss": 0.2411, "lr": 2.4317411668525505e-06, "epoch": 2.7891940336726204, "percentage": 55.78, "elapsed_time": "1:55:08", "remaining_time": "1:31:16", "throughput": 19845.35, "total_tokens": 137106112}
|
|
{"current_steps": 43575, "total_steps": 78105, "loss": 0.3352, "lr": 2.4311827244848824e-06, "epoch": 2.789514115613597, "percentage": 55.79, "elapsed_time": "1:55:09", "remaining_time": "1:31:15", "throughput": 19845.65, "total_tokens": 137120960}
|
|
{"current_steps": 43580, "total_steps": 78105, "loss": 0.1853, "lr": 2.43062428555359e-06, "epoch": 2.789834197554574, "percentage": 55.8, "elapsed_time": "1:55:10", "remaining_time": "1:31:14", "throughput": 19845.99, "total_tokens": 137136896}
|
|
{"current_steps": 43585, "total_steps": 78105, "loss": 0.1409, "lr": 2.430065850086561e-06, "epoch": 2.790154279495551, "percentage": 55.8, "elapsed_time": "1:55:10", "remaining_time": "1:31:13", "throughput": 19846.29, "total_tokens": 137152448}
|
|
{"current_steps": 43590, "total_steps": 78105, "loss": 0.1979, "lr": 2.4295074181116777e-06, "epoch": 2.7904743614365275, "percentage": 55.81, "elapsed_time": "1:55:11", "remaining_time": "1:31:12", "throughput": 19846.69, "total_tokens": 137169472}
|
|
{"current_steps": 43595, "total_steps": 78105, "loss": 0.2905, "lr": 2.4289489896568273e-06, "epoch": 2.7907944433775045, "percentage": 55.82, "elapsed_time": "1:55:12", "remaining_time": "1:31:11", "throughput": 19847.03, "total_tokens": 137185344}
|
|
{"current_steps": 43600, "total_steps": 78105, "loss": 0.2394, "lr": 2.4283905647498943e-06, "epoch": 2.7911145253184815, "percentage": 55.82, "elapsed_time": "1:55:12", "remaining_time": "1:31:10", "throughput": 19847.47, "total_tokens": 137202816}
|
|
{"current_steps": 43605, "total_steps": 78105, "loss": 0.172, "lr": 2.427832143418764e-06, "epoch": 2.7914346072594585, "percentage": 55.83, "elapsed_time": "1:55:13", "remaining_time": "1:31:09", "throughput": 19847.79, "total_tokens": 137218432}
|
|
{"current_steps": 43610, "total_steps": 78105, "loss": 0.3095, "lr": 2.4272737256913194e-06, "epoch": 2.7917546892004355, "percentage": 55.84, "elapsed_time": "1:55:14", "remaining_time": "1:31:09", "throughput": 19848.23, "total_tokens": 137235776}
|
|
{"current_steps": 43615, "total_steps": 78105, "loss": 0.2425, "lr": 2.4267153115954474e-06, "epoch": 2.7920747711414124, "percentage": 55.84, "elapsed_time": "1:55:14", "remaining_time": "1:31:08", "throughput": 19848.55, "total_tokens": 137251328}
|
|
{"current_steps": 43620, "total_steps": 78105, "loss": 0.1534, "lr": 2.4261569011590296e-06, "epoch": 2.792394853082389, "percentage": 55.85, "elapsed_time": "1:55:15", "remaining_time": "1:31:07", "throughput": 19848.82, "total_tokens": 137266176}
|
|
{"current_steps": 43625, "total_steps": 78105, "loss": 0.2048, "lr": 2.425598494409952e-06, "epoch": 2.792714935023366, "percentage": 55.85, "elapsed_time": "1:55:16", "remaining_time": "1:31:06", "throughput": 19849.13, "total_tokens": 137281600}
|
|
{"current_steps": 43630, "total_steps": 78105, "loss": 0.2732, "lr": 2.4250400913760982e-06, "epoch": 2.793035016964343, "percentage": 55.86, "elapsed_time": "1:55:16", "remaining_time": "1:31:05", "throughput": 19849.42, "total_tokens": 137296448}
|
|
{"current_steps": 43635, "total_steps": 78105, "loss": 0.2189, "lr": 2.4244816920853516e-06, "epoch": 2.7933550989053195, "percentage": 55.87, "elapsed_time": "1:55:17", "remaining_time": "1:31:04", "throughput": 19849.83, "total_tokens": 137313152}
|
|
{"current_steps": 43640, "total_steps": 78105, "loss": 0.2186, "lr": 2.423923296565597e-06, "epoch": 2.7936751808462965, "percentage": 55.87, "elapsed_time": "1:55:18", "remaining_time": "1:31:03", "throughput": 19850.09, "total_tokens": 137328128}
|
|
{"current_steps": 43645, "total_steps": 78105, "loss": 0.1657, "lr": 2.423364904844715e-06, "epoch": 2.7939952627872735, "percentage": 55.88, "elapsed_time": "1:55:18", "remaining_time": "1:31:02", "throughput": 19850.36, "total_tokens": 137342592}
|
|
{"current_steps": 43650, "total_steps": 78105, "loss": 0.1955, "lr": 2.4228065169505923e-06, "epoch": 2.7943153447282505, "percentage": 55.89, "elapsed_time": "1:55:19", "remaining_time": "1:31:01", "throughput": 19850.68, "total_tokens": 137357760}
|
|
{"current_steps": 43655, "total_steps": 78105, "loss": 0.1772, "lr": 2.422248132911109e-06, "epoch": 2.7946354266692275, "percentage": 55.89, "elapsed_time": "1:55:20", "remaining_time": "1:31:01", "throughput": 19851.02, "total_tokens": 137373504}
|
|
{"current_steps": 43660, "total_steps": 78105, "loss": 0.2534, "lr": 2.421689752754149e-06, "epoch": 2.7949555086102045, "percentage": 55.9, "elapsed_time": "1:55:20", "remaining_time": "1:31:00", "throughput": 19851.33, "total_tokens": 137388992}
|
|
{"current_steps": 43665, "total_steps": 78105, "loss": 0.1881, "lr": 2.4211313765075945e-06, "epoch": 2.795275590551181, "percentage": 55.91, "elapsed_time": "1:55:21", "remaining_time": "1:30:59", "throughput": 19851.64, "total_tokens": 137404416}
|
|
{"current_steps": 43670, "total_steps": 78105, "loss": 0.1914, "lr": 2.4205730041993294e-06, "epoch": 2.795595672492158, "percentage": 55.91, "elapsed_time": "1:55:22", "remaining_time": "1:30:58", "throughput": 19851.87, "total_tokens": 137418304}
|
|
{"current_steps": 43675, "total_steps": 78105, "loss": 0.209, "lr": 2.4200146358572333e-06, "epoch": 2.795915754433135, "percentage": 55.92, "elapsed_time": "1:55:22", "remaining_time": "1:30:57", "throughput": 19852.27, "total_tokens": 137435008}
|
|
{"current_steps": 43680, "total_steps": 78105, "loss": 0.2195, "lr": 2.4194562715091906e-06, "epoch": 2.7962358363741115, "percentage": 55.92, "elapsed_time": "1:55:23", "remaining_time": "1:30:56", "throughput": 19852.55, "total_tokens": 137449856}
|
|
{"current_steps": 43685, "total_steps": 78105, "loss": 0.2168, "lr": 2.418897911183082e-06, "epoch": 2.7965559183150885, "percentage": 55.93, "elapsed_time": "1:55:24", "remaining_time": "1:30:55", "throughput": 19852.86, "total_tokens": 137465088}
|
|
{"current_steps": 43690, "total_steps": 78105, "loss": 0.1584, "lr": 2.4183395549067885e-06, "epoch": 2.7968760002560655, "percentage": 55.94, "elapsed_time": "1:55:24", "remaining_time": "1:30:54", "throughput": 19853.17, "total_tokens": 137480512}
|
|
{"current_steps": 43695, "total_steps": 78105, "loss": 0.2449, "lr": 2.417781202708194e-06, "epoch": 2.7971960821970425, "percentage": 55.94, "elapsed_time": "1:55:25", "remaining_time": "1:30:53", "throughput": 19853.42, "total_tokens": 137494976}
|
|
{"current_steps": 43700, "total_steps": 78105, "loss": 0.2626, "lr": 2.4172228546151763e-06, "epoch": 2.7975161641380195, "percentage": 55.95, "elapsed_time": "1:55:26", "remaining_time": "1:30:52", "throughput": 19853.81, "total_tokens": 137511552}
|
|
{"current_steps": 43705, "total_steps": 78105, "loss": 0.1722, "lr": 2.4166645106556195e-06, "epoch": 2.797836246078996, "percentage": 55.96, "elapsed_time": "1:55:26", "remaining_time": "1:30:52", "throughput": 19854.12, "total_tokens": 137526784}
|
|
{"current_steps": 43710, "total_steps": 78105, "loss": 0.2264, "lr": 2.4161061708574017e-06, "epoch": 2.798156328019973, "percentage": 55.96, "elapsed_time": "1:55:27", "remaining_time": "1:30:51", "throughput": 19854.41, "total_tokens": 137541952}
|
|
{"current_steps": 43715, "total_steps": 78105, "loss": 0.1979, "lr": 2.415547835248405e-06, "epoch": 2.79847640996095, "percentage": 55.97, "elapsed_time": "1:55:28", "remaining_time": "1:30:50", "throughput": 19854.78, "total_tokens": 137558144}
|
|
{"current_steps": 43720, "total_steps": 78105, "loss": 0.3175, "lr": 2.4149895038565094e-06, "epoch": 2.798796491901927, "percentage": 55.98, "elapsed_time": "1:55:28", "remaining_time": "1:30:49", "throughput": 19855.14, "total_tokens": 137574016}
|
|
{"current_steps": 43725, "total_steps": 78105, "loss": 0.2088, "lr": 2.4144311767095965e-06, "epoch": 2.7991165738429036, "percentage": 55.98, "elapsed_time": "1:55:29", "remaining_time": "1:30:48", "throughput": 19855.43, "total_tokens": 137589120}
|
|
{"current_steps": 43730, "total_steps": 78105, "loss": 0.3295, "lr": 2.4138728538355433e-06, "epoch": 2.7994366557838806, "percentage": 55.99, "elapsed_time": "1:55:30", "remaining_time": "1:30:47", "throughput": 19855.78, "total_tokens": 137605120}
|
|
{"current_steps": 43735, "total_steps": 78105, "loss": 0.2233, "lr": 2.4133145352622327e-06, "epoch": 2.7997567377248576, "percentage": 56.0, "elapsed_time": "1:55:30", "remaining_time": "1:30:46", "throughput": 19856.21, "total_tokens": 137622528}
|
|
{"current_steps": 43740, "total_steps": 78105, "loss": 0.1735, "lr": 2.412756221017541e-06, "epoch": 2.8000768196658345, "percentage": 56.0, "elapsed_time": "1:55:31", "remaining_time": "1:30:45", "throughput": 19856.52, "total_tokens": 137638016}
|
|
{"current_steps": 43745, "total_steps": 78105, "loss": 0.1693, "lr": 2.4121979111293497e-06, "epoch": 2.8003969016068115, "percentage": 56.01, "elapsed_time": "1:55:32", "remaining_time": "1:30:45", "throughput": 19856.82, "total_tokens": 137653376}
|
|
{"current_steps": 43750, "total_steps": 78105, "loss": 0.2275, "lr": 2.411639605625538e-06, "epoch": 2.800716983547788, "percentage": 56.01, "elapsed_time": "1:55:32", "remaining_time": "1:30:44", "throughput": 19857.18, "total_tokens": 137669632}
|
|
{"current_steps": 43755, "total_steps": 78105, "loss": 0.2669, "lr": 2.411081304533983e-06, "epoch": 2.801037065488765, "percentage": 56.02, "elapsed_time": "1:55:33", "remaining_time": "1:30:43", "throughput": 19857.51, "total_tokens": 137685568}
|
|
{"current_steps": 43760, "total_steps": 78105, "loss": 0.2473, "lr": 2.410523007882566e-06, "epoch": 2.801357147429742, "percentage": 56.03, "elapsed_time": "1:55:34", "remaining_time": "1:30:42", "throughput": 19857.81, "total_tokens": 137700864}
|
|
{"current_steps": 43765, "total_steps": 78105, "loss": 0.215, "lr": 2.409964715699163e-06, "epoch": 2.801677229370719, "percentage": 56.03, "elapsed_time": "1:55:35", "remaining_time": "1:30:41", "throughput": 19858.15, "total_tokens": 137717056}
|
|
{"current_steps": 43770, "total_steps": 78105, "loss": 0.267, "lr": 2.4094064280116533e-06, "epoch": 2.8019973113116956, "percentage": 56.04, "elapsed_time": "1:55:35", "remaining_time": "1:30:40", "throughput": 19858.47, "total_tokens": 137732096}
|
|
{"current_steps": 43775, "total_steps": 78105, "loss": 0.3291, "lr": 2.4088481448479146e-06, "epoch": 2.8023173932526726, "percentage": 56.05, "elapsed_time": "1:55:36", "remaining_time": "1:30:39", "throughput": 19858.82, "total_tokens": 137748352}
|
|
{"current_steps": 43780, "total_steps": 78105, "loss": 0.2483, "lr": 2.408289866235825e-06, "epoch": 2.8026374751936496, "percentage": 56.05, "elapsed_time": "1:55:37", "remaining_time": "1:30:38", "throughput": 19859.16, "total_tokens": 137764096}
|
|
{"current_steps": 43785, "total_steps": 78105, "loss": 0.2571, "lr": 2.4077315922032617e-06, "epoch": 2.8029575571346266, "percentage": 56.06, "elapsed_time": "1:55:37", "remaining_time": "1:30:37", "throughput": 19859.49, "total_tokens": 137779712}
|
|
{"current_steps": 43790, "total_steps": 78105, "loss": 0.1611, "lr": 2.4071733227781027e-06, "epoch": 2.8032776390756036, "percentage": 56.07, "elapsed_time": "1:55:38", "remaining_time": "1:30:37", "throughput": 19859.76, "total_tokens": 137794496}
|
|
{"current_steps": 43795, "total_steps": 78105, "loss": 0.3214, "lr": 2.4066150579882234e-06, "epoch": 2.80359772101658, "percentage": 56.07, "elapsed_time": "1:55:39", "remaining_time": "1:30:36", "throughput": 19860.08, "total_tokens": 137809792}
|
|
{"current_steps": 43800, "total_steps": 78105, "loss": 0.2097, "lr": 2.406056797861502e-06, "epoch": 2.803917802957557, "percentage": 56.08, "elapsed_time": "1:55:39", "remaining_time": "1:30:35", "throughput": 19860.42, "total_tokens": 137826048}
|
|
{"current_steps": 43805, "total_steps": 78105, "loss": 0.2012, "lr": 2.4054985424258146e-06, "epoch": 2.804237884898534, "percentage": 56.08, "elapsed_time": "1:55:40", "remaining_time": "1:30:34", "throughput": 19860.74, "total_tokens": 137841536}
|
|
{"current_steps": 43810, "total_steps": 78105, "loss": 0.1748, "lr": 2.4049402917090375e-06, "epoch": 2.8045579668395106, "percentage": 56.09, "elapsed_time": "1:55:41", "remaining_time": "1:30:33", "throughput": 19861.03, "total_tokens": 137856640}
|
|
{"current_steps": 43815, "total_steps": 78105, "loss": 0.2603, "lr": 2.4043820457390483e-06, "epoch": 2.8048780487804876, "percentage": 56.1, "elapsed_time": "1:55:41", "remaining_time": "1:30:32", "throughput": 19861.37, "total_tokens": 137872576}
|
|
{"current_steps": 43820, "total_steps": 78105, "loss": 0.1837, "lr": 2.4038238045437203e-06, "epoch": 2.8051981307214646, "percentage": 56.1, "elapsed_time": "1:55:42", "remaining_time": "1:30:31", "throughput": 19861.81, "total_tokens": 137890432}
|
|
{"current_steps": 43825, "total_steps": 78105, "loss": 0.25, "lr": 2.4032655681509315e-06, "epoch": 2.8055182126624416, "percentage": 56.11, "elapsed_time": "1:55:43", "remaining_time": "1:30:30", "throughput": 19862.15, "total_tokens": 137906112}
|
|
{"current_steps": 43830, "total_steps": 78105, "loss": 0.2715, "lr": 2.402707336588556e-06, "epoch": 2.8058382946034186, "percentage": 56.12, "elapsed_time": "1:55:43", "remaining_time": "1:30:30", "throughput": 19862.45, "total_tokens": 137921472}
|
|
{"current_steps": 43835, "total_steps": 78105, "loss": 0.3797, "lr": 2.4021491098844697e-06, "epoch": 2.8061583765443956, "percentage": 56.12, "elapsed_time": "1:55:44", "remaining_time": "1:30:29", "throughput": 19862.83, "total_tokens": 137938368}
|
|
{"current_steps": 43840, "total_steps": 78105, "loss": 0.2277, "lr": 2.4015908880665463e-06, "epoch": 2.806478458485372, "percentage": 56.13, "elapsed_time": "1:55:45", "remaining_time": "1:30:28", "throughput": 19863.22, "total_tokens": 137955008}
|
|
{"current_steps": 43845, "total_steps": 78105, "loss": 0.2341, "lr": 2.4010326711626625e-06, "epoch": 2.806798540426349, "percentage": 56.14, "elapsed_time": "1:55:45", "remaining_time": "1:30:27", "throughput": 19863.54, "total_tokens": 137970432}
|
|
{"current_steps": 43850, "total_steps": 78105, "loss": 0.2173, "lr": 2.400474459200691e-06, "epoch": 2.807118622367326, "percentage": 56.14, "elapsed_time": "1:55:46", "remaining_time": "1:30:26", "throughput": 19863.87, "total_tokens": 137986112}
|
|
{"current_steps": 43855, "total_steps": 78105, "loss": 0.2485, "lr": 2.3999162522085062e-06, "epoch": 2.8074387043083027, "percentage": 56.15, "elapsed_time": "1:55:47", "remaining_time": "1:30:25", "throughput": 19864.16, "total_tokens": 138001024}
|
|
{"current_steps": 43860, "total_steps": 78105, "loss": 0.2806, "lr": 2.3993580502139825e-06, "epoch": 2.8077587862492797, "percentage": 56.16, "elapsed_time": "1:55:47", "remaining_time": "1:30:24", "throughput": 19864.45, "total_tokens": 138016384}
|
|
{"current_steps": 43865, "total_steps": 78105, "loss": 0.2032, "lr": 2.3987998532449937e-06, "epoch": 2.8080788681902566, "percentage": 56.16, "elapsed_time": "1:55:48", "remaining_time": "1:30:23", "throughput": 19864.93, "total_tokens": 138034560}
|
|
{"current_steps": 43870, "total_steps": 78105, "loss": 0.2277, "lr": 2.3982416613294137e-06, "epoch": 2.8083989501312336, "percentage": 56.17, "elapsed_time": "1:55:49", "remaining_time": "1:30:23", "throughput": 19865.22, "total_tokens": 138049536}
|
|
{"current_steps": 43875, "total_steps": 78105, "loss": 0.1843, "lr": 2.3976834744951144e-06, "epoch": 2.8087190320722106, "percentage": 56.17, "elapsed_time": "1:55:49", "remaining_time": "1:30:22", "throughput": 19865.54, "total_tokens": 138065088}
|
|
{"current_steps": 43880, "total_steps": 78105, "loss": 0.1947, "lr": 2.3971252927699705e-06, "epoch": 2.8090391140131876, "percentage": 56.18, "elapsed_time": "1:55:50", "remaining_time": "1:30:21", "throughput": 19865.9, "total_tokens": 138081152}
|
|
{"current_steps": 43885, "total_steps": 78105, "loss": 0.2614, "lr": 2.3965671161818522e-06, "epoch": 2.809359195954164, "percentage": 56.19, "elapsed_time": "1:55:51", "remaining_time": "1:30:20", "throughput": 19866.24, "total_tokens": 138096896}
|
|
{"current_steps": 43890, "total_steps": 78105, "loss": 0.2865, "lr": 2.396008944758634e-06, "epoch": 2.809679277895141, "percentage": 56.19, "elapsed_time": "1:55:51", "remaining_time": "1:30:19", "throughput": 19866.52, "total_tokens": 138112000}
|
|
{"current_steps": 43895, "total_steps": 78105, "loss": 0.1542, "lr": 2.395450778528187e-06, "epoch": 2.809999359836118, "percentage": 56.2, "elapsed_time": "1:55:52", "remaining_time": "1:30:18", "throughput": 19866.85, "total_tokens": 138127808}
|
|
{"current_steps": 43900, "total_steps": 78105, "loss": 0.2239, "lr": 2.394892617518385e-06, "epoch": 2.8103194417770947, "percentage": 56.21, "elapsed_time": "1:55:53", "remaining_time": "1:30:17", "throughput": 19867.13, "total_tokens": 138142784}
|
|
{"current_steps": 43905, "total_steps": 78105, "loss": 0.2381, "lr": 2.3943344617570967e-06, "epoch": 2.8106395237180717, "percentage": 56.21, "elapsed_time": "1:55:53", "remaining_time": "1:30:16", "throughput": 19867.41, "total_tokens": 138157696}
|
|
{"current_steps": 43910, "total_steps": 78105, "loss": 0.2195, "lr": 2.3937763112721953e-06, "epoch": 2.8109596056590487, "percentage": 56.22, "elapsed_time": "1:55:54", "remaining_time": "1:30:15", "throughput": 19867.74, "total_tokens": 138173120}
|
|
{"current_steps": 43915, "total_steps": 78105, "loss": 0.2862, "lr": 2.3932181660915514e-06, "epoch": 2.8112796876000257, "percentage": 56.23, "elapsed_time": "1:55:55", "remaining_time": "1:30:15", "throughput": 19868.12, "total_tokens": 138189568}
|
|
{"current_steps": 43920, "total_steps": 78105, "loss": 0.2011, "lr": 2.3926600262430362e-06, "epoch": 2.8115997695410027, "percentage": 56.23, "elapsed_time": "1:55:55", "remaining_time": "1:30:14", "throughput": 19868.43, "total_tokens": 138204736}
|
|
{"current_steps": 43925, "total_steps": 78105, "loss": 0.1736, "lr": 2.392101891754521e-06, "epoch": 2.8119198514819796, "percentage": 56.24, "elapsed_time": "1:55:56", "remaining_time": "1:30:13", "throughput": 19868.78, "total_tokens": 138220544}
|
|
{"current_steps": 43930, "total_steps": 78105, "loss": 0.1633, "lr": 2.3915437626538743e-06, "epoch": 2.812239933422956, "percentage": 56.24, "elapsed_time": "1:55:57", "remaining_time": "1:30:12", "throughput": 19869.02, "total_tokens": 138234816}
|
|
{"current_steps": 43935, "total_steps": 78105, "loss": 0.2308, "lr": 2.3909856389689683e-06, "epoch": 2.812560015363933, "percentage": 56.25, "elapsed_time": "1:55:57", "remaining_time": "1:30:11", "throughput": 19869.32, "total_tokens": 138250048}
|
|
{"current_steps": 43940, "total_steps": 78105, "loss": 0.2256, "lr": 2.3904275207276707e-06, "epoch": 2.81288009730491, "percentage": 56.26, "elapsed_time": "1:55:58", "remaining_time": "1:30:10", "throughput": 19869.6, "total_tokens": 138265088}
|
|
{"current_steps": 43945, "total_steps": 78105, "loss": 0.212, "lr": 2.3898694079578522e-06, "epoch": 2.8132001792458867, "percentage": 56.26, "elapsed_time": "1:55:59", "remaining_time": "1:30:09", "throughput": 19869.86, "total_tokens": 138279744}
|
|
{"current_steps": 43950, "total_steps": 78105, "loss": 0.2526, "lr": 2.3893113006873813e-06, "epoch": 2.8135202611868637, "percentage": 56.27, "elapsed_time": "1:55:59", "remaining_time": "1:30:08", "throughput": 19870.23, "total_tokens": 138296128}
|
|
{"current_steps": 43955, "total_steps": 78105, "loss": 0.2185, "lr": 2.388753198944129e-06, "epoch": 2.8138403431278407, "percentage": 56.28, "elapsed_time": "1:56:00", "remaining_time": "1:30:07", "throughput": 19870.57, "total_tokens": 138312064}
|
|
{"current_steps": 43960, "total_steps": 78105, "loss": 0.2491, "lr": 2.388195102755961e-06, "epoch": 2.8141604250688177, "percentage": 56.28, "elapsed_time": "1:56:01", "remaining_time": "1:30:07", "throughput": 19870.85, "total_tokens": 138327104}
|
|
{"current_steps": 43965, "total_steps": 78105, "loss": 0.2342, "lr": 2.3876370121507484e-06, "epoch": 2.8144805070097947, "percentage": 56.29, "elapsed_time": "1:56:01", "remaining_time": "1:30:06", "throughput": 19871.19, "total_tokens": 138342976}
|
|
{"current_steps": 43970, "total_steps": 78105, "loss": 0.242, "lr": 2.3870789271563567e-06, "epoch": 2.8148005889507712, "percentage": 56.3, "elapsed_time": "1:56:02", "remaining_time": "1:30:05", "throughput": 19871.59, "total_tokens": 138359872}
|
|
{"current_steps": 43975, "total_steps": 78105, "loss": 0.3118, "lr": 2.3865208478006565e-06, "epoch": 2.8151206708917482, "percentage": 56.3, "elapsed_time": "1:56:03", "remaining_time": "1:30:04", "throughput": 19871.92, "total_tokens": 138375808}
|
|
{"current_steps": 43980, "total_steps": 78105, "loss": 0.206, "lr": 2.3859627741115127e-06, "epoch": 2.815440752832725, "percentage": 56.31, "elapsed_time": "1:56:04", "remaining_time": "1:30:03", "throughput": 19872.22, "total_tokens": 138390976}
|
|
{"current_steps": 43985, "total_steps": 78105, "loss": 0.1974, "lr": 2.3854047061167944e-06, "epoch": 2.815760834773702, "percentage": 56.32, "elapsed_time": "1:56:04", "remaining_time": "1:30:02", "throughput": 19872.52, "total_tokens": 138406464}
|
|
{"current_steps": 43990, "total_steps": 78105, "loss": 0.2446, "lr": 2.3848466438443694e-06, "epoch": 2.8160809167146788, "percentage": 56.32, "elapsed_time": "1:56:05", "remaining_time": "1:30:01", "throughput": 19872.84, "total_tokens": 138422272}
|
|
{"current_steps": 43995, "total_steps": 78105, "loss": 0.1551, "lr": 2.3842885873221016e-06, "epoch": 2.8164009986556557, "percentage": 56.33, "elapsed_time": "1:56:06", "remaining_time": "1:30:00", "throughput": 19873.16, "total_tokens": 138437888}
|
|
{"current_steps": 44000, "total_steps": 78105, "loss": 0.1492, "lr": 2.3837305365778602e-06, "epoch": 2.8167210805966327, "percentage": 56.33, "elapsed_time": "1:56:06", "remaining_time": "1:30:00", "throughput": 19873.51, "total_tokens": 138453952}
|
|
{"current_steps": 44005, "total_steps": 78105, "loss": 0.2079, "lr": 2.383172491639509e-06, "epoch": 2.8170411625376097, "percentage": 56.34, "elapsed_time": "1:56:07", "remaining_time": "1:29:59", "throughput": 19873.81, "total_tokens": 138469120}
|
|
{"current_steps": 44010, "total_steps": 78105, "loss": 0.2003, "lr": 2.3826144525349163e-06, "epoch": 2.8173612444785867, "percentage": 56.35, "elapsed_time": "1:56:08", "remaining_time": "1:29:58", "throughput": 19874.11, "total_tokens": 138484608}
|
|
{"current_steps": 44015, "total_steps": 78105, "loss": 0.2145, "lr": 2.3820564192919456e-06, "epoch": 2.8176813264195633, "percentage": 56.35, "elapsed_time": "1:56:08", "remaining_time": "1:29:57", "throughput": 19874.41, "total_tokens": 138500160}
|
|
{"current_steps": 44020, "total_steps": 78105, "loss": 0.233, "lr": 2.381498391938464e-06, "epoch": 2.8180014083605402, "percentage": 56.36, "elapsed_time": "1:56:09", "remaining_time": "1:29:56", "throughput": 19874.72, "total_tokens": 138515584}
|
|
{"current_steps": 44025, "total_steps": 78105, "loss": 0.1965, "lr": 2.3809403705023342e-06, "epoch": 2.8183214903015172, "percentage": 56.37, "elapsed_time": "1:56:10", "remaining_time": "1:29:55", "throughput": 19875.05, "total_tokens": 138531456}
|
|
{"current_steps": 44030, "total_steps": 78105, "loss": 0.2553, "lr": 2.380382355011423e-06, "epoch": 2.8186415722424942, "percentage": 56.37, "elapsed_time": "1:56:10", "remaining_time": "1:29:54", "throughput": 19875.36, "total_tokens": 138546816}
|
|
{"current_steps": 44035, "total_steps": 78105, "loss": 0.2613, "lr": 2.3798243454935934e-06, "epoch": 2.8189616541834708, "percentage": 56.38, "elapsed_time": "1:56:11", "remaining_time": "1:29:53", "throughput": 19875.69, "total_tokens": 138562752}
|
|
{"current_steps": 44040, "total_steps": 78105, "loss": 0.1717, "lr": 2.3792663419767095e-06, "epoch": 2.8192817361244478, "percentage": 56.39, "elapsed_time": "1:56:12", "remaining_time": "1:29:52", "throughput": 19876.0, "total_tokens": 138577856}
|
|
{"current_steps": 44045, "total_steps": 78105, "loss": 0.1816, "lr": 2.3787083444886368e-06, "epoch": 2.8196018180654248, "percentage": 56.39, "elapsed_time": "1:56:12", "remaining_time": "1:29:52", "throughput": 19876.33, "total_tokens": 138593728}
|
|
{"current_steps": 44050, "total_steps": 78105, "loss": 0.2282, "lr": 2.3781503530572367e-06, "epoch": 2.8199219000064017, "percentage": 56.4, "elapsed_time": "1:56:13", "remaining_time": "1:29:51", "throughput": 19876.64, "total_tokens": 138609024}
|
|
{"current_steps": 44055, "total_steps": 78105, "loss": 0.1734, "lr": 2.3775923677103738e-06, "epoch": 2.8202419819473787, "percentage": 56.4, "elapsed_time": "1:56:14", "remaining_time": "1:29:50", "throughput": 19877.03, "total_tokens": 138625408}
|
|
{"current_steps": 44060, "total_steps": 78105, "loss": 0.2477, "lr": 2.37703438847591e-06, "epoch": 2.8205620638883553, "percentage": 56.41, "elapsed_time": "1:56:14", "remaining_time": "1:29:49", "throughput": 19877.45, "total_tokens": 138642688}
|
|
{"current_steps": 44065, "total_steps": 78105, "loss": 0.4014, "lr": 2.376476415381709e-06, "epoch": 2.8208821458293323, "percentage": 56.42, "elapsed_time": "1:56:15", "remaining_time": "1:29:48", "throughput": 19877.75, "total_tokens": 138657856}
|
|
{"current_steps": 44070, "total_steps": 78105, "loss": 0.176, "lr": 2.3759184484556317e-06, "epoch": 2.8212022277703093, "percentage": 56.42, "elapsed_time": "1:56:16", "remaining_time": "1:29:47", "throughput": 19878.03, "total_tokens": 138672640}
|
|
{"current_steps": 44075, "total_steps": 78105, "loss": 0.2552, "lr": 2.3753604877255422e-06, "epoch": 2.821522309711286, "percentage": 56.43, "elapsed_time": "1:56:16", "remaining_time": "1:29:46", "throughput": 19878.48, "total_tokens": 138690560}
|
|
{"current_steps": 44080, "total_steps": 78105, "loss": 0.2514, "lr": 2.374802533219299e-06, "epoch": 2.821842391652263, "percentage": 56.44, "elapsed_time": "1:56:17", "remaining_time": "1:29:45", "throughput": 19878.86, "total_tokens": 138707008}
|
|
{"current_steps": 44085, "total_steps": 78105, "loss": 0.1726, "lr": 2.374244584964766e-06, "epoch": 2.82216247359324, "percentage": 56.44, "elapsed_time": "1:56:18", "remaining_time": "1:29:45", "throughput": 19879.18, "total_tokens": 138722368}
|
|
{"current_steps": 44090, "total_steps": 78105, "loss": 0.1757, "lr": 2.3736866429898027e-06, "epoch": 2.822482555534217, "percentage": 56.45, "elapsed_time": "1:56:18", "remaining_time": "1:29:44", "throughput": 19879.5, "total_tokens": 138738112}
|
|
{"current_steps": 44095, "total_steps": 78105, "loss": 0.2233, "lr": 2.3731287073222705e-06, "epoch": 2.8228026374751938, "percentage": 56.46, "elapsed_time": "1:56:19", "remaining_time": "1:29:43", "throughput": 19879.78, "total_tokens": 138753280}
|
|
{"current_steps": 44100, "total_steps": 78105, "loss": 0.3008, "lr": 2.3725707779900313e-06, "epoch": 2.8231227194161708, "percentage": 56.46, "elapsed_time": "1:56:20", "remaining_time": "1:29:42", "throughput": 19880.33, "total_tokens": 138773120}
|
|
{"current_steps": 44105, "total_steps": 78105, "loss": 0.2513, "lr": 2.372012855020942e-06, "epoch": 2.8234428013571473, "percentage": 56.47, "elapsed_time": "1:56:21", "remaining_time": "1:29:41", "throughput": 19880.68, "total_tokens": 138788800}
|
|
{"current_steps": 44110, "total_steps": 78105, "loss": 0.1958, "lr": 2.371454938442865e-06, "epoch": 2.8237628832981243, "percentage": 56.48, "elapsed_time": "1:56:21", "remaining_time": "1:29:40", "throughput": 19881.01, "total_tokens": 138804672}
|
|
{"current_steps": 44115, "total_steps": 78105, "loss": 0.1894, "lr": 2.3708970282836585e-06, "epoch": 2.8240829652391013, "percentage": 56.48, "elapsed_time": "1:56:22", "remaining_time": "1:29:39", "throughput": 19881.29, "total_tokens": 138819520}
|
|
{"current_steps": 44120, "total_steps": 78105, "loss": 0.2009, "lr": 2.3703391245711823e-06, "epoch": 2.824403047180078, "percentage": 56.49, "elapsed_time": "1:56:23", "remaining_time": "1:29:38", "throughput": 19881.59, "total_tokens": 138834752}
|
|
{"current_steps": 44125, "total_steps": 78105, "loss": 0.2332, "lr": 2.369781227333294e-06, "epoch": 2.824723129121055, "percentage": 56.49, "elapsed_time": "1:56:23", "remaining_time": "1:29:38", "throughput": 19881.89, "total_tokens": 138850048}
|
|
{"current_steps": 44130, "total_steps": 78105, "loss": 0.1679, "lr": 2.369223336597854e-06, "epoch": 2.825043211062032, "percentage": 56.5, "elapsed_time": "1:56:24", "remaining_time": "1:29:37", "throughput": 19882.17, "total_tokens": 138865152}
|
|
{"current_steps": 44135, "total_steps": 78105, "loss": 0.2943, "lr": 2.3686654523927187e-06, "epoch": 2.825363293003009, "percentage": 56.51, "elapsed_time": "1:56:25", "remaining_time": "1:29:36", "throughput": 19882.65, "total_tokens": 138882880}
|
|
{"current_steps": 44140, "total_steps": 78105, "loss": 0.1469, "lr": 2.368107574745747e-06, "epoch": 2.825683374943986, "percentage": 56.51, "elapsed_time": "1:56:25", "remaining_time": "1:29:35", "throughput": 19882.94, "total_tokens": 138898112}
|
|
{"current_steps": 44145, "total_steps": 78105, "loss": 0.1727, "lr": 2.367549703684795e-06, "epoch": 2.826003456884963, "percentage": 56.52, "elapsed_time": "1:56:26", "remaining_time": "1:29:34", "throughput": 19883.32, "total_tokens": 138914752}
|
|
{"current_steps": 44150, "total_steps": 78105, "loss": 0.135, "lr": 2.3669918392377224e-06, "epoch": 2.8263235388259393, "percentage": 56.53, "elapsed_time": "1:56:27", "remaining_time": "1:29:33", "throughput": 19883.63, "total_tokens": 138930688}
|
|
{"current_steps": 44155, "total_steps": 78105, "loss": 0.1783, "lr": 2.3664339814323835e-06, "epoch": 2.8266436207669163, "percentage": 56.53, "elapsed_time": "1:56:27", "remaining_time": "1:29:32", "throughput": 19883.93, "total_tokens": 138945920}
|
|
{"current_steps": 44160, "total_steps": 78105, "loss": 0.1945, "lr": 2.3658761302966357e-06, "epoch": 2.8269637027078933, "percentage": 56.54, "elapsed_time": "1:56:28", "remaining_time": "1:29:31", "throughput": 19884.25, "total_tokens": 138961408}
|
|
{"current_steps": 44165, "total_steps": 78105, "loss": 0.1959, "lr": 2.3653182858583364e-06, "epoch": 2.82728378464887, "percentage": 56.55, "elapsed_time": "1:56:29", "remaining_time": "1:29:31", "throughput": 19884.68, "total_tokens": 138979008}
|
|
{"current_steps": 44170, "total_steps": 78105, "loss": 0.2449, "lr": 2.3647604481453393e-06, "epoch": 2.827603866589847, "percentage": 56.55, "elapsed_time": "1:56:29", "remaining_time": "1:29:30", "throughput": 19884.96, "total_tokens": 138994112}
|
|
{"current_steps": 44175, "total_steps": 78105, "loss": 0.3041, "lr": 2.3642026171855013e-06, "epoch": 2.827923948530824, "percentage": 56.56, "elapsed_time": "1:56:30", "remaining_time": "1:29:29", "throughput": 19885.3, "total_tokens": 139010176}
|
|
{"current_steps": 44180, "total_steps": 78105, "loss": 0.3095, "lr": 2.363644793006677e-06, "epoch": 2.828244030471801, "percentage": 56.56, "elapsed_time": "1:56:31", "remaining_time": "1:29:28", "throughput": 19885.63, "total_tokens": 139026048}
|
|
{"current_steps": 44185, "total_steps": 78105, "loss": 0.2471, "lr": 2.363086975636723e-06, "epoch": 2.828564112412778, "percentage": 56.57, "elapsed_time": "1:56:31", "remaining_time": "1:29:27", "throughput": 19885.95, "total_tokens": 139041856}
|
|
{"current_steps": 44190, "total_steps": 78105, "loss": 0.2208, "lr": 2.3625291651034905e-06, "epoch": 2.828884194353755, "percentage": 56.58, "elapsed_time": "1:56:32", "remaining_time": "1:29:26", "throughput": 19886.27, "total_tokens": 139057408}
|
|
{"current_steps": 44195, "total_steps": 78105, "loss": 0.1601, "lr": 2.361971361434837e-06, "epoch": 2.8292042762947314, "percentage": 56.58, "elapsed_time": "1:56:33", "remaining_time": "1:29:25", "throughput": 19886.59, "total_tokens": 139073344}
|
|
{"current_steps": 44200, "total_steps": 78105, "loss": 0.1999, "lr": 2.361413564658613e-06, "epoch": 2.8295243582357084, "percentage": 56.59, "elapsed_time": "1:56:34", "remaining_time": "1:29:25", "throughput": 19887.08, "total_tokens": 139092096}
|
|
{"current_steps": 44205, "total_steps": 78105, "loss": 0.2826, "lr": 2.3608557748026747e-06, "epoch": 2.8298444401766853, "percentage": 56.6, "elapsed_time": "1:56:34", "remaining_time": "1:29:24", "throughput": 19887.44, "total_tokens": 139108544}
|
|
{"current_steps": 44210, "total_steps": 78105, "loss": 0.2114, "lr": 2.360297991894874e-06, "epoch": 2.830164522117662, "percentage": 56.6, "elapsed_time": "1:56:35", "remaining_time": "1:29:23", "throughput": 19887.75, "total_tokens": 139124352}
|
|
{"current_steps": 44215, "total_steps": 78105, "loss": 0.1929, "lr": 2.3597402159630632e-06, "epoch": 2.830484604058639, "percentage": 56.61, "elapsed_time": "1:56:36", "remaining_time": "1:29:22", "throughput": 19888.03, "total_tokens": 139139072}
|
|
{"current_steps": 44220, "total_steps": 78105, "loss": 0.2083, "lr": 2.359182447035097e-06, "epoch": 2.830804685999616, "percentage": 56.62, "elapsed_time": "1:56:36", "remaining_time": "1:29:21", "throughput": 19888.38, "total_tokens": 139155392}
|
|
{"current_steps": 44225, "total_steps": 78105, "loss": 0.1707, "lr": 2.3586246851388243e-06, "epoch": 2.831124767940593, "percentage": 56.62, "elapsed_time": "1:56:37", "remaining_time": "1:29:20", "throughput": 19888.75, "total_tokens": 139171968}
|
|
{"current_steps": 44230, "total_steps": 78105, "loss": 0.1931, "lr": 2.3580669303021e-06, "epoch": 2.83144484988157, "percentage": 56.63, "elapsed_time": "1:56:38", "remaining_time": "1:29:19", "throughput": 19889.07, "total_tokens": 139187648}
|
|
{"current_steps": 44235, "total_steps": 78105, "loss": 0.3123, "lr": 2.3575091825527724e-06, "epoch": 2.831764931822547, "percentage": 56.64, "elapsed_time": "1:56:38", "remaining_time": "1:29:18", "throughput": 19889.41, "total_tokens": 139203584}
|
|
{"current_steps": 44240, "total_steps": 78105, "loss": 0.2331, "lr": 2.3569514419186953e-06, "epoch": 2.8320850137635234, "percentage": 56.64, "elapsed_time": "1:56:39", "remaining_time": "1:29:18", "throughput": 19889.69, "total_tokens": 139218688}
|
|
{"current_steps": 44245, "total_steps": 78105, "loss": 0.2012, "lr": 2.3563937084277174e-06, "epoch": 2.8324050957045004, "percentage": 56.65, "elapsed_time": "1:56:40", "remaining_time": "1:29:17", "throughput": 19889.99, "total_tokens": 139233920}
|
|
{"current_steps": 44250, "total_steps": 78105, "loss": 0.3717, "lr": 2.3558359821076903e-06, "epoch": 2.8327251776454774, "percentage": 56.65, "elapsed_time": "1:56:40", "remaining_time": "1:29:16", "throughput": 19890.41, "total_tokens": 139251392}
|
|
{"current_steps": 44255, "total_steps": 78105, "loss": 0.2139, "lr": 2.3552782629864625e-06, "epoch": 2.833045259586454, "percentage": 56.66, "elapsed_time": "1:56:41", "remaining_time": "1:29:15", "throughput": 19890.68, "total_tokens": 139266304}
|
|
{"current_steps": 44260, "total_steps": 78105, "loss": 0.1798, "lr": 2.3547205510918846e-06, "epoch": 2.833365341527431, "percentage": 56.67, "elapsed_time": "1:56:42", "remaining_time": "1:29:14", "throughput": 19891.02, "total_tokens": 139282304}
|
|
{"current_steps": 44265, "total_steps": 78105, "loss": 0.251, "lr": 2.3541628464518056e-06, "epoch": 2.833685423468408, "percentage": 56.67, "elapsed_time": "1:56:42", "remaining_time": "1:29:13", "throughput": 19891.37, "total_tokens": 139298816}
|
|
{"current_steps": 44270, "total_steps": 78105, "loss": 0.1523, "lr": 2.353605149094074e-06, "epoch": 2.834005505409385, "percentage": 56.68, "elapsed_time": "1:56:43", "remaining_time": "1:29:12", "throughput": 19891.67, "total_tokens": 139313920}
|
|
{"current_steps": 44275, "total_steps": 78105, "loss": 0.2123, "lr": 2.3530474590465403e-06, "epoch": 2.834325587350362, "percentage": 56.69, "elapsed_time": "1:56:44", "remaining_time": "1:29:11", "throughput": 19891.99, "total_tokens": 139329472}
|
|
{"current_steps": 44280, "total_steps": 78105, "loss": 0.2351, "lr": 2.3524897763370493e-06, "epoch": 2.8346456692913384, "percentage": 56.69, "elapsed_time": "1:56:44", "remaining_time": "1:29:11", "throughput": 19892.32, "total_tokens": 139344960}
|
|
{"current_steps": 44285, "total_steps": 78105, "loss": 0.3499, "lr": 2.3519321009934513e-06, "epoch": 2.8349657512323154, "percentage": 56.7, "elapsed_time": "1:56:45", "remaining_time": "1:29:10", "throughput": 19892.56, "total_tokens": 139359296}
|
|
{"current_steps": 44290, "total_steps": 78105, "loss": 0.1809, "lr": 2.351374433043593e-06, "epoch": 2.8352858331732924, "percentage": 56.71, "elapsed_time": "1:56:46", "remaining_time": "1:29:09", "throughput": 19892.85, "total_tokens": 139374528}
|
|
{"current_steps": 44295, "total_steps": 78105, "loss": 0.2806, "lr": 2.3508167725153216e-06, "epoch": 2.8356059151142694, "percentage": 56.71, "elapsed_time": "1:56:46", "remaining_time": "1:29:08", "throughput": 19893.25, "total_tokens": 139391488}
|
|
{"current_steps": 44300, "total_steps": 78105, "loss": 0.2491, "lr": 2.350259119436483e-06, "epoch": 2.835925997055246, "percentage": 56.72, "elapsed_time": "1:56:47", "remaining_time": "1:29:07", "throughput": 19893.5, "total_tokens": 139406272}
|
|
{"current_steps": 44305, "total_steps": 78105, "loss": 0.1931, "lr": 2.349701473834925e-06, "epoch": 2.836246078996223, "percentage": 56.72, "elapsed_time": "1:56:48", "remaining_time": "1:29:06", "throughput": 19893.78, "total_tokens": 139421312}
|
|
{"current_steps": 44310, "total_steps": 78105, "loss": 0.1616, "lr": 2.349143835738491e-06, "epoch": 2.8365661609372, "percentage": 56.73, "elapsed_time": "1:56:48", "remaining_time": "1:29:05", "throughput": 19894.05, "total_tokens": 139435840}
|
|
{"current_steps": 44315, "total_steps": 78105, "loss": 0.2365, "lr": 2.348586205175029e-06, "epoch": 2.836886242878177, "percentage": 56.74, "elapsed_time": "1:56:49", "remaining_time": "1:29:04", "throughput": 19894.4, "total_tokens": 139452032}
|
|
{"current_steps": 44320, "total_steps": 78105, "loss": 0.2702, "lr": 2.3480285821723826e-06, "epoch": 2.837206324819154, "percentage": 56.74, "elapsed_time": "1:56:50", "remaining_time": "1:29:03", "throughput": 19894.67, "total_tokens": 139466816}
|
|
{"current_steps": 44325, "total_steps": 78105, "loss": 0.2039, "lr": 2.347470966758398e-06, "epoch": 2.8375264067601305, "percentage": 56.75, "elapsed_time": "1:56:50", "remaining_time": "1:29:03", "throughput": 19895.0, "total_tokens": 139482240}
|
|
{"current_steps": 44330, "total_steps": 78105, "loss": 0.2812, "lr": 2.346913358960918e-06, "epoch": 2.8378464887011075, "percentage": 56.76, "elapsed_time": "1:56:51", "remaining_time": "1:29:02", "throughput": 19895.29, "total_tokens": 139497408}
|
|
{"current_steps": 44335, "total_steps": 78105, "loss": 0.2855, "lr": 2.3463557588077877e-06, "epoch": 2.8381665706420844, "percentage": 56.76, "elapsed_time": "1:56:52", "remaining_time": "1:29:01", "throughput": 19895.58, "total_tokens": 139512960}
|
|
{"current_steps": 44340, "total_steps": 78105, "loss": 0.218, "lr": 2.3457981663268513e-06, "epoch": 2.838486652583061, "percentage": 56.77, "elapsed_time": "1:56:52", "remaining_time": "1:29:00", "throughput": 19895.93, "total_tokens": 139529152}
|
|
{"current_steps": 44345, "total_steps": 78105, "loss": 0.2036, "lr": 2.34524058154595e-06, "epoch": 2.838806734524038, "percentage": 56.78, "elapsed_time": "1:56:53", "remaining_time": "1:28:59", "throughput": 19896.31, "total_tokens": 139545792}
|
|
{"current_steps": 44350, "total_steps": 78105, "loss": 0.2265, "lr": 2.344683004492929e-06, "epoch": 2.839126816465015, "percentage": 56.78, "elapsed_time": "1:56:54", "remaining_time": "1:28:58", "throughput": 19896.64, "total_tokens": 139561728}
|
|
{"current_steps": 44355, "total_steps": 78105, "loss": 0.2324, "lr": 2.344125435195629e-06, "epoch": 2.839446898405992, "percentage": 56.79, "elapsed_time": "1:56:54", "remaining_time": "1:28:57", "throughput": 19896.9, "total_tokens": 139576256}
|
|
{"current_steps": 44360, "total_steps": 78105, "loss": 0.1874, "lr": 2.343567873681894e-06, "epoch": 2.839766980346969, "percentage": 56.8, "elapsed_time": "1:56:55", "remaining_time": "1:28:56", "throughput": 19897.14, "total_tokens": 139590464}
|
|
{"current_steps": 44365, "total_steps": 78105, "loss": 0.2494, "lr": 2.343010319979563e-06, "epoch": 2.840087062287946, "percentage": 56.8, "elapsed_time": "1:56:56", "remaining_time": "1:28:55", "throughput": 19897.41, "total_tokens": 139605248}
|
|
{"current_steps": 44370, "total_steps": 78105, "loss": 0.2666, "lr": 2.3424527741164798e-06, "epoch": 2.8404071442289225, "percentage": 56.81, "elapsed_time": "1:56:56", "remaining_time": "1:28:55", "throughput": 19897.79, "total_tokens": 139621504}
|
|
{"current_steps": 44375, "total_steps": 78105, "loss": 0.3019, "lr": 2.3418952361204837e-06, "epoch": 2.8407272261698995, "percentage": 56.81, "elapsed_time": "1:56:57", "remaining_time": "1:28:54", "throughput": 19898.05, "total_tokens": 139636416}
|
|
{"current_steps": 44380, "total_steps": 78105, "loss": 0.2298, "lr": 2.341337706019417e-06, "epoch": 2.8410473081108765, "percentage": 56.82, "elapsed_time": "1:56:58", "remaining_time": "1:28:53", "throughput": 19898.39, "total_tokens": 139652480}
|
|
{"current_steps": 44385, "total_steps": 78105, "loss": 0.1877, "lr": 2.340780183841118e-06, "epoch": 2.841367390051853, "percentage": 56.83, "elapsed_time": "1:56:58", "remaining_time": "1:28:52", "throughput": 19898.67, "total_tokens": 139667584}
|
|
{"current_steps": 44390, "total_steps": 78105, "loss": 0.1671, "lr": 2.340222669613427e-06, "epoch": 2.84168747199283, "percentage": 56.83, "elapsed_time": "1:56:59", "remaining_time": "1:28:51", "throughput": 19898.97, "total_tokens": 139682816}
|
|
{"current_steps": 44395, "total_steps": 78105, "loss": 0.2568, "lr": 2.3396651633641854e-06, "epoch": 2.842007553933807, "percentage": 56.84, "elapsed_time": "1:57:00", "remaining_time": "1:28:50", "throughput": 19899.25, "total_tokens": 139697792}
|
|
{"current_steps": 44400, "total_steps": 78105, "loss": 0.2184, "lr": 2.3391076651212286e-06, "epoch": 2.842327635874784, "percentage": 56.85, "elapsed_time": "1:57:00", "remaining_time": "1:28:49", "throughput": 19899.64, "total_tokens": 139714496}
|
|
{"current_steps": 44405, "total_steps": 78105, "loss": 0.1701, "lr": 2.338550174912398e-06, "epoch": 2.842647717815761, "percentage": 56.85, "elapsed_time": "1:57:01", "remaining_time": "1:28:48", "throughput": 19899.97, "total_tokens": 139730432}
|
|
{"current_steps": 44410, "total_steps": 78105, "loss": 0.2167, "lr": 2.33799269276553e-06, "epoch": 2.842967799756738, "percentage": 56.86, "elapsed_time": "1:57:02", "remaining_time": "1:28:47", "throughput": 19900.27, "total_tokens": 139745600}
|
|
{"current_steps": 44415, "total_steps": 78105, "loss": 0.2717, "lr": 2.3374352187084638e-06, "epoch": 2.8432878816977145, "percentage": 56.87, "elapsed_time": "1:57:02", "remaining_time": "1:28:47", "throughput": 19900.59, "total_tokens": 139761152}
|
|
{"current_steps": 44420, "total_steps": 78105, "loss": 0.215, "lr": 2.336877752769035e-06, "epoch": 2.8436079636386915, "percentage": 56.87, "elapsed_time": "1:57:03", "remaining_time": "1:28:46", "throughput": 19900.89, "total_tokens": 139776512}
|
|
{"current_steps": 44425, "total_steps": 78105, "loss": 0.2383, "lr": 2.336320294975082e-06, "epoch": 2.8439280455796685, "percentage": 56.88, "elapsed_time": "1:57:04", "remaining_time": "1:28:45", "throughput": 19901.19, "total_tokens": 139791936}
|
|
{"current_steps": 44430, "total_steps": 78105, "loss": 0.1699, "lr": 2.3357628453544413e-06, "epoch": 2.844248127520645, "percentage": 56.88, "elapsed_time": "1:57:04", "remaining_time": "1:28:44", "throughput": 19901.48, "total_tokens": 139807360}
|
|
{"current_steps": 44435, "total_steps": 78105, "loss": 0.3171, "lr": 2.3352054039349485e-06, "epoch": 2.844568209461622, "percentage": 56.89, "elapsed_time": "1:57:05", "remaining_time": "1:28:43", "throughput": 19901.78, "total_tokens": 139822400}
|
|
{"current_steps": 44440, "total_steps": 78105, "loss": 0.1592, "lr": 2.334647970744439e-06, "epoch": 2.844888291402599, "percentage": 56.9, "elapsed_time": "1:57:06", "remaining_time": "1:28:42", "throughput": 19902.14, "total_tokens": 139838784}
|
|
{"current_steps": 44445, "total_steps": 78105, "loss": 0.183, "lr": 2.3340905458107483e-06, "epoch": 2.845208373343576, "percentage": 56.9, "elapsed_time": "1:57:07", "remaining_time": "1:28:41", "throughput": 19902.54, "total_tokens": 139856000}
|
|
{"current_steps": 44450, "total_steps": 78105, "loss": 0.1716, "lr": 2.333533129161713e-06, "epoch": 2.845528455284553, "percentage": 56.91, "elapsed_time": "1:57:07", "remaining_time": "1:28:40", "throughput": 19902.83, "total_tokens": 139871296}
|
|
{"current_steps": 44455, "total_steps": 78105, "loss": 0.5357, "lr": 2.332975720825164e-06, "epoch": 2.84584853722553, "percentage": 56.92, "elapsed_time": "1:57:08", "remaining_time": "1:28:40", "throughput": 19903.13, "total_tokens": 139886528}
|
|
{"current_steps": 44460, "total_steps": 78105, "loss": 0.2261, "lr": 2.3324183208289393e-06, "epoch": 2.8461686191665065, "percentage": 56.92, "elapsed_time": "1:57:09", "remaining_time": "1:28:39", "throughput": 19903.52, "total_tokens": 139903168}
|
|
{"current_steps": 44465, "total_steps": 78105, "loss": 0.244, "lr": 2.3318609292008697e-06, "epoch": 2.8464887011074835, "percentage": 56.93, "elapsed_time": "1:57:09", "remaining_time": "1:28:38", "throughput": 19903.84, "total_tokens": 139918848}
|
|
{"current_steps": 44470, "total_steps": 78105, "loss": 0.3307, "lr": 2.3313035459687906e-06, "epoch": 2.8468087830484605, "percentage": 56.94, "elapsed_time": "1:57:10", "remaining_time": "1:28:37", "throughput": 19904.17, "total_tokens": 139934976}
|
|
{"current_steps": 44475, "total_steps": 78105, "loss": 0.1675, "lr": 2.330746171160533e-06, "epoch": 2.847128864989437, "percentage": 56.94, "elapsed_time": "1:57:11", "remaining_time": "1:28:36", "throughput": 19904.56, "total_tokens": 139951616}
|
|
{"current_steps": 44480, "total_steps": 78105, "loss": 0.1356, "lr": 2.3301888048039306e-06, "epoch": 2.847448946930414, "percentage": 56.95, "elapsed_time": "1:57:11", "remaining_time": "1:28:35", "throughput": 19904.93, "total_tokens": 139968256}
|
|
{"current_steps": 44485, "total_steps": 78105, "loss": 0.1824, "lr": 2.3296314469268138e-06, "epoch": 2.847769028871391, "percentage": 56.96, "elapsed_time": "1:57:12", "remaining_time": "1:28:34", "throughput": 19905.24, "total_tokens": 139983616}
|
|
{"current_steps": 44490, "total_steps": 78105, "loss": 0.1731, "lr": 2.3290740975570155e-06, "epoch": 2.848089110812368, "percentage": 56.96, "elapsed_time": "1:57:13", "remaining_time": "1:28:34", "throughput": 19905.57, "total_tokens": 139999232}
|
|
{"current_steps": 44495, "total_steps": 78105, "loss": 0.2596, "lr": 2.3285167567223662e-06, "epoch": 2.848409192753345, "percentage": 56.97, "elapsed_time": "1:57:13", "remaining_time": "1:28:33", "throughput": 19905.87, "total_tokens": 140014400}
|
|
{"current_steps": 44500, "total_steps": 78105, "loss": 0.2312, "lr": 2.327959424450698e-06, "epoch": 2.848729274694322, "percentage": 56.97, "elapsed_time": "1:57:14", "remaining_time": "1:28:32", "throughput": 19906.22, "total_tokens": 140030528}
|
|
{"current_steps": 44505, "total_steps": 78105, "loss": 0.2017, "lr": 2.3274021007698385e-06, "epoch": 2.8490493566352986, "percentage": 56.98, "elapsed_time": "1:57:15", "remaining_time": "1:28:31", "throughput": 19906.53, "total_tokens": 140046208}
|
|
{"current_steps": 44510, "total_steps": 78105, "loss": 0.2373, "lr": 2.3268447857076194e-06, "epoch": 2.8493694385762756, "percentage": 56.99, "elapsed_time": "1:57:15", "remaining_time": "1:28:30", "throughput": 19906.92, "total_tokens": 140063296}
|
|
{"current_steps": 44515, "total_steps": 78105, "loss": 0.3689, "lr": 2.3262874792918703e-06, "epoch": 2.8496895205172526, "percentage": 56.99, "elapsed_time": "1:57:16", "remaining_time": "1:28:29", "throughput": 19907.18, "total_tokens": 140077888}
|
|
{"current_steps": 44520, "total_steps": 78105, "loss": 0.1692, "lr": 2.325730181550419e-06, "epoch": 2.850009602458229, "percentage": 57.0, "elapsed_time": "1:57:17", "remaining_time": "1:28:28", "throughput": 19907.54, "total_tokens": 140094208}
|
|
{"current_steps": 44525, "total_steps": 78105, "loss": 0.2089, "lr": 2.325172892511096e-06, "epoch": 2.850329684399206, "percentage": 57.01, "elapsed_time": "1:57:17", "remaining_time": "1:28:27", "throughput": 19908.0, "total_tokens": 140112000}
|
|
{"current_steps": 44530, "total_steps": 78105, "loss": 0.258, "lr": 2.3246156122017264e-06, "epoch": 2.850649766340183, "percentage": 57.01, "elapsed_time": "1:57:18", "remaining_time": "1:28:27", "throughput": 19908.32, "total_tokens": 140127552}
|
|
{"current_steps": 44535, "total_steps": 78105, "loss": 0.2803, "lr": 2.3240583406501414e-06, "epoch": 2.85096984828116, "percentage": 57.02, "elapsed_time": "1:57:19", "remaining_time": "1:28:26", "throughput": 19908.67, "total_tokens": 140143552}
|
|
{"current_steps": 44540, "total_steps": 78105, "loss": 0.321, "lr": 2.323501077884165e-06, "epoch": 2.851289930222137, "percentage": 57.03, "elapsed_time": "1:57:19", "remaining_time": "1:28:25", "throughput": 19908.99, "total_tokens": 140158720}
|
|
{"current_steps": 44545, "total_steps": 78105, "loss": 0.1971, "lr": 2.3229438239316258e-06, "epoch": 2.8516100121631136, "percentage": 57.03, "elapsed_time": "1:57:20", "remaining_time": "1:28:24", "throughput": 19909.32, "total_tokens": 140174784}
|
|
{"current_steps": 44550, "total_steps": 78105, "loss": 0.1966, "lr": 2.3223865788203495e-06, "epoch": 2.8519300941040906, "percentage": 57.04, "elapsed_time": "1:57:21", "remaining_time": "1:28:23", "throughput": 19909.66, "total_tokens": 140190720}
|
|
{"current_steps": 44555, "total_steps": 78105, "loss": 0.2565, "lr": 2.3218293425781634e-06, "epoch": 2.8522501760450676, "percentage": 57.05, "elapsed_time": "1:57:22", "remaining_time": "1:28:22", "throughput": 19909.97, "total_tokens": 140206400}
|
|
{"current_steps": 44560, "total_steps": 78105, "loss": 0.2011, "lr": 2.3212721152328907e-06, "epoch": 2.8525702579860446, "percentage": 57.05, "elapsed_time": "1:57:22", "remaining_time": "1:28:21", "throughput": 19910.25, "total_tokens": 140221120}
|
|
{"current_steps": 44565, "total_steps": 78105, "loss": 0.2197, "lr": 2.320714896812358e-06, "epoch": 2.852890339927021, "percentage": 57.06, "elapsed_time": "1:57:23", "remaining_time": "1:28:20", "throughput": 19910.57, "total_tokens": 140236800}
|
|
{"current_steps": 44570, "total_steps": 78105, "loss": 0.2038, "lr": 2.32015768734439e-06, "epoch": 2.853210421867998, "percentage": 57.06, "elapsed_time": "1:57:23", "remaining_time": "1:28:19", "throughput": 19910.86, "total_tokens": 140251968}
|
|
{"current_steps": 44575, "total_steps": 78105, "loss": 0.3535, "lr": 2.319600486856809e-06, "epoch": 2.853530503808975, "percentage": 57.07, "elapsed_time": "1:57:24", "remaining_time": "1:28:19", "throughput": 19911.14, "total_tokens": 140267136}
|
|
{"current_steps": 44580, "total_steps": 78105, "loss": 0.2445, "lr": 2.319043295377441e-06, "epoch": 2.853850585749952, "percentage": 57.08, "elapsed_time": "1:57:25", "remaining_time": "1:28:18", "throughput": 19911.4, "total_tokens": 140281792}
|
|
{"current_steps": 44585, "total_steps": 78105, "loss": 0.1777, "lr": 2.318486112934108e-06, "epoch": 2.854170667690929, "percentage": 57.08, "elapsed_time": "1:57:25", "remaining_time": "1:28:17", "throughput": 19911.68, "total_tokens": 140296832}
|
|
{"current_steps": 44590, "total_steps": 78105, "loss": 0.1406, "lr": 2.317928939554634e-06, "epoch": 2.8544907496319056, "percentage": 57.09, "elapsed_time": "1:57:26", "remaining_time": "1:28:16", "throughput": 19911.98, "total_tokens": 140312064}
|
|
{"current_steps": 44595, "total_steps": 78105, "loss": 0.2001, "lr": 2.317371775266839e-06, "epoch": 2.8548108315728826, "percentage": 57.1, "elapsed_time": "1:57:27", "remaining_time": "1:28:15", "throughput": 19912.26, "total_tokens": 140327360}
|
|
{"current_steps": 44600, "total_steps": 78105, "loss": 0.1539, "lr": 2.316814620098547e-06, "epoch": 2.8551309135138596, "percentage": 57.1, "elapsed_time": "1:57:27", "remaining_time": "1:28:14", "throughput": 19912.59, "total_tokens": 140343296}
|
|
{"current_steps": 44605, "total_steps": 78105, "loss": 0.2062, "lr": 2.316257474077578e-06, "epoch": 2.8554509954548366, "percentage": 57.11, "elapsed_time": "1:57:28", "remaining_time": "1:28:13", "throughput": 19912.92, "total_tokens": 140359232}
|
|
{"current_steps": 44610, "total_steps": 78105, "loss": 0.2805, "lr": 2.315700337231755e-06, "epoch": 2.855771077395813, "percentage": 57.12, "elapsed_time": "1:57:29", "remaining_time": "1:28:12", "throughput": 19913.28, "total_tokens": 140375552}
|
|
{"current_steps": 44615, "total_steps": 78105, "loss": 0.1845, "lr": 2.315143209588896e-06, "epoch": 2.85609115933679, "percentage": 57.12, "elapsed_time": "1:57:30", "remaining_time": "1:28:12", "throughput": 19913.67, "total_tokens": 140392576}
|
|
{"current_steps": 44620, "total_steps": 78105, "loss": 0.1562, "lr": 2.314586091176822e-06, "epoch": 2.856411241277767, "percentage": 57.13, "elapsed_time": "1:57:30", "remaining_time": "1:28:11", "throughput": 19913.97, "total_tokens": 140408256}
|
|
{"current_steps": 44625, "total_steps": 78105, "loss": 0.233, "lr": 2.314028982023354e-06, "epoch": 2.856731323218744, "percentage": 57.13, "elapsed_time": "1:57:31", "remaining_time": "1:28:10", "throughput": 19914.29, "total_tokens": 140424128}
|
|
{"current_steps": 44630, "total_steps": 78105, "loss": 0.2935, "lr": 2.313471882156309e-06, "epoch": 2.857051405159721, "percentage": 57.14, "elapsed_time": "1:57:32", "remaining_time": "1:28:09", "throughput": 19914.58, "total_tokens": 140439168}
|
|
{"current_steps": 44635, "total_steps": 78105, "loss": 0.1529, "lr": 2.312914791603507e-06, "epoch": 2.8573714871006977, "percentage": 57.15, "elapsed_time": "1:57:32", "remaining_time": "1:28:08", "throughput": 19914.91, "total_tokens": 140455360}
|
|
{"current_steps": 44640, "total_steps": 78105, "loss": 0.2748, "lr": 2.3123577103927656e-06, "epoch": 2.8576915690416747, "percentage": 57.15, "elapsed_time": "1:57:33", "remaining_time": "1:28:07", "throughput": 19915.27, "total_tokens": 140471872}
|
|
{"current_steps": 44645, "total_steps": 78105, "loss": 0.2308, "lr": 2.311800638551904e-06, "epoch": 2.8580116509826516, "percentage": 57.16, "elapsed_time": "1:57:34", "remaining_time": "1:28:06", "throughput": 19915.53, "total_tokens": 140486592}
|
|
{"current_steps": 44650, "total_steps": 78105, "loss": 0.219, "lr": 2.3112435761087367e-06, "epoch": 2.858331732923628, "percentage": 57.17, "elapsed_time": "1:57:34", "remaining_time": "1:28:05", "throughput": 19915.77, "total_tokens": 140500864}
|
|
{"current_steps": 44655, "total_steps": 78105, "loss": 0.2197, "lr": 2.3106865230910824e-06, "epoch": 2.858651814864605, "percentage": 57.17, "elapsed_time": "1:57:35", "remaining_time": "1:28:05", "throughput": 19916.13, "total_tokens": 140517056}
|
|
{"current_steps": 44660, "total_steps": 78105, "loss": 0.1609, "lr": 2.3101294795267567e-06, "epoch": 2.858971896805582, "percentage": 57.18, "elapsed_time": "1:57:36", "remaining_time": "1:28:04", "throughput": 19916.35, "total_tokens": 140531264}
|
|
{"current_steps": 44665, "total_steps": 78105, "loss": 0.1922, "lr": 2.309572445443577e-06, "epoch": 2.859291978746559, "percentage": 57.19, "elapsed_time": "1:57:36", "remaining_time": "1:28:03", "throughput": 19916.67, "total_tokens": 140547136}
|
|
{"current_steps": 44670, "total_steps": 78105, "loss": 0.2598, "lr": 2.309015420869356e-06, "epoch": 2.859612060687536, "percentage": 57.19, "elapsed_time": "1:57:37", "remaining_time": "1:28:02", "throughput": 19916.97, "total_tokens": 140562304}
|
|
{"current_steps": 44675, "total_steps": 78105, "loss": 0.265, "lr": 2.3084584058319115e-06, "epoch": 2.859932142628513, "percentage": 57.2, "elapsed_time": "1:57:38", "remaining_time": "1:28:01", "throughput": 19917.28, "total_tokens": 140578176}
|
|
{"current_steps": 44680, "total_steps": 78105, "loss": 0.1118, "lr": 2.3079014003590555e-06, "epoch": 2.8602522245694897, "percentage": 57.21, "elapsed_time": "1:57:38", "remaining_time": "1:28:00", "throughput": 19917.58, "total_tokens": 140593600}
|
|
{"current_steps": 44685, "total_steps": 78105, "loss": 0.2334, "lr": 2.3073444044786025e-06, "epoch": 2.8605723065104667, "percentage": 57.21, "elapsed_time": "1:57:39", "remaining_time": "1:27:59", "throughput": 19917.91, "total_tokens": 140609472}
|
|
{"current_steps": 44690, "total_steps": 78105, "loss": 0.1526, "lr": 2.3067874182183676e-06, "epoch": 2.8608923884514437, "percentage": 57.22, "elapsed_time": "1:57:40", "remaining_time": "1:27:58", "throughput": 19918.24, "total_tokens": 140624960}
|
|
{"current_steps": 44695, "total_steps": 78105, "loss": 0.263, "lr": 2.3062304416061616e-06, "epoch": 2.86121247039242, "percentage": 57.22, "elapsed_time": "1:57:40", "remaining_time": "1:27:58", "throughput": 19918.54, "total_tokens": 140640512}
|
|
{"current_steps": 44700, "total_steps": 78105, "loss": 0.2402, "lr": 2.3056734746697997e-06, "epoch": 2.861532552333397, "percentage": 57.23, "elapsed_time": "1:57:41", "remaining_time": "1:27:57", "throughput": 19918.93, "total_tokens": 140657408}
|
|
{"current_steps": 44705, "total_steps": 78105, "loss": 0.2162, "lr": 2.3051165174370904e-06, "epoch": 2.861852634274374, "percentage": 57.24, "elapsed_time": "1:57:42", "remaining_time": "1:27:56", "throughput": 19919.33, "total_tokens": 140674624}
|
|
{"current_steps": 44710, "total_steps": 78105, "loss": 0.2667, "lr": 2.3045595699358487e-06, "epoch": 2.862172716215351, "percentage": 57.24, "elapsed_time": "1:57:42", "remaining_time": "1:27:55", "throughput": 19919.65, "total_tokens": 140690368}
|
|
{"current_steps": 44715, "total_steps": 78105, "loss": 0.2578, "lr": 2.3040026321938826e-06, "epoch": 2.862492798156328, "percentage": 57.25, "elapsed_time": "1:57:43", "remaining_time": "1:27:54", "throughput": 19919.99, "total_tokens": 140706304}
|
|
{"current_steps": 44720, "total_steps": 78105, "loss": 0.2047, "lr": 2.303445704239005e-06, "epoch": 2.862812880097305, "percentage": 57.26, "elapsed_time": "1:57:44", "remaining_time": "1:27:53", "throughput": 19920.29, "total_tokens": 140721920}
|
|
{"current_steps": 44725, "total_steps": 78105, "loss": 0.2405, "lr": 2.3028887860990247e-06, "epoch": 2.8631329620382817, "percentage": 57.26, "elapsed_time": "1:57:44", "remaining_time": "1:27:52", "throughput": 19920.66, "total_tokens": 140738240}
|
|
{"current_steps": 44730, "total_steps": 78105, "loss": 0.2576, "lr": 2.3023318778017526e-06, "epoch": 2.8634530439792587, "percentage": 57.27, "elapsed_time": "1:57:45", "remaining_time": "1:27:51", "throughput": 19921.06, "total_tokens": 140755200}
|
|
{"current_steps": 44735, "total_steps": 78105, "loss": 0.2057, "lr": 2.3017749793749956e-06, "epoch": 2.8637731259202357, "percentage": 57.28, "elapsed_time": "1:57:46", "remaining_time": "1:27:51", "throughput": 19921.36, "total_tokens": 140770624}
|
|
{"current_steps": 44740, "total_steps": 78105, "loss": 0.2664, "lr": 2.301218090846564e-06, "epoch": 2.8640932078612122, "percentage": 57.28, "elapsed_time": "1:57:47", "remaining_time": "1:27:50", "throughput": 19921.74, "total_tokens": 140787392}
|
|
{"current_steps": 44745, "total_steps": 78105, "loss": 0.2434, "lr": 2.300661212244266e-06, "epoch": 2.8644132898021892, "percentage": 57.29, "elapsed_time": "1:57:47", "remaining_time": "1:27:49", "throughput": 19922.09, "total_tokens": 140803584}
|
|
{"current_steps": 44750, "total_steps": 78105, "loss": 0.1918, "lr": 2.300104343595908e-06, "epoch": 2.8647333717431662, "percentage": 57.29, "elapsed_time": "1:57:48", "remaining_time": "1:27:48", "throughput": 19922.43, "total_tokens": 140819776}
|
|
{"current_steps": 44755, "total_steps": 78105, "loss": 0.2781, "lr": 2.299547484929299e-06, "epoch": 2.865053453684143, "percentage": 57.3, "elapsed_time": "1:57:49", "remaining_time": "1:27:47", "throughput": 19922.78, "total_tokens": 140836224}
|
|
{"current_steps": 44760, "total_steps": 78105, "loss": 0.2081, "lr": 2.2989906362722434e-06, "epoch": 2.86537353562512, "percentage": 57.31, "elapsed_time": "1:57:49", "remaining_time": "1:27:46", "throughput": 19923.26, "total_tokens": 140854976}
|
|
{"current_steps": 44765, "total_steps": 78105, "loss": 0.2745, "lr": 2.29843379765255e-06, "epoch": 2.865693617566097, "percentage": 57.31, "elapsed_time": "1:57:50", "remaining_time": "1:27:45", "throughput": 19923.58, "total_tokens": 140870464}
|
|
{"current_steps": 44770, "total_steps": 78105, "loss": 0.2269, "lr": 2.297876969098021e-06, "epoch": 2.8660136995070737, "percentage": 57.32, "elapsed_time": "1:57:51", "remaining_time": "1:27:45", "throughput": 19923.89, "total_tokens": 140886016}
|
|
{"current_steps": 44775, "total_steps": 78105, "loss": 0.2441, "lr": 2.297320150636464e-06, "epoch": 2.8663337814480507, "percentage": 57.33, "elapsed_time": "1:57:51", "remaining_time": "1:27:44", "throughput": 19924.12, "total_tokens": 140900416}
|
|
{"current_steps": 44780, "total_steps": 78105, "loss": 0.3098, "lr": 2.2967633422956827e-06, "epoch": 2.8666538633890277, "percentage": 57.33, "elapsed_time": "1:57:52", "remaining_time": "1:27:43", "throughput": 19924.46, "total_tokens": 140916928}
|
|
{"current_steps": 44785, "total_steps": 78105, "loss": 0.2314, "lr": 2.2962065441034826e-06, "epoch": 2.8669739453300043, "percentage": 57.34, "elapsed_time": "1:57:53", "remaining_time": "1:27:42", "throughput": 19924.75, "total_tokens": 140932352}
|
|
{"current_steps": 44790, "total_steps": 78105, "loss": 0.2473, "lr": 2.2956497560876647e-06, "epoch": 2.8672940272709813, "percentage": 57.35, "elapsed_time": "1:57:53", "remaining_time": "1:27:41", "throughput": 19925.01, "total_tokens": 140947200}
|
|
{"current_steps": 44795, "total_steps": 78105, "loss": 0.1707, "lr": 2.295092978276034e-06, "epoch": 2.8676141092119583, "percentage": 57.35, "elapsed_time": "1:57:54", "remaining_time": "1:27:40", "throughput": 19925.35, "total_tokens": 140963584}
|
|
{"current_steps": 44800, "total_steps": 78105, "loss": 0.193, "lr": 2.2945362106963933e-06, "epoch": 2.8679341911529352, "percentage": 57.36, "elapsed_time": "1:57:55", "remaining_time": "1:27:39", "throughput": 19925.61, "total_tokens": 140978368}
|
|
{"current_steps": 44805, "total_steps": 78105, "loss": 0.1898, "lr": 2.293979453376543e-06, "epoch": 2.8682542730939122, "percentage": 57.37, "elapsed_time": "1:57:55", "remaining_time": "1:27:38", "throughput": 19925.92, "total_tokens": 140994048}
|
|
{"current_steps": 44810, "total_steps": 78105, "loss": 0.2355, "lr": 2.2934227063442866e-06, "epoch": 2.868574355034889, "percentage": 57.37, "elapsed_time": "1:57:56", "remaining_time": "1:27:38", "throughput": 19926.17, "total_tokens": 141008704}
|
|
{"current_steps": 44815, "total_steps": 78105, "loss": 0.2554, "lr": 2.2928659696274234e-06, "epoch": 2.8688944369758658, "percentage": 57.38, "elapsed_time": "1:57:57", "remaining_time": "1:27:37", "throughput": 19926.5, "total_tokens": 141024384}
|
|
{"current_steps": 44820, "total_steps": 78105, "loss": 0.183, "lr": 2.2923092432537556e-06, "epoch": 2.8692145189168428, "percentage": 57.38, "elapsed_time": "1:57:57", "remaining_time": "1:27:36", "throughput": 19926.89, "total_tokens": 141041344}
|
|
{"current_steps": 44825, "total_steps": 78105, "loss": 0.1648, "lr": 2.291752527251082e-06, "epoch": 2.8695346008578198, "percentage": 57.39, "elapsed_time": "1:57:58", "remaining_time": "1:27:35", "throughput": 19927.23, "total_tokens": 141057664}
|
|
{"current_steps": 44830, "total_steps": 78105, "loss": 0.2397, "lr": 2.2911958216472024e-06, "epoch": 2.8698546827987963, "percentage": 57.4, "elapsed_time": "1:57:59", "remaining_time": "1:27:34", "throughput": 19927.51, "total_tokens": 141072640}
|
|
{"current_steps": 44835, "total_steps": 78105, "loss": 0.1741, "lr": 2.2906391264699152e-06, "epoch": 2.8701747647397733, "percentage": 57.4, "elapsed_time": "1:57:59", "remaining_time": "1:27:33", "throughput": 19927.78, "total_tokens": 141088000}
|
|
{"current_steps": 44840, "total_steps": 78105, "loss": 0.1857, "lr": 2.290082441747021e-06, "epoch": 2.8704948466807503, "percentage": 57.41, "elapsed_time": "1:58:00", "remaining_time": "1:27:32", "throughput": 19928.09, "total_tokens": 141103936}
|
|
{"current_steps": 44845, "total_steps": 78105, "loss": 0.1053, "lr": 2.289525767506315e-06, "epoch": 2.8708149286217273, "percentage": 57.42, "elapsed_time": "1:58:01", "remaining_time": "1:27:32", "throughput": 19928.85, "total_tokens": 141132352}
|
|
{"current_steps": 44850, "total_steps": 78105, "loss": 0.2696, "lr": 2.288969103775597e-06, "epoch": 2.8711350105627043, "percentage": 57.42, "elapsed_time": "1:58:02", "remaining_time": "1:27:31", "throughput": 19929.15, "total_tokens": 141147520}
|
|
{"current_steps": 44855, "total_steps": 78105, "loss": 0.1419, "lr": 2.2884124505826617e-06, "epoch": 2.871455092503681, "percentage": 57.43, "elapsed_time": "1:58:03", "remaining_time": "1:27:30", "throughput": 19929.43, "total_tokens": 141163072}
|
|
{"current_steps": 44860, "total_steps": 78105, "loss": 0.2222, "lr": 2.2878558079553062e-06, "epoch": 2.871775174444658, "percentage": 57.44, "elapsed_time": "1:58:03", "remaining_time": "1:27:29", "throughput": 19929.72, "total_tokens": 141178304}
|
|
{"current_steps": 44865, "total_steps": 78105, "loss": 0.3013, "lr": 2.2872991759213277e-06, "epoch": 2.872095256385635, "percentage": 57.44, "elapsed_time": "1:58:04", "remaining_time": "1:27:28", "throughput": 19930.17, "total_tokens": 141196096}
|
|
{"current_steps": 44870, "total_steps": 78105, "loss": 0.2045, "lr": 2.2867425545085192e-06, "epoch": 2.8724153383266118, "percentage": 57.45, "elapsed_time": "1:58:05", "remaining_time": "1:27:27", "throughput": 19930.48, "total_tokens": 141211776}
|
|
{"current_steps": 44875, "total_steps": 78105, "loss": 0.2384, "lr": 2.286185943744678e-06, "epoch": 2.8727354202675883, "percentage": 57.45, "elapsed_time": "1:58:05", "remaining_time": "1:27:27", "throughput": 19930.79, "total_tokens": 141227328}
|
|
{"current_steps": 44880, "total_steps": 78105, "loss": 0.2071, "lr": 2.285629343657596e-06, "epoch": 2.8730555022085653, "percentage": 57.46, "elapsed_time": "1:58:06", "remaining_time": "1:27:26", "throughput": 19931.09, "total_tokens": 141242688}
|
|
{"current_steps": 44885, "total_steps": 78105, "loss": 0.2278, "lr": 2.285072754275069e-06, "epoch": 2.8733755841495423, "percentage": 57.47, "elapsed_time": "1:58:07", "remaining_time": "1:27:25", "throughput": 19931.37, "total_tokens": 141258048}
|
|
{"current_steps": 44890, "total_steps": 78105, "loss": 0.1792, "lr": 2.284516175624888e-06, "epoch": 2.8736956660905193, "percentage": 57.47, "elapsed_time": "1:58:07", "remaining_time": "1:27:24", "throughput": 19931.62, "total_tokens": 141272512}
|
|
{"current_steps": 44895, "total_steps": 78105, "loss": 0.327, "lr": 2.2839596077348483e-06, "epoch": 2.8740157480314963, "percentage": 57.48, "elapsed_time": "1:58:08", "remaining_time": "1:27:23", "throughput": 19931.95, "total_tokens": 141288640}
|
|
{"current_steps": 44900, "total_steps": 78105, "loss": 0.2094, "lr": 2.2834030506327394e-06, "epoch": 2.874335829972473, "percentage": 57.49, "elapsed_time": "1:58:09", "remaining_time": "1:27:22", "throughput": 19932.29, "total_tokens": 141304640}
|
|
{"current_steps": 44905, "total_steps": 78105, "loss": 0.2244, "lr": 2.2828465043463544e-06, "epoch": 2.87465591191345, "percentage": 57.49, "elapsed_time": "1:58:09", "remaining_time": "1:27:21", "throughput": 19932.74, "total_tokens": 141322560}
|
|
{"current_steps": 44910, "total_steps": 78105, "loss": 0.2849, "lr": 2.2822899689034832e-06, "epoch": 2.874975993854427, "percentage": 57.5, "elapsed_time": "1:58:10", "remaining_time": "1:27:21", "throughput": 19932.99, "total_tokens": 141337472}
|
|
{"current_steps": 44915, "total_steps": 78105, "loss": 0.2835, "lr": 2.2817334443319174e-06, "epoch": 2.8752960757954034, "percentage": 57.51, "elapsed_time": "1:58:11", "remaining_time": "1:27:20", "throughput": 19933.29, "total_tokens": 141352896}
|
|
{"current_steps": 44920, "total_steps": 78105, "loss": 0.2221, "lr": 2.2811769306594463e-06, "epoch": 2.8756161577363804, "percentage": 57.51, "elapsed_time": "1:58:11", "remaining_time": "1:27:19", "throughput": 19933.6, "total_tokens": 141368320}
|
|
{"current_steps": 44925, "total_steps": 78105, "loss": 0.1893, "lr": 2.280620427913859e-06, "epoch": 2.8759362396773573, "percentage": 57.52, "elapsed_time": "1:58:12", "remaining_time": "1:27:18", "throughput": 19933.91, "total_tokens": 141384000}
|
|
{"current_steps": 44930, "total_steps": 78105, "loss": 0.1827, "lr": 2.2800639361229464e-06, "epoch": 2.8762563216183343, "percentage": 57.53, "elapsed_time": "1:58:13", "remaining_time": "1:27:17", "throughput": 19934.21, "total_tokens": 141399040}
|
|
{"current_steps": 44935, "total_steps": 78105, "loss": 0.1448, "lr": 2.2795074553144936e-06, "epoch": 2.8765764035593113, "percentage": 57.53, "elapsed_time": "1:58:13", "remaining_time": "1:27:16", "throughput": 19934.57, "total_tokens": 141415680}
|
|
{"current_steps": 44940, "total_steps": 78105, "loss": 0.2351, "lr": 2.2789509855162912e-06, "epoch": 2.8768964855002883, "percentage": 57.54, "elapsed_time": "1:58:14", "remaining_time": "1:27:15", "throughput": 19934.93, "total_tokens": 141432064}
|
|
{"current_steps": 44945, "total_steps": 78105, "loss": 0.3497, "lr": 2.2783945267561246e-06, "epoch": 2.877216567441265, "percentage": 57.54, "elapsed_time": "1:58:15", "remaining_time": "1:27:14", "throughput": 19935.25, "total_tokens": 141447936}
|
|
{"current_steps": 44950, "total_steps": 78105, "loss": 0.1935, "lr": 2.277838079061781e-06, "epoch": 2.877536649382242, "percentage": 57.55, "elapsed_time": "1:58:16", "remaining_time": "1:27:14", "throughput": 19935.59, "total_tokens": 141463808}
|
|
{"current_steps": 44955, "total_steps": 78105, "loss": 0.2442, "lr": 2.2772816424610463e-06, "epoch": 2.877856731323219, "percentage": 57.56, "elapsed_time": "1:58:16", "remaining_time": "1:27:13", "throughput": 19935.93, "total_tokens": 141480256}
|
|
{"current_steps": 44960, "total_steps": 78105, "loss": 0.3129, "lr": 2.276725216981707e-06, "epoch": 2.8781768132641954, "percentage": 57.56, "elapsed_time": "1:58:17", "remaining_time": "1:27:12", "throughput": 19936.22, "total_tokens": 141495424}
|
|
{"current_steps": 44965, "total_steps": 78105, "loss": 0.3059, "lr": 2.2761688026515467e-06, "epoch": 2.8784968952051724, "percentage": 57.57, "elapsed_time": "1:58:18", "remaining_time": "1:27:11", "throughput": 19936.48, "total_tokens": 141510272}
|
|
{"current_steps": 44970, "total_steps": 78105, "loss": 0.1473, "lr": 2.27561239949835e-06, "epoch": 2.8788169771461494, "percentage": 57.58, "elapsed_time": "1:58:18", "remaining_time": "1:27:10", "throughput": 19936.76, "total_tokens": 141525312}
|
|
{"current_steps": 44975, "total_steps": 78105, "loss": 0.2226, "lr": 2.2750560075499025e-06, "epoch": 2.8791370590871264, "percentage": 57.58, "elapsed_time": "1:58:19", "remaining_time": "1:27:09", "throughput": 19937.09, "total_tokens": 141541312}
|
|
{"current_steps": 44980, "total_steps": 78105, "loss": 0.1887, "lr": 2.274499626833985e-06, "epoch": 2.8794571410281034, "percentage": 57.59, "elapsed_time": "1:58:20", "remaining_time": "1:27:08", "throughput": 19937.36, "total_tokens": 141556160}
|
|
{"current_steps": 44985, "total_steps": 78105, "loss": 0.191, "lr": 2.2739432573783832e-06, "epoch": 2.8797772229690803, "percentage": 57.6, "elapsed_time": "1:58:20", "remaining_time": "1:27:07", "throughput": 19937.65, "total_tokens": 141571648}
|
|
{"current_steps": 44990, "total_steps": 78105, "loss": 0.232, "lr": 2.273386899210876e-06, "epoch": 2.880097304910057, "percentage": 57.6, "elapsed_time": "1:58:21", "remaining_time": "1:27:06", "throughput": 19937.87, "total_tokens": 141585856}
|
|
{"current_steps": 44995, "total_steps": 78105, "loss": 0.1903, "lr": 2.2728305523592482e-06, "epoch": 2.880417386851034, "percentage": 57.61, "elapsed_time": "1:58:22", "remaining_time": "1:27:06", "throughput": 19938.13, "total_tokens": 141601152}
|
|
{"current_steps": 45000, "total_steps": 78105, "loss": 0.1379, "lr": 2.272274216851278e-06, "epoch": 2.880737468792011, "percentage": 57.61, "elapsed_time": "1:58:22", "remaining_time": "1:27:05", "throughput": 19938.44, "total_tokens": 141616896}
|
|
{"current_steps": 45005, "total_steps": 78105, "loss": 0.3167, "lr": 2.2717178927147483e-06, "epoch": 2.8810575507329874, "percentage": 57.62, "elapsed_time": "1:58:23", "remaining_time": "1:27:04", "throughput": 19938.75, "total_tokens": 141632320}
|
|
{"current_steps": 45010, "total_steps": 78105, "loss": 0.1945, "lr": 2.271161579977437e-06, "epoch": 2.8813776326739644, "percentage": 57.63, "elapsed_time": "1:58:24", "remaining_time": "1:27:03", "throughput": 19939.08, "total_tokens": 141648384}
|
|
{"current_steps": 45015, "total_steps": 78105, "loss": 0.2173, "lr": 2.2706052786671255e-06, "epoch": 2.8816977146149414, "percentage": 57.63, "elapsed_time": "1:58:24", "remaining_time": "1:27:02", "throughput": 19939.4, "total_tokens": 141664448}
|
|
{"current_steps": 45020, "total_steps": 78105, "loss": 0.2076, "lr": 2.2700489888115907e-06, "epoch": 2.8820177965559184, "percentage": 57.64, "elapsed_time": "1:58:25", "remaining_time": "1:27:01", "throughput": 19939.71, "total_tokens": 141680064}
|
|
{"current_steps": 45025, "total_steps": 78105, "loss": 0.293, "lr": 2.2694927104386115e-06, "epoch": 2.8823378784968954, "percentage": 57.65, "elapsed_time": "1:58:26", "remaining_time": "1:27:00", "throughput": 19940.06, "total_tokens": 141696448}
|
|
{"current_steps": 45030, "total_steps": 78105, "loss": 0.2419, "lr": 2.2689364435759665e-06, "epoch": 2.8826579604378724, "percentage": 57.65, "elapsed_time": "1:58:26", "remaining_time": "1:27:00", "throughput": 19940.35, "total_tokens": 141711744}
|
|
{"current_steps": 45035, "total_steps": 78105, "loss": 0.1816, "lr": 2.268380188251431e-06, "epoch": 2.882978042378849, "percentage": 57.66, "elapsed_time": "1:58:27", "remaining_time": "1:26:59", "throughput": 19940.7, "total_tokens": 141727616}
|
|
{"current_steps": 45040, "total_steps": 78105, "loss": 0.3126, "lr": 2.267823944492783e-06, "epoch": 2.883298124319826, "percentage": 57.67, "elapsed_time": "1:58:28", "remaining_time": "1:26:58", "throughput": 19941.03, "total_tokens": 141744000}
|
|
{"current_steps": 45045, "total_steps": 78105, "loss": 0.204, "lr": 2.267267712327797e-06, "epoch": 2.883618206260803, "percentage": 57.67, "elapsed_time": "1:58:28", "remaining_time": "1:26:57", "throughput": 19941.32, "total_tokens": 141759168}
|
|
{"current_steps": 45050, "total_steps": 78105, "loss": 0.248, "lr": 2.266711491784251e-06, "epoch": 2.8839382882017794, "percentage": 57.68, "elapsed_time": "1:58:29", "remaining_time": "1:26:56", "throughput": 19941.63, "total_tokens": 141774656}
|
|
{"current_steps": 45055, "total_steps": 78105, "loss": 0.2116, "lr": 2.266155282889916e-06, "epoch": 2.8842583701427564, "percentage": 57.69, "elapsed_time": "1:58:30", "remaining_time": "1:26:55", "throughput": 19941.99, "total_tokens": 141791296}
|
|
{"current_steps": 45060, "total_steps": 78105, "loss": 0.2342, "lr": 2.2655990856725695e-06, "epoch": 2.8845784520837334, "percentage": 57.69, "elapsed_time": "1:58:30", "remaining_time": "1:26:54", "throughput": 19942.21, "total_tokens": 141805504}
|
|
{"current_steps": 45065, "total_steps": 78105, "loss": 0.2994, "lr": 2.2650429001599826e-06, "epoch": 2.8848985340247104, "percentage": 57.7, "elapsed_time": "1:58:31", "remaining_time": "1:26:53", "throughput": 19942.55, "total_tokens": 141821632}
|
|
{"current_steps": 45070, "total_steps": 78105, "loss": 0.1775, "lr": 2.2644867263799307e-06, "epoch": 2.8852186159656874, "percentage": 57.7, "elapsed_time": "1:58:32", "remaining_time": "1:26:53", "throughput": 19942.84, "total_tokens": 141836736}
|
|
{"current_steps": 45075, "total_steps": 78105, "loss": 0.1947, "lr": 2.2639305643601838e-06, "epoch": 2.885538697906664, "percentage": 57.71, "elapsed_time": "1:58:32", "remaining_time": "1:26:52", "throughput": 19943.12, "total_tokens": 141852032}
|
|
{"current_steps": 45080, "total_steps": 78105, "loss": 0.2518, "lr": 2.263374414128516e-06, "epoch": 2.885858779847641, "percentage": 57.72, "elapsed_time": "1:58:33", "remaining_time": "1:26:51", "throughput": 19943.49, "total_tokens": 141869056}
|
|
{"current_steps": 45085, "total_steps": 78105, "loss": 0.1722, "lr": 2.262818275712696e-06, "epoch": 2.886178861788618, "percentage": 57.72, "elapsed_time": "1:58:34", "remaining_time": "1:26:50", "throughput": 19943.74, "total_tokens": 141883648}
|
|
{"current_steps": 45090, "total_steps": 78105, "loss": 0.3109, "lr": 2.2622621491404962e-06, "epoch": 2.886498943729595, "percentage": 57.73, "elapsed_time": "1:58:34", "remaining_time": "1:26:49", "throughput": 19944.1, "total_tokens": 141900352}
|
|
{"current_steps": 45095, "total_steps": 78105, "loss": 0.2064, "lr": 2.261706034439687e-06, "epoch": 2.8868190256705715, "percentage": 57.74, "elapsed_time": "1:58:35", "remaining_time": "1:26:48", "throughput": 19944.44, "total_tokens": 141916992}
|
|
{"current_steps": 45100, "total_steps": 78105, "loss": 0.1981, "lr": 2.2611499316380367e-06, "epoch": 2.8871391076115485, "percentage": 57.74, "elapsed_time": "1:58:36", "remaining_time": "1:26:47", "throughput": 19944.79, "total_tokens": 141933376}
|
|
{"current_steps": 45105, "total_steps": 78105, "loss": 0.1708, "lr": 2.2605938407633157e-06, "epoch": 2.8874591895525255, "percentage": 57.75, "elapsed_time": "1:58:36", "remaining_time": "1:26:46", "throughput": 19945.07, "total_tokens": 141948480}
|
|
{"current_steps": 45110, "total_steps": 78105, "loss": 0.2531, "lr": 2.26003776184329e-06, "epoch": 2.8877792714935024, "percentage": 57.76, "elapsed_time": "1:58:37", "remaining_time": "1:26:46", "throughput": 19945.38, "total_tokens": 141964224}
|
|
{"current_steps": 45115, "total_steps": 78105, "loss": 0.2189, "lr": 2.259481694905729e-06, "epoch": 2.8880993534344794, "percentage": 57.76, "elapsed_time": "1:58:38", "remaining_time": "1:26:45", "throughput": 19945.69, "total_tokens": 141980096}
|
|
{"current_steps": 45120, "total_steps": 78105, "loss": 0.2182, "lr": 2.2589256399783993e-06, "epoch": 2.888419435375456, "percentage": 57.77, "elapsed_time": "1:58:39", "remaining_time": "1:26:44", "throughput": 19946.04, "total_tokens": 141996800}
|
|
{"current_steps": 45125, "total_steps": 78105, "loss": 0.2882, "lr": 2.258369597089068e-06, "epoch": 2.888739517316433, "percentage": 57.77, "elapsed_time": "1:58:39", "remaining_time": "1:26:43", "throughput": 19946.31, "total_tokens": 142011584}
|
|
{"current_steps": 45130, "total_steps": 78105, "loss": 0.2331, "lr": 2.2578135662655e-06, "epoch": 2.88905959925741, "percentage": 57.78, "elapsed_time": "1:58:40", "remaining_time": "1:26:42", "throughput": 19946.68, "total_tokens": 142028672}
|
|
{"current_steps": 45135, "total_steps": 78105, "loss": 0.1424, "lr": 2.2572575475354614e-06, "epoch": 2.889379681198387, "percentage": 57.79, "elapsed_time": "1:58:41", "remaining_time": "1:26:41", "throughput": 19946.94, "total_tokens": 142043648}
|
|
{"current_steps": 45140, "total_steps": 78105, "loss": 0.2004, "lr": 2.256701540926716e-06, "epoch": 2.8896997631393635, "percentage": 57.79, "elapsed_time": "1:58:41", "remaining_time": "1:26:40", "throughput": 19947.19, "total_tokens": 142058240}
|
|
{"current_steps": 45145, "total_steps": 78105, "loss": 0.2502, "lr": 2.2561455464670274e-06, "epoch": 2.8900198450803405, "percentage": 57.8, "elapsed_time": "1:58:42", "remaining_time": "1:26:39", "throughput": 19947.46, "total_tokens": 142073216}
|
|
{"current_steps": 45150, "total_steps": 78105, "loss": 0.3251, "lr": 2.2555895641841614e-06, "epoch": 2.8903399270213175, "percentage": 57.81, "elapsed_time": "1:58:43", "remaining_time": "1:26:39", "throughput": 19947.69, "total_tokens": 142087488}
|
|
{"current_steps": 45155, "total_steps": 78105, "loss": 0.1674, "lr": 2.255033594105879e-06, "epoch": 2.8906600089622945, "percentage": 57.81, "elapsed_time": "1:58:43", "remaining_time": "1:26:38", "throughput": 19948.01, "total_tokens": 142103232}
|
|
{"current_steps": 45160, "total_steps": 78105, "loss": 0.3264, "lr": 2.2544776362599433e-06, "epoch": 2.8909800909032715, "percentage": 57.82, "elapsed_time": "1:58:44", "remaining_time": "1:26:37", "throughput": 19948.27, "total_tokens": 142118144}
|
|
{"current_steps": 45165, "total_steps": 78105, "loss": 0.1983, "lr": 2.253921690674115e-06, "epoch": 2.891300172844248, "percentage": 57.83, "elapsed_time": "1:58:45", "remaining_time": "1:26:36", "throughput": 19948.56, "total_tokens": 142133824}
|
|
{"current_steps": 45170, "total_steps": 78105, "loss": 0.3949, "lr": 2.2533657573761564e-06, "epoch": 2.891620254785225, "percentage": 57.83, "elapsed_time": "1:58:45", "remaining_time": "1:26:35", "throughput": 19948.87, "total_tokens": 142149568}
|
|
{"current_steps": 45175, "total_steps": 78105, "loss": 0.3101, "lr": 2.2528098363938262e-06, "epoch": 2.891940336726202, "percentage": 57.84, "elapsed_time": "1:58:46", "remaining_time": "1:26:34", "throughput": 19949.16, "total_tokens": 142164928}
|
|
{"current_steps": 45180, "total_steps": 78105, "loss": 0.1955, "lr": 2.252253927754886e-06, "epoch": 2.8922604186671785, "percentage": 57.85, "elapsed_time": "1:58:47", "remaining_time": "1:26:33", "throughput": 19949.5, "total_tokens": 142181120}
|
|
{"current_steps": 45185, "total_steps": 78105, "loss": 0.2241, "lr": 2.2516980314870936e-06, "epoch": 2.8925805006081555, "percentage": 57.85, "elapsed_time": "1:58:47", "remaining_time": "1:26:32", "throughput": 19949.8, "total_tokens": 142196544}
|
|
{"current_steps": 45190, "total_steps": 78105, "loss": 0.2419, "lr": 2.2511421476182096e-06, "epoch": 2.8929005825491325, "percentage": 57.86, "elapsed_time": "1:58:48", "remaining_time": "1:26:32", "throughput": 19950.08, "total_tokens": 142211648}
|
|
{"current_steps": 45195, "total_steps": 78105, "loss": 0.2536, "lr": 2.2505862761759888e-06, "epoch": 2.8932206644901095, "percentage": 57.86, "elapsed_time": "1:58:49", "remaining_time": "1:26:31", "throughput": 19950.41, "total_tokens": 142227712}
|
|
{"current_steps": 45200, "total_steps": 78105, "loss": 0.2748, "lr": 2.250030417188191e-06, "epoch": 2.8935407464310865, "percentage": 57.87, "elapsed_time": "1:58:49", "remaining_time": "1:26:30", "throughput": 19950.72, "total_tokens": 142243776}
|
|
{"current_steps": 45205, "total_steps": 78105, "loss": 0.2847, "lr": 2.2494745706825714e-06, "epoch": 2.8938608283720635, "percentage": 57.88, "elapsed_time": "1:58:50", "remaining_time": "1:26:29", "throughput": 19951.13, "total_tokens": 142261056}
|
|
{"current_steps": 45210, "total_steps": 78105, "loss": 0.2542, "lr": 2.248918736686887e-06, "epoch": 2.89418091031304, "percentage": 57.88, "elapsed_time": "1:58:51", "remaining_time": "1:26:28", "throughput": 19951.42, "total_tokens": 142276608}
|
|
{"current_steps": 45215, "total_steps": 78105, "loss": 0.2531, "lr": 2.248362915228894e-06, "epoch": 2.894500992254017, "percentage": 57.89, "elapsed_time": "1:58:51", "remaining_time": "1:26:27", "throughput": 19951.73, "total_tokens": 142292224}
|
|
{"current_steps": 45220, "total_steps": 78105, "loss": 0.1431, "lr": 2.2478071063363454e-06, "epoch": 2.894821074194994, "percentage": 57.9, "elapsed_time": "1:58:52", "remaining_time": "1:26:26", "throughput": 19952.03, "total_tokens": 142308032}
|
|
{"current_steps": 45225, "total_steps": 78105, "loss": 0.1981, "lr": 2.2472513100369974e-06, "epoch": 2.8951411561359706, "percentage": 57.9, "elapsed_time": "1:58:53", "remaining_time": "1:26:26", "throughput": 19952.37, "total_tokens": 142324096}
|
|
{"current_steps": 45230, "total_steps": 78105, "loss": 0.2682, "lr": 2.246695526358601e-06, "epoch": 2.8954612380769476, "percentage": 57.91, "elapsed_time": "1:58:53", "remaining_time": "1:26:25", "throughput": 19952.61, "total_tokens": 142338624}
|
|
{"current_steps": 45235, "total_steps": 78105, "loss": 0.1736, "lr": 2.2461397553289112e-06, "epoch": 2.8957813200179245, "percentage": 57.92, "elapsed_time": "1:58:54", "remaining_time": "1:26:24", "throughput": 19952.9, "total_tokens": 142354048}
|
|
{"current_steps": 45240, "total_steps": 78105, "loss": 0.1856, "lr": 2.2455839969756796e-06, "epoch": 2.8961014019589015, "percentage": 57.92, "elapsed_time": "1:58:55", "remaining_time": "1:26:23", "throughput": 19953.17, "total_tokens": 142369280}
|
|
{"current_steps": 45245, "total_steps": 78105, "loss": 0.1625, "lr": 2.2450282513266585e-06, "epoch": 2.8964214838998785, "percentage": 57.93, "elapsed_time": "1:58:55", "remaining_time": "1:26:22", "throughput": 19953.47, "total_tokens": 142385152}
|
|
{"current_steps": 45250, "total_steps": 78105, "loss": 0.3317, "lr": 2.244472518409598e-06, "epoch": 2.8967415658408555, "percentage": 57.93, "elapsed_time": "1:58:56", "remaining_time": "1:26:21", "throughput": 19953.77, "total_tokens": 142400576}
|
|
{"current_steps": 45255, "total_steps": 78105, "loss": 0.2302, "lr": 2.243916798252249e-06, "epoch": 2.897061647781832, "percentage": 57.94, "elapsed_time": "1:58:57", "remaining_time": "1:26:20", "throughput": 19954.21, "total_tokens": 142418368}
|
|
{"current_steps": 45260, "total_steps": 78105, "loss": 0.2291, "lr": 2.2433610908823607e-06, "epoch": 2.897381729722809, "percentage": 57.95, "elapsed_time": "1:58:57", "remaining_time": "1:26:19", "throughput": 19954.47, "total_tokens": 142433280}
|
|
{"current_steps": 45265, "total_steps": 78105, "loss": 0.1481, "lr": 2.2428053963276836e-06, "epoch": 2.897701811663786, "percentage": 57.95, "elapsed_time": "1:58:58", "remaining_time": "1:26:19", "throughput": 19954.92, "total_tokens": 142451392}
|
|
{"current_steps": 45270, "total_steps": 78105, "loss": 0.1916, "lr": 2.242249714615965e-06, "epoch": 2.8980218936047626, "percentage": 57.96, "elapsed_time": "1:58:59", "remaining_time": "1:26:18", "throughput": 19955.23, "total_tokens": 142466880}
|
|
{"current_steps": 45275, "total_steps": 78105, "loss": 0.2127, "lr": 2.2416940457749525e-06, "epoch": 2.8983419755457396, "percentage": 57.97, "elapsed_time": "1:59:00", "remaining_time": "1:26:17", "throughput": 19955.61, "total_tokens": 142483904}
|
|
{"current_steps": 45280, "total_steps": 78105, "loss": 0.226, "lr": 2.241138389832395e-06, "epoch": 2.8986620574867166, "percentage": 57.97, "elapsed_time": "1:59:00", "remaining_time": "1:26:16", "throughput": 19955.93, "total_tokens": 142499584}
|
|
{"current_steps": 45285, "total_steps": 78105, "loss": 0.2064, "lr": 2.240582746816037e-06, "epoch": 2.8989821394276936, "percentage": 57.98, "elapsed_time": "1:59:01", "remaining_time": "1:26:15", "throughput": 19956.28, "total_tokens": 142516032}
|
|
{"current_steps": 45290, "total_steps": 78105, "loss": 0.2315, "lr": 2.2400271167536262e-06, "epoch": 2.8993022213686706, "percentage": 57.99, "elapsed_time": "1:59:02", "remaining_time": "1:26:14", "throughput": 19956.67, "total_tokens": 142533312}
|
|
{"current_steps": 45295, "total_steps": 78105, "loss": 0.231, "lr": 2.2394714996729062e-06, "epoch": 2.8996223033096475, "percentage": 57.99, "elapsed_time": "1:59:02", "remaining_time": "1:26:13", "throughput": 19957.02, "total_tokens": 142549312}
|
|
{"current_steps": 45300, "total_steps": 78105, "loss": 0.2421, "lr": 2.2389158956016234e-06, "epoch": 2.899942385250624, "percentage": 58.0, "elapsed_time": "1:59:03", "remaining_time": "1:26:13", "throughput": 19957.33, "total_tokens": 142565184}
|
|
{"current_steps": 45305, "total_steps": 78105, "loss": 0.2441, "lr": 2.2383603045675197e-06, "epoch": 2.900262467191601, "percentage": 58.01, "elapsed_time": "1:59:04", "remaining_time": "1:26:12", "throughput": 19957.61, "total_tokens": 142580224}
|
|
{"current_steps": 45310, "total_steps": 78105, "loss": 0.151, "lr": 2.237804726598341e-06, "epoch": 2.900582549132578, "percentage": 58.01, "elapsed_time": "1:59:04", "remaining_time": "1:26:11", "throughput": 19957.93, "total_tokens": 142596288}
|
|
{"current_steps": 45315, "total_steps": 78105, "loss": 0.2503, "lr": 2.2372491617218274e-06, "epoch": 2.9009026310735546, "percentage": 58.02, "elapsed_time": "1:59:05", "remaining_time": "1:26:10", "throughput": 19958.24, "total_tokens": 142612032}
|
|
{"current_steps": 45320, "total_steps": 78105, "loss": 0.3537, "lr": 2.236693609965722e-06, "epoch": 2.9012227130145316, "percentage": 58.02, "elapsed_time": "1:59:06", "remaining_time": "1:26:09", "throughput": 19958.54, "total_tokens": 142627328}
|
|
{"current_steps": 45325, "total_steps": 78105, "loss": 0.1754, "lr": 2.236138071357766e-06, "epoch": 2.9015427949555086, "percentage": 58.03, "elapsed_time": "1:59:06", "remaining_time": "1:26:08", "throughput": 19958.86, "total_tokens": 142643264}
|
|
{"current_steps": 45330, "total_steps": 78105, "loss": 0.1899, "lr": 2.2355825459257006e-06, "epoch": 2.9018628768964856, "percentage": 58.04, "elapsed_time": "1:59:07", "remaining_time": "1:26:07", "throughput": 19959.17, "total_tokens": 142658688}
|
|
{"current_steps": 45335, "total_steps": 78105, "loss": 0.2643, "lr": 2.235027033697267e-06, "epoch": 2.9021829588374626, "percentage": 58.04, "elapsed_time": "1:59:08", "remaining_time": "1:26:07", "throughput": 19959.53, "total_tokens": 142675200}
|
|
{"current_steps": 45340, "total_steps": 78105, "loss": 0.3192, "lr": 2.2344715347002013e-06, "epoch": 2.902503040778439, "percentage": 58.05, "elapsed_time": "1:59:08", "remaining_time": "1:26:06", "throughput": 19959.95, "total_tokens": 142693120}
|
|
{"current_steps": 45345, "total_steps": 78105, "loss": 0.1802, "lr": 2.2339160489622448e-06, "epoch": 2.902823122719416, "percentage": 58.06, "elapsed_time": "1:59:09", "remaining_time": "1:26:05", "throughput": 19960.24, "total_tokens": 142708352}
|
|
{"current_steps": 45350, "total_steps": 78105, "loss": 0.1539, "lr": 2.233360576511134e-06, "epoch": 2.903143204660393, "percentage": 58.06, "elapsed_time": "1:59:10", "remaining_time": "1:26:04", "throughput": 19960.65, "total_tokens": 142725824}
|
|
{"current_steps": 45355, "total_steps": 78105, "loss": 0.2356, "lr": 2.232805117374609e-06, "epoch": 2.90346328660137, "percentage": 58.07, "elapsed_time": "1:59:11", "remaining_time": "1:26:03", "throughput": 19960.91, "total_tokens": 142741120}
|
|
{"current_steps": 45360, "total_steps": 78105, "loss": 0.2417, "lr": 2.232249671580403e-06, "epoch": 2.9037833685423466, "percentage": 58.08, "elapsed_time": "1:59:11", "remaining_time": "1:26:02", "throughput": 19961.21, "total_tokens": 142756928}
|
|
{"current_steps": 45365, "total_steps": 78105, "loss": 0.2518, "lr": 2.231694239156255e-06, "epoch": 2.9041034504833236, "percentage": 58.08, "elapsed_time": "1:59:12", "remaining_time": "1:26:01", "throughput": 19961.57, "total_tokens": 142773632}
|
|
{"current_steps": 45370, "total_steps": 78105, "loss": 0.2215, "lr": 2.231138820129898e-06, "epoch": 2.9044235324243006, "percentage": 58.09, "elapsed_time": "1:59:13", "remaining_time": "1:26:01", "throughput": 19961.88, "total_tokens": 142789056}
|
|
{"current_steps": 45375, "total_steps": 78105, "loss": 0.2013, "lr": 2.2305834145290683e-06, "epoch": 2.9047436143652776, "percentage": 58.09, "elapsed_time": "1:59:13", "remaining_time": "1:26:00", "throughput": 19962.19, "total_tokens": 142804672}
|
|
{"current_steps": 45380, "total_steps": 78105, "loss": 0.2605, "lr": 2.2300280223814994e-06, "epoch": 2.9050636963062546, "percentage": 58.1, "elapsed_time": "1:59:14", "remaining_time": "1:25:59", "throughput": 19962.59, "total_tokens": 142822016}
|
|
{"current_steps": 45385, "total_steps": 78105, "loss": 0.1667, "lr": 2.2294726437149246e-06, "epoch": 2.905383778247231, "percentage": 58.11, "elapsed_time": "1:59:15", "remaining_time": "1:25:58", "throughput": 19962.94, "total_tokens": 142838464}
|
|
{"current_steps": 45390, "total_steps": 78105, "loss": 0.2685, "lr": 2.2289172785570776e-06, "epoch": 2.905703860188208, "percentage": 58.11, "elapsed_time": "1:59:15", "remaining_time": "1:25:57", "throughput": 19963.28, "total_tokens": 142854848}
|
|
{"current_steps": 45395, "total_steps": 78105, "loss": 0.208, "lr": 2.2283619269356892e-06, "epoch": 2.906023942129185, "percentage": 58.12, "elapsed_time": "1:59:16", "remaining_time": "1:25:56", "throughput": 19963.6, "total_tokens": 142870912}
|
|
{"current_steps": 45400, "total_steps": 78105, "loss": 0.2718, "lr": 2.227806588878492e-06, "epoch": 2.906344024070162, "percentage": 58.13, "elapsed_time": "1:59:17", "remaining_time": "1:25:55", "throughput": 19963.86, "total_tokens": 142885760}
|
|
{"current_steps": 45405, "total_steps": 78105, "loss": 0.1724, "lr": 2.227251264413215e-06, "epoch": 2.9066641060111387, "percentage": 58.13, "elapsed_time": "1:59:17", "remaining_time": "1:25:55", "throughput": 19964.14, "total_tokens": 142901056}
|
|
{"current_steps": 45410, "total_steps": 78105, "loss": 0.1677, "lr": 2.2266959535675894e-06, "epoch": 2.9069841879521157, "percentage": 58.14, "elapsed_time": "1:59:18", "remaining_time": "1:25:54", "throughput": 19964.48, "total_tokens": 142917376}
|
|
{"current_steps": 45415, "total_steps": 78105, "loss": 0.3009, "lr": 2.226140656369344e-06, "epoch": 2.9073042698930927, "percentage": 58.15, "elapsed_time": "1:59:19", "remaining_time": "1:25:53", "throughput": 19964.8, "total_tokens": 142933312}
|
|
{"current_steps": 45420, "total_steps": 78105, "loss": 0.1602, "lr": 2.2255853728462086e-06, "epoch": 2.9076243518340696, "percentage": 58.15, "elapsed_time": "1:59:19", "remaining_time": "1:25:52", "throughput": 19965.14, "total_tokens": 142949440}
|
|
{"current_steps": 45425, "total_steps": 78105, "loss": 0.2475, "lr": 2.22503010302591e-06, "epoch": 2.9079444337750466, "percentage": 58.16, "elapsed_time": "1:59:20", "remaining_time": "1:25:51", "throughput": 19965.41, "total_tokens": 142964416}
|
|
{"current_steps": 45430, "total_steps": 78105, "loss": 0.1824, "lr": 2.2244748469361756e-06, "epoch": 2.908264515716023, "percentage": 58.17, "elapsed_time": "1:59:21", "remaining_time": "1:25:50", "throughput": 19965.72, "total_tokens": 142980480}
|
|
{"current_steps": 45435, "total_steps": 78105, "loss": 0.1742, "lr": 2.2239196046047315e-06, "epoch": 2.908584597657, "percentage": 58.17, "elapsed_time": "1:59:21", "remaining_time": "1:25:49", "throughput": 19966.03, "total_tokens": 142996352}
|
|
{"current_steps": 45440, "total_steps": 78105, "loss": 0.2532, "lr": 2.2233643760593047e-06, "epoch": 2.908904679597977, "percentage": 58.18, "elapsed_time": "1:59:22", "remaining_time": "1:25:48", "throughput": 19966.33, "total_tokens": 143011840}
|
|
{"current_steps": 45445, "total_steps": 78105, "loss": 0.2405, "lr": 2.222809161327621e-06, "epoch": 2.9092247615389537, "percentage": 58.18, "elapsed_time": "1:59:23", "remaining_time": "1:25:48", "throughput": 19966.64, "total_tokens": 143027712}
|
|
{"current_steps": 45450, "total_steps": 78105, "loss": 0.3159, "lr": 2.222253960437403e-06, "epoch": 2.9095448434799307, "percentage": 58.19, "elapsed_time": "1:59:24", "remaining_time": "1:25:47", "throughput": 19966.96, "total_tokens": 143043584}
|
|
{"current_steps": 45455, "total_steps": 78105, "loss": 0.3149, "lr": 2.221698773416376e-06, "epoch": 2.9098649254209077, "percentage": 58.2, "elapsed_time": "1:59:24", "remaining_time": "1:25:46", "throughput": 19967.33, "total_tokens": 143060288}
|
|
{"current_steps": 45460, "total_steps": 78105, "loss": 0.2105, "lr": 2.221143600292262e-06, "epoch": 2.9101850073618847, "percentage": 58.2, "elapsed_time": "1:59:25", "remaining_time": "1:25:45", "throughput": 19967.59, "total_tokens": 143074816}
|
|
{"current_steps": 45465, "total_steps": 78105, "loss": 0.1477, "lr": 2.2205884410927847e-06, "epoch": 2.9105050893028617, "percentage": 58.21, "elapsed_time": "1:59:26", "remaining_time": "1:25:44", "throughput": 19967.91, "total_tokens": 143090816}
|
|
{"current_steps": 45470, "total_steps": 78105, "loss": 0.1625, "lr": 2.2200332958456647e-06, "epoch": 2.9108251712438387, "percentage": 58.22, "elapsed_time": "1:59:26", "remaining_time": "1:25:43", "throughput": 19968.23, "total_tokens": 143106752}
|
|
{"current_steps": 45475, "total_steps": 78105, "loss": 0.2599, "lr": 2.2194781645786244e-06, "epoch": 2.911145253184815, "percentage": 58.22, "elapsed_time": "1:59:27", "remaining_time": "1:25:42", "throughput": 19968.66, "total_tokens": 143124672}
|
|
{"current_steps": 45480, "total_steps": 78105, "loss": 0.1547, "lr": 2.2189230473193826e-06, "epoch": 2.911465335125792, "percentage": 58.23, "elapsed_time": "1:59:28", "remaining_time": "1:25:42", "throughput": 19969.04, "total_tokens": 143141440}
|
|
{"current_steps": 45485, "total_steps": 78105, "loss": 0.2389, "lr": 2.21836794409566e-06, "epoch": 2.911785417066769, "percentage": 58.24, "elapsed_time": "1:59:28", "remaining_time": "1:25:41", "throughput": 19969.34, "total_tokens": 143157248}
|
|
{"current_steps": 45490, "total_steps": 78105, "loss": 0.2098, "lr": 2.217812854935175e-06, "epoch": 2.9121054990077457, "percentage": 58.24, "elapsed_time": "1:59:29", "remaining_time": "1:25:40", "throughput": 19969.67, "total_tokens": 143173440}
|
|
{"current_steps": 45495, "total_steps": 78105, "loss": 0.2594, "lr": 2.2172577798656467e-06, "epoch": 2.9124255809487227, "percentage": 58.25, "elapsed_time": "1:59:30", "remaining_time": "1:25:39", "throughput": 19969.98, "total_tokens": 143188992}
|
|
{"current_steps": 45500, "total_steps": 78105, "loss": 0.1787, "lr": 2.216702718914792e-06, "epoch": 2.9127456628896997, "percentage": 58.25, "elapsed_time": "1:59:30", "remaining_time": "1:25:38", "throughput": 19970.33, "total_tokens": 143205440}
|
|
{"current_steps": 45505, "total_steps": 78105, "loss": 0.1835, "lr": 2.2161476721103277e-06, "epoch": 2.9130657448306767, "percentage": 58.26, "elapsed_time": "1:59:31", "remaining_time": "1:25:37", "throughput": 19970.53, "total_tokens": 143219328}
|
|
{"current_steps": 45510, "total_steps": 78105, "loss": 0.2448, "lr": 2.2155926394799705e-06, "epoch": 2.9133858267716537, "percentage": 58.27, "elapsed_time": "1:59:32", "remaining_time": "1:25:36", "throughput": 19970.82, "total_tokens": 143234816}
|
|
{"current_steps": 45515, "total_steps": 78105, "loss": 0.3062, "lr": 2.215037621051435e-06, "epoch": 2.9137059087126307, "percentage": 58.27, "elapsed_time": "1:59:32", "remaining_time": "1:25:35", "throughput": 19971.11, "total_tokens": 143250624}
|
|
{"current_steps": 45520, "total_steps": 78105, "loss": 0.2098, "lr": 2.2144826168524368e-06, "epoch": 2.9140259906536072, "percentage": 58.28, "elapsed_time": "1:59:33", "remaining_time": "1:25:35", "throughput": 19971.4, "total_tokens": 143266112}
|
|
{"current_steps": 45525, "total_steps": 78105, "loss": 0.2695, "lr": 2.213927626910689e-06, "epoch": 2.9143460725945842, "percentage": 58.29, "elapsed_time": "1:59:34", "remaining_time": "1:25:34", "throughput": 19971.8, "total_tokens": 143283328}
|
|
{"current_steps": 45530, "total_steps": 78105, "loss": 0.194, "lr": 2.213372651253906e-06, "epoch": 2.914666154535561, "percentage": 58.29, "elapsed_time": "1:59:34", "remaining_time": "1:25:33", "throughput": 19972.07, "total_tokens": 143298176}
|
|
{"current_steps": 45535, "total_steps": 78105, "loss": 0.1273, "lr": 2.2128176899097993e-06, "epoch": 2.9149862364765378, "percentage": 58.3, "elapsed_time": "1:59:35", "remaining_time": "1:25:32", "throughput": 19972.4, "total_tokens": 143314368}
|
|
{"current_steps": 45540, "total_steps": 78105, "loss": 0.1717, "lr": 2.2122627429060823e-06, "epoch": 2.9153063184175148, "percentage": 58.31, "elapsed_time": "1:59:36", "remaining_time": "1:25:31", "throughput": 19972.64, "total_tokens": 143328960}
|
|
{"current_steps": 45545, "total_steps": 78105, "loss": 0.219, "lr": 2.211707810270464e-06, "epoch": 2.9156264003584917, "percentage": 58.31, "elapsed_time": "1:59:36", "remaining_time": "1:25:30", "throughput": 19972.9, "total_tokens": 143344256}
|
|
{"current_steps": 45550, "total_steps": 78105, "loss": 0.306, "lr": 2.211152892030656e-06, "epoch": 2.9159464822994687, "percentage": 58.32, "elapsed_time": "1:59:37", "remaining_time": "1:25:29", "throughput": 19973.24, "total_tokens": 143360896}
|
|
{"current_steps": 45555, "total_steps": 78105, "loss": 0.178, "lr": 2.2105979882143683e-06, "epoch": 2.9162665642404457, "percentage": 58.33, "elapsed_time": "1:59:38", "remaining_time": "1:25:29", "throughput": 19973.55, "total_tokens": 143376704}
|
|
{"current_steps": 45560, "total_steps": 78105, "loss": 0.2196, "lr": 2.2100430988493092e-06, "epoch": 2.9165866461814227, "percentage": 58.33, "elapsed_time": "1:59:38", "remaining_time": "1:25:28", "throughput": 19973.79, "total_tokens": 143391296}
|
|
{"current_steps": 45565, "total_steps": 78105, "loss": 0.21, "lr": 2.2094882239631886e-06, "epoch": 2.9169067281223993, "percentage": 58.34, "elapsed_time": "1:59:39", "remaining_time": "1:25:27", "throughput": 19974.12, "total_tokens": 143407488}
|
|
{"current_steps": 45570, "total_steps": 78105, "loss": 0.2053, "lr": 2.2089333635837117e-06, "epoch": 2.9172268100633763, "percentage": 58.34, "elapsed_time": "1:59:40", "remaining_time": "1:25:26", "throughput": 19974.42, "total_tokens": 143423232}
|
|
{"current_steps": 45575, "total_steps": 78105, "loss": 0.3258, "lr": 2.2083785177385875e-06, "epoch": 2.9175468920043532, "percentage": 58.35, "elapsed_time": "1:59:41", "remaining_time": "1:25:25", "throughput": 19974.84, "total_tokens": 143440704}
|
|
{"current_steps": 45580, "total_steps": 78105, "loss": 0.2386, "lr": 2.20782368645552e-06, "epoch": 2.91786697394533, "percentage": 58.36, "elapsed_time": "1:59:41", "remaining_time": "1:25:24", "throughput": 19975.08, "total_tokens": 143455680}
|
|
{"current_steps": 45585, "total_steps": 78105, "loss": 0.2883, "lr": 2.207268869762217e-06, "epoch": 2.918187055886307, "percentage": 58.36, "elapsed_time": "1:59:42", "remaining_time": "1:25:23", "throughput": 19975.33, "total_tokens": 143470528}
|
|
{"current_steps": 45590, "total_steps": 78105, "loss": 0.2332, "lr": 2.2067140676863807e-06, "epoch": 2.9185071378272838, "percentage": 58.37, "elapsed_time": "1:59:43", "remaining_time": "1:25:22", "throughput": 19975.65, "total_tokens": 143486272}
|
|
{"current_steps": 45595, "total_steps": 78105, "loss": 0.2276, "lr": 2.2061592802557174e-06, "epoch": 2.9188272197682608, "percentage": 58.38, "elapsed_time": "1:59:43", "remaining_time": "1:25:22", "throughput": 19975.93, "total_tokens": 143501888}
|
|
{"current_steps": 45600, "total_steps": 78105, "loss": 0.2908, "lr": 2.2056045074979284e-06, "epoch": 2.9191473017092378, "percentage": 58.38, "elapsed_time": "1:59:44", "remaining_time": "1:25:21", "throughput": 19976.19, "total_tokens": 143517056}
|
|
{"current_steps": 45605, "total_steps": 78105, "loss": 0.2319, "lr": 2.2050497494407168e-06, "epoch": 2.9194673836502143, "percentage": 58.39, "elapsed_time": "1:59:45", "remaining_time": "1:25:20", "throughput": 19976.43, "total_tokens": 143531904}
|
|
{"current_steps": 45610, "total_steps": 78105, "loss": 0.1832, "lr": 2.2044950061117835e-06, "epoch": 2.9197874655911913, "percentage": 58.4, "elapsed_time": "1:59:45", "remaining_time": "1:25:19", "throughput": 19976.75, "total_tokens": 143547840}
|
|
{"current_steps": 45615, "total_steps": 78105, "loss": 0.1995, "lr": 2.203940277538831e-06, "epoch": 2.9201075475321683, "percentage": 58.4, "elapsed_time": "1:59:46", "remaining_time": "1:25:18", "throughput": 19977.01, "total_tokens": 143563264}
|
|
{"current_steps": 45620, "total_steps": 78105, "loss": 0.1516, "lr": 2.20338556374956e-06, "epoch": 2.9204276294731453, "percentage": 58.41, "elapsed_time": "1:59:47", "remaining_time": "1:25:17", "throughput": 19977.29, "total_tokens": 143578304}
|
|
{"current_steps": 45625, "total_steps": 78105, "loss": 0.2949, "lr": 2.202830864771668e-06, "epoch": 2.920747711414122, "percentage": 58.41, "elapsed_time": "1:59:47", "remaining_time": "1:25:16", "throughput": 19977.59, "total_tokens": 143593984}
|
|
{"current_steps": 45630, "total_steps": 78105, "loss": 0.1497, "lr": 2.2022761806328552e-06, "epoch": 2.921067793355099, "percentage": 58.42, "elapsed_time": "1:59:48", "remaining_time": "1:25:16", "throughput": 19977.92, "total_tokens": 143610112}
|
|
{"current_steps": 45635, "total_steps": 78105, "loss": 0.3329, "lr": 2.2017215113608185e-06, "epoch": 2.921387875296076, "percentage": 58.43, "elapsed_time": "1:59:49", "remaining_time": "1:25:15", "throughput": 19978.27, "total_tokens": 143627200}
|
|
{"current_steps": 45640, "total_steps": 78105, "loss": 0.2323, "lr": 2.201166856983256e-06, "epoch": 2.921707957237053, "percentage": 58.43, "elapsed_time": "1:59:49", "remaining_time": "1:25:14", "throughput": 19978.58, "total_tokens": 143642816}
|
|
{"current_steps": 45645, "total_steps": 78105, "loss": 0.1793, "lr": 2.200612217527864e-06, "epoch": 2.92202803917803, "percentage": 58.44, "elapsed_time": "1:59:50", "remaining_time": "1:25:13", "throughput": 19978.85, "total_tokens": 143658240}
|
|
{"current_steps": 45650, "total_steps": 78105, "loss": 0.1993, "lr": 2.2000575930223394e-06, "epoch": 2.9223481211190063, "percentage": 58.45, "elapsed_time": "1:59:51", "remaining_time": "1:25:12", "throughput": 19979.24, "total_tokens": 143675520}
|
|
{"current_steps": 45655, "total_steps": 78105, "loss": 0.2178, "lr": 2.199502983494375e-06, "epoch": 2.9226682030599833, "percentage": 58.45, "elapsed_time": "1:59:51", "remaining_time": "1:25:11", "throughput": 19979.5, "total_tokens": 143690112}
|
|
{"current_steps": 45660, "total_steps": 78105, "loss": 0.1915, "lr": 2.198948388971667e-06, "epoch": 2.9229882850009603, "percentage": 58.46, "elapsed_time": "1:59:52", "remaining_time": "1:25:10", "throughput": 19979.74, "total_tokens": 143704832}
|
|
{"current_steps": 45665, "total_steps": 78105, "loss": 0.2821, "lr": 2.198393809481908e-06, "epoch": 2.9233083669419373, "percentage": 58.47, "elapsed_time": "1:59:53", "remaining_time": "1:25:09", "throughput": 19980.02, "total_tokens": 143719808}
|
|
{"current_steps": 45670, "total_steps": 78105, "loss": 0.2764, "lr": 2.197839245052791e-06, "epoch": 2.923628448882914, "percentage": 58.47, "elapsed_time": "1:59:53", "remaining_time": "1:25:09", "throughput": 19980.34, "total_tokens": 143735616}
|
|
{"current_steps": 45675, "total_steps": 78105, "loss": 0.2563, "lr": 2.197284695712009e-06, "epoch": 2.923948530823891, "percentage": 58.48, "elapsed_time": "1:59:54", "remaining_time": "1:25:08", "throughput": 19980.71, "total_tokens": 143752448}
|
|
{"current_steps": 45680, "total_steps": 78105, "loss": 0.2517, "lr": 2.1967301614872516e-06, "epoch": 2.924268612764868, "percentage": 58.49, "elapsed_time": "1:59:55", "remaining_time": "1:25:07", "throughput": 19981.03, "total_tokens": 143768384}
|
|
{"current_steps": 45685, "total_steps": 78105, "loss": 0.2312, "lr": 2.1961756424062114e-06, "epoch": 2.924588694705845, "percentage": 58.49, "elapsed_time": "1:59:55", "remaining_time": "1:25:06", "throughput": 19981.3, "total_tokens": 143783296}
|
|
{"current_steps": 45690, "total_steps": 78105, "loss": 0.2144, "lr": 2.195621138496576e-06, "epoch": 2.924908776646822, "percentage": 58.5, "elapsed_time": "1:59:56", "remaining_time": "1:25:05", "throughput": 19981.61, "total_tokens": 143799360}
|
|
{"current_steps": 45695, "total_steps": 78105, "loss": 0.2718, "lr": 2.195066649786036e-06, "epoch": 2.9252288585877984, "percentage": 58.5, "elapsed_time": "1:59:57", "remaining_time": "1:25:04", "throughput": 19981.86, "total_tokens": 143814144}
|
|
{"current_steps": 45700, "total_steps": 78105, "loss": 0.285, "lr": 2.1945121763022786e-06, "epoch": 2.9255489405287753, "percentage": 58.51, "elapsed_time": "1:59:57", "remaining_time": "1:25:03", "throughput": 19982.14, "total_tokens": 143828992}
|
|
{"current_steps": 45705, "total_steps": 78105, "loss": 0.1564, "lr": 2.193957718072993e-06, "epoch": 2.9258690224697523, "percentage": 58.52, "elapsed_time": "1:59:58", "remaining_time": "1:25:03", "throughput": 19982.43, "total_tokens": 143844672}
|
|
{"current_steps": 45710, "total_steps": 78105, "loss": 0.2277, "lr": 2.1934032751258638e-06, "epoch": 2.926189104410729, "percentage": 58.52, "elapsed_time": "1:59:59", "remaining_time": "1:25:02", "throughput": 19982.73, "total_tokens": 143860416}
|
|
{"current_steps": 45715, "total_steps": 78105, "loss": 0.2406, "lr": 2.1928488474885786e-06, "epoch": 2.926509186351706, "percentage": 58.53, "elapsed_time": "1:59:59", "remaining_time": "1:25:01", "throughput": 19983.0, "total_tokens": 143875520}
|
|
{"current_steps": 45720, "total_steps": 78105, "loss": 0.2056, "lr": 2.1922944351888214e-06, "epoch": 2.926829268292683, "percentage": 58.54, "elapsed_time": "2:00:00", "remaining_time": "1:25:00", "throughput": 19983.27, "total_tokens": 143891264}
|
|
{"current_steps": 45725, "total_steps": 78105, "loss": 0.2964, "lr": 2.1917400382542783e-06, "epoch": 2.92714935023366, "percentage": 58.54, "elapsed_time": "2:00:01", "remaining_time": "1:24:59", "throughput": 19983.54, "total_tokens": 143906240}
|
|
{"current_steps": 45730, "total_steps": 78105, "loss": 0.2918, "lr": 2.191185656712631e-06, "epoch": 2.927469432174637, "percentage": 58.55, "elapsed_time": "2:00:01", "remaining_time": "1:24:58", "throughput": 19983.86, "total_tokens": 143922240}
|
|
{"current_steps": 45735, "total_steps": 78105, "loss": 0.2193, "lr": 2.190631290591564e-06, "epoch": 2.927789514115614, "percentage": 58.56, "elapsed_time": "2:00:02", "remaining_time": "1:24:57", "throughput": 19984.16, "total_tokens": 143938112}
|
|
{"current_steps": 45740, "total_steps": 78105, "loss": 0.2415, "lr": 2.190076939918759e-06, "epoch": 2.9281095960565904, "percentage": 58.56, "elapsed_time": "2:00:03", "remaining_time": "1:24:56", "throughput": 19984.48, "total_tokens": 143954176}
|
|
{"current_steps": 45745, "total_steps": 78105, "loss": 0.3458, "lr": 2.189522604721897e-06, "epoch": 2.9284296779975674, "percentage": 58.57, "elapsed_time": "2:00:03", "remaining_time": "1:24:56", "throughput": 19984.78, "total_tokens": 143969856}
|
|
{"current_steps": 45750, "total_steps": 78105, "loss": 0.2386, "lr": 2.188968285028659e-06, "epoch": 2.9287497599385444, "percentage": 58.57, "elapsed_time": "2:00:04", "remaining_time": "1:24:55", "throughput": 19985.04, "total_tokens": 143984768}
|
|
{"current_steps": 45755, "total_steps": 78105, "loss": 0.3346, "lr": 2.1884139808667247e-06, "epoch": 2.929069841879521, "percentage": 58.58, "elapsed_time": "2:00:05", "remaining_time": "1:24:54", "throughput": 19985.33, "total_tokens": 144000128}
|
|
{"current_steps": 45760, "total_steps": 78105, "loss": 0.1727, "lr": 2.187859692263774e-06, "epoch": 2.929389923820498, "percentage": 58.59, "elapsed_time": "2:00:05", "remaining_time": "1:24:53", "throughput": 19985.6, "total_tokens": 144015360}
|
|
{"current_steps": 45765, "total_steps": 78105, "loss": 0.2571, "lr": 2.187305419247483e-06, "epoch": 2.929710005761475, "percentage": 58.59, "elapsed_time": "2:00:06", "remaining_time": "1:24:52", "throughput": 19985.88, "total_tokens": 144030656}
|
|
{"current_steps": 45770, "total_steps": 78105, "loss": 0.2338, "lr": 2.186751161845532e-06, "epoch": 2.930030087702452, "percentage": 58.6, "elapsed_time": "2:00:07", "remaining_time": "1:24:51", "throughput": 19986.2, "total_tokens": 144046272}
|
|
{"current_steps": 45775, "total_steps": 78105, "loss": 0.2016, "lr": 2.1861969200855954e-06, "epoch": 2.930350169643429, "percentage": 58.61, "elapsed_time": "2:00:07", "remaining_time": "1:24:50", "throughput": 19986.48, "total_tokens": 144061632}
|
|
{"current_steps": 45780, "total_steps": 78105, "loss": 0.1527, "lr": 2.1856426939953506e-06, "epoch": 2.930670251584406, "percentage": 58.61, "elapsed_time": "2:00:08", "remaining_time": "1:24:49", "throughput": 19986.77, "total_tokens": 144077248}
|
|
{"current_steps": 45785, "total_steps": 78105, "loss": 0.2234, "lr": 2.185088483602471e-06, "epoch": 2.9309903335253824, "percentage": 58.62, "elapsed_time": "2:00:09", "remaining_time": "1:24:49", "throughput": 19987.14, "total_tokens": 144094080}
|
|
{"current_steps": 45790, "total_steps": 78105, "loss": 0.1364, "lr": 2.184534288934633e-06, "epoch": 2.9313104154663594, "percentage": 58.63, "elapsed_time": "2:00:10", "remaining_time": "1:24:48", "throughput": 19987.43, "total_tokens": 144109440}
|
|
{"current_steps": 45795, "total_steps": 78105, "loss": 0.1791, "lr": 2.18398011001951e-06, "epoch": 2.9316304974073364, "percentage": 58.63, "elapsed_time": "2:00:10", "remaining_time": "1:24:47", "throughput": 19987.69, "total_tokens": 144124416}
|
|
{"current_steps": 45800, "total_steps": 78105, "loss": 0.2824, "lr": 2.1834259468847734e-06, "epoch": 2.931950579348313, "percentage": 58.64, "elapsed_time": "2:00:11", "remaining_time": "1:24:46", "throughput": 19987.94, "total_tokens": 144139136}
|
|
{"current_steps": 45805, "total_steps": 78105, "loss": 0.2023, "lr": 2.1828717995580966e-06, "epoch": 2.93227066128929, "percentage": 58.65, "elapsed_time": "2:00:12", "remaining_time": "1:24:45", "throughput": 19988.29, "total_tokens": 144155712}
|
|
{"current_steps": 45810, "total_steps": 78105, "loss": 0.2327, "lr": 2.1823176680671498e-06, "epoch": 2.932590743230267, "percentage": 58.65, "elapsed_time": "2:00:12", "remaining_time": "1:24:44", "throughput": 19988.6, "total_tokens": 144171264}
|
|
{"current_steps": 45815, "total_steps": 78105, "loss": 0.1862, "lr": 2.1817635524396045e-06, "epoch": 2.932910825171244, "percentage": 58.66, "elapsed_time": "2:00:13", "remaining_time": "1:24:43", "throughput": 19988.98, "total_tokens": 144188160}
|
|
{"current_steps": 45820, "total_steps": 78105, "loss": 0.16, "lr": 2.181209452703129e-06, "epoch": 2.933230907112221, "percentage": 58.66, "elapsed_time": "2:00:14", "remaining_time": "1:24:43", "throughput": 19989.24, "total_tokens": 144202688}
|
|
{"current_steps": 45825, "total_steps": 78105, "loss": 0.2288, "lr": 2.1806553688853942e-06, "epoch": 2.933550989053198, "percentage": 58.67, "elapsed_time": "2:00:14", "remaining_time": "1:24:42", "throughput": 19989.52, "total_tokens": 144218048}
|
|
{"current_steps": 45830, "total_steps": 78105, "loss": 0.2404, "lr": 2.180101301014066e-06, "epoch": 2.9338710709941744, "percentage": 58.68, "elapsed_time": "2:00:15", "remaining_time": "1:24:41", "throughput": 19989.83, "total_tokens": 144234304}
|
|
{"current_steps": 45835, "total_steps": 78105, "loss": 0.2155, "lr": 2.1795472491168124e-06, "epoch": 2.9341911529351514, "percentage": 58.68, "elapsed_time": "2:00:16", "remaining_time": "1:24:40", "throughput": 19990.14, "total_tokens": 144249856}
|
|
{"current_steps": 45840, "total_steps": 78105, "loss": 0.265, "lr": 2.1789932132212997e-06, "epoch": 2.9345112348761284, "percentage": 58.69, "elapsed_time": "2:00:16", "remaining_time": "1:24:39", "throughput": 19990.46, "total_tokens": 144265920}
|
|
{"current_steps": 45845, "total_steps": 78105, "loss": 0.1973, "lr": 2.178439193355194e-06, "epoch": 2.934831316817105, "percentage": 58.7, "elapsed_time": "2:00:17", "remaining_time": "1:24:38", "throughput": 19990.77, "total_tokens": 144282048}
|
|
{"current_steps": 45850, "total_steps": 78105, "loss": 0.2021, "lr": 2.1778851895461612e-06, "epoch": 2.935151398758082, "percentage": 58.7, "elapsed_time": "2:00:18", "remaining_time": "1:24:37", "throughput": 19991.03, "total_tokens": 144296960}
|
|
{"current_steps": 45855, "total_steps": 78105, "loss": 0.1903, "lr": 2.177331201821863e-06, "epoch": 2.935471480699059, "percentage": 58.71, "elapsed_time": "2:00:18", "remaining_time": "1:24:36", "throughput": 19991.34, "total_tokens": 144312896}
|
|
{"current_steps": 45860, "total_steps": 78105, "loss": 0.1487, "lr": 2.1767772302099645e-06, "epoch": 2.935791562640036, "percentage": 58.72, "elapsed_time": "2:00:19", "remaining_time": "1:24:36", "throughput": 19991.63, "total_tokens": 144328448}
|
|
{"current_steps": 45865, "total_steps": 78105, "loss": 0.1638, "lr": 2.176223274738127e-06, "epoch": 2.936111644581013, "percentage": 58.72, "elapsed_time": "2:00:20", "remaining_time": "1:24:35", "throughput": 19992.01, "total_tokens": 144345344}
|
|
{"current_steps": 45870, "total_steps": 78105, "loss": 0.2904, "lr": 2.175669335434013e-06, "epoch": 2.9364317265219895, "percentage": 58.73, "elapsed_time": "2:00:20", "remaining_time": "1:24:34", "throughput": 19992.29, "total_tokens": 144360320}
|
|
{"current_steps": 45875, "total_steps": 78105, "loss": 0.1791, "lr": 2.175115412325283e-06, "epoch": 2.9367518084629665, "percentage": 58.74, "elapsed_time": "2:00:21", "remaining_time": "1:24:33", "throughput": 19992.65, "total_tokens": 144376640}
|
|
{"current_steps": 45880, "total_steps": 78105, "loss": 0.2334, "lr": 2.1745615054395973e-06, "epoch": 2.9370718904039435, "percentage": 58.74, "elapsed_time": "2:00:22", "remaining_time": "1:24:32", "throughput": 19992.93, "total_tokens": 144392064}
|
|
{"current_steps": 45885, "total_steps": 78105, "loss": 0.1733, "lr": 2.174007614804614e-06, "epoch": 2.9373919723449204, "percentage": 58.75, "elapsed_time": "2:00:22", "remaining_time": "1:24:31", "throughput": 19993.31, "total_tokens": 144409216}
|
|
{"current_steps": 45890, "total_steps": 78105, "loss": 0.2628, "lr": 2.173453740447993e-06, "epoch": 2.937712054285897, "percentage": 58.75, "elapsed_time": "2:00:23", "remaining_time": "1:24:30", "throughput": 19993.6, "total_tokens": 144424832}
|
|
{"current_steps": 45895, "total_steps": 78105, "loss": 0.1869, "lr": 2.1728998823973908e-06, "epoch": 2.938032136226874, "percentage": 58.76, "elapsed_time": "2:00:24", "remaining_time": "1:24:30", "throughput": 19993.94, "total_tokens": 144441408}
|
|
{"current_steps": 45900, "total_steps": 78105, "loss": 0.2113, "lr": 2.1723460406804657e-06, "epoch": 2.938352218167851, "percentage": 58.77, "elapsed_time": "2:00:24", "remaining_time": "1:24:29", "throughput": 19994.19, "total_tokens": 144456000}
|
|
{"current_steps": 45905, "total_steps": 78105, "loss": 0.2059, "lr": 2.171792215324872e-06, "epoch": 2.938672300108828, "percentage": 58.77, "elapsed_time": "2:00:25", "remaining_time": "1:24:28", "throughput": 19994.54, "total_tokens": 144472576}
|
|
{"current_steps": 45910, "total_steps": 78105, "loss": 0.1148, "lr": 2.1712384063582647e-06, "epoch": 2.938992382049805, "percentage": 58.78, "elapsed_time": "2:00:26", "remaining_time": "1:24:27", "throughput": 19994.85, "total_tokens": 144488704}
|
|
{"current_steps": 45915, "total_steps": 78105, "loss": 0.2741, "lr": 2.170684613808301e-06, "epoch": 2.9393124639907815, "percentage": 58.79, "elapsed_time": "2:00:26", "remaining_time": "1:24:26", "throughput": 19995.12, "total_tokens": 144504000}
|
|
{"current_steps": 45920, "total_steps": 78105, "loss": 0.1434, "lr": 2.1701308377026305e-06, "epoch": 2.9396325459317585, "percentage": 58.79, "elapsed_time": "2:00:27", "remaining_time": "1:24:25", "throughput": 19995.38, "total_tokens": 144518848}
|
|
{"current_steps": 45925, "total_steps": 78105, "loss": 0.3417, "lr": 2.1695770780689083e-06, "epoch": 2.9399526278727355, "percentage": 58.8, "elapsed_time": "2:00:28", "remaining_time": "1:24:24", "throughput": 19995.71, "total_tokens": 144535424}
|
|
{"current_steps": 45930, "total_steps": 78105, "loss": 0.1506, "lr": 2.169023334934785e-06, "epoch": 2.9402727098137125, "percentage": 58.81, "elapsed_time": "2:00:28", "remaining_time": "1:24:24", "throughput": 19995.99, "total_tokens": 144550656}
|
|
{"current_steps": 45935, "total_steps": 78105, "loss": 0.1679, "lr": 2.1684696083279143e-06, "epoch": 2.940592791754689, "percentage": 58.81, "elapsed_time": "2:00:29", "remaining_time": "1:24:23", "throughput": 19996.22, "total_tokens": 144565312}
|
|
{"current_steps": 45940, "total_steps": 78105, "loss": 0.2042, "lr": 2.1679158982759425e-06, "epoch": 2.940912873695666, "percentage": 58.82, "elapsed_time": "2:00:30", "remaining_time": "1:24:22", "throughput": 19996.55, "total_tokens": 144581504}
|
|
{"current_steps": 45945, "total_steps": 78105, "loss": 0.2542, "lr": 2.1673622048065222e-06, "epoch": 2.941232955636643, "percentage": 58.82, "elapsed_time": "2:00:31", "remaining_time": "1:24:21", "throughput": 19996.87, "total_tokens": 144597568}
|
|
{"current_steps": 45950, "total_steps": 78105, "loss": 0.2439, "lr": 2.1668085279473e-06, "epoch": 2.94155303757762, "percentage": 58.83, "elapsed_time": "2:00:31", "remaining_time": "1:24:20", "throughput": 19997.13, "total_tokens": 144612544}
|
|
{"current_steps": 45955, "total_steps": 78105, "loss": 0.2301, "lr": 2.1662548677259252e-06, "epoch": 2.941873119518597, "percentage": 58.84, "elapsed_time": "2:00:32", "remaining_time": "1:24:19", "throughput": 19997.38, "total_tokens": 144627776}
|
|
{"current_steps": 45960, "total_steps": 78105, "loss": 0.1665, "lr": 2.1657012241700428e-06, "epoch": 2.9421932014595735, "percentage": 58.84, "elapsed_time": "2:00:33", "remaining_time": "1:24:18", "throughput": 19997.7, "total_tokens": 144643712}
|
|
{"current_steps": 45965, "total_steps": 78105, "loss": 0.3135, "lr": 2.1651475973073e-06, "epoch": 2.9425132834005505, "percentage": 58.85, "elapsed_time": "2:00:33", "remaining_time": "1:24:17", "throughput": 19997.95, "total_tokens": 144658688}
|
|
{"current_steps": 45970, "total_steps": 78105, "loss": 0.2947, "lr": 2.1645939871653433e-06, "epoch": 2.9428333653415275, "percentage": 58.86, "elapsed_time": "2:00:34", "remaining_time": "1:24:17", "throughput": 19998.27, "total_tokens": 144674752}
|
|
{"current_steps": 45975, "total_steps": 78105, "loss": 0.1797, "lr": 2.1640403937718146e-06, "epoch": 2.943153447282504, "percentage": 58.86, "elapsed_time": "2:00:35", "remaining_time": "1:24:16", "throughput": 19998.61, "total_tokens": 144690688}
|
|
{"current_steps": 45980, "total_steps": 78105, "loss": 0.3034, "lr": 2.1634868171543595e-06, "epoch": 2.943473529223481, "percentage": 58.87, "elapsed_time": "2:00:35", "remaining_time": "1:24:15", "throughput": 19998.92, "total_tokens": 144706816}
|
|
{"current_steps": 45985, "total_steps": 78105, "loss": 0.1714, "lr": 2.1629332573406194e-06, "epoch": 2.943793611164458, "percentage": 58.88, "elapsed_time": "2:00:36", "remaining_time": "1:24:14", "throughput": 19999.23, "total_tokens": 144722752}
|
|
{"current_steps": 45990, "total_steps": 78105, "loss": 0.2672, "lr": 2.1623797143582375e-06, "epoch": 2.944113693105435, "percentage": 58.88, "elapsed_time": "2:00:37", "remaining_time": "1:24:13", "throughput": 19999.57, "total_tokens": 144739200}
|
|
{"current_steps": 45995, "total_steps": 78105, "loss": 0.2714, "lr": 2.161826188234853e-06, "epoch": 2.944433775046412, "percentage": 58.89, "elapsed_time": "2:00:37", "remaining_time": "1:24:12", "throughput": 19999.84, "total_tokens": 144754432}
|
|
{"current_steps": 46000, "total_steps": 78105, "loss": 0.2298, "lr": 2.161272678998108e-06, "epoch": 2.944753856987389, "percentage": 58.9, "elapsed_time": "2:00:38", "remaining_time": "1:24:11", "throughput": 20000.12, "total_tokens": 144769600}
|
|
{"current_steps": 46005, "total_steps": 78105, "loss": 0.2261, "lr": 2.1607191866756403e-06, "epoch": 2.9450739389283656, "percentage": 58.9, "elapsed_time": "2:00:39", "remaining_time": "1:24:11", "throughput": 20000.42, "total_tokens": 144785152}
|
|
{"current_steps": 46010, "total_steps": 78105, "loss": 0.2334, "lr": 2.1601657112950897e-06, "epoch": 2.9453940208693425, "percentage": 58.91, "elapsed_time": "2:00:39", "remaining_time": "1:24:10", "throughput": 20000.74, "total_tokens": 144801088}
|
|
{"current_steps": 46015, "total_steps": 78105, "loss": 0.2539, "lr": 2.1596122528840928e-06, "epoch": 2.9457141028103195, "percentage": 58.91, "elapsed_time": "2:00:40", "remaining_time": "1:24:09", "throughput": 20001.13, "total_tokens": 144818560}
|
|
{"current_steps": 46020, "total_steps": 78105, "loss": 0.278, "lr": 2.159058811470287e-06, "epoch": 2.946034184751296, "percentage": 58.92, "elapsed_time": "2:00:41", "remaining_time": "1:24:08", "throughput": 20001.36, "total_tokens": 144832896}
|
|
{"current_steps": 46025, "total_steps": 78105, "loss": 0.1882, "lr": 2.158505387081309e-06, "epoch": 2.946354266692273, "percentage": 58.93, "elapsed_time": "2:00:41", "remaining_time": "1:24:07", "throughput": 20001.69, "total_tokens": 144848576}
|
|
{"current_steps": 46030, "total_steps": 78105, "loss": 0.3166, "lr": 2.1579519797447926e-06, "epoch": 2.94667434863325, "percentage": 58.93, "elapsed_time": "2:00:42", "remaining_time": "1:24:06", "throughput": 20001.98, "total_tokens": 144864000}
|
|
{"current_steps": 46035, "total_steps": 78105, "loss": 0.1769, "lr": 2.157398589488373e-06, "epoch": 2.946994430574227, "percentage": 58.94, "elapsed_time": "2:00:43", "remaining_time": "1:24:05", "throughput": 20002.24, "total_tokens": 144879232}
|
|
{"current_steps": 46040, "total_steps": 78105, "loss": 0.2262, "lr": 2.1568452163396826e-06, "epoch": 2.947314512515204, "percentage": 58.95, "elapsed_time": "2:00:43", "remaining_time": "1:24:05", "throughput": 20002.55, "total_tokens": 144895104}
|
|
{"current_steps": 46045, "total_steps": 78105, "loss": 0.2183, "lr": 2.156291860326356e-06, "epoch": 2.947634594456181, "percentage": 58.95, "elapsed_time": "2:00:44", "remaining_time": "1:24:04", "throughput": 20002.81, "total_tokens": 144909632}
|
|
{"current_steps": 46050, "total_steps": 78105, "loss": 0.2074, "lr": 2.155738521476022e-06, "epoch": 2.9479546763971576, "percentage": 58.96, "elapsed_time": "2:00:45", "remaining_time": "1:24:03", "throughput": 20003.1, "total_tokens": 144924992}
|
|
{"current_steps": 46055, "total_steps": 78105, "loss": 0.1439, "lr": 2.155185199816315e-06, "epoch": 2.9482747583381346, "percentage": 58.97, "elapsed_time": "2:00:45", "remaining_time": "1:24:02", "throughput": 20003.37, "total_tokens": 144940032}
|
|
{"current_steps": 46060, "total_steps": 78105, "loss": 0.2487, "lr": 2.1546318953748615e-06, "epoch": 2.9485948402791116, "percentage": 58.97, "elapsed_time": "2:00:46", "remaining_time": "1:24:01", "throughput": 20003.67, "total_tokens": 144955392}
|
|
{"current_steps": 46065, "total_steps": 78105, "loss": 0.1977, "lr": 2.154078608179293e-06, "epoch": 2.948914922220088, "percentage": 58.98, "elapsed_time": "2:00:47", "remaining_time": "1:24:00", "throughput": 20003.96, "total_tokens": 144970880}
|
|
{"current_steps": 46070, "total_steps": 78105, "loss": 0.2199, "lr": 2.153525338257236e-06, "epoch": 2.949235004161065, "percentage": 58.98, "elapsed_time": "2:00:47", "remaining_time": "1:23:59", "throughput": 20004.36, "total_tokens": 144988224}
|
|
{"current_steps": 46075, "total_steps": 78105, "loss": 0.2367, "lr": 2.15297208563632e-06, "epoch": 2.949555086102042, "percentage": 58.99, "elapsed_time": "2:00:48", "remaining_time": "1:23:58", "throughput": 20004.63, "total_tokens": 145003648}
|
|
{"current_steps": 46080, "total_steps": 78105, "loss": 0.2339, "lr": 2.1524188503441697e-06, "epoch": 2.949875168043019, "percentage": 59.0, "elapsed_time": "2:00:49", "remaining_time": "1:23:58", "throughput": 20005.07, "total_tokens": 145021952}
|
|
{"current_steps": 46085, "total_steps": 78105, "loss": 0.2185, "lr": 2.1518656324084113e-06, "epoch": 2.950195249983996, "percentage": 59.0, "elapsed_time": "2:00:49", "remaining_time": "1:23:57", "throughput": 20005.34, "total_tokens": 145036992}
|
|
{"current_steps": 46090, "total_steps": 78105, "loss": 0.2829, "lr": 2.1513124318566704e-06, "epoch": 2.950515331924973, "percentage": 59.01, "elapsed_time": "2:00:50", "remaining_time": "1:23:56", "throughput": 20005.6, "total_tokens": 145052224}
|
|
{"current_steps": 46095, "total_steps": 78105, "loss": 0.2023, "lr": 2.15075924871657e-06, "epoch": 2.9508354138659496, "percentage": 59.02, "elapsed_time": "2:00:51", "remaining_time": "1:23:55", "throughput": 20005.87, "total_tokens": 145067072}
|
|
{"current_steps": 46100, "total_steps": 78105, "loss": 0.2653, "lr": 2.1502060830157344e-06, "epoch": 2.9511554958069266, "percentage": 59.02, "elapsed_time": "2:00:51", "remaining_time": "1:23:54", "throughput": 20006.14, "total_tokens": 145082880}
|
|
{"current_steps": 46105, "total_steps": 78105, "loss": 0.279, "lr": 2.149652934781784e-06, "epoch": 2.9514755777479036, "percentage": 59.03, "elapsed_time": "2:00:52", "remaining_time": "1:23:53", "throughput": 20006.43, "total_tokens": 145098368}
|
|
{"current_steps": 46110, "total_steps": 78105, "loss": 0.1865, "lr": 2.1490998040423424e-06, "epoch": 2.95179565968888, "percentage": 59.04, "elapsed_time": "2:00:53", "remaining_time": "1:23:52", "throughput": 20006.76, "total_tokens": 145114368}
|
|
{"current_steps": 46115, "total_steps": 78105, "loss": 0.246, "lr": 2.1485466908250275e-06, "epoch": 2.952115741629857, "percentage": 59.04, "elapsed_time": "2:00:53", "remaining_time": "1:23:52", "throughput": 20007.03, "total_tokens": 145129664}
|
|
{"current_steps": 46120, "total_steps": 78105, "loss": 0.1757, "lr": 2.1479935951574606e-06, "epoch": 2.952435823570834, "percentage": 59.05, "elapsed_time": "2:00:54", "remaining_time": "1:23:51", "throughput": 20007.34, "total_tokens": 145145536}
|
|
{"current_steps": 46125, "total_steps": 78105, "loss": 0.2171, "lr": 2.14744051706726e-06, "epoch": 2.952755905511811, "percentage": 59.06, "elapsed_time": "2:00:55", "remaining_time": "1:23:50", "throughput": 20007.65, "total_tokens": 145161664}
|
|
{"current_steps": 46130, "total_steps": 78105, "loss": 0.2985, "lr": 2.1468874565820446e-06, "epoch": 2.953075987452788, "percentage": 59.06, "elapsed_time": "2:00:55", "remaining_time": "1:23:49", "throughput": 20007.94, "total_tokens": 145177024}
|
|
{"current_steps": 46135, "total_steps": 78105, "loss": 0.3048, "lr": 2.1463344137294287e-06, "epoch": 2.9533960693937646, "percentage": 59.07, "elapsed_time": "2:00:56", "remaining_time": "1:23:48", "throughput": 20008.22, "total_tokens": 145192384}
|
|
{"current_steps": 46140, "total_steps": 78105, "loss": 0.2254, "lr": 2.1457813885370304e-06, "epoch": 2.9537161513347416, "percentage": 59.07, "elapsed_time": "2:00:57", "remaining_time": "1:23:47", "throughput": 20008.54, "total_tokens": 145208320}
|
|
{"current_steps": 46145, "total_steps": 78105, "loss": 0.2513, "lr": 2.1452283810324657e-06, "epoch": 2.9540362332757186, "percentage": 59.08, "elapsed_time": "2:00:57", "remaining_time": "1:23:46", "throughput": 20008.84, "total_tokens": 145223808}
|
|
{"current_steps": 46150, "total_steps": 78105, "loss": 0.2454, "lr": 2.1446753912433464e-06, "epoch": 2.9543563152166956, "percentage": 59.09, "elapsed_time": "2:00:58", "remaining_time": "1:23:46", "throughput": 20009.12, "total_tokens": 145239104}
|
|
{"current_steps": 46155, "total_steps": 78105, "loss": 0.3205, "lr": 2.1441224191972876e-06, "epoch": 2.954676397157672, "percentage": 59.09, "elapsed_time": "2:00:59", "remaining_time": "1:23:45", "throughput": 20009.37, "total_tokens": 145254080}
|
|
{"current_steps": 46160, "total_steps": 78105, "loss": 0.2263, "lr": 2.1435694649219012e-06, "epoch": 2.954996479098649, "percentage": 59.1, "elapsed_time": "2:00:59", "remaining_time": "1:23:44", "throughput": 20009.72, "total_tokens": 145270464}
|
|
{"current_steps": 46165, "total_steps": 78105, "loss": 0.2373, "lr": 2.1430165284448006e-06, "epoch": 2.955316561039626, "percentage": 59.11, "elapsed_time": "2:01:00", "remaining_time": "1:23:43", "throughput": 20010.02, "total_tokens": 145285888}
|
|
{"current_steps": 46170, "total_steps": 78105, "loss": 0.2568, "lr": 2.1424636097935935e-06, "epoch": 2.955636642980603, "percentage": 59.11, "elapsed_time": "2:01:01", "remaining_time": "1:23:42", "throughput": 20010.3, "total_tokens": 145301056}
|
|
{"current_steps": 46175, "total_steps": 78105, "loss": 0.1594, "lr": 2.141910708995892e-06, "epoch": 2.95595672492158, "percentage": 59.12, "elapsed_time": "2:01:02", "remaining_time": "1:23:41", "throughput": 20010.65, "total_tokens": 145317504}
|
|
{"current_steps": 46180, "total_steps": 78105, "loss": 0.2354, "lr": 2.141357826079304e-06, "epoch": 2.9562768068625567, "percentage": 59.13, "elapsed_time": "2:01:02", "remaining_time": "1:23:40", "throughput": 20010.98, "total_tokens": 145333696}
|
|
{"current_steps": 46185, "total_steps": 78105, "loss": 0.1758, "lr": 2.140804961071439e-06, "epoch": 2.9565968888035337, "percentage": 59.13, "elapsed_time": "2:01:03", "remaining_time": "1:23:39", "throughput": 20011.27, "total_tokens": 145349312}
|
|
{"current_steps": 46190, "total_steps": 78105, "loss": 0.158, "lr": 2.1402521139999024e-06, "epoch": 2.9569169707445107, "percentage": 59.14, "elapsed_time": "2:01:04", "remaining_time": "1:23:39", "throughput": 20011.53, "total_tokens": 145364416}
|
|
{"current_steps": 46195, "total_steps": 78105, "loss": 0.2748, "lr": 2.1396992848923013e-06, "epoch": 2.9572370526854876, "percentage": 59.14, "elapsed_time": "2:01:04", "remaining_time": "1:23:38", "throughput": 20011.89, "total_tokens": 145381376}
|
|
{"current_steps": 46200, "total_steps": 78105, "loss": 0.2563, "lr": 2.1391464737762425e-06, "epoch": 2.957557134626464, "percentage": 59.15, "elapsed_time": "2:01:05", "remaining_time": "1:23:37", "throughput": 20012.21, "total_tokens": 145397568}
|
|
{"current_steps": 46205, "total_steps": 78105, "loss": 0.3571, "lr": 2.1385936806793277e-06, "epoch": 2.957877216567441, "percentage": 59.16, "elapsed_time": "2:01:06", "remaining_time": "1:23:36", "throughput": 20012.5, "total_tokens": 145412800}
|
|
{"current_steps": 46210, "total_steps": 78105, "loss": 0.169, "lr": 2.1380409056291627e-06, "epoch": 2.958197298508418, "percentage": 59.16, "elapsed_time": "2:01:06", "remaining_time": "1:23:35", "throughput": 20012.89, "total_tokens": 145429952}
|
|
{"current_steps": 46215, "total_steps": 78105, "loss": 0.2084, "lr": 2.1374881486533488e-06, "epoch": 2.958517380449395, "percentage": 59.17, "elapsed_time": "2:01:07", "remaining_time": "1:23:34", "throughput": 20013.3, "total_tokens": 145447296}
|
|
{"current_steps": 46220, "total_steps": 78105, "loss": 0.2288, "lr": 2.13693540977949e-06, "epoch": 2.958837462390372, "percentage": 59.18, "elapsed_time": "2:01:08", "remaining_time": "1:23:34", "throughput": 20013.73, "total_tokens": 145465216}
|
|
{"current_steps": 46225, "total_steps": 78105, "loss": 0.2945, "lr": 2.1363826890351842e-06, "epoch": 2.9591575443313487, "percentage": 59.18, "elapsed_time": "2:01:08", "remaining_time": "1:23:33", "throughput": 20014.01, "total_tokens": 145480640}
|
|
{"current_steps": 46230, "total_steps": 78105, "loss": 0.3031, "lr": 2.1358299864480344e-06, "epoch": 2.9594776262723257, "percentage": 59.19, "elapsed_time": "2:01:09", "remaining_time": "1:23:32", "throughput": 20014.3, "total_tokens": 145496000}
|
|
{"current_steps": 46235, "total_steps": 78105, "loss": 0.2104, "lr": 2.135277302045636e-06, "epoch": 2.9597977082133027, "percentage": 59.2, "elapsed_time": "2:01:10", "remaining_time": "1:23:31", "throughput": 20014.65, "total_tokens": 145512512}
|
|
{"current_steps": 46240, "total_steps": 78105, "loss": 0.168, "lr": 2.134724635855591e-06, "epoch": 2.9601177901542792, "percentage": 59.2, "elapsed_time": "2:01:10", "remaining_time": "1:23:30", "throughput": 20014.94, "total_tokens": 145527872}
|
|
{"current_steps": 46245, "total_steps": 78105, "loss": 0.2986, "lr": 2.1341719879054947e-06, "epoch": 2.960437872095256, "percentage": 59.21, "elapsed_time": "2:01:11", "remaining_time": "1:23:29", "throughput": 20015.23, "total_tokens": 145543808}
|
|
{"current_steps": 46250, "total_steps": 78105, "loss": 0.187, "lr": 2.133619358222944e-06, "epoch": 2.960757954036233, "percentage": 59.22, "elapsed_time": "2:01:12", "remaining_time": "1:23:28", "throughput": 20015.51, "total_tokens": 145559168}
|
|
{"current_steps": 46255, "total_steps": 78105, "loss": 0.2369, "lr": 2.133066746835534e-06, "epoch": 2.96107803597721, "percentage": 59.22, "elapsed_time": "2:01:12", "remaining_time": "1:23:27", "throughput": 20015.73, "total_tokens": 145573376}
|
|
{"current_steps": 46260, "total_steps": 78105, "loss": 0.2194, "lr": 2.1325141537708583e-06, "epoch": 2.961398117918187, "percentage": 59.23, "elapsed_time": "2:01:13", "remaining_time": "1:23:27", "throughput": 20016.06, "total_tokens": 145590016}
|
|
{"current_steps": 46265, "total_steps": 78105, "loss": 0.1865, "lr": 2.131961579056513e-06, "epoch": 2.961718199859164, "percentage": 59.23, "elapsed_time": "2:01:14", "remaining_time": "1:23:26", "throughput": 20016.53, "total_tokens": 145608832}
|
|
{"current_steps": 46270, "total_steps": 78105, "loss": 0.2651, "lr": 2.1314090227200885e-06, "epoch": 2.9620382818001407, "percentage": 59.24, "elapsed_time": "2:01:15", "remaining_time": "1:23:25", "throughput": 20016.82, "total_tokens": 145624640}
|
|
{"current_steps": 46275, "total_steps": 78105, "loss": 0.2519, "lr": 2.1308564847891784e-06, "epoch": 2.9623583637411177, "percentage": 59.25, "elapsed_time": "2:01:15", "remaining_time": "1:23:24", "throughput": 20017.13, "total_tokens": 145640320}
|
|
{"current_steps": 46280, "total_steps": 78105, "loss": 0.2431, "lr": 2.1303039652913718e-06, "epoch": 2.9626784456820947, "percentage": 59.25, "elapsed_time": "2:01:16", "remaining_time": "1:23:23", "throughput": 20017.39, "total_tokens": 145655296}
|
|
{"current_steps": 46285, "total_steps": 78105, "loss": 0.1792, "lr": 2.1297514642542607e-06, "epoch": 2.9629985276230713, "percentage": 59.26, "elapsed_time": "2:01:17", "remaining_time": "1:23:22", "throughput": 20017.68, "total_tokens": 145670528}
|
|
{"current_steps": 46290, "total_steps": 78105, "loss": 0.1373, "lr": 2.1291989817054314e-06, "epoch": 2.9633186095640482, "percentage": 59.27, "elapsed_time": "2:01:17", "remaining_time": "1:23:21", "throughput": 20018.02, "total_tokens": 145686720}
|
|
{"current_steps": 46295, "total_steps": 78105, "loss": 0.1459, "lr": 2.1286465176724746e-06, "epoch": 2.9636386915050252, "percentage": 59.27, "elapsed_time": "2:01:18", "remaining_time": "1:23:21", "throughput": 20018.31, "total_tokens": 145702144}
|
|
{"current_steps": 46300, "total_steps": 78105, "loss": 0.2048, "lr": 2.1280940721829757e-06, "epoch": 2.9639587734460022, "percentage": 59.28, "elapsed_time": "2:01:19", "remaining_time": "1:23:20", "throughput": 20018.55, "total_tokens": 145716992}
|
|
{"current_steps": 46305, "total_steps": 78105, "loss": 0.2093, "lr": 2.1275416452645226e-06, "epoch": 2.964278855386979, "percentage": 59.29, "elapsed_time": "2:01:19", "remaining_time": "1:23:19", "throughput": 20018.81, "total_tokens": 145731968}
|
|
{"current_steps": 46310, "total_steps": 78105, "loss": 0.3044, "lr": 2.1269892369446984e-06, "epoch": 2.964598937327956, "percentage": 59.29, "elapsed_time": "2:01:20", "remaining_time": "1:23:18", "throughput": 20019.1, "total_tokens": 145747392}
|
|
{"current_steps": 46315, "total_steps": 78105, "loss": 0.2091, "lr": 2.126436847251089e-06, "epoch": 2.9649190192689328, "percentage": 59.3, "elapsed_time": "2:01:21", "remaining_time": "1:23:17", "throughput": 20019.4, "total_tokens": 145763136}
|
|
{"current_steps": 46320, "total_steps": 78105, "loss": 0.2518, "lr": 2.1258844762112787e-06, "epoch": 2.9652391012099097, "percentage": 59.3, "elapsed_time": "2:01:21", "remaining_time": "1:23:16", "throughput": 20019.76, "total_tokens": 145779840}
|
|
{"current_steps": 46325, "total_steps": 78105, "loss": 0.3357, "lr": 2.1253321238528474e-06, "epoch": 2.9655591831508867, "percentage": 59.31, "elapsed_time": "2:01:22", "remaining_time": "1:23:15", "throughput": 20020.07, "total_tokens": 145795584}
|
|
{"current_steps": 46330, "total_steps": 78105, "loss": 0.2661, "lr": 2.12477979020338e-06, "epoch": 2.9658792650918633, "percentage": 59.32, "elapsed_time": "2:01:23", "remaining_time": "1:23:15", "throughput": 20020.34, "total_tokens": 145810688}
|
|
{"current_steps": 46335, "total_steps": 78105, "loss": 0.1763, "lr": 2.124227475290454e-06, "epoch": 2.9661993470328403, "percentage": 59.32, "elapsed_time": "2:01:23", "remaining_time": "1:23:14", "throughput": 20020.63, "total_tokens": 145826048}
|
|
{"current_steps": 46340, "total_steps": 78105, "loss": 0.3211, "lr": 2.123675179141652e-06, "epoch": 2.9665194289738173, "percentage": 59.33, "elapsed_time": "2:01:24", "remaining_time": "1:23:13", "throughput": 20020.9, "total_tokens": 145841408}
|
|
{"current_steps": 46345, "total_steps": 78105, "loss": 0.2432, "lr": 2.12312290178455e-06, "epoch": 2.9668395109147943, "percentage": 59.34, "elapsed_time": "2:01:25", "remaining_time": "1:23:12", "throughput": 20021.28, "total_tokens": 145858432}
|
|
{"current_steps": 46350, "total_steps": 78105, "loss": 0.2069, "lr": 2.1225706432467277e-06, "epoch": 2.9671595928557712, "percentage": 59.34, "elapsed_time": "2:01:25", "remaining_time": "1:23:11", "throughput": 20021.52, "total_tokens": 145873216}
|
|
{"current_steps": 46355, "total_steps": 78105, "loss": 0.1425, "lr": 2.1220184035557613e-06, "epoch": 2.9674796747967482, "percentage": 59.35, "elapsed_time": "2:01:26", "remaining_time": "1:23:10", "throughput": 20021.79, "total_tokens": 145888320}
|
|
{"current_steps": 46360, "total_steps": 78105, "loss": 0.2318, "lr": 2.1214661827392275e-06, "epoch": 2.967799756737725, "percentage": 59.36, "elapsed_time": "2:01:27", "remaining_time": "1:23:09", "throughput": 20022.06, "total_tokens": 145903424}
|
|
{"current_steps": 46365, "total_steps": 78105, "loss": 0.4214, "lr": 2.1209139808247003e-06, "epoch": 2.9681198386787018, "percentage": 59.36, "elapsed_time": "2:01:28", "remaining_time": "1:23:09", "throughput": 20022.83, "total_tokens": 145932608}
|
|
{"current_steps": 46370, "total_steps": 78105, "loss": 0.2261, "lr": 2.1203617978397543e-06, "epoch": 2.9684399206196788, "percentage": 59.37, "elapsed_time": "2:01:28", "remaining_time": "1:23:08", "throughput": 20023.08, "total_tokens": 145947456}
|
|
{"current_steps": 46375, "total_steps": 78105, "loss": 0.2228, "lr": 2.1198096338119634e-06, "epoch": 2.9687600025606553, "percentage": 59.38, "elapsed_time": "2:01:29", "remaining_time": "1:23:07", "throughput": 20023.34, "total_tokens": 145962560}
|
|
{"current_steps": 46380, "total_steps": 78105, "loss": 0.2131, "lr": 2.119257488768898e-06, "epoch": 2.9690800845016323, "percentage": 59.38, "elapsed_time": "2:01:30", "remaining_time": "1:23:06", "throughput": 20023.69, "total_tokens": 145978944}
|
|
{"current_steps": 46385, "total_steps": 78105, "loss": 0.1967, "lr": 2.118705362738131e-06, "epoch": 2.9694001664426093, "percentage": 59.39, "elapsed_time": "2:01:30", "remaining_time": "1:23:05", "throughput": 20023.96, "total_tokens": 145994304}
|
|
{"current_steps": 46390, "total_steps": 78105, "loss": 0.2386, "lr": 2.1181532557472316e-06, "epoch": 2.9697202483835863, "percentage": 59.39, "elapsed_time": "2:01:31", "remaining_time": "1:23:05", "throughput": 20024.27, "total_tokens": 146009856}
|
|
{"current_steps": 46395, "total_steps": 78105, "loss": 0.2441, "lr": 2.117601167823771e-06, "epoch": 2.9700403303245633, "percentage": 59.4, "elapsed_time": "2:01:32", "remaining_time": "1:23:04", "throughput": 20024.56, "total_tokens": 146025792}
|
|
{"current_steps": 46400, "total_steps": 78105, "loss": 0.2369, "lr": 2.1170490989953148e-06, "epoch": 2.97036041226554, "percentage": 59.41, "elapsed_time": "2:01:32", "remaining_time": "1:23:03", "throughput": 20024.81, "total_tokens": 146040832}
|
|
{"current_steps": 46405, "total_steps": 78105, "loss": 0.2341, "lr": 2.1164970492894326e-06, "epoch": 2.970680494206517, "percentage": 59.41, "elapsed_time": "2:01:33", "remaining_time": "1:23:02", "throughput": 20025.05, "total_tokens": 146055616}
|
|
{"current_steps": 46410, "total_steps": 78105, "loss": 0.2157, "lr": 2.1159450187336893e-06, "epoch": 2.971000576147494, "percentage": 59.42, "elapsed_time": "2:01:34", "remaining_time": "1:23:01", "throughput": 20025.33, "total_tokens": 146071168}
|
|
{"current_steps": 46415, "total_steps": 78105, "loss": 0.2758, "lr": 2.115393007355653e-06, "epoch": 2.971320658088471, "percentage": 59.43, "elapsed_time": "2:01:35", "remaining_time": "1:23:00", "throughput": 20025.63, "total_tokens": 146086976}
|
|
{"current_steps": 46420, "total_steps": 78105, "loss": 0.2013, "lr": 2.114841015182885e-06, "epoch": 2.9716407400294473, "percentage": 59.43, "elapsed_time": "2:01:35", "remaining_time": "1:22:59", "throughput": 20025.86, "total_tokens": 146101824}
|
|
{"current_steps": 46425, "total_steps": 78105, "loss": 0.2896, "lr": 2.1142890422429516e-06, "epoch": 2.9719608219704243, "percentage": 59.44, "elapsed_time": "2:01:36", "remaining_time": "1:22:58", "throughput": 20026.13, "total_tokens": 146117056}
|
|
{"current_steps": 46430, "total_steps": 78105, "loss": 0.1726, "lr": 2.1137370885634133e-06, "epoch": 2.9722809039114013, "percentage": 59.45, "elapsed_time": "2:01:37", "remaining_time": "1:22:58", "throughput": 20026.45, "total_tokens": 146133184}
|
|
{"current_steps": 46435, "total_steps": 78105, "loss": 0.2081, "lr": 2.113185154171833e-06, "epoch": 2.9726009858523783, "percentage": 59.45, "elapsed_time": "2:01:37", "remaining_time": "1:22:57", "throughput": 20026.75, "total_tokens": 146149440}
|
|
{"current_steps": 46440, "total_steps": 78105, "loss": 0.2804, "lr": 2.1126332390957714e-06, "epoch": 2.9729210677933553, "percentage": 59.46, "elapsed_time": "2:01:38", "remaining_time": "1:22:56", "throughput": 20026.96, "total_tokens": 146163776}
|
|
{"current_steps": 46445, "total_steps": 78105, "loss": 0.2193, "lr": 2.112081343362788e-06, "epoch": 2.973241149734332, "percentage": 59.46, "elapsed_time": "2:01:39", "remaining_time": "1:22:55", "throughput": 20027.3, "total_tokens": 146180224}
|
|
{"current_steps": 46450, "total_steps": 78105, "loss": 0.1553, "lr": 2.111529467000443e-06, "epoch": 2.973561231675309, "percentage": 59.47, "elapsed_time": "2:01:39", "remaining_time": "1:22:54", "throughput": 20027.54, "total_tokens": 146195072}
|
|
{"current_steps": 46455, "total_steps": 78105, "loss": 0.2079, "lr": 2.1109776100362912e-06, "epoch": 2.973881313616286, "percentage": 59.48, "elapsed_time": "2:01:40", "remaining_time": "1:22:53", "throughput": 20027.88, "total_tokens": 146211200}
|
|
{"current_steps": 46460, "total_steps": 78105, "loss": 0.173, "lr": 2.1104257724978924e-06, "epoch": 2.974201395557263, "percentage": 59.48, "elapsed_time": "2:01:41", "remaining_time": "1:22:52", "throughput": 20028.17, "total_tokens": 146227008}
|
|
{"current_steps": 46465, "total_steps": 78105, "loss": 0.3075, "lr": 2.1098739544128003e-06, "epoch": 2.9745214774982394, "percentage": 59.49, "elapsed_time": "2:01:41", "remaining_time": "1:22:52", "throughput": 20028.55, "total_tokens": 146243968}
|
|
{"current_steps": 46470, "total_steps": 78105, "loss": 0.2205, "lr": 2.1093221558085716e-06, "epoch": 2.9748415594392164, "percentage": 59.5, "elapsed_time": "2:01:42", "remaining_time": "1:22:51", "throughput": 20028.86, "total_tokens": 146259776}
|
|
{"current_steps": 46475, "total_steps": 78105, "loss": 0.1693, "lr": 2.108770376712759e-06, "epoch": 2.9751616413801933, "percentage": 59.5, "elapsed_time": "2:01:43", "remaining_time": "1:22:50", "throughput": 20029.22, "total_tokens": 146276736}
|
|
{"current_steps": 46480, "total_steps": 78105, "loss": 0.2099, "lr": 2.1082186171529166e-06, "epoch": 2.9754817233211703, "percentage": 59.51, "elapsed_time": "2:01:43", "remaining_time": "1:22:49", "throughput": 20029.49, "total_tokens": 146291904}
|
|
{"current_steps": 46485, "total_steps": 78105, "loss": 0.2414, "lr": 2.107666877156595e-06, "epoch": 2.9758018052621473, "percentage": 59.52, "elapsed_time": "2:01:44", "remaining_time": "1:22:48", "throughput": 20029.76, "total_tokens": 146306944}
|
|
{"current_steps": 46490, "total_steps": 78105, "loss": 0.3572, "lr": 2.107115156751346e-06, "epoch": 2.976121887203124, "percentage": 59.52, "elapsed_time": "2:01:45", "remaining_time": "1:22:47", "throughput": 20030.04, "total_tokens": 146322176}
|
|
{"current_steps": 46495, "total_steps": 78105, "loss": 0.1976, "lr": 2.1065634559647197e-06, "epoch": 2.976441969144101, "percentage": 59.53, "elapsed_time": "2:01:45", "remaining_time": "1:22:46", "throughput": 20030.34, "total_tokens": 146337728}
|
|
{"current_steps": 46500, "total_steps": 78105, "loss": 0.2953, "lr": 2.1060117748242647e-06, "epoch": 2.976762051085078, "percentage": 59.54, "elapsed_time": "2:01:46", "remaining_time": "1:22:46", "throughput": 20030.64, "total_tokens": 146353408}
|
|
{"current_steps": 46505, "total_steps": 78105, "loss": 0.1739, "lr": 2.105460113357531e-06, "epoch": 2.9770821330260544, "percentage": 59.54, "elapsed_time": "2:01:47", "remaining_time": "1:22:45", "throughput": 20030.99, "total_tokens": 146370048}
|
|
{"current_steps": 46510, "total_steps": 78105, "loss": 0.3382, "lr": 2.1049084715920628e-06, "epoch": 2.9774022149670314, "percentage": 59.55, "elapsed_time": "2:01:47", "remaining_time": "1:22:44", "throughput": 20031.21, "total_tokens": 146384384}
|
|
{"current_steps": 46515, "total_steps": 78105, "loss": 0.2107, "lr": 2.1043568495554086e-06, "epoch": 2.9777222969080084, "percentage": 59.55, "elapsed_time": "2:01:48", "remaining_time": "1:22:43", "throughput": 20031.53, "total_tokens": 146400320}
|
|
{"current_steps": 46520, "total_steps": 78105, "loss": 0.3154, "lr": 2.1038052472751114e-06, "epoch": 2.9780423788489854, "percentage": 59.56, "elapsed_time": "2:01:49", "remaining_time": "1:22:42", "throughput": 20031.77, "total_tokens": 146415168}
|
|
{"current_steps": 46525, "total_steps": 78105, "loss": 0.1901, "lr": 2.103253664778717e-06, "epoch": 2.9783624607899624, "percentage": 59.57, "elapsed_time": "2:01:49", "remaining_time": "1:22:41", "throughput": 20032.02, "total_tokens": 146430080}
|
|
{"current_steps": 46530, "total_steps": 78105, "loss": 0.2543, "lr": 2.1027021020937674e-06, "epoch": 2.9786825427309394, "percentage": 59.57, "elapsed_time": "2:01:50", "remaining_time": "1:22:40", "throughput": 20032.34, "total_tokens": 146446336}
|
|
{"current_steps": 46535, "total_steps": 78105, "loss": 0.1713, "lr": 2.1021505592478066e-06, "epoch": 2.979002624671916, "percentage": 59.58, "elapsed_time": "2:01:51", "remaining_time": "1:22:39", "throughput": 20032.65, "total_tokens": 146461952}
|
|
{"current_steps": 46540, "total_steps": 78105, "loss": 0.2301, "lr": 2.1015990362683733e-06, "epoch": 2.979322706612893, "percentage": 59.59, "elapsed_time": "2:01:51", "remaining_time": "1:22:39", "throughput": 20032.87, "total_tokens": 146476544}
|
|
{"current_steps": 46545, "total_steps": 78105, "loss": 0.2889, "lr": 2.101047533183009e-06, "epoch": 2.97964278855387, "percentage": 59.59, "elapsed_time": "2:01:52", "remaining_time": "1:22:38", "throughput": 20033.16, "total_tokens": 146491968}
|
|
{"current_steps": 46550, "total_steps": 78105, "loss": 0.3376, "lr": 2.1004960500192534e-06, "epoch": 2.9799628704948464, "percentage": 59.6, "elapsed_time": "2:01:53", "remaining_time": "1:22:37", "throughput": 20033.43, "total_tokens": 146507392}
|
|
{"current_steps": 46555, "total_steps": 78105, "loss": 0.1538, "lr": 2.0999445868046438e-06, "epoch": 2.9802829524358234, "percentage": 59.61, "elapsed_time": "2:01:53", "remaining_time": "1:22:36", "throughput": 20033.66, "total_tokens": 146521792}
|
|
{"current_steps": 46560, "total_steps": 78105, "loss": 0.1267, "lr": 2.0993931435667184e-06, "epoch": 2.9806030343768004, "percentage": 59.61, "elapsed_time": "2:01:54", "remaining_time": "1:22:35", "throughput": 20033.98, "total_tokens": 146538048}
|
|
{"current_steps": 46565, "total_steps": 78105, "loss": 0.253, "lr": 2.098841720333012e-06, "epoch": 2.9809231163177774, "percentage": 59.62, "elapsed_time": "2:01:55", "remaining_time": "1:22:34", "throughput": 20034.33, "total_tokens": 146554688}
|
|
{"current_steps": 46570, "total_steps": 78105, "loss": 0.1851, "lr": 2.0982903171310612e-06, "epoch": 2.9812431982587544, "percentage": 59.62, "elapsed_time": "2:01:55", "remaining_time": "1:22:33", "throughput": 20034.6, "total_tokens": 146569856}
|
|
{"current_steps": 46575, "total_steps": 78105, "loss": 0.1949, "lr": 2.0977389339883984e-06, "epoch": 2.9815632801997314, "percentage": 59.63, "elapsed_time": "2:01:56", "remaining_time": "1:22:33", "throughput": 20034.81, "total_tokens": 146584192}
|
|
{"current_steps": 46580, "total_steps": 78105, "loss": 0.2788, "lr": 2.097187570932559e-06, "epoch": 2.981883362140708, "percentage": 59.64, "elapsed_time": "2:01:57", "remaining_time": "1:22:32", "throughput": 20035.15, "total_tokens": 146600512}
|
|
{"current_steps": 46585, "total_steps": 78105, "loss": 0.2399, "lr": 2.0966362279910736e-06, "epoch": 2.982203444081685, "percentage": 59.64, "elapsed_time": "2:01:57", "remaining_time": "1:22:31", "throughput": 20035.41, "total_tokens": 146615744}
|
|
{"current_steps": 46590, "total_steps": 78105, "loss": 0.2708, "lr": 2.0960849051914743e-06, "epoch": 2.982523526022662, "percentage": 59.65, "elapsed_time": "2:01:58", "remaining_time": "1:22:30", "throughput": 20035.65, "total_tokens": 146630592}
|
|
{"current_steps": 46595, "total_steps": 78105, "loss": 0.268, "lr": 2.0955336025612904e-06, "epoch": 2.9828436079636385, "percentage": 59.66, "elapsed_time": "2:01:59", "remaining_time": "1:22:29", "throughput": 20035.9, "total_tokens": 146645440}
|
|
{"current_steps": 46600, "total_steps": 78105, "loss": 0.2394, "lr": 2.094982320128053e-06, "epoch": 2.9831636899046154, "percentage": 59.66, "elapsed_time": "2:01:59", "remaining_time": "1:22:28", "throughput": 20036.16, "total_tokens": 146660992}
|
|
{"current_steps": 46605, "total_steps": 78105, "loss": 0.2049, "lr": 2.094431057919287e-06, "epoch": 2.9834837718455924, "percentage": 59.67, "elapsed_time": "2:02:00", "remaining_time": "1:22:27", "throughput": 20036.48, "total_tokens": 146676992}
|
|
{"current_steps": 46610, "total_steps": 78105, "loss": 0.2071, "lr": 2.093879815962522e-06, "epoch": 2.9838038537865694, "percentage": 59.68, "elapsed_time": "2:02:01", "remaining_time": "1:22:27", "throughput": 20036.73, "total_tokens": 146692288}
|
|
{"current_steps": 46615, "total_steps": 78105, "loss": 0.2099, "lr": 2.0933285942852838e-06, "epoch": 2.9841239357275464, "percentage": 59.68, "elapsed_time": "2:02:01", "remaining_time": "1:22:26", "throughput": 20037.04, "total_tokens": 146708096}
|
|
{"current_steps": 46620, "total_steps": 78105, "loss": 0.1691, "lr": 2.092777392915097e-06, "epoch": 2.9844440176685234, "percentage": 59.69, "elapsed_time": "2:02:02", "remaining_time": "1:22:25", "throughput": 20037.31, "total_tokens": 146723328}
|
|
{"current_steps": 46625, "total_steps": 78105, "loss": 0.1584, "lr": 2.0922262118794874e-06, "epoch": 2.9847640996095, "percentage": 59.7, "elapsed_time": "2:02:03", "remaining_time": "1:22:24", "throughput": 20037.61, "total_tokens": 146739072}
|
|
{"current_steps": 46630, "total_steps": 78105, "loss": 0.2923, "lr": 2.091675051205975e-06, "epoch": 2.985084181550477, "percentage": 59.7, "elapsed_time": "2:02:03", "remaining_time": "1:22:23", "throughput": 20037.87, "total_tokens": 146754240}
|
|
{"current_steps": 46635, "total_steps": 78105, "loss": 0.1786, "lr": 2.0911239109220846e-06, "epoch": 2.985404263491454, "percentage": 59.71, "elapsed_time": "2:02:04", "remaining_time": "1:22:22", "throughput": 20038.11, "total_tokens": 146768960}
|
|
{"current_steps": 46640, "total_steps": 78105, "loss": 0.2447, "lr": 2.090572791055336e-06, "epoch": 2.9857243454324305, "percentage": 59.71, "elapsed_time": "2:02:05", "remaining_time": "1:22:21", "throughput": 20038.43, "total_tokens": 146784960}
|
|
{"current_steps": 46645, "total_steps": 78105, "loss": 0.2335, "lr": 2.09002169163325e-06, "epoch": 2.9860444273734075, "percentage": 59.72, "elapsed_time": "2:02:05", "remaining_time": "1:22:20", "throughput": 20038.76, "total_tokens": 146800896}
|
|
{"current_steps": 46650, "total_steps": 78105, "loss": 0.2207, "lr": 2.089470612683345e-06, "epoch": 2.9863645093143845, "percentage": 59.73, "elapsed_time": "2:02:06", "remaining_time": "1:22:20", "throughput": 20038.97, "total_tokens": 146815232}
|
|
{"current_steps": 46655, "total_steps": 78105, "loss": 0.3087, "lr": 2.0889195542331395e-06, "epoch": 2.9866845912553615, "percentage": 59.73, "elapsed_time": "2:02:07", "remaining_time": "1:22:19", "throughput": 20039.23, "total_tokens": 146830336}
|
|
{"current_steps": 46660, "total_steps": 78105, "loss": 0.1565, "lr": 2.0883685163101495e-06, "epoch": 2.9870046731963384, "percentage": 59.74, "elapsed_time": "2:02:07", "remaining_time": "1:22:18", "throughput": 20039.54, "total_tokens": 146846528}
|
|
{"current_steps": 46665, "total_steps": 78105, "loss": 0.1609, "lr": 2.0878174989418916e-06, "epoch": 2.987324755137315, "percentage": 59.75, "elapsed_time": "2:02:08", "remaining_time": "1:22:17", "throughput": 20039.8, "total_tokens": 146861824}
|
|
{"current_steps": 46670, "total_steps": 78105, "loss": 0.2016, "lr": 2.0872665021558817e-06, "epoch": 2.987644837078292, "percentage": 59.75, "elapsed_time": "2:02:09", "remaining_time": "1:22:16", "throughput": 20040.14, "total_tokens": 146877952}
|
|
{"current_steps": 46675, "total_steps": 78105, "loss": 0.2071, "lr": 2.086715525979632e-06, "epoch": 2.987964919019269, "percentage": 59.76, "elapsed_time": "2:02:09", "remaining_time": "1:22:15", "throughput": 20040.48, "total_tokens": 146894272}
|
|
{"current_steps": 46680, "total_steps": 78105, "loss": 0.1368, "lr": 2.086164570440657e-06, "epoch": 2.988285000960246, "percentage": 59.77, "elapsed_time": "2:02:10", "remaining_time": "1:22:14", "throughput": 20040.74, "total_tokens": 146909248}
|
|
{"current_steps": 46685, "total_steps": 78105, "loss": 0.3046, "lr": 2.0856136355664673e-06, "epoch": 2.9886050829012225, "percentage": 59.77, "elapsed_time": "2:02:11", "remaining_time": "1:22:14", "throughput": 20040.97, "total_tokens": 146923840}
|
|
{"current_steps": 46690, "total_steps": 78105, "loss": 0.2109, "lr": 2.085062721384574e-06, "epoch": 2.9889251648421995, "percentage": 59.78, "elapsed_time": "2:02:11", "remaining_time": "1:22:13", "throughput": 20041.23, "total_tokens": 146938816}
|
|
{"current_steps": 46695, "total_steps": 78105, "loss": 0.2048, "lr": 2.084511827922488e-06, "epoch": 2.9892452467831765, "percentage": 59.78, "elapsed_time": "2:02:12", "remaining_time": "1:22:12", "throughput": 20041.58, "total_tokens": 146955648}
|
|
{"current_steps": 46700, "total_steps": 78105, "loss": 0.2616, "lr": 2.0839609552077164e-06, "epoch": 2.9895653287241535, "percentage": 59.79, "elapsed_time": "2:02:13", "remaining_time": "1:22:11", "throughput": 20041.9, "total_tokens": 146971840}
|
|
{"current_steps": 46705, "total_steps": 78105, "loss": 0.1943, "lr": 2.083410103267767e-06, "epoch": 2.9898854106651305, "percentage": 59.8, "elapsed_time": "2:02:13", "remaining_time": "1:22:10", "throughput": 20042.21, "total_tokens": 146987840}
|
|
{"current_steps": 46710, "total_steps": 78105, "loss": 0.2437, "lr": 2.082859272130149e-06, "epoch": 2.990205492606107, "percentage": 59.8, "elapsed_time": "2:02:14", "remaining_time": "1:22:09", "throughput": 20042.52, "total_tokens": 147003840}
|
|
{"current_steps": 46715, "total_steps": 78105, "loss": 0.2044, "lr": 2.0823084618223644e-06, "epoch": 2.990525574547084, "percentage": 59.81, "elapsed_time": "2:02:15", "remaining_time": "1:22:08", "throughput": 20042.79, "total_tokens": 147019200}
|
|
{"current_steps": 46720, "total_steps": 78105, "loss": 0.1568, "lr": 2.08175767237192e-06, "epoch": 2.990845656488061, "percentage": 59.82, "elapsed_time": "2:02:15", "remaining_time": "1:22:08", "throughput": 20043.1, "total_tokens": 147035456}
|
|
{"current_steps": 46725, "total_steps": 78105, "loss": 0.1793, "lr": 2.081206903806319e-06, "epoch": 2.991165738429038, "percentage": 59.82, "elapsed_time": "2:02:16", "remaining_time": "1:22:07", "throughput": 20043.49, "total_tokens": 147053120}
|
|
{"current_steps": 46730, "total_steps": 78105, "loss": 0.2353, "lr": 2.080656156153063e-06, "epoch": 2.9914858203700145, "percentage": 59.83, "elapsed_time": "2:02:17", "remaining_time": "1:22:06", "throughput": 20043.8, "total_tokens": 147068992}
|
|
{"current_steps": 46735, "total_steps": 78105, "loss": 0.1396, "lr": 2.0801054294396557e-06, "epoch": 2.9918059023109915, "percentage": 59.84, "elapsed_time": "2:02:18", "remaining_time": "1:22:05", "throughput": 20044.16, "total_tokens": 147085632}
|
|
{"current_steps": 46740, "total_steps": 78105, "loss": 0.267, "lr": 2.079554723693595e-06, "epoch": 2.9921259842519685, "percentage": 59.84, "elapsed_time": "2:02:18", "remaining_time": "1:22:04", "throughput": 20044.49, "total_tokens": 147102144}
|
|
{"current_steps": 46745, "total_steps": 78105, "loss": 0.2866, "lr": 2.0790040389423826e-06, "epoch": 2.9924460661929455, "percentage": 59.85, "elapsed_time": "2:02:19", "remaining_time": "1:22:03", "throughput": 20044.93, "total_tokens": 147120384}
|
|
{"current_steps": 46750, "total_steps": 78105, "loss": 0.1751, "lr": 2.0784533752135146e-06, "epoch": 2.9927661481339225, "percentage": 59.86, "elapsed_time": "2:02:20", "remaining_time": "1:22:03", "throughput": 20045.17, "total_tokens": 147135232}
|
|
{"current_steps": 46755, "total_steps": 78105, "loss": 0.2485, "lr": 2.07790273253449e-06, "epoch": 2.993086230074899, "percentage": 59.86, "elapsed_time": "2:02:20", "remaining_time": "1:22:02", "throughput": 20045.48, "total_tokens": 147151168}
|
|
{"current_steps": 46760, "total_steps": 78105, "loss": 0.2188, "lr": 2.077352110932803e-06, "epoch": 2.993406312015876, "percentage": 59.87, "elapsed_time": "2:02:21", "remaining_time": "1:22:01", "throughput": 20045.75, "total_tokens": 147166208}
|
|
{"current_steps": 46765, "total_steps": 78105, "loss": 0.2365, "lr": 2.0768015104359517e-06, "epoch": 2.993726393956853, "percentage": 59.87, "elapsed_time": "2:02:22", "remaining_time": "1:22:00", "throughput": 20046.08, "total_tokens": 147182528}
|
|
{"current_steps": 46770, "total_steps": 78105, "loss": 0.2243, "lr": 2.0762509310714272e-06, "epoch": 2.9940464758978296, "percentage": 59.88, "elapsed_time": "2:02:22", "remaining_time": "1:21:59", "throughput": 20046.34, "total_tokens": 147197888}
|
|
{"current_steps": 46775, "total_steps": 78105, "loss": 0.3133, "lr": 2.075700372866725e-06, "epoch": 2.9943665578388066, "percentage": 59.89, "elapsed_time": "2:02:23", "remaining_time": "1:21:58", "throughput": 20046.68, "total_tokens": 147214592}
|
|
{"current_steps": 46780, "total_steps": 78105, "loss": 0.2857, "lr": 2.0751498358493355e-06, "epoch": 2.9946866397797836, "percentage": 59.89, "elapsed_time": "2:02:24", "remaining_time": "1:21:57", "throughput": 20047.04, "total_tokens": 147231680}
|
|
{"current_steps": 46785, "total_steps": 78105, "loss": 0.1859, "lr": 2.07459932004675e-06, "epoch": 2.9950067217207605, "percentage": 59.9, "elapsed_time": "2:02:24", "remaining_time": "1:21:57", "throughput": 20047.33, "total_tokens": 147247104}
|
|
{"current_steps": 46790, "total_steps": 78105, "loss": 0.1572, "lr": 2.0740488254864594e-06, "epoch": 2.9953268036617375, "percentage": 59.91, "elapsed_time": "2:02:25", "remaining_time": "1:21:56", "throughput": 20047.63, "total_tokens": 147262656}
|
|
{"current_steps": 46795, "total_steps": 78105, "loss": 0.2312, "lr": 2.073498352195951e-06, "epoch": 2.9956468856027145, "percentage": 59.91, "elapsed_time": "2:02:26", "remaining_time": "1:21:55", "throughput": 20047.86, "total_tokens": 147276992}
|
|
{"current_steps": 46800, "total_steps": 78105, "loss": 0.2586, "lr": 2.0729479002027147e-06, "epoch": 2.995966967543691, "percentage": 59.92, "elapsed_time": "2:02:27", "remaining_time": "1:21:54", "throughput": 20047.51, "total_tokens": 147293120}
|
|
{"current_steps": 46805, "total_steps": 78105, "loss": 0.1988, "lr": 2.0723974695342346e-06, "epoch": 2.996287049484668, "percentage": 59.93, "elapsed_time": "2:02:27", "remaining_time": "1:21:53", "throughput": 20047.77, "total_tokens": 147308160}
|
|
{"current_steps": 46810, "total_steps": 78105, "loss": 0.1825, "lr": 2.071847060217998e-06, "epoch": 2.996607131425645, "percentage": 59.93, "elapsed_time": "2:02:28", "remaining_time": "1:21:52", "throughput": 20048.03, "total_tokens": 147323200}
|
|
{"current_steps": 46815, "total_steps": 78105, "loss": 0.1861, "lr": 2.0712966722814883e-06, "epoch": 2.9969272133666216, "percentage": 59.94, "elapsed_time": "2:02:29", "remaining_time": "1:21:52", "throughput": 20048.32, "total_tokens": 147338688}
|
|
{"current_steps": 46820, "total_steps": 78105, "loss": 0.2376, "lr": 2.070746305752191e-06, "epoch": 2.9972472953075986, "percentage": 59.94, "elapsed_time": "2:02:29", "remaining_time": "1:21:51", "throughput": 20048.59, "total_tokens": 147353600}
|
|
{"current_steps": 46825, "total_steps": 78105, "loss": 0.2324, "lr": 2.0701959606575863e-06, "epoch": 2.9975673772485756, "percentage": 59.95, "elapsed_time": "2:02:30", "remaining_time": "1:21:50", "throughput": 20048.89, "total_tokens": 147369472}
|
|
{"current_steps": 46830, "total_steps": 78105, "loss": 0.1667, "lr": 2.0696456370251576e-06, "epoch": 2.9978874591895526, "percentage": 59.96, "elapsed_time": "2:02:31", "remaining_time": "1:21:49", "throughput": 20049.16, "total_tokens": 147384896}
|
|
{"current_steps": 46835, "total_steps": 78105, "loss": 0.1573, "lr": 2.069095334882383e-06, "epoch": 2.9982075411305296, "percentage": 59.96, "elapsed_time": "2:02:31", "remaining_time": "1:21:48", "throughput": 20049.45, "total_tokens": 147400832}
|
|
{"current_steps": 46840, "total_steps": 78105, "loss": 0.1533, "lr": 2.0685450542567432e-06, "epoch": 2.9985276230715066, "percentage": 59.97, "elapsed_time": "2:02:32", "remaining_time": "1:21:47", "throughput": 20049.78, "total_tokens": 147417280}
|
|
{"current_steps": 46845, "total_steps": 78105, "loss": 0.2064, "lr": 2.0679947951757164e-06, "epoch": 2.998847705012483, "percentage": 59.98, "elapsed_time": "2:02:33", "remaining_time": "1:21:46", "throughput": 20050.09, "total_tokens": 147433408}
|
|
{"current_steps": 46850, "total_steps": 78105, "loss": 0.216, "lr": 2.0674445576667785e-06, "epoch": 2.99916778695346, "percentage": 59.98, "elapsed_time": "2:02:33", "remaining_time": "1:21:46", "throughput": 20050.38, "total_tokens": 147448960}
|
|
{"current_steps": 46855, "total_steps": 78105, "loss": 0.2719, "lr": 2.0668943417574073e-06, "epoch": 2.999487868894437, "percentage": 59.99, "elapsed_time": "2:02:34", "remaining_time": "1:21:45", "throughput": 20050.61, "total_tokens": 147463744}
|
|
{"current_steps": 46860, "total_steps": 78105, "loss": 0.2388, "lr": 2.066344147475076e-06, "epoch": 2.9998079508354136, "percentage": 60.0, "elapsed_time": "2:02:35", "remaining_time": "1:21:44", "throughput": 20050.97, "total_tokens": 147480512}
|
|
{"current_steps": 46865, "total_steps": 78105, "loss": 0.1646, "lr": 2.0657939748472593e-06, "epoch": 3.0001280327763906, "percentage": 60.0, "elapsed_time": "2:02:36", "remaining_time": "1:21:43", "throughput": 20050.98, "total_tokens": 147496384}
|
|
{"current_steps": 46870, "total_steps": 78105, "loss": 0.1496, "lr": 2.06524382390143e-06, "epoch": 3.0004481147173676, "percentage": 60.01, "elapsed_time": "2:02:36", "remaining_time": "1:21:42", "throughput": 20051.21, "total_tokens": 147511104}
|
|
{"current_steps": 46872, "total_steps": 78105, "eval_loss": 0.5023031234741211, "epoch": 3.0005761474937582, "percentage": 60.01, "elapsed_time": "2:03:27", "remaining_time": "1:22:16", "throughput": 19913.36, "total_tokens": 147516736}
|
|
{"current_steps": 46875, "total_steps": 78105, "loss": 0.1556, "lr": 2.06469369466506e-06, "epoch": 3.0007681966583446, "percentage": 60.02, "elapsed_time": "2:04:02", "remaining_time": "1:22:38", "throughput": 19822.18, "total_tokens": 147525440}
|
|
{"current_steps": 46880, "total_steps": 78105, "loss": 0.1, "lr": 2.0641435871656183e-06, "epoch": 3.0010882785993216, "percentage": 60.02, "elapsed_time": "2:04:03", "remaining_time": "1:22:37", "throughput": 19822.46, "total_tokens": 147540800}
|
|
{"current_steps": 46885, "total_steps": 78105, "loss": 0.1753, "lr": 2.063593501430577e-06, "epoch": 3.001408360540298, "percentage": 60.03, "elapsed_time": "2:04:03", "remaining_time": "1:22:36", "throughput": 19822.78, "total_tokens": 147556992}
|
|
{"current_steps": 46890, "total_steps": 78105, "loss": 0.1897, "lr": 2.0630434374874017e-06, "epoch": 3.001728442481275, "percentage": 60.03, "elapsed_time": "2:04:04", "remaining_time": "1:22:35", "throughput": 19823.14, "total_tokens": 147574080}
|
|
{"current_steps": 46895, "total_steps": 78105, "loss": 0.1506, "lr": 2.062493395363562e-06, "epoch": 3.002048524422252, "percentage": 60.04, "elapsed_time": "2:04:05", "remaining_time": "1:22:34", "throughput": 19823.41, "total_tokens": 147588864}
|
|
{"current_steps": 46900, "total_steps": 78105, "loss": 0.1401, "lr": 2.0619433750865227e-06, "epoch": 3.002368606363229, "percentage": 60.05, "elapsed_time": "2:04:05", "remaining_time": "1:22:34", "throughput": 19823.67, "total_tokens": 147603776}
|
|
{"current_steps": 46905, "total_steps": 78105, "loss": 0.1737, "lr": 2.061393376683749e-06, "epoch": 3.0026886883042057, "percentage": 60.05, "elapsed_time": "2:04:06", "remaining_time": "1:22:33", "throughput": 19824.03, "total_tokens": 147620288}
|
|
{"current_steps": 46910, "total_steps": 78105, "loss": 0.1351, "lr": 2.060843400182707e-06, "epoch": 3.0030087702451826, "percentage": 60.06, "elapsed_time": "2:04:07", "remaining_time": "1:22:32", "throughput": 19824.28, "total_tokens": 147634816}
|
|
{"current_steps": 46915, "total_steps": 78105, "loss": 0.1831, "lr": 2.0602934456108567e-06, "epoch": 3.0033288521861596, "percentage": 60.07, "elapsed_time": "2:04:07", "remaining_time": "1:22:31", "throughput": 19824.69, "total_tokens": 147652032}
|
|
{"current_steps": 46920, "total_steps": 78105, "loss": 0.1352, "lr": 2.059743512995662e-06, "epoch": 3.0036489341271366, "percentage": 60.07, "elapsed_time": "2:04:08", "remaining_time": "1:22:30", "throughput": 19824.95, "total_tokens": 147667136}
|
|
{"current_steps": 46925, "total_steps": 78105, "loss": 0.1296, "lr": 2.059193602364583e-06, "epoch": 3.0039690160681136, "percentage": 60.08, "elapsed_time": "2:04:09", "remaining_time": "1:22:29", "throughput": 19825.3, "total_tokens": 147683520}
|
|
{"current_steps": 46930, "total_steps": 78105, "loss": 0.1696, "lr": 2.05864371374508e-06, "epoch": 3.00428909800909, "percentage": 60.09, "elapsed_time": "2:04:09", "remaining_time": "1:22:28", "throughput": 19825.58, "total_tokens": 147698496}
|
|
{"current_steps": 46935, "total_steps": 78105, "loss": 0.1052, "lr": 2.05809384716461e-06, "epoch": 3.004609179950067, "percentage": 60.09, "elapsed_time": "2:04:10", "remaining_time": "1:22:28", "throughput": 19825.91, "total_tokens": 147714496}
|
|
{"current_steps": 46940, "total_steps": 78105, "loss": 0.2007, "lr": 2.057544002650632e-06, "epoch": 3.004929261891044, "percentage": 60.1, "elapsed_time": "2:04:11", "remaining_time": "1:22:27", "throughput": 19826.22, "total_tokens": 147730048}
|
|
{"current_steps": 46945, "total_steps": 78105, "loss": 0.1726, "lr": 2.0569941802306018e-06, "epoch": 3.005249343832021, "percentage": 60.1, "elapsed_time": "2:04:11", "remaining_time": "1:22:26", "throughput": 19826.58, "total_tokens": 147746624}
|
|
{"current_steps": 46950, "total_steps": 78105, "loss": 0.1319, "lr": 2.0564443799319747e-06, "epoch": 3.0055694257729977, "percentage": 60.11, "elapsed_time": "2:04:12", "remaining_time": "1:22:25", "throughput": 19826.87, "total_tokens": 147761792}
|
|
{"current_steps": 46955, "total_steps": 78105, "loss": 0.1215, "lr": 2.0558946017822047e-06, "epoch": 3.0058895077139747, "percentage": 60.12, "elapsed_time": "2:04:13", "remaining_time": "1:22:24", "throughput": 19827.22, "total_tokens": 147777984}
|
|
{"current_steps": 46960, "total_steps": 78105, "loss": 0.1655, "lr": 2.0553448458087445e-06, "epoch": 3.0062095896549517, "percentage": 60.12, "elapsed_time": "2:04:13", "remaining_time": "1:22:23", "throughput": 19827.52, "total_tokens": 147793600}
|
|
{"current_steps": 46965, "total_steps": 78105, "loss": 0.1754, "lr": 2.0547951120390476e-06, "epoch": 3.0065296715959287, "percentage": 60.13, "elapsed_time": "2:04:14", "remaining_time": "1:22:22", "throughput": 19827.84, "total_tokens": 147809728}
|
|
{"current_steps": 46970, "total_steps": 78105, "loss": 0.0864, "lr": 2.054245400500563e-06, "epoch": 3.0068497535369056, "percentage": 60.14, "elapsed_time": "2:04:15", "remaining_time": "1:22:21", "throughput": 19828.15, "total_tokens": 147825600}
|
|
{"current_steps": 46975, "total_steps": 78105, "loss": 0.1677, "lr": 2.0536957112207424e-06, "epoch": 3.007169835477882, "percentage": 60.14, "elapsed_time": "2:04:16", "remaining_time": "1:22:21", "throughput": 19828.48, "total_tokens": 147841152}
|
|
{"current_steps": 46980, "total_steps": 78105, "loss": 0.1463, "lr": 2.0531460442270324e-06, "epoch": 3.007489917418859, "percentage": 60.15, "elapsed_time": "2:04:16", "remaining_time": "1:22:20", "throughput": 19828.78, "total_tokens": 147856512}
|
|
{"current_steps": 46985, "total_steps": 78105, "loss": 0.0892, "lr": 2.0525963995468815e-06, "epoch": 3.007809999359836, "percentage": 60.16, "elapsed_time": "2:04:17", "remaining_time": "1:22:19", "throughput": 19829.14, "total_tokens": 147873088}
|
|
{"current_steps": 46990, "total_steps": 78105, "loss": 0.2207, "lr": 2.052046777207736e-06, "epoch": 3.008130081300813, "percentage": 60.16, "elapsed_time": "2:04:18", "remaining_time": "1:22:18", "throughput": 19829.42, "total_tokens": 147888448}
|
|
{"current_steps": 46995, "total_steps": 78105, "loss": 0.1632, "lr": 2.0514971772370413e-06, "epoch": 3.0084501632417897, "percentage": 60.17, "elapsed_time": "2:04:18", "remaining_time": "1:22:17", "throughput": 19829.64, "total_tokens": 147902464}
|
|
{"current_steps": 47000, "total_steps": 78105, "loss": 0.1545, "lr": 2.050947599662241e-06, "epoch": 3.0087702451827667, "percentage": 60.18, "elapsed_time": "2:04:19", "remaining_time": "1:22:16", "throughput": 19829.93, "total_tokens": 147918272}
|
|
{"current_steps": 47005, "total_steps": 78105, "loss": 0.1162, "lr": 2.0503980445107786e-06, "epoch": 3.0090903271237437, "percentage": 60.18, "elapsed_time": "2:04:20", "remaining_time": "1:22:15", "throughput": 19830.23, "total_tokens": 147933696}
|
|
{"current_steps": 47010, "total_steps": 78105, "loss": 0.1201, "lr": 2.049848511810096e-06, "epoch": 3.0094104090647207, "percentage": 60.19, "elapsed_time": "2:04:20", "remaining_time": "1:22:14", "throughput": 19830.53, "total_tokens": 147949376}
|
|
{"current_steps": 47015, "total_steps": 78105, "loss": 0.193, "lr": 2.049299001587634e-06, "epoch": 3.0097304910056977, "percentage": 60.19, "elapsed_time": "2:04:21", "remaining_time": "1:22:14", "throughput": 19830.82, "total_tokens": 147964928}
|
|
{"current_steps": 47020, "total_steps": 78105, "loss": 0.1333, "lr": 2.0487495138708328e-06, "epoch": 3.010050572946674, "percentage": 60.2, "elapsed_time": "2:04:22", "remaining_time": "1:22:13", "throughput": 19831.14, "total_tokens": 147980672}
|
|
{"current_steps": 47025, "total_steps": 78105, "loss": 0.2778, "lr": 2.0482000486871295e-06, "epoch": 3.010370654887651, "percentage": 60.21, "elapsed_time": "2:04:22", "remaining_time": "1:22:12", "throughput": 19831.48, "total_tokens": 147996992}
|
|
{"current_steps": 47030, "total_steps": 78105, "loss": 0.1639, "lr": 2.0476506060639633e-06, "epoch": 3.010690736828628, "percentage": 60.21, "elapsed_time": "2:04:23", "remaining_time": "1:22:11", "throughput": 19831.78, "total_tokens": 148012736}
|
|
{"current_steps": 47035, "total_steps": 78105, "loss": 0.1334, "lr": 2.047101186028769e-06, "epoch": 3.011010818769605, "percentage": 60.22, "elapsed_time": "2:04:24", "remaining_time": "1:22:10", "throughput": 19832.07, "total_tokens": 148028544}
|
|
{"current_steps": 47040, "total_steps": 78105, "loss": 0.0719, "lr": 2.046551788608983e-06, "epoch": 3.0113309007105817, "percentage": 60.23, "elapsed_time": "2:04:24", "remaining_time": "1:22:09", "throughput": 19832.33, "total_tokens": 148043392}
|
|
{"current_steps": 47045, "total_steps": 78105, "loss": 0.1151, "lr": 2.0460024138320377e-06, "epoch": 3.0116509826515587, "percentage": 60.23, "elapsed_time": "2:04:25", "remaining_time": "1:22:08", "throughput": 19832.59, "total_tokens": 148058368}
|
|
{"current_steps": 47050, "total_steps": 78105, "loss": 0.1467, "lr": 2.0454530617253686e-06, "epoch": 3.0119710645925357, "percentage": 60.24, "elapsed_time": "2:04:26", "remaining_time": "1:22:07", "throughput": 19832.8, "total_tokens": 148072256}
|
|
{"current_steps": 47055, "total_steps": 78105, "loss": 0.1342, "lr": 2.0449037323164046e-06, "epoch": 3.0122911465335127, "percentage": 60.25, "elapsed_time": "2:04:26", "remaining_time": "1:22:07", "throughput": 19833.09, "total_tokens": 148087616}
|
|
{"current_steps": 47060, "total_steps": 78105, "loss": 0.1396, "lr": 2.0443544256325786e-06, "epoch": 3.0126112284744893, "percentage": 60.25, "elapsed_time": "2:04:27", "remaining_time": "1:22:06", "throughput": 19833.39, "total_tokens": 148103680}
|
|
{"current_steps": 47065, "total_steps": 78105, "loss": 0.1688, "lr": 2.0438051417013184e-06, "epoch": 3.0129313104154662, "percentage": 60.26, "elapsed_time": "2:04:28", "remaining_time": "1:22:05", "throughput": 19833.66, "total_tokens": 148118848}
|
|
{"current_steps": 47070, "total_steps": 78105, "loss": 0.2015, "lr": 2.043255880550054e-06, "epoch": 3.0132513923564432, "percentage": 60.27, "elapsed_time": "2:04:28", "remaining_time": "1:22:04", "throughput": 19834.0, "total_tokens": 148135296}
|
|
{"current_steps": 47075, "total_steps": 78105, "loss": 0.1305, "lr": 2.0427066422062107e-06, "epoch": 3.0135714742974202, "percentage": 60.27, "elapsed_time": "2:04:29", "remaining_time": "1:22:03", "throughput": 19834.27, "total_tokens": 148151040}
|
|
{"current_steps": 47080, "total_steps": 78105, "loss": 0.2057, "lr": 2.042157426697216e-06, "epoch": 3.013891556238397, "percentage": 60.28, "elapsed_time": "2:04:30", "remaining_time": "1:22:02", "throughput": 19834.57, "total_tokens": 148166784}
|
|
{"current_steps": 47085, "total_steps": 78105, "loss": 0.1627, "lr": 2.041608234050496e-06, "epoch": 3.0142116381793738, "percentage": 60.28, "elapsed_time": "2:04:30", "remaining_time": "1:22:01", "throughput": 19834.89, "total_tokens": 148182912}
|
|
{"current_steps": 47090, "total_steps": 78105, "loss": 0.1708, "lr": 2.0410590642934715e-06, "epoch": 3.0145317201203508, "percentage": 60.29, "elapsed_time": "2:04:31", "remaining_time": "1:22:00", "throughput": 19835.17, "total_tokens": 148198336}
|
|
{"current_steps": 47095, "total_steps": 78105, "loss": 0.1436, "lr": 2.040509917453568e-06, "epoch": 3.0148518020613277, "percentage": 60.3, "elapsed_time": "2:04:32", "remaining_time": "1:22:00", "throughput": 19835.5, "total_tokens": 148214464}
|
|
{"current_steps": 47100, "total_steps": 78105, "loss": 0.118, "lr": 2.039960793558205e-06, "epoch": 3.0151718840023047, "percentage": 60.3, "elapsed_time": "2:04:32", "remaining_time": "1:21:59", "throughput": 19835.75, "total_tokens": 148229760}
|
|
{"current_steps": 47105, "total_steps": 78105, "loss": 0.141, "lr": 2.039411692634804e-06, "epoch": 3.0154919659432813, "percentage": 60.31, "elapsed_time": "2:04:33", "remaining_time": "1:21:58", "throughput": 19836.06, "total_tokens": 148246208}
|
|
{"current_steps": 47110, "total_steps": 78105, "loss": 0.1982, "lr": 2.0388626147107837e-06, "epoch": 3.0158120478842583, "percentage": 60.32, "elapsed_time": "2:04:34", "remaining_time": "1:21:57", "throughput": 19836.33, "total_tokens": 148261312}
|
|
{"current_steps": 47115, "total_steps": 78105, "loss": 0.1154, "lr": 2.0383135598135635e-06, "epoch": 3.0161321298252353, "percentage": 60.32, "elapsed_time": "2:04:34", "remaining_time": "1:21:56", "throughput": 19836.66, "total_tokens": 148277824}
|
|
{"current_steps": 47120, "total_steps": 78105, "loss": 0.1589, "lr": 2.0377645279705583e-06, "epoch": 3.0164522117662123, "percentage": 60.33, "elapsed_time": "2:04:35", "remaining_time": "1:21:55", "throughput": 19836.88, "total_tokens": 148292480}
|
|
{"current_steps": 47125, "total_steps": 78105, "loss": 0.1566, "lr": 2.037215519209185e-06, "epoch": 3.0167722937071892, "percentage": 60.34, "elapsed_time": "2:04:36", "remaining_time": "1:21:54", "throughput": 19837.23, "total_tokens": 148308544}
|
|
{"current_steps": 47130, "total_steps": 78105, "loss": 0.182, "lr": 2.0366665335568576e-06, "epoch": 3.017092375648166, "percentage": 60.34, "elapsed_time": "2:04:36", "remaining_time": "1:21:54", "throughput": 19837.5, "total_tokens": 148323840}
|
|
{"current_steps": 47135, "total_steps": 78105, "loss": 0.1763, "lr": 2.0361175710409902e-06, "epoch": 3.017412457589143, "percentage": 60.35, "elapsed_time": "2:04:37", "remaining_time": "1:21:53", "throughput": 19837.77, "total_tokens": 148339264}
|
|
{"current_steps": 47140, "total_steps": 78105, "loss": 0.2021, "lr": 2.0355686316889964e-06, "epoch": 3.0177325395301198, "percentage": 60.35, "elapsed_time": "2:04:38", "remaining_time": "1:21:52", "throughput": 19838.04, "total_tokens": 148354944}
|
|
{"current_steps": 47145, "total_steps": 78105, "loss": 0.1247, "lr": 2.0350197155282848e-06, "epoch": 3.0180526214710968, "percentage": 60.36, "elapsed_time": "2:04:38", "remaining_time": "1:21:51", "throughput": 19838.34, "total_tokens": 148370880}
|
|
{"current_steps": 47150, "total_steps": 78105, "loss": 0.1669, "lr": 2.034470822586267e-06, "epoch": 3.0183727034120733, "percentage": 60.37, "elapsed_time": "2:04:39", "remaining_time": "1:21:50", "throughput": 19838.62, "total_tokens": 148386368}
|
|
{"current_steps": 47155, "total_steps": 78105, "loss": 0.205, "lr": 2.0339219528903508e-06, "epoch": 3.0186927853530503, "percentage": 60.37, "elapsed_time": "2:04:40", "remaining_time": "1:21:49", "throughput": 19838.88, "total_tokens": 148401600}
|
|
{"current_steps": 47160, "total_steps": 78105, "loss": 0.216, "lr": 2.033373106467946e-06, "epoch": 3.0190128672940273, "percentage": 60.38, "elapsed_time": "2:04:41", "remaining_time": "1:21:48", "throughput": 19839.12, "total_tokens": 148416576}
|
|
{"current_steps": 47165, "total_steps": 78105, "loss": 0.164, "lr": 2.0328242833464563e-06, "epoch": 3.0193329492350043, "percentage": 60.39, "elapsed_time": "2:04:41", "remaining_time": "1:21:47", "throughput": 19839.4, "total_tokens": 148431872}
|
|
{"current_steps": 47170, "total_steps": 78105, "loss": 0.1878, "lr": 2.0322754835532897e-06, "epoch": 3.019653031175981, "percentage": 60.39, "elapsed_time": "2:04:42", "remaining_time": "1:21:47", "throughput": 19839.66, "total_tokens": 148446784}
|
|
{"current_steps": 47175, "total_steps": 78105, "loss": 0.132, "lr": 2.0317267071158482e-06, "epoch": 3.019973113116958, "percentage": 60.4, "elapsed_time": "2:04:43", "remaining_time": "1:21:46", "throughput": 19839.95, "total_tokens": 148462592}
|
|
{"current_steps": 47180, "total_steps": 78105, "loss": 0.1745, "lr": 2.031177954061536e-06, "epoch": 3.020293195057935, "percentage": 60.41, "elapsed_time": "2:04:43", "remaining_time": "1:21:45", "throughput": 19840.21, "total_tokens": 148477632}
|
|
{"current_steps": 47185, "total_steps": 78105, "loss": 0.1788, "lr": 2.0306292244177543e-06, "epoch": 3.020613276998912, "percentage": 60.41, "elapsed_time": "2:04:44", "remaining_time": "1:21:44", "throughput": 19840.52, "total_tokens": 148493312}
|
|
{"current_steps": 47190, "total_steps": 78105, "loss": 0.1414, "lr": 2.0300805182119045e-06, "epoch": 3.020933358939889, "percentage": 60.42, "elapsed_time": "2:04:45", "remaining_time": "1:21:43", "throughput": 19840.89, "total_tokens": 148510144}
|
|
{"current_steps": 47195, "total_steps": 78105, "loss": 0.1231, "lr": 2.0295318354713867e-06, "epoch": 3.0212534408808653, "percentage": 60.43, "elapsed_time": "2:04:45", "remaining_time": "1:21:42", "throughput": 19841.12, "total_tokens": 148524480}
|
|
{"current_steps": 47200, "total_steps": 78105, "loss": 0.1774, "lr": 2.0289831762235976e-06, "epoch": 3.0215735228218423, "percentage": 60.43, "elapsed_time": "2:04:46", "remaining_time": "1:21:41", "throughput": 19841.44, "total_tokens": 148540416}
|
|
{"current_steps": 47205, "total_steps": 78105, "loss": 0.1439, "lr": 2.0284345404959364e-06, "epoch": 3.0218936047628193, "percentage": 60.44, "elapsed_time": "2:04:47", "remaining_time": "1:21:40", "throughput": 19841.75, "total_tokens": 148556288}
|
|
{"current_steps": 47210, "total_steps": 78105, "loss": 0.1966, "lr": 2.0278859283157966e-06, "epoch": 3.0222136867037963, "percentage": 60.44, "elapsed_time": "2:04:47", "remaining_time": "1:21:40", "throughput": 19842.06, "total_tokens": 148571712}
|
|
{"current_steps": 47215, "total_steps": 78105, "loss": 0.1737, "lr": 2.027337339710575e-06, "epoch": 3.022533768644773, "percentage": 60.45, "elapsed_time": "2:04:48", "remaining_time": "1:21:39", "throughput": 19842.35, "total_tokens": 148587264}
|
|
{"current_steps": 47220, "total_steps": 78105, "loss": 0.1391, "lr": 2.0267887747076642e-06, "epoch": 3.02285385058575, "percentage": 60.46, "elapsed_time": "2:04:49", "remaining_time": "1:21:38", "throughput": 19842.66, "total_tokens": 148602816}
|
|
{"current_steps": 47225, "total_steps": 78105, "loss": 0.1438, "lr": 2.0262402333344585e-06, "epoch": 3.023173932526727, "percentage": 60.46, "elapsed_time": "2:04:49", "remaining_time": "1:21:37", "throughput": 19842.96, "total_tokens": 148618368}
|
|
{"current_steps": 47230, "total_steps": 78105, "loss": 0.1759, "lr": 2.025691715618346e-06, "epoch": 3.023494014467704, "percentage": 60.47, "elapsed_time": "2:04:50", "remaining_time": "1:21:36", "throughput": 19843.19, "total_tokens": 148632960}
|
|
{"current_steps": 47235, "total_steps": 78105, "loss": 0.148, "lr": 2.0251432215867197e-06, "epoch": 3.023814096408681, "percentage": 60.48, "elapsed_time": "2:04:51", "remaining_time": "1:21:35", "throughput": 19843.45, "total_tokens": 148648000}
|
|
{"current_steps": 47240, "total_steps": 78105, "loss": 0.1652, "lr": 2.0245947512669668e-06, "epoch": 3.0241341783496574, "percentage": 60.48, "elapsed_time": "2:04:51", "remaining_time": "1:21:34", "throughput": 19843.8, "total_tokens": 148664832}
|
|
{"current_steps": 47245, "total_steps": 78105, "loss": 0.1096, "lr": 2.024046304686477e-06, "epoch": 3.0244542602906344, "percentage": 60.49, "elapsed_time": "2:04:52", "remaining_time": "1:21:33", "throughput": 19844.1, "total_tokens": 148680384}
|
|
{"current_steps": 47250, "total_steps": 78105, "loss": 0.1457, "lr": 2.023497881872634e-06, "epoch": 3.0247743422316113, "percentage": 60.5, "elapsed_time": "2:04:53", "remaining_time": "1:21:33", "throughput": 19844.36, "total_tokens": 148695424}
|
|
{"current_steps": 47255, "total_steps": 78105, "loss": 0.1612, "lr": 2.0229494828528252e-06, "epoch": 3.0250944241725883, "percentage": 60.5, "elapsed_time": "2:04:53", "remaining_time": "1:21:32", "throughput": 19844.61, "total_tokens": 148710016}
|
|
{"current_steps": 47260, "total_steps": 78105, "loss": 0.1602, "lr": 2.0224011076544352e-06, "epoch": 3.025414506113565, "percentage": 60.51, "elapsed_time": "2:04:54", "remaining_time": "1:21:31", "throughput": 19844.92, "total_tokens": 148725632}
|
|
{"current_steps": 47265, "total_steps": 78105, "loss": 0.1816, "lr": 2.0218527563048453e-06, "epoch": 3.025734588054542, "percentage": 60.51, "elapsed_time": "2:04:55", "remaining_time": "1:21:30", "throughput": 19845.24, "total_tokens": 148741632}
|
|
{"current_steps": 47270, "total_steps": 78105, "loss": 0.1029, "lr": 2.0213044288314386e-06, "epoch": 3.026054669995519, "percentage": 60.52, "elapsed_time": "2:04:55", "remaining_time": "1:21:29", "throughput": 19845.54, "total_tokens": 148757248}
|
|
{"current_steps": 47275, "total_steps": 78105, "loss": 0.1873, "lr": 2.0207561252615953e-06, "epoch": 3.026374751936496, "percentage": 60.53, "elapsed_time": "2:04:56", "remaining_time": "1:21:28", "throughput": 19845.83, "total_tokens": 148772864}
|
|
{"current_steps": 47280, "total_steps": 78105, "loss": 0.0767, "lr": 2.020207845622695e-06, "epoch": 3.026694833877473, "percentage": 60.53, "elapsed_time": "2:04:57", "remaining_time": "1:21:27", "throughput": 19846.19, "total_tokens": 148789440}
|
|
{"current_steps": 47285, "total_steps": 78105, "loss": 0.1261, "lr": 2.019659589942115e-06, "epoch": 3.0270149158184494, "percentage": 60.54, "elapsed_time": "2:04:57", "remaining_time": "1:21:27", "throughput": 19846.51, "total_tokens": 148805120}
|
|
{"current_steps": 47290, "total_steps": 78105, "loss": 0.1392, "lr": 2.019111358247234e-06, "epoch": 3.0273349977594264, "percentage": 60.55, "elapsed_time": "2:04:58", "remaining_time": "1:21:26", "throughput": 19846.79, "total_tokens": 148820544}
|
|
{"current_steps": 47295, "total_steps": 78105, "loss": 0.1823, "lr": 2.0185631505654262e-06, "epoch": 3.0276550797004034, "percentage": 60.55, "elapsed_time": "2:04:59", "remaining_time": "1:21:25", "throughput": 19847.05, "total_tokens": 148835264}
|
|
{"current_steps": 47300, "total_steps": 78105, "loss": 0.138, "lr": 2.0180149669240675e-06, "epoch": 3.0279751616413804, "percentage": 60.56, "elapsed_time": "2:04:59", "remaining_time": "1:21:24", "throughput": 19847.31, "total_tokens": 148850368}
|
|
{"current_steps": 47305, "total_steps": 78105, "loss": 0.1199, "lr": 2.0174668073505303e-06, "epoch": 3.028295243582357, "percentage": 60.57, "elapsed_time": "2:05:00", "remaining_time": "1:21:23", "throughput": 19847.65, "total_tokens": 148866304}
|
|
{"current_steps": 47310, "total_steps": 78105, "loss": 0.1164, "lr": 2.016918671872187e-06, "epoch": 3.028615325523334, "percentage": 60.57, "elapsed_time": "2:05:01", "remaining_time": "1:21:22", "throughput": 19847.91, "total_tokens": 148881408}
|
|
{"current_steps": 47315, "total_steps": 78105, "loss": 0.1429, "lr": 2.01637056051641e-06, "epoch": 3.028935407464311, "percentage": 60.58, "elapsed_time": "2:05:01", "remaining_time": "1:21:21", "throughput": 19848.17, "total_tokens": 148896192}
|
|
{"current_steps": 47320, "total_steps": 78105, "loss": 0.1199, "lr": 2.015822473310567e-06, "epoch": 3.029255489405288, "percentage": 60.59, "elapsed_time": "2:05:02", "remaining_time": "1:21:20", "throughput": 19848.45, "total_tokens": 148911552}
|
|
{"current_steps": 47325, "total_steps": 78105, "loss": 0.0901, "lr": 2.0152744102820286e-06, "epoch": 3.0295755713462644, "percentage": 60.59, "elapsed_time": "2:05:03", "remaining_time": "1:21:19", "throughput": 19848.77, "total_tokens": 148927040}
|
|
{"current_steps": 47330, "total_steps": 78105, "loss": 0.1823, "lr": 2.0147263714581606e-06, "epoch": 3.0298956532872414, "percentage": 60.6, "elapsed_time": "2:05:03", "remaining_time": "1:21:19", "throughput": 19849.13, "total_tokens": 148943808}
|
|
{"current_steps": 47335, "total_steps": 78105, "loss": 0.2101, "lr": 2.0141783568663304e-06, "epoch": 3.0302157352282184, "percentage": 60.6, "elapsed_time": "2:05:04", "remaining_time": "1:21:18", "throughput": 19849.38, "total_tokens": 148958464}
|
|
{"current_steps": 47340, "total_steps": 78105, "loss": 0.2033, "lr": 2.013630366533902e-06, "epoch": 3.0305358171691954, "percentage": 60.61, "elapsed_time": "2:05:05", "remaining_time": "1:21:17", "throughput": 19849.69, "total_tokens": 148974528}
|
|
{"current_steps": 47345, "total_steps": 78105, "loss": 0.1524, "lr": 2.013082400488241e-06, "epoch": 3.0308558991101724, "percentage": 60.62, "elapsed_time": "2:05:05", "remaining_time": "1:21:16", "throughput": 19849.99, "total_tokens": 148990016}
|
|
{"current_steps": 47350, "total_steps": 78105, "loss": 0.1503, "lr": 2.012534458756707e-06, "epoch": 3.031175981051149, "percentage": 60.62, "elapsed_time": "2:05:06", "remaining_time": "1:21:15", "throughput": 19850.31, "total_tokens": 149006016}
|
|
{"current_steps": 47355, "total_steps": 78105, "loss": 0.1339, "lr": 2.0119865413666638e-06, "epoch": 3.031496062992126, "percentage": 60.63, "elapsed_time": "2:05:07", "remaining_time": "1:21:14", "throughput": 19850.62, "total_tokens": 149021824}
|
|
{"current_steps": 47360, "total_steps": 78105, "loss": 0.1924, "lr": 2.0114386483454708e-06, "epoch": 3.031816144933103, "percentage": 60.64, "elapsed_time": "2:05:07", "remaining_time": "1:21:13", "throughput": 19850.93, "total_tokens": 149037376}
|
|
{"current_steps": 47365, "total_steps": 78105, "loss": 0.1623, "lr": 2.010890779720486e-06, "epoch": 3.03213622687408, "percentage": 60.64, "elapsed_time": "2:05:08", "remaining_time": "1:21:13", "throughput": 19851.28, "total_tokens": 149053888}
|
|
{"current_steps": 47370, "total_steps": 78105, "loss": 0.1361, "lr": 2.0103429355190695e-06, "epoch": 3.0324563088150565, "percentage": 60.65, "elapsed_time": "2:05:09", "remaining_time": "1:21:12", "throughput": 19851.55, "total_tokens": 149069248}
|
|
{"current_steps": 47375, "total_steps": 78105, "loss": 0.1533, "lr": 2.009795115768575e-06, "epoch": 3.0327763907560334, "percentage": 60.66, "elapsed_time": "2:05:09", "remaining_time": "1:21:11", "throughput": 19851.81, "total_tokens": 149084480}
|
|
{"current_steps": 47380, "total_steps": 78105, "loss": 0.2616, "lr": 2.00924732049636e-06, "epoch": 3.0330964726970104, "percentage": 60.66, "elapsed_time": "2:05:10", "remaining_time": "1:21:10", "throughput": 19852.09, "total_tokens": 149099712}
|
|
{"current_steps": 47385, "total_steps": 78105, "loss": 0.1906, "lr": 2.0086995497297764e-06, "epoch": 3.0334165546379874, "percentage": 60.67, "elapsed_time": "2:05:11", "remaining_time": "1:21:09", "throughput": 19852.35, "total_tokens": 149114880}
|
|
{"current_steps": 47390, "total_steps": 78105, "loss": 0.1628, "lr": 2.0081518034961795e-06, "epoch": 3.0337366365789644, "percentage": 60.67, "elapsed_time": "2:05:11", "remaining_time": "1:21:08", "throughput": 19852.68, "total_tokens": 149131072}
|
|
{"current_steps": 47395, "total_steps": 78105, "loss": 0.1103, "lr": 2.0076040818229187e-06, "epoch": 3.034056718519941, "percentage": 60.68, "elapsed_time": "2:05:12", "remaining_time": "1:21:07", "throughput": 19853.0, "total_tokens": 149147200}
|
|
{"current_steps": 47400, "total_steps": 78105, "loss": 0.155, "lr": 2.007056384737346e-06, "epoch": 3.034376800460918, "percentage": 60.69, "elapsed_time": "2:05:13", "remaining_time": "1:21:06", "throughput": 19853.34, "total_tokens": 149162944}
|
|
{"current_steps": 47405, "total_steps": 78105, "loss": 0.1416, "lr": 2.006508712266809e-06, "epoch": 3.034696882401895, "percentage": 60.69, "elapsed_time": "2:05:13", "remaining_time": "1:21:06", "throughput": 19853.63, "total_tokens": 149178432}
|
|
{"current_steps": 47410, "total_steps": 78105, "loss": 0.1509, "lr": 2.005961064438657e-06, "epoch": 3.035016964342872, "percentage": 60.7, "elapsed_time": "2:05:14", "remaining_time": "1:21:05", "throughput": 19853.93, "total_tokens": 149194240}
|
|
{"current_steps": 47415, "total_steps": 78105, "loss": 0.1214, "lr": 2.005413441280235e-06, "epoch": 3.0353370462838485, "percentage": 60.71, "elapsed_time": "2:05:15", "remaining_time": "1:21:04", "throughput": 19854.26, "total_tokens": 149210560}
|
|
{"current_steps": 47420, "total_steps": 78105, "loss": 0.2117, "lr": 2.004865842818891e-06, "epoch": 3.0356571282248255, "percentage": 60.71, "elapsed_time": "2:05:15", "remaining_time": "1:21:03", "throughput": 19854.55, "total_tokens": 149225728}
|
|
{"current_steps": 47425, "total_steps": 78105, "loss": 0.1594, "lr": 2.004318269081967e-06, "epoch": 3.0359772101658025, "percentage": 60.72, "elapsed_time": "2:05:16", "remaining_time": "1:21:02", "throughput": 19854.88, "total_tokens": 149241664}
|
|
{"current_steps": 47430, "total_steps": 78105, "loss": 0.1372, "lr": 2.0037707200968064e-06, "epoch": 3.0362972921067795, "percentage": 60.73, "elapsed_time": "2:05:17", "remaining_time": "1:21:01", "throughput": 19855.2, "total_tokens": 149257536}
|
|
{"current_steps": 47435, "total_steps": 78105, "loss": 0.1418, "lr": 2.0032231958907527e-06, "epoch": 3.036617374047756, "percentage": 60.73, "elapsed_time": "2:05:17", "remaining_time": "1:21:00", "throughput": 19855.46, "total_tokens": 149272704}
|
|
{"current_steps": 47440, "total_steps": 78105, "loss": 0.1928, "lr": 2.0026756964911434e-06, "epoch": 3.036937455988733, "percentage": 60.74, "elapsed_time": "2:05:18", "remaining_time": "1:21:00", "throughput": 19855.75, "total_tokens": 149288384}
|
|
{"current_steps": 47445, "total_steps": 78105, "loss": 0.1998, "lr": 2.00212822192532e-06, "epoch": 3.03725753792971, "percentage": 60.75, "elapsed_time": "2:05:19", "remaining_time": "1:20:59", "throughput": 19855.99, "total_tokens": 149303168}
|
|
{"current_steps": 47450, "total_steps": 78105, "loss": 0.1389, "lr": 2.0015807722206196e-06, "epoch": 3.037577619870687, "percentage": 60.75, "elapsed_time": "2:05:19", "remaining_time": "1:20:58", "throughput": 19856.25, "total_tokens": 149318336}
|
|
{"current_steps": 47455, "total_steps": 78105, "loss": 0.0876, "lr": 2.00103334740438e-06, "epoch": 3.037897701811664, "percentage": 60.76, "elapsed_time": "2:05:20", "remaining_time": "1:20:57", "throughput": 19856.54, "total_tokens": 149333504}
|
|
{"current_steps": 47460, "total_steps": 78105, "loss": 0.163, "lr": 2.000485947503935e-06, "epoch": 3.0382177837526405, "percentage": 60.76, "elapsed_time": "2:05:21", "remaining_time": "1:20:56", "throughput": 19856.92, "total_tokens": 149350656}
|
|
{"current_steps": 47465, "total_steps": 78105, "loss": 0.1282, "lr": 1.9999385725466207e-06, "epoch": 3.0385378656936175, "percentage": 60.77, "elapsed_time": "2:05:22", "remaining_time": "1:20:55", "throughput": 19857.18, "total_tokens": 149365760}
|
|
{"current_steps": 47470, "total_steps": 78105, "loss": 0.1662, "lr": 1.9993912225597685e-06, "epoch": 3.0388579476345945, "percentage": 60.78, "elapsed_time": "2:05:22", "remaining_time": "1:20:54", "throughput": 19857.45, "total_tokens": 149380736}
|
|
{"current_steps": 47475, "total_steps": 78105, "loss": 0.1963, "lr": 1.9988438975707127e-06, "epoch": 3.0391780295755715, "percentage": 60.78, "elapsed_time": "2:05:23", "remaining_time": "1:20:53", "throughput": 19857.76, "total_tokens": 149396608}
|
|
{"current_steps": 47480, "total_steps": 78105, "loss": 0.2719, "lr": 1.9982965976067808e-06, "epoch": 3.039498111516548, "percentage": 60.79, "elapsed_time": "2:05:23", "remaining_time": "1:20:53", "throughput": 19858.02, "total_tokens": 149411712}
|
|
{"current_steps": 47485, "total_steps": 78105, "loss": 0.1177, "lr": 1.997749322695303e-06, "epoch": 3.039818193457525, "percentage": 60.8, "elapsed_time": "2:05:24", "remaining_time": "1:20:52", "throughput": 19858.42, "total_tokens": 149428992}
|
|
{"current_steps": 47490, "total_steps": 78105, "loss": 0.1604, "lr": 1.99720207286361e-06, "epoch": 3.040138275398502, "percentage": 60.8, "elapsed_time": "2:05:25", "remaining_time": "1:20:51", "throughput": 19858.76, "total_tokens": 149445568}
|
|
{"current_steps": 47495, "total_steps": 78105, "loss": 0.1245, "lr": 1.9966548481390248e-06, "epoch": 3.040458357339479, "percentage": 60.81, "elapsed_time": "2:05:26", "remaining_time": "1:20:50", "throughput": 19859.09, "total_tokens": 149461568}
|
|
{"current_steps": 47500, "total_steps": 78105, "loss": 0.1583, "lr": 1.9961076485488753e-06, "epoch": 3.040778439280456, "percentage": 60.82, "elapsed_time": "2:05:26", "remaining_time": "1:20:49", "throughput": 19859.4, "total_tokens": 149477440}
|
|
{"current_steps": 47505, "total_steps": 78105, "loss": 0.1525, "lr": 1.995560474120484e-06, "epoch": 3.0410985212214325, "percentage": 60.82, "elapsed_time": "2:05:27", "remaining_time": "1:20:48", "throughput": 19859.77, "total_tokens": 149494464}
|
|
{"current_steps": 47510, "total_steps": 78105, "loss": 0.2602, "lr": 1.995013324881177e-06, "epoch": 3.0414186031624095, "percentage": 60.83, "elapsed_time": "2:05:28", "remaining_time": "1:20:47", "throughput": 19860.15, "total_tokens": 149511616}
|
|
{"current_steps": 47515, "total_steps": 78105, "loss": 0.1628, "lr": 1.9944662008582734e-06, "epoch": 3.0417386851033865, "percentage": 60.83, "elapsed_time": "2:05:28", "remaining_time": "1:20:47", "throughput": 19860.51, "total_tokens": 149528128}
|
|
{"current_steps": 47520, "total_steps": 78105, "loss": 0.1647, "lr": 1.9939191020790944e-06, "epoch": 3.0420587670443635, "percentage": 60.84, "elapsed_time": "2:05:29", "remaining_time": "1:20:46", "throughput": 19860.78, "total_tokens": 149543232}
|
|
{"current_steps": 47525, "total_steps": 78105, "loss": 0.0676, "lr": 1.9933720285709587e-06, "epoch": 3.04237884898534, "percentage": 60.85, "elapsed_time": "2:05:30", "remaining_time": "1:20:45", "throughput": 19861.07, "total_tokens": 149559040}
|
|
{"current_steps": 47530, "total_steps": 78105, "loss": 0.2869, "lr": 1.992824980361186e-06, "epoch": 3.042698930926317, "percentage": 60.85, "elapsed_time": "2:05:30", "remaining_time": "1:20:44", "throughput": 19861.42, "total_tokens": 149575360}
|
|
{"current_steps": 47535, "total_steps": 78105, "loss": 0.1776, "lr": 1.9922779574770913e-06, "epoch": 3.043019012867294, "percentage": 60.86, "elapsed_time": "2:05:31", "remaining_time": "1:20:43", "throughput": 19861.71, "total_tokens": 149591104}
|
|
{"current_steps": 47540, "total_steps": 78105, "loss": 0.1012, "lr": 1.9917309599459906e-06, "epoch": 3.043339094808271, "percentage": 60.87, "elapsed_time": "2:05:32", "remaining_time": "1:20:43", "throughput": 19862.46, "total_tokens": 149619904}
|
|
{"current_steps": 47545, "total_steps": 78105, "loss": 0.2155, "lr": 1.9911839877951997e-06, "epoch": 3.043659176749248, "percentage": 60.87, "elapsed_time": "2:05:33", "remaining_time": "1:20:42", "throughput": 19862.74, "total_tokens": 149635328}
|
|
{"current_steps": 47550, "total_steps": 78105, "loss": 0.1333, "lr": 1.9906370410520286e-06, "epoch": 3.0439792586902246, "percentage": 60.88, "elapsed_time": "2:05:34", "remaining_time": "1:20:41", "throughput": 19863.06, "total_tokens": 149651328}
|
|
{"current_steps": 47555, "total_steps": 78105, "loss": 0.1287, "lr": 1.9900901197437916e-06, "epoch": 3.0442993406312016, "percentage": 60.89, "elapsed_time": "2:05:34", "remaining_time": "1:20:40", "throughput": 19863.33, "total_tokens": 149666560}
|
|
{"current_steps": 47560, "total_steps": 78105, "loss": 0.1752, "lr": 1.989543223897797e-06, "epoch": 3.0446194225721785, "percentage": 60.89, "elapsed_time": "2:05:35", "remaining_time": "1:20:39", "throughput": 19863.61, "total_tokens": 149681792}
|
|
{"current_steps": 47565, "total_steps": 78105, "loss": 0.1434, "lr": 1.988996353541356e-06, "epoch": 3.0449395045131555, "percentage": 60.9, "elapsed_time": "2:05:36", "remaining_time": "1:20:38", "throughput": 19863.96, "total_tokens": 149698432}
|
|
{"current_steps": 47570, "total_steps": 78105, "loss": 0.1517, "lr": 1.9884495087017743e-06, "epoch": 3.045259586454132, "percentage": 60.91, "elapsed_time": "2:05:36", "remaining_time": "1:20:37", "throughput": 19864.33, "total_tokens": 149715264}
|
|
{"current_steps": 47575, "total_steps": 78105, "loss": 0.1653, "lr": 1.9879026894063606e-06, "epoch": 3.045579668395109, "percentage": 60.91, "elapsed_time": "2:05:37", "remaining_time": "1:20:37", "throughput": 19864.64, "total_tokens": 149731264}
|
|
{"current_steps": 47580, "total_steps": 78105, "loss": 0.1442, "lr": 1.9873558956824185e-06, "epoch": 3.045899750336086, "percentage": 60.92, "elapsed_time": "2:05:38", "remaining_time": "1:20:36", "throughput": 19864.98, "total_tokens": 149747584}
|
|
{"current_steps": 47585, "total_steps": 78105, "loss": 0.1226, "lr": 1.9868091275572528e-06, "epoch": 3.046219832277063, "percentage": 60.92, "elapsed_time": "2:05:38", "remaining_time": "1:20:35", "throughput": 19865.3, "total_tokens": 149763712}
|
|
{"current_steps": 47590, "total_steps": 78105, "loss": 0.1999, "lr": 1.986262385058166e-06, "epoch": 3.0465399142180396, "percentage": 60.93, "elapsed_time": "2:05:39", "remaining_time": "1:20:34", "throughput": 19865.61, "total_tokens": 149779712}
|
|
{"current_steps": 47595, "total_steps": 78105, "loss": 0.1887, "lr": 1.985715668212461e-06, "epoch": 3.0468599961590166, "percentage": 60.94, "elapsed_time": "2:05:40", "remaining_time": "1:20:33", "throughput": 19865.94, "total_tokens": 149796160}
|
|
{"current_steps": 47600, "total_steps": 78105, "loss": 0.1351, "lr": 1.9851689770474348e-06, "epoch": 3.0471800780999936, "percentage": 60.94, "elapsed_time": "2:05:41", "remaining_time": "1:20:32", "throughput": 19866.2, "total_tokens": 149811136}
|
|
{"current_steps": 47605, "total_steps": 78105, "loss": 0.1378, "lr": 1.984622311590389e-06, "epoch": 3.0475001600409706, "percentage": 60.95, "elapsed_time": "2:05:41", "remaining_time": "1:20:31", "throughput": 19866.52, "total_tokens": 149827072}
|
|
{"current_steps": 47610, "total_steps": 78105, "loss": 0.1605, "lr": 1.98407567186862e-06, "epoch": 3.0478202419819476, "percentage": 60.96, "elapsed_time": "2:05:42", "remaining_time": "1:20:31", "throughput": 19866.91, "total_tokens": 149844288}
|
|
{"current_steps": 47615, "total_steps": 78105, "loss": 0.2217, "lr": 1.983529057909425e-06, "epoch": 3.048140323922924, "percentage": 60.96, "elapsed_time": "2:05:43", "remaining_time": "1:20:30", "throughput": 19867.16, "total_tokens": 149859008}
|
|
{"current_steps": 47620, "total_steps": 78105, "loss": 0.1383, "lr": 1.9829824697400994e-06, "epoch": 3.048460405863901, "percentage": 60.97, "elapsed_time": "2:05:43", "remaining_time": "1:20:29", "throughput": 19867.48, "total_tokens": 149874880}
|
|
{"current_steps": 47625, "total_steps": 78105, "loss": 0.2013, "lr": 1.982435907387935e-06, "epoch": 3.048780487804878, "percentage": 60.98, "elapsed_time": "2:05:44", "remaining_time": "1:20:28", "throughput": 19867.77, "total_tokens": 149890496}
|
|
{"current_steps": 47630, "total_steps": 78105, "loss": 0.2065, "lr": 1.981889370880227e-06, "epoch": 3.049100569745855, "percentage": 60.98, "elapsed_time": "2:05:45", "remaining_time": "1:20:27", "throughput": 19868.03, "total_tokens": 149905344}
|
|
{"current_steps": 47635, "total_steps": 78105, "loss": 0.1493, "lr": 1.9813428602442636e-06, "epoch": 3.0494206516868316, "percentage": 60.99, "elapsed_time": "2:05:45", "remaining_time": "1:20:26", "throughput": 19868.3, "total_tokens": 149920768}
|
|
{"current_steps": 47640, "total_steps": 78105, "loss": 0.1375, "lr": 1.980796375507337e-06, "epoch": 3.0497407336278086, "percentage": 60.99, "elapsed_time": "2:05:46", "remaining_time": "1:20:25", "throughput": 19868.59, "total_tokens": 149936064}
|
|
{"current_steps": 47645, "total_steps": 78105, "loss": 0.1064, "lr": 1.980249916696734e-06, "epoch": 3.0500608155687856, "percentage": 61.0, "elapsed_time": "2:05:47", "remaining_time": "1:20:24", "throughput": 19868.82, "total_tokens": 149950592}
|
|
{"current_steps": 47650, "total_steps": 78105, "loss": 0.1358, "lr": 1.9797034838397446e-06, "epoch": 3.0503808975097626, "percentage": 61.01, "elapsed_time": "2:05:47", "remaining_time": "1:20:24", "throughput": 19869.06, "total_tokens": 149965056}
|
|
{"current_steps": 47655, "total_steps": 78105, "loss": 0.1829, "lr": 1.9791570769636514e-06, "epoch": 3.0507009794507396, "percentage": 61.01, "elapsed_time": "2:05:48", "remaining_time": "1:20:23", "throughput": 19869.38, "total_tokens": 149981184}
|
|
{"current_steps": 47660, "total_steps": 78105, "loss": 0.1375, "lr": 1.9786106960957412e-06, "epoch": 3.051021061391716, "percentage": 61.02, "elapsed_time": "2:05:49", "remaining_time": "1:20:22", "throughput": 19869.64, "total_tokens": 149996544}
|
|
{"current_steps": 47665, "total_steps": 78105, "loss": 0.1414, "lr": 1.978064341263298e-06, "epoch": 3.051341143332693, "percentage": 61.03, "elapsed_time": "2:05:49", "remaining_time": "1:20:21", "throughput": 19870.0, "total_tokens": 150013312}
|
|
{"current_steps": 47670, "total_steps": 78105, "loss": 0.1625, "lr": 1.977518012493602e-06, "epoch": 3.05166122527367, "percentage": 61.03, "elapsed_time": "2:05:50", "remaining_time": "1:20:20", "throughput": 19870.37, "total_tokens": 150030016}
|
|
{"current_steps": 47675, "total_steps": 78105, "loss": 0.1525, "lr": 1.976971709813935e-06, "epoch": 3.051981307214647, "percentage": 61.04, "elapsed_time": "2:05:51", "remaining_time": "1:20:19", "throughput": 19870.62, "total_tokens": 150045056}
|
|
{"current_steps": 47680, "total_steps": 78105, "loss": 0.114, "lr": 1.9764254332515765e-06, "epoch": 3.0523013891556237, "percentage": 61.05, "elapsed_time": "2:05:51", "remaining_time": "1:20:18", "throughput": 19870.91, "total_tokens": 150060480}
|
|
{"current_steps": 47685, "total_steps": 78105, "loss": 0.2572, "lr": 1.9758791828338053e-06, "epoch": 3.0526214710966006, "percentage": 61.05, "elapsed_time": "2:05:52", "remaining_time": "1:20:18", "throughput": 19871.25, "total_tokens": 150077120}
|
|
{"current_steps": 47690, "total_steps": 78105, "loss": 0.1731, "lr": 1.975332958587897e-06, "epoch": 3.0529415530375776, "percentage": 61.06, "elapsed_time": "2:05:53", "remaining_time": "1:20:17", "throughput": 19871.62, "total_tokens": 150093952}
|
|
{"current_steps": 47695, "total_steps": 78105, "loss": 0.1819, "lr": 1.9747867605411282e-06, "epoch": 3.0532616349785546, "percentage": 61.07, "elapsed_time": "2:05:53", "remaining_time": "1:20:16", "throughput": 19871.89, "total_tokens": 150109504}
|
|
{"current_steps": 47700, "total_steps": 78105, "loss": 0.1143, "lr": 1.974240588720772e-06, "epoch": 3.053581716919531, "percentage": 61.07, "elapsed_time": "2:05:54", "remaining_time": "1:20:15", "throughput": 19872.31, "total_tokens": 150127552}
|
|
{"current_steps": 47705, "total_steps": 78105, "loss": 0.1824, "lr": 1.9736944431541038e-06, "epoch": 3.053901798860508, "percentage": 61.08, "elapsed_time": "2:05:55", "remaining_time": "1:20:14", "throughput": 19872.58, "total_tokens": 150142720}
|
|
{"current_steps": 47710, "total_steps": 78105, "loss": 0.1491, "lr": 1.973148323868392e-06, "epoch": 3.054221880801485, "percentage": 61.08, "elapsed_time": "2:05:55", "remaining_time": "1:20:13", "throughput": 19872.82, "total_tokens": 150157504}
|
|
{"current_steps": 47715, "total_steps": 78105, "loss": 0.1599, "lr": 1.9726022308909095e-06, "epoch": 3.054541962742462, "percentage": 61.09, "elapsed_time": "2:05:56", "remaining_time": "1:20:12", "throughput": 19873.09, "total_tokens": 150172992}
|
|
{"current_steps": 47720, "total_steps": 78105, "loss": 0.1498, "lr": 1.9720561642489247e-06, "epoch": 3.054862044683439, "percentage": 61.1, "elapsed_time": "2:05:57", "remaining_time": "1:20:11", "throughput": 19873.4, "total_tokens": 150188992}
|
|
{"current_steps": 47725, "total_steps": 78105, "loss": 0.1453, "lr": 1.971510123969704e-06, "epoch": 3.0551821266244157, "percentage": 61.1, "elapsed_time": "2:05:58", "remaining_time": "1:20:11", "throughput": 19873.77, "total_tokens": 150205952}
|
|
{"current_steps": 47730, "total_steps": 78105, "loss": 0.1228, "lr": 1.9709641100805163e-06, "epoch": 3.0555022085653927, "percentage": 61.11, "elapsed_time": "2:05:58", "remaining_time": "1:20:10", "throughput": 19874.04, "total_tokens": 150221248}
|
|
{"current_steps": 47735, "total_steps": 78105, "loss": 0.1354, "lr": 1.970418122608624e-06, "epoch": 3.0558222905063697, "percentage": 61.12, "elapsed_time": "2:05:59", "remaining_time": "1:20:09", "throughput": 19874.32, "total_tokens": 150236672}
|
|
{"current_steps": 47740, "total_steps": 78105, "loss": 0.155, "lr": 1.969872161581294e-06, "epoch": 3.0561423724473467, "percentage": 61.12, "elapsed_time": "2:06:00", "remaining_time": "1:20:08", "throughput": 19874.65, "total_tokens": 150252672}
|
|
{"current_steps": 47745, "total_steps": 78105, "loss": 0.1819, "lr": 1.9693262270257853e-06, "epoch": 3.056462454388323, "percentage": 61.13, "elapsed_time": "2:06:00", "remaining_time": "1:20:07", "throughput": 19875.0, "total_tokens": 150269056}
|
|
{"current_steps": 47750, "total_steps": 78105, "loss": 0.2936, "lr": 1.968780318969361e-06, "epoch": 3.0567825363293, "percentage": 61.14, "elapsed_time": "2:06:01", "remaining_time": "1:20:06", "throughput": 19875.28, "total_tokens": 150284416}
|
|
{"current_steps": 47755, "total_steps": 78105, "loss": 0.1472, "lr": 1.9682344374392805e-06, "epoch": 3.057102618270277, "percentage": 61.14, "elapsed_time": "2:06:02", "remaining_time": "1:20:05", "throughput": 19875.56, "total_tokens": 150299776}
|
|
{"current_steps": 47760, "total_steps": 78105, "loss": 0.0795, "lr": 1.967688582462803e-06, "epoch": 3.057422700211254, "percentage": 61.15, "elapsed_time": "2:06:02", "remaining_time": "1:20:05", "throughput": 19875.84, "total_tokens": 150315264}
|
|
{"current_steps": 47765, "total_steps": 78105, "loss": 0.1907, "lr": 1.9671427540671838e-06, "epoch": 3.057742782152231, "percentage": 61.15, "elapsed_time": "2:06:03", "remaining_time": "1:20:04", "throughput": 19876.17, "total_tokens": 150331200}
|
|
{"current_steps": 47770, "total_steps": 78105, "loss": 0.1474, "lr": 1.9665969522796813e-06, "epoch": 3.0580628640932077, "percentage": 61.16, "elapsed_time": "2:06:04", "remaining_time": "1:20:03", "throughput": 19876.49, "total_tokens": 150347072}
|
|
{"current_steps": 47775, "total_steps": 78105, "loss": 0.1164, "lr": 1.9660511771275474e-06, "epoch": 3.0583829460341847, "percentage": 61.17, "elapsed_time": "2:06:04", "remaining_time": "1:20:02", "throughput": 19876.82, "total_tokens": 150363392}
|
|
{"current_steps": 47780, "total_steps": 78105, "loss": 0.2084, "lr": 1.9655054286380367e-06, "epoch": 3.0587030279751617, "percentage": 61.17, "elapsed_time": "2:06:05", "remaining_time": "1:20:01", "throughput": 19877.1, "total_tokens": 150379008}
|
|
{"current_steps": 47785, "total_steps": 78105, "loss": 0.2199, "lr": 1.9649597068384012e-06, "epoch": 3.0590231099161387, "percentage": 61.18, "elapsed_time": "2:06:06", "remaining_time": "1:20:00", "throughput": 19877.38, "total_tokens": 150394624}
|
|
{"current_steps": 47790, "total_steps": 78105, "loss": 0.1773, "lr": 1.9644140117558904e-06, "epoch": 3.0593431918571152, "percentage": 61.19, "elapsed_time": "2:06:06", "remaining_time": "1:19:59", "throughput": 19877.68, "total_tokens": 150410432}
|
|
{"current_steps": 47795, "total_steps": 78105, "loss": 0.1238, "lr": 1.9638683434177554e-06, "epoch": 3.059663273798092, "percentage": 61.19, "elapsed_time": "2:06:07", "remaining_time": "1:19:59", "throughput": 19877.92, "total_tokens": 150425216}
|
|
{"current_steps": 47800, "total_steps": 78105, "loss": 0.1551, "lr": 1.9633227018512414e-06, "epoch": 3.059983355739069, "percentage": 61.2, "elapsed_time": "2:06:08", "remaining_time": "1:19:58", "throughput": 19878.18, "total_tokens": 150440192}
|
|
{"current_steps": 47805, "total_steps": 78105, "loss": 0.1245, "lr": 1.9627770870835976e-06, "epoch": 3.060303437680046, "percentage": 61.21, "elapsed_time": "2:06:08", "remaining_time": "1:19:57", "throughput": 19878.47, "total_tokens": 150456128}
|
|
{"current_steps": 47810, "total_steps": 78105, "loss": 0.0953, "lr": 1.962231499142066e-06, "epoch": 3.060623519621023, "percentage": 61.21, "elapsed_time": "2:06:09", "remaining_time": "1:19:56", "throughput": 19878.85, "total_tokens": 150473088}
|
|
{"current_steps": 47815, "total_steps": 78105, "loss": 0.1509, "lr": 1.9616859380538936e-06, "epoch": 3.0609436015619997, "percentage": 61.22, "elapsed_time": "2:06:10", "remaining_time": "1:19:55", "throughput": 19879.11, "total_tokens": 150487744}
|
|
{"current_steps": 47820, "total_steps": 78105, "loss": 0.1472, "lr": 1.9611404038463205e-06, "epoch": 3.0612636835029767, "percentage": 61.23, "elapsed_time": "2:06:10", "remaining_time": "1:19:54", "throughput": 19879.44, "total_tokens": 150504000}
|
|
{"current_steps": 47825, "total_steps": 78105, "loss": 0.118, "lr": 1.96059489654659e-06, "epoch": 3.0615837654439537, "percentage": 61.23, "elapsed_time": "2:06:11", "remaining_time": "1:19:53", "throughput": 19879.71, "total_tokens": 150518912}
|
|
{"current_steps": 47830, "total_steps": 78105, "loss": 0.1458, "lr": 1.9600494161819393e-06, "epoch": 3.0619038473849307, "percentage": 61.24, "elapsed_time": "2:06:12", "remaining_time": "1:19:52", "throughput": 19880.03, "total_tokens": 150535232}
|
|
{"current_steps": 47835, "total_steps": 78105, "loss": 0.1911, "lr": 1.9595039627796085e-06, "epoch": 3.0622239293259073, "percentage": 61.24, "elapsed_time": "2:06:12", "remaining_time": "1:19:52", "throughput": 19880.32, "total_tokens": 150550912}
|
|
{"current_steps": 47840, "total_steps": 78105, "loss": 0.1329, "lr": 1.9589585363668353e-06, "epoch": 3.0625440112668842, "percentage": 61.25, "elapsed_time": "2:06:13", "remaining_time": "1:19:51", "throughput": 19880.61, "total_tokens": 150566528}
|
|
{"current_steps": 47845, "total_steps": 78105, "loss": 0.2175, "lr": 1.9584131369708535e-06, "epoch": 3.0628640932078612, "percentage": 61.26, "elapsed_time": "2:06:14", "remaining_time": "1:19:50", "throughput": 19880.84, "total_tokens": 150581056}
|
|
{"current_steps": 47850, "total_steps": 78105, "loss": 0.1842, "lr": 1.9578677646189e-06, "epoch": 3.0631841751488382, "percentage": 61.26, "elapsed_time": "2:06:14", "remaining_time": "1:19:49", "throughput": 19881.1, "total_tokens": 150596416}
|
|
{"current_steps": 47855, "total_steps": 78105, "loss": 0.1962, "lr": 1.957322419338206e-06, "epoch": 3.063504257089815, "percentage": 61.27, "elapsed_time": "2:06:15", "remaining_time": "1:19:48", "throughput": 19881.39, "total_tokens": 150612096}
|
|
{"current_steps": 47860, "total_steps": 78105, "loss": 0.138, "lr": 1.956777101156004e-06, "epoch": 3.0638243390307918, "percentage": 61.28, "elapsed_time": "2:06:16", "remaining_time": "1:19:47", "throughput": 19881.72, "total_tokens": 150628224}
|
|
{"current_steps": 47865, "total_steps": 78105, "loss": 0.1429, "lr": 1.956231810099523e-06, "epoch": 3.0641444209717688, "percentage": 61.28, "elapsed_time": "2:06:16", "remaining_time": "1:19:46", "throughput": 19882.03, "total_tokens": 150644160}
|
|
{"current_steps": 47870, "total_steps": 78105, "loss": 0.1739, "lr": 1.9556865461959936e-06, "epoch": 3.0644645029127457, "percentage": 61.29, "elapsed_time": "2:06:17", "remaining_time": "1:19:46", "throughput": 19882.37, "total_tokens": 150660736}
|
|
{"current_steps": 47875, "total_steps": 78105, "loss": 0.1547, "lr": 1.9551413094726422e-06, "epoch": 3.0647845848537227, "percentage": 61.3, "elapsed_time": "2:06:18", "remaining_time": "1:19:45", "throughput": 19882.68, "total_tokens": 150676800}
|
|
{"current_steps": 47880, "total_steps": 78105, "loss": 0.2209, "lr": 1.9545960999566973e-06, "epoch": 3.0651046667946993, "percentage": 61.3, "elapsed_time": "2:06:18", "remaining_time": "1:19:44", "throughput": 19882.99, "total_tokens": 150692800}
|
|
{"current_steps": 47885, "total_steps": 78105, "loss": 0.131, "lr": 1.9540509176753807e-06, "epoch": 3.0654247487356763, "percentage": 61.31, "elapsed_time": "2:06:19", "remaining_time": "1:19:43", "throughput": 19883.26, "total_tokens": 150707776}
|
|
{"current_steps": 47890, "total_steps": 78105, "loss": 0.1623, "lr": 1.9535057626559177e-06, "epoch": 3.0657448306766533, "percentage": 61.31, "elapsed_time": "2:06:20", "remaining_time": "1:19:42", "throughput": 19883.55, "total_tokens": 150723008}
|
|
{"current_steps": 47895, "total_steps": 78105, "loss": 0.1904, "lr": 1.9529606349255316e-06, "epoch": 3.0660649126176303, "percentage": 61.32, "elapsed_time": "2:06:20", "remaining_time": "1:19:41", "throughput": 19883.87, "total_tokens": 150738944}
|
|
{"current_steps": 47900, "total_steps": 78105, "loss": 0.1105, "lr": 1.9524155345114406e-06, "epoch": 3.066384994558607, "percentage": 61.33, "elapsed_time": "2:06:21", "remaining_time": "1:19:40", "throughput": 19884.21, "total_tokens": 150755200}
|
|
{"current_steps": 47905, "total_steps": 78105, "loss": 0.2196, "lr": 1.951870461440866e-06, "epoch": 3.066705076499584, "percentage": 61.33, "elapsed_time": "2:06:22", "remaining_time": "1:19:40", "throughput": 19884.51, "total_tokens": 150771136}
|
|
{"current_steps": 47910, "total_steps": 78105, "loss": 0.1726, "lr": 1.9513254157410253e-06, "epoch": 3.067025158440561, "percentage": 61.34, "elapsed_time": "2:06:22", "remaining_time": "1:19:39", "throughput": 19884.76, "total_tokens": 150785984}
|
|
{"current_steps": 47915, "total_steps": 78105, "loss": 0.1345, "lr": 1.950780397439136e-06, "epoch": 3.0673452403815378, "percentage": 61.35, "elapsed_time": "2:06:23", "remaining_time": "1:19:38", "throughput": 19885.17, "total_tokens": 150803712}
|
|
{"current_steps": 47920, "total_steps": 78105, "loss": 0.1882, "lr": 1.9502354065624124e-06, "epoch": 3.0676653223225148, "percentage": 61.35, "elapsed_time": "2:06:24", "remaining_time": "1:19:37", "throughput": 19885.49, "total_tokens": 150819712}
|
|
{"current_steps": 47925, "total_steps": 78105, "loss": 0.1113, "lr": 1.949690443138069e-06, "epoch": 3.0679854042634913, "percentage": 61.36, "elapsed_time": "2:06:25", "remaining_time": "1:19:36", "throughput": 19885.8, "total_tokens": 150835776}
|
|
{"current_steps": 47930, "total_steps": 78105, "loss": 0.1513, "lr": 1.949145507193318e-06, "epoch": 3.0683054862044683, "percentage": 61.37, "elapsed_time": "2:06:25", "remaining_time": "1:19:35", "throughput": 19886.06, "total_tokens": 150850816}
|
|
{"current_steps": 47935, "total_steps": 78105, "loss": 0.0757, "lr": 1.9486005987553722e-06, "epoch": 3.0686255681454453, "percentage": 61.37, "elapsed_time": "2:06:26", "remaining_time": "1:19:34", "throughput": 19886.39, "total_tokens": 150867264}
|
|
{"current_steps": 47940, "total_steps": 78105, "loss": 0.103, "lr": 1.948055717851439e-06, "epoch": 3.0689456500864223, "percentage": 61.38, "elapsed_time": "2:06:27", "remaining_time": "1:19:33", "throughput": 19886.64, "total_tokens": 150881728}
|
|
{"current_steps": 47945, "total_steps": 78105, "loss": 0.1954, "lr": 1.9475108645087297e-06, "epoch": 3.069265732027399, "percentage": 61.39, "elapsed_time": "2:06:27", "remaining_time": "1:19:33", "throughput": 19887.02, "total_tokens": 150898880}
|
|
{"current_steps": 47950, "total_steps": 78105, "loss": 0.1764, "lr": 1.9469660387544493e-06, "epoch": 3.069585813968376, "percentage": 61.39, "elapsed_time": "2:06:28", "remaining_time": "1:19:32", "throughput": 19887.31, "total_tokens": 150914624}
|
|
{"current_steps": 47955, "total_steps": 78105, "loss": 0.2132, "lr": 1.9464212406158036e-06, "epoch": 3.069905895909353, "percentage": 61.4, "elapsed_time": "2:06:29", "remaining_time": "1:19:31", "throughput": 19887.62, "total_tokens": 150930432}
|
|
{"current_steps": 47960, "total_steps": 78105, "loss": 0.2277, "lr": 1.9458764701199986e-06, "epoch": 3.07022597785033, "percentage": 61.4, "elapsed_time": "2:06:29", "remaining_time": "1:19:30", "throughput": 19887.89, "total_tokens": 150945664}
|
|
{"current_steps": 47965, "total_steps": 78105, "loss": 0.1633, "lr": 1.9453317272942358e-06, "epoch": 3.0705460597913063, "percentage": 61.41, "elapsed_time": "2:06:30", "remaining_time": "1:19:29", "throughput": 19888.19, "total_tokens": 150961344}
|
|
{"current_steps": 47970, "total_steps": 78105, "loss": 0.2098, "lr": 1.944787012165718e-06, "epoch": 3.0708661417322833, "percentage": 61.42, "elapsed_time": "2:06:31", "remaining_time": "1:19:28", "throughput": 19888.47, "total_tokens": 150976512}
|
|
{"current_steps": 47975, "total_steps": 78105, "loss": 0.1358, "lr": 1.944242324761644e-06, "epoch": 3.0711862236732603, "percentage": 61.42, "elapsed_time": "2:06:31", "remaining_time": "1:19:27", "throughput": 19888.92, "total_tokens": 150995136}
|
|
{"current_steps": 47980, "total_steps": 78105, "loss": 0.0953, "lr": 1.9436976651092143e-06, "epoch": 3.0715063056142373, "percentage": 61.43, "elapsed_time": "2:06:32", "remaining_time": "1:19:27", "throughput": 19889.3, "total_tokens": 151012416}
|
|
{"current_steps": 47985, "total_steps": 78105, "loss": 0.158, "lr": 1.9431530332356247e-06, "epoch": 3.0718263875552143, "percentage": 61.44, "elapsed_time": "2:06:33", "remaining_time": "1:19:26", "throughput": 19889.59, "total_tokens": 151028032}
|
|
{"current_steps": 47990, "total_steps": 78105, "loss": 0.1351, "lr": 1.942608429168073e-06, "epoch": 3.072146469496191, "percentage": 61.44, "elapsed_time": "2:06:33", "remaining_time": "1:19:25", "throughput": 19889.86, "total_tokens": 151043200}
|
|
{"current_steps": 47995, "total_steps": 78105, "loss": 0.1743, "lr": 1.9420638529337528e-06, "epoch": 3.072466551437168, "percentage": 61.45, "elapsed_time": "2:06:34", "remaining_time": "1:19:24", "throughput": 19890.2, "total_tokens": 151059712}
|
|
{"current_steps": 48000, "total_steps": 78105, "loss": 0.2285, "lr": 1.9415193045598575e-06, "epoch": 3.072786633378145, "percentage": 61.46, "elapsed_time": "2:06:35", "remaining_time": "1:19:23", "throughput": 19890.55, "total_tokens": 151076608}
|
|
{"current_steps": 48005, "total_steps": 78105, "loss": 0.1377, "lr": 1.940974784073579e-06, "epoch": 3.073106715319122, "percentage": 61.46, "elapsed_time": "2:06:36", "remaining_time": "1:19:22", "throughput": 19890.84, "total_tokens": 151092288}
|
|
{"current_steps": 48010, "total_steps": 78105, "loss": 0.1857, "lr": 1.940430291502107e-06, "epoch": 3.0734267972600984, "percentage": 61.47, "elapsed_time": "2:06:36", "remaining_time": "1:19:22", "throughput": 19891.19, "total_tokens": 151108992}
|
|
{"current_steps": 48015, "total_steps": 78105, "loss": 0.1747, "lr": 1.9398858268726327e-06, "epoch": 3.0737468792010754, "percentage": 61.47, "elapsed_time": "2:06:37", "remaining_time": "1:19:21", "throughput": 19891.45, "total_tokens": 151124224}
|
|
{"current_steps": 48020, "total_steps": 78105, "loss": 0.1461, "lr": 1.9393413902123425e-06, "epoch": 3.0740669611420524, "percentage": 61.48, "elapsed_time": "2:06:38", "remaining_time": "1:19:20", "throughput": 19891.8, "total_tokens": 151140928}
|
|
{"current_steps": 48025, "total_steps": 78105, "loss": 0.1835, "lr": 1.9387969815484233e-06, "epoch": 3.0743870430830293, "percentage": 61.49, "elapsed_time": "2:06:38", "remaining_time": "1:19:19", "throughput": 19892.08, "total_tokens": 151156416}
|
|
{"current_steps": 48030, "total_steps": 78105, "loss": 0.1423, "lr": 1.938252600908059e-06, "epoch": 3.0747071250240063, "percentage": 61.49, "elapsed_time": "2:06:39", "remaining_time": "1:19:18", "throughput": 19892.43, "total_tokens": 151172800}
|
|
{"current_steps": 48035, "total_steps": 78105, "loss": 0.1535, "lr": 1.9377082483184344e-06, "epoch": 3.075027206964983, "percentage": 61.5, "elapsed_time": "2:06:40", "remaining_time": "1:19:17", "throughput": 19892.73, "total_tokens": 151188352}
|
|
{"current_steps": 48040, "total_steps": 78105, "loss": 0.2626, "lr": 1.9371639238067302e-06, "epoch": 3.07534728890596, "percentage": 61.51, "elapsed_time": "2:06:40", "remaining_time": "1:19:16", "throughput": 19893.02, "total_tokens": 151204288}
|
|
{"current_steps": 48045, "total_steps": 78105, "loss": 0.2089, "lr": 1.9366196274001286e-06, "epoch": 3.075667370846937, "percentage": 61.51, "elapsed_time": "2:06:41", "remaining_time": "1:19:16", "throughput": 19893.3, "total_tokens": 151219712}
|
|
{"current_steps": 48050, "total_steps": 78105, "loss": 0.1127, "lr": 1.9360753591258073e-06, "epoch": 3.075987452787914, "percentage": 61.52, "elapsed_time": "2:06:42", "remaining_time": "1:19:15", "throughput": 19893.58, "total_tokens": 151235136}
|
|
{"current_steps": 48055, "total_steps": 78105, "loss": 0.131, "lr": 1.9355311190109465e-06, "epoch": 3.0763075347288904, "percentage": 61.53, "elapsed_time": "2:06:42", "remaining_time": "1:19:14", "throughput": 19893.86, "total_tokens": 151250624}
|
|
{"current_steps": 48060, "total_steps": 78105, "loss": 0.1795, "lr": 1.9349869070827205e-06, "epoch": 3.0766276166698674, "percentage": 61.53, "elapsed_time": "2:06:43", "remaining_time": "1:19:13", "throughput": 19894.14, "total_tokens": 151265792}
|
|
{"current_steps": 48065, "total_steps": 78105, "loss": 0.1688, "lr": 1.9344427233683053e-06, "epoch": 3.0769476986108444, "percentage": 61.54, "elapsed_time": "2:06:44", "remaining_time": "1:19:12", "throughput": 19894.44, "total_tokens": 151281664}
|
|
{"current_steps": 48070, "total_steps": 78105, "loss": 0.1275, "lr": 1.933898567894875e-06, "epoch": 3.0772677805518214, "percentage": 61.55, "elapsed_time": "2:06:44", "remaining_time": "1:19:11", "throughput": 19894.72, "total_tokens": 151297216}
|
|
{"current_steps": 48075, "total_steps": 78105, "loss": 0.2356, "lr": 1.933354440689601e-06, "epoch": 3.0775878624927984, "percentage": 61.55, "elapsed_time": "2:06:45", "remaining_time": "1:19:10", "throughput": 19895.04, "total_tokens": 151313152}
|
|
{"current_steps": 48080, "total_steps": 78105, "loss": 0.155, "lr": 1.932810341779656e-06, "epoch": 3.077907944433775, "percentage": 61.56, "elapsed_time": "2:06:46", "remaining_time": "1:19:09", "throughput": 19895.3, "total_tokens": 151328704}
|
|
{"current_steps": 48085, "total_steps": 78105, "loss": 0.1709, "lr": 1.9322662711922064e-06, "epoch": 3.078228026374752, "percentage": 61.56, "elapsed_time": "2:06:46", "remaining_time": "1:19:09", "throughput": 19895.58, "total_tokens": 151344128}
|
|
{"current_steps": 48090, "total_steps": 78105, "loss": 0.2147, "lr": 1.9317222289544234e-06, "epoch": 3.078548108315729, "percentage": 61.57, "elapsed_time": "2:06:47", "remaining_time": "1:19:08", "throughput": 19895.68, "total_tokens": 151361024}
|
|
{"current_steps": 48095, "total_steps": 78105, "loss": 0.1662, "lr": 1.9311782150934715e-06, "epoch": 3.078868190256706, "percentage": 61.58, "elapsed_time": "2:06:48", "remaining_time": "1:19:07", "throughput": 19896.02, "total_tokens": 151377600}
|
|
{"current_steps": 48100, "total_steps": 78105, "loss": 0.1914, "lr": 1.930634229636517e-06, "epoch": 3.0791882721976824, "percentage": 61.58, "elapsed_time": "2:06:49", "remaining_time": "1:19:06", "throughput": 19896.3, "total_tokens": 151393152}
|
|
{"current_steps": 48105, "total_steps": 78105, "loss": 0.1662, "lr": 1.930090272610723e-06, "epoch": 3.0795083541386594, "percentage": 61.59, "elapsed_time": "2:06:49", "remaining_time": "1:19:05", "throughput": 19896.59, "total_tokens": 151408576}
|
|
{"current_steps": 48110, "total_steps": 78105, "loss": 0.1979, "lr": 1.9295463440432528e-06, "epoch": 3.0798284360796364, "percentage": 61.6, "elapsed_time": "2:06:50", "remaining_time": "1:19:04", "throughput": 19896.85, "total_tokens": 151423616}
|
|
{"current_steps": 48115, "total_steps": 78105, "loss": 0.1284, "lr": 1.929002443961266e-06, "epoch": 3.0801485180206134, "percentage": 61.6, "elapsed_time": "2:06:51", "remaining_time": "1:19:03", "throughput": 19897.18, "total_tokens": 151439616}
|
|
{"current_steps": 48120, "total_steps": 78105, "loss": 0.144, "lr": 1.9284585723919237e-06, "epoch": 3.0804685999615904, "percentage": 61.61, "elapsed_time": "2:06:51", "remaining_time": "1:19:03", "throughput": 19897.11, "total_tokens": 151455424}
|
|
{"current_steps": 48125, "total_steps": 78105, "loss": 0.1426, "lr": 1.9279147293623825e-06, "epoch": 3.080788681902567, "percentage": 61.62, "elapsed_time": "2:06:52", "remaining_time": "1:19:02", "throughput": 19897.38, "total_tokens": 151470528}
|
|
{"current_steps": 48130, "total_steps": 78105, "loss": 0.1396, "lr": 1.9273709148998003e-06, "epoch": 3.081108763843544, "percentage": 61.62, "elapsed_time": "2:06:53", "remaining_time": "1:19:01", "throughput": 19897.55, "total_tokens": 151488768}
|
|
{"current_steps": 48135, "total_steps": 78105, "loss": 0.1163, "lr": 1.9268271290313317e-06, "epoch": 3.081428845784521, "percentage": 61.63, "elapsed_time": "2:06:54", "remaining_time": "1:19:00", "throughput": 19897.81, "total_tokens": 151504000}
|
|
{"current_steps": 48140, "total_steps": 78105, "loss": 0.177, "lr": 1.9262833717841306e-06, "epoch": 3.081748927725498, "percentage": 61.63, "elapsed_time": "2:06:54", "remaining_time": "1:18:59", "throughput": 19898.08, "total_tokens": 151519424}
|
|
{"current_steps": 48145, "total_steps": 78105, "loss": 0.1353, "lr": 1.9257396431853503e-06, "epoch": 3.0820690096664745, "percentage": 61.64, "elapsed_time": "2:06:55", "remaining_time": "1:18:58", "throughput": 19898.39, "total_tokens": 151535232}
|
|
{"current_steps": 48150, "total_steps": 78105, "loss": 0.1628, "lr": 1.92519594326214e-06, "epoch": 3.0823890916074514, "percentage": 61.65, "elapsed_time": "2:06:56", "remaining_time": "1:18:58", "throughput": 19898.64, "total_tokens": 151550272}
|
|
{"current_steps": 48155, "total_steps": 78105, "loss": 0.1283, "lr": 1.924652272041651e-06, "epoch": 3.0827091735484284, "percentage": 61.65, "elapsed_time": "2:06:56", "remaining_time": "1:18:57", "throughput": 19898.92, "total_tokens": 151565824}
|
|
{"current_steps": 48160, "total_steps": 78105, "loss": 0.1678, "lr": 1.92410862955103e-06, "epoch": 3.0830292554894054, "percentage": 61.66, "elapsed_time": "2:06:57", "remaining_time": "1:18:56", "throughput": 19899.29, "total_tokens": 151583040}
|
|
{"current_steps": 48165, "total_steps": 78105, "loss": 0.2132, "lr": 1.9235650158174253e-06, "epoch": 3.083349337430382, "percentage": 61.67, "elapsed_time": "2:06:58", "remaining_time": "1:18:55", "throughput": 19899.59, "total_tokens": 151599168}
|
|
{"current_steps": 48170, "total_steps": 78105, "loss": 0.1977, "lr": 1.9230214308679806e-06, "epoch": 3.083669419371359, "percentage": 61.67, "elapsed_time": "2:06:58", "remaining_time": "1:18:54", "throughput": 19899.87, "total_tokens": 151614464}
|
|
{"current_steps": 48175, "total_steps": 78105, "loss": 0.1759, "lr": 1.9224778747298407e-06, "epoch": 3.083989501312336, "percentage": 61.68, "elapsed_time": "2:06:59", "remaining_time": "1:18:53", "throughput": 19900.12, "total_tokens": 151629440}
|
|
{"current_steps": 48180, "total_steps": 78105, "loss": 0.2023, "lr": 1.9219343474301466e-06, "epoch": 3.084309583253313, "percentage": 61.69, "elapsed_time": "2:07:00", "remaining_time": "1:18:52", "throughput": 19900.44, "total_tokens": 151645312}
|
|
{"current_steps": 48185, "total_steps": 78105, "loss": 0.1556, "lr": 1.92139084899604e-06, "epoch": 3.08462966519429, "percentage": 61.69, "elapsed_time": "2:07:00", "remaining_time": "1:18:52", "throughput": 19900.72, "total_tokens": 151660736}
|
|
{"current_steps": 48190, "total_steps": 78105, "loss": 0.1916, "lr": 1.920847379454662e-06, "epoch": 3.0849497471352665, "percentage": 61.7, "elapsed_time": "2:07:01", "remaining_time": "1:18:51", "throughput": 19901.03, "total_tokens": 151676672}
|
|
{"current_steps": 48195, "total_steps": 78105, "loss": 0.1536, "lr": 1.9203039388331473e-06, "epoch": 3.0852698290762435, "percentage": 61.71, "elapsed_time": "2:07:02", "remaining_time": "1:18:50", "throughput": 19901.32, "total_tokens": 151692032}
|
|
{"current_steps": 48200, "total_steps": 78105, "loss": 0.2809, "lr": 1.9197605271586364e-06, "epoch": 3.0855899110172205, "percentage": 61.71, "elapsed_time": "2:07:02", "remaining_time": "1:18:49", "throughput": 19901.58, "total_tokens": 151707264}
|
|
{"current_steps": 48205, "total_steps": 78105, "loss": 0.1642, "lr": 1.9192171444582606e-06, "epoch": 3.0859099929581975, "percentage": 61.72, "elapsed_time": "2:07:03", "remaining_time": "1:18:48", "throughput": 19901.87, "total_tokens": 151723328}
|
|
{"current_steps": 48210, "total_steps": 78105, "loss": 0.195, "lr": 1.918673790759157e-06, "epoch": 3.086230074899174, "percentage": 61.72, "elapsed_time": "2:07:04", "remaining_time": "1:18:47", "throughput": 19902.16, "total_tokens": 151739008}
|
|
{"current_steps": 48215, "total_steps": 78105, "loss": 0.2135, "lr": 1.918130466088455e-06, "epoch": 3.086550156840151, "percentage": 61.73, "elapsed_time": "2:07:04", "remaining_time": "1:18:46", "throughput": 19902.45, "total_tokens": 151754752}
|
|
{"current_steps": 48220, "total_steps": 78105, "loss": 0.162, "lr": 1.917587170473288e-06, "epoch": 3.086870238781128, "percentage": 61.74, "elapsed_time": "2:07:05", "remaining_time": "1:18:46", "throughput": 19902.77, "total_tokens": 151770688}
|
|
{"current_steps": 48225, "total_steps": 78105, "loss": 0.2155, "lr": 1.9170439039407827e-06, "epoch": 3.087190320722105, "percentage": 61.74, "elapsed_time": "2:07:06", "remaining_time": "1:18:45", "throughput": 19903.05, "total_tokens": 151786112}
|
|
{"current_steps": 48230, "total_steps": 78105, "loss": 0.2069, "lr": 1.9165006665180694e-06, "epoch": 3.0875104026630815, "percentage": 61.75, "elapsed_time": "2:07:06", "remaining_time": "1:18:44", "throughput": 19903.29, "total_tokens": 151800896}
|
|
{"current_steps": 48235, "total_steps": 78105, "loss": 0.1197, "lr": 1.9159574582322725e-06, "epoch": 3.0878304846040585, "percentage": 61.76, "elapsed_time": "2:07:07", "remaining_time": "1:18:43", "throughput": 19903.57, "total_tokens": 151816192}
|
|
{"current_steps": 48240, "total_steps": 78105, "loss": 0.1611, "lr": 1.915414279110518e-06, "epoch": 3.0881505665450355, "percentage": 61.76, "elapsed_time": "2:07:08", "remaining_time": "1:18:42", "throughput": 19903.86, "total_tokens": 151831808}
|
|
{"current_steps": 48245, "total_steps": 78105, "loss": 0.1622, "lr": 1.91487112917993e-06, "epoch": 3.0884706484860125, "percentage": 61.77, "elapsed_time": "2:07:08", "remaining_time": "1:18:41", "throughput": 19904.13, "total_tokens": 151847168}
|
|
{"current_steps": 48250, "total_steps": 78105, "loss": 0.2085, "lr": 1.9143280084676295e-06, "epoch": 3.0887907304269895, "percentage": 61.78, "elapsed_time": "2:07:09", "remaining_time": "1:18:40", "throughput": 19904.5, "total_tokens": 151864256}
|
|
{"current_steps": 48255, "total_steps": 78105, "loss": 0.1397, "lr": 1.9137849170007382e-06, "epoch": 3.089110812367966, "percentage": 61.78, "elapsed_time": "2:07:10", "remaining_time": "1:18:40", "throughput": 19904.75, "total_tokens": 151879168}
|
|
{"current_steps": 48260, "total_steps": 78105, "loss": 0.1202, "lr": 1.9132418548063743e-06, "epoch": 3.089430894308943, "percentage": 61.79, "elapsed_time": "2:07:10", "remaining_time": "1:18:39", "throughput": 19905.03, "total_tokens": 151894592}
|
|
{"current_steps": 48265, "total_steps": 78105, "loss": 0.1594, "lr": 1.9126988219116567e-06, "epoch": 3.08975097624992, "percentage": 61.8, "elapsed_time": "2:07:11", "remaining_time": "1:18:38", "throughput": 19905.25, "total_tokens": 151908864}
|
|
{"current_steps": 48270, "total_steps": 78105, "loss": 0.2039, "lr": 1.9121558183436994e-06, "epoch": 3.090071058190897, "percentage": 61.8, "elapsed_time": "2:07:12", "remaining_time": "1:18:37", "throughput": 19905.55, "total_tokens": 151924736}
|
|
{"current_steps": 48275, "total_steps": 78105, "loss": 0.1521, "lr": 1.9116128441296194e-06, "epoch": 3.0903911401318735, "percentage": 61.81, "elapsed_time": "2:07:12", "remaining_time": "1:18:36", "throughput": 19905.87, "total_tokens": 151940864}
|
|
{"current_steps": 48280, "total_steps": 78105, "loss": 0.1123, "lr": 1.9110698992965284e-06, "epoch": 3.0907112220728505, "percentage": 61.81, "elapsed_time": "2:07:13", "remaining_time": "1:18:35", "throughput": 19906.23, "total_tokens": 151958144}
|
|
{"current_steps": 48285, "total_steps": 78105, "loss": 0.1449, "lr": 1.9105269838715405e-06, "epoch": 3.0910313040138275, "percentage": 61.82, "elapsed_time": "2:07:14", "remaining_time": "1:18:34", "throughput": 19906.56, "total_tokens": 151974144}
|
|
{"current_steps": 48290, "total_steps": 78105, "loss": 0.1393, "lr": 1.909984097881763e-06, "epoch": 3.0913513859548045, "percentage": 61.83, "elapsed_time": "2:07:15", "remaining_time": "1:18:34", "throughput": 19906.95, "total_tokens": 151991488}
|
|
{"current_steps": 48295, "total_steps": 78105, "loss": 0.2076, "lr": 1.9094412413543074e-06, "epoch": 3.0916714678957815, "percentage": 61.83, "elapsed_time": "2:07:15", "remaining_time": "1:18:33", "throughput": 19907.24, "total_tokens": 152006912}
|
|
{"current_steps": 48300, "total_steps": 78105, "loss": 0.2696, "lr": 1.908898414316279e-06, "epoch": 3.091991549836758, "percentage": 61.84, "elapsed_time": "2:07:16", "remaining_time": "1:18:32", "throughput": 19907.45, "total_tokens": 152021184}
|
|
{"current_steps": 48305, "total_steps": 78105, "loss": 0.1362, "lr": 1.908355616794785e-06, "epoch": 3.092311631777735, "percentage": 61.85, "elapsed_time": "2:07:17", "remaining_time": "1:18:31", "throughput": 19907.72, "total_tokens": 152036288}
|
|
{"current_steps": 48310, "total_steps": 78105, "loss": 0.1609, "lr": 1.9078128488169313e-06, "epoch": 3.092631713718712, "percentage": 61.85, "elapsed_time": "2:07:17", "remaining_time": "1:18:30", "throughput": 19907.97, "total_tokens": 152051200}
|
|
{"current_steps": 48315, "total_steps": 78105, "loss": 0.1461, "lr": 1.907270110409818e-06, "epoch": 3.092951795659689, "percentage": 61.86, "elapsed_time": "2:07:18", "remaining_time": "1:18:29", "throughput": 19908.25, "total_tokens": 152066240}
|
|
{"current_steps": 48320, "total_steps": 78105, "loss": 0.2316, "lr": 1.9067274016005488e-06, "epoch": 3.0932718776006656, "percentage": 61.87, "elapsed_time": "2:07:19", "remaining_time": "1:18:28", "throughput": 19908.56, "total_tokens": 152082304}
|
|
{"current_steps": 48325, "total_steps": 78105, "loss": 0.0797, "lr": 1.906184722416222e-06, "epoch": 3.0935919595416426, "percentage": 61.87, "elapsed_time": "2:07:19", "remaining_time": "1:18:27", "throughput": 19908.87, "total_tokens": 152098240}
|
|
{"current_steps": 48330, "total_steps": 78105, "loss": 0.1827, "lr": 1.9056420728839375e-06, "epoch": 3.0939120414826196, "percentage": 61.88, "elapsed_time": "2:07:20", "remaining_time": "1:18:27", "throughput": 19909.13, "total_tokens": 152113536}
|
|
{"current_steps": 48335, "total_steps": 78105, "loss": 0.1234, "lr": 1.9050994530307918e-06, "epoch": 3.0942321234235965, "percentage": 61.88, "elapsed_time": "2:07:21", "remaining_time": "1:18:26", "throughput": 19909.42, "total_tokens": 152129152}
|
|
{"current_steps": 48340, "total_steps": 78105, "loss": 0.2724, "lr": 1.9045568628838814e-06, "epoch": 3.0945522053645735, "percentage": 61.89, "elapsed_time": "2:07:21", "remaining_time": "1:18:25", "throughput": 19909.71, "total_tokens": 152144704}
|
|
{"current_steps": 48345, "total_steps": 78105, "loss": 0.1893, "lr": 1.9040143024702988e-06, "epoch": 3.09487228730555, "percentage": 61.9, "elapsed_time": "2:07:22", "remaining_time": "1:18:24", "throughput": 19910.0, "total_tokens": 152160640}
|
|
{"current_steps": 48350, "total_steps": 78105, "loss": 0.1086, "lr": 1.903471771817138e-06, "epoch": 3.095192369246527, "percentage": 61.9, "elapsed_time": "2:07:23", "remaining_time": "1:18:23", "throughput": 19910.29, "total_tokens": 152176320}
|
|
{"current_steps": 48355, "total_steps": 78105, "loss": 0.1531, "lr": 1.9029292709514894e-06, "epoch": 3.095512451187504, "percentage": 61.91, "elapsed_time": "2:07:23", "remaining_time": "1:18:22", "throughput": 19910.59, "total_tokens": 152191808}
|
|
{"current_steps": 48360, "total_steps": 78105, "loss": 0.1193, "lr": 1.9023867999004433e-06, "epoch": 3.095832533128481, "percentage": 61.92, "elapsed_time": "2:07:24", "remaining_time": "1:18:21", "throughput": 19910.99, "total_tokens": 152209280}
|
|
{"current_steps": 48365, "total_steps": 78105, "loss": 0.1564, "lr": 1.9018443586910875e-06, "epoch": 3.0961526150694576, "percentage": 61.92, "elapsed_time": "2:07:25", "remaining_time": "1:18:21", "throughput": 19911.23, "total_tokens": 152223936}
|
|
{"current_steps": 48370, "total_steps": 78105, "loss": 0.1128, "lr": 1.9013019473505085e-06, "epoch": 3.0964726970104346, "percentage": 61.93, "elapsed_time": "2:07:25", "remaining_time": "1:18:20", "throughput": 19911.44, "total_tokens": 152238400}
|
|
{"current_steps": 48375, "total_steps": 78105, "loss": 0.1442, "lr": 1.9007595659057925e-06, "epoch": 3.0967927789514116, "percentage": 61.94, "elapsed_time": "2:07:26", "remaining_time": "1:18:19", "throughput": 19911.74, "total_tokens": 152254336}
|
|
{"current_steps": 48380, "total_steps": 78105, "loss": 0.1276, "lr": 1.9002172143840217e-06, "epoch": 3.0971128608923886, "percentage": 61.94, "elapsed_time": "2:07:27", "remaining_time": "1:18:18", "throughput": 19912.02, "total_tokens": 152269760}
|
|
{"current_steps": 48385, "total_steps": 78105, "loss": 0.1542, "lr": 1.8996748928122794e-06, "epoch": 3.0974329428333656, "percentage": 61.95, "elapsed_time": "2:07:27", "remaining_time": "1:18:17", "throughput": 19912.27, "total_tokens": 152284736}
|
|
{"current_steps": 48390, "total_steps": 78105, "loss": 0.1119, "lr": 1.8991326012176455e-06, "epoch": 3.097753024774342, "percentage": 61.96, "elapsed_time": "2:07:28", "remaining_time": "1:18:16", "throughput": 19912.58, "total_tokens": 152300608}
|
|
{"current_steps": 48395, "total_steps": 78105, "loss": 0.1317, "lr": 1.898590339627201e-06, "epoch": 3.098073106715319, "percentage": 61.96, "elapsed_time": "2:07:29", "remaining_time": "1:18:15", "throughput": 19912.83, "total_tokens": 152315264}
|
|
{"current_steps": 48400, "total_steps": 78105, "loss": 0.1217, "lr": 1.8980481080680207e-06, "epoch": 3.098393188656296, "percentage": 61.97, "elapsed_time": "2:07:29", "remaining_time": "1:18:14", "throughput": 19913.2, "total_tokens": 152332096}
|
|
{"current_steps": 48405, "total_steps": 78105, "loss": 0.1199, "lr": 1.897505906567184e-06, "epoch": 3.098713270597273, "percentage": 61.97, "elapsed_time": "2:07:30", "remaining_time": "1:18:14", "throughput": 19913.49, "total_tokens": 152347712}
|
|
{"current_steps": 48410, "total_steps": 78105, "loss": 0.2118, "lr": 1.8969637351517625e-06, "epoch": 3.0990333525382496, "percentage": 61.98, "elapsed_time": "2:07:31", "remaining_time": "1:18:13", "throughput": 19913.76, "total_tokens": 152362816}
|
|
{"current_steps": 48415, "total_steps": 78105, "loss": 0.1492, "lr": 1.8964215938488312e-06, "epoch": 3.0993534344792266, "percentage": 61.99, "elapsed_time": "2:07:31", "remaining_time": "1:18:12", "throughput": 19914.08, "total_tokens": 152378880}
|
|
{"current_steps": 48420, "total_steps": 78105, "loss": 0.1298, "lr": 1.8958794826854623e-06, "epoch": 3.0996735164202036, "percentage": 61.99, "elapsed_time": "2:07:32", "remaining_time": "1:18:11", "throughput": 19914.34, "total_tokens": 152394368}
|
|
{"current_steps": 48425, "total_steps": 78105, "loss": 0.1463, "lr": 1.8953374016887247e-06, "epoch": 3.0999935983611806, "percentage": 62.0, "elapsed_time": "2:07:33", "remaining_time": "1:18:10", "throughput": 19914.62, "total_tokens": 152409792}
|
|
{"current_steps": 48430, "total_steps": 78105, "loss": 0.1744, "lr": 1.8947953508856887e-06, "epoch": 3.100313680302157, "percentage": 62.01, "elapsed_time": "2:07:33", "remaining_time": "1:18:09", "throughput": 19914.86, "total_tokens": 152424640}
|
|
{"current_steps": 48435, "total_steps": 78105, "loss": 0.1828, "lr": 1.8942533303034192e-06, "epoch": 3.100633762243134, "percentage": 62.01, "elapsed_time": "2:07:34", "remaining_time": "1:18:08", "throughput": 19915.23, "total_tokens": 152441856}
|
|
{"current_steps": 48440, "total_steps": 78105, "loss": 0.2279, "lr": 1.8937113399689838e-06, "epoch": 3.100953844184111, "percentage": 62.02, "elapsed_time": "2:07:35", "remaining_time": "1:18:08", "throughput": 19915.51, "total_tokens": 152457216}
|
|
{"current_steps": 48445, "total_steps": 78105, "loss": 0.1664, "lr": 1.8931693799094456e-06, "epoch": 3.101273926125088, "percentage": 62.03, "elapsed_time": "2:07:35", "remaining_time": "1:18:07", "throughput": 19915.79, "total_tokens": 152472704}
|
|
{"current_steps": 48450, "total_steps": 78105, "loss": 0.1746, "lr": 1.8926274501518687e-06, "epoch": 3.101594008066065, "percentage": 62.03, "elapsed_time": "2:07:36", "remaining_time": "1:18:06", "throughput": 19916.11, "total_tokens": 152488896}
|
|
{"current_steps": 48455, "total_steps": 78105, "loss": 0.1245, "lr": 1.892085550723312e-06, "epoch": 3.1019140900070417, "percentage": 62.04, "elapsed_time": "2:07:37", "remaining_time": "1:18:05", "throughput": 19916.39, "total_tokens": 152504384}
|
|
{"current_steps": 48460, "total_steps": 78105, "loss": 0.1303, "lr": 1.8915436816508379e-06, "epoch": 3.1022341719480186, "percentage": 62.04, "elapsed_time": "2:07:37", "remaining_time": "1:18:04", "throughput": 19916.64, "total_tokens": 152519104}
|
|
{"current_steps": 48465, "total_steps": 78105, "loss": 0.1384, "lr": 1.8910018429615017e-06, "epoch": 3.1025542538889956, "percentage": 62.05, "elapsed_time": "2:07:38", "remaining_time": "1:18:03", "throughput": 19916.92, "total_tokens": 152534208}
|
|
{"current_steps": 48470, "total_steps": 78105, "loss": 0.1584, "lr": 1.8904600346823617e-06, "epoch": 3.1028743358299726, "percentage": 62.06, "elapsed_time": "2:07:39", "remaining_time": "1:18:02", "throughput": 19917.18, "total_tokens": 152549184}
|
|
{"current_steps": 48475, "total_steps": 78105, "loss": 0.2436, "lr": 1.8899182568404723e-06, "epoch": 3.103194417770949, "percentage": 62.06, "elapsed_time": "2:07:39", "remaining_time": "1:18:02", "throughput": 19917.43, "total_tokens": 152564416}
|
|
{"current_steps": 48480, "total_steps": 78105, "loss": 0.1038, "lr": 1.8893765094628875e-06, "epoch": 3.103514499711926, "percentage": 62.07, "elapsed_time": "2:07:40", "remaining_time": "1:18:01", "throughput": 19917.72, "total_tokens": 152580032}
|
|
{"current_steps": 48485, "total_steps": 78105, "loss": 0.1456, "lr": 1.8888347925766598e-06, "epoch": 3.103834581652903, "percentage": 62.08, "elapsed_time": "2:07:41", "remaining_time": "1:18:00", "throughput": 19918.02, "total_tokens": 152596160}
|
|
{"current_steps": 48490, "total_steps": 78105, "loss": 0.1355, "lr": 1.8882931062088383e-06, "epoch": 3.10415466359388, "percentage": 62.08, "elapsed_time": "2:07:41", "remaining_time": "1:17:59", "throughput": 19918.35, "total_tokens": 152612864}
|
|
{"current_steps": 48495, "total_steps": 78105, "loss": 0.1907, "lr": 1.8877514503864738e-06, "epoch": 3.1044747455348567, "percentage": 62.09, "elapsed_time": "2:07:42", "remaining_time": "1:17:58", "throughput": 19918.66, "total_tokens": 152628672}
|
|
{"current_steps": 48500, "total_steps": 78105, "loss": 0.0881, "lr": 1.8872098251366117e-06, "epoch": 3.1047948274758337, "percentage": 62.1, "elapsed_time": "2:07:43", "remaining_time": "1:17:57", "throughput": 19918.93, "total_tokens": 152643968}
|
|
{"current_steps": 48505, "total_steps": 78105, "loss": 0.1305, "lr": 1.8866682304862999e-06, "epoch": 3.1051149094168107, "percentage": 62.1, "elapsed_time": "2:07:43", "remaining_time": "1:17:56", "throughput": 19919.27, "total_tokens": 152660288}
|
|
{"current_steps": 48510, "total_steps": 78105, "loss": 0.2122, "lr": 1.886126666462581e-06, "epoch": 3.1054349913577877, "percentage": 62.11, "elapsed_time": "2:07:44", "remaining_time": "1:17:56", "throughput": 19919.56, "total_tokens": 152675712}
|
|
{"current_steps": 48515, "total_steps": 78105, "loss": 0.1203, "lr": 1.8855851330925002e-06, "epoch": 3.1057550732987647, "percentage": 62.12, "elapsed_time": "2:07:45", "remaining_time": "1:17:55", "throughput": 19919.85, "total_tokens": 152691840}
|
|
{"current_steps": 48520, "total_steps": 78105, "loss": 0.1581, "lr": 1.885043630403096e-06, "epoch": 3.106075155239741, "percentage": 62.12, "elapsed_time": "2:07:46", "remaining_time": "1:17:54", "throughput": 19920.18, "total_tokens": 152708224}
|
|
{"current_steps": 48525, "total_steps": 78105, "loss": 0.1143, "lr": 1.8845021584214104e-06, "epoch": 3.106395237180718, "percentage": 62.13, "elapsed_time": "2:07:46", "remaining_time": "1:17:53", "throughput": 19920.51, "total_tokens": 152725376}
|
|
{"current_steps": 48530, "total_steps": 78105, "loss": 0.1379, "lr": 1.8839607171744805e-06, "epoch": 3.106715319121695, "percentage": 62.13, "elapsed_time": "2:07:47", "remaining_time": "1:17:52", "throughput": 19920.88, "total_tokens": 152742656}
|
|
{"current_steps": 48535, "total_steps": 78105, "loss": 0.1376, "lr": 1.8834193066893436e-06, "epoch": 3.107035401062672, "percentage": 62.14, "elapsed_time": "2:07:48", "remaining_time": "1:17:51", "throughput": 19921.1, "total_tokens": 152757120}
|
|
{"current_steps": 48540, "total_steps": 78105, "loss": 0.1454, "lr": 1.8828779269930362e-06, "epoch": 3.1073554830036487, "percentage": 62.15, "elapsed_time": "2:07:48", "remaining_time": "1:17:50", "throughput": 19921.34, "total_tokens": 152771840}
|
|
{"current_steps": 48545, "total_steps": 78105, "loss": 0.2025, "lr": 1.882336578112589e-06, "epoch": 3.1076755649446257, "percentage": 62.15, "elapsed_time": "2:07:49", "remaining_time": "1:17:50", "throughput": 19921.65, "total_tokens": 152787712}
|
|
{"current_steps": 48550, "total_steps": 78105, "loss": 0.1196, "lr": 1.8817952600750373e-06, "epoch": 3.1079956468856027, "percentage": 62.16, "elapsed_time": "2:07:50", "remaining_time": "1:17:49", "throughput": 19921.92, "total_tokens": 152803136}
|
|
{"current_steps": 48555, "total_steps": 78105, "loss": 0.1579, "lr": 1.8812539729074092e-06, "epoch": 3.1083157288265797, "percentage": 62.17, "elapsed_time": "2:07:50", "remaining_time": "1:17:48", "throughput": 19922.2, "total_tokens": 152818432}
|
|
{"current_steps": 48560, "total_steps": 78105, "loss": 0.1381, "lr": 1.880712716636735e-06, "epoch": 3.1086358107675567, "percentage": 62.17, "elapsed_time": "2:07:51", "remaining_time": "1:17:47", "throughput": 19922.49, "total_tokens": 152833920}
|
|
{"current_steps": 48565, "total_steps": 78105, "loss": 0.1764, "lr": 1.8801714912900415e-06, "epoch": 3.1089558927085332, "percentage": 62.18, "elapsed_time": "2:07:52", "remaining_time": "1:17:46", "throughput": 19922.8, "total_tokens": 152850112}
|
|
{"current_steps": 48570, "total_steps": 78105, "loss": 0.1551, "lr": 1.8796302968943564e-06, "epoch": 3.10927597464951, "percentage": 62.19, "elapsed_time": "2:07:52", "remaining_time": "1:17:45", "throughput": 19923.08, "total_tokens": 152865536}
|
|
{"current_steps": 48575, "total_steps": 78105, "loss": 0.2279, "lr": 1.8790891334767017e-06, "epoch": 3.109596056590487, "percentage": 62.19, "elapsed_time": "2:07:53", "remaining_time": "1:17:44", "throughput": 19923.38, "total_tokens": 152881664}
|
|
{"current_steps": 48580, "total_steps": 78105, "loss": 0.1577, "lr": 1.8785480010641018e-06, "epoch": 3.109916138531464, "percentage": 62.2, "elapsed_time": "2:07:54", "remaining_time": "1:17:44", "throughput": 19923.71, "total_tokens": 152898112}
|
|
{"current_steps": 48585, "total_steps": 78105, "loss": 0.1234, "lr": 1.8780068996835776e-06, "epoch": 3.1102362204724407, "percentage": 62.2, "elapsed_time": "2:07:54", "remaining_time": "1:17:43", "throughput": 19923.98, "total_tokens": 152913280}
|
|
{"current_steps": 48590, "total_steps": 78105, "loss": 0.1973, "lr": 1.8774658293621486e-06, "epoch": 3.1105563024134177, "percentage": 62.21, "elapsed_time": "2:07:55", "remaining_time": "1:17:42", "throughput": 19924.25, "total_tokens": 152928960}
|
|
{"current_steps": 48595, "total_steps": 78105, "loss": 0.1454, "lr": 1.8769247901268348e-06, "epoch": 3.1108763843543947, "percentage": 62.22, "elapsed_time": "2:07:56", "remaining_time": "1:17:41", "throughput": 19924.65, "total_tokens": 152946880}
|
|
{"current_steps": 48600, "total_steps": 78105, "loss": 0.1297, "lr": 1.8763837820046502e-06, "epoch": 3.1111964662953717, "percentage": 62.22, "elapsed_time": "2:07:56", "remaining_time": "1:17:40", "throughput": 19924.95, "total_tokens": 152962880}
|
|
{"current_steps": 48605, "total_steps": 78105, "loss": 0.1453, "lr": 1.8758428050226124e-06, "epoch": 3.1115165482363487, "percentage": 62.23, "elapsed_time": "2:07:57", "remaining_time": "1:17:39", "throughput": 19925.24, "total_tokens": 152978432}
|
|
{"current_steps": 48610, "total_steps": 78105, "loss": 0.1802, "lr": 1.8753018592077326e-06, "epoch": 3.1118366301773253, "percentage": 62.24, "elapsed_time": "2:07:58", "remaining_time": "1:17:38", "throughput": 19925.53, "total_tokens": 152993984}
|
|
{"current_steps": 48615, "total_steps": 78105, "loss": 0.1786, "lr": 1.8747609445870246e-06, "epoch": 3.1121567121183022, "percentage": 62.24, "elapsed_time": "2:07:58", "remaining_time": "1:17:38", "throughput": 19925.77, "total_tokens": 153008512}
|
|
{"current_steps": 48620, "total_steps": 78105, "loss": 0.1538, "lr": 1.8742200611874978e-06, "epoch": 3.1124767940592792, "percentage": 62.25, "elapsed_time": "2:07:59", "remaining_time": "1:17:37", "throughput": 19926.13, "total_tokens": 153025344}
|
|
{"current_steps": 48625, "total_steps": 78105, "loss": 0.1805, "lr": 1.8736792090361628e-06, "epoch": 3.1127968760002562, "percentage": 62.26, "elapsed_time": "2:08:00", "remaining_time": "1:17:36", "throughput": 19926.37, "total_tokens": 153040320}
|
|
{"current_steps": 48630, "total_steps": 78105, "loss": 0.1545, "lr": 1.8731383881600247e-06, "epoch": 3.1131169579412328, "percentage": 62.26, "elapsed_time": "2:08:00", "remaining_time": "1:17:35", "throughput": 19926.63, "total_tokens": 153055360}
|
|
{"current_steps": 48635, "total_steps": 78105, "loss": 0.1554, "lr": 1.8725975985860912e-06, "epoch": 3.1134370398822098, "percentage": 62.27, "elapsed_time": "2:08:01", "remaining_time": "1:17:34", "throughput": 19926.92, "total_tokens": 153070656}
|
|
{"current_steps": 48640, "total_steps": 78105, "loss": 0.1662, "lr": 1.8720568403413642e-06, "epoch": 3.1137571218231868, "percentage": 62.28, "elapsed_time": "2:08:02", "remaining_time": "1:17:33", "throughput": 19927.22, "total_tokens": 153086336}
|
|
{"current_steps": 48645, "total_steps": 78105, "loss": 0.2895, "lr": 1.8715161134528487e-06, "epoch": 3.1140772037641637, "percentage": 62.28, "elapsed_time": "2:08:02", "remaining_time": "1:17:32", "throughput": 19927.46, "total_tokens": 153101056}
|
|
{"current_steps": 48650, "total_steps": 78105, "loss": 0.1772, "lr": 1.8709754179475442e-06, "epoch": 3.1143972857051407, "percentage": 62.29, "elapsed_time": "2:08:03", "remaining_time": "1:17:32", "throughput": 19927.77, "total_tokens": 153117568}
|
|
{"current_steps": 48655, "total_steps": 78105, "loss": 0.1515, "lr": 1.8704347538524509e-06, "epoch": 3.1147173676461173, "percentage": 62.29, "elapsed_time": "2:08:04", "remaining_time": "1:17:31", "throughput": 19928.04, "total_tokens": 153132608}
|
|
{"current_steps": 48660, "total_steps": 78105, "loss": 0.1736, "lr": 1.8698941211945675e-06, "epoch": 3.1150374495870943, "percentage": 62.3, "elapsed_time": "2:08:04", "remaining_time": "1:17:30", "throughput": 19928.33, "total_tokens": 153148480}
|
|
{"current_steps": 48665, "total_steps": 78105, "loss": 0.1968, "lr": 1.869353520000889e-06, "epoch": 3.1153575315280713, "percentage": 62.31, "elapsed_time": "2:08:05", "remaining_time": "1:17:29", "throughput": 19928.57, "total_tokens": 153163328}
|
|
{"current_steps": 48670, "total_steps": 78105, "loss": 0.2051, "lr": 1.868812950298411e-06, "epoch": 3.1156776134690483, "percentage": 62.31, "elapsed_time": "2:08:06", "remaining_time": "1:17:28", "throughput": 19928.89, "total_tokens": 153179072}
|
|
{"current_steps": 48675, "total_steps": 78105, "loss": 0.1486, "lr": 1.868272412114126e-06, "epoch": 3.115997695410025, "percentage": 62.32, "elapsed_time": "2:08:06", "remaining_time": "1:17:27", "throughput": 19929.18, "total_tokens": 153194944}
|
|
{"current_steps": 48680, "total_steps": 78105, "loss": 0.129, "lr": 1.8677319054750272e-06, "epoch": 3.116317777351002, "percentage": 62.33, "elapsed_time": "2:08:07", "remaining_time": "1:17:26", "throughput": 19929.43, "total_tokens": 153209856}
|
|
{"current_steps": 48685, "total_steps": 78105, "loss": 0.1309, "lr": 1.867191430408103e-06, "epoch": 3.116637859291979, "percentage": 62.33, "elapsed_time": "2:08:08", "remaining_time": "1:17:25", "throughput": 19929.72, "total_tokens": 153225472}
|
|
{"current_steps": 48690, "total_steps": 78105, "loss": 0.1015, "lr": 1.8666509869403433e-06, "epoch": 3.1169579412329558, "percentage": 62.34, "elapsed_time": "2:08:09", "remaining_time": "1:17:25", "throughput": 19930.14, "total_tokens": 153243392}
|
|
{"current_steps": 48695, "total_steps": 78105, "loss": 0.1797, "lr": 1.8661105750987333e-06, "epoch": 3.1172780231739323, "percentage": 62.35, "elapsed_time": "2:08:09", "remaining_time": "1:17:24", "throughput": 19930.4, "total_tokens": 153258752}
|
|
{"current_steps": 48700, "total_steps": 78105, "loss": 0.3228, "lr": 1.86557019491026e-06, "epoch": 3.1175981051149093, "percentage": 62.35, "elapsed_time": "2:08:10", "remaining_time": "1:17:23", "throughput": 19930.65, "total_tokens": 153273856}
|
|
{"current_steps": 48705, "total_steps": 78105, "loss": 0.1741, "lr": 1.8650298464019057e-06, "epoch": 3.1179181870558863, "percentage": 62.36, "elapsed_time": "2:08:11", "remaining_time": "1:17:22", "throughput": 19930.98, "total_tokens": 153290496}
|
|
{"current_steps": 48710, "total_steps": 78105, "loss": 0.1642, "lr": 1.8644895296006539e-06, "epoch": 3.1182382689968633, "percentage": 62.36, "elapsed_time": "2:08:11", "remaining_time": "1:17:21", "throughput": 19931.29, "total_tokens": 153306752}
|
|
{"current_steps": 48715, "total_steps": 78105, "loss": 0.1198, "lr": 1.8639492445334857e-06, "epoch": 3.1185583509378403, "percentage": 62.37, "elapsed_time": "2:08:12", "remaining_time": "1:17:20", "throughput": 19931.52, "total_tokens": 153321600}
|
|
{"current_steps": 48720, "total_steps": 78105, "loss": 0.1153, "lr": 1.8634089912273779e-06, "epoch": 3.118878432878817, "percentage": 62.38, "elapsed_time": "2:08:13", "remaining_time": "1:17:20", "throughput": 19931.79, "total_tokens": 153336640}
|
|
{"current_steps": 48725, "total_steps": 78105, "loss": 0.1617, "lr": 1.8628687697093101e-06, "epoch": 3.119198514819794, "percentage": 62.38, "elapsed_time": "2:08:13", "remaining_time": "1:17:19", "throughput": 19932.11, "total_tokens": 153352832}
|
|
{"current_steps": 48730, "total_steps": 78105, "loss": 0.2183, "lr": 1.8623285800062565e-06, "epoch": 3.119518596760771, "percentage": 62.39, "elapsed_time": "2:08:14", "remaining_time": "1:17:18", "throughput": 19932.43, "total_tokens": 153368768}
|
|
{"current_steps": 48735, "total_steps": 78105, "loss": 0.1451, "lr": 1.8617884221451927e-06, "epoch": 3.119838678701748, "percentage": 62.4, "elapsed_time": "2:08:15", "remaining_time": "1:17:17", "throughput": 19932.8, "total_tokens": 153385920}
|
|
{"current_steps": 48740, "total_steps": 78105, "loss": 0.1351, "lr": 1.8612482961530897e-06, "epoch": 3.1201587606427243, "percentage": 62.4, "elapsed_time": "2:08:15", "remaining_time": "1:17:16", "throughput": 19933.09, "total_tokens": 153401216}
|
|
{"current_steps": 48745, "total_steps": 78105, "loss": 0.1363, "lr": 1.8607082020569214e-06, "epoch": 3.1204788425837013, "percentage": 62.41, "elapsed_time": "2:08:16", "remaining_time": "1:17:15", "throughput": 19933.38, "total_tokens": 153417024}
|
|
{"current_steps": 48750, "total_steps": 78105, "loss": 0.2222, "lr": 1.8601681398836541e-06, "epoch": 3.1207989245246783, "percentage": 62.42, "elapsed_time": "2:08:17", "remaining_time": "1:17:14", "throughput": 19933.72, "total_tokens": 153433664}
|
|
{"current_steps": 48755, "total_steps": 78105, "loss": 0.1448, "lr": 1.859628109660258e-06, "epoch": 3.1211190064656553, "percentage": 62.42, "elapsed_time": "2:08:17", "remaining_time": "1:17:14", "throughput": 19934.04, "total_tokens": 153450112}
|
|
{"current_steps": 48760, "total_steps": 78105, "loss": 0.1144, "lr": 1.8590881114136978e-06, "epoch": 3.1214390884066323, "percentage": 62.43, "elapsed_time": "2:08:18", "remaining_time": "1:17:13", "throughput": 19934.27, "total_tokens": 153464512}
|
|
{"current_steps": 48765, "total_steps": 78105, "loss": 0.1898, "lr": 1.8585481451709386e-06, "epoch": 3.121759170347609, "percentage": 62.44, "elapsed_time": "2:08:19", "remaining_time": "1:17:12", "throughput": 19934.55, "total_tokens": 153479808}
|
|
{"current_steps": 48770, "total_steps": 78105, "loss": 0.2137, "lr": 1.8580082109589454e-06, "epoch": 3.122079252288586, "percentage": 62.44, "elapsed_time": "2:08:19", "remaining_time": "1:17:11", "throughput": 19934.85, "total_tokens": 153495680}
|
|
{"current_steps": 48775, "total_steps": 78105, "loss": 0.1361, "lr": 1.8574683088046769e-06, "epoch": 3.122399334229563, "percentage": 62.45, "elapsed_time": "2:08:20", "remaining_time": "1:17:10", "throughput": 19935.2, "total_tokens": 153512640}
|
|
{"current_steps": 48780, "total_steps": 78105, "loss": 0.1215, "lr": 1.856928438735095e-06, "epoch": 3.12271941617054, "percentage": 62.45, "elapsed_time": "2:08:21", "remaining_time": "1:17:09", "throughput": 19935.45, "total_tokens": 153527680}
|
|
{"current_steps": 48785, "total_steps": 78105, "loss": 0.2302, "lr": 1.8563886007771564e-06, "epoch": 3.1230394981115164, "percentage": 62.46, "elapsed_time": "2:08:21", "remaining_time": "1:17:08", "throughput": 19935.69, "total_tokens": 153542336}
|
|
{"current_steps": 48790, "total_steps": 78105, "loss": 0.1459, "lr": 1.8558487949578192e-06, "epoch": 3.1233595800524934, "percentage": 62.47, "elapsed_time": "2:08:22", "remaining_time": "1:17:08", "throughput": 19935.95, "total_tokens": 153557568}
|
|
{"current_steps": 48795, "total_steps": 78105, "loss": 0.1635, "lr": 1.8553090213040372e-06, "epoch": 3.1236796619934704, "percentage": 62.47, "elapsed_time": "2:08:23", "remaining_time": "1:17:07", "throughput": 19936.29, "total_tokens": 153574272}
|
|
{"current_steps": 48800, "total_steps": 78105, "loss": 0.2197, "lr": 1.8547692798427659e-06, "epoch": 3.1239997439344473, "percentage": 62.48, "elapsed_time": "2:08:23", "remaining_time": "1:17:06", "throughput": 19936.56, "total_tokens": 153589184}
|
|
{"current_steps": 48805, "total_steps": 78105, "loss": 0.2185, "lr": 1.854229570600955e-06, "epoch": 3.124319825875424, "percentage": 62.49, "elapsed_time": "2:08:24", "remaining_time": "1:17:05", "throughput": 19936.81, "total_tokens": 153603840}
|
|
{"current_steps": 48810, "total_steps": 78105, "loss": 0.1806, "lr": 1.853689893605556e-06, "epoch": 3.124639907816401, "percentage": 62.49, "elapsed_time": "2:08:25", "remaining_time": "1:17:04", "throughput": 19937.11, "total_tokens": 153620288}
|
|
{"current_steps": 48815, "total_steps": 78105, "loss": 0.1389, "lr": 1.8531502488835162e-06, "epoch": 3.124959989757378, "percentage": 62.5, "elapsed_time": "2:08:25", "remaining_time": "1:17:03", "throughput": 19937.35, "total_tokens": 153635264}
|
|
{"current_steps": 48820, "total_steps": 78105, "loss": 0.2212, "lr": 1.852610636461785e-06, "epoch": 3.125280071698355, "percentage": 62.51, "elapsed_time": "2:08:26", "remaining_time": "1:17:02", "throughput": 19937.66, "total_tokens": 153651264}
|
|
{"current_steps": 48825, "total_steps": 78105, "loss": 0.1795, "lr": 1.8520710563673055e-06, "epoch": 3.125600153639332, "percentage": 62.51, "elapsed_time": "2:08:27", "remaining_time": "1:17:01", "throughput": 19937.96, "total_tokens": 153667072}
|
|
{"current_steps": 48830, "total_steps": 78105, "loss": 0.1396, "lr": 1.8515315086270228e-06, "epoch": 3.1259202355803084, "percentage": 62.52, "elapsed_time": "2:08:27", "remaining_time": "1:17:01", "throughput": 19938.25, "total_tokens": 153682880}
|
|
{"current_steps": 48835, "total_steps": 78105, "loss": 0.1204, "lr": 1.8509919932678797e-06, "epoch": 3.1262403175212854, "percentage": 62.52, "elapsed_time": "2:08:28", "remaining_time": "1:17:00", "throughput": 19938.54, "total_tokens": 153698880}
|
|
{"current_steps": 48840, "total_steps": 78105, "loss": 0.154, "lr": 1.8504525103168147e-06, "epoch": 3.1265603994622624, "percentage": 62.53, "elapsed_time": "2:08:29", "remaining_time": "1:16:59", "throughput": 19938.87, "total_tokens": 153715136}
|
|
{"current_steps": 48845, "total_steps": 78105, "loss": 0.1258, "lr": 1.8499130598007685e-06, "epoch": 3.1268804814032394, "percentage": 62.54, "elapsed_time": "2:08:29", "remaining_time": "1:16:58", "throughput": 19939.09, "total_tokens": 153729728}
|
|
{"current_steps": 48850, "total_steps": 78105, "loss": 0.1056, "lr": 1.8493736417466774e-06, "epoch": 3.127200563344216, "percentage": 62.54, "elapsed_time": "2:08:30", "remaining_time": "1:16:57", "throughput": 19939.47, "total_tokens": 153747264}
|
|
{"current_steps": 48855, "total_steps": 78105, "loss": 0.1394, "lr": 1.8488342561814787e-06, "epoch": 3.127520645285193, "percentage": 62.55, "elapsed_time": "2:08:31", "remaining_time": "1:16:56", "throughput": 19939.71, "total_tokens": 153761728}
|
|
{"current_steps": 48860, "total_steps": 78105, "loss": 0.1739, "lr": 1.8482949031321045e-06, "epoch": 3.12784072722617, "percentage": 62.56, "elapsed_time": "2:08:32", "remaining_time": "1:16:56", "throughput": 19940.02, "total_tokens": 153777856}
|
|
{"current_steps": 48865, "total_steps": 78105, "loss": 0.1185, "lr": 1.8477555826254893e-06, "epoch": 3.128160809167147, "percentage": 62.56, "elapsed_time": "2:08:32", "remaining_time": "1:16:55", "throughput": 19940.29, "total_tokens": 153793344}
|
|
{"current_steps": 48870, "total_steps": 78105, "loss": 0.1611, "lr": 1.8472162946885617e-06, "epoch": 3.128480891108124, "percentage": 62.57, "elapsed_time": "2:08:33", "remaining_time": "1:16:54", "throughput": 19940.56, "total_tokens": 153808704}
|
|
{"current_steps": 48875, "total_steps": 78105, "loss": 0.1739, "lr": 1.8466770393482526e-06, "epoch": 3.1288009730491004, "percentage": 62.58, "elapsed_time": "2:08:34", "remaining_time": "1:16:53", "throughput": 19940.85, "total_tokens": 153824576}
|
|
{"current_steps": 48880, "total_steps": 78105, "loss": 0.171, "lr": 1.8461378166314886e-06, "epoch": 3.1291210549900774, "percentage": 62.58, "elapsed_time": "2:08:34", "remaining_time": "1:16:52", "throughput": 19941.1, "total_tokens": 153839616}
|
|
{"current_steps": 48885, "total_steps": 78105, "loss": 0.1554, "lr": 1.8455986265651966e-06, "epoch": 3.1294411369310544, "percentage": 62.59, "elapsed_time": "2:08:35", "remaining_time": "1:16:51", "throughput": 19941.41, "total_tokens": 153855552}
|
|
{"current_steps": 48890, "total_steps": 78105, "loss": 0.1696, "lr": 1.845059469176301e-06, "epoch": 3.1297612188720314, "percentage": 62.6, "elapsed_time": "2:08:36", "remaining_time": "1:16:50", "throughput": 19941.72, "total_tokens": 153871616}
|
|
{"current_steps": 48895, "total_steps": 78105, "loss": 0.1619, "lr": 1.8445203444917231e-06, "epoch": 3.130081300813008, "percentage": 62.6, "elapsed_time": "2:08:36", "remaining_time": "1:16:49", "throughput": 19941.97, "total_tokens": 153886720}
|
|
{"current_steps": 48900, "total_steps": 78105, "loss": 0.1497, "lr": 1.8439812525383858e-06, "epoch": 3.130401382753985, "percentage": 62.61, "elapsed_time": "2:08:37", "remaining_time": "1:16:49", "throughput": 19942.21, "total_tokens": 153901568}
|
|
{"current_steps": 48905, "total_steps": 78105, "loss": 0.1483, "lr": 1.8434421933432068e-06, "epoch": 3.130721464694962, "percentage": 62.61, "elapsed_time": "2:08:38", "remaining_time": "1:16:48", "throughput": 19942.58, "total_tokens": 153918720}
|
|
{"current_steps": 48910, "total_steps": 78105, "loss": 0.1223, "lr": 1.842903166933106e-06, "epoch": 3.131041546635939, "percentage": 62.62, "elapsed_time": "2:08:38", "remaining_time": "1:16:47", "throughput": 19942.94, "total_tokens": 153935808}
|
|
{"current_steps": 48915, "total_steps": 78105, "loss": 0.1352, "lr": 1.8423641733349974e-06, "epoch": 3.131361628576916, "percentage": 62.63, "elapsed_time": "2:08:39", "remaining_time": "1:16:46", "throughput": 19943.25, "total_tokens": 153951616}
|
|
{"current_steps": 48920, "total_steps": 78105, "loss": 0.1278, "lr": 1.8418252125757974e-06, "epoch": 3.1316817105178925, "percentage": 62.63, "elapsed_time": "2:08:40", "remaining_time": "1:16:45", "throughput": 19943.51, "total_tokens": 153966976}
|
|
{"current_steps": 48925, "total_steps": 78105, "loss": 0.1283, "lr": 1.841286284682417e-06, "epoch": 3.1320017924588694, "percentage": 62.64, "elapsed_time": "2:08:40", "remaining_time": "1:16:44", "throughput": 19943.79, "total_tokens": 153982720}
|
|
{"current_steps": 48930, "total_steps": 78105, "loss": 0.2001, "lr": 1.8407473896817688e-06, "epoch": 3.1323218743998464, "percentage": 62.65, "elapsed_time": "2:08:41", "remaining_time": "1:16:44", "throughput": 19944.05, "total_tokens": 153998080}
|
|
{"current_steps": 48935, "total_steps": 78105, "loss": 0.1943, "lr": 1.8402085276007614e-06, "epoch": 3.1326419563408234, "percentage": 62.65, "elapsed_time": "2:08:42", "remaining_time": "1:16:43", "throughput": 19944.31, "total_tokens": 154013632}
|
|
{"current_steps": 48940, "total_steps": 78105, "loss": 0.1954, "lr": 1.8396696984663036e-06, "epoch": 3.1329620382818, "percentage": 62.66, "elapsed_time": "2:08:42", "remaining_time": "1:16:42", "throughput": 19944.6, "total_tokens": 154029248}
|
|
{"current_steps": 48945, "total_steps": 78105, "loss": 0.128, "lr": 1.8391309023053028e-06, "epoch": 3.133282120222777, "percentage": 62.67, "elapsed_time": "2:08:43", "remaining_time": "1:16:41", "throughput": 19944.86, "total_tokens": 154044096}
|
|
{"current_steps": 48950, "total_steps": 78105, "loss": 0.1484, "lr": 1.838592139144661e-06, "epoch": 3.133602202163754, "percentage": 62.67, "elapsed_time": "2:08:44", "remaining_time": "1:16:40", "throughput": 19945.14, "total_tokens": 154060032}
|
|
{"current_steps": 48955, "total_steps": 78105, "loss": 0.2031, "lr": 1.8380534090112834e-06, "epoch": 3.133922284104731, "percentage": 62.68, "elapsed_time": "2:08:44", "remaining_time": "1:16:39", "throughput": 19945.43, "total_tokens": 154075776}
|
|
{"current_steps": 48960, "total_steps": 78105, "loss": 0.2037, "lr": 1.8375147119320705e-06, "epoch": 3.1342423660457075, "percentage": 62.68, "elapsed_time": "2:08:45", "remaining_time": "1:16:38", "throughput": 19945.66, "total_tokens": 154090176}
|
|
{"current_steps": 48965, "total_steps": 78105, "loss": 0.152, "lr": 1.8369760479339222e-06, "epoch": 3.1345624479866845, "percentage": 62.69, "elapsed_time": "2:08:46", "remaining_time": "1:16:38", "throughput": 19946.0, "total_tokens": 154107072}
|
|
{"current_steps": 48970, "total_steps": 78105, "loss": 0.1923, "lr": 1.8364374170437357e-06, "epoch": 3.1348825299276615, "percentage": 62.7, "elapsed_time": "2:08:46", "remaining_time": "1:16:37", "throughput": 19946.25, "total_tokens": 154121792}
|
|
{"current_steps": 48975, "total_steps": 78105, "loss": 0.152, "lr": 1.8358988192884098e-06, "epoch": 3.1352026118686385, "percentage": 62.7, "elapsed_time": "2:08:47", "remaining_time": "1:16:36", "throughput": 19946.59, "total_tokens": 154138240}
|
|
{"current_steps": 48980, "total_steps": 78105, "loss": 0.1644, "lr": 1.835360254694836e-06, "epoch": 3.1355226938096155, "percentage": 62.71, "elapsed_time": "2:08:48", "remaining_time": "1:16:35", "throughput": 19946.85, "total_tokens": 154153408}
|
|
{"current_steps": 48985, "total_steps": 78105, "loss": 0.1467, "lr": 1.83482172328991e-06, "epoch": 3.135842775750592, "percentage": 62.72, "elapsed_time": "2:08:48", "remaining_time": "1:16:34", "throughput": 19947.14, "total_tokens": 154169216}
|
|
{"current_steps": 48990, "total_steps": 78105, "loss": 0.1552, "lr": 1.8342832251005216e-06, "epoch": 3.136162857691569, "percentage": 62.72, "elapsed_time": "2:08:49", "remaining_time": "1:16:33", "throughput": 19947.42, "total_tokens": 154184832}
|
|
{"current_steps": 48995, "total_steps": 78105, "loss": 0.2161, "lr": 1.8337447601535627e-06, "epoch": 3.136482939632546, "percentage": 62.73, "elapsed_time": "2:08:50", "remaining_time": "1:16:32", "throughput": 19947.76, "total_tokens": 154201728}
|
|
{"current_steps": 49000, "total_steps": 78105, "loss": 0.1876, "lr": 1.8332063284759188e-06, "epoch": 3.136803021573523, "percentage": 62.74, "elapsed_time": "2:08:50", "remaining_time": "1:16:32", "throughput": 19948.04, "total_tokens": 154217024}
|
|
{"current_steps": 49005, "total_steps": 78105, "loss": 0.1428, "lr": 1.8326679300944778e-06, "epoch": 3.1371231035144995, "percentage": 62.74, "elapsed_time": "2:08:51", "remaining_time": "1:16:31", "throughput": 19948.31, "total_tokens": 154232640}
|
|
{"current_steps": 49010, "total_steps": 78105, "loss": 0.2121, "lr": 1.8321295650361253e-06, "epoch": 3.1374431854554765, "percentage": 62.75, "elapsed_time": "2:08:52", "remaining_time": "1:16:30", "throughput": 19948.58, "total_tokens": 154248064}
|
|
{"current_steps": 49015, "total_steps": 78105, "loss": 0.2179, "lr": 1.8315912333277425e-06, "epoch": 3.1377632673964535, "percentage": 62.76, "elapsed_time": "2:08:52", "remaining_time": "1:16:29", "throughput": 19948.89, "total_tokens": 154264192}
|
|
{"current_steps": 49020, "total_steps": 78105, "loss": 0.1238, "lr": 1.8310529349962125e-06, "epoch": 3.1380833493374305, "percentage": 62.76, "elapsed_time": "2:08:53", "remaining_time": "1:16:28", "throughput": 19949.21, "total_tokens": 154280576}
|
|
{"current_steps": 49025, "total_steps": 78105, "loss": 0.1334, "lr": 1.830514670068414e-06, "epoch": 3.138403431278407, "percentage": 62.77, "elapsed_time": "2:08:54", "remaining_time": "1:16:27", "throughput": 19949.48, "total_tokens": 154295872}
|
|
{"current_steps": 49030, "total_steps": 78105, "loss": 0.1362, "lr": 1.8299764385712264e-06, "epoch": 3.138723513219384, "percentage": 62.77, "elapsed_time": "2:08:54", "remaining_time": "1:16:26", "throughput": 19949.73, "total_tokens": 154310976}
|
|
{"current_steps": 49035, "total_steps": 78105, "loss": 0.2198, "lr": 1.8294382405315248e-06, "epoch": 3.139043595160361, "percentage": 62.78, "elapsed_time": "2:08:55", "remaining_time": "1:16:26", "throughput": 19950.07, "total_tokens": 154327680}
|
|
{"current_steps": 49040, "total_steps": 78105, "loss": 0.1391, "lr": 1.8289000759761849e-06, "epoch": 3.139363677101338, "percentage": 62.79, "elapsed_time": "2:08:56", "remaining_time": "1:16:25", "throughput": 19950.36, "total_tokens": 154343296}
|
|
{"current_steps": 49045, "total_steps": 78105, "loss": 0.1522, "lr": 1.8283619449320788e-06, "epoch": 3.139683759042315, "percentage": 62.79, "elapsed_time": "2:08:57", "remaining_time": "1:16:24", "throughput": 19950.64, "total_tokens": 154358976}
|
|
{"current_steps": 49050, "total_steps": 78105, "loss": 0.1084, "lr": 1.8278238474260801e-06, "epoch": 3.1400038409832915, "percentage": 62.8, "elapsed_time": "2:08:57", "remaining_time": "1:16:23", "throughput": 19950.94, "total_tokens": 154374784}
|
|
{"current_steps": 49055, "total_steps": 78105, "loss": 0.2112, "lr": 1.8272857834850562e-06, "epoch": 3.1403239229242685, "percentage": 62.81, "elapsed_time": "2:08:58", "remaining_time": "1:16:22", "throughput": 19951.21, "total_tokens": 154390080}
|
|
{"current_steps": 49060, "total_steps": 78105, "loss": 0.1409, "lr": 1.8267477531358763e-06, "epoch": 3.1406440048652455, "percentage": 62.81, "elapsed_time": "2:08:59", "remaining_time": "1:16:21", "throughput": 19951.53, "total_tokens": 154406848}
|
|
{"current_steps": 49065, "total_steps": 78105, "loss": 0.1635, "lr": 1.8262097564054076e-06, "epoch": 3.1409640868062225, "percentage": 62.82, "elapsed_time": "2:08:59", "remaining_time": "1:16:20", "throughput": 19951.84, "total_tokens": 154423168}
|
|
{"current_steps": 49070, "total_steps": 78105, "loss": 0.154, "lr": 1.825671793320513e-06, "epoch": 3.141284168747199, "percentage": 62.83, "elapsed_time": "2:09:00", "remaining_time": "1:16:20", "throughput": 19952.12, "total_tokens": 154438720}
|
|
{"current_steps": 49075, "total_steps": 78105, "loss": 0.1403, "lr": 1.8251338639080575e-06, "epoch": 3.141604250688176, "percentage": 62.83, "elapsed_time": "2:09:01", "remaining_time": "1:16:19", "throughput": 19952.38, "total_tokens": 154454144}
|
|
{"current_steps": 49080, "total_steps": 78105, "loss": 0.1403, "lr": 1.8245959681949004e-06, "epoch": 3.141924332629153, "percentage": 62.84, "elapsed_time": "2:09:01", "remaining_time": "1:16:18", "throughput": 19952.66, "total_tokens": 154469632}
|
|
{"current_steps": 49085, "total_steps": 78105, "loss": 0.2037, "lr": 1.8240581062079043e-06, "epoch": 3.14224441457013, "percentage": 62.84, "elapsed_time": "2:09:02", "remaining_time": "1:16:17", "throughput": 19952.93, "total_tokens": 154485120}
|
|
{"current_steps": 49090, "total_steps": 78105, "loss": 0.1463, "lr": 1.8235202779739242e-06, "epoch": 3.142564496511107, "percentage": 62.85, "elapsed_time": "2:09:03", "remaining_time": "1:16:16", "throughput": 19953.22, "total_tokens": 154500864}
|
|
{"current_steps": 49095, "total_steps": 78105, "loss": 0.1618, "lr": 1.8229824835198187e-06, "epoch": 3.1428845784520836, "percentage": 62.86, "elapsed_time": "2:09:03", "remaining_time": "1:16:15", "throughput": 19953.5, "total_tokens": 154516608}
|
|
{"current_steps": 49100, "total_steps": 78105, "loss": 0.2045, "lr": 1.8224447228724407e-06, "epoch": 3.1432046603930606, "percentage": 62.86, "elapsed_time": "2:09:04", "remaining_time": "1:16:14", "throughput": 19953.76, "total_tokens": 154531904}
|
|
{"current_steps": 49105, "total_steps": 78105, "loss": 0.1027, "lr": 1.8219069960586444e-06, "epoch": 3.1435247423340376, "percentage": 62.87, "elapsed_time": "2:09:05", "remaining_time": "1:16:14", "throughput": 19954.01, "total_tokens": 154546944}
|
|
{"current_steps": 49110, "total_steps": 78105, "loss": 0.1544, "lr": 1.8213693031052798e-06, "epoch": 3.1438448242750145, "percentage": 62.88, "elapsed_time": "2:09:05", "remaining_time": "1:16:13", "throughput": 19954.33, "total_tokens": 154563072}
|
|
{"current_steps": 49115, "total_steps": 78105, "loss": 0.1837, "lr": 1.8208316440391977e-06, "epoch": 3.144164906215991, "percentage": 62.88, "elapsed_time": "2:09:06", "remaining_time": "1:16:12", "throughput": 19954.68, "total_tokens": 154579712}
|
|
{"current_steps": 49120, "total_steps": 78105, "loss": 0.1791, "lr": 1.8202940188872468e-06, "epoch": 3.144484988156968, "percentage": 62.89, "elapsed_time": "2:09:07", "remaining_time": "1:16:11", "throughput": 19954.93, "total_tokens": 154595008}
|
|
{"current_steps": 49125, "total_steps": 78105, "loss": 0.2902, "lr": 1.8197564276762708e-06, "epoch": 3.144805070097945, "percentage": 62.9, "elapsed_time": "2:09:07", "remaining_time": "1:16:10", "throughput": 19955.28, "total_tokens": 154611776}
|
|
{"current_steps": 49130, "total_steps": 78105, "loss": 0.1741, "lr": 1.8192188704331163e-06, "epoch": 3.145125152038922, "percentage": 62.9, "elapsed_time": "2:09:08", "remaining_time": "1:16:09", "throughput": 19955.51, "total_tokens": 154626432}
|
|
{"current_steps": 49135, "total_steps": 78105, "loss": 0.1921, "lr": 1.8186813471846246e-06, "epoch": 3.145445233979899, "percentage": 62.91, "elapsed_time": "2:09:09", "remaining_time": "1:16:08", "throughput": 19955.86, "total_tokens": 154643456}
|
|
{"current_steps": 49140, "total_steps": 78105, "loss": 0.1822, "lr": 1.818143857957639e-06, "epoch": 3.1457653159208756, "percentage": 62.92, "elapsed_time": "2:09:09", "remaining_time": "1:16:08", "throughput": 19956.17, "total_tokens": 154659136}
|
|
{"current_steps": 49145, "total_steps": 78105, "loss": 0.2113, "lr": 1.8176064027789958e-06, "epoch": 3.1460853978618526, "percentage": 62.92, "elapsed_time": "2:09:10", "remaining_time": "1:16:07", "throughput": 19956.38, "total_tokens": 154673408}
|
|
{"current_steps": 49150, "total_steps": 78105, "loss": 0.1947, "lr": 1.817068981675536e-06, "epoch": 3.1464054798028296, "percentage": 62.93, "elapsed_time": "2:09:11", "remaining_time": "1:16:06", "throughput": 19956.64, "total_tokens": 154688448}
|
|
{"current_steps": 49155, "total_steps": 78105, "loss": 0.1851, "lr": 1.8165315946740925e-06, "epoch": 3.1467255617438066, "percentage": 62.93, "elapsed_time": "2:09:11", "remaining_time": "1:16:05", "throughput": 19956.96, "total_tokens": 154704448}
|
|
{"current_steps": 49160, "total_steps": 78105, "loss": 0.1101, "lr": 1.815994241801502e-06, "epoch": 3.147045643684783, "percentage": 62.94, "elapsed_time": "2:09:12", "remaining_time": "1:16:04", "throughput": 19957.23, "total_tokens": 154719936}
|
|
{"current_steps": 49165, "total_steps": 78105, "loss": 0.1941, "lr": 1.8154569230845957e-06, "epoch": 3.14736572562576, "percentage": 62.95, "elapsed_time": "2:09:13", "remaining_time": "1:16:03", "throughput": 19957.52, "total_tokens": 154735808}
|
|
{"current_steps": 49170, "total_steps": 78105, "loss": 0.1623, "lr": 1.8149196385502058e-06, "epoch": 3.147685807566737, "percentage": 62.95, "elapsed_time": "2:09:13", "remaining_time": "1:16:02", "throughput": 19957.76, "total_tokens": 154750720}
|
|
{"current_steps": 49175, "total_steps": 78105, "loss": 0.2436, "lr": 1.8143823882251598e-06, "epoch": 3.148005889507714, "percentage": 62.96, "elapsed_time": "2:09:14", "remaining_time": "1:16:02", "throughput": 19958.03, "total_tokens": 154766144}
|
|
{"current_steps": 49180, "total_steps": 78105, "loss": 0.2159, "lr": 1.813845172136286e-06, "epoch": 3.148325971448691, "percentage": 62.97, "elapsed_time": "2:09:15", "remaining_time": "1:16:01", "throughput": 19958.32, "total_tokens": 154781952}
|
|
{"current_steps": 49185, "total_steps": 78105, "loss": 0.1272, "lr": 1.8133079903104105e-06, "epoch": 3.1486460533896676, "percentage": 62.97, "elapsed_time": "2:09:15", "remaining_time": "1:16:00", "throughput": 19958.6, "total_tokens": 154797504}
|
|
{"current_steps": 49190, "total_steps": 78105, "loss": 0.1553, "lr": 1.8127708427743572e-06, "epoch": 3.1489661353306446, "percentage": 62.98, "elapsed_time": "2:09:16", "remaining_time": "1:15:59", "throughput": 19958.87, "total_tokens": 154812672}
|
|
{"current_steps": 49195, "total_steps": 78105, "loss": 0.1029, "lr": 1.8122337295549491e-06, "epoch": 3.1492862172716216, "percentage": 62.99, "elapsed_time": "2:09:17", "remaining_time": "1:15:58", "throughput": 19959.13, "total_tokens": 154827968}
|
|
{"current_steps": 49200, "total_steps": 78105, "loss": 0.0969, "lr": 1.8116966506790052e-06, "epoch": 3.1496062992125986, "percentage": 62.99, "elapsed_time": "2:09:18", "remaining_time": "1:15:57", "throughput": 19959.57, "total_tokens": 154846528}
|
|
{"current_steps": 49205, "total_steps": 78105, "loss": 0.085, "lr": 1.811159606173346e-06, "epoch": 3.149926381153575, "percentage": 63.0, "elapsed_time": "2:09:18", "remaining_time": "1:15:56", "throughput": 19959.85, "total_tokens": 154862336}
|
|
{"current_steps": 49210, "total_steps": 78105, "loss": 0.1819, "lr": 1.810622596064787e-06, "epoch": 3.150246463094552, "percentage": 63.0, "elapsed_time": "2:09:19", "remaining_time": "1:15:56", "throughput": 19960.07, "total_tokens": 154876992}
|
|
{"current_steps": 49215, "total_steps": 78105, "loss": 0.1521, "lr": 1.8100856203801458e-06, "epoch": 3.150566545035529, "percentage": 63.01, "elapsed_time": "2:09:20", "remaining_time": "1:15:55", "throughput": 19960.36, "total_tokens": 154892864}
|
|
{"current_steps": 49220, "total_steps": 78105, "loss": 0.2068, "lr": 1.8095486791462342e-06, "epoch": 3.150886626976506, "percentage": 63.02, "elapsed_time": "2:09:20", "remaining_time": "1:15:54", "throughput": 19960.62, "total_tokens": 154908096}
|
|
{"current_steps": 49225, "total_steps": 78105, "loss": 0.0886, "lr": 1.8090117723898663e-06, "epoch": 3.1512067089174827, "percentage": 63.02, "elapsed_time": "2:09:21", "remaining_time": "1:15:53", "throughput": 19960.9, "total_tokens": 154923584}
|
|
{"current_steps": 49230, "total_steps": 78105, "loss": 0.2037, "lr": 1.8084749001378499e-06, "epoch": 3.1515267908584597, "percentage": 63.03, "elapsed_time": "2:09:21", "remaining_time": "1:15:52", "throughput": 19961.15, "total_tokens": 154938432}
|
|
{"current_steps": 49235, "total_steps": 78105, "loss": 0.1023, "lr": 1.8079380624169955e-06, "epoch": 3.1518468727994366, "percentage": 63.04, "elapsed_time": "2:09:22", "remaining_time": "1:15:51", "throughput": 19961.45, "total_tokens": 154954368}
|
|
{"current_steps": 49240, "total_steps": 78105, "loss": 0.3264, "lr": 1.8074012592541102e-06, "epoch": 3.1521669547404136, "percentage": 63.04, "elapsed_time": "2:09:23", "remaining_time": "1:15:50", "throughput": 19961.73, "total_tokens": 154970240}
|
|
{"current_steps": 49245, "total_steps": 78105, "loss": 0.1556, "lr": 1.8068644906759973e-06, "epoch": 3.1524870366813906, "percentage": 63.05, "elapsed_time": "2:09:24", "remaining_time": "1:15:50", "throughput": 19961.97, "total_tokens": 154984896}
|
|
{"current_steps": 49250, "total_steps": 78105, "loss": 0.1864, "lr": 1.8063277567094617e-06, "epoch": 3.152807118622367, "percentage": 63.06, "elapsed_time": "2:09:24", "remaining_time": "1:15:49", "throughput": 19962.26, "total_tokens": 155000768}
|
|
{"current_steps": 49255, "total_steps": 78105, "loss": 0.2295, "lr": 1.8057910573813042e-06, "epoch": 3.153127200563344, "percentage": 63.06, "elapsed_time": "2:09:25", "remaining_time": "1:15:48", "throughput": 19962.68, "total_tokens": 155019072}
|
|
{"current_steps": 49260, "total_steps": 78105, "loss": 0.2016, "lr": 1.805254392718327e-06, "epoch": 3.153447282504321, "percentage": 63.07, "elapsed_time": "2:09:26", "remaining_time": "1:15:47", "throughput": 19962.97, "total_tokens": 155035072}
|
|
{"current_steps": 49265, "total_steps": 78105, "loss": 0.1776, "lr": 1.804717762747325e-06, "epoch": 3.153767364445298, "percentage": 63.08, "elapsed_time": "2:09:26", "remaining_time": "1:15:46", "throughput": 19963.22, "total_tokens": 155049920}
|
|
{"current_steps": 49270, "total_steps": 78105, "loss": 0.0771, "lr": 1.804181167495097e-06, "epoch": 3.1540874463862747, "percentage": 63.08, "elapsed_time": "2:09:27", "remaining_time": "1:15:45", "throughput": 19963.55, "total_tokens": 155066432}
|
|
{"current_steps": 49275, "total_steps": 78105, "loss": 0.1546, "lr": 1.8036446069884362e-06, "epoch": 3.1544075283272517, "percentage": 63.09, "elapsed_time": "2:09:28", "remaining_time": "1:15:45", "throughput": 19963.84, "total_tokens": 155082496}
|
|
{"current_steps": 49280, "total_steps": 78105, "loss": 0.1381, "lr": 1.803108081254138e-06, "epoch": 3.1547276102682287, "percentage": 63.09, "elapsed_time": "2:09:28", "remaining_time": "1:15:44", "throughput": 19964.21, "total_tokens": 155099904}
|
|
{"current_steps": 49285, "total_steps": 78105, "loss": 0.1278, "lr": 1.802571590318991e-06, "epoch": 3.1550476922092057, "percentage": 63.1, "elapsed_time": "2:09:29", "remaining_time": "1:15:43", "throughput": 19964.47, "total_tokens": 155114880}
|
|
{"current_steps": 49290, "total_steps": 78105, "loss": 0.2006, "lr": 1.8020351342097864e-06, "epoch": 3.155367774150182, "percentage": 63.11, "elapsed_time": "2:09:30", "remaining_time": "1:15:42", "throughput": 19964.75, "total_tokens": 155130496}
|
|
{"current_steps": 49295, "total_steps": 78105, "loss": 0.1725, "lr": 1.8014987129533123e-06, "epoch": 3.155687856091159, "percentage": 63.11, "elapsed_time": "2:09:30", "remaining_time": "1:15:41", "throughput": 19965.02, "total_tokens": 155146240}
|
|
{"current_steps": 49300, "total_steps": 78105, "loss": 0.1611, "lr": 1.8009623265763531e-06, "epoch": 3.156007938032136, "percentage": 63.12, "elapsed_time": "2:09:31", "remaining_time": "1:15:40", "throughput": 19965.29, "total_tokens": 155161984}
|
|
{"current_steps": 49305, "total_steps": 78105, "loss": 0.1304, "lr": 1.800425975105695e-06, "epoch": 3.156328019973113, "percentage": 63.13, "elapsed_time": "2:09:32", "remaining_time": "1:15:39", "throughput": 19965.55, "total_tokens": 155177408}
|
|
{"current_steps": 49310, "total_steps": 78105, "loss": 0.1918, "lr": 1.7998896585681189e-06, "epoch": 3.15664810191409, "percentage": 63.13, "elapsed_time": "2:09:32", "remaining_time": "1:15:39", "throughput": 19965.9, "total_tokens": 155194496}
|
|
{"current_steps": 49315, "total_steps": 78105, "loss": 0.1523, "lr": 1.7993533769904076e-06, "epoch": 3.1569681838550667, "percentage": 63.14, "elapsed_time": "2:09:33", "remaining_time": "1:15:38", "throughput": 19966.21, "total_tokens": 155210560}
|
|
{"current_steps": 49320, "total_steps": 78105, "loss": 0.1641, "lr": 1.7988171303993383e-06, "epoch": 3.1572882657960437, "percentage": 63.15, "elapsed_time": "2:09:34", "remaining_time": "1:15:37", "throughput": 19966.49, "total_tokens": 155226368}
|
|
{"current_steps": 49325, "total_steps": 78105, "loss": 0.2784, "lr": 1.79828091882169e-06, "epoch": 3.1576083477370207, "percentage": 63.15, "elapsed_time": "2:09:35", "remaining_time": "1:15:36", "throughput": 19966.74, "total_tokens": 155241472}
|
|
{"current_steps": 49330, "total_steps": 78105, "loss": 0.1695, "lr": 1.7977447422842364e-06, "epoch": 3.1579284296779977, "percentage": 63.16, "elapsed_time": "2:09:35", "remaining_time": "1:15:35", "throughput": 19966.99, "total_tokens": 155256640}
|
|
{"current_steps": 49335, "total_steps": 78105, "loss": 0.1801, "lr": 1.797208600813753e-06, "epoch": 3.1582485116189742, "percentage": 63.16, "elapsed_time": "2:09:36", "remaining_time": "1:15:34", "throughput": 19967.32, "total_tokens": 155273024}
|
|
{"current_steps": 49340, "total_steps": 78105, "loss": 0.2074, "lr": 1.7966724944370108e-06, "epoch": 3.1585685935599512, "percentage": 63.17, "elapsed_time": "2:09:37", "remaining_time": "1:15:33", "throughput": 19967.61, "total_tokens": 155289088}
|
|
{"current_steps": 49345, "total_steps": 78105, "loss": 0.1932, "lr": 1.7961364231807822e-06, "epoch": 3.1588886755009282, "percentage": 63.18, "elapsed_time": "2:09:37", "remaining_time": "1:15:33", "throughput": 19967.94, "total_tokens": 155305728}
|
|
{"current_steps": 49350, "total_steps": 78105, "loss": 0.2204, "lr": 1.795600387071833e-06, "epoch": 3.159208757441905, "percentage": 63.18, "elapsed_time": "2:09:38", "remaining_time": "1:15:32", "throughput": 19968.23, "total_tokens": 155321792}
|
|
{"current_steps": 49355, "total_steps": 78105, "loss": 0.2136, "lr": 1.795064386136931e-06, "epoch": 3.159528839382882, "percentage": 63.19, "elapsed_time": "2:09:39", "remaining_time": "1:15:31", "throughput": 19968.63, "total_tokens": 155339264}
|
|
{"current_steps": 49360, "total_steps": 78105, "loss": 0.1362, "lr": 1.7945284204028428e-06, "epoch": 3.1598489213238588, "percentage": 63.2, "elapsed_time": "2:09:39", "remaining_time": "1:15:30", "throughput": 19968.86, "total_tokens": 155354304}
|
|
{"current_steps": 49365, "total_steps": 78105, "loss": 0.275, "lr": 1.79399248989633e-06, "epoch": 3.1601690032648357, "percentage": 63.2, "elapsed_time": "2:09:40", "remaining_time": "1:15:29", "throughput": 19969.11, "total_tokens": 155369152}
|
|
{"current_steps": 49370, "total_steps": 78105, "loss": 0.2137, "lr": 1.7934565946441556e-06, "epoch": 3.1604890852058127, "percentage": 63.21, "elapsed_time": "2:09:41", "remaining_time": "1:15:28", "throughput": 19969.42, "total_tokens": 155384896}
|
|
{"current_steps": 49375, "total_steps": 78105, "loss": 0.1578, "lr": 1.7929207346730776e-06, "epoch": 3.1608091671467897, "percentage": 63.22, "elapsed_time": "2:09:41", "remaining_time": "1:15:28", "throughput": 19969.69, "total_tokens": 155400512}
|
|
{"current_steps": 49380, "total_steps": 78105, "loss": 0.1719, "lr": 1.7923849100098562e-06, "epoch": 3.1611292490877663, "percentage": 63.22, "elapsed_time": "2:09:42", "remaining_time": "1:15:27", "throughput": 19970.0, "total_tokens": 155416512}
|
|
{"current_steps": 49385, "total_steps": 78105, "loss": 0.169, "lr": 1.7918491206812455e-06, "epoch": 3.1614493310287433, "percentage": 63.23, "elapsed_time": "2:09:43", "remaining_time": "1:15:26", "throughput": 19970.24, "total_tokens": 155431296}
|
|
{"current_steps": 49390, "total_steps": 78105, "loss": 0.1524, "lr": 1.7913133667140014e-06, "epoch": 3.1617694129697202, "percentage": 63.24, "elapsed_time": "2:09:43", "remaining_time": "1:15:25", "throughput": 19970.54, "total_tokens": 155447296}
|
|
{"current_steps": 49395, "total_steps": 78105, "loss": 0.1339, "lr": 1.7907776481348763e-06, "epoch": 3.1620894949106972, "percentage": 63.24, "elapsed_time": "2:09:44", "remaining_time": "1:15:24", "throughput": 19970.83, "total_tokens": 155463104}
|
|
{"current_steps": 49400, "total_steps": 78105, "loss": 0.1519, "lr": 1.790241964970622e-06, "epoch": 3.1624095768516742, "percentage": 63.25, "elapsed_time": "2:09:45", "remaining_time": "1:15:23", "throughput": 19971.12, "total_tokens": 155478784}
|
|
{"current_steps": 49405, "total_steps": 78105, "loss": 0.1732, "lr": 1.7897063172479862e-06, "epoch": 3.1627296587926508, "percentage": 63.25, "elapsed_time": "2:09:45", "remaining_time": "1:15:22", "throughput": 19971.47, "total_tokens": 155495616}
|
|
{"current_steps": 49410, "total_steps": 78105, "loss": 0.1998, "lr": 1.7891707049937168e-06, "epoch": 3.1630497407336278, "percentage": 63.26, "elapsed_time": "2:09:46", "remaining_time": "1:15:22", "throughput": 19971.85, "total_tokens": 155512640}
|
|
{"current_steps": 49415, "total_steps": 78105, "loss": 0.1176, "lr": 1.7886351282345604e-06, "epoch": 3.1633698226746048, "percentage": 63.27, "elapsed_time": "2:09:47", "remaining_time": "1:15:21", "throughput": 19972.15, "total_tokens": 155528576}
|
|
{"current_steps": 49420, "total_steps": 78105, "loss": 0.1531, "lr": 1.7880995869972598e-06, "epoch": 3.1636899046155817, "percentage": 63.27, "elapsed_time": "2:09:47", "remaining_time": "1:15:20", "throughput": 19972.45, "total_tokens": 155544704}
|
|
{"current_steps": 49425, "total_steps": 78105, "loss": 0.1638, "lr": 1.7875640813085587e-06, "epoch": 3.1640099865565583, "percentage": 63.28, "elapsed_time": "2:09:48", "remaining_time": "1:15:19", "throughput": 19972.75, "total_tokens": 155560704}
|
|
{"current_steps": 49430, "total_steps": 78105, "loss": 0.1806, "lr": 1.7870286111951957e-06, "epoch": 3.1643300684975353, "percentage": 63.29, "elapsed_time": "2:09:49", "remaining_time": "1:15:18", "throughput": 19973.0, "total_tokens": 155575680}
|
|
{"current_steps": 49435, "total_steps": 78105, "loss": 0.1671, "lr": 1.786493176683911e-06, "epoch": 3.1646501504385123, "percentage": 63.29, "elapsed_time": "2:09:50", "remaining_time": "1:15:17", "throughput": 19973.33, "total_tokens": 155592448}
|
|
{"current_steps": 49440, "total_steps": 78105, "loss": 0.1551, "lr": 1.7859577778014393e-06, "epoch": 3.1649702323794893, "percentage": 63.3, "elapsed_time": "2:09:50", "remaining_time": "1:15:16", "throughput": 19973.64, "total_tokens": 155608512}
|
|
{"current_steps": 49445, "total_steps": 78105, "loss": 0.1647, "lr": 1.7854224145745175e-06, "epoch": 3.1652903143204663, "percentage": 63.31, "elapsed_time": "2:09:51", "remaining_time": "1:15:16", "throughput": 19973.96, "total_tokens": 155625088}
|
|
{"current_steps": 49450, "total_steps": 78105, "loss": 0.1323, "lr": 1.7848870870298777e-06, "epoch": 3.165610396261443, "percentage": 63.31, "elapsed_time": "2:09:52", "remaining_time": "1:15:15", "throughput": 19974.27, "total_tokens": 155641344}
|
|
{"current_steps": 49455, "total_steps": 78105, "loss": 0.1538, "lr": 1.784351795194253e-06, "epoch": 3.16593047820242, "percentage": 63.32, "elapsed_time": "2:09:52", "remaining_time": "1:15:14", "throughput": 19974.55, "total_tokens": 155657088}
|
|
{"current_steps": 49460, "total_steps": 78105, "loss": 0.2068, "lr": 1.7838165390943706e-06, "epoch": 3.166250560143397, "percentage": 63.33, "elapsed_time": "2:09:53", "remaining_time": "1:15:13", "throughput": 19974.87, "total_tokens": 155673280}
|
|
{"current_steps": 49465, "total_steps": 78105, "loss": 0.1617, "lr": 1.783281318756961e-06, "epoch": 3.1665706420843738, "percentage": 63.33, "elapsed_time": "2:09:54", "remaining_time": "1:15:12", "throughput": 19975.14, "total_tokens": 155688832}
|
|
{"current_steps": 49470, "total_steps": 78105, "loss": 0.1426, "lr": 1.7827461342087483e-06, "epoch": 3.1668907240253503, "percentage": 63.34, "elapsed_time": "2:09:54", "remaining_time": "1:15:11", "throughput": 19975.38, "total_tokens": 155703552}
|
|
{"current_steps": 49475, "total_steps": 78105, "loss": 0.18, "lr": 1.7822109854764575e-06, "epoch": 3.1672108059663273, "percentage": 63.34, "elapsed_time": "2:09:55", "remaining_time": "1:15:11", "throughput": 19975.72, "total_tokens": 155720384}
|
|
{"current_steps": 49480, "total_steps": 78105, "loss": 0.1643, "lr": 1.7816758725868117e-06, "epoch": 3.1675308879073043, "percentage": 63.35, "elapsed_time": "2:09:56", "remaining_time": "1:15:10", "throughput": 19976.04, "total_tokens": 155736960}
|
|
{"current_steps": 49485, "total_steps": 78105, "loss": 0.1909, "lr": 1.7811407955665306e-06, "epoch": 3.1678509698482813, "percentage": 63.36, "elapsed_time": "2:09:56", "remaining_time": "1:15:09", "throughput": 19976.31, "total_tokens": 155752320}
|
|
{"current_steps": 49490, "total_steps": 78105, "loss": 0.1776, "lr": 1.780605754442335e-06, "epoch": 3.1681710517892583, "percentage": 63.36, "elapsed_time": "2:09:57", "remaining_time": "1:15:08", "throughput": 19976.57, "total_tokens": 155767616}
|
|
{"current_steps": 49495, "total_steps": 78105, "loss": 0.1464, "lr": 1.7800707492409398e-06, "epoch": 3.168491133730235, "percentage": 63.37, "elapsed_time": "2:09:58", "remaining_time": "1:15:07", "throughput": 19976.94, "total_tokens": 155785088}
|
|
{"current_steps": 49500, "total_steps": 78105, "loss": 0.1997, "lr": 1.7795357799890617e-06, "epoch": 3.168811215671212, "percentage": 63.38, "elapsed_time": "2:09:58", "remaining_time": "1:15:06", "throughput": 19977.25, "total_tokens": 155801600}
|
|
{"current_steps": 49505, "total_steps": 78105, "loss": 0.1413, "lr": 1.7790008467134136e-06, "epoch": 3.169131297612189, "percentage": 63.38, "elapsed_time": "2:09:59", "remaining_time": "1:15:06", "throughput": 19977.61, "total_tokens": 155818496}
|
|
{"current_steps": 49510, "total_steps": 78105, "loss": 0.1477, "lr": 1.7784659494407085e-06, "epoch": 3.169451379553166, "percentage": 63.39, "elapsed_time": "2:10:00", "remaining_time": "1:15:05", "throughput": 19977.86, "total_tokens": 155833664}
|
|
{"current_steps": 49515, "total_steps": 78105, "loss": 0.168, "lr": 1.777931088197655e-06, "epoch": 3.1697714614941424, "percentage": 63.4, "elapsed_time": "2:10:01", "remaining_time": "1:15:04", "throughput": 19978.2, "total_tokens": 155850496}
|
|
{"current_steps": 49520, "total_steps": 78105, "loss": 0.1681, "lr": 1.7773962630109625e-06, "epoch": 3.1700915434351193, "percentage": 63.4, "elapsed_time": "2:10:01", "remaining_time": "1:15:03", "throughput": 19978.46, "total_tokens": 155865664}
|
|
{"current_steps": 49525, "total_steps": 78105, "loss": 0.2676, "lr": 1.776861473907336e-06, "epoch": 3.1704116253760963, "percentage": 63.41, "elapsed_time": "2:10:02", "remaining_time": "1:15:02", "throughput": 19978.72, "total_tokens": 155881088}
|
|
{"current_steps": 49530, "total_steps": 78105, "loss": 0.1935, "lr": 1.7763267209134805e-06, "epoch": 3.1707317073170733, "percentage": 63.41, "elapsed_time": "2:10:03", "remaining_time": "1:15:01", "throughput": 19978.99, "total_tokens": 155896640}
|
|
{"current_steps": 49535, "total_steps": 78105, "loss": 0.1933, "lr": 1.7757920040560996e-06, "epoch": 3.17105178925805, "percentage": 63.42, "elapsed_time": "2:10:03", "remaining_time": "1:15:00", "throughput": 19979.23, "total_tokens": 155911616}
|
|
{"current_steps": 49540, "total_steps": 78105, "loss": 0.1791, "lr": 1.7752573233618936e-06, "epoch": 3.171371871199027, "percentage": 63.43, "elapsed_time": "2:10:04", "remaining_time": "1:15:00", "throughput": 19979.49, "total_tokens": 155926784}
|
|
{"current_steps": 49545, "total_steps": 78105, "loss": 0.1269, "lr": 1.7747226788575622e-06, "epoch": 3.171691953140004, "percentage": 63.43, "elapsed_time": "2:10:05", "remaining_time": "1:14:59", "throughput": 19979.84, "total_tokens": 155943552}
|
|
{"current_steps": 49550, "total_steps": 78105, "loss": 0.1519, "lr": 1.774188070569802e-06, "epoch": 3.172012035080981, "percentage": 63.44, "elapsed_time": "2:10:05", "remaining_time": "1:14:58", "throughput": 19980.1, "total_tokens": 155959040}
|
|
{"current_steps": 49555, "total_steps": 78105, "loss": 0.1188, "lr": 1.773653498525309e-06, "epoch": 3.1723321170219574, "percentage": 63.45, "elapsed_time": "2:10:06", "remaining_time": "1:14:57", "throughput": 19980.38, "total_tokens": 155974592}
|
|
{"current_steps": 49560, "total_steps": 78105, "loss": 0.1247, "lr": 1.7731189627507772e-06, "epoch": 3.1726521989629344, "percentage": 63.45, "elapsed_time": "2:10:07", "remaining_time": "1:14:56", "throughput": 19980.68, "total_tokens": 155990592}
|
|
{"current_steps": 49565, "total_steps": 78105, "loss": 0.105, "lr": 1.772584463272898e-06, "epoch": 3.1729722809039114, "percentage": 63.46, "elapsed_time": "2:10:07", "remaining_time": "1:14:55", "throughput": 19980.94, "total_tokens": 156006144}
|
|
{"current_steps": 49570, "total_steps": 78105, "loss": 0.0919, "lr": 1.7720500001183617e-06, "epoch": 3.1732923628448884, "percentage": 63.47, "elapsed_time": "2:10:08", "remaining_time": "1:14:54", "throughput": 19981.23, "total_tokens": 156022016}
|
|
{"current_steps": 49575, "total_steps": 78105, "loss": 0.1088, "lr": 1.771515573313857e-06, "epoch": 3.1736124447858653, "percentage": 63.47, "elapsed_time": "2:10:09", "remaining_time": "1:14:54", "throughput": 19981.53, "total_tokens": 156037824}
|
|
{"current_steps": 49580, "total_steps": 78105, "loss": 0.2396, "lr": 1.7709811828860693e-06, "epoch": 3.173932526726842, "percentage": 63.48, "elapsed_time": "2:10:09", "remaining_time": "1:14:53", "throughput": 19981.78, "total_tokens": 156053120}
|
|
{"current_steps": 49585, "total_steps": 78105, "loss": 0.2079, "lr": 1.770446828861684e-06, "epoch": 3.174252608667819, "percentage": 63.49, "elapsed_time": "2:10:10", "remaining_time": "1:14:52", "throughput": 19982.01, "total_tokens": 156068160}
|
|
{"current_steps": 49590, "total_steps": 78105, "loss": 0.0824, "lr": 1.7699125112673843e-06, "epoch": 3.174572690608796, "percentage": 63.49, "elapsed_time": "2:10:11", "remaining_time": "1:14:51", "throughput": 19982.32, "total_tokens": 156084480}
|
|
{"current_steps": 49595, "total_steps": 78105, "loss": 0.1434, "lr": 1.7693782301298508e-06, "epoch": 3.174892772549773, "percentage": 63.5, "elapsed_time": "2:10:11", "remaining_time": "1:14:50", "throughput": 19982.61, "total_tokens": 156100736}
|
|
{"current_steps": 49600, "total_steps": 78105, "loss": 0.2225, "lr": 1.7688439854757635e-06, "epoch": 3.1752128544907494, "percentage": 63.5, "elapsed_time": "2:10:12", "remaining_time": "1:14:49", "throughput": 19982.9, "total_tokens": 156117056}
|
|
{"current_steps": 49605, "total_steps": 78105, "loss": 0.1643, "lr": 1.7683097773317981e-06, "epoch": 3.1755329364317264, "percentage": 63.51, "elapsed_time": "2:10:13", "remaining_time": "1:14:48", "throughput": 19983.16, "total_tokens": 156132672}
|
|
{"current_steps": 49610, "total_steps": 78105, "loss": 0.1787, "lr": 1.7677756057246326e-06, "epoch": 3.1758530183727034, "percentage": 63.52, "elapsed_time": "2:10:13", "remaining_time": "1:14:48", "throughput": 19983.44, "total_tokens": 156148480}
|
|
{"current_steps": 49615, "total_steps": 78105, "loss": 0.1366, "lr": 1.7672414706809377e-06, "epoch": 3.1761731003136804, "percentage": 63.52, "elapsed_time": "2:10:14", "remaining_time": "1:14:47", "throughput": 19983.73, "total_tokens": 156164160}
|
|
{"current_steps": 49620, "total_steps": 78105, "loss": 0.1075, "lr": 1.7667073722273877e-06, "epoch": 3.1764931822546574, "percentage": 63.53, "elapsed_time": "2:10:15", "remaining_time": "1:14:46", "throughput": 19983.97, "total_tokens": 156179008}
|
|
{"current_steps": 49625, "total_steps": 78105, "loss": 0.2051, "lr": 1.7661733103906517e-06, "epoch": 3.176813264195634, "percentage": 63.54, "elapsed_time": "2:10:15", "remaining_time": "1:14:45", "throughput": 19984.28, "total_tokens": 156195072}
|
|
{"current_steps": 49630, "total_steps": 78105, "loss": 0.1316, "lr": 1.765639285197399e-06, "epoch": 3.177133346136611, "percentage": 63.54, "elapsed_time": "2:10:16", "remaining_time": "1:14:44", "throughput": 19984.54, "total_tokens": 156210560}
|
|
{"current_steps": 49635, "total_steps": 78105, "loss": 0.1347, "lr": 1.7651052966742943e-06, "epoch": 3.177453428077588, "percentage": 63.55, "elapsed_time": "2:10:17", "remaining_time": "1:14:43", "throughput": 19984.8, "total_tokens": 156226240}
|
|
{"current_steps": 49640, "total_steps": 78105, "loss": 0.122, "lr": 1.7645713448480037e-06, "epoch": 3.177773510018565, "percentage": 63.56, "elapsed_time": "2:10:17", "remaining_time": "1:14:43", "throughput": 19985.13, "total_tokens": 156242880}
|
|
{"current_steps": 49645, "total_steps": 78105, "loss": 0.1503, "lr": 1.764037429745189e-06, "epoch": 3.1780935919595414, "percentage": 63.56, "elapsed_time": "2:10:18", "remaining_time": "1:14:42", "throughput": 19985.41, "total_tokens": 156258432}
|
|
{"current_steps": 49650, "total_steps": 78105, "loss": 0.1754, "lr": 1.7635035513925115e-06, "epoch": 3.1784136739005184, "percentage": 63.57, "elapsed_time": "2:10:19", "remaining_time": "1:14:41", "throughput": 19985.67, "total_tokens": 156273664}
|
|
{"current_steps": 49655, "total_steps": 78105, "loss": 0.231, "lr": 1.762969709816631e-06, "epoch": 3.1787337558414954, "percentage": 63.57, "elapsed_time": "2:10:19", "remaining_time": "1:14:40", "throughput": 19985.96, "total_tokens": 156289792}
|
|
{"current_steps": 49660, "total_steps": 78105, "loss": 0.2028, "lr": 1.7624359050442037e-06, "epoch": 3.1790538377824724, "percentage": 63.58, "elapsed_time": "2:10:20", "remaining_time": "1:14:39", "throughput": 19986.25, "total_tokens": 156305408}
|
|
{"current_steps": 49665, "total_steps": 78105, "loss": 0.1313, "lr": 1.761902137101886e-06, "epoch": 3.1793739197234494, "percentage": 63.59, "elapsed_time": "2:10:21", "remaining_time": "1:14:38", "throughput": 19986.51, "total_tokens": 156320896}
|
|
{"current_steps": 49670, "total_steps": 78105, "loss": 0.1381, "lr": 1.7613684060163303e-06, "epoch": 3.179694001664426, "percentage": 63.59, "elapsed_time": "2:10:21", "remaining_time": "1:14:37", "throughput": 19986.79, "total_tokens": 156336576}
|
|
{"current_steps": 49675, "total_steps": 78105, "loss": 0.1875, "lr": 1.7608347118141894e-06, "epoch": 3.180014083605403, "percentage": 63.6, "elapsed_time": "2:10:22", "remaining_time": "1:14:37", "throughput": 19987.05, "total_tokens": 156351872}
|
|
{"current_steps": 49680, "total_steps": 78105, "loss": 0.1498, "lr": 1.7603010545221123e-06, "epoch": 3.18033416554638, "percentage": 63.61, "elapsed_time": "2:10:23", "remaining_time": "1:14:36", "throughput": 19987.3, "total_tokens": 156366976}
|
|
{"current_steps": 49685, "total_steps": 78105, "loss": 0.2003, "lr": 1.7597674341667487e-06, "epoch": 3.180654247487357, "percentage": 63.61, "elapsed_time": "2:10:24", "remaining_time": "1:14:35", "throughput": 19987.6, "total_tokens": 156383104}
|
|
{"current_steps": 49690, "total_steps": 78105, "loss": 0.1034, "lr": 1.7592338507747425e-06, "epoch": 3.1809743294283335, "percentage": 63.62, "elapsed_time": "2:10:24", "remaining_time": "1:14:34", "throughput": 19987.88, "total_tokens": 156398720}
|
|
{"current_steps": 49695, "total_steps": 78105, "loss": 0.2642, "lr": 1.7587003043727403e-06, "epoch": 3.1812944113693105, "percentage": 63.63, "elapsed_time": "2:10:25", "remaining_time": "1:14:33", "throughput": 19988.14, "total_tokens": 156414080}
|
|
{"current_steps": 49700, "total_steps": 78105, "loss": 0.1386, "lr": 1.7581667949873827e-06, "epoch": 3.1816144933102875, "percentage": 63.63, "elapsed_time": "2:10:25", "remaining_time": "1:14:32", "throughput": 19988.4, "total_tokens": 156429056}
|
|
{"current_steps": 49705, "total_steps": 78105, "loss": 0.1463, "lr": 1.7576333226453113e-06, "epoch": 3.1819345752512644, "percentage": 63.64, "elapsed_time": "2:10:26", "remaining_time": "1:14:31", "throughput": 19988.71, "total_tokens": 156445184}
|
|
{"current_steps": 49710, "total_steps": 78105, "loss": 0.1941, "lr": 1.7570998873731654e-06, "epoch": 3.1822546571922414, "percentage": 63.65, "elapsed_time": "2:10:27", "remaining_time": "1:14:31", "throughput": 19988.98, "total_tokens": 156460864}
|
|
{"current_steps": 49715, "total_steps": 78105, "loss": 0.2133, "lr": 1.756566489197581e-06, "epoch": 3.182574739133218, "percentage": 63.65, "elapsed_time": "2:10:28", "remaining_time": "1:14:30", "throughput": 19989.38, "total_tokens": 156478464}
|
|
{"current_steps": 49720, "total_steps": 78105, "loss": 0.1716, "lr": 1.7560331281451947e-06, "epoch": 3.182894821074195, "percentage": 63.66, "elapsed_time": "2:10:28", "remaining_time": "1:14:29", "throughput": 19989.7, "total_tokens": 156495296}
|
|
{"current_steps": 49725, "total_steps": 78105, "loss": 0.1428, "lr": 1.755499804242638e-06, "epoch": 3.183214903015172, "percentage": 63.66, "elapsed_time": "2:10:29", "remaining_time": "1:14:28", "throughput": 19989.97, "total_tokens": 156510464}
|
|
{"current_steps": 49730, "total_steps": 78105, "loss": 0.1476, "lr": 1.7549665175165435e-06, "epoch": 3.183534984956149, "percentage": 63.67, "elapsed_time": "2:10:30", "remaining_time": "1:14:27", "throughput": 19990.21, "total_tokens": 156525632}
|
|
{"current_steps": 49735, "total_steps": 78105, "loss": 0.1398, "lr": 1.7544332679935397e-06, "epoch": 3.1838550668971255, "percentage": 63.68, "elapsed_time": "2:10:30", "remaining_time": "1:14:26", "throughput": 19990.5, "total_tokens": 156541568}
|
|
{"current_steps": 49740, "total_steps": 78105, "loss": 0.1856, "lr": 1.753900055700256e-06, "epoch": 3.1841751488381025, "percentage": 63.68, "elapsed_time": "2:10:31", "remaining_time": "1:14:26", "throughput": 19990.73, "total_tokens": 156556288}
|
|
{"current_steps": 49745, "total_steps": 78105, "loss": 0.1636, "lr": 1.7533668806633163e-06, "epoch": 3.1844952307790795, "percentage": 63.69, "elapsed_time": "2:10:32", "remaining_time": "1:14:25", "throughput": 19990.96, "total_tokens": 156571328}
|
|
{"current_steps": 49750, "total_steps": 78105, "loss": 0.2179, "lr": 1.7528337429093468e-06, "epoch": 3.1848153127200565, "percentage": 63.7, "elapsed_time": "2:10:32", "remaining_time": "1:14:24", "throughput": 19991.21, "total_tokens": 156586432}
|
|
{"current_steps": 49755, "total_steps": 78105, "loss": 0.1316, "lr": 1.7523006424649668e-06, "epoch": 3.1851353946610335, "percentage": 63.7, "elapsed_time": "2:10:33", "remaining_time": "1:14:23", "throughput": 19991.52, "total_tokens": 156602560}
|
|
{"current_steps": 49760, "total_steps": 78105, "loss": 0.148, "lr": 1.7517675793567984e-06, "epoch": 3.18545547660201, "percentage": 63.71, "elapsed_time": "2:10:34", "remaining_time": "1:14:22", "throughput": 19991.78, "total_tokens": 156617984}
|
|
{"current_steps": 49765, "total_steps": 78105, "loss": 0.1879, "lr": 1.7512345536114605e-06, "epoch": 3.185775558542987, "percentage": 63.72, "elapsed_time": "2:10:34", "remaining_time": "1:14:21", "throughput": 19992.08, "total_tokens": 156634304}
|
|
{"current_steps": 49770, "total_steps": 78105, "loss": 0.1933, "lr": 1.750701565255568e-06, "epoch": 3.186095640483964, "percentage": 63.72, "elapsed_time": "2:10:35", "remaining_time": "1:14:20", "throughput": 19992.38, "total_tokens": 156650304}
|
|
{"current_steps": 49775, "total_steps": 78105, "loss": 0.1263, "lr": 1.750168614315737e-06, "epoch": 3.186415722424941, "percentage": 63.73, "elapsed_time": "2:10:36", "remaining_time": "1:14:20", "throughput": 19992.67, "total_tokens": 156666112}
|
|
{"current_steps": 49780, "total_steps": 78105, "loss": 0.1138, "lr": 1.7496357008185793e-06, "epoch": 3.1867358043659175, "percentage": 63.73, "elapsed_time": "2:10:36", "remaining_time": "1:14:19", "throughput": 19992.95, "total_tokens": 156681728}
|
|
{"current_steps": 49785, "total_steps": 78105, "loss": 0.1, "lr": 1.7491028247907063e-06, "epoch": 3.1870558863068945, "percentage": 63.74, "elapsed_time": "2:10:37", "remaining_time": "1:14:18", "throughput": 19993.17, "total_tokens": 156696320}
|
|
{"current_steps": 49790, "total_steps": 78105, "loss": 0.1686, "lr": 1.7485699862587267e-06, "epoch": 3.1873759682478715, "percentage": 63.75, "elapsed_time": "2:10:38", "remaining_time": "1:14:17", "throughput": 19993.51, "total_tokens": 156713088}
|
|
{"current_steps": 49795, "total_steps": 78105, "loss": 0.1719, "lr": 1.748037185249249e-06, "epoch": 3.1876960501888485, "percentage": 63.75, "elapsed_time": "2:10:38", "remaining_time": "1:14:16", "throughput": 19993.76, "total_tokens": 156728192}
|
|
{"current_steps": 49800, "total_steps": 78105, "loss": 0.1797, "lr": 1.747504421788876e-06, "epoch": 3.188016132129825, "percentage": 63.76, "elapsed_time": "2:10:39", "remaining_time": "1:14:15", "throughput": 19994.01, "total_tokens": 156743360}
|
|
{"current_steps": 49805, "total_steps": 78105, "loss": 0.2801, "lr": 1.7469716959042137e-06, "epoch": 3.188336214070802, "percentage": 63.77, "elapsed_time": "2:10:40", "remaining_time": "1:14:14", "throughput": 19994.39, "total_tokens": 156760960}
|
|
{"current_steps": 49810, "total_steps": 78105, "loss": 0.1715, "lr": 1.7464390076218618e-06, "epoch": 3.188656296011779, "percentage": 63.77, "elapsed_time": "2:10:40", "remaining_time": "1:14:14", "throughput": 19994.64, "total_tokens": 156775872}
|
|
{"current_steps": 49815, "total_steps": 78105, "loss": 0.1884, "lr": 1.7459063569684209e-06, "epoch": 3.188976377952756, "percentage": 63.78, "elapsed_time": "2:10:41", "remaining_time": "1:14:13", "throughput": 19994.88, "total_tokens": 156791040}
|
|
{"current_steps": 49820, "total_steps": 78105, "loss": 0.1933, "lr": 1.7453737439704882e-06, "epoch": 3.1892964598937326, "percentage": 63.79, "elapsed_time": "2:10:42", "remaining_time": "1:14:12", "throughput": 19995.2, "total_tokens": 156807360}
|
|
{"current_steps": 49825, "total_steps": 78105, "loss": 0.163, "lr": 1.7448411686546602e-06, "epoch": 3.1896165418347096, "percentage": 63.79, "elapsed_time": "2:10:42", "remaining_time": "1:14:11", "throughput": 19995.45, "total_tokens": 156822656}
|
|
{"current_steps": 49830, "total_steps": 78105, "loss": 0.2076, "lr": 1.7443086310475315e-06, "epoch": 3.1899366237756865, "percentage": 63.8, "elapsed_time": "2:10:43", "remaining_time": "1:14:10", "throughput": 19995.69, "total_tokens": 156837824}
|
|
{"current_steps": 49835, "total_steps": 78105, "loss": 0.1561, "lr": 1.7437761311756927e-06, "epoch": 3.1902567057166635, "percentage": 63.81, "elapsed_time": "2:10:44", "remaining_time": "1:14:09", "throughput": 19996.0, "total_tokens": 156853952}
|
|
{"current_steps": 49840, "total_steps": 78105, "loss": 0.2274, "lr": 1.7432436690657357e-06, "epoch": 3.1905767876576405, "percentage": 63.81, "elapsed_time": "2:10:44", "remaining_time": "1:14:08", "throughput": 19996.3, "total_tokens": 156870144}
|
|
{"current_steps": 49845, "total_steps": 78105, "loss": 0.1089, "lr": 1.7427112447442474e-06, "epoch": 3.190896869598617, "percentage": 63.82, "elapsed_time": "2:10:45", "remaining_time": "1:14:08", "throughput": 19996.6, "total_tokens": 156885952}
|
|
{"current_steps": 49850, "total_steps": 78105, "loss": 0.1579, "lr": 1.7421788582378153e-06, "epoch": 3.191216951539594, "percentage": 63.82, "elapsed_time": "2:10:46", "remaining_time": "1:14:07", "throughput": 19996.9, "total_tokens": 156901824}
|
|
{"current_steps": 49855, "total_steps": 78105, "loss": 0.2451, "lr": 1.741646509573023e-06, "epoch": 3.191537033480571, "percentage": 63.83, "elapsed_time": "2:10:46", "remaining_time": "1:14:06", "throughput": 19997.18, "total_tokens": 156917440}
|
|
{"current_steps": 49860, "total_steps": 78105, "loss": 0.152, "lr": 1.7411141987764552e-06, "epoch": 3.191857115421548, "percentage": 63.84, "elapsed_time": "2:10:47", "remaining_time": "1:14:05", "throughput": 19997.47, "total_tokens": 156933376}
|
|
{"current_steps": 49865, "total_steps": 78105, "loss": 0.2357, "lr": 1.7405819258746897e-06, "epoch": 3.1921771973625246, "percentage": 63.84, "elapsed_time": "2:10:48", "remaining_time": "1:14:04", "throughput": 19997.73, "total_tokens": 156948736}
|
|
{"current_steps": 49870, "total_steps": 78105, "loss": 0.141, "lr": 1.7400496908943085e-06, "epoch": 3.1924972793035016, "percentage": 63.85, "elapsed_time": "2:10:49", "remaining_time": "1:14:03", "throughput": 19998.04, "total_tokens": 156965120}
|
|
{"current_steps": 49875, "total_steps": 78105, "loss": 0.1475, "lr": 1.7395174938618864e-06, "epoch": 3.1928173612444786, "percentage": 63.86, "elapsed_time": "2:10:49", "remaining_time": "1:14:03", "throughput": 19998.29, "total_tokens": 156980608}
|
|
{"current_steps": 49880, "total_steps": 78105, "loss": 0.1343, "lr": 1.7389853348039996e-06, "epoch": 3.1931374431854556, "percentage": 63.86, "elapsed_time": "2:10:50", "remaining_time": "1:14:02", "throughput": 19998.61, "total_tokens": 156997056}
|
|
{"current_steps": 49885, "total_steps": 78105, "loss": 0.1241, "lr": 1.7384532137472221e-06, "epoch": 3.1934575251264326, "percentage": 63.87, "elapsed_time": "2:10:51", "remaining_time": "1:14:01", "throughput": 19998.89, "total_tokens": 157012800}
|
|
{"current_steps": 49890, "total_steps": 78105, "loss": 0.1486, "lr": 1.7379211307181235e-06, "epoch": 3.193777607067409, "percentage": 63.88, "elapsed_time": "2:10:51", "remaining_time": "1:14:00", "throughput": 19999.26, "total_tokens": 157030144}
|
|
{"current_steps": 49895, "total_steps": 78105, "loss": 0.2988, "lr": 1.7373890857432752e-06, "epoch": 3.194097689008386, "percentage": 63.88, "elapsed_time": "2:10:52", "remaining_time": "1:13:59", "throughput": 19999.51, "total_tokens": 157045312}
|
|
{"current_steps": 49900, "total_steps": 78105, "loss": 0.1294, "lr": 1.7368570788492422e-06, "epoch": 3.194417770949363, "percentage": 63.89, "elapsed_time": "2:10:53", "remaining_time": "1:13:58", "throughput": 19999.86, "total_tokens": 157062400}
|
|
{"current_steps": 49905, "total_steps": 78105, "loss": 0.1612, "lr": 1.7363251100625922e-06, "epoch": 3.19473785289034, "percentage": 63.89, "elapsed_time": "2:10:53", "remaining_time": "1:13:58", "throughput": 20000.21, "total_tokens": 157079488}
|
|
{"current_steps": 49910, "total_steps": 78105, "loss": 0.1299, "lr": 1.735793179409888e-06, "epoch": 3.1950579348313166, "percentage": 63.9, "elapsed_time": "2:10:54", "remaining_time": "1:13:57", "throughput": 20000.55, "total_tokens": 157096320}
|
|
{"current_steps": 49915, "total_steps": 78105, "loss": 0.1561, "lr": 1.7352612869176932e-06, "epoch": 3.1953780167722936, "percentage": 63.91, "elapsed_time": "2:10:55", "remaining_time": "1:13:56", "throughput": 20000.8, "total_tokens": 157111680}
|
|
{"current_steps": 49920, "total_steps": 78105, "loss": 0.1775, "lr": 1.7347294326125646e-06, "epoch": 3.1956980987132706, "percentage": 63.91, "elapsed_time": "2:10:55", "remaining_time": "1:13:55", "throughput": 20001.1, "total_tokens": 157127552}
|
|
{"current_steps": 49925, "total_steps": 78105, "loss": 0.1089, "lr": 1.7341976165210638e-06, "epoch": 3.1960181806542476, "percentage": 63.92, "elapsed_time": "2:10:56", "remaining_time": "1:13:54", "throughput": 20001.39, "total_tokens": 157143488}
|
|
{"current_steps": 49930, "total_steps": 78105, "loss": 0.197, "lr": 1.7336658386697436e-06, "epoch": 3.1963382625952246, "percentage": 63.93, "elapsed_time": "2:10:57", "remaining_time": "1:13:53", "throughput": 20001.64, "total_tokens": 157158720}
|
|
{"current_steps": 49935, "total_steps": 78105, "loss": 0.1638, "lr": 1.7331340990851597e-06, "epoch": 3.196658344536201, "percentage": 63.93, "elapsed_time": "2:10:58", "remaining_time": "1:13:52", "throughput": 20002.0, "total_tokens": 157176192}
|
|
{"current_steps": 49940, "total_steps": 78105, "loss": 0.147, "lr": 1.732602397793865e-06, "epoch": 3.196978426477178, "percentage": 63.94, "elapsed_time": "2:10:58", "remaining_time": "1:13:52", "throughput": 20002.29, "total_tokens": 157192192}
|
|
{"current_steps": 49945, "total_steps": 78105, "loss": 0.1966, "lr": 1.732070734822409e-06, "epoch": 3.197298508418155, "percentage": 63.95, "elapsed_time": "2:10:59", "remaining_time": "1:13:51", "throughput": 20002.59, "total_tokens": 157208384}
|
|
{"current_steps": 49950, "total_steps": 78105, "loss": 0.1784, "lr": 1.7315391101973416e-06, "epoch": 3.197618590359132, "percentage": 63.95, "elapsed_time": "2:11:00", "remaining_time": "1:13:50", "throughput": 20002.91, "total_tokens": 157224832}
|
|
{"current_steps": 49955, "total_steps": 78105, "loss": 0.1672, "lr": 1.7310075239452067e-06, "epoch": 3.1979386723001086, "percentage": 63.96, "elapsed_time": "2:11:00", "remaining_time": "1:13:49", "throughput": 20003.15, "total_tokens": 157240064}
|
|
{"current_steps": 49960, "total_steps": 78105, "loss": 0.2047, "lr": 1.7304759760925516e-06, "epoch": 3.1982587542410856, "percentage": 63.97, "elapsed_time": "2:11:01", "remaining_time": "1:13:48", "throughput": 20003.43, "total_tokens": 157256000}
|
|
{"current_steps": 49965, "total_steps": 78105, "loss": 0.1714, "lr": 1.729944466665917e-06, "epoch": 3.1985788361820626, "percentage": 63.97, "elapsed_time": "2:11:02", "remaining_time": "1:13:47", "throughput": 20003.78, "total_tokens": 157272704}
|
|
{"current_steps": 49970, "total_steps": 78105, "loss": 0.1405, "lr": 1.7294129956918459e-06, "epoch": 3.1988989181230396, "percentage": 63.98, "elapsed_time": "2:11:02", "remaining_time": "1:13:47", "throughput": 20004.11, "total_tokens": 157289664}
|
|
{"current_steps": 49975, "total_steps": 78105, "loss": 0.1312, "lr": 1.7288815631968752e-06, "epoch": 3.1992190000640166, "percentage": 63.98, "elapsed_time": "2:11:03", "remaining_time": "1:13:46", "throughput": 20004.46, "total_tokens": 157306688}
|
|
{"current_steps": 49980, "total_steps": 78105, "loss": 0.1973, "lr": 1.7283501692075432e-06, "epoch": 3.199539082004993, "percentage": 63.99, "elapsed_time": "2:11:04", "remaining_time": "1:13:45", "throughput": 20004.73, "total_tokens": 157322688}
|
|
{"current_steps": 49985, "total_steps": 78105, "loss": 0.1386, "lr": 1.7278188137503837e-06, "epoch": 3.19985916394597, "percentage": 64.0, "elapsed_time": "2:11:04", "remaining_time": "1:13:44", "throughput": 20004.97, "total_tokens": 157337664}
|
|
{"current_steps": 49990, "total_steps": 78105, "loss": 0.1684, "lr": 1.727287496851931e-06, "epoch": 3.200179245886947, "percentage": 64.0, "elapsed_time": "2:11:05", "remaining_time": "1:13:43", "throughput": 20005.24, "total_tokens": 157353472}
|
|
{"current_steps": 49995, "total_steps": 78105, "loss": 0.1185, "lr": 1.7267562185387148e-06, "epoch": 3.200499327827924, "percentage": 64.01, "elapsed_time": "2:11:06", "remaining_time": "1:13:42", "throughput": 20005.49, "total_tokens": 157368768}
|
|
{"current_steps": 50000, "total_steps": 78105, "loss": 0.2063, "lr": 1.7262249788372659e-06, "epoch": 3.2008194097689007, "percentage": 64.02, "elapsed_time": "2:11:06", "remaining_time": "1:13:42", "throughput": 20005.83, "total_tokens": 157385728}
|
|
{"current_steps": 50005, "total_steps": 78105, "loss": 0.2027, "lr": 1.7256937777741117e-06, "epoch": 3.2011394917098777, "percentage": 64.02, "elapsed_time": "2:11:07", "remaining_time": "1:13:41", "throughput": 20006.09, "total_tokens": 157401152}
|
|
{"current_steps": 50010, "total_steps": 78105, "loss": 0.1551, "lr": 1.7251626153757765e-06, "epoch": 3.2014595736508547, "percentage": 64.03, "elapsed_time": "2:11:08", "remaining_time": "1:13:40", "throughput": 20006.36, "total_tokens": 157416576}
|
|
{"current_steps": 50015, "total_steps": 78105, "loss": 0.2308, "lr": 1.7246314916687846e-06, "epoch": 3.2017796555918316, "percentage": 64.04, "elapsed_time": "2:11:09", "remaining_time": "1:13:39", "throughput": 20006.62, "total_tokens": 157432320}
|
|
{"current_steps": 50020, "total_steps": 78105, "loss": 0.1514, "lr": 1.7241004066796566e-06, "epoch": 3.2020997375328086, "percentage": 64.04, "elapsed_time": "2:11:09", "remaining_time": "1:13:38", "throughput": 20006.97, "total_tokens": 157449152}
|
|
{"current_steps": 50025, "total_steps": 78105, "loss": 0.1343, "lr": 1.723569360434914e-06, "epoch": 3.202419819473785, "percentage": 64.05, "elapsed_time": "2:11:10", "remaining_time": "1:13:37", "throughput": 20007.29, "total_tokens": 157465344}
|
|
{"current_steps": 50030, "total_steps": 78105, "loss": 0.1736, "lr": 1.723038352961072e-06, "epoch": 3.202739901414762, "percentage": 64.05, "elapsed_time": "2:11:11", "remaining_time": "1:13:36", "throughput": 20007.52, "total_tokens": 157480256}
|
|
{"current_steps": 50035, "total_steps": 78105, "loss": 0.244, "lr": 1.7225073842846487e-06, "epoch": 3.203059983355739, "percentage": 64.06, "elapsed_time": "2:11:11", "remaining_time": "1:13:36", "throughput": 20007.83, "total_tokens": 157496320}
|
|
{"current_steps": 50040, "total_steps": 78105, "loss": 0.1672, "lr": 1.721976454432156e-06, "epoch": 3.203380065296716, "percentage": 64.07, "elapsed_time": "2:11:12", "remaining_time": "1:13:35", "throughput": 20008.11, "total_tokens": 157512128}
|
|
{"current_steps": 50045, "total_steps": 78105, "loss": 0.1483, "lr": 1.721445563430107e-06, "epoch": 3.2037001472376927, "percentage": 64.07, "elapsed_time": "2:11:13", "remaining_time": "1:13:34", "throughput": 20008.33, "total_tokens": 157526656}
|
|
{"current_steps": 50050, "total_steps": 78105, "loss": 0.0944, "lr": 1.7209147113050106e-06, "epoch": 3.2040202291786697, "percentage": 64.08, "elapsed_time": "2:11:13", "remaining_time": "1:13:33", "throughput": 20008.6, "total_tokens": 157542144}
|
|
{"current_steps": 50055, "total_steps": 78105, "loss": 0.1734, "lr": 1.7203838980833753e-06, "epoch": 3.2043403111196467, "percentage": 64.09, "elapsed_time": "2:11:14", "remaining_time": "1:13:32", "throughput": 20008.86, "total_tokens": 157557888}
|
|
{"current_steps": 50060, "total_steps": 78105, "loss": 0.1445, "lr": 1.7198531237917087e-06, "epoch": 3.2046603930606237, "percentage": 64.09, "elapsed_time": "2:11:15", "remaining_time": "1:13:31", "throughput": 20009.15, "total_tokens": 157573760}
|
|
{"current_steps": 50065, "total_steps": 78105, "loss": 0.132, "lr": 1.7193223884565121e-06, "epoch": 3.2049804750016, "percentage": 64.1, "elapsed_time": "2:11:15", "remaining_time": "1:13:30", "throughput": 20009.39, "total_tokens": 157588928}
|
|
{"current_steps": 50070, "total_steps": 78105, "loss": 0.1412, "lr": 1.7187916921042902e-06, "epoch": 3.205300556942577, "percentage": 64.11, "elapsed_time": "2:11:16", "remaining_time": "1:13:30", "throughput": 20009.62, "total_tokens": 157603776}
|
|
{"current_steps": 50075, "total_steps": 78105, "loss": 0.1607, "lr": 1.7182610347615409e-06, "epoch": 3.205620638883554, "percentage": 64.11, "elapsed_time": "2:11:17", "remaining_time": "1:13:29", "throughput": 20009.92, "total_tokens": 157620160}
|
|
{"current_steps": 50080, "total_steps": 78105, "loss": 0.1085, "lr": 1.7177304164547643e-06, "epoch": 3.205940720824531, "percentage": 64.12, "elapsed_time": "2:11:17", "remaining_time": "1:13:28", "throughput": 20010.16, "total_tokens": 157635136}
|
|
{"current_steps": 50085, "total_steps": 78105, "loss": 0.1608, "lr": 1.7171998372104553e-06, "epoch": 3.206260802765508, "percentage": 64.13, "elapsed_time": "2:11:18", "remaining_time": "1:13:27", "throughput": 20010.37, "total_tokens": 157649728}
|
|
{"current_steps": 50090, "total_steps": 78105, "loss": 0.0994, "lr": 1.71666929705511e-06, "epoch": 3.2065808847064847, "percentage": 64.13, "elapsed_time": "2:11:19", "remaining_time": "1:13:26", "throughput": 20010.64, "total_tokens": 157665152}
|
|
{"current_steps": 50095, "total_steps": 78105, "loss": 0.1194, "lr": 1.7161387960152187e-06, "epoch": 3.2069009666474617, "percentage": 64.14, "elapsed_time": "2:11:19", "remaining_time": "1:13:25", "throughput": 20010.87, "total_tokens": 157680064}
|
|
{"current_steps": 50100, "total_steps": 78105, "loss": 0.1971, "lr": 1.7156083341172735e-06, "epoch": 3.2072210485884387, "percentage": 64.14, "elapsed_time": "2:11:20", "remaining_time": "1:13:24", "throughput": 20011.14, "total_tokens": 157695552}
|
|
{"current_steps": 50105, "total_steps": 78105, "loss": 0.1669, "lr": 1.7150779113877619e-06, "epoch": 3.2075411305294157, "percentage": 64.15, "elapsed_time": "2:11:21", "remaining_time": "1:13:24", "throughput": 20011.4, "total_tokens": 157710784}
|
|
{"current_steps": 50110, "total_steps": 78105, "loss": 0.1842, "lr": 1.714547527853171e-06, "epoch": 3.2078612124703922, "percentage": 64.16, "elapsed_time": "2:11:21", "remaining_time": "1:13:23", "throughput": 20011.64, "total_tokens": 157725312}
|
|
{"current_steps": 50115, "total_steps": 78105, "loss": 0.1329, "lr": 1.7140171835399862e-06, "epoch": 3.2081812944113692, "percentage": 64.16, "elapsed_time": "2:11:22", "remaining_time": "1:13:22", "throughput": 20011.91, "total_tokens": 157741184}
|
|
{"current_steps": 50120, "total_steps": 78105, "loss": 0.1845, "lr": 1.7134868784746883e-06, "epoch": 3.2085013763523462, "percentage": 64.17, "elapsed_time": "2:11:23", "remaining_time": "1:13:21", "throughput": 20012.25, "total_tokens": 157758144}
|
|
{"current_steps": 50125, "total_steps": 78105, "loss": 0.15, "lr": 1.7129566126837598e-06, "epoch": 3.208821458293323, "percentage": 64.18, "elapsed_time": "2:11:23", "remaining_time": "1:13:20", "throughput": 20012.5, "total_tokens": 157773120}
|
|
{"current_steps": 50130, "total_steps": 78105, "loss": 0.0986, "lr": 1.7124263861936774e-06, "epoch": 3.2091415402342998, "percentage": 64.18, "elapsed_time": "2:11:24", "remaining_time": "1:13:19", "throughput": 20012.77, "total_tokens": 157788736}
|
|
{"current_steps": 50135, "total_steps": 78105, "loss": 0.1814, "lr": 1.7118961990309196e-06, "epoch": 3.2094616221752768, "percentage": 64.19, "elapsed_time": "2:11:25", "remaining_time": "1:13:19", "throughput": 20013.05, "total_tokens": 157804480}
|
|
{"current_steps": 50140, "total_steps": 78105, "loss": 0.1131, "lr": 1.71136605122196e-06, "epoch": 3.2097817041162537, "percentage": 64.2, "elapsed_time": "2:11:25", "remaining_time": "1:13:18", "throughput": 20013.38, "total_tokens": 157820992}
|
|
{"current_steps": 50145, "total_steps": 78105, "loss": 0.0996, "lr": 1.710835942793273e-06, "epoch": 3.2101017860572307, "percentage": 64.2, "elapsed_time": "2:11:26", "remaining_time": "1:13:17", "throughput": 20013.6, "total_tokens": 157835520}
|
|
{"current_steps": 50150, "total_steps": 78105, "loss": 0.1827, "lr": 1.7103058737713275e-06, "epoch": 3.2104218679982077, "percentage": 64.21, "elapsed_time": "2:11:27", "remaining_time": "1:13:16", "throughput": 20013.83, "total_tokens": 157850560}
|
|
{"current_steps": 50155, "total_steps": 78105, "loss": 0.1209, "lr": 1.7097758441825934e-06, "epoch": 3.2107419499391843, "percentage": 64.21, "elapsed_time": "2:11:27", "remaining_time": "1:13:15", "throughput": 20014.08, "total_tokens": 157865728}
|
|
{"current_steps": 50160, "total_steps": 78105, "loss": 0.0946, "lr": 1.7092458540535378e-06, "epoch": 3.2110620318801613, "percentage": 64.22, "elapsed_time": "2:11:28", "remaining_time": "1:13:14", "throughput": 20014.36, "total_tokens": 157881920}
|
|
{"current_steps": 50165, "total_steps": 78105, "loss": 0.1415, "lr": 1.7087159034106255e-06, "epoch": 3.2113821138211383, "percentage": 64.23, "elapsed_time": "2:11:29", "remaining_time": "1:13:13", "throughput": 20014.64, "total_tokens": 157897792}
|
|
{"current_steps": 50170, "total_steps": 78105, "loss": 0.1709, "lr": 1.7081859922803184e-06, "epoch": 3.2117021957621152, "percentage": 64.23, "elapsed_time": "2:11:29", "remaining_time": "1:13:13", "throughput": 20014.91, "total_tokens": 157913024}
|
|
{"current_steps": 50175, "total_steps": 78105, "loss": 0.146, "lr": 1.7076561206890787e-06, "epoch": 3.212022277703092, "percentage": 64.24, "elapsed_time": "2:11:30", "remaining_time": "1:13:12", "throughput": 20015.23, "total_tokens": 157929728}
|
|
{"current_steps": 50180, "total_steps": 78105, "loss": 0.1187, "lr": 1.7071262886633663e-06, "epoch": 3.212342359644069, "percentage": 64.25, "elapsed_time": "2:11:31", "remaining_time": "1:13:11", "throughput": 20015.5, "total_tokens": 157945280}
|
|
{"current_steps": 50185, "total_steps": 78105, "loss": 0.1315, "lr": 1.706596496229636e-06, "epoch": 3.2126624415850458, "percentage": 64.25, "elapsed_time": "2:11:31", "remaining_time": "1:13:10", "throughput": 20015.83, "total_tokens": 157961920}
|
|
{"current_steps": 50190, "total_steps": 78105, "loss": 0.1425, "lr": 1.7060667434143446e-06, "epoch": 3.2129825235260228, "percentage": 64.26, "elapsed_time": "2:11:32", "remaining_time": "1:13:09", "throughput": 20016.09, "total_tokens": 157977792}
|
|
{"current_steps": 50195, "total_steps": 78105, "loss": 0.117, "lr": 1.7055370302439439e-06, "epoch": 3.2133026054669998, "percentage": 64.27, "elapsed_time": "2:11:33", "remaining_time": "1:13:08", "throughput": 20016.37, "total_tokens": 157993664}
|
|
{"current_steps": 50200, "total_steps": 78105, "loss": 0.1704, "lr": 1.7050073567448867e-06, "epoch": 3.2136226874079763, "percentage": 64.27, "elapsed_time": "2:11:33", "remaining_time": "1:13:08", "throughput": 20016.59, "total_tokens": 158008704}
|
|
{"current_steps": 50205, "total_steps": 78105, "loss": 0.1336, "lr": 1.7044777229436201e-06, "epoch": 3.2139427693489533, "percentage": 64.28, "elapsed_time": "2:11:34", "remaining_time": "1:13:07", "throughput": 20016.89, "total_tokens": 158024640}
|
|
{"current_steps": 50210, "total_steps": 78105, "loss": 0.18, "lr": 1.7039481288665935e-06, "epoch": 3.2142628512899303, "percentage": 64.29, "elapsed_time": "2:11:35", "remaining_time": "1:13:06", "throughput": 20017.14, "total_tokens": 158039680}
|
|
{"current_steps": 50215, "total_steps": 78105, "loss": 0.1753, "lr": 1.7034185745402498e-06, "epoch": 3.2145829332309073, "percentage": 64.29, "elapsed_time": "2:11:35", "remaining_time": "1:13:05", "throughput": 20017.41, "total_tokens": 158055296}
|
|
{"current_steps": 50220, "total_steps": 78105, "loss": 0.1483, "lr": 1.7028890599910337e-06, "epoch": 3.214903015171884, "percentage": 64.3, "elapsed_time": "2:11:37", "remaining_time": "1:13:04", "throughput": 20016.42, "total_tokens": 158070528}
|
|
{"current_steps": 50225, "total_steps": 78105, "loss": 0.1415, "lr": 1.7023595852453855e-06, "epoch": 3.215223097112861, "percentage": 64.3, "elapsed_time": "2:11:37", "remaining_time": "1:13:04", "throughput": 20016.7, "total_tokens": 158086016}
|
|
{"current_steps": 50230, "total_steps": 78105, "loss": 0.176, "lr": 1.7018301503297447e-06, "epoch": 3.215543179053838, "percentage": 64.31, "elapsed_time": "2:11:38", "remaining_time": "1:13:03", "throughput": 20016.97, "total_tokens": 158101568}
|
|
{"current_steps": 50235, "total_steps": 78105, "loss": 0.1828, "lr": 1.7013007552705495e-06, "epoch": 3.215863260994815, "percentage": 64.32, "elapsed_time": "2:11:39", "remaining_time": "1:13:02", "throughput": 20017.19, "total_tokens": 158116352}
|
|
{"current_steps": 50240, "total_steps": 78105, "loss": 0.1761, "lr": 1.7007714000942338e-06, "epoch": 3.216183342935792, "percentage": 64.32, "elapsed_time": "2:11:39", "remaining_time": "1:13:01", "throughput": 20017.43, "total_tokens": 158131392}
|
|
{"current_steps": 50245, "total_steps": 78105, "loss": 0.1337, "lr": 1.7002420848272317e-06, "epoch": 3.2165034248767683, "percentage": 64.33, "elapsed_time": "2:11:40", "remaining_time": "1:13:00", "throughput": 20017.7, "total_tokens": 158147008}
|
|
{"current_steps": 50250, "total_steps": 78105, "loss": 0.1049, "lr": 1.6997128094959736e-06, "epoch": 3.2168235068177453, "percentage": 64.34, "elapsed_time": "2:11:41", "remaining_time": "1:12:59", "throughput": 20017.9, "total_tokens": 158161536}
|
|
{"current_steps": 50255, "total_steps": 78105, "loss": 0.2973, "lr": 1.6991835741268903e-06, "epoch": 3.2171435887587223, "percentage": 64.34, "elapsed_time": "2:11:41", "remaining_time": "1:12:58", "throughput": 20018.21, "total_tokens": 158177600}
|
|
{"current_steps": 50260, "total_steps": 78105, "loss": 0.2662, "lr": 1.6986543787464065e-06, "epoch": 3.2174636706996993, "percentage": 64.35, "elapsed_time": "2:11:42", "remaining_time": "1:12:58", "throughput": 20018.43, "total_tokens": 158192384}
|
|
{"current_steps": 50265, "total_steps": 78105, "loss": 0.2164, "lr": 1.6981252233809505e-06, "epoch": 3.217783752640676, "percentage": 64.36, "elapsed_time": "2:11:42", "remaining_time": "1:12:57", "throughput": 20018.68, "total_tokens": 158207424}
|
|
{"current_steps": 50270, "total_steps": 78105, "loss": 0.3059, "lr": 1.697596108056943e-06, "epoch": 3.218103834581653, "percentage": 64.36, "elapsed_time": "2:11:43", "remaining_time": "1:12:56", "throughput": 20018.99, "total_tokens": 158223680}
|
|
{"current_steps": 50275, "total_steps": 78105, "loss": 0.1693, "lr": 1.6970670328008066e-06, "epoch": 3.21842391652263, "percentage": 64.37, "elapsed_time": "2:11:45", "remaining_time": "1:12:56", "throughput": 20015.73, "total_tokens": 158240448}
|
|
{"current_steps": 50280, "total_steps": 78105, "loss": 0.1316, "lr": 1.69653799763896e-06, "epoch": 3.218743998463607, "percentage": 64.37, "elapsed_time": "2:11:46", "remaining_time": "1:12:55", "throughput": 20015.98, "total_tokens": 158255552}
|
|
{"current_steps": 50285, "total_steps": 78105, "loss": 0.1596, "lr": 1.6960090025978204e-06, "epoch": 3.219064080404584, "percentage": 64.38, "elapsed_time": "2:11:47", "remaining_time": "1:12:54", "throughput": 20016.33, "total_tokens": 158272704}
|
|
{"current_steps": 50290, "total_steps": 78105, "loss": 0.1489, "lr": 1.6954800477038046e-06, "epoch": 3.2193841623455604, "percentage": 64.39, "elapsed_time": "2:11:47", "remaining_time": "1:12:53", "throughput": 20016.6, "total_tokens": 158288064}
|
|
{"current_steps": 50295, "total_steps": 78105, "loss": 0.1086, "lr": 1.6949511329833236e-06, "epoch": 3.2197042442865373, "percentage": 64.39, "elapsed_time": "2:11:48", "remaining_time": "1:12:52", "throughput": 20016.94, "total_tokens": 158304704}
|
|
{"current_steps": 50300, "total_steps": 78105, "loss": 0.1302, "lr": 1.6944222584627907e-06, "epoch": 3.2200243262275143, "percentage": 64.4, "elapsed_time": "2:11:49", "remaining_time": "1:12:52", "throughput": 20017.22, "total_tokens": 158320384}
|
|
{"current_steps": 50305, "total_steps": 78105, "loss": 0.15, "lr": 1.6938934241686128e-06, "epoch": 3.2203444081684913, "percentage": 64.41, "elapsed_time": "2:11:49", "remaining_time": "1:12:51", "throughput": 20017.44, "total_tokens": 158335168}
|
|
{"current_steps": 50310, "total_steps": 78105, "loss": 0.1386, "lr": 1.6933646301271993e-06, "epoch": 3.220664490109468, "percentage": 64.41, "elapsed_time": "2:11:50", "remaining_time": "1:12:50", "throughput": 20017.71, "total_tokens": 158350784}
|
|
{"current_steps": 50315, "total_steps": 78105, "loss": 0.1085, "lr": 1.692835876364954e-06, "epoch": 3.220984572050445, "percentage": 64.42, "elapsed_time": "2:11:51", "remaining_time": "1:12:49", "throughput": 20017.93, "total_tokens": 158365504}
|
|
{"current_steps": 50320, "total_steps": 78105, "loss": 0.1634, "lr": 1.6923071629082815e-06, "epoch": 3.221304653991422, "percentage": 64.43, "elapsed_time": "2:11:54", "remaining_time": "1:12:49", "throughput": 20012.42, "total_tokens": 158380416}
|
|
{"current_steps": 50325, "total_steps": 78105, "loss": 0.1808, "lr": 1.6917784897835815e-06, "epoch": 3.221624735932399, "percentage": 64.43, "elapsed_time": "2:11:54", "remaining_time": "1:12:49", "throughput": 20012.76, "total_tokens": 158397184}
|
|
{"current_steps": 50330, "total_steps": 78105, "loss": 0.2651, "lr": 1.6912498570172542e-06, "epoch": 3.2219448178733754, "percentage": 64.44, "elapsed_time": "2:11:55", "remaining_time": "1:12:48", "throughput": 20012.98, "total_tokens": 158411904}
|
|
{"current_steps": 50335, "total_steps": 78105, "loss": 0.2043, "lr": 1.6907212646356957e-06, "epoch": 3.2222648998143524, "percentage": 64.45, "elapsed_time": "2:11:56", "remaining_time": "1:12:47", "throughput": 20013.24, "total_tokens": 158427264}
|
|
{"current_steps": 50340, "total_steps": 78105, "loss": 0.1144, "lr": 1.6901927126653028e-06, "epoch": 3.2225849817553294, "percentage": 64.45, "elapsed_time": "2:11:57", "remaining_time": "1:12:46", "throughput": 20012.78, "total_tokens": 158442240}
|
|
{"current_steps": 50345, "total_steps": 78105, "loss": 0.2137, "lr": 1.689664201132467e-06, "epoch": 3.2229050636963064, "percentage": 64.46, "elapsed_time": "2:11:57", "remaining_time": "1:12:45", "throughput": 20013.05, "total_tokens": 158457920}
|
|
{"current_steps": 50350, "total_steps": 78105, "loss": 0.0984, "lr": 1.6891357300635803e-06, "epoch": 3.2232251456372834, "percentage": 64.46, "elapsed_time": "2:11:58", "remaining_time": "1:12:44", "throughput": 20013.33, "total_tokens": 158473664}
|
|
{"current_steps": 50355, "total_steps": 78105, "loss": 0.2136, "lr": 1.6886072994850322e-06, "epoch": 3.22354522757826, "percentage": 64.47, "elapsed_time": "2:11:59", "remaining_time": "1:12:44", "throughput": 20013.68, "total_tokens": 158491200}
|
|
{"current_steps": 50360, "total_steps": 78105, "loss": 0.1379, "lr": 1.688078909423208e-06, "epoch": 3.223865309519237, "percentage": 64.48, "elapsed_time": "2:11:59", "remaining_time": "1:12:43", "throughput": 20013.95, "total_tokens": 158506880}
|
|
{"current_steps": 50365, "total_steps": 78105, "loss": 0.1701, "lr": 1.687550559904495e-06, "epoch": 3.224185391460214, "percentage": 64.48, "elapsed_time": "2:12:00", "remaining_time": "1:12:42", "throughput": 20014.21, "total_tokens": 158522112}
|
|
{"current_steps": 50370, "total_steps": 78105, "loss": 0.2155, "lr": 1.6870222509552742e-06, "epoch": 3.224505473401191, "percentage": 64.49, "elapsed_time": "2:12:01", "remaining_time": "1:12:41", "throughput": 20014.5, "total_tokens": 158538176}
|
|
{"current_steps": 50375, "total_steps": 78105, "loss": 0.1279, "lr": 1.6864939826019288e-06, "epoch": 3.2248255553421674, "percentage": 64.5, "elapsed_time": "2:12:02", "remaining_time": "1:12:40", "throughput": 20014.35, "total_tokens": 158553728}
|
|
{"current_steps": 50380, "total_steps": 78105, "loss": 0.1478, "lr": 1.6859657548708353e-06, "epoch": 3.2251456372831444, "percentage": 64.5, "elapsed_time": "2:12:02", "remaining_time": "1:12:39", "throughput": 20014.6, "total_tokens": 158568768}
|
|
{"current_steps": 50385, "total_steps": 78105, "loss": 0.1325, "lr": 1.6854375677883727e-06, "epoch": 3.2254657192241214, "percentage": 64.51, "elapsed_time": "2:12:03", "remaining_time": "1:12:39", "throughput": 20014.91, "total_tokens": 158585024}
|
|
{"current_steps": 50390, "total_steps": 78105, "loss": 0.087, "lr": 1.6849094213809142e-06, "epoch": 3.2257858011650984, "percentage": 64.52, "elapsed_time": "2:12:04", "remaining_time": "1:12:38", "throughput": 20015.19, "total_tokens": 158601024}
|
|
{"current_steps": 50395, "total_steps": 78105, "loss": 0.2116, "lr": 1.6843813156748345e-06, "epoch": 3.226105883106075, "percentage": 64.52, "elapsed_time": "2:12:04", "remaining_time": "1:12:37", "throughput": 20015.45, "total_tokens": 158616512}
|
|
{"current_steps": 50400, "total_steps": 78105, "loss": 0.2442, "lr": 1.6838532506965028e-06, "epoch": 3.226425965047052, "percentage": 64.53, "elapsed_time": "2:12:05", "remaining_time": "1:12:36", "throughput": 20015.74, "total_tokens": 158632512}
|
|
{"current_steps": 50405, "total_steps": 78105, "loss": 0.1336, "lr": 1.6833252264722885e-06, "epoch": 3.226746046988029, "percentage": 64.53, "elapsed_time": "2:12:06", "remaining_time": "1:12:35", "throughput": 20016.02, "total_tokens": 158648192}
|
|
{"current_steps": 50410, "total_steps": 78105, "loss": 0.1862, "lr": 1.6827972430285595e-06, "epoch": 3.227066128929006, "percentage": 64.54, "elapsed_time": "2:12:06", "remaining_time": "1:12:34", "throughput": 20016.29, "total_tokens": 158663744}
|
|
{"current_steps": 50415, "total_steps": 78105, "loss": 0.1386, "lr": 1.6822693003916784e-06, "epoch": 3.227386210869983, "percentage": 64.55, "elapsed_time": "2:12:07", "remaining_time": "1:12:34", "throughput": 20016.6, "total_tokens": 158679808}
|
|
{"current_steps": 50420, "total_steps": 78105, "loss": 0.1487, "lr": 1.6817413985880098e-06, "epoch": 3.2277062928109594, "percentage": 64.55, "elapsed_time": "2:12:11", "remaining_time": "1:12:35", "throughput": 20008.63, "total_tokens": 158695552}
|
|
{"current_steps": 50425, "total_steps": 78105, "loss": 0.1717, "lr": 1.6812135376439125e-06, "epoch": 3.2280263747519364, "percentage": 64.56, "elapsed_time": "2:12:12", "remaining_time": "1:12:34", "throughput": 20008.89, "total_tokens": 158711040}
|
|
{"current_steps": 50430, "total_steps": 78105, "loss": 0.2013, "lr": 1.680685717585748e-06, "epoch": 3.2283464566929134, "percentage": 64.57, "elapsed_time": "2:12:12", "remaining_time": "1:12:33", "throughput": 20009.11, "total_tokens": 158725888}
|
|
{"current_steps": 50435, "total_steps": 78105, "loss": 0.1652, "lr": 1.6801579384398698e-06, "epoch": 3.2286665386338904, "percentage": 64.57, "elapsed_time": "2:12:13", "remaining_time": "1:12:32", "throughput": 20009.41, "total_tokens": 158741760}
|
|
{"current_steps": 50440, "total_steps": 78105, "loss": 0.1905, "lr": 1.6796302002326347e-06, "epoch": 3.228986620574867, "percentage": 64.58, "elapsed_time": "2:12:14", "remaining_time": "1:12:31", "throughput": 20009.62, "total_tokens": 158756352}
|
|
{"current_steps": 50445, "total_steps": 78105, "loss": 0.1151, "lr": 1.6791025029903932e-06, "epoch": 3.229306702515844, "percentage": 64.59, "elapsed_time": "2:12:14", "remaining_time": "1:12:30", "throughput": 20009.95, "total_tokens": 158773120}
|
|
{"current_steps": 50450, "total_steps": 78105, "loss": 0.1392, "lr": 1.6785748467394974e-06, "epoch": 3.229626784456821, "percentage": 64.59, "elapsed_time": "2:12:15", "remaining_time": "1:12:29", "throughput": 20010.24, "total_tokens": 158789312}
|
|
{"current_steps": 50455, "total_steps": 78105, "loss": 0.1836, "lr": 1.6780472315062947e-06, "epoch": 3.229946866397798, "percentage": 64.6, "elapsed_time": "2:12:16", "remaining_time": "1:12:29", "throughput": 20010.49, "total_tokens": 158804544}
|
|
{"current_steps": 50460, "total_steps": 78105, "loss": 0.2771, "lr": 1.6775196573171315e-06, "epoch": 3.230266948338775, "percentage": 64.61, "elapsed_time": "2:12:16", "remaining_time": "1:12:28", "throughput": 20010.74, "total_tokens": 158819904}
|
|
{"current_steps": 50465, "total_steps": 78105, "loss": 0.1154, "lr": 1.676992124198354e-06, "epoch": 3.2305870302797515, "percentage": 64.61, "elapsed_time": "2:12:17", "remaining_time": "1:12:27", "throughput": 20010.94, "total_tokens": 158834112}
|
|
{"current_steps": 50470, "total_steps": 78105, "loss": 0.1504, "lr": 1.6764646321763014e-06, "epoch": 3.2309071122207285, "percentage": 64.62, "elapsed_time": "2:12:18", "remaining_time": "1:12:26", "throughput": 20011.22, "total_tokens": 158850112}
|
|
{"current_steps": 50475, "total_steps": 78105, "loss": 0.1946, "lr": 1.6759371812773163e-06, "epoch": 3.2312271941617055, "percentage": 64.62, "elapsed_time": "2:12:18", "remaining_time": "1:12:25", "throughput": 20011.51, "total_tokens": 158866176}
|
|
{"current_steps": 50480, "total_steps": 78105, "loss": 0.1821, "lr": 1.675409771527735e-06, "epoch": 3.2315472761026824, "percentage": 64.63, "elapsed_time": "2:12:19", "remaining_time": "1:12:24", "throughput": 20011.72, "total_tokens": 158880768}
|
|
{"current_steps": 50485, "total_steps": 78105, "loss": 0.242, "lr": 1.6748824029538963e-06, "epoch": 3.231867358043659, "percentage": 64.64, "elapsed_time": "2:12:20", "remaining_time": "1:12:23", "throughput": 20011.98, "total_tokens": 158895872}
|
|
{"current_steps": 50490, "total_steps": 78105, "loss": 0.1342, "lr": 1.6743550755821308e-06, "epoch": 3.232187439984636, "percentage": 64.64, "elapsed_time": "2:12:20", "remaining_time": "1:12:23", "throughput": 20012.25, "total_tokens": 158911872}
|
|
{"current_steps": 50495, "total_steps": 78105, "loss": 0.1507, "lr": 1.673827789438774e-06, "epoch": 3.232507521925613, "percentage": 64.65, "elapsed_time": "2:12:21", "remaining_time": "1:12:22", "throughput": 20012.58, "total_tokens": 158928768}
|
|
{"current_steps": 50500, "total_steps": 78105, "loss": 0.1835, "lr": 1.6733005445501521e-06, "epoch": 3.23282760386659, "percentage": 64.66, "elapsed_time": "2:12:22", "remaining_time": "1:12:21", "throughput": 20012.98, "total_tokens": 158947392}
|
|
{"current_steps": 50505, "total_steps": 78105, "loss": 0.1034, "lr": 1.6727733409425962e-06, "epoch": 3.233147685807567, "percentage": 64.66, "elapsed_time": "2:12:22", "remaining_time": "1:12:20", "throughput": 20013.27, "total_tokens": 158963520}
|
|
{"current_steps": 50510, "total_steps": 78105, "loss": 0.172, "lr": 1.6722461786424299e-06, "epoch": 3.2334677677485435, "percentage": 64.67, "elapsed_time": "2:12:23", "remaining_time": "1:12:19", "throughput": 20013.56, "total_tokens": 158979392}
|
|
{"current_steps": 50515, "total_steps": 78105, "loss": 0.2307, "lr": 1.6717190576759787e-06, "epoch": 3.2337878496895205, "percentage": 64.68, "elapsed_time": "2:12:24", "remaining_time": "1:12:18", "throughput": 20013.75, "total_tokens": 158993728}
|
|
{"current_steps": 50520, "total_steps": 78105, "loss": 0.1392, "lr": 1.6711919780695623e-06, "epoch": 3.2341079316304975, "percentage": 64.68, "elapsed_time": "2:12:24", "remaining_time": "1:12:18", "throughput": 20013.99, "total_tokens": 159008768}
|
|
{"current_steps": 50525, "total_steps": 78105, "loss": 0.1858, "lr": 1.6706649398495018e-06, "epoch": 3.2344280135714745, "percentage": 64.69, "elapsed_time": "2:12:25", "remaining_time": "1:12:17", "throughput": 20014.31, "total_tokens": 159025216}
|
|
{"current_steps": 50530, "total_steps": 78105, "loss": 0.1169, "lr": 1.6701379430421153e-06, "epoch": 3.234748095512451, "percentage": 64.69, "elapsed_time": "2:12:26", "remaining_time": "1:12:16", "throughput": 20014.55, "total_tokens": 159040064}
|
|
{"current_steps": 50535, "total_steps": 78105, "loss": 0.1845, "lr": 1.6696109876737163e-06, "epoch": 3.235068177453428, "percentage": 64.7, "elapsed_time": "2:12:26", "remaining_time": "1:12:15", "throughput": 20014.79, "total_tokens": 159055616}
|
|
{"current_steps": 50540, "total_steps": 78105, "loss": 0.176, "lr": 1.6690840737706195e-06, "epoch": 3.235388259394405, "percentage": 64.71, "elapsed_time": "2:12:27", "remaining_time": "1:12:14", "throughput": 20015.07, "total_tokens": 159071296}
|
|
{"current_steps": 50545, "total_steps": 78105, "loss": 0.2208, "lr": 1.6685572013591355e-06, "epoch": 3.235708341335382, "percentage": 64.71, "elapsed_time": "2:12:28", "remaining_time": "1:12:13", "throughput": 20015.29, "total_tokens": 159086016}
|
|
{"current_steps": 50550, "total_steps": 78105, "loss": 0.144, "lr": 1.6680303704655753e-06, "epoch": 3.236028423276359, "percentage": 64.72, "elapsed_time": "2:12:28", "remaining_time": "1:12:12", "throughput": 20015.54, "total_tokens": 159100864}
|
|
{"current_steps": 50555, "total_steps": 78105, "loss": 0.1831, "lr": 1.6675035811162437e-06, "epoch": 3.2363485052173355, "percentage": 64.73, "elapsed_time": "2:12:29", "remaining_time": "1:12:12", "throughput": 20015.84, "total_tokens": 159117056}
|
|
{"current_steps": 50560, "total_steps": 78105, "loss": 0.1634, "lr": 1.6669768333374476e-06, "epoch": 3.2366685871583125, "percentage": 64.73, "elapsed_time": "2:12:30", "remaining_time": "1:12:11", "throughput": 20016.14, "total_tokens": 159133440}
|
|
{"current_steps": 50565, "total_steps": 78105, "loss": 0.1818, "lr": 1.6664501271554889e-06, "epoch": 3.2369886690992895, "percentage": 64.74, "elapsed_time": "2:12:30", "remaining_time": "1:12:10", "throughput": 20016.43, "total_tokens": 159149568}
|
|
{"current_steps": 50570, "total_steps": 78105, "loss": 0.1208, "lr": 1.6659234625966697e-06, "epoch": 3.2373087510402665, "percentage": 64.75, "elapsed_time": "2:12:31", "remaining_time": "1:12:09", "throughput": 20016.68, "total_tokens": 159164608}
|
|
{"current_steps": 50575, "total_steps": 78105, "loss": 0.0906, "lr": 1.6653968396872877e-06, "epoch": 3.237628832981243, "percentage": 64.75, "elapsed_time": "2:12:32", "remaining_time": "1:12:08", "throughput": 20016.89, "total_tokens": 159179136}
|
|
{"current_steps": 50580, "total_steps": 78105, "loss": 0.2349, "lr": 1.6648702584536398e-06, "epoch": 3.23794891492222, "percentage": 64.76, "elapsed_time": "2:12:32", "remaining_time": "1:12:07", "throughput": 20017.2, "total_tokens": 159195456}
|
|
{"current_steps": 50585, "total_steps": 78105, "loss": 0.1733, "lr": 1.6643437189220223e-06, "epoch": 3.238268996863197, "percentage": 64.77, "elapsed_time": "2:12:33", "remaining_time": "1:12:07", "throughput": 20017.51, "total_tokens": 159211840}
|
|
{"current_steps": 50590, "total_steps": 78105, "loss": 0.1659, "lr": 1.6638172211187258e-06, "epoch": 3.238589078804174, "percentage": 64.77, "elapsed_time": "2:12:34", "remaining_time": "1:12:06", "throughput": 20017.87, "total_tokens": 159229888}
|
|
{"current_steps": 50595, "total_steps": 78105, "loss": 0.1958, "lr": 1.6632907650700419e-06, "epoch": 3.2389091607451506, "percentage": 64.78, "elapsed_time": "2:12:35", "remaining_time": "1:12:05", "throughput": 20018.14, "total_tokens": 159245376}
|
|
{"current_steps": 50600, "total_steps": 78105, "loss": 0.1657, "lr": 1.662764350802259e-06, "epoch": 3.2392292426861276, "percentage": 64.78, "elapsed_time": "2:12:35", "remaining_time": "1:12:04", "throughput": 20018.4, "total_tokens": 159260608}
|
|
{"current_steps": 50605, "total_steps": 78105, "loss": 0.1726, "lr": 1.6622379783416641e-06, "epoch": 3.2395493246271045, "percentage": 64.79, "elapsed_time": "2:12:36", "remaining_time": "1:12:03", "throughput": 20018.67, "total_tokens": 159276096}
|
|
{"current_steps": 50610, "total_steps": 78105, "loss": 0.15, "lr": 1.6617116477145397e-06, "epoch": 3.2398694065680815, "percentage": 64.8, "elapsed_time": "2:12:37", "remaining_time": "1:12:02", "throughput": 20019.01, "total_tokens": 159292992}
|
|
{"current_steps": 50615, "total_steps": 78105, "loss": 0.1745, "lr": 1.66118535894717e-06, "epoch": 3.2401894885090585, "percentage": 64.8, "elapsed_time": "2:12:37", "remaining_time": "1:12:02", "throughput": 20019.23, "total_tokens": 159307776}
|
|
{"current_steps": 50620, "total_steps": 78105, "loss": 0.1346, "lr": 1.6606591120658333e-06, "epoch": 3.240509570450035, "percentage": 64.81, "elapsed_time": "2:12:38", "remaining_time": "1:12:01", "throughput": 20019.55, "total_tokens": 159324160}
|
|
{"current_steps": 50625, "total_steps": 78105, "loss": 0.1276, "lr": 1.66013290709681e-06, "epoch": 3.240829652391012, "percentage": 64.82, "elapsed_time": "2:12:39", "remaining_time": "1:12:00", "throughput": 20019.78, "total_tokens": 159339392}
|
|
{"current_steps": 50630, "total_steps": 78105, "loss": 0.1764, "lr": 1.6596067440663731e-06, "epoch": 3.241149734331989, "percentage": 64.82, "elapsed_time": "2:12:39", "remaining_time": "1:11:59", "throughput": 20020.1, "total_tokens": 159356096}
|
|
{"current_steps": 50635, "total_steps": 78105, "loss": 0.1711, "lr": 1.6590806230007986e-06, "epoch": 3.241469816272966, "percentage": 64.83, "elapsed_time": "2:12:40", "remaining_time": "1:11:58", "throughput": 20020.34, "total_tokens": 159371008}
|
|
{"current_steps": 50640, "total_steps": 78105, "loss": 0.1747, "lr": 1.6585545439263585e-06, "epoch": 3.2417898982139426, "percentage": 64.84, "elapsed_time": "2:12:41", "remaining_time": "1:11:57", "throughput": 20020.58, "total_tokens": 159385792}
|
|
{"current_steps": 50645, "total_steps": 78105, "loss": 0.1891, "lr": 1.6580285068693202e-06, "epoch": 3.2421099801549196, "percentage": 64.84, "elapsed_time": "2:12:41", "remaining_time": "1:11:56", "throughput": 20020.86, "total_tokens": 159401600}
|
|
{"current_steps": 50650, "total_steps": 78105, "loss": 0.181, "lr": 1.6575025118559532e-06, "epoch": 3.2424300620958966, "percentage": 64.85, "elapsed_time": "2:12:42", "remaining_time": "1:11:56", "throughput": 20021.14, "total_tokens": 159417344}
|
|
{"current_steps": 50655, "total_steps": 78105, "loss": 0.1942, "lr": 1.656976558912522e-06, "epoch": 3.2427501440368736, "percentage": 64.86, "elapsed_time": "2:12:43", "remaining_time": "1:11:55", "throughput": 20021.45, "total_tokens": 159433792}
|
|
{"current_steps": 50660, "total_steps": 78105, "loss": 0.1781, "lr": 1.6564506480652908e-06, "epoch": 3.24307022597785, "percentage": 64.86, "elapsed_time": "2:12:43", "remaining_time": "1:11:54", "throughput": 20021.73, "total_tokens": 159449728}
|
|
{"current_steps": 50665, "total_steps": 78105, "loss": 0.1456, "lr": 1.6559247793405198e-06, "epoch": 3.243390307918827, "percentage": 64.87, "elapsed_time": "2:12:44", "remaining_time": "1:11:53", "throughput": 20021.99, "total_tokens": 159464704}
|
|
{"current_steps": 50670, "total_steps": 78105, "loss": 0.113, "lr": 1.6553989527644698e-06, "epoch": 3.243710389859804, "percentage": 64.87, "elapsed_time": "2:12:45", "remaining_time": "1:11:52", "throughput": 20022.23, "total_tokens": 159479936}
|
|
{"current_steps": 50675, "total_steps": 78105, "loss": 0.1559, "lr": 1.6548731683633957e-06, "epoch": 3.244030471800781, "percentage": 64.88, "elapsed_time": "2:12:45", "remaining_time": "1:11:51", "throughput": 20022.47, "total_tokens": 159495360}
|
|
{"current_steps": 50680, "total_steps": 78105, "loss": 0.1396, "lr": 1.6543474261635535e-06, "epoch": 3.244350553741758, "percentage": 64.89, "elapsed_time": "2:12:46", "remaining_time": "1:11:50", "throughput": 20022.7, "total_tokens": 159509952}
|
|
{"current_steps": 50685, "total_steps": 78105, "loss": 0.1429, "lr": 1.6538217261911959e-06, "epoch": 3.2446706356827346, "percentage": 64.89, "elapsed_time": "2:12:47", "remaining_time": "1:11:50", "throughput": 20023.01, "total_tokens": 159526528}
|
|
{"current_steps": 50690, "total_steps": 78105, "loss": 0.1186, "lr": 1.6532960684725745e-06, "epoch": 3.2449907176237116, "percentage": 64.9, "elapsed_time": "2:12:47", "remaining_time": "1:11:49", "throughput": 20023.23, "total_tokens": 159541760}
|
|
{"current_steps": 50695, "total_steps": 78105, "loss": 0.1645, "lr": 1.6527704530339361e-06, "epoch": 3.2453107995646886, "percentage": 64.91, "elapsed_time": "2:12:48", "remaining_time": "1:11:48", "throughput": 20023.53, "total_tokens": 159558016}
|
|
{"current_steps": 50700, "total_steps": 78105, "loss": 0.1832, "lr": 1.6522448799015284e-06, "epoch": 3.2456308815056656, "percentage": 64.91, "elapsed_time": "2:12:49", "remaining_time": "1:11:47", "throughput": 20023.92, "total_tokens": 159576640}
|
|
{"current_steps": 50705, "total_steps": 78105, "loss": 0.1161, "lr": 1.6517193491015963e-06, "epoch": 3.245950963446642, "percentage": 64.92, "elapsed_time": "2:12:49", "remaining_time": "1:11:46", "throughput": 20024.18, "total_tokens": 159592256}
|
|
{"current_steps": 50710, "total_steps": 78105, "loss": 0.0625, "lr": 1.6511938606603805e-06, "epoch": 3.246271045387619, "percentage": 64.93, "elapsed_time": "2:12:50", "remaining_time": "1:11:45", "throughput": 20024.43, "total_tokens": 159607616}
|
|
{"current_steps": 50715, "total_steps": 78105, "loss": 0.1533, "lr": 1.6506684146041233e-06, "epoch": 3.246591127328596, "percentage": 64.93, "elapsed_time": "2:12:51", "remaining_time": "1:11:45", "throughput": 20024.73, "total_tokens": 159623744}
|
|
{"current_steps": 50720, "total_steps": 78105, "loss": 0.1751, "lr": 1.6501430109590603e-06, "epoch": 3.246911209269573, "percentage": 64.94, "elapsed_time": "2:12:51", "remaining_time": "1:11:44", "throughput": 20024.99, "total_tokens": 159639104}
|
|
{"current_steps": 50725, "total_steps": 78105, "loss": 0.1446, "lr": 1.6496176497514294e-06, "epoch": 3.24723129121055, "percentage": 64.94, "elapsed_time": "2:12:52", "remaining_time": "1:11:43", "throughput": 20025.28, "total_tokens": 159655168}
|
|
{"current_steps": 50730, "total_steps": 78105, "loss": 0.1741, "lr": 1.649092331007463e-06, "epoch": 3.2475513731515266, "percentage": 64.95, "elapsed_time": "2:12:53", "remaining_time": "1:11:42", "throughput": 20025.6, "total_tokens": 159671872}
|
|
{"current_steps": 50735, "total_steps": 78105, "loss": 0.1633, "lr": 1.6485670547533936e-06, "epoch": 3.2478714550925036, "percentage": 64.96, "elapsed_time": "2:12:54", "remaining_time": "1:11:41", "throughput": 20025.79, "total_tokens": 159686272}
|
|
{"current_steps": 50740, "total_steps": 78105, "loss": 0.2598, "lr": 1.6480418210154502e-06, "epoch": 3.2481915370334806, "percentage": 64.96, "elapsed_time": "2:12:54", "remaining_time": "1:11:40", "throughput": 20026.22, "total_tokens": 159705344}
|
|
{"current_steps": 50745, "total_steps": 78105, "loss": 0.189, "lr": 1.6475166298198614e-06, "epoch": 3.2485116189744576, "percentage": 64.97, "elapsed_time": "2:12:55", "remaining_time": "1:11:40", "throughput": 20026.46, "total_tokens": 159720576}
|
|
{"current_steps": 50750, "total_steps": 78105, "loss": 0.0661, "lr": 1.6469914811928505e-06, "epoch": 3.248831700915434, "percentage": 64.98, "elapsed_time": "2:12:56", "remaining_time": "1:11:39", "throughput": 20026.75, "total_tokens": 159736896}
|
|
{"current_steps": 50755, "total_steps": 78105, "loss": 0.2317, "lr": 1.6464663751606425e-06, "epoch": 3.249151782856411, "percentage": 64.98, "elapsed_time": "2:12:56", "remaining_time": "1:11:38", "throughput": 20027.01, "total_tokens": 159752320}
|
|
{"current_steps": 50760, "total_steps": 78105, "loss": 0.1442, "lr": 1.6459413117494582e-06, "epoch": 3.249471864797388, "percentage": 64.99, "elapsed_time": "2:12:57", "remaining_time": "1:11:37", "throughput": 20027.26, "total_tokens": 159767872}
|
|
{"current_steps": 50765, "total_steps": 78105, "loss": 0.1711, "lr": 1.6454162909855156e-06, "epoch": 3.249791946738365, "percentage": 65.0, "elapsed_time": "2:12:58", "remaining_time": "1:11:36", "throughput": 20027.5, "total_tokens": 159782784}
|
|
{"current_steps": 50770, "total_steps": 78105, "loss": 0.1899, "lr": 1.6448913128950321e-06, "epoch": 3.250112028679342, "percentage": 65.0, "elapsed_time": "2:12:58", "remaining_time": "1:11:35", "throughput": 20027.73, "total_tokens": 159798016}
|
|
{"current_steps": 50775, "total_steps": 78105, "loss": 0.1005, "lr": 1.6443663775042224e-06, "epoch": 3.2504321106203187, "percentage": 65.01, "elapsed_time": "2:12:59", "remaining_time": "1:11:35", "throughput": 20028.07, "total_tokens": 159814976}
|
|
{"current_steps": 50778, "total_steps": 78105, "eval_loss": 0.5568700432777405, "epoch": 3.250624159784905, "percentage": 65.01, "elapsed_time": "2:13:51", "remaining_time": "1:12:02", "throughput": 19900.63, "total_tokens": 159826368}
|
|
{"current_steps": 50780, "total_steps": 78105, "loss": 0.2279, "lr": 1.6438414848392997e-06, "epoch": 3.2507521925612957, "percentage": 65.02, "elapsed_time": "2:14:24", "remaining_time": "1:12:19", "throughput": 19818.15, "total_tokens": 159832960}
|
|
{"current_steps": 50785, "total_steps": 78105, "loss": 0.1268, "lr": 1.6433166349264728e-06, "epoch": 3.2510722745022727, "percentage": 65.02, "elapsed_time": "2:14:25", "remaining_time": "1:12:18", "throughput": 19818.42, "total_tokens": 159848704}
|
|
{"current_steps": 50790, "total_steps": 78105, "loss": 0.1757, "lr": 1.642791827791951e-06, "epoch": 3.2513923564432496, "percentage": 65.03, "elapsed_time": "2:14:26", "remaining_time": "1:12:18", "throughput": 19818.73, "total_tokens": 159864832}
|
|
{"current_steps": 50795, "total_steps": 78105, "loss": 0.1713, "lr": 1.6422670634619404e-06, "epoch": 3.251712438384226, "percentage": 65.03, "elapsed_time": "2:14:27", "remaining_time": "1:12:17", "throughput": 19819.0, "total_tokens": 159880256}
|
|
{"current_steps": 50800, "total_steps": 78105, "loss": 0.263, "lr": 1.6417423419626452e-06, "epoch": 3.252032520325203, "percentage": 65.04, "elapsed_time": "2:14:27", "remaining_time": "1:12:16", "throughput": 19819.32, "total_tokens": 159896640}
|
|
{"current_steps": 50805, "total_steps": 78105, "loss": 0.1778, "lr": 1.6412176633202664e-06, "epoch": 3.25235260226618, "percentage": 65.05, "elapsed_time": "2:14:28", "remaining_time": "1:12:15", "throughput": 19819.57, "total_tokens": 159911616}
|
|
{"current_steps": 50810, "total_steps": 78105, "loss": 0.1722, "lr": 1.640693027561004e-06, "epoch": 3.252672684207157, "percentage": 65.05, "elapsed_time": "2:14:29", "remaining_time": "1:12:14", "throughput": 19819.86, "total_tokens": 159927680}
|
|
{"current_steps": 50815, "total_steps": 78105, "loss": 0.1964, "lr": 1.6401684347110569e-06, "epoch": 3.252992766148134, "percentage": 65.06, "elapsed_time": "2:14:29", "remaining_time": "1:12:13", "throughput": 19820.14, "total_tokens": 159943424}
|
|
{"current_steps": 50820, "total_steps": 78105, "loss": 0.2241, "lr": 1.6396438847966185e-06, "epoch": 3.2533128480891107, "percentage": 65.07, "elapsed_time": "2:14:30", "remaining_time": "1:12:12", "throughput": 19820.54, "total_tokens": 159961152}
|
|
{"current_steps": 50825, "total_steps": 78105, "loss": 0.1458, "lr": 1.6391193778438835e-06, "epoch": 3.2536329300300877, "percentage": 65.07, "elapsed_time": "2:14:31", "remaining_time": "1:12:12", "throughput": 19820.81, "total_tokens": 159976768}
|
|
{"current_steps": 50830, "total_steps": 78105, "loss": 0.1221, "lr": 1.6385949138790425e-06, "epoch": 3.2539530119710647, "percentage": 65.08, "elapsed_time": "2:14:31", "remaining_time": "1:12:11", "throughput": 19821.12, "total_tokens": 159992896}
|
|
{"current_steps": 50835, "total_steps": 78105, "loss": 0.198, "lr": 1.6380704929282852e-06, "epoch": 3.2542730939120417, "percentage": 65.09, "elapsed_time": "2:14:32", "remaining_time": "1:12:10", "throughput": 19821.48, "total_tokens": 160009792}
|
|
{"current_steps": 50840, "total_steps": 78105, "loss": 0.165, "lr": 1.6375461150177969e-06, "epoch": 3.254593175853018, "percentage": 65.09, "elapsed_time": "2:14:33", "remaining_time": "1:12:09", "throughput": 19821.77, "total_tokens": 160025856}
|
|
{"current_steps": 50845, "total_steps": 78105, "loss": 0.2053, "lr": 1.6370217801737637e-06, "epoch": 3.254913257793995, "percentage": 65.1, "elapsed_time": "2:14:33", "remaining_time": "1:12:08", "throughput": 19822.0, "total_tokens": 160040768}
|
|
{"current_steps": 50850, "total_steps": 78105, "loss": 0.176, "lr": 1.6364974884223672e-06, "epoch": 3.255233339734972, "percentage": 65.1, "elapsed_time": "2:14:34", "remaining_time": "1:12:07", "throughput": 19822.26, "total_tokens": 160056000}
|
|
{"current_steps": 50855, "total_steps": 78105, "loss": 0.1947, "lr": 1.6359732397897895e-06, "epoch": 3.255553421675949, "percentage": 65.11, "elapsed_time": "2:14:35", "remaining_time": "1:12:07", "throughput": 19822.63, "total_tokens": 160073600}
|
|
{"current_steps": 50860, "total_steps": 78105, "loss": 0.1147, "lr": 1.6354490343022068e-06, "epoch": 3.255873503616926, "percentage": 65.12, "elapsed_time": "2:14:35", "remaining_time": "1:12:06", "throughput": 19822.87, "total_tokens": 160088320}
|
|
{"current_steps": 50865, "total_steps": 78105, "loss": 0.1613, "lr": 1.6349248719857966e-06, "epoch": 3.2561935855579027, "percentage": 65.12, "elapsed_time": "2:14:36", "remaining_time": "1:12:05", "throughput": 19823.12, "total_tokens": 160103296}
|
|
{"current_steps": 50870, "total_steps": 78105, "loss": 0.2662, "lr": 1.6344007528667317e-06, "epoch": 3.2565136674988797, "percentage": 65.13, "elapsed_time": "2:14:37", "remaining_time": "1:12:04", "throughput": 19823.36, "total_tokens": 160118336}
|
|
{"current_steps": 50875, "total_steps": 78105, "loss": 0.1258, "lr": 1.6338766769711839e-06, "epoch": 3.2568337494398567, "percentage": 65.14, "elapsed_time": "2:14:37", "remaining_time": "1:12:03", "throughput": 19823.64, "total_tokens": 160134080}
|
|
{"current_steps": 50880, "total_steps": 78105, "loss": 0.1309, "lr": 1.6333526443253245e-06, "epoch": 3.2571538313808333, "percentage": 65.14, "elapsed_time": "2:14:38", "remaining_time": "1:12:02", "throughput": 19823.89, "total_tokens": 160148992}
|
|
{"current_steps": 50885, "total_steps": 78105, "loss": 0.1755, "lr": 1.6328286549553192e-06, "epoch": 3.2574739133218102, "percentage": 65.15, "elapsed_time": "2:14:39", "remaining_time": "1:12:01", "throughput": 19824.16, "total_tokens": 160164544}
|
|
{"current_steps": 50890, "total_steps": 78105, "loss": 0.143, "lr": 1.6323047088873345e-06, "epoch": 3.2577939952627872, "percentage": 65.16, "elapsed_time": "2:14:39", "remaining_time": "1:12:00", "throughput": 19824.39, "total_tokens": 160179328}
|
|
{"current_steps": 50895, "total_steps": 78105, "loss": 0.1076, "lr": 1.6317808061475324e-06, "epoch": 3.2581140772037642, "percentage": 65.16, "elapsed_time": "2:14:40", "remaining_time": "1:12:00", "throughput": 19824.63, "total_tokens": 160194176}
|
|
{"current_steps": 50900, "total_steps": 78105, "loss": 0.3658, "lr": 1.6312569467620754e-06, "epoch": 3.258434159144741, "percentage": 65.17, "elapsed_time": "2:14:41", "remaining_time": "1:11:59", "throughput": 19824.88, "total_tokens": 160209472}
|
|
{"current_steps": 50905, "total_steps": 78105, "loss": 0.1431, "lr": 1.63073313075712e-06, "epoch": 3.2587542410857178, "percentage": 65.18, "elapsed_time": "2:14:41", "remaining_time": "1:11:58", "throughput": 19825.21, "total_tokens": 160225920}
|
|
{"current_steps": 50910, "total_steps": 78105, "loss": 0.1891, "lr": 1.6302093581588252e-06, "epoch": 3.2590743230266948, "percentage": 65.18, "elapsed_time": "2:14:42", "remaining_time": "1:11:57", "throughput": 19825.48, "total_tokens": 160241088}
|
|
{"current_steps": 50915, "total_steps": 78105, "loss": 0.1269, "lr": 1.629685628993344e-06, "epoch": 3.2593944049676717, "percentage": 65.19, "elapsed_time": "2:14:43", "remaining_time": "1:11:56", "throughput": 19825.81, "total_tokens": 160257984}
|
|
{"current_steps": 50920, "total_steps": 78105, "loss": 0.1527, "lr": 1.62916194328683e-06, "epoch": 3.2597144869086487, "percentage": 65.19, "elapsed_time": "2:14:43", "remaining_time": "1:11:55", "throughput": 19826.09, "total_tokens": 160273792}
|
|
{"current_steps": 50925, "total_steps": 78105, "loss": 0.1758, "lr": 1.6286383010654313e-06, "epoch": 3.2600345688496253, "percentage": 65.2, "elapsed_time": "2:14:44", "remaining_time": "1:11:55", "throughput": 19826.39, "total_tokens": 160290176}
|
|
{"current_steps": 50930, "total_steps": 78105, "loss": 0.2347, "lr": 1.6281147023552974e-06, "epoch": 3.2603546507906023, "percentage": 65.21, "elapsed_time": "2:14:45", "remaining_time": "1:11:54", "throughput": 19826.78, "total_tokens": 160308096}
|
|
{"current_steps": 50935, "total_steps": 78105, "loss": 0.2139, "lr": 1.627591147182574e-06, "epoch": 3.2606747327315793, "percentage": 65.21, "elapsed_time": "2:14:46", "remaining_time": "1:11:53", "throughput": 19827.14, "total_tokens": 160325504}
|
|
{"current_steps": 50940, "total_steps": 78105, "loss": 0.1226, "lr": 1.6270676355734043e-06, "epoch": 3.2609948146725563, "percentage": 65.22, "elapsed_time": "2:14:46", "remaining_time": "1:11:52", "throughput": 19827.46, "total_tokens": 160342208}
|
|
{"current_steps": 50945, "total_steps": 78105, "loss": 0.1524, "lr": 1.6265441675539306e-06, "epoch": 3.2613148966135332, "percentage": 65.23, "elapsed_time": "2:14:47", "remaining_time": "1:11:51", "throughput": 19827.71, "total_tokens": 160357184}
|
|
{"current_steps": 50950, "total_steps": 78105, "loss": 0.1378, "lr": 1.626020743150291e-06, "epoch": 3.26163497855451, "percentage": 65.23, "elapsed_time": "2:14:48", "remaining_time": "1:11:50", "throughput": 19827.95, "total_tokens": 160372352}
|
|
{"current_steps": 50955, "total_steps": 78105, "loss": 0.1331, "lr": 1.625497362388624e-06, "epoch": 3.261955060495487, "percentage": 65.24, "elapsed_time": "2:14:49", "remaining_time": "1:11:50", "throughput": 19828.45, "total_tokens": 160392448}
|
|
{"current_steps": 50960, "total_steps": 78105, "loss": 0.143, "lr": 1.6249740252950618e-06, "epoch": 3.2622751424364638, "percentage": 65.25, "elapsed_time": "2:14:49", "remaining_time": "1:11:49", "throughput": 19828.69, "total_tokens": 160407616}
|
|
{"current_steps": 50965, "total_steps": 78105, "loss": 0.1814, "lr": 1.6244507318957403e-06, "epoch": 3.2625952243774408, "percentage": 65.25, "elapsed_time": "2:14:50", "remaining_time": "1:11:48", "throughput": 19828.96, "total_tokens": 160423552}
|
|
{"current_steps": 50970, "total_steps": 78105, "loss": 0.1984, "lr": 1.6239274822167877e-06, "epoch": 3.2629153063184173, "percentage": 65.26, "elapsed_time": "2:14:51", "remaining_time": "1:11:47", "throughput": 19829.26, "total_tokens": 160440064}
|
|
{"current_steps": 50975, "total_steps": 78105, "loss": 0.1385, "lr": 1.623404276284335e-06, "epoch": 3.2632353882593943, "percentage": 65.26, "elapsed_time": "2:14:51", "remaining_time": "1:11:46", "throughput": 19829.53, "total_tokens": 160455360}
|
|
{"current_steps": 50980, "total_steps": 78105, "loss": 0.1309, "lr": 1.6228811141245052e-06, "epoch": 3.2635554702003713, "percentage": 65.27, "elapsed_time": "2:14:52", "remaining_time": "1:11:45", "throughput": 19829.82, "total_tokens": 160471232}
|
|
{"current_steps": 50985, "total_steps": 78105, "loss": 0.1685, "lr": 1.622357995763424e-06, "epoch": 3.2638755521413483, "percentage": 65.28, "elapsed_time": "2:14:53", "remaining_time": "1:11:44", "throughput": 19830.09, "total_tokens": 160486976}
|
|
{"current_steps": 50990, "total_steps": 78105, "loss": 0.2235, "lr": 1.6218349212272139e-06, "epoch": 3.2641956340823253, "percentage": 65.28, "elapsed_time": "2:14:53", "remaining_time": "1:11:44", "throughput": 19830.45, "total_tokens": 160504256}
|
|
{"current_steps": 50995, "total_steps": 78105, "loss": 0.1593, "lr": 1.6213118905419924e-06, "epoch": 3.264515716023302, "percentage": 65.29, "elapsed_time": "2:14:54", "remaining_time": "1:11:43", "throughput": 19830.82, "total_tokens": 160521664}
|
|
{"current_steps": 51000, "total_steps": 78105, "loss": 0.1769, "lr": 1.6207889037338792e-06, "epoch": 3.264835797964279, "percentage": 65.3, "elapsed_time": "2:14:55", "remaining_time": "1:11:42", "throughput": 19831.07, "total_tokens": 160537024}
|
|
{"current_steps": 51005, "total_steps": 78105, "loss": 0.1815, "lr": 1.6202659608289875e-06, "epoch": 3.265155879905256, "percentage": 65.3, "elapsed_time": "2:14:55", "remaining_time": "1:11:41", "throughput": 19831.3, "total_tokens": 160552192}
|
|
{"current_steps": 51010, "total_steps": 78105, "loss": 0.1378, "lr": 1.6197430618534327e-06, "epoch": 3.265475961846233, "percentage": 65.31, "elapsed_time": "2:14:56", "remaining_time": "1:11:40", "throughput": 19831.55, "total_tokens": 160567616}
|
|
{"current_steps": 51015, "total_steps": 78105, "loss": 0.1762, "lr": 1.619220206833323e-06, "epoch": 3.2657960437872093, "percentage": 65.32, "elapsed_time": "2:14:57", "remaining_time": "1:11:39", "throughput": 19831.86, "total_tokens": 160584128}
|
|
{"current_steps": 51020, "total_steps": 78105, "loss": 0.1345, "lr": 1.6186973957947694e-06, "epoch": 3.2661161257281863, "percentage": 65.32, "elapsed_time": "2:14:57", "remaining_time": "1:11:38", "throughput": 19832.16, "total_tokens": 160600256}
|
|
{"current_steps": 51025, "total_steps": 78105, "loss": 0.1763, "lr": 1.618174628763876e-06, "epoch": 3.2664362076691633, "percentage": 65.33, "elapsed_time": "2:14:58", "remaining_time": "1:11:38", "throughput": 19832.43, "total_tokens": 160616000}
|
|
{"current_steps": 51030, "total_steps": 78105, "loss": 0.1702, "lr": 1.61765190576675e-06, "epoch": 3.2667562896101403, "percentage": 65.34, "elapsed_time": "2:14:59", "remaining_time": "1:11:37", "throughput": 19832.77, "total_tokens": 160633024}
|
|
{"current_steps": 51035, "total_steps": 78105, "loss": 0.2605, "lr": 1.6171292268294908e-06, "epoch": 3.2670763715511173, "percentage": 65.34, "elapsed_time": "2:15:00", "remaining_time": "1:11:36", "throughput": 19833.01, "total_tokens": 160648128}
|
|
{"current_steps": 51040, "total_steps": 78105, "loss": 0.1063, "lr": 1.6166065919782004e-06, "epoch": 3.267396453492094, "percentage": 65.35, "elapsed_time": "2:15:00", "remaining_time": "1:11:35", "throughput": 19833.25, "total_tokens": 160663360}
|
|
{"current_steps": 51045, "total_steps": 78105, "loss": 0.1636, "lr": 1.616084001238974e-06, "epoch": 3.267716535433071, "percentage": 65.35, "elapsed_time": "2:15:01", "remaining_time": "1:11:34", "throughput": 19833.51, "total_tokens": 160678848}
|
|
{"current_steps": 51050, "total_steps": 78105, "loss": 0.2143, "lr": 1.6155614546379083e-06, "epoch": 3.268036617374048, "percentage": 65.36, "elapsed_time": "2:15:02", "remaining_time": "1:11:33", "throughput": 19833.78, "total_tokens": 160694400}
|
|
{"current_steps": 51055, "total_steps": 78105, "loss": 0.1625, "lr": 1.6150389522010975e-06, "epoch": 3.268356699315025, "percentage": 65.37, "elapsed_time": "2:15:02", "remaining_time": "1:11:33", "throughput": 19834.09, "total_tokens": 160710912}
|
|
{"current_steps": 51060, "total_steps": 78105, "loss": 0.1617, "lr": 1.6145164939546315e-06, "epoch": 3.2686767812560014, "percentage": 65.37, "elapsed_time": "2:15:03", "remaining_time": "1:11:32", "throughput": 19834.34, "total_tokens": 160726016}
|
|
{"current_steps": 51065, "total_steps": 78105, "loss": 0.176, "lr": 1.6139940799246001e-06, "epoch": 3.2689968631969784, "percentage": 65.38, "elapsed_time": "2:15:04", "remaining_time": "1:11:31", "throughput": 19834.61, "total_tokens": 160741824}
|
|
{"current_steps": 51070, "total_steps": 78105, "loss": 0.1164, "lr": 1.6134717101370886e-06, "epoch": 3.2693169451379553, "percentage": 65.39, "elapsed_time": "2:15:04", "remaining_time": "1:11:30", "throughput": 19834.85, "total_tokens": 160757120}
|
|
{"current_steps": 51075, "total_steps": 78105, "loss": 0.1337, "lr": 1.6129493846181827e-06, "epoch": 3.2696370270789323, "percentage": 65.39, "elapsed_time": "2:15:05", "remaining_time": "1:11:29", "throughput": 19835.1, "total_tokens": 160772800}
|
|
{"current_steps": 51080, "total_steps": 78105, "loss": 0.1944, "lr": 1.612427103393963e-06, "epoch": 3.2699571090199093, "percentage": 65.4, "elapsed_time": "2:15:06", "remaining_time": "1:11:28", "throughput": 19835.33, "total_tokens": 160787584}
|
|
{"current_steps": 51085, "total_steps": 78105, "loss": 0.1293, "lr": 1.6119048664905122e-06, "epoch": 3.270277190960886, "percentage": 65.41, "elapsed_time": "2:15:06", "remaining_time": "1:11:27", "throughput": 19835.65, "total_tokens": 160804224}
|
|
{"current_steps": 51090, "total_steps": 78105, "loss": 0.1281, "lr": 1.611382673933905e-06, "epoch": 3.270597272901863, "percentage": 65.41, "elapsed_time": "2:15:07", "remaining_time": "1:11:27", "throughput": 19835.94, "total_tokens": 160820160}
|
|
{"current_steps": 51095, "total_steps": 78105, "loss": 0.1661, "lr": 1.6108605257502196e-06, "epoch": 3.27091735484284, "percentage": 65.42, "elapsed_time": "2:15:08", "remaining_time": "1:11:26", "throughput": 19836.17, "total_tokens": 160835008}
|
|
{"current_steps": 51100, "total_steps": 78105, "loss": 0.0908, "lr": 1.6103384219655271e-06, "epoch": 3.271237436783817, "percentage": 65.42, "elapsed_time": "2:15:08", "remaining_time": "1:11:25", "throughput": 19836.45, "total_tokens": 160850944}
|
|
{"current_steps": 51105, "total_steps": 78105, "loss": 0.11, "lr": 1.6098163626059e-06, "epoch": 3.2715575187247934, "percentage": 65.43, "elapsed_time": "2:15:09", "remaining_time": "1:11:24", "throughput": 19836.78, "total_tokens": 160867328}
|
|
{"current_steps": 51110, "total_steps": 78105, "loss": 0.1708, "lr": 1.609294347697407e-06, "epoch": 3.2718776006657704, "percentage": 65.44, "elapsed_time": "2:15:10", "remaining_time": "1:11:23", "throughput": 19837.01, "total_tokens": 160881984}
|
|
{"current_steps": 51115, "total_steps": 78105, "loss": 0.2074, "lr": 1.608772377266115e-06, "epoch": 3.2721976826067474, "percentage": 65.44, "elapsed_time": "2:15:10", "remaining_time": "1:11:22", "throughput": 19837.24, "total_tokens": 160896768}
|
|
{"current_steps": 51120, "total_steps": 78105, "loss": 0.1457, "lr": 1.6082504513380886e-06, "epoch": 3.2725177645477244, "percentage": 65.45, "elapsed_time": "2:15:11", "remaining_time": "1:11:21", "throughput": 19837.56, "total_tokens": 160912960}
|
|
{"current_steps": 51125, "total_steps": 78105, "loss": 0.1962, "lr": 1.607728569939389e-06, "epoch": 3.2728378464887014, "percentage": 65.46, "elapsed_time": "2:15:12", "remaining_time": "1:11:21", "throughput": 19837.96, "total_tokens": 160930944}
|
|
{"current_steps": 51130, "total_steps": 78105, "loss": 0.154, "lr": 1.6072067330960777e-06, "epoch": 3.273157928429678, "percentage": 65.46, "elapsed_time": "2:15:12", "remaining_time": "1:11:20", "throughput": 19838.22, "total_tokens": 160945920}
|
|
{"current_steps": 51135, "total_steps": 78105, "loss": 0.1215, "lr": 1.6066849408342112e-06, "epoch": 3.273478010370655, "percentage": 65.47, "elapsed_time": "2:15:13", "remaining_time": "1:11:19", "throughput": 19838.44, "total_tokens": 160960576}
|
|
{"current_steps": 51140, "total_steps": 78105, "loss": 0.1596, "lr": 1.6061631931798454e-06, "epoch": 3.273798092311632, "percentage": 65.48, "elapsed_time": "2:15:14", "remaining_time": "1:11:18", "throughput": 19838.73, "total_tokens": 160976576}
|
|
{"current_steps": 51145, "total_steps": 78105, "loss": 0.2135, "lr": 1.605641490159034e-06, "epoch": 3.2741181742526084, "percentage": 65.48, "elapsed_time": "2:15:14", "remaining_time": "1:11:17", "throughput": 19839.04, "total_tokens": 160992768}
|
|
{"current_steps": 51150, "total_steps": 78105, "loss": 0.1276, "lr": 1.605119831797829e-06, "epoch": 3.2744382561935854, "percentage": 65.49, "elapsed_time": "2:15:15", "remaining_time": "1:11:16", "throughput": 19839.28, "total_tokens": 161008128}
|
|
{"current_steps": 51155, "total_steps": 78105, "loss": 0.1963, "lr": 1.6045982181222772e-06, "epoch": 3.2747583381345624, "percentage": 65.5, "elapsed_time": "2:15:16", "remaining_time": "1:11:15", "throughput": 19839.6, "total_tokens": 161024512}
|
|
{"current_steps": 51160, "total_steps": 78105, "loss": 0.1377, "lr": 1.6040766491584264e-06, "epoch": 3.2750784200755394, "percentage": 65.5, "elapsed_time": "2:15:16", "remaining_time": "1:11:15", "throughput": 19839.83, "total_tokens": 161039232}
|
|
{"current_steps": 51165, "total_steps": 78105, "loss": 0.1584, "lr": 1.6035551249323216e-06, "epoch": 3.2753985020165164, "percentage": 65.51, "elapsed_time": "2:15:17", "remaining_time": "1:11:14", "throughput": 19840.08, "total_tokens": 161054528}
|
|
{"current_steps": 51170, "total_steps": 78105, "loss": 0.1557, "lr": 1.603033645470004e-06, "epoch": 3.275718583957493, "percentage": 65.51, "elapsed_time": "2:15:18", "remaining_time": "1:11:13", "throughput": 19840.33, "total_tokens": 161069312}
|
|
{"current_steps": 51175, "total_steps": 78105, "loss": 0.1792, "lr": 1.602512210797515e-06, "epoch": 3.27603866589847, "percentage": 65.52, "elapsed_time": "2:15:18", "remaining_time": "1:11:12", "throughput": 19840.58, "total_tokens": 161084352}
|
|
{"current_steps": 51180, "total_steps": 78105, "loss": 0.1318, "lr": 1.6019908209408902e-06, "epoch": 3.276358747839447, "percentage": 65.53, "elapsed_time": "2:15:19", "remaining_time": "1:11:11", "throughput": 19840.88, "total_tokens": 161100672}
|
|
{"current_steps": 51185, "total_steps": 78105, "loss": 0.1644, "lr": 1.6014694759261674e-06, "epoch": 3.276678829780424, "percentage": 65.53, "elapsed_time": "2:15:20", "remaining_time": "1:11:10", "throughput": 19841.14, "total_tokens": 161116224}
|
|
{"current_steps": 51190, "total_steps": 78105, "loss": 0.1413, "lr": 1.6009481757793776e-06, "epoch": 3.2769989117214005, "percentage": 65.54, "elapsed_time": "2:15:21", "remaining_time": "1:11:09", "throughput": 19841.46, "total_tokens": 161132608}
|
|
{"current_steps": 51195, "total_steps": 78105, "loss": 0.1782, "lr": 1.6004269205265532e-06, "epoch": 3.2773189936623774, "percentage": 65.55, "elapsed_time": "2:15:21", "remaining_time": "1:11:09", "throughput": 19841.8, "total_tokens": 161149312}
|
|
{"current_steps": 51200, "total_steps": 78105, "loss": 0.1237, "lr": 1.5999057101937227e-06, "epoch": 3.2776390756033544, "percentage": 65.55, "elapsed_time": "2:15:22", "remaining_time": "1:11:08", "throughput": 19842.11, "total_tokens": 161165504}
|
|
{"current_steps": 51205, "total_steps": 78105, "loss": 0.1619, "lr": 1.5993845448069134e-06, "epoch": 3.2779591575443314, "percentage": 65.56, "elapsed_time": "2:15:23", "remaining_time": "1:11:07", "throughput": 19842.35, "total_tokens": 161180800}
|
|
{"current_steps": 51210, "total_steps": 78105, "loss": 0.1661, "lr": 1.5988634243921474e-06, "epoch": 3.2782792394853084, "percentage": 65.57, "elapsed_time": "2:15:23", "remaining_time": "1:11:06", "throughput": 19842.64, "total_tokens": 161196864}
|
|
{"current_steps": 51215, "total_steps": 78105, "loss": 0.1789, "lr": 1.598342348975449e-06, "epoch": 3.278599321426285, "percentage": 65.57, "elapsed_time": "2:15:24", "remaining_time": "1:11:05", "throughput": 19842.9, "total_tokens": 161211776}
|
|
{"current_steps": 51220, "total_steps": 78105, "loss": 0.1678, "lr": 1.5978213185828368e-06, "epoch": 3.278919403367262, "percentage": 65.58, "elapsed_time": "2:15:25", "remaining_time": "1:11:04", "throughput": 19843.16, "total_tokens": 161227008}
|
|
{"current_steps": 51225, "total_steps": 78105, "loss": 0.1313, "lr": 1.5973003332403288e-06, "epoch": 3.279239485308239, "percentage": 65.58, "elapsed_time": "2:15:25", "remaining_time": "1:11:03", "throughput": 19843.46, "total_tokens": 161243200}
|
|
{"current_steps": 51230, "total_steps": 78105, "loss": 0.159, "lr": 1.5967793929739406e-06, "epoch": 3.279559567249216, "percentage": 65.59, "elapsed_time": "2:15:26", "remaining_time": "1:11:03", "throughput": 19843.68, "total_tokens": 161257856}
|
|
{"current_steps": 51235, "total_steps": 78105, "loss": 0.1918, "lr": 1.596258497809684e-06, "epoch": 3.2798796491901925, "percentage": 65.6, "elapsed_time": "2:15:27", "remaining_time": "1:11:02", "throughput": 19843.96, "total_tokens": 161273536}
|
|
{"current_steps": 51240, "total_steps": 78105, "loss": 0.1254, "lr": 1.5957376477735714e-06, "epoch": 3.2801997311311695, "percentage": 65.6, "elapsed_time": "2:15:27", "remaining_time": "1:11:01", "throughput": 19844.19, "total_tokens": 161288192}
|
|
{"current_steps": 51245, "total_steps": 78105, "loss": 0.2384, "lr": 1.5952168428916098e-06, "epoch": 3.2805198130721465, "percentage": 65.61, "elapsed_time": "2:15:28", "remaining_time": "1:11:00", "throughput": 19844.52, "total_tokens": 161304832}
|
|
{"current_steps": 51250, "total_steps": 78105, "loss": 0.1288, "lr": 1.5946960831898068e-06, "epoch": 3.2808398950131235, "percentage": 65.62, "elapsed_time": "2:15:29", "remaining_time": "1:10:59", "throughput": 19844.82, "total_tokens": 161321472}
|
|
{"current_steps": 51255, "total_steps": 78105, "loss": 0.1756, "lr": 1.5941753686941652e-06, "epoch": 3.2811599769541004, "percentage": 65.62, "elapsed_time": "2:15:29", "remaining_time": "1:10:58", "throughput": 19845.11, "total_tokens": 161337152}
|
|
{"current_steps": 51260, "total_steps": 78105, "loss": 0.167, "lr": 1.5936546994306887e-06, "epoch": 3.281480058895077, "percentage": 65.63, "elapsed_time": "2:15:30", "remaining_time": "1:10:57", "throughput": 19845.42, "total_tokens": 161353472}
|
|
{"current_steps": 51265, "total_steps": 78105, "loss": 0.2041, "lr": 1.5931340754253743e-06, "epoch": 3.281800140836054, "percentage": 65.64, "elapsed_time": "2:15:31", "remaining_time": "1:10:57", "throughput": 19845.67, "total_tokens": 161368576}
|
|
{"current_steps": 51270, "total_steps": 78105, "loss": 0.2391, "lr": 1.5926134967042218e-06, "epoch": 3.282120222777031, "percentage": 65.64, "elapsed_time": "2:15:31", "remaining_time": "1:10:56", "throughput": 19845.98, "total_tokens": 161385024}
|
|
{"current_steps": 51275, "total_steps": 78105, "loss": 0.2544, "lr": 1.5920929632932233e-06, "epoch": 3.282440304718008, "percentage": 65.65, "elapsed_time": "2:15:32", "remaining_time": "1:10:55", "throughput": 19846.3, "total_tokens": 161401664}
|
|
{"current_steps": 51280, "total_steps": 78105, "loss": 0.2, "lr": 1.5915724752183733e-06, "epoch": 3.2827603866589845, "percentage": 65.66, "elapsed_time": "2:15:33", "remaining_time": "1:10:54", "throughput": 19846.51, "total_tokens": 161416000}
|
|
{"current_steps": 51285, "total_steps": 78105, "loss": 0.1225, "lr": 1.5910520325056627e-06, "epoch": 3.2830804685999615, "percentage": 65.66, "elapsed_time": "2:15:33", "remaining_time": "1:10:53", "throughput": 19846.78, "total_tokens": 161431424}
|
|
{"current_steps": 51290, "total_steps": 78105, "loss": 0.142, "lr": 1.5905316351810784e-06, "epoch": 3.2834005505409385, "percentage": 65.67, "elapsed_time": "2:15:34", "remaining_time": "1:10:52", "throughput": 19847.03, "total_tokens": 161446272}
|
|
{"current_steps": 51295, "total_steps": 78105, "loss": 0.1649, "lr": 1.5900112832706084e-06, "epoch": 3.2837206324819155, "percentage": 65.67, "elapsed_time": "2:15:35", "remaining_time": "1:10:51", "throughput": 19847.3, "total_tokens": 161461696}
|
|
{"current_steps": 51300, "total_steps": 78105, "loss": 0.1297, "lr": 1.5894909768002334e-06, "epoch": 3.2840407144228925, "percentage": 65.68, "elapsed_time": "2:15:35", "remaining_time": "1:10:51", "throughput": 19847.54, "total_tokens": 161476736}
|
|
{"current_steps": 51305, "total_steps": 78105, "loss": 0.1668, "lr": 1.5889707157959373e-06, "epoch": 3.284360796363869, "percentage": 65.69, "elapsed_time": "2:15:36", "remaining_time": "1:10:50", "throughput": 19847.75, "total_tokens": 161490944}
|
|
{"current_steps": 51310, "total_steps": 78105, "loss": 0.0876, "lr": 1.5884505002836975e-06, "epoch": 3.284680878304846, "percentage": 65.69, "elapsed_time": "2:15:37", "remaining_time": "1:10:49", "throughput": 19848.01, "total_tokens": 161505792}
|
|
{"current_steps": 51315, "total_steps": 78105, "loss": 0.202, "lr": 1.5879303302894928e-06, "epoch": 3.285000960245823, "percentage": 65.7, "elapsed_time": "2:15:37", "remaining_time": "1:10:48", "throughput": 19848.34, "total_tokens": 161522624}
|
|
{"current_steps": 51320, "total_steps": 78105, "loss": 0.2409, "lr": 1.5874102058392954e-06, "epoch": 3.2853210421868, "percentage": 65.71, "elapsed_time": "2:15:38", "remaining_time": "1:10:47", "throughput": 19848.59, "total_tokens": 161537152}
|
|
{"current_steps": 51325, "total_steps": 78105, "loss": 0.1001, "lr": 1.5868901269590797e-06, "epoch": 3.2856411241277765, "percentage": 65.71, "elapsed_time": "2:15:39", "remaining_time": "1:10:46", "throughput": 19848.88, "total_tokens": 161552832}
|
|
{"current_steps": 51330, "total_steps": 78105, "loss": 0.1714, "lr": 1.5863700936748142e-06, "epoch": 3.2859612060687535, "percentage": 65.72, "elapsed_time": "2:15:39", "remaining_time": "1:10:45", "throughput": 19849.19, "total_tokens": 161569024}
|
|
{"current_steps": 51335, "total_steps": 78105, "loss": 0.1252, "lr": 1.585850106012467e-06, "epoch": 3.2862812880097305, "percentage": 65.73, "elapsed_time": "2:15:40", "remaining_time": "1:10:45", "throughput": 19849.43, "total_tokens": 161584000}
|
|
{"current_steps": 51340, "total_steps": 78105, "loss": 0.2069, "lr": 1.5853301639980046e-06, "epoch": 3.2866013699507075, "percentage": 65.73, "elapsed_time": "2:15:41", "remaining_time": "1:10:44", "throughput": 19849.73, "total_tokens": 161600128}
|
|
{"current_steps": 51345, "total_steps": 78105, "loss": 0.17, "lr": 1.584810267657389e-06, "epoch": 3.2869214518916845, "percentage": 65.74, "elapsed_time": "2:15:41", "remaining_time": "1:10:43", "throughput": 19850.02, "total_tokens": 161615744}
|
|
{"current_steps": 51350, "total_steps": 78105, "loss": 0.1748, "lr": 1.5842904170165825e-06, "epoch": 3.287241533832661, "percentage": 65.74, "elapsed_time": "2:15:42", "remaining_time": "1:10:42", "throughput": 19850.28, "total_tokens": 161631040}
|
|
{"current_steps": 51355, "total_steps": 78105, "loss": 0.1542, "lr": 1.5837706121015418e-06, "epoch": 3.287561615773638, "percentage": 65.75, "elapsed_time": "2:15:43", "remaining_time": "1:10:41", "throughput": 19850.59, "total_tokens": 161647424}
|
|
{"current_steps": 51360, "total_steps": 78105, "loss": 0.1548, "lr": 1.5832508529382251e-06, "epoch": 3.287881697714615, "percentage": 65.76, "elapsed_time": "2:15:43", "remaining_time": "1:10:40", "throughput": 19850.88, "total_tokens": 161663424}
|
|
{"current_steps": 51365, "total_steps": 78105, "loss": 0.2099, "lr": 1.582731139552585e-06, "epoch": 3.288201779655592, "percentage": 65.76, "elapsed_time": "2:15:44", "remaining_time": "1:10:39", "throughput": 19851.14, "total_tokens": 161678720}
|
|
{"current_steps": 51370, "total_steps": 78105, "loss": 0.1522, "lr": 1.5822114719705738e-06, "epoch": 3.2885218615965686, "percentage": 65.77, "elapsed_time": "2:15:45", "remaining_time": "1:10:39", "throughput": 19851.42, "total_tokens": 161694720}
|
|
{"current_steps": 51375, "total_steps": 78105, "loss": 0.2042, "lr": 1.5816918502181412e-06, "epoch": 3.2888419435375456, "percentage": 65.78, "elapsed_time": "2:15:45", "remaining_time": "1:10:38", "throughput": 19851.72, "total_tokens": 161711104}
|
|
{"current_steps": 51380, "total_steps": 78105, "loss": 0.1617, "lr": 1.5811722743212351e-06, "epoch": 3.2891620254785225, "percentage": 65.78, "elapsed_time": "2:15:46", "remaining_time": "1:10:37", "throughput": 19852.02, "total_tokens": 161727168}
|
|
{"current_steps": 51385, "total_steps": 78105, "loss": 0.1495, "lr": 1.5806527443057987e-06, "epoch": 3.2894821074194995, "percentage": 65.79, "elapsed_time": "2:15:47", "remaining_time": "1:10:36", "throughput": 19852.29, "total_tokens": 161742656}
|
|
{"current_steps": 51390, "total_steps": 78105, "loss": 0.1643, "lr": 1.580133260197776e-06, "epoch": 3.2898021893604765, "percentage": 65.8, "elapsed_time": "2:15:48", "remaining_time": "1:10:35", "throughput": 19852.6, "total_tokens": 161759104}
|
|
{"current_steps": 51395, "total_steps": 78105, "loss": 0.1352, "lr": 1.5796138220231065e-06, "epoch": 3.290122271301453, "percentage": 65.8, "elapsed_time": "2:15:48", "remaining_time": "1:10:34", "throughput": 19852.88, "total_tokens": 161774720}
|
|
{"current_steps": 51400, "total_steps": 78105, "loss": 0.1548, "lr": 1.5790944298077281e-06, "epoch": 3.29044235324243, "percentage": 65.81, "elapsed_time": "2:15:49", "remaining_time": "1:10:34", "throughput": 19853.16, "total_tokens": 161790336}
|
|
{"current_steps": 51405, "total_steps": 78105, "loss": 0.1835, "lr": 1.5785750835775788e-06, "epoch": 3.290762435183407, "percentage": 65.82, "elapsed_time": "2:15:50", "remaining_time": "1:10:33", "throughput": 19853.41, "total_tokens": 161805568}
|
|
{"current_steps": 51410, "total_steps": 78105, "loss": 0.2432, "lr": 1.5780557833585886e-06, "epoch": 3.2910825171243836, "percentage": 65.82, "elapsed_time": "2:15:50", "remaining_time": "1:10:32", "throughput": 19853.69, "total_tokens": 161821376}
|
|
{"current_steps": 51415, "total_steps": 78105, "loss": 0.1006, "lr": 1.577536529176692e-06, "epoch": 3.2914025990653606, "percentage": 65.83, "elapsed_time": "2:15:51", "remaining_time": "1:10:31", "throughput": 19853.96, "total_tokens": 161836992}
|
|
{"current_steps": 51420, "total_steps": 78105, "loss": 0.2084, "lr": 1.5770173210578146e-06, "epoch": 3.2917226810063376, "percentage": 65.83, "elapsed_time": "2:15:52", "remaining_time": "1:10:30", "throughput": 19854.24, "total_tokens": 161852608}
|
|
{"current_steps": 51425, "total_steps": 78105, "loss": 0.1015, "lr": 1.5764981590278854e-06, "epoch": 3.2920427629473146, "percentage": 65.84, "elapsed_time": "2:15:52", "remaining_time": "1:10:29", "throughput": 19854.52, "total_tokens": 161868480}
|
|
{"current_steps": 51430, "total_steps": 78105, "loss": 0.1725, "lr": 1.5759790431128273e-06, "epoch": 3.2923628448882916, "percentage": 65.85, "elapsed_time": "2:15:53", "remaining_time": "1:10:28", "throughput": 19854.81, "total_tokens": 161884416}
|
|
{"current_steps": 51435, "total_steps": 78105, "loss": 0.1928, "lr": 1.5754599733385635e-06, "epoch": 3.292682926829268, "percentage": 65.85, "elapsed_time": "2:15:54", "remaining_time": "1:10:28", "throughput": 19855.11, "total_tokens": 161900288}
|
|
{"current_steps": 51440, "total_steps": 78105, "loss": 0.1744, "lr": 1.574940949731012e-06, "epoch": 3.293003008770245, "percentage": 65.86, "elapsed_time": "2:15:54", "remaining_time": "1:10:27", "throughput": 19855.38, "total_tokens": 161915712}
|
|
{"current_steps": 51445, "total_steps": 78105, "loss": 0.1224, "lr": 1.5744219723160913e-06, "epoch": 3.293323090711222, "percentage": 65.87, "elapsed_time": "2:15:55", "remaining_time": "1:10:26", "throughput": 19855.67, "total_tokens": 161931648}
|
|
{"current_steps": 51450, "total_steps": 78105, "loss": 0.3029, "lr": 1.5739030411197162e-06, "epoch": 3.293643172652199, "percentage": 65.87, "elapsed_time": "2:15:56", "remaining_time": "1:10:25", "throughput": 19855.94, "total_tokens": 161947072}
|
|
{"current_steps": 51455, "total_steps": 78105, "loss": 0.2028, "lr": 1.573384156167799e-06, "epoch": 3.2939632545931756, "percentage": 65.88, "elapsed_time": "2:15:56", "remaining_time": "1:10:24", "throughput": 19856.16, "total_tokens": 161961472}
|
|
{"current_steps": 51460, "total_steps": 78105, "loss": 0.2772, "lr": 1.5728653174862516e-06, "epoch": 3.2942833365341526, "percentage": 65.89, "elapsed_time": "2:15:57", "remaining_time": "1:10:23", "throughput": 19856.39, "total_tokens": 161976320}
|
|
{"current_steps": 51465, "total_steps": 78105, "loss": 0.2452, "lr": 1.5723465251009797e-06, "epoch": 3.2946034184751296, "percentage": 65.89, "elapsed_time": "2:15:58", "remaining_time": "1:10:22", "throughput": 19856.65, "total_tokens": 161991488}
|
|
{"current_steps": 51470, "total_steps": 78105, "loss": 0.1274, "lr": 1.5718277790378916e-06, "epoch": 3.2949235004161066, "percentage": 65.9, "elapsed_time": "2:15:58", "remaining_time": "1:10:22", "throughput": 19856.96, "total_tokens": 162007552}
|
|
{"current_steps": 51475, "total_steps": 78105, "loss": 0.1282, "lr": 1.5713090793228886e-06, "epoch": 3.2952435823570836, "percentage": 65.9, "elapsed_time": "2:15:59", "remaining_time": "1:10:21", "throughput": 19857.24, "total_tokens": 162023616}
|
|
{"current_steps": 51480, "total_steps": 78105, "loss": 0.1591, "lr": 1.5707904259818731e-06, "epoch": 3.29556366429806, "percentage": 65.91, "elapsed_time": "2:16:00", "remaining_time": "1:10:20", "throughput": 19857.5, "total_tokens": 162038848}
|
|
{"current_steps": 51485, "total_steps": 78105, "loss": 0.1127, "lr": 1.5702718190407435e-06, "epoch": 3.295883746239037, "percentage": 65.92, "elapsed_time": "2:16:00", "remaining_time": "1:10:19", "throughput": 19857.76, "total_tokens": 162053824}
|
|
{"current_steps": 51490, "total_steps": 78105, "loss": 0.1609, "lr": 1.5697532585253972e-06, "epoch": 3.296203828180014, "percentage": 65.92, "elapsed_time": "2:16:01", "remaining_time": "1:10:18", "throughput": 19858.03, "total_tokens": 162069120}
|
|
{"current_steps": 51495, "total_steps": 78105, "loss": 0.1923, "lr": 1.5692347444617267e-06, "epoch": 3.296523910120991, "percentage": 65.93, "elapsed_time": "2:16:02", "remaining_time": "1:10:17", "throughput": 19858.31, "total_tokens": 162084800}
|
|
{"current_steps": 51500, "total_steps": 78105, "loss": 0.1272, "lr": 1.5687162768756259e-06, "epoch": 3.2968439920619677, "percentage": 65.94, "elapsed_time": "2:16:02", "remaining_time": "1:10:16", "throughput": 19858.6, "total_tokens": 162100992}
|
|
{"current_steps": 51505, "total_steps": 78105, "loss": 0.1235, "lr": 1.5681978557929823e-06, "epoch": 3.2971640740029446, "percentage": 65.94, "elapsed_time": "2:16:03", "remaining_time": "1:10:16", "throughput": 19858.85, "total_tokens": 162115904}
|
|
{"current_steps": 51510, "total_steps": 78105, "loss": 0.2148, "lr": 1.5676794812396843e-06, "epoch": 3.2974841559439216, "percentage": 65.95, "elapsed_time": "2:16:04", "remaining_time": "1:10:15", "throughput": 19859.13, "total_tokens": 162131712}
|
|
{"current_steps": 51515, "total_steps": 78105, "loss": 0.1377, "lr": 1.567161153241617e-06, "epoch": 3.2978042378848986, "percentage": 65.96, "elapsed_time": "2:16:04", "remaining_time": "1:10:14", "throughput": 19859.42, "total_tokens": 162147584}
|
|
{"current_steps": 51520, "total_steps": 78105, "loss": 0.1467, "lr": 1.5666428718246623e-06, "epoch": 3.2981243198258756, "percentage": 65.96, "elapsed_time": "2:16:05", "remaining_time": "1:10:13", "throughput": 19859.67, "total_tokens": 162162496}
|
|
{"current_steps": 51525, "total_steps": 78105, "loss": 0.2769, "lr": 1.566124637014702e-06, "epoch": 3.298444401766852, "percentage": 65.97, "elapsed_time": "2:16:06", "remaining_time": "1:10:12", "throughput": 19859.9, "total_tokens": 162177408}
|
|
{"current_steps": 51530, "total_steps": 78105, "loss": 0.1259, "lr": 1.5656064488376122e-06, "epoch": 3.298764483707829, "percentage": 65.98, "elapsed_time": "2:16:06", "remaining_time": "1:10:11", "throughput": 19860.2, "total_tokens": 162193216}
|
|
{"current_steps": 51535, "total_steps": 78105, "loss": 0.1456, "lr": 1.5650883073192697e-06, "epoch": 3.299084565648806, "percentage": 65.98, "elapsed_time": "2:16:07", "remaining_time": "1:10:10", "throughput": 19860.47, "total_tokens": 162208960}
|
|
{"current_steps": 51540, "total_steps": 78105, "loss": 0.154, "lr": 1.5645702124855466e-06, "epoch": 3.299404647589783, "percentage": 65.99, "elapsed_time": "2:16:08", "remaining_time": "1:10:10", "throughput": 19860.86, "total_tokens": 162226944}
|
|
{"current_steps": 51545, "total_steps": 78105, "loss": 0.2134, "lr": 1.5640521643623157e-06, "epoch": 3.2997247295307597, "percentage": 65.99, "elapsed_time": "2:16:08", "remaining_time": "1:10:09", "throughput": 19861.15, "total_tokens": 162242816}
|
|
{"current_steps": 51550, "total_steps": 78105, "loss": 0.1578, "lr": 1.5635341629754435e-06, "epoch": 3.3000448114717367, "percentage": 66.0, "elapsed_time": "2:16:09", "remaining_time": "1:10:08", "throughput": 19861.44, "total_tokens": 162258880}
|
|
{"current_steps": 51555, "total_steps": 78105, "loss": 0.2403, "lr": 1.5630162083507983e-06, "epoch": 3.3003648934127137, "percentage": 66.01, "elapsed_time": "2:16:10", "remaining_time": "1:10:07", "throughput": 19861.73, "total_tokens": 162274752}
|
|
{"current_steps": 51560, "total_steps": 78105, "loss": 0.2098, "lr": 1.5624983005142422e-06, "epoch": 3.3006849753536907, "percentage": 66.01, "elapsed_time": "2:16:10", "remaining_time": "1:10:06", "throughput": 19861.97, "total_tokens": 162289856}
|
|
{"current_steps": 51565, "total_steps": 78105, "loss": 0.211, "lr": 1.5619804394916382e-06, "epoch": 3.3010050572946676, "percentage": 66.02, "elapsed_time": "2:16:11", "remaining_time": "1:10:05", "throughput": 19862.25, "total_tokens": 162305856}
|
|
{"current_steps": 51570, "total_steps": 78105, "loss": 0.1482, "lr": 1.5614626253088444e-06, "epoch": 3.301325139235644, "percentage": 66.03, "elapsed_time": "2:16:12", "remaining_time": "1:10:04", "throughput": 19862.54, "total_tokens": 162321920}
|
|
{"current_steps": 51575, "total_steps": 78105, "loss": 0.2047, "lr": 1.5609448579917186e-06, "epoch": 3.301645221176621, "percentage": 66.03, "elapsed_time": "2:16:12", "remaining_time": "1:10:04", "throughput": 19862.85, "total_tokens": 162338304}
|
|
{"current_steps": 51580, "total_steps": 78105, "loss": 0.1453, "lr": 1.5604271375661162e-06, "epoch": 3.301965303117598, "percentage": 66.04, "elapsed_time": "2:16:13", "remaining_time": "1:10:03", "throughput": 19863.06, "total_tokens": 162352768}
|
|
{"current_steps": 51585, "total_steps": 78105, "loss": 0.2241, "lr": 1.5599094640578876e-06, "epoch": 3.302285385058575, "percentage": 66.05, "elapsed_time": "2:16:14", "remaining_time": "1:10:02", "throughput": 19863.41, "total_tokens": 162369536}
|
|
{"current_steps": 51590, "total_steps": 78105, "loss": 0.1666, "lr": 1.5593918374928846e-06, "epoch": 3.3026054669995517, "percentage": 66.05, "elapsed_time": "2:16:14", "remaining_time": "1:10:01", "throughput": 19863.65, "total_tokens": 162384704}
|
|
{"current_steps": 51595, "total_steps": 78105, "loss": 0.2035, "lr": 1.5588742578969523e-06, "epoch": 3.3029255489405287, "percentage": 66.06, "elapsed_time": "2:16:15", "remaining_time": "1:10:00", "throughput": 19863.95, "total_tokens": 162400832}
|
|
{"current_steps": 51600, "total_steps": 78105, "loss": 0.125, "lr": 1.5583567252959387e-06, "epoch": 3.3032456308815057, "percentage": 66.06, "elapsed_time": "2:16:16", "remaining_time": "1:09:59", "throughput": 19864.18, "total_tokens": 162415552}
|
|
{"current_steps": 51605, "total_steps": 78105, "loss": 0.1395, "lr": 1.5578392397156845e-06, "epoch": 3.3035657128224827, "percentage": 66.07, "elapsed_time": "2:16:16", "remaining_time": "1:09:59", "throughput": 19864.47, "total_tokens": 162431680}
|
|
{"current_steps": 51610, "total_steps": 78105, "loss": 0.2007, "lr": 1.5573218011820317e-06, "epoch": 3.3038857947634597, "percentage": 66.08, "elapsed_time": "2:16:17", "remaining_time": "1:09:58", "throughput": 19864.73, "total_tokens": 162446912}
|
|
{"current_steps": 51615, "total_steps": 78105, "loss": 0.1321, "lr": 1.5568044097208177e-06, "epoch": 3.304205876704436, "percentage": 66.08, "elapsed_time": "2:16:18", "remaining_time": "1:09:57", "throughput": 19865.08, "total_tokens": 162463744}
|
|
{"current_steps": 51620, "total_steps": 78105, "loss": 0.1678, "lr": 1.5562870653578788e-06, "epoch": 3.304525958645413, "percentage": 66.09, "elapsed_time": "2:16:19", "remaining_time": "1:09:56", "throughput": 19865.36, "total_tokens": 162479232}
|
|
{"current_steps": 51625, "total_steps": 78105, "loss": 0.1444, "lr": 1.5557697681190475e-06, "epoch": 3.30484604058639, "percentage": 66.1, "elapsed_time": "2:16:19", "remaining_time": "1:09:55", "throughput": 19865.63, "total_tokens": 162494464}
|
|
{"current_steps": 51630, "total_steps": 78105, "loss": 0.1948, "lr": 1.555252518030156e-06, "epoch": 3.305166122527367, "percentage": 66.1, "elapsed_time": "2:16:20", "remaining_time": "1:09:54", "throughput": 19865.93, "total_tokens": 162510784}
|
|
{"current_steps": 51635, "total_steps": 78105, "loss": 0.1419, "lr": 1.5547353151170333e-06, "epoch": 3.3054862044683437, "percentage": 66.11, "elapsed_time": "2:16:21", "remaining_time": "1:09:53", "throughput": 19866.24, "total_tokens": 162527104}
|
|
{"current_steps": 51640, "total_steps": 78105, "loss": 0.1725, "lr": 1.5542181594055045e-06, "epoch": 3.3058062864093207, "percentage": 66.12, "elapsed_time": "2:16:21", "remaining_time": "1:09:53", "throughput": 19866.59, "total_tokens": 162544064}
|
|
{"current_steps": 51645, "total_steps": 78105, "loss": 0.1908, "lr": 1.5537010509213955e-06, "epoch": 3.3061263683502977, "percentage": 66.12, "elapsed_time": "2:16:22", "remaining_time": "1:09:52", "throughput": 19866.82, "total_tokens": 162558848}
|
|
{"current_steps": 51650, "total_steps": 78105, "loss": 0.1431, "lr": 1.5531839896905257e-06, "epoch": 3.3064464502912747, "percentage": 66.13, "elapsed_time": "2:16:23", "remaining_time": "1:09:51", "throughput": 19867.07, "total_tokens": 162574400}
|
|
{"current_steps": 51655, "total_steps": 78105, "loss": 0.146, "lr": 1.5526669757387164e-06, "epoch": 3.3067665322322517, "percentage": 66.14, "elapsed_time": "2:16:23", "remaining_time": "1:09:50", "throughput": 19867.37, "total_tokens": 162590464}
|
|
{"current_steps": 51660, "total_steps": 78105, "loss": 0.1786, "lr": 1.5521500090917832e-06, "epoch": 3.3070866141732282, "percentage": 66.14, "elapsed_time": "2:16:24", "remaining_time": "1:09:49", "throughput": 19867.74, "total_tokens": 162607424}
|
|
{"current_steps": 51665, "total_steps": 78105, "loss": 0.259, "lr": 1.5516330897755427e-06, "epoch": 3.3074066961142052, "percentage": 66.15, "elapsed_time": "2:16:25", "remaining_time": "1:09:48", "throughput": 19868.04, "total_tokens": 162623296}
|
|
{"current_steps": 51670, "total_steps": 78105, "loss": 0.1724, "lr": 1.5511162178158047e-06, "epoch": 3.3077267780551822, "percentage": 66.15, "elapsed_time": "2:16:25", "remaining_time": "1:09:47", "throughput": 19868.32, "total_tokens": 162639296}
|
|
{"current_steps": 51675, "total_steps": 78105, "loss": 0.1288, "lr": 1.5505993932383803e-06, "epoch": 3.3080468599961588, "percentage": 66.16, "elapsed_time": "2:16:26", "remaining_time": "1:09:47", "throughput": 19868.59, "total_tokens": 162654912}
|
|
{"current_steps": 51680, "total_steps": 78105, "loss": 0.1219, "lr": 1.5500826160690768e-06, "epoch": 3.3083669419371358, "percentage": 66.17, "elapsed_time": "2:16:27", "remaining_time": "1:09:46", "throughput": 19868.83, "total_tokens": 162669568}
|
|
{"current_steps": 51685, "total_steps": 78105, "loss": 0.1946, "lr": 1.5495658863336993e-06, "epoch": 3.3086870238781128, "percentage": 66.17, "elapsed_time": "2:16:27", "remaining_time": "1:09:45", "throughput": 19869.1, "total_tokens": 162685184}
|
|
{"current_steps": 51690, "total_steps": 78105, "loss": 0.1778, "lr": 1.5490492040580521e-06, "epoch": 3.3090071058190897, "percentage": 66.18, "elapsed_time": "2:16:28", "remaining_time": "1:09:44", "throughput": 19869.37, "total_tokens": 162700800}
|
|
{"current_steps": 51695, "total_steps": 78105, "loss": 0.2389, "lr": 1.5485325692679332e-06, "epoch": 3.3093271877600667, "percentage": 66.19, "elapsed_time": "2:16:29", "remaining_time": "1:09:43", "throughput": 19869.61, "total_tokens": 162715712}
|
|
{"current_steps": 51700, "total_steps": 78105, "loss": 0.1923, "lr": 1.5480159819891427e-06, "epoch": 3.3096472697010433, "percentage": 66.19, "elapsed_time": "2:16:29", "remaining_time": "1:09:42", "throughput": 19869.87, "total_tokens": 162731328}
|
|
{"current_steps": 51705, "total_steps": 78105, "loss": 0.1421, "lr": 1.5474994422474745e-06, "epoch": 3.3099673516420203, "percentage": 66.2, "elapsed_time": "2:16:30", "remaining_time": "1:09:41", "throughput": 19870.11, "total_tokens": 162746304}
|
|
{"current_steps": 51710, "total_steps": 78105, "loss": 0.1303, "lr": 1.5469829500687233e-06, "epoch": 3.3102874335829973, "percentage": 66.21, "elapsed_time": "2:16:31", "remaining_time": "1:09:41", "throughput": 19870.41, "total_tokens": 162762368}
|
|
{"current_steps": 51715, "total_steps": 78105, "loss": 0.1704, "lr": 1.5464665054786787e-06, "epoch": 3.3106075155239743, "percentage": 66.21, "elapsed_time": "2:16:31", "remaining_time": "1:09:40", "throughput": 19870.69, "total_tokens": 162777728}
|
|
{"current_steps": 51720, "total_steps": 78105, "loss": 0.0864, "lr": 1.5459501085031314e-06, "epoch": 3.310927597464951, "percentage": 66.22, "elapsed_time": "2:16:32", "remaining_time": "1:09:39", "throughput": 19871.04, "total_tokens": 162794944}
|
|
{"current_steps": 51725, "total_steps": 78105, "loss": 0.1593, "lr": 1.545433759167865e-06, "epoch": 3.311247679405928, "percentage": 66.22, "elapsed_time": "2:16:33", "remaining_time": "1:09:38", "throughput": 19871.3, "total_tokens": 162810304}
|
|
{"current_steps": 51730, "total_steps": 78105, "loss": 0.1378, "lr": 1.5449174574986658e-06, "epoch": 3.311567761346905, "percentage": 66.23, "elapsed_time": "2:16:33", "remaining_time": "1:09:37", "throughput": 19871.59, "total_tokens": 162826176}
|
|
{"current_steps": 51735, "total_steps": 78105, "loss": 0.2269, "lr": 1.5444012035213124e-06, "epoch": 3.3118878432878818, "percentage": 66.24, "elapsed_time": "2:16:34", "remaining_time": "1:09:36", "throughput": 19871.84, "total_tokens": 162841152}
|
|
{"current_steps": 51740, "total_steps": 78105, "loss": 0.1379, "lr": 1.5438849972615863e-06, "epoch": 3.3122079252288588, "percentage": 66.24, "elapsed_time": "2:16:35", "remaining_time": "1:09:36", "throughput": 19872.12, "total_tokens": 162856896}
|
|
{"current_steps": 51745, "total_steps": 78105, "loss": 0.112, "lr": 1.5433688387452627e-06, "epoch": 3.3125280071698353, "percentage": 66.25, "elapsed_time": "2:16:35", "remaining_time": "1:09:35", "throughput": 19872.41, "total_tokens": 162872768}
|
|
{"current_steps": 51750, "total_steps": 78105, "loss": 0.164, "lr": 1.5428527279981161e-06, "epoch": 3.3128480891108123, "percentage": 66.26, "elapsed_time": "2:16:36", "remaining_time": "1:09:34", "throughput": 19872.68, "total_tokens": 162888832}
|
|
{"current_steps": 51755, "total_steps": 78105, "loss": 0.1244, "lr": 1.54233666504592e-06, "epoch": 3.3131681710517893, "percentage": 66.26, "elapsed_time": "2:16:37", "remaining_time": "1:09:33", "throughput": 19872.96, "total_tokens": 162904512}
|
|
{"current_steps": 51760, "total_steps": 78105, "loss": 0.1858, "lr": 1.5418206499144411e-06, "epoch": 3.3134882529927663, "percentage": 66.27, "elapsed_time": "2:16:37", "remaining_time": "1:09:32", "throughput": 19873.23, "total_tokens": 162920000}
|
|
{"current_steps": 51765, "total_steps": 78105, "loss": 0.1148, "lr": 1.541304682629449e-06, "epoch": 3.313808334933743, "percentage": 66.28, "elapsed_time": "2:16:38", "remaining_time": "1:09:31", "throughput": 19873.45, "total_tokens": 162934912}
|
|
{"current_steps": 51770, "total_steps": 78105, "loss": 0.1397, "lr": 1.5407887632167065e-06, "epoch": 3.31412841687472, "percentage": 66.28, "elapsed_time": "2:16:39", "remaining_time": "1:09:30", "throughput": 19873.68, "total_tokens": 162949824}
|
|
{"current_steps": 51775, "total_steps": 78105, "loss": 0.1157, "lr": 1.540272891701978e-06, "epoch": 3.314448498815697, "percentage": 66.29, "elapsed_time": "2:16:39", "remaining_time": "1:09:30", "throughput": 19873.96, "total_tokens": 162965632}
|
|
{"current_steps": 51780, "total_steps": 78105, "loss": 0.2852, "lr": 1.5397570681110214e-06, "epoch": 3.314768580756674, "percentage": 66.3, "elapsed_time": "2:16:40", "remaining_time": "1:09:29", "throughput": 19874.2, "total_tokens": 162980672}
|
|
{"current_steps": 51785, "total_steps": 78105, "loss": 0.1575, "lr": 1.5392412924695958e-06, "epoch": 3.315088662697651, "percentage": 66.3, "elapsed_time": "2:16:41", "remaining_time": "1:09:28", "throughput": 19874.44, "total_tokens": 162995392}
|
|
{"current_steps": 51790, "total_steps": 78105, "loss": 0.1573, "lr": 1.5387255648034545e-06, "epoch": 3.3154087446386273, "percentage": 66.31, "elapsed_time": "2:16:41", "remaining_time": "1:09:27", "throughput": 19874.69, "total_tokens": 163010624}
|
|
{"current_steps": 51795, "total_steps": 78105, "loss": 0.0979, "lr": 1.5382098851383524e-06, "epoch": 3.3157288265796043, "percentage": 66.31, "elapsed_time": "2:16:42", "remaining_time": "1:09:26", "throughput": 19874.97, "total_tokens": 163026304}
|
|
{"current_steps": 51800, "total_steps": 78105, "loss": 0.1413, "lr": 1.537694253500038e-06, "epoch": 3.3160489085205813, "percentage": 66.32, "elapsed_time": "2:16:43", "remaining_time": "1:09:25", "throughput": 19875.3, "total_tokens": 163043136}
|
|
{"current_steps": 51805, "total_steps": 78105, "loss": 0.1279, "lr": 1.5371786699142604e-06, "epoch": 3.3163689904615583, "percentage": 66.33, "elapsed_time": "2:16:43", "remaining_time": "1:09:24", "throughput": 19875.58, "total_tokens": 163058432}
|
|
{"current_steps": 51810, "total_steps": 78105, "loss": 0.1982, "lr": 1.5366631344067657e-06, "epoch": 3.316689072402535, "percentage": 66.33, "elapsed_time": "2:16:44", "remaining_time": "1:09:24", "throughput": 19875.85, "total_tokens": 163074048}
|
|
{"current_steps": 51815, "total_steps": 78105, "loss": 0.1727, "lr": 1.5361476470032949e-06, "epoch": 3.317009154343512, "percentage": 66.34, "elapsed_time": "2:16:45", "remaining_time": "1:09:23", "throughput": 19876.11, "total_tokens": 163089216}
|
|
{"current_steps": 51820, "total_steps": 78105, "loss": 0.1093, "lr": 1.535632207729591e-06, "epoch": 3.317329236284489, "percentage": 66.35, "elapsed_time": "2:16:46", "remaining_time": "1:09:22", "throughput": 19876.44, "total_tokens": 163106176}
|
|
{"current_steps": 51825, "total_steps": 78105, "loss": 0.1487, "lr": 1.5351168166113915e-06, "epoch": 3.317649318225466, "percentage": 66.35, "elapsed_time": "2:16:46", "remaining_time": "1:09:21", "throughput": 19876.75, "total_tokens": 163122816}
|
|
{"current_steps": 51830, "total_steps": 78105, "loss": 0.1041, "lr": 1.5346014736744318e-06, "epoch": 3.317969400166443, "percentage": 66.36, "elapsed_time": "2:16:47", "remaining_time": "1:09:20", "throughput": 19877.01, "total_tokens": 163138240}
|
|
{"current_steps": 51835, "total_steps": 78105, "loss": 0.144, "lr": 1.534086178944446e-06, "epoch": 3.3182894821074194, "percentage": 66.37, "elapsed_time": "2:16:48", "remaining_time": "1:09:19", "throughput": 19877.27, "total_tokens": 163153600}
|
|
{"current_steps": 51840, "total_steps": 78105, "loss": 0.1569, "lr": 1.5335709324471659e-06, "epoch": 3.3186095640483964, "percentage": 66.37, "elapsed_time": "2:16:48", "remaining_time": "1:09:18", "throughput": 19877.53, "total_tokens": 163168832}
|
|
{"current_steps": 51845, "total_steps": 78105, "loss": 0.2149, "lr": 1.5330557342083186e-06, "epoch": 3.3189296459893733, "percentage": 66.38, "elapsed_time": "2:16:49", "remaining_time": "1:09:18", "throughput": 19877.77, "total_tokens": 163183616}
|
|
{"current_steps": 51850, "total_steps": 78105, "loss": 0.2786, "lr": 1.5325405842536317e-06, "epoch": 3.3192497279303503, "percentage": 66.38, "elapsed_time": "2:16:50", "remaining_time": "1:09:17", "throughput": 19878.04, "total_tokens": 163199040}
|
|
{"current_steps": 51855, "total_steps": 78105, "loss": 0.1501, "lr": 1.5320254826088282e-06, "epoch": 3.319569809871327, "percentage": 66.39, "elapsed_time": "2:16:50", "remaining_time": "1:09:16", "throughput": 19878.32, "total_tokens": 163214784}
|
|
{"current_steps": 51860, "total_steps": 78105, "loss": 0.1559, "lr": 1.5315104292996302e-06, "epoch": 3.319889891812304, "percentage": 66.4, "elapsed_time": "2:16:51", "remaining_time": "1:09:15", "throughput": 19878.63, "total_tokens": 163231360}
|
|
{"current_steps": 51865, "total_steps": 78105, "loss": 0.2074, "lr": 1.5309954243517575e-06, "epoch": 3.320209973753281, "percentage": 66.4, "elapsed_time": "2:16:52", "remaining_time": "1:09:14", "throughput": 19878.89, "total_tokens": 163246656}
|
|
{"current_steps": 51870, "total_steps": 78105, "loss": 0.2297, "lr": 1.5304804677909253e-06, "epoch": 3.320530055694258, "percentage": 66.41, "elapsed_time": "2:16:52", "remaining_time": "1:09:13", "throughput": 19879.14, "total_tokens": 163262016}
|
|
{"current_steps": 51875, "total_steps": 78105, "loss": 0.1756, "lr": 1.529965559642849e-06, "epoch": 3.320850137635235, "percentage": 66.42, "elapsed_time": "2:16:53", "remaining_time": "1:09:12", "throughput": 19879.34, "total_tokens": 163276288}
|
|
{"current_steps": 51880, "total_steps": 78105, "loss": 0.1887, "lr": 1.5294506999332392e-06, "epoch": 3.3211702195762114, "percentage": 66.42, "elapsed_time": "2:16:54", "remaining_time": "1:09:12", "throughput": 19879.61, "total_tokens": 163291904}
|
|
{"current_steps": 51885, "total_steps": 78105, "loss": 0.1598, "lr": 1.5289358886878065e-06, "epoch": 3.3214903015171884, "percentage": 66.43, "elapsed_time": "2:16:54", "remaining_time": "1:09:11", "throughput": 19879.81, "total_tokens": 163306304}
|
|
{"current_steps": 51890, "total_steps": 78105, "loss": 0.1919, "lr": 1.5284211259322569e-06, "epoch": 3.3218103834581654, "percentage": 66.44, "elapsed_time": "2:16:55", "remaining_time": "1:09:10", "throughput": 19880.14, "total_tokens": 163323072}
|
|
{"current_steps": 51895, "total_steps": 78105, "loss": 0.1985, "lr": 1.5279064116922964e-06, "epoch": 3.3221304653991424, "percentage": 66.44, "elapsed_time": "2:16:56", "remaining_time": "1:09:09", "throughput": 19880.34, "total_tokens": 163337344}
|
|
{"current_steps": 51900, "total_steps": 78105, "loss": 0.1853, "lr": 1.5273917459936255e-06, "epoch": 3.322450547340119, "percentage": 66.45, "elapsed_time": "2:16:56", "remaining_time": "1:09:08", "throughput": 19880.68, "total_tokens": 163353984}
|
|
{"current_steps": 51905, "total_steps": 78105, "loss": 0.1524, "lr": 1.526877128861945e-06, "epoch": 3.322770629281096, "percentage": 66.46, "elapsed_time": "2:16:57", "remaining_time": "1:09:07", "throughput": 19880.92, "total_tokens": 163368960}
|
|
{"current_steps": 51910, "total_steps": 78105, "loss": 0.2759, "lr": 1.5263625603229515e-06, "epoch": 3.323090711222073, "percentage": 66.46, "elapsed_time": "2:16:58", "remaining_time": "1:09:07", "throughput": 19881.15, "total_tokens": 163383616}
|
|
{"current_steps": 51915, "total_steps": 78105, "loss": 0.1216, "lr": 1.5258480404023413e-06, "epoch": 3.32341079316305, "percentage": 66.47, "elapsed_time": "2:16:58", "remaining_time": "1:09:06", "throughput": 19881.38, "total_tokens": 163398464}
|
|
{"current_steps": 51920, "total_steps": 78105, "loss": 0.2778, "lr": 1.5253335691258049e-06, "epoch": 3.323730875104027, "percentage": 66.47, "elapsed_time": "2:16:59", "remaining_time": "1:09:05", "throughput": 19881.61, "total_tokens": 163413504}
|
|
{"current_steps": 51925, "total_steps": 78105, "loss": 0.1836, "lr": 1.5248191465190326e-06, "epoch": 3.3240509570450034, "percentage": 66.48, "elapsed_time": "2:17:00", "remaining_time": "1:09:04", "throughput": 19881.92, "total_tokens": 163429888}
|
|
{"current_steps": 51930, "total_steps": 78105, "loss": 0.1867, "lr": 1.524304772607714e-06, "epoch": 3.3243710389859804, "percentage": 66.49, "elapsed_time": "2:17:00", "remaining_time": "1:09:03", "throughput": 19882.12, "total_tokens": 163444288}
|
|
{"current_steps": 51935, "total_steps": 78105, "loss": 0.1265, "lr": 1.5237904474175319e-06, "epoch": 3.3246911209269574, "percentage": 66.49, "elapsed_time": "2:17:01", "remaining_time": "1:09:02", "throughput": 19882.47, "total_tokens": 163461504}
|
|
{"current_steps": 51940, "total_steps": 78105, "loss": 0.1699, "lr": 1.5232761709741703e-06, "epoch": 3.325011202867934, "percentage": 66.5, "elapsed_time": "2:17:02", "remaining_time": "1:09:01", "throughput": 19882.79, "total_tokens": 163477952}
|
|
{"current_steps": 51945, "total_steps": 78105, "loss": 0.3431, "lr": 1.5227619433033087e-06, "epoch": 3.325331284808911, "percentage": 66.51, "elapsed_time": "2:17:02", "remaining_time": "1:09:01", "throughput": 19883.07, "total_tokens": 163493760}
|
|
{"current_steps": 51950, "total_steps": 78105, "loss": 0.1206, "lr": 1.5222477644306265e-06, "epoch": 3.325651366749888, "percentage": 66.51, "elapsed_time": "2:17:03", "remaining_time": "1:09:00", "throughput": 19883.34, "total_tokens": 163509568}
|
|
{"current_steps": 51955, "total_steps": 78105, "loss": 0.1551, "lr": 1.5217336343817967e-06, "epoch": 3.325971448690865, "percentage": 66.52, "elapsed_time": "2:17:04", "remaining_time": "1:08:59", "throughput": 19883.65, "total_tokens": 163525888}
|
|
{"current_steps": 51960, "total_steps": 78105, "loss": 0.2371, "lr": 1.5212195531824952e-06, "epoch": 3.326291530631842, "percentage": 66.53, "elapsed_time": "2:17:04", "remaining_time": "1:08:58", "throughput": 19883.95, "total_tokens": 163542016}
|
|
{"current_steps": 51965, "total_steps": 78105, "loss": 0.1392, "lr": 1.5207055208583893e-06, "epoch": 3.3266116125728185, "percentage": 66.53, "elapsed_time": "2:17:05", "remaining_time": "1:08:57", "throughput": 19884.33, "total_tokens": 163559872}
|
|
{"current_steps": 51970, "total_steps": 78105, "loss": 0.2296, "lr": 1.5201915374351498e-06, "epoch": 3.3269316945137954, "percentage": 66.54, "elapsed_time": "2:17:06", "remaining_time": "1:08:56", "throughput": 19884.67, "total_tokens": 163576832}
|
|
{"current_steps": 51975, "total_steps": 78105, "loss": 0.1343, "lr": 1.5196776029384402e-06, "epoch": 3.3272517764547724, "percentage": 66.55, "elapsed_time": "2:17:06", "remaining_time": "1:08:56", "throughput": 19884.93, "total_tokens": 163592448}
|
|
{"current_steps": 51980, "total_steps": 78105, "loss": 0.1818, "lr": 1.5191637173939254e-06, "epoch": 3.3275718583957494, "percentage": 66.55, "elapsed_time": "2:17:07", "remaining_time": "1:08:55", "throughput": 19885.17, "total_tokens": 163607680}
|
|
{"current_steps": 51985, "total_steps": 78105, "loss": 0.102, "lr": 1.5186498808272662e-06, "epoch": 3.327891940336726, "percentage": 66.56, "elapsed_time": "2:17:08", "remaining_time": "1:08:54", "throughput": 19885.43, "total_tokens": 163623104}
|
|
{"current_steps": 51990, "total_steps": 78105, "loss": 0.2705, "lr": 1.5181360932641194e-06, "epoch": 3.328212022277703, "percentage": 66.56, "elapsed_time": "2:17:08", "remaining_time": "1:08:53", "throughput": 19885.67, "total_tokens": 163638272}
|
|
{"current_steps": 51995, "total_steps": 78105, "loss": 0.1754, "lr": 1.5176223547301423e-06, "epoch": 3.32853210421868, "percentage": 66.57, "elapsed_time": "2:17:09", "remaining_time": "1:08:52", "throughput": 19885.97, "total_tokens": 163654720}
|
|
{"current_steps": 52000, "total_steps": 78105, "loss": 0.1243, "lr": 1.517108665250987e-06, "epoch": 3.328852186159657, "percentage": 66.58, "elapsed_time": "2:17:10", "remaining_time": "1:08:51", "throughput": 19886.24, "total_tokens": 163670400}
|
|
{"current_steps": 52005, "total_steps": 78105, "loss": 0.1807, "lr": 1.5165950248523065e-06, "epoch": 3.329172268100634, "percentage": 66.58, "elapsed_time": "2:17:11", "remaining_time": "1:08:50", "throughput": 19886.54, "total_tokens": 163686400}
|
|
{"current_steps": 52010, "total_steps": 78105, "loss": 0.1709, "lr": 1.5160814335597468e-06, "epoch": 3.3294923500416105, "percentage": 66.59, "elapsed_time": "2:17:11", "remaining_time": "1:08:50", "throughput": 19886.81, "total_tokens": 163702208}
|
|
{"current_steps": 52015, "total_steps": 78105, "loss": 0.1957, "lr": 1.5155678913989568e-06, "epoch": 3.3298124319825875, "percentage": 66.6, "elapsed_time": "2:17:12", "remaining_time": "1:08:49", "throughput": 19887.15, "total_tokens": 163719296}
|
|
{"current_steps": 52020, "total_steps": 78105, "loss": 0.2161, "lr": 1.5150543983955774e-06, "epoch": 3.3301325139235645, "percentage": 66.6, "elapsed_time": "2:17:13", "remaining_time": "1:08:48", "throughput": 19887.45, "total_tokens": 163735616}
|
|
{"current_steps": 52025, "total_steps": 78105, "loss": 0.2133, "lr": 1.514540954575251e-06, "epoch": 3.3304525958645415, "percentage": 66.61, "elapsed_time": "2:17:13", "remaining_time": "1:08:47", "throughput": 19887.65, "total_tokens": 163749952}
|
|
{"current_steps": 52030, "total_steps": 78105, "loss": 0.1288, "lr": 1.5140275599636162e-06, "epoch": 3.330772677805518, "percentage": 66.62, "elapsed_time": "2:17:14", "remaining_time": "1:08:46", "throughput": 19887.94, "total_tokens": 163765952}
|
|
{"current_steps": 52035, "total_steps": 78105, "loss": 0.1351, "lr": 1.513514214586309e-06, "epoch": 3.331092759746495, "percentage": 66.62, "elapsed_time": "2:17:15", "remaining_time": "1:08:45", "throughput": 19888.17, "total_tokens": 163781056}
|
|
{"current_steps": 52040, "total_steps": 78105, "loss": 0.15, "lr": 1.5130009184689648e-06, "epoch": 3.331412841687472, "percentage": 66.63, "elapsed_time": "2:17:15", "remaining_time": "1:08:44", "throughput": 19888.38, "total_tokens": 163795584}
|
|
{"current_steps": 52045, "total_steps": 78105, "loss": 0.2191, "lr": 1.5124876716372127e-06, "epoch": 3.331732923628449, "percentage": 66.63, "elapsed_time": "2:17:16", "remaining_time": "1:08:44", "throughput": 19888.66, "total_tokens": 163811392}
|
|
{"current_steps": 52050, "total_steps": 78105, "loss": 0.1737, "lr": 1.511974474116683e-06, "epoch": 3.332053005569426, "percentage": 66.64, "elapsed_time": "2:17:17", "remaining_time": "1:08:43", "throughput": 19888.97, "total_tokens": 163827904}
|
|
{"current_steps": 52055, "total_steps": 78105, "loss": 0.1451, "lr": 1.5114613259330007e-06, "epoch": 3.3323730875104025, "percentage": 66.65, "elapsed_time": "2:17:17", "remaining_time": "1:08:42", "throughput": 19889.21, "total_tokens": 163843072}
|
|
{"current_steps": 52060, "total_steps": 78105, "loss": 0.1819, "lr": 1.510948227111792e-06, "epoch": 3.3326931694513795, "percentage": 66.65, "elapsed_time": "2:17:18", "remaining_time": "1:08:41", "throughput": 19889.46, "total_tokens": 163858240}
|
|
{"current_steps": 52065, "total_steps": 78105, "loss": 0.1353, "lr": 1.5104351776786762e-06, "epoch": 3.3330132513923565, "percentage": 66.66, "elapsed_time": "2:17:19", "remaining_time": "1:08:40", "throughput": 19889.75, "total_tokens": 163874176}
|
|
{"current_steps": 52070, "total_steps": 78105, "loss": 0.147, "lr": 1.509922177659274e-06, "epoch": 3.3333333333333335, "percentage": 66.67, "elapsed_time": "2:17:19", "remaining_time": "1:08:39", "throughput": 19890.05, "total_tokens": 163890176}
|
|
{"current_steps": 52075, "total_steps": 78105, "loss": 0.1087, "lr": 1.5094092270792001e-06, "epoch": 3.33365341527431, "percentage": 66.67, "elapsed_time": "2:17:20", "remaining_time": "1:08:39", "throughput": 19890.3, "total_tokens": 163905472}
|
|
{"current_steps": 52080, "total_steps": 78105, "loss": 0.2845, "lr": 1.5088963259640702e-06, "epoch": 3.333973497215287, "percentage": 66.68, "elapsed_time": "2:17:21", "remaining_time": "1:08:38", "throughput": 19890.59, "total_tokens": 163921600}
|
|
{"current_steps": 52085, "total_steps": 78105, "loss": 0.1563, "lr": 1.5083834743394948e-06, "epoch": 3.334293579156264, "percentage": 66.69, "elapsed_time": "2:17:21", "remaining_time": "1:08:37", "throughput": 19890.86, "total_tokens": 163937280}
|
|
{"current_steps": 52090, "total_steps": 78105, "loss": 0.1064, "lr": 1.5078706722310843e-06, "epoch": 3.334613661097241, "percentage": 66.69, "elapsed_time": "2:17:22", "remaining_time": "1:08:36", "throughput": 19891.13, "total_tokens": 163952960}
|
|
{"current_steps": 52095, "total_steps": 78105, "loss": 0.1586, "lr": 1.5073579196644439e-06, "epoch": 3.334933743038218, "percentage": 66.7, "elapsed_time": "2:17:23", "remaining_time": "1:08:35", "throughput": 19891.37, "total_tokens": 163967680}
|
|
{"current_steps": 52100, "total_steps": 78105, "loss": 0.1476, "lr": 1.5068452166651784e-06, "epoch": 3.3352538249791945, "percentage": 66.71, "elapsed_time": "2:17:23", "remaining_time": "1:08:34", "throughput": 19891.65, "total_tokens": 163983616}
|
|
{"current_steps": 52105, "total_steps": 78105, "loss": 0.1312, "lr": 1.5063325632588905e-06, "epoch": 3.3355739069201715, "percentage": 66.71, "elapsed_time": "2:17:24", "remaining_time": "1:08:33", "throughput": 19891.88, "total_tokens": 163998528}
|
|
{"current_steps": 52110, "total_steps": 78105, "loss": 0.1513, "lr": 1.5058199594711779e-06, "epoch": 3.3358939888611485, "percentage": 66.72, "elapsed_time": "2:17:25", "remaining_time": "1:08:33", "throughput": 19892.13, "total_tokens": 164013952}
|
|
{"current_steps": 52115, "total_steps": 78105, "loss": 0.1451, "lr": 1.5053074053276384e-06, "epoch": 3.3362140708021255, "percentage": 66.72, "elapsed_time": "2:17:25", "remaining_time": "1:08:32", "throughput": 19892.34, "total_tokens": 164028416}
|
|
{"current_steps": 52120, "total_steps": 78105, "loss": 0.1569, "lr": 1.504794900853865e-06, "epoch": 3.336534152743102, "percentage": 66.73, "elapsed_time": "2:17:26", "remaining_time": "1:08:31", "throughput": 19892.57, "total_tokens": 164043456}
|
|
{"current_steps": 52125, "total_steps": 78105, "loss": 0.1808, "lr": 1.5042824460754518e-06, "epoch": 3.336854234684079, "percentage": 66.74, "elapsed_time": "2:17:27", "remaining_time": "1:08:30", "throughput": 19892.83, "total_tokens": 164058816}
|
|
{"current_steps": 52130, "total_steps": 78105, "loss": 0.2024, "lr": 1.5037700410179856e-06, "epoch": 3.337174316625056, "percentage": 66.74, "elapsed_time": "2:17:27", "remaining_time": "1:08:29", "throughput": 19893.11, "total_tokens": 164074624}
|
|
{"current_steps": 52135, "total_steps": 78105, "loss": 0.2193, "lr": 1.5032576857070552e-06, "epoch": 3.337494398566033, "percentage": 66.75, "elapsed_time": "2:17:28", "remaining_time": "1:08:28", "throughput": 19893.36, "total_tokens": 164090112}
|
|
{"current_steps": 52140, "total_steps": 78105, "loss": 0.2415, "lr": 1.5027453801682435e-06, "epoch": 3.33781448050701, "percentage": 66.76, "elapsed_time": "2:17:29", "remaining_time": "1:08:27", "throughput": 19893.63, "total_tokens": 164105664}
|
|
{"current_steps": 52145, "total_steps": 78105, "loss": 0.157, "lr": 1.5022331244271338e-06, "epoch": 3.3381345624479866, "percentage": 66.76, "elapsed_time": "2:17:29", "remaining_time": "1:08:27", "throughput": 19893.87, "total_tokens": 164120512}
|
|
{"current_steps": 52150, "total_steps": 78105, "loss": 0.1663, "lr": 1.5017209185093038e-06, "epoch": 3.3384546443889636, "percentage": 66.77, "elapsed_time": "2:17:30", "remaining_time": "1:08:26", "throughput": 19894.17, "total_tokens": 164136576}
|
|
{"current_steps": 52155, "total_steps": 78105, "loss": 0.2125, "lr": 1.5012087624403313e-06, "epoch": 3.3387747263299405, "percentage": 66.78, "elapsed_time": "2:17:31", "remaining_time": "1:08:25", "throughput": 19894.42, "total_tokens": 164151552}
|
|
{"current_steps": 52160, "total_steps": 78105, "loss": 0.2435, "lr": 1.5006966562457916e-06, "epoch": 3.3390948082709175, "percentage": 66.78, "elapsed_time": "2:17:31", "remaining_time": "1:08:24", "throughput": 19894.79, "total_tokens": 164169088}
|
|
{"current_steps": 52165, "total_steps": 78105, "loss": 0.119, "lr": 1.5001845999512548e-06, "epoch": 3.339414890211894, "percentage": 66.79, "elapsed_time": "2:17:32", "remaining_time": "1:08:23", "throughput": 19895.05, "total_tokens": 164184448}
|
|
{"current_steps": 52170, "total_steps": 78105, "loss": 0.1432, "lr": 1.4996725935822918e-06, "epoch": 3.339734972152871, "percentage": 66.79, "elapsed_time": "2:17:33", "remaining_time": "1:08:22", "throughput": 19895.39, "total_tokens": 164201664}
|
|
{"current_steps": 52175, "total_steps": 78105, "loss": 0.1176, "lr": 1.4991606371644684e-06, "epoch": 3.340055054093848, "percentage": 66.8, "elapsed_time": "2:17:33", "remaining_time": "1:08:22", "throughput": 19895.64, "total_tokens": 164216640}
|
|
{"current_steps": 52180, "total_steps": 78105, "loss": 0.2017, "lr": 1.4986487307233506e-06, "epoch": 3.340375136034825, "percentage": 66.81, "elapsed_time": "2:17:34", "remaining_time": "1:08:21", "throughput": 19895.86, "total_tokens": 164231616}
|
|
{"current_steps": 52185, "total_steps": 78105, "loss": 0.1749, "lr": 1.498136874284498e-06, "epoch": 3.340695217975802, "percentage": 66.81, "elapsed_time": "2:17:35", "remaining_time": "1:08:20", "throughput": 19896.13, "total_tokens": 164247040}
|
|
{"current_steps": 52190, "total_steps": 78105, "loss": 0.1614, "lr": 1.4976250678734727e-06, "epoch": 3.3410152999167786, "percentage": 66.82, "elapsed_time": "2:17:35", "remaining_time": "1:08:19", "throughput": 19896.36, "total_tokens": 164262080}
|
|
{"current_steps": 52195, "total_steps": 78105, "loss": 0.1396, "lr": 1.4971133115158297e-06, "epoch": 3.3413353818577556, "percentage": 66.83, "elapsed_time": "2:17:36", "remaining_time": "1:08:18", "throughput": 19896.61, "total_tokens": 164277312}
|
|
{"current_steps": 52200, "total_steps": 78105, "loss": 0.2273, "lr": 1.4966016052371241e-06, "epoch": 3.3416554637987326, "percentage": 66.83, "elapsed_time": "2:17:37", "remaining_time": "1:08:17", "throughput": 19896.89, "total_tokens": 164293312}
|
|
{"current_steps": 52205, "total_steps": 78105, "loss": 0.2192, "lr": 1.4960899490629073e-06, "epoch": 3.341975545739709, "percentage": 66.84, "elapsed_time": "2:17:37", "remaining_time": "1:08:16", "throughput": 19897.17, "total_tokens": 164309440}
|
|
{"current_steps": 52210, "total_steps": 78105, "loss": 0.1269, "lr": 1.4955783430187305e-06, "epoch": 3.342295627680686, "percentage": 66.85, "elapsed_time": "2:17:38", "remaining_time": "1:08:16", "throughput": 19897.4, "total_tokens": 164323968}
|
|
{"current_steps": 52215, "total_steps": 78105, "loss": 0.2158, "lr": 1.4950667871301384e-06, "epoch": 3.342615709621663, "percentage": 66.85, "elapsed_time": "2:17:39", "remaining_time": "1:08:15", "throughput": 19897.63, "total_tokens": 164338752}
|
|
{"current_steps": 52220, "total_steps": 78105, "loss": 0.1074, "lr": 1.494555281422676e-06, "epoch": 3.34293579156264, "percentage": 66.86, "elapsed_time": "2:17:39", "remaining_time": "1:08:14", "throughput": 19897.9, "total_tokens": 164354560}
|
|
{"current_steps": 52225, "total_steps": 78105, "loss": 0.2454, "lr": 1.4940438259218868e-06, "epoch": 3.343255873503617, "percentage": 66.87, "elapsed_time": "2:17:40", "remaining_time": "1:08:13", "throughput": 19898.15, "total_tokens": 164369920}
|
|
{"current_steps": 52230, "total_steps": 78105, "loss": 0.1124, "lr": 1.493532420653308e-06, "epoch": 3.3435759554445936, "percentage": 66.87, "elapsed_time": "2:17:41", "remaining_time": "1:08:12", "throughput": 19898.37, "total_tokens": 164384640}
|
|
{"current_steps": 52235, "total_steps": 78105, "loss": 0.2178, "lr": 1.4930210656424794e-06, "epoch": 3.3438960373855706, "percentage": 66.88, "elapsed_time": "2:17:41", "remaining_time": "1:08:11", "throughput": 19898.64, "total_tokens": 164400448}
|
|
{"current_steps": 52240, "total_steps": 78105, "loss": 0.1355, "lr": 1.4925097609149319e-06, "epoch": 3.3442161193265476, "percentage": 66.88, "elapsed_time": "2:17:42", "remaining_time": "1:08:10", "throughput": 19898.9, "total_tokens": 164416064}
|
|
{"current_steps": 52245, "total_steps": 78105, "loss": 0.1537, "lr": 1.4919985064962003e-06, "epoch": 3.3445362012675246, "percentage": 66.89, "elapsed_time": "2:17:43", "remaining_time": "1:08:10", "throughput": 19899.15, "total_tokens": 164431680}
|
|
{"current_steps": 52250, "total_steps": 78105, "loss": 0.2147, "lr": 1.491487302411812e-06, "epoch": 3.344856283208501, "percentage": 66.9, "elapsed_time": "2:17:43", "remaining_time": "1:08:09", "throughput": 19899.37, "total_tokens": 164446464}
|
|
{"current_steps": 52255, "total_steps": 78105, "loss": 0.1536, "lr": 1.4909761486872947e-06, "epoch": 3.345176365149478, "percentage": 66.9, "elapsed_time": "2:17:44", "remaining_time": "1:08:08", "throughput": 19899.68, "total_tokens": 164462656}
|
|
{"current_steps": 52260, "total_steps": 78105, "loss": 0.1829, "lr": 1.4904650453481728e-06, "epoch": 3.345496447090455, "percentage": 66.91, "elapsed_time": "2:17:45", "remaining_time": "1:08:07", "throughput": 19899.89, "total_tokens": 164477248}
|
|
{"current_steps": 52265, "total_steps": 78105, "loss": 0.119, "lr": 1.4899539924199688e-06, "epoch": 3.345816529031432, "percentage": 66.92, "elapsed_time": "2:17:45", "remaining_time": "1:08:06", "throughput": 19900.23, "total_tokens": 164494272}
|
|
{"current_steps": 52270, "total_steps": 78105, "loss": 0.1539, "lr": 1.4894429899282004e-06, "epoch": 3.346136610972409, "percentage": 66.92, "elapsed_time": "2:17:46", "remaining_time": "1:08:05", "throughput": 19900.47, "total_tokens": 164509312}
|
|
{"current_steps": 52275, "total_steps": 78105, "loss": 0.0911, "lr": 1.488932037898385e-06, "epoch": 3.3464566929133857, "percentage": 66.93, "elapsed_time": "2:17:47", "remaining_time": "1:08:05", "throughput": 19900.75, "total_tokens": 164525120}
|
|
{"current_steps": 52280, "total_steps": 78105, "loss": 0.2595, "lr": 1.488421136356038e-06, "epoch": 3.3467767748543626, "percentage": 66.94, "elapsed_time": "2:17:47", "remaining_time": "1:08:04", "throughput": 19901.08, "total_tokens": 164542016}
|
|
{"current_steps": 52285, "total_steps": 78105, "loss": 0.1644, "lr": 1.4879102853266696e-06, "epoch": 3.3470968567953396, "percentage": 66.94, "elapsed_time": "2:17:48", "remaining_time": "1:08:03", "throughput": 19901.32, "total_tokens": 164557056}
|
|
{"current_steps": 52290, "total_steps": 78105, "loss": 0.1882, "lr": 1.4873994848357909e-06, "epoch": 3.3474169387363166, "percentage": 66.95, "elapsed_time": "2:17:49", "remaining_time": "1:08:02", "throughput": 19901.6, "total_tokens": 164572608}
|
|
{"current_steps": 52295, "total_steps": 78105, "loss": 0.1258, "lr": 1.4868887349089065e-06, "epoch": 3.347737020677293, "percentage": 66.95, "elapsed_time": "2:17:50", "remaining_time": "1:08:01", "throughput": 19901.93, "total_tokens": 164589184}
|
|
{"current_steps": 52300, "total_steps": 78105, "loss": 0.1126, "lr": 1.4863780355715225e-06, "epoch": 3.34805710261827, "percentage": 66.96, "elapsed_time": "2:17:50", "remaining_time": "1:08:00", "throughput": 19902.16, "total_tokens": 164604160}
|
|
{"current_steps": 52305, "total_steps": 78105, "loss": 0.2105, "lr": 1.4858673868491386e-06, "epoch": 3.348377184559247, "percentage": 66.97, "elapsed_time": "2:17:51", "remaining_time": "1:07:59", "throughput": 19902.42, "total_tokens": 164619712}
|
|
{"current_steps": 52310, "total_steps": 78105, "loss": 0.2556, "lr": 1.4853567887672555e-06, "epoch": 3.348697266500224, "percentage": 66.97, "elapsed_time": "2:17:52", "remaining_time": "1:07:59", "throughput": 19902.7, "total_tokens": 164635904}
|
|
{"current_steps": 52315, "total_steps": 78105, "loss": 0.1156, "lr": 1.4848462413513686e-06, "epoch": 3.349017348441201, "percentage": 66.98, "elapsed_time": "2:17:52", "remaining_time": "1:07:58", "throughput": 19903.02, "total_tokens": 164652288}
|
|
{"current_steps": 52320, "total_steps": 78105, "loss": 0.1689, "lr": 1.484335744626974e-06, "epoch": 3.3493374303821777, "percentage": 66.99, "elapsed_time": "2:17:53", "remaining_time": "1:07:57", "throughput": 19903.26, "total_tokens": 164667776}
|
|
{"current_steps": 52325, "total_steps": 78105, "loss": 0.1575, "lr": 1.4838252986195605e-06, "epoch": 3.3496575123231547, "percentage": 66.99, "elapsed_time": "2:17:54", "remaining_time": "1:07:56", "throughput": 19903.5, "total_tokens": 164682752}
|
|
{"current_steps": 52330, "total_steps": 78105, "loss": 0.1663, "lr": 1.483314903354619e-06, "epoch": 3.3499775942641317, "percentage": 67.0, "elapsed_time": "2:17:54", "remaining_time": "1:07:55", "throughput": 19903.79, "total_tokens": 164698688}
|
|
{"current_steps": 52335, "total_steps": 78105, "loss": 0.1392, "lr": 1.4828045588576361e-06, "epoch": 3.3502976762051087, "percentage": 67.01, "elapsed_time": "2:17:55", "remaining_time": "1:07:54", "throughput": 19904.05, "total_tokens": 164714560}
|
|
{"current_steps": 52340, "total_steps": 78105, "loss": 0.1694, "lr": 1.4822942651540944e-06, "epoch": 3.350617758146085, "percentage": 67.01, "elapsed_time": "2:17:56", "remaining_time": "1:07:54", "throughput": 19904.29, "total_tokens": 164729920}
|
|
{"current_steps": 52345, "total_steps": 78105, "loss": 0.1632, "lr": 1.4817840222694768e-06, "epoch": 3.350937840087062, "percentage": 67.02, "elapsed_time": "2:17:56", "remaining_time": "1:07:53", "throughput": 19904.59, "total_tokens": 164745920}
|
|
{"current_steps": 52350, "total_steps": 78105, "loss": 0.1718, "lr": 1.4812738302292605e-06, "epoch": 3.351257922028039, "percentage": 67.03, "elapsed_time": "2:17:57", "remaining_time": "1:07:52", "throughput": 19904.84, "total_tokens": 164761344}
|
|
{"current_steps": 52355, "total_steps": 78105, "loss": 0.186, "lr": 1.4807636890589238e-06, "epoch": 3.351578003969016, "percentage": 67.03, "elapsed_time": "2:17:58", "remaining_time": "1:07:51", "throughput": 19905.13, "total_tokens": 164777728}
|
|
{"current_steps": 52360, "total_steps": 78105, "loss": 0.1383, "lr": 1.480253598783939e-06, "epoch": 3.351898085909993, "percentage": 67.04, "elapsed_time": "2:17:58", "remaining_time": "1:07:50", "throughput": 19905.39, "total_tokens": 164792896}
|
|
{"current_steps": 52365, "total_steps": 78105, "loss": 0.1454, "lr": 1.4797435594297777e-06, "epoch": 3.3522181678509697, "percentage": 67.04, "elapsed_time": "2:17:59", "remaining_time": "1:07:49", "throughput": 19905.69, "total_tokens": 164809216}
|
|
{"current_steps": 52370, "total_steps": 78105, "loss": 0.1029, "lr": 1.4792335710219086e-06, "epoch": 3.3525382497919467, "percentage": 67.05, "elapsed_time": "2:18:00", "remaining_time": "1:07:48", "throughput": 19905.9, "total_tokens": 164824000}
|
|
{"current_steps": 52375, "total_steps": 78105, "loss": 0.1583, "lr": 1.4787236335857984e-06, "epoch": 3.3528583317329237, "percentage": 67.06, "elapsed_time": "2:18:00", "remaining_time": "1:07:48", "throughput": 19906.12, "total_tokens": 164838784}
|
|
{"current_steps": 52380, "total_steps": 78105, "loss": 0.1767, "lr": 1.47821374714691e-06, "epoch": 3.3531784136739007, "percentage": 67.06, "elapsed_time": "2:18:01", "remaining_time": "1:07:47", "throughput": 19906.34, "total_tokens": 164853376}
|
|
{"current_steps": 52385, "total_steps": 78105, "loss": 0.155, "lr": 1.4777039117307054e-06, "epoch": 3.3534984956148772, "percentage": 67.07, "elapsed_time": "2:18:02", "remaining_time": "1:07:46", "throughput": 19906.56, "total_tokens": 164868416}
|
|
{"current_steps": 52390, "total_steps": 78105, "loss": 0.1631, "lr": 1.4771941273626417e-06, "epoch": 3.353818577555854, "percentage": 67.08, "elapsed_time": "2:18:02", "remaining_time": "1:07:45", "throughput": 19906.8, "total_tokens": 164883392}
|
|
{"current_steps": 52395, "total_steps": 78105, "loss": 0.1535, "lr": 1.4766843940681755e-06, "epoch": 3.354138659496831, "percentage": 67.08, "elapsed_time": "2:18:03", "remaining_time": "1:07:44", "throughput": 19907.13, "total_tokens": 164900224}
|
|
{"current_steps": 52400, "total_steps": 78105, "loss": 0.1386, "lr": 1.476174711872761e-06, "epoch": 3.354458741437808, "percentage": 67.09, "elapsed_time": "2:18:04", "remaining_time": "1:07:43", "throughput": 19907.37, "total_tokens": 164915456}
|
|
{"current_steps": 52405, "total_steps": 78105, "loss": 0.158, "lr": 1.475665080801848e-06, "epoch": 3.354778823378785, "percentage": 67.1, "elapsed_time": "2:18:04", "remaining_time": "1:07:42", "throughput": 19907.69, "total_tokens": 164932096}
|
|
{"current_steps": 52410, "total_steps": 78105, "loss": 0.1584, "lr": 1.475155500880886e-06, "epoch": 3.3550989053197617, "percentage": 67.1, "elapsed_time": "2:18:05", "remaining_time": "1:07:42", "throughput": 19908.12, "total_tokens": 164951168}
|
|
{"current_steps": 52415, "total_steps": 78105, "loss": 0.1488, "lr": 1.4746459721353196e-06, "epoch": 3.3554189872607387, "percentage": 67.11, "elapsed_time": "2:18:06", "remaining_time": "1:07:41", "throughput": 19908.33, "total_tokens": 164965568}
|
|
{"current_steps": 52420, "total_steps": 78105, "loss": 0.1788, "lr": 1.4741364945905925e-06, "epoch": 3.3557390692017157, "percentage": 67.11, "elapsed_time": "2:18:06", "remaining_time": "1:07:40", "throughput": 19908.61, "total_tokens": 164981504}
|
|
{"current_steps": 52425, "total_steps": 78105, "loss": 0.2274, "lr": 1.4736270682721454e-06, "epoch": 3.3560591511426927, "percentage": 67.12, "elapsed_time": "2:18:07", "remaining_time": "1:07:39", "throughput": 19908.95, "total_tokens": 164998400}
|
|
{"current_steps": 52430, "total_steps": 78105, "loss": 0.1673, "lr": 1.4731176932054163e-06, "epoch": 3.3563792330836693, "percentage": 67.13, "elapsed_time": "2:18:08", "remaining_time": "1:07:38", "throughput": 19909.15, "total_tokens": 165012736}
|
|
{"current_steps": 52435, "total_steps": 78105, "loss": 0.1393, "lr": 1.4726083694158407e-06, "epoch": 3.3566993150246462, "percentage": 67.13, "elapsed_time": "2:18:08", "remaining_time": "1:07:37", "throughput": 19909.39, "total_tokens": 165027968}
|
|
{"current_steps": 52440, "total_steps": 78105, "loss": 0.167, "lr": 1.4720990969288523e-06, "epoch": 3.3570193969656232, "percentage": 67.14, "elapsed_time": "2:18:09", "remaining_time": "1:07:37", "throughput": 19909.7, "total_tokens": 165044096}
|
|
{"current_steps": 52445, "total_steps": 78105, "loss": 0.1592, "lr": 1.47158987576988e-06, "epoch": 3.3573394789066002, "percentage": 67.15, "elapsed_time": "2:18:10", "remaining_time": "1:07:36", "throughput": 19909.99, "total_tokens": 165060096}
|
|
{"current_steps": 52450, "total_steps": 78105, "loss": 0.1531, "lr": 1.4710807059643523e-06, "epoch": 3.357659560847577, "percentage": 67.15, "elapsed_time": "2:18:11", "remaining_time": "1:07:35", "throughput": 19910.25, "total_tokens": 165075904}
|
|
{"current_steps": 52455, "total_steps": 78105, "loss": 0.1206, "lr": 1.4705715875376956e-06, "epoch": 3.3579796427885538, "percentage": 67.16, "elapsed_time": "2:18:11", "remaining_time": "1:07:34", "throughput": 19910.47, "total_tokens": 165090432}
|
|
{"current_steps": 52460, "total_steps": 78105, "loss": 0.1642, "lr": 1.4700625205153307e-06, "epoch": 3.3582997247295308, "percentage": 67.17, "elapsed_time": "2:18:12", "remaining_time": "1:07:33", "throughput": 19910.7, "total_tokens": 165105344}
|
|
{"current_steps": 52465, "total_steps": 78105, "loss": 0.1409, "lr": 1.46955350492268e-06, "epoch": 3.3586198066705077, "percentage": 67.17, "elapsed_time": "2:18:12", "remaining_time": "1:07:32", "throughput": 19910.97, "total_tokens": 165120896}
|
|
{"current_steps": 52470, "total_steps": 78105, "loss": 0.1729, "lr": 1.469044540785159e-06, "epoch": 3.3589398886114843, "percentage": 67.18, "elapsed_time": "2:18:13", "remaining_time": "1:07:31", "throughput": 19911.28, "total_tokens": 165137408}
|
|
{"current_steps": 52475, "total_steps": 78105, "loss": 0.1621, "lr": 1.4685356281281845e-06, "epoch": 3.3592599705524613, "percentage": 67.19, "elapsed_time": "2:18:14", "remaining_time": "1:07:31", "throughput": 19911.53, "total_tokens": 165152896}
|
|
{"current_steps": 52480, "total_steps": 78105, "loss": 0.2257, "lr": 1.468026766977167e-06, "epoch": 3.3595800524934383, "percentage": 67.19, "elapsed_time": "2:18:15", "remaining_time": "1:07:30", "throughput": 19911.84, "total_tokens": 165169600}
|
|
{"current_steps": 52485, "total_steps": 78105, "loss": 0.1312, "lr": 1.467517957357518e-06, "epoch": 3.3599001344344153, "percentage": 67.2, "elapsed_time": "2:18:15", "remaining_time": "1:07:29", "throughput": 19912.15, "total_tokens": 165185920}
|
|
{"current_steps": 52490, "total_steps": 78105, "loss": 0.1837, "lr": 1.4670091992946433e-06, "epoch": 3.3602202163753923, "percentage": 67.2, "elapsed_time": "2:18:16", "remaining_time": "1:07:28", "throughput": 19912.41, "total_tokens": 165201728}
|
|
{"current_steps": 52495, "total_steps": 78105, "loss": 0.1726, "lr": 1.4665004928139501e-06, "epoch": 3.360540298316369, "percentage": 67.21, "elapsed_time": "2:18:17", "remaining_time": "1:07:27", "throughput": 19912.68, "total_tokens": 165217216}
|
|
{"current_steps": 52500, "total_steps": 78105, "loss": 0.1053, "lr": 1.4659918379408378e-06, "epoch": 3.360860380257346, "percentage": 67.22, "elapsed_time": "2:18:17", "remaining_time": "1:07:26", "throughput": 19912.97, "total_tokens": 165233088}
|
|
{"current_steps": 52505, "total_steps": 78105, "loss": 0.1588, "lr": 1.4654832347007069e-06, "epoch": 3.361180462198323, "percentage": 67.22, "elapsed_time": "2:18:18", "remaining_time": "1:07:26", "throughput": 19913.27, "total_tokens": 165249472}
|
|
{"current_steps": 52510, "total_steps": 78105, "loss": 0.1919, "lr": 1.4649746831189553e-06, "epoch": 3.3615005441392998, "percentage": 67.23, "elapsed_time": "2:18:19", "remaining_time": "1:07:25", "throughput": 19913.55, "total_tokens": 165265664}
|
|
{"current_steps": 52515, "total_steps": 78105, "loss": 0.2331, "lr": 1.4644661832209767e-06, "epoch": 3.3618206260802763, "percentage": 67.24, "elapsed_time": "2:18:19", "remaining_time": "1:07:24", "throughput": 19913.83, "total_tokens": 165281856}
|
|
{"current_steps": 52520, "total_steps": 78105, "loss": 0.1156, "lr": 1.4639577350321635e-06, "epoch": 3.3621407080212533, "percentage": 67.24, "elapsed_time": "2:18:20", "remaining_time": "1:07:23", "throughput": 19914.09, "total_tokens": 165297280}
|
|
{"current_steps": 52525, "total_steps": 78105, "loss": 0.1516, "lr": 1.4634493385779036e-06, "epoch": 3.3624607899622303, "percentage": 67.25, "elapsed_time": "2:18:21", "remaining_time": "1:07:22", "throughput": 19914.4, "total_tokens": 165313664}
|
|
{"current_steps": 52530, "total_steps": 78105, "loss": 0.1599, "lr": 1.4629409938835853e-06, "epoch": 3.3627808719032073, "percentage": 67.26, "elapsed_time": "2:18:21", "remaining_time": "1:07:21", "throughput": 19914.65, "total_tokens": 165328960}
|
|
{"current_steps": 52535, "total_steps": 78105, "loss": 0.139, "lr": 1.4624327009745915e-06, "epoch": 3.3631009538441843, "percentage": 67.26, "elapsed_time": "2:18:22", "remaining_time": "1:07:21", "throughput": 19914.95, "total_tokens": 165344960}
|
|
{"current_steps": 52540, "total_steps": 78105, "loss": 0.1452, "lr": 1.4619244598763041e-06, "epoch": 3.363421035785161, "percentage": 67.27, "elapsed_time": "2:18:23", "remaining_time": "1:07:20", "throughput": 19915.27, "total_tokens": 165361536}
|
|
{"current_steps": 52545, "total_steps": 78105, "loss": 0.15, "lr": 1.4614162706141017e-06, "epoch": 3.363741117726138, "percentage": 67.27, "elapsed_time": "2:18:23", "remaining_time": "1:07:19", "throughput": 19915.49, "total_tokens": 165376320}
|
|
{"current_steps": 52550, "total_steps": 78105, "loss": 0.1734, "lr": 1.4609081332133621e-06, "epoch": 3.364061199667115, "percentage": 67.28, "elapsed_time": "2:18:24", "remaining_time": "1:07:18", "throughput": 19915.76, "total_tokens": 165392128}
|
|
{"current_steps": 52555, "total_steps": 78105, "loss": 0.1577, "lr": 1.4604000476994568e-06, "epoch": 3.364381281608092, "percentage": 67.29, "elapsed_time": "2:18:25", "remaining_time": "1:07:17", "throughput": 19916.07, "total_tokens": 165408704}
|
|
{"current_steps": 52560, "total_steps": 78105, "loss": 0.1585, "lr": 1.459892014097759e-06, "epoch": 3.3647013635490683, "percentage": 67.29, "elapsed_time": "2:18:25", "remaining_time": "1:07:16", "throughput": 19916.31, "total_tokens": 165423872}
|
|
{"current_steps": 52565, "total_steps": 78105, "loss": 0.1832, "lr": 1.459384032433635e-06, "epoch": 3.3650214454900453, "percentage": 67.3, "elapsed_time": "2:18:26", "remaining_time": "1:07:15", "throughput": 19916.57, "total_tokens": 165439168}
|
|
{"current_steps": 52570, "total_steps": 78105, "loss": 0.2009, "lr": 1.458876102732452e-06, "epoch": 3.3653415274310223, "percentage": 67.31, "elapsed_time": "2:18:27", "remaining_time": "1:07:15", "throughput": 19916.84, "total_tokens": 165455040}
|
|
{"current_steps": 52575, "total_steps": 78105, "loss": 0.1912, "lr": 1.4583682250195753e-06, "epoch": 3.3656616093719993, "percentage": 67.31, "elapsed_time": "2:18:27", "remaining_time": "1:07:14", "throughput": 19917.11, "total_tokens": 165470656}
|
|
{"current_steps": 52580, "total_steps": 78105, "loss": 0.1539, "lr": 1.4578603993203617e-06, "epoch": 3.3659816913129763, "percentage": 67.32, "elapsed_time": "2:18:28", "remaining_time": "1:07:13", "throughput": 19917.08, "total_tokens": 165486080}
|
|
{"current_steps": 52585, "total_steps": 78105, "loss": 0.167, "lr": 1.4573526256601728e-06, "epoch": 3.366301773253953, "percentage": 67.33, "elapsed_time": "2:18:29", "remaining_time": "1:07:12", "throughput": 19917.36, "total_tokens": 165501568}
|
|
{"current_steps": 52590, "total_steps": 78105, "loss": 0.1771, "lr": 1.4568449040643622e-06, "epoch": 3.36662185519493, "percentage": 67.33, "elapsed_time": "2:18:30", "remaining_time": "1:07:11", "throughput": 19917.6, "total_tokens": 165516800}
|
|
{"current_steps": 52595, "total_steps": 78105, "loss": 0.1523, "lr": 1.4563372345582838e-06, "epoch": 3.366941937135907, "percentage": 67.34, "elapsed_time": "2:18:30", "remaining_time": "1:07:10", "throughput": 19917.91, "total_tokens": 165533248}
|
|
{"current_steps": 52600, "total_steps": 78105, "loss": 0.2097, "lr": 1.4558296171672878e-06, "epoch": 3.367262019076884, "percentage": 67.35, "elapsed_time": "2:18:31", "remaining_time": "1:07:10", "throughput": 19918.14, "total_tokens": 165548288}
|
|
{"current_steps": 52605, "total_steps": 78105, "loss": 0.2918, "lr": 1.4553220519167216e-06, "epoch": 3.3675821010178604, "percentage": 67.35, "elapsed_time": "2:18:32", "remaining_time": "1:07:09", "throughput": 19918.39, "total_tokens": 165563456}
|
|
{"current_steps": 52610, "total_steps": 78105, "loss": 0.1531, "lr": 1.4548145388319306e-06, "epoch": 3.3679021829588374, "percentage": 67.36, "elapsed_time": "2:18:32", "remaining_time": "1:07:08", "throughput": 19918.76, "total_tokens": 165581248}
|
|
{"current_steps": 52615, "total_steps": 78105, "loss": 0.2317, "lr": 1.454307077938258e-06, "epoch": 3.3682222648998144, "percentage": 67.36, "elapsed_time": "2:18:33", "remaining_time": "1:07:07", "throughput": 19919.04, "total_tokens": 165597824}
|
|
{"current_steps": 52620, "total_steps": 78105, "loss": 0.1681, "lr": 1.4537996692610413e-06, "epoch": 3.3685423468407913, "percentage": 67.37, "elapsed_time": "2:18:34", "remaining_time": "1:07:06", "throughput": 19919.35, "total_tokens": 165614016}
|
|
{"current_steps": 52625, "total_steps": 78105, "loss": 0.1565, "lr": 1.453292312825621e-06, "epoch": 3.3688624287817683, "percentage": 67.38, "elapsed_time": "2:18:34", "remaining_time": "1:07:05", "throughput": 19919.61, "total_tokens": 165629568}
|
|
{"current_steps": 52630, "total_steps": 78105, "loss": 0.1623, "lr": 1.4527850086573303e-06, "epoch": 3.369182510722745, "percentage": 67.38, "elapsed_time": "2:18:35", "remaining_time": "1:07:05", "throughput": 19919.87, "total_tokens": 165645120}
|
|
{"current_steps": 52635, "total_steps": 78105, "loss": 0.1302, "lr": 1.4522777567815017e-06, "epoch": 3.369502592663722, "percentage": 67.39, "elapsed_time": "2:18:36", "remaining_time": "1:07:04", "throughput": 19920.13, "total_tokens": 165660352}
|
|
{"current_steps": 52640, "total_steps": 78105, "loss": 0.1674, "lr": 1.4517705572234647e-06, "epoch": 3.369822674604699, "percentage": 67.4, "elapsed_time": "2:18:36", "remaining_time": "1:07:03", "throughput": 19920.41, "total_tokens": 165676160}
|
|
{"current_steps": 52645, "total_steps": 78105, "loss": 0.1841, "lr": 1.4512634100085449e-06, "epoch": 3.370142756545676, "percentage": 67.4, "elapsed_time": "2:18:37", "remaining_time": "1:07:02", "throughput": 19920.69, "total_tokens": 165691968}
|
|
{"current_steps": 52650, "total_steps": 78105, "loss": 0.1902, "lr": 1.4507563151620696e-06, "epoch": 3.3704628384866524, "percentage": 67.41, "elapsed_time": "2:18:38", "remaining_time": "1:07:01", "throughput": 19920.98, "total_tokens": 165707968}
|
|
{"current_steps": 52655, "total_steps": 78105, "loss": 0.1807, "lr": 1.4502492727093565e-06, "epoch": 3.3707829204276294, "percentage": 67.42, "elapsed_time": "2:18:38", "remaining_time": "1:07:00", "throughput": 19921.23, "total_tokens": 165723456}
|
|
{"current_steps": 52660, "total_steps": 78105, "loss": 0.1911, "lr": 1.449742282675729e-06, "epoch": 3.3711030023686064, "percentage": 67.42, "elapsed_time": "2:18:39", "remaining_time": "1:06:59", "throughput": 19921.46, "total_tokens": 165738432}
|
|
{"current_steps": 52665, "total_steps": 78105, "loss": 0.1701, "lr": 1.4492353450864992e-06, "epoch": 3.3714230843095834, "percentage": 67.43, "elapsed_time": "2:18:40", "remaining_time": "1:06:59", "throughput": 19921.75, "total_tokens": 165754624}
|
|
{"current_steps": 52670, "total_steps": 78105, "loss": 0.1334, "lr": 1.4487284599669842e-06, "epoch": 3.3717431662505604, "percentage": 67.43, "elapsed_time": "2:18:40", "remaining_time": "1:06:58", "throughput": 19922.02, "total_tokens": 165770432}
|
|
{"current_steps": 52675, "total_steps": 78105, "loss": 0.1972, "lr": 1.448221627342494e-06, "epoch": 3.372063248191537, "percentage": 67.44, "elapsed_time": "2:18:41", "remaining_time": "1:06:57", "throughput": 19922.31, "total_tokens": 165786368}
|
|
{"current_steps": 52680, "total_steps": 78105, "loss": 0.162, "lr": 1.4477148472383363e-06, "epoch": 3.372383330132514, "percentage": 67.45, "elapsed_time": "2:18:42", "remaining_time": "1:06:56", "throughput": 19922.54, "total_tokens": 165801088}
|
|
{"current_steps": 52685, "total_steps": 78105, "loss": 0.249, "lr": 1.44720811967982e-06, "epoch": 3.372703412073491, "percentage": 67.45, "elapsed_time": "2:18:42", "remaining_time": "1:06:55", "throughput": 19922.75, "total_tokens": 165815744}
|
|
{"current_steps": 52690, "total_steps": 78105, "loss": 0.1416, "lr": 1.446701444692244e-06, "epoch": 3.373023494014468, "percentage": 67.46, "elapsed_time": "2:18:43", "remaining_time": "1:06:54", "throughput": 19923.02, "total_tokens": 165831104}
|
|
{"current_steps": 52695, "total_steps": 78105, "loss": 0.1625, "lr": 1.4461948223009126e-06, "epoch": 3.3733435759554444, "percentage": 67.47, "elapsed_time": "2:18:44", "remaining_time": "1:06:54", "throughput": 19923.23, "total_tokens": 165845760}
|
|
{"current_steps": 52700, "total_steps": 78105, "loss": 0.1882, "lr": 1.4456882525311228e-06, "epoch": 3.3736636578964214, "percentage": 67.47, "elapsed_time": "2:18:44", "remaining_time": "1:06:53", "throughput": 19923.52, "total_tokens": 165861952}
|
|
{"current_steps": 52705, "total_steps": 78105, "loss": 0.2035, "lr": 1.4451817354081698e-06, "epoch": 3.3739837398373984, "percentage": 67.48, "elapsed_time": "2:18:45", "remaining_time": "1:06:52", "throughput": 19923.8, "total_tokens": 165877760}
|
|
{"current_steps": 52710, "total_steps": 78105, "loss": 0.1695, "lr": 1.444675270957347e-06, "epoch": 3.3743038217783754, "percentage": 67.49, "elapsed_time": "2:18:46", "remaining_time": "1:06:51", "throughput": 19924.02, "total_tokens": 165892288}
|
|
{"current_steps": 52715, "total_steps": 78105, "loss": 0.1716, "lr": 1.4441688592039438e-06, "epoch": 3.3746239037193524, "percentage": 67.49, "elapsed_time": "2:18:46", "remaining_time": "1:06:50", "throughput": 19924.25, "total_tokens": 165906880}
|
|
{"current_steps": 52720, "total_steps": 78105, "loss": 0.2366, "lr": 1.4436625001732483e-06, "epoch": 3.374943985660329, "percentage": 67.5, "elapsed_time": "2:18:47", "remaining_time": "1:06:49", "throughput": 19924.55, "total_tokens": 165923456}
|
|
{"current_steps": 52725, "total_steps": 78105, "loss": 0.1386, "lr": 1.4431561938905453e-06, "epoch": 3.375264067601306, "percentage": 67.51, "elapsed_time": "2:18:48", "remaining_time": "1:06:48", "throughput": 19924.84, "total_tokens": 165939584}
|
|
{"current_steps": 52730, "total_steps": 78105, "loss": 0.1757, "lr": 1.4426499403811156e-06, "epoch": 3.375584149542283, "percentage": 67.51, "elapsed_time": "2:18:48", "remaining_time": "1:06:48", "throughput": 19925.12, "total_tokens": 165955392}
|
|
{"current_steps": 52735, "total_steps": 78105, "loss": 0.1456, "lr": 1.442143739670243e-06, "epoch": 3.3759042314832595, "percentage": 67.52, "elapsed_time": "2:18:49", "remaining_time": "1:06:47", "throughput": 19925.43, "total_tokens": 165971840}
|
|
{"current_steps": 52740, "total_steps": 78105, "loss": 0.1885, "lr": 1.4416375917831995e-06, "epoch": 3.3762243134242365, "percentage": 67.52, "elapsed_time": "2:18:50", "remaining_time": "1:06:46", "throughput": 19925.69, "total_tokens": 165987392}
|
|
{"current_steps": 52745, "total_steps": 78105, "loss": 0.1549, "lr": 1.441131496745263e-06, "epoch": 3.3765443953652134, "percentage": 67.53, "elapsed_time": "2:18:50", "remaining_time": "1:06:45", "throughput": 19925.91, "total_tokens": 166002432}
|
|
{"current_steps": 52750, "total_steps": 78105, "loss": 0.1498, "lr": 1.4406254545817041e-06, "epoch": 3.3768644773061904, "percentage": 67.54, "elapsed_time": "2:18:51", "remaining_time": "1:06:44", "throughput": 19926.15, "total_tokens": 166017856}
|
|
{"current_steps": 52755, "total_steps": 78105, "loss": 0.1363, "lr": 1.440119465317792e-06, "epoch": 3.3771845592471674, "percentage": 67.54, "elapsed_time": "2:18:52", "remaining_time": "1:06:43", "throughput": 19926.34, "total_tokens": 166032192}
|
|
{"current_steps": 52760, "total_steps": 78105, "loss": 0.1525, "lr": 1.4396135289787926e-06, "epoch": 3.377504641188144, "percentage": 67.55, "elapsed_time": "2:18:52", "remaining_time": "1:06:43", "throughput": 19926.57, "total_tokens": 166047424}
|
|
{"current_steps": 52765, "total_steps": 78105, "loss": 0.1357, "lr": 1.4391076455899699e-06, "epoch": 3.377824723129121, "percentage": 67.56, "elapsed_time": "2:18:53", "remaining_time": "1:06:42", "throughput": 19926.83, "total_tokens": 166062976}
|
|
{"current_steps": 52770, "total_steps": 78105, "loss": 0.2519, "lr": 1.438601815176587e-06, "epoch": 3.378144805070098, "percentage": 67.56, "elapsed_time": "2:18:54", "remaining_time": "1:06:41", "throughput": 19927.1, "total_tokens": 166078720}
|
|
{"current_steps": 52775, "total_steps": 78105, "loss": 0.1971, "lr": 1.4380960377638992e-06, "epoch": 3.378464887011075, "percentage": 67.57, "elapsed_time": "2:18:55", "remaining_time": "1:06:40", "throughput": 19927.38, "total_tokens": 166094720}
|
|
{"current_steps": 52780, "total_steps": 78105, "loss": 0.166, "lr": 1.4375903133771646e-06, "epoch": 3.3787849689520515, "percentage": 67.58, "elapsed_time": "2:18:55", "remaining_time": "1:06:39", "throughput": 19927.64, "total_tokens": 166110208}
|
|
{"current_steps": 52785, "total_steps": 78105, "loss": 0.223, "lr": 1.4370846420416359e-06, "epoch": 3.3791050508930285, "percentage": 67.58, "elapsed_time": "2:18:56", "remaining_time": "1:06:38", "throughput": 19927.87, "total_tokens": 166125184}
|
|
{"current_steps": 52790, "total_steps": 78105, "loss": 0.1478, "lr": 1.436579023782564e-06, "epoch": 3.3794251328340055, "percentage": 67.59, "elapsed_time": "2:18:57", "remaining_time": "1:06:37", "throughput": 19928.14, "total_tokens": 166140992}
|
|
{"current_steps": 52795, "total_steps": 78105, "loss": 0.1542, "lr": 1.4360734586251958e-06, "epoch": 3.3797452147749825, "percentage": 67.59, "elapsed_time": "2:18:57", "remaining_time": "1:06:37", "throughput": 19928.4, "total_tokens": 166156672}
|
|
{"current_steps": 52800, "total_steps": 78105, "loss": 0.1927, "lr": 1.4355679465947768e-06, "epoch": 3.3800652967159595, "percentage": 67.6, "elapsed_time": "2:18:58", "remaining_time": "1:06:36", "throughput": 19928.66, "total_tokens": 166172224}
|
|
{"current_steps": 52805, "total_steps": 78105, "loss": 0.1953, "lr": 1.435062487716552e-06, "epoch": 3.380385378656936, "percentage": 67.61, "elapsed_time": "2:18:59", "remaining_time": "1:06:35", "throughput": 19928.93, "total_tokens": 166187904}
|
|
{"current_steps": 52810, "total_steps": 78105, "loss": 0.1237, "lr": 1.4345570820157577e-06, "epoch": 3.380705460597913, "percentage": 67.61, "elapsed_time": "2:18:59", "remaining_time": "1:06:34", "throughput": 19929.21, "total_tokens": 166203840}
|
|
{"current_steps": 52815, "total_steps": 78105, "loss": 0.1942, "lr": 1.4340517295176338e-06, "epoch": 3.38102554253889, "percentage": 67.62, "elapsed_time": "2:19:00", "remaining_time": "1:06:33", "throughput": 19929.47, "total_tokens": 166219712}
|
|
{"current_steps": 52820, "total_steps": 78105, "loss": 0.1575, "lr": 1.4335464302474145e-06, "epoch": 3.381345624479867, "percentage": 67.63, "elapsed_time": "2:19:01", "remaining_time": "1:06:32", "throughput": 19929.72, "total_tokens": 166234880}
|
|
{"current_steps": 52825, "total_steps": 78105, "loss": 0.1432, "lr": 1.4330411842303311e-06, "epoch": 3.3816657064208435, "percentage": 67.63, "elapsed_time": "2:19:01", "remaining_time": "1:06:32", "throughput": 19930.03, "total_tokens": 166251648}
|
|
{"current_steps": 52830, "total_steps": 78105, "loss": 0.2069, "lr": 1.4325359914916137e-06, "epoch": 3.3819857883618205, "percentage": 67.64, "elapsed_time": "2:19:02", "remaining_time": "1:06:31", "throughput": 19930.24, "total_tokens": 166266240}
|
|
{"current_steps": 52835, "total_steps": 78105, "loss": 0.2206, "lr": 1.4320308520564885e-06, "epoch": 3.3823058703027975, "percentage": 67.65, "elapsed_time": "2:19:03", "remaining_time": "1:06:30", "throughput": 19930.46, "total_tokens": 166281280}
|
|
{"current_steps": 52840, "total_steps": 78105, "loss": 0.1591, "lr": 1.4315257659501797e-06, "epoch": 3.3826259522437745, "percentage": 67.65, "elapsed_time": "2:19:03", "remaining_time": "1:06:29", "throughput": 19930.72, "total_tokens": 166296960}
|
|
{"current_steps": 52845, "total_steps": 78105, "loss": 0.1698, "lr": 1.4310207331979087e-06, "epoch": 3.3829460341847515, "percentage": 67.66, "elapsed_time": "2:19:04", "remaining_time": "1:06:28", "throughput": 19931.01, "total_tokens": 166312832}
|
|
{"current_steps": 52850, "total_steps": 78105, "loss": 0.1811, "lr": 1.4305157538248932e-06, "epoch": 3.383266116125728, "percentage": 67.67, "elapsed_time": "2:19:05", "remaining_time": "1:06:27", "throughput": 19931.28, "total_tokens": 166328704}
|
|
{"current_steps": 52855, "total_steps": 78105, "loss": 0.1651, "lr": 1.4300108278563512e-06, "epoch": 3.383586198066705, "percentage": 67.67, "elapsed_time": "2:19:05", "remaining_time": "1:06:26", "throughput": 19931.57, "total_tokens": 166345280}
|
|
{"current_steps": 52860, "total_steps": 78105, "loss": 0.1416, "lr": 1.4295059553174954e-06, "epoch": 3.383906280007682, "percentage": 67.68, "elapsed_time": "2:19:06", "remaining_time": "1:06:26", "throughput": 19931.82, "total_tokens": 166360704}
|
|
{"current_steps": 52865, "total_steps": 78105, "loss": 0.1792, "lr": 1.4290011362335358e-06, "epoch": 3.384226361948659, "percentage": 67.68, "elapsed_time": "2:19:07", "remaining_time": "1:06:25", "throughput": 19932.12, "total_tokens": 166377408}
|
|
{"current_steps": 52870, "total_steps": 78105, "loss": 0.1122, "lr": 1.4284963706296808e-06, "epoch": 3.3845464438896355, "percentage": 67.69, "elapsed_time": "2:19:07", "remaining_time": "1:06:24", "throughput": 19932.37, "total_tokens": 166392768}
|
|
{"current_steps": 52875, "total_steps": 78105, "loss": 0.212, "lr": 1.427991658531135e-06, "epoch": 3.3848665258306125, "percentage": 67.7, "elapsed_time": "2:19:08", "remaining_time": "1:06:23", "throughput": 19932.66, "total_tokens": 166408960}
|
|
{"current_steps": 52880, "total_steps": 78105, "loss": 0.1707, "lr": 1.4274869999631036e-06, "epoch": 3.3851866077715895, "percentage": 67.7, "elapsed_time": "2:19:09", "remaining_time": "1:06:22", "throughput": 19932.92, "total_tokens": 166424320}
|
|
{"current_steps": 52885, "total_steps": 78105, "loss": 0.1575, "lr": 1.4269823949507832e-06, "epoch": 3.3855066897125665, "percentage": 67.71, "elapsed_time": "2:19:09", "remaining_time": "1:06:21", "throughput": 19933.17, "total_tokens": 166439616}
|
|
{"current_steps": 52890, "total_steps": 78105, "loss": 0.116, "lr": 1.426477843519375e-06, "epoch": 3.3858267716535435, "percentage": 67.72, "elapsed_time": "2:19:10", "remaining_time": "1:06:21", "throughput": 19933.45, "total_tokens": 166455552}
|
|
{"current_steps": 52895, "total_steps": 78105, "loss": 0.2972, "lr": 1.4259733456940691e-06, "epoch": 3.38614685359452, "percentage": 67.72, "elapsed_time": "2:19:11", "remaining_time": "1:06:20", "throughput": 19933.7, "total_tokens": 166471232}
|
|
{"current_steps": 52900, "total_steps": 78105, "loss": 0.1586, "lr": 1.425468901500061e-06, "epoch": 3.386466935535497, "percentage": 67.73, "elapsed_time": "2:19:11", "remaining_time": "1:06:19", "throughput": 19933.93, "total_tokens": 166486144}
|
|
{"current_steps": 52905, "total_steps": 78105, "loss": 0.1247, "lr": 1.4249645109625394e-06, "epoch": 3.386787017476474, "percentage": 67.74, "elapsed_time": "2:19:12", "remaining_time": "1:06:18", "throughput": 19934.14, "total_tokens": 166500608}
|
|
{"current_steps": 52910, "total_steps": 78105, "loss": 0.1257, "lr": 1.42446017410669e-06, "epoch": 3.387107099417451, "percentage": 67.74, "elapsed_time": "2:19:13", "remaining_time": "1:06:17", "throughput": 19934.47, "total_tokens": 166517504}
|
|
{"current_steps": 52915, "total_steps": 78105, "loss": 0.1979, "lr": 1.4239558909576973e-06, "epoch": 3.3874271813584276, "percentage": 67.75, "elapsed_time": "2:19:13", "remaining_time": "1:06:16", "throughput": 19934.71, "total_tokens": 166532608}
|
|
{"current_steps": 52920, "total_steps": 78105, "loss": 0.1649, "lr": 1.4234516615407413e-06, "epoch": 3.3877472632994046, "percentage": 67.75, "elapsed_time": "2:19:14", "remaining_time": "1:06:16", "throughput": 19935.1, "total_tokens": 166550784}
|
|
{"current_steps": 52925, "total_steps": 78105, "loss": 0.0975, "lr": 1.422947485881003e-06, "epoch": 3.3880673452403816, "percentage": 67.76, "elapsed_time": "2:19:15", "remaining_time": "1:06:15", "throughput": 19935.36, "total_tokens": 166566464}
|
|
{"current_steps": 52930, "total_steps": 78105, "loss": 0.1885, "lr": 1.4224433640036572e-06, "epoch": 3.3883874271813585, "percentage": 67.77, "elapsed_time": "2:19:15", "remaining_time": "1:06:14", "throughput": 19935.64, "total_tokens": 166581952}
|
|
{"current_steps": 52935, "total_steps": 78105, "loss": 0.1934, "lr": 1.4219392959338767e-06, "epoch": 3.3887075091223355, "percentage": 67.77, "elapsed_time": "2:19:16", "remaining_time": "1:06:13", "throughput": 19935.91, "total_tokens": 166598080}
|
|
{"current_steps": 52940, "total_steps": 78105, "loss": 0.2168, "lr": 1.4214352816968327e-06, "epoch": 3.389027591063312, "percentage": 67.78, "elapsed_time": "2:19:17", "remaining_time": "1:06:12", "throughput": 19936.14, "total_tokens": 166613120}
|
|
{"current_steps": 52945, "total_steps": 78105, "loss": 0.1545, "lr": 1.4209313213176922e-06, "epoch": 3.389347673004289, "percentage": 67.79, "elapsed_time": "2:19:17", "remaining_time": "1:06:11", "throughput": 19936.36, "total_tokens": 166628032}
|
|
{"current_steps": 52950, "total_steps": 78105, "loss": 0.1895, "lr": 1.4204274148216217e-06, "epoch": 3.389667754945266, "percentage": 67.79, "elapsed_time": "2:19:18", "remaining_time": "1:06:10", "throughput": 19936.61, "total_tokens": 166643328}
|
|
{"current_steps": 52955, "total_steps": 78105, "loss": 0.2368, "lr": 1.419923562233782e-06, "epoch": 3.389987836886243, "percentage": 67.8, "elapsed_time": "2:19:19", "remaining_time": "1:06:10", "throughput": 19936.89, "total_tokens": 166659328}
|
|
{"current_steps": 52960, "total_steps": 78105, "loss": 0.1384, "lr": 1.419419763579333e-06, "epoch": 3.3903079188272196, "percentage": 67.81, "elapsed_time": "2:19:19", "remaining_time": "1:06:09", "throughput": 19937.13, "total_tokens": 166674176}
|
|
{"current_steps": 52965, "total_steps": 78105, "loss": 0.1512, "lr": 1.4189160188834344e-06, "epoch": 3.3906280007681966, "percentage": 67.81, "elapsed_time": "2:19:20", "remaining_time": "1:06:08", "throughput": 19937.46, "total_tokens": 166690816}
|
|
{"current_steps": 52970, "total_steps": 78105, "loss": 0.2621, "lr": 1.418412328171237e-06, "epoch": 3.3909480827091736, "percentage": 67.82, "elapsed_time": "2:19:21", "remaining_time": "1:06:07", "throughput": 19937.75, "total_tokens": 166707264}
|
|
{"current_steps": 52975, "total_steps": 78105, "loss": 0.1546, "lr": 1.4179086914678947e-06, "epoch": 3.3912681646501506, "percentage": 67.83, "elapsed_time": "2:19:22", "remaining_time": "1:06:06", "throughput": 19937.98, "total_tokens": 166722432}
|
|
{"current_steps": 52980, "total_steps": 78105, "loss": 0.1353, "lr": 1.4174051087985563e-06, "epoch": 3.3915882465911276, "percentage": 67.83, "elapsed_time": "2:19:22", "remaining_time": "1:06:05", "throughput": 19938.23, "total_tokens": 166737536}
|
|
{"current_steps": 52985, "total_steps": 78105, "loss": 0.1705, "lr": 1.4169015801883674e-06, "epoch": 3.391908328532104, "percentage": 67.84, "elapsed_time": "2:19:23", "remaining_time": "1:06:05", "throughput": 19938.46, "total_tokens": 166752768}
|
|
{"current_steps": 52990, "total_steps": 78105, "loss": 0.1519, "lr": 1.416398105662472e-06, "epoch": 3.392228410473081, "percentage": 67.84, "elapsed_time": "2:19:24", "remaining_time": "1:06:04", "throughput": 19938.67, "total_tokens": 166767168}
|
|
{"current_steps": 52995, "total_steps": 78105, "loss": 0.1357, "lr": 1.41589468524601e-06, "epoch": 3.392548492414058, "percentage": 67.85, "elapsed_time": "2:19:24", "remaining_time": "1:06:03", "throughput": 19938.94, "total_tokens": 166782848}
|
|
{"current_steps": 53000, "total_steps": 78105, "loss": 0.2156, "lr": 1.4153913189641226e-06, "epoch": 3.392868574355035, "percentage": 67.86, "elapsed_time": "2:19:25", "remaining_time": "1:06:02", "throughput": 19939.26, "total_tokens": 166799552}
|
|
{"current_steps": 53005, "total_steps": 78105, "loss": 0.141, "lr": 1.4148880068419413e-06, "epoch": 3.3931886562960116, "percentage": 67.86, "elapsed_time": "2:19:26", "remaining_time": "1:06:01", "throughput": 19939.58, "total_tokens": 166815936}
|
|
{"current_steps": 53010, "total_steps": 78105, "loss": 0.156, "lr": 1.4143847489046014e-06, "epoch": 3.3935087382369886, "percentage": 67.87, "elapsed_time": "2:19:26", "remaining_time": "1:06:00", "throughput": 19939.88, "total_tokens": 166832320}
|
|
{"current_steps": 53015, "total_steps": 78105, "loss": 0.1561, "lr": 1.4138815451772325e-06, "epoch": 3.3938288201779656, "percentage": 67.88, "elapsed_time": "2:19:27", "remaining_time": "1:05:59", "throughput": 19940.13, "total_tokens": 166847680}
|
|
{"current_steps": 53020, "total_steps": 78105, "loss": 0.1001, "lr": 1.4133783956849623e-06, "epoch": 3.3941489021189426, "percentage": 67.88, "elapsed_time": "2:19:28", "remaining_time": "1:05:59", "throughput": 19940.35, "total_tokens": 166862400}
|
|
{"current_steps": 53025, "total_steps": 78105, "loss": 0.1729, "lr": 1.4128753004529145e-06, "epoch": 3.394468984059919, "percentage": 67.89, "elapsed_time": "2:19:28", "remaining_time": "1:05:58", "throughput": 19940.61, "total_tokens": 166878144}
|
|
{"current_steps": 53030, "total_steps": 78105, "loss": 0.1541, "lr": 1.4123722595062107e-06, "epoch": 3.394789066000896, "percentage": 67.9, "elapsed_time": "2:19:29", "remaining_time": "1:05:57", "throughput": 19940.83, "total_tokens": 166893120}
|
|
{"current_steps": 53035, "total_steps": 78105, "loss": 0.1637, "lr": 1.4118692728699733e-06, "epoch": 3.395109147941873, "percentage": 67.9, "elapsed_time": "2:19:30", "remaining_time": "1:05:56", "throughput": 19941.19, "total_tokens": 166910976}
|
|
{"current_steps": 53040, "total_steps": 78105, "loss": 0.1321, "lr": 1.411366340569314e-06, "epoch": 3.39542922988285, "percentage": 67.91, "elapsed_time": "2:19:30", "remaining_time": "1:05:55", "throughput": 19941.56, "total_tokens": 166928832}
|
|
{"current_steps": 53045, "total_steps": 78105, "loss": 0.1172, "lr": 1.4108634626293505e-06, "epoch": 3.3957493118238267, "percentage": 67.91, "elapsed_time": "2:19:31", "remaining_time": "1:05:54", "throughput": 19941.82, "total_tokens": 166944576}
|
|
{"current_steps": 53050, "total_steps": 78105, "loss": 0.1946, "lr": 1.4103606390751925e-06, "epoch": 3.3960693937648037, "percentage": 67.92, "elapsed_time": "2:19:32", "remaining_time": "1:05:54", "throughput": 19942.06, "total_tokens": 166959936}
|
|
{"current_steps": 53055, "total_steps": 78105, "loss": 0.142, "lr": 1.4098578699319486e-06, "epoch": 3.3963894757057806, "percentage": 67.93, "elapsed_time": "2:19:32", "remaining_time": "1:05:53", "throughput": 19942.32, "total_tokens": 166975424}
|
|
{"current_steps": 53060, "total_steps": 78105, "loss": 0.1515, "lr": 1.4093551552247243e-06, "epoch": 3.3967095576467576, "percentage": 67.93, "elapsed_time": "2:19:33", "remaining_time": "1:05:52", "throughput": 19942.52, "total_tokens": 166989952}
|
|
{"current_steps": 53065, "total_steps": 78105, "loss": 0.1446, "lr": 1.4088524949786225e-06, "epoch": 3.3970296395877346, "percentage": 67.94, "elapsed_time": "2:19:34", "remaining_time": "1:05:51", "throughput": 19942.74, "total_tokens": 167004864}
|
|
{"current_steps": 53070, "total_steps": 78105, "loss": 0.1743, "lr": 1.4083498892187424e-06, "epoch": 3.397349721528711, "percentage": 67.95, "elapsed_time": "2:19:34", "remaining_time": "1:05:50", "throughput": 19943.0, "total_tokens": 167020352}
|
|
{"current_steps": 53075, "total_steps": 78105, "loss": 0.1583, "lr": 1.407847337970185e-06, "epoch": 3.397669803469688, "percentage": 67.95, "elapsed_time": "2:19:35", "remaining_time": "1:05:49", "throughput": 19943.28, "total_tokens": 167036032}
|
|
{"current_steps": 53080, "total_steps": 78105, "loss": 0.2474, "lr": 1.4073448412580403e-06, "epoch": 3.397989885410665, "percentage": 67.96, "elapsed_time": "2:19:36", "remaining_time": "1:05:49", "throughput": 19943.55, "total_tokens": 167051520}
|
|
{"current_steps": 53085, "total_steps": 78105, "loss": 0.1208, "lr": 1.406842399107405e-06, "epoch": 3.398309967351642, "percentage": 67.97, "elapsed_time": "2:19:36", "remaining_time": "1:05:48", "throughput": 19943.83, "total_tokens": 167067392}
|
|
{"current_steps": 53090, "total_steps": 78105, "loss": 0.1728, "lr": 1.4063400115433642e-06, "epoch": 3.3986300492926187, "percentage": 67.97, "elapsed_time": "2:19:37", "remaining_time": "1:05:47", "throughput": 19944.09, "total_tokens": 167082816}
|
|
{"current_steps": 53095, "total_steps": 78105, "loss": 0.1649, "lr": 1.4058376785910078e-06, "epoch": 3.3989501312335957, "percentage": 67.98, "elapsed_time": "2:19:38", "remaining_time": "1:05:46", "throughput": 19944.32, "total_tokens": 167097920}
|
|
{"current_steps": 53100, "total_steps": 78105, "loss": 0.1787, "lr": 1.4053354002754183e-06, "epoch": 3.3992702131745727, "percentage": 67.99, "elapsed_time": "2:19:38", "remaining_time": "1:05:45", "throughput": 19944.57, "total_tokens": 167113152}
|
|
{"current_steps": 53105, "total_steps": 78105, "loss": 0.1523, "lr": 1.4048331766216757e-06, "epoch": 3.3995902951155497, "percentage": 67.99, "elapsed_time": "2:19:39", "remaining_time": "1:05:44", "throughput": 19944.86, "total_tokens": 167129472}
|
|
{"current_steps": 53110, "total_steps": 78105, "loss": 0.2269, "lr": 1.4043310076548621e-06, "epoch": 3.3999103770565267, "percentage": 68.0, "elapsed_time": "2:19:40", "remaining_time": "1:05:43", "throughput": 19945.08, "total_tokens": 167144192}
|
|
{"current_steps": 53115, "total_steps": 78105, "loss": 0.1375, "lr": 1.403828893400049e-06, "epoch": 3.400230458997503, "percentage": 68.0, "elapsed_time": "2:19:40", "remaining_time": "1:05:43", "throughput": 19945.31, "total_tokens": 167158976}
|
|
{"current_steps": 53120, "total_steps": 78105, "loss": 0.1697, "lr": 1.4033268338823125e-06, "epoch": 3.40055054093848, "percentage": 68.01, "elapsed_time": "2:19:41", "remaining_time": "1:05:42", "throughput": 19945.58, "total_tokens": 167174656}
|
|
{"current_steps": 53125, "total_steps": 78105, "loss": 0.1128, "lr": 1.4028248291267203e-06, "epoch": 3.400870622879457, "percentage": 68.02, "elapsed_time": "2:19:42", "remaining_time": "1:05:41", "throughput": 19945.82, "total_tokens": 167189632}
|
|
{"current_steps": 53130, "total_steps": 78105, "loss": 0.1365, "lr": 1.4023228791583416e-06, "epoch": 3.401190704820434, "percentage": 68.02, "elapsed_time": "2:19:42", "remaining_time": "1:05:40", "throughput": 19946.08, "total_tokens": 167205440}
|
|
{"current_steps": 53135, "total_steps": 78105, "loss": 0.1522, "lr": 1.4018209840022412e-06, "epoch": 3.4015107867614107, "percentage": 68.03, "elapsed_time": "2:19:43", "remaining_time": "1:05:39", "throughput": 19946.35, "total_tokens": 167221120}
|
|
{"current_steps": 53140, "total_steps": 78105, "loss": 0.1633, "lr": 1.4013191436834805e-06, "epoch": 3.4018308687023877, "percentage": 68.04, "elapsed_time": "2:19:44", "remaining_time": "1:05:38", "throughput": 19946.58, "total_tokens": 167236160}
|
|
{"current_steps": 53145, "total_steps": 78105, "loss": 0.2166, "lr": 1.4008173582271186e-06, "epoch": 3.4021509506433647, "percentage": 68.04, "elapsed_time": "2:19:44", "remaining_time": "1:05:38", "throughput": 19946.85, "total_tokens": 167252224}
|
|
{"current_steps": 53150, "total_steps": 78105, "loss": 0.1682, "lr": 1.400315627658212e-06, "epoch": 3.4024710325843417, "percentage": 68.05, "elapsed_time": "2:19:45", "remaining_time": "1:05:37", "throughput": 19947.12, "total_tokens": 167268352}
|
|
{"current_steps": 53155, "total_steps": 78105, "loss": 0.1205, "lr": 1.3998139520018155e-06, "epoch": 3.4027911145253187, "percentage": 68.06, "elapsed_time": "2:19:46", "remaining_time": "1:05:36", "throughput": 19947.33, "total_tokens": 167283072}
|
|
{"current_steps": 53160, "total_steps": 78105, "loss": 0.1569, "lr": 1.3993123312829796e-06, "epoch": 3.4031111964662952, "percentage": 68.06, "elapsed_time": "2:19:46", "remaining_time": "1:05:35", "throughput": 19947.69, "total_tokens": 167300736}
|
|
{"current_steps": 53165, "total_steps": 78105, "loss": 0.1232, "lr": 1.3988107655267527e-06, "epoch": 3.403431278407272, "percentage": 68.07, "elapsed_time": "2:19:47", "remaining_time": "1:05:34", "throughput": 19947.98, "total_tokens": 167316800}
|
|
{"current_steps": 53170, "total_steps": 78105, "loss": 0.2691, "lr": 1.39830925475818e-06, "epoch": 3.403751360348249, "percentage": 68.08, "elapsed_time": "2:19:48", "remaining_time": "1:05:33", "throughput": 19948.2, "total_tokens": 167331520}
|
|
{"current_steps": 53175, "total_steps": 78105, "loss": 0.1229, "lr": 1.397807799002305e-06, "epoch": 3.404071442289226, "percentage": 68.08, "elapsed_time": "2:19:48", "remaining_time": "1:05:32", "throughput": 19948.46, "total_tokens": 167347072}
|
|
{"current_steps": 53180, "total_steps": 78105, "loss": 0.155, "lr": 1.3973063982841673e-06, "epoch": 3.4043915242302027, "percentage": 68.09, "elapsed_time": "2:19:49", "remaining_time": "1:05:32", "throughput": 19948.73, "total_tokens": 167362880}
|
|
{"current_steps": 53185, "total_steps": 78105, "loss": 0.135, "lr": 1.3968050526288042e-06, "epoch": 3.4047116061711797, "percentage": 68.09, "elapsed_time": "2:19:50", "remaining_time": "1:05:31", "throughput": 19948.98, "total_tokens": 167378304}
|
|
{"current_steps": 53190, "total_steps": 78105, "loss": 0.1764, "lr": 1.3963037620612496e-06, "epoch": 3.4050316881121567, "percentage": 68.1, "elapsed_time": "2:19:50", "remaining_time": "1:05:30", "throughput": 19949.2, "total_tokens": 167393408}
|
|
{"current_steps": 53195, "total_steps": 78105, "loss": 0.1792, "lr": 1.3958025266065384e-06, "epoch": 3.4053517700531337, "percentage": 68.11, "elapsed_time": "2:19:51", "remaining_time": "1:05:29", "throughput": 19949.45, "total_tokens": 167408960}
|
|
{"current_steps": 53200, "total_steps": 78105, "loss": 0.1613, "lr": 1.3953013462896952e-06, "epoch": 3.4056718519941107, "percentage": 68.11, "elapsed_time": "2:19:52", "remaining_time": "1:05:28", "throughput": 19949.74, "total_tokens": 167424896}
|
|
{"current_steps": 53205, "total_steps": 78105, "loss": 0.1138, "lr": 1.3948002211357496e-06, "epoch": 3.4059919339350873, "percentage": 68.12, "elapsed_time": "2:19:53", "remaining_time": "1:05:27", "throughput": 19950.03, "total_tokens": 167441088}
|
|
{"current_steps": 53210, "total_steps": 78105, "loss": 0.1472, "lr": 1.394299151169724e-06, "epoch": 3.4063120158760642, "percentage": 68.13, "elapsed_time": "2:19:53", "remaining_time": "1:05:27", "throughput": 19950.3, "total_tokens": 167457024}
|
|
{"current_steps": 53215, "total_steps": 78105, "loss": 0.121, "lr": 1.3937981364166397e-06, "epoch": 3.4066320978170412, "percentage": 68.13, "elapsed_time": "2:19:54", "remaining_time": "1:05:26", "throughput": 19950.52, "total_tokens": 167471936}
|
|
{"current_steps": 53220, "total_steps": 78105, "loss": 0.1657, "lr": 1.3932971769015143e-06, "epoch": 3.4069521797580182, "percentage": 68.14, "elapsed_time": "2:19:55", "remaining_time": "1:05:25", "throughput": 19950.77, "total_tokens": 167487104}
|
|
{"current_steps": 53225, "total_steps": 78105, "loss": 0.1632, "lr": 1.3927962726493624e-06, "epoch": 3.4072722616989948, "percentage": 68.15, "elapsed_time": "2:19:55", "remaining_time": "1:05:24", "throughput": 19951.03, "total_tokens": 167502848}
|
|
{"current_steps": 53230, "total_steps": 78105, "loss": 0.2176, "lr": 1.3922954236851993e-06, "epoch": 3.4075923436399718, "percentage": 68.15, "elapsed_time": "2:19:56", "remaining_time": "1:05:23", "throughput": 19951.36, "total_tokens": 167520192}
|
|
{"current_steps": 53235, "total_steps": 78105, "loss": 0.1436, "lr": 1.3917946300340312e-06, "epoch": 3.4079124255809488, "percentage": 68.16, "elapsed_time": "2:19:57", "remaining_time": "1:05:22", "throughput": 19951.62, "total_tokens": 167535680}
|
|
{"current_steps": 53240, "total_steps": 78105, "loss": 0.248, "lr": 1.3912938917208677e-06, "epoch": 3.4082325075219257, "percentage": 68.16, "elapsed_time": "2:19:57", "remaining_time": "1:05:22", "throughput": 19951.95, "total_tokens": 167552768}
|
|
{"current_steps": 53245, "total_steps": 78105, "loss": 0.1496, "lr": 1.3907932087707121e-06, "epoch": 3.4085525894629027, "percentage": 68.17, "elapsed_time": "2:19:58", "remaining_time": "1:05:21", "throughput": 19952.19, "total_tokens": 167568320}
|
|
{"current_steps": 53250, "total_steps": 78105, "loss": 0.172, "lr": 1.3902925812085661e-06, "epoch": 3.4088726714038793, "percentage": 68.18, "elapsed_time": "2:19:59", "remaining_time": "1:05:20", "throughput": 19952.44, "total_tokens": 167583616}
|
|
{"current_steps": 53255, "total_steps": 78105, "loss": 0.1069, "lr": 1.389792009059428e-06, "epoch": 3.4091927533448563, "percentage": 68.18, "elapsed_time": "2:19:59", "remaining_time": "1:05:19", "throughput": 19952.73, "total_tokens": 167599616}
|
|
{"current_steps": 53260, "total_steps": 78105, "loss": 0.2235, "lr": 1.3892914923482947e-06, "epoch": 3.4095128352858333, "percentage": 68.19, "elapsed_time": "2:20:00", "remaining_time": "1:05:18", "throughput": 19953.03, "total_tokens": 167615616}
|
|
{"current_steps": 53265, "total_steps": 78105, "loss": 0.1071, "lr": 1.3887910311001584e-06, "epoch": 3.4098329172268103, "percentage": 68.2, "elapsed_time": "2:20:01", "remaining_time": "1:05:17", "throughput": 19953.26, "total_tokens": 167630464}
|
|
{"current_steps": 53270, "total_steps": 78105, "loss": 0.2736, "lr": 1.388290625340009e-06, "epoch": 3.410152999167787, "percentage": 68.2, "elapsed_time": "2:20:01", "remaining_time": "1:05:17", "throughput": 19953.59, "total_tokens": 167647424}
|
|
{"current_steps": 53275, "total_steps": 78105, "loss": 0.1264, "lr": 1.3877902750928364e-06, "epoch": 3.410473081108764, "percentage": 68.21, "elapsed_time": "2:20:02", "remaining_time": "1:05:16", "throughput": 19953.87, "total_tokens": 167663424}
|
|
{"current_steps": 53280, "total_steps": 78105, "loss": 0.2118, "lr": 1.3872899803836237e-06, "epoch": 3.410793163049741, "percentage": 68.22, "elapsed_time": "2:20:03", "remaining_time": "1:05:15", "throughput": 19954.09, "total_tokens": 167678208}
|
|
{"current_steps": 53285, "total_steps": 78105, "loss": 0.1357, "lr": 1.3867897412373538e-06, "epoch": 3.4111132449907178, "percentage": 68.22, "elapsed_time": "2:20:03", "remaining_time": "1:05:14", "throughput": 19954.33, "total_tokens": 167693696}
|
|
{"current_steps": 53290, "total_steps": 78105, "loss": 0.1305, "lr": 1.3862895576790052e-06, "epoch": 3.4114333269316943, "percentage": 68.23, "elapsed_time": "2:20:04", "remaining_time": "1:05:13", "throughput": 19954.56, "total_tokens": 167708608}
|
|
{"current_steps": 53295, "total_steps": 78105, "loss": 0.2431, "lr": 1.3857894297335555e-06, "epoch": 3.4117534088726713, "percentage": 68.24, "elapsed_time": "2:20:05", "remaining_time": "1:05:12", "throughput": 19954.86, "total_tokens": 167724992}
|
|
{"current_steps": 53300, "total_steps": 78105, "loss": 0.1511, "lr": 1.385289357425977e-06, "epoch": 3.4120734908136483, "percentage": 68.24, "elapsed_time": "2:20:05", "remaining_time": "1:05:11", "throughput": 19955.11, "total_tokens": 167740288}
|
|
{"current_steps": 53305, "total_steps": 78105, "loss": 0.1275, "lr": 1.3847893407812434e-06, "epoch": 3.4123935727546253, "percentage": 68.25, "elapsed_time": "2:20:06", "remaining_time": "1:05:11", "throughput": 19955.34, "total_tokens": 167755328}
|
|
{"current_steps": 53310, "total_steps": 78105, "loss": 0.1331, "lr": 1.3842893798243195e-06, "epoch": 3.412713654695602, "percentage": 68.25, "elapsed_time": "2:20:07", "remaining_time": "1:05:10", "throughput": 19955.58, "total_tokens": 167770752}
|
|
{"current_steps": 53315, "total_steps": 78105, "loss": 0.2498, "lr": 1.3837894745801745e-06, "epoch": 3.413033736636579, "percentage": 68.26, "elapsed_time": "2:20:07", "remaining_time": "1:05:09", "throughput": 19955.83, "total_tokens": 167785984}
|
|
{"current_steps": 53320, "total_steps": 78105, "loss": 0.1911, "lr": 1.3832896250737665e-06, "epoch": 3.413353818577556, "percentage": 68.27, "elapsed_time": "2:20:08", "remaining_time": "1:05:08", "throughput": 19956.08, "total_tokens": 167801792}
|
|
{"current_steps": 53325, "total_steps": 78105, "loss": 0.1828, "lr": 1.382789831330059e-06, "epoch": 3.413673900518533, "percentage": 68.27, "elapsed_time": "2:20:09", "remaining_time": "1:05:07", "throughput": 19956.31, "total_tokens": 167816896}
|
|
{"current_steps": 53330, "total_steps": 78105, "loss": 0.1473, "lr": 1.3822900933740085e-06, "epoch": 3.41399398245951, "percentage": 68.28, "elapsed_time": "2:20:09", "remaining_time": "1:05:06", "throughput": 19956.56, "total_tokens": 167832320}
|
|
{"current_steps": 53335, "total_steps": 78105, "loss": 0.2207, "lr": 1.3817904112305676e-06, "epoch": 3.4143140644004863, "percentage": 68.29, "elapsed_time": "2:20:10", "remaining_time": "1:05:06", "throughput": 19956.85, "total_tokens": 167848512}
|
|
{"current_steps": 53340, "total_steps": 78105, "loss": 0.2879, "lr": 1.3812907849246907e-06, "epoch": 3.4146341463414633, "percentage": 68.29, "elapsed_time": "2:20:11", "remaining_time": "1:05:05", "throughput": 19957.09, "total_tokens": 167863680}
|
|
{"current_steps": 53345, "total_steps": 78105, "loss": 0.1084, "lr": 1.380791214481323e-06, "epoch": 3.4149542282824403, "percentage": 68.3, "elapsed_time": "2:20:11", "remaining_time": "1:05:04", "throughput": 19957.36, "total_tokens": 167879488}
|
|
{"current_steps": 53350, "total_steps": 78105, "loss": 0.2407, "lr": 1.3802916999254146e-06, "epoch": 3.4152743102234173, "percentage": 68.31, "elapsed_time": "2:20:12", "remaining_time": "1:05:03", "throughput": 19957.64, "total_tokens": 167895296}
|
|
{"current_steps": 53355, "total_steps": 78105, "loss": 0.2134, "lr": 1.379792241281904e-06, "epoch": 3.415594392164394, "percentage": 68.31, "elapsed_time": "2:20:13", "remaining_time": "1:05:02", "throughput": 19957.89, "total_tokens": 167910528}
|
|
{"current_steps": 53360, "total_steps": 78105, "loss": 0.1455, "lr": 1.3792928385757348e-06, "epoch": 3.415914474105371, "percentage": 68.32, "elapsed_time": "2:20:13", "remaining_time": "1:05:01", "throughput": 19958.16, "total_tokens": 167926400}
|
|
{"current_steps": 53365, "total_steps": 78105, "loss": 0.1733, "lr": 1.378793491831844e-06, "epoch": 3.416234556046348, "percentage": 68.32, "elapsed_time": "2:20:14", "remaining_time": "1:05:01", "throughput": 19958.47, "total_tokens": 167942976}
|
|
{"current_steps": 53370, "total_steps": 78105, "loss": 0.1124, "lr": 1.378294201075166e-06, "epoch": 3.416554637987325, "percentage": 68.33, "elapsed_time": "2:20:15", "remaining_time": "1:05:00", "throughput": 19958.74, "total_tokens": 167958656}
|
|
{"current_steps": 53375, "total_steps": 78105, "loss": 0.1323, "lr": 1.3777949663306329e-06, "epoch": 3.416874719928302, "percentage": 68.34, "elapsed_time": "2:20:16", "remaining_time": "1:04:59", "throughput": 19959.06, "total_tokens": 167975552}
|
|
{"current_steps": 53380, "total_steps": 78105, "loss": 0.1302, "lr": 1.377295787623173e-06, "epoch": 3.4171948018692784, "percentage": 68.34, "elapsed_time": "2:20:16", "remaining_time": "1:04:58", "throughput": 19959.3, "total_tokens": 167990720}
|
|
{"current_steps": 53385, "total_steps": 78105, "loss": 0.1207, "lr": 1.3767966649777143e-06, "epoch": 3.4175148838102554, "percentage": 68.35, "elapsed_time": "2:20:17", "remaining_time": "1:04:57", "throughput": 19959.53, "total_tokens": 168005632}
|
|
{"current_steps": 53390, "total_steps": 78105, "loss": 0.0946, "lr": 1.3762975984191795e-06, "epoch": 3.4178349657512324, "percentage": 68.36, "elapsed_time": "2:20:17", "remaining_time": "1:04:56", "throughput": 19959.79, "total_tokens": 168020992}
|
|
{"current_steps": 53395, "total_steps": 78105, "loss": 0.1956, "lr": 1.3757985879724894e-06, "epoch": 3.4181550476922093, "percentage": 68.36, "elapsed_time": "2:20:18", "remaining_time": "1:04:55", "throughput": 19960.09, "total_tokens": 168037440}
|
|
{"current_steps": 53400, "total_steps": 78105, "loss": 0.1871, "lr": 1.3752996336625623e-06, "epoch": 3.418475129633186, "percentage": 68.37, "elapsed_time": "2:20:19", "remaining_time": "1:04:55", "throughput": 19960.43, "total_tokens": 168054528}
|
|
{"current_steps": 53405, "total_steps": 78105, "loss": 0.1581, "lr": 1.3748007355143129e-06, "epoch": 3.418795211574163, "percentage": 68.38, "elapsed_time": "2:20:20", "remaining_time": "1:04:54", "throughput": 19960.78, "total_tokens": 168071744}
|
|
{"current_steps": 53410, "total_steps": 78105, "loss": 0.1422, "lr": 1.3743018935526542e-06, "epoch": 3.41911529351514, "percentage": 68.38, "elapsed_time": "2:20:20", "remaining_time": "1:04:53", "throughput": 19961.06, "total_tokens": 168087744}
|
|
{"current_steps": 53415, "total_steps": 78105, "loss": 0.1392, "lr": 1.373803107802495e-06, "epoch": 3.419435375456117, "percentage": 68.39, "elapsed_time": "2:20:21", "remaining_time": "1:04:52", "throughput": 19961.37, "total_tokens": 168104256}
|
|
{"current_steps": 53420, "total_steps": 78105, "loss": 0.1463, "lr": 1.3733043782887417e-06, "epoch": 3.419755457397094, "percentage": 68.4, "elapsed_time": "2:20:22", "remaining_time": "1:04:51", "throughput": 19961.72, "total_tokens": 168121472}
|
|
{"current_steps": 53425, "total_steps": 78105, "loss": 0.1322, "lr": 1.372805705036301e-06, "epoch": 3.4200755393380704, "percentage": 68.4, "elapsed_time": "2:20:22", "remaining_time": "1:04:50", "throughput": 19961.98, "total_tokens": 168137344}
|
|
{"current_steps": 53430, "total_steps": 78105, "loss": 0.2022, "lr": 1.37230708807007e-06, "epoch": 3.4203956212790474, "percentage": 68.41, "elapsed_time": "2:20:23", "remaining_time": "1:04:50", "throughput": 19962.23, "total_tokens": 168152768}
|
|
{"current_steps": 53435, "total_steps": 78105, "loss": 0.1392, "lr": 1.37180852741495e-06, "epoch": 3.4207157032200244, "percentage": 68.41, "elapsed_time": "2:20:24", "remaining_time": "1:04:49", "throughput": 19962.55, "total_tokens": 168169984}
|
|
{"current_steps": 53440, "total_steps": 78105, "loss": 0.1241, "lr": 1.3713100230958356e-06, "epoch": 3.4210357851610014, "percentage": 68.42, "elapsed_time": "2:20:24", "remaining_time": "1:04:48", "throughput": 19962.88, "total_tokens": 168186560}
|
|
{"current_steps": 53445, "total_steps": 78105, "loss": 0.1573, "lr": 1.3708115751376194e-06, "epoch": 3.421355867101978, "percentage": 68.43, "elapsed_time": "2:20:25", "remaining_time": "1:04:47", "throughput": 19963.12, "total_tokens": 168201728}
|
|
{"current_steps": 53450, "total_steps": 78105, "loss": 0.136, "lr": 1.3703131835651917e-06, "epoch": 3.421675949042955, "percentage": 68.43, "elapsed_time": "2:20:26", "remaining_time": "1:04:46", "throughput": 19963.39, "total_tokens": 168217408}
|
|
{"current_steps": 53455, "total_steps": 78105, "loss": 0.1847, "lr": 1.3698148484034385e-06, "epoch": 3.421996030983932, "percentage": 68.44, "elapsed_time": "2:20:26", "remaining_time": "1:04:45", "throughput": 19963.67, "total_tokens": 168233408}
|
|
{"current_steps": 53460, "total_steps": 78105, "loss": 0.1363, "lr": 1.3693165696772465e-06, "epoch": 3.422316112924909, "percentage": 68.45, "elapsed_time": "2:20:27", "remaining_time": "1:04:45", "throughput": 19963.93, "total_tokens": 168249472}
|
|
{"current_steps": 53465, "total_steps": 78105, "loss": 0.1404, "lr": 1.3688183474114936e-06, "epoch": 3.422636194865886, "percentage": 68.45, "elapsed_time": "2:20:28", "remaining_time": "1:04:44", "throughput": 19964.13, "total_tokens": 168263808}
|
|
{"current_steps": 53470, "total_steps": 78105, "loss": 0.2012, "lr": 1.3683201816310609e-06, "epoch": 3.4229562768068624, "percentage": 68.46, "elapsed_time": "2:20:28", "remaining_time": "1:04:43", "throughput": 19964.36, "total_tokens": 168279104}
|
|
{"current_steps": 53475, "total_steps": 78105, "loss": 0.147, "lr": 1.367822072360824e-06, "epoch": 3.4232763587478394, "percentage": 68.47, "elapsed_time": "2:20:29", "remaining_time": "1:04:42", "throughput": 19964.61, "total_tokens": 168294720}
|
|
{"current_steps": 53480, "total_steps": 78105, "loss": 0.265, "lr": 1.3673240196256554e-06, "epoch": 3.4235964406888164, "percentage": 68.47, "elapsed_time": "2:20:30", "remaining_time": "1:04:41", "throughput": 19964.9, "total_tokens": 168310848}
|
|
{"current_steps": 53485, "total_steps": 78105, "loss": 0.1805, "lr": 1.3668260234504255e-06, "epoch": 3.4239165226297934, "percentage": 68.48, "elapsed_time": "2:20:31", "remaining_time": "1:04:40", "throughput": 19965.14, "total_tokens": 168326208}
|
|
{"current_steps": 53490, "total_steps": 78105, "loss": 0.1349, "lr": 1.3663280838600015e-06, "epoch": 3.42423660457077, "percentage": 68.48, "elapsed_time": "2:20:31", "remaining_time": "1:04:40", "throughput": 19965.38, "total_tokens": 168341568}
|
|
{"current_steps": 53495, "total_steps": 78105, "loss": 0.1316, "lr": 1.365830200879248e-06, "epoch": 3.424556686511747, "percentage": 68.49, "elapsed_time": "2:20:32", "remaining_time": "1:04:39", "throughput": 19965.68, "total_tokens": 168358016}
|
|
{"current_steps": 53500, "total_steps": 78105, "loss": 0.1207, "lr": 1.3653323745330255e-06, "epoch": 3.424876768452724, "percentage": 68.5, "elapsed_time": "2:20:33", "remaining_time": "1:04:38", "throughput": 19965.89, "total_tokens": 168372672}
|
|
{"current_steps": 53505, "total_steps": 78105, "loss": 0.1532, "lr": 1.3648346048461946e-06, "epoch": 3.425196850393701, "percentage": 68.5, "elapsed_time": "2:20:33", "remaining_time": "1:04:37", "throughput": 19966.11, "total_tokens": 168387584}
|
|
{"current_steps": 53510, "total_steps": 78105, "loss": 0.1866, "lr": 1.3643368918436112e-06, "epoch": 3.425516932334678, "percentage": 68.51, "elapsed_time": "2:20:34", "remaining_time": "1:04:36", "throughput": 19966.38, "total_tokens": 168403264}
|
|
{"current_steps": 53515, "total_steps": 78105, "loss": 0.127, "lr": 1.3638392355501279e-06, "epoch": 3.4258370142756545, "percentage": 68.52, "elapsed_time": "2:20:34", "remaining_time": "1:04:35", "throughput": 19966.59, "total_tokens": 168417856}
|
|
{"current_steps": 53520, "total_steps": 78105, "loss": 0.1511, "lr": 1.363341635990595e-06, "epoch": 3.4261570962166314, "percentage": 68.52, "elapsed_time": "2:20:35", "remaining_time": "1:04:35", "throughput": 19966.98, "total_tokens": 168436224}
|
|
{"current_steps": 53525, "total_steps": 78105, "loss": 0.2167, "lr": 1.3628440931898602e-06, "epoch": 3.4264771781576084, "percentage": 68.53, "elapsed_time": "2:20:36", "remaining_time": "1:04:34", "throughput": 19967.19, "total_tokens": 168450816}
|
|
{"current_steps": 53530, "total_steps": 78105, "loss": 0.1586, "lr": 1.362346607172767e-06, "epoch": 3.4267972600985854, "percentage": 68.54, "elapsed_time": "2:20:37", "remaining_time": "1:04:33", "throughput": 19967.47, "total_tokens": 168466880}
|
|
{"current_steps": 53535, "total_steps": 78105, "loss": 0.1427, "lr": 1.3618491779641603e-06, "epoch": 3.427117342039562, "percentage": 68.54, "elapsed_time": "2:20:37", "remaining_time": "1:04:32", "throughput": 19967.78, "total_tokens": 168483648}
|
|
{"current_steps": 53540, "total_steps": 78105, "loss": 0.1957, "lr": 1.3613518055888755e-06, "epoch": 3.427437423980539, "percentage": 68.55, "elapsed_time": "2:20:38", "remaining_time": "1:04:31", "throughput": 19968.05, "total_tokens": 168499456}
|
|
{"current_steps": 53545, "total_steps": 78105, "loss": 0.1738, "lr": 1.3608544900717524e-06, "epoch": 3.427757505921516, "percentage": 68.56, "elapsed_time": "2:20:39", "remaining_time": "1:04:30", "throughput": 19968.28, "total_tokens": 168515008}
|
|
{"current_steps": 53550, "total_steps": 78105, "loss": 0.1664, "lr": 1.3603572314376207e-06, "epoch": 3.428077587862493, "percentage": 68.56, "elapsed_time": "2:20:39", "remaining_time": "1:04:30", "throughput": 19968.57, "total_tokens": 168531200}
|
|
{"current_steps": 53555, "total_steps": 78105, "loss": 0.1422, "lr": 1.3598600297113135e-06, "epoch": 3.4283976698034695, "percentage": 68.57, "elapsed_time": "2:20:40", "remaining_time": "1:04:29", "throughput": 19968.86, "total_tokens": 168547328}
|
|
{"current_steps": 53560, "total_steps": 78105, "loss": 0.159, "lr": 1.3593628849176576e-06, "epoch": 3.4287177517444465, "percentage": 68.57, "elapsed_time": "2:20:41", "remaining_time": "1:04:28", "throughput": 19969.13, "total_tokens": 168563328}
|
|
{"current_steps": 53565, "total_steps": 78105, "loss": 0.1592, "lr": 1.3588657970814769e-06, "epoch": 3.4290378336854235, "percentage": 68.58, "elapsed_time": "2:20:41", "remaining_time": "1:04:27", "throughput": 19969.4, "total_tokens": 168579264}
|
|
{"current_steps": 53570, "total_steps": 78105, "loss": 0.1542, "lr": 1.3583687662275962e-06, "epoch": 3.4293579156264005, "percentage": 68.59, "elapsed_time": "2:20:42", "remaining_time": "1:04:26", "throughput": 19969.69, "total_tokens": 168595712}
|
|
{"current_steps": 53575, "total_steps": 78105, "loss": 0.1631, "lr": 1.3578717923808312e-06, "epoch": 3.429677997567377, "percentage": 68.59, "elapsed_time": "2:20:43", "remaining_time": "1:04:25", "throughput": 19970.02, "total_tokens": 168612672}
|
|
{"current_steps": 53580, "total_steps": 78105, "loss": 0.1705, "lr": 1.3573748755660015e-06, "epoch": 3.429998079508354, "percentage": 68.6, "elapsed_time": "2:20:43", "remaining_time": "1:04:25", "throughput": 19970.24, "total_tokens": 168627776}
|
|
{"current_steps": 53585, "total_steps": 78105, "loss": 0.1229, "lr": 1.356878015807917e-06, "epoch": 3.430318161449331, "percentage": 68.61, "elapsed_time": "2:20:44", "remaining_time": "1:04:24", "throughput": 19970.48, "total_tokens": 168642816}
|
|
{"current_steps": 53590, "total_steps": 78105, "loss": 0.2726, "lr": 1.3563812131313909e-06, "epoch": 3.430638243390308, "percentage": 68.61, "elapsed_time": "2:20:45", "remaining_time": "1:04:23", "throughput": 19970.16, "total_tokens": 168658432}
|
|
{"current_steps": 53595, "total_steps": 78105, "loss": 0.2198, "lr": 1.3558844675612297e-06, "epoch": 3.430958325331285, "percentage": 68.62, "elapsed_time": "2:20:46", "remaining_time": "1:04:22", "throughput": 19970.4, "total_tokens": 168673472}
|
|
{"current_steps": 53600, "total_steps": 78105, "loss": 0.1386, "lr": 1.3553877791222394e-06, "epoch": 3.4312784072722615, "percentage": 68.63, "elapsed_time": "2:20:46", "remaining_time": "1:04:21", "throughput": 19970.69, "total_tokens": 168689664}
|
|
{"current_steps": 53605, "total_steps": 78105, "loss": 0.1974, "lr": 1.3548911478392213e-06, "epoch": 3.4315984892132385, "percentage": 68.63, "elapsed_time": "2:20:47", "remaining_time": "1:04:20", "throughput": 19970.98, "total_tokens": 168706112}
|
|
{"current_steps": 53610, "total_steps": 78105, "loss": 0.2249, "lr": 1.354394573736974e-06, "epoch": 3.4319185711542155, "percentage": 68.64, "elapsed_time": "2:20:48", "remaining_time": "1:04:20", "throughput": 19971.21, "total_tokens": 168721024}
|
|
{"current_steps": 53615, "total_steps": 78105, "loss": 0.1364, "lr": 1.353898056840294e-06, "epoch": 3.4322386530951925, "percentage": 68.64, "elapsed_time": "2:20:48", "remaining_time": "1:04:19", "throughput": 19971.45, "total_tokens": 168736448}
|
|
{"current_steps": 53620, "total_steps": 78105, "loss": 0.1433, "lr": 1.3534015971739761e-06, "epoch": 3.432558735036169, "percentage": 68.65, "elapsed_time": "2:20:49", "remaining_time": "1:04:18", "throughput": 19971.72, "total_tokens": 168751936}
|
|
{"current_steps": 53625, "total_steps": 78105, "loss": 0.2215, "lr": 1.35290519476281e-06, "epoch": 3.432878816977146, "percentage": 68.66, "elapsed_time": "2:20:50", "remaining_time": "1:04:17", "throughput": 19971.96, "total_tokens": 168767360}
|
|
{"current_steps": 53630, "total_steps": 78105, "loss": 0.2581, "lr": 1.3524088496315835e-06, "epoch": 3.433198898918123, "percentage": 68.66, "elapsed_time": "2:20:50", "remaining_time": "1:04:16", "throughput": 19972.23, "total_tokens": 168783104}
|
|
{"current_steps": 53635, "total_steps": 78105, "loss": 0.2828, "lr": 1.3519125618050814e-06, "epoch": 3.4335189808591, "percentage": 68.67, "elapsed_time": "2:20:51", "remaining_time": "1:04:15", "throughput": 19972.52, "total_tokens": 168799616}
|
|
{"current_steps": 53640, "total_steps": 78105, "loss": 0.1831, "lr": 1.3514163313080857e-06, "epoch": 3.433839062800077, "percentage": 68.68, "elapsed_time": "2:20:52", "remaining_time": "1:04:15", "throughput": 19972.76, "total_tokens": 168814656}
|
|
{"current_steps": 53645, "total_steps": 78105, "loss": 0.1593, "lr": 1.350920158165376e-06, "epoch": 3.4341591447410535, "percentage": 68.68, "elapsed_time": "2:20:53", "remaining_time": "1:04:14", "throughput": 19973.15, "total_tokens": 168833152}
|
|
{"current_steps": 53650, "total_steps": 78105, "loss": 0.1567, "lr": 1.3504240424017268e-06, "epoch": 3.4344792266820305, "percentage": 68.69, "elapsed_time": "2:20:53", "remaining_time": "1:04:13", "throughput": 19973.42, "total_tokens": 168849216}
|
|
{"current_steps": 53655, "total_steps": 78105, "loss": 0.1661, "lr": 1.3499279840419155e-06, "epoch": 3.4347993086230075, "percentage": 68.7, "elapsed_time": "2:20:54", "remaining_time": "1:04:12", "throughput": 19973.67, "total_tokens": 168864512}
|
|
{"current_steps": 53660, "total_steps": 78105, "loss": 0.1892, "lr": 1.3494319831107079e-06, "epoch": 3.4351193905639845, "percentage": 68.7, "elapsed_time": "2:20:55", "remaining_time": "1:04:11", "throughput": 19973.91, "total_tokens": 168879616}
|
|
{"current_steps": 53665, "total_steps": 78105, "loss": 0.1741, "lr": 1.348936039632875e-06, "epoch": 3.435439472504961, "percentage": 68.71, "elapsed_time": "2:20:55", "remaining_time": "1:04:10", "throughput": 19974.18, "total_tokens": 168895616}
|
|
{"current_steps": 53670, "total_steps": 78105, "loss": 0.1418, "lr": 1.3484401536331804e-06, "epoch": 3.435759554445938, "percentage": 68.72, "elapsed_time": "2:20:56", "remaining_time": "1:04:10", "throughput": 19974.42, "total_tokens": 168910912}
|
|
{"current_steps": 53675, "total_steps": 78105, "loss": 0.257, "lr": 1.3479443251363867e-06, "epoch": 3.436079636386915, "percentage": 68.72, "elapsed_time": "2:20:57", "remaining_time": "1:04:09", "throughput": 19974.66, "total_tokens": 168926336}
|
|
{"current_steps": 53680, "total_steps": 78105, "loss": 0.1206, "lr": 1.3474485541672521e-06, "epoch": 3.436399718327892, "percentage": 68.73, "elapsed_time": "2:20:57", "remaining_time": "1:04:08", "throughput": 19974.87, "total_tokens": 168940800}
|
|
{"current_steps": 53685, "total_steps": 78105, "loss": 0.2162, "lr": 1.3469528407505326e-06, "epoch": 3.436719800268869, "percentage": 68.73, "elapsed_time": "2:20:58", "remaining_time": "1:04:07", "throughput": 19975.13, "total_tokens": 168956544}
|
|
{"current_steps": 53690, "total_steps": 78105, "loss": 0.1356, "lr": 1.3464571849109842e-06, "epoch": 3.4370398822098456, "percentage": 68.74, "elapsed_time": "2:20:59", "remaining_time": "1:04:06", "throughput": 19975.36, "total_tokens": 168971776}
|
|
{"current_steps": 53695, "total_steps": 78105, "loss": 0.1781, "lr": 1.3459615866733534e-06, "epoch": 3.4373599641508226, "percentage": 68.75, "elapsed_time": "2:20:59", "remaining_time": "1:04:05", "throughput": 19975.58, "total_tokens": 168986560}
|
|
{"current_steps": 53700, "total_steps": 78105, "loss": 0.1384, "lr": 1.3454660460623905e-06, "epoch": 3.4376800460917996, "percentage": 68.75, "elapsed_time": "2:21:00", "remaining_time": "1:04:04", "throughput": 19975.79, "total_tokens": 169001344}
|
|
{"current_steps": 53705, "total_steps": 78105, "loss": 0.1207, "lr": 1.3449705631028398e-06, "epoch": 3.4380001280327765, "percentage": 68.76, "elapsed_time": "2:21:01", "remaining_time": "1:04:04", "throughput": 19976.1, "total_tokens": 169017856}
|
|
{"current_steps": 53710, "total_steps": 78105, "loss": 0.2258, "lr": 1.3444751378194427e-06, "epoch": 3.438320209973753, "percentage": 68.77, "elapsed_time": "2:21:01", "remaining_time": "1:04:03", "throughput": 19976.42, "total_tokens": 169034816}
|
|
{"current_steps": 53715, "total_steps": 78105, "loss": 0.1529, "lr": 1.3439797702369385e-06, "epoch": 3.43864029191473, "percentage": 68.77, "elapsed_time": "2:21:02", "remaining_time": "1:04:02", "throughput": 19976.67, "total_tokens": 169049920}
|
|
{"current_steps": 53720, "total_steps": 78105, "loss": 0.2182, "lr": 1.343484460380063e-06, "epoch": 3.438960373855707, "percentage": 68.78, "elapsed_time": "2:21:03", "remaining_time": "1:04:01", "throughput": 19976.94, "total_tokens": 169065664}
|
|
{"current_steps": 53725, "total_steps": 78105, "loss": 0.2003, "lr": 1.3429892082735496e-06, "epoch": 3.439280455796684, "percentage": 68.79, "elapsed_time": "2:21:03", "remaining_time": "1:04:00", "throughput": 19977.19, "total_tokens": 169081216}
|
|
{"current_steps": 53730, "total_steps": 78105, "loss": 0.2119, "lr": 1.3424940139421273e-06, "epoch": 3.439600537737661, "percentage": 68.79, "elapsed_time": "2:21:04", "remaining_time": "1:03:59", "throughput": 19977.37, "total_tokens": 169095296}
|
|
{"current_steps": 53735, "total_steps": 78105, "loss": 0.1635, "lr": 1.3419988774105258e-06, "epoch": 3.4399206196786376, "percentage": 68.8, "elapsed_time": "2:21:05", "remaining_time": "1:03:59", "throughput": 19977.65, "total_tokens": 169111680}
|
|
{"current_steps": 53740, "total_steps": 78105, "loss": 0.0987, "lr": 1.3415037987034685e-06, "epoch": 3.4402407016196146, "percentage": 68.8, "elapsed_time": "2:21:05", "remaining_time": "1:03:58", "throughput": 19977.93, "total_tokens": 169127872}
|
|
{"current_steps": 53745, "total_steps": 78105, "loss": 0.1418, "lr": 1.341008777845677e-06, "epoch": 3.4405607835605916, "percentage": 68.81, "elapsed_time": "2:21:06", "remaining_time": "1:03:57", "throughput": 19978.16, "total_tokens": 169142848}
|
|
{"current_steps": 53750, "total_steps": 78105, "loss": 0.1778, "lr": 1.3405138148618703e-06, "epoch": 3.4408808655015686, "percentage": 68.82, "elapsed_time": "2:21:07", "remaining_time": "1:03:56", "throughput": 19978.51, "total_tokens": 169160256}
|
|
{"current_steps": 53755, "total_steps": 78105, "loss": 0.1404, "lr": 1.3400189097767641e-06, "epoch": 3.441200947442545, "percentage": 68.82, "elapsed_time": "2:21:07", "remaining_time": "1:03:55", "throughput": 19978.74, "total_tokens": 169175936}
|
|
{"current_steps": 53760, "total_steps": 78105, "loss": 0.1682, "lr": 1.3395240626150707e-06, "epoch": 3.441521029383522, "percentage": 68.83, "elapsed_time": "2:21:08", "remaining_time": "1:03:54", "throughput": 19978.96, "total_tokens": 169190912}
|
|
{"current_steps": 53765, "total_steps": 78105, "loss": 0.1384, "lr": 1.3390292734015025e-06, "epoch": 3.441841111324499, "percentage": 68.84, "elapsed_time": "2:21:09", "remaining_time": "1:03:54", "throughput": 19979.19, "total_tokens": 169205760}
|
|
{"current_steps": 53770, "total_steps": 78105, "loss": 0.2142, "lr": 1.3385345421607632e-06, "epoch": 3.442161193265476, "percentage": 68.84, "elapsed_time": "2:21:09", "remaining_time": "1:03:53", "throughput": 19979.48, "total_tokens": 169222144}
|
|
{"current_steps": 53775, "total_steps": 78105, "loss": 0.1446, "lr": 1.338039868917561e-06, "epoch": 3.442481275206453, "percentage": 68.85, "elapsed_time": "2:21:10", "remaining_time": "1:03:52", "throughput": 19979.8, "total_tokens": 169239104}
|
|
{"current_steps": 53780, "total_steps": 78105, "loss": 0.2169, "lr": 1.3375452536965938e-06, "epoch": 3.4428013571474296, "percentage": 68.86, "elapsed_time": "2:21:11", "remaining_time": "1:03:51", "throughput": 19980.05, "total_tokens": 169254400}
|
|
{"current_steps": 53785, "total_steps": 78105, "loss": 0.1781, "lr": 1.3370506965225621e-06, "epoch": 3.4431214390884066, "percentage": 68.86, "elapsed_time": "2:21:11", "remaining_time": "1:03:50", "throughput": 19980.28, "total_tokens": 169269632}
|
|
{"current_steps": 53790, "total_steps": 78105, "loss": 0.1089, "lr": 1.3365561974201612e-06, "epoch": 3.4434415210293836, "percentage": 68.87, "elapsed_time": "2:21:12", "remaining_time": "1:03:49", "throughput": 19980.52, "total_tokens": 169284800}
|
|
{"current_steps": 53795, "total_steps": 78105, "loss": 0.1775, "lr": 1.3360617564140826e-06, "epoch": 3.4437616029703606, "percentage": 68.88, "elapsed_time": "2:21:13", "remaining_time": "1:03:49", "throughput": 19980.74, "total_tokens": 169299648}
|
|
{"current_steps": 53800, "total_steps": 78105, "loss": 0.2354, "lr": 1.3355673735290192e-06, "epoch": 3.444081684911337, "percentage": 68.88, "elapsed_time": "2:21:13", "remaining_time": "1:03:48", "throughput": 19981.01, "total_tokens": 169315200}
|
|
{"current_steps": 53805, "total_steps": 78105, "loss": 0.1418, "lr": 1.335073048789654e-06, "epoch": 3.444401766852314, "percentage": 68.89, "elapsed_time": "2:21:14", "remaining_time": "1:03:47", "throughput": 19981.33, "total_tokens": 169331904}
|
|
{"current_steps": 53810, "total_steps": 78105, "loss": 0.1935, "lr": 1.3345787822206745e-06, "epoch": 3.444721848793291, "percentage": 68.89, "elapsed_time": "2:21:15", "remaining_time": "1:03:46", "throughput": 19981.62, "total_tokens": 169348032}
|
|
{"current_steps": 53815, "total_steps": 78105, "loss": 0.1424, "lr": 1.3340845738467584e-06, "epoch": 3.445041930734268, "percentage": 68.9, "elapsed_time": "2:21:15", "remaining_time": "1:03:45", "throughput": 19981.89, "total_tokens": 169363776}
|
|
{"current_steps": 53820, "total_steps": 78105, "loss": 0.19, "lr": 1.3335904236925868e-06, "epoch": 3.445362012675245, "percentage": 68.91, "elapsed_time": "2:21:16", "remaining_time": "1:03:44", "throughput": 19982.1, "total_tokens": 169378368}
|
|
{"current_steps": 53825, "total_steps": 78105, "loss": 0.1903, "lr": 1.3330963317828333e-06, "epoch": 3.4456820946162217, "percentage": 68.91, "elapsed_time": "2:21:17", "remaining_time": "1:03:43", "throughput": 19982.32, "total_tokens": 169393280}
|
|
{"current_steps": 53830, "total_steps": 78105, "loss": 0.1758, "lr": 1.3326022981421713e-06, "epoch": 3.4460021765571986, "percentage": 68.92, "elapsed_time": "2:21:17", "remaining_time": "1:03:43", "throughput": 19982.63, "total_tokens": 169409984}
|
|
{"current_steps": 53835, "total_steps": 78105, "loss": 0.1885, "lr": 1.3321083227952697e-06, "epoch": 3.4463222584981756, "percentage": 68.93, "elapsed_time": "2:21:18", "remaining_time": "1:03:42", "throughput": 19982.88, "total_tokens": 169425024}
|
|
{"current_steps": 53840, "total_steps": 78105, "loss": 0.1566, "lr": 1.3316144057667951e-06, "epoch": 3.446642340439152, "percentage": 68.93, "elapsed_time": "2:21:19", "remaining_time": "1:03:41", "throughput": 19983.15, "total_tokens": 169440960}
|
|
{"current_steps": 53845, "total_steps": 78105, "loss": 0.1346, "lr": 1.3311205470814102e-06, "epoch": 3.446962422380129, "percentage": 68.94, "elapsed_time": "2:21:19", "remaining_time": "1:03:40", "throughput": 19983.38, "total_tokens": 169455936}
|
|
{"current_steps": 53850, "total_steps": 78105, "loss": 0.1984, "lr": 1.3306267467637773e-06, "epoch": 3.447282504321106, "percentage": 68.95, "elapsed_time": "2:21:20", "remaining_time": "1:03:39", "throughput": 19983.63, "total_tokens": 169471168}
|
|
{"current_steps": 53855, "total_steps": 78105, "loss": 0.1497, "lr": 1.3301330048385542e-06, "epoch": 3.447602586262083, "percentage": 68.95, "elapsed_time": "2:21:21", "remaining_time": "1:03:38", "throughput": 19983.87, "total_tokens": 169486336}
|
|
{"current_steps": 53860, "total_steps": 78105, "loss": 0.163, "lr": 1.329639321330395e-06, "epoch": 3.44792266820306, "percentage": 68.96, "elapsed_time": "2:21:21", "remaining_time": "1:03:38", "throughput": 19984.11, "total_tokens": 169501440}
|
|
{"current_steps": 53865, "total_steps": 78105, "loss": 0.0954, "lr": 1.3291456962639518e-06, "epoch": 3.4482427501440367, "percentage": 68.96, "elapsed_time": "2:21:22", "remaining_time": "1:03:37", "throughput": 19984.34, "total_tokens": 169516480}
|
|
{"current_steps": 53870, "total_steps": 78105, "loss": 0.1782, "lr": 1.328652129663874e-06, "epoch": 3.4485628320850137, "percentage": 68.97, "elapsed_time": "2:21:23", "remaining_time": "1:03:36", "throughput": 19984.63, "total_tokens": 169532864}
|
|
{"current_steps": 53875, "total_steps": 78105, "loss": 0.2489, "lr": 1.3281586215548072e-06, "epoch": 3.4488829140259907, "percentage": 68.98, "elapsed_time": "2:21:23", "remaining_time": "1:03:35", "throughput": 19984.87, "total_tokens": 169548032}
|
|
{"current_steps": 53880, "total_steps": 78105, "loss": 0.166, "lr": 1.3276651719613942e-06, "epoch": 3.4492029959669677, "percentage": 68.98, "elapsed_time": "2:21:24", "remaining_time": "1:03:34", "throughput": 19985.16, "total_tokens": 169564352}
|
|
{"current_steps": 53885, "total_steps": 78105, "loss": 0.1573, "lr": 1.327171780908278e-06, "epoch": 3.449523077907944, "percentage": 68.99, "elapsed_time": "2:21:25", "remaining_time": "1:03:33", "throughput": 19985.4, "total_tokens": 169579456}
|
|
{"current_steps": 53890, "total_steps": 78105, "loss": 0.1914, "lr": 1.326678448420092e-06, "epoch": 3.449843159848921, "percentage": 69.0, "elapsed_time": "2:21:25", "remaining_time": "1:03:33", "throughput": 19985.66, "total_tokens": 169595392}
|
|
{"current_steps": 53895, "total_steps": 78105, "loss": 0.1444, "lr": 1.3261851745214738e-06, "epoch": 3.450163241789898, "percentage": 69.0, "elapsed_time": "2:21:26", "remaining_time": "1:03:32", "throughput": 19986.0, "total_tokens": 169612928}
|
|
{"current_steps": 53900, "total_steps": 78105, "loss": 0.1282, "lr": 1.3256919592370538e-06, "epoch": 3.450483323730875, "percentage": 69.01, "elapsed_time": "2:21:27", "remaining_time": "1:03:31", "throughput": 19986.25, "total_tokens": 169628608}
|
|
{"current_steps": 53905, "total_steps": 78105, "loss": 0.1949, "lr": 1.3251988025914604e-06, "epoch": 3.450803405671852, "percentage": 69.02, "elapsed_time": "2:21:27", "remaining_time": "1:03:30", "throughput": 19986.48, "total_tokens": 169643840}
|
|
{"current_steps": 53910, "total_steps": 78105, "loss": 0.0894, "lr": 1.3247057046093192e-06, "epoch": 3.4511234876128287, "percentage": 69.02, "elapsed_time": "2:21:28", "remaining_time": "1:03:29", "throughput": 19986.76, "total_tokens": 169659904}
|
|
{"current_steps": 53915, "total_steps": 78105, "loss": 0.1221, "lr": 1.3242126653152526e-06, "epoch": 3.4514435695538057, "percentage": 69.03, "elapsed_time": "2:21:29", "remaining_time": "1:03:28", "throughput": 19987.04, "total_tokens": 169676288}
|
|
{"current_steps": 53920, "total_steps": 78105, "loss": 0.2169, "lr": 1.3237196847338833e-06, "epoch": 3.4517636514947827, "percentage": 69.04, "elapsed_time": "2:21:30", "remaining_time": "1:03:28", "throughput": 19987.29, "total_tokens": 169692224}
|
|
{"current_steps": 53925, "total_steps": 78105, "loss": 0.1808, "lr": 1.3232267628898238e-06, "epoch": 3.4520837334357597, "percentage": 69.04, "elapsed_time": "2:21:30", "remaining_time": "1:03:27", "throughput": 19987.59, "total_tokens": 169708864}
|
|
{"current_steps": 53930, "total_steps": 78105, "loss": 0.1437, "lr": 1.3227338998076907e-06, "epoch": 3.4524038153767362, "percentage": 69.05, "elapsed_time": "2:21:31", "remaining_time": "1:03:26", "throughput": 19987.83, "total_tokens": 169724288}
|
|
{"current_steps": 53935, "total_steps": 78105, "loss": 0.1715, "lr": 1.322241095512095e-06, "epoch": 3.4527238973177132, "percentage": 69.05, "elapsed_time": "2:21:32", "remaining_time": "1:03:25", "throughput": 19988.08, "total_tokens": 169739776}
|
|
{"current_steps": 53940, "total_steps": 78105, "loss": 0.1788, "lr": 1.321748350027644e-06, "epoch": 3.45304397925869, "percentage": 69.06, "elapsed_time": "2:21:32", "remaining_time": "1:03:24", "throughput": 19988.35, "total_tokens": 169755904}
|
|
{"current_steps": 53945, "total_steps": 78105, "loss": 0.174, "lr": 1.321255663378943e-06, "epoch": 3.453364061199667, "percentage": 69.07, "elapsed_time": "2:21:33", "remaining_time": "1:03:23", "throughput": 19988.58, "total_tokens": 169771200}
|
|
{"current_steps": 53950, "total_steps": 78105, "loss": 0.1106, "lr": 1.3207630355905948e-06, "epoch": 3.453684143140644, "percentage": 69.07, "elapsed_time": "2:21:34", "remaining_time": "1:03:23", "throughput": 19988.81, "total_tokens": 169785984}
|
|
{"current_steps": 53955, "total_steps": 78105, "loss": 0.2024, "lr": 1.320270466687198e-06, "epoch": 3.4540042250816207, "percentage": 69.08, "elapsed_time": "2:21:34", "remaining_time": "1:03:22", "throughput": 19989.1, "total_tokens": 169802560}
|
|
{"current_steps": 53960, "total_steps": 78105, "loss": 0.0919, "lr": 1.319777956693349e-06, "epoch": 3.4543243070225977, "percentage": 69.09, "elapsed_time": "2:21:35", "remaining_time": "1:03:21", "throughput": 19989.32, "total_tokens": 169817152}
|
|
{"current_steps": 53965, "total_steps": 78105, "loss": 0.1311, "lr": 1.3192855056336402e-06, "epoch": 3.4546443889635747, "percentage": 69.09, "elapsed_time": "2:21:36", "remaining_time": "1:03:20", "throughput": 19989.61, "total_tokens": 169833408}
|
|
{"current_steps": 53970, "total_steps": 78105, "loss": 0.1746, "lr": 1.3187931135326642e-06, "epoch": 3.4549644709045517, "percentage": 69.1, "elapsed_time": "2:21:36", "remaining_time": "1:03:19", "throughput": 19989.87, "total_tokens": 169849088}
|
|
{"current_steps": 53975, "total_steps": 78105, "loss": 0.1401, "lr": 1.3183007804150074e-06, "epoch": 3.4552845528455283, "percentage": 69.11, "elapsed_time": "2:21:37", "remaining_time": "1:03:18", "throughput": 19990.1, "total_tokens": 169864192}
|
|
{"current_steps": 53980, "total_steps": 78105, "loss": 0.2488, "lr": 1.3178085063052546e-06, "epoch": 3.4556046347865053, "percentage": 69.11, "elapsed_time": "2:21:38", "remaining_time": "1:03:18", "throughput": 19990.35, "total_tokens": 169879488}
|
|
{"current_steps": 53985, "total_steps": 78105, "loss": 0.1743, "lr": 1.317316291227987e-06, "epoch": 3.4559247167274822, "percentage": 69.12, "elapsed_time": "2:21:38", "remaining_time": "1:03:17", "throughput": 19990.61, "total_tokens": 169894976}
|
|
{"current_steps": 53990, "total_steps": 78105, "loss": 0.1497, "lr": 1.3168241352077823e-06, "epoch": 3.4562447986684592, "percentage": 69.12, "elapsed_time": "2:21:39", "remaining_time": "1:03:16", "throughput": 19990.84, "total_tokens": 169910080}
|
|
{"current_steps": 53995, "total_steps": 78105, "loss": 0.1819, "lr": 1.3163320382692196e-06, "epoch": 3.4565648806094362, "percentage": 69.13, "elapsed_time": "2:21:40", "remaining_time": "1:03:15", "throughput": 19991.09, "total_tokens": 169925120}
|
|
{"current_steps": 54000, "total_steps": 78105, "loss": 0.1647, "lr": 1.3158400004368672e-06, "epoch": 3.4568849625504128, "percentage": 69.14, "elapsed_time": "2:21:40", "remaining_time": "1:03:14", "throughput": 19991.31, "total_tokens": 169940096}
|
|
{"current_steps": 54005, "total_steps": 78105, "loss": 0.1532, "lr": 1.3153480217352993e-06, "epoch": 3.4572050444913898, "percentage": 69.14, "elapsed_time": "2:21:41", "remaining_time": "1:03:13", "throughput": 19991.57, "total_tokens": 169955520}
|
|
{"current_steps": 54010, "total_steps": 78105, "loss": 0.1127, "lr": 1.3148561021890784e-06, "epoch": 3.4575251264323668, "percentage": 69.15, "elapsed_time": "2:21:42", "remaining_time": "1:03:12", "throughput": 19991.87, "total_tokens": 169972160}
|
|
{"current_steps": 54015, "total_steps": 78105, "loss": 0.2193, "lr": 1.314364241822772e-06, "epoch": 3.4578452083733437, "percentage": 69.16, "elapsed_time": "2:21:42", "remaining_time": "1:03:12", "throughput": 19992.12, "total_tokens": 169987712}
|
|
{"current_steps": 54020, "total_steps": 78105, "loss": 0.1667, "lr": 1.313872440660939e-06, "epoch": 3.4581652903143203, "percentage": 69.16, "elapsed_time": "2:21:43", "remaining_time": "1:03:11", "throughput": 19992.4, "total_tokens": 170003968}
|
|
{"current_steps": 54025, "total_steps": 78105, "loss": 0.1623, "lr": 1.3133806987281373e-06, "epoch": 3.4584853722552973, "percentage": 69.17, "elapsed_time": "2:21:44", "remaining_time": "1:03:10", "throughput": 19992.64, "total_tokens": 170019008}
|
|
{"current_steps": 54030, "total_steps": 78105, "loss": 0.1978, "lr": 1.3128890160489248e-06, "epoch": 3.4588054541962743, "percentage": 69.18, "elapsed_time": "2:21:44", "remaining_time": "1:03:09", "throughput": 19992.88, "total_tokens": 170034368}
|
|
{"current_steps": 54035, "total_steps": 78105, "loss": 0.1493, "lr": 1.312397392647849e-06, "epoch": 3.4591255361372513, "percentage": 69.18, "elapsed_time": "2:21:45", "remaining_time": "1:03:08", "throughput": 19993.14, "total_tokens": 170050368}
|
|
{"current_steps": 54040, "total_steps": 78105, "loss": 0.2024, "lr": 1.3119058285494638e-06, "epoch": 3.4594456180782283, "percentage": 69.19, "elapsed_time": "2:21:46", "remaining_time": "1:03:07", "throughput": 19993.41, "total_tokens": 170066560}
|
|
{"current_steps": 54045, "total_steps": 78105, "loss": 0.1027, "lr": 1.3114143237783106e-06, "epoch": 3.459765700019205, "percentage": 69.2, "elapsed_time": "2:21:46", "remaining_time": "1:03:07", "throughput": 19993.72, "total_tokens": 170083520}
|
|
{"current_steps": 54050, "total_steps": 78105, "loss": 0.1372, "lr": 1.3109228783589361e-06, "epoch": 3.460085781960182, "percentage": 69.2, "elapsed_time": "2:21:47", "remaining_time": "1:03:06", "throughput": 19994.01, "total_tokens": 170099904}
|
|
{"current_steps": 54055, "total_steps": 78105, "loss": 0.1701, "lr": 1.3104314923158794e-06, "epoch": 3.460405863901159, "percentage": 69.21, "elapsed_time": "2:21:48", "remaining_time": "1:03:05", "throughput": 19994.3, "total_tokens": 170116736}
|
|
{"current_steps": 54060, "total_steps": 78105, "loss": 0.1479, "lr": 1.309940165673678e-06, "epoch": 3.4607259458421358, "percentage": 69.21, "elapsed_time": "2:21:48", "remaining_time": "1:03:04", "throughput": 19994.51, "total_tokens": 170131328}
|
|
{"current_steps": 54065, "total_steps": 78105, "loss": 0.1405, "lr": 1.3094488984568657e-06, "epoch": 3.4610460277831123, "percentage": 69.22, "elapsed_time": "2:21:49", "remaining_time": "1:03:03", "throughput": 19994.79, "total_tokens": 170147584}
|
|
{"current_steps": 54070, "total_steps": 78105, "loss": 0.1724, "lr": 1.308957690689974e-06, "epoch": 3.4613661097240893, "percentage": 69.23, "elapsed_time": "2:21:50", "remaining_time": "1:03:02", "throughput": 19995.1, "total_tokens": 170164288}
|
|
{"current_steps": 54075, "total_steps": 78105, "loss": 0.2134, "lr": 1.3084665423975304e-06, "epoch": 3.4616861916650663, "percentage": 69.23, "elapsed_time": "2:21:50", "remaining_time": "1:03:02", "throughput": 19995.35, "total_tokens": 170179776}
|
|
{"current_steps": 54080, "total_steps": 78105, "loss": 0.1867, "lr": 1.3079754536040624e-06, "epoch": 3.4620062736060433, "percentage": 69.24, "elapsed_time": "2:21:51", "remaining_time": "1:03:01", "throughput": 19995.62, "total_tokens": 170195648}
|
|
{"current_steps": 54085, "total_steps": 78105, "loss": 0.0758, "lr": 1.3074844243340906e-06, "epoch": 3.4623263555470203, "percentage": 69.25, "elapsed_time": "2:21:52", "remaining_time": "1:03:00", "throughput": 19995.85, "total_tokens": 170210816}
|
|
{"current_steps": 54090, "total_steps": 78105, "loss": 0.2233, "lr": 1.3069934546121354e-06, "epoch": 3.462646437487997, "percentage": 69.25, "elapsed_time": "2:21:52", "remaining_time": "1:02:59", "throughput": 19996.07, "total_tokens": 170225408}
|
|
{"current_steps": 54095, "total_steps": 78105, "loss": 0.1863, "lr": 1.3065025444627134e-06, "epoch": 3.462966519428974, "percentage": 69.26, "elapsed_time": "2:21:53", "remaining_time": "1:02:58", "throughput": 19996.33, "total_tokens": 170241088}
|
|
{"current_steps": 54100, "total_steps": 78105, "loss": 0.1855, "lr": 1.3060116939103368e-06, "epoch": 3.463286601369951, "percentage": 69.27, "elapsed_time": "2:21:54", "remaining_time": "1:02:57", "throughput": 19996.55, "total_tokens": 170255936}
|
|
{"current_steps": 54105, "total_steps": 78105, "loss": 0.0975, "lr": 1.3055209029795174e-06, "epoch": 3.4636066833109274, "percentage": 69.27, "elapsed_time": "2:21:54", "remaining_time": "1:02:57", "throughput": 19996.77, "total_tokens": 170271040}
|
|
{"current_steps": 54110, "total_steps": 78105, "loss": 0.1701, "lr": 1.3050301716947613e-06, "epoch": 3.4639267652519043, "percentage": 69.28, "elapsed_time": "2:21:55", "remaining_time": "1:02:56", "throughput": 19997.07, "total_tokens": 170287552}
|
|
{"current_steps": 54115, "total_steps": 78105, "loss": 0.1511, "lr": 1.304539500080576e-06, "epoch": 3.4642468471928813, "percentage": 69.28, "elapsed_time": "2:21:56", "remaining_time": "1:02:55", "throughput": 19997.35, "total_tokens": 170303744}
|
|
{"current_steps": 54120, "total_steps": 78105, "loss": 0.1382, "lr": 1.3040488881614593e-06, "epoch": 3.4645669291338583, "percentage": 69.29, "elapsed_time": "2:21:57", "remaining_time": "1:02:54", "throughput": 19997.73, "total_tokens": 170321728}
|
|
{"current_steps": 54125, "total_steps": 78105, "loss": 0.2174, "lr": 1.3035583359619125e-06, "epoch": 3.4648870110748353, "percentage": 69.3, "elapsed_time": "2:21:57", "remaining_time": "1:02:53", "throughput": 19997.96, "total_tokens": 170336768}
|
|
{"current_steps": 54130, "total_steps": 78105, "loss": 0.2128, "lr": 1.3030678435064304e-06, "epoch": 3.465207093015812, "percentage": 69.3, "elapsed_time": "2:21:58", "remaining_time": "1:02:52", "throughput": 19998.22, "total_tokens": 170352576}
|
|
{"current_steps": 54135, "total_steps": 78105, "loss": 0.1207, "lr": 1.3025774108195055e-06, "epoch": 3.465527174956789, "percentage": 69.31, "elapsed_time": "2:21:59", "remaining_time": "1:02:52", "throughput": 19998.49, "total_tokens": 170368832}
|
|
{"current_steps": 54140, "total_steps": 78105, "loss": 0.1648, "lr": 1.3020870379256273e-06, "epoch": 3.465847256897766, "percentage": 69.32, "elapsed_time": "2:21:59", "remaining_time": "1:02:51", "throughput": 19998.75, "total_tokens": 170384256}
|
|
{"current_steps": 54145, "total_steps": 78105, "loss": 0.2808, "lr": 1.3015967248492822e-06, "epoch": 3.466167338838743, "percentage": 69.32, "elapsed_time": "2:22:00", "remaining_time": "1:02:50", "throughput": 19998.98, "total_tokens": 170399360}
|
|
{"current_steps": 54150, "total_steps": 78105, "loss": 0.1558, "lr": 1.3011064716149563e-06, "epoch": 3.4664874207797194, "percentage": 69.33, "elapsed_time": "2:22:01", "remaining_time": "1:02:49", "throughput": 19999.25, "total_tokens": 170415360}
|
|
{"current_steps": 54155, "total_steps": 78105, "loss": 0.1096, "lr": 1.3006162782471264e-06, "epoch": 3.4668075027206964, "percentage": 69.34, "elapsed_time": "2:22:01", "remaining_time": "1:02:48", "throughput": 19999.48, "total_tokens": 170430528}
|
|
{"current_steps": 54160, "total_steps": 78105, "loss": 0.1795, "lr": 1.300126144770273e-06, "epoch": 3.4671275846616734, "percentage": 69.34, "elapsed_time": "2:22:02", "remaining_time": "1:02:47", "throughput": 19999.82, "total_tokens": 170448000}
|
|
{"current_steps": 54165, "total_steps": 78105, "loss": 0.1462, "lr": 1.2996360712088702e-06, "epoch": 3.4674476666026504, "percentage": 69.35, "elapsed_time": "2:22:03", "remaining_time": "1:02:47", "throughput": 20000.07, "total_tokens": 170463488}
|
|
{"current_steps": 54170, "total_steps": 78105, "loss": 0.1631, "lr": 1.2991460575873893e-06, "epoch": 3.4677677485436273, "percentage": 69.36, "elapsed_time": "2:22:03", "remaining_time": "1:02:46", "throughput": 20000.34, "total_tokens": 170479680}
|
|
{"current_steps": 54175, "total_steps": 78105, "loss": 0.2201, "lr": 1.2986561039302997e-06, "epoch": 3.468087830484604, "percentage": 69.36, "elapsed_time": "2:22:04", "remaining_time": "1:02:45", "throughput": 20000.62, "total_tokens": 170495616}
|
|
{"current_steps": 54180, "total_steps": 78105, "loss": 0.1784, "lr": 1.2981662102620662e-06, "epoch": 3.468407912425581, "percentage": 69.37, "elapsed_time": "2:22:05", "remaining_time": "1:02:44", "throughput": 20000.95, "total_tokens": 170512960}
|
|
{"current_steps": 54185, "total_steps": 78105, "loss": 0.2556, "lr": 1.2976763766071525e-06, "epoch": 3.468727994366558, "percentage": 69.37, "elapsed_time": "2:22:05", "remaining_time": "1:02:43", "throughput": 20001.21, "total_tokens": 170529024}
|
|
{"current_steps": 54190, "total_steps": 78105, "loss": 0.1448, "lr": 1.2971866029900176e-06, "epoch": 3.469048076307535, "percentage": 69.38, "elapsed_time": "2:22:06", "remaining_time": "1:02:42", "throughput": 20001.45, "total_tokens": 170544832}
|
|
{"current_steps": 54195, "total_steps": 78105, "loss": 0.1645, "lr": 1.2966968894351175e-06, "epoch": 3.4693681582485114, "percentage": 69.39, "elapsed_time": "2:22:07", "remaining_time": "1:02:42", "throughput": 20001.68, "total_tokens": 170559808}
|
|
{"current_steps": 54200, "total_steps": 78105, "loss": 0.1558, "lr": 1.296207235966908e-06, "epoch": 3.4696882401894884, "percentage": 69.39, "elapsed_time": "2:22:07", "remaining_time": "1:02:41", "throughput": 20001.94, "total_tokens": 170575424}
|
|
{"current_steps": 54205, "total_steps": 78105, "loss": 0.1591, "lr": 1.295717642609839e-06, "epoch": 3.4700083221304654, "percentage": 69.4, "elapsed_time": "2:22:08", "remaining_time": "1:02:40", "throughput": 20002.2, "total_tokens": 170591232}
|
|
{"current_steps": 54210, "total_steps": 78105, "loss": 0.1287, "lr": 1.295228109388358e-06, "epoch": 3.4703284040714424, "percentage": 69.41, "elapsed_time": "2:22:09", "remaining_time": "1:02:39", "throughput": 20002.47, "total_tokens": 170607232}
|
|
{"current_steps": 54215, "total_steps": 78105, "loss": 0.1459, "lr": 1.2947386363269097e-06, "epoch": 3.4706484860124194, "percentage": 69.41, "elapsed_time": "2:22:09", "remaining_time": "1:02:38", "throughput": 20002.7, "total_tokens": 170622272}
|
|
{"current_steps": 54220, "total_steps": 78105, "loss": 0.1371, "lr": 1.2942492234499348e-06, "epoch": 3.470968567953396, "percentage": 69.42, "elapsed_time": "2:22:10", "remaining_time": "1:02:37", "throughput": 20002.95, "total_tokens": 170637824}
|
|
{"current_steps": 54225, "total_steps": 78105, "loss": 0.1282, "lr": 1.2937598707818755e-06, "epoch": 3.471288649894373, "percentage": 69.43, "elapsed_time": "2:22:11", "remaining_time": "1:02:37", "throughput": 20003.19, "total_tokens": 170652928}
|
|
{"current_steps": 54230, "total_steps": 78105, "loss": 0.1599, "lr": 1.2932705783471628e-06, "epoch": 3.47160873183535, "percentage": 69.43, "elapsed_time": "2:22:11", "remaining_time": "1:02:36", "throughput": 20003.44, "total_tokens": 170668864}
|
|
{"current_steps": 54235, "total_steps": 78105, "loss": 0.134, "lr": 1.2927813461702337e-06, "epoch": 3.471928813776327, "percentage": 69.44, "elapsed_time": "2:22:12", "remaining_time": "1:02:35", "throughput": 20003.69, "total_tokens": 170684416}
|
|
{"current_steps": 54240, "total_steps": 78105, "loss": 0.1454, "lr": 1.2922921742755145e-06, "epoch": 3.4722488957173034, "percentage": 69.44, "elapsed_time": "2:22:13", "remaining_time": "1:02:34", "throughput": 20004.08, "total_tokens": 170702720}
|
|
{"current_steps": 54245, "total_steps": 78105, "loss": 0.1346, "lr": 1.2918030626874339e-06, "epoch": 3.4725689776582804, "percentage": 69.45, "elapsed_time": "2:22:14", "remaining_time": "1:02:33", "throughput": 20004.31, "total_tokens": 170717568}
|
|
{"current_steps": 54250, "total_steps": 78105, "loss": 0.1632, "lr": 1.2913140114304157e-06, "epoch": 3.4728890595992574, "percentage": 69.46, "elapsed_time": "2:22:14", "remaining_time": "1:02:32", "throughput": 20004.54, "total_tokens": 170732416}
|
|
{"current_steps": 54255, "total_steps": 78105, "loss": 0.1729, "lr": 1.2908250205288786e-06, "epoch": 3.4732091415402344, "percentage": 69.46, "elapsed_time": "2:22:15", "remaining_time": "1:02:32", "throughput": 20004.75, "total_tokens": 170747008}
|
|
{"current_steps": 54260, "total_steps": 78105, "loss": 0.1996, "lr": 1.2903360900072434e-06, "epoch": 3.4735292234812114, "percentage": 69.47, "elapsed_time": "2:22:15", "remaining_time": "1:02:31", "throughput": 20005.01, "total_tokens": 170762624}
|
|
{"current_steps": 54265, "total_steps": 78105, "loss": 0.1166, "lr": 1.2898472198899212e-06, "epoch": 3.473849305422188, "percentage": 69.48, "elapsed_time": "2:22:16", "remaining_time": "1:02:30", "throughput": 20005.22, "total_tokens": 170777216}
|
|
{"current_steps": 54270, "total_steps": 78105, "loss": 0.1858, "lr": 1.2893584102013262e-06, "epoch": 3.474169387363165, "percentage": 69.48, "elapsed_time": "2:22:17", "remaining_time": "1:02:29", "throughput": 20005.45, "total_tokens": 170792128}
|
|
{"current_steps": 54275, "total_steps": 78105, "loss": 0.1894, "lr": 1.2888696609658663e-06, "epoch": 3.474489469304142, "percentage": 69.49, "elapsed_time": "2:22:17", "remaining_time": "1:02:28", "throughput": 20005.72, "total_tokens": 170808128}
|
|
{"current_steps": 54280, "total_steps": 78105, "loss": 0.1623, "lr": 1.288380972207947e-06, "epoch": 3.474809551245119, "percentage": 69.5, "elapsed_time": "2:22:18", "remaining_time": "1:02:27", "throughput": 20006.01, "total_tokens": 170824448}
|
|
{"current_steps": 54285, "total_steps": 78105, "loss": 0.224, "lr": 1.2878923439519709e-06, "epoch": 3.4751296331860955, "percentage": 69.5, "elapsed_time": "2:22:19", "remaining_time": "1:02:27", "throughput": 20006.27, "total_tokens": 170840512}
|
|
{"current_steps": 54290, "total_steps": 78105, "loss": 0.1499, "lr": 1.2874037762223373e-06, "epoch": 3.4754497151270725, "percentage": 69.51, "elapsed_time": "2:22:20", "remaining_time": "1:02:26", "throughput": 20006.56, "total_tokens": 170857088}
|
|
{"current_steps": 54295, "total_steps": 78105, "loss": 0.1297, "lr": 1.2869152690434434e-06, "epoch": 3.4757697970680494, "percentage": 69.52, "elapsed_time": "2:22:20", "remaining_time": "1:02:25", "throughput": 20006.82, "total_tokens": 170872576}
|
|
{"current_steps": 54300, "total_steps": 78105, "loss": 0.1237, "lr": 1.286426822439682e-06, "epoch": 3.4760898790090264, "percentage": 69.52, "elapsed_time": "2:22:21", "remaining_time": "1:02:24", "throughput": 20007.06, "total_tokens": 170887872}
|
|
{"current_steps": 54305, "total_steps": 78105, "loss": 0.1636, "lr": 1.2859384364354426e-06, "epoch": 3.4764099609500034, "percentage": 69.53, "elapsed_time": "2:22:22", "remaining_time": "1:02:23", "throughput": 20007.3, "total_tokens": 170903168}
|
|
{"current_steps": 54310, "total_steps": 78105, "loss": 0.1947, "lr": 1.285450111055116e-06, "epoch": 3.47673004289098, "percentage": 69.53, "elapsed_time": "2:22:22", "remaining_time": "1:02:22", "throughput": 20007.6, "total_tokens": 170919424}
|
|
{"current_steps": 54315, "total_steps": 78105, "loss": 0.1981, "lr": 1.2849618463230826e-06, "epoch": 3.477050124831957, "percentage": 69.54, "elapsed_time": "2:22:23", "remaining_time": "1:02:22", "throughput": 20007.82, "total_tokens": 170934336}
|
|
{"current_steps": 54320, "total_steps": 78105, "loss": 0.199, "lr": 1.2844736422637267e-06, "epoch": 3.477370206772934, "percentage": 69.55, "elapsed_time": "2:22:24", "remaining_time": "1:02:21", "throughput": 20008.22, "total_tokens": 170953024}
|
|
{"current_steps": 54325, "total_steps": 78105, "loss": 0.1898, "lr": 1.2839854989014256e-06, "epoch": 3.477690288713911, "percentage": 69.55, "elapsed_time": "2:22:24", "remaining_time": "1:02:20", "throughput": 20008.43, "total_tokens": 170967808}
|
|
{"current_steps": 54330, "total_steps": 78105, "loss": 0.111, "lr": 1.2834974162605546e-06, "epoch": 3.4780103706548875, "percentage": 69.56, "elapsed_time": "2:22:25", "remaining_time": "1:02:19", "throughput": 20008.68, "total_tokens": 170983360}
|
|
{"current_steps": 54335, "total_steps": 78105, "loss": 0.0923, "lr": 1.2830093943654858e-06, "epoch": 3.4783304525958645, "percentage": 69.57, "elapsed_time": "2:22:26", "remaining_time": "1:02:18", "throughput": 20008.91, "total_tokens": 170998784}
|
|
{"current_steps": 54340, "total_steps": 78105, "loss": 0.128, "lr": 1.2825214332405884e-06, "epoch": 3.4786505345368415, "percentage": 69.57, "elapsed_time": "2:22:26", "remaining_time": "1:02:17", "throughput": 20009.11, "total_tokens": 171013440}
|
|
{"current_steps": 54345, "total_steps": 78105, "loss": 0.1324, "lr": 1.2820335329102308e-06, "epoch": 3.4789706164778185, "percentage": 69.58, "elapsed_time": "2:22:27", "remaining_time": "1:02:17", "throughput": 20009.36, "total_tokens": 171028928}
|
|
{"current_steps": 54350, "total_steps": 78105, "loss": 0.1495, "lr": 1.2815456933987725e-06, "epoch": 3.4792906984187955, "percentage": 69.59, "elapsed_time": "2:22:28", "remaining_time": "1:02:16", "throughput": 20009.61, "total_tokens": 171044672}
|
|
{"current_steps": 54355, "total_steps": 78105, "loss": 0.1096, "lr": 1.2810579147305762e-06, "epoch": 3.479610780359772, "percentage": 69.59, "elapsed_time": "2:22:28", "remaining_time": "1:02:15", "throughput": 20009.87, "total_tokens": 171060480}
|
|
{"current_steps": 54360, "total_steps": 78105, "loss": 0.1723, "lr": 1.280570196929999e-06, "epoch": 3.479930862300749, "percentage": 69.6, "elapsed_time": "2:22:29", "remaining_time": "1:02:14", "throughput": 20010.11, "total_tokens": 171075968}
|
|
{"current_steps": 54365, "total_steps": 78105, "loss": 0.1517, "lr": 1.2800825400213937e-06, "epoch": 3.480250944241726, "percentage": 69.61, "elapsed_time": "2:22:30", "remaining_time": "1:02:13", "throughput": 20010.37, "total_tokens": 171091840}
|
|
{"current_steps": 54370, "total_steps": 78105, "loss": 0.1621, "lr": 1.2795949440291122e-06, "epoch": 3.4805710261827025, "percentage": 69.61, "elapsed_time": "2:22:30", "remaining_time": "1:02:12", "throughput": 20010.61, "total_tokens": 171107008}
|
|
{"current_steps": 54375, "total_steps": 78105, "loss": 0.123, "lr": 1.2791074089775018e-06, "epoch": 3.4808911081236795, "percentage": 69.62, "elapsed_time": "2:22:31", "remaining_time": "1:02:11", "throughput": 20010.83, "total_tokens": 171122176}
|
|
{"current_steps": 54380, "total_steps": 78105, "loss": 0.1391, "lr": 1.2786199348909095e-06, "epoch": 3.4812111900646565, "percentage": 69.62, "elapsed_time": "2:22:32", "remaining_time": "1:02:11", "throughput": 20011.07, "total_tokens": 171137600}
|
|
{"current_steps": 54385, "total_steps": 78105, "loss": 0.123, "lr": 1.2781325217936741e-06, "epoch": 3.4815312720056335, "percentage": 69.63, "elapsed_time": "2:22:32", "remaining_time": "1:02:10", "throughput": 20011.34, "total_tokens": 171153536}
|
|
{"current_steps": 54390, "total_steps": 78105, "loss": 0.0861, "lr": 1.2776451697101367e-06, "epoch": 3.4818513539466105, "percentage": 69.64, "elapsed_time": "2:22:33", "remaining_time": "1:02:09", "throughput": 20011.65, "total_tokens": 171170048}
|
|
{"current_steps": 54395, "total_steps": 78105, "loss": 0.1005, "lr": 1.2771578786646328e-06, "epoch": 3.482171435887587, "percentage": 69.64, "elapsed_time": "2:22:34", "remaining_time": "1:02:08", "throughput": 20011.89, "total_tokens": 171185664}
|
|
{"current_steps": 54400, "total_steps": 78105, "loss": 0.1379, "lr": 1.2766706486814948e-06, "epoch": 3.482491517828564, "percentage": 69.65, "elapsed_time": "2:22:34", "remaining_time": "1:02:07", "throughput": 20012.1, "total_tokens": 171200448}
|
|
{"current_steps": 54405, "total_steps": 78105, "loss": 0.19, "lr": 1.2761834797850524e-06, "epoch": 3.482811599769541, "percentage": 69.66, "elapsed_time": "2:22:35", "remaining_time": "1:02:06", "throughput": 20012.33, "total_tokens": 171216000}
|
|
{"current_steps": 54410, "total_steps": 78105, "loss": 0.109, "lr": 1.2756963719996323e-06, "epoch": 3.483131681710518, "percentage": 69.66, "elapsed_time": "2:22:36", "remaining_time": "1:02:06", "throughput": 20012.58, "total_tokens": 171231424}
|
|
{"current_steps": 54415, "total_steps": 78105, "loss": 0.146, "lr": 1.2752093253495584e-06, "epoch": 3.4834517636514946, "percentage": 69.67, "elapsed_time": "2:22:36", "remaining_time": "1:02:05", "throughput": 20012.79, "total_tokens": 171246272}
|
|
{"current_steps": 54420, "total_steps": 78105, "loss": 0.172, "lr": 1.2747223398591513e-06, "epoch": 3.4837718455924715, "percentage": 69.68, "elapsed_time": "2:22:37", "remaining_time": "1:02:04", "throughput": 20013.03, "total_tokens": 171261440}
|
|
{"current_steps": 54425, "total_steps": 78105, "loss": 0.1479, "lr": 1.274235415552727e-06, "epoch": 3.4840919275334485, "percentage": 69.68, "elapsed_time": "2:22:38", "remaining_time": "1:02:03", "throughput": 20013.19, "total_tokens": 171275264}
|
|
{"current_steps": 54430, "total_steps": 78105, "loss": 0.2251, "lr": 1.2737485524546025e-06, "epoch": 3.4844120094744255, "percentage": 69.69, "elapsed_time": "2:22:38", "remaining_time": "1:02:02", "throughput": 20013.46, "total_tokens": 171291392}
|
|
{"current_steps": 54435, "total_steps": 78105, "loss": 0.1986, "lr": 1.2732617505890878e-06, "epoch": 3.4847320914154025, "percentage": 69.69, "elapsed_time": "2:22:39", "remaining_time": "1:02:01", "throughput": 20013.72, "total_tokens": 171306944}
|
|
{"current_steps": 54440, "total_steps": 78105, "loss": 0.2145, "lr": 1.2727750099804915e-06, "epoch": 3.485052173356379, "percentage": 69.7, "elapsed_time": "2:22:40", "remaining_time": "1:02:01", "throughput": 20014.0, "total_tokens": 171323136}
|
|
{"current_steps": 54445, "total_steps": 78105, "loss": 0.1658, "lr": 1.2722883306531188e-06, "epoch": 3.485372255297356, "percentage": 69.71, "elapsed_time": "2:22:40", "remaining_time": "1:02:00", "throughput": 20014.27, "total_tokens": 171339264}
|
|
{"current_steps": 54450, "total_steps": 78105, "loss": 0.1896, "lr": 1.271801712631271e-06, "epoch": 3.485692337238333, "percentage": 69.71, "elapsed_time": "2:22:41", "remaining_time": "1:01:59", "throughput": 20014.56, "total_tokens": 171355392}
|
|
{"current_steps": 54455, "total_steps": 78105, "loss": 0.2414, "lr": 1.27131515593925e-06, "epoch": 3.48601241917931, "percentage": 69.72, "elapsed_time": "2:22:42", "remaining_time": "1:01:58", "throughput": 20014.82, "total_tokens": 171371456}
|
|
{"current_steps": 54460, "total_steps": 78105, "loss": 0.1557, "lr": 1.2708286606013487e-06, "epoch": 3.4863325011202866, "percentage": 69.73, "elapsed_time": "2:22:42", "remaining_time": "1:01:57", "throughput": 20015.04, "total_tokens": 171386304}
|
|
{"current_steps": 54465, "total_steps": 78105, "loss": 0.1722, "lr": 1.270342226641863e-06, "epoch": 3.4866525830612636, "percentage": 69.73, "elapsed_time": "2:22:43", "remaining_time": "1:01:56", "throughput": 20015.25, "total_tokens": 171400896}
|
|
{"current_steps": 54470, "total_steps": 78105, "loss": 0.1206, "lr": 1.2698558540850797e-06, "epoch": 3.4869726650022406, "percentage": 69.74, "elapsed_time": "2:22:44", "remaining_time": "1:01:56", "throughput": 20015.45, "total_tokens": 171415488}
|
|
{"current_steps": 54475, "total_steps": 78105, "loss": 0.1393, "lr": 1.2693695429552883e-06, "epoch": 3.4872927469432176, "percentage": 69.75, "elapsed_time": "2:22:44", "remaining_time": "1:01:55", "throughput": 20015.7, "total_tokens": 171430976}
|
|
{"current_steps": 54480, "total_steps": 78105, "loss": 0.1704, "lr": 1.268883293276772e-06, "epoch": 3.4876128288841945, "percentage": 69.75, "elapsed_time": "2:22:45", "remaining_time": "1:01:54", "throughput": 20015.99, "total_tokens": 171447040}
|
|
{"current_steps": 54485, "total_steps": 78105, "loss": 0.1885, "lr": 1.268397105073811e-06, "epoch": 3.487932910825171, "percentage": 69.76, "elapsed_time": "2:22:46", "remaining_time": "1:01:53", "throughput": 20016.3, "total_tokens": 171463616}
|
|
{"current_steps": 54490, "total_steps": 78105, "loss": 0.2525, "lr": 1.2679109783706838e-06, "epoch": 3.488252992766148, "percentage": 69.77, "elapsed_time": "2:22:46", "remaining_time": "1:01:52", "throughput": 20016.59, "total_tokens": 171480128}
|
|
{"current_steps": 54495, "total_steps": 78105, "loss": 0.1436, "lr": 1.2674249131916636e-06, "epoch": 3.488573074707125, "percentage": 69.77, "elapsed_time": "2:22:47", "remaining_time": "1:01:51", "throughput": 20016.83, "total_tokens": 171495744}
|
|
{"current_steps": 54500, "total_steps": 78105, "loss": 0.183, "lr": 1.2669389095610237e-06, "epoch": 3.488893156648102, "percentage": 69.78, "elapsed_time": "2:22:48", "remaining_time": "1:01:51", "throughput": 20017.04, "total_tokens": 171510272}
|
|
{"current_steps": 54505, "total_steps": 78105, "loss": 0.1267, "lr": 1.2664529675030321e-06, "epoch": 3.4892132385890786, "percentage": 69.78, "elapsed_time": "2:22:48", "remaining_time": "1:01:50", "throughput": 20017.28, "total_tokens": 171525568}
|
|
{"current_steps": 54510, "total_steps": 78105, "loss": 0.2177, "lr": 1.2659670870419537e-06, "epoch": 3.4895333205300556, "percentage": 69.79, "elapsed_time": "2:22:49", "remaining_time": "1:01:49", "throughput": 20017.54, "total_tokens": 171541376}
|
|
{"current_steps": 54515, "total_steps": 78105, "loss": 0.1468, "lr": 1.2654812682020517e-06, "epoch": 3.4898534024710326, "percentage": 69.8, "elapsed_time": "2:22:50", "remaining_time": "1:01:48", "throughput": 20017.82, "total_tokens": 171557312}
|
|
{"current_steps": 54520, "total_steps": 78105, "loss": 0.0905, "lr": 1.2649955110075846e-06, "epoch": 3.4901734844120096, "percentage": 69.8, "elapsed_time": "2:22:50", "remaining_time": "1:01:47", "throughput": 20018.1, "total_tokens": 171573696}
|
|
{"current_steps": 54525, "total_steps": 78105, "loss": 0.1485, "lr": 1.2645098154828084e-06, "epoch": 3.4904935663529866, "percentage": 69.81, "elapsed_time": "2:22:51", "remaining_time": "1:01:46", "throughput": 20018.33, "total_tokens": 171588928}
|
|
{"current_steps": 54530, "total_steps": 78105, "loss": 0.1302, "lr": 1.2640241816519765e-06, "epoch": 3.490813648293963, "percentage": 69.82, "elapsed_time": "2:22:52", "remaining_time": "1:01:46", "throughput": 20018.57, "total_tokens": 171604160}
|
|
{"current_steps": 54535, "total_steps": 78105, "loss": 0.1761, "lr": 1.2635386095393387e-06, "epoch": 3.49113373023494, "percentage": 69.82, "elapsed_time": "2:22:52", "remaining_time": "1:01:45", "throughput": 20018.82, "total_tokens": 171619584}
|
|
{"current_steps": 54540, "total_steps": 78105, "loss": 0.2513, "lr": 1.2630530991691436e-06, "epoch": 3.491453812175917, "percentage": 69.83, "elapsed_time": "2:22:53", "remaining_time": "1:01:44", "throughput": 20019.09, "total_tokens": 171635456}
|
|
{"current_steps": 54545, "total_steps": 78105, "loss": 0.1897, "lr": 1.262567650565632e-06, "epoch": 3.491773894116894, "percentage": 69.84, "elapsed_time": "2:22:54", "remaining_time": "1:01:43", "throughput": 20019.34, "total_tokens": 171650880}
|
|
{"current_steps": 54550, "total_steps": 78105, "loss": 0.1038, "lr": 1.2620822637530475e-06, "epoch": 3.4920939760578706, "percentage": 69.84, "elapsed_time": "2:22:54", "remaining_time": "1:01:42", "throughput": 20019.57, "total_tokens": 171666304}
|
|
{"current_steps": 54555, "total_steps": 78105, "loss": 0.1508, "lr": 1.2615969387556265e-06, "epoch": 3.4924140579988476, "percentage": 69.85, "elapsed_time": "2:22:55", "remaining_time": "1:01:41", "throughput": 20019.82, "total_tokens": 171681856}
|
|
{"current_steps": 54560, "total_steps": 78105, "loss": 0.2129, "lr": 1.2611116755976037e-06, "epoch": 3.4927341399398246, "percentage": 69.85, "elapsed_time": "2:22:56", "remaining_time": "1:01:41", "throughput": 20020.06, "total_tokens": 171697600}
|
|
{"current_steps": 54565, "total_steps": 78105, "loss": 0.1515, "lr": 1.2606264743032108e-06, "epoch": 3.4930542218808016, "percentage": 69.86, "elapsed_time": "2:22:56", "remaining_time": "1:01:40", "throughput": 20020.3, "total_tokens": 171713024}
|
|
{"current_steps": 54570, "total_steps": 78105, "loss": 0.2095, "lr": 1.260141334896675e-06, "epoch": 3.4933743038217786, "percentage": 69.87, "elapsed_time": "2:22:57", "remaining_time": "1:01:39", "throughput": 20020.51, "total_tokens": 171728128}
|
|
{"current_steps": 54575, "total_steps": 78105, "loss": 0.1379, "lr": 1.259656257402225e-06, "epoch": 3.493694385762755, "percentage": 69.87, "elapsed_time": "2:22:58", "remaining_time": "1:01:38", "throughput": 20020.76, "total_tokens": 171743744}
|
|
{"current_steps": 54580, "total_steps": 78105, "loss": 0.2119, "lr": 1.2591712418440788e-06, "epoch": 3.494014467703732, "percentage": 69.88, "elapsed_time": "2:22:58", "remaining_time": "1:01:37", "throughput": 20021.02, "total_tokens": 171759616}
|
|
{"current_steps": 54585, "total_steps": 78105, "loss": 0.1256, "lr": 1.2586862882464584e-06, "epoch": 3.494334549644709, "percentage": 69.89, "elapsed_time": "2:22:59", "remaining_time": "1:01:36", "throughput": 20021.34, "total_tokens": 171776192}
|
|
{"current_steps": 54590, "total_steps": 78105, "loss": 0.1207, "lr": 1.2582013966335788e-06, "epoch": 3.494654631585686, "percentage": 69.89, "elapsed_time": "2:23:00", "remaining_time": "1:01:36", "throughput": 20021.55, "total_tokens": 171791168}
|
|
{"current_steps": 54595, "total_steps": 78105, "loss": 0.1626, "lr": 1.2577165670296535e-06, "epoch": 3.4949747135266627, "percentage": 69.9, "elapsed_time": "2:23:00", "remaining_time": "1:01:35", "throughput": 20021.79, "total_tokens": 171806528}
|
|
{"current_steps": 54600, "total_steps": 78105, "loss": 0.1652, "lr": 1.2572317994588918e-06, "epoch": 3.4952947954676397, "percentage": 69.91, "elapsed_time": "2:23:01", "remaining_time": "1:01:34", "throughput": 20022.01, "total_tokens": 171821504}
|
|
{"current_steps": 54605, "total_steps": 78105, "loss": 0.1281, "lr": 1.2567470939454995e-06, "epoch": 3.4956148774086166, "percentage": 69.91, "elapsed_time": "2:23:02", "remaining_time": "1:01:33", "throughput": 20022.26, "total_tokens": 171837312}
|
|
{"current_steps": 54610, "total_steps": 78105, "loss": 0.164, "lr": 1.2562624505136839e-06, "epoch": 3.4959349593495936, "percentage": 69.92, "elapsed_time": "2:23:02", "remaining_time": "1:01:32", "throughput": 20022.48, "total_tokens": 171852416}
|
|
{"current_steps": 54615, "total_steps": 78105, "loss": 0.1751, "lr": 1.255777869187641e-06, "epoch": 3.4962550412905706, "percentage": 69.93, "elapsed_time": "2:23:03", "remaining_time": "1:01:31", "throughput": 20022.72, "total_tokens": 171867840}
|
|
{"current_steps": 54620, "total_steps": 78105, "loss": 0.2423, "lr": 1.2552933499915715e-06, "epoch": 3.496575123231547, "percentage": 69.93, "elapsed_time": "2:23:04", "remaining_time": "1:01:30", "throughput": 20022.94, "total_tokens": 171882816}
|
|
{"current_steps": 54625, "total_steps": 78105, "loss": 0.2101, "lr": 1.2548088929496682e-06, "epoch": 3.496895205172524, "percentage": 69.94, "elapsed_time": "2:23:05", "remaining_time": "1:01:30", "throughput": 20023.23, "total_tokens": 171899520}
|
|
{"current_steps": 54630, "total_steps": 78105, "loss": 0.1913, "lr": 1.254324498086123e-06, "epoch": 3.497215287113501, "percentage": 69.94, "elapsed_time": "2:23:05", "remaining_time": "1:01:29", "throughput": 20023.5, "total_tokens": 171915584}
|
|
{"current_steps": 54635, "total_steps": 78105, "loss": 0.1486, "lr": 1.2538401654251242e-06, "epoch": 3.4975353690544777, "percentage": 69.95, "elapsed_time": "2:23:06", "remaining_time": "1:01:28", "throughput": 20023.71, "total_tokens": 171930624}
|
|
{"current_steps": 54640, "total_steps": 78105, "loss": 0.1292, "lr": 1.2533558949908564e-06, "epoch": 3.4978554509954547, "percentage": 69.96, "elapsed_time": "2:23:07", "remaining_time": "1:01:27", "throughput": 20023.94, "total_tokens": 171946112}
|
|
{"current_steps": 54645, "total_steps": 78105, "loss": 0.183, "lr": 1.2528716868075014e-06, "epoch": 3.4981755329364317, "percentage": 69.96, "elapsed_time": "2:23:07", "remaining_time": "1:01:26", "throughput": 20024.19, "total_tokens": 171961856}
|
|
{"current_steps": 54650, "total_steps": 78105, "loss": 0.1333, "lr": 1.2523875408992387e-06, "epoch": 3.4984956148774087, "percentage": 69.97, "elapsed_time": "2:23:08", "remaining_time": "1:01:26", "throughput": 20024.5, "total_tokens": 171978624}
|
|
{"current_steps": 54655, "total_steps": 78105, "loss": 0.1237, "lr": 1.2519034572902422e-06, "epoch": 3.4988156968183857, "percentage": 69.98, "elapsed_time": "2:23:09", "remaining_time": "1:01:25", "throughput": 20024.78, "total_tokens": 171994688}
|
|
{"current_steps": 54660, "total_steps": 78105, "loss": 0.1462, "lr": 1.2514194360046883e-06, "epoch": 3.499135778759362, "percentage": 69.98, "elapsed_time": "2:23:09", "remaining_time": "1:01:24", "throughput": 20025.03, "total_tokens": 172010048}
|
|
{"current_steps": 54665, "total_steps": 78105, "loss": 0.19, "lr": 1.250935477066742e-06, "epoch": 3.499455860700339, "percentage": 69.99, "elapsed_time": "2:23:10", "remaining_time": "1:01:23", "throughput": 20025.26, "total_tokens": 172025152}
|
|
{"current_steps": 54670, "total_steps": 78105, "loss": 0.169, "lr": 1.250451580500573e-06, "epoch": 3.499775942641316, "percentage": 70.0, "elapsed_time": "2:23:11", "remaining_time": "1:01:22", "throughput": 20025.56, "total_tokens": 172041856}
|
|
{"current_steps": 54675, "total_steps": 78105, "loss": 0.1667, "lr": 1.249967746330343e-06, "epoch": 3.500096024582293, "percentage": 70.0, "elapsed_time": "2:23:11", "remaining_time": "1:01:21", "throughput": 20025.76, "total_tokens": 172056576}
|
|
{"current_steps": 54680, "total_steps": 78105, "loss": 0.159, "lr": 1.249483974580212e-06, "epoch": 3.5004161065232697, "percentage": 70.01, "elapsed_time": "2:23:12", "remaining_time": "1:01:21", "throughput": 20026.0, "total_tokens": 172072256}
|
|
{"current_steps": 54684, "total_steps": 78105, "eval_loss": 0.5747029781341553, "epoch": 3.5006721720760514, "percentage": 70.01, "elapsed_time": "2:24:03", "remaining_time": "1:01:42", "throughput": 19908.04, "total_tokens": 172084032}
|
|
{"current_steps": 54685, "total_steps": 78105, "loss": 0.1904, "lr": 1.2490002652743394e-06, "epoch": 3.5007361884642467, "percentage": 70.01, "elapsed_time": "2:24:38", "remaining_time": "1:01:56", "throughput": 19829.66, "total_tokens": 172086912}
|
|
{"current_steps": 54690, "total_steps": 78105, "loss": 0.1551, "lr": 1.2485166184368756e-06, "epoch": 3.5010562704052237, "percentage": 70.02, "elapsed_time": "2:24:38", "remaining_time": "1:01:55", "throughput": 19829.9, "total_tokens": 172102144}
|
|
{"current_steps": 54695, "total_steps": 78105, "loss": 0.1735, "lr": 1.2480330340919746e-06, "epoch": 3.5013763523462007, "percentage": 70.03, "elapsed_time": "2:24:39", "remaining_time": "1:01:54", "throughput": 19830.12, "total_tokens": 172117184}
|
|
{"current_steps": 54700, "total_steps": 78105, "loss": 0.1312, "lr": 1.2475495122637813e-06, "epoch": 3.5016964342871777, "percentage": 70.03, "elapsed_time": "2:24:40", "remaining_time": "1:01:54", "throughput": 19830.38, "total_tokens": 172132736}
|
|
{"current_steps": 54705, "total_steps": 78105, "loss": 0.1723, "lr": 1.247066052976442e-06, "epoch": 3.5020165162281542, "percentage": 70.04, "elapsed_time": "2:24:40", "remaining_time": "1:01:53", "throughput": 19830.64, "total_tokens": 172148160}
|
|
{"current_steps": 54710, "total_steps": 78105, "loss": 0.2084, "lr": 1.246582656254098e-06, "epoch": 3.5023365981691312, "percentage": 70.05, "elapsed_time": "2:24:41", "remaining_time": "1:01:52", "throughput": 19830.9, "total_tokens": 172163392}
|
|
{"current_steps": 54715, "total_steps": 78105, "loss": 0.2715, "lr": 1.2460993221208872e-06, "epoch": 3.5026566801101082, "percentage": 70.05, "elapsed_time": "2:24:42", "remaining_time": "1:01:51", "throughput": 19831.15, "total_tokens": 172178688}
|
|
{"current_steps": 54720, "total_steps": 78105, "loss": 0.1911, "lr": 1.245616050600945e-06, "epoch": 3.502976762051085, "percentage": 70.06, "elapsed_time": "2:24:42", "remaining_time": "1:01:50", "throughput": 19831.48, "total_tokens": 172195712}
|
|
{"current_steps": 54725, "total_steps": 78105, "loss": 0.1843, "lr": 1.2451328417184025e-06, "epoch": 3.5032968439920618, "percentage": 70.07, "elapsed_time": "2:24:43", "remaining_time": "1:01:49", "throughput": 19831.76, "total_tokens": 172211712}
|
|
{"current_steps": 54730, "total_steps": 78105, "loss": 0.2073, "lr": 1.2446496954973903e-06, "epoch": 3.5036169259330388, "percentage": 70.07, "elapsed_time": "2:24:44", "remaining_time": "1:01:49", "throughput": 19832.07, "total_tokens": 172228288}
|
|
{"current_steps": 54735, "total_steps": 78105, "loss": 0.185, "lr": 1.2441666119620335e-06, "epoch": 3.5039370078740157, "percentage": 70.08, "elapsed_time": "2:24:45", "remaining_time": "1:01:48", "throughput": 19832.39, "total_tokens": 172244864}
|
|
{"current_steps": 54740, "total_steps": 78105, "loss": 0.3193, "lr": 1.2436835911364548e-06, "epoch": 3.5042570898149927, "percentage": 70.09, "elapsed_time": "2:24:45", "remaining_time": "1:01:47", "throughput": 19832.63, "total_tokens": 172260288}
|
|
{"current_steps": 54745, "total_steps": 78105, "loss": 0.2089, "lr": 1.2432006330447736e-06, "epoch": 3.5045771717559697, "percentage": 70.09, "elapsed_time": "2:24:46", "remaining_time": "1:01:46", "throughput": 19832.86, "total_tokens": 172275008}
|
|
{"current_steps": 54750, "total_steps": 78105, "loss": 0.1869, "lr": 1.2427177377111062e-06, "epoch": 3.5048972536969463, "percentage": 70.1, "elapsed_time": "2:24:47", "remaining_time": "1:01:45", "throughput": 19833.16, "total_tokens": 172291520}
|
|
{"current_steps": 54755, "total_steps": 78105, "loss": 0.1421, "lr": 1.2422349051595662e-06, "epoch": 3.5052173356379233, "percentage": 70.1, "elapsed_time": "2:24:47", "remaining_time": "1:01:44", "throughput": 19833.42, "total_tokens": 172306816}
|
|
{"current_steps": 54760, "total_steps": 78105, "loss": 0.1578, "lr": 1.2417521354142637e-06, "epoch": 3.5055374175789002, "percentage": 70.11, "elapsed_time": "2:24:48", "remaining_time": "1:01:43", "throughput": 19833.69, "total_tokens": 172322752}
|
|
{"current_steps": 54765, "total_steps": 78105, "loss": 0.2266, "lr": 1.2412694284993043e-06, "epoch": 3.5058574995198772, "percentage": 70.12, "elapsed_time": "2:24:49", "remaining_time": "1:01:43", "throughput": 19833.93, "total_tokens": 172337984}
|
|
{"current_steps": 54770, "total_steps": 78105, "loss": 0.1267, "lr": 1.2407867844387952e-06, "epoch": 3.506177581460854, "percentage": 70.12, "elapsed_time": "2:24:49", "remaining_time": "1:01:42", "throughput": 19834.2, "total_tokens": 172353728}
|
|
{"current_steps": 54775, "total_steps": 78105, "loss": 0.1485, "lr": 1.2403042032568332e-06, "epoch": 3.5064976634018308, "percentage": 70.13, "elapsed_time": "2:24:50", "remaining_time": "1:01:41", "throughput": 19834.47, "total_tokens": 172369472}
|
|
{"current_steps": 54780, "total_steps": 78105, "loss": 0.1988, "lr": 1.2398216849775189e-06, "epoch": 3.5068177453428078, "percentage": 70.14, "elapsed_time": "2:24:51", "remaining_time": "1:01:40", "throughput": 19834.7, "total_tokens": 172384768}
|
|
{"current_steps": 54785, "total_steps": 78105, "loss": 0.1131, "lr": 1.2393392296249454e-06, "epoch": 3.5071378272837848, "percentage": 70.14, "elapsed_time": "2:24:51", "remaining_time": "1:01:39", "throughput": 19834.93, "total_tokens": 172399680}
|
|
{"current_steps": 54790, "total_steps": 78105, "loss": 0.2084, "lr": 1.2388568372232043e-06, "epoch": 3.5074579092247617, "percentage": 70.15, "elapsed_time": "2:24:52", "remaining_time": "1:01:38", "throughput": 19835.17, "total_tokens": 172415040}
|
|
{"current_steps": 54795, "total_steps": 78105, "loss": 0.2005, "lr": 1.2383745077963836e-06, "epoch": 3.5077779911657383, "percentage": 70.16, "elapsed_time": "2:24:53", "remaining_time": "1:01:38", "throughput": 19835.41, "total_tokens": 172429888}
|
|
{"current_steps": 54800, "total_steps": 78105, "loss": 0.1536, "lr": 1.2378922413685677e-06, "epoch": 3.5080980731067153, "percentage": 70.16, "elapsed_time": "2:24:53", "remaining_time": "1:01:37", "throughput": 19835.62, "total_tokens": 172444416}
|
|
{"current_steps": 54805, "total_steps": 78105, "loss": 0.2097, "lr": 1.2374100379638406e-06, "epoch": 3.5084181550476923, "percentage": 70.17, "elapsed_time": "2:24:54", "remaining_time": "1:01:36", "throughput": 19835.87, "total_tokens": 172459968}
|
|
{"current_steps": 54810, "total_steps": 78105, "loss": 0.1501, "lr": 1.2369278976062783e-06, "epoch": 3.508738236988669, "percentage": 70.17, "elapsed_time": "2:24:55", "remaining_time": "1:01:35", "throughput": 19836.08, "total_tokens": 172474816}
|
|
{"current_steps": 54815, "total_steps": 78105, "loss": 0.162, "lr": 1.2364458203199583e-06, "epoch": 3.509058318929646, "percentage": 70.18, "elapsed_time": "2:24:55", "remaining_time": "1:01:34", "throughput": 19836.38, "total_tokens": 172491392}
|
|
{"current_steps": 54820, "total_steps": 78105, "loss": 0.1621, "lr": 1.2359638061289524e-06, "epoch": 3.509378400870623, "percentage": 70.19, "elapsed_time": "2:24:56", "remaining_time": "1:01:33", "throughput": 19836.66, "total_tokens": 172507072}
|
|
{"current_steps": 54825, "total_steps": 78105, "loss": 0.1915, "lr": 1.23548185505733e-06, "epoch": 3.5096984828116, "percentage": 70.19, "elapsed_time": "2:24:57", "remaining_time": "1:01:32", "throughput": 19836.89, "total_tokens": 172522368}
|
|
{"current_steps": 54830, "total_steps": 78105, "loss": 0.1609, "lr": 1.234999967129157e-06, "epoch": 3.510018564752577, "percentage": 70.2, "elapsed_time": "2:24:57", "remaining_time": "1:01:32", "throughput": 19837.24, "total_tokens": 172540032}
|
|
{"current_steps": 54835, "total_steps": 78105, "loss": 0.1412, "lr": 1.2345181423684968e-06, "epoch": 3.5103386466935538, "percentage": 70.21, "elapsed_time": "2:24:58", "remaining_time": "1:01:31", "throughput": 19837.49, "total_tokens": 172555840}
|
|
{"current_steps": 54840, "total_steps": 78105, "loss": 0.1772, "lr": 1.2340363807994087e-06, "epoch": 3.5106587286345303, "percentage": 70.21, "elapsed_time": "2:24:59", "remaining_time": "1:01:30", "throughput": 19837.76, "total_tokens": 172571712}
|
|
{"current_steps": 54845, "total_steps": 78105, "loss": 0.1899, "lr": 1.2335546824459487e-06, "epoch": 3.5109788105755073, "percentage": 70.22, "elapsed_time": "2:24:59", "remaining_time": "1:01:29", "throughput": 19838.05, "total_tokens": 172588032}
|
|
{"current_steps": 54850, "total_steps": 78105, "loss": 0.1741, "lr": 1.233073047332172e-06, "epoch": 3.5112988925164843, "percentage": 70.23, "elapsed_time": "2:25:00", "remaining_time": "1:01:28", "throughput": 19838.28, "total_tokens": 172603200}
|
|
{"current_steps": 54855, "total_steps": 78105, "loss": 0.1684, "lr": 1.2325914754821284e-06, "epoch": 3.511618974457461, "percentage": 70.23, "elapsed_time": "2:25:01", "remaining_time": "1:01:27", "throughput": 19838.54, "total_tokens": 172618880}
|
|
{"current_steps": 54860, "total_steps": 78105, "loss": 0.1823, "lr": 1.2321099669198646e-06, "epoch": 3.511939056398438, "percentage": 70.24, "elapsed_time": "2:25:01", "remaining_time": "1:01:27", "throughput": 19838.84, "total_tokens": 172635456}
|
|
{"current_steps": 54865, "total_steps": 78105, "loss": 0.0929, "lr": 1.231628521669425e-06, "epoch": 3.512259138339415, "percentage": 70.25, "elapsed_time": "2:25:02", "remaining_time": "1:01:26", "throughput": 19839.11, "total_tokens": 172651456}
|
|
{"current_steps": 54870, "total_steps": 78105, "loss": 0.2091, "lr": 1.2311471397548503e-06, "epoch": 3.512579220280392, "percentage": 70.25, "elapsed_time": "2:25:03", "remaining_time": "1:01:25", "throughput": 19839.41, "total_tokens": 172668096}
|
|
{"current_steps": 54875, "total_steps": 78105, "loss": 0.1772, "lr": 1.230665821200178e-06, "epoch": 3.512899302221369, "percentage": 70.26, "elapsed_time": "2:25:03", "remaining_time": "1:01:24", "throughput": 19839.66, "total_tokens": 172683456}
|
|
{"current_steps": 54880, "total_steps": 78105, "loss": 0.1792, "lr": 1.2301845660294429e-06, "epoch": 3.513219384162346, "percentage": 70.26, "elapsed_time": "2:25:04", "remaining_time": "1:01:23", "throughput": 19839.92, "total_tokens": 172699712}
|
|
{"current_steps": 54885, "total_steps": 78105, "loss": 0.1578, "lr": 1.2297033742666756e-06, "epoch": 3.5135394661033224, "percentage": 70.27, "elapsed_time": "2:25:05", "remaining_time": "1:01:22", "throughput": 19840.17, "total_tokens": 172715200}
|
|
{"current_steps": 54890, "total_steps": 78105, "loss": 0.2734, "lr": 1.2292222459359068e-06, "epoch": 3.5138595480442993, "percentage": 70.28, "elapsed_time": "2:25:05", "remaining_time": "1:01:22", "throughput": 19840.4, "total_tokens": 172730176}
|
|
{"current_steps": 54895, "total_steps": 78105, "loss": 0.1462, "lr": 1.2287411810611579e-06, "epoch": 3.5141796299852763, "percentage": 70.28, "elapsed_time": "2:25:06", "remaining_time": "1:01:21", "throughput": 19840.64, "total_tokens": 172745728}
|
|
{"current_steps": 54900, "total_steps": 78105, "loss": 0.1788, "lr": 1.228260179666454e-06, "epoch": 3.514499711926253, "percentage": 70.29, "elapsed_time": "2:25:07", "remaining_time": "1:01:20", "throughput": 19840.89, "total_tokens": 172761536}
|
|
{"current_steps": 54905, "total_steps": 78105, "loss": 0.1697, "lr": 1.227779241775812e-06, "epoch": 3.51481979386723, "percentage": 70.3, "elapsed_time": "2:25:08", "remaining_time": "1:01:19", "throughput": 19841.15, "total_tokens": 172777280}
|
|
{"current_steps": 54910, "total_steps": 78105, "loss": 0.2206, "lr": 1.2272983674132472e-06, "epoch": 3.515139875808207, "percentage": 70.3, "elapsed_time": "2:25:08", "remaining_time": "1:01:18", "throughput": 19841.39, "total_tokens": 172792704}
|
|
{"current_steps": 54915, "total_steps": 78105, "loss": 0.1639, "lr": 1.2268175566027745e-06, "epoch": 3.515459957749184, "percentage": 70.31, "elapsed_time": "2:25:09", "remaining_time": "1:01:17", "throughput": 19841.67, "total_tokens": 172809088}
|
|
{"current_steps": 54920, "total_steps": 78105, "loss": 0.11, "lr": 1.2263368093683994e-06, "epoch": 3.515780039690161, "percentage": 70.32, "elapsed_time": "2:25:10", "remaining_time": "1:01:17", "throughput": 19841.88, "total_tokens": 172823744}
|
|
{"current_steps": 54925, "total_steps": 78105, "loss": 0.2312, "lr": 1.2258561257341317e-06, "epoch": 3.516100121631138, "percentage": 70.32, "elapsed_time": "2:25:10", "remaining_time": "1:01:16", "throughput": 19842.2, "total_tokens": 172840896}
|
|
{"current_steps": 54930, "total_steps": 78105, "loss": 0.1506, "lr": 1.2253755057239702e-06, "epoch": 3.5164202035721144, "percentage": 70.33, "elapsed_time": "2:25:11", "remaining_time": "1:01:15", "throughput": 19842.5, "total_tokens": 172857472}
|
|
{"current_steps": 54935, "total_steps": 78105, "loss": 0.1447, "lr": 1.2248949493619178e-06, "epoch": 3.5167402855130914, "percentage": 70.33, "elapsed_time": "2:25:12", "remaining_time": "1:01:14", "throughput": 19842.71, "total_tokens": 172872384}
|
|
{"current_steps": 54940, "total_steps": 78105, "loss": 0.2104, "lr": 1.2244144566719699e-06, "epoch": 3.5170603674540684, "percentage": 70.34, "elapsed_time": "2:25:12", "remaining_time": "1:01:13", "throughput": 19842.94, "total_tokens": 172887680}
|
|
{"current_steps": 54945, "total_steps": 78105, "loss": 0.1787, "lr": 1.2239340276781195e-06, "epoch": 3.517380449395045, "percentage": 70.35, "elapsed_time": "2:25:13", "remaining_time": "1:01:12", "throughput": 19843.2, "total_tokens": 172903488}
|
|
{"current_steps": 54950, "total_steps": 78105, "loss": 0.1806, "lr": 1.2234536624043573e-06, "epoch": 3.517700531336022, "percentage": 70.35, "elapsed_time": "2:25:14", "remaining_time": "1:01:11", "throughput": 19843.42, "total_tokens": 172918464}
|
|
{"current_steps": 54955, "total_steps": 78105, "loss": 0.2439, "lr": 1.2229733608746696e-06, "epoch": 3.518020613276999, "percentage": 70.36, "elapsed_time": "2:25:14", "remaining_time": "1:01:11", "throughput": 19843.73, "total_tokens": 172935488}
|
|
{"current_steps": 54960, "total_steps": 78105, "loss": 0.2157, "lr": 1.2224931231130396e-06, "epoch": 3.518340695217976, "percentage": 70.37, "elapsed_time": "2:25:15", "remaining_time": "1:01:10", "throughput": 19844.0, "total_tokens": 172951488}
|
|
{"current_steps": 54965, "total_steps": 78105, "loss": 0.2159, "lr": 1.2220129491434499e-06, "epoch": 3.518660777158953, "percentage": 70.37, "elapsed_time": "2:25:16", "remaining_time": "1:01:09", "throughput": 19844.24, "total_tokens": 172967360}
|
|
{"current_steps": 54970, "total_steps": 78105, "loss": 0.1599, "lr": 1.2215328389898766e-06, "epoch": 3.51898085909993, "percentage": 70.38, "elapsed_time": "2:25:16", "remaining_time": "1:01:08", "throughput": 19844.46, "total_tokens": 172982336}
|
|
{"current_steps": 54975, "total_steps": 78105, "loss": 0.2029, "lr": 1.2210527926762939e-06, "epoch": 3.5193009410409064, "percentage": 70.39, "elapsed_time": "2:25:17", "remaining_time": "1:01:07", "throughput": 19844.74, "total_tokens": 172998464}
|
|
{"current_steps": 54980, "total_steps": 78105, "loss": 0.1688, "lr": 1.220572810226673e-06, "epoch": 3.5196210229818834, "percentage": 70.39, "elapsed_time": "2:25:18", "remaining_time": "1:01:06", "throughput": 19844.96, "total_tokens": 173013824}
|
|
{"current_steps": 54985, "total_steps": 78105, "loss": 0.148, "lr": 1.2200928916649818e-06, "epoch": 3.5199411049228604, "percentage": 70.4, "elapsed_time": "2:25:18", "remaining_time": "1:01:06", "throughput": 19845.22, "total_tokens": 173029504}
|
|
{"current_steps": 54990, "total_steps": 78105, "loss": 0.1436, "lr": 1.2196130370151847e-06, "epoch": 3.520261186863837, "percentage": 70.41, "elapsed_time": "2:25:19", "remaining_time": "1:01:05", "throughput": 19845.44, "total_tokens": 173045056}
|
|
{"current_steps": 54995, "total_steps": 78105, "loss": 0.1622, "lr": 1.2191332463012424e-06, "epoch": 3.520581268804814, "percentage": 70.41, "elapsed_time": "2:25:20", "remaining_time": "1:01:04", "throughput": 19845.75, "total_tokens": 173062144}
|
|
{"current_steps": 55000, "total_steps": 78105, "loss": 0.0933, "lr": 1.2186535195471159e-06, "epoch": 3.520901350745791, "percentage": 70.42, "elapsed_time": "2:25:21", "remaining_time": "1:01:03", "throughput": 19845.95, "total_tokens": 173076544}
|
|
{"current_steps": 55005, "total_steps": 78105, "loss": 0.1678, "lr": 1.2181738567767562e-06, "epoch": 3.521221432686768, "percentage": 70.42, "elapsed_time": "2:25:21", "remaining_time": "1:01:02", "throughput": 19846.23, "total_tokens": 173092864}
|
|
{"current_steps": 55010, "total_steps": 78105, "loss": 0.1625, "lr": 1.2176942580141193e-06, "epoch": 3.521541514627745, "percentage": 70.43, "elapsed_time": "2:25:22", "remaining_time": "1:01:01", "throughput": 19846.5, "total_tokens": 173108736}
|
|
{"current_steps": 55015, "total_steps": 78105, "loss": 0.1287, "lr": 1.2172147232831499e-06, "epoch": 3.5218615965687214, "percentage": 70.44, "elapsed_time": "2:25:23", "remaining_time": "1:01:01", "throughput": 19846.82, "total_tokens": 173125952}
|
|
{"current_steps": 55020, "total_steps": 78105, "loss": 0.1979, "lr": 1.2167352526077967e-06, "epoch": 3.5221816785096984, "percentage": 70.44, "elapsed_time": "2:25:23", "remaining_time": "1:01:00", "throughput": 19847.05, "total_tokens": 173141376}
|
|
{"current_steps": 55025, "total_steps": 78105, "loss": 0.1789, "lr": 1.2162558460120002e-06, "epoch": 3.5225017604506754, "percentage": 70.45, "elapsed_time": "2:25:24", "remaining_time": "1:00:59", "throughput": 19847.33, "total_tokens": 173157312}
|
|
{"current_steps": 55030, "total_steps": 78105, "loss": 0.1455, "lr": 1.2157765035196995e-06, "epoch": 3.5228218423916524, "percentage": 70.46, "elapsed_time": "2:25:25", "remaining_time": "1:00:58", "throughput": 19847.66, "total_tokens": 173174464}
|
|
{"current_steps": 55035, "total_steps": 78105, "loss": 0.1821, "lr": 1.2152972251548326e-06, "epoch": 3.523141924332629, "percentage": 70.46, "elapsed_time": "2:25:25", "remaining_time": "1:00:57", "throughput": 19847.88, "total_tokens": 173189824}
|
|
{"current_steps": 55040, "total_steps": 78105, "loss": 0.1983, "lr": 1.2148180109413288e-06, "epoch": 3.523462006273606, "percentage": 70.47, "elapsed_time": "2:25:26", "remaining_time": "1:00:56", "throughput": 19848.11, "total_tokens": 173205056}
|
|
{"current_steps": 55045, "total_steps": 78105, "loss": 0.1334, "lr": 1.2143388609031202e-06, "epoch": 3.523782088214583, "percentage": 70.48, "elapsed_time": "2:25:27", "remaining_time": "1:00:56", "throughput": 19848.34, "total_tokens": 173220288}
|
|
{"current_steps": 55050, "total_steps": 78105, "loss": 0.2197, "lr": 1.2138597750641319e-06, "epoch": 3.52410217015556, "percentage": 70.48, "elapsed_time": "2:25:27", "remaining_time": "1:00:55", "throughput": 19848.58, "total_tokens": 173235904}
|
|
{"current_steps": 55055, "total_steps": 78105, "loss": 0.1482, "lr": 1.2133807534482872e-06, "epoch": 3.524422252096537, "percentage": 70.49, "elapsed_time": "2:25:28", "remaining_time": "1:00:54", "throughput": 19848.79, "total_tokens": 173250816}
|
|
{"current_steps": 55060, "total_steps": 78105, "loss": 0.1846, "lr": 1.212901796079506e-06, "epoch": 3.5247423340375135, "percentage": 70.49, "elapsed_time": "2:25:29", "remaining_time": "1:00:53", "throughput": 19849.03, "total_tokens": 173266048}
|
|
{"current_steps": 55065, "total_steps": 78105, "loss": 0.1462, "lr": 1.2124229029817047e-06, "epoch": 3.5250624159784905, "percentage": 70.5, "elapsed_time": "2:25:29", "remaining_time": "1:00:52", "throughput": 19849.27, "total_tokens": 173281408}
|
|
{"current_steps": 55070, "total_steps": 78105, "loss": 0.1719, "lr": 1.2119440741787972e-06, "epoch": 3.5253824979194675, "percentage": 70.51, "elapsed_time": "2:25:30", "remaining_time": "1:00:51", "throughput": 19849.59, "total_tokens": 173298560}
|
|
{"current_steps": 55075, "total_steps": 78105, "loss": 0.1421, "lr": 1.2114653096946922e-06, "epoch": 3.525702579860444, "percentage": 70.51, "elapsed_time": "2:25:31", "remaining_time": "1:00:51", "throughput": 19849.82, "total_tokens": 173313920}
|
|
{"current_steps": 55080, "total_steps": 78105, "loss": 0.1484, "lr": 1.2109866095532988e-06, "epoch": 3.526022661801421, "percentage": 70.52, "elapsed_time": "2:25:31", "remaining_time": "1:00:50", "throughput": 19850.13, "total_tokens": 173330304}
|
|
{"current_steps": 55085, "total_steps": 78105, "loss": 0.1517, "lr": 1.21050797377852e-06, "epoch": 3.526342743742398, "percentage": 70.53, "elapsed_time": "2:25:32", "remaining_time": "1:00:49", "throughput": 19850.39, "total_tokens": 173345664}
|
|
{"current_steps": 55090, "total_steps": 78105, "loss": 0.1688, "lr": 1.2100294023942563e-06, "epoch": 3.526662825683375, "percentage": 70.53, "elapsed_time": "2:25:33", "remaining_time": "1:00:48", "throughput": 19850.64, "total_tokens": 173361408}
|
|
{"current_steps": 55095, "total_steps": 78105, "loss": 0.2171, "lr": 1.2095508954244051e-06, "epoch": 3.526982907624352, "percentage": 70.54, "elapsed_time": "2:25:33", "remaining_time": "1:00:47", "throughput": 19850.95, "total_tokens": 173378112}
|
|
{"current_steps": 55100, "total_steps": 78105, "loss": 0.1459, "lr": 1.2090724528928604e-06, "epoch": 3.527302989565329, "percentage": 70.55, "elapsed_time": "2:25:34", "remaining_time": "1:00:46", "throughput": 19851.21, "total_tokens": 173393856}
|
|
{"current_steps": 55105, "total_steps": 78105, "loss": 0.1535, "lr": 1.208594074823513e-06, "epoch": 3.5276230715063055, "percentage": 70.55, "elapsed_time": "2:25:35", "remaining_time": "1:00:46", "throughput": 19851.46, "total_tokens": 173409920}
|
|
{"current_steps": 55110, "total_steps": 78105, "loss": 0.1757, "lr": 1.2081157612402512e-06, "epoch": 3.5279431534472825, "percentage": 70.56, "elapsed_time": "2:25:36", "remaining_time": "1:00:45", "throughput": 19851.7, "total_tokens": 173425472}
|
|
{"current_steps": 55115, "total_steps": 78105, "loss": 0.1457, "lr": 1.2076375121669578e-06, "epoch": 3.5282632353882595, "percentage": 70.57, "elapsed_time": "2:25:36", "remaining_time": "1:00:44", "throughput": 19851.99, "total_tokens": 173441408}
|
|
{"current_steps": 55120, "total_steps": 78105, "loss": 0.1543, "lr": 1.2071593276275176e-06, "epoch": 3.528583317329236, "percentage": 70.57, "elapsed_time": "2:25:37", "remaining_time": "1:00:43", "throughput": 19852.34, "total_tokens": 173458688}
|
|
{"current_steps": 55125, "total_steps": 78105, "loss": 0.1125, "lr": 1.2066812076458043e-06, "epoch": 3.528903399270213, "percentage": 70.58, "elapsed_time": "2:25:38", "remaining_time": "1:00:42", "throughput": 19852.58, "total_tokens": 173474048}
|
|
{"current_steps": 55130, "total_steps": 78105, "loss": 0.1115, "lr": 1.206203152245696e-06, "epoch": 3.52922348121119, "percentage": 70.58, "elapsed_time": "2:25:38", "remaining_time": "1:00:41", "throughput": 19852.8, "total_tokens": 173488704}
|
|
{"current_steps": 55135, "total_steps": 78105, "loss": 0.1522, "lr": 1.2057251614510629e-06, "epoch": 3.529543563152167, "percentage": 70.59, "elapsed_time": "2:25:39", "remaining_time": "1:00:40", "throughput": 19853.02, "total_tokens": 173503808}
|
|
{"current_steps": 55140, "total_steps": 78105, "loss": 0.2235, "lr": 1.2052472352857727e-06, "epoch": 3.529863645093144, "percentage": 70.6, "elapsed_time": "2:25:40", "remaining_time": "1:00:40", "throughput": 19853.33, "total_tokens": 173520256}
|
|
{"current_steps": 55145, "total_steps": 78105, "loss": 0.1624, "lr": 1.2047693737736935e-06, "epoch": 3.530183727034121, "percentage": 70.6, "elapsed_time": "2:25:40", "remaining_time": "1:00:39", "throughput": 19853.7, "total_tokens": 173538816}
|
|
{"current_steps": 55150, "total_steps": 78105, "loss": 0.1122, "lr": 1.2042915769386832e-06, "epoch": 3.5305038089750975, "percentage": 70.61, "elapsed_time": "2:25:41", "remaining_time": "1:00:38", "throughput": 19853.96, "total_tokens": 173554624}
|
|
{"current_steps": 55155, "total_steps": 78105, "loss": 0.1504, "lr": 1.203813844804604e-06, "epoch": 3.5308238909160745, "percentage": 70.62, "elapsed_time": "2:25:42", "remaining_time": "1:00:37", "throughput": 19854.25, "total_tokens": 173570880}
|
|
{"current_steps": 55160, "total_steps": 78105, "loss": 0.2002, "lr": 1.2033361773953084e-06, "epoch": 3.5311439728570515, "percentage": 70.62, "elapsed_time": "2:25:42", "remaining_time": "1:00:36", "throughput": 19854.53, "total_tokens": 173587008}
|
|
{"current_steps": 55165, "total_steps": 78105, "loss": 0.1973, "lr": 1.2028585747346508e-06, "epoch": 3.531464054798028, "percentage": 70.63, "elapsed_time": "2:25:43", "remaining_time": "1:00:35", "throughput": 19854.76, "total_tokens": 173602432}
|
|
{"current_steps": 55170, "total_steps": 78105, "loss": 0.1014, "lr": 1.2023810368464791e-06, "epoch": 3.531784136739005, "percentage": 70.64, "elapsed_time": "2:25:44", "remaining_time": "1:00:35", "throughput": 19855.03, "total_tokens": 173618560}
|
|
{"current_steps": 55175, "total_steps": 78105, "loss": 0.1761, "lr": 1.2019035637546396e-06, "epoch": 3.532104218679982, "percentage": 70.64, "elapsed_time": "2:25:44", "remaining_time": "1:00:34", "throughput": 19855.27, "total_tokens": 173633664}
|
|
{"current_steps": 55180, "total_steps": 78105, "loss": 0.1754, "lr": 1.2014261554829748e-06, "epoch": 3.532424300620959, "percentage": 70.65, "elapsed_time": "2:25:45", "remaining_time": "1:00:33", "throughput": 19855.6, "total_tokens": 173650240}
|
|
{"current_steps": 55185, "total_steps": 78105, "loss": 0.1137, "lr": 1.2009488120553236e-06, "epoch": 3.532744382561936, "percentage": 70.65, "elapsed_time": "2:25:46", "remaining_time": "1:00:32", "throughput": 19855.82, "total_tokens": 173665088}
|
|
{"current_steps": 55190, "total_steps": 78105, "loss": 0.154, "lr": 1.2004715334955214e-06, "epoch": 3.533064464502913, "percentage": 70.66, "elapsed_time": "2:25:46", "remaining_time": "1:00:31", "throughput": 19856.05, "total_tokens": 173680256}
|
|
{"current_steps": 55195, "total_steps": 78105, "loss": 0.2658, "lr": 1.1999943198274027e-06, "epoch": 3.5333845464438896, "percentage": 70.67, "elapsed_time": "2:25:47", "remaining_time": "1:00:30", "throughput": 19856.3, "total_tokens": 173695872}
|
|
{"current_steps": 55200, "total_steps": 78105, "loss": 0.1829, "lr": 1.199517171074796e-06, "epoch": 3.5337046283848665, "percentage": 70.67, "elapsed_time": "2:25:48", "remaining_time": "1:00:30", "throughput": 19856.54, "total_tokens": 173711104}
|
|
{"current_steps": 55205, "total_steps": 78105, "loss": 0.1435, "lr": 1.199040087261528e-06, "epoch": 3.5340247103258435, "percentage": 70.68, "elapsed_time": "2:25:49", "remaining_time": "1:00:29", "throughput": 19856.86, "total_tokens": 173728128}
|
|
{"current_steps": 55210, "total_steps": 78105, "loss": 0.1638, "lr": 1.1985630684114216e-06, "epoch": 3.53434479226682, "percentage": 70.69, "elapsed_time": "2:25:49", "remaining_time": "1:00:28", "throughput": 19857.18, "total_tokens": 173744640}
|
|
{"current_steps": 55215, "total_steps": 78105, "loss": 0.1891, "lr": 1.1980861145482968e-06, "epoch": 3.534664874207797, "percentage": 70.69, "elapsed_time": "2:25:50", "remaining_time": "1:00:27", "throughput": 19857.43, "total_tokens": 173760384}
|
|
{"current_steps": 55220, "total_steps": 78105, "loss": 0.1534, "lr": 1.1976092256959696e-06, "epoch": 3.534984956148774, "percentage": 70.7, "elapsed_time": "2:25:51", "remaining_time": "1:00:26", "throughput": 19857.67, "total_tokens": 173775680}
|
|
{"current_steps": 55225, "total_steps": 78105, "loss": 0.1374, "lr": 1.197132401878253e-06, "epoch": 3.535305038089751, "percentage": 70.71, "elapsed_time": "2:25:51", "remaining_time": "1:00:25", "throughput": 19858.01, "total_tokens": 173793472}
|
|
{"current_steps": 55230, "total_steps": 78105, "loss": 0.1977, "lr": 1.1966556431189597e-06, "epoch": 3.535625120030728, "percentage": 70.71, "elapsed_time": "2:25:52", "remaining_time": "1:00:25", "throughput": 19858.3, "total_tokens": 173809664}
|
|
{"current_steps": 55235, "total_steps": 78105, "loss": 0.1825, "lr": 1.196178949441893e-06, "epoch": 3.535945201971705, "percentage": 70.72, "elapsed_time": "2:25:53", "remaining_time": "1:00:24", "throughput": 19858.52, "total_tokens": 173824832}
|
|
{"current_steps": 55240, "total_steps": 78105, "loss": 0.1694, "lr": 1.1957023208708598e-06, "epoch": 3.5362652839126816, "percentage": 70.73, "elapsed_time": "2:25:53", "remaining_time": "1:00:23", "throughput": 19858.75, "total_tokens": 173839808}
|
|
{"current_steps": 55245, "total_steps": 78105, "loss": 0.1843, "lr": 1.1952257574296572e-06, "epoch": 3.5365853658536586, "percentage": 70.73, "elapsed_time": "2:25:54", "remaining_time": "1:00:22", "throughput": 19858.99, "total_tokens": 173855424}
|
|
{"current_steps": 55250, "total_steps": 78105, "loss": 0.1045, "lr": 1.1947492591420847e-06, "epoch": 3.5369054477946356, "percentage": 70.74, "elapsed_time": "2:25:55", "remaining_time": "1:00:21", "throughput": 19859.19, "total_tokens": 173870144}
|
|
{"current_steps": 55255, "total_steps": 78105, "loss": 0.2019, "lr": 1.1942728260319355e-06, "epoch": 3.537225529735612, "percentage": 70.74, "elapsed_time": "2:25:55", "remaining_time": "1:00:20", "throughput": 19859.45, "total_tokens": 173885824}
|
|
{"current_steps": 55260, "total_steps": 78105, "loss": 0.1624, "lr": 1.1937964581229993e-06, "epoch": 3.537545611676589, "percentage": 70.75, "elapsed_time": "2:25:56", "remaining_time": "1:00:20", "throughput": 19859.68, "total_tokens": 173901056}
|
|
{"current_steps": 55265, "total_steps": 78105, "loss": 0.2457, "lr": 1.1933201554390661e-06, "epoch": 3.537865693617566, "percentage": 70.76, "elapsed_time": "2:25:57", "remaining_time": "1:00:19", "throughput": 19859.95, "total_tokens": 173917376}
|
|
{"current_steps": 55270, "total_steps": 78105, "loss": 0.0853, "lr": 1.192843918003916e-06, "epoch": 3.538185775558543, "percentage": 70.76, "elapsed_time": "2:25:57", "remaining_time": "1:00:18", "throughput": 19860.18, "total_tokens": 173932224}
|
|
{"current_steps": 55275, "total_steps": 78105, "loss": 0.1423, "lr": 1.192367745841333e-06, "epoch": 3.53850585749952, "percentage": 70.77, "elapsed_time": "2:25:58", "remaining_time": "1:00:17", "throughput": 19860.43, "total_tokens": 173947520}
|
|
{"current_steps": 55280, "total_steps": 78105, "loss": 0.2957, "lr": 1.1918916389750934e-06, "epoch": 3.5388259394404966, "percentage": 70.78, "elapsed_time": "2:25:59", "remaining_time": "1:00:16", "throughput": 19860.66, "total_tokens": 173962944}
|
|
{"current_steps": 55285, "total_steps": 78105, "loss": 0.1717, "lr": 1.1914155974289719e-06, "epoch": 3.5391460213814736, "percentage": 70.78, "elapsed_time": "2:25:59", "remaining_time": "1:00:15", "throughput": 19860.88, "total_tokens": 173977472}
|
|
{"current_steps": 55290, "total_steps": 78105, "loss": 0.1313, "lr": 1.190939621226739e-06, "epoch": 3.5394661033224506, "percentage": 70.79, "elapsed_time": "2:26:00", "remaining_time": "1:00:14", "throughput": 19861.14, "total_tokens": 173993408}
|
|
{"current_steps": 55295, "total_steps": 78105, "loss": 0.1651, "lr": 1.1904637103921629e-06, "epoch": 3.5397861852634276, "percentage": 70.8, "elapsed_time": "2:26:01", "remaining_time": "1:00:14", "throughput": 19861.42, "total_tokens": 174009536}
|
|
{"current_steps": 55300, "total_steps": 78105, "loss": 0.2041, "lr": 1.189987864949008e-06, "epoch": 3.540106267204404, "percentage": 70.8, "elapsed_time": "2:26:01", "remaining_time": "1:00:13", "throughput": 19861.66, "total_tokens": 174025024}
|
|
{"current_steps": 55305, "total_steps": 78105, "loss": 0.157, "lr": 1.1895120849210355e-06, "epoch": 3.540426349145381, "percentage": 70.81, "elapsed_time": "2:26:02", "remaining_time": "1:00:12", "throughput": 19861.97, "total_tokens": 174041536}
|
|
{"current_steps": 55310, "total_steps": 78105, "loss": 0.2251, "lr": 1.1890363703320024e-06, "epoch": 3.540746431086358, "percentage": 70.81, "elapsed_time": "2:26:03", "remaining_time": "1:00:11", "throughput": 19862.4, "total_tokens": 174061184}
|
|
{"current_steps": 55315, "total_steps": 78105, "loss": 0.162, "lr": 1.1885607212056654e-06, "epoch": 3.541066513027335, "percentage": 70.82, "elapsed_time": "2:26:04", "remaining_time": "1:00:10", "throughput": 19862.64, "total_tokens": 174076864}
|
|
{"current_steps": 55320, "total_steps": 78105, "loss": 0.1838, "lr": 1.1880851375657751e-06, "epoch": 3.541386594968312, "percentage": 70.83, "elapsed_time": "2:26:04", "remaining_time": "1:00:09", "throughput": 19862.86, "total_tokens": 174091776}
|
|
{"current_steps": 55325, "total_steps": 78105, "loss": 0.1919, "lr": 1.1876096194360792e-06, "epoch": 3.5417066769092886, "percentage": 70.83, "elapsed_time": "2:26:05", "remaining_time": "1:00:09", "throughput": 19863.23, "total_tokens": 174109632}
|
|
{"current_steps": 55330, "total_steps": 78105, "loss": 0.1432, "lr": 1.1871341668403233e-06, "epoch": 3.5420267588502656, "percentage": 70.84, "elapsed_time": "2:26:06", "remaining_time": "1:00:08", "throughput": 19863.47, "total_tokens": 174124992}
|
|
{"current_steps": 55335, "total_steps": 78105, "loss": 0.221, "lr": 1.1866587798022474e-06, "epoch": 3.5423468407912426, "percentage": 70.85, "elapsed_time": "2:26:06", "remaining_time": "1:00:07", "throughput": 19863.69, "total_tokens": 174139968}
|
|
{"current_steps": 55340, "total_steps": 78105, "loss": 0.1514, "lr": 1.1861834583455931e-06, "epoch": 3.5426669227322196, "percentage": 70.85, "elapsed_time": "2:26:07", "remaining_time": "1:00:06", "throughput": 19863.98, "total_tokens": 174156224}
|
|
{"current_steps": 55345, "total_steps": 78105, "loss": 0.136, "lr": 1.1857082024940917e-06, "epoch": 3.542987004673196, "percentage": 70.86, "elapsed_time": "2:26:08", "remaining_time": "1:00:05", "throughput": 19864.34, "total_tokens": 174174144}
|
|
{"current_steps": 55350, "total_steps": 78105, "loss": 0.1773, "lr": 1.1852330122714783e-06, "epoch": 3.543307086614173, "percentage": 70.87, "elapsed_time": "2:26:08", "remaining_time": "1:00:04", "throughput": 19864.68, "total_tokens": 174191168}
|
|
{"current_steps": 55355, "total_steps": 78105, "loss": 0.226, "lr": 1.184757887701478e-06, "epoch": 3.54362716855515, "percentage": 70.87, "elapsed_time": "2:26:09", "remaining_time": "1:00:04", "throughput": 19864.91, "total_tokens": 174206208}
|
|
{"current_steps": 55360, "total_steps": 78105, "loss": 0.1229, "lr": 1.1842828288078193e-06, "epoch": 3.543947250496127, "percentage": 70.88, "elapsed_time": "2:26:10", "remaining_time": "1:00:03", "throughput": 19865.15, "total_tokens": 174222016}
|
|
{"current_steps": 55365, "total_steps": 78105, "loss": 0.1571, "lr": 1.1838078356142227e-06, "epoch": 3.544267332437104, "percentage": 70.89, "elapsed_time": "2:26:10", "remaining_time": "1:00:02", "throughput": 19865.46, "total_tokens": 174239104}
|
|
{"current_steps": 55370, "total_steps": 78105, "loss": 0.1274, "lr": 1.1833329081444059e-06, "epoch": 3.5445874143780807, "percentage": 70.89, "elapsed_time": "2:26:11", "remaining_time": "1:00:01", "throughput": 19865.7, "total_tokens": 174254784}
|
|
{"current_steps": 55375, "total_steps": 78105, "loss": 0.1939, "lr": 1.1828580464220873e-06, "epoch": 3.5449074963190577, "percentage": 70.9, "elapsed_time": "2:26:12", "remaining_time": "1:00:00", "throughput": 19865.94, "total_tokens": 174270336}
|
|
{"current_steps": 55380, "total_steps": 78105, "loss": 0.1964, "lr": 1.1823832504709754e-06, "epoch": 3.5452275782600347, "percentage": 70.9, "elapsed_time": "2:26:12", "remaining_time": "0:59:59", "throughput": 19866.18, "total_tokens": 174285696}
|
|
{"current_steps": 55385, "total_steps": 78105, "loss": 0.1029, "lr": 1.1819085203147823e-06, "epoch": 3.545547660201011, "percentage": 70.91, "elapsed_time": "2:26:13", "remaining_time": "0:59:59", "throughput": 19866.39, "total_tokens": 174300672}
|
|
{"current_steps": 55390, "total_steps": 78105, "loss": 0.1274, "lr": 1.1814338559772104e-06, "epoch": 3.545867742141988, "percentage": 70.92, "elapsed_time": "2:26:14", "remaining_time": "0:59:58", "throughput": 19866.66, "total_tokens": 174316672}
|
|
{"current_steps": 55395, "total_steps": 78105, "loss": 0.1423, "lr": 1.1809592574819644e-06, "epoch": 3.546187824082965, "percentage": 70.92, "elapsed_time": "2:26:15", "remaining_time": "0:59:57", "throughput": 19866.95, "total_tokens": 174332928}
|
|
{"current_steps": 55400, "total_steps": 78105, "loss": 0.1825, "lr": 1.1804847248527426e-06, "epoch": 3.546507906023942, "percentage": 70.93, "elapsed_time": "2:26:15", "remaining_time": "0:59:56", "throughput": 19867.17, "total_tokens": 174347712}
|
|
{"current_steps": 55405, "total_steps": 78105, "loss": 0.1215, "lr": 1.1800102581132405e-06, "epoch": 3.546827987964919, "percentage": 70.94, "elapsed_time": "2:26:16", "remaining_time": "0:59:55", "throughput": 19867.39, "total_tokens": 174362752}
|
|
{"current_steps": 55410, "total_steps": 78105, "loss": 0.159, "lr": 1.1795358572871504e-06, "epoch": 3.547148069905896, "percentage": 70.94, "elapsed_time": "2:26:17", "remaining_time": "0:59:54", "throughput": 19867.65, "total_tokens": 174378880}
|
|
{"current_steps": 55415, "total_steps": 78105, "loss": 0.1177, "lr": 1.1790615223981616e-06, "epoch": 3.5474681518468727, "percentage": 70.95, "elapsed_time": "2:26:17", "remaining_time": "0:59:54", "throughput": 19867.95, "total_tokens": 174395520}
|
|
{"current_steps": 55420, "total_steps": 78105, "loss": 0.1959, "lr": 1.1785872534699591e-06, "epoch": 3.5477882337878497, "percentage": 70.96, "elapsed_time": "2:26:18", "remaining_time": "0:59:53", "throughput": 19868.27, "total_tokens": 174412672}
|
|
{"current_steps": 55425, "total_steps": 78105, "loss": 0.1277, "lr": 1.178113050526227e-06, "epoch": 3.5481083157288267, "percentage": 70.96, "elapsed_time": "2:26:19", "remaining_time": "0:59:52", "throughput": 19868.51, "total_tokens": 174428096}
|
|
{"current_steps": 55430, "total_steps": 78105, "loss": 0.1724, "lr": 1.1776389135906435e-06, "epoch": 3.5484283976698032, "percentage": 70.97, "elapsed_time": "2:26:19", "remaining_time": "0:59:51", "throughput": 19868.71, "total_tokens": 174442880}
|
|
{"current_steps": 55435, "total_steps": 78105, "loss": 0.2434, "lr": 1.1771648426868847e-06, "epoch": 3.54874847961078, "percentage": 70.97, "elapsed_time": "2:26:20", "remaining_time": "0:59:50", "throughput": 19868.92, "total_tokens": 174457600}
|
|
{"current_steps": 55440, "total_steps": 78105, "loss": 0.1841, "lr": 1.1766908378386236e-06, "epoch": 3.549068561551757, "percentage": 70.98, "elapsed_time": "2:26:21", "remaining_time": "0:59:49", "throughput": 19869.18, "total_tokens": 174473152}
|
|
{"current_steps": 55445, "total_steps": 78105, "loss": 0.1522, "lr": 1.1762168990695292e-06, "epoch": 3.549388643492734, "percentage": 70.99, "elapsed_time": "2:26:21", "remaining_time": "0:59:49", "throughput": 19869.48, "total_tokens": 174489728}
|
|
{"current_steps": 55450, "total_steps": 78105, "loss": 0.2114, "lr": 1.1757430264032671e-06, "epoch": 3.549708725433711, "percentage": 70.99, "elapsed_time": "2:26:22", "remaining_time": "0:59:48", "throughput": 19869.74, "total_tokens": 174505792}
|
|
{"current_steps": 55455, "total_steps": 78105, "loss": 0.1655, "lr": 1.1752692198635e-06, "epoch": 3.550028807374688, "percentage": 71.0, "elapsed_time": "2:26:23", "remaining_time": "0:59:47", "throughput": 19869.97, "total_tokens": 174520896}
|
|
{"current_steps": 55460, "total_steps": 78105, "loss": 0.1716, "lr": 1.1747954794738895e-06, "epoch": 3.5503488893156647, "percentage": 71.01, "elapsed_time": "2:26:23", "remaining_time": "0:59:46", "throughput": 19870.19, "total_tokens": 174535488}
|
|
{"current_steps": 55465, "total_steps": 78105, "loss": 0.1682, "lr": 1.174321805258088e-06, "epoch": 3.5506689712566417, "percentage": 71.01, "elapsed_time": "2:26:24", "remaining_time": "0:59:45", "throughput": 19870.41, "total_tokens": 174550208}
|
|
{"current_steps": 55470, "total_steps": 78105, "loss": 0.1313, "lr": 1.1738481972397522e-06, "epoch": 3.5509890531976187, "percentage": 71.02, "elapsed_time": "2:26:25", "remaining_time": "0:59:44", "throughput": 19870.64, "total_tokens": 174565824}
|
|
{"current_steps": 55475, "total_steps": 78105, "loss": 0.1355, "lr": 1.1733746554425278e-06, "epoch": 3.5513091351385953, "percentage": 71.03, "elapsed_time": "2:26:25", "remaining_time": "0:59:44", "throughput": 19870.91, "total_tokens": 174582208}
|
|
{"current_steps": 55480, "total_steps": 78105, "loss": 0.2098, "lr": 1.172901179890064e-06, "epoch": 3.5516292170795722, "percentage": 71.03, "elapsed_time": "2:26:26", "remaining_time": "0:59:43", "throughput": 19871.23, "total_tokens": 174599232}
|
|
{"current_steps": 55485, "total_steps": 78105, "loss": 0.1394, "lr": 1.172427770606003e-06, "epoch": 3.5519492990205492, "percentage": 71.04, "elapsed_time": "2:26:27", "remaining_time": "0:59:42", "throughput": 19871.56, "total_tokens": 174616320}
|
|
{"current_steps": 55490, "total_steps": 78105, "loss": 0.1705, "lr": 1.171954427613983e-06, "epoch": 3.5522693809615262, "percentage": 71.05, "elapsed_time": "2:26:27", "remaining_time": "0:59:41", "throughput": 19871.81, "total_tokens": 174631808}
|
|
{"current_steps": 55495, "total_steps": 78105, "loss": 0.1855, "lr": 1.1714811509376432e-06, "epoch": 3.552589462902503, "percentage": 71.05, "elapsed_time": "2:26:28", "remaining_time": "0:59:40", "throughput": 19872.02, "total_tokens": 174646784}
|
|
{"current_steps": 55500, "total_steps": 78105, "loss": 0.1401, "lr": 1.1710079406006125e-06, "epoch": 3.55290954484348, "percentage": 71.06, "elapsed_time": "2:26:29", "remaining_time": "0:59:39", "throughput": 19872.28, "total_tokens": 174662592}
|
|
{"current_steps": 55505, "total_steps": 78105, "loss": 0.1048, "lr": 1.170534796626524e-06, "epoch": 3.5532296267844568, "percentage": 71.06, "elapsed_time": "2:26:29", "remaining_time": "0:59:39", "throughput": 19872.56, "total_tokens": 174678720}
|
|
{"current_steps": 55510, "total_steps": 78105, "loss": 0.157, "lr": 1.1700617190390029e-06, "epoch": 3.5535497087254337, "percentage": 71.07, "elapsed_time": "2:26:30", "remaining_time": "0:59:38", "throughput": 19872.83, "total_tokens": 174694848}
|
|
{"current_steps": 55515, "total_steps": 78105, "loss": 0.1073, "lr": 1.1695887078616718e-06, "epoch": 3.5538697906664107, "percentage": 71.08, "elapsed_time": "2:26:31", "remaining_time": "0:59:37", "throughput": 19873.13, "total_tokens": 174711488}
|
|
{"current_steps": 55520, "total_steps": 78105, "loss": 0.195, "lr": 1.169115763118151e-06, "epoch": 3.5541898726073873, "percentage": 71.08, "elapsed_time": "2:26:32", "remaining_time": "0:59:36", "throughput": 19873.54, "total_tokens": 174730240}
|
|
{"current_steps": 55525, "total_steps": 78105, "loss": 0.2061, "lr": 1.1686428848320568e-06, "epoch": 3.5545099545483643, "percentage": 71.09, "elapsed_time": "2:26:32", "remaining_time": "0:59:35", "throughput": 19873.82, "total_tokens": 174746112}
|
|
{"current_steps": 55530, "total_steps": 78105, "loss": 0.1502, "lr": 1.168170073027002e-06, "epoch": 3.5548300364893413, "percentage": 71.1, "elapsed_time": "2:26:33", "remaining_time": "0:59:34", "throughput": 19874.09, "total_tokens": 174762048}
|
|
{"current_steps": 55535, "total_steps": 78105, "loss": 0.2336, "lr": 1.1676973277265965e-06, "epoch": 3.5551501184303183, "percentage": 71.1, "elapsed_time": "2:26:34", "remaining_time": "0:59:34", "throughput": 19874.32, "total_tokens": 174777344}
|
|
{"current_steps": 55540, "total_steps": 78105, "loss": 0.1855, "lr": 1.167224648954446e-06, "epoch": 3.5554702003712952, "percentage": 71.11, "elapsed_time": "2:26:34", "remaining_time": "0:59:33", "throughput": 19874.63, "total_tokens": 174793792}
|
|
{"current_steps": 55545, "total_steps": 78105, "loss": 0.2353, "lr": 1.166752036734155e-06, "epoch": 3.555790282312272, "percentage": 71.12, "elapsed_time": "2:26:35", "remaining_time": "0:59:32", "throughput": 19874.92, "total_tokens": 174810112}
|
|
{"current_steps": 55550, "total_steps": 78105, "loss": 0.1519, "lr": 1.1662794910893228e-06, "epoch": 3.556110364253249, "percentage": 71.12, "elapsed_time": "2:26:36", "remaining_time": "0:59:31", "throughput": 19875.17, "total_tokens": 174825344}
|
|
{"current_steps": 55555, "total_steps": 78105, "loss": 0.1605, "lr": 1.1658070120435457e-06, "epoch": 3.5564304461942258, "percentage": 71.13, "elapsed_time": "2:26:36", "remaining_time": "0:59:30", "throughput": 19875.4, "total_tokens": 174840448}
|
|
{"current_steps": 55560, "total_steps": 78105, "loss": 0.2179, "lr": 1.1653345996204168e-06, "epoch": 3.5567505281352028, "percentage": 71.14, "elapsed_time": "2:26:37", "remaining_time": "0:59:29", "throughput": 19875.61, "total_tokens": 174855168}
|
|
{"current_steps": 55565, "total_steps": 78105, "loss": 0.176, "lr": 1.1648622538435252e-06, "epoch": 3.5570706100761793, "percentage": 71.14, "elapsed_time": "2:26:38", "remaining_time": "0:59:28", "throughput": 19875.86, "total_tokens": 174870400}
|
|
{"current_steps": 55570, "total_steps": 78105, "loss": 0.2301, "lr": 1.1643899747364599e-06, "epoch": 3.5573906920171563, "percentage": 71.15, "elapsed_time": "2:26:38", "remaining_time": "0:59:28", "throughput": 19876.08, "total_tokens": 174885312}
|
|
{"current_steps": 55575, "total_steps": 78105, "loss": 0.1409, "lr": 1.1639177623228005e-06, "epoch": 3.5577107739581333, "percentage": 71.15, "elapsed_time": "2:26:39", "remaining_time": "0:59:27", "throughput": 19876.31, "total_tokens": 174900288}
|
|
{"current_steps": 55580, "total_steps": 78105, "loss": 0.202, "lr": 1.1634456166261304e-06, "epoch": 3.5580308558991103, "percentage": 71.16, "elapsed_time": "2:26:40", "remaining_time": "0:59:26", "throughput": 19876.6, "total_tokens": 174916672}
|
|
{"current_steps": 55585, "total_steps": 78105, "loss": 0.1512, "lr": 1.1629735376700227e-06, "epoch": 3.5583509378400873, "percentage": 71.17, "elapsed_time": "2:26:40", "remaining_time": "0:59:25", "throughput": 19876.87, "total_tokens": 174932800}
|
|
{"current_steps": 55590, "total_steps": 78105, "loss": 0.1533, "lr": 1.1625015254780531e-06, "epoch": 3.558671019781064, "percentage": 71.17, "elapsed_time": "2:26:41", "remaining_time": "0:59:24", "throughput": 19877.13, "total_tokens": 174948608}
|
|
{"current_steps": 55595, "total_steps": 78105, "loss": 0.1162, "lr": 1.1620295800737905e-06, "epoch": 3.558991101722041, "percentage": 71.18, "elapsed_time": "2:26:42", "remaining_time": "0:59:23", "throughput": 19877.3, "total_tokens": 174962816}
|
|
{"current_steps": 55600, "total_steps": 78105, "loss": 0.2085, "lr": 1.1615577014808005e-06, "epoch": 3.559311183663018, "percentage": 71.19, "elapsed_time": "2:26:42", "remaining_time": "0:59:23", "throughput": 19877.58, "total_tokens": 174979200}
|
|
{"current_steps": 55605, "total_steps": 78105, "loss": 0.1848, "lr": 1.161085889722649e-06, "epoch": 3.559631265603995, "percentage": 71.19, "elapsed_time": "2:26:43", "remaining_time": "0:59:22", "throughput": 19877.84, "total_tokens": 174995008}
|
|
{"current_steps": 55610, "total_steps": 78105, "loss": 0.0892, "lr": 1.160614144822892e-06, "epoch": 3.5599513475449713, "percentage": 71.2, "elapsed_time": "2:26:44", "remaining_time": "0:59:21", "throughput": 19878.08, "total_tokens": 175010560}
|
|
{"current_steps": 55615, "total_steps": 78105, "loss": 0.1707, "lr": 1.16014246680509e-06, "epoch": 3.5602714294859483, "percentage": 71.21, "elapsed_time": "2:26:44", "remaining_time": "0:59:20", "throughput": 19878.31, "total_tokens": 175026240}
|
|
{"current_steps": 55620, "total_steps": 78105, "loss": 0.1431, "lr": 1.1596708556927924e-06, "epoch": 3.5605915114269253, "percentage": 71.21, "elapsed_time": "2:26:45", "remaining_time": "0:59:19", "throughput": 19878.6, "total_tokens": 175042752}
|
|
{"current_steps": 55625, "total_steps": 78105, "loss": 0.166, "lr": 1.1591993115095517e-06, "epoch": 3.5609115933679023, "percentage": 71.22, "elapsed_time": "2:26:46", "remaining_time": "0:59:18", "throughput": 19878.82, "total_tokens": 175057856}
|
|
{"current_steps": 55630, "total_steps": 78105, "loss": 0.2111, "lr": 1.1587278342789135e-06, "epoch": 3.5612316753088793, "percentage": 71.22, "elapsed_time": "2:26:46", "remaining_time": "0:59:18", "throughput": 19879.07, "total_tokens": 175073472}
|
|
{"current_steps": 55635, "total_steps": 78105, "loss": 0.1581, "lr": 1.1582564240244205e-06, "epoch": 3.561551757249856, "percentage": 71.23, "elapsed_time": "2:26:47", "remaining_time": "0:59:17", "throughput": 19879.4, "total_tokens": 175090880}
|
|
{"current_steps": 55640, "total_steps": 78105, "loss": 0.1837, "lr": 1.1577850807696131e-06, "epoch": 3.561871839190833, "percentage": 71.24, "elapsed_time": "2:26:48", "remaining_time": "0:59:16", "throughput": 19879.69, "total_tokens": 175107072}
|
|
{"current_steps": 55645, "total_steps": 78105, "loss": 0.1625, "lr": 1.1573138045380273e-06, "epoch": 3.56219192113181, "percentage": 71.24, "elapsed_time": "2:26:49", "remaining_time": "0:59:15", "throughput": 19879.96, "total_tokens": 175123008}
|
|
{"current_steps": 55650, "total_steps": 78105, "loss": 0.1638, "lr": 1.1568425953531953e-06, "epoch": 3.5625120030727864, "percentage": 71.25, "elapsed_time": "2:26:49", "remaining_time": "0:59:14", "throughput": 19880.26, "total_tokens": 175139584}
|
|
{"current_steps": 55655, "total_steps": 78105, "loss": 0.2076, "lr": 1.1563714532386498e-06, "epoch": 3.5628320850137634, "percentage": 71.26, "elapsed_time": "2:26:50", "remaining_time": "0:59:13", "throughput": 19880.47, "total_tokens": 175154240}
|
|
{"current_steps": 55660, "total_steps": 78105, "loss": 0.2162, "lr": 1.1559003782179131e-06, "epoch": 3.5631521669547404, "percentage": 71.26, "elapsed_time": "2:26:51", "remaining_time": "0:59:13", "throughput": 19880.68, "total_tokens": 175168896}
|
|
{"current_steps": 55665, "total_steps": 78105, "loss": 0.0951, "lr": 1.1554293703145114e-06, "epoch": 3.5634722488957173, "percentage": 71.27, "elapsed_time": "2:26:51", "remaining_time": "0:59:12", "throughput": 19880.91, "total_tokens": 175183552}
|
|
{"current_steps": 55670, "total_steps": 78105, "loss": 0.1163, "lr": 1.1549584295519635e-06, "epoch": 3.5637923308366943, "percentage": 71.28, "elapsed_time": "2:26:52", "remaining_time": "0:59:11", "throughput": 19881.13, "total_tokens": 175198528}
|
|
{"current_steps": 55675, "total_steps": 78105, "loss": 0.1664, "lr": 1.1544875559537856e-06, "epoch": 3.5641124127776713, "percentage": 71.28, "elapsed_time": "2:26:52", "remaining_time": "0:59:10", "throughput": 19881.36, "total_tokens": 175213952}
|
|
{"current_steps": 55680, "total_steps": 78105, "loss": 0.1836, "lr": 1.1540167495434904e-06, "epoch": 3.564432494718648, "percentage": 71.29, "elapsed_time": "2:26:53", "remaining_time": "0:59:09", "throughput": 19881.6, "total_tokens": 175229184}
|
|
{"current_steps": 55685, "total_steps": 78105, "loss": 0.1354, "lr": 1.153546010344587e-06, "epoch": 3.564752576659625, "percentage": 71.3, "elapsed_time": "2:26:54", "remaining_time": "0:59:08", "throughput": 19881.88, "total_tokens": 175245312}
|
|
{"current_steps": 55690, "total_steps": 78105, "loss": 0.1861, "lr": 1.1530753383805843e-06, "epoch": 3.565072658600602, "percentage": 71.3, "elapsed_time": "2:26:54", "remaining_time": "0:59:07", "throughput": 19882.13, "total_tokens": 175260416}
|
|
{"current_steps": 55695, "total_steps": 78105, "loss": 0.2018, "lr": 1.1526047336749812e-06, "epoch": 3.5653927405415784, "percentage": 71.31, "elapsed_time": "2:26:55", "remaining_time": "0:59:07", "throughput": 19882.34, "total_tokens": 175275200}
|
|
{"current_steps": 55700, "total_steps": 78105, "loss": 0.1482, "lr": 1.1521341962512805e-06, "epoch": 3.5657128224825554, "percentage": 71.31, "elapsed_time": "2:26:56", "remaining_time": "0:59:06", "throughput": 19882.59, "total_tokens": 175291072}
|
|
{"current_steps": 55705, "total_steps": 78105, "loss": 0.15, "lr": 1.1516637261329774e-06, "epoch": 3.5660329044235324, "percentage": 71.32, "elapsed_time": "2:26:57", "remaining_time": "0:59:05", "throughput": 19882.91, "total_tokens": 175307968}
|
|
{"current_steps": 55710, "total_steps": 78105, "loss": 0.2098, "lr": 1.1511933233435643e-06, "epoch": 3.5663529863645094, "percentage": 71.33, "elapsed_time": "2:26:57", "remaining_time": "0:59:04", "throughput": 19883.1, "total_tokens": 175322496}
|
|
{"current_steps": 55715, "total_steps": 78105, "loss": 0.238, "lr": 1.1507229879065312e-06, "epoch": 3.5666730683054864, "percentage": 71.33, "elapsed_time": "2:26:58", "remaining_time": "0:59:03", "throughput": 19883.37, "total_tokens": 175338368}
|
|
{"current_steps": 55720, "total_steps": 78105, "loss": 0.2193, "lr": 1.1502527198453632e-06, "epoch": 3.5669931502464634, "percentage": 71.34, "elapsed_time": "2:26:59", "remaining_time": "0:59:02", "throughput": 19883.66, "total_tokens": 175354688}
|
|
{"current_steps": 55725, "total_steps": 78105, "loss": 0.2203, "lr": 1.1497825191835453e-06, "epoch": 3.56731323218744, "percentage": 71.35, "elapsed_time": "2:26:59", "remaining_time": "0:59:02", "throughput": 19883.98, "total_tokens": 175372032}
|
|
{"current_steps": 55730, "total_steps": 78105, "loss": 0.1978, "lr": 1.1493123859445534e-06, "epoch": 3.567633314128417, "percentage": 71.35, "elapsed_time": "2:27:00", "remaining_time": "0:59:01", "throughput": 19884.22, "total_tokens": 175387392}
|
|
{"current_steps": 55735, "total_steps": 78105, "loss": 0.1106, "lr": 1.1488423201518668e-06, "epoch": 3.567953396069394, "percentage": 71.36, "elapsed_time": "2:27:01", "remaining_time": "0:59:00", "throughput": 19884.46, "total_tokens": 175402624}
|
|
{"current_steps": 55740, "total_steps": 78105, "loss": 0.2546, "lr": 1.1483723218289564e-06, "epoch": 3.5682734780103704, "percentage": 71.37, "elapsed_time": "2:27:01", "remaining_time": "0:58:59", "throughput": 19884.7, "total_tokens": 175418368}
|
|
{"current_steps": 55745, "total_steps": 78105, "loss": 0.1019, "lr": 1.147902390999292e-06, "epoch": 3.5685935599513474, "percentage": 71.37, "elapsed_time": "2:27:02", "remaining_time": "0:58:58", "throughput": 19884.96, "total_tokens": 175434112}
|
|
{"current_steps": 55750, "total_steps": 78105, "loss": 0.1804, "lr": 1.1474325276863395e-06, "epoch": 3.5689136418923244, "percentage": 71.38, "elapsed_time": "2:27:03", "remaining_time": "0:58:57", "throughput": 19885.26, "total_tokens": 175450752}
|
|
{"current_steps": 55755, "total_steps": 78105, "loss": 0.232, "lr": 1.1469627319135612e-06, "epoch": 3.5692337238333014, "percentage": 71.38, "elapsed_time": "2:27:03", "remaining_time": "0:58:57", "throughput": 19885.54, "total_tokens": 175467008}
|
|
{"current_steps": 55760, "total_steps": 78105, "loss": 0.1739, "lr": 1.1464930037044164e-06, "epoch": 3.5695538057742784, "percentage": 71.39, "elapsed_time": "2:27:04", "remaining_time": "0:58:56", "throughput": 19885.8, "total_tokens": 175482752}
|
|
{"current_steps": 55765, "total_steps": 78105, "loss": 0.1691, "lr": 1.1460233430823608e-06, "epoch": 3.5698738877152554, "percentage": 71.4, "elapsed_time": "2:27:05", "remaining_time": "0:58:55", "throughput": 19886.05, "total_tokens": 175498368}
|
|
{"current_steps": 55770, "total_steps": 78105, "loss": 0.1854, "lr": 1.145553750070846e-06, "epoch": 3.570193969656232, "percentage": 71.4, "elapsed_time": "2:27:06", "remaining_time": "0:58:54", "throughput": 19886.48, "total_tokens": 175518400}
|
|
{"current_steps": 55775, "total_steps": 78105, "loss": 0.1446, "lr": 1.1450842246933227e-06, "epoch": 3.570514051597209, "percentage": 71.41, "elapsed_time": "2:27:06", "remaining_time": "0:58:53", "throughput": 19886.74, "total_tokens": 175534144}
|
|
{"current_steps": 55780, "total_steps": 78105, "loss": 0.1739, "lr": 1.144614766973236e-06, "epoch": 3.570834133538186, "percentage": 71.42, "elapsed_time": "2:27:07", "remaining_time": "0:58:53", "throughput": 19886.98, "total_tokens": 175549952}
|
|
{"current_steps": 55785, "total_steps": 78105, "loss": 0.1334, "lr": 1.1441453769340275e-06, "epoch": 3.5711542154791625, "percentage": 71.42, "elapsed_time": "2:27:08", "remaining_time": "0:58:52", "throughput": 19887.26, "total_tokens": 175566336}
|
|
{"current_steps": 55790, "total_steps": 78105, "loss": 0.1545, "lr": 1.1436760545991368e-06, "epoch": 3.5714742974201394, "percentage": 71.43, "elapsed_time": "2:27:08", "remaining_time": "0:58:51", "throughput": 19887.53, "total_tokens": 175582400}
|
|
{"current_steps": 55795, "total_steps": 78105, "loss": 0.166, "lr": 1.1432067999919979e-06, "epoch": 3.5717943793611164, "percentage": 71.44, "elapsed_time": "2:27:09", "remaining_time": "0:58:50", "throughput": 19887.83, "total_tokens": 175598784}
|
|
{"current_steps": 55800, "total_steps": 78105, "loss": 0.2502, "lr": 1.1427376131360462e-06, "epoch": 3.5721144613020934, "percentage": 71.44, "elapsed_time": "2:27:10", "remaining_time": "0:58:49", "throughput": 19888.2, "total_tokens": 175617152}
|
|
{"current_steps": 55805, "total_steps": 78105, "loss": 0.1629, "lr": 1.1422684940547064e-06, "epoch": 3.5724345432430704, "percentage": 71.45, "elapsed_time": "2:27:10", "remaining_time": "0:58:48", "throughput": 19888.48, "total_tokens": 175633664}
|
|
{"current_steps": 55810, "total_steps": 78105, "loss": 0.1444, "lr": 1.1417994427714076e-06, "epoch": 3.572754625184047, "percentage": 71.46, "elapsed_time": "2:27:11", "remaining_time": "0:58:48", "throughput": 19888.7, "total_tokens": 175648448}
|
|
{"current_steps": 55815, "total_steps": 78105, "loss": 0.1474, "lr": 1.1413304593095683e-06, "epoch": 3.573074707125024, "percentage": 71.46, "elapsed_time": "2:27:12", "remaining_time": "0:58:47", "throughput": 19888.99, "total_tokens": 175664640}
|
|
{"current_steps": 55820, "total_steps": 78105, "loss": 0.0936, "lr": 1.1408615436926098e-06, "epoch": 3.573394789066001, "percentage": 71.47, "elapsed_time": "2:27:12", "remaining_time": "0:58:46", "throughput": 19889.2, "total_tokens": 175679488}
|
|
{"current_steps": 55825, "total_steps": 78105, "loss": 0.1603, "lr": 1.1403926959439462e-06, "epoch": 3.573714871006978, "percentage": 71.47, "elapsed_time": "2:27:13", "remaining_time": "0:58:45", "throughput": 19889.49, "total_tokens": 175695744}
|
|
{"current_steps": 55830, "total_steps": 78105, "loss": 0.1229, "lr": 1.1399239160869897e-06, "epoch": 3.5740349529479545, "percentage": 71.48, "elapsed_time": "2:27:14", "remaining_time": "0:58:44", "throughput": 19889.71, "total_tokens": 175710848}
|
|
{"current_steps": 55835, "total_steps": 78105, "loss": 0.1613, "lr": 1.1394552041451482e-06, "epoch": 3.5743550348889315, "percentage": 71.49, "elapsed_time": "2:27:14", "remaining_time": "0:58:43", "throughput": 19889.96, "total_tokens": 175726592}
|
|
{"current_steps": 55840, "total_steps": 78105, "loss": 0.2505, "lr": 1.138986560141826e-06, "epoch": 3.5746751168299085, "percentage": 71.49, "elapsed_time": "2:27:15", "remaining_time": "0:58:43", "throughput": 19890.19, "total_tokens": 175741632}
|
|
{"current_steps": 55845, "total_steps": 78105, "loss": 0.1542, "lr": 1.1385179841004276e-06, "epoch": 3.5749951987708855, "percentage": 71.5, "elapsed_time": "2:27:16", "remaining_time": "0:58:42", "throughput": 19890.39, "total_tokens": 175756608}
|
|
{"current_steps": 55850, "total_steps": 78105, "loss": 0.2027, "lr": 1.1380494760443475e-06, "epoch": 3.5753152807118624, "percentage": 71.51, "elapsed_time": "2:27:16", "remaining_time": "0:58:41", "throughput": 19890.63, "total_tokens": 175771904}
|
|
{"current_steps": 55855, "total_steps": 78105, "loss": 0.2972, "lr": 1.1375810359969834e-06, "epoch": 3.575635362652839, "percentage": 71.51, "elapsed_time": "2:27:17", "remaining_time": "0:58:40", "throughput": 19890.88, "total_tokens": 175787264}
|
|
{"current_steps": 55860, "total_steps": 78105, "loss": 0.2696, "lr": 1.137112663981726e-06, "epoch": 3.575955444593816, "percentage": 71.52, "elapsed_time": "2:27:18", "remaining_time": "0:58:39", "throughput": 19891.1, "total_tokens": 175802240}
|
|
{"current_steps": 55865, "total_steps": 78105, "loss": 0.1377, "lr": 1.136644360021963e-06, "epoch": 3.576275526534793, "percentage": 71.53, "elapsed_time": "2:27:18", "remaining_time": "0:58:38", "throughput": 19891.34, "total_tokens": 175817728}
|
|
{"current_steps": 55870, "total_steps": 78105, "loss": 0.1294, "lr": 1.136176124141079e-06, "epoch": 3.57659560847577, "percentage": 71.53, "elapsed_time": "2:27:19", "remaining_time": "0:58:37", "throughput": 19891.59, "total_tokens": 175833344}
|
|
{"current_steps": 55875, "total_steps": 78105, "loss": 0.1668, "lr": 1.1357079563624556e-06, "epoch": 3.5769156904167465, "percentage": 71.54, "elapsed_time": "2:27:20", "remaining_time": "0:58:37", "throughput": 19891.86, "total_tokens": 175849600}
|
|
{"current_steps": 55880, "total_steps": 78105, "loss": 0.1578, "lr": 1.1352398567094696e-06, "epoch": 3.5772357723577235, "percentage": 71.54, "elapsed_time": "2:27:20", "remaining_time": "0:58:36", "throughput": 19892.07, "total_tokens": 175864064}
|
|
{"current_steps": 55885, "total_steps": 78105, "loss": 0.1167, "lr": 1.1347718252054981e-06, "epoch": 3.5775558542987005, "percentage": 71.55, "elapsed_time": "2:27:21", "remaining_time": "0:58:35", "throughput": 19892.29, "total_tokens": 175879104}
|
|
{"current_steps": 55890, "total_steps": 78105, "loss": 0.1646, "lr": 1.134303861873909e-06, "epoch": 3.5778759362396775, "percentage": 71.56, "elapsed_time": "2:27:22", "remaining_time": "0:58:34", "throughput": 19892.63, "total_tokens": 175896320}
|
|
{"current_steps": 55895, "total_steps": 78105, "loss": 0.2506, "lr": 1.133835966738072e-06, "epoch": 3.5781960181806545, "percentage": 71.56, "elapsed_time": "2:27:22", "remaining_time": "0:58:33", "throughput": 19892.83, "total_tokens": 175910784}
|
|
{"current_steps": 55900, "total_steps": 78105, "loss": 0.1526, "lr": 1.1333681398213509e-06, "epoch": 3.578516100121631, "percentage": 71.57, "elapsed_time": "2:27:23", "remaining_time": "0:58:32", "throughput": 19893.04, "total_tokens": 175925760}
|
|
{"current_steps": 55905, "total_steps": 78105, "loss": 0.1295, "lr": 1.1329003811471061e-06, "epoch": 3.578836182062608, "percentage": 71.58, "elapsed_time": "2:27:24", "remaining_time": "0:58:32", "throughput": 19893.29, "total_tokens": 175941568}
|
|
{"current_steps": 55910, "total_steps": 78105, "loss": 0.1643, "lr": 1.1324326907386955e-06, "epoch": 3.579156264003585, "percentage": 71.58, "elapsed_time": "2:27:24", "remaining_time": "0:58:31", "throughput": 19893.5, "total_tokens": 175956352}
|
|
{"current_steps": 55915, "total_steps": 78105, "loss": 0.1576, "lr": 1.1319650686194719e-06, "epoch": 3.5794763459445615, "percentage": 71.59, "elapsed_time": "2:27:25", "remaining_time": "0:58:30", "throughput": 19893.74, "total_tokens": 175971648}
|
|
{"current_steps": 55920, "total_steps": 78105, "loss": 0.1446, "lr": 1.1314975148127889e-06, "epoch": 3.5797964278855385, "percentage": 71.6, "elapsed_time": "2:27:26", "remaining_time": "0:58:29", "throughput": 19893.96, "total_tokens": 175986752}
|
|
{"current_steps": 55925, "total_steps": 78105, "loss": 0.15, "lr": 1.1310300293419898e-06, "epoch": 3.5801165098265155, "percentage": 71.6, "elapsed_time": "2:27:26", "remaining_time": "0:58:28", "throughput": 19894.2, "total_tokens": 176002368}
|
|
{"current_steps": 55930, "total_steps": 78105, "loss": 0.1144, "lr": 1.130562612230421e-06, "epoch": 3.5804365917674925, "percentage": 71.61, "elapsed_time": "2:27:27", "remaining_time": "0:58:27", "throughput": 19894.45, "total_tokens": 176018304}
|
|
{"current_steps": 55935, "total_steps": 78105, "loss": 0.1389, "lr": 1.1300952635014226e-06, "epoch": 3.5807566737084695, "percentage": 71.62, "elapsed_time": "2:27:28", "remaining_time": "0:58:27", "throughput": 19894.72, "total_tokens": 176034560}
|
|
{"current_steps": 55940, "total_steps": 78105, "loss": 0.2644, "lr": 1.129627983178331e-06, "epoch": 3.5810767556494465, "percentage": 71.62, "elapsed_time": "2:27:28", "remaining_time": "0:58:26", "throughput": 19894.93, "total_tokens": 176049472}
|
|
{"current_steps": 55945, "total_steps": 78105, "loss": 0.1385, "lr": 1.12916077128448e-06, "epoch": 3.581396837590423, "percentage": 71.63, "elapsed_time": "2:27:29", "remaining_time": "0:58:25", "throughput": 19895.24, "total_tokens": 176066560}
|
|
{"current_steps": 55950, "total_steps": 78105, "loss": 0.1688, "lr": 1.1286936278431986e-06, "epoch": 3.5817169195314, "percentage": 71.63, "elapsed_time": "2:27:30", "remaining_time": "0:58:24", "throughput": 19895.5, "total_tokens": 176082432}
|
|
{"current_steps": 55955, "total_steps": 78105, "loss": 0.161, "lr": 1.1282265528778167e-06, "epoch": 3.582037001472377, "percentage": 71.64, "elapsed_time": "2:27:31", "remaining_time": "0:58:23", "throughput": 19895.74, "total_tokens": 176097664}
|
|
{"current_steps": 55960, "total_steps": 78105, "loss": 0.1433, "lr": 1.1277595464116534e-06, "epoch": 3.5823570834133536, "percentage": 71.65, "elapsed_time": "2:27:31", "remaining_time": "0:58:22", "throughput": 19895.99, "total_tokens": 176113024}
|
|
{"current_steps": 55965, "total_steps": 78105, "loss": 0.1655, "lr": 1.1272926084680317e-06, "epoch": 3.5826771653543306, "percentage": 71.65, "elapsed_time": "2:27:32", "remaining_time": "0:58:22", "throughput": 19896.2, "total_tokens": 176128064}
|
|
{"current_steps": 55970, "total_steps": 78105, "loss": 0.216, "lr": 1.1268257390702667e-06, "epoch": 3.5829972472953076, "percentage": 71.66, "elapsed_time": "2:27:33", "remaining_time": "0:58:21", "throughput": 19896.5, "total_tokens": 176144512}
|
|
{"current_steps": 55975, "total_steps": 78105, "loss": 0.2733, "lr": 1.1263589382416717e-06, "epoch": 3.5833173292362845, "percentage": 71.67, "elapsed_time": "2:27:33", "remaining_time": "0:58:20", "throughput": 19896.75, "total_tokens": 176160128}
|
|
{"current_steps": 55980, "total_steps": 78105, "loss": 0.1822, "lr": 1.1258922060055563e-06, "epoch": 3.5836374111772615, "percentage": 71.67, "elapsed_time": "2:27:34", "remaining_time": "0:58:19", "throughput": 19897.0, "total_tokens": 176175616}
|
|
{"current_steps": 55985, "total_steps": 78105, "loss": 0.1332, "lr": 1.1254255423852267e-06, "epoch": 3.5839574931182385, "percentage": 71.68, "elapsed_time": "2:27:35", "remaining_time": "0:58:18", "throughput": 19897.26, "total_tokens": 176191616}
|
|
{"current_steps": 55990, "total_steps": 78105, "loss": 0.1587, "lr": 1.1249589474039857e-06, "epoch": 3.584277575059215, "percentage": 71.69, "elapsed_time": "2:27:35", "remaining_time": "0:58:17", "throughput": 19897.49, "total_tokens": 176206976}
|
|
{"current_steps": 55995, "total_steps": 78105, "loss": 0.1667, "lr": 1.1244924210851324e-06, "epoch": 3.584597657000192, "percentage": 71.69, "elapsed_time": "2:27:36", "remaining_time": "0:58:17", "throughput": 19897.8, "total_tokens": 176223552}
|
|
{"current_steps": 56000, "total_steps": 78105, "loss": 0.1975, "lr": 1.1240259634519622e-06, "epoch": 3.584917738941169, "percentage": 71.7, "elapsed_time": "2:27:37", "remaining_time": "0:58:16", "throughput": 19898.01, "total_tokens": 176238400}
|
|
{"current_steps": 56005, "total_steps": 78105, "loss": 0.1504, "lr": 1.1235595745277697e-06, "epoch": 3.5852378208821456, "percentage": 71.7, "elapsed_time": "2:27:37", "remaining_time": "0:58:15", "throughput": 19898.27, "total_tokens": 176254208}
|
|
{"current_steps": 56010, "total_steps": 78105, "loss": 0.1632, "lr": 1.1230932543358405e-06, "epoch": 3.5855579028231226, "percentage": 71.71, "elapsed_time": "2:27:38", "remaining_time": "0:58:14", "throughput": 19898.55, "total_tokens": 176270208}
|
|
{"current_steps": 56015, "total_steps": 78105, "loss": 0.1512, "lr": 1.1226270028994634e-06, "epoch": 3.5858779847640996, "percentage": 71.72, "elapsed_time": "2:27:39", "remaining_time": "0:58:13", "throughput": 19898.75, "total_tokens": 176284736}
|
|
{"current_steps": 56020, "total_steps": 78105, "loss": 0.1237, "lr": 1.1221608202419194e-06, "epoch": 3.5861980667050766, "percentage": 71.72, "elapsed_time": "2:27:39", "remaining_time": "0:58:12", "throughput": 19898.99, "total_tokens": 176299968}
|
|
{"current_steps": 56025, "total_steps": 78105, "loss": 0.175, "lr": 1.1216947063864857e-06, "epoch": 3.5865181486460536, "percentage": 71.73, "elapsed_time": "2:27:40", "remaining_time": "0:58:11", "throughput": 19899.31, "total_tokens": 176317056}
|
|
{"current_steps": 56030, "total_steps": 78105, "loss": 0.1639, "lr": 1.1212286613564413e-06, "epoch": 3.5868382305870306, "percentage": 71.74, "elapsed_time": "2:27:41", "remaining_time": "0:58:11", "throughput": 19899.55, "total_tokens": 176332288}
|
|
{"current_steps": 56035, "total_steps": 78105, "loss": 0.1594, "lr": 1.1207626851750535e-06, "epoch": 3.587158312528007, "percentage": 71.74, "elapsed_time": "2:27:41", "remaining_time": "0:58:10", "throughput": 19899.87, "total_tokens": 176349440}
|
|
{"current_steps": 56040, "total_steps": 78105, "loss": 0.2285, "lr": 1.1202967778655954e-06, "epoch": 3.587478394468984, "percentage": 71.75, "elapsed_time": "2:27:42", "remaining_time": "0:58:09", "throughput": 19900.08, "total_tokens": 176364672}
|
|
{"current_steps": 56045, "total_steps": 78105, "loss": 0.1599, "lr": 1.1198309394513277e-06, "epoch": 3.587798476409961, "percentage": 71.76, "elapsed_time": "2:27:43", "remaining_time": "0:58:08", "throughput": 19900.35, "total_tokens": 176380480}
|
|
{"current_steps": 56050, "total_steps": 78105, "loss": 0.1678, "lr": 1.1193651699555144e-06, "epoch": 3.5881185583509376, "percentage": 71.76, "elapsed_time": "2:27:43", "remaining_time": "0:58:07", "throughput": 19900.58, "total_tokens": 176395712}
|
|
{"current_steps": 56055, "total_steps": 78105, "loss": 0.18, "lr": 1.1188994694014132e-06, "epoch": 3.5884386402919146, "percentage": 71.77, "elapsed_time": "2:27:44", "remaining_time": "0:58:06", "throughput": 19900.79, "total_tokens": 176410880}
|
|
{"current_steps": 56060, "total_steps": 78105, "loss": 0.0902, "lr": 1.1184338378122785e-06, "epoch": 3.5887587222328916, "percentage": 71.78, "elapsed_time": "2:27:45", "remaining_time": "0:58:06", "throughput": 19901.05, "total_tokens": 176426496}
|
|
{"current_steps": 56065, "total_steps": 78105, "loss": 0.1495, "lr": 1.1179682752113622e-06, "epoch": 3.5890788041738686, "percentage": 71.78, "elapsed_time": "2:27:45", "remaining_time": "0:58:05", "throughput": 19901.27, "total_tokens": 176441472}
|
|
{"current_steps": 56070, "total_steps": 78105, "loss": 0.1449, "lr": 1.11750278162191e-06, "epoch": 3.5893988861148456, "percentage": 71.79, "elapsed_time": "2:27:46", "remaining_time": "0:58:04", "throughput": 19901.58, "total_tokens": 176458176}
|
|
{"current_steps": 56075, "total_steps": 78105, "loss": 0.2215, "lr": 1.1170373570671697e-06, "epoch": 3.589718968055822, "percentage": 71.79, "elapsed_time": "2:27:47", "remaining_time": "0:58:03", "throughput": 19901.83, "total_tokens": 176473856}
|
|
{"current_steps": 56080, "total_steps": 78105, "loss": 0.1884, "lr": 1.1165720015703784e-06, "epoch": 3.590039049996799, "percentage": 71.8, "elapsed_time": "2:27:47", "remaining_time": "0:58:02", "throughput": 19902.06, "total_tokens": 176489216}
|
|
{"current_steps": 56085, "total_steps": 78105, "loss": 0.1233, "lr": 1.1161067151547761e-06, "epoch": 3.590359131937776, "percentage": 71.81, "elapsed_time": "2:27:48", "remaining_time": "0:58:01", "throughput": 19902.27, "total_tokens": 176504192}
|
|
{"current_steps": 56090, "total_steps": 78105, "loss": 0.1553, "lr": 1.1156414978435961e-06, "epoch": 3.590679213878753, "percentage": 71.81, "elapsed_time": "2:27:49", "remaining_time": "0:58:01", "throughput": 19902.48, "total_tokens": 176518912}
|
|
{"current_steps": 56095, "total_steps": 78105, "loss": 0.1501, "lr": 1.115176349660069e-06, "epoch": 3.5909992958197297, "percentage": 71.82, "elapsed_time": "2:27:49", "remaining_time": "0:58:00", "throughput": 19902.78, "total_tokens": 176535744}
|
|
{"current_steps": 56100, "total_steps": 78105, "loss": 0.2381, "lr": 1.1147112706274216e-06, "epoch": 3.5913193777607066, "percentage": 71.83, "elapsed_time": "2:27:50", "remaining_time": "0:57:59", "throughput": 19903.01, "total_tokens": 176550784}
|
|
{"current_steps": 56105, "total_steps": 78105, "loss": 0.3078, "lr": 1.1142462607688776e-06, "epoch": 3.5916394597016836, "percentage": 71.83, "elapsed_time": "2:27:51", "remaining_time": "0:57:58", "throughput": 19903.26, "total_tokens": 176566464}
|
|
{"current_steps": 56110, "total_steps": 78105, "loss": 0.1107, "lr": 1.1137813201076564e-06, "epoch": 3.5919595416426606, "percentage": 71.84, "elapsed_time": "2:27:51", "remaining_time": "0:57:57", "throughput": 19903.54, "total_tokens": 176582592}
|
|
{"current_steps": 56115, "total_steps": 78105, "loss": 0.1347, "lr": 1.113316448666977e-06, "epoch": 3.5922796235836376, "percentage": 71.85, "elapsed_time": "2:27:52", "remaining_time": "0:57:56", "throughput": 19903.79, "total_tokens": 176598528}
|
|
{"current_steps": 56120, "total_steps": 78105, "loss": 0.1809, "lr": 1.1128516464700498e-06, "epoch": 3.592599705524614, "percentage": 71.85, "elapsed_time": "2:27:53", "remaining_time": "0:57:56", "throughput": 19904.1, "total_tokens": 176615552}
|
|
{"current_steps": 56125, "total_steps": 78105, "loss": 0.1755, "lr": 1.1123869135400866e-06, "epoch": 3.592919787465591, "percentage": 71.86, "elapsed_time": "2:27:53", "remaining_time": "0:57:55", "throughput": 19904.36, "total_tokens": 176631232}
|
|
{"current_steps": 56130, "total_steps": 78105, "loss": 0.238, "lr": 1.1119222499002931e-06, "epoch": 3.593239869406568, "percentage": 71.86, "elapsed_time": "2:27:54", "remaining_time": "0:57:54", "throughput": 19904.59, "total_tokens": 176646400}
|
|
{"current_steps": 56135, "total_steps": 78105, "loss": 0.2218, "lr": 1.1114576555738725e-06, "epoch": 3.593559951347545, "percentage": 71.87, "elapsed_time": "2:27:55", "remaining_time": "0:57:53", "throughput": 19904.88, "total_tokens": 176662528}
|
|
{"current_steps": 56140, "total_steps": 78105, "loss": 0.1394, "lr": 1.1109931305840238e-06, "epoch": 3.5938800332885217, "percentage": 71.88, "elapsed_time": "2:27:56", "remaining_time": "0:57:52", "throughput": 19905.15, "total_tokens": 176678720}
|
|
{"current_steps": 56145, "total_steps": 78105, "loss": 0.1446, "lr": 1.110528674953942e-06, "epoch": 3.5942001152294987, "percentage": 71.88, "elapsed_time": "2:27:56", "remaining_time": "0:57:51", "throughput": 19905.46, "total_tokens": 176695552}
|
|
{"current_steps": 56150, "total_steps": 78105, "loss": 0.1611, "lr": 1.1100642887068228e-06, "epoch": 3.5945201971704757, "percentage": 71.89, "elapsed_time": "2:27:57", "remaining_time": "0:57:51", "throughput": 19905.63, "total_tokens": 176709760}
|
|
{"current_steps": 56155, "total_steps": 78105, "loss": 0.1788, "lr": 1.1095999718658512e-06, "epoch": 3.5948402791114527, "percentage": 71.9, "elapsed_time": "2:27:58", "remaining_time": "0:57:50", "throughput": 19905.85, "total_tokens": 176724544}
|
|
{"current_steps": 56160, "total_steps": 78105, "loss": 0.1416, "lr": 1.1091357244542156e-06, "epoch": 3.5951603610524296, "percentage": 71.9, "elapsed_time": "2:27:58", "remaining_time": "0:57:49", "throughput": 19906.14, "total_tokens": 176740864}
|
|
{"current_steps": 56165, "total_steps": 78105, "loss": 0.1585, "lr": 1.108671546495097e-06, "epoch": 3.595480442993406, "percentage": 71.91, "elapsed_time": "2:27:59", "remaining_time": "0:57:48", "throughput": 19906.46, "total_tokens": 176757504}
|
|
{"current_steps": 56170, "total_steps": 78105, "loss": 0.2267, "lr": 1.1082074380116746e-06, "epoch": 3.595800524934383, "percentage": 71.92, "elapsed_time": "2:28:00", "remaining_time": "0:57:47", "throughput": 19906.75, "total_tokens": 176773888}
|
|
{"current_steps": 56175, "total_steps": 78105, "loss": 0.1493, "lr": 1.1077433990271228e-06, "epoch": 3.59612060687536, "percentage": 71.92, "elapsed_time": "2:28:00", "remaining_time": "0:57:46", "throughput": 19906.98, "total_tokens": 176789568}
|
|
{"current_steps": 56180, "total_steps": 78105, "loss": 0.1922, "lr": 1.1072794295646135e-06, "epoch": 3.5964406888163367, "percentage": 71.93, "elapsed_time": "2:28:01", "remaining_time": "0:57:46", "throughput": 19907.27, "total_tokens": 176805696}
|
|
{"current_steps": 56185, "total_steps": 78105, "loss": 0.1178, "lr": 1.1068155296473153e-06, "epoch": 3.5967607707573137, "percentage": 71.94, "elapsed_time": "2:28:02", "remaining_time": "0:57:45", "throughput": 19907.65, "total_tokens": 176824064}
|
|
{"current_steps": 56190, "total_steps": 78105, "loss": 0.1523, "lr": 1.1063516992983918e-06, "epoch": 3.5970808526982907, "percentage": 71.94, "elapsed_time": "2:28:02", "remaining_time": "0:57:44", "throughput": 19907.86, "total_tokens": 176839040}
|
|
{"current_steps": 56195, "total_steps": 78105, "loss": 0.1325, "lr": 1.1058879385410062e-06, "epoch": 3.5974009346392677, "percentage": 71.95, "elapsed_time": "2:28:03", "remaining_time": "0:57:43", "throughput": 19908.11, "total_tokens": 176854720}
|
|
{"current_steps": 56200, "total_steps": 78105, "loss": 0.2025, "lr": 1.1054242473983151e-06, "epoch": 3.5977210165802447, "percentage": 71.95, "elapsed_time": "2:28:04", "remaining_time": "0:57:42", "throughput": 19908.31, "total_tokens": 176869376}
|
|
{"current_steps": 56205, "total_steps": 78105, "loss": 0.0986, "lr": 1.104960625893473e-06, "epoch": 3.5980410985212217, "percentage": 71.96, "elapsed_time": "2:28:04", "remaining_time": "0:57:41", "throughput": 19908.63, "total_tokens": 176886784}
|
|
{"current_steps": 56210, "total_steps": 78105, "loss": 0.1185, "lr": 1.1044970740496308e-06, "epoch": 3.598361180462198, "percentage": 71.97, "elapsed_time": "2:28:05", "remaining_time": "0:57:41", "throughput": 19908.83, "total_tokens": 176901376}
|
|
{"current_steps": 56215, "total_steps": 78105, "loss": 0.148, "lr": 1.104033591889936e-06, "epoch": 3.598681262403175, "percentage": 71.97, "elapsed_time": "2:28:06", "remaining_time": "0:57:40", "throughput": 19909.07, "total_tokens": 176916928}
|
|
{"current_steps": 56220, "total_steps": 78105, "loss": 0.184, "lr": 1.1035701794375322e-06, "epoch": 3.599001344344152, "percentage": 71.98, "elapsed_time": "2:28:06", "remaining_time": "0:57:39", "throughput": 19909.32, "total_tokens": 176932288}
|
|
{"current_steps": 56225, "total_steps": 78105, "loss": 0.1429, "lr": 1.1031068367155598e-06, "epoch": 3.5993214262851287, "percentage": 71.99, "elapsed_time": "2:28:07", "remaining_time": "0:57:38", "throughput": 19909.62, "total_tokens": 176948800}
|
|
{"current_steps": 56230, "total_steps": 78105, "loss": 0.2124, "lr": 1.102643563747155e-06, "epoch": 3.5996415082261057, "percentage": 71.99, "elapsed_time": "2:28:08", "remaining_time": "0:57:37", "throughput": 19909.83, "total_tokens": 176963648}
|
|
{"current_steps": 56235, "total_steps": 78105, "loss": 0.1687, "lr": 1.1021803605554538e-06, "epoch": 3.5999615901670827, "percentage": 72.0, "elapsed_time": "2:28:08", "remaining_time": "0:57:36", "throughput": 19910.07, "total_tokens": 176979200}
|
|
{"current_steps": 56240, "total_steps": 78105, "loss": 0.1796, "lr": 1.1017172271635825e-06, "epoch": 3.6002816721080597, "percentage": 72.01, "elapsed_time": "2:28:09", "remaining_time": "0:57:36", "throughput": 19910.32, "total_tokens": 176994880}
|
|
{"current_steps": 56245, "total_steps": 78105, "loss": 0.1608, "lr": 1.1012541635946705e-06, "epoch": 3.6006017540490367, "percentage": 72.01, "elapsed_time": "2:28:10", "remaining_time": "0:57:35", "throughput": 19910.54, "total_tokens": 177009792}
|
|
{"current_steps": 56250, "total_steps": 78105, "loss": 0.1098, "lr": 1.10079116987184e-06, "epoch": 3.6009218359900137, "percentage": 72.02, "elapsed_time": "2:28:10", "remaining_time": "0:57:34", "throughput": 19910.77, "total_tokens": 177025024}
|
|
{"current_steps": 56255, "total_steps": 78105, "loss": 0.2529, "lr": 1.1003282460182091e-06, "epoch": 3.6012419179309902, "percentage": 72.02, "elapsed_time": "2:28:11", "remaining_time": "0:57:33", "throughput": 19910.98, "total_tokens": 177039872}
|
|
{"current_steps": 56260, "total_steps": 78105, "loss": 0.1617, "lr": 1.099865392056897e-06, "epoch": 3.6015619998719672, "percentage": 72.03, "elapsed_time": "2:28:12", "remaining_time": "0:57:32", "throughput": 19911.25, "total_tokens": 177055872}
|
|
{"current_steps": 56265, "total_steps": 78105, "loss": 0.204, "lr": 1.099402608011012e-06, "epoch": 3.6018820818129442, "percentage": 72.04, "elapsed_time": "2:28:12", "remaining_time": "0:57:31", "throughput": 19911.53, "total_tokens": 177072320}
|
|
{"current_steps": 56270, "total_steps": 78105, "loss": 0.1869, "lr": 1.0989398939036674e-06, "epoch": 3.6022021637539208, "percentage": 72.04, "elapsed_time": "2:28:13", "remaining_time": "0:57:31", "throughput": 19911.8, "total_tokens": 177088512}
|
|
{"current_steps": 56275, "total_steps": 78105, "loss": 0.2186, "lr": 1.0984772497579648e-06, "epoch": 3.6025222456948978, "percentage": 72.05, "elapsed_time": "2:28:14", "remaining_time": "0:57:30", "throughput": 19912.03, "total_tokens": 177103744}
|
|
{"current_steps": 56280, "total_steps": 78105, "loss": 0.1175, "lr": 1.0980146755970088e-06, "epoch": 3.6028423276358748, "percentage": 72.06, "elapsed_time": "2:28:14", "remaining_time": "0:57:29", "throughput": 19912.25, "total_tokens": 177118400}
|
|
{"current_steps": 56285, "total_steps": 78105, "loss": 0.1222, "lr": 1.0975521714438975e-06, "epoch": 3.6031624095768517, "percentage": 72.06, "elapsed_time": "2:28:15", "remaining_time": "0:57:28", "throughput": 19912.49, "total_tokens": 177133696}
|
|
{"current_steps": 56290, "total_steps": 78105, "loss": 0.1266, "lr": 1.0970897373217258e-06, "epoch": 3.6034824915178287, "percentage": 72.07, "elapsed_time": "2:28:16", "remaining_time": "0:57:27", "throughput": 19912.7, "total_tokens": 177148608}
|
|
{"current_steps": 56295, "total_steps": 78105, "loss": 0.2266, "lr": 1.0966273732535846e-06, "epoch": 3.6038025734588057, "percentage": 72.08, "elapsed_time": "2:28:16", "remaining_time": "0:57:26", "throughput": 19912.93, "total_tokens": 177163904}
|
|
{"current_steps": 56300, "total_steps": 78105, "loss": 0.21, "lr": 1.0961650792625618e-06, "epoch": 3.6041226553997823, "percentage": 72.08, "elapsed_time": "2:28:17", "remaining_time": "0:57:26", "throughput": 19913.16, "total_tokens": 177179008}
|
|
{"current_steps": 56305, "total_steps": 78105, "loss": 0.2096, "lr": 1.0957028553717448e-06, "epoch": 3.6044427373407593, "percentage": 72.09, "elapsed_time": "2:28:18", "remaining_time": "0:57:25", "throughput": 19913.44, "total_tokens": 177195584}
|
|
{"current_steps": 56310, "total_steps": 78105, "loss": 0.1885, "lr": 1.0952407016042103e-06, "epoch": 3.6047628192817363, "percentage": 72.1, "elapsed_time": "2:28:18", "remaining_time": "0:57:24", "throughput": 19913.7, "total_tokens": 177211200}
|
|
{"current_steps": 56315, "total_steps": 78105, "loss": 0.1208, "lr": 1.0947786179830388e-06, "epoch": 3.605082901222713, "percentage": 72.1, "elapsed_time": "2:28:19", "remaining_time": "0:57:23", "throughput": 19913.95, "total_tokens": 177226624}
|
|
{"current_steps": 56320, "total_steps": 78105, "loss": 0.1208, "lr": 1.0943166045313039e-06, "epoch": 3.60540298316369, "percentage": 72.11, "elapsed_time": "2:28:20", "remaining_time": "0:57:22", "throughput": 19914.19, "total_tokens": 177242240}
|
|
{"current_steps": 56325, "total_steps": 78105, "loss": 0.1427, "lr": 1.0938546612720756e-06, "epoch": 3.605723065104667, "percentage": 72.11, "elapsed_time": "2:28:20", "remaining_time": "0:57:21", "throughput": 19914.5, "total_tokens": 177258944}
|
|
{"current_steps": 56330, "total_steps": 78105, "loss": 0.1503, "lr": 1.0933927882284216e-06, "epoch": 3.6060431470456438, "percentage": 72.12, "elapsed_time": "2:28:21", "remaining_time": "0:57:21", "throughput": 19914.74, "total_tokens": 177274240}
|
|
{"current_steps": 56335, "total_steps": 78105, "loss": 0.1458, "lr": 1.0929309854234047e-06, "epoch": 3.6063632289866208, "percentage": 72.13, "elapsed_time": "2:28:22", "remaining_time": "0:57:20", "throughput": 19914.95, "total_tokens": 177289024}
|
|
{"current_steps": 56340, "total_steps": 78105, "loss": 0.1496, "lr": 1.0924692528800844e-06, "epoch": 3.6066833109275973, "percentage": 72.13, "elapsed_time": "2:28:22", "remaining_time": "0:57:19", "throughput": 19915.17, "total_tokens": 177303744}
|
|
{"current_steps": 56345, "total_steps": 78105, "loss": 0.1769, "lr": 1.0920075906215196e-06, "epoch": 3.6070033928685743, "percentage": 72.14, "elapsed_time": "2:28:23", "remaining_time": "0:57:18", "throughput": 19915.41, "total_tokens": 177319488}
|
|
{"current_steps": 56350, "total_steps": 78105, "loss": 0.1742, "lr": 1.0915459986707601e-06, "epoch": 3.6073234748095513, "percentage": 72.15, "elapsed_time": "2:28:24", "remaining_time": "0:57:17", "throughput": 19915.64, "total_tokens": 177334400}
|
|
{"current_steps": 56355, "total_steps": 78105, "loss": 0.2224, "lr": 1.0910844770508588e-06, "epoch": 3.6076435567505283, "percentage": 72.15, "elapsed_time": "2:28:24", "remaining_time": "0:57:16", "throughput": 19915.9, "total_tokens": 177350336}
|
|
{"current_steps": 56360, "total_steps": 78105, "loss": 0.1737, "lr": 1.0906230257848586e-06, "epoch": 3.607963638691505, "percentage": 72.16, "elapsed_time": "2:28:25", "remaining_time": "0:57:16", "throughput": 19916.18, "total_tokens": 177366784}
|
|
{"current_steps": 56365, "total_steps": 78105, "loss": 0.1851, "lr": 1.0901616448958036e-06, "epoch": 3.608283720632482, "percentage": 72.17, "elapsed_time": "2:28:26", "remaining_time": "0:57:15", "throughput": 19916.38, "total_tokens": 177381504}
|
|
{"current_steps": 56370, "total_steps": 78105, "loss": 0.23, "lr": 1.0897003344067328e-06, "epoch": 3.608603802573459, "percentage": 72.17, "elapsed_time": "2:28:26", "remaining_time": "0:57:14", "throughput": 19916.63, "total_tokens": 177396864}
|
|
{"current_steps": 56375, "total_steps": 78105, "loss": 0.124, "lr": 1.0892390943406803e-06, "epoch": 3.608923884514436, "percentage": 72.18, "elapsed_time": "2:28:27", "remaining_time": "0:57:13", "throughput": 19916.89, "total_tokens": 177412800}
|
|
{"current_steps": 56380, "total_steps": 78105, "loss": 0.1778, "lr": 1.0887779247206809e-06, "epoch": 3.609243966455413, "percentage": 72.18, "elapsed_time": "2:28:28", "remaining_time": "0:57:12", "throughput": 19917.15, "total_tokens": 177428736}
|
|
{"current_steps": 56385, "total_steps": 78105, "loss": 0.114, "lr": 1.0883168255697596e-06, "epoch": 3.6095640483963893, "percentage": 72.19, "elapsed_time": "2:28:29", "remaining_time": "0:57:11", "throughput": 19917.4, "total_tokens": 177444544}
|
|
{"current_steps": 56390, "total_steps": 78105, "loss": 0.1327, "lr": 1.0878557969109432e-06, "epoch": 3.6098841303373663, "percentage": 72.2, "elapsed_time": "2:28:29", "remaining_time": "0:57:10", "throughput": 19917.65, "total_tokens": 177460224}
|
|
{"current_steps": 56395, "total_steps": 78105, "loss": 0.1233, "lr": 1.0873948387672531e-06, "epoch": 3.6102042122783433, "percentage": 72.2, "elapsed_time": "2:28:30", "remaining_time": "0:57:10", "throughput": 19917.89, "total_tokens": 177475776}
|
|
{"current_steps": 56400, "total_steps": 78105, "loss": 0.1733, "lr": 1.0869339511617066e-06, "epoch": 3.6105242942193203, "percentage": 72.21, "elapsed_time": "2:28:31", "remaining_time": "0:57:09", "throughput": 19918.14, "total_tokens": 177491520}
|
|
{"current_steps": 56405, "total_steps": 78105, "loss": 0.1546, "lr": 1.0864731341173186e-06, "epoch": 3.610844376160297, "percentage": 72.22, "elapsed_time": "2:28:31", "remaining_time": "0:57:08", "throughput": 19918.34, "total_tokens": 177506048}
|
|
{"current_steps": 56410, "total_steps": 78105, "loss": 0.1158, "lr": 1.0860123876570991e-06, "epoch": 3.611164458101274, "percentage": 72.22, "elapsed_time": "2:28:32", "remaining_time": "0:57:07", "throughput": 19918.68, "total_tokens": 177523904}
|
|
{"current_steps": 56415, "total_steps": 78105, "loss": 0.162, "lr": 1.0855517118040556e-06, "epoch": 3.611484540042251, "percentage": 72.23, "elapsed_time": "2:28:33", "remaining_time": "0:57:06", "throughput": 19918.92, "total_tokens": 177539264}
|
|
{"current_steps": 56420, "total_steps": 78105, "loss": 0.255, "lr": 1.0850911065811914e-06, "epoch": 3.611804621983228, "percentage": 72.24, "elapsed_time": "2:28:33", "remaining_time": "0:57:06", "throughput": 19919.2, "total_tokens": 177555392}
|
|
{"current_steps": 56425, "total_steps": 78105, "loss": 0.1543, "lr": 1.0846305720115083e-06, "epoch": 3.612124703924205, "percentage": 72.24, "elapsed_time": "2:28:34", "remaining_time": "0:57:05", "throughput": 19919.44, "total_tokens": 177570816}
|
|
{"current_steps": 56430, "total_steps": 78105, "loss": 0.136, "lr": 1.0841701081180018e-06, "epoch": 3.6124447858651814, "percentage": 72.25, "elapsed_time": "2:28:35", "remaining_time": "0:57:04", "throughput": 19919.7, "total_tokens": 177587136}
|
|
{"current_steps": 56435, "total_steps": 78105, "loss": 0.2711, "lr": 1.0837097149236655e-06, "epoch": 3.6127648678061584, "percentage": 72.26, "elapsed_time": "2:28:35", "remaining_time": "0:57:03", "throughput": 19919.93, "total_tokens": 177602752}
|
|
{"current_steps": 56440, "total_steps": 78105, "loss": 0.2058, "lr": 1.0832493924514886e-06, "epoch": 3.6130849497471353, "percentage": 72.26, "elapsed_time": "2:28:36", "remaining_time": "0:57:02", "throughput": 19920.24, "total_tokens": 177619712}
|
|
{"current_steps": 56445, "total_steps": 78105, "loss": 0.183, "lr": 1.0827891407244578e-06, "epoch": 3.613405031688112, "percentage": 72.27, "elapsed_time": "2:28:37", "remaining_time": "0:57:01", "throughput": 19920.48, "total_tokens": 177635008}
|
|
{"current_steps": 56450, "total_steps": 78105, "loss": 0.1528, "lr": 1.082328959765555e-06, "epoch": 3.613725113629089, "percentage": 72.27, "elapsed_time": "2:28:37", "remaining_time": "0:57:01", "throughput": 19920.72, "total_tokens": 177650688}
|
|
{"current_steps": 56455, "total_steps": 78105, "loss": 0.1042, "lr": 1.08186884959776e-06, "epoch": 3.614045195570066, "percentage": 72.28, "elapsed_time": "2:28:38", "remaining_time": "0:57:00", "throughput": 19920.95, "total_tokens": 177665792}
|
|
{"current_steps": 56460, "total_steps": 78105, "loss": 0.14, "lr": 1.0814088102440464e-06, "epoch": 3.614365277511043, "percentage": 72.29, "elapsed_time": "2:28:39", "remaining_time": "0:56:59", "throughput": 19921.22, "total_tokens": 177681792}
|
|
{"current_steps": 56465, "total_steps": 78105, "loss": 0.1633, "lr": 1.08094884172739e-06, "epoch": 3.61468535945202, "percentage": 72.29, "elapsed_time": "2:28:39", "remaining_time": "0:56:58", "throughput": 19921.4, "total_tokens": 177696128}
|
|
{"current_steps": 56470, "total_steps": 78105, "loss": 0.1264, "lr": 1.0804889440707548e-06, "epoch": 3.615005441392997, "percentage": 72.3, "elapsed_time": "2:28:40", "remaining_time": "0:56:57", "throughput": 19921.7, "total_tokens": 177712960}
|
|
{"current_steps": 56475, "total_steps": 78105, "loss": 0.1892, "lr": 1.0800291172971085e-06, "epoch": 3.6153255233339734, "percentage": 72.31, "elapsed_time": "2:28:41", "remaining_time": "0:56:56", "throughput": 19921.92, "total_tokens": 177728128}
|
|
{"current_steps": 56480, "total_steps": 78105, "loss": 0.1293, "lr": 1.0795693614294122e-06, "epoch": 3.6156456052749504, "percentage": 72.31, "elapsed_time": "2:28:41", "remaining_time": "0:56:56", "throughput": 19922.21, "total_tokens": 177744832}
|
|
{"current_steps": 56485, "total_steps": 78105, "loss": 0.1196, "lr": 1.079109676490622e-06, "epoch": 3.6159656872159274, "percentage": 72.32, "elapsed_time": "2:28:42", "remaining_time": "0:56:55", "throughput": 19922.47, "total_tokens": 177760512}
|
|
{"current_steps": 56490, "total_steps": 78105, "loss": 0.136, "lr": 1.0786500625036952e-06, "epoch": 3.616285769156904, "percentage": 72.33, "elapsed_time": "2:28:43", "remaining_time": "0:56:54", "throughput": 19922.75, "total_tokens": 177776896}
|
|
{"current_steps": 56495, "total_steps": 78105, "loss": 0.2233, "lr": 1.0781905194915793e-06, "epoch": 3.616605851097881, "percentage": 72.33, "elapsed_time": "2:28:43", "remaining_time": "0:56:53", "throughput": 19922.98, "total_tokens": 177792256}
|
|
{"current_steps": 56500, "total_steps": 78105, "loss": 0.1567, "lr": 1.0777310474772242e-06, "epoch": 3.616925933038858, "percentage": 72.34, "elapsed_time": "2:28:44", "remaining_time": "0:56:52", "throughput": 19923.21, "total_tokens": 177807616}
|
|
{"current_steps": 56505, "total_steps": 78105, "loss": 0.1297, "lr": 1.0772716464835708e-06, "epoch": 3.617246014979835, "percentage": 72.34, "elapsed_time": "2:28:45", "remaining_time": "0:56:51", "throughput": 19923.44, "total_tokens": 177822848}
|
|
{"current_steps": 56510, "total_steps": 78105, "loss": 0.177, "lr": 1.0768123165335615e-06, "epoch": 3.617566096920812, "percentage": 72.35, "elapsed_time": "2:28:45", "remaining_time": "0:56:51", "throughput": 19923.69, "total_tokens": 177838656}
|
|
{"current_steps": 56515, "total_steps": 78105, "loss": 0.1451, "lr": 1.076353057650132e-06, "epoch": 3.617886178861789, "percentage": 72.36, "elapsed_time": "2:28:46", "remaining_time": "0:56:50", "throughput": 19923.99, "total_tokens": 177855744}
|
|
{"current_steps": 56520, "total_steps": 78105, "loss": 0.1705, "lr": 1.0758938698562152e-06, "epoch": 3.6182062608027654, "percentage": 72.36, "elapsed_time": "2:28:47", "remaining_time": "0:56:49", "throughput": 19924.22, "total_tokens": 177870720}
|
|
{"current_steps": 56525, "total_steps": 78105, "loss": 0.2157, "lr": 1.0754347531747406e-06, "epoch": 3.6185263427437424, "percentage": 72.37, "elapsed_time": "2:28:48", "remaining_time": "0:56:48", "throughput": 19924.44, "total_tokens": 177886144}
|
|
{"current_steps": 56530, "total_steps": 78105, "loss": 0.2933, "lr": 1.0749757076286343e-06, "epoch": 3.6188464246847194, "percentage": 72.38, "elapsed_time": "2:28:48", "remaining_time": "0:56:47", "throughput": 19924.71, "total_tokens": 177902208}
|
|
{"current_steps": 56535, "total_steps": 78105, "loss": 0.1728, "lr": 1.0745167332408175e-06, "epoch": 3.619166506625696, "percentage": 72.38, "elapsed_time": "2:28:49", "remaining_time": "0:56:46", "throughput": 19924.96, "total_tokens": 177918272}
|
|
{"current_steps": 56540, "total_steps": 78105, "loss": 0.0814, "lr": 1.0740578300342107e-06, "epoch": 3.619486588566673, "percentage": 72.39, "elapsed_time": "2:28:50", "remaining_time": "0:56:46", "throughput": 19925.25, "total_tokens": 177934592}
|
|
{"current_steps": 56545, "total_steps": 78105, "loss": 0.1128, "lr": 1.0735989980317287e-06, "epoch": 3.61980667050765, "percentage": 72.4, "elapsed_time": "2:28:50", "remaining_time": "0:56:45", "throughput": 19925.5, "total_tokens": 177950528}
|
|
{"current_steps": 56550, "total_steps": 78105, "loss": 0.1652, "lr": 1.0731402372562826e-06, "epoch": 3.620126752448627, "percentage": 72.4, "elapsed_time": "2:28:51", "remaining_time": "0:56:44", "throughput": 19925.74, "total_tokens": 177965696}
|
|
{"current_steps": 56555, "total_steps": 78105, "loss": 0.1763, "lr": 1.072681547730781e-06, "epoch": 3.620446834389604, "percentage": 72.41, "elapsed_time": "2:28:52", "remaining_time": "0:56:43", "throughput": 19925.98, "total_tokens": 177981056}
|
|
{"current_steps": 56560, "total_steps": 78105, "loss": 0.1637, "lr": 1.0722229294781284e-06, "epoch": 3.620766916330581, "percentage": 72.42, "elapsed_time": "2:28:52", "remaining_time": "0:56:42", "throughput": 19926.27, "total_tokens": 177997696}
|
|
{"current_steps": 56565, "total_steps": 78105, "loss": 0.1245, "lr": 1.0717643825212253e-06, "epoch": 3.6210869982715574, "percentage": 72.42, "elapsed_time": "2:28:53", "remaining_time": "0:56:41", "throughput": 19926.51, "total_tokens": 178013760}
|
|
{"current_steps": 56570, "total_steps": 78105, "loss": 0.1651, "lr": 1.071305906882969e-06, "epoch": 3.6214070802125344, "percentage": 72.43, "elapsed_time": "2:28:54", "remaining_time": "0:56:41", "throughput": 19926.78, "total_tokens": 178029952}
|
|
{"current_steps": 56575, "total_steps": 78105, "loss": 0.1644, "lr": 1.0708475025862558e-06, "epoch": 3.6217271621535114, "percentage": 72.43, "elapsed_time": "2:28:54", "remaining_time": "0:56:40", "throughput": 19927.09, "total_tokens": 178046976}
|
|
{"current_steps": 56580, "total_steps": 78105, "loss": 0.0867, "lr": 1.070389169653972e-06, "epoch": 3.622047244094488, "percentage": 72.44, "elapsed_time": "2:28:55", "remaining_time": "0:56:39", "throughput": 19927.36, "total_tokens": 178063040}
|
|
{"current_steps": 56585, "total_steps": 78105, "loss": 0.1882, "lr": 1.0699309081090085e-06, "epoch": 3.622367326035465, "percentage": 72.45, "elapsed_time": "2:28:56", "remaining_time": "0:56:38", "throughput": 19927.62, "total_tokens": 178078912}
|
|
{"current_steps": 56590, "total_steps": 78105, "loss": 0.1782, "lr": 1.0694727179742445e-06, "epoch": 3.622687407976442, "percentage": 72.45, "elapsed_time": "2:28:56", "remaining_time": "0:56:37", "throughput": 19927.84, "total_tokens": 178093888}
|
|
{"current_steps": 56595, "total_steps": 78105, "loss": 0.1866, "lr": 1.0690145992725625e-06, "epoch": 3.623007489917419, "percentage": 72.46, "elapsed_time": "2:28:57", "remaining_time": "0:56:36", "throughput": 19928.05, "total_tokens": 178108992}
|
|
{"current_steps": 56600, "total_steps": 78105, "loss": 0.2354, "lr": 1.068556552026838e-06, "epoch": 3.623327571858396, "percentage": 72.47, "elapsed_time": "2:28:58", "remaining_time": "0:56:36", "throughput": 19928.27, "total_tokens": 178124352}
|
|
{"current_steps": 56605, "total_steps": 78105, "loss": 0.1695, "lr": 1.0680985762599418e-06, "epoch": 3.6236476537993725, "percentage": 72.47, "elapsed_time": "2:28:58", "remaining_time": "0:56:35", "throughput": 19928.58, "total_tokens": 178141568}
|
|
{"current_steps": 56610, "total_steps": 78105, "loss": 0.2273, "lr": 1.0676406719947466e-06, "epoch": 3.6239677357403495, "percentage": 72.48, "elapsed_time": "2:28:59", "remaining_time": "0:56:34", "throughput": 19928.9, "total_tokens": 178158720}
|
|
{"current_steps": 56615, "total_steps": 78105, "loss": 0.1764, "lr": 1.067182839254113e-06, "epoch": 3.6242878176813265, "percentage": 72.49, "elapsed_time": "2:29:00", "remaining_time": "0:56:33", "throughput": 19929.2, "total_tokens": 178175424}
|
|
{"current_steps": 56620, "total_steps": 78105, "loss": 0.2011, "lr": 1.0667250780609065e-06, "epoch": 3.6246078996223035, "percentage": 72.49, "elapsed_time": "2:29:01", "remaining_time": "0:56:32", "throughput": 19929.5, "total_tokens": 178192000}
|
|
{"current_steps": 56625, "total_steps": 78105, "loss": 0.2361, "lr": 1.0662673884379837e-06, "epoch": 3.62492798156328, "percentage": 72.5, "elapsed_time": "2:29:01", "remaining_time": "0:56:31", "throughput": 19929.71, "total_tokens": 178206528}
|
|
{"current_steps": 56630, "total_steps": 78105, "loss": 0.2, "lr": 1.0658097704081993e-06, "epoch": 3.625248063504257, "percentage": 72.5, "elapsed_time": "2:29:02", "remaining_time": "0:56:31", "throughput": 19929.97, "total_tokens": 178222528}
|
|
{"current_steps": 56635, "total_steps": 78105, "loss": 0.114, "lr": 1.0653522239944048e-06, "epoch": 3.625568145445234, "percentage": 72.51, "elapsed_time": "2:29:03", "remaining_time": "0:56:30", "throughput": 19930.2, "total_tokens": 178237696}
|
|
{"current_steps": 56640, "total_steps": 78105, "loss": 0.1414, "lr": 1.0648947492194473e-06, "epoch": 3.625888227386211, "percentage": 72.52, "elapsed_time": "2:29:03", "remaining_time": "0:56:29", "throughput": 19930.45, "total_tokens": 178253376}
|
|
{"current_steps": 56645, "total_steps": 78105, "loss": 0.138, "lr": 1.064437346106171e-06, "epoch": 3.626208309327188, "percentage": 72.52, "elapsed_time": "2:29:04", "remaining_time": "0:56:28", "throughput": 19930.92, "total_tokens": 178274368}
|
|
{"current_steps": 56650, "total_steps": 78105, "loss": 0.1887, "lr": 1.063980014677415e-06, "epoch": 3.6265283912681645, "percentage": 72.53, "elapsed_time": "2:29:05", "remaining_time": "0:56:27", "throughput": 19931.15, "total_tokens": 178289664}
|
|
{"current_steps": 56655, "total_steps": 78105, "loss": 0.1833, "lr": 1.0635227549560182e-06, "epoch": 3.6268484732091415, "percentage": 72.54, "elapsed_time": "2:29:05", "remaining_time": "0:56:27", "throughput": 19931.47, "total_tokens": 178306816}
|
|
{"current_steps": 56660, "total_steps": 78105, "loss": 0.1544, "lr": 1.0630655669648123e-06, "epoch": 3.6271685551501185, "percentage": 72.54, "elapsed_time": "2:29:06", "remaining_time": "0:56:26", "throughput": 19931.71, "total_tokens": 178322688}
|
|
{"current_steps": 56665, "total_steps": 78105, "loss": 0.2436, "lr": 1.0626084507266276e-06, "epoch": 3.6274886370910955, "percentage": 72.55, "elapsed_time": "2:29:07", "remaining_time": "0:56:25", "throughput": 19931.98, "total_tokens": 178338752}
|
|
{"current_steps": 56670, "total_steps": 78105, "loss": 0.1756, "lr": 1.0621514062642898e-06, "epoch": 3.627808719032072, "percentage": 72.56, "elapsed_time": "2:29:08", "remaining_time": "0:56:24", "throughput": 19932.24, "total_tokens": 178354496}
|
|
{"current_steps": 56675, "total_steps": 78105, "loss": 0.1773, "lr": 1.061694433600621e-06, "epoch": 3.628128800973049, "percentage": 72.56, "elapsed_time": "2:29:08", "remaining_time": "0:56:23", "throughput": 19932.46, "total_tokens": 178369984}
|
|
{"current_steps": 56680, "total_steps": 78105, "loss": 0.197, "lr": 1.0612375327584406e-06, "epoch": 3.628448882914026, "percentage": 72.57, "elapsed_time": "2:29:09", "remaining_time": "0:56:22", "throughput": 19932.8, "total_tokens": 178387392}
|
|
{"current_steps": 56685, "total_steps": 78105, "loss": 0.1712, "lr": 1.0607807037605637e-06, "epoch": 3.628768964855003, "percentage": 72.58, "elapsed_time": "2:29:10", "remaining_time": "0:56:22", "throughput": 19933.04, "total_tokens": 178403072}
|
|
{"current_steps": 56690, "total_steps": 78105, "loss": 0.1992, "lr": 1.0603239466298004e-06, "epoch": 3.62908904679598, "percentage": 72.58, "elapsed_time": "2:29:10", "remaining_time": "0:56:21", "throughput": 19933.27, "total_tokens": 178418240}
|
|
{"current_steps": 56695, "total_steps": 78105, "loss": 0.1193, "lr": 1.0598672613889623e-06, "epoch": 3.6294091287369565, "percentage": 72.59, "elapsed_time": "2:29:11", "remaining_time": "0:56:20", "throughput": 19933.48, "total_tokens": 178433280}
|
|
{"current_steps": 56700, "total_steps": 78105, "loss": 0.1284, "lr": 1.05941064806085e-06, "epoch": 3.6297292106779335, "percentage": 72.59, "elapsed_time": "2:29:12", "remaining_time": "0:56:19", "throughput": 19933.7, "total_tokens": 178448512}
|
|
{"current_steps": 56705, "total_steps": 78105, "loss": 0.2032, "lr": 1.0589541066682674e-06, "epoch": 3.6300492926189105, "percentage": 72.6, "elapsed_time": "2:29:12", "remaining_time": "0:56:18", "throughput": 19933.96, "total_tokens": 178464320}
|
|
{"current_steps": 56710, "total_steps": 78105, "loss": 0.1425, "lr": 1.05849763723401e-06, "epoch": 3.630369374559887, "percentage": 72.61, "elapsed_time": "2:29:13", "remaining_time": "0:56:17", "throughput": 19934.18, "total_tokens": 178479488}
|
|
{"current_steps": 56715, "total_steps": 78105, "loss": 0.1901, "lr": 1.0580412397808715e-06, "epoch": 3.630689456500864, "percentage": 72.61, "elapsed_time": "2:29:14", "remaining_time": "0:56:17", "throughput": 19934.35, "total_tokens": 178493632}
|
|
{"current_steps": 56720, "total_steps": 78105, "loss": 0.176, "lr": 1.0575849143316445e-06, "epoch": 3.631009538441841, "percentage": 72.62, "elapsed_time": "2:29:14", "remaining_time": "0:56:16", "throughput": 19934.55, "total_tokens": 178508608}
|
|
{"current_steps": 56725, "total_steps": 78105, "loss": 0.1989, "lr": 1.0571286609091116e-06, "epoch": 3.631329620382818, "percentage": 72.63, "elapsed_time": "2:29:15", "remaining_time": "0:56:15", "throughput": 19934.76, "total_tokens": 178523392}
|
|
{"current_steps": 56730, "total_steps": 78105, "loss": 0.2151, "lr": 1.0566724795360594e-06, "epoch": 3.631649702323795, "percentage": 72.63, "elapsed_time": "2:29:16", "remaining_time": "0:56:14", "throughput": 19935.05, "total_tokens": 178540096}
|
|
{"current_steps": 56735, "total_steps": 78105, "loss": 0.203, "lr": 1.056216370235264e-06, "epoch": 3.631969784264772, "percentage": 72.64, "elapsed_time": "2:29:16", "remaining_time": "0:56:13", "throughput": 19935.3, "total_tokens": 178555904}
|
|
{"current_steps": 56740, "total_steps": 78105, "loss": 0.2156, "lr": 1.0557603330295035e-06, "epoch": 3.6322898662057486, "percentage": 72.65, "elapsed_time": "2:29:17", "remaining_time": "0:56:12", "throughput": 19935.53, "total_tokens": 178571200}
|
|
{"current_steps": 56745, "total_steps": 78105, "loss": 0.2445, "lr": 1.0553043679415493e-06, "epoch": 3.6326099481467256, "percentage": 72.65, "elapsed_time": "2:29:18", "remaining_time": "0:56:12", "throughput": 19935.91, "total_tokens": 178589696}
|
|
{"current_steps": 56750, "total_steps": 78105, "loss": 0.1446, "lr": 1.05484847499417e-06, "epoch": 3.6329300300877025, "percentage": 72.66, "elapsed_time": "2:29:18", "remaining_time": "0:56:11", "throughput": 19936.17, "total_tokens": 178605376}
|
|
{"current_steps": 56755, "total_steps": 78105, "loss": 0.1405, "lr": 1.0543926542101299e-06, "epoch": 3.633250112028679, "percentage": 72.67, "elapsed_time": "2:29:19", "remaining_time": "0:56:10", "throughput": 19936.42, "total_tokens": 178621440}
|
|
{"current_steps": 56760, "total_steps": 78105, "loss": 0.1387, "lr": 1.0539369056121909e-06, "epoch": 3.633570193969656, "percentage": 72.67, "elapsed_time": "2:29:20", "remaining_time": "0:56:09", "throughput": 19936.66, "total_tokens": 178636864}
|
|
{"current_steps": 56765, "total_steps": 78105, "loss": 0.1275, "lr": 1.0534812292231097e-06, "epoch": 3.633890275910633, "percentage": 72.68, "elapsed_time": "2:29:20", "remaining_time": "0:56:08", "throughput": 19936.86, "total_tokens": 178651328}
|
|
{"current_steps": 56770, "total_steps": 78105, "loss": 0.1511, "lr": 1.053025625065642e-06, "epoch": 3.63421035785161, "percentage": 72.68, "elapsed_time": "2:29:21", "remaining_time": "0:56:07", "throughput": 19937.11, "total_tokens": 178666624}
|
|
{"current_steps": 56775, "total_steps": 78105, "loss": 0.1657, "lr": 1.052570093162538e-06, "epoch": 3.634530439792587, "percentage": 72.69, "elapsed_time": "2:29:22", "remaining_time": "0:56:07", "throughput": 19937.32, "total_tokens": 178681536}
|
|
{"current_steps": 56780, "total_steps": 78105, "loss": 0.1717, "lr": 1.0521146335365438e-06, "epoch": 3.634850521733564, "percentage": 72.7, "elapsed_time": "2:29:22", "remaining_time": "0:56:06", "throughput": 19937.53, "total_tokens": 178696512}
|
|
{"current_steps": 56785, "total_steps": 78105, "loss": 0.1864, "lr": 1.051659246210403e-06, "epoch": 3.6351706036745406, "percentage": 72.7, "elapsed_time": "2:29:23", "remaining_time": "0:56:05", "throughput": 19937.75, "total_tokens": 178711680}
|
|
{"current_steps": 56790, "total_steps": 78105, "loss": 0.1241, "lr": 1.0512039312068556e-06, "epoch": 3.6354906856155176, "percentage": 72.71, "elapsed_time": "2:29:24", "remaining_time": "0:56:04", "throughput": 19938.06, "total_tokens": 178728832}
|
|
{"current_steps": 56795, "total_steps": 78105, "loss": 0.2808, "lr": 1.0507486885486374e-06, "epoch": 3.6358107675564946, "percentage": 72.72, "elapsed_time": "2:29:24", "remaining_time": "0:56:03", "throughput": 19938.29, "total_tokens": 178743872}
|
|
{"current_steps": 56800, "total_steps": 78105, "loss": 0.1577, "lr": 1.0502935182584794e-06, "epoch": 3.636130849497471, "percentage": 72.72, "elapsed_time": "2:29:25", "remaining_time": "0:56:02", "throughput": 19938.57, "total_tokens": 178760192}
|
|
{"current_steps": 56805, "total_steps": 78105, "loss": 0.0967, "lr": 1.0498384203591141e-06, "epoch": 3.636450931438448, "percentage": 72.73, "elapsed_time": "2:29:26", "remaining_time": "0:56:02", "throughput": 19938.77, "total_tokens": 178774784}
|
|
{"current_steps": 56810, "total_steps": 78105, "loss": 0.1555, "lr": 1.0493833948732626e-06, "epoch": 3.636771013379425, "percentage": 72.74, "elapsed_time": "2:29:26", "remaining_time": "0:56:01", "throughput": 19939.0, "total_tokens": 178790208}
|
|
{"current_steps": 56815, "total_steps": 78105, "loss": 0.1846, "lr": 1.0489284418236504e-06, "epoch": 3.637091095320402, "percentage": 72.74, "elapsed_time": "2:29:27", "remaining_time": "0:56:00", "throughput": 19939.26, "total_tokens": 178806016}
|
|
{"current_steps": 56820, "total_steps": 78105, "loss": 0.1632, "lr": 1.0484735612329914e-06, "epoch": 3.637411177261379, "percentage": 72.75, "elapsed_time": "2:29:28", "remaining_time": "0:55:59", "throughput": 19939.52, "total_tokens": 178821760}
|
|
{"current_steps": 56825, "total_steps": 78105, "loss": 0.1746, "lr": 1.0480187531240028e-06, "epoch": 3.637731259202356, "percentage": 72.75, "elapsed_time": "2:29:28", "remaining_time": "0:55:58", "throughput": 19939.72, "total_tokens": 178836160}
|
|
{"current_steps": 56830, "total_steps": 78105, "loss": 0.2226, "lr": 1.0475640175193946e-06, "epoch": 3.6380513411433326, "percentage": 72.76, "elapsed_time": "2:29:29", "remaining_time": "0:55:57", "throughput": 19939.93, "total_tokens": 178850880}
|
|
{"current_steps": 56835, "total_steps": 78105, "loss": 0.1764, "lr": 1.047109354441873e-06, "epoch": 3.6383714230843096, "percentage": 72.77, "elapsed_time": "2:29:30", "remaining_time": "0:55:57", "throughput": 19940.18, "total_tokens": 178866816}
|
|
{"current_steps": 56840, "total_steps": 78105, "loss": 0.1911, "lr": 1.0466547639141444e-06, "epoch": 3.6386915050252866, "percentage": 72.77, "elapsed_time": "2:29:30", "remaining_time": "0:55:56", "throughput": 19940.44, "total_tokens": 178882752}
|
|
{"current_steps": 56845, "total_steps": 78105, "loss": 0.1797, "lr": 1.046200245958905e-06, "epoch": 3.639011586966263, "percentage": 72.78, "elapsed_time": "2:29:31", "remaining_time": "0:55:55", "throughput": 19940.66, "total_tokens": 178897664}
|
|
{"current_steps": 56850, "total_steps": 78105, "loss": 0.1386, "lr": 1.0457458005988533e-06, "epoch": 3.63933166890724, "percentage": 72.79, "elapsed_time": "2:29:32", "remaining_time": "0:55:54", "throughput": 19940.99, "total_tokens": 178915072}
|
|
{"current_steps": 56855, "total_steps": 78105, "loss": 0.1424, "lr": 1.0452914278566816e-06, "epoch": 3.639651750848217, "percentage": 72.79, "elapsed_time": "2:29:32", "remaining_time": "0:55:53", "throughput": 19941.22, "total_tokens": 178930368}
|
|
{"current_steps": 56860, "total_steps": 78105, "loss": 0.1593, "lr": 1.0448371277550787e-06, "epoch": 3.639971832789194, "percentage": 72.8, "elapsed_time": "2:29:33", "remaining_time": "0:55:52", "throughput": 19941.46, "total_tokens": 178945984}
|
|
{"current_steps": 56865, "total_steps": 78105, "loss": 0.1889, "lr": 1.0443829003167299e-06, "epoch": 3.640291914730171, "percentage": 72.81, "elapsed_time": "2:29:34", "remaining_time": "0:55:52", "throughput": 19941.89, "total_tokens": 178965632}
|
|
{"current_steps": 56870, "total_steps": 78105, "loss": 0.199, "lr": 1.043928745564317e-06, "epoch": 3.6406119966711477, "percentage": 72.81, "elapsed_time": "2:29:35", "remaining_time": "0:55:51", "throughput": 19942.11, "total_tokens": 178980864}
|
|
{"current_steps": 56875, "total_steps": 78105, "loss": 0.1782, "lr": 1.0434746635205182e-06, "epoch": 3.6409320786121246, "percentage": 72.82, "elapsed_time": "2:29:35", "remaining_time": "0:55:50", "throughput": 19942.37, "total_tokens": 178996864}
|
|
{"current_steps": 56880, "total_steps": 78105, "loss": 0.0972, "lr": 1.043020654208008e-06, "epoch": 3.6412521605531016, "percentage": 72.83, "elapsed_time": "2:29:36", "remaining_time": "0:55:49", "throughput": 19942.62, "total_tokens": 179012288}
|
|
{"current_steps": 56885, "total_steps": 78105, "loss": 0.1872, "lr": 1.042566717649456e-06, "epoch": 3.6415722424940786, "percentage": 72.83, "elapsed_time": "2:29:37", "remaining_time": "0:55:48", "throughput": 19942.88, "total_tokens": 179028224}
|
|
{"current_steps": 56890, "total_steps": 78105, "loss": 0.1648, "lr": 1.0421128538675319e-06, "epoch": 3.641892324435055, "percentage": 72.84, "elapsed_time": "2:29:37", "remaining_time": "0:55:47", "throughput": 19943.17, "total_tokens": 179044992}
|
|
{"current_steps": 56895, "total_steps": 78105, "loss": 0.1396, "lr": 1.0416590628848977e-06, "epoch": 3.642212406376032, "percentage": 72.84, "elapsed_time": "2:29:38", "remaining_time": "0:55:47", "throughput": 19943.38, "total_tokens": 179060032}
|
|
{"current_steps": 56900, "total_steps": 78105, "loss": 0.2087, "lr": 1.041205344724214e-06, "epoch": 3.642532488317009, "percentage": 72.85, "elapsed_time": "2:29:39", "remaining_time": "0:55:46", "throughput": 19943.65, "total_tokens": 179076160}
|
|
{"current_steps": 56905, "total_steps": 78105, "loss": 0.2219, "lr": 1.0407516994081365e-06, "epoch": 3.642852570257986, "percentage": 72.86, "elapsed_time": "2:29:39", "remaining_time": "0:55:45", "throughput": 19943.9, "total_tokens": 179091584}
|
|
{"current_steps": 56910, "total_steps": 78105, "loss": 0.1018, "lr": 1.0402981269593184e-06, "epoch": 3.643172652198963, "percentage": 72.86, "elapsed_time": "2:29:40", "remaining_time": "0:55:44", "throughput": 19944.17, "total_tokens": 179107520}
|
|
{"current_steps": 56915, "total_steps": 78105, "loss": 0.1515, "lr": 1.0398446274004083e-06, "epoch": 3.6434927341399397, "percentage": 72.87, "elapsed_time": "2:29:41", "remaining_time": "0:55:43", "throughput": 19944.42, "total_tokens": 179123712}
|
|
{"current_steps": 56920, "total_steps": 78105, "loss": 0.1506, "lr": 1.0393912007540514e-06, "epoch": 3.6438128160809167, "percentage": 72.88, "elapsed_time": "2:29:41", "remaining_time": "0:55:42", "throughput": 19944.63, "total_tokens": 179138688}
|
|
{"current_steps": 56925, "total_steps": 78105, "loss": 0.1346, "lr": 1.0389378470428913e-06, "epoch": 3.6441328980218937, "percentage": 72.88, "elapsed_time": "2:29:42", "remaining_time": "0:55:42", "throughput": 19944.86, "total_tokens": 179154368}
|
|
{"current_steps": 56930, "total_steps": 78105, "loss": 0.1626, "lr": 1.0384845662895629e-06, "epoch": 3.6444529799628707, "percentage": 72.89, "elapsed_time": "2:29:43", "remaining_time": "0:55:41", "throughput": 19945.09, "total_tokens": 179169728}
|
|
{"current_steps": 56935, "total_steps": 78105, "loss": 0.1644, "lr": 1.0380313585167037e-06, "epoch": 3.644773061903847, "percentage": 72.9, "elapsed_time": "2:29:43", "remaining_time": "0:55:40", "throughput": 19945.34, "total_tokens": 179185536}
|
|
{"current_steps": 56940, "total_steps": 78105, "loss": 0.184, "lr": 1.0375782237469432e-06, "epoch": 3.645093143844824, "percentage": 72.9, "elapsed_time": "2:29:44", "remaining_time": "0:55:39", "throughput": 19945.63, "total_tokens": 179202048}
|
|
{"current_steps": 56945, "total_steps": 78105, "loss": 0.1639, "lr": 1.0371251620029076e-06, "epoch": 3.645413225785801, "percentage": 72.91, "elapsed_time": "2:29:45", "remaining_time": "0:55:38", "throughput": 19945.87, "total_tokens": 179217920}
|
|
{"current_steps": 56950, "total_steps": 78105, "loss": 0.2135, "lr": 1.0366721733072239e-06, "epoch": 3.645733307726778, "percentage": 72.91, "elapsed_time": "2:29:45", "remaining_time": "0:55:37", "throughput": 19946.19, "total_tokens": 179234752}
|
|
{"current_steps": 56955, "total_steps": 78105, "loss": 0.1203, "lr": 1.0362192576825072e-06, "epoch": 3.646053389667755, "percentage": 72.92, "elapsed_time": "2:29:46", "remaining_time": "0:55:37", "throughput": 19946.48, "total_tokens": 179251264}
|
|
{"current_steps": 56960, "total_steps": 78105, "loss": 0.1495, "lr": 1.0357664151513785e-06, "epoch": 3.6463734716087317, "percentage": 72.93, "elapsed_time": "2:29:47", "remaining_time": "0:55:36", "throughput": 19946.76, "total_tokens": 179267712}
|
|
{"current_steps": 56965, "total_steps": 78105, "loss": 0.1079, "lr": 1.035313645736446e-06, "epoch": 3.6466935535497087, "percentage": 72.93, "elapsed_time": "2:29:47", "remaining_time": "0:55:35", "throughput": 19947.0, "total_tokens": 179283072}
|
|
{"current_steps": 56970, "total_steps": 78105, "loss": 0.1446, "lr": 1.0348609494603221e-06, "epoch": 3.6470136354906857, "percentage": 72.94, "elapsed_time": "2:29:48", "remaining_time": "0:55:34", "throughput": 19947.23, "total_tokens": 179298560}
|
|
{"current_steps": 56975, "total_steps": 78105, "loss": 0.203, "lr": 1.0344083263456103e-06, "epoch": 3.6473337174316622, "percentage": 72.95, "elapsed_time": "2:29:49", "remaining_time": "0:55:33", "throughput": 19947.43, "total_tokens": 179313152}
|
|
{"current_steps": 56980, "total_steps": 78105, "loss": 0.1554, "lr": 1.033955776414913e-06, "epoch": 3.6476537993726392, "percentage": 72.95, "elapsed_time": "2:29:49", "remaining_time": "0:55:32", "throughput": 19947.7, "total_tokens": 179329024}
|
|
{"current_steps": 56985, "total_steps": 78105, "loss": 0.1955, "lr": 1.0335032996908275e-06, "epoch": 3.647973881313616, "percentage": 72.96, "elapsed_time": "2:29:50", "remaining_time": "0:55:32", "throughput": 19947.95, "total_tokens": 179344640}
|
|
{"current_steps": 56990, "total_steps": 78105, "loss": 0.1811, "lr": 1.033050896195949e-06, "epoch": 3.648293963254593, "percentage": 72.97, "elapsed_time": "2:29:51", "remaining_time": "0:55:31", "throughput": 19948.24, "total_tokens": 179361216}
|
|
{"current_steps": 56995, "total_steps": 78105, "loss": 0.1391, "lr": 1.0325985659528664e-06, "epoch": 3.64861404519557, "percentage": 72.97, "elapsed_time": "2:29:51", "remaining_time": "0:55:30", "throughput": 19948.46, "total_tokens": 179376128}
|
|
{"current_steps": 57000, "total_steps": 78105, "loss": 0.1746, "lr": 1.0321463089841692e-06, "epoch": 3.648934127136547, "percentage": 72.98, "elapsed_time": "2:29:52", "remaining_time": "0:55:29", "throughput": 19948.72, "total_tokens": 179392064}
|
|
{"current_steps": 57005, "total_steps": 78105, "loss": 0.1963, "lr": 1.0316941253124393e-06, "epoch": 3.6492542090775237, "percentage": 72.99, "elapsed_time": "2:29:53", "remaining_time": "0:55:28", "throughput": 19948.96, "total_tokens": 179407616}
|
|
{"current_steps": 57010, "total_steps": 78105, "loss": 0.1602, "lr": 1.0312420149602568e-06, "epoch": 3.6495742910185007, "percentage": 72.99, "elapsed_time": "2:29:54", "remaining_time": "0:55:27", "throughput": 19949.23, "total_tokens": 179423872}
|
|
{"current_steps": 57015, "total_steps": 78105, "loss": 0.1398, "lr": 1.0307899779501976e-06, "epoch": 3.6498943729594777, "percentage": 73.0, "elapsed_time": "2:29:54", "remaining_time": "0:55:27", "throughput": 19949.43, "total_tokens": 179438656}
|
|
{"current_steps": 57020, "total_steps": 78105, "loss": 0.1756, "lr": 1.030338014304834e-06, "epoch": 3.6502144549004543, "percentage": 73.0, "elapsed_time": "2:29:55", "remaining_time": "0:55:26", "throughput": 19949.64, "total_tokens": 179453184}
|
|
{"current_steps": 57025, "total_steps": 78105, "loss": 0.1713, "lr": 1.0298861240467348e-06, "epoch": 3.6505345368414313, "percentage": 73.01, "elapsed_time": "2:29:55", "remaining_time": "0:55:25", "throughput": 19949.9, "total_tokens": 179469120}
|
|
{"current_steps": 57030, "total_steps": 78105, "loss": 0.1347, "lr": 1.0294343071984641e-06, "epoch": 3.6508546187824082, "percentage": 73.02, "elapsed_time": "2:29:56", "remaining_time": "0:55:24", "throughput": 19950.21, "total_tokens": 179485952}
|
|
{"current_steps": 57035, "total_steps": 78105, "loss": 0.1821, "lr": 1.028982563782586e-06, "epoch": 3.6511747007233852, "percentage": 73.02, "elapsed_time": "2:29:57", "remaining_time": "0:55:23", "throughput": 19950.45, "total_tokens": 179501696}
|
|
{"current_steps": 57040, "total_steps": 78105, "loss": 0.1825, "lr": 1.0285308938216545e-06, "epoch": 3.6514947826643622, "percentage": 73.03, "elapsed_time": "2:29:58", "remaining_time": "0:55:23", "throughput": 19950.7, "total_tokens": 179517568}
|
|
{"current_steps": 57045, "total_steps": 78105, "loss": 0.2007, "lr": 1.0280792973382276e-06, "epoch": 3.651814864605339, "percentage": 73.04, "elapsed_time": "2:29:58", "remaining_time": "0:55:22", "throughput": 19951.02, "total_tokens": 179534912}
|
|
{"current_steps": 57050, "total_steps": 78105, "loss": 0.1787, "lr": 1.0276277743548518e-06, "epoch": 3.6521349465463158, "percentage": 73.04, "elapsed_time": "2:29:59", "remaining_time": "0:55:21", "throughput": 19951.27, "total_tokens": 179550656}
|
|
{"current_steps": 57055, "total_steps": 78105, "loss": 0.2023, "lr": 1.0271763248940763e-06, "epoch": 3.6524550284872928, "percentage": 73.05, "elapsed_time": "2:30:00", "remaining_time": "0:55:20", "throughput": 19951.54, "total_tokens": 179566912}
|
|
{"current_steps": 57060, "total_steps": 78105, "loss": 0.2051, "lr": 1.0267249489784437e-06, "epoch": 3.6527751104282697, "percentage": 73.06, "elapsed_time": "2:30:00", "remaining_time": "0:55:19", "throughput": 19951.81, "total_tokens": 179583040}
|
|
{"current_steps": 57065, "total_steps": 78105, "loss": 0.2087, "lr": 1.0262736466304923e-06, "epoch": 3.6530951923692463, "percentage": 73.06, "elapsed_time": "2:30:01", "remaining_time": "0:55:18", "throughput": 19952.09, "total_tokens": 179599424}
|
|
{"current_steps": 57070, "total_steps": 78105, "loss": 0.1832, "lr": 1.0258224178727602e-06, "epoch": 3.6534152743102233, "percentage": 73.07, "elapsed_time": "2:30:02", "remaining_time": "0:55:18", "throughput": 19952.28, "total_tokens": 179614272}
|
|
{"current_steps": 57075, "total_steps": 78105, "loss": 0.2102, "lr": 1.0253712627277761e-06, "epoch": 3.6537353562512003, "percentage": 73.07, "elapsed_time": "2:30:02", "remaining_time": "0:55:17", "throughput": 19952.53, "total_tokens": 179630144}
|
|
{"current_steps": 57080, "total_steps": 78105, "loss": 0.1381, "lr": 1.0249201812180711e-06, "epoch": 3.6540554381921773, "percentage": 73.08, "elapsed_time": "2:30:03", "remaining_time": "0:55:16", "throughput": 19952.86, "total_tokens": 179647744}
|
|
{"current_steps": 57085, "total_steps": 78105, "loss": 0.2123, "lr": 1.0244691733661687e-06, "epoch": 3.6543755201331543, "percentage": 73.09, "elapsed_time": "2:30:04", "remaining_time": "0:55:15", "throughput": 19953.11, "total_tokens": 179663744}
|
|
{"current_steps": 57090, "total_steps": 78105, "loss": 0.1327, "lr": 1.0240182391945902e-06, "epoch": 3.6546956020741312, "percentage": 73.09, "elapsed_time": "2:30:04", "remaining_time": "0:55:14", "throughput": 19953.35, "total_tokens": 179679360}
|
|
{"current_steps": 57095, "total_steps": 78105, "loss": 0.2272, "lr": 1.0235673787258527e-06, "epoch": 3.655015684015108, "percentage": 73.1, "elapsed_time": "2:30:05", "remaining_time": "0:55:13", "throughput": 19953.56, "total_tokens": 179694272}
|
|
{"current_steps": 57100, "total_steps": 78105, "loss": 0.2296, "lr": 1.0231165919824699e-06, "epoch": 3.655335765956085, "percentage": 73.11, "elapsed_time": "2:30:06", "remaining_time": "0:55:13", "throughput": 19953.79, "total_tokens": 179709632}
|
|
{"current_steps": 57105, "total_steps": 78105, "loss": 0.2083, "lr": 1.0226658789869514e-06, "epoch": 3.6556558478970618, "percentage": 73.11, "elapsed_time": "2:30:06", "remaining_time": "0:55:12", "throughput": 19954.0, "total_tokens": 179724736}
|
|
{"current_steps": 57110, "total_steps": 78105, "loss": 0.1711, "lr": 1.0222152397618037e-06, "epoch": 3.6559759298380383, "percentage": 73.12, "elapsed_time": "2:30:07", "remaining_time": "0:55:11", "throughput": 19954.3, "total_tokens": 179741440}
|
|
{"current_steps": 57115, "total_steps": 78105, "loss": 0.1709, "lr": 1.021764674329529e-06, "epoch": 3.6562960117790153, "percentage": 73.13, "elapsed_time": "2:30:08", "remaining_time": "0:55:10", "throughput": 19954.54, "total_tokens": 179756992}
|
|
{"current_steps": 57120, "total_steps": 78105, "loss": 0.1895, "lr": 1.021314182712627e-06, "epoch": 3.6566160937199923, "percentage": 73.13, "elapsed_time": "2:30:08", "remaining_time": "0:55:09", "throughput": 19954.76, "total_tokens": 179771968}
|
|
{"current_steps": 57125, "total_steps": 78105, "loss": 0.1812, "lr": 1.0208637649335928e-06, "epoch": 3.6569361756609693, "percentage": 73.14, "elapsed_time": "2:30:09", "remaining_time": "0:55:08", "throughput": 19955.02, "total_tokens": 179788096}
|
|
{"current_steps": 57130, "total_steps": 78105, "loss": 0.1849, "lr": 1.0204134210149178e-06, "epoch": 3.6572562576019463, "percentage": 73.15, "elapsed_time": "2:30:10", "remaining_time": "0:55:08", "throughput": 19955.32, "total_tokens": 179804864}
|
|
{"current_steps": 57135, "total_steps": 78105, "loss": 0.1623, "lr": 1.0199631509790893e-06, "epoch": 3.657576339542923, "percentage": 73.15, "elapsed_time": "2:30:11", "remaining_time": "0:55:07", "throughput": 19955.56, "total_tokens": 179820544}
|
|
{"current_steps": 57140, "total_steps": 78105, "loss": 0.119, "lr": 1.019512954848592e-06, "epoch": 3.6578964214839, "percentage": 73.16, "elapsed_time": "2:30:11", "remaining_time": "0:55:06", "throughput": 19955.82, "total_tokens": 179836544}
|
|
{"current_steps": 57145, "total_steps": 78105, "loss": 0.1459, "lr": 1.0190628326459062e-06, "epoch": 3.658216503424877, "percentage": 73.16, "elapsed_time": "2:30:12", "remaining_time": "0:55:05", "throughput": 19956.02, "total_tokens": 179851648}
|
|
{"current_steps": 57150, "total_steps": 78105, "loss": 0.1805, "lr": 1.0186127843935079e-06, "epoch": 3.658536585365854, "percentage": 73.17, "elapsed_time": "2:30:13", "remaining_time": "0:55:04", "throughput": 19956.26, "total_tokens": 179867072}
|
|
{"current_steps": 57155, "total_steps": 78105, "loss": 0.1724, "lr": 1.0181628101138724e-06, "epoch": 3.6588566673068303, "percentage": 73.18, "elapsed_time": "2:30:13", "remaining_time": "0:55:03", "throughput": 19956.51, "total_tokens": 179883008}
|
|
{"current_steps": 57160, "total_steps": 78105, "loss": 0.1869, "lr": 1.0177129098294658e-06, "epoch": 3.6591767492478073, "percentage": 73.18, "elapsed_time": "2:30:14", "remaining_time": "0:55:03", "throughput": 19956.77, "total_tokens": 179898560}
|
|
{"current_steps": 57165, "total_steps": 78105, "loss": 0.1669, "lr": 1.0172630835627564e-06, "epoch": 3.6594968311887843, "percentage": 73.19, "elapsed_time": "2:30:15", "remaining_time": "0:55:02", "throughput": 19957.01, "total_tokens": 179913856}
|
|
{"current_steps": 57170, "total_steps": 78105, "loss": 0.1365, "lr": 1.0168133313362053e-06, "epoch": 3.6598169131297613, "percentage": 73.2, "elapsed_time": "2:30:15", "remaining_time": "0:55:01", "throughput": 19957.33, "total_tokens": 179931008}
|
|
{"current_steps": 57175, "total_steps": 78105, "loss": 0.1969, "lr": 1.01636365317227e-06, "epoch": 3.6601369950707383, "percentage": 73.2, "elapsed_time": "2:30:16", "remaining_time": "0:55:00", "throughput": 19957.56, "total_tokens": 179946240}
|
|
{"current_steps": 57180, "total_steps": 78105, "loss": 0.1953, "lr": 1.0159140490934077e-06, "epoch": 3.660457077011715, "percentage": 73.21, "elapsed_time": "2:30:17", "remaining_time": "0:54:59", "throughput": 19957.77, "total_tokens": 179961728}
|
|
{"current_steps": 57185, "total_steps": 78105, "loss": 0.1594, "lr": 1.0154645191220655e-06, "epoch": 3.660777158952692, "percentage": 73.22, "elapsed_time": "2:30:17", "remaining_time": "0:54:58", "throughput": 19957.99, "total_tokens": 179976576}
|
|
{"current_steps": 57190, "total_steps": 78105, "loss": 0.1153, "lr": 1.0150150632806944e-06, "epoch": 3.661097240893669, "percentage": 73.22, "elapsed_time": "2:30:18", "remaining_time": "0:54:58", "throughput": 19958.24, "total_tokens": 179992384}
|
|
{"current_steps": 57195, "total_steps": 78105, "loss": 0.144, "lr": 1.0145656815917344e-06, "epoch": 3.661417322834646, "percentage": 73.23, "elapsed_time": "2:30:19", "remaining_time": "0:54:57", "throughput": 19958.56, "total_tokens": 180009600}
|
|
{"current_steps": 57200, "total_steps": 78105, "loss": 0.1512, "lr": 1.0141163740776277e-06, "epoch": 3.6617374047756224, "percentage": 73.23, "elapsed_time": "2:30:19", "remaining_time": "0:54:56", "throughput": 19958.8, "total_tokens": 180025152}
|
|
{"current_steps": 57205, "total_steps": 78105, "loss": 0.1425, "lr": 1.0136671407608096e-06, "epoch": 3.6620574867165994, "percentage": 73.24, "elapsed_time": "2:30:20", "remaining_time": "0:54:55", "throughput": 19959.02, "total_tokens": 180040320}
|
|
{"current_steps": 57210, "total_steps": 78105, "loss": 0.117, "lr": 1.0132179816637126e-06, "epoch": 3.6623775686575764, "percentage": 73.25, "elapsed_time": "2:30:21", "remaining_time": "0:54:54", "throughput": 19959.36, "total_tokens": 180058112}
|
|
{"current_steps": 57215, "total_steps": 78105, "loss": 0.1732, "lr": 1.0127688968087647e-06, "epoch": 3.6626976505985533, "percentage": 73.25, "elapsed_time": "2:30:21", "remaining_time": "0:54:54", "throughput": 19959.54, "total_tokens": 180072640}
|
|
{"current_steps": 57220, "total_steps": 78105, "loss": 0.2144, "lr": 1.012319886218392e-06, "epoch": 3.6630177325395303, "percentage": 73.26, "elapsed_time": "2:30:22", "remaining_time": "0:54:53", "throughput": 19959.74, "total_tokens": 180087424}
|
|
{"current_steps": 57225, "total_steps": 78105, "loss": 0.1504, "lr": 1.0118709499150139e-06, "epoch": 3.663337814480507, "percentage": 73.27, "elapsed_time": "2:30:23", "remaining_time": "0:54:52", "throughput": 19960.12, "total_tokens": 180106176}
|
|
{"current_steps": 57230, "total_steps": 78105, "loss": 0.1149, "lr": 1.0114220879210513e-06, "epoch": 3.663657896421484, "percentage": 73.27, "elapsed_time": "2:30:23", "remaining_time": "0:54:51", "throughput": 19960.33, "total_tokens": 180121408}
|
|
{"current_steps": 57235, "total_steps": 78105, "loss": 0.1256, "lr": 1.0109733002589137e-06, "epoch": 3.663977978362461, "percentage": 73.28, "elapsed_time": "2:30:24", "remaining_time": "0:54:50", "throughput": 19960.56, "total_tokens": 180137024}
|
|
{"current_steps": 57240, "total_steps": 78105, "loss": 0.1808, "lr": 1.0105245869510146e-06, "epoch": 3.6642980603034374, "percentage": 73.29, "elapsed_time": "2:30:25", "remaining_time": "0:54:49", "throughput": 19960.81, "total_tokens": 180153152}
|
|
{"current_steps": 57245, "total_steps": 78105, "loss": 0.1366, "lr": 1.0100759480197592e-06, "epoch": 3.6646181422444144, "percentage": 73.29, "elapsed_time": "2:30:26", "remaining_time": "0:54:49", "throughput": 19961.02, "total_tokens": 180168320}
|
|
{"current_steps": 57250, "total_steps": 78105, "loss": 0.1463, "lr": 1.0096273834875502e-06, "epoch": 3.6649382241853914, "percentage": 73.3, "elapsed_time": "2:30:26", "remaining_time": "0:54:48", "throughput": 19961.21, "total_tokens": 180182784}
|
|
{"current_steps": 57255, "total_steps": 78105, "loss": 0.2409, "lr": 1.0091788933767865e-06, "epoch": 3.6652583061263684, "percentage": 73.31, "elapsed_time": "2:30:27", "remaining_time": "0:54:47", "throughput": 19961.48, "total_tokens": 180198848}
|
|
{"current_steps": 57260, "total_steps": 78105, "loss": 0.1213, "lr": 1.0087304777098628e-06, "epoch": 3.6655783880673454, "percentage": 73.31, "elapsed_time": "2:30:27", "remaining_time": "0:54:46", "throughput": 19961.68, "total_tokens": 180213568}
|
|
{"current_steps": 57265, "total_steps": 78105, "loss": 0.1878, "lr": 1.008282136509173e-06, "epoch": 3.6658984700083224, "percentage": 73.32, "elapsed_time": "2:30:28", "remaining_time": "0:54:45", "throughput": 19961.95, "total_tokens": 180229888}
|
|
{"current_steps": 57270, "total_steps": 78105, "loss": 0.1587, "lr": 1.0078338697971013e-06, "epoch": 3.666218551949299, "percentage": 73.32, "elapsed_time": "2:30:29", "remaining_time": "0:54:44", "throughput": 19962.16, "total_tokens": 180244864}
|
|
{"current_steps": 57275, "total_steps": 78105, "loss": 0.1306, "lr": 1.0073856775960356e-06, "epoch": 3.666538633890276, "percentage": 73.33, "elapsed_time": "2:30:30", "remaining_time": "0:54:44", "throughput": 19962.44, "total_tokens": 180261184}
|
|
{"current_steps": 57280, "total_steps": 78105, "loss": 0.1935, "lr": 1.0069375599283526e-06, "epoch": 3.666858715831253, "percentage": 73.34, "elapsed_time": "2:30:30", "remaining_time": "0:54:43", "throughput": 19962.66, "total_tokens": 180276224}
|
|
{"current_steps": 57285, "total_steps": 78105, "loss": 0.2373, "lr": 1.0064895168164319e-06, "epoch": 3.6671787977722294, "percentage": 73.34, "elapsed_time": "2:30:31", "remaining_time": "0:54:42", "throughput": 19962.92, "total_tokens": 180292096}
|
|
{"current_steps": 57290, "total_steps": 78105, "loss": 0.1558, "lr": 1.0060415482826449e-06, "epoch": 3.6674988797132064, "percentage": 73.35, "elapsed_time": "2:30:32", "remaining_time": "0:54:41", "throughput": 19963.19, "total_tokens": 180308160}
|
|
{"current_steps": 57295, "total_steps": 78105, "loss": 0.1774, "lr": 1.0055936543493609e-06, "epoch": 3.6678189616541834, "percentage": 73.36, "elapsed_time": "2:30:32", "remaining_time": "0:54:40", "throughput": 19963.4, "total_tokens": 180323200}
|
|
{"current_steps": 57300, "total_steps": 78105, "loss": 0.2222, "lr": 1.005145835038947e-06, "epoch": 3.6681390435951604, "percentage": 73.36, "elapsed_time": "2:30:33", "remaining_time": "0:54:39", "throughput": 19963.69, "total_tokens": 180340032}
|
|
{"current_steps": 57305, "total_steps": 78105, "loss": 0.2245, "lr": 1.0046980903737621e-06, "epoch": 3.6684591255361374, "percentage": 73.37, "elapsed_time": "2:30:34", "remaining_time": "0:54:39", "throughput": 19963.89, "total_tokens": 180354752}
|
|
{"current_steps": 57310, "total_steps": 78105, "loss": 0.1197, "lr": 1.0042504203761667e-06, "epoch": 3.6687792074771144, "percentage": 73.38, "elapsed_time": "2:30:34", "remaining_time": "0:54:38", "throughput": 19964.16, "total_tokens": 180370944}
|
|
{"current_steps": 57315, "total_steps": 78105, "loss": 0.1151, "lr": 1.0038028250685142e-06, "epoch": 3.669099289418091, "percentage": 73.38, "elapsed_time": "2:30:35", "remaining_time": "0:54:37", "throughput": 19964.33, "total_tokens": 180385344}
|
|
{"current_steps": 57320, "total_steps": 78105, "loss": 0.1282, "lr": 1.0033553044731554e-06, "epoch": 3.669419371359068, "percentage": 73.39, "elapsed_time": "2:30:36", "remaining_time": "0:54:36", "throughput": 19964.54, "total_tokens": 180400256}
|
|
{"current_steps": 57325, "total_steps": 78105, "loss": 0.1913, "lr": 1.0029078586124367e-06, "epoch": 3.669739453300045, "percentage": 73.39, "elapsed_time": "2:30:36", "remaining_time": "0:54:35", "throughput": 19964.85, "total_tokens": 180417216}
|
|
{"current_steps": 57330, "total_steps": 78105, "loss": 0.1991, "lr": 1.0024604875087016e-06, "epoch": 3.6700595352410215, "percentage": 73.4, "elapsed_time": "2:30:37", "remaining_time": "0:54:34", "throughput": 19965.1, "total_tokens": 180433152}
|
|
{"current_steps": 57335, "total_steps": 78105, "loss": 0.1723, "lr": 1.0020131911842892e-06, "epoch": 3.6703796171819985, "percentage": 73.41, "elapsed_time": "2:30:38", "remaining_time": "0:54:34", "throughput": 19965.4, "total_tokens": 180450112}
|
|
{"current_steps": 57340, "total_steps": 78105, "loss": 0.1653, "lr": 1.0015659696615355e-06, "epoch": 3.6706996991229754, "percentage": 73.41, "elapsed_time": "2:30:38", "remaining_time": "0:54:33", "throughput": 19965.69, "total_tokens": 180466880}
|
|
{"current_steps": 57345, "total_steps": 78105, "loss": 0.1518, "lr": 1.001118822962771e-06, "epoch": 3.6710197810639524, "percentage": 73.42, "elapsed_time": "2:30:39", "remaining_time": "0:54:32", "throughput": 19965.96, "total_tokens": 180482944}
|
|
{"current_steps": 57350, "total_steps": 78105, "loss": 0.2415, "lr": 1.0006717511103264e-06, "epoch": 3.6713398630049294, "percentage": 73.43, "elapsed_time": "2:30:40", "remaining_time": "0:54:31", "throughput": 19966.18, "total_tokens": 180498112}
|
|
{"current_steps": 57355, "total_steps": 78105, "loss": 0.1801, "lr": 1.0002247541265247e-06, "epoch": 3.6716599449459064, "percentage": 73.43, "elapsed_time": "2:30:40", "remaining_time": "0:54:30", "throughput": 19966.37, "total_tokens": 180512704}
|
|
{"current_steps": 57360, "total_steps": 78105, "loss": 0.1716, "lr": 9.997778320336867e-07, "epoch": 3.671980026886883, "percentage": 73.44, "elapsed_time": "2:30:41", "remaining_time": "0:54:29", "throughput": 19966.6, "total_tokens": 180528000}
|
|
{"current_steps": 57365, "total_steps": 78105, "loss": 0.1643, "lr": 9.993309848541295e-07, "epoch": 3.67230010882786, "percentage": 73.45, "elapsed_time": "2:30:42", "remaining_time": "0:54:29", "throughput": 19966.87, "total_tokens": 180544768}
|
|
{"current_steps": 57370, "total_steps": 78105, "loss": 0.2062, "lr": 9.98884212610166e-07, "epoch": 3.672620190768837, "percentage": 73.45, "elapsed_time": "2:30:42", "remaining_time": "0:54:28", "throughput": 19967.1, "total_tokens": 180560384}
|
|
{"current_steps": 57375, "total_steps": 78105, "loss": 0.1647, "lr": 9.98437515324106e-07, "epoch": 3.6729402727098135, "percentage": 73.46, "elapsed_time": "2:30:43", "remaining_time": "0:54:27", "throughput": 19967.41, "total_tokens": 180577280}
|
|
{"current_steps": 57380, "total_steps": 78105, "loss": 0.1513, "lr": 9.979908930182543e-07, "epoch": 3.6732603546507905, "percentage": 73.47, "elapsed_time": "2:30:44", "remaining_time": "0:54:26", "throughput": 19967.69, "total_tokens": 180593920}
|
|
{"current_steps": 57385, "total_steps": 78105, "loss": 0.1824, "lr": 9.975443457149155e-07, "epoch": 3.6735804365917675, "percentage": 73.47, "elapsed_time": "2:30:44", "remaining_time": "0:54:25", "throughput": 19967.89, "total_tokens": 180608640}
|
|
{"current_steps": 57390, "total_steps": 78105, "loss": 0.2315, "lr": 9.970978734363843e-07, "epoch": 3.6739005185327445, "percentage": 73.48, "elapsed_time": "2:30:45", "remaining_time": "0:54:25", "throughput": 19968.1, "total_tokens": 180623680}
|
|
{"current_steps": 57395, "total_steps": 78105, "loss": 0.157, "lr": 9.96651476204958e-07, "epoch": 3.6742206004737215, "percentage": 73.48, "elapsed_time": "2:30:46", "remaining_time": "0:54:24", "throughput": 19968.34, "total_tokens": 180639296}
|
|
{"current_steps": 57400, "total_steps": 78105, "loss": 0.152, "lr": 9.962051540429257e-07, "epoch": 3.674540682414698, "percentage": 73.49, "elapsed_time": "2:30:46", "remaining_time": "0:54:23", "throughput": 19968.59, "total_tokens": 180655040}
|
|
{"current_steps": 57405, "total_steps": 78105, "loss": 0.1281, "lr": 9.957589069725756e-07, "epoch": 3.674860764355675, "percentage": 73.5, "elapsed_time": "2:30:47", "remaining_time": "0:54:22", "throughput": 19968.85, "total_tokens": 180670976}
|
|
{"current_steps": 57410, "total_steps": 78105, "loss": 0.273, "lr": 9.953127350161904e-07, "epoch": 3.675180846296652, "percentage": 73.5, "elapsed_time": "2:30:48", "remaining_time": "0:54:21", "throughput": 19969.07, "total_tokens": 180686400}
|
|
{"current_steps": 57415, "total_steps": 78105, "loss": 0.2331, "lr": 9.948666381960484e-07, "epoch": 3.675500928237629, "percentage": 73.51, "elapsed_time": "2:30:49", "remaining_time": "0:54:20", "throughput": 19969.38, "total_tokens": 180703488}
|
|
{"current_steps": 57420, "total_steps": 78105, "loss": 0.2869, "lr": 9.944206165344284e-07, "epoch": 3.6758210101786055, "percentage": 73.52, "elapsed_time": "2:30:49", "remaining_time": "0:54:20", "throughput": 19969.63, "total_tokens": 180719424}
|
|
{"current_steps": 57425, "total_steps": 78105, "loss": 0.1517, "lr": 9.939746700535987e-07, "epoch": 3.6761410921195825, "percentage": 73.52, "elapsed_time": "2:30:50", "remaining_time": "0:54:19", "throughput": 19969.88, "total_tokens": 180735168}
|
|
{"current_steps": 57430, "total_steps": 78105, "loss": 0.1296, "lr": 9.935287987758305e-07, "epoch": 3.6764611740605595, "percentage": 73.53, "elapsed_time": "2:30:51", "remaining_time": "0:54:18", "throughput": 19970.15, "total_tokens": 180751232}
|
|
{"current_steps": 57435, "total_steps": 78105, "loss": 0.1411, "lr": 9.930830027233871e-07, "epoch": 3.6767812560015365, "percentage": 73.54, "elapsed_time": "2:30:51", "remaining_time": "0:54:17", "throughput": 19970.38, "total_tokens": 180766784}
|
|
{"current_steps": 57440, "total_steps": 78105, "loss": 0.2098, "lr": 9.926372819185292e-07, "epoch": 3.6771013379425135, "percentage": 73.54, "elapsed_time": "2:30:52", "remaining_time": "0:54:16", "throughput": 19970.59, "total_tokens": 180781888}
|
|
{"current_steps": 57445, "total_steps": 78105, "loss": 0.1277, "lr": 9.921916363835138e-07, "epoch": 3.67742141988349, "percentage": 73.55, "elapsed_time": "2:30:53", "remaining_time": "0:54:15", "throughput": 19970.85, "total_tokens": 180798016}
|
|
{"current_steps": 57450, "total_steps": 78105, "loss": 0.1431, "lr": 9.917460661405945e-07, "epoch": 3.677741501824467, "percentage": 73.55, "elapsed_time": "2:30:53", "remaining_time": "0:54:15", "throughput": 19971.11, "total_tokens": 180814016}
|
|
{"current_steps": 57455, "total_steps": 78105, "loss": 0.1803, "lr": 9.913005712120191e-07, "epoch": 3.678061583765444, "percentage": 73.56, "elapsed_time": "2:30:54", "remaining_time": "0:54:14", "throughput": 19971.35, "total_tokens": 180829824}
|
|
{"current_steps": 57460, "total_steps": 78105, "loss": 0.1095, "lr": 9.908551516200368e-07, "epoch": 3.678381665706421, "percentage": 73.57, "elapsed_time": "2:30:55", "remaining_time": "0:54:13", "throughput": 19971.55, "total_tokens": 180844928}
|
|
{"current_steps": 57465, "total_steps": 78105, "loss": 0.2054, "lr": 9.904098073868854e-07, "epoch": 3.6787017476473975, "percentage": 73.57, "elapsed_time": "2:30:55", "remaining_time": "0:54:12", "throughput": 19971.81, "total_tokens": 180860800}
|
|
{"current_steps": 57470, "total_steps": 78105, "loss": 0.1437, "lr": 9.899645385348062e-07, "epoch": 3.6790218295883745, "percentage": 73.58, "elapsed_time": "2:30:56", "remaining_time": "0:54:11", "throughput": 19972.01, "total_tokens": 180875712}
|
|
{"current_steps": 57475, "total_steps": 78105, "loss": 0.1594, "lr": 9.89519345086032e-07, "epoch": 3.6793419115293515, "percentage": 73.59, "elapsed_time": "2:30:57", "remaining_time": "0:54:10", "throughput": 19972.31, "total_tokens": 180892672}
|
|
{"current_steps": 57480, "total_steps": 78105, "loss": 0.1879, "lr": 9.890742270627943e-07, "epoch": 3.6796619934703285, "percentage": 73.59, "elapsed_time": "2:30:57", "remaining_time": "0:54:10", "throughput": 19972.55, "total_tokens": 180907968}
|
|
{"current_steps": 57485, "total_steps": 78105, "loss": 0.2108, "lr": 9.886291844873194e-07, "epoch": 3.6799820754113055, "percentage": 73.6, "elapsed_time": "2:30:58", "remaining_time": "0:54:09", "throughput": 19972.72, "total_tokens": 180921984}
|
|
{"current_steps": 57490, "total_steps": 78105, "loss": 0.1362, "lr": 9.8818421738183e-07, "epoch": 3.680302157352282, "percentage": 73.61, "elapsed_time": "2:30:59", "remaining_time": "0:54:08", "throughput": 19972.96, "total_tokens": 180937280}
|
|
{"current_steps": 57495, "total_steps": 78105, "loss": 0.207, "lr": 9.877393257685474e-07, "epoch": 3.680622239293259, "percentage": 73.61, "elapsed_time": "2:30:59", "remaining_time": "0:54:07", "throughput": 19973.21, "total_tokens": 180952960}
|
|
{"current_steps": 57500, "total_steps": 78105, "loss": 0.192, "lr": 9.872945096696843e-07, "epoch": 3.680942321234236, "percentage": 73.62, "elapsed_time": "2:31:00", "remaining_time": "0:54:06", "throughput": 19973.45, "total_tokens": 180968768}
|
|
{"current_steps": 57505, "total_steps": 78105, "loss": 0.1509, "lr": 9.868497691074555e-07, "epoch": 3.6812624031752126, "percentage": 73.63, "elapsed_time": "2:31:01", "remaining_time": "0:54:05", "throughput": 19973.73, "total_tokens": 180984960}
|
|
{"current_steps": 57510, "total_steps": 78105, "loss": 0.1594, "lr": 9.86405104104066e-07, "epoch": 3.6815824851161896, "percentage": 73.63, "elapsed_time": "2:31:01", "remaining_time": "0:54:05", "throughput": 19973.97, "total_tokens": 181001088}
|
|
{"current_steps": 57515, "total_steps": 78105, "loss": 0.1653, "lr": 9.859605146817223e-07, "epoch": 3.6819025670571666, "percentage": 73.64, "elapsed_time": "2:31:02", "remaining_time": "0:54:04", "throughput": 19974.21, "total_tokens": 181016576}
|
|
{"current_steps": 57520, "total_steps": 78105, "loss": 0.1897, "lr": 9.85516000862624e-07, "epoch": 3.6822226489981436, "percentage": 73.64, "elapsed_time": "2:31:03", "remaining_time": "0:54:03", "throughput": 19974.43, "total_tokens": 181031488}
|
|
{"current_steps": 57525, "total_steps": 78105, "loss": 0.1544, "lr": 9.850715626689674e-07, "epoch": 3.6825427309391205, "percentage": 73.65, "elapsed_time": "2:31:03", "remaining_time": "0:54:02", "throughput": 19974.71, "total_tokens": 181047616}
|
|
{"current_steps": 57530, "total_steps": 78105, "loss": 0.108, "lr": 9.846272001229475e-07, "epoch": 3.6828628128800975, "percentage": 73.66, "elapsed_time": "2:31:04", "remaining_time": "0:54:01", "throughput": 19974.9, "total_tokens": 181062144}
|
|
{"current_steps": 57535, "total_steps": 78105, "loss": 0.1709, "lr": 9.841829132467504e-07, "epoch": 3.683182894821074, "percentage": 73.66, "elapsed_time": "2:31:05", "remaining_time": "0:54:00", "throughput": 19975.16, "total_tokens": 181077952}
|
|
{"current_steps": 57540, "total_steps": 78105, "loss": 0.1293, "lr": 9.837387020625638e-07, "epoch": 3.683502976762051, "percentage": 73.67, "elapsed_time": "2:31:05", "remaining_time": "0:54:00", "throughput": 19975.42, "total_tokens": 181094080}
|
|
{"current_steps": 57545, "total_steps": 78105, "loss": 0.1193, "lr": 9.832945665925685e-07, "epoch": 3.683823058703028, "percentage": 73.68, "elapsed_time": "2:31:06", "remaining_time": "0:53:59", "throughput": 19975.66, "total_tokens": 181109696}
|
|
{"current_steps": 57550, "total_steps": 78105, "loss": 0.2667, "lr": 9.828505068589422e-07, "epoch": 3.6841431406440046, "percentage": 73.68, "elapsed_time": "2:31:07", "remaining_time": "0:53:58", "throughput": 19975.91, "total_tokens": 181125440}
|
|
{"current_steps": 57555, "total_steps": 78105, "loss": 0.1779, "lr": 9.824065228838592e-07, "epoch": 3.6844632225849816, "percentage": 73.69, "elapsed_time": "2:31:07", "remaining_time": "0:53:57", "throughput": 19976.09, "total_tokens": 181139840}
|
|
{"current_steps": 57560, "total_steps": 78105, "loss": 0.181, "lr": 9.819626146894895e-07, "epoch": 3.6847833045259586, "percentage": 73.7, "elapsed_time": "2:31:08", "remaining_time": "0:53:56", "throughput": 19976.32, "total_tokens": 181154880}
|
|
{"current_steps": 57565, "total_steps": 78105, "loss": 0.1546, "lr": 9.815187822979995e-07, "epoch": 3.6851033864669356, "percentage": 73.7, "elapsed_time": "2:31:09", "remaining_time": "0:53:56", "throughput": 19976.57, "total_tokens": 181170752}
|
|
{"current_steps": 57570, "total_steps": 78105, "loss": 0.1677, "lr": 9.810750257315524e-07, "epoch": 3.6854234684079126, "percentage": 73.71, "elapsed_time": "2:31:09", "remaining_time": "0:53:55", "throughput": 19976.82, "total_tokens": 181186688}
|
|
{"current_steps": 57575, "total_steps": 78105, "loss": 0.1408, "lr": 9.806313450123056e-07, "epoch": 3.6857435503488896, "percentage": 73.71, "elapsed_time": "2:31:10", "remaining_time": "0:53:54", "throughput": 19977.04, "total_tokens": 181201984}
|
|
{"current_steps": 57580, "total_steps": 78105, "loss": 0.1191, "lr": 9.801877401624175e-07, "epoch": 3.686063632289866, "percentage": 73.72, "elapsed_time": "2:31:11", "remaining_time": "0:53:53", "throughput": 19977.31, "total_tokens": 181218048}
|
|
{"current_steps": 57585, "total_steps": 78105, "loss": 0.1615, "lr": 9.797442112040349e-07, "epoch": 3.686383714230843, "percentage": 73.73, "elapsed_time": "2:31:11", "remaining_time": "0:53:52", "throughput": 19977.59, "total_tokens": 181234368}
|
|
{"current_steps": 57590, "total_steps": 78105, "loss": 0.1587, "lr": 9.79300758159309e-07, "epoch": 3.68670379617182, "percentage": 73.73, "elapsed_time": "2:31:12", "remaining_time": "0:53:51", "throughput": 19977.83, "total_tokens": 181249792}
|
|
{"current_steps": 57595, "total_steps": 78105, "loss": 0.1392, "lr": 9.788573810503819e-07, "epoch": 3.6870238781127966, "percentage": 73.74, "elapsed_time": "2:31:13", "remaining_time": "0:53:51", "throughput": 19978.05, "total_tokens": 181265152}
|
|
{"current_steps": 57600, "total_steps": 78105, "loss": 0.1723, "lr": 9.784140798993929e-07, "epoch": 3.6873439600537736, "percentage": 73.75, "elapsed_time": "2:31:13", "remaining_time": "0:53:50", "throughput": 19978.34, "total_tokens": 181281728}
|
|
{"current_steps": 57605, "total_steps": 78105, "loss": 0.1786, "lr": 9.77970854728481e-07, "epoch": 3.6876640419947506, "percentage": 73.75, "elapsed_time": "2:31:14", "remaining_time": "0:53:49", "throughput": 19978.56, "total_tokens": 181297344}
|
|
{"current_steps": 57610, "total_steps": 78105, "loss": 0.1465, "lr": 9.775277055597748e-07, "epoch": 3.6879841239357276, "percentage": 73.76, "elapsed_time": "2:31:15", "remaining_time": "0:53:48", "throughput": 19978.83, "total_tokens": 181313536}
|
|
{"current_steps": 57615, "total_steps": 78105, "loss": 0.1422, "lr": 9.770846324154069e-07, "epoch": 3.6883042058767046, "percentage": 73.77, "elapsed_time": "2:31:15", "remaining_time": "0:53:47", "throughput": 19979.08, "total_tokens": 181329536}
|
|
{"current_steps": 57620, "total_steps": 78105, "loss": 0.2226, "lr": 9.766416353174976e-07, "epoch": 3.6886242878176816, "percentage": 73.77, "elapsed_time": "2:31:16", "remaining_time": "0:53:46", "throughput": 19979.3, "total_tokens": 181344832}
|
|
{"current_steps": 57625, "total_steps": 78105, "loss": 0.1559, "lr": 9.761987142881713e-07, "epoch": 3.688944369758658, "percentage": 73.78, "elapsed_time": "2:31:17", "remaining_time": "0:53:46", "throughput": 19979.53, "total_tokens": 181360768}
|
|
{"current_steps": 57630, "total_steps": 78105, "loss": 0.2096, "lr": 9.75755869349544e-07, "epoch": 3.689264451699635, "percentage": 73.79, "elapsed_time": "2:31:18", "remaining_time": "0:53:45", "throughput": 19979.79, "total_tokens": 181376576}
|
|
{"current_steps": 57635, "total_steps": 78105, "loss": 0.146, "lr": 9.75313100523729e-07, "epoch": 3.689584533640612, "percentage": 73.79, "elapsed_time": "2:31:18", "remaining_time": "0:53:44", "throughput": 19980.01, "total_tokens": 181391680}
|
|
{"current_steps": 57640, "total_steps": 78105, "loss": 0.1516, "lr": 9.74870407832836e-07, "epoch": 3.6899046155815887, "percentage": 73.8, "elapsed_time": "2:31:19", "remaining_time": "0:53:43", "throughput": 19980.2, "total_tokens": 181406400}
|
|
{"current_steps": 57645, "total_steps": 78105, "loss": 0.0999, "lr": 9.744277912989697e-07, "epoch": 3.6902246975225657, "percentage": 73.8, "elapsed_time": "2:31:19", "remaining_time": "0:53:42", "throughput": 19980.44, "total_tokens": 181422144}
|
|
{"current_steps": 57650, "total_steps": 78105, "loss": 0.1618, "lr": 9.739852509442352e-07, "epoch": 3.6905447794635426, "percentage": 73.81, "elapsed_time": "2:31:20", "remaining_time": "0:53:41", "throughput": 19980.72, "total_tokens": 181438400}
|
|
{"current_steps": 57655, "total_steps": 78105, "loss": 0.1733, "lr": 9.735427867907265e-07, "epoch": 3.6908648614045196, "percentage": 73.82, "elapsed_time": "2:31:21", "remaining_time": "0:53:41", "throughput": 19980.98, "total_tokens": 181454400}
|
|
{"current_steps": 57660, "total_steps": 78105, "loss": 0.1211, "lr": 9.731003988605408e-07, "epoch": 3.6911849433454966, "percentage": 73.82, "elapsed_time": "2:31:22", "remaining_time": "0:53:40", "throughput": 19981.24, "total_tokens": 181470400}
|
|
{"current_steps": 57665, "total_steps": 78105, "loss": 0.1932, "lr": 9.726580871757681e-07, "epoch": 3.691505025286473, "percentage": 73.83, "elapsed_time": "2:31:22", "remaining_time": "0:53:39", "throughput": 19981.46, "total_tokens": 181485632}
|
|
{"current_steps": 57670, "total_steps": 78105, "loss": 0.1826, "lr": 9.72215851758495e-07, "epoch": 3.69182510722745, "percentage": 73.84, "elapsed_time": "2:31:23", "remaining_time": "0:53:38", "throughput": 19981.73, "total_tokens": 181502208}
|
|
{"current_steps": 57675, "total_steps": 78105, "loss": 0.2884, "lr": 9.717736926308039e-07, "epoch": 3.692145189168427, "percentage": 73.84, "elapsed_time": "2:31:24", "remaining_time": "0:53:37", "throughput": 19981.96, "total_tokens": 181517888}
|
|
{"current_steps": 57680, "total_steps": 78105, "loss": 0.1474, "lr": 9.713316098147747e-07, "epoch": 3.692465271109404, "percentage": 73.85, "elapsed_time": "2:31:24", "remaining_time": "0:53:36", "throughput": 19982.18, "total_tokens": 181532992}
|
|
{"current_steps": 57685, "total_steps": 78105, "loss": 0.1147, "lr": 9.708896033324814e-07, "epoch": 3.6927853530503807, "percentage": 73.86, "elapsed_time": "2:31:25", "remaining_time": "0:53:36", "throughput": 19982.42, "total_tokens": 181548672}
|
|
{"current_steps": 57690, "total_steps": 78105, "loss": 0.1595, "lr": 9.70447673205998e-07, "epoch": 3.6931054349913577, "percentage": 73.86, "elapsed_time": "2:31:26", "remaining_time": "0:53:35", "throughput": 19982.61, "total_tokens": 181563328}
|
|
{"current_steps": 57695, "total_steps": 78105, "loss": 0.1967, "lr": 9.700058194573892e-07, "epoch": 3.6934255169323347, "percentage": 73.87, "elapsed_time": "2:31:26", "remaining_time": "0:53:34", "throughput": 19982.87, "total_tokens": 181579520}
|
|
{"current_steps": 57700, "total_steps": 78105, "loss": 0.122, "lr": 9.69564042108721e-07, "epoch": 3.6937455988733117, "percentage": 73.87, "elapsed_time": "2:31:27", "remaining_time": "0:53:33", "throughput": 19983.07, "total_tokens": 181594112}
|
|
{"current_steps": 57705, "total_steps": 78105, "loss": 0.1416, "lr": 9.691223411820528e-07, "epoch": 3.6940656808142887, "percentage": 73.88, "elapsed_time": "2:31:28", "remaining_time": "0:53:32", "throughput": 19983.3, "total_tokens": 181609536}
|
|
{"current_steps": 57710, "total_steps": 78105, "loss": 0.1628, "lr": 9.686807166994407e-07, "epoch": 3.694385762755265, "percentage": 73.89, "elapsed_time": "2:31:28", "remaining_time": "0:53:31", "throughput": 19983.49, "total_tokens": 181624192}
|
|
{"current_steps": 57715, "total_steps": 78105, "loss": 0.2056, "lr": 9.682391686829376e-07, "epoch": 3.694705844696242, "percentage": 73.89, "elapsed_time": "2:31:29", "remaining_time": "0:53:31", "throughput": 19983.79, "total_tokens": 181641216}
|
|
{"current_steps": 57720, "total_steps": 78105, "loss": 0.1575, "lr": 9.677976971545908e-07, "epoch": 3.695025926637219, "percentage": 73.9, "elapsed_time": "2:31:30", "remaining_time": "0:53:30", "throughput": 19984.01, "total_tokens": 181656832}
|
|
{"current_steps": 57725, "total_steps": 78105, "loss": 0.1759, "lr": 9.673563021364476e-07, "epoch": 3.695346008578196, "percentage": 73.91, "elapsed_time": "2:31:30", "remaining_time": "0:53:29", "throughput": 19984.22, "total_tokens": 181671808}
|
|
{"current_steps": 57730, "total_steps": 78105, "loss": 0.1817, "lr": 9.66914983650546e-07, "epoch": 3.6956660905191727, "percentage": 73.91, "elapsed_time": "2:31:31", "remaining_time": "0:53:28", "throughput": 19984.46, "total_tokens": 181687360}
|
|
{"current_steps": 57735, "total_steps": 78105, "loss": 0.129, "lr": 9.66473741718926e-07, "epoch": 3.6959861724601497, "percentage": 73.92, "elapsed_time": "2:31:32", "remaining_time": "0:53:27", "throughput": 19984.68, "total_tokens": 181702720}
|
|
{"current_steps": 57740, "total_steps": 78105, "loss": 0.174, "lr": 9.66032576363618e-07, "epoch": 3.6963062544011267, "percentage": 73.93, "elapsed_time": "2:31:32", "remaining_time": "0:53:27", "throughput": 19984.94, "total_tokens": 181718464}
|
|
{"current_steps": 57745, "total_steps": 78105, "loss": 0.1462, "lr": 9.655914876066537e-07, "epoch": 3.6966263363421037, "percentage": 73.93, "elapsed_time": "2:31:33", "remaining_time": "0:53:26", "throughput": 19985.2, "total_tokens": 181734592}
|
|
{"current_steps": 57750, "total_steps": 78105, "loss": 0.1505, "lr": 9.65150475470058e-07, "epoch": 3.6969464182830807, "percentage": 73.94, "elapsed_time": "2:31:34", "remaining_time": "0:53:25", "throughput": 19985.44, "total_tokens": 181750464}
|
|
{"current_steps": 57755, "total_steps": 78105, "loss": 0.1542, "lr": 9.64709539975853e-07, "epoch": 3.6972665002240572, "percentage": 73.95, "elapsed_time": "2:31:34", "remaining_time": "0:53:24", "throughput": 19985.66, "total_tokens": 181765568}
|
|
{"current_steps": 57760, "total_steps": 78105, "loss": 0.1164, "lr": 9.642686811460565e-07, "epoch": 3.697586582165034, "percentage": 73.95, "elapsed_time": "2:31:35", "remaining_time": "0:53:23", "throughput": 19985.91, "total_tokens": 181781888}
|
|
{"current_steps": 57765, "total_steps": 78105, "loss": 0.2066, "lr": 9.638278990026818e-07, "epoch": 3.697906664106011, "percentage": 73.96, "elapsed_time": "2:31:36", "remaining_time": "0:53:22", "throughput": 19986.15, "total_tokens": 181797504}
|
|
{"current_steps": 57770, "total_steps": 78105, "loss": 0.1559, "lr": 9.633871935677412e-07, "epoch": 3.6982267460469878, "percentage": 73.96, "elapsed_time": "2:31:36", "remaining_time": "0:53:22", "throughput": 19986.37, "total_tokens": 181812800}
|
|
{"current_steps": 57775, "total_steps": 78105, "loss": 0.1546, "lr": 9.6294656486324e-07, "epoch": 3.6985468279879647, "percentage": 73.97, "elapsed_time": "2:31:37", "remaining_time": "0:53:21", "throughput": 19986.62, "total_tokens": 181828544}
|
|
{"current_steps": 57780, "total_steps": 78105, "loss": 0.1759, "lr": 9.625060129111812e-07, "epoch": 3.6988669099289417, "percentage": 73.98, "elapsed_time": "2:31:38", "remaining_time": "0:53:20", "throughput": 19986.86, "total_tokens": 181844160}
|
|
{"current_steps": 57785, "total_steps": 78105, "loss": 0.1689, "lr": 9.620655377335637e-07, "epoch": 3.6991869918699187, "percentage": 73.98, "elapsed_time": "2:31:38", "remaining_time": "0:53:19", "throughput": 19987.07, "total_tokens": 181859392}
|
|
{"current_steps": 57790, "total_steps": 78105, "loss": 0.1344, "lr": 9.616251393523823e-07, "epoch": 3.6995070738108957, "percentage": 73.99, "elapsed_time": "2:31:39", "remaining_time": "0:53:18", "throughput": 19987.33, "total_tokens": 181875200}
|
|
{"current_steps": 57795, "total_steps": 78105, "loss": 0.1284, "lr": 9.611848177896286e-07, "epoch": 3.6998271557518727, "percentage": 74.0, "elapsed_time": "2:31:40", "remaining_time": "0:53:17", "throughput": 19987.55, "total_tokens": 181890496}
|
|
{"current_steps": 57800, "total_steps": 78105, "loss": 0.1392, "lr": 9.607445730672895e-07, "epoch": 3.7001472376928493, "percentage": 74.0, "elapsed_time": "2:31:40", "remaining_time": "0:53:17", "throughput": 19987.76, "total_tokens": 181905472}
|
|
{"current_steps": 57805, "total_steps": 78105, "loss": 0.1796, "lr": 9.603044052073479e-07, "epoch": 3.7004673196338262, "percentage": 74.01, "elapsed_time": "2:31:41", "remaining_time": "0:53:16", "throughput": 19987.98, "total_tokens": 181920768}
|
|
{"current_steps": 57810, "total_steps": 78105, "loss": 0.1486, "lr": 9.59864314231786e-07, "epoch": 3.7007874015748032, "percentage": 74.02, "elapsed_time": "2:31:42", "remaining_time": "0:53:15", "throughput": 19988.2, "total_tokens": 181935872}
|
|
{"current_steps": 57815, "total_steps": 78105, "loss": 0.2066, "lr": 9.594243001625765e-07, "epoch": 3.70110748351578, "percentage": 74.02, "elapsed_time": "2:31:42", "remaining_time": "0:53:14", "throughput": 19988.46, "total_tokens": 181952064}
|
|
{"current_steps": 57820, "total_steps": 78105, "loss": 0.1551, "lr": 9.589843630216933e-07, "epoch": 3.7014275654567568, "percentage": 74.03, "elapsed_time": "2:31:43", "remaining_time": "0:53:13", "throughput": 19988.72, "total_tokens": 181968000}
|
|
{"current_steps": 57825, "total_steps": 78105, "loss": 0.1295, "lr": 9.585445028311044e-07, "epoch": 3.7017476473977338, "percentage": 74.03, "elapsed_time": "2:31:44", "remaining_time": "0:53:13", "throughput": 19989.1, "total_tokens": 181986816}
|
|
{"current_steps": 57830, "total_steps": 78105, "loss": 0.2857, "lr": 9.58104719612773e-07, "epoch": 3.7020677293387108, "percentage": 74.04, "elapsed_time": "2:31:44", "remaining_time": "0:53:12", "throughput": 19989.35, "total_tokens": 182002496}
|
|
{"current_steps": 57835, "total_steps": 78105, "loss": 0.1325, "lr": 9.576650133886623e-07, "epoch": 3.7023878112796877, "percentage": 74.05, "elapsed_time": "2:31:45", "remaining_time": "0:53:11", "throughput": 19989.57, "total_tokens": 182017728}
|
|
{"current_steps": 57840, "total_steps": 78105, "loss": 0.1165, "lr": 9.572253841807252e-07, "epoch": 3.7027078932206647, "percentage": 74.05, "elapsed_time": "2:31:46", "remaining_time": "0:53:10", "throughput": 19989.81, "total_tokens": 182033536}
|
|
{"current_steps": 57845, "total_steps": 78105, "loss": 0.1179, "lr": 9.567858320109182e-07, "epoch": 3.7030279751616413, "percentage": 74.06, "elapsed_time": "2:31:46", "remaining_time": "0:53:09", "throughput": 19990.04, "total_tokens": 182048384}
|
|
{"current_steps": 57850, "total_steps": 78105, "loss": 0.2408, "lr": 9.563463569011866e-07, "epoch": 3.7033480571026183, "percentage": 74.07, "elapsed_time": "2:31:47", "remaining_time": "0:53:08", "throughput": 19990.27, "total_tokens": 182063680}
|
|
{"current_steps": 57855, "total_steps": 78105, "loss": 0.1413, "lr": 9.559069588734784e-07, "epoch": 3.7036681390435953, "percentage": 74.07, "elapsed_time": "2:31:48", "remaining_time": "0:53:08", "throughput": 19990.49, "total_tokens": 182079168}
|
|
{"current_steps": 57860, "total_steps": 78105, "loss": 0.1944, "lr": 9.554676379497338e-07, "epoch": 3.703988220984572, "percentage": 74.08, "elapsed_time": "2:31:48", "remaining_time": "0:53:07", "throughput": 19990.75, "total_tokens": 182095360}
|
|
{"current_steps": 57865, "total_steps": 78105, "loss": 0.175, "lr": 9.5502839415189e-07, "epoch": 3.704308302925549, "percentage": 74.09, "elapsed_time": "2:31:49", "remaining_time": "0:53:06", "throughput": 19991.08, "total_tokens": 182112832}
|
|
{"current_steps": 57870, "total_steps": 78105, "loss": 0.2135, "lr": 9.54589227501881e-07, "epoch": 3.704628384866526, "percentage": 74.09, "elapsed_time": "2:31:50", "remaining_time": "0:53:05", "throughput": 19991.27, "total_tokens": 182127168}
|
|
{"current_steps": 57875, "total_steps": 78105, "loss": 0.1375, "lr": 9.541501380216358e-07, "epoch": 3.704948466807503, "percentage": 74.1, "elapsed_time": "2:31:50", "remaining_time": "0:53:04", "throughput": 19991.51, "total_tokens": 182142656}
|
|
{"current_steps": 57880, "total_steps": 78105, "loss": 0.1446, "lr": 9.53711125733081e-07, "epoch": 3.7052685487484798, "percentage": 74.11, "elapsed_time": "2:31:51", "remaining_time": "0:53:03", "throughput": 19991.81, "total_tokens": 182159680}
|
|
{"current_steps": 57885, "total_steps": 78105, "loss": 0.233, "lr": 9.532721906581375e-07, "epoch": 3.7055886306894568, "percentage": 74.11, "elapsed_time": "2:31:52", "remaining_time": "0:53:03", "throughput": 19992.02, "total_tokens": 182174464}
|
|
{"current_steps": 57890, "total_steps": 78105, "loss": 0.2154, "lr": 9.528333328187248e-07, "epoch": 3.7059087126304333, "percentage": 74.12, "elapsed_time": "2:31:53", "remaining_time": "0:53:02", "throughput": 19992.27, "total_tokens": 182190592}
|
|
{"current_steps": 57895, "total_steps": 78105, "loss": 0.2132, "lr": 9.523945522367564e-07, "epoch": 3.7062287945714103, "percentage": 74.12, "elapsed_time": "2:31:53", "remaining_time": "0:53:01", "throughput": 19992.49, "total_tokens": 182205888}
|
|
{"current_steps": 57900, "total_steps": 78105, "loss": 0.1342, "lr": 9.519558489341432e-07, "epoch": 3.7065488765123873, "percentage": 74.13, "elapsed_time": "2:31:54", "remaining_time": "0:53:00", "throughput": 19992.76, "total_tokens": 182222208}
|
|
{"current_steps": 57905, "total_steps": 78105, "loss": 0.1714, "lr": 9.515172229327912e-07, "epoch": 3.706868958453364, "percentage": 74.14, "elapsed_time": "2:31:55", "remaining_time": "0:52:59", "throughput": 19992.98, "total_tokens": 182237312}
|
|
{"current_steps": 57910, "total_steps": 78105, "loss": 0.1067, "lr": 9.510786742546035e-07, "epoch": 3.707189040394341, "percentage": 74.14, "elapsed_time": "2:31:55", "remaining_time": "0:52:58", "throughput": 19993.2, "total_tokens": 182252352}
|
|
{"current_steps": 57915, "total_steps": 78105, "loss": 0.148, "lr": 9.506402029214778e-07, "epoch": 3.707509122335318, "percentage": 74.15, "elapsed_time": "2:31:56", "remaining_time": "0:52:58", "throughput": 19993.42, "total_tokens": 182267328}
|
|
{"current_steps": 57920, "total_steps": 78105, "loss": 0.1358, "lr": 9.502018089553119e-07, "epoch": 3.707829204276295, "percentage": 74.16, "elapsed_time": "2:31:57", "remaining_time": "0:52:57", "throughput": 19993.66, "total_tokens": 182283200}
|
|
{"current_steps": 57925, "total_steps": 78105, "loss": 0.1286, "lr": 9.497634923779933e-07, "epoch": 3.708149286217272, "percentage": 74.16, "elapsed_time": "2:31:57", "remaining_time": "0:52:56", "throughput": 19993.88, "total_tokens": 182298432}
|
|
{"current_steps": 57930, "total_steps": 78105, "loss": 0.1499, "lr": 9.493252532114127e-07, "epoch": 3.7084693681582483, "percentage": 74.17, "elapsed_time": "2:31:58", "remaining_time": "0:52:55", "throughput": 19994.11, "total_tokens": 182314048}
|
|
{"current_steps": 57935, "total_steps": 78105, "loss": 0.1604, "lr": 9.4888709147745e-07, "epoch": 3.7087894500992253, "percentage": 74.18, "elapsed_time": "2:31:59", "remaining_time": "0:52:54", "throughput": 19994.36, "total_tokens": 182330112}
|
|
{"current_steps": 57940, "total_steps": 78105, "loss": 0.1404, "lr": 9.484490071979877e-07, "epoch": 3.7091095320402023, "percentage": 74.18, "elapsed_time": "2:31:59", "remaining_time": "0:52:53", "throughput": 19994.66, "total_tokens": 182347200}
|
|
{"current_steps": 57945, "total_steps": 78105, "loss": 0.1375, "lr": 9.480110003948997e-07, "epoch": 3.7094296139811793, "percentage": 74.19, "elapsed_time": "2:32:00", "remaining_time": "0:52:53", "throughput": 19994.9, "total_tokens": 182362816}
|
|
{"current_steps": 57950, "total_steps": 78105, "loss": 0.0855, "lr": 9.47573071090058e-07, "epoch": 3.709749695922156, "percentage": 74.19, "elapsed_time": "2:32:01", "remaining_time": "0:52:52", "throughput": 19995.12, "total_tokens": 182377984}
|
|
{"current_steps": 57955, "total_steps": 78105, "loss": 0.2059, "lr": 9.47135219305332e-07, "epoch": 3.710069777863133, "percentage": 74.2, "elapsed_time": "2:32:01", "remaining_time": "0:52:51", "throughput": 19995.34, "total_tokens": 182393344}
|
|
{"current_steps": 57960, "total_steps": 78105, "loss": 0.1385, "lr": 9.466974450625832e-07, "epoch": 3.71038985980411, "percentage": 74.21, "elapsed_time": "2:32:02", "remaining_time": "0:52:50", "throughput": 19995.57, "total_tokens": 182408832}
|
|
{"current_steps": 57965, "total_steps": 78105, "loss": 0.1468, "lr": 9.462597483836741e-07, "epoch": 3.710709941745087, "percentage": 74.21, "elapsed_time": "2:32:03", "remaining_time": "0:52:49", "throughput": 19995.87, "total_tokens": 182426112}
|
|
{"current_steps": 57970, "total_steps": 78105, "loss": 0.217, "lr": 9.458221292904599e-07, "epoch": 3.711030023686064, "percentage": 74.22, "elapsed_time": "2:32:03", "remaining_time": "0:52:49", "throughput": 19996.15, "total_tokens": 182442944}
|
|
{"current_steps": 57975, "total_steps": 78105, "loss": 0.1142, "lr": 9.453845878047929e-07, "epoch": 3.7113501056270404, "percentage": 74.23, "elapsed_time": "2:32:04", "remaining_time": "0:52:48", "throughput": 19996.39, "total_tokens": 182458496}
|
|
{"current_steps": 57980, "total_steps": 78105, "loss": 0.1684, "lr": 9.449471239485222e-07, "epoch": 3.7116701875680174, "percentage": 74.23, "elapsed_time": "2:32:05", "remaining_time": "0:52:47", "throughput": 19996.63, "total_tokens": 182473920}
|
|
{"current_steps": 57985, "total_steps": 78105, "loss": 0.1379, "lr": 9.445097377434919e-07, "epoch": 3.7119902695089944, "percentage": 74.24, "elapsed_time": "2:32:05", "remaining_time": "0:52:46", "throughput": 19996.91, "total_tokens": 182490496}
|
|
{"current_steps": 57990, "total_steps": 78105, "loss": 0.1118, "lr": 9.440724292115433e-07, "epoch": 3.7123103514499713, "percentage": 74.25, "elapsed_time": "2:32:06", "remaining_time": "0:52:45", "throughput": 19997.13, "total_tokens": 182505920}
|
|
{"current_steps": 57995, "total_steps": 78105, "loss": 0.2433, "lr": 9.436351983745121e-07, "epoch": 3.712630433390948, "percentage": 74.25, "elapsed_time": "2:32:07", "remaining_time": "0:52:44", "throughput": 19997.33, "total_tokens": 182520832}
|
|
{"current_steps": 58000, "total_steps": 78105, "loss": 0.1167, "lr": 9.431980452542333e-07, "epoch": 3.712950515331925, "percentage": 74.26, "elapsed_time": "2:32:07", "remaining_time": "0:52:44", "throughput": 19997.6, "total_tokens": 182537216}
|
|
{"current_steps": 58005, "total_steps": 78105, "loss": 0.1445, "lr": 9.427609698725349e-07, "epoch": 3.713270597272902, "percentage": 74.27, "elapsed_time": "2:32:08", "remaining_time": "0:52:43", "throughput": 19997.8, "total_tokens": 182552320}
|
|
{"current_steps": 58010, "total_steps": 78105, "loss": 0.1567, "lr": 9.423239722512422e-07, "epoch": 3.713590679213879, "percentage": 74.27, "elapsed_time": "2:32:09", "remaining_time": "0:52:42", "throughput": 19998.07, "total_tokens": 182568576}
|
|
{"current_steps": 58015, "total_steps": 78105, "loss": 0.1741, "lr": 9.418870524121768e-07, "epoch": 3.713910761154856, "percentage": 74.28, "elapsed_time": "2:32:10", "remaining_time": "0:52:41", "throughput": 19998.36, "total_tokens": 182585408}
|
|
{"current_steps": 58020, "total_steps": 78105, "loss": 0.1087, "lr": 9.414502103771562e-07, "epoch": 3.7142308430958324, "percentage": 74.28, "elapsed_time": "2:32:10", "remaining_time": "0:52:40", "throughput": 19998.58, "total_tokens": 182601024}
|
|
{"current_steps": 58025, "total_steps": 78105, "loss": 0.1428, "lr": 9.410134461679935e-07, "epoch": 3.7145509250368094, "percentage": 74.29, "elapsed_time": "2:32:11", "remaining_time": "0:52:39", "throughput": 19998.78, "total_tokens": 182615936}
|
|
{"current_steps": 58030, "total_steps": 78105, "loss": 0.2219, "lr": 9.40576759806499e-07, "epoch": 3.7148710069777864, "percentage": 74.3, "elapsed_time": "2:32:12", "remaining_time": "0:52:39", "throughput": 19999.09, "total_tokens": 182632960}
|
|
{"current_steps": 58035, "total_steps": 78105, "loss": 0.1462, "lr": 9.401401513144778e-07, "epoch": 3.715191088918763, "percentage": 74.3, "elapsed_time": "2:32:12", "remaining_time": "0:52:38", "throughput": 19999.34, "total_tokens": 182649216}
|
|
{"current_steps": 58040, "total_steps": 78105, "loss": 0.1371, "lr": 9.397036207137339e-07, "epoch": 3.71551117085974, "percentage": 74.31, "elapsed_time": "2:32:13", "remaining_time": "0:52:37", "throughput": 19999.54, "total_tokens": 182664000}
|
|
{"current_steps": 58045, "total_steps": 78105, "loss": 0.2014, "lr": 9.392671680260623e-07, "epoch": 3.715831252800717, "percentage": 74.32, "elapsed_time": "2:32:14", "remaining_time": "0:52:36", "throughput": 19999.76, "total_tokens": 182679296}
|
|
{"current_steps": 58050, "total_steps": 78105, "loss": 0.1677, "lr": 9.388307932732596e-07, "epoch": 3.716151334741694, "percentage": 74.32, "elapsed_time": "2:32:14", "remaining_time": "0:52:35", "throughput": 20000.02, "total_tokens": 182695360}
|
|
{"current_steps": 58055, "total_steps": 78105, "loss": 0.211, "lr": 9.383944964771153e-07, "epoch": 3.716471416682671, "percentage": 74.33, "elapsed_time": "2:32:15", "remaining_time": "0:52:35", "throughput": 20000.26, "total_tokens": 182711040}
|
|
{"current_steps": 58060, "total_steps": 78105, "loss": 0.1923, "lr": 9.379582776594145e-07, "epoch": 3.716791498623648, "percentage": 74.34, "elapsed_time": "2:32:16", "remaining_time": "0:52:34", "throughput": 20000.53, "total_tokens": 182727488}
|
|
{"current_steps": 58065, "total_steps": 78105, "loss": 0.2335, "lr": 9.37522136841943e-07, "epoch": 3.7171115805646244, "percentage": 74.34, "elapsed_time": "2:32:16", "remaining_time": "0:52:33", "throughput": 20000.75, "total_tokens": 182742656}
|
|
{"current_steps": 58070, "total_steps": 78105, "loss": 0.1594, "lr": 9.370860740464757e-07, "epoch": 3.7174316625056014, "percentage": 74.35, "elapsed_time": "2:32:17", "remaining_time": "0:52:32", "throughput": 20001.0, "total_tokens": 182758528}
|
|
{"current_steps": 58075, "total_steps": 78105, "loss": 0.1927, "lr": 9.366500892947905e-07, "epoch": 3.7177517444465784, "percentage": 74.36, "elapsed_time": "2:32:18", "remaining_time": "0:52:31", "throughput": 20001.29, "total_tokens": 182775104}
|
|
{"current_steps": 58080, "total_steps": 78105, "loss": 0.2175, "lr": 9.362141826086549e-07, "epoch": 3.718071826387555, "percentage": 74.36, "elapsed_time": "2:32:18", "remaining_time": "0:52:30", "throughput": 20001.48, "total_tokens": 182789568}
|
|
{"current_steps": 58085, "total_steps": 78105, "loss": 0.1658, "lr": 9.357783540098383e-07, "epoch": 3.718391908328532, "percentage": 74.37, "elapsed_time": "2:32:19", "remaining_time": "0:52:30", "throughput": 20001.68, "total_tokens": 182804800}
|
|
{"current_steps": 58090, "total_steps": 78105, "loss": 0.1647, "lr": 9.353426035201032e-07, "epoch": 3.718711990269509, "percentage": 74.37, "elapsed_time": "2:32:20", "remaining_time": "0:52:29", "throughput": 20001.97, "total_tokens": 182821696}
|
|
{"current_steps": 58095, "total_steps": 78105, "loss": 0.1604, "lr": 9.349069311612086e-07, "epoch": 3.719032072210486, "percentage": 74.38, "elapsed_time": "2:32:20", "remaining_time": "0:52:28", "throughput": 20002.21, "total_tokens": 182837312}
|
|
{"current_steps": 58100, "total_steps": 78105, "loss": 0.1641, "lr": 9.344713369549094e-07, "epoch": 3.719352154151463, "percentage": 74.39, "elapsed_time": "2:32:21", "remaining_time": "0:52:27", "throughput": 20002.46, "total_tokens": 182853184}
|
|
{"current_steps": 58105, "total_steps": 78105, "loss": 0.1184, "lr": 9.34035820922957e-07, "epoch": 3.71967223609244, "percentage": 74.39, "elapsed_time": "2:32:22", "remaining_time": "0:52:26", "throughput": 20002.74, "total_tokens": 182869824}
|
|
{"current_steps": 58110, "total_steps": 78105, "loss": 0.1285, "lr": 9.336003830870991e-07, "epoch": 3.7199923180334165, "percentage": 74.4, "elapsed_time": "2:32:22", "remaining_time": "0:52:25", "throughput": 20002.99, "total_tokens": 182886016}
|
|
{"current_steps": 58115, "total_steps": 78105, "loss": 0.1813, "lr": 9.331650234690778e-07, "epoch": 3.7203123999743934, "percentage": 74.41, "elapsed_time": "2:32:23", "remaining_time": "0:52:25", "throughput": 20003.26, "total_tokens": 182902336}
|
|
{"current_steps": 58120, "total_steps": 78105, "loss": 0.1766, "lr": 9.32729742090635e-07, "epoch": 3.7206324819153704, "percentage": 74.41, "elapsed_time": "2:32:24", "remaining_time": "0:52:24", "throughput": 20003.54, "total_tokens": 182919040}
|
|
{"current_steps": 58125, "total_steps": 78105, "loss": 0.1951, "lr": 9.322945389735053e-07, "epoch": 3.720952563856347, "percentage": 74.42, "elapsed_time": "2:32:24", "remaining_time": "0:52:23", "throughput": 20003.77, "total_tokens": 182934208}
|
|
{"current_steps": 58130, "total_steps": 78105, "loss": 0.1802, "lr": 9.318594141394202e-07, "epoch": 3.721272645797324, "percentage": 74.43, "elapsed_time": "2:32:25", "remaining_time": "0:52:22", "throughput": 20004.01, "total_tokens": 182949888}
|
|
{"current_steps": 58135, "total_steps": 78105, "loss": 0.1623, "lr": 9.314243676101082e-07, "epoch": 3.721592727738301, "percentage": 74.43, "elapsed_time": "2:32:26", "remaining_time": "0:52:21", "throughput": 20004.26, "total_tokens": 182966080}
|
|
{"current_steps": 58140, "total_steps": 78105, "loss": 0.165, "lr": 9.309893994072924e-07, "epoch": 3.721912809679278, "percentage": 74.44, "elapsed_time": "2:32:27", "remaining_time": "0:52:21", "throughput": 20004.45, "total_tokens": 182980928}
|
|
{"current_steps": 58145, "total_steps": 78105, "loss": 0.1839, "lr": 9.305545095526927e-07, "epoch": 3.722232891620255, "percentage": 74.44, "elapsed_time": "2:32:27", "remaining_time": "0:52:20", "throughput": 20004.68, "total_tokens": 182996224}
|
|
{"current_steps": 58150, "total_steps": 78105, "loss": 0.1195, "lr": 9.301196980680277e-07, "epoch": 3.722552973561232, "percentage": 74.45, "elapsed_time": "2:32:28", "remaining_time": "0:52:19", "throughput": 20004.88, "total_tokens": 183011008}
|
|
{"current_steps": 58155, "total_steps": 78105, "loss": 0.1878, "lr": 9.296849649750059e-07, "epoch": 3.7228730555022085, "percentage": 74.46, "elapsed_time": "2:32:29", "remaining_time": "0:52:18", "throughput": 20005.14, "total_tokens": 183027136}
|
|
{"current_steps": 58160, "total_steps": 78105, "loss": 0.2403, "lr": 9.292503102953393e-07, "epoch": 3.7231931374431855, "percentage": 74.46, "elapsed_time": "2:32:29", "remaining_time": "0:52:17", "throughput": 20005.45, "total_tokens": 183044736}
|
|
{"current_steps": 58165, "total_steps": 78105, "loss": 0.1359, "lr": 9.288157340507287e-07, "epoch": 3.7235132193841625, "percentage": 74.47, "elapsed_time": "2:32:30", "remaining_time": "0:52:17", "throughput": 20006.03, "total_tokens": 183073088}
|
|
{"current_steps": 58170, "total_steps": 78105, "loss": 0.1782, "lr": 9.283812362628772e-07, "epoch": 3.723833301325139, "percentage": 74.48, "elapsed_time": "2:32:31", "remaining_time": "0:52:16", "throughput": 20006.24, "total_tokens": 183088192}
|
|
{"current_steps": 58175, "total_steps": 78105, "loss": 0.2739, "lr": 9.279468169534803e-07, "epoch": 3.724153383266116, "percentage": 74.48, "elapsed_time": "2:32:32", "remaining_time": "0:52:15", "throughput": 20006.45, "total_tokens": 183103616}
|
|
{"current_steps": 58180, "total_steps": 78105, "loss": 0.1555, "lr": 9.275124761442303e-07, "epoch": 3.724473465207093, "percentage": 74.49, "elapsed_time": "2:32:32", "remaining_time": "0:52:14", "throughput": 20006.7, "total_tokens": 183119680}
|
|
{"current_steps": 58185, "total_steps": 78105, "loss": 0.1637, "lr": 9.270782138568179e-07, "epoch": 3.72479354714807, "percentage": 74.5, "elapsed_time": "2:32:33", "remaining_time": "0:52:13", "throughput": 20006.96, "total_tokens": 183135744}
|
|
{"current_steps": 58190, "total_steps": 78105, "loss": 0.2175, "lr": 9.266440301129248e-07, "epoch": 3.725113629089047, "percentage": 74.5, "elapsed_time": "2:32:34", "remaining_time": "0:52:12", "throughput": 20007.21, "total_tokens": 183152000}
|
|
{"current_steps": 58195, "total_steps": 78105, "loss": 0.1934, "lr": 9.262099249342344e-07, "epoch": 3.7254337110300235, "percentage": 74.51, "elapsed_time": "2:32:34", "remaining_time": "0:52:12", "throughput": 20007.43, "total_tokens": 183167232}
|
|
{"current_steps": 58200, "total_steps": 78105, "loss": 0.1466, "lr": 9.257758983424228e-07, "epoch": 3.7257537929710005, "percentage": 74.52, "elapsed_time": "2:32:35", "remaining_time": "0:52:11", "throughput": 20007.64, "total_tokens": 183182464}
|
|
{"current_steps": 58205, "total_steps": 78105, "loss": 0.1633, "lr": 9.253419503591632e-07, "epoch": 3.7260738749119775, "percentage": 74.52, "elapsed_time": "2:32:36", "remaining_time": "0:52:10", "throughput": 20007.89, "total_tokens": 183198464}
|
|
{"current_steps": 58210, "total_steps": 78105, "loss": 0.2352, "lr": 9.249080810061243e-07, "epoch": 3.7263939568529545, "percentage": 74.53, "elapsed_time": "2:32:36", "remaining_time": "0:52:09", "throughput": 20008.13, "total_tokens": 183214336}
|
|
{"current_steps": 58215, "total_steps": 78105, "loss": 0.1492, "lr": 9.244742903049714e-07, "epoch": 3.726714038793931, "percentage": 74.53, "elapsed_time": "2:32:37", "remaining_time": "0:52:08", "throughput": 20008.39, "total_tokens": 183229952}
|
|
{"current_steps": 58220, "total_steps": 78105, "loss": 0.193, "lr": 9.240405782773659e-07, "epoch": 3.727034120734908, "percentage": 74.54, "elapsed_time": "2:32:38", "remaining_time": "0:52:08", "throughput": 20008.6, "total_tokens": 183245120}
|
|
{"current_steps": 58225, "total_steps": 78105, "loss": 0.1448, "lr": 9.23606944944965e-07, "epoch": 3.727354202675885, "percentage": 74.55, "elapsed_time": "2:32:39", "remaining_time": "0:52:07", "throughput": 20008.85, "total_tokens": 183261248}
|
|
{"current_steps": 58230, "total_steps": 78105, "loss": 0.3069, "lr": 9.231733903294213e-07, "epoch": 3.727674284616862, "percentage": 74.55, "elapsed_time": "2:32:39", "remaining_time": "0:52:06", "throughput": 20009.06, "total_tokens": 183276352}
|
|
{"current_steps": 58235, "total_steps": 78105, "loss": 0.1813, "lr": 9.22739914452386e-07, "epoch": 3.727994366557839, "percentage": 74.56, "elapsed_time": "2:32:40", "remaining_time": "0:52:05", "throughput": 20009.27, "total_tokens": 183291072}
|
|
{"current_steps": 58240, "total_steps": 78105, "loss": 0.1631, "lr": 9.223065173355039e-07, "epoch": 3.7283144484988155, "percentage": 74.57, "elapsed_time": "2:32:40", "remaining_time": "0:52:04", "throughput": 20009.52, "total_tokens": 183306944}
|
|
{"current_steps": 58245, "total_steps": 78105, "loss": 0.1864, "lr": 9.218731990004162e-07, "epoch": 3.7286345304397925, "percentage": 74.57, "elapsed_time": "2:32:41", "remaining_time": "0:52:03", "throughput": 20009.72, "total_tokens": 183321664}
|
|
{"current_steps": 58250, "total_steps": 78105, "loss": 0.2093, "lr": 9.214399594687609e-07, "epoch": 3.7289546123807695, "percentage": 74.58, "elapsed_time": "2:32:42", "remaining_time": "0:52:03", "throughput": 20009.95, "total_tokens": 183336896}
|
|
{"current_steps": 58255, "total_steps": 78105, "loss": 0.2773, "lr": 9.210067987621715e-07, "epoch": 3.7292746943217465, "percentage": 74.59, "elapsed_time": "2:32:43", "remaining_time": "0:52:02", "throughput": 20010.25, "total_tokens": 183354176}
|
|
{"current_steps": 58260, "total_steps": 78105, "loss": 0.1509, "lr": 9.205737169022777e-07, "epoch": 3.729594776262723, "percentage": 74.59, "elapsed_time": "2:32:43", "remaining_time": "0:52:01", "throughput": 20010.49, "total_tokens": 183370304}
|
|
{"current_steps": 58265, "total_steps": 78105, "loss": 0.1581, "lr": 9.201407139107049e-07, "epoch": 3.7299148582037, "percentage": 74.6, "elapsed_time": "2:32:44", "remaining_time": "0:52:00", "throughput": 20010.65, "total_tokens": 183384000}
|
|
{"current_steps": 58270, "total_steps": 78105, "loss": 0.1703, "lr": 9.197077898090776e-07, "epoch": 3.730234940144677, "percentage": 74.6, "elapsed_time": "2:32:45", "remaining_time": "0:51:59", "throughput": 20010.95, "total_tokens": 183401344}
|
|
{"current_steps": 58275, "total_steps": 78105, "loss": 0.2102, "lr": 9.1927494461901e-07, "epoch": 3.730555022085654, "percentage": 74.61, "elapsed_time": "2:32:45", "remaining_time": "0:51:58", "throughput": 20011.24, "total_tokens": 183417920}
|
|
{"current_steps": 58280, "total_steps": 78105, "loss": 0.1473, "lr": 9.188421783621184e-07, "epoch": 3.730875104026631, "percentage": 74.62, "elapsed_time": "2:32:46", "remaining_time": "0:51:58", "throughput": 20011.61, "total_tokens": 183436544}
|
|
{"current_steps": 58285, "total_steps": 78105, "loss": 0.2144, "lr": 9.18409491060013e-07, "epoch": 3.7311951859676076, "percentage": 74.62, "elapsed_time": "2:32:47", "remaining_time": "0:51:57", "throughput": 20011.82, "total_tokens": 183451520}
|
|
{"current_steps": 58290, "total_steps": 78105, "loss": 0.141, "lr": 9.179768827342983e-07, "epoch": 3.7315152679085846, "percentage": 74.63, "elapsed_time": "2:32:47", "remaining_time": "0:51:56", "throughput": 20012.11, "total_tokens": 183468352}
|
|
{"current_steps": 58295, "total_steps": 78105, "loss": 0.1781, "lr": 9.175443534065795e-07, "epoch": 3.7318353498495616, "percentage": 74.64, "elapsed_time": "2:32:48", "remaining_time": "0:51:55", "throughput": 20012.35, "total_tokens": 183483648}
|
|
{"current_steps": 58300, "total_steps": 78105, "loss": 0.142, "lr": 9.17111903098451e-07, "epoch": 3.732155431790538, "percentage": 74.64, "elapsed_time": "2:32:49", "remaining_time": "0:51:54", "throughput": 20012.57, "total_tokens": 183499264}
|
|
{"current_steps": 58305, "total_steps": 78105, "loss": 0.134, "lr": 9.166795318315111e-07, "epoch": 3.732475513731515, "percentage": 74.65, "elapsed_time": "2:32:49", "remaining_time": "0:51:54", "throughput": 20012.8, "total_tokens": 183514816}
|
|
{"current_steps": 58310, "total_steps": 78105, "loss": 0.1455, "lr": 9.162472396273461e-07, "epoch": 3.732795595672492, "percentage": 74.66, "elapsed_time": "2:32:50", "remaining_time": "0:51:53", "throughput": 20013.11, "total_tokens": 183531840}
|
|
{"current_steps": 58315, "total_steps": 78105, "loss": 0.1455, "lr": 9.158150265075458e-07, "epoch": 3.733115677613469, "percentage": 74.66, "elapsed_time": "2:32:51", "remaining_time": "0:51:52", "throughput": 20013.35, "total_tokens": 183547712}
|
|
{"current_steps": 58320, "total_steps": 78105, "loss": 0.1859, "lr": 9.153828924936911e-07, "epoch": 3.733435759554446, "percentage": 74.67, "elapsed_time": "2:32:51", "remaining_time": "0:51:51", "throughput": 20013.59, "total_tokens": 183563136}
|
|
{"current_steps": 58325, "total_steps": 78105, "loss": 0.1411, "lr": 9.149508376073607e-07, "epoch": 3.733755841495423, "percentage": 74.68, "elapsed_time": "2:32:52", "remaining_time": "0:51:50", "throughput": 20013.79, "total_tokens": 183578176}
|
|
{"current_steps": 58330, "total_steps": 78105, "loss": 0.1877, "lr": 9.145188618701292e-07, "epoch": 3.7340759234363996, "percentage": 74.68, "elapsed_time": "2:32:53", "remaining_time": "0:51:49", "throughput": 20014.02, "total_tokens": 183593728}
|
|
{"current_steps": 58335, "total_steps": 78105, "loss": 0.2938, "lr": 9.140869653035675e-07, "epoch": 3.7343960053773766, "percentage": 74.69, "elapsed_time": "2:32:53", "remaining_time": "0:51:49", "throughput": 20014.21, "total_tokens": 183608192}
|
|
{"current_steps": 58340, "total_steps": 78105, "loss": 0.1158, "lr": 9.136551479292419e-07, "epoch": 3.7347160873183536, "percentage": 74.69, "elapsed_time": "2:32:54", "remaining_time": "0:51:48", "throughput": 20014.44, "total_tokens": 183623616}
|
|
{"current_steps": 58345, "total_steps": 78105, "loss": 0.2017, "lr": 9.132234097687146e-07, "epoch": 3.73503616925933, "percentage": 74.7, "elapsed_time": "2:32:55", "remaining_time": "0:51:47", "throughput": 20014.68, "total_tokens": 183639424}
|
|
{"current_steps": 58350, "total_steps": 78105, "loss": 0.1849, "lr": 9.127917508435455e-07, "epoch": 3.735356251200307, "percentage": 74.71, "elapsed_time": "2:32:55", "remaining_time": "0:51:46", "throughput": 20014.92, "total_tokens": 183655104}
|
|
{"current_steps": 58355, "total_steps": 78105, "loss": 0.1435, "lr": 9.123601711752891e-07, "epoch": 3.735676333141284, "percentage": 74.71, "elapsed_time": "2:32:56", "remaining_time": "0:51:45", "throughput": 20015.18, "total_tokens": 183671040}
|
|
{"current_steps": 58360, "total_steps": 78105, "loss": 0.1367, "lr": 9.11928670785496e-07, "epoch": 3.735996415082261, "percentage": 74.72, "elapsed_time": "2:32:57", "remaining_time": "0:51:44", "throughput": 20015.37, "total_tokens": 183685504}
|
|
{"current_steps": 58365, "total_steps": 78105, "loss": 0.1668, "lr": 9.114972496957133e-07, "epoch": 3.736316497023238, "percentage": 74.73, "elapsed_time": "2:32:57", "remaining_time": "0:51:44", "throughput": 20015.62, "total_tokens": 183701568}
|
|
{"current_steps": 58370, "total_steps": 78105, "loss": 0.1892, "lr": 9.110659079274837e-07, "epoch": 3.736636578964215, "percentage": 74.73, "elapsed_time": "2:32:58", "remaining_time": "0:51:43", "throughput": 20015.88, "total_tokens": 183717760}
|
|
{"current_steps": 58375, "total_steps": 78105, "loss": 0.1631, "lr": 9.106346455023452e-07, "epoch": 3.7369566609051916, "percentage": 74.74, "elapsed_time": "2:32:59", "remaining_time": "0:51:42", "throughput": 20016.11, "total_tokens": 183733376}
|
|
{"current_steps": 58380, "total_steps": 78105, "loss": 0.1023, "lr": 9.102034624418354e-07, "epoch": 3.7372767428461686, "percentage": 74.75, "elapsed_time": "2:32:59", "remaining_time": "0:51:41", "throughput": 20016.31, "total_tokens": 183748416}
|
|
{"current_steps": 58385, "total_steps": 78105, "loss": 0.1932, "lr": 9.097723587674823e-07, "epoch": 3.7375968247871456, "percentage": 74.75, "elapsed_time": "2:33:00", "remaining_time": "0:51:40", "throughput": 20016.55, "total_tokens": 183764032}
|
|
{"current_steps": 58390, "total_steps": 78105, "loss": 0.1381, "lr": 9.093413345008159e-07, "epoch": 3.737916906728122, "percentage": 74.76, "elapsed_time": "2:33:01", "remaining_time": "0:51:39", "throughput": 20016.74, "total_tokens": 183779008}
|
|
{"current_steps": 58395, "total_steps": 78105, "loss": 0.2475, "lr": 9.089103896633561e-07, "epoch": 3.738236988669099, "percentage": 74.76, "elapsed_time": "2:33:01", "remaining_time": "0:51:39", "throughput": 20016.96, "total_tokens": 183794048}
|
|
{"current_steps": 58400, "total_steps": 78105, "loss": 0.1339, "lr": 9.084795242766248e-07, "epoch": 3.738557070610076, "percentage": 74.77, "elapsed_time": "2:33:02", "remaining_time": "0:51:38", "throughput": 20017.19, "total_tokens": 183809664}
|
|
{"current_steps": 58405, "total_steps": 78105, "loss": 0.135, "lr": 9.080487383621358e-07, "epoch": 3.738877152551053, "percentage": 74.78, "elapsed_time": "2:33:03", "remaining_time": "0:51:37", "throughput": 20017.44, "total_tokens": 183825472}
|
|
{"current_steps": 58410, "total_steps": 78105, "loss": 0.1617, "lr": 9.076180319414002e-07, "epoch": 3.73919723449203, "percentage": 74.78, "elapsed_time": "2:33:03", "remaining_time": "0:51:36", "throughput": 20017.69, "total_tokens": 183841664}
|
|
{"current_steps": 58415, "total_steps": 78105, "loss": 0.2482, "lr": 9.071874050359272e-07, "epoch": 3.739517316433007, "percentage": 74.79, "elapsed_time": "2:33:04", "remaining_time": "0:51:35", "throughput": 20017.92, "total_tokens": 183857088}
|
|
{"current_steps": 58420, "total_steps": 78105, "loss": 0.2929, "lr": 9.067568576672167e-07, "epoch": 3.7398373983739837, "percentage": 74.8, "elapsed_time": "2:33:05", "remaining_time": "0:51:35", "throughput": 20018.15, "total_tokens": 183872384}
|
|
{"current_steps": 58425, "total_steps": 78105, "loss": 0.1294, "lr": 9.063263898567709e-07, "epoch": 3.7401574803149606, "percentage": 74.8, "elapsed_time": "2:33:05", "remaining_time": "0:51:34", "throughput": 20018.38, "total_tokens": 183887680}
|
|
{"current_steps": 58430, "total_steps": 78105, "loss": 0.1621, "lr": 9.05896001626084e-07, "epoch": 3.7404775622559376, "percentage": 74.81, "elapsed_time": "2:33:06", "remaining_time": "0:51:33", "throughput": 20018.6, "total_tokens": 183903424}
|
|
{"current_steps": 58435, "total_steps": 78105, "loss": 0.1688, "lr": 9.054656929966474e-07, "epoch": 3.740797644196914, "percentage": 74.82, "elapsed_time": "2:33:07", "remaining_time": "0:51:32", "throughput": 20018.85, "total_tokens": 183919296}
|
|
{"current_steps": 58440, "total_steps": 78105, "loss": 0.1352, "lr": 9.050354639899484e-07, "epoch": 3.741117726137891, "percentage": 74.82, "elapsed_time": "2:33:07", "remaining_time": "0:51:31", "throughput": 20019.03, "total_tokens": 183933760}
|
|
{"current_steps": 58445, "total_steps": 78105, "loss": 0.1704, "lr": 9.046053146274702e-07, "epoch": 3.741437808078868, "percentage": 74.83, "elapsed_time": "2:33:08", "remaining_time": "0:51:30", "throughput": 20019.25, "total_tokens": 183948992}
|
|
{"current_steps": 58450, "total_steps": 78105, "loss": 0.2059, "lr": 9.041752449306928e-07, "epoch": 3.741757890019845, "percentage": 74.84, "elapsed_time": "2:33:09", "remaining_time": "0:51:30", "throughput": 20019.52, "total_tokens": 183965312}
|
|
{"current_steps": 58455, "total_steps": 78105, "loss": 0.113, "lr": 9.037452549210915e-07, "epoch": 3.742077971960822, "percentage": 74.84, "elapsed_time": "2:33:09", "remaining_time": "0:51:29", "throughput": 20019.74, "total_tokens": 183980480}
|
|
{"current_steps": 58460, "total_steps": 78105, "loss": 0.1869, "lr": 9.033153446201365e-07, "epoch": 3.742398053901799, "percentage": 74.85, "elapsed_time": "2:33:10", "remaining_time": "0:51:28", "throughput": 20019.97, "total_tokens": 183996096}
|
|
{"current_steps": 58465, "total_steps": 78105, "loss": 0.1666, "lr": 9.02885514049297e-07, "epoch": 3.7427181358427757, "percentage": 74.85, "elapsed_time": "2:33:11", "remaining_time": "0:51:27", "throughput": 20020.18, "total_tokens": 184011264}
|
|
{"current_steps": 58470, "total_steps": 78105, "loss": 0.1623, "lr": 9.024557632300362e-07, "epoch": 3.7430382177837527, "percentage": 74.86, "elapsed_time": "2:33:11", "remaining_time": "0:51:26", "throughput": 20020.41, "total_tokens": 184026688}
|
|
{"current_steps": 58475, "total_steps": 78105, "loss": 0.1249, "lr": 9.020260921838131e-07, "epoch": 3.7433582997247297, "percentage": 74.87, "elapsed_time": "2:33:12", "remaining_time": "0:51:25", "throughput": 20020.61, "total_tokens": 184042048}
|
|
{"current_steps": 58480, "total_steps": 78105, "loss": 0.1691, "lr": 9.015965009320834e-07, "epoch": 3.743678381665706, "percentage": 74.87, "elapsed_time": "2:33:13", "remaining_time": "0:51:25", "throughput": 20020.83, "total_tokens": 184057472}
|
|
{"current_steps": 58485, "total_steps": 78105, "loss": 0.1338, "lr": 9.011669894962988e-07, "epoch": 3.743998463606683, "percentage": 74.88, "elapsed_time": "2:33:13", "remaining_time": "0:51:24", "throughput": 20021.07, "total_tokens": 184073344}
|
|
{"current_steps": 58490, "total_steps": 78105, "loss": 0.1715, "lr": 9.007375578979066e-07, "epoch": 3.74431854554766, "percentage": 74.89, "elapsed_time": "2:33:14", "remaining_time": "0:51:23", "throughput": 20021.3, "total_tokens": 184088704}
|
|
{"current_steps": 58495, "total_steps": 78105, "loss": 0.1331, "lr": 9.003082061583498e-07, "epoch": 3.744638627488637, "percentage": 74.89, "elapsed_time": "2:33:15", "remaining_time": "0:51:22", "throughput": 20021.53, "total_tokens": 184104064}
|
|
{"current_steps": 58500, "total_steps": 78105, "loss": 0.1823, "lr": 8.998789342990702e-07, "epoch": 3.744958709429614, "percentage": 74.9, "elapsed_time": "2:33:15", "remaining_time": "0:51:21", "throughput": 20021.74, "total_tokens": 184119232}
|
|
{"current_steps": 58505, "total_steps": 78105, "loss": 0.1629, "lr": 8.994497423415005e-07, "epoch": 3.7452787913705907, "percentage": 74.91, "elapsed_time": "2:33:16", "remaining_time": "0:51:21", "throughput": 20021.99, "total_tokens": 184135360}
|
|
{"current_steps": 58510, "total_steps": 78105, "loss": 0.1444, "lr": 8.990206303070742e-07, "epoch": 3.7455988733115677, "percentage": 74.91, "elapsed_time": "2:33:17", "remaining_time": "0:51:20", "throughput": 20022.28, "total_tokens": 184152192}
|
|
{"current_steps": 58515, "total_steps": 78105, "loss": 0.1326, "lr": 8.985915982172186e-07, "epoch": 3.7459189552525447, "percentage": 74.92, "elapsed_time": "2:33:18", "remaining_time": "0:51:19", "throughput": 20022.52, "total_tokens": 184168256}
|
|
{"current_steps": 58520, "total_steps": 78105, "loss": 0.2129, "lr": 8.981626460933562e-07, "epoch": 3.7462390371935217, "percentage": 74.92, "elapsed_time": "2:33:18", "remaining_time": "0:51:18", "throughput": 20022.76, "total_tokens": 184184320}
|
|
{"current_steps": 58525, "total_steps": 78105, "loss": 0.1328, "lr": 8.977337739569094e-07, "epoch": 3.7465591191344982, "percentage": 74.93, "elapsed_time": "2:33:19", "remaining_time": "0:51:17", "throughput": 20022.97, "total_tokens": 184199552}
|
|
{"current_steps": 58530, "total_steps": 78105, "loss": 0.0902, "lr": 8.973049818292903e-07, "epoch": 3.7468792010754752, "percentage": 74.94, "elapsed_time": "2:33:20", "remaining_time": "0:51:16", "throughput": 20023.21, "total_tokens": 184215104}
|
|
{"current_steps": 58535, "total_steps": 78105, "loss": 0.2247, "lr": 8.968762697319141e-07, "epoch": 3.747199283016452, "percentage": 74.94, "elapsed_time": "2:33:20", "remaining_time": "0:51:16", "throughput": 20023.44, "total_tokens": 184230592}
|
|
{"current_steps": 58540, "total_steps": 78105, "loss": 0.1407, "lr": 8.964476376861847e-07, "epoch": 3.747519364957429, "percentage": 74.95, "elapsed_time": "2:33:21", "remaining_time": "0:51:15", "throughput": 20023.7, "total_tokens": 184246848}
|
|
{"current_steps": 58545, "total_steps": 78105, "loss": 0.2023, "lr": 8.960190857135088e-07, "epoch": 3.747839446898406, "percentage": 74.96, "elapsed_time": "2:33:22", "remaining_time": "0:51:14", "throughput": 20023.93, "total_tokens": 184262656}
|
|
{"current_steps": 58550, "total_steps": 78105, "loss": 0.2425, "lr": 8.955906138352846e-07, "epoch": 3.7481595288393827, "percentage": 74.96, "elapsed_time": "2:33:22", "remaining_time": "0:51:13", "throughput": 20024.23, "total_tokens": 184279744}
|
|
{"current_steps": 58555, "total_steps": 78105, "loss": 0.2124, "lr": 8.951622220729084e-07, "epoch": 3.7484796107803597, "percentage": 74.97, "elapsed_time": "2:33:23", "remaining_time": "0:51:12", "throughput": 20024.41, "total_tokens": 184294336}
|
|
{"current_steps": 58560, "total_steps": 78105, "loss": 0.2263, "lr": 8.947339104477712e-07, "epoch": 3.7487996927213367, "percentage": 74.98, "elapsed_time": "2:33:24", "remaining_time": "0:51:11", "throughput": 20024.67, "total_tokens": 184310336}
|
|
{"current_steps": 58565, "total_steps": 78105, "loss": 0.1727, "lr": 8.943056789812612e-07, "epoch": 3.7491197746623133, "percentage": 74.98, "elapsed_time": "2:33:24", "remaining_time": "0:51:11", "throughput": 20024.84, "total_tokens": 184324864}
|
|
{"current_steps": 58570, "total_steps": 78105, "loss": 0.1008, "lr": 8.938775276947618e-07, "epoch": 3.7494398566032903, "percentage": 74.99, "elapsed_time": "2:33:25", "remaining_time": "0:51:10", "throughput": 20025.08, "total_tokens": 184340800}
|
|
{"current_steps": 58575, "total_steps": 78105, "loss": 0.1586, "lr": 8.934494566096527e-07, "epoch": 3.7497599385442673, "percentage": 75.0, "elapsed_time": "2:33:26", "remaining_time": "0:51:09", "throughput": 20025.31, "total_tokens": 184356096}
|
|
{"current_steps": 58580, "total_steps": 78105, "loss": 0.1676, "lr": 8.930214657473085e-07, "epoch": 3.7500800204852442, "percentage": 75.0, "elapsed_time": "2:33:26", "remaining_time": "0:51:08", "throughput": 20025.54, "total_tokens": 184371968}
|
|
{"current_steps": 58585, "total_steps": 78105, "loss": 0.1847, "lr": 8.925935551291029e-07, "epoch": 3.7504001024262212, "percentage": 75.01, "elapsed_time": "2:33:27", "remaining_time": "0:51:07", "throughput": 20025.76, "total_tokens": 184387328}
|
|
{"current_steps": 58590, "total_steps": 78105, "loss": 0.1324, "lr": 8.921657247764027e-07, "epoch": 3.7507201843671982, "percentage": 75.01, "elapsed_time": "2:33:28", "remaining_time": "0:51:07", "throughput": 20025.98, "total_tokens": 184402752}
|
|
{"current_steps": 58590, "total_steps": 78105, "eval_loss": 0.5466420650482178, "epoch": 3.7507201843671982, "percentage": 75.01, "elapsed_time": "2:34:19", "remaining_time": "0:51:24", "throughput": 19915.64, "total_tokens": 184402752}
|
|
{"current_steps": 58595, "total_steps": 78105, "loss": 0.1472, "lr": 8.917379747105709e-07, "epoch": 3.7510402663081748, "percentage": 75.02, "elapsed_time": "2:34:53", "remaining_time": "0:51:34", "throughput": 19843.04, "total_tokens": 184417984}
|
|
{"current_steps": 58600, "total_steps": 78105, "loss": 0.1734, "lr": 8.913103049529676e-07, "epoch": 3.7513603482491518, "percentage": 75.03, "elapsed_time": "2:34:54", "remaining_time": "0:51:33", "throughput": 19843.3, "total_tokens": 184434176}
|
|
{"current_steps": 58605, "total_steps": 78105, "loss": 0.1154, "lr": 8.908827155249475e-07, "epoch": 3.7516804301901288, "percentage": 75.03, "elapsed_time": "2:34:55", "remaining_time": "0:51:32", "throughput": 19843.53, "total_tokens": 184449792}
|
|
{"current_steps": 58610, "total_steps": 78105, "loss": 0.1888, "lr": 8.904552064478647e-07, "epoch": 3.7520005121311053, "percentage": 75.04, "elapsed_time": "2:34:55", "remaining_time": "0:51:32", "throughput": 19843.78, "total_tokens": 184465472}
|
|
{"current_steps": 58615, "total_steps": 78105, "loss": 0.1765, "lr": 8.900277777430636e-07, "epoch": 3.7523205940720823, "percentage": 75.05, "elapsed_time": "2:34:56", "remaining_time": "0:51:31", "throughput": 19844.0, "total_tokens": 184480832}
|
|
{"current_steps": 58620, "total_steps": 78105, "loss": 0.1907, "lr": 8.896004294318906e-07, "epoch": 3.7526406760130593, "percentage": 75.05, "elapsed_time": "2:34:57", "remaining_time": "0:51:30", "throughput": 19844.23, "total_tokens": 184496000}
|
|
{"current_steps": 58625, "total_steps": 78105, "loss": 0.1515, "lr": 8.891731615356822e-07, "epoch": 3.7529607579540363, "percentage": 75.06, "elapsed_time": "2:34:57", "remaining_time": "0:51:29", "throughput": 19844.44, "total_tokens": 184510720}
|
|
{"current_steps": 58630, "total_steps": 78105, "loss": 0.16, "lr": 8.887459740757767e-07, "epoch": 3.7532808398950133, "percentage": 75.07, "elapsed_time": "2:34:58", "remaining_time": "0:51:28", "throughput": 19844.62, "total_tokens": 184525120}
|
|
{"current_steps": 58635, "total_steps": 78105, "loss": 0.1823, "lr": 8.883188670735043e-07, "epoch": 3.7536009218359903, "percentage": 75.07, "elapsed_time": "2:34:59", "remaining_time": "0:51:27", "throughput": 19844.85, "total_tokens": 184540160}
|
|
{"current_steps": 58640, "total_steps": 78105, "loss": 0.1471, "lr": 8.878918405501921e-07, "epoch": 3.753921003776967, "percentage": 75.08, "elapsed_time": "2:34:59", "remaining_time": "0:51:26", "throughput": 19845.06, "total_tokens": 184554944}
|
|
{"current_steps": 58645, "total_steps": 78105, "loss": 0.156, "lr": 8.874648945271658e-07, "epoch": 3.754241085717944, "percentage": 75.08, "elapsed_time": "2:35:00", "remaining_time": "0:51:26", "throughput": 19845.34, "total_tokens": 184571584}
|
|
{"current_steps": 58650, "total_steps": 78105, "loss": 0.1775, "lr": 8.870380290257416e-07, "epoch": 3.754561167658921, "percentage": 75.09, "elapsed_time": "2:35:01", "remaining_time": "0:51:25", "throughput": 19845.54, "total_tokens": 184586240}
|
|
{"current_steps": 58655, "total_steps": 78105, "loss": 0.1593, "lr": 8.866112440672375e-07, "epoch": 3.7548812495998973, "percentage": 75.1, "elapsed_time": "2:35:01", "remaining_time": "0:51:24", "throughput": 19845.74, "total_tokens": 184600832}
|
|
{"current_steps": 58660, "total_steps": 78105, "loss": 0.1339, "lr": 8.86184539672964e-07, "epoch": 3.7552013315408743, "percentage": 75.1, "elapsed_time": "2:35:02", "remaining_time": "0:51:23", "throughput": 19846.0, "total_tokens": 184616960}
|
|
{"current_steps": 58665, "total_steps": 78105, "loss": 0.1753, "lr": 8.857579158642285e-07, "epoch": 3.7555214134818513, "percentage": 75.11, "elapsed_time": "2:35:03", "remaining_time": "0:51:22", "throughput": 19846.23, "total_tokens": 184632576}
|
|
{"current_steps": 58670, "total_steps": 78105, "loss": 0.1055, "lr": 8.853313726623344e-07, "epoch": 3.7558414954228283, "percentage": 75.12, "elapsed_time": "2:35:03", "remaining_time": "0:51:21", "throughput": 19846.5, "total_tokens": 184648896}
|
|
{"current_steps": 58675, "total_steps": 78105, "loss": 0.1847, "lr": 8.849049100885812e-07, "epoch": 3.7561615773638053, "percentage": 75.12, "elapsed_time": "2:35:04", "remaining_time": "0:51:21", "throughput": 19846.72, "total_tokens": 184664256}
|
|
{"current_steps": 58680, "total_steps": 78105, "loss": 0.4096, "lr": 8.84478528164264e-07, "epoch": 3.7564816593047823, "percentage": 75.13, "elapsed_time": "2:35:05", "remaining_time": "0:51:20", "throughput": 19846.94, "total_tokens": 184679488}
|
|
{"current_steps": 58685, "total_steps": 78105, "loss": 0.1922, "lr": 8.840522269106741e-07, "epoch": 3.756801741245759, "percentage": 75.14, "elapsed_time": "2:35:05", "remaining_time": "0:51:19", "throughput": 19847.18, "total_tokens": 184695424}
|
|
{"current_steps": 58690, "total_steps": 78105, "loss": 0.1835, "lr": 8.836260063490978e-07, "epoch": 3.757121823186736, "percentage": 75.14, "elapsed_time": "2:35:06", "remaining_time": "0:51:18", "throughput": 19847.38, "total_tokens": 184710336}
|
|
{"current_steps": 58695, "total_steps": 78105, "loss": 0.1775, "lr": 8.831998665008204e-07, "epoch": 3.757441905127713, "percentage": 75.15, "elapsed_time": "2:35:07", "remaining_time": "0:51:17", "throughput": 19847.64, "total_tokens": 184726272}
|
|
{"current_steps": 58700, "total_steps": 78105, "loss": 0.1546, "lr": 8.827738073871203e-07, "epoch": 3.7577619870686894, "percentage": 75.16, "elapsed_time": "2:35:07", "remaining_time": "0:51:16", "throughput": 19847.88, "total_tokens": 184741696}
|
|
{"current_steps": 58705, "total_steps": 78105, "loss": 0.2376, "lr": 8.823478290292719e-07, "epoch": 3.7580820690096663, "percentage": 75.16, "elapsed_time": "2:35:08", "remaining_time": "0:51:16", "throughput": 19848.08, "total_tokens": 184756416}
|
|
{"current_steps": 58710, "total_steps": 78105, "loss": 0.157, "lr": 8.819219314485472e-07, "epoch": 3.7584021509506433, "percentage": 75.17, "elapsed_time": "2:35:09", "remaining_time": "0:51:15", "throughput": 19848.33, "total_tokens": 184772032}
|
|
{"current_steps": 58715, "total_steps": 78105, "loss": 0.1521, "lr": 8.814961146662133e-07, "epoch": 3.7587222328916203, "percentage": 75.17, "elapsed_time": "2:35:09", "remaining_time": "0:51:14", "throughput": 19848.62, "total_tokens": 184788736}
|
|
{"current_steps": 58720, "total_steps": 78105, "loss": 0.1624, "lr": 8.810703787035324e-07, "epoch": 3.7590423148325973, "percentage": 75.18, "elapsed_time": "2:35:10", "remaining_time": "0:51:13", "throughput": 19848.85, "total_tokens": 184804224}
|
|
{"current_steps": 58725, "total_steps": 78105, "loss": 0.1646, "lr": 8.806447235817633e-07, "epoch": 3.7593623967735743, "percentage": 75.19, "elapsed_time": "2:35:11", "remaining_time": "0:51:12", "throughput": 19849.08, "total_tokens": 184819584}
|
|
{"current_steps": 58730, "total_steps": 78105, "loss": 0.1719, "lr": 8.802191493221638e-07, "epoch": 3.759682478714551, "percentage": 75.19, "elapsed_time": "2:35:11", "remaining_time": "0:51:11", "throughput": 19849.31, "total_tokens": 184835200}
|
|
{"current_steps": 58735, "total_steps": 78105, "loss": 0.1877, "lr": 8.79793655945981e-07, "epoch": 3.760002560655528, "percentage": 75.2, "elapsed_time": "2:35:12", "remaining_time": "0:51:11", "throughput": 19849.54, "total_tokens": 184850816}
|
|
{"current_steps": 58740, "total_steps": 78105, "loss": 0.1455, "lr": 8.793682434744647e-07, "epoch": 3.760322642596505, "percentage": 75.21, "elapsed_time": "2:35:13", "remaining_time": "0:51:10", "throughput": 19849.75, "total_tokens": 184866048}
|
|
{"current_steps": 58745, "total_steps": 78105, "loss": 0.2206, "lr": 8.789429119288564e-07, "epoch": 3.7606427245374814, "percentage": 75.21, "elapsed_time": "2:35:13", "remaining_time": "0:51:09", "throughput": 19849.95, "total_tokens": 184880960}
|
|
{"current_steps": 58750, "total_steps": 78105, "loss": 0.1905, "lr": 8.785176613303955e-07, "epoch": 3.7609628064784584, "percentage": 75.22, "elapsed_time": "2:35:14", "remaining_time": "0:51:08", "throughput": 19850.22, "total_tokens": 184897344}
|
|
{"current_steps": 58755, "total_steps": 78105, "loss": 0.162, "lr": 8.780924917003164e-07, "epoch": 3.7612828884194354, "percentage": 75.23, "elapsed_time": "2:35:15", "remaining_time": "0:51:07", "throughput": 19850.39, "total_tokens": 184911808}
|
|
{"current_steps": 58760, "total_steps": 78105, "loss": 0.2049, "lr": 8.776674030598494e-07, "epoch": 3.7616029703604124, "percentage": 75.23, "elapsed_time": "2:35:15", "remaining_time": "0:51:06", "throughput": 19850.62, "total_tokens": 184926976}
|
|
{"current_steps": 58765, "total_steps": 78105, "loss": 0.1463, "lr": 8.772423954302237e-07, "epoch": 3.7619230523013893, "percentage": 75.24, "elapsed_time": "2:35:16", "remaining_time": "0:51:06", "throughput": 19850.84, "total_tokens": 184942656}
|
|
{"current_steps": 58770, "total_steps": 78105, "loss": 0.1993, "lr": 8.768174688326583e-07, "epoch": 3.762243134242366, "percentage": 75.24, "elapsed_time": "2:35:17", "remaining_time": "0:51:05", "throughput": 19851.12, "total_tokens": 184959232}
|
|
{"current_steps": 58775, "total_steps": 78105, "loss": 0.1141, "lr": 8.763926232883743e-07, "epoch": 3.762563216183343, "percentage": 75.25, "elapsed_time": "2:35:17", "remaining_time": "0:51:04", "throughput": 19851.35, "total_tokens": 184974592}
|
|
{"current_steps": 58780, "total_steps": 78105, "loss": 0.1266, "lr": 8.75967858818586e-07, "epoch": 3.76288329812432, "percentage": 75.26, "elapsed_time": "2:35:18", "remaining_time": "0:51:03", "throughput": 19851.54, "total_tokens": 184989632}
|
|
{"current_steps": 58785, "total_steps": 78105, "loss": 0.2549, "lr": 8.755431754445035e-07, "epoch": 3.763203380065297, "percentage": 75.26, "elapsed_time": "2:35:19", "remaining_time": "0:51:02", "throughput": 19851.83, "total_tokens": 185006464}
|
|
{"current_steps": 58790, "total_steps": 78105, "loss": 0.1331, "lr": 8.751185731873329e-07, "epoch": 3.7635234620062734, "percentage": 75.27, "elapsed_time": "2:35:20", "remaining_time": "0:51:02", "throughput": 19852.03, "total_tokens": 185021376}
|
|
{"current_steps": 58795, "total_steps": 78105, "loss": 0.2821, "lr": 8.746940520682775e-07, "epoch": 3.7638435439472504, "percentage": 75.28, "elapsed_time": "2:35:20", "remaining_time": "0:51:01", "throughput": 19852.23, "total_tokens": 185036032}
|
|
{"current_steps": 58800, "total_steps": 78105, "loss": 0.1156, "lr": 8.742696121085343e-07, "epoch": 3.7641636258882274, "percentage": 75.28, "elapsed_time": "2:35:21", "remaining_time": "0:51:00", "throughput": 19852.46, "total_tokens": 185051520}
|
|
{"current_steps": 58805, "total_steps": 78105, "loss": 0.1415, "lr": 8.738452533293002e-07, "epoch": 3.7644837078292044, "percentage": 75.29, "elapsed_time": "2:35:22", "remaining_time": "0:50:59", "throughput": 19852.73, "total_tokens": 185067776}
|
|
{"current_steps": 58810, "total_steps": 78105, "loss": 0.1482, "lr": 8.73420975751762e-07, "epoch": 3.7648037897701814, "percentage": 75.3, "elapsed_time": "2:35:22", "remaining_time": "0:50:58", "throughput": 19852.93, "total_tokens": 185082944}
|
|
{"current_steps": 58815, "total_steps": 78105, "loss": 0.1886, "lr": 8.72996779397109e-07, "epoch": 3.765123871711158, "percentage": 75.3, "elapsed_time": "2:35:23", "remaining_time": "0:50:57", "throughput": 19853.19, "total_tokens": 185098944}
|
|
{"current_steps": 58820, "total_steps": 78105, "loss": 0.178, "lr": 8.725726642865215e-07, "epoch": 3.765443953652135, "percentage": 75.31, "elapsed_time": "2:35:24", "remaining_time": "0:50:57", "throughput": 19853.34, "total_tokens": 185112896}
|
|
{"current_steps": 58825, "total_steps": 78105, "loss": 0.2024, "lr": 8.721486304411786e-07, "epoch": 3.765764035593112, "percentage": 75.32, "elapsed_time": "2:35:24", "remaining_time": "0:50:56", "throughput": 19853.52, "total_tokens": 185127296}
|
|
{"current_steps": 58830, "total_steps": 78105, "loss": 0.1694, "lr": 8.717246778822538e-07, "epoch": 3.766084117534089, "percentage": 75.32, "elapsed_time": "2:35:25", "remaining_time": "0:50:55", "throughput": 19853.75, "total_tokens": 185143040}
|
|
{"current_steps": 58835, "total_steps": 78105, "loss": 0.1773, "lr": 8.713008066309164e-07, "epoch": 3.7664041994750654, "percentage": 75.33, "elapsed_time": "2:35:26", "remaining_time": "0:50:54", "throughput": 19854.07, "total_tokens": 185160768}
|
|
{"current_steps": 58840, "total_steps": 78105, "loss": 0.0715, "lr": 8.708770167083346e-07, "epoch": 3.7667242814160424, "percentage": 75.33, "elapsed_time": "2:35:26", "remaining_time": "0:50:53", "throughput": 19854.28, "total_tokens": 185175936}
|
|
{"current_steps": 58845, "total_steps": 78105, "loss": 0.1279, "lr": 8.704533081356676e-07, "epoch": 3.7670443633570194, "percentage": 75.34, "elapsed_time": "2:35:27", "remaining_time": "0:50:52", "throughput": 19854.48, "total_tokens": 185190720}
|
|
{"current_steps": 58850, "total_steps": 78105, "loss": 0.1493, "lr": 8.700296809340756e-07, "epoch": 3.7673644452979964, "percentage": 75.35, "elapsed_time": "2:35:28", "remaining_time": "0:50:52", "throughput": 19854.75, "total_tokens": 185207296}
|
|
{"current_steps": 58855, "total_steps": 78105, "loss": 0.2158, "lr": 8.696061351247095e-07, "epoch": 3.7676845272389734, "percentage": 75.35, "elapsed_time": "2:35:28", "remaining_time": "0:50:51", "throughput": 19854.96, "total_tokens": 185222272}
|
|
{"current_steps": 58860, "total_steps": 78105, "loss": 0.129, "lr": 8.691826707287215e-07, "epoch": 3.76800460917995, "percentage": 75.36, "elapsed_time": "2:35:29", "remaining_time": "0:50:50", "throughput": 19855.19, "total_tokens": 185237760}
|
|
{"current_steps": 58865, "total_steps": 78105, "loss": 0.1215, "lr": 8.687592877672566e-07, "epoch": 3.768324691120927, "percentage": 75.37, "elapsed_time": "2:35:30", "remaining_time": "0:50:49", "throughput": 19855.41, "total_tokens": 185252992}
|
|
{"current_steps": 58870, "total_steps": 78105, "loss": 0.204, "lr": 8.683359862614548e-07, "epoch": 3.768644773061904, "percentage": 75.37, "elapsed_time": "2:35:30", "remaining_time": "0:50:48", "throughput": 19855.63, "total_tokens": 185268288}
|
|
{"current_steps": 58875, "total_steps": 78105, "loss": 0.1527, "lr": 8.679127662324569e-07, "epoch": 3.7689648550028805, "percentage": 75.38, "elapsed_time": "2:35:31", "remaining_time": "0:50:47", "throughput": 19855.92, "total_tokens": 185285312}
|
|
{"current_steps": 58880, "total_steps": 78105, "loss": 0.1456, "lr": 8.674896277013922e-07, "epoch": 3.7692849369438575, "percentage": 75.39, "elapsed_time": "2:35:32", "remaining_time": "0:50:47", "throughput": 19856.17, "total_tokens": 185301312}
|
|
{"current_steps": 58885, "total_steps": 78105, "loss": 0.1527, "lr": 8.67066570689393e-07, "epoch": 3.7696050188848345, "percentage": 75.39, "elapsed_time": "2:35:32", "remaining_time": "0:50:46", "throughput": 19856.36, "total_tokens": 185315776}
|
|
{"current_steps": 58890, "total_steps": 78105, "loss": 0.114, "lr": 8.666435952175837e-07, "epoch": 3.7699251008258114, "percentage": 75.4, "elapsed_time": "2:35:33", "remaining_time": "0:50:45", "throughput": 19856.63, "total_tokens": 185332032}
|
|
{"current_steps": 58895, "total_steps": 78105, "loss": 0.1257, "lr": 8.662207013070853e-07, "epoch": 3.7702451827667884, "percentage": 75.4, "elapsed_time": "2:35:34", "remaining_time": "0:50:44", "throughput": 19856.85, "total_tokens": 185347456}
|
|
{"current_steps": 58900, "total_steps": 78105, "loss": 0.169, "lr": 8.65797888979015e-07, "epoch": 3.7705652647077654, "percentage": 75.41, "elapsed_time": "2:35:34", "remaining_time": "0:50:43", "throughput": 19857.13, "total_tokens": 185363904}
|
|
{"current_steps": 58905, "total_steps": 78105, "loss": 0.1429, "lr": 8.653751582544859e-07, "epoch": 3.770885346648742, "percentage": 75.42, "elapsed_time": "2:35:35", "remaining_time": "0:50:42", "throughput": 19857.36, "total_tokens": 185379392}
|
|
{"current_steps": 58910, "total_steps": 78105, "loss": 0.1909, "lr": 8.649525091546071e-07, "epoch": 3.771205428589719, "percentage": 75.42, "elapsed_time": "2:35:36", "remaining_time": "0:50:42", "throughput": 19857.66, "total_tokens": 185395968}
|
|
{"current_steps": 58915, "total_steps": 78105, "loss": 0.2106, "lr": 8.645299417004832e-07, "epoch": 3.771525510530696, "percentage": 75.43, "elapsed_time": "2:35:36", "remaining_time": "0:50:41", "throughput": 19857.88, "total_tokens": 185410944}
|
|
{"current_steps": 58920, "total_steps": 78105, "loss": 0.1437, "lr": 8.641074559132142e-07, "epoch": 3.7718455924716725, "percentage": 75.44, "elapsed_time": "2:35:37", "remaining_time": "0:50:40", "throughput": 19858.12, "total_tokens": 185426816}
|
|
{"current_steps": 58925, "total_steps": 78105, "loss": 0.2352, "lr": 8.636850518138997e-07, "epoch": 3.7721656744126495, "percentage": 75.44, "elapsed_time": "2:35:38", "remaining_time": "0:50:39", "throughput": 19858.32, "total_tokens": 185441600}
|
|
{"current_steps": 58930, "total_steps": 78105, "loss": 0.1437, "lr": 8.632627294236287e-07, "epoch": 3.7724857563536265, "percentage": 75.45, "elapsed_time": "2:35:38", "remaining_time": "0:50:38", "throughput": 19858.52, "total_tokens": 185456192}
|
|
{"current_steps": 58935, "total_steps": 78105, "loss": 0.1591, "lr": 8.628404887634926e-07, "epoch": 3.7728058382946035, "percentage": 75.46, "elapsed_time": "2:35:39", "remaining_time": "0:50:37", "throughput": 19858.76, "total_tokens": 185471680}
|
|
{"current_steps": 58940, "total_steps": 78105, "loss": 0.1462, "lr": 8.624183298545744e-07, "epoch": 3.7731259202355805, "percentage": 75.46, "elapsed_time": "2:35:40", "remaining_time": "0:50:37", "throughput": 19858.98, "total_tokens": 185486528}
|
|
{"current_steps": 58945, "total_steps": 78105, "loss": 0.1322, "lr": 8.619962527179551e-07, "epoch": 3.7734460021765575, "percentage": 75.47, "elapsed_time": "2:35:40", "remaining_time": "0:50:36", "throughput": 19859.21, "total_tokens": 185502080}
|
|
{"current_steps": 58950, "total_steps": 78105, "loss": 0.2411, "lr": 8.61574257374711e-07, "epoch": 3.773766084117534, "percentage": 75.48, "elapsed_time": "2:35:41", "remaining_time": "0:50:35", "throughput": 19859.49, "total_tokens": 185518976}
|
|
{"current_steps": 58955, "total_steps": 78105, "loss": 0.1189, "lr": 8.611523438459135e-07, "epoch": 3.774086166058511, "percentage": 75.48, "elapsed_time": "2:35:42", "remaining_time": "0:50:34", "throughput": 19859.71, "total_tokens": 185534016}
|
|
{"current_steps": 58960, "total_steps": 78105, "loss": 0.1929, "lr": 8.60730512152633e-07, "epoch": 3.774406247999488, "percentage": 75.49, "elapsed_time": "2:35:42", "remaining_time": "0:50:33", "throughput": 19860.01, "total_tokens": 185550720}
|
|
{"current_steps": 58965, "total_steps": 78105, "loss": 0.1363, "lr": 8.603087623159303e-07, "epoch": 3.7747263299404645, "percentage": 75.49, "elapsed_time": "2:35:43", "remaining_time": "0:50:32", "throughput": 19860.24, "total_tokens": 185566080}
|
|
{"current_steps": 58970, "total_steps": 78105, "loss": 0.1328, "lr": 8.598870943568682e-07, "epoch": 3.7750464118814415, "percentage": 75.5, "elapsed_time": "2:35:44", "remaining_time": "0:50:32", "throughput": 19860.49, "total_tokens": 185581824}
|
|
{"current_steps": 58975, "total_steps": 78105, "loss": 0.2467, "lr": 8.594655082965017e-07, "epoch": 3.7753664938224185, "percentage": 75.51, "elapsed_time": "2:35:44", "remaining_time": "0:50:31", "throughput": 19860.72, "total_tokens": 185597056}
|
|
{"current_steps": 58980, "total_steps": 78105, "loss": 0.1799, "lr": 8.590440041558823e-07, "epoch": 3.7756865757633955, "percentage": 75.51, "elapsed_time": "2:35:45", "remaining_time": "0:50:30", "throughput": 19860.91, "total_tokens": 185611584}
|
|
{"current_steps": 58985, "total_steps": 78105, "loss": 0.2062, "lr": 8.586225819560578e-07, "epoch": 3.7760066577043725, "percentage": 75.52, "elapsed_time": "2:35:46", "remaining_time": "0:50:29", "throughput": 19861.16, "total_tokens": 185627712}
|
|
{"current_steps": 58990, "total_steps": 78105, "loss": 0.165, "lr": 8.582012417180713e-07, "epoch": 3.7763267396453495, "percentage": 75.53, "elapsed_time": "2:35:46", "remaining_time": "0:50:28", "throughput": 19861.42, "total_tokens": 185643520}
|
|
{"current_steps": 58995, "total_steps": 78105, "loss": 0.1311, "lr": 8.577799834629647e-07, "epoch": 3.776646821586326, "percentage": 75.53, "elapsed_time": "2:35:47", "remaining_time": "0:50:27", "throughput": 19861.65, "total_tokens": 185659008}
|
|
{"current_steps": 59000, "total_steps": 78105, "loss": 0.1303, "lr": 8.573588072117697e-07, "epoch": 3.776966903527303, "percentage": 75.54, "elapsed_time": "2:35:48", "remaining_time": "0:50:27", "throughput": 19861.89, "total_tokens": 185674368}
|
|
{"current_steps": 59005, "total_steps": 78105, "loss": 0.1685, "lr": 8.569377129855208e-07, "epoch": 3.77728698546828, "percentage": 75.55, "elapsed_time": "2:35:48", "remaining_time": "0:50:26", "throughput": 19862.16, "total_tokens": 185690816}
|
|
{"current_steps": 59010, "total_steps": 78105, "loss": 0.1551, "lr": 8.565167008052438e-07, "epoch": 3.7776070674092566, "percentage": 75.55, "elapsed_time": "2:35:49", "remaining_time": "0:50:25", "throughput": 19862.36, "total_tokens": 185705408}
|
|
{"current_steps": 59015, "total_steps": 78105, "loss": 0.1196, "lr": 8.560957706919626e-07, "epoch": 3.7779271493502335, "percentage": 75.56, "elapsed_time": "2:35:50", "remaining_time": "0:50:24", "throughput": 19862.58, "total_tokens": 185720448}
|
|
{"current_steps": 59020, "total_steps": 78105, "loss": 0.1554, "lr": 8.556749226666955e-07, "epoch": 3.7782472312912105, "percentage": 75.56, "elapsed_time": "2:35:50", "remaining_time": "0:50:23", "throughput": 19862.89, "total_tokens": 185737344}
|
|
{"current_steps": 59025, "total_steps": 78105, "loss": 0.1578, "lr": 8.55254156750458e-07, "epoch": 3.7785673132321875, "percentage": 75.57, "elapsed_time": "2:35:51", "remaining_time": "0:50:22", "throughput": 19863.21, "total_tokens": 185754624}
|
|
{"current_steps": 59030, "total_steps": 78105, "loss": 0.1406, "lr": 8.548334729642599e-07, "epoch": 3.7788873951731645, "percentage": 75.58, "elapsed_time": "2:35:52", "remaining_time": "0:50:22", "throughput": 19863.41, "total_tokens": 185769344}
|
|
{"current_steps": 59035, "total_steps": 78105, "loss": 0.1151, "lr": 8.544128713291102e-07, "epoch": 3.779207477114141, "percentage": 75.58, "elapsed_time": "2:35:53", "remaining_time": "0:50:21", "throughput": 19863.64, "total_tokens": 185784704}
|
|
{"current_steps": 59040, "total_steps": 78105, "loss": 0.2172, "lr": 8.539923518660087e-07, "epoch": 3.779527559055118, "percentage": 75.59, "elapsed_time": "2:35:53", "remaining_time": "0:50:20", "throughput": 19863.88, "total_tokens": 185799872}
|
|
{"current_steps": 59045, "total_steps": 78105, "loss": 0.2514, "lr": 8.535719145959561e-07, "epoch": 3.779847640996095, "percentage": 75.6, "elapsed_time": "2:35:54", "remaining_time": "0:50:19", "throughput": 19864.12, "total_tokens": 185815424}
|
|
{"current_steps": 59050, "total_steps": 78105, "loss": 0.2081, "lr": 8.531515595399464e-07, "epoch": 3.780167722937072, "percentage": 75.6, "elapsed_time": "2:35:54", "remaining_time": "0:50:18", "throughput": 19864.35, "total_tokens": 185830848}
|
|
{"current_steps": 59055, "total_steps": 78105, "loss": 0.1937, "lr": 8.527312867189694e-07, "epoch": 3.7804878048780486, "percentage": 75.61, "elapsed_time": "2:35:55", "remaining_time": "0:50:17", "throughput": 19864.57, "total_tokens": 185845760}
|
|
{"current_steps": 59060, "total_steps": 78105, "loss": 0.2577, "lr": 8.523110961540118e-07, "epoch": 3.7808078868190256, "percentage": 75.62, "elapsed_time": "2:35:56", "remaining_time": "0:50:17", "throughput": 19864.79, "total_tokens": 185861056}
|
|
{"current_steps": 59065, "total_steps": 78105, "loss": 0.1585, "lr": 8.518909878660547e-07, "epoch": 3.7811279687600026, "percentage": 75.62, "elapsed_time": "2:35:56", "remaining_time": "0:50:16", "throughput": 19865.04, "total_tokens": 185876928}
|
|
{"current_steps": 59070, "total_steps": 78105, "loss": 0.1739, "lr": 8.514709618760789e-07, "epoch": 3.7814480507009796, "percentage": 75.63, "elapsed_time": "2:35:57", "remaining_time": "0:50:15", "throughput": 19865.27, "total_tokens": 185892544}
|
|
{"current_steps": 59075, "total_steps": 78105, "loss": 0.1742, "lr": 8.510510182050544e-07, "epoch": 3.7817681326419565, "percentage": 75.64, "elapsed_time": "2:35:58", "remaining_time": "0:50:14", "throughput": 19865.55, "total_tokens": 185908608}
|
|
{"current_steps": 59080, "total_steps": 78105, "loss": 0.1917, "lr": 8.506311568739547e-07, "epoch": 3.782088214582933, "percentage": 75.64, "elapsed_time": "2:35:59", "remaining_time": "0:50:13", "throughput": 19865.81, "total_tokens": 185924800}
|
|
{"current_steps": 59085, "total_steps": 78105, "loss": 0.1868, "lr": 8.502113779037421e-07, "epoch": 3.78240829652391, "percentage": 75.65, "elapsed_time": "2:35:59", "remaining_time": "0:50:12", "throughput": 19866.09, "total_tokens": 185940864}
|
|
{"current_steps": 59090, "total_steps": 78105, "loss": 0.1681, "lr": 8.497916813153809e-07, "epoch": 3.782728378464887, "percentage": 75.65, "elapsed_time": "2:36:00", "remaining_time": "0:50:12", "throughput": 19866.35, "total_tokens": 185956928}
|
|
{"current_steps": 59095, "total_steps": 78105, "loss": 0.1297, "lr": 8.493720671298272e-07, "epoch": 3.783048460405864, "percentage": 75.66, "elapsed_time": "2:36:01", "remaining_time": "0:50:11", "throughput": 19866.6, "total_tokens": 185972864}
|
|
{"current_steps": 59100, "total_steps": 78105, "loss": 0.2017, "lr": 8.489525353680348e-07, "epoch": 3.7833685423468406, "percentage": 75.67, "elapsed_time": "2:36:01", "remaining_time": "0:50:10", "throughput": 19866.83, "total_tokens": 185988416}
|
|
{"current_steps": 59105, "total_steps": 78105, "loss": 0.1072, "lr": 8.485330860509527e-07, "epoch": 3.7836886242878176, "percentage": 75.67, "elapsed_time": "2:36:02", "remaining_time": "0:50:09", "throughput": 19867.07, "total_tokens": 186004224}
|
|
{"current_steps": 59110, "total_steps": 78105, "loss": 0.1486, "lr": 8.481137191995254e-07, "epoch": 3.7840087062287946, "percentage": 75.68, "elapsed_time": "2:36:03", "remaining_time": "0:50:08", "throughput": 19867.31, "total_tokens": 186019584}
|
|
{"current_steps": 59115, "total_steps": 78105, "loss": 0.1568, "lr": 8.476944348346955e-07, "epoch": 3.7843287881697716, "percentage": 75.69, "elapsed_time": "2:36:03", "remaining_time": "0:50:07", "throughput": 19867.52, "total_tokens": 186034752}
|
|
{"current_steps": 59120, "total_steps": 78105, "loss": 0.1745, "lr": 8.472752329773989e-07, "epoch": 3.7846488701107486, "percentage": 75.69, "elapsed_time": "2:36:04", "remaining_time": "0:50:07", "throughput": 19867.74, "total_tokens": 186049984}
|
|
{"current_steps": 59125, "total_steps": 78105, "loss": 0.1704, "lr": 8.468561136485684e-07, "epoch": 3.784968952051725, "percentage": 75.7, "elapsed_time": "2:36:05", "remaining_time": "0:50:06", "throughput": 19867.97, "total_tokens": 186065472}
|
|
{"current_steps": 59130, "total_steps": 78105, "loss": 0.1607, "lr": 8.464370768691327e-07, "epoch": 3.785289033992702, "percentage": 75.71, "elapsed_time": "2:36:05", "remaining_time": "0:50:05", "throughput": 19868.25, "total_tokens": 186081920}
|
|
{"current_steps": 59135, "total_steps": 78105, "loss": 0.1746, "lr": 8.460181226600161e-07, "epoch": 3.785609115933679, "percentage": 75.71, "elapsed_time": "2:36:06", "remaining_time": "0:50:04", "throughput": 19868.54, "total_tokens": 186098880}
|
|
{"current_steps": 59140, "total_steps": 78105, "loss": 0.1381, "lr": 8.455992510421396e-07, "epoch": 3.7859291978746556, "percentage": 75.72, "elapsed_time": "2:36:07", "remaining_time": "0:50:03", "throughput": 19868.79, "total_tokens": 186114752}
|
|
{"current_steps": 59145, "total_steps": 78105, "loss": 0.1937, "lr": 8.451804620364187e-07, "epoch": 3.7862492798156326, "percentage": 75.72, "elapsed_time": "2:36:07", "remaining_time": "0:50:03", "throughput": 19869.02, "total_tokens": 186130304}
|
|
{"current_steps": 59150, "total_steps": 78105, "loss": 0.2016, "lr": 8.447617556637652e-07, "epoch": 3.7865693617566096, "percentage": 75.73, "elapsed_time": "2:36:08", "remaining_time": "0:50:02", "throughput": 19869.27, "total_tokens": 186145984}
|
|
{"current_steps": 59155, "total_steps": 78105, "loss": 0.1861, "lr": 8.443431319450893e-07, "epoch": 3.7868894436975866, "percentage": 75.74, "elapsed_time": "2:36:09", "remaining_time": "0:50:01", "throughput": 19869.52, "total_tokens": 186162240}
|
|
{"current_steps": 59160, "total_steps": 78105, "loss": 0.13, "lr": 8.439245909012919e-07, "epoch": 3.7872095256385636, "percentage": 75.74, "elapsed_time": "2:36:09", "remaining_time": "0:50:00", "throughput": 19869.81, "total_tokens": 186179200}
|
|
{"current_steps": 59165, "total_steps": 78105, "loss": 0.1823, "lr": 8.435061325532751e-07, "epoch": 3.7875296075795406, "percentage": 75.75, "elapsed_time": "2:36:10", "remaining_time": "0:49:59", "throughput": 19870.01, "total_tokens": 186193920}
|
|
{"current_steps": 59170, "total_steps": 78105, "loss": 0.1573, "lr": 8.430877569219334e-07, "epoch": 3.787849689520517, "percentage": 75.76, "elapsed_time": "2:36:11", "remaining_time": "0:49:58", "throughput": 19870.23, "total_tokens": 186209216}
|
|
{"current_steps": 59175, "total_steps": 78105, "loss": 0.1624, "lr": 8.426694640281588e-07, "epoch": 3.788169771461494, "percentage": 75.76, "elapsed_time": "2:36:11", "remaining_time": "0:49:58", "throughput": 19870.48, "total_tokens": 186225216}
|
|
{"current_steps": 59180, "total_steps": 78105, "loss": 0.1514, "lr": 8.422512538928384e-07, "epoch": 3.788489853402471, "percentage": 75.77, "elapsed_time": "2:36:12", "remaining_time": "0:49:57", "throughput": 19870.7, "total_tokens": 186240832}
|
|
{"current_steps": 59185, "total_steps": 78105, "loss": 0.1732, "lr": 8.418331265368548e-07, "epoch": 3.7888099353434477, "percentage": 75.78, "elapsed_time": "2:36:13", "remaining_time": "0:49:56", "throughput": 19870.92, "total_tokens": 186256128}
|
|
{"current_steps": 59190, "total_steps": 78105, "loss": 0.1985, "lr": 8.414150819810893e-07, "epoch": 3.7891300172844247, "percentage": 75.78, "elapsed_time": "2:36:14", "remaining_time": "0:49:55", "throughput": 19871.23, "total_tokens": 186273600}
|
|
{"current_steps": 59195, "total_steps": 78105, "loss": 0.1339, "lr": 8.409971202464138e-07, "epoch": 3.7894500992254017, "percentage": 75.79, "elapsed_time": "2:36:14", "remaining_time": "0:49:54", "throughput": 19871.51, "total_tokens": 186289920}
|
|
{"current_steps": 59200, "total_steps": 78105, "loss": 0.1111, "lr": 8.405792413537014e-07, "epoch": 3.7897701811663786, "percentage": 75.8, "elapsed_time": "2:36:15", "remaining_time": "0:49:53", "throughput": 19871.82, "total_tokens": 186306816}
|
|
{"current_steps": 59205, "total_steps": 78105, "loss": 0.18, "lr": 8.40161445323818e-07, "epoch": 3.7900902631073556, "percentage": 75.8, "elapsed_time": "2:36:16", "remaining_time": "0:49:53", "throughput": 19872.14, "total_tokens": 186324160}
|
|
{"current_steps": 59210, "total_steps": 78105, "loss": 0.136, "lr": 8.397437321776264e-07, "epoch": 3.7904103450483326, "percentage": 75.81, "elapsed_time": "2:36:16", "remaining_time": "0:49:52", "throughput": 19872.39, "total_tokens": 186340224}
|
|
{"current_steps": 59215, "total_steps": 78105, "loss": 0.1257, "lr": 8.393261019359849e-07, "epoch": 3.790730426989309, "percentage": 75.81, "elapsed_time": "2:36:17", "remaining_time": "0:49:51", "throughput": 19872.68, "total_tokens": 186356928}
|
|
{"current_steps": 59220, "total_steps": 78105, "loss": 0.2072, "lr": 8.389085546197467e-07, "epoch": 3.791050508930286, "percentage": 75.82, "elapsed_time": "2:36:18", "remaining_time": "0:49:50", "throughput": 19872.9, "total_tokens": 186372288}
|
|
{"current_steps": 59225, "total_steps": 78105, "loss": 0.164, "lr": 8.384910902497648e-07, "epoch": 3.791370590871263, "percentage": 75.83, "elapsed_time": "2:36:18", "remaining_time": "0:49:49", "throughput": 19873.15, "total_tokens": 186388160}
|
|
{"current_steps": 59230, "total_steps": 78105, "loss": 0.194, "lr": 8.380737088468815e-07, "epoch": 3.7916906728122397, "percentage": 75.83, "elapsed_time": "2:36:19", "remaining_time": "0:49:49", "throughput": 19873.44, "total_tokens": 186404800}
|
|
{"current_steps": 59235, "total_steps": 78105, "loss": 0.1868, "lr": 8.376564104319415e-07, "epoch": 3.7920107547532167, "percentage": 75.84, "elapsed_time": "2:36:20", "remaining_time": "0:49:48", "throughput": 19873.65, "total_tokens": 186419584}
|
|
{"current_steps": 59240, "total_steps": 78105, "loss": 0.186, "lr": 8.372391950257816e-07, "epoch": 3.7923308366941937, "percentage": 75.85, "elapsed_time": "2:36:20", "remaining_time": "0:49:47", "throughput": 19873.88, "total_tokens": 186434816}
|
|
{"current_steps": 59245, "total_steps": 78105, "loss": 0.176, "lr": 8.368220626492352e-07, "epoch": 3.7926509186351707, "percentage": 75.85, "elapsed_time": "2:36:21", "remaining_time": "0:49:46", "throughput": 19874.14, "total_tokens": 186451136}
|
|
{"current_steps": 59250, "total_steps": 78105, "loss": 0.1469, "lr": 8.364050133231319e-07, "epoch": 3.7929710005761477, "percentage": 75.86, "elapsed_time": "2:36:22", "remaining_time": "0:49:45", "throughput": 19874.37, "total_tokens": 186466624}
|
|
{"current_steps": 59255, "total_steps": 78105, "loss": 0.2088, "lr": 8.359880470682969e-07, "epoch": 3.7932910825171247, "percentage": 75.87, "elapsed_time": "2:36:22", "remaining_time": "0:49:44", "throughput": 19874.6, "total_tokens": 186482048}
|
|
{"current_steps": 59260, "total_steps": 78105, "loss": 0.1554, "lr": 8.355711639055502e-07, "epoch": 3.793611164458101, "percentage": 75.87, "elapsed_time": "2:36:23", "remaining_time": "0:49:44", "throughput": 19874.84, "total_tokens": 186497856}
|
|
{"current_steps": 59265, "total_steps": 78105, "loss": 0.1703, "lr": 8.351543638557119e-07, "epoch": 3.793931246399078, "percentage": 75.88, "elapsed_time": "2:36:24", "remaining_time": "0:49:43", "throughput": 19875.11, "total_tokens": 186514176}
|
|
{"current_steps": 59270, "total_steps": 78105, "loss": 0.2763, "lr": 8.34737646939591e-07, "epoch": 3.794251328340055, "percentage": 75.89, "elapsed_time": "2:36:24", "remaining_time": "0:49:42", "throughput": 19875.32, "total_tokens": 186529344}
|
|
{"current_steps": 59275, "total_steps": 78105, "loss": 0.2097, "lr": 8.343210131779997e-07, "epoch": 3.7945714102810317, "percentage": 75.89, "elapsed_time": "2:36:25", "remaining_time": "0:49:41", "throughput": 19875.61, "total_tokens": 186546304}
|
|
{"current_steps": 59280, "total_steps": 78105, "loss": 0.1346, "lr": 8.339044625917389e-07, "epoch": 3.7948914922220087, "percentage": 75.9, "elapsed_time": "2:36:26", "remaining_time": "0:49:40", "throughput": 19875.84, "total_tokens": 186561728}
|
|
{"current_steps": 59285, "total_steps": 78105, "loss": 0.0934, "lr": 8.33487995201612e-07, "epoch": 3.7952115741629857, "percentage": 75.9, "elapsed_time": "2:36:27", "remaining_time": "0:49:39", "throughput": 19876.07, "total_tokens": 186577280}
|
|
{"current_steps": 59290, "total_steps": 78105, "loss": 0.2336, "lr": 8.330716110284137e-07, "epoch": 3.7955316561039627, "percentage": 75.91, "elapsed_time": "2:36:27", "remaining_time": "0:49:39", "throughput": 19876.36, "total_tokens": 186594048}
|
|
{"current_steps": 59295, "total_steps": 78105, "loss": 0.182, "lr": 8.32655310092936e-07, "epoch": 3.7958517380449397, "percentage": 75.92, "elapsed_time": "2:36:28", "remaining_time": "0:49:38", "throughput": 19876.62, "total_tokens": 186610432}
|
|
{"current_steps": 59300, "total_steps": 78105, "loss": 0.1275, "lr": 8.322390924159688e-07, "epoch": 3.7961718199859162, "percentage": 75.92, "elapsed_time": "2:36:29", "remaining_time": "0:49:37", "throughput": 19876.81, "total_tokens": 186625024}
|
|
{"current_steps": 59305, "total_steps": 78105, "loss": 0.2067, "lr": 8.318229580182924e-07, "epoch": 3.7964919019268932, "percentage": 75.93, "elapsed_time": "2:36:29", "remaining_time": "0:49:36", "throughput": 19877.05, "total_tokens": 186640256}
|
|
{"current_steps": 59310, "total_steps": 78105, "loss": 0.1439, "lr": 8.314069069206901e-07, "epoch": 3.79681198386787, "percentage": 75.94, "elapsed_time": "2:36:30", "remaining_time": "0:49:35", "throughput": 19877.33, "total_tokens": 186657152}
|
|
{"current_steps": 59315, "total_steps": 78105, "loss": 0.2074, "lr": 8.309909391439339e-07, "epoch": 3.797132065808847, "percentage": 75.94, "elapsed_time": "2:36:31", "remaining_time": "0:49:34", "throughput": 19877.57, "total_tokens": 186672640}
|
|
{"current_steps": 59320, "total_steps": 78105, "loss": 0.1296, "lr": 8.305750547087974e-07, "epoch": 3.7974521477498238, "percentage": 75.95, "elapsed_time": "2:36:31", "remaining_time": "0:49:34", "throughput": 19877.87, "total_tokens": 186689664}
|
|
{"current_steps": 59325, "total_steps": 78105, "loss": 0.174, "lr": 8.301592536360467e-07, "epoch": 3.7977722296908007, "percentage": 75.96, "elapsed_time": "2:36:32", "remaining_time": "0:49:33", "throughput": 19878.05, "total_tokens": 186704320}
|
|
{"current_steps": 59330, "total_steps": 78105, "loss": 0.2151, "lr": 8.297435359464453e-07, "epoch": 3.7980923116317777, "percentage": 75.96, "elapsed_time": "2:36:33", "remaining_time": "0:49:32", "throughput": 19878.25, "total_tokens": 186719040}
|
|
{"current_steps": 59335, "total_steps": 78105, "loss": 0.1068, "lr": 8.293279016607514e-07, "epoch": 3.7984123935727547, "percentage": 75.97, "elapsed_time": "2:36:33", "remaining_time": "0:49:31", "throughput": 19878.53, "total_tokens": 186735680}
|
|
{"current_steps": 59340, "total_steps": 78105, "loss": 0.1465, "lr": 8.289123507997193e-07, "epoch": 3.7987324755137317, "percentage": 75.97, "elapsed_time": "2:36:34", "remaining_time": "0:49:30", "throughput": 19878.74, "total_tokens": 186750720}
|
|
{"current_steps": 59345, "total_steps": 78105, "loss": 0.1444, "lr": 8.284968833841009e-07, "epoch": 3.7990525574547083, "percentage": 75.98, "elapsed_time": "2:36:35", "remaining_time": "0:49:29", "throughput": 19879.0, "total_tokens": 186766720}
|
|
{"current_steps": 59350, "total_steps": 78105, "loss": 0.1979, "lr": 8.280814994346415e-07, "epoch": 3.7993726393956853, "percentage": 75.99, "elapsed_time": "2:36:35", "remaining_time": "0:49:29", "throughput": 19879.32, "total_tokens": 186783808}
|
|
{"current_steps": 59355, "total_steps": 78105, "loss": 0.1956, "lr": 8.276661989720833e-07, "epoch": 3.7996927213366622, "percentage": 75.99, "elapsed_time": "2:36:36", "remaining_time": "0:49:28", "throughput": 19879.56, "total_tokens": 186799488}
|
|
{"current_steps": 59360, "total_steps": 78105, "loss": 0.1846, "lr": 8.27250982017164e-07, "epoch": 3.8000128032776392, "percentage": 76.0, "elapsed_time": "2:36:37", "remaining_time": "0:49:27", "throughput": 19879.82, "total_tokens": 186815488}
|
|
{"current_steps": 59365, "total_steps": 78105, "loss": 0.1437, "lr": 8.268358485906181e-07, "epoch": 3.800332885218616, "percentage": 76.01, "elapsed_time": "2:36:37", "remaining_time": "0:49:26", "throughput": 19879.98, "total_tokens": 186829632}
|
|
{"current_steps": 59370, "total_steps": 78105, "loss": 0.109, "lr": 8.264207987131745e-07, "epoch": 3.8006529671595928, "percentage": 76.01, "elapsed_time": "2:36:38", "remaining_time": "0:49:25", "throughput": 19880.19, "total_tokens": 186844736}
|
|
{"current_steps": 59375, "total_steps": 78105, "loss": 0.1567, "lr": 8.260058324055586e-07, "epoch": 3.8009730491005698, "percentage": 76.02, "elapsed_time": "2:36:39", "remaining_time": "0:49:25", "throughput": 19880.43, "total_tokens": 186860416}
|
|
{"current_steps": 59380, "total_steps": 78105, "loss": 0.2166, "lr": 8.255909496884914e-07, "epoch": 3.8012931310415468, "percentage": 76.03, "elapsed_time": "2:36:39", "remaining_time": "0:49:24", "throughput": 19880.66, "total_tokens": 186875776}
|
|
{"current_steps": 59385, "total_steps": 78105, "loss": 0.2555, "lr": 8.251761505826922e-07, "epoch": 3.8016132129825237, "percentage": 76.03, "elapsed_time": "2:36:40", "remaining_time": "0:49:23", "throughput": 19880.9, "total_tokens": 186891648}
|
|
{"current_steps": 59390, "total_steps": 78105, "loss": 0.1712, "lr": 8.247614351088703e-07, "epoch": 3.8019332949235003, "percentage": 76.04, "elapsed_time": "2:36:41", "remaining_time": "0:49:22", "throughput": 19881.13, "total_tokens": 186907072}
|
|
{"current_steps": 59395, "total_steps": 78105, "loss": 0.0999, "lr": 8.243468032877375e-07, "epoch": 3.8022533768644773, "percentage": 76.05, "elapsed_time": "2:36:41", "remaining_time": "0:49:21", "throughput": 19881.38, "total_tokens": 186923264}
|
|
{"current_steps": 59400, "total_steps": 78105, "loss": 0.1436, "lr": 8.239322551399967e-07, "epoch": 3.8025734588054543, "percentage": 76.05, "elapsed_time": "2:36:42", "remaining_time": "0:49:20", "throughput": 19881.61, "total_tokens": 186938688}
|
|
{"current_steps": 59405, "total_steps": 78105, "loss": 0.1095, "lr": 8.235177906863492e-07, "epoch": 3.802893540746431, "percentage": 76.06, "elapsed_time": "2:36:43", "remaining_time": "0:49:20", "throughput": 19881.84, "total_tokens": 186954304}
|
|
{"current_steps": 59410, "total_steps": 78105, "loss": 0.2212, "lr": 8.231034099474905e-07, "epoch": 3.803213622687408, "percentage": 76.06, "elapsed_time": "2:36:43", "remaining_time": "0:49:19", "throughput": 19882.05, "total_tokens": 186969024}
|
|
{"current_steps": 59415, "total_steps": 78105, "loss": 0.1865, "lr": 8.226891129441122e-07, "epoch": 3.803533704628385, "percentage": 76.07, "elapsed_time": "2:36:44", "remaining_time": "0:49:18", "throughput": 19882.27, "total_tokens": 186984512}
|
|
{"current_steps": 59420, "total_steps": 78105, "loss": 0.167, "lr": 8.222748996969044e-07, "epoch": 3.803853786569362, "percentage": 76.08, "elapsed_time": "2:36:45", "remaining_time": "0:49:17", "throughput": 19882.51, "total_tokens": 187000320}
|
|
{"current_steps": 59425, "total_steps": 78105, "loss": 0.2452, "lr": 8.218607702265474e-07, "epoch": 3.804173868510339, "percentage": 76.08, "elapsed_time": "2:36:45", "remaining_time": "0:49:16", "throughput": 19882.75, "total_tokens": 187015936}
|
|
{"current_steps": 59430, "total_steps": 78105, "loss": 0.1496, "lr": 8.214467245537233e-07, "epoch": 3.8044939504513158, "percentage": 76.09, "elapsed_time": "2:36:46", "remaining_time": "0:49:15", "throughput": 19882.97, "total_tokens": 187031232}
|
|
{"current_steps": 59435, "total_steps": 78105, "loss": 0.1514, "lr": 8.210327626991066e-07, "epoch": 3.8048140323922923, "percentage": 76.1, "elapsed_time": "2:36:47", "remaining_time": "0:49:15", "throughput": 19883.17, "total_tokens": 187045824}
|
|
{"current_steps": 59440, "total_steps": 78105, "loss": 0.1598, "lr": 8.206188846833685e-07, "epoch": 3.8051341143332693, "percentage": 76.1, "elapsed_time": "2:36:47", "remaining_time": "0:49:14", "throughput": 19883.43, "total_tokens": 187062208}
|
|
{"current_steps": 59445, "total_steps": 78105, "loss": 0.1775, "lr": 8.202050905271752e-07, "epoch": 3.8054541962742463, "percentage": 76.11, "elapsed_time": "2:36:48", "remaining_time": "0:49:13", "throughput": 19883.67, "total_tokens": 187077888}
|
|
{"current_steps": 59450, "total_steps": 78105, "loss": 0.1985, "lr": 8.197913802511903e-07, "epoch": 3.805774278215223, "percentage": 76.12, "elapsed_time": "2:36:49", "remaining_time": "0:49:12", "throughput": 19883.88, "total_tokens": 187093120}
|
|
{"current_steps": 59455, "total_steps": 78105, "loss": 0.1402, "lr": 8.193777538760719e-07, "epoch": 3.8060943601562, "percentage": 76.12, "elapsed_time": "2:36:49", "remaining_time": "0:49:11", "throughput": 19884.14, "total_tokens": 187109056}
|
|
{"current_steps": 59460, "total_steps": 78105, "loss": 0.1896, "lr": 8.189642114224739e-07, "epoch": 3.806414442097177, "percentage": 76.13, "elapsed_time": "2:36:50", "remaining_time": "0:49:10", "throughput": 19884.36, "total_tokens": 187124480}
|
|
{"current_steps": 59465, "total_steps": 78105, "loss": 0.1081, "lr": 8.185507529110473e-07, "epoch": 3.806734524038154, "percentage": 76.13, "elapsed_time": "2:36:51", "remaining_time": "0:49:10", "throughput": 19884.62, "total_tokens": 187140864}
|
|
{"current_steps": 59470, "total_steps": 78105, "loss": 0.1726, "lr": 8.18137378362438e-07, "epoch": 3.807054605979131, "percentage": 76.14, "elapsed_time": "2:36:52", "remaining_time": "0:49:09", "throughput": 19884.86, "total_tokens": 187156736}
|
|
{"current_steps": 59475, "total_steps": 78105, "loss": 0.1552, "lr": 8.177240877972875e-07, "epoch": 3.807374687920108, "percentage": 76.15, "elapsed_time": "2:36:52", "remaining_time": "0:49:08", "throughput": 19885.08, "total_tokens": 187171968}
|
|
{"current_steps": 59480, "total_steps": 78105, "loss": 0.1448, "lr": 8.173108812362334e-07, "epoch": 3.8076947698610843, "percentage": 76.15, "elapsed_time": "2:36:53", "remaining_time": "0:49:07", "throughput": 19885.32, "total_tokens": 187187648}
|
|
{"current_steps": 59485, "total_steps": 78105, "loss": 0.1607, "lr": 8.168977586999091e-07, "epoch": 3.8080148518020613, "percentage": 76.16, "elapsed_time": "2:36:54", "remaining_time": "0:49:06", "throughput": 19885.6, "total_tokens": 187204416}
|
|
{"current_steps": 59490, "total_steps": 78105, "loss": 0.1403, "lr": 8.164847202089429e-07, "epoch": 3.8083349337430383, "percentage": 76.17, "elapsed_time": "2:36:54", "remaining_time": "0:49:05", "throughput": 19885.81, "total_tokens": 187219328}
|
|
{"current_steps": 59495, "total_steps": 78105, "loss": 0.1597, "lr": 8.160717657839625e-07, "epoch": 3.808655015684015, "percentage": 76.17, "elapsed_time": "2:36:55", "remaining_time": "0:49:05", "throughput": 19886.03, "total_tokens": 187234560}
|
|
{"current_steps": 59500, "total_steps": 78105, "loss": 0.1466, "lr": 8.156588954455849e-07, "epoch": 3.808975097624992, "percentage": 76.18, "elapsed_time": "2:36:56", "remaining_time": "0:49:04", "throughput": 19886.28, "total_tokens": 187250688}
|
|
{"current_steps": 59505, "total_steps": 78105, "loss": 0.1375, "lr": 8.152461092144306e-07, "epoch": 3.809295179565969, "percentage": 76.19, "elapsed_time": "2:36:56", "remaining_time": "0:49:03", "throughput": 19886.54, "total_tokens": 187266432}
|
|
{"current_steps": 59510, "total_steps": 78105, "loss": 0.1379, "lr": 8.148334071111084e-07, "epoch": 3.809615261506946, "percentage": 76.19, "elapsed_time": "2:36:57", "remaining_time": "0:49:02", "throughput": 19886.83, "total_tokens": 187283200}
|
|
{"current_steps": 59515, "total_steps": 78105, "loss": 0.2232, "lr": 8.144207891562287e-07, "epoch": 3.809935343447923, "percentage": 76.2, "elapsed_time": "2:36:58", "remaining_time": "0:49:01", "throughput": 19887.08, "total_tokens": 187299136}
|
|
{"current_steps": 59520, "total_steps": 78105, "loss": 0.146, "lr": 8.140082553703948e-07, "epoch": 3.8102554253889, "percentage": 76.21, "elapsed_time": "2:36:58", "remaining_time": "0:49:00", "throughput": 19887.29, "total_tokens": 187314240}
|
|
{"current_steps": 59525, "total_steps": 78105, "loss": 0.1601, "lr": 8.135958057742061e-07, "epoch": 3.8105755073298764, "percentage": 76.21, "elapsed_time": "2:36:59", "remaining_time": "0:49:00", "throughput": 19887.51, "total_tokens": 187329984}
|
|
{"current_steps": 59530, "total_steps": 78105, "loss": 0.1516, "lr": 8.131834403882605e-07, "epoch": 3.8108955892708534, "percentage": 76.22, "elapsed_time": "2:37:00", "remaining_time": "0:48:59", "throughput": 19887.73, "total_tokens": 187344960}
|
|
{"current_steps": 59535, "total_steps": 78105, "loss": 0.194, "lr": 8.127711592331456e-07, "epoch": 3.8112156712118304, "percentage": 76.22, "elapsed_time": "2:37:00", "remaining_time": "0:48:58", "throughput": 19887.97, "total_tokens": 187360320}
|
|
{"current_steps": 59540, "total_steps": 78105, "loss": 0.1793, "lr": 8.123589623294522e-07, "epoch": 3.811535753152807, "percentage": 76.23, "elapsed_time": "2:37:01", "remaining_time": "0:48:57", "throughput": 19888.25, "total_tokens": 187377088}
|
|
{"current_steps": 59545, "total_steps": 78105, "loss": 0.2101, "lr": 8.1194684969776e-07, "epoch": 3.811855835093784, "percentage": 76.24, "elapsed_time": "2:37:02", "remaining_time": "0:48:56", "throughput": 19888.53, "total_tokens": 187394368}
|
|
{"current_steps": 59550, "total_steps": 78105, "loss": 0.1827, "lr": 8.115348213586505e-07, "epoch": 3.812175917034761, "percentage": 76.24, "elapsed_time": "2:37:02", "remaining_time": "0:48:56", "throughput": 19888.84, "total_tokens": 187412224}
|
|
{"current_steps": 59555, "total_steps": 78105, "loss": 0.1453, "lr": 8.111228773326968e-07, "epoch": 3.812495998975738, "percentage": 76.25, "elapsed_time": "2:37:03", "remaining_time": "0:48:55", "throughput": 19889.1, "total_tokens": 187428608}
|
|
{"current_steps": 59560, "total_steps": 78105, "loss": 0.1248, "lr": 8.107110176404697e-07, "epoch": 3.812816080916715, "percentage": 76.26, "elapsed_time": "2:37:04", "remaining_time": "0:48:54", "throughput": 19889.38, "total_tokens": 187445184}
|
|
{"current_steps": 59565, "total_steps": 78105, "loss": 0.1271, "lr": 8.102992423025352e-07, "epoch": 3.8131361628576914, "percentage": 76.26, "elapsed_time": "2:37:05", "remaining_time": "0:48:53", "throughput": 19889.59, "total_tokens": 187460416}
|
|
{"current_steps": 59570, "total_steps": 78105, "loss": 0.1845, "lr": 8.098875513394544e-07, "epoch": 3.8134562447986684, "percentage": 76.27, "elapsed_time": "2:37:05", "remaining_time": "0:48:52", "throughput": 19889.79, "total_tokens": 187475328}
|
|
{"current_steps": 59575, "total_steps": 78105, "loss": 0.1381, "lr": 8.094759447717865e-07, "epoch": 3.8137763267396454, "percentage": 76.28, "elapsed_time": "2:37:06", "remaining_time": "0:48:51", "throughput": 19890.04, "total_tokens": 187491392}
|
|
{"current_steps": 59580, "total_steps": 78105, "loss": 0.2322, "lr": 8.090644226200845e-07, "epoch": 3.8140964086806224, "percentage": 76.28, "elapsed_time": "2:37:07", "remaining_time": "0:48:51", "throughput": 19890.26, "total_tokens": 187506880}
|
|
{"current_steps": 59585, "total_steps": 78105, "loss": 0.1146, "lr": 8.086529849048974e-07, "epoch": 3.814416490621599, "percentage": 76.29, "elapsed_time": "2:37:07", "remaining_time": "0:48:50", "throughput": 19890.49, "total_tokens": 187522368}
|
|
{"current_steps": 59590, "total_steps": 78105, "loss": 0.141, "lr": 8.082416316467703e-07, "epoch": 3.814736572562576, "percentage": 76.29, "elapsed_time": "2:37:08", "remaining_time": "0:48:49", "throughput": 19890.75, "total_tokens": 187538560}
|
|
{"current_steps": 59595, "total_steps": 78105, "loss": 0.1886, "lr": 8.07830362866244e-07, "epoch": 3.815056654503553, "percentage": 76.3, "elapsed_time": "2:37:09", "remaining_time": "0:48:48", "throughput": 19891.09, "total_tokens": 187556608}
|
|
{"current_steps": 59600, "total_steps": 78105, "loss": 0.1766, "lr": 8.074191785838554e-07, "epoch": 3.81537673644453, "percentage": 76.31, "elapsed_time": "2:37:09", "remaining_time": "0:48:47", "throughput": 19891.38, "total_tokens": 187573568}
|
|
{"current_steps": 59605, "total_steps": 78105, "loss": 0.1894, "lr": 8.070080788201365e-07, "epoch": 3.815696818385507, "percentage": 76.31, "elapsed_time": "2:37:10", "remaining_time": "0:48:47", "throughput": 19891.69, "total_tokens": 187590720}
|
|
{"current_steps": 59610, "total_steps": 78105, "loss": 0.2042, "lr": 8.065970635956147e-07, "epoch": 3.8160169003264834, "percentage": 76.32, "elapsed_time": "2:37:11", "remaining_time": "0:48:46", "throughput": 19891.91, "total_tokens": 187606144}
|
|
{"current_steps": 59615, "total_steps": 78105, "loss": 0.2101, "lr": 8.061861329308168e-07, "epoch": 3.8163369822674604, "percentage": 76.33, "elapsed_time": "2:37:11", "remaining_time": "0:48:45", "throughput": 19892.19, "total_tokens": 187622592}
|
|
{"current_steps": 59620, "total_steps": 78105, "loss": 0.1257, "lr": 8.05775286846259e-07, "epoch": 3.8166570642084374, "percentage": 76.33, "elapsed_time": "2:37:12", "remaining_time": "0:48:44", "throughput": 19892.42, "total_tokens": 187638080}
|
|
{"current_steps": 59625, "total_steps": 78105, "loss": 0.117, "lr": 8.053645253624593e-07, "epoch": 3.8169771461494144, "percentage": 76.34, "elapsed_time": "2:37:13", "remaining_time": "0:48:43", "throughput": 19892.65, "total_tokens": 187654144}
|
|
{"current_steps": 59630, "total_steps": 78105, "loss": 0.176, "lr": 8.049538484999281e-07, "epoch": 3.817297228090391, "percentage": 76.35, "elapsed_time": "2:37:14", "remaining_time": "0:48:42", "throughput": 19892.94, "total_tokens": 187670720}
|
|
{"current_steps": 59635, "total_steps": 78105, "loss": 0.1196, "lr": 8.045432562791727e-07, "epoch": 3.817617310031368, "percentage": 76.35, "elapsed_time": "2:37:14", "remaining_time": "0:48:42", "throughput": 19893.17, "total_tokens": 187686016}
|
|
{"current_steps": 59640, "total_steps": 78105, "loss": 0.1555, "lr": 8.041327487206957e-07, "epoch": 3.817937391972345, "percentage": 76.36, "elapsed_time": "2:37:15", "remaining_time": "0:48:41", "throughput": 19893.29, "total_tokens": 187701312}
|
|
{"current_steps": 59645, "total_steps": 78105, "loss": 0.154, "lr": 8.037223258449947e-07, "epoch": 3.818257473913322, "percentage": 76.37, "elapsed_time": "2:37:16", "remaining_time": "0:48:40", "throughput": 19893.55, "total_tokens": 187717440}
|
|
{"current_steps": 59650, "total_steps": 78105, "loss": 0.1606, "lr": 8.03311987672567e-07, "epoch": 3.818577555854299, "percentage": 76.37, "elapsed_time": "2:37:16", "remaining_time": "0:48:39", "throughput": 19893.73, "total_tokens": 187732992}
|
|
{"current_steps": 59655, "total_steps": 78105, "loss": 0.1504, "lr": 8.029017342238995e-07, "epoch": 3.8188976377952755, "percentage": 76.38, "elapsed_time": "2:37:17", "remaining_time": "0:48:38", "throughput": 19893.98, "total_tokens": 187748736}
|
|
{"current_steps": 59660, "total_steps": 78105, "loss": 0.1484, "lr": 8.024915655194801e-07, "epoch": 3.8192177197362525, "percentage": 76.38, "elapsed_time": "2:37:18", "remaining_time": "0:48:37", "throughput": 19894.21, "total_tokens": 187764160}
|
|
{"current_steps": 59665, "total_steps": 78105, "loss": 0.1643, "lr": 8.0208148157979e-07, "epoch": 3.8195378016772294, "percentage": 76.39, "elapsed_time": "2:37:18", "remaining_time": "0:48:37", "throughput": 19894.48, "total_tokens": 187780800}
|
|
{"current_steps": 59670, "total_steps": 78105, "loss": 0.1844, "lr": 8.016714824253063e-07, "epoch": 3.819857883618206, "percentage": 76.4, "elapsed_time": "2:37:19", "remaining_time": "0:48:36", "throughput": 19894.7, "total_tokens": 187796160}
|
|
{"current_steps": 59675, "total_steps": 78105, "loss": 0.1807, "lr": 8.012615680765029e-07, "epoch": 3.820177965559183, "percentage": 76.4, "elapsed_time": "2:37:20", "remaining_time": "0:48:35", "throughput": 19894.92, "total_tokens": 187811840}
|
|
{"current_steps": 59680, "total_steps": 78105, "loss": 0.1676, "lr": 8.00851738553848e-07, "epoch": 3.82049804750016, "percentage": 76.41, "elapsed_time": "2:37:20", "remaining_time": "0:48:34", "throughput": 19895.14, "total_tokens": 187827584}
|
|
{"current_steps": 59685, "total_steps": 78105, "loss": 0.1598, "lr": 8.004419938778071e-07, "epoch": 3.820818129441137, "percentage": 76.42, "elapsed_time": "2:37:21", "remaining_time": "0:48:33", "throughput": 19895.35, "total_tokens": 187842816}
|
|
{"current_steps": 59690, "total_steps": 78105, "loss": 0.1937, "lr": 8.000323340688396e-07, "epoch": 3.821138211382114, "percentage": 76.42, "elapsed_time": "2:37:22", "remaining_time": "0:48:33", "throughput": 19895.55, "total_tokens": 187857536}
|
|
{"current_steps": 59695, "total_steps": 78105, "loss": 0.156, "lr": 7.996227591474029e-07, "epoch": 3.821458293323091, "percentage": 76.43, "elapsed_time": "2:37:22", "remaining_time": "0:48:32", "throughput": 19895.77, "total_tokens": 187872832}
|
|
{"current_steps": 59700, "total_steps": 78105, "loss": 0.1605, "lr": 7.992132691339491e-07, "epoch": 3.8217783752640675, "percentage": 76.44, "elapsed_time": "2:37:23", "remaining_time": "0:48:31", "throughput": 19895.96, "total_tokens": 187887168}
|
|
{"current_steps": 59705, "total_steps": 78105, "loss": 0.1874, "lr": 7.988038640489252e-07, "epoch": 3.8220984572050445, "percentage": 76.44, "elapsed_time": "2:37:24", "remaining_time": "0:48:30", "throughput": 19896.17, "total_tokens": 187902464}
|
|
{"current_steps": 59710, "total_steps": 78105, "loss": 0.172, "lr": 7.983945439127753e-07, "epoch": 3.8224185391460215, "percentage": 76.45, "elapsed_time": "2:37:24", "remaining_time": "0:48:29", "throughput": 19896.38, "total_tokens": 187917632}
|
|
{"current_steps": 59715, "total_steps": 78105, "loss": 0.2199, "lr": 7.979853087459386e-07, "epoch": 3.822738621086998, "percentage": 76.45, "elapsed_time": "2:37:25", "remaining_time": "0:48:28", "throughput": 19896.62, "total_tokens": 187933440}
|
|
{"current_steps": 59720, "total_steps": 78105, "loss": 0.1556, "lr": 7.97576158568849e-07, "epoch": 3.823058703027975, "percentage": 76.46, "elapsed_time": "2:37:26", "remaining_time": "0:48:28", "throughput": 19896.84, "total_tokens": 187949376}
|
|
{"current_steps": 59725, "total_steps": 78105, "loss": 0.2253, "lr": 7.971670934019405e-07, "epoch": 3.823378784968952, "percentage": 76.47, "elapsed_time": "2:37:26", "remaining_time": "0:48:27", "throughput": 19897.05, "total_tokens": 187964480}
|
|
{"current_steps": 59730, "total_steps": 78105, "loss": 0.2192, "lr": 7.967581132656357e-07, "epoch": 3.823698866909929, "percentage": 76.47, "elapsed_time": "2:37:27", "remaining_time": "0:48:26", "throughput": 19897.3, "total_tokens": 187980608}
|
|
{"current_steps": 59735, "total_steps": 78105, "loss": 0.193, "lr": 7.963492181803606e-07, "epoch": 3.824018948850906, "percentage": 76.48, "elapsed_time": "2:37:28", "remaining_time": "0:48:25", "throughput": 19897.55, "total_tokens": 187996416}
|
|
{"current_steps": 59740, "total_steps": 78105, "loss": 0.2356, "lr": 7.959404081665298e-07, "epoch": 3.824339030791883, "percentage": 76.49, "elapsed_time": "2:37:28", "remaining_time": "0:48:24", "throughput": 19897.79, "total_tokens": 188012032}
|
|
{"current_steps": 59745, "total_steps": 78105, "loss": 0.1795, "lr": 7.955316832445598e-07, "epoch": 3.8246591127328595, "percentage": 76.49, "elapsed_time": "2:37:29", "remaining_time": "0:48:23", "throughput": 19898.03, "total_tokens": 188028160}
|
|
{"current_steps": 59750, "total_steps": 78105, "loss": 0.1108, "lr": 7.951230434348595e-07, "epoch": 3.8249791946738365, "percentage": 76.5, "elapsed_time": "2:37:30", "remaining_time": "0:48:23", "throughput": 19898.25, "total_tokens": 188043264}
|
|
{"current_steps": 59755, "total_steps": 78105, "loss": 0.2085, "lr": 7.947144887578329e-07, "epoch": 3.8252992766148135, "percentage": 76.51, "elapsed_time": "2:37:30", "remaining_time": "0:48:22", "throughput": 19898.43, "total_tokens": 188058112}
|
|
{"current_steps": 59760, "total_steps": 78105, "loss": 0.1225, "lr": 7.943060192338839e-07, "epoch": 3.82561935855579, "percentage": 76.51, "elapsed_time": "2:37:31", "remaining_time": "0:48:21", "throughput": 19898.65, "total_tokens": 188073344}
|
|
{"current_steps": 59765, "total_steps": 78105, "loss": 0.1736, "lr": 7.938976348834059e-07, "epoch": 3.825939440496767, "percentage": 76.52, "elapsed_time": "2:37:32", "remaining_time": "0:48:20", "throughput": 19898.9, "total_tokens": 188088960}
|
|
{"current_steps": 59770, "total_steps": 78105, "loss": 0.1727, "lr": 7.934893357267953e-07, "epoch": 3.826259522437744, "percentage": 76.53, "elapsed_time": "2:37:32", "remaining_time": "0:48:19", "throughput": 19899.15, "total_tokens": 188105152}
|
|
{"current_steps": 59775, "total_steps": 78105, "loss": 0.2031, "lr": 7.930811217844365e-07, "epoch": 3.826579604378721, "percentage": 76.53, "elapsed_time": "2:37:33", "remaining_time": "0:48:18", "throughput": 19899.4, "total_tokens": 188121152}
|
|
{"current_steps": 59780, "total_steps": 78105, "loss": 0.1778, "lr": 7.926729930767163e-07, "epoch": 3.826899686319698, "percentage": 76.54, "elapsed_time": "2:37:34", "remaining_time": "0:48:18", "throughput": 19899.61, "total_tokens": 188136128}
|
|
{"current_steps": 59785, "total_steps": 78105, "loss": 0.1873, "lr": 7.922649496240137e-07, "epoch": 3.827219768260675, "percentage": 76.54, "elapsed_time": "2:37:34", "remaining_time": "0:48:17", "throughput": 19899.87, "total_tokens": 188152320}
|
|
{"current_steps": 59790, "total_steps": 78105, "loss": 0.1792, "lr": 7.918569914467041e-07, "epoch": 3.8275398502016516, "percentage": 76.55, "elapsed_time": "2:37:35", "remaining_time": "0:48:16", "throughput": 19900.19, "total_tokens": 188169728}
|
|
{"current_steps": 59795, "total_steps": 78105, "loss": 0.1481, "lr": 7.914491185651594e-07, "epoch": 3.8278599321426285, "percentage": 76.56, "elapsed_time": "2:37:36", "remaining_time": "0:48:15", "throughput": 19900.44, "total_tokens": 188185856}
|
|
{"current_steps": 59800, "total_steps": 78105, "loss": 0.2949, "lr": 7.910413309997456e-07, "epoch": 3.8281800140836055, "percentage": 76.56, "elapsed_time": "2:37:37", "remaining_time": "0:48:14", "throughput": 19900.7, "total_tokens": 188202176}
|
|
{"current_steps": 59805, "total_steps": 78105, "loss": 0.1485, "lr": 7.906336287708258e-07, "epoch": 3.828500096024582, "percentage": 76.57, "elapsed_time": "2:37:37", "remaining_time": "0:48:14", "throughput": 19900.9, "total_tokens": 188216832}
|
|
{"current_steps": 59810, "total_steps": 78105, "loss": 0.1764, "lr": 7.902260118987592e-07, "epoch": 3.828820177965559, "percentage": 76.58, "elapsed_time": "2:37:38", "remaining_time": "0:48:13", "throughput": 19901.13, "total_tokens": 188232064}
|
|
{"current_steps": 59815, "total_steps": 78105, "loss": 0.1855, "lr": 7.898184804039e-07, "epoch": 3.829140259906536, "percentage": 76.58, "elapsed_time": "2:37:38", "remaining_time": "0:48:12", "throughput": 19901.3, "total_tokens": 188246272}
|
|
{"current_steps": 59820, "total_steps": 78105, "loss": 0.1861, "lr": 7.894110343065978e-07, "epoch": 3.829460341847513, "percentage": 76.59, "elapsed_time": "2:37:39", "remaining_time": "0:48:11", "throughput": 19901.56, "total_tokens": 188262400}
|
|
{"current_steps": 59825, "total_steps": 78105, "loss": 0.1767, "lr": 7.890036736271983e-07, "epoch": 3.82978042378849, "percentage": 76.6, "elapsed_time": "2:37:40", "remaining_time": "0:48:10", "throughput": 19901.81, "total_tokens": 188278592}
|
|
{"current_steps": 59830, "total_steps": 78105, "loss": 0.1602, "lr": 7.885963983860431e-07, "epoch": 3.8301005057294666, "percentage": 76.6, "elapsed_time": "2:37:41", "remaining_time": "0:48:09", "throughput": 19902.04, "total_tokens": 188294272}
|
|
{"current_steps": 59835, "total_steps": 78105, "loss": 0.1602, "lr": 7.881892086034695e-07, "epoch": 3.8304205876704436, "percentage": 76.61, "elapsed_time": "2:37:41", "remaining_time": "0:48:09", "throughput": 19902.23, "total_tokens": 188308992}
|
|
{"current_steps": 59840, "total_steps": 78105, "loss": 0.1537, "lr": 7.877821042998093e-07, "epoch": 3.8307406696114206, "percentage": 76.61, "elapsed_time": "2:37:42", "remaining_time": "0:48:08", "throughput": 19902.45, "total_tokens": 188324224}
|
|
{"current_steps": 59845, "total_steps": 78105, "loss": 0.1458, "lr": 7.87375085495394e-07, "epoch": 3.8310607515523976, "percentage": 76.62, "elapsed_time": "2:37:43", "remaining_time": "0:48:07", "throughput": 19902.67, "total_tokens": 188339136}
|
|
{"current_steps": 59850, "total_steps": 78105, "loss": 0.1341, "lr": 7.869681522105446e-07, "epoch": 3.831380833493374, "percentage": 76.63, "elapsed_time": "2:37:43", "remaining_time": "0:48:06", "throughput": 19902.88, "total_tokens": 188354432}
|
|
{"current_steps": 59855, "total_steps": 78105, "loss": 0.2227, "lr": 7.865613044655835e-07, "epoch": 3.831700915434351, "percentage": 76.63, "elapsed_time": "2:37:44", "remaining_time": "0:48:05", "throughput": 19903.06, "total_tokens": 188368640}
|
|
{"current_steps": 59860, "total_steps": 78105, "loss": 0.0862, "lr": 7.861545422808259e-07, "epoch": 3.832020997375328, "percentage": 76.64, "elapsed_time": "2:37:45", "remaining_time": "0:48:04", "throughput": 19903.34, "total_tokens": 188385344}
|
|
{"current_steps": 59865, "total_steps": 78105, "loss": 0.1752, "lr": 7.857478656765826e-07, "epoch": 3.832341079316305, "percentage": 76.65, "elapsed_time": "2:37:45", "remaining_time": "0:48:04", "throughput": 19903.63, "total_tokens": 188401984}
|
|
{"current_steps": 59870, "total_steps": 78105, "loss": 0.2178, "lr": 7.853412746731634e-07, "epoch": 3.832661161257282, "percentage": 76.65, "elapsed_time": "2:37:46", "remaining_time": "0:48:03", "throughput": 19903.86, "total_tokens": 188417472}
|
|
{"current_steps": 59875, "total_steps": 78105, "loss": 0.1809, "lr": 7.84934769290868e-07, "epoch": 3.8329812431982586, "percentage": 76.66, "elapsed_time": "2:37:47", "remaining_time": "0:48:02", "throughput": 19904.06, "total_tokens": 188432192}
|
|
{"current_steps": 59880, "total_steps": 78105, "loss": 0.1476, "lr": 7.845283495499981e-07, "epoch": 3.8333013251392356, "percentage": 76.67, "elapsed_time": "2:37:47", "remaining_time": "0:48:01", "throughput": 19904.33, "total_tokens": 188448576}
|
|
{"current_steps": 59885, "total_steps": 78105, "loss": 0.1717, "lr": 7.841220154708454e-07, "epoch": 3.8336214070802126, "percentage": 76.67, "elapsed_time": "2:37:48", "remaining_time": "0:48:00", "throughput": 19904.53, "total_tokens": 188463744}
|
|
{"current_steps": 59890, "total_steps": 78105, "loss": 0.2148, "lr": 7.837157670737025e-07, "epoch": 3.8339414890211896, "percentage": 76.68, "elapsed_time": "2:37:49", "remaining_time": "0:47:59", "throughput": 19904.77, "total_tokens": 188479488}
|
|
{"current_steps": 59895, "total_steps": 78105, "loss": 0.1013, "lr": 7.833096043788543e-07, "epoch": 3.834261570962166, "percentage": 76.69, "elapsed_time": "2:37:49", "remaining_time": "0:47:59", "throughput": 19904.98, "total_tokens": 188494464}
|
|
{"current_steps": 59900, "total_steps": 78105, "loss": 0.1325, "lr": 7.829035274065824e-07, "epoch": 3.834581652903143, "percentage": 76.69, "elapsed_time": "2:37:50", "remaining_time": "0:47:58", "throughput": 19905.18, "total_tokens": 188509376}
|
|
{"current_steps": 59905, "total_steps": 78105, "loss": 0.122, "lr": 7.824975361771644e-07, "epoch": 3.83490173484412, "percentage": 76.7, "elapsed_time": "2:37:51", "remaining_time": "0:47:57", "throughput": 19905.37, "total_tokens": 188524032}
|
|
{"current_steps": 59910, "total_steps": 78105, "loss": 0.0974, "lr": 7.820916307108734e-07, "epoch": 3.835221816785097, "percentage": 76.7, "elapsed_time": "2:37:51", "remaining_time": "0:47:56", "throughput": 19905.6, "total_tokens": 188539392}
|
|
{"current_steps": 59915, "total_steps": 78105, "loss": 0.1441, "lr": 7.816858110279782e-07, "epoch": 3.835541898726074, "percentage": 76.71, "elapsed_time": "2:37:52", "remaining_time": "0:47:55", "throughput": 19905.81, "total_tokens": 188554240}
|
|
{"current_steps": 59920, "total_steps": 78105, "loss": 0.1766, "lr": 7.812800771487422e-07, "epoch": 3.8358619806670506, "percentage": 76.72, "elapsed_time": "2:37:53", "remaining_time": "0:47:54", "throughput": 19906.06, "total_tokens": 188570240}
|
|
{"current_steps": 59925, "total_steps": 78105, "loss": 0.1876, "lr": 7.808744290934275e-07, "epoch": 3.8361820626080276, "percentage": 76.72, "elapsed_time": "2:37:53", "remaining_time": "0:47:54", "throughput": 19906.26, "total_tokens": 188585024}
|
|
{"current_steps": 59930, "total_steps": 78105, "loss": 0.1244, "lr": 7.804688668822893e-07, "epoch": 3.8365021445490046, "percentage": 76.73, "elapsed_time": "2:37:54", "remaining_time": "0:47:53", "throughput": 19906.51, "total_tokens": 188600896}
|
|
{"current_steps": 59935, "total_steps": 78105, "loss": 0.1395, "lr": 7.800633905355792e-07, "epoch": 3.836822226489981, "percentage": 76.74, "elapsed_time": "2:37:54", "remaining_time": "0:47:52", "throughput": 19906.75, "total_tokens": 188616256}
|
|
{"current_steps": 59940, "total_steps": 78105, "loss": 0.1308, "lr": 7.796580000735443e-07, "epoch": 3.837142308430958, "percentage": 76.74, "elapsed_time": "2:37:55", "remaining_time": "0:47:51", "throughput": 19907.02, "total_tokens": 188633088}
|
|
{"current_steps": 59945, "total_steps": 78105, "loss": 0.1823, "lr": 7.792526955164281e-07, "epoch": 3.837462390371935, "percentage": 76.75, "elapsed_time": "2:37:56", "remaining_time": "0:47:50", "throughput": 19907.33, "total_tokens": 188650880}
|
|
{"current_steps": 59950, "total_steps": 78105, "loss": 0.1192, "lr": 7.788474768844686e-07, "epoch": 3.837782472312912, "percentage": 76.76, "elapsed_time": "2:37:57", "remaining_time": "0:47:50", "throughput": 19907.63, "total_tokens": 188668096}
|
|
{"current_steps": 59955, "total_steps": 78105, "loss": 0.1912, "lr": 7.784423441979025e-07, "epoch": 3.838102554253889, "percentage": 76.76, "elapsed_time": "2:37:57", "remaining_time": "0:47:49", "throughput": 19907.92, "total_tokens": 188685248}
|
|
{"current_steps": 59960, "total_steps": 78105, "loss": 0.1559, "lr": 7.780372974769565e-07, "epoch": 3.838422636194866, "percentage": 76.77, "elapsed_time": "2:37:58", "remaining_time": "0:47:48", "throughput": 19908.17, "total_tokens": 188700992}
|
|
{"current_steps": 59965, "total_steps": 78105, "loss": 0.1487, "lr": 7.776323367418606e-07, "epoch": 3.8387427181358427, "percentage": 76.77, "elapsed_time": "2:37:59", "remaining_time": "0:47:47", "throughput": 19908.39, "total_tokens": 188716288}
|
|
{"current_steps": 59970, "total_steps": 78105, "loss": 0.1905, "lr": 7.772274620128323e-07, "epoch": 3.8390628000768197, "percentage": 76.78, "elapsed_time": "2:37:59", "remaining_time": "0:47:46", "throughput": 19908.61, "total_tokens": 188731648}
|
|
{"current_steps": 59975, "total_steps": 78105, "loss": 0.2068, "lr": 7.76822673310092e-07, "epoch": 3.8393828820177966, "percentage": 76.79, "elapsed_time": "2:38:00", "remaining_time": "0:47:45", "throughput": 19908.84, "total_tokens": 188747648}
|
|
{"current_steps": 59980, "total_steps": 78105, "loss": 0.166, "lr": 7.764179706538516e-07, "epoch": 3.839702963958773, "percentage": 76.79, "elapsed_time": "2:38:01", "remaining_time": "0:47:45", "throughput": 19909.12, "total_tokens": 188764224}
|
|
{"current_steps": 59985, "total_steps": 78105, "loss": 0.1819, "lr": 7.760133540643191e-07, "epoch": 3.84002304589975, "percentage": 76.8, "elapsed_time": "2:38:02", "remaining_time": "0:47:44", "throughput": 19909.42, "total_tokens": 188782016}
|
|
{"current_steps": 59990, "total_steps": 78105, "loss": 0.2164, "lr": 7.756088235617015e-07, "epoch": 3.840343127840727, "percentage": 76.81, "elapsed_time": "2:38:02", "remaining_time": "0:47:43", "throughput": 19909.61, "total_tokens": 188797056}
|
|
{"current_steps": 59995, "total_steps": 78105, "loss": 0.0811, "lr": 7.752043791661956e-07, "epoch": 3.840663209781704, "percentage": 76.81, "elapsed_time": "2:38:03", "remaining_time": "0:47:42", "throughput": 19909.91, "total_tokens": 188813888}
|
|
{"current_steps": 60000, "total_steps": 78105, "loss": 0.188, "lr": 7.748000208980002e-07, "epoch": 3.840983291722681, "percentage": 76.82, "elapsed_time": "2:38:04", "remaining_time": "0:47:41", "throughput": 19910.12, "total_tokens": 188829440}
|
|
{"current_steps": 60005, "total_steps": 78105, "loss": 0.2041, "lr": 7.743957487773043e-07, "epoch": 3.841303373663658, "percentage": 76.83, "elapsed_time": "2:38:04", "remaining_time": "0:47:40", "throughput": 19910.34, "total_tokens": 188844352}
|
|
{"current_steps": 60010, "total_steps": 78105, "loss": 0.1179, "lr": 7.739915628242969e-07, "epoch": 3.8416234556046347, "percentage": 76.83, "elapsed_time": "2:38:05", "remaining_time": "0:47:40", "throughput": 19910.61, "total_tokens": 188860800}
|
|
{"current_steps": 60015, "total_steps": 78105, "loss": 0.209, "lr": 7.735874630591605e-07, "epoch": 3.8419435375456117, "percentage": 76.84, "elapsed_time": "2:38:06", "remaining_time": "0:47:39", "throughput": 19910.83, "total_tokens": 188875968}
|
|
{"current_steps": 60020, "total_steps": 78105, "loss": 0.1646, "lr": 7.731834495020738e-07, "epoch": 3.8422636194865887, "percentage": 76.85, "elapsed_time": "2:38:06", "remaining_time": "0:47:38", "throughput": 19911.08, "total_tokens": 188891776}
|
|
{"current_steps": 60025, "total_steps": 78105, "loss": 0.2128, "lr": 7.727795221732109e-07, "epoch": 3.8425837014275652, "percentage": 76.85, "elapsed_time": "2:38:07", "remaining_time": "0:47:37", "throughput": 19911.28, "total_tokens": 188906624}
|
|
{"current_steps": 60030, "total_steps": 78105, "loss": 0.0938, "lr": 7.723756810927418e-07, "epoch": 3.842903783368542, "percentage": 76.86, "elapsed_time": "2:38:08", "remaining_time": "0:47:36", "throughput": 19911.53, "total_tokens": 188922624}
|
|
{"current_steps": 60035, "total_steps": 78105, "loss": 0.1343, "lr": 7.719719262808314e-07, "epoch": 3.843223865309519, "percentage": 76.86, "elapsed_time": "2:38:08", "remaining_time": "0:47:36", "throughput": 19911.77, "total_tokens": 188938624}
|
|
{"current_steps": 60040, "total_steps": 78105, "loss": 0.14, "lr": 7.71568257757643e-07, "epoch": 3.843543947250496, "percentage": 76.87, "elapsed_time": "2:38:09", "remaining_time": "0:47:35", "throughput": 19911.99, "total_tokens": 188953984}
|
|
{"current_steps": 60045, "total_steps": 78105, "loss": 0.1468, "lr": 7.711646755433328e-07, "epoch": 3.843864029191473, "percentage": 76.88, "elapsed_time": "2:38:10", "remaining_time": "0:47:34", "throughput": 19912.32, "total_tokens": 188971904}
|
|
{"current_steps": 60050, "total_steps": 78105, "loss": 0.1918, "lr": 7.707611796580533e-07, "epoch": 3.84418411113245, "percentage": 76.88, "elapsed_time": "2:38:10", "remaining_time": "0:47:33", "throughput": 19912.53, "total_tokens": 188987008}
|
|
{"current_steps": 60055, "total_steps": 78105, "loss": 0.1181, "lr": 7.703577701219537e-07, "epoch": 3.8445041930734267, "percentage": 76.89, "elapsed_time": "2:38:11", "remaining_time": "0:47:32", "throughput": 19912.73, "total_tokens": 189001920}
|
|
{"current_steps": 60060, "total_steps": 78105, "loss": 0.1857, "lr": 7.69954446955177e-07, "epoch": 3.8448242750144037, "percentage": 76.9, "elapsed_time": "2:38:12", "remaining_time": "0:47:31", "throughput": 19912.94, "total_tokens": 189016960}
|
|
{"current_steps": 60065, "total_steps": 78105, "loss": 0.1907, "lr": 7.695512101778644e-07, "epoch": 3.8451443569553807, "percentage": 76.9, "elapsed_time": "2:38:12", "remaining_time": "0:47:31", "throughput": 19913.15, "total_tokens": 189031872}
|
|
{"current_steps": 60070, "total_steps": 78105, "loss": 0.195, "lr": 7.691480598101495e-07, "epoch": 3.8454644388963573, "percentage": 76.91, "elapsed_time": "2:38:13", "remaining_time": "0:47:30", "throughput": 19913.35, "total_tokens": 189047104}
|
|
{"current_steps": 60075, "total_steps": 78105, "loss": 0.2234, "lr": 7.687449958721665e-07, "epoch": 3.8457845208373342, "percentage": 76.92, "elapsed_time": "2:38:14", "remaining_time": "0:47:29", "throughput": 19913.53, "total_tokens": 189061376}
|
|
{"current_steps": 60080, "total_steps": 78105, "loss": 0.1447, "lr": 7.683420183840393e-07, "epoch": 3.8461046027783112, "percentage": 76.92, "elapsed_time": "2:38:14", "remaining_time": "0:47:28", "throughput": 19913.76, "total_tokens": 189077056}
|
|
{"current_steps": 60085, "total_steps": 78105, "loss": 0.122, "lr": 7.679391273658923e-07, "epoch": 3.8464246847192882, "percentage": 76.93, "elapsed_time": "2:38:15", "remaining_time": "0:47:27", "throughput": 19913.97, "total_tokens": 189092224}
|
|
{"current_steps": 60090, "total_steps": 78105, "loss": 0.1337, "lr": 7.675363228378435e-07, "epoch": 3.846744766660265, "percentage": 76.93, "elapsed_time": "2:38:16", "remaining_time": "0:47:26", "throughput": 19914.23, "total_tokens": 189108160}
|
|
{"current_steps": 60095, "total_steps": 78105, "loss": 0.1809, "lr": 7.671336048200057e-07, "epoch": 3.8470648486012418, "percentage": 76.94, "elapsed_time": "2:38:16", "remaining_time": "0:47:26", "throughput": 19914.47, "total_tokens": 189124224}
|
|
{"current_steps": 60100, "total_steps": 78105, "loss": 0.1052, "lr": 7.667309733324913e-07, "epoch": 3.8473849305422188, "percentage": 76.95, "elapsed_time": "2:38:17", "remaining_time": "0:47:25", "throughput": 19914.67, "total_tokens": 189138880}
|
|
{"current_steps": 60105, "total_steps": 78105, "loss": 0.1603, "lr": 7.66328428395402e-07, "epoch": 3.8477050124831957, "percentage": 76.95, "elapsed_time": "2:38:18", "remaining_time": "0:47:24", "throughput": 19914.92, "total_tokens": 189155520}
|
|
{"current_steps": 60110, "total_steps": 78105, "loss": 0.248, "lr": 7.659259700288423e-07, "epoch": 3.8480250944241727, "percentage": 76.96, "elapsed_time": "2:38:18", "remaining_time": "0:47:23", "throughput": 19915.13, "total_tokens": 189170688}
|
|
{"current_steps": 60115, "total_steps": 78105, "loss": 0.1931, "lr": 7.655235982529058e-07, "epoch": 3.8483451763651493, "percentage": 76.97, "elapsed_time": "2:38:19", "remaining_time": "0:47:22", "throughput": 19915.34, "total_tokens": 189185792}
|
|
{"current_steps": 60120, "total_steps": 78105, "loss": 0.1178, "lr": 7.651213130876867e-07, "epoch": 3.8486652583061263, "percentage": 76.97, "elapsed_time": "2:38:20", "remaining_time": "0:47:22", "throughput": 19915.61, "total_tokens": 189202240}
|
|
{"current_steps": 60125, "total_steps": 78105, "loss": 0.2771, "lr": 7.647191145532729e-07, "epoch": 3.8489853402471033, "percentage": 76.98, "elapsed_time": "2:38:20", "remaining_time": "0:47:21", "throughput": 19915.85, "total_tokens": 189217984}
|
|
{"current_steps": 60130, "total_steps": 78105, "loss": 0.1143, "lr": 7.643170026697475e-07, "epoch": 3.8493054221880803, "percentage": 76.99, "elapsed_time": "2:38:21", "remaining_time": "0:47:20", "throughput": 19916.09, "total_tokens": 189233728}
|
|
{"current_steps": 60135, "total_steps": 78105, "loss": 0.1361, "lr": 7.639149774571902e-07, "epoch": 3.8496255041290572, "percentage": 76.99, "elapsed_time": "2:38:22", "remaining_time": "0:47:19", "throughput": 19916.32, "total_tokens": 189249280}
|
|
{"current_steps": 60140, "total_steps": 78105, "loss": 0.1428, "lr": 7.635130389356763e-07, "epoch": 3.849945586070034, "percentage": 77.0, "elapsed_time": "2:38:22", "remaining_time": "0:47:18", "throughput": 19916.63, "total_tokens": 189266688}
|
|
{"current_steps": 60145, "total_steps": 78105, "loss": 0.1778, "lr": 7.631111871252758e-07, "epoch": 3.8502656680110108, "percentage": 77.01, "elapsed_time": "2:38:23", "remaining_time": "0:47:17", "throughput": 19916.85, "total_tokens": 189281792}
|
|
{"current_steps": 60150, "total_steps": 78105, "loss": 0.1246, "lr": 7.627094220460557e-07, "epoch": 3.8505857499519878, "percentage": 77.01, "elapsed_time": "2:38:24", "remaining_time": "0:47:17", "throughput": 19917.06, "total_tokens": 189296768}
|
|
{"current_steps": 60155, "total_steps": 78105, "loss": 0.1854, "lr": 7.623077437180771e-07, "epoch": 3.8509058318929648, "percentage": 77.02, "elapsed_time": "2:38:24", "remaining_time": "0:47:16", "throughput": 19917.29, "total_tokens": 189312384}
|
|
{"current_steps": 60160, "total_steps": 78105, "loss": 0.138, "lr": 7.619061521613991e-07, "epoch": 3.8512259138339413, "percentage": 77.02, "elapsed_time": "2:38:25", "remaining_time": "0:47:15", "throughput": 19917.54, "total_tokens": 189328640}
|
|
{"current_steps": 60165, "total_steps": 78105, "loss": 0.1848, "lr": 7.615046473960744e-07, "epoch": 3.8515459957749183, "percentage": 77.03, "elapsed_time": "2:38:26", "remaining_time": "0:47:14", "throughput": 19917.72, "total_tokens": 189343360}
|
|
{"current_steps": 60170, "total_steps": 78105, "loss": 0.1605, "lr": 7.611032294421522e-07, "epoch": 3.8518660777158953, "percentage": 77.04, "elapsed_time": "2:38:26", "remaining_time": "0:47:13", "throughput": 19917.92, "total_tokens": 189358336}
|
|
{"current_steps": 60175, "total_steps": 78105, "loss": 0.1258, "lr": 7.607018983196773e-07, "epoch": 3.8521861596568723, "percentage": 77.04, "elapsed_time": "2:38:27", "remaining_time": "0:47:12", "throughput": 19918.21, "total_tokens": 189375168}
|
|
{"current_steps": 60180, "total_steps": 78105, "loss": 0.214, "lr": 7.60300654048689e-07, "epoch": 3.8525062415978493, "percentage": 77.05, "elapsed_time": "2:38:28", "remaining_time": "0:47:12", "throughput": 19918.45, "total_tokens": 189391232}
|
|
{"current_steps": 60185, "total_steps": 78105, "loss": 0.1282, "lr": 7.59899496649226e-07, "epoch": 3.852826323538826, "percentage": 77.06, "elapsed_time": "2:38:29", "remaining_time": "0:47:11", "throughput": 19918.7, "total_tokens": 189407424}
|
|
{"current_steps": 60190, "total_steps": 78105, "loss": 0.163, "lr": 7.594984261413165e-07, "epoch": 3.853146405479803, "percentage": 77.06, "elapsed_time": "2:38:29", "remaining_time": "0:47:10", "throughput": 19918.92, "total_tokens": 189423040}
|
|
{"current_steps": 60195, "total_steps": 78105, "loss": 0.2409, "lr": 7.590974425449913e-07, "epoch": 3.85346648742078, "percentage": 77.07, "elapsed_time": "2:38:30", "remaining_time": "0:47:09", "throughput": 19919.19, "total_tokens": 189439296}
|
|
{"current_steps": 60200, "total_steps": 78105, "loss": 0.1665, "lr": 7.586965458802703e-07, "epoch": 3.8537865693617563, "percentage": 77.08, "elapsed_time": "2:38:31", "remaining_time": "0:47:08", "throughput": 19919.39, "total_tokens": 189453952}
|
|
{"current_steps": 60205, "total_steps": 78105, "loss": 0.1404, "lr": 7.582957361671744e-07, "epoch": 3.8541066513027333, "percentage": 77.08, "elapsed_time": "2:38:31", "remaining_time": "0:47:08", "throughput": 19919.69, "total_tokens": 189471168}
|
|
{"current_steps": 60210, "total_steps": 78105, "loss": 0.1323, "lr": 7.578950134257171e-07, "epoch": 3.8544267332437103, "percentage": 77.09, "elapsed_time": "2:38:32", "remaining_time": "0:47:07", "throughput": 19919.9, "total_tokens": 189486144}
|
|
{"current_steps": 60215, "total_steps": 78105, "loss": 0.1816, "lr": 7.57494377675908e-07, "epoch": 3.8547468151846873, "percentage": 77.09, "elapsed_time": "2:38:33", "remaining_time": "0:47:06", "throughput": 19920.11, "total_tokens": 189501376}
|
|
{"current_steps": 60220, "total_steps": 78105, "loss": 0.1773, "lr": 7.570938289377547e-07, "epoch": 3.8550668971256643, "percentage": 77.1, "elapsed_time": "2:38:33", "remaining_time": "0:47:05", "throughput": 19920.33, "total_tokens": 189516864}
|
|
{"current_steps": 60225, "total_steps": 78105, "loss": 0.131, "lr": 7.566933672312554e-07, "epoch": 3.8553869790666413, "percentage": 77.11, "elapsed_time": "2:38:34", "remaining_time": "0:47:04", "throughput": 19920.52, "total_tokens": 189531904}
|
|
{"current_steps": 60230, "total_steps": 78105, "loss": 0.1841, "lr": 7.562929925764098e-07, "epoch": 3.855707061007618, "percentage": 77.11, "elapsed_time": "2:38:35", "remaining_time": "0:47:03", "throughput": 19920.75, "total_tokens": 189547200}
|
|
{"current_steps": 60235, "total_steps": 78105, "loss": 0.1869, "lr": 7.558927049932097e-07, "epoch": 3.856027142948595, "percentage": 77.12, "elapsed_time": "2:38:35", "remaining_time": "0:47:03", "throughput": 19921.0, "total_tokens": 189563328}
|
|
{"current_steps": 60240, "total_steps": 78105, "loss": 0.1303, "lr": 7.554925045016429e-07, "epoch": 3.856347224889572, "percentage": 77.13, "elapsed_time": "2:38:36", "remaining_time": "0:47:02", "throughput": 19921.25, "total_tokens": 189579456}
|
|
{"current_steps": 60245, "total_steps": 78105, "loss": 0.1375, "lr": 7.550923911216937e-07, "epoch": 3.8566673068305484, "percentage": 77.13, "elapsed_time": "2:38:37", "remaining_time": "0:47:01", "throughput": 19921.52, "total_tokens": 189595904}
|
|
{"current_steps": 60250, "total_steps": 78105, "loss": 0.2064, "lr": 7.546923648733415e-07, "epoch": 3.8569873887715254, "percentage": 77.14, "elapsed_time": "2:38:37", "remaining_time": "0:47:00", "throughput": 19921.76, "total_tokens": 189612096}
|
|
{"current_steps": 60255, "total_steps": 78105, "loss": 0.1931, "lr": 7.542924257765616e-07, "epoch": 3.8573074707125024, "percentage": 77.15, "elapsed_time": "2:38:38", "remaining_time": "0:46:59", "throughput": 19921.97, "total_tokens": 189627584}
|
|
{"current_steps": 60260, "total_steps": 78105, "loss": 0.1254, "lr": 7.53892573851325e-07, "epoch": 3.8576275526534793, "percentage": 77.15, "elapsed_time": "2:38:39", "remaining_time": "0:46:58", "throughput": 19922.25, "total_tokens": 189644480}
|
|
{"current_steps": 60265, "total_steps": 78105, "loss": 0.2058, "lr": 7.534928091175974e-07, "epoch": 3.8579476345944563, "percentage": 77.16, "elapsed_time": "2:38:39", "remaining_time": "0:46:58", "throughput": 19922.48, "total_tokens": 189660160}
|
|
{"current_steps": 60270, "total_steps": 78105, "loss": 0.1398, "lr": 7.530931315953424e-07, "epoch": 3.8582677165354333, "percentage": 77.17, "elapsed_time": "2:38:40", "remaining_time": "0:46:57", "throughput": 19922.7, "total_tokens": 189675584}
|
|
{"current_steps": 60275, "total_steps": 78105, "loss": 0.1847, "lr": 7.526935413045172e-07, "epoch": 3.85858779847641, "percentage": 77.17, "elapsed_time": "2:38:41", "remaining_time": "0:46:56", "throughput": 19922.95, "total_tokens": 189691136}
|
|
{"current_steps": 60280, "total_steps": 78105, "loss": 0.1821, "lr": 7.522940382650751e-07, "epoch": 3.858907880417387, "percentage": 77.18, "elapsed_time": "2:38:41", "remaining_time": "0:46:55", "throughput": 19923.22, "total_tokens": 189707776}
|
|
{"current_steps": 60285, "total_steps": 78105, "loss": 0.1471, "lr": 7.518946224969651e-07, "epoch": 3.859227962358364, "percentage": 77.18, "elapsed_time": "2:38:42", "remaining_time": "0:46:54", "throughput": 19923.46, "total_tokens": 189723584}
|
|
{"current_steps": 60290, "total_steps": 78105, "loss": 0.1466, "lr": 7.514952940201323e-07, "epoch": 3.8595480442993404, "percentage": 77.19, "elapsed_time": "2:38:43", "remaining_time": "0:46:54", "throughput": 19923.69, "total_tokens": 189739264}
|
|
{"current_steps": 60295, "total_steps": 78105, "loss": 0.1721, "lr": 7.510960528545169e-07, "epoch": 3.8598681262403174, "percentage": 77.2, "elapsed_time": "2:38:43", "remaining_time": "0:46:53", "throughput": 19923.9, "total_tokens": 189754560}
|
|
{"current_steps": 60300, "total_steps": 78105, "loss": 0.1718, "lr": 7.506968990200539e-07, "epoch": 3.8601882081812944, "percentage": 77.2, "elapsed_time": "2:38:44", "remaining_time": "0:46:52", "throughput": 19924.09, "total_tokens": 189769600}
|
|
{"current_steps": 60305, "total_steps": 78105, "loss": 0.1877, "lr": 7.502978325366778e-07, "epoch": 3.8605082901222714, "percentage": 77.21, "elapsed_time": "2:38:45", "remaining_time": "0:46:51", "throughput": 19924.28, "total_tokens": 189784320}
|
|
{"current_steps": 60310, "total_steps": 78105, "loss": 0.1328, "lr": 7.498988534243123e-07, "epoch": 3.8608283720632484, "percentage": 77.22, "elapsed_time": "2:38:45", "remaining_time": "0:46:50", "throughput": 19924.49, "total_tokens": 189799552}
|
|
{"current_steps": 60315, "total_steps": 78105, "loss": 0.1716, "lr": 7.494999617028831e-07, "epoch": 3.8611484540042253, "percentage": 77.22, "elapsed_time": "2:38:46", "remaining_time": "0:46:49", "throughput": 19924.71, "total_tokens": 189815424}
|
|
{"current_steps": 60320, "total_steps": 78105, "loss": 0.1421, "lr": 7.491011573923077e-07, "epoch": 3.861468535945202, "percentage": 77.23, "elapsed_time": "2:38:47", "remaining_time": "0:46:49", "throughput": 19924.92, "total_tokens": 189830208}
|
|
{"current_steps": 60325, "total_steps": 78105, "loss": 0.1343, "lr": 7.487024405125004e-07, "epoch": 3.861788617886179, "percentage": 77.24, "elapsed_time": "2:38:47", "remaining_time": "0:46:48", "throughput": 19925.12, "total_tokens": 189845248}
|
|
{"current_steps": 60330, "total_steps": 78105, "loss": 0.199, "lr": 7.48303811083371e-07, "epoch": 3.862108699827156, "percentage": 77.24, "elapsed_time": "2:38:48", "remaining_time": "0:46:47", "throughput": 19925.31, "total_tokens": 189860224}
|
|
{"current_steps": 60335, "total_steps": 78105, "loss": 0.1605, "lr": 7.479052691248243e-07, "epoch": 3.8624287817681324, "percentage": 77.25, "elapsed_time": "2:38:49", "remaining_time": "0:46:46", "throughput": 19925.57, "total_tokens": 189876416}
|
|
{"current_steps": 60340, "total_steps": 78105, "loss": 0.144, "lr": 7.475068146567635e-07, "epoch": 3.8627488637091094, "percentage": 77.25, "elapsed_time": "2:38:49", "remaining_time": "0:46:45", "throughput": 19925.81, "total_tokens": 189892032}
|
|
{"current_steps": 60345, "total_steps": 78105, "loss": 0.2088, "lr": 7.471084476990825e-07, "epoch": 3.8630689456500864, "percentage": 77.26, "elapsed_time": "2:38:50", "remaining_time": "0:46:44", "throughput": 19926.06, "total_tokens": 189908288}
|
|
{"current_steps": 60350, "total_steps": 78105, "loss": 0.2278, "lr": 7.467101682716762e-07, "epoch": 3.8633890275910634, "percentage": 77.27, "elapsed_time": "2:38:51", "remaining_time": "0:46:44", "throughput": 19926.24, "total_tokens": 189922880}
|
|
{"current_steps": 60355, "total_steps": 78105, "loss": 0.1796, "lr": 7.463119763944313e-07, "epoch": 3.8637091095320404, "percentage": 77.27, "elapsed_time": "2:38:51", "remaining_time": "0:46:43", "throughput": 19926.47, "total_tokens": 189938176}
|
|
{"current_steps": 60360, "total_steps": 78105, "loss": 0.1747, "lr": 7.459138720872316e-07, "epoch": 3.864029191473017, "percentage": 77.28, "elapsed_time": "2:38:52", "remaining_time": "0:46:42", "throughput": 19926.23, "total_tokens": 189955264}
|
|
{"current_steps": 60365, "total_steps": 78105, "loss": 0.1671, "lr": 7.455158553699568e-07, "epoch": 3.864349273413994, "percentage": 77.29, "elapsed_time": "2:38:53", "remaining_time": "0:46:41", "throughput": 19926.47, "total_tokens": 189971584}
|
|
{"current_steps": 60370, "total_steps": 78105, "loss": 0.1623, "lr": 7.451179262624811e-07, "epoch": 3.864669355354971, "percentage": 77.29, "elapsed_time": "2:38:54", "remaining_time": "0:46:40", "throughput": 19926.71, "total_tokens": 189987520}
|
|
{"current_steps": 60375, "total_steps": 78105, "loss": 0.1078, "lr": 7.447200847846753e-07, "epoch": 3.864989437295948, "percentage": 77.3, "elapsed_time": "2:38:54", "remaining_time": "0:46:40", "throughput": 19926.94, "total_tokens": 190003072}
|
|
{"current_steps": 60380, "total_steps": 78105, "loss": 0.1921, "lr": 7.443223309564057e-07, "epoch": 3.8653095192369245, "percentage": 77.31, "elapsed_time": "2:38:55", "remaining_time": "0:46:39", "throughput": 19927.22, "total_tokens": 190019584}
|
|
{"current_steps": 60385, "total_steps": 78105, "loss": 0.0837, "lr": 7.439246647975329e-07, "epoch": 3.8656296011779014, "percentage": 77.31, "elapsed_time": "2:38:56", "remaining_time": "0:46:38", "throughput": 19927.43, "total_tokens": 190034688}
|
|
{"current_steps": 60390, "total_steps": 78105, "loss": 0.1779, "lr": 7.435270863279162e-07, "epoch": 3.8659496831188784, "percentage": 77.32, "elapsed_time": "2:38:57", "remaining_time": "0:46:37", "throughput": 19927.66, "total_tokens": 190050624}
|
|
{"current_steps": 60395, "total_steps": 78105, "loss": 0.2802, "lr": 7.431295955674078e-07, "epoch": 3.8662697650598554, "percentage": 77.33, "elapsed_time": "2:38:57", "remaining_time": "0:46:36", "throughput": 19927.96, "total_tokens": 190067328}
|
|
{"current_steps": 60400, "total_steps": 78105, "loss": 0.1764, "lr": 7.42732192535856e-07, "epoch": 3.8665898470008324, "percentage": 77.33, "elapsed_time": "2:38:58", "remaining_time": "0:46:35", "throughput": 19928.18, "total_tokens": 190082944}
|
|
{"current_steps": 60405, "total_steps": 78105, "loss": 0.1618, "lr": 7.423348772531053e-07, "epoch": 3.866909928941809, "percentage": 77.34, "elapsed_time": "2:38:59", "remaining_time": "0:46:35", "throughput": 19928.44, "total_tokens": 190099072}
|
|
{"current_steps": 60410, "total_steps": 78105, "loss": 0.202, "lr": 7.419376497389947e-07, "epoch": 3.867230010882786, "percentage": 77.34, "elapsed_time": "2:38:59", "remaining_time": "0:46:34", "throughput": 19928.64, "total_tokens": 190113984}
|
|
{"current_steps": 60415, "total_steps": 78105, "loss": 0.1491, "lr": 7.415405100133622e-07, "epoch": 3.867550092823763, "percentage": 77.35, "elapsed_time": "2:39:00", "remaining_time": "0:46:33", "throughput": 19928.88, "total_tokens": 190129664}
|
|
{"current_steps": 60420, "total_steps": 78105, "loss": 0.164, "lr": 7.411434580960353e-07, "epoch": 3.86787017476474, "percentage": 77.36, "elapsed_time": "2:39:01", "remaining_time": "0:46:32", "throughput": 19929.08, "total_tokens": 190144896}
|
|
{"current_steps": 60425, "total_steps": 78105, "loss": 0.1218, "lr": 7.40746494006844e-07, "epoch": 3.8681902567057165, "percentage": 77.36, "elapsed_time": "2:39:01", "remaining_time": "0:46:31", "throughput": 19929.28, "total_tokens": 190159744}
|
|
{"current_steps": 60430, "total_steps": 78105, "loss": 0.101, "lr": 7.40349617765608e-07, "epoch": 3.8685103386466935, "percentage": 77.37, "elapsed_time": "2:39:02", "remaining_time": "0:46:31", "throughput": 19929.49, "total_tokens": 190175040}
|
|
{"current_steps": 60435, "total_steps": 78105, "loss": 0.1493, "lr": 7.39952829392147e-07, "epoch": 3.8688304205876705, "percentage": 77.38, "elapsed_time": "2:39:03", "remaining_time": "0:46:30", "throughput": 19929.82, "total_tokens": 190192768}
|
|
{"current_steps": 60440, "total_steps": 78105, "loss": 0.1294, "lr": 7.395561289062739e-07, "epoch": 3.8691505025286475, "percentage": 77.38, "elapsed_time": "2:39:04", "remaining_time": "0:46:29", "throughput": 19930.45, "total_tokens": 190223232}
|
|
{"current_steps": 60445, "total_steps": 78105, "loss": 0.1829, "lr": 7.391595163277974e-07, "epoch": 3.8694705844696244, "percentage": 77.39, "elapsed_time": "2:39:05", "remaining_time": "0:46:28", "throughput": 19930.74, "total_tokens": 190240128}
|
|
{"current_steps": 60450, "total_steps": 78105, "loss": 0.1744, "lr": 7.387629916765241e-07, "epoch": 3.869790666410601, "percentage": 77.4, "elapsed_time": "2:39:05", "remaining_time": "0:46:27", "throughput": 19930.94, "total_tokens": 190255360}
|
|
{"current_steps": 60455, "total_steps": 78105, "loss": 0.1547, "lr": 7.383665549722513e-07, "epoch": 3.870110748351578, "percentage": 77.4, "elapsed_time": "2:39:06", "remaining_time": "0:46:27", "throughput": 19931.16, "total_tokens": 190270784}
|
|
{"current_steps": 60460, "total_steps": 78105, "loss": 0.1979, "lr": 7.379702062347776e-07, "epoch": 3.870430830292555, "percentage": 77.41, "elapsed_time": "2:39:07", "remaining_time": "0:46:26", "throughput": 19931.39, "total_tokens": 190286784}
|
|
{"current_steps": 60465, "total_steps": 78105, "loss": 0.1682, "lr": 7.37573945483894e-07, "epoch": 3.8707509122335315, "percentage": 77.42, "elapsed_time": "2:39:07", "remaining_time": "0:46:25", "throughput": 19931.61, "total_tokens": 190302208}
|
|
{"current_steps": 60470, "total_steps": 78105, "loss": 0.1857, "lr": 7.371777727393872e-07, "epoch": 3.8710709941745085, "percentage": 77.42, "elapsed_time": "2:39:08", "remaining_time": "0:46:24", "throughput": 19931.87, "total_tokens": 190318464}
|
|
{"current_steps": 60475, "total_steps": 78105, "loss": 0.1751, "lr": 7.367816880210404e-07, "epoch": 3.8713910761154855, "percentage": 77.43, "elapsed_time": "2:39:09", "remaining_time": "0:46:23", "throughput": 19932.09, "total_tokens": 190333824}
|
|
{"current_steps": 60480, "total_steps": 78105, "loss": 0.1397, "lr": 7.363856913486317e-07, "epoch": 3.8717111580564625, "percentage": 77.43, "elapsed_time": "2:39:09", "remaining_time": "0:46:22", "throughput": 19932.34, "total_tokens": 190349632}
|
|
{"current_steps": 60485, "total_steps": 78105, "loss": 0.152, "lr": 7.359897827419351e-07, "epoch": 3.8720312399974395, "percentage": 77.44, "elapsed_time": "2:39:10", "remaining_time": "0:46:22", "throughput": 19932.65, "total_tokens": 190366976}
|
|
{"current_steps": 60490, "total_steps": 78105, "loss": 0.1419, "lr": 7.355939622207206e-07, "epoch": 3.8723513219384165, "percentage": 77.45, "elapsed_time": "2:39:11", "remaining_time": "0:46:21", "throughput": 19932.89, "total_tokens": 190382848}
|
|
{"current_steps": 60495, "total_steps": 78105, "loss": 0.1415, "lr": 7.351982298047525e-07, "epoch": 3.872671403879393, "percentage": 77.45, "elapsed_time": "2:39:11", "remaining_time": "0:46:20", "throughput": 19933.09, "total_tokens": 190397824}
|
|
{"current_steps": 60500, "total_steps": 78105, "loss": 0.1781, "lr": 7.348025855137936e-07, "epoch": 3.87299148582037, "percentage": 77.46, "elapsed_time": "2:39:12", "remaining_time": "0:46:19", "throughput": 19933.4, "total_tokens": 190415424}
|
|
{"current_steps": 60505, "total_steps": 78105, "loss": 0.1485, "lr": 7.344070293675976e-07, "epoch": 3.873311567761347, "percentage": 77.47, "elapsed_time": "2:39:13", "remaining_time": "0:46:18", "throughput": 19933.6, "total_tokens": 190430080}
|
|
{"current_steps": 60510, "total_steps": 78105, "loss": 0.1483, "lr": 7.340115613859184e-07, "epoch": 3.8736316497023235, "percentage": 77.47, "elapsed_time": "2:39:13", "remaining_time": "0:46:18", "throughput": 19933.86, "total_tokens": 190446528}
|
|
{"current_steps": 60515, "total_steps": 78105, "loss": 0.2556, "lr": 7.336161815885034e-07, "epoch": 3.8739517316433005, "percentage": 77.48, "elapsed_time": "2:39:14", "remaining_time": "0:46:17", "throughput": 19934.09, "total_tokens": 190462144}
|
|
{"current_steps": 60520, "total_steps": 78105, "loss": 0.1332, "lr": 7.332208899950955e-07, "epoch": 3.8742718135842775, "percentage": 77.49, "elapsed_time": "2:39:15", "remaining_time": "0:46:16", "throughput": 19934.38, "total_tokens": 190478720}
|
|
{"current_steps": 60525, "total_steps": 78105, "loss": 0.1721, "lr": 7.328256866254336e-07, "epoch": 3.8745918955252545, "percentage": 77.49, "elapsed_time": "2:39:15", "remaining_time": "0:46:15", "throughput": 19934.65, "total_tokens": 190495232}
|
|
{"current_steps": 60530, "total_steps": 78105, "loss": 0.1546, "lr": 7.324305714992511e-07, "epoch": 3.8749119774662315, "percentage": 77.5, "elapsed_time": "2:39:16", "remaining_time": "0:46:14", "throughput": 19934.89, "total_tokens": 190511360}
|
|
{"current_steps": 60535, "total_steps": 78105, "loss": 0.2121, "lr": 7.320355446362804e-07, "epoch": 3.8752320594072085, "percentage": 77.5, "elapsed_time": "2:39:17", "remaining_time": "0:46:13", "throughput": 19935.12, "total_tokens": 190527104}
|
|
{"current_steps": 60540, "total_steps": 78105, "loss": 0.1911, "lr": 7.316406060562442e-07, "epoch": 3.875552141348185, "percentage": 77.51, "elapsed_time": "2:39:17", "remaining_time": "0:46:13", "throughput": 19935.31, "total_tokens": 190541632}
|
|
{"current_steps": 60545, "total_steps": 78105, "loss": 0.1714, "lr": 7.312457557788658e-07, "epoch": 3.875872223289162, "percentage": 77.52, "elapsed_time": "2:39:18", "remaining_time": "0:46:12", "throughput": 19935.55, "total_tokens": 190557248}
|
|
{"current_steps": 60550, "total_steps": 78105, "loss": 0.1592, "lr": 7.308509938238612e-07, "epoch": 3.876192305230139, "percentage": 77.52, "elapsed_time": "2:39:19", "remaining_time": "0:46:11", "throughput": 19935.79, "total_tokens": 190572864}
|
|
{"current_steps": 60555, "total_steps": 78105, "loss": 0.1854, "lr": 7.30456320210943e-07, "epoch": 3.8765123871711156, "percentage": 77.53, "elapsed_time": "2:39:19", "remaining_time": "0:46:10", "throughput": 19935.99, "total_tokens": 190587648}
|
|
{"current_steps": 60560, "total_steps": 78105, "loss": 0.2262, "lr": 7.300617349598188e-07, "epoch": 3.8768324691120926, "percentage": 77.54, "elapsed_time": "2:39:20", "remaining_time": "0:46:09", "throughput": 19936.24, "total_tokens": 190603968}
|
|
{"current_steps": 60565, "total_steps": 78105, "loss": 0.1007, "lr": 7.296672380901918e-07, "epoch": 3.8771525510530696, "percentage": 77.54, "elapsed_time": "2:39:21", "remaining_time": "0:46:09", "throughput": 19936.46, "total_tokens": 190619520}
|
|
{"current_steps": 60570, "total_steps": 78105, "loss": 0.1625, "lr": 7.292728296217627e-07, "epoch": 3.8774726329940465, "percentage": 77.55, "elapsed_time": "2:39:22", "remaining_time": "0:46:08", "throughput": 19936.7, "total_tokens": 190635072}
|
|
{"current_steps": 60575, "total_steps": 78105, "loss": 0.1696, "lr": 7.288785095742237e-07, "epoch": 3.8777927149350235, "percentage": 77.56, "elapsed_time": "2:39:22", "remaining_time": "0:46:07", "throughput": 19936.99, "total_tokens": 190651712}
|
|
{"current_steps": 60580, "total_steps": 78105, "loss": 0.1997, "lr": 7.284842779672674e-07, "epoch": 3.8781127968760005, "percentage": 77.56, "elapsed_time": "2:39:23", "remaining_time": "0:46:06", "throughput": 19937.22, "total_tokens": 190667520}
|
|
{"current_steps": 60585, "total_steps": 78105, "loss": 0.175, "lr": 7.280901348205788e-07, "epoch": 3.878432878816977, "percentage": 77.57, "elapsed_time": "2:39:24", "remaining_time": "0:46:05", "throughput": 19937.47, "total_tokens": 190683584}
|
|
{"current_steps": 60590, "total_steps": 78105, "loss": 0.128, "lr": 7.276960801538394e-07, "epoch": 3.878752960757954, "percentage": 77.58, "elapsed_time": "2:39:24", "remaining_time": "0:46:04", "throughput": 19937.73, "total_tokens": 190699968}
|
|
{"current_steps": 60595, "total_steps": 78105, "loss": 0.185, "lr": 7.273021139867259e-07, "epoch": 3.879073042698931, "percentage": 77.58, "elapsed_time": "2:39:25", "remaining_time": "0:46:04", "throughput": 19937.94, "total_tokens": 190715008}
|
|
{"current_steps": 60600, "total_steps": 78105, "loss": 0.1773, "lr": 7.269082363389113e-07, "epoch": 3.8793931246399076, "percentage": 77.59, "elapsed_time": "2:39:26", "remaining_time": "0:46:03", "throughput": 19938.16, "total_tokens": 190730496}
|
|
{"current_steps": 60605, "total_steps": 78105, "loss": 0.1031, "lr": 7.265144472300636e-07, "epoch": 3.8797132065808846, "percentage": 77.59, "elapsed_time": "2:39:26", "remaining_time": "0:46:02", "throughput": 19938.47, "total_tokens": 190747776}
|
|
{"current_steps": 60610, "total_steps": 78105, "loss": 0.1581, "lr": 7.261207466798467e-07, "epoch": 3.8800332885218616, "percentage": 77.6, "elapsed_time": "2:39:27", "remaining_time": "0:46:01", "throughput": 19938.71, "total_tokens": 190763712}
|
|
{"current_steps": 60615, "total_steps": 78105, "loss": 0.1172, "lr": 7.257271347079192e-07, "epoch": 3.8803533704628386, "percentage": 77.61, "elapsed_time": "2:39:28", "remaining_time": "0:46:00", "throughput": 19938.95, "total_tokens": 190779328}
|
|
{"current_steps": 60620, "total_steps": 78105, "loss": 0.1333, "lr": 7.253336113339382e-07, "epoch": 3.8806734524038156, "percentage": 77.61, "elapsed_time": "2:39:28", "remaining_time": "0:46:00", "throughput": 19939.16, "total_tokens": 190794752}
|
|
{"current_steps": 60625, "total_steps": 78105, "loss": 0.1782, "lr": 7.249401765775513e-07, "epoch": 3.880993534344792, "percentage": 77.62, "elapsed_time": "2:39:29", "remaining_time": "0:45:59", "throughput": 19939.38, "total_tokens": 190810496}
|
|
{"current_steps": 60630, "total_steps": 78105, "loss": 0.1172, "lr": 7.245468304584067e-07, "epoch": 3.881313616285769, "percentage": 77.63, "elapsed_time": "2:39:30", "remaining_time": "0:45:58", "throughput": 19939.64, "total_tokens": 190826816}
|
|
{"current_steps": 60635, "total_steps": 78105, "loss": 0.1543, "lr": 7.241535729961455e-07, "epoch": 3.881633698226746, "percentage": 77.63, "elapsed_time": "2:39:30", "remaining_time": "0:45:57", "throughput": 19939.84, "total_tokens": 190841792}
|
|
{"current_steps": 60640, "total_steps": 78105, "loss": 0.1781, "lr": 7.23760404210404e-07, "epoch": 3.881953780167723, "percentage": 77.64, "elapsed_time": "2:39:31", "remaining_time": "0:45:56", "throughput": 19940.07, "total_tokens": 190857664}
|
|
{"current_steps": 60645, "total_steps": 78105, "loss": 0.1459, "lr": 7.233673241208173e-07, "epoch": 3.8822738621086996, "percentage": 77.65, "elapsed_time": "2:39:32", "remaining_time": "0:45:55", "throughput": 19940.33, "total_tokens": 190874304}
|
|
{"current_steps": 60650, "total_steps": 78105, "loss": 0.0941, "lr": 7.229743327470109e-07, "epoch": 3.8825939440496766, "percentage": 77.65, "elapsed_time": "2:39:32", "remaining_time": "0:45:55", "throughput": 19940.53, "total_tokens": 190889472}
|
|
{"current_steps": 60655, "total_steps": 78105, "loss": 0.1908, "lr": 7.225814301086115e-07, "epoch": 3.8829140259906536, "percentage": 77.66, "elapsed_time": "2:39:33", "remaining_time": "0:45:54", "throughput": 19940.74, "total_tokens": 190904512}
|
|
{"current_steps": 60660, "total_steps": 78105, "loss": 0.1529, "lr": 7.221886162252358e-07, "epoch": 3.8832341079316306, "percentage": 77.66, "elapsed_time": "2:39:34", "remaining_time": "0:45:53", "throughput": 19940.93, "total_tokens": 190919552}
|
|
{"current_steps": 60665, "total_steps": 78105, "loss": 0.1297, "lr": 7.21795891116501e-07, "epoch": 3.8835541898726076, "percentage": 77.67, "elapsed_time": "2:39:34", "remaining_time": "0:45:52", "throughput": 19941.13, "total_tokens": 190934464}
|
|
{"current_steps": 60670, "total_steps": 78105, "loss": 0.1908, "lr": 7.214032548020172e-07, "epoch": 3.883874271813584, "percentage": 77.68, "elapsed_time": "2:39:35", "remaining_time": "0:45:51", "throughput": 19941.32, "total_tokens": 190949376}
|
|
{"current_steps": 60675, "total_steps": 78105, "loss": 0.162, "lr": 7.210107073013906e-07, "epoch": 3.884194353754561, "percentage": 77.68, "elapsed_time": "2:39:36", "remaining_time": "0:45:50", "throughput": 19941.54, "total_tokens": 190964672}
|
|
{"current_steps": 60680, "total_steps": 78105, "loss": 0.2023, "lr": 7.206182486342225e-07, "epoch": 3.884514435695538, "percentage": 77.69, "elapsed_time": "2:39:36", "remaining_time": "0:45:50", "throughput": 19941.72, "total_tokens": 190979200}
|
|
{"current_steps": 60685, "total_steps": 78105, "loss": 0.2337, "lr": 7.2022587882011e-07, "epoch": 3.884834517636515, "percentage": 77.7, "elapsed_time": "2:39:37", "remaining_time": "0:45:49", "throughput": 19941.98, "total_tokens": 190995200}
|
|
{"current_steps": 60690, "total_steps": 78105, "loss": 0.2028, "lr": 7.198335978786475e-07, "epoch": 3.8851545995774917, "percentage": 77.7, "elapsed_time": "2:39:38", "remaining_time": "0:45:48", "throughput": 19942.23, "total_tokens": 191011584}
|
|
{"current_steps": 60695, "total_steps": 78105, "loss": 0.2709, "lr": 7.194414058294225e-07, "epoch": 3.8854746815184686, "percentage": 77.71, "elapsed_time": "2:39:38", "remaining_time": "0:45:47", "throughput": 19942.51, "total_tokens": 191028288}
|
|
{"current_steps": 60700, "total_steps": 78105, "loss": 0.2329, "lr": 7.19049302692019e-07, "epoch": 3.8857947634594456, "percentage": 77.72, "elapsed_time": "2:39:39", "remaining_time": "0:45:46", "throughput": 19942.72, "total_tokens": 191043840}
|
|
{"current_steps": 60705, "total_steps": 78105, "loss": 0.2638, "lr": 7.186572884860169e-07, "epoch": 3.8861148454004226, "percentage": 77.72, "elapsed_time": "2:39:40", "remaining_time": "0:45:46", "throughput": 19942.95, "total_tokens": 191059392}
|
|
{"current_steps": 60710, "total_steps": 78105, "loss": 0.1877, "lr": 7.182653632309908e-07, "epoch": 3.8864349273413996, "percentage": 77.73, "elapsed_time": "2:39:40", "remaining_time": "0:45:45", "throughput": 19943.21, "total_tokens": 191075264}
|
|
{"current_steps": 60715, "total_steps": 78105, "loss": 0.1368, "lr": 7.17873526946512e-07, "epoch": 3.886755009282376, "percentage": 77.74, "elapsed_time": "2:39:41", "remaining_time": "0:45:44", "throughput": 19943.49, "total_tokens": 191092032}
|
|
{"current_steps": 60720, "total_steps": 78105, "loss": 0.15, "lr": 7.174817796521466e-07, "epoch": 3.887075091223353, "percentage": 77.74, "elapsed_time": "2:39:42", "remaining_time": "0:45:43", "throughput": 19943.71, "total_tokens": 191107584}
|
|
{"current_steps": 60725, "total_steps": 78105, "loss": 0.1673, "lr": 7.170901213674552e-07, "epoch": 3.88739517316433, "percentage": 77.75, "elapsed_time": "2:39:43", "remaining_time": "0:45:42", "throughput": 19943.96, "total_tokens": 191123520}
|
|
{"current_steps": 60730, "total_steps": 78105, "loss": 0.1535, "lr": 7.166985521119982e-07, "epoch": 3.8877152551053067, "percentage": 77.75, "elapsed_time": "2:39:43", "remaining_time": "0:45:41", "throughput": 19944.23, "total_tokens": 191140096}
|
|
{"current_steps": 60735, "total_steps": 78105, "loss": 0.1902, "lr": 7.16307071905325e-07, "epoch": 3.8880353370462837, "percentage": 77.76, "elapsed_time": "2:39:44", "remaining_time": "0:45:41", "throughput": 19944.44, "total_tokens": 191154944}
|
|
{"current_steps": 60740, "total_steps": 78105, "loss": 0.1822, "lr": 7.159156807669862e-07, "epoch": 3.8883554189872607, "percentage": 77.77, "elapsed_time": "2:39:45", "remaining_time": "0:45:40", "throughput": 19944.64, "total_tokens": 191169856}
|
|
{"current_steps": 60745, "total_steps": 78105, "loss": 0.1245, "lr": 7.155243787165256e-07, "epoch": 3.8886755009282377, "percentage": 77.77, "elapsed_time": "2:39:45", "remaining_time": "0:45:39", "throughput": 19944.87, "total_tokens": 191185216}
|
|
{"current_steps": 60750, "total_steps": 78105, "loss": 0.131, "lr": 7.151331657734825e-07, "epoch": 3.8889955828692147, "percentage": 77.78, "elapsed_time": "2:39:46", "remaining_time": "0:45:38", "throughput": 19945.03, "total_tokens": 191199488}
|
|
{"current_steps": 60755, "total_steps": 78105, "loss": 0.2288, "lr": 7.147420419573922e-07, "epoch": 3.8893156648101916, "percentage": 77.79, "elapsed_time": "2:39:47", "remaining_time": "0:45:37", "throughput": 19945.32, "total_tokens": 191216384}
|
|
{"current_steps": 60760, "total_steps": 78105, "loss": 0.1724, "lr": 7.143510072877844e-07, "epoch": 3.889635746751168, "percentage": 77.79, "elapsed_time": "2:39:47", "remaining_time": "0:45:36", "throughput": 19945.51, "total_tokens": 191231680}
|
|
{"current_steps": 60765, "total_steps": 78105, "loss": 0.1791, "lr": 7.139600617841877e-07, "epoch": 3.889955828692145, "percentage": 77.8, "elapsed_time": "2:39:48", "remaining_time": "0:45:36", "throughput": 19945.71, "total_tokens": 191246848}
|
|
{"current_steps": 60770, "total_steps": 78105, "loss": 0.1297, "lr": 7.135692054661206e-07, "epoch": 3.890275910633122, "percentage": 77.81, "elapsed_time": "2:39:49", "remaining_time": "0:45:35", "throughput": 19945.98, "total_tokens": 191263552}
|
|
{"current_steps": 60775, "total_steps": 78105, "loss": 0.1783, "lr": 7.131784383531032e-07, "epoch": 3.8905959925740987, "percentage": 77.81, "elapsed_time": "2:39:49", "remaining_time": "0:45:34", "throughput": 19946.22, "total_tokens": 191279744}
|
|
{"current_steps": 60780, "total_steps": 78105, "loss": 0.1951, "lr": 7.127877604646474e-07, "epoch": 3.8909160745150757, "percentage": 77.82, "elapsed_time": "2:39:50", "remaining_time": "0:45:33", "throughput": 19946.41, "total_tokens": 191294464}
|
|
{"current_steps": 60785, "total_steps": 78105, "loss": 0.2041, "lr": 7.123971718202616e-07, "epoch": 3.8912361564560527, "percentage": 77.82, "elapsed_time": "2:39:51", "remaining_time": "0:45:32", "throughput": 19946.58, "total_tokens": 191308864}
|
|
{"current_steps": 60790, "total_steps": 78105, "loss": 0.2145, "lr": 7.120066724394498e-07, "epoch": 3.8915562383970297, "percentage": 77.83, "elapsed_time": "2:39:51", "remaining_time": "0:45:32", "throughput": 19946.82, "total_tokens": 191324352}
|
|
{"current_steps": 60795, "total_steps": 78105, "loss": 0.1404, "lr": 7.116162623417114e-07, "epoch": 3.8918763203380067, "percentage": 77.84, "elapsed_time": "2:39:52", "remaining_time": "0:45:31", "throughput": 19947.06, "total_tokens": 191340288}
|
|
{"current_steps": 60800, "total_steps": 78105, "loss": 0.1309, "lr": 7.112259415465414e-07, "epoch": 3.8921964022789837, "percentage": 77.84, "elapsed_time": "2:39:53", "remaining_time": "0:45:30", "throughput": 19947.27, "total_tokens": 191355584}
|
|
{"current_steps": 60805, "total_steps": 78105, "loss": 0.1976, "lr": 7.108357100734298e-07, "epoch": 3.89251648421996, "percentage": 77.85, "elapsed_time": "2:39:53", "remaining_time": "0:45:29", "throughput": 19947.5, "total_tokens": 191371328}
|
|
{"current_steps": 60810, "total_steps": 78105, "loss": 0.1907, "lr": 7.104455679418642e-07, "epoch": 3.892836566160937, "percentage": 77.86, "elapsed_time": "2:39:54", "remaining_time": "0:45:28", "throughput": 19947.7, "total_tokens": 191386368}
|
|
{"current_steps": 60815, "total_steps": 78105, "loss": 0.2097, "lr": 7.100555151713254e-07, "epoch": 3.893156648101914, "percentage": 77.86, "elapsed_time": "2:39:55", "remaining_time": "0:45:27", "throughput": 19947.86, "total_tokens": 191400448}
|
|
{"current_steps": 60820, "total_steps": 78105, "loss": 0.1963, "lr": 7.096655517812908e-07, "epoch": 3.8934767300428907, "percentage": 77.87, "elapsed_time": "2:39:55", "remaining_time": "0:45:27", "throughput": 19948.12, "total_tokens": 191416768}
|
|
{"current_steps": 60825, "total_steps": 78105, "loss": 0.1735, "lr": 7.09275677791233e-07, "epoch": 3.8937968119838677, "percentage": 77.88, "elapsed_time": "2:39:56", "remaining_time": "0:45:26", "throughput": 19948.34, "total_tokens": 191432128}
|
|
{"current_steps": 60830, "total_steps": 78105, "loss": 0.0954, "lr": 7.088858932206203e-07, "epoch": 3.8941168939248447, "percentage": 77.88, "elapsed_time": "2:39:57", "remaining_time": "0:45:25", "throughput": 19948.59, "total_tokens": 191448128}
|
|
{"current_steps": 60835, "total_steps": 78105, "loss": 0.16, "lr": 7.084961980889166e-07, "epoch": 3.8944369758658217, "percentage": 77.89, "elapsed_time": "2:39:57", "remaining_time": "0:45:24", "throughput": 19948.9, "total_tokens": 191465664}
|
|
{"current_steps": 60840, "total_steps": 78105, "loss": 0.2341, "lr": 7.081065924155811e-07, "epoch": 3.8947570578067987, "percentage": 77.9, "elapsed_time": "2:39:58", "remaining_time": "0:45:23", "throughput": 19949.08, "total_tokens": 191480256}
|
|
{"current_steps": 60845, "total_steps": 78105, "loss": 0.1444, "lr": 7.077170762200681e-07, "epoch": 3.8950771397477757, "percentage": 77.9, "elapsed_time": "2:39:59", "remaining_time": "0:45:22", "throughput": 19949.26, "total_tokens": 191495424}
|
|
{"current_steps": 60850, "total_steps": 78105, "loss": 0.1778, "lr": 7.073276495218298e-07, "epoch": 3.8953972216887522, "percentage": 77.91, "elapsed_time": "2:39:59", "remaining_time": "0:45:22", "throughput": 19949.51, "total_tokens": 191511744}
|
|
{"current_steps": 60855, "total_steps": 78105, "loss": 0.0944, "lr": 7.069383123403098e-07, "epoch": 3.8957173036297292, "percentage": 77.91, "elapsed_time": "2:40:00", "remaining_time": "0:45:21", "throughput": 19949.72, "total_tokens": 191526784}
|
|
{"current_steps": 60860, "total_steps": 78105, "loss": 0.1969, "lr": 7.065490646949516e-07, "epoch": 3.8960373855707062, "percentage": 77.92, "elapsed_time": "2:40:01", "remaining_time": "0:45:20", "throughput": 19949.97, "total_tokens": 191543040}
|
|
{"current_steps": 60865, "total_steps": 78105, "loss": 0.164, "lr": 7.061599066051911e-07, "epoch": 3.8963574675116828, "percentage": 77.93, "elapsed_time": "2:40:01", "remaining_time": "0:45:19", "throughput": 19950.26, "total_tokens": 191560512}
|
|
{"current_steps": 60870, "total_steps": 78105, "loss": 0.1916, "lr": 7.057708380904604e-07, "epoch": 3.8966775494526598, "percentage": 77.93, "elapsed_time": "2:40:02", "remaining_time": "0:45:18", "throughput": 19950.45, "total_tokens": 191575744}
|
|
{"current_steps": 60875, "total_steps": 78105, "loss": 0.2014, "lr": 7.0538185917019e-07, "epoch": 3.8969976313936368, "percentage": 77.94, "elapsed_time": "2:40:03", "remaining_time": "0:45:18", "throughput": 19950.66, "total_tokens": 191591232}
|
|
{"current_steps": 60880, "total_steps": 78105, "loss": 0.3399, "lr": 7.049929698638e-07, "epoch": 3.8973177133346137, "percentage": 77.95, "elapsed_time": "2:40:03", "remaining_time": "0:45:17", "throughput": 19950.89, "total_tokens": 191606848}
|
|
{"current_steps": 60885, "total_steps": 78105, "loss": 0.1631, "lr": 7.046041701907127e-07, "epoch": 3.8976377952755907, "percentage": 77.95, "elapsed_time": "2:40:04", "remaining_time": "0:45:16", "throughput": 19951.13, "total_tokens": 191623040}
|
|
{"current_steps": 60890, "total_steps": 78105, "loss": 0.1141, "lr": 7.0421546017034e-07, "epoch": 3.8979578772165673, "percentage": 77.96, "elapsed_time": "2:40:05", "remaining_time": "0:45:15", "throughput": 19951.28, "total_tokens": 191637376}
|
|
{"current_steps": 60895, "total_steps": 78105, "loss": 0.2313, "lr": 7.038268398220937e-07, "epoch": 3.8982779591575443, "percentage": 77.97, "elapsed_time": "2:40:05", "remaining_time": "0:45:14", "throughput": 19951.48, "total_tokens": 191652480}
|
|
{"current_steps": 60900, "total_steps": 78105, "loss": 0.1317, "lr": 7.034383091653793e-07, "epoch": 3.8985980410985213, "percentage": 77.97, "elapsed_time": "2:40:06", "remaining_time": "0:45:13", "throughput": 19951.73, "total_tokens": 191668800}
|
|
{"current_steps": 60905, "total_steps": 78105, "loss": 0.2489, "lr": 7.030498682195977e-07, "epoch": 3.8989181230394983, "percentage": 77.98, "elapsed_time": "2:40:07", "remaining_time": "0:45:13", "throughput": 19952.01, "total_tokens": 191685696}
|
|
{"current_steps": 60910, "total_steps": 78105, "loss": 0.1676, "lr": 7.026615170041459e-07, "epoch": 3.899238204980475, "percentage": 77.98, "elapsed_time": "2:40:08", "remaining_time": "0:45:12", "throughput": 19952.21, "total_tokens": 191700992}
|
|
{"current_steps": 60915, "total_steps": 78105, "loss": 0.2083, "lr": 7.022732555384151e-07, "epoch": 3.899558286921452, "percentage": 77.99, "elapsed_time": "2:40:08", "remaining_time": "0:45:11", "throughput": 19952.47, "total_tokens": 191717376}
|
|
{"current_steps": 60920, "total_steps": 78105, "loss": 0.1527, "lr": 7.018850838417946e-07, "epoch": 3.899878368862429, "percentage": 78.0, "elapsed_time": "2:40:09", "remaining_time": "0:45:10", "throughput": 19952.76, "total_tokens": 191734720}
|
|
{"current_steps": 60925, "total_steps": 78105, "loss": 0.1515, "lr": 7.01497001933667e-07, "epoch": 3.9001984508034058, "percentage": 78.0, "elapsed_time": "2:40:10", "remaining_time": "0:45:09", "throughput": 19952.98, "total_tokens": 191750400}
|
|
{"current_steps": 60930, "total_steps": 78105, "loss": 0.1293, "lr": 7.011090098334111e-07, "epoch": 3.9005185327443828, "percentage": 78.01, "elapsed_time": "2:40:10", "remaining_time": "0:45:09", "throughput": 19953.2, "total_tokens": 191766208}
|
|
{"current_steps": 60935, "total_steps": 78105, "loss": 0.1245, "lr": 7.007211075604012e-07, "epoch": 3.9008386146853593, "percentage": 78.02, "elapsed_time": "2:40:11", "remaining_time": "0:45:08", "throughput": 19953.44, "total_tokens": 191782592}
|
|
{"current_steps": 60940, "total_steps": 78105, "loss": 0.1701, "lr": 7.003332951340069e-07, "epoch": 3.9011586966263363, "percentage": 78.02, "elapsed_time": "2:40:12", "remaining_time": "0:45:07", "throughput": 19953.68, "total_tokens": 191798720}
|
|
{"current_steps": 60945, "total_steps": 78105, "loss": 0.1942, "lr": 6.99945572573594e-07, "epoch": 3.9014787785673133, "percentage": 78.03, "elapsed_time": "2:40:12", "remaining_time": "0:45:06", "throughput": 19953.86, "total_tokens": 191813376}
|
|
{"current_steps": 60950, "total_steps": 78105, "loss": 0.1709, "lr": 6.995579398985231e-07, "epoch": 3.9017988605082903, "percentage": 78.04, "elapsed_time": "2:40:13", "remaining_time": "0:45:05", "throughput": 19954.1, "total_tokens": 191828992}
|
|
{"current_steps": 60955, "total_steps": 78105, "loss": 0.1457, "lr": 6.991703971281494e-07, "epoch": 3.902118942449267, "percentage": 78.04, "elapsed_time": "2:40:14", "remaining_time": "0:45:04", "throughput": 19954.31, "total_tokens": 191844288}
|
|
{"current_steps": 60960, "total_steps": 78105, "loss": 0.1594, "lr": 6.987829442818275e-07, "epoch": 3.902439024390244, "percentage": 78.05, "elapsed_time": "2:40:14", "remaining_time": "0:45:04", "throughput": 19954.56, "total_tokens": 191860160}
|
|
{"current_steps": 60965, "total_steps": 78105, "loss": 0.2075, "lr": 6.983955813789018e-07, "epoch": 3.902759106331221, "percentage": 78.06, "elapsed_time": "2:40:15", "remaining_time": "0:45:03", "throughput": 19954.77, "total_tokens": 191875392}
|
|
{"current_steps": 60970, "total_steps": 78105, "loss": 0.1591, "lr": 6.980083084387182e-07, "epoch": 3.903079188272198, "percentage": 78.06, "elapsed_time": "2:40:16", "remaining_time": "0:45:02", "throughput": 19954.99, "total_tokens": 191891136}
|
|
{"current_steps": 60975, "total_steps": 78105, "loss": 0.173, "lr": 6.976211254806118e-07, "epoch": 3.903399270213175, "percentage": 78.07, "elapsed_time": "2:40:16", "remaining_time": "0:45:01", "throughput": 19955.22, "total_tokens": 191906880}
|
|
{"current_steps": 60980, "total_steps": 78105, "loss": 0.1801, "lr": 6.972340325239185e-07, "epoch": 3.9037193521541513, "percentage": 78.07, "elapsed_time": "2:40:17", "remaining_time": "0:45:00", "throughput": 19955.45, "total_tokens": 191922752}
|
|
{"current_steps": 60985, "total_steps": 78105, "loss": 0.1427, "lr": 6.968470295879679e-07, "epoch": 3.9040394340951283, "percentage": 78.08, "elapsed_time": "2:40:18", "remaining_time": "0:45:00", "throughput": 19955.8, "total_tokens": 191941184}
|
|
{"current_steps": 60990, "total_steps": 78105, "loss": 0.1502, "lr": 6.964601166920834e-07, "epoch": 3.9043595160361053, "percentage": 78.09, "elapsed_time": "2:40:19", "remaining_time": "0:44:59", "throughput": 19956.04, "total_tokens": 191957184}
|
|
{"current_steps": 60995, "total_steps": 78105, "loss": 0.1786, "lr": 6.960732938555878e-07, "epoch": 3.904679597977082, "percentage": 78.09, "elapsed_time": "2:40:19", "remaining_time": "0:44:58", "throughput": 19956.34, "total_tokens": 191974656}
|
|
{"current_steps": 61000, "total_steps": 78105, "loss": 0.1952, "lr": 6.956865610977942e-07, "epoch": 3.904999679918059, "percentage": 78.1, "elapsed_time": "2:40:20", "remaining_time": "0:44:57", "throughput": 19956.57, "total_tokens": 191990528}
|
|
{"current_steps": 61005, "total_steps": 78105, "loss": 0.1903, "lr": 6.95299918438016e-07, "epoch": 3.905319761859036, "percentage": 78.11, "elapsed_time": "2:40:21", "remaining_time": "0:44:56", "throughput": 19956.79, "total_tokens": 192005696}
|
|
{"current_steps": 61010, "total_steps": 78105, "loss": 0.2206, "lr": 6.949133658955598e-07, "epoch": 3.905639843800013, "percentage": 78.11, "elapsed_time": "2:40:21", "remaining_time": "0:44:56", "throughput": 19956.97, "total_tokens": 192020480}
|
|
{"current_steps": 61015, "total_steps": 78105, "loss": 0.1778, "lr": 6.945269034897276e-07, "epoch": 3.90595992574099, "percentage": 78.12, "elapsed_time": "2:40:22", "remaining_time": "0:44:55", "throughput": 19957.18, "total_tokens": 192035648}
|
|
{"current_steps": 61020, "total_steps": 78105, "loss": 0.1518, "lr": 6.941405312398175e-07, "epoch": 3.906280007681967, "percentage": 78.13, "elapsed_time": "2:40:23", "remaining_time": "0:44:54", "throughput": 19957.44, "total_tokens": 192051968}
|
|
{"current_steps": 61025, "total_steps": 78105, "loss": 0.1313, "lr": 6.937542491651228e-07, "epoch": 3.9066000896229434, "percentage": 78.13, "elapsed_time": "2:40:23", "remaining_time": "0:44:53", "throughput": 19957.65, "total_tokens": 192067392}
|
|
{"current_steps": 61030, "total_steps": 78105, "loss": 0.0996, "lr": 6.933680572849327e-07, "epoch": 3.9069201715639204, "percentage": 78.14, "elapsed_time": "2:40:24", "remaining_time": "0:44:52", "throughput": 19957.86, "total_tokens": 192082432}
|
|
{"current_steps": 61035, "total_steps": 78105, "loss": 0.1796, "lr": 6.929819556185305e-07, "epoch": 3.9072402535048973, "percentage": 78.14, "elapsed_time": "2:40:25", "remaining_time": "0:44:51", "throughput": 19958.09, "total_tokens": 192098048}
|
|
{"current_steps": 61040, "total_steps": 78105, "loss": 0.1404, "lr": 6.925959441851979e-07, "epoch": 3.907560335445874, "percentage": 78.15, "elapsed_time": "2:40:25", "remaining_time": "0:44:51", "throughput": 19958.27, "total_tokens": 192113152}
|
|
{"current_steps": 61045, "total_steps": 78105, "loss": 0.1074, "lr": 6.922100230042092e-07, "epoch": 3.907880417386851, "percentage": 78.16, "elapsed_time": "2:40:26", "remaining_time": "0:44:50", "throughput": 19958.53, "total_tokens": 192129600}
|
|
{"current_steps": 61050, "total_steps": 78105, "loss": 0.1495, "lr": 6.918241920948357e-07, "epoch": 3.908200499327828, "percentage": 78.16, "elapsed_time": "2:40:27", "remaining_time": "0:44:49", "throughput": 19958.76, "total_tokens": 192145344}
|
|
{"current_steps": 61055, "total_steps": 78105, "loss": 0.1553, "lr": 6.914384514763437e-07, "epoch": 3.908520581268805, "percentage": 78.17, "elapsed_time": "2:40:27", "remaining_time": "0:44:48", "throughput": 19958.96, "total_tokens": 192160384}
|
|
{"current_steps": 61060, "total_steps": 78105, "loss": 0.2117, "lr": 6.910528011679948e-07, "epoch": 3.908840663209782, "percentage": 78.18, "elapsed_time": "2:40:28", "remaining_time": "0:44:47", "throughput": 19959.22, "total_tokens": 192177024}
|
|
{"current_steps": 61065, "total_steps": 78105, "loss": 0.186, "lr": 6.906672411890459e-07, "epoch": 3.909160745150759, "percentage": 78.18, "elapsed_time": "2:40:29", "remaining_time": "0:44:46", "throughput": 19959.42, "total_tokens": 192192128}
|
|
{"current_steps": 61070, "total_steps": 78105, "loss": 0.1418, "lr": 6.902817715587518e-07, "epoch": 3.9094808270917354, "percentage": 78.19, "elapsed_time": "2:40:29", "remaining_time": "0:44:46", "throughput": 19959.65, "total_tokens": 192207680}
|
|
{"current_steps": 61075, "total_steps": 78105, "loss": 0.1739, "lr": 6.898963922963584e-07, "epoch": 3.9098009090327124, "percentage": 78.2, "elapsed_time": "2:40:30", "remaining_time": "0:44:45", "throughput": 19959.87, "total_tokens": 192223616}
|
|
{"current_steps": 61080, "total_steps": 78105, "loss": 0.2011, "lr": 6.895111034211119e-07, "epoch": 3.9101209909736894, "percentage": 78.2, "elapsed_time": "2:40:31", "remaining_time": "0:44:44", "throughput": 19960.07, "total_tokens": 192238720}
|
|
{"current_steps": 61085, "total_steps": 78105, "loss": 0.1026, "lr": 6.891259049522489e-07, "epoch": 3.910441072914666, "percentage": 78.21, "elapsed_time": "2:40:31", "remaining_time": "0:44:43", "throughput": 19960.28, "total_tokens": 192254080}
|
|
{"current_steps": 61090, "total_steps": 78105, "loss": 0.1356, "lr": 6.887407969090066e-07, "epoch": 3.910761154855643, "percentage": 78.22, "elapsed_time": "2:40:32", "remaining_time": "0:44:42", "throughput": 19960.54, "total_tokens": 192270720}
|
|
{"current_steps": 61095, "total_steps": 78105, "loss": 0.1405, "lr": 6.883557793106143e-07, "epoch": 3.91108123679662, "percentage": 78.22, "elapsed_time": "2:40:33", "remaining_time": "0:44:42", "throughput": 19960.75, "total_tokens": 192286400}
|
|
{"current_steps": 61100, "total_steps": 78105, "loss": 0.1561, "lr": 6.879708521762974e-07, "epoch": 3.911401318737597, "percentage": 78.23, "elapsed_time": "2:40:33", "remaining_time": "0:44:41", "throughput": 19960.95, "total_tokens": 192301504}
|
|
{"current_steps": 61105, "total_steps": 78105, "loss": 0.1513, "lr": 6.875860155252787e-07, "epoch": 3.911721400678574, "percentage": 78.23, "elapsed_time": "2:40:34", "remaining_time": "0:44:40", "throughput": 19961.18, "total_tokens": 192317184}
|
|
{"current_steps": 61110, "total_steps": 78105, "loss": 0.1592, "lr": 6.872012693767727e-07, "epoch": 3.912041482619551, "percentage": 78.24, "elapsed_time": "2:40:35", "remaining_time": "0:44:39", "throughput": 19961.44, "total_tokens": 192333760}
|
|
{"current_steps": 61115, "total_steps": 78105, "loss": 0.2469, "lr": 6.86816613749994e-07, "epoch": 3.9123615645605274, "percentage": 78.25, "elapsed_time": "2:40:35", "remaining_time": "0:44:38", "throughput": 19961.65, "total_tokens": 192349120}
|
|
{"current_steps": 61120, "total_steps": 78105, "loss": 0.2381, "lr": 6.864320486641476e-07, "epoch": 3.9126816465015044, "percentage": 78.25, "elapsed_time": "2:40:36", "remaining_time": "0:44:37", "throughput": 19961.88, "total_tokens": 192364544}
|
|
{"current_steps": 61125, "total_steps": 78105, "loss": 0.1527, "lr": 6.860475741384392e-07, "epoch": 3.9130017284424814, "percentage": 78.26, "elapsed_time": "2:40:37", "remaining_time": "0:44:37", "throughput": 19962.11, "total_tokens": 192380800}
|
|
{"current_steps": 61130, "total_steps": 78105, "loss": 0.1796, "lr": 6.856631901920662e-07, "epoch": 3.913321810383458, "percentage": 78.27, "elapsed_time": "2:40:38", "remaining_time": "0:44:36", "throughput": 19960.51, "total_tokens": 192396288}
|
|
{"current_steps": 61135, "total_steps": 78105, "loss": 0.2359, "lr": 6.852788968442232e-07, "epoch": 3.913641892324435, "percentage": 78.27, "elapsed_time": "2:40:39", "remaining_time": "0:44:35", "throughput": 19960.73, "total_tokens": 192411840}
|
|
{"current_steps": 61140, "total_steps": 78105, "loss": 0.223, "lr": 6.848946941140991e-07, "epoch": 3.913961974265412, "percentage": 78.28, "elapsed_time": "2:40:40", "remaining_time": "0:44:34", "throughput": 19960.99, "total_tokens": 192427968}
|
|
{"current_steps": 61145, "total_steps": 78105, "loss": 0.1878, "lr": 6.845105820208797e-07, "epoch": 3.914282056206389, "percentage": 78.29, "elapsed_time": "2:40:40", "remaining_time": "0:44:34", "throughput": 19961.18, "total_tokens": 192443072}
|
|
{"current_steps": 61150, "total_steps": 78105, "loss": 0.1909, "lr": 6.841265605837447e-07, "epoch": 3.914602138147366, "percentage": 78.29, "elapsed_time": "2:40:41", "remaining_time": "0:44:33", "throughput": 19961.4, "total_tokens": 192458240}
|
|
{"current_steps": 61155, "total_steps": 78105, "loss": 0.1758, "lr": 6.837426298218711e-07, "epoch": 3.9149222200883425, "percentage": 78.3, "elapsed_time": "2:40:42", "remaining_time": "0:44:32", "throughput": 19961.73, "total_tokens": 192476032}
|
|
{"current_steps": 61160, "total_steps": 78105, "loss": 0.1384, "lr": 6.833587897544302e-07, "epoch": 3.9152423020293194, "percentage": 78.3, "elapsed_time": "2:40:42", "remaining_time": "0:44:31", "throughput": 19961.94, "total_tokens": 192491264}
|
|
{"current_steps": 61165, "total_steps": 78105, "loss": 0.1865, "lr": 6.829750404005888e-07, "epoch": 3.9155623839702964, "percentage": 78.31, "elapsed_time": "2:40:43", "remaining_time": "0:44:30", "throughput": 19962.12, "total_tokens": 192505728}
|
|
{"current_steps": 61170, "total_steps": 78105, "loss": 0.0989, "lr": 6.825913817795096e-07, "epoch": 3.9158824659112734, "percentage": 78.32, "elapsed_time": "2:40:44", "remaining_time": "0:44:30", "throughput": 19962.34, "total_tokens": 192521344}
|
|
{"current_steps": 61175, "total_steps": 78105, "loss": 0.2037, "lr": 6.8220781391035e-07, "epoch": 3.91620254785225, "percentage": 78.32, "elapsed_time": "2:40:44", "remaining_time": "0:44:29", "throughput": 19962.62, "total_tokens": 192538176}
|
|
{"current_steps": 61180, "total_steps": 78105, "loss": 0.1416, "lr": 6.818243368122637e-07, "epoch": 3.916522629793227, "percentage": 78.33, "elapsed_time": "2:40:45", "remaining_time": "0:44:28", "throughput": 19962.81, "total_tokens": 192553024}
|
|
{"current_steps": 61185, "total_steps": 78105, "loss": 0.1883, "lr": 6.814409505043986e-07, "epoch": 3.916842711734204, "percentage": 78.34, "elapsed_time": "2:40:46", "remaining_time": "0:44:27", "throughput": 19963.08, "total_tokens": 192570112}
|
|
{"current_steps": 61190, "total_steps": 78105, "loss": 0.1616, "lr": 6.810576550059014e-07, "epoch": 3.917162793675181, "percentage": 78.34, "elapsed_time": "2:40:46", "remaining_time": "0:44:26", "throughput": 19963.27, "total_tokens": 192584768}
|
|
{"current_steps": 61195, "total_steps": 78105, "loss": 0.1655, "lr": 6.806744503359092e-07, "epoch": 3.917482875616158, "percentage": 78.35, "elapsed_time": "2:40:47", "remaining_time": "0:44:25", "throughput": 19963.48, "total_tokens": 192599616}
|
|
{"current_steps": 61200, "total_steps": 78105, "loss": 0.172, "lr": 6.802913365135597e-07, "epoch": 3.9178029575571345, "percentage": 78.36, "elapsed_time": "2:40:48", "remaining_time": "0:44:25", "throughput": 19963.7, "total_tokens": 192615168}
|
|
{"current_steps": 61205, "total_steps": 78105, "loss": 0.2143, "lr": 6.79908313557981e-07, "epoch": 3.9181230394981115, "percentage": 78.36, "elapsed_time": "2:40:48", "remaining_time": "0:44:24", "throughput": 19963.89, "total_tokens": 192630208}
|
|
{"current_steps": 61210, "total_steps": 78105, "loss": 0.1218, "lr": 6.79525381488301e-07, "epoch": 3.9184431214390885, "percentage": 78.37, "elapsed_time": "2:40:49", "remaining_time": "0:44:23", "throughput": 19964.11, "total_tokens": 192645888}
|
|
{"current_steps": 61215, "total_steps": 78105, "loss": 0.1763, "lr": 6.791425403236413e-07, "epoch": 3.9187632033800655, "percentage": 78.38, "elapsed_time": "2:40:50", "remaining_time": "0:44:22", "throughput": 19964.33, "total_tokens": 192661248}
|
|
{"current_steps": 61220, "total_steps": 78105, "loss": 0.1867, "lr": 6.787597900831178e-07, "epoch": 3.919083285321042, "percentage": 78.38, "elapsed_time": "2:40:51", "remaining_time": "0:44:21", "throughput": 19964.86, "total_tokens": 192689216}
|
|
{"current_steps": 61225, "total_steps": 78105, "loss": 0.114, "lr": 6.783771307858453e-07, "epoch": 3.919403367262019, "percentage": 78.39, "elapsed_time": "2:40:52", "remaining_time": "0:44:21", "throughput": 19965.1, "total_tokens": 192705280}
|
|
{"current_steps": 61230, "total_steps": 78105, "loss": 0.0957, "lr": 6.779945624509288e-07, "epoch": 3.919723449202996, "percentage": 78.39, "elapsed_time": "2:40:52", "remaining_time": "0:44:20", "throughput": 19965.28, "total_tokens": 192719744}
|
|
{"current_steps": 61235, "total_steps": 78105, "loss": 0.1738, "lr": 6.776120850974743e-07, "epoch": 3.920043531143973, "percentage": 78.4, "elapsed_time": "2:40:53", "remaining_time": "0:44:19", "throughput": 19965.52, "total_tokens": 192735680}
|
|
{"current_steps": 61240, "total_steps": 78105, "loss": 0.1434, "lr": 6.772296987445798e-07, "epoch": 3.92036361308495, "percentage": 78.41, "elapsed_time": "2:40:54", "remaining_time": "0:44:18", "throughput": 19965.74, "total_tokens": 192751104}
|
|
{"current_steps": 61245, "total_steps": 78105, "loss": 0.1488, "lr": 6.768474034113396e-07, "epoch": 3.9206836950259265, "percentage": 78.41, "elapsed_time": "2:40:54", "remaining_time": "0:44:17", "throughput": 19965.99, "total_tokens": 192767168}
|
|
{"current_steps": 61250, "total_steps": 78105, "loss": 0.1799, "lr": 6.764651991168436e-07, "epoch": 3.9210037769669035, "percentage": 78.42, "elapsed_time": "2:40:55", "remaining_time": "0:44:17", "throughput": 19966.21, "total_tokens": 192782720}
|
|
{"current_steps": 61255, "total_steps": 78105, "loss": 0.1952, "lr": 6.760830858801773e-07, "epoch": 3.9213238589078805, "percentage": 78.43, "elapsed_time": "2:40:56", "remaining_time": "0:44:16", "throughput": 19966.45, "total_tokens": 192798976}
|
|
{"current_steps": 61260, "total_steps": 78105, "loss": 0.1964, "lr": 6.757010637204212e-07, "epoch": 3.921643940848857, "percentage": 78.43, "elapsed_time": "2:40:56", "remaining_time": "0:44:15", "throughput": 19966.71, "total_tokens": 192815552}
|
|
{"current_steps": 61265, "total_steps": 78105, "loss": 0.1476, "lr": 6.753191326566508e-07, "epoch": 3.921964022789834, "percentage": 78.44, "elapsed_time": "2:40:57", "remaining_time": "0:44:14", "throughput": 19966.95, "total_tokens": 192831744}
|
|
{"current_steps": 61270, "total_steps": 78105, "loss": 0.1225, "lr": 6.74937292707939e-07, "epoch": 3.922284104730811, "percentage": 78.45, "elapsed_time": "2:40:58", "remaining_time": "0:44:13", "throughput": 19967.17, "total_tokens": 192847168}
|
|
{"current_steps": 61275, "total_steps": 78105, "loss": 0.167, "lr": 6.745555438933527e-07, "epoch": 3.922604186671788, "percentage": 78.45, "elapsed_time": "2:40:58", "remaining_time": "0:44:12", "throughput": 19967.39, "total_tokens": 192862784}
|
|
{"current_steps": 61280, "total_steps": 78105, "loss": 0.1852, "lr": 6.741738862319538e-07, "epoch": 3.922924268612765, "percentage": 78.46, "elapsed_time": "2:40:59", "remaining_time": "0:44:12", "throughput": 19967.62, "total_tokens": 192878656}
|
|
{"current_steps": 61285, "total_steps": 78105, "loss": 0.1745, "lr": 6.737923197428009e-07, "epoch": 3.923244350553742, "percentage": 78.46, "elapsed_time": "2:41:00", "remaining_time": "0:44:11", "throughput": 19967.8, "total_tokens": 192893376}
|
|
{"current_steps": 61290, "total_steps": 78105, "loss": 0.1699, "lr": 6.734108444449472e-07, "epoch": 3.9235644324947185, "percentage": 78.47, "elapsed_time": "2:41:00", "remaining_time": "0:44:10", "throughput": 19968.06, "total_tokens": 192909504}
|
|
{"current_steps": 61295, "total_steps": 78105, "loss": 0.1383, "lr": 6.730294603574408e-07, "epoch": 3.9238845144356955, "percentage": 78.48, "elapsed_time": "2:41:01", "remaining_time": "0:44:09", "throughput": 19968.28, "total_tokens": 192925440}
|
|
{"current_steps": 61300, "total_steps": 78105, "loss": 0.1798, "lr": 6.726481674993282e-07, "epoch": 3.9242045963766725, "percentage": 78.48, "elapsed_time": "2:41:02", "remaining_time": "0:44:08", "throughput": 19968.54, "total_tokens": 192941696}
|
|
{"current_steps": 61305, "total_steps": 78105, "loss": 0.1459, "lr": 6.722669658896464e-07, "epoch": 3.924524678317649, "percentage": 78.49, "elapsed_time": "2:41:02", "remaining_time": "0:44:08", "throughput": 19968.77, "total_tokens": 192957376}
|
|
{"current_steps": 61310, "total_steps": 78105, "loss": 0.2058, "lr": 6.718858555474336e-07, "epoch": 3.924844760258626, "percentage": 78.5, "elapsed_time": "2:41:03", "remaining_time": "0:44:07", "throughput": 19969.01, "total_tokens": 192973632}
|
|
{"current_steps": 61315, "total_steps": 78105, "loss": 0.2832, "lr": 6.71504836491717e-07, "epoch": 3.925164842199603, "percentage": 78.5, "elapsed_time": "2:41:04", "remaining_time": "0:44:06", "throughput": 19969.23, "total_tokens": 192988992}
|
|
{"current_steps": 61320, "total_steps": 78105, "loss": 0.1554, "lr": 6.711239087415258e-07, "epoch": 3.92548492414058, "percentage": 78.51, "elapsed_time": "2:41:04", "remaining_time": "0:44:05", "throughput": 19969.45, "total_tokens": 193004608}
|
|
{"current_steps": 61325, "total_steps": 78105, "loss": 0.1474, "lr": 6.7074307231588e-07, "epoch": 3.925805006081557, "percentage": 78.52, "elapsed_time": "2:41:05", "remaining_time": "0:44:04", "throughput": 19969.66, "total_tokens": 193020096}
|
|
{"current_steps": 61330, "total_steps": 78105, "loss": 0.1642, "lr": 6.703623272337964e-07, "epoch": 3.926125088022534, "percentage": 78.52, "elapsed_time": "2:41:06", "remaining_time": "0:44:03", "throughput": 19969.89, "total_tokens": 193036096}
|
|
{"current_steps": 61335, "total_steps": 78105, "loss": 0.2024, "lr": 6.699816735142891e-07, "epoch": 3.9264451699635106, "percentage": 78.53, "elapsed_time": "2:41:07", "remaining_time": "0:44:03", "throughput": 19970.09, "total_tokens": 193051264}
|
|
{"current_steps": 61340, "total_steps": 78105, "loss": 0.162, "lr": 6.696011111763637e-07, "epoch": 3.9267652519044876, "percentage": 78.54, "elapsed_time": "2:41:07", "remaining_time": "0:44:02", "throughput": 19970.27, "total_tokens": 193066176}
|
|
{"current_steps": 61345, "total_steps": 78105, "loss": 0.0706, "lr": 6.692206402390258e-07, "epoch": 3.9270853338454645, "percentage": 78.54, "elapsed_time": "2:41:08", "remaining_time": "0:44:01", "throughput": 19970.49, "total_tokens": 193081728}
|
|
{"current_steps": 61350, "total_steps": 78105, "loss": 0.1414, "lr": 6.688402607212715e-07, "epoch": 3.927405415786441, "percentage": 78.55, "elapsed_time": "2:41:09", "remaining_time": "0:44:00", "throughput": 19970.69, "total_tokens": 193097280}
|
|
{"current_steps": 61355, "total_steps": 78105, "loss": 0.1554, "lr": 6.684599726420973e-07, "epoch": 3.927725497727418, "percentage": 78.55, "elapsed_time": "2:41:09", "remaining_time": "0:43:59", "throughput": 19970.91, "total_tokens": 193112896}
|
|
{"current_steps": 61360, "total_steps": 78105, "loss": 0.1971, "lr": 6.680797760204921e-07, "epoch": 3.928045579668395, "percentage": 78.56, "elapsed_time": "2:41:10", "remaining_time": "0:43:59", "throughput": 19971.11, "total_tokens": 193127808}
|
|
{"current_steps": 61365, "total_steps": 78105, "loss": 0.189, "lr": 6.676996708754405e-07, "epoch": 3.928365661609372, "percentage": 78.57, "elapsed_time": "2:41:11", "remaining_time": "0:43:58", "throughput": 19971.33, "total_tokens": 193143744}
|
|
{"current_steps": 61370, "total_steps": 78105, "loss": 0.1005, "lr": 6.673196572259233e-07, "epoch": 3.928685743550349, "percentage": 78.57, "elapsed_time": "2:41:11", "remaining_time": "0:43:57", "throughput": 19971.65, "total_tokens": 193161472}
|
|
{"current_steps": 61375, "total_steps": 78105, "loss": 0.1638, "lr": 6.669397350909162e-07, "epoch": 3.929005825491326, "percentage": 78.58, "elapsed_time": "2:41:12", "remaining_time": "0:43:56", "throughput": 19971.85, "total_tokens": 193176512}
|
|
{"current_steps": 61380, "total_steps": 78105, "loss": 0.2051, "lr": 6.6655990448939e-07, "epoch": 3.9293259074323026, "percentage": 78.59, "elapsed_time": "2:41:13", "remaining_time": "0:43:55", "throughput": 19972.07, "total_tokens": 193192000}
|
|
{"current_steps": 61385, "total_steps": 78105, "loss": 0.1869, "lr": 6.66180165440313e-07, "epoch": 3.9296459893732796, "percentage": 78.59, "elapsed_time": "2:41:13", "remaining_time": "0:43:54", "throughput": 19972.31, "total_tokens": 193208128}
|
|
{"current_steps": 61390, "total_steps": 78105, "loss": 0.1991, "lr": 6.658005179626462e-07, "epoch": 3.9299660713142566, "percentage": 78.6, "elapsed_time": "2:41:14", "remaining_time": "0:43:54", "throughput": 19972.48, "total_tokens": 193222592}
|
|
{"current_steps": 61395, "total_steps": 78105, "loss": 0.1891, "lr": 6.65420962075348e-07, "epoch": 3.930286153255233, "percentage": 78.61, "elapsed_time": "2:41:15", "remaining_time": "0:43:53", "throughput": 19972.68, "total_tokens": 193237760}
|
|
{"current_steps": 61400, "total_steps": 78105, "loss": 0.1598, "lr": 6.650414977973707e-07, "epoch": 3.93060623519621, "percentage": 78.61, "elapsed_time": "2:41:15", "remaining_time": "0:43:52", "throughput": 19972.93, "total_tokens": 193254336}
|
|
{"current_steps": 61405, "total_steps": 78105, "loss": 0.1265, "lr": 6.646621251476628e-07, "epoch": 3.930926317137187, "percentage": 78.62, "elapsed_time": "2:41:16", "remaining_time": "0:43:51", "throughput": 19973.15, "total_tokens": 193269824}
|
|
{"current_steps": 61410, "total_steps": 78105, "loss": 0.2535, "lr": 6.642828441451687e-07, "epoch": 3.931246399078164, "percentage": 78.62, "elapsed_time": "2:41:17", "remaining_time": "0:43:50", "throughput": 19973.39, "total_tokens": 193286336}
|
|
{"current_steps": 61415, "total_steps": 78105, "loss": 0.1526, "lr": 6.639036548088268e-07, "epoch": 3.931566481019141, "percentage": 78.63, "elapsed_time": "2:41:17", "remaining_time": "0:43:50", "throughput": 19973.65, "total_tokens": 193302848}
|
|
{"current_steps": 61420, "total_steps": 78105, "loss": 0.1592, "lr": 6.635245571575741e-07, "epoch": 3.9318865629601176, "percentage": 78.64, "elapsed_time": "2:41:18", "remaining_time": "0:43:49", "throughput": 19973.9, "total_tokens": 193318976}
|
|
{"current_steps": 61425, "total_steps": 78105, "loss": 0.1106, "lr": 6.631455512103375e-07, "epoch": 3.9322066449010946, "percentage": 78.64, "elapsed_time": "2:41:19", "remaining_time": "0:43:48", "throughput": 19974.07, "total_tokens": 193333632}
|
|
{"current_steps": 61430, "total_steps": 78105, "loss": 0.1728, "lr": 6.62766636986045e-07, "epoch": 3.9325267268420716, "percentage": 78.65, "elapsed_time": "2:41:19", "remaining_time": "0:43:47", "throughput": 19974.3, "total_tokens": 193349568}
|
|
{"current_steps": 61435, "total_steps": 78105, "loss": 0.124, "lr": 6.623878145036166e-07, "epoch": 3.9328468087830486, "percentage": 78.66, "elapsed_time": "2:41:20", "remaining_time": "0:43:46", "throughput": 19974.49, "total_tokens": 193364288}
|
|
{"current_steps": 61440, "total_steps": 78105, "loss": 0.1758, "lr": 6.620090837819693e-07, "epoch": 3.933166890724025, "percentage": 78.66, "elapsed_time": "2:41:21", "remaining_time": "0:43:45", "throughput": 19974.7, "total_tokens": 193379648}
|
|
{"current_steps": 61445, "total_steps": 78105, "loss": 0.2264, "lr": 6.616304448400146e-07, "epoch": 3.933486972665002, "percentage": 78.67, "elapsed_time": "2:41:21", "remaining_time": "0:43:45", "throughput": 19974.9, "total_tokens": 193394944}
|
|
{"current_steps": 61450, "total_steps": 78105, "loss": 0.1586, "lr": 6.612518976966589e-07, "epoch": 3.933807054605979, "percentage": 78.68, "elapsed_time": "2:41:22", "remaining_time": "0:43:44", "throughput": 19975.12, "total_tokens": 193410432}
|
|
{"current_steps": 61455, "total_steps": 78105, "loss": 0.1256, "lr": 6.608734423708074e-07, "epoch": 3.934127136546956, "percentage": 78.68, "elapsed_time": "2:41:23", "remaining_time": "0:43:43", "throughput": 19975.34, "total_tokens": 193426304}
|
|
{"current_steps": 61460, "total_steps": 78105, "loss": 0.1297, "lr": 6.604950788813549e-07, "epoch": 3.934447218487933, "percentage": 78.69, "elapsed_time": "2:41:23", "remaining_time": "0:43:42", "throughput": 19975.57, "total_tokens": 193441664}
|
|
{"current_steps": 61465, "total_steps": 78105, "loss": 0.1317, "lr": 6.601168072471973e-07, "epoch": 3.9347673004289097, "percentage": 78.7, "elapsed_time": "2:41:24", "remaining_time": "0:43:41", "throughput": 19975.86, "total_tokens": 193458944}
|
|
{"current_steps": 61470, "total_steps": 78105, "loss": 0.1677, "lr": 6.597386274872228e-07, "epoch": 3.9350873823698866, "percentage": 78.7, "elapsed_time": "2:41:25", "remaining_time": "0:43:41", "throughput": 19976.07, "total_tokens": 193474112}
|
|
{"current_steps": 61475, "total_steps": 78105, "loss": 0.1896, "lr": 6.593605396203154e-07, "epoch": 3.9354074643108636, "percentage": 78.71, "elapsed_time": "2:41:25", "remaining_time": "0:43:40", "throughput": 19976.27, "total_tokens": 193489088}
|
|
{"current_steps": 61480, "total_steps": 78105, "loss": 0.1522, "lr": 6.589825436653554e-07, "epoch": 3.9357275462518406, "percentage": 78.71, "elapsed_time": "2:41:26", "remaining_time": "0:43:39", "throughput": 19976.46, "total_tokens": 193504384}
|
|
{"current_steps": 61485, "total_steps": 78105, "loss": 0.1566, "lr": 6.586046396412177e-07, "epoch": 3.936047628192817, "percentage": 78.72, "elapsed_time": "2:41:27", "remaining_time": "0:43:38", "throughput": 19976.77, "total_tokens": 193521664}
|
|
{"current_steps": 61490, "total_steps": 78105, "loss": 0.2193, "lr": 6.582268275667725e-07, "epoch": 3.936367710133794, "percentage": 78.73, "elapsed_time": "2:41:28", "remaining_time": "0:43:37", "throughput": 19977.02, "total_tokens": 193537920}
|
|
{"current_steps": 61495, "total_steps": 78105, "loss": 0.136, "lr": 6.578491074608864e-07, "epoch": 3.936687792074771, "percentage": 78.73, "elapsed_time": "2:41:28", "remaining_time": "0:43:36", "throughput": 19977.21, "total_tokens": 193553280}
|
|
{"current_steps": 61500, "total_steps": 78105, "loss": 0.1589, "lr": 6.574714793424195e-07, "epoch": 3.937007874015748, "percentage": 78.74, "elapsed_time": "2:41:29", "remaining_time": "0:43:36", "throughput": 19977.4, "total_tokens": 193568192}
|
|
{"current_steps": 61505, "total_steps": 78105, "loss": 0.1547, "lr": 6.570939432302303e-07, "epoch": 3.937327955956725, "percentage": 78.75, "elapsed_time": "2:41:30", "remaining_time": "0:43:35", "throughput": 19977.63, "total_tokens": 193584000}
|
|
{"current_steps": 61510, "total_steps": 78105, "loss": 0.1701, "lr": 6.567164991431704e-07, "epoch": 3.9376480378977017, "percentage": 78.75, "elapsed_time": "2:41:30", "remaining_time": "0:43:34", "throughput": 19977.85, "total_tokens": 193599872}
|
|
{"current_steps": 61515, "total_steps": 78105, "loss": 0.2501, "lr": 6.563391471000871e-07, "epoch": 3.9379681198386787, "percentage": 78.76, "elapsed_time": "2:41:31", "remaining_time": "0:43:33", "throughput": 19978.12, "total_tokens": 193616768}
|
|
{"current_steps": 61520, "total_steps": 78105, "loss": 0.1324, "lr": 6.559618871198237e-07, "epoch": 3.9382882017796557, "percentage": 78.77, "elapsed_time": "2:41:32", "remaining_time": "0:43:32", "throughput": 19978.35, "total_tokens": 193632768}
|
|
{"current_steps": 61525, "total_steps": 78105, "loss": 0.1442, "lr": 6.555847192212175e-07, "epoch": 3.938608283720632, "percentage": 78.77, "elapsed_time": "2:41:32", "remaining_time": "0:43:32", "throughput": 19978.57, "total_tokens": 193648320}
|
|
{"current_steps": 61530, "total_steps": 78105, "loss": 0.1977, "lr": 6.552076434231047e-07, "epoch": 3.938928365661609, "percentage": 78.78, "elapsed_time": "2:41:33", "remaining_time": "0:43:31", "throughput": 19978.79, "total_tokens": 193664128}
|
|
{"current_steps": 61535, "total_steps": 78105, "loss": 0.2114, "lr": 6.548306597443116e-07, "epoch": 3.939248447602586, "percentage": 78.78, "elapsed_time": "2:41:34", "remaining_time": "0:43:30", "throughput": 19978.99, "total_tokens": 193679488}
|
|
{"current_steps": 61540, "total_steps": 78105, "loss": 0.1585, "lr": 6.544537682036655e-07, "epoch": 3.939568529543563, "percentage": 78.79, "elapsed_time": "2:41:34", "remaining_time": "0:43:29", "throughput": 19979.19, "total_tokens": 193694848}
|
|
{"current_steps": 61545, "total_steps": 78105, "loss": 0.1558, "lr": 6.540769688199838e-07, "epoch": 3.93988861148454, "percentage": 78.8, "elapsed_time": "2:41:35", "remaining_time": "0:43:28", "throughput": 19979.42, "total_tokens": 193710464}
|
|
{"current_steps": 61550, "total_steps": 78105, "loss": 0.2872, "lr": 6.537002616120839e-07, "epoch": 3.940208693425517, "percentage": 78.8, "elapsed_time": "2:41:36", "remaining_time": "0:43:27", "throughput": 19979.6, "total_tokens": 193725248}
|
|
{"current_steps": 61555, "total_steps": 78105, "loss": 0.1698, "lr": 6.53323646598776e-07, "epoch": 3.9405287753664937, "percentage": 78.81, "elapsed_time": "2:41:36", "remaining_time": "0:43:27", "throughput": 19979.82, "total_tokens": 193740672}
|
|
{"current_steps": 61560, "total_steps": 78105, "loss": 0.1753, "lr": 6.529471237988654e-07, "epoch": 3.9408488573074707, "percentage": 78.82, "elapsed_time": "2:41:37", "remaining_time": "0:43:26", "throughput": 19980.02, "total_tokens": 193756160}
|
|
{"current_steps": 61565, "total_steps": 78105, "loss": 0.1949, "lr": 6.525706932311559e-07, "epoch": 3.9411689392484477, "percentage": 78.82, "elapsed_time": "2:41:38", "remaining_time": "0:43:25", "throughput": 19980.21, "total_tokens": 193770880}
|
|
{"current_steps": 61570, "total_steps": 78105, "loss": 0.1703, "lr": 6.521943549144416e-07, "epoch": 3.9414890211894242, "percentage": 78.83, "elapsed_time": "2:41:38", "remaining_time": "0:43:24", "throughput": 19980.43, "total_tokens": 193786368}
|
|
{"current_steps": 61575, "total_steps": 78105, "loss": 0.1146, "lr": 6.51818108867518e-07, "epoch": 3.9418091031304012, "percentage": 78.84, "elapsed_time": "2:41:39", "remaining_time": "0:43:23", "throughput": 19980.68, "total_tokens": 193802496}
|
|
{"current_steps": 61580, "total_steps": 78105, "loss": 0.3015, "lr": 6.514419551091697e-07, "epoch": 3.942129185071378, "percentage": 78.84, "elapsed_time": "2:41:40", "remaining_time": "0:43:23", "throughput": 19980.88, "total_tokens": 193817728}
|
|
{"current_steps": 61585, "total_steps": 78105, "loss": 0.1662, "lr": 6.51065893658182e-07, "epoch": 3.942449267012355, "percentage": 78.85, "elapsed_time": "2:41:40", "remaining_time": "0:43:22", "throughput": 19981.09, "total_tokens": 193833024}
|
|
{"current_steps": 61590, "total_steps": 78105, "loss": 0.1407, "lr": 6.506899245333329e-07, "epoch": 3.942769348953332, "percentage": 78.86, "elapsed_time": "2:41:41", "remaining_time": "0:43:21", "throughput": 19981.32, "total_tokens": 193848640}
|
|
{"current_steps": 61595, "total_steps": 78105, "loss": 0.1651, "lr": 6.503140477533962e-07, "epoch": 3.943089430894309, "percentage": 78.86, "elapsed_time": "2:41:42", "remaining_time": "0:43:20", "throughput": 19981.49, "total_tokens": 193863040}
|
|
{"current_steps": 61600, "total_steps": 78105, "loss": 0.109, "lr": 6.499382633371412e-07, "epoch": 3.9434095128352857, "percentage": 78.87, "elapsed_time": "2:41:42", "remaining_time": "0:43:19", "throughput": 19981.72, "total_tokens": 193879168}
|
|
{"current_steps": 61605, "total_steps": 78105, "loss": 0.1581, "lr": 6.495625713033329e-07, "epoch": 3.9437295947762627, "percentage": 78.87, "elapsed_time": "2:41:43", "remaining_time": "0:43:18", "throughput": 19981.95, "total_tokens": 193894720}
|
|
{"current_steps": 61610, "total_steps": 78105, "loss": 0.1539, "lr": 6.491869716707305e-07, "epoch": 3.9440496767172397, "percentage": 78.88, "elapsed_time": "2:41:44", "remaining_time": "0:43:18", "throughput": 19982.16, "total_tokens": 193910208}
|
|
{"current_steps": 61615, "total_steps": 78105, "loss": 0.2289, "lr": 6.488114644580909e-07, "epoch": 3.9443697586582163, "percentage": 78.89, "elapsed_time": "2:41:44", "remaining_time": "0:43:17", "throughput": 19982.41, "total_tokens": 193926528}
|
|
{"current_steps": 61620, "total_steps": 78105, "loss": 0.1422, "lr": 6.484360496841641e-07, "epoch": 3.9446898405991933, "percentage": 78.89, "elapsed_time": "2:41:45", "remaining_time": "0:43:16", "throughput": 19982.6, "total_tokens": 193941760}
|
|
{"current_steps": 61625, "total_steps": 78105, "loss": 0.1784, "lr": 6.480607273676967e-07, "epoch": 3.9450099225401702, "percentage": 78.9, "elapsed_time": "2:41:46", "remaining_time": "0:43:15", "throughput": 19982.83, "total_tokens": 193957376}
|
|
{"current_steps": 61630, "total_steps": 78105, "loss": 0.1341, "lr": 6.476854975274302e-07, "epoch": 3.9453300044811472, "percentage": 78.91, "elapsed_time": "2:41:46", "remaining_time": "0:43:14", "throughput": 19983.12, "total_tokens": 193974784}
|
|
{"current_steps": 61635, "total_steps": 78105, "loss": 0.1264, "lr": 6.473103601821015e-07, "epoch": 3.9456500864221242, "percentage": 78.91, "elapsed_time": "2:41:47", "remaining_time": "0:43:14", "throughput": 19983.38, "total_tokens": 193991168}
|
|
{"current_steps": 61640, "total_steps": 78105, "loss": 0.1234, "lr": 6.469353153504431e-07, "epoch": 3.945970168363101, "percentage": 78.92, "elapsed_time": "2:41:48", "remaining_time": "0:43:13", "throughput": 19983.57, "total_tokens": 194006144}
|
|
{"current_steps": 61645, "total_steps": 78105, "loss": 0.2083, "lr": 6.465603630511822e-07, "epoch": 3.9462902503040778, "percentage": 78.93, "elapsed_time": "2:41:48", "remaining_time": "0:43:12", "throughput": 19983.81, "total_tokens": 194022336}
|
|
{"current_steps": 61650, "total_steps": 78105, "loss": 0.2178, "lr": 6.461855033030437e-07, "epoch": 3.9466103322450548, "percentage": 78.93, "elapsed_time": "2:41:49", "remaining_time": "0:43:11", "throughput": 19984.0, "total_tokens": 194037504}
|
|
{"current_steps": 61655, "total_steps": 78105, "loss": 0.1497, "lr": 6.458107361247437e-07, "epoch": 3.9469304141860317, "percentage": 78.94, "elapsed_time": "2:41:50", "remaining_time": "0:43:10", "throughput": 19984.2, "total_tokens": 194052544}
|
|
{"current_steps": 61660, "total_steps": 78105, "loss": 0.1428, "lr": 6.454360615349983e-07, "epoch": 3.9472504961270083, "percentage": 78.95, "elapsed_time": "2:41:50", "remaining_time": "0:43:09", "throughput": 19984.41, "total_tokens": 194067968}
|
|
{"current_steps": 61665, "total_steps": 78105, "loss": 0.2099, "lr": 6.450614795525159e-07, "epoch": 3.9475705780679853, "percentage": 78.95, "elapsed_time": "2:41:51", "remaining_time": "0:43:09", "throughput": 19984.63, "total_tokens": 194083904}
|
|
{"current_steps": 61670, "total_steps": 78105, "loss": 0.1603, "lr": 6.44686990196001e-07, "epoch": 3.9478906600089623, "percentage": 78.96, "elapsed_time": "2:41:52", "remaining_time": "0:43:08", "throughput": 19984.83, "total_tokens": 194099136}
|
|
{"current_steps": 61675, "total_steps": 78105, "loss": 0.145, "lr": 6.44312593484154e-07, "epoch": 3.9482107419499393, "percentage": 78.96, "elapsed_time": "2:41:52", "remaining_time": "0:43:07", "throughput": 19985.02, "total_tokens": 194114176}
|
|
{"current_steps": 61680, "total_steps": 78105, "loss": 0.1703, "lr": 6.439382894356694e-07, "epoch": 3.9485308238909163, "percentage": 78.97, "elapsed_time": "2:41:53", "remaining_time": "0:43:06", "throughput": 19985.24, "total_tokens": 194129664}
|
|
{"current_steps": 61685, "total_steps": 78105, "loss": 0.1856, "lr": 6.435640780692401e-07, "epoch": 3.948850905831893, "percentage": 78.98, "elapsed_time": "2:41:54", "remaining_time": "0:43:05", "throughput": 19985.42, "total_tokens": 194144576}
|
|
{"current_steps": 61690, "total_steps": 78105, "loss": 0.2219, "lr": 6.431899594035495e-07, "epoch": 3.94917098777287, "percentage": 78.98, "elapsed_time": "2:41:54", "remaining_time": "0:43:05", "throughput": 19985.65, "total_tokens": 194160256}
|
|
{"current_steps": 61695, "total_steps": 78105, "loss": 0.1516, "lr": 6.428159334572814e-07, "epoch": 3.949491069713847, "percentage": 78.99, "elapsed_time": "2:41:55", "remaining_time": "0:43:04", "throughput": 19985.84, "total_tokens": 194175488}
|
|
{"current_steps": 61700, "total_steps": 78105, "loss": 0.1925, "lr": 6.424420002491116e-07, "epoch": 3.9498111516548238, "percentage": 79.0, "elapsed_time": "2:41:56", "remaining_time": "0:43:03", "throughput": 19986.03, "total_tokens": 194190464}
|
|
{"current_steps": 61705, "total_steps": 78105, "loss": 0.183, "lr": 6.420681597977129e-07, "epoch": 3.9501312335958003, "percentage": 79.0, "elapsed_time": "2:41:56", "remaining_time": "0:43:02", "throughput": 19986.25, "total_tokens": 194206208}
|
|
{"current_steps": 61710, "total_steps": 78105, "loss": 0.231, "lr": 6.416944121217527e-07, "epoch": 3.9504513155367773, "percentage": 79.01, "elapsed_time": "2:41:57", "remaining_time": "0:43:01", "throughput": 19986.47, "total_tokens": 194222208}
|
|
{"current_steps": 61715, "total_steps": 78105, "loss": 0.2852, "lr": 6.413207572398938e-07, "epoch": 3.9507713974777543, "percentage": 79.02, "elapsed_time": "2:41:58", "remaining_time": "0:43:00", "throughput": 19986.73, "total_tokens": 194238784}
|
|
{"current_steps": 61720, "total_steps": 78105, "loss": 0.2077, "lr": 6.40947195170795e-07, "epoch": 3.9510914794187313, "percentage": 79.02, "elapsed_time": "2:41:59", "remaining_time": "0:43:00", "throughput": 19986.95, "total_tokens": 194254272}
|
|
{"current_steps": 61725, "total_steps": 78105, "loss": 0.1523, "lr": 6.405737259331096e-07, "epoch": 3.9514115613597083, "percentage": 79.03, "elapsed_time": "2:41:59", "remaining_time": "0:42:59", "throughput": 19987.17, "total_tokens": 194269888}
|
|
{"current_steps": 61730, "total_steps": 78105, "loss": 0.1398, "lr": 6.402003495454864e-07, "epoch": 3.951731643300685, "percentage": 79.03, "elapsed_time": "2:42:00", "remaining_time": "0:42:58", "throughput": 19987.35, "total_tokens": 194284544}
|
|
{"current_steps": 61735, "total_steps": 78105, "loss": 0.1711, "lr": 6.398270660265709e-07, "epoch": 3.952051725241662, "percentage": 79.04, "elapsed_time": "2:42:01", "remaining_time": "0:42:57", "throughput": 19987.51, "total_tokens": 194298752}
|
|
{"current_steps": 61740, "total_steps": 78105, "loss": 0.1687, "lr": 6.394538753950028e-07, "epoch": 3.952371807182639, "percentage": 79.05, "elapsed_time": "2:42:01", "remaining_time": "0:42:56", "throughput": 19987.71, "total_tokens": 194313856}
|
|
{"current_steps": 61745, "total_steps": 78105, "loss": 0.1807, "lr": 6.390807776694166e-07, "epoch": 3.952691889123616, "percentage": 79.05, "elapsed_time": "2:42:02", "remaining_time": "0:42:56", "throughput": 19987.91, "total_tokens": 194329280}
|
|
{"current_steps": 61750, "total_steps": 78105, "loss": 0.1258, "lr": 6.387077728684432e-07, "epoch": 3.9530119710645923, "percentage": 79.06, "elapsed_time": "2:42:02", "remaining_time": "0:42:55", "throughput": 19988.11, "total_tokens": 194344320}
|
|
{"current_steps": 61755, "total_steps": 78105, "loss": 0.1547, "lr": 6.383348610107079e-07, "epoch": 3.9533320530055693, "percentage": 79.07, "elapsed_time": "2:42:03", "remaining_time": "0:42:54", "throughput": 19988.37, "total_tokens": 194361088}
|
|
{"current_steps": 61760, "total_steps": 78105, "loss": 0.1649, "lr": 6.379620421148342e-07, "epoch": 3.9536521349465463, "percentage": 79.07, "elapsed_time": "2:42:04", "remaining_time": "0:42:53", "throughput": 19988.67, "total_tokens": 194378240}
|
|
{"current_steps": 61765, "total_steps": 78105, "loss": 0.0695, "lr": 6.375893161994353e-07, "epoch": 3.9539722168875233, "percentage": 79.08, "elapsed_time": "2:42:05", "remaining_time": "0:42:52", "throughput": 19988.87, "total_tokens": 194393728}
|
|
{"current_steps": 61770, "total_steps": 78105, "loss": 0.1269, "lr": 6.372166832831267e-07, "epoch": 3.9542922988285003, "percentage": 79.09, "elapsed_time": "2:42:05", "remaining_time": "0:42:51", "throughput": 19989.13, "total_tokens": 194410432}
|
|
{"current_steps": 61775, "total_steps": 78105, "loss": 0.1109, "lr": 6.368441433845126e-07, "epoch": 3.954612380769477, "percentage": 79.09, "elapsed_time": "2:42:06", "remaining_time": "0:42:51", "throughput": 19989.36, "total_tokens": 194426240}
|
|
{"current_steps": 61780, "total_steps": 78105, "loss": 0.1346, "lr": 6.364716965221978e-07, "epoch": 3.954932462710454, "percentage": 79.1, "elapsed_time": "2:42:07", "remaining_time": "0:42:50", "throughput": 19989.57, "total_tokens": 194441408}
|
|
{"current_steps": 61785, "total_steps": 78105, "loss": 0.2338, "lr": 6.360993427147796e-07, "epoch": 3.955252544651431, "percentage": 79.11, "elapsed_time": "2:42:07", "remaining_time": "0:42:49", "throughput": 19989.87, "total_tokens": 194458752}
|
|
{"current_steps": 61790, "total_steps": 78105, "loss": 0.1724, "lr": 6.357270819808509e-07, "epoch": 3.9555726265924074, "percentage": 79.11, "elapsed_time": "2:42:08", "remaining_time": "0:42:48", "throughput": 19990.14, "total_tokens": 194475904}
|
|
{"current_steps": 61795, "total_steps": 78105, "loss": 0.1906, "lr": 6.353549143390025e-07, "epoch": 3.9558927085333844, "percentage": 79.12, "elapsed_time": "2:42:09", "remaining_time": "0:42:47", "throughput": 19990.36, "total_tokens": 194491264}
|
|
{"current_steps": 61800, "total_steps": 78105, "loss": 0.1908, "lr": 6.349828398078159e-07, "epoch": 3.9562127904743614, "percentage": 79.12, "elapsed_time": "2:42:09", "remaining_time": "0:42:47", "throughput": 19990.57, "total_tokens": 194506432}
|
|
{"current_steps": 61805, "total_steps": 78105, "loss": 0.1164, "lr": 6.34610858405873e-07, "epoch": 3.9565328724153384, "percentage": 79.13, "elapsed_time": "2:42:10", "remaining_time": "0:42:46", "throughput": 19990.8, "total_tokens": 194522240}
|
|
{"current_steps": 61810, "total_steps": 78105, "loss": 0.1896, "lr": 6.34238970151746e-07, "epoch": 3.9568529543563153, "percentage": 79.14, "elapsed_time": "2:42:11", "remaining_time": "0:42:45", "throughput": 19991.02, "total_tokens": 194537984}
|
|
{"current_steps": 61815, "total_steps": 78105, "loss": 0.2008, "lr": 6.338671750640068e-07, "epoch": 3.9571730362972923, "percentage": 79.14, "elapsed_time": "2:42:11", "remaining_time": "0:42:44", "throughput": 19991.24, "total_tokens": 194553344}
|
|
{"current_steps": 61820, "total_steps": 78105, "loss": 0.1522, "lr": 6.334954731612211e-07, "epoch": 3.957493118238269, "percentage": 79.15, "elapsed_time": "2:42:12", "remaining_time": "0:42:43", "throughput": 19991.46, "total_tokens": 194569216}
|
|
{"current_steps": 61825, "total_steps": 78105, "loss": 0.1726, "lr": 6.331238644619489e-07, "epoch": 3.957813200179246, "percentage": 79.16, "elapsed_time": "2:42:13", "remaining_time": "0:42:43", "throughput": 19991.67, "total_tokens": 194584448}
|
|
{"current_steps": 61830, "total_steps": 78105, "loss": 0.2159, "lr": 6.327523489847468e-07, "epoch": 3.958133282120223, "percentage": 79.16, "elapsed_time": "2:42:13", "remaining_time": "0:42:42", "throughput": 19991.88, "total_tokens": 194599680}
|
|
{"current_steps": 61835, "total_steps": 78105, "loss": 0.1385, "lr": 6.323809267481662e-07, "epoch": 3.9584533640611994, "percentage": 79.17, "elapsed_time": "2:42:14", "remaining_time": "0:42:41", "throughput": 19992.12, "total_tokens": 194615808}
|
|
{"current_steps": 61840, "total_steps": 78105, "loss": 0.2075, "lr": 6.320095977707533e-07, "epoch": 3.9587734460021764, "percentage": 79.18, "elapsed_time": "2:42:15", "remaining_time": "0:42:40", "throughput": 19992.33, "total_tokens": 194631232}
|
|
{"current_steps": 61845, "total_steps": 78105, "loss": 0.222, "lr": 6.316383620710526e-07, "epoch": 3.9590935279431534, "percentage": 79.18, "elapsed_time": "2:42:15", "remaining_time": "0:42:39", "throughput": 19992.58, "total_tokens": 194647680}
|
|
{"current_steps": 61850, "total_steps": 78105, "loss": 0.1296, "lr": 6.312672196675986e-07, "epoch": 3.9594136098841304, "percentage": 79.19, "elapsed_time": "2:42:16", "remaining_time": "0:42:38", "throughput": 19992.79, "total_tokens": 194663168}
|
|
{"current_steps": 61855, "total_steps": 78105, "loss": 0.1991, "lr": 6.308961705789265e-07, "epoch": 3.9597336918251074, "percentage": 79.19, "elapsed_time": "2:42:17", "remaining_time": "0:42:38", "throughput": 19993.01, "total_tokens": 194679168}
|
|
{"current_steps": 61860, "total_steps": 78105, "loss": 0.1225, "lr": 6.305252148235638e-07, "epoch": 3.9600537737660844, "percentage": 79.2, "elapsed_time": "2:42:18", "remaining_time": "0:42:37", "throughput": 19993.22, "total_tokens": 194694656}
|
|
{"current_steps": 61865, "total_steps": 78105, "loss": 0.1924, "lr": 6.301543524200343e-07, "epoch": 3.960373855707061, "percentage": 79.21, "elapsed_time": "2:42:18", "remaining_time": "0:42:36", "throughput": 19993.44, "total_tokens": 194710016}
|
|
{"current_steps": 61870, "total_steps": 78105, "loss": 0.182, "lr": 6.297835833868568e-07, "epoch": 3.960693937648038, "percentage": 79.21, "elapsed_time": "2:42:19", "remaining_time": "0:42:35", "throughput": 19993.64, "total_tokens": 194725376}
|
|
{"current_steps": 61875, "total_steps": 78105, "loss": 0.187, "lr": 6.294129077425445e-07, "epoch": 3.961014019589015, "percentage": 79.22, "elapsed_time": "2:42:20", "remaining_time": "0:42:34", "throughput": 19993.86, "total_tokens": 194741440}
|
|
{"current_steps": 61880, "total_steps": 78105, "loss": 0.1324, "lr": 6.2904232550561e-07, "epoch": 3.9613341015299914, "percentage": 79.23, "elapsed_time": "2:42:20", "remaining_time": "0:42:34", "throughput": 19994.06, "total_tokens": 194756800}
|
|
{"current_steps": 61885, "total_steps": 78105, "loss": 0.1479, "lr": 6.286718366945543e-07, "epoch": 3.9616541834709684, "percentage": 79.23, "elapsed_time": "2:42:21", "remaining_time": "0:42:33", "throughput": 19994.26, "total_tokens": 194771776}
|
|
{"current_steps": 61890, "total_steps": 78105, "loss": 0.1099, "lr": 6.28301441327881e-07, "epoch": 3.9619742654119454, "percentage": 79.24, "elapsed_time": "2:42:22", "remaining_time": "0:42:32", "throughput": 19994.44, "total_tokens": 194786432}
|
|
{"current_steps": 61895, "total_steps": 78105, "loss": 0.0988, "lr": 6.279311394240839e-07, "epoch": 3.9622943473529224, "percentage": 79.25, "elapsed_time": "2:42:22", "remaining_time": "0:42:31", "throughput": 19994.64, "total_tokens": 194801984}
|
|
{"current_steps": 61900, "total_steps": 78105, "loss": 0.1369, "lr": 6.275609310016544e-07, "epoch": 3.9626144292938994, "percentage": 79.25, "elapsed_time": "2:42:23", "remaining_time": "0:42:30", "throughput": 19994.8, "total_tokens": 194816064}
|
|
{"current_steps": 61905, "total_steps": 78105, "loss": 0.1972, "lr": 6.271908160790794e-07, "epoch": 3.9629345112348764, "percentage": 79.26, "elapsed_time": "2:42:23", "remaining_time": "0:42:29", "throughput": 19995.02, "total_tokens": 194831424}
|
|
{"current_steps": 61910, "total_steps": 78105, "loss": 0.1671, "lr": 6.268207946748387e-07, "epoch": 3.963254593175853, "percentage": 79.27, "elapsed_time": "2:42:24", "remaining_time": "0:42:29", "throughput": 19995.23, "total_tokens": 194846720}
|
|
{"current_steps": 61915, "total_steps": 78105, "loss": 0.1602, "lr": 6.26450866807412e-07, "epoch": 3.96357467511683, "percentage": 79.27, "elapsed_time": "2:42:25", "remaining_time": "0:42:28", "throughput": 19995.49, "total_tokens": 194863360}
|
|
{"current_steps": 61920, "total_steps": 78105, "loss": 0.249, "lr": 6.260810324952688e-07, "epoch": 3.963894757057807, "percentage": 79.28, "elapsed_time": "2:42:26", "remaining_time": "0:42:27", "throughput": 19995.72, "total_tokens": 194879104}
|
|
{"current_steps": 61925, "total_steps": 78105, "loss": 0.1858, "lr": 6.257112917568787e-07, "epoch": 3.9642148389987835, "percentage": 79.28, "elapsed_time": "2:42:26", "remaining_time": "0:42:26", "throughput": 19995.9, "total_tokens": 194893632}
|
|
{"current_steps": 61930, "total_steps": 78105, "loss": 0.1371, "lr": 6.253416446107036e-07, "epoch": 3.9645349209397605, "percentage": 79.29, "elapsed_time": "2:42:27", "remaining_time": "0:42:25", "throughput": 19996.14, "total_tokens": 194909632}
|
|
{"current_steps": 61935, "total_steps": 78105, "loss": 0.1251, "lr": 6.249720910752022e-07, "epoch": 3.9648550028807374, "percentage": 79.3, "elapsed_time": "2:42:28", "remaining_time": "0:42:25", "throughput": 19996.38, "total_tokens": 194925632}
|
|
{"current_steps": 61940, "total_steps": 78105, "loss": 0.1424, "lr": 6.24602631168828e-07, "epoch": 3.9651750848217144, "percentage": 79.3, "elapsed_time": "2:42:28", "remaining_time": "0:42:24", "throughput": 19996.66, "total_tokens": 194942784}
|
|
{"current_steps": 61945, "total_steps": 78105, "loss": 0.1561, "lr": 6.242332649100297e-07, "epoch": 3.9654951667626914, "percentage": 79.31, "elapsed_time": "2:42:29", "remaining_time": "0:42:23", "throughput": 19996.88, "total_tokens": 194958720}
|
|
{"current_steps": 61950, "total_steps": 78105, "loss": 0.1491, "lr": 6.238639923172515e-07, "epoch": 3.9658152487036684, "percentage": 79.32, "elapsed_time": "2:42:30", "remaining_time": "0:42:22", "throughput": 19997.18, "total_tokens": 194975872}
|
|
{"current_steps": 61955, "total_steps": 78105, "loss": 0.1673, "lr": 6.234948134089333e-07, "epoch": 3.966135330644645, "percentage": 79.32, "elapsed_time": "2:42:30", "remaining_time": "0:42:21", "throughput": 19997.41, "total_tokens": 194991808}
|
|
{"current_steps": 61960, "total_steps": 78105, "loss": 0.2584, "lr": 6.231257282035088e-07, "epoch": 3.966455412585622, "percentage": 79.33, "elapsed_time": "2:42:31", "remaining_time": "0:42:20", "throughput": 19997.61, "total_tokens": 195006976}
|
|
{"current_steps": 61965, "total_steps": 78105, "loss": 0.1614, "lr": 6.227567367194101e-07, "epoch": 3.966775494526599, "percentage": 79.34, "elapsed_time": "2:42:32", "remaining_time": "0:42:20", "throughput": 19997.86, "total_tokens": 195023360}
|
|
{"current_steps": 61970, "total_steps": 78105, "loss": 0.1806, "lr": 6.223878389750618e-07, "epoch": 3.9670955764675755, "percentage": 79.34, "elapsed_time": "2:42:32", "remaining_time": "0:42:19", "throughput": 19998.1, "total_tokens": 195039296}
|
|
{"current_steps": 61975, "total_steps": 78105, "loss": 0.1318, "lr": 6.220190349888843e-07, "epoch": 3.9674156584085525, "percentage": 79.35, "elapsed_time": "2:42:33", "remaining_time": "0:42:18", "throughput": 19998.29, "total_tokens": 195054336}
|
|
{"current_steps": 61980, "total_steps": 78105, "loss": 0.1347, "lr": 6.216503247792946e-07, "epoch": 3.9677357403495295, "percentage": 79.35, "elapsed_time": "2:42:34", "remaining_time": "0:42:17", "throughput": 19998.49, "total_tokens": 195069696}
|
|
{"current_steps": 61985, "total_steps": 78105, "loss": 0.1617, "lr": 6.21281708364703e-07, "epoch": 3.9680558222905065, "percentage": 79.36, "elapsed_time": "2:42:34", "remaining_time": "0:42:16", "throughput": 19998.69, "total_tokens": 195084992}
|
|
{"current_steps": 61990, "total_steps": 78105, "loss": 0.1866, "lr": 6.209131857635181e-07, "epoch": 3.9683759042314835, "percentage": 79.37, "elapsed_time": "2:42:35", "remaining_time": "0:42:16", "throughput": 19998.98, "total_tokens": 195102208}
|
|
{"current_steps": 61995, "total_steps": 78105, "loss": 0.1466, "lr": 6.205447569941398e-07, "epoch": 3.96869598617246, "percentage": 79.37, "elapsed_time": "2:42:36", "remaining_time": "0:42:15", "throughput": 19999.17, "total_tokens": 195117056}
|
|
{"current_steps": 62000, "total_steps": 78105, "loss": 0.1537, "lr": 6.201764220749678e-07, "epoch": 3.969016068113437, "percentage": 79.38, "elapsed_time": "2:42:36", "remaining_time": "0:42:14", "throughput": 19999.4, "total_tokens": 195132736}
|
|
{"current_steps": 62005, "total_steps": 78105, "loss": 0.1636, "lr": 6.198081810243922e-07, "epoch": 3.969336150054414, "percentage": 79.39, "elapsed_time": "2:42:37", "remaining_time": "0:42:13", "throughput": 19999.68, "total_tokens": 195149760}
|
|
{"current_steps": 62010, "total_steps": 78105, "loss": 0.1442, "lr": 6.194400338608034e-07, "epoch": 3.969656231995391, "percentage": 79.39, "elapsed_time": "2:42:38", "remaining_time": "0:42:12", "throughput": 19999.87, "total_tokens": 195165056}
|
|
{"current_steps": 62015, "total_steps": 78105, "loss": 0.1662, "lr": 6.190719806025835e-07, "epoch": 3.9699763139363675, "percentage": 79.4, "elapsed_time": "2:42:38", "remaining_time": "0:42:11", "throughput": 20000.08, "total_tokens": 195180224}
|
|
{"current_steps": 62020, "total_steps": 78105, "loss": 0.1082, "lr": 6.187040212681117e-07, "epoch": 3.9702963958773445, "percentage": 79.41, "elapsed_time": "2:42:39", "remaining_time": "0:42:11", "throughput": 20000.44, "total_tokens": 195199296}
|
|
{"current_steps": 62025, "total_steps": 78105, "loss": 0.22, "lr": 6.183361558757619e-07, "epoch": 3.9706164778183215, "percentage": 79.41, "elapsed_time": "2:42:40", "remaining_time": "0:42:10", "throughput": 20000.66, "total_tokens": 195215232}
|
|
{"current_steps": 62030, "total_steps": 78105, "loss": 0.1818, "lr": 6.179683844439025e-07, "epoch": 3.9709365597592985, "percentage": 79.42, "elapsed_time": "2:42:41", "remaining_time": "0:42:09", "throughput": 20000.84, "total_tokens": 195229632}
|
|
{"current_steps": 62035, "total_steps": 78105, "loss": 0.1776, "lr": 6.176007069909002e-07, "epoch": 3.9712566417002755, "percentage": 79.43, "elapsed_time": "2:42:41", "remaining_time": "0:42:08", "throughput": 20001.05, "total_tokens": 195245184}
|
|
{"current_steps": 62040, "total_steps": 78105, "loss": 0.1291, "lr": 6.17233123535112e-07, "epoch": 3.971576723641252, "percentage": 79.43, "elapsed_time": "2:42:42", "remaining_time": "0:42:07", "throughput": 20001.24, "total_tokens": 195260416}
|
|
{"current_steps": 62045, "total_steps": 78105, "loss": 0.1825, "lr": 6.168656340948953e-07, "epoch": 3.971896805582229, "percentage": 79.44, "elapsed_time": "2:42:43", "remaining_time": "0:42:07", "throughput": 20001.49, "total_tokens": 195276544}
|
|
{"current_steps": 62050, "total_steps": 78105, "loss": 0.1618, "lr": 6.164982386886001e-07, "epoch": 3.972216887523206, "percentage": 79.44, "elapsed_time": "2:42:43", "remaining_time": "0:42:06", "throughput": 20001.8, "total_tokens": 195294528}
|
|
{"current_steps": 62055, "total_steps": 78105, "loss": 0.1169, "lr": 6.16130937334572e-07, "epoch": 3.9725369694641826, "percentage": 79.45, "elapsed_time": "2:42:44", "remaining_time": "0:42:05", "throughput": 20002.01, "total_tokens": 195310080}
|
|
{"current_steps": 62060, "total_steps": 78105, "loss": 0.1439, "lr": 6.15763730051152e-07, "epoch": 3.9728570514051595, "percentage": 79.46, "elapsed_time": "2:42:45", "remaining_time": "0:42:04", "throughput": 20002.23, "total_tokens": 195326208}
|
|
{"current_steps": 62065, "total_steps": 78105, "loss": 0.1621, "lr": 6.153966168566769e-07, "epoch": 3.9731771333461365, "percentage": 79.46, "elapsed_time": "2:42:45", "remaining_time": "0:42:03", "throughput": 20002.47, "total_tokens": 195341824}
|
|
{"current_steps": 62070, "total_steps": 78105, "loss": 0.1332, "lr": 6.150295977694776e-07, "epoch": 3.9734972152871135, "percentage": 79.47, "elapsed_time": "2:42:46", "remaining_time": "0:42:03", "throughput": 20002.68, "total_tokens": 195357440}
|
|
{"current_steps": 62075, "total_steps": 78105, "loss": 0.1515, "lr": 6.14662672807883e-07, "epoch": 3.9738172972280905, "percentage": 79.48, "elapsed_time": "2:42:47", "remaining_time": "0:42:02", "throughput": 20002.91, "total_tokens": 195373312}
|
|
{"current_steps": 62080, "total_steps": 78105, "loss": 0.1444, "lr": 6.142958419902129e-07, "epoch": 3.9741373791690675, "percentage": 79.48, "elapsed_time": "2:42:47", "remaining_time": "0:42:01", "throughput": 20003.11, "total_tokens": 195388544}
|
|
{"current_steps": 62085, "total_steps": 78105, "loss": 0.2397, "lr": 6.139291053347868e-07, "epoch": 3.974457461110044, "percentage": 79.49, "elapsed_time": "2:42:48", "remaining_time": "0:42:00", "throughput": 20003.3, "total_tokens": 195403904}
|
|
{"current_steps": 62090, "total_steps": 78105, "loss": 0.148, "lr": 6.135624628599169e-07, "epoch": 3.974777543051021, "percentage": 79.5, "elapsed_time": "2:42:49", "remaining_time": "0:41:59", "throughput": 20003.56, "total_tokens": 195420288}
|
|
{"current_steps": 62095, "total_steps": 78105, "loss": 0.1955, "lr": 6.131959145839114e-07, "epoch": 3.975097624991998, "percentage": 79.5, "elapsed_time": "2:42:49", "remaining_time": "0:41:58", "throughput": 20003.81, "total_tokens": 195436608}
|
|
{"current_steps": 62100, "total_steps": 78105, "loss": 0.1525, "lr": 6.128294605250743e-07, "epoch": 3.9754177069329746, "percentage": 79.51, "elapsed_time": "2:42:50", "remaining_time": "0:41:58", "throughput": 20003.99, "total_tokens": 195451264}
|
|
{"current_steps": 62105, "total_steps": 78105, "loss": 0.1367, "lr": 6.124631007017029e-07, "epoch": 3.9757377888739516, "percentage": 79.51, "elapsed_time": "2:42:51", "remaining_time": "0:41:57", "throughput": 20004.17, "total_tokens": 195465728}
|
|
{"current_steps": 62110, "total_steps": 78105, "loss": 0.153, "lr": 6.120968351320941e-07, "epoch": 3.9760578708149286, "percentage": 79.52, "elapsed_time": "2:42:51", "remaining_time": "0:41:56", "throughput": 20004.43, "total_tokens": 195482112}
|
|
{"current_steps": 62115, "total_steps": 78105, "loss": 0.1983, "lr": 6.117306638345341e-07, "epoch": 3.9763779527559056, "percentage": 79.53, "elapsed_time": "2:42:52", "remaining_time": "0:41:55", "throughput": 20004.66, "total_tokens": 195498368}
|
|
{"current_steps": 62120, "total_steps": 78105, "loss": 0.1494, "lr": 6.113645868273099e-07, "epoch": 3.9766980346968825, "percentage": 79.53, "elapsed_time": "2:42:53", "remaining_time": "0:41:54", "throughput": 20004.86, "total_tokens": 195513280}
|
|
{"current_steps": 62125, "total_steps": 78105, "loss": 0.194, "lr": 6.109986041287008e-07, "epoch": 3.9770181166378595, "percentage": 79.54, "elapsed_time": "2:42:53", "remaining_time": "0:41:54", "throughput": 20005.06, "total_tokens": 195528512}
|
|
{"current_steps": 62130, "total_steps": 78105, "loss": 0.2468, "lr": 6.106327157569819e-07, "epoch": 3.977338198578836, "percentage": 79.55, "elapsed_time": "2:42:54", "remaining_time": "0:41:53", "throughput": 20005.23, "total_tokens": 195543104}
|
|
{"current_steps": 62135, "total_steps": 78105, "loss": 0.1486, "lr": 6.102669217304236e-07, "epoch": 3.977658280519813, "percentage": 79.55, "elapsed_time": "2:42:55", "remaining_time": "0:41:52", "throughput": 20005.42, "total_tokens": 195558272}
|
|
{"current_steps": 62140, "total_steps": 78105, "loss": 0.1698, "lr": 6.099012220672914e-07, "epoch": 3.97797836246079, "percentage": 79.56, "elapsed_time": "2:42:55", "remaining_time": "0:41:51", "throughput": 20005.63, "total_tokens": 195573632}
|
|
{"current_steps": 62145, "total_steps": 78105, "loss": 0.1665, "lr": 6.095356167858484e-07, "epoch": 3.9782984444017666, "percentage": 79.57, "elapsed_time": "2:42:56", "remaining_time": "0:41:50", "throughput": 20005.92, "total_tokens": 195590592}
|
|
{"current_steps": 62150, "total_steps": 78105, "loss": 0.2224, "lr": 6.091701059043484e-07, "epoch": 3.9786185263427436, "percentage": 79.57, "elapsed_time": "2:42:57", "remaining_time": "0:41:50", "throughput": 20006.15, "total_tokens": 195606592}
|
|
{"current_steps": 62155, "total_steps": 78105, "loss": 0.151, "lr": 6.088046894410448e-07, "epoch": 3.9789386082837206, "percentage": 79.58, "elapsed_time": "2:42:58", "remaining_time": "0:41:49", "throughput": 20006.39, "total_tokens": 195622656}
|
|
{"current_steps": 62160, "total_steps": 78105, "loss": 0.1325, "lr": 6.084393674141842e-07, "epoch": 3.9792586902246976, "percentage": 79.59, "elapsed_time": "2:42:58", "remaining_time": "0:41:48", "throughput": 20006.6, "total_tokens": 195638080}
|
|
{"current_steps": 62165, "total_steps": 78105, "loss": 0.1371, "lr": 6.080741398420089e-07, "epoch": 3.9795787721656746, "percentage": 79.59, "elapsed_time": "2:42:59", "remaining_time": "0:41:47", "throughput": 20006.83, "total_tokens": 195654016}
|
|
{"current_steps": 62170, "total_steps": 78105, "loss": 0.1487, "lr": 6.077090067427563e-07, "epoch": 3.9798988541066516, "percentage": 79.6, "elapsed_time": "2:43:00", "remaining_time": "0:41:46", "throughput": 20007.05, "total_tokens": 195669504}
|
|
{"current_steps": 62175, "total_steps": 78105, "loss": 0.1872, "lr": 6.073439681346593e-07, "epoch": 3.980218936047628, "percentage": 79.6, "elapsed_time": "2:43:00", "remaining_time": "0:41:45", "throughput": 20007.25, "total_tokens": 195684288}
|
|
{"current_steps": 62180, "total_steps": 78105, "loss": 0.2259, "lr": 6.069790240359461e-07, "epoch": 3.980539017988605, "percentage": 79.61, "elapsed_time": "2:43:01", "remaining_time": "0:41:45", "throughput": 20007.47, "total_tokens": 195700224}
|
|
{"current_steps": 62185, "total_steps": 78105, "loss": 0.2132, "lr": 6.066141744648401e-07, "epoch": 3.980859099929582, "percentage": 79.62, "elapsed_time": "2:43:02", "remaining_time": "0:41:44", "throughput": 20007.67, "total_tokens": 195715520}
|
|
{"current_steps": 62190, "total_steps": 78105, "loss": 0.1509, "lr": 6.062494194395591e-07, "epoch": 3.9811791818705586, "percentage": 79.62, "elapsed_time": "2:43:02", "remaining_time": "0:41:43", "throughput": 20007.89, "total_tokens": 195731264}
|
|
{"current_steps": 62195, "total_steps": 78105, "loss": 0.2026, "lr": 6.058847589783193e-07, "epoch": 3.9814992638115356, "percentage": 79.63, "elapsed_time": "2:43:03", "remaining_time": "0:41:42", "throughput": 20008.1, "total_tokens": 195746560}
|
|
{"current_steps": 62200, "total_steps": 78105, "loss": 0.1928, "lr": 6.055201930993274e-07, "epoch": 3.9818193457525126, "percentage": 79.64, "elapsed_time": "2:43:04", "remaining_time": "0:41:41", "throughput": 20008.32, "total_tokens": 195762304}
|
|
{"current_steps": 62205, "total_steps": 78105, "loss": 0.1464, "lr": 6.051557218207896e-07, "epoch": 3.9821394276934896, "percentage": 79.64, "elapsed_time": "2:43:04", "remaining_time": "0:41:41", "throughput": 20008.5, "total_tokens": 195777024}
|
|
{"current_steps": 62210, "total_steps": 78105, "loss": 0.1745, "lr": 6.04791345160905e-07, "epoch": 3.9824595096344666, "percentage": 79.65, "elapsed_time": "2:43:05", "remaining_time": "0:41:40", "throughput": 20008.72, "total_tokens": 195792768}
|
|
{"current_steps": 62215, "total_steps": 78105, "loss": 0.1862, "lr": 6.044270631378685e-07, "epoch": 3.9827795915754436, "percentage": 79.66, "elapsed_time": "2:43:06", "remaining_time": "0:41:39", "throughput": 20008.9, "total_tokens": 195807360}
|
|
{"current_steps": 62220, "total_steps": 78105, "loss": 0.2348, "lr": 6.040628757698719e-07, "epoch": 3.98309967351642, "percentage": 79.66, "elapsed_time": "2:43:06", "remaining_time": "0:41:38", "throughput": 20009.12, "total_tokens": 195823040}
|
|
{"current_steps": 62225, "total_steps": 78105, "loss": 0.2013, "lr": 6.036987830750984e-07, "epoch": 3.983419755457397, "percentage": 79.67, "elapsed_time": "2:43:07", "remaining_time": "0:41:37", "throughput": 20009.37, "total_tokens": 195839872}
|
|
{"current_steps": 62230, "total_steps": 78105, "loss": 0.1472, "lr": 6.033347850717316e-07, "epoch": 3.983739837398374, "percentage": 79.67, "elapsed_time": "2:43:08", "remaining_time": "0:41:36", "throughput": 20009.67, "total_tokens": 195857152}
|
|
{"current_steps": 62235, "total_steps": 78105, "loss": 0.206, "lr": 6.029708817779447e-07, "epoch": 3.9840599193393507, "percentage": 79.68, "elapsed_time": "2:43:08", "remaining_time": "0:41:36", "throughput": 20009.88, "total_tokens": 195872576}
|
|
{"current_steps": 62240, "total_steps": 78105, "loss": 0.1288, "lr": 6.026070732119115e-07, "epoch": 3.9843800012803277, "percentage": 79.69, "elapsed_time": "2:43:09", "remaining_time": "0:41:35", "throughput": 20010.09, "total_tokens": 195887744}
|
|
{"current_steps": 62245, "total_steps": 78105, "loss": 0.1633, "lr": 6.022433593917981e-07, "epoch": 3.9847000832213046, "percentage": 79.69, "elapsed_time": "2:43:10", "remaining_time": "0:41:34", "throughput": 20010.29, "total_tokens": 195903168}
|
|
{"current_steps": 62250, "total_steps": 78105, "loss": 0.0829, "lr": 6.018797403357662e-07, "epoch": 3.9850201651622816, "percentage": 79.7, "elapsed_time": "2:43:10", "remaining_time": "0:41:33", "throughput": 20010.56, "total_tokens": 195920192}
|
|
{"current_steps": 62255, "total_steps": 78105, "loss": 0.1791, "lr": 6.015162160619731e-07, "epoch": 3.9853402471032586, "percentage": 79.71, "elapsed_time": "2:43:11", "remaining_time": "0:41:32", "throughput": 20010.79, "total_tokens": 195935680}
|
|
{"current_steps": 62260, "total_steps": 78105, "loss": 0.1432, "lr": 6.011527865885705e-07, "epoch": 3.985660329044235, "percentage": 79.71, "elapsed_time": "2:43:12", "remaining_time": "0:41:32", "throughput": 20010.98, "total_tokens": 195950720}
|
|
{"current_steps": 62265, "total_steps": 78105, "loss": 0.186, "lr": 6.007894519337088e-07, "epoch": 3.985980410985212, "percentage": 79.72, "elapsed_time": "2:43:12", "remaining_time": "0:41:31", "throughput": 20011.23, "total_tokens": 195966976}
|
|
{"current_steps": 62270, "total_steps": 78105, "loss": 0.2086, "lr": 6.004262121155275e-07, "epoch": 3.986300492926189, "percentage": 79.73, "elapsed_time": "2:43:13", "remaining_time": "0:41:30", "throughput": 20011.43, "total_tokens": 195982080}
|
|
{"current_steps": 62275, "total_steps": 78105, "loss": 0.2099, "lr": 6.000630671521673e-07, "epoch": 3.986620574867166, "percentage": 79.73, "elapsed_time": "2:43:14", "remaining_time": "0:41:29", "throughput": 20011.66, "total_tokens": 195997952}
|
|
{"current_steps": 62280, "total_steps": 78105, "loss": 0.1217, "lr": 5.997000170617612e-07, "epoch": 3.9869406568081427, "percentage": 79.74, "elapsed_time": "2:43:14", "remaining_time": "0:41:28", "throughput": 20011.9, "total_tokens": 196014208}
|
|
{"current_steps": 62285, "total_steps": 78105, "loss": 0.1811, "lr": 5.993370618624378e-07, "epoch": 3.9872607387491197, "percentage": 79.75, "elapsed_time": "2:43:15", "remaining_time": "0:41:28", "throughput": 20012.18, "total_tokens": 196031296}
|
|
{"current_steps": 62290, "total_steps": 78105, "loss": 0.1576, "lr": 5.989742015723215e-07, "epoch": 3.9875808206900967, "percentage": 79.75, "elapsed_time": "2:43:16", "remaining_time": "0:41:27", "throughput": 20012.38, "total_tokens": 196046592}
|
|
{"current_steps": 62295, "total_steps": 78105, "loss": 0.1303, "lr": 5.986114362095316e-07, "epoch": 3.9879009026310737, "percentage": 79.76, "elapsed_time": "2:43:16", "remaining_time": "0:41:26", "throughput": 20012.58, "total_tokens": 196062016}
|
|
{"current_steps": 62300, "total_steps": 78105, "loss": 0.1843, "lr": 5.982487657921818e-07, "epoch": 3.9882209845720507, "percentage": 79.76, "elapsed_time": "2:43:17", "remaining_time": "0:41:25", "throughput": 20012.76, "total_tokens": 196077184}
|
|
{"current_steps": 62305, "total_steps": 78105, "loss": 0.1572, "lr": 5.978861903383843e-07, "epoch": 3.988541066513027, "percentage": 79.77, "elapsed_time": "2:43:18", "remaining_time": "0:41:24", "throughput": 20012.99, "total_tokens": 196092864}
|
|
{"current_steps": 62310, "total_steps": 78105, "loss": 0.1924, "lr": 5.975237098662409e-07, "epoch": 3.988861148454004, "percentage": 79.78, "elapsed_time": "2:43:18", "remaining_time": "0:41:23", "throughput": 20013.18, "total_tokens": 196107776}
|
|
{"current_steps": 62315, "total_steps": 78105, "loss": 0.1844, "lr": 5.971613243938548e-07, "epoch": 3.989181230394981, "percentage": 79.78, "elapsed_time": "2:43:19", "remaining_time": "0:41:23", "throughput": 20013.4, "total_tokens": 196123712}
|
|
{"current_steps": 62320, "total_steps": 78105, "loss": 0.17, "lr": 5.967990339393206e-07, "epoch": 3.9895013123359577, "percentage": 79.79, "elapsed_time": "2:43:20", "remaining_time": "0:41:22", "throughput": 20013.58, "total_tokens": 196138560}
|
|
{"current_steps": 62325, "total_steps": 78105, "loss": 0.2002, "lr": 5.964368385207292e-07, "epoch": 3.9898213942769347, "percentage": 79.8, "elapsed_time": "2:43:20", "remaining_time": "0:41:21", "throughput": 20013.77, "total_tokens": 196153664}
|
|
{"current_steps": 62330, "total_steps": 78105, "loss": 0.126, "lr": 5.960747381561669e-07, "epoch": 3.9901414762179117, "percentage": 79.8, "elapsed_time": "2:43:21", "remaining_time": "0:41:20", "throughput": 20014.0, "total_tokens": 196169536}
|
|
{"current_steps": 62335, "total_steps": 78105, "loss": 0.1623, "lr": 5.957127328637141e-07, "epoch": 3.9904615581588887, "percentage": 79.81, "elapsed_time": "2:43:22", "remaining_time": "0:41:19", "throughput": 20014.21, "total_tokens": 196184960}
|
|
{"current_steps": 62340, "total_steps": 78105, "loss": 0.2614, "lr": 5.953508226614499e-07, "epoch": 3.9907816400998657, "percentage": 79.82, "elapsed_time": "2:43:22", "remaining_time": "0:41:19", "throughput": 20014.46, "total_tokens": 196201152}
|
|
{"current_steps": 62345, "total_steps": 78105, "loss": 0.1714, "lr": 5.949890075674433e-07, "epoch": 3.9911017220408427, "percentage": 79.82, "elapsed_time": "2:43:23", "remaining_time": "0:41:18", "throughput": 20014.67, "total_tokens": 196216960}
|
|
{"current_steps": 62350, "total_steps": 78105, "loss": 0.153, "lr": 5.946272875997636e-07, "epoch": 3.9914218039818192, "percentage": 79.83, "elapsed_time": "2:43:24", "remaining_time": "0:41:17", "throughput": 20014.89, "total_tokens": 196232384}
|
|
{"current_steps": 62355, "total_steps": 78105, "loss": 0.1225, "lr": 5.942656627764723e-07, "epoch": 3.991741885922796, "percentage": 79.83, "elapsed_time": "2:43:24", "remaining_time": "0:41:16", "throughput": 20015.06, "total_tokens": 196247040}
|
|
{"current_steps": 62360, "total_steps": 78105, "loss": 0.2269, "lr": 5.93904133115627e-07, "epoch": 3.992061967863773, "percentage": 79.84, "elapsed_time": "2:43:25", "remaining_time": "0:41:15", "throughput": 20015.32, "total_tokens": 196263744}
|
|
{"current_steps": 62365, "total_steps": 78105, "loss": 0.1624, "lr": 5.935426986352811e-07, "epoch": 3.9923820498047498, "percentage": 79.85, "elapsed_time": "2:43:26", "remaining_time": "0:41:14", "throughput": 20015.53, "total_tokens": 196279168}
|
|
{"current_steps": 62370, "total_steps": 78105, "loss": 0.1554, "lr": 5.931813593534821e-07, "epoch": 3.9927021317457267, "percentage": 79.85, "elapsed_time": "2:43:27", "remaining_time": "0:41:14", "throughput": 20015.77, "total_tokens": 196295168}
|
|
{"current_steps": 62375, "total_steps": 78105, "loss": 0.1292, "lr": 5.92820115288274e-07, "epoch": 3.9930222136867037, "percentage": 79.86, "elapsed_time": "2:43:27", "remaining_time": "0:41:13", "throughput": 20016.02, "total_tokens": 196311616}
|
|
{"current_steps": 62380, "total_steps": 78105, "loss": 0.1403, "lr": 5.924589664576946e-07, "epoch": 3.9933422956276807, "percentage": 79.87, "elapsed_time": "2:43:28", "remaining_time": "0:41:12", "throughput": 20016.2, "total_tokens": 196326144}
|
|
{"current_steps": 62385, "total_steps": 78105, "loss": 0.1459, "lr": 5.920979128797791e-07, "epoch": 3.9936623775686577, "percentage": 79.87, "elapsed_time": "2:43:29", "remaining_time": "0:41:11", "throughput": 20016.46, "total_tokens": 196342464}
|
|
{"current_steps": 62390, "total_steps": 78105, "loss": 0.1534, "lr": 5.917369545725557e-07, "epoch": 3.9939824595096347, "percentage": 79.88, "elapsed_time": "2:43:29", "remaining_time": "0:41:10", "throughput": 20016.68, "total_tokens": 196358400}
|
|
{"current_steps": 62395, "total_steps": 78105, "loss": 0.1893, "lr": 5.913760915540493e-07, "epoch": 3.9943025414506113, "percentage": 79.89, "elapsed_time": "2:43:30", "remaining_time": "0:41:10", "throughput": 20016.88, "total_tokens": 196373760}
|
|
{"current_steps": 62400, "total_steps": 78105, "loss": 0.133, "lr": 5.910153238422789e-07, "epoch": 3.9946226233915882, "percentage": 79.89, "elapsed_time": "2:43:31", "remaining_time": "0:41:09", "throughput": 20017.1, "total_tokens": 196389632}
|
|
{"current_steps": 62405, "total_steps": 78105, "loss": 0.1228, "lr": 5.906546514552599e-07, "epoch": 3.9949427053325652, "percentage": 79.9, "elapsed_time": "2:43:31", "remaining_time": "0:41:08", "throughput": 20017.32, "total_tokens": 196405312}
|
|
{"current_steps": 62410, "total_steps": 78105, "loss": 0.1461, "lr": 5.90294074411002e-07, "epoch": 3.995262787273542, "percentage": 79.91, "elapsed_time": "2:43:32", "remaining_time": "0:41:07", "throughput": 20017.52, "total_tokens": 196420864}
|
|
{"current_steps": 62415, "total_steps": 78105, "loss": 0.2077, "lr": 5.89933592727511e-07, "epoch": 3.9955828692145188, "percentage": 79.91, "elapsed_time": "2:43:33", "remaining_time": "0:41:06", "throughput": 20017.73, "total_tokens": 196436224}
|
|
{"current_steps": 62420, "total_steps": 78105, "loss": 0.1708, "lr": 5.895732064227863e-07, "epoch": 3.9959029511554958, "percentage": 79.92, "elapsed_time": "2:43:33", "remaining_time": "0:41:06", "throughput": 20017.96, "total_tokens": 196452416}
|
|
{"current_steps": 62425, "total_steps": 78105, "loss": 0.1581, "lr": 5.89212915514826e-07, "epoch": 3.9962230330964728, "percentage": 79.92, "elapsed_time": "2:43:34", "remaining_time": "0:41:05", "throughput": 20018.18, "total_tokens": 196468416}
|
|
{"current_steps": 62430, "total_steps": 78105, "loss": 0.1456, "lr": 5.888527200216185e-07, "epoch": 3.9965431150374497, "percentage": 79.93, "elapsed_time": "2:43:35", "remaining_time": "0:41:04", "throughput": 20018.37, "total_tokens": 196483328}
|
|
{"current_steps": 62435, "total_steps": 78105, "loss": 0.2012, "lr": 5.884926199611521e-07, "epoch": 3.9968631969784267, "percentage": 79.94, "elapsed_time": "2:43:35", "remaining_time": "0:41:03", "throughput": 20018.58, "total_tokens": 196498752}
|
|
{"current_steps": 62440, "total_steps": 78105, "loss": 0.2059, "lr": 5.881326153514075e-07, "epoch": 3.9971832789194033, "percentage": 79.94, "elapsed_time": "2:43:36", "remaining_time": "0:41:02", "throughput": 20018.79, "total_tokens": 196514624}
|
|
{"current_steps": 62445, "total_steps": 78105, "loss": 0.2552, "lr": 5.877727062103608e-07, "epoch": 3.9975033608603803, "percentage": 79.95, "elapsed_time": "2:43:37", "remaining_time": "0:41:01", "throughput": 20018.97, "total_tokens": 196529088}
|
|
{"current_steps": 62450, "total_steps": 78105, "loss": 0.1713, "lr": 5.874128925559863e-07, "epoch": 3.9978234428013573, "percentage": 79.96, "elapsed_time": "2:43:37", "remaining_time": "0:41:01", "throughput": 20019.17, "total_tokens": 196544128}
|
|
{"current_steps": 62455, "total_steps": 78105, "loss": 0.2296, "lr": 5.870531744062479e-07, "epoch": 3.998143524742334, "percentage": 79.96, "elapsed_time": "2:43:38", "remaining_time": "0:41:00", "throughput": 20019.35, "total_tokens": 196558848}
|
|
{"current_steps": 62460, "total_steps": 78105, "loss": 0.1552, "lr": 5.866935517791114e-07, "epoch": 3.998463606683311, "percentage": 79.97, "elapsed_time": "2:43:39", "remaining_time": "0:40:59", "throughput": 20019.55, "total_tokens": 196574272}
|
|
{"current_steps": 62465, "total_steps": 78105, "loss": 0.2365, "lr": 5.863340246925317e-07, "epoch": 3.998783688624288, "percentage": 79.98, "elapsed_time": "2:43:39", "remaining_time": "0:40:58", "throughput": 20019.76, "total_tokens": 196589696}
|
|
{"current_steps": 62470, "total_steps": 78105, "loss": 0.124, "lr": 5.859745931644631e-07, "epoch": 3.999103770565265, "percentage": 79.98, "elapsed_time": "2:43:40", "remaining_time": "0:40:57", "throughput": 20020.03, "total_tokens": 196606656}
|
|
{"current_steps": 62475, "total_steps": 78105, "loss": 0.1343, "lr": 5.85615257212854e-07, "epoch": 3.9994238525062418, "percentage": 79.99, "elapsed_time": "2:43:41", "remaining_time": "0:40:57", "throughput": 20020.26, "total_tokens": 196622336}
|
|
{"current_steps": 62480, "total_steps": 78105, "loss": 0.1703, "lr": 5.85256016855647e-07, "epoch": 3.9997439344472188, "percentage": 79.99, "elapsed_time": "2:43:41", "remaining_time": "0:40:56", "throughput": 20020.46, "total_tokens": 196637888}
|
|
{"current_steps": 62485, "total_steps": 78105, "loss": 0.1188, "lr": 5.848968721107812e-07, "epoch": 4.000064016388196, "percentage": 80.0, "elapsed_time": "2:43:42", "remaining_time": "0:40:55", "throughput": 20020.47, "total_tokens": 196653696}
|
|
{"current_steps": 62490, "total_steps": 78105, "loss": 0.1062, "lr": 5.845378229961893e-07, "epoch": 4.000384098329172, "percentage": 80.01, "elapsed_time": "2:43:43", "remaining_time": "0:40:54", "throughput": 20020.69, "total_tokens": 196669312}
|
|
{"current_steps": 62495, "total_steps": 78105, "loss": 0.1773, "lr": 5.841788695298017e-07, "epoch": 4.000704180270149, "percentage": 80.01, "elapsed_time": "2:43:43", "remaining_time": "0:40:53", "throughput": 20020.91, "total_tokens": 196685312}
|
|
{"current_steps": 62496, "total_steps": 78105, "eval_loss": 0.5554755926132202, "epoch": 4.000768196658345, "percentage": 80.02, "elapsed_time": "2:44:35", "remaining_time": "0:41:06", "throughput": 19917.52, "total_tokens": 196687936}
|
|
{"current_steps": 62500, "total_steps": 78105, "loss": 0.1117, "lr": 5.838200117295426e-07, "epoch": 4.001024262211126, "percentage": 80.02, "elapsed_time": "2:45:10", "remaining_time": "0:41:14", "throughput": 19848.34, "total_tokens": 196700928}
|
|
{"current_steps": 62505, "total_steps": 78105, "loss": 0.1308, "lr": 5.83461249613331e-07, "epoch": 4.001344344152103, "percentage": 80.03, "elapsed_time": "2:45:10", "remaining_time": "0:41:13", "throughput": 19848.57, "total_tokens": 196717120}
|
|
{"current_steps": 62510, "total_steps": 78105, "loss": 0.1165, "lr": 5.831025831990817e-07, "epoch": 4.00166442609308, "percentage": 80.03, "elapsed_time": "2:45:11", "remaining_time": "0:41:12", "throughput": 19848.81, "total_tokens": 196732608}
|
|
{"current_steps": 62515, "total_steps": 78105, "loss": 0.0882, "lr": 5.827440125047046e-07, "epoch": 4.001984508034057, "percentage": 80.04, "elapsed_time": "2:45:12", "remaining_time": "0:41:11", "throughput": 19849.04, "total_tokens": 196748416}
|
|
{"current_steps": 62520, "total_steps": 78105, "loss": 0.1229, "lr": 5.823855375481049e-07, "epoch": 4.002304589975034, "percentage": 80.05, "elapsed_time": "2:45:12", "remaining_time": "0:41:11", "throughput": 19849.37, "total_tokens": 196766272}
|
|
{"current_steps": 62525, "total_steps": 78105, "loss": 0.0923, "lr": 5.820271583471832e-07, "epoch": 4.002624671916011, "percentage": 80.05, "elapsed_time": "2:45:13", "remaining_time": "0:41:10", "throughput": 19849.59, "total_tokens": 196781632}
|
|
{"current_steps": 62530, "total_steps": 78105, "loss": 0.1239, "lr": 5.81668874919834e-07, "epoch": 4.002944753856988, "percentage": 80.06, "elapsed_time": "2:45:14", "remaining_time": "0:41:09", "throughput": 19849.83, "total_tokens": 196797312}
|
|
{"current_steps": 62535, "total_steps": 78105, "loss": 0.1743, "lr": 5.813106872839503e-07, "epoch": 4.003264835797964, "percentage": 80.07, "elapsed_time": "2:45:14", "remaining_time": "0:41:08", "throughput": 19850.06, "total_tokens": 196813184}
|
|
{"current_steps": 62540, "total_steps": 78105, "loss": 0.1132, "lr": 5.809525954574153e-07, "epoch": 4.003584917738941, "percentage": 80.07, "elapsed_time": "2:45:15", "remaining_time": "0:41:07", "throughput": 19850.3, "total_tokens": 196828992}
|
|
{"current_steps": 62545, "total_steps": 78105, "loss": 0.092, "lr": 5.805945994581133e-07, "epoch": 4.003904999679918, "percentage": 80.08, "elapsed_time": "2:45:16", "remaining_time": "0:41:06", "throughput": 19850.5, "total_tokens": 196844160}
|
|
{"current_steps": 62550, "total_steps": 78105, "loss": 0.1455, "lr": 5.802366993039176e-07, "epoch": 4.004225081620895, "percentage": 80.08, "elapsed_time": "2:45:16", "remaining_time": "0:41:06", "throughput": 19850.71, "total_tokens": 196859456}
|
|
{"current_steps": 62555, "total_steps": 78105, "loss": 0.1404, "lr": 5.798788950127024e-07, "epoch": 4.004545163561872, "percentage": 80.09, "elapsed_time": "2:45:17", "remaining_time": "0:41:05", "throughput": 19850.93, "total_tokens": 196875072}
|
|
{"current_steps": 62560, "total_steps": 78105, "loss": 0.1236, "lr": 5.795211866023334e-07, "epoch": 4.004865245502849, "percentage": 80.1, "elapsed_time": "2:45:18", "remaining_time": "0:41:04", "throughput": 19851.09, "total_tokens": 196889088}
|
|
{"current_steps": 62565, "total_steps": 78105, "loss": 0.0884, "lr": 5.791635740906725e-07, "epoch": 4.005185327443826, "percentage": 80.1, "elapsed_time": "2:45:18", "remaining_time": "0:41:03", "throughput": 19851.26, "total_tokens": 196903744}
|
|
{"current_steps": 62570, "total_steps": 78105, "loss": 0.1446, "lr": 5.788060574955787e-07, "epoch": 4.005505409384803, "percentage": 80.11, "elapsed_time": "2:45:19", "remaining_time": "0:41:02", "throughput": 19851.49, "total_tokens": 196919296}
|
|
{"current_steps": 62575, "total_steps": 78105, "loss": 0.1382, "lr": 5.784486368349018e-07, "epoch": 4.00582549132578, "percentage": 80.12, "elapsed_time": "2:45:20", "remaining_time": "0:41:02", "throughput": 19851.69, "total_tokens": 196934528}
|
|
{"current_steps": 62580, "total_steps": 78105, "loss": 0.1447, "lr": 5.780913121264916e-07, "epoch": 4.006145573266756, "percentage": 80.12, "elapsed_time": "2:45:20", "remaining_time": "0:41:01", "throughput": 19851.96, "total_tokens": 196951104}
|
|
{"current_steps": 62585, "total_steps": 78105, "loss": 0.1598, "lr": 5.777340833881906e-07, "epoch": 4.006465655207733, "percentage": 80.13, "elapsed_time": "2:45:21", "remaining_time": "0:41:00", "throughput": 19852.17, "total_tokens": 196966336}
|
|
{"current_steps": 62590, "total_steps": 78105, "loss": 0.1354, "lr": 5.773769506378369e-07, "epoch": 4.00678573714871, "percentage": 80.14, "elapsed_time": "2:45:22", "remaining_time": "0:40:59", "throughput": 19852.72, "total_tokens": 196995136}
|
|
{"current_steps": 62595, "total_steps": 78105, "loss": 0.1417, "lr": 5.770199138932633e-07, "epoch": 4.007105819089687, "percentage": 80.14, "elapsed_time": "2:45:23", "remaining_time": "0:40:58", "throughput": 19852.94, "total_tokens": 197010496}
|
|
{"current_steps": 62600, "total_steps": 78105, "loss": 0.1278, "lr": 5.76662973172299e-07, "epoch": 4.007425901030664, "percentage": 80.15, "elapsed_time": "2:45:24", "remaining_time": "0:40:58", "throughput": 19853.19, "total_tokens": 197026560}
|
|
{"current_steps": 62605, "total_steps": 78105, "loss": 0.0678, "lr": 5.763061284927677e-07, "epoch": 4.007745982971641, "percentage": 80.15, "elapsed_time": "2:45:24", "remaining_time": "0:40:57", "throughput": 19853.47, "total_tokens": 197043712}
|
|
{"current_steps": 62610, "total_steps": 78105, "loss": 0.1013, "lr": 5.759493798724872e-07, "epoch": 4.008066064912618, "percentage": 80.16, "elapsed_time": "2:45:25", "remaining_time": "0:40:56", "throughput": 19853.75, "total_tokens": 197060608}
|
|
{"current_steps": 62615, "total_steps": 78105, "loss": 0.1109, "lr": 5.755927273292733e-07, "epoch": 4.008386146853595, "percentage": 80.17, "elapsed_time": "2:45:26", "remaining_time": "0:40:55", "throughput": 19853.97, "total_tokens": 197076288}
|
|
{"current_steps": 62620, "total_steps": 78105, "loss": 0.1732, "lr": 5.752361708809348e-07, "epoch": 4.008706228794572, "percentage": 80.17, "elapsed_time": "2:45:26", "remaining_time": "0:40:54", "throughput": 19854.19, "total_tokens": 197091712}
|
|
{"current_steps": 62625, "total_steps": 78105, "loss": 0.1169, "lr": 5.748797105452764e-07, "epoch": 4.009026310735548, "percentage": 80.18, "elapsed_time": "2:45:27", "remaining_time": "0:40:53", "throughput": 19854.37, "total_tokens": 197106496}
|
|
{"current_steps": 62630, "total_steps": 78105, "loss": 0.1016, "lr": 5.745233463400977e-07, "epoch": 4.009346392676525, "percentage": 80.19, "elapsed_time": "2:45:28", "remaining_time": "0:40:53", "throughput": 19854.59, "total_tokens": 197122176}
|
|
{"current_steps": 62635, "total_steps": 78105, "loss": 0.1032, "lr": 5.741670782831931e-07, "epoch": 4.009666474617502, "percentage": 80.19, "elapsed_time": "2:45:28", "remaining_time": "0:40:52", "throughput": 19854.83, "total_tokens": 197138240}
|
|
{"current_steps": 62640, "total_steps": 78105, "loss": 0.1633, "lr": 5.738109063923539e-07, "epoch": 4.009986556558479, "percentage": 80.2, "elapsed_time": "2:45:29", "remaining_time": "0:40:51", "throughput": 19855.03, "total_tokens": 197153408}
|
|
{"current_steps": 62645, "total_steps": 78105, "loss": 0.1155, "lr": 5.734548306853646e-07, "epoch": 4.010306638499456, "percentage": 80.21, "elapsed_time": "2:45:30", "remaining_time": "0:40:50", "throughput": 19855.23, "total_tokens": 197168640}
|
|
{"current_steps": 62650, "total_steps": 78105, "loss": 0.1154, "lr": 5.730988511800053e-07, "epoch": 4.010626720440433, "percentage": 80.21, "elapsed_time": "2:45:30", "remaining_time": "0:40:49", "throughput": 19855.45, "total_tokens": 197184448}
|
|
{"current_steps": 62655, "total_steps": 78105, "loss": 0.1091, "lr": 5.727429678940541e-07, "epoch": 4.01094680238141, "percentage": 80.22, "elapsed_time": "2:45:31", "remaining_time": "0:40:49", "throughput": 19855.7, "total_tokens": 197201088}
|
|
{"current_steps": 62660, "total_steps": 78105, "loss": 0.0878, "lr": 5.723871808452788e-07, "epoch": 4.011266884322387, "percentage": 80.23, "elapsed_time": "2:45:32", "remaining_time": "0:40:48", "throughput": 19855.9, "total_tokens": 197216320}
|
|
{"current_steps": 62665, "total_steps": 78105, "loss": 0.1662, "lr": 5.720314900514481e-07, "epoch": 4.011586966263364, "percentage": 80.23, "elapsed_time": "2:45:33", "remaining_time": "0:40:47", "throughput": 19856.11, "total_tokens": 197231744}
|
|
{"current_steps": 62670, "total_steps": 78105, "loss": 0.136, "lr": 5.71675895530322e-07, "epoch": 4.01190704820434, "percentage": 80.24, "elapsed_time": "2:45:33", "remaining_time": "0:40:46", "throughput": 19856.36, "total_tokens": 197248128}
|
|
{"current_steps": 62675, "total_steps": 78105, "loss": 0.0906, "lr": 5.71320397299657e-07, "epoch": 4.012227130145317, "percentage": 80.24, "elapsed_time": "2:45:34", "remaining_time": "0:40:45", "throughput": 19856.64, "total_tokens": 197265408}
|
|
{"current_steps": 62680, "total_steps": 78105, "loss": 0.1032, "lr": 5.709649953772064e-07, "epoch": 4.012547212086294, "percentage": 80.25, "elapsed_time": "2:45:35", "remaining_time": "0:40:44", "throughput": 19856.82, "total_tokens": 197280384}
|
|
{"current_steps": 62685, "total_steps": 78105, "loss": 0.1049, "lr": 5.706096897807148e-07, "epoch": 4.012867294027271, "percentage": 80.26, "elapsed_time": "2:45:35", "remaining_time": "0:40:44", "throughput": 19857.02, "total_tokens": 197295808}
|
|
{"current_steps": 62690, "total_steps": 78105, "loss": 0.1803, "lr": 5.702544805279267e-07, "epoch": 4.013187375968248, "percentage": 80.26, "elapsed_time": "2:45:36", "remaining_time": "0:40:43", "throughput": 19857.2, "total_tokens": 197310272}
|
|
{"current_steps": 62695, "total_steps": 78105, "loss": 0.1273, "lr": 5.698993676365769e-07, "epoch": 4.013507457909225, "percentage": 80.27, "elapsed_time": "2:45:37", "remaining_time": "0:40:42", "throughput": 19857.39, "total_tokens": 197325440}
|
|
{"current_steps": 62700, "total_steps": 78105, "loss": 0.0859, "lr": 5.695443511243998e-07, "epoch": 4.013827539850202, "percentage": 80.28, "elapsed_time": "2:45:37", "remaining_time": "0:40:41", "throughput": 19857.58, "total_tokens": 197340608}
|
|
{"current_steps": 62705, "total_steps": 78105, "loss": 0.1413, "lr": 5.691894310091223e-07, "epoch": 4.014147621791179, "percentage": 80.28, "elapsed_time": "2:45:38", "remaining_time": "0:40:40", "throughput": 19857.74, "total_tokens": 197354816}
|
|
{"current_steps": 62710, "total_steps": 78105, "loss": 0.1179, "lr": 5.688346073084674e-07, "epoch": 4.014467703732155, "percentage": 80.29, "elapsed_time": "2:45:39", "remaining_time": "0:40:39", "throughput": 19857.96, "total_tokens": 197370112}
|
|
{"current_steps": 62715, "total_steps": 78105, "loss": 0.1382, "lr": 5.684798800401531e-07, "epoch": 4.014787785673132, "percentage": 80.3, "elapsed_time": "2:45:39", "remaining_time": "0:40:39", "throughput": 19858.17, "total_tokens": 197385728}
|
|
{"current_steps": 62720, "total_steps": 78105, "loss": 0.1136, "lr": 5.681252492218928e-07, "epoch": 4.015107867614109, "percentage": 80.3, "elapsed_time": "2:45:40", "remaining_time": "0:40:38", "throughput": 19858.36, "total_tokens": 197401088}
|
|
{"current_steps": 62725, "total_steps": 78105, "loss": 0.1547, "lr": 5.67770714871394e-07, "epoch": 4.015427949555086, "percentage": 80.31, "elapsed_time": "2:45:41", "remaining_time": "0:40:37", "throughput": 19858.61, "total_tokens": 197417216}
|
|
{"current_steps": 62730, "total_steps": 78105, "loss": 0.103, "lr": 5.674162770063621e-07, "epoch": 4.015748031496063, "percentage": 80.31, "elapsed_time": "2:45:41", "remaining_time": "0:40:36", "throughput": 19858.82, "total_tokens": 197432768}
|
|
{"current_steps": 62735, "total_steps": 78105, "loss": 0.1073, "lr": 5.670619356444945e-07, "epoch": 4.01606811343704, "percentage": 80.32, "elapsed_time": "2:45:42", "remaining_time": "0:40:35", "throughput": 19858.99, "total_tokens": 197447360}
|
|
{"current_steps": 62740, "total_steps": 78105, "loss": 0.0736, "lr": 5.667076908034858e-07, "epoch": 4.016388195378017, "percentage": 80.33, "elapsed_time": "2:45:43", "remaining_time": "0:40:35", "throughput": 19859.22, "total_tokens": 197463232}
|
|
{"current_steps": 62745, "total_steps": 78105, "loss": 0.1192, "lr": 5.663535425010245e-07, "epoch": 4.016708277318994, "percentage": 80.33, "elapsed_time": "2:45:43", "remaining_time": "0:40:34", "throughput": 19859.48, "total_tokens": 197480000}
|
|
{"current_steps": 62750, "total_steps": 78105, "loss": 0.1443, "lr": 5.659994907547955e-07, "epoch": 4.017028359259971, "percentage": 80.34, "elapsed_time": "2:45:44", "remaining_time": "0:40:33", "throughput": 19859.69, "total_tokens": 197495424}
|
|
{"current_steps": 62755, "total_steps": 78105, "loss": 0.1448, "lr": 5.65645535582478e-07, "epoch": 4.017348441200947, "percentage": 80.35, "elapsed_time": "2:45:45", "remaining_time": "0:40:32", "throughput": 19859.91, "total_tokens": 197511168}
|
|
{"current_steps": 62760, "total_steps": 78105, "loss": 0.1295, "lr": 5.652916770017464e-07, "epoch": 4.017668523141924, "percentage": 80.35, "elapsed_time": "2:45:45", "remaining_time": "0:40:31", "throughput": 19860.16, "total_tokens": 197527552}
|
|
{"current_steps": 62765, "total_steps": 78105, "loss": 0.1673, "lr": 5.64937915030272e-07, "epoch": 4.017988605082901, "percentage": 80.36, "elapsed_time": "2:45:46", "remaining_time": "0:40:30", "throughput": 19860.39, "total_tokens": 197543488}
|
|
{"current_steps": 62770, "total_steps": 78105, "loss": 0.1176, "lr": 5.645842496857173e-07, "epoch": 4.018308687023878, "percentage": 80.37, "elapsed_time": "2:45:47", "remaining_time": "0:40:30", "throughput": 19860.6, "total_tokens": 197558656}
|
|
{"current_steps": 62775, "total_steps": 78105, "loss": 0.0851, "lr": 5.642306809857457e-07, "epoch": 4.018628768964855, "percentage": 80.37, "elapsed_time": "2:45:47", "remaining_time": "0:40:29", "throughput": 19860.78, "total_tokens": 197573312}
|
|
{"current_steps": 62780, "total_steps": 78105, "loss": 0.1452, "lr": 5.63877208948009e-07, "epoch": 4.018948850905832, "percentage": 80.38, "elapsed_time": "2:45:48", "remaining_time": "0:40:28", "throughput": 19860.98, "total_tokens": 197588672}
|
|
{"current_steps": 62785, "total_steps": 78105, "loss": 0.127, "lr": 5.635238335901605e-07, "epoch": 4.019268932846809, "percentage": 80.39, "elapsed_time": "2:45:49", "remaining_time": "0:40:27", "throughput": 19861.22, "total_tokens": 197604608}
|
|
{"current_steps": 62790, "total_steps": 78105, "loss": 0.1122, "lr": 5.631705549298452e-07, "epoch": 4.019589014787786, "percentage": 80.39, "elapsed_time": "2:45:49", "remaining_time": "0:40:26", "throughput": 19861.44, "total_tokens": 197620480}
|
|
{"current_steps": 62795, "total_steps": 78105, "loss": 0.112, "lr": 5.628173729847028e-07, "epoch": 4.019909096728763, "percentage": 80.4, "elapsed_time": "2:45:50", "remaining_time": "0:40:26", "throughput": 19861.63, "total_tokens": 197635456}
|
|
{"current_steps": 62800, "total_steps": 78105, "loss": 0.117, "lr": 5.624642877723721e-07, "epoch": 4.020229178669739, "percentage": 80.4, "elapsed_time": "2:45:51", "remaining_time": "0:40:25", "throughput": 19861.84, "total_tokens": 197650368}
|
|
{"current_steps": 62805, "total_steps": 78105, "loss": 0.179, "lr": 5.62111299310481e-07, "epoch": 4.020549260610716, "percentage": 80.41, "elapsed_time": "2:45:51", "remaining_time": "0:40:24", "throughput": 19862.07, "total_tokens": 197665920}
|
|
{"current_steps": 62810, "total_steps": 78105, "loss": 0.1436, "lr": 5.617584076166583e-07, "epoch": 4.020869342551693, "percentage": 80.42, "elapsed_time": "2:45:52", "remaining_time": "0:40:23", "throughput": 19862.27, "total_tokens": 197680704}
|
|
{"current_steps": 62815, "total_steps": 78105, "loss": 0.1605, "lr": 5.614056127085246e-07, "epoch": 4.02118942449267, "percentage": 80.42, "elapsed_time": "2:45:53", "remaining_time": "0:40:22", "throughput": 19862.45, "total_tokens": 197695168}
|
|
{"current_steps": 62820, "total_steps": 78105, "loss": 0.0953, "lr": 5.610529146036969e-07, "epoch": 4.021509506433647, "percentage": 80.43, "elapsed_time": "2:45:53", "remaining_time": "0:40:21", "throughput": 19862.64, "total_tokens": 197710016}
|
|
{"current_steps": 62825, "total_steps": 78105, "loss": 0.1414, "lr": 5.607003133197872e-07, "epoch": 4.021829588374624, "percentage": 80.44, "elapsed_time": "2:45:54", "remaining_time": "0:40:21", "throughput": 19862.82, "total_tokens": 197725056}
|
|
{"current_steps": 62830, "total_steps": 78105, "loss": 0.1614, "lr": 5.603478088744024e-07, "epoch": 4.022149670315601, "percentage": 80.44, "elapsed_time": "2:45:55", "remaining_time": "0:40:20", "throughput": 19863.04, "total_tokens": 197740672}
|
|
{"current_steps": 62835, "total_steps": 78105, "loss": 0.1611, "lr": 5.599954012851447e-07, "epoch": 4.022469752256578, "percentage": 80.45, "elapsed_time": "2:45:56", "remaining_time": "0:40:19", "throughput": 19863.53, "total_tokens": 197762816}
|
|
{"current_steps": 62840, "total_steps": 78105, "loss": 0.1423, "lr": 5.596430905696107e-07, "epoch": 4.022789834197555, "percentage": 80.46, "elapsed_time": "2:45:56", "remaining_time": "0:40:18", "throughput": 19863.71, "total_tokens": 197777472}
|
|
{"current_steps": 62845, "total_steps": 78105, "loss": 0.1156, "lr": 5.592908767453947e-07, "epoch": 4.023109916138531, "percentage": 80.46, "elapsed_time": "2:45:57", "remaining_time": "0:40:17", "throughput": 19863.93, "total_tokens": 197793152}
|
|
{"current_steps": 62850, "total_steps": 78105, "loss": 0.1107, "lr": 5.589387598300832e-07, "epoch": 4.023429998079508, "percentage": 80.47, "elapsed_time": "2:45:58", "remaining_time": "0:40:17", "throughput": 19864.12, "total_tokens": 197808256}
|
|
{"current_steps": 62855, "total_steps": 78105, "loss": 0.1397, "lr": 5.585867398412598e-07, "epoch": 4.023750080020485, "percentage": 80.48, "elapsed_time": "2:45:58", "remaining_time": "0:40:16", "throughput": 19864.33, "total_tokens": 197823616}
|
|
{"current_steps": 62860, "total_steps": 78105, "loss": 0.1653, "lr": 5.582348167965021e-07, "epoch": 4.024070161961462, "percentage": 80.48, "elapsed_time": "2:45:59", "remaining_time": "0:40:15", "throughput": 19864.56, "total_tokens": 197839168}
|
|
{"current_steps": 62865, "total_steps": 78105, "loss": 0.1231, "lr": 5.578829907133831e-07, "epoch": 4.024390243902439, "percentage": 80.49, "elapsed_time": "2:46:00", "remaining_time": "0:40:14", "throughput": 19864.86, "total_tokens": 197856320}
|
|
{"current_steps": 62870, "total_steps": 78105, "loss": 0.1525, "lr": 5.575312616094716e-07, "epoch": 4.024710325843416, "percentage": 80.49, "elapsed_time": "2:46:00", "remaining_time": "0:40:13", "throughput": 19865.05, "total_tokens": 197871104}
|
|
{"current_steps": 62875, "total_steps": 78105, "loss": 0.1495, "lr": 5.57179629502331e-07, "epoch": 4.025030407784393, "percentage": 80.5, "elapsed_time": "2:46:01", "remaining_time": "0:40:12", "throughput": 19865.29, "total_tokens": 197887104}
|
|
{"current_steps": 62880, "total_steps": 78105, "loss": 0.1425, "lr": 5.568280944095192e-07, "epoch": 4.02535048972537, "percentage": 80.51, "elapsed_time": "2:46:02", "remaining_time": "0:40:12", "throughput": 19865.49, "total_tokens": 197902144}
|
|
{"current_steps": 62885, "total_steps": 78105, "loss": 0.0992, "lr": 5.564766563485921e-07, "epoch": 4.025670571666347, "percentage": 80.51, "elapsed_time": "2:46:02", "remaining_time": "0:40:11", "throughput": 19865.73, "total_tokens": 197918208}
|
|
{"current_steps": 62890, "total_steps": 78105, "loss": 0.1317, "lr": 5.56125315337096e-07, "epoch": 4.025990653607323, "percentage": 80.52, "elapsed_time": "2:46:03", "remaining_time": "0:40:10", "throughput": 19865.95, "total_tokens": 197933504}
|
|
{"current_steps": 62895, "total_steps": 78105, "loss": 0.1354, "lr": 5.557740713925771e-07, "epoch": 4.0263107355483, "percentage": 80.53, "elapsed_time": "2:46:04", "remaining_time": "0:40:09", "throughput": 19866.18, "total_tokens": 197949248}
|
|
{"current_steps": 62900, "total_steps": 78105, "loss": 0.1555, "lr": 5.55422924532574e-07, "epoch": 4.026630817489277, "percentage": 80.53, "elapsed_time": "2:46:04", "remaining_time": "0:40:08", "throughput": 19866.39, "total_tokens": 197964672}
|
|
{"current_steps": 62905, "total_steps": 78105, "loss": 0.0964, "lr": 5.550718747746203e-07, "epoch": 4.026950899430254, "percentage": 80.54, "elapsed_time": "2:46:05", "remaining_time": "0:40:08", "throughput": 19866.62, "total_tokens": 197980544}
|
|
{"current_steps": 62910, "total_steps": 78105, "loss": 0.0851, "lr": 5.547209221362479e-07, "epoch": 4.027270981371231, "percentage": 80.55, "elapsed_time": "2:46:06", "remaining_time": "0:40:07", "throughput": 19866.82, "total_tokens": 197995520}
|
|
{"current_steps": 62915, "total_steps": 78105, "loss": 0.1104, "lr": 5.543700666349785e-07, "epoch": 4.027591063312208, "percentage": 80.55, "elapsed_time": "2:46:06", "remaining_time": "0:40:06", "throughput": 19867.17, "total_tokens": 198014272}
|
|
{"current_steps": 62920, "total_steps": 78105, "loss": 0.0735, "lr": 5.54019308288335e-07, "epoch": 4.027911145253185, "percentage": 80.56, "elapsed_time": "2:46:07", "remaining_time": "0:40:05", "throughput": 19867.35, "total_tokens": 198029248}
|
|
{"current_steps": 62925, "total_steps": 78105, "loss": 0.1291, "lr": 5.536686471138298e-07, "epoch": 4.028231227194162, "percentage": 80.56, "elapsed_time": "2:46:08", "remaining_time": "0:40:04", "throughput": 19867.56, "total_tokens": 198044224}
|
|
{"current_steps": 62930, "total_steps": 78105, "loss": 0.1579, "lr": 5.533180831289748e-07, "epoch": 4.028551309135139, "percentage": 80.57, "elapsed_time": "2:46:08", "remaining_time": "0:40:03", "throughput": 19867.81, "total_tokens": 198060736}
|
|
{"current_steps": 62935, "total_steps": 78105, "loss": 0.1227, "lr": 5.529676163512748e-07, "epoch": 4.028871391076115, "percentage": 80.58, "elapsed_time": "2:46:09", "remaining_time": "0:40:03", "throughput": 19868.05, "total_tokens": 198076672}
|
|
{"current_steps": 62940, "total_steps": 78105, "loss": 0.1297, "lr": 5.526172467982305e-07, "epoch": 4.029191473017092, "percentage": 80.58, "elapsed_time": "2:46:10", "remaining_time": "0:40:02", "throughput": 19868.31, "total_tokens": 198093312}
|
|
{"current_steps": 62945, "total_steps": 78105, "loss": 0.1562, "lr": 5.522669744873372e-07, "epoch": 4.029511554958069, "percentage": 80.59, "elapsed_time": "2:46:10", "remaining_time": "0:40:01", "throughput": 19868.5, "total_tokens": 198108032}
|
|
{"current_steps": 62950, "total_steps": 78105, "loss": 0.1627, "lr": 5.519167994360858e-07, "epoch": 4.029831636899046, "percentage": 80.6, "elapsed_time": "2:46:11", "remaining_time": "0:40:00", "throughput": 19868.73, "total_tokens": 198123904}
|
|
{"current_steps": 62955, "total_steps": 78105, "loss": 0.117, "lr": 5.515667216619617e-07, "epoch": 4.030151718840023, "percentage": 80.6, "elapsed_time": "2:46:12", "remaining_time": "0:39:59", "throughput": 19868.98, "total_tokens": 198140160}
|
|
{"current_steps": 62960, "total_steps": 78105, "loss": 0.0972, "lr": 5.51216741182447e-07, "epoch": 4.030471800781, "percentage": 80.61, "elapsed_time": "2:46:13", "remaining_time": "0:39:59", "throughput": 19869.23, "total_tokens": 198156544}
|
|
{"current_steps": 62965, "total_steps": 78105, "loss": 0.0985, "lr": 5.508668580150175e-07, "epoch": 4.030791882721977, "percentage": 80.62, "elapsed_time": "2:46:13", "remaining_time": "0:39:58", "throughput": 19869.5, "total_tokens": 198173504}
|
|
{"current_steps": 62970, "total_steps": 78105, "loss": 0.1219, "lr": 5.505170721771447e-07, "epoch": 4.031111964662954, "percentage": 80.62, "elapsed_time": "2:46:14", "remaining_time": "0:39:57", "throughput": 19869.71, "total_tokens": 198188416}
|
|
{"current_steps": 62975, "total_steps": 78105, "loss": 0.1055, "lr": 5.501673836862945e-07, "epoch": 4.03143204660393, "percentage": 80.63, "elapsed_time": "2:46:15", "remaining_time": "0:39:56", "throughput": 19869.89, "total_tokens": 198202944}
|
|
{"current_steps": 62980, "total_steps": 78105, "loss": 0.1133, "lr": 5.498177925599291e-07, "epoch": 4.031752128544907, "percentage": 80.64, "elapsed_time": "2:46:15", "remaining_time": "0:39:55", "throughput": 19870.09, "total_tokens": 198218240}
|
|
{"current_steps": 62985, "total_steps": 78105, "loss": 0.105, "lr": 5.494682988155048e-07, "epoch": 4.032072210485884, "percentage": 80.64, "elapsed_time": "2:46:16", "remaining_time": "0:39:54", "throughput": 19870.33, "total_tokens": 198233984}
|
|
{"current_steps": 62990, "total_steps": 78105, "loss": 0.065, "lr": 5.491189024704729e-07, "epoch": 4.032392292426861, "percentage": 80.65, "elapsed_time": "2:46:17", "remaining_time": "0:39:54", "throughput": 19870.52, "total_tokens": 198248640}
|
|
{"current_steps": 62995, "total_steps": 78105, "loss": 0.1014, "lr": 5.487696035422829e-07, "epoch": 4.032712374367838, "percentage": 80.65, "elapsed_time": "2:46:17", "remaining_time": "0:39:53", "throughput": 19870.74, "total_tokens": 198264256}
|
|
{"current_steps": 63000, "total_steps": 78105, "loss": 0.0645, "lr": 5.484204020483739e-07, "epoch": 4.033032456308815, "percentage": 80.66, "elapsed_time": "2:46:18", "remaining_time": "0:39:52", "throughput": 19870.95, "total_tokens": 198279552}
|
|
{"current_steps": 63005, "total_steps": 78105, "loss": 0.1114, "lr": 5.480712980061858e-07, "epoch": 4.033352538249792, "percentage": 80.67, "elapsed_time": "2:46:19", "remaining_time": "0:39:51", "throughput": 19871.21, "total_tokens": 198296192}
|
|
{"current_steps": 63010, "total_steps": 78105, "loss": 0.1338, "lr": 5.477222914331487e-07, "epoch": 4.033672620190769, "percentage": 80.67, "elapsed_time": "2:46:19", "remaining_time": "0:39:50", "throughput": 19871.41, "total_tokens": 198310976}
|
|
{"current_steps": 63015, "total_steps": 78105, "loss": 0.0964, "lr": 5.47373382346692e-07, "epoch": 4.033992702131746, "percentage": 80.68, "elapsed_time": "2:46:20", "remaining_time": "0:39:49", "throughput": 19871.6, "total_tokens": 198325824}
|
|
{"current_steps": 63020, "total_steps": 78105, "loss": 0.1392, "lr": 5.470245707642375e-07, "epoch": 4.034312784072722, "percentage": 80.69, "elapsed_time": "2:46:21", "remaining_time": "0:39:49", "throughput": 19871.81, "total_tokens": 198341248}
|
|
{"current_steps": 63025, "total_steps": 78105, "loss": 0.1401, "lr": 5.466758567032027e-07, "epoch": 4.034632866013699, "percentage": 80.69, "elapsed_time": "2:46:21", "remaining_time": "0:39:48", "throughput": 19872.01, "total_tokens": 198356544}
|
|
{"current_steps": 63030, "total_steps": 78105, "loss": 0.1066, "lr": 5.463272401810025e-07, "epoch": 4.034952947954676, "percentage": 80.7, "elapsed_time": "2:46:22", "remaining_time": "0:39:47", "throughput": 19872.24, "total_tokens": 198372416}
|
|
{"current_steps": 63035, "total_steps": 78105, "loss": 0.1044, "lr": 5.459787212150422e-07, "epoch": 4.035273029895653, "percentage": 80.71, "elapsed_time": "2:46:23", "remaining_time": "0:39:46", "throughput": 19872.48, "total_tokens": 198388096}
|
|
{"current_steps": 63040, "total_steps": 78105, "loss": 0.1113, "lr": 5.456302998227273e-07, "epoch": 4.03559311183663, "percentage": 80.71, "elapsed_time": "2:46:23", "remaining_time": "0:39:45", "throughput": 19872.68, "total_tokens": 198403136}
|
|
{"current_steps": 63045, "total_steps": 78105, "loss": 0.1056, "lr": 5.452819760214551e-07, "epoch": 4.035913193777607, "percentage": 80.72, "elapsed_time": "2:46:24", "remaining_time": "0:39:45", "throughput": 19872.9, "total_tokens": 198418752}
|
|
{"current_steps": 63050, "total_steps": 78105, "loss": 0.1626, "lr": 5.449337498286195e-07, "epoch": 4.036233275718584, "percentage": 80.72, "elapsed_time": "2:46:25", "remaining_time": "0:39:44", "throughput": 19873.12, "total_tokens": 198434240}
|
|
{"current_steps": 63055, "total_steps": 78105, "loss": 0.1379, "lr": 5.445856212616085e-07, "epoch": 4.036553357659561, "percentage": 80.73, "elapsed_time": "2:46:25", "remaining_time": "0:39:43", "throughput": 19873.34, "total_tokens": 198449920}
|
|
{"current_steps": 63060, "total_steps": 78105, "loss": 0.1009, "lr": 5.442375903378064e-07, "epoch": 4.036873439600538, "percentage": 80.74, "elapsed_time": "2:46:26", "remaining_time": "0:39:42", "throughput": 19873.51, "total_tokens": 198464576}
|
|
{"current_steps": 63065, "total_steps": 78105, "loss": 0.21, "lr": 5.438896570745919e-07, "epoch": 4.037193521541514, "percentage": 80.74, "elapsed_time": "2:46:27", "remaining_time": "0:39:41", "throughput": 19873.73, "total_tokens": 198480192}
|
|
{"current_steps": 63070, "total_steps": 78105, "loss": 0.1761, "lr": 5.435418214893387e-07, "epoch": 4.037513603482491, "percentage": 80.75, "elapsed_time": "2:46:27", "remaining_time": "0:39:40", "throughput": 19874.02, "total_tokens": 198497408}
|
|
{"current_steps": 63075, "total_steps": 78105, "loss": 0.118, "lr": 5.431940835994156e-07, "epoch": 4.037833685423468, "percentage": 80.76, "elapsed_time": "2:46:28", "remaining_time": "0:39:40", "throughput": 19874.19, "total_tokens": 198511808}
|
|
{"current_steps": 63080, "total_steps": 78105, "loss": 0.1109, "lr": 5.42846443422188e-07, "epoch": 4.038153767364445, "percentage": 80.76, "elapsed_time": "2:46:29", "remaining_time": "0:39:39", "throughput": 19874.45, "total_tokens": 198528128}
|
|
{"current_steps": 63085, "total_steps": 78105, "loss": 0.1732, "lr": 5.424989009750145e-07, "epoch": 4.038473849305422, "percentage": 80.77, "elapsed_time": "2:46:29", "remaining_time": "0:39:38", "throughput": 19874.66, "total_tokens": 198543424}
|
|
{"current_steps": 63090, "total_steps": 78105, "loss": 0.085, "lr": 5.421514562752498e-07, "epoch": 4.038793931246399, "percentage": 80.78, "elapsed_time": "2:46:30", "remaining_time": "0:39:37", "throughput": 19874.83, "total_tokens": 198558144}
|
|
{"current_steps": 63095, "total_steps": 78105, "loss": 0.1585, "lr": 5.418041093402432e-07, "epoch": 4.039114013187376, "percentage": 80.78, "elapsed_time": "2:46:31", "remaining_time": "0:39:36", "throughput": 19875.06, "total_tokens": 198573760}
|
|
{"current_steps": 63100, "total_steps": 78105, "loss": 0.1162, "lr": 5.414568601873396e-07, "epoch": 4.039434095128353, "percentage": 80.79, "elapsed_time": "2:46:31", "remaining_time": "0:39:36", "throughput": 19875.24, "total_tokens": 198588224}
|
|
{"current_steps": 63105, "total_steps": 78105, "loss": 0.2928, "lr": 5.411097088338785e-07, "epoch": 4.03975417706933, "percentage": 80.8, "elapsed_time": "2:46:32", "remaining_time": "0:39:35", "throughput": 19875.52, "total_tokens": 198605312}
|
|
{"current_steps": 63110, "total_steps": 78105, "loss": 0.1135, "lr": 5.407626552971946e-07, "epoch": 4.040074259010306, "percentage": 80.8, "elapsed_time": "2:46:33", "remaining_time": "0:39:34", "throughput": 19875.73, "total_tokens": 198621248}
|
|
{"current_steps": 63115, "total_steps": 78105, "loss": 0.1772, "lr": 5.404156995946197e-07, "epoch": 4.040394340951283, "percentage": 80.81, "elapsed_time": "2:46:33", "remaining_time": "0:39:33", "throughput": 19875.98, "total_tokens": 198637440}
|
|
{"current_steps": 63120, "total_steps": 78105, "loss": 0.1699, "lr": 5.400688417434763e-07, "epoch": 4.04071442289226, "percentage": 80.81, "elapsed_time": "2:46:34", "remaining_time": "0:39:32", "throughput": 19876.17, "total_tokens": 198652608}
|
|
{"current_steps": 63125, "total_steps": 78105, "loss": 0.1354, "lr": 5.397220817610866e-07, "epoch": 4.041034504833237, "percentage": 80.82, "elapsed_time": "2:46:35", "remaining_time": "0:39:31", "throughput": 19876.42, "total_tokens": 198668992}
|
|
{"current_steps": 63130, "total_steps": 78105, "loss": 0.1053, "lr": 5.393754196647655e-07, "epoch": 4.041354586774214, "percentage": 80.83, "elapsed_time": "2:46:35", "remaining_time": "0:39:31", "throughput": 19876.62, "total_tokens": 198684160}
|
|
{"current_steps": 63135, "total_steps": 78105, "loss": 0.134, "lr": 5.390288554718229e-07, "epoch": 4.041674668715191, "percentage": 80.83, "elapsed_time": "2:46:36", "remaining_time": "0:39:30", "throughput": 19876.85, "total_tokens": 198700160}
|
|
{"current_steps": 63140, "total_steps": 78105, "loss": 0.1397, "lr": 5.38682389199566e-07, "epoch": 4.041994750656168, "percentage": 80.84, "elapsed_time": "2:46:37", "remaining_time": "0:39:29", "throughput": 19877.05, "total_tokens": 198715328}
|
|
{"current_steps": 63145, "total_steps": 78105, "loss": 0.1005, "lr": 5.383360208652933e-07, "epoch": 4.042314832597145, "percentage": 80.85, "elapsed_time": "2:46:37", "remaining_time": "0:39:28", "throughput": 19877.25, "total_tokens": 198730624}
|
|
{"current_steps": 63150, "total_steps": 78105, "loss": 0.1437, "lr": 5.37989750486303e-07, "epoch": 4.042634914538122, "percentage": 80.85, "elapsed_time": "2:46:38", "remaining_time": "0:39:27", "throughput": 19877.44, "total_tokens": 198745280}
|
|
{"current_steps": 63155, "total_steps": 78105, "loss": 0.1493, "lr": 5.376435780798836e-07, "epoch": 4.042954996479098, "percentage": 80.86, "elapsed_time": "2:46:39", "remaining_time": "0:39:27", "throughput": 19877.71, "total_tokens": 198761792}
|
|
{"current_steps": 63160, "total_steps": 78105, "loss": 0.1642, "lr": 5.372975036633229e-07, "epoch": 4.043275078420075, "percentage": 80.87, "elapsed_time": "2:46:39", "remaining_time": "0:39:26", "throughput": 19877.99, "total_tokens": 198778560}
|
|
{"current_steps": 63165, "total_steps": 78105, "loss": 0.0871, "lr": 5.369515272539017e-07, "epoch": 4.043595160361052, "percentage": 80.87, "elapsed_time": "2:46:40", "remaining_time": "0:39:25", "throughput": 19878.21, "total_tokens": 198793792}
|
|
{"current_steps": 63170, "total_steps": 78105, "loss": 0.1275, "lr": 5.366056488688958e-07, "epoch": 4.043915242302029, "percentage": 80.88, "elapsed_time": "2:46:41", "remaining_time": "0:39:24", "throughput": 19878.46, "total_tokens": 198810304}
|
|
{"current_steps": 63175, "total_steps": 78105, "loss": 0.1562, "lr": 5.362598685255771e-07, "epoch": 4.044235324243006, "percentage": 80.88, "elapsed_time": "2:46:41", "remaining_time": "0:39:23", "throughput": 19878.69, "total_tokens": 198826240}
|
|
{"current_steps": 63180, "total_steps": 78105, "loss": 0.1165, "lr": 5.359141862412118e-07, "epoch": 4.044555406183983, "percentage": 80.89, "elapsed_time": "2:46:42", "remaining_time": "0:39:22", "throughput": 19878.89, "total_tokens": 198841600}
|
|
{"current_steps": 63185, "total_steps": 78105, "loss": 0.1027, "lr": 5.355686020330608e-07, "epoch": 4.04487548812496, "percentage": 80.9, "elapsed_time": "2:46:43", "remaining_time": "0:39:22", "throughput": 19879.12, "total_tokens": 198857280}
|
|
{"current_steps": 63190, "total_steps": 78105, "loss": 0.1533, "lr": 5.352231159183821e-07, "epoch": 4.045195570065937, "percentage": 80.9, "elapsed_time": "2:46:44", "remaining_time": "0:39:21", "throughput": 19879.43, "total_tokens": 198875008}
|
|
{"current_steps": 63195, "total_steps": 78105, "loss": 0.1865, "lr": 5.348777279144269e-07, "epoch": 4.045515652006914, "percentage": 80.91, "elapsed_time": "2:46:44", "remaining_time": "0:39:20", "throughput": 19879.62, "total_tokens": 198889792}
|
|
{"current_steps": 63200, "total_steps": 78105, "loss": 0.1483, "lr": 5.34532438038442e-07, "epoch": 4.04583573394789, "percentage": 80.92, "elapsed_time": "2:46:45", "remaining_time": "0:39:19", "throughput": 19879.81, "total_tokens": 198904384}
|
|
{"current_steps": 63205, "total_steps": 78105, "loss": 0.1277, "lr": 5.341872463076695e-07, "epoch": 4.046155815888867, "percentage": 80.92, "elapsed_time": "2:46:46", "remaining_time": "0:39:18", "throughput": 19880.07, "total_tokens": 198921152}
|
|
{"current_steps": 63210, "total_steps": 78105, "loss": 0.1328, "lr": 5.338421527393461e-07, "epoch": 4.046475897829844, "percentage": 80.93, "elapsed_time": "2:46:46", "remaining_time": "0:39:18", "throughput": 19880.4, "total_tokens": 198939264}
|
|
{"current_steps": 63215, "total_steps": 78105, "loss": 0.1319, "lr": 5.334971573507045e-07, "epoch": 4.046795979770821, "percentage": 80.94, "elapsed_time": "2:46:47", "remaining_time": "0:39:17", "throughput": 19880.66, "total_tokens": 198955584}
|
|
{"current_steps": 63220, "total_steps": 78105, "loss": 0.1289, "lr": 5.331522601589709e-07, "epoch": 4.047116061711798, "percentage": 80.94, "elapsed_time": "2:46:48", "remaining_time": "0:39:16", "throughput": 19880.88, "total_tokens": 198970880}
|
|
{"current_steps": 63225, "total_steps": 78105, "loss": 0.118, "lr": 5.328074611813699e-07, "epoch": 4.047436143652775, "percentage": 80.95, "elapsed_time": "2:46:48", "remaining_time": "0:39:15", "throughput": 19881.11, "total_tokens": 198986624}
|
|
{"current_steps": 63230, "total_steps": 78105, "loss": 0.1392, "lr": 5.324627604351163e-07, "epoch": 4.047756225593752, "percentage": 80.96, "elapsed_time": "2:46:49", "remaining_time": "0:39:14", "throughput": 19881.33, "total_tokens": 199002112}
|
|
{"current_steps": 63235, "total_steps": 78105, "loss": 0.0864, "lr": 5.321181579374252e-07, "epoch": 4.048076307534729, "percentage": 80.96, "elapsed_time": "2:46:50", "remaining_time": "0:39:13", "throughput": 19881.54, "total_tokens": 199017600}
|
|
{"current_steps": 63240, "total_steps": 78105, "loss": 0.1273, "lr": 5.317736537055012e-07, "epoch": 4.048396389475705, "percentage": 80.97, "elapsed_time": "2:46:50", "remaining_time": "0:39:13", "throughput": 19881.77, "total_tokens": 199033536}
|
|
{"current_steps": 63245, "total_steps": 78105, "loss": 0.1282, "lr": 5.314292477565499e-07, "epoch": 4.048716471416682, "percentage": 80.97, "elapsed_time": "2:46:51", "remaining_time": "0:39:12", "throughput": 19881.96, "total_tokens": 199048192}
|
|
{"current_steps": 63250, "total_steps": 78105, "loss": 0.1739, "lr": 5.310849401077683e-07, "epoch": 4.049036553357659, "percentage": 80.98, "elapsed_time": "2:46:52", "remaining_time": "0:39:11", "throughput": 19882.21, "total_tokens": 199064192}
|
|
{"current_steps": 63255, "total_steps": 78105, "loss": 0.1056, "lr": 5.307407307763479e-07, "epoch": 4.049356635298636, "percentage": 80.99, "elapsed_time": "2:46:52", "remaining_time": "0:39:10", "throughput": 19882.44, "total_tokens": 199080256}
|
|
{"current_steps": 63260, "total_steps": 78105, "loss": 0.1282, "lr": 5.303966197794797e-07, "epoch": 4.049676717239613, "percentage": 80.99, "elapsed_time": "2:46:53", "remaining_time": "0:39:09", "throughput": 19882.67, "total_tokens": 199096192}
|
|
{"current_steps": 63265, "total_steps": 78105, "loss": 0.1329, "lr": 5.300526071343434e-07, "epoch": 4.04999679918059, "percentage": 81.0, "elapsed_time": "2:46:54", "remaining_time": "0:39:09", "throughput": 19882.89, "total_tokens": 199112000}
|
|
{"current_steps": 63270, "total_steps": 78105, "loss": 0.1424, "lr": 5.297086928581196e-07, "epoch": 4.050316881121567, "percentage": 81.01, "elapsed_time": "2:46:54", "remaining_time": "0:39:08", "throughput": 19883.09, "total_tokens": 199127040}
|
|
{"current_steps": 63275, "total_steps": 78105, "loss": 0.1404, "lr": 5.293648769679807e-07, "epoch": 4.050636963062544, "percentage": 81.01, "elapsed_time": "2:46:55", "remaining_time": "0:39:07", "throughput": 19883.32, "total_tokens": 199142720}
|
|
{"current_steps": 63280, "total_steps": 78105, "loss": 0.1317, "lr": 5.290211594810954e-07, "epoch": 4.050957045003521, "percentage": 81.02, "elapsed_time": "2:46:56", "remaining_time": "0:39:06", "throughput": 19883.56, "total_tokens": 199158848}
|
|
{"current_steps": 63285, "total_steps": 78105, "loss": 0.1368, "lr": 5.286775404146266e-07, "epoch": 4.051277126944497, "percentage": 81.03, "elapsed_time": "2:46:56", "remaining_time": "0:39:05", "throughput": 19883.77, "total_tokens": 199173760}
|
|
{"current_steps": 63290, "total_steps": 78105, "loss": 0.1028, "lr": 5.283340197857334e-07, "epoch": 4.051597208885474, "percentage": 81.03, "elapsed_time": "2:46:57", "remaining_time": "0:39:04", "throughput": 19883.97, "total_tokens": 199188608}
|
|
{"current_steps": 63295, "total_steps": 78105, "loss": 0.1147, "lr": 5.279905976115693e-07, "epoch": 4.051917290826451, "percentage": 81.04, "elapsed_time": "2:46:58", "remaining_time": "0:39:04", "throughput": 19884.15, "total_tokens": 199203136}
|
|
{"current_steps": 63300, "total_steps": 78105, "loss": 0.1054, "lr": 5.276472739092828e-07, "epoch": 4.052237372767428, "percentage": 81.04, "elapsed_time": "2:46:58", "remaining_time": "0:39:03", "throughput": 19884.38, "total_tokens": 199219008}
|
|
{"current_steps": 63305, "total_steps": 78105, "loss": 0.1534, "lr": 5.273040486960171e-07, "epoch": 4.052557454708405, "percentage": 81.05, "elapsed_time": "2:46:59", "remaining_time": "0:39:02", "throughput": 19884.59, "total_tokens": 199234624}
|
|
{"current_steps": 63310, "total_steps": 78105, "loss": 0.1307, "lr": 5.269609219889127e-07, "epoch": 4.052877536649382, "percentage": 81.06, "elapsed_time": "2:47:00", "remaining_time": "0:39:01", "throughput": 19884.78, "total_tokens": 199249408}
|
|
{"current_steps": 63315, "total_steps": 78105, "loss": 0.1606, "lr": 5.266178938051026e-07, "epoch": 4.053197618590359, "percentage": 81.06, "elapsed_time": "2:47:00", "remaining_time": "0:39:00", "throughput": 19884.96, "total_tokens": 199263872}
|
|
{"current_steps": 63320, "total_steps": 78105, "loss": 0.1286, "lr": 5.262749641617157e-07, "epoch": 4.053517700531336, "percentage": 81.07, "elapsed_time": "2:47:01", "remaining_time": "0:38:59", "throughput": 19885.18, "total_tokens": 199279936}
|
|
{"current_steps": 63325, "total_steps": 78105, "loss": 0.1539, "lr": 5.259321330758766e-07, "epoch": 4.053837782472313, "percentage": 81.08, "elapsed_time": "2:47:02", "remaining_time": "0:38:59", "throughput": 19885.4, "total_tokens": 199295552}
|
|
{"current_steps": 63330, "total_steps": 78105, "loss": 0.0866, "lr": 5.255894005647033e-07, "epoch": 4.054157864413289, "percentage": 81.08, "elapsed_time": "2:47:02", "remaining_time": "0:38:58", "throughput": 19885.6, "total_tokens": 199311104}
|
|
{"current_steps": 63335, "total_steps": 78105, "loss": 0.113, "lr": 5.252467666453126e-07, "epoch": 4.054477946354266, "percentage": 81.09, "elapsed_time": "2:47:03", "remaining_time": "0:38:57", "throughput": 19885.81, "total_tokens": 199326656}
|
|
{"current_steps": 63340, "total_steps": 78105, "loss": 0.0873, "lr": 5.249042313348107e-07, "epoch": 4.054798028295243, "percentage": 81.1, "elapsed_time": "2:47:04", "remaining_time": "0:38:56", "throughput": 19886.01, "total_tokens": 199341760}
|
|
{"current_steps": 63345, "total_steps": 78105, "loss": 0.1428, "lr": 5.245617946503051e-07, "epoch": 4.05511811023622, "percentage": 81.1, "elapsed_time": "2:47:04", "remaining_time": "0:38:55", "throughput": 19886.2, "total_tokens": 199356480}
|
|
{"current_steps": 63350, "total_steps": 78105, "loss": 0.1471, "lr": 5.242194566088923e-07, "epoch": 4.055438192177197, "percentage": 81.11, "elapsed_time": "2:47:05", "remaining_time": "0:38:55", "throughput": 19886.41, "total_tokens": 199371712}
|
|
{"current_steps": 63355, "total_steps": 78105, "loss": 0.138, "lr": 5.238772172276693e-07, "epoch": 4.055758274118174, "percentage": 81.12, "elapsed_time": "2:47:06", "remaining_time": "0:38:54", "throughput": 19886.61, "total_tokens": 199387200}
|
|
{"current_steps": 63360, "total_steps": 78105, "loss": 0.1257, "lr": 5.235350765237246e-07, "epoch": 4.056078356059151, "percentage": 81.12, "elapsed_time": "2:47:06", "remaining_time": "0:38:53", "throughput": 19886.86, "total_tokens": 199403776}
|
|
{"current_steps": 63365, "total_steps": 78105, "loss": 0.1356, "lr": 5.231930345141431e-07, "epoch": 4.056398438000128, "percentage": 81.13, "elapsed_time": "2:47:07", "remaining_time": "0:38:52", "throughput": 19887.07, "total_tokens": 199419200}
|
|
{"current_steps": 63370, "total_steps": 78105, "loss": 0.1023, "lr": 5.228510912160048e-07, "epoch": 4.056718519941105, "percentage": 81.13, "elapsed_time": "2:47:08", "remaining_time": "0:38:51", "throughput": 19887.3, "total_tokens": 199434752}
|
|
{"current_steps": 63375, "total_steps": 78105, "loss": 0.1009, "lr": 5.225092466463838e-07, "epoch": 4.057038601882081, "percentage": 81.14, "elapsed_time": "2:47:08", "remaining_time": "0:38:50", "throughput": 19887.53, "total_tokens": 199450816}
|
|
{"current_steps": 63380, "total_steps": 78105, "loss": 0.1229, "lr": 5.221675008223517e-07, "epoch": 4.057358683823058, "percentage": 81.15, "elapsed_time": "2:47:09", "remaining_time": "0:38:50", "throughput": 19887.74, "total_tokens": 199466496}
|
|
{"current_steps": 63385, "total_steps": 78105, "loss": 0.0859, "lr": 5.218258537609713e-07, "epoch": 4.057678765764035, "percentage": 81.15, "elapsed_time": "2:47:10", "remaining_time": "0:38:49", "throughput": 19887.96, "total_tokens": 199482304}
|
|
{"current_steps": 63390, "total_steps": 78105, "loss": 0.0956, "lr": 5.214843054793045e-07, "epoch": 4.057998847705012, "percentage": 81.16, "elapsed_time": "2:47:10", "remaining_time": "0:38:48", "throughput": 19888.19, "total_tokens": 199498368}
|
|
{"current_steps": 63395, "total_steps": 78105, "loss": 0.0836, "lr": 5.21142855994406e-07, "epoch": 4.058318929645989, "percentage": 81.17, "elapsed_time": "2:47:11", "remaining_time": "0:38:47", "throughput": 19888.43, "total_tokens": 199514304}
|
|
{"current_steps": 63400, "total_steps": 78105, "loss": 0.1224, "lr": 5.208015053233253e-07, "epoch": 4.058639011586966, "percentage": 81.17, "elapsed_time": "2:47:12", "remaining_time": "0:38:46", "throughput": 19888.66, "total_tokens": 199530048}
|
|
{"current_steps": 63405, "total_steps": 78105, "loss": 0.1053, "lr": 5.204602534831085e-07, "epoch": 4.058959093527943, "percentage": 81.18, "elapsed_time": "2:47:12", "remaining_time": "0:38:46", "throughput": 19888.82, "total_tokens": 199544512}
|
|
{"current_steps": 63410, "total_steps": 78105, "loss": 0.1026, "lr": 5.201191004907955e-07, "epoch": 4.05927917546892, "percentage": 81.19, "elapsed_time": "2:47:13", "remaining_time": "0:38:45", "throughput": 19889.02, "total_tokens": 199559616}
|
|
{"current_steps": 63415, "total_steps": 78105, "loss": 0.1402, "lr": 5.197780463634211e-07, "epoch": 4.059599257409897, "percentage": 81.19, "elapsed_time": "2:47:14", "remaining_time": "0:38:44", "throughput": 19889.24, "total_tokens": 199575488}
|
|
{"current_steps": 63420, "total_steps": 78105, "loss": 0.1276, "lr": 5.194370911180177e-07, "epoch": 4.0599193393508735, "percentage": 81.2, "elapsed_time": "2:47:15", "remaining_time": "0:38:43", "throughput": 19889.46, "total_tokens": 199591232}
|
|
{"current_steps": 63425, "total_steps": 78105, "loss": 0.1628, "lr": 5.190962347716086e-07, "epoch": 4.0602394212918504, "percentage": 81.2, "elapsed_time": "2:47:15", "remaining_time": "0:38:42", "throughput": 19889.71, "total_tokens": 199607232}
|
|
{"current_steps": 63430, "total_steps": 78105, "loss": 0.1143, "lr": 5.187554773412157e-07, "epoch": 4.060559503232827, "percentage": 81.21, "elapsed_time": "2:47:16", "remaining_time": "0:38:41", "throughput": 19889.88, "total_tokens": 199621760}
|
|
{"current_steps": 63435, "total_steps": 78105, "loss": 0.0886, "lr": 5.184148188438545e-07, "epoch": 4.060879585173804, "percentage": 81.22, "elapsed_time": "2:47:17", "remaining_time": "0:38:41", "throughput": 19890.14, "total_tokens": 199638016}
|
|
{"current_steps": 63440, "total_steps": 78105, "loss": 0.1261, "lr": 5.180742592965354e-07, "epoch": 4.061199667114781, "percentage": 81.22, "elapsed_time": "2:47:17", "remaining_time": "0:38:40", "throughput": 19890.36, "total_tokens": 199653376}
|
|
{"current_steps": 63445, "total_steps": 78105, "loss": 0.1589, "lr": 5.177337987162645e-07, "epoch": 4.061519749055758, "percentage": 81.23, "elapsed_time": "2:47:18", "remaining_time": "0:38:39", "throughput": 19890.71, "total_tokens": 199671680}
|
|
{"current_steps": 63450, "total_steps": 78105, "loss": 0.1513, "lr": 5.173934371200418e-07, "epoch": 4.061839830996735, "percentage": 81.24, "elapsed_time": "2:47:19", "remaining_time": "0:38:38", "throughput": 19891.0, "total_tokens": 199689408}
|
|
{"current_steps": 63455, "total_steps": 78105, "loss": 0.1684, "lr": 5.17053174524865e-07, "epoch": 4.062159912937712, "percentage": 81.24, "elapsed_time": "2:47:19", "remaining_time": "0:38:37", "throughput": 19891.2, "total_tokens": 199704576}
|
|
{"current_steps": 63460, "total_steps": 78105, "loss": 0.1274, "lr": 5.167130109477228e-07, "epoch": 4.062479994878689, "percentage": 81.25, "elapsed_time": "2:47:20", "remaining_time": "0:38:37", "throughput": 19891.55, "total_tokens": 199723200}
|
|
{"current_steps": 63465, "total_steps": 78105, "loss": 0.0847, "lr": 5.163729464056033e-07, "epoch": 4.0628000768196655, "percentage": 81.26, "elapsed_time": "2:47:21", "remaining_time": "0:38:36", "throughput": 19891.71, "total_tokens": 199737344}
|
|
{"current_steps": 63470, "total_steps": 78105, "loss": 0.1188, "lr": 5.160329809154854e-07, "epoch": 4.0631201587606425, "percentage": 81.26, "elapsed_time": "2:47:21", "remaining_time": "0:38:35", "throughput": 19891.93, "total_tokens": 199753152}
|
|
{"current_steps": 63475, "total_steps": 78105, "loss": 0.1069, "lr": 5.156931144943466e-07, "epoch": 4.0634402407016195, "percentage": 81.27, "elapsed_time": "2:47:22", "remaining_time": "0:38:34", "throughput": 19892.1, "total_tokens": 199767808}
|
|
{"current_steps": 63480, "total_steps": 78105, "loss": 0.1627, "lr": 5.153533471591582e-07, "epoch": 4.0637603226425965, "percentage": 81.28, "elapsed_time": "2:47:23", "remaining_time": "0:38:33", "throughput": 19892.27, "total_tokens": 199782080}
|
|
{"current_steps": 63485, "total_steps": 78105, "loss": 0.1198, "lr": 5.150136789268853e-07, "epoch": 4.0640804045835734, "percentage": 81.28, "elapsed_time": "2:47:23", "remaining_time": "0:38:33", "throughput": 19892.48, "total_tokens": 199797312}
|
|
{"current_steps": 63490, "total_steps": 78105, "loss": 0.2233, "lr": 5.146741098144911e-07, "epoch": 4.06440048652455, "percentage": 81.29, "elapsed_time": "2:47:24", "remaining_time": "0:38:32", "throughput": 19892.68, "total_tokens": 199812544}
|
|
{"current_steps": 63495, "total_steps": 78105, "loss": 0.1031, "lr": 5.143346398389292e-07, "epoch": 4.064720568465527, "percentage": 81.29, "elapsed_time": "2:47:25", "remaining_time": "0:38:31", "throughput": 19892.88, "total_tokens": 199827776}
|
|
{"current_steps": 63500, "total_steps": 78105, "loss": 0.1403, "lr": 5.139952690171534e-07, "epoch": 4.065040650406504, "percentage": 81.3, "elapsed_time": "2:47:25", "remaining_time": "0:38:30", "throughput": 19893.09, "total_tokens": 199843264}
|
|
{"current_steps": 63505, "total_steps": 78105, "loss": 0.116, "lr": 5.13655997366109e-07, "epoch": 4.0653607323474805, "percentage": 81.31, "elapsed_time": "2:47:26", "remaining_time": "0:38:29", "throughput": 19893.3, "total_tokens": 199858752}
|
|
{"current_steps": 63510, "total_steps": 78105, "loss": 0.1519, "lr": 5.133168249027376e-07, "epoch": 4.0656808142884575, "percentage": 81.31, "elapsed_time": "2:47:27", "remaining_time": "0:38:28", "throughput": 19893.51, "total_tokens": 199874304}
|
|
{"current_steps": 63515, "total_steps": 78105, "loss": 0.1366, "lr": 5.129777516439757e-07, "epoch": 4.0660008962294345, "percentage": 81.32, "elapsed_time": "2:47:27", "remaining_time": "0:38:28", "throughput": 19893.79, "total_tokens": 199890944}
|
|
{"current_steps": 63520, "total_steps": 78105, "loss": 0.1564, "lr": 5.126387776067548e-07, "epoch": 4.0663209781704115, "percentage": 81.33, "elapsed_time": "2:47:28", "remaining_time": "0:38:27", "throughput": 19893.98, "total_tokens": 199905920}
|
|
{"current_steps": 63525, "total_steps": 78105, "loss": 0.1467, "lr": 5.122999028080017e-07, "epoch": 4.0666410601113885, "percentage": 81.33, "elapsed_time": "2:47:29", "remaining_time": "0:38:26", "throughput": 19894.22, "total_tokens": 199921728}
|
|
{"current_steps": 63530, "total_steps": 78105, "loss": 0.132, "lr": 5.119611272646377e-07, "epoch": 4.0669611420523655, "percentage": 81.34, "elapsed_time": "2:47:29", "remaining_time": "0:38:25", "throughput": 19894.41, "total_tokens": 199936896}
|
|
{"current_steps": 63535, "total_steps": 78105, "loss": 0.1072, "lr": 5.116224509935791e-07, "epoch": 4.0672812239933425, "percentage": 81.35, "elapsed_time": "2:47:30", "remaining_time": "0:38:24", "throughput": 19894.6, "total_tokens": 199951808}
|
|
{"current_steps": 63540, "total_steps": 78105, "loss": 0.1189, "lr": 5.112838740117396e-07, "epoch": 4.0676013059343195, "percentage": 81.35, "elapsed_time": "2:47:31", "remaining_time": "0:38:24", "throughput": 19894.81, "total_tokens": 199967360}
|
|
{"current_steps": 63545, "total_steps": 78105, "loss": 0.1474, "lr": 5.109453963360234e-07, "epoch": 4.0679213878752964, "percentage": 81.36, "elapsed_time": "2:47:31", "remaining_time": "0:38:23", "throughput": 19895.04, "total_tokens": 199983360}
|
|
{"current_steps": 63550, "total_steps": 78105, "loss": 0.1933, "lr": 5.106070179833339e-07, "epoch": 4.0682414698162725, "percentage": 81.36, "elapsed_time": "2:47:32", "remaining_time": "0:38:22", "throughput": 19895.26, "total_tokens": 199999040}
|
|
{"current_steps": 63555, "total_steps": 78105, "loss": 0.1199, "lr": 5.102687389705676e-07, "epoch": 4.0685615517572495, "percentage": 81.37, "elapsed_time": "2:47:33", "remaining_time": "0:38:21", "throughput": 19895.51, "total_tokens": 200015104}
|
|
{"current_steps": 63560, "total_steps": 78105, "loss": 0.1405, "lr": 5.099305593146159e-07, "epoch": 4.0688816336982265, "percentage": 81.38, "elapsed_time": "2:47:33", "remaining_time": "0:38:20", "throughput": 19895.74, "total_tokens": 200031168}
|
|
{"current_steps": 63565, "total_steps": 78105, "loss": 0.1306, "lr": 5.095924790323672e-07, "epoch": 4.0692017156392035, "percentage": 81.38, "elapsed_time": "2:47:34", "remaining_time": "0:38:19", "throughput": 19895.94, "total_tokens": 200046208}
|
|
{"current_steps": 63570, "total_steps": 78105, "loss": 0.1384, "lr": 5.092544981407013e-07, "epoch": 4.0695217975801805, "percentage": 81.39, "elapsed_time": "2:47:35", "remaining_time": "0:38:19", "throughput": 19896.15, "total_tokens": 200061696}
|
|
{"current_steps": 63575, "total_steps": 78105, "loss": 0.1234, "lr": 5.089166166564977e-07, "epoch": 4.0698418795211575, "percentage": 81.4, "elapsed_time": "2:47:35", "remaining_time": "0:38:18", "throughput": 19896.35, "total_tokens": 200076992}
|
|
{"current_steps": 63580, "total_steps": 78105, "loss": 0.1477, "lr": 5.085788345966255e-07, "epoch": 4.0701619614621345, "percentage": 81.4, "elapsed_time": "2:47:36", "remaining_time": "0:38:17", "throughput": 19896.55, "total_tokens": 200092224}
|
|
{"current_steps": 63585, "total_steps": 78105, "loss": 0.1053, "lr": 5.082411519779542e-07, "epoch": 4.0704820434031115, "percentage": 81.41, "elapsed_time": "2:47:37", "remaining_time": "0:38:16", "throughput": 19896.77, "total_tokens": 200107776}
|
|
{"current_steps": 63590, "total_steps": 78105, "loss": 0.0908, "lr": 5.079035688173454e-07, "epoch": 4.0708021253440885, "percentage": 81.42, "elapsed_time": "2:47:37", "remaining_time": "0:38:15", "throughput": 19896.99, "total_tokens": 200123136}
|
|
{"current_steps": 63595, "total_steps": 78105, "loss": 0.1172, "lr": 5.075660851316555e-07, "epoch": 4.071122207285065, "percentage": 81.42, "elapsed_time": "2:47:38", "remaining_time": "0:38:15", "throughput": 19897.2, "total_tokens": 200138688}
|
|
{"current_steps": 63600, "total_steps": 78105, "loss": 0.0586, "lr": 5.072287009377372e-07, "epoch": 4.071442289226042, "percentage": 81.43, "elapsed_time": "2:47:39", "remaining_time": "0:38:14", "throughput": 19897.47, "total_tokens": 200155456}
|
|
{"current_steps": 63605, "total_steps": 78105, "loss": 0.1135, "lr": 5.068914162524368e-07, "epoch": 4.071762371167019, "percentage": 81.44, "elapsed_time": "2:47:40", "remaining_time": "0:38:13", "throughput": 19897.65, "total_tokens": 200170560}
|
|
{"current_steps": 63610, "total_steps": 78105, "loss": 0.1052, "lr": 5.065542310925992e-07, "epoch": 4.0720824531079955, "percentage": 81.44, "elapsed_time": "2:47:40", "remaining_time": "0:38:12", "throughput": 19897.87, "total_tokens": 200186048}
|
|
{"current_steps": 63615, "total_steps": 78105, "loss": 0.1529, "lr": 5.062171454750581e-07, "epoch": 4.0724025350489725, "percentage": 81.45, "elapsed_time": "2:47:41", "remaining_time": "0:38:11", "throughput": 19898.08, "total_tokens": 200201216}
|
|
{"current_steps": 63620, "total_steps": 78105, "loss": 0.1382, "lr": 5.058801594166482e-07, "epoch": 4.0727226169899495, "percentage": 81.45, "elapsed_time": "2:47:42", "remaining_time": "0:38:10", "throughput": 19898.28, "total_tokens": 200216576}
|
|
{"current_steps": 63625, "total_steps": 78105, "loss": 0.1454, "lr": 5.05543272934196e-07, "epoch": 4.0730426989309265, "percentage": 81.46, "elapsed_time": "2:47:42", "remaining_time": "0:38:10", "throughput": 19898.51, "total_tokens": 200232640}
|
|
{"current_steps": 63630, "total_steps": 78105, "loss": 0.1427, "lr": 5.052064860445241e-07, "epoch": 4.0733627808719035, "percentage": 81.47, "elapsed_time": "2:47:43", "remaining_time": "0:38:09", "throughput": 19898.75, "total_tokens": 200249088}
|
|
{"current_steps": 63635, "total_steps": 78105, "loss": 0.1248, "lr": 5.048697987644499e-07, "epoch": 4.0736828628128805, "percentage": 81.47, "elapsed_time": "2:47:44", "remaining_time": "0:38:08", "throughput": 19898.95, "total_tokens": 200264320}
|
|
{"current_steps": 63640, "total_steps": 78105, "loss": 0.1284, "lr": 5.045332111107856e-07, "epoch": 4.074002944753857, "percentage": 81.48, "elapsed_time": "2:47:44", "remaining_time": "0:38:07", "throughput": 19899.2, "total_tokens": 200280832}
|
|
{"current_steps": 63645, "total_steps": 78105, "loss": 0.157, "lr": 5.041967231003383e-07, "epoch": 4.074323026694834, "percentage": 81.49, "elapsed_time": "2:47:45", "remaining_time": "0:38:06", "throughput": 19899.49, "total_tokens": 200298048}
|
|
{"current_steps": 63650, "total_steps": 78105, "loss": 0.1335, "lr": 5.038603347499121e-07, "epoch": 4.074643108635811, "percentage": 81.49, "elapsed_time": "2:47:46", "remaining_time": "0:38:06", "throughput": 19899.68, "total_tokens": 200312768}
|
|
{"current_steps": 63655, "total_steps": 78105, "loss": 0.1238, "lr": 5.035240460763019e-07, "epoch": 4.074963190576788, "percentage": 81.5, "elapsed_time": "2:47:46", "remaining_time": "0:38:05", "throughput": 19899.9, "total_tokens": 200328512}
|
|
{"current_steps": 63660, "total_steps": 78105, "loss": 0.1152, "lr": 5.031878570963019e-07, "epoch": 4.075283272517765, "percentage": 81.51, "elapsed_time": "2:47:47", "remaining_time": "0:38:04", "throughput": 19900.16, "total_tokens": 200345152}
|
|
{"current_steps": 63665, "total_steps": 78105, "loss": 0.1495, "lr": 5.028517678266997e-07, "epoch": 4.075603354458742, "percentage": 81.51, "elapsed_time": "2:47:48", "remaining_time": "0:38:03", "throughput": 19900.37, "total_tokens": 200360832}
|
|
{"current_steps": 63670, "total_steps": 78105, "loss": 0.1661, "lr": 5.025157782842774e-07, "epoch": 4.0759234363997185, "percentage": 81.52, "elapsed_time": "2:47:48", "remaining_time": "0:38:02", "throughput": 19900.61, "total_tokens": 200376960}
|
|
{"current_steps": 63675, "total_steps": 78105, "loss": 0.1434, "lr": 5.021798884858126e-07, "epoch": 4.0762435183406955, "percentage": 81.52, "elapsed_time": "2:47:49", "remaining_time": "0:38:01", "throughput": 19900.8, "total_tokens": 200392064}
|
|
{"current_steps": 63680, "total_steps": 78105, "loss": 0.1084, "lr": 5.018440984480775e-07, "epoch": 4.0765636002816725, "percentage": 81.53, "elapsed_time": "2:47:50", "remaining_time": "0:38:01", "throughput": 19901.03, "total_tokens": 200407936}
|
|
{"current_steps": 63685, "total_steps": 78105, "loss": 0.1253, "lr": 5.01508408187841e-07, "epoch": 4.076883682222649, "percentage": 81.54, "elapsed_time": "2:47:50", "remaining_time": "0:38:00", "throughput": 19901.27, "total_tokens": 200424192}
|
|
{"current_steps": 63690, "total_steps": 78105, "loss": 0.1007, "lr": 5.011728177218636e-07, "epoch": 4.077203764163626, "percentage": 81.54, "elapsed_time": "2:47:51", "remaining_time": "0:37:59", "throughput": 19901.5, "total_tokens": 200440576}
|
|
{"current_steps": 63695, "total_steps": 78105, "loss": 0.1397, "lr": 5.008373270669048e-07, "epoch": 4.077523846104603, "percentage": 81.55, "elapsed_time": "2:47:52", "remaining_time": "0:37:58", "throughput": 19901.69, "total_tokens": 200455680}
|
|
{"current_steps": 63700, "total_steps": 78105, "loss": 0.1862, "lr": 5.005019362397167e-07, "epoch": 4.07784392804558, "percentage": 81.56, "elapsed_time": "2:47:52", "remaining_time": "0:37:57", "throughput": 19901.95, "total_tokens": 200472064}
|
|
{"current_steps": 63705, "total_steps": 78105, "loss": 0.1342, "lr": 5.00166645257047e-07, "epoch": 4.078164009986557, "percentage": 81.56, "elapsed_time": "2:47:53", "remaining_time": "0:37:57", "throughput": 19902.16, "total_tokens": 200487360}
|
|
{"current_steps": 63710, "total_steps": 78105, "loss": 0.1402, "lr": 4.998314541356384e-07, "epoch": 4.078484091927534, "percentage": 81.57, "elapsed_time": "2:47:54", "remaining_time": "0:37:56", "throughput": 19902.37, "total_tokens": 200502464}
|
|
{"current_steps": 63715, "total_steps": 78105, "loss": 0.122, "lr": 4.994963628922281e-07, "epoch": 4.078804173868511, "percentage": 81.58, "elapsed_time": "2:47:54", "remaining_time": "0:37:55", "throughput": 19902.59, "total_tokens": 200518144}
|
|
{"current_steps": 63720, "total_steps": 78105, "loss": 0.1182, "lr": 4.991613715435495e-07, "epoch": 4.079124255809488, "percentage": 81.58, "elapsed_time": "2:47:55", "remaining_time": "0:37:54", "throughput": 19902.78, "total_tokens": 200533312}
|
|
{"current_steps": 63725, "total_steps": 78105, "loss": 0.0836, "lr": 4.988264801063291e-07, "epoch": 4.0794443377504646, "percentage": 81.59, "elapsed_time": "2:47:56", "remaining_time": "0:37:53", "throughput": 19903.02, "total_tokens": 200549248}
|
|
{"current_steps": 63730, "total_steps": 78105, "loss": 0.112, "lr": 4.984916885972915e-07, "epoch": 4.079764419691441, "percentage": 81.6, "elapsed_time": "2:47:57", "remaining_time": "0:37:52", "throughput": 19903.28, "total_tokens": 200565696}
|
|
{"current_steps": 63735, "total_steps": 78105, "loss": 0.1929, "lr": 4.981569970331534e-07, "epoch": 4.080084501632418, "percentage": 81.6, "elapsed_time": "2:47:57", "remaining_time": "0:37:52", "throughput": 19903.52, "total_tokens": 200581696}
|
|
{"current_steps": 63740, "total_steps": 78105, "loss": 0.1161, "lr": 4.978224054306274e-07, "epoch": 4.080404583573395, "percentage": 81.61, "elapsed_time": "2:47:58", "remaining_time": "0:37:51", "throughput": 19903.72, "total_tokens": 200597120}
|
|
{"current_steps": 63745, "total_steps": 78105, "loss": 0.0953, "lr": 4.974879138064217e-07, "epoch": 4.080724665514372, "percentage": 81.61, "elapsed_time": "2:47:59", "remaining_time": "0:37:50", "throughput": 19903.91, "total_tokens": 200611712}
|
|
{"current_steps": 63750, "total_steps": 78105, "loss": 0.0989, "lr": 4.971535221772391e-07, "epoch": 4.081044747455349, "percentage": 81.62, "elapsed_time": "2:47:59", "remaining_time": "0:37:49", "throughput": 19904.1, "total_tokens": 200626560}
|
|
{"current_steps": 63755, "total_steps": 78105, "loss": 0.128, "lr": 4.968192305597769e-07, "epoch": 4.081364829396326, "percentage": 81.63, "elapsed_time": "2:48:00", "remaining_time": "0:37:48", "throughput": 19904.31, "total_tokens": 200641600}
|
|
{"current_steps": 63760, "total_steps": 78105, "loss": 0.138, "lr": 4.964850389707282e-07, "epoch": 4.081684911337303, "percentage": 81.63, "elapsed_time": "2:48:00", "remaining_time": "0:37:48", "throughput": 19904.49, "total_tokens": 200656192}
|
|
{"current_steps": 63765, "total_steps": 78105, "loss": 0.1218, "lr": 4.9615094742678e-07, "epoch": 4.08200499327828, "percentage": 81.64, "elapsed_time": "2:48:01", "remaining_time": "0:37:47", "throughput": 19904.67, "total_tokens": 200670848}
|
|
{"current_steps": 63770, "total_steps": 78105, "loss": 0.1446, "lr": 4.958169559446171e-07, "epoch": 4.082325075219256, "percentage": 81.65, "elapsed_time": "2:48:02", "remaining_time": "0:37:46", "throughput": 19904.88, "total_tokens": 200686144}
|
|
{"current_steps": 63775, "total_steps": 78105, "loss": 0.1186, "lr": 4.954830645409147e-07, "epoch": 4.082645157160233, "percentage": 81.65, "elapsed_time": "2:48:02", "remaining_time": "0:37:45", "throughput": 19905.07, "total_tokens": 200701248}
|
|
{"current_steps": 63780, "total_steps": 78105, "loss": 0.1228, "lr": 4.951492732323479e-07, "epoch": 4.08296523910121, "percentage": 81.66, "elapsed_time": "2:48:03", "remaining_time": "0:37:44", "throughput": 19905.25, "total_tokens": 200715840}
|
|
{"current_steps": 63785, "total_steps": 78105, "loss": 0.1434, "lr": 4.948155820355832e-07, "epoch": 4.083285321042187, "percentage": 81.67, "elapsed_time": "2:48:04", "remaining_time": "0:37:43", "throughput": 19905.5, "total_tokens": 200732672}
|
|
{"current_steps": 63790, "total_steps": 78105, "loss": 0.1807, "lr": 4.944819909672832e-07, "epoch": 4.083605402983164, "percentage": 81.67, "elapsed_time": "2:48:04", "remaining_time": "0:37:43", "throughput": 19905.69, "total_tokens": 200747456}
|
|
{"current_steps": 63795, "total_steps": 78105, "loss": 0.1366, "lr": 4.941485000441074e-07, "epoch": 4.083925484924141, "percentage": 81.68, "elapsed_time": "2:48:05", "remaining_time": "0:37:42", "throughput": 19905.93, "total_tokens": 200763904}
|
|
{"current_steps": 63800, "total_steps": 78105, "loss": 0.1214, "lr": 4.938151092827059e-07, "epoch": 4.084245566865118, "percentage": 81.68, "elapsed_time": "2:48:06", "remaining_time": "0:37:41", "throughput": 19906.15, "total_tokens": 200779648}
|
|
{"current_steps": 63805, "total_steps": 78105, "loss": 0.1133, "lr": 4.934818186997298e-07, "epoch": 4.084565648806095, "percentage": 81.69, "elapsed_time": "2:48:06", "remaining_time": "0:37:40", "throughput": 19906.36, "total_tokens": 200795072}
|
|
{"current_steps": 63810, "total_steps": 78105, "loss": 0.1034, "lr": 4.931486283118183e-07, "epoch": 4.084885730747072, "percentage": 81.7, "elapsed_time": "2:48:07", "remaining_time": "0:37:39", "throughput": 19906.53, "total_tokens": 200809536}
|
|
{"current_steps": 63815, "total_steps": 78105, "loss": 0.1396, "lr": 4.92815538135612e-07, "epoch": 4.085205812688048, "percentage": 81.7, "elapsed_time": "2:48:08", "remaining_time": "0:37:39", "throughput": 19906.77, "total_tokens": 200825472}
|
|
{"current_steps": 63820, "total_steps": 78105, "loss": 0.0972, "lr": 4.924825481877427e-07, "epoch": 4.085525894629025, "percentage": 81.71, "elapsed_time": "2:48:08", "remaining_time": "0:37:38", "throughput": 19906.99, "total_tokens": 200841088}
|
|
{"current_steps": 63825, "total_steps": 78105, "loss": 0.1445, "lr": 4.921496584848379e-07, "epoch": 4.085845976570002, "percentage": 81.72, "elapsed_time": "2:48:09", "remaining_time": "0:37:37", "throughput": 19907.27, "total_tokens": 200857920}
|
|
{"current_steps": 63830, "total_steps": 78105, "loss": 0.1206, "lr": 4.918168690435208e-07, "epoch": 4.086166058510979, "percentage": 81.72, "elapsed_time": "2:48:10", "remaining_time": "0:37:36", "throughput": 19907.53, "total_tokens": 200874880}
|
|
{"current_steps": 63835, "total_steps": 78105, "loss": 0.1534, "lr": 4.914841798804085e-07, "epoch": 4.086486140451956, "percentage": 81.73, "elapsed_time": "2:48:11", "remaining_time": "0:37:35", "throughput": 19907.72, "total_tokens": 200889536}
|
|
{"current_steps": 63840, "total_steps": 78105, "loss": 0.1226, "lr": 4.911515910121153e-07, "epoch": 4.086806222392933, "percentage": 81.74, "elapsed_time": "2:48:11", "remaining_time": "0:37:34", "throughput": 19907.98, "total_tokens": 200906304}
|
|
{"current_steps": 63845, "total_steps": 78105, "loss": 0.1881, "lr": 4.908191024552466e-07, "epoch": 4.08712630433391, "percentage": 81.74, "elapsed_time": "2:48:12", "remaining_time": "0:37:34", "throughput": 19908.19, "total_tokens": 200921856}
|
|
{"current_steps": 63850, "total_steps": 78105, "loss": 0.1129, "lr": 4.904867142264074e-07, "epoch": 4.087446386274887, "percentage": 81.75, "elapsed_time": "2:48:13", "remaining_time": "0:37:33", "throughput": 19908.39, "total_tokens": 200937088}
|
|
{"current_steps": 63855, "total_steps": 78105, "loss": 0.0849, "lr": 4.901544263421945e-07, "epoch": 4.087766468215864, "percentage": 81.76, "elapsed_time": "2:48:13", "remaining_time": "0:37:32", "throughput": 19908.63, "total_tokens": 200953280}
|
|
{"current_steps": 63860, "total_steps": 78105, "loss": 0.1147, "lr": 4.898222388192005e-07, "epoch": 4.08808655015684, "percentage": 81.76, "elapsed_time": "2:48:14", "remaining_time": "0:37:31", "throughput": 19908.84, "total_tokens": 200968960}
|
|
{"current_steps": 63865, "total_steps": 78105, "loss": 0.1434, "lr": 4.894901516740133e-07, "epoch": 4.088406632097817, "percentage": 81.77, "elapsed_time": "2:48:15", "remaining_time": "0:37:30", "throughput": 19909.07, "total_tokens": 200984704}
|
|
{"current_steps": 63870, "total_steps": 78105, "loss": 0.1706, "lr": 4.891581649232155e-07, "epoch": 4.088726714038794, "percentage": 81.77, "elapsed_time": "2:48:15", "remaining_time": "0:37:30", "throughput": 19909.26, "total_tokens": 201000064}
|
|
{"current_steps": 63875, "total_steps": 78105, "loss": 0.1149, "lr": 4.888262785833844e-07, "epoch": 4.089046795979771, "percentage": 81.78, "elapsed_time": "2:48:16", "remaining_time": "0:37:29", "throughput": 19909.44, "total_tokens": 201014848}
|
|
{"current_steps": 63880, "total_steps": 78105, "loss": 0.1785, "lr": 4.884944926710943e-07, "epoch": 4.089366877920748, "percentage": 81.79, "elapsed_time": "2:48:17", "remaining_time": "0:37:28", "throughput": 19909.66, "total_tokens": 201030464}
|
|
{"current_steps": 63885, "total_steps": 78105, "loss": 0.1517, "lr": 4.881628072029105e-07, "epoch": 4.089686959861725, "percentage": 81.79, "elapsed_time": "2:48:17", "remaining_time": "0:37:27", "throughput": 19909.92, "total_tokens": 201047104}
|
|
{"current_steps": 63890, "total_steps": 78105, "loss": 0.1125, "lr": 4.878312221953979e-07, "epoch": 4.090007041802702, "percentage": 81.8, "elapsed_time": "2:48:18", "remaining_time": "0:37:26", "throughput": 19910.12, "total_tokens": 201062272}
|
|
{"current_steps": 63895, "total_steps": 78105, "loss": 0.1329, "lr": 4.87499737665112e-07, "epoch": 4.090327123743679, "percentage": 81.81, "elapsed_time": "2:48:19", "remaining_time": "0:37:26", "throughput": 19910.53, "total_tokens": 201082624}
|
|
{"current_steps": 63900, "total_steps": 78105, "loss": 0.1418, "lr": 4.871683536286068e-07, "epoch": 4.090647205684656, "percentage": 81.81, "elapsed_time": "2:48:19", "remaining_time": "0:37:25", "throughput": 19910.75, "total_tokens": 201098496}
|
|
{"current_steps": 63905, "total_steps": 78105, "loss": 0.1341, "lr": 4.868370701024299e-07, "epoch": 4.090967287625632, "percentage": 81.82, "elapsed_time": "2:48:20", "remaining_time": "0:37:24", "throughput": 19910.92, "total_tokens": 201113152}
|
|
{"current_steps": 63910, "total_steps": 78105, "loss": 0.0994, "lr": 4.865058871031228e-07, "epoch": 4.091287369566609, "percentage": 81.83, "elapsed_time": "2:48:21", "remaining_time": "0:37:23", "throughput": 19911.19, "total_tokens": 201129728}
|
|
{"current_steps": 63915, "total_steps": 78105, "loss": 0.1385, "lr": 4.86174804647225e-07, "epoch": 4.091607451507586, "percentage": 81.83, "elapsed_time": "2:48:21", "remaining_time": "0:37:22", "throughput": 19911.39, "total_tokens": 201144640}
|
|
{"current_steps": 63920, "total_steps": 78105, "loss": 0.1163, "lr": 4.858438227512666e-07, "epoch": 4.091927533448563, "percentage": 81.84, "elapsed_time": "2:48:22", "remaining_time": "0:37:21", "throughput": 19911.63, "total_tokens": 201161024}
|
|
{"current_steps": 63925, "total_steps": 78105, "loss": 0.1613, "lr": 4.855129414317769e-07, "epoch": 4.09224761538954, "percentage": 81.84, "elapsed_time": "2:48:23", "remaining_time": "0:37:21", "throughput": 19911.85, "total_tokens": 201176512}
|
|
{"current_steps": 63930, "total_steps": 78105, "loss": 0.0998, "lr": 4.851821607052779e-07, "epoch": 4.092567697330517, "percentage": 81.85, "elapsed_time": "2:48:23", "remaining_time": "0:37:20", "throughput": 19912.03, "total_tokens": 201191040}
|
|
{"current_steps": 63935, "total_steps": 78105, "loss": 0.1349, "lr": 4.84851480588287e-07, "epoch": 4.092887779271494, "percentage": 81.86, "elapsed_time": "2:48:24", "remaining_time": "0:37:19", "throughput": 19912.28, "total_tokens": 201207296}
|
|
{"current_steps": 63940, "total_steps": 78105, "loss": 0.0892, "lr": 4.845209010973164e-07, "epoch": 4.093207861212471, "percentage": 81.86, "elapsed_time": "2:48:25", "remaining_time": "0:37:18", "throughput": 19912.5, "total_tokens": 201223104}
|
|
{"current_steps": 63945, "total_steps": 78105, "loss": 0.178, "lr": 4.841904222488741e-07, "epoch": 4.093527943153448, "percentage": 81.87, "elapsed_time": "2:48:26", "remaining_time": "0:37:17", "throughput": 19912.76, "total_tokens": 201239296}
|
|
{"current_steps": 63950, "total_steps": 78105, "loss": 0.1348, "lr": 4.838600440594618e-07, "epoch": 4.093848025094424, "percentage": 81.88, "elapsed_time": "2:48:26", "remaining_time": "0:37:17", "throughput": 19912.96, "total_tokens": 201254272}
|
|
{"current_steps": 63955, "total_steps": 78105, "loss": 0.1422, "lr": 4.835297665455768e-07, "epoch": 4.094168107035401, "percentage": 81.88, "elapsed_time": "2:48:27", "remaining_time": "0:37:16", "throughput": 19913.13, "total_tokens": 201268544}
|
|
{"current_steps": 63960, "total_steps": 78105, "loss": 0.317, "lr": 4.831995897237124e-07, "epoch": 4.094488188976378, "percentage": 81.89, "elapsed_time": "2:48:27", "remaining_time": "0:37:15", "throughput": 19913.34, "total_tokens": 201283840}
|
|
{"current_steps": 63965, "total_steps": 78105, "loss": 0.0856, "lr": 4.828695136103557e-07, "epoch": 4.094808270917355, "percentage": 81.9, "elapsed_time": "2:48:28", "remaining_time": "0:37:14", "throughput": 19913.57, "total_tokens": 201300096}
|
|
{"current_steps": 63970, "total_steps": 78105, "loss": 0.1197, "lr": 4.825395382219883e-07, "epoch": 4.095128352858332, "percentage": 81.9, "elapsed_time": "2:48:29", "remaining_time": "0:37:13", "throughput": 19913.78, "total_tokens": 201315968}
|
|
{"current_steps": 63975, "total_steps": 78105, "loss": 0.1351, "lr": 4.822096635750879e-07, "epoch": 4.095448434799309, "percentage": 81.91, "elapsed_time": "2:48:30", "remaining_time": "0:37:12", "throughput": 19914.02, "total_tokens": 201331968}
|
|
{"current_steps": 63980, "total_steps": 78105, "loss": 0.1312, "lr": 4.818798896861265e-07, "epoch": 4.095768516740286, "percentage": 81.92, "elapsed_time": "2:48:30", "remaining_time": "0:37:12", "throughput": 19914.33, "total_tokens": 201349760}
|
|
{"current_steps": 63985, "total_steps": 78105, "loss": 0.1428, "lr": 4.815502165715713e-07, "epoch": 4.096088598681263, "percentage": 81.92, "elapsed_time": "2:48:31", "remaining_time": "0:37:11", "throughput": 19914.64, "total_tokens": 201367552}
|
|
{"current_steps": 63990, "total_steps": 78105, "loss": 0.1233, "lr": 4.812206442478848e-07, "epoch": 4.09640868062224, "percentage": 81.93, "elapsed_time": "2:48:32", "remaining_time": "0:37:10", "throughput": 19914.83, "total_tokens": 201382272}
|
|
{"current_steps": 63995, "total_steps": 78105, "loss": 0.1355, "lr": 4.808911727315233e-07, "epoch": 4.096728762563216, "percentage": 81.93, "elapsed_time": "2:48:32", "remaining_time": "0:37:09", "throughput": 19915.07, "total_tokens": 201398336}
|
|
{"current_steps": 64000, "total_steps": 78105, "loss": 0.1449, "lr": 4.805618020389405e-07, "epoch": 4.097048844504193, "percentage": 81.94, "elapsed_time": "2:48:33", "remaining_time": "0:37:08", "throughput": 19915.25, "total_tokens": 201412992}
|
|
{"current_steps": 64005, "total_steps": 78105, "loss": 0.2353, "lr": 4.802325321865814e-07, "epoch": 4.09736892644517, "percentage": 81.95, "elapsed_time": "2:48:34", "remaining_time": "0:37:08", "throughput": 19915.44, "total_tokens": 201427840}
|
|
{"current_steps": 64010, "total_steps": 78105, "loss": 0.1559, "lr": 4.799033631908894e-07, "epoch": 4.097689008386147, "percentage": 81.95, "elapsed_time": "2:48:34", "remaining_time": "0:37:07", "throughput": 19915.68, "total_tokens": 201444096}
|
|
{"current_steps": 64015, "total_steps": 78105, "loss": 0.1141, "lr": 4.795742950683013e-07, "epoch": 4.098009090327124, "percentage": 81.96, "elapsed_time": "2:48:35", "remaining_time": "0:37:06", "throughput": 19915.94, "total_tokens": 201460800}
|
|
{"current_steps": 64020, "total_steps": 78105, "loss": 0.1633, "lr": 4.792453278352485e-07, "epoch": 4.098329172268101, "percentage": 81.97, "elapsed_time": "2:48:36", "remaining_time": "0:37:05", "throughput": 19916.19, "total_tokens": 201477440}
|
|
{"current_steps": 64025, "total_steps": 78105, "loss": 0.1256, "lr": 4.789164615081593e-07, "epoch": 4.098649254209078, "percentage": 81.97, "elapsed_time": "2:48:36", "remaining_time": "0:37:04", "throughput": 19916.4, "total_tokens": 201492992}
|
|
{"current_steps": 64030, "total_steps": 78105, "loss": 0.1386, "lr": 4.785876961034533e-07, "epoch": 4.098969336150055, "percentage": 81.98, "elapsed_time": "2:48:37", "remaining_time": "0:37:04", "throughput": 19916.61, "total_tokens": 201508608}
|
|
{"current_steps": 64035, "total_steps": 78105, "loss": 0.0883, "lr": 4.782590316375499e-07, "epoch": 4.099289418091031, "percentage": 81.99, "elapsed_time": "2:48:38", "remaining_time": "0:37:03", "throughput": 19916.81, "total_tokens": 201523840}
|
|
{"current_steps": 64040, "total_steps": 78105, "loss": 0.2145, "lr": 4.779304681268584e-07, "epoch": 4.099609500032008, "percentage": 81.99, "elapsed_time": "2:48:38", "remaining_time": "0:37:02", "throughput": 19917.08, "total_tokens": 201540672}
|
|
{"current_steps": 64045, "total_steps": 78105, "loss": 0.1174, "lr": 4.776020055877872e-07, "epoch": 4.099929581972985, "percentage": 82.0, "elapsed_time": "2:48:39", "remaining_time": "0:37:01", "throughput": 19917.26, "total_tokens": 201555200}
|
|
{"current_steps": 64050, "total_steps": 78105, "loss": 0.121, "lr": 4.772736440367379e-07, "epoch": 4.100249663913962, "percentage": 82.0, "elapsed_time": "2:48:40", "remaining_time": "0:37:00", "throughput": 19917.49, "total_tokens": 201570880}
|
|
{"current_steps": 64055, "total_steps": 78105, "loss": 0.1114, "lr": 4.769453834901066e-07, "epoch": 4.100569745854939, "percentage": 82.01, "elapsed_time": "2:48:40", "remaining_time": "0:36:59", "throughput": 19917.68, "total_tokens": 201585664}
|
|
{"current_steps": 64060, "total_steps": 78105, "loss": 0.1461, "lr": 4.766172239642852e-07, "epoch": 4.100889827795916, "percentage": 82.02, "elapsed_time": "2:48:41", "remaining_time": "0:36:59", "throughput": 19917.91, "total_tokens": 201601792}
|
|
{"current_steps": 64065, "total_steps": 78105, "loss": 0.1063, "lr": 4.7628916547566017e-07, "epoch": 4.101209909736893, "percentage": 82.02, "elapsed_time": "2:48:42", "remaining_time": "0:36:58", "throughput": 19918.14, "total_tokens": 201617280}
|
|
{"current_steps": 64070, "total_steps": 78105, "loss": 0.1303, "lr": 4.7596120804061314e-07, "epoch": 4.10152999167787, "percentage": 82.03, "elapsed_time": "2:48:42", "remaining_time": "0:36:57", "throughput": 19918.38, "total_tokens": 201633472}
|
|
{"current_steps": 64075, "total_steps": 78105, "loss": 0.1477, "lr": 4.756333516755199e-07, "epoch": 4.101850073618847, "percentage": 82.04, "elapsed_time": "2:48:43", "remaining_time": "0:36:56", "throughput": 19918.58, "total_tokens": 201648512}
|
|
{"current_steps": 64080, "total_steps": 78105, "loss": 0.1502, "lr": 4.753055963967529e-07, "epoch": 4.102170155559823, "percentage": 82.04, "elapsed_time": "2:48:44", "remaining_time": "0:36:55", "throughput": 19918.84, "total_tokens": 201665152}
|
|
{"current_steps": 64085, "total_steps": 78105, "loss": 0.1126, "lr": 4.749779422206785e-07, "epoch": 4.1024902375008, "percentage": 82.05, "elapsed_time": "2:48:45", "remaining_time": "0:36:55", "throughput": 19919.07, "total_tokens": 201680896}
|
|
{"current_steps": 64090, "total_steps": 78105, "loss": 0.1152, "lr": 4.746503891636575e-07, "epoch": 4.102810319441777, "percentage": 82.06, "elapsed_time": "2:48:45", "remaining_time": "0:36:54", "throughput": 19919.26, "total_tokens": 201696064}
|
|
{"current_steps": 64095, "total_steps": 78105, "loss": 0.1107, "lr": 4.743229372420466e-07, "epoch": 4.103130401382754, "percentage": 82.06, "elapsed_time": "2:48:46", "remaining_time": "0:36:53", "throughput": 19919.46, "total_tokens": 201711104}
|
|
{"current_steps": 64100, "total_steps": 78105, "loss": 0.1282, "lr": 4.739955864721968e-07, "epoch": 4.103450483323731, "percentage": 82.07, "elapsed_time": "2:48:47", "remaining_time": "0:36:52", "throughput": 19919.71, "total_tokens": 201727232}
|
|
{"current_steps": 64105, "total_steps": 78105, "loss": 0.1236, "lr": 4.7366833687045365e-07, "epoch": 4.103770565264708, "percentage": 82.08, "elapsed_time": "2:48:47", "remaining_time": "0:36:51", "throughput": 19919.92, "total_tokens": 201742464}
|
|
{"current_steps": 64110, "total_steps": 78105, "loss": 0.1583, "lr": 4.7334118845316024e-07, "epoch": 4.104090647205685, "percentage": 82.08, "elapsed_time": "2:48:48", "remaining_time": "0:36:50", "throughput": 19920.13, "total_tokens": 201757824}
|
|
{"current_steps": 64115, "total_steps": 78105, "loss": 0.1226, "lr": 4.730141412366501e-07, "epoch": 4.104410729146662, "percentage": 82.09, "elapsed_time": "2:48:49", "remaining_time": "0:36:50", "throughput": 19920.34, "total_tokens": 201773440}
|
|
{"current_steps": 64120, "total_steps": 78105, "loss": 0.0671, "lr": 4.726871952372569e-07, "epoch": 4.104730811087639, "percentage": 82.09, "elapsed_time": "2:48:49", "remaining_time": "0:36:49", "throughput": 19920.56, "total_tokens": 201789248}
|
|
{"current_steps": 64125, "total_steps": 78105, "loss": 0.1372, "lr": 4.7236035047130425e-07, "epoch": 4.105050893028615, "percentage": 82.1, "elapsed_time": "2:48:50", "remaining_time": "0:36:48", "throughput": 19920.77, "total_tokens": 201804928}
|
|
{"current_steps": 64130, "total_steps": 78105, "loss": 0.1448, "lr": 4.720336069551143e-07, "epoch": 4.105370974969592, "percentage": 82.11, "elapsed_time": "2:48:51", "remaining_time": "0:36:47", "throughput": 19920.98, "total_tokens": 201820352}
|
|
{"current_steps": 64135, "total_steps": 78105, "loss": 0.103, "lr": 4.717069647050029e-07, "epoch": 4.105691056910569, "percentage": 82.11, "elapsed_time": "2:48:51", "remaining_time": "0:36:46", "throughput": 19921.22, "total_tokens": 201836160}
|
|
{"current_steps": 64140, "total_steps": 78105, "loss": 0.1144, "lr": 4.7138042373728016e-07, "epoch": 4.106011138851546, "percentage": 82.12, "elapsed_time": "2:48:52", "remaining_time": "0:36:46", "throughput": 19921.47, "total_tokens": 201852736}
|
|
{"current_steps": 64145, "total_steps": 78105, "loss": 0.0645, "lr": 4.710539840682538e-07, "epoch": 4.106331220792523, "percentage": 82.13, "elapsed_time": "2:48:53", "remaining_time": "0:36:45", "throughput": 19921.69, "total_tokens": 201868608}
|
|
{"current_steps": 64150, "total_steps": 78105, "loss": 0.0904, "lr": 4.7072764571422127e-07, "epoch": 4.1066513027335, "percentage": 82.13, "elapsed_time": "2:48:53", "remaining_time": "0:36:44", "throughput": 19921.91, "total_tokens": 201884864}
|
|
{"current_steps": 64155, "total_steps": 78105, "loss": 0.2092, "lr": 4.70401408691481e-07, "epoch": 4.106971384674477, "percentage": 82.14, "elapsed_time": "2:48:54", "remaining_time": "0:36:43", "throughput": 19922.13, "total_tokens": 201900352}
|
|
{"current_steps": 64160, "total_steps": 78105, "loss": 0.144, "lr": 4.7007527301632274e-07, "epoch": 4.107291466615454, "percentage": 82.15, "elapsed_time": "2:48:55", "remaining_time": "0:36:42", "throughput": 19922.35, "total_tokens": 201915968}
|
|
{"current_steps": 64165, "total_steps": 78105, "loss": 0.1251, "lr": 4.697492387050315e-07, "epoch": 4.107611548556431, "percentage": 82.15, "elapsed_time": "2:48:55", "remaining_time": "0:36:42", "throughput": 19922.59, "total_tokens": 201932032}
|
|
{"current_steps": 64170, "total_steps": 78105, "loss": 0.1085, "lr": 4.6942330577388837e-07, "epoch": 4.107931630497407, "percentage": 82.16, "elapsed_time": "2:48:56", "remaining_time": "0:36:41", "throughput": 19922.82, "total_tokens": 201947584}
|
|
{"current_steps": 64175, "total_steps": 78105, "loss": 0.1524, "lr": 4.6909747423916826e-07, "epoch": 4.108251712438384, "percentage": 82.17, "elapsed_time": "2:48:57", "remaining_time": "0:36:40", "throughput": 19923.03, "total_tokens": 201963264}
|
|
{"current_steps": 64180, "total_steps": 78105, "loss": 0.1561, "lr": 4.6877174411714186e-07, "epoch": 4.108571794379361, "percentage": 82.17, "elapsed_time": "2:48:57", "remaining_time": "0:36:39", "throughput": 19923.27, "total_tokens": 201979328}
|
|
{"current_steps": 64185, "total_steps": 78105, "loss": 0.1357, "lr": 4.6844611542407354e-07, "epoch": 4.108891876320338, "percentage": 82.18, "elapsed_time": "2:48:58", "remaining_time": "0:36:38", "throughput": 19923.48, "total_tokens": 201994688}
|
|
{"current_steps": 64190, "total_steps": 78105, "loss": 0.2014, "lr": 4.681205881762249e-07, "epoch": 4.109211958261315, "percentage": 82.18, "elapsed_time": "2:48:59", "remaining_time": "0:36:37", "throughput": 19923.66, "total_tokens": 202009536}
|
|
{"current_steps": 64195, "total_steps": 78105, "loss": 0.0846, "lr": 4.677951623898502e-07, "epoch": 4.109532040202292, "percentage": 82.19, "elapsed_time": "2:48:59", "remaining_time": "0:36:37", "throughput": 19923.9, "total_tokens": 202025600}
|
|
{"current_steps": 64200, "total_steps": 78105, "loss": 0.187, "lr": 4.674698380812001e-07, "epoch": 4.109852122143269, "percentage": 82.2, "elapsed_time": "2:49:00", "remaining_time": "0:36:36", "throughput": 19924.12, "total_tokens": 202041664}
|
|
{"current_steps": 64205, "total_steps": 78105, "loss": 0.1034, "lr": 4.671446152665191e-07, "epoch": 4.110172204084246, "percentage": 82.2, "elapsed_time": "2:49:01", "remaining_time": "0:36:35", "throughput": 19924.33, "total_tokens": 202057024}
|
|
{"current_steps": 64210, "total_steps": 78105, "loss": 0.1511, "lr": 4.668194939620471e-07, "epoch": 4.110492286025223, "percentage": 82.21, "elapsed_time": "2:49:01", "remaining_time": "0:36:34", "throughput": 19924.54, "total_tokens": 202072512}
|
|
{"current_steps": 64215, "total_steps": 78105, "loss": 0.1393, "lr": 4.664944741840191e-07, "epoch": 4.110812367966199, "percentage": 82.22, "elapsed_time": "2:49:02", "remaining_time": "0:36:33", "throughput": 19924.77, "total_tokens": 202088704}
|
|
{"current_steps": 64220, "total_steps": 78105, "loss": 0.0914, "lr": 4.6616955594866507e-07, "epoch": 4.111132449907176, "percentage": 82.22, "elapsed_time": "2:49:03", "remaining_time": "0:36:33", "throughput": 19924.95, "total_tokens": 202103744}
|
|
{"current_steps": 64225, "total_steps": 78105, "loss": 0.1151, "lr": 4.65844739272209e-07, "epoch": 4.111452531848153, "percentage": 82.23, "elapsed_time": "2:49:03", "remaining_time": "0:36:32", "throughput": 19925.13, "total_tokens": 202118528}
|
|
{"current_steps": 64230, "total_steps": 78105, "loss": 0.112, "lr": 4.6552002417087236e-07, "epoch": 4.11177261378913, "percentage": 82.24, "elapsed_time": "2:49:04", "remaining_time": "0:36:31", "throughput": 19925.33, "total_tokens": 202133696}
|
|
{"current_steps": 64235, "total_steps": 78105, "loss": 0.1342, "lr": 4.651954106608672e-07, "epoch": 4.112092695730107, "percentage": 82.24, "elapsed_time": "2:49:05", "remaining_time": "0:36:30", "throughput": 19925.63, "total_tokens": 202151296}
|
|
{"current_steps": 64240, "total_steps": 78105, "loss": 0.0695, "lr": 4.6487089875840496e-07, "epoch": 4.112412777671084, "percentage": 82.25, "elapsed_time": "2:49:05", "remaining_time": "0:36:29", "throughput": 19925.86, "total_tokens": 202167296}
|
|
{"current_steps": 64245, "total_steps": 78105, "loss": 0.1241, "lr": 4.6454648847968935e-07, "epoch": 4.112732859612061, "percentage": 82.25, "elapsed_time": "2:49:06", "remaining_time": "0:36:28", "throughput": 19926.07, "total_tokens": 202182272}
|
|
{"current_steps": 64250, "total_steps": 78105, "loss": 0.1315, "lr": 4.642221798409191e-07, "epoch": 4.113052941553038, "percentage": 82.26, "elapsed_time": "2:49:07", "remaining_time": "0:36:28", "throughput": 19926.33, "total_tokens": 202198784}
|
|
{"current_steps": 64255, "total_steps": 78105, "loss": 0.0888, "lr": 4.6389797285829067e-07, "epoch": 4.113373023494015, "percentage": 82.27, "elapsed_time": "2:49:07", "remaining_time": "0:36:27", "throughput": 19926.53, "total_tokens": 202213824}
|
|
{"current_steps": 64260, "total_steps": 78105, "loss": 0.1836, "lr": 4.635738675479906e-07, "epoch": 4.113693105434991, "percentage": 82.27, "elapsed_time": "2:49:08", "remaining_time": "0:36:26", "throughput": 19926.7, "total_tokens": 202228416}
|
|
{"current_steps": 64265, "total_steps": 78105, "loss": 0.1495, "lr": 4.6324986392620533e-07, "epoch": 4.114013187375968, "percentage": 82.28, "elapsed_time": "2:49:09", "remaining_time": "0:36:25", "throughput": 19926.9, "total_tokens": 202243648}
|
|
{"current_steps": 64270, "total_steps": 78105, "loss": 0.1623, "lr": 4.629259620091114e-07, "epoch": 4.114333269316945, "percentage": 82.29, "elapsed_time": "2:49:09", "remaining_time": "0:36:24", "throughput": 19927.08, "total_tokens": 202258624}
|
|
{"current_steps": 64275, "total_steps": 78105, "loss": 0.1202, "lr": 4.626021618128851e-07, "epoch": 4.114653351257922, "percentage": 82.29, "elapsed_time": "2:49:10", "remaining_time": "0:36:24", "throughput": 19927.28, "total_tokens": 202273792}
|
|
{"current_steps": 64280, "total_steps": 78105, "loss": 0.2068, "lr": 4.6227846335369445e-07, "epoch": 4.114973433198899, "percentage": 82.3, "elapsed_time": "2:49:11", "remaining_time": "0:36:23", "throughput": 19927.49, "total_tokens": 202289280}
|
|
{"current_steps": 64285, "total_steps": 78105, "loss": 0.1566, "lr": 4.6195486664770307e-07, "epoch": 4.115293515139876, "percentage": 82.31, "elapsed_time": "2:49:11", "remaining_time": "0:36:22", "throughput": 19927.7, "total_tokens": 202304448}
|
|
{"current_steps": 64290, "total_steps": 78105, "loss": 0.147, "lr": 4.6163137171106985e-07, "epoch": 4.115613597080853, "percentage": 82.31, "elapsed_time": "2:49:12", "remaining_time": "0:36:21", "throughput": 19927.89, "total_tokens": 202319808}
|
|
{"current_steps": 64295, "total_steps": 78105, "loss": 0.1479, "lr": 4.613079785599486e-07, "epoch": 4.11593367902183, "percentage": 82.32, "elapsed_time": "2:49:13", "remaining_time": "0:36:20", "throughput": 19928.11, "total_tokens": 202335360}
|
|
{"current_steps": 64300, "total_steps": 78105, "loss": 0.1095, "lr": 4.609846872104876e-07, "epoch": 4.116253760962806, "percentage": 82.33, "elapsed_time": "2:49:13", "remaining_time": "0:36:20", "throughput": 19928.35, "total_tokens": 202351488}
|
|
{"current_steps": 64305, "total_steps": 78105, "loss": 0.107, "lr": 4.6066149767882986e-07, "epoch": 4.116573842903783, "percentage": 82.33, "elapsed_time": "2:49:14", "remaining_time": "0:36:19", "throughput": 19928.55, "total_tokens": 202366592}
|
|
{"current_steps": 64310, "total_steps": 78105, "loss": 0.1555, "lr": 4.603384099811151e-07, "epoch": 4.11689392484476, "percentage": 82.34, "elapsed_time": "2:49:15", "remaining_time": "0:36:18", "throughput": 19928.75, "total_tokens": 202381760}
|
|
{"current_steps": 64315, "total_steps": 78105, "loss": 0.1152, "lr": 4.600154241334759e-07, "epoch": 4.117214006785737, "percentage": 82.34, "elapsed_time": "2:49:15", "remaining_time": "0:36:17", "throughput": 19928.97, "total_tokens": 202397696}
|
|
{"current_steps": 64320, "total_steps": 78105, "loss": 0.1218, "lr": 4.596925401520405e-07, "epoch": 4.117534088726714, "percentage": 82.35, "elapsed_time": "2:49:16", "remaining_time": "0:36:16", "throughput": 19929.15, "total_tokens": 202412608}
|
|
{"current_steps": 64325, "total_steps": 78105, "loss": 0.1414, "lr": 4.5936975805293213e-07, "epoch": 4.117854170667691, "percentage": 82.36, "elapsed_time": "2:49:17", "remaining_time": "0:36:15", "throughput": 19929.4, "total_tokens": 202429248}
|
|
{"current_steps": 64330, "total_steps": 78105, "loss": 0.1261, "lr": 4.590470778522688e-07, "epoch": 4.118174252608668, "percentage": 82.36, "elapsed_time": "2:49:18", "remaining_time": "0:36:15", "throughput": 19929.67, "total_tokens": 202445568}
|
|
{"current_steps": 64335, "total_steps": 78105, "loss": 0.1538, "lr": 4.58724499566163e-07, "epoch": 4.118494334549645, "percentage": 82.37, "elapsed_time": "2:49:18", "remaining_time": "0:36:14", "throughput": 19929.9, "total_tokens": 202461568}
|
|
{"current_steps": 64340, "total_steps": 78105, "loss": 0.1237, "lr": 4.584020232107239e-07, "epoch": 4.118814416490622, "percentage": 82.38, "elapsed_time": "2:49:19", "remaining_time": "0:36:13", "throughput": 19930.1, "total_tokens": 202476800}
|
|
{"current_steps": 64345, "total_steps": 78105, "loss": 0.1777, "lr": 4.580796488020525e-07, "epoch": 4.119134498431598, "percentage": 82.38, "elapsed_time": "2:49:20", "remaining_time": "0:36:12", "throughput": 19930.31, "total_tokens": 202492416}
|
|
{"current_steps": 64350, "total_steps": 78105, "loss": 0.1355, "lr": 4.577573763562487e-07, "epoch": 4.119454580372575, "percentage": 82.39, "elapsed_time": "2:49:20", "remaining_time": "0:36:11", "throughput": 19930.52, "total_tokens": 202507648}
|
|
{"current_steps": 64355, "total_steps": 78105, "loss": 0.0978, "lr": 4.574352058894027e-07, "epoch": 4.119774662313552, "percentage": 82.4, "elapsed_time": "2:49:21", "remaining_time": "0:36:11", "throughput": 19930.73, "total_tokens": 202523328}
|
|
{"current_steps": 64360, "total_steps": 78105, "loss": 0.0897, "lr": 4.5711313741760354e-07, "epoch": 4.120094744254529, "percentage": 82.4, "elapsed_time": "2:49:22", "remaining_time": "0:36:10", "throughput": 19931.0, "total_tokens": 202540736}
|
|
{"current_steps": 64365, "total_steps": 78105, "loss": 0.169, "lr": 4.5679117095693363e-07, "epoch": 4.120414826195506, "percentage": 82.41, "elapsed_time": "2:49:22", "remaining_time": "0:36:09", "throughput": 19931.21, "total_tokens": 202556160}
|
|
{"current_steps": 64370, "total_steps": 78105, "loss": 0.166, "lr": 4.5646930652346904e-07, "epoch": 4.120734908136483, "percentage": 82.41, "elapsed_time": "2:49:23", "remaining_time": "0:36:08", "throughput": 19931.39, "total_tokens": 202571072}
|
|
{"current_steps": 64375, "total_steps": 78105, "loss": 0.1584, "lr": 4.561475441332844e-07, "epoch": 4.12105499007746, "percentage": 82.42, "elapsed_time": "2:49:24", "remaining_time": "0:36:07", "throughput": 19931.57, "total_tokens": 202586048}
|
|
{"current_steps": 64380, "total_steps": 78105, "loss": 0.1511, "lr": 4.558258838024437e-07, "epoch": 4.121375072018437, "percentage": 82.43, "elapsed_time": "2:49:24", "remaining_time": "0:36:06", "throughput": 19931.78, "total_tokens": 202601408}
|
|
{"current_steps": 64385, "total_steps": 78105, "loss": 0.0973, "lr": 4.555043255470118e-07, "epoch": 4.121695153959414, "percentage": 82.43, "elapsed_time": "2:49:25", "remaining_time": "0:36:06", "throughput": 19932.03, "total_tokens": 202617920}
|
|
{"current_steps": 64390, "total_steps": 78105, "loss": 0.1305, "lr": 4.551828693830443e-07, "epoch": 4.12201523590039, "percentage": 82.44, "elapsed_time": "2:49:26", "remaining_time": "0:36:05", "throughput": 19932.26, "total_tokens": 202634176}
|
|
{"current_steps": 64395, "total_steps": 78105, "loss": 0.1261, "lr": 4.548615153265931e-07, "epoch": 4.122335317841367, "percentage": 82.45, "elapsed_time": "2:49:26", "remaining_time": "0:36:04", "throughput": 19932.5, "total_tokens": 202650432}
|
|
{"current_steps": 64400, "total_steps": 78105, "loss": 0.132, "lr": 4.5454026339370514e-07, "epoch": 4.122655399782344, "percentage": 82.45, "elapsed_time": "2:49:27", "remaining_time": "0:36:03", "throughput": 19932.69, "total_tokens": 202665280}
|
|
{"current_steps": 64405, "total_steps": 78105, "loss": 0.1161, "lr": 4.54219113600422e-07, "epoch": 4.122975481723321, "percentage": 82.46, "elapsed_time": "2:49:28", "remaining_time": "0:36:02", "throughput": 19932.91, "total_tokens": 202681472}
|
|
{"current_steps": 64410, "total_steps": 78105, "loss": 0.1338, "lr": 4.5389806596278035e-07, "epoch": 4.123295563664298, "percentage": 82.47, "elapsed_time": "2:49:28", "remaining_time": "0:36:02", "throughput": 19933.13, "total_tokens": 202697088}
|
|
{"current_steps": 64415, "total_steps": 78105, "loss": 0.1066, "lr": 4.5357712049681145e-07, "epoch": 4.123615645605275, "percentage": 82.47, "elapsed_time": "2:49:29", "remaining_time": "0:36:01", "throughput": 19933.38, "total_tokens": 202713536}
|
|
{"current_steps": 64420, "total_steps": 78105, "loss": 0.1069, "lr": 4.5325627721854114e-07, "epoch": 4.123935727546252, "percentage": 82.48, "elapsed_time": "2:49:30", "remaining_time": "0:36:00", "throughput": 19933.58, "total_tokens": 202728512}
|
|
{"current_steps": 64425, "total_steps": 78105, "loss": 0.1338, "lr": 4.529355361439919e-07, "epoch": 4.124255809487229, "percentage": 82.49, "elapsed_time": "2:49:30", "remaining_time": "0:35:59", "throughput": 19933.79, "total_tokens": 202744256}
|
|
{"current_steps": 64430, "total_steps": 78105, "loss": 0.1358, "lr": 4.5261489728917917e-07, "epoch": 4.124575891428206, "percentage": 82.49, "elapsed_time": "2:49:31", "remaining_time": "0:35:58", "throughput": 19933.98, "total_tokens": 202759424}
|
|
{"current_steps": 64435, "total_steps": 78105, "loss": 0.1301, "lr": 4.522943606701141e-07, "epoch": 4.124895973369182, "percentage": 82.5, "elapsed_time": "2:49:32", "remaining_time": "0:35:58", "throughput": 19934.18, "total_tokens": 202774784}
|
|
{"current_steps": 64440, "total_steps": 78105, "loss": 0.0963, "lr": 4.519739263028025e-07, "epoch": 4.125216055310159, "percentage": 82.5, "elapsed_time": "2:49:32", "remaining_time": "0:35:57", "throughput": 19934.52, "total_tokens": 202793536}
|
|
{"current_steps": 64445, "total_steps": 78105, "loss": 0.1323, "lr": 4.516535942032452e-07, "epoch": 4.125536137251136, "percentage": 82.51, "elapsed_time": "2:49:33", "remaining_time": "0:35:56", "throughput": 19934.75, "total_tokens": 202809536}
|
|
{"current_steps": 64450, "total_steps": 78105, "loss": 0.1842, "lr": 4.5133336438743793e-07, "epoch": 4.125856219192113, "percentage": 82.52, "elapsed_time": "2:49:34", "remaining_time": "0:35:55", "throughput": 19934.98, "total_tokens": 202825408}
|
|
{"current_steps": 64455, "total_steps": 78105, "loss": 0.0924, "lr": 4.5101323687137074e-07, "epoch": 4.12617630113309, "percentage": 82.52, "elapsed_time": "2:49:35", "remaining_time": "0:35:54", "throughput": 19935.18, "total_tokens": 202840448}
|
|
{"current_steps": 64460, "total_steps": 78105, "loss": 0.1397, "lr": 4.5069321167103106e-07, "epoch": 4.126496383074067, "percentage": 82.53, "elapsed_time": "2:49:35", "remaining_time": "0:35:54", "throughput": 19935.39, "total_tokens": 202856064}
|
|
{"current_steps": 64465, "total_steps": 78105, "loss": 0.1081, "lr": 4.503732888023968e-07, "epoch": 4.126816465015044, "percentage": 82.54, "elapsed_time": "2:49:36", "remaining_time": "0:35:53", "throughput": 19935.61, "total_tokens": 202871936}
|
|
{"current_steps": 64470, "total_steps": 78105, "loss": 0.1444, "lr": 4.5005346828144477e-07, "epoch": 4.127136546956021, "percentage": 82.54, "elapsed_time": "2:49:37", "remaining_time": "0:35:52", "throughput": 19935.82, "total_tokens": 202887232}
|
|
{"current_steps": 64475, "total_steps": 78105, "loss": 0.1141, "lr": 4.4973375012414474e-07, "epoch": 4.127456628896998, "percentage": 82.55, "elapsed_time": "2:49:37", "remaining_time": "0:35:51", "throughput": 19936.01, "total_tokens": 202902528}
|
|
{"current_steps": 64480, "total_steps": 78105, "loss": 0.1175, "lr": 4.4941413434646116e-07, "epoch": 4.127776710837974, "percentage": 82.56, "elapsed_time": "2:49:38", "remaining_time": "0:35:50", "throughput": 19936.21, "total_tokens": 202917632}
|
|
{"current_steps": 64485, "total_steps": 78105, "loss": 0.1459, "lr": 4.4909462096435594e-07, "epoch": 4.128096792778951, "percentage": 82.56, "elapsed_time": "2:49:39", "remaining_time": "0:35:49", "throughput": 19936.43, "total_tokens": 202933376}
|
|
{"current_steps": 64490, "total_steps": 78105, "loss": 0.1393, "lr": 4.4877520999378115e-07, "epoch": 4.128416874719928, "percentage": 82.57, "elapsed_time": "2:49:39", "remaining_time": "0:35:49", "throughput": 19936.65, "total_tokens": 202949056}
|
|
{"current_steps": 64495, "total_steps": 78105, "loss": 0.0822, "lr": 4.484559014506895e-07, "epoch": 4.128736956660905, "percentage": 82.57, "elapsed_time": "2:49:40", "remaining_time": "0:35:48", "throughput": 19936.87, "total_tokens": 202964736}
|
|
{"current_steps": 64500, "total_steps": 78105, "loss": 0.1429, "lr": 4.4813669535102243e-07, "epoch": 4.129057038601882, "percentage": 82.58, "elapsed_time": "2:49:41", "remaining_time": "0:35:47", "throughput": 19937.06, "total_tokens": 202979776}
|
|
{"current_steps": 64505, "total_steps": 78105, "loss": 0.1544, "lr": 4.4781759171072194e-07, "epoch": 4.129377120542859, "percentage": 82.59, "elapsed_time": "2:49:41", "remaining_time": "0:35:46", "throughput": 19937.29, "total_tokens": 202996096}
|
|
{"current_steps": 64510, "total_steps": 78105, "loss": 0.1191, "lr": 4.4749859054572133e-07, "epoch": 4.129697202483836, "percentage": 82.59, "elapsed_time": "2:49:42", "remaining_time": "0:35:45", "throughput": 19937.48, "total_tokens": 203011136}
|
|
{"current_steps": 64515, "total_steps": 78105, "loss": 0.0833, "lr": 4.471796918719501e-07, "epoch": 4.130017284424813, "percentage": 82.6, "elapsed_time": "2:49:43", "remaining_time": "0:35:45", "throughput": 19937.7, "total_tokens": 203026816}
|
|
{"current_steps": 64520, "total_steps": 78105, "loss": 0.1392, "lr": 4.4686089570533247e-07, "epoch": 4.13033736636579, "percentage": 82.61, "elapsed_time": "2:49:43", "remaining_time": "0:35:44", "throughput": 19937.95, "total_tokens": 203043456}
|
|
{"current_steps": 64525, "total_steps": 78105, "loss": 0.1316, "lr": 4.465422020617871e-07, "epoch": 4.130657448306766, "percentage": 82.61, "elapsed_time": "2:49:44", "remaining_time": "0:35:43", "throughput": 19938.17, "total_tokens": 203059456}
|
|
{"current_steps": 64530, "total_steps": 78105, "loss": 0.0822, "lr": 4.462236109572279e-07, "epoch": 4.130977530247743, "percentage": 82.62, "elapsed_time": "2:49:45", "remaining_time": "0:35:42", "throughput": 19938.4, "total_tokens": 203075328}
|
|
{"current_steps": 64535, "total_steps": 78105, "loss": 0.1321, "lr": 4.4590512240756334e-07, "epoch": 4.13129761218872, "percentage": 82.63, "elapsed_time": "2:49:45", "remaining_time": "0:35:41", "throughput": 19938.63, "total_tokens": 203091136}
|
|
{"current_steps": 64540, "total_steps": 78105, "loss": 0.1423, "lr": 4.455867364286984e-07, "epoch": 4.131617694129697, "percentage": 82.63, "elapsed_time": "2:49:46", "remaining_time": "0:35:40", "throughput": 19938.83, "total_tokens": 203106432}
|
|
{"current_steps": 64545, "total_steps": 78105, "loss": 0.1326, "lr": 4.452684530365306e-07, "epoch": 4.131937776070674, "percentage": 82.64, "elapsed_time": "2:49:47", "remaining_time": "0:35:40", "throughput": 19939.03, "total_tokens": 203121600}
|
|
{"current_steps": 64550, "total_steps": 78105, "loss": 0.1625, "lr": 4.449502722469537e-07, "epoch": 4.132257858011651, "percentage": 82.65, "elapsed_time": "2:49:48", "remaining_time": "0:35:39", "throughput": 19939.59, "total_tokens": 203150528}
|
|
{"current_steps": 64555, "total_steps": 78105, "loss": 0.1392, "lr": 4.4463219407585577e-07, "epoch": 4.132577939952628, "percentage": 82.65, "elapsed_time": "2:49:48", "remaining_time": "0:35:38", "throughput": 19939.82, "total_tokens": 203166848}
|
|
{"current_steps": 64560, "total_steps": 78105, "loss": 0.1145, "lr": 4.4431421853912e-07, "epoch": 4.132898021893605, "percentage": 82.66, "elapsed_time": "2:49:49", "remaining_time": "0:35:37", "throughput": 19940.03, "total_tokens": 203182336}
|
|
{"current_steps": 64565, "total_steps": 78105, "loss": 0.1261, "lr": 4.439963456526239e-07, "epoch": 4.133218103834581, "percentage": 82.66, "elapsed_time": "2:49:50", "remaining_time": "0:35:37", "throughput": 19940.24, "total_tokens": 203197504}
|
|
{"current_steps": 64570, "total_steps": 78105, "loss": 0.0983, "lr": 4.436785754322423e-07, "epoch": 4.133538185775558, "percentage": 82.67, "elapsed_time": "2:49:50", "remaining_time": "0:35:36", "throughput": 19940.42, "total_tokens": 203212032}
|
|
{"current_steps": 64575, "total_steps": 78105, "loss": 0.1492, "lr": 4.433609078938403e-07, "epoch": 4.133858267716535, "percentage": 82.68, "elapsed_time": "2:49:51", "remaining_time": "0:35:35", "throughput": 19940.65, "total_tokens": 203227520}
|
|
{"current_steps": 64580, "total_steps": 78105, "loss": 0.1942, "lr": 4.4304334305328296e-07, "epoch": 4.134178349657512, "percentage": 82.68, "elapsed_time": "2:49:52", "remaining_time": "0:35:34", "throughput": 19940.84, "total_tokens": 203242432}
|
|
{"current_steps": 64585, "total_steps": 78105, "loss": 0.142, "lr": 4.4272588092642596e-07, "epoch": 4.134498431598489, "percentage": 82.69, "elapsed_time": "2:49:52", "remaining_time": "0:35:33", "throughput": 19941.06, "total_tokens": 203257984}
|
|
{"current_steps": 64590, "total_steps": 78105, "loss": 0.1, "lr": 4.4240852152912267e-07, "epoch": 4.134818513539466, "percentage": 82.7, "elapsed_time": "2:49:53", "remaining_time": "0:35:32", "throughput": 19941.23, "total_tokens": 203272640}
|
|
{"current_steps": 64595, "total_steps": 78105, "loss": 0.0801, "lr": 4.420912648772205e-07, "epoch": 4.135138595480443, "percentage": 82.7, "elapsed_time": "2:49:54", "remaining_time": "0:35:32", "throughput": 19941.42, "total_tokens": 203287680}
|
|
{"current_steps": 64600, "total_steps": 78105, "loss": 0.1625, "lr": 4.417741109865606e-07, "epoch": 4.13545867742142, "percentage": 82.71, "elapsed_time": "2:49:54", "remaining_time": "0:35:31", "throughput": 19941.65, "total_tokens": 203303616}
|
|
{"current_steps": 64605, "total_steps": 78105, "loss": 0.1081, "lr": 4.4145705987298197e-07, "epoch": 4.135778759362397, "percentage": 82.72, "elapsed_time": "2:49:55", "remaining_time": "0:35:30", "throughput": 19941.85, "total_tokens": 203318784}
|
|
{"current_steps": 64610, "total_steps": 78105, "loss": 0.0931, "lr": 4.411401115523142e-07, "epoch": 4.136098841303373, "percentage": 82.72, "elapsed_time": "2:49:56", "remaining_time": "0:35:29", "throughput": 19942.08, "total_tokens": 203334848}
|
|
{"current_steps": 64615, "total_steps": 78105, "loss": 0.1182, "lr": 4.4082326604038525e-07, "epoch": 4.13641892324435, "percentage": 82.73, "elapsed_time": "2:49:56", "remaining_time": "0:35:28", "throughput": 19942.26, "total_tokens": 203349696}
|
|
{"current_steps": 64620, "total_steps": 78105, "loss": 0.1232, "lr": 4.4050652335301676e-07, "epoch": 4.136739005185327, "percentage": 82.73, "elapsed_time": "2:49:57", "remaining_time": "0:35:28", "throughput": 19942.48, "total_tokens": 203365376}
|
|
{"current_steps": 64625, "total_steps": 78105, "loss": 0.1279, "lr": 4.40189883506025e-07, "epoch": 4.137059087126304, "percentage": 82.74, "elapsed_time": "2:49:58", "remaining_time": "0:35:27", "throughput": 19942.72, "total_tokens": 203381376}
|
|
{"current_steps": 64630, "total_steps": 78105, "loss": 0.1339, "lr": 4.3987334651522134e-07, "epoch": 4.137379169067281, "percentage": 82.75, "elapsed_time": "2:49:59", "remaining_time": "0:35:26", "throughput": 19942.99, "total_tokens": 203398528}
|
|
{"current_steps": 64635, "total_steps": 78105, "loss": 0.1897, "lr": 4.395569123964119e-07, "epoch": 4.137699251008258, "percentage": 82.75, "elapsed_time": "2:49:59", "remaining_time": "0:35:25", "throughput": 19943.22, "total_tokens": 203414784}
|
|
{"current_steps": 64640, "total_steps": 78105, "loss": 0.17, "lr": 4.392405811653977e-07, "epoch": 4.138019332949235, "percentage": 82.76, "elapsed_time": "2:50:00", "remaining_time": "0:35:24", "throughput": 19943.48, "total_tokens": 203431168}
|
|
{"current_steps": 64645, "total_steps": 78105, "loss": 0.1441, "lr": 4.389243528379747e-07, "epoch": 4.138339414890212, "percentage": 82.77, "elapsed_time": "2:50:01", "remaining_time": "0:35:23", "throughput": 19943.67, "total_tokens": 203446144}
|
|
{"current_steps": 64650, "total_steps": 78105, "loss": 0.1901, "lr": 4.386082274299333e-07, "epoch": 4.138659496831189, "percentage": 82.77, "elapsed_time": "2:50:01", "remaining_time": "0:35:23", "throughput": 19943.84, "total_tokens": 203460992}
|
|
{"current_steps": 64655, "total_steps": 78105, "loss": 0.1195, "lr": 4.382922049570604e-07, "epoch": 4.138979578772165, "percentage": 82.78, "elapsed_time": "2:50:02", "remaining_time": "0:35:22", "throughput": 19944.06, "total_tokens": 203476608}
|
|
{"current_steps": 64660, "total_steps": 78105, "loss": 0.1279, "lr": 4.3797628543513547e-07, "epoch": 4.139299660713142, "percentage": 82.79, "elapsed_time": "2:50:03", "remaining_time": "0:35:21", "throughput": 19944.29, "total_tokens": 203492416}
|
|
{"current_steps": 64665, "total_steps": 78105, "loss": 0.154, "lr": 4.376604688799344e-07, "epoch": 4.139619742654119, "percentage": 82.79, "elapsed_time": "2:50:03", "remaining_time": "0:35:20", "throughput": 19944.51, "total_tokens": 203508160}
|
|
{"current_steps": 64670, "total_steps": 78105, "loss": 0.1593, "lr": 4.3734475530722686e-07, "epoch": 4.139939824595096, "percentage": 82.8, "elapsed_time": "2:50:04", "remaining_time": "0:35:19", "throughput": 19944.76, "total_tokens": 203524800}
|
|
{"current_steps": 64675, "total_steps": 78105, "loss": 0.1384, "lr": 4.370291447327785e-07, "epoch": 4.140259906536073, "percentage": 82.81, "elapsed_time": "2:50:05", "remaining_time": "0:35:19", "throughput": 19945.04, "total_tokens": 203541952}
|
|
{"current_steps": 64680, "total_steps": 78105, "loss": 0.156, "lr": 4.3671363717234877e-07, "epoch": 4.14057998847705, "percentage": 82.81, "elapsed_time": "2:50:05", "remaining_time": "0:35:18", "throughput": 19945.29, "total_tokens": 203558400}
|
|
{"current_steps": 64685, "total_steps": 78105, "loss": 0.1089, "lr": 4.363982326416924e-07, "epoch": 4.140900070418027, "percentage": 82.82, "elapsed_time": "2:50:06", "remaining_time": "0:35:17", "throughput": 19945.82, "total_tokens": 203586816}
|
|
{"current_steps": 64690, "total_steps": 78105, "loss": 0.1465, "lr": 4.3608293115656066e-07, "epoch": 4.141220152359004, "percentage": 82.82, "elapsed_time": "2:50:07", "remaining_time": "0:35:16", "throughput": 19946.03, "total_tokens": 203602304}
|
|
{"current_steps": 64695, "total_steps": 78105, "loss": 0.1297, "lr": 4.3576773273269533e-07, "epoch": 4.141540234299981, "percentage": 82.83, "elapsed_time": "2:50:08", "remaining_time": "0:35:15", "throughput": 19946.22, "total_tokens": 203617216}
|
|
{"current_steps": 64700, "total_steps": 78105, "loss": 0.1259, "lr": 4.3545263738583784e-07, "epoch": 4.141860316240957, "percentage": 82.84, "elapsed_time": "2:50:08", "remaining_time": "0:35:15", "throughput": 19946.42, "total_tokens": 203632832}
|
|
{"current_steps": 64705, "total_steps": 78105, "loss": 0.1186, "lr": 4.3513764513172183e-07, "epoch": 4.142180398181934, "percentage": 82.84, "elapsed_time": "2:50:09", "remaining_time": "0:35:14", "throughput": 19946.62, "total_tokens": 203648256}
|
|
{"current_steps": 64710, "total_steps": 78105, "loss": 0.1011, "lr": 4.3482275598607596e-07, "epoch": 4.142500480122911, "percentage": 82.85, "elapsed_time": "2:50:10", "remaining_time": "0:35:13", "throughput": 19946.87, "total_tokens": 203664448}
|
|
{"current_steps": 64715, "total_steps": 78105, "loss": 0.083, "lr": 4.3450796996462567e-07, "epoch": 4.142820562063888, "percentage": 82.86, "elapsed_time": "2:50:11", "remaining_time": "0:35:12", "throughput": 19947.13, "total_tokens": 203681472}
|
|
{"current_steps": 64720, "total_steps": 78105, "loss": 0.1433, "lr": 4.3419328708308727e-07, "epoch": 4.143140644004865, "percentage": 82.86, "elapsed_time": "2:50:11", "remaining_time": "0:35:11", "throughput": 19947.4, "total_tokens": 203698240}
|
|
{"current_steps": 64725, "total_steps": 78105, "loss": 0.1355, "lr": 4.338787073571768e-07, "epoch": 4.143460725945842, "percentage": 82.87, "elapsed_time": "2:50:12", "remaining_time": "0:35:11", "throughput": 19947.63, "total_tokens": 203714304}
|
|
{"current_steps": 64730, "total_steps": 78105, "loss": 0.1808, "lr": 4.3356423080260085e-07, "epoch": 4.143780807886819, "percentage": 82.88, "elapsed_time": "2:50:13", "remaining_time": "0:35:10", "throughput": 19947.85, "total_tokens": 203729728}
|
|
{"current_steps": 64735, "total_steps": 78105, "loss": 0.1245, "lr": 4.332498574350638e-07, "epoch": 4.144100889827796, "percentage": 82.88, "elapsed_time": "2:50:13", "remaining_time": "0:35:09", "throughput": 19948.1, "total_tokens": 203745792}
|
|
{"current_steps": 64740, "total_steps": 78105, "loss": 0.131, "lr": 4.329355872702637e-07, "epoch": 4.144420971768773, "percentage": 82.89, "elapsed_time": "2:50:14", "remaining_time": "0:35:08", "throughput": 19948.3, "total_tokens": 203761216}
|
|
{"current_steps": 64745, "total_steps": 78105, "loss": 0.0968, "lr": 4.3262142032389347e-07, "epoch": 4.144741053709749, "percentage": 82.89, "elapsed_time": "2:50:15", "remaining_time": "0:35:07", "throughput": 19948.52, "total_tokens": 203776640}
|
|
{"current_steps": 64750, "total_steps": 78105, "loss": 0.1456, "lr": 4.3230735661164093e-07, "epoch": 4.145061135650726, "percentage": 82.9, "elapsed_time": "2:50:15", "remaining_time": "0:35:07", "throughput": 19948.75, "total_tokens": 203792640}
|
|
{"current_steps": 64755, "total_steps": 78105, "loss": 0.1417, "lr": 4.3199339614918874e-07, "epoch": 4.145381217591703, "percentage": 82.91, "elapsed_time": "2:50:16", "remaining_time": "0:35:06", "throughput": 19948.97, "total_tokens": 203808128}
|
|
{"current_steps": 64760, "total_steps": 78105, "loss": 0.1232, "lr": 4.316795389522138e-07, "epoch": 4.14570129953268, "percentage": 82.91, "elapsed_time": "2:50:17", "remaining_time": "0:35:05", "throughput": 19949.22, "total_tokens": 203824704}
|
|
{"current_steps": 64765, "total_steps": 78105, "loss": 0.1134, "lr": 4.313657850363903e-07, "epoch": 4.146021381473657, "percentage": 82.92, "elapsed_time": "2:50:17", "remaining_time": "0:35:04", "throughput": 19949.37, "total_tokens": 203838784}
|
|
{"current_steps": 64770, "total_steps": 78105, "loss": 0.117, "lr": 4.310521344173835e-07, "epoch": 4.146341463414634, "percentage": 82.93, "elapsed_time": "2:50:18", "remaining_time": "0:35:03", "throughput": 19949.63, "total_tokens": 203855616}
|
|
{"current_steps": 64775, "total_steps": 78105, "loss": 0.1809, "lr": 4.3073858711085656e-07, "epoch": 4.146661545355611, "percentage": 82.93, "elapsed_time": "2:50:19", "remaining_time": "0:35:02", "throughput": 19949.82, "total_tokens": 203870912}
|
|
{"current_steps": 64780, "total_steps": 78105, "loss": 0.1523, "lr": 4.3042514313246656e-07, "epoch": 4.146981627296588, "percentage": 82.94, "elapsed_time": "2:50:19", "remaining_time": "0:35:02", "throughput": 19950.07, "total_tokens": 203887360}
|
|
{"current_steps": 64785, "total_steps": 78105, "loss": 0.153, "lr": 4.3011180249786485e-07, "epoch": 4.147301709237565, "percentage": 82.95, "elapsed_time": "2:50:20", "remaining_time": "0:35:01", "throughput": 19950.26, "total_tokens": 203902272}
|
|
{"current_steps": 64790, "total_steps": 78105, "loss": 0.1738, "lr": 4.2979856522269796e-07, "epoch": 4.147621791178541, "percentage": 82.95, "elapsed_time": "2:50:21", "remaining_time": "0:35:00", "throughput": 19950.45, "total_tokens": 203917632}
|
|
{"current_steps": 64795, "total_steps": 78105, "loss": 0.1056, "lr": 4.2948543132260686e-07, "epoch": 4.147941873119518, "percentage": 82.96, "elapsed_time": "2:50:21", "remaining_time": "0:34:59", "throughput": 19950.64, "total_tokens": 203932416}
|
|
{"current_steps": 64800, "total_steps": 78105, "loss": 0.1178, "lr": 4.291724008132295e-07, "epoch": 4.148261955060495, "percentage": 82.97, "elapsed_time": "2:50:22", "remaining_time": "0:34:58", "throughput": 19950.81, "total_tokens": 203946816}
|
|
{"current_steps": 64805, "total_steps": 78105, "loss": 0.1153, "lr": 4.2885947371019476e-07, "epoch": 4.148582037001472, "percentage": 82.97, "elapsed_time": "2:50:23", "remaining_time": "0:34:58", "throughput": 19951.05, "total_tokens": 203963136}
|
|
{"current_steps": 64810, "total_steps": 78105, "loss": 0.1212, "lr": 4.2854665002913103e-07, "epoch": 4.148902118942449, "percentage": 82.98, "elapsed_time": "2:50:23", "remaining_time": "0:34:57", "throughput": 19951.26, "total_tokens": 203978496}
|
|
{"current_steps": 64815, "total_steps": 78105, "loss": 0.1427, "lr": 4.282339297856564e-07, "epoch": 4.149222200883426, "percentage": 82.98, "elapsed_time": "2:50:24", "remaining_time": "0:34:56", "throughput": 19951.48, "total_tokens": 203994368}
|
|
{"current_steps": 64820, "total_steps": 78105, "loss": 0.1349, "lr": 4.279213129953885e-07, "epoch": 4.149542282824403, "percentage": 82.99, "elapsed_time": "2:50:25", "remaining_time": "0:34:55", "throughput": 19951.7, "total_tokens": 204010112}
|
|
{"current_steps": 64825, "total_steps": 78105, "loss": 0.1063, "lr": 4.276087996739375e-07, "epoch": 4.14986236476538, "percentage": 83.0, "elapsed_time": "2:50:25", "remaining_time": "0:34:54", "throughput": 19951.91, "total_tokens": 204026048}
|
|
{"current_steps": 64830, "total_steps": 78105, "loss": 0.1247, "lr": 4.272963898369073e-07, "epoch": 4.150182446706356, "percentage": 83.0, "elapsed_time": "2:50:26", "remaining_time": "0:34:54", "throughput": 19952.1, "total_tokens": 204040896}
|
|
{"current_steps": 64835, "total_steps": 78105, "loss": 0.1024, "lr": 4.269840834999006e-07, "epoch": 4.150502528647333, "percentage": 83.01, "elapsed_time": "2:50:27", "remaining_time": "0:34:53", "throughput": 19952.31, "total_tokens": 204056704}
|
|
{"current_steps": 64840, "total_steps": 78105, "loss": 0.1437, "lr": 4.2667188067850943e-07, "epoch": 4.15082261058831, "percentage": 83.02, "elapsed_time": "2:50:27", "remaining_time": "0:34:52", "throughput": 19952.54, "total_tokens": 204072832}
|
|
{"current_steps": 64845, "total_steps": 78105, "loss": 0.1359, "lr": 4.2635978138832576e-07, "epoch": 4.151142692529287, "percentage": 83.02, "elapsed_time": "2:50:28", "remaining_time": "0:34:51", "throughput": 19952.79, "total_tokens": 204089152}
|
|
{"current_steps": 64850, "total_steps": 78105, "loss": 0.0895, "lr": 4.2604778564493326e-07, "epoch": 4.151462774470264, "percentage": 83.03, "elapsed_time": "2:50:29", "remaining_time": "0:34:50", "throughput": 19953.04, "total_tokens": 204105600}
|
|
{"current_steps": 64855, "total_steps": 78105, "loss": 0.1332, "lr": 4.2573589346391166e-07, "epoch": 4.151782856411241, "percentage": 83.04, "elapsed_time": "2:50:30", "remaining_time": "0:34:50", "throughput": 19953.34, "total_tokens": 204123392}
|
|
{"current_steps": 64860, "total_steps": 78105, "loss": 0.133, "lr": 4.2542410486083484e-07, "epoch": 4.152102938352218, "percentage": 83.04, "elapsed_time": "2:50:30", "remaining_time": "0:34:49", "throughput": 19953.55, "total_tokens": 204139008}
|
|
{"current_steps": 64865, "total_steps": 78105, "loss": 0.187, "lr": 4.2511241985127236e-07, "epoch": 4.152423020293195, "percentage": 83.05, "elapsed_time": "2:50:31", "remaining_time": "0:34:48", "throughput": 19953.75, "total_tokens": 204154368}
|
|
{"current_steps": 64870, "total_steps": 78105, "loss": 0.0906, "lr": 4.248008384507879e-07, "epoch": 4.152743102234172, "percentage": 83.05, "elapsed_time": "2:50:32", "remaining_time": "0:34:47", "throughput": 19953.96, "total_tokens": 204169792}
|
|
{"current_steps": 64875, "total_steps": 78105, "loss": 0.1672, "lr": 4.2448936067494033e-07, "epoch": 4.153063184175148, "percentage": 83.06, "elapsed_time": "2:50:32", "remaining_time": "0:34:46", "throughput": 19954.14, "total_tokens": 204185088}
|
|
{"current_steps": 64880, "total_steps": 78105, "loss": 0.143, "lr": 4.241779865392823e-07, "epoch": 4.153383266116125, "percentage": 83.07, "elapsed_time": "2:50:33", "remaining_time": "0:34:45", "throughput": 19954.34, "total_tokens": 204200576}
|
|
{"current_steps": 64885, "total_steps": 78105, "loss": 0.1264, "lr": 4.2386671605936377e-07, "epoch": 4.153703348057102, "percentage": 83.07, "elapsed_time": "2:50:34", "remaining_time": "0:34:45", "throughput": 19954.59, "total_tokens": 204216768}
|
|
{"current_steps": 64890, "total_steps": 78105, "loss": 0.1559, "lr": 4.2355554925072733e-07, "epoch": 4.154023429998079, "percentage": 83.08, "elapsed_time": "2:50:34", "remaining_time": "0:34:44", "throughput": 19954.77, "total_tokens": 204231488}
|
|
{"current_steps": 64895, "total_steps": 78105, "loss": 0.1119, "lr": 4.232444861289109e-07, "epoch": 4.154343511939056, "percentage": 83.09, "elapsed_time": "2:50:35", "remaining_time": "0:34:43", "throughput": 19954.96, "total_tokens": 204246592}
|
|
{"current_steps": 64900, "total_steps": 78105, "loss": 0.1392, "lr": 4.2293352670944755e-07, "epoch": 4.154663593880033, "percentage": 83.09, "elapsed_time": "2:50:36", "remaining_time": "0:34:42", "throughput": 19955.2, "total_tokens": 204262656}
|
|
{"current_steps": 64905, "total_steps": 78105, "loss": 0.1221, "lr": 4.226226710078646e-07, "epoch": 4.15498367582101, "percentage": 83.1, "elapsed_time": "2:50:36", "remaining_time": "0:34:41", "throughput": 19955.43, "total_tokens": 204278592}
|
|
{"current_steps": 64910, "total_steps": 78105, "loss": 0.1017, "lr": 4.2231191903968487e-07, "epoch": 4.155303757761987, "percentage": 83.11, "elapsed_time": "2:50:37", "remaining_time": "0:34:41", "throughput": 19955.69, "total_tokens": 204295360}
|
|
{"current_steps": 64915, "total_steps": 78105, "loss": 0.0691, "lr": 4.220012708204252e-07, "epoch": 4.155623839702964, "percentage": 83.11, "elapsed_time": "2:50:38", "remaining_time": "0:34:40", "throughput": 19955.88, "total_tokens": 204310336}
|
|
{"current_steps": 64920, "total_steps": 78105, "loss": 0.0884, "lr": 4.216907263655992e-07, "epoch": 4.15594392164394, "percentage": 83.12, "elapsed_time": "2:50:38", "remaining_time": "0:34:39", "throughput": 19956.24, "total_tokens": 204330432}
|
|
{"current_steps": 64925, "total_steps": 78105, "loss": 0.093, "lr": 4.213802856907115e-07, "epoch": 4.156264003584917, "percentage": 83.13, "elapsed_time": "2:50:39", "remaining_time": "0:34:38", "throughput": 19956.51, "total_tokens": 204347264}
|
|
{"current_steps": 64930, "total_steps": 78105, "loss": 0.1339, "lr": 4.21069948811266e-07, "epoch": 4.156584085525894, "percentage": 83.13, "elapsed_time": "2:50:40", "remaining_time": "0:34:37", "throughput": 19956.68, "total_tokens": 204361920}
|
|
{"current_steps": 64935, "total_steps": 78105, "loss": 0.1068, "lr": 4.207597157427584e-07, "epoch": 4.156904167466871, "percentage": 83.14, "elapsed_time": "2:50:40", "remaining_time": "0:34:37", "throughput": 19956.85, "total_tokens": 204376576}
|
|
{"current_steps": 64940, "total_steps": 78105, "loss": 0.1804, "lr": 4.2044958650068024e-07, "epoch": 4.157224249407848, "percentage": 83.14, "elapsed_time": "2:50:41", "remaining_time": "0:34:36", "throughput": 19957.08, "total_tokens": 204392640}
|
|
{"current_steps": 64945, "total_steps": 78105, "loss": 0.1378, "lr": 4.2013956110051766e-07, "epoch": 4.157544331348825, "percentage": 83.15, "elapsed_time": "2:50:42", "remaining_time": "0:34:35", "throughput": 19957.33, "total_tokens": 204409216}
|
|
{"current_steps": 64950, "total_steps": 78105, "loss": 0.1157, "lr": 4.1982963955775143e-07, "epoch": 4.157864413289802, "percentage": 83.16, "elapsed_time": "2:50:42", "remaining_time": "0:34:34", "throughput": 19957.53, "total_tokens": 204424448}
|
|
{"current_steps": 64955, "total_steps": 78105, "loss": 0.0977, "lr": 4.195198218878588e-07, "epoch": 4.158184495230779, "percentage": 83.16, "elapsed_time": "2:50:43", "remaining_time": "0:34:33", "throughput": 19957.72, "total_tokens": 204439360}
|
|
{"current_steps": 64960, "total_steps": 78105, "loss": 0.1166, "lr": 4.192101081063082e-07, "epoch": 4.158504577171756, "percentage": 83.17, "elapsed_time": "2:50:44", "remaining_time": "0:34:32", "throughput": 19957.9, "total_tokens": 204454208}
|
|
{"current_steps": 64965, "total_steps": 78105, "loss": 0.1392, "lr": 4.1890049822856716e-07, "epoch": 4.1588246591127325, "percentage": 83.18, "elapsed_time": "2:50:44", "remaining_time": "0:34:32", "throughput": 19958.13, "total_tokens": 204470208}
|
|
{"current_steps": 64970, "total_steps": 78105, "loss": 0.1047, "lr": 4.185909922700951e-07, "epoch": 4.1591447410537095, "percentage": 83.18, "elapsed_time": "2:50:45", "remaining_time": "0:34:31", "throughput": 19958.33, "total_tokens": 204485312}
|
|
{"current_steps": 64975, "total_steps": 78105, "loss": 0.127, "lr": 4.182815902463472e-07, "epoch": 4.1594648229946865, "percentage": 83.19, "elapsed_time": "2:50:46", "remaining_time": "0:34:30", "throughput": 19958.57, "total_tokens": 204501440}
|
|
{"current_steps": 64980, "total_steps": 78105, "loss": 0.0809, "lr": 4.179722921727736e-07, "epoch": 4.159784904935663, "percentage": 83.2, "elapsed_time": "2:50:46", "remaining_time": "0:34:29", "throughput": 19958.74, "total_tokens": 204515904}
|
|
{"current_steps": 64985, "total_steps": 78105, "loss": 0.1538, "lr": 4.1766309806481857e-07, "epoch": 4.16010498687664, "percentage": 83.2, "elapsed_time": "2:50:47", "remaining_time": "0:34:28", "throughput": 19958.95, "total_tokens": 204531328}
|
|
{"current_steps": 64990, "total_steps": 78105, "loss": 0.1203, "lr": 4.1735400793792153e-07, "epoch": 4.160425068817617, "percentage": 83.21, "elapsed_time": "2:50:48", "remaining_time": "0:34:28", "throughput": 19959.14, "total_tokens": 204546560}
|
|
{"current_steps": 64995, "total_steps": 78105, "loss": 0.1002, "lr": 4.170450218075181e-07, "epoch": 4.160745150758594, "percentage": 83.21, "elapsed_time": "2:50:48", "remaining_time": "0:34:27", "throughput": 19959.36, "total_tokens": 204562432}
|
|
{"current_steps": 65000, "total_steps": 78105, "loss": 0.1078, "lr": 4.167361396890357e-07, "epoch": 4.161065232699571, "percentage": 83.22, "elapsed_time": "2:50:49", "remaining_time": "0:34:26", "throughput": 19959.55, "total_tokens": 204577856}
|
|
{"current_steps": 65005, "total_steps": 78105, "loss": 0.1839, "lr": 4.1642736159789974e-07, "epoch": 4.161385314640548, "percentage": 83.23, "elapsed_time": "2:50:50", "remaining_time": "0:34:25", "throughput": 19959.79, "total_tokens": 204593984}
|
|
{"current_steps": 65010, "total_steps": 78105, "loss": 0.2009, "lr": 4.1611868754952824e-07, "epoch": 4.1617053965815245, "percentage": 83.23, "elapsed_time": "2:50:50", "remaining_time": "0:34:24", "throughput": 19959.99, "total_tokens": 204609152}
|
|
{"current_steps": 65015, "total_steps": 78105, "loss": 0.0927, "lr": 4.158101175593349e-07, "epoch": 4.1620254785225015, "percentage": 83.24, "elapsed_time": "2:50:51", "remaining_time": "0:34:24", "throughput": 19960.2, "total_tokens": 204624896}
|
|
{"current_steps": 65020, "total_steps": 78105, "loss": 0.1504, "lr": 4.155016516427285e-07, "epoch": 4.1623455604634785, "percentage": 83.25, "elapsed_time": "2:50:52", "remaining_time": "0:34:23", "throughput": 19960.42, "total_tokens": 204640576}
|
|
{"current_steps": 65025, "total_steps": 78105, "loss": 0.1276, "lr": 4.1519328981511094e-07, "epoch": 4.1626656424044555, "percentage": 83.25, "elapsed_time": "2:50:53", "remaining_time": "0:34:22", "throughput": 19960.65, "total_tokens": 204657024}
|
|
{"current_steps": 65030, "total_steps": 78105, "loss": 0.0998, "lr": 4.1488503209188233e-07, "epoch": 4.1629857243454325, "percentage": 83.26, "elapsed_time": "2:50:53", "remaining_time": "0:34:21", "throughput": 19960.86, "total_tokens": 204672576}
|
|
{"current_steps": 65035, "total_steps": 78105, "loss": 0.146, "lr": 4.14576878488433e-07, "epoch": 4.1633058062864094, "percentage": 83.27, "elapsed_time": "2:50:54", "remaining_time": "0:34:20", "throughput": 19961.08, "total_tokens": 204688128}
|
|
{"current_steps": 65040, "total_steps": 78105, "loss": 0.1338, "lr": 4.1426882902015325e-07, "epoch": 4.163625888227386, "percentage": 83.27, "elapsed_time": "2:50:55", "remaining_time": "0:34:20", "throughput": 19961.36, "total_tokens": 204705472}
|
|
{"current_steps": 65045, "total_steps": 78105, "loss": 0.1259, "lr": 4.1396088370242257e-07, "epoch": 4.163945970168363, "percentage": 83.28, "elapsed_time": "2:50:55", "remaining_time": "0:34:19", "throughput": 19961.6, "total_tokens": 204721984}
|
|
{"current_steps": 65050, "total_steps": 78105, "loss": 0.146, "lr": 4.136530425506202e-07, "epoch": 4.16426605210934, "percentage": 83.29, "elapsed_time": "2:50:56", "remaining_time": "0:34:18", "throughput": 19961.87, "total_tokens": 204738816}
|
|
{"current_steps": 65055, "total_steps": 78105, "loss": 0.119, "lr": 4.133453055801176e-07, "epoch": 4.1645861340503165, "percentage": 83.29, "elapsed_time": "2:50:57", "remaining_time": "0:34:17", "throughput": 19962.07, "total_tokens": 204753856}
|
|
{"current_steps": 65060, "total_steps": 78105, "loss": 0.1108, "lr": 4.13037672806281e-07, "epoch": 4.1649062159912935, "percentage": 83.3, "elapsed_time": "2:50:57", "remaining_time": "0:34:16", "throughput": 19962.26, "total_tokens": 204768960}
|
|
{"current_steps": 65065, "total_steps": 78105, "loss": 0.0814, "lr": 4.1273014424447336e-07, "epoch": 4.1652262979322705, "percentage": 83.3, "elapsed_time": "2:50:58", "remaining_time": "0:34:15", "throughput": 19962.45, "total_tokens": 204784384}
|
|
{"current_steps": 65070, "total_steps": 78105, "loss": 0.0996, "lr": 4.124227199100489e-07, "epoch": 4.1655463798732475, "percentage": 83.31, "elapsed_time": "2:50:59", "remaining_time": "0:34:15", "throughput": 19962.66, "total_tokens": 204800576}
|
|
{"current_steps": 65075, "total_steps": 78105, "loss": 0.0984, "lr": 4.121153998183608e-07, "epoch": 4.1658664618142245, "percentage": 83.32, "elapsed_time": "2:50:59", "remaining_time": "0:34:14", "throughput": 19962.9, "total_tokens": 204816896}
|
|
{"current_steps": 65080, "total_steps": 78105, "loss": 0.1195, "lr": 4.1180818398475405e-07, "epoch": 4.1661865437552015, "percentage": 83.32, "elapsed_time": "2:51:00", "remaining_time": "0:34:13", "throughput": 19963.13, "total_tokens": 204832512}
|
|
{"current_steps": 65085, "total_steps": 78105, "loss": 0.1197, "lr": 4.1150107242456994e-07, "epoch": 4.1665066256961785, "percentage": 83.33, "elapsed_time": "2:51:01", "remaining_time": "0:34:12", "throughput": 19963.41, "total_tokens": 204849920}
|
|
{"current_steps": 65090, "total_steps": 78105, "loss": 0.1707, "lr": 4.111940651531432e-07, "epoch": 4.1668267076371555, "percentage": 83.34, "elapsed_time": "2:51:01", "remaining_time": "0:34:11", "throughput": 19963.61, "total_tokens": 204864960}
|
|
{"current_steps": 65095, "total_steps": 78105, "loss": 0.1008, "lr": 4.1088716218580517e-07, "epoch": 4.167146789578132, "percentage": 83.34, "elapsed_time": "2:51:02", "remaining_time": "0:34:11", "throughput": 19963.83, "total_tokens": 204880832}
|
|
{"current_steps": 65100, "total_steps": 78105, "loss": 0.1414, "lr": 4.1058036353787996e-07, "epoch": 4.1674668715191086, "percentage": 83.35, "elapsed_time": "2:51:03", "remaining_time": "0:34:10", "throughput": 19964.04, "total_tokens": 204896448}
|
|
{"current_steps": 65105, "total_steps": 78105, "loss": 0.1297, "lr": 4.1027366922468836e-07, "epoch": 4.1677869534600855, "percentage": 83.36, "elapsed_time": "2:51:03", "remaining_time": "0:34:09", "throughput": 19964.26, "total_tokens": 204911936}
|
|
{"current_steps": 65110, "total_steps": 78105, "loss": 0.1235, "lr": 4.09967079261544e-07, "epoch": 4.1681070354010625, "percentage": 83.36, "elapsed_time": "2:51:04", "remaining_time": "0:34:08", "throughput": 19964.5, "total_tokens": 204928128}
|
|
{"current_steps": 65115, "total_steps": 78105, "loss": 0.1563, "lr": 4.096605936637582e-07, "epoch": 4.1684271173420395, "percentage": 83.37, "elapsed_time": "2:51:05", "remaining_time": "0:34:07", "throughput": 19964.81, "total_tokens": 204945984}
|
|
{"current_steps": 65120, "total_steps": 78105, "loss": 0.1165, "lr": 4.093542124466332e-07, "epoch": 4.1687471992830165, "percentage": 83.37, "elapsed_time": "2:51:06", "remaining_time": "0:34:07", "throughput": 19965.01, "total_tokens": 204961664}
|
|
{"current_steps": 65125, "total_steps": 78105, "loss": 0.1322, "lr": 4.090479356254695e-07, "epoch": 4.1690672812239935, "percentage": 83.38, "elapsed_time": "2:51:06", "remaining_time": "0:34:06", "throughput": 19965.28, "total_tokens": 204978816}
|
|
{"current_steps": 65130, "total_steps": 78105, "loss": 0.1032, "lr": 4.0874176321556076e-07, "epoch": 4.1693873631649705, "percentage": 83.39, "elapsed_time": "2:51:07", "remaining_time": "0:34:05", "throughput": 19965.48, "total_tokens": 204994048}
|
|
{"current_steps": 65135, "total_steps": 78105, "loss": 0.0664, "lr": 4.084356952321952e-07, "epoch": 4.1697074451059475, "percentage": 83.39, "elapsed_time": "2:51:08", "remaining_time": "0:34:04", "throughput": 19965.72, "total_tokens": 205010240}
|
|
{"current_steps": 65140, "total_steps": 78105, "loss": 0.1436, "lr": 4.0812973169065655e-07, "epoch": 4.170027527046924, "percentage": 83.4, "elapsed_time": "2:51:08", "remaining_time": "0:34:03", "throughput": 19965.95, "total_tokens": 205026112}
|
|
{"current_steps": 65145, "total_steps": 78105, "loss": 0.1175, "lr": 4.0782387260622225e-07, "epoch": 4.170347608987901, "percentage": 83.41, "elapsed_time": "2:51:09", "remaining_time": "0:34:03", "throughput": 19966.14, "total_tokens": 205041408}
|
|
{"current_steps": 65150, "total_steps": 78105, "loss": 0.1286, "lr": 4.07518117994167e-07, "epoch": 4.170667690928878, "percentage": 83.41, "elapsed_time": "2:51:10", "remaining_time": "0:34:02", "throughput": 19966.36, "total_tokens": 205057408}
|
|
{"current_steps": 65155, "total_steps": 78105, "loss": 0.1448, "lr": 4.0721246786975673e-07, "epoch": 4.170987772869855, "percentage": 83.42, "elapsed_time": "2:51:10", "remaining_time": "0:34:01", "throughput": 19966.57, "total_tokens": 205073152}
|
|
{"current_steps": 65160, "total_steps": 78105, "loss": 0.1165, "lr": 4.069069222482555e-07, "epoch": 4.1713078548108316, "percentage": 83.43, "elapsed_time": "2:51:11", "remaining_time": "0:34:00", "throughput": 19966.77, "total_tokens": 205088384}
|
|
{"current_steps": 65165, "total_steps": 78105, "loss": 0.144, "lr": 4.066014811449198e-07, "epoch": 4.1716279367518085, "percentage": 83.43, "elapsed_time": "2:51:12", "remaining_time": "0:33:59", "throughput": 19967.02, "total_tokens": 205104832}
|
|
{"current_steps": 65170, "total_steps": 78105, "loss": 0.1505, "lr": 4.0629614457500214e-07, "epoch": 4.1719480186927855, "percentage": 83.44, "elapsed_time": "2:51:12", "remaining_time": "0:33:58", "throughput": 19967.23, "total_tokens": 205120576}
|
|
{"current_steps": 65175, "total_steps": 78105, "loss": 0.1021, "lr": 4.059909125537495e-07, "epoch": 4.1722681006337625, "percentage": 83.45, "elapsed_time": "2:51:13", "remaining_time": "0:33:58", "throughput": 19967.45, "total_tokens": 205136128}
|
|
{"current_steps": 65180, "total_steps": 78105, "loss": 0.1506, "lr": 4.0568578509640245e-07, "epoch": 4.1725881825747395, "percentage": 83.45, "elapsed_time": "2:51:14", "remaining_time": "0:33:57", "throughput": 19967.63, "total_tokens": 205150912}
|
|
{"current_steps": 65185, "total_steps": 78105, "loss": 0.1284, "lr": 4.053807622181999e-07, "epoch": 4.172908264515716, "percentage": 83.46, "elapsed_time": "2:51:14", "remaining_time": "0:33:56", "throughput": 19967.86, "total_tokens": 205167040}
|
|
{"current_steps": 65190, "total_steps": 78105, "loss": 0.1668, "lr": 4.0507584393437023e-07, "epoch": 4.173228346456693, "percentage": 83.46, "elapsed_time": "2:51:15", "remaining_time": "0:33:55", "throughput": 19968.14, "total_tokens": 205184320}
|
|
{"current_steps": 65195, "total_steps": 78105, "loss": 0.136, "lr": 4.047710302601415e-07, "epoch": 4.17354842839767, "percentage": 83.47, "elapsed_time": "2:51:16", "remaining_time": "0:33:54", "throughput": 19968.37, "total_tokens": 205200192}
|
|
{"current_steps": 65200, "total_steps": 78105, "loss": 0.1075, "lr": 4.0446632121073387e-07, "epoch": 4.173868510338647, "percentage": 83.48, "elapsed_time": "2:51:16", "remaining_time": "0:33:54", "throughput": 19968.59, "total_tokens": 205216128}
|
|
{"current_steps": 65205, "total_steps": 78105, "loss": 0.1893, "lr": 4.0416171680136285e-07, "epoch": 4.174188592279624, "percentage": 83.48, "elapsed_time": "2:51:17", "remaining_time": "0:33:53", "throughput": 19968.9, "total_tokens": 205234624}
|
|
{"current_steps": 65210, "total_steps": 78105, "loss": 0.117, "lr": 4.0385721704723906e-07, "epoch": 4.174508674220601, "percentage": 83.49, "elapsed_time": "2:51:18", "remaining_time": "0:33:52", "throughput": 19969.12, "total_tokens": 205249984}
|
|
{"current_steps": 65215, "total_steps": 78105, "loss": 0.1858, "lr": 4.035528219635673e-07, "epoch": 4.174828756161578, "percentage": 83.5, "elapsed_time": "2:51:19", "remaining_time": "0:33:51", "throughput": 19969.32, "total_tokens": 205265152}
|
|
{"current_steps": 65220, "total_steps": 78105, "loss": 0.1583, "lr": 4.032485315655471e-07, "epoch": 4.1751488381025545, "percentage": 83.5, "elapsed_time": "2:51:19", "remaining_time": "0:33:50", "throughput": 19969.61, "total_tokens": 205282368}
|
|
{"current_steps": 65225, "total_steps": 78105, "loss": 0.1332, "lr": 4.029443458683746e-07, "epoch": 4.1754689200435315, "percentage": 83.51, "elapsed_time": "2:51:20", "remaining_time": "0:33:50", "throughput": 19969.81, "total_tokens": 205297664}
|
|
{"current_steps": 65230, "total_steps": 78105, "loss": 0.0907, "lr": 4.026402648872374e-07, "epoch": 4.175789001984508, "percentage": 83.52, "elapsed_time": "2:51:21", "remaining_time": "0:33:49", "throughput": 19970.02, "total_tokens": 205312960}
|
|
{"current_steps": 65235, "total_steps": 78105, "loss": 0.1385, "lr": 4.023362886373211e-07, "epoch": 4.176109083925485, "percentage": 83.52, "elapsed_time": "2:51:21", "remaining_time": "0:33:48", "throughput": 19970.23, "total_tokens": 205328448}
|
|
{"current_steps": 65240, "total_steps": 78105, "loss": 0.1852, "lr": 4.0203241713380414e-07, "epoch": 4.176429165866462, "percentage": 83.53, "elapsed_time": "2:51:22", "remaining_time": "0:33:47", "throughput": 19970.53, "total_tokens": 205346112}
|
|
{"current_steps": 65245, "total_steps": 78105, "loss": 0.1236, "lr": 4.017286503918605e-07, "epoch": 4.176749247807439, "percentage": 83.53, "elapsed_time": "2:51:23", "remaining_time": "0:33:46", "throughput": 19970.74, "total_tokens": 205361664}
|
|
{"current_steps": 65250, "total_steps": 78105, "loss": 0.1978, "lr": 4.0142498842665853e-07, "epoch": 4.177069329748416, "percentage": 83.54, "elapsed_time": "2:51:23", "remaining_time": "0:33:46", "throughput": 19970.93, "total_tokens": 205377152}
|
|
{"current_steps": 65255, "total_steps": 78105, "loss": 0.1269, "lr": 4.0112143125336093e-07, "epoch": 4.177389411689393, "percentage": 83.55, "elapsed_time": "2:51:24", "remaining_time": "0:33:45", "throughput": 19971.13, "total_tokens": 205392320}
|
|
{"current_steps": 65260, "total_steps": 78105, "loss": 0.212, "lr": 4.0081797888712776e-07, "epoch": 4.17770949363037, "percentage": 83.55, "elapsed_time": "2:51:25", "remaining_time": "0:33:44", "throughput": 19971.3, "total_tokens": 205407296}
|
|
{"current_steps": 65265, "total_steps": 78105, "loss": 0.1213, "lr": 4.0051463134310906e-07, "epoch": 4.178029575571347, "percentage": 83.56, "elapsed_time": "2:51:25", "remaining_time": "0:33:43", "throughput": 19971.51, "total_tokens": 205422720}
|
|
{"current_steps": 65270, "total_steps": 78105, "loss": 0.1097, "lr": 4.00211388636455e-07, "epoch": 4.178349657512324, "percentage": 83.57, "elapsed_time": "2:51:26", "remaining_time": "0:33:42", "throughput": 19971.72, "total_tokens": 205438656}
|
|
{"current_steps": 65275, "total_steps": 78105, "loss": 0.1315, "lr": 3.999082507823057e-07, "epoch": 4.1786697394533, "percentage": 83.57, "elapsed_time": "2:51:27", "remaining_time": "0:33:41", "throughput": 19971.91, "total_tokens": 205453760}
|
|
{"current_steps": 65280, "total_steps": 78105, "loss": 0.1704, "lr": 3.9960521779580014e-07, "epoch": 4.178989821394277, "percentage": 83.58, "elapsed_time": "2:51:27", "remaining_time": "0:33:41", "throughput": 19972.12, "total_tokens": 205469056}
|
|
{"current_steps": 65285, "total_steps": 78105, "loss": 0.1219, "lr": 3.9930228969206925e-07, "epoch": 4.179309903335254, "percentage": 83.59, "elapsed_time": "2:51:28", "remaining_time": "0:33:40", "throughput": 19972.33, "total_tokens": 205484736}
|
|
{"current_steps": 65290, "total_steps": 78105, "loss": 0.1858, "lr": 3.9899946648623983e-07, "epoch": 4.179629985276231, "percentage": 83.59, "elapsed_time": "2:51:29", "remaining_time": "0:33:39", "throughput": 19972.5, "total_tokens": 205499520}
|
|
{"current_steps": 65295, "total_steps": 78105, "loss": 0.1224, "lr": 3.986967481934334e-07, "epoch": 4.179950067217208, "percentage": 83.6, "elapsed_time": "2:51:29", "remaining_time": "0:33:38", "throughput": 19972.68, "total_tokens": 205514304}
|
|
{"current_steps": 65300, "total_steps": 78105, "loss": 0.1172, "lr": 3.9839413482876565e-07, "epoch": 4.180270149158185, "percentage": 83.61, "elapsed_time": "2:51:30", "remaining_time": "0:33:37", "throughput": 19972.9, "total_tokens": 205529984}
|
|
{"current_steps": 65305, "total_steps": 78105, "loss": 0.1216, "lr": 3.9809162640734837e-07, "epoch": 4.180590231099162, "percentage": 83.61, "elapsed_time": "2:51:31", "remaining_time": "0:33:37", "throughput": 19973.11, "total_tokens": 205545664}
|
|
{"current_steps": 65310, "total_steps": 78105, "loss": 0.1885, "lr": 3.97789222944287e-07, "epoch": 4.180910313040139, "percentage": 83.62, "elapsed_time": "2:51:31", "remaining_time": "0:33:36", "throughput": 19973.32, "total_tokens": 205561152}
|
|
{"current_steps": 65315, "total_steps": 78105, "loss": 0.1337, "lr": 3.974869244546817e-07, "epoch": 4.181230394981116, "percentage": 83.62, "elapsed_time": "2:51:32", "remaining_time": "0:33:35", "throughput": 19973.52, "total_tokens": 205576512}
|
|
{"current_steps": 65320, "total_steps": 78105, "loss": 0.1512, "lr": 3.971847309536275e-07, "epoch": 4.181550476922092, "percentage": 83.63, "elapsed_time": "2:51:33", "remaining_time": "0:33:34", "throughput": 19973.7, "total_tokens": 205591104}
|
|
{"current_steps": 65325, "total_steps": 78105, "loss": 0.1086, "lr": 3.96882642456215e-07, "epoch": 4.181870558863069, "percentage": 83.64, "elapsed_time": "2:51:33", "remaining_time": "0:33:33", "throughput": 19973.9, "total_tokens": 205606464}
|
|
{"current_steps": 65330, "total_steps": 78105, "loss": 0.1306, "lr": 3.9658065897752867e-07, "epoch": 4.182190640804046, "percentage": 83.64, "elapsed_time": "2:51:34", "remaining_time": "0:33:33", "throughput": 19974.1, "total_tokens": 205621888}
|
|
{"current_steps": 65335, "total_steps": 78105, "loss": 0.1259, "lr": 3.962787805326479e-07, "epoch": 4.182510722745023, "percentage": 83.65, "elapsed_time": "2:51:35", "remaining_time": "0:33:32", "throughput": 19974.32, "total_tokens": 205637440}
|
|
{"current_steps": 65340, "total_steps": 78105, "loss": 0.1316, "lr": 3.959770071366462e-07, "epoch": 4.182830804686, "percentage": 83.66, "elapsed_time": "2:51:35", "remaining_time": "0:33:31", "throughput": 19974.55, "total_tokens": 205653440}
|
|
{"current_steps": 65345, "total_steps": 78105, "loss": 0.1136, "lr": 3.9567533880459446e-07, "epoch": 4.183150886626977, "percentage": 83.66, "elapsed_time": "2:51:36", "remaining_time": "0:33:30", "throughput": 19974.74, "total_tokens": 205668992}
|
|
{"current_steps": 65350, "total_steps": 78105, "loss": 0.1497, "lr": 3.953737755515541e-07, "epoch": 4.183470968567954, "percentage": 83.67, "elapsed_time": "2:51:37", "remaining_time": "0:33:29", "throughput": 19974.92, "total_tokens": 205683904}
|
|
{"current_steps": 65355, "total_steps": 78105, "loss": 0.1018, "lr": 3.950723173925855e-07, "epoch": 4.183791050508931, "percentage": 83.68, "elapsed_time": "2:51:37", "remaining_time": "0:33:28", "throughput": 19975.12, "total_tokens": 205699200}
|
|
{"current_steps": 65360, "total_steps": 78105, "loss": 0.098, "lr": 3.9477096434274107e-07, "epoch": 4.184111132449907, "percentage": 83.68, "elapsed_time": "2:51:38", "remaining_time": "0:33:28", "throughput": 19975.39, "total_tokens": 205716096}
|
|
{"current_steps": 65365, "total_steps": 78105, "loss": 0.1628, "lr": 3.944697164170691e-07, "epoch": 4.184431214390884, "percentage": 83.69, "elapsed_time": "2:51:39", "remaining_time": "0:33:27", "throughput": 19975.62, "total_tokens": 205732416}
|
|
{"current_steps": 65370, "total_steps": 78105, "loss": 0.1253, "lr": 3.941685736306122e-07, "epoch": 4.184751296331861, "percentage": 83.7, "elapsed_time": "2:51:39", "remaining_time": "0:33:26", "throughput": 19975.81, "total_tokens": 205747520}
|
|
{"current_steps": 65375, "total_steps": 78105, "loss": 0.1618, "lr": 3.93867535998407e-07, "epoch": 4.185071378272838, "percentage": 83.7, "elapsed_time": "2:51:40", "remaining_time": "0:33:25", "throughput": 19976.06, "total_tokens": 205764160}
|
|
{"current_steps": 65380, "total_steps": 78105, "loss": 0.1273, "lr": 3.935666035354882e-07, "epoch": 4.185391460213815, "percentage": 83.71, "elapsed_time": "2:51:41", "remaining_time": "0:33:24", "throughput": 19976.31, "total_tokens": 205780416}
|
|
{"current_steps": 65385, "total_steps": 78105, "loss": 0.1622, "lr": 3.932657762568798e-07, "epoch": 4.185711542154792, "percentage": 83.71, "elapsed_time": "2:51:41", "remaining_time": "0:33:24", "throughput": 19976.52, "total_tokens": 205796288}
|
|
{"current_steps": 65390, "total_steps": 78105, "loss": 0.1556, "lr": 3.9296505417760535e-07, "epoch": 4.186031624095769, "percentage": 83.72, "elapsed_time": "2:51:42", "remaining_time": "0:33:23", "throughput": 19976.75, "total_tokens": 205812672}
|
|
{"current_steps": 65395, "total_steps": 78105, "loss": 0.1712, "lr": 3.926644373126812e-07, "epoch": 4.186351706036746, "percentage": 83.73, "elapsed_time": "2:51:43", "remaining_time": "0:33:22", "throughput": 19977.03, "total_tokens": 205829952}
|
|
{"current_steps": 65400, "total_steps": 78105, "loss": 0.1052, "lr": 3.9236392567711835e-07, "epoch": 4.186671787977723, "percentage": 83.73, "elapsed_time": "2:51:44", "remaining_time": "0:33:21", "throughput": 19977.25, "total_tokens": 205845760}
|
|
{"current_steps": 65405, "total_steps": 78105, "loss": 0.1405, "lr": 3.9206351928592263e-07, "epoch": 4.186991869918699, "percentage": 83.74, "elapsed_time": "2:51:44", "remaining_time": "0:33:20", "throughput": 19977.5, "total_tokens": 205862784}
|
|
{"current_steps": 65410, "total_steps": 78105, "loss": 0.1094, "lr": 3.917632181540945e-07, "epoch": 4.187311951859676, "percentage": 83.75, "elapsed_time": "2:51:45", "remaining_time": "0:33:20", "throughput": 19977.72, "total_tokens": 205878976}
|
|
{"current_steps": 65415, "total_steps": 78105, "loss": 0.1794, "lr": 3.9146302229663093e-07, "epoch": 4.187632033800653, "percentage": 83.75, "elapsed_time": "2:51:46", "remaining_time": "0:33:19", "throughput": 19977.93, "total_tokens": 205894528}
|
|
{"current_steps": 65420, "total_steps": 78105, "loss": 0.1093, "lr": 3.911629317285198e-07, "epoch": 4.18795211574163, "percentage": 83.76, "elapsed_time": "2:51:46", "remaining_time": "0:33:18", "throughput": 19978.16, "total_tokens": 205910592}
|
|
{"current_steps": 65425, "total_steps": 78105, "loss": 0.1483, "lr": 3.9086294646474817e-07, "epoch": 4.188272197682607, "percentage": 83.77, "elapsed_time": "2:51:47", "remaining_time": "0:33:17", "throughput": 19978.38, "total_tokens": 205926656}
|
|
{"current_steps": 65430, "total_steps": 78105, "loss": 0.1685, "lr": 3.9056306652029473e-07, "epoch": 4.188592279623584, "percentage": 83.77, "elapsed_time": "2:51:48", "remaining_time": "0:33:16", "throughput": 19978.57, "total_tokens": 205941952}
|
|
{"current_steps": 65435, "total_steps": 78105, "loss": 0.1527, "lr": 3.90263291910134e-07, "epoch": 4.188912361564561, "percentage": 83.78, "elapsed_time": "2:51:48", "remaining_time": "0:33:16", "throughput": 19978.81, "total_tokens": 205958336}
|
|
{"current_steps": 65440, "total_steps": 78105, "loss": 0.1384, "lr": 3.899636226492354e-07, "epoch": 4.189232443505538, "percentage": 83.78, "elapsed_time": "2:51:49", "remaining_time": "0:33:15", "throughput": 19979.02, "total_tokens": 205973824}
|
|
{"current_steps": 65445, "total_steps": 78105, "loss": 0.111, "lr": 3.896640587525627e-07, "epoch": 4.189552525446515, "percentage": 83.79, "elapsed_time": "2:51:50", "remaining_time": "0:33:14", "throughput": 19979.21, "total_tokens": 205989056}
|
|
{"current_steps": 65450, "total_steps": 78105, "loss": 0.0883, "lr": 3.893646002350737e-07, "epoch": 4.189872607387491, "percentage": 83.8, "elapsed_time": "2:51:50", "remaining_time": "0:33:13", "throughput": 19979.48, "total_tokens": 206005824}
|
|
{"current_steps": 65455, "total_steps": 78105, "loss": 0.1226, "lr": 3.890652471117243e-07, "epoch": 4.190192689328468, "percentage": 83.8, "elapsed_time": "2:51:51", "remaining_time": "0:33:12", "throughput": 19979.68, "total_tokens": 206021376}
|
|
{"current_steps": 65460, "total_steps": 78105, "loss": 0.1542, "lr": 3.887659993974596e-07, "epoch": 4.190512771269445, "percentage": 83.81, "elapsed_time": "2:51:52", "remaining_time": "0:33:12", "throughput": 19979.86, "total_tokens": 206036416}
|
|
{"current_steps": 65465, "total_steps": 78105, "loss": 0.0965, "lr": 3.88466857107225e-07, "epoch": 4.190832853210422, "percentage": 83.82, "elapsed_time": "2:51:52", "remaining_time": "0:33:11", "throughput": 19980.19, "total_tokens": 206055040}
|
|
{"current_steps": 65470, "total_steps": 78105, "loss": 0.1405, "lr": 3.881678202559558e-07, "epoch": 4.191152935151399, "percentage": 83.82, "elapsed_time": "2:51:53", "remaining_time": "0:33:10", "throughput": 19980.42, "total_tokens": 206070848}
|
|
{"current_steps": 65475, "total_steps": 78105, "loss": 0.1526, "lr": 3.878688888585863e-07, "epoch": 4.191473017092376, "percentage": 83.83, "elapsed_time": "2:51:54", "remaining_time": "0:33:09", "throughput": 19980.61, "total_tokens": 206086272}
|
|
{"current_steps": 65480, "total_steps": 78105, "loss": 0.1044, "lr": 3.875700629300425e-07, "epoch": 4.191793099033353, "percentage": 83.84, "elapsed_time": "2:51:55", "remaining_time": "0:33:08", "throughput": 19980.89, "total_tokens": 206103744}
|
|
{"current_steps": 65485, "total_steps": 78105, "loss": 0.1167, "lr": 3.8727134248524617e-07, "epoch": 4.19211318097433, "percentage": 83.84, "elapsed_time": "2:51:55", "remaining_time": "0:33:08", "throughput": 19981.17, "total_tokens": 206121152}
|
|
{"current_steps": 65490, "total_steps": 78105, "loss": 0.1162, "lr": 3.8697272753911526e-07, "epoch": 4.192433262915307, "percentage": 83.85, "elapsed_time": "2:51:56", "remaining_time": "0:33:07", "throughput": 19981.4, "total_tokens": 206137472}
|
|
{"current_steps": 65495, "total_steps": 78105, "loss": 0.1418, "lr": 3.8667421810655887e-07, "epoch": 4.192753344856283, "percentage": 83.86, "elapsed_time": "2:51:57", "remaining_time": "0:33:06", "throughput": 19981.62, "total_tokens": 206153280}
|
|
{"current_steps": 65500, "total_steps": 78105, "loss": 0.1267, "lr": 3.863758142024854e-07, "epoch": 4.19307342679726, "percentage": 83.86, "elapsed_time": "2:51:57", "remaining_time": "0:33:05", "throughput": 19981.88, "total_tokens": 206169984}
|
|
{"current_steps": 65505, "total_steps": 78105, "loss": 0.1162, "lr": 3.860775158417929e-07, "epoch": 4.193393508738237, "percentage": 83.87, "elapsed_time": "2:51:58", "remaining_time": "0:33:04", "throughput": 19982.1, "total_tokens": 206185472}
|
|
{"current_steps": 65510, "total_steps": 78105, "loss": 0.1482, "lr": 3.8577932303937886e-07, "epoch": 4.193713590679214, "percentage": 83.87, "elapsed_time": "2:51:59", "remaining_time": "0:33:03", "throughput": 19982.3, "total_tokens": 206200704}
|
|
{"current_steps": 65515, "total_steps": 78105, "loss": 0.1442, "lr": 3.8548123581013287e-07, "epoch": 4.194033672620191, "percentage": 83.88, "elapsed_time": "2:51:59", "remaining_time": "0:33:03", "throughput": 19982.49, "total_tokens": 206215680}
|
|
{"current_steps": 65520, "total_steps": 78105, "loss": 0.1025, "lr": 3.851832541689399e-07, "epoch": 4.194353754561168, "percentage": 83.89, "elapsed_time": "2:52:00", "remaining_time": "0:33:02", "throughput": 19982.69, "total_tokens": 206230976}
|
|
{"current_steps": 65525, "total_steps": 78105, "loss": 0.1415, "lr": 3.848853781306794e-07, "epoch": 4.194673836502145, "percentage": 83.89, "elapsed_time": "2:52:01", "remaining_time": "0:33:01", "throughput": 19982.89, "total_tokens": 206246720}
|
|
{"current_steps": 65530, "total_steps": 78105, "loss": 0.1326, "lr": 3.845876077102256e-07, "epoch": 4.194993918443122, "percentage": 83.9, "elapsed_time": "2:52:01", "remaining_time": "0:33:00", "throughput": 19983.16, "total_tokens": 206263808}
|
|
{"current_steps": 65535, "total_steps": 78105, "loss": 0.1183, "lr": 3.842899429224481e-07, "epoch": 4.195314000384099, "percentage": 83.91, "elapsed_time": "2:52:02", "remaining_time": "0:32:59", "throughput": 19983.4, "total_tokens": 206280256}
|
|
{"current_steps": 65540, "total_steps": 78105, "loss": 0.1151, "lr": 3.8399238378221073e-07, "epoch": 4.195634082325075, "percentage": 83.91, "elapsed_time": "2:52:03", "remaining_time": "0:32:59", "throughput": 19983.61, "total_tokens": 206295744}
|
|
{"current_steps": 65545, "total_steps": 78105, "loss": 0.0828, "lr": 3.8369493030437187e-07, "epoch": 4.195954164266052, "percentage": 83.92, "elapsed_time": "2:52:03", "remaining_time": "0:32:58", "throughput": 19983.89, "total_tokens": 206313664}
|
|
{"current_steps": 65550, "total_steps": 78105, "loss": 0.1232, "lr": 3.833975825037847e-07, "epoch": 4.196274246207029, "percentage": 83.93, "elapsed_time": "2:52:04", "remaining_time": "0:32:57", "throughput": 19984.08, "total_tokens": 206329216}
|
|
{"current_steps": 65555, "total_steps": 78105, "loss": 0.1237, "lr": 3.8310034039529736e-07, "epoch": 4.196594328148006, "percentage": 83.93, "elapsed_time": "2:52:05", "remaining_time": "0:32:56", "throughput": 19984.28, "total_tokens": 206344576}
|
|
{"current_steps": 65560, "total_steps": 78105, "loss": 0.0961, "lr": 3.828032039937524e-07, "epoch": 4.196914410088983, "percentage": 83.94, "elapsed_time": "2:52:06", "remaining_time": "0:32:55", "throughput": 19984.49, "total_tokens": 206360192}
|
|
{"current_steps": 65565, "total_steps": 78105, "loss": 0.1549, "lr": 3.825061733139873e-07, "epoch": 4.19723449202996, "percentage": 83.94, "elapsed_time": "2:52:06", "remaining_time": "0:32:55", "throughput": 19984.7, "total_tokens": 206375808}
|
|
{"current_steps": 65570, "total_steps": 78105, "loss": 0.1523, "lr": 3.822092483708337e-07, "epoch": 4.197554573970937, "percentage": 83.95, "elapsed_time": "2:52:07", "remaining_time": "0:32:54", "throughput": 19984.88, "total_tokens": 206390720}
|
|
{"current_steps": 65575, "total_steps": 78105, "loss": 0.13, "lr": 3.819124291791204e-07, "epoch": 4.197874655911914, "percentage": 83.96, "elapsed_time": "2:52:08", "remaining_time": "0:32:53", "throughput": 19985.18, "total_tokens": 206408320}
|
|
{"current_steps": 65580, "total_steps": 78105, "loss": 0.1249, "lr": 3.8161571575366654e-07, "epoch": 4.198194737852891, "percentage": 83.96, "elapsed_time": "2:52:08", "remaining_time": "0:32:52", "throughput": 19985.42, "total_tokens": 206424000}
|
|
{"current_steps": 65585, "total_steps": 78105, "loss": 0.1413, "lr": 3.813191081092898e-07, "epoch": 4.198514819793867, "percentage": 83.97, "elapsed_time": "2:52:09", "remaining_time": "0:32:51", "throughput": 19985.63, "total_tokens": 206439936}
|
|
{"current_steps": 65590, "total_steps": 78105, "loss": 0.1154, "lr": 3.810226062608013e-07, "epoch": 4.198834901734844, "percentage": 83.98, "elapsed_time": "2:52:10", "remaining_time": "0:32:51", "throughput": 19985.86, "total_tokens": 206455872}
|
|
{"current_steps": 65595, "total_steps": 78105, "loss": 0.1157, "lr": 3.8072621022300584e-07, "epoch": 4.199154983675821, "percentage": 83.98, "elapsed_time": "2:52:10", "remaining_time": "0:32:50", "throughput": 19986.03, "total_tokens": 206470656}
|
|
{"current_steps": 65600, "total_steps": 78105, "loss": 0.1487, "lr": 3.804299200107056e-07, "epoch": 4.199475065616798, "percentage": 83.99, "elapsed_time": "2:52:11", "remaining_time": "0:32:49", "throughput": 19986.26, "total_tokens": 206486656}
|
|
{"current_steps": 65605, "total_steps": 78105, "loss": 0.1272, "lr": 3.801337356386936e-07, "epoch": 4.199795147557775, "percentage": 84.0, "elapsed_time": "2:52:12", "remaining_time": "0:32:48", "throughput": 19986.48, "total_tokens": 206502400}
|
|
{"current_steps": 65610, "total_steps": 78105, "loss": 0.1387, "lr": 3.798376571217621e-07, "epoch": 4.200115229498752, "percentage": 84.0, "elapsed_time": "2:52:12", "remaining_time": "0:32:47", "throughput": 19986.74, "total_tokens": 206519232}
|
|
{"current_steps": 65615, "total_steps": 78105, "loss": 0.0857, "lr": 3.795416844746935e-07, "epoch": 4.200435311439729, "percentage": 84.01, "elapsed_time": "2:52:13", "remaining_time": "0:32:47", "throughput": 19986.92, "total_tokens": 206534016}
|
|
{"current_steps": 65620, "total_steps": 78105, "loss": 0.093, "lr": 3.792458177122685e-07, "epoch": 4.200755393380706, "percentage": 84.02, "elapsed_time": "2:52:14", "remaining_time": "0:32:46", "throughput": 19987.08, "total_tokens": 206548352}
|
|
{"current_steps": 65625, "total_steps": 78105, "loss": 0.1267, "lr": 3.7895005684926073e-07, "epoch": 4.201075475321682, "percentage": 84.02, "elapsed_time": "2:52:14", "remaining_time": "0:32:45", "throughput": 19987.35, "total_tokens": 206565312}
|
|
{"current_steps": 65630, "total_steps": 78105, "loss": 0.162, "lr": 3.78654401900439e-07, "epoch": 4.201395557262659, "percentage": 84.03, "elapsed_time": "2:52:15", "remaining_time": "0:32:44", "throughput": 19987.52, "total_tokens": 206579584}
|
|
{"current_steps": 65635, "total_steps": 78105, "loss": 0.1325, "lr": 3.7835885288056653e-07, "epoch": 4.201715639203636, "percentage": 84.03, "elapsed_time": "2:52:16", "remaining_time": "0:32:43", "throughput": 19987.68, "total_tokens": 206594112}
|
|
{"current_steps": 65640, "total_steps": 78105, "loss": 0.107, "lr": 3.78063409804402e-07, "epoch": 4.202035721144613, "percentage": 84.04, "elapsed_time": "2:52:16", "remaining_time": "0:32:42", "throughput": 19987.88, "total_tokens": 206609728}
|
|
{"current_steps": 65645, "total_steps": 78105, "loss": 0.1051, "lr": 3.7776807268669805e-07, "epoch": 4.20235580308559, "percentage": 84.05, "elapsed_time": "2:52:17", "remaining_time": "0:32:42", "throughput": 19988.08, "total_tokens": 206624896}
|
|
{"current_steps": 65650, "total_steps": 78105, "loss": 0.1849, "lr": 3.774728415422016e-07, "epoch": 4.202675885026567, "percentage": 84.05, "elapsed_time": "2:52:18", "remaining_time": "0:32:41", "throughput": 19988.29, "total_tokens": 206640064}
|
|
{"current_steps": 65655, "total_steps": 78105, "loss": 0.1867, "lr": 3.77177716385656e-07, "epoch": 4.202995966967544, "percentage": 84.06, "elapsed_time": "2:52:18", "remaining_time": "0:32:40", "throughput": 19988.47, "total_tokens": 206655296}
|
|
{"current_steps": 65660, "total_steps": 78105, "loss": 0.12, "lr": 3.768826972317982e-07, "epoch": 4.203316048908521, "percentage": 84.07, "elapsed_time": "2:52:19", "remaining_time": "0:32:39", "throughput": 19988.73, "total_tokens": 206672192}
|
|
{"current_steps": 65665, "total_steps": 78105, "loss": 0.1247, "lr": 3.765877840953594e-07, "epoch": 4.203636130849498, "percentage": 84.07, "elapsed_time": "2:52:20", "remaining_time": "0:32:38", "throughput": 19988.99, "total_tokens": 206688832}
|
|
{"current_steps": 65670, "total_steps": 78105, "loss": 0.1091, "lr": 3.762929769910662e-07, "epoch": 4.203956212790474, "percentage": 84.08, "elapsed_time": "2:52:20", "remaining_time": "0:32:38", "throughput": 19989.36, "total_tokens": 206708096}
|
|
{"current_steps": 65675, "total_steps": 78105, "loss": 0.1137, "lr": 3.7599827593363976e-07, "epoch": 4.204276294731451, "percentage": 84.09, "elapsed_time": "2:52:21", "remaining_time": "0:32:37", "throughput": 19989.57, "total_tokens": 206724160}
|
|
{"current_steps": 65680, "total_steps": 78105, "loss": 0.1201, "lr": 3.7570368093779535e-07, "epoch": 4.204596376672428, "percentage": 84.09, "elapsed_time": "2:52:22", "remaining_time": "0:32:36", "throughput": 19989.78, "total_tokens": 206739648}
|
|
{"current_steps": 65685, "total_steps": 78105, "loss": 0.1037, "lr": 3.754091920182451e-07, "epoch": 4.204916458613405, "percentage": 84.1, "elapsed_time": "2:52:22", "remaining_time": "0:32:35", "throughput": 19990.03, "total_tokens": 206756480}
|
|
{"current_steps": 65690, "total_steps": 78105, "loss": 0.1038, "lr": 3.7511480918969224e-07, "epoch": 4.205236540554382, "percentage": 84.1, "elapsed_time": "2:52:23", "remaining_time": "0:32:34", "throughput": 19990.23, "total_tokens": 206772096}
|
|
{"current_steps": 65695, "total_steps": 78105, "loss": 0.1128, "lr": 3.748205324668386e-07, "epoch": 4.205556622495359, "percentage": 84.11, "elapsed_time": "2:52:24", "remaining_time": "0:32:34", "throughput": 19990.43, "total_tokens": 206787520}
|
|
{"current_steps": 65700, "total_steps": 78105, "loss": 0.1101, "lr": 3.7452636186437683e-07, "epoch": 4.205876704436336, "percentage": 84.12, "elapsed_time": "2:52:24", "remaining_time": "0:32:33", "throughput": 19990.59, "total_tokens": 206801856}
|
|
{"current_steps": 65705, "total_steps": 78105, "loss": 0.1327, "lr": 3.7423229739699805e-07, "epoch": 4.206196786377313, "percentage": 84.12, "elapsed_time": "2:52:25", "remaining_time": "0:32:32", "throughput": 19990.82, "total_tokens": 206817856}
|
|
{"current_steps": 65710, "total_steps": 78105, "loss": 0.0846, "lr": 3.7393833907938576e-07, "epoch": 4.20651686831829, "percentage": 84.13, "elapsed_time": "2:52:26", "remaining_time": "0:32:31", "throughput": 19991.06, "total_tokens": 206834432}
|
|
{"current_steps": 65715, "total_steps": 78105, "loss": 0.1401, "lr": 3.7364448692621763e-07, "epoch": 4.206836950259266, "percentage": 84.14, "elapsed_time": "2:52:27", "remaining_time": "0:32:30", "throughput": 19991.27, "total_tokens": 206850048}
|
|
{"current_steps": 65720, "total_steps": 78105, "loss": 0.096, "lr": 3.7335074095216946e-07, "epoch": 4.207157032200243, "percentage": 84.14, "elapsed_time": "2:52:27", "remaining_time": "0:32:30", "throughput": 19991.53, "total_tokens": 206866560}
|
|
{"current_steps": 65725, "total_steps": 78105, "loss": 0.1242, "lr": 3.730571011719067e-07, "epoch": 4.20747711414122, "percentage": 84.15, "elapsed_time": "2:52:28", "remaining_time": "0:32:29", "throughput": 19991.76, "total_tokens": 206882880}
|
|
{"current_steps": 65730, "total_steps": 78105, "loss": 0.1136, "lr": 3.7276356760009455e-07, "epoch": 4.207797196082197, "percentage": 84.16, "elapsed_time": "2:52:29", "remaining_time": "0:32:28", "throughput": 19991.95, "total_tokens": 206897920}
|
|
{"current_steps": 65735, "total_steps": 78105, "loss": 0.091, "lr": 3.724701402513886e-07, "epoch": 4.208117278023174, "percentage": 84.16, "elapsed_time": "2:52:29", "remaining_time": "0:32:27", "throughput": 19992.15, "total_tokens": 206913408}
|
|
{"current_steps": 65740, "total_steps": 78105, "loss": 0.143, "lr": 3.7217681914044234e-07, "epoch": 4.208437359964151, "percentage": 84.17, "elapsed_time": "2:52:30", "remaining_time": "0:32:26", "throughput": 19992.41, "total_tokens": 206930432}
|
|
{"current_steps": 65745, "total_steps": 78105, "loss": 0.1638, "lr": 3.718836042819021e-07, "epoch": 4.208757441905128, "percentage": 84.18, "elapsed_time": "2:52:31", "remaining_time": "0:32:26", "throughput": 19992.69, "total_tokens": 206947456}
|
|
{"current_steps": 65750, "total_steps": 78105, "loss": 0.1498, "lr": 3.715904956904101e-07, "epoch": 4.209077523846105, "percentage": 84.18, "elapsed_time": "2:52:31", "remaining_time": "0:32:25", "throughput": 19992.89, "total_tokens": 206963008}
|
|
{"current_steps": 65755, "total_steps": 78105, "loss": 0.1051, "lr": 3.7129749338060214e-07, "epoch": 4.209397605787082, "percentage": 84.19, "elapsed_time": "2:52:32", "remaining_time": "0:32:24", "throughput": 19993.13, "total_tokens": 206979008}
|
|
{"current_steps": 65760, "total_steps": 78105, "loss": 0.1227, "lr": 3.7100459736710866e-07, "epoch": 4.209717687728058, "percentage": 84.19, "elapsed_time": "2:52:33", "remaining_time": "0:32:23", "throughput": 19993.35, "total_tokens": 206995072}
|
|
{"current_steps": 65765, "total_steps": 78105, "loss": 0.1134, "lr": 3.707118076645566e-07, "epoch": 4.210037769669035, "percentage": 84.2, "elapsed_time": "2:52:33", "remaining_time": "0:32:22", "throughput": 19993.55, "total_tokens": 207010624}
|
|
{"current_steps": 65770, "total_steps": 78105, "loss": 0.1118, "lr": 3.70419124287566e-07, "epoch": 4.210357851610012, "percentage": 84.21, "elapsed_time": "2:52:34", "remaining_time": "0:32:21", "throughput": 19993.78, "total_tokens": 207026688}
|
|
{"current_steps": 65775, "total_steps": 78105, "loss": 0.1667, "lr": 3.7012654725075147e-07, "epoch": 4.210677933550989, "percentage": 84.21, "elapsed_time": "2:52:35", "remaining_time": "0:32:21", "throughput": 19994.03, "total_tokens": 207043200}
|
|
{"current_steps": 65780, "total_steps": 78105, "loss": 0.1023, "lr": 3.698340765687233e-07, "epoch": 4.210998015491966, "percentage": 84.22, "elapsed_time": "2:52:35", "remaining_time": "0:32:20", "throughput": 19994.27, "total_tokens": 207059328}
|
|
{"current_steps": 65785, "total_steps": 78105, "loss": 0.085, "lr": 3.695417122560857e-07, "epoch": 4.211318097432943, "percentage": 84.23, "elapsed_time": "2:52:36", "remaining_time": "0:32:19", "throughput": 19994.45, "total_tokens": 207074560}
|
|
{"current_steps": 65790, "total_steps": 78105, "loss": 0.1445, "lr": 3.6924945432743777e-07, "epoch": 4.21163817937392, "percentage": 84.23, "elapsed_time": "2:52:37", "remaining_time": "0:32:18", "throughput": 19994.63, "total_tokens": 207089408}
|
|
{"current_steps": 65795, "total_steps": 78105, "loss": 0.1427, "lr": 3.6895730279737336e-07, "epoch": 4.211958261314897, "percentage": 84.24, "elapsed_time": "2:52:38", "remaining_time": "0:32:18", "throughput": 19994.02, "total_tokens": 207105728}
|
|
{"current_steps": 65800, "total_steps": 78105, "loss": 0.1415, "lr": 3.6866525768048057e-07, "epoch": 4.212278343255874, "percentage": 84.25, "elapsed_time": "2:52:39", "remaining_time": "0:32:17", "throughput": 19993.58, "total_tokens": 207121408}
|
|
{"current_steps": 65805, "total_steps": 78105, "loss": 0.1295, "lr": 3.6837331899134436e-07, "epoch": 4.21259842519685, "percentage": 84.25, "elapsed_time": "2:52:40", "remaining_time": "0:32:16", "throughput": 19993.84, "total_tokens": 207138560}
|
|
{"current_steps": 65810, "total_steps": 78105, "loss": 0.1691, "lr": 3.680814867445401e-07, "epoch": 4.212918507137827, "percentage": 84.26, "elapsed_time": "2:52:40", "remaining_time": "0:32:15", "throughput": 19994.04, "total_tokens": 207154048}
|
|
{"current_steps": 65815, "total_steps": 78105, "loss": 0.1338, "lr": 3.6778976095464215e-07, "epoch": 4.213238589078804, "percentage": 84.26, "elapsed_time": "2:52:41", "remaining_time": "0:32:14", "throughput": 19994.21, "total_tokens": 207169088}
|
|
{"current_steps": 65820, "total_steps": 78105, "loss": 0.1106, "lr": 3.674981416362172e-07, "epoch": 4.213558671019781, "percentage": 84.27, "elapsed_time": "2:52:42", "remaining_time": "0:32:14", "throughput": 19994.41, "total_tokens": 207184384}
|
|
{"current_steps": 65825, "total_steps": 78105, "loss": 0.1202, "lr": 3.6720662880382703e-07, "epoch": 4.213878752960758, "percentage": 84.28, "elapsed_time": "2:52:42", "remaining_time": "0:32:13", "throughput": 19994.6, "total_tokens": 207199552}
|
|
{"current_steps": 65830, "total_steps": 78105, "loss": 0.1289, "lr": 3.669152224720293e-07, "epoch": 4.214198834901735, "percentage": 84.28, "elapsed_time": "2:52:43", "remaining_time": "0:32:12", "throughput": 19994.8, "total_tokens": 207215232}
|
|
{"current_steps": 65835, "total_steps": 78105, "loss": 0.1326, "lr": 3.6662392265537334e-07, "epoch": 4.214518916842712, "percentage": 84.29, "elapsed_time": "2:52:44", "remaining_time": "0:32:11", "throughput": 19994.99, "total_tokens": 207230464}
|
|
{"current_steps": 65840, "total_steps": 78105, "loss": 0.1068, "lr": 3.663327293684077e-07, "epoch": 4.214838998783689, "percentage": 84.3, "elapsed_time": "2:52:44", "remaining_time": "0:32:10", "throughput": 19995.19, "total_tokens": 207246208}
|
|
{"current_steps": 65845, "total_steps": 78105, "loss": 0.1396, "lr": 3.660416426256705e-07, "epoch": 4.215159080724666, "percentage": 84.3, "elapsed_time": "2:52:45", "remaining_time": "0:32:09", "throughput": 19995.4, "total_tokens": 207261824}
|
|
{"current_steps": 65850, "total_steps": 78105, "loss": 0.1417, "lr": 3.657506624416987e-07, "epoch": 4.215479162665642, "percentage": 84.31, "elapsed_time": "2:52:46", "remaining_time": "0:32:09", "throughput": 19995.62, "total_tokens": 207277952}
|
|
{"current_steps": 65855, "total_steps": 78105, "loss": 0.1551, "lr": 3.6545978883102214e-07, "epoch": 4.215799244606619, "percentage": 84.32, "elapsed_time": "2:52:46", "remaining_time": "0:32:08", "throughput": 19995.86, "total_tokens": 207294464}
|
|
{"current_steps": 65860, "total_steps": 78105, "loss": 0.126, "lr": 3.6516902180816523e-07, "epoch": 4.216119326547596, "percentage": 84.32, "elapsed_time": "2:52:47", "remaining_time": "0:32:07", "throughput": 19996.06, "total_tokens": 207309760}
|
|
{"current_steps": 65865, "total_steps": 78105, "loss": 0.1593, "lr": 3.648783613876475e-07, "epoch": 4.216439408488573, "percentage": 84.33, "elapsed_time": "2:52:48", "remaining_time": "0:32:06", "throughput": 19996.24, "total_tokens": 207324928}
|
|
{"current_steps": 65870, "total_steps": 78105, "loss": 0.1487, "lr": 3.645878075839829e-07, "epoch": 4.21675949042955, "percentage": 84.34, "elapsed_time": "2:52:48", "remaining_time": "0:32:05", "throughput": 19996.49, "total_tokens": 207341312}
|
|
{"current_steps": 65875, "total_steps": 78105, "loss": 0.1265, "lr": 3.642973604116801e-07, "epoch": 4.217079572370527, "percentage": 84.34, "elapsed_time": "2:52:49", "remaining_time": "0:32:05", "throughput": 19996.74, "total_tokens": 207357824}
|
|
{"current_steps": 65880, "total_steps": 78105, "loss": 0.1482, "lr": 3.6400701988524215e-07, "epoch": 4.217399654311504, "percentage": 84.35, "elapsed_time": "2:52:50", "remaining_time": "0:32:04", "throughput": 19997.06, "total_tokens": 207376448}
|
|
{"current_steps": 65885, "total_steps": 78105, "loss": 0.1313, "lr": 3.6371678601916843e-07, "epoch": 4.217719736252481, "percentage": 84.35, "elapsed_time": "2:52:51", "remaining_time": "0:32:03", "throughput": 19997.32, "total_tokens": 207393408}
|
|
{"current_steps": 65890, "total_steps": 78105, "loss": 0.1284, "lr": 3.634266588279506e-07, "epoch": 4.218039818193457, "percentage": 84.36, "elapsed_time": "2:52:51", "remaining_time": "0:32:02", "throughput": 19997.54, "total_tokens": 207409280}
|
|
{"current_steps": 65895, "total_steps": 78105, "loss": 0.1631, "lr": 3.6313663832607676e-07, "epoch": 4.218359900134434, "percentage": 84.37, "elapsed_time": "2:52:52", "remaining_time": "0:32:01", "throughput": 19997.71, "total_tokens": 207424192}
|
|
{"current_steps": 65900, "total_steps": 78105, "loss": 0.107, "lr": 3.6284672452802845e-07, "epoch": 4.218679982075411, "percentage": 84.37, "elapsed_time": "2:52:53", "remaining_time": "0:32:01", "throughput": 19997.94, "total_tokens": 207440512}
|
|
{"current_steps": 65905, "total_steps": 78105, "loss": 0.1197, "lr": 3.6255691744828263e-07, "epoch": 4.219000064016388, "percentage": 84.38, "elapsed_time": "2:52:53", "remaining_time": "0:32:00", "throughput": 19998.13, "total_tokens": 207455872}
|
|
{"current_steps": 65910, "total_steps": 78105, "loss": 0.093, "lr": 3.622672171013106e-07, "epoch": 4.219320145957365, "percentage": 84.39, "elapsed_time": "2:52:54", "remaining_time": "0:31:59", "throughput": 19998.41, "total_tokens": 207473152}
|
|
{"current_steps": 65915, "total_steps": 78105, "loss": 0.1283, "lr": 3.619776235015796e-07, "epoch": 4.219640227898342, "percentage": 84.39, "elapsed_time": "2:52:55", "remaining_time": "0:31:58", "throughput": 19998.6, "total_tokens": 207488256}
|
|
{"current_steps": 65920, "total_steps": 78105, "loss": 0.1359, "lr": 3.616881366635483e-07, "epoch": 4.219960309839319, "percentage": 84.4, "elapsed_time": "2:52:55", "remaining_time": "0:31:57", "throughput": 19998.76, "total_tokens": 207502912}
|
|
{"current_steps": 65925, "total_steps": 78105, "loss": 0.1323, "lr": 3.613987566016747e-07, "epoch": 4.220280391780296, "percentage": 84.41, "elapsed_time": "2:52:56", "remaining_time": "0:31:57", "throughput": 19998.96, "total_tokens": 207518400}
|
|
{"current_steps": 65930, "total_steps": 78105, "loss": 0.1187, "lr": 3.611094833304063e-07, "epoch": 4.220600473721273, "percentage": 84.41, "elapsed_time": "2:52:57", "remaining_time": "0:31:56", "throughput": 19999.15, "total_tokens": 207533376}
|
|
{"current_steps": 65935, "total_steps": 78105, "loss": 0.1428, "lr": 3.608203168641899e-07, "epoch": 4.220920555662249, "percentage": 84.42, "elapsed_time": "2:52:57", "remaining_time": "0:31:55", "throughput": 19999.36, "total_tokens": 207549184}
|
|
{"current_steps": 65940, "total_steps": 78105, "loss": 0.1186, "lr": 3.6053125721746435e-07, "epoch": 4.221240637603226, "percentage": 84.42, "elapsed_time": "2:52:58", "remaining_time": "0:31:54", "throughput": 19999.57, "total_tokens": 207565440}
|
|
{"current_steps": 65945, "total_steps": 78105, "loss": 0.1489, "lr": 3.602423044046632e-07, "epoch": 4.221560719544203, "percentage": 84.43, "elapsed_time": "2:52:59", "remaining_time": "0:31:53", "throughput": 19999.8, "total_tokens": 207581568}
|
|
{"current_steps": 65950, "total_steps": 78105, "loss": 0.1264, "lr": 3.599534584402167e-07, "epoch": 4.22188080148518, "percentage": 84.44, "elapsed_time": "2:52:59", "remaining_time": "0:31:53", "throughput": 19999.99, "total_tokens": 207596992}
|
|
{"current_steps": 65955, "total_steps": 78105, "loss": 0.0979, "lr": 3.596647193385466e-07, "epoch": 4.222200883426157, "percentage": 84.44, "elapsed_time": "2:53:00", "remaining_time": "0:31:52", "throughput": 20000.18, "total_tokens": 207612352}
|
|
{"current_steps": 65960, "total_steps": 78105, "loss": 0.1098, "lr": 3.5937608711407223e-07, "epoch": 4.222520965367134, "percentage": 84.45, "elapsed_time": "2:53:01", "remaining_time": "0:31:51", "throughput": 20000.37, "total_tokens": 207627840}
|
|
{"current_steps": 65965, "total_steps": 78105, "loss": 0.1227, "lr": 3.590875617812059e-07, "epoch": 4.222841047308111, "percentage": 84.46, "elapsed_time": "2:53:01", "remaining_time": "0:31:50", "throughput": 20000.52, "total_tokens": 207642368}
|
|
{"current_steps": 65970, "total_steps": 78105, "loss": 0.0972, "lr": 3.58799143354355e-07, "epoch": 4.223161129249088, "percentage": 84.46, "elapsed_time": "2:53:02", "remaining_time": "0:31:49", "throughput": 20000.8, "total_tokens": 207659776}
|
|
{"current_steps": 65975, "total_steps": 78105, "loss": 0.1464, "lr": 3.585108318479219e-07, "epoch": 4.223481211190065, "percentage": 84.47, "elapsed_time": "2:53:03", "remaining_time": "0:31:49", "throughput": 20000.95, "total_tokens": 207674432}
|
|
{"current_steps": 65980, "total_steps": 78105, "loss": 0.1076, "lr": 3.582226272763034e-07, "epoch": 4.223801293131041, "percentage": 84.48, "elapsed_time": "2:53:03", "remaining_time": "0:31:48", "throughput": 20001.13, "total_tokens": 207689792}
|
|
{"current_steps": 65985, "total_steps": 78105, "loss": 0.1273, "lr": 3.5793452965389053e-07, "epoch": 4.224121375072018, "percentage": 84.48, "elapsed_time": "2:53:04", "remaining_time": "0:31:47", "throughput": 20001.41, "total_tokens": 207707200}
|
|
{"current_steps": 65990, "total_steps": 78105, "loss": 0.1367, "lr": 3.576465389950698e-07, "epoch": 4.224441457012995, "percentage": 84.49, "elapsed_time": "2:53:05", "remaining_time": "0:31:46", "throughput": 20001.56, "total_tokens": 207721408}
|
|
{"current_steps": 65995, "total_steps": 78105, "loss": 0.1176, "lr": 3.5735865531422107e-07, "epoch": 4.224761538953972, "percentage": 84.5, "elapsed_time": "2:53:06", "remaining_time": "0:31:45", "throughput": 20001.88, "total_tokens": 207739584}
|
|
{"current_steps": 66000, "total_steps": 78105, "loss": 0.1428, "lr": 3.5707087862572125e-07, "epoch": 4.225081620894949, "percentage": 84.5, "elapsed_time": "2:53:06", "remaining_time": "0:31:45", "throughput": 20002.09, "total_tokens": 207754944}
|
|
{"current_steps": 66005, "total_steps": 78105, "loss": 0.1425, "lr": 3.5678320894393964e-07, "epoch": 4.225401702835926, "percentage": 84.51, "elapsed_time": "2:53:09", "remaining_time": "0:31:44", "throughput": 19998.26, "total_tokens": 207771968}
|
|
{"current_steps": 66010, "total_steps": 78105, "loss": 0.1372, "lr": 3.5649564628324086e-07, "epoch": 4.225721784776903, "percentage": 84.51, "elapsed_time": "2:53:10", "remaining_time": "0:31:43", "throughput": 19998.58, "total_tokens": 207790720}
|
|
{"current_steps": 66015, "total_steps": 78105, "loss": 0.1162, "lr": 3.562081906579845e-07, "epoch": 4.22604186671788, "percentage": 84.52, "elapsed_time": "2:53:10", "remaining_time": "0:31:42", "throughput": 19998.78, "total_tokens": 207806144}
|
|
{"current_steps": 66020, "total_steps": 78105, "loss": 0.1124, "lr": 3.559208420825244e-07, "epoch": 4.226361948658857, "percentage": 84.53, "elapsed_time": "2:53:11", "remaining_time": "0:31:42", "throughput": 19998.98, "total_tokens": 207821504}
|
|
{"current_steps": 66025, "total_steps": 78105, "loss": 0.0644, "lr": 3.5563360057120937e-07, "epoch": 4.226682030599833, "percentage": 84.53, "elapsed_time": "2:53:12", "remaining_time": "0:31:41", "throughput": 19999.2, "total_tokens": 207837568}
|
|
{"current_steps": 66030, "total_steps": 78105, "loss": 0.1241, "lr": 3.553464661383821e-07, "epoch": 4.22700211254081, "percentage": 84.54, "elapsed_time": "2:53:12", "remaining_time": "0:31:40", "throughput": 19999.38, "total_tokens": 207852352}
|
|
{"current_steps": 66035, "total_steps": 78105, "loss": 0.1247, "lr": 3.550594387983825e-07, "epoch": 4.227322194481787, "percentage": 84.55, "elapsed_time": "2:53:13", "remaining_time": "0:31:39", "throughput": 19999.57, "total_tokens": 207867200}
|
|
{"current_steps": 66040, "total_steps": 78105, "loss": 0.1117, "lr": 3.547725185655407e-07, "epoch": 4.227642276422764, "percentage": 84.55, "elapsed_time": "2:53:15", "remaining_time": "0:31:39", "throughput": 19997.65, "total_tokens": 207883008}
|
|
{"current_steps": 66045, "total_steps": 78105, "loss": 0.143, "lr": 3.5448570545418587e-07, "epoch": 4.227962358363741, "percentage": 84.56, "elapsed_time": "2:53:16", "remaining_time": "0:31:38", "throughput": 19997.85, "total_tokens": 207898560}
|
|
{"current_steps": 66050, "total_steps": 78105, "loss": 0.1124, "lr": 3.5419899947863905e-07, "epoch": 4.228282440304718, "percentage": 84.57, "elapsed_time": "2:53:16", "remaining_time": "0:31:37", "throughput": 19998.09, "total_tokens": 207915200}
|
|
{"current_steps": 66055, "total_steps": 78105, "loss": 0.1827, "lr": 3.539124006532169e-07, "epoch": 4.228602522245695, "percentage": 84.57, "elapsed_time": "2:53:19", "remaining_time": "0:31:37", "throughput": 19995.4, "total_tokens": 207932224}
|
|
{"current_steps": 66060, "total_steps": 78105, "loss": 0.1057, "lr": 3.53625908992232e-07, "epoch": 4.228922604186672, "percentage": 84.58, "elapsed_time": "2:53:19", "remaining_time": "0:31:36", "throughput": 19995.61, "total_tokens": 207947968}
|
|
{"current_steps": 66065, "total_steps": 78105, "loss": 0.1429, "lr": 3.53339524509988e-07, "epoch": 4.229242686127649, "percentage": 84.58, "elapsed_time": "2:53:20", "remaining_time": "0:31:35", "throughput": 19995.79, "total_tokens": 207963072}
|
|
{"current_steps": 66070, "total_steps": 78105, "loss": 0.1562, "lr": 3.530532472207876e-07, "epoch": 4.229562768068625, "percentage": 84.59, "elapsed_time": "2:53:21", "remaining_time": "0:31:34", "throughput": 19996.0, "total_tokens": 207978880}
|
|
{"current_steps": 66075, "total_steps": 78105, "loss": 0.1663, "lr": 3.527670771389238e-07, "epoch": 4.229882850009602, "percentage": 84.6, "elapsed_time": "2:53:21", "remaining_time": "0:31:33", "throughput": 19996.16, "total_tokens": 207993536}
|
|
{"current_steps": 66080, "total_steps": 78105, "loss": 0.139, "lr": 3.5248101427868857e-07, "epoch": 4.230202931950579, "percentage": 84.6, "elapsed_time": "2:53:22", "remaining_time": "0:31:32", "throughput": 19996.4, "total_tokens": 208010048}
|
|
{"current_steps": 66085, "total_steps": 78105, "loss": 0.1293, "lr": 3.5219505865436515e-07, "epoch": 4.230523013891556, "percentage": 84.61, "elapsed_time": "2:53:23", "remaining_time": "0:31:32", "throughput": 19996.64, "total_tokens": 208026368}
|
|
{"current_steps": 66090, "total_steps": 78105, "loss": 0.0796, "lr": 3.5190921028023325e-07, "epoch": 4.230843095832533, "percentage": 84.62, "elapsed_time": "2:53:23", "remaining_time": "0:31:31", "throughput": 19996.84, "total_tokens": 208041792}
|
|
{"current_steps": 66095, "total_steps": 78105, "loss": 0.1153, "lr": 3.5162346917056613e-07, "epoch": 4.23116317777351, "percentage": 84.62, "elapsed_time": "2:53:24", "remaining_time": "0:31:30", "throughput": 19997.04, "total_tokens": 208057152}
|
|
{"current_steps": 66100, "total_steps": 78105, "loss": 0.1223, "lr": 3.513378353396327e-07, "epoch": 4.231483259714487, "percentage": 84.63, "elapsed_time": "2:53:25", "remaining_time": "0:31:29", "throughput": 19997.23, "total_tokens": 208072512}
|
|
{"current_steps": 66105, "total_steps": 78105, "loss": 0.1408, "lr": 3.5105230880169537e-07, "epoch": 4.231803341655464, "percentage": 84.64, "elapsed_time": "2:53:25", "remaining_time": "0:31:28", "throughput": 19997.42, "total_tokens": 208087808}
|
|
{"current_steps": 66110, "total_steps": 78105, "loss": 0.1467, "lr": 3.507668895710123e-07, "epoch": 4.232123423596441, "percentage": 84.64, "elapsed_time": "2:53:26", "remaining_time": "0:31:28", "throughput": 19997.6, "total_tokens": 208102720}
|
|
{"current_steps": 66115, "total_steps": 78105, "loss": 0.1314, "lr": 3.5048157766183587e-07, "epoch": 4.232443505537417, "percentage": 84.65, "elapsed_time": "2:53:27", "remaining_time": "0:31:27", "throughput": 19997.8, "total_tokens": 208118272}
|
|
{"current_steps": 66120, "total_steps": 78105, "loss": 0.1444, "lr": 3.501963730884134e-07, "epoch": 4.232763587478394, "percentage": 84.66, "elapsed_time": "2:53:27", "remaining_time": "0:31:26", "throughput": 19997.98, "total_tokens": 208133184}
|
|
{"current_steps": 66125, "total_steps": 78105, "loss": 0.1083, "lr": 3.4991127586498587e-07, "epoch": 4.233083669419371, "percentage": 84.66, "elapsed_time": "2:53:28", "remaining_time": "0:31:25", "throughput": 19998.17, "total_tokens": 208148608}
|
|
{"current_steps": 66130, "total_steps": 78105, "loss": 0.1114, "lr": 3.4962628600579e-07, "epoch": 4.233403751360348, "percentage": 84.67, "elapsed_time": "2:53:29", "remaining_time": "0:31:24", "throughput": 19998.44, "total_tokens": 208165440}
|
|
{"current_steps": 66135, "total_steps": 78105, "loss": 0.1222, "lr": 3.493414035250564e-07, "epoch": 4.233723833301325, "percentage": 84.67, "elapsed_time": "2:53:29", "remaining_time": "0:31:24", "throughput": 19998.64, "total_tokens": 208181184}
|
|
{"current_steps": 66140, "total_steps": 78105, "loss": 0.1674, "lr": 3.490566284370103e-07, "epoch": 4.234043915242302, "percentage": 84.68, "elapsed_time": "2:53:30", "remaining_time": "0:31:23", "throughput": 19998.85, "total_tokens": 208196992}
|
|
{"current_steps": 66145, "total_steps": 78105, "loss": 0.1058, "lr": 3.4877196075587344e-07, "epoch": 4.234363997183279, "percentage": 84.69, "elapsed_time": "2:53:31", "remaining_time": "0:31:22", "throughput": 19999.11, "total_tokens": 208214272}
|
|
{"current_steps": 66150, "total_steps": 78105, "loss": 0.1147, "lr": 3.484874004958583e-07, "epoch": 4.234684079124256, "percentage": 84.69, "elapsed_time": "2:53:31", "remaining_time": "0:31:21", "throughput": 19999.34, "total_tokens": 208230656}
|
|
{"current_steps": 66155, "total_steps": 78105, "loss": 0.1606, "lr": 3.482029476711768e-07, "epoch": 4.235004161065232, "percentage": 84.7, "elapsed_time": "2:53:32", "remaining_time": "0:31:20", "throughput": 19999.6, "total_tokens": 208247936}
|
|
{"current_steps": 66160, "total_steps": 78105, "loss": 0.1125, "lr": 3.479186022960304e-07, "epoch": 4.235324243006209, "percentage": 84.71, "elapsed_time": "2:53:33", "remaining_time": "0:31:20", "throughput": 19999.85, "total_tokens": 208264576}
|
|
{"current_steps": 66165, "total_steps": 78105, "loss": 0.0993, "lr": 3.4763436438462003e-07, "epoch": 4.235644324947186, "percentage": 84.71, "elapsed_time": "2:53:34", "remaining_time": "0:31:19", "throughput": 20000.1, "total_tokens": 208281408}
|
|
{"current_steps": 66170, "total_steps": 78105, "loss": 0.0993, "lr": 3.473502339511381e-07, "epoch": 4.235964406888163, "percentage": 84.72, "elapsed_time": "2:53:34", "remaining_time": "0:31:18", "throughput": 20000.33, "total_tokens": 208297344}
|
|
{"current_steps": 66175, "total_steps": 78105, "loss": 0.1202, "lr": 3.470662110097725e-07, "epoch": 4.23628448882914, "percentage": 84.73, "elapsed_time": "2:53:43", "remaining_time": "0:31:19", "throughput": 19985.16, "total_tokens": 208314176}
|
|
{"current_steps": 66180, "total_steps": 78105, "loss": 0.075, "lr": 3.467822955747069e-07, "epoch": 4.236604570770117, "percentage": 84.73, "elapsed_time": "2:53:44", "remaining_time": "0:31:18", "throughput": 19985.35, "total_tokens": 208329408}
|
|
{"current_steps": 66185, "total_steps": 78105, "loss": 0.1311, "lr": 3.4649848766011655e-07, "epoch": 4.236924652711094, "percentage": 84.74, "elapsed_time": "2:53:44", "remaining_time": "0:31:17", "throughput": 19985.57, "total_tokens": 208345280}
|
|
{"current_steps": 66190, "total_steps": 78105, "loss": 0.1266, "lr": 3.462147872801755e-07, "epoch": 4.237244734652071, "percentage": 84.74, "elapsed_time": "2:53:45", "remaining_time": "0:31:16", "throughput": 19985.73, "total_tokens": 208359488}
|
|
{"current_steps": 66195, "total_steps": 78105, "loss": 0.1105, "lr": 3.45931194449049e-07, "epoch": 4.237564816593048, "percentage": 84.75, "elapsed_time": "2:53:46", "remaining_time": "0:31:15", "throughput": 19985.89, "total_tokens": 208373888}
|
|
{"current_steps": 66200, "total_steps": 78105, "loss": 0.1334, "lr": 3.456477091808985e-07, "epoch": 4.237884898534024, "percentage": 84.76, "elapsed_time": "2:53:46", "remaining_time": "0:31:15", "throughput": 19986.08, "total_tokens": 208388800}
|
|
{"current_steps": 66205, "total_steps": 78105, "loss": 0.139, "lr": 3.4536433148988e-07, "epoch": 4.238204980475001, "percentage": 84.76, "elapsed_time": "2:53:47", "remaining_time": "0:31:14", "throughput": 19986.33, "total_tokens": 208405568}
|
|
{"current_steps": 66210, "total_steps": 78105, "loss": 0.1348, "lr": 3.450810613901437e-07, "epoch": 4.238525062415978, "percentage": 84.77, "elapsed_time": "2:53:48", "remaining_time": "0:31:13", "throughput": 19986.53, "total_tokens": 208421312}
|
|
{"current_steps": 66215, "total_steps": 78105, "loss": 0.0886, "lr": 3.447978988958345e-07, "epoch": 4.238845144356955, "percentage": 84.78, "elapsed_time": "2:53:48", "remaining_time": "0:31:12", "throughput": 19986.72, "total_tokens": 208436736}
|
|
{"current_steps": 66220, "total_steps": 78105, "loss": 0.153, "lr": 3.4451484402109213e-07, "epoch": 4.239165226297932, "percentage": 84.78, "elapsed_time": "2:53:49", "remaining_time": "0:31:11", "throughput": 19986.94, "total_tokens": 208452672}
|
|
{"current_steps": 66225, "total_steps": 78105, "loss": 0.1039, "lr": 3.442318967800504e-07, "epoch": 4.239485308238909, "percentage": 84.79, "elapsed_time": "2:53:50", "remaining_time": "0:31:11", "throughput": 19987.15, "total_tokens": 208468672}
|
|
{"current_steps": 66230, "total_steps": 78105, "loss": 0.111, "lr": 3.439490571868395e-07, "epoch": 4.239805390179886, "percentage": 84.8, "elapsed_time": "2:53:50", "remaining_time": "0:31:10", "throughput": 19987.33, "total_tokens": 208483584}
|
|
{"current_steps": 66235, "total_steps": 78105, "loss": 0.1269, "lr": 3.436663252555822e-07, "epoch": 4.240125472120863, "percentage": 84.8, "elapsed_time": "2:53:51", "remaining_time": "0:31:09", "throughput": 19987.56, "total_tokens": 208499840}
|
|
{"current_steps": 66240, "total_steps": 78105, "loss": 0.1484, "lr": 3.4338370100039654e-07, "epoch": 4.24044555406184, "percentage": 84.81, "elapsed_time": "2:53:52", "remaining_time": "0:31:08", "throughput": 19987.78, "total_tokens": 208515776}
|
|
{"current_steps": 66245, "total_steps": 78105, "loss": 0.1339, "lr": 3.431011844353954e-07, "epoch": 4.240765636002816, "percentage": 84.82, "elapsed_time": "2:53:52", "remaining_time": "0:31:07", "throughput": 19987.97, "total_tokens": 208531264}
|
|
{"current_steps": 66250, "total_steps": 78105, "loss": 0.1122, "lr": 3.4281877557468644e-07, "epoch": 4.241085717943793, "percentage": 84.82, "elapsed_time": "2:53:53", "remaining_time": "0:31:07", "throughput": 19988.18, "total_tokens": 208547008}
|
|
{"current_steps": 66255, "total_steps": 78105, "loss": 0.0951, "lr": 3.425364744323714e-07, "epoch": 4.24140579988477, "percentage": 84.83, "elapsed_time": "2:53:54", "remaining_time": "0:31:06", "throughput": 19988.35, "total_tokens": 208562112}
|
|
{"current_steps": 66260, "total_steps": 78105, "loss": 0.1762, "lr": 3.4225428102254627e-07, "epoch": 4.241725881825747, "percentage": 84.83, "elapsed_time": "2:53:54", "remaining_time": "0:31:05", "throughput": 19988.55, "total_tokens": 208577344}
|
|
{"current_steps": 66265, "total_steps": 78105, "loss": 0.117, "lr": 3.4197219535930445e-07, "epoch": 4.242045963766724, "percentage": 84.84, "elapsed_time": "2:53:55", "remaining_time": "0:31:04", "throughput": 19988.8, "total_tokens": 208593920}
|
|
{"current_steps": 66270, "total_steps": 78105, "loss": 0.1237, "lr": 3.416902174567291e-07, "epoch": 4.242366045707701, "percentage": 84.85, "elapsed_time": "2:53:56", "remaining_time": "0:31:03", "throughput": 19989.0, "total_tokens": 208609088}
|
|
{"current_steps": 66275, "total_steps": 78105, "loss": 0.1098, "lr": 3.4140834732890286e-07, "epoch": 4.242686127648678, "percentage": 84.85, "elapsed_time": "2:53:56", "remaining_time": "0:31:02", "throughput": 19989.17, "total_tokens": 208624192}
|
|
{"current_steps": 66280, "total_steps": 78105, "loss": 0.1449, "lr": 3.411265849898998e-07, "epoch": 4.243006209589655, "percentage": 84.86, "elapsed_time": "2:53:57", "remaining_time": "0:31:02", "throughput": 19989.42, "total_tokens": 208640960}
|
|
{"current_steps": 66285, "total_steps": 78105, "loss": 0.1017, "lr": 3.4084493045379003e-07, "epoch": 4.243326291530632, "percentage": 84.87, "elapsed_time": "2:53:58", "remaining_time": "0:31:01", "throughput": 19989.63, "total_tokens": 208656768}
|
|
{"current_steps": 66290, "total_steps": 78105, "loss": 0.1363, "lr": 3.4056338373463783e-07, "epoch": 4.243646373471608, "percentage": 84.87, "elapsed_time": "2:53:58", "remaining_time": "0:31:00", "throughput": 19989.81, "total_tokens": 208672192}
|
|
{"current_steps": 66295, "total_steps": 78105, "loss": 0.1305, "lr": 3.402819448465017e-07, "epoch": 4.243966455412585, "percentage": 84.88, "elapsed_time": "2:53:59", "remaining_time": "0:30:59", "throughput": 19990.02, "total_tokens": 208687680}
|
|
{"current_steps": 66300, "total_steps": 78105, "loss": 0.1184, "lr": 3.4000061380343645e-07, "epoch": 4.244286537353562, "percentage": 84.89, "elapsed_time": "2:54:00", "remaining_time": "0:30:58", "throughput": 19990.22, "total_tokens": 208702656}
|
|
{"current_steps": 66305, "total_steps": 78105, "loss": 0.1418, "lr": 3.397193906194887e-07, "epoch": 4.244606619294539, "percentage": 84.89, "elapsed_time": "2:54:00", "remaining_time": "0:30:58", "throughput": 19990.41, "total_tokens": 208718272}
|
|
{"current_steps": 66310, "total_steps": 78105, "loss": 0.1569, "lr": 3.3943827530870276e-07, "epoch": 4.244926701235516, "percentage": 84.9, "elapsed_time": "2:54:01", "remaining_time": "0:30:57", "throughput": 19990.59, "total_tokens": 208733248}
|
|
{"current_steps": 66315, "total_steps": 78105, "loss": 0.1097, "lr": 3.3915726788511514e-07, "epoch": 4.245246783176493, "percentage": 84.9, "elapsed_time": "2:54:02", "remaining_time": "0:30:56", "throughput": 19990.81, "total_tokens": 208748928}
|
|
{"current_steps": 66320, "total_steps": 78105, "loss": 0.1422, "lr": 3.388763683627583e-07, "epoch": 4.24556686511747, "percentage": 84.91, "elapsed_time": "2:54:02", "remaining_time": "0:30:55", "throughput": 19991.0, "total_tokens": 208763968}
|
|
{"current_steps": 66325, "total_steps": 78105, "loss": 0.1638, "lr": 3.385955767556587e-07, "epoch": 4.245886947058447, "percentage": 84.92, "elapsed_time": "2:54:03", "remaining_time": "0:30:54", "throughput": 19991.17, "total_tokens": 208778816}
|
|
{"current_steps": 66330, "total_steps": 78105, "loss": 0.1093, "lr": 3.3831489307783765e-07, "epoch": 4.246207028999424, "percentage": 84.92, "elapsed_time": "2:54:04", "remaining_time": "0:30:54", "throughput": 19991.38, "total_tokens": 208794624}
|
|
{"current_steps": 66335, "total_steps": 78105, "loss": 0.1696, "lr": 3.3803431734331093e-07, "epoch": 4.2465271109404, "percentage": 84.93, "elapsed_time": "2:54:04", "remaining_time": "0:30:53", "throughput": 19991.56, "total_tokens": 208810176}
|
|
{"current_steps": 66340, "total_steps": 78105, "loss": 0.1384, "lr": 3.377538495660893e-07, "epoch": 4.246847192881377, "percentage": 84.94, "elapsed_time": "2:54:05", "remaining_time": "0:30:52", "throughput": 19991.72, "total_tokens": 208824704}
|
|
{"current_steps": 66345, "total_steps": 78105, "loss": 0.1278, "lr": 3.3747348976017694e-07, "epoch": 4.247167274822354, "percentage": 84.94, "elapsed_time": "2:54:06", "remaining_time": "0:30:51", "throughput": 19991.91, "total_tokens": 208840704}
|
|
{"current_steps": 66350, "total_steps": 78105, "loss": 0.0788, "lr": 3.371932379395748e-07, "epoch": 4.247487356763331, "percentage": 84.95, "elapsed_time": "2:54:06", "remaining_time": "0:30:50", "throughput": 19992.12, "total_tokens": 208856448}
|
|
{"current_steps": 66355, "total_steps": 78105, "loss": 0.1371, "lr": 3.3691309411827716e-07, "epoch": 4.247807438704308, "percentage": 84.96, "elapsed_time": "2:54:07", "remaining_time": "0:30:50", "throughput": 19992.31, "total_tokens": 208871616}
|
|
{"current_steps": 66360, "total_steps": 78105, "loss": 0.0801, "lr": 3.3663305831027197e-07, "epoch": 4.248127520645285, "percentage": 84.96, "elapsed_time": "2:54:08", "remaining_time": "0:30:49", "throughput": 19992.48, "total_tokens": 208886912}
|
|
{"current_steps": 66365, "total_steps": 78105, "loss": 0.1693, "lr": 3.3635313052954365e-07, "epoch": 4.248447602586262, "percentage": 84.97, "elapsed_time": "2:54:08", "remaining_time": "0:30:48", "throughput": 19992.72, "total_tokens": 208903232}
|
|
{"current_steps": 66370, "total_steps": 78105, "loss": 0.1533, "lr": 3.3607331079006935e-07, "epoch": 4.248767684527239, "percentage": 84.98, "elapsed_time": "2:54:09", "remaining_time": "0:30:47", "throughput": 19992.88, "total_tokens": 208917696}
|
|
{"current_steps": 66375, "total_steps": 78105, "loss": 0.2296, "lr": 3.3579359910582337e-07, "epoch": 4.249087766468216, "percentage": 84.98, "elapsed_time": "2:54:10", "remaining_time": "0:30:46", "throughput": 19993.08, "total_tokens": 208933376}
|
|
{"current_steps": 66380, "total_steps": 78105, "loss": 0.1136, "lr": 3.35513995490771e-07, "epoch": 4.249407848409192, "percentage": 84.99, "elapsed_time": "2:54:10", "remaining_time": "0:30:45", "throughput": 19993.26, "total_tokens": 208948736}
|
|
{"current_steps": 66385, "total_steps": 78105, "loss": 0.1421, "lr": 3.3523449995887664e-07, "epoch": 4.249727930350169, "percentage": 84.99, "elapsed_time": "2:54:11", "remaining_time": "0:30:45", "throughput": 19993.48, "total_tokens": 208964800}
|
|
{"current_steps": 66390, "total_steps": 78105, "loss": 0.1526, "lr": 3.3495511252409436e-07, "epoch": 4.250048012291146, "percentage": 85.0, "elapsed_time": "2:54:12", "remaining_time": "0:30:44", "throughput": 19993.64, "total_tokens": 208979520}
|
|
{"current_steps": 66395, "total_steps": 78105, "loss": 0.0669, "lr": 3.3467583320037687e-07, "epoch": 4.250368094232123, "percentage": 85.01, "elapsed_time": "2:54:12", "remaining_time": "0:30:43", "throughput": 19993.86, "total_tokens": 208995328}
|
|
{"current_steps": 66400, "total_steps": 78105, "loss": 0.0922, "lr": 3.343966620016695e-07, "epoch": 4.2506881761731, "percentage": 85.01, "elapsed_time": "2:54:13", "remaining_time": "0:30:42", "throughput": 19994.04, "total_tokens": 209010624}
|
|
{"current_steps": 66402, "total_steps": 78105, "eval_loss": 0.6279151439666748, "epoch": 4.250816208949491, "percentage": 85.02, "elapsed_time": "2:55:04", "remaining_time": "0:30:51", "throughput": 19896.96, "total_tokens": 209017024}
|
|
{"current_steps": 66405, "total_steps": 78105, "loss": 0.1591, "lr": 3.3411759894191207e-07, "epoch": 4.251008258114077, "percentage": 85.02, "elapsed_time": "2:55:38", "remaining_time": "0:30:56", "throughput": 19834.88, "total_tokens": 209026368}
|
|
{"current_steps": 66410, "total_steps": 78105, "loss": 0.1292, "lr": 3.3383864403504136e-07, "epoch": 4.251328340055054, "percentage": 85.03, "elapsed_time": "2:55:38", "remaining_time": "0:30:55", "throughput": 19835.07, "total_tokens": 209041344}
|
|
{"current_steps": 66415, "total_steps": 78105, "loss": 0.1414, "lr": 3.335597972949842e-07, "epoch": 4.251648421996031, "percentage": 85.03, "elapsed_time": "2:55:39", "remaining_time": "0:30:55", "throughput": 19835.26, "total_tokens": 209056320}
|
|
{"current_steps": 66420, "total_steps": 78105, "loss": 0.1266, "lr": 3.332810587356669e-07, "epoch": 4.251968503937007, "percentage": 85.04, "elapsed_time": "2:55:40", "remaining_time": "0:30:54", "throughput": 19835.45, "total_tokens": 209071488}
|
|
{"current_steps": 66425, "total_steps": 78105, "loss": 0.1375, "lr": 3.3300242837100763e-07, "epoch": 4.252288585877984, "percentage": 85.05, "elapsed_time": "2:55:40", "remaining_time": "0:30:53", "throughput": 19835.65, "total_tokens": 209086720}
|
|
{"current_steps": 66430, "total_steps": 78105, "loss": 0.1353, "lr": 3.327239062149196e-07, "epoch": 4.252608667818961, "percentage": 85.05, "elapsed_time": "2:55:41", "remaining_time": "0:30:52", "throughput": 19835.88, "total_tokens": 209102784}
|
|
{"current_steps": 66435, "total_steps": 78105, "loss": 0.1139, "lr": 3.3244549228131054e-07, "epoch": 4.252928749759938, "percentage": 85.06, "elapsed_time": "2:55:42", "remaining_time": "0:30:51", "throughput": 19836.1, "total_tokens": 209118592}
|
|
{"current_steps": 66440, "total_steps": 78105, "loss": 0.1902, "lr": 3.3216718658408337e-07, "epoch": 4.253248831700915, "percentage": 85.06, "elapsed_time": "2:55:43", "remaining_time": "0:30:51", "throughput": 19836.36, "total_tokens": 209134912}
|
|
{"current_steps": 66445, "total_steps": 78105, "loss": 0.1814, "lr": 3.318889891371352e-07, "epoch": 4.253568913641892, "percentage": 85.07, "elapsed_time": "2:55:43", "remaining_time": "0:30:50", "throughput": 19836.68, "total_tokens": 209153792}
|
|
{"current_steps": 66450, "total_steps": 78105, "loss": 0.0806, "lr": 3.316108999543574e-07, "epoch": 4.253888995582869, "percentage": 85.08, "elapsed_time": "2:55:44", "remaining_time": "0:30:49", "throughput": 19836.89, "total_tokens": 209169472}
|
|
{"current_steps": 66455, "total_steps": 78105, "loss": 0.1387, "lr": 3.313329190496362e-07, "epoch": 4.254209077523846, "percentage": 85.08, "elapsed_time": "2:55:45", "remaining_time": "0:30:48", "throughput": 19837.12, "total_tokens": 209185536}
|
|
{"current_steps": 66460, "total_steps": 78105, "loss": 0.1188, "lr": 3.3105504643685367e-07, "epoch": 4.254529159464823, "percentage": 85.09, "elapsed_time": "2:55:45", "remaining_time": "0:30:47", "throughput": 19837.34, "total_tokens": 209201280}
|
|
{"current_steps": 66465, "total_steps": 78105, "loss": 0.129, "lr": 3.3077728212988354e-07, "epoch": 4.2548492414058, "percentage": 85.1, "elapsed_time": "2:55:46", "remaining_time": "0:30:47", "throughput": 19837.53, "total_tokens": 209216640}
|
|
{"current_steps": 66470, "total_steps": 78105, "loss": 0.1634, "lr": 3.3049962614259744e-07, "epoch": 4.2551693233467764, "percentage": 85.1, "elapsed_time": "2:55:47", "remaining_time": "0:30:46", "throughput": 19837.71, "total_tokens": 209231232}
|
|
{"current_steps": 66475, "total_steps": 78105, "loss": 0.0657, "lr": 3.3022207848885963e-07, "epoch": 4.255489405287753, "percentage": 85.11, "elapsed_time": "2:55:47", "remaining_time": "0:30:45", "throughput": 19837.89, "total_tokens": 209246080}
|
|
{"current_steps": 66480, "total_steps": 78105, "loss": 0.1012, "lr": 3.2994463918252924e-07, "epoch": 4.25580948722873, "percentage": 85.12, "elapsed_time": "2:55:48", "remaining_time": "0:30:44", "throughput": 19838.16, "total_tokens": 209262976}
|
|
{"current_steps": 66485, "total_steps": 78105, "loss": 0.082, "lr": 3.296673082374599e-07, "epoch": 4.256129569169707, "percentage": 85.12, "elapsed_time": "2:55:49", "remaining_time": "0:30:43", "throughput": 19838.37, "total_tokens": 209278784}
|
|
{"current_steps": 66490, "total_steps": 78105, "loss": 0.0905, "lr": 3.2939008566750003e-07, "epoch": 4.256449651110684, "percentage": 85.13, "elapsed_time": "2:55:50", "remaining_time": "0:30:42", "throughput": 19838.77, "total_tokens": 209299648}
|
|
{"current_steps": 66495, "total_steps": 78105, "loss": 0.1748, "lr": 3.291129714864938e-07, "epoch": 4.256769733051661, "percentage": 85.14, "elapsed_time": "2:55:50", "remaining_time": "0:30:42", "throughput": 19838.98, "total_tokens": 209315328}
|
|
{"current_steps": 66500, "total_steps": 78105, "loss": 0.1073, "lr": 3.2883596570827736e-07, "epoch": 4.257089814992638, "percentage": 85.14, "elapsed_time": "2:55:51", "remaining_time": "0:30:41", "throughput": 19839.19, "total_tokens": 209330752}
|
|
{"current_steps": 66505, "total_steps": 78105, "loss": 0.1558, "lr": 3.2855906834668384e-07, "epoch": 4.257409896933615, "percentage": 85.15, "elapsed_time": "2:55:52", "remaining_time": "0:30:40", "throughput": 19839.39, "total_tokens": 209345856}
|
|
{"current_steps": 66510, "total_steps": 78105, "loss": 0.1235, "lr": 3.282822794155399e-07, "epoch": 4.2577299788745915, "percentage": 85.15, "elapsed_time": "2:55:52", "remaining_time": "0:30:39", "throughput": 19839.65, "total_tokens": 209362560}
|
|
{"current_steps": 66515, "total_steps": 78105, "loss": 0.1399, "lr": 3.280055989286668e-07, "epoch": 4.2580500608155685, "percentage": 85.16, "elapsed_time": "2:55:53", "remaining_time": "0:30:38", "throughput": 19839.83, "total_tokens": 209377152}
|
|
{"current_steps": 66520, "total_steps": 78105, "loss": 0.1494, "lr": 3.277290268998806e-07, "epoch": 4.2583701427565455, "percentage": 85.17, "elapsed_time": "2:55:54", "remaining_time": "0:30:38", "throughput": 19840.1, "total_tokens": 209394496}
|
|
{"current_steps": 66525, "total_steps": 78105, "loss": 0.164, "lr": 3.2745256334299145e-07, "epoch": 4.2586902246975225, "percentage": 85.17, "elapsed_time": "2:55:54", "remaining_time": "0:30:37", "throughput": 19840.28, "total_tokens": 209409152}
|
|
{"current_steps": 66530, "total_steps": 78105, "loss": 0.1273, "lr": 3.271762082718058e-07, "epoch": 4.259010306638499, "percentage": 85.18, "elapsed_time": "2:55:55", "remaining_time": "0:30:36", "throughput": 19840.5, "total_tokens": 209425280}
|
|
{"current_steps": 66535, "total_steps": 78105, "loss": 0.1372, "lr": 3.268999617001217e-07, "epoch": 4.259330388579476, "percentage": 85.19, "elapsed_time": "2:55:56", "remaining_time": "0:30:35", "throughput": 19840.73, "total_tokens": 209441600}
|
|
{"current_steps": 66540, "total_steps": 78105, "loss": 0.1437, "lr": 3.2662382364173434e-07, "epoch": 4.259650470520453, "percentage": 85.19, "elapsed_time": "2:55:56", "remaining_time": "0:30:34", "throughput": 19840.97, "total_tokens": 209457664}
|
|
{"current_steps": 66545, "total_steps": 78105, "loss": 0.1344, "lr": 3.2634779411043287e-07, "epoch": 4.25997055246143, "percentage": 85.2, "elapsed_time": "2:55:57", "remaining_time": "0:30:34", "throughput": 19841.12, "total_tokens": 209472000}
|
|
{"current_steps": 66550, "total_steps": 78105, "loss": 0.084, "lr": 3.260718731200005e-07, "epoch": 4.260290634402407, "percentage": 85.21, "elapsed_time": "2:55:58", "remaining_time": "0:30:33", "throughput": 19841.38, "total_tokens": 209488704}
|
|
{"current_steps": 66555, "total_steps": 78105, "loss": 0.1185, "lr": 3.2579606068421476e-07, "epoch": 4.2606107163433835, "percentage": 85.21, "elapsed_time": "2:55:58", "remaining_time": "0:30:32", "throughput": 19841.56, "total_tokens": 209503744}
|
|
{"current_steps": 66560, "total_steps": 78105, "loss": 0.1368, "lr": 3.2552035681684913e-07, "epoch": 4.2609307982843605, "percentage": 85.22, "elapsed_time": "2:55:59", "remaining_time": "0:30:31", "throughput": 19841.83, "total_tokens": 209520768}
|
|
{"current_steps": 66565, "total_steps": 78105, "loss": 0.1241, "lr": 3.2524476153167033e-07, "epoch": 4.2612508802253375, "percentage": 85.23, "elapsed_time": "2:56:00", "remaining_time": "0:30:30", "throughput": 19842.07, "total_tokens": 209537728}
|
|
{"current_steps": 66570, "total_steps": 78105, "loss": 0.1202, "lr": 3.2496927484244017e-07, "epoch": 4.2615709621663145, "percentage": 85.23, "elapsed_time": "2:56:00", "remaining_time": "0:30:29", "throughput": 19842.31, "total_tokens": 209553984}
|
|
{"current_steps": 66575, "total_steps": 78105, "loss": 0.0852, "lr": 3.2469389676291456e-07, "epoch": 4.2618910441072915, "percentage": 85.24, "elapsed_time": "2:56:01", "remaining_time": "0:30:29", "throughput": 19842.53, "total_tokens": 209569600}
|
|
{"current_steps": 66580, "total_steps": 78105, "loss": 0.1302, "lr": 3.244186273068456e-07, "epoch": 4.2622111260482685, "percentage": 85.24, "elapsed_time": "2:56:02", "remaining_time": "0:30:28", "throughput": 19842.73, "total_tokens": 209584512}
|
|
{"current_steps": 66585, "total_steps": 78105, "loss": 0.1274, "lr": 3.2414346648797834e-07, "epoch": 4.2625312079892455, "percentage": 85.25, "elapsed_time": "2:56:02", "remaining_time": "0:30:27", "throughput": 19842.91, "total_tokens": 209599808}
|
|
{"current_steps": 66590, "total_steps": 78105, "loss": 0.1476, "lr": 3.2386841432005237e-07, "epoch": 4.262851289930222, "percentage": 85.26, "elapsed_time": "2:56:03", "remaining_time": "0:30:26", "throughput": 19843.11, "total_tokens": 209615552}
|
|
{"current_steps": 66595, "total_steps": 78105, "loss": 0.1607, "lr": 3.235934708168031e-07, "epoch": 4.263171371871199, "percentage": 85.26, "elapsed_time": "2:56:04", "remaining_time": "0:30:25", "throughput": 19843.33, "total_tokens": 209631360}
|
|
{"current_steps": 66600, "total_steps": 78105, "loss": 0.1514, "lr": 3.233186359919588e-07, "epoch": 4.2634914538121755, "percentage": 85.27, "elapsed_time": "2:56:04", "remaining_time": "0:30:25", "throughput": 19843.51, "total_tokens": 209646272}
|
|
{"current_steps": 66605, "total_steps": 78105, "loss": 0.0887, "lr": 3.2304390985924473e-07, "epoch": 4.2638115357531525, "percentage": 85.28, "elapsed_time": "2:56:05", "remaining_time": "0:30:24", "throughput": 19843.69, "total_tokens": 209661376}
|
|
{"current_steps": 66610, "total_steps": 78105, "loss": 0.1717, "lr": 3.2276929243237776e-07, "epoch": 4.2641316176941295, "percentage": 85.28, "elapsed_time": "2:56:06", "remaining_time": "0:30:23", "throughput": 19843.98, "total_tokens": 209678720}
|
|
{"current_steps": 66615, "total_steps": 78105, "loss": 0.1424, "lr": 3.224947837250722e-07, "epoch": 4.2644516996351065, "percentage": 85.29, "elapsed_time": "2:56:07", "remaining_time": "0:30:22", "throughput": 19844.18, "total_tokens": 209694272}
|
|
{"current_steps": 66620, "total_steps": 78105, "loss": 0.1312, "lr": 3.2222038375103404e-07, "epoch": 4.2647717815760835, "percentage": 85.3, "elapsed_time": "2:56:07", "remaining_time": "0:30:21", "throughput": 19844.39, "total_tokens": 209710016}
|
|
{"current_steps": 66625, "total_steps": 78105, "loss": 0.1292, "lr": 3.21946092523967e-07, "epoch": 4.2650918635170605, "percentage": 85.3, "elapsed_time": "2:56:08", "remaining_time": "0:30:21", "throughput": 19844.59, "total_tokens": 209725376}
|
|
{"current_steps": 66630, "total_steps": 78105, "loss": 0.1617, "lr": 3.216719100575669e-07, "epoch": 4.2654119454580375, "percentage": 85.31, "elapsed_time": "2:56:09", "remaining_time": "0:30:20", "throughput": 19844.76, "total_tokens": 209740160}
|
|
{"current_steps": 66635, "total_steps": 78105, "loss": 0.2161, "lr": 3.2139783636552483e-07, "epoch": 4.2657320273990145, "percentage": 85.31, "elapsed_time": "2:56:09", "remaining_time": "0:30:19", "throughput": 19845.01, "total_tokens": 209757248}
|
|
{"current_steps": 66640, "total_steps": 78105, "loss": 0.1192, "lr": 3.2112387146152726e-07, "epoch": 4.2660521093399915, "percentage": 85.32, "elapsed_time": "2:56:10", "remaining_time": "0:30:18", "throughput": 19845.2, "total_tokens": 209772416}
|
|
{"current_steps": 66645, "total_steps": 78105, "loss": 0.1435, "lr": 3.2085001535925364e-07, "epoch": 4.266372191280968, "percentage": 85.33, "elapsed_time": "2:56:11", "remaining_time": "0:30:17", "throughput": 19845.42, "total_tokens": 209788352}
|
|
{"current_steps": 66650, "total_steps": 78105, "loss": 0.1802, "lr": 3.205762680723798e-07, "epoch": 4.2666922732219446, "percentage": 85.33, "elapsed_time": "2:56:11", "remaining_time": "0:30:16", "throughput": 19845.69, "total_tokens": 209805248}
|
|
{"current_steps": 66655, "total_steps": 78105, "loss": 0.1442, "lr": 3.20302629614575e-07, "epoch": 4.2670123551629215, "percentage": 85.34, "elapsed_time": "2:56:12", "remaining_time": "0:30:16", "throughput": 19845.92, "total_tokens": 209821440}
|
|
{"current_steps": 66660, "total_steps": 78105, "loss": 0.1423, "lr": 3.2002909999950343e-07, "epoch": 4.2673324371038985, "percentage": 85.35, "elapsed_time": "2:56:13", "remaining_time": "0:30:15", "throughput": 19846.14, "total_tokens": 209837312}
|
|
{"current_steps": 66665, "total_steps": 78105, "loss": 0.1184, "lr": 3.197556792408232e-07, "epoch": 4.2676525190448755, "percentage": 85.35, "elapsed_time": "2:56:13", "remaining_time": "0:30:14", "throughput": 19846.4, "total_tokens": 209854336}
|
|
{"current_steps": 66670, "total_steps": 78105, "loss": 0.117, "lr": 3.1948236735218826e-07, "epoch": 4.2679726009858525, "percentage": 85.36, "elapsed_time": "2:56:14", "remaining_time": "0:30:13", "throughput": 19846.57, "total_tokens": 209869120}
|
|
{"current_steps": 66675, "total_steps": 78105, "loss": 0.1468, "lr": 3.1920916434724586e-07, "epoch": 4.2682926829268295, "percentage": 85.37, "elapsed_time": "2:56:15", "remaining_time": "0:30:12", "throughput": 19846.82, "total_tokens": 209885952}
|
|
{"current_steps": 66680, "total_steps": 78105, "loss": 0.1449, "lr": 3.1893607023963823e-07, "epoch": 4.2686127648678065, "percentage": 85.37, "elapsed_time": "2:56:15", "remaining_time": "0:30:12", "throughput": 19846.99, "total_tokens": 209900736}
|
|
{"current_steps": 66685, "total_steps": 78105, "loss": 0.1056, "lr": 3.1866308504300226e-07, "epoch": 4.268932846808783, "percentage": 85.38, "elapsed_time": "2:56:16", "remaining_time": "0:30:11", "throughput": 19847.21, "total_tokens": 209916544}
|
|
{"current_steps": 66690, "total_steps": 78105, "loss": 0.108, "lr": 3.183902087709706e-07, "epoch": 4.26925292874976, "percentage": 85.39, "elapsed_time": "2:56:17", "remaining_time": "0:30:10", "throughput": 19847.41, "total_tokens": 209932160}
|
|
{"current_steps": 66695, "total_steps": 78105, "loss": 0.1569, "lr": 3.181174414371674e-07, "epoch": 4.269573010690737, "percentage": 85.39, "elapsed_time": "2:56:18", "remaining_time": "0:30:09", "throughput": 19847.66, "total_tokens": 209948928}
|
|
{"current_steps": 66700, "total_steps": 78105, "loss": 0.1881, "lr": 3.178447830552145e-07, "epoch": 4.269893092631714, "percentage": 85.4, "elapsed_time": "2:56:18", "remaining_time": "0:30:08", "throughput": 19847.83, "total_tokens": 209963776}
|
|
{"current_steps": 66705, "total_steps": 78105, "loss": 0.1396, "lr": 3.175722336387266e-07, "epoch": 4.270213174572691, "percentage": 85.4, "elapsed_time": "2:56:19", "remaining_time": "0:30:08", "throughput": 19848.05, "total_tokens": 209979904}
|
|
{"current_steps": 66710, "total_steps": 78105, "loss": 0.1545, "lr": 3.172997932013139e-07, "epoch": 4.2705332565136676, "percentage": 85.41, "elapsed_time": "2:56:20", "remaining_time": "0:30:07", "throughput": 19848.26, "total_tokens": 209995136}
|
|
{"current_steps": 66715, "total_steps": 78105, "loss": 0.1718, "lr": 3.1702746175657997e-07, "epoch": 4.2708533384546445, "percentage": 85.42, "elapsed_time": "2:56:20", "remaining_time": "0:30:06", "throughput": 19848.48, "total_tokens": 210011072}
|
|
{"current_steps": 66720, "total_steps": 78105, "loss": 0.1476, "lr": 3.1675523931812337e-07, "epoch": 4.2711734203956215, "percentage": 85.42, "elapsed_time": "2:56:21", "remaining_time": "0:30:05", "throughput": 19848.67, "total_tokens": 210025856}
|
|
{"current_steps": 66725, "total_steps": 78105, "loss": 0.0998, "lr": 3.164831258995391e-07, "epoch": 4.2714935023365985, "percentage": 85.43, "elapsed_time": "2:56:22", "remaining_time": "0:30:04", "throughput": 19848.92, "total_tokens": 210042240}
|
|
{"current_steps": 66730, "total_steps": 78105, "loss": 0.1297, "lr": 3.1621112151441315e-07, "epoch": 4.2718135842775755, "percentage": 85.44, "elapsed_time": "2:56:22", "remaining_time": "0:30:03", "throughput": 19849.13, "total_tokens": 210057664}
|
|
{"current_steps": 66735, "total_steps": 78105, "loss": 0.1096, "lr": 3.159392261763292e-07, "epoch": 4.272133666218552, "percentage": 85.44, "elapsed_time": "2:56:23", "remaining_time": "0:30:03", "throughput": 19849.3, "total_tokens": 210072384}
|
|
{"current_steps": 66740, "total_steps": 78105, "loss": 0.1216, "lr": 3.156674398988638e-07, "epoch": 4.272453748159529, "percentage": 85.45, "elapsed_time": "2:56:24", "remaining_time": "0:30:02", "throughput": 19849.5, "total_tokens": 210087488}
|
|
{"current_steps": 66745, "total_steps": 78105, "loss": 0.1317, "lr": 3.153957626955889e-07, "epoch": 4.272773830100506, "percentage": 85.46, "elapsed_time": "2:56:24", "remaining_time": "0:30:01", "throughput": 19849.77, "total_tokens": 210104512}
|
|
{"current_steps": 66750, "total_steps": 78105, "loss": 0.1093, "lr": 3.151241945800704e-07, "epoch": 4.273093912041483, "percentage": 85.46, "elapsed_time": "2:56:25", "remaining_time": "0:30:00", "throughput": 19850.11, "total_tokens": 210123136}
|
|
{"current_steps": 66755, "total_steps": 78105, "loss": 0.1306, "lr": 3.148527355658684e-07, "epoch": 4.27341399398246, "percentage": 85.47, "elapsed_time": "2:56:26", "remaining_time": "0:29:59", "throughput": 19850.32, "total_tokens": 210138624}
|
|
{"current_steps": 66760, "total_steps": 78105, "loss": 0.1056, "lr": 3.1458138566653975e-07, "epoch": 4.273734075923437, "percentage": 85.47, "elapsed_time": "2:56:26", "remaining_time": "0:29:59", "throughput": 19850.54, "total_tokens": 210154304}
|
|
{"current_steps": 66765, "total_steps": 78105, "loss": 0.1623, "lr": 3.143101448956323e-07, "epoch": 4.274054157864414, "percentage": 85.48, "elapsed_time": "2:56:27", "remaining_time": "0:29:58", "throughput": 19850.76, "total_tokens": 210169728}
|
|
{"current_steps": 66770, "total_steps": 78105, "loss": 0.1264, "lr": 3.140390132666921e-07, "epoch": 4.2743742398053906, "percentage": 85.49, "elapsed_time": "2:56:28", "remaining_time": "0:29:57", "throughput": 19850.94, "total_tokens": 210184704}
|
|
{"current_steps": 66775, "total_steps": 78105, "loss": 0.0836, "lr": 3.1376799079325726e-07, "epoch": 4.274694321746367, "percentage": 85.49, "elapsed_time": "2:56:28", "remaining_time": "0:29:56", "throughput": 19851.14, "total_tokens": 210200448}
|
|
{"current_steps": 66780, "total_steps": 78105, "loss": 0.1466, "lr": 3.1349707748886097e-07, "epoch": 4.275014403687344, "percentage": 85.5, "elapsed_time": "2:56:29", "remaining_time": "0:29:55", "throughput": 19851.33, "total_tokens": 210215360}
|
|
{"current_steps": 66785, "total_steps": 78105, "loss": 0.1295, "lr": 3.1322627336703186e-07, "epoch": 4.275334485628321, "percentage": 85.51, "elapsed_time": "2:56:30", "remaining_time": "0:29:55", "throughput": 19851.57, "total_tokens": 210231616}
|
|
{"current_steps": 66790, "total_steps": 78105, "loss": 0.103, "lr": 3.1295557844129217e-07, "epoch": 4.275654567569298, "percentage": 85.51, "elapsed_time": "2:56:30", "remaining_time": "0:29:54", "throughput": 19851.82, "total_tokens": 210248448}
|
|
{"current_steps": 66795, "total_steps": 78105, "loss": 0.138, "lr": 3.126849927251588e-07, "epoch": 4.275974649510275, "percentage": 85.52, "elapsed_time": "2:56:31", "remaining_time": "0:29:53", "throughput": 19852.0, "total_tokens": 210263040}
|
|
{"current_steps": 66800, "total_steps": 78105, "loss": 0.1231, "lr": 3.124145162321437e-07, "epoch": 4.276294731451252, "percentage": 85.53, "elapsed_time": "2:56:32", "remaining_time": "0:29:52", "throughput": 19852.21, "total_tokens": 210278464}
|
|
{"current_steps": 66805, "total_steps": 78105, "loss": 0.1155, "lr": 3.1214414897575236e-07, "epoch": 4.276614813392229, "percentage": 85.53, "elapsed_time": "2:56:32", "remaining_time": "0:29:51", "throughput": 19852.42, "total_tokens": 210294016}
|
|
{"current_steps": 66810, "total_steps": 78105, "loss": 0.1451, "lr": 3.118738909694871e-07, "epoch": 4.276934895333206, "percentage": 85.54, "elapsed_time": "2:56:33", "remaining_time": "0:29:50", "throughput": 19852.63, "total_tokens": 210310080}
|
|
{"current_steps": 66815, "total_steps": 78105, "loss": 0.1678, "lr": 3.1160374222684144e-07, "epoch": 4.277254977274183, "percentage": 85.55, "elapsed_time": "2:56:34", "remaining_time": "0:29:50", "throughput": 19852.86, "total_tokens": 210326144}
|
|
{"current_steps": 66820, "total_steps": 78105, "loss": 0.1058, "lr": 3.113337027613061e-07, "epoch": 4.277575059215159, "percentage": 85.55, "elapsed_time": "2:56:34", "remaining_time": "0:29:49", "throughput": 19853.01, "total_tokens": 210340544}
|
|
{"current_steps": 66825, "total_steps": 78105, "loss": 0.095, "lr": 3.110637725863655e-07, "epoch": 4.277895141156136, "percentage": 85.56, "elapsed_time": "2:56:35", "remaining_time": "0:29:48", "throughput": 19853.28, "total_tokens": 210357696}
|
|
{"current_steps": 66830, "total_steps": 78105, "loss": 0.107, "lr": 3.1079395171549787e-07, "epoch": 4.278215223097113, "percentage": 85.56, "elapsed_time": "2:56:36", "remaining_time": "0:29:47", "throughput": 19853.47, "total_tokens": 210372800}
|
|
{"current_steps": 66835, "total_steps": 78105, "loss": 0.1143, "lr": 3.1052424016217804e-07, "epoch": 4.27853530503809, "percentage": 85.57, "elapsed_time": "2:56:36", "remaining_time": "0:29:46", "throughput": 19853.69, "total_tokens": 210388672}
|
|
{"current_steps": 66840, "total_steps": 78105, "loss": 0.1163, "lr": 3.1025463793987215e-07, "epoch": 4.278855386979067, "percentage": 85.58, "elapsed_time": "2:56:37", "remaining_time": "0:29:46", "throughput": 19853.86, "total_tokens": 210403136}
|
|
{"current_steps": 66845, "total_steps": 78105, "loss": 0.1007, "lr": 3.0998514506204474e-07, "epoch": 4.279175468920044, "percentage": 85.58, "elapsed_time": "2:56:38", "remaining_time": "0:29:45", "throughput": 19854.1, "total_tokens": 210419136}
|
|
{"current_steps": 66850, "total_steps": 78105, "loss": 0.1155, "lr": 3.097157615421506e-07, "epoch": 4.279495550861021, "percentage": 85.59, "elapsed_time": "2:56:38", "remaining_time": "0:29:44", "throughput": 19854.33, "total_tokens": 210435392}
|
|
{"current_steps": 66855, "total_steps": 78105, "loss": 0.1402, "lr": 3.0944648739364313e-07, "epoch": 4.279815632801998, "percentage": 85.6, "elapsed_time": "2:56:39", "remaining_time": "0:29:43", "throughput": 19854.5, "total_tokens": 210450240}
|
|
{"current_steps": 66860, "total_steps": 78105, "loss": 0.1489, "lr": 3.09177322629968e-07, "epoch": 4.280135714742975, "percentage": 85.6, "elapsed_time": "2:56:40", "remaining_time": "0:29:42", "throughput": 19854.75, "total_tokens": 210467072}
|
|
{"current_steps": 66865, "total_steps": 78105, "loss": 0.1579, "lr": 3.0890826726456576e-07, "epoch": 4.280455796683951, "percentage": 85.61, "elapsed_time": "2:56:40", "remaining_time": "0:29:42", "throughput": 19854.96, "total_tokens": 210482368}
|
|
{"current_steps": 66870, "total_steps": 78105, "loss": 0.1348, "lr": 3.0863932131087154e-07, "epoch": 4.280775878624928, "percentage": 85.62, "elapsed_time": "2:56:41", "remaining_time": "0:29:41", "throughput": 19855.26, "total_tokens": 210500480}
|
|
{"current_steps": 66875, "total_steps": 78105, "loss": 0.1665, "lr": 3.0837048478231493e-07, "epoch": 4.281095960565905, "percentage": 85.62, "elapsed_time": "2:56:42", "remaining_time": "0:29:40", "throughput": 19855.49, "total_tokens": 210516672}
|
|
{"current_steps": 66880, "total_steps": 78105, "loss": 0.1429, "lr": 3.0810175769232093e-07, "epoch": 4.281416042506882, "percentage": 85.63, "elapsed_time": "2:56:43", "remaining_time": "0:29:39", "throughput": 19855.74, "total_tokens": 210532992}
|
|
{"current_steps": 66885, "total_steps": 78105, "loss": 0.1077, "lr": 3.07833140054308e-07, "epoch": 4.281736124447859, "percentage": 85.63, "elapsed_time": "2:56:43", "remaining_time": "0:29:38", "throughput": 19855.94, "total_tokens": 210548544}
|
|
{"current_steps": 66890, "total_steps": 78105, "loss": 0.1233, "lr": 3.075646318816894e-07, "epoch": 4.282056206388836, "percentage": 85.64, "elapsed_time": "2:56:44", "remaining_time": "0:29:37", "throughput": 19856.12, "total_tokens": 210563456}
|
|
{"current_steps": 66895, "total_steps": 78105, "loss": 0.1286, "lr": 3.0729623318787344e-07, "epoch": 4.282376288329813, "percentage": 85.65, "elapsed_time": "2:56:45", "remaining_time": "0:29:37", "throughput": 19856.32, "total_tokens": 210579136}
|
|
{"current_steps": 66900, "total_steps": 78105, "loss": 0.1611, "lr": 3.070279439862617e-07, "epoch": 4.28269637027079, "percentage": 85.65, "elapsed_time": "2:56:45", "remaining_time": "0:29:36", "throughput": 19856.48, "total_tokens": 210593664}
|
|
{"current_steps": 66905, "total_steps": 78105, "loss": 0.151, "lr": 3.06759764290252e-07, "epoch": 4.283016452211767, "percentage": 85.66, "elapsed_time": "2:56:46", "remaining_time": "0:29:35", "throughput": 19856.7, "total_tokens": 210609728}
|
|
{"current_steps": 66910, "total_steps": 78105, "loss": 0.1032, "lr": 3.064916941132354e-07, "epoch": 4.283336534152743, "percentage": 85.67, "elapsed_time": "2:56:47", "remaining_time": "0:29:34", "throughput": 19856.93, "total_tokens": 210625792}
|
|
{"current_steps": 66915, "total_steps": 78105, "loss": 0.0661, "lr": 3.0622373346859746e-07, "epoch": 4.28365661609372, "percentage": 85.67, "elapsed_time": "2:56:47", "remaining_time": "0:29:33", "throughput": 19857.14, "total_tokens": 210641216}
|
|
{"current_steps": 66920, "total_steps": 78105, "loss": 0.1357, "lr": 3.0595588236972063e-07, "epoch": 4.283976698034697, "percentage": 85.68, "elapsed_time": "2:56:48", "remaining_time": "0:29:33", "throughput": 19857.36, "total_tokens": 210657088}
|
|
{"current_steps": 66925, "total_steps": 78105, "loss": 0.1477, "lr": 3.056881408299772e-07, "epoch": 4.284296779975674, "percentage": 85.69, "elapsed_time": "2:56:49", "remaining_time": "0:29:32", "throughput": 19857.56, "total_tokens": 210672128}
|
|
{"current_steps": 66930, "total_steps": 78105, "loss": 0.1822, "lr": 3.0542050886273904e-07, "epoch": 4.284616861916651, "percentage": 85.69, "elapsed_time": "2:56:49", "remaining_time": "0:29:31", "throughput": 19857.78, "total_tokens": 210687808}
|
|
{"current_steps": 66935, "total_steps": 78105, "loss": 0.1298, "lr": 3.051529864813696e-07, "epoch": 4.284936943857628, "percentage": 85.7, "elapsed_time": "2:56:50", "remaining_time": "0:29:30", "throughput": 19857.97, "total_tokens": 210703040}
|
|
{"current_steps": 66940, "total_steps": 78105, "loss": 0.0893, "lr": 3.0488557369922735e-07, "epoch": 4.285257025798605, "percentage": 85.71, "elapsed_time": "2:56:51", "remaining_time": "0:29:29", "throughput": 19858.15, "total_tokens": 210717824}
|
|
{"current_steps": 66945, "total_steps": 78105, "loss": 0.1294, "lr": 3.046182705296655e-07, "epoch": 4.285577107739582, "percentage": 85.71, "elapsed_time": "2:56:51", "remaining_time": "0:29:29", "throughput": 19858.35, "total_tokens": 210732928}
|
|
{"current_steps": 66950, "total_steps": 78105, "loss": 0.0836, "lr": 3.0435107698603167e-07, "epoch": 4.285897189680558, "percentage": 85.72, "elapsed_time": "2:56:52", "remaining_time": "0:29:28", "throughput": 19858.58, "total_tokens": 210748864}
|
|
{"current_steps": 66955, "total_steps": 78105, "loss": 0.1691, "lr": 3.0408399308166916e-07, "epoch": 4.286217271621535, "percentage": 85.72, "elapsed_time": "2:56:53", "remaining_time": "0:29:27", "throughput": 19858.84, "total_tokens": 210765888}
|
|
{"current_steps": 66960, "total_steps": 78105, "loss": 0.1109, "lr": 3.038170188299128e-07, "epoch": 4.286537353562512, "percentage": 85.73, "elapsed_time": "2:56:53", "remaining_time": "0:29:26", "throughput": 19859.04, "total_tokens": 210781504}
|
|
{"current_steps": 66965, "total_steps": 78105, "loss": 0.1941, "lr": 3.035501542440958e-07, "epoch": 4.286857435503489, "percentage": 85.74, "elapsed_time": "2:56:54", "remaining_time": "0:29:25", "throughput": 19859.34, "total_tokens": 210799424}
|
|
{"current_steps": 66970, "total_steps": 78105, "loss": 0.1214, "lr": 3.032833993375431e-07, "epoch": 4.287177517444466, "percentage": 85.74, "elapsed_time": "2:56:55", "remaining_time": "0:29:24", "throughput": 19859.56, "total_tokens": 210815424}
|
|
{"current_steps": 66975, "total_steps": 78105, "loss": 0.1333, "lr": 3.030167541235751e-07, "epoch": 4.287497599385443, "percentage": 85.75, "elapsed_time": "2:56:55", "remaining_time": "0:29:24", "throughput": 19859.77, "total_tokens": 210830784}
|
|
{"current_steps": 66980, "total_steps": 78105, "loss": 0.1684, "lr": 3.027502186155068e-07, "epoch": 4.28781768132642, "percentage": 85.76, "elapsed_time": "2:56:56", "remaining_time": "0:29:23", "throughput": 19859.98, "total_tokens": 210846656}
|
|
{"current_steps": 66985, "total_steps": 78105, "loss": 0.1467, "lr": 3.024837928266475e-07, "epoch": 4.288137763267397, "percentage": 85.76, "elapsed_time": "2:56:57", "remaining_time": "0:29:22", "throughput": 19860.17, "total_tokens": 210861760}
|
|
{"current_steps": 66990, "total_steps": 78105, "loss": 0.1134, "lr": 3.0221747677030094e-07, "epoch": 4.288457845208374, "percentage": 85.77, "elapsed_time": "2:56:58", "remaining_time": "0:29:21", "throughput": 19860.44, "total_tokens": 210878784}
|
|
{"current_steps": 66995, "total_steps": 78105, "loss": 0.1247, "lr": 3.019512704597655e-07, "epoch": 4.288777927149351, "percentage": 85.78, "elapsed_time": "2:56:58", "remaining_time": "0:29:20", "throughput": 19860.65, "total_tokens": 210894336}
|
|
{"current_steps": 67000, "total_steps": 78105, "loss": 0.0949, "lr": 3.0168517390833485e-07, "epoch": 4.289098009090327, "percentage": 85.78, "elapsed_time": "2:56:59", "remaining_time": "0:29:20", "throughput": 19860.89, "total_tokens": 210910912}
|
|
{"current_steps": 67005, "total_steps": 78105, "loss": 0.1361, "lr": 3.0141918712929595e-07, "epoch": 4.289418091031304, "percentage": 85.79, "elapsed_time": "2:57:00", "remaining_time": "0:29:19", "throughput": 19861.08, "total_tokens": 210926144}
|
|
{"current_steps": 67010, "total_steps": 78105, "loss": 0.1263, "lr": 3.0115331013593096e-07, "epoch": 4.289738172972281, "percentage": 85.79, "elapsed_time": "2:57:00", "remaining_time": "0:29:18", "throughput": 19861.32, "total_tokens": 210942080}
|
|
{"current_steps": 67015, "total_steps": 78105, "loss": 0.1491, "lr": 3.0088754294151616e-07, "epoch": 4.290058254913258, "percentage": 85.8, "elapsed_time": "2:57:01", "remaining_time": "0:29:17", "throughput": 19861.58, "total_tokens": 210958848}
|
|
{"current_steps": 67020, "total_steps": 78105, "loss": 0.1034, "lr": 3.006218855593229e-07, "epoch": 4.290378336854235, "percentage": 85.81, "elapsed_time": "2:57:02", "remaining_time": "0:29:16", "throughput": 19861.78, "total_tokens": 210974336}
|
|
{"current_steps": 67025, "total_steps": 78105, "loss": 0.1211, "lr": 3.003563380026159e-07, "epoch": 4.290698418795212, "percentage": 85.81, "elapsed_time": "2:57:02", "remaining_time": "0:29:16", "throughput": 19862.08, "total_tokens": 210992256}
|
|
{"current_steps": 67030, "total_steps": 78105, "loss": 0.1435, "lr": 3.0009090028465693e-07, "epoch": 4.291018500736189, "percentage": 85.82, "elapsed_time": "2:57:03", "remaining_time": "0:29:15", "throughput": 19862.27, "total_tokens": 211007424}
|
|
{"current_steps": 67035, "total_steps": 78105, "loss": 0.117, "lr": 2.9982557241869827e-07, "epoch": 4.291338582677166, "percentage": 85.83, "elapsed_time": "2:57:04", "remaining_time": "0:29:14", "throughput": 19862.55, "total_tokens": 211024640}
|
|
{"current_steps": 67040, "total_steps": 78105, "loss": 0.099, "lr": 2.9956035441799147e-07, "epoch": 4.291658664618142, "percentage": 85.83, "elapsed_time": "2:57:04", "remaining_time": "0:29:13", "throughput": 19862.76, "total_tokens": 211040512}
|
|
{"current_steps": 67045, "total_steps": 78105, "loss": 0.124, "lr": 2.992952462957779e-07, "epoch": 4.291978746559119, "percentage": 85.84, "elapsed_time": "2:57:05", "remaining_time": "0:29:12", "throughput": 19862.99, "total_tokens": 211056704}
|
|
{"current_steps": 67050, "total_steps": 78105, "loss": 0.1007, "lr": 2.990302480652971e-07, "epoch": 4.292298828500096, "percentage": 85.85, "elapsed_time": "2:57:06", "remaining_time": "0:29:12", "throughput": 19863.21, "total_tokens": 211072896}
|
|
{"current_steps": 67055, "total_steps": 78105, "loss": 0.1816, "lr": 2.9876535973978134e-07, "epoch": 4.292618910441073, "percentage": 85.85, "elapsed_time": "2:57:06", "remaining_time": "0:29:11", "throughput": 19863.42, "total_tokens": 211088128}
|
|
{"current_steps": 67060, "total_steps": 78105, "loss": 0.128, "lr": 2.9850058133245707e-07, "epoch": 4.29293899238205, "percentage": 85.86, "elapsed_time": "2:57:07", "remaining_time": "0:29:10", "throughput": 19863.64, "total_tokens": 211104256}
|
|
{"current_steps": 67065, "total_steps": 78105, "loss": 0.1425, "lr": 2.982359128565476e-07, "epoch": 4.293259074323027, "percentage": 85.87, "elapsed_time": "2:57:08", "remaining_time": "0:29:09", "throughput": 19863.82, "total_tokens": 211118912}
|
|
{"current_steps": 67070, "total_steps": 78105, "loss": 0.1409, "lr": 2.979713543252671e-07, "epoch": 4.293579156264004, "percentage": 85.87, "elapsed_time": "2:57:08", "remaining_time": "0:29:08", "throughput": 19864.0, "total_tokens": 211133888}
|
|
{"current_steps": 67075, "total_steps": 78105, "loss": 0.1371, "lr": 2.977069057518284e-07, "epoch": 4.293899238204981, "percentage": 85.88, "elapsed_time": "2:57:09", "remaining_time": "0:29:07", "throughput": 19864.25, "total_tokens": 211150464}
|
|
{"current_steps": 67080, "total_steps": 78105, "loss": 0.1069, "lr": 2.9744256714943407e-07, "epoch": 4.294219320145958, "percentage": 85.88, "elapsed_time": "2:57:10", "remaining_time": "0:29:07", "throughput": 19864.43, "total_tokens": 211165120}
|
|
{"current_steps": 67085, "total_steps": 78105, "loss": 0.1297, "lr": 2.97178338531286e-07, "epoch": 4.294539402086934, "percentage": 85.89, "elapsed_time": "2:57:11", "remaining_time": "0:29:06", "throughput": 19864.64, "total_tokens": 211181056}
|
|
{"current_steps": 67090, "total_steps": 78105, "loss": 0.1655, "lr": 2.969142199105776e-07, "epoch": 4.294859484027911, "percentage": 85.9, "elapsed_time": "2:57:11", "remaining_time": "0:29:05", "throughput": 19864.86, "total_tokens": 211197120}
|
|
{"current_steps": 67095, "total_steps": 78105, "loss": 0.1848, "lr": 2.9665021130049767e-07, "epoch": 4.295179565968888, "percentage": 85.9, "elapsed_time": "2:57:12", "remaining_time": "0:29:04", "throughput": 19865.05, "total_tokens": 211212032}
|
|
{"current_steps": 67100, "total_steps": 78105, "loss": 0.139, "lr": 2.963863127142294e-07, "epoch": 4.295499647909865, "percentage": 85.91, "elapsed_time": "2:57:12", "remaining_time": "0:29:03", "throughput": 19865.23, "total_tokens": 211226624}
|
|
{"current_steps": 67105, "total_steps": 78105, "loss": 0.1133, "lr": 2.9612252416494985e-07, "epoch": 4.295819729850842, "percentage": 85.92, "elapsed_time": "2:57:13", "remaining_time": "0:29:03", "throughput": 19865.39, "total_tokens": 211240896}
|
|
{"current_steps": 67110, "total_steps": 78105, "loss": 0.1605, "lr": 2.958588456658323e-07, "epoch": 4.296139811791819, "percentage": 85.92, "elapsed_time": "2:57:14", "remaining_time": "0:29:02", "throughput": 19865.59, "total_tokens": 211256192}
|
|
{"current_steps": 67115, "total_steps": 78105, "loss": 0.1421, "lr": 2.955952772300433e-07, "epoch": 4.296459893732796, "percentage": 85.93, "elapsed_time": "2:57:14", "remaining_time": "0:29:01", "throughput": 19865.83, "total_tokens": 211272512}
|
|
{"current_steps": 67120, "total_steps": 78105, "loss": 0.1326, "lr": 2.9533181887074353e-07, "epoch": 4.296779975673773, "percentage": 85.94, "elapsed_time": "2:57:15", "remaining_time": "0:29:00", "throughput": 19866.02, "total_tokens": 211287744}
|
|
{"current_steps": 67125, "total_steps": 78105, "loss": 0.1269, "lr": 2.950684706010892e-07, "epoch": 4.29710005761475, "percentage": 85.94, "elapsed_time": "2:57:16", "remaining_time": "0:28:59", "throughput": 19866.22, "total_tokens": 211302528}
|
|
{"current_steps": 67130, "total_steps": 78105, "loss": 0.119, "lr": 2.9480523243423043e-07, "epoch": 4.297420139555726, "percentage": 85.95, "elapsed_time": "2:57:16", "remaining_time": "0:28:59", "throughput": 19866.41, "total_tokens": 211318208}
|
|
{"current_steps": 67135, "total_steps": 78105, "loss": 0.0983, "lr": 2.945421043833119e-07, "epoch": 4.297740221496703, "percentage": 85.95, "elapsed_time": "2:57:17", "remaining_time": "0:28:58", "throughput": 19866.59, "total_tokens": 211333120}
|
|
{"current_steps": 67140, "total_steps": 78105, "loss": 0.1105, "lr": 2.942790864614728e-07, "epoch": 4.29806030343768, "percentage": 85.96, "elapsed_time": "2:57:18", "remaining_time": "0:28:57", "throughput": 19866.81, "total_tokens": 211348992}
|
|
{"current_steps": 67145, "total_steps": 78105, "loss": 0.1588, "lr": 2.9401617868184626e-07, "epoch": 4.298380385378657, "percentage": 85.97, "elapsed_time": "2:57:18", "remaining_time": "0:28:56", "throughput": 19867.04, "total_tokens": 211364992}
|
|
{"current_steps": 67150, "total_steps": 78105, "loss": 0.103, "lr": 2.9375338105756243e-07, "epoch": 4.298700467319634, "percentage": 85.97, "elapsed_time": "2:57:19", "remaining_time": "0:28:55", "throughput": 19867.23, "total_tokens": 211380224}
|
|
{"current_steps": 67155, "total_steps": 78105, "loss": 0.1047, "lr": 2.934906936017418e-07, "epoch": 4.299020549260611, "percentage": 85.98, "elapsed_time": "2:57:20", "remaining_time": "0:28:54", "throughput": 19867.47, "total_tokens": 211396416}
|
|
{"current_steps": 67160, "total_steps": 78105, "loss": 0.075, "lr": 2.932281163275033e-07, "epoch": 4.299340631201588, "percentage": 85.99, "elapsed_time": "2:57:20", "remaining_time": "0:28:54", "throughput": 19867.67, "total_tokens": 211411520}
|
|
{"current_steps": 67165, "total_steps": 78105, "loss": 0.1115, "lr": 2.929656492479577e-07, "epoch": 4.299660713142565, "percentage": 85.99, "elapsed_time": "2:57:21", "remaining_time": "0:28:53", "throughput": 19867.88, "total_tokens": 211427136}
|
|
{"current_steps": 67170, "total_steps": 78105, "loss": 0.1317, "lr": 2.927032923762116e-07, "epoch": 4.299980795083542, "percentage": 86.0, "elapsed_time": "2:57:22", "remaining_time": "0:28:52", "throughput": 19868.07, "total_tokens": 211442304}
|
|
{"current_steps": 67175, "total_steps": 78105, "loss": 0.127, "lr": 2.9244104572536595e-07, "epoch": 4.300300877024518, "percentage": 86.01, "elapsed_time": "2:57:22", "remaining_time": "0:28:51", "throughput": 19868.27, "total_tokens": 211457344}
|
|
{"current_steps": 67180, "total_steps": 78105, "loss": 0.1585, "lr": 2.9217890930851505e-07, "epoch": 4.300620958965495, "percentage": 86.01, "elapsed_time": "2:57:23", "remaining_time": "0:28:50", "throughput": 19868.5, "total_tokens": 211473472}
|
|
{"current_steps": 67185, "total_steps": 78105, "loss": 0.1279, "lr": 2.9191688313875035e-07, "epoch": 4.300941040906472, "percentage": 86.02, "elapsed_time": "2:57:24", "remaining_time": "0:28:50", "throughput": 19868.73, "total_tokens": 211489536}
|
|
{"current_steps": 67190, "total_steps": 78105, "loss": 0.1289, "lr": 2.916549672291538e-07, "epoch": 4.301261122847449, "percentage": 86.03, "elapsed_time": "2:57:25", "remaining_time": "0:28:49", "throughput": 19868.95, "total_tokens": 211505216}
|
|
{"current_steps": 67195, "total_steps": 78105, "loss": 0.1253, "lr": 2.9139316159280617e-07, "epoch": 4.301581204788426, "percentage": 86.03, "elapsed_time": "2:57:25", "remaining_time": "0:28:48", "throughput": 19868.8, "total_tokens": 211522816}
|
|
{"current_steps": 67200, "total_steps": 78105, "loss": 0.119, "lr": 2.911314662427797e-07, "epoch": 4.301901286729403, "percentage": 86.04, "elapsed_time": "2:57:26", "remaining_time": "0:28:47", "throughput": 19868.99, "total_tokens": 211537856}
|
|
{"current_steps": 67205, "total_steps": 78105, "loss": 0.1219, "lr": 2.9086988119214207e-07, "epoch": 4.30222136867038, "percentage": 86.04, "elapsed_time": "2:57:27", "remaining_time": "0:28:46", "throughput": 19869.21, "total_tokens": 211553664}
|
|
{"current_steps": 67210, "total_steps": 78105, "loss": 0.1333, "lr": 2.906084064539558e-07, "epoch": 4.302541450611357, "percentage": 86.05, "elapsed_time": "2:57:28", "remaining_time": "0:28:46", "throughput": 19869.48, "total_tokens": 211570880}
|
|
{"current_steps": 67215, "total_steps": 78105, "loss": 0.1191, "lr": 2.903470420412774e-07, "epoch": 4.302861532552333, "percentage": 86.06, "elapsed_time": "2:57:28", "remaining_time": "0:28:45", "throughput": 19869.7, "total_tokens": 211586880}
|
|
{"current_steps": 67220, "total_steps": 78105, "loss": 0.1304, "lr": 2.9008578796715814e-07, "epoch": 4.30318161449331, "percentage": 86.06, "elapsed_time": "2:57:29", "remaining_time": "0:28:44", "throughput": 19869.91, "total_tokens": 211602752}
|
|
{"current_steps": 67225, "total_steps": 78105, "loss": 0.1196, "lr": 2.8982464424464286e-07, "epoch": 4.303501696434287, "percentage": 86.07, "elapsed_time": "2:57:30", "remaining_time": "0:28:43", "throughput": 19870.1, "total_tokens": 211617984}
|
|
{"current_steps": 67230, "total_steps": 78105, "loss": 0.1474, "lr": 2.895636108867733e-07, "epoch": 4.303821778375264, "percentage": 86.08, "elapsed_time": "2:57:30", "remaining_time": "0:28:42", "throughput": 19870.3, "total_tokens": 211632960}
|
|
{"current_steps": 67235, "total_steps": 78105, "loss": 0.1015, "lr": 2.89302687906583e-07, "epoch": 4.304141860316241, "percentage": 86.08, "elapsed_time": "2:57:31", "remaining_time": "0:28:42", "throughput": 19870.51, "total_tokens": 211648704}
|
|
{"current_steps": 67240, "total_steps": 78105, "loss": 0.1068, "lr": 2.8904187531710147e-07, "epoch": 4.304461942257218, "percentage": 86.09, "elapsed_time": "2:57:32", "remaining_time": "0:28:41", "throughput": 19870.67, "total_tokens": 211663296}
|
|
{"current_steps": 67245, "total_steps": 78105, "loss": 0.1566, "lr": 2.8878117313135225e-07, "epoch": 4.304782024198195, "percentage": 86.1, "elapsed_time": "2:57:32", "remaining_time": "0:28:40", "throughput": 19870.92, "total_tokens": 211679744}
|
|
{"current_steps": 67250, "total_steps": 78105, "loss": 0.1574, "lr": 2.885205813623534e-07, "epoch": 4.305102106139172, "percentage": 86.1, "elapsed_time": "2:57:33", "remaining_time": "0:28:39", "throughput": 19871.12, "total_tokens": 211695296}
|
|
{"current_steps": 67255, "total_steps": 78105, "loss": 0.1124, "lr": 2.8826010002311697e-07, "epoch": 4.305422188080149, "percentage": 86.11, "elapsed_time": "2:57:34", "remaining_time": "0:28:38", "throughput": 19871.36, "total_tokens": 211711616}
|
|
{"current_steps": 67260, "total_steps": 78105, "loss": 0.1695, "lr": 2.8799972912665176e-07, "epoch": 4.305742270021126, "percentage": 86.11, "elapsed_time": "2:57:34", "remaining_time": "0:28:37", "throughput": 19871.56, "total_tokens": 211727232}
|
|
{"current_steps": 67265, "total_steps": 78105, "loss": 0.1305, "lr": 2.877394686859569e-07, "epoch": 4.306062351962102, "percentage": 86.12, "elapsed_time": "2:57:35", "remaining_time": "0:28:37", "throughput": 19871.73, "total_tokens": 211742016}
|
|
{"current_steps": 67270, "total_steps": 78105, "loss": 0.1308, "lr": 2.87479318714031e-07, "epoch": 4.306382433903079, "percentage": 86.13, "elapsed_time": "2:57:36", "remaining_time": "0:28:36", "throughput": 19871.9, "total_tokens": 211756544}
|
|
{"current_steps": 67275, "total_steps": 78105, "loss": 0.1009, "lr": 2.872192792238623e-07, "epoch": 4.306702515844056, "percentage": 86.13, "elapsed_time": "2:57:36", "remaining_time": "0:28:35", "throughput": 19872.09, "total_tokens": 211771968}
|
|
{"current_steps": 67280, "total_steps": 78105, "loss": 0.1001, "lr": 2.8695935022843737e-07, "epoch": 4.307022597785033, "percentage": 86.14, "elapsed_time": "2:57:37", "remaining_time": "0:28:34", "throughput": 19872.3, "total_tokens": 211787712}
|
|
{"current_steps": 67285, "total_steps": 78105, "loss": 0.0968, "lr": 2.8669953174073506e-07, "epoch": 4.30734267972601, "percentage": 86.15, "elapsed_time": "2:57:38", "remaining_time": "0:28:33", "throughput": 19872.54, "total_tokens": 211803712}
|
|
{"current_steps": 67290, "total_steps": 78105, "loss": 0.1407, "lr": 2.86439823773729e-07, "epoch": 4.307662761666987, "percentage": 86.15, "elapsed_time": "2:57:38", "remaining_time": "0:28:33", "throughput": 19872.74, "total_tokens": 211819200}
|
|
{"current_steps": 67295, "total_steps": 78105, "loss": 0.0711, "lr": 2.86180226340389e-07, "epoch": 4.307982843607964, "percentage": 86.16, "elapsed_time": "2:57:39", "remaining_time": "0:28:32", "throughput": 19872.98, "total_tokens": 211835840}
|
|
{"current_steps": 67300, "total_steps": 78105, "loss": 0.1331, "lr": 2.8592073945367645e-07, "epoch": 4.308302925548941, "percentage": 86.17, "elapsed_time": "2:57:40", "remaining_time": "0:28:31", "throughput": 19873.18, "total_tokens": 211851264}
|
|
{"current_steps": 67305, "total_steps": 78105, "loss": 0.0881, "lr": 2.8566136312655006e-07, "epoch": 4.308623007489917, "percentage": 86.17, "elapsed_time": "2:57:40", "remaining_time": "0:28:30", "throughput": 19873.39, "total_tokens": 211866688}
|
|
{"current_steps": 67310, "total_steps": 78105, "loss": 0.0945, "lr": 2.854020973719604e-07, "epoch": 4.308943089430894, "percentage": 86.18, "elapsed_time": "2:57:41", "remaining_time": "0:28:29", "throughput": 19873.59, "total_tokens": 211881984}
|
|
{"current_steps": 67315, "total_steps": 78105, "loss": 0.1264, "lr": 2.851429422028551e-07, "epoch": 4.309263171371871, "percentage": 86.19, "elapsed_time": "2:57:42", "remaining_time": "0:28:29", "throughput": 19873.8, "total_tokens": 211897792}
|
|
{"current_steps": 67320, "total_steps": 78105, "loss": 0.1337, "lr": 2.848838976321744e-07, "epoch": 4.309583253312848, "percentage": 86.19, "elapsed_time": "2:57:42", "remaining_time": "0:28:28", "throughput": 19874.03, "total_tokens": 211913792}
|
|
{"current_steps": 67325, "total_steps": 78105, "loss": 0.1773, "lr": 2.8462496367285377e-07, "epoch": 4.309903335253825, "percentage": 86.2, "elapsed_time": "2:57:43", "remaining_time": "0:28:27", "throughput": 19874.29, "total_tokens": 211930880}
|
|
{"current_steps": 67330, "total_steps": 78105, "loss": 0.1356, "lr": 2.8436614033782286e-07, "epoch": 4.310223417194802, "percentage": 86.2, "elapsed_time": "2:57:44", "remaining_time": "0:28:26", "throughput": 19874.49, "total_tokens": 211946368}
|
|
{"current_steps": 67335, "total_steps": 78105, "loss": 0.1384, "lr": 2.841074276400063e-07, "epoch": 4.310543499135779, "percentage": 86.21, "elapsed_time": "2:57:44", "remaining_time": "0:28:25", "throughput": 19874.71, "total_tokens": 211962240}
|
|
{"current_steps": 67340, "total_steps": 78105, "loss": 0.1178, "lr": 2.838488255923219e-07, "epoch": 4.310863581076756, "percentage": 86.22, "elapsed_time": "2:57:45", "remaining_time": "0:28:25", "throughput": 19874.95, "total_tokens": 211978624}
|
|
{"current_steps": 67345, "total_steps": 78105, "loss": 0.0958, "lr": 2.835903342076843e-07, "epoch": 4.311183663017733, "percentage": 86.22, "elapsed_time": "2:57:46", "remaining_time": "0:28:24", "throughput": 19875.14, "total_tokens": 211993792}
|
|
{"current_steps": 67350, "total_steps": 78105, "loss": 0.1116, "lr": 2.8333195349900064e-07, "epoch": 4.311503744958709, "percentage": 86.23, "elapsed_time": "2:57:46", "remaining_time": "0:28:23", "throughput": 19875.37, "total_tokens": 212009856}
|
|
{"current_steps": 67355, "total_steps": 78105, "loss": 0.1179, "lr": 2.830736834791728e-07, "epoch": 4.311823826899686, "percentage": 86.24, "elapsed_time": "2:57:47", "remaining_time": "0:28:22", "throughput": 19875.56, "total_tokens": 212025216}
|
|
{"current_steps": 67360, "total_steps": 78105, "loss": 0.0726, "lr": 2.8281552416109773e-07, "epoch": 4.312143908840663, "percentage": 86.24, "elapsed_time": "2:57:48", "remaining_time": "0:28:21", "throughput": 19875.8, "total_tokens": 212041920}
|
|
{"current_steps": 67365, "total_steps": 78105, "loss": 0.1373, "lr": 2.8255747555766654e-07, "epoch": 4.31246399078164, "percentage": 86.25, "elapsed_time": "2:57:49", "remaining_time": "0:28:20", "throughput": 19876.05, "total_tokens": 212058432}
|
|
{"current_steps": 67370, "total_steps": 78105, "loss": 0.1017, "lr": 2.822995376817647e-07, "epoch": 4.312784072722617, "percentage": 86.26, "elapsed_time": "2:57:49", "remaining_time": "0:28:20", "throughput": 19876.23, "total_tokens": 212073216}
|
|
{"current_steps": 67375, "total_steps": 78105, "loss": 0.1448, "lr": 2.8204171054627194e-07, "epoch": 4.313104154663594, "percentage": 86.26, "elapsed_time": "2:57:50", "remaining_time": "0:28:19", "throughput": 19876.47, "total_tokens": 212089792}
|
|
{"current_steps": 67380, "total_steps": 78105, "loss": 0.1223, "lr": 2.817839941640643e-07, "epoch": 4.313424236604571, "percentage": 86.27, "elapsed_time": "2:57:51", "remaining_time": "0:28:18", "throughput": 19876.69, "total_tokens": 212105984}
|
|
{"current_steps": 67385, "total_steps": 78105, "loss": 0.1467, "lr": 2.815263885480088e-07, "epoch": 4.313744318545548, "percentage": 86.27, "elapsed_time": "2:57:51", "remaining_time": "0:28:17", "throughput": 19876.85, "total_tokens": 212120640}
|
|
{"current_steps": 67390, "total_steps": 78105, "loss": 0.1213, "lr": 2.8126889371097006e-07, "epoch": 4.314064400486525, "percentage": 86.28, "elapsed_time": "2:57:52", "remaining_time": "0:28:16", "throughput": 19877.05, "total_tokens": 212136064}
|
|
{"current_steps": 67395, "total_steps": 78105, "loss": 0.1132, "lr": 2.810115096658059e-07, "epoch": 4.314384482427501, "percentage": 86.29, "elapsed_time": "2:57:53", "remaining_time": "0:28:16", "throughput": 19877.25, "total_tokens": 212151488}
|
|
{"current_steps": 67400, "total_steps": 78105, "loss": 0.1291, "lr": 2.8075423642536876e-07, "epoch": 4.314704564368478, "percentage": 86.29, "elapsed_time": "2:57:53", "remaining_time": "0:28:15", "throughput": 19877.42, "total_tokens": 212166016}
|
|
{"current_steps": 67405, "total_steps": 78105, "loss": 0.1273, "lr": 2.8049707400250534e-07, "epoch": 4.315024646309455, "percentage": 86.3, "elapsed_time": "2:57:54", "remaining_time": "0:28:14", "throughput": 19877.6, "total_tokens": 212180928}
|
|
{"current_steps": 67410, "total_steps": 78105, "loss": 0.1066, "lr": 2.8024002241005654e-07, "epoch": 4.315344728250432, "percentage": 86.31, "elapsed_time": "2:57:55", "remaining_time": "0:28:13", "throughput": 19877.79, "total_tokens": 212196352}
|
|
{"current_steps": 67415, "total_steps": 78105, "loss": 0.213, "lr": 2.799830816608598e-07, "epoch": 4.315664810191409, "percentage": 86.31, "elapsed_time": "2:57:55", "remaining_time": "0:28:12", "throughput": 19878.02, "total_tokens": 212212480}
|
|
{"current_steps": 67420, "total_steps": 78105, "loss": 0.1763, "lr": 2.797262517677435e-07, "epoch": 4.315984892132386, "percentage": 86.32, "elapsed_time": "2:57:56", "remaining_time": "0:28:12", "throughput": 19878.19, "total_tokens": 212227072}
|
|
{"current_steps": 67425, "total_steps": 78105, "loss": 0.1169, "lr": 2.7946953274353346e-07, "epoch": 4.316304974073363, "percentage": 86.33, "elapsed_time": "2:57:57", "remaining_time": "0:28:11", "throughput": 19878.38, "total_tokens": 212242176}
|
|
{"current_steps": 67430, "total_steps": 78105, "loss": 0.1239, "lr": 2.792129246010486e-07, "epoch": 4.31662505601434, "percentage": 86.33, "elapsed_time": "2:57:57", "remaining_time": "0:28:10", "throughput": 19878.58, "total_tokens": 212257408}
|
|
{"current_steps": 67435, "total_steps": 78105, "loss": 0.1388, "lr": 2.7895642735310285e-07, "epoch": 4.316945137955317, "percentage": 86.34, "elapsed_time": "2:57:58", "remaining_time": "0:28:09", "throughput": 19878.82, "total_tokens": 212273856}
|
|
{"current_steps": 67440, "total_steps": 78105, "loss": 0.1176, "lr": 2.78700041012504e-07, "epoch": 4.317265219896293, "percentage": 86.35, "elapsed_time": "2:57:59", "remaining_time": "0:28:08", "throughput": 19879.1, "total_tokens": 212291456}
|
|
{"current_steps": 67445, "total_steps": 78105, "loss": 0.11, "lr": 2.7844376559205464e-07, "epoch": 4.31758530183727, "percentage": 86.35, "elapsed_time": "2:57:59", "remaining_time": "0:28:07", "throughput": 19879.3, "total_tokens": 212306688}
|
|
{"current_steps": 67450, "total_steps": 78105, "loss": 0.1237, "lr": 2.78187601104552e-07, "epoch": 4.317905383778247, "percentage": 86.36, "elapsed_time": "2:58:00", "remaining_time": "0:28:07", "throughput": 19879.52, "total_tokens": 212322944}
|
|
{"current_steps": 67455, "total_steps": 78105, "loss": 0.1884, "lr": 2.779315475627872e-07, "epoch": 4.318225465719224, "percentage": 86.36, "elapsed_time": "2:58:01", "remaining_time": "0:28:06", "throughput": 19879.72, "total_tokens": 212338688}
|
|
{"current_steps": 67460, "total_steps": 78105, "loss": 0.1658, "lr": 2.7767560497954704e-07, "epoch": 4.318545547660201, "percentage": 86.37, "elapsed_time": "2:58:01", "remaining_time": "0:28:05", "throughput": 19879.97, "total_tokens": 212355392}
|
|
{"current_steps": 67465, "total_steps": 78105, "loss": 0.1622, "lr": 2.774197733676115e-07, "epoch": 4.318865629601178, "percentage": 86.38, "elapsed_time": "2:58:02", "remaining_time": "0:28:04", "throughput": 19880.21, "total_tokens": 212372032}
|
|
{"current_steps": 67470, "total_steps": 78105, "loss": 0.1957, "lr": 2.771640527397554e-07, "epoch": 4.319185711542155, "percentage": 86.38, "elapsed_time": "2:58:03", "remaining_time": "0:28:03", "throughput": 19880.43, "total_tokens": 212388288}
|
|
{"current_steps": 67475, "total_steps": 78105, "loss": 0.1136, "lr": 2.7690844310874847e-07, "epoch": 4.319505793483132, "percentage": 86.39, "elapsed_time": "2:58:03", "remaining_time": "0:28:03", "throughput": 19880.61, "total_tokens": 212402944}
|
|
{"current_steps": 67480, "total_steps": 78105, "loss": 0.2043, "lr": 2.766529444873539e-07, "epoch": 4.319825875424108, "percentage": 86.4, "elapsed_time": "2:58:04", "remaining_time": "0:28:02", "throughput": 19880.87, "total_tokens": 212419776}
|
|
{"current_steps": 67485, "total_steps": 78105, "loss": 0.1526, "lr": 2.763975568883298e-07, "epoch": 4.320145957365085, "percentage": 86.4, "elapsed_time": "2:58:05", "remaining_time": "0:28:01", "throughput": 19881.04, "total_tokens": 212434688}
|
|
{"current_steps": 67490, "total_steps": 78105, "loss": 0.0931, "lr": 2.761422803244307e-07, "epoch": 4.320466039306062, "percentage": 86.41, "elapsed_time": "2:58:05", "remaining_time": "0:28:00", "throughput": 19881.27, "total_tokens": 212451072}
|
|
{"current_steps": 67495, "total_steps": 78105, "loss": 0.1119, "lr": 2.758871148084011e-07, "epoch": 4.320786121247039, "percentage": 86.42, "elapsed_time": "2:58:06", "remaining_time": "0:27:59", "throughput": 19881.46, "total_tokens": 212466432}
|
|
{"current_steps": 67500, "total_steps": 78105, "loss": 0.1451, "lr": 2.7563206035298525e-07, "epoch": 4.321106203188016, "percentage": 86.42, "elapsed_time": "2:58:07", "remaining_time": "0:27:59", "throughput": 19881.63, "total_tokens": 212481472}
|
|
{"current_steps": 67505, "total_steps": 78105, "loss": 0.0936, "lr": 2.7537711697091685e-07, "epoch": 4.321426285128993, "percentage": 86.43, "elapsed_time": "2:58:07", "remaining_time": "0:27:58", "throughput": 19881.82, "total_tokens": 212496576}
|
|
{"current_steps": 67510, "total_steps": 78105, "loss": 0.1878, "lr": 2.7512228467492826e-07, "epoch": 4.32174636706997, "percentage": 86.43, "elapsed_time": "2:58:08", "remaining_time": "0:27:57", "throughput": 19882.01, "total_tokens": 212511808}
|
|
{"current_steps": 67515, "total_steps": 78105, "loss": 0.113, "lr": 2.748675634777434e-07, "epoch": 4.322066449010947, "percentage": 86.44, "elapsed_time": "2:58:09", "remaining_time": "0:27:56", "throughput": 19882.19, "total_tokens": 212526848}
|
|
{"current_steps": 67520, "total_steps": 78105, "loss": 0.0943, "lr": 2.7461295339208214e-07, "epoch": 4.322386530951924, "percentage": 86.45, "elapsed_time": "2:58:09", "remaining_time": "0:27:55", "throughput": 19882.39, "total_tokens": 212542272}
|
|
{"current_steps": 67525, "total_steps": 78105, "loss": 0.1284, "lr": 2.7435845443065906e-07, "epoch": 4.322706612892901, "percentage": 86.45, "elapsed_time": "2:58:10", "remaining_time": "0:27:55", "throughput": 19882.62, "total_tokens": 212558400}
|
|
{"current_steps": 67530, "total_steps": 78105, "loss": 0.1714, "lr": 2.741040666061809e-07, "epoch": 4.323026694833877, "percentage": 86.46, "elapsed_time": "2:58:11", "remaining_time": "0:27:54", "throughput": 19882.83, "total_tokens": 212574144}
|
|
{"current_steps": 67535, "total_steps": 78105, "loss": 0.1202, "lr": 2.738497899313525e-07, "epoch": 4.323346776774854, "percentage": 86.47, "elapsed_time": "2:58:12", "remaining_time": "0:27:53", "throughput": 19883.03, "total_tokens": 212589888}
|
|
{"current_steps": 67540, "total_steps": 78105, "loss": 0.1282, "lr": 2.7359562441886867e-07, "epoch": 4.323666858715831, "percentage": 86.47, "elapsed_time": "2:58:12", "remaining_time": "0:27:52", "throughput": 19883.24, "total_tokens": 212605696}
|
|
{"current_steps": 67545, "total_steps": 78105, "loss": 0.0927, "lr": 2.7334157008142323e-07, "epoch": 4.323986940656808, "percentage": 86.48, "elapsed_time": "2:58:13", "remaining_time": "0:27:51", "throughput": 19883.44, "total_tokens": 212621248}
|
|
{"current_steps": 67550, "total_steps": 78105, "loss": 0.1703, "lr": 2.730876269317015e-07, "epoch": 4.324307022597785, "percentage": 86.49, "elapsed_time": "2:58:14", "remaining_time": "0:27:51", "throughput": 19883.73, "total_tokens": 212638784}
|
|
{"current_steps": 67555, "total_steps": 78105, "loss": 0.164, "lr": 2.728337949823842e-07, "epoch": 4.324627104538762, "percentage": 86.49, "elapsed_time": "2:58:14", "remaining_time": "0:27:50", "throughput": 19883.91, "total_tokens": 212653888}
|
|
{"current_steps": 67560, "total_steps": 78105, "loss": 0.0987, "lr": 2.7258007424614595e-07, "epoch": 4.324947186479739, "percentage": 86.5, "elapsed_time": "2:58:15", "remaining_time": "0:27:49", "throughput": 19884.12, "total_tokens": 212669504}
|
|
{"current_steps": 67565, "total_steps": 78105, "loss": 0.118, "lr": 2.723264647356569e-07, "epoch": 4.325267268420716, "percentage": 86.51, "elapsed_time": "2:58:16", "remaining_time": "0:27:48", "throughput": 19884.32, "total_tokens": 212684864}
|
|
{"current_steps": 67570, "total_steps": 78105, "loss": 0.117, "lr": 2.7207296646358017e-07, "epoch": 4.325587350361692, "percentage": 86.51, "elapsed_time": "2:58:16", "remaining_time": "0:27:47", "throughput": 19884.56, "total_tokens": 212701632}
|
|
{"current_steps": 67575, "total_steps": 78105, "loss": 0.1121, "lr": 2.7181957944257515e-07, "epoch": 4.325907432302669, "percentage": 86.52, "elapsed_time": "2:58:17", "remaining_time": "0:27:46", "throughput": 19884.83, "total_tokens": 212718784}
|
|
{"current_steps": 67580, "total_steps": 78105, "loss": 0.1139, "lr": 2.715663036852939e-07, "epoch": 4.326227514243646, "percentage": 86.52, "elapsed_time": "2:58:18", "remaining_time": "0:27:46", "throughput": 19885.06, "total_tokens": 212735296}
|
|
{"current_steps": 67585, "total_steps": 78105, "loss": 0.1374, "lr": 2.7131313920438414e-07, "epoch": 4.326547596184623, "percentage": 86.53, "elapsed_time": "2:58:18", "remaining_time": "0:27:45", "throughput": 19885.26, "total_tokens": 212750848}
|
|
{"current_steps": 67590, "total_steps": 78105, "loss": 0.1773, "lr": 2.7106008601248737e-07, "epoch": 4.3268676781256, "percentage": 86.54, "elapsed_time": "2:58:19", "remaining_time": "0:27:44", "throughput": 19885.46, "total_tokens": 212766400}
|
|
{"current_steps": 67595, "total_steps": 78105, "loss": 0.1624, "lr": 2.7080714412223987e-07, "epoch": 4.327187760066577, "percentage": 86.54, "elapsed_time": "2:58:20", "remaining_time": "0:27:43", "throughput": 19885.66, "total_tokens": 212781504}
|
|
{"current_steps": 67600, "total_steps": 78105, "loss": 0.1569, "lr": 2.705543135462721e-07, "epoch": 4.327507842007554, "percentage": 86.55, "elapsed_time": "2:58:20", "remaining_time": "0:27:42", "throughput": 19885.93, "total_tokens": 212798784}
|
|
{"current_steps": 67605, "total_steps": 78105, "loss": 0.1138, "lr": 2.703015942972087e-07, "epoch": 4.327827923948531, "percentage": 86.56, "elapsed_time": "2:58:21", "remaining_time": "0:27:42", "throughput": 19886.22, "total_tokens": 212816384}
|
|
{"current_steps": 67610, "total_steps": 78105, "loss": 0.1259, "lr": 2.700489863876704e-07, "epoch": 4.328148005889508, "percentage": 86.56, "elapsed_time": "2:58:22", "remaining_time": "0:27:41", "throughput": 19886.42, "total_tokens": 212831808}
|
|
{"current_steps": 67615, "total_steps": 78105, "loss": 0.1065, "lr": 2.697964898302696e-07, "epoch": 4.328468087830484, "percentage": 86.57, "elapsed_time": "2:58:23", "remaining_time": "0:27:40", "throughput": 19886.62, "total_tokens": 212847360}
|
|
{"current_steps": 67620, "total_steps": 78105, "loss": 0.1117, "lr": 2.695441046376157e-07, "epoch": 4.328788169771461, "percentage": 86.58, "elapsed_time": "2:58:23", "remaining_time": "0:27:39", "throughput": 19886.82, "total_tokens": 212862976}
|
|
{"current_steps": 67625, "total_steps": 78105, "loss": 0.1428, "lr": 2.6929183082231136e-07, "epoch": 4.329108251712438, "percentage": 86.58, "elapsed_time": "2:58:24", "remaining_time": "0:27:38", "throughput": 19886.98, "total_tokens": 212877504}
|
|
{"current_steps": 67630, "total_steps": 78105, "loss": 0.1325, "lr": 2.6903966839695346e-07, "epoch": 4.329428333653415, "percentage": 86.59, "elapsed_time": "2:58:25", "remaining_time": "0:27:38", "throughput": 19887.16, "total_tokens": 212892544}
|
|
{"current_steps": 67635, "total_steps": 78105, "loss": 0.119, "lr": 2.6878761737413413e-07, "epoch": 4.329748415594392, "percentage": 86.59, "elapsed_time": "2:58:25", "remaining_time": "0:27:37", "throughput": 19887.35, "total_tokens": 212907968}
|
|
{"current_steps": 67640, "total_steps": 78105, "loss": 0.1283, "lr": 2.685356777664386e-07, "epoch": 4.330068497535369, "percentage": 86.6, "elapsed_time": "2:58:26", "remaining_time": "0:27:36", "throughput": 19887.56, "total_tokens": 212923840}
|
|
{"current_steps": 67645, "total_steps": 78105, "loss": 0.1442, "lr": 2.68283849586449e-07, "epoch": 4.330388579476346, "percentage": 86.61, "elapsed_time": "2:58:27", "remaining_time": "0:27:35", "throughput": 19887.76, "total_tokens": 212939200}
|
|
{"current_steps": 67650, "total_steps": 78105, "loss": 0.1297, "lr": 2.6803213284673834e-07, "epoch": 4.330708661417323, "percentage": 86.61, "elapsed_time": "2:58:27", "remaining_time": "0:27:34", "throughput": 19887.97, "total_tokens": 212955008}
|
|
{"current_steps": 67655, "total_steps": 78105, "loss": 0.127, "lr": 2.6778052755987743e-07, "epoch": 4.3310287433583, "percentage": 86.62, "elapsed_time": "2:58:28", "remaining_time": "0:27:34", "throughput": 19888.17, "total_tokens": 212970752}
|
|
{"current_steps": 67660, "total_steps": 78105, "loss": 0.1288, "lr": 2.675290337384301e-07, "epoch": 4.331348825299276, "percentage": 86.63, "elapsed_time": "2:58:29", "remaining_time": "0:27:33", "throughput": 19888.35, "total_tokens": 212986048}
|
|
{"current_steps": 67665, "total_steps": 78105, "loss": 0.1173, "lr": 2.6727765139495404e-07, "epoch": 4.331668907240253, "percentage": 86.63, "elapsed_time": "2:58:29", "remaining_time": "0:27:32", "throughput": 19888.55, "total_tokens": 213001728}
|
|
{"current_steps": 67670, "total_steps": 78105, "loss": 0.1382, "lr": 2.670263805420026e-07, "epoch": 4.33198898918123, "percentage": 86.64, "elapsed_time": "2:58:30", "remaining_time": "0:27:31", "throughput": 19888.81, "total_tokens": 213018624}
|
|
{"current_steps": 67675, "total_steps": 78105, "loss": 0.1322, "lr": 2.667752211921221e-07, "epoch": 4.332309071122207, "percentage": 86.65, "elapsed_time": "2:58:31", "remaining_time": "0:27:30", "throughput": 19889.01, "total_tokens": 213034112}
|
|
{"current_steps": 67680, "total_steps": 78105, "loss": 0.1283, "lr": 2.66524173357855e-07, "epoch": 4.332629153063184, "percentage": 86.65, "elapsed_time": "2:58:31", "remaining_time": "0:27:29", "throughput": 19889.19, "total_tokens": 213049024}
|
|
{"current_steps": 67685, "total_steps": 78105, "loss": 0.1605, "lr": 2.662732370517368e-07, "epoch": 4.332949235004161, "percentage": 86.66, "elapsed_time": "2:58:32", "remaining_time": "0:27:29", "throughput": 19889.41, "total_tokens": 213064640}
|
|
{"current_steps": 67690, "total_steps": 78105, "loss": 0.1374, "lr": 2.6602241228629757e-07, "epoch": 4.333269316945138, "percentage": 86.67, "elapsed_time": "2:58:33", "remaining_time": "0:27:28", "throughput": 19889.57, "total_tokens": 213079040}
|
|
{"current_steps": 67695, "total_steps": 78105, "loss": 0.1308, "lr": 2.65771699074063e-07, "epoch": 4.333589398886115, "percentage": 86.67, "elapsed_time": "2:58:33", "remaining_time": "0:27:27", "throughput": 19889.76, "total_tokens": 213094336}
|
|
{"current_steps": 67700, "total_steps": 78105, "loss": 0.1318, "lr": 2.655210974275524e-07, "epoch": 4.333909480827092, "percentage": 86.68, "elapsed_time": "2:58:34", "remaining_time": "0:27:26", "throughput": 19889.98, "total_tokens": 213110592}
|
|
{"current_steps": 67705, "total_steps": 78105, "loss": 0.1614, "lr": 2.652706073592792e-07, "epoch": 4.334229562768068, "percentage": 86.68, "elapsed_time": "2:58:35", "remaining_time": "0:27:25", "throughput": 19890.19, "total_tokens": 213126336}
|
|
{"current_steps": 67710, "total_steps": 78105, "loss": 0.1108, "lr": 2.650202288817516e-07, "epoch": 4.334549644709045, "percentage": 86.69, "elapsed_time": "2:58:35", "remaining_time": "0:27:25", "throughput": 19890.39, "total_tokens": 213141760}
|
|
{"current_steps": 67715, "total_steps": 78105, "loss": 0.1333, "lr": 2.6476996200747144e-07, "epoch": 4.334869726650022, "percentage": 86.7, "elapsed_time": "2:58:36", "remaining_time": "0:27:24", "throughput": 19890.57, "total_tokens": 213156736}
|
|
{"current_steps": 67720, "total_steps": 78105, "loss": 0.1304, "lr": 2.6451980674893766e-07, "epoch": 4.335189808590999, "percentage": 86.7, "elapsed_time": "2:58:37", "remaining_time": "0:27:23", "throughput": 19890.75, "total_tokens": 213171712}
|
|
{"current_steps": 67725, "total_steps": 78105, "loss": 0.1565, "lr": 2.6426976311863947e-07, "epoch": 4.335509890531976, "percentage": 86.71, "elapsed_time": "2:58:37", "remaining_time": "0:27:22", "throughput": 19890.95, "total_tokens": 213187200}
|
|
{"current_steps": 67730, "total_steps": 78105, "loss": 0.1332, "lr": 2.640198311290648e-07, "epoch": 4.335829972472953, "percentage": 86.72, "elapsed_time": "2:58:38", "remaining_time": "0:27:21", "throughput": 19891.14, "total_tokens": 213202112}
|
|
{"current_steps": 67735, "total_steps": 78105, "loss": 0.1567, "lr": 2.6377001079269215e-07, "epoch": 4.33615005441393, "percentage": 86.72, "elapsed_time": "2:58:39", "remaining_time": "0:27:21", "throughput": 19891.35, "total_tokens": 213217856}
|
|
{"current_steps": 67740, "total_steps": 78105, "loss": 0.1348, "lr": 2.635203021219976e-07, "epoch": 4.336470136354907, "percentage": 86.73, "elapsed_time": "2:58:39", "remaining_time": "0:27:20", "throughput": 19891.51, "total_tokens": 213232256}
|
|
{"current_steps": 67745, "total_steps": 78105, "loss": 0.1511, "lr": 2.6327070512944947e-07, "epoch": 4.336790218295883, "percentage": 86.74, "elapsed_time": "2:58:40", "remaining_time": "0:27:19", "throughput": 19891.7, "total_tokens": 213247488}
|
|
{"current_steps": 67750, "total_steps": 78105, "loss": 0.1696, "lr": 2.6302121982751144e-07, "epoch": 4.33711030023686, "percentage": 86.74, "elapsed_time": "2:58:41", "remaining_time": "0:27:18", "throughput": 19891.87, "total_tokens": 213262272}
|
|
{"current_steps": 67755, "total_steps": 78105, "loss": 0.1443, "lr": 2.6277184622864245e-07, "epoch": 4.337430382177837, "percentage": 86.75, "elapsed_time": "2:58:41", "remaining_time": "0:27:17", "throughput": 19892.11, "total_tokens": 213278528}
|
|
{"current_steps": 67760, "total_steps": 78105, "loss": 0.1079, "lr": 2.625225843452933e-07, "epoch": 4.337750464118814, "percentage": 86.76, "elapsed_time": "2:58:42", "remaining_time": "0:27:17", "throughput": 19892.31, "total_tokens": 213294208}
|
|
{"current_steps": 67765, "total_steps": 78105, "loss": 0.1091, "lr": 2.6227343418991246e-07, "epoch": 4.338070546059791, "percentage": 86.76, "elapsed_time": "2:58:43", "remaining_time": "0:27:16", "throughput": 19892.5, "total_tokens": 213309440}
|
|
{"current_steps": 67770, "total_steps": 78105, "loss": 0.1213, "lr": 2.6202439577493966e-07, "epoch": 4.338390628000768, "percentage": 86.77, "elapsed_time": "2:58:43", "remaining_time": "0:27:15", "throughput": 19892.77, "total_tokens": 213326464}
|
|
{"current_steps": 67775, "total_steps": 78105, "loss": 0.1333, "lr": 2.6177546911281185e-07, "epoch": 4.338710709941745, "percentage": 86.77, "elapsed_time": "2:58:44", "remaining_time": "0:27:14", "throughput": 19892.99, "total_tokens": 213342336}
|
|
{"current_steps": 67780, "total_steps": 78105, "loss": 0.2306, "lr": 2.6152665421595865e-07, "epoch": 4.339030791882722, "percentage": 86.78, "elapsed_time": "2:58:45", "remaining_time": "0:27:13", "throughput": 19893.19, "total_tokens": 213358080}
|
|
{"current_steps": 67785, "total_steps": 78105, "loss": 0.128, "lr": 2.6127795109680443e-07, "epoch": 4.339350873823699, "percentage": 86.79, "elapsed_time": "2:58:45", "remaining_time": "0:27:12", "throughput": 19893.35, "total_tokens": 213372672}
|
|
{"current_steps": 67790, "total_steps": 78105, "loss": 0.1503, "lr": 2.6102935976776825e-07, "epoch": 4.339670955764676, "percentage": 86.79, "elapsed_time": "2:58:46", "remaining_time": "0:27:12", "throughput": 19893.56, "total_tokens": 213388160}
|
|
{"current_steps": 67795, "total_steps": 78105, "loss": 0.0951, "lr": 2.607808802412634e-07, "epoch": 4.339991037705652, "percentage": 86.8, "elapsed_time": "2:58:47", "remaining_time": "0:27:11", "throughput": 19893.74, "total_tokens": 213403456}
|
|
{"current_steps": 67800, "total_steps": 78105, "loss": 0.1334, "lr": 2.605325125296976e-07, "epoch": 4.340311119646629, "percentage": 86.81, "elapsed_time": "2:58:47", "remaining_time": "0:27:10", "throughput": 19894.01, "total_tokens": 213420608}
|
|
{"current_steps": 67805, "total_steps": 78105, "loss": 0.1515, "lr": 2.602842566454733e-07, "epoch": 4.340631201587606, "percentage": 86.81, "elapsed_time": "2:58:48", "remaining_time": "0:27:09", "throughput": 19894.2, "total_tokens": 213436096}
|
|
{"current_steps": 67810, "total_steps": 78105, "loss": 0.0939, "lr": 2.6003611260098705e-07, "epoch": 4.340951283528583, "percentage": 86.82, "elapsed_time": "2:58:49", "remaining_time": "0:27:08", "throughput": 19894.41, "total_tokens": 213452096}
|
|
{"current_steps": 67815, "total_steps": 78105, "loss": 0.1105, "lr": 2.5978808040862976e-07, "epoch": 4.34127136546956, "percentage": 86.83, "elapsed_time": "2:58:49", "remaining_time": "0:27:08", "throughput": 19894.6, "total_tokens": 213467200}
|
|
{"current_steps": 67820, "total_steps": 78105, "loss": 0.1204, "lr": 2.5954016008078704e-07, "epoch": 4.341591447410537, "percentage": 86.83, "elapsed_time": "2:58:50", "remaining_time": "0:27:07", "throughput": 19894.8, "total_tokens": 213482816}
|
|
{"current_steps": 67825, "total_steps": 78105, "loss": 0.1339, "lr": 2.5929235162983845e-07, "epoch": 4.341911529351514, "percentage": 86.84, "elapsed_time": "2:58:51", "remaining_time": "0:27:06", "throughput": 19894.99, "total_tokens": 213497664}
|
|
{"current_steps": 67830, "total_steps": 78105, "loss": 0.1269, "lr": 2.590446550681586e-07, "epoch": 4.342231611292491, "percentage": 86.84, "elapsed_time": "2:58:51", "remaining_time": "0:27:05", "throughput": 19895.18, "total_tokens": 213513024}
|
|
{"current_steps": 67835, "total_steps": 78105, "loss": 0.1238, "lr": 2.5879707040811527e-07, "epoch": 4.342551693233467, "percentage": 86.85, "elapsed_time": "2:58:52", "remaining_time": "0:27:04", "throughput": 19895.35, "total_tokens": 213527872}
|
|
{"current_steps": 67840, "total_steps": 78105, "loss": 0.1203, "lr": 2.585495976620733e-07, "epoch": 4.342871775174444, "percentage": 86.86, "elapsed_time": "2:58:53", "remaining_time": "0:27:04", "throughput": 19895.56, "total_tokens": 213543424}
|
|
{"current_steps": 67845, "total_steps": 78105, "loss": 0.1121, "lr": 2.583022368423882e-07, "epoch": 4.343191857115421, "percentage": 86.86, "elapsed_time": "2:58:53", "remaining_time": "0:27:03", "throughput": 19895.74, "total_tokens": 213558144}
|
|
{"current_steps": 67850, "total_steps": 78105, "loss": 0.1569, "lr": 2.5805498796141304e-07, "epoch": 4.343511939056398, "percentage": 86.87, "elapsed_time": "2:58:54", "remaining_time": "0:27:02", "throughput": 19895.92, "total_tokens": 213573184}
|
|
{"current_steps": 67855, "total_steps": 78105, "loss": 0.1484, "lr": 2.578078510314941e-07, "epoch": 4.343832020997375, "percentage": 86.88, "elapsed_time": "2:58:55", "remaining_time": "0:27:01", "throughput": 19896.08, "total_tokens": 213587712}
|
|
{"current_steps": 67860, "total_steps": 78105, "loss": 0.123, "lr": 2.57560826064972e-07, "epoch": 4.344152102938352, "percentage": 86.88, "elapsed_time": "2:58:55", "remaining_time": "0:27:00", "throughput": 19896.29, "total_tokens": 213603456}
|
|
{"current_steps": 67865, "total_steps": 78105, "loss": 0.1645, "lr": 2.573139130741817e-07, "epoch": 4.344472184879329, "percentage": 86.89, "elapsed_time": "2:58:56", "remaining_time": "0:27:00", "throughput": 19896.5, "total_tokens": 213619264}
|
|
{"current_steps": 67870, "total_steps": 78105, "loss": 0.1573, "lr": 2.570671120714524e-07, "epoch": 4.344792266820306, "percentage": 86.9, "elapsed_time": "2:58:57", "remaining_time": "0:26:59", "throughput": 19896.72, "total_tokens": 213635392}
|
|
{"current_steps": 67875, "total_steps": 78105, "loss": 0.1601, "lr": 2.5682042306910957e-07, "epoch": 4.345112348761283, "percentage": 86.9, "elapsed_time": "2:58:57", "remaining_time": "0:26:58", "throughput": 19896.98, "total_tokens": 213652224}
|
|
{"current_steps": 67880, "total_steps": 78105, "loss": 0.1461, "lr": 2.565738460794695e-07, "epoch": 4.345432430702259, "percentage": 86.91, "elapsed_time": "2:58:58", "remaining_time": "0:26:57", "throughput": 19897.17, "total_tokens": 213667712}
|
|
{"current_steps": 67885, "total_steps": 78105, "loss": 0.1313, "lr": 2.5632738111484644e-07, "epoch": 4.345752512643236, "percentage": 86.92, "elapsed_time": "2:58:59", "remaining_time": "0:26:56", "throughput": 19897.32, "total_tokens": 213681984}
|
|
{"current_steps": 67890, "total_steps": 78105, "loss": 0.1267, "lr": 2.5608102818754725e-07, "epoch": 4.346072594584213, "percentage": 86.92, "elapsed_time": "2:58:59", "remaining_time": "0:26:55", "throughput": 19897.51, "total_tokens": 213696768}
|
|
{"current_steps": 67895, "total_steps": 78105, "loss": 0.1883, "lr": 2.558347873098732e-07, "epoch": 4.34639267652519, "percentage": 86.93, "elapsed_time": "2:59:00", "remaining_time": "0:26:55", "throughput": 19897.7, "total_tokens": 213712256}
|
|
{"current_steps": 67900, "total_steps": 78105, "loss": 0.1275, "lr": 2.555886584941203e-07, "epoch": 4.346712758466167, "percentage": 86.93, "elapsed_time": "2:59:01", "remaining_time": "0:26:54", "throughput": 19897.89, "total_tokens": 213727552}
|
|
{"current_steps": 67905, "total_steps": 78105, "loss": 0.1666, "lr": 2.553426417525792e-07, "epoch": 4.347032840407144, "percentage": 86.94, "elapsed_time": "2:59:01", "remaining_time": "0:26:53", "throughput": 19898.11, "total_tokens": 213743488}
|
|
{"current_steps": 67910, "total_steps": 78105, "loss": 0.1325, "lr": 2.5509673709753465e-07, "epoch": 4.347352922348121, "percentage": 86.95, "elapsed_time": "2:59:02", "remaining_time": "0:26:52", "throughput": 19898.28, "total_tokens": 213758592}
|
|
{"current_steps": 67915, "total_steps": 78105, "loss": 0.125, "lr": 2.548509445412659e-07, "epoch": 4.347673004289098, "percentage": 86.95, "elapsed_time": "2:59:03", "remaining_time": "0:26:51", "throughput": 19898.46, "total_tokens": 213773824}
|
|
{"current_steps": 67920, "total_steps": 78105, "loss": 0.1177, "lr": 2.5460526409604586e-07, "epoch": 4.347993086230075, "percentage": 86.96, "elapsed_time": "2:59:03", "remaining_time": "0:26:51", "throughput": 19898.64, "total_tokens": 213788416}
|
|
{"current_steps": 67925, "total_steps": 78105, "loss": 0.1054, "lr": 2.5435969577414345e-07, "epoch": 4.348313168171051, "percentage": 86.97, "elapsed_time": "2:59:04", "remaining_time": "0:26:50", "throughput": 19898.8, "total_tokens": 213803136}
|
|
{"current_steps": 67930, "total_steps": 78105, "loss": 0.1535, "lr": 2.541142395878207e-07, "epoch": 4.348633250112028, "percentage": 86.97, "elapsed_time": "2:59:05", "remaining_time": "0:26:49", "throughput": 19899.05, "total_tokens": 213819648}
|
|
{"current_steps": 67935, "total_steps": 78105, "loss": 0.1334, "lr": 2.538688955493346e-07, "epoch": 4.348953332053005, "percentage": 86.98, "elapsed_time": "2:59:05", "remaining_time": "0:26:48", "throughput": 19899.27, "total_tokens": 213835584}
|
|
{"current_steps": 67940, "total_steps": 78105, "loss": 0.1374, "lr": 2.5362366367093603e-07, "epoch": 4.349273413993982, "percentage": 86.99, "elapsed_time": "2:59:06", "remaining_time": "0:26:47", "throughput": 19899.5, "total_tokens": 213851968}
|
|
{"current_steps": 67945, "total_steps": 78105, "loss": 0.1607, "lr": 2.5337854396487034e-07, "epoch": 4.349593495934959, "percentage": 86.99, "elapsed_time": "2:59:07", "remaining_time": "0:26:47", "throughput": 19899.72, "total_tokens": 213868480}
|
|
{"current_steps": 67950, "total_steps": 78105, "loss": 0.1363, "lr": 2.5313353644337903e-07, "epoch": 4.349913577875936, "percentage": 87.0, "elapsed_time": "2:59:08", "remaining_time": "0:26:46", "throughput": 19899.97, "total_tokens": 213885120}
|
|
{"current_steps": 67955, "total_steps": 78105, "loss": 0.1133, "lr": 2.528886411186943e-07, "epoch": 4.350233659816913, "percentage": 87.0, "elapsed_time": "2:59:08", "remaining_time": "0:26:45", "throughput": 19900.21, "total_tokens": 213902080}
|
|
{"current_steps": 67960, "total_steps": 78105, "loss": 0.1242, "lr": 2.5264385800304687e-07, "epoch": 4.35055374175789, "percentage": 87.01, "elapsed_time": "2:59:09", "remaining_time": "0:26:44", "throughput": 19900.48, "total_tokens": 213919232}
|
|
{"current_steps": 67965, "total_steps": 78105, "loss": 0.1588, "lr": 2.523991871086584e-07, "epoch": 4.350873823698867, "percentage": 87.02, "elapsed_time": "2:59:10", "remaining_time": "0:26:43", "throughput": 19900.69, "total_tokens": 213935360}
|
|
{"current_steps": 67970, "total_steps": 78105, "loss": 0.1353, "lr": 2.5215462844774744e-07, "epoch": 4.351193905639843, "percentage": 87.02, "elapsed_time": "2:59:10", "remaining_time": "0:26:43", "throughput": 19900.89, "total_tokens": 213950720}
|
|
{"current_steps": 67975, "total_steps": 78105, "loss": 0.1115, "lr": 2.519101820325259e-07, "epoch": 4.35151398758082, "percentage": 87.03, "elapsed_time": "2:59:11", "remaining_time": "0:26:42", "throughput": 19901.12, "total_tokens": 213967040}
|
|
{"current_steps": 67980, "total_steps": 78105, "loss": 0.143, "lr": 2.516658478751996e-07, "epoch": 4.351834069521797, "percentage": 87.04, "elapsed_time": "2:59:12", "remaining_time": "0:26:41", "throughput": 19901.39, "total_tokens": 213984384}
|
|
{"current_steps": 67985, "total_steps": 78105, "loss": 0.1467, "lr": 2.5142162598797063e-07, "epoch": 4.352154151462774, "percentage": 87.04, "elapsed_time": "2:59:12", "remaining_time": "0:26:40", "throughput": 19901.62, "total_tokens": 214000704}
|
|
{"current_steps": 67990, "total_steps": 78105, "loss": 0.1568, "lr": 2.511775163830321e-07, "epoch": 4.352474233403751, "percentage": 87.05, "elapsed_time": "2:59:13", "remaining_time": "0:26:39", "throughput": 19901.94, "total_tokens": 214019712}
|
|
{"current_steps": 67995, "total_steps": 78105, "loss": 0.0906, "lr": 2.509335190725759e-07, "epoch": 4.352794315344728, "percentage": 87.06, "elapsed_time": "2:59:14", "remaining_time": "0:26:39", "throughput": 19902.15, "total_tokens": 214035328}
|
|
{"current_steps": 68000, "total_steps": 78105, "loss": 0.1677, "lr": 2.506896340687839e-07, "epoch": 4.353114397285705, "percentage": 87.06, "elapsed_time": "2:59:15", "remaining_time": "0:26:38", "throughput": 19902.34, "total_tokens": 214050688}
|
|
{"current_steps": 68005, "total_steps": 78105, "loss": 0.1416, "lr": 2.504458613838356e-07, "epoch": 4.353434479226682, "percentage": 87.07, "elapsed_time": "2:59:15", "remaining_time": "0:26:37", "throughput": 19902.5, "total_tokens": 214065280}
|
|
{"current_steps": 68010, "total_steps": 78105, "loss": 0.152, "lr": 2.502022010299035e-07, "epoch": 4.3537545611676585, "percentage": 87.08, "elapsed_time": "2:59:16", "remaining_time": "0:26:36", "throughput": 19902.72, "total_tokens": 214081408}
|
|
{"current_steps": 68015, "total_steps": 78105, "loss": 0.2511, "lr": 2.4995865301915473e-07, "epoch": 4.3540746431086355, "percentage": 87.08, "elapsed_time": "2:59:17", "remaining_time": "0:26:35", "throughput": 19902.93, "total_tokens": 214097152}
|
|
{"current_steps": 68020, "total_steps": 78105, "loss": 0.1267, "lr": 2.49715217363751e-07, "epoch": 4.3543947250496124, "percentage": 87.09, "elapsed_time": "2:59:17", "remaining_time": "0:26:34", "throughput": 19903.14, "total_tokens": 214112832}
|
|
{"current_steps": 68025, "total_steps": 78105, "loss": 0.1704, "lr": 2.4947189407584784e-07, "epoch": 4.354714806990589, "percentage": 87.09, "elapsed_time": "2:59:18", "remaining_time": "0:26:34", "throughput": 19903.35, "total_tokens": 214128512}
|
|
{"current_steps": 68030, "total_steps": 78105, "loss": 0.1271, "lr": 2.4922868316759527e-07, "epoch": 4.355034888931566, "percentage": 87.1, "elapsed_time": "2:59:19", "remaining_time": "0:26:33", "throughput": 19903.56, "total_tokens": 214144512}
|
|
{"current_steps": 68035, "total_steps": 78105, "loss": 0.1152, "lr": 2.4898558465113935e-07, "epoch": 4.355354970872543, "percentage": 87.11, "elapsed_time": "2:59:19", "remaining_time": "0:26:32", "throughput": 19903.77, "total_tokens": 214160000}
|
|
{"current_steps": 68040, "total_steps": 78105, "loss": 0.1667, "lr": 2.487425985386172e-07, "epoch": 4.35567505281352, "percentage": 87.11, "elapsed_time": "2:59:20", "remaining_time": "0:26:31", "throughput": 19904.02, "total_tokens": 214176768}
|
|
{"current_steps": 68045, "total_steps": 78105, "loss": 0.1652, "lr": 2.4849972484216387e-07, "epoch": 4.355995134754497, "percentage": 87.12, "elapsed_time": "2:59:21", "remaining_time": "0:26:30", "throughput": 19904.19, "total_tokens": 214191552}
|
|
{"current_steps": 68050, "total_steps": 78105, "loss": 0.1493, "lr": 2.4825696357390656e-07, "epoch": 4.356315216695474, "percentage": 87.13, "elapsed_time": "2:59:21", "remaining_time": "0:26:30", "throughput": 19904.37, "total_tokens": 214206656}
|
|
{"current_steps": 68055, "total_steps": 78105, "loss": 0.1189, "lr": 2.4801431474596766e-07, "epoch": 4.356635298636451, "percentage": 87.13, "elapsed_time": "2:59:22", "remaining_time": "0:26:29", "throughput": 19904.56, "total_tokens": 214221952}
|
|
{"current_steps": 68060, "total_steps": 78105, "loss": 0.1283, "lr": 2.477717783704636e-07, "epoch": 4.3569553805774275, "percentage": 87.14, "elapsed_time": "2:59:23", "remaining_time": "0:26:28", "throughput": 19904.76, "total_tokens": 214237376}
|
|
{"current_steps": 68065, "total_steps": 78105, "loss": 0.1263, "lr": 2.4752935445950535e-07, "epoch": 4.3572754625184045, "percentage": 87.15, "elapsed_time": "2:59:23", "remaining_time": "0:26:27", "throughput": 19904.99, "total_tokens": 214253632}
|
|
{"current_steps": 68070, "total_steps": 78105, "loss": 0.1959, "lr": 2.472870430251992e-07, "epoch": 4.3575955444593815, "percentage": 87.15, "elapsed_time": "2:59:24", "remaining_time": "0:26:26", "throughput": 19905.2, "total_tokens": 214269376}
|
|
{"current_steps": 68075, "total_steps": 78105, "loss": 0.1753, "lr": 2.4704484407964303e-07, "epoch": 4.3579156264003585, "percentage": 87.16, "elapsed_time": "2:59:25", "remaining_time": "0:26:26", "throughput": 19905.39, "total_tokens": 214284864}
|
|
{"current_steps": 68080, "total_steps": 78105, "loss": 0.1114, "lr": 2.4680275763493273e-07, "epoch": 4.3582357083413354, "percentage": 87.16, "elapsed_time": "2:59:25", "remaining_time": "0:26:25", "throughput": 19905.63, "total_tokens": 214300992}
|
|
{"current_steps": 68085, "total_steps": 78105, "loss": 0.1027, "lr": 2.465607837031564e-07, "epoch": 4.358555790282312, "percentage": 87.17, "elapsed_time": "2:59:26", "remaining_time": "0:26:24", "throughput": 19905.82, "total_tokens": 214316352}
|
|
{"current_steps": 68090, "total_steps": 78105, "loss": 0.1452, "lr": 2.463189222963966e-07, "epoch": 4.358875872223289, "percentage": 87.18, "elapsed_time": "2:59:27", "remaining_time": "0:26:23", "throughput": 19906.08, "total_tokens": 214333632}
|
|
{"current_steps": 68095, "total_steps": 78105, "loss": 0.1413, "lr": 2.4607717342673086e-07, "epoch": 4.359195954164266, "percentage": 87.18, "elapsed_time": "2:59:27", "remaining_time": "0:26:22", "throughput": 19906.27, "total_tokens": 214348480}
|
|
{"current_steps": 68100, "total_steps": 78105, "loss": 0.2, "lr": 2.458355371062304e-07, "epoch": 4.3595160361052425, "percentage": 87.19, "elapsed_time": "2:59:28", "remaining_time": "0:26:22", "throughput": 19906.48, "total_tokens": 214364288}
|
|
{"current_steps": 68105, "total_steps": 78105, "loss": 0.1063, "lr": 2.455940133469628e-07, "epoch": 4.3598361180462195, "percentage": 87.2, "elapsed_time": "2:59:29", "remaining_time": "0:26:21", "throughput": 19906.67, "total_tokens": 214379648}
|
|
{"current_steps": 68110, "total_steps": 78105, "loss": 0.1157, "lr": 2.4535260216098615e-07, "epoch": 4.3601561999871965, "percentage": 87.2, "elapsed_time": "2:59:29", "remaining_time": "0:26:20", "throughput": 19906.85, "total_tokens": 214394496}
|
|
{"current_steps": 68115, "total_steps": 78105, "loss": 0.1066, "lr": 2.451113035603572e-07, "epoch": 4.3604762819281735, "percentage": 87.21, "elapsed_time": "2:59:30", "remaining_time": "0:26:19", "throughput": 19907.06, "total_tokens": 214409664}
|
|
{"current_steps": 68120, "total_steps": 78105, "loss": 0.1429, "lr": 2.4487011755712443e-07, "epoch": 4.3607963638691505, "percentage": 87.22, "elapsed_time": "2:59:31", "remaining_time": "0:26:18", "throughput": 19907.28, "total_tokens": 214425600}
|
|
{"current_steps": 68125, "total_steps": 78105, "loss": 0.1468, "lr": 2.446290441633314e-07, "epoch": 4.3611164458101275, "percentage": 87.22, "elapsed_time": "2:59:31", "remaining_time": "0:26:18", "throughput": 19907.43, "total_tokens": 214439872}
|
|
{"current_steps": 68130, "total_steps": 78105, "loss": 0.1316, "lr": 2.4438808339101615e-07, "epoch": 4.3614365277511045, "percentage": 87.23, "elapsed_time": "2:59:32", "remaining_time": "0:26:17", "throughput": 19907.61, "total_tokens": 214454912}
|
|
{"current_steps": 68135, "total_steps": 78105, "loss": 0.1236, "lr": 2.441472352522109e-07, "epoch": 4.3617566096920815, "percentage": 87.24, "elapsed_time": "2:59:33", "remaining_time": "0:26:16", "throughput": 19907.8, "total_tokens": 214470208}
|
|
{"current_steps": 68140, "total_steps": 78105, "loss": 0.1019, "lr": 2.4390649975894244e-07, "epoch": 4.3620766916330584, "percentage": 87.24, "elapsed_time": "2:59:33", "remaining_time": "0:26:15", "throughput": 19908.01, "total_tokens": 214485888}
|
|
{"current_steps": 68145, "total_steps": 78105, "loss": 0.1106, "lr": 2.4366587692323196e-07, "epoch": 4.3623967735740345, "percentage": 87.25, "elapsed_time": "2:59:34", "remaining_time": "0:26:14", "throughput": 19908.22, "total_tokens": 214501696}
|
|
{"current_steps": 68150, "total_steps": 78105, "loss": 0.1187, "lr": 2.4342536675709406e-07, "epoch": 4.3627168555150115, "percentage": 87.25, "elapsed_time": "2:59:35", "remaining_time": "0:26:13", "throughput": 19908.4, "total_tokens": 214517184}
|
|
{"current_steps": 68155, "total_steps": 78105, "loss": 0.1068, "lr": 2.431849692725399e-07, "epoch": 4.3630369374559885, "percentage": 87.26, "elapsed_time": "2:59:35", "remaining_time": "0:26:13", "throughput": 19908.62, "total_tokens": 214533120}
|
|
{"current_steps": 68160, "total_steps": 78105, "loss": 0.0825, "lr": 2.4294468448157304e-07, "epoch": 4.3633570193969655, "percentage": 87.27, "elapsed_time": "2:59:36", "remaining_time": "0:26:12", "throughput": 19908.79, "total_tokens": 214547904}
|
|
{"current_steps": 68165, "total_steps": 78105, "loss": 0.0626, "lr": 2.4270451239619186e-07, "epoch": 4.3636771013379425, "percentage": 87.27, "elapsed_time": "2:59:37", "remaining_time": "0:26:11", "throughput": 19909.02, "total_tokens": 214564160}
|
|
{"current_steps": 68170, "total_steps": 78105, "loss": 0.1123, "lr": 2.424644530283898e-07, "epoch": 4.3639971832789195, "percentage": 87.28, "elapsed_time": "2:59:37", "remaining_time": "0:26:10", "throughput": 19909.17, "total_tokens": 214578368}
|
|
{"current_steps": 68175, "total_steps": 78105, "loss": 0.1215, "lr": 2.422245063901532e-07, "epoch": 4.3643172652198965, "percentage": 87.29, "elapsed_time": "2:59:38", "remaining_time": "0:26:09", "throughput": 19909.4, "total_tokens": 214594496}
|
|
{"current_steps": 68180, "total_steps": 78105, "loss": 0.1454, "lr": 2.4198467249346547e-07, "epoch": 4.3646373471608735, "percentage": 87.29, "elapsed_time": "2:59:39", "remaining_time": "0:26:09", "throughput": 19909.63, "total_tokens": 214610880}
|
|
{"current_steps": 68185, "total_steps": 78105, "loss": 0.1073, "lr": 2.417449513503009e-07, "epoch": 4.3649574291018505, "percentage": 87.3, "elapsed_time": "2:59:39", "remaining_time": "0:26:08", "throughput": 19909.84, "total_tokens": 214626816}
|
|
{"current_steps": 68190, "total_steps": 78105, "loss": 0.1574, "lr": 2.4150534297263135e-07, "epoch": 4.365277511042827, "percentage": 87.31, "elapsed_time": "2:59:40", "remaining_time": "0:26:07", "throughput": 19910.02, "total_tokens": 214642048}
|
|
{"current_steps": 68195, "total_steps": 78105, "loss": 0.1334, "lr": 2.4126584737241996e-07, "epoch": 4.365597592983804, "percentage": 87.31, "elapsed_time": "2:59:41", "remaining_time": "0:26:06", "throughput": 19910.22, "total_tokens": 214657664}
|
|
{"current_steps": 68200, "total_steps": 78105, "loss": 0.1465, "lr": 2.410264645616273e-07, "epoch": 4.365917674924781, "percentage": 87.32, "elapsed_time": "2:59:41", "remaining_time": "0:26:05", "throughput": 19910.43, "total_tokens": 214673600}
|
|
{"current_steps": 68205, "total_steps": 78105, "loss": 0.1223, "lr": 2.407871945522064e-07, "epoch": 4.3662377568657575, "percentage": 87.32, "elapsed_time": "2:59:42", "remaining_time": "0:26:05", "throughput": 19910.6, "total_tokens": 214688576}
|
|
{"current_steps": 68210, "total_steps": 78105, "loss": 0.1337, "lr": 2.4054803735610535e-07, "epoch": 4.3665578388067345, "percentage": 87.33, "elapsed_time": "2:59:43", "remaining_time": "0:26:04", "throughput": 19910.78, "total_tokens": 214703232}
|
|
{"current_steps": 68215, "total_steps": 78105, "loss": 0.1444, "lr": 2.403089929852659e-07, "epoch": 4.3668779207477115, "percentage": 87.34, "elapsed_time": "2:59:43", "remaining_time": "0:26:03", "throughput": 19910.97, "total_tokens": 214718528}
|
|
{"current_steps": 68220, "total_steps": 78105, "loss": 0.1182, "lr": 2.400700614516249e-07, "epoch": 4.3671980026886885, "percentage": 87.34, "elapsed_time": "2:59:44", "remaining_time": "0:26:02", "throughput": 19911.19, "total_tokens": 214734400}
|
|
{"current_steps": 68225, "total_steps": 78105, "loss": 0.1145, "lr": 2.39831242767114e-07, "epoch": 4.3675180846296655, "percentage": 87.35, "elapsed_time": "2:59:45", "remaining_time": "0:26:01", "throughput": 19911.42, "total_tokens": 214750720}
|
|
{"current_steps": 68230, "total_steps": 78105, "loss": 0.1521, "lr": 2.3959253694365773e-07, "epoch": 4.3678381665706425, "percentage": 87.36, "elapsed_time": "2:59:45", "remaining_time": "0:26:01", "throughput": 19911.67, "total_tokens": 214767168}
|
|
{"current_steps": 68235, "total_steps": 78105, "loss": 0.1359, "lr": 2.3935394399317635e-07, "epoch": 4.368158248511619, "percentage": 87.36, "elapsed_time": "2:59:46", "remaining_time": "0:26:00", "throughput": 19911.87, "total_tokens": 214782592}
|
|
{"current_steps": 68240, "total_steps": 78105, "loss": 0.1161, "lr": 2.3911546392758387e-07, "epoch": 4.368478330452596, "percentage": 87.37, "elapsed_time": "2:59:47", "remaining_time": "0:25:59", "throughput": 19912.05, "total_tokens": 214797632}
|
|
{"current_steps": 68245, "total_steps": 78105, "loss": 0.1449, "lr": 2.3887709675878835e-07, "epoch": 4.368798412393573, "percentage": 87.38, "elapsed_time": "2:59:48", "remaining_time": "0:25:58", "throughput": 19912.28, "total_tokens": 214813888}
|
|
{"current_steps": 68250, "total_steps": 78105, "loss": 0.1307, "lr": 2.38638842498693e-07, "epoch": 4.36911849433455, "percentage": 87.38, "elapsed_time": "2:59:48", "remaining_time": "0:25:57", "throughput": 19912.44, "total_tokens": 214828224}
|
|
{"current_steps": 68255, "total_steps": 78105, "loss": 0.1223, "lr": 2.3840070115919462e-07, "epoch": 4.369438576275527, "percentage": 87.39, "elapsed_time": "2:59:49", "remaining_time": "0:25:57", "throughput": 19912.62, "total_tokens": 214843200}
|
|
{"current_steps": 68260, "total_steps": 78105, "loss": 0.1461, "lr": 2.381626727521849e-07, "epoch": 4.3697586582165036, "percentage": 87.4, "elapsed_time": "2:59:49", "remaining_time": "0:25:56", "throughput": 19912.85, "total_tokens": 214859392}
|
|
{"current_steps": 68265, "total_steps": 78105, "loss": 0.1309, "lr": 2.3792475728955046e-07, "epoch": 4.3700787401574805, "percentage": 87.4, "elapsed_time": "2:59:50", "remaining_time": "0:25:55", "throughput": 19913.06, "total_tokens": 214875584}
|
|
{"current_steps": 68270, "total_steps": 78105, "loss": 0.1253, "lr": 2.3768695478317005e-07, "epoch": 4.3703988220984575, "percentage": 87.41, "elapsed_time": "2:59:51", "remaining_time": "0:25:54", "throughput": 19913.26, "total_tokens": 214890816}
|
|
{"current_steps": 68275, "total_steps": 78105, "loss": 0.1477, "lr": 2.374492652449198e-07, "epoch": 4.370718904039434, "percentage": 87.41, "elapsed_time": "2:59:51", "remaining_time": "0:25:53", "throughput": 19913.44, "total_tokens": 214905728}
|
|
{"current_steps": 68280, "total_steps": 78105, "loss": 0.1718, "lr": 2.3721168868666793e-07, "epoch": 4.371038985980411, "percentage": 87.42, "elapsed_time": "2:59:52", "remaining_time": "0:25:52", "throughput": 19913.65, "total_tokens": 214921792}
|
|
{"current_steps": 68285, "total_steps": 78105, "loss": 0.1627, "lr": 2.369742251202778e-07, "epoch": 4.371359067921388, "percentage": 87.43, "elapsed_time": "2:59:53", "remaining_time": "0:25:52", "throughput": 19913.81, "total_tokens": 214936256}
|
|
{"current_steps": 68290, "total_steps": 78105, "loss": 0.0926, "lr": 2.3673687455760735e-07, "epoch": 4.371679149862365, "percentage": 87.43, "elapsed_time": "2:59:54", "remaining_time": "0:25:51", "throughput": 19914.0, "total_tokens": 214951936}
|
|
{"current_steps": 68295, "total_steps": 78105, "loss": 0.125, "lr": 2.3649963701050772e-07, "epoch": 4.371999231803342, "percentage": 87.44, "elapsed_time": "2:59:54", "remaining_time": "0:25:50", "throughput": 19914.16, "total_tokens": 214966464}
|
|
{"current_steps": 68300, "total_steps": 78105, "loss": 0.1055, "lr": 2.3626251249082744e-07, "epoch": 4.372319313744319, "percentage": 87.45, "elapsed_time": "2:59:55", "remaining_time": "0:25:49", "throughput": 19914.41, "total_tokens": 214983104}
|
|
{"current_steps": 68305, "total_steps": 78105, "loss": 0.1297, "lr": 2.3602550101040454e-07, "epoch": 4.372639395685296, "percentage": 87.45, "elapsed_time": "2:59:56", "remaining_time": "0:25:48", "throughput": 19914.64, "total_tokens": 214999360}
|
|
{"current_steps": 68310, "total_steps": 78105, "loss": 0.1403, "lr": 2.3578860258107628e-07, "epoch": 4.372959477626273, "percentage": 87.46, "elapsed_time": "2:59:56", "remaining_time": "0:25:48", "throughput": 19914.91, "total_tokens": 215016640}
|
|
{"current_steps": 68315, "total_steps": 78105, "loss": 0.1308, "lr": 2.355518172146712e-07, "epoch": 4.37327955956725, "percentage": 87.47, "elapsed_time": "2:59:57", "remaining_time": "0:25:47", "throughput": 19915.09, "total_tokens": 215031808}
|
|
{"current_steps": 68320, "total_steps": 78105, "loss": 0.1332, "lr": 2.3531514492301345e-07, "epoch": 4.3735996415082266, "percentage": 87.47, "elapsed_time": "2:59:58", "remaining_time": "0:25:46", "throughput": 19915.36, "total_tokens": 215049024}
|
|
{"current_steps": 68325, "total_steps": 78105, "loss": 0.1569, "lr": 2.3507858571792081e-07, "epoch": 4.373919723449203, "percentage": 87.48, "elapsed_time": "2:59:58", "remaining_time": "0:25:45", "throughput": 19915.55, "total_tokens": 215064512}
|
|
{"current_steps": 68330, "total_steps": 78105, "loss": 0.1687, "lr": 2.3484213961120577e-07, "epoch": 4.37423980539018, "percentage": 87.48, "elapsed_time": "2:59:59", "remaining_time": "0:25:44", "throughput": 19915.75, "total_tokens": 215079936}
|
|
{"current_steps": 68335, "total_steps": 78105, "loss": 0.1534, "lr": 2.3460580661467642e-07, "epoch": 4.374559887331157, "percentage": 87.49, "elapsed_time": "3:00:00", "remaining_time": "0:25:44", "throughput": 19915.94, "total_tokens": 215095552}
|
|
{"current_steps": 68340, "total_steps": 78105, "loss": 0.1338, "lr": 2.3436958674013216e-07, "epoch": 4.374879969272134, "percentage": 87.5, "elapsed_time": "3:00:00", "remaining_time": "0:25:43", "throughput": 19916.15, "total_tokens": 215111232}
|
|
{"current_steps": 68345, "total_steps": 78105, "loss": 0.186, "lr": 2.3413347999936998e-07, "epoch": 4.375200051213111, "percentage": 87.5, "elapsed_time": "3:00:01", "remaining_time": "0:25:42", "throughput": 19916.33, "total_tokens": 215126080}
|
|
{"current_steps": 68350, "total_steps": 78105, "loss": 0.0991, "lr": 2.338974864041793e-07, "epoch": 4.375520133154088, "percentage": 87.51, "elapsed_time": "3:00:02", "remaining_time": "0:25:41", "throughput": 19916.51, "total_tokens": 215141184}
|
|
{"current_steps": 68355, "total_steps": 78105, "loss": 0.1234, "lr": 2.3366160596634436e-07, "epoch": 4.375840215095065, "percentage": 87.52, "elapsed_time": "3:00:02", "remaining_time": "0:25:40", "throughput": 19916.68, "total_tokens": 215155968}
|
|
{"current_steps": 68360, "total_steps": 78105, "loss": 0.1445, "lr": 2.33425838697644e-07, "epoch": 4.376160297036042, "percentage": 87.52, "elapsed_time": "3:00:03", "remaining_time": "0:25:40", "throughput": 19916.9, "total_tokens": 215172160}
|
|
{"current_steps": 68365, "total_steps": 78105, "loss": 0.1426, "lr": 2.331901846098511e-07, "epoch": 4.376480378977018, "percentage": 87.53, "elapsed_time": "3:00:04", "remaining_time": "0:25:39", "throughput": 19917.13, "total_tokens": 215188160}
|
|
{"current_steps": 68370, "total_steps": 78105, "loss": 0.1611, "lr": 2.3295464371473287e-07, "epoch": 4.376800460917995, "percentage": 87.54, "elapsed_time": "3:00:04", "remaining_time": "0:25:38", "throughput": 19917.36, "total_tokens": 215204544}
|
|
{"current_steps": 68375, "total_steps": 78105, "loss": 0.1618, "lr": 2.3271921602405129e-07, "epoch": 4.377120542858972, "percentage": 87.54, "elapsed_time": "3:00:05", "remaining_time": "0:25:37", "throughput": 19917.58, "total_tokens": 215220864}
|
|
{"current_steps": 68380, "total_steps": 78105, "loss": 0.1636, "lr": 2.324839015495617e-07, "epoch": 4.377440624799949, "percentage": 87.55, "elapsed_time": "3:00:06", "remaining_time": "0:25:36", "throughput": 19917.77, "total_tokens": 215236352}
|
|
{"current_steps": 68385, "total_steps": 78105, "loss": 0.1369, "lr": 2.3224870030301577e-07, "epoch": 4.377760706740926, "percentage": 87.56, "elapsed_time": "3:00:06", "remaining_time": "0:25:36", "throughput": 19918.01, "total_tokens": 215252928}
|
|
{"current_steps": 68390, "total_steps": 78105, "loss": 0.1762, "lr": 2.3201361229615694e-07, "epoch": 4.378080788681903, "percentage": 87.56, "elapsed_time": "3:00:07", "remaining_time": "0:25:35", "throughput": 19918.28, "total_tokens": 215270080}
|
|
{"current_steps": 68395, "total_steps": 78105, "loss": 0.1265, "lr": 2.3177863754072493e-07, "epoch": 4.37840087062288, "percentage": 87.57, "elapsed_time": "3:00:08", "remaining_time": "0:25:34", "throughput": 19918.57, "total_tokens": 215287808}
|
|
{"current_steps": 68400, "total_steps": 78105, "loss": 0.0845, "lr": 2.3154377604845318e-07, "epoch": 4.378720952563857, "percentage": 87.57, "elapsed_time": "3:00:09", "remaining_time": "0:25:33", "throughput": 19918.71, "total_tokens": 215301824}
|
|
{"current_steps": 68405, "total_steps": 78105, "loss": 0.1127, "lr": 2.3130902783106867e-07, "epoch": 4.379041034504834, "percentage": 87.58, "elapsed_time": "3:00:09", "remaining_time": "0:25:32", "throughput": 19918.9, "total_tokens": 215317440}
|
|
{"current_steps": 68410, "total_steps": 78105, "loss": 0.1349, "lr": 2.310743929002951e-07, "epoch": 4.37936111644581, "percentage": 87.59, "elapsed_time": "3:00:10", "remaining_time": "0:25:32", "throughput": 19919.13, "total_tokens": 215333952}
|
|
{"current_steps": 68415, "total_steps": 78105, "loss": 0.126, "lr": 2.3083987126784723e-07, "epoch": 4.379681198386787, "percentage": 87.59, "elapsed_time": "3:00:11", "remaining_time": "0:25:31", "throughput": 19919.35, "total_tokens": 215350272}
|
|
{"current_steps": 68420, "total_steps": 78105, "loss": 0.1614, "lr": 2.306054629454374e-07, "epoch": 4.380001280327764, "percentage": 87.6, "elapsed_time": "3:00:11", "remaining_time": "0:25:30", "throughput": 19919.54, "total_tokens": 215365760}
|
|
{"current_steps": 68425, "total_steps": 78105, "loss": 0.1559, "lr": 2.303711679447687e-07, "epoch": 4.380321362268741, "percentage": 87.61, "elapsed_time": "3:00:12", "remaining_time": "0:25:29", "throughput": 19919.8, "total_tokens": 215382912}
|
|
{"current_steps": 68430, "total_steps": 78105, "loss": 0.1158, "lr": 2.3013698627754267e-07, "epoch": 4.380641444209718, "percentage": 87.61, "elapsed_time": "3:00:13", "remaining_time": "0:25:28", "throughput": 19920.05, "total_tokens": 215399744}
|
|
{"current_steps": 68435, "total_steps": 78105, "loss": 0.1577, "lr": 2.299029179554521e-07, "epoch": 4.380961526150695, "percentage": 87.62, "elapsed_time": "3:00:13", "remaining_time": "0:25:28", "throughput": 19920.24, "total_tokens": 215414912}
|
|
{"current_steps": 68440, "total_steps": 78105, "loss": 0.1296, "lr": 2.296689629901852e-07, "epoch": 4.381281608091672, "percentage": 87.63, "elapsed_time": "3:00:14", "remaining_time": "0:25:27", "throughput": 19920.43, "total_tokens": 215430464}
|
|
{"current_steps": 68445, "total_steps": 78105, "loss": 0.0872, "lr": 2.2943512139342455e-07, "epoch": 4.381601690032649, "percentage": 87.63, "elapsed_time": "3:00:15", "remaining_time": "0:25:26", "throughput": 19920.65, "total_tokens": 215446336}
|
|
{"current_steps": 68450, "total_steps": 78105, "loss": 0.1934, "lr": 2.2920139317684665e-07, "epoch": 4.381921771973626, "percentage": 87.64, "elapsed_time": "3:00:15", "remaining_time": "0:25:25", "throughput": 19920.83, "total_tokens": 215461312}
|
|
{"current_steps": 68455, "total_steps": 78105, "loss": 0.1609, "lr": 2.2896777835212354e-07, "epoch": 4.382241853914602, "percentage": 87.64, "elapsed_time": "3:00:16", "remaining_time": "0:25:24", "throughput": 19921.06, "total_tokens": 215477504}
|
|
{"current_steps": 68460, "total_steps": 78105, "loss": 0.1693, "lr": 2.2873427693092003e-07, "epoch": 4.382561935855579, "percentage": 87.65, "elapsed_time": "3:00:17", "remaining_time": "0:25:23", "throughput": 19921.23, "total_tokens": 215492416}
|
|
{"current_steps": 68465, "total_steps": 78105, "loss": 0.1326, "lr": 2.285008889248963e-07, "epoch": 4.382882017796556, "percentage": 87.66, "elapsed_time": "3:00:17", "remaining_time": "0:25:23", "throughput": 19921.46, "total_tokens": 215508800}
|
|
{"current_steps": 68470, "total_steps": 78105, "loss": 0.1533, "lr": 2.2826761434570633e-07, "epoch": 4.383202099737533, "percentage": 87.66, "elapsed_time": "3:00:18", "remaining_time": "0:25:22", "throughput": 19921.65, "total_tokens": 215524160}
|
|
{"current_steps": 68475, "total_steps": 78105, "loss": 0.0927, "lr": 2.2803445320499856e-07, "epoch": 4.38352218167851, "percentage": 87.67, "elapsed_time": "3:00:19", "remaining_time": "0:25:21", "throughput": 19921.87, "total_tokens": 215540416}
|
|
{"current_steps": 68480, "total_steps": 78105, "loss": 0.1327, "lr": 2.2780140551441621e-07, "epoch": 4.383842263619487, "percentage": 87.68, "elapsed_time": "3:00:19", "remaining_time": "0:25:20", "throughput": 19922.11, "total_tokens": 215557120}
|
|
{"current_steps": 68485, "total_steps": 78105, "loss": 0.1328, "lr": 2.2756847128559606e-07, "epoch": 4.384162345560464, "percentage": 87.68, "elapsed_time": "3:00:20", "remaining_time": "0:25:19", "throughput": 19922.33, "total_tokens": 215572992}
|
|
{"current_steps": 68490, "total_steps": 78105, "loss": 0.1588, "lr": 2.2733565053016938e-07, "epoch": 4.384482427501441, "percentage": 87.69, "elapsed_time": "3:00:21", "remaining_time": "0:25:19", "throughput": 19922.51, "total_tokens": 215587712}
|
|
{"current_steps": 68495, "total_steps": 78105, "loss": 0.2038, "lr": 2.2710294325976324e-07, "epoch": 4.384802509442418, "percentage": 87.7, "elapsed_time": "3:00:21", "remaining_time": "0:25:18", "throughput": 19922.71, "total_tokens": 215603456}
|
|
{"current_steps": 68500, "total_steps": 78105, "loss": 0.1214, "lr": 2.2687034948599613e-07, "epoch": 4.385122591383394, "percentage": 87.7, "elapsed_time": "3:00:22", "remaining_time": "0:25:17", "throughput": 19922.92, "total_tokens": 215619520}
|
|
{"current_steps": 68505, "total_steps": 78105, "loss": 0.1287, "lr": 2.2663786922048402e-07, "epoch": 4.385442673324371, "percentage": 87.71, "elapsed_time": "3:00:23", "remaining_time": "0:25:16", "throughput": 19923.08, "total_tokens": 215633856}
|
|
{"current_steps": 68510, "total_steps": 78105, "loss": 0.1602, "lr": 2.2640550247483517e-07, "epoch": 4.385762755265348, "percentage": 87.72, "elapsed_time": "3:00:23", "remaining_time": "0:25:15", "throughput": 19923.28, "total_tokens": 215649280}
|
|
{"current_steps": 68515, "total_steps": 78105, "loss": 0.1408, "lr": 2.2617324926065276e-07, "epoch": 4.386082837206325, "percentage": 87.72, "elapsed_time": "3:00:24", "remaining_time": "0:25:15", "throughput": 19923.46, "total_tokens": 215664192}
|
|
{"current_steps": 68520, "total_steps": 78105, "loss": 0.1775, "lr": 2.2594110958953448e-07, "epoch": 4.386402919147302, "percentage": 87.73, "elapsed_time": "3:00:25", "remaining_time": "0:25:14", "throughput": 19923.71, "total_tokens": 215681024}
|
|
{"current_steps": 68525, "total_steps": 78105, "loss": 0.1583, "lr": 2.257090834730713e-07, "epoch": 4.386723001088279, "percentage": 87.73, "elapsed_time": "3:00:26", "remaining_time": "0:25:13", "throughput": 19923.94, "total_tokens": 215697216}
|
|
{"current_steps": 68530, "total_steps": 78105, "loss": 0.131, "lr": 2.2547717092285148e-07, "epoch": 4.387043083029256, "percentage": 87.74, "elapsed_time": "3:00:26", "remaining_time": "0:25:12", "throughput": 19924.18, "total_tokens": 215713536}
|
|
{"current_steps": 68535, "total_steps": 78105, "loss": 0.1456, "lr": 2.25245371950453e-07, "epoch": 4.387363164970233, "percentage": 87.75, "elapsed_time": "3:00:27", "remaining_time": "0:25:11", "throughput": 19924.38, "total_tokens": 215729280}
|
|
{"current_steps": 68540, "total_steps": 78105, "loss": 0.1976, "lr": 2.2501368656745236e-07, "epoch": 4.387683246911209, "percentage": 87.75, "elapsed_time": "3:00:28", "remaining_time": "0:25:11", "throughput": 19924.61, "total_tokens": 215745664}
|
|
{"current_steps": 68545, "total_steps": 78105, "loss": 0.1521, "lr": 2.2478211478541818e-07, "epoch": 4.388003328852186, "percentage": 87.76, "elapsed_time": "3:00:28", "remaining_time": "0:25:10", "throughput": 19924.78, "total_tokens": 215760192}
|
|
{"current_steps": 68550, "total_steps": 78105, "loss": 0.1721, "lr": 2.245506566159142e-07, "epoch": 4.388323410793163, "percentage": 87.77, "elapsed_time": "3:00:29", "remaining_time": "0:25:09", "throughput": 19925.03, "total_tokens": 215776896}
|
|
{"current_steps": 68555, "total_steps": 78105, "loss": 0.1071, "lr": 2.2431931207049784e-07, "epoch": 4.38864349273414, "percentage": 87.77, "elapsed_time": "3:00:30", "remaining_time": "0:25:08", "throughput": 19925.29, "total_tokens": 215793920}
|
|
{"current_steps": 68560, "total_steps": 78105, "loss": 0.1205, "lr": 2.2408808116072155e-07, "epoch": 4.388963574675117, "percentage": 87.78, "elapsed_time": "3:00:30", "remaining_time": "0:25:07", "throughput": 19925.49, "total_tokens": 215809856}
|
|
{"current_steps": 68565, "total_steps": 78105, "loss": 0.1373, "lr": 2.2385696389813165e-07, "epoch": 4.389283656616094, "percentage": 87.79, "elapsed_time": "3:00:31", "remaining_time": "0:25:07", "throughput": 19925.68, "total_tokens": 215824704}
|
|
{"current_steps": 68570, "total_steps": 78105, "loss": 0.16, "lr": 2.2362596029426865e-07, "epoch": 4.389603738557071, "percentage": 87.79, "elapsed_time": "3:00:32", "remaining_time": "0:25:06", "throughput": 19925.87, "total_tokens": 215840064}
|
|
{"current_steps": 68575, "total_steps": 78105, "loss": 0.1306, "lr": 2.2339507036066826e-07, "epoch": 4.389923820498048, "percentage": 87.8, "elapsed_time": "3:00:32", "remaining_time": "0:25:05", "throughput": 19926.06, "total_tokens": 215855296}
|
|
{"current_steps": 68580, "total_steps": 78105, "loss": 0.0857, "lr": 2.2316429410885991e-07, "epoch": 4.390243902439025, "percentage": 87.8, "elapsed_time": "3:00:33", "remaining_time": "0:25:04", "throughput": 19926.26, "total_tokens": 215870976}
|
|
{"current_steps": 68585, "total_steps": 78105, "loss": 0.117, "lr": 2.229336315503669e-07, "epoch": 4.390563984380002, "percentage": 87.81, "elapsed_time": "3:00:34", "remaining_time": "0:25:03", "throughput": 19926.47, "total_tokens": 215886848}
|
|
{"current_steps": 68590, "total_steps": 78105, "loss": 0.1023, "lr": 2.2270308269670748e-07, "epoch": 4.390884066320978, "percentage": 87.82, "elapsed_time": "3:00:34", "remaining_time": "0:25:03", "throughput": 19926.65, "total_tokens": 215902144}
|
|
{"current_steps": 68595, "total_steps": 78105, "loss": 0.1323, "lr": 2.2247264755939414e-07, "epoch": 4.391204148261955, "percentage": 87.82, "elapsed_time": "3:00:35", "remaining_time": "0:25:02", "throughput": 19926.87, "total_tokens": 215918208}
|
|
{"current_steps": 68600, "total_steps": 78105, "loss": 0.1176, "lr": 2.2224232614993373e-07, "epoch": 4.391524230202932, "percentage": 87.83, "elapsed_time": "3:00:36", "remaining_time": "0:25:01", "throughput": 19927.11, "total_tokens": 215934848}
|
|
{"current_steps": 68605, "total_steps": 78105, "loss": 0.1315, "lr": 2.220121184798271e-07, "epoch": 4.391844312143909, "percentage": 87.84, "elapsed_time": "3:00:36", "remaining_time": "0:25:00", "throughput": 19927.29, "total_tokens": 215950016}
|
|
{"current_steps": 68610, "total_steps": 78105, "loss": 0.1965, "lr": 2.2178202456056924e-07, "epoch": 4.392164394084886, "percentage": 87.84, "elapsed_time": "3:00:37", "remaining_time": "0:24:59", "throughput": 19927.46, "total_tokens": 215964864}
|
|
{"current_steps": 68615, "total_steps": 78105, "loss": 0.1214, "lr": 2.215520444036509e-07, "epoch": 4.392484476025863, "percentage": 87.85, "elapsed_time": "3:00:38", "remaining_time": "0:24:59", "throughput": 19927.64, "total_tokens": 215980224}
|
|
{"current_steps": 68620, "total_steps": 78105, "loss": 0.1851, "lr": 2.2132217802055488e-07, "epoch": 4.39280455796684, "percentage": 87.86, "elapsed_time": "3:00:38", "remaining_time": "0:24:58", "throughput": 19927.84, "total_tokens": 215995456}
|
|
{"current_steps": 68625, "total_steps": 78105, "loss": 0.1647, "lr": 2.2109242542276032e-07, "epoch": 4.393124639907817, "percentage": 87.86, "elapsed_time": "3:00:39", "remaining_time": "0:24:57", "throughput": 19928.06, "total_tokens": 216011264}
|
|
{"current_steps": 68630, "total_steps": 78105, "loss": 0.1623, "lr": 2.208627866217397e-07, "epoch": 4.393444721848793, "percentage": 87.87, "elapsed_time": "3:00:40", "remaining_time": "0:24:56", "throughput": 19928.25, "total_tokens": 216026624}
|
|
{"current_steps": 68635, "total_steps": 78105, "loss": 0.176, "lr": 2.2063326162895972e-07, "epoch": 4.39376480378977, "percentage": 87.88, "elapsed_time": "3:00:40", "remaining_time": "0:24:55", "throughput": 19928.46, "total_tokens": 216041984}
|
|
{"current_steps": 68640, "total_steps": 78105, "loss": 0.1405, "lr": 2.2040385045588253e-07, "epoch": 4.394084885730747, "percentage": 87.88, "elapsed_time": "3:00:41", "remaining_time": "0:24:54", "throughput": 19928.64, "total_tokens": 216057024}
|
|
{"current_steps": 68645, "total_steps": 78105, "loss": 0.1438, "lr": 2.2017455311396208e-07, "epoch": 4.394404967671724, "percentage": 87.89, "elapsed_time": "3:00:42", "remaining_time": "0:24:54", "throughput": 19928.9, "total_tokens": 216074496}
|
|
{"current_steps": 68650, "total_steps": 78105, "loss": 0.1325, "lr": 2.199453696146503e-07, "epoch": 4.394725049612701, "percentage": 87.89, "elapsed_time": "3:00:42", "remaining_time": "0:24:53", "throughput": 19929.15, "total_tokens": 216090880}
|
|
{"current_steps": 68655, "total_steps": 78105, "loss": 0.1628, "lr": 2.1971629996938943e-07, "epoch": 4.395045131553678, "percentage": 87.9, "elapsed_time": "3:00:43", "remaining_time": "0:24:52", "throughput": 19929.36, "total_tokens": 216106624}
|
|
{"current_steps": 68660, "total_steps": 78105, "loss": 0.1105, "lr": 2.1948734418961948e-07, "epoch": 4.395365213494655, "percentage": 87.91, "elapsed_time": "3:00:44", "remaining_time": "0:24:51", "throughput": 19929.57, "total_tokens": 216122752}
|
|
{"current_steps": 68665, "total_steps": 78105, "loss": 0.1333, "lr": 2.192585022867727e-07, "epoch": 4.395685295435632, "percentage": 87.91, "elapsed_time": "3:00:44", "remaining_time": "0:24:50", "throughput": 19929.75, "total_tokens": 216137536}
|
|
{"current_steps": 68670, "total_steps": 78105, "loss": 0.1093, "lr": 2.1902977427227662e-07, "epoch": 4.396005377376609, "percentage": 87.92, "elapsed_time": "3:00:45", "remaining_time": "0:24:50", "throughput": 19929.93, "total_tokens": 216152832}
|
|
{"current_steps": 68675, "total_steps": 78105, "loss": 0.0979, "lr": 2.1880116015755236e-07, "epoch": 4.396325459317585, "percentage": 87.93, "elapsed_time": "3:00:46", "remaining_time": "0:24:49", "throughput": 19930.11, "total_tokens": 216168128}
|
|
{"current_steps": 68680, "total_steps": 78105, "loss": 0.139, "lr": 2.185726599540153e-07, "epoch": 4.396645541258562, "percentage": 87.93, "elapsed_time": "3:00:46", "remaining_time": "0:24:48", "throughput": 19930.28, "total_tokens": 216183040}
|
|
{"current_steps": 68685, "total_steps": 78105, "loss": 0.1028, "lr": 2.183442736730765e-07, "epoch": 4.396965623199539, "percentage": 87.94, "elapsed_time": "3:00:47", "remaining_time": "0:24:47", "throughput": 19930.47, "total_tokens": 216198272}
|
|
{"current_steps": 68690, "total_steps": 78105, "loss": 0.1174, "lr": 2.1811600132613992e-07, "epoch": 4.397285705140516, "percentage": 87.95, "elapsed_time": "3:00:48", "remaining_time": "0:24:46", "throughput": 19930.66, "total_tokens": 216213440}
|
|
{"current_steps": 68695, "total_steps": 78105, "loss": 0.1425, "lr": 2.1788784292460452e-07, "epoch": 4.397605787081493, "percentage": 87.95, "elapsed_time": "3:00:48", "remaining_time": "0:24:46", "throughput": 19930.92, "total_tokens": 216230528}
|
|
{"current_steps": 68700, "total_steps": 78105, "loss": 0.1405, "lr": 2.1765979847986313e-07, "epoch": 4.39792586902247, "percentage": 87.96, "elapsed_time": "3:00:49", "remaining_time": "0:24:45", "throughput": 19931.12, "total_tokens": 216246400}
|
|
{"current_steps": 68705, "total_steps": 78105, "loss": 0.16, "lr": 2.1743186800330301e-07, "epoch": 4.398245950963447, "percentage": 87.96, "elapsed_time": "3:00:50", "remaining_time": "0:24:44", "throughput": 19931.32, "total_tokens": 216262016}
|
|
{"current_steps": 68710, "total_steps": 78105, "loss": 0.172, "lr": 2.1720405150630591e-07, "epoch": 4.398566032904424, "percentage": 87.97, "elapsed_time": "3:00:51", "remaining_time": "0:24:43", "throughput": 19931.51, "total_tokens": 216277376}
|
|
{"current_steps": 68715, "total_steps": 78105, "loss": 0.1409, "lr": 2.16976349000248e-07, "epoch": 4.398886114845401, "percentage": 87.98, "elapsed_time": "3:00:51", "remaining_time": "0:24:42", "throughput": 19931.67, "total_tokens": 216291968}
|
|
{"current_steps": 68720, "total_steps": 78105, "loss": 0.1377, "lr": 2.167487604964988e-07, "epoch": 4.399206196786377, "percentage": 87.98, "elapsed_time": "3:00:52", "remaining_time": "0:24:42", "throughput": 19931.88, "total_tokens": 216307712}
|
|
{"current_steps": 68725, "total_steps": 78105, "loss": 0.1266, "lr": 2.1652128600642447e-07, "epoch": 4.399526278727354, "percentage": 87.99, "elapsed_time": "3:00:52", "remaining_time": "0:24:41", "throughput": 19932.04, "total_tokens": 216322368}
|
|
{"current_steps": 68730, "total_steps": 78105, "loss": 0.1413, "lr": 2.1629392554138207e-07, "epoch": 4.399846360668331, "percentage": 88.0, "elapsed_time": "3:00:53", "remaining_time": "0:24:40", "throughput": 19932.21, "total_tokens": 216337344}
|
|
{"current_steps": 68735, "total_steps": 78105, "loss": 0.0947, "lr": 2.1606667911272637e-07, "epoch": 4.400166442609308, "percentage": 88.0, "elapsed_time": "3:00:54", "remaining_time": "0:24:39", "throughput": 19932.37, "total_tokens": 216351680}
|
|
{"current_steps": 68740, "total_steps": 78105, "loss": 0.1737, "lr": 2.1583954673180335e-07, "epoch": 4.400486524550285, "percentage": 88.01, "elapsed_time": "3:00:54", "remaining_time": "0:24:38", "throughput": 19932.59, "total_tokens": 216367936}
|
|
{"current_steps": 68745, "total_steps": 78105, "loss": 0.111, "lr": 2.1561252840995584e-07, "epoch": 4.400806606491262, "percentage": 88.02, "elapsed_time": "3:00:55", "remaining_time": "0:24:38", "throughput": 19932.8, "total_tokens": 216383936}
|
|
{"current_steps": 68750, "total_steps": 78105, "loss": 0.1774, "lr": 2.1538562415852005e-07, "epoch": 4.401126688432239, "percentage": 88.02, "elapsed_time": "3:00:56", "remaining_time": "0:24:37", "throughput": 19932.97, "total_tokens": 216399040}
|
|
{"current_steps": 68755, "total_steps": 78105, "loss": 0.1669, "lr": 2.1515883398882553e-07, "epoch": 4.401446770373216, "percentage": 88.03, "elapsed_time": "3:00:57", "remaining_time": "0:24:36", "throughput": 19933.18, "total_tokens": 216414656}
|
|
{"current_steps": 68760, "total_steps": 78105, "loss": 0.1553, "lr": 2.1493215791219823e-07, "epoch": 4.401766852314193, "percentage": 88.04, "elapsed_time": "3:00:57", "remaining_time": "0:24:35", "throughput": 19933.39, "total_tokens": 216430336}
|
|
{"current_steps": 68765, "total_steps": 78105, "loss": 0.1275, "lr": 2.1470559593995577e-07, "epoch": 4.402086934255169, "percentage": 88.04, "elapsed_time": "3:00:58", "remaining_time": "0:24:34", "throughput": 19933.64, "total_tokens": 216447296}
|
|
{"current_steps": 68770, "total_steps": 78105, "loss": 0.1531, "lr": 2.1447914808341242e-07, "epoch": 4.402407016196146, "percentage": 88.05, "elapsed_time": "3:00:59", "remaining_time": "0:24:34", "throughput": 19933.8, "total_tokens": 216462016}
|
|
{"current_steps": 68775, "total_steps": 78105, "loss": 0.1497, "lr": 2.142528143538758e-07, "epoch": 4.402727098137123, "percentage": 88.05, "elapsed_time": "3:00:59", "remaining_time": "0:24:33", "throughput": 19934.02, "total_tokens": 216478336}
|
|
{"current_steps": 68780, "total_steps": 78105, "loss": 0.1606, "lr": 2.140265947626477e-07, "epoch": 4.4030471800781, "percentage": 88.06, "elapsed_time": "3:01:00", "remaining_time": "0:24:32", "throughput": 19934.23, "total_tokens": 216494336}
|
|
{"current_steps": 68785, "total_steps": 78105, "loss": 0.1079, "lr": 2.138004893210241e-07, "epoch": 4.403367262019077, "percentage": 88.07, "elapsed_time": "3:01:01", "remaining_time": "0:24:31", "throughput": 19934.41, "total_tokens": 216509312}
|
|
{"current_steps": 68790, "total_steps": 78105, "loss": 0.1321, "lr": 2.1357449804029567e-07, "epoch": 4.403687343960054, "percentage": 88.07, "elapsed_time": "3:01:01", "remaining_time": "0:24:30", "throughput": 19934.68, "total_tokens": 216526848}
|
|
{"current_steps": 68795, "total_steps": 78105, "loss": 0.1595, "lr": 2.133486209317473e-07, "epoch": 4.404007425901031, "percentage": 88.08, "elapsed_time": "3:01:02", "remaining_time": "0:24:30", "throughput": 19934.97, "total_tokens": 216544448}
|
|
{"current_steps": 68800, "total_steps": 78105, "loss": 0.1079, "lr": 2.1312285800665745e-07, "epoch": 4.404327507842008, "percentage": 88.09, "elapsed_time": "3:01:03", "remaining_time": "0:24:29", "throughput": 19935.16, "total_tokens": 216559872}
|
|
{"current_steps": 68805, "total_steps": 78105, "loss": 0.0746, "lr": 2.1289720927630069e-07, "epoch": 4.404647589782984, "percentage": 88.09, "elapsed_time": "3:01:03", "remaining_time": "0:24:28", "throughput": 19935.32, "total_tokens": 216574464}
|
|
{"current_steps": 68810, "total_steps": 78105, "loss": 0.1298, "lr": 2.1267167475194444e-07, "epoch": 4.404967671723961, "percentage": 88.1, "elapsed_time": "3:01:04", "remaining_time": "0:24:27", "throughput": 19935.53, "total_tokens": 216590080}
|
|
{"current_steps": 68815, "total_steps": 78105, "loss": 0.1356, "lr": 2.1244625444485023e-07, "epoch": 4.405287753664938, "percentage": 88.11, "elapsed_time": "3:01:05", "remaining_time": "0:24:26", "throughput": 19935.77, "total_tokens": 216606528}
|
|
{"current_steps": 68820, "total_steps": 78105, "loss": 0.1249, "lr": 2.122209483662749e-07, "epoch": 4.405607835605915, "percentage": 88.11, "elapsed_time": "3:01:05", "remaining_time": "0:24:25", "throughput": 19935.94, "total_tokens": 216621248}
|
|
{"current_steps": 68825, "total_steps": 78105, "loss": 0.1037, "lr": 2.1199575652746857e-07, "epoch": 4.405927917546892, "percentage": 88.12, "elapsed_time": "3:01:06", "remaining_time": "0:24:25", "throughput": 19936.12, "total_tokens": 216636416}
|
|
{"current_steps": 68830, "total_steps": 78105, "loss": 0.1393, "lr": 2.1177067893967646e-07, "epoch": 4.406247999487869, "percentage": 88.12, "elapsed_time": "3:01:07", "remaining_time": "0:24:24", "throughput": 19936.34, "total_tokens": 216652544}
|
|
{"current_steps": 68835, "total_steps": 78105, "loss": 0.1389, "lr": 2.1154571561413762e-07, "epoch": 4.406568081428846, "percentage": 88.13, "elapsed_time": "3:01:07", "remaining_time": "0:24:23", "throughput": 19936.53, "total_tokens": 216668032}
|
|
{"current_steps": 68840, "total_steps": 78105, "loss": 0.1304, "lr": 2.1132086656208527e-07, "epoch": 4.406888163369823, "percentage": 88.14, "elapsed_time": "3:01:08", "remaining_time": "0:24:22", "throughput": 19936.72, "total_tokens": 216683136}
|
|
{"current_steps": 68845, "total_steps": 78105, "loss": 0.1055, "lr": 2.1109613179474825e-07, "epoch": 4.4072082453108, "percentage": 88.14, "elapsed_time": "3:01:09", "remaining_time": "0:24:21", "throughput": 19936.93, "total_tokens": 216699008}
|
|
{"current_steps": 68850, "total_steps": 78105, "loss": 0.1204, "lr": 2.1087151132334727e-07, "epoch": 4.407528327251777, "percentage": 88.15, "elapsed_time": "3:01:09", "remaining_time": "0:24:21", "throughput": 19937.11, "total_tokens": 216714368}
|
|
{"current_steps": 68855, "total_steps": 78105, "loss": 0.1576, "lr": 2.1064700515909975e-07, "epoch": 4.407848409192753, "percentage": 88.16, "elapsed_time": "3:01:10", "remaining_time": "0:24:20", "throughput": 19937.33, "total_tokens": 216730432}
|
|
{"current_steps": 68860, "total_steps": 78105, "loss": 0.114, "lr": 2.1042261331321616e-07, "epoch": 4.40816849113373, "percentage": 88.16, "elapsed_time": "3:01:11", "remaining_time": "0:24:19", "throughput": 19937.6, "total_tokens": 216747904}
|
|
{"current_steps": 68865, "total_steps": 78105, "loss": 0.154, "lr": 2.1019833579690063e-07, "epoch": 4.408488573074707, "percentage": 88.17, "elapsed_time": "3:01:11", "remaining_time": "0:24:18", "throughput": 19937.82, "total_tokens": 216763648}
|
|
{"current_steps": 68870, "total_steps": 78105, "loss": 0.1425, "lr": 2.099741726213539e-07, "epoch": 4.408808655015684, "percentage": 88.18, "elapsed_time": "3:01:12", "remaining_time": "0:24:17", "throughput": 19938.03, "total_tokens": 216779136}
|
|
{"current_steps": 68875, "total_steps": 78105, "loss": 0.1038, "lr": 2.097501237977681e-07, "epoch": 4.409128736956661, "percentage": 88.18, "elapsed_time": "3:01:13", "remaining_time": "0:24:17", "throughput": 19938.23, "total_tokens": 216794816}
|
|
{"current_steps": 68880, "total_steps": 78105, "loss": 0.1107, "lr": 2.095261893373321e-07, "epoch": 4.409448818897638, "percentage": 88.19, "elapsed_time": "3:01:13", "remaining_time": "0:24:16", "throughput": 19938.38, "total_tokens": 216809280}
|
|
{"current_steps": 68885, "total_steps": 78105, "loss": 0.1353, "lr": 2.0930236925122692e-07, "epoch": 4.409768900838615, "percentage": 88.2, "elapsed_time": "3:01:14", "remaining_time": "0:24:15", "throughput": 19938.55, "total_tokens": 216824128}
|
|
{"current_steps": 68890, "total_steps": 78105, "loss": 0.1449, "lr": 2.0907866355063006e-07, "epoch": 4.410088982779592, "percentage": 88.2, "elapsed_time": "3:01:15", "remaining_time": "0:24:14", "throughput": 19938.79, "total_tokens": 216840256}
|
|
{"current_steps": 68895, "total_steps": 78105, "loss": 0.121, "lr": 2.0885507224671197e-07, "epoch": 4.410409064720568, "percentage": 88.21, "elapsed_time": "3:01:16", "remaining_time": "0:24:13", "throughput": 19939.04, "total_tokens": 216857600}
|
|
{"current_steps": 68900, "total_steps": 78105, "loss": 0.1349, "lr": 2.086315953506371e-07, "epoch": 4.410729146661545, "percentage": 88.21, "elapsed_time": "3:01:16", "remaining_time": "0:24:13", "throughput": 19939.24, "total_tokens": 216873152}
|
|
{"current_steps": 68905, "total_steps": 78105, "loss": 0.1164, "lr": 2.084082328735651e-07, "epoch": 4.411049228602522, "percentage": 88.22, "elapsed_time": "3:01:17", "remaining_time": "0:24:12", "throughput": 19939.45, "total_tokens": 216889152}
|
|
{"current_steps": 68910, "total_steps": 78105, "loss": 0.1164, "lr": 2.0818498482664955e-07, "epoch": 4.411369310543499, "percentage": 88.23, "elapsed_time": "3:01:18", "remaining_time": "0:24:11", "throughput": 19939.62, "total_tokens": 216904192}
|
|
{"current_steps": 68915, "total_steps": 78105, "loss": 0.1648, "lr": 2.0796185122103797e-07, "epoch": 4.411689392484476, "percentage": 88.23, "elapsed_time": "3:01:18", "remaining_time": "0:24:10", "throughput": 19939.82, "total_tokens": 216919552}
|
|
{"current_steps": 68920, "total_steps": 78105, "loss": 0.0874, "lr": 2.0773883206787275e-07, "epoch": 4.412009474425453, "percentage": 88.24, "elapsed_time": "3:01:19", "remaining_time": "0:24:09", "throughput": 19940.01, "total_tokens": 216934720}
|
|
{"current_steps": 68925, "total_steps": 78105, "loss": 0.1486, "lr": 2.0751592737829062e-07, "epoch": 4.41232955636643, "percentage": 88.25, "elapsed_time": "3:01:20", "remaining_time": "0:24:09", "throughput": 19940.2, "total_tokens": 216950336}
|
|
{"current_steps": 68930, "total_steps": 78105, "loss": 0.1377, "lr": 2.0729313716342209e-07, "epoch": 4.412649638307407, "percentage": 88.25, "elapsed_time": "3:01:20", "remaining_time": "0:24:08", "throughput": 19940.38, "total_tokens": 216965504}
|
|
{"current_steps": 68935, "total_steps": 78105, "loss": 0.151, "lr": 2.0707046143439187e-07, "epoch": 4.412969720248384, "percentage": 88.26, "elapsed_time": "3:01:21", "remaining_time": "0:24:07", "throughput": 19940.55, "total_tokens": 216980672}
|
|
{"current_steps": 68940, "total_steps": 78105, "loss": 0.1334, "lr": 2.068479002023191e-07, "epoch": 4.41328980218936, "percentage": 88.27, "elapsed_time": "3:01:22", "remaining_time": "0:24:06", "throughput": 19940.77, "total_tokens": 216996608}
|
|
{"current_steps": 68945, "total_steps": 78105, "loss": 0.1647, "lr": 2.0662545347831798e-07, "epoch": 4.413609884130337, "percentage": 88.27, "elapsed_time": "3:01:22", "remaining_time": "0:24:05", "throughput": 19941.0, "total_tokens": 217013184}
|
|
{"current_steps": 68950, "total_steps": 78105, "loss": 0.1388, "lr": 2.0640312127349544e-07, "epoch": 4.413929966071314, "percentage": 88.28, "elapsed_time": "3:01:23", "remaining_time": "0:24:05", "throughput": 19941.19, "total_tokens": 217028544}
|
|
{"current_steps": 68955, "total_steps": 78105, "loss": 0.121, "lr": 2.0618090359895482e-07, "epoch": 4.414250048012291, "percentage": 88.29, "elapsed_time": "3:01:24", "remaining_time": "0:24:04", "throughput": 19941.43, "total_tokens": 217045120}
|
|
{"current_steps": 68960, "total_steps": 78105, "loss": 0.1209, "lr": 2.0595880046579113e-07, "epoch": 4.414570129953268, "percentage": 88.29, "elapsed_time": "3:01:24", "remaining_time": "0:24:03", "throughput": 19941.6, "total_tokens": 217060224}
|
|
{"current_steps": 68965, "total_steps": 78105, "loss": 0.1759, "lr": 2.0573681188509637e-07, "epoch": 4.414890211894245, "percentage": 88.3, "elapsed_time": "3:01:25", "remaining_time": "0:24:02", "throughput": 19941.77, "total_tokens": 217074880}
|
|
{"current_steps": 68970, "total_steps": 78105, "loss": 0.1506, "lr": 2.055149378679541e-07, "epoch": 4.415210293835222, "percentage": 88.3, "elapsed_time": "3:01:26", "remaining_time": "0:24:01", "throughput": 19941.98, "total_tokens": 217090624}
|
|
{"current_steps": 68975, "total_steps": 78105, "loss": 0.0857, "lr": 2.052931784254447e-07, "epoch": 4.415530375776199, "percentage": 88.31, "elapsed_time": "3:01:26", "remaining_time": "0:24:01", "throughput": 19942.23, "total_tokens": 217107136}
|
|
{"current_steps": 68980, "total_steps": 78105, "loss": 0.1358, "lr": 2.050715335686415e-07, "epoch": 4.415850457717176, "percentage": 88.32, "elapsed_time": "3:01:27", "remaining_time": "0:24:00", "throughput": 19942.42, "total_tokens": 217122368}
|
|
{"current_steps": 68985, "total_steps": 78105, "loss": 0.1529, "lr": 2.048500033086115e-07, "epoch": 4.416170539658152, "percentage": 88.32, "elapsed_time": "3:01:28", "remaining_time": "0:23:59", "throughput": 19942.64, "total_tokens": 217138816}
|
|
{"current_steps": 68990, "total_steps": 78105, "loss": 0.0889, "lr": 2.04628587656418e-07, "epoch": 4.416490621599129, "percentage": 88.33, "elapsed_time": "3:01:28", "remaining_time": "0:23:58", "throughput": 19942.82, "total_tokens": 217153536}
|
|
{"current_steps": 68995, "total_steps": 78105, "loss": 0.1377, "lr": 2.044072866231159e-07, "epoch": 4.416810703540106, "percentage": 88.34, "elapsed_time": "3:01:29", "remaining_time": "0:23:57", "throughput": 19943.04, "total_tokens": 217169984}
|
|
{"current_steps": 69000, "total_steps": 78105, "loss": 0.1121, "lr": 2.0418610021975732e-07, "epoch": 4.417130785481083, "percentage": 88.34, "elapsed_time": "3:01:30", "remaining_time": "0:23:57", "throughput": 19943.28, "total_tokens": 217186560}
|
|
{"current_steps": 69005, "total_steps": 78105, "loss": 0.1765, "lr": 2.0396502845738602e-07, "epoch": 4.41745086742206, "percentage": 88.35, "elapsed_time": "3:01:30", "remaining_time": "0:23:56", "throughput": 19943.55, "total_tokens": 217203776}
|
|
{"current_steps": 69010, "total_steps": 78105, "loss": 0.0829, "lr": 2.0374407134704178e-07, "epoch": 4.417770949363037, "percentage": 88.36, "elapsed_time": "3:01:31", "remaining_time": "0:23:55", "throughput": 19943.74, "total_tokens": 217219264}
|
|
{"current_steps": 69015, "total_steps": 78105, "loss": 0.0862, "lr": 2.0352322889975796e-07, "epoch": 4.418091031304014, "percentage": 88.36, "elapsed_time": "3:01:32", "remaining_time": "0:23:54", "throughput": 19943.98, "total_tokens": 217235840}
|
|
{"current_steps": 69020, "total_steps": 78105, "loss": 0.1079, "lr": 2.0330250112656215e-07, "epoch": 4.418411113244991, "percentage": 88.37, "elapsed_time": "3:01:32", "remaining_time": "0:23:53", "throughput": 19944.18, "total_tokens": 217251456}
|
|
{"current_steps": 69025, "total_steps": 78105, "loss": 0.1418, "lr": 2.0308188803847635e-07, "epoch": 4.418731195185968, "percentage": 88.37, "elapsed_time": "3:01:33", "remaining_time": "0:23:53", "throughput": 19944.35, "total_tokens": 217266240}
|
|
{"current_steps": 69030, "total_steps": 78105, "loss": 0.1094, "lr": 2.0286138964651676e-07, "epoch": 4.419051277126944, "percentage": 88.38, "elapsed_time": "3:01:34", "remaining_time": "0:23:52", "throughput": 19944.6, "total_tokens": 217282944}
|
|
{"current_steps": 69035, "total_steps": 78105, "loss": 0.1358, "lr": 2.0264100596169345e-07, "epoch": 4.419371359067921, "percentage": 88.39, "elapsed_time": "3:01:34", "remaining_time": "0:23:51", "throughput": 19944.79, "total_tokens": 217298112}
|
|
{"current_steps": 69040, "total_steps": 78105, "loss": 0.1167, "lr": 2.0242073699501236e-07, "epoch": 4.419691441008898, "percentage": 88.39, "elapsed_time": "3:01:35", "remaining_time": "0:23:50", "throughput": 19944.98, "total_tokens": 217313664}
|
|
{"current_steps": 69045, "total_steps": 78105, "loss": 0.1408, "lr": 2.0220058275747217e-07, "epoch": 4.420011522949875, "percentage": 88.4, "elapsed_time": "3:01:36", "remaining_time": "0:23:49", "throughput": 19945.16, "total_tokens": 217328320}
|
|
{"current_steps": 69050, "total_steps": 78105, "loss": 0.2011, "lr": 2.0198054326006606e-07, "epoch": 4.420331604890852, "percentage": 88.41, "elapsed_time": "3:01:36", "remaining_time": "0:23:48", "throughput": 19945.33, "total_tokens": 217343552}
|
|
{"current_steps": 69055, "total_steps": 78105, "loss": 0.1643, "lr": 2.017606185137816e-07, "epoch": 4.420651686831829, "percentage": 88.41, "elapsed_time": "3:01:37", "remaining_time": "0:23:48", "throughput": 19945.56, "total_tokens": 217359616}
|
|
{"current_steps": 69060, "total_steps": 78105, "loss": 0.1961, "lr": 2.0154080852960056e-07, "epoch": 4.420971768772806, "percentage": 88.42, "elapsed_time": "3:01:38", "remaining_time": "0:23:47", "throughput": 19945.76, "total_tokens": 217375424}
|
|
{"current_steps": 69065, "total_steps": 78105, "loss": 0.1649, "lr": 2.0132111331849947e-07, "epoch": 4.421291850713783, "percentage": 88.43, "elapsed_time": "3:01:39", "remaining_time": "0:23:46", "throughput": 19945.94, "total_tokens": 217390848}
|
|
{"current_steps": 69070, "total_steps": 78105, "loss": 0.1364, "lr": 2.0110153289144812e-07, "epoch": 4.421611932654759, "percentage": 88.43, "elapsed_time": "3:01:39", "remaining_time": "0:23:45", "throughput": 19946.19, "total_tokens": 217407616}
|
|
{"current_steps": 69075, "total_steps": 78105, "loss": 0.1023, "lr": 2.0088206725941278e-07, "epoch": 4.421932014595736, "percentage": 88.44, "elapsed_time": "3:01:40", "remaining_time": "0:23:44", "throughput": 19946.4, "total_tokens": 217423296}
|
|
{"current_steps": 69080, "total_steps": 78105, "loss": 0.111, "lr": 2.006627164333502e-07, "epoch": 4.422252096536713, "percentage": 88.45, "elapsed_time": "3:01:41", "remaining_time": "0:23:44", "throughput": 19946.58, "total_tokens": 217438656}
|
|
{"current_steps": 69085, "total_steps": 78105, "loss": 0.1101, "lr": 2.004434804242153e-07, "epoch": 4.42257217847769, "percentage": 88.45, "elapsed_time": "3:01:41", "remaining_time": "0:23:43", "throughput": 19946.82, "total_tokens": 217455040}
|
|
{"current_steps": 69090, "total_steps": 78105, "loss": 0.1491, "lr": 2.0022435924295508e-07, "epoch": 4.422892260418667, "percentage": 88.46, "elapsed_time": "3:01:42", "remaining_time": "0:23:42", "throughput": 19947.01, "total_tokens": 217470848}
|
|
{"current_steps": 69095, "total_steps": 78105, "loss": 0.1008, "lr": 2.0000535290051083e-07, "epoch": 4.423212342359644, "percentage": 88.46, "elapsed_time": "3:01:43", "remaining_time": "0:23:41", "throughput": 19947.28, "total_tokens": 217488064}
|
|
{"current_steps": 69100, "total_steps": 78105, "loss": 0.1556, "lr": 1.997864614078196e-07, "epoch": 4.423532424300621, "percentage": 88.47, "elapsed_time": "3:01:43", "remaining_time": "0:23:40", "throughput": 19947.46, "total_tokens": 217503296}
|
|
{"current_steps": 69105, "total_steps": 78105, "loss": 0.1135, "lr": 1.9956768477581044e-07, "epoch": 4.423852506241598, "percentage": 88.48, "elapsed_time": "3:01:44", "remaining_time": "0:23:40", "throughput": 19947.65, "total_tokens": 217518464}
|
|
{"current_steps": 69110, "total_steps": 78105, "loss": 0.1434, "lr": 1.9934902301540964e-07, "epoch": 4.424172588182575, "percentage": 88.48, "elapsed_time": "3:01:45", "remaining_time": "0:23:39", "throughput": 19947.85, "total_tokens": 217533952}
|
|
{"current_steps": 69115, "total_steps": 78105, "loss": 0.1169, "lr": 1.99130476137534e-07, "epoch": 4.424492670123552, "percentage": 88.49, "elapsed_time": "3:01:45", "remaining_time": "0:23:38", "throughput": 19948.02, "total_tokens": 217548992}
|
|
{"current_steps": 69120, "total_steps": 78105, "loss": 0.1238, "lr": 1.9891204415309785e-07, "epoch": 4.424812752064528, "percentage": 88.5, "elapsed_time": "3:01:46", "remaining_time": "0:23:37", "throughput": 19948.21, "total_tokens": 217564160}
|
|
{"current_steps": 69125, "total_steps": 78105, "loss": 0.1162, "lr": 1.9869372707300856e-07, "epoch": 4.425132834005505, "percentage": 88.5, "elapsed_time": "3:01:47", "remaining_time": "0:23:36", "throughput": 19948.42, "total_tokens": 217580288}
|
|
{"current_steps": 69130, "total_steps": 78105, "loss": 0.1178, "lr": 1.984755249081674e-07, "epoch": 4.425452915946482, "percentage": 88.51, "elapsed_time": "3:01:47", "remaining_time": "0:23:36", "throughput": 19948.59, "total_tokens": 217595008}
|
|
{"current_steps": 69135, "total_steps": 78105, "loss": 0.0866, "lr": 1.9825743766947014e-07, "epoch": 4.425772997887459, "percentage": 88.52, "elapsed_time": "3:01:48", "remaining_time": "0:23:35", "throughput": 19948.77, "total_tokens": 217609984}
|
|
{"current_steps": 69140, "total_steps": 78105, "loss": 0.0968, "lr": 1.980394653678072e-07, "epoch": 4.426093079828436, "percentage": 88.52, "elapsed_time": "3:01:49", "remaining_time": "0:23:34", "throughput": 19948.94, "total_tokens": 217624640}
|
|
{"current_steps": 69145, "total_steps": 78105, "loss": 0.1389, "lr": 1.978216080140624e-07, "epoch": 4.426413161769413, "percentage": 88.53, "elapsed_time": "3:01:49", "remaining_time": "0:23:33", "throughput": 19949.15, "total_tokens": 217640896}
|
|
{"current_steps": 69150, "total_steps": 78105, "loss": 0.1078, "lr": 1.9760386561911537e-07, "epoch": 4.42673324371039, "percentage": 88.53, "elapsed_time": "3:01:50", "remaining_time": "0:23:32", "throughput": 19949.35, "total_tokens": 217656512}
|
|
{"current_steps": 69155, "total_steps": 78105, "loss": 0.0876, "lr": 1.9738623819383852e-07, "epoch": 4.427053325651367, "percentage": 88.54, "elapsed_time": "3:01:51", "remaining_time": "0:23:32", "throughput": 19949.55, "total_tokens": 217671616}
|
|
{"current_steps": 69160, "total_steps": 78105, "loss": 0.1675, "lr": 1.9716872574909923e-07, "epoch": 4.427373407592343, "percentage": 88.55, "elapsed_time": "3:01:51", "remaining_time": "0:23:31", "throughput": 19949.73, "total_tokens": 217686656}
|
|
{"current_steps": 69165, "total_steps": 78105, "loss": 0.1062, "lr": 1.9695132829575857e-07, "epoch": 4.42769348953332, "percentage": 88.55, "elapsed_time": "3:01:52", "remaining_time": "0:23:30", "throughput": 19949.97, "total_tokens": 217703296}
|
|
{"current_steps": 69170, "total_steps": 78105, "loss": 0.128, "lr": 1.9673404584467258e-07, "epoch": 4.428013571474297, "percentage": 88.56, "elapsed_time": "3:01:53", "remaining_time": "0:23:29", "throughput": 19950.14, "total_tokens": 217718016}
|
|
{"current_steps": 69175, "total_steps": 78105, "loss": 0.12, "lr": 1.9651687840669092e-07, "epoch": 4.428333653415274, "percentage": 88.57, "elapsed_time": "3:01:53", "remaining_time": "0:23:28", "throughput": 19950.31, "total_tokens": 217732992}
|
|
{"current_steps": 69180, "total_steps": 78105, "loss": 0.1107, "lr": 1.962998259926577e-07, "epoch": 4.428653735356251, "percentage": 88.57, "elapsed_time": "3:01:54", "remaining_time": "0:23:28", "throughput": 19950.49, "total_tokens": 217748096}
|
|
{"current_steps": 69185, "total_steps": 78105, "loss": 0.1051, "lr": 1.9608288861341225e-07, "epoch": 4.428973817297228, "percentage": 88.58, "elapsed_time": "3:01:55", "remaining_time": "0:23:27", "throughput": 19950.7, "total_tokens": 217764096}
|
|
{"current_steps": 69190, "total_steps": 78105, "loss": 0.1685, "lr": 1.9586606627978593e-07, "epoch": 4.429293899238205, "percentage": 88.59, "elapsed_time": "3:01:55", "remaining_time": "0:23:26", "throughput": 19950.94, "total_tokens": 217780608}
|
|
{"current_steps": 69195, "total_steps": 78105, "loss": 0.0954, "lr": 1.9564935900260734e-07, "epoch": 4.429613981179182, "percentage": 88.59, "elapsed_time": "3:01:56", "remaining_time": "0:23:25", "throughput": 19951.15, "total_tokens": 217796352}
|
|
{"current_steps": 69200, "total_steps": 78105, "loss": 0.0962, "lr": 1.9543276679269613e-07, "epoch": 4.429934063120159, "percentage": 88.6, "elapsed_time": "3:01:57", "remaining_time": "0:23:24", "throughput": 19951.31, "total_tokens": 217811136}
|
|
{"current_steps": 69205, "total_steps": 78105, "loss": 0.1146, "lr": 1.9521628966086864e-07, "epoch": 4.430254145061135, "percentage": 88.61, "elapsed_time": "3:01:57", "remaining_time": "0:23:24", "throughput": 19951.53, "total_tokens": 217827456}
|
|
{"current_steps": 69210, "total_steps": 78105, "loss": 0.1101, "lr": 1.9499992761793458e-07, "epoch": 4.430574227002112, "percentage": 88.61, "elapsed_time": "3:01:58", "remaining_time": "0:23:23", "throughput": 19951.74, "total_tokens": 217843264}
|
|
{"current_steps": 69215, "total_steps": 78105, "loss": 0.1223, "lr": 1.9478368067469749e-07, "epoch": 4.430894308943089, "percentage": 88.62, "elapsed_time": "3:01:59", "remaining_time": "0:23:22", "throughput": 19951.92, "total_tokens": 217858432}
|
|
{"current_steps": 69220, "total_steps": 78105, "loss": 0.126, "lr": 1.9456754884195655e-07, "epoch": 4.431214390884066, "percentage": 88.62, "elapsed_time": "3:01:59", "remaining_time": "0:23:21", "throughput": 19952.13, "total_tokens": 217874112}
|
|
{"current_steps": 69225, "total_steps": 78105, "loss": 0.1397, "lr": 1.9435153213050312e-07, "epoch": 4.431534472825043, "percentage": 88.63, "elapsed_time": "3:02:00", "remaining_time": "0:23:20", "throughput": 19952.31, "total_tokens": 217889344}
|
|
{"current_steps": 69230, "total_steps": 78105, "loss": 0.0912, "lr": 1.9413563055112465e-07, "epoch": 4.43185455476602, "percentage": 88.64, "elapsed_time": "3:02:01", "remaining_time": "0:23:20", "throughput": 19952.49, "total_tokens": 217904576}
|
|
{"current_steps": 69235, "total_steps": 78105, "loss": 0.1608, "lr": 1.939198441146023e-07, "epoch": 4.432174636706997, "percentage": 88.64, "elapsed_time": "3:02:01", "remaining_time": "0:23:19", "throughput": 19952.65, "total_tokens": 217919232}
|
|
{"current_steps": 69240, "total_steps": 78105, "loss": 0.1051, "lr": 1.9370417283171072e-07, "epoch": 4.432494718647974, "percentage": 88.65, "elapsed_time": "3:02:02", "remaining_time": "0:23:18", "throughput": 19952.83, "total_tokens": 217934144}
|
|
{"current_steps": 69245, "total_steps": 78105, "loss": 0.1495, "lr": 1.9348861671321966e-07, "epoch": 4.432814800588951, "percentage": 88.66, "elapsed_time": "3:02:03", "remaining_time": "0:23:17", "throughput": 19953.0, "total_tokens": 217949056}
|
|
{"current_steps": 69250, "total_steps": 78105, "loss": 0.1505, "lr": 1.932731757698933e-07, "epoch": 4.433134882529927, "percentage": 88.66, "elapsed_time": "3:02:03", "remaining_time": "0:23:16", "throughput": 19953.26, "total_tokens": 217966336}
|
|
{"current_steps": 69255, "total_steps": 78105, "loss": 0.1432, "lr": 1.9305785001248883e-07, "epoch": 4.433454964470904, "percentage": 88.67, "elapsed_time": "3:02:04", "remaining_time": "0:23:16", "throughput": 19953.46, "total_tokens": 217981824}
|
|
{"current_steps": 69260, "total_steps": 78105, "loss": 0.1343, "lr": 1.9284263945175908e-07, "epoch": 4.433775046411881, "percentage": 88.68, "elapsed_time": "3:02:05", "remaining_time": "0:23:15", "throughput": 19953.65, "total_tokens": 217997632}
|
|
{"current_steps": 69265, "total_steps": 78105, "loss": 0.1941, "lr": 1.9262754409844985e-07, "epoch": 4.434095128352858, "percentage": 88.68, "elapsed_time": "3:02:05", "remaining_time": "0:23:14", "throughput": 19953.83, "total_tokens": 218012416}
|
|
{"current_steps": 69270, "total_steps": 78105, "loss": 0.1536, "lr": 1.9241256396330288e-07, "epoch": 4.434415210293835, "percentage": 88.69, "elapsed_time": "3:02:06", "remaining_time": "0:23:13", "throughput": 19954.04, "total_tokens": 218027904}
|
|
{"current_steps": 69275, "total_steps": 78105, "loss": 0.1174, "lr": 1.921976990570529e-07, "epoch": 4.434735292234812, "percentage": 88.69, "elapsed_time": "3:02:07", "remaining_time": "0:23:12", "throughput": 19954.24, "total_tokens": 218043840}
|
|
{"current_steps": 69280, "total_steps": 78105, "loss": 0.1416, "lr": 1.919829493904285e-07, "epoch": 4.435055374175789, "percentage": 88.7, "elapsed_time": "3:02:07", "remaining_time": "0:23:12", "throughput": 19954.43, "total_tokens": 218059520}
|
|
{"current_steps": 69285, "total_steps": 78105, "loss": 0.1487, "lr": 1.9176831497415393e-07, "epoch": 4.435375456116766, "percentage": 88.71, "elapsed_time": "3:02:08", "remaining_time": "0:23:11", "throughput": 19954.61, "total_tokens": 218074880}
|
|
{"current_steps": 69290, "total_steps": 78105, "loss": 0.1598, "lr": 1.915537958189459e-07, "epoch": 4.435695538057743, "percentage": 88.71, "elapsed_time": "3:02:09", "remaining_time": "0:23:10", "throughput": 19954.85, "total_tokens": 218091648}
|
|
{"current_steps": 69295, "total_steps": 78105, "loss": 0.1524, "lr": 1.9133939193551804e-07, "epoch": 4.436015619998719, "percentage": 88.72, "elapsed_time": "3:02:09", "remaining_time": "0:23:09", "throughput": 19955.05, "total_tokens": 218107456}
|
|
{"current_steps": 69300, "total_steps": 78105, "loss": 0.1667, "lr": 1.9112510333457485e-07, "epoch": 4.436335701939696, "percentage": 88.73, "elapsed_time": "3:02:10", "remaining_time": "0:23:08", "throughput": 19955.3, "total_tokens": 218124416}
|
|
{"current_steps": 69305, "total_steps": 78105, "loss": 0.1479, "lr": 1.9091093002681803e-07, "epoch": 4.436655783880673, "percentage": 88.73, "elapsed_time": "3:02:11", "remaining_time": "0:23:08", "throughput": 19955.51, "total_tokens": 218140224}
|
|
{"current_steps": 69310, "total_steps": 78105, "loss": 0.1211, "lr": 1.906968720229413e-07, "epoch": 4.43697586582165, "percentage": 88.74, "elapsed_time": "3:02:12", "remaining_time": "0:23:07", "throughput": 19955.76, "total_tokens": 218157376}
|
|
{"current_steps": 69315, "total_steps": 78105, "loss": 0.1096, "lr": 1.904829293336341e-07, "epoch": 4.437295947762627, "percentage": 88.75, "elapsed_time": "3:02:12", "remaining_time": "0:23:06", "throughput": 19955.93, "total_tokens": 218172288}
|
|
{"current_steps": 69320, "total_steps": 78105, "loss": 0.1438, "lr": 1.9026910196957986e-07, "epoch": 4.437616029703604, "percentage": 88.75, "elapsed_time": "3:02:13", "remaining_time": "0:23:05", "throughput": 19956.13, "total_tokens": 218187904}
|
|
{"current_steps": 69325, "total_steps": 78105, "loss": 0.1146, "lr": 1.9005538994145507e-07, "epoch": 4.437936111644581, "percentage": 88.76, "elapsed_time": "3:02:14", "remaining_time": "0:23:04", "throughput": 19956.34, "total_tokens": 218203968}
|
|
{"current_steps": 69330, "total_steps": 78105, "loss": 0.1408, "lr": 1.8984179325993308e-07, "epoch": 4.438256193585558, "percentage": 88.77, "elapsed_time": "3:02:14", "remaining_time": "0:23:03", "throughput": 19956.53, "total_tokens": 218219776}
|
|
{"current_steps": 69335, "total_steps": 78105, "loss": 0.1296, "lr": 1.8962831193567788e-07, "epoch": 4.438576275526534, "percentage": 88.77, "elapsed_time": "3:02:15", "remaining_time": "0:23:03", "throughput": 19956.71, "total_tokens": 218234880}
|
|
{"current_steps": 69340, "total_steps": 78105, "loss": 0.1389, "lr": 1.8941494597935124e-07, "epoch": 4.438896357467511, "percentage": 88.78, "elapsed_time": "3:02:16", "remaining_time": "0:23:02", "throughput": 19956.96, "total_tokens": 218251968}
|
|
{"current_steps": 69345, "total_steps": 78105, "loss": 0.1744, "lr": 1.89201695401606e-07, "epoch": 4.439216439408488, "percentage": 88.78, "elapsed_time": "3:02:16", "remaining_time": "0:23:01", "throughput": 19957.14, "total_tokens": 218266816}
|
|
{"current_steps": 69350, "total_steps": 78105, "loss": 0.1143, "lr": 1.88988560213092e-07, "epoch": 4.439536521349465, "percentage": 88.79, "elapsed_time": "3:02:17", "remaining_time": "0:23:00", "throughput": 19957.41, "total_tokens": 218284160}
|
|
{"current_steps": 69355, "total_steps": 78105, "loss": 0.1526, "lr": 1.8877554042445178e-07, "epoch": 4.439856603290442, "percentage": 88.8, "elapsed_time": "3:02:18", "remaining_time": "0:22:59", "throughput": 19957.61, "total_tokens": 218299712}
|
|
{"current_steps": 69360, "total_steps": 78105, "loss": 0.1798, "lr": 1.885626360463222e-07, "epoch": 4.440176685231419, "percentage": 88.8, "elapsed_time": "3:02:18", "remaining_time": "0:22:59", "throughput": 19957.86, "total_tokens": 218316544}
|
|
{"current_steps": 69365, "total_steps": 78105, "loss": 0.1062, "lr": 1.8834984708933468e-07, "epoch": 4.440496767172396, "percentage": 88.81, "elapsed_time": "3:02:19", "remaining_time": "0:22:58", "throughput": 19958.03, "total_tokens": 218331200}
|
|
{"current_steps": 69370, "total_steps": 78105, "loss": 0.1513, "lr": 1.8813717356411492e-07, "epoch": 4.440816849113373, "percentage": 88.82, "elapsed_time": "3:02:20", "remaining_time": "0:22:57", "throughput": 19958.21, "total_tokens": 218346368}
|
|
{"current_steps": 69375, "total_steps": 78105, "loss": 0.1303, "lr": 1.879246154812825e-07, "epoch": 4.44113693105435, "percentage": 88.82, "elapsed_time": "3:02:20", "remaining_time": "0:22:56", "throughput": 19958.39, "total_tokens": 218361664}
|
|
{"current_steps": 69380, "total_steps": 78105, "loss": 0.1369, "lr": 1.877121728514522e-07, "epoch": 4.441457012995327, "percentage": 88.83, "elapsed_time": "3:02:21", "remaining_time": "0:22:55", "throughput": 19958.61, "total_tokens": 218377920}
|
|
{"current_steps": 69385, "total_steps": 78105, "loss": 0.1277, "lr": 1.8749984568523089e-07, "epoch": 4.441777094936303, "percentage": 88.84, "elapsed_time": "3:02:22", "remaining_time": "0:22:55", "throughput": 19958.8, "total_tokens": 218393280}
|
|
{"current_steps": 69390, "total_steps": 78105, "loss": 0.0977, "lr": 1.8728763399322252e-07, "epoch": 4.44209717687728, "percentage": 88.84, "elapsed_time": "3:02:22", "remaining_time": "0:22:54", "throughput": 19958.95, "total_tokens": 218407936}
|
|
{"current_steps": 69395, "total_steps": 78105, "loss": 0.1393, "lr": 1.8707553778602282e-07, "epoch": 4.442417258818257, "percentage": 88.85, "elapsed_time": "3:02:23", "remaining_time": "0:22:53", "throughput": 19959.12, "total_tokens": 218422976}
|
|
{"current_steps": 69400, "total_steps": 78105, "loss": 0.1009, "lr": 1.868635570742236e-07, "epoch": 4.442737340759234, "percentage": 88.85, "elapsed_time": "3:02:24", "remaining_time": "0:22:52", "throughput": 19959.32, "total_tokens": 218438656}
|
|
{"current_steps": 69405, "total_steps": 78105, "loss": 0.1584, "lr": 1.8665169186840943e-07, "epoch": 4.443057422700211, "percentage": 88.86, "elapsed_time": "3:02:24", "remaining_time": "0:22:51", "throughput": 19959.52, "total_tokens": 218454080}
|
|
{"current_steps": 69410, "total_steps": 78105, "loss": 0.1521, "lr": 1.8643994217915961e-07, "epoch": 4.443377504641188, "percentage": 88.87, "elapsed_time": "3:02:25", "remaining_time": "0:22:51", "throughput": 19959.68, "total_tokens": 218469056}
|
|
{"current_steps": 69415, "total_steps": 78105, "loss": 0.1504, "lr": 1.8622830801704905e-07, "epoch": 4.443697586582165, "percentage": 88.87, "elapsed_time": "3:02:26", "remaining_time": "0:22:50", "throughput": 19959.84, "total_tokens": 218483776}
|
|
{"current_steps": 69420, "total_steps": 78105, "loss": 0.1207, "lr": 1.86016789392644e-07, "epoch": 4.444017668523142, "percentage": 88.88, "elapsed_time": "3:02:26", "remaining_time": "0:22:49", "throughput": 19960.03, "total_tokens": 218499136}
|
|
{"current_steps": 69425, "total_steps": 78105, "loss": 0.128, "lr": 1.858053863165077e-07, "epoch": 4.444337750464118, "percentage": 88.89, "elapsed_time": "3:02:27", "remaining_time": "0:22:48", "throughput": 19960.17, "total_tokens": 218513088}
|
|
{"current_steps": 69430, "total_steps": 78105, "loss": 0.1121, "lr": 1.8559409879919636e-07, "epoch": 4.444657832405095, "percentage": 88.89, "elapsed_time": "3:02:28", "remaining_time": "0:22:47", "throughput": 19960.35, "total_tokens": 218528384}
|
|
{"current_steps": 69435, "total_steps": 78105, "loss": 0.1311, "lr": 1.8538292685126026e-07, "epoch": 4.444977914346072, "percentage": 88.9, "elapsed_time": "3:02:28", "remaining_time": "0:22:47", "throughput": 19960.51, "total_tokens": 218543040}
|
|
{"current_steps": 69440, "total_steps": 78105, "loss": 0.1148, "lr": 1.8517187048324446e-07, "epoch": 4.445297996287049, "percentage": 88.91, "elapsed_time": "3:02:29", "remaining_time": "0:22:46", "throughput": 19960.7, "total_tokens": 218558400}
|
|
{"current_steps": 69445, "total_steps": 78105, "loss": 0.0942, "lr": 1.8496092970568757e-07, "epoch": 4.445618078228026, "percentage": 88.91, "elapsed_time": "3:02:30", "remaining_time": "0:22:45", "throughput": 19960.94, "total_tokens": 218575232}
|
|
{"current_steps": 69450, "total_steps": 78105, "loss": 0.11, "lr": 1.847501045291239e-07, "epoch": 4.445938160169003, "percentage": 88.92, "elapsed_time": "3:02:30", "remaining_time": "0:22:44", "throughput": 19961.12, "total_tokens": 218590208}
|
|
{"current_steps": 69455, "total_steps": 78105, "loss": 0.1019, "lr": 1.8453939496407975e-07, "epoch": 4.44625824210998, "percentage": 88.93, "elapsed_time": "3:02:31", "remaining_time": "0:22:43", "throughput": 19961.34, "total_tokens": 218606272}
|
|
{"current_steps": 69460, "total_steps": 78105, "loss": 0.1144, "lr": 1.8432880102107754e-07, "epoch": 4.446578324050957, "percentage": 88.93, "elapsed_time": "3:02:32", "remaining_time": "0:22:43", "throughput": 19961.52, "total_tokens": 218621376}
|
|
{"current_steps": 69465, "total_steps": 78105, "loss": 0.141, "lr": 1.8411832271063328e-07, "epoch": 4.446898405991934, "percentage": 88.94, "elapsed_time": "3:02:32", "remaining_time": "0:22:42", "throughput": 19961.71, "total_tokens": 218636416}
|
|
{"current_steps": 69470, "total_steps": 78105, "loss": 0.1628, "lr": 1.8390796004325696e-07, "epoch": 4.44721848793291, "percentage": 88.94, "elapsed_time": "3:02:33", "remaining_time": "0:22:41", "throughput": 19961.96, "total_tokens": 218653440}
|
|
{"current_steps": 69475, "total_steps": 78105, "loss": 0.1397, "lr": 1.8369771302945316e-07, "epoch": 4.447538569873887, "percentage": 88.95, "elapsed_time": "3:02:34", "remaining_time": "0:22:40", "throughput": 19962.15, "total_tokens": 218668800}
|
|
{"current_steps": 69480, "total_steps": 78105, "loss": 0.1364, "lr": 1.834875816797202e-07, "epoch": 4.447858651814864, "percentage": 88.96, "elapsed_time": "3:02:34", "remaining_time": "0:22:39", "throughput": 19962.3, "total_tokens": 218683328}
|
|
{"current_steps": 69485, "total_steps": 78105, "loss": 0.1205, "lr": 1.8327756600455132e-07, "epoch": 4.448178733755841, "percentage": 88.96, "elapsed_time": "3:02:35", "remaining_time": "0:22:39", "throughput": 19962.5, "total_tokens": 218699328}
|
|
{"current_steps": 69490, "total_steps": 78105, "loss": 0.1402, "lr": 1.8306766601443343e-07, "epoch": 4.448498815696818, "percentage": 88.97, "elapsed_time": "3:02:36", "remaining_time": "0:22:38", "throughput": 19962.67, "total_tokens": 218714304}
|
|
{"current_steps": 69495, "total_steps": 78105, "loss": 0.1515, "lr": 1.828578817198473e-07, "epoch": 4.448818897637795, "percentage": 88.98, "elapsed_time": "3:02:36", "remaining_time": "0:22:37", "throughput": 19962.86, "total_tokens": 218729664}
|
|
{"current_steps": 69500, "total_steps": 78105, "loss": 0.1253, "lr": 1.8264821313126958e-07, "epoch": 4.449138979578772, "percentage": 88.98, "elapsed_time": "3:02:37", "remaining_time": "0:22:36", "throughput": 19963.06, "total_tokens": 218745600}
|
|
{"current_steps": 69505, "total_steps": 78105, "loss": 0.1192, "lr": 1.8243866025916907e-07, "epoch": 4.449459061519749, "percentage": 88.99, "elapsed_time": "3:02:38", "remaining_time": "0:22:35", "throughput": 19963.3, "total_tokens": 218762304}
|
|
{"current_steps": 69510, "total_steps": 78105, "loss": 0.153, "lr": 1.8222922311401047e-07, "epoch": 4.449779143460726, "percentage": 89.0, "elapsed_time": "3:02:38", "remaining_time": "0:22:35", "throughput": 19963.45, "total_tokens": 218776960}
|
|
{"current_steps": 69515, "total_steps": 78105, "loss": 0.1357, "lr": 1.8201990170625157e-07, "epoch": 4.450099225401702, "percentage": 89.0, "elapsed_time": "3:02:39", "remaining_time": "0:22:34", "throughput": 19963.68, "total_tokens": 218793280}
|
|
{"current_steps": 69520, "total_steps": 78105, "loss": 0.0949, "lr": 1.818106960463445e-07, "epoch": 4.450419307342679, "percentage": 89.01, "elapsed_time": "3:02:40", "remaining_time": "0:22:33", "throughput": 19963.92, "total_tokens": 218810240}
|
|
{"current_steps": 69525, "total_steps": 78105, "loss": 0.1111, "lr": 1.8160160614473703e-07, "epoch": 4.450739389283656, "percentage": 89.01, "elapsed_time": "3:02:40", "remaining_time": "0:22:32", "throughput": 19964.11, "total_tokens": 218825792}
|
|
{"current_steps": 69530, "total_steps": 78105, "loss": 0.1376, "lr": 1.8139263201186836e-07, "epoch": 4.451059471224633, "percentage": 89.02, "elapsed_time": "3:02:41", "remaining_time": "0:22:31", "throughput": 19964.33, "total_tokens": 218842176}
|
|
{"current_steps": 69535, "total_steps": 78105, "loss": 0.1484, "lr": 1.8118377365817534e-07, "epoch": 4.45137955316561, "percentage": 89.03, "elapsed_time": "3:02:42", "remaining_time": "0:22:31", "throughput": 19964.51, "total_tokens": 218857344}
|
|
{"current_steps": 69540, "total_steps": 78105, "loss": 0.1222, "lr": 1.8097503109408553e-07, "epoch": 4.451699635106587, "percentage": 89.03, "elapsed_time": "3:02:42", "remaining_time": "0:22:30", "throughput": 19964.71, "total_tokens": 218872832}
|
|
{"current_steps": 69545, "total_steps": 78105, "loss": 0.1965, "lr": 1.807664043300239e-07, "epoch": 4.452019717047564, "percentage": 89.04, "elapsed_time": "3:02:43", "remaining_time": "0:22:29", "throughput": 19964.95, "total_tokens": 218889536}
|
|
{"current_steps": 69550, "total_steps": 78105, "loss": 0.1365, "lr": 1.805578933764074e-07, "epoch": 4.452339798988541, "percentage": 89.05, "elapsed_time": "3:02:44", "remaining_time": "0:22:28", "throughput": 19965.13, "total_tokens": 218904640}
|
|
{"current_steps": 69555, "total_steps": 78105, "loss": 0.1715, "lr": 1.8034949824364824e-07, "epoch": 4.452659880929518, "percentage": 89.05, "elapsed_time": "3:02:45", "remaining_time": "0:22:27", "throughput": 19965.33, "total_tokens": 218919872}
|
|
{"current_steps": 69560, "total_steps": 78105, "loss": 0.1422, "lr": 1.8014121894215252e-07, "epoch": 4.4529799628704945, "percentage": 89.06, "elapsed_time": "3:02:45", "remaining_time": "0:22:27", "throughput": 19965.52, "total_tokens": 218935424}
|
|
{"current_steps": 69565, "total_steps": 78105, "loss": 0.0803, "lr": 1.7993305548232003e-07, "epoch": 4.4533000448114715, "percentage": 89.07, "elapsed_time": "3:02:46", "remaining_time": "0:22:26", "throughput": 19965.73, "total_tokens": 218951040}
|
|
{"current_steps": 69570, "total_steps": 78105, "loss": 0.2246, "lr": 1.7972500787454717e-07, "epoch": 4.4536201267524484, "percentage": 89.07, "elapsed_time": "3:02:47", "remaining_time": "0:22:25", "throughput": 19965.96, "total_tokens": 218967680}
|
|
{"current_steps": 69575, "total_steps": 78105, "loss": 0.12, "lr": 1.7951707612922032e-07, "epoch": 4.453940208693425, "percentage": 89.08, "elapsed_time": "3:02:47", "remaining_time": "0:22:24", "throughput": 19966.14, "total_tokens": 218983104}
|
|
{"current_steps": 69580, "total_steps": 78105, "loss": 0.1104, "lr": 1.7930926025672451e-07, "epoch": 4.454260290634402, "percentage": 89.09, "elapsed_time": "3:02:48", "remaining_time": "0:22:23", "throughput": 19966.27, "total_tokens": 218997248}
|
|
{"current_steps": 69585, "total_steps": 78105, "loss": 0.1288, "lr": 1.791015602674359e-07, "epoch": 4.454580372575379, "percentage": 89.09, "elapsed_time": "3:02:49", "remaining_time": "0:22:23", "throughput": 19966.46, "total_tokens": 219012608}
|
|
{"current_steps": 69590, "total_steps": 78105, "loss": 0.1238, "lr": 1.7889397617172649e-07, "epoch": 4.454900454516356, "percentage": 89.1, "elapsed_time": "3:02:49", "remaining_time": "0:22:22", "throughput": 19966.6, "total_tokens": 219027072}
|
|
{"current_steps": 69595, "total_steps": 78105, "loss": 0.1223, "lr": 1.7868650797996156e-07, "epoch": 4.455220536457333, "percentage": 89.1, "elapsed_time": "3:02:50", "remaining_time": "0:22:21", "throughput": 19966.84, "total_tokens": 219043968}
|
|
{"current_steps": 69600, "total_steps": 78105, "loss": 0.1511, "lr": 1.7847915570250117e-07, "epoch": 4.4555406183983095, "percentage": 89.11, "elapsed_time": "3:02:51", "remaining_time": "0:22:20", "throughput": 19967.02, "total_tokens": 219059200}
|
|
{"current_steps": 69605, "total_steps": 78105, "loss": 0.1452, "lr": 1.78271919349699e-07, "epoch": 4.4558607003392865, "percentage": 89.12, "elapsed_time": "3:02:51", "remaining_time": "0:22:19", "throughput": 19967.19, "total_tokens": 219074368}
|
|
{"current_steps": 69610, "total_steps": 78105, "loss": 0.1056, "lr": 1.7806479893190455e-07, "epoch": 4.4561807822802635, "percentage": 89.12, "elapsed_time": "3:02:52", "remaining_time": "0:22:19", "throughput": 19967.41, "total_tokens": 219090176}
|
|
{"current_steps": 69615, "total_steps": 78105, "loss": 0.0991, "lr": 1.7785779445945867e-07, "epoch": 4.4565008642212405, "percentage": 89.13, "elapsed_time": "3:02:53", "remaining_time": "0:22:18", "throughput": 19967.61, "total_tokens": 219106240}
|
|
{"current_steps": 69620, "total_steps": 78105, "loss": 0.1892, "lr": 1.7765090594269924e-07, "epoch": 4.4568209461622175, "percentage": 89.14, "elapsed_time": "3:02:53", "remaining_time": "0:22:17", "throughput": 19967.83, "total_tokens": 219122240}
|
|
{"current_steps": 69625, "total_steps": 78105, "loss": 0.1367, "lr": 1.7744413339195715e-07, "epoch": 4.4571410281031945, "percentage": 89.14, "elapsed_time": "3:02:54", "remaining_time": "0:22:16", "throughput": 19968.02, "total_tokens": 219137152}
|
|
{"current_steps": 69630, "total_steps": 78105, "loss": 0.1387, "lr": 1.772374768175572e-07, "epoch": 4.4574611100441714, "percentage": 89.15, "elapsed_time": "3:02:55", "remaining_time": "0:22:15", "throughput": 19968.18, "total_tokens": 219152128}
|
|
{"current_steps": 69635, "total_steps": 78105, "loss": 0.1311, "lr": 1.7703093622981865e-07, "epoch": 4.457781191985148, "percentage": 89.16, "elapsed_time": "3:02:55", "remaining_time": "0:22:15", "throughput": 19968.36, "total_tokens": 219167552}
|
|
{"current_steps": 69640, "total_steps": 78105, "loss": 0.1422, "lr": 1.7682451163905517e-07, "epoch": 4.458101273926125, "percentage": 89.16, "elapsed_time": "3:02:56", "remaining_time": "0:22:14", "throughput": 19968.6, "total_tokens": 219184576}
|
|
{"current_steps": 69645, "total_steps": 78105, "loss": 0.1922, "lr": 1.766182030555752e-07, "epoch": 4.458421355867102, "percentage": 89.17, "elapsed_time": "3:02:57", "remaining_time": "0:22:13", "throughput": 19968.83, "total_tokens": 219201024}
|
|
{"current_steps": 69650, "total_steps": 78105, "loss": 0.1521, "lr": 1.764120104896791e-07, "epoch": 4.4587414378080785, "percentage": 89.17, "elapsed_time": "3:02:57", "remaining_time": "0:22:12", "throughput": 19969.05, "total_tokens": 219217216}
|
|
{"current_steps": 69655, "total_steps": 78105, "loss": 0.0957, "lr": 1.7620593395166474e-07, "epoch": 4.4590615197490555, "percentage": 89.18, "elapsed_time": "3:02:58", "remaining_time": "0:22:11", "throughput": 19969.3, "total_tokens": 219234240}
|
|
{"current_steps": 69660, "total_steps": 78105, "loss": 0.1359, "lr": 1.7599997345182195e-07, "epoch": 4.4593816016900325, "percentage": 89.19, "elapsed_time": "3:02:59", "remaining_time": "0:22:11", "throughput": 19969.5, "total_tokens": 219250112}
|
|
{"current_steps": 69665, "total_steps": 78105, "loss": 0.129, "lr": 1.7579412900043501e-07, "epoch": 4.4597016836310095, "percentage": 89.19, "elapsed_time": "3:02:59", "remaining_time": "0:22:10", "throughput": 19969.66, "total_tokens": 219264896}
|
|
{"current_steps": 69670, "total_steps": 78105, "loss": 0.1221, "lr": 1.7558840060778292e-07, "epoch": 4.4600217655719865, "percentage": 89.2, "elapsed_time": "3:03:00", "remaining_time": "0:22:09", "throughput": 19969.86, "total_tokens": 219280384}
|
|
{"current_steps": 69675, "total_steps": 78105, "loss": 0.1147, "lr": 1.753827882841383e-07, "epoch": 4.4603418475129635, "percentage": 89.21, "elapsed_time": "3:03:01", "remaining_time": "0:22:08", "throughput": 19970.09, "total_tokens": 219296576}
|
|
{"current_steps": 69680, "total_steps": 78105, "loss": 0.1177, "lr": 1.7517729203976958e-07, "epoch": 4.4606619294539405, "percentage": 89.21, "elapsed_time": "3:03:01", "remaining_time": "0:22:07", "throughput": 19970.26, "total_tokens": 219311232}
|
|
{"current_steps": 69685, "total_steps": 78105, "loss": 0.1277, "lr": 1.7497191188493662e-07, "epoch": 4.4609820113949175, "percentage": 89.22, "elapsed_time": "3:03:02", "remaining_time": "0:22:07", "throughput": 19970.43, "total_tokens": 219326592}
|
|
{"current_steps": 69690, "total_steps": 78105, "loss": 0.124, "lr": 1.7476664782989594e-07, "epoch": 4.461302093335894, "percentage": 89.23, "elapsed_time": "3:03:03", "remaining_time": "0:22:06", "throughput": 19970.63, "total_tokens": 219342272}
|
|
{"current_steps": 69695, "total_steps": 78105, "loss": 0.1427, "lr": 1.745614998848974e-07, "epoch": 4.4616221752768705, "percentage": 89.23, "elapsed_time": "3:03:03", "remaining_time": "0:22:05", "throughput": 19970.84, "total_tokens": 219357952}
|
|
{"current_steps": 69700, "total_steps": 78105, "loss": 0.1702, "lr": 1.7435646806018448e-07, "epoch": 4.4619422572178475, "percentage": 89.24, "elapsed_time": "3:03:04", "remaining_time": "0:22:04", "throughput": 19971.07, "total_tokens": 219374720}
|
|
{"current_steps": 69705, "total_steps": 78105, "loss": 0.1301, "lr": 1.741515523659959e-07, "epoch": 4.4622623391588245, "percentage": 89.25, "elapsed_time": "3:03:05", "remaining_time": "0:22:03", "throughput": 19971.27, "total_tokens": 219390400}
|
|
{"current_steps": 69710, "total_steps": 78105, "loss": 0.1124, "lr": 1.7394675281256407e-07, "epoch": 4.4625824210998015, "percentage": 89.25, "elapsed_time": "3:03:05", "remaining_time": "0:22:03", "throughput": 19971.5, "total_tokens": 219406656}
|
|
{"current_steps": 69715, "total_steps": 78105, "loss": 0.1343, "lr": 1.7374206941011523e-07, "epoch": 4.4629025030407785, "percentage": 89.26, "elapsed_time": "3:03:06", "remaining_time": "0:22:02", "throughput": 19971.69, "total_tokens": 219421824}
|
|
{"current_steps": 69720, "total_steps": 78105, "loss": 0.119, "lr": 1.7353750216887039e-07, "epoch": 4.4632225849817555, "percentage": 89.26, "elapsed_time": "3:03:07", "remaining_time": "0:22:01", "throughput": 19971.89, "total_tokens": 219437440}
|
|
{"current_steps": 69725, "total_steps": 78105, "loss": 0.1894, "lr": 1.7333305109904413e-07, "epoch": 4.4635426669227325, "percentage": 89.27, "elapsed_time": "3:03:07", "remaining_time": "0:22:00", "throughput": 19972.06, "total_tokens": 219452224}
|
|
{"current_steps": 69730, "total_steps": 78105, "loss": 0.1614, "lr": 1.731287162108472e-07, "epoch": 4.4638627488637095, "percentage": 89.28, "elapsed_time": "3:03:08", "remaining_time": "0:21:59", "throughput": 19972.21, "total_tokens": 219466880}
|
|
{"current_steps": 69735, "total_steps": 78105, "loss": 0.1149, "lr": 1.7292449751448087e-07, "epoch": 4.464182830804686, "percentage": 89.28, "elapsed_time": "3:03:09", "remaining_time": "0:21:58", "throughput": 19972.4, "total_tokens": 219482496}
|
|
{"current_steps": 69740, "total_steps": 78105, "loss": 0.1285, "lr": 1.7272039502014449e-07, "epoch": 4.464502912745663, "percentage": 89.29, "elapsed_time": "3:03:09", "remaining_time": "0:21:58", "throughput": 19972.61, "total_tokens": 219498368}
|
|
{"current_steps": 69745, "total_steps": 78105, "loss": 0.1362, "lr": 1.7251640873802905e-07, "epoch": 4.46482299468664, "percentage": 89.3, "elapsed_time": "3:03:10", "remaining_time": "0:21:57", "throughput": 19972.78, "total_tokens": 219513408}
|
|
{"current_steps": 69750, "total_steps": 78105, "loss": 0.1121, "lr": 1.7231253867832032e-07, "epoch": 4.465143076627617, "percentage": 89.3, "elapsed_time": "3:03:11", "remaining_time": "0:21:56", "throughput": 19972.97, "total_tokens": 219529088}
|
|
{"current_steps": 69755, "total_steps": 78105, "loss": 0.1074, "lr": 1.7210878485119985e-07, "epoch": 4.4654631585685935, "percentage": 89.31, "elapsed_time": "3:03:11", "remaining_time": "0:21:55", "throughput": 19973.16, "total_tokens": 219544576}
|
|
{"current_steps": 69760, "total_steps": 78105, "loss": 0.1674, "lr": 1.7190514726684037e-07, "epoch": 4.4657832405095705, "percentage": 89.32, "elapsed_time": "3:03:12", "remaining_time": "0:21:54", "throughput": 19973.34, "total_tokens": 219560064}
|
|
{"current_steps": 69765, "total_steps": 78105, "loss": 0.1022, "lr": 1.7170162593541178e-07, "epoch": 4.4661033224505475, "percentage": 89.32, "elapsed_time": "3:03:13", "remaining_time": "0:21:54", "throughput": 19973.52, "total_tokens": 219575424}
|
|
{"current_steps": 69770, "total_steps": 78105, "loss": 0.1462, "lr": 1.7149822086707567e-07, "epoch": 4.4664234043915245, "percentage": 89.33, "elapsed_time": "3:03:14", "remaining_time": "0:21:53", "throughput": 19973.77, "total_tokens": 219592256}
|
|
{"current_steps": 69775, "total_steps": 78105, "loss": 0.097, "lr": 1.7129493207199006e-07, "epoch": 4.4667434863325015, "percentage": 89.33, "elapsed_time": "3:03:14", "remaining_time": "0:21:52", "throughput": 19973.98, "total_tokens": 219608320}
|
|
{"current_steps": 69780, "total_steps": 78105, "loss": 0.1632, "lr": 1.7109175956030595e-07, "epoch": 4.467063568273478, "percentage": 89.34, "elapsed_time": "3:03:15", "remaining_time": "0:21:51", "throughput": 19974.19, "total_tokens": 219624256}
|
|
{"current_steps": 69785, "total_steps": 78105, "loss": 0.1218, "lr": 1.7088870334216827e-07, "epoch": 4.467383650214455, "percentage": 89.35, "elapsed_time": "3:03:16", "remaining_time": "0:21:50", "throughput": 19974.35, "total_tokens": 219638656}
|
|
{"current_steps": 69790, "total_steps": 78105, "loss": 0.1249, "lr": 1.7068576342771703e-07, "epoch": 4.467703732155432, "percentage": 89.35, "elapsed_time": "3:03:16", "remaining_time": "0:21:50", "throughput": 19974.52, "total_tokens": 219653632}
|
|
{"current_steps": 69795, "total_steps": 78105, "loss": 0.1586, "lr": 1.7048293982708518e-07, "epoch": 4.468023814096409, "percentage": 89.36, "elapsed_time": "3:03:17", "remaining_time": "0:21:49", "throughput": 19974.73, "total_tokens": 219669760}
|
|
{"current_steps": 69800, "total_steps": 78105, "loss": 0.1466, "lr": 1.7028023255040244e-07, "epoch": 4.468343896037386, "percentage": 89.37, "elapsed_time": "3:03:18", "remaining_time": "0:21:48", "throughput": 19974.91, "total_tokens": 219685184}
|
|
{"current_steps": 69805, "total_steps": 78105, "loss": 0.1214, "lr": 1.7007764160778872e-07, "epoch": 4.468663977978363, "percentage": 89.37, "elapsed_time": "3:03:18", "remaining_time": "0:21:47", "throughput": 19975.18, "total_tokens": 219702720}
|
|
{"current_steps": 69810, "total_steps": 78105, "loss": 0.1596, "lr": 1.6987516700936207e-07, "epoch": 4.46898405991934, "percentage": 89.38, "elapsed_time": "3:03:19", "remaining_time": "0:21:46", "throughput": 19975.45, "total_tokens": 219720256}
|
|
{"current_steps": 69815, "total_steps": 78105, "loss": 0.1266, "lr": 1.6967280876523217e-07, "epoch": 4.4693041418603165, "percentage": 89.39, "elapsed_time": "3:03:20", "remaining_time": "0:21:46", "throughput": 19975.64, "total_tokens": 219735872}
|
|
{"current_steps": 69820, "total_steps": 78105, "loss": 0.1267, "lr": 1.694705668855043e-07, "epoch": 4.4696242238012935, "percentage": 89.39, "elapsed_time": "3:03:20", "remaining_time": "0:21:45", "throughput": 19975.84, "total_tokens": 219751680}
|
|
{"current_steps": 69825, "total_steps": 78105, "loss": 0.144, "lr": 1.692684413802767e-07, "epoch": 4.46994430574227, "percentage": 89.4, "elapsed_time": "3:03:21", "remaining_time": "0:21:44", "throughput": 19976.02, "total_tokens": 219767040}
|
|
{"current_steps": 69830, "total_steps": 78105, "loss": 0.1171, "lr": 1.6906643225964304e-07, "epoch": 4.470264387683247, "percentage": 89.41, "elapsed_time": "3:03:22", "remaining_time": "0:21:43", "throughput": 19976.28, "total_tokens": 219784384}
|
|
{"current_steps": 69835, "total_steps": 78105, "loss": 0.1216, "lr": 1.6886453953368964e-07, "epoch": 4.470584469624224, "percentage": 89.41, "elapsed_time": "3:03:22", "remaining_time": "0:21:42", "throughput": 19976.47, "total_tokens": 219800192}
|
|
{"current_steps": 69840, "total_steps": 78105, "loss": 0.114, "lr": 1.6866276321249986e-07, "epoch": 4.470904551565201, "percentage": 89.42, "elapsed_time": "3:03:23", "remaining_time": "0:21:42", "throughput": 19976.69, "total_tokens": 219816448}
|
|
{"current_steps": 69845, "total_steps": 78105, "loss": 0.156, "lr": 1.684611033061473e-07, "epoch": 4.471224633506178, "percentage": 89.42, "elapsed_time": "3:03:24", "remaining_time": "0:21:41", "throughput": 19976.88, "total_tokens": 219831808}
|
|
{"current_steps": 69850, "total_steps": 78105, "loss": 0.1274, "lr": 1.6825955982470306e-07, "epoch": 4.471544715447155, "percentage": 89.43, "elapsed_time": "3:03:24", "remaining_time": "0:21:40", "throughput": 19977.07, "total_tokens": 219847168}
|
|
{"current_steps": 69855, "total_steps": 78105, "loss": 0.1213, "lr": 1.6805813277823075e-07, "epoch": 4.471864797388132, "percentage": 89.44, "elapsed_time": "3:03:25", "remaining_time": "0:21:39", "throughput": 19977.25, "total_tokens": 219862272}
|
|
{"current_steps": 69860, "total_steps": 78105, "loss": 0.1077, "lr": 1.6785682217678844e-07, "epoch": 4.472184879329109, "percentage": 89.44, "elapsed_time": "3:03:26", "remaining_time": "0:21:38", "throughput": 19977.46, "total_tokens": 219878272}
|
|
{"current_steps": 69865, "total_steps": 78105, "loss": 0.1285, "lr": 1.6765562803042924e-07, "epoch": 4.472504961270085, "percentage": 89.45, "elapsed_time": "3:03:26", "remaining_time": "0:21:38", "throughput": 19977.61, "total_tokens": 219892992}
|
|
{"current_steps": 69870, "total_steps": 78105, "loss": 0.1122, "lr": 1.6745455034919838e-07, "epoch": 4.472825043211062, "percentage": 89.46, "elapsed_time": "3:03:27", "remaining_time": "0:21:37", "throughput": 19977.81, "total_tokens": 219908736}
|
|
{"current_steps": 69875, "total_steps": 78105, "loss": 0.1247, "lr": 1.6725358914313844e-07, "epoch": 4.473145125152039, "percentage": 89.46, "elapsed_time": "3:03:28", "remaining_time": "0:21:36", "throughput": 19977.99, "total_tokens": 219923840}
|
|
{"current_steps": 69880, "total_steps": 78105, "loss": 0.1047, "lr": 1.6705274442228275e-07, "epoch": 4.473465207093016, "percentage": 89.47, "elapsed_time": "3:03:28", "remaining_time": "0:21:35", "throughput": 19978.19, "total_tokens": 219939392}
|
|
{"current_steps": 69885, "total_steps": 78105, "loss": 0.1226, "lr": 1.6685201619666164e-07, "epoch": 4.473785289033993, "percentage": 89.48, "elapsed_time": "3:03:29", "remaining_time": "0:21:34", "throughput": 19978.37, "total_tokens": 219954496}
|
|
{"current_steps": 69890, "total_steps": 78105, "loss": 0.1456, "lr": 1.6665140447629762e-07, "epoch": 4.47410537097497, "percentage": 89.48, "elapsed_time": "3:03:30", "remaining_time": "0:21:34", "throughput": 19978.59, "total_tokens": 219970752}
|
|
{"current_steps": 69895, "total_steps": 78105, "loss": 0.1304, "lr": 1.6645090927120854e-07, "epoch": 4.474425452915947, "percentage": 89.49, "elapsed_time": "3:03:30", "remaining_time": "0:21:33", "throughput": 19978.75, "total_tokens": 219985472}
|
|
{"current_steps": 69900, "total_steps": 78105, "loss": 0.1338, "lr": 1.6625053059140612e-07, "epoch": 4.474745534856924, "percentage": 89.49, "elapsed_time": "3:03:31", "remaining_time": "0:21:32", "throughput": 19978.88, "total_tokens": 219999616}
|
|
{"current_steps": 69905, "total_steps": 78105, "loss": 0.0955, "lr": 1.660502684468962e-07, "epoch": 4.475065616797901, "percentage": 89.5, "elapsed_time": "3:03:32", "remaining_time": "0:21:31", "throughput": 19979.04, "total_tokens": 220014144}
|
|
{"current_steps": 69910, "total_steps": 78105, "loss": 0.1256, "lr": 1.6585012284767858e-07, "epoch": 4.475385698738878, "percentage": 89.51, "elapsed_time": "3:03:32", "remaining_time": "0:21:30", "throughput": 19979.22, "total_tokens": 220029696}
|
|
{"current_steps": 69915, "total_steps": 78105, "loss": 0.1082, "lr": 1.6565009380374752e-07, "epoch": 4.475705780679854, "percentage": 89.51, "elapsed_time": "3:03:33", "remaining_time": "0:21:30", "throughput": 19979.44, "total_tokens": 220045760}
|
|
{"current_steps": 69920, "total_steps": 78105, "loss": 0.1459, "lr": 1.6545018132509194e-07, "epoch": 4.476025862620831, "percentage": 89.52, "elapsed_time": "3:03:34", "remaining_time": "0:21:29", "throughput": 19979.64, "total_tokens": 220061696}
|
|
{"current_steps": 69925, "total_steps": 78105, "loss": 0.1237, "lr": 1.6525038542169386e-07, "epoch": 4.476345944561808, "percentage": 89.53, "elapsed_time": "3:03:34", "remaining_time": "0:21:28", "throughput": 19979.83, "total_tokens": 220077184}
|
|
{"current_steps": 69930, "total_steps": 78105, "loss": 0.1802, "lr": 1.6505070610353057e-07, "epoch": 4.476666026502785, "percentage": 89.53, "elapsed_time": "3:03:35", "remaining_time": "0:21:27", "throughput": 19980.05, "total_tokens": 220093440}
|
|
{"current_steps": 69935, "total_steps": 78105, "loss": 0.1719, "lr": 1.6485114338057245e-07, "epoch": 4.476986108443762, "percentage": 89.54, "elapsed_time": "3:03:36", "remaining_time": "0:21:26", "throughput": 19980.27, "total_tokens": 220110080}
|
|
{"current_steps": 69940, "total_steps": 78105, "loss": 0.1265, "lr": 1.6465169726278485e-07, "epoch": 4.477306190384739, "percentage": 89.55, "elapsed_time": "3:03:37", "remaining_time": "0:21:26", "throughput": 19980.49, "total_tokens": 220126080}
|
|
{"current_steps": 69945, "total_steps": 78105, "loss": 0.1821, "lr": 1.644523677601273e-07, "epoch": 4.477626272325716, "percentage": 89.55, "elapsed_time": "3:03:37", "remaining_time": "0:21:25", "throughput": 19980.65, "total_tokens": 220140864}
|
|
{"current_steps": 69950, "total_steps": 78105, "loss": 0.1111, "lr": 1.6425315488255294e-07, "epoch": 4.477946354266693, "percentage": 89.56, "elapsed_time": "3:03:38", "remaining_time": "0:21:24", "throughput": 19980.84, "total_tokens": 220156544}
|
|
{"current_steps": 69955, "total_steps": 78105, "loss": 0.1253, "lr": 1.640540586400094e-07, "epoch": 4.478266436207669, "percentage": 89.57, "elapsed_time": "3:03:39", "remaining_time": "0:21:23", "throughput": 19981.0, "total_tokens": 220171840}
|
|
{"current_steps": 69960, "total_steps": 78105, "loss": 0.0854, "lr": 1.6385507904243924e-07, "epoch": 4.478586518148646, "percentage": 89.57, "elapsed_time": "3:03:39", "remaining_time": "0:21:22", "throughput": 19981.17, "total_tokens": 220186752}
|
|
{"current_steps": 69965, "total_steps": 78105, "loss": 0.0735, "lr": 1.6365621609977734e-07, "epoch": 4.478906600089623, "percentage": 89.58, "elapsed_time": "3:03:40", "remaining_time": "0:21:22", "throughput": 19981.32, "total_tokens": 220201088}
|
|
{"current_steps": 69970, "total_steps": 78105, "loss": 0.124, "lr": 1.634574698219546e-07, "epoch": 4.4792266820306, "percentage": 89.58, "elapsed_time": "3:03:41", "remaining_time": "0:21:21", "throughput": 19981.52, "total_tokens": 220216768}
|
|
{"current_steps": 69975, "total_steps": 78105, "loss": 0.1255, "lr": 1.6325884021889533e-07, "epoch": 4.479546763971577, "percentage": 89.59, "elapsed_time": "3:03:41", "remaining_time": "0:21:20", "throughput": 19981.69, "total_tokens": 220231936}
|
|
{"current_steps": 69980, "total_steps": 78105, "loss": 0.1017, "lr": 1.6306032730051742e-07, "epoch": 4.479866845912554, "percentage": 89.6, "elapsed_time": "3:03:42", "remaining_time": "0:21:19", "throughput": 19981.86, "total_tokens": 220246592}
|
|
{"current_steps": 69985, "total_steps": 78105, "loss": 0.1462, "lr": 1.6286193107673487e-07, "epoch": 4.480186927853531, "percentage": 89.6, "elapsed_time": "3:03:42", "remaining_time": "0:21:18", "throughput": 19982.05, "total_tokens": 220261888}
|
|
{"current_steps": 69990, "total_steps": 78105, "loss": 0.1161, "lr": 1.6266365155745312e-07, "epoch": 4.480507009794508, "percentage": 89.61, "elapsed_time": "3:03:43", "remaining_time": "0:21:18", "throughput": 19982.21, "total_tokens": 220276800}
|
|
{"current_steps": 69995, "total_steps": 78105, "loss": 0.1647, "lr": 1.624654887525745e-07, "epoch": 4.480827091735485, "percentage": 89.62, "elapsed_time": "3:03:44", "remaining_time": "0:21:17", "throughput": 19982.4, "total_tokens": 220292160}
|
|
{"current_steps": 70000, "total_steps": 78105, "loss": 0.1887, "lr": 1.6226744267199308e-07, "epoch": 4.481147173676461, "percentage": 89.62, "elapsed_time": "3:03:44", "remaining_time": "0:21:16", "throughput": 19982.59, "total_tokens": 220307648}
|
|
{"current_steps": 70005, "total_steps": 78105, "loss": 0.1344, "lr": 1.6206951332559894e-07, "epoch": 4.481467255617438, "percentage": 89.63, "elapsed_time": "3:03:45", "remaining_time": "0:21:15", "throughput": 19982.79, "total_tokens": 220323648}
|
|
{"current_steps": 70010, "total_steps": 78105, "loss": 0.1439, "lr": 1.6187170072327562e-07, "epoch": 4.481787337558415, "percentage": 89.64, "elapsed_time": "3:03:46", "remaining_time": "0:21:14", "throughput": 19983.01, "total_tokens": 220339968}
|
|
{"current_steps": 70015, "total_steps": 78105, "loss": 0.1211, "lr": 1.6167400487490047e-07, "epoch": 4.482107419499392, "percentage": 89.64, "elapsed_time": "3:03:47", "remaining_time": "0:21:14", "throughput": 19983.18, "total_tokens": 220355136}
|
|
{"current_steps": 70020, "total_steps": 78105, "loss": 0.1041, "lr": 1.614764257903459e-07, "epoch": 4.482427501440369, "percentage": 89.65, "elapsed_time": "3:03:47", "remaining_time": "0:21:13", "throughput": 19983.36, "total_tokens": 220370304}
|
|
{"current_steps": 70025, "total_steps": 78105, "loss": 0.1214, "lr": 1.6127896347947702e-07, "epoch": 4.482747583381346, "percentage": 89.65, "elapsed_time": "3:03:48", "remaining_time": "0:21:12", "throughput": 19983.57, "total_tokens": 220386240}
|
|
{"current_steps": 70030, "total_steps": 78105, "loss": 0.157, "lr": 1.6108161795215572e-07, "epoch": 4.483067665322323, "percentage": 89.66, "elapsed_time": "3:03:49", "remaining_time": "0:21:11", "throughput": 19983.76, "total_tokens": 220401600}
|
|
{"current_steps": 70035, "total_steps": 78105, "loss": 0.1292, "lr": 1.6088438921823463e-07, "epoch": 4.4833877472633, "percentage": 89.67, "elapsed_time": "3:03:49", "remaining_time": "0:21:10", "throughput": 19983.97, "total_tokens": 220417728}
|
|
{"current_steps": 70040, "total_steps": 78105, "loss": 0.1869, "lr": 1.6068727728756368e-07, "epoch": 4.483707829204277, "percentage": 89.67, "elapsed_time": "3:03:50", "remaining_time": "0:21:10", "throughput": 19984.24, "total_tokens": 220435200}
|
|
{"current_steps": 70045, "total_steps": 78105, "loss": 0.1442, "lr": 1.6049028216998468e-07, "epoch": 4.484027911145253, "percentage": 89.68, "elapsed_time": "3:03:51", "remaining_time": "0:21:09", "throughput": 19984.39, "total_tokens": 220449600}
|
|
{"current_steps": 70050, "total_steps": 78105, "loss": 0.1654, "lr": 1.6029340387533539e-07, "epoch": 4.48434799308623, "percentage": 89.69, "elapsed_time": "3:03:51", "remaining_time": "0:21:08", "throughput": 19984.58, "total_tokens": 220465280}
|
|
{"current_steps": 70055, "total_steps": 78105, "loss": 0.1323, "lr": 1.6009664241344618e-07, "epoch": 4.484668075027207, "percentage": 89.69, "elapsed_time": "3:03:52", "remaining_time": "0:21:07", "throughput": 19984.79, "total_tokens": 220481472}
|
|
{"current_steps": 70060, "total_steps": 78105, "loss": 0.1493, "lr": 1.5989999779414234e-07, "epoch": 4.484988156968184, "percentage": 89.7, "elapsed_time": "3:03:53", "remaining_time": "0:21:06", "throughput": 19984.99, "total_tokens": 220497344}
|
|
{"current_steps": 70065, "total_steps": 78105, "loss": 0.1714, "lr": 1.5970347002724346e-07, "epoch": 4.485308238909161, "percentage": 89.71, "elapsed_time": "3:03:53", "remaining_time": "0:21:06", "throughput": 19985.22, "total_tokens": 220513984}
|
|
{"current_steps": 70070, "total_steps": 78105, "loss": 0.1036, "lr": 1.5950705912256366e-07, "epoch": 4.485628320850138, "percentage": 89.71, "elapsed_time": "3:03:54", "remaining_time": "0:21:05", "throughput": 19985.41, "total_tokens": 220529472}
|
|
{"current_steps": 70075, "total_steps": 78105, "loss": 0.173, "lr": 1.5931076508990951e-07, "epoch": 4.485948402791115, "percentage": 89.72, "elapsed_time": "3:03:55", "remaining_time": "0:21:04", "throughput": 19985.6, "total_tokens": 220545344}
|
|
{"current_steps": 70080, "total_steps": 78105, "loss": 0.1361, "lr": 1.591145879390843e-07, "epoch": 4.486268484732092, "percentage": 89.73, "elapsed_time": "3:03:55", "remaining_time": "0:21:03", "throughput": 19985.83, "total_tokens": 220561920}
|
|
{"current_steps": 70085, "total_steps": 78105, "loss": 0.1346, "lr": 1.5891852767988243e-07, "epoch": 4.486588566673069, "percentage": 89.73, "elapsed_time": "3:03:56", "remaining_time": "0:21:02", "throughput": 19986.01, "total_tokens": 220576960}
|
|
{"current_steps": 70090, "total_steps": 78105, "loss": 0.1342, "lr": 1.5872258432209549e-07, "epoch": 4.486908648614045, "percentage": 89.74, "elapsed_time": "3:03:57", "remaining_time": "0:21:02", "throughput": 19986.19, "total_tokens": 220592192}
|
|
{"current_steps": 70095, "total_steps": 78105, "loss": 0.1167, "lr": 1.585267578755073e-07, "epoch": 4.487228730555022, "percentage": 89.74, "elapsed_time": "3:03:57", "remaining_time": "0:21:01", "throughput": 19986.42, "total_tokens": 220608640}
|
|
{"current_steps": 70100, "total_steps": 78105, "loss": 0.1202, "lr": 1.5833104834989648e-07, "epoch": 4.487548812495999, "percentage": 89.75, "elapsed_time": "3:03:58", "remaining_time": "0:21:00", "throughput": 19986.64, "total_tokens": 220625024}
|
|
{"current_steps": 70105, "total_steps": 78105, "loss": 0.1104, "lr": 1.5813545575503632e-07, "epoch": 4.487868894436976, "percentage": 89.76, "elapsed_time": "3:03:59", "remaining_time": "0:20:59", "throughput": 19986.8, "total_tokens": 220639744}
|
|
{"current_steps": 70110, "total_steps": 78105, "loss": 0.0776, "lr": 1.5793998010069255e-07, "epoch": 4.488188976377953, "percentage": 89.76, "elapsed_time": "3:03:59", "remaining_time": "0:20:58", "throughput": 19986.98, "total_tokens": 220655040}
|
|
{"current_steps": 70115, "total_steps": 78105, "loss": 0.0842, "lr": 1.5774462139662716e-07, "epoch": 4.48850905831893, "percentage": 89.77, "elapsed_time": "3:04:00", "remaining_time": "0:20:58", "throughput": 19987.24, "total_tokens": 220672384}
|
|
{"current_steps": 70120, "total_steps": 78105, "loss": 0.1939, "lr": 1.5754937965259482e-07, "epoch": 4.488829140259907, "percentage": 89.78, "elapsed_time": "3:04:01", "remaining_time": "0:20:57", "throughput": 19987.43, "total_tokens": 220687744}
|
|
{"current_steps": 70125, "total_steps": 78105, "loss": 0.1449, "lr": 1.573542548783452e-07, "epoch": 4.489149222200884, "percentage": 89.78, "elapsed_time": "3:04:01", "remaining_time": "0:20:56", "throughput": 19987.61, "total_tokens": 220702848}
|
|
{"current_steps": 70130, "total_steps": 78105, "loss": 0.143, "lr": 1.5715924708362196e-07, "epoch": 4.48946930414186, "percentage": 89.79, "elapsed_time": "3:04:02", "remaining_time": "0:20:55", "throughput": 19987.82, "total_tokens": 220718720}
|
|
{"current_steps": 70135, "total_steps": 78105, "loss": 0.1038, "lr": 1.5696435627816225e-07, "epoch": 4.489789386082837, "percentage": 89.8, "elapsed_time": "3:04:03", "remaining_time": "0:20:54", "throughput": 19988.04, "total_tokens": 220735104}
|
|
{"current_steps": 70140, "total_steps": 78105, "loss": 0.1334, "lr": 1.5676958247169805e-07, "epoch": 4.490109468023814, "percentage": 89.8, "elapsed_time": "3:04:04", "remaining_time": "0:20:54", "throughput": 19988.2, "total_tokens": 220749888}
|
|
{"current_steps": 70145, "total_steps": 78105, "loss": 0.1412, "lr": 1.5657492567395516e-07, "epoch": 4.490429549964791, "percentage": 89.81, "elapsed_time": "3:04:04", "remaining_time": "0:20:53", "throughput": 19988.37, "total_tokens": 220764864}
|
|
{"current_steps": 70150, "total_steps": 78105, "loss": 0.1492, "lr": 1.5638038589465443e-07, "epoch": 4.490749631905768, "percentage": 89.81, "elapsed_time": "3:04:05", "remaining_time": "0:20:52", "throughput": 19988.57, "total_tokens": 220780672}
|
|
{"current_steps": 70155, "total_steps": 78105, "loss": 0.1121, "lr": 1.5618596314351003e-07, "epoch": 4.491069713846745, "percentage": 89.82, "elapsed_time": "3:04:06", "remaining_time": "0:20:51", "throughput": 19988.74, "total_tokens": 220795712}
|
|
{"current_steps": 70160, "total_steps": 78105, "loss": 0.1244, "lr": 1.5599165743022975e-07, "epoch": 4.491389795787722, "percentage": 89.83, "elapsed_time": "3:04:06", "remaining_time": "0:20:50", "throughput": 19988.9, "total_tokens": 220810624}
|
|
{"current_steps": 70165, "total_steps": 78105, "loss": 0.1525, "lr": 1.5579746876451696e-07, "epoch": 4.491709877728699, "percentage": 89.83, "elapsed_time": "3:04:07", "remaining_time": "0:20:50", "throughput": 19989.09, "total_tokens": 220826048}
|
|
{"current_steps": 70170, "total_steps": 78105, "loss": 0.1572, "lr": 1.5560339715606776e-07, "epoch": 4.492029959669676, "percentage": 89.84, "elapsed_time": "3:04:08", "remaining_time": "0:20:49", "throughput": 19989.38, "total_tokens": 220844352}
|
|
{"current_steps": 70175, "total_steps": 78105, "loss": 0.1244, "lr": 1.5540944261457357e-07, "epoch": 4.492350041610653, "percentage": 89.85, "elapsed_time": "3:04:08", "remaining_time": "0:20:48", "throughput": 19989.55, "total_tokens": 220859712}
|
|
{"current_steps": 70180, "total_steps": 78105, "loss": 0.0878, "lr": 1.5521560514971916e-07, "epoch": 4.492670123551629, "percentage": 89.85, "elapsed_time": "3:04:09", "remaining_time": "0:20:47", "throughput": 19989.73, "total_tokens": 220874880}
|
|
{"current_steps": 70185, "total_steps": 78105, "loss": 0.2133, "lr": 1.5502188477118345e-07, "epoch": 4.492990205492606, "percentage": 89.86, "elapsed_time": "3:04:10", "remaining_time": "0:20:46", "throughput": 19989.93, "total_tokens": 220891136}
|
|
{"current_steps": 70190, "total_steps": 78105, "loss": 0.1168, "lr": 1.548282814886412e-07, "epoch": 4.493310287433583, "percentage": 89.87, "elapsed_time": "3:04:10", "remaining_time": "0:20:46", "throughput": 19990.12, "total_tokens": 220906624}
|
|
{"current_steps": 70195, "total_steps": 78105, "loss": 0.1324, "lr": 1.546347953117583e-07, "epoch": 4.49363036937456, "percentage": 89.87, "elapsed_time": "3:04:11", "remaining_time": "0:20:45", "throughput": 19990.35, "total_tokens": 220923200}
|
|
{"current_steps": 70200, "total_steps": 78105, "loss": 0.1657, "lr": 1.5444142625019753e-07, "epoch": 4.493950451315537, "percentage": 89.88, "elapsed_time": "3:04:12", "remaining_time": "0:20:44", "throughput": 19990.59, "total_tokens": 220939776}
|
|
{"current_steps": 70205, "total_steps": 78105, "loss": 0.151, "lr": 1.5424817431361428e-07, "epoch": 4.494270533256514, "percentage": 89.89, "elapsed_time": "3:04:12", "remaining_time": "0:20:43", "throughput": 19990.74, "total_tokens": 220954240}
|
|
{"current_steps": 70210, "total_steps": 78105, "loss": 0.1397, "lr": 1.540550395116583e-07, "epoch": 4.494590615197491, "percentage": 89.89, "elapsed_time": "3:04:13", "remaining_time": "0:20:42", "throughput": 19990.89, "total_tokens": 220968640}
|
|
{"current_steps": 70215, "total_steps": 78105, "loss": 0.1244, "lr": 1.5386202185397465e-07, "epoch": 4.494910697138468, "percentage": 89.9, "elapsed_time": "3:04:14", "remaining_time": "0:20:42", "throughput": 19991.12, "total_tokens": 220985216}
|
|
{"current_steps": 70220, "total_steps": 78105, "loss": 0.1985, "lr": 1.5366912135020062e-07, "epoch": 4.495230779079444, "percentage": 89.9, "elapsed_time": "3:04:14", "remaining_time": "0:20:41", "throughput": 19991.33, "total_tokens": 221001408}
|
|
{"current_steps": 70225, "total_steps": 78105, "loss": 0.1547, "lr": 1.5347633800996963e-07, "epoch": 4.495550861020421, "percentage": 89.91, "elapsed_time": "3:04:15", "remaining_time": "0:20:40", "throughput": 19991.51, "total_tokens": 221016576}
|
|
{"current_steps": 70230, "total_steps": 78105, "loss": 0.1679, "lr": 1.5328367184290677e-07, "epoch": 4.495870942961398, "percentage": 89.92, "elapsed_time": "3:04:16", "remaining_time": "0:20:39", "throughput": 19991.69, "total_tokens": 221031680}
|
|
{"current_steps": 70235, "total_steps": 78105, "loss": 0.1326, "lr": 1.530911228586346e-07, "epoch": 4.496191024902375, "percentage": 89.92, "elapsed_time": "3:04:16", "remaining_time": "0:20:38", "throughput": 19991.89, "total_tokens": 221047424}
|
|
{"current_steps": 70240, "total_steps": 78105, "loss": 0.1601, "lr": 1.528986910667668e-07, "epoch": 4.496511106843352, "percentage": 89.93, "elapsed_time": "3:04:17", "remaining_time": "0:20:38", "throughput": 19992.07, "total_tokens": 221062784}
|
|
{"current_steps": 70245, "total_steps": 78105, "loss": 0.1514, "lr": 1.5270637647691267e-07, "epoch": 4.496831188784329, "percentage": 89.94, "elapsed_time": "3:04:18", "remaining_time": "0:20:37", "throughput": 19992.25, "total_tokens": 221077824}
|
|
{"current_steps": 70250, "total_steps": 78105, "loss": 0.1055, "lr": 1.525141790986759e-07, "epoch": 4.497151270725306, "percentage": 89.94, "elapsed_time": "3:04:18", "remaining_time": "0:20:36", "throughput": 19992.45, "total_tokens": 221093568}
|
|
{"current_steps": 70255, "total_steps": 78105, "loss": 0.1353, "lr": 1.5232209894165295e-07, "epoch": 4.497471352666283, "percentage": 89.95, "elapsed_time": "3:04:19", "remaining_time": "0:20:35", "throughput": 19992.69, "total_tokens": 221110144}
|
|
{"current_steps": 70260, "total_steps": 78105, "loss": 0.1503, "lr": 1.5213013601543619e-07, "epoch": 4.49779143460726, "percentage": 89.96, "elapsed_time": "3:04:20", "remaining_time": "0:20:34", "throughput": 19992.88, "total_tokens": 221125632}
|
|
{"current_steps": 70265, "total_steps": 78105, "loss": 0.0863, "lr": 1.5193829032961015e-07, "epoch": 4.498111516548236, "percentage": 89.96, "elapsed_time": "3:04:20", "remaining_time": "0:20:34", "throughput": 19993.1, "total_tokens": 221142016}
|
|
{"current_steps": 70270, "total_steps": 78105, "loss": 0.1553, "lr": 1.517465618937558e-07, "epoch": 4.498431598489213, "percentage": 89.97, "elapsed_time": "3:04:21", "remaining_time": "0:20:33", "throughput": 19993.32, "total_tokens": 221158592}
|
|
{"current_steps": 70275, "total_steps": 78105, "loss": 0.1032, "lr": 1.5155495071744658e-07, "epoch": 4.49875168043019, "percentage": 89.98, "elapsed_time": "3:04:22", "remaining_time": "0:20:32", "throughput": 19993.5, "total_tokens": 221174016}
|
|
{"current_steps": 70280, "total_steps": 78105, "loss": 0.1602, "lr": 1.513634568102504e-07, "epoch": 4.499071762371167, "percentage": 89.98, "elapsed_time": "3:04:22", "remaining_time": "0:20:31", "throughput": 19993.66, "total_tokens": 221188480}
|
|
{"current_steps": 70285, "total_steps": 78105, "loss": 0.1548, "lr": 1.5117208018172957e-07, "epoch": 4.499391844312144, "percentage": 89.99, "elapsed_time": "3:04:23", "remaining_time": "0:20:30", "throughput": 19993.86, "total_tokens": 221204480}
|
|
{"current_steps": 70290, "total_steps": 78105, "loss": 0.1047, "lr": 1.5098082084144068e-07, "epoch": 4.499711926253121, "percentage": 89.99, "elapsed_time": "3:04:24", "remaining_time": "0:20:30", "throughput": 19994.13, "total_tokens": 221222208}
|
|
{"current_steps": 70295, "total_steps": 78105, "loss": 0.1387, "lr": 1.5078967879893353e-07, "epoch": 4.500032008194098, "percentage": 90.0, "elapsed_time": "3:04:25", "remaining_time": "0:20:29", "throughput": 19994.3, "total_tokens": 221237504}
|
|
{"current_steps": 70300, "total_steps": 78105, "loss": 0.1378, "lr": 1.5059865406375384e-07, "epoch": 4.500352090135075, "percentage": 90.01, "elapsed_time": "3:04:25", "remaining_time": "0:20:28", "throughput": 19994.49, "total_tokens": 221253056}
|
|
{"current_steps": 70305, "total_steps": 78105, "loss": 0.1645, "lr": 1.50407746645439e-07, "epoch": 4.500672172076051, "percentage": 90.01, "elapsed_time": "3:04:26", "remaining_time": "0:20:27", "throughput": 19994.7, "total_tokens": 221269248}
|
|
{"current_steps": 70308, "total_steps": 78105, "eval_loss": 0.6086835265159607, "epoch": 4.500864221240637, "percentage": 90.02, "elapsed_time": "3:05:17", "remaining_time": "0:20:32", "throughput": 19903.16, "total_tokens": 221278272}
|
|
{"current_steps": 70310, "total_steps": 78105, "loss": 0.1307, "lr": 1.502169565535236e-07, "epoch": 4.500992254017028, "percentage": 90.02, "elapsed_time": "3:05:52", "remaining_time": "0:20:36", "throughput": 19841.96, "total_tokens": 221286720}
|
|
{"current_steps": 70315, "total_steps": 78105, "loss": 0.1483, "lr": 1.5002628379753305e-07, "epoch": 4.501312335958005, "percentage": 90.03, "elapsed_time": "3:05:53", "remaining_time": "0:20:35", "throughput": 19842.13, "total_tokens": 221301824}
|
|
{"current_steps": 70320, "total_steps": 78105, "loss": 0.169, "lr": 1.4983572838698972e-07, "epoch": 4.501632417898982, "percentage": 90.03, "elapsed_time": "3:05:53", "remaining_time": "0:20:34", "throughput": 19842.31, "total_tokens": 221316800}
|
|
{"current_steps": 70325, "total_steps": 78105, "loss": 0.121, "lr": 1.4964529033140856e-07, "epoch": 4.501952499839959, "percentage": 90.04, "elapsed_time": "3:05:54", "remaining_time": "0:20:34", "throughput": 19842.57, "total_tokens": 221334400}
|
|
{"current_steps": 70330, "total_steps": 78105, "loss": 0.1592, "lr": 1.494549696402986e-07, "epoch": 4.502272581780936, "percentage": 90.05, "elapsed_time": "3:05:55", "remaining_time": "0:20:33", "throughput": 19842.76, "total_tokens": 221349504}
|
|
{"current_steps": 70335, "total_steps": 78105, "loss": 0.1331, "lr": 1.4926476632316473e-07, "epoch": 4.502592663721913, "percentage": 90.05, "elapsed_time": "3:05:55", "remaining_time": "0:20:32", "throughput": 19842.95, "total_tokens": 221364800}
|
|
{"current_steps": 70340, "total_steps": 78105, "loss": 0.1543, "lr": 1.4907468038950324e-07, "epoch": 4.50291274566289, "percentage": 90.06, "elapsed_time": "3:05:56", "remaining_time": "0:20:31", "throughput": 19843.15, "total_tokens": 221380224}
|
|
{"current_steps": 70345, "total_steps": 78105, "loss": 0.1287, "lr": 1.4888471184880683e-07, "epoch": 4.503232827603867, "percentage": 90.06, "elapsed_time": "3:05:57", "remaining_time": "0:20:30", "throughput": 19843.36, "total_tokens": 221396224}
|
|
{"current_steps": 70350, "total_steps": 78105, "loss": 0.1297, "lr": 1.486948607105615e-07, "epoch": 4.503552909544844, "percentage": 90.07, "elapsed_time": "3:05:57", "remaining_time": "0:20:29", "throughput": 19843.54, "total_tokens": 221411328}
|
|
{"current_steps": 70355, "total_steps": 78105, "loss": 0.1418, "lr": 1.4850512698424747e-07, "epoch": 4.50387299148582, "percentage": 90.08, "elapsed_time": "3:05:58", "remaining_time": "0:20:29", "throughput": 19843.73, "total_tokens": 221426816}
|
|
{"current_steps": 70360, "total_steps": 78105, "loss": 0.1001, "lr": 1.483155106793388e-07, "epoch": 4.504193073426797, "percentage": 90.08, "elapsed_time": "3:05:59", "remaining_time": "0:20:28", "throughput": 19843.88, "total_tokens": 221441024}
|
|
{"current_steps": 70365, "total_steps": 78105, "loss": 0.0924, "lr": 1.4812601180530405e-07, "epoch": 4.504513155367774, "percentage": 90.09, "elapsed_time": "3:05:59", "remaining_time": "0:20:27", "throughput": 19844.08, "total_tokens": 221456960}
|
|
{"current_steps": 70370, "total_steps": 78105, "loss": 0.1594, "lr": 1.4793663037160565e-07, "epoch": 4.504833237308751, "percentage": 90.1, "elapsed_time": "3:06:00", "remaining_time": "0:20:26", "throughput": 19844.27, "total_tokens": 221472384}
|
|
{"current_steps": 70375, "total_steps": 78105, "loss": 0.1194, "lr": 1.477473663877002e-07, "epoch": 4.505153319249728, "percentage": 90.1, "elapsed_time": "3:06:01", "remaining_time": "0:20:25", "throughput": 19844.48, "total_tokens": 221488320}
|
|
{"current_steps": 70380, "total_steps": 78105, "loss": 0.1285, "lr": 1.475582198630393e-07, "epoch": 4.505473401190705, "percentage": 90.11, "elapsed_time": "3:06:01", "remaining_time": "0:20:25", "throughput": 19844.71, "total_tokens": 221504576}
|
|
{"current_steps": 70385, "total_steps": 78105, "loss": 0.1567, "lr": 1.4736919080706734e-07, "epoch": 4.505793483131682, "percentage": 90.12, "elapsed_time": "3:06:02", "remaining_time": "0:20:24", "throughput": 19844.9, "total_tokens": 221519744}
|
|
{"current_steps": 70390, "total_steps": 78105, "loss": 0.1245, "lr": 1.4718027922922374e-07, "epoch": 4.506113565072659, "percentage": 90.12, "elapsed_time": "3:06:03", "remaining_time": "0:20:23", "throughput": 19845.13, "total_tokens": 221535872}
|
|
{"current_steps": 70395, "total_steps": 78105, "loss": 0.1983, "lr": 1.4699148513894173e-07, "epoch": 4.506433647013635, "percentage": 90.13, "elapsed_time": "3:06:03", "remaining_time": "0:20:22", "throughput": 19845.33, "total_tokens": 221551744}
|
|
{"current_steps": 70400, "total_steps": 78105, "loss": 0.1324, "lr": 1.4680280854564826e-07, "epoch": 4.506753728954612, "percentage": 90.14, "elapsed_time": "3:06:04", "remaining_time": "0:20:21", "throughput": 19845.55, "total_tokens": 221567488}
|
|
{"current_steps": 70405, "total_steps": 78105, "loss": 0.0993, "lr": 1.4661424945876524e-07, "epoch": 4.507073810895589, "percentage": 90.14, "elapsed_time": "3:06:05", "remaining_time": "0:20:21", "throughput": 19845.73, "total_tokens": 221582528}
|
|
{"current_steps": 70410, "total_steps": 78105, "loss": 0.1916, "lr": 1.4642580788770843e-07, "epoch": 4.507393892836566, "percentage": 90.15, "elapsed_time": "3:06:05", "remaining_time": "0:20:20", "throughput": 19845.94, "total_tokens": 221598208}
|
|
{"current_steps": 70415, "total_steps": 78105, "loss": 0.1744, "lr": 1.4623748384188703e-07, "epoch": 4.507713974777543, "percentage": 90.15, "elapsed_time": "3:06:06", "remaining_time": "0:20:19", "throughput": 19846.16, "total_tokens": 221614848}
|
|
{"current_steps": 70420, "total_steps": 78105, "loss": 0.1177, "lr": 1.4604927733070595e-07, "epoch": 4.50803405671852, "percentage": 90.16, "elapsed_time": "3:06:07", "remaining_time": "0:20:18", "throughput": 19846.36, "total_tokens": 221630336}
|
|
{"current_steps": 70425, "total_steps": 78105, "loss": 0.0852, "lr": 1.4586118836356216e-07, "epoch": 4.508354138659497, "percentage": 90.17, "elapsed_time": "3:06:08", "remaining_time": "0:20:17", "throughput": 19846.61, "total_tokens": 221647808}
|
|
{"current_steps": 70430, "total_steps": 78105, "loss": 0.1256, "lr": 1.4567321694984843e-07, "epoch": 4.508674220600474, "percentage": 90.17, "elapsed_time": "3:06:08", "remaining_time": "0:20:17", "throughput": 19846.8, "total_tokens": 221663232}
|
|
{"current_steps": 70435, "total_steps": 78105, "loss": 0.1454, "lr": 1.454853630989514e-07, "epoch": 4.508994302541451, "percentage": 90.18, "elapsed_time": "3:06:09", "remaining_time": "0:20:16", "throughput": 19846.98, "total_tokens": 221678272}
|
|
{"current_steps": 70440, "total_steps": 78105, "loss": 0.1047, "lr": 1.452976268202505e-07, "epoch": 4.509314384482428, "percentage": 90.19, "elapsed_time": "3:06:10", "remaining_time": "0:20:15", "throughput": 19847.22, "total_tokens": 221695040}
|
|
{"current_steps": 70445, "total_steps": 78105, "loss": 0.1347, "lr": 1.451100081231216e-07, "epoch": 4.509634466423404, "percentage": 90.19, "elapsed_time": "3:06:10", "remaining_time": "0:20:14", "throughput": 19847.48, "total_tokens": 221712704}
|
|
{"current_steps": 70450, "total_steps": 78105, "loss": 0.1447, "lr": 1.4492250701693218e-07, "epoch": 4.509954548364381, "percentage": 90.2, "elapsed_time": "3:06:11", "remaining_time": "0:20:13", "throughput": 19847.69, "total_tokens": 221728448}
|
|
{"current_steps": 70455, "total_steps": 78105, "loss": 0.1322, "lr": 1.447351235110464e-07, "epoch": 4.510274630305358, "percentage": 90.21, "elapsed_time": "3:06:12", "remaining_time": "0:20:13", "throughput": 19847.88, "total_tokens": 221743936}
|
|
{"current_steps": 70460, "total_steps": 78105, "loss": 0.0946, "lr": 1.4454785761481933e-07, "epoch": 4.510594712246335, "percentage": 90.21, "elapsed_time": "3:06:12", "remaining_time": "0:20:12", "throughput": 19848.06, "total_tokens": 221758976}
|
|
{"current_steps": 70465, "total_steps": 78105, "loss": 0.0909, "lr": 1.443607093376037e-07, "epoch": 4.510914794187312, "percentage": 90.22, "elapsed_time": "3:06:13", "remaining_time": "0:20:11", "throughput": 19848.32, "total_tokens": 221776512}
|
|
{"current_steps": 70470, "total_steps": 78105, "loss": 0.1663, "lr": 1.4417367868874433e-07, "epoch": 4.511234876128289, "percentage": 90.22, "elapsed_time": "3:06:14", "remaining_time": "0:20:10", "throughput": 19848.52, "total_tokens": 221792512}
|
|
{"current_steps": 70475, "total_steps": 78105, "loss": 0.1203, "lr": 1.4398676567758035e-07, "epoch": 4.511554958069266, "percentage": 90.23, "elapsed_time": "3:06:14", "remaining_time": "0:20:09", "throughput": 19848.71, "total_tokens": 221807680}
|
|
{"current_steps": 70480, "total_steps": 78105, "loss": 0.1465, "lr": 1.4379997031344518e-07, "epoch": 4.511875040010243, "percentage": 90.24, "elapsed_time": "3:06:15", "remaining_time": "0:20:09", "throughput": 19848.92, "total_tokens": 221824000}
|
|
{"current_steps": 70485, "total_steps": 78105, "loss": 0.1207, "lr": 1.436132926056663e-07, "epoch": 4.512195121951219, "percentage": 90.24, "elapsed_time": "3:06:16", "remaining_time": "0:20:08", "throughput": 19849.09, "total_tokens": 221839168}
|
|
{"current_steps": 70490, "total_steps": 78105, "loss": 0.0961, "lr": 1.434267325635655e-07, "epoch": 4.512515203892196, "percentage": 90.25, "elapsed_time": "3:06:17", "remaining_time": "0:20:07", "throughput": 19849.37, "total_tokens": 221857024}
|
|
{"current_steps": 70495, "total_steps": 78105, "loss": 0.141, "lr": 1.4324029019645913e-07, "epoch": 4.512835285833173, "percentage": 90.26, "elapsed_time": "3:06:17", "remaining_time": "0:20:06", "throughput": 19849.67, "total_tokens": 221875392}
|
|
{"current_steps": 70500, "total_steps": 78105, "loss": 0.1099, "lr": 1.4305396551365675e-07, "epoch": 4.51315536777415, "percentage": 90.26, "elapsed_time": "3:06:18", "remaining_time": "0:20:05", "throughput": 19849.86, "total_tokens": 221891328}
|
|
{"current_steps": 70505, "total_steps": 78105, "loss": 0.1362, "lr": 1.4286775852446226e-07, "epoch": 4.513475449715127, "percentage": 90.27, "elapsed_time": "3:06:19", "remaining_time": "0:20:05", "throughput": 19850.05, "total_tokens": 221906560}
|
|
{"current_steps": 70510, "total_steps": 78105, "loss": 0.1105, "lr": 1.4268166923817412e-07, "epoch": 4.513795531656104, "percentage": 90.28, "elapsed_time": "3:06:19", "remaining_time": "0:20:04", "throughput": 19850.22, "total_tokens": 221921216}
|
|
{"current_steps": 70515, "total_steps": 78105, "loss": 0.0972, "lr": 1.4249569766408484e-07, "epoch": 4.514115613597081, "percentage": 90.28, "elapsed_time": "3:06:20", "remaining_time": "0:20:03", "throughput": 19850.44, "total_tokens": 221937472}
|
|
{"current_steps": 70520, "total_steps": 78105, "loss": 0.1015, "lr": 1.4230984381148032e-07, "epoch": 4.514435695538058, "percentage": 90.29, "elapsed_time": "3:06:21", "remaining_time": "0:20:02", "throughput": 19850.65, "total_tokens": 221953472}
|
|
{"current_steps": 70525, "total_steps": 78105, "loss": 0.1435, "lr": 1.4212410768964126e-07, "epoch": 4.514755777479035, "percentage": 90.3, "elapsed_time": "3:06:21", "remaining_time": "0:20:01", "throughput": 19850.83, "total_tokens": 221969024}
|
|
{"current_steps": 70530, "total_steps": 78105, "loss": 0.2193, "lr": 1.419384893078432e-07, "epoch": 4.515075859420012, "percentage": 90.3, "elapsed_time": "3:06:22", "remaining_time": "0:20:01", "throughput": 19851.01, "total_tokens": 221984832}
|
|
{"current_steps": 70535, "total_steps": 78105, "loss": 0.1949, "lr": 1.4175298867535353e-07, "epoch": 4.515395941360988, "percentage": 90.31, "elapsed_time": "3:06:23", "remaining_time": "0:20:00", "throughput": 19851.19, "total_tokens": 222000192}
|
|
{"current_steps": 70540, "total_steps": 78105, "loss": 0.153, "lr": 1.4156760580143675e-07, "epoch": 4.515716023301965, "percentage": 90.31, "elapsed_time": "3:06:23", "remaining_time": "0:19:59", "throughput": 19851.35, "total_tokens": 222015168}
|
|
{"current_steps": 70545, "total_steps": 78105, "loss": 0.1466, "lr": 1.4138234069534822e-07, "epoch": 4.516036105242942, "percentage": 90.32, "elapsed_time": "3:06:24", "remaining_time": "0:19:58", "throughput": 19851.53, "total_tokens": 222030272}
|
|
{"current_steps": 70550, "total_steps": 78105, "loss": 0.1251, "lr": 1.411971933663406e-07, "epoch": 4.516356187183919, "percentage": 90.33, "elapsed_time": "3:06:25", "remaining_time": "0:19:57", "throughput": 19851.69, "total_tokens": 222045120}
|
|
{"current_steps": 70555, "total_steps": 78105, "loss": 0.1185, "lr": 1.4101216382365833e-07, "epoch": 4.516676269124896, "percentage": 90.33, "elapsed_time": "3:06:25", "remaining_time": "0:19:56", "throughput": 19851.9, "total_tokens": 222061440}
|
|
{"current_steps": 70560, "total_steps": 78105, "loss": 0.0941, "lr": 1.408272520765408e-07, "epoch": 4.516996351065873, "percentage": 90.34, "elapsed_time": "3:06:26", "remaining_time": "0:19:56", "throughput": 19852.08, "total_tokens": 222076480}
|
|
{"current_steps": 70565, "total_steps": 78105, "loss": 0.1247, "lr": 1.406424581342228e-07, "epoch": 4.51731643300685, "percentage": 90.35, "elapsed_time": "3:06:27", "remaining_time": "0:19:55", "throughput": 19852.27, "total_tokens": 222091648}
|
|
{"current_steps": 70570, "total_steps": 78105, "loss": 0.1387, "lr": 1.4045778200592997e-07, "epoch": 4.517636514947827, "percentage": 90.35, "elapsed_time": "3:06:27", "remaining_time": "0:19:54", "throughput": 19852.48, "total_tokens": 222107712}
|
|
{"current_steps": 70575, "total_steps": 78105, "loss": 0.1016, "lr": 1.4027322370088553e-07, "epoch": 4.517956596888803, "percentage": 90.36, "elapsed_time": "3:06:28", "remaining_time": "0:19:53", "throughput": 19852.69, "total_tokens": 222124032}
|
|
{"current_steps": 70580, "total_steps": 78105, "loss": 0.1439, "lr": 1.4008878322830488e-07, "epoch": 4.51827667882978, "percentage": 90.37, "elapsed_time": "3:06:29", "remaining_time": "0:19:52", "throughput": 19852.93, "total_tokens": 222140672}
|
|
{"current_steps": 70585, "total_steps": 78105, "loss": 0.1312, "lr": 1.3990446059739816e-07, "epoch": 4.518596760770757, "percentage": 90.37, "elapsed_time": "3:06:29", "remaining_time": "0:19:52", "throughput": 19853.1, "total_tokens": 222155712}
|
|
{"current_steps": 70590, "total_steps": 78105, "loss": 0.1327, "lr": 1.3972025581736936e-07, "epoch": 4.518916842711734, "percentage": 90.38, "elapsed_time": "3:06:30", "remaining_time": "0:19:51", "throughput": 19853.3, "total_tokens": 222171968}
|
|
{"current_steps": 70595, "total_steps": 78105, "loss": 0.136, "lr": 1.3953616889741673e-07, "epoch": 4.519236924652711, "percentage": 90.38, "elapsed_time": "3:06:31", "remaining_time": "0:19:50", "throughput": 19853.47, "total_tokens": 222186816}
|
|
{"current_steps": 70600, "total_steps": 78105, "loss": 0.1054, "lr": 1.393521998467326e-07, "epoch": 4.519557006593688, "percentage": 90.39, "elapsed_time": "3:06:32", "remaining_time": "0:19:49", "throughput": 19853.67, "total_tokens": 222202944}
|
|
{"current_steps": 70605, "total_steps": 78105, "loss": 0.1645, "lr": 1.3916834867450352e-07, "epoch": 4.519877088534665, "percentage": 90.4, "elapsed_time": "3:06:32", "remaining_time": "0:19:48", "throughput": 19853.85, "total_tokens": 222218112}
|
|
{"current_steps": 70610, "total_steps": 78105, "loss": 0.1397, "lr": 1.3898461538990965e-07, "epoch": 4.520197170475642, "percentage": 90.4, "elapsed_time": "3:06:33", "remaining_time": "0:19:48", "throughput": 19854.03, "total_tokens": 222233344}
|
|
{"current_steps": 70615, "total_steps": 78105, "loss": 0.1085, "lr": 1.3880100000212642e-07, "epoch": 4.520517252416619, "percentage": 90.41, "elapsed_time": "3:06:34", "remaining_time": "0:19:47", "throughput": 19854.19, "total_tokens": 222247936}
|
|
{"current_steps": 70620, "total_steps": 78105, "loss": 0.0946, "lr": 1.3861750252032207e-07, "epoch": 4.520837334357595, "percentage": 90.42, "elapsed_time": "3:06:34", "remaining_time": "0:19:46", "throughput": 19854.37, "total_tokens": 222263168}
|
|
{"current_steps": 70625, "total_steps": 78105, "loss": 0.1761, "lr": 1.3843412295365955e-07, "epoch": 4.521157416298572, "percentage": 90.42, "elapsed_time": "3:06:35", "remaining_time": "0:19:45", "throughput": 19854.67, "total_tokens": 222281536}
|
|
{"current_steps": 70630, "total_steps": 78105, "loss": 0.1108, "lr": 1.382508613112962e-07, "epoch": 4.521477498239549, "percentage": 90.43, "elapsed_time": "3:06:36", "remaining_time": "0:19:44", "throughput": 19854.88, "total_tokens": 222297472}
|
|
{"current_steps": 70635, "total_steps": 78105, "loss": 0.1148, "lr": 1.3806771760238285e-07, "epoch": 4.521797580180526, "percentage": 90.44, "elapsed_time": "3:06:36", "remaining_time": "0:19:44", "throughput": 19855.08, "total_tokens": 222313088}
|
|
{"current_steps": 70640, "total_steps": 78105, "loss": 0.1408, "lr": 1.3788469183606462e-07, "epoch": 4.522117662121503, "percentage": 90.44, "elapsed_time": "3:06:37", "remaining_time": "0:19:43", "throughput": 19855.3, "total_tokens": 222329344}
|
|
{"current_steps": 70645, "total_steps": 78105, "loss": 0.0952, "lr": 1.3770178402148116e-07, "epoch": 4.52243774406248, "percentage": 90.45, "elapsed_time": "3:06:38", "remaining_time": "0:19:42", "throughput": 19855.48, "total_tokens": 222344128}
|
|
{"current_steps": 70650, "total_steps": 78105, "loss": 0.1312, "lr": 1.3751899416776626e-07, "epoch": 4.522757826003457, "percentage": 90.46, "elapsed_time": "3:06:38", "remaining_time": "0:19:41", "throughput": 19855.68, "total_tokens": 222359808}
|
|
{"current_steps": 70655, "total_steps": 78105, "loss": 0.0929, "lr": 1.3733632228404626e-07, "epoch": 4.523077907944434, "percentage": 90.46, "elapsed_time": "3:06:39", "remaining_time": "0:19:40", "throughput": 19855.87, "total_tokens": 222375488}
|
|
{"current_steps": 70660, "total_steps": 78105, "loss": 0.1294, "lr": 1.371537683794441e-07, "epoch": 4.52339798988541, "percentage": 90.47, "elapsed_time": "3:06:40", "remaining_time": "0:19:40", "throughput": 19856.1, "total_tokens": 222392000}
|
|
{"current_steps": 70665, "total_steps": 78105, "loss": 0.1487, "lr": 1.3697133246307532e-07, "epoch": 4.523718071826387, "percentage": 90.47, "elapsed_time": "3:06:40", "remaining_time": "0:19:39", "throughput": 19856.28, "total_tokens": 222407168}
|
|
{"current_steps": 70670, "total_steps": 78105, "loss": 0.1413, "lr": 1.3678901454404925e-07, "epoch": 4.524038153767364, "percentage": 90.48, "elapsed_time": "3:06:41", "remaining_time": "0:19:38", "throughput": 19856.5, "total_tokens": 222423808}
|
|
{"current_steps": 70675, "total_steps": 78105, "loss": 0.1716, "lr": 1.3660681463147086e-07, "epoch": 4.524358235708341, "percentage": 90.49, "elapsed_time": "3:06:42", "remaining_time": "0:19:37", "throughput": 19856.69, "total_tokens": 222438912}
|
|
{"current_steps": 70680, "total_steps": 78105, "loss": 0.1222, "lr": 1.36424732734437e-07, "epoch": 4.524678317649318, "percentage": 90.49, "elapsed_time": "3:06:42", "remaining_time": "0:19:36", "throughput": 19856.85, "total_tokens": 222453696}
|
|
{"current_steps": 70685, "total_steps": 78105, "loss": 0.112, "lr": 1.362427688620413e-07, "epoch": 4.524998399590295, "percentage": 90.5, "elapsed_time": "3:06:43", "remaining_time": "0:19:36", "throughput": 19857.05, "total_tokens": 222469120}
|
|
{"current_steps": 70690, "total_steps": 78105, "loss": 0.1283, "lr": 1.3606092302336866e-07, "epoch": 4.525318481531272, "percentage": 90.51, "elapsed_time": "3:06:44", "remaining_time": "0:19:35", "throughput": 19857.24, "total_tokens": 222484544}
|
|
{"current_steps": 70695, "total_steps": 78105, "loss": 0.0812, "lr": 1.3587919522750044e-07, "epoch": 4.525638563472249, "percentage": 90.51, "elapsed_time": "3:06:44", "remaining_time": "0:19:34", "throughput": 19857.48, "total_tokens": 222501184}
|
|
{"current_steps": 70700, "total_steps": 78105, "loss": 0.1355, "lr": 1.3569758548351103e-07, "epoch": 4.525958645413226, "percentage": 90.52, "elapsed_time": "3:06:45", "remaining_time": "0:19:33", "throughput": 19857.66, "total_tokens": 222516608}
|
|
{"current_steps": 70705, "total_steps": 78105, "loss": 0.1258, "lr": 1.3551609380046903e-07, "epoch": 4.526278727354203, "percentage": 90.53, "elapsed_time": "3:06:46", "remaining_time": "0:19:32", "throughput": 19857.86, "total_tokens": 222532352}
|
|
{"current_steps": 70710, "total_steps": 78105, "loss": 0.0945, "lr": 1.353347201874372e-07, "epoch": 4.526598809295179, "percentage": 90.53, "elapsed_time": "3:06:46", "remaining_time": "0:19:32", "throughput": 19858.08, "total_tokens": 222548736}
|
|
{"current_steps": 70715, "total_steps": 78105, "loss": 0.1458, "lr": 1.3515346465347245e-07, "epoch": 4.526918891236156, "percentage": 90.54, "elapsed_time": "3:06:47", "remaining_time": "0:19:31", "throughput": 19858.29, "total_tokens": 222564736}
|
|
{"current_steps": 70720, "total_steps": 78105, "loss": 0.1355, "lr": 1.3497232720762505e-07, "epoch": 4.527238973177133, "percentage": 90.54, "elapsed_time": "3:06:48", "remaining_time": "0:19:30", "throughput": 19858.46, "total_tokens": 222579520}
|
|
{"current_steps": 70725, "total_steps": 78105, "loss": 0.1466, "lr": 1.3479130785894107e-07, "epoch": 4.52755905511811, "percentage": 90.55, "elapsed_time": "3:06:48", "remaining_time": "0:19:29", "throughput": 19858.65, "total_tokens": 222594752}
|
|
{"current_steps": 70730, "total_steps": 78105, "loss": 0.2823, "lr": 1.3461040661645912e-07, "epoch": 4.527879137059087, "percentage": 90.56, "elapsed_time": "3:06:49", "remaining_time": "0:19:28", "throughput": 19858.93, "total_tokens": 222612672}
|
|
{"current_steps": 70735, "total_steps": 78105, "loss": 0.1378, "lr": 1.3442962348921285e-07, "epoch": 4.528199219000064, "percentage": 90.56, "elapsed_time": "3:06:50", "remaining_time": "0:19:28", "throughput": 19859.13, "total_tokens": 222627968}
|
|
{"current_steps": 70740, "total_steps": 78105, "loss": 0.1256, "lr": 1.3424895848622942e-07, "epoch": 4.528519300941041, "percentage": 90.57, "elapsed_time": "3:06:51", "remaining_time": "0:19:27", "throughput": 19859.37, "total_tokens": 222644480}
|
|
{"current_steps": 70745, "total_steps": 78105, "loss": 0.114, "lr": 1.3406841161653e-07, "epoch": 4.528839382882018, "percentage": 90.58, "elapsed_time": "3:06:51", "remaining_time": "0:19:26", "throughput": 19859.55, "total_tokens": 222659392}
|
|
{"current_steps": 70750, "total_steps": 78105, "loss": 0.1296, "lr": 1.338879828891307e-07, "epoch": 4.529159464822994, "percentage": 90.58, "elapsed_time": "3:06:52", "remaining_time": "0:19:25", "throughput": 19859.75, "total_tokens": 222675072}
|
|
{"current_steps": 70755, "total_steps": 78105, "loss": 0.228, "lr": 1.3370767231304037e-07, "epoch": 4.529479546763971, "percentage": 90.59, "elapsed_time": "3:06:53", "remaining_time": "0:19:24", "throughput": 19859.93, "total_tokens": 222690304}
|
|
{"current_steps": 70760, "total_steps": 78105, "loss": 0.1558, "lr": 1.3352747989726385e-07, "epoch": 4.529799628704948, "percentage": 90.6, "elapsed_time": "3:06:53", "remaining_time": "0:19:24", "throughput": 19860.09, "total_tokens": 222705344}
|
|
{"current_steps": 70765, "total_steps": 78105, "loss": 0.1268, "lr": 1.3334740565079801e-07, "epoch": 4.530119710645925, "percentage": 90.6, "elapsed_time": "3:06:54", "remaining_time": "0:19:23", "throughput": 19860.26, "total_tokens": 222720192}
|
|
{"current_steps": 70770, "total_steps": 78105, "loss": 0.1253, "lr": 1.33167449582636e-07, "epoch": 4.530439792586902, "percentage": 90.61, "elapsed_time": "3:06:55", "remaining_time": "0:19:22", "throughput": 19860.46, "total_tokens": 222735616}
|
|
{"current_steps": 70775, "total_steps": 78105, "loss": 0.0883, "lr": 1.3298761170176255e-07, "epoch": 4.530759874527879, "percentage": 90.62, "elapsed_time": "3:06:55", "remaining_time": "0:19:21", "throughput": 19860.68, "total_tokens": 222751936}
|
|
{"current_steps": 70780, "total_steps": 78105, "loss": 0.1138, "lr": 1.3280789201715854e-07, "epoch": 4.531079956468856, "percentage": 90.62, "elapsed_time": "3:06:56", "remaining_time": "0:19:20", "throughput": 19860.89, "total_tokens": 222767872}
|
|
{"current_steps": 70785, "total_steps": 78105, "loss": 0.1461, "lr": 1.326282905377982e-07, "epoch": 4.531400038409833, "percentage": 90.63, "elapsed_time": "3:06:57", "remaining_time": "0:19:19", "throughput": 19861.07, "total_tokens": 222783360}
|
|
{"current_steps": 70790, "total_steps": 78105, "loss": 0.1418, "lr": 1.324488072726496e-07, "epoch": 4.53172012035081, "percentage": 90.63, "elapsed_time": "3:06:57", "remaining_time": "0:19:19", "throughput": 19861.31, "total_tokens": 222799936}
|
|
{"current_steps": 70795, "total_steps": 78105, "loss": 0.135, "lr": 1.322694422306761e-07, "epoch": 4.532040202291787, "percentage": 90.64, "elapsed_time": "3:06:58", "remaining_time": "0:19:18", "throughput": 19861.59, "total_tokens": 222817664}
|
|
{"current_steps": 70800, "total_steps": 78105, "loss": 0.1051, "lr": 1.3209019542083313e-07, "epoch": 4.532360284232763, "percentage": 90.65, "elapsed_time": "3:06:59", "remaining_time": "0:19:17", "throughput": 19861.8, "total_tokens": 222833664}
|
|
{"current_steps": 70805, "total_steps": 78105, "loss": 0.2186, "lr": 1.3191106685207178e-07, "epoch": 4.53268036617374, "percentage": 90.65, "elapsed_time": "3:06:59", "remaining_time": "0:19:16", "throughput": 19862.03, "total_tokens": 222849856}
|
|
{"current_steps": 70810, "total_steps": 78105, "loss": 0.1267, "lr": 1.3173205653333715e-07, "epoch": 4.533000448114717, "percentage": 90.66, "elapsed_time": "3:07:00", "remaining_time": "0:19:15", "throughput": 19862.23, "total_tokens": 222865280}
|
|
{"current_steps": 70815, "total_steps": 78105, "loss": 0.1373, "lr": 1.3155316447356765e-07, "epoch": 4.533320530055694, "percentage": 90.67, "elapsed_time": "3:07:01", "remaining_time": "0:19:15", "throughput": 19862.42, "total_tokens": 222880832}
|
|
{"current_steps": 70820, "total_steps": 78105, "loss": 0.1698, "lr": 1.313743906816961e-07, "epoch": 4.533640611996671, "percentage": 90.67, "elapsed_time": "3:07:01", "remaining_time": "0:19:14", "throughput": 19862.61, "total_tokens": 222895936}
|
|
{"current_steps": 70825, "total_steps": 78105, "loss": 0.079, "lr": 1.3119573516664984e-07, "epoch": 4.533960693937648, "percentage": 90.68, "elapsed_time": "3:07:02", "remaining_time": "0:19:13", "throughput": 19862.8, "total_tokens": 222911232}
|
|
{"current_steps": 70830, "total_steps": 78105, "loss": 0.1053, "lr": 1.3101719793735007e-07, "epoch": 4.534280775878625, "percentage": 90.69, "elapsed_time": "3:07:03", "remaining_time": "0:19:12", "throughput": 19863.01, "total_tokens": 222927232}
|
|
{"current_steps": 70835, "total_steps": 78105, "loss": 0.1166, "lr": 1.3083877900271185e-07, "epoch": 4.534600857819602, "percentage": 90.69, "elapsed_time": "3:07:03", "remaining_time": "0:19:11", "throughput": 19863.21, "total_tokens": 222942848}
|
|
{"current_steps": 70840, "total_steps": 78105, "loss": 0.1313, "lr": 1.306604783716442e-07, "epoch": 4.534920939760578, "percentage": 90.7, "elapsed_time": "3:07:04", "remaining_time": "0:19:11", "throughput": 19863.44, "total_tokens": 222959296}
|
|
{"current_steps": 70845, "total_steps": 78105, "loss": 0.1726, "lr": 1.3048229605305108e-07, "epoch": 4.535241021701555, "percentage": 90.7, "elapsed_time": "3:07:05", "remaining_time": "0:19:10", "throughput": 19863.7, "total_tokens": 222976320}
|
|
{"current_steps": 70850, "total_steps": 78105, "loss": 0.1038, "lr": 1.3030423205582955e-07, "epoch": 4.535561103642532, "percentage": 90.71, "elapsed_time": "3:07:05", "remaining_time": "0:19:09", "throughput": 19863.88, "total_tokens": 222991360}
|
|
{"current_steps": 70855, "total_steps": 78105, "loss": 0.133, "lr": 1.3012628638887165e-07, "epoch": 4.535881185583509, "percentage": 90.72, "elapsed_time": "3:07:06", "remaining_time": "0:19:08", "throughput": 19864.03, "total_tokens": 223006080}
|
|
{"current_steps": 70860, "total_steps": 78105, "loss": 0.1226, "lr": 1.2994845906106252e-07, "epoch": 4.536201267524486, "percentage": 90.72, "elapsed_time": "3:07:07", "remaining_time": "0:19:07", "throughput": 19864.22, "total_tokens": 223021504}
|
|
{"current_steps": 70865, "total_steps": 78105, "loss": 0.1884, "lr": 1.2977075008128255e-07, "epoch": 4.536521349465463, "percentage": 90.73, "elapsed_time": "3:07:08", "remaining_time": "0:19:07", "throughput": 19864.46, "total_tokens": 223038336}
|
|
{"current_steps": 70870, "total_steps": 78105, "loss": 0.1449, "lr": 1.295931594584049e-07, "epoch": 4.53684143140644, "percentage": 90.74, "elapsed_time": "3:07:08", "remaining_time": "0:19:06", "throughput": 19864.73, "total_tokens": 223056256}
|
|
{"current_steps": 70875, "total_steps": 78105, "loss": 0.118, "lr": 1.2941568720129778e-07, "epoch": 4.537161513347417, "percentage": 90.74, "elapsed_time": "3:07:09", "remaining_time": "0:19:05", "throughput": 19864.96, "total_tokens": 223072768}
|
|
{"current_steps": 70880, "total_steps": 78105, "loss": 0.1638, "lr": 1.2923833331882378e-07, "epoch": 4.537481595288394, "percentage": 90.75, "elapsed_time": "3:07:10", "remaining_time": "0:19:04", "throughput": 19865.22, "total_tokens": 223090432}
|
|
{"current_steps": 70885, "total_steps": 78105, "loss": 0.1098, "lr": 1.290610978198381e-07, "epoch": 4.53780167722937, "percentage": 90.76, "elapsed_time": "3:07:10", "remaining_time": "0:19:03", "throughput": 19865.38, "total_tokens": 223105280}
|
|
{"current_steps": 70890, "total_steps": 78105, "loss": 0.0926, "lr": 1.288839807131917e-07, "epoch": 4.538121759170347, "percentage": 90.76, "elapsed_time": "3:07:11", "remaining_time": "0:19:03", "throughput": 19865.54, "total_tokens": 223120448}
|
|
{"current_steps": 70895, "total_steps": 78105, "loss": 0.137, "lr": 1.2870698200772857e-07, "epoch": 4.538441841111324, "percentage": 90.77, "elapsed_time": "3:07:12", "remaining_time": "0:19:02", "throughput": 19865.73, "total_tokens": 223135872}
|
|
{"current_steps": 70900, "total_steps": 78105, "loss": 0.0738, "lr": 1.285301017122867e-07, "epoch": 4.538761923052301, "percentage": 90.78, "elapsed_time": "3:07:12", "remaining_time": "0:19:01", "throughput": 19865.89, "total_tokens": 223150592}
|
|
{"current_steps": 70905, "total_steps": 78105, "loss": 0.1483, "lr": 1.283533398356998e-07, "epoch": 4.539082004993278, "percentage": 90.78, "elapsed_time": "3:07:13", "remaining_time": "0:19:00", "throughput": 19866.12, "total_tokens": 223167360}
|
|
{"current_steps": 70910, "total_steps": 78105, "loss": 0.0748, "lr": 1.2817669638679307e-07, "epoch": 4.539402086934255, "percentage": 90.79, "elapsed_time": "3:07:14", "remaining_time": "0:18:59", "throughput": 19866.32, "total_tokens": 223183040}
|
|
{"current_steps": 70915, "total_steps": 78105, "loss": 0.1027, "lr": 1.2800017137438837e-07, "epoch": 4.539722168875232, "percentage": 90.79, "elapsed_time": "3:07:14", "remaining_time": "0:18:59", "throughput": 19866.51, "total_tokens": 223198720}
|
|
{"current_steps": 70920, "total_steps": 78105, "loss": 0.1212, "lr": 1.2782376480729915e-07, "epoch": 4.540042250816209, "percentage": 90.8, "elapsed_time": "3:07:15", "remaining_time": "0:18:58", "throughput": 19866.72, "total_tokens": 223214656}
|
|
{"current_steps": 70925, "total_steps": 78105, "loss": 0.1335, "lr": 1.2764747669433502e-07, "epoch": 4.540362332757185, "percentage": 90.81, "elapsed_time": "3:07:16", "remaining_time": "0:18:57", "throughput": 19866.9, "total_tokens": 223229568}
|
|
{"current_steps": 70930, "total_steps": 78105, "loss": 0.1703, "lr": 1.27471307044299e-07, "epoch": 4.540682414698162, "percentage": 90.81, "elapsed_time": "3:07:16", "remaining_time": "0:18:56", "throughput": 19867.12, "total_tokens": 223245696}
|
|
{"current_steps": 70935, "total_steps": 78105, "loss": 0.0976, "lr": 1.2729525586598761e-07, "epoch": 4.541002496639139, "percentage": 90.82, "elapsed_time": "3:07:17", "remaining_time": "0:18:55", "throughput": 19867.4, "total_tokens": 223263488}
|
|
{"current_steps": 70940, "total_steps": 78105, "loss": 0.1428, "lr": 1.2711932316819247e-07, "epoch": 4.541322578580116, "percentage": 90.83, "elapsed_time": "3:07:18", "remaining_time": "0:18:55", "throughput": 19867.6, "total_tokens": 223278976}
|
|
{"current_steps": 70945, "total_steps": 78105, "loss": 0.1352, "lr": 1.2694350895969848e-07, "epoch": 4.541642660521093, "percentage": 90.83, "elapsed_time": "3:07:19", "remaining_time": "0:18:54", "throughput": 19867.79, "total_tokens": 223294208}
|
|
{"current_steps": 70950, "total_steps": 78105, "loss": 0.1224, "lr": 1.2676781324928418e-07, "epoch": 4.54196274246207, "percentage": 90.84, "elapsed_time": "3:07:19", "remaining_time": "0:18:53", "throughput": 19867.99, "total_tokens": 223310208}
|
|
{"current_steps": 70955, "total_steps": 78105, "loss": 0.1099, "lr": 1.2659223604572446e-07, "epoch": 4.542282824403047, "percentage": 90.85, "elapsed_time": "3:07:20", "remaining_time": "0:18:52", "throughput": 19868.26, "total_tokens": 223328000}
|
|
{"current_steps": 70960, "total_steps": 78105, "loss": 0.1044, "lr": 1.2641677735778513e-07, "epoch": 4.542602906344024, "percentage": 90.85, "elapsed_time": "3:07:21", "remaining_time": "0:18:51", "throughput": 19868.44, "total_tokens": 223343104}
|
|
{"current_steps": 70965, "total_steps": 78105, "loss": 0.1606, "lr": 1.2624143719422889e-07, "epoch": 4.542922988285001, "percentage": 90.86, "elapsed_time": "3:07:21", "remaining_time": "0:18:51", "throughput": 19868.6, "total_tokens": 223357632}
|
|
{"current_steps": 70970, "total_steps": 78105, "loss": 0.1414, "lr": 1.2606621556381064e-07, "epoch": 4.543243070225978, "percentage": 90.86, "elapsed_time": "3:07:22", "remaining_time": "0:18:50", "throughput": 19868.79, "total_tokens": 223373056}
|
|
{"current_steps": 70975, "total_steps": 78105, "loss": 0.1308, "lr": 1.2589111247528012e-07, "epoch": 4.543563152166954, "percentage": 90.87, "elapsed_time": "3:07:23", "remaining_time": "0:18:49", "throughput": 19869.04, "total_tokens": 223389952}
|
|
{"current_steps": 70980, "total_steps": 78105, "loss": 0.1504, "lr": 1.2571612793738142e-07, "epoch": 4.543883234107931, "percentage": 90.88, "elapsed_time": "3:07:23", "remaining_time": "0:18:48", "throughput": 19869.23, "total_tokens": 223405440}
|
|
{"current_steps": 70985, "total_steps": 78105, "loss": 0.1406, "lr": 1.255412619588517e-07, "epoch": 4.544203316048908, "percentage": 90.88, "elapsed_time": "3:07:24", "remaining_time": "0:18:47", "throughput": 19869.43, "total_tokens": 223420928}
|
|
{"current_steps": 70990, "total_steps": 78105, "loss": 0.1182, "lr": 1.2536651454842368e-07, "epoch": 4.544523397989885, "percentage": 90.89, "elapsed_time": "3:07:25", "remaining_time": "0:18:47", "throughput": 19869.62, "total_tokens": 223436288}
|
|
{"current_steps": 70995, "total_steps": 78105, "loss": 0.117, "lr": 1.251918857148224e-07, "epoch": 4.544843479930862, "percentage": 90.9, "elapsed_time": "3:07:25", "remaining_time": "0:18:46", "throughput": 19869.82, "total_tokens": 223451968}
|
|
{"current_steps": 71000, "total_steps": 78105, "loss": 0.1489, "lr": 1.2501737546676912e-07, "epoch": 4.545163561871839, "percentage": 90.9, "elapsed_time": "3:07:26", "remaining_time": "0:18:45", "throughput": 19870.03, "total_tokens": 223468096}
|
|
{"current_steps": 71005, "total_steps": 78105, "loss": 0.0986, "lr": 1.2484298381297666e-07, "epoch": 4.545483643812816, "percentage": 90.91, "elapsed_time": "3:07:27", "remaining_time": "0:18:44", "throughput": 19870.24, "total_tokens": 223484032}
|
|
{"current_steps": 71010, "total_steps": 78105, "loss": 0.1461, "lr": 1.2466871076215415e-07, "epoch": 4.545803725753793, "percentage": 90.92, "elapsed_time": "3:07:27", "remaining_time": "0:18:43", "throughput": 19870.44, "total_tokens": 223499136}
|
|
{"current_steps": 71015, "total_steps": 78105, "loss": 0.1309, "lr": 1.244945563230035e-07, "epoch": 4.546123807694769, "percentage": 90.92, "elapsed_time": "3:07:28", "remaining_time": "0:18:43", "throughput": 19870.65, "total_tokens": 223515328}
|
|
{"current_steps": 71020, "total_steps": 78105, "loss": 0.0979, "lr": 1.2432052050422084e-07, "epoch": 4.546443889635746, "percentage": 90.93, "elapsed_time": "3:07:29", "remaining_time": "0:18:42", "throughput": 19870.84, "total_tokens": 223530880}
|
|
{"current_steps": 71025, "total_steps": 78105, "loss": 0.1408, "lr": 1.241466033144975e-07, "epoch": 4.546763971576723, "percentage": 90.94, "elapsed_time": "3:07:29", "remaining_time": "0:18:41", "throughput": 19871.01, "total_tokens": 223545728}
|
|
{"current_steps": 71030, "total_steps": 78105, "loss": 0.2186, "lr": 1.239728047625169e-07, "epoch": 4.5470840535177, "percentage": 90.94, "elapsed_time": "3:07:30", "remaining_time": "0:18:40", "throughput": 19871.21, "total_tokens": 223561600}
|
|
{"current_steps": 71035, "total_steps": 78105, "loss": 0.1206, "lr": 1.2379912485695838e-07, "epoch": 4.547404135458677, "percentage": 90.95, "elapsed_time": "3:07:31", "remaining_time": "0:18:39", "throughput": 19871.37, "total_tokens": 223576000}
|
|
{"current_steps": 71040, "total_steps": 78105, "loss": 0.1115, "lr": 1.2362556360649452e-07, "epoch": 4.547724217399654, "percentage": 90.95, "elapsed_time": "3:07:31", "remaining_time": "0:18:39", "throughput": 19871.56, "total_tokens": 223591168}
|
|
{"current_steps": 71045, "total_steps": 78105, "loss": 0.1072, "lr": 1.2345212101979199e-07, "epoch": 4.548044299340631, "percentage": 90.96, "elapsed_time": "3:07:32", "remaining_time": "0:18:38", "throughput": 19871.78, "total_tokens": 223607168}
|
|
{"current_steps": 71050, "total_steps": 78105, "loss": 0.1287, "lr": 1.2327879710551133e-07, "epoch": 4.548364381281608, "percentage": 90.97, "elapsed_time": "3:07:33", "remaining_time": "0:18:37", "throughput": 19871.96, "total_tokens": 223622400}
|
|
{"current_steps": 71055, "total_steps": 78105, "loss": 0.1505, "lr": 1.2310559187230787e-07, "epoch": 4.548684463222585, "percentage": 90.97, "elapsed_time": "3:07:33", "remaining_time": "0:18:36", "throughput": 19872.15, "total_tokens": 223637696}
|
|
{"current_steps": 71060, "total_steps": 78105, "loss": 0.1425, "lr": 1.2293250532883022e-07, "epoch": 4.549004545163562, "percentage": 90.98, "elapsed_time": "3:07:34", "remaining_time": "0:18:35", "throughput": 19872.34, "total_tokens": 223653248}
|
|
{"current_steps": 71065, "total_steps": 78105, "loss": 0.1391, "lr": 1.227595374837215e-07, "epoch": 4.549324627104538, "percentage": 90.99, "elapsed_time": "3:07:35", "remaining_time": "0:18:34", "throughput": 19872.55, "total_tokens": 223669248}
|
|
{"current_steps": 71070, "total_steps": 78105, "loss": 0.12, "lr": 1.2258668834561866e-07, "epoch": 4.549644709045515, "percentage": 90.99, "elapsed_time": "3:07:35", "remaining_time": "0:18:34", "throughput": 19872.79, "total_tokens": 223685568}
|
|
{"current_steps": 71075, "total_steps": 78105, "loss": 0.1335, "lr": 1.2241395792315342e-07, "epoch": 4.549964790986492, "percentage": 91.0, "elapsed_time": "3:07:36", "remaining_time": "0:18:33", "throughput": 19872.98, "total_tokens": 223701184}
|
|
{"current_steps": 71080, "total_steps": 78105, "loss": 0.1077, "lr": 1.2224134622495083e-07, "epoch": 4.550284872927469, "percentage": 91.01, "elapsed_time": "3:07:37", "remaining_time": "0:18:32", "throughput": 19873.21, "total_tokens": 223717632}
|
|
{"current_steps": 71085, "total_steps": 78105, "loss": 0.1536, "lr": 1.2206885325963008e-07, "epoch": 4.550604954868446, "percentage": 91.01, "elapsed_time": "3:07:37", "remaining_time": "0:18:31", "throughput": 19873.39, "total_tokens": 223732928}
|
|
{"current_steps": 71090, "total_steps": 78105, "loss": 0.1241, "lr": 1.218964790358043e-07, "epoch": 4.550925036809423, "percentage": 91.02, "elapsed_time": "3:07:38", "remaining_time": "0:18:30", "throughput": 19873.64, "total_tokens": 223749760}
|
|
{"current_steps": 71095, "total_steps": 78105, "loss": 0.1209, "lr": 1.2172422356208164e-07, "epoch": 4.5512451187504, "percentage": 91.02, "elapsed_time": "3:07:39", "remaining_time": "0:18:30", "throughput": 19873.85, "total_tokens": 223765568}
|
|
{"current_steps": 71100, "total_steps": 78105, "loss": 0.1231, "lr": 1.215520868470632e-07, "epoch": 4.551565200691377, "percentage": 91.03, "elapsed_time": "3:07:39", "remaining_time": "0:18:29", "throughput": 19874.06, "total_tokens": 223781568}
|
|
{"current_steps": 71105, "total_steps": 78105, "loss": 0.1464, "lr": 1.2138006889934411e-07, "epoch": 4.5518852826323535, "percentage": 91.04, "elapsed_time": "3:07:40", "remaining_time": "0:18:28", "throughput": 19874.24, "total_tokens": 223797184}
|
|
{"current_steps": 71110, "total_steps": 78105, "loss": 0.1143, "lr": 1.2120816972751553e-07, "epoch": 4.5522053645733305, "percentage": 91.04, "elapsed_time": "3:07:41", "remaining_time": "0:18:27", "throughput": 19874.43, "total_tokens": 223812672}
|
|
{"current_steps": 71115, "total_steps": 78105, "loss": 0.1232, "lr": 1.210363893401595e-07, "epoch": 4.5525254465143075, "percentage": 91.05, "elapsed_time": "3:07:42", "remaining_time": "0:18:26", "throughput": 19874.64, "total_tokens": 223828608}
|
|
{"current_steps": 71120, "total_steps": 78105, "loss": 0.1691, "lr": 1.2086472774585495e-07, "epoch": 4.5528455284552845, "percentage": 91.06, "elapsed_time": "3:07:42", "remaining_time": "0:18:26", "throughput": 19874.81, "total_tokens": 223843776}
|
|
{"current_steps": 71125, "total_steps": 78105, "loss": 0.1783, "lr": 1.206931849531734e-07, "epoch": 4.553165610396261, "percentage": 91.06, "elapsed_time": "3:07:43", "remaining_time": "0:18:25", "throughput": 19875.02, "total_tokens": 223859776}
|
|
{"current_steps": 71130, "total_steps": 78105, "loss": 0.1281, "lr": 1.2052176097068075e-07, "epoch": 4.553485692337238, "percentage": 91.07, "elapsed_time": "3:07:44", "remaining_time": "0:18:24", "throughput": 19875.21, "total_tokens": 223875264}
|
|
{"current_steps": 71135, "total_steps": 78105, "loss": 0.1319, "lr": 1.2035045580693737e-07, "epoch": 4.553805774278215, "percentage": 91.08, "elapsed_time": "3:07:44", "remaining_time": "0:18:23", "throughput": 19875.43, "total_tokens": 223891264}
|
|
{"current_steps": 71140, "total_steps": 78105, "loss": 0.177, "lr": 1.2017926947049647e-07, "epoch": 4.554125856219192, "percentage": 91.08, "elapsed_time": "3:07:45", "remaining_time": "0:18:22", "throughput": 19875.62, "total_tokens": 223906944}
|
|
{"current_steps": 71145, "total_steps": 78105, "loss": 0.0987, "lr": 1.2000820196990782e-07, "epoch": 4.554445938160169, "percentage": 91.09, "elapsed_time": "3:07:46", "remaining_time": "0:18:22", "throughput": 19875.79, "total_tokens": 223921728}
|
|
{"current_steps": 71150, "total_steps": 78105, "loss": 0.1643, "lr": 1.1983725331371182e-07, "epoch": 4.5547660201011455, "percentage": 91.1, "elapsed_time": "3:07:46", "remaining_time": "0:18:21", "throughput": 19876.0, "total_tokens": 223937664}
|
|
{"current_steps": 71155, "total_steps": 78105, "loss": 0.1039, "lr": 1.1966642351044583e-07, "epoch": 4.5550861020421225, "percentage": 91.1, "elapsed_time": "3:07:47", "remaining_time": "0:18:20", "throughput": 19876.23, "total_tokens": 223954368}
|
|
{"current_steps": 71160, "total_steps": 78105, "loss": 0.1078, "lr": 1.1949571256863997e-07, "epoch": 4.5554061839830995, "percentage": 91.11, "elapsed_time": "3:07:48", "remaining_time": "0:18:19", "throughput": 19876.42, "total_tokens": 223969728}
|
|
{"current_steps": 71165, "total_steps": 78105, "loss": 0.1415, "lr": 1.193251204968185e-07, "epoch": 4.5557262659240765, "percentage": 91.11, "elapsed_time": "3:07:48", "remaining_time": "0:18:18", "throughput": 19876.61, "total_tokens": 223985216}
|
|
{"current_steps": 71170, "total_steps": 78105, "loss": 0.1583, "lr": 1.1915464730350018e-07, "epoch": 4.5560463478650535, "percentage": 91.12, "elapsed_time": "3:07:49", "remaining_time": "0:18:18", "throughput": 19876.78, "total_tokens": 224000192}
|
|
{"current_steps": 71175, "total_steps": 78105, "loss": 0.0869, "lr": 1.1898429299719739e-07, "epoch": 4.5563664298060305, "percentage": 91.13, "elapsed_time": "3:07:50", "remaining_time": "0:18:17", "throughput": 19877.02, "total_tokens": 224017088}
|
|
{"current_steps": 71180, "total_steps": 78105, "loss": 0.1612, "lr": 1.1881405758641662e-07, "epoch": 4.5566865117470075, "percentage": 91.13, "elapsed_time": "3:07:50", "remaining_time": "0:18:16", "throughput": 19877.2, "total_tokens": 224032320}
|
|
{"current_steps": 71185, "total_steps": 78105, "loss": 0.1646, "lr": 1.1864394107965916e-07, "epoch": 4.557006593687984, "percentage": 91.14, "elapsed_time": "3:07:51", "remaining_time": "0:18:15", "throughput": 19877.39, "total_tokens": 224047872}
|
|
{"current_steps": 71190, "total_steps": 78105, "loss": 0.1223, "lr": 1.1847394348541846e-07, "epoch": 4.5573266756289605, "percentage": 91.15, "elapsed_time": "3:07:52", "remaining_time": "0:18:14", "throughput": 19877.61, "total_tokens": 224064064}
|
|
{"current_steps": 71195, "total_steps": 78105, "loss": 0.1125, "lr": 1.1830406481218443e-07, "epoch": 4.5576467575699375, "percentage": 91.15, "elapsed_time": "3:07:52", "remaining_time": "0:18:14", "throughput": 19877.77, "total_tokens": 224078528}
|
|
{"current_steps": 71200, "total_steps": 78105, "loss": 0.1145, "lr": 1.181343050684397e-07, "epoch": 4.5579668395109145, "percentage": 91.16, "elapsed_time": "3:07:53", "remaining_time": "0:18:13", "throughput": 19877.95, "total_tokens": 224093696}
|
|
{"current_steps": 71205, "total_steps": 78105, "loss": 0.0946, "lr": 1.1796466426266112e-07, "epoch": 4.5582869214518915, "percentage": 91.17, "elapsed_time": "3:07:54", "remaining_time": "0:18:12", "throughput": 19878.16, "total_tokens": 224109184}
|
|
{"current_steps": 71210, "total_steps": 78105, "loss": 0.104, "lr": 1.1779514240331969e-07, "epoch": 4.5586070033928685, "percentage": 91.17, "elapsed_time": "3:07:54", "remaining_time": "0:18:11", "throughput": 19878.32, "total_tokens": 224124160}
|
|
{"current_steps": 71215, "total_steps": 78105, "loss": 0.1091, "lr": 1.1762573949888002e-07, "epoch": 4.5589270853338455, "percentage": 91.18, "elapsed_time": "3:07:55", "remaining_time": "0:18:10", "throughput": 19878.59, "total_tokens": 224141568}
|
|
{"current_steps": 71220, "total_steps": 78105, "loss": 0.0917, "lr": 1.1745645555780227e-07, "epoch": 4.5592471672748225, "percentage": 91.18, "elapsed_time": "3:07:56", "remaining_time": "0:18:10", "throughput": 19878.77, "total_tokens": 224156992}
|
|
{"current_steps": 71225, "total_steps": 78105, "loss": 0.1227, "lr": 1.1728729058853806e-07, "epoch": 4.5595672492157995, "percentage": 91.19, "elapsed_time": "3:07:56", "remaining_time": "0:18:09", "throughput": 19878.94, "total_tokens": 224171840}
|
|
{"current_steps": 71230, "total_steps": 78105, "loss": 0.1402, "lr": 1.1711824459953641e-07, "epoch": 4.5598873311567765, "percentage": 91.2, "elapsed_time": "3:07:57", "remaining_time": "0:18:08", "throughput": 19879.18, "total_tokens": 224188544}
|
|
{"current_steps": 71235, "total_steps": 78105, "loss": 0.14, "lr": 1.1694931759923672e-07, "epoch": 4.5602074130977535, "percentage": 91.2, "elapsed_time": "3:07:58", "remaining_time": "0:18:07", "throughput": 19879.37, "total_tokens": 224203648}
|
|
{"current_steps": 71240, "total_steps": 78105, "loss": 0.1254, "lr": 1.1678050959607584e-07, "epoch": 4.56052749503873, "percentage": 91.21, "elapsed_time": "3:07:58", "remaining_time": "0:18:06", "throughput": 19879.56, "total_tokens": 224219328}
|
|
{"current_steps": 71245, "total_steps": 78105, "loss": 0.1012, "lr": 1.1661182059848231e-07, "epoch": 4.5608475769797066, "percentage": 91.22, "elapsed_time": "3:07:59", "remaining_time": "0:18:06", "throughput": 19879.79, "total_tokens": 224235712}
|
|
{"current_steps": 71250, "total_steps": 78105, "loss": 0.113, "lr": 1.1644325061487965e-07, "epoch": 4.5611676589206835, "percentage": 91.22, "elapsed_time": "3:08:00", "remaining_time": "0:18:05", "throughput": 19880.01, "total_tokens": 224252480}
|
|
{"current_steps": 71255, "total_steps": 78105, "loss": 0.1115, "lr": 1.1627479965368615e-07, "epoch": 4.5614877408616605, "percentage": 91.23, "elapsed_time": "3:08:00", "remaining_time": "0:18:04", "throughput": 19880.24, "total_tokens": 224268800}
|
|
{"current_steps": 71260, "total_steps": 78105, "loss": 0.0969, "lr": 1.161064677233123e-07, "epoch": 4.5618078228026375, "percentage": 91.24, "elapsed_time": "3:08:01", "remaining_time": "0:18:03", "throughput": 19880.45, "total_tokens": 224285120}
|
|
{"current_steps": 71265, "total_steps": 78105, "loss": 0.1204, "lr": 1.1593825483216442e-07, "epoch": 4.5621279047436145, "percentage": 91.24, "elapsed_time": "3:08:02", "remaining_time": "0:18:02", "throughput": 19880.61, "total_tokens": 224299776}
|
|
{"current_steps": 71270, "total_steps": 78105, "loss": 0.1214, "lr": 1.1577016098864191e-07, "epoch": 4.5624479866845915, "percentage": 91.25, "elapsed_time": "3:08:03", "remaining_time": "0:18:02", "throughput": 19880.82, "total_tokens": 224315968}
|
|
{"current_steps": 71275, "total_steps": 78105, "loss": 0.1247, "lr": 1.156021862011386e-07, "epoch": 4.5627680686255685, "percentage": 91.26, "elapsed_time": "3:08:03", "remaining_time": "0:18:01", "throughput": 19881.07, "total_tokens": 224333120}
|
|
{"current_steps": 71280, "total_steps": 78105, "loss": 0.1869, "lr": 1.1543433047804225e-07, "epoch": 4.563088150566545, "percentage": 91.26, "elapsed_time": "3:08:04", "remaining_time": "0:18:00", "throughput": 19881.25, "total_tokens": 224348352}
|
|
{"current_steps": 71285, "total_steps": 78105, "loss": 0.1544, "lr": 1.1526659382773475e-07, "epoch": 4.563408232507522, "percentage": 91.27, "elapsed_time": "3:08:05", "remaining_time": "0:17:59", "throughput": 19881.46, "total_tokens": 224364416}
|
|
{"current_steps": 71290, "total_steps": 78105, "loss": 0.0978, "lr": 1.150989762585919e-07, "epoch": 4.563728314448499, "percentage": 91.27, "elapsed_time": "3:08:05", "remaining_time": "0:17:58", "throughput": 19881.64, "total_tokens": 224379520}
|
|
{"current_steps": 71295, "total_steps": 78105, "loss": 0.1321, "lr": 1.1493147777898367e-07, "epoch": 4.564048396389476, "percentage": 91.28, "elapsed_time": "3:08:06", "remaining_time": "0:17:58", "throughput": 19881.82, "total_tokens": 224394368}
|
|
{"current_steps": 71300, "total_steps": 78105, "loss": 0.1334, "lr": 1.1476409839727365e-07, "epoch": 4.564368478330453, "percentage": 91.29, "elapsed_time": "3:08:07", "remaining_time": "0:17:57", "throughput": 19882.03, "total_tokens": 224410304}
|
|
{"current_steps": 71305, "total_steps": 78105, "loss": 0.1488, "lr": 1.1459683812182099e-07, "epoch": 4.5646885602714296, "percentage": 91.29, "elapsed_time": "3:08:07", "remaining_time": "0:17:56", "throughput": 19882.29, "total_tokens": 224427328}
|
|
{"current_steps": 71310, "total_steps": 78105, "loss": 0.1515, "lr": 1.144296969609765e-07, "epoch": 4.5650086422124065, "percentage": 91.3, "elapsed_time": "3:08:08", "remaining_time": "0:17:55", "throughput": 19882.49, "total_tokens": 224443328}
|
|
{"current_steps": 71315, "total_steps": 78105, "loss": 0.0841, "lr": 1.142626749230874e-07, "epoch": 4.5653287241533835, "percentage": 91.31, "elapsed_time": "3:08:09", "remaining_time": "0:17:54", "throughput": 19882.71, "total_tokens": 224459776}
|
|
{"current_steps": 71320, "total_steps": 78105, "loss": 0.1807, "lr": 1.1409577201649341e-07, "epoch": 4.5656488060943605, "percentage": 91.31, "elapsed_time": "3:08:09", "remaining_time": "0:17:54", "throughput": 19882.91, "total_tokens": 224474944}
|
|
{"current_steps": 71325, "total_steps": 78105, "loss": 0.1202, "lr": 1.1392898824952869e-07, "epoch": 4.5659688880353375, "percentage": 91.32, "elapsed_time": "3:08:10", "remaining_time": "0:17:53", "throughput": 19883.09, "total_tokens": 224490560}
|
|
{"current_steps": 71330, "total_steps": 78105, "loss": 0.1203, "lr": 1.1376232363052187e-07, "epoch": 4.566288969976314, "percentage": 91.33, "elapsed_time": "3:08:11", "remaining_time": "0:17:52", "throughput": 19883.26, "total_tokens": 224505280}
|
|
{"current_steps": 71335, "total_steps": 78105, "loss": 0.1206, "lr": 1.1359577816779488e-07, "epoch": 4.566609051917291, "percentage": 91.33, "elapsed_time": "3:08:11", "remaining_time": "0:17:51", "throughput": 19883.45, "total_tokens": 224520768}
|
|
{"current_steps": 71340, "total_steps": 78105, "loss": 0.1084, "lr": 1.1342935186966497e-07, "epoch": 4.566929133858268, "percentage": 91.34, "elapsed_time": "3:08:12", "remaining_time": "0:17:50", "throughput": 19883.66, "total_tokens": 224536960}
|
|
{"current_steps": 71345, "total_steps": 78105, "loss": 0.1134, "lr": 1.1326304474444134e-07, "epoch": 4.567249215799245, "percentage": 91.34, "elapsed_time": "3:08:13", "remaining_time": "0:17:50", "throughput": 19883.89, "total_tokens": 224553792}
|
|
{"current_steps": 71350, "total_steps": 78105, "loss": 0.1283, "lr": 1.1309685680042953e-07, "epoch": 4.567569297740222, "percentage": 91.35, "elapsed_time": "3:08:13", "remaining_time": "0:17:49", "throughput": 19884.08, "total_tokens": 224569280}
|
|
{"current_steps": 71355, "total_steps": 78105, "loss": 0.1325, "lr": 1.1293078804592767e-07, "epoch": 4.567889379681199, "percentage": 91.36, "elapsed_time": "3:08:14", "remaining_time": "0:17:48", "throughput": 19884.24, "total_tokens": 224583936}
|
|
{"current_steps": 71360, "total_steps": 78105, "loss": 0.1016, "lr": 1.1276483848922853e-07, "epoch": 4.568209461622176, "percentage": 91.36, "elapsed_time": "3:08:15", "remaining_time": "0:17:47", "throughput": 19884.46, "total_tokens": 224600192}
|
|
{"current_steps": 71365, "total_steps": 78105, "loss": 0.1563, "lr": 1.1259900813861885e-07, "epoch": 4.5685295435631526, "percentage": 91.37, "elapsed_time": "3:08:15", "remaining_time": "0:17:46", "throughput": 19884.63, "total_tokens": 224615424}
|
|
{"current_steps": 71370, "total_steps": 78105, "loss": 0.1742, "lr": 1.1243329700237864e-07, "epoch": 4.568849625504129, "percentage": 91.38, "elapsed_time": "3:08:16", "remaining_time": "0:17:46", "throughput": 19884.86, "total_tokens": 224631616}
|
|
{"current_steps": 71375, "total_steps": 78105, "loss": 0.1483, "lr": 1.122677050887841e-07, "epoch": 4.569169707445106, "percentage": 91.38, "elapsed_time": "3:08:17", "remaining_time": "0:17:45", "throughput": 19885.08, "total_tokens": 224647936}
|
|
{"current_steps": 71380, "total_steps": 78105, "loss": 0.1854, "lr": 1.1210223240610218e-07, "epoch": 4.569489789386083, "percentage": 91.39, "elapsed_time": "3:08:17", "remaining_time": "0:17:44", "throughput": 19885.23, "total_tokens": 224662208}
|
|
{"current_steps": 71385, "total_steps": 78105, "loss": 0.1174, "lr": 1.1193687896259742e-07, "epoch": 4.56980987132706, "percentage": 91.4, "elapsed_time": "3:08:18", "remaining_time": "0:17:43", "throughput": 19885.46, "total_tokens": 224678720}
|
|
{"current_steps": 71390, "total_steps": 78105, "loss": 0.143, "lr": 1.1177164476652569e-07, "epoch": 4.570129953268037, "percentage": 91.4, "elapsed_time": "3:08:19", "remaining_time": "0:17:42", "throughput": 19885.63, "total_tokens": 224693824}
|
|
{"current_steps": 71395, "total_steps": 78105, "loss": 0.1392, "lr": 1.1160652982613818e-07, "epoch": 4.570450035209014, "percentage": 91.41, "elapsed_time": "3:08:19", "remaining_time": "0:17:42", "throughput": 19885.82, "total_tokens": 224709120}
|
|
{"current_steps": 71400, "total_steps": 78105, "loss": 0.1682, "lr": 1.1144153414967967e-07, "epoch": 4.570770117149991, "percentage": 91.42, "elapsed_time": "3:08:20", "remaining_time": "0:17:41", "throughput": 19885.99, "total_tokens": 224724288}
|
|
{"current_steps": 71405, "total_steps": 78105, "loss": 0.1275, "lr": 1.112766577453897e-07, "epoch": 4.571090199090968, "percentage": 91.42, "elapsed_time": "3:08:21", "remaining_time": "0:17:40", "throughput": 19886.17, "total_tokens": 224739584}
|
|
{"current_steps": 71410, "total_steps": 78105, "loss": 0.1433, "lr": 1.1111190062150057e-07, "epoch": 4.571410281031945, "percentage": 91.43, "elapsed_time": "3:08:21", "remaining_time": "0:17:39", "throughput": 19886.38, "total_tokens": 224755136}
|
|
{"current_steps": 71415, "total_steps": 78105, "loss": 0.1184, "lr": 1.1094726278624068e-07, "epoch": 4.571730362972921, "percentage": 91.43, "elapsed_time": "3:08:22", "remaining_time": "0:17:38", "throughput": 19886.59, "total_tokens": 224771008}
|
|
{"current_steps": 71420, "total_steps": 78105, "loss": 0.1282, "lr": 1.1078274424782959e-07, "epoch": 4.572050444913898, "percentage": 91.44, "elapsed_time": "3:08:23", "remaining_time": "0:17:38", "throughput": 19886.76, "total_tokens": 224786176}
|
|
{"current_steps": 71425, "total_steps": 78105, "loss": 0.1075, "lr": 1.1061834501448376e-07, "epoch": 4.572370526854875, "percentage": 91.45, "elapsed_time": "3:08:23", "remaining_time": "0:17:37", "throughput": 19886.95, "total_tokens": 224801600}
|
|
{"current_steps": 71430, "total_steps": 78105, "loss": 0.0999, "lr": 1.1045406509441164e-07, "epoch": 4.572690608795852, "percentage": 91.45, "elapsed_time": "3:08:24", "remaining_time": "0:17:36", "throughput": 19887.12, "total_tokens": 224816576}
|
|
{"current_steps": 71435, "total_steps": 78105, "loss": 0.1159, "lr": 1.1028990449581694e-07, "epoch": 4.573010690736829, "percentage": 91.46, "elapsed_time": "3:08:25", "remaining_time": "0:17:35", "throughput": 19887.29, "total_tokens": 224831104}
|
|
{"current_steps": 71440, "total_steps": 78105, "loss": 0.1193, "lr": 1.10125863226897e-07, "epoch": 4.573330772677806, "percentage": 91.47, "elapsed_time": "3:08:25", "remaining_time": "0:17:34", "throughput": 19887.43, "total_tokens": 224845376}
|
|
{"current_steps": 71445, "total_steps": 78105, "loss": 0.1172, "lr": 1.0996194129584276e-07, "epoch": 4.573650854618783, "percentage": 91.47, "elapsed_time": "3:08:26", "remaining_time": "0:17:33", "throughput": 19887.62, "total_tokens": 224860992}
|
|
{"current_steps": 71450, "total_steps": 78105, "loss": 0.1335, "lr": 1.0979813871084044e-07, "epoch": 4.57397093655976, "percentage": 91.48, "elapsed_time": "3:08:27", "remaining_time": "0:17:33", "throughput": 19887.8, "total_tokens": 224876672}
|
|
{"current_steps": 71455, "total_steps": 78105, "loss": 0.1455, "lr": 1.0963445548006824e-07, "epoch": 4.574291018500736, "percentage": 91.49, "elapsed_time": "3:08:27", "remaining_time": "0:17:32", "throughput": 19888.0, "total_tokens": 224892608}
|
|
{"current_steps": 71460, "total_steps": 78105, "loss": 0.1401, "lr": 1.0947089161170099e-07, "epoch": 4.574611100441713, "percentage": 91.49, "elapsed_time": "3:08:28", "remaining_time": "0:17:31", "throughput": 19888.19, "total_tokens": 224907968}
|
|
{"current_steps": 71465, "total_steps": 78105, "loss": 0.0782, "lr": 1.0930744711390523e-07, "epoch": 4.57493118238269, "percentage": 91.5, "elapsed_time": "3:08:29", "remaining_time": "0:17:30", "throughput": 19888.35, "total_tokens": 224922496}
|
|
{"current_steps": 71470, "total_steps": 78105, "loss": 0.1133, "lr": 1.0914412199484303e-07, "epoch": 4.575251264323667, "percentage": 91.51, "elapsed_time": "3:08:29", "remaining_time": "0:17:29", "throughput": 19888.53, "total_tokens": 224937856}
|
|
{"current_steps": 71475, "total_steps": 78105, "loss": 0.1167, "lr": 1.0898091626266983e-07, "epoch": 4.575571346264644, "percentage": 91.51, "elapsed_time": "3:08:30", "remaining_time": "0:17:29", "throughput": 19888.74, "total_tokens": 224953600}
|
|
{"current_steps": 71480, "total_steps": 78105, "loss": 0.1403, "lr": 1.0881782992553546e-07, "epoch": 4.575891428205621, "percentage": 91.52, "elapsed_time": "3:08:31", "remaining_time": "0:17:28", "throughput": 19888.98, "total_tokens": 224970432}
|
|
{"current_steps": 71485, "total_steps": 78105, "loss": 0.1316, "lr": 1.0865486299158317e-07, "epoch": 4.576211510146598, "percentage": 91.52, "elapsed_time": "3:08:31", "remaining_time": "0:17:27", "throughput": 19889.18, "total_tokens": 224986304}
|
|
{"current_steps": 71490, "total_steps": 78105, "loss": 0.1415, "lr": 1.0849201546895089e-07, "epoch": 4.576531592087575, "percentage": 91.53, "elapsed_time": "3:08:32", "remaining_time": "0:17:26", "throughput": 19889.37, "total_tokens": 225001536}
|
|
{"current_steps": 71495, "total_steps": 78105, "loss": 0.2262, "lr": 1.083292873657707e-07, "epoch": 4.576851674028552, "percentage": 91.54, "elapsed_time": "3:08:33", "remaining_time": "0:17:25", "throughput": 19889.54, "total_tokens": 225016384}
|
|
{"current_steps": 71500, "total_steps": 78105, "loss": 0.1604, "lr": 1.0816667869016806e-07, "epoch": 4.577171755969529, "percentage": 91.54, "elapsed_time": "3:08:33", "remaining_time": "0:17:25", "throughput": 19889.71, "total_tokens": 225031296}
|
|
{"current_steps": 71505, "total_steps": 78105, "loss": 0.1623, "lr": 1.0800418945026314e-07, "epoch": 4.577491837910505, "percentage": 91.55, "elapsed_time": "3:08:34", "remaining_time": "0:17:24", "throughput": 19889.91, "total_tokens": 225047168}
|
|
{"current_steps": 71510, "total_steps": 78105, "loss": 0.14, "lr": 1.0784181965416918e-07, "epoch": 4.577811919851482, "percentage": 91.56, "elapsed_time": "3:08:35", "remaining_time": "0:17:23", "throughput": 19890.1, "total_tokens": 225062720}
|
|
{"current_steps": 71515, "total_steps": 78105, "loss": 0.1263, "lr": 1.0767956930999468e-07, "epoch": 4.578132001792459, "percentage": 91.56, "elapsed_time": "3:08:35", "remaining_time": "0:17:22", "throughput": 19890.31, "total_tokens": 225078720}
|
|
{"current_steps": 71520, "total_steps": 78105, "loss": 0.1385, "lr": 1.0751743842584123e-07, "epoch": 4.578452083733436, "percentage": 91.57, "elapsed_time": "3:08:36", "remaining_time": "0:17:21", "throughput": 19890.52, "total_tokens": 225094848}
|
|
{"current_steps": 71525, "total_steps": 78105, "loss": 0.1383, "lr": 1.0735542700980512e-07, "epoch": 4.578772165674413, "percentage": 91.58, "elapsed_time": "3:08:37", "remaining_time": "0:17:21", "throughput": 19890.72, "total_tokens": 225110528}
|
|
{"current_steps": 71530, "total_steps": 78105, "loss": 0.1028, "lr": 1.071935350699757e-07, "epoch": 4.57909224761539, "percentage": 91.58, "elapsed_time": "3:08:38", "remaining_time": "0:17:20", "throughput": 19890.89, "total_tokens": 225125504}
|
|
{"current_steps": 71535, "total_steps": 78105, "loss": 0.1912, "lr": 1.070317626144382e-07, "epoch": 4.579412329556367, "percentage": 91.59, "elapsed_time": "3:08:38", "remaining_time": "0:17:19", "throughput": 19891.08, "total_tokens": 225141312}
|
|
{"current_steps": 71540, "total_steps": 78105, "loss": 0.1729, "lr": 1.0687010965126921e-07, "epoch": 4.579732411497344, "percentage": 91.59, "elapsed_time": "3:08:39", "remaining_time": "0:17:18", "throughput": 19891.27, "total_tokens": 225156544}
|
|
{"current_steps": 71545, "total_steps": 78105, "loss": 0.0871, "lr": 1.0670857618854197e-07, "epoch": 4.58005249343832, "percentage": 91.6, "elapsed_time": "3:08:40", "remaining_time": "0:17:17", "throughput": 19891.48, "total_tokens": 225172800}
|
|
{"current_steps": 71550, "total_steps": 78105, "loss": 0.1186, "lr": 1.0654716223432199e-07, "epoch": 4.580372575379297, "percentage": 91.61, "elapsed_time": "3:08:40", "remaining_time": "0:17:17", "throughput": 19891.65, "total_tokens": 225187648}
|
|
{"current_steps": 71555, "total_steps": 78105, "loss": 0.112, "lr": 1.0638586779666976e-07, "epoch": 4.580692657320274, "percentage": 91.61, "elapsed_time": "3:08:41", "remaining_time": "0:17:16", "throughput": 19891.83, "total_tokens": 225202752}
|
|
{"current_steps": 71560, "total_steps": 78105, "loss": 0.1416, "lr": 1.0622469288363968e-07, "epoch": 4.581012739261251, "percentage": 91.62, "elapsed_time": "3:08:42", "remaining_time": "0:17:15", "throughput": 19892.03, "total_tokens": 225218240}
|
|
{"current_steps": 71565, "total_steps": 78105, "loss": 0.1091, "lr": 1.0606363750327947e-07, "epoch": 4.581332821202228, "percentage": 91.63, "elapsed_time": "3:08:42", "remaining_time": "0:17:14", "throughput": 19892.34, "total_tokens": 225236992}
|
|
{"current_steps": 71570, "total_steps": 78105, "loss": 0.1602, "lr": 1.0590270166363187e-07, "epoch": 4.581652903143205, "percentage": 91.63, "elapsed_time": "3:08:43", "remaining_time": "0:17:13", "throughput": 19892.52, "total_tokens": 225252416}
|
|
{"current_steps": 71575, "total_steps": 78105, "loss": 0.1033, "lr": 1.0574188537273267e-07, "epoch": 4.581972985084182, "percentage": 91.64, "elapsed_time": "3:08:44", "remaining_time": "0:17:13", "throughput": 19892.71, "total_tokens": 225268160}
|
|
{"current_steps": 71580, "total_steps": 78105, "loss": 0.1463, "lr": 1.0558118863861266e-07, "epoch": 4.582293067025159, "percentage": 91.65, "elapsed_time": "3:08:44", "remaining_time": "0:17:12", "throughput": 19892.88, "total_tokens": 225282944}
|
|
{"current_steps": 71585, "total_steps": 78105, "loss": 0.1086, "lr": 1.0542061146929599e-07, "epoch": 4.582613148966136, "percentage": 91.65, "elapsed_time": "3:08:45", "remaining_time": "0:17:11", "throughput": 19893.07, "total_tokens": 225298432}
|
|
{"current_steps": 71590, "total_steps": 78105, "loss": 0.1555, "lr": 1.0526015387280098e-07, "epoch": 4.582933230907113, "percentage": 91.66, "elapsed_time": "3:08:46", "remaining_time": "0:17:10", "throughput": 19893.26, "total_tokens": 225313984}
|
|
{"current_steps": 71595, "total_steps": 78105, "loss": 0.0699, "lr": 1.0509981585714035e-07, "epoch": 4.583253312848089, "percentage": 91.67, "elapsed_time": "3:08:46", "remaining_time": "0:17:09", "throughput": 19893.42, "total_tokens": 225328768}
|
|
{"current_steps": 71600, "total_steps": 78105, "loss": 0.1617, "lr": 1.0493959743031995e-07, "epoch": 4.583573394789066, "percentage": 91.67, "elapsed_time": "3:08:47", "remaining_time": "0:17:09", "throughput": 19893.6, "total_tokens": 225343808}
|
|
{"current_steps": 71605, "total_steps": 78105, "loss": 0.1486, "lr": 1.0477949860034115e-07, "epoch": 4.583893476730043, "percentage": 91.68, "elapsed_time": "3:08:48", "remaining_time": "0:17:08", "throughput": 19893.79, "total_tokens": 225359360}
|
|
{"current_steps": 71610, "total_steps": 78105, "loss": 0.1467, "lr": 1.0461951937519726e-07, "epoch": 4.58421355867102, "percentage": 91.68, "elapsed_time": "3:08:48", "remaining_time": "0:17:07", "throughput": 19893.97, "total_tokens": 225374848}
|
|
{"current_steps": 71615, "total_steps": 78105, "loss": 0.1638, "lr": 1.0445965976287803e-07, "epoch": 4.584533640611997, "percentage": 91.69, "elapsed_time": "3:08:49", "remaining_time": "0:17:06", "throughput": 19894.15, "total_tokens": 225390144}
|
|
{"current_steps": 71620, "total_steps": 78105, "loss": 0.1237, "lr": 1.0429991977136539e-07, "epoch": 4.584853722552974, "percentage": 91.7, "elapsed_time": "3:08:50", "remaining_time": "0:17:05", "throughput": 19894.31, "total_tokens": 225405056}
|
|
{"current_steps": 71625, "total_steps": 78105, "loss": 0.1357, "lr": 1.0414029940863601e-07, "epoch": 4.585173804493951, "percentage": 91.7, "elapsed_time": "3:08:50", "remaining_time": "0:17:05", "throughput": 19894.5, "total_tokens": 225420864}
|
|
{"current_steps": 71630, "total_steps": 78105, "loss": 0.1428, "lr": 1.0398079868266048e-07, "epoch": 4.585493886434928, "percentage": 91.71, "elapsed_time": "3:08:51", "remaining_time": "0:17:04", "throughput": 19894.78, "total_tokens": 225439104}
|
|
{"current_steps": 71635, "total_steps": 78105, "loss": 0.1331, "lr": 1.0382141760140352e-07, "epoch": 4.585813968375904, "percentage": 91.72, "elapsed_time": "3:08:52", "remaining_time": "0:17:03", "throughput": 19895.0, "total_tokens": 225455104}
|
|
{"current_steps": 71640, "total_steps": 78105, "loss": 0.1329, "lr": 1.036621561728235e-07, "epoch": 4.586134050316881, "percentage": 91.72, "elapsed_time": "3:08:52", "remaining_time": "0:17:02", "throughput": 19895.15, "total_tokens": 225469440}
|
|
{"current_steps": 71645, "total_steps": 78105, "loss": 0.1366, "lr": 1.0350301440487376e-07, "epoch": 4.586454132257858, "percentage": 91.73, "elapsed_time": "3:08:53", "remaining_time": "0:17:01", "throughput": 19895.33, "total_tokens": 225484800}
|
|
{"current_steps": 71650, "total_steps": 78105, "loss": 0.1327, "lr": 1.033439923054999e-07, "epoch": 4.586774214198835, "percentage": 91.74, "elapsed_time": "3:08:54", "remaining_time": "0:17:01", "throughput": 19895.49, "total_tokens": 225499648}
|
|
{"current_steps": 71655, "total_steps": 78105, "loss": 0.111, "lr": 1.031850898826442e-07, "epoch": 4.587094296139812, "percentage": 91.74, "elapsed_time": "3:08:54", "remaining_time": "0:17:00", "throughput": 19895.68, "total_tokens": 225515008}
|
|
{"current_steps": 71660, "total_steps": 78105, "loss": 0.2199, "lr": 1.0302630714423972e-07, "epoch": 4.587414378080789, "percentage": 91.75, "elapsed_time": "3:08:55", "remaining_time": "0:16:59", "throughput": 19895.86, "total_tokens": 225530304}
|
|
{"current_steps": 71665, "total_steps": 78105, "loss": 0.0972, "lr": 1.0286764409821654e-07, "epoch": 4.587734460021766, "percentage": 91.75, "elapsed_time": "3:08:56", "remaining_time": "0:16:58", "throughput": 19896.04, "total_tokens": 225545664}
|
|
{"current_steps": 71670, "total_steps": 78105, "loss": 0.204, "lr": 1.027091007524969e-07, "epoch": 4.588054541962743, "percentage": 91.76, "elapsed_time": "3:08:56", "remaining_time": "0:16:57", "throughput": 19896.22, "total_tokens": 225561088}
|
|
{"current_steps": 71675, "total_steps": 78105, "loss": 0.1343, "lr": 1.0255067711499756e-07, "epoch": 4.58837462390372, "percentage": 91.77, "elapsed_time": "3:08:57", "remaining_time": "0:16:57", "throughput": 19896.44, "total_tokens": 225577216}
|
|
{"current_steps": 71680, "total_steps": 78105, "loss": 0.1733, "lr": 1.0239237319363021e-07, "epoch": 4.588694705844697, "percentage": 91.77, "elapsed_time": "3:08:58", "remaining_time": "0:16:56", "throughput": 19896.62, "total_tokens": 225592512}
|
|
{"current_steps": 71685, "total_steps": 78105, "loss": 0.1738, "lr": 1.0223418899629828e-07, "epoch": 4.589014787785673, "percentage": 91.78, "elapsed_time": "3:08:58", "remaining_time": "0:16:55", "throughput": 19896.82, "total_tokens": 225608320}
|
|
{"current_steps": 71690, "total_steps": 78105, "loss": 0.143, "lr": 1.0207612453090182e-07, "epoch": 4.58933486972665, "percentage": 91.79, "elapsed_time": "3:08:59", "remaining_time": "0:16:54", "throughput": 19897.03, "total_tokens": 225624128}
|
|
{"current_steps": 71695, "total_steps": 78105, "loss": 0.0987, "lr": 1.0191817980533315e-07, "epoch": 4.589654951667627, "percentage": 91.79, "elapsed_time": "3:09:00", "remaining_time": "0:16:53", "throughput": 19897.23, "total_tokens": 225640320}
|
|
{"current_steps": 71700, "total_steps": 78105, "loss": 0.1262, "lr": 1.0176035482747981e-07, "epoch": 4.589975033608604, "percentage": 91.8, "elapsed_time": "3:09:00", "remaining_time": "0:16:53", "throughput": 19897.42, "total_tokens": 225655808}
|
|
{"current_steps": 71705, "total_steps": 78105, "loss": 0.1595, "lr": 1.016026496052222e-07, "epoch": 4.590295115549581, "percentage": 91.81, "elapsed_time": "3:09:01", "remaining_time": "0:16:52", "throughput": 19897.59, "total_tokens": 225670912}
|
|
{"current_steps": 71710, "total_steps": 78105, "loss": 0.1142, "lr": 1.014450641464354e-07, "epoch": 4.590615197490558, "percentage": 91.81, "elapsed_time": "3:09:02", "remaining_time": "0:16:51", "throughput": 19897.8, "total_tokens": 225687104}
|
|
{"current_steps": 71715, "total_steps": 78105, "loss": 0.1299, "lr": 1.0128759845898838e-07, "epoch": 4.590935279431535, "percentage": 91.82, "elapsed_time": "3:09:02", "remaining_time": "0:16:50", "throughput": 19897.97, "total_tokens": 225702080}
|
|
{"current_steps": 71720, "total_steps": 78105, "loss": 0.0972, "lr": 1.0113025255074432e-07, "epoch": 4.591255361372511, "percentage": 91.83, "elapsed_time": "3:09:03", "remaining_time": "0:16:49", "throughput": 19898.14, "total_tokens": 225717056}
|
|
{"current_steps": 71725, "total_steps": 78105, "loss": 0.1374, "lr": 1.0097302642956025e-07, "epoch": 4.591575443313488, "percentage": 91.83, "elapsed_time": "3:09:04", "remaining_time": "0:16:49", "throughput": 19898.31, "total_tokens": 225732224}
|
|
{"current_steps": 71730, "total_steps": 78105, "loss": 0.1285, "lr": 1.0081592010328711e-07, "epoch": 4.591895525254465, "percentage": 91.84, "elapsed_time": "3:09:04", "remaining_time": "0:16:48", "throughput": 19898.52, "total_tokens": 225748352}
|
|
{"current_steps": 71735, "total_steps": 78105, "loss": 0.118, "lr": 1.006589335797703e-07, "epoch": 4.592215607195442, "percentage": 91.84, "elapsed_time": "3:09:05", "remaining_time": "0:16:47", "throughput": 19898.72, "total_tokens": 225763840}
|
|
{"current_steps": 71740, "total_steps": 78105, "loss": 0.1691, "lr": 1.0050206686684827e-07, "epoch": 4.592535689136419, "percentage": 91.85, "elapsed_time": "3:09:06", "remaining_time": "0:16:46", "throughput": 19898.91, "total_tokens": 225779200}
|
|
{"current_steps": 71745, "total_steps": 78105, "loss": 0.1638, "lr": 1.0034531997235475e-07, "epoch": 4.592855771077396, "percentage": 91.86, "elapsed_time": "3:09:06", "remaining_time": "0:16:45", "throughput": 19899.09, "total_tokens": 225794496}
|
|
{"current_steps": 71750, "total_steps": 78105, "loss": 0.1313, "lr": 1.0018869290411654e-07, "epoch": 4.593175853018373, "percentage": 91.86, "elapsed_time": "3:09:07", "remaining_time": "0:16:45", "throughput": 19899.35, "total_tokens": 225811904}
|
|
{"current_steps": 71755, "total_steps": 78105, "loss": 0.1251, "lr": 1.0003218566995487e-07, "epoch": 4.59349593495935, "percentage": 91.87, "elapsed_time": "3:09:08", "remaining_time": "0:16:44", "throughput": 19899.59, "total_tokens": 225828288}
|
|
{"current_steps": 71760, "total_steps": 78105, "loss": 0.1297, "lr": 9.987579827768462e-08, "epoch": 4.593816016900327, "percentage": 91.88, "elapsed_time": "3:09:09", "remaining_time": "0:16:43", "throughput": 19899.78, "total_tokens": 225843904}
|
|
{"current_steps": 71765, "total_steps": 78105, "loss": 0.1375, "lr": 9.971953073511565e-08, "epoch": 4.594136098841304, "percentage": 91.88, "elapsed_time": "3:09:09", "remaining_time": "0:16:42", "throughput": 19899.99, "total_tokens": 225860032}
|
|
{"current_steps": 71770, "total_steps": 78105, "loss": 0.0914, "lr": 9.95633830500503e-08, "epoch": 4.59445618078228, "percentage": 91.89, "elapsed_time": "3:09:10", "remaining_time": "0:16:41", "throughput": 19900.16, "total_tokens": 225875072}
|
|
{"current_steps": 71775, "total_steps": 78105, "loss": 0.1657, "lr": 9.940735523028628e-08, "epoch": 4.594776262723257, "percentage": 91.9, "elapsed_time": "3:09:11", "remaining_time": "0:16:41", "throughput": 19900.37, "total_tokens": 225891392}
|
|
{"current_steps": 71780, "total_steps": 78105, "loss": 0.1509, "lr": 9.92514472836148e-08, "epoch": 4.595096344664234, "percentage": 91.9, "elapsed_time": "3:09:11", "remaining_time": "0:16:40", "throughput": 19900.54, "total_tokens": 225906176}
|
|
{"current_steps": 71785, "total_steps": 78105, "loss": 0.1462, "lr": 9.909565921782077e-08, "epoch": 4.595416426605211, "percentage": 91.91, "elapsed_time": "3:09:12", "remaining_time": "0:16:39", "throughput": 19900.81, "total_tokens": 225923520}
|
|
{"current_steps": 71790, "total_steps": 78105, "loss": 0.1051, "lr": 9.893999104068408e-08, "epoch": 4.595736508546188, "percentage": 91.91, "elapsed_time": "3:09:13", "remaining_time": "0:16:38", "throughput": 19901.04, "total_tokens": 225939968}
|
|
{"current_steps": 71795, "total_steps": 78105, "loss": 0.1622, "lr": 9.878444275997712e-08, "epoch": 4.596056590487165, "percentage": 91.92, "elapsed_time": "3:09:13", "remaining_time": "0:16:37", "throughput": 19901.2, "total_tokens": 225955136}
|
|
{"current_steps": 71800, "total_steps": 78105, "loss": 0.1444, "lr": 9.862901438346839e-08, "epoch": 4.596376672428142, "percentage": 91.93, "elapsed_time": "3:09:14", "remaining_time": "0:16:37", "throughput": 19901.38, "total_tokens": 225970432}
|
|
{"current_steps": 71805, "total_steps": 78105, "loss": 0.0974, "lr": 9.847370591891752e-08, "epoch": 4.596696754369119, "percentage": 91.93, "elapsed_time": "3:09:15", "remaining_time": "0:16:36", "throughput": 19901.6, "total_tokens": 225986816}
|
|
{"current_steps": 71810, "total_steps": 78105, "loss": 0.1092, "lr": 9.83185173740811e-08, "epoch": 4.597016836310095, "percentage": 91.94, "elapsed_time": "3:09:15", "remaining_time": "0:16:35", "throughput": 19901.8, "total_tokens": 226002688}
|
|
{"current_steps": 71815, "total_steps": 78105, "loss": 0.1662, "lr": 9.816344875670791e-08, "epoch": 4.597336918251072, "percentage": 91.95, "elapsed_time": "3:09:16", "remaining_time": "0:16:34", "throughput": 19901.97, "total_tokens": 226017728}
|
|
{"current_steps": 71820, "total_steps": 78105, "loss": 0.0916, "lr": 9.80085000745415e-08, "epoch": 4.597657000192049, "percentage": 91.95, "elapsed_time": "3:09:17", "remaining_time": "0:16:33", "throughput": 19902.16, "total_tokens": 226033280}
|
|
{"current_steps": 71825, "total_steps": 78105, "loss": 0.1357, "lr": 9.785367133531898e-08, "epoch": 4.597977082133026, "percentage": 91.96, "elapsed_time": "3:09:17", "remaining_time": "0:16:33", "throughput": 19902.36, "total_tokens": 226049472}
|
|
{"current_steps": 71830, "total_steps": 78105, "loss": 0.1376, "lr": 9.76989625467717e-08, "epoch": 4.598297164074003, "percentage": 91.97, "elapsed_time": "3:09:18", "remaining_time": "0:16:32", "throughput": 19902.59, "total_tokens": 226065792}
|
|
{"current_steps": 71835, "total_steps": 78105, "loss": 0.0953, "lr": 9.75443737166254e-08, "epoch": 4.59861724601498, "percentage": 91.97, "elapsed_time": "3:09:19", "remaining_time": "0:16:31", "throughput": 19902.84, "total_tokens": 226082944}
|
|
{"current_steps": 71840, "total_steps": 78105, "loss": 0.1213, "lr": 9.738990485259864e-08, "epoch": 4.598937327955957, "percentage": 91.98, "elapsed_time": "3:09:19", "remaining_time": "0:16:30", "throughput": 19903.03, "total_tokens": 226098368}
|
|
{"current_steps": 71845, "total_steps": 78105, "loss": 0.1013, "lr": 9.723555596240553e-08, "epoch": 4.599257409896934, "percentage": 91.99, "elapsed_time": "3:09:20", "remaining_time": "0:16:29", "throughput": 19903.21, "total_tokens": 226113280}
|
|
{"current_steps": 71850, "total_steps": 78105, "loss": 0.1224, "lr": 9.708132705375351e-08, "epoch": 4.599577491837911, "percentage": 91.99, "elapsed_time": "3:09:21", "remaining_time": "0:16:29", "throughput": 19903.38, "total_tokens": 226128512}
|
|
{"current_steps": 71855, "total_steps": 78105, "loss": 0.1327, "lr": 9.692721813434336e-08, "epoch": 4.599897573778888, "percentage": 92.0, "elapsed_time": "3:09:22", "remaining_time": "0:16:28", "throughput": 19903.58, "total_tokens": 226144640}
|
|
{"current_steps": 71860, "total_steps": 78105, "loss": 0.1267, "lr": 9.677322921187088e-08, "epoch": 4.600217655719864, "percentage": 92.0, "elapsed_time": "3:09:22", "remaining_time": "0:16:27", "throughput": 19903.76, "total_tokens": 226159488}
|
|
{"current_steps": 71865, "total_steps": 78105, "loss": 0.1427, "lr": 9.661936029402546e-08, "epoch": 4.600537737660841, "percentage": 92.01, "elapsed_time": "3:09:23", "remaining_time": "0:16:26", "throughput": 19904.0, "total_tokens": 226176448}
|
|
{"current_steps": 71870, "total_steps": 78105, "loss": 0.1368, "lr": 9.646561138849014e-08, "epoch": 4.600857819601818, "percentage": 92.02, "elapsed_time": "3:09:24", "remaining_time": "0:16:25", "throughput": 19904.2, "total_tokens": 226192576}
|
|
{"current_steps": 71875, "total_steps": 78105, "loss": 0.1371, "lr": 9.631198250294294e-08, "epoch": 4.601177901542795, "percentage": 92.02, "elapsed_time": "3:09:24", "remaining_time": "0:16:25", "throughput": 19904.34, "total_tokens": 226206912}
|
|
{"current_steps": 71880, "total_steps": 78105, "loss": 0.1285, "lr": 9.615847364505465e-08, "epoch": 4.601497983483772, "percentage": 92.03, "elapsed_time": "3:09:25", "remaining_time": "0:16:24", "throughput": 19904.52, "total_tokens": 226222208}
|
|
{"current_steps": 71885, "total_steps": 78105, "loss": 0.129, "lr": 9.600508482249166e-08, "epoch": 4.601818065424749, "percentage": 92.04, "elapsed_time": "3:09:26", "remaining_time": "0:16:23", "throughput": 19904.77, "total_tokens": 226239168}
|
|
{"current_steps": 71890, "total_steps": 78105, "loss": 0.1246, "lr": 9.585181604291228e-08, "epoch": 4.602138147365726, "percentage": 92.04, "elapsed_time": "3:09:26", "remaining_time": "0:16:22", "throughput": 19904.95, "total_tokens": 226254656}
|
|
{"current_steps": 71895, "total_steps": 78105, "loss": 0.1405, "lr": 9.569866731397038e-08, "epoch": 4.602458229306703, "percentage": 92.05, "elapsed_time": "3:09:27", "remaining_time": "0:16:21", "throughput": 19905.11, "total_tokens": 226269376}
|
|
{"current_steps": 71900, "total_steps": 78105, "loss": 0.1312, "lr": 9.554563864331373e-08, "epoch": 4.602778311247679, "percentage": 92.06, "elapsed_time": "3:09:28", "remaining_time": "0:16:21", "throughput": 19905.3, "total_tokens": 226284416}
|
|
{"current_steps": 71905, "total_steps": 78105, "loss": 0.143, "lr": 9.539273003858318e-08, "epoch": 4.603098393188656, "percentage": 92.06, "elapsed_time": "3:09:28", "remaining_time": "0:16:20", "throughput": 19905.49, "total_tokens": 226300224}
|
|
{"current_steps": 71910, "total_steps": 78105, "loss": 0.1151, "lr": 9.523994150741511e-08, "epoch": 4.603418475129633, "percentage": 92.07, "elapsed_time": "3:09:29", "remaining_time": "0:16:19", "throughput": 19905.68, "total_tokens": 226315840}
|
|
{"current_steps": 71915, "total_steps": 78105, "loss": 0.0849, "lr": 9.508727305743815e-08, "epoch": 4.60373855707061, "percentage": 92.07, "elapsed_time": "3:09:30", "remaining_time": "0:16:18", "throughput": 19905.84, "total_tokens": 226330752}
|
|
{"current_steps": 71920, "total_steps": 78105, "loss": 0.1479, "lr": 9.49347246962759e-08, "epoch": 4.604058639011587, "percentage": 92.08, "elapsed_time": "3:09:30", "remaining_time": "0:16:17", "throughput": 19906.04, "total_tokens": 226346560}
|
|
{"current_steps": 71925, "total_steps": 78105, "loss": 0.1116, "lr": 9.478229643154618e-08, "epoch": 4.604378720952564, "percentage": 92.09, "elapsed_time": "3:09:31", "remaining_time": "0:16:17", "throughput": 19906.21, "total_tokens": 226361408}
|
|
{"current_steps": 71930, "total_steps": 78105, "loss": 0.1724, "lr": 9.462998827086012e-08, "epoch": 4.604698802893541, "percentage": 92.09, "elapsed_time": "3:09:32", "remaining_time": "0:16:16", "throughput": 19906.44, "total_tokens": 226378240}
|
|
{"current_steps": 71935, "total_steps": 78105, "loss": 0.109, "lr": 9.447780022182357e-08, "epoch": 4.605018884834518, "percentage": 92.1, "elapsed_time": "3:09:32", "remaining_time": "0:16:15", "throughput": 19906.64, "total_tokens": 226393856}
|
|
{"current_steps": 71940, "total_steps": 78105, "loss": 0.1899, "lr": 9.432573229203573e-08, "epoch": 4.605338966775495, "percentage": 92.11, "elapsed_time": "3:09:33", "remaining_time": "0:16:14", "throughput": 19906.84, "total_tokens": 226409600}
|
|
{"current_steps": 71945, "total_steps": 78105, "loss": 0.1598, "lr": 9.417378448908998e-08, "epoch": 4.605659048716472, "percentage": 92.11, "elapsed_time": "3:09:34", "remaining_time": "0:16:13", "throughput": 19907.05, "total_tokens": 226425792}
|
|
{"current_steps": 71950, "total_steps": 78105, "loss": 0.1339, "lr": 9.402195682057385e-08, "epoch": 4.605979130657448, "percentage": 92.12, "elapsed_time": "3:09:34", "remaining_time": "0:16:13", "throughput": 19907.23, "total_tokens": 226441216}
|
|
{"current_steps": 71955, "total_steps": 78105, "loss": 0.1129, "lr": 9.387024929406879e-08, "epoch": 4.606299212598425, "percentage": 92.13, "elapsed_time": "3:09:35", "remaining_time": "0:16:12", "throughput": 19907.41, "total_tokens": 226456512}
|
|
{"current_steps": 71960, "total_steps": 78105, "loss": 0.1145, "lr": 9.371866191715067e-08, "epoch": 4.606619294539402, "percentage": 92.13, "elapsed_time": "3:09:36", "remaining_time": "0:16:11", "throughput": 19907.68, "total_tokens": 226474112}
|
|
{"current_steps": 71965, "total_steps": 78105, "loss": 0.1759, "lr": 9.356719469738873e-08, "epoch": 4.606939376480379, "percentage": 92.14, "elapsed_time": "3:09:36", "remaining_time": "0:16:10", "throughput": 19907.89, "total_tokens": 226489920}
|
|
{"current_steps": 71970, "total_steps": 78105, "loss": 0.1302, "lr": 9.341584764234635e-08, "epoch": 4.607259458421356, "percentage": 92.15, "elapsed_time": "3:09:37", "remaining_time": "0:16:09", "throughput": 19908.03, "total_tokens": 226504576}
|
|
{"current_steps": 71975, "total_steps": 78105, "loss": 0.1262, "lr": 9.326462075958137e-08, "epoch": 4.607579540362333, "percentage": 92.15, "elapsed_time": "3:09:38", "remaining_time": "0:16:09", "throughput": 19908.2, "total_tokens": 226519488}
|
|
{"current_steps": 71980, "total_steps": 78105, "loss": 0.1337, "lr": 9.31135140566447e-08, "epoch": 4.60789962230331, "percentage": 92.16, "elapsed_time": "3:09:38", "remaining_time": "0:16:08", "throughput": 19908.37, "total_tokens": 226534592}
|
|
{"current_steps": 71985, "total_steps": 78105, "loss": 0.1339, "lr": 9.296252754108225e-08, "epoch": 4.608219704244286, "percentage": 92.16, "elapsed_time": "3:09:39", "remaining_time": "0:16:07", "throughput": 19908.56, "total_tokens": 226550272}
|
|
{"current_steps": 71990, "total_steps": 78105, "loss": 0.1727, "lr": 9.2811661220433e-08, "epoch": 4.608539786185263, "percentage": 92.17, "elapsed_time": "3:09:40", "remaining_time": "0:16:06", "throughput": 19908.79, "total_tokens": 226567104}
|
|
{"current_steps": 71995, "total_steps": 78105, "loss": 0.1343, "lr": 9.266091510223146e-08, "epoch": 4.60885986812624, "percentage": 92.18, "elapsed_time": "3:09:40", "remaining_time": "0:16:05", "throughput": 19908.99, "total_tokens": 226582784}
|
|
{"current_steps": 72000, "total_steps": 78105, "loss": 0.1683, "lr": 9.251028919400385e-08, "epoch": 4.609179950067217, "percentage": 92.18, "elapsed_time": "3:09:41", "remaining_time": "0:16:05", "throughput": 19909.22, "total_tokens": 226599296}
|
|
{"current_steps": 72005, "total_steps": 78105, "loss": 0.1576, "lr": 9.235978350327246e-08, "epoch": 4.609500032008194, "percentage": 92.19, "elapsed_time": "3:09:42", "remaining_time": "0:16:04", "throughput": 19909.44, "total_tokens": 226615744}
|
|
{"current_steps": 72010, "total_steps": 78105, "loss": 0.1205, "lr": 9.220939803755269e-08, "epoch": 4.609820113949171, "percentage": 92.2, "elapsed_time": "3:09:42", "remaining_time": "0:16:03", "throughput": 19909.61, "total_tokens": 226630720}
|
|
{"current_steps": 72015, "total_steps": 78105, "loss": 0.1782, "lr": 9.205913280435352e-08, "epoch": 4.610140195890148, "percentage": 92.2, "elapsed_time": "3:09:43", "remaining_time": "0:16:02", "throughput": 19909.87, "total_tokens": 226647936}
|
|
{"current_steps": 72020, "total_steps": 78105, "loss": 0.1098, "lr": 9.190898781117925e-08, "epoch": 4.610460277831125, "percentage": 92.21, "elapsed_time": "3:09:44", "remaining_time": "0:16:01", "throughput": 19910.06, "total_tokens": 226663680}
|
|
{"current_steps": 72025, "total_steps": 78105, "loss": 0.1331, "lr": 9.175896306552634e-08, "epoch": 4.610780359772102, "percentage": 92.22, "elapsed_time": "3:09:45", "remaining_time": "0:16:01", "throughput": 19910.24, "total_tokens": 226678976}
|
|
{"current_steps": 72030, "total_steps": 78105, "loss": 0.1393, "lr": 9.160905857488717e-08, "epoch": 4.611100441713079, "percentage": 92.22, "elapsed_time": "3:09:45", "remaining_time": "0:16:00", "throughput": 19910.41, "total_tokens": 226694144}
|
|
{"current_steps": 72035, "total_steps": 78105, "loss": 0.1737, "lr": 9.145927434674629e-08, "epoch": 4.611420523654055, "percentage": 92.23, "elapsed_time": "3:09:46", "remaining_time": "0:15:59", "throughput": 19910.56, "total_tokens": 226708800}
|
|
{"current_steps": 72040, "total_steps": 78105, "loss": 0.0947, "lr": 9.13096103885841e-08, "epoch": 4.611740605595032, "percentage": 92.23, "elapsed_time": "3:09:47", "remaining_time": "0:15:58", "throughput": 19910.75, "total_tokens": 226724480}
|
|
{"current_steps": 72045, "total_steps": 78105, "loss": 0.1318, "lr": 9.116006670787325e-08, "epoch": 4.612060687536009, "percentage": 92.24, "elapsed_time": "3:09:47", "remaining_time": "0:15:57", "throughput": 19910.92, "total_tokens": 226739456}
|
|
{"current_steps": 72050, "total_steps": 78105, "loss": 0.1285, "lr": 9.101064331208165e-08, "epoch": 4.612380769476986, "percentage": 92.25, "elapsed_time": "3:09:48", "remaining_time": "0:15:57", "throughput": 19911.1, "total_tokens": 226754944}
|
|
{"current_steps": 72055, "total_steps": 78105, "loss": 0.1325, "lr": 9.086134020867055e-08, "epoch": 4.612700851417963, "percentage": 92.25, "elapsed_time": "3:09:49", "remaining_time": "0:15:56", "throughput": 19911.28, "total_tokens": 226769920}
|
|
{"current_steps": 72060, "total_steps": 78105, "loss": 0.1794, "lr": 9.071215740509537e-08, "epoch": 4.61302093335894, "percentage": 92.26, "elapsed_time": "3:09:49", "remaining_time": "0:15:55", "throughput": 19911.47, "total_tokens": 226784960}
|
|
{"current_steps": 72065, "total_steps": 78105, "loss": 0.0837, "lr": 9.056309490880544e-08, "epoch": 4.613341015299917, "percentage": 92.27, "elapsed_time": "3:09:50", "remaining_time": "0:15:54", "throughput": 19911.66, "total_tokens": 226800768}
|
|
{"current_steps": 72070, "total_steps": 78105, "loss": 0.0679, "lr": 9.041415272724396e-08, "epoch": 4.613661097240894, "percentage": 92.27, "elapsed_time": "3:09:51", "remaining_time": "0:15:53", "throughput": 19911.86, "total_tokens": 226816896}
|
|
{"current_steps": 72075, "total_steps": 78105, "loss": 0.1183, "lr": 9.026533086784889e-08, "epoch": 4.61398117918187, "percentage": 92.28, "elapsed_time": "3:09:51", "remaining_time": "0:15:53", "throughput": 19912.05, "total_tokens": 226832896}
|
|
{"current_steps": 72080, "total_steps": 78105, "loss": 0.1624, "lr": 9.011662933805149e-08, "epoch": 4.614301261122847, "percentage": 92.29, "elapsed_time": "3:09:52", "remaining_time": "0:15:52", "throughput": 19912.22, "total_tokens": 226847936}
|
|
{"current_steps": 72085, "total_steps": 78105, "loss": 0.1095, "lr": 8.996804814527693e-08, "epoch": 4.614621343063824, "percentage": 92.29, "elapsed_time": "3:09:53", "remaining_time": "0:15:51", "throughput": 19912.41, "total_tokens": 226863360}
|
|
{"current_steps": 72090, "total_steps": 78105, "loss": 0.1501, "lr": 8.981958729694457e-08, "epoch": 4.614941425004801, "percentage": 92.3, "elapsed_time": "3:09:53", "remaining_time": "0:15:50", "throughput": 19912.6, "total_tokens": 226878784}
|
|
{"current_steps": 72095, "total_steps": 78105, "loss": 0.1179, "lr": 8.967124680046819e-08, "epoch": 4.615261506945778, "percentage": 92.31, "elapsed_time": "3:09:54", "remaining_time": "0:15:49", "throughput": 19912.83, "total_tokens": 226895616}
|
|
{"current_steps": 72100, "total_steps": 78105, "loss": 0.0891, "lr": 8.952302666325408e-08, "epoch": 4.615581588886755, "percentage": 92.31, "elapsed_time": "3:09:55", "remaining_time": "0:15:49", "throughput": 19913.05, "total_tokens": 226912000}
|
|
{"current_steps": 72105, "total_steps": 78105, "loss": 0.1361, "lr": 8.937492689270522e-08, "epoch": 4.615901670827732, "percentage": 92.32, "elapsed_time": "3:09:55", "remaining_time": "0:15:48", "throughput": 19913.25, "total_tokens": 226928192}
|
|
{"current_steps": 72110, "total_steps": 78105, "loss": 0.1639, "lr": 8.922694749621513e-08, "epoch": 4.616221752768709, "percentage": 92.32, "elapsed_time": "3:09:56", "remaining_time": "0:15:47", "throughput": 19913.44, "total_tokens": 226943936}
|
|
{"current_steps": 72115, "total_steps": 78105, "loss": 0.1455, "lr": 8.907908848117485e-08, "epoch": 4.616541834709686, "percentage": 92.33, "elapsed_time": "3:09:57", "remaining_time": "0:15:46", "throughput": 19913.64, "total_tokens": 226959872}
|
|
{"current_steps": 72120, "total_steps": 78105, "loss": 0.0895, "lr": 8.893134985496599e-08, "epoch": 4.616861916650663, "percentage": 92.34, "elapsed_time": "3:09:57", "remaining_time": "0:15:45", "throughput": 19913.8, "total_tokens": 226974912}
|
|
{"current_steps": 72125, "total_steps": 78105, "loss": 0.1536, "lr": 8.878373162496734e-08, "epoch": 4.617181998591639, "percentage": 92.34, "elapsed_time": "3:09:58", "remaining_time": "0:15:45", "throughput": 19914.0, "total_tokens": 226990656}
|
|
{"current_steps": 72130, "total_steps": 78105, "loss": 0.1079, "lr": 8.863623379854942e-08, "epoch": 4.617502080532616, "percentage": 92.35, "elapsed_time": "3:09:59", "remaining_time": "0:15:44", "throughput": 19914.18, "total_tokens": 227005632}
|
|
{"current_steps": 72135, "total_steps": 78105, "loss": 0.0973, "lr": 8.848885638307741e-08, "epoch": 4.617822162473593, "percentage": 92.36, "elapsed_time": "3:09:59", "remaining_time": "0:15:43", "throughput": 19914.37, "total_tokens": 227021440}
|
|
{"current_steps": 72140, "total_steps": 78105, "loss": 0.1035, "lr": 8.834159938591131e-08, "epoch": 4.61814224441457, "percentage": 92.36, "elapsed_time": "3:10:00", "remaining_time": "0:15:42", "throughput": 19914.55, "total_tokens": 227036672}
|
|
{"current_steps": 72145, "total_steps": 78105, "loss": 0.117, "lr": 8.819446281440325e-08, "epoch": 4.618462326355547, "percentage": 92.37, "elapsed_time": "3:10:01", "remaining_time": "0:15:41", "throughput": 19914.77, "total_tokens": 227052928}
|
|
{"current_steps": 72150, "total_steps": 78105, "loss": 0.1554, "lr": 8.804744667590153e-08, "epoch": 4.618782408296524, "percentage": 92.38, "elapsed_time": "3:10:01", "remaining_time": "0:15:41", "throughput": 19914.97, "total_tokens": 227068544}
|
|
{"current_steps": 72155, "total_steps": 78105, "loss": 0.117, "lr": 8.790055097774668e-08, "epoch": 4.619102490237501, "percentage": 92.38, "elapsed_time": "3:10:02", "remaining_time": "0:15:40", "throughput": 19915.14, "total_tokens": 227083520}
|
|
{"current_steps": 72160, "total_steps": 78105, "loss": 0.1018, "lr": 8.775377572727423e-08, "epoch": 4.619422572178478, "percentage": 92.39, "elapsed_time": "3:10:03", "remaining_time": "0:15:39", "throughput": 19915.34, "total_tokens": 227099264}
|
|
{"current_steps": 72165, "total_steps": 78105, "loss": 0.1249, "lr": 8.760712093181356e-08, "epoch": 4.619742654119454, "percentage": 92.39, "elapsed_time": "3:10:03", "remaining_time": "0:15:38", "throughput": 19915.52, "total_tokens": 227114496}
|
|
{"current_steps": 72170, "total_steps": 78105, "loss": 0.1567, "lr": 8.746058659868717e-08, "epoch": 4.620062736060431, "percentage": 92.4, "elapsed_time": "3:10:04", "remaining_time": "0:15:37", "throughput": 19915.68, "total_tokens": 227129536}
|
|
{"current_steps": 72175, "total_steps": 78105, "loss": 0.1458, "lr": 8.731417273521308e-08, "epoch": 4.620382818001408, "percentage": 92.41, "elapsed_time": "3:10:05", "remaining_time": "0:15:37", "throughput": 19915.84, "total_tokens": 227144256}
|
|
{"current_steps": 72180, "total_steps": 78105, "loss": 0.113, "lr": 8.716787934870158e-08, "epoch": 4.620702899942385, "percentage": 92.41, "elapsed_time": "3:10:05", "remaining_time": "0:15:36", "throughput": 19916.04, "total_tokens": 227159808}
|
|
{"current_steps": 72185, "total_steps": 78105, "loss": 0.1498, "lr": 8.702170644645819e-08, "epoch": 4.621022981883362, "percentage": 92.42, "elapsed_time": "3:10:06", "remaining_time": "0:15:35", "throughput": 19916.26, "total_tokens": 227176192}
|
|
{"current_steps": 72190, "total_steps": 78105, "loss": 0.2111, "lr": 8.687565403578208e-08, "epoch": 4.621343063824339, "percentage": 92.43, "elapsed_time": "3:10:07", "remaining_time": "0:15:34", "throughput": 19916.45, "total_tokens": 227191872}
|
|
{"current_steps": 72195, "total_steps": 78105, "loss": 0.1301, "lr": 8.67297221239663e-08, "epoch": 4.621663145765316, "percentage": 92.43, "elapsed_time": "3:10:07", "remaining_time": "0:15:33", "throughput": 19916.68, "total_tokens": 227208768}
|
|
{"current_steps": 72200, "total_steps": 78105, "loss": 0.1165, "lr": 8.658391071829808e-08, "epoch": 4.621983227706293, "percentage": 92.44, "elapsed_time": "3:10:08", "remaining_time": "0:15:33", "throughput": 19916.83, "total_tokens": 227223168}
|
|
{"current_steps": 72205, "total_steps": 78105, "loss": 0.1173, "lr": 8.643821982605826e-08, "epoch": 4.62230330964727, "percentage": 92.45, "elapsed_time": "3:10:09", "remaining_time": "0:15:32", "throughput": 19917.01, "total_tokens": 227238784}
|
|
{"current_steps": 72210, "total_steps": 78105, "loss": 0.1041, "lr": 8.629264945452214e-08, "epoch": 4.622623391588247, "percentage": 92.45, "elapsed_time": "3:10:09", "remaining_time": "0:15:31", "throughput": 19917.23, "total_tokens": 227255424}
|
|
{"current_steps": 72215, "total_steps": 78105, "loss": 0.1124, "lr": 8.614719961095836e-08, "epoch": 4.622943473529223, "percentage": 92.46, "elapsed_time": "3:10:10", "remaining_time": "0:15:30", "throughput": 19917.4, "total_tokens": 227270144}
|
|
{"current_steps": 72220, "total_steps": 78105, "loss": 0.1159, "lr": 8.600187030262997e-08, "epoch": 4.6232635554702, "percentage": 92.47, "elapsed_time": "3:10:11", "remaining_time": "0:15:29", "throughput": 19917.63, "total_tokens": 227286656}
|
|
{"current_steps": 72225, "total_steps": 78105, "loss": 0.0744, "lr": 8.58566615367945e-08, "epoch": 4.623583637411177, "percentage": 92.47, "elapsed_time": "3:10:12", "remaining_time": "0:15:29", "throughput": 19917.81, "total_tokens": 227302336}
|
|
{"current_steps": 72230, "total_steps": 78105, "loss": 0.1673, "lr": 8.571157332070228e-08, "epoch": 4.623903719352154, "percentage": 92.48, "elapsed_time": "3:10:12", "remaining_time": "0:15:28", "throughput": 19918.02, "total_tokens": 227318208}
|
|
{"current_steps": 72235, "total_steps": 78105, "loss": 0.1104, "lr": 8.556660566159864e-08, "epoch": 4.624223801293131, "percentage": 92.48, "elapsed_time": "3:10:13", "remaining_time": "0:15:27", "throughput": 19918.21, "total_tokens": 227334016}
|
|
{"current_steps": 72240, "total_steps": 78105, "loss": 0.1376, "lr": 8.54217585667222e-08, "epoch": 4.624543883234108, "percentage": 92.49, "elapsed_time": "3:10:14", "remaining_time": "0:15:26", "throughput": 19918.44, "total_tokens": 227350720}
|
|
{"current_steps": 72245, "total_steps": 78105, "loss": 0.1202, "lr": 8.527703204330607e-08, "epoch": 4.624863965175085, "percentage": 92.5, "elapsed_time": "3:10:14", "remaining_time": "0:15:25", "throughput": 19918.65, "total_tokens": 227367040}
|
|
{"current_steps": 72250, "total_steps": 78105, "loss": 0.1063, "lr": 8.513242609857753e-08, "epoch": 4.625184047116061, "percentage": 92.5, "elapsed_time": "3:10:15", "remaining_time": "0:15:25", "throughput": 19918.82, "total_tokens": 227382144}
|
|
{"current_steps": 72255, "total_steps": 78105, "loss": 0.153, "lr": 8.498794073975636e-08, "epoch": 4.625504129057038, "percentage": 92.51, "elapsed_time": "3:10:16", "remaining_time": "0:15:24", "throughput": 19919.01, "total_tokens": 227397824}
|
|
{"current_steps": 72260, "total_steps": 78105, "loss": 0.1076, "lr": 8.4843575974059e-08, "epoch": 4.625824210998015, "percentage": 92.52, "elapsed_time": "3:10:16", "remaining_time": "0:15:23", "throughput": 19919.17, "total_tokens": 227412544}
|
|
{"current_steps": 72265, "total_steps": 78105, "loss": 0.1476, "lr": 8.46993318086925e-08, "epoch": 4.626144292938992, "percentage": 92.52, "elapsed_time": "3:10:17", "remaining_time": "0:15:22", "throughput": 19919.38, "total_tokens": 227428864}
|
|
{"current_steps": 72270, "total_steps": 78105, "loss": 0.1022, "lr": 8.455520825086105e-08, "epoch": 4.626464374879969, "percentage": 92.53, "elapsed_time": "3:10:18", "remaining_time": "0:15:21", "throughput": 19919.55, "total_tokens": 227444032}
|
|
{"current_steps": 72275, "total_steps": 78105, "loss": 0.1015, "lr": 8.44112053077606e-08, "epoch": 4.626784456820946, "percentage": 92.54, "elapsed_time": "3:10:18", "remaining_time": "0:15:21", "throughput": 19919.76, "total_tokens": 227460032}
|
|
{"current_steps": 72280, "total_steps": 78105, "loss": 0.1472, "lr": 8.426732298658258e-08, "epoch": 4.627104538761923, "percentage": 92.54, "elapsed_time": "3:10:19", "remaining_time": "0:15:20", "throughput": 19919.97, "total_tokens": 227476672}
|
|
{"current_steps": 72285, "total_steps": 78105, "loss": 0.1316, "lr": 8.412356129451099e-08, "epoch": 4.6274246207029, "percentage": 92.55, "elapsed_time": "3:10:20", "remaining_time": "0:15:19", "throughput": 19920.15, "total_tokens": 227491904}
|
|
{"current_steps": 72290, "total_steps": 78105, "loss": 0.1001, "lr": 8.397992023872508e-08, "epoch": 4.627744702643877, "percentage": 92.55, "elapsed_time": "3:10:20", "remaining_time": "0:15:18", "throughput": 19920.35, "total_tokens": 227507840}
|
|
{"current_steps": 72295, "total_steps": 78105, "loss": 0.1326, "lr": 8.383639982639747e-08, "epoch": 4.628064784584854, "percentage": 92.56, "elapsed_time": "3:10:21", "remaining_time": "0:15:17", "throughput": 19920.55, "total_tokens": 227523520}
|
|
{"current_steps": 72300, "total_steps": 78105, "loss": 0.1299, "lr": 8.369300006469461e-08, "epoch": 4.62838486652583, "percentage": 92.57, "elapsed_time": "3:10:22", "remaining_time": "0:15:17", "throughput": 19920.76, "total_tokens": 227539648}
|
|
{"current_steps": 72305, "total_steps": 78105, "loss": 0.1256, "lr": 8.35497209607769e-08, "epoch": 4.628704948466807, "percentage": 92.57, "elapsed_time": "3:10:22", "remaining_time": "0:15:16", "throughput": 19920.95, "total_tokens": 227555264}
|
|
{"current_steps": 72310, "total_steps": 78105, "loss": 0.1384, "lr": 8.340656252179974e-08, "epoch": 4.629025030407784, "percentage": 92.58, "elapsed_time": "3:10:23", "remaining_time": "0:15:15", "throughput": 19921.18, "total_tokens": 227571648}
|
|
{"current_steps": 72315, "total_steps": 78105, "loss": 0.1439, "lr": 8.326352475491101e-08, "epoch": 4.629345112348761, "percentage": 92.59, "elapsed_time": "3:10:24", "remaining_time": "0:15:14", "throughput": 19921.34, "total_tokens": 227586496}
|
|
{"current_steps": 72320, "total_steps": 78105, "loss": 0.1583, "lr": 8.312060766725388e-08, "epoch": 4.629665194289738, "percentage": 92.59, "elapsed_time": "3:10:24", "remaining_time": "0:15:13", "throughput": 19921.51, "total_tokens": 227601472}
|
|
{"current_steps": 72325, "total_steps": 78105, "loss": 0.1593, "lr": 8.297781126596433e-08, "epoch": 4.629985276230715, "percentage": 92.6, "elapsed_time": "3:10:25", "remaining_time": "0:15:13", "throughput": 19921.71, "total_tokens": 227617536}
|
|
{"current_steps": 72330, "total_steps": 78105, "loss": 0.1117, "lr": 8.283513555817274e-08, "epoch": 4.630305358171692, "percentage": 92.61, "elapsed_time": "3:10:26", "remaining_time": "0:15:12", "throughput": 19921.89, "total_tokens": 227632704}
|
|
{"current_steps": 72335, "total_steps": 78105, "loss": 0.156, "lr": 8.269258055100454e-08, "epoch": 4.630625440112669, "percentage": 92.61, "elapsed_time": "3:10:26", "remaining_time": "0:15:11", "throughput": 19922.1, "total_tokens": 227648640}
|
|
{"current_steps": 72340, "total_steps": 78105, "loss": 0.1008, "lr": 8.255014625157709e-08, "epoch": 4.630945522053645, "percentage": 92.62, "elapsed_time": "3:10:27", "remaining_time": "0:15:10", "throughput": 19922.31, "total_tokens": 227664960}
|
|
{"current_steps": 72345, "total_steps": 78105, "loss": 0.1675, "lr": 8.240783266700387e-08, "epoch": 4.631265603994622, "percentage": 92.63, "elapsed_time": "3:10:28", "remaining_time": "0:15:09", "throughput": 19922.51, "total_tokens": 227680576}
|
|
{"current_steps": 72350, "total_steps": 78105, "loss": 0.0895, "lr": 8.226563980439e-08, "epoch": 4.631585685935599, "percentage": 92.63, "elapsed_time": "3:10:29", "remaining_time": "0:15:09", "throughput": 19922.74, "total_tokens": 227697280}
|
|
{"current_steps": 72355, "total_steps": 78105, "loss": 0.1265, "lr": 8.212356767083706e-08, "epoch": 4.631905767876576, "percentage": 92.64, "elapsed_time": "3:10:29", "remaining_time": "0:15:08", "throughput": 19922.94, "total_tokens": 227713152}
|
|
{"current_steps": 72360, "total_steps": 78105, "loss": 0.1322, "lr": 8.198161627343881e-08, "epoch": 4.632225849817553, "percentage": 92.64, "elapsed_time": "3:10:30", "remaining_time": "0:15:07", "throughput": 19923.13, "total_tokens": 227728576}
|
|
{"current_steps": 72365, "total_steps": 78105, "loss": 0.1192, "lr": 8.183978561928319e-08, "epoch": 4.63254593175853, "percentage": 92.65, "elapsed_time": "3:10:31", "remaining_time": "0:15:06", "throughput": 19923.34, "total_tokens": 227744448}
|
|
{"current_steps": 72370, "total_steps": 78105, "loss": 0.1155, "lr": 8.169807571545369e-08, "epoch": 4.632866013699507, "percentage": 92.66, "elapsed_time": "3:10:31", "remaining_time": "0:15:05", "throughput": 19923.53, "total_tokens": 227760000}
|
|
{"current_steps": 72375, "total_steps": 78105, "loss": 0.1282, "lr": 8.15564865690252e-08, "epoch": 4.633186095640484, "percentage": 92.66, "elapsed_time": "3:10:32", "remaining_time": "0:15:05", "throughput": 19923.7, "total_tokens": 227775488}
|
|
{"current_steps": 72380, "total_steps": 78105, "loss": 0.1357, "lr": 8.141501818706877e-08, "epoch": 4.633506177581461, "percentage": 92.67, "elapsed_time": "3:10:33", "remaining_time": "0:15:04", "throughput": 19923.86, "total_tokens": 227790272}
|
|
{"current_steps": 72385, "total_steps": 78105, "loss": 0.1583, "lr": 8.127367057664842e-08, "epoch": 4.633826259522438, "percentage": 92.68, "elapsed_time": "3:10:33", "remaining_time": "0:15:03", "throughput": 19924.06, "total_tokens": 227805824}
|
|
{"current_steps": 72390, "total_steps": 78105, "loss": 0.0813, "lr": 8.113244374482243e-08, "epoch": 4.634146341463414, "percentage": 92.68, "elapsed_time": "3:10:34", "remaining_time": "0:15:02", "throughput": 19924.31, "total_tokens": 227822912}
|
|
{"current_steps": 72395, "total_steps": 78105, "loss": 0.1038, "lr": 8.099133769864265e-08, "epoch": 4.634466423404391, "percentage": 92.69, "elapsed_time": "3:10:35", "remaining_time": "0:15:01", "throughput": 19924.46, "total_tokens": 227837504}
|
|
{"current_steps": 72400, "total_steps": 78105, "loss": 0.1134, "lr": 8.08503524451551e-08, "epoch": 4.634786505345368, "percentage": 92.7, "elapsed_time": "3:10:35", "remaining_time": "0:15:01", "throughput": 19924.65, "total_tokens": 227852992}
|
|
{"current_steps": 72405, "total_steps": 78105, "loss": 0.1408, "lr": 8.070948799140027e-08, "epoch": 4.635106587286345, "percentage": 92.7, "elapsed_time": "3:10:36", "remaining_time": "0:15:00", "throughput": 19924.84, "total_tokens": 227868480}
|
|
{"current_steps": 72410, "total_steps": 78105, "loss": 0.1485, "lr": 8.056874434441197e-08, "epoch": 4.635426669227322, "percentage": 92.71, "elapsed_time": "3:10:37", "remaining_time": "0:14:59", "throughput": 19924.99, "total_tokens": 227882880}
|
|
{"current_steps": 72415, "total_steps": 78105, "loss": 0.156, "lr": 8.042812151121793e-08, "epoch": 4.635746751168299, "percentage": 92.71, "elapsed_time": "3:10:37", "remaining_time": "0:14:58", "throughput": 19925.2, "total_tokens": 227899264}
|
|
{"current_steps": 72420, "total_steps": 78105, "loss": 0.1853, "lr": 8.028761949884084e-08, "epoch": 4.636066833109276, "percentage": 92.72, "elapsed_time": "3:10:38", "remaining_time": "0:14:57", "throughput": 19925.37, "total_tokens": 227914432}
|
|
{"current_steps": 72425, "total_steps": 78105, "loss": 0.1024, "lr": 8.014723831429594e-08, "epoch": 4.636386915050253, "percentage": 92.73, "elapsed_time": "3:10:39", "remaining_time": "0:14:57", "throughput": 19925.58, "total_tokens": 227930432}
|
|
{"current_steps": 72430, "total_steps": 78105, "loss": 0.2161, "lr": 8.000697796459373e-08, "epoch": 4.636706996991229, "percentage": 92.73, "elapsed_time": "3:10:39", "remaining_time": "0:14:56", "throughput": 19925.79, "total_tokens": 227946432}
|
|
{"current_steps": 72435, "total_steps": 78105, "loss": 0.1531, "lr": 7.986683845673748e-08, "epoch": 4.637027078932206, "percentage": 92.74, "elapsed_time": "3:10:40", "remaining_time": "0:14:55", "throughput": 19925.94, "total_tokens": 227960832}
|
|
{"current_steps": 72440, "total_steps": 78105, "loss": 0.1102, "lr": 7.972681979772551e-08, "epoch": 4.637347160873183, "percentage": 92.75, "elapsed_time": "3:10:41", "remaining_time": "0:14:54", "throughput": 19926.12, "total_tokens": 227976192}
|
|
{"current_steps": 72445, "total_steps": 78105, "loss": 0.1153, "lr": 7.958692199454915e-08, "epoch": 4.63766724281416, "percentage": 92.75, "elapsed_time": "3:10:41", "remaining_time": "0:14:53", "throughput": 19926.31, "total_tokens": 227991680}
|
|
{"current_steps": 72450, "total_steps": 78105, "loss": 0.1571, "lr": 7.944714505419448e-08, "epoch": 4.637987324755137, "percentage": 92.76, "elapsed_time": "3:10:42", "remaining_time": "0:14:53", "throughput": 19926.49, "total_tokens": 228007232}
|
|
{"current_steps": 72455, "total_steps": 78105, "loss": 0.2076, "lr": 7.930748898364149e-08, "epoch": 4.638307406696114, "percentage": 92.77, "elapsed_time": "3:10:43", "remaining_time": "0:14:52", "throughput": 19926.67, "total_tokens": 228022720}
|
|
{"current_steps": 72460, "total_steps": 78105, "loss": 0.1597, "lr": 7.91679537898632e-08, "epoch": 4.638627488637091, "percentage": 92.77, "elapsed_time": "3:10:43", "remaining_time": "0:14:51", "throughput": 19926.87, "total_tokens": 228038016}
|
|
{"current_steps": 72465, "total_steps": 78105, "loss": 0.1065, "lr": 7.902853947982764e-08, "epoch": 4.638947570578068, "percentage": 92.78, "elapsed_time": "3:10:44", "remaining_time": "0:14:50", "throughput": 19927.04, "total_tokens": 228052864}
|
|
{"current_steps": 72470, "total_steps": 78105, "loss": 0.1465, "lr": 7.888924606049675e-08, "epoch": 4.639267652519045, "percentage": 92.79, "elapsed_time": "3:10:45", "remaining_time": "0:14:49", "throughput": 19927.23, "total_tokens": 228068352}
|
|
{"current_steps": 72475, "total_steps": 78105, "loss": 0.1201, "lr": 7.875007353882552e-08, "epoch": 4.639587734460022, "percentage": 92.79, "elapsed_time": "3:10:45", "remaining_time": "0:14:49", "throughput": 19927.46, "total_tokens": 228084672}
|
|
{"current_steps": 72480, "total_steps": 78105, "loss": 0.1088, "lr": 7.861102192176395e-08, "epoch": 4.639907816400998, "percentage": 92.8, "elapsed_time": "3:10:46", "remaining_time": "0:14:48", "throughput": 19927.68, "total_tokens": 228100864}
|
|
{"current_steps": 72485, "total_steps": 78105, "loss": 0.1338, "lr": 7.84720912162551e-08, "epoch": 4.640227898341975, "percentage": 92.8, "elapsed_time": "3:10:47", "remaining_time": "0:14:47", "throughput": 19927.85, "total_tokens": 228115968}
|
|
{"current_steps": 72490, "total_steps": 78105, "loss": 0.1203, "lr": 7.83332814292373e-08, "epoch": 4.640547980282952, "percentage": 92.81, "elapsed_time": "3:10:47", "remaining_time": "0:14:46", "throughput": 19928.09, "total_tokens": 228132736}
|
|
{"current_steps": 72495, "total_steps": 78105, "loss": 0.1164, "lr": 7.819459256764083e-08, "epoch": 4.640868062223929, "percentage": 92.82, "elapsed_time": "3:10:48", "remaining_time": "0:14:45", "throughput": 19928.29, "total_tokens": 228148800}
|
|
{"current_steps": 72500, "total_steps": 78105, "loss": 0.1113, "lr": 7.805602463839212e-08, "epoch": 4.641188144164906, "percentage": 92.82, "elapsed_time": "3:10:49", "remaining_time": "0:14:45", "throughput": 19928.44, "total_tokens": 228163712}
|
|
{"current_steps": 72505, "total_steps": 78105, "loss": 0.1175, "lr": 7.79175776484098e-08, "epoch": 4.641508226105883, "percentage": 92.83, "elapsed_time": "3:10:49", "remaining_time": "0:14:44", "throughput": 19928.6, "total_tokens": 228178432}
|
|
{"current_steps": 72510, "total_steps": 78105, "loss": 0.1329, "lr": 7.777925160460776e-08, "epoch": 4.64182830804686, "percentage": 92.84, "elapsed_time": "3:10:50", "remaining_time": "0:14:43", "throughput": 19928.78, "total_tokens": 228193856}
|
|
{"current_steps": 72515, "total_steps": 78105, "loss": 0.1489, "lr": 7.764104651389298e-08, "epoch": 4.642148389987836, "percentage": 92.84, "elapsed_time": "3:10:51", "remaining_time": "0:14:42", "throughput": 19928.94, "total_tokens": 228208768}
|
|
{"current_steps": 72520, "total_steps": 78105, "loss": 0.1213, "lr": 7.750296238316663e-08, "epoch": 4.642468471928813, "percentage": 92.85, "elapsed_time": "3:10:51", "remaining_time": "0:14:41", "throughput": 19929.16, "total_tokens": 228225344}
|
|
{"current_steps": 72525, "total_steps": 78105, "loss": 0.1263, "lr": 7.7364999219324e-08, "epoch": 4.64278855386979, "percentage": 92.86, "elapsed_time": "3:10:52", "remaining_time": "0:14:41", "throughput": 19929.32, "total_tokens": 228239872}
|
|
{"current_steps": 72530, "total_steps": 78105, "loss": 0.1204, "lr": 7.722715702925432e-08, "epoch": 4.643108635810767, "percentage": 92.86, "elapsed_time": "3:10:53", "remaining_time": "0:14:40", "throughput": 19929.49, "total_tokens": 228255040}
|
|
{"current_steps": 72535, "total_steps": 78105, "loss": 0.109, "lr": 7.708943581984041e-08, "epoch": 4.643428717751744, "percentage": 92.87, "elapsed_time": "3:10:53", "remaining_time": "0:14:39", "throughput": 19929.68, "total_tokens": 228270592}
|
|
{"current_steps": 72540, "total_steps": 78105, "loss": 0.139, "lr": 7.695183559795982e-08, "epoch": 4.643748799692721, "percentage": 92.87, "elapsed_time": "3:10:54", "remaining_time": "0:14:38", "throughput": 19929.84, "total_tokens": 228285632}
|
|
{"current_steps": 72545, "total_steps": 78105, "loss": 0.1182, "lr": 7.681435637048318e-08, "epoch": 4.644068881633698, "percentage": 92.88, "elapsed_time": "3:10:55", "remaining_time": "0:14:37", "throughput": 19930.1, "total_tokens": 228303552}
|
|
{"current_steps": 72550, "total_steps": 78105, "loss": 0.127, "lr": 7.667699814427582e-08, "epoch": 4.644388963574675, "percentage": 92.89, "elapsed_time": "3:10:55", "remaining_time": "0:14:37", "throughput": 19930.32, "total_tokens": 228319936}
|
|
{"current_steps": 72555, "total_steps": 78105, "loss": 0.1141, "lr": 7.65397609261967e-08, "epoch": 4.644709045515652, "percentage": 92.89, "elapsed_time": "3:10:56", "remaining_time": "0:14:36", "throughput": 19930.56, "total_tokens": 228336896}
|
|
{"current_steps": 72560, "total_steps": 78105, "loss": 0.1452, "lr": 7.640264472309811e-08, "epoch": 4.645029127456629, "percentage": 92.9, "elapsed_time": "3:10:57", "remaining_time": "0:14:35", "throughput": 19930.73, "total_tokens": 228352128}
|
|
{"current_steps": 72565, "total_steps": 78105, "loss": 0.1243, "lr": 7.626564954182792e-08, "epoch": 4.645349209397605, "percentage": 92.91, "elapsed_time": "3:10:57", "remaining_time": "0:14:34", "throughput": 19930.94, "total_tokens": 228368448}
|
|
{"current_steps": 72570, "total_steps": 78105, "loss": 0.1149, "lr": 7.61287753892262e-08, "epoch": 4.645669291338582, "percentage": 92.91, "elapsed_time": "3:10:58", "remaining_time": "0:14:33", "throughput": 19931.15, "total_tokens": 228384512}
|
|
{"current_steps": 72575, "total_steps": 78105, "loss": 0.157, "lr": 7.599202227212805e-08, "epoch": 4.645989373279559, "percentage": 92.92, "elapsed_time": "3:10:59", "remaining_time": "0:14:33", "throughput": 19931.3, "total_tokens": 228398912}
|
|
{"current_steps": 72580, "total_steps": 78105, "loss": 0.1111, "lr": 7.585539019736188e-08, "epoch": 4.646309455220536, "percentage": 92.93, "elapsed_time": "3:10:59", "remaining_time": "0:14:32", "throughput": 19931.46, "total_tokens": 228413888}
|
|
{"current_steps": 72585, "total_steps": 78105, "loss": 0.1115, "lr": 7.571887917175086e-08, "epoch": 4.646629537161513, "percentage": 92.93, "elapsed_time": "3:11:00", "remaining_time": "0:14:31", "throughput": 19931.64, "total_tokens": 228429056}
|
|
{"current_steps": 72590, "total_steps": 78105, "loss": 0.1235, "lr": 7.55824892021112e-08, "epoch": 4.64694961910249, "percentage": 92.94, "elapsed_time": "3:11:01", "remaining_time": "0:14:30", "throughput": 19931.84, "total_tokens": 228444736}
|
|
{"current_steps": 72595, "total_steps": 78105, "loss": 0.139, "lr": 7.544622029525356e-08, "epoch": 4.647269701043467, "percentage": 92.95, "elapsed_time": "3:11:01", "remaining_time": "0:14:29", "throughput": 19932.0, "total_tokens": 228459840}
|
|
{"current_steps": 72600, "total_steps": 78105, "loss": 0.12, "lr": 7.531007245798305e-08, "epoch": 4.647589782984444, "percentage": 92.95, "elapsed_time": "3:11:02", "remaining_time": "0:14:29", "throughput": 19932.21, "total_tokens": 228475712}
|
|
{"current_steps": 72605, "total_steps": 78105, "loss": 0.1487, "lr": 7.51740456970973e-08, "epoch": 4.6479098649254205, "percentage": 92.96, "elapsed_time": "3:11:03", "remaining_time": "0:14:28", "throughput": 19932.36, "total_tokens": 228490176}
|
|
{"current_steps": 72610, "total_steps": 78105, "loss": 0.1478, "lr": 7.503814001938975e-08, "epoch": 4.6482299468663975, "percentage": 92.96, "elapsed_time": "3:11:03", "remaining_time": "0:14:27", "throughput": 19932.56, "total_tokens": 228506176}
|
|
{"current_steps": 72615, "total_steps": 78105, "loss": 0.1512, "lr": 7.490235543164609e-08, "epoch": 4.6485500288073744, "percentage": 92.97, "elapsed_time": "3:11:04", "remaining_time": "0:14:26", "throughput": 19932.78, "total_tokens": 228522240}
|
|
{"current_steps": 72620, "total_steps": 78105, "loss": 0.1258, "lr": 7.4766691940647e-08, "epoch": 4.648870110748351, "percentage": 92.98, "elapsed_time": "3:11:05", "remaining_time": "0:14:25", "throughput": 19932.96, "total_tokens": 228537792}
|
|
{"current_steps": 72625, "total_steps": 78105, "loss": 0.179, "lr": 7.463114955316653e-08, "epoch": 4.649190192689328, "percentage": 92.98, "elapsed_time": "3:11:06", "remaining_time": "0:14:25", "throughput": 19933.14, "total_tokens": 228553600}
|
|
{"current_steps": 72630, "total_steps": 78105, "loss": 0.112, "lr": 7.44957282759734e-08, "epoch": 4.649510274630305, "percentage": 92.99, "elapsed_time": "3:11:06", "remaining_time": "0:14:24", "throughput": 19933.34, "total_tokens": 228569728}
|
|
{"current_steps": 72635, "total_steps": 78105, "loss": 0.2158, "lr": 7.436042811582944e-08, "epoch": 4.649830356571282, "percentage": 93.0, "elapsed_time": "3:11:07", "remaining_time": "0:14:23", "throughput": 19933.53, "total_tokens": 228585280}
|
|
{"current_steps": 72640, "total_steps": 78105, "loss": 0.094, "lr": 7.422524907949119e-08, "epoch": 4.650150438512259, "percentage": 93.0, "elapsed_time": "3:11:08", "remaining_time": "0:14:22", "throughput": 19933.7, "total_tokens": 228600512}
|
|
{"current_steps": 72645, "total_steps": 78105, "loss": 0.1175, "lr": 7.409019117370825e-08, "epoch": 4.650470520453236, "percentage": 93.01, "elapsed_time": "3:11:08", "remaining_time": "0:14:21", "throughput": 19933.88, "total_tokens": 228615744}
|
|
{"current_steps": 72650, "total_steps": 78105, "loss": 0.1566, "lr": 7.39552544052255e-08, "epoch": 4.650790602394213, "percentage": 93.02, "elapsed_time": "3:11:09", "remaining_time": "0:14:21", "throughput": 19934.12, "total_tokens": 228632960}
|
|
{"current_steps": 72655, "total_steps": 78105, "loss": 0.1128, "lr": 7.382043878078033e-08, "epoch": 4.6511106843351895, "percentage": 93.02, "elapsed_time": "3:11:10", "remaining_time": "0:14:20", "throughput": 19934.32, "total_tokens": 228648512}
|
|
{"current_steps": 72660, "total_steps": 78105, "loss": 0.1539, "lr": 7.368574430710484e-08, "epoch": 4.6514307662761665, "percentage": 93.03, "elapsed_time": "3:11:10", "remaining_time": "0:14:19", "throughput": 19934.5, "total_tokens": 228664064}
|
|
{"current_steps": 72665, "total_steps": 78105, "loss": 0.1434, "lr": 7.355117099092535e-08, "epoch": 4.6517508482171435, "percentage": 93.04, "elapsed_time": "3:11:11", "remaining_time": "0:14:18", "throughput": 19934.69, "total_tokens": 228679552}
|
|
{"current_steps": 72670, "total_steps": 78105, "loss": 0.1441, "lr": 7.341671883896117e-08, "epoch": 4.6520709301581205, "percentage": 93.04, "elapsed_time": "3:11:12", "remaining_time": "0:14:18", "throughput": 19934.93, "total_tokens": 228696576}
|
|
{"current_steps": 72675, "total_steps": 78105, "loss": 0.0874, "lr": 7.328238785792669e-08, "epoch": 4.6523910120990974, "percentage": 93.05, "elapsed_time": "3:11:12", "remaining_time": "0:14:17", "throughput": 19935.09, "total_tokens": 228711872}
|
|
{"current_steps": 72680, "total_steps": 78105, "loss": 0.1149, "lr": 7.3148178054529e-08, "epoch": 4.652711094040074, "percentage": 93.05, "elapsed_time": "3:11:13", "remaining_time": "0:14:16", "throughput": 19935.25, "total_tokens": 228726656}
|
|
{"current_steps": 72685, "total_steps": 78105, "loss": 0.1334, "lr": 7.301408943547111e-08, "epoch": 4.653031175981051, "percentage": 93.06, "elapsed_time": "3:11:14", "remaining_time": "0:14:15", "throughput": 19935.41, "total_tokens": 228741696}
|
|
{"current_steps": 72690, "total_steps": 78105, "loss": 0.1242, "lr": 7.28801220074471e-08, "epoch": 4.653351257922028, "percentage": 93.07, "elapsed_time": "3:11:14", "remaining_time": "0:14:14", "throughput": 19935.61, "total_tokens": 228757440}
|
|
{"current_steps": 72695, "total_steps": 78105, "loss": 0.1562, "lr": 7.274627577714771e-08, "epoch": 4.6536713398630045, "percentage": 93.07, "elapsed_time": "3:11:15", "remaining_time": "0:14:14", "throughput": 19935.83, "total_tokens": 228774080}
|
|
{"current_steps": 72700, "total_steps": 78105, "loss": 0.1858, "lr": 7.261255075125623e-08, "epoch": 4.6539914218039815, "percentage": 93.08, "elapsed_time": "3:11:16", "remaining_time": "0:14:13", "throughput": 19935.99, "total_tokens": 228788672}
|
|
{"current_steps": 72705, "total_steps": 78105, "loss": 0.1244, "lr": 7.247894693645007e-08, "epoch": 4.6543115037449585, "percentage": 93.09, "elapsed_time": "3:11:16", "remaining_time": "0:14:12", "throughput": 19936.17, "total_tokens": 228804160}
|
|
{"current_steps": 72710, "total_steps": 78105, "loss": 0.1227, "lr": 7.234546433940087e-08, "epoch": 4.6546315856859355, "percentage": 93.09, "elapsed_time": "3:11:17", "remaining_time": "0:14:11", "throughput": 19936.36, "total_tokens": 228819968}
|
|
{"current_steps": 72715, "total_steps": 78105, "loss": 0.1291, "lr": 7.221210296677384e-08, "epoch": 4.6549516676269125, "percentage": 93.1, "elapsed_time": "3:11:18", "remaining_time": "0:14:10", "throughput": 19936.56, "total_tokens": 228835584}
|
|
{"current_steps": 72720, "total_steps": 78105, "loss": 0.1333, "lr": 7.207886282522891e-08, "epoch": 4.6552717495678895, "percentage": 93.11, "elapsed_time": "3:11:18", "remaining_time": "0:14:10", "throughput": 19936.75, "total_tokens": 228851392}
|
|
{"current_steps": 72725, "total_steps": 78105, "loss": 0.0877, "lr": 7.194574392141829e-08, "epoch": 4.6555918315088665, "percentage": 93.11, "elapsed_time": "3:11:19", "remaining_time": "0:14:09", "throughput": 19936.97, "total_tokens": 228867712}
|
|
{"current_steps": 72730, "total_steps": 78105, "loss": 0.0871, "lr": 7.181274626199053e-08, "epoch": 4.6559119134498435, "percentage": 93.12, "elapsed_time": "3:11:20", "remaining_time": "0:14:08", "throughput": 19937.13, "total_tokens": 228882880}
|
|
{"current_steps": 72735, "total_steps": 78105, "loss": 0.1208, "lr": 7.167986985358616e-08, "epoch": 4.65623199539082, "percentage": 93.12, "elapsed_time": "3:11:20", "remaining_time": "0:14:07", "throughput": 19937.35, "total_tokens": 228899456}
|
|
{"current_steps": 72740, "total_steps": 78105, "loss": 0.1594, "lr": 7.154711470284043e-08, "epoch": 4.656552077331797, "percentage": 93.13, "elapsed_time": "3:11:21", "remaining_time": "0:14:06", "throughput": 19937.56, "total_tokens": 228915328}
|
|
{"current_steps": 72745, "total_steps": 78105, "loss": 0.1059, "lr": 7.141448081638219e-08, "epoch": 4.6568721592727735, "percentage": 93.14, "elapsed_time": "3:11:22", "remaining_time": "0:14:06", "throughput": 19937.77, "total_tokens": 228931328}
|
|
{"current_steps": 72750, "total_steps": 78105, "loss": 0.1116, "lr": 7.128196820083505e-08, "epoch": 4.6571922412137505, "percentage": 93.14, "elapsed_time": "3:11:23", "remaining_time": "0:14:05", "throughput": 19938.02, "total_tokens": 228948928}
|
|
{"current_steps": 72755, "total_steps": 78105, "loss": 0.204, "lr": 7.114957686281538e-08, "epoch": 4.6575123231547275, "percentage": 93.15, "elapsed_time": "3:11:23", "remaining_time": "0:14:04", "throughput": 19938.22, "total_tokens": 228964928}
|
|
{"current_steps": 72760, "total_steps": 78105, "loss": 0.1163, "lr": 7.101730680893481e-08, "epoch": 4.6578324050957045, "percentage": 93.16, "elapsed_time": "3:11:24", "remaining_time": "0:14:03", "throughput": 19938.47, "total_tokens": 228982080}
|
|
{"current_steps": 72765, "total_steps": 78105, "loss": 0.1596, "lr": 7.088515804579754e-08, "epoch": 4.6581524870366815, "percentage": 93.16, "elapsed_time": "3:11:25", "remaining_time": "0:14:02", "throughput": 19938.69, "total_tokens": 228998528}
|
|
{"current_steps": 72770, "total_steps": 78105, "loss": 0.1141, "lr": 7.075313058000272e-08, "epoch": 4.6584725689776585, "percentage": 93.17, "elapsed_time": "3:11:25", "remaining_time": "0:14:02", "throughput": 19938.87, "total_tokens": 229014272}
|
|
{"current_steps": 72775, "total_steps": 78105, "loss": 0.133, "lr": 7.062122441814312e-08, "epoch": 4.6587926509186355, "percentage": 93.18, "elapsed_time": "3:11:26", "remaining_time": "0:14:01", "throughput": 19939.11, "total_tokens": 229031168}
|
|
{"current_steps": 72780, "total_steps": 78105, "loss": 0.1359, "lr": 7.048943956680543e-08, "epoch": 4.659112732859612, "percentage": 93.18, "elapsed_time": "3:11:27", "remaining_time": "0:14:00", "throughput": 19939.29, "total_tokens": 229046976}
|
|
{"current_steps": 72785, "total_steps": 78105, "loss": 0.1714, "lr": 7.035777603257021e-08, "epoch": 4.659432814800589, "percentage": 93.19, "elapsed_time": "3:11:27", "remaining_time": "0:13:59", "throughput": 19939.5, "total_tokens": 229062912}
|
|
{"current_steps": 72790, "total_steps": 78105, "loss": 0.1059, "lr": 7.022623382201193e-08, "epoch": 4.659752896741566, "percentage": 93.2, "elapsed_time": "3:11:28", "remaining_time": "0:13:58", "throughput": 19939.68, "total_tokens": 229078464}
|
|
{"current_steps": 72795, "total_steps": 78105, "loss": 0.1921, "lr": 7.009481294169978e-08, "epoch": 4.6600729786825426, "percentage": 93.2, "elapsed_time": "3:11:29", "remaining_time": "0:13:58", "throughput": 19939.85, "total_tokens": 229093696}
|
|
{"current_steps": 72800, "total_steps": 78105, "loss": 0.1094, "lr": 6.996351339819518e-08, "epoch": 4.6603930606235195, "percentage": 93.21, "elapsed_time": "3:11:29", "remaining_time": "0:13:57", "throughput": 19940.12, "total_tokens": 229111296}
|
|
{"current_steps": 72805, "total_steps": 78105, "loss": 0.1231, "lr": 6.983233519805593e-08, "epoch": 4.6607131425644965, "percentage": 93.21, "elapsed_time": "3:11:30", "remaining_time": "0:13:56", "throughput": 19940.3, "total_tokens": 229126720}
|
|
{"current_steps": 72810, "total_steps": 78105, "loss": 0.1054, "lr": 6.970127834783069e-08, "epoch": 4.6610332245054735, "percentage": 93.22, "elapsed_time": "3:11:31", "remaining_time": "0:13:55", "throughput": 19940.54, "total_tokens": 229144064}
|
|
{"current_steps": 72815, "total_steps": 78105, "loss": 0.1294, "lr": 6.957034285406506e-08, "epoch": 4.6613533064464505, "percentage": 93.23, "elapsed_time": "3:11:32", "remaining_time": "0:13:54", "throughput": 19940.72, "total_tokens": 229159360}
|
|
{"current_steps": 72820, "total_steps": 78105, "loss": 0.1443, "lr": 6.943952872329684e-08, "epoch": 4.6616733883874275, "percentage": 93.23, "elapsed_time": "3:11:32", "remaining_time": "0:13:54", "throughput": 19940.93, "total_tokens": 229176000}
|
|
{"current_steps": 72825, "total_steps": 78105, "loss": 0.1475, "lr": 6.930883596205834e-08, "epoch": 4.6619934703284045, "percentage": 93.24, "elapsed_time": "3:11:33", "remaining_time": "0:13:53", "throughput": 19941.11, "total_tokens": 229191552}
|
|
{"current_steps": 72830, "total_steps": 78105, "loss": 0.1755, "lr": 6.917826457687543e-08, "epoch": 4.662313552269381, "percentage": 93.25, "elapsed_time": "3:11:34", "remaining_time": "0:13:52", "throughput": 19941.31, "total_tokens": 229207296}
|
|
{"current_steps": 72835, "total_steps": 78105, "loss": 0.1581, "lr": 6.904781457426818e-08, "epoch": 4.662633634210358, "percentage": 93.25, "elapsed_time": "3:11:34", "remaining_time": "0:13:51", "throughput": 19941.47, "total_tokens": 229222656}
|
|
{"current_steps": 72840, "total_steps": 78105, "loss": 0.1397, "lr": 6.891748596075082e-08, "epoch": 4.662953716151335, "percentage": 93.26, "elapsed_time": "3:11:35", "remaining_time": "0:13:50", "throughput": 19941.72, "total_tokens": 229239744}
|
|
{"current_steps": 72845, "total_steps": 78105, "loss": 0.1136, "lr": 6.87872787428312e-08, "epoch": 4.663273798092312, "percentage": 93.27, "elapsed_time": "3:11:36", "remaining_time": "0:13:50", "throughput": 19941.88, "total_tokens": 229254528}
|
|
{"current_steps": 72850, "total_steps": 78105, "loss": 0.1586, "lr": 6.865719292701107e-08, "epoch": 4.663593880033289, "percentage": 93.27, "elapsed_time": "3:11:36", "remaining_time": "0:13:49", "throughput": 19942.03, "total_tokens": 229269184}
|
|
{"current_steps": 72855, "total_steps": 78105, "loss": 0.1257, "lr": 6.852722851978661e-08, "epoch": 4.6639139619742656, "percentage": 93.28, "elapsed_time": "3:11:37", "remaining_time": "0:13:48", "throughput": 19942.19, "total_tokens": 229283776}
|
|
{"current_steps": 72860, "total_steps": 78105, "loss": 0.1441, "lr": 6.839738552764707e-08, "epoch": 4.6642340439152425, "percentage": 93.28, "elapsed_time": "3:11:38", "remaining_time": "0:13:47", "throughput": 19942.34, "total_tokens": 229298240}
|
|
{"current_steps": 72865, "total_steps": 78105, "loss": 0.0929, "lr": 6.826766395707618e-08, "epoch": 4.6645541258562195, "percentage": 93.29, "elapsed_time": "3:11:38", "remaining_time": "0:13:46", "throughput": 19942.59, "total_tokens": 229315584}
|
|
{"current_steps": 72870, "total_steps": 78105, "loss": 0.1165, "lr": 6.813806381455207e-08, "epoch": 4.664874207797196, "percentage": 93.3, "elapsed_time": "3:11:39", "remaining_time": "0:13:46", "throughput": 19942.83, "total_tokens": 229332480}
|
|
{"current_steps": 72875, "total_steps": 78105, "loss": 0.11, "lr": 6.80085851065454e-08, "epoch": 4.665194289738173, "percentage": 93.3, "elapsed_time": "3:11:40", "remaining_time": "0:13:45", "throughput": 19943.02, "total_tokens": 229348288}
|
|
{"current_steps": 72880, "total_steps": 78105, "loss": 0.1362, "lr": 6.787922783952294e-08, "epoch": 4.66551437167915, "percentage": 93.31, "elapsed_time": "3:11:40", "remaining_time": "0:13:44", "throughput": 19943.26, "total_tokens": 229365248}
|
|
{"current_steps": 72885, "total_steps": 78105, "loss": 0.129, "lr": 6.774999201994259e-08, "epoch": 4.665834453620127, "percentage": 93.32, "elapsed_time": "3:11:41", "remaining_time": "0:13:43", "throughput": 19943.48, "total_tokens": 229381760}
|
|
{"current_steps": 72890, "total_steps": 78105, "loss": 0.1462, "lr": 6.762087765425918e-08, "epoch": 4.666154535561104, "percentage": 93.32, "elapsed_time": "3:11:42", "remaining_time": "0:13:42", "throughput": 19943.62, "total_tokens": 229396160}
|
|
{"current_steps": 72895, "total_steps": 78105, "loss": 0.0713, "lr": 6.749188474891893e-08, "epoch": 4.666474617502081, "percentage": 93.33, "elapsed_time": "3:11:42", "remaining_time": "0:13:42", "throughput": 19943.8, "total_tokens": 229411712}
|
|
{"current_steps": 72900, "total_steps": 78105, "loss": 0.1271, "lr": 6.736301331036366e-08, "epoch": 4.666794699443058, "percentage": 93.34, "elapsed_time": "3:11:43", "remaining_time": "0:13:41", "throughput": 19944.01, "total_tokens": 229427968}
|
|
{"current_steps": 72905, "total_steps": 78105, "loss": 0.0927, "lr": 6.723426334502847e-08, "epoch": 4.667114781384035, "percentage": 93.34, "elapsed_time": "3:11:44", "remaining_time": "0:13:40", "throughput": 19944.18, "total_tokens": 229443200}
|
|
{"current_steps": 72910, "total_steps": 78105, "loss": 0.1143, "lr": 6.710563485934185e-08, "epoch": 4.667434863325012, "percentage": 93.35, "elapsed_time": "3:11:44", "remaining_time": "0:13:39", "throughput": 19944.38, "total_tokens": 229459200}
|
|
{"current_steps": 72915, "total_steps": 78105, "loss": 0.1747, "lr": 6.69771278597281e-08, "epoch": 4.6677549452659886, "percentage": 93.36, "elapsed_time": "3:11:45", "remaining_time": "0:13:38", "throughput": 19944.61, "total_tokens": 229475712}
|
|
{"current_steps": 72920, "total_steps": 78105, "loss": 0.1402, "lr": 6.684874235260291e-08, "epoch": 4.668075027206965, "percentage": 93.36, "elapsed_time": "3:11:46", "remaining_time": "0:13:38", "throughput": 19944.81, "total_tokens": 229491520}
|
|
{"current_steps": 72925, "total_steps": 78105, "loss": 0.17, "lr": 6.672047834437783e-08, "epoch": 4.668395109147942, "percentage": 93.37, "elapsed_time": "3:11:47", "remaining_time": "0:13:37", "throughput": 19945.02, "total_tokens": 229507392}
|
|
{"current_steps": 72930, "total_steps": 78105, "loss": 0.1274, "lr": 6.659233584145746e-08, "epoch": 4.668715191088919, "percentage": 93.37, "elapsed_time": "3:11:47", "remaining_time": "0:13:36", "throughput": 19945.25, "total_tokens": 229524352}
|
|
{"current_steps": 72935, "total_steps": 78105, "loss": 0.1653, "lr": 6.646431485024085e-08, "epoch": 4.669035273029896, "percentage": 93.38, "elapsed_time": "3:11:48", "remaining_time": "0:13:35", "throughput": 19945.43, "total_tokens": 229539968}
|
|
{"current_steps": 72940, "total_steps": 78105, "loss": 0.1243, "lr": 6.633641537712066e-08, "epoch": 4.669355354970873, "percentage": 93.39, "elapsed_time": "3:11:49", "remaining_time": "0:13:34", "throughput": 19945.6, "total_tokens": 229554944}
|
|
{"current_steps": 72945, "total_steps": 78105, "loss": 0.1262, "lr": 6.620863742848288e-08, "epoch": 4.66967543691185, "percentage": 93.39, "elapsed_time": "3:11:49", "remaining_time": "0:13:34", "throughput": 19945.81, "total_tokens": 229571200}
|
|
{"current_steps": 72950, "total_steps": 78105, "loss": 0.1765, "lr": 6.608098101070908e-08, "epoch": 4.669995518852827, "percentage": 93.4, "elapsed_time": "3:11:50", "remaining_time": "0:13:33", "throughput": 19946.0, "total_tokens": 229586880}
|
|
{"current_steps": 72955, "total_steps": 78105, "loss": 0.1208, "lr": 6.595344613017307e-08, "epoch": 4.670315600793804, "percentage": 93.41, "elapsed_time": "3:11:51", "remaining_time": "0:13:32", "throughput": 19946.17, "total_tokens": 229601792}
|
|
{"current_steps": 72960, "total_steps": 78105, "loss": 0.1383, "lr": 6.582603279324362e-08, "epoch": 4.67063568273478, "percentage": 93.41, "elapsed_time": "3:11:51", "remaining_time": "0:13:31", "throughput": 19946.35, "total_tokens": 229617024}
|
|
{"current_steps": 72965, "total_steps": 78105, "loss": 0.1161, "lr": 6.569874100628288e-08, "epoch": 4.670955764675757, "percentage": 93.42, "elapsed_time": "3:11:52", "remaining_time": "0:13:30", "throughput": 19946.58, "total_tokens": 229633728}
|
|
{"current_steps": 72970, "total_steps": 78105, "loss": 0.1409, "lr": 6.557157077564713e-08, "epoch": 4.671275846616734, "percentage": 93.43, "elapsed_time": "3:11:53", "remaining_time": "0:13:30", "throughput": 19946.76, "total_tokens": 229648960}
|
|
{"current_steps": 72975, "total_steps": 78105, "loss": 0.1149, "lr": 6.54445221076866e-08, "epoch": 4.671595928557711, "percentage": 93.43, "elapsed_time": "3:11:53", "remaining_time": "0:13:29", "throughput": 19946.95, "total_tokens": 229664704}
|
|
{"current_steps": 72980, "total_steps": 78105, "loss": 0.1062, "lr": 6.531759500874563e-08, "epoch": 4.671916010498688, "percentage": 93.44, "elapsed_time": "3:11:54", "remaining_time": "0:13:28", "throughput": 19947.1, "total_tokens": 229679232}
|
|
{"current_steps": 72985, "total_steps": 78105, "loss": 0.1484, "lr": 6.519078948516194e-08, "epoch": 4.672236092439665, "percentage": 93.44, "elapsed_time": "3:11:55", "remaining_time": "0:13:27", "throughput": 19947.34, "total_tokens": 229696128}
|
|
{"current_steps": 72990, "total_steps": 78105, "loss": 0.1004, "lr": 6.506410554326825e-08, "epoch": 4.672556174380642, "percentage": 93.45, "elapsed_time": "3:11:55", "remaining_time": "0:13:27", "throughput": 19947.5, "total_tokens": 229710656}
|
|
{"current_steps": 72995, "total_steps": 78105, "loss": 0.1867, "lr": 6.493754318938949e-08, "epoch": 4.672876256321619, "percentage": 93.46, "elapsed_time": "3:11:56", "remaining_time": "0:13:26", "throughput": 19947.69, "total_tokens": 229726592}
|
|
{"current_steps": 73000, "total_steps": 78105, "loss": 0.1095, "lr": 6.481110242984645e-08, "epoch": 4.673196338262596, "percentage": 93.46, "elapsed_time": "3:11:57", "remaining_time": "0:13:25", "throughput": 19947.88, "total_tokens": 229742336}
|
|
{"current_steps": 73005, "total_steps": 78105, "loss": 0.0999, "lr": 6.468478327095212e-08, "epoch": 4.673516420203573, "percentage": 93.47, "elapsed_time": "3:11:57", "remaining_time": "0:13:24", "throughput": 19948.04, "total_tokens": 229757312}
|
|
{"current_steps": 73010, "total_steps": 78105, "loss": 0.1799, "lr": 6.455858571901508e-08, "epoch": 4.673836502144549, "percentage": 93.48, "elapsed_time": "3:11:58", "remaining_time": "0:13:23", "throughput": 19948.18, "total_tokens": 229771520}
|
|
{"current_steps": 73015, "total_steps": 78105, "loss": 0.0701, "lr": 6.44325097803361e-08, "epoch": 4.674156584085526, "percentage": 93.48, "elapsed_time": "3:11:59", "remaining_time": "0:13:23", "throughput": 19948.38, "total_tokens": 229787648}
|
|
{"current_steps": 73020, "total_steps": 78105, "loss": 0.0831, "lr": 6.430655546121129e-08, "epoch": 4.674476666026503, "percentage": 93.49, "elapsed_time": "3:11:59", "remaining_time": "0:13:22", "throughput": 19948.55, "total_tokens": 229803072}
|
|
{"current_steps": 73025, "total_steps": 78105, "loss": 0.1144, "lr": 6.418072276793031e-08, "epoch": 4.67479674796748, "percentage": 93.5, "elapsed_time": "3:12:00", "remaining_time": "0:13:21", "throughput": 19948.74, "total_tokens": 229818880}
|
|
{"current_steps": 73030, "total_steps": 78105, "loss": 0.1126, "lr": 6.405501170677591e-08, "epoch": 4.675116829908457, "percentage": 93.5, "elapsed_time": "3:12:01", "remaining_time": "0:13:20", "throughput": 19948.94, "total_tokens": 229834944}
|
|
{"current_steps": 73035, "total_steps": 78105, "loss": 0.1023, "lr": 6.392942228402616e-08, "epoch": 4.675436911849434, "percentage": 93.51, "elapsed_time": "3:12:01", "remaining_time": "0:13:19", "throughput": 19949.12, "total_tokens": 229850624}
|
|
{"current_steps": 73040, "total_steps": 78105, "loss": 0.1349, "lr": 6.380395450595184e-08, "epoch": 4.675756993790411, "percentage": 93.52, "elapsed_time": "3:12:02", "remaining_time": "0:13:19", "throughput": 19949.28, "total_tokens": 229865920}
|
|
{"current_steps": 73045, "total_steps": 78105, "loss": 0.109, "lr": 6.367860837881851e-08, "epoch": 4.676077075731387, "percentage": 93.52, "elapsed_time": "3:12:03", "remaining_time": "0:13:18", "throughput": 19949.43, "total_tokens": 229880320}
|
|
{"current_steps": 73050, "total_steps": 78105, "loss": 0.149, "lr": 6.355338390888505e-08, "epoch": 4.676397157672364, "percentage": 93.53, "elapsed_time": "3:12:03", "remaining_time": "0:13:17", "throughput": 19949.65, "total_tokens": 229896640}
|
|
{"current_steps": 73055, "total_steps": 78105, "loss": 0.1463, "lr": 6.342828110240451e-08, "epoch": 4.676717239613341, "percentage": 93.53, "elapsed_time": "3:12:04", "remaining_time": "0:13:16", "throughput": 19949.83, "total_tokens": 229911936}
|
|
{"current_steps": 73060, "total_steps": 78105, "loss": 0.1401, "lr": 6.330329996562412e-08, "epoch": 4.677037321554318, "percentage": 93.54, "elapsed_time": "3:12:05", "remaining_time": "0:13:15", "throughput": 19950.06, "total_tokens": 229929088}
|
|
{"current_steps": 73065, "total_steps": 78105, "loss": 0.1218, "lr": 6.317844050478445e-08, "epoch": 4.677357403495295, "percentage": 93.55, "elapsed_time": "3:12:05", "remaining_time": "0:13:15", "throughput": 19950.24, "total_tokens": 229944512}
|
|
{"current_steps": 73070, "total_steps": 78105, "loss": 0.1105, "lr": 6.305370272612078e-08, "epoch": 4.677677485436272, "percentage": 93.55, "elapsed_time": "3:12:06", "remaining_time": "0:13:14", "throughput": 19950.42, "total_tokens": 229960064}
|
|
{"current_steps": 73075, "total_steps": 78105, "loss": 0.2026, "lr": 6.292908663586144e-08, "epoch": 4.677997567377249, "percentage": 93.56, "elapsed_time": "3:12:07", "remaining_time": "0:13:13", "throughput": 19950.6, "total_tokens": 229975232}
|
|
{"current_steps": 73080, "total_steps": 78105, "loss": 0.1484, "lr": 6.280459224022928e-08, "epoch": 4.678317649318226, "percentage": 93.57, "elapsed_time": "3:12:07", "remaining_time": "0:13:12", "throughput": 19950.84, "total_tokens": 229992256}
|
|
{"current_steps": 73085, "total_steps": 78105, "loss": 0.1312, "lr": 6.268021954544095e-08, "epoch": 4.678637731259203, "percentage": 93.57, "elapsed_time": "3:12:08", "remaining_time": "0:13:11", "throughput": 19950.99, "total_tokens": 230006976}
|
|
{"current_steps": 73090, "total_steps": 78105, "loss": 0.136, "lr": 6.255596855770707e-08, "epoch": 4.67895781320018, "percentage": 93.58, "elapsed_time": "3:12:09", "remaining_time": "0:13:11", "throughput": 19951.14, "total_tokens": 230021568}
|
|
{"current_steps": 73095, "total_steps": 78105, "loss": 0.1165, "lr": 6.243183928323181e-08, "epoch": 4.679277895141156, "percentage": 93.59, "elapsed_time": "3:12:09", "remaining_time": "0:13:10", "throughput": 19951.35, "total_tokens": 230037632}
|
|
{"current_steps": 73100, "total_steps": 78105, "loss": 0.1218, "lr": 6.230783172821359e-08, "epoch": 4.679597977082133, "percentage": 93.59, "elapsed_time": "3:12:10", "remaining_time": "0:13:09", "throughput": 19951.53, "total_tokens": 230053184}
|
|
{"current_steps": 73105, "total_steps": 78105, "loss": 0.1192, "lr": 6.218394589884464e-08, "epoch": 4.67991805902311, "percentage": 93.6, "elapsed_time": "3:12:11", "remaining_time": "0:13:08", "throughput": 19951.7, "total_tokens": 230068416}
|
|
{"current_steps": 73110, "total_steps": 78105, "loss": 0.1385, "lr": 6.206018180131168e-08, "epoch": 4.680238140964087, "percentage": 93.6, "elapsed_time": "3:12:11", "remaining_time": "0:13:07", "throughput": 19951.9, "total_tokens": 230084672}
|
|
{"current_steps": 73115, "total_steps": 78105, "loss": 0.0944, "lr": 6.193653944179423e-08, "epoch": 4.680558222905064, "percentage": 93.61, "elapsed_time": "3:12:12", "remaining_time": "0:13:07", "throughput": 19952.13, "total_tokens": 230101376}
|
|
{"current_steps": 73120, "total_steps": 78105, "loss": 0.1419, "lr": 6.181301882646679e-08, "epoch": 4.680878304846041, "percentage": 93.62, "elapsed_time": "3:12:13", "remaining_time": "0:13:06", "throughput": 19952.31, "total_tokens": 230116864}
|
|
{"current_steps": 73125, "total_steps": 78105, "loss": 0.1384, "lr": 6.168961996149692e-08, "epoch": 4.681198386787018, "percentage": 93.62, "elapsed_time": "3:12:14", "remaining_time": "0:13:05", "throughput": 19952.52, "total_tokens": 230133248}
|
|
{"current_steps": 73130, "total_steps": 78105, "loss": 0.1422, "lr": 6.156634285304691e-08, "epoch": 4.681518468727995, "percentage": 93.63, "elapsed_time": "3:12:14", "remaining_time": "0:13:04", "throughput": 19952.71, "total_tokens": 230148800}
|
|
{"current_steps": 73135, "total_steps": 78105, "loss": 0.1046, "lr": 6.144318750727213e-08, "epoch": 4.681838550668971, "percentage": 93.64, "elapsed_time": "3:12:15", "remaining_time": "0:13:03", "throughput": 19952.88, "total_tokens": 230164032}
|
|
{"current_steps": 73140, "total_steps": 78105, "loss": 0.1448, "lr": 6.132015393032264e-08, "epoch": 4.682158632609948, "percentage": 93.64, "elapsed_time": "3:12:16", "remaining_time": "0:13:03", "throughput": 19953.08, "total_tokens": 230180288}
|
|
{"current_steps": 73145, "total_steps": 78105, "loss": 0.1762, "lr": 6.119724212834244e-08, "epoch": 4.682478714550925, "percentage": 93.65, "elapsed_time": "3:12:16", "remaining_time": "0:13:02", "throughput": 19953.27, "total_tokens": 230195904}
|
|
{"current_steps": 73150, "total_steps": 78105, "loss": 0.1421, "lr": 6.107445210746826e-08, "epoch": 4.682798796491902, "percentage": 93.66, "elapsed_time": "3:12:17", "remaining_time": "0:13:01", "throughput": 19953.45, "total_tokens": 230211264}
|
|
{"current_steps": 73155, "total_steps": 78105, "loss": 0.121, "lr": 6.095178387383217e-08, "epoch": 4.683118878432879, "percentage": 93.66, "elapsed_time": "3:12:18", "remaining_time": "0:13:00", "throughput": 19953.64, "total_tokens": 230226816}
|
|
{"current_steps": 73160, "total_steps": 78105, "loss": 0.1636, "lr": 6.082923743355951e-08, "epoch": 4.683438960373856, "percentage": 93.67, "elapsed_time": "3:12:18", "remaining_time": "0:12:59", "throughput": 19953.84, "total_tokens": 230242816}
|
|
{"current_steps": 73165, "total_steps": 78105, "loss": 0.1264, "lr": 6.070681279276958e-08, "epoch": 4.683759042314833, "percentage": 93.68, "elapsed_time": "3:12:19", "remaining_time": "0:12:59", "throughput": 19954.01, "total_tokens": 230257792}
|
|
{"current_steps": 73170, "total_steps": 78105, "loss": 0.1512, "lr": 6.05845099575758e-08, "epoch": 4.68407912425581, "percentage": 93.68, "elapsed_time": "3:12:20", "remaining_time": "0:12:58", "throughput": 19954.17, "total_tokens": 230272448}
|
|
{"current_steps": 73175, "total_steps": 78105, "loss": 0.187, "lr": 6.046232893408499e-08, "epoch": 4.684399206196787, "percentage": 93.69, "elapsed_time": "3:12:20", "remaining_time": "0:12:57", "throughput": 19954.35, "total_tokens": 230287936}
|
|
{"current_steps": 73180, "total_steps": 78105, "loss": 0.1061, "lr": 6.034026972839807e-08, "epoch": 4.684719288137764, "percentage": 93.69, "elapsed_time": "3:12:21", "remaining_time": "0:12:56", "throughput": 19954.53, "total_tokens": 230303424}
|
|
{"current_steps": 73185, "total_steps": 78105, "loss": 0.0877, "lr": 6.021833234661045e-08, "epoch": 4.68503937007874, "percentage": 93.7, "elapsed_time": "3:12:22", "remaining_time": "0:12:55", "throughput": 19954.73, "total_tokens": 230319552}
|
|
{"current_steps": 73190, "total_steps": 78105, "loss": 0.1239, "lr": 6.009651679481116e-08, "epoch": 4.685359452019717, "percentage": 93.71, "elapsed_time": "3:12:22", "remaining_time": "0:12:55", "throughput": 19954.9, "total_tokens": 230334528}
|
|
{"current_steps": 73195, "total_steps": 78105, "loss": 0.1369, "lr": 5.997482307908281e-08, "epoch": 4.685679533960694, "percentage": 93.71, "elapsed_time": "3:12:23", "remaining_time": "0:12:54", "throughput": 19955.07, "total_tokens": 230349568}
|
|
{"current_steps": 73200, "total_steps": 78105, "loss": 0.1013, "lr": 5.985325120550223e-08, "epoch": 4.685999615901671, "percentage": 93.72, "elapsed_time": "3:12:24", "remaining_time": "0:12:53", "throughput": 19955.27, "total_tokens": 230365696}
|
|
{"current_steps": 73205, "total_steps": 78105, "loss": 0.1414, "lr": 5.973180118013982e-08, "epoch": 4.686319697842648, "percentage": 93.73, "elapsed_time": "3:12:24", "remaining_time": "0:12:52", "throughput": 19955.48, "total_tokens": 230382016}
|
|
{"current_steps": 73210, "total_steps": 78105, "loss": 0.1835, "lr": 5.961047300906047e-08, "epoch": 4.686639779783625, "percentage": 93.73, "elapsed_time": "3:12:25", "remaining_time": "0:12:51", "throughput": 19955.66, "total_tokens": 230397696}
|
|
{"current_steps": 73215, "total_steps": 78105, "loss": 0.1048, "lr": 5.9489266698322387e-08, "epoch": 4.686959861724602, "percentage": 93.74, "elapsed_time": "3:12:26", "remaining_time": "0:12:51", "throughput": 19955.83, "total_tokens": 230412864}
|
|
{"current_steps": 73220, "total_steps": 78105, "loss": 0.0997, "lr": 5.936818225397878e-08, "epoch": 4.687279943665579, "percentage": 93.75, "elapsed_time": "3:12:26", "remaining_time": "0:12:50", "throughput": 19956.01, "total_tokens": 230427968}
|
|
{"current_steps": 73225, "total_steps": 78105, "loss": 0.1102, "lr": 5.924721968207453e-08, "epoch": 4.687600025606555, "percentage": 93.75, "elapsed_time": "3:12:27", "remaining_time": "0:12:49", "throughput": 19956.21, "total_tokens": 230443776}
|
|
{"current_steps": 73230, "total_steps": 78105, "loss": 0.1537, "lr": 5.912637898865148e-08, "epoch": 4.687920107547532, "percentage": 93.76, "elapsed_time": "3:12:28", "remaining_time": "0:12:48", "throughput": 19956.38, "total_tokens": 230458880}
|
|
{"current_steps": 73235, "total_steps": 78105, "loss": 0.1252, "lr": 5.900566017974257e-08, "epoch": 4.688240189488509, "percentage": 93.76, "elapsed_time": "3:12:28", "remaining_time": "0:12:47", "throughput": 19956.55, "total_tokens": 230473920}
|
|
{"current_steps": 73240, "total_steps": 78105, "loss": 0.153, "lr": 5.888506326137633e-08, "epoch": 4.688560271429486, "percentage": 93.77, "elapsed_time": "3:12:29", "remaining_time": "0:12:47", "throughput": 19956.76, "total_tokens": 230489792}
|
|
{"current_steps": 73245, "total_steps": 78105, "loss": 0.1257, "lr": 5.876458823957487e-08, "epoch": 4.688880353370463, "percentage": 93.78, "elapsed_time": "3:12:30", "remaining_time": "0:12:46", "throughput": 19956.93, "total_tokens": 230504832}
|
|
{"current_steps": 73250, "total_steps": 78105, "loss": 0.1214, "lr": 5.8644235120353665e-08, "epoch": 4.68920043531144, "percentage": 93.78, "elapsed_time": "3:12:30", "remaining_time": "0:12:45", "throughput": 19957.12, "total_tokens": 230520192}
|
|
{"current_steps": 73255, "total_steps": 78105, "loss": 0.1276, "lr": 5.852400390972318e-08, "epoch": 4.689520517252417, "percentage": 93.79, "elapsed_time": "3:12:31", "remaining_time": "0:12:44", "throughput": 19957.3, "total_tokens": 230535680}
|
|
{"current_steps": 73260, "total_steps": 78105, "loss": 0.1105, "lr": 5.84038946136864e-08, "epoch": 4.689840599193394, "percentage": 93.8, "elapsed_time": "3:12:32", "remaining_time": "0:12:43", "throughput": 19957.47, "total_tokens": 230550528}
|
|
{"current_steps": 73265, "total_steps": 78105, "loss": 0.1535, "lr": 5.828390723824184e-08, "epoch": 4.690160681134371, "percentage": 93.8, "elapsed_time": "3:12:32", "remaining_time": "0:12:43", "throughput": 19957.64, "total_tokens": 230565440}
|
|
{"current_steps": 73270, "total_steps": 78105, "loss": 0.1366, "lr": 5.816404178938001e-08, "epoch": 4.690480763075348, "percentage": 93.81, "elapsed_time": "3:12:33", "remaining_time": "0:12:42", "throughput": 19957.87, "total_tokens": 230582208}
|
|
{"current_steps": 73275, "total_steps": 78105, "loss": 0.1885, "lr": 5.804429827308694e-08, "epoch": 4.690800845016324, "percentage": 93.82, "elapsed_time": "3:12:34", "remaining_time": "0:12:41", "throughput": 19958.06, "total_tokens": 230597696}
|
|
{"current_steps": 73280, "total_steps": 78105, "loss": 0.0957, "lr": 5.792467669534202e-08, "epoch": 4.691120926957301, "percentage": 93.82, "elapsed_time": "3:12:34", "remaining_time": "0:12:40", "throughput": 19958.27, "total_tokens": 230613888}
|
|
{"current_steps": 73285, "total_steps": 78105, "loss": 0.2248, "lr": 5.7805177062118525e-08, "epoch": 4.691441008898278, "percentage": 93.83, "elapsed_time": "3:12:35", "remaining_time": "0:12:40", "throughput": 19958.46, "total_tokens": 230629632}
|
|
{"current_steps": 73290, "total_steps": 78105, "loss": 0.163, "lr": 5.768579937938335e-08, "epoch": 4.691761090839255, "percentage": 93.84, "elapsed_time": "3:12:36", "remaining_time": "0:12:39", "throughput": 19958.66, "total_tokens": 230645440}
|
|
{"current_steps": 73295, "total_steps": 78105, "loss": 0.1181, "lr": 5.756654365309783e-08, "epoch": 4.692081172780232, "percentage": 93.84, "elapsed_time": "3:12:36", "remaining_time": "0:12:38", "throughput": 19958.82, "total_tokens": 230660544}
|
|
{"current_steps": 73300, "total_steps": 78105, "loss": 0.16, "lr": 5.744740988921721e-08, "epoch": 4.692401254721209, "percentage": 93.85, "elapsed_time": "3:12:37", "remaining_time": "0:12:37", "throughput": 19959.05, "total_tokens": 230677312}
|
|
{"current_steps": 73305, "total_steps": 78105, "loss": 0.1196, "lr": 5.732839809368979e-08, "epoch": 4.692721336662186, "percentage": 93.85, "elapsed_time": "3:12:38", "remaining_time": "0:12:36", "throughput": 19959.25, "total_tokens": 230693312}
|
|
{"current_steps": 73310, "total_steps": 78105, "loss": 0.157, "lr": 5.7209508272459135e-08, "epoch": 4.693041418603162, "percentage": 93.86, "elapsed_time": "3:12:38", "remaining_time": "0:12:36", "throughput": 19959.45, "total_tokens": 230709632}
|
|
{"current_steps": 73315, "total_steps": 78105, "loss": 0.15, "lr": 5.7090740431461346e-08, "epoch": 4.693361500544139, "percentage": 93.87, "elapsed_time": "3:12:39", "remaining_time": "0:12:35", "throughput": 19959.65, "total_tokens": 230725632}
|
|
{"current_steps": 73320, "total_steps": 78105, "loss": 0.1249, "lr": 5.69720945766275e-08, "epoch": 4.693681582485116, "percentage": 93.87, "elapsed_time": "3:12:40", "remaining_time": "0:12:34", "throughput": 19959.85, "total_tokens": 230741632}
|
|
{"current_steps": 73325, "total_steps": 78105, "loss": 0.1326, "lr": 5.685357071388203e-08, "epoch": 4.694001664426093, "percentage": 93.88, "elapsed_time": "3:12:40", "remaining_time": "0:12:33", "throughput": 19960.02, "total_tokens": 230757184}
|
|
{"current_steps": 73330, "total_steps": 78105, "loss": 0.1176, "lr": 5.673516884914326e-08, "epoch": 4.69432174636707, "percentage": 93.89, "elapsed_time": "3:12:41", "remaining_time": "0:12:32", "throughput": 19960.16, "total_tokens": 230771520}
|
|
{"current_steps": 73335, "total_steps": 78105, "loss": 0.142, "lr": 5.6616888988323404e-08, "epoch": 4.694641828308047, "percentage": 93.89, "elapsed_time": "3:12:42", "remaining_time": "0:12:32", "throughput": 19960.34, "total_tokens": 230787136}
|
|
{"current_steps": 73340, "total_steps": 78105, "loss": 0.2031, "lr": 5.64987311373294e-08, "epoch": 4.694961910249024, "percentage": 93.9, "elapsed_time": "3:12:43", "remaining_time": "0:12:31", "throughput": 19960.61, "total_tokens": 230805184}
|
|
{"current_steps": 73345, "total_steps": 78105, "loss": 0.0862, "lr": 5.6380695302060696e-08, "epoch": 4.695281992190001, "percentage": 93.91, "elapsed_time": "3:12:43", "remaining_time": "0:12:30", "throughput": 19960.78, "total_tokens": 230820288}
|
|
{"current_steps": 73350, "total_steps": 78105, "loss": 0.1266, "lr": 5.626278148841174e-08, "epoch": 4.695602074130978, "percentage": 93.91, "elapsed_time": "3:12:44", "remaining_time": "0:12:29", "throughput": 19960.95, "total_tokens": 230835456}
|
|
{"current_steps": 73355, "total_steps": 78105, "loss": 0.1365, "lr": 5.614498970227061e-08, "epoch": 4.695922156071955, "percentage": 93.92, "elapsed_time": "3:12:45", "remaining_time": "0:12:28", "throughput": 19961.18, "total_tokens": 230852480}
|
|
{"current_steps": 73360, "total_steps": 78105, "loss": 0.1164, "lr": 5.6027319949519264e-08, "epoch": 4.696242238012931, "percentage": 93.92, "elapsed_time": "3:12:45", "remaining_time": "0:12:28", "throughput": 19961.4, "total_tokens": 230868992}
|
|
{"current_steps": 73365, "total_steps": 78105, "loss": 0.1405, "lr": 5.590977223603328e-08, "epoch": 4.696562319953908, "percentage": 93.93, "elapsed_time": "3:12:46", "remaining_time": "0:12:27", "throughput": 19961.63, "total_tokens": 230885760}
|
|
{"current_steps": 73370, "total_steps": 78105, "loss": 0.0959, "lr": 5.579234656768212e-08, "epoch": 4.696882401894885, "percentage": 93.94, "elapsed_time": "3:12:47", "remaining_time": "0:12:26", "throughput": 19961.88, "total_tokens": 230903232}
|
|
{"current_steps": 73375, "total_steps": 78105, "loss": 0.106, "lr": 5.567504295033027e-08, "epoch": 4.697202483835862, "percentage": 93.94, "elapsed_time": "3:12:47", "remaining_time": "0:12:25", "throughput": 19962.05, "total_tokens": 230918272}
|
|
{"current_steps": 73380, "total_steps": 78105, "loss": 0.1122, "lr": 5.555786138983443e-08, "epoch": 4.697522565776839, "percentage": 93.95, "elapsed_time": "3:12:48", "remaining_time": "0:12:24", "throughput": 19962.24, "total_tokens": 230934272}
|
|
{"current_steps": 73385, "total_steps": 78105, "loss": 0.1117, "lr": 5.5440801892046594e-08, "epoch": 4.697842647717816, "percentage": 93.96, "elapsed_time": "3:12:49", "remaining_time": "0:12:24", "throughput": 19962.44, "total_tokens": 230950144}
|
|
{"current_steps": 73390, "total_steps": 78105, "loss": 0.1378, "lr": 5.53238644628118e-08, "epoch": 4.698162729658793, "percentage": 93.96, "elapsed_time": "3:12:49", "remaining_time": "0:12:23", "throughput": 19962.61, "total_tokens": 230965184}
|
|
{"current_steps": 73395, "total_steps": 78105, "loss": 0.1683, "lr": 5.5207049107969257e-08, "epoch": 4.69848281159977, "percentage": 93.97, "elapsed_time": "3:12:50", "remaining_time": "0:12:22", "throughput": 19962.81, "total_tokens": 230981312}
|
|
{"current_steps": 73400, "total_steps": 78105, "loss": 0.1005, "lr": 5.509035583335237e-08, "epoch": 4.698802893540746, "percentage": 93.98, "elapsed_time": "3:12:51", "remaining_time": "0:12:21", "throughput": 19962.97, "total_tokens": 230996288}
|
|
{"current_steps": 73405, "total_steps": 78105, "loss": 0.1176, "lr": 5.497378464478787e-08, "epoch": 4.699122975481723, "percentage": 93.98, "elapsed_time": "3:12:51", "remaining_time": "0:12:20", "throughput": 19963.13, "total_tokens": 231011520}
|
|
{"current_steps": 73410, "total_steps": 78105, "loss": 0.1081, "lr": 5.485733554809719e-08, "epoch": 4.6994430574227, "percentage": 93.99, "elapsed_time": "3:12:52", "remaining_time": "0:12:20", "throughput": 19963.31, "total_tokens": 231026880}
|
|
{"current_steps": 73415, "total_steps": 78105, "loss": 0.147, "lr": 5.474100854909431e-08, "epoch": 4.699763139363677, "percentage": 94.0, "elapsed_time": "3:12:53", "remaining_time": "0:12:19", "throughput": 19963.52, "total_tokens": 231043072}
|
|
{"current_steps": 73420, "total_steps": 78105, "loss": 0.1295, "lr": 5.462480365358902e-08, "epoch": 4.700083221304654, "percentage": 94.0, "elapsed_time": "3:12:53", "remaining_time": "0:12:18", "throughput": 19963.68, "total_tokens": 231058048}
|
|
{"current_steps": 73425, "total_steps": 78105, "loss": 0.1301, "lr": 5.450872086738335e-08, "epoch": 4.700403303245631, "percentage": 94.01, "elapsed_time": "3:12:54", "remaining_time": "0:12:17", "throughput": 19963.85, "total_tokens": 231073344}
|
|
{"current_steps": 73430, "total_steps": 78105, "loss": 0.1716, "lr": 5.439276019627432e-08, "epoch": 4.700723385186608, "percentage": 94.01, "elapsed_time": "3:12:55", "remaining_time": "0:12:16", "throughput": 19964.03, "total_tokens": 231088704}
|
|
{"current_steps": 73435, "total_steps": 78105, "loss": 0.1269, "lr": 5.427692164605203e-08, "epoch": 4.701043467127585, "percentage": 94.02, "elapsed_time": "3:12:55", "remaining_time": "0:12:16", "throughput": 19964.2, "total_tokens": 231103744}
|
|
{"current_steps": 73440, "total_steps": 78105, "loss": 0.1007, "lr": 5.416120522250101e-08, "epoch": 4.701363549068562, "percentage": 94.03, "elapsed_time": "3:12:56", "remaining_time": "0:12:15", "throughput": 19964.39, "total_tokens": 231119360}
|
|
{"current_steps": 73445, "total_steps": 78105, "loss": 0.1522, "lr": 5.404561093139915e-08, "epoch": 4.701683631009539, "percentage": 94.03, "elapsed_time": "3:12:57", "remaining_time": "0:12:14", "throughput": 19964.55, "total_tokens": 231134848}
|
|
{"current_steps": 73450, "total_steps": 78105, "loss": 0.1559, "lr": 5.393013877851932e-08, "epoch": 4.702003712950515, "percentage": 94.04, "elapsed_time": "3:12:57", "remaining_time": "0:12:13", "throughput": 19964.76, "total_tokens": 231151040}
|
|
{"current_steps": 73455, "total_steps": 78105, "loss": 0.1347, "lr": 5.381478876962692e-08, "epoch": 4.702323794891492, "percentage": 94.05, "elapsed_time": "3:12:58", "remaining_time": "0:12:12", "throughput": 19964.92, "total_tokens": 231165824}
|
|
{"current_steps": 73460, "total_steps": 78105, "loss": 0.1306, "lr": 5.36995609104829e-08, "epoch": 4.702643876832469, "percentage": 94.05, "elapsed_time": "3:12:59", "remaining_time": "0:12:12", "throughput": 19965.06, "total_tokens": 231180608}
|
|
{"current_steps": 73465, "total_steps": 78105, "loss": 0.1666, "lr": 5.3584455206839855e-08, "epoch": 4.702963958773446, "percentage": 94.06, "elapsed_time": "3:12:59", "remaining_time": "0:12:11", "throughput": 19965.28, "total_tokens": 231196992}
|
|
{"current_steps": 73470, "total_steps": 78105, "loss": 0.1021, "lr": 5.346947166444655e-08, "epoch": 4.703284040714423, "percentage": 94.07, "elapsed_time": "3:13:00", "remaining_time": "0:12:10", "throughput": 19965.43, "total_tokens": 231211712}
|
|
{"current_steps": 73475, "total_steps": 78105, "loss": 0.1717, "lr": 5.335461028904421e-08, "epoch": 4.7036041226554, "percentage": 94.07, "elapsed_time": "3:13:01", "remaining_time": "0:12:09", "throughput": 19965.6, "total_tokens": 231226688}
|
|
{"current_steps": 73480, "total_steps": 78105, "loss": 0.1499, "lr": 5.3239871086368245e-08, "epoch": 4.703924204596377, "percentage": 94.08, "elapsed_time": "3:13:01", "remaining_time": "0:12:08", "throughput": 19965.78, "total_tokens": 231241856}
|
|
{"current_steps": 73485, "total_steps": 78105, "loss": 0.0853, "lr": 5.31252540621488e-08, "epoch": 4.704244286537354, "percentage": 94.08, "elapsed_time": "3:13:02", "remaining_time": "0:12:08", "throughput": 19966.0, "total_tokens": 231258560}
|
|
{"current_steps": 73490, "total_steps": 78105, "loss": 0.1042, "lr": 5.301075922210852e-08, "epoch": 4.70456436847833, "percentage": 94.09, "elapsed_time": "3:13:03", "remaining_time": "0:12:07", "throughput": 19966.18, "total_tokens": 231274240}
|
|
{"current_steps": 73495, "total_steps": 78105, "loss": 0.1418, "lr": 5.2896386571965605e-08, "epoch": 4.704884450419307, "percentage": 94.1, "elapsed_time": "3:13:03", "remaining_time": "0:12:06", "throughput": 19966.36, "total_tokens": 231290048}
|
|
{"current_steps": 73500, "total_steps": 78105, "loss": 0.1013, "lr": 5.278213611742994e-08, "epoch": 4.705204532360284, "percentage": 94.1, "elapsed_time": "3:13:04", "remaining_time": "0:12:05", "throughput": 19966.55, "total_tokens": 231306048}
|
|
{"current_steps": 73505, "total_steps": 78105, "loss": 0.1174, "lr": 5.266800786420778e-08, "epoch": 4.705524614301261, "percentage": 94.11, "elapsed_time": "3:13:05", "remaining_time": "0:12:05", "throughput": 19966.78, "total_tokens": 231322816}
|
|
{"current_steps": 73510, "total_steps": 78105, "loss": 0.137, "lr": 5.2554001817997636e-08, "epoch": 4.705844696242238, "percentage": 94.12, "elapsed_time": "3:13:06", "remaining_time": "0:12:04", "throughput": 19966.92, "total_tokens": 231337280}
|
|
{"current_steps": 73515, "total_steps": 78105, "loss": 0.1297, "lr": 5.2440117984492166e-08, "epoch": 4.706164778183215, "percentage": 94.12, "elapsed_time": "3:13:06", "remaining_time": "0:12:03", "throughput": 19967.1, "total_tokens": 231352960}
|
|
{"current_steps": 73520, "total_steps": 78105, "loss": 0.1608, "lr": 5.23263563693785e-08, "epoch": 4.706484860124192, "percentage": 94.13, "elapsed_time": "3:13:07", "remaining_time": "0:12:02", "throughput": 19967.27, "total_tokens": 231368064}
|
|
{"current_steps": 73525, "total_steps": 78105, "loss": 0.1151, "lr": 5.22127169783368e-08, "epoch": 4.706804942065169, "percentage": 94.14, "elapsed_time": "3:13:08", "remaining_time": "0:12:01", "throughput": 19967.49, "total_tokens": 231384832}
|
|
{"current_steps": 73530, "total_steps": 78105, "loss": 0.1378, "lr": 5.209919981704198e-08, "epoch": 4.707125024006146, "percentage": 94.14, "elapsed_time": "3:13:08", "remaining_time": "0:12:01", "throughput": 19967.7, "total_tokens": 231401088}
|
|
{"current_steps": 73535, "total_steps": 78105, "loss": 0.105, "lr": 5.198580489116284e-08, "epoch": 4.707445105947123, "percentage": 94.15, "elapsed_time": "3:13:09", "remaining_time": "0:12:00", "throughput": 19967.88, "total_tokens": 231416896}
|
|
{"current_steps": 73540, "total_steps": 78105, "loss": 0.0956, "lr": 5.1872532206361234e-08, "epoch": 4.707765187888099, "percentage": 94.16, "elapsed_time": "3:13:10", "remaining_time": "0:11:59", "throughput": 19968.07, "total_tokens": 231432320}
|
|
{"current_steps": 73545, "total_steps": 78105, "loss": 0.1142, "lr": 5.175938176829348e-08, "epoch": 4.708085269829076, "percentage": 94.16, "elapsed_time": "3:13:10", "remaining_time": "0:11:58", "throughput": 19968.32, "total_tokens": 231449600}
|
|
{"current_steps": 73550, "total_steps": 78105, "loss": 0.1118, "lr": 5.1646353582609777e-08, "epoch": 4.708405351770053, "percentage": 94.17, "elapsed_time": "3:13:11", "remaining_time": "0:11:57", "throughput": 19968.51, "total_tokens": 231465536}
|
|
{"current_steps": 73555, "total_steps": 78105, "loss": 0.1166, "lr": 5.153344765495421e-08, "epoch": 4.70872543371103, "percentage": 94.17, "elapsed_time": "3:13:12", "remaining_time": "0:11:57", "throughput": 19968.69, "total_tokens": 231480704}
|
|
{"current_steps": 73560, "total_steps": 78105, "loss": 0.1093, "lr": 5.1420663990964514e-08, "epoch": 4.709045515652007, "percentage": 94.18, "elapsed_time": "3:13:12", "remaining_time": "0:11:56", "throughput": 19968.86, "total_tokens": 231496256}
|
|
{"current_steps": 73565, "total_steps": 78105, "loss": 0.1134, "lr": 5.1308002596272555e-08, "epoch": 4.709365597592984, "percentage": 94.19, "elapsed_time": "3:13:13", "remaining_time": "0:11:55", "throughput": 19969.05, "total_tokens": 231512128}
|
|
{"current_steps": 73570, "total_steps": 78105, "loss": 0.147, "lr": 5.119546347650467e-08, "epoch": 4.709685679533961, "percentage": 94.19, "elapsed_time": "3:13:14", "remaining_time": "0:11:54", "throughput": 19969.24, "total_tokens": 231527872}
|
|
{"current_steps": 73575, "total_steps": 78105, "loss": 0.1331, "lr": 5.10830466372797e-08, "epoch": 4.710005761474937, "percentage": 94.2, "elapsed_time": "3:13:14", "remaining_time": "0:11:53", "throughput": 19969.43, "total_tokens": 231543424}
|
|
{"current_steps": 73580, "total_steps": 78105, "loss": 0.1523, "lr": 5.097075208421148e-08, "epoch": 4.710325843415914, "percentage": 94.21, "elapsed_time": "3:13:15", "remaining_time": "0:11:53", "throughput": 19969.61, "total_tokens": 231559040}
|
|
{"current_steps": 73585, "total_steps": 78105, "loss": 0.1265, "lr": 5.0858579822907206e-08, "epoch": 4.710645925356891, "percentage": 94.21, "elapsed_time": "3:13:16", "remaining_time": "0:11:52", "throughput": 19969.86, "total_tokens": 231576704}
|
|
{"current_steps": 73590, "total_steps": 78105, "loss": 0.1183, "lr": 5.07465298589685e-08, "epoch": 4.710966007297868, "percentage": 94.22, "elapsed_time": "3:13:16", "remaining_time": "0:11:51", "throughput": 19970.05, "total_tokens": 231592000}
|
|
{"current_steps": 73595, "total_steps": 78105, "loss": 0.1462, "lr": 5.06346021979906e-08, "epoch": 4.711286089238845, "percentage": 94.23, "elapsed_time": "3:13:17", "remaining_time": "0:11:50", "throughput": 19970.21, "total_tokens": 231607168}
|
|
{"current_steps": 73600, "total_steps": 78105, "loss": 0.1004, "lr": 5.0522796845561826e-08, "epoch": 4.711606171179822, "percentage": 94.23, "elapsed_time": "3:13:18", "remaining_time": "0:11:49", "throughput": 19970.42, "total_tokens": 231623424}
|
|
{"current_steps": 73605, "total_steps": 78105, "loss": 0.1133, "lr": 5.041111380726632e-08, "epoch": 4.711926253120799, "percentage": 94.24, "elapsed_time": "3:13:18", "remaining_time": "0:11:49", "throughput": 19970.59, "total_tokens": 231638592}
|
|
{"current_steps": 73610, "total_steps": 78105, "loss": 0.1748, "lr": 5.0299553088679896e-08, "epoch": 4.712246335061776, "percentage": 94.24, "elapsed_time": "3:13:19", "remaining_time": "0:11:48", "throughput": 19970.76, "total_tokens": 231653504}
|
|
{"current_steps": 73615, "total_steps": 78105, "loss": 0.1306, "lr": 5.018811469537394e-08, "epoch": 4.712566417002753, "percentage": 94.25, "elapsed_time": "3:13:20", "remaining_time": "0:11:47", "throughput": 19970.91, "total_tokens": 231668032}
|
|
{"current_steps": 73620, "total_steps": 78105, "loss": 0.1682, "lr": 5.007679863291315e-08, "epoch": 4.71288649894373, "percentage": 94.26, "elapsed_time": "3:13:20", "remaining_time": "0:11:46", "throughput": 19971.07, "total_tokens": 231683072}
|
|
{"current_steps": 73625, "total_steps": 78105, "loss": 0.1183, "lr": 4.9965604906855594e-08, "epoch": 4.713206580884706, "percentage": 94.26, "elapsed_time": "3:13:21", "remaining_time": "0:11:45", "throughput": 19971.23, "total_tokens": 231697792}
|
|
{"current_steps": 73630, "total_steps": 78105, "loss": 0.1674, "lr": 4.9854533522754045e-08, "epoch": 4.713526662825683, "percentage": 94.27, "elapsed_time": "3:13:22", "remaining_time": "0:11:45", "throughput": 19971.39, "total_tokens": 231713024}
|
|
{"current_steps": 73635, "total_steps": 78105, "loss": 0.1751, "lr": 4.974358448615463e-08, "epoch": 4.71384674476666, "percentage": 94.28, "elapsed_time": "3:13:22", "remaining_time": "0:11:44", "throughput": 19971.57, "total_tokens": 231728704}
|
|
{"current_steps": 73640, "total_steps": 78105, "loss": 0.1221, "lr": 4.963275780259791e-08, "epoch": 4.714166826707637, "percentage": 94.28, "elapsed_time": "3:13:23", "remaining_time": "0:11:43", "throughput": 19971.72, "total_tokens": 231743360}
|
|
{"current_steps": 73645, "total_steps": 78105, "loss": 0.1398, "lr": 4.952205347761751e-08, "epoch": 4.714486908648614, "percentage": 94.29, "elapsed_time": "3:13:24", "remaining_time": "0:11:42", "throughput": 19971.9, "total_tokens": 231758784}
|
|
{"current_steps": 73650, "total_steps": 78105, "loss": 0.1405, "lr": 4.9411471516741806e-08, "epoch": 4.714806990589591, "percentage": 94.3, "elapsed_time": "3:13:24", "remaining_time": "0:11:41", "throughput": 19972.07, "total_tokens": 231773888}
|
|
{"current_steps": 73655, "total_steps": 78105, "loss": 0.1416, "lr": 4.930101192549275e-08, "epoch": 4.715127072530568, "percentage": 94.3, "elapsed_time": "3:13:25", "remaining_time": "0:11:41", "throughput": 19972.23, "total_tokens": 231788800}
|
|
{"current_steps": 73660, "total_steps": 78105, "loss": 0.1014, "lr": 4.9190674709385655e-08, "epoch": 4.715447154471545, "percentage": 94.31, "elapsed_time": "3:13:26", "remaining_time": "0:11:40", "throughput": 19972.4, "total_tokens": 231804032}
|
|
{"current_steps": 73665, "total_steps": 78105, "loss": 0.1118, "lr": 4.908045987393056e-08, "epoch": 4.715767236412521, "percentage": 94.32, "elapsed_time": "3:13:26", "remaining_time": "0:11:39", "throughput": 19972.55, "total_tokens": 231818752}
|
|
{"current_steps": 73670, "total_steps": 78105, "loss": 0.1301, "lr": 4.8970367424631125e-08, "epoch": 4.716087318353498, "percentage": 94.32, "elapsed_time": "3:13:27", "remaining_time": "0:11:38", "throughput": 19972.7, "total_tokens": 231833664}
|
|
{"current_steps": 73675, "total_steps": 78105, "loss": 0.0802, "lr": 4.886039736698406e-08, "epoch": 4.716407400294475, "percentage": 94.33, "elapsed_time": "3:13:28", "remaining_time": "0:11:37", "throughput": 19972.93, "total_tokens": 231850496}
|
|
{"current_steps": 73680, "total_steps": 78105, "loss": 0.1247, "lr": 4.875054970648191e-08, "epoch": 4.716727482235452, "percentage": 94.33, "elapsed_time": "3:13:28", "remaining_time": "0:11:37", "throughput": 19973.11, "total_tokens": 231866176}
|
|
{"current_steps": 73685, "total_steps": 78105, "loss": 0.1106, "lr": 4.864082444860862e-08, "epoch": 4.717047564176429, "percentage": 94.34, "elapsed_time": "3:13:29", "remaining_time": "0:11:36", "throughput": 19973.31, "total_tokens": 231882176}
|
|
{"current_steps": 73690, "total_steps": 78105, "loss": 0.127, "lr": 4.8531221598844545e-08, "epoch": 4.717367646117406, "percentage": 94.35, "elapsed_time": "3:13:30", "remaining_time": "0:11:35", "throughput": 19973.5, "total_tokens": 231897984}
|
|
{"current_steps": 73695, "total_steps": 78105, "loss": 0.1445, "lr": 4.84217411626614e-08, "epoch": 4.717687728058383, "percentage": 94.35, "elapsed_time": "3:13:30", "remaining_time": "0:11:34", "throughput": 19973.72, "total_tokens": 231914304}
|
|
{"current_steps": 73700, "total_steps": 78105, "loss": 0.1687, "lr": 4.8312383145526774e-08, "epoch": 4.71800780999936, "percentage": 94.36, "elapsed_time": "3:13:31", "remaining_time": "0:11:34", "throughput": 19973.89, "total_tokens": 231929600}
|
|
{"current_steps": 73705, "total_steps": 78105, "loss": 0.1082, "lr": 4.8203147552901566e-08, "epoch": 4.718327891940337, "percentage": 94.37, "elapsed_time": "3:13:32", "remaining_time": "0:11:33", "throughput": 19974.05, "total_tokens": 231944576}
|
|
{"current_steps": 73710, "total_steps": 78105, "loss": 0.1516, "lr": 4.809403439024002e-08, "epoch": 4.718647973881314, "percentage": 94.37, "elapsed_time": "3:13:33", "remaining_time": "0:11:32", "throughput": 19974.27, "total_tokens": 231961408}
|
|
{"current_steps": 73715, "total_steps": 78105, "loss": 0.1671, "lr": 4.7985043662991395e-08, "epoch": 4.71896805582229, "percentage": 94.38, "elapsed_time": "3:13:33", "remaining_time": "0:11:31", "throughput": 19974.44, "total_tokens": 231976448}
|
|
{"current_steps": 73720, "total_steps": 78105, "loss": 0.1444, "lr": 4.787617537659689e-08, "epoch": 4.719288137763267, "percentage": 94.39, "elapsed_time": "3:13:34", "remaining_time": "0:11:30", "throughput": 19974.6, "total_tokens": 231990912}
|
|
{"current_steps": 73725, "total_steps": 78105, "loss": 0.1042, "lr": 4.776742953649438e-08, "epoch": 4.719608219704244, "percentage": 94.39, "elapsed_time": "3:13:35", "remaining_time": "0:11:30", "throughput": 19974.89, "total_tokens": 232009024}
|
|
{"current_steps": 73730, "total_steps": 78105, "loss": 0.1367, "lr": 4.765880614811258e-08, "epoch": 4.719928301645221, "percentage": 94.4, "elapsed_time": "3:13:35", "remaining_time": "0:11:29", "throughput": 19975.05, "total_tokens": 232024192}
|
|
{"current_steps": 73735, "total_steps": 78105, "loss": 0.1457, "lr": 4.7550305216876315e-08, "epoch": 4.720248383586198, "percentage": 94.4, "elapsed_time": "3:13:36", "remaining_time": "0:11:28", "throughput": 19975.23, "total_tokens": 232039424}
|
|
{"current_steps": 73740, "total_steps": 78105, "loss": 0.1621, "lr": 4.744192674820347e-08, "epoch": 4.720568465527175, "percentage": 94.41, "elapsed_time": "3:13:37", "remaining_time": "0:11:27", "throughput": 19975.41, "total_tokens": 232054656}
|
|
{"current_steps": 73745, "total_steps": 78105, "loss": 0.1297, "lr": 4.733367074750611e-08, "epoch": 4.720888547468152, "percentage": 94.42, "elapsed_time": "3:13:37", "remaining_time": "0:11:26", "throughput": 19975.64, "total_tokens": 232071680}
|
|
{"current_steps": 73750, "total_steps": 78105, "loss": 0.1854, "lr": 4.722553722018963e-08, "epoch": 4.721208629409129, "percentage": 94.42, "elapsed_time": "3:13:38", "remaining_time": "0:11:26", "throughput": 19975.81, "total_tokens": 232087168}
|
|
{"current_steps": 73755, "total_steps": 78105, "loss": 0.1434, "lr": 4.711752617165388e-08, "epoch": 4.721528711350105, "percentage": 94.43, "elapsed_time": "3:13:39", "remaining_time": "0:11:25", "throughput": 19976.02, "total_tokens": 232103040}
|
|
{"current_steps": 73760, "total_steps": 78105, "loss": 0.1494, "lr": 4.700963760729177e-08, "epoch": 4.721848793291082, "percentage": 94.44, "elapsed_time": "3:13:39", "remaining_time": "0:11:24", "throughput": 19976.18, "total_tokens": 232117888}
|
|
{"current_steps": 73765, "total_steps": 78105, "loss": 0.1116, "lr": 4.690187153249176e-08, "epoch": 4.722168875232059, "percentage": 94.44, "elapsed_time": "3:13:40", "remaining_time": "0:11:23", "throughput": 19976.37, "total_tokens": 232133248}
|
|
{"current_steps": 73770, "total_steps": 78105, "loss": 0.1319, "lr": 4.679422795263428e-08, "epoch": 4.722488957173036, "percentage": 94.45, "elapsed_time": "3:13:41", "remaining_time": "0:11:22", "throughput": 19976.51, "total_tokens": 232147584}
|
|
{"current_steps": 73775, "total_steps": 78105, "loss": 0.1414, "lr": 4.668670687309501e-08, "epoch": 4.722809039114013, "percentage": 94.46, "elapsed_time": "3:13:41", "remaining_time": "0:11:22", "throughput": 19976.69, "total_tokens": 232163008}
|
|
{"current_steps": 73780, "total_steps": 78105, "loss": 0.113, "lr": 4.657930829924273e-08, "epoch": 4.72312912105499, "percentage": 94.46, "elapsed_time": "3:13:42", "remaining_time": "0:11:21", "throughput": 19976.91, "total_tokens": 232179328}
|
|
{"current_steps": 73785, "total_steps": 78105, "loss": 0.1288, "lr": 4.6472032236440364e-08, "epoch": 4.723449202995967, "percentage": 94.47, "elapsed_time": "3:13:43", "remaining_time": "0:11:20", "throughput": 19977.06, "total_tokens": 232194496}
|
|
{"current_steps": 73790, "total_steps": 78105, "loss": 0.109, "lr": 4.636487869004475e-08, "epoch": 4.723769284936944, "percentage": 94.48, "elapsed_time": "3:13:43", "remaining_time": "0:11:19", "throughput": 19977.23, "total_tokens": 232209280}
|
|
{"current_steps": 73795, "total_steps": 78105, "loss": 0.1194, "lr": 4.625784766540631e-08, "epoch": 4.724089366877921, "percentage": 94.48, "elapsed_time": "3:13:44", "remaining_time": "0:11:18", "throughput": 19977.43, "total_tokens": 232225536}
|
|
{"current_steps": 73800, "total_steps": 78105, "loss": 0.1079, "lr": 4.615093916787022e-08, "epoch": 4.724409448818898, "percentage": 94.49, "elapsed_time": "3:13:45", "remaining_time": "0:11:18", "throughput": 19977.65, "total_tokens": 232242176}
|
|
{"current_steps": 73805, "total_steps": 78105, "loss": 0.1189, "lr": 4.6044153202774166e-08, "epoch": 4.724729530759874, "percentage": 94.49, "elapsed_time": "3:13:45", "remaining_time": "0:11:17", "throughput": 19977.9, "total_tokens": 232259712}
|
|
{"current_steps": 73810, "total_steps": 78105, "loss": 0.1489, "lr": 4.5937489775451374e-08, "epoch": 4.725049612700851, "percentage": 94.5, "elapsed_time": "3:13:46", "remaining_time": "0:11:16", "throughput": 19978.12, "total_tokens": 232276480}
|
|
{"current_steps": 73815, "total_steps": 78105, "loss": 0.1006, "lr": 4.58309488912273e-08, "epoch": 4.725369694641828, "percentage": 94.51, "elapsed_time": "3:13:47", "remaining_time": "0:11:15", "throughput": 19978.31, "total_tokens": 232292224}
|
|
{"current_steps": 73820, "total_steps": 78105, "loss": 0.1326, "lr": 4.5724530555422416e-08, "epoch": 4.725689776582805, "percentage": 94.51, "elapsed_time": "3:13:47", "remaining_time": "0:11:14", "throughput": 19978.48, "total_tokens": 232307392}
|
|
{"current_steps": 73825, "total_steps": 78105, "loss": 0.1721, "lr": 4.561823477335081e-08, "epoch": 4.726009858523782, "percentage": 94.52, "elapsed_time": "3:13:48", "remaining_time": "0:11:14", "throughput": 19978.65, "total_tokens": 232322752}
|
|
{"current_steps": 73830, "total_steps": 78105, "loss": 0.0902, "lr": 4.55120615503199e-08, "epoch": 4.726329940464759, "percentage": 94.53, "elapsed_time": "3:13:49", "remaining_time": "0:11:13", "throughput": 19978.82, "total_tokens": 232338304}
|
|
{"current_steps": 73835, "total_steps": 78105, "loss": 0.1177, "lr": 4.5406010891632104e-08, "epoch": 4.726650022405736, "percentage": 94.53, "elapsed_time": "3:13:49", "remaining_time": "0:11:12", "throughput": 19979.02, "total_tokens": 232354432}
|
|
{"current_steps": 73840, "total_steps": 78105, "loss": 0.141, "lr": 4.530008280258208e-08, "epoch": 4.726970104346712, "percentage": 94.54, "elapsed_time": "3:13:50", "remaining_time": "0:11:11", "throughput": 19979.19, "total_tokens": 232369920}
|
|
{"current_steps": 73845, "total_steps": 78105, "loss": 0.1157, "lr": 4.519427728846004e-08, "epoch": 4.727290186287689, "percentage": 94.55, "elapsed_time": "3:13:51", "remaining_time": "0:11:10", "throughput": 19979.36, "total_tokens": 232384896}
|
|
{"current_steps": 73850, "total_steps": 78105, "loss": 0.1335, "lr": 4.508859435454926e-08, "epoch": 4.727610268228666, "percentage": 94.55, "elapsed_time": "3:13:51", "remaining_time": "0:11:10", "throughput": 19979.52, "total_tokens": 232400064}
|
|
{"current_steps": 73855, "total_steps": 78105, "loss": 0.1583, "lr": 4.498303400612691e-08, "epoch": 4.727930350169643, "percentage": 94.56, "elapsed_time": "3:13:52", "remaining_time": "0:11:09", "throughput": 19979.71, "total_tokens": 232415808}
|
|
{"current_steps": 73860, "total_steps": 78105, "loss": 0.1177, "lr": 4.487759624846405e-08, "epoch": 4.72825043211062, "percentage": 94.57, "elapsed_time": "3:13:53", "remaining_time": "0:11:08", "throughput": 19979.88, "total_tokens": 232431168}
|
|
{"current_steps": 73865, "total_steps": 78105, "loss": 0.1467, "lr": 4.4772281086825905e-08, "epoch": 4.728570514051597, "percentage": 94.57, "elapsed_time": "3:13:53", "remaining_time": "0:11:07", "throughput": 19980.07, "total_tokens": 232447232}
|
|
{"current_steps": 73870, "total_steps": 78105, "loss": 0.1056, "lr": 4.466708852647106e-08, "epoch": 4.728890595992574, "percentage": 94.58, "elapsed_time": "3:13:54", "remaining_time": "0:11:07", "throughput": 19980.24, "total_tokens": 232462464}
|
|
{"current_steps": 73875, "total_steps": 78105, "loss": 0.2092, "lr": 4.4562018572652524e-08, "epoch": 4.729210677933551, "percentage": 94.58, "elapsed_time": "3:13:55", "remaining_time": "0:11:06", "throughput": 19980.53, "total_tokens": 232481600}
|
|
{"current_steps": 73880, "total_steps": 78105, "loss": 0.1094, "lr": 4.4457071230616656e-08, "epoch": 4.729530759874528, "percentage": 94.59, "elapsed_time": "3:13:56", "remaining_time": "0:11:05", "throughput": 19980.78, "total_tokens": 232498752}
|
|
{"current_steps": 73885, "total_steps": 78105, "loss": 0.1342, "lr": 4.435224650560427e-08, "epoch": 4.729850841815505, "percentage": 94.6, "elapsed_time": "3:13:56", "remaining_time": "0:11:04", "throughput": 19980.97, "total_tokens": 232514496}
|
|
{"current_steps": 73890, "total_steps": 78105, "loss": 0.122, "lr": 4.424754440285006e-08, "epoch": 4.730170923756481, "percentage": 94.6, "elapsed_time": "3:13:57", "remaining_time": "0:11:03", "throughput": 19981.14, "total_tokens": 232529664}
|
|
{"current_steps": 73895, "total_steps": 78105, "loss": 0.1481, "lr": 4.414296492758152e-08, "epoch": 4.730491005697458, "percentage": 94.61, "elapsed_time": "3:13:58", "remaining_time": "0:11:03", "throughput": 19981.34, "total_tokens": 232546176}
|
|
{"current_steps": 73900, "total_steps": 78105, "loss": 0.1273, "lr": 4.403850808502141e-08, "epoch": 4.730811087638435, "percentage": 94.62, "elapsed_time": "3:13:58", "remaining_time": "0:11:02", "throughput": 19981.52, "total_tokens": 232561280}
|
|
{"current_steps": 73905, "total_steps": 78105, "loss": 0.1615, "lr": 4.393417388038529e-08, "epoch": 4.731131169579412, "percentage": 94.62, "elapsed_time": "3:13:59", "remaining_time": "0:11:01", "throughput": 19981.74, "total_tokens": 232577792}
|
|
{"current_steps": 73910, "total_steps": 78105, "loss": 0.0995, "lr": 4.382996231888398e-08, "epoch": 4.731451251520389, "percentage": 94.63, "elapsed_time": "3:14:00", "remaining_time": "0:11:00", "throughput": 19981.94, "total_tokens": 232593728}
|
|
{"current_steps": 73915, "total_steps": 78105, "loss": 0.1221, "lr": 4.372587340571999e-08, "epoch": 4.731771333461366, "percentage": 94.64, "elapsed_time": "3:14:00", "remaining_time": "0:10:59", "throughput": 19982.11, "total_tokens": 232609024}
|
|
{"current_steps": 73920, "total_steps": 78105, "loss": 0.1173, "lr": 4.362190714609221e-08, "epoch": 4.732091415402343, "percentage": 94.64, "elapsed_time": "3:14:01", "remaining_time": "0:10:59", "throughput": 19982.27, "total_tokens": 232624064}
|
|
{"current_steps": 73925, "total_steps": 78105, "loss": 0.109, "lr": 4.3518063545190947e-08, "epoch": 4.73241149734332, "percentage": 94.65, "elapsed_time": "3:14:02", "remaining_time": "0:10:58", "throughput": 19982.5, "total_tokens": 232641024}
|
|
{"current_steps": 73930, "total_steps": 78105, "loss": 0.1378, "lr": 4.3414342608202595e-08, "epoch": 4.732731579284296, "percentage": 94.65, "elapsed_time": "3:14:02", "remaining_time": "0:10:57", "throughput": 19982.69, "total_tokens": 232656768}
|
|
{"current_steps": 73935, "total_steps": 78105, "loss": 0.1154, "lr": 4.3310744340306075e-08, "epoch": 4.733051661225273, "percentage": 94.66, "elapsed_time": "3:14:03", "remaining_time": "0:10:56", "throughput": 19982.87, "total_tokens": 232672576}
|
|
{"current_steps": 73940, "total_steps": 78105, "loss": 0.1172, "lr": 4.320726874667447e-08, "epoch": 4.73337174316625, "percentage": 94.67, "elapsed_time": "3:14:04", "remaining_time": "0:10:55", "throughput": 19983.05, "total_tokens": 232687872}
|
|
{"current_steps": 73945, "total_steps": 78105, "loss": 0.1152, "lr": 4.310391583247503e-08, "epoch": 4.733691825107227, "percentage": 94.67, "elapsed_time": "3:14:04", "remaining_time": "0:10:55", "throughput": 19983.25, "total_tokens": 232704000}
|
|
{"current_steps": 73950, "total_steps": 78105, "loss": 0.1181, "lr": 4.300068560286835e-08, "epoch": 4.734011907048204, "percentage": 94.68, "elapsed_time": "3:14:05", "remaining_time": "0:10:54", "throughput": 19983.43, "total_tokens": 232719552}
|
|
{"current_steps": 73955, "total_steps": 78105, "loss": 0.0953, "lr": 4.289757806300948e-08, "epoch": 4.734331988989181, "percentage": 94.69, "elapsed_time": "3:14:06", "remaining_time": "0:10:53", "throughput": 19983.6, "total_tokens": 232735040}
|
|
{"current_steps": 73960, "total_steps": 78105, "loss": 0.1417, "lr": 4.27945932180468e-08, "epoch": 4.734652070930158, "percentage": 94.69, "elapsed_time": "3:14:07", "remaining_time": "0:10:52", "throughput": 19983.92, "total_tokens": 232754560}
|
|
{"current_steps": 73965, "total_steps": 78105, "loss": 0.1613, "lr": 4.269173107312341e-08, "epoch": 4.734972152871135, "percentage": 94.7, "elapsed_time": "3:14:07", "remaining_time": "0:10:51", "throughput": 19984.12, "total_tokens": 232770688}
|
|
{"current_steps": 73970, "total_steps": 78105, "loss": 0.138, "lr": 4.258899163337521e-08, "epoch": 4.735292234812112, "percentage": 94.71, "elapsed_time": "3:14:08", "remaining_time": "0:10:51", "throughput": 19984.29, "total_tokens": 232786048}
|
|
{"current_steps": 73975, "total_steps": 78105, "loss": 0.1359, "lr": 4.248637490393226e-08, "epoch": 4.735612316753089, "percentage": 94.71, "elapsed_time": "3:14:09", "remaining_time": "0:10:50", "throughput": 19984.45, "total_tokens": 232801408}
|
|
{"current_steps": 73980, "total_steps": 78105, "loss": 0.1005, "lr": 4.238388088991935e-08, "epoch": 4.735932398694065, "percentage": 94.72, "elapsed_time": "3:14:09", "remaining_time": "0:10:49", "throughput": 19984.76, "total_tokens": 232821056}
|
|
{"current_steps": 73985, "total_steps": 78105, "loss": 0.1143, "lr": 4.228150959645405e-08, "epoch": 4.736252480635042, "percentage": 94.73, "elapsed_time": "3:14:10", "remaining_time": "0:10:48", "throughput": 19984.98, "total_tokens": 232837504}
|
|
{"current_steps": 73990, "total_steps": 78105, "loss": 0.1471, "lr": 4.2179261028648385e-08, "epoch": 4.736572562576019, "percentage": 94.73, "elapsed_time": "3:14:11", "remaining_time": "0:10:47", "throughput": 19985.12, "total_tokens": 232852032}
|
|
{"current_steps": 73995, "total_steps": 78105, "loss": 0.0952, "lr": 4.2077135191608e-08, "epoch": 4.736892644516996, "percentage": 94.74, "elapsed_time": "3:14:11", "remaining_time": "0:10:47", "throughput": 19985.31, "total_tokens": 232867904}
|
|
{"current_steps": 74000, "total_steps": 78105, "loss": 0.1332, "lr": 4.197513209043269e-08, "epoch": 4.737212726457973, "percentage": 94.74, "elapsed_time": "3:14:12", "remaining_time": "0:10:46", "throughput": 19985.53, "total_tokens": 232884288}
|
|
{"current_steps": 74005, "total_steps": 78105, "loss": 0.1478, "lr": 4.187325173021617e-08, "epoch": 4.73753280839895, "percentage": 94.75, "elapsed_time": "3:14:13", "remaining_time": "0:10:45", "throughput": 19985.72, "total_tokens": 232900416}
|
|
{"current_steps": 74010, "total_steps": 78105, "loss": 0.1663, "lr": 4.17714941160452e-08, "epoch": 4.737852890339927, "percentage": 94.76, "elapsed_time": "3:14:14", "remaining_time": "0:10:44", "throughput": 19985.94, "total_tokens": 232916928}
|
|
{"current_steps": 74015, "total_steps": 78105, "loss": 0.1142, "lr": 4.166985925300154e-08, "epoch": 4.738172972280904, "percentage": 94.76, "elapsed_time": "3:14:14", "remaining_time": "0:10:44", "throughput": 19986.23, "total_tokens": 232935744}
|
|
{"current_steps": 74020, "total_steps": 78105, "loss": 0.1247, "lr": 4.1568347146160035e-08, "epoch": 4.73849305422188, "percentage": 94.77, "elapsed_time": "3:14:15", "remaining_time": "0:10:43", "throughput": 19986.39, "total_tokens": 232950528}
|
|
{"current_steps": 74025, "total_steps": 78105, "loss": 0.1167, "lr": 4.146695780058968e-08, "epoch": 4.738813136162857, "percentage": 94.78, "elapsed_time": "3:14:16", "remaining_time": "0:10:42", "throughput": 19986.61, "total_tokens": 232966848}
|
|
{"current_steps": 74030, "total_steps": 78105, "loss": 0.0911, "lr": 4.136569122135392e-08, "epoch": 4.739133218103834, "percentage": 94.78, "elapsed_time": "3:14:16", "remaining_time": "0:10:41", "throughput": 19986.81, "total_tokens": 232983040}
|
|
{"current_steps": 74035, "total_steps": 78105, "loss": 0.1299, "lr": 4.126454741350844e-08, "epoch": 4.739453300044811, "percentage": 94.79, "elapsed_time": "3:14:17", "remaining_time": "0:10:40", "throughput": 19987.04, "total_tokens": 232999808}
|
|
{"current_steps": 74040, "total_steps": 78105, "loss": 0.1338, "lr": 4.116352638210447e-08, "epoch": 4.739773381985788, "percentage": 94.8, "elapsed_time": "3:14:18", "remaining_time": "0:10:40", "throughput": 19987.24, "total_tokens": 233016320}
|
|
{"current_steps": 74045, "total_steps": 78105, "loss": 0.1548, "lr": 4.106262813218659e-08, "epoch": 4.740093463926765, "percentage": 94.8, "elapsed_time": "3:14:18", "remaining_time": "0:10:39", "throughput": 19987.47, "total_tokens": 233033216}
|
|
{"current_steps": 74050, "total_steps": 78105, "loss": 0.1068, "lr": 4.096185266879271e-08, "epoch": 4.740413545867742, "percentage": 94.81, "elapsed_time": "3:14:19", "remaining_time": "0:10:38", "throughput": 19987.67, "total_tokens": 233049280}
|
|
{"current_steps": 74055, "total_steps": 78105, "loss": 0.1205, "lr": 4.086119999695548e-08, "epoch": 4.740733627808719, "percentage": 94.81, "elapsed_time": "3:14:20", "remaining_time": "0:10:37", "throughput": 19987.84, "total_tokens": 233064384}
|
|
{"current_steps": 74060, "total_steps": 78105, "loss": 0.1341, "lr": 4.076067012170032e-08, "epoch": 4.741053709749696, "percentage": 94.82, "elapsed_time": "3:14:20", "remaining_time": "0:10:36", "throughput": 19987.99, "total_tokens": 233079168}
|
|
{"current_steps": 74065, "total_steps": 78105, "loss": 0.1492, "lr": 4.066026304804821e-08, "epoch": 4.741373791690673, "percentage": 94.83, "elapsed_time": "3:14:21", "remaining_time": "0:10:36", "throughput": 19988.2, "total_tokens": 233095296}
|
|
{"current_steps": 74070, "total_steps": 78105, "loss": 0.0897, "lr": 4.055997878101181e-08, "epoch": 4.741693873631649, "percentage": 94.83, "elapsed_time": "3:14:22", "remaining_time": "0:10:35", "throughput": 19987.99, "total_tokens": 233111552}
|
|
{"current_steps": 74075, "total_steps": 78105, "loss": 0.1233, "lr": 4.045981732559934e-08, "epoch": 4.742013955572626, "percentage": 94.84, "elapsed_time": "3:14:23", "remaining_time": "0:10:34", "throughput": 19988.19, "total_tokens": 233127488}
|
|
{"current_steps": 74080, "total_steps": 78105, "loss": 0.1122, "lr": 4.035977868681262e-08, "epoch": 4.742334037513603, "percentage": 94.85, "elapsed_time": "3:14:23", "remaining_time": "0:10:33", "throughput": 19988.41, "total_tokens": 233143872}
|
|
{"current_steps": 74085, "total_steps": 78105, "loss": 0.1337, "lr": 4.0259862869646557e-08, "epoch": 4.74265411945458, "percentage": 94.85, "elapsed_time": "3:14:24", "remaining_time": "0:10:32", "throughput": 19988.58, "total_tokens": 233159360}
|
|
{"current_steps": 74090, "total_steps": 78105, "loss": 0.0919, "lr": 4.016006987909077e-08, "epoch": 4.742974201395557, "percentage": 94.86, "elapsed_time": "3:14:25", "remaining_time": "0:10:32", "throughput": 19988.76, "total_tokens": 233174848}
|
|
{"current_steps": 74095, "total_steps": 78105, "loss": 0.1226, "lr": 4.006039972012793e-08, "epoch": 4.743294283336534, "percentage": 94.87, "elapsed_time": "3:14:25", "remaining_time": "0:10:31", "throughput": 19988.97, "total_tokens": 233191296}
|
|
{"current_steps": 74100, "total_steps": 78105, "loss": 0.1095, "lr": 3.996085239773573e-08, "epoch": 4.743614365277511, "percentage": 94.87, "elapsed_time": "3:14:26", "remaining_time": "0:10:30", "throughput": 19989.17, "total_tokens": 233207360}
|
|
{"current_steps": 74105, "total_steps": 78105, "loss": 0.1206, "lr": 3.9861427916884633e-08, "epoch": 4.7439344472184874, "percentage": 94.88, "elapsed_time": "3:14:27", "remaining_time": "0:10:29", "throughput": 19989.32, "total_tokens": 233221888}
|
|
{"current_steps": 74110, "total_steps": 78105, "loss": 0.0942, "lr": 3.9762126282539006e-08, "epoch": 4.744254529159464, "percentage": 94.89, "elapsed_time": "3:14:27", "remaining_time": "0:10:28", "throughput": 19989.49, "total_tokens": 233236992}
|
|
{"current_steps": 74115, "total_steps": 78105, "loss": 0.1296, "lr": 3.966294749965821e-08, "epoch": 4.744574611100441, "percentage": 94.89, "elapsed_time": "3:14:28", "remaining_time": "0:10:28", "throughput": 19989.67, "total_tokens": 233252928}
|
|
{"current_steps": 74120, "total_steps": 78105, "loss": 0.0977, "lr": 3.95638915731944e-08, "epoch": 4.744894693041418, "percentage": 94.9, "elapsed_time": "3:14:29", "remaining_time": "0:10:27", "throughput": 19989.83, "total_tokens": 233267712}
|
|
{"current_steps": 74125, "total_steps": 78105, "loss": 0.0949, "lr": 3.946495850809418e-08, "epoch": 4.745214774982395, "percentage": 94.9, "elapsed_time": "3:14:29", "remaining_time": "0:10:26", "throughput": 19989.96, "total_tokens": 233281984}
|
|
{"current_steps": 74130, "total_steps": 78105, "loss": 0.0871, "lr": 3.9366148309297195e-08, "epoch": 4.745534856923372, "percentage": 94.91, "elapsed_time": "3:14:30", "remaining_time": "0:10:25", "throughput": 19990.14, "total_tokens": 233297472}
|
|
{"current_steps": 74135, "total_steps": 78105, "loss": 0.1606, "lr": 3.926746098173756e-08, "epoch": 4.745854938864349, "percentage": 94.92, "elapsed_time": "3:14:31", "remaining_time": "0:10:25", "throughput": 19990.33, "total_tokens": 233313280}
|
|
{"current_steps": 74140, "total_steps": 78105, "loss": 0.1081, "lr": 3.916889653034384e-08, "epoch": 4.746175020805326, "percentage": 94.92, "elapsed_time": "3:14:31", "remaining_time": "0:10:24", "throughput": 19990.52, "total_tokens": 233328896}
|
|
{"current_steps": 74145, "total_steps": 78105, "loss": 0.1058, "lr": 3.9070454960037086e-08, "epoch": 4.746495102746303, "percentage": 94.93, "elapsed_time": "3:14:32", "remaining_time": "0:10:23", "throughput": 19990.72, "total_tokens": 233345088}
|
|
{"current_steps": 74150, "total_steps": 78105, "loss": 0.0974, "lr": 3.897213627573365e-08, "epoch": 4.74681518468728, "percentage": 94.94, "elapsed_time": "3:14:33", "remaining_time": "0:10:22", "throughput": 19990.92, "total_tokens": 233361216}
|
|
{"current_steps": 74155, "total_steps": 78105, "loss": 0.1023, "lr": 3.8873940482342384e-08, "epoch": 4.7471352666282565, "percentage": 94.94, "elapsed_time": "3:14:34", "remaining_time": "0:10:21", "throughput": 19991.13, "total_tokens": 233377792}
|
|
{"current_steps": 74160, "total_steps": 78105, "loss": 0.1428, "lr": 3.8775867584767145e-08, "epoch": 4.7474553485692335, "percentage": 94.95, "elapsed_time": "3:14:34", "remaining_time": "0:10:21", "throughput": 19991.33, "total_tokens": 233393792}
|
|
{"current_steps": 74165, "total_steps": 78105, "loss": 0.1314, "lr": 3.8677917587904844e-08, "epoch": 4.7477754305102104, "percentage": 94.96, "elapsed_time": "3:14:35", "remaining_time": "0:10:20", "throughput": 19991.52, "total_tokens": 233409408}
|
|
{"current_steps": 74170, "total_steps": 78105, "loss": 0.2069, "lr": 3.858009049664685e-08, "epoch": 4.748095512451187, "percentage": 94.96, "elapsed_time": "3:14:36", "remaining_time": "0:10:19", "throughput": 19991.69, "total_tokens": 233424832}
|
|
{"current_steps": 74175, "total_steps": 78105, "loss": 0.1239, "lr": 3.8482386315878416e-08, "epoch": 4.748415594392164, "percentage": 94.97, "elapsed_time": "3:14:36", "remaining_time": "0:10:18", "throughput": 19991.86, "total_tokens": 233440064}
|
|
{"current_steps": 74180, "total_steps": 78105, "loss": 0.1283, "lr": 3.838480505047759e-08, "epoch": 4.748735676333141, "percentage": 94.97, "elapsed_time": "3:14:37", "remaining_time": "0:10:17", "throughput": 19992.04, "total_tokens": 233455872}
|
|
{"current_steps": 74185, "total_steps": 78105, "loss": 0.1478, "lr": 3.828734670531769e-08, "epoch": 4.749055758274118, "percentage": 94.98, "elapsed_time": "3:14:38", "remaining_time": "0:10:17", "throughput": 19992.22, "total_tokens": 233471168}
|
|
{"current_steps": 74190, "total_steps": 78105, "loss": 0.0953, "lr": 3.819001128526512e-08, "epoch": 4.749375840215095, "percentage": 94.99, "elapsed_time": "3:14:38", "remaining_time": "0:10:16", "throughput": 19992.38, "total_tokens": 233485824}
|
|
{"current_steps": 74195, "total_steps": 78105, "loss": 0.0894, "lr": 3.809279879518013e-08, "epoch": 4.7496959221560715, "percentage": 94.99, "elapsed_time": "3:14:39", "remaining_time": "0:10:15", "throughput": 19992.62, "total_tokens": 233502784}
|
|
{"current_steps": 74200, "total_steps": 78105, "loss": 0.1182, "lr": 3.7995709239917465e-08, "epoch": 4.7500160040970485, "percentage": 95.0, "elapsed_time": "3:14:40", "remaining_time": "0:10:14", "throughput": 19992.88, "total_tokens": 233520512}
|
|
{"current_steps": 74205, "total_steps": 78105, "loss": 0.1649, "lr": 3.7898742624324915e-08, "epoch": 4.7503360860380255, "percentage": 95.01, "elapsed_time": "3:14:40", "remaining_time": "0:10:13", "throughput": 19993.04, "total_tokens": 233535744}
|
|
{"current_steps": 74210, "total_steps": 78105, "loss": 0.1252, "lr": 3.780189895324443e-08, "epoch": 4.7506561679790025, "percentage": 95.01, "elapsed_time": "3:14:41", "remaining_time": "0:10:13", "throughput": 19993.26, "total_tokens": 233552064}
|
|
{"current_steps": 74214, "total_steps": 78105, "eval_loss": 0.6057931184768677, "epoch": 4.750912233531784, "percentage": 95.02, "elapsed_time": "3:15:33", "remaining_time": "0:10:15", "throughput": 19906.52, "total_tokens": 233564288}
|
|
{"current_steps": 74215, "total_steps": 78105, "loss": 0.1157, "lr": 3.7705178231511875e-08, "epoch": 4.7509762499199795, "percentage": 95.02, "elapsed_time": "3:16:07", "remaining_time": "0:10:16", "throughput": 19848.89, "total_tokens": 233567168}
|
|
{"current_steps": 74220, "total_steps": 78105, "loss": 0.1425, "lr": 3.760858046395699e-08, "epoch": 4.7512963318609565, "percentage": 95.03, "elapsed_time": "3:16:07", "remaining_time": "0:10:15", "throughput": 19849.1, "total_tokens": 233583552}
|
|
{"current_steps": 74225, "total_steps": 78105, "loss": 0.1248, "lr": 3.7512105655403974e-08, "epoch": 4.7516164138019334, "percentage": 95.03, "elapsed_time": "3:16:08", "remaining_time": "0:10:15", "throughput": 19849.29, "total_tokens": 233599168}
|
|
{"current_steps": 74230, "total_steps": 78105, "loss": 0.1434, "lr": 3.7415753810669253e-08, "epoch": 4.75193649574291, "percentage": 95.04, "elapsed_time": "3:16:09", "remaining_time": "0:10:14", "throughput": 19849.48, "total_tokens": 233614464}
|
|
{"current_steps": 74235, "total_steps": 78105, "loss": 0.1861, "lr": 3.731952493456509e-08, "epoch": 4.752256577683887, "percentage": 95.05, "elapsed_time": "3:16:10", "remaining_time": "0:10:13", "throughput": 19849.75, "total_tokens": 233632128}
|
|
{"current_steps": 74240, "total_steps": 78105, "loss": 0.1573, "lr": 3.722341903189597e-08, "epoch": 4.752576659624864, "percentage": 95.05, "elapsed_time": "3:16:10", "remaining_time": "0:10:12", "throughput": 19849.92, "total_tokens": 233647232}
|
|
{"current_steps": 74245, "total_steps": 78105, "loss": 0.1575, "lr": 3.712743610746111e-08, "epoch": 4.7528967415658405, "percentage": 95.06, "elapsed_time": "3:16:11", "remaining_time": "0:10:11", "throughput": 19850.11, "total_tokens": 233662720}
|
|
{"current_steps": 74250, "total_steps": 78105, "loss": 0.1193, "lr": 3.703157616605363e-08, "epoch": 4.7532168235068175, "percentage": 95.06, "elapsed_time": "3:16:12", "remaining_time": "0:10:11", "throughput": 19850.28, "total_tokens": 233677824}
|
|
{"current_steps": 74255, "total_steps": 78105, "loss": 0.1434, "lr": 3.693583921245997e-08, "epoch": 4.7535369054477945, "percentage": 95.07, "elapsed_time": "3:16:12", "remaining_time": "0:10:10", "throughput": 19850.48, "total_tokens": 233693568}
|
|
{"current_steps": 74260, "total_steps": 78105, "loss": 0.1683, "lr": 3.684022525146103e-08, "epoch": 4.7538569873887715, "percentage": 95.08, "elapsed_time": "3:16:13", "remaining_time": "0:10:09", "throughput": 19850.66, "total_tokens": 233708864}
|
|
{"current_steps": 74265, "total_steps": 78105, "loss": 0.1419, "lr": 3.6744734287830766e-08, "epoch": 4.7541770693297485, "percentage": 95.08, "elapsed_time": "3:16:14", "remaining_time": "0:10:08", "throughput": 19850.89, "total_tokens": 233725504}
|
|
{"current_steps": 74270, "total_steps": 78105, "loss": 0.1156, "lr": 3.664936632633814e-08, "epoch": 4.7544971512707255, "percentage": 95.09, "elapsed_time": "3:16:14", "remaining_time": "0:10:07", "throughput": 19851.05, "total_tokens": 233740480}
|
|
{"current_steps": 74275, "total_steps": 78105, "loss": 0.1669, "lr": 3.655412137174519e-08, "epoch": 4.7548172332117025, "percentage": 95.1, "elapsed_time": "3:16:15", "remaining_time": "0:10:07", "throughput": 19851.21, "total_tokens": 233754944}
|
|
{"current_steps": 74280, "total_steps": 78105, "loss": 0.1299, "lr": 3.645899942880754e-08, "epoch": 4.7551373151526795, "percentage": 95.1, "elapsed_time": "3:16:16", "remaining_time": "0:10:06", "throughput": 19851.38, "total_tokens": 233769984}
|
|
{"current_steps": 74285, "total_steps": 78105, "loss": 0.1703, "lr": 3.6364000502275285e-08, "epoch": 4.755457397093656, "percentage": 95.11, "elapsed_time": "3:16:16", "remaining_time": "0:10:05", "throughput": 19851.55, "total_tokens": 233784640}
|
|
{"current_steps": 74290, "total_steps": 78105, "loss": 0.1472, "lr": 3.626912459689214e-08, "epoch": 4.7557774790346325, "percentage": 95.12, "elapsed_time": "3:16:17", "remaining_time": "0:10:04", "throughput": 19851.82, "total_tokens": 233802240}
|
|
{"current_steps": 74295, "total_steps": 78105, "loss": 0.1296, "lr": 3.617437171739624e-08, "epoch": 4.7560975609756095, "percentage": 95.12, "elapsed_time": "3:16:18", "remaining_time": "0:10:04", "throughput": 19852.03, "total_tokens": 233818304}
|
|
{"current_steps": 74300, "total_steps": 78105, "loss": 0.1498, "lr": 3.607974186851826e-08, "epoch": 4.7564176429165865, "percentage": 95.13, "elapsed_time": "3:16:18", "remaining_time": "0:10:03", "throughput": 19852.23, "total_tokens": 233834368}
|
|
{"current_steps": 74305, "total_steps": 78105, "loss": 0.1469, "lr": 3.598523505498414e-08, "epoch": 4.7567377248575635, "percentage": 95.13, "elapsed_time": "3:16:19", "remaining_time": "0:10:02", "throughput": 19852.43, "total_tokens": 233850240}
|
|
{"current_steps": 74310, "total_steps": 78105, "loss": 0.125, "lr": 3.5890851281512594e-08, "epoch": 4.7570578067985405, "percentage": 95.14, "elapsed_time": "3:16:20", "remaining_time": "0:10:01", "throughput": 19852.58, "total_tokens": 233864832}
|
|
{"current_steps": 74315, "total_steps": 78105, "loss": 0.1653, "lr": 3.5796590552816815e-08, "epoch": 4.7573778887395175, "percentage": 95.15, "elapsed_time": "3:16:20", "remaining_time": "0:10:00", "throughput": 19852.73, "total_tokens": 233879488}
|
|
{"current_steps": 74320, "total_steps": 78105, "loss": 0.1048, "lr": 3.570245287360386e-08, "epoch": 4.7576979706804945, "percentage": 95.15, "elapsed_time": "3:16:21", "remaining_time": "0:10:00", "throughput": 19852.92, "total_tokens": 233895168}
|
|
{"current_steps": 74325, "total_steps": 78105, "loss": 0.1108, "lr": 3.560843824857441e-08, "epoch": 4.7580180526214715, "percentage": 95.16, "elapsed_time": "3:16:22", "remaining_time": "0:09:59", "throughput": 19853.12, "total_tokens": 233910784}
|
|
{"current_steps": 74330, "total_steps": 78105, "loss": 0.163, "lr": 3.5514546682423044e-08, "epoch": 4.7583381345624485, "percentage": 95.17, "elapsed_time": "3:16:22", "remaining_time": "0:09:58", "throughput": 19853.32, "total_tokens": 233926656}
|
|
{"current_steps": 74335, "total_steps": 78105, "loss": 0.192, "lr": 3.5420778179837964e-08, "epoch": 4.758658216503425, "percentage": 95.17, "elapsed_time": "3:16:23", "remaining_time": "0:09:57", "throughput": 19853.5, "total_tokens": 233941952}
|
|
{"current_steps": 74340, "total_steps": 78105, "loss": 0.1245, "lr": 3.5327132745501535e-08, "epoch": 4.758978298444402, "percentage": 95.18, "elapsed_time": "3:16:24", "remaining_time": "0:09:56", "throughput": 19853.7, "total_tokens": 233957952}
|
|
{"current_steps": 74345, "total_steps": 78105, "loss": 0.1383, "lr": 3.523361038409057e-08, "epoch": 4.759298380385379, "percentage": 95.19, "elapsed_time": "3:16:24", "remaining_time": "0:09:56", "throughput": 19853.91, "total_tokens": 233974400}
|
|
{"current_steps": 74350, "total_steps": 78105, "loss": 0.077, "lr": 3.514021110027438e-08, "epoch": 4.7596184623263555, "percentage": 95.19, "elapsed_time": "3:16:25", "remaining_time": "0:09:55", "throughput": 19854.08, "total_tokens": 233989760}
|
|
{"current_steps": 74355, "total_steps": 78105, "loss": 0.1296, "lr": 3.504693489871702e-08, "epoch": 4.7599385442673325, "percentage": 95.2, "elapsed_time": "3:16:26", "remaining_time": "0:09:54", "throughput": 19854.28, "total_tokens": 234005760}
|
|
{"current_steps": 74360, "total_steps": 78105, "loss": 0.1496, "lr": 3.495378178407643e-08, "epoch": 4.7602586262083095, "percentage": 95.21, "elapsed_time": "3:16:26", "remaining_time": "0:09:53", "throughput": 19854.51, "total_tokens": 234022400}
|
|
{"current_steps": 74365, "total_steps": 78105, "loss": 0.1553, "lr": 3.486075176100362e-08, "epoch": 4.7605787081492865, "percentage": 95.21, "elapsed_time": "3:16:27", "remaining_time": "0:09:52", "throughput": 19854.71, "total_tokens": 234038720}
|
|
{"current_steps": 74370, "total_steps": 78105, "loss": 0.1218, "lr": 3.476784483414486e-08, "epoch": 4.760898790090263, "percentage": 95.22, "elapsed_time": "3:16:28", "remaining_time": "0:09:52", "throughput": 19854.92, "total_tokens": 234054912}
|
|
{"current_steps": 74375, "total_steps": 78105, "loss": 0.1305, "lr": 3.4675061008138664e-08, "epoch": 4.76121887203124, "percentage": 95.22, "elapsed_time": "3:16:28", "remaining_time": "0:09:51", "throughput": 19855.15, "total_tokens": 234071616}
|
|
{"current_steps": 74380, "total_steps": 78105, "loss": 0.1552, "lr": 3.458240028761883e-08, "epoch": 4.761538953972217, "percentage": 95.23, "elapsed_time": "3:16:29", "remaining_time": "0:09:50", "throughput": 19855.35, "total_tokens": 234087808}
|
|
{"current_steps": 74385, "total_steps": 78105, "loss": 0.1138, "lr": 3.4489862677211936e-08, "epoch": 4.761859035913194, "percentage": 95.24, "elapsed_time": "3:16:30", "remaining_time": "0:09:49", "throughput": 19855.52, "total_tokens": 234102912}
|
|
{"current_steps": 74390, "total_steps": 78105, "loss": 0.0945, "lr": 3.4397448181538726e-08, "epoch": 4.762179117854171, "percentage": 95.24, "elapsed_time": "3:16:30", "remaining_time": "0:09:48", "throughput": 19855.7, "total_tokens": 234118400}
|
|
{"current_steps": 74395, "total_steps": 78105, "loss": 0.1406, "lr": 3.43051568052144e-08, "epoch": 4.762499199795148, "percentage": 95.25, "elapsed_time": "3:16:31", "remaining_time": "0:09:48", "throughput": 19855.85, "total_tokens": 234132864}
|
|
{"current_steps": 74400, "total_steps": 78105, "loss": 0.094, "lr": 3.4212988552846936e-08, "epoch": 4.762819281736125, "percentage": 95.26, "elapsed_time": "3:16:32", "remaining_time": "0:09:47", "throughput": 19856.06, "total_tokens": 234149248}
|
|
{"current_steps": 74405, "total_steps": 78105, "loss": 0.118, "lr": 3.4120943429039324e-08, "epoch": 4.763139363677102, "percentage": 95.26, "elapsed_time": "3:16:32", "remaining_time": "0:09:46", "throughput": 19856.21, "total_tokens": 234164224}
|
|
{"current_steps": 74410, "total_steps": 78105, "loss": 0.1164, "lr": 3.402902143838705e-08, "epoch": 4.7634594456180785, "percentage": 95.27, "elapsed_time": "3:16:33", "remaining_time": "0:09:45", "throughput": 19856.41, "total_tokens": 234180288}
|
|
{"current_steps": 74415, "total_steps": 78105, "loss": 0.1564, "lr": 3.393722258548088e-08, "epoch": 4.7637795275590555, "percentage": 95.28, "elapsed_time": "3:16:34", "remaining_time": "0:09:44", "throughput": 19856.59, "total_tokens": 234195840}
|
|
{"current_steps": 74420, "total_steps": 78105, "loss": 0.1233, "lr": 3.384554687490466e-08, "epoch": 4.764099609500032, "percentage": 95.28, "elapsed_time": "3:16:35", "remaining_time": "0:09:44", "throughput": 19856.77, "total_tokens": 234211456}
|
|
{"current_steps": 74425, "total_steps": 78105, "loss": 0.1477, "lr": 3.3753994311236106e-08, "epoch": 4.764419691441009, "percentage": 95.29, "elapsed_time": "3:16:35", "remaining_time": "0:09:43", "throughput": 19856.93, "total_tokens": 234226624}
|
|
{"current_steps": 74430, "total_steps": 78105, "loss": 0.0965, "lr": 3.366256489904685e-08, "epoch": 4.764739773381986, "percentage": 95.29, "elapsed_time": "3:16:36", "remaining_time": "0:09:42", "throughput": 19857.1, "total_tokens": 234242432}
|
|
{"current_steps": 74435, "total_steps": 78105, "loss": 0.1509, "lr": 3.3571258642902394e-08, "epoch": 4.765059855322963, "percentage": 95.3, "elapsed_time": "3:16:37", "remaining_time": "0:09:41", "throughput": 19857.28, "total_tokens": 234257920}
|
|
{"current_steps": 74440, "total_steps": 78105, "loss": 0.1437, "lr": 3.3480075547362154e-08, "epoch": 4.76537993726394, "percentage": 95.31, "elapsed_time": "3:16:37", "remaining_time": "0:09:40", "throughput": 19857.51, "total_tokens": 234274944}
|
|
{"current_steps": 74445, "total_steps": 78105, "loss": 0.0895, "lr": 3.338901561697944e-08, "epoch": 4.765700019204917, "percentage": 95.31, "elapsed_time": "3:16:38", "remaining_time": "0:09:40", "throughput": 19857.67, "total_tokens": 234289792}
|
|
{"current_steps": 74450, "total_steps": 78105, "loss": 0.0701, "lr": 3.3298078856300874e-08, "epoch": 4.766020101145894, "percentage": 95.32, "elapsed_time": "3:16:39", "remaining_time": "0:09:39", "throughput": 19857.84, "total_tokens": 234304768}
|
|
{"current_steps": 74455, "total_steps": 78105, "loss": 0.1504, "lr": 3.3207265269867836e-08, "epoch": 4.766340183086871, "percentage": 95.33, "elapsed_time": "3:16:39", "remaining_time": "0:09:38", "throughput": 19857.99, "total_tokens": 234319616}
|
|
{"current_steps": 74460, "total_steps": 78105, "loss": 0.135, "lr": 3.311657486221476e-08, "epoch": 4.766660265027847, "percentage": 95.33, "elapsed_time": "3:16:40", "remaining_time": "0:09:37", "throughput": 19858.15, "total_tokens": 234334272}
|
|
{"current_steps": 74465, "total_steps": 78105, "loss": 0.1336, "lr": 3.30260076378705e-08, "epoch": 4.766980346968824, "percentage": 95.34, "elapsed_time": "3:16:41", "remaining_time": "0:09:36", "throughput": 19858.33, "total_tokens": 234349696}
|
|
{"current_steps": 74470, "total_steps": 78105, "loss": 0.1244, "lr": 3.293556360135758e-08, "epoch": 4.767300428909801, "percentage": 95.35, "elapsed_time": "3:16:41", "remaining_time": "0:09:36", "throughput": 19858.49, "total_tokens": 234364864}
|
|
{"current_steps": 74475, "total_steps": 78105, "loss": 0.1555, "lr": 3.2845242757192085e-08, "epoch": 4.767620510850778, "percentage": 95.35, "elapsed_time": "3:16:42", "remaining_time": "0:09:35", "throughput": 19858.67, "total_tokens": 234380288}
|
|
{"current_steps": 74480, "total_steps": 78105, "loss": 0.1255, "lr": 3.275504510988403e-08, "epoch": 4.767940592791755, "percentage": 95.36, "elapsed_time": "3:16:43", "remaining_time": "0:09:34", "throughput": 19858.82, "total_tokens": 234395008}
|
|
{"current_steps": 74485, "total_steps": 78105, "loss": 0.1423, "lr": 3.266497066393759e-08, "epoch": 4.768260674732732, "percentage": 95.37, "elapsed_time": "3:16:43", "remaining_time": "0:09:33", "throughput": 19858.98, "total_tokens": 234410176}
|
|
{"current_steps": 74490, "total_steps": 78105, "loss": 0.1873, "lr": 3.2575019423851106e-08, "epoch": 4.768580756673709, "percentage": 95.37, "elapsed_time": "3:16:44", "remaining_time": "0:09:32", "throughput": 19859.15, "total_tokens": 234425856}
|
|
{"current_steps": 74495, "total_steps": 78105, "loss": 0.1291, "lr": 3.2485191394115153e-08, "epoch": 4.768900838614686, "percentage": 95.38, "elapsed_time": "3:16:45", "remaining_time": "0:09:32", "throughput": 19859.32, "total_tokens": 234441472}
|
|
{"current_steps": 74500, "total_steps": 78105, "loss": 0.0958, "lr": 3.239548657921643e-08, "epoch": 4.769220920555663, "percentage": 95.38, "elapsed_time": "3:16:45", "remaining_time": "0:09:31", "throughput": 19859.53, "total_tokens": 234457408}
|
|
{"current_steps": 74505, "total_steps": 78105, "loss": 0.128, "lr": 3.230590498363356e-08, "epoch": 4.76954100249664, "percentage": 95.39, "elapsed_time": "3:16:46", "remaining_time": "0:09:30", "throughput": 19859.7, "total_tokens": 234472832}
|
|
{"current_steps": 74510, "total_steps": 78105, "loss": 0.1635, "lr": 3.22164466118402e-08, "epoch": 4.769861084437616, "percentage": 95.4, "elapsed_time": "3:16:47", "remaining_time": "0:09:29", "throughput": 19859.89, "total_tokens": 234488320}
|
|
{"current_steps": 74515, "total_steps": 78105, "loss": 0.1556, "lr": 3.212711146830333e-08, "epoch": 4.770181166378593, "percentage": 95.4, "elapsed_time": "3:16:47", "remaining_time": "0:09:28", "throughput": 19860.15, "total_tokens": 234506112}
|
|
{"current_steps": 74520, "total_steps": 78105, "loss": 0.1281, "lr": 3.2037899557483545e-08, "epoch": 4.77050124831957, "percentage": 95.41, "elapsed_time": "3:16:48", "remaining_time": "0:09:28", "throughput": 19860.41, "total_tokens": 234523648}
|
|
{"current_steps": 74525, "total_steps": 78105, "loss": 0.1576, "lr": 3.194881088383617e-08, "epoch": 4.770821330260547, "percentage": 95.42, "elapsed_time": "3:16:49", "remaining_time": "0:09:27", "throughput": 19860.56, "total_tokens": 234538560}
|
|
{"current_steps": 74530, "total_steps": 78105, "loss": 0.1453, "lr": 3.1859845451809315e-08, "epoch": 4.771141412201524, "percentage": 95.42, "elapsed_time": "3:16:49", "remaining_time": "0:09:26", "throughput": 19860.76, "total_tokens": 234554496}
|
|
{"current_steps": 74535, "total_steps": 78105, "loss": 0.1317, "lr": 3.1771003265845815e-08, "epoch": 4.771461494142501, "percentage": 95.43, "elapsed_time": "3:16:50", "remaining_time": "0:09:25", "throughput": 19860.93, "total_tokens": 234569600}
|
|
{"current_steps": 74540, "total_steps": 78105, "loss": 0.1402, "lr": 3.168228433038212e-08, "epoch": 4.771781576083478, "percentage": 95.44, "elapsed_time": "3:16:51", "remaining_time": "0:09:24", "throughput": 19861.12, "total_tokens": 234585152}
|
|
{"current_steps": 74545, "total_steps": 78105, "loss": 0.1452, "lr": 3.159368864984802e-08, "epoch": 4.772101658024455, "percentage": 95.44, "elapsed_time": "3:16:51", "remaining_time": "0:09:24", "throughput": 19861.27, "total_tokens": 234599744}
|
|
{"current_steps": 74550, "total_steps": 78105, "loss": 0.1187, "lr": 3.150521622866748e-08, "epoch": 4.772421739965431, "percentage": 95.45, "elapsed_time": "3:16:52", "remaining_time": "0:09:23", "throughput": 19861.45, "total_tokens": 234615040}
|
|
{"current_steps": 74555, "total_steps": 78105, "loss": 0.097, "lr": 3.141686707125863e-08, "epoch": 4.772741821906408, "percentage": 95.45, "elapsed_time": "3:16:53", "remaining_time": "0:09:22", "throughput": 19861.72, "total_tokens": 234632768}
|
|
{"current_steps": 74560, "total_steps": 78105, "loss": 0.1359, "lr": 3.1328641182033214e-08, "epoch": 4.773061903847385, "percentage": 95.46, "elapsed_time": "3:16:54", "remaining_time": "0:09:21", "throughput": 19861.92, "total_tokens": 234648832}
|
|
{"current_steps": 74565, "total_steps": 78105, "loss": 0.1411, "lr": 3.1240538565396325e-08, "epoch": 4.773381985788362, "percentage": 95.47, "elapsed_time": "3:16:54", "remaining_time": "0:09:20", "throughput": 19862.08, "total_tokens": 234663872}
|
|
{"current_steps": 74570, "total_steps": 78105, "loss": 0.1816, "lr": 3.115255922574778e-08, "epoch": 4.773702067729339, "percentage": 95.47, "elapsed_time": "3:16:55", "remaining_time": "0:09:20", "throughput": 19862.27, "total_tokens": 234679296}
|
|
{"current_steps": 74575, "total_steps": 78105, "loss": 0.1338, "lr": 3.106470316748072e-08, "epoch": 4.774022149670316, "percentage": 95.48, "elapsed_time": "3:16:56", "remaining_time": "0:09:19", "throughput": 19862.48, "total_tokens": 234695360}
|
|
{"current_steps": 74580, "total_steps": 78105, "loss": 0.1087, "lr": 3.097697039498221e-08, "epoch": 4.774342231611293, "percentage": 95.49, "elapsed_time": "3:16:56", "remaining_time": "0:09:18", "throughput": 19862.7, "total_tokens": 234711616}
|
|
{"current_steps": 74585, "total_steps": 78105, "loss": 0.1395, "lr": 3.088936091263289e-08, "epoch": 4.77466231355227, "percentage": 95.49, "elapsed_time": "3:16:57", "remaining_time": "0:09:17", "throughput": 19862.87, "total_tokens": 234726784}
|
|
{"current_steps": 74590, "total_steps": 78105, "loss": 0.0694, "lr": 3.080187472480789e-08, "epoch": 4.774982395493247, "percentage": 95.5, "elapsed_time": "3:16:58", "remaining_time": "0:09:16", "throughput": 19863.07, "total_tokens": 234742400}
|
|
{"current_steps": 74595, "total_steps": 78105, "loss": 0.0987, "lr": 3.071451183587565e-08, "epoch": 4.775302477434224, "percentage": 95.51, "elapsed_time": "3:16:58", "remaining_time": "0:09:16", "throughput": 19863.28, "total_tokens": 234758528}
|
|
{"current_steps": 74600, "total_steps": 78105, "loss": 0.1424, "lr": 3.062727225019879e-08, "epoch": 4.7756225593752, "percentage": 95.51, "elapsed_time": "3:16:59", "remaining_time": "0:09:15", "throughput": 19863.48, "total_tokens": 234774656}
|
|
{"current_steps": 74605, "total_steps": 78105, "loss": 0.1352, "lr": 3.0540155972133275e-08, "epoch": 4.775942641316177, "percentage": 95.52, "elapsed_time": "3:17:00", "remaining_time": "0:09:14", "throughput": 19863.67, "total_tokens": 234790528}
|
|
{"current_steps": 74610, "total_steps": 78105, "loss": 0.105, "lr": 3.045316300602979e-08, "epoch": 4.776262723257154, "percentage": 95.53, "elapsed_time": "3:17:00", "remaining_time": "0:09:13", "throughput": 19863.87, "total_tokens": 234806464}
|
|
{"current_steps": 74615, "total_steps": 78105, "loss": 0.1414, "lr": 3.036629335623154e-08, "epoch": 4.776582805198131, "percentage": 95.53, "elapsed_time": "3:17:01", "remaining_time": "0:09:12", "throughput": 19864.03, "total_tokens": 234821632}
|
|
{"current_steps": 74620, "total_steps": 78105, "loss": 0.0939, "lr": 3.027954702707697e-08, "epoch": 4.776902887139108, "percentage": 95.54, "elapsed_time": "3:17:02", "remaining_time": "0:09:12", "throughput": 19864.23, "total_tokens": 234837376}
|
|
{"current_steps": 74625, "total_steps": 78105, "loss": 0.1217, "lr": 3.0192924022897365e-08, "epoch": 4.777222969080085, "percentage": 95.54, "elapsed_time": "3:17:02", "remaining_time": "0:09:11", "throughput": 19864.43, "total_tokens": 234853376}
|
|
{"current_steps": 74630, "total_steps": 78105, "loss": 0.1146, "lr": 3.0106424348018417e-08, "epoch": 4.777543051021062, "percentage": 95.55, "elapsed_time": "3:17:03", "remaining_time": "0:09:10", "throughput": 19864.62, "total_tokens": 234868608}
|
|
{"current_steps": 74635, "total_steps": 78105, "loss": 0.1605, "lr": 3.002004800675945e-08, "epoch": 4.777863132962038, "percentage": 95.56, "elapsed_time": "3:17:04", "remaining_time": "0:09:09", "throughput": 19864.83, "total_tokens": 234885312}
|
|
{"current_steps": 74640, "total_steps": 78105, "loss": 0.1277, "lr": 2.993379500343341e-08, "epoch": 4.778183214903015, "percentage": 95.56, "elapsed_time": "3:17:04", "remaining_time": "0:09:08", "throughput": 19865.14, "total_tokens": 234904192}
|
|
{"current_steps": 74645, "total_steps": 78105, "loss": 0.1633, "lr": 2.984766534234795e-08, "epoch": 4.778503296843992, "percentage": 95.57, "elapsed_time": "3:17:05", "remaining_time": "0:09:08", "throughput": 19865.31, "total_tokens": 234919360}
|
|
{"current_steps": 74650, "total_steps": 78105, "loss": 0.1372, "lr": 2.9761659027803257e-08, "epoch": 4.778823378784969, "percentage": 95.58, "elapsed_time": "3:17:06", "remaining_time": "0:09:07", "throughput": 19865.48, "total_tokens": 234934464}
|
|
{"current_steps": 74655, "total_steps": 78105, "loss": 0.1541, "lr": 2.9675776064094495e-08, "epoch": 4.779143460725946, "percentage": 95.58, "elapsed_time": "3:17:06", "remaining_time": "0:09:06", "throughput": 19865.74, "total_tokens": 234951744}
|
|
{"current_steps": 74660, "total_steps": 78105, "loss": 0.0929, "lr": 2.9590016455509908e-08, "epoch": 4.779463542666923, "percentage": 95.59, "elapsed_time": "3:17:07", "remaining_time": "0:09:05", "throughput": 19865.92, "total_tokens": 234966848}
|
|
{"current_steps": 74665, "total_steps": 78105, "loss": 0.1831, "lr": 2.9504380206332173e-08, "epoch": 4.7797836246079, "percentage": 95.6, "elapsed_time": "3:17:08", "remaining_time": "0:09:04", "throughput": 19866.1, "total_tokens": 234982016}
|
|
{"current_steps": 74670, "total_steps": 78105, "loss": 0.1148, "lr": 2.941886732083704e-08, "epoch": 4.780103706548877, "percentage": 95.6, "elapsed_time": "3:17:08", "remaining_time": "0:09:04", "throughput": 19866.27, "total_tokens": 234997568}
|
|
{"current_steps": 74675, "total_steps": 78105, "loss": 0.1312, "lr": 2.933347780329526e-08, "epoch": 4.780423788489854, "percentage": 95.61, "elapsed_time": "3:17:09", "remaining_time": "0:09:03", "throughput": 19866.47, "total_tokens": 235013568}
|
|
{"current_steps": 74680, "total_steps": 78105, "loss": 0.092, "lr": 2.9248211657969816e-08, "epoch": 4.780743870430831, "percentage": 95.61, "elapsed_time": "3:17:10", "remaining_time": "0:09:02", "throughput": 19866.66, "total_tokens": 235029504}
|
|
{"current_steps": 74685, "total_steps": 78105, "loss": 0.0907, "lr": 2.9163068889119516e-08, "epoch": 4.781063952371807, "percentage": 95.62, "elapsed_time": "3:17:11", "remaining_time": "0:09:01", "throughput": 19866.86, "total_tokens": 235045312}
|
|
{"current_steps": 74690, "total_steps": 78105, "loss": 0.1454, "lr": 2.9078049500995133e-08, "epoch": 4.781384034312784, "percentage": 95.63, "elapsed_time": "3:17:11", "remaining_time": "0:09:00", "throughput": 19867.08, "total_tokens": 235061824}
|
|
{"current_steps": 74695, "total_steps": 78105, "loss": 0.1331, "lr": 2.8993153497842708e-08, "epoch": 4.781704116253761, "percentage": 95.63, "elapsed_time": "3:17:12", "remaining_time": "0:09:00", "throughput": 19867.27, "total_tokens": 235077568}
|
|
{"current_steps": 74700, "total_steps": 78105, "loss": 0.1477, "lr": 2.8908380883901077e-08, "epoch": 4.782024198194738, "percentage": 95.64, "elapsed_time": "3:17:13", "remaining_time": "0:08:59", "throughput": 19867.46, "total_tokens": 235092864}
|
|
{"current_steps": 74705, "total_steps": 78105, "loss": 0.0905, "lr": 2.8823731663403243e-08, "epoch": 4.782344280135715, "percentage": 95.65, "elapsed_time": "3:17:13", "remaining_time": "0:08:58", "throughput": 19867.65, "total_tokens": 235108608}
|
|
{"current_steps": 74710, "total_steps": 78105, "loss": 0.1283, "lr": 2.8739205840576656e-08, "epoch": 4.782664362076692, "percentage": 95.65, "elapsed_time": "3:17:14", "remaining_time": "0:08:57", "throughput": 19867.84, "total_tokens": 235124608}
|
|
{"current_steps": 74715, "total_steps": 78105, "loss": 0.1257, "lr": 2.8654803419641554e-08, "epoch": 4.782984444017669, "percentage": 95.66, "elapsed_time": "3:17:15", "remaining_time": "0:08:56", "throughput": 19868.03, "total_tokens": 235140160}
|
|
{"current_steps": 74720, "total_steps": 78105, "loss": 0.1582, "lr": 2.8570524404812893e-08, "epoch": 4.783304525958646, "percentage": 95.67, "elapsed_time": "3:17:15", "remaining_time": "0:08:56", "throughput": 19868.21, "total_tokens": 235155584}
|
|
{"current_steps": 74725, "total_steps": 78105, "loss": 0.1286, "lr": 2.848636880029898e-08, "epoch": 4.783624607899622, "percentage": 95.67, "elapsed_time": "3:17:16", "remaining_time": "0:08:55", "throughput": 19868.38, "total_tokens": 235170304}
|
|
{"current_steps": 74730, "total_steps": 78105, "loss": 0.1374, "lr": 2.8402336610302283e-08, "epoch": 4.783944689840599, "percentage": 95.68, "elapsed_time": "3:17:17", "remaining_time": "0:08:54", "throughput": 19868.57, "total_tokens": 235186176}
|
|
{"current_steps": 74735, "total_steps": 78105, "loss": 0.1392, "lr": 2.831842783901889e-08, "epoch": 4.784264771781576, "percentage": 95.69, "elapsed_time": "3:17:17", "remaining_time": "0:08:53", "throughput": 19868.74, "total_tokens": 235201408}
|
|
{"current_steps": 74740, "total_steps": 78105, "loss": 0.1631, "lr": 2.8234642490638507e-08, "epoch": 4.784584853722553, "percentage": 95.69, "elapsed_time": "3:17:18", "remaining_time": "0:08:52", "throughput": 19868.92, "total_tokens": 235216896}
|
|
{"current_steps": 74745, "total_steps": 78105, "loss": 0.1293, "lr": 2.815098056934501e-08, "epoch": 4.78490493566353, "percentage": 95.7, "elapsed_time": "3:17:19", "remaining_time": "0:08:52", "throughput": 19869.14, "total_tokens": 235233408}
|
|
{"current_steps": 74750, "total_steps": 78105, "loss": 0.1907, "lr": 2.8067442079316444e-08, "epoch": 4.785225017604507, "percentage": 95.7, "elapsed_time": "3:17:19", "remaining_time": "0:08:51", "throughput": 19869.38, "total_tokens": 235250432}
|
|
{"current_steps": 74755, "total_steps": 78105, "loss": 0.0923, "lr": 2.7984027024723915e-08, "epoch": 4.785545099545484, "percentage": 95.71, "elapsed_time": "3:17:20", "remaining_time": "0:08:50", "throughput": 19869.56, "total_tokens": 235265920}
|
|
{"current_steps": 74760, "total_steps": 78105, "loss": 0.1538, "lr": 2.7900735409732704e-08, "epoch": 4.785865181486461, "percentage": 95.72, "elapsed_time": "3:17:21", "remaining_time": "0:08:49", "throughput": 19869.78, "total_tokens": 235282496}
|
|
{"current_steps": 74765, "total_steps": 78105, "loss": 0.0995, "lr": 2.7817567238501984e-08, "epoch": 4.786185263427438, "percentage": 95.72, "elapsed_time": "3:17:21", "remaining_time": "0:08:49", "throughput": 19869.96, "total_tokens": 235297664}
|
|
{"current_steps": 74770, "total_steps": 78105, "loss": 0.1395, "lr": 2.7734522515185102e-08, "epoch": 4.786505345368415, "percentage": 95.73, "elapsed_time": "3:17:22", "remaining_time": "0:08:48", "throughput": 19870.16, "total_tokens": 235313536}
|
|
{"current_steps": 74775, "total_steps": 78105, "loss": 0.1431, "lr": 2.7651601243928462e-08, "epoch": 4.786825427309391, "percentage": 95.74, "elapsed_time": "3:17:23", "remaining_time": "0:08:47", "throughput": 19870.33, "total_tokens": 235328768}
|
|
{"current_steps": 74780, "total_steps": 78105, "loss": 0.1117, "lr": 2.7568803428872915e-08, "epoch": 4.787145509250368, "percentage": 95.74, "elapsed_time": "3:17:23", "remaining_time": "0:08:46", "throughput": 19870.52, "total_tokens": 235344384}
|
|
{"current_steps": 74785, "total_steps": 78105, "loss": 0.1219, "lr": 2.7486129074152934e-08, "epoch": 4.787465591191345, "percentage": 95.75, "elapsed_time": "3:17:24", "remaining_time": "0:08:45", "throughput": 19870.78, "total_tokens": 235361792}
|
|
{"current_steps": 74790, "total_steps": 78105, "loss": 0.1024, "lr": 2.7403578183896884e-08, "epoch": 4.787785673132322, "percentage": 95.76, "elapsed_time": "3:17:25", "remaining_time": "0:08:45", "throughput": 19871.01, "total_tokens": 235378752}
|
|
{"current_steps": 74795, "total_steps": 78105, "loss": 0.125, "lr": 2.7321150762226743e-08, "epoch": 4.788105755073299, "percentage": 95.76, "elapsed_time": "3:17:26", "remaining_time": "0:08:44", "throughput": 19871.21, "total_tokens": 235394944}
|
|
{"current_steps": 74800, "total_steps": 78105, "loss": 0.1228, "lr": 2.723884681325839e-08, "epoch": 4.788425837014276, "percentage": 95.77, "elapsed_time": "3:17:26", "remaining_time": "0:08:43", "throughput": 19871.4, "total_tokens": 235410560}
|
|
{"current_steps": 74805, "total_steps": 78105, "loss": 0.1286, "lr": 2.7156666341102145e-08, "epoch": 4.788745918955253, "percentage": 95.77, "elapsed_time": "3:17:27", "remaining_time": "0:08:42", "throughput": 19871.61, "total_tokens": 235426688}
|
|
{"current_steps": 74810, "total_steps": 78105, "loss": 0.0951, "lr": 2.707460934986139e-08, "epoch": 4.78906600089623, "percentage": 95.78, "elapsed_time": "3:17:28", "remaining_time": "0:08:41", "throughput": 19871.79, "total_tokens": 235442048}
|
|
{"current_steps": 74815, "total_steps": 78105, "loss": 0.1333, "lr": 2.6992675843633408e-08, "epoch": 4.789386082837206, "percentage": 95.79, "elapsed_time": "3:17:28", "remaining_time": "0:08:41", "throughput": 19872.0, "total_tokens": 235458240}
|
|
{"current_steps": 74820, "total_steps": 78105, "loss": 0.1369, "lr": 2.69108658265102e-08, "epoch": 4.789706164778183, "percentage": 95.79, "elapsed_time": "3:17:29", "remaining_time": "0:08:40", "throughput": 19872.19, "total_tokens": 235473856}
|
|
{"current_steps": 74825, "total_steps": 78105, "loss": 0.1185, "lr": 2.6829179302576003e-08, "epoch": 4.79002624671916, "percentage": 95.8, "elapsed_time": "3:17:30", "remaining_time": "0:08:39", "throughput": 19872.37, "total_tokens": 235489536}
|
|
{"current_steps": 74830, "total_steps": 78105, "loss": 0.1613, "lr": 2.674761627591088e-08, "epoch": 4.790346328660137, "percentage": 95.81, "elapsed_time": "3:17:30", "remaining_time": "0:08:38", "throughput": 19872.58, "total_tokens": 235505920}
|
|
{"current_steps": 74835, "total_steps": 78105, "loss": 0.1412, "lr": 2.666617675058658e-08, "epoch": 4.790666410601114, "percentage": 95.81, "elapsed_time": "3:17:31", "remaining_time": "0:08:37", "throughput": 19872.77, "total_tokens": 235521216}
|
|
{"current_steps": 74840, "total_steps": 78105, "loss": 0.1291, "lr": 2.6584860730670682e-08, "epoch": 4.790986492542091, "percentage": 95.82, "elapsed_time": "3:17:32", "remaining_time": "0:08:37", "throughput": 19872.97, "total_tokens": 235536704}
|
|
{"current_steps": 74845, "total_steps": 78105, "loss": 0.1207, "lr": 2.6503668220222988e-08, "epoch": 4.791306574483068, "percentage": 95.83, "elapsed_time": "3:17:32", "remaining_time": "0:08:36", "throughput": 19873.14, "total_tokens": 235551936}
|
|
{"current_steps": 74850, "total_steps": 78105, "loss": 0.151, "lr": 2.642259922329832e-08, "epoch": 4.791626656424045, "percentage": 95.83, "elapsed_time": "3:17:33", "remaining_time": "0:08:35", "throughput": 19873.33, "total_tokens": 235567232}
|
|
{"current_steps": 74855, "total_steps": 78105, "loss": 0.0967, "lr": 2.6341653743944538e-08, "epoch": 4.791946738365022, "percentage": 95.84, "elapsed_time": "3:17:34", "remaining_time": "0:08:34", "throughput": 19873.53, "total_tokens": 235583040}
|
|
{"current_steps": 74860, "total_steps": 78105, "loss": 0.1503, "lr": 2.6260831786203976e-08, "epoch": 4.792266820305999, "percentage": 95.85, "elapsed_time": "3:17:34", "remaining_time": "0:08:33", "throughput": 19873.68, "total_tokens": 235597696}
|
|
{"current_steps": 74865, "total_steps": 78105, "loss": 0.1339, "lr": 2.6180133354112013e-08, "epoch": 4.792586902246975, "percentage": 95.85, "elapsed_time": "3:17:35", "remaining_time": "0:08:33", "throughput": 19873.89, "total_tokens": 235613888}
|
|
{"current_steps": 74870, "total_steps": 78105, "loss": 0.1132, "lr": 2.609955845169876e-08, "epoch": 4.792906984187952, "percentage": 95.86, "elapsed_time": "3:17:36", "remaining_time": "0:08:32", "throughput": 19874.08, "total_tokens": 235629632}
|
|
{"current_steps": 74875, "total_steps": 78105, "loss": 0.1654, "lr": 2.6019107082987104e-08, "epoch": 4.793227066128929, "percentage": 95.86, "elapsed_time": "3:17:36", "remaining_time": "0:08:31", "throughput": 19874.26, "total_tokens": 235644672}
|
|
{"current_steps": 74880, "total_steps": 78105, "loss": 0.106, "lr": 2.593877925199495e-08, "epoch": 4.793547148069906, "percentage": 95.87, "elapsed_time": "3:17:37", "remaining_time": "0:08:30", "throughput": 19874.45, "total_tokens": 235660160}
|
|
{"current_steps": 74885, "total_steps": 78105, "loss": 0.1481, "lr": 2.585857496273325e-08, "epoch": 4.793867230010883, "percentage": 95.88, "elapsed_time": "3:17:38", "remaining_time": "0:08:29", "throughput": 19874.62, "total_tokens": 235675520}
|
|
{"current_steps": 74890, "total_steps": 78105, "loss": 0.2174, "lr": 2.5778494219206862e-08, "epoch": 4.79418731195186, "percentage": 95.88, "elapsed_time": "3:17:38", "remaining_time": "0:08:29", "throughput": 19874.83, "total_tokens": 235691328}
|
|
{"current_steps": 74895, "total_steps": 78105, "loss": 0.1311, "lr": 2.5698537025415083e-08, "epoch": 4.794507393892837, "percentage": 95.89, "elapsed_time": "3:17:39", "remaining_time": "0:08:28", "throughput": 19875.01, "total_tokens": 235706816}
|
|
{"current_steps": 74900, "total_steps": 78105, "loss": 0.1343, "lr": 2.5618703385349997e-08, "epoch": 4.794827475833813, "percentage": 95.9, "elapsed_time": "3:17:40", "remaining_time": "0:08:27", "throughput": 19875.18, "total_tokens": 235721920}
|
|
{"current_steps": 74905, "total_steps": 78105, "loss": 0.1618, "lr": 2.553899330299814e-08, "epoch": 4.79514755777479, "percentage": 95.9, "elapsed_time": "3:17:40", "remaining_time": "0:08:26", "throughput": 19875.34, "total_tokens": 235736768}
|
|
{"current_steps": 74910, "total_steps": 78105, "loss": 0.1474, "lr": 2.5459406782339936e-08, "epoch": 4.795467639715767, "percentage": 95.91, "elapsed_time": "3:17:41", "remaining_time": "0:08:25", "throughput": 19875.53, "total_tokens": 235752512}
|
|
{"current_steps": 74915, "total_steps": 78105, "loss": 0.1266, "lr": 2.5379943827349984e-08, "epoch": 4.795787721656744, "percentage": 95.92, "elapsed_time": "3:17:42", "remaining_time": "0:08:25", "throughput": 19875.73, "total_tokens": 235768256}
|
|
{"current_steps": 74920, "total_steps": 78105, "loss": 0.1488, "lr": 2.5300604441995115e-08, "epoch": 4.796107803597721, "percentage": 95.92, "elapsed_time": "3:17:42", "remaining_time": "0:08:24", "throughput": 19875.87, "total_tokens": 235782784}
|
|
{"current_steps": 74925, "total_steps": 78105, "loss": 0.1571, "lr": 2.5221388630238543e-08, "epoch": 4.796427885538698, "percentage": 95.93, "elapsed_time": "3:17:43", "remaining_time": "0:08:23", "throughput": 19876.13, "total_tokens": 235800256}
|
|
{"current_steps": 74930, "total_steps": 78105, "loss": 0.1359, "lr": 2.5142296396034605e-08, "epoch": 4.796747967479675, "percentage": 95.93, "elapsed_time": "3:17:44", "remaining_time": "0:08:22", "throughput": 19876.31, "total_tokens": 235815936}
|
|
{"current_steps": 74935, "total_steps": 78105, "loss": 0.1299, "lr": 2.5063327743333753e-08, "epoch": 4.797068049420652, "percentage": 95.94, "elapsed_time": "3:17:44", "remaining_time": "0:08:21", "throughput": 19876.53, "total_tokens": 235832384}
|
|
{"current_steps": 74940, "total_steps": 78105, "loss": 0.1401, "lr": 2.498448267607867e-08, "epoch": 4.797388131361629, "percentage": 95.95, "elapsed_time": "3:17:45", "remaining_time": "0:08:21", "throughput": 19876.72, "total_tokens": 235848128}
|
|
{"current_steps": 74945, "total_steps": 78105, "loss": 0.1738, "lr": 2.490576119820648e-08, "epoch": 4.797708213302606, "percentage": 95.95, "elapsed_time": "3:17:46", "remaining_time": "0:08:20", "throughput": 19876.88, "total_tokens": 235862976}
|
|
{"current_steps": 74950, "total_steps": 78105, "loss": 0.1179, "lr": 2.4827163313648482e-08, "epoch": 4.798028295243582, "percentage": 95.96, "elapsed_time": "3:17:46", "remaining_time": "0:08:19", "throughput": 19877.04, "total_tokens": 235878080}
|
|
{"current_steps": 74955, "total_steps": 78105, "loss": 0.1961, "lr": 2.4748689026329042e-08, "epoch": 4.798348377184559, "percentage": 95.97, "elapsed_time": "3:17:47", "remaining_time": "0:08:18", "throughput": 19877.23, "total_tokens": 235893952}
|
|
{"current_steps": 74960, "total_steps": 78105, "loss": 0.1436, "lr": 2.4670338340166966e-08, "epoch": 4.798668459125536, "percentage": 95.97, "elapsed_time": "3:17:48", "remaining_time": "0:08:17", "throughput": 19877.48, "total_tokens": 235911296}
|
|
{"current_steps": 74965, "total_steps": 78105, "loss": 0.116, "lr": 2.4592111259074958e-08, "epoch": 4.798988541066513, "percentage": 95.98, "elapsed_time": "3:17:48", "remaining_time": "0:08:17", "throughput": 19877.68, "total_tokens": 235927296}
|
|
{"current_steps": 74970, "total_steps": 78105, "loss": 0.1287, "lr": 2.4514007786958782e-08, "epoch": 4.79930862300749, "percentage": 95.99, "elapsed_time": "3:17:49", "remaining_time": "0:08:16", "throughput": 19877.86, "total_tokens": 235942656}
|
|
{"current_steps": 74975, "total_steps": 78105, "loss": 0.1126, "lr": 2.4436027927718652e-08, "epoch": 4.799628704948467, "percentage": 95.99, "elapsed_time": "3:17:50", "remaining_time": "0:08:15", "throughput": 19878.03, "total_tokens": 235957888}
|
|
{"current_steps": 74980, "total_steps": 78105, "loss": 0.1124, "lr": 2.435817168524868e-08, "epoch": 4.799948786889444, "percentage": 96.0, "elapsed_time": "3:17:50", "remaining_time": "0:08:14", "throughput": 19878.21, "total_tokens": 235973568}
|
|
{"current_steps": 74985, "total_steps": 78105, "loss": 0.1234, "lr": 2.4280439063436578e-08, "epoch": 4.800268868830421, "percentage": 96.01, "elapsed_time": "3:17:51", "remaining_time": "0:08:13", "throughput": 19878.36, "total_tokens": 235987904}
|
|
{"current_steps": 74990, "total_steps": 78105, "loss": 0.1344, "lr": 2.4202830066163418e-08, "epoch": 4.800588950771397, "percentage": 96.01, "elapsed_time": "3:17:52", "remaining_time": "0:08:13", "throughput": 19878.57, "total_tokens": 236004352}
|
|
{"current_steps": 74995, "total_steps": 78105, "loss": 0.133, "lr": 2.4125344697305266e-08, "epoch": 4.800909032712374, "percentage": 96.02, "elapsed_time": "3:17:53", "remaining_time": "0:08:12", "throughput": 19878.81, "total_tokens": 236021696}
|
|
{"current_steps": 75000, "total_steps": 78105, "loss": 0.1096, "lr": 2.4047982960730966e-08, "epoch": 4.801229114653351, "percentage": 96.02, "elapsed_time": "3:17:53", "remaining_time": "0:08:11", "throughput": 19879.01, "total_tokens": 236037504}
|
|
{"current_steps": 75005, "total_steps": 78105, "loss": 0.1214, "lr": 2.3970744860303817e-08, "epoch": 4.801549196594328, "percentage": 96.03, "elapsed_time": "3:17:54", "remaining_time": "0:08:10", "throughput": 19879.17, "total_tokens": 236052608}
|
|
{"current_steps": 75010, "total_steps": 78105, "loss": 0.1436, "lr": 2.3893630399880185e-08, "epoch": 4.801869278535305, "percentage": 96.04, "elapsed_time": "3:17:55", "remaining_time": "0:08:09", "throughput": 19879.37, "total_tokens": 236068416}
|
|
{"current_steps": 75015, "total_steps": 78105, "loss": 0.1614, "lr": 2.381663958331143e-08, "epoch": 4.802189360476282, "percentage": 96.04, "elapsed_time": "3:17:55", "remaining_time": "0:08:09", "throughput": 19879.59, "total_tokens": 236084672}
|
|
{"current_steps": 75020, "total_steps": 78105, "loss": 0.1321, "lr": 2.373977241444142e-08, "epoch": 4.802509442417259, "percentage": 96.05, "elapsed_time": "3:17:56", "remaining_time": "0:08:08", "throughput": 19879.75, "total_tokens": 236099520}
|
|
{"current_steps": 75025, "total_steps": 78105, "loss": 0.1787, "lr": 2.366302889710903e-08, "epoch": 4.802829524358236, "percentage": 96.06, "elapsed_time": "3:17:57", "remaining_time": "0:08:07", "throughput": 19879.95, "total_tokens": 236115648}
|
|
{"current_steps": 75030, "total_steps": 78105, "loss": 0.0954, "lr": 2.358640903514592e-08, "epoch": 4.803149606299213, "percentage": 96.06, "elapsed_time": "3:17:57", "remaining_time": "0:08:06", "throughput": 19880.14, "total_tokens": 236131200}
|
|
{"current_steps": 75035, "total_steps": 78105, "loss": 0.1687, "lr": 2.3509912832378746e-08, "epoch": 4.80346968824019, "percentage": 96.07, "elapsed_time": "3:17:58", "remaining_time": "0:08:05", "throughput": 19880.32, "total_tokens": 236146368}
|
|
{"current_steps": 75040, "total_steps": 78105, "loss": 0.1407, "lr": 2.3433540292626678e-08, "epoch": 4.803789770181166, "percentage": 96.08, "elapsed_time": "3:17:59", "remaining_time": "0:08:05", "throughput": 19880.51, "total_tokens": 236162048}
|
|
{"current_steps": 75045, "total_steps": 78105, "loss": 0.1161, "lr": 2.335729141970361e-08, "epoch": 4.804109852122143, "percentage": 96.08, "elapsed_time": "3:17:59", "remaining_time": "0:08:04", "throughput": 19880.71, "total_tokens": 236177728}
|
|
{"current_steps": 75050, "total_steps": 78105, "loss": 0.0982, "lr": 2.328116621741733e-08, "epoch": 4.80442993406312, "percentage": 96.09, "elapsed_time": "3:18:00", "remaining_time": "0:08:03", "throughput": 19880.89, "total_tokens": 236193024}
|
|
{"current_steps": 75055, "total_steps": 78105, "loss": 0.1279, "lr": 2.3205164689568405e-08, "epoch": 4.804750016004097, "percentage": 96.1, "elapsed_time": "3:18:01", "remaining_time": "0:08:02", "throughput": 19881.12, "total_tokens": 236209408}
|
|
{"current_steps": 75060, "total_steps": 78105, "loss": 0.1167, "lr": 2.312928683995297e-08, "epoch": 4.805070097945074, "percentage": 96.1, "elapsed_time": "3:18:01", "remaining_time": "0:08:02", "throughput": 19881.3, "total_tokens": 236225216}
|
|
{"current_steps": 75065, "total_steps": 78105, "loss": 0.1225, "lr": 2.3053532672358824e-08, "epoch": 4.805390179886051, "percentage": 96.11, "elapsed_time": "3:18:02", "remaining_time": "0:08:01", "throughput": 19881.46, "total_tokens": 236239808}
|
|
{"current_steps": 75070, "total_steps": 78105, "loss": 0.1458, "lr": 2.297790219056989e-08, "epoch": 4.805710261827028, "percentage": 96.11, "elapsed_time": "3:18:03", "remaining_time": "0:08:00", "throughput": 19881.63, "total_tokens": 236254720}
|
|
{"current_steps": 75075, "total_steps": 78105, "loss": 0.1154, "lr": 2.2902395398361755e-08, "epoch": 4.806030343768005, "percentage": 96.12, "elapsed_time": "3:18:03", "remaining_time": "0:07:59", "throughput": 19881.8, "total_tokens": 236269632}
|
|
{"current_steps": 75080, "total_steps": 78105, "loss": 0.0826, "lr": 2.282701229950557e-08, "epoch": 4.806350425708981, "percentage": 96.13, "elapsed_time": "3:18:04", "remaining_time": "0:07:58", "throughput": 19881.97, "total_tokens": 236284864}
|
|
{"current_steps": 75085, "total_steps": 78105, "loss": 0.124, "lr": 2.275175289776499e-08, "epoch": 4.806670507649958, "percentage": 96.13, "elapsed_time": "3:18:05", "remaining_time": "0:07:58", "throughput": 19882.15, "total_tokens": 236300288}
|
|
{"current_steps": 75090, "total_steps": 78105, "loss": 0.1499, "lr": 2.26766171968984e-08, "epoch": 4.806990589590935, "percentage": 96.14, "elapsed_time": "3:18:05", "remaining_time": "0:07:57", "throughput": 19882.33, "total_tokens": 236315840}
|
|
{"current_steps": 75095, "total_steps": 78105, "loss": 0.1085, "lr": 2.2601605200657793e-08, "epoch": 4.807310671531912, "percentage": 96.15, "elapsed_time": "3:18:06", "remaining_time": "0:07:56", "throughput": 19882.51, "total_tokens": 236331264}
|
|
{"current_steps": 75100, "total_steps": 78105, "loss": 0.146, "lr": 2.2526716912788793e-08, "epoch": 4.807630753472889, "percentage": 96.15, "elapsed_time": "3:18:07", "remaining_time": "0:07:55", "throughput": 19882.7, "total_tokens": 236347264}
|
|
{"current_steps": 75105, "total_steps": 78105, "loss": 0.1494, "lr": 2.2451952337030346e-08, "epoch": 4.807950835413866, "percentage": 96.16, "elapsed_time": "3:18:07", "remaining_time": "0:07:54", "throughput": 19882.87, "total_tokens": 236362496}
|
|
{"current_steps": 75110, "total_steps": 78105, "loss": 0.089, "lr": 2.237731147711669e-08, "epoch": 4.808270917354843, "percentage": 96.17, "elapsed_time": "3:18:08", "remaining_time": "0:07:54", "throughput": 19883.11, "total_tokens": 236379776}
|
|
{"current_steps": 75115, "total_steps": 78105, "loss": 0.1351, "lr": 2.2302794336774846e-08, "epoch": 4.80859099929582, "percentage": 96.17, "elapsed_time": "3:18:09", "remaining_time": "0:07:53", "throughput": 19883.28, "total_tokens": 236395072}
|
|
{"current_steps": 75120, "total_steps": 78105, "loss": 0.1269, "lr": 2.2228400919725445e-08, "epoch": 4.808911081236797, "percentage": 96.18, "elapsed_time": "3:18:09", "remaining_time": "0:07:52", "throughput": 19883.49, "total_tokens": 236411200}
|
|
{"current_steps": 75125, "total_steps": 78105, "loss": 0.0987, "lr": 2.2154131229683297e-08, "epoch": 4.809231163177774, "percentage": 96.18, "elapsed_time": "3:18:10", "remaining_time": "0:07:51", "throughput": 19883.64, "total_tokens": 236425856}
|
|
{"current_steps": 75130, "total_steps": 78105, "loss": 0.1418, "lr": 2.2079985270357374e-08, "epoch": 4.80955124511875, "percentage": 96.19, "elapsed_time": "3:18:11", "remaining_time": "0:07:50", "throughput": 19883.82, "total_tokens": 236441408}
|
|
{"current_steps": 75135, "total_steps": 78105, "loss": 0.1102, "lr": 2.2005963045449995e-08, "epoch": 4.809871327059727, "percentage": 96.2, "elapsed_time": "3:18:11", "remaining_time": "0:07:50", "throughput": 19884.01, "total_tokens": 236457024}
|
|
{"current_steps": 75140, "total_steps": 78105, "loss": 0.1223, "lr": 2.1932064558657094e-08, "epoch": 4.810191409000704, "percentage": 96.2, "elapsed_time": "3:18:12", "remaining_time": "0:07:49", "throughput": 19884.18, "total_tokens": 236472448}
|
|
{"current_steps": 75145, "total_steps": 78105, "loss": 0.1348, "lr": 2.185828981366961e-08, "epoch": 4.810511490941681, "percentage": 96.21, "elapsed_time": "3:18:13", "remaining_time": "0:07:48", "throughput": 19884.4, "total_tokens": 236489472}
|
|
{"current_steps": 75150, "total_steps": 78105, "loss": 0.1235, "lr": 2.178463881417042e-08, "epoch": 4.810831572882658, "percentage": 96.22, "elapsed_time": "3:18:13", "remaining_time": "0:07:47", "throughput": 19884.58, "total_tokens": 236504576}
|
|
{"current_steps": 75155, "total_steps": 78105, "loss": 0.155, "lr": 2.1711111563838262e-08, "epoch": 4.811151654823635, "percentage": 96.22, "elapsed_time": "3:18:14", "remaining_time": "0:07:46", "throughput": 19884.77, "total_tokens": 236520192}
|
|
{"current_steps": 75160, "total_steps": 78105, "loss": 0.1342, "lr": 2.1637708066344075e-08, "epoch": 4.811471736764612, "percentage": 96.23, "elapsed_time": "3:18:15", "remaining_time": "0:07:46", "throughput": 19885.0, "total_tokens": 236537152}
|
|
{"current_steps": 75165, "total_steps": 78105, "loss": 0.106, "lr": 2.156442832535327e-08, "epoch": 4.811791818705588, "percentage": 96.24, "elapsed_time": "3:18:15", "remaining_time": "0:07:45", "throughput": 19885.19, "total_tokens": 236552640}
|
|
{"current_steps": 75170, "total_steps": 78105, "loss": 0.147, "lr": 2.1491272344525415e-08, "epoch": 4.812111900646565, "percentage": 96.24, "elapsed_time": "3:18:16", "remaining_time": "0:07:44", "throughput": 19885.36, "total_tokens": 236568128}
|
|
{"current_steps": 75175, "total_steps": 78105, "loss": 0.1444, "lr": 2.1418240127513423e-08, "epoch": 4.812431982587542, "percentage": 96.25, "elapsed_time": "3:18:17", "remaining_time": "0:07:43", "throughput": 19885.54, "total_tokens": 236583168}
|
|
{"current_steps": 75180, "total_steps": 78105, "loss": 0.1053, "lr": 2.13453316779641e-08, "epoch": 4.812752064528519, "percentage": 96.26, "elapsed_time": "3:18:17", "remaining_time": "0:07:42", "throughput": 19885.73, "total_tokens": 236598720}
|
|
{"current_steps": 75185, "total_steps": 78105, "loss": 0.153, "lr": 2.1272546999517584e-08, "epoch": 4.813072146469496, "percentage": 96.26, "elapsed_time": "3:18:18", "remaining_time": "0:07:42", "throughput": 19885.94, "total_tokens": 236614976}
|
|
{"current_steps": 75190, "total_steps": 78105, "loss": 0.1202, "lr": 2.1199886095809307e-08, "epoch": 4.813392228410473, "percentage": 96.27, "elapsed_time": "3:18:19", "remaining_time": "0:07:41", "throughput": 19886.11, "total_tokens": 236630528}
|
|
{"current_steps": 75195, "total_steps": 78105, "loss": 0.0844, "lr": 2.1127348970466922e-08, "epoch": 4.81371231035145, "percentage": 96.27, "elapsed_time": "3:18:19", "remaining_time": "0:07:40", "throughput": 19886.35, "total_tokens": 236647552}
|
|
{"current_steps": 75200, "total_steps": 78105, "loss": 0.1569, "lr": 2.1054935627112805e-08, "epoch": 4.814032392292427, "percentage": 96.28, "elapsed_time": "3:18:20", "remaining_time": "0:07:39", "throughput": 19886.54, "total_tokens": 236663232}
|
|
{"current_steps": 75205, "total_steps": 78105, "loss": 0.1148, "lr": 2.0982646069362955e-08, "epoch": 4.814352474233404, "percentage": 96.29, "elapsed_time": "3:18:21", "remaining_time": "0:07:38", "throughput": 19886.74, "total_tokens": 236679168}
|
|
{"current_steps": 75210, "total_steps": 78105, "loss": 0.163, "lr": 2.0910480300826985e-08, "epoch": 4.814672556174381, "percentage": 96.29, "elapsed_time": "3:18:22", "remaining_time": "0:07:38", "throughput": 19886.98, "total_tokens": 236696128}
|
|
{"current_steps": 75215, "total_steps": 78105, "loss": 0.1698, "lr": 2.0838438325108403e-08, "epoch": 4.814992638115357, "percentage": 96.3, "elapsed_time": "3:18:22", "remaining_time": "0:07:37", "throughput": 19887.19, "total_tokens": 236712448}
|
|
{"current_steps": 75220, "total_steps": 78105, "loss": 0.1178, "lr": 2.076652014580488e-08, "epoch": 4.815312720056334, "percentage": 96.31, "elapsed_time": "3:18:23", "remaining_time": "0:07:36", "throughput": 19887.4, "total_tokens": 236728768}
|
|
{"current_steps": 75225, "total_steps": 78105, "loss": 0.1358, "lr": 2.069472576650744e-08, "epoch": 4.815632801997311, "percentage": 96.31, "elapsed_time": "3:18:24", "remaining_time": "0:07:35", "throughput": 19887.63, "total_tokens": 236745728}
|
|
{"current_steps": 75230, "total_steps": 78105, "loss": 0.146, "lr": 2.062305519080099e-08, "epoch": 4.815952883938288, "percentage": 96.32, "elapsed_time": "3:18:24", "remaining_time": "0:07:34", "throughput": 19887.86, "total_tokens": 236762432}
|
|
{"current_steps": 75235, "total_steps": 78105, "loss": 0.1146, "lr": 2.0551508422264886e-08, "epoch": 4.816272965879265, "percentage": 96.33, "elapsed_time": "3:18:25", "remaining_time": "0:07:34", "throughput": 19888.09, "total_tokens": 236779520}
|
|
{"current_steps": 75240, "total_steps": 78105, "loss": 0.1183, "lr": 2.0480085464471277e-08, "epoch": 4.816593047820242, "percentage": 96.33, "elapsed_time": "3:18:26", "remaining_time": "0:07:33", "throughput": 19888.29, "total_tokens": 236795200}
|
|
{"current_steps": 75245, "total_steps": 78105, "loss": 0.1313, "lr": 2.040878632098675e-08, "epoch": 4.816913129761219, "percentage": 96.34, "elapsed_time": "3:18:26", "remaining_time": "0:07:32", "throughput": 19888.48, "total_tokens": 236811072}
|
|
{"current_steps": 75250, "total_steps": 78105, "loss": 0.1623, "lr": 2.0337610995371794e-08, "epoch": 4.817233211702196, "percentage": 96.34, "elapsed_time": "3:18:27", "remaining_time": "0:07:31", "throughput": 19888.66, "total_tokens": 236826752}
|
|
{"current_steps": 75255, "total_steps": 78105, "loss": 0.1166, "lr": 2.026655949118078e-08, "epoch": 4.817553293643172, "percentage": 96.35, "elapsed_time": "3:18:28", "remaining_time": "0:07:30", "throughput": 19888.86, "total_tokens": 236842752}
|
|
{"current_steps": 75260, "total_steps": 78105, "loss": 0.1135, "lr": 2.0195631811960882e-08, "epoch": 4.817873375584149, "percentage": 96.36, "elapsed_time": "3:18:28", "remaining_time": "0:07:30", "throughput": 19889.03, "total_tokens": 236857920}
|
|
{"current_steps": 75265, "total_steps": 78105, "loss": 0.1134, "lr": 2.0124827961254534e-08, "epoch": 4.818193457525126, "percentage": 96.36, "elapsed_time": "3:18:29", "remaining_time": "0:07:29", "throughput": 19889.19, "total_tokens": 236872960}
|
|
{"current_steps": 75270, "total_steps": 78105, "loss": 0.1411, "lr": 2.0054147942596967e-08, "epoch": 4.818513539466103, "percentage": 96.37, "elapsed_time": "3:18:30", "remaining_time": "0:07:28", "throughput": 19889.38, "total_tokens": 236888704}
|
|
{"current_steps": 75275, "total_steps": 78105, "loss": 0.1475, "lr": 1.998359175951786e-08, "epoch": 4.81883362140708, "percentage": 96.38, "elapsed_time": "3:18:30", "remaining_time": "0:07:27", "throughput": 19889.57, "total_tokens": 236904128}
|
|
{"current_steps": 75280, "total_steps": 78105, "loss": 0.1007, "lr": 1.991315941553995e-08, "epoch": 4.819153703348057, "percentage": 96.38, "elapsed_time": "3:18:31", "remaining_time": "0:07:27", "throughput": 19889.78, "total_tokens": 236920256}
|
|
{"current_steps": 75285, "total_steps": 78105, "loss": 0.0977, "lr": 1.98428509141807e-08, "epoch": 4.819473785289034, "percentage": 96.39, "elapsed_time": "3:18:32", "remaining_time": "0:07:26", "throughput": 19889.94, "total_tokens": 236935040}
|
|
{"current_steps": 75290, "total_steps": 78105, "loss": 0.1245, "lr": 1.9772666258950913e-08, "epoch": 4.819793867230011, "percentage": 96.4, "elapsed_time": "3:18:32", "remaining_time": "0:07:25", "throughput": 19890.11, "total_tokens": 236950400}
|
|
{"current_steps": 75295, "total_steps": 78105, "loss": 0.1496, "lr": 1.9702605453355007e-08, "epoch": 4.820113949170988, "percentage": 96.4, "elapsed_time": "3:18:33", "remaining_time": "0:07:24", "throughput": 19890.3, "total_tokens": 236966144}
|
|
{"current_steps": 75300, "total_steps": 78105, "loss": 0.1386, "lr": 1.963266850089185e-08, "epoch": 4.820434031111965, "percentage": 96.41, "elapsed_time": "3:18:34", "remaining_time": "0:07:23", "throughput": 19890.49, "total_tokens": 236981824}
|
|
{"current_steps": 75305, "total_steps": 78105, "loss": 0.1928, "lr": 1.9562855405053092e-08, "epoch": 4.820754113052941, "percentage": 96.42, "elapsed_time": "3:18:35", "remaining_time": "0:07:23", "throughput": 19890.74, "total_tokens": 236999232}
|
|
{"current_steps": 75310, "total_steps": 78105, "loss": 0.1102, "lr": 1.9493166169325395e-08, "epoch": 4.821074194993918, "percentage": 96.42, "elapsed_time": "3:18:35", "remaining_time": "0:07:22", "throughput": 19890.93, "total_tokens": 237014784}
|
|
{"current_steps": 75315, "total_steps": 78105, "loss": 0.0877, "lr": 1.942360079718847e-08, "epoch": 4.821394276934895, "percentage": 96.43, "elapsed_time": "3:18:36", "remaining_time": "0:07:21", "throughput": 19891.13, "total_tokens": 237031104}
|
|
{"current_steps": 75320, "total_steps": 78105, "loss": 0.1016, "lr": 1.9354159292116203e-08, "epoch": 4.821714358875872, "percentage": 96.43, "elapsed_time": "3:18:37", "remaining_time": "0:07:20", "throughput": 19891.3, "total_tokens": 237046080}
|
|
{"current_steps": 75325, "total_steps": 78105, "loss": 0.1461, "lr": 1.9284841657575827e-08, "epoch": 4.822034440816849, "percentage": 96.44, "elapsed_time": "3:18:37", "remaining_time": "0:07:19", "throughput": 19891.46, "total_tokens": 237061248}
|
|
{"current_steps": 75330, "total_steps": 78105, "loss": 0.1599, "lr": 1.9215647897029012e-08, "epoch": 4.822354522757826, "percentage": 96.45, "elapsed_time": "3:18:38", "remaining_time": "0:07:19", "throughput": 19891.66, "total_tokens": 237076928}
|
|
{"current_steps": 75335, "total_steps": 78105, "loss": 0.1229, "lr": 1.9146578013930496e-08, "epoch": 4.822674604698803, "percentage": 96.45, "elapsed_time": "3:18:39", "remaining_time": "0:07:18", "throughput": 19891.89, "total_tokens": 237093632}
|
|
{"current_steps": 75340, "total_steps": 78105, "loss": 0.1142, "lr": 1.907763201172974e-08, "epoch": 4.82299468663978, "percentage": 96.46, "elapsed_time": "3:18:39", "remaining_time": "0:07:17", "throughput": 19892.12, "total_tokens": 237110464}
|
|
{"current_steps": 75345, "total_steps": 78105, "loss": 0.1102, "lr": 1.9008809893869552e-08, "epoch": 4.823314768580756, "percentage": 96.47, "elapsed_time": "3:18:40", "remaining_time": "0:07:16", "throughput": 19892.28, "total_tokens": 237125312}
|
|
{"current_steps": 75350, "total_steps": 78105, "loss": 0.1336, "lr": 1.894011166378634e-08, "epoch": 4.823634850521733, "percentage": 96.47, "elapsed_time": "3:18:41", "remaining_time": "0:07:15", "throughput": 19892.51, "total_tokens": 237142464}
|
|
{"current_steps": 75355, "total_steps": 78105, "loss": 0.1491, "lr": 1.8871537324910427e-08, "epoch": 4.82395493246271, "percentage": 96.48, "elapsed_time": "3:18:41", "remaining_time": "0:07:15", "throughput": 19892.67, "total_tokens": 237157504}
|
|
{"current_steps": 75360, "total_steps": 78105, "loss": 0.1177, "lr": 1.8803086880666565e-08, "epoch": 4.824275014403687, "percentage": 96.49, "elapsed_time": "3:18:42", "remaining_time": "0:07:14", "throughput": 19892.86, "total_tokens": 237173120}
|
|
{"current_steps": 75365, "total_steps": 78105, "loss": 0.1385, "lr": 1.8734760334472024e-08, "epoch": 4.824595096344664, "percentage": 96.49, "elapsed_time": "3:18:43", "remaining_time": "0:07:13", "throughput": 19893.01, "total_tokens": 237187968}
|
|
{"current_steps": 75370, "total_steps": 78105, "loss": 0.1245, "lr": 1.866655768973935e-08, "epoch": 4.824915178285641, "percentage": 96.5, "elapsed_time": "3:18:43", "remaining_time": "0:07:12", "throughput": 19893.33, "total_tokens": 237207360}
|
|
{"current_steps": 75375, "total_steps": 78105, "loss": 0.0867, "lr": 1.8598478949874155e-08, "epoch": 4.825235260226618, "percentage": 96.5, "elapsed_time": "3:18:44", "remaining_time": "0:07:11", "throughput": 19893.54, "total_tokens": 237223744}
|
|
{"current_steps": 75380, "total_steps": 78105, "loss": 0.1123, "lr": 1.8530524118275382e-08, "epoch": 4.825555342167595, "percentage": 96.51, "elapsed_time": "3:18:45", "remaining_time": "0:07:11", "throughput": 19893.76, "total_tokens": 237240448}
|
|
{"current_steps": 75385, "total_steps": 78105, "loss": 0.1312, "lr": 1.846269319833699e-08, "epoch": 4.825875424108572, "percentage": 96.52, "elapsed_time": "3:18:46", "remaining_time": "0:07:10", "throughput": 19893.96, "total_tokens": 237256192}
|
|
{"current_steps": 75390, "total_steps": 78105, "loss": 0.1082, "lr": 1.8394986193445708e-08, "epoch": 4.826195506049549, "percentage": 96.52, "elapsed_time": "3:18:46", "remaining_time": "0:07:09", "throughput": 19894.11, "total_tokens": 237270912}
|
|
{"current_steps": 75395, "total_steps": 78105, "loss": 0.1427, "lr": 1.832740310698272e-08, "epoch": 4.826515587990525, "percentage": 96.53, "elapsed_time": "3:18:47", "remaining_time": "0:07:08", "throughput": 19894.36, "total_tokens": 237288128}
|
|
{"current_steps": 75400, "total_steps": 78105, "loss": 0.109, "lr": 1.8259943942322557e-08, "epoch": 4.826835669931502, "percentage": 96.54, "elapsed_time": "3:18:48", "remaining_time": "0:07:07", "throughput": 19894.54, "total_tokens": 237304064}
|
|
{"current_steps": 75405, "total_steps": 78105, "loss": 0.1363, "lr": 1.8192608702833903e-08, "epoch": 4.827155751872479, "percentage": 96.54, "elapsed_time": "3:18:48", "remaining_time": "0:07:07", "throughput": 19894.76, "total_tokens": 237320640}
|
|
{"current_steps": 75410, "total_steps": 78105, "loss": 0.1808, "lr": 1.8125397391879352e-08, "epoch": 4.827475833813456, "percentage": 96.55, "elapsed_time": "3:18:49", "remaining_time": "0:07:06", "throughput": 19895.02, "total_tokens": 237338304}
|
|
{"current_steps": 75415, "total_steps": 78105, "loss": 0.1513, "lr": 1.8058310012814273e-08, "epoch": 4.827795915754433, "percentage": 96.56, "elapsed_time": "3:18:50", "remaining_time": "0:07:05", "throughput": 19895.2, "total_tokens": 237353408}
|
|
{"current_steps": 75420, "total_steps": 78105, "loss": 0.1071, "lr": 1.7991346568989598e-08, "epoch": 4.82811599769541, "percentage": 96.56, "elapsed_time": "3:18:50", "remaining_time": "0:07:04", "throughput": 19895.37, "total_tokens": 237368960}
|
|
{"current_steps": 75425, "total_steps": 78105, "loss": 0.1679, "lr": 1.7924507063748763e-08, "epoch": 4.828436079636387, "percentage": 96.57, "elapsed_time": "3:18:51", "remaining_time": "0:07:03", "throughput": 19895.57, "total_tokens": 237384832}
|
|
{"current_steps": 75430, "total_steps": 78105, "loss": 0.1145, "lr": 1.7857791500429378e-08, "epoch": 4.828756161577363, "percentage": 96.58, "elapsed_time": "3:18:52", "remaining_time": "0:07:03", "throughput": 19895.76, "total_tokens": 237400512}
|
|
{"current_steps": 75435, "total_steps": 78105, "loss": 0.1578, "lr": 1.779119988236294e-08, "epoch": 4.82907624351834, "percentage": 96.58, "elapsed_time": "3:18:52", "remaining_time": "0:07:02", "throughput": 19895.95, "total_tokens": 237416256}
|
|
{"current_steps": 75440, "total_steps": 78105, "loss": 0.1284, "lr": 1.7724732212874572e-08, "epoch": 4.829396325459317, "percentage": 96.59, "elapsed_time": "3:18:53", "remaining_time": "0:07:01", "throughput": 19896.12, "total_tokens": 237431296}
|
|
{"current_steps": 75445, "total_steps": 78105, "loss": 0.0989, "lr": 1.7658388495283563e-08, "epoch": 4.829716407400294, "percentage": 96.59, "elapsed_time": "3:18:54", "remaining_time": "0:07:00", "throughput": 19896.31, "total_tokens": 237447040}
|
|
{"current_steps": 75450, "total_steps": 78105, "loss": 0.1114, "lr": 1.759216873290226e-08, "epoch": 4.830036489341271, "percentage": 96.6, "elapsed_time": "3:18:54", "remaining_time": "0:06:59", "throughput": 19896.51, "total_tokens": 237463168}
|
|
{"current_steps": 75455, "total_steps": 78105, "loss": 0.1209, "lr": 1.752607292903774e-08, "epoch": 4.830356571282248, "percentage": 96.61, "elapsed_time": "3:18:55", "remaining_time": "0:06:59", "throughput": 19896.72, "total_tokens": 237479232}
|
|
{"current_steps": 75460, "total_steps": 78105, "loss": 0.1228, "lr": 1.746010108699042e-08, "epoch": 4.830676653223225, "percentage": 96.61, "elapsed_time": "3:18:56", "remaining_time": "0:06:58", "throughput": 19896.9, "total_tokens": 237494976}
|
|
{"current_steps": 75465, "total_steps": 78105, "loss": 0.137, "lr": 1.7394253210054613e-08, "epoch": 4.830996735164202, "percentage": 96.62, "elapsed_time": "3:18:56", "remaining_time": "0:06:57", "throughput": 19897.1, "total_tokens": 237511232}
|
|
{"current_steps": 75470, "total_steps": 78105, "loss": 0.1509, "lr": 1.7328529301518515e-08, "epoch": 4.831316817105179, "percentage": 96.63, "elapsed_time": "3:18:57", "remaining_time": "0:06:56", "throughput": 19897.3, "total_tokens": 237526720}
|
|
{"current_steps": 75475, "total_steps": 78105, "loss": 0.1434, "lr": 1.7262929364663673e-08, "epoch": 4.831636899046156, "percentage": 96.63, "elapsed_time": "3:18:58", "remaining_time": "0:06:56", "throughput": 19897.49, "total_tokens": 237542528}
|
|
{"current_steps": 75480, "total_steps": 78105, "loss": 0.1276, "lr": 1.7197453402766073e-08, "epoch": 4.831956980987132, "percentage": 96.64, "elapsed_time": "3:18:58", "remaining_time": "0:06:55", "throughput": 19897.67, "total_tokens": 237558080}
|
|
{"current_steps": 75485, "total_steps": 78105, "loss": 0.0911, "lr": 1.7132101419095326e-08, "epoch": 4.832277062928109, "percentage": 96.65, "elapsed_time": "3:18:59", "remaining_time": "0:06:54", "throughput": 19897.89, "total_tokens": 237574592}
|
|
{"current_steps": 75490, "total_steps": 78105, "loss": 0.1761, "lr": 1.7066873416914377e-08, "epoch": 4.832597144869086, "percentage": 96.65, "elapsed_time": "3:19:00", "remaining_time": "0:06:53", "throughput": 19898.14, "total_tokens": 237592192}
|
|
{"current_steps": 75495, "total_steps": 78105, "loss": 0.1351, "lr": 1.700176939948117e-08, "epoch": 4.832917226810063, "percentage": 96.66, "elapsed_time": "3:19:01", "remaining_time": "0:06:52", "throughput": 19898.34, "total_tokens": 237608064}
|
|
{"current_steps": 75500, "total_steps": 78105, "loss": 0.1362, "lr": 1.6936789370045613e-08, "epoch": 4.83323730875104, "percentage": 96.66, "elapsed_time": "3:19:01", "remaining_time": "0:06:52", "throughput": 19898.54, "total_tokens": 237623872}
|
|
{"current_steps": 75505, "total_steps": 78105, "loss": 0.1269, "lr": 1.6871933331853164e-08, "epoch": 4.833557390692017, "percentage": 96.67, "elapsed_time": "3:19:02", "remaining_time": "0:06:51", "throughput": 19898.74, "total_tokens": 237639744}
|
|
{"current_steps": 75510, "total_steps": 78105, "loss": 0.1469, "lr": 1.680720128814234e-08, "epoch": 4.833877472632994, "percentage": 96.68, "elapsed_time": "3:19:03", "remaining_time": "0:06:50", "throughput": 19898.92, "total_tokens": 237655488}
|
|
{"current_steps": 75515, "total_steps": 78105, "loss": 0.1226, "lr": 1.6742593242145e-08, "epoch": 4.834197554573971, "percentage": 96.68, "elapsed_time": "3:19:03", "remaining_time": "0:06:49", "throughput": 19899.11, "total_tokens": 237671488}
|
|
{"current_steps": 75520, "total_steps": 78105, "loss": 0.1412, "lr": 1.667810919708829e-08, "epoch": 4.834517636514947, "percentage": 96.69, "elapsed_time": "3:19:04", "remaining_time": "0:06:48", "throughput": 19899.28, "total_tokens": 237687040}
|
|
{"current_steps": 75525, "total_steps": 78105, "loss": 0.1122, "lr": 1.6613749156191296e-08, "epoch": 4.834837718455924, "percentage": 96.7, "elapsed_time": "3:19:05", "remaining_time": "0:06:48", "throughput": 19899.48, "total_tokens": 237702912}
|
|
{"current_steps": 75530, "total_steps": 78105, "loss": 0.134, "lr": 1.6549513122668393e-08, "epoch": 4.835157800396901, "percentage": 96.7, "elapsed_time": "3:19:05", "remaining_time": "0:06:47", "throughput": 19899.66, "total_tokens": 237717888}
|
|
{"current_steps": 75535, "total_steps": 78105, "loss": 0.0941, "lr": 1.6485401099726738e-08, "epoch": 4.835477882337878, "percentage": 96.71, "elapsed_time": "3:19:06", "remaining_time": "0:06:46", "throughput": 19899.85, "total_tokens": 237733696}
|
|
{"current_steps": 75540, "total_steps": 78105, "loss": 0.1398, "lr": 1.6421413090568218e-08, "epoch": 4.835797964278855, "percentage": 96.72, "elapsed_time": "3:19:07", "remaining_time": "0:06:45", "throughput": 19900.01, "total_tokens": 237748608}
|
|
{"current_steps": 75545, "total_steps": 78105, "loss": 0.1138, "lr": 1.635754909838777e-08, "epoch": 4.836118046219832, "percentage": 96.72, "elapsed_time": "3:19:07", "remaining_time": "0:06:44", "throughput": 19900.22, "total_tokens": 237764736}
|
|
{"current_steps": 75550, "total_steps": 78105, "loss": 0.1803, "lr": 1.6293809126374515e-08, "epoch": 4.836438128160809, "percentage": 96.73, "elapsed_time": "3:19:08", "remaining_time": "0:06:44", "throughput": 19900.39, "total_tokens": 237780160}
|
|
{"current_steps": 75555, "total_steps": 78105, "loss": 0.1223, "lr": 1.6230193177711184e-08, "epoch": 4.836758210101786, "percentage": 96.74, "elapsed_time": "3:19:09", "remaining_time": "0:06:43", "throughput": 19900.56, "total_tokens": 237795200}
|
|
{"current_steps": 75560, "total_steps": 78105, "loss": 0.1103, "lr": 1.6166701255574678e-08, "epoch": 4.837078292042763, "percentage": 96.74, "elapsed_time": "3:19:09", "remaining_time": "0:06:42", "throughput": 19900.72, "total_tokens": 237809920}
|
|
{"current_steps": 75565, "total_steps": 78105, "loss": 0.1294, "lr": 1.6103333363135243e-08, "epoch": 4.83739837398374, "percentage": 96.75, "elapsed_time": "3:19:10", "remaining_time": "0:06:41", "throughput": 19900.93, "total_tokens": 237826368}
|
|
{"current_steps": 75570, "total_steps": 78105, "loss": 0.1121, "lr": 1.6040089503557287e-08, "epoch": 4.837718455924716, "percentage": 96.75, "elapsed_time": "3:19:11", "remaining_time": "0:06:40", "throughput": 19901.11, "total_tokens": 237841920}
|
|
{"current_steps": 75575, "total_steps": 78105, "loss": 0.1326, "lr": 1.5976969679998568e-08, "epoch": 4.838038537865693, "percentage": 96.76, "elapsed_time": "3:19:11", "remaining_time": "0:06:40", "throughput": 19901.28, "total_tokens": 237857408}
|
|
{"current_steps": 75580, "total_steps": 78105, "loss": 0.1293, "lr": 1.591397389561128e-08, "epoch": 4.83835861980667, "percentage": 96.77, "elapsed_time": "3:19:12", "remaining_time": "0:06:39", "throughput": 19901.46, "total_tokens": 237872704}
|
|
{"current_steps": 75585, "total_steps": 78105, "loss": 0.1315, "lr": 1.5851102153540964e-08, "epoch": 4.838678701747647, "percentage": 96.77, "elapsed_time": "3:19:13", "remaining_time": "0:06:38", "throughput": 19901.65, "total_tokens": 237888448}
|
|
{"current_steps": 75590, "total_steps": 78105, "loss": 0.1358, "lr": 1.578835445692706e-08, "epoch": 4.838998783688624, "percentage": 96.78, "elapsed_time": "3:19:13", "remaining_time": "0:06:37", "throughput": 19901.82, "total_tokens": 237903680}
|
|
{"current_steps": 75595, "total_steps": 78105, "loss": 0.1391, "lr": 1.572573080890316e-08, "epoch": 4.839318865629601, "percentage": 96.79, "elapsed_time": "3:19:14", "remaining_time": "0:06:36", "throughput": 19902.01, "total_tokens": 237919360}
|
|
{"current_steps": 75600, "total_steps": 78105, "loss": 0.1116, "lr": 1.5663231212595943e-08, "epoch": 4.839638947570578, "percentage": 96.79, "elapsed_time": "3:19:15", "remaining_time": "0:06:36", "throughput": 19902.21, "total_tokens": 237935360}
|
|
{"current_steps": 75605, "total_steps": 78105, "loss": 0.1546, "lr": 1.560085567112679e-08, "epoch": 4.839959029511555, "percentage": 96.8, "elapsed_time": "3:19:15", "remaining_time": "0:06:35", "throughput": 19902.37, "total_tokens": 237950336}
|
|
{"current_steps": 75610, "total_steps": 78105, "loss": 0.1033, "lr": 1.5538604187609884e-08, "epoch": 4.840279111452531, "percentage": 96.81, "elapsed_time": "3:19:16", "remaining_time": "0:06:34", "throughput": 19902.59, "total_tokens": 237966848}
|
|
{"current_steps": 75615, "total_steps": 78105, "loss": 0.1219, "lr": 1.5476476765154126e-08, "epoch": 4.840599193393508, "percentage": 96.81, "elapsed_time": "3:19:17", "remaining_time": "0:06:33", "throughput": 19902.75, "total_tokens": 237982080}
|
|
{"current_steps": 75620, "total_steps": 78105, "loss": 0.1846, "lr": 1.541447340686175e-08, "epoch": 4.840919275334485, "percentage": 96.82, "elapsed_time": "3:19:17", "remaining_time": "0:06:32", "throughput": 19902.94, "total_tokens": 237997888}
|
|
{"current_steps": 75625, "total_steps": 78105, "loss": 0.182, "lr": 1.5352594115828624e-08, "epoch": 4.841239357275462, "percentage": 96.82, "elapsed_time": "3:19:18", "remaining_time": "0:06:32", "throughput": 19903.11, "total_tokens": 238013376}
|
|
{"current_steps": 75630, "total_steps": 78105, "loss": 0.1719, "lr": 1.5290838895145043e-08, "epoch": 4.841559439216439, "percentage": 96.83, "elapsed_time": "3:19:19", "remaining_time": "0:06:31", "throughput": 19903.33, "total_tokens": 238030208}
|
|
{"current_steps": 75635, "total_steps": 78105, "loss": 0.121, "lr": 1.522920774789466e-08, "epoch": 4.841879521157416, "percentage": 96.84, "elapsed_time": "3:19:20", "remaining_time": "0:06:30", "throughput": 19903.53, "total_tokens": 238046272}
|
|
{"current_steps": 75640, "total_steps": 78105, "loss": 0.1466, "lr": 1.516770067715473e-08, "epoch": 4.842199603098393, "percentage": 96.84, "elapsed_time": "3:19:20", "remaining_time": "0:06:29", "throughput": 19903.71, "total_tokens": 238061568}
|
|
{"current_steps": 75645, "total_steps": 78105, "loss": 0.1399, "lr": 1.5106317685996964e-08, "epoch": 4.84251968503937, "percentage": 96.85, "elapsed_time": "3:19:21", "remaining_time": "0:06:28", "throughput": 19903.87, "total_tokens": 238076352}
|
|
{"current_steps": 75650, "total_steps": 78105, "loss": 0.1294, "lr": 1.504505877748641e-08, "epoch": 4.842839766980347, "percentage": 96.86, "elapsed_time": "3:19:22", "remaining_time": "0:06:28", "throughput": 19904.12, "total_tokens": 238094080}
|
|
{"current_steps": 75655, "total_steps": 78105, "loss": 0.0934, "lr": 1.4983923954681735e-08, "epoch": 4.843159848921324, "percentage": 96.86, "elapsed_time": "3:19:22", "remaining_time": "0:06:27", "throughput": 19904.31, "total_tokens": 238110080}
|
|
{"current_steps": 75660, "total_steps": 78105, "loss": 0.1358, "lr": 1.4922913220636326e-08, "epoch": 4.8434799308623, "percentage": 96.87, "elapsed_time": "3:19:23", "remaining_time": "0:06:26", "throughput": 19904.51, "total_tokens": 238126144}
|
|
{"current_steps": 75665, "total_steps": 78105, "loss": 0.1138, "lr": 1.4862026578396083e-08, "epoch": 4.843800012803277, "percentage": 96.88, "elapsed_time": "3:19:24", "remaining_time": "0:06:25", "throughput": 19904.73, "total_tokens": 238142592}
|
|
{"current_steps": 75670, "total_steps": 78105, "loss": 0.1079, "lr": 1.4801264031001627e-08, "epoch": 4.844120094744254, "percentage": 96.88, "elapsed_time": "3:19:24", "remaining_time": "0:06:25", "throughput": 19904.9, "total_tokens": 238157760}
|
|
{"current_steps": 75675, "total_steps": 78105, "loss": 0.1317, "lr": 1.4740625581486923e-08, "epoch": 4.844440176685231, "percentage": 96.89, "elapsed_time": "3:19:25", "remaining_time": "0:06:24", "throughput": 19905.05, "total_tokens": 238172608}
|
|
{"current_steps": 75680, "total_steps": 78105, "loss": 0.1381, "lr": 1.4680111232880379e-08, "epoch": 4.844760258626208, "percentage": 96.9, "elapsed_time": "3:19:26", "remaining_time": "0:06:23", "throughput": 19905.2, "total_tokens": 238187328}
|
|
{"current_steps": 75685, "total_steps": 78105, "loss": 0.1558, "lr": 1.4619720988203468e-08, "epoch": 4.845080340567185, "percentage": 96.9, "elapsed_time": "3:19:26", "remaining_time": "0:06:22", "throughput": 19905.4, "total_tokens": 238203584}
|
|
{"current_steps": 75690, "total_steps": 78105, "loss": 0.1319, "lr": 1.4559454850471832e-08, "epoch": 4.845400422508162, "percentage": 96.91, "elapsed_time": "3:19:27", "remaining_time": "0:06:21", "throughput": 19905.59, "total_tokens": 238219072}
|
|
{"current_steps": 75695, "total_steps": 78105, "loss": 0.1137, "lr": 1.4499312822694733e-08, "epoch": 4.8457205044491385, "percentage": 96.91, "elapsed_time": "3:19:28", "remaining_time": "0:06:21", "throughput": 19905.76, "total_tokens": 238234432}
|
|
{"current_steps": 75700, "total_steps": 78105, "loss": 0.0912, "lr": 1.44392949078756e-08, "epoch": 4.8460405863901155, "percentage": 96.92, "elapsed_time": "3:19:28", "remaining_time": "0:06:20", "throughput": 19905.91, "total_tokens": 238249024}
|
|
{"current_steps": 75705, "total_steps": 78105, "loss": 0.2068, "lr": 1.4379401109011204e-08, "epoch": 4.8463606683310925, "percentage": 96.93, "elapsed_time": "3:19:29", "remaining_time": "0:06:19", "throughput": 19906.07, "total_tokens": 238263552}
|
|
{"current_steps": 75710, "total_steps": 78105, "loss": 0.1415, "lr": 1.4319631429092207e-08, "epoch": 4.8466807502720695, "percentage": 96.93, "elapsed_time": "3:19:30", "remaining_time": "0:06:18", "throughput": 19906.24, "total_tokens": 238278848}
|
|
{"current_steps": 75715, "total_steps": 78105, "loss": 0.1501, "lr": 1.4259985871103721e-08, "epoch": 4.8470008322130465, "percentage": 96.94, "elapsed_time": "3:19:30", "remaining_time": "0:06:17", "throughput": 19906.39, "total_tokens": 238293632}
|
|
{"current_steps": 75720, "total_steps": 78105, "loss": 0.1726, "lr": 1.4200464438023642e-08, "epoch": 4.847320914154023, "percentage": 96.95, "elapsed_time": "3:19:31", "remaining_time": "0:06:17", "throughput": 19906.55, "total_tokens": 238308608}
|
|
{"current_steps": 75725, "total_steps": 78105, "loss": 0.1247, "lr": 1.4141067132824316e-08, "epoch": 4.847640996095, "percentage": 96.95, "elapsed_time": "3:19:32", "remaining_time": "0:06:16", "throughput": 19906.72, "total_tokens": 238323840}
|
|
{"current_steps": 75730, "total_steps": 78105, "loss": 0.1215, "lr": 1.40817939584717e-08, "epoch": 4.847961078035977, "percentage": 96.96, "elapsed_time": "3:19:32", "remaining_time": "0:06:15", "throughput": 19906.91, "total_tokens": 238339648}
|
|
{"current_steps": 75735, "total_steps": 78105, "loss": 0.146, "lr": 1.4022644917925654e-08, "epoch": 4.848281159976954, "percentage": 96.97, "elapsed_time": "3:19:33", "remaining_time": "0:06:14", "throughput": 19907.07, "total_tokens": 238354496}
|
|
{"current_steps": 75740, "total_steps": 78105, "loss": 0.1378, "lr": 1.3963620014139645e-08, "epoch": 4.848601241917931, "percentage": 96.97, "elapsed_time": "3:19:34", "remaining_time": "0:06:13", "throughput": 19907.24, "total_tokens": 238369664}
|
|
{"current_steps": 75745, "total_steps": 78105, "loss": 0.1618, "lr": 1.3904719250061316e-08, "epoch": 4.8489213238589075, "percentage": 96.98, "elapsed_time": "3:19:34", "remaining_time": "0:06:13", "throughput": 19907.47, "total_tokens": 238386496}
|
|
{"current_steps": 75750, "total_steps": 78105, "loss": 0.1654, "lr": 1.3845942628631648e-08, "epoch": 4.8492414057998845, "percentage": 96.98, "elapsed_time": "3:19:35", "remaining_time": "0:06:12", "throughput": 19907.69, "total_tokens": 238403264}
|
|
{"current_steps": 75755, "total_steps": 78105, "loss": 0.1614, "lr": 1.3787290152785514e-08, "epoch": 4.8495614877408615, "percentage": 96.99, "elapsed_time": "3:19:36", "remaining_time": "0:06:11", "throughput": 19907.86, "total_tokens": 238418368}
|
|
{"current_steps": 75760, "total_steps": 78105, "loss": 0.0952, "lr": 1.3728761825452242e-08, "epoch": 4.8498815696818385, "percentage": 97.0, "elapsed_time": "3:19:36", "remaining_time": "0:06:10", "throughput": 19908.06, "total_tokens": 238434368}
|
|
{"current_steps": 75765, "total_steps": 78105, "loss": 0.0997, "lr": 1.3670357649553933e-08, "epoch": 4.8502016516228155, "percentage": 97.0, "elapsed_time": "3:19:37", "remaining_time": "0:06:09", "throughput": 19908.21, "total_tokens": 238448960}
|
|
{"current_steps": 75770, "total_steps": 78105, "loss": 0.1256, "lr": 1.3612077628007147e-08, "epoch": 4.8505217335637925, "percentage": 97.01, "elapsed_time": "3:19:38", "remaining_time": "0:06:09", "throughput": 19908.36, "total_tokens": 238463552}
|
|
{"current_steps": 75775, "total_steps": 78105, "loss": 0.1224, "lr": 1.3553921763722055e-08, "epoch": 4.8508418155047694, "percentage": 97.02, "elapsed_time": "3:19:38", "remaining_time": "0:06:08", "throughput": 19908.52, "total_tokens": 238478528}
|
|
{"current_steps": 75780, "total_steps": 78105, "loss": 0.15, "lr": 1.3495890059602723e-08, "epoch": 4.851161897445746, "percentage": 97.02, "elapsed_time": "3:19:39", "remaining_time": "0:06:07", "throughput": 19908.79, "total_tokens": 238496128}
|
|
{"current_steps": 75785, "total_steps": 78105, "loss": 0.1264, "lr": 1.3437982518546832e-08, "epoch": 4.8514819793867225, "percentage": 97.03, "elapsed_time": "3:19:40", "remaining_time": "0:06:06", "throughput": 19908.99, "total_tokens": 238511936}
|
|
{"current_steps": 75790, "total_steps": 78105, "loss": 0.1236, "lr": 1.3380199143446238e-08, "epoch": 4.8518020613276995, "percentage": 97.04, "elapsed_time": "3:19:40", "remaining_time": "0:06:05", "throughput": 19909.16, "total_tokens": 238527168}
|
|
{"current_steps": 75795, "total_steps": 78105, "loss": 0.1523, "lr": 1.3322539937185852e-08, "epoch": 4.8521221432686765, "percentage": 97.04, "elapsed_time": "3:19:41", "remaining_time": "0:06:05", "throughput": 19909.35, "total_tokens": 238542976}
|
|
{"current_steps": 75800, "total_steps": 78105, "loss": 0.1294, "lr": 1.3265004902645595e-08, "epoch": 4.8524422252096535, "percentage": 97.05, "elapsed_time": "3:19:42", "remaining_time": "0:06:04", "throughput": 19909.5, "total_tokens": 238557760}
|
|
{"current_steps": 75805, "total_steps": 78105, "loss": 0.1267, "lr": 1.3207594042697614e-08, "epoch": 4.8527623071506305, "percentage": 97.06, "elapsed_time": "3:19:42", "remaining_time": "0:06:03", "throughput": 19909.7, "total_tokens": 238573888}
|
|
{"current_steps": 75810, "total_steps": 78105, "loss": 0.1533, "lr": 1.3150307360209614e-08, "epoch": 4.8530823890916075, "percentage": 97.06, "elapsed_time": "3:19:43", "remaining_time": "0:06:02", "throughput": 19909.93, "total_tokens": 238590784}
|
|
{"current_steps": 75815, "total_steps": 78105, "loss": 0.11, "lr": 1.3093144858041528e-08, "epoch": 4.8534024710325845, "percentage": 97.07, "elapsed_time": "3:19:44", "remaining_time": "0:06:01", "throughput": 19910.1, "total_tokens": 238605952}
|
|
{"current_steps": 75820, "total_steps": 78105, "loss": 0.1783, "lr": 1.3036106539048021e-08, "epoch": 4.8537225529735615, "percentage": 97.07, "elapsed_time": "3:19:44", "remaining_time": "0:06:01", "throughput": 19910.26, "total_tokens": 238621120}
|
|
{"current_steps": 75825, "total_steps": 78105, "loss": 0.1111, "lr": 1.2979192406077091e-08, "epoch": 4.8540426349145385, "percentage": 97.08, "elapsed_time": "3:19:45", "remaining_time": "0:06:00", "throughput": 19910.44, "total_tokens": 238636928}
|
|
{"current_steps": 75830, "total_steps": 78105, "loss": 0.0834, "lr": 1.2922402461970906e-08, "epoch": 4.8543627168555155, "percentage": 97.09, "elapsed_time": "3:19:46", "remaining_time": "0:05:59", "throughput": 19910.6, "total_tokens": 238652480}
|
|
{"current_steps": 75835, "total_steps": 78105, "loss": 0.1284, "lr": 1.2865736709565258e-08, "epoch": 4.854682798796492, "percentage": 97.09, "elapsed_time": "3:19:46", "remaining_time": "0:05:58", "throughput": 19910.85, "total_tokens": 238670016}
|
|
{"current_steps": 75840, "total_steps": 78105, "loss": 0.1419, "lr": 1.2809195151689823e-08, "epoch": 4.8550028807374686, "percentage": 97.1, "elapsed_time": "3:19:47", "remaining_time": "0:05:58", "throughput": 19911.05, "total_tokens": 238686272}
|
|
{"current_steps": 75845, "total_steps": 78105, "loss": 0.2286, "lr": 1.2752777791167626e-08, "epoch": 4.8553229626784455, "percentage": 97.11, "elapsed_time": "3:19:48", "remaining_time": "0:05:57", "throughput": 19911.23, "total_tokens": 238701376}
|
|
{"current_steps": 75850, "total_steps": 78105, "loss": 0.1259, "lr": 1.2696484630816408e-08, "epoch": 4.8556430446194225, "percentage": 97.11, "elapsed_time": "3:19:49", "remaining_time": "0:05:56", "throughput": 19911.46, "total_tokens": 238718656}
|
|
{"current_steps": 75855, "total_steps": 78105, "loss": 0.1536, "lr": 1.26403156734467e-08, "epoch": 4.8559631265603995, "percentage": 97.12, "elapsed_time": "3:19:49", "remaining_time": "0:05:55", "throughput": 19911.66, "total_tokens": 238734528}
|
|
{"current_steps": 75860, "total_steps": 78105, "loss": 0.1468, "lr": 1.2584270921863484e-08, "epoch": 4.8562832085013765, "percentage": 97.13, "elapsed_time": "3:19:50", "remaining_time": "0:05:54", "throughput": 19911.85, "total_tokens": 238750400}
|
|
{"current_steps": 75865, "total_steps": 78105, "loss": 0.1326, "lr": 1.2528350378865073e-08, "epoch": 4.8566032904423535, "percentage": 97.13, "elapsed_time": "3:19:51", "remaining_time": "0:05:54", "throughput": 19911.99, "total_tokens": 238764800}
|
|
{"current_steps": 75870, "total_steps": 78105, "loss": 0.1932, "lr": 1.2472554047244512e-08, "epoch": 4.8569233723833305, "percentage": 97.14, "elapsed_time": "3:19:51", "remaining_time": "0:05:53", "throughput": 19912.15, "total_tokens": 238779712}
|
|
{"current_steps": 75875, "total_steps": 78105, "loss": 0.1359, "lr": 1.2416881929787072e-08, "epoch": 4.857243454324307, "percentage": 97.14, "elapsed_time": "3:19:52", "remaining_time": "0:05:52", "throughput": 19912.35, "total_tokens": 238795840}
|
|
{"current_steps": 75880, "total_steps": 78105, "loss": 0.1416, "lr": 1.2361334029273586e-08, "epoch": 4.857563536265284, "percentage": 97.15, "elapsed_time": "3:19:53", "remaining_time": "0:05:51", "throughput": 19912.58, "total_tokens": 238812736}
|
|
{"current_steps": 75885, "total_steps": 78105, "loss": 0.1357, "lr": 1.2305910348477112e-08, "epoch": 4.857883618206261, "percentage": 97.16, "elapsed_time": "3:19:53", "remaining_time": "0:05:50", "throughput": 19912.78, "total_tokens": 238828928}
|
|
{"current_steps": 75890, "total_steps": 78105, "loss": 0.1251, "lr": 1.2250610890165992e-08, "epoch": 4.858203700147238, "percentage": 97.16, "elapsed_time": "3:19:54", "remaining_time": "0:05:50", "throughput": 19912.96, "total_tokens": 238844480}
|
|
{"current_steps": 75895, "total_steps": 78105, "loss": 0.1038, "lr": 1.2195435657100796e-08, "epoch": 4.858523782088215, "percentage": 97.17, "elapsed_time": "3:19:55", "remaining_time": "0:05:49", "throughput": 19913.16, "total_tokens": 238860736}
|
|
{"current_steps": 75900, "total_steps": 78105, "loss": 0.1103, "lr": 1.2140384652037095e-08, "epoch": 4.8588438640291916, "percentage": 97.18, "elapsed_time": "3:19:55", "remaining_time": "0:05:48", "throughput": 19913.32, "total_tokens": 238875776}
|
|
{"current_steps": 75905, "total_steps": 78105, "loss": 0.1327, "lr": 1.2085457877723805e-08, "epoch": 4.8591639459701685, "percentage": 97.18, "elapsed_time": "3:19:56", "remaining_time": "0:05:47", "throughput": 19913.52, "total_tokens": 238892224}
|
|
{"current_steps": 75910, "total_steps": 78105, "loss": 0.1306, "lr": 1.203065533690373e-08, "epoch": 4.8594840279111455, "percentage": 97.19, "elapsed_time": "3:19:57", "remaining_time": "0:05:46", "throughput": 19913.74, "total_tokens": 238909056}
|
|
{"current_steps": 75915, "total_steps": 78105, "loss": 0.1577, "lr": 1.1975977032313291e-08, "epoch": 4.8598041098521225, "percentage": 97.2, "elapsed_time": "3:19:57", "remaining_time": "0:05:46", "throughput": 19913.94, "total_tokens": 238924800}
|
|
{"current_steps": 75920, "total_steps": 78105, "loss": 0.1231, "lr": 1.1921422966683083e-08, "epoch": 4.8601241917930995, "percentage": 97.2, "elapsed_time": "3:19:58", "remaining_time": "0:05:45", "throughput": 19914.1, "total_tokens": 238939712}
|
|
{"current_steps": 75925, "total_steps": 78105, "loss": 0.0955, "lr": 1.1866993142737038e-08, "epoch": 4.860444273734076, "percentage": 97.21, "elapsed_time": "3:19:59", "remaining_time": "0:05:44", "throughput": 19914.26, "total_tokens": 238954944}
|
|
{"current_steps": 75930, "total_steps": 78105, "loss": 0.1146, "lr": 1.1812687563192982e-08, "epoch": 4.860764355675053, "percentage": 97.22, "elapsed_time": "3:19:59", "remaining_time": "0:05:43", "throughput": 19914.46, "total_tokens": 238971328}
|
|
{"current_steps": 75935, "total_steps": 78105, "loss": 0.1, "lr": 1.1758506230762911e-08, "epoch": 4.86108443761603, "percentage": 97.22, "elapsed_time": "3:20:00", "remaining_time": "0:05:42", "throughput": 19914.62, "total_tokens": 238986304}
|
|
{"current_steps": 75940, "total_steps": 78105, "loss": 0.0854, "lr": 1.1704449148152163e-08, "epoch": 4.861404519557007, "percentage": 97.23, "elapsed_time": "3:20:01", "remaining_time": "0:05:42", "throughput": 19914.8, "total_tokens": 239002048}
|
|
{"current_steps": 75945, "total_steps": 78105, "loss": 0.1655, "lr": 1.1650516318060523e-08, "epoch": 4.861724601497984, "percentage": 97.23, "elapsed_time": "3:20:01", "remaining_time": "0:05:41", "throughput": 19915.01, "total_tokens": 239018624}
|
|
{"current_steps": 75950, "total_steps": 78105, "loss": 0.1123, "lr": 1.159670774318028e-08, "epoch": 4.862044683438961, "percentage": 97.24, "elapsed_time": "3:20:02", "remaining_time": "0:05:40", "throughput": 19915.19, "total_tokens": 239034496}
|
|
{"current_steps": 75955, "total_steps": 78105, "loss": 0.1426, "lr": 1.1543023426199285e-08, "epoch": 4.862364765379938, "percentage": 97.25, "elapsed_time": "3:20:03", "remaining_time": "0:05:39", "throughput": 19915.36, "total_tokens": 239050112}
|
|
{"current_steps": 75960, "total_steps": 78105, "loss": 0.0867, "lr": 1.1489463369797338e-08, "epoch": 4.862684847320914, "percentage": 97.25, "elapsed_time": "3:20:03", "remaining_time": "0:05:38", "throughput": 19915.51, "total_tokens": 239064832}
|
|
{"current_steps": 75965, "total_steps": 78105, "loss": 0.0697, "lr": 1.14360275766498e-08, "epoch": 4.863004929261891, "percentage": 97.26, "elapsed_time": "3:20:04", "remaining_time": "0:05:38", "throughput": 19915.72, "total_tokens": 239081280}
|
|
{"current_steps": 75970, "total_steps": 78105, "loss": 0.1372, "lr": 1.1382716049424259e-08, "epoch": 4.863325011202868, "percentage": 97.27, "elapsed_time": "3:20:05", "remaining_time": "0:05:37", "throughput": 19915.89, "total_tokens": 239096640}
|
|
{"current_steps": 75975, "total_steps": 78105, "loss": 0.1582, "lr": 1.1329528790783307e-08, "epoch": 4.863645093143845, "percentage": 97.27, "elapsed_time": "3:20:05", "remaining_time": "0:05:36", "throughput": 19916.05, "total_tokens": 239111552}
|
|
{"current_steps": 75980, "total_steps": 78105, "loss": 0.1885, "lr": 1.1276465803382875e-08, "epoch": 4.863965175084822, "percentage": 97.28, "elapsed_time": "3:20:06", "remaining_time": "0:05:35", "throughput": 19916.24, "total_tokens": 239127104}
|
|
{"current_steps": 75985, "total_steps": 78105, "loss": 0.1205, "lr": 1.1223527089872232e-08, "epoch": 4.864285257025799, "percentage": 97.29, "elapsed_time": "3:20:07", "remaining_time": "0:05:35", "throughput": 19916.44, "total_tokens": 239143168}
|
|
{"current_steps": 75990, "total_steps": 78105, "loss": 0.1061, "lr": 1.1170712652895099e-08, "epoch": 4.864605338966776, "percentage": 97.29, "elapsed_time": "3:20:08", "remaining_time": "0:05:34", "throughput": 19916.65, "total_tokens": 239159424}
|
|
{"current_steps": 75995, "total_steps": 78105, "loss": 0.1172, "lr": 1.1118022495088532e-08, "epoch": 4.864925420907753, "percentage": 97.3, "elapsed_time": "3:20:08", "remaining_time": "0:05:33", "throughput": 19916.88, "total_tokens": 239176384}
|
|
{"current_steps": 76000, "total_steps": 78105, "loss": 0.1468, "lr": 1.1065456619084036e-08, "epoch": 4.86524550284873, "percentage": 97.3, "elapsed_time": "3:20:09", "remaining_time": "0:05:32", "throughput": 19917.06, "total_tokens": 239192064}
|
|
{"current_steps": 76005, "total_steps": 78105, "loss": 0.0856, "lr": 1.1013015027506458e-08, "epoch": 4.865565584789707, "percentage": 97.31, "elapsed_time": "3:20:10", "remaining_time": "0:05:31", "throughput": 19917.22, "total_tokens": 239206720}
|
|
{"current_steps": 76010, "total_steps": 78105, "loss": 0.1264, "lr": 1.0960697722973978e-08, "epoch": 4.865885666730683, "percentage": 97.32, "elapsed_time": "3:20:10", "remaining_time": "0:05:31", "throughput": 19917.44, "total_tokens": 239223488}
|
|
{"current_steps": 76015, "total_steps": 78105, "loss": 0.132, "lr": 1.090850470809951e-08, "epoch": 4.86620574867166, "percentage": 97.32, "elapsed_time": "3:20:11", "remaining_time": "0:05:30", "throughput": 19917.59, "total_tokens": 239238720}
|
|
{"current_steps": 76020, "total_steps": 78105, "loss": 0.1585, "lr": 1.0856435985488745e-08, "epoch": 4.866525830612637, "percentage": 97.33, "elapsed_time": "3:20:12", "remaining_time": "0:05:29", "throughput": 19917.72, "total_tokens": 239253120}
|
|
{"current_steps": 76025, "total_steps": 78105, "loss": 0.0961, "lr": 1.080449155774238e-08, "epoch": 4.866845912553614, "percentage": 97.34, "elapsed_time": "3:20:12", "remaining_time": "0:05:28", "throughput": 19917.88, "total_tokens": 239268096}
|
|
{"current_steps": 76030, "total_steps": 78105, "loss": 0.1236, "lr": 1.0752671427453898e-08, "epoch": 4.867165994494591, "percentage": 97.34, "elapsed_time": "3:20:13", "remaining_time": "0:05:27", "throughput": 19918.04, "total_tokens": 239282752}
|
|
{"current_steps": 76035, "total_steps": 78105, "loss": 0.1387, "lr": 1.070097559721095e-08, "epoch": 4.867486076435568, "percentage": 97.35, "elapsed_time": "3:20:14", "remaining_time": "0:05:27", "throughput": 19918.25, "total_tokens": 239299072}
|
|
{"current_steps": 76040, "total_steps": 78105, "loss": 0.1356, "lr": 1.0649404069595082e-08, "epoch": 4.867806158376545, "percentage": 97.36, "elapsed_time": "3:20:14", "remaining_time": "0:05:26", "throughput": 19918.43, "total_tokens": 239314624}
|
|
{"current_steps": 76045, "total_steps": 78105, "loss": 0.1301, "lr": 1.0597956847181457e-08, "epoch": 4.868126240317522, "percentage": 97.36, "elapsed_time": "3:20:15", "remaining_time": "0:05:25", "throughput": 19918.58, "total_tokens": 239329536}
|
|
{"current_steps": 76050, "total_steps": 78105, "loss": 0.1584, "lr": 1.0546633932538853e-08, "epoch": 4.868446322258498, "percentage": 97.37, "elapsed_time": "3:20:16", "remaining_time": "0:05:24", "throughput": 19918.75, "total_tokens": 239344896}
|
|
{"current_steps": 76055, "total_steps": 78105, "loss": 0.2035, "lr": 1.0495435328230497e-08, "epoch": 4.868766404199475, "percentage": 97.38, "elapsed_time": "3:20:16", "remaining_time": "0:05:23", "throughput": 19918.95, "total_tokens": 239360768}
|
|
{"current_steps": 76060, "total_steps": 78105, "loss": 0.135, "lr": 1.0444361036812401e-08, "epoch": 4.869086486140452, "percentage": 97.38, "elapsed_time": "3:20:17", "remaining_time": "0:05:23", "throughput": 19919.13, "total_tokens": 239376000}
|
|
{"current_steps": 76065, "total_steps": 78105, "loss": 0.1407, "lr": 1.0393411060835856e-08, "epoch": 4.869406568081429, "percentage": 97.39, "elapsed_time": "3:20:18", "remaining_time": "0:05:22", "throughput": 19919.3, "total_tokens": 239391232}
|
|
{"current_steps": 76070, "total_steps": 78105, "loss": 0.1369, "lr": 1.034258540284383e-08, "epoch": 4.869726650022406, "percentage": 97.39, "elapsed_time": "3:20:18", "remaining_time": "0:05:21", "throughput": 19919.5, "total_tokens": 239407424}
|
|
{"current_steps": 76075, "total_steps": 78105, "loss": 0.1577, "lr": 1.0291884065375402e-08, "epoch": 4.870046731963383, "percentage": 97.4, "elapsed_time": "3:20:19", "remaining_time": "0:05:20", "throughput": 19919.7, "total_tokens": 239423360}
|
|
{"current_steps": 76080, "total_steps": 78105, "loss": 0.1566, "lr": 1.0241307050961602e-08, "epoch": 4.87036681390436, "percentage": 97.41, "elapsed_time": "3:20:20", "remaining_time": "0:05:19", "throughput": 19919.88, "total_tokens": 239439104}
|
|
{"current_steps": 76085, "total_steps": 78105, "loss": 0.1454, "lr": 1.0190854362128465e-08, "epoch": 4.870686895845337, "percentage": 97.41, "elapsed_time": "3:20:20", "remaining_time": "0:05:19", "throughput": 19920.06, "total_tokens": 239454592}
|
|
{"current_steps": 76090, "total_steps": 78105, "loss": 0.1571, "lr": 1.014052600139509e-08, "epoch": 4.871006977786314, "percentage": 97.42, "elapsed_time": "3:20:21", "remaining_time": "0:05:18", "throughput": 19920.25, "total_tokens": 239470272}
|
|
{"current_steps": 76095, "total_steps": 78105, "loss": 0.1414, "lr": 1.0090321971274464e-08, "epoch": 4.871327059727291, "percentage": 97.43, "elapsed_time": "3:20:22", "remaining_time": "0:05:17", "throughput": 19920.43, "total_tokens": 239485696}
|
|
{"current_steps": 76100, "total_steps": 78105, "loss": 0.1537, "lr": 1.0040242274273749e-08, "epoch": 4.871647141668267, "percentage": 97.43, "elapsed_time": "3:20:22", "remaining_time": "0:05:16", "throughput": 19920.64, "total_tokens": 239501824}
|
|
{"current_steps": 76105, "total_steps": 78105, "loss": 0.1263, "lr": 9.99028691289372e-09, "epoch": 4.871967223609244, "percentage": 97.44, "elapsed_time": "3:20:23", "remaining_time": "0:05:15", "throughput": 19920.82, "total_tokens": 239517568}
|
|
{"current_steps": 76110, "total_steps": 78105, "loss": 0.1416, "lr": 9.940455889628498e-09, "epoch": 4.872287305550221, "percentage": 97.45, "elapsed_time": "3:20:24", "remaining_time": "0:05:15", "throughput": 19920.99, "total_tokens": 239532800}
|
|
{"current_steps": 76115, "total_steps": 78105, "loss": 0.1386, "lr": 9.89074920696692e-09, "epoch": 4.872607387491198, "percentage": 97.45, "elapsed_time": "3:20:24", "remaining_time": "0:05:14", "throughput": 19921.3, "total_tokens": 239552192}
|
|
{"current_steps": 76120, "total_steps": 78105, "loss": 0.0614, "lr": 9.841166867390895e-09, "epoch": 4.872927469432175, "percentage": 97.46, "elapsed_time": "3:20:25", "remaining_time": "0:05:13", "throughput": 19921.45, "total_tokens": 239566912}
|
|
{"current_steps": 76125, "total_steps": 78105, "loss": 0.125, "lr": 9.791708873375939e-09, "epoch": 4.873247551373152, "percentage": 97.46, "elapsed_time": "3:20:26", "remaining_time": "0:05:12", "throughput": 19921.65, "total_tokens": 239583040}
|
|
{"current_steps": 76130, "total_steps": 78105, "loss": 0.1232, "lr": 9.742375227392297e-09, "epoch": 4.873567633314129, "percentage": 97.47, "elapsed_time": "3:20:26", "remaining_time": "0:05:12", "throughput": 19921.79, "total_tokens": 239597440}
|
|
{"current_steps": 76135, "total_steps": 78105, "loss": 0.1181, "lr": 9.693165931903004e-09, "epoch": 4.873887715255106, "percentage": 97.48, "elapsed_time": "3:20:27", "remaining_time": "0:05:11", "throughput": 19921.96, "total_tokens": 239613056}
|
|
{"current_steps": 76140, "total_steps": 78105, "loss": 0.091, "lr": 9.644080989365534e-09, "epoch": 4.874207797196082, "percentage": 97.48, "elapsed_time": "3:20:28", "remaining_time": "0:05:10", "throughput": 19922.13, "total_tokens": 239628224}
|
|
{"current_steps": 76145, "total_steps": 78105, "loss": 0.1661, "lr": 9.595120402230707e-09, "epoch": 4.874527879137059, "percentage": 97.49, "elapsed_time": "3:20:28", "remaining_time": "0:05:09", "throughput": 19922.33, "total_tokens": 239644160}
|
|
{"current_steps": 76150, "total_steps": 78105, "loss": 0.1528, "lr": 9.546284172943787e-09, "epoch": 4.874847961078036, "percentage": 97.5, "elapsed_time": "3:20:29", "remaining_time": "0:05:08", "throughput": 19922.52, "total_tokens": 239660352}
|
|
{"current_steps": 76155, "total_steps": 78105, "loss": 0.1373, "lr": 9.497572303943103e-09, "epoch": 4.875168043019013, "percentage": 97.5, "elapsed_time": "3:20:30", "remaining_time": "0:05:08", "throughput": 19922.7, "total_tokens": 239675840}
|
|
{"current_steps": 76160, "total_steps": 78105, "loss": 0.1207, "lr": 9.448984797660876e-09, "epoch": 4.87548812495999, "percentage": 97.51, "elapsed_time": "3:20:31", "remaining_time": "0:05:07", "throughput": 19922.93, "total_tokens": 239692928}
|
|
{"current_steps": 76165, "total_steps": 78105, "loss": 0.1428, "lr": 9.400521656523775e-09, "epoch": 4.875808206900967, "percentage": 97.52, "elapsed_time": "3:20:31", "remaining_time": "0:05:06", "throughput": 19923.11, "total_tokens": 239708288}
|
|
{"current_steps": 76170, "total_steps": 78105, "loss": 0.1252, "lr": 9.352182882951255e-09, "epoch": 4.876128288841944, "percentage": 97.52, "elapsed_time": "3:20:32", "remaining_time": "0:05:05", "throughput": 19923.29, "total_tokens": 239724416}
|
|
{"current_steps": 76175, "total_steps": 78105, "loss": 0.1028, "lr": 9.303968479357772e-09, "epoch": 4.876448370782921, "percentage": 97.53, "elapsed_time": "3:20:33", "remaining_time": "0:05:04", "throughput": 19923.5, "total_tokens": 239740800}
|
|
{"current_steps": 76180, "total_steps": 78105, "loss": 0.1897, "lr": 9.25587844815029e-09, "epoch": 4.876768452723898, "percentage": 97.54, "elapsed_time": "3:20:33", "remaining_time": "0:05:04", "throughput": 19923.69, "total_tokens": 239756096}
|
|
{"current_steps": 76185, "total_steps": 78105, "loss": 0.1047, "lr": 9.207912791730777e-09, "epoch": 4.877088534664875, "percentage": 97.54, "elapsed_time": "3:20:34", "remaining_time": "0:05:03", "throughput": 19923.84, "total_tokens": 239770752}
|
|
{"current_steps": 76190, "total_steps": 78105, "loss": 0.1175, "lr": 9.160071512493706e-09, "epoch": 4.877408616605851, "percentage": 97.55, "elapsed_time": "3:20:35", "remaining_time": "0:05:02", "throughput": 19924.03, "total_tokens": 239787072}
|
|
{"current_steps": 76195, "total_steps": 78105, "loss": 0.1365, "lr": 9.112354612828277e-09, "epoch": 4.877728698546828, "percentage": 97.55, "elapsed_time": "3:20:35", "remaining_time": "0:05:01", "throughput": 19924.21, "total_tokens": 239802752}
|
|
{"current_steps": 76200, "total_steps": 78105, "loss": 0.1338, "lr": 9.064762095117585e-09, "epoch": 4.878048780487805, "percentage": 97.56, "elapsed_time": "3:20:36", "remaining_time": "0:05:00", "throughput": 19924.37, "total_tokens": 239818048}
|
|
{"current_steps": 76205, "total_steps": 78105, "loss": 0.1664, "lr": 9.017293961737783e-09, "epoch": 4.878368862428782, "percentage": 97.57, "elapsed_time": "3:20:37", "remaining_time": "0:05:00", "throughput": 19924.56, "total_tokens": 239833984}
|
|
{"current_steps": 76210, "total_steps": 78105, "loss": 0.1539, "lr": 8.969950215059198e-09, "epoch": 4.878688944369759, "percentage": 97.57, "elapsed_time": "3:20:37", "remaining_time": "0:04:59", "throughput": 19924.74, "total_tokens": 239849344}
|
|
{"current_steps": 76215, "total_steps": 78105, "loss": 0.1103, "lr": 8.922730857445771e-09, "epoch": 4.879009026310736, "percentage": 97.58, "elapsed_time": "3:20:38", "remaining_time": "0:04:58", "throughput": 19924.89, "total_tokens": 239863936}
|
|
{"current_steps": 76220, "total_steps": 78105, "loss": 0.1669, "lr": 8.875635891255896e-09, "epoch": 4.879329108251713, "percentage": 97.59, "elapsed_time": "3:20:39", "remaining_time": "0:04:57", "throughput": 19925.1, "total_tokens": 239880192}
|
|
{"current_steps": 76225, "total_steps": 78105, "loss": 0.1698, "lr": 8.828665318841024e-09, "epoch": 4.879649190192689, "percentage": 97.59, "elapsed_time": "3:20:39", "remaining_time": "0:04:56", "throughput": 19925.31, "total_tokens": 239896768}
|
|
{"current_steps": 76230, "total_steps": 78105, "loss": 0.1383, "lr": 8.781819142546499e-09, "epoch": 4.879969272133666, "percentage": 97.6, "elapsed_time": "3:20:40", "remaining_time": "0:04:56", "throughput": 19925.5, "total_tokens": 239912576}
|
|
{"current_steps": 76235, "total_steps": 78105, "loss": 0.1702, "lr": 8.735097364711565e-09, "epoch": 4.880289354074643, "percentage": 97.61, "elapsed_time": "3:20:41", "remaining_time": "0:04:55", "throughput": 19925.75, "total_tokens": 239930176}
|
|
{"current_steps": 76240, "total_steps": 78105, "loss": 0.1631, "lr": 8.688499987669351e-09, "epoch": 4.88060943601562, "percentage": 97.61, "elapsed_time": "3:20:41", "remaining_time": "0:04:54", "throughput": 19925.96, "total_tokens": 239946752}
|
|
{"current_steps": 76245, "total_steps": 78105, "loss": 0.1363, "lr": 8.642027013746889e-09, "epoch": 4.880929517956597, "percentage": 97.62, "elapsed_time": "3:20:42", "remaining_time": "0:04:53", "throughput": 19926.15, "total_tokens": 239962496}
|
|
{"current_steps": 76250, "total_steps": 78105, "loss": 0.1232, "lr": 8.595678445264544e-09, "epoch": 4.881249599897574, "percentage": 97.62, "elapsed_time": "3:20:43", "remaining_time": "0:04:52", "throughput": 19926.34, "total_tokens": 239978496}
|
|
{"current_steps": 76255, "total_steps": 78105, "loss": 0.1478, "lr": 8.549454284536573e-09, "epoch": 4.881569681838551, "percentage": 97.63, "elapsed_time": "3:20:43", "remaining_time": "0:04:52", "throughput": 19926.51, "total_tokens": 239993536}
|
|
{"current_steps": 76260, "total_steps": 78105, "loss": 0.166, "lr": 8.50335453387141e-09, "epoch": 4.881889763779528, "percentage": 97.64, "elapsed_time": "3:20:44", "remaining_time": "0:04:51", "throughput": 19926.69, "total_tokens": 240009152}
|
|
{"current_steps": 76265, "total_steps": 78105, "loss": 0.1519, "lr": 8.457379195571102e-09, "epoch": 4.882209845720505, "percentage": 97.64, "elapsed_time": "3:20:45", "remaining_time": "0:04:50", "throughput": 19926.87, "total_tokens": 240024832}
|
|
{"current_steps": 76270, "total_steps": 78105, "loss": 0.1361, "lr": 8.411528271931313e-09, "epoch": 4.882529927661482, "percentage": 97.65, "elapsed_time": "3:20:45", "remaining_time": "0:04:49", "throughput": 19927.06, "total_tokens": 240040320}
|
|
{"current_steps": 76275, "total_steps": 78105, "loss": 0.1749, "lr": 8.365801765241321e-09, "epoch": 4.882850009602458, "percentage": 97.66, "elapsed_time": "3:20:46", "remaining_time": "0:04:49", "throughput": 19927.23, "total_tokens": 240055424}
|
|
{"current_steps": 76280, "total_steps": 78105, "loss": 0.2047, "lr": 8.320199677784857e-09, "epoch": 4.883170091543435, "percentage": 97.66, "elapsed_time": "3:20:47", "remaining_time": "0:04:48", "throughput": 19927.43, "total_tokens": 240071872}
|
|
{"current_steps": 76285, "total_steps": 78105, "loss": 0.0991, "lr": 8.274722011838986e-09, "epoch": 4.883490173484412, "percentage": 97.67, "elapsed_time": "3:20:47", "remaining_time": "0:04:47", "throughput": 19927.62, "total_tokens": 240087872}
|
|
{"current_steps": 76290, "total_steps": 78105, "loss": 0.0973, "lr": 8.229368769674673e-09, "epoch": 4.883810255425389, "percentage": 97.68, "elapsed_time": "3:20:48", "remaining_time": "0:04:46", "throughput": 19927.79, "total_tokens": 240102912}
|
|
{"current_steps": 76295, "total_steps": 78105, "loss": 0.1305, "lr": 8.184139953556491e-09, "epoch": 4.884130337366366, "percentage": 97.68, "elapsed_time": "3:20:49", "remaining_time": "0:04:45", "throughput": 19927.97, "total_tokens": 240118592}
|
|
{"current_steps": 76300, "total_steps": 78105, "loss": 0.1303, "lr": 8.139035565742915e-09, "epoch": 4.884450419307343, "percentage": 97.69, "elapsed_time": "3:20:50", "remaining_time": "0:04:45", "throughput": 19928.17, "total_tokens": 240134464}
|
|
{"current_steps": 76305, "total_steps": 78105, "loss": 0.1544, "lr": 8.094055608486028e-09, "epoch": 4.88477050124832, "percentage": 97.7, "elapsed_time": "3:20:50", "remaining_time": "0:04:44", "throughput": 19928.36, "total_tokens": 240150720}
|
|
{"current_steps": 76310, "total_steps": 78105, "loss": 0.1457, "lr": 8.049200084032649e-09, "epoch": 4.885090583189297, "percentage": 97.7, "elapsed_time": "3:20:51", "remaining_time": "0:04:43", "throughput": 19928.53, "total_tokens": 240166272}
|
|
{"current_steps": 76315, "total_steps": 78105, "loss": 0.1214, "lr": 8.004468994621816e-09, "epoch": 4.885410665130273, "percentage": 97.71, "elapsed_time": "3:20:52", "remaining_time": "0:04:42", "throughput": 19928.69, "total_tokens": 240181056}
|
|
{"current_steps": 76320, "total_steps": 78105, "loss": 0.1339, "lr": 7.959862342487579e-09, "epoch": 4.88573074707125, "percentage": 97.71, "elapsed_time": "3:20:52", "remaining_time": "0:04:41", "throughput": 19928.86, "total_tokens": 240196160}
|
|
{"current_steps": 76325, "total_steps": 78105, "loss": 0.1292, "lr": 7.915380129857042e-09, "epoch": 4.886050829012227, "percentage": 97.72, "elapsed_time": "3:20:53", "remaining_time": "0:04:41", "throughput": 19929.03, "total_tokens": 240211520}
|
|
{"current_steps": 76330, "total_steps": 78105, "loss": 0.1346, "lr": 7.871022358951763e-09, "epoch": 4.886370910953204, "percentage": 97.73, "elapsed_time": "3:20:54", "remaining_time": "0:04:40", "throughput": 19929.24, "total_tokens": 240227776}
|
|
{"current_steps": 76335, "total_steps": 78105, "loss": 0.1496, "lr": 7.826789031986638e-09, "epoch": 4.886690992894181, "percentage": 97.73, "elapsed_time": "3:20:54", "remaining_time": "0:04:39", "throughput": 19929.41, "total_tokens": 240243072}
|
|
{"current_steps": 76340, "total_steps": 78105, "loss": 0.1494, "lr": 7.782680151170175e-09, "epoch": 4.887011074835158, "percentage": 97.74, "elapsed_time": "3:20:55", "remaining_time": "0:04:38", "throughput": 19929.59, "total_tokens": 240258624}
|
|
{"current_steps": 76345, "total_steps": 78105, "loss": 0.1253, "lr": 7.738695718705614e-09, "epoch": 4.887331156776135, "percentage": 97.75, "elapsed_time": "3:20:56", "remaining_time": "0:04:37", "throughput": 19929.78, "total_tokens": 240274496}
|
|
{"current_steps": 76350, "total_steps": 78105, "loss": 0.137, "lr": 7.694835736788697e-09, "epoch": 4.887651238717112, "percentage": 97.75, "elapsed_time": "3:20:56", "remaining_time": "0:04:37", "throughput": 19929.96, "total_tokens": 240289536}
|
|
{"current_steps": 76355, "total_steps": 78105, "loss": 0.1308, "lr": 7.651100207609619e-09, "epoch": 4.887971320658089, "percentage": 97.76, "elapsed_time": "3:20:57", "remaining_time": "0:04:36", "throughput": 19930.13, "total_tokens": 240304704}
|
|
{"current_steps": 76360, "total_steps": 78105, "loss": 0.1412, "lr": 7.607489133352464e-09, "epoch": 4.888291402599066, "percentage": 97.77, "elapsed_time": "3:20:58", "remaining_time": "0:04:35", "throughput": 19930.31, "total_tokens": 240320256}
|
|
{"current_steps": 76365, "total_steps": 78105, "loss": 0.1534, "lr": 7.564002516194936e-09, "epoch": 4.888611484540042, "percentage": 97.77, "elapsed_time": "3:20:58", "remaining_time": "0:04:34", "throughput": 19930.56, "total_tokens": 240337536}
|
|
{"current_steps": 76370, "total_steps": 78105, "loss": 0.1367, "lr": 7.520640358308351e-09, "epoch": 4.888931566481019, "percentage": 97.78, "elapsed_time": "3:20:59", "remaining_time": "0:04:33", "throughput": 19930.77, "total_tokens": 240353664}
|
|
{"current_steps": 76375, "total_steps": 78105, "loss": 0.1344, "lr": 7.477402661858201e-09, "epoch": 4.889251648421996, "percentage": 97.79, "elapsed_time": "3:21:00", "remaining_time": "0:04:33", "throughput": 19930.93, "total_tokens": 240368896}
|
|
{"current_steps": 76380, "total_steps": 78105, "loss": 0.1102, "lr": 7.434289429003872e-09, "epoch": 4.889571730362973, "percentage": 97.79, "elapsed_time": "3:21:00", "remaining_time": "0:04:32", "throughput": 19931.12, "total_tokens": 240384704}
|
|
{"current_steps": 76385, "total_steps": 78105, "loss": 0.1189, "lr": 7.391300661897249e-09, "epoch": 4.88989181230395, "percentage": 97.8, "elapsed_time": "3:21:01", "remaining_time": "0:04:31", "throughput": 19931.3, "total_tokens": 240400128}
|
|
{"current_steps": 76390, "total_steps": 78105, "loss": 0.1221, "lr": 7.348436362685785e-09, "epoch": 4.890211894244927, "percentage": 97.8, "elapsed_time": "3:21:02", "remaining_time": "0:04:30", "throughput": 19931.47, "total_tokens": 240415552}
|
|
{"current_steps": 76395, "total_steps": 78105, "loss": 0.1167, "lr": 7.30569653350971e-09, "epoch": 4.890531976185904, "percentage": 97.81, "elapsed_time": "3:21:02", "remaining_time": "0:04:30", "throughput": 19931.64, "total_tokens": 240430848}
|
|
{"current_steps": 76400, "total_steps": 78105, "loss": 0.132, "lr": 7.26308117650315e-09, "epoch": 4.890852058126881, "percentage": 97.82, "elapsed_time": "3:21:03", "remaining_time": "0:04:29", "throughput": 19931.85, "total_tokens": 240447424}
|
|
{"current_steps": 76405, "total_steps": 78105, "loss": 0.1441, "lr": 7.220590293794405e-09, "epoch": 4.891172140067857, "percentage": 97.82, "elapsed_time": "3:21:04", "remaining_time": "0:04:28", "throughput": 19932.03, "total_tokens": 240463360}
|
|
{"current_steps": 76410, "total_steps": 78105, "loss": 0.1452, "lr": 7.178223887504554e-09, "epoch": 4.891492222008834, "percentage": 97.83, "elapsed_time": "3:21:04", "remaining_time": "0:04:27", "throughput": 19932.19, "total_tokens": 240478144}
|
|
{"current_steps": 76415, "total_steps": 78105, "loss": 0.1284, "lr": 7.135981959749683e-09, "epoch": 4.891812303949811, "percentage": 97.84, "elapsed_time": "3:21:05", "remaining_time": "0:04:26", "throughput": 19932.38, "total_tokens": 240494016}
|
|
{"current_steps": 76420, "total_steps": 78105, "loss": 0.1206, "lr": 7.093864512638937e-09, "epoch": 4.892132385890788, "percentage": 97.84, "elapsed_time": "3:21:06", "remaining_time": "0:04:26", "throughput": 19932.54, "total_tokens": 240509184}
|
|
{"current_steps": 76425, "total_steps": 78105, "loss": 0.1024, "lr": 7.051871548275635e-09, "epoch": 4.892452467831765, "percentage": 97.85, "elapsed_time": "3:21:06", "remaining_time": "0:04:25", "throughput": 19932.7, "total_tokens": 240524224}
|
|
{"current_steps": 76430, "total_steps": 78105, "loss": 0.152, "lr": 7.01000306875671e-09, "epoch": 4.892772549772742, "percentage": 97.86, "elapsed_time": "3:21:07", "remaining_time": "0:04:24", "throughput": 19932.87, "total_tokens": 240539328}
|
|
{"current_steps": 76435, "total_steps": 78105, "loss": 0.1137, "lr": 6.9682590761724344e-09, "epoch": 4.893092631713719, "percentage": 97.86, "elapsed_time": "3:21:08", "remaining_time": "0:04:23", "throughput": 19933.01, "total_tokens": 240553920}
|
|
{"current_steps": 76440, "total_steps": 78105, "loss": 0.1477, "lr": 6.926639572607807e-09, "epoch": 4.893412713654696, "percentage": 97.87, "elapsed_time": "3:21:08", "remaining_time": "0:04:22", "throughput": 19933.22, "total_tokens": 240570688}
|
|
{"current_steps": 76445, "total_steps": 78105, "loss": 0.1921, "lr": 6.8851445601408884e-09, "epoch": 4.893732795595673, "percentage": 97.87, "elapsed_time": "3:21:09", "remaining_time": "0:04:22", "throughput": 19933.45, "total_tokens": 240587648}
|
|
{"current_steps": 76450, "total_steps": 78105, "loss": 0.1596, "lr": 6.843774040843354e-09, "epoch": 4.89405287753665, "percentage": 97.88, "elapsed_time": "3:21:10", "remaining_time": "0:04:21", "throughput": 19933.66, "total_tokens": 240604224}
|
|
{"current_steps": 76455, "total_steps": 78105, "loss": 0.1189, "lr": 6.802528016781607e-09, "epoch": 4.894372959477626, "percentage": 97.89, "elapsed_time": "3:21:10", "remaining_time": "0:04:20", "throughput": 19933.89, "total_tokens": 240621120}
|
|
{"current_steps": 76460, "total_steps": 78105, "loss": 0.1566, "lr": 6.76140649001511e-09, "epoch": 4.894693041418603, "percentage": 97.89, "elapsed_time": "3:21:11", "remaining_time": "0:04:19", "throughput": 19934.11, "total_tokens": 240637888}
|
|
{"current_steps": 76465, "total_steps": 78105, "loss": 0.1291, "lr": 6.720409462596944e-09, "epoch": 4.89501312335958, "percentage": 97.9, "elapsed_time": "3:21:12", "remaining_time": "0:04:18", "throughput": 19934.27, "total_tokens": 240653248}
|
|
{"current_steps": 76470, "total_steps": 78105, "loss": 0.1721, "lr": 6.6795369365746375e-09, "epoch": 4.895333205300557, "percentage": 97.91, "elapsed_time": "3:21:13", "remaining_time": "0:04:18", "throughput": 19934.45, "total_tokens": 240669056}
|
|
{"current_steps": 76475, "total_steps": 78105, "loss": 0.1247, "lr": 6.638788913989058e-09, "epoch": 4.895653287241534, "percentage": 97.91, "elapsed_time": "3:21:13", "remaining_time": "0:04:17", "throughput": 19934.67, "total_tokens": 240685376}
|
|
{"current_steps": 76480, "total_steps": 78105, "loss": 0.1372, "lr": 6.5981653968749674e-09, "epoch": 4.895973369182511, "percentage": 97.92, "elapsed_time": "3:21:14", "remaining_time": "0:04:16", "throughput": 19934.82, "total_tokens": 240700224}
|
|
{"current_steps": 76485, "total_steps": 78105, "loss": 0.1509, "lr": 6.557666387260464e-09, "epoch": 4.896293451123488, "percentage": 97.93, "elapsed_time": "3:21:15", "remaining_time": "0:04:15", "throughput": 19935.0, "total_tokens": 240715712}
|
|
{"current_steps": 76490, "total_steps": 78105, "loss": 0.1406, "lr": 6.517291887168653e-09, "epoch": 4.896613533064464, "percentage": 97.93, "elapsed_time": "3:21:15", "remaining_time": "0:04:14", "throughput": 19935.23, "total_tokens": 240732800}
|
|
{"current_steps": 76495, "total_steps": 78105, "loss": 0.1357, "lr": 6.477041898614866e-09, "epoch": 4.896933615005441, "percentage": 97.94, "elapsed_time": "3:21:16", "remaining_time": "0:04:14", "throughput": 19935.42, "total_tokens": 240748544}
|
|
{"current_steps": 76500, "total_steps": 78105, "loss": 0.0972, "lr": 6.436916423609718e-09, "epoch": 4.897253696946418, "percentage": 97.95, "elapsed_time": "3:21:17", "remaining_time": "0:04:13", "throughput": 19935.57, "total_tokens": 240763392}
|
|
{"current_steps": 76505, "total_steps": 78105, "loss": 0.1498, "lr": 6.39691546415605e-09, "epoch": 4.897573778887395, "percentage": 97.95, "elapsed_time": "3:21:17", "remaining_time": "0:04:12", "throughput": 19935.83, "total_tokens": 240781120}
|
|
{"current_steps": 76510, "total_steps": 78105, "loss": 0.1136, "lr": 6.357039022251988e-09, "epoch": 4.897893860828372, "percentage": 97.96, "elapsed_time": "3:21:18", "remaining_time": "0:04:11", "throughput": 19936.0, "total_tokens": 240796480}
|
|
{"current_steps": 76515, "total_steps": 78105, "loss": 0.1309, "lr": 6.317287099888436e-09, "epoch": 4.898213942769349, "percentage": 97.96, "elapsed_time": "3:21:19", "remaining_time": "0:04:11", "throughput": 19936.15, "total_tokens": 240811328}
|
|
{"current_steps": 76520, "total_steps": 78105, "loss": 0.1471, "lr": 6.277659699050476e-09, "epoch": 4.898534024710326, "percentage": 97.97, "elapsed_time": "3:21:19", "remaining_time": "0:04:10", "throughput": 19936.33, "total_tokens": 240826944}
|
|
{"current_steps": 76525, "total_steps": 78105, "loss": 0.1394, "lr": 6.238156821716801e-09, "epoch": 4.898854106651303, "percentage": 97.98, "elapsed_time": "3:21:20", "remaining_time": "0:04:09", "throughput": 19936.49, "total_tokens": 240841856}
|
|
{"current_steps": 76530, "total_steps": 78105, "loss": 0.1103, "lr": 6.1987784698600005e-09, "epoch": 4.89917418859228, "percentage": 97.98, "elapsed_time": "3:21:21", "remaining_time": "0:04:08", "throughput": 19936.68, "total_tokens": 240857472}
|
|
{"current_steps": 76535, "total_steps": 78105, "loss": 0.1552, "lr": 6.159524645446558e-09, "epoch": 4.899494270533257, "percentage": 97.99, "elapsed_time": "3:21:21", "remaining_time": "0:04:07", "throughput": 19936.85, "total_tokens": 240872768}
|
|
{"current_steps": 76540, "total_steps": 78105, "loss": 0.1359, "lr": 6.12039535043657e-09, "epoch": 4.899814352474233, "percentage": 98.0, "elapsed_time": "3:21:22", "remaining_time": "0:04:07", "throughput": 19936.99, "total_tokens": 240887424}
|
|
{"current_steps": 76545, "total_steps": 78105, "loss": 0.1248, "lr": 6.081390586783753e-09, "epoch": 4.90013443441521, "percentage": 98.0, "elapsed_time": "3:21:23", "remaining_time": "0:04:06", "throughput": 19937.2, "total_tokens": 240903936}
|
|
{"current_steps": 76550, "total_steps": 78105, "loss": 0.1086, "lr": 6.042510356435993e-09, "epoch": 4.900454516356187, "percentage": 98.01, "elapsed_time": "3:21:23", "remaining_time": "0:04:05", "throughput": 19937.38, "total_tokens": 240919680}
|
|
{"current_steps": 76555, "total_steps": 78105, "loss": 0.0986, "lr": 6.003754661334793e-09, "epoch": 4.900774598297164, "percentage": 98.02, "elapsed_time": "3:21:24", "remaining_time": "0:04:04", "throughput": 19937.55, "total_tokens": 240934784}
|
|
{"current_steps": 76560, "total_steps": 78105, "loss": 0.1642, "lr": 5.965123503415271e-09, "epoch": 4.901094680238141, "percentage": 98.02, "elapsed_time": "3:21:25", "remaining_time": "0:04:03", "throughput": 19937.73, "total_tokens": 240950656}
|
|
{"current_steps": 76565, "total_steps": 78105, "loss": 0.1192, "lr": 5.9266168846064395e-09, "epoch": 4.901414762179118, "percentage": 98.03, "elapsed_time": "3:21:25", "remaining_time": "0:04:03", "throughput": 19937.9, "total_tokens": 240966016}
|
|
{"current_steps": 76570, "total_steps": 78105, "loss": 0.1047, "lr": 5.888234806831206e-09, "epoch": 4.901734844120095, "percentage": 98.03, "elapsed_time": "3:21:26", "remaining_time": "0:04:02", "throughput": 19938.09, "total_tokens": 240981888}
|
|
{"current_steps": 76575, "total_steps": 78105, "loss": 0.097, "lr": 5.849977272006369e-09, "epoch": 4.902054926061072, "percentage": 98.04, "elapsed_time": "3:21:27", "remaining_time": "0:04:01", "throughput": 19938.27, "total_tokens": 240997248}
|
|
{"current_steps": 76580, "total_steps": 78105, "loss": 0.1327, "lr": 5.811844282042067e-09, "epoch": 4.902375008002048, "percentage": 98.05, "elapsed_time": "3:21:27", "remaining_time": "0:04:00", "throughput": 19938.42, "total_tokens": 241011904}
|
|
{"current_steps": 76585, "total_steps": 78105, "loss": 0.1364, "lr": 5.773835838842334e-09, "epoch": 4.902695089943025, "percentage": 98.05, "elapsed_time": "3:21:28", "remaining_time": "0:03:59", "throughput": 19938.58, "total_tokens": 241026944}
|
|
{"current_steps": 76590, "total_steps": 78105, "loss": 0.1266, "lr": 5.735951944305373e-09, "epoch": 4.903015171884002, "percentage": 98.06, "elapsed_time": "3:21:29", "remaining_time": "0:03:59", "throughput": 19938.78, "total_tokens": 241043200}
|
|
{"current_steps": 76595, "total_steps": 78105, "loss": 0.0909, "lr": 5.698192600323005e-09, "epoch": 4.903335253824979, "percentage": 98.07, "elapsed_time": "3:21:30", "remaining_time": "0:03:58", "throughput": 19939.22, "total_tokens": 241071744}
|
|
{"current_steps": 76600, "total_steps": 78105, "loss": 0.1238, "lr": 5.660557808780387e-09, "epoch": 4.903655335765956, "percentage": 98.07, "elapsed_time": "3:21:31", "remaining_time": "0:03:57", "throughput": 19939.38, "total_tokens": 241087296}
|
|
{"current_steps": 76605, "total_steps": 78105, "loss": 0.0983, "lr": 5.62304757155685e-09, "epoch": 4.903975417706933, "percentage": 98.08, "elapsed_time": "3:21:31", "remaining_time": "0:03:56", "throughput": 19939.56, "total_tokens": 241103104}
|
|
{"current_steps": 76610, "total_steps": 78105, "loss": 0.1001, "lr": 5.585661890525895e-09, "epoch": 4.90429549964791, "percentage": 98.09, "elapsed_time": "3:21:32", "remaining_time": "0:03:55", "throughput": 19939.69, "total_tokens": 241117376}
|
|
{"current_steps": 76615, "total_steps": 78105, "loss": 0.1411, "lr": 5.548400767553808e-09, "epoch": 4.904615581588887, "percentage": 98.09, "elapsed_time": "3:21:32", "remaining_time": "0:03:55", "throughput": 19939.87, "total_tokens": 241132480}
|
|
{"current_steps": 76620, "total_steps": 78105, "loss": 0.1447, "lr": 5.511264204501321e-09, "epoch": 4.904935663529864, "percentage": 98.1, "elapsed_time": "3:21:33", "remaining_time": "0:03:54", "throughput": 19940.06, "total_tokens": 241148416}
|
|
{"current_steps": 76625, "total_steps": 78105, "loss": 0.1086, "lr": 5.47425220322334e-09, "epoch": 4.905255745470841, "percentage": 98.11, "elapsed_time": "3:21:34", "remaining_time": "0:03:53", "throughput": 19940.3, "total_tokens": 241165824}
|
|
{"current_steps": 76630, "total_steps": 78105, "loss": 0.1246, "lr": 5.437364765567555e-09, "epoch": 4.905575827411817, "percentage": 98.11, "elapsed_time": "3:21:35", "remaining_time": "0:03:52", "throughput": 19940.45, "total_tokens": 241180416}
|
|
{"current_steps": 76635, "total_steps": 78105, "loss": 0.1738, "lr": 5.400601893376101e-09, "epoch": 4.905895909352794, "percentage": 98.12, "elapsed_time": "3:21:35", "remaining_time": "0:03:52", "throughput": 19940.66, "total_tokens": 241197248}
|
|
{"current_steps": 76640, "total_steps": 78105, "loss": 0.1298, "lr": 5.363963588484456e-09, "epoch": 4.906215991293771, "percentage": 98.12, "elapsed_time": "3:21:36", "remaining_time": "0:03:51", "throughput": 19940.85, "total_tokens": 241213056}
|
|
{"current_steps": 76645, "total_steps": 78105, "loss": 0.1165, "lr": 5.327449852722821e-09, "epoch": 4.906536073234748, "percentage": 98.13, "elapsed_time": "3:21:37", "remaining_time": "0:03:50", "throughput": 19941.04, "total_tokens": 241228672}
|
|
{"current_steps": 76650, "total_steps": 78105, "loss": 0.1659, "lr": 5.291060687913629e-09, "epoch": 4.906856155175725, "percentage": 98.14, "elapsed_time": "3:21:37", "remaining_time": "0:03:49", "throughput": 19941.21, "total_tokens": 241243712}
|
|
{"current_steps": 76655, "total_steps": 78105, "loss": 0.1274, "lr": 5.254796095874592e-09, "epoch": 4.907176237116702, "percentage": 98.14, "elapsed_time": "3:21:38", "remaining_time": "0:03:48", "throughput": 19941.39, "total_tokens": 241259456}
|
|
{"current_steps": 76660, "total_steps": 78105, "loss": 0.2301, "lr": 5.2186560784164845e-09, "epoch": 4.907496319057679, "percentage": 98.15, "elapsed_time": "3:21:39", "remaining_time": "0:03:48", "throughput": 19941.59, "total_tokens": 241275264}
|
|
{"current_steps": 76665, "total_steps": 78105, "loss": 0.1328, "lr": 5.1826406373436945e-09, "epoch": 4.907816400998656, "percentage": 98.16, "elapsed_time": "3:21:39", "remaining_time": "0:03:47", "throughput": 19941.81, "total_tokens": 241291840}
|
|
{"current_steps": 76670, "total_steps": 78105, "loss": 0.2093, "lr": 5.146749774455062e-09, "epoch": 4.908136482939632, "percentage": 98.16, "elapsed_time": "3:21:40", "remaining_time": "0:03:46", "throughput": 19941.99, "total_tokens": 241307520}
|
|
{"current_steps": 76675, "total_steps": 78105, "loss": 0.1466, "lr": 5.11098349154221e-09, "epoch": 4.908456564880609, "percentage": 98.17, "elapsed_time": "3:21:41", "remaining_time": "0:03:45", "throughput": 19942.18, "total_tokens": 241323264}
|
|
{"current_steps": 76680, "total_steps": 78105, "loss": 0.1059, "lr": 5.075341790391486e-09, "epoch": 4.908776646821586, "percentage": 98.18, "elapsed_time": "3:21:41", "remaining_time": "0:03:44", "throughput": 19942.36, "total_tokens": 241338880}
|
|
{"current_steps": 76685, "total_steps": 78105, "loss": 0.141, "lr": 5.0398246727825785e-09, "epoch": 4.909096728762563, "percentage": 98.18, "elapsed_time": "3:21:42", "remaining_time": "0:03:44", "throughput": 19942.55, "total_tokens": 241355200}
|
|
{"current_steps": 76690, "total_steps": 78105, "loss": 0.1335, "lr": 5.004432140489346e-09, "epoch": 4.90941681070354, "percentage": 98.19, "elapsed_time": "3:21:43", "remaining_time": "0:03:43", "throughput": 19942.73, "total_tokens": 241370240}
|
|
{"current_steps": 76695, "total_steps": 78105, "loss": 0.1502, "lr": 4.969164195278431e-09, "epoch": 4.909736892644517, "percentage": 98.19, "elapsed_time": "3:21:43", "remaining_time": "0:03:42", "throughput": 19942.9, "total_tokens": 241385728}
|
|
{"current_steps": 76700, "total_steps": 78105, "loss": 0.1209, "lr": 4.934020838911757e-09, "epoch": 4.910056974585494, "percentage": 98.2, "elapsed_time": "3:21:44", "remaining_time": "0:03:41", "throughput": 19943.09, "total_tokens": 241401472}
|
|
{"current_steps": 76705, "total_steps": 78105, "loss": 0.1353, "lr": 4.8990020731434775e-09, "epoch": 4.910377056526471, "percentage": 98.21, "elapsed_time": "3:21:45", "remaining_time": "0:03:40", "throughput": 19943.29, "total_tokens": 241417792}
|
|
{"current_steps": 76710, "total_steps": 78105, "loss": 0.1508, "lr": 4.86410789972247e-09, "epoch": 4.910697138467448, "percentage": 98.21, "elapsed_time": "3:21:45", "remaining_time": "0:03:40", "throughput": 19943.56, "total_tokens": 241435840}
|
|
{"current_steps": 76715, "total_steps": 78105, "loss": 0.1295, "lr": 4.8293383203912305e-09, "epoch": 4.911017220408425, "percentage": 98.22, "elapsed_time": "3:21:46", "remaining_time": "0:03:39", "throughput": 19943.76, "total_tokens": 241451904}
|
|
{"current_steps": 76720, "total_steps": 78105, "loss": 0.1311, "lr": 4.794693336886147e-09, "epoch": 4.911337302349401, "percentage": 98.23, "elapsed_time": "3:21:47", "remaining_time": "0:03:38", "throughput": 19943.92, "total_tokens": 241467136}
|
|
{"current_steps": 76725, "total_steps": 78105, "loss": 0.1043, "lr": 4.760172950936947e-09, "epoch": 4.911657384290378, "percentage": 98.23, "elapsed_time": "3:21:47", "remaining_time": "0:03:37", "throughput": 19944.09, "total_tokens": 241482432}
|
|
{"current_steps": 76730, "total_steps": 78105, "loss": 0.189, "lr": 4.725777164267531e-09, "epoch": 4.911977466231355, "percentage": 98.24, "elapsed_time": "3:21:48", "remaining_time": "0:03:36", "throughput": 19944.26, "total_tokens": 241497728}
|
|
{"current_steps": 76735, "total_steps": 78105, "loss": 0.125, "lr": 4.691505978595135e-09, "epoch": 4.912297548172332, "percentage": 98.25, "elapsed_time": "3:21:49", "remaining_time": "0:03:36", "throughput": 19944.42, "total_tokens": 241513024}
|
|
{"current_steps": 76740, "total_steps": 78105, "loss": 0.1549, "lr": 4.657359395631722e-09, "epoch": 4.912617630113309, "percentage": 98.25, "elapsed_time": "3:21:49", "remaining_time": "0:03:35", "throughput": 19944.61, "total_tokens": 241528384}
|
|
{"current_steps": 76745, "total_steps": 78105, "loss": 0.1177, "lr": 4.623337417081764e-09, "epoch": 4.912937712054286, "percentage": 98.26, "elapsed_time": "3:21:50", "remaining_time": "0:03:34", "throughput": 19944.78, "total_tokens": 241543360}
|
|
{"current_steps": 76750, "total_steps": 78105, "loss": 0.1099, "lr": 4.589440044644456e-09, "epoch": 4.913257793995263, "percentage": 98.27, "elapsed_time": "3:21:51", "remaining_time": "0:03:33", "throughput": 19944.98, "total_tokens": 241559744}
|
|
{"current_steps": 76755, "total_steps": 78105, "loss": 0.1189, "lr": 4.555667280012332e-09, "epoch": 4.913577875936239, "percentage": 98.27, "elapsed_time": "3:21:52", "remaining_time": "0:03:33", "throughput": 19945.19, "total_tokens": 241576448}
|
|
{"current_steps": 76760, "total_steps": 78105, "loss": 0.1318, "lr": 4.522019124871824e-09, "epoch": 4.913897957877216, "percentage": 98.28, "elapsed_time": "3:21:52", "remaining_time": "0:03:32", "throughput": 19945.35, "total_tokens": 241591744}
|
|
{"current_steps": 76765, "total_steps": 78105, "loss": 0.1418, "lr": 4.488495580903529e-09, "epoch": 4.914218039818193, "percentage": 98.28, "elapsed_time": "3:21:53", "remaining_time": "0:03:31", "throughput": 19945.52, "total_tokens": 241606976}
|
|
{"current_steps": 76770, "total_steps": 78105, "loss": 0.1476, "lr": 4.455096649780832e-09, "epoch": 4.91453812175917, "percentage": 98.29, "elapsed_time": "3:21:54", "remaining_time": "0:03:30", "throughput": 19945.75, "total_tokens": 241624064}
|
|
{"current_steps": 76775, "total_steps": 78105, "loss": 0.1563, "lr": 4.421822333171844e-09, "epoch": 4.914858203700147, "percentage": 98.3, "elapsed_time": "3:21:54", "remaining_time": "0:03:29", "throughput": 19945.93, "total_tokens": 241639552}
|
|
{"current_steps": 76780, "total_steps": 78105, "loss": 0.1066, "lr": 4.388672632738289e-09, "epoch": 4.915178285641124, "percentage": 98.3, "elapsed_time": "3:21:55", "remaining_time": "0:03:29", "throughput": 19946.1, "total_tokens": 241654848}
|
|
{"current_steps": 76785, "total_steps": 78105, "loss": 0.1624, "lr": 4.355647550134956e-09, "epoch": 4.915498367582101, "percentage": 98.31, "elapsed_time": "3:21:56", "remaining_time": "0:03:28", "throughput": 19946.27, "total_tokens": 241670080}
|
|
{"current_steps": 76790, "total_steps": 78105, "loss": 0.1416, "lr": 4.322747087011358e-09, "epoch": 4.915818449523078, "percentage": 98.32, "elapsed_time": "3:21:56", "remaining_time": "0:03:27", "throughput": 19946.44, "total_tokens": 241685312}
|
|
{"current_steps": 76795, "total_steps": 78105, "loss": 0.1207, "lr": 4.2899712450103496e-09, "epoch": 4.916138531464055, "percentage": 98.32, "elapsed_time": "3:21:57", "remaining_time": "0:03:26", "throughput": 19946.59, "total_tokens": 241699968}
|
|
{"current_steps": 76800, "total_steps": 78105, "loss": 0.1647, "lr": 4.2573200257683966e-09, "epoch": 4.916458613405032, "percentage": 98.33, "elapsed_time": "3:21:58", "remaining_time": "0:03:25", "throughput": 19946.77, "total_tokens": 241715776}
|
|
{"current_steps": 76805, "total_steps": 78105, "loss": 0.1806, "lr": 4.224793430915863e-09, "epoch": 4.916778695346008, "percentage": 98.34, "elapsed_time": "3:21:58", "remaining_time": "0:03:25", "throughput": 19946.89, "total_tokens": 241729792}
|
|
{"current_steps": 76810, "total_steps": 78105, "loss": 0.1019, "lr": 4.192391462077283e-09, "epoch": 4.917098777286985, "percentage": 98.34, "elapsed_time": "3:21:59", "remaining_time": "0:03:24", "throughput": 19947.05, "total_tokens": 241744704}
|
|
{"current_steps": 76815, "total_steps": 78105, "loss": 0.125, "lr": 4.160114120870528e-09, "epoch": 4.917418859227962, "percentage": 98.35, "elapsed_time": "3:22:00", "remaining_time": "0:03:23", "throughput": 19947.23, "total_tokens": 241760512}
|
|
{"current_steps": 76820, "total_steps": 78105, "loss": 0.1156, "lr": 4.127961408907366e-09, "epoch": 4.917738941168939, "percentage": 98.35, "elapsed_time": "3:22:00", "remaining_time": "0:03:22", "throughput": 19947.43, "total_tokens": 241776512}
|
|
{"current_steps": 76825, "total_steps": 78105, "loss": 0.1726, "lr": 4.095933327793178e-09, "epoch": 4.918059023109916, "percentage": 98.36, "elapsed_time": "3:22:01", "remaining_time": "0:03:21", "throughput": 19947.58, "total_tokens": 241791360}
|
|
{"current_steps": 76830, "total_steps": 78105, "loss": 0.1392, "lr": 4.064029879127518e-09, "epoch": 4.918379105050893, "percentage": 98.37, "elapsed_time": "3:22:01", "remaining_time": "0:03:21", "throughput": 19947.74, "total_tokens": 241806272}
|
|
{"current_steps": 76835, "total_steps": 78105, "loss": 0.1218, "lr": 4.0322510645032786e-09, "epoch": 4.91869918699187, "percentage": 98.37, "elapsed_time": "3:22:02", "remaining_time": "0:03:20", "throughput": 19947.96, "total_tokens": 241823040}
|
|
{"current_steps": 76840, "total_steps": 78105, "loss": 0.098, "lr": 4.000596885507524e-09, "epoch": 4.919019268932847, "percentage": 98.38, "elapsed_time": "3:22:03", "remaining_time": "0:03:19", "throughput": 19948.12, "total_tokens": 241837824}
|
|
{"current_steps": 76845, "total_steps": 78105, "loss": 0.108, "lr": 3.969067343720379e-09, "epoch": 4.919339350873823, "percentage": 98.39, "elapsed_time": "3:22:04", "remaining_time": "0:03:18", "throughput": 19948.29, "total_tokens": 241853120}
|
|
{"current_steps": 76850, "total_steps": 78105, "loss": 0.0886, "lr": 3.9376624407169716e-09, "epoch": 4.9196594328148, "percentage": 98.39, "elapsed_time": "3:22:04", "remaining_time": "0:03:18", "throughput": 19948.48, "total_tokens": 241868928}
|
|
{"current_steps": 76855, "total_steps": 78105, "loss": 0.1296, "lr": 3.9063821780652156e-09, "epoch": 4.919979514755777, "percentage": 98.4, "elapsed_time": "3:22:05", "remaining_time": "0:03:17", "throughput": 19948.66, "total_tokens": 241884544}
|
|
{"current_steps": 76860, "total_steps": 78105, "loss": 0.1034, "lr": 3.875226557326916e-09, "epoch": 4.920299596696754, "percentage": 98.41, "elapsed_time": "3:22:06", "remaining_time": "0:03:16", "throughput": 19948.81, "total_tokens": 241899392}
|
|
{"current_steps": 76865, "total_steps": 78105, "loss": 0.1351, "lr": 3.8441955800580525e-09, "epoch": 4.920619678637731, "percentage": 98.41, "elapsed_time": "3:22:06", "remaining_time": "0:03:15", "throughput": 19948.99, "total_tokens": 241915072}
|
|
{"current_steps": 76870, "total_steps": 78105, "loss": 0.1709, "lr": 3.813289247807939e-09, "epoch": 4.920939760578708, "percentage": 98.42, "elapsed_time": "3:22:07", "remaining_time": "0:03:14", "throughput": 19949.2, "total_tokens": 241931840}
|
|
{"current_steps": 76875, "total_steps": 78105, "loss": 0.1152, "lr": 3.782507562120341e-09, "epoch": 4.921259842519685, "percentage": 98.43, "elapsed_time": "3:22:08", "remaining_time": "0:03:14", "throughput": 19949.41, "total_tokens": 241948224}
|
|
{"current_steps": 76880, "total_steps": 78105, "loss": 0.114, "lr": 3.751850524531531e-09, "epoch": 4.921579924460662, "percentage": 98.43, "elapsed_time": "3:22:08", "remaining_time": "0:03:13", "throughput": 19949.57, "total_tokens": 241963264}
|
|
{"current_steps": 76885, "total_steps": 78105, "loss": 0.1146, "lr": 3.72131813657306e-09, "epoch": 4.921900006401639, "percentage": 98.44, "elapsed_time": "3:22:09", "remaining_time": "0:03:12", "throughput": 19949.76, "total_tokens": 241979264}
|
|
{"current_steps": 76890, "total_steps": 78105, "loss": 0.1478, "lr": 3.690910399769265e-09, "epoch": 4.922220088342616, "percentage": 98.44, "elapsed_time": "3:22:10", "remaining_time": "0:03:11", "throughput": 19949.93, "total_tokens": 241994624}
|
|
{"current_steps": 76895, "total_steps": 78105, "loss": 0.1463, "lr": 3.660627315638654e-09, "epoch": 4.922540170283592, "percentage": 98.45, "elapsed_time": "3:22:10", "remaining_time": "0:03:10", "throughput": 19950.12, "total_tokens": 242010368}
|
|
{"current_steps": 76900, "total_steps": 78105, "loss": 0.1097, "lr": 3.63046888569335e-09, "epoch": 4.922860252224569, "percentage": 98.46, "elapsed_time": "3:22:11", "remaining_time": "0:03:10", "throughput": 19950.26, "total_tokens": 242024960}
|
|
{"current_steps": 76905, "total_steps": 78105, "loss": 0.1899, "lr": 3.600435111439371e-09, "epoch": 4.923180334165546, "percentage": 98.46, "elapsed_time": "3:22:12", "remaining_time": "0:03:09", "throughput": 19950.46, "total_tokens": 242041152}
|
|
{"current_steps": 76910, "total_steps": 78105, "loss": 0.1325, "lr": 3.57052599437635e-09, "epoch": 4.923500416106523, "percentage": 98.47, "elapsed_time": "3:22:12", "remaining_time": "0:03:08", "throughput": 19950.65, "total_tokens": 242057024}
|
|
{"current_steps": 76915, "total_steps": 78105, "loss": 0.1525, "lr": 3.540741535997816e-09, "epoch": 4.9238204980475, "percentage": 98.48, "elapsed_time": "3:22:13", "remaining_time": "0:03:07", "throughput": 19950.79, "total_tokens": 242071680}
|
|
{"current_steps": 76920, "total_steps": 78105, "loss": 0.1029, "lr": 3.5110817377909113e-09, "epoch": 4.924140579988477, "percentage": 98.48, "elapsed_time": "3:22:14", "remaining_time": "0:03:06", "throughput": 19950.95, "total_tokens": 242086400}
|
|
{"current_steps": 76925, "total_steps": 78105, "loss": 0.1437, "lr": 3.481546601237229e-09, "epoch": 4.924460661929454, "percentage": 98.49, "elapsed_time": "3:22:14", "remaining_time": "0:03:06", "throughput": 19951.18, "total_tokens": 242103360}
|
|
{"current_steps": 76930, "total_steps": 78105, "loss": 0.1526, "lr": 3.4521361278108677e-09, "epoch": 4.924780743870431, "percentage": 98.5, "elapsed_time": "3:22:15", "remaining_time": "0:03:05", "throughput": 19951.34, "total_tokens": 242118720}
|
|
{"current_steps": 76935, "total_steps": 78105, "loss": 0.1564, "lr": 3.4228503189809302e-09, "epoch": 4.925100825811407, "percentage": 98.5, "elapsed_time": "3:22:16", "remaining_time": "0:03:04", "throughput": 19951.6, "total_tokens": 242136320}
|
|
{"current_steps": 76940, "total_steps": 78105, "loss": 0.1275, "lr": 3.3936891762095803e-09, "epoch": 4.925420907752384, "percentage": 98.51, "elapsed_time": "3:22:16", "remaining_time": "0:03:03", "throughput": 19951.86, "total_tokens": 242154048}
|
|
{"current_steps": 76945, "total_steps": 78105, "loss": 0.1692, "lr": 3.3646527009531528e-09, "epoch": 4.925740989693361, "percentage": 98.51, "elapsed_time": "3:22:17", "remaining_time": "0:03:02", "throughput": 19952.07, "total_tokens": 242170560}
|
|
{"current_steps": 76950, "total_steps": 78105, "loss": 0.1088, "lr": 3.335740894661321e-09, "epoch": 4.926061071634338, "percentage": 98.52, "elapsed_time": "3:22:18", "remaining_time": "0:03:02", "throughput": 19952.23, "total_tokens": 242185472}
|
|
{"current_steps": 76955, "total_steps": 78105, "loss": 0.1553, "lr": 3.3069537587782086e-09, "epoch": 4.926381153575315, "percentage": 98.53, "elapsed_time": "3:22:18", "remaining_time": "0:03:01", "throughput": 19952.43, "total_tokens": 242201792}
|
|
{"current_steps": 76960, "total_steps": 78105, "loss": 0.125, "lr": 3.278291294740721e-09, "epoch": 4.926701235516292, "percentage": 98.53, "elapsed_time": "3:22:19", "remaining_time": "0:03:00", "throughput": 19952.56, "total_tokens": 242215872}
|
|
{"current_steps": 76965, "total_steps": 78105, "loss": 0.1315, "lr": 3.2497535039804905e-09, "epoch": 4.927021317457269, "percentage": 98.54, "elapsed_time": "3:22:20", "remaining_time": "0:02:59", "throughput": 19952.74, "total_tokens": 242231232}
|
|
{"current_steps": 76970, "total_steps": 78105, "loss": 0.0974, "lr": 3.221340387922489e-09, "epoch": 4.927341399398246, "percentage": 98.55, "elapsed_time": "3:22:20", "remaining_time": "0:02:59", "throughput": 19952.91, "total_tokens": 242246528}
|
|
{"current_steps": 76975, "total_steps": 78105, "loss": 0.1575, "lr": 3.1930519479855816e-09, "epoch": 4.927661481339223, "percentage": 98.55, "elapsed_time": "3:22:21", "remaining_time": "0:02:58", "throughput": 19953.08, "total_tokens": 242261824}
|
|
{"current_steps": 76980, "total_steps": 78105, "loss": 0.1637, "lr": 3.164888185582249e-09, "epoch": 4.9279815632802, "percentage": 98.56, "elapsed_time": "3:22:22", "remaining_time": "0:02:57", "throughput": 19953.31, "total_tokens": 242278592}
|
|
{"current_steps": 76985, "total_steps": 78105, "loss": 0.1099, "lr": 3.1368491021188663e-09, "epoch": 4.928301645221176, "percentage": 98.57, "elapsed_time": "3:22:22", "remaining_time": "0:02:56", "throughput": 19953.48, "total_tokens": 242294272}
|
|
{"current_steps": 76990, "total_steps": 78105, "loss": 0.1196, "lr": 3.108934698995425e-09, "epoch": 4.928621727162153, "percentage": 98.57, "elapsed_time": "3:22:23", "remaining_time": "0:02:55", "throughput": 19953.65, "total_tokens": 242309248}
|
|
{"current_steps": 76995, "total_steps": 78105, "loss": 0.1361, "lr": 3.0811449776060874e-09, "epoch": 4.92894180910313, "percentage": 98.58, "elapsed_time": "3:22:24", "remaining_time": "0:02:55", "throughput": 19953.81, "total_tokens": 242324608}
|
|
{"current_steps": 77000, "total_steps": 78105, "loss": 0.0993, "lr": 3.053479939338355e-09, "epoch": 4.929261891044107, "percentage": 98.59, "elapsed_time": "3:22:24", "remaining_time": "0:02:54", "throughput": 19954.01, "total_tokens": 242340288}
|
|
{"current_steps": 77005, "total_steps": 78105, "loss": 0.1388, "lr": 3.0259395855739005e-09, "epoch": 4.929581972985084, "percentage": 98.59, "elapsed_time": "3:22:25", "remaining_time": "0:02:53", "throughput": 19954.19, "total_tokens": 242356288}
|
|
{"current_steps": 77010, "total_steps": 78105, "loss": 0.1588, "lr": 2.998523917687457e-09, "epoch": 4.929902054926061, "percentage": 98.6, "elapsed_time": "3:22:26", "remaining_time": "0:02:52", "throughput": 19954.39, "total_tokens": 242372160}
|
|
{"current_steps": 77015, "total_steps": 78105, "loss": 0.1585, "lr": 2.971232937048485e-09, "epoch": 4.930222136867038, "percentage": 98.6, "elapsed_time": "3:22:26", "remaining_time": "0:02:51", "throughput": 19954.56, "total_tokens": 242387712}
|
|
{"current_steps": 77020, "total_steps": 78105, "loss": 0.128, "lr": 2.9440666450195053e-09, "epoch": 4.930542218808014, "percentage": 98.61, "elapsed_time": "3:22:27", "remaining_time": "0:02:51", "throughput": 19954.72, "total_tokens": 242402624}
|
|
{"current_steps": 77025, "total_steps": 78105, "loss": 0.1348, "lr": 2.9170250429572113e-09, "epoch": 4.930862300748991, "percentage": 98.62, "elapsed_time": "3:22:28", "remaining_time": "0:02:50", "throughput": 19954.96, "total_tokens": 242419776}
|
|
{"current_steps": 77030, "total_steps": 78105, "loss": 0.1029, "lr": 2.8901081322119105e-09, "epoch": 4.931182382689968, "percentage": 98.62, "elapsed_time": "3:22:29", "remaining_time": "0:02:49", "throughput": 19955.16, "total_tokens": 242435840}
|
|
{"current_steps": 77035, "total_steps": 78105, "loss": 0.1312, "lr": 2.8633159141275293e-09, "epoch": 4.931502464630945, "percentage": 98.63, "elapsed_time": "3:22:29", "remaining_time": "0:02:48", "throughput": 19955.33, "total_tokens": 242451520}
|
|
{"current_steps": 77040, "total_steps": 78105, "loss": 0.1471, "lr": 2.836648390042163e-09, "epoch": 4.931822546571922, "percentage": 98.64, "elapsed_time": "3:22:30", "remaining_time": "0:02:47", "throughput": 19955.54, "total_tokens": 242467840}
|
|
{"current_steps": 77045, "total_steps": 78105, "loss": 0.1564, "lr": 2.8101055612872465e-09, "epoch": 4.932142628512899, "percentage": 98.64, "elapsed_time": "3:22:31", "remaining_time": "0:02:47", "throughput": 19955.73, "total_tokens": 242483840}
|
|
{"current_steps": 77050, "total_steps": 78105, "loss": 0.1553, "lr": 2.783687429188109e-09, "epoch": 4.932462710453876, "percentage": 98.65, "elapsed_time": "3:22:31", "remaining_time": "0:02:46", "throughput": 19955.97, "total_tokens": 242501184}
|
|
{"current_steps": 77055, "total_steps": 78105, "loss": 0.1446, "lr": 2.7573939950642503e-09, "epoch": 4.932782792394853, "percentage": 98.66, "elapsed_time": "3:22:32", "remaining_time": "0:02:45", "throughput": 19956.15, "total_tokens": 242516800}
|
|
{"current_steps": 77060, "total_steps": 78105, "loss": 0.1238, "lr": 2.7312252602285093e-09, "epoch": 4.93310287433583, "percentage": 98.66, "elapsed_time": "3:22:33", "remaining_time": "0:02:44", "throughput": 19956.31, "total_tokens": 242532096}
|
|
{"current_steps": 77065, "total_steps": 78105, "loss": 0.1247, "lr": 2.705181225987619e-09, "epoch": 4.933422956276807, "percentage": 98.67, "elapsed_time": "3:22:33", "remaining_time": "0:02:44", "throughput": 19956.51, "total_tokens": 242548416}
|
|
{"current_steps": 77070, "total_steps": 78105, "loss": 0.1417, "lr": 2.679261893641927e-09, "epoch": 4.933743038217783, "percentage": 98.67, "elapsed_time": "3:22:34", "remaining_time": "0:02:43", "throughput": 19956.68, "total_tokens": 242563776}
|
|
{"current_steps": 77075, "total_steps": 78105, "loss": 0.1207, "lr": 2.6534672644859536e-09, "epoch": 4.93406312015876, "percentage": 98.68, "elapsed_time": "3:22:35", "remaining_time": "0:02:42", "throughput": 19956.84, "total_tokens": 242579008}
|
|
{"current_steps": 77080, "total_steps": 78105, "loss": 0.1284, "lr": 2.6277973398075586e-09, "epoch": 4.934383202099737, "percentage": 98.69, "elapsed_time": "3:22:35", "remaining_time": "0:02:41", "throughput": 19957.0, "total_tokens": 242593856}
|
|
{"current_steps": 77085, "total_steps": 78105, "loss": 0.1056, "lr": 2.602252120888493e-09, "epoch": 4.934703284040714, "percentage": 98.69, "elapsed_time": "3:22:36", "remaining_time": "0:02:40", "throughput": 19957.19, "total_tokens": 242609664}
|
|
{"current_steps": 77090, "total_steps": 78105, "loss": 0.1059, "lr": 2.5768316090046817e-09, "epoch": 4.935023365981691, "percentage": 98.7, "elapsed_time": "3:22:37", "remaining_time": "0:02:40", "throughput": 19957.36, "total_tokens": 242625152}
|
|
{"current_steps": 77095, "total_steps": 78105, "loss": 0.1323, "lr": 2.5515358054251095e-09, "epoch": 4.935343447922668, "percentage": 98.71, "elapsed_time": "3:22:37", "remaining_time": "0:02:39", "throughput": 19957.51, "total_tokens": 242639872}
|
|
{"current_steps": 77100, "total_steps": 78105, "loss": 0.097, "lr": 2.5263647114132096e-09, "epoch": 4.935663529863645, "percentage": 98.71, "elapsed_time": "3:22:38", "remaining_time": "0:02:38", "throughput": 19957.7, "total_tokens": 242655936}
|
|
{"current_steps": 77105, "total_steps": 78105, "loss": 0.1782, "lr": 2.501318328226032e-09, "epoch": 4.935983611804622, "percentage": 98.72, "elapsed_time": "3:22:39", "remaining_time": "0:02:37", "throughput": 19957.89, "total_tokens": 242671424}
|
|
{"current_steps": 77110, "total_steps": 78105, "loss": 0.1143, "lr": 2.4763966571136888e-09, "epoch": 4.936303693745598, "percentage": 98.73, "elapsed_time": "3:22:39", "remaining_time": "0:02:36", "throughput": 19958.06, "total_tokens": 242686784}
|
|
{"current_steps": 77115, "total_steps": 78105, "loss": 0.1169, "lr": 2.4515996993210167e-09, "epoch": 4.936623775686575, "percentage": 98.73, "elapsed_time": "3:22:40", "remaining_time": "0:02:36", "throughput": 19958.23, "total_tokens": 242702208}
|
|
{"current_steps": 77120, "total_steps": 78105, "loss": 0.1495, "lr": 2.4269274560861923e-09, "epoch": 4.936943857627552, "percentage": 98.74, "elapsed_time": "3:22:41", "remaining_time": "0:02:35", "throughput": 19958.39, "total_tokens": 242717056}
|
|
{"current_steps": 77125, "total_steps": 78105, "loss": 0.0898, "lr": 2.402379928641285e-09, "epoch": 4.937263939568529, "percentage": 98.75, "elapsed_time": "3:22:41", "remaining_time": "0:02:34", "throughput": 19958.58, "total_tokens": 242732992}
|
|
{"current_steps": 77130, "total_steps": 78105, "loss": 0.1133, "lr": 2.377957118211982e-09, "epoch": 4.937584021509506, "percentage": 98.75, "elapsed_time": "3:22:42", "remaining_time": "0:02:33", "throughput": 19958.75, "total_tokens": 242748288}
|
|
{"current_steps": 77135, "total_steps": 78105, "loss": 0.1387, "lr": 2.3536590260181402e-09, "epoch": 4.937904103450483, "percentage": 98.76, "elapsed_time": "3:22:43", "remaining_time": "0:02:32", "throughput": 19958.91, "total_tokens": 242763264}
|
|
{"current_steps": 77140, "total_steps": 78105, "loss": 0.13, "lr": 2.3294856532724008e-09, "epoch": 4.93822418539146, "percentage": 98.76, "elapsed_time": "3:22:43", "remaining_time": "0:02:32", "throughput": 19959.09, "total_tokens": 242778560}
|
|
{"current_steps": 77145, "total_steps": 78105, "loss": 0.1385, "lr": 2.305437001182409e-09, "epoch": 4.938544267332437, "percentage": 98.77, "elapsed_time": "3:22:44", "remaining_time": "0:02:31", "throughput": 19959.26, "total_tokens": 242793920}
|
|
{"current_steps": 77150, "total_steps": 78105, "loss": 0.1293, "lr": 2.281513070948871e-09, "epoch": 4.938864349273414, "percentage": 98.78, "elapsed_time": "3:22:45", "remaining_time": "0:02:30", "throughput": 19959.46, "total_tokens": 242810176}
|
|
{"current_steps": 77155, "total_steps": 78105, "loss": 0.0911, "lr": 2.2577138637663865e-09, "epoch": 4.939184431214391, "percentage": 98.78, "elapsed_time": "3:22:45", "remaining_time": "0:02:29", "throughput": 19959.67, "total_tokens": 242826432}
|
|
{"current_steps": 77160, "total_steps": 78105, "loss": 0.1056, "lr": 2.2340393808234496e-09, "epoch": 4.939504513155367, "percentage": 98.79, "elapsed_time": "3:22:46", "remaining_time": "0:02:29", "throughput": 19959.82, "total_tokens": 242841216}
|
|
{"current_steps": 77165, "total_steps": 78105, "loss": 0.1069, "lr": 2.2104896233021698e-09, "epoch": 4.939824595096344, "percentage": 98.8, "elapsed_time": "3:22:47", "remaining_time": "0:02:28", "throughput": 19959.96, "total_tokens": 242856000}
|
|
{"current_steps": 77170, "total_steps": 78105, "loss": 0.1748, "lr": 2.1870645923788292e-09, "epoch": 4.940144677037321, "percentage": 98.8, "elapsed_time": "3:22:47", "remaining_time": "0:02:27", "throughput": 19960.15, "total_tokens": 242871872}
|
|
{"current_steps": 77175, "total_steps": 78105, "loss": 0.1392, "lr": 2.163764289222492e-09, "epoch": 4.940464758978298, "percentage": 98.81, "elapsed_time": "3:22:48", "remaining_time": "0:02:26", "throughput": 19960.42, "total_tokens": 242890048}
|
|
{"current_steps": 77180, "total_steps": 78105, "loss": 0.1054, "lr": 2.140588714997227e-09, "epoch": 4.940784840919275, "percentage": 98.82, "elapsed_time": "3:22:49", "remaining_time": "0:02:25", "throughput": 19960.59, "total_tokens": 242905600}
|
|
{"current_steps": 77185, "total_steps": 78105, "loss": 0.2571, "lr": 2.117537870859887e-09, "epoch": 4.941104922860252, "percentage": 98.82, "elapsed_time": "3:22:49", "remaining_time": "0:02:25", "throughput": 19960.78, "total_tokens": 242921472}
|
|
{"current_steps": 77190, "total_steps": 78105, "loss": 0.1923, "lr": 2.0946117579617733e-09, "epoch": 4.941425004801229, "percentage": 98.83, "elapsed_time": "3:22:50", "remaining_time": "0:02:24", "throughput": 19960.97, "total_tokens": 242937088}
|
|
{"current_steps": 77195, "total_steps": 78105, "loss": 0.1124, "lr": 2.071810377447525e-09, "epoch": 4.941745086742206, "percentage": 98.83, "elapsed_time": "3:22:51", "remaining_time": "0:02:23", "throughput": 19961.19, "total_tokens": 242953728}
|
|
{"current_steps": 77200, "total_steps": 78105, "loss": 0.0859, "lr": 2.0491337304559546e-09, "epoch": 4.9420651686831825, "percentage": 98.84, "elapsed_time": "3:22:51", "remaining_time": "0:02:22", "throughput": 19961.4, "total_tokens": 242970048}
|
|
{"current_steps": 77205, "total_steps": 78105, "loss": 0.173, "lr": 2.0265818181194885e-09, "epoch": 4.9423852506241595, "percentage": 98.85, "elapsed_time": "3:22:52", "remaining_time": "0:02:21", "throughput": 19961.6, "total_tokens": 242986624}
|
|
{"current_steps": 77210, "total_steps": 78105, "loss": 0.1756, "lr": 2.004154641563616e-09, "epoch": 4.9427053325651364, "percentage": 98.85, "elapsed_time": "3:22:53", "remaining_time": "0:02:21", "throughput": 19961.76, "total_tokens": 243001536}
|
|
{"current_steps": 77215, "total_steps": 78105, "loss": 0.1873, "lr": 1.981852201909107e-09, "epoch": 4.943025414506113, "percentage": 98.86, "elapsed_time": "3:22:54", "remaining_time": "0:02:20", "throughput": 19961.99, "total_tokens": 243018432}
|
|
{"current_steps": 77220, "total_steps": 78105, "loss": 0.1458, "lr": 1.9596745002689597e-09, "epoch": 4.94334549644709, "percentage": 98.87, "elapsed_time": "3:22:54", "remaining_time": "0:02:19", "throughput": 19962.18, "total_tokens": 243033920}
|
|
{"current_steps": 77225, "total_steps": 78105, "loss": 0.1194, "lr": 1.9376215377509e-09, "epoch": 4.943665578388067, "percentage": 98.87, "elapsed_time": "3:22:55", "remaining_time": "0:02:18", "throughput": 19962.34, "total_tokens": 243048896}
|
|
{"current_steps": 77230, "total_steps": 78105, "loss": 0.1452, "lr": 1.9156933154559908e-09, "epoch": 4.943985660329044, "percentage": 98.88, "elapsed_time": "3:22:56", "remaining_time": "0:02:17", "throughput": 19962.51, "total_tokens": 243064576}
|
|
{"current_steps": 77235, "total_steps": 78105, "loss": 0.1342, "lr": 1.893889834479468e-09, "epoch": 4.944305742270021, "percentage": 98.89, "elapsed_time": "3:22:56", "remaining_time": "0:02:17", "throughput": 19962.69, "total_tokens": 243080320}
|
|
{"current_steps": 77240, "total_steps": 78105, "loss": 0.1674, "lr": 1.8722110959099037e-09, "epoch": 4.944625824210998, "percentage": 98.89, "elapsed_time": "3:22:57", "remaining_time": "0:02:16", "throughput": 19962.86, "total_tokens": 243095744}
|
|
{"current_steps": 77245, "total_steps": 78105, "loss": 0.1205, "lr": 1.850657100829767e-09, "epoch": 4.944945906151975, "percentage": 98.9, "elapsed_time": "3:22:58", "remaining_time": "0:02:15", "throughput": 19963.08, "total_tokens": 243112320}
|
|
{"current_steps": 77250, "total_steps": 78105, "loss": 0.1471, "lr": 1.8292278503156958e-09, "epoch": 4.9452659880929515, "percentage": 98.91, "elapsed_time": "3:22:58", "remaining_time": "0:02:14", "throughput": 19963.23, "total_tokens": 243127168}
|
|
{"current_steps": 77255, "total_steps": 78105, "loss": 0.1377, "lr": 1.8079233454371126e-09, "epoch": 4.9455860700339285, "percentage": 98.91, "elapsed_time": "3:22:59", "remaining_time": "0:02:14", "throughput": 19963.42, "total_tokens": 243142848}
|
|
{"current_steps": 77260, "total_steps": 78105, "loss": 0.0792, "lr": 1.7867435872584437e-09, "epoch": 4.9459061519749055, "percentage": 98.92, "elapsed_time": "3:23:00", "remaining_time": "0:02:13", "throughput": 19963.63, "total_tokens": 243159552}
|
|
{"current_steps": 77265, "total_steps": 78105, "loss": 0.1807, "lr": 1.7656885768368992e-09, "epoch": 4.9462262339158825, "percentage": 98.92, "elapsed_time": "3:23:00", "remaining_time": "0:02:12", "throughput": 19963.81, "total_tokens": 243175040}
|
|
{"current_steps": 77270, "total_steps": 78105, "loss": 0.1333, "lr": 1.7447583152244153e-09, "epoch": 4.946546315856859, "percentage": 98.93, "elapsed_time": "3:23:01", "remaining_time": "0:02:11", "throughput": 19963.99, "total_tokens": 243190912}
|
|
{"current_steps": 77275, "total_steps": 78105, "loss": 0.1126, "lr": 1.723952803465434e-09, "epoch": 4.946866397797836, "percentage": 98.94, "elapsed_time": "3:23:02", "remaining_time": "0:02:10", "throughput": 19964.15, "total_tokens": 243205952}
|
|
{"current_steps": 77280, "total_steps": 78105, "loss": 0.1722, "lr": 1.703272042599402e-09, "epoch": 4.947186479738813, "percentage": 98.94, "elapsed_time": "3:23:02", "remaining_time": "0:02:10", "throughput": 19964.31, "total_tokens": 243220928}
|
|
{"current_steps": 77285, "total_steps": 78105, "loss": 0.1612, "lr": 1.682716033658549e-09, "epoch": 4.9475065616797895, "percentage": 98.95, "elapsed_time": "3:23:03", "remaining_time": "0:02:09", "throughput": 19964.52, "total_tokens": 243237824}
|
|
{"current_steps": 77290, "total_steps": 78105, "loss": 0.1464, "lr": 1.6622847776698314e-09, "epoch": 4.9478266436207665, "percentage": 98.96, "elapsed_time": "3:23:04", "remaining_time": "0:02:08", "throughput": 19964.68, "total_tokens": 243252928}
|
|
{"current_steps": 77295, "total_steps": 78105, "loss": 0.1613, "lr": 1.641978275652989e-09, "epoch": 4.9481467255617435, "percentage": 98.96, "elapsed_time": "3:23:04", "remaining_time": "0:02:07", "throughput": 19964.91, "total_tokens": 243270144}
|
|
{"current_steps": 77300, "total_steps": 78105, "loss": 0.1337, "lr": 1.621796528622488e-09, "epoch": 4.9484668075027205, "percentage": 98.97, "elapsed_time": "3:23:05", "remaining_time": "0:02:06", "throughput": 19965.1, "total_tokens": 243286016}
|
|
{"current_steps": 77305, "total_steps": 78105, "loss": 0.1124, "lr": 1.6017395375858557e-09, "epoch": 4.9487868894436975, "percentage": 98.98, "elapsed_time": "3:23:06", "remaining_time": "0:02:06", "throughput": 19965.27, "total_tokens": 243301248}
|
|
{"current_steps": 77310, "total_steps": 78105, "loss": 0.1515, "lr": 1.5818073035447912e-09, "epoch": 4.9491069713846745, "percentage": 98.98, "elapsed_time": "3:23:06", "remaining_time": "0:02:05", "throughput": 19965.44, "total_tokens": 243316800}
|
|
{"current_steps": 77315, "total_steps": 78105, "loss": 0.1255, "lr": 1.5619998274943316e-09, "epoch": 4.9494270533256515, "percentage": 98.99, "elapsed_time": "3:23:07", "remaining_time": "0:02:04", "throughput": 19965.6, "total_tokens": 243332224}
|
|
{"current_steps": 77320, "total_steps": 78105, "loss": 0.1724, "lr": 1.5423171104236857e-09, "epoch": 4.9497471352666285, "percentage": 98.99, "elapsed_time": "3:23:08", "remaining_time": "0:02:03", "throughput": 19965.79, "total_tokens": 243348096}
|
|
{"current_steps": 77325, "total_steps": 78105, "loss": 0.1574, "lr": 1.522759153315956e-09, "epoch": 4.9500672172076055, "percentage": 99.0, "elapsed_time": "3:23:08", "remaining_time": "0:02:02", "throughput": 19965.97, "total_tokens": 243363392}
|
|
{"current_steps": 77330, "total_steps": 78105, "loss": 0.1445, "lr": 1.5033259571473058e-09, "epoch": 4.950387299148582, "percentage": 99.01, "elapsed_time": "3:23:09", "remaining_time": "0:02:02", "throughput": 19966.16, "total_tokens": 243379520}
|
|
{"current_steps": 77335, "total_steps": 78105, "loss": 0.1646, "lr": 1.484017522888348e-09, "epoch": 4.9507073810895585, "percentage": 99.01, "elapsed_time": "3:23:10", "remaining_time": "0:02:01", "throughput": 19966.32, "total_tokens": 243394368}
|
|
{"current_steps": 77340, "total_steps": 78105, "loss": 0.1214, "lr": 1.4648338515033112e-09, "epoch": 4.9510274630305355, "percentage": 99.02, "elapsed_time": "3:23:10", "remaining_time": "0:02:00", "throughput": 19966.47, "total_tokens": 243409216}
|
|
{"current_steps": 77345, "total_steps": 78105, "loss": 0.1642, "lr": 1.4457749439503178e-09, "epoch": 4.9513475449715125, "percentage": 99.03, "elapsed_time": "3:23:11", "remaining_time": "0:01:59", "throughput": 19966.62, "total_tokens": 243424256}
|
|
{"current_steps": 77350, "total_steps": 78105, "loss": 0.1421, "lr": 1.4268408011805512e-09, "epoch": 4.9516676269124895, "percentage": 99.03, "elapsed_time": "3:23:12", "remaining_time": "0:01:59", "throughput": 19966.79, "total_tokens": 243439936}
|
|
{"current_steps": 77355, "total_steps": 78105, "loss": 0.1381, "lr": 1.4080314241399218e-09, "epoch": 4.9519877088534665, "percentage": 99.04, "elapsed_time": "3:23:12", "remaining_time": "0:01:58", "throughput": 19966.95, "total_tokens": 243454656}
|
|
{"current_steps": 77360, "total_steps": 78105, "loss": 0.1524, "lr": 1.3893468137674005e-09, "epoch": 4.9523077907944435, "percentage": 99.05, "elapsed_time": "3:23:13", "remaining_time": "0:01:57", "throughput": 19967.09, "total_tokens": 243469632}
|
|
{"current_steps": 77365, "total_steps": 78105, "loss": 0.1276, "lr": 1.3707869709961297e-09, "epoch": 4.9526278727354205, "percentage": 99.05, "elapsed_time": "3:23:14", "remaining_time": "0:01:56", "throughput": 19967.27, "total_tokens": 243485056}
|
|
{"current_steps": 77370, "total_steps": 78105, "loss": 0.152, "lr": 1.3523518967528682e-09, "epoch": 4.9529479546763975, "percentage": 99.06, "elapsed_time": "3:23:14", "remaining_time": "0:01:55", "throughput": 19967.44, "total_tokens": 243500480}
|
|
{"current_steps": 77375, "total_steps": 78105, "loss": 0.1355, "lr": 1.3340415919579908e-09, "epoch": 4.953268036617374, "percentage": 99.07, "elapsed_time": "3:23:15", "remaining_time": "0:01:55", "throughput": 19967.62, "total_tokens": 243515968}
|
|
{"current_steps": 77380, "total_steps": 78105, "loss": 0.1437, "lr": 1.3158560575263212e-09, "epoch": 4.953588118558351, "percentage": 99.07, "elapsed_time": "3:23:16", "remaining_time": "0:01:54", "throughput": 19967.79, "total_tokens": 243531136}
|
|
{"current_steps": 77385, "total_steps": 78105, "loss": 0.1236, "lr": 1.297795294365467e-09, "epoch": 4.953908200499328, "percentage": 99.08, "elapsed_time": "3:23:16", "remaining_time": "0:01:53", "throughput": 19967.95, "total_tokens": 243546176}
|
|
{"current_steps": 77390, "total_steps": 78105, "loss": 0.157, "lr": 1.2798593033774843e-09, "epoch": 4.9542282824403046, "percentage": 99.08, "elapsed_time": "3:23:17", "remaining_time": "0:01:52", "throughput": 19968.11, "total_tokens": 243561536}
|
|
{"current_steps": 77395, "total_steps": 78105, "loss": 0.0718, "lr": 1.2620480854580453e-09, "epoch": 4.9545483643812815, "percentage": 99.09, "elapsed_time": "3:23:18", "remaining_time": "0:01:51", "throughput": 19968.26, "total_tokens": 243576320}
|
|
{"current_steps": 77400, "total_steps": 78105, "loss": 0.0955, "lr": 1.2443616414964387e-09, "epoch": 4.9548684463222585, "percentage": 99.1, "elapsed_time": "3:23:18", "remaining_time": "0:01:51", "throughput": 19968.43, "total_tokens": 243591488}
|
|
{"current_steps": 77405, "total_steps": 78105, "loss": 0.1424, "lr": 1.2267999723758472e-09, "epoch": 4.9551885282632355, "percentage": 99.1, "elapsed_time": "3:23:19", "remaining_time": "0:01:50", "throughput": 19968.66, "total_tokens": 243608832}
|
|
{"current_steps": 77410, "total_steps": 78105, "loss": 0.175, "lr": 1.2093630789730692e-09, "epoch": 4.9555086102042125, "percentage": 99.11, "elapsed_time": "3:23:20", "remaining_time": "0:01:49", "throughput": 19968.86, "total_tokens": 243625536}
|
|
{"current_steps": 77415, "total_steps": 78105, "loss": 0.1116, "lr": 1.1920509621590748e-09, "epoch": 4.9558286921451895, "percentage": 99.12, "elapsed_time": "3:23:20", "remaining_time": "0:01:48", "throughput": 19968.98, "total_tokens": 243639808}
|
|
{"current_steps": 77420, "total_steps": 78105, "loss": 0.1386, "lr": 1.17486362279845e-09, "epoch": 4.9561487740861665, "percentage": 99.12, "elapsed_time": "3:23:21", "remaining_time": "0:01:47", "throughput": 19969.18, "total_tokens": 243655424}
|
|
{"current_steps": 77425, "total_steps": 78105, "loss": 0.1339, "lr": 1.1578010617488422e-09, "epoch": 4.956468856027143, "percentage": 99.13, "elapsed_time": "3:23:22", "remaining_time": "0:01:47", "throughput": 19969.34, "total_tokens": 243670848}
|
|
{"current_steps": 77430, "total_steps": 78105, "loss": 0.0594, "lr": 1.1408632798629027e-09, "epoch": 4.95678893796812, "percentage": 99.14, "elapsed_time": "3:23:22", "remaining_time": "0:01:46", "throughput": 19969.47, "total_tokens": 243685120}
|
|
{"current_steps": 77435, "total_steps": 78105, "loss": 0.1308, "lr": 1.1240502779863438e-09, "epoch": 4.957109019909097, "percentage": 99.14, "elapsed_time": "3:23:23", "remaining_time": "0:01:45", "throughput": 19969.62, "total_tokens": 243699776}
|
|
{"current_steps": 77440, "total_steps": 78105, "loss": 0.124, "lr": 1.1073620569582167e-09, "epoch": 4.957429101850074, "percentage": 99.15, "elapsed_time": "3:23:24", "remaining_time": "0:01:44", "throughput": 19969.81, "total_tokens": 243715904}
|
|
{"current_steps": 77445, "total_steps": 78105, "loss": 0.1289, "lr": 1.0907986176122988e-09, "epoch": 4.957749183791051, "percentage": 99.15, "elapsed_time": "3:23:24", "remaining_time": "0:01:44", "throughput": 19969.99, "total_tokens": 243731264}
|
|
{"current_steps": 77450, "total_steps": 78105, "loss": 0.127, "lr": 1.0743599607754285e-09, "epoch": 4.9580692657320276, "percentage": 99.16, "elapsed_time": "3:23:25", "remaining_time": "0:01:43", "throughput": 19970.17, "total_tokens": 243747392}
|
|
{"current_steps": 77455, "total_steps": 78105, "loss": 0.1257, "lr": 1.0580460872688937e-09, "epoch": 4.9583893476730045, "percentage": 99.17, "elapsed_time": "3:23:26", "remaining_time": "0:01:42", "throughput": 19970.36, "total_tokens": 243763456}
|
|
{"current_steps": 77460, "total_steps": 78105, "loss": 0.1227, "lr": 1.041856997906765e-09, "epoch": 4.9587094296139815, "percentage": 99.17, "elapsed_time": "3:23:26", "remaining_time": "0:01:41", "throughput": 19970.57, "total_tokens": 243779904}
|
|
{"current_steps": 77465, "total_steps": 78105, "loss": 0.1338, "lr": 1.0257926934978402e-09, "epoch": 4.959029511554958, "percentage": 99.18, "elapsed_time": "3:23:27", "remaining_time": "0:01:40", "throughput": 19970.74, "total_tokens": 243795648}
|
|
{"current_steps": 77470, "total_steps": 78105, "loss": 0.0907, "lr": 1.0098531748439778e-09, "epoch": 4.959349593495935, "percentage": 99.19, "elapsed_time": "3:23:28", "remaining_time": "0:01:40", "throughput": 19970.89, "total_tokens": 243810624}
|
|
{"current_steps": 77475, "total_steps": 78105, "loss": 0.1494, "lr": 9.94038442741485e-10, "epoch": 4.959669675436912, "percentage": 99.19, "elapsed_time": "3:23:28", "remaining_time": "0:01:39", "throughput": 19971.04, "total_tokens": 243825408}
|
|
{"current_steps": 77480, "total_steps": 78105, "loss": 0.1262, "lr": 9.783484979797309e-10, "epoch": 4.959989757377889, "percentage": 99.2, "elapsed_time": "3:23:29", "remaining_time": "0:01:38", "throughput": 19971.22, "total_tokens": 243841472}
|
|
{"current_steps": 77485, "total_steps": 78105, "loss": 0.0931, "lr": 9.627833413425324e-10, "epoch": 4.960309839318866, "percentage": 99.21, "elapsed_time": "3:23:30", "remaining_time": "0:01:37", "throughput": 19971.37, "total_tokens": 243856448}
|
|
{"current_steps": 77490, "total_steps": 78105, "loss": 0.1191, "lr": 9.473429736067686e-10, "epoch": 4.960629921259843, "percentage": 99.21, "elapsed_time": "3:23:30", "remaining_time": "0:01:36", "throughput": 19971.55, "total_tokens": 243872128}
|
|
{"current_steps": 77495, "total_steps": 78105, "loss": 0.1775, "lr": 9.32027395543489e-10, "epoch": 4.96095000320082, "percentage": 99.22, "elapsed_time": "3:23:31", "remaining_time": "0:01:36", "throughput": 19971.79, "total_tokens": 243889600}
|
|
{"current_steps": 77500, "total_steps": 78105, "loss": 0.1342, "lr": 9.168366079176372e-10, "epoch": 4.961270085141797, "percentage": 99.23, "elapsed_time": "3:23:32", "remaining_time": "0:01:35", "throughput": 19971.97, "total_tokens": 243905536}
|
|
{"current_steps": 77505, "total_steps": 78105, "loss": 0.1372, "lr": 9.017706114877734e-10, "epoch": 4.961590167082774, "percentage": 99.23, "elapsed_time": "3:23:33", "remaining_time": "0:01:34", "throughput": 19972.11, "total_tokens": 243920000}
|
|
{"current_steps": 77510, "total_steps": 78105, "loss": 0.1475, "lr": 8.86829407006351e-10, "epoch": 4.9619102490237506, "percentage": 99.24, "elapsed_time": "3:23:33", "remaining_time": "0:01:33", "throughput": 19972.34, "total_tokens": 243936960}
|
|
{"current_steps": 77515, "total_steps": 78105, "loss": 0.1515, "lr": 8.720129952188849e-10, "epoch": 4.962230330964727, "percentage": 99.24, "elapsed_time": "3:23:34", "remaining_time": "0:01:32", "throughput": 19972.52, "total_tokens": 243952832}
|
|
{"current_steps": 77520, "total_steps": 78105, "loss": 0.1125, "lr": 8.573213768658939e-10, "epoch": 4.962550412905704, "percentage": 99.25, "elapsed_time": "3:23:35", "remaining_time": "0:01:32", "throughput": 19972.7, "total_tokens": 243968896}
|
|
{"current_steps": 77525, "total_steps": 78105, "loss": 0.1148, "lr": 8.427545526806802e-10, "epoch": 4.962870494846681, "percentage": 99.26, "elapsed_time": "3:23:35", "remaining_time": "0:01:31", "throughput": 19972.84, "total_tokens": 243983488}
|
|
{"current_steps": 77530, "total_steps": 78105, "loss": 0.1502, "lr": 8.283125233904399e-10, "epoch": 4.963190576787658, "percentage": 99.26, "elapsed_time": "3:23:36", "remaining_time": "0:01:30", "throughput": 19973.04, "total_tokens": 243999296}
|
|
{"current_steps": 77535, "total_steps": 78105, "loss": 0.118, "lr": 8.139952897168179e-10, "epoch": 4.963510658728635, "percentage": 99.27, "elapsed_time": "3:23:37", "remaining_time": "0:01:29", "throughput": 19973.21, "total_tokens": 244014528}
|
|
{"current_steps": 77540, "total_steps": 78105, "loss": 0.1087, "lr": 7.998028523745205e-10, "epoch": 4.963830740669612, "percentage": 99.28, "elapsed_time": "3:23:37", "remaining_time": "0:01:29", "throughput": 19973.39, "total_tokens": 244029952}
|
|
{"current_steps": 77545, "total_steps": 78105, "loss": 0.1206, "lr": 7.857352120718698e-10, "epoch": 4.964150822610589, "percentage": 99.28, "elapsed_time": "3:23:38", "remaining_time": "0:01:28", "throughput": 19973.58, "total_tokens": 244045824}
|
|
{"current_steps": 77550, "total_steps": 78105, "loss": 0.1345, "lr": 7.717923695119145e-10, "epoch": 4.964470904551566, "percentage": 99.29, "elapsed_time": "3:23:39", "remaining_time": "0:01:27", "throughput": 19973.74, "total_tokens": 244060928}
|
|
{"current_steps": 77555, "total_steps": 78105, "loss": 0.1289, "lr": 7.579743253907645e-10, "epoch": 4.964790986492542, "percentage": 99.3, "elapsed_time": "3:23:39", "remaining_time": "0:01:26", "throughput": 19973.9, "total_tokens": 244075776}
|
|
{"current_steps": 77560, "total_steps": 78105, "loss": 0.181, "lr": 7.442810803981459e-10, "epoch": 4.965111068433519, "percentage": 99.3, "elapsed_time": "3:23:40", "remaining_time": "0:01:25", "throughput": 19974.13, "total_tokens": 244092928}
|
|
{"current_steps": 77565, "total_steps": 78105, "loss": 0.1588, "lr": 7.307126352179561e-10, "epoch": 4.965431150374496, "percentage": 99.31, "elapsed_time": "3:23:41", "remaining_time": "0:01:25", "throughput": 19974.29, "total_tokens": 244108288}
|
|
{"current_steps": 77570, "total_steps": 78105, "loss": 0.1545, "lr": 7.172689905277086e-10, "epoch": 4.965751232315473, "percentage": 99.32, "elapsed_time": "3:23:41", "remaining_time": "0:01:24", "throughput": 19974.44, "total_tokens": 244122944}
|
|
{"current_steps": 77575, "total_steps": 78105, "loss": 0.1665, "lr": 7.039501469988108e-10, "epoch": 4.96607131425645, "percentage": 99.32, "elapsed_time": "3:23:42", "remaining_time": "0:01:23", "throughput": 19974.63, "total_tokens": 244139008}
|
|
{"current_steps": 77580, "total_steps": 78105, "loss": 0.127, "lr": 6.907561052962863e-10, "epoch": 4.966391396197427, "percentage": 99.33, "elapsed_time": "3:23:43", "remaining_time": "0:01:22", "throughput": 19974.86, "total_tokens": 244156096}
|
|
{"current_steps": 77585, "total_steps": 78105, "loss": 0.1055, "lr": 6.776868660790525e-10, "epoch": 4.966711478138404, "percentage": 99.33, "elapsed_time": "3:23:43", "remaining_time": "0:01:21", "throughput": 19975.08, "total_tokens": 244173376}
|
|
{"current_steps": 77590, "total_steps": 78105, "loss": 0.123, "lr": 6.64742429999643e-10, "epoch": 4.967031560079381, "percentage": 99.34, "elapsed_time": "3:23:44", "remaining_time": "0:01:21", "throughput": 19975.23, "total_tokens": 244188352}
|
|
{"current_steps": 77595, "total_steps": 78105, "loss": 0.1075, "lr": 6.519227977044851e-10, "epoch": 4.967351642020358, "percentage": 99.35, "elapsed_time": "3:23:45", "remaining_time": "0:01:20", "throughput": 19975.45, "total_tokens": 244205120}
|
|
{"current_steps": 77600, "total_steps": 78105, "loss": 0.1182, "lr": 6.392279698333447e-10, "epoch": 4.967671723961334, "percentage": 99.35, "elapsed_time": "3:23:45", "remaining_time": "0:01:19", "throughput": 19975.62, "total_tokens": 244220672}
|
|
{"current_steps": 77605, "total_steps": 78105, "loss": 0.167, "lr": 6.266579470207146e-10, "epoch": 4.967991805902311, "percentage": 99.36, "elapsed_time": "3:23:46", "remaining_time": "0:01:18", "throughput": 19975.8, "total_tokens": 244236032}
|
|
{"current_steps": 77610, "total_steps": 78105, "loss": 0.1858, "lr": 6.14212729894148e-10, "epoch": 4.968311887843288, "percentage": 99.37, "elapsed_time": "3:23:47", "remaining_time": "0:01:17", "throughput": 19976.01, "total_tokens": 244252800}
|
|
{"current_steps": 77615, "total_steps": 78105, "loss": 0.1277, "lr": 6.018923190748149e-10, "epoch": 4.968631969784265, "percentage": 99.37, "elapsed_time": "3:23:47", "remaining_time": "0:01:17", "throughput": 19976.2, "total_tokens": 244268544}
|
|
{"current_steps": 77620, "total_steps": 78105, "loss": 0.121, "lr": 5.896967151780564e-10, "epoch": 4.968952051725242, "percentage": 99.38, "elapsed_time": "3:23:48", "remaining_time": "0:01:16", "throughput": 19976.35, "total_tokens": 244283264}
|
|
{"current_steps": 77625, "total_steps": 78105, "loss": 0.1355, "lr": 5.776259188131073e-10, "epoch": 4.969272133666219, "percentage": 99.39, "elapsed_time": "3:23:49", "remaining_time": "0:01:15", "throughput": 19976.55, "total_tokens": 244299712}
|
|
{"current_steps": 77630, "total_steps": 78105, "loss": 0.1311, "lr": 5.656799305822636e-10, "epoch": 4.969592215607196, "percentage": 99.39, "elapsed_time": "3:23:49", "remaining_time": "0:01:14", "throughput": 19976.71, "total_tokens": 244314816}
|
|
{"current_steps": 77635, "total_steps": 78105, "loss": 0.1564, "lr": 5.538587510822702e-10, "epoch": 4.969912297548173, "percentage": 99.4, "elapsed_time": "3:23:50", "remaining_time": "0:01:14", "throughput": 19976.89, "total_tokens": 244330496}
|
|
{"current_steps": 77640, "total_steps": 78105, "loss": 0.1397, "lr": 5.421623809034882e-10, "epoch": 4.970232379489149, "percentage": 99.4, "elapsed_time": "3:23:51", "remaining_time": "0:01:13", "throughput": 19977.04, "total_tokens": 244345728}
|
|
{"current_steps": 77645, "total_steps": 78105, "loss": 0.0867, "lr": 5.305908206298949e-10, "epoch": 4.970552461430126, "percentage": 99.41, "elapsed_time": "3:23:52", "remaining_time": "0:01:12", "throughput": 19977.22, "total_tokens": 244361728}
|
|
{"current_steps": 77650, "total_steps": 78105, "loss": 0.1088, "lr": 5.191440708393613e-10, "epoch": 4.970872543371103, "percentage": 99.42, "elapsed_time": "3:23:52", "remaining_time": "0:01:11", "throughput": 19977.41, "total_tokens": 244377728}
|
|
{"current_steps": 77655, "total_steps": 78105, "loss": 0.1322, "lr": 5.078221321036525e-10, "epoch": 4.97119262531208, "percentage": 99.42, "elapsed_time": "3:23:53", "remaining_time": "0:01:10", "throughput": 19977.55, "total_tokens": 244392640}
|
|
{"current_steps": 77660, "total_steps": 78105, "loss": 0.1037, "lr": 4.966250049875942e-10, "epoch": 4.971512707253057, "percentage": 99.43, "elapsed_time": "3:23:54", "remaining_time": "0:01:10", "throughput": 19977.71, "total_tokens": 244407616}
|
|
{"current_steps": 77665, "total_steps": 78105, "loss": 0.0899, "lr": 4.855526900510166e-10, "epoch": 4.971832789194034, "percentage": 99.44, "elapsed_time": "3:23:54", "remaining_time": "0:01:09", "throughput": 19977.94, "total_tokens": 244425216}
|
|
{"current_steps": 77670, "total_steps": 78105, "loss": 0.1292, "lr": 4.746051878462554e-10, "epoch": 4.972152871135011, "percentage": 99.44, "elapsed_time": "3:23:55", "remaining_time": "0:01:08", "throughput": 19978.19, "total_tokens": 244443136}
|
|
{"current_steps": 77675, "total_steps": 78105, "loss": 0.1404, "lr": 4.6378249892009565e-10, "epoch": 4.972472953075988, "percentage": 99.45, "elapsed_time": "3:23:56", "remaining_time": "0:01:07", "throughput": 19978.39, "total_tokens": 244459648}
|
|
{"current_steps": 77680, "total_steps": 78105, "loss": 0.1061, "lr": 4.5308462381293826e-10, "epoch": 4.972793035016965, "percentage": 99.46, "elapsed_time": "3:23:56", "remaining_time": "0:01:06", "throughput": 19978.58, "total_tokens": 244475840}
|
|
{"current_steps": 77685, "total_steps": 78105, "loss": 0.1305, "lr": 4.425115630593557e-10, "epoch": 4.973113116957942, "percentage": 99.46, "elapsed_time": "3:23:57", "remaining_time": "0:01:06", "throughput": 19978.79, "total_tokens": 244492224}
|
|
{"current_steps": 77690, "total_steps": 78105, "loss": 0.1381, "lr": 4.320633171867039e-10, "epoch": 4.973433198898918, "percentage": 99.47, "elapsed_time": "3:23:58", "remaining_time": "0:01:05", "throughput": 19978.99, "total_tokens": 244508608}
|
|
{"current_steps": 77695, "total_steps": 78105, "loss": 0.1032, "lr": 4.217398867173428e-10, "epoch": 4.973753280839895, "percentage": 99.48, "elapsed_time": "3:23:58", "remaining_time": "0:01:04", "throughput": 19979.14, "total_tokens": 244523328}
|
|
{"current_steps": 77700, "total_steps": 78105, "loss": 0.1129, "lr": 4.1154127216613826e-10, "epoch": 4.974073362780872, "percentage": 99.48, "elapsed_time": "3:23:59", "remaining_time": "0:01:03", "throughput": 19979.37, "total_tokens": 244540352}
|
|
{"current_steps": 77705, "total_steps": 78105, "loss": 0.1084, "lr": 4.014674740429603e-10, "epoch": 4.974393444721849, "percentage": 99.49, "elapsed_time": "3:24:00", "remaining_time": "0:01:03", "throughput": 19979.54, "total_tokens": 244555840}
|
|
{"current_steps": 77710, "total_steps": 78105, "loss": 0.1037, "lr": 3.915184928504623e-10, "epoch": 4.974713526662826, "percentage": 99.49, "elapsed_time": "3:24:00", "remaining_time": "0:01:02", "throughput": 19979.7, "total_tokens": 244570752}
|
|
{"current_steps": 77715, "total_steps": 78105, "loss": 0.132, "lr": 3.816943290854691e-10, "epoch": 4.975033608603803, "percentage": 99.5, "elapsed_time": "3:24:01", "remaining_time": "0:01:01", "throughput": 19979.84, "total_tokens": 244585472}
|
|
{"current_steps": 77720, "total_steps": 78105, "loss": 0.1101, "lr": 3.719949832386993e-10, "epoch": 4.97535369054478, "percentage": 99.51, "elapsed_time": "3:24:02", "remaining_time": "0:01:00", "throughput": 19980.01, "total_tokens": 244600640}
|
|
{"current_steps": 77725, "total_steps": 78105, "loss": 0.0917, "lr": 3.624204557944877e-10, "epoch": 4.975673772485757, "percentage": 99.51, "elapsed_time": "3:24:02", "remaining_time": "0:00:59", "throughput": 19980.19, "total_tokens": 244616128}
|
|
{"current_steps": 77730, "total_steps": 78105, "loss": 0.1886, "lr": 3.5297074723078527e-10, "epoch": 4.975993854426733, "percentage": 99.52, "elapsed_time": "3:24:03", "remaining_time": "0:00:59", "throughput": 19980.38, "total_tokens": 244632128}
|
|
{"current_steps": 77735, "total_steps": 78105, "loss": 0.1892, "lr": 3.4364585801943685e-10, "epoch": 4.97631393636771, "percentage": 99.53, "elapsed_time": "3:24:04", "remaining_time": "0:00:58", "throughput": 19980.56, "total_tokens": 244647808}
|
|
{"current_steps": 77740, "total_steps": 78105, "loss": 0.1363, "lr": 3.344457886264585e-10, "epoch": 4.976634018308687, "percentage": 99.53, "elapsed_time": "3:24:04", "remaining_time": "0:00:57", "throughput": 19980.75, "total_tokens": 244663808}
|
|
{"current_steps": 77745, "total_steps": 78105, "loss": 0.1687, "lr": 3.253705395106499e-10, "epoch": 4.976954100249664, "percentage": 99.54, "elapsed_time": "3:24:05", "remaining_time": "0:00:56", "throughput": 19980.94, "total_tokens": 244679936}
|
|
{"current_steps": 77750, "total_steps": 78105, "loss": 0.1167, "lr": 3.1642011112581474e-10, "epoch": 4.977274182190641, "percentage": 99.55, "elapsed_time": "3:24:06", "remaining_time": "0:00:55", "throughput": 19981.12, "total_tokens": 244695680}
|
|
{"current_steps": 77755, "total_steps": 78105, "loss": 0.1144, "lr": 3.075945039182626e-10, "epoch": 4.977594264131618, "percentage": 99.55, "elapsed_time": "3:24:06", "remaining_time": "0:00:55", "throughput": 19981.29, "total_tokens": 244710784}
|
|
{"current_steps": 77760, "total_steps": 78105, "loss": 0.1137, "lr": 2.9889371832930725e-10, "epoch": 4.977914346072595, "percentage": 99.56, "elapsed_time": "3:24:07", "remaining_time": "0:00:54", "throughput": 19981.51, "total_tokens": 244727744}
|
|
{"current_steps": 77765, "total_steps": 78105, "loss": 0.1229, "lr": 2.903177547930458e-10, "epoch": 4.978234428013572, "percentage": 99.56, "elapsed_time": "3:24:08", "remaining_time": "0:00:53", "throughput": 19981.69, "total_tokens": 244743360}
|
|
{"current_steps": 77770, "total_steps": 78105, "loss": 0.0964, "lr": 2.8186661373774683e-10, "epoch": 4.978554509954549, "percentage": 99.57, "elapsed_time": "3:24:09", "remaining_time": "0:00:52", "throughput": 19981.89, "total_tokens": 244759232}
|
|
{"current_steps": 77775, "total_steps": 78105, "loss": 0.1706, "lr": 2.735402955855726e-10, "epoch": 4.978874591895526, "percentage": 99.58, "elapsed_time": "3:24:09", "remaining_time": "0:00:51", "throughput": 19982.04, "total_tokens": 244774016}
|
|
{"current_steps": 77780, "total_steps": 78105, "loss": 0.1908, "lr": 2.6533880075202414e-10, "epoch": 4.979194673836502, "percentage": 99.58, "elapsed_time": "3:24:10", "remaining_time": "0:00:51", "throughput": 19982.22, "total_tokens": 244789440}
|
|
{"current_steps": 77785, "total_steps": 78105, "loss": 0.1037, "lr": 2.5726212964677364e-10, "epoch": 4.979514755777479, "percentage": 99.59, "elapsed_time": "3:24:11", "remaining_time": "0:00:50", "throughput": 19982.36, "total_tokens": 244804096}
|
|
{"current_steps": 77790, "total_steps": 78105, "loss": 0.1278, "lr": 2.493102826733873e-10, "epoch": 4.979834837718456, "percentage": 99.6, "elapsed_time": "3:24:11", "remaining_time": "0:00:49", "throughput": 19982.54, "total_tokens": 244819520}
|
|
{"current_steps": 77795, "total_steps": 78105, "loss": 0.1482, "lr": 2.414832602287698e-10, "epoch": 4.980154919659433, "percentage": 99.6, "elapsed_time": "3:24:12", "remaining_time": "0:00:48", "throughput": 19982.72, "total_tokens": 244835712}
|
|
{"current_steps": 77800, "total_steps": 78105, "loss": 0.1263, "lr": 2.3378106270344204e-10, "epoch": 4.98047500160041, "percentage": 99.61, "elapsed_time": "3:24:13", "remaining_time": "0:00:48", "throughput": 19982.91, "total_tokens": 244851520}
|
|
{"current_steps": 77805, "total_steps": 78105, "loss": 0.1208, "lr": 2.262036904823739e-10, "epoch": 4.980795083541387, "percentage": 99.62, "elapsed_time": "3:24:13", "remaining_time": "0:00:47", "throughput": 19983.1, "total_tokens": 244867712}
|
|
{"current_steps": 77810, "total_steps": 78105, "loss": 0.1306, "lr": 2.1875114394387387e-10, "epoch": 4.981115165482364, "percentage": 99.62, "elapsed_time": "3:24:14", "remaining_time": "0:00:46", "throughput": 19983.26, "total_tokens": 244883136}
|
|
{"current_steps": 77815, "total_steps": 78105, "loss": 0.1069, "lr": 2.114234234601442e-10, "epoch": 4.981435247423341, "percentage": 99.63, "elapsed_time": "3:24:15", "remaining_time": "0:00:45", "throughput": 19983.42, "total_tokens": 244898112}
|
|
{"current_steps": 77820, "total_steps": 78105, "loss": 0.1617, "lr": 2.0422052939700343e-10, "epoch": 4.981755329364317, "percentage": 99.64, "elapsed_time": "3:24:15", "remaining_time": "0:00:44", "throughput": 19983.59, "total_tokens": 244913600}
|
|
{"current_steps": 77825, "total_steps": 78105, "loss": 0.1076, "lr": 1.9714246211388622e-10, "epoch": 4.982075411305294, "percentage": 99.64, "elapsed_time": "3:24:16", "remaining_time": "0:00:44", "throughput": 19983.75, "total_tokens": 244929088}
|
|
{"current_steps": 77830, "total_steps": 78105, "loss": 0.1483, "lr": 1.9018922196467615e-10, "epoch": 4.982395493246271, "percentage": 99.65, "elapsed_time": "3:24:17", "remaining_time": "0:00:43", "throughput": 19983.94, "total_tokens": 244944832}
|
|
{"current_steps": 77835, "total_steps": 78105, "loss": 0.1109, "lr": 1.8336080929631793e-10, "epoch": 4.982715575187248, "percentage": 99.65, "elapsed_time": "3:24:17", "remaining_time": "0:00:42", "throughput": 19984.14, "total_tokens": 244960960}
|
|
{"current_steps": 77840, "total_steps": 78105, "loss": 0.1747, "lr": 1.7665722444992761e-10, "epoch": 4.983035657128225, "percentage": 99.66, "elapsed_time": "3:24:18", "remaining_time": "0:00:41", "throughput": 19984.31, "total_tokens": 244977408}
|
|
{"current_steps": 77845, "total_steps": 78105, "loss": 0.1474, "lr": 1.700784677602374e-10, "epoch": 4.983355739069202, "percentage": 99.67, "elapsed_time": "3:24:19", "remaining_time": "0:00:40", "throughput": 19984.49, "total_tokens": 244993664}
|
|
{"current_steps": 77850, "total_steps": 78105, "loss": 0.0894, "lr": 1.6362453955559575e-10, "epoch": 4.983675821010179, "percentage": 99.67, "elapsed_time": "3:24:19", "remaining_time": "0:00:40", "throughput": 19984.67, "total_tokens": 245009152}
|
|
{"current_steps": 77855, "total_steps": 78105, "loss": 0.1345, "lr": 1.5729544015824494e-10, "epoch": 4.983995902951156, "percentage": 99.68, "elapsed_time": "3:24:20", "remaining_time": "0:00:39", "throughput": 19984.87, "total_tokens": 245025216}
|
|
{"current_steps": 77860, "total_steps": 78105, "loss": 0.1322, "lr": 1.5109116988432092e-10, "epoch": 4.984315984892133, "percentage": 99.69, "elapsed_time": "3:24:21", "remaining_time": "0:00:38", "throughput": 19985.05, "total_tokens": 245040896}
|
|
{"current_steps": 77865, "total_steps": 78105, "loss": 0.1358, "lr": 1.4501172904385353e-10, "epoch": 4.984636066833109, "percentage": 99.69, "elapsed_time": "3:24:21", "remaining_time": "0:00:37", "throughput": 19985.2, "total_tokens": 245055808}
|
|
{"current_steps": 77870, "total_steps": 78105, "loss": 0.1047, "lr": 1.3905711794021114e-10, "epoch": 4.984956148774086, "percentage": 99.7, "elapsed_time": "3:24:22", "remaining_time": "0:00:37", "throughput": 19985.4, "total_tokens": 245072448}
|
|
{"current_steps": 77875, "total_steps": 78105, "loss": 0.1337, "lr": 1.3322733687065604e-10, "epoch": 4.985276230715063, "percentage": 99.71, "elapsed_time": "3:24:23", "remaining_time": "0:00:36", "throughput": 19985.6, "total_tokens": 245088320}
|
|
{"current_steps": 77880, "total_steps": 78105, "loss": 0.1103, "lr": 1.2752238612662172e-10, "epoch": 4.98559631265604, "percentage": 99.71, "elapsed_time": "3:24:23", "remaining_time": "0:00:35", "throughput": 19985.79, "total_tokens": 245104128}
|
|
{"current_steps": 77885, "total_steps": 78105, "loss": 0.1379, "lr": 1.2194226599260283e-10, "epoch": 4.985916394597017, "percentage": 99.72, "elapsed_time": "3:24:24", "remaining_time": "0:00:34", "throughput": 19986.0, "total_tokens": 245120512}
|
|
{"current_steps": 77890, "total_steps": 78105, "loss": 0.1084, "lr": 1.164869767472654e-10, "epoch": 4.986236476537994, "percentage": 99.72, "elapsed_time": "3:24:25", "remaining_time": "0:00:33", "throughput": 19986.2, "total_tokens": 245136896}
|
|
{"current_steps": 77895, "total_steps": 78105, "loss": 0.1533, "lr": 1.1115651866344667e-10, "epoch": 4.986556558478971, "percentage": 99.73, "elapsed_time": "3:24:26", "remaining_time": "0:00:33", "throughput": 19986.45, "total_tokens": 245154432}
|
|
{"current_steps": 77900, "total_steps": 78105, "loss": 0.154, "lr": 1.0595089200676756e-10, "epoch": 4.986876640419948, "percentage": 99.74, "elapsed_time": "3:24:26", "remaining_time": "0:00:32", "throughput": 19986.64, "total_tokens": 245170624}
|
|
{"current_steps": 77905, "total_steps": 78105, "loss": 0.1123, "lr": 1.0087009703757533e-10, "epoch": 4.987196722360924, "percentage": 99.74, "elapsed_time": "3:24:27", "remaining_time": "0:00:31", "throughput": 19986.81, "total_tokens": 245186112}
|
|
{"current_steps": 77910, "total_steps": 78105, "loss": 0.0976, "lr": 9.59141340092784e-11, "epoch": 4.987516804301901, "percentage": 99.75, "elapsed_time": "3:24:28", "remaining_time": "0:00:30", "throughput": 19986.99, "total_tokens": 245201472}
|
|
{"current_steps": 77915, "total_steps": 78105, "loss": 0.1485, "lr": 9.108300316973407e-11, "epoch": 4.987836886242878, "percentage": 99.76, "elapsed_time": "3:24:28", "remaining_time": "0:00:29", "throughput": 19987.16, "total_tokens": 245216960}
|
|
{"current_steps": 77920, "total_steps": 78105, "loss": 0.1192, "lr": 8.637670475986071e-11, "epoch": 4.988156968183855, "percentage": 99.76, "elapsed_time": "3:24:29", "remaining_time": "0:00:29", "throughput": 19987.36, "total_tokens": 245233280}
|
|
{"current_steps": 77925, "total_steps": 78105, "loss": 0.1237, "lr": 8.179523901474806e-11, "epoch": 4.988477050124832, "percentage": 99.77, "elapsed_time": "3:24:30", "remaining_time": "0:00:28", "throughput": 19987.54, "total_tokens": 245248896}
|
|
{"current_steps": 77930, "total_steps": 78105, "loss": 0.1457, "lr": 7.733860616310208e-11, "epoch": 4.988797132065809, "percentage": 99.78, "elapsed_time": "3:24:30", "remaining_time": "0:00:27", "throughput": 19987.7, "total_tokens": 245263744}
|
|
{"current_steps": 77935, "total_steps": 78105, "loss": 0.0994, "lr": 7.300680642752245e-11, "epoch": 4.989117214006786, "percentage": 99.78, "elapsed_time": "3:24:31", "remaining_time": "0:00:26", "throughput": 19987.85, "total_tokens": 245278848}
|
|
{"current_steps": 77940, "total_steps": 78105, "loss": 0.1573, "lr": 6.879984002450269e-11, "epoch": 4.989437295947763, "percentage": 99.79, "elapsed_time": "3:24:32", "remaining_time": "0:00:25", "throughput": 19988.0, "total_tokens": 245293632}
|
|
{"current_steps": 77945, "total_steps": 78105, "loss": 0.1218, "lr": 6.471770716387493e-11, "epoch": 4.98975737788874, "percentage": 99.8, "elapsed_time": "3:24:32", "remaining_time": "0:00:25", "throughput": 19988.2, "total_tokens": 245309952}
|
|
{"current_steps": 77950, "total_steps": 78105, "loss": 0.1241, "lr": 6.076040804964267e-11, "epoch": 4.990077459829717, "percentage": 99.8, "elapsed_time": "3:24:33", "remaining_time": "0:00:24", "throughput": 19988.37, "total_tokens": 245325888}
|
|
{"current_steps": 77955, "total_steps": 78105, "loss": 0.1366, "lr": 5.6927942879148046e-11, "epoch": 4.990397541770693, "percentage": 99.81, "elapsed_time": "3:24:34", "remaining_time": "0:00:23", "throughput": 19988.54, "total_tokens": 245341312}
|
|
{"current_steps": 77960, "total_steps": 78105, "loss": 0.1117, "lr": 5.322031184390453e-11, "epoch": 4.99071762371167, "percentage": 99.81, "elapsed_time": "3:24:34", "remaining_time": "0:00:22", "throughput": 19988.74, "total_tokens": 245357760}
|
|
{"current_steps": 77965, "total_steps": 78105, "loss": 0.1299, "lr": 4.963751512931936e-11, "epoch": 4.991037705652647, "percentage": 99.82, "elapsed_time": "3:24:35", "remaining_time": "0:00:22", "throughput": 19988.87, "total_tokens": 245371904}
|
|
{"current_steps": 77970, "total_steps": 78105, "loss": 0.1135, "lr": 4.617955291386089e-11, "epoch": 4.991357787593624, "percentage": 99.83, "elapsed_time": "3:24:36", "remaining_time": "0:00:21", "throughput": 19989.04, "total_tokens": 245387712}
|
|
{"current_steps": 77975, "total_steps": 78105, "loss": 0.1689, "lr": 4.2846425370723924e-11, "epoch": 4.991677869534601, "percentage": 99.83, "elapsed_time": "3:24:36", "remaining_time": "0:00:20", "throughput": 19989.2, "total_tokens": 245402624}
|
|
{"current_steps": 77980, "total_steps": 78105, "loss": 0.1612, "lr": 3.9638132665886784e-11, "epoch": 4.991997951475578, "percentage": 99.84, "elapsed_time": "3:24:37", "remaining_time": "0:00:19", "throughput": 19989.39, "total_tokens": 245418624}
|
|
{"current_steps": 77985, "total_steps": 78105, "loss": 0.0896, "lr": 3.655467495949916e-11, "epoch": 4.992318033416555, "percentage": 99.85, "elapsed_time": "3:24:38", "remaining_time": "0:00:18", "throughput": 19989.56, "total_tokens": 245433984}
|
|
{"current_steps": 77990, "total_steps": 78105, "loss": 0.129, "lr": 3.359605240588204e-11, "epoch": 4.992638115357532, "percentage": 99.85, "elapsed_time": "3:24:38", "remaining_time": "0:00:18", "throughput": 19989.73, "total_tokens": 245449472}
|
|
{"current_steps": 77995, "total_steps": 78105, "loss": 0.099, "lr": 3.076226515269509e-11, "epoch": 4.992958197298508, "percentage": 99.86, "elapsed_time": "3:24:39", "remaining_time": "0:00:17", "throughput": 19989.88, "total_tokens": 245464512}
|
|
{"current_steps": 78000, "total_steps": 78105, "loss": 0.1504, "lr": 2.8053313341214196e-11, "epoch": 4.993278279239485, "percentage": 99.87, "elapsed_time": "3:24:40", "remaining_time": "0:00:16", "throughput": 19990.08, "total_tokens": 245480768}
|
|
{"current_steps": 78005, "total_steps": 78105, "loss": 0.149, "lr": 2.5469197107164113e-11, "epoch": 4.993598361180462, "percentage": 99.87, "elapsed_time": "3:24:40", "remaining_time": "0:00:15", "throughput": 19990.26, "total_tokens": 245496640}
|
|
{"current_steps": 78010, "total_steps": 78105, "loss": 0.0999, "lr": 2.300991657905316e-11, "epoch": 4.993918443121439, "percentage": 99.88, "elapsed_time": "3:24:41", "remaining_time": "0:00:14", "throughput": 19990.43, "total_tokens": 245512320}
|
|
{"current_steps": 78015, "total_steps": 78105, "loss": 0.1716, "lr": 2.067547187983854e-11, "epoch": 4.994238525062416, "percentage": 99.88, "elapsed_time": "3:24:42", "remaining_time": "0:00:14", "throughput": 19990.58, "total_tokens": 245527168}
|
|
{"current_steps": 78020, "total_steps": 78105, "loss": 0.1145, "lr": 1.846586312637122e-11, "epoch": 4.994558607003393, "percentage": 99.89, "elapsed_time": "3:24:42", "remaining_time": "0:00:13", "throughput": 19990.75, "total_tokens": 245542592}
|
|
{"current_steps": 78025, "total_steps": 78105, "loss": 0.1178, "lr": 1.638109042884084e-11, "epoch": 4.99487868894437, "percentage": 99.9, "elapsed_time": "3:24:43", "remaining_time": "0:00:12", "throughput": 19990.92, "total_tokens": 245558208}
|
|
{"current_steps": 78030, "total_steps": 78105, "loss": 0.0969, "lr": 1.4421153891053252e-11, "epoch": 4.995198770885347, "percentage": 99.9, "elapsed_time": "3:24:44", "remaining_time": "0:00:11", "throughput": 19991.14, "total_tokens": 245574976}
|
|
{"current_steps": 78035, "total_steps": 78105, "loss": 0.1036, "lr": 1.2586053611263193e-11, "epoch": 4.995518852826324, "percentage": 99.91, "elapsed_time": "3:24:44", "remaining_time": "0:00:11", "throughput": 19991.29, "total_tokens": 245589888}
|
|
{"current_steps": 78040, "total_steps": 78105, "loss": 0.1257, "lr": 1.0875789681064063e-11, "epoch": 4.995838934767301, "percentage": 99.92, "elapsed_time": "3:24:45", "remaining_time": "0:00:10", "throughput": 19991.46, "total_tokens": 245605376}
|
|
{"current_steps": 78045, "total_steps": 78105, "loss": 0.1021, "lr": 9.290362185387924e-12, "epoch": 4.996159016708277, "percentage": 99.92, "elapsed_time": "3:24:46", "remaining_time": "0:00:09", "throughput": 19991.63, "total_tokens": 245621056}
|
|
{"current_steps": 78050, "total_steps": 78105, "loss": 0.149, "lr": 7.829771204170833e-12, "epoch": 4.996479098649254, "percentage": 99.93, "elapsed_time": "3:24:46", "remaining_time": "0:00:08", "throughput": 19991.81, "total_tokens": 245636928}
|
|
{"current_steps": 78055, "total_steps": 78105, "loss": 0.1508, "lr": 6.494016809854842e-12, "epoch": 4.996799180590231, "percentage": 99.94, "elapsed_time": "3:24:47", "remaining_time": "0:00:07", "throughput": 19991.96, "total_tokens": 245651968}
|
|
{"current_steps": 78060, "total_steps": 78105, "loss": 0.1208, "lr": 5.283099069053332e-12, "epoch": 4.997119262531208, "percentage": 99.94, "elapsed_time": "3:24:48", "remaining_time": "0:00:07", "throughput": 19992.11, "total_tokens": 245666752}
|
|
{"current_steps": 78065, "total_steps": 78105, "loss": 0.1275, "lr": 4.1970180425510155e-12, "epoch": 4.997439344472185, "percentage": 99.95, "elapsed_time": "3:24:48", "remaining_time": "0:00:06", "throughput": 19992.33, "total_tokens": 245683776}
|
|
{"current_steps": 78070, "total_steps": 78105, "loss": 0.1582, "lr": 3.2357737844712635e-12, "epoch": 4.997759426413162, "percentage": 99.96, "elapsed_time": "3:24:49", "remaining_time": "0:00:05", "throughput": 19992.5, "total_tokens": 245699008}
|
|
{"current_steps": 78075, "total_steps": 78105, "loss": 0.1184, "lr": 2.399366342831222e-12, "epoch": 4.998079508354139, "percentage": 99.96, "elapsed_time": "3:24:50", "remaining_time": "0:00:04", "throughput": 19992.69, "total_tokens": 245715008}
|
|
{"current_steps": 78080, "total_steps": 78105, "loss": 0.1467, "lr": 1.6877957592642547e-12, "epoch": 4.998399590295116, "percentage": 99.97, "elapsed_time": "3:24:50", "remaining_time": "0:00:03", "throughput": 19992.86, "total_tokens": 245730240}
|
|
{"current_steps": 78085, "total_steps": 78105, "loss": 0.1206, "lr": 1.101062069575054e-12, "epoch": 4.998719672236092, "percentage": 99.97, "elapsed_time": "3:24:51", "remaining_time": "0:00:03", "throughput": 19993.01, "total_tokens": 245744896}
|
|
{"current_steps": 78090, "total_steps": 78105, "loss": 0.1843, "lr": 6.391653026294187e-13, "epoch": 4.999039754177069, "percentage": 99.98, "elapsed_time": "3:24:52", "remaining_time": "0:00:02", "throughput": 19993.22, "total_tokens": 245761728}
|
|
{"current_steps": 78095, "total_steps": 78105, "loss": 0.1793, "lr": 3.0210548174203214e-13, "epoch": 4.999359836118046, "percentage": 99.99, "elapsed_time": "3:24:52", "remaining_time": "0:00:01", "throughput": 19993.4, "total_tokens": 245777472}
|
|
{"current_steps": 78100, "total_steps": 78105, "loss": 0.1155, "lr": 8.988262384379553e-14, "epoch": 4.999679918059023, "percentage": 99.99, "elapsed_time": "3:24:53", "remaining_time": "0:00:00", "throughput": 19993.54, "total_tokens": 245792448}
|
|
{"current_steps": 78105, "total_steps": 78105, "loss": 0.0874, "lr": 2.4967394818276035e-15, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "3:24:54", "remaining_time": "0:00:00", "throughput": 19993.63, "total_tokens": 245808128}
|
|
{"current_steps": 78105, "total_steps": 78105, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "3:25:44", "remaining_time": "0:00:00", "throughput": 19912.92, "total_tokens": 245808128}
|