7598 lines
1.8 MiB
7598 lines
1.8 MiB
{"current_steps": 5, "total_steps": 37885, "loss": 1.4356, "lr": 2.111375032990235e-09, "epoch": 0.0006598917777484492, "percentage": 0.01, "elapsed_time": "0:00:01", "remaining_time": "3:34:02", "throughput": 1321.39, "total_tokens": 2240}
|
|
{"current_steps": 10, "total_steps": 37885, "loss": 1.5327, "lr": 4.7505938242280285e-09, "epoch": 0.0013197835554968984, "percentage": 0.03, "elapsed_time": "0:00:02", "remaining_time": "2:09:06", "throughput": 2284.35, "total_tokens": 4672}
|
|
{"current_steps": 15, "total_steps": 37885, "loss": 1.4513, "lr": 7.389812615465822e-09, "epoch": 0.0019796753332453477, "percentage": 0.04, "elapsed_time": "0:00:02", "remaining_time": "1:39:59", "throughput": 2962.75, "total_tokens": 7040}
|
|
{"current_steps": 20, "total_steps": 37885, "loss": 1.4726, "lr": 1.0029031406703616e-08, "epoch": 0.002639567110993797, "percentage": 0.05, "elapsed_time": "0:00:02", "remaining_time": "1:25:21", "throughput": 3548.75, "total_tokens": 9600}
|
|
{"current_steps": 25, "total_steps": 37885, "loss": 1.4022, "lr": 1.2668250197941409e-08, "epoch": 0.0032994588887422464, "percentage": 0.07, "elapsed_time": "0:00:03", "remaining_time": "1:16:31", "throughput": 4010.46, "total_tokens": 12160}
|
|
{"current_steps": 30, "total_steps": 37885, "loss": 1.5128, "lr": 1.5307468989179204e-08, "epoch": 0.0039593506664906955, "percentage": 0.08, "elapsed_time": "0:00:03", "remaining_time": "1:10:39", "throughput": 4324.5, "total_tokens": 14528}
|
|
{"current_steps": 35, "total_steps": 37885, "loss": 1.3482, "lr": 1.7946687780416997e-08, "epoch": 0.004619242444239145, "percentage": 0.09, "elapsed_time": "0:00:03", "remaining_time": "1:06:27", "throughput": 4547.47, "total_tokens": 16768}
|
|
{"current_steps": 40, "total_steps": 37885, "loss": 1.4398, "lr": 2.058590657165479e-08, "epoch": 0.005279134221987594, "percentage": 0.11, "elapsed_time": "0:00:04", "remaining_time": "1:03:21", "throughput": 4794.28, "total_tokens": 19264}
|
|
{"current_steps": 45, "total_steps": 37885, "loss": 1.4043, "lr": 2.3225125362892583e-08, "epoch": 0.005939025999736044, "percentage": 0.12, "elapsed_time": "0:00:04", "remaining_time": "1:01:04", "throughput": 4964.35, "total_tokens": 21632}
|
|
{"current_steps": 50, "total_steps": 37885, "loss": 1.3473, "lr": 2.5864344154130376e-08, "epoch": 0.006598917777484493, "percentage": 0.13, "elapsed_time": "0:00:04", "remaining_time": "0:59:03", "throughput": 5124.89, "total_tokens": 24000}
|
|
{"current_steps": 55, "total_steps": 37885, "loss": 1.427, "lr": 2.850356294536817e-08, "epoch": 0.007258809555232942, "percentage": 0.15, "elapsed_time": "0:00:05", "remaining_time": "0:57:28", "throughput": 5284.83, "total_tokens": 26496}
|
|
{"current_steps": 60, "total_steps": 37885, "loss": 1.4604, "lr": 3.1142781736605966e-08, "epoch": 0.007918701332981391, "percentage": 0.16, "elapsed_time": "0:00:05", "remaining_time": "0:56:11", "throughput": 5445.53, "total_tokens": 29120}
|
|
{"current_steps": 65, "total_steps": 37885, "loss": 1.2511, "lr": 3.378200052784376e-08, "epoch": 0.008578593110729841, "percentage": 0.17, "elapsed_time": "0:00:05", "remaining_time": "0:55:05", "throughput": 5588.02, "total_tokens": 31744}
|
|
{"current_steps": 70, "total_steps": 37885, "loss": 1.2233, "lr": 3.6421219319081546e-08, "epoch": 0.00923848488847829, "percentage": 0.18, "elapsed_time": "0:00:06", "remaining_time": "0:54:06", "throughput": 5686.91, "total_tokens": 34176}
|
|
{"current_steps": 75, "total_steps": 37885, "loss": 1.2987, "lr": 3.9060438110319346e-08, "epoch": 0.009898376666226739, "percentage": 0.2, "elapsed_time": "0:00:06", "remaining_time": "0:53:18", "throughput": 5810.7, "total_tokens": 36864}
|
|
{"current_steps": 80, "total_steps": 37885, "loss": 1.1757, "lr": 4.169965690155713e-08, "epoch": 0.010558268443975187, "percentage": 0.21, "elapsed_time": "0:00:06", "remaining_time": "0:52:34", "throughput": 5906.38, "total_tokens": 39424}
|
|
{"current_steps": 85, "total_steps": 37885, "loss": 1.1551, "lr": 4.433887569279493e-08, "epoch": 0.011218160221723637, "percentage": 0.22, "elapsed_time": "0:00:07", "remaining_time": "0:51:57", "throughput": 6007.46, "total_tokens": 42112}
|
|
{"current_steps": 90, "total_steps": 37885, "loss": 1.0816, "lr": 4.6978094484032725e-08, "epoch": 0.011878051999472087, "percentage": 0.24, "elapsed_time": "0:00:07", "remaining_time": "0:51:21", "throughput": 6069.53, "total_tokens": 44544}
|
|
{"current_steps": 95, "total_steps": 37885, "loss": 0.8812, "lr": 4.961731327527052e-08, "epoch": 0.012537943777220536, "percentage": 0.25, "elapsed_time": "0:00:07", "remaining_time": "0:50:55", "throughput": 6132.86, "total_tokens": 47104}
|
|
{"current_steps": 100, "total_steps": 37885, "loss": 0.8313, "lr": 5.225653206650831e-08, "epoch": 0.013197835554968985, "percentage": 0.26, "elapsed_time": "0:00:08", "remaining_time": "0:50:27", "throughput": 6198.38, "total_tokens": 49664}
|
|
{"current_steps": 105, "total_steps": 37885, "loss": 0.7819, "lr": 5.4895750857746105e-08, "epoch": 0.013857727332717434, "percentage": 0.28, "elapsed_time": "0:00:08", "remaining_time": "0:50:01", "throughput": 6275.64, "total_tokens": 52352}
|
|
{"current_steps": 110, "total_steps": 37885, "loss": 0.746, "lr": 5.75349696489839e-08, "epoch": 0.014517619110465884, "percentage": 0.29, "elapsed_time": "0:00:08", "remaining_time": "0:49:36", "throughput": 6313.1, "total_tokens": 54720}
|
|
{"current_steps": 115, "total_steps": 37885, "loss": 0.7562, "lr": 6.01741884402217e-08, "epoch": 0.015177510888214334, "percentage": 0.3, "elapsed_time": "0:00:08", "remaining_time": "0:49:14", "throughput": 6353.76, "total_tokens": 57152}
|
|
{"current_steps": 120, "total_steps": 37885, "loss": 0.6604, "lr": 6.281340723145948e-08, "epoch": 0.015837402665962782, "percentage": 0.32, "elapsed_time": "0:00:09", "remaining_time": "0:48:55", "throughput": 6408.76, "total_tokens": 59776}
|
|
{"current_steps": 125, "total_steps": 37885, "loss": 0.4326, "lr": 6.545262602269728e-08, "epoch": 0.01649729444371123, "percentage": 0.33, "elapsed_time": "0:00:09", "remaining_time": "0:48:38", "throughput": 6464.85, "total_tokens": 62464}
|
|
{"current_steps": 130, "total_steps": 37885, "loss": 0.3566, "lr": 6.809184481393507e-08, "epoch": 0.017157186221459682, "percentage": 0.34, "elapsed_time": "0:00:09", "remaining_time": "0:48:22", "throughput": 6511.73, "total_tokens": 65088}
|
|
{"current_steps": 135, "total_steps": 37885, "loss": 0.3399, "lr": 7.073106360517287e-08, "epoch": 0.01781707799920813, "percentage": 0.36, "elapsed_time": "0:00:10", "remaining_time": "0:48:07", "throughput": 6564.21, "total_tokens": 67776}
|
|
{"current_steps": 140, "total_steps": 37885, "loss": 0.3479, "lr": 7.337028239641066e-08, "epoch": 0.01847696977695658, "percentage": 0.37, "elapsed_time": "0:00:10", "remaining_time": "0:47:53", "throughput": 6606.29, "total_tokens": 70400}
|
|
{"current_steps": 145, "total_steps": 37885, "loss": 0.3225, "lr": 7.600950118764846e-08, "epoch": 0.01913686155470503, "percentage": 0.38, "elapsed_time": "0:00:10", "remaining_time": "0:47:41", "throughput": 6612.95, "total_tokens": 72704}
|
|
{"current_steps": 150, "total_steps": 37885, "loss": 0.3282, "lr": 7.864871997888626e-08, "epoch": 0.019796753332453478, "percentage": 0.4, "elapsed_time": "0:00:11", "remaining_time": "0:47:27", "throughput": 6637.39, "total_tokens": 75136}
|
|
{"current_steps": 155, "total_steps": 37885, "loss": 0.3013, "lr": 8.128793877012403e-08, "epoch": 0.020456645110201926, "percentage": 0.41, "elapsed_time": "0:00:11", "remaining_time": "0:47:15", "throughput": 6664.25, "total_tokens": 77632}
|
|
{"current_steps": 160, "total_steps": 37885, "loss": 0.3084, "lr": 8.392715756136183e-08, "epoch": 0.021116536887950375, "percentage": 0.42, "elapsed_time": "0:00:11", "remaining_time": "0:47:06", "throughput": 6700.13, "total_tokens": 80320}
|
|
{"current_steps": 165, "total_steps": 37885, "loss": 0.3253, "lr": 8.656637635259963e-08, "epoch": 0.021776428665698826, "percentage": 0.44, "elapsed_time": "0:00:12", "remaining_time": "0:46:55", "throughput": 6718.42, "total_tokens": 82752}
|
|
{"current_steps": 170, "total_steps": 37885, "loss": 0.2936, "lr": 8.920559514383743e-08, "epoch": 0.022436320443447275, "percentage": 0.45, "elapsed_time": "0:00:12", "remaining_time": "0:46:45", "throughput": 6740.9, "total_tokens": 85248}
|
|
{"current_steps": 175, "total_steps": 37885, "loss": 0.3018, "lr": 9.184481393507522e-08, "epoch": 0.023096212221195723, "percentage": 0.46, "elapsed_time": "0:00:12", "remaining_time": "0:46:37", "throughput": 6769.47, "total_tokens": 87872}
|
|
{"current_steps": 180, "total_steps": 37885, "loss": 0.2907, "lr": 9.4484032726313e-08, "epoch": 0.023756103998944175, "percentage": 0.48, "elapsed_time": "0:00:13", "remaining_time": "0:46:27", "throughput": 6789.68, "total_tokens": 90368}
|
|
{"current_steps": 185, "total_steps": 37885, "loss": 0.2362, "lr": 9.71232515175508e-08, "epoch": 0.024415995776692623, "percentage": 0.49, "elapsed_time": "0:00:13", "remaining_time": "0:46:19", "throughput": 6813.68, "total_tokens": 92928}
|
|
{"current_steps": 190, "total_steps": 37885, "loss": 0.2169, "lr": 9.976247030878859e-08, "epoch": 0.02507588755444107, "percentage": 0.5, "elapsed_time": "0:00:13", "remaining_time": "0:46:10", "throughput": 6823.31, "total_tokens": 95296}
|
|
{"current_steps": 195, "total_steps": 37885, "loss": 0.204, "lr": 1.0240168910002639e-07, "epoch": 0.02573577933218952, "percentage": 0.51, "elapsed_time": "0:00:14", "remaining_time": "0:46:04", "throughput": 6851.79, "total_tokens": 97984}
|
|
{"current_steps": 200, "total_steps": 37885, "loss": 0.2406, "lr": 1.0504090789126419e-07, "epoch": 0.02639567110993797, "percentage": 0.53, "elapsed_time": "0:00:14", "remaining_time": "0:45:55", "throughput": 6861.54, "total_tokens": 100352}
|
|
{"current_steps": 205, "total_steps": 37885, "loss": 0.2504, "lr": 1.0768012668250196e-07, "epoch": 0.02705556288768642, "percentage": 0.54, "elapsed_time": "0:00:14", "remaining_time": "0:45:48", "throughput": 6853.24, "total_tokens": 102464}
|
|
{"current_steps": 210, "total_steps": 37885, "loss": 0.1825, "lr": 1.1031934547373976e-07, "epoch": 0.027715454665434867, "percentage": 0.55, "elapsed_time": "0:00:15", "remaining_time": "0:45:41", "throughput": 6876.48, "total_tokens": 105088}
|
|
{"current_steps": 215, "total_steps": 37885, "loss": 0.203, "lr": 1.1295856426497756e-07, "epoch": 0.02837534644318332, "percentage": 0.57, "elapsed_time": "0:00:15", "remaining_time": "0:45:35", "throughput": 6894.69, "total_tokens": 107648}
|
|
{"current_steps": 220, "total_steps": 37885, "loss": 0.1882, "lr": 1.1559778305621536e-07, "epoch": 0.029035238220931767, "percentage": 0.58, "elapsed_time": "0:00:15", "remaining_time": "0:45:29", "throughput": 6909.27, "total_tokens": 110144}
|
|
{"current_steps": 225, "total_steps": 37885, "loss": 0.202, "lr": 1.1823700184745315e-07, "epoch": 0.029695129998680216, "percentage": 0.59, "elapsed_time": "0:00:16", "remaining_time": "0:45:23", "throughput": 6919.82, "total_tokens": 112576}
|
|
{"current_steps": 230, "total_steps": 37885, "loss": 0.2255, "lr": 1.2087622063869096e-07, "epoch": 0.030355021776428667, "percentage": 0.61, "elapsed_time": "0:00:16", "remaining_time": "0:45:18", "throughput": 6942.84, "total_tokens": 115264}
|
|
{"current_steps": 235, "total_steps": 37885, "loss": 0.1494, "lr": 1.2351543942992873e-07, "epoch": 0.031014913554177116, "percentage": 0.62, "elapsed_time": "0:00:16", "remaining_time": "0:45:12", "throughput": 6961.75, "total_tokens": 117888}
|
|
{"current_steps": 240, "total_steps": 37885, "loss": 0.0743, "lr": 1.2615465822116653e-07, "epoch": 0.031674805331925564, "percentage": 0.63, "elapsed_time": "0:00:17", "remaining_time": "0:45:07", "throughput": 6971.05, "total_tokens": 120320}
|
|
{"current_steps": 245, "total_steps": 37885, "loss": 0.1968, "lr": 1.2879387701240433e-07, "epoch": 0.032334697109674015, "percentage": 0.65, "elapsed_time": "0:00:17", "remaining_time": "0:45:01", "throughput": 6976.82, "total_tokens": 122688}
|
|
{"current_steps": 250, "total_steps": 37885, "loss": 0.1802, "lr": 1.314330958036421e-07, "epoch": 0.03299458888742246, "percentage": 0.66, "elapsed_time": "0:00:17", "remaining_time": "0:44:56", "throughput": 6991.96, "total_tokens": 125248}
|
|
{"current_steps": 255, "total_steps": 37885, "loss": 0.149, "lr": 1.340723145948799e-07, "epoch": 0.03365448066517091, "percentage": 0.67, "elapsed_time": "0:00:18", "remaining_time": "0:44:51", "throughput": 6999.4, "total_tokens": 127680}
|
|
{"current_steps": 260, "total_steps": 37885, "loss": 0.0907, "lr": 1.367115333861177e-07, "epoch": 0.034314372442919364, "percentage": 0.69, "elapsed_time": "0:00:18", "remaining_time": "0:44:48", "throughput": 7024.26, "total_tokens": 130496}
|
|
{"current_steps": 265, "total_steps": 37885, "loss": 0.1709, "lr": 1.393507521773555e-07, "epoch": 0.03497426422066781, "percentage": 0.7, "elapsed_time": "0:00:18", "remaining_time": "0:44:43", "throughput": 7034.92, "total_tokens": 132992}
|
|
{"current_steps": 270, "total_steps": 37885, "loss": 0.1453, "lr": 1.419899709685933e-07, "epoch": 0.03563415599841626, "percentage": 0.71, "elapsed_time": "0:00:19", "remaining_time": "0:44:39", "throughput": 7022.26, "total_tokens": 135040}
|
|
{"current_steps": 275, "total_steps": 37885, "loss": 0.1749, "lr": 1.4462918975983108e-07, "epoch": 0.03629404777616471, "percentage": 0.73, "elapsed_time": "0:00:19", "remaining_time": "0:44:34", "throughput": 7035.21, "total_tokens": 137600}
|
|
{"current_steps": 280, "total_steps": 37885, "loss": 0.1575, "lr": 1.4726840855106888e-07, "epoch": 0.03695393955391316, "percentage": 0.74, "elapsed_time": "0:00:19", "remaining_time": "0:44:30", "throughput": 7034.77, "total_tokens": 139904}
|
|
{"current_steps": 285, "total_steps": 37885, "loss": 0.1957, "lr": 1.4990762734230665e-07, "epoch": 0.03761383133166161, "percentage": 0.75, "elapsed_time": "0:00:20", "remaining_time": "0:44:26", "throughput": 7026.07, "total_tokens": 142016}
|
|
{"current_steps": 290, "total_steps": 37885, "loss": 0.0195, "lr": 1.5254684613354445e-07, "epoch": 0.03827372310941006, "percentage": 0.77, "elapsed_time": "0:00:20", "remaining_time": "0:44:22", "throughput": 7038.13, "total_tokens": 144576}
|
|
{"current_steps": 295, "total_steps": 37885, "loss": 0.086, "lr": 1.5518606492478225e-07, "epoch": 0.038933614887158505, "percentage": 0.78, "elapsed_time": "0:00:20", "remaining_time": "0:44:18", "throughput": 7039.0, "total_tokens": 146880}
|
|
{"current_steps": 300, "total_steps": 37885, "loss": 0.2891, "lr": 1.5782528371602005e-07, "epoch": 0.039593506664906956, "percentage": 0.79, "elapsed_time": "0:00:21", "remaining_time": "0:44:15", "throughput": 7039.47, "total_tokens": 149184}
|
|
{"current_steps": 305, "total_steps": 37885, "loss": 0.1615, "lr": 1.6046450250725785e-07, "epoch": 0.0402533984426554, "percentage": 0.81, "elapsed_time": "0:00:21", "remaining_time": "0:44:11", "throughput": 7031.68, "total_tokens": 151296}
|
|
{"current_steps": 310, "total_steps": 37885, "loss": 0.2055, "lr": 1.6310372129849565e-07, "epoch": 0.04091329022040385, "percentage": 0.82, "elapsed_time": "0:00:21", "remaining_time": "0:44:07", "throughput": 7034.42, "total_tokens": 153664}
|
|
{"current_steps": 315, "total_steps": 37885, "loss": 0.2078, "lr": 1.6574294008973345e-07, "epoch": 0.041573181998152305, "percentage": 0.83, "elapsed_time": "0:00:22", "remaining_time": "0:44:04", "throughput": 7037.54, "total_tokens": 156032}
|
|
{"current_steps": 320, "total_steps": 37885, "loss": 0.1392, "lr": 1.6838215888097122e-07, "epoch": 0.04223307377590075, "percentage": 0.84, "elapsed_time": "0:00:22", "remaining_time": "0:44:01", "throughput": 7044.45, "total_tokens": 158528}
|
|
{"current_steps": 325, "total_steps": 37885, "loss": 0.1113, "lr": 1.71021377672209e-07, "epoch": 0.0428929655536492, "percentage": 0.86, "elapsed_time": "0:00:22", "remaining_time": "0:43:59", "throughput": 7037.47, "total_tokens": 160704}
|
|
{"current_steps": 330, "total_steps": 37885, "loss": 0.178, "lr": 1.736605964634468e-07, "epoch": 0.04355285733139765, "percentage": 0.87, "elapsed_time": "0:00:23", "remaining_time": "0:43:56", "throughput": 7039.06, "total_tokens": 163072}
|
|
{"current_steps": 335, "total_steps": 37885, "loss": 0.1961, "lr": 1.762998152546846e-07, "epoch": 0.0442127491091461, "percentage": 0.88, "elapsed_time": "0:00:23", "remaining_time": "0:43:54", "throughput": 7044.0, "total_tokens": 165568}
|
|
{"current_steps": 340, "total_steps": 37885, "loss": 0.2253, "lr": 1.789390340459224e-07, "epoch": 0.04487264088689455, "percentage": 0.9, "elapsed_time": "0:00:23", "remaining_time": "0:43:52", "throughput": 7044.54, "total_tokens": 167936}
|
|
{"current_steps": 345, "total_steps": 37885, "loss": 0.174, "lr": 1.815782528371602e-07, "epoch": 0.045532532664643, "percentage": 0.91, "elapsed_time": "0:00:24", "remaining_time": "0:43:50", "throughput": 7040.38, "total_tokens": 170176}
|
|
{"current_steps": 350, "total_steps": 37885, "loss": 0.1717, "lr": 1.84217471628398e-07, "epoch": 0.046192424442391446, "percentage": 0.92, "elapsed_time": "0:00:24", "remaining_time": "0:43:47", "throughput": 7033.66, "total_tokens": 172352}
|
|
{"current_steps": 355, "total_steps": 37885, "loss": 0.1506, "lr": 1.8685669041963577e-07, "epoch": 0.0468523162201399, "percentage": 0.94, "elapsed_time": "0:00:24", "remaining_time": "0:43:46", "throughput": 7050.16, "total_tokens": 175168}
|
|
{"current_steps": 360, "total_steps": 37885, "loss": 0.0852, "lr": 1.8949590921087357e-07, "epoch": 0.04751220799788835, "percentage": 0.95, "elapsed_time": "0:00:25", "remaining_time": "0:43:45", "throughput": 7061.91, "total_tokens": 177856}
|
|
{"current_steps": 365, "total_steps": 37885, "loss": 0.2049, "lr": 1.9213512800211137e-07, "epoch": 0.048172099775636794, "percentage": 0.96, "elapsed_time": "0:00:25", "remaining_time": "0:43:43", "throughput": 7059.68, "total_tokens": 180160}
|
|
{"current_steps": 370, "total_steps": 37885, "loss": 0.1772, "lr": 1.9477434679334917e-07, "epoch": 0.048831991553385246, "percentage": 0.98, "elapsed_time": "0:00:25", "remaining_time": "0:43:41", "throughput": 7068.65, "total_tokens": 182784}
|
|
{"current_steps": 375, "total_steps": 37885, "loss": 0.0558, "lr": 1.9741356558458697e-07, "epoch": 0.0494918833311337, "percentage": 0.99, "elapsed_time": "0:00:26", "remaining_time": "0:43:40", "throughput": 7063.16, "total_tokens": 185024}
|
|
{"current_steps": 380, "total_steps": 37885, "loss": 0.2301, "lr": 2.0005278437582474e-07, "epoch": 0.05015177510888214, "percentage": 1.0, "elapsed_time": "0:00:26", "remaining_time": "0:43:38", "throughput": 7074.88, "total_tokens": 187712}
|
|
{"current_steps": 385, "total_steps": 37885, "loss": 0.179, "lr": 2.0269200316706254e-07, "epoch": 0.050811666886630594, "percentage": 1.02, "elapsed_time": "0:00:26", "remaining_time": "0:43:37", "throughput": 7085.78, "total_tokens": 190400}
|
|
{"current_steps": 390, "total_steps": 37885, "loss": 0.1703, "lr": 2.0533122195830032e-07, "epoch": 0.05147155866437904, "percentage": 1.03, "elapsed_time": "0:00:27", "remaining_time": "0:43:36", "throughput": 7102.99, "total_tokens": 193280}
|
|
{"current_steps": 395, "total_steps": 37885, "loss": 0.238, "lr": 2.0797044074953812e-07, "epoch": 0.05213145044212749, "percentage": 1.04, "elapsed_time": "0:00:27", "remaining_time": "0:43:34", "throughput": 7100.58, "total_tokens": 195584}
|
|
{"current_steps": 400, "total_steps": 37885, "loss": 0.0807, "lr": 2.1060965954077591e-07, "epoch": 0.05279134221987594, "percentage": 1.06, "elapsed_time": "0:00:27", "remaining_time": "0:43:32", "throughput": 7109.02, "total_tokens": 198208}
|
|
{"current_steps": 405, "total_steps": 37885, "loss": 0.1271, "lr": 2.1324887833201371e-07, "epoch": 0.05345123399762439, "percentage": 1.07, "elapsed_time": "0:00:28", "remaining_time": "0:43:30", "throughput": 7114.86, "total_tokens": 200704}
|
|
{"current_steps": 410, "total_steps": 37885, "loss": 0.1709, "lr": 2.1588809712325151e-07, "epoch": 0.05411112577537284, "percentage": 1.08, "elapsed_time": "0:00:28", "remaining_time": "0:43:28", "throughput": 7118.96, "total_tokens": 203136}
|
|
{"current_steps": 415, "total_steps": 37885, "loss": 0.1371, "lr": 2.1852731591448931e-07, "epoch": 0.05477101755312129, "percentage": 1.1, "elapsed_time": "0:00:28", "remaining_time": "0:43:26", "throughput": 7119.25, "total_tokens": 205504}
|
|
{"current_steps": 420, "total_steps": 37885, "loss": 0.2497, "lr": 2.2116653470572711e-07, "epoch": 0.055430909330869735, "percentage": 1.11, "elapsed_time": "0:00:29", "remaining_time": "0:43:24", "throughput": 7124.61, "total_tokens": 208000}
|
|
{"current_steps": 425, "total_steps": 37885, "loss": 0.1593, "lr": 2.238057534969649e-07, "epoch": 0.056090801108618187, "percentage": 1.12, "elapsed_time": "0:00:29", "remaining_time": "0:43:22", "throughput": 7131.71, "total_tokens": 210560}
|
|
{"current_steps": 430, "total_steps": 37885, "loss": 0.2053, "lr": 2.2644497228820266e-07, "epoch": 0.05675069288636664, "percentage": 1.14, "elapsed_time": "0:00:29", "remaining_time": "0:43:20", "throughput": 7136.08, "total_tokens": 213056}
|
|
{"current_steps": 435, "total_steps": 37885, "loss": 0.1709, "lr": 2.2908419107944046e-07, "epoch": 0.05741058466411508, "percentage": 1.15, "elapsed_time": "0:00:30", "remaining_time": "0:43:18", "throughput": 7133.38, "total_tokens": 215296}
|
|
{"current_steps": 440, "total_steps": 37885, "loss": 0.0204, "lr": 2.3172340987067826e-07, "epoch": 0.058070476441863535, "percentage": 1.16, "elapsed_time": "0:00:30", "remaining_time": "0:43:16", "throughput": 7128.77, "total_tokens": 217472}
|
|
{"current_steps": 445, "total_steps": 37885, "loss": 0.0669, "lr": 2.3436262866191606e-07, "epoch": 0.058730368219611986, "percentage": 1.17, "elapsed_time": "0:00:30", "remaining_time": "0:43:14", "throughput": 7134.11, "total_tokens": 219968}
|
|
{"current_steps": 450, "total_steps": 37885, "loss": 0.1046, "lr": 2.3700184745315386e-07, "epoch": 0.05939025999736043, "percentage": 1.19, "elapsed_time": "0:00:31", "remaining_time": "0:43:12", "throughput": 7142.83, "total_tokens": 222592}
|
|
{"current_steps": 455, "total_steps": 37885, "loss": 0.1579, "lr": 2.3964106624439166e-07, "epoch": 0.06005015177510888, "percentage": 1.2, "elapsed_time": "0:00:31", "remaining_time": "0:43:10", "throughput": 7138.21, "total_tokens": 224768}
|
|
{"current_steps": 460, "total_steps": 37885, "loss": 0.238, "lr": 2.4228028503562943e-07, "epoch": 0.060710043552857335, "percentage": 1.21, "elapsed_time": "0:00:31", "remaining_time": "0:43:08", "throughput": 7142.44, "total_tokens": 227264}
|
|
{"current_steps": 465, "total_steps": 37885, "loss": 0.0656, "lr": 2.4491950382686726e-07, "epoch": 0.06136993533060578, "percentage": 1.23, "elapsed_time": "0:00:32", "remaining_time": "0:43:06", "throughput": 7147.37, "total_tokens": 229760}
|
|
{"current_steps": 470, "total_steps": 37885, "loss": 0.2998, "lr": 2.4755872261810503e-07, "epoch": 0.06202982710835423, "percentage": 1.24, "elapsed_time": "0:00:32", "remaining_time": "0:43:04", "throughput": 7144.9, "total_tokens": 232000}
|
|
{"current_steps": 475, "total_steps": 37885, "loss": 0.2337, "lr": 2.501979414093428e-07, "epoch": 0.06268971888610268, "percentage": 1.25, "elapsed_time": "0:00:32", "remaining_time": "0:43:02", "throughput": 7140.36, "total_tokens": 234176}
|
|
{"current_steps": 480, "total_steps": 37885, "loss": 0.1524, "lr": 2.528371602005806e-07, "epoch": 0.06334961066385113, "percentage": 1.27, "elapsed_time": "0:00:33", "remaining_time": "0:43:01", "throughput": 7146.84, "total_tokens": 236736}
|
|
{"current_steps": 485, "total_steps": 37885, "loss": 0.0146, "lr": 2.554763789918184e-07, "epoch": 0.06400950244159957, "percentage": 1.28, "elapsed_time": "0:00:33", "remaining_time": "0:42:59", "throughput": 7148.07, "total_tokens": 239104}
|
|
{"current_steps": 490, "total_steps": 37885, "loss": 0.4542, "lr": 2.581155977830562e-07, "epoch": 0.06466939421934803, "percentage": 1.29, "elapsed_time": "0:00:33", "remaining_time": "0:42:57", "throughput": 7152.45, "total_tokens": 241600}
|
|
{"current_steps": 495, "total_steps": 37885, "loss": 0.5945, "lr": 2.60754816574294e-07, "epoch": 0.06532928599709648, "percentage": 1.31, "elapsed_time": "0:00:34", "remaining_time": "0:42:56", "throughput": 7158.65, "total_tokens": 244160}
|
|
{"current_steps": 500, "total_steps": 37885, "loss": 0.3712, "lr": 2.633940353655318e-07, "epoch": 0.06598917777484492, "percentage": 1.32, "elapsed_time": "0:00:34", "remaining_time": "0:42:54", "throughput": 7157.58, "total_tokens": 246464}
|
|
{"current_steps": 505, "total_steps": 37885, "loss": 0.1548, "lr": 2.660332541567696e-07, "epoch": 0.06664906955259338, "percentage": 1.33, "elapsed_time": "0:00:34", "remaining_time": "0:42:53", "throughput": 7161.61, "total_tokens": 248960}
|
|
{"current_steps": 510, "total_steps": 37885, "loss": 0.1575, "lr": 2.686724729480074e-07, "epoch": 0.06730896133034182, "percentage": 1.35, "elapsed_time": "0:00:35", "remaining_time": "0:42:51", "throughput": 7164.56, "total_tokens": 251392}
|
|
{"current_steps": 515, "total_steps": 37885, "loss": 0.1289, "lr": 2.7131169173924515e-07, "epoch": 0.06796885310809027, "percentage": 1.36, "elapsed_time": "0:00:35", "remaining_time": "0:42:49", "throughput": 7168.52, "total_tokens": 253888}
|
|
{"current_steps": 520, "total_steps": 37885, "loss": 0.1912, "lr": 2.73950910530483e-07, "epoch": 0.06862874488583873, "percentage": 1.37, "elapsed_time": "0:00:35", "remaining_time": "0:42:48", "throughput": 7172.36, "total_tokens": 256384}
|
|
{"current_steps": 525, "total_steps": 37885, "loss": 0.2614, "lr": 2.7659012932172075e-07, "epoch": 0.06928863666358717, "percentage": 1.39, "elapsed_time": "0:00:36", "remaining_time": "0:42:47", "throughput": 7165.85, "total_tokens": 258496}
|
|
{"current_steps": 530, "total_steps": 37885, "loss": 0.1036, "lr": 2.792293481129586e-07, "epoch": 0.06994852844133562, "percentage": 1.4, "elapsed_time": "0:00:36", "remaining_time": "0:42:45", "throughput": 7166.59, "total_tokens": 260864}
|
|
{"current_steps": 535, "total_steps": 37885, "loss": 0.0465, "lr": 2.8186856690419635e-07, "epoch": 0.07060842021908408, "percentage": 1.41, "elapsed_time": "0:00:36", "remaining_time": "0:42:44", "throughput": 7170.15, "total_tokens": 263360}
|
|
{"current_steps": 540, "total_steps": 37885, "loss": 0.097, "lr": 2.845077856954342e-07, "epoch": 0.07126831199683252, "percentage": 1.43, "elapsed_time": "0:00:37", "remaining_time": "0:42:43", "throughput": 7179.82, "total_tokens": 266112}
|
|
{"current_steps": 545, "total_steps": 37885, "loss": 0.1353, "lr": 2.8714700448667195e-07, "epoch": 0.07192820377458096, "percentage": 1.44, "elapsed_time": "0:00:37", "remaining_time": "0:42:41", "throughput": 7182.05, "total_tokens": 268544}
|
|
{"current_steps": 550, "total_steps": 37885, "loss": 0.2927, "lr": 2.897862232779097e-07, "epoch": 0.07258809555232942, "percentage": 1.45, "elapsed_time": "0:00:37", "remaining_time": "0:42:40", "throughput": 7183.01, "total_tokens": 270912}
|
|
{"current_steps": 555, "total_steps": 37885, "loss": 0.042, "lr": 2.9242544206914755e-07, "epoch": 0.07324798733007787, "percentage": 1.46, "elapsed_time": "0:00:38", "remaining_time": "0:42:39", "throughput": 7192.34, "total_tokens": 273664}
|
|
{"current_steps": 560, "total_steps": 37885, "loss": 0.1713, "lr": 2.950646608603853e-07, "epoch": 0.07390787910782631, "percentage": 1.48, "elapsed_time": "0:00:38", "remaining_time": "0:42:37", "throughput": 7196.13, "total_tokens": 276160}
|
|
{"current_steps": 565, "total_steps": 37885, "loss": 0.1153, "lr": 2.977038796516231e-07, "epoch": 0.07456777088557477, "percentage": 1.49, "elapsed_time": "0:00:38", "remaining_time": "0:42:36", "throughput": 7202.85, "total_tokens": 278784}
|
|
{"current_steps": 570, "total_steps": 37885, "loss": 0.1525, "lr": 3.0034309844286087e-07, "epoch": 0.07522766266332322, "percentage": 1.5, "elapsed_time": "0:00:39", "remaining_time": "0:42:35", "throughput": 7203.58, "total_tokens": 281152}
|
|
{"current_steps": 575, "total_steps": 37885, "loss": 0.2213, "lr": 3.029823172340987e-07, "epoch": 0.07588755444107166, "percentage": 1.52, "elapsed_time": "0:00:39", "remaining_time": "0:42:33", "throughput": 7202.74, "total_tokens": 283456}
|
|
{"current_steps": 580, "total_steps": 37885, "loss": 0.1509, "lr": 3.0562153602533647e-07, "epoch": 0.07654744621882012, "percentage": 1.53, "elapsed_time": "0:00:39", "remaining_time": "0:42:32", "throughput": 7207.14, "total_tokens": 286016}
|
|
{"current_steps": 585, "total_steps": 37885, "loss": 0.1542, "lr": 3.0826075481657424e-07, "epoch": 0.07720733799656856, "percentage": 1.54, "elapsed_time": "0:00:40", "remaining_time": "0:42:31", "throughput": 7209.31, "total_tokens": 288448}
|
|
{"current_steps": 590, "total_steps": 37885, "loss": 0.136, "lr": 3.1089997360781207e-07, "epoch": 0.07786722977431701, "percentage": 1.56, "elapsed_time": "0:00:40", "remaining_time": "0:42:29", "throughput": 7209.91, "total_tokens": 290816}
|
|
{"current_steps": 595, "total_steps": 37885, "loss": 0.0768, "lr": 3.1353919239904984e-07, "epoch": 0.07852712155206547, "percentage": 1.57, "elapsed_time": "0:00:40", "remaining_time": "0:42:28", "throughput": 7217.18, "total_tokens": 293504}
|
|
{"current_steps": 600, "total_steps": 37885, "loss": 0.1251, "lr": 3.1617841119028767e-07, "epoch": 0.07918701332981391, "percentage": 1.58, "elapsed_time": "0:00:40", "remaining_time": "0:42:27", "throughput": 7214.71, "total_tokens": 295744}
|
|
{"current_steps": 605, "total_steps": 37885, "loss": 0.1154, "lr": 3.1881762998152544e-07, "epoch": 0.07984690510756236, "percentage": 1.6, "elapsed_time": "0:00:41", "remaining_time": "0:42:26", "throughput": 7214.84, "total_tokens": 298112}
|
|
{"current_steps": 610, "total_steps": 37885, "loss": 0.1348, "lr": 3.2145684877276327e-07, "epoch": 0.0805067968853108, "percentage": 1.61, "elapsed_time": "0:00:41", "remaining_time": "0:42:24", "throughput": 7217.89, "total_tokens": 300608}
|
|
{"current_steps": 615, "total_steps": 37885, "loss": 0.0238, "lr": 3.2409606756400104e-07, "epoch": 0.08116668866305926, "percentage": 1.62, "elapsed_time": "0:00:41", "remaining_time": "0:42:24", "throughput": 7226.04, "total_tokens": 303360}
|
|
{"current_steps": 620, "total_steps": 37885, "loss": 0.1818, "lr": 3.267352863552388e-07, "epoch": 0.0818265804408077, "percentage": 1.64, "elapsed_time": "0:00:42", "remaining_time": "0:42:23", "throughput": 7230.5, "total_tokens": 305920}
|
|
{"current_steps": 625, "total_steps": 37885, "loss": 0.1448, "lr": 3.2937450514647664e-07, "epoch": 0.08248647221855615, "percentage": 1.65, "elapsed_time": "0:00:42", "remaining_time": "0:42:21", "throughput": 7233.47, "total_tokens": 308416}
|
|
{"current_steps": 630, "total_steps": 37885, "loss": 0.0859, "lr": 3.320137239377144e-07, "epoch": 0.08314636399630461, "percentage": 1.66, "elapsed_time": "0:00:42", "remaining_time": "0:42:20", "throughput": 7235.24, "total_tokens": 310848}
|
|
{"current_steps": 635, "total_steps": 37885, "loss": 0.1894, "lr": 3.3465294272895224e-07, "epoch": 0.08380625577405305, "percentage": 1.68, "elapsed_time": "0:00:43", "remaining_time": "0:42:19", "throughput": 7239.54, "total_tokens": 313408}
|
|
{"current_steps": 640, "total_steps": 37885, "loss": 0.2527, "lr": 3.3729216152019e-07, "epoch": 0.0844661475518015, "percentage": 1.69, "elapsed_time": "0:00:43", "remaining_time": "0:42:18", "throughput": 7241.95, "total_tokens": 315904}
|
|
{"current_steps": 645, "total_steps": 37885, "loss": 0.2999, "lr": 3.3993138031142784e-07, "epoch": 0.08512603932954996, "percentage": 1.7, "elapsed_time": "0:00:43", "remaining_time": "0:42:17", "throughput": 7238.14, "total_tokens": 318080}
|
|
{"current_steps": 650, "total_steps": 37885, "loss": 0.1142, "lr": 3.425705991026656e-07, "epoch": 0.0857859311072984, "percentage": 1.72, "elapsed_time": "0:00:44", "remaining_time": "0:42:15", "throughput": 7234.24, "total_tokens": 320256}
|
|
{"current_steps": 655, "total_steps": 37885, "loss": 0.0054, "lr": 3.452098178939034e-07, "epoch": 0.08644582288504685, "percentage": 1.73, "elapsed_time": "0:00:44", "remaining_time": "0:42:14", "throughput": 7231.76, "total_tokens": 322496}
|
|
{"current_steps": 660, "total_steps": 37885, "loss": 0.133, "lr": 3.478490366851412e-07, "epoch": 0.0871057146627953, "percentage": 1.74, "elapsed_time": "0:00:44", "remaining_time": "0:42:13", "throughput": 7236.78, "total_tokens": 325120}
|
|
{"current_steps": 665, "total_steps": 37885, "loss": 0.0938, "lr": 3.50488255476379e-07, "epoch": 0.08776560644054375, "percentage": 1.76, "elapsed_time": "0:00:45", "remaining_time": "0:42:12", "throughput": 7232.71, "total_tokens": 327296}
|
|
{"current_steps": 670, "total_steps": 37885, "loss": 0.492, "lr": 3.5312747426761676e-07, "epoch": 0.0884254982182922, "percentage": 1.77, "elapsed_time": "0:00:45", "remaining_time": "0:42:11", "throughput": 7233.1, "total_tokens": 329664}
|
|
{"current_steps": 675, "total_steps": 37885, "loss": 0.2004, "lr": 3.5576669305885453e-07, "epoch": 0.08908538999604065, "percentage": 1.78, "elapsed_time": "0:00:45", "remaining_time": "0:42:11", "throughput": 7240.04, "total_tokens": 332416}
|
|
{"current_steps": 680, "total_steps": 37885, "loss": 0.0255, "lr": 3.5840591185009236e-07, "epoch": 0.0897452817737891, "percentage": 1.79, "elapsed_time": "0:00:46", "remaining_time": "0:42:10", "throughput": 7244.0, "total_tokens": 334976}
|
|
{"current_steps": 685, "total_steps": 37885, "loss": 0.1744, "lr": 3.6104513064133013e-07, "epoch": 0.09040517355153754, "percentage": 1.81, "elapsed_time": "0:00:46", "remaining_time": "0:42:09", "throughput": 7246.64, "total_tokens": 337472}
|
|
{"current_steps": 690, "total_steps": 37885, "loss": 0.0702, "lr": 3.636843494325679e-07, "epoch": 0.091065065329286, "percentage": 1.82, "elapsed_time": "0:00:46", "remaining_time": "0:42:08", "throughput": 7247.86, "total_tokens": 339904}
|
|
{"current_steps": 695, "total_steps": 37885, "loss": 0.1571, "lr": 3.6632356822380573e-07, "epoch": 0.09172495710703445, "percentage": 1.83, "elapsed_time": "0:00:47", "remaining_time": "0:42:07", "throughput": 7247.26, "total_tokens": 342272}
|
|
{"current_steps": 700, "total_steps": 37885, "loss": 0.0945, "lr": 3.689627870150435e-07, "epoch": 0.09238484888478289, "percentage": 1.85, "elapsed_time": "0:00:47", "remaining_time": "0:42:06", "throughput": 7247.58, "total_tokens": 344640}
|
|
{"current_steps": 705, "total_steps": 37885, "loss": 0.1118, "lr": 3.7160200580628133e-07, "epoch": 0.09304474066253135, "percentage": 1.86, "elapsed_time": "0:00:47", "remaining_time": "0:42:05", "throughput": 7248.8, "total_tokens": 347072}
|
|
{"current_steps": 710, "total_steps": 37885, "loss": 0.1896, "lr": 3.742412245975191e-07, "epoch": 0.0937046324402798, "percentage": 1.87, "elapsed_time": "0:00:48", "remaining_time": "0:42:04", "throughput": 7253.26, "total_tokens": 349696}
|
|
{"current_steps": 715, "total_steps": 37885, "loss": 0.1602, "lr": 3.7688044338875693e-07, "epoch": 0.09436452421802824, "percentage": 1.89, "elapsed_time": "0:00:48", "remaining_time": "0:42:03", "throughput": 7256.55, "total_tokens": 352256}
|
|
{"current_steps": 720, "total_steps": 37885, "loss": 0.2044, "lr": 3.795196621799947e-07, "epoch": 0.0950244159957767, "percentage": 1.9, "elapsed_time": "0:00:48", "remaining_time": "0:42:02", "throughput": 7263.44, "total_tokens": 355008}
|
|
{"current_steps": 725, "total_steps": 37885, "loss": 0.4465, "lr": 3.821588809712325e-07, "epoch": 0.09568430777352514, "percentage": 1.91, "elapsed_time": "0:00:49", "remaining_time": "0:42:01", "throughput": 7263.2, "total_tokens": 357376}
|
|
{"current_steps": 730, "total_steps": 37885, "loss": 0.2807, "lr": 3.847980997624703e-07, "epoch": 0.09634419955127359, "percentage": 1.93, "elapsed_time": "0:00:49", "remaining_time": "0:42:00", "throughput": 7262.29, "total_tokens": 359680}
|
|
{"current_steps": 735, "total_steps": 37885, "loss": 0.0482, "lr": 3.874373185537081e-07, "epoch": 0.09700409132902205, "percentage": 1.94, "elapsed_time": "0:00:49", "remaining_time": "0:41:59", "throughput": 7264.54, "total_tokens": 362176}
|
|
{"current_steps": 740, "total_steps": 37885, "loss": 0.0361, "lr": 3.900765373449459e-07, "epoch": 0.09766398310677049, "percentage": 1.95, "elapsed_time": "0:00:50", "remaining_time": "0:41:59", "throughput": 7273.04, "total_tokens": 365056}
|
|
{"current_steps": 745, "total_steps": 37885, "loss": 0.1913, "lr": 3.927157561361837e-07, "epoch": 0.09832387488451894, "percentage": 1.97, "elapsed_time": "0:00:50", "remaining_time": "0:41:58", "throughput": 7273.89, "total_tokens": 367488}
|
|
{"current_steps": 750, "total_steps": 37885, "loss": 0.2768, "lr": 3.953549749274215e-07, "epoch": 0.0989837666622674, "percentage": 1.98, "elapsed_time": "0:00:50", "remaining_time": "0:41:57", "throughput": 7272.62, "total_tokens": 369792}
|
|
{"current_steps": 755, "total_steps": 37885, "loss": 0.0962, "lr": 3.979941937186593e-07, "epoch": 0.09964365844001584, "percentage": 1.99, "elapsed_time": "0:00:51", "remaining_time": "0:41:56", "throughput": 7272.38, "total_tokens": 372160}
|
|
{"current_steps": 760, "total_steps": 37885, "loss": 0.1613, "lr": 4.0063341250989705e-07, "epoch": 0.10030355021776428, "percentage": 2.01, "elapsed_time": "0:00:51", "remaining_time": "0:41:55", "throughput": 7267.62, "total_tokens": 374272}
|
|
{"current_steps": 765, "total_steps": 37885, "loss": 0.1893, "lr": 4.032726313011349e-07, "epoch": 0.10096344199551274, "percentage": 2.02, "elapsed_time": "0:00:51", "remaining_time": "0:41:54", "throughput": 7268.51, "total_tokens": 376704}
|
|
{"current_steps": 770, "total_steps": 37885, "loss": 0.321, "lr": 4.0591185009237265e-07, "epoch": 0.10162333377326119, "percentage": 2.03, "elapsed_time": "0:00:52", "remaining_time": "0:41:53", "throughput": 7269.39, "total_tokens": 379136}
|
|
{"current_steps": 775, "total_steps": 37885, "loss": 0.0868, "lr": 4.085510688836104e-07, "epoch": 0.10228322555100963, "percentage": 2.05, "elapsed_time": "0:00:52", "remaining_time": "0:41:53", "throughput": 7270.39, "total_tokens": 381568}
|
|
{"current_steps": 780, "total_steps": 37885, "loss": 0.2393, "lr": 4.111902876748482e-07, "epoch": 0.10294311732875808, "percentage": 2.06, "elapsed_time": "0:00:52", "remaining_time": "0:41:52", "throughput": 7273.34, "total_tokens": 384128}
|
|
{"current_steps": 785, "total_steps": 37885, "loss": 0.3035, "lr": 4.13829506466086e-07, "epoch": 0.10360300910650654, "percentage": 2.07, "elapsed_time": "0:00:53", "remaining_time": "0:41:51", "throughput": 7269.37, "total_tokens": 386304}
|
|
{"current_steps": 790, "total_steps": 37885, "loss": 0.2872, "lr": 4.164687252573238e-07, "epoch": 0.10426290088425498, "percentage": 2.09, "elapsed_time": "0:00:53", "remaining_time": "0:41:50", "throughput": 7272.68, "total_tokens": 388864}
|
|
{"current_steps": 795, "total_steps": 37885, "loss": 0.0753, "lr": 4.1910794404856157e-07, "epoch": 0.10492279266200343, "percentage": 2.1, "elapsed_time": "0:00:53", "remaining_time": "0:41:49", "throughput": 7270.02, "total_tokens": 391104}
|
|
{"current_steps": 800, "total_steps": 37885, "loss": 0.1116, "lr": 4.217471628397994e-07, "epoch": 0.10558268443975188, "percentage": 2.11, "elapsed_time": "0:00:54", "remaining_time": "0:41:49", "throughput": 7276.05, "total_tokens": 393856}
|
|
{"current_steps": 805, "total_steps": 37885, "loss": 0.1934, "lr": 4.2438638163103717e-07, "epoch": 0.10624257621750033, "percentage": 2.12, "elapsed_time": "0:00:54", "remaining_time": "0:41:48", "throughput": 7277.72, "total_tokens": 396352}
|
|
{"current_steps": 810, "total_steps": 37885, "loss": 0.1459, "lr": 4.27025600422275e-07, "epoch": 0.10690246799524877, "percentage": 2.14, "elapsed_time": "0:00:54", "remaining_time": "0:41:47", "throughput": 7275.05, "total_tokens": 398592}
|
|
{"current_steps": 815, "total_steps": 37885, "loss": 0.0887, "lr": 4.2966481921351277e-07, "epoch": 0.10756235977299723, "percentage": 2.15, "elapsed_time": "0:00:55", "remaining_time": "0:41:46", "throughput": 7277.01, "total_tokens": 401088}
|
|
{"current_steps": 820, "total_steps": 37885, "loss": 0.0521, "lr": 4.323040380047506e-07, "epoch": 0.10822225155074568, "percentage": 2.16, "elapsed_time": "0:00:55", "remaining_time": "0:41:46", "throughput": 7276.75, "total_tokens": 403456}
|
|
{"current_steps": 825, "total_steps": 37885, "loss": 0.2846, "lr": 4.3494325679598837e-07, "epoch": 0.10888214332849412, "percentage": 2.18, "elapsed_time": "0:00:55", "remaining_time": "0:41:45", "throughput": 7274.46, "total_tokens": 405696}
|
|
{"current_steps": 830, "total_steps": 37885, "loss": 0.2486, "lr": 4.3758247558722614e-07, "epoch": 0.10954203510624258, "percentage": 2.19, "elapsed_time": "0:00:56", "remaining_time": "0:41:44", "throughput": 7275.36, "total_tokens": 408128}
|
|
{"current_steps": 835, "total_steps": 37885, "loss": 0.0355, "lr": 4.4022169437846397e-07, "epoch": 0.11020192688399102, "percentage": 2.2, "elapsed_time": "0:00:56", "remaining_time": "0:41:44", "throughput": 7283.03, "total_tokens": 411008}
|
|
{"current_steps": 840, "total_steps": 37885, "loss": 0.3422, "lr": 4.4286091316970174e-07, "epoch": 0.11086181866173947, "percentage": 2.22, "elapsed_time": "0:00:56", "remaining_time": "0:41:43", "throughput": 7281.94, "total_tokens": 413312}
|
|
{"current_steps": 845, "total_steps": 37885, "loss": 0.124, "lr": 4.4550013196093957e-07, "epoch": 0.11152171043948793, "percentage": 2.23, "elapsed_time": "0:00:57", "remaining_time": "0:41:42", "throughput": 7280.99, "total_tokens": 415616}
|
|
{"current_steps": 850, "total_steps": 37885, "loss": 0.1853, "lr": 4.4813935075217734e-07, "epoch": 0.11218160221723637, "percentage": 2.24, "elapsed_time": "0:00:57", "remaining_time": "0:41:41", "throughput": 7286.5, "total_tokens": 418368}
|
|
{"current_steps": 855, "total_steps": 37885, "loss": 0.0833, "lr": 4.5077856954341517e-07, "epoch": 0.11284149399498482, "percentage": 2.26, "elapsed_time": "0:00:57", "remaining_time": "0:41:40", "throughput": 7288.38, "total_tokens": 420864}
|
|
{"current_steps": 860, "total_steps": 37885, "loss": 0.222, "lr": 4.5341778833465294e-07, "epoch": 0.11350138577273328, "percentage": 2.27, "elapsed_time": "0:00:58", "remaining_time": "0:41:40", "throughput": 7285.03, "total_tokens": 423040}
|
|
{"current_steps": 865, "total_steps": 37885, "loss": 0.3516, "lr": 4.560570071258907e-07, "epoch": 0.11416127755048172, "percentage": 2.28, "elapsed_time": "0:00:58", "remaining_time": "0:41:39", "throughput": 7284.03, "total_tokens": 425344}
|
|
{"current_steps": 870, "total_steps": 37885, "loss": 0.1296, "lr": 4.5869622591712854e-07, "epoch": 0.11482116932823017, "percentage": 2.3, "elapsed_time": "0:00:58", "remaining_time": "0:41:38", "throughput": 7287.72, "total_tokens": 427968}
|
|
{"current_steps": 875, "total_steps": 37885, "loss": 0.232, "lr": 4.613354447083663e-07, "epoch": 0.11548106110597862, "percentage": 2.31, "elapsed_time": "0:00:59", "remaining_time": "0:41:37", "throughput": 7291.15, "total_tokens": 430592}
|
|
{"current_steps": 880, "total_steps": 37885, "loss": 0.4212, "lr": 4.639746634996041e-07, "epoch": 0.11614095288372707, "percentage": 2.32, "elapsed_time": "0:00:59", "remaining_time": "0:41:37", "throughput": 7294.04, "total_tokens": 433152}
|
|
{"current_steps": 885, "total_steps": 37885, "loss": 0.0185, "lr": 4.6661388229084186e-07, "epoch": 0.11680084466147551, "percentage": 2.34, "elapsed_time": "0:00:59", "remaining_time": "0:41:36", "throughput": 7292.88, "total_tokens": 435456}
|
|
{"current_steps": 890, "total_steps": 37885, "loss": 0.2316, "lr": 4.692531010820797e-07, "epoch": 0.11746073643922397, "percentage": 2.35, "elapsed_time": "0:01:00", "remaining_time": "0:41:35", "throughput": 7296.26, "total_tokens": 438080}
|
|
{"current_steps": 895, "total_steps": 37885, "loss": 0.2135, "lr": 4.7189231987331746e-07, "epoch": 0.11812062821697242, "percentage": 2.36, "elapsed_time": "0:01:00", "remaining_time": "0:41:35", "throughput": 7298.89, "total_tokens": 440640}
|
|
{"current_steps": 900, "total_steps": 37885, "loss": 0.0402, "lr": 4.7453153866455523e-07, "epoch": 0.11878051999472086, "percentage": 2.38, "elapsed_time": "0:01:00", "remaining_time": "0:41:34", "throughput": 7298.96, "total_tokens": 443008}
|
|
{"current_steps": 905, "total_steps": 37885, "loss": 0.1973, "lr": 4.771707574557931e-07, "epoch": 0.11944041177246932, "percentage": 2.39, "elapsed_time": "0:01:01", "remaining_time": "0:41:33", "throughput": 7297.74, "total_tokens": 445312}
|
|
{"current_steps": 910, "total_steps": 37885, "loss": 0.105, "lr": 4.798099762470308e-07, "epoch": 0.12010030355021777, "percentage": 2.4, "elapsed_time": "0:01:01", "remaining_time": "0:41:32", "throughput": 7297.65, "total_tokens": 447680}
|
|
{"current_steps": 915, "total_steps": 37885, "loss": 0.1353, "lr": 4.824491950382686e-07, "epoch": 0.12076019532796621, "percentage": 2.42, "elapsed_time": "0:01:01", "remaining_time": "0:41:32", "throughput": 7301.9, "total_tokens": 450368}
|
|
{"current_steps": 920, "total_steps": 37885, "loss": 0.0864, "lr": 4.850884138295065e-07, "epoch": 0.12142008710571467, "percentage": 2.43, "elapsed_time": "0:01:02", "remaining_time": "0:41:31", "throughput": 7302.46, "total_tokens": 452800}
|
|
{"current_steps": 925, "total_steps": 37885, "loss": 0.1168, "lr": 4.877276326207443e-07, "epoch": 0.12207997888346311, "percentage": 2.44, "elapsed_time": "0:01:02", "remaining_time": "0:41:31", "throughput": 7310.22, "total_tokens": 455744}
|
|
{"current_steps": 930, "total_steps": 37885, "loss": 0.1899, "lr": 4.90366851411982e-07, "epoch": 0.12273987066121156, "percentage": 2.45, "elapsed_time": "0:01:02", "remaining_time": "0:41:30", "throughput": 7310.75, "total_tokens": 458176}
|
|
{"current_steps": 935, "total_steps": 37885, "loss": 0.1902, "lr": 4.930060702032198e-07, "epoch": 0.12339976243896002, "percentage": 2.47, "elapsed_time": "0:01:02", "remaining_time": "0:41:29", "throughput": 7309.7, "total_tokens": 460480}
|
|
{"current_steps": 940, "total_steps": 37885, "loss": 0.169, "lr": 4.956452889944576e-07, "epoch": 0.12405965421670846, "percentage": 2.48, "elapsed_time": "0:01:03", "remaining_time": "0:41:28", "throughput": 7306.69, "total_tokens": 462656}
|
|
{"current_steps": 945, "total_steps": 37885, "loss": 0.2648, "lr": 4.982845077856955e-07, "epoch": 0.1247195459944569, "percentage": 2.49, "elapsed_time": "0:01:03", "remaining_time": "0:41:28", "throughput": 7310.85, "total_tokens": 465344}
|
|
{"current_steps": 950, "total_steps": 37885, "loss": 0.3667, "lr": 5.009237265769331e-07, "epoch": 0.12537943777220537, "percentage": 2.51, "elapsed_time": "0:01:03", "remaining_time": "0:41:27", "throughput": 7310.54, "total_tokens": 467712}
|
|
{"current_steps": 955, "total_steps": 37885, "loss": 0.2885, "lr": 5.03562945368171e-07, "epoch": 0.1260393295499538, "percentage": 2.52, "elapsed_time": "0:01:04", "remaining_time": "0:41:26", "throughput": 7310.49, "total_tokens": 470080}
|
|
{"current_steps": 960, "total_steps": 37885, "loss": 0.1084, "lr": 5.062021641594088e-07, "epoch": 0.12669922132770225, "percentage": 2.53, "elapsed_time": "0:01:04", "remaining_time": "0:41:25", "throughput": 7311.11, "total_tokens": 472512}
|
|
{"current_steps": 965, "total_steps": 37885, "loss": 0.1628, "lr": 5.088413829506465e-07, "epoch": 0.1273591131054507, "percentage": 2.55, "elapsed_time": "0:01:04", "remaining_time": "0:41:25", "throughput": 7309.17, "total_tokens": 474752}
|
|
{"current_steps": 970, "total_steps": 37885, "loss": 0.0777, "lr": 5.114806017418843e-07, "epoch": 0.12801900488319914, "percentage": 2.56, "elapsed_time": "0:01:05", "remaining_time": "0:41:24", "throughput": 7312.87, "total_tokens": 477440}
|
|
{"current_steps": 975, "total_steps": 37885, "loss": 0.1169, "lr": 5.141198205331222e-07, "epoch": 0.12867889666094762, "percentage": 2.57, "elapsed_time": "0:01:05", "remaining_time": "0:41:24", "throughput": 7314.9, "total_tokens": 480000}
|
|
{"current_steps": 980, "total_steps": 37885, "loss": 0.2261, "lr": 5.1675903932436e-07, "epoch": 0.12933878843869606, "percentage": 2.59, "elapsed_time": "0:01:05", "remaining_time": "0:41:23", "throughput": 7314.42, "total_tokens": 482368}
|
|
{"current_steps": 985, "total_steps": 37885, "loss": 0.0063, "lr": 5.193982581155977e-07, "epoch": 0.1299986802164445, "percentage": 2.6, "elapsed_time": "0:01:06", "remaining_time": "0:41:22", "throughput": 7310.75, "total_tokens": 484544}
|
|
{"current_steps": 990, "total_steps": 37885, "loss": 0.1087, "lr": 5.220374769068355e-07, "epoch": 0.13065857199419295, "percentage": 2.61, "elapsed_time": "0:01:06", "remaining_time": "0:41:22", "throughput": 7307.81, "total_tokens": 486720}
|
|
{"current_steps": 995, "total_steps": 37885, "loss": 0.0935, "lr": 5.246766956980734e-07, "epoch": 0.1313184637719414, "percentage": 2.63, "elapsed_time": "0:01:06", "remaining_time": "0:41:21", "throughput": 7310.74, "total_tokens": 489344}
|
|
{"current_steps": 1000, "total_steps": 37885, "loss": 0.0785, "lr": 5.273159144893111e-07, "epoch": 0.13197835554968984, "percentage": 2.64, "elapsed_time": "0:01:07", "remaining_time": "0:41:21", "throughput": 7311.2, "total_tokens": 491776}
|
|
{"current_steps": 1005, "total_steps": 37885, "loss": 0.1646, "lr": 5.29955133280549e-07, "epoch": 0.1326382473274383, "percentage": 2.65, "elapsed_time": "0:01:07", "remaining_time": "0:41:20", "throughput": 7308.09, "total_tokens": 493952}
|
|
{"current_steps": 1010, "total_steps": 37885, "loss": 0.2601, "lr": 5.325943520717867e-07, "epoch": 0.13329813910518676, "percentage": 2.67, "elapsed_time": "0:01:07", "remaining_time": "0:41:19", "throughput": 7307.46, "total_tokens": 496320}
|
|
{"current_steps": 1015, "total_steps": 37885, "loss": 0.075, "lr": 5.352335708630246e-07, "epoch": 0.1339580308829352, "percentage": 2.68, "elapsed_time": "0:01:08", "remaining_time": "0:41:19", "throughput": 7304.33, "total_tokens": 498496}
|
|
{"current_steps": 1020, "total_steps": 37885, "loss": 0.3792, "lr": 5.378727896542623e-07, "epoch": 0.13461792266068365, "percentage": 2.69, "elapsed_time": "0:01:08", "remaining_time": "0:41:18", "throughput": 7306.53, "total_tokens": 501056}
|
|
{"current_steps": 1025, "total_steps": 37885, "loss": 0.0633, "lr": 5.405120084455001e-07, "epoch": 0.1352778144384321, "percentage": 2.71, "elapsed_time": "0:01:08", "remaining_time": "0:41:17", "throughput": 7304.66, "total_tokens": 503296}
|
|
{"current_steps": 1030, "total_steps": 37885, "loss": 0.2425, "lr": 5.431512272367379e-07, "epoch": 0.13593770621618054, "percentage": 2.72, "elapsed_time": "0:01:09", "remaining_time": "0:41:17", "throughput": 7307.14, "total_tokens": 505856}
|
|
{"current_steps": 1035, "total_steps": 37885, "loss": 0.2077, "lr": 5.457904460279758e-07, "epoch": 0.13659759799392898, "percentage": 2.73, "elapsed_time": "0:01:09", "remaining_time": "0:41:16", "throughput": 7309.32, "total_tokens": 508416}
|
|
{"current_steps": 1040, "total_steps": 37885, "loss": 0.1821, "lr": 5.484296648192135e-07, "epoch": 0.13725748977167745, "percentage": 2.75, "elapsed_time": "0:01:09", "remaining_time": "0:41:16", "throughput": 7311.93, "total_tokens": 511040}
|
|
{"current_steps": 1045, "total_steps": 37885, "loss": 0.0126, "lr": 5.510688836104512e-07, "epoch": 0.1379173815494259, "percentage": 2.76, "elapsed_time": "0:01:10", "remaining_time": "0:41:15", "throughput": 7309.03, "total_tokens": 513216}
|
|
{"current_steps": 1050, "total_steps": 37885, "loss": 0.2747, "lr": 5.537081024016891e-07, "epoch": 0.13857727332717434, "percentage": 2.77, "elapsed_time": "0:01:10", "remaining_time": "0:41:14", "throughput": 7307.2, "total_tokens": 515456}
|
|
{"current_steps": 1055, "total_steps": 37885, "loss": 0.2289, "lr": 5.563473211929268e-07, "epoch": 0.1392371651049228, "percentage": 2.78, "elapsed_time": "0:01:10", "remaining_time": "0:41:14", "throughput": 7306.71, "total_tokens": 517824}
|
|
{"current_steps": 1060, "total_steps": 37885, "loss": 0.1583, "lr": 5.589865399841647e-07, "epoch": 0.13989705688267123, "percentage": 2.8, "elapsed_time": "0:01:11", "remaining_time": "0:41:13", "throughput": 7308.03, "total_tokens": 520320}
|
|
{"current_steps": 1065, "total_steps": 37885, "loss": 0.0417, "lr": 5.616257587754024e-07, "epoch": 0.14055694866041968, "percentage": 2.81, "elapsed_time": "0:01:11", "remaining_time": "0:41:12", "throughput": 7308.65, "total_tokens": 522752}
|
|
{"current_steps": 1070, "total_steps": 37885, "loss": 0.0679, "lr": 5.642649775666402e-07, "epoch": 0.14121684043816815, "percentage": 2.82, "elapsed_time": "0:01:11", "remaining_time": "0:41:12", "throughput": 7311.26, "total_tokens": 525376}
|
|
{"current_steps": 1075, "total_steps": 37885, "loss": 0.2223, "lr": 5.66904196357878e-07, "epoch": 0.1418767322159166, "percentage": 2.84, "elapsed_time": "0:01:12", "remaining_time": "0:41:11", "throughput": 7311.97, "total_tokens": 527808}
|
|
{"current_steps": 1080, "total_steps": 37885, "loss": 0.1876, "lr": 5.695434151491159e-07, "epoch": 0.14253662399366504, "percentage": 2.85, "elapsed_time": "0:01:12", "remaining_time": "0:41:11", "throughput": 7313.37, "total_tokens": 530304}
|
|
{"current_steps": 1085, "total_steps": 37885, "loss": 0.375, "lr": 5.721826339403536e-07, "epoch": 0.14319651577141349, "percentage": 2.86, "elapsed_time": "0:01:12", "remaining_time": "0:41:10", "throughput": 7310.34, "total_tokens": 532480}
|
|
{"current_steps": 1090, "total_steps": 37885, "loss": 0.2028, "lr": 5.748218527315914e-07, "epoch": 0.14385640754916193, "percentage": 2.88, "elapsed_time": "0:01:13", "remaining_time": "0:41:09", "throughput": 7314.23, "total_tokens": 535168}
|
|
{"current_steps": 1095, "total_steps": 37885, "loss": 0.3046, "lr": 5.774610715228292e-07, "epoch": 0.14451629932691037, "percentage": 2.89, "elapsed_time": "0:01:13", "remaining_time": "0:41:09", "throughput": 7314.45, "total_tokens": 537600}
|
|
{"current_steps": 1100, "total_steps": 37885, "loss": 0.043, "lr": 5.801002903140671e-07, "epoch": 0.14517619110465885, "percentage": 2.9, "elapsed_time": "0:01:13", "remaining_time": "0:41:08", "throughput": 7315.74, "total_tokens": 540096}
|
|
{"current_steps": 1105, "total_steps": 37885, "loss": 0.0119, "lr": 5.827395091053047e-07, "epoch": 0.1458360828824073, "percentage": 2.92, "elapsed_time": "0:01:14", "remaining_time": "0:41:08", "throughput": 7317.07, "total_tokens": 542592}
|
|
{"current_steps": 1110, "total_steps": 37885, "loss": 0.0392, "lr": 5.853787278965426e-07, "epoch": 0.14649597466015574, "percentage": 2.93, "elapsed_time": "0:01:14", "remaining_time": "0:41:07", "throughput": 7316.63, "total_tokens": 544960}
|
|
{"current_steps": 1115, "total_steps": 37885, "loss": 0.0851, "lr": 5.880179466877804e-07, "epoch": 0.14715586643790418, "percentage": 2.94, "elapsed_time": "0:01:14", "remaining_time": "0:41:06", "throughput": 7313.9, "total_tokens": 547136}
|
|
{"current_steps": 1120, "total_steps": 37885, "loss": 0.1365, "lr": 5.906571654790183e-07, "epoch": 0.14781575821565263, "percentage": 2.96, "elapsed_time": "0:01:15", "remaining_time": "0:41:06", "throughput": 7312.64, "total_tokens": 549440}
|
|
{"current_steps": 1125, "total_steps": 37885, "loss": 0.2707, "lr": 5.932963842702559e-07, "epoch": 0.14847564999340107, "percentage": 2.97, "elapsed_time": "0:01:15", "remaining_time": "0:41:05", "throughput": 7315.4, "total_tokens": 552064}
|
|
{"current_steps": 1130, "total_steps": 37885, "loss": 0.0997, "lr": 5.959356030614938e-07, "epoch": 0.14913554177114954, "percentage": 2.98, "elapsed_time": "0:01:15", "remaining_time": "0:41:05", "throughput": 7315.01, "total_tokens": 554432}
|
|
{"current_steps": 1135, "total_steps": 37885, "loss": 0.0028, "lr": 5.985748218527316e-07, "epoch": 0.149795433548898, "percentage": 3.0, "elapsed_time": "0:01:16", "remaining_time": "0:41:04", "throughput": 7314.92, "total_tokens": 556800}
|
|
{"current_steps": 1140, "total_steps": 37885, "loss": 0.2855, "lr": 6.012140406439695e-07, "epoch": 0.15045532532664643, "percentage": 3.01, "elapsed_time": "0:01:16", "remaining_time": "0:41:04", "throughput": 7315.83, "total_tokens": 559296}
|
|
{"current_steps": 1145, "total_steps": 37885, "loss": 0.1093, "lr": 6.038532594352071e-07, "epoch": 0.15111521710439488, "percentage": 3.02, "elapsed_time": "0:01:16", "remaining_time": "0:41:03", "throughput": 7320.67, "total_tokens": 562112}
|
|
{"current_steps": 1150, "total_steps": 37885, "loss": 0.0592, "lr": 6.064924782264449e-07, "epoch": 0.15177510888214332, "percentage": 3.04, "elapsed_time": "0:01:17", "remaining_time": "0:41:03", "throughput": 7317.65, "total_tokens": 564288}
|
|
{"current_steps": 1155, "total_steps": 37885, "loss": 0.188, "lr": 6.091316970176828e-07, "epoch": 0.15243500065989177, "percentage": 3.05, "elapsed_time": "0:01:17", "remaining_time": "0:41:02", "throughput": 7319.59, "total_tokens": 566848}
|
|
{"current_steps": 1160, "total_steps": 37885, "loss": 0.4772, "lr": 6.117709158089205e-07, "epoch": 0.15309489243764024, "percentage": 3.06, "elapsed_time": "0:01:17", "remaining_time": "0:41:02", "throughput": 7318.35, "total_tokens": 569152}
|
|
{"current_steps": 1165, "total_steps": 37885, "loss": 0.0848, "lr": 6.144101346001583e-07, "epoch": 0.15375478421538868, "percentage": 3.08, "elapsed_time": "0:01:18", "remaining_time": "0:41:01", "throughput": 7317.39, "total_tokens": 571456}
|
|
{"current_steps": 1170, "total_steps": 37885, "loss": 0.2235, "lr": 6.170493533913961e-07, "epoch": 0.15441467599313713, "percentage": 3.09, "elapsed_time": "0:01:18", "remaining_time": "0:41:00", "throughput": 7317.3, "total_tokens": 573824}
|
|
{"current_steps": 1175, "total_steps": 37885, "loss": 0.0517, "lr": 6.196885721826339e-07, "epoch": 0.15507456777088557, "percentage": 3.1, "elapsed_time": "0:01:18", "remaining_time": "0:41:00", "throughput": 7316.06, "total_tokens": 576128}
|
|
{"current_steps": 1180, "total_steps": 37885, "loss": 0.1252, "lr": 6.223277909738716e-07, "epoch": 0.15573445954863402, "percentage": 3.11, "elapsed_time": "0:01:19", "remaining_time": "0:40:59", "throughput": 7315.14, "total_tokens": 578432}
|
|
{"current_steps": 1185, "total_steps": 37885, "loss": 0.2063, "lr": 6.249670097651095e-07, "epoch": 0.15639435132638246, "percentage": 3.13, "elapsed_time": "0:01:19", "remaining_time": "0:40:59", "throughput": 7314.05, "total_tokens": 580736}
|
|
{"current_steps": 1190, "total_steps": 37885, "loss": 0.0011, "lr": 6.276062285563473e-07, "epoch": 0.15705424310413094, "percentage": 3.14, "elapsed_time": "0:01:19", "remaining_time": "0:40:58", "throughput": 7313.24, "total_tokens": 583040}
|
|
{"current_steps": 1195, "total_steps": 37885, "loss": 0.0065, "lr": 6.302454473475851e-07, "epoch": 0.15771413488187938, "percentage": 3.15, "elapsed_time": "0:01:20", "remaining_time": "0:40:57", "throughput": 7312.08, "total_tokens": 585344}
|
|
{"current_steps": 1200, "total_steps": 37885, "loss": 0.2037, "lr": 6.328846661388228e-07, "epoch": 0.15837402665962783, "percentage": 3.17, "elapsed_time": "0:01:20", "remaining_time": "0:40:57", "throughput": 7313.91, "total_tokens": 587904}
|
|
{"current_steps": 1205, "total_steps": 37885, "loss": 0.2007, "lr": 6.355238849300607e-07, "epoch": 0.15903391843737627, "percentage": 3.18, "elapsed_time": "0:01:20", "remaining_time": "0:40:56", "throughput": 7312.86, "total_tokens": 590208}
|
|
{"current_steps": 1210, "total_steps": 37885, "loss": 0.1552, "lr": 6.381631037212984e-07, "epoch": 0.15969381021512472, "percentage": 3.19, "elapsed_time": "0:01:21", "remaining_time": "0:40:56", "throughput": 7311.99, "total_tokens": 592512}
|
|
{"current_steps": 1215, "total_steps": 37885, "loss": 0.2204, "lr": 6.408023225125363e-07, "epoch": 0.16035370199287316, "percentage": 3.21, "elapsed_time": "0:01:21", "remaining_time": "0:40:55", "throughput": 7313.63, "total_tokens": 595072}
|
|
{"current_steps": 1220, "total_steps": 37885, "loss": 0.2003, "lr": 6.43441541303774e-07, "epoch": 0.1610135937706216, "percentage": 3.22, "elapsed_time": "0:01:21", "remaining_time": "0:40:55", "throughput": 7315.11, "total_tokens": 597632}
|
|
{"current_steps": 1225, "total_steps": 37885, "loss": 0.0941, "lr": 6.460807600950119e-07, "epoch": 0.16167348554837008, "percentage": 3.23, "elapsed_time": "0:01:22", "remaining_time": "0:40:54", "throughput": 7316.95, "total_tokens": 600192}
|
|
{"current_steps": 1230, "total_steps": 37885, "loss": 0.2021, "lr": 6.487199788862496e-07, "epoch": 0.16233337732611852, "percentage": 3.25, "elapsed_time": "0:01:22", "remaining_time": "0:40:54", "throughput": 7313.77, "total_tokens": 602304}
|
|
{"current_steps": 1235, "total_steps": 37885, "loss": 0.2325, "lr": 6.513591976774875e-07, "epoch": 0.16299326910386697, "percentage": 3.26, "elapsed_time": "0:01:22", "remaining_time": "0:40:53", "throughput": 7312.87, "total_tokens": 604608}
|
|
{"current_steps": 1240, "total_steps": 37885, "loss": 0.1003, "lr": 6.539984164687252e-07, "epoch": 0.1636531608816154, "percentage": 3.27, "elapsed_time": "0:01:23", "remaining_time": "0:40:53", "throughput": 7312.24, "total_tokens": 606976}
|
|
{"current_steps": 1245, "total_steps": 37885, "loss": 0.0153, "lr": 6.566376352599631e-07, "epoch": 0.16431305265936386, "percentage": 3.29, "elapsed_time": "0:01:23", "remaining_time": "0:40:52", "throughput": 7314.72, "total_tokens": 609600}
|
|
{"current_steps": 1250, "total_steps": 37885, "loss": 0.2758, "lr": 6.592768540512008e-07, "epoch": 0.1649729444371123, "percentage": 3.3, "elapsed_time": "0:01:23", "remaining_time": "0:40:51", "throughput": 7313.14, "total_tokens": 611840}
|
|
{"current_steps": 1255, "total_steps": 37885, "loss": 0.1107, "lr": 6.619160728424386e-07, "epoch": 0.16563283621486077, "percentage": 3.31, "elapsed_time": "0:01:23", "remaining_time": "0:40:51", "throughput": 7316.33, "total_tokens": 614528}
|
|
{"current_steps": 1260, "total_steps": 37885, "loss": 0.0118, "lr": 6.645552916336764e-07, "epoch": 0.16629272799260922, "percentage": 3.33, "elapsed_time": "0:01:24", "remaining_time": "0:40:51", "throughput": 7317.53, "total_tokens": 617024}
|
|
{"current_steps": 1265, "total_steps": 37885, "loss": 0.156, "lr": 6.671945104249141e-07, "epoch": 0.16695261977035766, "percentage": 3.34, "elapsed_time": "0:01:24", "remaining_time": "0:40:50", "throughput": 7317.29, "total_tokens": 619392}
|
|
{"current_steps": 1270, "total_steps": 37885, "loss": 0.179, "lr": 6.69833729216152e-07, "epoch": 0.1676125115481061, "percentage": 3.35, "elapsed_time": "0:01:24", "remaining_time": "0:40:49", "throughput": 7318.25, "total_tokens": 621888}
|
|
{"current_steps": 1275, "total_steps": 37885, "loss": 0.1028, "lr": 6.724729480073898e-07, "epoch": 0.16827240332585455, "percentage": 3.37, "elapsed_time": "0:01:25", "remaining_time": "0:40:49", "throughput": 7317.34, "total_tokens": 624192}
|
|
{"current_steps": 1280, "total_steps": 37885, "loss": 0.0813, "lr": 6.751121667986275e-07, "epoch": 0.168932295103603, "percentage": 3.38, "elapsed_time": "0:01:25", "remaining_time": "0:40:48", "throughput": 7317.49, "total_tokens": 626624}
|
|
{"current_steps": 1285, "total_steps": 37885, "loss": 0.0655, "lr": 6.777513855898653e-07, "epoch": 0.16959218688135147, "percentage": 3.39, "elapsed_time": "0:01:25", "remaining_time": "0:40:48", "throughput": 7316.37, "total_tokens": 628928}
|
|
{"current_steps": 1290, "total_steps": 37885, "loss": 0.1142, "lr": 6.803906043811032e-07, "epoch": 0.17025207865909991, "percentage": 3.41, "elapsed_time": "0:01:26", "remaining_time": "0:40:47", "throughput": 7319.43, "total_tokens": 631616}
|
|
{"current_steps": 1295, "total_steps": 37885, "loss": 0.045, "lr": 6.83029823172341e-07, "epoch": 0.17091197043684836, "percentage": 3.42, "elapsed_time": "0:01:26", "remaining_time": "0:40:47", "throughput": 7320.27, "total_tokens": 634112}
|
|
{"current_steps": 1300, "total_steps": 37885, "loss": 0.2547, "lr": 6.856690419635787e-07, "epoch": 0.1715718622145968, "percentage": 3.43, "elapsed_time": "0:01:26", "remaining_time": "0:40:46", "throughput": 7319.28, "total_tokens": 636416}
|
|
{"current_steps": 1305, "total_steps": 37885, "loss": 0.0028, "lr": 6.883082607548165e-07, "epoch": 0.17223175399234525, "percentage": 3.44, "elapsed_time": "0:01:27", "remaining_time": "0:40:46", "throughput": 7319.92, "total_tokens": 638848}
|
|
{"current_steps": 1310, "total_steps": 37885, "loss": 0.0734, "lr": 6.909474795460544e-07, "epoch": 0.1728916457700937, "percentage": 3.46, "elapsed_time": "0:01:27", "remaining_time": "0:40:45", "throughput": 7319.8, "total_tokens": 641216}
|
|
{"current_steps": 1315, "total_steps": 37885, "loss": 0.0042, "lr": 6.935866983372921e-07, "epoch": 0.17355153754784217, "percentage": 3.47, "elapsed_time": "0:01:27", "remaining_time": "0:40:45", "throughput": 7324.02, "total_tokens": 644032}
|
|
{"current_steps": 1320, "total_steps": 37885, "loss": 0.1632, "lr": 6.962259171285299e-07, "epoch": 0.1742114293255906, "percentage": 3.48, "elapsed_time": "0:01:28", "remaining_time": "0:40:45", "throughput": 7324.86, "total_tokens": 646528}
|
|
{"current_steps": 1325, "total_steps": 37885, "loss": 0.1357, "lr": 6.988651359197677e-07, "epoch": 0.17487132110333906, "percentage": 3.5, "elapsed_time": "0:01:28", "remaining_time": "0:40:44", "throughput": 7326.48, "total_tokens": 649088}
|
|
{"current_steps": 1330, "total_steps": 37885, "loss": 0.0358, "lr": 7.015043547110056e-07, "epoch": 0.1755312128810875, "percentage": 3.51, "elapsed_time": "0:01:28", "remaining_time": "0:40:44", "throughput": 7326.83, "total_tokens": 651520}
|
|
{"current_steps": 1335, "total_steps": 37885, "loss": 0.1195, "lr": 7.041435735022433e-07, "epoch": 0.17619110465883595, "percentage": 3.52, "elapsed_time": "0:01:29", "remaining_time": "0:40:43", "throughput": 7327.85, "total_tokens": 654016}
|
|
{"current_steps": 1340, "total_steps": 37885, "loss": 0.0002, "lr": 7.067827922934811e-07, "epoch": 0.1768509964365844, "percentage": 3.54, "elapsed_time": "0:01:29", "remaining_time": "0:40:42", "throughput": 7326.97, "total_tokens": 656320}
|
|
{"current_steps": 1345, "total_steps": 37885, "loss": 0.2259, "lr": 7.094220110847189e-07, "epoch": 0.17751088821433286, "percentage": 3.55, "elapsed_time": "0:01:29", "remaining_time": "0:40:42", "throughput": 7328.71, "total_tokens": 658880}
|
|
{"current_steps": 1350, "total_steps": 37885, "loss": 0.0003, "lr": 7.120612298759568e-07, "epoch": 0.1781707799920813, "percentage": 3.56, "elapsed_time": "0:01:30", "remaining_time": "0:40:41", "throughput": 7327.64, "total_tokens": 661184}
|
|
{"current_steps": 1355, "total_steps": 37885, "loss": 0.0801, "lr": 7.147004486671945e-07, "epoch": 0.17883067176982975, "percentage": 3.58, "elapsed_time": "0:01:30", "remaining_time": "0:40:41", "throughput": 7328.54, "total_tokens": 663680}
|
|
{"current_steps": 1360, "total_steps": 37885, "loss": 0.0336, "lr": 7.173396674584322e-07, "epoch": 0.1794905635475782, "percentage": 3.59, "elapsed_time": "0:01:30", "remaining_time": "0:40:40", "throughput": 7328.3, "total_tokens": 666048}
|
|
{"current_steps": 1365, "total_steps": 37885, "loss": 0.1074, "lr": 7.199788862496701e-07, "epoch": 0.18015045532532664, "percentage": 3.6, "elapsed_time": "0:01:31", "remaining_time": "0:40:40", "throughput": 7328.58, "total_tokens": 668480}
|
|
{"current_steps": 1370, "total_steps": 37885, "loss": 0.2056, "lr": 7.226181050409078e-07, "epoch": 0.1808103471030751, "percentage": 3.62, "elapsed_time": "0:01:31", "remaining_time": "0:40:40", "throughput": 7334.39, "total_tokens": 671488}
|
|
{"current_steps": 1375, "total_steps": 37885, "loss": 0.1112, "lr": 7.252573238321457e-07, "epoch": 0.18147023888082353, "percentage": 3.63, "elapsed_time": "0:01:31", "remaining_time": "0:40:39", "throughput": 7334.92, "total_tokens": 673920}
|
|
{"current_steps": 1380, "total_steps": 37885, "loss": 0.2556, "lr": 7.278965426233834e-07, "epoch": 0.182130130658572, "percentage": 3.64, "elapsed_time": "0:01:32", "remaining_time": "0:40:39", "throughput": 7335.8, "total_tokens": 676416}
|
|
{"current_steps": 1385, "total_steps": 37885, "loss": 0.1058, "lr": 7.305357614146212e-07, "epoch": 0.18279002243632045, "percentage": 3.66, "elapsed_time": "0:01:32", "remaining_time": "0:40:38", "throughput": 7337.96, "total_tokens": 679040}
|
|
{"current_steps": 1390, "total_steps": 37885, "loss": 0.188, "lr": 7.33174980205859e-07, "epoch": 0.1834499142140689, "percentage": 3.67, "elapsed_time": "0:01:32", "remaining_time": "0:40:38", "throughput": 7342.18, "total_tokens": 681920}
|
|
{"current_steps": 1395, "total_steps": 37885, "loss": 0.0583, "lr": 7.358141989970969e-07, "epoch": 0.18410980599181734, "percentage": 3.68, "elapsed_time": "0:01:33", "remaining_time": "0:40:38", "throughput": 7342.91, "total_tokens": 684416}
|
|
{"current_steps": 1400, "total_steps": 37885, "loss": 0.0136, "lr": 7.384534177883346e-07, "epoch": 0.18476969776956578, "percentage": 3.7, "elapsed_time": "0:01:33", "remaining_time": "0:40:37", "throughput": 7344.02, "total_tokens": 686976}
|
|
{"current_steps": 1405, "total_steps": 37885, "loss": 0.1873, "lr": 7.410926365795724e-07, "epoch": 0.18542958954731423, "percentage": 3.71, "elapsed_time": "0:01:33", "remaining_time": "0:40:37", "throughput": 7343.92, "total_tokens": 689408}
|
|
{"current_steps": 1410, "total_steps": 37885, "loss": 0.0987, "lr": 7.437318553708102e-07, "epoch": 0.1860894813250627, "percentage": 3.72, "elapsed_time": "0:01:34", "remaining_time": "0:40:37", "throughput": 7342.46, "total_tokens": 691712}
|
|
{"current_steps": 1415, "total_steps": 37885, "loss": 0.1017, "lr": 7.463710741620481e-07, "epoch": 0.18674937310281114, "percentage": 3.73, "elapsed_time": "0:01:34", "remaining_time": "0:40:36", "throughput": 7341.59, "total_tokens": 694080}
|
|
{"current_steps": 1420, "total_steps": 37885, "loss": 0.1519, "lr": 7.490102929532857e-07, "epoch": 0.1874092648805596, "percentage": 3.75, "elapsed_time": "0:01:34", "remaining_time": "0:40:36", "throughput": 7342.37, "total_tokens": 696640}
|
|
{"current_steps": 1425, "total_steps": 37885, "loss": 0.0091, "lr": 7.516495117445236e-07, "epoch": 0.18806915665830803, "percentage": 3.76, "elapsed_time": "0:01:35", "remaining_time": "0:40:36", "throughput": 7342.54, "total_tokens": 699136}
|
|
{"current_steps": 1430, "total_steps": 37885, "loss": 0.3917, "lr": 7.542887305357614e-07, "epoch": 0.18872904843605648, "percentage": 3.77, "elapsed_time": "0:01:35", "remaining_time": "0:40:35", "throughput": 7343.38, "total_tokens": 701696}
|
|
{"current_steps": 1435, "total_steps": 37885, "loss": 0.206, "lr": 7.569279493269993e-07, "epoch": 0.18938894021380492, "percentage": 3.79, "elapsed_time": "0:01:35", "remaining_time": "0:40:35", "throughput": 7345.34, "total_tokens": 704384}
|
|
{"current_steps": 1440, "total_steps": 37885, "loss": 0.0719, "lr": 7.595671681182369e-07, "epoch": 0.1900488319915534, "percentage": 3.8, "elapsed_time": "0:01:36", "remaining_time": "0:40:35", "throughput": 7342.64, "total_tokens": 706560}
|
|
{"current_steps": 1445, "total_steps": 37885, "loss": 0.0804, "lr": 7.622063869094748e-07, "epoch": 0.19070872376930184, "percentage": 3.81, "elapsed_time": "0:01:36", "remaining_time": "0:40:35", "throughput": 7344.74, "total_tokens": 709248}
|
|
{"current_steps": 1450, "total_steps": 37885, "loss": 0.1375, "lr": 7.648456057007126e-07, "epoch": 0.19136861554705029, "percentage": 3.83, "elapsed_time": "0:01:36", "remaining_time": "0:40:34", "throughput": 7340.48, "total_tokens": 711296}
|
|
{"current_steps": 1455, "total_steps": 37885, "loss": 0.0051, "lr": 7.674848244919505e-07, "epoch": 0.19202850732479873, "percentage": 3.84, "elapsed_time": "0:01:37", "remaining_time": "0:40:34", "throughput": 7340.22, "total_tokens": 713728}
|
|
{"current_steps": 1460, "total_steps": 37885, "loss": 0.0025, "lr": 7.701240432831881e-07, "epoch": 0.19268839910254718, "percentage": 3.85, "elapsed_time": "0:01:37", "remaining_time": "0:40:34", "throughput": 7340.8, "total_tokens": 716224}
|
|
{"current_steps": 1465, "total_steps": 37885, "loss": 0.3884, "lr": 7.727632620744259e-07, "epoch": 0.19334829088029562, "percentage": 3.87, "elapsed_time": "0:01:37", "remaining_time": "0:40:33", "throughput": 7339.29, "total_tokens": 718528}
|
|
{"current_steps": 1470, "total_steps": 37885, "loss": 0.2433, "lr": 7.754024808656638e-07, "epoch": 0.1940081826580441, "percentage": 3.88, "elapsed_time": "0:01:38", "remaining_time": "0:40:33", "throughput": 7338.84, "total_tokens": 720960}
|
|
{"current_steps": 1475, "total_steps": 37885, "loss": 0.196, "lr": 7.780416996569014e-07, "epoch": 0.19466807443579254, "percentage": 3.89, "elapsed_time": "0:01:38", "remaining_time": "0:40:33", "throughput": 7337.1, "total_tokens": 723200}
|
|
{"current_steps": 1480, "total_steps": 37885, "loss": 0.1681, "lr": 7.806809184481393e-07, "epoch": 0.19532796621354098, "percentage": 3.91, "elapsed_time": "0:01:38", "remaining_time": "0:40:32", "throughput": 7339.69, "total_tokens": 725888}
|
|
{"current_steps": 1485, "total_steps": 37885, "loss": 0.0609, "lr": 7.833201372393771e-07, "epoch": 0.19598785799128943, "percentage": 3.92, "elapsed_time": "0:01:39", "remaining_time": "0:40:32", "throughput": 7339.34, "total_tokens": 728256}
|
|
{"current_steps": 1490, "total_steps": 37885, "loss": 0.1787, "lr": 7.859593560306149e-07, "epoch": 0.19664774976903787, "percentage": 3.93, "elapsed_time": "0:01:39", "remaining_time": "0:40:31", "throughput": 7339.8, "total_tokens": 730688}
|
|
{"current_steps": 1495, "total_steps": 37885, "loss": 0.2156, "lr": 7.885985748218526e-07, "epoch": 0.19730764154678632, "percentage": 3.95, "elapsed_time": "0:01:39", "remaining_time": "0:40:31", "throughput": 7338.74, "total_tokens": 732992}
|
|
{"current_steps": 1500, "total_steps": 37885, "loss": 0.0585, "lr": 7.912377936130905e-07, "epoch": 0.1979675333245348, "percentage": 3.96, "elapsed_time": "0:01:40", "remaining_time": "0:40:30", "throughput": 7338.95, "total_tokens": 735424}
|
|
{"current_steps": 1505, "total_steps": 37885, "loss": 0.1832, "lr": 7.938770124043283e-07, "epoch": 0.19862742510228323, "percentage": 3.97, "elapsed_time": "0:01:40", "remaining_time": "0:40:30", "throughput": 7337.52, "total_tokens": 737664}
|
|
{"current_steps": 1510, "total_steps": 37885, "loss": 0.2169, "lr": 7.965162311955661e-07, "epoch": 0.19928731688003168, "percentage": 3.99, "elapsed_time": "0:01:40", "remaining_time": "0:40:29", "throughput": 7337.77, "total_tokens": 740096}
|
|
{"current_steps": 1515, "total_steps": 37885, "loss": 0.1121, "lr": 7.991554499868038e-07, "epoch": 0.19994720865778012, "percentage": 4.0, "elapsed_time": "0:01:41", "remaining_time": "0:40:29", "throughput": 7341.24, "total_tokens": 742912}
|
|
{"current_steps": 1520, "total_steps": 37885, "loss": 0.1211, "lr": 8.017946687780417e-07, "epoch": 0.20060710043552857, "percentage": 4.01, "elapsed_time": "0:01:41", "remaining_time": "0:40:28", "throughput": 7343.16, "total_tokens": 745536}
|
|
{"current_steps": 1525, "total_steps": 37885, "loss": 0.0182, "lr": 8.044338875692794e-07, "epoch": 0.201266992213277, "percentage": 4.03, "elapsed_time": "0:01:41", "remaining_time": "0:40:28", "throughput": 7343.41, "total_tokens": 747968}
|
|
{"current_steps": 1530, "total_steps": 37885, "loss": 0.1159, "lr": 8.070731063605173e-07, "epoch": 0.20192688399102549, "percentage": 4.04, "elapsed_time": "0:01:42", "remaining_time": "0:40:28", "throughput": 7344.09, "total_tokens": 750464}
|
|
{"current_steps": 1535, "total_steps": 37885, "loss": 0.1005, "lr": 8.09712325151755e-07, "epoch": 0.20258677576877393, "percentage": 4.05, "elapsed_time": "0:01:42", "remaining_time": "0:40:27", "throughput": 7344.41, "total_tokens": 752896}
|
|
{"current_steps": 1540, "total_steps": 37885, "loss": 0.2272, "lr": 8.123515439429929e-07, "epoch": 0.20324666754652237, "percentage": 4.06, "elapsed_time": "0:01:42", "remaining_time": "0:40:27", "throughput": 7347.39, "total_tokens": 755648}
|
|
{"current_steps": 1545, "total_steps": 37885, "loss": 0.2213, "lr": 8.149907627342306e-07, "epoch": 0.20390655932427082, "percentage": 4.08, "elapsed_time": "0:01:43", "remaining_time": "0:40:26", "throughput": 7348.3, "total_tokens": 758144}
|
|
{"current_steps": 1550, "total_steps": 37885, "loss": 0.0083, "lr": 8.176299815254685e-07, "epoch": 0.20456645110201926, "percentage": 4.09, "elapsed_time": "0:01:43", "remaining_time": "0:40:26", "throughput": 7349.8, "total_tokens": 760704}
|
|
{"current_steps": 1555, "total_steps": 37885, "loss": 0.0023, "lr": 8.202692003167062e-07, "epoch": 0.2052263428797677, "percentage": 4.1, "elapsed_time": "0:01:43", "remaining_time": "0:40:25", "throughput": 7350.04, "total_tokens": 763136}
|
|
{"current_steps": 1560, "total_steps": 37885, "loss": 0.1036, "lr": 8.229084191079441e-07, "epoch": 0.20588623465751615, "percentage": 4.12, "elapsed_time": "0:01:44", "remaining_time": "0:40:25", "throughput": 7353.97, "total_tokens": 766016}
|
|
{"current_steps": 1565, "total_steps": 37885, "loss": 0.3823, "lr": 8.255476378991818e-07, "epoch": 0.20654612643526463, "percentage": 4.13, "elapsed_time": "0:01:44", "remaining_time": "0:40:25", "throughput": 7354.63, "total_tokens": 768512}
|
|
{"current_steps": 1570, "total_steps": 37885, "loss": 0.2189, "lr": 8.281868566904196e-07, "epoch": 0.20720601821301307, "percentage": 4.14, "elapsed_time": "0:01:44", "remaining_time": "0:40:24", "throughput": 7353.85, "total_tokens": 770816}
|
|
{"current_steps": 1575, "total_steps": 37885, "loss": 0.2545, "lr": 8.308260754816574e-07, "epoch": 0.20786590999076152, "percentage": 4.16, "elapsed_time": "0:01:45", "remaining_time": "0:40:24", "throughput": 7354.38, "total_tokens": 773312}
|
|
{"current_steps": 1580, "total_steps": 37885, "loss": 0.043, "lr": 8.334652942728951e-07, "epoch": 0.20852580176850996, "percentage": 4.17, "elapsed_time": "0:01:45", "remaining_time": "0:40:23", "throughput": 7354.11, "total_tokens": 775680}
|
|
{"current_steps": 1585, "total_steps": 37885, "loss": 0.1056, "lr": 8.36104513064133e-07, "epoch": 0.2091856935462584, "percentage": 4.18, "elapsed_time": "0:01:45", "remaining_time": "0:40:23", "throughput": 7353.73, "total_tokens": 778048}
|
|
{"current_steps": 1590, "total_steps": 37885, "loss": 0.2807, "lr": 8.387437318553708e-07, "epoch": 0.20984558532400685, "percentage": 4.2, "elapsed_time": "0:01:46", "remaining_time": "0:40:22", "throughput": 7355.52, "total_tokens": 780672}
|
|
{"current_steps": 1595, "total_steps": 37885, "loss": 0.4118, "lr": 8.413829506466085e-07, "epoch": 0.21050547710175532, "percentage": 4.21, "elapsed_time": "0:01:46", "remaining_time": "0:40:22", "throughput": 7357.73, "total_tokens": 783360}
|
|
{"current_steps": 1600, "total_steps": 37885, "loss": 0.1426, "lr": 8.440221694378463e-07, "epoch": 0.21116536887950377, "percentage": 4.22, "elapsed_time": "0:01:46", "remaining_time": "0:40:21", "throughput": 7358.33, "total_tokens": 785856}
|
|
{"current_steps": 1605, "total_steps": 37885, "loss": 0.1497, "lr": 8.466613882290842e-07, "epoch": 0.2118252606572522, "percentage": 4.24, "elapsed_time": "0:01:47", "remaining_time": "0:40:21", "throughput": 7360.16, "total_tokens": 788480}
|
|
{"current_steps": 1610, "total_steps": 37885, "loss": 0.1096, "lr": 8.49300607020322e-07, "epoch": 0.21248515243500066, "percentage": 4.25, "elapsed_time": "0:01:47", "remaining_time": "0:40:21", "throughput": 7361.35, "total_tokens": 791040}
|
|
{"current_steps": 1615, "total_steps": 37885, "loss": 0.0508, "lr": 8.519398258115597e-07, "epoch": 0.2131450442127491, "percentage": 4.26, "elapsed_time": "0:01:47", "remaining_time": "0:40:20", "throughput": 7362.54, "total_tokens": 793600}
|
|
{"current_steps": 1620, "total_steps": 37885, "loss": 0.2171, "lr": 8.545790446027975e-07, "epoch": 0.21380493599049755, "percentage": 4.28, "elapsed_time": "0:01:48", "remaining_time": "0:40:20", "throughput": 7362.36, "total_tokens": 795968}
|
|
{"current_steps": 1625, "total_steps": 37885, "loss": 0.0251, "lr": 8.572182633940354e-07, "epoch": 0.21446482776824602, "percentage": 4.29, "elapsed_time": "0:01:48", "remaining_time": "0:40:19", "throughput": 7361.51, "total_tokens": 798272}
|
|
{"current_steps": 1630, "total_steps": 37885, "loss": 0.1544, "lr": 8.59857482185273e-07, "epoch": 0.21512471954599446, "percentage": 4.3, "elapsed_time": "0:01:48", "remaining_time": "0:40:19", "throughput": 7364.2, "total_tokens": 801024}
|
|
{"current_steps": 1635, "total_steps": 37885, "loss": 0.1345, "lr": 8.624967009765109e-07, "epoch": 0.2157846113237429, "percentage": 4.32, "elapsed_time": "0:01:49", "remaining_time": "0:40:18", "throughput": 7365.38, "total_tokens": 803584}
|
|
{"current_steps": 1640, "total_steps": 37885, "loss": 0.0544, "lr": 8.651359197677487e-07, "epoch": 0.21644450310149135, "percentage": 4.33, "elapsed_time": "0:01:49", "remaining_time": "0:40:18", "throughput": 7365.02, "total_tokens": 805952}
|
|
{"current_steps": 1645, "total_steps": 37885, "loss": 0.0881, "lr": 8.677751385589866e-07, "epoch": 0.2171043948792398, "percentage": 4.34, "elapsed_time": "0:01:49", "remaining_time": "0:40:18", "throughput": 7366.17, "total_tokens": 808512}
|
|
{"current_steps": 1650, "total_steps": 37885, "loss": 0.0636, "lr": 8.704143573502242e-07, "epoch": 0.21776428665698824, "percentage": 4.36, "elapsed_time": "0:01:50", "remaining_time": "0:40:17", "throughput": 7367.82, "total_tokens": 811136}
|
|
{"current_steps": 1655, "total_steps": 37885, "loss": 0.0526, "lr": 8.730535761414621e-07, "epoch": 0.21842417843473672, "percentage": 4.37, "elapsed_time": "0:01:50", "remaining_time": "0:40:17", "throughput": 7366.26, "total_tokens": 813376}
|
|
{"current_steps": 1660, "total_steps": 37885, "loss": 0.2304, "lr": 8.756927949326999e-07, "epoch": 0.21908407021248516, "percentage": 4.38, "elapsed_time": "0:01:50", "remaining_time": "0:40:16", "throughput": 7366.75, "total_tokens": 815872}
|
|
{"current_steps": 1665, "total_steps": 37885, "loss": 0.0414, "lr": 8.783320137239377e-07, "epoch": 0.2197439619902336, "percentage": 4.39, "elapsed_time": "0:01:51", "remaining_time": "0:40:16", "throughput": 7366.27, "total_tokens": 818240}
|
|
{"current_steps": 1670, "total_steps": 37885, "loss": 0.108, "lr": 8.809712325151754e-07, "epoch": 0.22040385376798205, "percentage": 4.41, "elapsed_time": "0:01:51", "remaining_time": "0:40:15", "throughput": 7366.96, "total_tokens": 820736}
|
|
{"current_steps": 1675, "total_steps": 37885, "loss": 0.3837, "lr": 8.836104513064132e-07, "epoch": 0.2210637455457305, "percentage": 4.42, "elapsed_time": "0:01:51", "remaining_time": "0:40:15", "throughput": 7370.53, "total_tokens": 823616}
|
|
{"current_steps": 1680, "total_steps": 37885, "loss": 0.1827, "lr": 8.862496700976511e-07, "epoch": 0.22172363732347894, "percentage": 4.43, "elapsed_time": "0:01:52", "remaining_time": "0:40:15", "throughput": 7371.07, "total_tokens": 826112}
|
|
{"current_steps": 1685, "total_steps": 37885, "loss": 0.1026, "lr": 8.888888888888888e-07, "epoch": 0.2223835291012274, "percentage": 4.45, "elapsed_time": "0:01:52", "remaining_time": "0:40:14", "throughput": 7372.2, "total_tokens": 828672}
|
|
{"current_steps": 1690, "total_steps": 37885, "loss": 0.1787, "lr": 8.915281076801266e-07, "epoch": 0.22304342087897586, "percentage": 4.46, "elapsed_time": "0:01:52", "remaining_time": "0:40:14", "throughput": 7374.02, "total_tokens": 831296}
|
|
{"current_steps": 1695, "total_steps": 37885, "loss": 0.0912, "lr": 8.941673264713644e-07, "epoch": 0.2237033126567243, "percentage": 4.47, "elapsed_time": "0:01:53", "remaining_time": "0:40:14", "throughput": 7375.11, "total_tokens": 833856}
|
|
{"current_steps": 1700, "total_steps": 37885, "loss": 0.2237, "lr": 8.968065452626022e-07, "epoch": 0.22436320443447275, "percentage": 4.49, "elapsed_time": "0:01:53", "remaining_time": "0:40:13", "throughput": 7376.72, "total_tokens": 836480}
|
|
{"current_steps": 1705, "total_steps": 37885, "loss": 0.0627, "lr": 8.9944576405384e-07, "epoch": 0.2250230962122212, "percentage": 4.5, "elapsed_time": "0:01:53", "remaining_time": "0:40:13", "throughput": 7377.07, "total_tokens": 838976}
|
|
{"current_steps": 1710, "total_steps": 37885, "loss": 0.1252, "lr": 9.020849828450778e-07, "epoch": 0.22568298798996964, "percentage": 4.51, "elapsed_time": "0:01:54", "remaining_time": "0:40:12", "throughput": 7379.66, "total_tokens": 841728}
|
|
{"current_steps": 1715, "total_steps": 37885, "loss": 0.0499, "lr": 9.047242016363156e-07, "epoch": 0.22634287976771808, "percentage": 4.53, "elapsed_time": "0:01:54", "remaining_time": "0:40:12", "throughput": 7378.26, "total_tokens": 843968}
|
|
{"current_steps": 1720, "total_steps": 37885, "loss": 0.4636, "lr": 9.073634204275534e-07, "epoch": 0.22700277154546655, "percentage": 4.54, "elapsed_time": "0:01:54", "remaining_time": "0:40:11", "throughput": 7378.95, "total_tokens": 846464}
|
|
{"current_steps": 1725, "total_steps": 37885, "loss": 0.2002, "lr": 9.100026392187912e-07, "epoch": 0.227662663323215, "percentage": 4.55, "elapsed_time": "0:01:55", "remaining_time": "0:40:11", "throughput": 7380.57, "total_tokens": 849088}
|
|
{"current_steps": 1730, "total_steps": 37885, "loss": 0.0052, "lr": 9.12641858010029e-07, "epoch": 0.22832255510096344, "percentage": 4.57, "elapsed_time": "0:01:55", "remaining_time": "0:40:11", "throughput": 7382.2, "total_tokens": 851712}
|
|
{"current_steps": 1735, "total_steps": 37885, "loss": 0.156, "lr": 9.152810768012667e-07, "epoch": 0.2289824468787119, "percentage": 4.58, "elapsed_time": "0:01:55", "remaining_time": "0:40:10", "throughput": 7382.69, "total_tokens": 854208}
|
|
{"current_steps": 1740, "total_steps": 37885, "loss": 0.0557, "lr": 9.179202955925046e-07, "epoch": 0.22964233865646033, "percentage": 4.59, "elapsed_time": "0:01:56", "remaining_time": "0:40:10", "throughput": 7382.05, "total_tokens": 856576}
|
|
{"current_steps": 1745, "total_steps": 37885, "loss": 0.2487, "lr": 9.205595143837424e-07, "epoch": 0.23030223043420878, "percentage": 4.61, "elapsed_time": "0:01:56", "remaining_time": "0:40:09", "throughput": 7382.31, "total_tokens": 859008}
|
|
{"current_steps": 1750, "total_steps": 37885, "loss": 0.1679, "lr": 9.231987331749802e-07, "epoch": 0.23096212221195725, "percentage": 4.62, "elapsed_time": "0:01:56", "remaining_time": "0:40:09", "throughput": 7382.45, "total_tokens": 861440}
|
|
{"current_steps": 1755, "total_steps": 37885, "loss": 0.1502, "lr": 9.258379519662179e-07, "epoch": 0.2316220139897057, "percentage": 4.63, "elapsed_time": "0:01:57", "remaining_time": "0:40:09", "throughput": 7382.9, "total_tokens": 863936}
|
|
{"current_steps": 1760, "total_steps": 37885, "loss": 0.1127, "lr": 9.284771707574558e-07, "epoch": 0.23228190576745414, "percentage": 4.65, "elapsed_time": "0:01:57", "remaining_time": "0:40:08", "throughput": 7381.5, "total_tokens": 866176}
|
|
{"current_steps": 1765, "total_steps": 37885, "loss": 0.1574, "lr": 9.311163895486936e-07, "epoch": 0.23294179754520258, "percentage": 4.66, "elapsed_time": "0:01:57", "remaining_time": "0:40:08", "throughput": 7380.78, "total_tokens": 868480}
|
|
{"current_steps": 1770, "total_steps": 37885, "loss": 0.1025, "lr": 9.337556083399313e-07, "epoch": 0.23360168932295103, "percentage": 4.67, "elapsed_time": "0:01:57", "remaining_time": "0:40:07", "throughput": 7381.26, "total_tokens": 870976}
|
|
{"current_steps": 1775, "total_steps": 37885, "loss": 0.2237, "lr": 9.363948271311691e-07, "epoch": 0.23426158110069947, "percentage": 4.69, "elapsed_time": "0:01:58", "remaining_time": "0:40:07", "throughput": 7378.9, "total_tokens": 873088}
|
|
{"current_steps": 1780, "total_steps": 37885, "loss": 0.2408, "lr": 9.390340459224069e-07, "epoch": 0.23492147287844795, "percentage": 4.7, "elapsed_time": "0:01:58", "remaining_time": "0:40:06", "throughput": 7378.94, "total_tokens": 875520}
|
|
{"current_steps": 1785, "total_steps": 37885, "loss": 0.0166, "lr": 9.416732647136448e-07, "epoch": 0.2355813646561964, "percentage": 4.71, "elapsed_time": "0:01:58", "remaining_time": "0:40:06", "throughput": 7376.61, "total_tokens": 877632}
|
|
{"current_steps": 1790, "total_steps": 37885, "loss": 0.0702, "lr": 9.443124835048824e-07, "epoch": 0.23624125643394484, "percentage": 4.72, "elapsed_time": "0:01:59", "remaining_time": "0:40:05", "throughput": 7376.23, "total_tokens": 880000}
|
|
{"current_steps": 1795, "total_steps": 37885, "loss": 0.0669, "lr": 9.469517022961203e-07, "epoch": 0.23690114821169328, "percentage": 4.74, "elapsed_time": "0:01:59", "remaining_time": "0:40:05", "throughput": 7374.39, "total_tokens": 882176}
|
|
{"current_steps": 1800, "total_steps": 37885, "loss": 0.0867, "lr": 9.495909210873581e-07, "epoch": 0.23756103998944172, "percentage": 4.75, "elapsed_time": "0:01:59", "remaining_time": "0:40:04", "throughput": 7376.03, "total_tokens": 884800}
|
|
{"current_steps": 1805, "total_steps": 37885, "loss": 0.0113, "lr": 9.522301398785959e-07, "epoch": 0.23822093176719017, "percentage": 4.76, "elapsed_time": "0:02:00", "remaining_time": "0:40:04", "throughput": 7375.09, "total_tokens": 887104}
|
|
{"current_steps": 1810, "total_steps": 37885, "loss": 0.043, "lr": 9.548693586698336e-07, "epoch": 0.23888082354493864, "percentage": 4.78, "elapsed_time": "0:02:00", "remaining_time": "0:40:04", "throughput": 7373.84, "total_tokens": 889408}
|
|
{"current_steps": 1815, "total_steps": 37885, "loss": 0.2031, "lr": 9.575085774610714e-07, "epoch": 0.2395407153226871, "percentage": 4.79, "elapsed_time": "0:02:00", "remaining_time": "0:40:03", "throughput": 7372.42, "total_tokens": 891648}
|
|
{"current_steps": 1820, "total_steps": 37885, "loss": 0.2171, "lr": 9.601477962523092e-07, "epoch": 0.24020060710043553, "percentage": 4.8, "elapsed_time": "0:02:01", "remaining_time": "0:40:03", "throughput": 7373.36, "total_tokens": 894208}
|
|
{"current_steps": 1825, "total_steps": 37885, "loss": 0.1157, "lr": 9.627870150435472e-07, "epoch": 0.24086049887818398, "percentage": 4.82, "elapsed_time": "0:02:01", "remaining_time": "0:40:02", "throughput": 7374.06, "total_tokens": 896704}
|
|
{"current_steps": 1830, "total_steps": 37885, "loss": 0.045, "lr": 9.65426233834785e-07, "epoch": 0.24152039065593242, "percentage": 4.83, "elapsed_time": "0:02:01", "remaining_time": "0:40:02", "throughput": 7375.09, "total_tokens": 899264}
|
|
{"current_steps": 1835, "total_steps": 37885, "loss": 0.0719, "lr": 9.680654526260227e-07, "epoch": 0.24218028243368087, "percentage": 4.84, "elapsed_time": "0:02:02", "remaining_time": "0:40:01", "throughput": 7375.55, "total_tokens": 901760}
|
|
{"current_steps": 1840, "total_steps": 37885, "loss": 0.1597, "lr": 9.707046714172605e-07, "epoch": 0.24284017421142934, "percentage": 4.86, "elapsed_time": "0:02:02", "remaining_time": "0:40:01", "throughput": 7373.34, "total_tokens": 903872}
|
|
{"current_steps": 1845, "total_steps": 37885, "loss": 0.0005, "lr": 9.733438902084983e-07, "epoch": 0.24350006598917778, "percentage": 4.87, "elapsed_time": "0:02:02", "remaining_time": "0:40:01", "throughput": 7373.83, "total_tokens": 906368}
|
|
{"current_steps": 1850, "total_steps": 37885, "loss": 0.0012, "lr": 9.75983108999736e-07, "epoch": 0.24415995776692623, "percentage": 4.88, "elapsed_time": "0:02:03", "remaining_time": "0:40:00", "throughput": 7374.51, "total_tokens": 908864}
|
|
{"current_steps": 1855, "total_steps": 37885, "loss": 0.169, "lr": 9.786223277909738e-07, "epoch": 0.24481984954467467, "percentage": 4.9, "elapsed_time": "0:02:03", "remaining_time": "0:40:00", "throughput": 7372.81, "total_tokens": 911040}
|
|
{"current_steps": 1860, "total_steps": 37885, "loss": 0.2041, "lr": 9.812615465822116e-07, "epoch": 0.24547974132242312, "percentage": 4.91, "elapsed_time": "0:02:03", "remaining_time": "0:39:59", "throughput": 7372.44, "total_tokens": 913408}
|
|
{"current_steps": 1865, "total_steps": 37885, "loss": 0.4034, "lr": 9.839007653734496e-07, "epoch": 0.24613963310017156, "percentage": 4.92, "elapsed_time": "0:02:04", "remaining_time": "0:39:59", "throughput": 7373.28, "total_tokens": 915968}
|
|
{"current_steps": 1870, "total_steps": 37885, "loss": 0.1269, "lr": 9.865399841646871e-07, "epoch": 0.24679952487792003, "percentage": 4.94, "elapsed_time": "0:02:04", "remaining_time": "0:39:58", "throughput": 7374.43, "total_tokens": 918528}
|
|
{"current_steps": 1875, "total_steps": 37885, "loss": 0.0006, "lr": 9.89179202955925e-07, "epoch": 0.24745941665566848, "percentage": 4.95, "elapsed_time": "0:02:04", "remaining_time": "0:39:58", "throughput": 7375.94, "total_tokens": 921152}
|
|
{"current_steps": 1880, "total_steps": 37885, "loss": 0.0005, "lr": 9.918184217471629e-07, "epoch": 0.24811930843341692, "percentage": 4.96, "elapsed_time": "0:02:05", "remaining_time": "0:39:58", "throughput": 7375.54, "total_tokens": 923520}
|
|
{"current_steps": 1885, "total_steps": 37885, "loss": 0.1003, "lr": 9.944576405384004e-07, "epoch": 0.24877920021116537, "percentage": 4.98, "elapsed_time": "0:02:05", "remaining_time": "0:39:57", "throughput": 7375.19, "total_tokens": 925888}
|
|
{"current_steps": 1890, "total_steps": 37885, "loss": 0.1144, "lr": 9.970968593296384e-07, "epoch": 0.2494390919889138, "percentage": 4.99, "elapsed_time": "0:02:05", "remaining_time": "0:39:57", "throughput": 7377.87, "total_tokens": 928704}
|
|
{"current_steps": 1895, "total_steps": 37885, "loss": 0.4074, "lr": 9.997360781208762e-07, "epoch": 0.2500989837666623, "percentage": 5.0, "elapsed_time": "0:02:06", "remaining_time": "0:39:56", "throughput": 7376.53, "total_tokens": 930944}
|
|
{"current_steps": 1895, "total_steps": 37885, "eval_loss": 0.15521390736103058, "epoch": 0.2500989837666623, "percentage": 5.0, "elapsed_time": "0:02:14", "remaining_time": "0:42:26", "throughput": 6943.21, "total_tokens": 930944}
|
|
{"current_steps": 1900, "total_steps": 37885, "loss": 0.2799, "lr": 1.002375296912114e-06, "epoch": 0.25075887554441073, "percentage": 5.02, "elapsed_time": "0:02:46", "remaining_time": "0:52:26", "throughput": 5617.74, "total_tokens": 933376}
|
|
{"current_steps": 1905, "total_steps": 37885, "loss": 0.151, "lr": 1.0050145157033517e-06, "epoch": 0.2514187673221592, "percentage": 5.03, "elapsed_time": "0:02:46", "remaining_time": "0:52:24", "throughput": 5622.32, "total_tokens": 936000}
|
|
{"current_steps": 1910, "total_steps": 37885, "loss": 0.226, "lr": 1.0076537344945895e-06, "epoch": 0.2520786590999076, "percentage": 5.04, "elapsed_time": "0:02:46", "remaining_time": "0:52:21", "throughput": 5625.78, "total_tokens": 938432}
|
|
{"current_steps": 1915, "total_steps": 37885, "loss": 0.1408, "lr": 1.0102929532858273e-06, "epoch": 0.25273855087765607, "percentage": 5.05, "elapsed_time": "0:02:47", "remaining_time": "0:52:19", "throughput": 5631.61, "total_tokens": 941312}
|
|
{"current_steps": 1920, "total_steps": 37885, "loss": 0.0428, "lr": 1.012932172077065e-06, "epoch": 0.2533984426554045, "percentage": 5.07, "elapsed_time": "0:02:47", "remaining_time": "0:52:17", "throughput": 5633.62, "total_tokens": 943488}
|
|
{"current_steps": 1925, "total_steps": 37885, "loss": 0.1021, "lr": 1.015571390868303e-06, "epoch": 0.25405833443315295, "percentage": 5.08, "elapsed_time": "0:02:47", "remaining_time": "0:52:14", "throughput": 5636.77, "total_tokens": 945856}
|
|
{"current_steps": 1930, "total_steps": 37885, "loss": 0.1363, "lr": 1.0182106096595406e-06, "epoch": 0.2547182262109014, "percentage": 5.09, "elapsed_time": "0:02:48", "remaining_time": "0:52:12", "throughput": 5640.46, "total_tokens": 948352}
|
|
{"current_steps": 1935, "total_steps": 37885, "loss": 0.0973, "lr": 1.0208498284507786e-06, "epoch": 0.25537811798864984, "percentage": 5.11, "elapsed_time": "0:02:48", "remaining_time": "0:52:09", "throughput": 5644.95, "total_tokens": 950976}
|
|
{"current_steps": 1940, "total_steps": 37885, "loss": 0.1607, "lr": 1.0234890472420164e-06, "epoch": 0.2560380097663983, "percentage": 5.12, "elapsed_time": "0:02:48", "remaining_time": "0:52:07", "throughput": 5647.18, "total_tokens": 953216}
|
|
{"current_steps": 1945, "total_steps": 37885, "loss": 0.015, "lr": 1.0261282660332541e-06, "epoch": 0.25669790154414673, "percentage": 5.13, "elapsed_time": "0:02:49", "remaining_time": "0:52:05", "throughput": 5650.55, "total_tokens": 955648}
|
|
{"current_steps": 1950, "total_steps": 37885, "loss": 0.0037, "lr": 1.028767484824492e-06, "epoch": 0.25735779332189523, "percentage": 5.15, "elapsed_time": "0:02:49", "remaining_time": "0:52:02", "throughput": 5652.84, "total_tokens": 957888}
|
|
{"current_steps": 1955, "total_steps": 37885, "loss": 0.1865, "lr": 1.0314067036157297e-06, "epoch": 0.2580176850996437, "percentage": 5.16, "elapsed_time": "0:02:49", "remaining_time": "0:52:00", "throughput": 5655.07, "total_tokens": 960128}
|
|
{"current_steps": 1960, "total_steps": 37885, "loss": 0.0348, "lr": 1.0340459224069675e-06, "epoch": 0.2586775768773921, "percentage": 5.17, "elapsed_time": "0:02:50", "remaining_time": "0:51:57", "throughput": 5658.05, "total_tokens": 962496}
|
|
{"current_steps": 1965, "total_steps": 37885, "loss": 0.3992, "lr": 1.0366851411982054e-06, "epoch": 0.25933746865514057, "percentage": 5.19, "elapsed_time": "0:02:50", "remaining_time": "0:51:55", "throughput": 5662.43, "total_tokens": 965120}
|
|
{"current_steps": 1970, "total_steps": 37885, "loss": 0.007, "lr": 1.039324359989443e-06, "epoch": 0.259997360432889, "percentage": 5.2, "elapsed_time": "0:02:50", "remaining_time": "0:51:53", "throughput": 5665.97, "total_tokens": 967616}
|
|
{"current_steps": 1975, "total_steps": 37885, "loss": 0.1056, "lr": 1.0419635787806808e-06, "epoch": 0.26065725221063746, "percentage": 5.21, "elapsed_time": "0:02:51", "remaining_time": "0:51:51", "throughput": 5670.26, "total_tokens": 970240}
|
|
{"current_steps": 1980, "total_steps": 37885, "loss": 0.2734, "lr": 1.0446027975719188e-06, "epoch": 0.2613171439883859, "percentage": 5.23, "elapsed_time": "0:02:51", "remaining_time": "0:51:48", "throughput": 5672.76, "total_tokens": 972544}
|
|
{"current_steps": 1985, "total_steps": 37885, "loss": 0.2381, "lr": 1.0472420163631565e-06, "epoch": 0.26197703576613435, "percentage": 5.24, "elapsed_time": "0:02:51", "remaining_time": "0:51:46", "throughput": 5675.73, "total_tokens": 974912}
|
|
{"current_steps": 1990, "total_steps": 37885, "loss": 0.4659, "lr": 1.049881235154394e-06, "epoch": 0.2626369275438828, "percentage": 5.25, "elapsed_time": "0:02:52", "remaining_time": "0:51:44", "throughput": 5677.57, "total_tokens": 977088}
|
|
{"current_steps": 1995, "total_steps": 37885, "loss": 0.241, "lr": 1.052520453945632e-06, "epoch": 0.26329681932163124, "percentage": 5.27, "elapsed_time": "0:02:52", "remaining_time": "0:51:41", "throughput": 5681.53, "total_tokens": 979648}
|
|
{"current_steps": 2000, "total_steps": 37885, "loss": 0.0961, "lr": 1.0551596727368699e-06, "epoch": 0.2639567110993797, "percentage": 5.28, "elapsed_time": "0:02:52", "remaining_time": "0:51:39", "throughput": 5686.03, "total_tokens": 982336}
|
|
{"current_steps": 2005, "total_steps": 37885, "loss": 0.0121, "lr": 1.0577988915281074e-06, "epoch": 0.2646166028771281, "percentage": 5.29, "elapsed_time": "0:02:53", "remaining_time": "0:51:37", "throughput": 5689.18, "total_tokens": 984768}
|
|
{"current_steps": 2010, "total_steps": 37885, "loss": 0.0467, "lr": 1.0604381103193454e-06, "epoch": 0.2652764946548766, "percentage": 5.31, "elapsed_time": "0:02:53", "remaining_time": "0:51:35", "throughput": 5692.08, "total_tokens": 987136}
|
|
{"current_steps": 2015, "total_steps": 37885, "loss": 0.142, "lr": 1.0630773291105832e-06, "epoch": 0.26593638643262507, "percentage": 5.32, "elapsed_time": "0:02:53", "remaining_time": "0:51:33", "throughput": 5696.56, "total_tokens": 989824}
|
|
{"current_steps": 2020, "total_steps": 37885, "loss": 0.2781, "lr": 1.0657165479018212e-06, "epoch": 0.2665962782103735, "percentage": 5.33, "elapsed_time": "0:02:54", "remaining_time": "0:51:30", "throughput": 5698.73, "total_tokens": 992064}
|
|
{"current_steps": 2025, "total_steps": 37885, "loss": 0.2715, "lr": 1.0683557666930587e-06, "epoch": 0.26725616998812196, "percentage": 5.35, "elapsed_time": "0:02:54", "remaining_time": "0:51:28", "throughput": 5701.16, "total_tokens": 994368}
|
|
{"current_steps": 2030, "total_steps": 37885, "loss": 0.1645, "lr": 1.0709949854842965e-06, "epoch": 0.2679160617658704, "percentage": 5.36, "elapsed_time": "0:02:54", "remaining_time": "0:51:26", "throughput": 5704.63, "total_tokens": 996864}
|
|
{"current_steps": 2035, "total_steps": 37885, "loss": 0.1714, "lr": 1.0736342042755345e-06, "epoch": 0.26857595354361885, "percentage": 5.37, "elapsed_time": "0:02:55", "remaining_time": "0:51:24", "throughput": 5708.22, "total_tokens": 999360}
|
|
{"current_steps": 2040, "total_steps": 37885, "loss": 0.0919, "lr": 1.0762734230667723e-06, "epoch": 0.2692358453213673, "percentage": 5.38, "elapsed_time": "0:02:55", "remaining_time": "0:51:22", "throughput": 5712.06, "total_tokens": 1001920}
|
|
{"current_steps": 2045, "total_steps": 37885, "loss": 0.0063, "lr": 1.0789126418580098e-06, "epoch": 0.26989573709911574, "percentage": 5.4, "elapsed_time": "0:02:55", "remaining_time": "0:51:19", "throughput": 5714.47, "total_tokens": 1004224}
|
|
{"current_steps": 2050, "total_steps": 37885, "loss": 0.0012, "lr": 1.0815518606492478e-06, "epoch": 0.2705556288768642, "percentage": 5.41, "elapsed_time": "0:02:56", "remaining_time": "0:51:17", "throughput": 5716.8, "total_tokens": 1006528}
|
|
{"current_steps": 2055, "total_steps": 37885, "loss": 0.2114, "lr": 1.0841910794404856e-06, "epoch": 0.27121552065461263, "percentage": 5.42, "elapsed_time": "0:02:56", "remaining_time": "0:51:15", "throughput": 5719.57, "total_tokens": 1008896}
|
|
{"current_steps": 2060, "total_steps": 37885, "loss": 0.1478, "lr": 1.0868302982317234e-06, "epoch": 0.2718754124323611, "percentage": 5.44, "elapsed_time": "0:02:56", "remaining_time": "0:51:13", "throughput": 5724.28, "total_tokens": 1011648}
|
|
{"current_steps": 2065, "total_steps": 37885, "loss": 0.0509, "lr": 1.0894695170229611e-06, "epoch": 0.2725353042101095, "percentage": 5.45, "elapsed_time": "0:02:57", "remaining_time": "0:51:11", "throughput": 5728.02, "total_tokens": 1014208}
|
|
{"current_steps": 2070, "total_steps": 37885, "loss": 0.105, "lr": 1.092108735814199e-06, "epoch": 0.27319519598785796, "percentage": 5.46, "elapsed_time": "0:02:57", "remaining_time": "0:51:09", "throughput": 5730.96, "total_tokens": 1016640}
|
|
{"current_steps": 2075, "total_steps": 37885, "loss": 0.0663, "lr": 1.0947479546054369e-06, "epoch": 0.27385508776560646, "percentage": 5.48, "elapsed_time": "0:02:57", "remaining_time": "0:51:09", "throughput": 5731.14, "total_tokens": 1019328}
|
|
{"current_steps": 2080, "total_steps": 37885, "loss": 0.0865, "lr": 1.0973871733966747e-06, "epoch": 0.2745149795433549, "percentage": 5.49, "elapsed_time": "0:02:58", "remaining_time": "0:51:07", "throughput": 5733.7, "total_tokens": 1021696}
|
|
{"current_steps": 2085, "total_steps": 37885, "loss": 0.2014, "lr": 1.1000263921879122e-06, "epoch": 0.27517487132110335, "percentage": 5.5, "elapsed_time": "0:02:58", "remaining_time": "0:51:05", "throughput": 5737.41, "total_tokens": 1024256}
|
|
{"current_steps": 2090, "total_steps": 37885, "loss": 0.1935, "lr": 1.1026656109791502e-06, "epoch": 0.2758347630988518, "percentage": 5.52, "elapsed_time": "0:02:58", "remaining_time": "0:51:03", "throughput": 5739.7, "total_tokens": 1026560}
|
|
{"current_steps": 2095, "total_steps": 37885, "loss": 0.2346, "lr": 1.105304829770388e-06, "epoch": 0.27649465487660024, "percentage": 5.53, "elapsed_time": "0:02:59", "remaining_time": "0:51:01", "throughput": 5743.56, "total_tokens": 1029184}
|
|
{"current_steps": 2100, "total_steps": 37885, "loss": 0.0947, "lr": 1.1079440485616255e-06, "epoch": 0.2771545466543487, "percentage": 5.54, "elapsed_time": "0:02:59", "remaining_time": "0:50:59", "throughput": 5747.28, "total_tokens": 1031744}
|
|
{"current_steps": 2105, "total_steps": 37885, "loss": 0.1724, "lr": 1.1105832673528635e-06, "epoch": 0.27781443843209713, "percentage": 5.56, "elapsed_time": "0:02:59", "remaining_time": "0:50:56", "throughput": 5749.71, "total_tokens": 1034048}
|
|
{"current_steps": 2110, "total_steps": 37885, "loss": 0.0564, "lr": 1.1132224861441013e-06, "epoch": 0.2784743302098456, "percentage": 5.57, "elapsed_time": "0:03:00", "remaining_time": "0:50:54", "throughput": 5753.09, "total_tokens": 1036544}
|
|
{"current_steps": 2115, "total_steps": 37885, "loss": 0.2777, "lr": 1.115861704935339e-06, "epoch": 0.279134221987594, "percentage": 5.58, "elapsed_time": "0:03:00", "remaining_time": "0:50:52", "throughput": 5755.36, "total_tokens": 1038848}
|
|
{"current_steps": 2120, "total_steps": 37885, "loss": 0.1166, "lr": 1.1185009237265768e-06, "epoch": 0.27979411376534247, "percentage": 5.6, "elapsed_time": "0:03:00", "remaining_time": "0:50:50", "throughput": 5757.76, "total_tokens": 1041152}
|
|
{"current_steps": 2125, "total_steps": 37885, "loss": 0.1476, "lr": 1.1211401425178146e-06, "epoch": 0.2804540055430909, "percentage": 5.61, "elapsed_time": "0:03:01", "remaining_time": "0:50:48", "throughput": 5762.62, "total_tokens": 1043968}
|
|
{"current_steps": 2130, "total_steps": 37885, "loss": 0.2938, "lr": 1.1237793613090524e-06, "epoch": 0.28111389732083936, "percentage": 5.62, "elapsed_time": "0:03:01", "remaining_time": "0:50:46", "throughput": 5764.95, "total_tokens": 1046272}
|
|
{"current_steps": 2135, "total_steps": 37885, "loss": 0.1254, "lr": 1.1264185801002904e-06, "epoch": 0.28177378909858786, "percentage": 5.64, "elapsed_time": "0:03:01", "remaining_time": "0:50:44", "throughput": 5766.56, "total_tokens": 1048448}
|
|
{"current_steps": 2140, "total_steps": 37885, "loss": 0.2135, "lr": 1.129057798891528e-06, "epoch": 0.2824336808763363, "percentage": 5.65, "elapsed_time": "0:03:02", "remaining_time": "0:50:42", "throughput": 5770.44, "total_tokens": 1051072}
|
|
{"current_steps": 2145, "total_steps": 37885, "loss": 0.0968, "lr": 1.131697017682766e-06, "epoch": 0.28309357265408475, "percentage": 5.66, "elapsed_time": "0:03:02", "remaining_time": "0:50:40", "throughput": 5772.43, "total_tokens": 1053312}
|
|
{"current_steps": 2150, "total_steps": 37885, "loss": 0.1169, "lr": 1.1343362364740037e-06, "epoch": 0.2837534644318332, "percentage": 5.68, "elapsed_time": "0:03:02", "remaining_time": "0:50:38", "throughput": 5775.05, "total_tokens": 1055680}
|
|
{"current_steps": 2155, "total_steps": 37885, "loss": 0.2016, "lr": 1.1369754552652415e-06, "epoch": 0.28441335620958164, "percentage": 5.69, "elapsed_time": "0:03:03", "remaining_time": "0:50:36", "throughput": 5777.31, "total_tokens": 1057984}
|
|
{"current_steps": 2160, "total_steps": 37885, "loss": 0.1111, "lr": 1.1396146740564792e-06, "epoch": 0.2850732479873301, "percentage": 5.7, "elapsed_time": "0:03:03", "remaining_time": "0:50:34", "throughput": 5781.74, "total_tokens": 1060736}
|
|
{"current_steps": 2165, "total_steps": 37885, "loss": 0.1279, "lr": 1.142253892847717e-06, "epoch": 0.2857331397650785, "percentage": 5.71, "elapsed_time": "0:03:03", "remaining_time": "0:50:32", "throughput": 5785.72, "total_tokens": 1063424}
|
|
{"current_steps": 2170, "total_steps": 37885, "loss": 0.0038, "lr": 1.1448931116389548e-06, "epoch": 0.28639303154282697, "percentage": 5.73, "elapsed_time": "0:03:04", "remaining_time": "0:50:30", "throughput": 5787.78, "total_tokens": 1065728}
|
|
{"current_steps": 2175, "total_steps": 37885, "loss": 0.0007, "lr": 1.1475323304301928e-06, "epoch": 0.2870529233205754, "percentage": 5.74, "elapsed_time": "0:03:04", "remaining_time": "0:50:28", "throughput": 5790.65, "total_tokens": 1068160}
|
|
{"current_steps": 2180, "total_steps": 37885, "loss": 0.2872, "lr": 1.1501715492214303e-06, "epoch": 0.28771281509832386, "percentage": 5.75, "elapsed_time": "0:03:04", "remaining_time": "0:50:26", "throughput": 5793.37, "total_tokens": 1070592}
|
|
{"current_steps": 2185, "total_steps": 37885, "loss": 0.0978, "lr": 1.1528107680126681e-06, "epoch": 0.2883727068760723, "percentage": 5.77, "elapsed_time": "0:03:05", "remaining_time": "0:50:24", "throughput": 5797.41, "total_tokens": 1073280}
|
|
{"current_steps": 2190, "total_steps": 37885, "loss": 0.1237, "lr": 1.155449986803906e-06, "epoch": 0.28903259865382075, "percentage": 5.78, "elapsed_time": "0:03:05", "remaining_time": "0:50:22", "throughput": 5799.62, "total_tokens": 1075584}
|
|
{"current_steps": 2195, "total_steps": 37885, "loss": 0.1684, "lr": 1.1580892055951439e-06, "epoch": 0.28969249043156925, "percentage": 5.79, "elapsed_time": "0:03:05", "remaining_time": "0:50:20", "throughput": 5802.35, "total_tokens": 1078016}
|
|
{"current_steps": 2200, "total_steps": 37885, "loss": 0.1865, "lr": 1.1607284243863814e-06, "epoch": 0.2903523822093177, "percentage": 5.81, "elapsed_time": "0:03:06", "remaining_time": "0:50:18", "throughput": 5805.41, "total_tokens": 1080512}
|
|
{"current_steps": 2205, "total_steps": 37885, "loss": 0.169, "lr": 1.1633676431776194e-06, "epoch": 0.29101227398706614, "percentage": 5.82, "elapsed_time": "0:03:06", "remaining_time": "0:50:17", "throughput": 5807.16, "total_tokens": 1082752}
|
|
{"current_steps": 2210, "total_steps": 37885, "loss": 0.3035, "lr": 1.1660068619688572e-06, "epoch": 0.2916721657648146, "percentage": 5.83, "elapsed_time": "0:03:06", "remaining_time": "0:50:15", "throughput": 5809.86, "total_tokens": 1085184}
|
|
{"current_steps": 2215, "total_steps": 37885, "loss": 0.212, "lr": 1.1686460807600947e-06, "epoch": 0.29233205754256303, "percentage": 5.85, "elapsed_time": "0:03:07", "remaining_time": "0:50:13", "throughput": 5811.28, "total_tokens": 1087360}
|
|
{"current_steps": 2220, "total_steps": 37885, "loss": 0.0484, "lr": 1.1712852995513327e-06, "epoch": 0.2929919493203115, "percentage": 5.86, "elapsed_time": "0:03:07", "remaining_time": "0:50:11", "throughput": 5814.75, "total_tokens": 1089984}
|
|
{"current_steps": 2225, "total_steps": 37885, "loss": 0.0607, "lr": 1.1739245183425705e-06, "epoch": 0.2936518410980599, "percentage": 5.87, "elapsed_time": "0:03:07", "remaining_time": "0:50:09", "throughput": 5818.29, "total_tokens": 1092608}
|
|
{"current_steps": 2230, "total_steps": 37885, "loss": 0.101, "lr": 1.1765637371338085e-06, "epoch": 0.29431173287580836, "percentage": 5.89, "elapsed_time": "0:03:08", "remaining_time": "0:50:07", "throughput": 5820.87, "total_tokens": 1095040}
|
|
{"current_steps": 2235, "total_steps": 37885, "loss": 0.1061, "lr": 1.179202955925046e-06, "epoch": 0.2949716246535568, "percentage": 5.9, "elapsed_time": "0:03:08", "remaining_time": "0:50:06", "throughput": 5824.64, "total_tokens": 1097728}
|
|
{"current_steps": 2240, "total_steps": 37885, "loss": 0.2662, "lr": 1.1818421747162838e-06, "epoch": 0.29563151643130525, "percentage": 5.91, "elapsed_time": "0:03:08", "remaining_time": "0:50:04", "throughput": 5826.89, "total_tokens": 1100096}
|
|
{"current_steps": 2245, "total_steps": 37885, "loss": 0.1855, "lr": 1.1844813935075218e-06, "epoch": 0.2962914082090537, "percentage": 5.93, "elapsed_time": "0:03:09", "remaining_time": "0:50:02", "throughput": 5828.91, "total_tokens": 1102400}
|
|
{"current_steps": 2250, "total_steps": 37885, "loss": 0.0741, "lr": 1.1871206122987596e-06, "epoch": 0.29695129998680214, "percentage": 5.94, "elapsed_time": "0:03:09", "remaining_time": "0:50:00", "throughput": 5832.1, "total_tokens": 1104960}
|
|
{"current_steps": 2255, "total_steps": 37885, "loss": 0.2804, "lr": 1.1897598310899971e-06, "epoch": 0.2976111917645506, "percentage": 5.95, "elapsed_time": "0:03:09", "remaining_time": "0:49:58", "throughput": 5835.35, "total_tokens": 1107520}
|
|
{"current_steps": 2260, "total_steps": 37885, "loss": 0.2522, "lr": 1.1923990498812351e-06, "epoch": 0.2982710835422991, "percentage": 5.97, "elapsed_time": "0:03:10", "remaining_time": "0:49:57", "throughput": 5837.94, "total_tokens": 1109952}
|
|
{"current_steps": 2265, "total_steps": 37885, "loss": 0.0116, "lr": 1.195038268672473e-06, "epoch": 0.29893097532004753, "percentage": 5.98, "elapsed_time": "0:03:10", "remaining_time": "0:49:55", "throughput": 5839.25, "total_tokens": 1112128}
|
|
{"current_steps": 2270, "total_steps": 37885, "loss": 0.0716, "lr": 1.1976774874637107e-06, "epoch": 0.299590867097796, "percentage": 5.99, "elapsed_time": "0:03:10", "remaining_time": "0:49:53", "throughput": 5842.39, "total_tokens": 1114688}
|
|
{"current_steps": 2275, "total_steps": 37885, "loss": 0.2035, "lr": 1.2003167062549485e-06, "epoch": 0.3002507588755444, "percentage": 6.01, "elapsed_time": "0:03:11", "remaining_time": "0:49:51", "throughput": 5845.39, "total_tokens": 1117248}
|
|
{"current_steps": 2280, "total_steps": 37885, "loss": 0.1032, "lr": 1.2029559250461862e-06, "epoch": 0.30091065065329287, "percentage": 6.02, "elapsed_time": "0:03:11", "remaining_time": "0:49:50", "throughput": 5848.38, "total_tokens": 1119808}
|
|
{"current_steps": 2285, "total_steps": 37885, "loss": 0.4005, "lr": 1.2055951438374242e-06, "epoch": 0.3015705424310413, "percentage": 6.03, "elapsed_time": "0:03:11", "remaining_time": "0:49:48", "throughput": 5850.26, "total_tokens": 1122112}
|
|
{"current_steps": 2290, "total_steps": 37885, "loss": 0.1585, "lr": 1.208234362628662e-06, "epoch": 0.30223043420878976, "percentage": 6.04, "elapsed_time": "0:03:12", "remaining_time": "0:49:46", "throughput": 5852.69, "total_tokens": 1124544}
|
|
{"current_steps": 2295, "total_steps": 37885, "loss": 0.1725, "lr": 1.2108735814198995e-06, "epoch": 0.3028903259865382, "percentage": 6.06, "elapsed_time": "0:03:12", "remaining_time": "0:49:44", "throughput": 5855.77, "total_tokens": 1127104}
|
|
{"current_steps": 2300, "total_steps": 37885, "loss": 0.1268, "lr": 1.2135128002111375e-06, "epoch": 0.30355021776428665, "percentage": 6.07, "elapsed_time": "0:03:12", "remaining_time": "0:49:43", "throughput": 5858.26, "total_tokens": 1129536}
|
|
{"current_steps": 2305, "total_steps": 37885, "loss": 0.0557, "lr": 1.2161520190023753e-06, "epoch": 0.3042101095420351, "percentage": 6.08, "elapsed_time": "0:03:13", "remaining_time": "0:49:41", "throughput": 5860.12, "total_tokens": 1131840}
|
|
{"current_steps": 2310, "total_steps": 37885, "loss": 0.128, "lr": 1.2187912377936129e-06, "epoch": 0.30487000131978353, "percentage": 6.1, "elapsed_time": "0:03:13", "remaining_time": "0:49:39", "throughput": 5862.88, "total_tokens": 1134336}
|
|
{"current_steps": 2315, "total_steps": 37885, "loss": 0.3577, "lr": 1.2214304565848509e-06, "epoch": 0.305529893097532, "percentage": 6.11, "elapsed_time": "0:03:13", "remaining_time": "0:49:37", "throughput": 5864.69, "total_tokens": 1136640}
|
|
{"current_steps": 2320, "total_steps": 37885, "loss": 0.1164, "lr": 1.2240696753760886e-06, "epoch": 0.3061897848752805, "percentage": 6.12, "elapsed_time": "0:03:14", "remaining_time": "0:49:36", "throughput": 5867.6, "total_tokens": 1139136}
|
|
{"current_steps": 2325, "total_steps": 37885, "loss": 0.0429, "lr": 1.2267088941673264e-06, "epoch": 0.3068496766530289, "percentage": 6.14, "elapsed_time": "0:03:14", "remaining_time": "0:49:34", "throughput": 5870.17, "total_tokens": 1141568}
|
|
{"current_steps": 2330, "total_steps": 37885, "loss": 0.0024, "lr": 1.2293481129585642e-06, "epoch": 0.30750956843077737, "percentage": 6.15, "elapsed_time": "0:03:14", "remaining_time": "0:49:32", "throughput": 5872.38, "total_tokens": 1143936}
|
|
{"current_steps": 2335, "total_steps": 37885, "loss": 0.0776, "lr": 1.231987331749802e-06, "epoch": 0.3081694602085258, "percentage": 6.16, "elapsed_time": "0:03:15", "remaining_time": "0:49:30", "throughput": 5876.06, "total_tokens": 1146624}
|
|
{"current_steps": 2340, "total_steps": 37885, "loss": 0.0591, "lr": 1.2346265505410397e-06, "epoch": 0.30882935198627426, "percentage": 6.18, "elapsed_time": "0:03:15", "remaining_time": "0:49:29", "throughput": 5878.97, "total_tokens": 1149120}
|
|
{"current_steps": 2345, "total_steps": 37885, "loss": 0.1412, "lr": 1.2372657693322777e-06, "epoch": 0.3094892437640227, "percentage": 6.19, "elapsed_time": "0:03:15", "remaining_time": "0:49:27", "throughput": 5881.07, "total_tokens": 1151488}
|
|
{"current_steps": 2350, "total_steps": 37885, "loss": 0.0975, "lr": 1.2399049881235153e-06, "epoch": 0.31014913554177115, "percentage": 6.2, "elapsed_time": "0:03:16", "remaining_time": "0:49:25", "throughput": 5881.9, "total_tokens": 1153600}
|
|
{"current_steps": 2355, "total_steps": 37885, "loss": 0.1037, "lr": 1.2425442069147532e-06, "epoch": 0.3108090273195196, "percentage": 6.22, "elapsed_time": "0:03:16", "remaining_time": "0:49:24", "throughput": 5885.1, "total_tokens": 1156224}
|
|
{"current_steps": 2360, "total_steps": 37885, "loss": 0.1141, "lr": 1.245183425705991e-06, "epoch": 0.31146891909726804, "percentage": 6.23, "elapsed_time": "0:03:16", "remaining_time": "0:49:22", "throughput": 5887.43, "total_tokens": 1158656}
|
|
{"current_steps": 2365, "total_steps": 37885, "loss": 0.1503, "lr": 1.2478226444972288e-06, "epoch": 0.3121288108750165, "percentage": 6.24, "elapsed_time": "0:03:17", "remaining_time": "0:49:20", "throughput": 5891.31, "total_tokens": 1161408}
|
|
{"current_steps": 2370, "total_steps": 37885, "loss": 0.0871, "lr": 1.2504618632884666e-06, "epoch": 0.3127887026527649, "percentage": 6.26, "elapsed_time": "0:03:17", "remaining_time": "0:49:19", "throughput": 5894.0, "total_tokens": 1163904}
|
|
{"current_steps": 2375, "total_steps": 37885, "loss": 0.3747, "lr": 1.2531010820797043e-06, "epoch": 0.31344859443051337, "percentage": 6.27, "elapsed_time": "0:03:17", "remaining_time": "0:49:17", "throughput": 5895.66, "total_tokens": 1166208}
|
|
{"current_steps": 2380, "total_steps": 37885, "loss": 0.1877, "lr": 1.2557403008709421e-06, "epoch": 0.3141084862082619, "percentage": 6.28, "elapsed_time": "0:03:18", "remaining_time": "0:49:15", "throughput": 5897.34, "total_tokens": 1168512}
|
|
{"current_steps": 2385, "total_steps": 37885, "loss": 0.2214, "lr": 1.25837951966218e-06, "epoch": 0.3147683779860103, "percentage": 6.3, "elapsed_time": "0:03:18", "remaining_time": "0:49:14", "throughput": 5900.26, "total_tokens": 1171072}
|
|
{"current_steps": 2390, "total_steps": 37885, "loss": 0.0104, "lr": 1.2610187384534177e-06, "epoch": 0.31542826976375876, "percentage": 6.31, "elapsed_time": "0:03:18", "remaining_time": "0:49:12", "throughput": 5901.68, "total_tokens": 1173312}
|
|
{"current_steps": 2395, "total_steps": 37885, "loss": 0.1493, "lr": 1.2636579572446554e-06, "epoch": 0.3160881615415072, "percentage": 6.32, "elapsed_time": "0:03:19", "remaining_time": "0:49:11", "throughput": 5903.9, "total_tokens": 1175744}
|
|
{"current_steps": 2400, "total_steps": 37885, "loss": 0.2014, "lr": 1.2662971760358934e-06, "epoch": 0.31674805331925565, "percentage": 6.33, "elapsed_time": "0:03:19", "remaining_time": "0:49:09", "throughput": 5905.91, "total_tokens": 1178112}
|
|
{"current_steps": 2405, "total_steps": 37885, "loss": 0.0012, "lr": 1.2689363948271312e-06, "epoch": 0.3174079450970041, "percentage": 6.35, "elapsed_time": "0:03:19", "remaining_time": "0:49:07", "throughput": 5907.28, "total_tokens": 1180352}
|
|
{"current_steps": 2410, "total_steps": 37885, "loss": 0.1906, "lr": 1.2715756136183688e-06, "epoch": 0.31806783687475254, "percentage": 6.36, "elapsed_time": "0:03:20", "remaining_time": "0:49:06", "throughput": 5908.3, "total_tokens": 1182528}
|
|
{"current_steps": 2415, "total_steps": 37885, "loss": 0.018, "lr": 1.2742148324096067e-06, "epoch": 0.318727728652501, "percentage": 6.37, "elapsed_time": "0:03:20", "remaining_time": "0:49:04", "throughput": 5912.0, "total_tokens": 1185280}
|
|
{"current_steps": 2420, "total_steps": 37885, "loss": 0.0975, "lr": 1.2768540512008445e-06, "epoch": 0.31938762043024943, "percentage": 6.39, "elapsed_time": "0:03:20", "remaining_time": "0:49:03", "throughput": 5915.65, "total_tokens": 1188032}
|
|
{"current_steps": 2425, "total_steps": 37885, "loss": 0.2283, "lr": 1.279493269992082e-06, "epoch": 0.3200475122079979, "percentage": 6.4, "elapsed_time": "0:03:21", "remaining_time": "0:49:01", "throughput": 5917.71, "total_tokens": 1190464}
|
|
{"current_steps": 2430, "total_steps": 37885, "loss": 0.192, "lr": 1.28213248878332e-06, "epoch": 0.3207074039857463, "percentage": 6.41, "elapsed_time": "0:03:21", "remaining_time": "0:49:00", "throughput": 5920.2, "total_tokens": 1192960}
|
|
{"current_steps": 2435, "total_steps": 37885, "loss": 0.0019, "lr": 1.2847717075745578e-06, "epoch": 0.32136729576349476, "percentage": 6.43, "elapsed_time": "0:03:21", "remaining_time": "0:48:58", "throughput": 5920.81, "total_tokens": 1195072}
|
|
{"current_steps": 2440, "total_steps": 37885, "loss": 0.0131, "lr": 1.2874109263657958e-06, "epoch": 0.3220271875412432, "percentage": 6.44, "elapsed_time": "0:03:22", "remaining_time": "0:48:56", "throughput": 5922.43, "total_tokens": 1197376}
|
|
{"current_steps": 2445, "total_steps": 37885, "loss": 0.0088, "lr": 1.2900501451570334e-06, "epoch": 0.3226870793189917, "percentage": 6.45, "elapsed_time": "0:03:22", "remaining_time": "0:48:55", "throughput": 5925.17, "total_tokens": 1199936}
|
|
{"current_steps": 2450, "total_steps": 37885, "loss": 0.0404, "lr": 1.2926893639482712e-06, "epoch": 0.32334697109674015, "percentage": 6.47, "elapsed_time": "0:03:22", "remaining_time": "0:48:53", "throughput": 5927.26, "total_tokens": 1202368}
|
|
{"current_steps": 2455, "total_steps": 37885, "loss": 0.0721, "lr": 1.2953285827395091e-06, "epoch": 0.3240068628744886, "percentage": 6.48, "elapsed_time": "0:03:23", "remaining_time": "0:48:52", "throughput": 5929.46, "total_tokens": 1204800}
|
|
{"current_steps": 2460, "total_steps": 37885, "loss": 0.1065, "lr": 1.297967801530747e-06, "epoch": 0.32466675465223704, "percentage": 6.49, "elapsed_time": "0:03:23", "remaining_time": "0:48:50", "throughput": 5933.1, "total_tokens": 1207552}
|
|
{"current_steps": 2465, "total_steps": 37885, "loss": 0.2085, "lr": 1.3006070203219845e-06, "epoch": 0.3253266464299855, "percentage": 6.51, "elapsed_time": "0:03:23", "remaining_time": "0:48:49", "throughput": 5934.6, "total_tokens": 1209856}
|
|
{"current_steps": 2470, "total_steps": 37885, "loss": 0.1298, "lr": 1.3032462391132225e-06, "epoch": 0.32598653820773393, "percentage": 6.52, "elapsed_time": "0:03:24", "remaining_time": "0:48:47", "throughput": 5936.98, "total_tokens": 1212352}
|
|
{"current_steps": 2475, "total_steps": 37885, "loss": 0.0053, "lr": 1.3058854579044602e-06, "epoch": 0.3266464299854824, "percentage": 6.53, "elapsed_time": "0:03:24", "remaining_time": "0:48:46", "throughput": 5939.79, "total_tokens": 1214912}
|
|
{"current_steps": 2480, "total_steps": 37885, "loss": 0.0804, "lr": 1.308524676695698e-06, "epoch": 0.3273063217632308, "percentage": 6.55, "elapsed_time": "0:03:24", "remaining_time": "0:48:44", "throughput": 5940.78, "total_tokens": 1217088}
|
|
{"current_steps": 2485, "total_steps": 37885, "loss": 0.1441, "lr": 1.3111638954869358e-06, "epoch": 0.32796621354097927, "percentage": 6.56, "elapsed_time": "0:03:25", "remaining_time": "0:48:43", "throughput": 5943.4, "total_tokens": 1219584}
|
|
{"current_steps": 2490, "total_steps": 37885, "loss": 0.1647, "lr": 1.3138031142781736e-06, "epoch": 0.3286261053187277, "percentage": 6.57, "elapsed_time": "0:03:25", "remaining_time": "0:48:41", "throughput": 5947.04, "total_tokens": 1222336}
|
|
{"current_steps": 2495, "total_steps": 37885, "loss": 0.16, "lr": 1.3164423330694115e-06, "epoch": 0.32928599709647616, "percentage": 6.59, "elapsed_time": "0:03:25", "remaining_time": "0:48:40", "throughput": 5949.52, "total_tokens": 1224832}
|
|
{"current_steps": 2500, "total_steps": 37885, "loss": 0.3234, "lr": 1.3190815518606493e-06, "epoch": 0.3299458888742246, "percentage": 6.6, "elapsed_time": "0:03:26", "remaining_time": "0:48:38", "throughput": 5952.33, "total_tokens": 1227392}
|
|
{"current_steps": 2505, "total_steps": 37885, "loss": 0.1718, "lr": 1.3217207706518869e-06, "epoch": 0.3306057806519731, "percentage": 6.61, "elapsed_time": "0:03:26", "remaining_time": "0:48:37", "throughput": 5955.07, "total_tokens": 1229952}
|
|
{"current_steps": 2510, "total_steps": 37885, "loss": 0.0817, "lr": 1.3243599894431249e-06, "epoch": 0.33126567242972155, "percentage": 6.63, "elapsed_time": "0:03:26", "remaining_time": "0:48:35", "throughput": 5958.34, "total_tokens": 1232640}
|
|
{"current_steps": 2515, "total_steps": 37885, "loss": 0.0409, "lr": 1.3269992082343626e-06, "epoch": 0.33192556420747, "percentage": 6.64, "elapsed_time": "0:03:27", "remaining_time": "0:48:34", "throughput": 5961.04, "total_tokens": 1235200}
|
|
{"current_steps": 2520, "total_steps": 37885, "loss": 0.0289, "lr": 1.3296384270256002e-06, "epoch": 0.33258545598521844, "percentage": 6.65, "elapsed_time": "0:03:27", "remaining_time": "0:48:32", "throughput": 5963.22, "total_tokens": 1237632}
|
|
{"current_steps": 2525, "total_steps": 37885, "loss": 0.1123, "lr": 1.3322776458168382e-06, "epoch": 0.3332453477629669, "percentage": 6.66, "elapsed_time": "0:03:27", "remaining_time": "0:48:31", "throughput": 5964.75, "total_tokens": 1239936}
|
|
{"current_steps": 2530, "total_steps": 37885, "loss": 0.2255, "lr": 1.334916864608076e-06, "epoch": 0.3339052395407153, "percentage": 6.68, "elapsed_time": "0:03:28", "remaining_time": "0:48:29", "throughput": 5966.53, "total_tokens": 1242304}
|
|
{"current_steps": 2535, "total_steps": 37885, "loss": 0.0004, "lr": 1.3375560833993137e-06, "epoch": 0.33456513131846377, "percentage": 6.69, "elapsed_time": "0:03:28", "remaining_time": "0:48:28", "throughput": 5968.61, "total_tokens": 1244736}
|
|
{"current_steps": 2540, "total_steps": 37885, "loss": 0.2463, "lr": 1.3401953021905515e-06, "epoch": 0.3352250230962122, "percentage": 6.7, "elapsed_time": "0:03:28", "remaining_time": "0:48:26", "throughput": 5972.17, "total_tokens": 1247488}
|
|
{"current_steps": 2545, "total_steps": 37885, "loss": 0.1187, "lr": 1.3428345209817893e-06, "epoch": 0.33588491487396066, "percentage": 6.72, "elapsed_time": "0:03:29", "remaining_time": "0:48:25", "throughput": 5974.95, "total_tokens": 1250048}
|
|
{"current_steps": 2550, "total_steps": 37885, "loss": 0.2226, "lr": 1.345473739773027e-06, "epoch": 0.3365448066517091, "percentage": 6.73, "elapsed_time": "0:03:29", "remaining_time": "0:48:23", "throughput": 5979.03, "total_tokens": 1252928}
|
|
{"current_steps": 2555, "total_steps": 37885, "loss": 0.2176, "lr": 1.348112958564265e-06, "epoch": 0.33720469842945755, "percentage": 6.74, "elapsed_time": "0:03:29", "remaining_time": "0:48:22", "throughput": 5981.81, "total_tokens": 1255488}
|
|
{"current_steps": 2560, "total_steps": 37885, "loss": 0.4771, "lr": 1.3507521773555026e-06, "epoch": 0.337864590207206, "percentage": 6.76, "elapsed_time": "0:03:30", "remaining_time": "0:48:20", "throughput": 5984.2, "total_tokens": 1257984}
|
|
{"current_steps": 2565, "total_steps": 37885, "loss": 0.1681, "lr": 1.3533913961467406e-06, "epoch": 0.3385244819849545, "percentage": 6.77, "elapsed_time": "0:03:30", "remaining_time": "0:48:19", "throughput": 5986.46, "total_tokens": 1260416}
|
|
{"current_steps": 2570, "total_steps": 37885, "loss": 0.1162, "lr": 1.3560306149379783e-06, "epoch": 0.33918437376270294, "percentage": 6.78, "elapsed_time": "0:03:30", "remaining_time": "0:48:17", "throughput": 5990.01, "total_tokens": 1263168}
|
|
{"current_steps": 2575, "total_steps": 37885, "loss": 0.1092, "lr": 1.3586698337292161e-06, "epoch": 0.3398442655404514, "percentage": 6.8, "elapsed_time": "0:03:31", "remaining_time": "0:48:16", "throughput": 5992.2, "total_tokens": 1265600}
|
|
{"current_steps": 2580, "total_steps": 37885, "loss": 0.0017, "lr": 1.361309052520454e-06, "epoch": 0.34050415731819983, "percentage": 6.81, "elapsed_time": "0:03:31", "remaining_time": "0:48:14", "throughput": 5994.44, "total_tokens": 1268032}
|
|
{"current_steps": 2585, "total_steps": 37885, "loss": 0.124, "lr": 1.3639482713116917e-06, "epoch": 0.3411640490959483, "percentage": 6.82, "elapsed_time": "0:03:31", "remaining_time": "0:48:13", "throughput": 5996.07, "total_tokens": 1270336}
|
|
{"current_steps": 2590, "total_steps": 37885, "loss": 0.0837, "lr": 1.3665874901029294e-06, "epoch": 0.3418239408736967, "percentage": 6.84, "elapsed_time": "0:03:32", "remaining_time": "0:48:11", "throughput": 5999.68, "total_tokens": 1273088}
|
|
{"current_steps": 2595, "total_steps": 37885, "loss": 0.2435, "lr": 1.3692267088941674e-06, "epoch": 0.34248383265144516, "percentage": 6.85, "elapsed_time": "0:03:32", "remaining_time": "0:48:10", "throughput": 6001.54, "total_tokens": 1275456}
|
|
{"current_steps": 2600, "total_steps": 37885, "loss": 0.0428, "lr": 1.371865927685405e-06, "epoch": 0.3431437244291936, "percentage": 6.86, "elapsed_time": "0:03:32", "remaining_time": "0:48:08", "throughput": 6003.74, "total_tokens": 1277888}
|
|
{"current_steps": 2605, "total_steps": 37885, "loss": 0.0011, "lr": 1.3745051464766428e-06, "epoch": 0.34380361620694205, "percentage": 6.88, "elapsed_time": "0:03:33", "remaining_time": "0:48:07", "throughput": 6006.18, "total_tokens": 1280384}
|
|
{"current_steps": 2610, "total_steps": 37885, "loss": 0.0331, "lr": 1.3771443652678807e-06, "epoch": 0.3444635079846905, "percentage": 6.89, "elapsed_time": "0:03:33", "remaining_time": "0:48:05", "throughput": 6008.85, "total_tokens": 1282944}
|
|
{"current_steps": 2615, "total_steps": 37885, "loss": 0.1367, "lr": 1.3797835840591185e-06, "epoch": 0.34512339976243894, "percentage": 6.9, "elapsed_time": "0:03:33", "remaining_time": "0:48:04", "throughput": 6010.16, "total_tokens": 1285184}
|
|
{"current_steps": 2620, "total_steps": 37885, "loss": 0.202, "lr": 1.382422802850356e-06, "epoch": 0.3457832915401874, "percentage": 6.92, "elapsed_time": "0:03:34", "remaining_time": "0:48:02", "throughput": 6013.1, "total_tokens": 1287808}
|
|
{"current_steps": 2625, "total_steps": 37885, "loss": 0.1634, "lr": 1.385062021641594e-06, "epoch": 0.34644318331793583, "percentage": 6.93, "elapsed_time": "0:03:34", "remaining_time": "0:48:01", "throughput": 6016.08, "total_tokens": 1290432}
|
|
{"current_steps": 2630, "total_steps": 37885, "loss": 0.1988, "lr": 1.3877012404328318e-06, "epoch": 0.34710307509568433, "percentage": 6.94, "elapsed_time": "0:03:34", "remaining_time": "0:47:59", "throughput": 6017.58, "total_tokens": 1292736}
|
|
{"current_steps": 2635, "total_steps": 37885, "loss": 0.1405, "lr": 1.3903404592240694e-06, "epoch": 0.3477629668734328, "percentage": 6.96, "elapsed_time": "0:03:35", "remaining_time": "0:47:58", "throughput": 6019.37, "total_tokens": 1295104}
|
|
{"current_steps": 2640, "total_steps": 37885, "loss": 0.0939, "lr": 1.3929796780153074e-06, "epoch": 0.3484228586511812, "percentage": 6.97, "elapsed_time": "0:03:35", "remaining_time": "0:47:56", "throughput": 6021.74, "total_tokens": 1297600}
|
|
{"current_steps": 2645, "total_steps": 37885, "loss": 0.0013, "lr": 1.3956188968065452e-06, "epoch": 0.34908275042892967, "percentage": 6.98, "elapsed_time": "0:03:35", "remaining_time": "0:47:55", "throughput": 6024.64, "total_tokens": 1300224}
|
|
{"current_steps": 2650, "total_steps": 37885, "loss": 0.1809, "lr": 1.3982581155977831e-06, "epoch": 0.3497426422066781, "percentage": 6.99, "elapsed_time": "0:03:36", "remaining_time": "0:47:54", "throughput": 6025.9, "total_tokens": 1302528}
|
|
{"current_steps": 2655, "total_steps": 37885, "loss": 0.2074, "lr": 1.4008973343890207e-06, "epoch": 0.35040253398442656, "percentage": 7.01, "elapsed_time": "0:03:36", "remaining_time": "0:47:52", "throughput": 6028.09, "total_tokens": 1305024}
|
|
{"current_steps": 2660, "total_steps": 37885, "loss": 0.2161, "lr": 1.4035365531802585e-06, "epoch": 0.351062425762175, "percentage": 7.02, "elapsed_time": "0:03:36", "remaining_time": "0:47:51", "throughput": 6029.83, "total_tokens": 1307392}
|
|
{"current_steps": 2665, "total_steps": 37885, "loss": 0.2352, "lr": 1.4061757719714965e-06, "epoch": 0.35172231753992345, "percentage": 7.03, "elapsed_time": "0:03:37", "remaining_time": "0:47:49", "throughput": 6032.68, "total_tokens": 1310016}
|
|
{"current_steps": 2670, "total_steps": 37885, "loss": 0.0256, "lr": 1.4088149907627342e-06, "epoch": 0.3523822093176719, "percentage": 7.05, "elapsed_time": "0:03:37", "remaining_time": "0:47:48", "throughput": 6035.2, "total_tokens": 1312576}
|
|
{"current_steps": 2675, "total_steps": 37885, "loss": 0.1432, "lr": 1.4114542095539718e-06, "epoch": 0.35304210109542034, "percentage": 7.06, "elapsed_time": "0:03:37", "remaining_time": "0:47:47", "throughput": 6037.4, "total_tokens": 1315072}
|
|
{"current_steps": 2680, "total_steps": 37885, "loss": 0.1524, "lr": 1.4140934283452098e-06, "epoch": 0.3537019928731688, "percentage": 7.07, "elapsed_time": "0:03:38", "remaining_time": "0:47:45", "throughput": 6039.39, "total_tokens": 1317504}
|
|
{"current_steps": 2685, "total_steps": 37885, "loss": 0.001, "lr": 1.4167326471364476e-06, "epoch": 0.3543618846509172, "percentage": 7.09, "elapsed_time": "0:03:38", "remaining_time": "0:47:44", "throughput": 6040.27, "total_tokens": 1319680}
|
|
{"current_steps": 2690, "total_steps": 37885, "loss": 0.0008, "lr": 1.4193718659276853e-06, "epoch": 0.3550217764286657, "percentage": 7.1, "elapsed_time": "0:03:38", "remaining_time": "0:47:42", "throughput": 6042.81, "total_tokens": 1322240}
|
|
{"current_steps": 2695, "total_steps": 37885, "loss": 0.1094, "lr": 1.4220110847189231e-06, "epoch": 0.35568166820641417, "percentage": 7.11, "elapsed_time": "0:03:39", "remaining_time": "0:47:41", "throughput": 6045.01, "total_tokens": 1324736}
|
|
{"current_steps": 2700, "total_steps": 37885, "loss": 0.1183, "lr": 1.4246503035101609e-06, "epoch": 0.3563415599841626, "percentage": 7.13, "elapsed_time": "0:03:39", "remaining_time": "0:47:40", "throughput": 6046.78, "total_tokens": 1327104}
|
|
{"current_steps": 2705, "total_steps": 37885, "loss": 0.2524, "lr": 1.4272895223013989e-06, "epoch": 0.35700145176191106, "percentage": 7.14, "elapsed_time": "0:03:39", "remaining_time": "0:47:38", "throughput": 6050.26, "total_tokens": 1329920}
|
|
{"current_steps": 2710, "total_steps": 37885, "loss": 0.1706, "lr": 1.4299287410926366e-06, "epoch": 0.3576613435396595, "percentage": 7.15, "elapsed_time": "0:03:40", "remaining_time": "0:47:37", "throughput": 6052.29, "total_tokens": 1332352}
|
|
{"current_steps": 2715, "total_steps": 37885, "loss": 0.0205, "lr": 1.4325679598838742e-06, "epoch": 0.35832123531740795, "percentage": 7.17, "elapsed_time": "0:03:40", "remaining_time": "0:47:35", "throughput": 6055.36, "total_tokens": 1335040}
|
|
{"current_steps": 2720, "total_steps": 37885, "loss": 0.003, "lr": 1.4352071786751122e-06, "epoch": 0.3589811270951564, "percentage": 7.18, "elapsed_time": "0:03:40", "remaining_time": "0:47:34", "throughput": 6057.38, "total_tokens": 1337472}
|
|
{"current_steps": 2725, "total_steps": 37885, "loss": 0.0809, "lr": 1.43784639746635e-06, "epoch": 0.35964101887290484, "percentage": 7.19, "elapsed_time": "0:03:41", "remaining_time": "0:47:33", "throughput": 6058.58, "total_tokens": 1339712}
|
|
{"current_steps": 2730, "total_steps": 37885, "loss": 0.0011, "lr": 1.4404856162575877e-06, "epoch": 0.3603009106506533, "percentage": 7.21, "elapsed_time": "0:03:41", "remaining_time": "0:47:31", "throughput": 6060.6, "total_tokens": 1342144}
|
|
{"current_steps": 2735, "total_steps": 37885, "loss": 0.0817, "lr": 1.4431248350488255e-06, "epoch": 0.36096080242840173, "percentage": 7.22, "elapsed_time": "0:03:41", "remaining_time": "0:47:30", "throughput": 6062.42, "total_tokens": 1344512}
|
|
{"current_steps": 2740, "total_steps": 37885, "loss": 0.4208, "lr": 1.4457640538400633e-06, "epoch": 0.3616206942061502, "percentage": 7.23, "elapsed_time": "0:03:42", "remaining_time": "0:47:28", "throughput": 6064.15, "total_tokens": 1346880}
|
|
{"current_steps": 2745, "total_steps": 37885, "loss": 0.0683, "lr": 1.448403272631301e-06, "epoch": 0.3622805859838986, "percentage": 7.25, "elapsed_time": "0:03:42", "remaining_time": "0:47:27", "throughput": 6065.06, "total_tokens": 1349056}
|
|
{"current_steps": 2750, "total_steps": 37885, "loss": 0.1806, "lr": 1.4510424914225388e-06, "epoch": 0.36294047776164706, "percentage": 7.26, "elapsed_time": "0:03:42", "remaining_time": "0:47:26", "throughput": 6066.94, "total_tokens": 1351488}
|
|
{"current_steps": 2755, "total_steps": 37885, "loss": 0.0011, "lr": 1.4536817102137766e-06, "epoch": 0.36360036953939556, "percentage": 7.27, "elapsed_time": "0:03:43", "remaining_time": "0:47:24", "throughput": 6068.86, "total_tokens": 1353920}
|
|
{"current_steps": 2760, "total_steps": 37885, "loss": 0.3443, "lr": 1.4563209290050144e-06, "epoch": 0.364260261317144, "percentage": 7.29, "elapsed_time": "0:03:43", "remaining_time": "0:47:23", "throughput": 6070.83, "total_tokens": 1356352}
|
|
{"current_steps": 2765, "total_steps": 37885, "loss": 0.2605, "lr": 1.4589601477962524e-06, "epoch": 0.36492015309489245, "percentage": 7.3, "elapsed_time": "0:03:43", "remaining_time": "0:47:22", "throughput": 6074.12, "total_tokens": 1359104}
|
|
{"current_steps": 2770, "total_steps": 37885, "loss": 0.2077, "lr": 1.46159936658749e-06, "epoch": 0.3655800448726409, "percentage": 7.31, "elapsed_time": "0:03:44", "remaining_time": "0:47:20", "throughput": 6076.03, "total_tokens": 1361536}
|
|
{"current_steps": 2775, "total_steps": 37885, "loss": 0.3646, "lr": 1.464238585378728e-06, "epoch": 0.36623993665038934, "percentage": 7.32, "elapsed_time": "0:03:44", "remaining_time": "0:47:19", "throughput": 6078.79, "total_tokens": 1364160}
|
|
{"current_steps": 2780, "total_steps": 37885, "loss": 0.0023, "lr": 1.4668778041699657e-06, "epoch": 0.3668998284281378, "percentage": 7.34, "elapsed_time": "0:03:44", "remaining_time": "0:47:17", "throughput": 6081.06, "total_tokens": 1366656}
|
|
{"current_steps": 2785, "total_steps": 37885, "loss": 0.142, "lr": 1.4695170229612034e-06, "epoch": 0.36755972020588623, "percentage": 7.35, "elapsed_time": "0:03:45", "remaining_time": "0:47:16", "throughput": 6083.56, "total_tokens": 1369216}
|
|
{"current_steps": 2790, "total_steps": 37885, "loss": 0.2354, "lr": 1.4721562417524412e-06, "epoch": 0.3682196119836347, "percentage": 7.36, "elapsed_time": "0:03:45", "remaining_time": "0:47:15", "throughput": 6085.7, "total_tokens": 1371712}
|
|
{"current_steps": 2795, "total_steps": 37885, "loss": 0.1816, "lr": 1.474795460543679e-06, "epoch": 0.3688795037613831, "percentage": 7.38, "elapsed_time": "0:03:45", "remaining_time": "0:47:13", "throughput": 6087.39, "total_tokens": 1374080}
|
|
{"current_steps": 2800, "total_steps": 37885, "loss": 0.1143, "lr": 1.4774346793349168e-06, "epoch": 0.36953939553913157, "percentage": 7.39, "elapsed_time": "0:03:46", "remaining_time": "0:47:12", "throughput": 6091.29, "total_tokens": 1377024}
|
|
{"current_steps": 2805, "total_steps": 37885, "loss": 0.122, "lr": 1.4800738981261548e-06, "epoch": 0.37019928731688, "percentage": 7.4, "elapsed_time": "0:03:46", "remaining_time": "0:47:11", "throughput": 6092.92, "total_tokens": 1379392}
|
|
{"current_steps": 2810, "total_steps": 37885, "loss": 0.1612, "lr": 1.4827131169173923e-06, "epoch": 0.37085917909462845, "percentage": 7.42, "elapsed_time": "0:03:46", "remaining_time": "0:47:09", "throughput": 6094.64, "total_tokens": 1381760}
|
|
{"current_steps": 2815, "total_steps": 37885, "loss": 0.0652, "lr": 1.48535233570863e-06, "epoch": 0.37151907087237696, "percentage": 7.43, "elapsed_time": "0:03:47", "remaining_time": "0:47:08", "throughput": 6096.26, "total_tokens": 1384128}
|
|
{"current_steps": 2820, "total_steps": 37885, "loss": 0.053, "lr": 1.487991554499868e-06, "epoch": 0.3721789626501254, "percentage": 7.44, "elapsed_time": "0:03:47", "remaining_time": "0:47:07", "throughput": 6097.89, "total_tokens": 1386496}
|
|
{"current_steps": 2825, "total_steps": 37885, "loss": 0.1133, "lr": 1.4906307732911058e-06, "epoch": 0.37283885442787384, "percentage": 7.46, "elapsed_time": "0:03:47", "remaining_time": "0:47:05", "throughput": 6099.0, "total_tokens": 1388736}
|
|
{"current_steps": 2830, "total_steps": 37885, "loss": 0.0446, "lr": 1.4932699920823434e-06, "epoch": 0.3734987462056223, "percentage": 7.47, "elapsed_time": "0:03:48", "remaining_time": "0:47:04", "throughput": 6100.4, "total_tokens": 1391040}
|
|
{"current_steps": 2835, "total_steps": 37885, "loss": 0.093, "lr": 1.4959092108735814e-06, "epoch": 0.37415863798337073, "percentage": 7.48, "elapsed_time": "0:03:48", "remaining_time": "0:47:03", "throughput": 6101.82, "total_tokens": 1393344}
|
|
{"current_steps": 2840, "total_steps": 37885, "loss": 0.074, "lr": 1.4985484296648192e-06, "epoch": 0.3748185297611192, "percentage": 7.5, "elapsed_time": "0:03:48", "remaining_time": "0:47:01", "throughput": 6103.24, "total_tokens": 1395648}
|
|
{"current_steps": 2845, "total_steps": 37885, "loss": 0.2042, "lr": 1.5011876484560567e-06, "epoch": 0.3754784215388676, "percentage": 7.51, "elapsed_time": "0:03:49", "remaining_time": "0:47:00", "throughput": 6105.96, "total_tokens": 1398272}
|
|
{"current_steps": 2850, "total_steps": 37885, "loss": 0.2285, "lr": 1.5038268672472947e-06, "epoch": 0.37613831331661607, "percentage": 7.52, "elapsed_time": "0:03:49", "remaining_time": "0:46:59", "throughput": 6107.27, "total_tokens": 1400576}
|
|
{"current_steps": 2855, "total_steps": 37885, "loss": 0.049, "lr": 1.5064660860385325e-06, "epoch": 0.3767982050943645, "percentage": 7.54, "elapsed_time": "0:03:49", "remaining_time": "0:46:57", "throughput": 6109.65, "total_tokens": 1403136}
|
|
{"current_steps": 2860, "total_steps": 37885, "loss": 0.0759, "lr": 1.5091053048297705e-06, "epoch": 0.37745809687211296, "percentage": 7.55, "elapsed_time": "0:03:49", "remaining_time": "0:46:56", "throughput": 6111.32, "total_tokens": 1405504}
|
|
{"current_steps": 2865, "total_steps": 37885, "loss": 0.2051, "lr": 1.511744523621008e-06, "epoch": 0.3781179886498614, "percentage": 7.56, "elapsed_time": "0:03:50", "remaining_time": "0:46:55", "throughput": 6112.64, "total_tokens": 1407808}
|
|
{"current_steps": 2870, "total_steps": 37885, "loss": 0.1983, "lr": 1.5143837424122458e-06, "epoch": 0.37877788042760985, "percentage": 7.58, "elapsed_time": "0:03:50", "remaining_time": "0:46:53", "throughput": 6114.71, "total_tokens": 1410304}
|
|
{"current_steps": 2875, "total_steps": 37885, "loss": 0.0052, "lr": 1.5170229612034838e-06, "epoch": 0.37943777220535835, "percentage": 7.59, "elapsed_time": "0:03:50", "remaining_time": "0:46:52", "throughput": 6116.58, "total_tokens": 1412736}
|
|
{"current_steps": 2880, "total_steps": 37885, "loss": 0.1043, "lr": 1.5196621799947216e-06, "epoch": 0.3800976639831068, "percentage": 7.6, "elapsed_time": "0:03:51", "remaining_time": "0:46:51", "throughput": 6119.66, "total_tokens": 1415488}
|
|
{"current_steps": 2885, "total_steps": 37885, "loss": 0.148, "lr": 1.5223013987859591e-06, "epoch": 0.38075755576085524, "percentage": 7.62, "elapsed_time": "0:03:51", "remaining_time": "0:46:50", "throughput": 6121.16, "total_tokens": 1417856}
|
|
{"current_steps": 2890, "total_steps": 37885, "loss": 0.1376, "lr": 1.5249406175771971e-06, "epoch": 0.3814174475386037, "percentage": 7.63, "elapsed_time": "0:03:51", "remaining_time": "0:46:48", "throughput": 6122.23, "total_tokens": 1420096}
|
|
{"current_steps": 2895, "total_steps": 37885, "loss": 0.1461, "lr": 1.5275798363684349e-06, "epoch": 0.3820773393163521, "percentage": 7.64, "elapsed_time": "0:03:52", "remaining_time": "0:46:47", "throughput": 6124.59, "total_tokens": 1422656}
|
|
{"current_steps": 2900, "total_steps": 37885, "loss": 0.1672, "lr": 1.5302190551596727e-06, "epoch": 0.38273723109410057, "percentage": 7.65, "elapsed_time": "0:03:52", "remaining_time": "0:46:46", "throughput": 6126.68, "total_tokens": 1425152}
|
|
{"current_steps": 2905, "total_steps": 37885, "loss": 0.0423, "lr": 1.5328582739509104e-06, "epoch": 0.383397122871849, "percentage": 7.67, "elapsed_time": "0:03:52", "remaining_time": "0:46:44", "throughput": 6129.51, "total_tokens": 1427840}
|
|
{"current_steps": 2910, "total_steps": 37885, "loss": 0.0218, "lr": 1.5354974927421482e-06, "epoch": 0.38405701464959746, "percentage": 7.68, "elapsed_time": "0:03:53", "remaining_time": "0:46:43", "throughput": 6130.87, "total_tokens": 1430144}
|
|
{"current_steps": 2915, "total_steps": 37885, "loss": 0.1745, "lr": 1.5381367115333862e-06, "epoch": 0.3847169064273459, "percentage": 7.69, "elapsed_time": "0:03:53", "remaining_time": "0:46:42", "throughput": 6132.15, "total_tokens": 1432448}
|
|
{"current_steps": 2920, "total_steps": 37885, "loss": 0.0026, "lr": 1.540775930324624e-06, "epoch": 0.38537679820509435, "percentage": 7.71, "elapsed_time": "0:03:53", "remaining_time": "0:46:41", "throughput": 6133.45, "total_tokens": 1434752}
|
|
{"current_steps": 2925, "total_steps": 37885, "loss": 0.001, "lr": 1.5434151491158615e-06, "epoch": 0.3860366899828428, "percentage": 7.72, "elapsed_time": "0:03:54", "remaining_time": "0:46:39", "throughput": 6135.26, "total_tokens": 1437184}
|
|
{"current_steps": 2930, "total_steps": 37885, "loss": 0.051, "lr": 1.5460543679070995e-06, "epoch": 0.38669658176059124, "percentage": 7.73, "elapsed_time": "0:03:54", "remaining_time": "0:46:38", "throughput": 6138.46, "total_tokens": 1440000}
|
|
{"current_steps": 2935, "total_steps": 37885, "loss": 0.0864, "lr": 1.5486935866983373e-06, "epoch": 0.3873564735383397, "percentage": 7.75, "elapsed_time": "0:03:54", "remaining_time": "0:46:37", "throughput": 6140.9, "total_tokens": 1442624}
|
|
{"current_steps": 2940, "total_steps": 37885, "loss": 0.291, "lr": 1.551332805489575e-06, "epoch": 0.3880163653160882, "percentage": 7.76, "elapsed_time": "0:03:55", "remaining_time": "0:46:36", "throughput": 6143.16, "total_tokens": 1445184}
|
|
{"current_steps": 2945, "total_steps": 37885, "loss": 0.2756, "lr": 1.5539720242808128e-06, "epoch": 0.38867625709383663, "percentage": 7.77, "elapsed_time": "0:03:55", "remaining_time": "0:46:34", "throughput": 6144.87, "total_tokens": 1447616}
|
|
{"current_steps": 2950, "total_steps": 37885, "loss": 0.0006, "lr": 1.5566112430720506e-06, "epoch": 0.3893361488715851, "percentage": 7.79, "elapsed_time": "0:03:55", "remaining_time": "0:46:33", "throughput": 6147.05, "total_tokens": 1450176}
|
|
{"current_steps": 2955, "total_steps": 37885, "loss": 0.4024, "lr": 1.5592504618632884e-06, "epoch": 0.3899960406493335, "percentage": 7.8, "elapsed_time": "0:03:56", "remaining_time": "0:46:32", "throughput": 6149.15, "total_tokens": 1452672}
|
|
{"current_steps": 2960, "total_steps": 37885, "loss": 0.2948, "lr": 1.5618896806545262e-06, "epoch": 0.39065593242708196, "percentage": 7.81, "elapsed_time": "0:03:56", "remaining_time": "0:46:31", "throughput": 6151.42, "total_tokens": 1455232}
|
|
{"current_steps": 2965, "total_steps": 37885, "loss": 0.2825, "lr": 1.564528899445764e-06, "epoch": 0.3913158242048304, "percentage": 7.83, "elapsed_time": "0:03:56", "remaining_time": "0:46:30", "throughput": 6153.64, "total_tokens": 1457792}
|
|
{"current_steps": 2970, "total_steps": 37885, "loss": 0.0889, "lr": 1.5671681182370017e-06, "epoch": 0.39197571598257885, "percentage": 7.84, "elapsed_time": "0:03:57", "remaining_time": "0:46:28", "throughput": 6155.2, "total_tokens": 1460160}
|
|
{"current_steps": 2975, "total_steps": 37885, "loss": 0.074, "lr": 1.5698073370282397e-06, "epoch": 0.3926356077603273, "percentage": 7.85, "elapsed_time": "0:03:57", "remaining_time": "0:46:27", "throughput": 6156.26, "total_tokens": 1462400}
|
|
{"current_steps": 2980, "total_steps": 37885, "loss": 0.128, "lr": 1.5724465558194772e-06, "epoch": 0.39329549953807574, "percentage": 7.87, "elapsed_time": "0:03:57", "remaining_time": "0:46:26", "throughput": 6158.98, "total_tokens": 1465088}
|
|
{"current_steps": 2985, "total_steps": 37885, "loss": 0.1378, "lr": 1.5750857746107152e-06, "epoch": 0.3939553913158242, "percentage": 7.88, "elapsed_time": "0:03:58", "remaining_time": "0:46:25", "throughput": 6160.51, "total_tokens": 1467456}
|
|
{"current_steps": 2990, "total_steps": 37885, "loss": 0.0013, "lr": 1.577724993401953e-06, "epoch": 0.39461528309357263, "percentage": 7.89, "elapsed_time": "0:03:58", "remaining_time": "0:46:23", "throughput": 6162.71, "total_tokens": 1470016}
|
|
{"current_steps": 2995, "total_steps": 37885, "loss": 0.0779, "lr": 1.5803642121931908e-06, "epoch": 0.3952751748713211, "percentage": 7.91, "elapsed_time": "0:03:58", "remaining_time": "0:46:22", "throughput": 6164.5, "total_tokens": 1472448}
|
|
{"current_steps": 3000, "total_steps": 37885, "loss": 0.0693, "lr": 1.5830034309844285e-06, "epoch": 0.3959350666490696, "percentage": 7.92, "elapsed_time": "0:03:59", "remaining_time": "0:46:21", "throughput": 6166.87, "total_tokens": 1475072}
|
|
{"current_steps": 3005, "total_steps": 37885, "loss": 0.063, "lr": 1.5856426497756663e-06, "epoch": 0.396594958426818, "percentage": 7.93, "elapsed_time": "0:03:59", "remaining_time": "0:46:20", "throughput": 6168.29, "total_tokens": 1477440}
|
|
{"current_steps": 3010, "total_steps": 37885, "loss": 0.0011, "lr": 1.588281868566904e-06, "epoch": 0.39725485020456647, "percentage": 7.95, "elapsed_time": "0:03:59", "remaining_time": "0:46:19", "throughput": 6169.94, "total_tokens": 1479872}
|
|
{"current_steps": 3015, "total_steps": 37885, "loss": 0.2275, "lr": 1.590921087358142e-06, "epoch": 0.3979147419823149, "percentage": 7.96, "elapsed_time": "0:04:00", "remaining_time": "0:46:17", "throughput": 6170.13, "total_tokens": 1481920}
|
|
{"current_steps": 3020, "total_steps": 37885, "loss": 0.2769, "lr": 1.5935603061493796e-06, "epoch": 0.39857463376006336, "percentage": 7.97, "elapsed_time": "0:04:00", "remaining_time": "0:46:16", "throughput": 6174.19, "total_tokens": 1484992}
|
|
{"current_steps": 3025, "total_steps": 37885, "loss": 0.3505, "lr": 1.5961995249406174e-06, "epoch": 0.3992345255378118, "percentage": 7.98, "elapsed_time": "0:04:00", "remaining_time": "0:46:15", "throughput": 6175.18, "total_tokens": 1487232}
|
|
{"current_steps": 3030, "total_steps": 37885, "loss": 0.19, "lr": 1.5988387437318554e-06, "epoch": 0.39989441731556025, "percentage": 8.0, "elapsed_time": "0:04:01", "remaining_time": "0:46:14", "throughput": 6177.19, "total_tokens": 1489728}
|
|
{"current_steps": 3035, "total_steps": 37885, "loss": 0.2316, "lr": 1.6014779625230932e-06, "epoch": 0.4005543090933087, "percentage": 8.01, "elapsed_time": "0:04:01", "remaining_time": "0:46:12", "throughput": 6178.17, "total_tokens": 1491968}
|
|
{"current_steps": 3040, "total_steps": 37885, "loss": 0.1632, "lr": 1.6041171813143307e-06, "epoch": 0.40121420087105714, "percentage": 8.02, "elapsed_time": "0:04:01", "remaining_time": "0:46:11", "throughput": 6179.61, "total_tokens": 1494336}
|
|
{"current_steps": 3045, "total_steps": 37885, "loss": 0.1404, "lr": 1.6067564001055687e-06, "epoch": 0.4018740926488056, "percentage": 8.04, "elapsed_time": "0:04:02", "remaining_time": "0:46:10", "throughput": 6183.83, "total_tokens": 1497472}
|
|
{"current_steps": 3050, "total_steps": 37885, "loss": 0.085, "lr": 1.6093956188968065e-06, "epoch": 0.402533984426554, "percentage": 8.05, "elapsed_time": "0:04:02", "remaining_time": "0:46:09", "throughput": 6185.77, "total_tokens": 1499968}
|
|
{"current_steps": 3055, "total_steps": 37885, "loss": 0.0344, "lr": 1.612034837688044e-06, "epoch": 0.40319387620430247, "percentage": 8.06, "elapsed_time": "0:04:02", "remaining_time": "0:46:08", "throughput": 6187.23, "total_tokens": 1502336}
|
|
{"current_steps": 3060, "total_steps": 37885, "loss": 0.2477, "lr": 1.614674056479282e-06, "epoch": 0.40385376798205097, "percentage": 8.08, "elapsed_time": "0:04:03", "remaining_time": "0:46:07", "throughput": 6188.16, "total_tokens": 1504576}
|
|
{"current_steps": 3065, "total_steps": 37885, "loss": 0.2589, "lr": 1.6173132752705198e-06, "epoch": 0.4045136597597994, "percentage": 8.09, "elapsed_time": "0:04:03", "remaining_time": "0:46:05", "throughput": 6190.35, "total_tokens": 1507136}
|
|
{"current_steps": 3070, "total_steps": 37885, "loss": 0.0691, "lr": 1.6199524940617578e-06, "epoch": 0.40517355153754786, "percentage": 8.1, "elapsed_time": "0:04:03", "remaining_time": "0:46:04", "throughput": 6192.51, "total_tokens": 1509696}
|
|
{"current_steps": 3075, "total_steps": 37885, "loss": 0.2528, "lr": 1.6225917128529954e-06, "epoch": 0.4058334433152963, "percentage": 8.12, "elapsed_time": "0:04:04", "remaining_time": "0:46:03", "throughput": 6194.4, "total_tokens": 1512192}
|
|
{"current_steps": 3080, "total_steps": 37885, "loss": 0.0691, "lr": 1.6252309316442331e-06, "epoch": 0.40649333509304475, "percentage": 8.13, "elapsed_time": "0:04:04", "remaining_time": "0:46:02", "throughput": 6196.55, "total_tokens": 1514752}
|
|
{"current_steps": 3085, "total_steps": 37885, "loss": 0.3173, "lr": 1.6278701504354711e-06, "epoch": 0.4071532268707932, "percentage": 8.14, "elapsed_time": "0:04:04", "remaining_time": "0:46:01", "throughput": 6199.06, "total_tokens": 1517440}
|
|
{"current_steps": 3090, "total_steps": 37885, "loss": 0.0856, "lr": 1.6305093692267089e-06, "epoch": 0.40781311864854164, "percentage": 8.16, "elapsed_time": "0:04:05", "remaining_time": "0:46:00", "throughput": 6200.28, "total_tokens": 1519744}
|
|
{"current_steps": 3095, "total_steps": 37885, "loss": 0.1111, "lr": 1.6331485880179465e-06, "epoch": 0.4084730104262901, "percentage": 8.17, "elapsed_time": "0:04:05", "remaining_time": "0:45:58", "throughput": 6201.86, "total_tokens": 1522176}
|
|
{"current_steps": 3100, "total_steps": 37885, "loss": 0.0694, "lr": 1.6357878068091844e-06, "epoch": 0.40913290220403853, "percentage": 8.18, "elapsed_time": "0:04:05", "remaining_time": "0:45:57", "throughput": 6202.54, "total_tokens": 1524352}
|
|
{"current_steps": 3105, "total_steps": 37885, "loss": 0.2666, "lr": 1.6384270256004222e-06, "epoch": 0.409792793981787, "percentage": 8.2, "elapsed_time": "0:04:06", "remaining_time": "0:45:56", "throughput": 6204.82, "total_tokens": 1526976}
|
|
{"current_steps": 3110, "total_steps": 37885, "loss": 0.1743, "lr": 1.64106624439166e-06, "epoch": 0.4104526857595354, "percentage": 8.21, "elapsed_time": "0:04:06", "remaining_time": "0:45:55", "throughput": 6205.45, "total_tokens": 1529152}
|
|
{"current_steps": 3115, "total_steps": 37885, "loss": 0.0033, "lr": 1.6437054631828978e-06, "epoch": 0.41111257753728386, "percentage": 8.22, "elapsed_time": "0:04:06", "remaining_time": "0:45:54", "throughput": 6207.33, "total_tokens": 1531648}
|
|
{"current_steps": 3120, "total_steps": 37885, "loss": 0.1017, "lr": 1.6463446819741355e-06, "epoch": 0.4117724693150323, "percentage": 8.24, "elapsed_time": "0:04:07", "remaining_time": "0:45:53", "throughput": 6208.5, "total_tokens": 1533952}
|
|
{"current_steps": 3125, "total_steps": 37885, "loss": 0.294, "lr": 1.6489839007653735e-06, "epoch": 0.4124323610927808, "percentage": 8.25, "elapsed_time": "0:04:07", "remaining_time": "0:45:51", "throughput": 6211.0, "total_tokens": 1536640}
|
|
{"current_steps": 3130, "total_steps": 37885, "loss": 0.0033, "lr": 1.6516231195566113e-06, "epoch": 0.41309225287052925, "percentage": 8.26, "elapsed_time": "0:04:07", "remaining_time": "0:45:50", "throughput": 6213.0, "total_tokens": 1539200}
|
|
{"current_steps": 3135, "total_steps": 37885, "loss": 0.1664, "lr": 1.6542623383478489e-06, "epoch": 0.4137521446482777, "percentage": 8.28, "elapsed_time": "0:04:08", "remaining_time": "0:45:49", "throughput": 6214.68, "total_tokens": 1541632}
|
|
{"current_steps": 3140, "total_steps": 37885, "loss": 0.0619, "lr": 1.6569015571390868e-06, "epoch": 0.41441203642602614, "percentage": 8.29, "elapsed_time": "0:04:08", "remaining_time": "0:45:48", "throughput": 6216.52, "total_tokens": 1544128}
|
|
{"current_steps": 3145, "total_steps": 37885, "loss": 0.0231, "lr": 1.6595407759303246e-06, "epoch": 0.4150719282037746, "percentage": 8.3, "elapsed_time": "0:04:08", "remaining_time": "0:45:47", "throughput": 6217.42, "total_tokens": 1546368}
|
|
{"current_steps": 3150, "total_steps": 37885, "loss": 0.4345, "lr": 1.6621799947215624e-06, "epoch": 0.41573181998152303, "percentage": 8.31, "elapsed_time": "0:04:09", "remaining_time": "0:45:46", "throughput": 6218.83, "total_tokens": 1548736}
|
|
{"current_steps": 3155, "total_steps": 37885, "loss": 0.2605, "lr": 1.6648192135128002e-06, "epoch": 0.4163917117592715, "percentage": 8.33, "elapsed_time": "0:04:09", "remaining_time": "0:45:45", "throughput": 6220.43, "total_tokens": 1551168}
|
|
{"current_steps": 3160, "total_steps": 37885, "loss": 0.1204, "lr": 1.667458432304038e-06, "epoch": 0.4170516035370199, "percentage": 8.34, "elapsed_time": "0:04:09", "remaining_time": "0:45:43", "throughput": 6222.2, "total_tokens": 1553664}
|
|
{"current_steps": 3165, "total_steps": 37885, "loss": 0.1409, "lr": 1.6700976510952757e-06, "epoch": 0.41771149531476837, "percentage": 8.35, "elapsed_time": "0:04:10", "remaining_time": "0:45:42", "throughput": 6223.35, "total_tokens": 1555968}
|
|
{"current_steps": 3170, "total_steps": 37885, "loss": 0.064, "lr": 1.6727368698865135e-06, "epoch": 0.4183713870925168, "percentage": 8.37, "elapsed_time": "0:04:10", "remaining_time": "0:45:41", "throughput": 6224.25, "total_tokens": 1558208}
|
|
{"current_steps": 3175, "total_steps": 37885, "loss": 0.0094, "lr": 1.6753760886777513e-06, "epoch": 0.41903127887026526, "percentage": 8.38, "elapsed_time": "0:04:10", "remaining_time": "0:45:40", "throughput": 6225.84, "total_tokens": 1560640}
|
|
{"current_steps": 3180, "total_steps": 37885, "loss": 0.0375, "lr": 1.678015307468989e-06, "epoch": 0.4196911706480137, "percentage": 8.39, "elapsed_time": "0:04:10", "remaining_time": "0:45:39", "throughput": 6228.4, "total_tokens": 1563328}
|
|
{"current_steps": 3185, "total_steps": 37885, "loss": 0.1175, "lr": 1.680654526260227e-06, "epoch": 0.4203510624257622, "percentage": 8.41, "elapsed_time": "0:04:11", "remaining_time": "0:45:38", "throughput": 6229.73, "total_tokens": 1565696}
|
|
{"current_steps": 3190, "total_steps": 37885, "loss": 0.4013, "lr": 1.6832937450514646e-06, "epoch": 0.42101095420351065, "percentage": 8.42, "elapsed_time": "0:04:11", "remaining_time": "0:45:37", "throughput": 6233.09, "total_tokens": 1568640}
|
|
{"current_steps": 3195, "total_steps": 37885, "loss": 0.0542, "lr": 1.6859329638427023e-06, "epoch": 0.4216708459812591, "percentage": 8.43, "elapsed_time": "0:04:11", "remaining_time": "0:45:36", "throughput": 6235.53, "total_tokens": 1571328}
|
|
{"current_steps": 3200, "total_steps": 37885, "loss": 0.5376, "lr": 1.6885721826339403e-06, "epoch": 0.42233073775900754, "percentage": 8.45, "elapsed_time": "0:04:12", "remaining_time": "0:45:35", "throughput": 6237.95, "total_tokens": 1574016}
|
|
{"current_steps": 3205, "total_steps": 37885, "loss": 0.2516, "lr": 1.691211401425178e-06, "epoch": 0.422990629536756, "percentage": 8.46, "elapsed_time": "0:04:12", "remaining_time": "0:45:33", "throughput": 6239.21, "total_tokens": 1576384}
|
|
{"current_steps": 3210, "total_steps": 37885, "loss": 0.217, "lr": 1.6938506202164159e-06, "epoch": 0.4236505213145044, "percentage": 8.47, "elapsed_time": "0:04:12", "remaining_time": "0:45:32", "throughput": 6242.06, "total_tokens": 1579200}
|
|
{"current_steps": 3215, "total_steps": 37885, "loss": 0.1522, "lr": 1.6964898390076536e-06, "epoch": 0.42431041309225287, "percentage": 8.49, "elapsed_time": "0:04:13", "remaining_time": "0:45:31", "throughput": 6243.6, "total_tokens": 1581632}
|
|
{"current_steps": 3220, "total_steps": 37885, "loss": 0.0283, "lr": 1.6991290577988914e-06, "epoch": 0.4249703048700013, "percentage": 8.5, "elapsed_time": "0:04:13", "remaining_time": "0:45:30", "throughput": 6245.07, "total_tokens": 1584064}
|
|
{"current_steps": 3225, "total_steps": 37885, "loss": 0.0425, "lr": 1.7017682765901294e-06, "epoch": 0.42563019664774976, "percentage": 8.51, "elapsed_time": "0:04:13", "remaining_time": "0:45:29", "throughput": 6247.53, "total_tokens": 1586752}
|
|
{"current_steps": 3230, "total_steps": 37885, "loss": 0.1471, "lr": 1.704407495381367e-06, "epoch": 0.4262900884254982, "percentage": 8.53, "elapsed_time": "0:04:14", "remaining_time": "0:45:28", "throughput": 6249.29, "total_tokens": 1589248}
|
|
{"current_steps": 3235, "total_steps": 37885, "loss": 0.0052, "lr": 1.7070467141726047e-06, "epoch": 0.42694998020324665, "percentage": 8.54, "elapsed_time": "0:04:14", "remaining_time": "0:45:27", "throughput": 6250.38, "total_tokens": 1591552}
|
|
{"current_steps": 3240, "total_steps": 37885, "loss": 0.0685, "lr": 1.7096859329638427e-06, "epoch": 0.4276098719809951, "percentage": 8.55, "elapsed_time": "0:04:14", "remaining_time": "0:45:26", "throughput": 6252.14, "total_tokens": 1594048}
|
|
{"current_steps": 3245, "total_steps": 37885, "loss": 0.2108, "lr": 1.7123251517550805e-06, "epoch": 0.4282697637587436, "percentage": 8.57, "elapsed_time": "0:04:15", "remaining_time": "0:45:25", "throughput": 6253.89, "total_tokens": 1596544}
|
|
{"current_steps": 3250, "total_steps": 37885, "loss": 0.2884, "lr": 1.714964370546318e-06, "epoch": 0.42892965553649204, "percentage": 8.58, "elapsed_time": "0:04:15", "remaining_time": "0:45:24", "throughput": 6255.89, "total_tokens": 1599104}
|
|
{"current_steps": 3255, "total_steps": 37885, "loss": 0.1717, "lr": 1.717603589337556e-06, "epoch": 0.4295895473142405, "percentage": 8.59, "elapsed_time": "0:04:15", "remaining_time": "0:45:22", "throughput": 6256.67, "total_tokens": 1601344}
|
|
{"current_steps": 3260, "total_steps": 37885, "loss": 0.0818, "lr": 1.7202428081287938e-06, "epoch": 0.43024943909198893, "percentage": 8.6, "elapsed_time": "0:04:16", "remaining_time": "0:45:21", "throughput": 6258.83, "total_tokens": 1603968}
|
|
{"current_steps": 3265, "total_steps": 37885, "loss": 0.0261, "lr": 1.7228820269200314e-06, "epoch": 0.4309093308697374, "percentage": 8.62, "elapsed_time": "0:04:16", "remaining_time": "0:45:20", "throughput": 6260.32, "total_tokens": 1606400}
|
|
{"current_steps": 3270, "total_steps": 37885, "loss": 0.2814, "lr": 1.7255212457112694e-06, "epoch": 0.4315692226474858, "percentage": 8.63, "elapsed_time": "0:04:16", "remaining_time": "0:45:19", "throughput": 6262.23, "total_tokens": 1608960}
|
|
{"current_steps": 3275, "total_steps": 37885, "loss": 0.0021, "lr": 1.7281604645025071e-06, "epoch": 0.43222911442523426, "percentage": 8.64, "elapsed_time": "0:04:17", "remaining_time": "0:45:18", "throughput": 6263.96, "total_tokens": 1611456}
|
|
{"current_steps": 3280, "total_steps": 37885, "loss": 0.0595, "lr": 1.7307996832937451e-06, "epoch": 0.4328890062029827, "percentage": 8.66, "elapsed_time": "0:04:17", "remaining_time": "0:45:17", "throughput": 6265.68, "total_tokens": 1613952}
|
|
{"current_steps": 3285, "total_steps": 37885, "loss": 0.1862, "lr": 1.7334389020849827e-06, "epoch": 0.43354889798073115, "percentage": 8.67, "elapsed_time": "0:04:17", "remaining_time": "0:45:16", "throughput": 6266.99, "total_tokens": 1616320}
|
|
{"current_steps": 3290, "total_steps": 37885, "loss": 0.3119, "lr": 1.7360781208762205e-06, "epoch": 0.4342087897584796, "percentage": 8.68, "elapsed_time": "0:04:18", "remaining_time": "0:45:15", "throughput": 6269.8, "total_tokens": 1619136}
|
|
{"current_steps": 3295, "total_steps": 37885, "loss": 0.0999, "lr": 1.7387173396674584e-06, "epoch": 0.43486868153622804, "percentage": 8.7, "elapsed_time": "0:04:18", "remaining_time": "0:45:14", "throughput": 6271.49, "total_tokens": 1621632}
|
|
{"current_steps": 3300, "total_steps": 37885, "loss": 0.1682, "lr": 1.7413565584586962e-06, "epoch": 0.4355285733139765, "percentage": 8.71, "elapsed_time": "0:04:18", "remaining_time": "0:45:13", "throughput": 6272.92, "total_tokens": 1624064}
|
|
{"current_steps": 3305, "total_steps": 37885, "loss": 0.1445, "lr": 1.7439957772499338e-06, "epoch": 0.43618846509172493, "percentage": 8.72, "elapsed_time": "0:04:19", "remaining_time": "0:45:12", "throughput": 6274.4, "total_tokens": 1626496}
|
|
{"current_steps": 3310, "total_steps": 37885, "loss": 0.1393, "lr": 1.7466349960411718e-06, "epoch": 0.43684835686947343, "percentage": 8.74, "elapsed_time": "0:04:19", "remaining_time": "0:45:11", "throughput": 6275.38, "total_tokens": 1628800}
|
|
{"current_steps": 3315, "total_steps": 37885, "loss": 0.1456, "lr": 1.7492742148324095e-06, "epoch": 0.4375082486472219, "percentage": 8.75, "elapsed_time": "0:04:19", "remaining_time": "0:45:10", "throughput": 6276.85, "total_tokens": 1631232}
|
|
{"current_steps": 3320, "total_steps": 37885, "loss": 0.0401, "lr": 1.7519134336236473e-06, "epoch": 0.4381681404249703, "percentage": 8.76, "elapsed_time": "0:04:20", "remaining_time": "0:45:09", "throughput": 6278.44, "total_tokens": 1633728}
|
|
{"current_steps": 3325, "total_steps": 37885, "loss": 0.1055, "lr": 1.754552652414885e-06, "epoch": 0.43882803220271877, "percentage": 8.78, "elapsed_time": "0:04:20", "remaining_time": "0:45:08", "throughput": 6280.15, "total_tokens": 1636224}
|
|
{"current_steps": 3330, "total_steps": 37885, "loss": 0.0655, "lr": 1.7571918712061229e-06, "epoch": 0.4394879239804672, "percentage": 8.79, "elapsed_time": "0:04:20", "remaining_time": "0:45:06", "throughput": 6280.12, "total_tokens": 1638272}
|
|
{"current_steps": 3335, "total_steps": 37885, "loss": 0.0947, "lr": 1.7598310899973608e-06, "epoch": 0.44014781575821565, "percentage": 8.8, "elapsed_time": "0:04:21", "remaining_time": "0:45:05", "throughput": 6280.77, "total_tokens": 1640512}
|
|
{"current_steps": 3340, "total_steps": 37885, "loss": 0.0818, "lr": 1.7624703087885986e-06, "epoch": 0.4408077075359641, "percentage": 8.82, "elapsed_time": "0:04:21", "remaining_time": "0:45:04", "throughput": 6282.18, "total_tokens": 1642944}
|
|
{"current_steps": 3345, "total_steps": 37885, "loss": 0.0882, "lr": 1.7651095275798362e-06, "epoch": 0.44146759931371254, "percentage": 8.83, "elapsed_time": "0:04:21", "remaining_time": "0:45:03", "throughput": 6283.56, "total_tokens": 1645376}
|
|
{"current_steps": 3350, "total_steps": 37885, "loss": 0.0007, "lr": 1.7677487463710742e-06, "epoch": 0.442127491091461, "percentage": 8.84, "elapsed_time": "0:04:22", "remaining_time": "0:45:02", "throughput": 6285.42, "total_tokens": 1647936}
|
|
{"current_steps": 3355, "total_steps": 37885, "loss": 0.1307, "lr": 1.770387965162312e-06, "epoch": 0.44278738286920943, "percentage": 8.86, "elapsed_time": "0:04:22", "remaining_time": "0:45:01", "throughput": 6287.26, "total_tokens": 1650496}
|
|
{"current_steps": 3360, "total_steps": 37885, "loss": 0.1083, "lr": 1.7730271839535497e-06, "epoch": 0.4434472746469579, "percentage": 8.87, "elapsed_time": "0:04:22", "remaining_time": "0:45:00", "throughput": 6288.88, "total_tokens": 1652992}
|
|
{"current_steps": 3365, "total_steps": 37885, "loss": 0.0738, "lr": 1.7756664027447875e-06, "epoch": 0.4441071664247063, "percentage": 8.88, "elapsed_time": "0:04:23", "remaining_time": "0:44:59", "throughput": 6290.5, "total_tokens": 1655488}
|
|
{"current_steps": 3370, "total_steps": 37885, "loss": 0.1521, "lr": 1.7783056215360253e-06, "epoch": 0.4447670582024548, "percentage": 8.9, "elapsed_time": "0:04:23", "remaining_time": "0:44:58", "throughput": 6292.09, "total_tokens": 1657984}
|
|
{"current_steps": 3375, "total_steps": 37885, "loss": 0.2652, "lr": 1.780944840327263e-06, "epoch": 0.44542694998020327, "percentage": 8.91, "elapsed_time": "0:04:23", "remaining_time": "0:44:57", "throughput": 6294.49, "total_tokens": 1660736}
|
|
{"current_steps": 3380, "total_steps": 37885, "loss": 0.1661, "lr": 1.7835840591185008e-06, "epoch": 0.4460868417579517, "percentage": 8.92, "elapsed_time": "0:04:24", "remaining_time": "0:44:56", "throughput": 6295.6, "total_tokens": 1663104}
|
|
{"current_steps": 3385, "total_steps": 37885, "loss": 0.0768, "lr": 1.7862232779097386e-06, "epoch": 0.44674673353570016, "percentage": 8.93, "elapsed_time": "0:04:24", "remaining_time": "0:44:55", "throughput": 6296.27, "total_tokens": 1665344}
|
|
{"current_steps": 3390, "total_steps": 37885, "loss": 0.1197, "lr": 1.7888624967009763e-06, "epoch": 0.4474066253134486, "percentage": 8.95, "elapsed_time": "0:04:24", "remaining_time": "0:44:54", "throughput": 6298.72, "total_tokens": 1668096}
|
|
{"current_steps": 3395, "total_steps": 37885, "loss": 0.0718, "lr": 1.7915017154922143e-06, "epoch": 0.44806651709119705, "percentage": 8.96, "elapsed_time": "0:04:25", "remaining_time": "0:44:53", "throughput": 6299.16, "total_tokens": 1670272}
|
|
{"current_steps": 3400, "total_steps": 37885, "loss": 0.1287, "lr": 1.794140934283452e-06, "epoch": 0.4487264088689455, "percentage": 8.97, "elapsed_time": "0:04:25", "remaining_time": "0:44:52", "throughput": 6299.57, "total_tokens": 1672448}
|
|
{"current_steps": 3405, "total_steps": 37885, "loss": 0.0269, "lr": 1.7967801530746897e-06, "epoch": 0.44938630064669394, "percentage": 8.99, "elapsed_time": "0:04:25", "remaining_time": "0:44:51", "throughput": 6301.92, "total_tokens": 1675200}
|
|
{"current_steps": 3410, "total_steps": 37885, "loss": 0.1333, "lr": 1.7994193718659277e-06, "epoch": 0.4500461924244424, "percentage": 9.0, "elapsed_time": "0:04:26", "remaining_time": "0:44:50", "throughput": 6303.44, "total_tokens": 1677696}
|
|
{"current_steps": 3415, "total_steps": 37885, "loss": 0.0596, "lr": 1.8020585906571654e-06, "epoch": 0.4507060842021908, "percentage": 9.01, "elapsed_time": "0:04:26", "remaining_time": "0:44:49", "throughput": 6305.2, "total_tokens": 1680256}
|
|
{"current_steps": 3420, "total_steps": 37885, "loss": 0.085, "lr": 1.8046978094484032e-06, "epoch": 0.45136597597993927, "percentage": 9.03, "elapsed_time": "0:04:26", "remaining_time": "0:44:48", "throughput": 6306.25, "total_tokens": 1682624}
|
|
{"current_steps": 3425, "total_steps": 37885, "loss": 0.0495, "lr": 1.807337028239641e-06, "epoch": 0.4520258677576877, "percentage": 9.04, "elapsed_time": "0:04:27", "remaining_time": "0:44:47", "throughput": 6307.59, "total_tokens": 1685056}
|
|
{"current_steps": 3430, "total_steps": 37885, "loss": 0.1929, "lr": 1.8099762470308787e-06, "epoch": 0.45268575953543616, "percentage": 9.05, "elapsed_time": "0:04:27", "remaining_time": "0:44:46", "throughput": 6307.79, "total_tokens": 1687168}
|
|
{"current_steps": 3435, "total_steps": 37885, "loss": 0.277, "lr": 1.8126154658221167e-06, "epoch": 0.45334565131318466, "percentage": 9.07, "elapsed_time": "0:04:27", "remaining_time": "0:44:45", "throughput": 6307.72, "total_tokens": 1689216}
|
|
{"current_steps": 3440, "total_steps": 37885, "loss": 0.0305, "lr": 1.8152546846133543e-06, "epoch": 0.4540055430909331, "percentage": 9.08, "elapsed_time": "0:04:28", "remaining_time": "0:44:44", "throughput": 6309.5, "total_tokens": 1691776}
|
|
{"current_steps": 3445, "total_steps": 37885, "loss": 0.0776, "lr": 1.817893903404592e-06, "epoch": 0.45466543486868155, "percentage": 9.09, "elapsed_time": "0:04:28", "remaining_time": "0:44:43", "throughput": 6311.24, "total_tokens": 1694336}
|
|
{"current_steps": 3450, "total_steps": 37885, "loss": 0.2252, "lr": 1.82053312219583e-06, "epoch": 0.45532532664643, "percentage": 9.11, "elapsed_time": "0:04:28", "remaining_time": "0:44:42", "throughput": 6312.11, "total_tokens": 1696640}
|
|
{"current_steps": 3455, "total_steps": 37885, "loss": 0.0788, "lr": 1.8231723409870678e-06, "epoch": 0.45598521842417844, "percentage": 9.12, "elapsed_time": "0:04:29", "remaining_time": "0:44:41", "throughput": 6312.68, "total_tokens": 1698880}
|
|
{"current_steps": 3460, "total_steps": 37885, "loss": 0.2611, "lr": 1.8258115597783054e-06, "epoch": 0.4566451102019269, "percentage": 9.13, "elapsed_time": "0:04:29", "remaining_time": "0:44:40", "throughput": 6313.97, "total_tokens": 1701312}
|
|
{"current_steps": 3465, "total_steps": 37885, "loss": 0.2096, "lr": 1.8284507785695434e-06, "epoch": 0.45730500197967533, "percentage": 9.15, "elapsed_time": "0:04:29", "remaining_time": "0:44:39", "throughput": 6315.42, "total_tokens": 1703808}
|
|
{"current_steps": 3470, "total_steps": 37885, "loss": 0.0747, "lr": 1.8310899973607811e-06, "epoch": 0.4579648937574238, "percentage": 9.16, "elapsed_time": "0:04:30", "remaining_time": "0:44:38", "throughput": 6316.89, "total_tokens": 1706304}
|
|
{"current_steps": 3475, "total_steps": 37885, "loss": 0.3095, "lr": 1.8337292161520187e-06, "epoch": 0.4586247855351722, "percentage": 9.17, "elapsed_time": "0:04:30", "remaining_time": "0:44:37", "throughput": 6318.22, "total_tokens": 1708736}
|
|
{"current_steps": 3480, "total_steps": 37885, "loss": 0.1241, "lr": 1.8363684349432567e-06, "epoch": 0.45928467731292066, "percentage": 9.19, "elapsed_time": "0:04:30", "remaining_time": "0:44:37", "throughput": 6319.73, "total_tokens": 1711232}
|
|
{"current_steps": 3485, "total_steps": 37885, "loss": 0.0022, "lr": 1.8390076537344945e-06, "epoch": 0.4599445690906691, "percentage": 9.2, "elapsed_time": "0:04:31", "remaining_time": "0:44:36", "throughput": 6320.75, "total_tokens": 1713600}
|
|
{"current_steps": 3490, "total_steps": 37885, "loss": 0.1762, "lr": 1.8416468725257325e-06, "epoch": 0.46060446086841755, "percentage": 9.21, "elapsed_time": "0:04:31", "remaining_time": "0:44:35", "throughput": 6322.73, "total_tokens": 1716224}
|
|
{"current_steps": 3495, "total_steps": 37885, "loss": 0.2366, "lr": 1.84428609131697e-06, "epoch": 0.46126435264616605, "percentage": 9.23, "elapsed_time": "0:04:31", "remaining_time": "0:44:34", "throughput": 6324.22, "total_tokens": 1718720}
|
|
{"current_steps": 3500, "total_steps": 37885, "loss": 0.2068, "lr": 1.8469253101082078e-06, "epoch": 0.4619242444239145, "percentage": 9.24, "elapsed_time": "0:04:32", "remaining_time": "0:44:33", "throughput": 6325.94, "total_tokens": 1721280}
|
|
{"current_steps": 3505, "total_steps": 37885, "loss": 0.0785, "lr": 1.8495645288994458e-06, "epoch": 0.46258413620166294, "percentage": 9.25, "elapsed_time": "0:04:32", "remaining_time": "0:44:32", "throughput": 6327.22, "total_tokens": 1723712}
|
|
{"current_steps": 3510, "total_steps": 37885, "loss": 0.1624, "lr": 1.8522037476906835e-06, "epoch": 0.4632440279794114, "percentage": 9.26, "elapsed_time": "0:04:32", "remaining_time": "0:44:31", "throughput": 6327.85, "total_tokens": 1725952}
|
|
{"current_steps": 3515, "total_steps": 37885, "loss": 0.0022, "lr": 1.8548429664819211e-06, "epoch": 0.46390391975715983, "percentage": 9.28, "elapsed_time": "0:04:33", "remaining_time": "0:44:30", "throughput": 6329.53, "total_tokens": 1728512}
|
|
{"current_steps": 3520, "total_steps": 37885, "loss": 0.1997, "lr": 1.857482185273159e-06, "epoch": 0.4645638115349083, "percentage": 9.29, "elapsed_time": "0:04:33", "remaining_time": "0:44:29", "throughput": 6331.01, "total_tokens": 1731008}
|
|
{"current_steps": 3525, "total_steps": 37885, "loss": 0.1817, "lr": 1.8601214040643969e-06, "epoch": 0.4652237033126567, "percentage": 9.3, "elapsed_time": "0:04:33", "remaining_time": "0:44:28", "throughput": 6333.12, "total_tokens": 1733696}
|
|
{"current_steps": 3530, "total_steps": 37885, "loss": 0.0807, "lr": 1.8627606228556346e-06, "epoch": 0.46588359509040517, "percentage": 9.32, "elapsed_time": "0:04:34", "remaining_time": "0:44:27", "throughput": 6334.88, "total_tokens": 1736256}
|
|
{"current_steps": 3535, "total_steps": 37885, "loss": 0.2405, "lr": 1.8653998416468724e-06, "epoch": 0.4665434868681536, "percentage": 9.33, "elapsed_time": "0:04:34", "remaining_time": "0:44:26", "throughput": 6337.01, "total_tokens": 1738944}
|
|
{"current_steps": 3540, "total_steps": 37885, "loss": 0.207, "lr": 1.8680390604381102e-06, "epoch": 0.46720337864590206, "percentage": 9.34, "elapsed_time": "0:04:34", "remaining_time": "0:44:25", "throughput": 6338.95, "total_tokens": 1741568}
|
|
{"current_steps": 3545, "total_steps": 37885, "loss": 0.0024, "lr": 1.8706782792293482e-06, "epoch": 0.4678632704236505, "percentage": 9.36, "elapsed_time": "0:04:35", "remaining_time": "0:44:24", "throughput": 6339.52, "total_tokens": 1743808}
|
|
{"current_steps": 3550, "total_steps": 37885, "loss": 0.035, "lr": 1.873317498020586e-06, "epoch": 0.46852316220139895, "percentage": 9.37, "elapsed_time": "0:04:35", "remaining_time": "0:44:23", "throughput": 6340.64, "total_tokens": 1746176}
|
|
{"current_steps": 3555, "total_steps": 37885, "loss": 0.214, "lr": 1.8759567168118235e-06, "epoch": 0.46918305397914745, "percentage": 9.38, "elapsed_time": "0:04:35", "remaining_time": "0:44:22", "throughput": 6341.88, "total_tokens": 1748608}
|
|
{"current_steps": 3560, "total_steps": 37885, "loss": 0.0983, "lr": 1.8785959356030615e-06, "epoch": 0.4698429457568959, "percentage": 9.4, "elapsed_time": "0:04:36", "remaining_time": "0:44:21", "throughput": 6343.21, "total_tokens": 1751040}
|
|
{"current_steps": 3565, "total_steps": 37885, "loss": 0.1472, "lr": 1.8812351543942993e-06, "epoch": 0.47050283753464434, "percentage": 9.41, "elapsed_time": "0:04:36", "remaining_time": "0:44:20", "throughput": 6343.82, "total_tokens": 1753280}
|
|
{"current_steps": 3570, "total_steps": 37885, "loss": 0.4087, "lr": 1.883874373185537e-06, "epoch": 0.4711627293123928, "percentage": 9.42, "elapsed_time": "0:04:36", "remaining_time": "0:44:19", "throughput": 6345.15, "total_tokens": 1755712}
|
|
{"current_steps": 3575, "total_steps": 37885, "loss": 0.1291, "lr": 1.8865135919767748e-06, "epoch": 0.4718226210901412, "percentage": 9.44, "elapsed_time": "0:04:37", "remaining_time": "0:44:18", "throughput": 6345.75, "total_tokens": 1757952}
|
|
{"current_steps": 3580, "total_steps": 37885, "loss": 0.0719, "lr": 1.8891528107680126e-06, "epoch": 0.47248251286788967, "percentage": 9.45, "elapsed_time": "0:04:37", "remaining_time": "0:44:17", "throughput": 6346.93, "total_tokens": 1760384}
|
|
{"current_steps": 3585, "total_steps": 37885, "loss": 0.0011, "lr": 1.8917920295592504e-06, "epoch": 0.4731424046456381, "percentage": 9.46, "elapsed_time": "0:04:37", "remaining_time": "0:44:16", "throughput": 6348.54, "total_tokens": 1762944}
|
|
{"current_steps": 3590, "total_steps": 37885, "loss": 0.2399, "lr": 1.8944312483504881e-06, "epoch": 0.47380229642338656, "percentage": 9.48, "elapsed_time": "0:04:38", "remaining_time": "0:44:15", "throughput": 6350.3, "total_tokens": 1765504}
|
|
{"current_steps": 3595, "total_steps": 37885, "loss": 0.0452, "lr": 1.897070467141726e-06, "epoch": 0.474462188201135, "percentage": 9.49, "elapsed_time": "0:04:38", "remaining_time": "0:44:14", "throughput": 6352.17, "total_tokens": 1768128}
|
|
{"current_steps": 3600, "total_steps": 37885, "loss": 0.1898, "lr": 1.8997096859329637e-06, "epoch": 0.47512207997888345, "percentage": 9.5, "elapsed_time": "0:04:38", "remaining_time": "0:44:14", "throughput": 6353.59, "total_tokens": 1770624}
|
|
{"current_steps": 3605, "total_steps": 37885, "loss": 0.329, "lr": 1.9023489047242017e-06, "epoch": 0.4757819717566319, "percentage": 9.52, "elapsed_time": "0:04:39", "remaining_time": "0:44:13", "throughput": 6355.46, "total_tokens": 1773248}
|
|
{"current_steps": 3610, "total_steps": 37885, "loss": 0.0657, "lr": 1.9049881235154392e-06, "epoch": 0.47644186353438034, "percentage": 9.53, "elapsed_time": "0:04:39", "remaining_time": "0:44:12", "throughput": 6356.08, "total_tokens": 1775488}
|
|
{"current_steps": 3615, "total_steps": 37885, "loss": 0.1438, "lr": 1.907627342306677e-06, "epoch": 0.4771017553121288, "percentage": 9.54, "elapsed_time": "0:04:39", "remaining_time": "0:44:11", "throughput": 6356.87, "total_tokens": 1777792}
|
|
{"current_steps": 3620, "total_steps": 37885, "loss": 0.0654, "lr": 1.9102665610979148e-06, "epoch": 0.4777616470898773, "percentage": 9.56, "elapsed_time": "0:04:39", "remaining_time": "0:44:10", "throughput": 6358.5, "total_tokens": 1780352}
|
|
{"current_steps": 3625, "total_steps": 37885, "loss": 0.2003, "lr": 1.9129057798891528e-06, "epoch": 0.47842153886762573, "percentage": 9.57, "elapsed_time": "0:04:40", "remaining_time": "0:44:09", "throughput": 6360.9, "total_tokens": 1783168}
|
|
{"current_steps": 3630, "total_steps": 37885, "loss": 0.1582, "lr": 1.9155449986803903e-06, "epoch": 0.4790814306453742, "percentage": 9.58, "elapsed_time": "0:04:40", "remaining_time": "0:44:08", "throughput": 6363.16, "total_tokens": 1785920}
|
|
{"current_steps": 3635, "total_steps": 37885, "loss": 0.1929, "lr": 1.9181842174716283e-06, "epoch": 0.4797413224231226, "percentage": 9.59, "elapsed_time": "0:04:40", "remaining_time": "0:44:07", "throughput": 6364.61, "total_tokens": 1788416}
|
|
{"current_steps": 3640, "total_steps": 37885, "loss": 0.0412, "lr": 1.9208234362628663e-06, "epoch": 0.48040121420087106, "percentage": 9.61, "elapsed_time": "0:04:41", "remaining_time": "0:44:06", "throughput": 6365.77, "total_tokens": 1790848}
|
|
{"current_steps": 3645, "total_steps": 37885, "loss": 0.0349, "lr": 1.923462655054104e-06, "epoch": 0.4810611059786195, "percentage": 9.62, "elapsed_time": "0:04:41", "remaining_time": "0:44:05", "throughput": 6366.97, "total_tokens": 1793280}
|
|
{"current_steps": 3650, "total_steps": 37885, "loss": 0.209, "lr": 1.9261018738453414e-06, "epoch": 0.48172099775636795, "percentage": 9.63, "elapsed_time": "0:04:41", "remaining_time": "0:44:04", "throughput": 6367.94, "total_tokens": 1795648}
|
|
{"current_steps": 3655, "total_steps": 37885, "loss": 0.1709, "lr": 1.9287410926365794e-06, "epoch": 0.4823808895341164, "percentage": 9.65, "elapsed_time": "0:04:42", "remaining_time": "0:44:03", "throughput": 6368.67, "total_tokens": 1797952}
|
|
{"current_steps": 3660, "total_steps": 37885, "loss": 0.099, "lr": 1.9313803114278174e-06, "epoch": 0.48304078131186484, "percentage": 9.66, "elapsed_time": "0:04:42", "remaining_time": "0:44:02", "throughput": 6369.19, "total_tokens": 1800192}
|
|
{"current_steps": 3665, "total_steps": 37885, "loss": 0.1852, "lr": 1.9340195302190554e-06, "epoch": 0.4837006730896133, "percentage": 9.67, "elapsed_time": "0:04:42", "remaining_time": "0:44:02", "throughput": 6370.15, "total_tokens": 1802560}
|
|
{"current_steps": 3670, "total_steps": 37885, "loss": 0.2034, "lr": 1.936658749010293e-06, "epoch": 0.48436056486736173, "percentage": 9.69, "elapsed_time": "0:04:43", "remaining_time": "0:44:01", "throughput": 6371.5, "total_tokens": 1805056}
|
|
{"current_steps": 3675, "total_steps": 37885, "loss": 0.2424, "lr": 1.9392979678015305e-06, "epoch": 0.4850204566451102, "percentage": 9.7, "elapsed_time": "0:04:43", "remaining_time": "0:44:00", "throughput": 6373.52, "total_tokens": 1807744}
|
|
{"current_steps": 3680, "total_steps": 37885, "loss": 0.1529, "lr": 1.9419371865927685e-06, "epoch": 0.4856803484228587, "percentage": 9.71, "elapsed_time": "0:04:43", "remaining_time": "0:43:59", "throughput": 6375.31, "total_tokens": 1810368}
|
|
{"current_steps": 3685, "total_steps": 37885, "loss": 0.0411, "lr": 1.944576405384006e-06, "epoch": 0.4863402402006071, "percentage": 9.73, "elapsed_time": "0:04:44", "remaining_time": "0:43:58", "throughput": 6377.31, "total_tokens": 1813056}
|
|
{"current_steps": 3690, "total_steps": 37885, "loss": 0.2287, "lr": 1.947215624175244e-06, "epoch": 0.48700013197835557, "percentage": 9.74, "elapsed_time": "0:04:44", "remaining_time": "0:43:57", "throughput": 6377.92, "total_tokens": 1815360}
|
|
{"current_steps": 3695, "total_steps": 37885, "loss": 0.1827, "lr": 1.949854842966482e-06, "epoch": 0.487660023756104, "percentage": 9.75, "elapsed_time": "0:04:44", "remaining_time": "0:43:56", "throughput": 6379.49, "total_tokens": 1817920}
|
|
{"current_steps": 3700, "total_steps": 37885, "loss": 0.0576, "lr": 1.9524940617577196e-06, "epoch": 0.48831991553385246, "percentage": 9.77, "elapsed_time": "0:04:45", "remaining_time": "0:43:55", "throughput": 6380.35, "total_tokens": 1820288}
|
|
{"current_steps": 3705, "total_steps": 37885, "loss": 0.1, "lr": 1.955133280548957e-06, "epoch": 0.4889798073116009, "percentage": 9.78, "elapsed_time": "0:04:45", "remaining_time": "0:43:54", "throughput": 6381.33, "total_tokens": 1822656}
|
|
{"current_steps": 3710, "total_steps": 37885, "loss": 0.1911, "lr": 1.957772499340195e-06, "epoch": 0.48963969908934935, "percentage": 9.79, "elapsed_time": "0:04:45", "remaining_time": "0:43:54", "throughput": 6381.64, "total_tokens": 1824832}
|
|
{"current_steps": 3715, "total_steps": 37885, "loss": 0.1059, "lr": 1.960411718131433e-06, "epoch": 0.4902995908670978, "percentage": 9.81, "elapsed_time": "0:04:46", "remaining_time": "0:43:53", "throughput": 6382.6, "total_tokens": 1827200}
|
|
{"current_steps": 3720, "total_steps": 37885, "loss": 0.1355, "lr": 1.963050936922671e-06, "epoch": 0.49095948264484623, "percentage": 9.82, "elapsed_time": "0:04:46", "remaining_time": "0:43:52", "throughput": 6383.86, "total_tokens": 1829632}
|
|
{"current_steps": 3725, "total_steps": 37885, "loss": 0.0601, "lr": 1.9656901557139086e-06, "epoch": 0.4916193744225947, "percentage": 9.83, "elapsed_time": "0:04:46", "remaining_time": "0:43:51", "throughput": 6385.49, "total_tokens": 1832192}
|
|
{"current_steps": 3730, "total_steps": 37885, "loss": 0.1779, "lr": 1.968329374505146e-06, "epoch": 0.4922792662003431, "percentage": 9.85, "elapsed_time": "0:04:47", "remaining_time": "0:43:50", "throughput": 6387.06, "total_tokens": 1834752}
|
|
{"current_steps": 3735, "total_steps": 37885, "loss": 0.0968, "lr": 1.970968593296384e-06, "epoch": 0.49293915797809157, "percentage": 9.86, "elapsed_time": "0:04:47", "remaining_time": "0:43:49", "throughput": 6388.46, "total_tokens": 1837248}
|
|
{"current_steps": 3740, "total_steps": 37885, "loss": 0.2923, "lr": 1.973607812087622e-06, "epoch": 0.49359904975584007, "percentage": 9.87, "elapsed_time": "0:04:47", "remaining_time": "0:43:48", "throughput": 6389.56, "total_tokens": 1839680}
|
|
{"current_steps": 3745, "total_steps": 37885, "loss": 0.115, "lr": 1.9762470308788597e-06, "epoch": 0.4942589415335885, "percentage": 9.89, "elapsed_time": "0:04:48", "remaining_time": "0:43:47", "throughput": 6391.41, "total_tokens": 1842304}
|
|
{"current_steps": 3750, "total_steps": 37885, "loss": 0.0656, "lr": 1.9788862496700977e-06, "epoch": 0.49491883331133696, "percentage": 9.9, "elapsed_time": "0:04:48", "remaining_time": "0:43:46", "throughput": 6391.76, "total_tokens": 1844480}
|
|
{"current_steps": 3755, "total_steps": 37885, "loss": 0.0883, "lr": 1.9815254684613353e-06, "epoch": 0.4955787250890854, "percentage": 9.91, "elapsed_time": "0:04:48", "remaining_time": "0:43:45", "throughput": 6392.95, "total_tokens": 1846912}
|
|
{"current_steps": 3760, "total_steps": 37885, "loss": 0.1242, "lr": 1.9841646872525733e-06, "epoch": 0.49623861686683385, "percentage": 9.92, "elapsed_time": "0:04:49", "remaining_time": "0:43:44", "throughput": 6393.08, "total_tokens": 1849024}
|
|
{"current_steps": 3765, "total_steps": 37885, "loss": 0.0576, "lr": 1.986803906043811e-06, "epoch": 0.4968985086445823, "percentage": 9.94, "elapsed_time": "0:04:49", "remaining_time": "0:43:44", "throughput": 6395.04, "total_tokens": 1851712}
|
|
{"current_steps": 3770, "total_steps": 37885, "loss": 0.1076, "lr": 1.989443124835049e-06, "epoch": 0.49755840042233074, "percentage": 9.95, "elapsed_time": "0:04:49", "remaining_time": "0:43:43", "throughput": 6396.4, "total_tokens": 1854208}
|
|
{"current_steps": 3775, "total_steps": 37885, "loss": 0.0448, "lr": 1.9920823436262864e-06, "epoch": 0.4982182922000792, "percentage": 9.96, "elapsed_time": "0:04:50", "remaining_time": "0:43:42", "throughput": 6397.76, "total_tokens": 1856704}
|
|
{"current_steps": 3780, "total_steps": 37885, "loss": 0.1233, "lr": 1.9947215624175244e-06, "epoch": 0.4988781839778276, "percentage": 9.98, "elapsed_time": "0:04:50", "remaining_time": "0:43:41", "throughput": 6398.24, "total_tokens": 1858944}
|
|
{"current_steps": 3785, "total_steps": 37885, "loss": 0.1662, "lr": 1.997360781208762e-06, "epoch": 0.49953807575557607, "percentage": 9.99, "elapsed_time": "0:04:50", "remaining_time": "0:43:40", "throughput": 6400.42, "total_tokens": 1861696}
|
|
{"current_steps": 3790, "total_steps": 37885, "loss": 0.3196, "lr": 2e-06, "epoch": 0.5001979675333246, "percentage": 10.0, "elapsed_time": "0:04:51", "remaining_time": "0:43:39", "throughput": 6401.59, "total_tokens": 1864128}
|
|
{"current_steps": 3790, "total_steps": 37885, "eval_loss": 0.15765729546546936, "epoch": 0.5001979675333246, "percentage": 10.0, "elapsed_time": "0:04:59", "remaining_time": "0:44:49", "throughput": 6234.32, "total_tokens": 1864128}
|
|
{"current_steps": 3795, "total_steps": 37885, "loss": 0.0058, "lr": 1.9999998938786208e-06, "epoch": 0.500857859311073, "percentage": 10.02, "elapsed_time": "0:05:33", "remaining_time": "0:49:53", "throughput": 5600.48, "total_tokens": 1866432}
|
|
{"current_steps": 3800, "total_steps": 37885, "loss": 0.0722, "lr": 1.9999995755145053e-06, "epoch": 0.5015177510888215, "percentage": 10.03, "elapsed_time": "0:05:33", "remaining_time": "0:49:52", "throughput": 5601.92, "total_tokens": 1868736}
|
|
{"current_steps": 3805, "total_steps": 37885, "loss": 0.1512, "lr": 1.9999990449077214e-06, "epoch": 0.5021776428665699, "percentage": 10.04, "elapsed_time": "0:05:33", "remaining_time": "0:49:50", "throughput": 5604.2, "total_tokens": 1871360}
|
|
{"current_steps": 3810, "total_steps": 37885, "loss": 0.0042, "lr": 1.999998302058382e-06, "epoch": 0.5028375346443184, "percentage": 10.06, "elapsed_time": "0:05:34", "remaining_time": "0:49:49", "throughput": 5605.24, "total_tokens": 1873536}
|
|
{"current_steps": 3815, "total_steps": 37885, "loss": 0.2111, "lr": 1.999997346966644e-06, "epoch": 0.5034974264220667, "percentage": 10.07, "elapsed_time": "0:05:34", "remaining_time": "0:49:47", "throughput": 5606.79, "total_tokens": 1875904}
|
|
{"current_steps": 3820, "total_steps": 37885, "loss": 0.2222, "lr": 1.999996179632711e-06, "epoch": 0.5041573181998152, "percentage": 10.08, "elapsed_time": "0:05:34", "remaining_time": "0:49:46", "throughput": 5608.88, "total_tokens": 1878464}
|
|
{"current_steps": 3825, "total_steps": 37885, "loss": 0.1967, "lr": 1.9999948000568297e-06, "epoch": 0.5048172099775636, "percentage": 10.1, "elapsed_time": "0:05:35", "remaining_time": "0:49:45", "throughput": 5609.91, "total_tokens": 1880640}
|
|
{"current_steps": 3830, "total_steps": 37885, "loss": 0.1649, "lr": 1.9999932082392934e-06, "epoch": 0.5054771017553121, "percentage": 10.11, "elapsed_time": "0:05:35", "remaining_time": "0:49:43", "throughput": 5611.21, "total_tokens": 1882944}
|
|
{"current_steps": 3835, "total_steps": 37885, "loss": 0.064, "lr": 1.9999914041804405e-06, "epoch": 0.5061369935330606, "percentage": 10.12, "elapsed_time": "0:05:35", "remaining_time": "0:49:42", "throughput": 5612.56, "total_tokens": 1885248}
|
|
{"current_steps": 3840, "total_steps": 37885, "loss": 0.1077, "lr": 1.9999893878806534e-06, "epoch": 0.506796885310809, "percentage": 10.14, "elapsed_time": "0:05:36", "remaining_time": "0:49:40", "throughput": 5614.81, "total_tokens": 1887872}
|
|
{"current_steps": 3845, "total_steps": 37885, "loss": 0.0414, "lr": 1.99998715934036e-06, "epoch": 0.5074567770885575, "percentage": 10.15, "elapsed_time": "0:05:36", "remaining_time": "0:49:39", "throughput": 5617.24, "total_tokens": 1890560}
|
|
{"current_steps": 3850, "total_steps": 37885, "loss": 0.1999, "lr": 1.999984718560033e-06, "epoch": 0.5081166688663059, "percentage": 10.16, "elapsed_time": "0:05:36", "remaining_time": "0:49:38", "throughput": 5619.04, "total_tokens": 1893056}
|
|
{"current_steps": 3855, "total_steps": 37885, "loss": 0.1416, "lr": 1.9999820655401914e-06, "epoch": 0.5087765606440544, "percentage": 10.18, "elapsed_time": "0:05:37", "remaining_time": "0:49:36", "throughput": 5620.36, "total_tokens": 1895360}
|
|
{"current_steps": 3860, "total_steps": 37885, "loss": 0.0954, "lr": 1.9999792002813973e-06, "epoch": 0.5094364524218028, "percentage": 10.19, "elapsed_time": "0:05:37", "remaining_time": "0:49:35", "throughput": 5621.69, "total_tokens": 1897664}
|
|
{"current_steps": 3865, "total_steps": 37885, "loss": 0.27, "lr": 1.9999761227842592e-06, "epoch": 0.5100963441995513, "percentage": 10.2, "elapsed_time": "0:05:37", "remaining_time": "0:49:34", "throughput": 5623.97, "total_tokens": 1900288}
|
|
{"current_steps": 3870, "total_steps": 37885, "loss": 0.0797, "lr": 1.9999728330494307e-06, "epoch": 0.5107562359772997, "percentage": 10.22, "elapsed_time": "0:05:38", "remaining_time": "0:49:32", "throughput": 5626.53, "total_tokens": 1903040}
|
|
{"current_steps": 3875, "total_steps": 37885, "loss": 0.1566, "lr": 1.9999693310776095e-06, "epoch": 0.5114161277550482, "percentage": 10.23, "elapsed_time": "0:05:38", "remaining_time": "0:49:31", "throughput": 5628.21, "total_tokens": 1905472}
|
|
{"current_steps": 3880, "total_steps": 37885, "loss": 0.0922, "lr": 1.9999656168695387e-06, "epoch": 0.5120760195327966, "percentage": 10.24, "elapsed_time": "0:05:38", "remaining_time": "0:49:30", "throughput": 5629.36, "total_tokens": 1907712}
|
|
{"current_steps": 3885, "total_steps": 37885, "loss": 0.001, "lr": 1.9999616904260072e-06, "epoch": 0.5127359113105451, "percentage": 10.25, "elapsed_time": "0:05:39", "remaining_time": "0:49:28", "throughput": 5630.89, "total_tokens": 1910080}
|
|
{"current_steps": 3890, "total_steps": 37885, "loss": 0.2619, "lr": 1.9999575517478477e-06, "epoch": 0.5133958030882935, "percentage": 10.27, "elapsed_time": "0:05:39", "remaining_time": "0:49:27", "throughput": 5632.58, "total_tokens": 1912512}
|
|
{"current_steps": 3895, "total_steps": 37885, "loss": 0.1074, "lr": 1.9999532008359393e-06, "epoch": 0.514055694866042, "percentage": 10.28, "elapsed_time": "0:05:39", "remaining_time": "0:49:25", "throughput": 5633.76, "total_tokens": 1914752}
|
|
{"current_steps": 3900, "total_steps": 37885, "loss": 0.002, "lr": 1.999948637691205e-06, "epoch": 0.5147155866437905, "percentage": 10.29, "elapsed_time": "0:05:40", "remaining_time": "0:49:24", "throughput": 5635.27, "total_tokens": 1917120}
|
|
{"current_steps": 3905, "total_steps": 37885, "loss": 0.0127, "lr": 1.9999438623146132e-06, "epoch": 0.5153754784215389, "percentage": 10.31, "elapsed_time": "0:05:40", "remaining_time": "0:49:23", "throughput": 5636.06, "total_tokens": 1919232}
|
|
{"current_steps": 3910, "total_steps": 37885, "loss": 0.0941, "lr": 1.999938874707178e-06, "epoch": 0.5160353701992874, "percentage": 10.32, "elapsed_time": "0:05:40", "remaining_time": "0:49:21", "throughput": 5638.23, "total_tokens": 1921856}
|
|
{"current_steps": 3915, "total_steps": 37885, "loss": 0.1961, "lr": 1.9999336748699576e-06, "epoch": 0.5166952619770357, "percentage": 10.33, "elapsed_time": "0:05:41", "remaining_time": "0:49:20", "throughput": 5639.71, "total_tokens": 1924224}
|
|
{"current_steps": 3920, "total_steps": 37885, "loss": 0.0313, "lr": 1.9999282628040553e-06, "epoch": 0.5173551537547842, "percentage": 10.35, "elapsed_time": "0:05:41", "remaining_time": "0:49:19", "throughput": 5640.86, "total_tokens": 1926464}
|
|
{"current_steps": 3925, "total_steps": 37885, "loss": 0.3644, "lr": 1.9999226385106205e-06, "epoch": 0.5180150455325326, "percentage": 10.36, "elapsed_time": "0:05:41", "remaining_time": "0:49:17", "throughput": 5642.58, "total_tokens": 1928896}
|
|
{"current_steps": 3930, "total_steps": 37885, "loss": 0.1085, "lr": 1.9999168019908464e-06, "epoch": 0.5186749373102811, "percentage": 10.37, "elapsed_time": "0:05:42", "remaining_time": "0:49:16", "throughput": 5643.9, "total_tokens": 1931200}
|
|
{"current_steps": 3935, "total_steps": 37885, "loss": 0.1389, "lr": 1.9999107532459716e-06, "epoch": 0.5193348290880295, "percentage": 10.39, "elapsed_time": "0:05:42", "remaining_time": "0:49:15", "throughput": 5645.61, "total_tokens": 1933632}
|
|
{"current_steps": 3940, "total_steps": 37885, "loss": 0.0024, "lr": 1.9999044922772808e-06, "epoch": 0.519994720865778, "percentage": 10.4, "elapsed_time": "0:05:42", "remaining_time": "0:49:13", "throughput": 5646.64, "total_tokens": 1935872}
|
|
{"current_steps": 3945, "total_steps": 37885, "loss": 0.0619, "lr": 1.999898019086102e-06, "epoch": 0.5206546126435264, "percentage": 10.41, "elapsed_time": "0:05:43", "remaining_time": "0:49:12", "throughput": 5647.59, "total_tokens": 1938048}
|
|
{"current_steps": 3950, "total_steps": 37885, "loss": 0.0606, "lr": 1.999891333673809e-06, "epoch": 0.5213145044212749, "percentage": 10.43, "elapsed_time": "0:05:43", "remaining_time": "0:49:11", "throughput": 5649.6, "total_tokens": 1940608}
|
|
{"current_steps": 3955, "total_steps": 37885, "loss": 0.3859, "lr": 1.999884436041822e-06, "epoch": 0.5219743961990233, "percentage": 10.44, "elapsed_time": "0:05:43", "remaining_time": "0:49:09", "throughput": 5651.24, "total_tokens": 1943040}
|
|
{"current_steps": 3960, "total_steps": 37885, "loss": 0.1834, "lr": 1.999877326191603e-06, "epoch": 0.5226342879767718, "percentage": 10.45, "elapsed_time": "0:05:44", "remaining_time": "0:49:08", "throughput": 5653.41, "total_tokens": 1945664}
|
|
{"current_steps": 3965, "total_steps": 37885, "loss": 0.0053, "lr": 1.9998700041246626e-06, "epoch": 0.5232941797545203, "percentage": 10.47, "elapsed_time": "0:05:44", "remaining_time": "0:49:07", "throughput": 5654.88, "total_tokens": 1948032}
|
|
{"current_steps": 3970, "total_steps": 37885, "loss": 0.0008, "lr": 1.9998624698425545e-06, "epoch": 0.5239540715322687, "percentage": 10.48, "elapsed_time": "0:05:44", "remaining_time": "0:49:05", "throughput": 5655.8, "total_tokens": 1950208}
|
|
{"current_steps": 3975, "total_steps": 37885, "loss": 0.1693, "lr": 1.999854723346877e-06, "epoch": 0.5246139633100172, "percentage": 10.49, "elapsed_time": "0:05:45", "remaining_time": "0:49:04", "throughput": 5658.07, "total_tokens": 1952896}
|
|
{"current_steps": 3980, "total_steps": 37885, "loss": 0.0881, "lr": 1.999846764639275e-06, "epoch": 0.5252738550877656, "percentage": 10.51, "elapsed_time": "0:05:45", "remaining_time": "0:49:03", "throughput": 5659.34, "total_tokens": 1955200}
|
|
{"current_steps": 3985, "total_steps": 37885, "loss": 0.0753, "lr": 1.999838593721438e-06, "epoch": 0.5259337468655141, "percentage": 10.52, "elapsed_time": "0:05:45", "remaining_time": "0:49:01", "throughput": 5660.23, "total_tokens": 1957376}
|
|
{"current_steps": 3990, "total_steps": 37885, "loss": 0.2321, "lr": 1.999830210595099e-06, "epoch": 0.5265936386432625, "percentage": 10.53, "elapsed_time": "0:05:46", "remaining_time": "0:49:00", "throughput": 5662.49, "total_tokens": 1960064}
|
|
{"current_steps": 3995, "total_steps": 37885, "loss": 0.1365, "lr": 1.999821615262039e-06, "epoch": 0.527253530421011, "percentage": 10.55, "elapsed_time": "0:05:46", "remaining_time": "0:48:59", "throughput": 5664.47, "total_tokens": 1962624}
|
|
{"current_steps": 4000, "total_steps": 37885, "loss": 0.0799, "lr": 1.9998128077240805e-06, "epoch": 0.5279134221987594, "percentage": 10.56, "elapsed_time": "0:05:46", "remaining_time": "0:48:57", "throughput": 5666.12, "total_tokens": 1965056}
|
|
{"current_steps": 4005, "total_steps": 37885, "loss": 0.137, "lr": 1.9998037879830937e-06, "epoch": 0.5285733139765079, "percentage": 10.57, "elapsed_time": "0:05:47", "remaining_time": "0:48:56", "throughput": 5667.62, "total_tokens": 1967424}
|
|
{"current_steps": 4010, "total_steps": 37885, "loss": 0.2433, "lr": 1.999794556040993e-06, "epoch": 0.5292332057542563, "percentage": 10.58, "elapsed_time": "0:05:47", "remaining_time": "0:48:55", "throughput": 5669.27, "total_tokens": 1969856}
|
|
{"current_steps": 4015, "total_steps": 37885, "loss": 0.0609, "lr": 1.999785111899738e-06, "epoch": 0.5298930975320048, "percentage": 10.6, "elapsed_time": "0:05:47", "remaining_time": "0:48:53", "throughput": 5671.56, "total_tokens": 1972544}
|
|
{"current_steps": 4020, "total_steps": 37885, "loss": 0.0675, "lr": 1.9997754555613324e-06, "epoch": 0.5305529893097533, "percentage": 10.61, "elapsed_time": "0:05:48", "remaining_time": "0:48:52", "throughput": 5672.99, "total_tokens": 1974912}
|
|
{"current_steps": 4025, "total_steps": 37885, "loss": 0.2871, "lr": 1.999765587027827e-06, "epoch": 0.5312128810875016, "percentage": 10.62, "elapsed_time": "0:05:48", "remaining_time": "0:48:51", "throughput": 5674.25, "total_tokens": 1977216}
|
|
{"current_steps": 4030, "total_steps": 37885, "loss": 0.2346, "lr": 1.9997555063013147e-06, "epoch": 0.5318727728652501, "percentage": 10.64, "elapsed_time": "0:05:48", "remaining_time": "0:48:50", "throughput": 5675.88, "total_tokens": 1979648}
|
|
{"current_steps": 4035, "total_steps": 37885, "loss": 0.013, "lr": 1.999745213383936e-06, "epoch": 0.5325326646429985, "percentage": 10.65, "elapsed_time": "0:05:49", "remaining_time": "0:48:48", "throughput": 5676.97, "total_tokens": 1981888}
|
|
{"current_steps": 4040, "total_steps": 37885, "loss": 0.1859, "lr": 1.9997347082778753e-06, "epoch": 0.533192556420747, "percentage": 10.66, "elapsed_time": "0:05:49", "remaining_time": "0:48:47", "throughput": 5678.93, "total_tokens": 1984448}
|
|
{"current_steps": 4045, "total_steps": 37885, "loss": 0.3186, "lr": 1.999723990985363e-06, "epoch": 0.5338524481984954, "percentage": 10.68, "elapsed_time": "0:05:49", "remaining_time": "0:48:46", "throughput": 5681.02, "total_tokens": 1987072}
|
|
{"current_steps": 4050, "total_steps": 37885, "loss": 0.1181, "lr": 1.999713061508672e-06, "epoch": 0.5345123399762439, "percentage": 10.69, "elapsed_time": "0:05:50", "remaining_time": "0:48:44", "throughput": 5682.95, "total_tokens": 1989632}
|
|
{"current_steps": 4055, "total_steps": 37885, "loss": 0.0538, "lr": 1.9997019198501233e-06, "epoch": 0.5351722317539923, "percentage": 10.7, "elapsed_time": "0:05:50", "remaining_time": "0:48:43", "throughput": 5684.94, "total_tokens": 1992192}
|
|
{"current_steps": 4060, "total_steps": 37885, "loss": 0.265, "lr": 1.999690566012082e-06, "epoch": 0.5358321235317408, "percentage": 10.72, "elapsed_time": "0:05:50", "remaining_time": "0:48:42", "throughput": 5686.54, "total_tokens": 1994624}
|
|
{"current_steps": 4065, "total_steps": 37885, "loss": 0.0607, "lr": 1.9996789999969568e-06, "epoch": 0.5364920153094892, "percentage": 10.73, "elapsed_time": "0:05:51", "remaining_time": "0:48:41", "throughput": 5688.09, "total_tokens": 1997056}
|
|
{"current_steps": 4070, "total_steps": 37885, "loss": 0.134, "lr": 1.999667221807203e-06, "epoch": 0.5371519070872377, "percentage": 10.74, "elapsed_time": "0:05:51", "remaining_time": "0:48:39", "throughput": 5689.21, "total_tokens": 1999360}
|
|
{"current_steps": 4075, "total_steps": 37885, "loss": 0.082, "lr": 1.9996552314453204e-06, "epoch": 0.5378117988649861, "percentage": 10.76, "elapsed_time": "0:05:51", "remaining_time": "0:48:38", "throughput": 5690.86, "total_tokens": 2001856}
|
|
{"current_steps": 4080, "total_steps": 37885, "loss": 0.1956, "lr": 1.999643028913854e-06, "epoch": 0.5384716906427346, "percentage": 10.77, "elapsed_time": "0:05:52", "remaining_time": "0:48:37", "throughput": 5692.43, "total_tokens": 2004288}
|
|
{"current_steps": 4085, "total_steps": 37885, "loss": 0.1135, "lr": 1.9996306142153935e-06, "epoch": 0.5391315824204831, "percentage": 10.78, "elapsed_time": "0:05:52", "remaining_time": "0:48:36", "throughput": 5694.66, "total_tokens": 2006976}
|
|
{"current_steps": 4090, "total_steps": 37885, "loss": 0.0367, "lr": 1.9996179873525737e-06, "epoch": 0.5397914741982315, "percentage": 10.8, "elapsed_time": "0:05:52", "remaining_time": "0:48:34", "throughput": 5695.81, "total_tokens": 2009280}
|
|
{"current_steps": 4095, "total_steps": 37885, "loss": 0.2253, "lr": 1.9996051483280744e-06, "epoch": 0.54045136597598, "percentage": 10.81, "elapsed_time": "0:05:53", "remaining_time": "0:48:33", "throughput": 5697.54, "total_tokens": 2011776}
|
|
{"current_steps": 4100, "total_steps": 37885, "loss": 0.4021, "lr": 1.9995920971446215e-06, "epoch": 0.5411112577537284, "percentage": 10.82, "elapsed_time": "0:05:53", "remaining_time": "0:48:32", "throughput": 5699.35, "total_tokens": 2014336}
|
|
{"current_steps": 4105, "total_steps": 37885, "loss": 0.1495, "lr": 1.9995788338049846e-06, "epoch": 0.5417711495314769, "percentage": 10.84, "elapsed_time": "0:05:53", "remaining_time": "0:48:31", "throughput": 5701.76, "total_tokens": 2017152}
|
|
{"current_steps": 4110, "total_steps": 37885, "loss": 0.1161, "lr": 1.999565358311978e-06, "epoch": 0.5424310413092253, "percentage": 10.85, "elapsed_time": "0:05:54", "remaining_time": "0:48:29", "throughput": 5703.14, "total_tokens": 2019520}
|
|
{"current_steps": 4115, "total_steps": 37885, "loss": 0.1655, "lr": 1.999551670668463e-06, "epoch": 0.5430909330869738, "percentage": 10.86, "elapsed_time": "0:05:54", "remaining_time": "0:48:28", "throughput": 5703.69, "total_tokens": 2021632}
|
|
{"current_steps": 4120, "total_steps": 37885, "loss": 0.1365, "lr": 1.9995377708773437e-06, "epoch": 0.5437508248647221, "percentage": 10.88, "elapsed_time": "0:05:54", "remaining_time": "0:48:27", "throughput": 5704.25, "total_tokens": 2023744}
|
|
{"current_steps": 4125, "total_steps": 37885, "loss": 0.0072, "lr": 1.999523658941571e-06, "epoch": 0.5444107166424706, "percentage": 10.89, "elapsed_time": "0:05:55", "remaining_time": "0:48:26", "throughput": 5705.48, "total_tokens": 2026048}
|
|
{"current_steps": 4130, "total_steps": 37885, "loss": 0.0905, "lr": 1.999509334864139e-06, "epoch": 0.545070608420219, "percentage": 10.9, "elapsed_time": "0:05:55", "remaining_time": "0:48:25", "throughput": 5706.69, "total_tokens": 2028352}
|
|
{"current_steps": 4135, "total_steps": 37885, "loss": 0.0448, "lr": 1.999494798648089e-06, "epoch": 0.5457305001979675, "percentage": 10.91, "elapsed_time": "0:05:55", "remaining_time": "0:48:23", "throughput": 5708.34, "total_tokens": 2030848}
|
|
{"current_steps": 4140, "total_steps": 37885, "loss": 0.1427, "lr": 1.9994800502965055e-06, "epoch": 0.5463903919757159, "percentage": 10.93, "elapsed_time": "0:05:56", "remaining_time": "0:48:22", "throughput": 5709.93, "total_tokens": 2033344}
|
|
{"current_steps": 4145, "total_steps": 37885, "loss": 0.0661, "lr": 1.9994650898125193e-06, "epoch": 0.5470502837534644, "percentage": 10.94, "elapsed_time": "0:05:56", "remaining_time": "0:48:21", "throughput": 5710.87, "total_tokens": 2035584}
|
|
{"current_steps": 4150, "total_steps": 37885, "loss": 0.0643, "lr": 1.9994499171993056e-06, "epoch": 0.5477101755312129, "percentage": 10.95, "elapsed_time": "0:05:56", "remaining_time": "0:48:20", "throughput": 5712.94, "total_tokens": 2038272}
|
|
{"current_steps": 4155, "total_steps": 37885, "loss": 0.1879, "lr": 1.999434532460084e-06, "epoch": 0.5483700673089613, "percentage": 10.97, "elapsed_time": "0:05:57", "remaining_time": "0:48:19", "throughput": 5714.44, "total_tokens": 2040768}
|
|
{"current_steps": 4160, "total_steps": 37885, "loss": 0.1577, "lr": 1.99941893559812e-06, "epoch": 0.5490299590867098, "percentage": 10.98, "elapsed_time": "0:05:57", "remaining_time": "0:48:17", "throughput": 5715.52, "total_tokens": 2043072}
|
|
{"current_steps": 4165, "total_steps": 37885, "loss": 0.1324, "lr": 1.9994031266167247e-06, "epoch": 0.5496898508644582, "percentage": 10.99, "elapsed_time": "0:05:57", "remaining_time": "0:48:16", "throughput": 5717.84, "total_tokens": 2045824}
|
|
{"current_steps": 4170, "total_steps": 37885, "loss": 0.366, "lr": 1.999387105519253e-06, "epoch": 0.5503497426422067, "percentage": 11.01, "elapsed_time": "0:05:58", "remaining_time": "0:48:15", "throughput": 5718.8, "total_tokens": 2048064}
|
|
{"current_steps": 4175, "total_steps": 37885, "loss": 0.0539, "lr": 1.9993708723091044e-06, "epoch": 0.5510096344199551, "percentage": 11.02, "elapsed_time": "0:05:58", "remaining_time": "0:48:14", "throughput": 5720.05, "total_tokens": 2050432}
|
|
{"current_steps": 4180, "total_steps": 37885, "loss": 0.0967, "lr": 1.9993544269897253e-06, "epoch": 0.5516695261977036, "percentage": 11.03, "elapsed_time": "0:05:58", "remaining_time": "0:48:13", "throughput": 5721.74, "total_tokens": 2052928}
|
|
{"current_steps": 4185, "total_steps": 37885, "loss": 0.1474, "lr": 1.999337769564606e-06, "epoch": 0.552329417975452, "percentage": 11.05, "elapsed_time": "0:05:59", "remaining_time": "0:48:11", "throughput": 5723.35, "total_tokens": 2055424}
|
|
{"current_steps": 4190, "total_steps": 37885, "loss": 0.121, "lr": 1.9993209000372814e-06, "epoch": 0.5529893097532005, "percentage": 11.06, "elapsed_time": "0:05:59", "remaining_time": "0:48:10", "throughput": 5723.93, "total_tokens": 2057536}
|
|
{"current_steps": 4195, "total_steps": 37885, "loss": 0.2545, "lr": 1.9993038184113325e-06, "epoch": 0.5536492015309489, "percentage": 11.07, "elapsed_time": "0:05:59", "remaining_time": "0:48:09", "throughput": 5725.03, "total_tokens": 2059840}
|
|
{"current_steps": 4200, "total_steps": 37885, "loss": 0.2414, "lr": 1.999286524690385e-06, "epoch": 0.5543090933086974, "percentage": 11.09, "elapsed_time": "0:06:00", "remaining_time": "0:48:08", "throughput": 5727.38, "total_tokens": 2062656}
|
|
{"current_steps": 4205, "total_steps": 37885, "loss": 0.2045, "lr": 1.999269018878108e-06, "epoch": 0.5549689850864459, "percentage": 11.1, "elapsed_time": "0:06:00", "remaining_time": "0:48:07", "throughput": 5728.46, "total_tokens": 2064960}
|
|
{"current_steps": 4210, "total_steps": 37885, "loss": 0.233, "lr": 1.999251300978219e-06, "epoch": 0.5556288768641943, "percentage": 11.11, "elapsed_time": "0:06:00", "remaining_time": "0:48:06", "throughput": 5728.87, "total_tokens": 2067008}
|
|
{"current_steps": 4215, "total_steps": 37885, "loss": 0.0333, "lr": 1.9992333709944764e-06, "epoch": 0.5562887686419428, "percentage": 11.13, "elapsed_time": "0:06:01", "remaining_time": "0:48:04", "throughput": 5730.95, "total_tokens": 2069696}
|
|
{"current_steps": 4220, "total_steps": 37885, "loss": 0.0229, "lr": 1.9992152289306872e-06, "epoch": 0.5569486604196912, "percentage": 11.14, "elapsed_time": "0:06:01", "remaining_time": "0:48:03", "throughput": 5732.82, "total_tokens": 2072320}
|
|
{"current_steps": 4225, "total_steps": 37885, "loss": 0.005, "lr": 1.999196874790701e-06, "epoch": 0.5576085521974397, "percentage": 11.15, "elapsed_time": "0:06:01", "remaining_time": "0:48:02", "throughput": 5734.33, "total_tokens": 2074752}
|
|
{"current_steps": 4230, "total_steps": 37885, "loss": 0.1422, "lr": 1.999178308578414e-06, "epoch": 0.558268443975188, "percentage": 11.17, "elapsed_time": "0:06:02", "remaining_time": "0:48:01", "throughput": 5736.36, "total_tokens": 2077440}
|
|
{"current_steps": 4235, "total_steps": 37885, "loss": 0.2342, "lr": 1.9991595302977666e-06, "epoch": 0.5589283357529365, "percentage": 11.18, "elapsed_time": "0:06:02", "remaining_time": "0:48:00", "throughput": 5738.07, "total_tokens": 2080000}
|
|
{"current_steps": 4240, "total_steps": 37885, "loss": 0.196, "lr": 1.9991405399527438e-06, "epoch": 0.5595882275306849, "percentage": 11.19, "elapsed_time": "0:06:02", "remaining_time": "0:47:59", "throughput": 5739.78, "total_tokens": 2082560}
|
|
{"current_steps": 4245, "total_steps": 37885, "loss": 0.1375, "lr": 1.999121337547377e-06, "epoch": 0.5602481193084334, "percentage": 11.2, "elapsed_time": "0:06:03", "remaining_time": "0:47:57", "throughput": 5740.89, "total_tokens": 2084864}
|
|
{"current_steps": 4250, "total_steps": 37885, "loss": 0.097, "lr": 1.9991019230857413e-06, "epoch": 0.5609080110861818, "percentage": 11.22, "elapsed_time": "0:06:03", "remaining_time": "0:47:56", "throughput": 5742.57, "total_tokens": 2087424}
|
|
{"current_steps": 4255, "total_steps": 37885, "loss": 0.1572, "lr": 1.999082296571957e-06, "epoch": 0.5615679028639303, "percentage": 11.23, "elapsed_time": "0:06:03", "remaining_time": "0:47:55", "throughput": 5744.42, "total_tokens": 2090048}
|
|
{"current_steps": 4260, "total_steps": 37885, "loss": 0.0845, "lr": 1.9990624580101907e-06, "epoch": 0.5622277946416787, "percentage": 11.24, "elapsed_time": "0:06:04", "remaining_time": "0:47:54", "throughput": 5745.61, "total_tokens": 2092416}
|
|
{"current_steps": 4265, "total_steps": 37885, "loss": 0.0568, "lr": 1.999042407404652e-06, "epoch": 0.5628876864194272, "percentage": 11.26, "elapsed_time": "0:06:04", "remaining_time": "0:47:53", "throughput": 5746.51, "total_tokens": 2094656}
|
|
{"current_steps": 4270, "total_steps": 37885, "loss": 0.0462, "lr": 1.999022144759597e-06, "epoch": 0.5635475781971757, "percentage": 11.27, "elapsed_time": "0:06:04", "remaining_time": "0:47:52", "throughput": 5747.78, "total_tokens": 2097024}
|
|
{"current_steps": 4275, "total_steps": 37885, "loss": 0.0643, "lr": 1.9990016700793257e-06, "epoch": 0.5642074699749241, "percentage": 11.28, "elapsed_time": "0:06:05", "remaining_time": "0:47:50", "throughput": 5749.03, "total_tokens": 2099392}
|
|
{"current_steps": 4280, "total_steps": 37885, "loss": 0.0911, "lr": 1.9989809833681845e-06, "epoch": 0.5648673617526726, "percentage": 11.3, "elapsed_time": "0:06:05", "remaining_time": "0:47:49", "throughput": 5750.88, "total_tokens": 2102016}
|
|
{"current_steps": 4285, "total_steps": 37885, "loss": 0.2019, "lr": 1.9989600846305634e-06, "epoch": 0.565527253530421, "percentage": 11.31, "elapsed_time": "0:06:05", "remaining_time": "0:47:48", "throughput": 5751.91, "total_tokens": 2104320}
|
|
{"current_steps": 4290, "total_steps": 37885, "loss": 0.1077, "lr": 1.9989389738708984e-06, "epoch": 0.5661871453081695, "percentage": 11.32, "elapsed_time": "0:06:06", "remaining_time": "0:47:47", "throughput": 5754.25, "total_tokens": 2107136}
|
|
{"current_steps": 4295, "total_steps": 37885, "loss": 0.0653, "lr": 1.9989176510936698e-06, "epoch": 0.5668470370859179, "percentage": 11.34, "elapsed_time": "0:06:06", "remaining_time": "0:47:46", "throughput": 5756.48, "total_tokens": 2109888}
|
|
{"current_steps": 4300, "total_steps": 37885, "loss": 0.1031, "lr": 1.9988961163034033e-06, "epoch": 0.5675069288636664, "percentage": 11.35, "elapsed_time": "0:06:06", "remaining_time": "0:47:45", "throughput": 5757.52, "total_tokens": 2112192}
|
|
{"current_steps": 4305, "total_steps": 37885, "loss": 0.1154, "lr": 1.9988743695046696e-06, "epoch": 0.5681668206414148, "percentage": 11.36, "elapsed_time": "0:06:07", "remaining_time": "0:47:44", "throughput": 5759.16, "total_tokens": 2114752}
|
|
{"current_steps": 4310, "total_steps": 37885, "loss": 0.0766, "lr": 1.9988524107020844e-06, "epoch": 0.5688267124191633, "percentage": 11.38, "elapsed_time": "0:06:07", "remaining_time": "0:47:43", "throughput": 5760.51, "total_tokens": 2117184}
|
|
{"current_steps": 4315, "total_steps": 37885, "loss": 0.1522, "lr": 1.9988302399003083e-06, "epoch": 0.5694866041969117, "percentage": 11.39, "elapsed_time": "0:06:07", "remaining_time": "0:47:41", "throughput": 5761.74, "total_tokens": 2119552}
|
|
{"current_steps": 4320, "total_steps": 37885, "loss": 0.1321, "lr": 1.9988078571040464e-06, "epoch": 0.5701464959746602, "percentage": 11.4, "elapsed_time": "0:06:08", "remaining_time": "0:47:40", "throughput": 5762.85, "total_tokens": 2121920}
|
|
{"current_steps": 4325, "total_steps": 37885, "loss": 0.2248, "lr": 1.99878526231805e-06, "epoch": 0.5708063877524086, "percentage": 11.42, "elapsed_time": "0:06:08", "remaining_time": "0:47:39", "throughput": 5763.56, "total_tokens": 2124096}
|
|
{"current_steps": 4330, "total_steps": 37885, "loss": 0.1275, "lr": 1.998762455547114e-06, "epoch": 0.571466279530157, "percentage": 11.43, "elapsed_time": "0:06:08", "remaining_time": "0:47:38", "throughput": 5764.85, "total_tokens": 2126528}
|
|
{"current_steps": 4335, "total_steps": 37885, "loss": 0.0773, "lr": 1.998739436796079e-06, "epoch": 0.5721261713079056, "percentage": 11.44, "elapsed_time": "0:06:09", "remaining_time": "0:47:37", "throughput": 5765.24, "total_tokens": 2128576}
|
|
{"current_steps": 4340, "total_steps": 37885, "loss": 0.1947, "lr": 1.9987162060698312e-06, "epoch": 0.5727860630856539, "percentage": 11.46, "elapsed_time": "0:06:09", "remaining_time": "0:47:36", "throughput": 5765.74, "total_tokens": 2130688}
|
|
{"current_steps": 4345, "total_steps": 37885, "loss": 0.2038, "lr": 1.9986927633733007e-06, "epoch": 0.5734459548634024, "percentage": 11.47, "elapsed_time": "0:06:09", "remaining_time": "0:47:35", "throughput": 5767.53, "total_tokens": 2133312}
|
|
{"current_steps": 4350, "total_steps": 37885, "loss": 0.1005, "lr": 1.9986691087114634e-06, "epoch": 0.5741058466411508, "percentage": 11.48, "elapsed_time": "0:06:10", "remaining_time": "0:47:34", "throughput": 5768.67, "total_tokens": 2135680}
|
|
{"current_steps": 4355, "total_steps": 37885, "loss": 0.1931, "lr": 1.9986452420893393e-06, "epoch": 0.5747657384188993, "percentage": 11.5, "elapsed_time": "0:06:10", "remaining_time": "0:47:32", "throughput": 5770.0, "total_tokens": 2138112}
|
|
{"current_steps": 4360, "total_steps": 37885, "loss": 0.3394, "lr": 1.998621163511994e-06, "epoch": 0.5754256301966477, "percentage": 11.51, "elapsed_time": "0:06:10", "remaining_time": "0:47:31", "throughput": 5770.81, "total_tokens": 2140352}
|
|
{"current_steps": 4365, "total_steps": 37885, "loss": 0.1082, "lr": 1.998596872984539e-06, "epoch": 0.5760855219743962, "percentage": 11.52, "elapsed_time": "0:06:11", "remaining_time": "0:47:30", "throughput": 5772.8, "total_tokens": 2143040}
|
|
{"current_steps": 4370, "total_steps": 37885, "loss": 0.0335, "lr": 1.998572370512128e-06, "epoch": 0.5767454137521446, "percentage": 11.53, "elapsed_time": "0:06:11", "remaining_time": "0:47:29", "throughput": 5773.66, "total_tokens": 2145280}
|
|
{"current_steps": 4375, "total_steps": 37885, "loss": 0.0719, "lr": 1.998547656099963e-06, "epoch": 0.5774053055298931, "percentage": 11.55, "elapsed_time": "0:06:11", "remaining_time": "0:47:28", "throughput": 5775.47, "total_tokens": 2147904}
|
|
{"current_steps": 4380, "total_steps": 37885, "loss": 0.0904, "lr": 1.9985227297532886e-06, "epoch": 0.5780651973076415, "percentage": 11.56, "elapsed_time": "0:06:12", "remaining_time": "0:47:27", "throughput": 5776.95, "total_tokens": 2150400}
|
|
{"current_steps": 4385, "total_steps": 37885, "loss": 0.1622, "lr": 1.9984975914773957e-06, "epoch": 0.57872508908539, "percentage": 11.57, "elapsed_time": "0:06:12", "remaining_time": "0:47:26", "throughput": 5778.9, "total_tokens": 2153088}
|
|
{"current_steps": 4390, "total_steps": 37885, "loss": 0.0055, "lr": 1.9984722412776197e-06, "epoch": 0.5793849808631385, "percentage": 11.59, "elapsed_time": "0:06:12", "remaining_time": "0:47:25", "throughput": 5780.92, "total_tokens": 2155776}
|
|
{"current_steps": 4395, "total_steps": 37885, "loss": 0.2532, "lr": 1.9984466791593407e-06, "epoch": 0.5800448726408869, "percentage": 11.6, "elapsed_time": "0:06:13", "remaining_time": "0:47:24", "throughput": 5782.67, "total_tokens": 2158400}
|
|
{"current_steps": 4400, "total_steps": 37885, "loss": 0.0378, "lr": 1.9984209051279843e-06, "epoch": 0.5807047644186354, "percentage": 11.61, "elapsed_time": "0:06:13", "remaining_time": "0:47:23", "throughput": 5783.61, "total_tokens": 2160704}
|
|
{"current_steps": 4405, "total_steps": 37885, "loss": 0.0141, "lr": 1.998394919189021e-06, "epoch": 0.5813646561963838, "percentage": 11.63, "elapsed_time": "0:06:13", "remaining_time": "0:47:22", "throughput": 5785.08, "total_tokens": 2163200}
|
|
{"current_steps": 4410, "total_steps": 37885, "loss": 0.137, "lr": 1.9983687213479655e-06, "epoch": 0.5820245479741323, "percentage": 11.64, "elapsed_time": "0:06:14", "remaining_time": "0:47:20", "throughput": 5785.69, "total_tokens": 2165376}
|
|
{"current_steps": 4415, "total_steps": 37885, "loss": 0.2062, "lr": 1.998342311610379e-06, "epoch": 0.5826844397518807, "percentage": 11.65, "elapsed_time": "0:06:14", "remaining_time": "0:47:19", "throughput": 5786.98, "total_tokens": 2167808}
|
|
{"current_steps": 4420, "total_steps": 37885, "loss": 0.225, "lr": 1.998315689981866e-06, "epoch": 0.5833443315296292, "percentage": 11.67, "elapsed_time": "0:06:14", "remaining_time": "0:47:18", "throughput": 5787.98, "total_tokens": 2170112}
|
|
{"current_steps": 4425, "total_steps": 37885, "loss": 0.0063, "lr": 1.998288856468077e-06, "epoch": 0.5840042233073776, "percentage": 11.68, "elapsed_time": "0:06:15", "remaining_time": "0:47:17", "throughput": 5789.13, "total_tokens": 2172480}
|
|
{"current_steps": 4430, "total_steps": 37885, "loss": 0.1578, "lr": 1.998261811074707e-06, "epoch": 0.5846641150851261, "percentage": 11.69, "elapsed_time": "0:06:15", "remaining_time": "0:47:16", "throughput": 5790.88, "total_tokens": 2175104}
|
|
{"current_steps": 4435, "total_steps": 37885, "loss": 0.0695, "lr": 1.998234553807497e-06, "epoch": 0.5853240068628744, "percentage": 11.71, "elapsed_time": "0:06:15", "remaining_time": "0:47:15", "throughput": 5791.61, "total_tokens": 2177280}
|
|
{"current_steps": 4440, "total_steps": 37885, "loss": 0.0882, "lr": 1.9982070846722312e-06, "epoch": 0.585983898640623, "percentage": 11.72, "elapsed_time": "0:06:16", "remaining_time": "0:47:14", "throughput": 5793.2, "total_tokens": 2179776}
|
|
{"current_steps": 4445, "total_steps": 37885, "loss": 0.1574, "lr": 1.9981794036747402e-06, "epoch": 0.5866437904183713, "percentage": 11.73, "elapsed_time": "0:06:16", "remaining_time": "0:47:13", "throughput": 5795.02, "total_tokens": 2182400}
|
|
{"current_steps": 4450, "total_steps": 37885, "loss": 0.0313, "lr": 1.998151510820899e-06, "epoch": 0.5873036821961198, "percentage": 11.75, "elapsed_time": "0:06:16", "remaining_time": "0:47:12", "throughput": 5797.07, "total_tokens": 2185088}
|
|
{"current_steps": 4455, "total_steps": 37885, "loss": 0.0876, "lr": 1.9981234061166275e-06, "epoch": 0.5879635739738683, "percentage": 11.76, "elapsed_time": "0:06:17", "remaining_time": "0:47:10", "throughput": 5799.13, "total_tokens": 2187776}
|
|
{"current_steps": 4460, "total_steps": 37885, "loss": 0.0183, "lr": 1.9980950895678914e-06, "epoch": 0.5886234657516167, "percentage": 11.77, "elapsed_time": "0:06:17", "remaining_time": "0:47:09", "throughput": 5799.99, "total_tokens": 2190016}
|
|
{"current_steps": 4465, "total_steps": 37885, "loss": 0.0676, "lr": 1.9980665611806998e-06, "epoch": 0.5892833575293652, "percentage": 11.79, "elapsed_time": "0:06:17", "remaining_time": "0:47:08", "throughput": 5801.06, "total_tokens": 2192320}
|
|
{"current_steps": 4470, "total_steps": 37885, "loss": 0.1909, "lr": 1.998037820961108e-06, "epoch": 0.5899432493071136, "percentage": 11.8, "elapsed_time": "0:06:18", "remaining_time": "0:47:07", "throughput": 5802.44, "total_tokens": 2194752}
|
|
{"current_steps": 4475, "total_steps": 37885, "loss": 0.1777, "lr": 1.9980088689152163e-06, "epoch": 0.5906031410848621, "percentage": 11.81, "elapsed_time": "0:06:18", "remaining_time": "0:47:06", "throughput": 5803.53, "total_tokens": 2197056}
|
|
{"current_steps": 4480, "total_steps": 37885, "loss": 0.092, "lr": 1.9979797050491687e-06, "epoch": 0.5912630328626105, "percentage": 11.83, "elapsed_time": "0:06:18", "remaining_time": "0:47:05", "throughput": 5804.45, "total_tokens": 2199296}
|
|
{"current_steps": 4485, "total_steps": 37885, "loss": 0.3187, "lr": 1.997950329369156e-06, "epoch": 0.591922924640359, "percentage": 11.84, "elapsed_time": "0:06:19", "remaining_time": "0:47:04", "throughput": 5805.69, "total_tokens": 2201664}
|
|
{"current_steps": 4490, "total_steps": 37885, "loss": 0.1067, "lr": 1.997920741881412e-06, "epoch": 0.5925828164181074, "percentage": 11.85, "elapsed_time": "0:06:19", "remaining_time": "0:47:03", "throughput": 5807.48, "total_tokens": 2204288}
|
|
{"current_steps": 4495, "total_steps": 37885, "loss": 0.1064, "lr": 1.997890942592217e-06, "epoch": 0.5932427081958559, "percentage": 11.86, "elapsed_time": "0:06:19", "remaining_time": "0:47:01", "throughput": 5808.32, "total_tokens": 2206528}
|
|
{"current_steps": 4500, "total_steps": 37885, "loss": 0.1168, "lr": 1.997860931507896e-06, "epoch": 0.5939025999736043, "percentage": 11.88, "elapsed_time": "0:06:20", "remaining_time": "0:47:00", "throughput": 5809.83, "total_tokens": 2209024}
|
|
{"current_steps": 4505, "total_steps": 37885, "loss": 0.0681, "lr": 1.997830708634818e-06, "epoch": 0.5945624917513528, "percentage": 11.89, "elapsed_time": "0:06:20", "remaining_time": "0:46:59", "throughput": 5811.48, "total_tokens": 2211584}
|
|
{"current_steps": 4510, "total_steps": 37885, "loss": 0.1479, "lr": 1.9978002739793977e-06, "epoch": 0.5952223835291012, "percentage": 11.9, "elapsed_time": "0:06:20", "remaining_time": "0:46:58", "throughput": 5812.66, "total_tokens": 2213952}
|
|
{"current_steps": 4515, "total_steps": 37885, "loss": 0.1002, "lr": 1.9977696275480945e-06, "epoch": 0.5958822753068497, "percentage": 11.92, "elapsed_time": "0:06:21", "remaining_time": "0:46:57", "throughput": 5813.52, "total_tokens": 2216192}
|
|
{"current_steps": 4520, "total_steps": 37885, "loss": 0.0057, "lr": 1.9977387693474134e-06, "epoch": 0.5965421670845982, "percentage": 11.93, "elapsed_time": "0:06:21", "remaining_time": "0:46:56", "throughput": 5814.97, "total_tokens": 2218688}
|
|
{"current_steps": 4525, "total_steps": 37885, "loss": 0.0011, "lr": 1.9977076993839037e-06, "epoch": 0.5972020588623466, "percentage": 11.94, "elapsed_time": "0:06:21", "remaining_time": "0:46:55", "throughput": 5815.79, "total_tokens": 2220928}
|
|
{"current_steps": 4530, "total_steps": 37885, "loss": 0.001, "lr": 1.9976764176641592e-06, "epoch": 0.5978619506400951, "percentage": 11.96, "elapsed_time": "0:06:22", "remaining_time": "0:46:54", "throughput": 5817.09, "total_tokens": 2223360}
|
|
{"current_steps": 4535, "total_steps": 37885, "loss": 0.1205, "lr": 1.99764492419482e-06, "epoch": 0.5985218424178435, "percentage": 11.97, "elapsed_time": "0:06:22", "remaining_time": "0:46:53", "throughput": 5818.46, "total_tokens": 2225792}
|
|
{"current_steps": 4540, "total_steps": 37885, "loss": 0.2427, "lr": 1.99761321898257e-06, "epoch": 0.599181734195592, "percentage": 11.98, "elapsed_time": "0:06:22", "remaining_time": "0:46:52", "throughput": 5819.95, "total_tokens": 2228288}
|
|
{"current_steps": 4545, "total_steps": 37885, "loss": 0.2698, "lr": 1.9975813020341387e-06, "epoch": 0.5998416259733403, "percentage": 12.0, "elapsed_time": "0:06:23", "remaining_time": "0:46:51", "throughput": 5821.57, "total_tokens": 2230848}
|
|
{"current_steps": 4550, "total_steps": 37885, "loss": 0.2384, "lr": 1.9975491733562997e-06, "epoch": 0.6005015177510888, "percentage": 12.01, "elapsed_time": "0:06:23", "remaining_time": "0:46:49", "throughput": 5823.35, "total_tokens": 2233472}
|
|
{"current_steps": 4555, "total_steps": 37885, "loss": 0.2191, "lr": 1.9975168329558725e-06, "epoch": 0.6011614095288372, "percentage": 12.02, "elapsed_time": "0:06:23", "remaining_time": "0:46:48", "throughput": 5825.19, "total_tokens": 2236096}
|
|
{"current_steps": 4560, "total_steps": 37885, "loss": 0.1075, "lr": 1.9974842808397206e-06, "epoch": 0.6018213013065857, "percentage": 12.04, "elapsed_time": "0:06:24", "remaining_time": "0:46:47", "throughput": 5826.97, "total_tokens": 2238720}
|
|
{"current_steps": 4565, "total_steps": 37885, "loss": 0.1344, "lr": 1.9974515170147533e-06, "epoch": 0.6024811930843341, "percentage": 12.05, "elapsed_time": "0:06:24", "remaining_time": "0:46:46", "throughput": 5828.44, "total_tokens": 2241216}
|
|
{"current_steps": 4570, "total_steps": 37885, "loss": 0.07, "lr": 1.997418541487925e-06, "epoch": 0.6031410848620826, "percentage": 12.06, "elapsed_time": "0:06:24", "remaining_time": "0:46:45", "throughput": 5829.81, "total_tokens": 2243648}
|
|
{"current_steps": 4575, "total_steps": 37885, "loss": 0.1225, "lr": 1.9973853542662336e-06, "epoch": 0.6038009766398311, "percentage": 12.08, "elapsed_time": "0:06:25", "remaining_time": "0:46:44", "throughput": 5831.13, "total_tokens": 2246080}
|
|
{"current_steps": 4580, "total_steps": 37885, "loss": 0.0581, "lr": 1.9973519553567233e-06, "epoch": 0.6044608684175795, "percentage": 12.09, "elapsed_time": "0:06:25", "remaining_time": "0:46:43", "throughput": 5831.82, "total_tokens": 2248256}
|
|
{"current_steps": 4585, "total_steps": 37885, "loss": 0.0503, "lr": 1.9973183447664826e-06, "epoch": 0.605120760195328, "percentage": 12.1, "elapsed_time": "0:06:25", "remaining_time": "0:46:42", "throughput": 5833.16, "total_tokens": 2250688}
|
|
{"current_steps": 4590, "total_steps": 37885, "loss": 0.2459, "lr": 1.9972845225026458e-06, "epoch": 0.6057806519730764, "percentage": 12.12, "elapsed_time": "0:06:26", "remaining_time": "0:46:41", "throughput": 5834.52, "total_tokens": 2253120}
|
|
{"current_steps": 4595, "total_steps": 37885, "loss": 0.2109, "lr": 1.99725048857239e-06, "epoch": 0.6064405437508249, "percentage": 12.13, "elapsed_time": "0:06:26", "remaining_time": "0:46:40", "throughput": 5835.4, "total_tokens": 2255360}
|
|
{"current_steps": 4600, "total_steps": 37885, "loss": 0.1823, "lr": 1.99721624298294e-06, "epoch": 0.6071004355285733, "percentage": 12.14, "elapsed_time": "0:06:26", "remaining_time": "0:46:39", "throughput": 5836.59, "total_tokens": 2257728}
|
|
{"current_steps": 4605, "total_steps": 37885, "loss": 0.0686, "lr": 1.997181785741564e-06, "epoch": 0.6077603273063218, "percentage": 12.16, "elapsed_time": "0:06:27", "remaining_time": "0:46:37", "throughput": 5838.08, "total_tokens": 2260224}
|
|
{"current_steps": 4610, "total_steps": 37885, "loss": 0.0076, "lr": 1.9971471168555746e-06, "epoch": 0.6084202190840702, "percentage": 12.17, "elapsed_time": "0:06:27", "remaining_time": "0:46:36", "throughput": 5840.03, "total_tokens": 2262912}
|
|
{"current_steps": 4615, "total_steps": 37885, "loss": 0.2299, "lr": 1.9971122363323307e-06, "epoch": 0.6090801108618187, "percentage": 12.18, "elapsed_time": "0:06:27", "remaining_time": "0:46:35", "throughput": 5840.92, "total_tokens": 2265152}
|
|
{"current_steps": 4620, "total_steps": 37885, "loss": 0.0823, "lr": 1.9970771441792347e-06, "epoch": 0.6097400026395671, "percentage": 12.19, "elapsed_time": "0:06:28", "remaining_time": "0:46:34", "throughput": 5843.11, "total_tokens": 2267968}
|
|
{"current_steps": 4625, "total_steps": 37885, "loss": 0.1992, "lr": 1.997041840403735e-06, "epoch": 0.6103998944173156, "percentage": 12.21, "elapsed_time": "0:06:28", "remaining_time": "0:46:33", "throughput": 5844.3, "total_tokens": 2270336}
|
|
{"current_steps": 4630, "total_steps": 37885, "loss": 0.2115, "lr": 1.997006325013325e-06, "epoch": 0.611059786195064, "percentage": 12.22, "elapsed_time": "0:06:28", "remaining_time": "0:46:32", "throughput": 5846.2, "total_tokens": 2273024}
|
|
{"current_steps": 4635, "total_steps": 37885, "loss": 0.0794, "lr": 1.9969705980155426e-06, "epoch": 0.6117196779728125, "percentage": 12.23, "elapsed_time": "0:06:29", "remaining_time": "0:46:31", "throughput": 5847.1, "total_tokens": 2275264}
|
|
{"current_steps": 4640, "total_steps": 37885, "loss": 0.0776, "lr": 1.99693465941797e-06, "epoch": 0.612379569750561, "percentage": 12.25, "elapsed_time": "0:06:29", "remaining_time": "0:46:30", "throughput": 5848.87, "total_tokens": 2277888}
|
|
{"current_steps": 4645, "total_steps": 37885, "loss": 0.002, "lr": 1.9968985092282354e-06, "epoch": 0.6130394615283093, "percentage": 12.26, "elapsed_time": "0:06:29", "remaining_time": "0:46:29", "throughput": 5850.18, "total_tokens": 2280320}
|
|
{"current_steps": 4650, "total_steps": 37885, "loss": 0.0637, "lr": 1.996862147454011e-06, "epoch": 0.6136993533060578, "percentage": 12.27, "elapsed_time": "0:06:30", "remaining_time": "0:46:28", "throughput": 5851.05, "total_tokens": 2282560}
|
|
{"current_steps": 4655, "total_steps": 37885, "loss": 0.121, "lr": 1.9968255741030144e-06, "epoch": 0.6143592450838062, "percentage": 12.29, "elapsed_time": "0:06:30", "remaining_time": "0:46:27", "throughput": 5852.05, "total_tokens": 2284864}
|
|
{"current_steps": 4660, "total_steps": 37885, "loss": 0.1595, "lr": 1.9967887891830082e-06, "epoch": 0.6150191368615547, "percentage": 12.3, "elapsed_time": "0:06:30", "remaining_time": "0:46:26", "throughput": 5853.06, "total_tokens": 2287168}
|
|
{"current_steps": 4665, "total_steps": 37885, "loss": 0.0561, "lr": 1.9967517927017995e-06, "epoch": 0.6156790286393031, "percentage": 12.31, "elapsed_time": "0:06:31", "remaining_time": "0:46:25", "throughput": 5854.33, "total_tokens": 2289600}
|
|
{"current_steps": 4670, "total_steps": 37885, "loss": 0.1015, "lr": 1.996714584667241e-06, "epoch": 0.6163389204170516, "percentage": 12.33, "elapsed_time": "0:06:31", "remaining_time": "0:46:23", "throughput": 5855.93, "total_tokens": 2292160}
|
|
{"current_steps": 4675, "total_steps": 37885, "loss": 0.0621, "lr": 1.9966771650872295e-06, "epoch": 0.6169988121948, "percentage": 12.34, "elapsed_time": "0:06:31", "remaining_time": "0:46:22", "throughput": 5857.96, "total_tokens": 2294912}
|
|
{"current_steps": 4680, "total_steps": 37885, "loss": 0.0647, "lr": 1.996639533969707e-06, "epoch": 0.6176587039725485, "percentage": 12.35, "elapsed_time": "0:06:32", "remaining_time": "0:46:21", "throughput": 5858.48, "total_tokens": 2297024}
|
|
{"current_steps": 4685, "total_steps": 37885, "loss": 0.2015, "lr": 1.9966016913226602e-06, "epoch": 0.6183185957502969, "percentage": 12.37, "elapsed_time": "0:06:32", "remaining_time": "0:46:20", "throughput": 5859.76, "total_tokens": 2299456}
|
|
{"current_steps": 4690, "total_steps": 37885, "loss": 0.062, "lr": 1.9965636371541217e-06, "epoch": 0.6189784875280454, "percentage": 12.38, "elapsed_time": "0:06:32", "remaining_time": "0:46:19", "throughput": 5860.28, "total_tokens": 2301568}
|
|
{"current_steps": 4695, "total_steps": 37885, "loss": 0.1759, "lr": 1.9965253714721676e-06, "epoch": 0.6196383793057938, "percentage": 12.39, "elapsed_time": "0:06:33", "remaining_time": "0:46:18", "throughput": 5861.44, "total_tokens": 2303936}
|
|
{"current_steps": 4700, "total_steps": 37885, "loss": 0.1793, "lr": 1.99648689428492e-06, "epoch": 0.6202982710835423, "percentage": 12.41, "elapsed_time": "0:06:33", "remaining_time": "0:46:17", "throughput": 5862.24, "total_tokens": 2306176}
|
|
{"current_steps": 4705, "total_steps": 37885, "loss": 0.046, "lr": 1.9964482056005446e-06, "epoch": 0.6209581628612908, "percentage": 12.42, "elapsed_time": "0:06:33", "remaining_time": "0:46:16", "throughput": 5863.81, "total_tokens": 2308736}
|
|
{"current_steps": 4710, "total_steps": 37885, "loss": 0.2129, "lr": 1.9964093054272534e-06, "epoch": 0.6216180546390392, "percentage": 12.43, "elapsed_time": "0:06:34", "remaining_time": "0:46:15", "throughput": 5864.91, "total_tokens": 2311104}
|
|
{"current_steps": 4715, "total_steps": 37885, "loss": 0.1223, "lr": 1.9963701937733024e-06, "epoch": 0.6222779464167877, "percentage": 12.45, "elapsed_time": "0:06:34", "remaining_time": "0:46:14", "throughput": 5866.16, "total_tokens": 2313536}
|
|
{"current_steps": 4720, "total_steps": 37885, "loss": 0.1751, "lr": 1.9963308706469932e-06, "epoch": 0.6229378381945361, "percentage": 12.46, "elapsed_time": "0:06:34", "remaining_time": "0:46:13", "throughput": 5867.57, "total_tokens": 2316032}
|
|
{"current_steps": 4725, "total_steps": 37885, "loss": 0.2008, "lr": 1.9962913360566713e-06, "epoch": 0.6235977299722846, "percentage": 12.47, "elapsed_time": "0:06:35", "remaining_time": "0:46:12", "throughput": 5869.23, "total_tokens": 2318656}
|
|
{"current_steps": 4730, "total_steps": 37885, "loss": 0.1295, "lr": 1.9962515900107283e-06, "epoch": 0.624257621750033, "percentage": 12.49, "elapsed_time": "0:06:35", "remaining_time": "0:46:11", "throughput": 5870.76, "total_tokens": 2321216}
|
|
{"current_steps": 4735, "total_steps": 37885, "loss": 0.1381, "lr": 1.9962116325175993e-06, "epoch": 0.6249175135277815, "percentage": 12.5, "elapsed_time": "0:06:35", "remaining_time": "0:46:10", "throughput": 5871.97, "total_tokens": 2323648}
|
|
{"current_steps": 4740, "total_steps": 37885, "loss": 0.1807, "lr": 1.996171463585765e-06, "epoch": 0.6255774053055299, "percentage": 12.51, "elapsed_time": "0:06:36", "remaining_time": "0:46:09", "throughput": 5873.19, "total_tokens": 2326080}
|
|
{"current_steps": 4745, "total_steps": 37885, "loss": 0.097, "lr": 1.996131083223752e-06, "epoch": 0.6262372970832784, "percentage": 12.52, "elapsed_time": "0:06:36", "remaining_time": "0:46:08", "throughput": 5874.43, "total_tokens": 2328512}
|
|
{"current_steps": 4750, "total_steps": 37885, "loss": 0.1358, "lr": 1.9960904914401295e-06, "epoch": 0.6268971888610267, "percentage": 12.54, "elapsed_time": "0:06:36", "remaining_time": "0:46:07", "throughput": 5875.79, "total_tokens": 2331008}
|
|
{"current_steps": 4755, "total_steps": 37885, "loss": 0.0575, "lr": 1.9960496882435138e-06, "epoch": 0.6275570806387752, "percentage": 12.55, "elapsed_time": "0:06:37", "remaining_time": "0:46:06", "throughput": 5876.9, "total_tokens": 2333376}
|
|
{"current_steps": 4760, "total_steps": 37885, "loss": 0.2401, "lr": 1.996008673642564e-06, "epoch": 0.6282169724165237, "percentage": 12.56, "elapsed_time": "0:06:37", "remaining_time": "0:46:05", "throughput": 5878.27, "total_tokens": 2335872}
|
|
{"current_steps": 4765, "total_steps": 37885, "loss": 0.0035, "lr": 1.995967447645986e-06, "epoch": 0.6288768641942721, "percentage": 12.58, "elapsed_time": "0:06:37", "remaining_time": "0:46:04", "throughput": 5879.82, "total_tokens": 2338432}
|
|
{"current_steps": 4770, "total_steps": 37885, "loss": 0.2603, "lr": 1.9959260102625293e-06, "epoch": 0.6295367559720206, "percentage": 12.59, "elapsed_time": "0:06:38", "remaining_time": "0:46:03", "throughput": 5881.18, "total_tokens": 2340928}
|
|
{"current_steps": 4775, "total_steps": 37885, "loss": 0.1541, "lr": 1.9958843615009892e-06, "epoch": 0.630196647749769, "percentage": 12.6, "elapsed_time": "0:06:38", "remaining_time": "0:46:02", "throughput": 5883.14, "total_tokens": 2343680}
|
|
{"current_steps": 4780, "total_steps": 37885, "loss": 0.062, "lr": 1.995842501370205e-06, "epoch": 0.6308565395275175, "percentage": 12.62, "elapsed_time": "0:06:38", "remaining_time": "0:46:01", "throughput": 5884.67, "total_tokens": 2346240}
|
|
{"current_steps": 4785, "total_steps": 37885, "loss": 0.1132, "lr": 1.9958004298790607e-06, "epoch": 0.6315164313052659, "percentage": 12.63, "elapsed_time": "0:06:39", "remaining_time": "0:46:00", "throughput": 5885.64, "total_tokens": 2348544}
|
|
{"current_steps": 4790, "total_steps": 37885, "loss": 0.3771, "lr": 1.9957581470364867e-06, "epoch": 0.6321763230830144, "percentage": 12.64, "elapsed_time": "0:06:39", "remaining_time": "0:45:59", "throughput": 5886.86, "total_tokens": 2350976}
|
|
{"current_steps": 4795, "total_steps": 37885, "loss": 0.1463, "lr": 1.9957156528514564e-06, "epoch": 0.6328362148607628, "percentage": 12.66, "elapsed_time": "0:06:39", "remaining_time": "0:45:58", "throughput": 5887.68, "total_tokens": 2353216}
|
|
{"current_steps": 4800, "total_steps": 37885, "loss": 0.1893, "lr": 1.995672947332989e-06, "epoch": 0.6334961066385113, "percentage": 12.67, "elapsed_time": "0:06:40", "remaining_time": "0:45:57", "throughput": 5888.73, "total_tokens": 2355584}
|
|
{"current_steps": 4805, "total_steps": 37885, "loss": 0.0834, "lr": 1.995630030490149e-06, "epoch": 0.6341559984162597, "percentage": 12.68, "elapsed_time": "0:06:40", "remaining_time": "0:45:56", "throughput": 5890.25, "total_tokens": 2358144}
|
|
{"current_steps": 4810, "total_steps": 37885, "loss": 0.0498, "lr": 1.9955869023320447e-06, "epoch": 0.6348158901940082, "percentage": 12.7, "elapsed_time": "0:06:40", "remaining_time": "0:45:55", "throughput": 5892.19, "total_tokens": 2360896}
|
|
{"current_steps": 4815, "total_steps": 37885, "loss": 0.2227, "lr": 1.99554356286783e-06, "epoch": 0.6354757819717566, "percentage": 12.71, "elapsed_time": "0:06:41", "remaining_time": "0:45:54", "throughput": 5893.27, "total_tokens": 2363264}
|
|
{"current_steps": 4820, "total_steps": 37885, "loss": 0.0659, "lr": 1.9955000121067035e-06, "epoch": 0.6361356737495051, "percentage": 12.72, "elapsed_time": "0:06:41", "remaining_time": "0:45:53", "throughput": 5894.37, "total_tokens": 2365632}
|
|
{"current_steps": 4825, "total_steps": 37885, "loss": 0.0047, "lr": 1.9954562500579075e-06, "epoch": 0.6367955655272536, "percentage": 12.74, "elapsed_time": "0:06:41", "remaining_time": "0:45:52", "throughput": 5895.46, "total_tokens": 2368000}
|
|
{"current_steps": 4830, "total_steps": 37885, "loss": 0.122, "lr": 1.9954122767307316e-06, "epoch": 0.637455457305002, "percentage": 12.75, "elapsed_time": "0:06:41", "remaining_time": "0:45:51", "throughput": 5897.0, "total_tokens": 2370560}
|
|
{"current_steps": 4835, "total_steps": 37885, "loss": 0.0527, "lr": 1.995368092134508e-06, "epoch": 0.6381153490827505, "percentage": 12.76, "elapsed_time": "0:06:42", "remaining_time": "0:45:50", "throughput": 5898.5, "total_tokens": 2373120}
|
|
{"current_steps": 4840, "total_steps": 37885, "loss": 0.004, "lr": 1.9953236962786143e-06, "epoch": 0.6387752408604989, "percentage": 12.78, "elapsed_time": "0:06:42", "remaining_time": "0:45:49", "throughput": 5900.46, "total_tokens": 2375872}
|
|
{"current_steps": 4845, "total_steps": 37885, "loss": 0.2211, "lr": 1.995279089172474e-06, "epoch": 0.6394351326382474, "percentage": 12.79, "elapsed_time": "0:06:42", "remaining_time": "0:45:48", "throughput": 5901.96, "total_tokens": 2378432}
|
|
{"current_steps": 4850, "total_steps": 37885, "loss": 0.1565, "lr": 1.9952342708255543e-06, "epoch": 0.6400950244159958, "percentage": 12.8, "elapsed_time": "0:06:43", "remaining_time": "0:45:47", "throughput": 5903.08, "total_tokens": 2380800}
|
|
{"current_steps": 4855, "total_steps": 37885, "loss": 0.1074, "lr": 1.9951892412473677e-06, "epoch": 0.6407549161937443, "percentage": 12.82, "elapsed_time": "0:06:43", "remaining_time": "0:45:46", "throughput": 5905.45, "total_tokens": 2383744}
|
|
{"current_steps": 4860, "total_steps": 37885, "loss": 0.124, "lr": 1.9951440004474707e-06, "epoch": 0.6414148079714926, "percentage": 12.83, "elapsed_time": "0:06:43", "remaining_time": "0:45:45", "throughput": 5906.56, "total_tokens": 2386112}
|
|
{"current_steps": 4865, "total_steps": 37885, "loss": 0.1265, "lr": 1.9950985484354664e-06, "epoch": 0.6420746997492411, "percentage": 12.84, "elapsed_time": "0:06:44", "remaining_time": "0:45:44", "throughput": 5908.24, "total_tokens": 2388736}
|
|
{"current_steps": 4870, "total_steps": 37885, "loss": 0.0334, "lr": 1.9950528852210013e-06, "epoch": 0.6427345915269895, "percentage": 12.85, "elapsed_time": "0:06:44", "remaining_time": "0:45:43", "throughput": 5909.32, "total_tokens": 2391104}
|
|
{"current_steps": 4875, "total_steps": 37885, "loss": 0.1468, "lr": 1.9950070108137663e-06, "epoch": 0.643394483304738, "percentage": 12.87, "elapsed_time": "0:06:44", "remaining_time": "0:45:42", "throughput": 5910.97, "total_tokens": 2393728}
|
|
{"current_steps": 4880, "total_steps": 37885, "loss": 0.1562, "lr": 1.9949609252234985e-06, "epoch": 0.6440543750824864, "percentage": 12.88, "elapsed_time": "0:06:45", "remaining_time": "0:45:41", "throughput": 5912.88, "total_tokens": 2396480}
|
|
{"current_steps": 4885, "total_steps": 37885, "loss": 0.1169, "lr": 1.9949146284599794e-06, "epoch": 0.6447142668602349, "percentage": 12.89, "elapsed_time": "0:06:45", "remaining_time": "0:45:40", "throughput": 5914.5, "total_tokens": 2399104}
|
|
{"current_steps": 4890, "total_steps": 37885, "loss": 0.096, "lr": 1.9948681205330354e-06, "epoch": 0.6453741586379834, "percentage": 12.91, "elapsed_time": "0:06:45", "remaining_time": "0:45:39", "throughput": 5916.02, "total_tokens": 2401664}
|
|
{"current_steps": 4895, "total_steps": 37885, "loss": 0.1063, "lr": 1.994821401452537e-06, "epoch": 0.6460340504157318, "percentage": 12.92, "elapsed_time": "0:06:46", "remaining_time": "0:45:38", "throughput": 5917.4, "total_tokens": 2404160}
|
|
{"current_steps": 4900, "total_steps": 37885, "loss": 0.0837, "lr": 1.9947744712283997e-06, "epoch": 0.6466939421934803, "percentage": 12.93, "elapsed_time": "0:06:46", "remaining_time": "0:45:37", "throughput": 5918.6, "total_tokens": 2406592}
|
|
{"current_steps": 4905, "total_steps": 37885, "loss": 0.0537, "lr": 1.9947273298705848e-06, "epoch": 0.6473538339712287, "percentage": 12.95, "elapsed_time": "0:06:46", "remaining_time": "0:45:36", "throughput": 5919.92, "total_tokens": 2409088}
|
|
{"current_steps": 4910, "total_steps": 37885, "loss": 0.173, "lr": 1.994679977389097e-06, "epoch": 0.6480137257489772, "percentage": 12.96, "elapsed_time": "0:06:47", "remaining_time": "0:45:35", "throughput": 5921.27, "total_tokens": 2411584}
|
|
{"current_steps": 4915, "total_steps": 37885, "loss": 0.3713, "lr": 1.9946324137939876e-06, "epoch": 0.6486736175267256, "percentage": 12.97, "elapsed_time": "0:06:47", "remaining_time": "0:45:34", "throughput": 5923.29, "total_tokens": 2414400}
|
|
{"current_steps": 4920, "total_steps": 37885, "loss": 0.22, "lr": 1.9945846390953503e-06, "epoch": 0.6493335093044741, "percentage": 12.99, "elapsed_time": "0:06:47", "remaining_time": "0:45:33", "throughput": 5924.03, "total_tokens": 2416640}
|
|
{"current_steps": 4925, "total_steps": 37885, "loss": 0.1468, "lr": 1.994536653303326e-06, "epoch": 0.6499934010822225, "percentage": 13.0, "elapsed_time": "0:06:48", "remaining_time": "0:45:32", "throughput": 5925.34, "total_tokens": 2419136}
|
|
{"current_steps": 4930, "total_steps": 37885, "loss": 0.0354, "lr": 1.9944884564280987e-06, "epoch": 0.650653292859971, "percentage": 13.01, "elapsed_time": "0:06:48", "remaining_time": "0:45:31", "throughput": 5926.25, "total_tokens": 2421440}
|
|
{"current_steps": 4935, "total_steps": 37885, "loss": 0.0854, "lr": 1.994440048479898e-06, "epoch": 0.6513131846377194, "percentage": 13.03, "elapsed_time": "0:06:48", "remaining_time": "0:45:30", "throughput": 5927.76, "total_tokens": 2424000}
|
|
{"current_steps": 4940, "total_steps": 37885, "loss": 0.0808, "lr": 1.9943914294689984e-06, "epoch": 0.6519730764154679, "percentage": 13.04, "elapsed_time": "0:06:49", "remaining_time": "0:45:29", "throughput": 5928.53, "total_tokens": 2426240}
|
|
{"current_steps": 4945, "total_steps": 37885, "loss": 0.0641, "lr": 1.9943425994057184e-06, "epoch": 0.6526329681932164, "percentage": 13.05, "elapsed_time": "0:06:49", "remaining_time": "0:45:28", "throughput": 5930.19, "total_tokens": 2428864}
|
|
{"current_steps": 4950, "total_steps": 37885, "loss": 0.0188, "lr": 1.994293558300422e-06, "epoch": 0.6532928599709648, "percentage": 13.07, "elapsed_time": "0:06:49", "remaining_time": "0:45:27", "throughput": 5931.41, "total_tokens": 2431296}
|
|
{"current_steps": 4955, "total_steps": 37885, "loss": 0.0037, "lr": 1.9942443061635183e-06, "epoch": 0.6539527517487133, "percentage": 13.08, "elapsed_time": "0:06:50", "remaining_time": "0:45:26", "throughput": 5933.08, "total_tokens": 2433984}
|
|
{"current_steps": 4960, "total_steps": 37885, "loss": 0.5298, "lr": 1.9941948430054603e-06, "epoch": 0.6546126435264616, "percentage": 13.09, "elapsed_time": "0:06:50", "remaining_time": "0:45:25", "throughput": 5933.79, "total_tokens": 2436224}
|
|
{"current_steps": 4965, "total_steps": 37885, "loss": 0.1602, "lr": 1.994145168836746e-06, "epoch": 0.6552725353042101, "percentage": 13.11, "elapsed_time": "0:06:50", "remaining_time": "0:45:24", "throughput": 5935.16, "total_tokens": 2438720}
|
|
{"current_steps": 4970, "total_steps": 37885, "loss": 0.1132, "lr": 1.994095283667919e-06, "epoch": 0.6559324270819585, "percentage": 13.12, "elapsed_time": "0:06:51", "remaining_time": "0:45:23", "throughput": 5935.87, "total_tokens": 2440960}
|
|
{"current_steps": 4975, "total_steps": 37885, "loss": 0.0129, "lr": 1.9940451875095666e-06, "epoch": 0.656592318859707, "percentage": 13.13, "elapsed_time": "0:06:51", "remaining_time": "0:45:22", "throughput": 5936.84, "total_tokens": 2443328}
|
|
{"current_steps": 4980, "total_steps": 37885, "loss": 0.1357, "lr": 1.9939948803723217e-06, "epoch": 0.6572522106374554, "percentage": 13.15, "elapsed_time": "0:06:51", "remaining_time": "0:45:21", "throughput": 5938.43, "total_tokens": 2445952}
|
|
{"current_steps": 4985, "total_steps": 37885, "loss": 0.0527, "lr": 1.9939443622668614e-06, "epoch": 0.6579121024152039, "percentage": 13.16, "elapsed_time": "0:06:52", "remaining_time": "0:45:20", "throughput": 5940.03, "total_tokens": 2448576}
|
|
{"current_steps": 4990, "total_steps": 37885, "loss": 0.3274, "lr": 1.9938936332039073e-06, "epoch": 0.6585719941929523, "percentage": 13.17, "elapsed_time": "0:06:52", "remaining_time": "0:45:19", "throughput": 5941.46, "total_tokens": 2451136}
|
|
{"current_steps": 4995, "total_steps": 37885, "loss": 0.0122, "lr": 1.993842693194227e-06, "epoch": 0.6592318859707008, "percentage": 13.18, "elapsed_time": "0:06:52", "remaining_time": "0:45:18", "throughput": 5942.78, "total_tokens": 2453632}
|
|
{"current_steps": 5000, "total_steps": 37885, "loss": 0.1135, "lr": 1.993791542248632e-06, "epoch": 0.6598917777484492, "percentage": 13.2, "elapsed_time": "0:06:53", "remaining_time": "0:45:17", "throughput": 5944.2, "total_tokens": 2456192}
|
|
{"current_steps": 5005, "total_steps": 37885, "loss": 0.0259, "lr": 1.9937401803779784e-06, "epoch": 0.6605516695261977, "percentage": 13.21, "elapsed_time": "0:06:53", "remaining_time": "0:45:16", "throughput": 5945.33, "total_tokens": 2458624}
|
|
{"current_steps": 5010, "total_steps": 37885, "loss": 0.0829, "lr": 1.9936886075931678e-06, "epoch": 0.6612115613039462, "percentage": 13.22, "elapsed_time": "0:06:53", "remaining_time": "0:45:15", "throughput": 5946.17, "total_tokens": 2460928}
|
|
{"current_steps": 5015, "total_steps": 37885, "loss": 0.0726, "lr": 1.993636823905146e-06, "epoch": 0.6618714530816946, "percentage": 13.24, "elapsed_time": "0:06:54", "remaining_time": "0:45:14", "throughput": 5947.73, "total_tokens": 2463552}
|
|
{"current_steps": 5020, "total_steps": 37885, "loss": 0.0881, "lr": 1.9935848293249034e-06, "epoch": 0.6625313448594431, "percentage": 13.25, "elapsed_time": "0:06:54", "remaining_time": "0:45:13", "throughput": 5948.62, "total_tokens": 2465856}
|
|
{"current_steps": 5025, "total_steps": 37885, "loss": 0.044, "lr": 1.9935326238634763e-06, "epoch": 0.6631912366371915, "percentage": 13.26, "elapsed_time": "0:06:54", "remaining_time": "0:45:12", "throughput": 5949.72, "total_tokens": 2468288}
|
|
{"current_steps": 5030, "total_steps": 37885, "loss": 0.3045, "lr": 1.993480207531944e-06, "epoch": 0.66385112841494, "percentage": 13.28, "elapsed_time": "0:06:55", "remaining_time": "0:45:11", "throughput": 5951.29, "total_tokens": 2470912}
|
|
{"current_steps": 5035, "total_steps": 37885, "loss": 0.1027, "lr": 1.9934275803414317e-06, "epoch": 0.6645110201926884, "percentage": 13.29, "elapsed_time": "0:06:55", "remaining_time": "0:45:10", "throughput": 5952.86, "total_tokens": 2473536}
|
|
{"current_steps": 5040, "total_steps": 37885, "loss": 0.0028, "lr": 1.99337474230311e-06, "epoch": 0.6651709119704369, "percentage": 13.3, "elapsed_time": "0:06:55", "remaining_time": "0:45:10", "throughput": 5954.15, "total_tokens": 2476032}
|
|
{"current_steps": 5045, "total_steps": 37885, "loss": 0.2344, "lr": 1.993321693428192e-06, "epoch": 0.6658308037481853, "percentage": 13.32, "elapsed_time": "0:06:56", "remaining_time": "0:45:09", "throughput": 5954.69, "total_tokens": 2478208}
|
|
{"current_steps": 5050, "total_steps": 37885, "loss": 0.1178, "lr": 1.9932684337279378e-06, "epoch": 0.6664906955259338, "percentage": 13.33, "elapsed_time": "0:06:56", "remaining_time": "0:45:08", "throughput": 5955.51, "total_tokens": 2480512}
|
|
{"current_steps": 5055, "total_steps": 37885, "loss": 0.2015, "lr": 1.9932149632136514e-06, "epoch": 0.6671505873036822, "percentage": 13.34, "elapsed_time": "0:06:56", "remaining_time": "0:45:07", "throughput": 5956.78, "total_tokens": 2483008}
|
|
{"current_steps": 5060, "total_steps": 37885, "loss": 0.2345, "lr": 1.9931612818966812e-06, "epoch": 0.6678104790814307, "percentage": 13.36, "elapsed_time": "0:06:57", "remaining_time": "0:45:06", "throughput": 5957.79, "total_tokens": 2485376}
|
|
{"current_steps": 5065, "total_steps": 37885, "loss": 0.0378, "lr": 1.993107389788421e-06, "epoch": 0.668470370859179, "percentage": 13.37, "elapsed_time": "0:06:57", "remaining_time": "0:45:05", "throughput": 5959.5, "total_tokens": 2488064}
|
|
{"current_steps": 5070, "total_steps": 37885, "loss": 0.0923, "lr": 1.9930532869003085e-06, "epoch": 0.6691302626369275, "percentage": 13.38, "elapsed_time": "0:06:57", "remaining_time": "0:45:04", "throughput": 5960.88, "total_tokens": 2490624}
|
|
{"current_steps": 5075, "total_steps": 37885, "loss": 0.4066, "lr": 1.992998973243827e-06, "epoch": 0.669790154414676, "percentage": 13.4, "elapsed_time": "0:06:58", "remaining_time": "0:45:03", "throughput": 5962.43, "total_tokens": 2493248}
|
|
{"current_steps": 5080, "total_steps": 37885, "loss": 0.1969, "lr": 1.9929444488305047e-06, "epoch": 0.6704500461924244, "percentage": 13.41, "elapsed_time": "0:06:58", "remaining_time": "0:45:02", "throughput": 5963.71, "total_tokens": 2495744}
|
|
{"current_steps": 5085, "total_steps": 37885, "loss": 0.0028, "lr": 1.992889713671913e-06, "epoch": 0.6711099379701729, "percentage": 13.42, "elapsed_time": "0:06:58", "remaining_time": "0:45:01", "throughput": 5964.85, "total_tokens": 2498176}
|
|
{"current_steps": 5090, "total_steps": 37885, "loss": 0.0702, "lr": 1.99283476777967e-06, "epoch": 0.6717698297479213, "percentage": 13.44, "elapsed_time": "0:06:59", "remaining_time": "0:45:00", "throughput": 5965.55, "total_tokens": 2500416}
|
|
{"current_steps": 5095, "total_steps": 37885, "loss": 0.1533, "lr": 1.9927796111654366e-06, "epoch": 0.6724297215256698, "percentage": 13.45, "elapsed_time": "0:06:59", "remaining_time": "0:44:59", "throughput": 5966.66, "total_tokens": 2502848}
|
|
{"current_steps": 5100, "total_steps": 37885, "loss": 0.1575, "lr": 1.99272424384092e-06, "epoch": 0.6730896133034182, "percentage": 13.46, "elapsed_time": "0:06:59", "remaining_time": "0:44:58", "throughput": 5967.52, "total_tokens": 2505152}
|
|
{"current_steps": 5105, "total_steps": 37885, "loss": 0.1766, "lr": 1.992668665817871e-06, "epoch": 0.6737495050811667, "percentage": 13.47, "elapsed_time": "0:07:00", "remaining_time": "0:44:57", "throughput": 5968.79, "total_tokens": 2507648}
|
|
{"current_steps": 5110, "total_steps": 37885, "loss": 0.0742, "lr": 1.9926128771080867e-06, "epoch": 0.6744093968589151, "percentage": 13.49, "elapsed_time": "0:07:00", "remaining_time": "0:44:56", "throughput": 5970.05, "total_tokens": 2510144}
|
|
{"current_steps": 5115, "total_steps": 37885, "loss": 0.2246, "lr": 1.9925568777234067e-06, "epoch": 0.6750692886366636, "percentage": 13.5, "elapsed_time": "0:07:00", "remaining_time": "0:44:55", "throughput": 5972.06, "total_tokens": 2513024}
|
|
{"current_steps": 5120, "total_steps": 37885, "loss": 0.0633, "lr": 1.992500667675717e-06, "epoch": 0.675729180414412, "percentage": 13.51, "elapsed_time": "0:07:01", "remaining_time": "0:44:54", "throughput": 5972.3, "total_tokens": 2515072}
|
|
{"current_steps": 5125, "total_steps": 37885, "loss": 0.0047, "lr": 1.992444246976948e-06, "epoch": 0.6763890721921605, "percentage": 13.53, "elapsed_time": "0:07:01", "remaining_time": "0:44:54", "throughput": 5973.11, "total_tokens": 2517376}
|
|
{"current_steps": 5130, "total_steps": 37885, "loss": 0.0688, "lr": 1.9923876156390743e-06, "epoch": 0.677048963969909, "percentage": 13.54, "elapsed_time": "0:07:01", "remaining_time": "0:44:53", "throughput": 5974.72, "total_tokens": 2520064}
|
|
{"current_steps": 5135, "total_steps": 37885, "loss": 0.0592, "lr": 1.992330773674115e-06, "epoch": 0.6777088557476574, "percentage": 13.55, "elapsed_time": "0:07:02", "remaining_time": "0:44:52", "throughput": 5976.28, "total_tokens": 2522688}
|
|
{"current_steps": 5140, "total_steps": 37885, "loss": 0.0356, "lr": 1.9922737210941353e-06, "epoch": 0.6783687475254059, "percentage": 13.57, "elapsed_time": "0:07:02", "remaining_time": "0:44:51", "throughput": 5977.51, "total_tokens": 2525184}
|
|
{"current_steps": 5145, "total_steps": 37885, "loss": 0.0004, "lr": 1.9922164579112436e-06, "epoch": 0.6790286393031543, "percentage": 13.58, "elapsed_time": "0:07:02", "remaining_time": "0:44:50", "throughput": 5978.5, "total_tokens": 2527552}
|
|
{"current_steps": 5150, "total_steps": 37885, "loss": 0.0067, "lr": 1.9921589841375938e-06, "epoch": 0.6796885310809028, "percentage": 13.59, "elapsed_time": "0:07:03", "remaining_time": "0:44:49", "throughput": 5980.17, "total_tokens": 2530240}
|
|
{"current_steps": 5155, "total_steps": 37885, "loss": 0.0581, "lr": 1.9921012997853843e-06, "epoch": 0.6803484228586512, "percentage": 13.61, "elapsed_time": "0:07:03", "remaining_time": "0:44:48", "throughput": 5980.87, "total_tokens": 2532480}
|
|
{"current_steps": 5160, "total_steps": 37885, "loss": 0.0488, "lr": 1.9920434048668582e-06, "epoch": 0.6810083146363997, "percentage": 13.62, "elapsed_time": "0:07:03", "remaining_time": "0:44:47", "throughput": 5981.94, "total_tokens": 2534912}
|
|
{"current_steps": 5165, "total_steps": 37885, "loss": 0.1032, "lr": 1.9919852993943035e-06, "epoch": 0.681668206414148, "percentage": 13.63, "elapsed_time": "0:07:04", "remaining_time": "0:44:46", "throughput": 5983.18, "total_tokens": 2537408}
|
|
{"current_steps": 5170, "total_steps": 37885, "loss": 0.3101, "lr": 1.991926983380052e-06, "epoch": 0.6823280981918965, "percentage": 13.65, "elapsed_time": "0:07:04", "remaining_time": "0:44:45", "throughput": 5984.15, "total_tokens": 2539776}
|
|
{"current_steps": 5175, "total_steps": 37885, "loss": 0.0739, "lr": 1.9918684568364813e-06, "epoch": 0.6829879899696449, "percentage": 13.66, "elapsed_time": "0:07:04", "remaining_time": "0:44:44", "throughput": 5985.24, "total_tokens": 2542208}
|
|
{"current_steps": 5180, "total_steps": 37885, "loss": 0.0663, "lr": 1.9918097197760134e-06, "epoch": 0.6836478817473934, "percentage": 13.67, "elapsed_time": "0:07:05", "remaining_time": "0:44:43", "throughput": 5986.44, "total_tokens": 2544704}
|
|
{"current_steps": 5185, "total_steps": 37885, "loss": 0.1005, "lr": 1.9917507722111144e-06, "epoch": 0.6843077735251418, "percentage": 13.69, "elapsed_time": "0:07:05", "remaining_time": "0:44:42", "throughput": 5987.39, "total_tokens": 2547072}
|
|
{"current_steps": 5190, "total_steps": 37885, "loss": 0.1848, "lr": 1.9916916141542957e-06, "epoch": 0.6849676653028903, "percentage": 13.7, "elapsed_time": "0:07:05", "remaining_time": "0:44:41", "throughput": 5988.33, "total_tokens": 2549440}
|
|
{"current_steps": 5195, "total_steps": 37885, "loss": 0.4134, "lr": 1.991632245618113e-06, "epoch": 0.6856275570806388, "percentage": 13.71, "elapsed_time": "0:07:06", "remaining_time": "0:44:41", "throughput": 5989.98, "total_tokens": 2552128}
|
|
{"current_steps": 5200, "total_steps": 37885, "loss": 0.0699, "lr": 1.9915726666151673e-06, "epoch": 0.6862874488583872, "percentage": 13.73, "elapsed_time": "0:07:06", "remaining_time": "0:44:40", "throughput": 5990.64, "total_tokens": 2554368}
|
|
{"current_steps": 5205, "total_steps": 37885, "loss": 0.1567, "lr": 1.9915128771581033e-06, "epoch": 0.6869473406361357, "percentage": 13.74, "elapsed_time": "0:07:06", "remaining_time": "0:44:39", "throughput": 5992.03, "total_tokens": 2556928}
|
|
{"current_steps": 5210, "total_steps": 37885, "loss": 0.17, "lr": 1.9914528772596113e-06, "epoch": 0.6876072324138841, "percentage": 13.75, "elapsed_time": "0:07:07", "remaining_time": "0:44:38", "throughput": 5993.13, "total_tokens": 2559360}
|
|
{"current_steps": 5215, "total_steps": 37885, "loss": 0.2589, "lr": 1.9913926669324253e-06, "epoch": 0.6882671241916326, "percentage": 13.77, "elapsed_time": "0:07:07", "remaining_time": "0:44:37", "throughput": 5994.36, "total_tokens": 2561856}
|
|
{"current_steps": 5220, "total_steps": 37885, "loss": 0.2197, "lr": 1.991332246189325e-06, "epoch": 0.688927015969381, "percentage": 13.78, "elapsed_time": "0:07:07", "remaining_time": "0:44:36", "throughput": 5995.59, "total_tokens": 2564352}
|
|
{"current_steps": 5225, "total_steps": 37885, "loss": 0.0444, "lr": 1.9912716150431343e-06, "epoch": 0.6895869077471295, "percentage": 13.79, "elapsed_time": "0:07:08", "remaining_time": "0:44:35", "throughput": 5996.63, "total_tokens": 2566784}
|
|
{"current_steps": 5230, "total_steps": 37885, "loss": 0.1357, "lr": 1.9912107735067215e-06, "epoch": 0.6902467995248779, "percentage": 13.8, "elapsed_time": "0:07:08", "remaining_time": "0:44:34", "throughput": 5997.6, "total_tokens": 2569152}
|
|
{"current_steps": 5235, "total_steps": 37885, "loss": 0.0881, "lr": 1.991149721593e-06, "epoch": 0.6909066913026264, "percentage": 13.82, "elapsed_time": "0:07:08", "remaining_time": "0:44:33", "throughput": 5998.54, "total_tokens": 2571520}
|
|
{"current_steps": 5240, "total_steps": 37885, "loss": 0.0346, "lr": 1.991088459314927e-06, "epoch": 0.6915665830803748, "percentage": 13.83, "elapsed_time": "0:07:09", "remaining_time": "0:44:32", "throughput": 5999.91, "total_tokens": 2574080}
|
|
{"current_steps": 5245, "total_steps": 37885, "loss": 0.0699, "lr": 1.991026986685506e-06, "epoch": 0.6922264748581233, "percentage": 13.84, "elapsed_time": "0:07:09", "remaining_time": "0:44:31", "throughput": 6000.59, "total_tokens": 2576320}
|
|
{"current_steps": 5250, "total_steps": 37885, "loss": 0.073, "lr": 1.9909653037177826e-06, "epoch": 0.6928863666358717, "percentage": 13.86, "elapsed_time": "0:07:09", "remaining_time": "0:44:30", "throughput": 6001.68, "total_tokens": 2578752}
|
|
{"current_steps": 5255, "total_steps": 37885, "loss": 0.0697, "lr": 1.9909034104248503e-06, "epoch": 0.6935462584136202, "percentage": 13.87, "elapsed_time": "0:07:09", "remaining_time": "0:44:30", "throughput": 6002.76, "total_tokens": 2581184}
|
|
{"current_steps": 5260, "total_steps": 37885, "loss": 0.1952, "lr": 1.9908413068198442e-06, "epoch": 0.6942061501913687, "percentage": 13.88, "elapsed_time": "0:07:10", "remaining_time": "0:44:29", "throughput": 6004.31, "total_tokens": 2583872}
|
|
{"current_steps": 5265, "total_steps": 37885, "loss": 0.2296, "lr": 1.990778992915946e-06, "epoch": 0.694866041969117, "percentage": 13.9, "elapsed_time": "0:07:10", "remaining_time": "0:44:28", "throughput": 6005.36, "total_tokens": 2586304}
|
|
{"current_steps": 5270, "total_steps": 37885, "loss": 0.1202, "lr": 1.990716468726381e-06, "epoch": 0.6955259337468656, "percentage": 13.91, "elapsed_time": "0:07:10", "remaining_time": "0:44:27", "throughput": 6007.1, "total_tokens": 2589056}
|
|
{"current_steps": 5275, "total_steps": 37885, "loss": 0.1517, "lr": 1.9906537342644203e-06, "epoch": 0.6961858255246139, "percentage": 13.92, "elapsed_time": "0:07:11", "remaining_time": "0:44:26", "throughput": 6008.18, "total_tokens": 2591488}
|
|
{"current_steps": 5280, "total_steps": 37885, "loss": 0.0183, "lr": 1.990590789543378e-06, "epoch": 0.6968457173023624, "percentage": 13.94, "elapsed_time": "0:07:11", "remaining_time": "0:44:25", "throughput": 6008.97, "total_tokens": 2593792}
|
|
{"current_steps": 5285, "total_steps": 37885, "loss": 0.2654, "lr": 1.9905276345766134e-06, "epoch": 0.6975056090801108, "percentage": 13.95, "elapsed_time": "0:07:11", "remaining_time": "0:44:24", "throughput": 6009.5, "total_tokens": 2595968}
|
|
{"current_steps": 5290, "total_steps": 37885, "loss": 0.2556, "lr": 1.990464269377532e-06, "epoch": 0.6981655008578593, "percentage": 13.96, "elapsed_time": "0:07:12", "remaining_time": "0:44:23", "throughput": 6010.03, "total_tokens": 2598144}
|
|
{"current_steps": 5295, "total_steps": 37885, "loss": 0.0923, "lr": 1.9904006939595815e-06, "epoch": 0.6988253926356077, "percentage": 13.98, "elapsed_time": "0:07:12", "remaining_time": "0:44:22", "throughput": 6010.82, "total_tokens": 2600448}
|
|
{"current_steps": 5300, "total_steps": 37885, "loss": 0.0051, "lr": 1.9903369083362554e-06, "epoch": 0.6994852844133562, "percentage": 13.99, "elapsed_time": "0:07:12", "remaining_time": "0:44:21", "throughput": 6012.0, "total_tokens": 2602944}
|
|
{"current_steps": 5305, "total_steps": 37885, "loss": 0.1914, "lr": 1.990272912521092e-06, "epoch": 0.7001451761911046, "percentage": 14.0, "elapsed_time": "0:07:13", "remaining_time": "0:44:20", "throughput": 6012.51, "total_tokens": 2605120}
|
|
{"current_steps": 5310, "total_steps": 37885, "loss": 0.0594, "lr": 1.990208706527674e-06, "epoch": 0.7008050679688531, "percentage": 14.02, "elapsed_time": "0:07:13", "remaining_time": "0:44:20", "throughput": 6013.04, "total_tokens": 2607296}
|
|
{"current_steps": 5315, "total_steps": 37885, "loss": 0.0578, "lr": 1.9901442903696284e-06, "epoch": 0.7014649597466015, "percentage": 14.03, "elapsed_time": "0:07:13", "remaining_time": "0:44:19", "throughput": 6014.11, "total_tokens": 2609728}
|
|
{"current_steps": 5320, "total_steps": 37885, "loss": 0.2463, "lr": 1.990079664060628e-06, "epoch": 0.70212485152435, "percentage": 14.04, "elapsed_time": "0:07:14", "remaining_time": "0:44:18", "throughput": 6015.31, "total_tokens": 2612224}
|
|
{"current_steps": 5325, "total_steps": 37885, "loss": 0.0071, "lr": 1.9900148276143874e-06, "epoch": 0.7027847433020985, "percentage": 14.06, "elapsed_time": "0:07:14", "remaining_time": "0:44:17", "throughput": 6016.47, "total_tokens": 2614720}
|
|
{"current_steps": 5330, "total_steps": 37885, "loss": 0.3149, "lr": 1.9899497810446694e-06, "epoch": 0.7034446350798469, "percentage": 14.07, "elapsed_time": "0:07:14", "remaining_time": "0:44:16", "throughput": 6017.9, "total_tokens": 2617344}
|
|
{"current_steps": 5335, "total_steps": 37885, "loss": 0.1851, "lr": 1.989884524365279e-06, "epoch": 0.7041045268575954, "percentage": 14.08, "elapsed_time": "0:07:15", "remaining_time": "0:44:15", "throughput": 6018.55, "total_tokens": 2619584}
|
|
{"current_steps": 5340, "total_steps": 37885, "loss": 0.0551, "lr": 1.9898190575900664e-06, "epoch": 0.7047644186353438, "percentage": 14.1, "elapsed_time": "0:07:15", "remaining_time": "0:44:14", "throughput": 6019.31, "total_tokens": 2621888}
|
|
{"current_steps": 5345, "total_steps": 37885, "loss": 0.1479, "lr": 1.9897533807329265e-06, "epoch": 0.7054243104130923, "percentage": 14.11, "elapsed_time": "0:07:15", "remaining_time": "0:44:13", "throughput": 6020.79, "total_tokens": 2624512}
|
|
{"current_steps": 5350, "total_steps": 37885, "loss": 0.0035, "lr": 1.989687493807799e-06, "epoch": 0.7060842021908407, "percentage": 14.12, "elapsed_time": "0:07:16", "remaining_time": "0:44:12", "throughput": 6021.93, "total_tokens": 2627008}
|
|
{"current_steps": 5355, "total_steps": 37885, "loss": 0.0406, "lr": 1.9896213968286672e-06, "epoch": 0.7067440939685892, "percentage": 14.13, "elapsed_time": "0:07:16", "remaining_time": "0:44:12", "throughput": 6022.99, "total_tokens": 2629440}
|
|
{"current_steps": 5360, "total_steps": 37885, "loss": 0.1103, "lr": 1.9895550898095606e-06, "epoch": 0.7074039857463376, "percentage": 14.15, "elapsed_time": "0:07:16", "remaining_time": "0:44:11", "throughput": 6024.03, "total_tokens": 2631872}
|
|
{"current_steps": 5365, "total_steps": 37885, "loss": 0.1771, "lr": 1.9894885727645516e-06, "epoch": 0.7080638775240861, "percentage": 14.16, "elapsed_time": "0:07:17", "remaining_time": "0:44:10", "throughput": 6025.6, "total_tokens": 2634560}
|
|
{"current_steps": 5370, "total_steps": 37885, "loss": 0.2135, "lr": 1.989421845707759e-06, "epoch": 0.7087237693018344, "percentage": 14.17, "elapsed_time": "0:07:17", "remaining_time": "0:44:09", "throughput": 6026.91, "total_tokens": 2637120}
|
|
{"current_steps": 5375, "total_steps": 37885, "loss": 0.0021, "lr": 1.989354908653344e-06, "epoch": 0.709383661079583, "percentage": 14.19, "elapsed_time": "0:07:17", "remaining_time": "0:44:08", "throughput": 6027.92, "total_tokens": 2639552}
|
|
{"current_steps": 5380, "total_steps": 37885, "loss": 0.1498, "lr": 1.989287761615514e-06, "epoch": 0.7100435528573315, "percentage": 14.2, "elapsed_time": "0:07:18", "remaining_time": "0:44:07", "throughput": 6028.97, "total_tokens": 2641984}
|
|
{"current_steps": 5385, "total_steps": 37885, "loss": 0.0816, "lr": 1.9892204046085206e-06, "epoch": 0.7107034446350798, "percentage": 14.21, "elapsed_time": "0:07:18", "remaining_time": "0:44:06", "throughput": 6029.88, "total_tokens": 2644352}
|
|
{"current_steps": 5390, "total_steps": 37885, "loss": 0.0261, "lr": 1.98915283764666e-06, "epoch": 0.7113633364128283, "percentage": 14.23, "elapsed_time": "0:07:18", "remaining_time": "0:44:05", "throughput": 6031.46, "total_tokens": 2647040}
|
|
{"current_steps": 5395, "total_steps": 37885, "loss": 0.0705, "lr": 1.989085060744272e-06, "epoch": 0.7120232281905767, "percentage": 14.24, "elapsed_time": "0:07:19", "remaining_time": "0:44:04", "throughput": 6032.44, "total_tokens": 2649472}
|
|
{"current_steps": 5400, "total_steps": 37885, "loss": 0.0431, "lr": 1.989017073915742e-06, "epoch": 0.7126831199683252, "percentage": 14.25, "elapsed_time": "0:07:19", "remaining_time": "0:44:04", "throughput": 6033.32, "total_tokens": 2651840}
|
|
{"current_steps": 5405, "total_steps": 37885, "loss": 0.0093, "lr": 1.9889488771755004e-06, "epoch": 0.7133430117460736, "percentage": 14.27, "elapsed_time": "0:07:19", "remaining_time": "0:44:03", "throughput": 6034.76, "total_tokens": 2654464}
|
|
{"current_steps": 5410, "total_steps": 37885, "loss": 0.1071, "lr": 1.9888804705380207e-06, "epoch": 0.7140029035238221, "percentage": 14.28, "elapsed_time": "0:07:20", "remaining_time": "0:44:02", "throughput": 6035.11, "total_tokens": 2656576}
|
|
{"current_steps": 5415, "total_steps": 37885, "loss": 0.0828, "lr": 1.9888118540178228e-06, "epoch": 0.7146627953015705, "percentage": 14.29, "elapsed_time": "0:07:20", "remaining_time": "0:44:01", "throughput": 6036.17, "total_tokens": 2659008}
|
|
{"current_steps": 5420, "total_steps": 37885, "loss": 0.0466, "lr": 1.9887430276294688e-06, "epoch": 0.715322687079319, "percentage": 14.31, "elapsed_time": "0:07:20", "remaining_time": "0:44:00", "throughput": 6037.62, "total_tokens": 2661632}
|
|
{"current_steps": 5425, "total_steps": 37885, "loss": 0.1611, "lr": 1.9886739913875666e-06, "epoch": 0.7159825788570674, "percentage": 14.32, "elapsed_time": "0:07:21", "remaining_time": "0:43:59", "throughput": 6038.91, "total_tokens": 2664192}
|
|
{"current_steps": 5430, "total_steps": 37885, "loss": 0.0963, "lr": 1.98860474530677e-06, "epoch": 0.7166424706348159, "percentage": 14.33, "elapsed_time": "0:07:21", "remaining_time": "0:43:58", "throughput": 6039.93, "total_tokens": 2666624}
|
|
{"current_steps": 5435, "total_steps": 37885, "loss": 0.1402, "lr": 1.9885352894017745e-06, "epoch": 0.7173023624125643, "percentage": 14.35, "elapsed_time": "0:07:21", "remaining_time": "0:43:57", "throughput": 6041.13, "total_tokens": 2669120}
|
|
{"current_steps": 5440, "total_steps": 37885, "loss": 0.2358, "lr": 1.9884656236873224e-06, "epoch": 0.7179622541903128, "percentage": 14.36, "elapsed_time": "0:07:22", "remaining_time": "0:43:57", "throughput": 6042.19, "total_tokens": 2671552}
|
|
{"current_steps": 5445, "total_steps": 37885, "loss": 0.1333, "lr": 1.9883957481781998e-06, "epoch": 0.7186221459680613, "percentage": 14.37, "elapsed_time": "0:07:22", "remaining_time": "0:43:56", "throughput": 6043.7, "total_tokens": 2674240}
|
|
{"current_steps": 5450, "total_steps": 37885, "loss": 0.1131, "lr": 1.988325662889237e-06, "epoch": 0.7192820377458097, "percentage": 14.39, "elapsed_time": "0:07:22", "remaining_time": "0:43:55", "throughput": 6044.44, "total_tokens": 2676544}
|
|
{"current_steps": 5455, "total_steps": 37885, "loss": 0.0009, "lr": 1.988255367835309e-06, "epoch": 0.7199419295235582, "percentage": 14.4, "elapsed_time": "0:07:23", "remaining_time": "0:43:54", "throughput": 6045.32, "total_tokens": 2678912}
|
|
{"current_steps": 5460, "total_steps": 37885, "loss": 0.0309, "lr": 1.9881848630313357e-06, "epoch": 0.7206018213013066, "percentage": 14.41, "elapsed_time": "0:07:23", "remaining_time": "0:43:53", "throughput": 6046.35, "total_tokens": 2681344}
|
|
{"current_steps": 5465, "total_steps": 37885, "loss": 0.0208, "lr": 1.988114148492281e-06, "epoch": 0.7212617130790551, "percentage": 14.43, "elapsed_time": "0:07:23", "remaining_time": "0:43:52", "throughput": 6047.33, "total_tokens": 2683776}
|
|
{"current_steps": 5470, "total_steps": 37885, "loss": 0.1115, "lr": 1.9880432242331534e-06, "epoch": 0.7219216048568035, "percentage": 14.44, "elapsed_time": "0:07:24", "remaining_time": "0:43:51", "throughput": 6047.94, "total_tokens": 2686016}
|
|
{"current_steps": 5475, "total_steps": 37885, "loss": 0.1267, "lr": 1.9879720902690067e-06, "epoch": 0.722581496634552, "percentage": 14.45, "elapsed_time": "0:07:24", "remaining_time": "0:43:50", "throughput": 6048.28, "total_tokens": 2688128}
|
|
{"current_steps": 5480, "total_steps": 37885, "loss": 0.2031, "lr": 1.987900746614938e-06, "epoch": 0.7232413884123003, "percentage": 14.46, "elapsed_time": "0:07:24", "remaining_time": "0:43:50", "throughput": 6048.91, "total_tokens": 2690368}
|
|
{"current_steps": 5485, "total_steps": 37885, "loss": 0.0919, "lr": 1.98782919328609e-06, "epoch": 0.7239012801900488, "percentage": 14.48, "elapsed_time": "0:07:25", "remaining_time": "0:43:49", "throughput": 6050.3, "total_tokens": 2692992}
|
|
{"current_steps": 5490, "total_steps": 37885, "loss": 0.0433, "lr": 1.9877574302976484e-06, "epoch": 0.7245611719677972, "percentage": 14.49, "elapsed_time": "0:07:25", "remaining_time": "0:43:48", "throughput": 6051.28, "total_tokens": 2695424}
|
|
{"current_steps": 5495, "total_steps": 37885, "loss": 0.1607, "lr": 1.987685457664845e-06, "epoch": 0.7252210637455457, "percentage": 14.5, "elapsed_time": "0:07:25", "remaining_time": "0:43:47", "throughput": 6052.29, "total_tokens": 2697856}
|
|
{"current_steps": 5500, "total_steps": 37885, "loss": 0.0006, "lr": 1.987613275402956e-06, "epoch": 0.7258809555232941, "percentage": 14.52, "elapsed_time": "0:07:26", "remaining_time": "0:43:46", "throughput": 6053.92, "total_tokens": 2700608}
|
|
{"current_steps": 5505, "total_steps": 37885, "loss": 0.023, "lr": 1.9875408835273007e-06, "epoch": 0.7265408473010426, "percentage": 14.53, "elapsed_time": "0:07:26", "remaining_time": "0:43:45", "throughput": 6055.0, "total_tokens": 2703104}
|
|
{"current_steps": 5510, "total_steps": 37885, "loss": 0.1917, "lr": 1.9874682820532444e-06, "epoch": 0.7272007390787911, "percentage": 14.54, "elapsed_time": "0:07:26", "remaining_time": "0:43:44", "throughput": 6055.59, "total_tokens": 2705344}
|
|
{"current_steps": 5515, "total_steps": 37885, "loss": 0.0201, "lr": 1.9873954709961956e-06, "epoch": 0.7278606308565395, "percentage": 14.56, "elapsed_time": "0:07:27", "remaining_time": "0:43:44", "throughput": 6056.08, "total_tokens": 2707520}
|
|
{"current_steps": 5520, "total_steps": 37885, "loss": 0.1724, "lr": 1.987322450371608e-06, "epoch": 0.728520522634288, "percentage": 14.57, "elapsed_time": "0:07:27", "remaining_time": "0:43:43", "throughput": 6056.92, "total_tokens": 2709888}
|
|
{"current_steps": 5525, "total_steps": 37885, "loss": 0.2705, "lr": 1.9872492201949807e-06, "epoch": 0.7291804144120364, "percentage": 14.58, "elapsed_time": "0:07:27", "remaining_time": "0:43:42", "throughput": 6057.64, "total_tokens": 2712192}
|
|
{"current_steps": 5530, "total_steps": 37885, "loss": 0.0019, "lr": 1.9871757804818546e-06, "epoch": 0.7298403061897849, "percentage": 14.6, "elapsed_time": "0:07:28", "remaining_time": "0:43:41", "throughput": 6058.09, "total_tokens": 2714368}
|
|
{"current_steps": 5535, "total_steps": 37885, "loss": 0.1082, "lr": 1.9871021312478183e-06, "epoch": 0.7305001979675333, "percentage": 14.61, "elapsed_time": "0:07:28", "remaining_time": "0:43:40", "throughput": 6058.68, "total_tokens": 2716608}
|
|
{"current_steps": 5540, "total_steps": 37885, "loss": 0.0082, "lr": 1.9870282725085025e-06, "epoch": 0.7311600897452818, "percentage": 14.62, "elapsed_time": "0:07:28", "remaining_time": "0:43:39", "throughput": 6058.86, "total_tokens": 2718656}
|
|
{"current_steps": 5545, "total_steps": 37885, "loss": 0.104, "lr": 1.9869542042795832e-06, "epoch": 0.7318199815230302, "percentage": 14.64, "elapsed_time": "0:07:29", "remaining_time": "0:43:38", "throughput": 6059.98, "total_tokens": 2721152}
|
|
{"current_steps": 5550, "total_steps": 37885, "loss": 0.0037, "lr": 1.9868799265767814e-06, "epoch": 0.7324798733007787, "percentage": 14.65, "elapsed_time": "0:07:29", "remaining_time": "0:43:38", "throughput": 6060.32, "total_tokens": 2723264}
|
|
{"current_steps": 5555, "total_steps": 37885, "loss": 0.268, "lr": 1.986805439415861e-06, "epoch": 0.7331397650785271, "percentage": 14.66, "elapsed_time": "0:07:29", "remaining_time": "0:43:37", "throughput": 6061.05, "total_tokens": 2725568}
|
|
{"current_steps": 5560, "total_steps": 37885, "loss": 0.1503, "lr": 1.9867307428126327e-06, "epoch": 0.7337996568562756, "percentage": 14.68, "elapsed_time": "0:07:30", "remaining_time": "0:43:36", "throughput": 6062.43, "total_tokens": 2728192}
|
|
{"current_steps": 5565, "total_steps": 37885, "loss": 0.2448, "lr": 1.9866558367829493e-06, "epoch": 0.7344595486340241, "percentage": 14.69, "elapsed_time": "0:07:30", "remaining_time": "0:43:35", "throughput": 6064.3, "total_tokens": 2731072}
|
|
{"current_steps": 5570, "total_steps": 37885, "loss": 0.123, "lr": 1.986580721342709e-06, "epoch": 0.7351194404117725, "percentage": 14.7, "elapsed_time": "0:07:30", "remaining_time": "0:43:34", "throughput": 6065.12, "total_tokens": 2733440}
|
|
{"current_steps": 5575, "total_steps": 37885, "loss": 0.1279, "lr": 1.986505396507855e-06, "epoch": 0.735779332189521, "percentage": 14.72, "elapsed_time": "0:07:31", "remaining_time": "0:43:33", "throughput": 6066.49, "total_tokens": 2736064}
|
|
{"current_steps": 5580, "total_steps": 37885, "loss": 0.0323, "lr": 1.9864298622943747e-06, "epoch": 0.7364392239672694, "percentage": 14.73, "elapsed_time": "0:07:31", "remaining_time": "0:43:33", "throughput": 6067.46, "total_tokens": 2738496}
|
|
{"current_steps": 5585, "total_steps": 37885, "loss": 0.0531, "lr": 1.986354118718299e-06, "epoch": 0.7370991157450179, "percentage": 14.74, "elapsed_time": "0:07:31", "remaining_time": "0:43:32", "throughput": 6068.2, "total_tokens": 2740800}
|
|
{"current_steps": 5590, "total_steps": 37885, "loss": 0.0734, "lr": 1.9862781657957043e-06, "epoch": 0.7377590075227662, "percentage": 14.76, "elapsed_time": "0:07:31", "remaining_time": "0:43:31", "throughput": 6068.89, "total_tokens": 2743104}
|
|
{"current_steps": 5595, "total_steps": 37885, "loss": 0.164, "lr": 1.986202003542711e-06, "epoch": 0.7384188993005147, "percentage": 14.77, "elapsed_time": "0:07:32", "remaining_time": "0:43:30", "throughput": 6069.5, "total_tokens": 2745344}
|
|
{"current_steps": 5600, "total_steps": 37885, "loss": 0.0798, "lr": 1.9861256319754836e-06, "epoch": 0.7390787910782631, "percentage": 14.78, "elapsed_time": "0:07:32", "remaining_time": "0:43:29", "throughput": 6069.93, "total_tokens": 2747520}
|
|
{"current_steps": 5605, "total_steps": 37885, "loss": 0.0556, "lr": 1.986049051110232e-06, "epoch": 0.7397386828560116, "percentage": 14.79, "elapsed_time": "0:07:32", "remaining_time": "0:43:28", "throughput": 6071.02, "total_tokens": 2750016}
|
|
{"current_steps": 5610, "total_steps": 37885, "loss": 0.165, "lr": 1.9859722609632097e-06, "epoch": 0.74039857463376, "percentage": 14.81, "elapsed_time": "0:07:33", "remaining_time": "0:43:27", "throughput": 6072.5, "total_tokens": 2752704}
|
|
{"current_steps": 5615, "total_steps": 37885, "loss": 0.1732, "lr": 1.985895261550715e-06, "epoch": 0.7410584664115085, "percentage": 14.82, "elapsed_time": "0:07:33", "remaining_time": "0:43:27", "throughput": 6073.8, "total_tokens": 2755328}
|
|
{"current_steps": 5620, "total_steps": 37885, "loss": 0.1728, "lr": 1.9858180528890898e-06, "epoch": 0.7417183581892569, "percentage": 14.83, "elapsed_time": "0:07:33", "remaining_time": "0:43:26", "throughput": 6074.45, "total_tokens": 2757632}
|
|
{"current_steps": 5625, "total_steps": 37885, "loss": 0.0655, "lr": 1.985740634994722e-06, "epoch": 0.7423782499670054, "percentage": 14.85, "elapsed_time": "0:07:34", "remaining_time": "0:43:25", "throughput": 6075.7, "total_tokens": 2760192}
|
|
{"current_steps": 5630, "total_steps": 37885, "loss": 0.0018, "lr": 1.985663007884043e-06, "epoch": 0.7430381417447539, "percentage": 14.86, "elapsed_time": "0:07:34", "remaining_time": "0:43:24", "throughput": 6077.03, "total_tokens": 2762816}
|
|
{"current_steps": 5635, "total_steps": 37885, "loss": 0.0711, "lr": 1.9855851715735275e-06, "epoch": 0.7436980335225023, "percentage": 14.87, "elapsed_time": "0:07:34", "remaining_time": "0:43:23", "throughput": 6077.71, "total_tokens": 2765120}
|
|
{"current_steps": 5640, "total_steps": 37885, "loss": 0.0933, "lr": 1.985507126079697e-06, "epoch": 0.7443579253002508, "percentage": 14.89, "elapsed_time": "0:07:35", "remaining_time": "0:43:23", "throughput": 6079.11, "total_tokens": 2767808}
|
|
{"current_steps": 5645, "total_steps": 37885, "loss": 0.0009, "lr": 1.985428871419115e-06, "epoch": 0.7450178170779992, "percentage": 14.9, "elapsed_time": "0:07:35", "remaining_time": "0:43:22", "throughput": 6079.95, "total_tokens": 2770176}
|
|
{"current_steps": 5650, "total_steps": 37885, "loss": 0.1552, "lr": 1.9853504076083914e-06, "epoch": 0.7456777088557477, "percentage": 14.91, "elapsed_time": "0:07:35", "remaining_time": "0:43:21", "throughput": 6081.04, "total_tokens": 2772672}
|
|
{"current_steps": 5655, "total_steps": 37885, "loss": 0.1258, "lr": 1.985271734664179e-06, "epoch": 0.7463376006334961, "percentage": 14.93, "elapsed_time": "0:07:36", "remaining_time": "0:43:20", "throughput": 6081.99, "total_tokens": 2775104}
|
|
{"current_steps": 5660, "total_steps": 37885, "loss": 0.3175, "lr": 1.985192852603175e-06, "epoch": 0.7469974924112446, "percentage": 14.94, "elapsed_time": "0:07:36", "remaining_time": "0:43:19", "throughput": 6083.45, "total_tokens": 2777792}
|
|
{"current_steps": 5665, "total_steps": 37885, "loss": 0.2089, "lr": 1.9851137614421234e-06, "epoch": 0.747657384188993, "percentage": 14.95, "elapsed_time": "0:07:36", "remaining_time": "0:43:18", "throughput": 6084.78, "total_tokens": 2780416}
|
|
{"current_steps": 5670, "total_steps": 37885, "loss": 0.0021, "lr": 1.9850344611978085e-06, "epoch": 0.7483172759667415, "percentage": 14.97, "elapsed_time": "0:07:37", "remaining_time": "0:43:18", "throughput": 6086.49, "total_tokens": 2783232}
|
|
{"current_steps": 5675, "total_steps": 37885, "loss": 0.1406, "lr": 1.984954951887063e-06, "epoch": 0.7489771677444899, "percentage": 14.98, "elapsed_time": "0:07:37", "remaining_time": "0:43:17", "throughput": 6087.46, "total_tokens": 2785664}
|
|
{"current_steps": 5680, "total_steps": 37885, "loss": 0.0632, "lr": 1.984875233526761e-06, "epoch": 0.7496370595222384, "percentage": 14.99, "elapsed_time": "0:07:37", "remaining_time": "0:43:16", "throughput": 6088.61, "total_tokens": 2788224}
|
|
{"current_steps": 5685, "total_steps": 37885, "loss": 0.0028, "lr": 1.984795306133823e-06, "epoch": 0.7502969512999867, "percentage": 15.01, "elapsed_time": "0:07:38", "remaining_time": "0:43:15", "throughput": 6089.57, "total_tokens": 2790656}
|
|
{"current_steps": 5685, "total_steps": 37885, "eval_loss": 0.09698151051998138, "epoch": 0.7502969512999867, "percentage": 15.01, "elapsed_time": "0:07:46", "remaining_time": "0:43:59", "throughput": 5987.4, "total_tokens": 2790656}
|
|
{"current_steps": 5690, "total_steps": 37885, "loss": 0.0287, "lr": 1.984715169725212e-06, "epoch": 0.7509568430777352, "percentage": 15.02, "elapsed_time": "0:08:25", "remaining_time": "0:47:40", "throughput": 5525.46, "total_tokens": 2792960}
|
|
{"current_steps": 5695, "total_steps": 37885, "loss": 0.0862, "lr": 1.9846348243179373e-06, "epoch": 0.7516167348554837, "percentage": 15.03, "elapsed_time": "0:08:25", "remaining_time": "0:47:38", "throughput": 5527.11, "total_tokens": 2795648}
|
|
{"current_steps": 5700, "total_steps": 37885, "loss": 0.0883, "lr": 1.9845542699290516e-06, "epoch": 0.7522766266332321, "percentage": 15.05, "elapsed_time": "0:08:26", "remaining_time": "0:47:37", "throughput": 5527.58, "total_tokens": 2797696}
|
|
{"current_steps": 5705, "total_steps": 37885, "loss": 0.1298, "lr": 1.9844735065756513e-06, "epoch": 0.7529365184109806, "percentage": 15.06, "elapsed_time": "0:08:26", "remaining_time": "0:47:36", "throughput": 5528.88, "total_tokens": 2800192}
|
|
{"current_steps": 5710, "total_steps": 37885, "loss": 0.0658, "lr": 1.984392534274878e-06, "epoch": 0.753596410188729, "percentage": 15.07, "elapsed_time": "0:08:26", "remaining_time": "0:47:35", "throughput": 5529.93, "total_tokens": 2802560}
|
|
{"current_steps": 5715, "total_steps": 37885, "loss": 0.2382, "lr": 1.9843113530439184e-06, "epoch": 0.7542563019664775, "percentage": 15.09, "elapsed_time": "0:08:27", "remaining_time": "0:47:34", "throughput": 5531.1, "total_tokens": 2804992}
|
|
{"current_steps": 5720, "total_steps": 37885, "loss": 0.2144, "lr": 1.9842299629000014e-06, "epoch": 0.7549161937442259, "percentage": 15.1, "elapsed_time": "0:08:27", "remaining_time": "0:47:33", "throughput": 5532.08, "total_tokens": 2807296}
|
|
{"current_steps": 5725, "total_steps": 37885, "loss": 0.1445, "lr": 1.9841483638604025e-06, "epoch": 0.7555760855219744, "percentage": 15.11, "elapsed_time": "0:08:27", "remaining_time": "0:47:32", "throughput": 5533.78, "total_tokens": 2809984}
|
|
{"current_steps": 5730, "total_steps": 37885, "loss": 0.0021, "lr": 1.9840665559424395e-06, "epoch": 0.7562359772997228, "percentage": 15.12, "elapsed_time": "0:08:28", "remaining_time": "0:47:31", "throughput": 5535.52, "total_tokens": 2812736}
|
|
{"current_steps": 5735, "total_steps": 37885, "loss": 0.1602, "lr": 1.9839845391634764e-06, "epoch": 0.7568958690774713, "percentage": 15.14, "elapsed_time": "0:08:28", "remaining_time": "0:47:30", "throughput": 5536.5, "total_tokens": 2815040}
|
|
{"current_steps": 5740, "total_steps": 37885, "loss": 0.1313, "lr": 1.9839023135409203e-06, "epoch": 0.7575557608552197, "percentage": 15.15, "elapsed_time": "0:08:28", "remaining_time": "0:47:29", "throughput": 5537.45, "total_tokens": 2817344}
|
|
{"current_steps": 5745, "total_steps": 37885, "loss": 0.0919, "lr": 1.983819879092223e-06, "epoch": 0.7582156526329682, "percentage": 15.16, "elapsed_time": "0:08:29", "remaining_time": "0:47:28", "throughput": 5538.4, "total_tokens": 2819648}
|
|
{"current_steps": 5750, "total_steps": 37885, "loss": 0.2254, "lr": 1.9837372358348804e-06, "epoch": 0.7588755444107167, "percentage": 15.18, "elapsed_time": "0:08:29", "remaining_time": "0:47:27", "throughput": 5540.22, "total_tokens": 2822464}
|
|
{"current_steps": 5755, "total_steps": 37885, "loss": 0.1121, "lr": 1.9836543837864332e-06, "epoch": 0.7595354361884651, "percentage": 15.19, "elapsed_time": "0:08:29", "remaining_time": "0:47:26", "throughput": 5541.38, "total_tokens": 2824896}
|
|
{"current_steps": 5760, "total_steps": 37885, "loss": 0.1378, "lr": 1.9835713229644663e-06, "epoch": 0.7601953279662136, "percentage": 15.2, "elapsed_time": "0:08:30", "remaining_time": "0:47:25", "throughput": 5543.17, "total_tokens": 2827648}
|
|
{"current_steps": 5765, "total_steps": 37885, "loss": 0.1264, "lr": 1.983488053386608e-06, "epoch": 0.760855219743962, "percentage": 15.22, "elapsed_time": "0:08:30", "remaining_time": "0:47:23", "throughput": 5544.83, "total_tokens": 2830336}
|
|
{"current_steps": 5770, "total_steps": 37885, "loss": 0.039, "lr": 1.983404575070533e-06, "epoch": 0.7615151115217105, "percentage": 15.23, "elapsed_time": "0:08:30", "remaining_time": "0:47:22", "throughput": 5545.79, "total_tokens": 2832640}
|
|
{"current_steps": 5775, "total_steps": 37885, "loss": 0.0268, "lr": 1.9833208880339576e-06, "epoch": 0.7621750032994589, "percentage": 15.24, "elapsed_time": "0:08:31", "remaining_time": "0:47:21", "throughput": 5546.63, "total_tokens": 2834880}
|
|
{"current_steps": 5780, "total_steps": 37885, "loss": 0.1555, "lr": 1.983236992294645e-06, "epoch": 0.7628348950772074, "percentage": 15.26, "elapsed_time": "0:08:31", "remaining_time": "0:47:20", "throughput": 5548.02, "total_tokens": 2837440}
|
|
{"current_steps": 5785, "total_steps": 37885, "loss": 0.1095, "lr": 1.9831528878704003e-06, "epoch": 0.7634947868549558, "percentage": 15.27, "elapsed_time": "0:08:31", "remaining_time": "0:47:19", "throughput": 5549.08, "total_tokens": 2839808}
|
|
{"current_steps": 5790, "total_steps": 37885, "loss": 0.1398, "lr": 1.983068574779075e-06, "epoch": 0.7641546786327043, "percentage": 15.28, "elapsed_time": "0:08:32", "remaining_time": "0:47:18", "throughput": 5550.61, "total_tokens": 2842432}
|
|
{"current_steps": 5795, "total_steps": 37885, "loss": 0.1598, "lr": 1.9829840530385633e-06, "epoch": 0.7648145704104526, "percentage": 15.3, "elapsed_time": "0:08:32", "remaining_time": "0:47:17", "throughput": 5552.26, "total_tokens": 2845120}
|
|
{"current_steps": 5800, "total_steps": 37885, "loss": 0.0721, "lr": 1.9828993226668046e-06, "epoch": 0.7654744621882011, "percentage": 15.31, "elapsed_time": "0:08:32", "remaining_time": "0:47:16", "throughput": 5554.23, "total_tokens": 2848000}
|
|
{"current_steps": 5805, "total_steps": 37885, "loss": 0.1805, "lr": 1.982814383681782e-06, "epoch": 0.7661343539659495, "percentage": 15.32, "elapsed_time": "0:08:33", "remaining_time": "0:47:15", "throughput": 5555.72, "total_tokens": 2850624}
|
|
{"current_steps": 5810, "total_steps": 37885, "loss": 0.1815, "lr": 1.9827292361015235e-06, "epoch": 0.766794245743698, "percentage": 15.34, "elapsed_time": "0:08:33", "remaining_time": "0:47:14", "throughput": 5556.77, "total_tokens": 2852992}
|
|
{"current_steps": 5815, "total_steps": 37885, "loss": 0.0437, "lr": 1.9826438799441016e-06, "epoch": 0.7674541375214465, "percentage": 15.35, "elapsed_time": "0:08:33", "remaining_time": "0:47:13", "throughput": 5557.93, "total_tokens": 2855424}
|
|
{"current_steps": 5820, "total_steps": 37885, "loss": 0.147, "lr": 1.982558315227631e-06, "epoch": 0.7681140292991949, "percentage": 15.36, "elapsed_time": "0:08:34", "remaining_time": "0:47:12", "throughput": 5559.31, "total_tokens": 2857984}
|
|
{"current_steps": 5825, "total_steps": 37885, "loss": 0.0712, "lr": 1.982472541970274e-06, "epoch": 0.7687739210769434, "percentage": 15.38, "elapsed_time": "0:08:34", "remaining_time": "0:47:11", "throughput": 5560.94, "total_tokens": 2860672}
|
|
{"current_steps": 5830, "total_steps": 37885, "loss": 0.21, "lr": 1.9823865601902337e-06, "epoch": 0.7694338128546918, "percentage": 15.39, "elapsed_time": "0:08:34", "remaining_time": "0:47:10", "throughput": 5561.95, "total_tokens": 2863040}
|
|
{"current_steps": 5835, "total_steps": 37885, "loss": 0.1239, "lr": 1.9823003699057607e-06, "epoch": 0.7700937046324403, "percentage": 15.4, "elapsed_time": "0:08:35", "remaining_time": "0:47:09", "throughput": 5563.79, "total_tokens": 2865856}
|
|
{"current_steps": 5840, "total_steps": 37885, "loss": 0.1, "lr": 1.9822139711351465e-06, "epoch": 0.7707535964101887, "percentage": 15.42, "elapsed_time": "0:08:35", "remaining_time": "0:47:08", "throughput": 5564.58, "total_tokens": 2868096}
|
|
{"current_steps": 5845, "total_steps": 37885, "loss": 0.0024, "lr": 1.9821273638967304e-06, "epoch": 0.7714134881879372, "percentage": 15.43, "elapsed_time": "0:08:35", "remaining_time": "0:47:07", "throughput": 5566.21, "total_tokens": 2870784}
|
|
{"current_steps": 5850, "total_steps": 37885, "loss": 0.0828, "lr": 1.9820405482088927e-06, "epoch": 0.7720733799656856, "percentage": 15.44, "elapsed_time": "0:08:36", "remaining_time": "0:47:06", "throughput": 5567.37, "total_tokens": 2873216}
|
|
{"current_steps": 5855, "total_steps": 37885, "loss": 0.001, "lr": 1.9819535240900606e-06, "epoch": 0.7727332717434341, "percentage": 15.45, "elapsed_time": "0:08:36", "remaining_time": "0:47:05", "throughput": 5568.75, "total_tokens": 2875776}
|
|
{"current_steps": 5860, "total_steps": 37885, "loss": 0.0624, "lr": 1.9818662915587036e-06, "epoch": 0.7733931635211825, "percentage": 15.47, "elapsed_time": "0:08:36", "remaining_time": "0:47:04", "throughput": 5570.13, "total_tokens": 2878336}
|
|
{"current_steps": 5865, "total_steps": 37885, "loss": 0.2229, "lr": 1.981778850633336e-06, "epoch": 0.774053055298931, "percentage": 15.48, "elapsed_time": "0:08:37", "remaining_time": "0:47:02", "throughput": 5571.5, "total_tokens": 2880896}
|
|
{"current_steps": 5870, "total_steps": 37885, "loss": 0.0652, "lr": 1.981691201332517e-06, "epoch": 0.7747129470766794, "percentage": 15.49, "elapsed_time": "0:08:37", "remaining_time": "0:47:01", "throughput": 5573.2, "total_tokens": 2883648}
|
|
{"current_steps": 5875, "total_steps": 37885, "loss": 0.0585, "lr": 1.9816033436748495e-06, "epoch": 0.7753728388544279, "percentage": 15.51, "elapsed_time": "0:08:37", "remaining_time": "0:47:00", "throughput": 5574.1, "total_tokens": 2885952}
|
|
{"current_steps": 5880, "total_steps": 37885, "loss": 0.079, "lr": 1.98151527767898e-06, "epoch": 0.7760327306321764, "percentage": 15.52, "elapsed_time": "0:08:38", "remaining_time": "0:46:59", "throughput": 5575.6, "total_tokens": 2888576}
|
|
{"current_steps": 5885, "total_steps": 37885, "loss": 0.1387, "lr": 1.981427003363601e-06, "epoch": 0.7766926224099248, "percentage": 15.53, "elapsed_time": "0:08:38", "remaining_time": "0:46:58", "throughput": 5576.99, "total_tokens": 2891136}
|
|
{"current_steps": 5890, "total_steps": 37885, "loss": 0.1429, "lr": 1.9813385207474472e-06, "epoch": 0.7773525141876733, "percentage": 15.55, "elapsed_time": "0:08:38", "remaining_time": "0:46:57", "throughput": 5578.38, "total_tokens": 2893696}
|
|
{"current_steps": 5895, "total_steps": 37885, "loss": 0.0546, "lr": 1.981249829849299e-06, "epoch": 0.7780124059654216, "percentage": 15.56, "elapsed_time": "0:08:39", "remaining_time": "0:46:56", "throughput": 5580.16, "total_tokens": 2896512}
|
|
{"current_steps": 5900, "total_steps": 37885, "loss": 0.1847, "lr": 1.9811609306879798e-06, "epoch": 0.7786722977431701, "percentage": 15.57, "elapsed_time": "0:08:39", "remaining_time": "0:46:55", "throughput": 5581.43, "total_tokens": 2899008}
|
|
{"current_steps": 5905, "total_steps": 37885, "loss": 0.0416, "lr": 1.9810718232823584e-06, "epoch": 0.7793321895209185, "percentage": 15.59, "elapsed_time": "0:08:39", "remaining_time": "0:46:54", "throughput": 5582.49, "total_tokens": 2901376}
|
|
{"current_steps": 5910, "total_steps": 37885, "loss": 0.2391, "lr": 1.9809825076513462e-06, "epoch": 0.779992081298667, "percentage": 15.6, "elapsed_time": "0:08:40", "remaining_time": "0:46:53", "throughput": 5583.78, "total_tokens": 2903872}
|
|
{"current_steps": 5915, "total_steps": 37885, "loss": 0.0021, "lr": 1.980892983813901e-06, "epoch": 0.7806519730764154, "percentage": 15.61, "elapsed_time": "0:08:40", "remaining_time": "0:46:52", "throughput": 5584.81, "total_tokens": 2906240}
|
|
{"current_steps": 5920, "total_steps": 37885, "loss": 0.1206, "lr": 1.980803251789023e-06, "epoch": 0.7813118648541639, "percentage": 15.63, "elapsed_time": "0:08:40", "remaining_time": "0:46:51", "throughput": 5586.08, "total_tokens": 2908736}
|
|
{"current_steps": 5925, "total_steps": 37885, "loss": 0.1592, "lr": 1.980713311595757e-06, "epoch": 0.7819717566319123, "percentage": 15.64, "elapsed_time": "0:08:41", "remaining_time": "0:46:50", "throughput": 5587.1, "total_tokens": 2911104}
|
|
{"current_steps": 5930, "total_steps": 37885, "loss": 0.0998, "lr": 1.980623163253192e-06, "epoch": 0.7826316484096608, "percentage": 15.65, "elapsed_time": "0:08:41", "remaining_time": "0:46:49", "throughput": 5588.15, "total_tokens": 2913472}
|
|
{"current_steps": 5935, "total_steps": 37885, "loss": 0.1875, "lr": 1.9805328067804626e-06, "epoch": 0.7832915401874093, "percentage": 15.67, "elapsed_time": "0:08:41", "remaining_time": "0:46:48", "throughput": 5589.16, "total_tokens": 2915840}
|
|
{"current_steps": 5940, "total_steps": 37885, "loss": 0.0014, "lr": 1.980442242196745e-06, "epoch": 0.7839514319651577, "percentage": 15.68, "elapsed_time": "0:08:42", "remaining_time": "0:46:47", "throughput": 5590.08, "total_tokens": 2918144}
|
|
{"current_steps": 5945, "total_steps": 37885, "loss": 0.1515, "lr": 1.9803514695212613e-06, "epoch": 0.7846113237429062, "percentage": 15.69, "elapsed_time": "0:08:42", "remaining_time": "0:46:46", "throughput": 5591.59, "total_tokens": 2920768}
|
|
{"current_steps": 5950, "total_steps": 37885, "loss": 0.093, "lr": 1.9802604887732773e-06, "epoch": 0.7852712155206546, "percentage": 15.71, "elapsed_time": "0:08:42", "remaining_time": "0:46:45", "throughput": 5592.59, "total_tokens": 2923136}
|
|
{"current_steps": 5955, "total_steps": 37885, "loss": 0.1338, "lr": 1.980169299972103e-06, "epoch": 0.7859311072984031, "percentage": 15.72, "elapsed_time": "0:08:43", "remaining_time": "0:46:44", "throughput": 5593.73, "total_tokens": 2925568}
|
|
{"current_steps": 5960, "total_steps": 37885, "loss": 0.0132, "lr": 1.980077903137093e-06, "epoch": 0.7865909990761515, "percentage": 15.73, "elapsed_time": "0:08:43", "remaining_time": "0:46:43", "throughput": 5594.9, "total_tokens": 2928064}
|
|
{"current_steps": 5965, "total_steps": 37885, "loss": 0.1477, "lr": 1.979986298287645e-06, "epoch": 0.7872508908539, "percentage": 15.75, "elapsed_time": "0:08:43", "remaining_time": "0:46:42", "throughput": 5595.71, "total_tokens": 2930368}
|
|
{"current_steps": 5970, "total_steps": 37885, "loss": 0.0939, "lr": 1.979894485443201e-06, "epoch": 0.7879107826316484, "percentage": 15.76, "elapsed_time": "0:08:44", "remaining_time": "0:46:41", "throughput": 5597.05, "total_tokens": 2932928}
|
|
{"current_steps": 5975, "total_steps": 37885, "loss": 0.2729, "lr": 1.9798024646232495e-06, "epoch": 0.7885706744093969, "percentage": 15.77, "elapsed_time": "0:08:44", "remaining_time": "0:46:40", "throughput": 5598.19, "total_tokens": 2935360}
|
|
{"current_steps": 5980, "total_steps": 37885, "loss": 0.0693, "lr": 1.9797102358473195e-06, "epoch": 0.7892305661871453, "percentage": 15.78, "elapsed_time": "0:08:44", "remaining_time": "0:46:39", "throughput": 5599.46, "total_tokens": 2937920}
|
|
{"current_steps": 5985, "total_steps": 37885, "loss": 0.2579, "lr": 1.979617799134986e-06, "epoch": 0.7898904579648938, "percentage": 15.8, "elapsed_time": "0:08:45", "remaining_time": "0:46:38", "throughput": 5600.38, "total_tokens": 2940224}
|
|
{"current_steps": 5990, "total_steps": 37885, "loss": 0.0073, "lr": 1.979525154505869e-06, "epoch": 0.7905503497426422, "percentage": 15.81, "elapsed_time": "0:08:45", "remaining_time": "0:46:37", "throughput": 5601.82, "total_tokens": 2942848}
|
|
{"current_steps": 5995, "total_steps": 37885, "loss": 0.1256, "lr": 1.979432301979631e-06, "epoch": 0.7912102415203907, "percentage": 15.82, "elapsed_time": "0:08:45", "remaining_time": "0:46:36", "throughput": 5603.07, "total_tokens": 2945344}
|
|
{"current_steps": 6000, "total_steps": 37885, "loss": 0.0084, "lr": 1.9793392415759796e-06, "epoch": 0.7918701332981392, "percentage": 15.84, "elapsed_time": "0:08:46", "remaining_time": "0:46:35", "throughput": 5604.25, "total_tokens": 2947840}
|
|
{"current_steps": 6005, "total_steps": 37885, "loss": 0.1543, "lr": 1.979245973314666e-06, "epoch": 0.7925300250758875, "percentage": 15.85, "elapsed_time": "0:08:46", "remaining_time": "0:46:34", "throughput": 5605.11, "total_tokens": 2950144}
|
|
{"current_steps": 6010, "total_steps": 37885, "loss": 0.0351, "lr": 1.9791524972154856e-06, "epoch": 0.793189916853636, "percentage": 15.86, "elapsed_time": "0:08:46", "remaining_time": "0:46:33", "throughput": 5605.88, "total_tokens": 2952384}
|
|
{"current_steps": 6015, "total_steps": 37885, "loss": 0.2565, "lr": 1.979058813298278e-06, "epoch": 0.7938498086313844, "percentage": 15.88, "elapsed_time": "0:08:46", "remaining_time": "0:46:32", "throughput": 5607.48, "total_tokens": 2955136}
|
|
{"current_steps": 6020, "total_steps": 37885, "loss": 0.0011, "lr": 1.978964921582927e-06, "epoch": 0.7945097004091329, "percentage": 15.89, "elapsed_time": "0:08:47", "remaining_time": "0:46:31", "throughput": 5609.04, "total_tokens": 2957824}
|
|
{"current_steps": 6025, "total_steps": 37885, "loss": 0.063, "lr": 1.9788708220893608e-06, "epoch": 0.7951695921868813, "percentage": 15.9, "elapsed_time": "0:08:47", "remaining_time": "0:46:30", "throughput": 5610.12, "total_tokens": 2960256}
|
|
{"current_steps": 6030, "total_steps": 37885, "loss": 0.19, "lr": 1.9787765148375506e-06, "epoch": 0.7958294839646298, "percentage": 15.92, "elapsed_time": "0:08:48", "remaining_time": "0:46:30", "throughput": 5608.27, "total_tokens": 2962944}
|
|
{"current_steps": 6035, "total_steps": 37885, "loss": 0.2955, "lr": 1.978681999847513e-06, "epoch": 0.7964893757423782, "percentage": 15.93, "elapsed_time": "0:08:48", "remaining_time": "0:46:29", "throughput": 5609.61, "total_tokens": 2965504}
|
|
{"current_steps": 6040, "total_steps": 37885, "loss": 0.3805, "lr": 1.9785872771393084e-06, "epoch": 0.7971492675201267, "percentage": 15.94, "elapsed_time": "0:08:48", "remaining_time": "0:46:28", "throughput": 5610.34, "total_tokens": 2967744}
|
|
{"current_steps": 6045, "total_steps": 37885, "loss": 0.0549, "lr": 1.9784923467330403e-06, "epoch": 0.7978091592978751, "percentage": 15.96, "elapsed_time": "0:08:49", "remaining_time": "0:46:27", "throughput": 5611.46, "total_tokens": 2970240}
|
|
{"current_steps": 6050, "total_steps": 37885, "loss": 0.2836, "lr": 1.9783972086488573e-06, "epoch": 0.7984690510756236, "percentage": 15.97, "elapsed_time": "0:08:49", "remaining_time": "0:46:27", "throughput": 5612.94, "total_tokens": 2972928}
|
|
{"current_steps": 6055, "total_steps": 37885, "loss": 0.0879, "lr": 1.9783018629069516e-06, "epoch": 0.799128942853372, "percentage": 15.98, "elapsed_time": "0:08:49", "remaining_time": "0:46:26", "throughput": 5613.68, "total_tokens": 2975168}
|
|
{"current_steps": 6060, "total_steps": 37885, "loss": 0.1121, "lr": 1.97820630952756e-06, "epoch": 0.7997888346311205, "percentage": 16.0, "elapsed_time": "0:08:50", "remaining_time": "0:46:25", "throughput": 5614.4, "total_tokens": 2977408}
|
|
{"current_steps": 6065, "total_steps": 37885, "loss": 0.0025, "lr": 1.978110548530963e-06, "epoch": 0.800448726408869, "percentage": 16.01, "elapsed_time": "0:08:50", "remaining_time": "0:46:24", "throughput": 5615.68, "total_tokens": 2979968}
|
|
{"current_steps": 6070, "total_steps": 37885, "loss": 0.2776, "lr": 1.9780145799374846e-06, "epoch": 0.8011086181866174, "percentage": 16.02, "elapsed_time": "0:08:50", "remaining_time": "0:46:23", "throughput": 5616.91, "total_tokens": 2982528}
|
|
{"current_steps": 6075, "total_steps": 37885, "loss": 0.0743, "lr": 1.977918403767494e-06, "epoch": 0.8017685099643659, "percentage": 16.04, "elapsed_time": "0:08:51", "remaining_time": "0:46:22", "throughput": 5617.75, "total_tokens": 2984832}
|
|
{"current_steps": 6080, "total_steps": 37885, "loss": 0.0383, "lr": 1.9778220200414036e-06, "epoch": 0.8024284017421143, "percentage": 16.05, "elapsed_time": "0:08:51", "remaining_time": "0:46:21", "throughput": 5618.93, "total_tokens": 2987328}
|
|
{"current_steps": 6085, "total_steps": 37885, "loss": 0.0037, "lr": 1.9777254287796706e-06, "epoch": 0.8030882935198628, "percentage": 16.06, "elapsed_time": "0:08:51", "remaining_time": "0:46:20", "throughput": 5619.94, "total_tokens": 2989760}
|
|
{"current_steps": 6090, "total_steps": 37885, "loss": 0.0012, "lr": 1.9776286300027954e-06, "epoch": 0.8037481852976112, "percentage": 16.07, "elapsed_time": "0:08:52", "remaining_time": "0:46:19", "throughput": 5621.22, "total_tokens": 2992320}
|
|
{"current_steps": 6095, "total_steps": 37885, "loss": 0.1335, "lr": 1.9775316237313225e-06, "epoch": 0.8044080770753597, "percentage": 16.09, "elapsed_time": "0:08:52", "remaining_time": "0:46:18", "throughput": 5622.91, "total_tokens": 2995136}
|
|
{"current_steps": 6100, "total_steps": 37885, "loss": 0.0003, "lr": 1.977434409985842e-06, "epoch": 0.805067968853108, "percentage": 16.1, "elapsed_time": "0:08:53", "remaining_time": "0:46:17", "throughput": 5624.71, "total_tokens": 2998016}
|
|
{"current_steps": 6105, "total_steps": 37885, "loss": 0.2727, "lr": 1.977336988786985e-06, "epoch": 0.8057278606308566, "percentage": 16.11, "elapsed_time": "0:08:53", "remaining_time": "0:46:16", "throughput": 5626.4, "total_tokens": 3000832}
|
|
{"current_steps": 6110, "total_steps": 37885, "loss": 0.2474, "lr": 1.97723936015543e-06, "epoch": 0.8063877524086049, "percentage": 16.13, "elapsed_time": "0:08:53", "remaining_time": "0:46:15", "throughput": 5627.97, "total_tokens": 3003584}
|
|
{"current_steps": 6115, "total_steps": 37885, "loss": 0.0878, "lr": 1.9771415241118972e-06, "epoch": 0.8070476441863534, "percentage": 16.14, "elapsed_time": "0:08:54", "remaining_time": "0:46:14", "throughput": 5629.8, "total_tokens": 3006464}
|
|
{"current_steps": 6120, "total_steps": 37885, "loss": 0.1026, "lr": 1.9770434806771525e-06, "epoch": 0.8077075359641019, "percentage": 16.15, "elapsed_time": "0:08:54", "remaining_time": "0:46:13", "throughput": 5630.88, "total_tokens": 3008896}
|
|
{"current_steps": 6125, "total_steps": 37885, "loss": 0.0518, "lr": 1.976945229872003e-06, "epoch": 0.8083674277418503, "percentage": 16.17, "elapsed_time": "0:08:54", "remaining_time": "0:46:12", "throughput": 5632.03, "total_tokens": 3011392}
|
|
{"current_steps": 6130, "total_steps": 37885, "loss": 0.2063, "lr": 1.976846771717304e-06, "epoch": 0.8090273195195988, "percentage": 16.18, "elapsed_time": "0:08:55", "remaining_time": "0:46:11", "throughput": 5633.39, "total_tokens": 3014016}
|
|
{"current_steps": 6135, "total_steps": 37885, "loss": 0.1909, "lr": 1.9767481062339512e-06, "epoch": 0.8096872112973472, "percentage": 16.19, "elapsed_time": "0:08:55", "remaining_time": "0:46:10", "throughput": 5634.63, "total_tokens": 3016576}
|
|
{"current_steps": 6140, "total_steps": 37885, "loss": 0.0153, "lr": 1.976649233442886e-06, "epoch": 0.8103471030750957, "percentage": 16.21, "elapsed_time": "0:08:55", "remaining_time": "0:46:09", "throughput": 5635.68, "total_tokens": 3019008}
|
|
{"current_steps": 6145, "total_steps": 37885, "loss": 0.0678, "lr": 1.976550153365093e-06, "epoch": 0.8110069948528441, "percentage": 16.22, "elapsed_time": "0:08:56", "remaining_time": "0:46:08", "throughput": 5636.84, "total_tokens": 3021504}
|
|
{"current_steps": 6150, "total_steps": 37885, "loss": 0.0594, "lr": 1.9764508660216018e-06, "epoch": 0.8116668866305926, "percentage": 16.23, "elapsed_time": "0:08:56", "remaining_time": "0:46:07", "throughput": 5637.19, "total_tokens": 3023552}
|
|
{"current_steps": 6155, "total_steps": 37885, "loss": 0.1778, "lr": 1.976351371433485e-06, "epoch": 0.812326778408341, "percentage": 16.25, "elapsed_time": "0:08:56", "remaining_time": "0:46:06", "throughput": 5638.01, "total_tokens": 3025856}
|
|
{"current_steps": 6160, "total_steps": 37885, "loss": 0.2057, "lr": 1.9762516696218598e-06, "epoch": 0.8129866701860895, "percentage": 16.26, "elapsed_time": "0:08:57", "remaining_time": "0:46:05", "throughput": 5638.69, "total_tokens": 3028096}
|
|
{"current_steps": 6165, "total_steps": 37885, "loss": 0.3517, "lr": 1.9761517606078873e-06, "epoch": 0.8136465619638379, "percentage": 16.27, "elapsed_time": "0:08:57", "remaining_time": "0:46:04", "throughput": 5639.66, "total_tokens": 3030528}
|
|
{"current_steps": 6170, "total_steps": 37885, "loss": 0.2465, "lr": 1.9760516444127722e-06, "epoch": 0.8143064537415864, "percentage": 16.29, "elapsed_time": "0:08:57", "remaining_time": "0:46:03", "throughput": 5640.91, "total_tokens": 3033088}
|
|
{"current_steps": 6175, "total_steps": 37885, "loss": 0.0653, "lr": 1.975951321057764e-06, "epoch": 0.8149663455193348, "percentage": 16.3, "elapsed_time": "0:08:58", "remaining_time": "0:46:02", "throughput": 5641.34, "total_tokens": 3035200}
|
|
{"current_steps": 6180, "total_steps": 37885, "loss": 0.0755, "lr": 1.975850790564155e-06, "epoch": 0.8156262372970833, "percentage": 16.31, "elapsed_time": "0:08:58", "remaining_time": "0:46:01", "throughput": 5642.48, "total_tokens": 3037696}
|
|
{"current_steps": 6185, "total_steps": 37885, "loss": 0.1064, "lr": 1.9757500529532817e-06, "epoch": 0.8162861290748318, "percentage": 16.33, "elapsed_time": "0:08:58", "remaining_time": "0:46:00", "throughput": 5643.52, "total_tokens": 3040128}
|
|
{"current_steps": 6190, "total_steps": 37885, "loss": 0.1667, "lr": 1.975649108246526e-06, "epoch": 0.8169460208525802, "percentage": 16.34, "elapsed_time": "0:08:59", "remaining_time": "0:46:00", "throughput": 5644.54, "total_tokens": 3042560}
|
|
{"current_steps": 6195, "total_steps": 37885, "loss": 0.2541, "lr": 1.9755479564653123e-06, "epoch": 0.8176059126303287, "percentage": 16.35, "elapsed_time": "0:08:59", "remaining_time": "0:45:59", "throughput": 5645.24, "total_tokens": 3044800}
|
|
{"current_steps": 6200, "total_steps": 37885, "loss": 0.124, "lr": 1.975446597631109e-06, "epoch": 0.8182658044080771, "percentage": 16.37, "elapsed_time": "0:08:59", "remaining_time": "0:45:58", "throughput": 5645.96, "total_tokens": 3047040}
|
|
{"current_steps": 6205, "total_steps": 37885, "loss": 0.064, "lr": 1.975345031765429e-06, "epoch": 0.8189256961858256, "percentage": 16.38, "elapsed_time": "0:09:00", "remaining_time": "0:45:57", "throughput": 5647.19, "total_tokens": 3049600}
|
|
{"current_steps": 6210, "total_steps": 37885, "loss": 0.1276, "lr": 1.975243258889829e-06, "epoch": 0.819585587963574, "percentage": 16.39, "elapsed_time": "0:09:00", "remaining_time": "0:45:56", "throughput": 5648.83, "total_tokens": 3052416}
|
|
{"current_steps": 6215, "total_steps": 37885, "loss": 0.0928, "lr": 1.9751412790259093e-06, "epoch": 0.8202454797413224, "percentage": 16.4, "elapsed_time": "0:09:00", "remaining_time": "0:45:55", "throughput": 5650.16, "total_tokens": 3055040}
|
|
{"current_steps": 6220, "total_steps": 37885, "loss": 0.0983, "lr": 1.9750390921953144e-06, "epoch": 0.8209053715190708, "percentage": 16.42, "elapsed_time": "0:09:01", "remaining_time": "0:45:54", "throughput": 5651.84, "total_tokens": 3057856}
|
|
{"current_steps": 6225, "total_steps": 37885, "loss": 0.2008, "lr": 1.9749366984197335e-06, "epoch": 0.8215652632968193, "percentage": 16.43, "elapsed_time": "0:09:01", "remaining_time": "0:45:53", "throughput": 5652.64, "total_tokens": 3060160}
|
|
{"current_steps": 6230, "total_steps": 37885, "loss": 0.1972, "lr": 1.9748340977208975e-06, "epoch": 0.8222251550745677, "percentage": 16.44, "elapsed_time": "0:09:01", "remaining_time": "0:45:52", "throughput": 5653.6, "total_tokens": 3062592}
|
|
{"current_steps": 6235, "total_steps": 37885, "loss": 0.0591, "lr": 1.9747312901205837e-06, "epoch": 0.8228850468523162, "percentage": 16.46, "elapsed_time": "0:09:02", "remaining_time": "0:45:51", "throughput": 5654.68, "total_tokens": 3065088}
|
|
{"current_steps": 6240, "total_steps": 37885, "loss": 0.0013, "lr": 1.9746282756406126e-06, "epoch": 0.8235449386300646, "percentage": 16.47, "elapsed_time": "0:09:02", "remaining_time": "0:45:50", "throughput": 5655.95, "total_tokens": 3067712}
|
|
{"current_steps": 6245, "total_steps": 37885, "loss": 0.1508, "lr": 1.974525054302847e-06, "epoch": 0.8242048304078131, "percentage": 16.48, "elapsed_time": "0:09:02", "remaining_time": "0:45:49", "throughput": 5656.94, "total_tokens": 3070144}
|
|
{"current_steps": 6250, "total_steps": 37885, "loss": 0.2101, "lr": 1.974421626129196e-06, "epoch": 0.8248647221855616, "percentage": 16.5, "elapsed_time": "0:09:03", "remaining_time": "0:45:48", "throughput": 5657.69, "total_tokens": 3072448}
|
|
{"current_steps": 6255, "total_steps": 37885, "loss": 0.1979, "lr": 1.9743179911416104e-06, "epoch": 0.82552461396331, "percentage": 16.51, "elapsed_time": "0:09:03", "remaining_time": "0:45:47", "throughput": 5658.95, "total_tokens": 3075072}
|
|
{"current_steps": 6260, "total_steps": 37885, "loss": 0.1248, "lr": 1.9742141493620876e-06, "epoch": 0.8261845057410585, "percentage": 16.52, "elapsed_time": "0:09:03", "remaining_time": "0:45:46", "throughput": 5659.7, "total_tokens": 3077376}
|
|
{"current_steps": 6265, "total_steps": 37885, "loss": 0.2122, "lr": 1.9741101008126655e-06, "epoch": 0.8268443975188069, "percentage": 16.54, "elapsed_time": "0:09:04", "remaining_time": "0:45:45", "throughput": 5660.71, "total_tokens": 3079808}
|
|
{"current_steps": 6270, "total_steps": 37885, "loss": 0.0026, "lr": 1.974005845515429e-06, "epoch": 0.8275042892965554, "percentage": 16.55, "elapsed_time": "0:09:04", "remaining_time": "0:45:45", "throughput": 5662.21, "total_tokens": 3082560}
|
|
{"current_steps": 6275, "total_steps": 37885, "loss": 0.1156, "lr": 1.9739013834925047e-06, "epoch": 0.8281641810743038, "percentage": 16.56, "elapsed_time": "0:09:04", "remaining_time": "0:45:44", "throughput": 5662.49, "total_tokens": 3084608}
|
|
{"current_steps": 6280, "total_steps": 37885, "loss": 0.3242, "lr": 1.973796714766064e-06, "epoch": 0.8288240728520523, "percentage": 16.58, "elapsed_time": "0:09:05", "remaining_time": "0:45:43", "throughput": 5663.55, "total_tokens": 3087104}
|
|
{"current_steps": 6285, "total_steps": 37885, "loss": 0.0915, "lr": 1.973691839358323e-06, "epoch": 0.8294839646298007, "percentage": 16.59, "elapsed_time": "0:09:05", "remaining_time": "0:45:42", "throughput": 5664.25, "total_tokens": 3089408}
|
|
{"current_steps": 6290, "total_steps": 37885, "loss": 0.054, "lr": 1.973586757291539e-06, "epoch": 0.8301438564075492, "percentage": 16.6, "elapsed_time": "0:09:05", "remaining_time": "0:45:41", "throughput": 5665.11, "total_tokens": 3091776}
|
|
{"current_steps": 6295, "total_steps": 37885, "loss": 0.1558, "lr": 1.973481468588017e-06, "epoch": 0.8308037481852976, "percentage": 16.62, "elapsed_time": "0:09:06", "remaining_time": "0:45:40", "throughput": 5666.1, "total_tokens": 3094208}
|
|
{"current_steps": 6300, "total_steps": 37885, "loss": 0.0414, "lr": 1.973375973270102e-06, "epoch": 0.8314636399630461, "percentage": 16.63, "elapsed_time": "0:09:06", "remaining_time": "0:45:39", "throughput": 5667.34, "total_tokens": 3096768}
|
|
{"current_steps": 6305, "total_steps": 37885, "loss": 0.0018, "lr": 1.973270271360185e-06, "epoch": 0.8321235317407946, "percentage": 16.64, "elapsed_time": "0:09:06", "remaining_time": "0:45:38", "throughput": 5668.74, "total_tokens": 3099456}
|
|
{"current_steps": 6310, "total_steps": 37885, "loss": 0.1685, "lr": 1.9731643628807014e-06, "epoch": 0.832783423518543, "percentage": 16.66, "elapsed_time": "0:09:07", "remaining_time": "0:45:37", "throughput": 5670.24, "total_tokens": 3102208}
|
|
{"current_steps": 6315, "total_steps": 37885, "loss": 0.0576, "lr": 1.973058247854129e-06, "epoch": 0.8334433152962915, "percentage": 16.67, "elapsed_time": "0:09:07", "remaining_time": "0:45:36", "throughput": 5671.63, "total_tokens": 3104896}
|
|
{"current_steps": 6320, "total_steps": 37885, "loss": 0.1591, "lr": 1.9729519263029895e-06, "epoch": 0.8341032070740398, "percentage": 16.68, "elapsed_time": "0:09:07", "remaining_time": "0:45:35", "throughput": 5672.98, "total_tokens": 3107520}
|
|
{"current_steps": 6325, "total_steps": 37885, "loss": 0.0462, "lr": 1.972845398249849e-06, "epoch": 0.8347630988517883, "percentage": 16.7, "elapsed_time": "0:09:08", "remaining_time": "0:45:34", "throughput": 5674.37, "total_tokens": 3110144}
|
|
{"current_steps": 6330, "total_steps": 37885, "loss": 0.0463, "lr": 1.972738663717318e-06, "epoch": 0.8354229906295367, "percentage": 16.71, "elapsed_time": "0:09:08", "remaining_time": "0:45:33", "throughput": 5675.74, "total_tokens": 3112768}
|
|
{"current_steps": 6335, "total_steps": 37885, "loss": 0.0003, "lr": 1.9726317227280494e-06, "epoch": 0.8360828824072852, "percentage": 16.72, "elapsed_time": "0:09:08", "remaining_time": "0:45:33", "throughput": 5676.97, "total_tokens": 3115328}
|
|
{"current_steps": 6340, "total_steps": 37885, "loss": 0.0213, "lr": 1.972524575304741e-06, "epoch": 0.8367427741850336, "percentage": 16.73, "elapsed_time": "0:09:09", "remaining_time": "0:45:32", "throughput": 5678.21, "total_tokens": 3117888}
|
|
{"current_steps": 6345, "total_steps": 37885, "loss": 0.0118, "lr": 1.972417221470134e-06, "epoch": 0.8374026659627821, "percentage": 16.75, "elapsed_time": "0:09:09", "remaining_time": "0:45:31", "throughput": 5679.33, "total_tokens": 3120384}
|
|
{"current_steps": 6350, "total_steps": 37885, "loss": 0.1584, "lr": 1.972309661247013e-06, "epoch": 0.8380625577405305, "percentage": 16.76, "elapsed_time": "0:09:09", "remaining_time": "0:45:30", "throughput": 5680.63, "total_tokens": 3123008}
|
|
{"current_steps": 6355, "total_steps": 37885, "loss": 0.0573, "lr": 1.9722018946582075e-06, "epoch": 0.838722449518279, "percentage": 16.77, "elapsed_time": "0:09:10", "remaining_time": "0:45:29", "throughput": 5681.76, "total_tokens": 3125504}
|
|
{"current_steps": 6360, "total_steps": 37885, "loss": 0.0681, "lr": 1.9720939217265904e-06, "epoch": 0.8393823412960274, "percentage": 16.79, "elapsed_time": "0:09:10", "remaining_time": "0:45:28", "throughput": 5682.47, "total_tokens": 3127744}
|
|
{"current_steps": 6365, "total_steps": 37885, "loss": 0.1754, "lr": 1.9719857424750776e-06, "epoch": 0.8400422330737759, "percentage": 16.8, "elapsed_time": "0:09:10", "remaining_time": "0:45:27", "throughput": 5683.26, "total_tokens": 3130048}
|
|
{"current_steps": 6370, "total_steps": 37885, "loss": 0.0619, "lr": 1.971877356926629e-06, "epoch": 0.8407021248515244, "percentage": 16.81, "elapsed_time": "0:09:11", "remaining_time": "0:45:26", "throughput": 5684.28, "total_tokens": 3132480}
|
|
{"current_steps": 6375, "total_steps": 37885, "loss": 0.136, "lr": 1.9717687651042494e-06, "epoch": 0.8413620166292728, "percentage": 16.83, "elapsed_time": "0:09:11", "remaining_time": "0:45:25", "throughput": 5685.65, "total_tokens": 3135104}
|
|
{"current_steps": 6380, "total_steps": 37885, "loss": 0.2398, "lr": 1.971659967030987e-06, "epoch": 0.8420219084070213, "percentage": 16.84, "elapsed_time": "0:09:11", "remaining_time": "0:45:24", "throughput": 5686.32, "total_tokens": 3137344}
|
|
{"current_steps": 6385, "total_steps": 37885, "loss": 0.2223, "lr": 1.9715509627299324e-06, "epoch": 0.8426818001847697, "percentage": 16.85, "elapsed_time": "0:09:12", "remaining_time": "0:45:23", "throughput": 5687.9, "total_tokens": 3140096}
|
|
{"current_steps": 6390, "total_steps": 37885, "loss": 0.1451, "lr": 1.971441752224221e-06, "epoch": 0.8433416919625182, "percentage": 16.87, "elapsed_time": "0:09:12", "remaining_time": "0:45:22", "throughput": 5688.69, "total_tokens": 3142400}
|
|
{"current_steps": 6395, "total_steps": 37885, "loss": 0.0571, "lr": 1.971332335537033e-06, "epoch": 0.8440015837402666, "percentage": 16.88, "elapsed_time": "0:09:12", "remaining_time": "0:45:21", "throughput": 5689.17, "total_tokens": 3144512}
|
|
{"current_steps": 6400, "total_steps": 37885, "loss": 0.1166, "lr": 1.97122271269159e-06, "epoch": 0.8446614755180151, "percentage": 16.89, "elapsed_time": "0:09:13", "remaining_time": "0:45:20", "throughput": 5690.21, "total_tokens": 3146944}
|
|
{"current_steps": 6405, "total_steps": 37885, "loss": 0.1062, "lr": 1.97111288371116e-06, "epoch": 0.8453213672957635, "percentage": 16.91, "elapsed_time": "0:09:13", "remaining_time": "0:45:19", "throughput": 5691.27, "total_tokens": 3149376}
|
|
{"current_steps": 6410, "total_steps": 37885, "loss": 0.1249, "lr": 1.9710028486190524e-06, "epoch": 0.845981259073512, "percentage": 16.92, "elapsed_time": "0:09:13", "remaining_time": "0:45:18", "throughput": 5692.17, "total_tokens": 3151744}
|
|
{"current_steps": 6415, "total_steps": 37885, "loss": 0.039, "lr": 1.970892607438621e-06, "epoch": 0.8466411508512603, "percentage": 16.93, "elapsed_time": "0:09:14", "remaining_time": "0:45:17", "throughput": 5693.07, "total_tokens": 3154112}
|
|
{"current_steps": 6420, "total_steps": 37885, "loss": 0.0129, "lr": 1.970782160193265e-06, "epoch": 0.8473010426290088, "percentage": 16.95, "elapsed_time": "0:09:14", "remaining_time": "0:45:16", "throughput": 5693.98, "total_tokens": 3156480}
|
|
{"current_steps": 6425, "total_steps": 37885, "loss": 0.2154, "lr": 1.970671506906425e-06, "epoch": 0.8479609344067572, "percentage": 16.96, "elapsed_time": "0:09:14", "remaining_time": "0:45:15", "throughput": 5694.77, "total_tokens": 3158784}
|
|
{"current_steps": 6430, "total_steps": 37885, "loss": 0.1681, "lr": 1.970560647601587e-06, "epoch": 0.8486208261845057, "percentage": 16.97, "elapsed_time": "0:09:15", "remaining_time": "0:45:15", "throughput": 5695.66, "total_tokens": 3161152}
|
|
{"current_steps": 6435, "total_steps": 37885, "loss": 0.0015, "lr": 1.9704495823022797e-06, "epoch": 0.8492807179622542, "percentage": 16.99, "elapsed_time": "0:09:15", "remaining_time": "0:45:14", "throughput": 5696.99, "total_tokens": 3163776}
|
|
{"current_steps": 6440, "total_steps": 37885, "loss": 0.1335, "lr": 1.970338311032076e-06, "epoch": 0.8499406097400026, "percentage": 17.0, "elapsed_time": "0:09:15", "remaining_time": "0:45:13", "throughput": 5698.12, "total_tokens": 3166272}
|
|
{"current_steps": 6445, "total_steps": 37885, "loss": 0.1466, "lr": 1.970226833814592e-06, "epoch": 0.8506005015177511, "percentage": 17.01, "elapsed_time": "0:09:16", "remaining_time": "0:45:12", "throughput": 5698.98, "total_tokens": 3168640}
|
|
{"current_steps": 6450, "total_steps": 37885, "loss": 0.072, "lr": 1.970115150673489e-06, "epoch": 0.8512603932954995, "percentage": 17.03, "elapsed_time": "0:09:16", "remaining_time": "0:45:11", "throughput": 5699.91, "total_tokens": 3171008}
|
|
{"current_steps": 6455, "total_steps": 37885, "loss": 0.0785, "lr": 1.97000326163247e-06, "epoch": 0.851920285073248, "percentage": 17.04, "elapsed_time": "0:09:16", "remaining_time": "0:45:10", "throughput": 5700.71, "total_tokens": 3173312}
|
|
{"current_steps": 6460, "total_steps": 37885, "loss": 0.1788, "lr": 1.969891166715283e-06, "epoch": 0.8525801768509964, "percentage": 17.05, "elapsed_time": "0:09:16", "remaining_time": "0:45:09", "throughput": 5701.81, "total_tokens": 3175808}
|
|
{"current_steps": 6465, "total_steps": 37885, "loss": 0.1182, "lr": 1.969778865945719e-06, "epoch": 0.8532400686287449, "percentage": 17.06, "elapsed_time": "0:09:17", "remaining_time": "0:45:08", "throughput": 5702.47, "total_tokens": 3178048}
|
|
{"current_steps": 6470, "total_steps": 37885, "loss": 0.0031, "lr": 1.969666359347614e-06, "epoch": 0.8538999604064933, "percentage": 17.08, "elapsed_time": "0:09:17", "remaining_time": "0:45:07", "throughput": 5703.56, "total_tokens": 3180544}
|
|
{"current_steps": 6475, "total_steps": 37885, "loss": 0.268, "lr": 1.969553646944845e-06, "epoch": 0.8545598521842418, "percentage": 17.09, "elapsed_time": "0:09:17", "remaining_time": "0:45:06", "throughput": 5704.65, "total_tokens": 3183040}
|
|
{"current_steps": 6480, "total_steps": 37885, "loss": 0.0905, "lr": 1.969440728761336e-06, "epoch": 0.8552197439619902, "percentage": 17.1, "elapsed_time": "0:09:18", "remaining_time": "0:45:05", "throughput": 5705.96, "total_tokens": 3185664}
|
|
{"current_steps": 6485, "total_steps": 37885, "loss": 0.1175, "lr": 1.9693276048210524e-06, "epoch": 0.8558796357397387, "percentage": 17.12, "elapsed_time": "0:09:18", "remaining_time": "0:45:04", "throughput": 5707.86, "total_tokens": 3188672}
|
|
{"current_steps": 6490, "total_steps": 37885, "loss": 0.0078, "lr": 1.969214275148004e-06, "epoch": 0.8565395275174872, "percentage": 17.13, "elapsed_time": "0:09:18", "remaining_time": "0:45:04", "throughput": 5708.98, "total_tokens": 3191168}
|
|
{"current_steps": 6495, "total_steps": 37885, "loss": 0.2481, "lr": 1.9691007397662444e-06, "epoch": 0.8571994192952356, "percentage": 17.14, "elapsed_time": "0:09:19", "remaining_time": "0:45:03", "throughput": 5710.1, "total_tokens": 3193664}
|
|
{"current_steps": 6500, "total_steps": 37885, "loss": 0.0524, "lr": 1.96898699869987e-06, "epoch": 0.8578593110729841, "percentage": 17.16, "elapsed_time": "0:09:19", "remaining_time": "0:45:02", "throughput": 5711.3, "total_tokens": 3196224}
|
|
{"current_steps": 6505, "total_steps": 37885, "loss": 0.1735, "lr": 1.968873051973022e-06, "epoch": 0.8585192028507325, "percentage": 17.17, "elapsed_time": "0:09:19", "remaining_time": "0:45:01", "throughput": 5712.49, "total_tokens": 3198784}
|
|
{"current_steps": 6510, "total_steps": 37885, "loss": 0.1, "lr": 1.968758899609885e-06, "epoch": 0.859179094628481, "percentage": 17.18, "elapsed_time": "0:09:20", "remaining_time": "0:45:00", "throughput": 5713.91, "total_tokens": 3201472}
|
|
{"current_steps": 6515, "total_steps": 37885, "loss": 0.0387, "lr": 1.9686445416346866e-06, "epoch": 0.8598389864062294, "percentage": 17.2, "elapsed_time": "0:09:20", "remaining_time": "0:44:59", "throughput": 5714.36, "total_tokens": 3203584}
|
|
{"current_steps": 6520, "total_steps": 37885, "loss": 0.16, "lr": 1.9685299780716988e-06, "epoch": 0.8604988781839779, "percentage": 17.21, "elapsed_time": "0:09:20", "remaining_time": "0:44:58", "throughput": 5715.14, "total_tokens": 3205888}
|
|
{"current_steps": 6525, "total_steps": 37885, "loss": 0.2118, "lr": 1.968415208945237e-06, "epoch": 0.8611587699617262, "percentage": 17.22, "elapsed_time": "0:09:21", "remaining_time": "0:44:57", "throughput": 5715.58, "total_tokens": 3208000}
|
|
{"current_steps": 6530, "total_steps": 37885, "loss": 0.0009, "lr": 1.9683002342796594e-06, "epoch": 0.8618186617394747, "percentage": 17.24, "elapsed_time": "0:09:21", "remaining_time": "0:44:56", "throughput": 5716.25, "total_tokens": 3210240}
|
|
{"current_steps": 6535, "total_steps": 37885, "loss": 0.0847, "lr": 1.9681850540993687e-06, "epoch": 0.8624785535172231, "percentage": 17.25, "elapsed_time": "0:09:21", "remaining_time": "0:44:55", "throughput": 5717.22, "total_tokens": 3212672}
|
|
{"current_steps": 6540, "total_steps": 37885, "loss": 0.2278, "lr": 1.9680696684288116e-06, "epoch": 0.8631384452949716, "percentage": 17.26, "elapsed_time": "0:09:22", "remaining_time": "0:44:54", "throughput": 5718.6, "total_tokens": 3215360}
|
|
{"current_steps": 6545, "total_steps": 37885, "loss": 0.1291, "lr": 1.9679540772924773e-06, "epoch": 0.86379833707272, "percentage": 17.28, "elapsed_time": "0:09:22", "remaining_time": "0:44:53", "throughput": 5720.09, "total_tokens": 3218112}
|
|
{"current_steps": 6550, "total_steps": 37885, "loss": 0.0677, "lr": 1.9678382807149e-06, "epoch": 0.8644582288504685, "percentage": 17.29, "elapsed_time": "0:09:22", "remaining_time": "0:44:53", "throughput": 5720.63, "total_tokens": 3220288}
|
|
{"current_steps": 6555, "total_steps": 37885, "loss": 0.0061, "lr": 1.967722278720656e-06, "epoch": 0.865118120628217, "percentage": 17.3, "elapsed_time": "0:09:23", "remaining_time": "0:44:52", "throughput": 5722.01, "total_tokens": 3222976}
|
|
{"current_steps": 6560, "total_steps": 37885, "loss": 0.087, "lr": 1.967606071334366e-06, "epoch": 0.8657780124059654, "percentage": 17.32, "elapsed_time": "0:09:23", "remaining_time": "0:44:51", "throughput": 5723.1, "total_tokens": 3225472}
|
|
{"current_steps": 6565, "total_steps": 37885, "loss": 0.2098, "lr": 1.9674896585806938e-06, "epoch": 0.8664379041837139, "percentage": 17.33, "elapsed_time": "0:09:23", "remaining_time": "0:44:50", "throughput": 5724.41, "total_tokens": 3228096}
|
|
{"current_steps": 6570, "total_steps": 37885, "loss": 0.1914, "lr": 1.967373040484348e-06, "epoch": 0.8670977959614623, "percentage": 17.34, "elapsed_time": "0:09:24", "remaining_time": "0:44:49", "throughput": 5725.71, "total_tokens": 3230720}
|
|
{"current_steps": 6575, "total_steps": 37885, "loss": 0.1312, "lr": 1.9672562170700794e-06, "epoch": 0.8677576877392108, "percentage": 17.36, "elapsed_time": "0:09:24", "remaining_time": "0:44:48", "throughput": 5726.58, "total_tokens": 3233088}
|
|
{"current_steps": 6580, "total_steps": 37885, "loss": 0.139, "lr": 1.967139188362683e-06, "epoch": 0.8684175795169592, "percentage": 17.37, "elapsed_time": "0:09:24", "remaining_time": "0:44:47", "throughput": 5727.89, "total_tokens": 3235712}
|
|
{"current_steps": 6585, "total_steps": 37885, "loss": 0.1531, "lr": 1.9670219543869977e-06, "epoch": 0.8690774712947077, "percentage": 17.38, "elapsed_time": "0:09:25", "remaining_time": "0:44:46", "throughput": 5729.48, "total_tokens": 3238528}
|
|
{"current_steps": 6590, "total_steps": 37885, "loss": 0.1389, "lr": 1.9669045151679045e-06, "epoch": 0.8697373630724561, "percentage": 17.39, "elapsed_time": "0:09:25", "remaining_time": "0:44:45", "throughput": 5730.35, "total_tokens": 3240896}
|
|
{"current_steps": 6595, "total_steps": 37885, "loss": 0.0033, "lr": 1.9667868707303304e-06, "epoch": 0.8703972548502046, "percentage": 17.41, "elapsed_time": "0:09:25", "remaining_time": "0:44:44", "throughput": 5731.41, "total_tokens": 3243392}
|
|
{"current_steps": 6600, "total_steps": 37885, "loss": 0.0405, "lr": 1.966669021099244e-06, "epoch": 0.871057146627953, "percentage": 17.42, "elapsed_time": "0:09:26", "remaining_time": "0:44:43", "throughput": 5732.39, "total_tokens": 3245824}
|
|
{"current_steps": 6605, "total_steps": 37885, "loss": 0.002, "lr": 1.966550966299657e-06, "epoch": 0.8717170384057015, "percentage": 17.43, "elapsed_time": "0:09:26", "remaining_time": "0:44:43", "throughput": 5733.14, "total_tokens": 3248128}
|
|
{"current_steps": 6610, "total_steps": 37885, "loss": 0.2562, "lr": 1.9664327063566273e-06, "epoch": 0.8723769301834499, "percentage": 17.45, "elapsed_time": "0:09:26", "remaining_time": "0:44:42", "throughput": 5734.23, "total_tokens": 3250624}
|
|
{"current_steps": 6615, "total_steps": 37885, "loss": 0.1405, "lr": 1.966314241295254e-06, "epoch": 0.8730368219611984, "percentage": 17.46, "elapsed_time": "0:09:27", "remaining_time": "0:44:41", "throughput": 5735.62, "total_tokens": 3253312}
|
|
{"current_steps": 6620, "total_steps": 37885, "loss": 0.1581, "lr": 1.9661955711406808e-06, "epoch": 0.8736967137389469, "percentage": 17.47, "elapsed_time": "0:09:27", "remaining_time": "0:44:40", "throughput": 5736.18, "total_tokens": 3255488}
|
|
{"current_steps": 6625, "total_steps": 37885, "loss": 0.0712, "lr": 1.966076695918094e-06, "epoch": 0.8743566055166953, "percentage": 17.49, "elapsed_time": "0:09:27", "remaining_time": "0:44:39", "throughput": 5736.74, "total_tokens": 3257664}
|
|
{"current_steps": 6630, "total_steps": 37885, "loss": 0.0422, "lr": 1.9659576156527236e-06, "epoch": 0.8750164972944438, "percentage": 17.5, "elapsed_time": "0:09:28", "remaining_time": "0:44:38", "throughput": 5737.79, "total_tokens": 3260160}
|
|
{"current_steps": 6635, "total_steps": 37885, "loss": 0.15, "lr": 1.965838330369845e-06, "epoch": 0.8756763890721921, "percentage": 17.51, "elapsed_time": "0:09:28", "remaining_time": "0:44:37", "throughput": 5738.64, "total_tokens": 3262528}
|
|
{"current_steps": 6640, "total_steps": 37885, "loss": 0.099, "lr": 1.9657188400947748e-06, "epoch": 0.8763362808499406, "percentage": 17.53, "elapsed_time": "0:09:28", "remaining_time": "0:44:36", "throughput": 5739.72, "total_tokens": 3265024}
|
|
{"current_steps": 6645, "total_steps": 37885, "loss": 0.2838, "lr": 1.965599144852874e-06, "epoch": 0.876996172627689, "percentage": 17.54, "elapsed_time": "0:09:29", "remaining_time": "0:44:35", "throughput": 5740.67, "total_tokens": 3267456}
|
|
{"current_steps": 6650, "total_steps": 37885, "loss": 0.0717, "lr": 1.9654792446695467e-06, "epoch": 0.8776560644054375, "percentage": 17.55, "elapsed_time": "0:09:29", "remaining_time": "0:44:34", "throughput": 5742.14, "total_tokens": 3270208}
|
|
{"current_steps": 6655, "total_steps": 37885, "loss": 0.1191, "lr": 1.9653591395702408e-06, "epoch": 0.8783159561831859, "percentage": 17.57, "elapsed_time": "0:09:29", "remaining_time": "0:44:34", "throughput": 5743.62, "total_tokens": 3272960}
|
|
{"current_steps": 6660, "total_steps": 37885, "loss": 0.1331, "lr": 1.9652388295804484e-06, "epoch": 0.8789758479609344, "percentage": 17.58, "elapsed_time": "0:09:30", "remaining_time": "0:44:33", "throughput": 5744.17, "total_tokens": 3275136}
|
|
{"current_steps": 6665, "total_steps": 37885, "loss": 0.2028, "lr": 1.9651183147257046e-06, "epoch": 0.8796357397386828, "percentage": 17.59, "elapsed_time": "0:09:30", "remaining_time": "0:44:32", "throughput": 5745.36, "total_tokens": 3277696}
|
|
{"current_steps": 6670, "total_steps": 37885, "loss": 0.1612, "lr": 1.964997595031587e-06, "epoch": 0.8802956315164313, "percentage": 17.61, "elapsed_time": "0:09:30", "remaining_time": "0:44:31", "throughput": 5746.21, "total_tokens": 3280064}
|
|
{"current_steps": 6675, "total_steps": 37885, "loss": 0.185, "lr": 1.964876670523718e-06, "epoch": 0.8809555232941798, "percentage": 17.62, "elapsed_time": "0:09:31", "remaining_time": "0:44:30", "throughput": 5746.84, "total_tokens": 3282304}
|
|
{"current_steps": 6680, "total_steps": 37885, "loss": 0.102, "lr": 1.9647555412277623e-06, "epoch": 0.8816154150719282, "percentage": 17.63, "elapsed_time": "0:09:31", "remaining_time": "0:44:29", "throughput": 5747.78, "total_tokens": 3284736}
|
|
{"current_steps": 6685, "total_steps": 37885, "loss": 0.0322, "lr": 1.9646342071694298e-06, "epoch": 0.8822753068496767, "percentage": 17.65, "elapsed_time": "0:09:31", "remaining_time": "0:44:28", "throughput": 5748.73, "total_tokens": 3287168}
|
|
{"current_steps": 6690, "total_steps": 37885, "loss": 0.1026, "lr": 1.9645126683744718e-06, "epoch": 0.8829351986274251, "percentage": 17.66, "elapsed_time": "0:09:32", "remaining_time": "0:44:27", "throughput": 5749.69, "total_tokens": 3289600}
|
|
{"current_steps": 6695, "total_steps": 37885, "loss": 0.0023, "lr": 1.9643909248686847e-06, "epoch": 0.8835950904051736, "percentage": 17.67, "elapsed_time": "0:09:32", "remaining_time": "0:44:26", "throughput": 5750.87, "total_tokens": 3292160}
|
|
{"current_steps": 6700, "total_steps": 37885, "loss": 0.1903, "lr": 1.964268976677907e-06, "epoch": 0.884254982182922, "percentage": 17.69, "elapsed_time": "0:09:32", "remaining_time": "0:44:26", "throughput": 5751.82, "total_tokens": 3294592}
|
|
{"current_steps": 6705, "total_steps": 37885, "loss": 0.0425, "lr": 1.964146823828022e-06, "epoch": 0.8849148739606705, "percentage": 17.7, "elapsed_time": "0:09:33", "remaining_time": "0:44:25", "throughput": 5752.66, "total_tokens": 3296960}
|
|
{"current_steps": 6710, "total_steps": 37885, "loss": 0.035, "lr": 1.9640244663449548e-06, "epoch": 0.8855747657384189, "percentage": 17.71, "elapsed_time": "0:09:33", "remaining_time": "0:44:24", "throughput": 5753.32, "total_tokens": 3299200}
|
|
{"current_steps": 6715, "total_steps": 37885, "loss": 0.2501, "lr": 1.963901904254676e-06, "epoch": 0.8862346575161674, "percentage": 17.72, "elapsed_time": "0:09:33", "remaining_time": "0:44:23", "throughput": 5754.19, "total_tokens": 3301568}
|
|
{"current_steps": 6720, "total_steps": 37885, "loss": 0.1129, "lr": 1.963779137583198e-06, "epoch": 0.8868945492939158, "percentage": 17.74, "elapsed_time": "0:09:34", "remaining_time": "0:44:22", "throughput": 5755.23, "total_tokens": 3304064}
|
|
{"current_steps": 6725, "total_steps": 37885, "loss": 0.1272, "lr": 1.963656166356577e-06, "epoch": 0.8875544410716643, "percentage": 17.75, "elapsed_time": "0:09:34", "remaining_time": "0:44:21", "throughput": 5756.09, "total_tokens": 3306432}
|
|
{"current_steps": 6730, "total_steps": 37885, "loss": 0.1033, "lr": 1.9635329906009135e-06, "epoch": 0.8882143328494126, "percentage": 17.76, "elapsed_time": "0:09:34", "remaining_time": "0:44:20", "throughput": 5756.83, "total_tokens": 3308736}
|
|
{"current_steps": 6735, "total_steps": 37885, "loss": 0.0311, "lr": 1.96340961034235e-06, "epoch": 0.8888742246271611, "percentage": 17.78, "elapsed_time": "0:09:35", "remaining_time": "0:44:19", "throughput": 5757.77, "total_tokens": 3311168}
|
|
{"current_steps": 6740, "total_steps": 37885, "loss": 0.1654, "lr": 1.9632860256070727e-06, "epoch": 0.8895341164049096, "percentage": 17.79, "elapsed_time": "0:09:35", "remaining_time": "0:44:18", "throughput": 5758.79, "total_tokens": 3313664}
|
|
{"current_steps": 6745, "total_steps": 37885, "loss": 0.1481, "lr": 1.9631622364213124e-06, "epoch": 0.890194008182658, "percentage": 17.8, "elapsed_time": "0:09:35", "remaining_time": "0:44:18", "throughput": 5759.94, "total_tokens": 3316224}
|
|
{"current_steps": 6750, "total_steps": 37885, "loss": 0.0998, "lr": 1.9630382428113416e-06, "epoch": 0.8908538999604065, "percentage": 17.82, "elapsed_time": "0:09:36", "remaining_time": "0:44:17", "throughput": 5760.55, "total_tokens": 3318464}
|
|
{"current_steps": 6755, "total_steps": 37885, "loss": 0.0018, "lr": 1.962914044803478e-06, "epoch": 0.8915137917381549, "percentage": 17.83, "elapsed_time": "0:09:36", "remaining_time": "0:44:16", "throughput": 5761.46, "total_tokens": 3320896}
|
|
{"current_steps": 6760, "total_steps": 37885, "loss": 0.1516, "lr": 1.9627896424240814e-06, "epoch": 0.8921736835159034, "percentage": 17.84, "elapsed_time": "0:09:36", "remaining_time": "0:44:15", "throughput": 5762.87, "total_tokens": 3323648}
|
|
{"current_steps": 6765, "total_steps": 37885, "loss": 0.2309, "lr": 1.9626650356995545e-06, "epoch": 0.8928335752936518, "percentage": 17.86, "elapsed_time": "0:09:37", "remaining_time": "0:44:14", "throughput": 5764.0, "total_tokens": 3326208}
|
|
{"current_steps": 6770, "total_steps": 37885, "loss": 0.1373, "lr": 1.9625402246563456e-06, "epoch": 0.8934934670714003, "percentage": 17.87, "elapsed_time": "0:09:37", "remaining_time": "0:44:13", "throughput": 5764.86, "total_tokens": 3328576}
|
|
{"current_steps": 6775, "total_steps": 37885, "loss": 0.0354, "lr": 1.962415209320944e-06, "epoch": 0.8941533588491487, "percentage": 17.88, "elapsed_time": "0:09:37", "remaining_time": "0:44:12", "throughput": 5766.55, "total_tokens": 3331520}
|
|
{"current_steps": 6780, "total_steps": 37885, "loss": 0.0489, "lr": 1.9622899897198834e-06, "epoch": 0.8948132506268972, "percentage": 17.9, "elapsed_time": "0:09:38", "remaining_time": "0:44:12", "throughput": 5768.08, "total_tokens": 3334336}
|
|
{"current_steps": 6785, "total_steps": 37885, "loss": 0.1136, "lr": 1.962164565879741e-06, "epoch": 0.8954731424046456, "percentage": 17.91, "elapsed_time": "0:09:38", "remaining_time": "0:44:11", "throughput": 5769.22, "total_tokens": 3336896}
|
|
{"current_steps": 6790, "total_steps": 37885, "loss": 0.1573, "lr": 1.9620389378271363e-06, "epoch": 0.8961330341823941, "percentage": 17.92, "elapsed_time": "0:09:38", "remaining_time": "0:44:10", "throughput": 5770.1, "total_tokens": 3339328}
|
|
{"current_steps": 6795, "total_steps": 37885, "loss": 0.0079, "lr": 1.9619131055887343e-06, "epoch": 0.8967929259601425, "percentage": 17.94, "elapsed_time": "0:09:39", "remaining_time": "0:44:09", "throughput": 5771.05, "total_tokens": 3341760}
|
|
{"current_steps": 6800, "total_steps": 37885, "loss": 0.1041, "lr": 1.961787069191241e-06, "epoch": 0.897452817737891, "percentage": 17.95, "elapsed_time": "0:09:39", "remaining_time": "0:44:08", "throughput": 5772.36, "total_tokens": 3344448}
|
|
{"current_steps": 6805, "total_steps": 37885, "loss": 0.0233, "lr": 1.9616608286614065e-06, "epoch": 0.8981127095156395, "percentage": 17.96, "elapsed_time": "0:09:39", "remaining_time": "0:44:07", "throughput": 5773.5, "total_tokens": 3347008}
|
|
{"current_steps": 6810, "total_steps": 37885, "loss": 0.0408, "lr": 1.9615343840260255e-06, "epoch": 0.8987726012933879, "percentage": 17.98, "elapsed_time": "0:09:40", "remaining_time": "0:44:06", "throughput": 5775.0, "total_tokens": 3349824}
|
|
{"current_steps": 6815, "total_steps": 37885, "loss": 0.0705, "lr": 1.9614077353119345e-06, "epoch": 0.8994324930711364, "percentage": 17.99, "elapsed_time": "0:09:40", "remaining_time": "0:44:06", "throughput": 5776.0, "total_tokens": 3352320}
|
|
{"current_steps": 6820, "total_steps": 37885, "loss": 0.0009, "lr": 1.961280882546013e-06, "epoch": 0.9000923848488848, "percentage": 18.0, "elapsed_time": "0:09:40", "remaining_time": "0:44:05", "throughput": 5776.78, "total_tokens": 3354688}
|
|
{"current_steps": 6825, "total_steps": 37885, "loss": 0.0692, "lr": 1.961153825755186e-06, "epoch": 0.9007522766266333, "percentage": 18.02, "elapsed_time": "0:09:41", "remaining_time": "0:44:04", "throughput": 5777.56, "total_tokens": 3357056}
|
|
{"current_steps": 6830, "total_steps": 37885, "loss": 0.0761, "lr": 1.961026564966419e-06, "epoch": 0.9014121684043817, "percentage": 18.03, "elapsed_time": "0:09:41", "remaining_time": "0:44:03", "throughput": 5778.48, "total_tokens": 3359488}
|
|
{"current_steps": 6835, "total_steps": 37885, "loss": 0.3297, "lr": 1.9608991002067233e-06, "epoch": 0.9020720601821302, "percentage": 18.04, "elapsed_time": "0:09:41", "remaining_time": "0:44:02", "throughput": 5779.4, "total_tokens": 3361920}
|
|
{"current_steps": 6840, "total_steps": 37885, "loss": 0.0016, "lr": 1.9607714315031513e-06, "epoch": 0.9027319519598785, "percentage": 18.05, "elapsed_time": "0:09:42", "remaining_time": "0:44:01", "throughput": 5780.45, "total_tokens": 3364416}
|
|
{"current_steps": 6845, "total_steps": 37885, "loss": 0.1103, "lr": 1.9606435588828008e-06, "epoch": 0.903391843737627, "percentage": 18.07, "elapsed_time": "0:09:42", "remaining_time": "0:44:00", "throughput": 5781.5, "total_tokens": 3366912}
|
|
{"current_steps": 6850, "total_steps": 37885, "loss": 0.0008, "lr": 1.960515482372811e-06, "epoch": 0.9040517355153754, "percentage": 18.08, "elapsed_time": "0:09:42", "remaining_time": "0:43:59", "throughput": 5782.01, "total_tokens": 3369088}
|
|
{"current_steps": 6855, "total_steps": 37885, "loss": 0.2938, "lr": 1.960387202000366e-06, "epoch": 0.9047116272931239, "percentage": 18.09, "elapsed_time": "0:09:43", "remaining_time": "0:43:59", "throughput": 5782.93, "total_tokens": 3371520}
|
|
{"current_steps": 6860, "total_steps": 37885, "loss": 0.0004, "lr": 1.9602587177926913e-06, "epoch": 0.9053715190708723, "percentage": 18.11, "elapsed_time": "0:09:43", "remaining_time": "0:43:58", "throughput": 5784.04, "total_tokens": 3374080}
|
|
{"current_steps": 6865, "total_steps": 37885, "loss": 0.0758, "lr": 1.960130029777058e-06, "epoch": 0.9060314108486208, "percentage": 18.12, "elapsed_time": "0:09:43", "remaining_time": "0:43:57", "throughput": 5785.14, "total_tokens": 3376640}
|
|
{"current_steps": 6870, "total_steps": 37885, "loss": 0.0005, "lr": 1.9600011379807783e-06, "epoch": 0.9066913026263693, "percentage": 18.13, "elapsed_time": "0:09:44", "remaining_time": "0:43:56", "throughput": 5786.05, "total_tokens": 3379072}
|
|
{"current_steps": 6875, "total_steps": 37885, "loss": 0.05, "lr": 1.9598720424312093e-06, "epoch": 0.9073511944041177, "percentage": 18.15, "elapsed_time": "0:09:44", "remaining_time": "0:43:55", "throughput": 5787.27, "total_tokens": 3381696}
|
|
{"current_steps": 6880, "total_steps": 37885, "loss": 0.317, "lr": 1.9597427431557497e-06, "epoch": 0.9080110861818662, "percentage": 18.16, "elapsed_time": "0:09:44", "remaining_time": "0:43:54", "throughput": 5788.1, "total_tokens": 3384064}
|
|
{"current_steps": 6885, "total_steps": 37885, "loss": 0.1413, "lr": 1.9596132401818427e-06, "epoch": 0.9086709779596146, "percentage": 18.17, "elapsed_time": "0:09:44", "remaining_time": "0:43:53", "throughput": 5788.67, "total_tokens": 3386304}
|
|
{"current_steps": 6890, "total_steps": 37885, "loss": 0.078, "lr": 1.9594835335369748e-06, "epoch": 0.9093308697373631, "percentage": 18.19, "elapsed_time": "0:09:45", "remaining_time": "0:43:53", "throughput": 5789.68, "total_tokens": 3388800}
|
|
{"current_steps": 6895, "total_steps": 37885, "loss": 0.1664, "lr": 1.9593536232486747e-06, "epoch": 0.9099907615151115, "percentage": 18.2, "elapsed_time": "0:09:45", "remaining_time": "0:43:52", "throughput": 5790.56, "total_tokens": 3391232}
|
|
{"current_steps": 6900, "total_steps": 37885, "loss": 0.0852, "lr": 1.9592235093445153e-06, "epoch": 0.91065065329286, "percentage": 18.21, "elapsed_time": "0:09:45", "remaining_time": "0:43:51", "throughput": 5791.45, "total_tokens": 3393664}
|
|
{"current_steps": 6905, "total_steps": 37885, "loss": 0.1319, "lr": 1.959093191852112e-06, "epoch": 0.9113105450706084, "percentage": 18.23, "elapsed_time": "0:09:46", "remaining_time": "0:43:50", "throughput": 5792.12, "total_tokens": 3395968}
|
|
{"current_steps": 6910, "total_steps": 37885, "loss": 0.1763, "lr": 1.958962670799124e-06, "epoch": 0.9119704368483569, "percentage": 18.24, "elapsed_time": "0:09:46", "remaining_time": "0:43:49", "throughput": 5792.83, "total_tokens": 3398272}
|
|
{"current_steps": 6915, "total_steps": 37885, "loss": 0.2054, "lr": 1.9588319462132535e-06, "epoch": 0.9126303286261053, "percentage": 18.25, "elapsed_time": "0:09:46", "remaining_time": "0:43:48", "throughput": 5794.12, "total_tokens": 3400960}
|
|
{"current_steps": 6920, "total_steps": 37885, "loss": 0.2306, "lr": 1.9587010181222456e-06, "epoch": 0.9132902204038538, "percentage": 18.27, "elapsed_time": "0:09:47", "remaining_time": "0:43:47", "throughput": 5795.21, "total_tokens": 3403520}
|
|
{"current_steps": 6925, "total_steps": 37885, "loss": 0.2867, "lr": 1.9585698865538892e-06, "epoch": 0.9139501121816023, "percentage": 18.28, "elapsed_time": "0:09:47", "remaining_time": "0:43:47", "throughput": 5796.12, "total_tokens": 3405952}
|
|
{"current_steps": 6930, "total_steps": 37885, "loss": 0.1133, "lr": 1.9584385515360155e-06, "epoch": 0.9146100039593507, "percentage": 18.29, "elapsed_time": "0:09:47", "remaining_time": "0:43:46", "throughput": 5796.94, "total_tokens": 3408320}
|
|
{"current_steps": 6935, "total_steps": 37885, "loss": 0.0866, "lr": 1.9583070130965e-06, "epoch": 0.9152698957370992, "percentage": 18.31, "elapsed_time": "0:09:48", "remaining_time": "0:43:45", "throughput": 5797.99, "total_tokens": 3410880}
|
|
{"current_steps": 6940, "total_steps": 37885, "loss": 0.0082, "lr": 1.95817527126326e-06, "epoch": 0.9159297875148475, "percentage": 18.32, "elapsed_time": "0:09:48", "remaining_time": "0:43:44", "throughput": 5799.1, "total_tokens": 3413440}
|
|
{"current_steps": 6945, "total_steps": 37885, "loss": 0.1116, "lr": 1.9580433260642576e-06, "epoch": 0.916589679292596, "percentage": 18.33, "elapsed_time": "0:09:48", "remaining_time": "0:43:43", "throughput": 5800.19, "total_tokens": 3416000}
|
|
{"current_steps": 6950, "total_steps": 37885, "loss": 0.1138, "lr": 1.9579111775274967e-06, "epoch": 0.9172495710703444, "percentage": 18.34, "elapsed_time": "0:09:49", "remaining_time": "0:43:42", "throughput": 5800.79, "total_tokens": 3418240}
|
|
{"current_steps": 6955, "total_steps": 37885, "loss": 0.1691, "lr": 1.957778825681025e-06, "epoch": 0.9179094628480929, "percentage": 18.36, "elapsed_time": "0:09:49", "remaining_time": "0:43:42", "throughput": 5801.7, "total_tokens": 3420672}
|
|
{"current_steps": 6960, "total_steps": 37885, "loss": 0.0336, "lr": 1.9576462705529334e-06, "epoch": 0.9185693546258413, "percentage": 18.37, "elapsed_time": "0:09:49", "remaining_time": "0:43:41", "throughput": 5802.3, "total_tokens": 3422912}
|
|
{"current_steps": 6965, "total_steps": 37885, "loss": 0.0039, "lr": 1.9575135121713554e-06, "epoch": 0.9192292464035898, "percentage": 18.38, "elapsed_time": "0:09:50", "remaining_time": "0:43:40", "throughput": 5803.27, "total_tokens": 3425408}
|
|
{"current_steps": 6970, "total_steps": 37885, "loss": 0.0885, "lr": 1.9573805505644687e-06, "epoch": 0.9198891381813382, "percentage": 18.4, "elapsed_time": "0:09:50", "remaining_time": "0:43:39", "throughput": 5804.06, "total_tokens": 3427776}
|
|
{"current_steps": 6975, "total_steps": 37885, "loss": 0.1885, "lr": 1.9572473857604924e-06, "epoch": 0.9205490299590867, "percentage": 18.41, "elapsed_time": "0:09:50", "remaining_time": "0:43:38", "throughput": 5805.16, "total_tokens": 3430336}
|
|
{"current_steps": 6980, "total_steps": 37885, "loss": 0.2446, "lr": 1.9571140177876904e-06, "epoch": 0.9212089217368351, "percentage": 18.42, "elapsed_time": "0:09:51", "remaining_time": "0:43:37", "throughput": 5806.24, "total_tokens": 3432896}
|
|
{"current_steps": 6985, "total_steps": 37885, "loss": 0.0608, "lr": 1.956980446674369e-06, "epoch": 0.9218688135145836, "percentage": 18.44, "elapsed_time": "0:09:51", "remaining_time": "0:43:36", "throughput": 5806.85, "total_tokens": 3435136}
|
|
{"current_steps": 6990, "total_steps": 37885, "loss": 0.0706, "lr": 1.9568466724488783e-06, "epoch": 0.9225287052923321, "percentage": 18.45, "elapsed_time": "0:09:51", "remaining_time": "0:43:36", "throughput": 5808.14, "total_tokens": 3437824}
|
|
{"current_steps": 6995, "total_steps": 37885, "loss": 0.0195, "lr": 1.95671269513961e-06, "epoch": 0.9231885970700805, "percentage": 18.46, "elapsed_time": "0:09:52", "remaining_time": "0:43:35", "throughput": 5809.14, "total_tokens": 3440320}
|
|
{"current_steps": 7000, "total_steps": 37885, "loss": 0.1083, "lr": 1.9565785147749994e-06, "epoch": 0.923848488847829, "percentage": 18.48, "elapsed_time": "0:09:52", "remaining_time": "0:43:34", "throughput": 5810.19, "total_tokens": 3442880}
|
|
{"current_steps": 7005, "total_steps": 37885, "loss": 0.0438, "lr": 1.956444131383527e-06, "epoch": 0.9245083806255774, "percentage": 18.49, "elapsed_time": "0:09:52", "remaining_time": "0:43:33", "throughput": 5810.77, "total_tokens": 3445120}
|
|
{"current_steps": 7010, "total_steps": 37885, "loss": 0.1449, "lr": 1.9563095449937133e-06, "epoch": 0.9251682724033259, "percentage": 18.5, "elapsed_time": "0:09:53", "remaining_time": "0:43:32", "throughput": 5811.44, "total_tokens": 3447424}
|
|
{"current_steps": 7015, "total_steps": 37885, "loss": 0.0746, "lr": 1.9561747556341236e-06, "epoch": 0.9258281641810743, "percentage": 18.52, "elapsed_time": "0:09:53", "remaining_time": "0:43:31", "throughput": 5812.44, "total_tokens": 3449920}
|
|
{"current_steps": 7020, "total_steps": 37885, "loss": 0.0844, "lr": 1.9560397633333663e-06, "epoch": 0.9264880559588228, "percentage": 18.53, "elapsed_time": "0:09:53", "remaining_time": "0:43:31", "throughput": 5813.43, "total_tokens": 3452416}
|
|
{"current_steps": 7025, "total_steps": 37885, "loss": 0.1329, "lr": 1.955904568120092e-06, "epoch": 0.9271479477365712, "percentage": 18.54, "elapsed_time": "0:09:54", "remaining_time": "0:43:30", "throughput": 5814.42, "total_tokens": 3454912}
|
|
{"current_steps": 7030, "total_steps": 37885, "loss": 0.0823, "lr": 1.955769170022996e-06, "epoch": 0.9278078395143197, "percentage": 18.56, "elapsed_time": "0:09:54", "remaining_time": "0:43:29", "throughput": 5815.51, "total_tokens": 3457472}
|
|
{"current_steps": 7035, "total_steps": 37885, "loss": 0.0728, "lr": 1.955633569070814e-06, "epoch": 0.928467731292068, "percentage": 18.57, "elapsed_time": "0:09:54", "remaining_time": "0:43:28", "throughput": 5816.1, "total_tokens": 3459712}
|
|
{"current_steps": 7040, "total_steps": 37885, "loss": 0.1126, "lr": 1.9554977652923276e-06, "epoch": 0.9291276230698166, "percentage": 18.58, "elapsed_time": "0:09:55", "remaining_time": "0:43:27", "throughput": 5816.97, "total_tokens": 3462144}
|
|
{"current_steps": 7045, "total_steps": 37885, "loss": 0.2659, "lr": 1.9553617587163594e-06, "epoch": 0.9297875148475649, "percentage": 18.6, "elapsed_time": "0:09:55", "remaining_time": "0:43:26", "throughput": 5817.78, "total_tokens": 3464512}
|
|
{"current_steps": 7050, "total_steps": 37885, "loss": 0.0937, "lr": 1.955225549371776e-06, "epoch": 0.9304474066253134, "percentage": 18.61, "elapsed_time": "0:09:55", "remaining_time": "0:43:26", "throughput": 5818.57, "total_tokens": 3466880}
|
|
{"current_steps": 7055, "total_steps": 37885, "loss": 0.1008, "lr": 1.9550891372874872e-06, "epoch": 0.931107298403062, "percentage": 18.62, "elapsed_time": "0:09:56", "remaining_time": "0:43:25", "throughput": 5819.34, "total_tokens": 3469248}
|
|
{"current_steps": 7060, "total_steps": 37885, "loss": 0.2362, "lr": 1.9549525224924453e-06, "epoch": 0.9317671901808103, "percentage": 18.64, "elapsed_time": "0:09:56", "remaining_time": "0:43:24", "throughput": 5820.11, "total_tokens": 3471616}
|
|
{"current_steps": 7065, "total_steps": 37885, "loss": 0.0761, "lr": 1.9548157050156456e-06, "epoch": 0.9324270819585588, "percentage": 18.65, "elapsed_time": "0:09:56", "remaining_time": "0:43:23", "throughput": 5821.28, "total_tokens": 3474240}
|
|
{"current_steps": 7070, "total_steps": 37885, "loss": 0.0566, "lr": 1.9546786848861268e-06, "epoch": 0.9330869737363072, "percentage": 18.66, "elapsed_time": "0:09:57", "remaining_time": "0:43:22", "throughput": 5822.36, "total_tokens": 3476800}
|
|
{"current_steps": 7075, "total_steps": 37885, "loss": 0.091, "lr": 1.95454146213297e-06, "epoch": 0.9337468655140557, "percentage": 18.67, "elapsed_time": "0:09:57", "remaining_time": "0:43:21", "throughput": 5823.61, "total_tokens": 3479488}
|
|
{"current_steps": 7080, "total_steps": 37885, "loss": 0.0015, "lr": 1.954404036785301e-06, "epoch": 0.9344067572918041, "percentage": 18.69, "elapsed_time": "0:09:57", "remaining_time": "0:43:21", "throughput": 5824.87, "total_tokens": 3482176}
|
|
{"current_steps": 7085, "total_steps": 37885, "loss": 0.078, "lr": 1.9542664088722857e-06, "epoch": 0.9350666490695526, "percentage": 18.7, "elapsed_time": "0:09:58", "remaining_time": "0:43:20", "throughput": 5826.06, "total_tokens": 3484800}
|
|
{"current_steps": 7090, "total_steps": 37885, "loss": 0.1244, "lr": 1.9541285784231355e-06, "epoch": 0.935726540847301, "percentage": 18.71, "elapsed_time": "0:09:58", "remaining_time": "0:43:19", "throughput": 5827.31, "total_tokens": 3487488}
|
|
{"current_steps": 7095, "total_steps": 37885, "loss": 0.3198, "lr": 1.9539905454671037e-06, "epoch": 0.9363864326250495, "percentage": 18.73, "elapsed_time": "0:09:58", "remaining_time": "0:43:18", "throughput": 5827.89, "total_tokens": 3489728}
|
|
{"current_steps": 7100, "total_steps": 37885, "loss": 0.2493, "lr": 1.953852310033487e-06, "epoch": 0.9370463244027979, "percentage": 18.74, "elapsed_time": "0:09:59", "remaining_time": "0:43:17", "throughput": 5828.38, "total_tokens": 3491904}
|
|
{"current_steps": 7105, "total_steps": 37885, "loss": 0.059, "lr": 1.9537138721516248e-06, "epoch": 0.9377062161805464, "percentage": 18.75, "elapsed_time": "0:09:59", "remaining_time": "0:43:16", "throughput": 5829.64, "total_tokens": 3494592}
|
|
{"current_steps": 7110, "total_steps": 37885, "loss": 0.1345, "lr": 1.9535752318508995e-06, "epoch": 0.9383661079582949, "percentage": 18.77, "elapsed_time": "0:09:59", "remaining_time": "0:43:16", "throughput": 5830.61, "total_tokens": 3497088}
|
|
{"current_steps": 7115, "total_steps": 37885, "loss": 0.0865, "lr": 1.9534363891607363e-06, "epoch": 0.9390259997360433, "percentage": 18.78, "elapsed_time": "0:10:00", "remaining_time": "0:43:15", "throughput": 5831.48, "total_tokens": 3499520}
|
|
{"current_steps": 7120, "total_steps": 37885, "loss": 0.1349, "lr": 1.953297344110604e-06, "epoch": 0.9396858915137918, "percentage": 18.79, "elapsed_time": "0:10:00", "remaining_time": "0:43:14", "throughput": 5832.74, "total_tokens": 3502208}
|
|
{"current_steps": 7125, "total_steps": 37885, "loss": 0.1946, "lr": 1.9531580967300135e-06, "epoch": 0.9403457832915402, "percentage": 18.81, "elapsed_time": "0:10:00", "remaining_time": "0:43:13", "throughput": 5833.62, "total_tokens": 3504640}
|
|
{"current_steps": 7130, "total_steps": 37885, "loss": 0.0946, "lr": 1.953018647048519e-06, "epoch": 0.9410056750692887, "percentage": 18.82, "elapsed_time": "0:10:01", "remaining_time": "0:43:12", "throughput": 5834.28, "total_tokens": 3506944}
|
|
{"current_steps": 7135, "total_steps": 37885, "loss": 0.2065, "lr": 1.9528789950957182e-06, "epoch": 0.9416655668470371, "percentage": 18.83, "elapsed_time": "0:10:01", "remaining_time": "0:43:11", "throughput": 5835.14, "total_tokens": 3509376}
|
|
{"current_steps": 7140, "total_steps": 37885, "loss": 0.096, "lr": 1.9527391409012507e-06, "epoch": 0.9423254586247856, "percentage": 18.85, "elapsed_time": "0:10:01", "remaining_time": "0:43:11", "throughput": 5835.82, "total_tokens": 3511680}
|
|
{"current_steps": 7145, "total_steps": 37885, "loss": 0.0617, "lr": 1.9525990844948e-06, "epoch": 0.942985350402534, "percentage": 18.86, "elapsed_time": "0:10:02", "remaining_time": "0:43:10", "throughput": 5836.68, "total_tokens": 3514112}
|
|
{"current_steps": 7150, "total_steps": 37885, "loss": 0.0848, "lr": 1.952458825906092e-06, "epoch": 0.9436452421802825, "percentage": 18.87, "elapsed_time": "0:10:02", "remaining_time": "0:43:09", "throughput": 5837.46, "total_tokens": 3516480}
|
|
{"current_steps": 7155, "total_steps": 37885, "loss": 0.1279, "lr": 1.952318365164895e-06, "epoch": 0.9443051339580308, "percentage": 18.89, "elapsed_time": "0:10:02", "remaining_time": "0:43:08", "throughput": 5838.02, "total_tokens": 3518720}
|
|
{"current_steps": 7160, "total_steps": 37885, "loss": 0.0011, "lr": 1.952177702301021e-06, "epoch": 0.9449650257357793, "percentage": 18.9, "elapsed_time": "0:10:03", "remaining_time": "0:43:07", "throughput": 5838.98, "total_tokens": 3521216}
|
|
{"current_steps": 7165, "total_steps": 37885, "loss": 0.2427, "lr": 1.9520368373443246e-06, "epoch": 0.9456249175135277, "percentage": 18.91, "elapsed_time": "0:10:03", "remaining_time": "0:43:07", "throughput": 5840.02, "total_tokens": 3523776}
|
|
{"current_steps": 7170, "total_steps": 37885, "loss": 0.0515, "lr": 1.951895770324704e-06, "epoch": 0.9462848092912762, "percentage": 18.93, "elapsed_time": "0:10:03", "remaining_time": "0:43:06", "throughput": 5841.01, "total_tokens": 3526272}
|
|
{"current_steps": 7175, "total_steps": 37885, "loss": 0.1211, "lr": 1.9517545012720993e-06, "epoch": 0.9469447010690247, "percentage": 18.94, "elapsed_time": "0:10:04", "remaining_time": "0:43:05", "throughput": 5841.68, "total_tokens": 3528576}
|
|
{"current_steps": 7180, "total_steps": 37885, "loss": 0.0018, "lr": 1.9516130302164937e-06, "epoch": 0.9476045928467731, "percentage": 18.95, "elapsed_time": "0:10:04", "remaining_time": "0:43:04", "throughput": 5842.69, "total_tokens": 3531136}
|
|
{"current_steps": 7185, "total_steps": 37885, "loss": 0.2951, "lr": 1.9514713571879135e-06, "epoch": 0.9482644846245216, "percentage": 18.97, "elapsed_time": "0:10:04", "remaining_time": "0:43:03", "throughput": 5843.73, "total_tokens": 3533696}
|
|
{"current_steps": 7190, "total_steps": 37885, "loss": 0.0036, "lr": 1.9513294822164274e-06, "epoch": 0.94892437640227, "percentage": 18.98, "elapsed_time": "0:10:05", "remaining_time": "0:43:02", "throughput": 5844.48, "total_tokens": 3536064}
|
|
{"current_steps": 7195, "total_steps": 37885, "loss": 0.0063, "lr": 1.9511874053321483e-06, "epoch": 0.9495842681800185, "percentage": 18.99, "elapsed_time": "0:10:05", "remaining_time": "0:43:02", "throughput": 5845.23, "total_tokens": 3538432}
|
|
{"current_steps": 7200, "total_steps": 37885, "loss": 0.0012, "lr": 1.95104512656523e-06, "epoch": 0.9502441599577669, "percentage": 19.0, "elapsed_time": "0:10:05", "remaining_time": "0:43:01", "throughput": 5846.46, "total_tokens": 3541120}
|
|
{"current_steps": 7205, "total_steps": 37885, "loss": 0.0973, "lr": 1.9509026459458702e-06, "epoch": 0.9509040517355154, "percentage": 19.02, "elapsed_time": "0:10:06", "remaining_time": "0:43:00", "throughput": 5847.52, "total_tokens": 3543680}
|
|
{"current_steps": 7210, "total_steps": 37885, "loss": 0.2608, "lr": 1.95075996350431e-06, "epoch": 0.9515639435132638, "percentage": 19.03, "elapsed_time": "0:10:06", "remaining_time": "0:42:59", "throughput": 5848.16, "total_tokens": 3545984}
|
|
{"current_steps": 7215, "total_steps": 37885, "loss": 0.0943, "lr": 1.9506170792708327e-06, "epoch": 0.9522238352910123, "percentage": 19.04, "elapsed_time": "0:10:06", "remaining_time": "0:42:58", "throughput": 5849.19, "total_tokens": 3548544}
|
|
{"current_steps": 7220, "total_steps": 37885, "loss": 0.0524, "lr": 1.950473993275764e-06, "epoch": 0.9528837270687607, "percentage": 19.06, "elapsed_time": "0:10:07", "remaining_time": "0:42:58", "throughput": 5850.14, "total_tokens": 3551040}
|
|
{"current_steps": 7225, "total_steps": 37885, "loss": 0.1268, "lr": 1.950330705549473e-06, "epoch": 0.9535436188465092, "percentage": 19.07, "elapsed_time": "0:10:07", "remaining_time": "0:42:57", "throughput": 5851.09, "total_tokens": 3553536}
|
|
{"current_steps": 7230, "total_steps": 37885, "loss": 0.1104, "lr": 1.950187216122371e-06, "epoch": 0.9542035106242576, "percentage": 19.08, "elapsed_time": "0:10:07", "remaining_time": "0:42:56", "throughput": 5851.55, "total_tokens": 3555712}
|
|
{"current_steps": 7235, "total_steps": 37885, "loss": 0.1443, "lr": 1.9500435250249136e-06, "epoch": 0.9548634024020061, "percentage": 19.1, "elapsed_time": "0:10:07", "remaining_time": "0:42:55", "throughput": 5852.3, "total_tokens": 3558080}
|
|
{"current_steps": 7240, "total_steps": 37885, "loss": 0.1269, "lr": 1.949899632287598e-06, "epoch": 0.9555232941797546, "percentage": 19.11, "elapsed_time": "0:10:08", "remaining_time": "0:42:54", "throughput": 5853.36, "total_tokens": 3560640}
|
|
{"current_steps": 7245, "total_steps": 37885, "loss": 0.0389, "lr": 1.9497555379409633e-06, "epoch": 0.956183185957503, "percentage": 19.12, "elapsed_time": "0:10:08", "remaining_time": "0:42:54", "throughput": 5854.67, "total_tokens": 3563392}
|
|
{"current_steps": 7250, "total_steps": 37885, "loss": 0.309, "lr": 1.9496112420155937e-06, "epoch": 0.9568430777352515, "percentage": 19.14, "elapsed_time": "0:10:08", "remaining_time": "0:42:53", "throughput": 5855.48, "total_tokens": 3565824}
|
|
{"current_steps": 7255, "total_steps": 37885, "loss": 0.0023, "lr": 1.949466744542115e-06, "epoch": 0.9575029695129998, "percentage": 19.15, "elapsed_time": "0:10:09", "remaining_time": "0:42:52", "throughput": 5856.32, "total_tokens": 3568256}
|
|
{"current_steps": 7260, "total_steps": 37885, "loss": 0.0659, "lr": 1.9493220455511943e-06, "epoch": 0.9581628612907483, "percentage": 19.16, "elapsed_time": "0:10:09", "remaining_time": "0:42:51", "throughput": 5857.29, "total_tokens": 3570752}
|
|
{"current_steps": 7265, "total_steps": 37885, "loss": 0.1136, "lr": 1.9491771450735444e-06, "epoch": 0.9588227530684967, "percentage": 19.18, "elapsed_time": "0:10:09", "remaining_time": "0:42:50", "throughput": 5857.72, "total_tokens": 3572928}
|
|
{"current_steps": 7270, "total_steps": 37885, "loss": 0.2459, "lr": 1.9490320431399186e-06, "epoch": 0.9594826448462452, "percentage": 19.19, "elapsed_time": "0:10:10", "remaining_time": "0:42:49", "throughput": 5858.45, "total_tokens": 3575296}
|
|
{"current_steps": 7275, "total_steps": 37885, "loss": 0.0014, "lr": 1.9488867397811143e-06, "epoch": 0.9601425366239936, "percentage": 19.2, "elapsed_time": "0:10:10", "remaining_time": "0:42:49", "throughput": 5859.17, "total_tokens": 3577664}
|
|
{"current_steps": 7280, "total_steps": 37885, "loss": 0.1765, "lr": 1.948741235027971e-06, "epoch": 0.9608024284017421, "percentage": 19.22, "elapsed_time": "0:10:10", "remaining_time": "0:42:48", "throughput": 5860.08, "total_tokens": 3580160}
|
|
{"current_steps": 7285, "total_steps": 37885, "loss": 0.1181, "lr": 1.9485955289113703e-06, "epoch": 0.9614623201794905, "percentage": 19.23, "elapsed_time": "0:10:11", "remaining_time": "0:42:47", "throughput": 5860.72, "total_tokens": 3582464}
|
|
{"current_steps": 7290, "total_steps": 37885, "loss": 0.1001, "lr": 1.9484496214622375e-06, "epoch": 0.962122211957239, "percentage": 19.24, "elapsed_time": "0:10:11", "remaining_time": "0:42:46", "throughput": 5861.58, "total_tokens": 3584896}
|
|
{"current_steps": 7295, "total_steps": 37885, "loss": 0.0035, "lr": 1.9483035127115416e-06, "epoch": 0.9627821037349875, "percentage": 19.26, "elapsed_time": "0:10:11", "remaining_time": "0:42:45", "throughput": 5862.77, "total_tokens": 3587584}
|
|
{"current_steps": 7300, "total_steps": 37885, "loss": 0.149, "lr": 1.948157202690292e-06, "epoch": 0.9634419955127359, "percentage": 19.27, "elapsed_time": "0:10:12", "remaining_time": "0:42:45", "throughput": 5863.81, "total_tokens": 3590144}
|
|
{"current_steps": 7305, "total_steps": 37885, "loss": 0.0409, "lr": 1.9480106914295416e-06, "epoch": 0.9641018872904844, "percentage": 19.28, "elapsed_time": "0:10:12", "remaining_time": "0:42:44", "throughput": 5865.02, "total_tokens": 3592832}
|
|
{"current_steps": 7310, "total_steps": 37885, "loss": 0.1264, "lr": 1.947863978960387e-06, "epoch": 0.9647617790682328, "percentage": 19.3, "elapsed_time": "0:10:12", "remaining_time": "0:42:43", "throughput": 5866.1, "total_tokens": 3595456}
|
|
{"current_steps": 7315, "total_steps": 37885, "loss": 0.1478, "lr": 1.947717065313967e-06, "epoch": 0.9654216708459813, "percentage": 19.31, "elapsed_time": "0:10:13", "remaining_time": "0:42:42", "throughput": 5866.95, "total_tokens": 3597888}
|
|
{"current_steps": 7320, "total_steps": 37885, "loss": 0.0841, "lr": 1.9475699505214625e-06, "epoch": 0.9660815626237297, "percentage": 19.32, "elapsed_time": "0:10:13", "remaining_time": "0:42:42", "throughput": 5867.86, "total_tokens": 3600384}
|
|
{"current_steps": 7325, "total_steps": 37885, "loss": 0.183, "lr": 1.947422634614098e-06, "epoch": 0.9667414544014782, "percentage": 19.33, "elapsed_time": "0:10:13", "remaining_time": "0:42:41", "throughput": 5868.77, "total_tokens": 3602880}
|
|
{"current_steps": 7330, "total_steps": 37885, "loss": 0.0148, "lr": 1.94727511762314e-06, "epoch": 0.9674013461792266, "percentage": 19.35, "elapsed_time": "0:10:14", "remaining_time": "0:42:40", "throughput": 5869.5, "total_tokens": 3605248}
|
|
{"current_steps": 7335, "total_steps": 37885, "loss": 0.0009, "lr": 1.9471273995798977e-06, "epoch": 0.9680612379569751, "percentage": 19.36, "elapsed_time": "0:10:14", "remaining_time": "0:42:39", "throughput": 5870.53, "total_tokens": 3607808}
|
|
{"current_steps": 7340, "total_steps": 37885, "loss": 0.2154, "lr": 1.9469794805157235e-06, "epoch": 0.9687211297347235, "percentage": 19.37, "elapsed_time": "0:10:14", "remaining_time": "0:42:38", "throughput": 5871.16, "total_tokens": 3610112}
|
|
{"current_steps": 7345, "total_steps": 37885, "loss": 0.1076, "lr": 1.946831360462012e-06, "epoch": 0.969381021512472, "percentage": 19.39, "elapsed_time": "0:10:15", "remaining_time": "0:42:38", "throughput": 5871.69, "total_tokens": 3612352}
|
|
{"current_steps": 7350, "total_steps": 37885, "loss": 0.184, "lr": 1.946683039450201e-06, "epoch": 0.9700409132902204, "percentage": 19.4, "elapsed_time": "0:10:15", "remaining_time": "0:42:37", "throughput": 5872.59, "total_tokens": 3614848}
|
|
{"current_steps": 7355, "total_steps": 37885, "loss": 0.0021, "lr": 1.9465345175117698e-06, "epoch": 0.9707008050679689, "percentage": 19.41, "elapsed_time": "0:10:15", "remaining_time": "0:42:36", "throughput": 5873.61, "total_tokens": 3617408}
|
|
{"current_steps": 7360, "total_steps": 37885, "loss": 0.1402, "lr": 1.9463857946782418e-06, "epoch": 0.9713606968457174, "percentage": 19.43, "elapsed_time": "0:10:16", "remaining_time": "0:42:35", "throughput": 5874.62, "total_tokens": 3619968}
|
|
{"current_steps": 7365, "total_steps": 37885, "loss": 0.1302, "lr": 1.9462368709811816e-06, "epoch": 0.9720205886234657, "percentage": 19.44, "elapsed_time": "0:10:16", "remaining_time": "0:42:34", "throughput": 5874.86, "total_tokens": 3622016}
|
|
{"current_steps": 7370, "total_steps": 37885, "loss": 0.115, "lr": 1.946087746452198e-06, "epoch": 0.9726804804012142, "percentage": 19.45, "elapsed_time": "0:10:16", "remaining_time": "0:42:34", "throughput": 5875.29, "total_tokens": 3624192}
|
|
{"current_steps": 7375, "total_steps": 37885, "loss": 0.0111, "lr": 1.945938421122941e-06, "epoch": 0.9733403721789626, "percentage": 19.47, "elapsed_time": "0:10:17", "remaining_time": "0:42:33", "throughput": 5876.1, "total_tokens": 3626624}
|
|
{"current_steps": 7380, "total_steps": 37885, "loss": 0.1937, "lr": 1.9457888950251045e-06, "epoch": 0.9740002639567111, "percentage": 19.48, "elapsed_time": "0:10:17", "remaining_time": "0:42:32", "throughput": 5876.71, "total_tokens": 3628928}
|
|
{"current_steps": 7385, "total_steps": 37885, "loss": 0.1085, "lr": 1.9456391681904234e-06, "epoch": 0.9746601557344595, "percentage": 19.49, "elapsed_time": "0:10:17", "remaining_time": "0:42:31", "throughput": 5877.81, "total_tokens": 3631552}
|
|
{"current_steps": 7390, "total_steps": 37885, "loss": 0.078, "lr": 1.9454892406506774e-06, "epoch": 0.975320047512208, "percentage": 19.51, "elapsed_time": "0:10:18", "remaining_time": "0:42:30", "throughput": 5878.63, "total_tokens": 3633984}
|
|
{"current_steps": 7395, "total_steps": 37885, "loss": 0.2813, "lr": 1.945339112437686e-06, "epoch": 0.9759799392899564, "percentage": 19.52, "elapsed_time": "0:10:18", "remaining_time": "0:42:30", "throughput": 5879.17, "total_tokens": 3636224}
|
|
{"current_steps": 7400, "total_steps": 37885, "loss": 0.1583, "lr": 1.945188783583314e-06, "epoch": 0.9766398310677049, "percentage": 19.53, "elapsed_time": "0:10:18", "remaining_time": "0:42:29", "throughput": 5879.98, "total_tokens": 3638656}
|
|
{"current_steps": 7405, "total_steps": 37885, "loss": 0.0369, "lr": 1.945038254119467e-06, "epoch": 0.9772997228454533, "percentage": 19.55, "elapsed_time": "0:10:19", "remaining_time": "0:42:28", "throughput": 5881.25, "total_tokens": 3641408}
|
|
{"current_steps": 7410, "total_steps": 37885, "loss": 0.133, "lr": 1.944887524078094e-06, "epoch": 0.9779596146232018, "percentage": 19.56, "elapsed_time": "0:10:19", "remaining_time": "0:42:27", "throughput": 5882.05, "total_tokens": 3643840}
|
|
{"current_steps": 7415, "total_steps": 37885, "loss": 0.058, "lr": 1.9447365934911862e-06, "epoch": 0.9786195064009502, "percentage": 19.57, "elapsed_time": "0:10:19", "remaining_time": "0:42:26", "throughput": 5882.97, "total_tokens": 3646336}
|
|
{"current_steps": 7420, "total_steps": 37885, "loss": 0.086, "lr": 1.944585462390778e-06, "epoch": 0.9792793981786987, "percentage": 19.59, "elapsed_time": "0:10:20", "remaining_time": "0:42:26", "throughput": 5884.07, "total_tokens": 3648960}
|
|
{"current_steps": 7425, "total_steps": 37885, "loss": 0.0681, "lr": 1.9444341308089456e-06, "epoch": 0.9799392899564472, "percentage": 19.6, "elapsed_time": "0:10:20", "remaining_time": "0:42:25", "throughput": 5884.54, "total_tokens": 3651200}
|
|
{"current_steps": 7430, "total_steps": 37885, "loss": 0.0279, "lr": 1.944282598777808e-06, "epoch": 0.9805991817341956, "percentage": 19.61, "elapsed_time": "0:10:20", "remaining_time": "0:42:24", "throughput": 5885.14, "total_tokens": 3653504}
|
|
{"current_steps": 7435, "total_steps": 37885, "loss": 0.3589, "lr": 1.9441308663295264e-06, "epoch": 0.9812590735119441, "percentage": 19.63, "elapsed_time": "0:10:21", "remaining_time": "0:42:23", "throughput": 5886.13, "total_tokens": 3656064}
|
|
{"current_steps": 7440, "total_steps": 37885, "loss": 0.4366, "lr": 1.9439789334963055e-06, "epoch": 0.9819189652896925, "percentage": 19.64, "elapsed_time": "0:10:21", "remaining_time": "0:42:23", "throughput": 5886.35, "total_tokens": 3658112}
|
|
{"current_steps": 7445, "total_steps": 37885, "loss": 0.1661, "lr": 1.9438268003103916e-06, "epoch": 0.982578857067441, "percentage": 19.65, "elapsed_time": "0:10:21", "remaining_time": "0:42:22", "throughput": 5887.71, "total_tokens": 3660928}
|
|
{"current_steps": 7450, "total_steps": 37885, "loss": 0.1425, "lr": 1.943674466804074e-06, "epoch": 0.9832387488451894, "percentage": 19.66, "elapsed_time": "0:10:22", "remaining_time": "0:42:21", "throughput": 5888.31, "total_tokens": 3663232}
|
|
{"current_steps": 7455, "total_steps": 37885, "loss": 0.0064, "lr": 1.9435219330096845e-06, "epoch": 0.9838986406229379, "percentage": 19.68, "elapsed_time": "0:10:22", "remaining_time": "0:42:20", "throughput": 5889.0, "total_tokens": 3665600}
|
|
{"current_steps": 7460, "total_steps": 37885, "loss": 0.0393, "lr": 1.9433691989595975e-06, "epoch": 0.9845585324006862, "percentage": 19.69, "elapsed_time": "0:10:22", "remaining_time": "0:42:19", "throughput": 5889.88, "total_tokens": 3668096}
|
|
{"current_steps": 7465, "total_steps": 37885, "loss": 0.0312, "lr": 1.943216264686229e-06, "epoch": 0.9852184241784347, "percentage": 19.7, "elapsed_time": "0:10:23", "remaining_time": "0:42:19", "throughput": 5890.89, "total_tokens": 3670656}
|
|
{"current_steps": 7470, "total_steps": 37885, "loss": 0.157, "lr": 1.943063130222038e-06, "epoch": 0.9858783159561831, "percentage": 19.72, "elapsed_time": "0:10:23", "remaining_time": "0:42:18", "throughput": 5891.55, "total_tokens": 3673024}
|
|
{"current_steps": 7475, "total_steps": 37885, "loss": 0.0569, "lr": 1.9429097955995275e-06, "epoch": 0.9865382077339316, "percentage": 19.73, "elapsed_time": "0:10:23", "remaining_time": "0:42:17", "throughput": 5892.7, "total_tokens": 3675712}
|
|
{"current_steps": 7480, "total_steps": 37885, "loss": 0.1377, "lr": 1.9427562608512406e-06, "epoch": 0.9871980995116801, "percentage": 19.74, "elapsed_time": "0:10:24", "remaining_time": "0:42:16", "throughput": 5893.42, "total_tokens": 3678080}
|
|
{"current_steps": 7485, "total_steps": 37885, "loss": 0.2043, "lr": 1.9426025260097645e-06, "epoch": 0.9878579912894285, "percentage": 19.76, "elapsed_time": "0:10:24", "remaining_time": "0:42:16", "throughput": 5894.12, "total_tokens": 3680448}
|
|
{"current_steps": 7490, "total_steps": 37885, "loss": 0.0405, "lr": 1.9424485911077278e-06, "epoch": 0.988517883067177, "percentage": 19.77, "elapsed_time": "0:10:24", "remaining_time": "0:42:15", "throughput": 5894.74, "total_tokens": 3682752}
|
|
{"current_steps": 7495, "total_steps": 37885, "loss": 0.1633, "lr": 1.9422944561778026e-06, "epoch": 0.9891777748449254, "percentage": 19.78, "elapsed_time": "0:10:25", "remaining_time": "0:42:14", "throughput": 5895.77, "total_tokens": 3685376}
|
|
{"current_steps": 7500, "total_steps": 37885, "loss": 0.0496, "lr": 1.9421401212527023e-06, "epoch": 0.9898376666226739, "percentage": 19.8, "elapsed_time": "0:10:25", "remaining_time": "0:42:13", "throughput": 5896.47, "total_tokens": 3687744}
|
|
{"current_steps": 7505, "total_steps": 37885, "loss": 0.1828, "lr": 1.9419855863651837e-06, "epoch": 0.9904975584004223, "percentage": 19.81, "elapsed_time": "0:10:25", "remaining_time": "0:42:12", "throughput": 5897.35, "total_tokens": 3690240}
|
|
{"current_steps": 7510, "total_steps": 37885, "loss": 0.1345, "lr": 1.941830851548046e-06, "epoch": 0.9911574501781708, "percentage": 19.82, "elapsed_time": "0:10:26", "remaining_time": "0:42:12", "throughput": 5898.22, "total_tokens": 3692736}
|
|
{"current_steps": 7515, "total_steps": 37885, "loss": 0.1963, "lr": 1.94167591683413e-06, "epoch": 0.9918173419559192, "percentage": 19.84, "elapsed_time": "0:10:26", "remaining_time": "0:42:11", "throughput": 5899.31, "total_tokens": 3695360}
|
|
{"current_steps": 7520, "total_steps": 37885, "loss": 0.0683, "lr": 1.94152078225632e-06, "epoch": 0.9924772337336677, "percentage": 19.85, "elapsed_time": "0:10:26", "remaining_time": "0:42:10", "throughput": 5900.18, "total_tokens": 3697856}
|
|
{"current_steps": 7525, "total_steps": 37885, "loss": 0.0825, "lr": 1.9413654478475415e-06, "epoch": 0.9931371255114161, "percentage": 19.86, "elapsed_time": "0:10:27", "remaining_time": "0:42:09", "throughput": 5900.87, "total_tokens": 3700224}
|
|
{"current_steps": 7530, "total_steps": 37885, "loss": 0.1488, "lr": 1.941209913640764e-06, "epoch": 0.9937970172891646, "percentage": 19.88, "elapsed_time": "0:10:27", "remaining_time": "0:42:09", "throughput": 5901.57, "total_tokens": 3702592}
|
|
{"current_steps": 7535, "total_steps": 37885, "loss": 0.1055, "lr": 1.9410541796689975e-06, "epoch": 0.994456909066913, "percentage": 19.89, "elapsed_time": "0:10:27", "remaining_time": "0:42:08", "throughput": 5902.19, "total_tokens": 3704896}
|
|
{"current_steps": 7540, "total_steps": 37885, "loss": 0.0121, "lr": 1.9408982459652963e-06, "epoch": 0.9951168008446615, "percentage": 19.9, "elapsed_time": "0:10:28", "remaining_time": "0:42:07", "throughput": 5902.89, "total_tokens": 3707264}
|
|
{"current_steps": 7545, "total_steps": 37885, "loss": 0.0858, "lr": 1.940742112562756e-06, "epoch": 0.99577669262241, "percentage": 19.92, "elapsed_time": "0:10:28", "remaining_time": "0:42:06", "throughput": 5903.84, "total_tokens": 3709824}
|
|
{"current_steps": 7550, "total_steps": 37885, "loss": 0.1293, "lr": 1.9405857794945142e-06, "epoch": 0.9964365844001584, "percentage": 19.93, "elapsed_time": "0:10:28", "remaining_time": "0:42:06", "throughput": 5904.53, "total_tokens": 3712192}
|
|
{"current_steps": 7555, "total_steps": 37885, "loss": 0.0699, "lr": 1.9404292467937525e-06, "epoch": 0.9970964761779069, "percentage": 19.94, "elapsed_time": "0:10:29", "remaining_time": "0:42:05", "throughput": 5905.67, "total_tokens": 3714880}
|
|
{"current_steps": 7560, "total_steps": 37885, "loss": 0.0584, "lr": 1.9402725144936926e-06, "epoch": 0.9977563679556553, "percentage": 19.96, "elapsed_time": "0:10:29", "remaining_time": "0:42:04", "throughput": 5906.25, "total_tokens": 3717184}
|
|
{"current_steps": 7565, "total_steps": 37885, "loss": 0.0402, "lr": 1.940115582627601e-06, "epoch": 0.9984162597334038, "percentage": 19.97, "elapsed_time": "0:10:29", "remaining_time": "0:42:03", "throughput": 5906.76, "total_tokens": 3719424}
|
|
{"current_steps": 7570, "total_steps": 37885, "loss": 0.0668, "lr": 1.9399584512287842e-06, "epoch": 0.9990761515111521, "percentage": 19.98, "elapsed_time": "0:10:30", "remaining_time": "0:42:02", "throughput": 5907.64, "total_tokens": 3721920}
|
|
{"current_steps": 7575, "total_steps": 37885, "loss": 0.123, "lr": 1.939801120330593e-06, "epoch": 0.9997360432889006, "percentage": 19.99, "elapsed_time": "0:10:30", "remaining_time": "0:42:02", "throughput": 5908.35, "total_tokens": 3724288}
|
|
{"current_steps": 7580, "total_steps": 37885, "loss": 0.0006, "lr": 1.9396435899664198e-06, "epoch": 1.0003959350666491, "percentage": 20.01, "elapsed_time": "0:10:30", "remaining_time": "0:42:01", "throughput": 5908.03, "total_tokens": 3726464}
|
|
{"current_steps": 7580, "total_steps": 37885, "eval_loss": 0.11427787691354752, "epoch": 1.0003959350666491, "percentage": 20.01, "elapsed_time": "0:10:38", "remaining_time": "0:42:32", "throughput": 5835.73, "total_tokens": 3726464}
|
|
{"current_steps": 7585, "total_steps": 37885, "loss": 0.0614, "lr": 1.9394858601696986e-06, "epoch": 1.0010558268443974, "percentage": 20.02, "elapsed_time": "0:11:14", "remaining_time": "0:44:55", "throughput": 5525.45, "total_tokens": 3728960}
|
|
{"current_steps": 7590, "total_steps": 37885, "loss": 0.0011, "lr": 1.9393279309739067e-06, "epoch": 1.001715718622146, "percentage": 20.03, "elapsed_time": "0:11:15", "remaining_time": "0:44:55", "throughput": 5526.63, "total_tokens": 3731648}
|
|
{"current_steps": 7595, "total_steps": 37885, "loss": 0.0326, "lr": 1.939169802412564e-06, "epoch": 1.0023756103998944, "percentage": 20.05, "elapsed_time": "0:11:15", "remaining_time": "0:44:54", "throughput": 5527.54, "total_tokens": 3734144}
|
|
{"current_steps": 7600, "total_steps": 37885, "loss": 0.1584, "lr": 1.939011474519231e-06, "epoch": 1.003035502177643, "percentage": 20.06, "elapsed_time": "0:11:15", "remaining_time": "0:44:53", "throughput": 5528.55, "total_tokens": 3736704}
|
|
{"current_steps": 7605, "total_steps": 37885, "loss": 0.1347, "lr": 1.938852947327513e-06, "epoch": 1.0036953939553914, "percentage": 20.07, "elapsed_time": "0:11:16", "remaining_time": "0:44:52", "throughput": 5529.65, "total_tokens": 3739328}
|
|
{"current_steps": 7610, "total_steps": 37885, "loss": 0.1089, "lr": 1.938694220871055e-06, "epoch": 1.0043552857331397, "percentage": 20.09, "elapsed_time": "0:11:16", "remaining_time": "0:44:51", "throughput": 5530.51, "total_tokens": 3741760}
|
|
{"current_steps": 7615, "total_steps": 37885, "loss": 0.0551, "lr": 1.938535295183547e-06, "epoch": 1.0050151775108882, "percentage": 20.1, "elapsed_time": "0:11:16", "remaining_time": "0:44:50", "throughput": 5531.64, "total_tokens": 3744384}
|
|
{"current_steps": 7620, "total_steps": 37885, "loss": 0.1567, "lr": 1.938376170298718e-06, "epoch": 1.0056750692886367, "percentage": 20.11, "elapsed_time": "0:11:17", "remaining_time": "0:44:49", "throughput": 5532.46, "total_tokens": 3746816}
|
|
{"current_steps": 7625, "total_steps": 37885, "loss": 0.0361, "lr": 1.9382168462503425e-06, "epoch": 1.0063349610663852, "percentage": 20.13, "elapsed_time": "0:11:17", "remaining_time": "0:44:48", "throughput": 5533.66, "total_tokens": 3749504}
|
|
{"current_steps": 7630, "total_steps": 37885, "loss": 0.052, "lr": 1.9380573230722354e-06, "epoch": 1.0069948528441335, "percentage": 20.14, "elapsed_time": "0:11:17", "remaining_time": "0:44:48", "throughput": 5534.51, "total_tokens": 3751936}
|
|
{"current_steps": 7635, "total_steps": 37885, "loss": 0.0017, "lr": 1.9378976007982543e-06, "epoch": 1.007654744621882, "percentage": 20.15, "elapsed_time": "0:11:18", "remaining_time": "0:44:47", "throughput": 5535.36, "total_tokens": 3754368}
|
|
{"current_steps": 7640, "total_steps": 37885, "loss": 0.0029, "lr": 1.9377376794622992e-06, "epoch": 1.0083146363996305, "percentage": 20.17, "elapsed_time": "0:11:18", "remaining_time": "0:44:46", "throughput": 5536.1, "total_tokens": 3756736}
|
|
{"current_steps": 7645, "total_steps": 37885, "loss": 0.0945, "lr": 1.937577559098312e-06, "epoch": 1.008974528177379, "percentage": 20.18, "elapsed_time": "0:11:18", "remaining_time": "0:44:45", "throughput": 5537.25, "total_tokens": 3759360}
|
|
{"current_steps": 7650, "total_steps": 37885, "loss": 0.0614, "lr": 1.9374172397402774e-06, "epoch": 1.0096344199551273, "percentage": 20.19, "elapsed_time": "0:11:19", "remaining_time": "0:44:44", "throughput": 5537.78, "total_tokens": 3761536}
|
|
{"current_steps": 7655, "total_steps": 37885, "loss": 0.1333, "lr": 1.937256721422222e-06, "epoch": 1.0102943117328758, "percentage": 20.21, "elapsed_time": "0:11:19", "remaining_time": "0:44:43", "throughput": 5538.68, "total_tokens": 3763968}
|
|
{"current_steps": 7660, "total_steps": 37885, "loss": 0.1473, "lr": 1.9370960041782144e-06, "epoch": 1.0109542035106243, "percentage": 20.22, "elapsed_time": "0:11:19", "remaining_time": "0:44:42", "throughput": 5539.49, "total_tokens": 3766336}
|
|
{"current_steps": 7665, "total_steps": 37885, "loss": 0.1232, "lr": 1.936935088042366e-06, "epoch": 1.0116140952883728, "percentage": 20.23, "elapsed_time": "0:11:20", "remaining_time": "0:44:41", "throughput": 5540.46, "total_tokens": 3768832}
|
|
{"current_steps": 7670, "total_steps": 37885, "loss": 0.066, "lr": 1.9367739730488295e-06, "epoch": 1.0122739870661213, "percentage": 20.25, "elapsed_time": "0:11:20", "remaining_time": "0:44:40", "throughput": 5541.38, "total_tokens": 3771264}
|
|
{"current_steps": 7675, "total_steps": 37885, "loss": 0.0628, "lr": 1.9366126592318012e-06, "epoch": 1.0129338788438695, "percentage": 20.26, "elapsed_time": "0:11:20", "remaining_time": "0:44:40", "throughput": 5542.36, "total_tokens": 3773760}
|
|
{"current_steps": 7680, "total_steps": 37885, "loss": 0.069, "lr": 1.936451146625518e-06, "epoch": 1.013593770621618, "percentage": 20.27, "elapsed_time": "0:11:21", "remaining_time": "0:44:39", "throughput": 5543.07, "total_tokens": 3776064}
|
|
{"current_steps": 7685, "total_steps": 37885, "loss": 0.0009, "lr": 1.9362894352642606e-06, "epoch": 1.0142536623993665, "percentage": 20.29, "elapsed_time": "0:11:21", "remaining_time": "0:44:38", "throughput": 5543.97, "total_tokens": 3778496}
|
|
{"current_steps": 7690, "total_steps": 37885, "loss": 0.0102, "lr": 1.9361275251823507e-06, "epoch": 1.014913554177115, "percentage": 20.3, "elapsed_time": "0:11:21", "remaining_time": "0:44:37", "throughput": 5544.87, "total_tokens": 3780928}
|
|
{"current_steps": 7695, "total_steps": 37885, "loss": 0.1457, "lr": 1.935965416414152e-06, "epoch": 1.0155734459548633, "percentage": 20.31, "elapsed_time": "0:11:22", "remaining_time": "0:44:36", "throughput": 5545.74, "total_tokens": 3783360}
|
|
{"current_steps": 7700, "total_steps": 37885, "loss": 0.0004, "lr": 1.935803108994072e-06, "epoch": 1.0162333377326118, "percentage": 20.32, "elapsed_time": "0:11:22", "remaining_time": "0:44:35", "throughput": 5546.46, "total_tokens": 3785664}
|
|
{"current_steps": 7705, "total_steps": 37885, "loss": 0.0002, "lr": 1.9356406029565584e-06, "epoch": 1.0168932295103603, "percentage": 20.34, "elapsed_time": "0:11:22", "remaining_time": "0:44:34", "throughput": 5547.6, "total_tokens": 3788288}
|
|
{"current_steps": 7710, "total_steps": 37885, "loss": 0.2777, "lr": 1.935477898336102e-06, "epoch": 1.0175531212881088, "percentage": 20.35, "elapsed_time": "0:11:23", "remaining_time": "0:44:33", "throughput": 5548.6, "total_tokens": 3790784}
|
|
{"current_steps": 7715, "total_steps": 37885, "loss": 0.0015, "lr": 1.935314995167236e-06, "epoch": 1.018213013065857, "percentage": 20.36, "elapsed_time": "0:11:23", "remaining_time": "0:44:32", "throughput": 5549.43, "total_tokens": 3793152}
|
|
{"current_steps": 7720, "total_steps": 37885, "loss": 0.0003, "lr": 1.9351518934845355e-06, "epoch": 1.0188729048436056, "percentage": 20.38, "elapsed_time": "0:11:23", "remaining_time": "0:44:32", "throughput": 5550.48, "total_tokens": 3795712}
|
|
{"current_steps": 7725, "total_steps": 37885, "loss": 0.0478, "lr": 1.934988593322617e-06, "epoch": 1.019532796621354, "percentage": 20.39, "elapsed_time": "0:11:24", "remaining_time": "0:44:31", "throughput": 5551.3, "total_tokens": 3798080}
|
|
{"current_steps": 7730, "total_steps": 37885, "loss": 0.1037, "lr": 1.934825094716141e-06, "epoch": 1.0201926883991026, "percentage": 20.4, "elapsed_time": "0:11:24", "remaining_time": "0:44:30", "throughput": 5552.35, "total_tokens": 3800640}
|
|
{"current_steps": 7735, "total_steps": 37885, "loss": 0.1827, "lr": 1.9346613976998075e-06, "epoch": 1.020852580176851, "percentage": 20.42, "elapsed_time": "0:11:24", "remaining_time": "0:44:29", "throughput": 5553.09, "total_tokens": 3802944}
|
|
{"current_steps": 7740, "total_steps": 37885, "loss": 0.0798, "lr": 1.9344975023083606e-06, "epoch": 1.0215124719545994, "percentage": 20.43, "elapsed_time": "0:11:25", "remaining_time": "0:44:28", "throughput": 5554.15, "total_tokens": 3805504}
|
|
{"current_steps": 7745, "total_steps": 37885, "loss": 0.1328, "lr": 1.9343334085765862e-06, "epoch": 1.0221723637323479, "percentage": 20.44, "elapsed_time": "0:11:25", "remaining_time": "0:44:27", "throughput": 5554.78, "total_tokens": 3807744}
|
|
{"current_steps": 7750, "total_steps": 37885, "loss": 0.028, "lr": 1.9341691165393116e-06, "epoch": 1.0228322555100964, "percentage": 20.46, "elapsed_time": "0:11:25", "remaining_time": "0:44:26", "throughput": 5555.54, "total_tokens": 3810112}
|
|
{"current_steps": 7755, "total_steps": 37885, "loss": 0.1645, "lr": 1.9340046262314065e-06, "epoch": 1.0234921472878449, "percentage": 20.47, "elapsed_time": "0:11:26", "remaining_time": "0:44:25", "throughput": 5556.51, "total_tokens": 3812608}
|
|
{"current_steps": 7760, "total_steps": 37885, "loss": 0.0013, "lr": 1.9338399376877835e-06, "epoch": 1.0241520390655932, "percentage": 20.48, "elapsed_time": "0:11:26", "remaining_time": "0:44:24", "throughput": 5557.2, "total_tokens": 3814912}
|
|
{"current_steps": 7765, "total_steps": 37885, "loss": 0.0006, "lr": 1.9336750509433958e-06, "epoch": 1.0248119308433417, "percentage": 20.5, "elapsed_time": "0:11:26", "remaining_time": "0:44:24", "throughput": 5558.44, "total_tokens": 3817600}
|
|
{"current_steps": 7770, "total_steps": 37885, "loss": 0.1336, "lr": 1.93350996603324e-06, "epoch": 1.0254718226210902, "percentage": 20.51, "elapsed_time": "0:11:27", "remaining_time": "0:44:23", "throughput": 5559.14, "total_tokens": 3819904}
|
|
{"current_steps": 7775, "total_steps": 37885, "loss": 0.0452, "lr": 1.933344682992353e-06, "epoch": 1.0261317143988387, "percentage": 20.52, "elapsed_time": "0:11:27", "remaining_time": "0:44:22", "throughput": 5559.89, "total_tokens": 3822272}
|
|
{"current_steps": 7780, "total_steps": 37885, "loss": 0.1192, "lr": 1.9331792018558165e-06, "epoch": 1.026791606176587, "percentage": 20.54, "elapsed_time": "0:11:27", "remaining_time": "0:44:21", "throughput": 5560.5, "total_tokens": 3824512}
|
|
{"current_steps": 7785, "total_steps": 37885, "loss": 0.0616, "lr": 1.933013522658752e-06, "epoch": 1.0274514979543354, "percentage": 20.55, "elapsed_time": "0:11:28", "remaining_time": "0:44:20", "throughput": 5561.55, "total_tokens": 3827072}
|
|
{"current_steps": 7790, "total_steps": 37885, "loss": 0.1669, "lr": 1.9328476454363235e-06, "epoch": 1.028111389732084, "percentage": 20.56, "elapsed_time": "0:11:28", "remaining_time": "0:44:19", "throughput": 5562.62, "total_tokens": 3829632}
|
|
{"current_steps": 7795, "total_steps": 37885, "loss": 0.0014, "lr": 1.932681570223737e-06, "epoch": 1.0287712815098324, "percentage": 20.58, "elapsed_time": "0:11:28", "remaining_time": "0:44:18", "throughput": 5563.42, "total_tokens": 3832000}
|
|
{"current_steps": 7800, "total_steps": 37885, "loss": 0.0993, "lr": 1.9325152970562418e-06, "epoch": 1.029431173287581, "percentage": 20.59, "elapsed_time": "0:11:29", "remaining_time": "0:44:17", "throughput": 5564.55, "total_tokens": 3834624}
|
|
{"current_steps": 7805, "total_steps": 37885, "loss": 0.1106, "lr": 1.9323488259691273e-06, "epoch": 1.0300910650653292, "percentage": 20.6, "elapsed_time": "0:11:29", "remaining_time": "0:44:17", "throughput": 5565.34, "total_tokens": 3836992}
|
|
{"current_steps": 7810, "total_steps": 37885, "loss": 0.1535, "lr": 1.932182156997726e-06, "epoch": 1.0307509568430777, "percentage": 20.62, "elapsed_time": "0:11:29", "remaining_time": "0:44:16", "throughput": 5566.34, "total_tokens": 3839488}
|
|
{"current_steps": 7815, "total_steps": 37885, "loss": 0.0557, "lr": 1.9320152901774124e-06, "epoch": 1.0314108486208262, "percentage": 20.63, "elapsed_time": "0:11:30", "remaining_time": "0:44:15", "throughput": 5567.46, "total_tokens": 3842112}
|
|
{"current_steps": 7820, "total_steps": 37885, "loss": 0.0745, "lr": 1.9318482255436022e-06, "epoch": 1.0320707403985747, "percentage": 20.64, "elapsed_time": "0:11:30", "remaining_time": "0:44:14", "throughput": 5567.99, "total_tokens": 3844288}
|
|
{"current_steps": 7825, "total_steps": 37885, "loss": 0.1813, "lr": 1.9316809631317544e-06, "epoch": 1.032730632176323, "percentage": 20.65, "elapsed_time": "0:11:30", "remaining_time": "0:44:13", "throughput": 5568.79, "total_tokens": 3846656}
|
|
{"current_steps": 7830, "total_steps": 37885, "loss": 0.0014, "lr": 1.931513502977369e-06, "epoch": 1.0333905239540715, "percentage": 20.67, "elapsed_time": "0:11:31", "remaining_time": "0:44:12", "throughput": 5570.0, "total_tokens": 3849344}
|
|
{"current_steps": 7835, "total_steps": 37885, "loss": 0.0637, "lr": 1.931345845115988e-06, "epoch": 1.03405041573182, "percentage": 20.68, "elapsed_time": "0:11:31", "remaining_time": "0:44:11", "throughput": 5570.81, "total_tokens": 3851712}
|
|
{"current_steps": 7840, "total_steps": 37885, "loss": 0.0103, "lr": 1.931177989583195e-06, "epoch": 1.0347103075095685, "percentage": 20.69, "elapsed_time": "0:11:31", "remaining_time": "0:44:10", "throughput": 5571.59, "total_tokens": 3854080}
|
|
{"current_steps": 7845, "total_steps": 37885, "loss": 0.063, "lr": 1.9310099364146174e-06, "epoch": 1.0353701992873168, "percentage": 20.71, "elapsed_time": "0:11:32", "remaining_time": "0:44:10", "throughput": 5572.55, "total_tokens": 3856576}
|
|
{"current_steps": 7850, "total_steps": 37885, "loss": 0.1425, "lr": 1.930841685645922e-06, "epoch": 1.0360300910650653, "percentage": 20.72, "elapsed_time": "0:11:32", "remaining_time": "0:44:09", "throughput": 5573.44, "total_tokens": 3859008}
|
|
{"current_steps": 7855, "total_steps": 37885, "loss": 0.0006, "lr": 1.93067323731282e-06, "epoch": 1.0366899828428138, "percentage": 20.73, "elapsed_time": "0:11:32", "remaining_time": "0:44:08", "throughput": 5574.22, "total_tokens": 3861376}
|
|
{"current_steps": 7860, "total_steps": 37885, "loss": 0.0849, "lr": 1.930504591451063e-06, "epoch": 1.0373498746205623, "percentage": 20.75, "elapsed_time": "0:11:33", "remaining_time": "0:44:07", "throughput": 5575.21, "total_tokens": 3863872}
|
|
{"current_steps": 7865, "total_steps": 37885, "loss": 0.073, "lr": 1.9303357480964445e-06, "epoch": 1.0380097663983108, "percentage": 20.76, "elapsed_time": "0:11:33", "remaining_time": "0:44:06", "throughput": 5576.08, "total_tokens": 3866304}
|
|
{"current_steps": 7870, "total_steps": 37885, "loss": 0.0006, "lr": 1.9301667072848002e-06, "epoch": 1.038669658176059, "percentage": 20.77, "elapsed_time": "0:11:33", "remaining_time": "0:44:05", "throughput": 5577.13, "total_tokens": 3868864}
|
|
{"current_steps": 7875, "total_steps": 37885, "loss": 0.0896, "lr": 1.929997469052008e-06, "epoch": 1.0393295499538076, "percentage": 20.79, "elapsed_time": "0:11:34", "remaining_time": "0:44:04", "throughput": 5577.57, "total_tokens": 3870976}
|
|
{"current_steps": 7880, "total_steps": 37885, "loss": 0.132, "lr": 1.929828033433988e-06, "epoch": 1.039989441731556, "percentage": 20.8, "elapsed_time": "0:11:34", "remaining_time": "0:44:03", "throughput": 5578.44, "total_tokens": 3873408}
|
|
{"current_steps": 7885, "total_steps": 37885, "loss": 0.2669, "lr": 1.9296584004667005e-06, "epoch": 1.0406493335093046, "percentage": 20.81, "elapsed_time": "0:11:34", "remaining_time": "0:44:03", "throughput": 5579.21, "total_tokens": 3875776}
|
|
{"current_steps": 7890, "total_steps": 37885, "loss": 0.0612, "lr": 1.92948857018615e-06, "epoch": 1.0413092252870528, "percentage": 20.83, "elapsed_time": "0:11:35", "remaining_time": "0:44:02", "throughput": 5579.95, "total_tokens": 3878144}
|
|
{"current_steps": 7895, "total_steps": 37885, "loss": 0.1703, "lr": 1.929318542628381e-06, "epoch": 1.0419691170648013, "percentage": 20.84, "elapsed_time": "0:11:35", "remaining_time": "0:44:01", "throughput": 5580.74, "total_tokens": 3880512}
|
|
{"current_steps": 7900, "total_steps": 37885, "loss": 0.0021, "lr": 1.9291483178294813e-06, "epoch": 1.0426290088425498, "percentage": 20.85, "elapsed_time": "0:11:35", "remaining_time": "0:44:00", "throughput": 5581.51, "total_tokens": 3882880}
|
|
{"current_steps": 7905, "total_steps": 37885, "loss": 0.0301, "lr": 1.928977895825579e-06, "epoch": 1.0432889006202983, "percentage": 20.87, "elapsed_time": "0:11:35", "remaining_time": "0:43:59", "throughput": 5582.36, "total_tokens": 3885312}
|
|
{"current_steps": 7910, "total_steps": 37885, "loss": 0.0624, "lr": 1.928807276652846e-06, "epoch": 1.0439487923980466, "percentage": 20.88, "elapsed_time": "0:11:36", "remaining_time": "0:43:58", "throughput": 5583.22, "total_tokens": 3887744}
|
|
{"current_steps": 7915, "total_steps": 37885, "loss": 0.0541, "lr": 1.928636460347494e-06, "epoch": 1.044608684175795, "percentage": 20.89, "elapsed_time": "0:11:36", "remaining_time": "0:43:57", "throughput": 5583.91, "total_tokens": 3890048}
|
|
{"current_steps": 7920, "total_steps": 37885, "loss": 0.0159, "lr": 1.928465446945778e-06, "epoch": 1.0452685759535436, "percentage": 20.91, "elapsed_time": "0:11:36", "remaining_time": "0:43:56", "throughput": 5584.77, "total_tokens": 3892480}
|
|
{"current_steps": 7925, "total_steps": 37885, "loss": 0.0013, "lr": 1.9282942364839947e-06, "epoch": 1.045928467731292, "percentage": 20.92, "elapsed_time": "0:11:37", "remaining_time": "0:43:56", "throughput": 5585.45, "total_tokens": 3894784}
|
|
{"current_steps": 7930, "total_steps": 37885, "loss": 0.0775, "lr": 1.9281228289984816e-06, "epoch": 1.0465883595090406, "percentage": 20.93, "elapsed_time": "0:11:37", "remaining_time": "0:43:55", "throughput": 5586.63, "total_tokens": 3897472}
|
|
{"current_steps": 7935, "total_steps": 37885, "loss": 0.078, "lr": 1.927951224525619e-06, "epoch": 1.047248251286789, "percentage": 20.94, "elapsed_time": "0:11:37", "remaining_time": "0:43:54", "throughput": 5587.66, "total_tokens": 3900032}
|
|
{"current_steps": 7940, "total_steps": 37885, "loss": 0.0335, "lr": 1.9277794231018286e-06, "epoch": 1.0479081430645374, "percentage": 20.96, "elapsed_time": "0:11:38", "remaining_time": "0:43:53", "throughput": 5588.7, "total_tokens": 3902592}
|
|
{"current_steps": 7945, "total_steps": 37885, "loss": 0.0915, "lr": 1.927607424763574e-06, "epoch": 1.048568034842286, "percentage": 20.97, "elapsed_time": "0:11:38", "remaining_time": "0:43:52", "throughput": 5589.38, "total_tokens": 3904896}
|
|
{"current_steps": 7950, "total_steps": 37885, "loss": 0.0017, "lr": 1.927435229547361e-06, "epoch": 1.0492279266200344, "percentage": 20.98, "elapsed_time": "0:11:38", "remaining_time": "0:43:51", "throughput": 5590.05, "total_tokens": 3907200}
|
|
{"current_steps": 7955, "total_steps": 37885, "loss": 0.0768, "lr": 1.9272628374897366e-06, "epoch": 1.0498878183977827, "percentage": 21.0, "elapsed_time": "0:11:39", "remaining_time": "0:43:51", "throughput": 5591.21, "total_tokens": 3909888}
|
|
{"current_steps": 7960, "total_steps": 37885, "loss": 0.0445, "lr": 1.9270902486272892e-06, "epoch": 1.0505477101755312, "percentage": 21.01, "elapsed_time": "0:11:39", "remaining_time": "0:43:50", "throughput": 5592.01, "total_tokens": 3912320}
|
|
{"current_steps": 7965, "total_steps": 37885, "loss": 0.1083, "lr": 1.92691746299665e-06, "epoch": 1.0512076019532797, "percentage": 21.02, "elapsed_time": "0:11:39", "remaining_time": "0:43:49", "throughput": 5593.07, "total_tokens": 3914944}
|
|
{"current_steps": 7970, "total_steps": 37885, "loss": 0.0833, "lr": 1.9267444806344917e-06, "epoch": 1.0518674937310282, "percentage": 21.04, "elapsed_time": "0:11:40", "remaining_time": "0:43:48", "throughput": 5593.87, "total_tokens": 3917376}
|
|
{"current_steps": 7975, "total_steps": 37885, "loss": 0.0865, "lr": 1.9265713015775285e-06, "epoch": 1.0525273855087764, "percentage": 21.05, "elapsed_time": "0:11:40", "remaining_time": "0:43:47", "throughput": 5594.79, "total_tokens": 3919872}
|
|
{"current_steps": 7980, "total_steps": 37885, "loss": 0.1148, "lr": 1.926397925862516e-06, "epoch": 1.053187277286525, "percentage": 21.06, "elapsed_time": "0:11:40", "remaining_time": "0:43:46", "throughput": 5595.68, "total_tokens": 3922368}
|
|
{"current_steps": 7985, "total_steps": 37885, "loss": 0.0591, "lr": 1.9262243535262527e-06, "epoch": 1.0538471690642734, "percentage": 21.08, "elapsed_time": "0:11:41", "remaining_time": "0:43:46", "throughput": 5596.81, "total_tokens": 3925056}
|
|
{"current_steps": 7990, "total_steps": 37885, "loss": 0.2027, "lr": 1.926050584605577e-06, "epoch": 1.054507060842022, "percentage": 21.09, "elapsed_time": "0:11:41", "remaining_time": "0:43:45", "throughput": 5597.69, "total_tokens": 3927552}
|
|
{"current_steps": 7995, "total_steps": 37885, "loss": 0.0009, "lr": 1.9258766191373706e-06, "epoch": 1.0551669526197704, "percentage": 21.1, "elapsed_time": "0:11:41", "remaining_time": "0:43:44", "throughput": 5598.77, "total_tokens": 3930176}
|
|
{"current_steps": 8000, "total_steps": 37885, "loss": 0.0005, "lr": 1.9257024571585565e-06, "epoch": 1.0558268443975187, "percentage": 21.12, "elapsed_time": "0:11:42", "remaining_time": "0:43:43", "throughput": 5599.68, "total_tokens": 3932672}
|
|
{"current_steps": 8005, "total_steps": 37885, "loss": 0.2338, "lr": 1.9255280987060995e-06, "epoch": 1.0564867361752672, "percentage": 21.13, "elapsed_time": "0:11:42", "remaining_time": "0:43:42", "throughput": 5600.22, "total_tokens": 3934912}
|
|
{"current_steps": 8010, "total_steps": 37885, "loss": 0.0681, "lr": 1.9253535438170056e-06, "epoch": 1.0571466279530157, "percentage": 21.14, "elapsed_time": "0:11:42", "remaining_time": "0:43:41", "throughput": 5600.83, "total_tokens": 3937216}
|
|
{"current_steps": 8015, "total_steps": 37885, "loss": 0.145, "lr": 1.9251787925283228e-06, "epoch": 1.0578065197307642, "percentage": 21.16, "elapsed_time": "0:11:43", "remaining_time": "0:43:41", "throughput": 5601.79, "total_tokens": 3939776}
|
|
{"current_steps": 8020, "total_steps": 37885, "loss": 0.1157, "lr": 1.925003844877141e-06, "epoch": 1.0584664115085125, "percentage": 21.17, "elapsed_time": "0:11:43", "remaining_time": "0:43:40", "throughput": 5602.18, "total_tokens": 3941888}
|
|
{"current_steps": 8025, "total_steps": 37885, "loss": 0.0852, "lr": 1.9248287009005914e-06, "epoch": 1.059126303286261, "percentage": 21.18, "elapsed_time": "0:11:43", "remaining_time": "0:43:39", "throughput": 5602.83, "total_tokens": 3944192}
|
|
{"current_steps": 8030, "total_steps": 37885, "loss": 0.0706, "lr": 1.9246533606358475e-06, "epoch": 1.0597861950640095, "percentage": 21.2, "elapsed_time": "0:11:44", "remaining_time": "0:43:38", "throughput": 5603.85, "total_tokens": 3946816}
|
|
{"current_steps": 8035, "total_steps": 37885, "loss": 0.1036, "lr": 1.9244778241201232e-06, "epoch": 1.060446086841758, "percentage": 21.21, "elapsed_time": "0:11:44", "remaining_time": "0:43:37", "throughput": 5604.89, "total_tokens": 3949440}
|
|
{"current_steps": 8040, "total_steps": 37885, "loss": 0.0058, "lr": 1.9243020913906753e-06, "epoch": 1.0611059786195065, "percentage": 21.22, "elapsed_time": "0:11:44", "remaining_time": "0:43:36", "throughput": 5605.81, "total_tokens": 3952000}
|
|
{"current_steps": 8045, "total_steps": 37885, "loss": 0.0008, "lr": 1.924126162484802e-06, "epoch": 1.0617658703972548, "percentage": 21.24, "elapsed_time": "0:11:45", "remaining_time": "0:43:36", "throughput": 5606.31, "total_tokens": 3954240}
|
|
{"current_steps": 8050, "total_steps": 37885, "loss": 0.1045, "lr": 1.9239500374398427e-06, "epoch": 1.0624257621750033, "percentage": 21.25, "elapsed_time": "0:11:45", "remaining_time": "0:43:35", "throughput": 5607.01, "total_tokens": 3956608}
|
|
{"current_steps": 8055, "total_steps": 37885, "loss": 0.0012, "lr": 1.9237737162931785e-06, "epoch": 1.0630856539527518, "percentage": 21.26, "elapsed_time": "0:11:45", "remaining_time": "0:43:34", "throughput": 5607.89, "total_tokens": 3959104}
|
|
{"current_steps": 8060, "total_steps": 37885, "loss": 0.1183, "lr": 1.9235971990822323e-06, "epoch": 1.0637455457305003, "percentage": 21.27, "elapsed_time": "0:11:46", "remaining_time": "0:43:33", "throughput": 5608.87, "total_tokens": 3961664}
|
|
{"current_steps": 8065, "total_steps": 37885, "loss": 0.0608, "lr": 1.923420485844469e-06, "epoch": 1.0644054375082486, "percentage": 21.29, "elapsed_time": "0:11:46", "remaining_time": "0:43:32", "throughput": 5609.97, "total_tokens": 3964352}
|
|
{"current_steps": 8070, "total_steps": 37885, "loss": 0.0015, "lr": 1.9232435766173944e-06, "epoch": 1.065065329285997, "percentage": 21.3, "elapsed_time": "0:11:46", "remaining_time": "0:43:32", "throughput": 5610.59, "total_tokens": 3966656}
|
|
{"current_steps": 8075, "total_steps": 37885, "loss": 0.1006, "lr": 1.9230664714385567e-06, "epoch": 1.0657252210637456, "percentage": 21.31, "elapsed_time": "0:11:47", "remaining_time": "0:43:31", "throughput": 5611.08, "total_tokens": 3968896}
|
|
{"current_steps": 8080, "total_steps": 37885, "loss": 0.028, "lr": 1.922889170345544e-06, "epoch": 1.066385112841494, "percentage": 21.33, "elapsed_time": "0:11:47", "remaining_time": "0:43:30", "throughput": 5611.83, "total_tokens": 3971328}
|
|
{"current_steps": 8085, "total_steps": 37885, "loss": 0.1022, "lr": 1.9227116733759883e-06, "epoch": 1.0670450046192423, "percentage": 21.34, "elapsed_time": "0:11:48", "remaining_time": "0:43:29", "throughput": 5612.48, "total_tokens": 3973696}
|
|
{"current_steps": 8090, "total_steps": 37885, "loss": 0.1266, "lr": 1.922533980567562e-06, "epoch": 1.0677048963969908, "percentage": 21.35, "elapsed_time": "0:11:48", "remaining_time": "0:43:28", "throughput": 5613.34, "total_tokens": 3976192}
|
|
{"current_steps": 8095, "total_steps": 37885, "loss": 0.0802, "lr": 1.9223560919579782e-06, "epoch": 1.0683647881747393, "percentage": 21.37, "elapsed_time": "0:11:48", "remaining_time": "0:43:28", "throughput": 5614.51, "total_tokens": 3978944}
|
|
{"current_steps": 8100, "total_steps": 37885, "loss": 0.0701, "lr": 1.922178007584993e-06, "epoch": 1.0690246799524878, "percentage": 21.38, "elapsed_time": "0:11:49", "remaining_time": "0:43:27", "throughput": 5615.28, "total_tokens": 3981376}
|
|
{"current_steps": 8105, "total_steps": 37885, "loss": 0.0006, "lr": 1.921999727486404e-06, "epoch": 1.0696845717302363, "percentage": 21.39, "elapsed_time": "0:11:49", "remaining_time": "0:43:26", "throughput": 5615.95, "total_tokens": 3983744}
|
|
{"current_steps": 8110, "total_steps": 37885, "loss": 0.0604, "lr": 1.9218212517000495e-06, "epoch": 1.0703444635079846, "percentage": 21.41, "elapsed_time": "0:11:49", "remaining_time": "0:43:25", "throughput": 5616.56, "total_tokens": 3986048}
|
|
{"current_steps": 8115, "total_steps": 37885, "loss": 0.0162, "lr": 1.9216425802638095e-06, "epoch": 1.0710043552857331, "percentage": 21.42, "elapsed_time": "0:11:50", "remaining_time": "0:43:24", "throughput": 5617.7, "total_tokens": 3988736}
|
|
{"current_steps": 8120, "total_steps": 37885, "loss": 0.046, "lr": 1.9214637132156056e-06, "epoch": 1.0716642470634816, "percentage": 21.43, "elapsed_time": "0:11:50", "remaining_time": "0:43:23", "throughput": 5618.69, "total_tokens": 3991360}
|
|
{"current_steps": 8125, "total_steps": 37885, "loss": 0.0479, "lr": 1.9212846505934018e-06, "epoch": 1.0723241388412301, "percentage": 21.45, "elapsed_time": "0:11:50", "remaining_time": "0:43:23", "throughput": 5619.97, "total_tokens": 3994176}
|
|
{"current_steps": 8130, "total_steps": 37885, "loss": 0.0754, "lr": 1.921105392435202e-06, "epoch": 1.0729840306189784, "percentage": 21.46, "elapsed_time": "0:11:51", "remaining_time": "0:43:22", "throughput": 5620.46, "total_tokens": 3996416}
|
|
{"current_steps": 8135, "total_steps": 37885, "loss": 0.0023, "lr": 1.9209259387790526e-06, "epoch": 1.073643922396727, "percentage": 21.47, "elapsed_time": "0:11:51", "remaining_time": "0:43:21", "throughput": 5621.4, "total_tokens": 3998976}
|
|
{"current_steps": 8140, "total_steps": 37885, "loss": 0.1178, "lr": 1.920746289663042e-06, "epoch": 1.0743038141744754, "percentage": 21.49, "elapsed_time": "0:11:51", "remaining_time": "0:43:20", "throughput": 5622.07, "total_tokens": 4001344}
|
|
{"current_steps": 8145, "total_steps": 37885, "loss": 0.0384, "lr": 1.9205664451252986e-06, "epoch": 1.074963705952224, "percentage": 21.5, "elapsed_time": "0:11:52", "remaining_time": "0:43:19", "throughput": 5622.75, "total_tokens": 4003712}
|
|
{"current_steps": 8150, "total_steps": 37885, "loss": 0.0743, "lr": 1.9203864052039935e-06, "epoch": 1.0756235977299722, "percentage": 21.51, "elapsed_time": "0:11:52", "remaining_time": "0:43:19", "throughput": 5623.53, "total_tokens": 4006144}
|
|
{"current_steps": 8155, "total_steps": 37885, "loss": 0.0612, "lr": 1.9202061699373386e-06, "epoch": 1.0762834895077207, "percentage": 21.53, "elapsed_time": "0:11:52", "remaining_time": "0:43:18", "throughput": 5624.38, "total_tokens": 4008640}
|
|
{"current_steps": 8160, "total_steps": 37885, "loss": 0.0445, "lr": 1.9200257393635878e-06, "epoch": 1.0769433812854692, "percentage": 21.54, "elapsed_time": "0:11:53", "remaining_time": "0:43:17", "throughput": 5625.67, "total_tokens": 4011456}
|
|
{"current_steps": 8165, "total_steps": 37885, "loss": 0.0008, "lr": 1.9198451135210365e-06, "epoch": 1.0776032730632177, "percentage": 21.55, "elapsed_time": "0:11:53", "remaining_time": "0:43:16", "throughput": 5626.78, "total_tokens": 4014144}
|
|
{"current_steps": 8170, "total_steps": 37885, "loss": 0.0969, "lr": 1.919664292448021e-06, "epoch": 1.0782631648409662, "percentage": 21.57, "elapsed_time": "0:11:53", "remaining_time": "0:43:15", "throughput": 5627.2, "total_tokens": 4016320}
|
|
{"current_steps": 8175, "total_steps": 37885, "loss": 0.0018, "lr": 1.9194832761829184e-06, "epoch": 1.0789230566187145, "percentage": 21.58, "elapsed_time": "0:11:54", "remaining_time": "0:43:15", "throughput": 5628.33, "total_tokens": 4019008}
|
|
{"current_steps": 8180, "total_steps": 37885, "loss": 0.1604, "lr": 1.919302064764149e-06, "epoch": 1.079582948396463, "percentage": 21.59, "elapsed_time": "0:11:54", "remaining_time": "0:43:14", "throughput": 5629.27, "total_tokens": 4021568}
|
|
{"current_steps": 8185, "total_steps": 37885, "loss": 0.0614, "lr": 1.9191206582301737e-06, "epoch": 1.0802428401742115, "percentage": 21.6, "elapsed_time": "0:11:54", "remaining_time": "0:43:13", "throughput": 5630.14, "total_tokens": 4024064}
|
|
{"current_steps": 8190, "total_steps": 37885, "loss": 0.2029, "lr": 1.9189390566194942e-06, "epoch": 1.08090273195196, "percentage": 21.62, "elapsed_time": "0:11:55", "remaining_time": "0:43:12", "throughput": 5630.9, "total_tokens": 4026496}
|
|
{"current_steps": 8195, "total_steps": 37885, "loss": 0.0022, "lr": 1.9187572599706547e-06, "epoch": 1.0815626237297082, "percentage": 21.63, "elapsed_time": "0:11:55", "remaining_time": "0:43:11", "throughput": 5631.74, "total_tokens": 4028992}
|
|
{"current_steps": 8200, "total_steps": 37885, "loss": 0.2369, "lr": 1.9185752683222395e-06, "epoch": 1.0822225155074567, "percentage": 21.64, "elapsed_time": "0:11:55", "remaining_time": "0:43:11", "throughput": 5632.32, "total_tokens": 4031296}
|
|
{"current_steps": 8205, "total_steps": 37885, "loss": 0.0804, "lr": 1.9183930817128755e-06, "epoch": 1.0828824072852052, "percentage": 21.66, "elapsed_time": "0:11:56", "remaining_time": "0:43:10", "throughput": 5633.02, "total_tokens": 4033664}
|
|
{"current_steps": 8210, "total_steps": 37885, "loss": 0.0454, "lr": 1.9182107001812303e-06, "epoch": 1.0835422990629537, "percentage": 21.67, "elapsed_time": "0:11:56", "remaining_time": "0:43:09", "throughput": 5633.85, "total_tokens": 4036160}
|
|
{"current_steps": 8215, "total_steps": 37885, "loss": 0.0698, "lr": 1.9180281237660136e-06, "epoch": 1.0842021908407022, "percentage": 21.68, "elapsed_time": "0:11:56", "remaining_time": "0:43:08", "throughput": 5634.86, "total_tokens": 4038784}
|
|
{"current_steps": 8220, "total_steps": 37885, "loss": 0.0012, "lr": 1.917845352505975e-06, "epoch": 1.0848620826184505, "percentage": 21.7, "elapsed_time": "0:11:57", "remaining_time": "0:43:07", "throughput": 5635.73, "total_tokens": 4041280}
|
|
{"current_steps": 8225, "total_steps": 37885, "loss": 0.0258, "lr": 1.917662386439907e-06, "epoch": 1.085521974396199, "percentage": 21.71, "elapsed_time": "0:11:57", "remaining_time": "0:43:07", "throughput": 5636.53, "total_tokens": 4043712}
|
|
{"current_steps": 8230, "total_steps": 37885, "loss": 0.0015, "lr": 1.9174792256066427e-06, "epoch": 1.0861818661739475, "percentage": 21.72, "elapsed_time": "0:11:57", "remaining_time": "0:43:06", "throughput": 5636.87, "total_tokens": 4045824}
|
|
{"current_steps": 8235, "total_steps": 37885, "loss": 0.0688, "lr": 1.9172958700450565e-06, "epoch": 1.086841757951696, "percentage": 21.74, "elapsed_time": "0:11:58", "remaining_time": "0:43:05", "throughput": 5637.71, "total_tokens": 4048320}
|
|
{"current_steps": 8240, "total_steps": 37885, "loss": 0.0083, "lr": 1.9171123197940647e-06, "epoch": 1.0875016497294443, "percentage": 21.75, "elapsed_time": "0:11:58", "remaining_time": "0:43:04", "throughput": 5638.46, "total_tokens": 4050688}
|
|
{"current_steps": 8245, "total_steps": 37885, "loss": 0.0006, "lr": 1.916928574892624e-06, "epoch": 1.0881615415071928, "percentage": 21.76, "elapsed_time": "0:11:58", "remaining_time": "0:43:03", "throughput": 5639.5, "total_tokens": 4053312}
|
|
{"current_steps": 8250, "total_steps": 37885, "loss": 0.0955, "lr": 1.9167446353797334e-06, "epoch": 1.0888214332849413, "percentage": 21.78, "elapsed_time": "0:11:59", "remaining_time": "0:43:02", "throughput": 5640.49, "total_tokens": 4055872}
|
|
{"current_steps": 8255, "total_steps": 37885, "loss": 0.0006, "lr": 1.9165605012944322e-06, "epoch": 1.0894813250626898, "percentage": 21.79, "elapsed_time": "0:11:59", "remaining_time": "0:43:02", "throughput": 5641.3, "total_tokens": 4058304}
|
|
{"current_steps": 8260, "total_steps": 37885, "loss": 0.1845, "lr": 1.916376172675802e-06, "epoch": 1.090141216840438, "percentage": 21.8, "elapsed_time": "0:11:59", "remaining_time": "0:43:01", "throughput": 5642.2, "total_tokens": 4060800}
|
|
{"current_steps": 8265, "total_steps": 37885, "loss": 0.1351, "lr": 1.916191649562965e-06, "epoch": 1.0908011086181866, "percentage": 21.82, "elapsed_time": "0:12:00", "remaining_time": "0:43:00", "throughput": 5643.33, "total_tokens": 4063488}
|
|
{"current_steps": 8270, "total_steps": 37885, "loss": 0.0002, "lr": 1.9160069319950844e-06, "epoch": 1.091461000395935, "percentage": 21.83, "elapsed_time": "0:12:00", "remaining_time": "0:42:59", "throughput": 5644.47, "total_tokens": 4066176}
|
|
{"current_steps": 8275, "total_steps": 37885, "loss": 0.0815, "lr": 1.915822020011366e-06, "epoch": 1.0921208921736836, "percentage": 21.84, "elapsed_time": "0:12:00", "remaining_time": "0:42:58", "throughput": 5645.6, "total_tokens": 4068864}
|
|
{"current_steps": 8280, "total_steps": 37885, "loss": 0.0007, "lr": 1.915636913651056e-06, "epoch": 1.092780783951432, "percentage": 21.86, "elapsed_time": "0:12:01", "remaining_time": "0:42:58", "throughput": 5646.49, "total_tokens": 4071360}
|
|
{"current_steps": 8285, "total_steps": 37885, "loss": 0.0924, "lr": 1.9154516129534414e-06, "epoch": 1.0934406757291804, "percentage": 21.87, "elapsed_time": "0:12:01", "remaining_time": "0:42:57", "throughput": 5647.71, "total_tokens": 4074112}
|
|
{"current_steps": 8290, "total_steps": 37885, "loss": 0.0658, "lr": 1.915266117957851e-06, "epoch": 1.0941005675069289, "percentage": 21.88, "elapsed_time": "0:12:01", "remaining_time": "0:42:56", "throughput": 5648.43, "total_tokens": 4076480}
|
|
{"current_steps": 8295, "total_steps": 37885, "loss": 0.0017, "lr": 1.915080428703655e-06, "epoch": 1.0947604592846774, "percentage": 21.9, "elapsed_time": "0:12:02", "remaining_time": "0:42:55", "throughput": 5649.32, "total_tokens": 4078976}
|
|
{"current_steps": 8300, "total_steps": 37885, "loss": 0.1423, "lr": 1.9148945452302647e-06, "epoch": 1.0954203510624259, "percentage": 21.91, "elapsed_time": "0:12:02", "remaining_time": "0:42:54", "throughput": 5650.44, "total_tokens": 4081664}
|
|
{"current_steps": 8305, "total_steps": 37885, "loss": 0.1065, "lr": 1.9147084675771322e-06, "epoch": 1.0960802428401741, "percentage": 21.92, "elapsed_time": "0:12:02", "remaining_time": "0:42:54", "throughput": 5651.49, "total_tokens": 4084288}
|
|
{"current_steps": 8310, "total_steps": 37885, "loss": 0.0763, "lr": 1.9145221957837513e-06, "epoch": 1.0967401346179226, "percentage": 21.93, "elapsed_time": "0:12:03", "remaining_time": "0:42:53", "throughput": 5652.22, "total_tokens": 4086656}
|
|
{"current_steps": 8315, "total_steps": 37885, "loss": 0.066, "lr": 1.9143357298896564e-06, "epoch": 1.0974000263956711, "percentage": 21.95, "elapsed_time": "0:12:03", "remaining_time": "0:42:52", "throughput": 5653.09, "total_tokens": 4089152}
|
|
{"current_steps": 8320, "total_steps": 37885, "loss": 0.0539, "lr": 1.9141490699344243e-06, "epoch": 1.0980599181734196, "percentage": 21.96, "elapsed_time": "0:12:03", "remaining_time": "0:42:51", "throughput": 5653.89, "total_tokens": 4091584}
|
|
{"current_steps": 8325, "total_steps": 37885, "loss": 0.0029, "lr": 1.913962215957672e-06, "epoch": 1.098719809951168, "percentage": 21.97, "elapsed_time": "0:12:04", "remaining_time": "0:42:50", "throughput": 5654.51, "total_tokens": 4093888}
|
|
{"current_steps": 8330, "total_steps": 37885, "loss": 0.0022, "lr": 1.9137751679990576e-06, "epoch": 1.0993797017289164, "percentage": 21.99, "elapsed_time": "0:12:04", "remaining_time": "0:42:49", "throughput": 5655.24, "total_tokens": 4096256}
|
|
{"current_steps": 8335, "total_steps": 37885, "loss": 0.1069, "lr": 1.9135879260982806e-06, "epoch": 1.100039593506665, "percentage": 22.0, "elapsed_time": "0:12:04", "remaining_time": "0:42:49", "throughput": 5656.19, "total_tokens": 4098816}
|
|
{"current_steps": 8340, "total_steps": 37885, "loss": 0.1271, "lr": 1.9134004902950826e-06, "epoch": 1.1006994852844134, "percentage": 22.01, "elapsed_time": "0:12:04", "remaining_time": "0:42:48", "throughput": 5656.93, "total_tokens": 4101184}
|
|
{"current_steps": 8345, "total_steps": 37885, "loss": 0.1017, "lr": 1.913212860629244e-06, "epoch": 1.101359377062162, "percentage": 22.03, "elapsed_time": "0:12:05", "remaining_time": "0:42:47", "throughput": 5657.54, "total_tokens": 4103488}
|
|
{"current_steps": 8350, "total_steps": 37885, "loss": 0.1092, "lr": 1.9130250371405895e-06, "epoch": 1.1020192688399102, "percentage": 22.04, "elapsed_time": "0:12:05", "remaining_time": "0:42:46", "throughput": 5658.17, "total_tokens": 4105792}
|
|
{"current_steps": 8355, "total_steps": 37885, "loss": 0.0289, "lr": 1.912837019868982e-06, "epoch": 1.1026791606176587, "percentage": 22.05, "elapsed_time": "0:12:05", "remaining_time": "0:42:45", "throughput": 5659.19, "total_tokens": 4108416}
|
|
{"current_steps": 8360, "total_steps": 37885, "loss": 0.0011, "lr": 1.9126488088543273e-06, "epoch": 1.1033390523954072, "percentage": 22.07, "elapsed_time": "0:12:06", "remaining_time": "0:42:45", "throughput": 5660.16, "total_tokens": 4110976}
|
|
{"current_steps": 8365, "total_steps": 37885, "loss": 0.1147, "lr": 1.912460404136572e-06, "epoch": 1.1039989441731557, "percentage": 22.08, "elapsed_time": "0:12:06", "remaining_time": "0:42:44", "throughput": 5661.08, "total_tokens": 4113536}
|
|
{"current_steps": 8370, "total_steps": 37885, "loss": 0.1414, "lr": 1.912271805755703e-06, "epoch": 1.104658835950904, "percentage": 22.09, "elapsed_time": "0:12:06", "remaining_time": "0:42:43", "throughput": 5661.72, "total_tokens": 4115840}
|
|
{"current_steps": 8375, "total_steps": 37885, "loss": 0.0527, "lr": 1.9120830137517498e-06, "epoch": 1.1053187277286525, "percentage": 22.11, "elapsed_time": "0:12:07", "remaining_time": "0:42:42", "throughput": 5662.43, "total_tokens": 4118208}
|
|
{"current_steps": 8380, "total_steps": 37885, "loss": 0.0102, "lr": 1.9118940281647816e-06, "epoch": 1.105978619506401, "percentage": 22.12, "elapsed_time": "0:12:07", "remaining_time": "0:42:41", "throughput": 5662.96, "total_tokens": 4120448}
|
|
{"current_steps": 8385, "total_steps": 37885, "loss": 0.1928, "lr": 1.9117048490349096e-06, "epoch": 1.1066385112841495, "percentage": 22.13, "elapsed_time": "0:12:07", "remaining_time": "0:42:41", "throughput": 5663.34, "total_tokens": 4122560}
|
|
{"current_steps": 8390, "total_steps": 37885, "loss": 0.0306, "lr": 1.9115154764022852e-06, "epoch": 1.1072984030618978, "percentage": 22.15, "elapsed_time": "0:12:08", "remaining_time": "0:42:40", "throughput": 5664.28, "total_tokens": 4125120}
|
|
{"current_steps": 8395, "total_steps": 37885, "loss": 0.0867, "lr": 1.9113259103071015e-06, "epoch": 1.1079582948396463, "percentage": 22.16, "elapsed_time": "0:12:08", "remaining_time": "0:42:39", "throughput": 5664.8, "total_tokens": 4127360}
|
|
{"current_steps": 8400, "total_steps": 37885, "loss": 0.1242, "lr": 1.9111361507895925e-06, "epoch": 1.1086181866173948, "percentage": 22.17, "elapsed_time": "0:12:08", "remaining_time": "0:42:38", "throughput": 5665.44, "total_tokens": 4129664}
|
|
{"current_steps": 8405, "total_steps": 37885, "loss": 0.082, "lr": 1.9109461978900342e-06, "epoch": 1.1092780783951433, "percentage": 22.19, "elapsed_time": "0:12:09", "remaining_time": "0:42:37", "throughput": 5666.16, "total_tokens": 4132032}
|
|
{"current_steps": 8410, "total_steps": 37885, "loss": 0.0745, "lr": 1.910756051648741e-06, "epoch": 1.1099379701728918, "percentage": 22.2, "elapsed_time": "0:12:09", "remaining_time": "0:42:36", "throughput": 5667.04, "total_tokens": 4134528}
|
|
{"current_steps": 8415, "total_steps": 37885, "loss": 0.001, "lr": 1.9105657121060715e-06, "epoch": 1.11059786195064, "percentage": 22.21, "elapsed_time": "0:12:09", "remaining_time": "0:42:36", "throughput": 5668.23, "total_tokens": 4137280}
|
|
{"current_steps": 8420, "total_steps": 37885, "loss": 0.0026, "lr": 1.9103751793024236e-06, "epoch": 1.1112577537283885, "percentage": 22.23, "elapsed_time": "0:12:10", "remaining_time": "0:42:35", "throughput": 5669.09, "total_tokens": 4139776}
|
|
{"current_steps": 8425, "total_steps": 37885, "loss": 0.0961, "lr": 1.9101844532782357e-06, "epoch": 1.111917645506137, "percentage": 22.24, "elapsed_time": "0:12:10", "remaining_time": "0:42:34", "throughput": 5669.81, "total_tokens": 4142144}
|
|
{"current_steps": 8430, "total_steps": 37885, "loss": 0.0057, "lr": 1.909993534073989e-06, "epoch": 1.1125775372838855, "percentage": 22.25, "elapsed_time": "0:12:10", "remaining_time": "0:42:33", "throughput": 5670.84, "total_tokens": 4144768}
|
|
{"current_steps": 8435, "total_steps": 37885, "loss": 0.0725, "lr": 1.9098024217302043e-06, "epoch": 1.1132374290616338, "percentage": 22.26, "elapsed_time": "0:12:11", "remaining_time": "0:42:32", "throughput": 5671.38, "total_tokens": 4147008}
|
|
{"current_steps": 8440, "total_steps": 37885, "loss": 0.0589, "lr": 1.909611116287444e-06, "epoch": 1.1138973208393823, "percentage": 22.28, "elapsed_time": "0:12:11", "remaining_time": "0:42:32", "throughput": 5671.58, "total_tokens": 4148992}
|
|
{"current_steps": 8445, "total_steps": 37885, "loss": 0.0708, "lr": 1.909419617786311e-06, "epoch": 1.1145572126171308, "percentage": 22.29, "elapsed_time": "0:12:11", "remaining_time": "0:42:31", "throughput": 5672.53, "total_tokens": 4151552}
|
|
{"current_steps": 8450, "total_steps": 37885, "loss": 0.1095, "lr": 1.90922792626745e-06, "epoch": 1.1152171043948793, "percentage": 22.3, "elapsed_time": "0:12:12", "remaining_time": "0:42:30", "throughput": 5673.53, "total_tokens": 4154176}
|
|
{"current_steps": 8455, "total_steps": 37885, "loss": 0.1106, "lr": 1.9090360417715454e-06, "epoch": 1.1158769961726276, "percentage": 22.32, "elapsed_time": "0:12:12", "remaining_time": "0:42:29", "throughput": 5674.47, "total_tokens": 4156736}
|
|
{"current_steps": 8460, "total_steps": 37885, "loss": 0.0834, "lr": 1.9088439643393236e-06, "epoch": 1.116536887950376, "percentage": 22.33, "elapsed_time": "0:12:12", "remaining_time": "0:42:28", "throughput": 5675.02, "total_tokens": 4158976}
|
|
{"current_steps": 8465, "total_steps": 37885, "loss": 0.001, "lr": 1.9086516940115518e-06, "epoch": 1.1171967797281246, "percentage": 22.34, "elapsed_time": "0:12:13", "remaining_time": "0:42:28", "throughput": 5675.63, "total_tokens": 4161280}
|
|
{"current_steps": 8470, "total_steps": 37885, "loss": 0.0794, "lr": 1.908459230829038e-06, "epoch": 1.117856671505873, "percentage": 22.36, "elapsed_time": "0:12:13", "remaining_time": "0:42:27", "throughput": 5676.48, "total_tokens": 4163776}
|
|
{"current_steps": 8475, "total_steps": 37885, "loss": 0.1537, "lr": 1.908266574832631e-06, "epoch": 1.1185165632836216, "percentage": 22.37, "elapsed_time": "0:12:13", "remaining_time": "0:42:26", "throughput": 5677.41, "total_tokens": 4166336}
|
|
{"current_steps": 8480, "total_steps": 37885, "loss": 0.1069, "lr": 1.90807372606322e-06, "epoch": 1.1191764550613699, "percentage": 22.38, "elapsed_time": "0:12:14", "remaining_time": "0:42:25", "throughput": 5678.27, "total_tokens": 4168832}
|
|
{"current_steps": 8485, "total_steps": 37885, "loss": 0.0022, "lr": 1.9078806845617372e-06, "epoch": 1.1198363468391184, "percentage": 22.4, "elapsed_time": "0:12:14", "remaining_time": "0:42:25", "throughput": 5679.36, "total_tokens": 4171520}
|
|
{"current_steps": 8490, "total_steps": 37885, "loss": 0.0587, "lr": 1.907687450369153e-06, "epoch": 1.1204962386168669, "percentage": 22.41, "elapsed_time": "0:12:14", "remaining_time": "0:42:24", "throughput": 5680.43, "total_tokens": 4174208}
|
|
{"current_steps": 8495, "total_steps": 37885, "loss": 0.1138, "lr": 1.9074940235264805e-06, "epoch": 1.1211561303946154, "percentage": 22.42, "elapsed_time": "0:12:15", "remaining_time": "0:42:23", "throughput": 5681.03, "total_tokens": 4176512}
|
|
{"current_steps": 8500, "total_steps": 37885, "loss": 0.0606, "lr": 1.9073004040747732e-06, "epoch": 1.1218160221723636, "percentage": 22.44, "elapsed_time": "0:12:15", "remaining_time": "0:42:22", "throughput": 5681.96, "total_tokens": 4179072}
|
|
{"current_steps": 8505, "total_steps": 37885, "loss": 0.0689, "lr": 1.9071065920551254e-06, "epoch": 1.1224759139501121, "percentage": 22.45, "elapsed_time": "0:12:15", "remaining_time": "0:42:21", "throughput": 5682.82, "total_tokens": 4181568}
|
|
{"current_steps": 8510, "total_steps": 37885, "loss": 0.0378, "lr": 1.906912587508672e-06, "epoch": 1.1231358057278606, "percentage": 22.46, "elapsed_time": "0:12:16", "remaining_time": "0:42:21", "throughput": 5683.6, "total_tokens": 4184000}
|
|
{"current_steps": 8515, "total_steps": 37885, "loss": 0.0591, "lr": 1.9067183904765893e-06, "epoch": 1.1237956975056091, "percentage": 22.48, "elapsed_time": "0:12:16", "remaining_time": "0:42:20", "throughput": 5684.11, "total_tokens": 4186240}
|
|
{"current_steps": 8520, "total_steps": 37885, "loss": 0.0016, "lr": 1.9065240010000942e-06, "epoch": 1.1244555892833574, "percentage": 22.49, "elapsed_time": "0:12:16", "remaining_time": "0:42:19", "throughput": 5684.71, "total_tokens": 4188544}
|
|
{"current_steps": 8525, "total_steps": 37885, "loss": 0.1241, "lr": 1.9063294191204442e-06, "epoch": 1.125115481061106, "percentage": 22.5, "elapsed_time": "0:12:17", "remaining_time": "0:42:18", "throughput": 5685.72, "total_tokens": 4191168}
|
|
{"current_steps": 8530, "total_steps": 37885, "loss": 0.2255, "lr": 1.9061346448789383e-06, "epoch": 1.1257753728388544, "percentage": 22.52, "elapsed_time": "0:12:17", "remaining_time": "0:42:17", "throughput": 5686.97, "total_tokens": 4193984}
|
|
{"current_steps": 8535, "total_steps": 37885, "loss": 0.001, "lr": 1.9059396783169157e-06, "epoch": 1.126435264616603, "percentage": 22.53, "elapsed_time": "0:12:17", "remaining_time": "0:42:17", "throughput": 5688.35, "total_tokens": 4196928}
|
|
{"current_steps": 8540, "total_steps": 37885, "loss": 0.0009, "lr": 1.9057445194757566e-06, "epoch": 1.1270951563943514, "percentage": 22.54, "elapsed_time": "0:12:18", "remaining_time": "0:42:16", "throughput": 5689.2, "total_tokens": 4199424}
|
|
{"current_steps": 8545, "total_steps": 37885, "loss": 0.1215, "lr": 1.9055491683968822e-06, "epoch": 1.1277550481720997, "percentage": 22.56, "elapsed_time": "0:12:18", "remaining_time": "0:42:15", "throughput": 5689.63, "total_tokens": 4201600}
|
|
{"current_steps": 8550, "total_steps": 37885, "loss": 0.0429, "lr": 1.9053536251217544e-06, "epoch": 1.1284149399498482, "percentage": 22.57, "elapsed_time": "0:12:18", "remaining_time": "0:42:14", "throughput": 5690.3, "total_tokens": 4203968}
|
|
{"current_steps": 8555, "total_steps": 37885, "loss": 0.0003, "lr": 1.9051578896918756e-06, "epoch": 1.1290748317275967, "percentage": 22.58, "elapsed_time": "0:12:19", "remaining_time": "0:42:14", "throughput": 5690.98, "total_tokens": 4206336}
|
|
{"current_steps": 8560, "total_steps": 37885, "loss": 0.0588, "lr": 1.9049619621487894e-06, "epoch": 1.1297347235053452, "percentage": 22.59, "elapsed_time": "0:12:19", "remaining_time": "0:42:13", "throughput": 5691.98, "total_tokens": 4208960}
|
|
{"current_steps": 8565, "total_steps": 37885, "loss": 0.0102, "lr": 1.9047658425340798e-06, "epoch": 1.1303946152830935, "percentage": 22.61, "elapsed_time": "0:12:19", "remaining_time": "0:42:12", "throughput": 5692.5, "total_tokens": 4211200}
|
|
{"current_steps": 8570, "total_steps": 37885, "loss": 0.0002, "lr": 1.904569530889372e-06, "epoch": 1.131054507060842, "percentage": 22.62, "elapsed_time": "0:12:20", "remaining_time": "0:42:11", "throughput": 5693.51, "total_tokens": 4213824}
|
|
{"current_steps": 8575, "total_steps": 37885, "loss": 0.0023, "lr": 1.9043730272563319e-06, "epoch": 1.1317143988385905, "percentage": 22.63, "elapsed_time": "0:12:20", "remaining_time": "0:42:10", "throughput": 5694.18, "total_tokens": 4216192}
|
|
{"current_steps": 8580, "total_steps": 37885, "loss": 0.0643, "lr": 1.9041763316766653e-06, "epoch": 1.132374290616339, "percentage": 22.65, "elapsed_time": "0:12:20", "remaining_time": "0:42:10", "throughput": 5694.52, "total_tokens": 4218304}
|
|
{"current_steps": 8585, "total_steps": 37885, "loss": 0.0723, "lr": 1.90397944419212e-06, "epoch": 1.1330341823940873, "percentage": 22.66, "elapsed_time": "0:12:21", "remaining_time": "0:42:09", "throughput": 5695.14, "total_tokens": 4220608}
|
|
{"current_steps": 8590, "total_steps": 37885, "loss": 0.1629, "lr": 1.9037823648444839e-06, "epoch": 1.1336940741718358, "percentage": 22.67, "elapsed_time": "0:12:21", "remaining_time": "0:42:08", "throughput": 5695.87, "total_tokens": 4223040}
|
|
{"current_steps": 8595, "total_steps": 37885, "loss": 0.0002, "lr": 1.9035850936755855e-06, "epoch": 1.1343539659495843, "percentage": 22.69, "elapsed_time": "0:12:21", "remaining_time": "0:42:07", "throughput": 5696.89, "total_tokens": 4225664}
|
|
{"current_steps": 8600, "total_steps": 37885, "loss": 0.1971, "lr": 1.9033876307272941e-06, "epoch": 1.1350138577273328, "percentage": 22.7, "elapsed_time": "0:12:22", "remaining_time": "0:42:06", "throughput": 5697.83, "total_tokens": 4228224}
|
|
{"current_steps": 8605, "total_steps": 37885, "loss": 0.0001, "lr": 1.9031899760415198e-06, "epoch": 1.1356737495050813, "percentage": 22.71, "elapsed_time": "0:12:22", "remaining_time": "0:42:06", "throughput": 5698.76, "total_tokens": 4230784}
|
|
{"current_steps": 8610, "total_steps": 37885, "loss": 0.1488, "lr": 1.9029921296602139e-06, "epoch": 1.1363336412828295, "percentage": 22.73, "elapsed_time": "0:12:22", "remaining_time": "0:42:05", "throughput": 5699.52, "total_tokens": 4233216}
|
|
{"current_steps": 8615, "total_steps": 37885, "loss": 0.0005, "lr": 1.9027940916253668e-06, "epoch": 1.136993533060578, "percentage": 22.74, "elapsed_time": "0:12:23", "remaining_time": "0:42:04", "throughput": 5700.21, "total_tokens": 4235584}
|
|
{"current_steps": 8620, "total_steps": 37885, "loss": 0.112, "lr": 1.9025958619790118e-06, "epoch": 1.1376534248383265, "percentage": 22.75, "elapsed_time": "0:12:23", "remaining_time": "0:42:03", "throughput": 5700.88, "total_tokens": 4237952}
|
|
{"current_steps": 8625, "total_steps": 37885, "loss": 0.0003, "lr": 1.902397440763221e-06, "epoch": 1.138313316616075, "percentage": 22.77, "elapsed_time": "0:12:23", "remaining_time": "0:42:03", "throughput": 5702.11, "total_tokens": 4240768}
|
|
{"current_steps": 8630, "total_steps": 37885, "loss": 0.2625, "lr": 1.9021988280201083e-06, "epoch": 1.1389732083938233, "percentage": 22.78, "elapsed_time": "0:12:24", "remaining_time": "0:42:02", "throughput": 5702.66, "total_tokens": 4243072}
|
|
{"current_steps": 8635, "total_steps": 37885, "loss": 0.003, "lr": 1.9020000237918273e-06, "epoch": 1.1396331001715718, "percentage": 22.79, "elapsed_time": "0:12:24", "remaining_time": "0:42:01", "throughput": 5703.57, "total_tokens": 4245632}
|
|
{"current_steps": 8640, "total_steps": 37885, "loss": 0.0461, "lr": 1.9018010281205727e-06, "epoch": 1.1402929919493203, "percentage": 22.81, "elapsed_time": "0:12:24", "remaining_time": "0:42:00", "throughput": 5704.35, "total_tokens": 4248064}
|
|
{"current_steps": 8645, "total_steps": 37885, "loss": 0.0676, "lr": 1.9016018410485809e-06, "epoch": 1.1409528837270688, "percentage": 22.82, "elapsed_time": "0:12:25", "remaining_time": "0:41:59", "throughput": 5705.08, "total_tokens": 4250496}
|
|
{"current_steps": 8650, "total_steps": 37885, "loss": 0.3759, "lr": 1.901402462618127e-06, "epoch": 1.141612775504817, "percentage": 22.83, "elapsed_time": "0:12:25", "remaining_time": "0:41:59", "throughput": 5706.07, "total_tokens": 4253120}
|
|
{"current_steps": 8655, "total_steps": 37885, "loss": 0.2103, "lr": 1.9012028928715272e-06, "epoch": 1.1422726672825656, "percentage": 22.85, "elapsed_time": "0:12:25", "remaining_time": "0:41:58", "throughput": 5706.97, "total_tokens": 4255680}
|
|
{"current_steps": 8660, "total_steps": 37885, "loss": 0.0168, "lr": 1.9010031318511401e-06, "epoch": 1.142932559060314, "percentage": 22.86, "elapsed_time": "0:12:26", "remaining_time": "0:41:57", "throughput": 5707.57, "total_tokens": 4257984}
|
|
{"current_steps": 8665, "total_steps": 37885, "loss": 0.0013, "lr": 1.9008031795993627e-06, "epoch": 1.1435924508380626, "percentage": 22.87, "elapsed_time": "0:12:26", "remaining_time": "0:41:56", "throughput": 5708.1, "total_tokens": 4260224}
|
|
{"current_steps": 8670, "total_steps": 37885, "loss": 0.0495, "lr": 1.9006030361586337e-06, "epoch": 1.144252342615811, "percentage": 22.89, "elapsed_time": "0:12:26", "remaining_time": "0:41:56", "throughput": 5708.85, "total_tokens": 4262656}
|
|
{"current_steps": 8675, "total_steps": 37885, "loss": 0.0547, "lr": 1.9004027015714315e-06, "epoch": 1.1449122343935594, "percentage": 22.9, "elapsed_time": "0:12:27", "remaining_time": "0:41:55", "throughput": 5709.44, "total_tokens": 4264960}
|
|
{"current_steps": 8680, "total_steps": 37885, "loss": 0.0832, "lr": 1.9002021758802762e-06, "epoch": 1.1455721261713079, "percentage": 22.91, "elapsed_time": "0:12:27", "remaining_time": "0:41:54", "throughput": 5710.26, "total_tokens": 4267456}
|
|
{"current_steps": 8685, "total_steps": 37885, "loss": 0.0731, "lr": 1.900001459127728e-06, "epoch": 1.1462320179490564, "percentage": 22.92, "elapsed_time": "0:12:27", "remaining_time": "0:41:53", "throughput": 5710.86, "total_tokens": 4269760}
|
|
{"current_steps": 8690, "total_steps": 37885, "loss": 0.031, "lr": 1.8998005513563872e-06, "epoch": 1.1468919097268049, "percentage": 22.94, "elapsed_time": "0:12:27", "remaining_time": "0:41:52", "throughput": 5711.86, "total_tokens": 4272384}
|
|
{"current_steps": 8695, "total_steps": 37885, "loss": 0.0028, "lr": 1.8995994526088955e-06, "epoch": 1.1475518015045532, "percentage": 22.95, "elapsed_time": "0:12:28", "remaining_time": "0:41:52", "throughput": 5712.84, "total_tokens": 4275008}
|
|
{"current_steps": 8700, "total_steps": 37885, "loss": 0.014, "lr": 1.8993981629279342e-06, "epoch": 1.1482116932823017, "percentage": 22.96, "elapsed_time": "0:12:28", "remaining_time": "0:41:51", "throughput": 5713.58, "total_tokens": 4277440}
|
|
{"current_steps": 8705, "total_steps": 37885, "loss": 0.0003, "lr": 1.8991966823562258e-06, "epoch": 1.1488715850600502, "percentage": 22.98, "elapsed_time": "0:12:28", "remaining_time": "0:41:50", "throughput": 5714.17, "total_tokens": 4279744}
|
|
{"current_steps": 8710, "total_steps": 37885, "loss": 0.1334, "lr": 1.8989950109365328e-06, "epoch": 1.1495314768377987, "percentage": 22.99, "elapsed_time": "0:12:29", "remaining_time": "0:41:49", "throughput": 5714.77, "total_tokens": 4282048}
|
|
{"current_steps": 8715, "total_steps": 37885, "loss": 0.0581, "lr": 1.8987931487116591e-06, "epoch": 1.150191368615547, "percentage": 23.0, "elapsed_time": "0:12:29", "remaining_time": "0:41:49", "throughput": 5715.27, "total_tokens": 4284288}
|
|
{"current_steps": 8720, "total_steps": 37885, "loss": 0.0003, "lr": 1.898591095724448e-06, "epoch": 1.1508512603932954, "percentage": 23.02, "elapsed_time": "0:12:29", "remaining_time": "0:41:48", "throughput": 5716.3, "total_tokens": 4286976}
|
|
{"current_steps": 8725, "total_steps": 37885, "loss": 0.0615, "lr": 1.898388852017784e-06, "epoch": 1.151511152171044, "percentage": 23.03, "elapsed_time": "0:12:30", "remaining_time": "0:41:47", "throughput": 5717.11, "total_tokens": 4289472}
|
|
{"current_steps": 8730, "total_steps": 37885, "loss": 0.0752, "lr": 1.8981864176345914e-06, "epoch": 1.1521710439487924, "percentage": 23.04, "elapsed_time": "0:12:30", "remaining_time": "0:41:46", "throughput": 5718.15, "total_tokens": 4292160}
|
|
{"current_steps": 8735, "total_steps": 37885, "loss": 0.1534, "lr": 1.8979837926178362e-06, "epoch": 1.152830935726541, "percentage": 23.06, "elapsed_time": "0:12:30", "remaining_time": "0:41:46", "throughput": 5718.82, "total_tokens": 4294528}
|
|
{"current_steps": 8740, "total_steps": 37885, "loss": 0.0736, "lr": 1.8977809770105235e-06, "epoch": 1.1534908275042892, "percentage": 23.07, "elapsed_time": "0:12:31", "remaining_time": "0:41:45", "throughput": 5719.78, "total_tokens": 4297152}
|
|
{"current_steps": 8745, "total_steps": 37885, "loss": 0.0011, "lr": 1.8975779708556998e-06, "epoch": 1.1541507192820377, "percentage": 23.08, "elapsed_time": "0:12:31", "remaining_time": "0:41:44", "throughput": 5720.96, "total_tokens": 4299968}
|
|
{"current_steps": 8750, "total_steps": 37885, "loss": 0.0063, "lr": 1.8973747741964515e-06, "epoch": 1.1548106110597862, "percentage": 23.1, "elapsed_time": "0:12:31", "remaining_time": "0:41:43", "throughput": 5721.36, "total_tokens": 4302144}
|
|
{"current_steps": 8755, "total_steps": 37885, "loss": 0.0892, "lr": 1.8971713870759057e-06, "epoch": 1.1554705028375347, "percentage": 23.11, "elapsed_time": "0:12:32", "remaining_time": "0:41:42", "throughput": 5722.1, "total_tokens": 4304576}
|
|
{"current_steps": 8760, "total_steps": 37885, "loss": 0.0785, "lr": 1.8969678095372296e-06, "epoch": 1.156130394615283, "percentage": 23.12, "elapsed_time": "0:12:32", "remaining_time": "0:41:42", "throughput": 5722.62, "total_tokens": 4306816}
|
|
{"current_steps": 8765, "total_steps": 37885, "loss": 0.1083, "lr": 1.8967640416236313e-06, "epoch": 1.1567902863930315, "percentage": 23.14, "elapsed_time": "0:12:32", "remaining_time": "0:41:41", "throughput": 5723.57, "total_tokens": 4309440}
|
|
{"current_steps": 8770, "total_steps": 37885, "loss": 0.121, "lr": 1.8965600833783594e-06, "epoch": 1.15745017817078, "percentage": 23.15, "elapsed_time": "0:12:33", "remaining_time": "0:41:40", "throughput": 5724.46, "total_tokens": 4312000}
|
|
{"current_steps": 8775, "total_steps": 37885, "loss": 0.1331, "lr": 1.8963559348447015e-06, "epoch": 1.1581100699485285, "percentage": 23.16, "elapsed_time": "0:12:33", "remaining_time": "0:41:39", "throughput": 5725.18, "total_tokens": 4314432}
|
|
{"current_steps": 8780, "total_steps": 37885, "loss": 0.0048, "lr": 1.8961515960659878e-06, "epoch": 1.1587699617262768, "percentage": 23.18, "elapsed_time": "0:12:33", "remaining_time": "0:41:39", "throughput": 5726.0, "total_tokens": 4316928}
|
|
{"current_steps": 8785, "total_steps": 37885, "loss": 0.1, "lr": 1.8959470670855873e-06, "epoch": 1.1594298535040253, "percentage": 23.19, "elapsed_time": "0:12:34", "remaining_time": "0:41:38", "throughput": 5726.49, "total_tokens": 4319168}
|
|
{"current_steps": 8790, "total_steps": 37885, "loss": 0.1709, "lr": 1.8957423479469095e-06, "epoch": 1.1600897452817738, "percentage": 23.2, "elapsed_time": "0:12:34", "remaining_time": "0:41:37", "throughput": 5726.91, "total_tokens": 4321344}
|
|
{"current_steps": 8795, "total_steps": 37885, "loss": 0.0571, "lr": 1.8955374386934049e-06, "epoch": 1.1607496370595223, "percentage": 23.21, "elapsed_time": "0:12:34", "remaining_time": "0:41:36", "throughput": 5727.79, "total_tokens": 4323904}
|
|
{"current_steps": 8800, "total_steps": 37885, "loss": 0.0995, "lr": 1.895332339368564e-06, "epoch": 1.1614095288372708, "percentage": 23.23, "elapsed_time": "0:12:35", "remaining_time": "0:41:36", "throughput": 5728.46, "total_tokens": 4326272}
|
|
{"current_steps": 8805, "total_steps": 37885, "loss": 0.0573, "lr": 1.8951270500159176e-06, "epoch": 1.162069420615019, "percentage": 23.24, "elapsed_time": "0:12:35", "remaining_time": "0:41:35", "throughput": 5729.58, "total_tokens": 4329024}
|
|
{"current_steps": 8810, "total_steps": 37885, "loss": 0.0971, "lr": 1.8949215706790364e-06, "epoch": 1.1627293123927676, "percentage": 23.25, "elapsed_time": "0:12:35", "remaining_time": "0:41:34", "throughput": 5730.16, "total_tokens": 4331328}
|
|
{"current_steps": 8815, "total_steps": 37885, "loss": 0.1817, "lr": 1.8947159014015326e-06, "epoch": 1.163389204170516, "percentage": 23.27, "elapsed_time": "0:12:36", "remaining_time": "0:41:33", "throughput": 5730.81, "total_tokens": 4333696}
|
|
{"current_steps": 8820, "total_steps": 37885, "loss": 0.1063, "lr": 1.8945100422270578e-06, "epoch": 1.1640490959482646, "percentage": 23.28, "elapsed_time": "0:12:36", "remaining_time": "0:41:33", "throughput": 5731.77, "total_tokens": 4336320}
|
|
{"current_steps": 8825, "total_steps": 37885, "loss": 0.0024, "lr": 1.8943039931993043e-06, "epoch": 1.164708987726013, "percentage": 23.29, "elapsed_time": "0:12:36", "remaining_time": "0:41:32", "throughput": 5732.43, "total_tokens": 4338688}
|
|
{"current_steps": 8830, "total_steps": 37885, "loss": 0.078, "lr": 1.8940977543620038e-06, "epoch": 1.1653688795037613, "percentage": 23.31, "elapsed_time": "0:12:37", "remaining_time": "0:41:31", "throughput": 5733.39, "total_tokens": 4341312}
|
|
{"current_steps": 8835, "total_steps": 37885, "loss": 0.0008, "lr": 1.89389132575893e-06, "epoch": 1.1660287712815098, "percentage": 23.32, "elapsed_time": "0:12:37", "remaining_time": "0:41:30", "throughput": 5734.21, "total_tokens": 4343808}
|
|
{"current_steps": 8840, "total_steps": 37885, "loss": 0.0904, "lr": 1.8936847074338948e-06, "epoch": 1.1666886630592583, "percentage": 23.33, "elapsed_time": "0:12:37", "remaining_time": "0:41:30", "throughput": 5734.85, "total_tokens": 4346176}
|
|
{"current_steps": 8845, "total_steps": 37885, "loss": 0.0682, "lr": 1.8934778994307526e-06, "epoch": 1.1673485548370066, "percentage": 23.35, "elapsed_time": "0:12:38", "remaining_time": "0:41:29", "throughput": 5735.67, "total_tokens": 4348672}
|
|
{"current_steps": 8850, "total_steps": 37885, "loss": 0.1466, "lr": 1.8932709017933958e-06, "epoch": 1.1680084466147551, "percentage": 23.36, "elapsed_time": "0:12:38", "remaining_time": "0:41:28", "throughput": 5736.25, "total_tokens": 4350976}
|
|
{"current_steps": 8855, "total_steps": 37885, "loss": 0.0004, "lr": 1.8930637145657592e-06, "epoch": 1.1686683383925036, "percentage": 23.37, "elapsed_time": "0:12:38", "remaining_time": "0:41:27", "throughput": 5737.14, "total_tokens": 4353536}
|
|
{"current_steps": 8860, "total_steps": 37885, "loss": 0.1143, "lr": 1.8928563377918157e-06, "epoch": 1.1693282301702521, "percentage": 23.39, "elapsed_time": "0:12:39", "remaining_time": "0:41:26", "throughput": 5737.55, "total_tokens": 4355712}
|
|
{"current_steps": 8865, "total_steps": 37885, "loss": 0.0635, "lr": 1.8926487715155802e-06, "epoch": 1.1699881219480006, "percentage": 23.4, "elapsed_time": "0:12:39", "remaining_time": "0:41:26", "throughput": 5738.51, "total_tokens": 4358336}
|
|
{"current_steps": 8870, "total_steps": 37885, "loss": 0.0941, "lr": 1.892441015781107e-06, "epoch": 1.170648013725749, "percentage": 23.41, "elapsed_time": "0:12:39", "remaining_time": "0:41:25", "throughput": 5739.4, "total_tokens": 4360896}
|
|
{"current_steps": 8875, "total_steps": 37885, "loss": 0.0008, "lr": 1.892233070632491e-06, "epoch": 1.1713079055034974, "percentage": 23.43, "elapsed_time": "0:12:40", "remaining_time": "0:41:24", "throughput": 5740.28, "total_tokens": 4363456}
|
|
{"current_steps": 8880, "total_steps": 37885, "loss": 0.1365, "lr": 1.8920249361138665e-06, "epoch": 1.171967797281246, "percentage": 23.44, "elapsed_time": "0:12:40", "remaining_time": "0:41:23", "throughput": 5740.84, "total_tokens": 4365760}
|
|
{"current_steps": 8885, "total_steps": 37885, "loss": 0.0024, "lr": 1.891816612269409e-06, "epoch": 1.1726276890589944, "percentage": 23.45, "elapsed_time": "0:12:40", "remaining_time": "0:41:23", "throughput": 5741.55, "total_tokens": 4368192}
|
|
{"current_steps": 8890, "total_steps": 37885, "loss": 0.0928, "lr": 1.8916080991433337e-06, "epoch": 1.173287580836743, "percentage": 23.47, "elapsed_time": "0:12:41", "remaining_time": "0:41:22", "throughput": 5742.43, "total_tokens": 4370752}
|
|
{"current_steps": 8895, "total_steps": 37885, "loss": 0.0007, "lr": 1.8913993967798956e-06, "epoch": 1.1739474726144912, "percentage": 23.48, "elapsed_time": "0:12:41", "remaining_time": "0:41:21", "throughput": 5743.39, "total_tokens": 4373376}
|
|
{"current_steps": 8900, "total_steps": 37885, "loss": 0.0492, "lr": 1.8911905052233905e-06, "epoch": 1.1746073643922397, "percentage": 23.49, "elapsed_time": "0:12:41", "remaining_time": "0:41:20", "throughput": 5744.33, "total_tokens": 4376000}
|
|
{"current_steps": 8905, "total_steps": 37885, "loss": 0.1225, "lr": 1.8909814245181543e-06, "epoch": 1.1752672561699882, "percentage": 23.51, "elapsed_time": "0:12:42", "remaining_time": "0:41:20", "throughput": 5745.14, "total_tokens": 4378496}
|
|
{"current_steps": 8910, "total_steps": 37885, "loss": 0.0383, "lr": 1.890772154708563e-06, "epoch": 1.1759271479477365, "percentage": 23.52, "elapsed_time": "0:12:42", "remaining_time": "0:41:19", "throughput": 5745.55, "total_tokens": 4380672}
|
|
{"current_steps": 8915, "total_steps": 37885, "loss": 0.1929, "lr": 1.8905626958390317e-06, "epoch": 1.176587039725485, "percentage": 23.53, "elapsed_time": "0:12:42", "remaining_time": "0:41:18", "throughput": 5746.35, "total_tokens": 4383168}
|
|
{"current_steps": 8920, "total_steps": 37885, "loss": 0.0006, "lr": 1.8903530479540176e-06, "epoch": 1.1772469315032335, "percentage": 23.54, "elapsed_time": "0:12:43", "remaining_time": "0:41:17", "throughput": 5746.92, "total_tokens": 4385472}
|
|
{"current_steps": 8925, "total_steps": 37885, "loss": 0.0004, "lr": 1.8901432110980164e-06, "epoch": 1.177906823280982, "percentage": 23.56, "elapsed_time": "0:12:43", "remaining_time": "0:41:17", "throughput": 5747.57, "total_tokens": 4387840}
|
|
{"current_steps": 8930, "total_steps": 37885, "loss": 0.1419, "lr": 1.8899331853155648e-06, "epoch": 1.1785667150587305, "percentage": 23.57, "elapsed_time": "0:12:43", "remaining_time": "0:41:16", "throughput": 5748.34, "total_tokens": 4390336}
|
|
{"current_steps": 8935, "total_steps": 37885, "loss": 0.099, "lr": 1.8897229706512387e-06, "epoch": 1.1792266068364787, "percentage": 23.58, "elapsed_time": "0:12:44", "remaining_time": "0:41:15", "throughput": 5748.91, "total_tokens": 4392640}
|
|
{"current_steps": 8940, "total_steps": 37885, "loss": 0.1339, "lr": 1.889512567149655e-06, "epoch": 1.1798864986142272, "percentage": 23.6, "elapsed_time": "0:12:44", "remaining_time": "0:41:14", "throughput": 5749.68, "total_tokens": 4395136}
|
|
{"current_steps": 8945, "total_steps": 37885, "loss": 0.0047, "lr": 1.88930197485547e-06, "epoch": 1.1805463903919757, "percentage": 23.61, "elapsed_time": "0:12:44", "remaining_time": "0:41:14", "throughput": 5750.31, "total_tokens": 4397504}
|
|
{"current_steps": 8950, "total_steps": 37885, "loss": 0.0047, "lr": 1.8890911938133814e-06, "epoch": 1.1812062821697242, "percentage": 23.62, "elapsed_time": "0:12:45", "remaining_time": "0:41:13", "throughput": 5750.96, "total_tokens": 4399872}
|
|
{"current_steps": 8955, "total_steps": 37885, "loss": 0.0867, "lr": 1.8888802240681248e-06, "epoch": 1.1818661739474727, "percentage": 23.64, "elapsed_time": "0:12:45", "remaining_time": "0:41:12", "throughput": 5751.37, "total_tokens": 4402048}
|
|
{"current_steps": 8960, "total_steps": 37885, "loss": 0.003, "lr": 1.888669065664477e-06, "epoch": 1.182526065725221, "percentage": 23.65, "elapsed_time": "0:12:45", "remaining_time": "0:41:11", "throughput": 5752.01, "total_tokens": 4404416}
|
|
{"current_steps": 8965, "total_steps": 37885, "loss": 0.0207, "lr": 1.8884577186472557e-06, "epoch": 1.1831859575029695, "percentage": 23.66, "elapsed_time": "0:12:46", "remaining_time": "0:41:11", "throughput": 5752.56, "total_tokens": 4406720}
|
|
{"current_steps": 8970, "total_steps": 37885, "loss": 0.1395, "lr": 1.8882461830613173e-06, "epoch": 1.183845849280718, "percentage": 23.68, "elapsed_time": "0:12:46", "remaining_time": "0:41:10", "throughput": 5752.95, "total_tokens": 4408896}
|
|
{"current_steps": 8975, "total_steps": 37885, "loss": 0.0004, "lr": 1.8880344589515587e-06, "epoch": 1.1845057410584663, "percentage": 23.69, "elapsed_time": "0:12:46", "remaining_time": "0:41:09", "throughput": 5753.72, "total_tokens": 4411392}
|
|
{"current_steps": 8980, "total_steps": 37885, "loss": 0.0003, "lr": 1.887822546362917e-06, "epoch": 1.1851656328362148, "percentage": 23.7, "elapsed_time": "0:12:47", "remaining_time": "0:41:08", "throughput": 5754.51, "total_tokens": 4413888}
|
|
{"current_steps": 8985, "total_steps": 37885, "loss": 0.0006, "lr": 1.8876104453403686e-06, "epoch": 1.1858255246139633, "percentage": 23.72, "elapsed_time": "0:12:47", "remaining_time": "0:41:08", "throughput": 5755.3, "total_tokens": 4416384}
|
|
{"current_steps": 8990, "total_steps": 37885, "loss": 0.21, "lr": 1.8873981559289308e-06, "epoch": 1.1864854163917118, "percentage": 23.73, "elapsed_time": "0:12:47", "remaining_time": "0:41:07", "throughput": 5756.4, "total_tokens": 4419136}
|
|
{"current_steps": 8995, "total_steps": 37885, "loss": 0.0786, "lr": 1.8871856781736604e-06, "epoch": 1.1871453081694603, "percentage": 23.74, "elapsed_time": "0:12:48", "remaining_time": "0:41:06", "throughput": 5757.18, "total_tokens": 4421632}
|
|
{"current_steps": 9000, "total_steps": 37885, "loss": 0.0006, "lr": 1.8869730121196542e-06, "epoch": 1.1878051999472086, "percentage": 23.76, "elapsed_time": "0:12:48", "remaining_time": "0:41:05", "throughput": 5758.19, "total_tokens": 4424320}
|
|
{"current_steps": 9005, "total_steps": 37885, "loss": 0.1348, "lr": 1.8867601578120495e-06, "epoch": 1.188465091724957, "percentage": 23.77, "elapsed_time": "0:12:48", "remaining_time": "0:41:05", "throughput": 5759.03, "total_tokens": 4426880}
|
|
{"current_steps": 9010, "total_steps": 37885, "loss": 0.0006, "lr": 1.8865471152960225e-06, "epoch": 1.1891249835027056, "percentage": 23.78, "elapsed_time": "0:12:49", "remaining_time": "0:41:04", "throughput": 5759.79, "total_tokens": 4429376}
|
|
{"current_steps": 9015, "total_steps": 37885, "loss": 0.1167, "lr": 1.8863338846167905e-06, "epoch": 1.189784875280454, "percentage": 23.8, "elapsed_time": "0:12:49", "remaining_time": "0:41:03", "throughput": 5760.79, "total_tokens": 4432064}
|
|
{"current_steps": 9020, "total_steps": 37885, "loss": 0.0008, "lr": 1.8861204658196095e-06, "epoch": 1.1904447670582026, "percentage": 23.81, "elapsed_time": "0:12:49", "remaining_time": "0:41:03", "throughput": 5761.27, "total_tokens": 4434304}
|
|
{"current_steps": 9025, "total_steps": 37885, "loss": 0.0008, "lr": 1.8859068589497765e-06, "epoch": 1.1911046588359508, "percentage": 23.82, "elapsed_time": "0:12:50", "remaining_time": "0:41:02", "throughput": 5761.73, "total_tokens": 4436544}
|
|
{"current_steps": 9030, "total_steps": 37885, "loss": 0.0947, "lr": 1.8856930640526277e-06, "epoch": 1.1917645506136993, "percentage": 23.84, "elapsed_time": "0:12:50", "remaining_time": "0:41:01", "throughput": 5762.66, "total_tokens": 4439168}
|
|
{"current_steps": 9035, "total_steps": 37885, "loss": 0.0814, "lr": 1.88547908117354e-06, "epoch": 1.1924244423914478, "percentage": 23.85, "elapsed_time": "0:12:50", "remaining_time": "0:41:00", "throughput": 5763.22, "total_tokens": 4441472}
|
|
{"current_steps": 9040, "total_steps": 37885, "loss": 0.2709, "lr": 1.8852649103579292e-06, "epoch": 1.1930843341691963, "percentage": 23.86, "elapsed_time": "0:12:50", "remaining_time": "0:41:00", "throughput": 5763.85, "total_tokens": 4443840}
|
|
{"current_steps": 9045, "total_steps": 37885, "loss": 0.0662, "lr": 1.885050551651252e-06, "epoch": 1.1937442259469446, "percentage": 23.87, "elapsed_time": "0:12:51", "remaining_time": "0:40:59", "throughput": 5764.22, "total_tokens": 4446016}
|
|
{"current_steps": 9050, "total_steps": 37885, "loss": 0.2496, "lr": 1.8848360050990042e-06, "epoch": 1.1944041177246931, "percentage": 23.89, "elapsed_time": "0:12:51", "remaining_time": "0:40:58", "throughput": 5764.78, "total_tokens": 4448320}
|
|
{"current_steps": 9055, "total_steps": 37885, "loss": 0.0971, "lr": 1.8846212707467216e-06, "epoch": 1.1950640095024416, "percentage": 23.9, "elapsed_time": "0:12:51", "remaining_time": "0:40:57", "throughput": 5765.66, "total_tokens": 4450880}
|
|
{"current_steps": 9060, "total_steps": 37885, "loss": 0.002, "lr": 1.8844063486399805e-06, "epoch": 1.1957239012801901, "percentage": 23.91, "elapsed_time": "0:12:52", "remaining_time": "0:40:57", "throughput": 5766.95, "total_tokens": 4453824}
|
|
{"current_steps": 9065, "total_steps": 37885, "loss": 0.1786, "lr": 1.884191238824396e-06, "epoch": 1.1963837930579384, "percentage": 23.93, "elapsed_time": "0:12:52", "remaining_time": "0:40:56", "throughput": 5767.87, "total_tokens": 4456448}
|
|
{"current_steps": 9070, "total_steps": 37885, "loss": 0.1482, "lr": 1.883975941345624e-06, "epoch": 1.197043684835687, "percentage": 23.94, "elapsed_time": "0:12:52", "remaining_time": "0:40:55", "throughput": 5768.59, "total_tokens": 4458880}
|
|
{"current_steps": 9075, "total_steps": 37885, "loss": 0.1391, "lr": 1.8837604562493597e-06, "epoch": 1.1977035766134354, "percentage": 23.95, "elapsed_time": "0:12:53", "remaining_time": "0:40:54", "throughput": 5769.54, "total_tokens": 4461504}
|
|
{"current_steps": 9080, "total_steps": 37885, "loss": 0.1583, "lr": 1.883544783581338e-06, "epoch": 1.198363468391184, "percentage": 23.97, "elapsed_time": "0:12:53", "remaining_time": "0:40:54", "throughput": 5770.32, "total_tokens": 4464000}
|
|
{"current_steps": 9085, "total_steps": 37885, "loss": 0.0736, "lr": 1.8833289233873346e-06, "epoch": 1.1990233601689324, "percentage": 23.98, "elapsed_time": "0:12:53", "remaining_time": "0:40:53", "throughput": 5770.93, "total_tokens": 4466368}
|
|
{"current_steps": 9090, "total_steps": 37885, "loss": 0.1445, "lr": 1.8831128757131634e-06, "epoch": 1.1996832519466807, "percentage": 23.99, "elapsed_time": "0:12:54", "remaining_time": "0:40:52", "throughput": 5771.63, "total_tokens": 4468800}
|
|
{"current_steps": 9095, "total_steps": 37885, "loss": 0.1592, "lr": 1.8828966406046796e-06, "epoch": 1.2003431437244292, "percentage": 24.01, "elapsed_time": "0:12:54", "remaining_time": "0:40:51", "throughput": 5772.37, "total_tokens": 4471296}
|
|
{"current_steps": 9100, "total_steps": 37885, "loss": 0.0024, "lr": 1.8826802181077771e-06, "epoch": 1.2010030355021777, "percentage": 24.02, "elapsed_time": "0:12:54", "remaining_time": "0:40:51", "throughput": 5773.21, "total_tokens": 4473856}
|
|
{"current_steps": 9105, "total_steps": 37885, "loss": 0.0028, "lr": 1.8824636082683903e-06, "epoch": 1.2016629272799262, "percentage": 24.03, "elapsed_time": "0:12:55", "remaining_time": "0:40:50", "throughput": 5774.05, "total_tokens": 4476416}
|
|
{"current_steps": 9110, "total_steps": 37885, "loss": 0.044, "lr": 1.8822468111324927e-06, "epoch": 1.2023228190576745, "percentage": 24.05, "elapsed_time": "0:12:55", "remaining_time": "0:40:49", "throughput": 5774.73, "total_tokens": 4478848}
|
|
{"current_steps": 9115, "total_steps": 37885, "loss": 0.1416, "lr": 1.8820298267460983e-06, "epoch": 1.202982710835423, "percentage": 24.06, "elapsed_time": "0:12:55", "remaining_time": "0:40:49", "throughput": 5775.2, "total_tokens": 4481088}
|
|
{"current_steps": 9120, "total_steps": 37885, "loss": 0.0922, "lr": 1.8818126551552605e-06, "epoch": 1.2036426026131715, "percentage": 24.07, "elapsed_time": "0:12:56", "remaining_time": "0:40:48", "throughput": 5775.76, "total_tokens": 4483392}
|
|
{"current_steps": 9125, "total_steps": 37885, "loss": 0.1309, "lr": 1.881595296406072e-06, "epoch": 1.20430249439092, "percentage": 24.09, "elapsed_time": "0:12:56", "remaining_time": "0:40:47", "throughput": 5776.37, "total_tokens": 4485760}
|
|
{"current_steps": 9130, "total_steps": 37885, "loss": 0.0023, "lr": 1.881377750544666e-06, "epoch": 1.2049623861686682, "percentage": 24.1, "elapsed_time": "0:12:56", "remaining_time": "0:40:46", "throughput": 5776.91, "total_tokens": 4488064}
|
|
{"current_steps": 9135, "total_steps": 37885, "loss": 0.0897, "lr": 1.8811600176172147e-06, "epoch": 1.2056222779464167, "percentage": 24.11, "elapsed_time": "0:12:57", "remaining_time": "0:40:46", "throughput": 5777.05, "total_tokens": 4490048}
|
|
{"current_steps": 9140, "total_steps": 37885, "loss": 0.0015, "lr": 1.8809420976699308e-06, "epoch": 1.2062821697241652, "percentage": 24.13, "elapsed_time": "0:12:57", "remaining_time": "0:40:45", "throughput": 5777.74, "total_tokens": 4492480}
|
|
{"current_steps": 9145, "total_steps": 37885, "loss": 0.0265, "lr": 1.8807239907490656e-06, "epoch": 1.2069420615019137, "percentage": 24.14, "elapsed_time": "0:12:57", "remaining_time": "0:40:44", "throughput": 5778.66, "total_tokens": 4495104}
|
|
{"current_steps": 9150, "total_steps": 37885, "loss": 0.2775, "lr": 1.8805056969009114e-06, "epoch": 1.2076019532796622, "percentage": 24.15, "elapsed_time": "0:12:58", "remaining_time": "0:40:43", "throughput": 5779.2, "total_tokens": 4497408}
|
|
{"current_steps": 9155, "total_steps": 37885, "loss": 0.0605, "lr": 1.8802872161717988e-06, "epoch": 1.2082618450574105, "percentage": 24.17, "elapsed_time": "0:12:58", "remaining_time": "0:40:43", "throughput": 5779.59, "total_tokens": 4499584}
|
|
{"current_steps": 9160, "total_steps": 37885, "loss": 0.0287, "lr": 1.8800685486080994e-06, "epoch": 1.208921736835159, "percentage": 24.18, "elapsed_time": "0:12:58", "remaining_time": "0:40:42", "throughput": 5780.67, "total_tokens": 4502336}
|
|
{"current_steps": 9165, "total_steps": 37885, "loss": 0.0638, "lr": 1.8798496942562235e-06, "epoch": 1.2095816286129075, "percentage": 24.19, "elapsed_time": "0:12:59", "remaining_time": "0:40:41", "throughput": 5781.64, "total_tokens": 4505024}
|
|
{"current_steps": 9170, "total_steps": 37885, "loss": 0.0649, "lr": 1.879630653162621e-06, "epoch": 1.210241520390656, "percentage": 24.2, "elapsed_time": "0:12:59", "remaining_time": "0:40:41", "throughput": 5782.7, "total_tokens": 4507776}
|
|
{"current_steps": 9175, "total_steps": 37885, "loss": 0.0745, "lr": 1.8794114253737825e-06, "epoch": 1.2109014121684043, "percentage": 24.22, "elapsed_time": "0:12:59", "remaining_time": "0:40:40", "throughput": 5783.32, "total_tokens": 4510144}
|
|
{"current_steps": 9180, "total_steps": 37885, "loss": 0.0387, "lr": 1.8791920109362373e-06, "epoch": 1.2115613039461528, "percentage": 24.23, "elapsed_time": "0:13:00", "remaining_time": "0:40:39", "throughput": 5784.15, "total_tokens": 4512704}
|
|
{"current_steps": 9185, "total_steps": 37885, "loss": 0.0823, "lr": 1.878972409896554e-06, "epoch": 1.2122211957239013, "percentage": 24.24, "elapsed_time": "0:13:00", "remaining_time": "0:40:38", "throughput": 5784.97, "total_tokens": 4515264}
|
|
{"current_steps": 9190, "total_steps": 37885, "loss": 0.0804, "lr": 1.878752622301342e-06, "epoch": 1.2128810875016498, "percentage": 24.26, "elapsed_time": "0:13:00", "remaining_time": "0:40:38", "throughput": 5786.03, "total_tokens": 4518016}
|
|
{"current_steps": 9195, "total_steps": 37885, "loss": 0.0554, "lr": 1.8785326481972491e-06, "epoch": 1.213540979279398, "percentage": 24.27, "elapsed_time": "0:13:01", "remaining_time": "0:40:37", "throughput": 5786.57, "total_tokens": 4520320}
|
|
{"current_steps": 9200, "total_steps": 37885, "loss": 0.0832, "lr": 1.8783124876309637e-06, "epoch": 1.2142008710571466, "percentage": 24.28, "elapsed_time": "0:13:01", "remaining_time": "0:40:36", "throughput": 5786.85, "total_tokens": 4522432}
|
|
{"current_steps": 9205, "total_steps": 37885, "loss": 0.0009, "lr": 1.878092140649213e-06, "epoch": 1.214860762834895, "percentage": 24.3, "elapsed_time": "0:13:01", "remaining_time": "0:40:35", "throughput": 5787.52, "total_tokens": 4524864}
|
|
{"current_steps": 9210, "total_steps": 37885, "loss": 0.0041, "lr": 1.8778716072987638e-06, "epoch": 1.2155206546126436, "percentage": 24.31, "elapsed_time": "0:13:02", "remaining_time": "0:40:35", "throughput": 5788.22, "total_tokens": 4527296}
|
|
{"current_steps": 9215, "total_steps": 37885, "loss": 0.1339, "lr": 1.8776508876264235e-06, "epoch": 1.216180546390392, "percentage": 24.32, "elapsed_time": "0:13:02", "remaining_time": "0:40:34", "throughput": 5788.99, "total_tokens": 4529792}
|
|
{"current_steps": 9220, "total_steps": 37885, "loss": 0.0356, "lr": 1.8774299816790373e-06, "epoch": 1.2168404381681404, "percentage": 24.34, "elapsed_time": "0:13:02", "remaining_time": "0:40:33", "throughput": 5789.74, "total_tokens": 4532288}
|
|
{"current_steps": 9225, "total_steps": 37885, "loss": 0.0769, "lr": 1.8772088895034916e-06, "epoch": 1.2175003299458889, "percentage": 24.35, "elapsed_time": "0:13:03", "remaining_time": "0:40:33", "throughput": 5790.58, "total_tokens": 4534848}
|
|
{"current_steps": 9230, "total_steps": 37885, "loss": 0.0675, "lr": 1.876987611146711e-06, "epoch": 1.2181602217236374, "percentage": 24.36, "elapsed_time": "0:13:03", "remaining_time": "0:40:32", "throughput": 5791.26, "total_tokens": 4537280}
|
|
{"current_steps": 9235, "total_steps": 37885, "loss": 0.158, "lr": 1.876766146655661e-06, "epoch": 1.2188201135013859, "percentage": 24.38, "elapsed_time": "0:13:03", "remaining_time": "0:40:31", "throughput": 5792.02, "total_tokens": 4539776}
|
|
{"current_steps": 9240, "total_steps": 37885, "loss": 0.0008, "lr": 1.8765444960773453e-06, "epoch": 1.2194800052791341, "percentage": 24.39, "elapsed_time": "0:13:04", "remaining_time": "0:40:30", "throughput": 5792.63, "total_tokens": 4542144}
|
|
{"current_steps": 9245, "total_steps": 37885, "loss": 0.0557, "lr": 1.8763226594588078e-06, "epoch": 1.2201398970568826, "percentage": 24.4, "elapsed_time": "0:13:04", "remaining_time": "0:40:30", "throughput": 5793.31, "total_tokens": 4544576}
|
|
{"current_steps": 9250, "total_steps": 37885, "loss": 0.0475, "lr": 1.8761006368471315e-06, "epoch": 1.2207997888346311, "percentage": 24.42, "elapsed_time": "0:13:04", "remaining_time": "0:40:29", "throughput": 5794.27, "total_tokens": 4547264}
|
|
{"current_steps": 9255, "total_steps": 37885, "loss": 0.0003, "lr": 1.8758784282894394e-06, "epoch": 1.2214596806123796, "percentage": 24.43, "elapsed_time": "0:13:05", "remaining_time": "0:40:28", "throughput": 5794.94, "total_tokens": 4549696}
|
|
{"current_steps": 9260, "total_steps": 37885, "loss": 0.0956, "lr": 1.8756560338328934e-06, "epoch": 1.222119572390128, "percentage": 24.44, "elapsed_time": "0:13:05", "remaining_time": "0:40:28", "throughput": 5795.46, "total_tokens": 4552000}
|
|
{"current_steps": 9265, "total_steps": 37885, "loss": 0.0492, "lr": 1.8754334535246952e-06, "epoch": 1.2227794641678764, "percentage": 24.46, "elapsed_time": "0:13:05", "remaining_time": "0:40:27", "throughput": 5796.36, "total_tokens": 4554624}
|
|
{"current_steps": 9270, "total_steps": 37885, "loss": 0.0658, "lr": 1.875210687412086e-06, "epoch": 1.223439355945625, "percentage": 24.47, "elapsed_time": "0:13:06", "remaining_time": "0:40:26", "throughput": 5797.04, "total_tokens": 4557056}
|
|
{"current_steps": 9275, "total_steps": 37885, "loss": 0.0784, "lr": 1.874987735542346e-06, "epoch": 1.2240992477233734, "percentage": 24.48, "elapsed_time": "0:13:06", "remaining_time": "0:40:25", "throughput": 5797.71, "total_tokens": 4559488}
|
|
{"current_steps": 9280, "total_steps": 37885, "loss": 0.1279, "lr": 1.8747645979627955e-06, "epoch": 1.224759139501122, "percentage": 24.5, "elapsed_time": "0:13:06", "remaining_time": "0:40:25", "throughput": 5798.45, "total_tokens": 4561984}
|
|
{"current_steps": 9285, "total_steps": 37885, "loss": 0.0544, "lr": 1.8745412747207933e-06, "epoch": 1.2254190312788702, "percentage": 24.51, "elapsed_time": "0:13:07", "remaining_time": "0:40:24", "throughput": 5799.49, "total_tokens": 4564736}
|
|
{"current_steps": 9290, "total_steps": 37885, "loss": 0.0699, "lr": 1.8743177658637387e-06, "epoch": 1.2260789230566187, "percentage": 24.52, "elapsed_time": "0:13:07", "remaining_time": "0:40:23", "throughput": 5800.25, "total_tokens": 4567232}
|
|
{"current_steps": 9295, "total_steps": 37885, "loss": 0.1415, "lr": 1.8740940714390697e-06, "epoch": 1.2267388148343672, "percentage": 24.53, "elapsed_time": "0:13:07", "remaining_time": "0:40:22", "throughput": 5800.92, "total_tokens": 4569664}
|
|
{"current_steps": 9300, "total_steps": 37885, "loss": 0.0011, "lr": 1.8738701914942636e-06, "epoch": 1.2273987066121157, "percentage": 24.55, "elapsed_time": "0:13:08", "remaining_time": "0:40:22", "throughput": 5801.59, "total_tokens": 4572096}
|
|
{"current_steps": 9305, "total_steps": 37885, "loss": 0.0006, "lr": 1.8736461260768375e-06, "epoch": 1.228058598389864, "percentage": 24.56, "elapsed_time": "0:13:08", "remaining_time": "0:40:21", "throughput": 5802.26, "total_tokens": 4574528}
|
|
{"current_steps": 9310, "total_steps": 37885, "loss": 0.094, "lr": 1.8734218752343475e-06, "epoch": 1.2287184901676125, "percentage": 24.57, "elapsed_time": "0:13:08", "remaining_time": "0:40:20", "throughput": 5803.07, "total_tokens": 4577088}
|
|
{"current_steps": 9315, "total_steps": 37885, "loss": 0.0003, "lr": 1.8731974390143894e-06, "epoch": 1.229378381945361, "percentage": 24.59, "elapsed_time": "0:13:09", "remaining_time": "0:40:20", "throughput": 5803.68, "total_tokens": 4579456}
|
|
{"current_steps": 9320, "total_steps": 37885, "loss": 0.3725, "lr": 1.872972817464598e-06, "epoch": 1.2300382737231095, "percentage": 24.6, "elapsed_time": "0:13:09", "remaining_time": "0:40:19", "throughput": 5804.29, "total_tokens": 4581824}
|
|
{"current_steps": 9325, "total_steps": 37885, "loss": 0.0324, "lr": 1.8727480106326476e-06, "epoch": 1.2306981655008578, "percentage": 24.61, "elapsed_time": "0:13:09", "remaining_time": "0:40:18", "throughput": 5804.96, "total_tokens": 4584256}
|
|
{"current_steps": 9330, "total_steps": 37885, "loss": 0.1018, "lr": 1.872523018566252e-06, "epoch": 1.2313580572786063, "percentage": 24.63, "elapsed_time": "0:13:10", "remaining_time": "0:40:17", "throughput": 5806.0, "total_tokens": 4587008}
|
|
{"current_steps": 9335, "total_steps": 37885, "loss": 0.1404, "lr": 1.8722978413131641e-06, "epoch": 1.2320179490563548, "percentage": 24.64, "elapsed_time": "0:13:10", "remaining_time": "0:40:17", "throughput": 5807.1, "total_tokens": 4589824}
|
|
{"current_steps": 9340, "total_steps": 37885, "loss": 0.1816, "lr": 1.8720724789211758e-06, "epoch": 1.2326778408341033, "percentage": 24.65, "elapsed_time": "0:13:10", "remaining_time": "0:40:16", "throughput": 5807.98, "total_tokens": 4592448}
|
|
{"current_steps": 9345, "total_steps": 37885, "loss": 0.0031, "lr": 1.871846931438119e-06, "epoch": 1.2333377326118518, "percentage": 24.67, "elapsed_time": "0:13:11", "remaining_time": "0:40:15", "throughput": 5808.63, "total_tokens": 4594880}
|
|
{"current_steps": 9350, "total_steps": 37885, "loss": 0.4028, "lr": 1.8716211989118645e-06, "epoch": 1.2339976243896, "percentage": 24.68, "elapsed_time": "0:13:11", "remaining_time": "0:40:15", "throughput": 5809.08, "total_tokens": 4597120}
|
|
{"current_steps": 9355, "total_steps": 37885, "loss": 0.1788, "lr": 1.8713952813903222e-06, "epoch": 1.2346575161673485, "percentage": 24.69, "elapsed_time": "0:13:11", "remaining_time": "0:40:14", "throughput": 5809.75, "total_tokens": 4599552}
|
|
{"current_steps": 9360, "total_steps": 37885, "loss": 0.0456, "lr": 1.8711691789214416e-06, "epoch": 1.235317407945097, "percentage": 24.71, "elapsed_time": "0:13:12", "remaining_time": "0:40:13", "throughput": 5810.49, "total_tokens": 4602048}
|
|
{"current_steps": 9365, "total_steps": 37885, "loss": 0.0675, "lr": 1.8709428915532114e-06, "epoch": 1.2359772997228455, "percentage": 24.72, "elapsed_time": "0:13:12", "remaining_time": "0:40:13", "throughput": 5810.99, "total_tokens": 4604352}
|
|
{"current_steps": 9370, "total_steps": 37885, "loss": 0.1284, "lr": 1.8707164193336595e-06, "epoch": 1.2366371915005938, "percentage": 24.73, "elapsed_time": "0:13:12", "remaining_time": "0:40:12", "throughput": 5811.6, "total_tokens": 4606720}
|
|
{"current_steps": 9375, "total_steps": 37885, "loss": 0.1365, "lr": 1.8704897623108527e-06, "epoch": 1.2372970832783423, "percentage": 24.75, "elapsed_time": "0:13:13", "remaining_time": "0:40:11", "throughput": 5812.2, "total_tokens": 4609088}
|
|
{"current_steps": 9380, "total_steps": 37885, "loss": 0.0896, "lr": 1.8702629205328973e-06, "epoch": 1.2379569750560908, "percentage": 24.76, "elapsed_time": "0:13:13", "remaining_time": "0:40:10", "throughput": 5813.08, "total_tokens": 4611712}
|
|
{"current_steps": 9385, "total_steps": 37885, "loss": 0.1082, "lr": 1.8700358940479387e-06, "epoch": 1.2386168668338393, "percentage": 24.77, "elapsed_time": "0:13:13", "remaining_time": "0:40:10", "throughput": 5813.63, "total_tokens": 4614080}
|
|
{"current_steps": 9390, "total_steps": 37885, "loss": 0.0484, "lr": 1.8698086829041624e-06, "epoch": 1.2392767586115876, "percentage": 24.79, "elapsed_time": "0:13:13", "remaining_time": "0:40:09", "throughput": 5814.0, "total_tokens": 4616256}
|
|
{"current_steps": 9395, "total_steps": 37885, "loss": 0.0028, "lr": 1.8695812871497915e-06, "epoch": 1.239936650389336, "percentage": 24.8, "elapsed_time": "0:13:14", "remaining_time": "0:40:08", "throughput": 5814.64, "total_tokens": 4618688}
|
|
{"current_steps": 9400, "total_steps": 37885, "loss": 0.1368, "lr": 1.8693537068330898e-06, "epoch": 1.2405965421670846, "percentage": 24.81, "elapsed_time": "0:13:14", "remaining_time": "0:40:08", "throughput": 5815.36, "total_tokens": 4621184}
|
|
{"current_steps": 9405, "total_steps": 37885, "loss": 0.189, "lr": 1.8691259420023589e-06, "epoch": 1.241256433944833, "percentage": 24.83, "elapsed_time": "0:13:14", "remaining_time": "0:40:07", "throughput": 5816.03, "total_tokens": 4623616}
|
|
{"current_steps": 9410, "total_steps": 37885, "loss": 0.0022, "lr": 1.8688979927059405e-06, "epoch": 1.2419163257225816, "percentage": 24.84, "elapsed_time": "0:13:15", "remaining_time": "0:40:06", "throughput": 5816.69, "total_tokens": 4626048}
|
|
{"current_steps": 9415, "total_steps": 37885, "loss": 0.2181, "lr": 1.8686698589922154e-06, "epoch": 1.2425762175003299, "percentage": 24.85, "elapsed_time": "0:13:15", "remaining_time": "0:40:05", "throughput": 5817.42, "total_tokens": 4628544}
|
|
{"current_steps": 9420, "total_steps": 37885, "loss": 0.0009, "lr": 1.868441540909603e-06, "epoch": 1.2432361092780784, "percentage": 24.86, "elapsed_time": "0:13:15", "remaining_time": "0:40:05", "throughput": 5818.43, "total_tokens": 4631296}
|
|
{"current_steps": 9425, "total_steps": 37885, "loss": 0.0018, "lr": 1.8682130385065622e-06, "epoch": 1.2438960010558269, "percentage": 24.88, "elapsed_time": "0:13:16", "remaining_time": "0:40:04", "throughput": 5819.03, "total_tokens": 4633664}
|
|
{"current_steps": 9430, "total_steps": 37885, "loss": 0.001, "lr": 1.8679843518315911e-06, "epoch": 1.2445558928335754, "percentage": 24.89, "elapsed_time": "0:13:16", "remaining_time": "0:40:03", "throughput": 5819.83, "total_tokens": 4636224}
|
|
{"current_steps": 9435, "total_steps": 37885, "loss": 0.0792, "lr": 1.8677554809332272e-06, "epoch": 1.2452157846113237, "percentage": 24.9, "elapsed_time": "0:13:16", "remaining_time": "0:40:03", "throughput": 5820.57, "total_tokens": 4638720}
|
|
{"current_steps": 9440, "total_steps": 37885, "loss": 0.0399, "lr": 1.8675264258600459e-06, "epoch": 1.2458756763890722, "percentage": 24.92, "elapsed_time": "0:13:17", "remaining_time": "0:40:02", "throughput": 5821.41, "total_tokens": 4641280}
|
|
{"current_steps": 9445, "total_steps": 37885, "loss": 0.0117, "lr": 1.8672971866606627e-06, "epoch": 1.2465355681668207, "percentage": 24.93, "elapsed_time": "0:13:17", "remaining_time": "0:40:01", "throughput": 5822.01, "total_tokens": 4643648}
|
|
{"current_steps": 9450, "total_steps": 37885, "loss": 0.0879, "lr": 1.8670677633837321e-06, "epoch": 1.2471954599445692, "percentage": 24.94, "elapsed_time": "0:13:17", "remaining_time": "0:40:00", "throughput": 5822.59, "total_tokens": 4646016}
|
|
{"current_steps": 9455, "total_steps": 37885, "loss": 0.2114, "lr": 1.8668381560779478e-06, "epoch": 1.2478553517223174, "percentage": 24.96, "elapsed_time": "0:13:18", "remaining_time": "0:40:00", "throughput": 5823.09, "total_tokens": 4648320}
|
|
{"current_steps": 9460, "total_steps": 37885, "loss": 0.0495, "lr": 1.866608364792042e-06, "epoch": 1.248515243500066, "percentage": 24.97, "elapsed_time": "0:13:18", "remaining_time": "0:39:59", "throughput": 5823.97, "total_tokens": 4650944}
|
|
{"current_steps": 9465, "total_steps": 37885, "loss": 0.0025, "lr": 1.8663783895747863e-06, "epoch": 1.2491751352778144, "percentage": 24.98, "elapsed_time": "0:13:18", "remaining_time": "0:39:58", "throughput": 5824.71, "total_tokens": 4653440}
|
|
{"current_steps": 9470, "total_steps": 37885, "loss": 0.1382, "lr": 1.8661482304749911e-06, "epoch": 1.249835027055563, "percentage": 25.0, "elapsed_time": "0:13:19", "remaining_time": "0:39:58", "throughput": 5825.58, "total_tokens": 4656064}
|
|
{"current_steps": 9475, "total_steps": 37885, "loss": 0.1179, "lr": 1.8659178875415062e-06, "epoch": 1.2504949188333114, "percentage": 25.01, "elapsed_time": "0:13:19", "remaining_time": "0:39:57", "throughput": 5825.93, "total_tokens": 4658240}
|
|
{"current_steps": 9475, "total_steps": 37885, "eval_loss": 0.11660958081483841, "epoch": 1.2504949188333114, "percentage": 25.01, "elapsed_time": "0:13:27", "remaining_time": "0:40:20", "throughput": 5769.5, "total_tokens": 4658240}
|
|
{"current_steps": 9480, "total_steps": 37885, "loss": 0.001, "lr": 1.86568736082322e-06, "epoch": 1.2511548106110597, "percentage": 25.02, "elapsed_time": "0:14:02", "remaining_time": "0:42:04", "throughput": 5532.95, "total_tokens": 4660992}
|
|
{"current_steps": 9485, "total_steps": 37885, "loss": 0.0584, "lr": 1.8654566503690606e-06, "epoch": 1.2518147023888082, "percentage": 25.04, "elapsed_time": "0:14:02", "remaining_time": "0:42:03", "throughput": 5533.75, "total_tokens": 4663488}
|
|
{"current_steps": 9490, "total_steps": 37885, "loss": 0.0698, "lr": 1.8652257562279942e-06, "epoch": 1.2524745941665567, "percentage": 25.05, "elapsed_time": "0:14:03", "remaining_time": "0:42:02", "throughput": 5534.7, "total_tokens": 4666112}
|
|
{"current_steps": 9495, "total_steps": 37885, "loss": 0.1508, "lr": 1.864994678449026e-06, "epoch": 1.2531344859443052, "percentage": 25.06, "elapsed_time": "0:14:03", "remaining_time": "0:42:01", "throughput": 5535.9, "total_tokens": 4668992}
|
|
{"current_steps": 9500, "total_steps": 37885, "loss": 0.1196, "lr": 1.864763417081202e-06, "epoch": 1.2537943777220537, "percentage": 25.08, "elapsed_time": "0:14:03", "remaining_time": "0:42:00", "throughput": 5536.32, "total_tokens": 4671168}
|
|
{"current_steps": 9505, "total_steps": 37885, "loss": 0.0589, "lr": 1.864531972173604e-06, "epoch": 1.254454269499802, "percentage": 25.09, "elapsed_time": "0:14:04", "remaining_time": "0:42:00", "throughput": 5537.04, "total_tokens": 4673600}
|
|
{"current_steps": 9510, "total_steps": 37885, "loss": 0.0006, "lr": 1.8643003437753557e-06, "epoch": 1.2551141612775505, "percentage": 25.1, "elapsed_time": "0:14:04", "remaining_time": "0:41:59", "throughput": 5537.96, "total_tokens": 4676224}
|
|
{"current_steps": 9515, "total_steps": 37885, "loss": 0.1122, "lr": 1.8640685319356181e-06, "epoch": 1.255774053055299, "percentage": 25.12, "elapsed_time": "0:14:04", "remaining_time": "0:41:58", "throughput": 5538.62, "total_tokens": 4678592}
|
|
{"current_steps": 9520, "total_steps": 37885, "loss": 0.001, "lr": 1.8638365367035922e-06, "epoch": 1.2564339448330473, "percentage": 25.13, "elapsed_time": "0:14:05", "remaining_time": "0:41:57", "throughput": 5539.26, "total_tokens": 4680960}
|
|
{"current_steps": 9525, "total_steps": 37885, "loss": 0.0433, "lr": 1.863604358128516e-06, "epoch": 1.2570938366107958, "percentage": 25.14, "elapsed_time": "0:14:05", "remaining_time": "0:41:57", "throughput": 5539.83, "total_tokens": 4683264}
|
|
{"current_steps": 9530, "total_steps": 37885, "loss": 0.0433, "lr": 1.8633719962596693e-06, "epoch": 1.2577537283885443, "percentage": 25.16, "elapsed_time": "0:14:05", "remaining_time": "0:41:56", "throughput": 5540.61, "total_tokens": 4685760}
|
|
{"current_steps": 9535, "total_steps": 37885, "loss": 0.0812, "lr": 1.863139451146368e-06, "epoch": 1.2584136201662928, "percentage": 25.17, "elapsed_time": "0:14:06", "remaining_time": "0:41:55", "throughput": 5541.37, "total_tokens": 4688256}
|
|
{"current_steps": 9540, "total_steps": 37885, "loss": 0.0911, "lr": 1.8629067228379687e-06, "epoch": 1.2590735119440413, "percentage": 25.18, "elapsed_time": "0:14:06", "remaining_time": "0:41:54", "throughput": 5541.99, "total_tokens": 4690624}
|
|
{"current_steps": 9545, "total_steps": 37885, "loss": 0.0416, "lr": 1.8626738113838657e-06, "epoch": 1.2597334037217895, "percentage": 25.19, "elapsed_time": "0:14:06", "remaining_time": "0:41:53", "throughput": 5543.04, "total_tokens": 4693376}
|
|
{"current_steps": 9550, "total_steps": 37885, "loss": 0.0004, "lr": 1.8624407168334938e-06, "epoch": 1.260393295499538, "percentage": 25.21, "elapsed_time": "0:14:07", "remaining_time": "0:41:53", "throughput": 5543.88, "total_tokens": 4695936}
|
|
{"current_steps": 9555, "total_steps": 37885, "loss": 0.0927, "lr": 1.8622074392363249e-06, "epoch": 1.2610531872772865, "percentage": 25.22, "elapsed_time": "0:14:07", "remaining_time": "0:41:52", "throughput": 5544.52, "total_tokens": 4698304}
|
|
{"current_steps": 9560, "total_steps": 37885, "loss": 0.0017, "lr": 1.8619739786418707e-06, "epoch": 1.261713079055035, "percentage": 25.23, "elapsed_time": "0:14:07", "remaining_time": "0:41:51", "throughput": 5545.1, "total_tokens": 4700608}
|
|
{"current_steps": 9565, "total_steps": 37885, "loss": 0.0491, "lr": 1.8617403350996814e-06, "epoch": 1.2623729708327835, "percentage": 25.25, "elapsed_time": "0:14:08", "remaining_time": "0:41:50", "throughput": 5545.73, "total_tokens": 4702976}
|
|
{"current_steps": 9570, "total_steps": 37885, "loss": 0.1877, "lr": 1.861506508659346e-06, "epoch": 1.2630328626105318, "percentage": 25.26, "elapsed_time": "0:14:08", "remaining_time": "0:41:50", "throughput": 5546.44, "total_tokens": 4705408}
|
|
{"current_steps": 9575, "total_steps": 37885, "loss": 0.1504, "lr": 1.861272499370493e-06, "epoch": 1.2636927543882803, "percentage": 25.27, "elapsed_time": "0:14:08", "remaining_time": "0:41:49", "throughput": 5547.35, "total_tokens": 4708032}
|
|
{"current_steps": 9580, "total_steps": 37885, "loss": 0.073, "lr": 1.8610383072827887e-06, "epoch": 1.2643526461660288, "percentage": 25.29, "elapsed_time": "0:14:09", "remaining_time": "0:41:48", "throughput": 5547.96, "total_tokens": 4710400}
|
|
{"current_steps": 9585, "total_steps": 37885, "loss": 0.0615, "lr": 1.8608039324459388e-06, "epoch": 1.265012537943777, "percentage": 25.3, "elapsed_time": "0:14:09", "remaining_time": "0:41:47", "throughput": 5548.79, "total_tokens": 4712960}
|
|
{"current_steps": 9590, "total_steps": 37885, "loss": 0.0543, "lr": 1.8605693749096877e-06, "epoch": 1.2656724297215256, "percentage": 25.31, "elapsed_time": "0:14:09", "remaining_time": "0:41:46", "throughput": 5549.28, "total_tokens": 4715200}
|
|
{"current_steps": 9595, "total_steps": 37885, "loss": 0.1053, "lr": 1.8603346347238185e-06, "epoch": 1.266332321499274, "percentage": 25.33, "elapsed_time": "0:14:10", "remaining_time": "0:41:46", "throughput": 5549.93, "total_tokens": 4717568}
|
|
{"current_steps": 9600, "total_steps": 37885, "loss": 0.1185, "lr": 1.8600997119381533e-06, "epoch": 1.2669922132770226, "percentage": 25.34, "elapsed_time": "0:14:10", "remaining_time": "0:41:45", "throughput": 5550.55, "total_tokens": 4719936}
|
|
{"current_steps": 9605, "total_steps": 37885, "loss": 0.092, "lr": 1.8598646066025523e-06, "epoch": 1.267652105054771, "percentage": 25.35, "elapsed_time": "0:14:10", "remaining_time": "0:41:44", "throughput": 5551.25, "total_tokens": 4722368}
|
|
{"current_steps": 9610, "total_steps": 37885, "loss": 0.0026, "lr": 1.8596293187669155e-06, "epoch": 1.2683119968325194, "percentage": 25.37, "elapsed_time": "0:14:11", "remaining_time": "0:41:43", "throughput": 5552.03, "total_tokens": 4724864}
|
|
{"current_steps": 9615, "total_steps": 37885, "loss": 0.0039, "lr": 1.8593938484811806e-06, "epoch": 1.2689718886102679, "percentage": 25.38, "elapsed_time": "0:14:11", "remaining_time": "0:41:43", "throughput": 5552.86, "total_tokens": 4727424}
|
|
{"current_steps": 9620, "total_steps": 37885, "loss": 0.0911, "lr": 1.8591581957953245e-06, "epoch": 1.2696317803880164, "percentage": 25.39, "elapsed_time": "0:14:11", "remaining_time": "0:41:42", "throughput": 5553.28, "total_tokens": 4729600}
|
|
{"current_steps": 9625, "total_steps": 37885, "loss": 0.0008, "lr": 1.8589223607593628e-06, "epoch": 1.2702916721657649, "percentage": 25.41, "elapsed_time": "0:14:12", "remaining_time": "0:41:41", "throughput": 5554.32, "total_tokens": 4732352}
|
|
{"current_steps": 9630, "total_steps": 37885, "loss": 0.0029, "lr": 1.8586863434233502e-06, "epoch": 1.2709515639435134, "percentage": 25.42, "elapsed_time": "0:14:12", "remaining_time": "0:41:40", "throughput": 5555.08, "total_tokens": 4734848}
|
|
{"current_steps": 9635, "total_steps": 37885, "loss": 0.0696, "lr": 1.8584501438373793e-06, "epoch": 1.2716114557212617, "percentage": 25.43, "elapsed_time": "0:14:12", "remaining_time": "0:41:40", "throughput": 5555.69, "total_tokens": 4737216}
|
|
{"current_steps": 9640, "total_steps": 37885, "loss": 0.0958, "lr": 1.8582137620515816e-06, "epoch": 1.2722713474990102, "percentage": 25.45, "elapsed_time": "0:14:13", "remaining_time": "0:41:39", "throughput": 5556.48, "total_tokens": 4739712}
|
|
{"current_steps": 9645, "total_steps": 37885, "loss": 0.2084, "lr": 1.8579771981161277e-06, "epoch": 1.2729312392767587, "percentage": 25.46, "elapsed_time": "0:14:13", "remaining_time": "0:41:38", "throughput": 5557.19, "total_tokens": 4742144}
|
|
{"current_steps": 9650, "total_steps": 37885, "loss": 0.0001, "lr": 1.8577404520812262e-06, "epoch": 1.273591131054507, "percentage": 25.47, "elapsed_time": "0:14:13", "remaining_time": "0:41:37", "throughput": 5558.17, "total_tokens": 4744832}
|
|
{"current_steps": 9655, "total_steps": 37885, "loss": 0.0457, "lr": 1.8575035239971255e-06, "epoch": 1.2742510228322554, "percentage": 25.49, "elapsed_time": "0:14:14", "remaining_time": "0:41:37", "throughput": 5559.2, "total_tokens": 4747584}
|
|
{"current_steps": 9660, "total_steps": 37885, "loss": 0.0005, "lr": 1.857266413914111e-06, "epoch": 1.274910914610004, "percentage": 25.5, "elapsed_time": "0:14:14", "remaining_time": "0:41:36", "throughput": 5559.89, "total_tokens": 4750016}
|
|
{"current_steps": 9665, "total_steps": 37885, "loss": 0.0567, "lr": 1.8570291218825082e-06, "epoch": 1.2755708063877524, "percentage": 25.51, "elapsed_time": "0:14:14", "remaining_time": "0:41:35", "throughput": 5560.75, "total_tokens": 4752576}
|
|
{"current_steps": 9670, "total_steps": 37885, "loss": 0.0774, "lr": 1.8567916479526802e-06, "epoch": 1.276230698165501, "percentage": 25.52, "elapsed_time": "0:14:14", "remaining_time": "0:41:34", "throughput": 5561.18, "total_tokens": 4754752}
|
|
{"current_steps": 9675, "total_steps": 37885, "loss": 0.0006, "lr": 1.8565539921750295e-06, "epoch": 1.2768905899432492, "percentage": 25.54, "elapsed_time": "0:14:15", "remaining_time": "0:41:33", "throughput": 5561.54, "total_tokens": 4756864}
|
|
{"current_steps": 9680, "total_steps": 37885, "loss": 0.0764, "lr": 1.8563161545999965e-06, "epoch": 1.2775504817209977, "percentage": 25.55, "elapsed_time": "0:14:15", "remaining_time": "0:41:33", "throughput": 5562.4, "total_tokens": 4759424}
|
|
{"current_steps": 9685, "total_steps": 37885, "loss": 0.2287, "lr": 1.8560781352780607e-06, "epoch": 1.2782103734987462, "percentage": 25.56, "elapsed_time": "0:14:15", "remaining_time": "0:41:32", "throughput": 5563.04, "total_tokens": 4761792}
|
|
{"current_steps": 9690, "total_steps": 37885, "loss": 0.0725, "lr": 1.8558399342597402e-06, "epoch": 1.2788702652764947, "percentage": 25.58, "elapsed_time": "0:14:16", "remaining_time": "0:41:31", "throughput": 5564.09, "total_tokens": 4764544}
|
|
{"current_steps": 9695, "total_steps": 37885, "loss": 0.0003, "lr": 1.8556015515955907e-06, "epoch": 1.2795301570542432, "percentage": 25.59, "elapsed_time": "0:14:16", "remaining_time": "0:41:30", "throughput": 5564.74, "total_tokens": 4766912}
|
|
{"current_steps": 9700, "total_steps": 37885, "loss": 0.063, "lr": 1.8553629873362079e-06, "epoch": 1.2801900488319915, "percentage": 25.6, "elapsed_time": "0:14:16", "remaining_time": "0:41:30", "throughput": 5565.39, "total_tokens": 4769280}
|
|
{"current_steps": 9705, "total_steps": 37885, "loss": 0.0511, "lr": 1.855124241532225e-06, "epoch": 1.28084994060974, "percentage": 25.62, "elapsed_time": "0:14:17", "remaining_time": "0:41:29", "throughput": 5566.44, "total_tokens": 4772032}
|
|
{"current_steps": 9710, "total_steps": 37885, "loss": 0.0003, "lr": 1.8548853142343142e-06, "epoch": 1.2815098323874885, "percentage": 25.63, "elapsed_time": "0:14:17", "remaining_time": "0:41:28", "throughput": 5567.07, "total_tokens": 4774400}
|
|
{"current_steps": 9715, "total_steps": 37885, "loss": 0.2591, "lr": 1.854646205493186e-06, "epoch": 1.2821697241652368, "percentage": 25.64, "elapsed_time": "0:14:17", "remaining_time": "0:41:27", "throughput": 5567.56, "total_tokens": 4776640}
|
|
{"current_steps": 9720, "total_steps": 37885, "loss": 0.0848, "lr": 1.8544069153595896e-06, "epoch": 1.2828296159429853, "percentage": 25.66, "elapsed_time": "0:14:18", "remaining_time": "0:41:26", "throughput": 5568.2, "total_tokens": 4779008}
|
|
{"current_steps": 9725, "total_steps": 37885, "loss": 0.2256, "lr": 1.8541674438843125e-06, "epoch": 1.2834895077207338, "percentage": 25.67, "elapsed_time": "0:14:18", "remaining_time": "0:41:26", "throughput": 5569.17, "total_tokens": 4781696}
|
|
{"current_steps": 9730, "total_steps": 37885, "loss": 0.0343, "lr": 1.8539277911181809e-06, "epoch": 1.2841493994984823, "percentage": 25.68, "elapsed_time": "0:14:18", "remaining_time": "0:41:25", "throughput": 5569.97, "total_tokens": 4784192}
|
|
{"current_steps": 9735, "total_steps": 37885, "loss": 0.0027, "lr": 1.8536879571120593e-06, "epoch": 1.2848092912762308, "percentage": 25.7, "elapsed_time": "0:14:19", "remaining_time": "0:41:24", "throughput": 5570.95, "total_tokens": 4786880}
|
|
{"current_steps": 9740, "total_steps": 37885, "loss": 0.1961, "lr": 1.8534479419168508e-06, "epoch": 1.285469183053979, "percentage": 25.71, "elapsed_time": "0:14:19", "remaining_time": "0:41:23", "throughput": 5572.05, "total_tokens": 4789696}
|
|
{"current_steps": 9745, "total_steps": 37885, "loss": 0.0241, "lr": 1.8532077455834964e-06, "epoch": 1.2861290748317276, "percentage": 25.72, "elapsed_time": "0:14:19", "remaining_time": "0:41:23", "throughput": 5573.02, "total_tokens": 4792384}
|
|
{"current_steps": 9750, "total_steps": 37885, "loss": 0.1954, "lr": 1.8529673681629766e-06, "epoch": 1.286788966609476, "percentage": 25.74, "elapsed_time": "0:14:20", "remaining_time": "0:41:22", "throughput": 5573.83, "total_tokens": 4794944}
|
|
{"current_steps": 9755, "total_steps": 37885, "loss": 0.0025, "lr": 1.85272680970631e-06, "epoch": 1.2874488583872246, "percentage": 25.75, "elapsed_time": "0:14:20", "remaining_time": "0:41:21", "throughput": 5574.52, "total_tokens": 4797376}
|
|
{"current_steps": 9760, "total_steps": 37885, "loss": 0.0051, "lr": 1.8524860702645527e-06, "epoch": 1.288108750164973, "percentage": 25.76, "elapsed_time": "0:14:20", "remaining_time": "0:41:20", "throughput": 5575.21, "total_tokens": 4799808}
|
|
{"current_steps": 9765, "total_steps": 37885, "loss": 0.0732, "lr": 1.8522451498888004e-06, "epoch": 1.2887686419427213, "percentage": 25.78, "elapsed_time": "0:14:21", "remaining_time": "0:41:20", "throughput": 5576.24, "total_tokens": 4802560}
|
|
{"current_steps": 9770, "total_steps": 37885, "loss": 0.0006, "lr": 1.8520040486301862e-06, "epoch": 1.2894285337204698, "percentage": 25.79, "elapsed_time": "0:14:21", "remaining_time": "0:41:19", "throughput": 5576.61, "total_tokens": 4804736}
|
|
{"current_steps": 9775, "total_steps": 37885, "loss": 0.1509, "lr": 1.8517627665398825e-06, "epoch": 1.2900884254982183, "percentage": 25.8, "elapsed_time": "0:14:21", "remaining_time": "0:41:18", "throughput": 5577.14, "total_tokens": 4807040}
|
|
{"current_steps": 9780, "total_steps": 37885, "loss": 0.0015, "lr": 1.8515213036690996e-06, "epoch": 1.2907483172759666, "percentage": 25.81, "elapsed_time": "0:14:22", "remaining_time": "0:41:17", "throughput": 5577.47, "total_tokens": 4809152}
|
|
{"current_steps": 9785, "total_steps": 37885, "loss": 0.0002, "lr": 1.8512796600690864e-06, "epoch": 1.2914082090537151, "percentage": 25.83, "elapsed_time": "0:14:22", "remaining_time": "0:41:17", "throughput": 5578.39, "total_tokens": 4811776}
|
|
{"current_steps": 9790, "total_steps": 37885, "loss": 0.0003, "lr": 1.8510378357911297e-06, "epoch": 1.2920681008314636, "percentage": 25.84, "elapsed_time": "0:14:22", "remaining_time": "0:41:16", "throughput": 5579.15, "total_tokens": 4814272}
|
|
{"current_steps": 9795, "total_steps": 37885, "loss": 0.0535, "lr": 1.8507958308865551e-06, "epoch": 1.2927279926092121, "percentage": 25.85, "elapsed_time": "0:14:23", "remaining_time": "0:41:15", "throughput": 5579.67, "total_tokens": 4816576}
|
|
{"current_steps": 9800, "total_steps": 37885, "loss": 0.0654, "lr": 1.8505536454067264e-06, "epoch": 1.2933878843869606, "percentage": 25.87, "elapsed_time": "0:14:23", "remaining_time": "0:41:14", "throughput": 5580.54, "total_tokens": 4819200}
|
|
{"current_steps": 9805, "total_steps": 37885, "loss": 0.134, "lr": 1.8503112794030456e-06, "epoch": 1.294047776164709, "percentage": 25.88, "elapsed_time": "0:14:23", "remaining_time": "0:41:14", "throughput": 5581.44, "total_tokens": 4821824}
|
|
{"current_steps": 9810, "total_steps": 37885, "loss": 0.0005, "lr": 1.8500687329269532e-06, "epoch": 1.2947076679424574, "percentage": 25.89, "elapsed_time": "0:14:24", "remaining_time": "0:41:13", "throughput": 5582.46, "total_tokens": 4824576}
|
|
{"current_steps": 9815, "total_steps": 37885, "loss": 0.0988, "lr": 1.8498260060299282e-06, "epoch": 1.295367559720206, "percentage": 25.91, "elapsed_time": "0:14:24", "remaining_time": "0:41:12", "throughput": 5583.29, "total_tokens": 4827136}
|
|
{"current_steps": 9820, "total_steps": 37885, "loss": 0.0933, "lr": 1.849583098763487e-06, "epoch": 1.2960274514979544, "percentage": 25.92, "elapsed_time": "0:14:24", "remaining_time": "0:41:11", "throughput": 5583.68, "total_tokens": 4829312}
|
|
{"current_steps": 9825, "total_steps": 37885, "loss": 0.0507, "lr": 1.8493400111791858e-06, "epoch": 1.296687343275703, "percentage": 25.93, "elapsed_time": "0:14:25", "remaining_time": "0:41:11", "throughput": 5584.44, "total_tokens": 4831808}
|
|
{"current_steps": 9830, "total_steps": 37885, "loss": 0.1303, "lr": 1.8490967433286172e-06, "epoch": 1.2973472350534512, "percentage": 25.95, "elapsed_time": "0:14:25", "remaining_time": "0:41:10", "throughput": 5584.87, "total_tokens": 4834048}
|
|
{"current_steps": 9835, "total_steps": 37885, "loss": 0.0016, "lr": 1.8488532952634138e-06, "epoch": 1.2980071268311997, "percentage": 25.96, "elapsed_time": "0:14:25", "remaining_time": "0:41:09", "throughput": 5585.48, "total_tokens": 4836416}
|
|
{"current_steps": 9840, "total_steps": 37885, "loss": 0.0792, "lr": 1.8486096670352448e-06, "epoch": 1.2986670186089482, "percentage": 25.97, "elapsed_time": "0:14:26", "remaining_time": "0:41:08", "throughput": 5585.92, "total_tokens": 4838656}
|
|
{"current_steps": 9845, "total_steps": 37885, "loss": 0.1515, "lr": 1.8483658586958198e-06, "epoch": 1.2993269103866965, "percentage": 25.99, "elapsed_time": "0:14:26", "remaining_time": "0:41:08", "throughput": 5586.49, "total_tokens": 4841024}
|
|
{"current_steps": 9850, "total_steps": 37885, "loss": 0.0899, "lr": 1.8481218702968845e-06, "epoch": 1.299986802164445, "percentage": 26.0, "elapsed_time": "0:14:26", "remaining_time": "0:41:07", "throughput": 5587.2, "total_tokens": 4843520}
|
|
{"current_steps": 9855, "total_steps": 37885, "loss": 0.0714, "lr": 1.8478777018902236e-06, "epoch": 1.3006466939421935, "percentage": 26.01, "elapsed_time": "0:14:27", "remaining_time": "0:41:06", "throughput": 5588.13, "total_tokens": 4846208}
|
|
{"current_steps": 9860, "total_steps": 37885, "loss": 0.1439, "lr": 1.8476333535276605e-06, "epoch": 1.301306585719942, "percentage": 26.03, "elapsed_time": "0:14:27", "remaining_time": "0:41:05", "throughput": 5588.93, "total_tokens": 4848768}
|
|
{"current_steps": 9865, "total_steps": 37885, "loss": 0.0974, "lr": 1.8473888252610563e-06, "epoch": 1.3019664774976905, "percentage": 26.04, "elapsed_time": "0:14:27", "remaining_time": "0:41:05", "throughput": 5589.68, "total_tokens": 4851264}
|
|
{"current_steps": 9870, "total_steps": 37885, "loss": 0.1057, "lr": 1.8471441171423101e-06, "epoch": 1.3026263692754387, "percentage": 26.05, "elapsed_time": "0:14:28", "remaining_time": "0:41:04", "throughput": 5590.27, "total_tokens": 4853632}
|
|
{"current_steps": 9875, "total_steps": 37885, "loss": 0.0011, "lr": 1.8468992292233595e-06, "epoch": 1.3032862610531872, "percentage": 26.07, "elapsed_time": "0:14:28", "remaining_time": "0:41:03", "throughput": 5591.15, "total_tokens": 4856256}
|
|
{"current_steps": 9880, "total_steps": 37885, "loss": 0.076, "lr": 1.8466541615561804e-06, "epoch": 1.3039461528309357, "percentage": 26.08, "elapsed_time": "0:14:28", "remaining_time": "0:41:02", "throughput": 5591.87, "total_tokens": 4858752}
|
|
{"current_steps": 9885, "total_steps": 37885, "loss": 0.0014, "lr": 1.8464089141927866e-06, "epoch": 1.3046060446086842, "percentage": 26.09, "elapsed_time": "0:14:29", "remaining_time": "0:41:02", "throughput": 5592.58, "total_tokens": 4861248}
|
|
{"current_steps": 9890, "total_steps": 37885, "loss": 0.2671, "lr": 1.8461634871852298e-06, "epoch": 1.3052659363864327, "percentage": 26.11, "elapsed_time": "0:14:29", "remaining_time": "0:41:01", "throughput": 5593.28, "total_tokens": 4863744}
|
|
{"current_steps": 9895, "total_steps": 37885, "loss": 0.0681, "lr": 1.8459178805856003e-06, "epoch": 1.305925828164181, "percentage": 26.12, "elapsed_time": "0:14:29", "remaining_time": "0:41:00", "throughput": 5593.74, "total_tokens": 4865984}
|
|
{"current_steps": 9900, "total_steps": 37885, "loss": 0.1544, "lr": 1.8456720944460265e-06, "epoch": 1.3065857199419295, "percentage": 26.13, "elapsed_time": "0:14:30", "remaining_time": "0:40:59", "throughput": 5594.43, "total_tokens": 4868480}
|
|
{"current_steps": 9905, "total_steps": 37885, "loss": 0.1641, "lr": 1.8454261288186741e-06, "epoch": 1.307245611719678, "percentage": 26.14, "elapsed_time": "0:14:30", "remaining_time": "0:40:59", "throughput": 5595.17, "total_tokens": 4870976}
|
|
{"current_steps": 9910, "total_steps": 37885, "loss": 0.0584, "lr": 1.8451799837557483e-06, "epoch": 1.3079055034974263, "percentage": 26.16, "elapsed_time": "0:14:30", "remaining_time": "0:40:58", "throughput": 5595.9, "total_tokens": 4873472}
|
|
{"current_steps": 9915, "total_steps": 37885, "loss": 0.0582, "lr": 1.8449336593094914e-06, "epoch": 1.3085653952751748, "percentage": 26.17, "elapsed_time": "0:14:31", "remaining_time": "0:40:57", "throughput": 5596.8, "total_tokens": 4876160}
|
|
{"current_steps": 9920, "total_steps": 37885, "loss": 0.0677, "lr": 1.8446871555321834e-06, "epoch": 1.3092252870529233, "percentage": 26.18, "elapsed_time": "0:14:31", "remaining_time": "0:40:57", "throughput": 5597.25, "total_tokens": 4878400}
|
|
{"current_steps": 9925, "total_steps": 37885, "loss": 0.0026, "lr": 1.8444404724761436e-06, "epoch": 1.3098851788306718, "percentage": 26.2, "elapsed_time": "0:14:31", "remaining_time": "0:40:56", "throughput": 5598.22, "total_tokens": 4881152}
|
|
{"current_steps": 9930, "total_steps": 37885, "loss": 0.0665, "lr": 1.8441936101937285e-06, "epoch": 1.3105450706084203, "percentage": 26.21, "elapsed_time": "0:14:32", "remaining_time": "0:40:55", "throughput": 5598.96, "total_tokens": 4883648}
|
|
{"current_steps": 9935, "total_steps": 37885, "loss": 0.1721, "lr": 1.8439465687373328e-06, "epoch": 1.3112049623861686, "percentage": 26.22, "elapsed_time": "0:14:32", "remaining_time": "0:40:54", "throughput": 5599.29, "total_tokens": 4885760}
|
|
{"current_steps": 9940, "total_steps": 37885, "loss": 0.0015, "lr": 1.8436993481593891e-06, "epoch": 1.311864854163917, "percentage": 26.24, "elapsed_time": "0:14:32", "remaining_time": "0:40:54", "throughput": 5600.15, "total_tokens": 4888384}
|
|
{"current_steps": 9945, "total_steps": 37885, "loss": 0.084, "lr": 1.8434519485123685e-06, "epoch": 1.3125247459416656, "percentage": 26.25, "elapsed_time": "0:14:33", "remaining_time": "0:40:53", "throughput": 5600.88, "total_tokens": 4890880}
|
|
{"current_steps": 9950, "total_steps": 37885, "loss": 0.0374, "lr": 1.8432043698487796e-06, "epoch": 1.313184637719414, "percentage": 26.26, "elapsed_time": "0:14:33", "remaining_time": "0:40:52", "throughput": 5601.39, "total_tokens": 4893184}
|
|
{"current_steps": 9955, "total_steps": 37885, "loss": 0.0611, "lr": 1.8429566122211693e-06, "epoch": 1.3138445294971626, "percentage": 26.28, "elapsed_time": "0:14:33", "remaining_time": "0:40:51", "throughput": 5601.96, "total_tokens": 4895552}
|
|
{"current_steps": 9960, "total_steps": 37885, "loss": 0.1239, "lr": 1.8427086756821222e-06, "epoch": 1.3145044212749109, "percentage": 26.29, "elapsed_time": "0:14:34", "remaining_time": "0:40:51", "throughput": 5602.48, "total_tokens": 4897856}
|
|
{"current_steps": 9965, "total_steps": 37885, "loss": 0.1224, "lr": 1.842460560284261e-06, "epoch": 1.3151643130526594, "percentage": 26.3, "elapsed_time": "0:14:34", "remaining_time": "0:40:50", "throughput": 5603.21, "total_tokens": 4900352}
|
|
{"current_steps": 9970, "total_steps": 37885, "loss": 0.0006, "lr": 1.8422122660802466e-06, "epoch": 1.3158242048304079, "percentage": 26.32, "elapsed_time": "0:14:34", "remaining_time": "0:40:49", "throughput": 5604.12, "total_tokens": 4903040}
|
|
{"current_steps": 9975, "total_steps": 37885, "loss": 0.0633, "lr": 1.8419637931227776e-06, "epoch": 1.3164840966081561, "percentage": 26.33, "elapsed_time": "0:14:35", "remaining_time": "0:40:48", "throughput": 5604.97, "total_tokens": 4905664}
|
|
{"current_steps": 9980, "total_steps": 37885, "loss": 0.0512, "lr": 1.8417151414645904e-06, "epoch": 1.3171439883859046, "percentage": 26.34, "elapsed_time": "0:14:35", "remaining_time": "0:40:48", "throughput": 5605.31, "total_tokens": 4907840}
|
|
{"current_steps": 9985, "total_steps": 37885, "loss": 0.0012, "lr": 1.84146631115846e-06, "epoch": 1.3178038801636531, "percentage": 26.36, "elapsed_time": "0:14:35", "remaining_time": "0:40:47", "throughput": 5605.83, "total_tokens": 4910144}
|
|
{"current_steps": 9990, "total_steps": 37885, "loss": 0.1102, "lr": 1.8412173022571979e-06, "epoch": 1.3184637719414016, "percentage": 26.37, "elapsed_time": "0:14:36", "remaining_time": "0:40:46", "throughput": 5606.55, "total_tokens": 4912640}
|
|
{"current_steps": 9995, "total_steps": 37885, "loss": 0.0006, "lr": 1.8409681148136556e-06, "epoch": 1.3191236637191501, "percentage": 26.38, "elapsed_time": "0:14:36", "remaining_time": "0:40:45", "throughput": 5607.04, "total_tokens": 4914944}
|
|
{"current_steps": 10000, "total_steps": 37885, "loss": 0.0516, "lr": 1.8407187488807203e-06, "epoch": 1.3197835554968984, "percentage": 26.4, "elapsed_time": "0:14:36", "remaining_time": "0:40:45", "throughput": 5607.88, "total_tokens": 4917568}
|
|
{"current_steps": 10005, "total_steps": 37885, "loss": 0.0525, "lr": 1.8404692045113185e-06, "epoch": 1.320443447274647, "percentage": 26.41, "elapsed_time": "0:14:37", "remaining_time": "0:40:44", "throughput": 5608.17, "total_tokens": 4919680}
|
|
{"current_steps": 10010, "total_steps": 37885, "loss": 0.0183, "lr": 1.8402194817584147e-06, "epoch": 1.3211033390523954, "percentage": 26.42, "elapsed_time": "0:14:37", "remaining_time": "0:40:43", "throughput": 5608.53, "total_tokens": 4921856}
|
|
{"current_steps": 10015, "total_steps": 37885, "loss": 0.0421, "lr": 1.8399695806750098e-06, "epoch": 1.321763230830144, "percentage": 26.44, "elapsed_time": "0:14:37", "remaining_time": "0:40:43", "throughput": 5609.19, "total_tokens": 4924288}
|
|
{"current_steps": 10020, "total_steps": 37885, "loss": 0.1288, "lr": 1.8397195013141445e-06, "epoch": 1.3224231226078924, "percentage": 26.45, "elapsed_time": "0:14:38", "remaining_time": "0:40:42", "throughput": 5609.63, "total_tokens": 4926528}
|
|
{"current_steps": 10025, "total_steps": 37885, "loss": 0.004, "lr": 1.8394692437288954e-06, "epoch": 1.3230830143856407, "percentage": 26.46, "elapsed_time": "0:14:38", "remaining_time": "0:40:41", "throughput": 5610.68, "total_tokens": 4929344}
|
|
{"current_steps": 10030, "total_steps": 37885, "loss": 0.0934, "lr": 1.8392188079723784e-06, "epoch": 1.3237429061633892, "percentage": 26.47, "elapsed_time": "0:14:38", "remaining_time": "0:40:40", "throughput": 5611.3, "total_tokens": 4931776}
|
|
{"current_steps": 10035, "total_steps": 37885, "loss": 0.0003, "lr": 1.8389681940977467e-06, "epoch": 1.3244027979411377, "percentage": 26.49, "elapsed_time": "0:14:39", "remaining_time": "0:40:40", "throughput": 5612.03, "total_tokens": 4934272}
|
|
{"current_steps": 10040, "total_steps": 37885, "loss": 0.4409, "lr": 1.838717402158191e-06, "epoch": 1.325062689718886, "percentage": 26.5, "elapsed_time": "0:14:39", "remaining_time": "0:40:39", "throughput": 5612.94, "total_tokens": 4936960}
|
|
{"current_steps": 10045, "total_steps": 37885, "loss": 0.2134, "lr": 1.83846643220694e-06, "epoch": 1.3257225814966347, "percentage": 26.51, "elapsed_time": "0:14:39", "remaining_time": "0:40:38", "throughput": 5613.96, "total_tokens": 4939776}
|
|
{"current_steps": 10050, "total_steps": 37885, "loss": 0.0947, "lr": 1.8382152842972607e-06, "epoch": 1.326382473274383, "percentage": 26.53, "elapsed_time": "0:14:40", "remaining_time": "0:40:37", "throughput": 5614.58, "total_tokens": 4942208}
|
|
{"current_steps": 10055, "total_steps": 37885, "loss": 0.0015, "lr": 1.8379639584824572e-06, "epoch": 1.3270423650521315, "percentage": 26.54, "elapsed_time": "0:14:40", "remaining_time": "0:40:37", "throughput": 5614.98, "total_tokens": 4944448}
|
|
{"current_steps": 10060, "total_steps": 37885, "loss": 0.177, "lr": 1.8377124548158713e-06, "epoch": 1.32770225682988, "percentage": 26.55, "elapsed_time": "0:14:40", "remaining_time": "0:40:36", "throughput": 5615.52, "total_tokens": 4946816}
|
|
{"current_steps": 10065, "total_steps": 37885, "loss": 0.0229, "lr": 1.8374607733508833e-06, "epoch": 1.3283621486076282, "percentage": 26.57, "elapsed_time": "0:14:41", "remaining_time": "0:40:35", "throughput": 5616.07, "total_tokens": 4949184}
|
|
{"current_steps": 10070, "total_steps": 37885, "loss": 0.1654, "lr": 1.8372089141409108e-06, "epoch": 1.3290220403853767, "percentage": 26.58, "elapsed_time": "0:14:41", "remaining_time": "0:40:35", "throughput": 5616.72, "total_tokens": 4951616}
|
|
{"current_steps": 10075, "total_steps": 37885, "loss": 0.1656, "lr": 1.8369568772394087e-06, "epoch": 1.3296819321631252, "percentage": 26.59, "elapsed_time": "0:14:41", "remaining_time": "0:40:34", "throughput": 5617.35, "total_tokens": 4954048}
|
|
{"current_steps": 10080, "total_steps": 37885, "loss": 0.1187, "lr": 1.8367046626998702e-06, "epoch": 1.3303418239408737, "percentage": 26.61, "elapsed_time": "0:14:42", "remaining_time": "0:40:33", "throughput": 5617.59, "total_tokens": 4956160}
|
|
{"current_steps": 10085, "total_steps": 37885, "loss": 0.1228, "lr": 1.8364522705758257e-06, "epoch": 1.3310017157186222, "percentage": 26.62, "elapsed_time": "0:14:42", "remaining_time": "0:40:32", "throughput": 5618.12, "total_tokens": 4958528}
|
|
{"current_steps": 10090, "total_steps": 37885, "loss": 0.1143, "lr": 1.836199700920844e-06, "epoch": 1.3316616074963705, "percentage": 26.63, "elapsed_time": "0:14:42", "remaining_time": "0:40:32", "throughput": 5618.69, "total_tokens": 4960896}
|
|
{"current_steps": 10095, "total_steps": 37885, "loss": 0.0022, "lr": 1.8359469537885312e-06, "epoch": 1.332321499274119, "percentage": 26.65, "elapsed_time": "0:14:43", "remaining_time": "0:40:31", "throughput": 5619.47, "total_tokens": 4963456}
|
|
{"current_steps": 10100, "total_steps": 37885, "loss": 0.0887, "lr": 1.835694029232531e-06, "epoch": 1.3329813910518675, "percentage": 26.66, "elapsed_time": "0:14:43", "remaining_time": "0:40:30", "throughput": 5619.82, "total_tokens": 4965632}
|
|
{"current_steps": 10105, "total_steps": 37885, "loss": 0.1001, "lr": 1.8354409273065247e-06, "epoch": 1.333641282829616, "percentage": 26.67, "elapsed_time": "0:14:43", "remaining_time": "0:40:30", "throughput": 5620.31, "total_tokens": 4967936}
|
|
{"current_steps": 10110, "total_steps": 37885, "loss": 0.0025, "lr": 1.835187648064231e-06, "epoch": 1.3343011746073645, "percentage": 26.69, "elapsed_time": "0:14:44", "remaining_time": "0:40:29", "throughput": 5620.82, "total_tokens": 4970240}
|
|
{"current_steps": 10115, "total_steps": 37885, "loss": 0.001, "lr": 1.8349341915594073e-06, "epoch": 1.3349610663851128, "percentage": 26.7, "elapsed_time": "0:14:44", "remaining_time": "0:40:28", "throughput": 5621.77, "total_tokens": 4972992}
|
|
{"current_steps": 10120, "total_steps": 37885, "loss": 0.1337, "lr": 1.8346805578458474e-06, "epoch": 1.3356209581628613, "percentage": 26.71, "elapsed_time": "0:14:44", "remaining_time": "0:40:27", "throughput": 5622.63, "total_tokens": 4975616}
|
|
{"current_steps": 10125, "total_steps": 37885, "loss": 0.0462, "lr": 1.8344267469773835e-06, "epoch": 1.3362808499406098, "percentage": 26.73, "elapsed_time": "0:14:45", "remaining_time": "0:40:27", "throughput": 5623.36, "total_tokens": 4978112}
|
|
{"current_steps": 10130, "total_steps": 37885, "loss": 0.0005, "lr": 1.8341727590078847e-06, "epoch": 1.336940741718358, "percentage": 26.74, "elapsed_time": "0:14:45", "remaining_time": "0:40:26", "throughput": 5623.82, "total_tokens": 4980352}
|
|
{"current_steps": 10135, "total_steps": 37885, "loss": 0.0783, "lr": 1.8339185939912589e-06, "epoch": 1.3376006334961066, "percentage": 26.75, "elapsed_time": "0:14:45", "remaining_time": "0:40:25", "throughput": 5624.33, "total_tokens": 4982656}
|
|
{"current_steps": 10140, "total_steps": 37885, "loss": 0.0077, "lr": 1.83366425198145e-06, "epoch": 1.338260525273855, "percentage": 26.77, "elapsed_time": "0:14:46", "remaining_time": "0:40:24", "throughput": 5625.12, "total_tokens": 4985216}
|
|
{"current_steps": 10145, "total_steps": 37885, "loss": 0.0159, "lr": 1.8334097330324405e-06, "epoch": 1.3389204170516036, "percentage": 26.78, "elapsed_time": "0:14:46", "remaining_time": "0:40:24", "throughput": 5626.04, "total_tokens": 4987904}
|
|
{"current_steps": 10150, "total_steps": 37885, "loss": 0.0203, "lr": 1.8331550371982503e-06, "epoch": 1.339580308829352, "percentage": 26.79, "elapsed_time": "0:14:46", "remaining_time": "0:40:23", "throughput": 5626.77, "total_tokens": 4990400}
|
|
{"current_steps": 10155, "total_steps": 37885, "loss": 0.071, "lr": 1.8329001645329364e-06, "epoch": 1.3402402006071004, "percentage": 26.8, "elapsed_time": "0:14:47", "remaining_time": "0:40:22", "throughput": 5627.55, "total_tokens": 4992960}
|
|
{"current_steps": 10160, "total_steps": 37885, "loss": 0.0032, "lr": 1.8326451150905945e-06, "epoch": 1.3409000923848489, "percentage": 26.82, "elapsed_time": "0:14:47", "remaining_time": "0:40:22", "throughput": 5628.41, "total_tokens": 4995584}
|
|
{"current_steps": 10165, "total_steps": 37885, "loss": 0.1142, "lr": 1.8323898889253562e-06, "epoch": 1.3415599841625974, "percentage": 26.83, "elapsed_time": "0:14:47", "remaining_time": "0:40:21", "throughput": 5629.0, "total_tokens": 4997952}
|
|
{"current_steps": 10170, "total_steps": 37885, "loss": 0.1238, "lr": 1.8321344860913918e-06, "epoch": 1.3422198759403459, "percentage": 26.84, "elapsed_time": "0:14:48", "remaining_time": "0:40:20", "throughput": 5629.26, "total_tokens": 5000000}
|
|
{"current_steps": 10175, "total_steps": 37885, "loss": 0.0662, "lr": 1.8318789066429083e-06, "epoch": 1.3428797677180944, "percentage": 26.86, "elapsed_time": "0:14:48", "remaining_time": "0:40:19", "throughput": 5630.17, "total_tokens": 5002688}
|
|
{"current_steps": 10180, "total_steps": 37885, "loss": 0.0005, "lr": 1.831623150634151e-06, "epoch": 1.3435396594958426, "percentage": 26.87, "elapsed_time": "0:14:48", "remaining_time": "0:40:19", "throughput": 5630.91, "total_tokens": 5005184}
|
|
{"current_steps": 10185, "total_steps": 37885, "loss": 0.1373, "lr": 1.8313672181194023e-06, "epoch": 1.3441995512735911, "percentage": 26.88, "elapsed_time": "0:14:49", "remaining_time": "0:40:18", "throughput": 5631.37, "total_tokens": 5007424}
|
|
{"current_steps": 10190, "total_steps": 37885, "loss": 0.0557, "lr": 1.8311111091529817e-06, "epoch": 1.3448594430513396, "percentage": 26.9, "elapsed_time": "0:14:49", "remaining_time": "0:40:17", "throughput": 5632.29, "total_tokens": 5010112}
|
|
{"current_steps": 10195, "total_steps": 37885, "loss": 0.0594, "lr": 1.8308548237892465e-06, "epoch": 1.345519334829088, "percentage": 26.91, "elapsed_time": "0:14:49", "remaining_time": "0:40:16", "throughput": 5633.15, "total_tokens": 5012736}
|
|
{"current_steps": 10200, "total_steps": 37885, "loss": 0.0539, "lr": 1.8305983620825915e-06, "epoch": 1.3461792266068364, "percentage": 26.92, "elapsed_time": "0:14:50", "remaining_time": "0:40:16", "throughput": 5633.68, "total_tokens": 5015040}
|
|
{"current_steps": 10205, "total_steps": 37885, "loss": 0.0573, "lr": 1.8303417240874492e-06, "epoch": 1.346839118384585, "percentage": 26.94, "elapsed_time": "0:14:50", "remaining_time": "0:40:15", "throughput": 5634.2, "total_tokens": 5017344}
|
|
{"current_steps": 10210, "total_steps": 37885, "loss": 0.0528, "lr": 1.8300849098582886e-06, "epoch": 1.3474990101623334, "percentage": 26.95, "elapsed_time": "0:14:50", "remaining_time": "0:40:14", "throughput": 5634.84, "total_tokens": 5019776}
|
|
{"current_steps": 10215, "total_steps": 37885, "loss": 0.0395, "lr": 1.829827919449617e-06, "epoch": 1.348158901940082, "percentage": 26.96, "elapsed_time": "0:14:51", "remaining_time": "0:40:13", "throughput": 5635.56, "total_tokens": 5022272}
|
|
{"current_steps": 10220, "total_steps": 37885, "loss": 0.1797, "lr": 1.8295707529159783e-06, "epoch": 1.3488187937178302, "percentage": 26.98, "elapsed_time": "0:14:51", "remaining_time": "0:40:13", "throughput": 5636.27, "total_tokens": 5024768}
|
|
{"current_steps": 10225, "total_steps": 37885, "loss": 0.1089, "lr": 1.829313410311955e-06, "epoch": 1.3494786854955787, "percentage": 26.99, "elapsed_time": "0:14:51", "remaining_time": "0:40:12", "throughput": 5636.78, "total_tokens": 5027072}
|
|
{"current_steps": 10230, "total_steps": 37885, "loss": 0.1722, "lr": 1.8290558916921656e-06, "epoch": 1.3501385772733272, "percentage": 27.0, "elapsed_time": "0:14:52", "remaining_time": "0:40:11", "throughput": 5637.5, "total_tokens": 5029568}
|
|
{"current_steps": 10235, "total_steps": 37885, "loss": 0.0379, "lr": 1.8287981971112668e-06, "epoch": 1.3507984690510757, "percentage": 27.02, "elapsed_time": "0:14:52", "remaining_time": "0:40:11", "throughput": 5638.41, "total_tokens": 5032256}
|
|
{"current_steps": 10240, "total_steps": 37885, "loss": 0.0258, "lr": 1.8285403266239521e-06, "epoch": 1.3514583608288242, "percentage": 27.03, "elapsed_time": "0:14:52", "remaining_time": "0:40:10", "throughput": 5639.34, "total_tokens": 5034944}
|
|
{"current_steps": 10245, "total_steps": 37885, "loss": 0.2289, "lr": 1.8282822802849531e-06, "epoch": 1.3521182526065725, "percentage": 27.04, "elapsed_time": "0:14:53", "remaining_time": "0:40:09", "throughput": 5640.05, "total_tokens": 5037440}
|
|
{"current_steps": 10250, "total_steps": 37885, "loss": 0.0005, "lr": 1.8280240581490381e-06, "epoch": 1.352778144384321, "percentage": 27.06, "elapsed_time": "0:14:53", "remaining_time": "0:40:08", "throughput": 5640.97, "total_tokens": 5040128}
|
|
{"current_steps": 10255, "total_steps": 37885, "loss": 0.0257, "lr": 1.8277656602710127e-06, "epoch": 1.3534380361620695, "percentage": 27.07, "elapsed_time": "0:14:53", "remaining_time": "0:40:08", "throughput": 5641.69, "total_tokens": 5042624}
|
|
{"current_steps": 10260, "total_steps": 37885, "loss": 0.0817, "lr": 1.8275070867057203e-06, "epoch": 1.3540979279398178, "percentage": 27.08, "elapsed_time": "0:14:54", "remaining_time": "0:40:07", "throughput": 5642.19, "total_tokens": 5044928}
|
|
{"current_steps": 10265, "total_steps": 37885, "loss": 0.0005, "lr": 1.827248337508041e-06, "epoch": 1.3547578197175663, "percentage": 27.1, "elapsed_time": "0:14:54", "remaining_time": "0:40:06", "throughput": 5642.98, "total_tokens": 5047488}
|
|
{"current_steps": 10270, "total_steps": 37885, "loss": 0.0283, "lr": 1.8269894127328925e-06, "epoch": 1.3554177114953148, "percentage": 27.11, "elapsed_time": "0:14:54", "remaining_time": "0:40:06", "throughput": 5644.09, "total_tokens": 5050368}
|
|
{"current_steps": 10275, "total_steps": 37885, "loss": 0.0452, "lr": 1.8267303124352295e-06, "epoch": 1.3560776032730633, "percentage": 27.12, "elapsed_time": "0:14:55", "remaining_time": "0:40:05", "throughput": 5644.66, "total_tokens": 5052736}
|
|
{"current_steps": 10280, "total_steps": 37885, "loss": 0.1482, "lr": 1.826471036670045e-06, "epoch": 1.3567374950508118, "percentage": 27.13, "elapsed_time": "0:14:55", "remaining_time": "0:40:04", "throughput": 5645.32, "total_tokens": 5055168}
|
|
{"current_steps": 10285, "total_steps": 37885, "loss": 0.0281, "lr": 1.8262115854923673e-06, "epoch": 1.35739738682856, "percentage": 27.15, "elapsed_time": "0:14:55", "remaining_time": "0:40:03", "throughput": 5646.03, "total_tokens": 5057664}
|
|
{"current_steps": 10290, "total_steps": 37885, "loss": 0.1029, "lr": 1.8259519589572637e-06, "epoch": 1.3580572786063085, "percentage": 27.16, "elapsed_time": "0:14:56", "remaining_time": "0:40:03", "throughput": 5646.75, "total_tokens": 5060160}
|
|
{"current_steps": 10295, "total_steps": 37885, "loss": 0.0132, "lr": 1.8256921571198376e-06, "epoch": 1.358717170384057, "percentage": 27.17, "elapsed_time": "0:14:56", "remaining_time": "0:40:02", "throughput": 5647.72, "total_tokens": 5062912}
|
|
{"current_steps": 10300, "total_steps": 37885, "loss": 0.0083, "lr": 1.8254321800352308e-06, "epoch": 1.3593770621618055, "percentage": 27.19, "elapsed_time": "0:14:56", "remaining_time": "0:40:01", "throughput": 5648.24, "total_tokens": 5065216}
|
|
{"current_steps": 10305, "total_steps": 37885, "loss": 0.0474, "lr": 1.8251720277586209e-06, "epoch": 1.360036953939554, "percentage": 27.2, "elapsed_time": "0:14:57", "remaining_time": "0:40:00", "throughput": 5648.68, "total_tokens": 5067456}
|
|
{"current_steps": 10310, "total_steps": 37885, "loss": 0.2756, "lr": 1.8249117003452233e-06, "epoch": 1.3606968457173023, "percentage": 27.21, "elapsed_time": "0:14:57", "remaining_time": "0:40:00", "throughput": 5649.21, "total_tokens": 5069760}
|
|
{"current_steps": 10315, "total_steps": 37885, "loss": 0.1271, "lr": 1.8246511978502912e-06, "epoch": 1.3613567374950508, "percentage": 27.23, "elapsed_time": "0:14:57", "remaining_time": "0:39:59", "throughput": 5650.01, "total_tokens": 5072320}
|
|
{"current_steps": 10320, "total_steps": 37885, "loss": 0.0017, "lr": 1.8243905203291136e-06, "epoch": 1.3620166292727993, "percentage": 27.24, "elapsed_time": "0:14:58", "remaining_time": "0:39:58", "throughput": 5650.72, "total_tokens": 5074816}
|
|
{"current_steps": 10325, "total_steps": 37885, "loss": 0.0858, "lr": 1.8241296678370184e-06, "epoch": 1.3626765210505476, "percentage": 27.25, "elapsed_time": "0:14:58", "remaining_time": "0:39:58", "throughput": 5651.43, "total_tokens": 5077312}
|
|
{"current_steps": 10330, "total_steps": 37885, "loss": 0.0011, "lr": 1.8238686404293686e-06, "epoch": 1.363336412828296, "percentage": 27.27, "elapsed_time": "0:14:58", "remaining_time": "0:39:57", "throughput": 5651.94, "total_tokens": 5079616}
|
|
{"current_steps": 10335, "total_steps": 37885, "loss": 0.3048, "lr": 1.8236074381615661e-06, "epoch": 1.3639963046060446, "percentage": 27.28, "elapsed_time": "0:14:59", "remaining_time": "0:39:56", "throughput": 5652.18, "total_tokens": 5081664}
|
|
{"current_steps": 10340, "total_steps": 37885, "loss": 0.0004, "lr": 1.823346061089049e-06, "epoch": 1.364656196383793, "percentage": 27.29, "elapsed_time": "0:14:59", "remaining_time": "0:39:55", "throughput": 5652.94, "total_tokens": 5084224}
|
|
{"current_steps": 10345, "total_steps": 37885, "loss": 0.0951, "lr": 1.8230845092672925e-06, "epoch": 1.3653160881615416, "percentage": 27.31, "elapsed_time": "0:14:59", "remaining_time": "0:39:55", "throughput": 5653.45, "total_tokens": 5086528}
|
|
{"current_steps": 10350, "total_steps": 37885, "loss": 0.167, "lr": 1.8228227827518093e-06, "epoch": 1.3659759799392899, "percentage": 27.32, "elapsed_time": "0:15:00", "remaining_time": "0:39:54", "throughput": 5654.11, "total_tokens": 5088960}
|
|
{"current_steps": 10355, "total_steps": 37885, "loss": 0.0787, "lr": 1.8225608815981488e-06, "epoch": 1.3666358717170384, "percentage": 27.33, "elapsed_time": "0:15:00", "remaining_time": "0:39:53", "throughput": 5654.72, "total_tokens": 5091392}
|
|
{"current_steps": 10360, "total_steps": 37885, "loss": 0.0948, "lr": 1.8222988058618976e-06, "epoch": 1.3672957634947869, "percentage": 27.35, "elapsed_time": "0:15:00", "remaining_time": "0:39:53", "throughput": 5655.45, "total_tokens": 5093888}
|
|
{"current_steps": 10365, "total_steps": 37885, "loss": 0.15, "lr": 1.8220365555986797e-06, "epoch": 1.3679556552725354, "percentage": 27.36, "elapsed_time": "0:15:01", "remaining_time": "0:39:52", "throughput": 5656.03, "total_tokens": 5096256}
|
|
{"current_steps": 10370, "total_steps": 37885, "loss": 0.0489, "lr": 1.8217741308641553e-06, "epoch": 1.3686155470502839, "percentage": 27.37, "elapsed_time": "0:15:01", "remaining_time": "0:39:51", "throughput": 5656.81, "total_tokens": 5098816}
|
|
{"current_steps": 10375, "total_steps": 37885, "loss": 0.0487, "lr": 1.8215115317140226e-06, "epoch": 1.3692754388280322, "percentage": 27.39, "elapsed_time": "0:15:01", "remaining_time": "0:39:50", "throughput": 5657.47, "total_tokens": 5101248}
|
|
{"current_steps": 10380, "total_steps": 37885, "loss": 0.0838, "lr": 1.8212487582040164e-06, "epoch": 1.3699353306057807, "percentage": 27.4, "elapsed_time": "0:15:02", "remaining_time": "0:39:50", "throughput": 5657.91, "total_tokens": 5103488}
|
|
{"current_steps": 10385, "total_steps": 37885, "loss": 0.2107, "lr": 1.8209858103899081e-06, "epoch": 1.3705952223835292, "percentage": 27.41, "elapsed_time": "0:15:02", "remaining_time": "0:39:49", "throughput": 5658.54, "total_tokens": 5105920}
|
|
{"current_steps": 10390, "total_steps": 37885, "loss": 0.001, "lr": 1.8207226883275067e-06, "epoch": 1.3712551141612774, "percentage": 27.43, "elapsed_time": "0:15:02", "remaining_time": "0:39:48", "throughput": 5659.17, "total_tokens": 5108352}
|
|
{"current_steps": 10395, "total_steps": 37885, "loss": 0.1689, "lr": 1.820459392072658e-06, "epoch": 1.371915005939026, "percentage": 27.44, "elapsed_time": "0:15:02", "remaining_time": "0:39:48", "throughput": 5659.79, "total_tokens": 5110784}
|
|
{"current_steps": 10400, "total_steps": 37885, "loss": 0.121, "lr": 1.8201959216812443e-06, "epoch": 1.3725748977167744, "percentage": 27.45, "elapsed_time": "0:15:03", "remaining_time": "0:39:47", "throughput": 5660.57, "total_tokens": 5113344}
|
|
{"current_steps": 10405, "total_steps": 37885, "loss": 0.0541, "lr": 1.8199322772091858e-06, "epoch": 1.373234789494523, "percentage": 27.46, "elapsed_time": "0:15:03", "remaining_time": "0:39:46", "throughput": 5661.14, "total_tokens": 5115712}
|
|
{"current_steps": 10410, "total_steps": 37885, "loss": 0.0519, "lr": 1.819668458712439e-06, "epoch": 1.3738946812722714, "percentage": 27.48, "elapsed_time": "0:15:03", "remaining_time": "0:39:45", "throughput": 5661.58, "total_tokens": 5117952}
|
|
{"current_steps": 10415, "total_steps": 37885, "loss": 0.0012, "lr": 1.8194044662469973e-06, "epoch": 1.3745545730500197, "percentage": 27.49, "elapsed_time": "0:15:04", "remaining_time": "0:39:45", "throughput": 5661.96, "total_tokens": 5120128}
|
|
{"current_steps": 10420, "total_steps": 37885, "loss": 0.0045, "lr": 1.8191402998688913e-06, "epoch": 1.3752144648277682, "percentage": 27.5, "elapsed_time": "0:15:04", "remaining_time": "0:39:44", "throughput": 5662.46, "total_tokens": 5122432}
|
|
{"current_steps": 10425, "total_steps": 37885, "loss": 0.0804, "lr": 1.8188759596341888e-06, "epoch": 1.3758743566055167, "percentage": 27.52, "elapsed_time": "0:15:04", "remaining_time": "0:39:43", "throughput": 5663.29, "total_tokens": 5125056}
|
|
{"current_steps": 10430, "total_steps": 37885, "loss": 0.0818, "lr": 1.8186114455989933e-06, "epoch": 1.3765342483832652, "percentage": 27.53, "elapsed_time": "0:15:05", "remaining_time": "0:39:42", "throughput": 5663.86, "total_tokens": 5127424}
|
|
{"current_steps": 10435, "total_steps": 37885, "loss": 0.0692, "lr": 1.8183467578194467e-06, "epoch": 1.3771941401610137, "percentage": 27.54, "elapsed_time": "0:15:05", "remaining_time": "0:39:42", "throughput": 5664.42, "total_tokens": 5129792}
|
|
{"current_steps": 10440, "total_steps": 37885, "loss": 0.1073, "lr": 1.8180818963517264e-06, "epoch": 1.377854031938762, "percentage": 27.56, "elapsed_time": "0:15:05", "remaining_time": "0:39:41", "throughput": 5664.85, "total_tokens": 5132032}
|
|
{"current_steps": 10445, "total_steps": 37885, "loss": 0.0095, "lr": 1.8178168612520478e-06, "epoch": 1.3785139237165105, "percentage": 27.57, "elapsed_time": "0:15:06", "remaining_time": "0:39:40", "throughput": 5665.42, "total_tokens": 5134400}
|
|
{"current_steps": 10450, "total_steps": 37885, "loss": 0.0715, "lr": 1.8175516525766627e-06, "epoch": 1.379173815494259, "percentage": 27.58, "elapsed_time": "0:15:06", "remaining_time": "0:39:40", "throughput": 5665.85, "total_tokens": 5136640}
|
|
{"current_steps": 10455, "total_steps": 37885, "loss": 0.1421, "lr": 1.8172862703818593e-06, "epoch": 1.3798337072720073, "percentage": 27.6, "elapsed_time": "0:15:06", "remaining_time": "0:39:39", "throughput": 5666.53, "total_tokens": 5139136}
|
|
{"current_steps": 10460, "total_steps": 37885, "loss": 0.0011, "lr": 1.8170207147239636e-06, "epoch": 1.3804935990497558, "percentage": 27.61, "elapsed_time": "0:15:07", "remaining_time": "0:39:38", "throughput": 5667.23, "total_tokens": 5141632}
|
|
{"current_steps": 10465, "total_steps": 37885, "loss": 0.0696, "lr": 1.8167549856593374e-06, "epoch": 1.3811534908275043, "percentage": 27.62, "elapsed_time": "0:15:07", "remaining_time": "0:39:38", "throughput": 5668.1, "total_tokens": 5144320}
|
|
{"current_steps": 10470, "total_steps": 37885, "loss": 0.2092, "lr": 1.81648908324438e-06, "epoch": 1.3818133826052528, "percentage": 27.64, "elapsed_time": "0:15:07", "remaining_time": "0:39:37", "throughput": 5668.85, "total_tokens": 5146880}
|
|
{"current_steps": 10475, "total_steps": 37885, "loss": 0.0023, "lr": 1.8162230075355277e-06, "epoch": 1.3824732743830013, "percentage": 27.65, "elapsed_time": "0:15:08", "remaining_time": "0:39:36", "throughput": 5669.8, "total_tokens": 5149632}
|
|
{"current_steps": 10480, "total_steps": 37885, "loss": 0.0611, "lr": 1.8159567585892521e-06, "epoch": 1.3831331661607495, "percentage": 27.66, "elapsed_time": "0:15:08", "remaining_time": "0:39:35", "throughput": 5670.3, "total_tokens": 5151936}
|
|
{"current_steps": 10485, "total_steps": 37885, "loss": 0.2547, "lr": 1.8156903364620632e-06, "epoch": 1.383793057938498, "percentage": 27.68, "elapsed_time": "0:15:08", "remaining_time": "0:39:35", "throughput": 5670.91, "total_tokens": 5154368}
|
|
{"current_steps": 10490, "total_steps": 37885, "loss": 0.0018, "lr": 1.8154237412105074e-06, "epoch": 1.3844529497162466, "percentage": 27.69, "elapsed_time": "0:15:09", "remaining_time": "0:39:34", "throughput": 5671.45, "total_tokens": 5156736}
|
|
{"current_steps": 10495, "total_steps": 37885, "loss": 0.203, "lr": 1.8151569728911672e-06, "epoch": 1.385112841493995, "percentage": 27.7, "elapsed_time": "0:15:09", "remaining_time": "0:39:33", "throughput": 5672.01, "total_tokens": 5159104}
|
|
{"current_steps": 10500, "total_steps": 37885, "loss": 0.1597, "lr": 1.8148900315606625e-06, "epoch": 1.3857727332717436, "percentage": 27.72, "elapsed_time": "0:15:09", "remaining_time": "0:39:33", "throughput": 5672.55, "total_tokens": 5161472}
|
|
{"current_steps": 10505, "total_steps": 37885, "loss": 0.0015, "lr": 1.8146229172756495e-06, "epoch": 1.3864326250494918, "percentage": 27.73, "elapsed_time": "0:15:10", "remaining_time": "0:39:32", "throughput": 5673.17, "total_tokens": 5163904}
|
|
{"current_steps": 10510, "total_steps": 37885, "loss": 0.0844, "lr": 1.8143556300928214e-06, "epoch": 1.3870925168272403, "percentage": 27.74, "elapsed_time": "0:15:10", "remaining_time": "0:39:31", "throughput": 5673.91, "total_tokens": 5166464}
|
|
{"current_steps": 10515, "total_steps": 37885, "loss": 0.0516, "lr": 1.814088170068908e-06, "epoch": 1.3877524086049888, "percentage": 27.76, "elapsed_time": "0:15:10", "remaining_time": "0:39:31", "throughput": 5674.33, "total_tokens": 5168704}
|
|
{"current_steps": 10520, "total_steps": 37885, "loss": 0.0833, "lr": 1.8138205372606756e-06, "epoch": 1.388412300382737, "percentage": 27.77, "elapsed_time": "0:15:11", "remaining_time": "0:39:30", "throughput": 5675.01, "total_tokens": 5171200}
|
|
{"current_steps": 10525, "total_steps": 37885, "loss": 0.0015, "lr": 1.8135527317249273e-06, "epoch": 1.3890721921604856, "percentage": 27.78, "elapsed_time": "0:15:11", "remaining_time": "0:39:29", "throughput": 5675.5, "total_tokens": 5173504}
|
|
{"current_steps": 10530, "total_steps": 37885, "loss": 0.0479, "lr": 1.8132847535185029e-06, "epoch": 1.389732083938234, "percentage": 27.79, "elapsed_time": "0:15:11", "remaining_time": "0:39:28", "throughput": 5676.24, "total_tokens": 5176064}
|
|
{"current_steps": 10535, "total_steps": 37885, "loss": 0.0021, "lr": 1.8130166026982795e-06, "epoch": 1.3903919757159826, "percentage": 27.81, "elapsed_time": "0:15:12", "remaining_time": "0:39:28", "throughput": 5677.18, "total_tokens": 5178816}
|
|
{"current_steps": 10540, "total_steps": 37885, "loss": 0.0802, "lr": 1.8127482793211688e-06, "epoch": 1.391051867493731, "percentage": 27.82, "elapsed_time": "0:15:12", "remaining_time": "0:39:27", "throughput": 5677.8, "total_tokens": 5181248}
|
|
{"current_steps": 10545, "total_steps": 37885, "loss": 0.0009, "lr": 1.8124797834441217e-06, "epoch": 1.3917117592714794, "percentage": 27.83, "elapsed_time": "0:15:12", "remaining_time": "0:39:26", "throughput": 5678.3, "total_tokens": 5183552}
|
|
{"current_steps": 10550, "total_steps": 37885, "loss": 0.0769, "lr": 1.812211115124124e-06, "epoch": 1.3923716510492279, "percentage": 27.85, "elapsed_time": "0:15:13", "remaining_time": "0:39:26", "throughput": 5678.65, "total_tokens": 5185728}
|
|
{"current_steps": 10555, "total_steps": 37885, "loss": 0.0521, "lr": 1.8119422744181984e-06, "epoch": 1.3930315428269764, "percentage": 27.86, "elapsed_time": "0:15:13", "remaining_time": "0:39:25", "throughput": 5679.32, "total_tokens": 5188224}
|
|
{"current_steps": 10560, "total_steps": 37885, "loss": 0.1086, "lr": 1.8116732613834053e-06, "epoch": 1.3936914346047249, "percentage": 27.87, "elapsed_time": "0:15:13", "remaining_time": "0:39:24", "throughput": 5680.38, "total_tokens": 5191104}
|
|
{"current_steps": 10565, "total_steps": 37885, "loss": 0.1069, "lr": 1.81140407607684e-06, "epoch": 1.3943513263824734, "percentage": 27.89, "elapsed_time": "0:15:14", "remaining_time": "0:39:24", "throughput": 5681.08, "total_tokens": 5193600}
|
|
{"current_steps": 10570, "total_steps": 37885, "loss": 0.0569, "lr": 1.8111347185556348e-06, "epoch": 1.3950112181602217, "percentage": 27.9, "elapsed_time": "0:15:14", "remaining_time": "0:39:23", "throughput": 5681.69, "total_tokens": 5196032}
|
|
{"current_steps": 10575, "total_steps": 37885, "loss": 0.0003, "lr": 1.8108651888769595e-06, "epoch": 1.3956711099379702, "percentage": 27.91, "elapsed_time": "0:15:14", "remaining_time": "0:39:22", "throughput": 5682.51, "total_tokens": 5198656}
|
|
{"current_steps": 10580, "total_steps": 37885, "loss": 0.146, "lr": 1.8105954870980198e-06, "epoch": 1.3963310017157187, "percentage": 27.93, "elapsed_time": "0:15:15", "remaining_time": "0:39:21", "throughput": 5682.98, "total_tokens": 5200960}
|
|
{"current_steps": 10585, "total_steps": 37885, "loss": 0.0001, "lr": 1.810325613276058e-06, "epoch": 1.396990893493467, "percentage": 27.94, "elapsed_time": "0:15:15", "remaining_time": "0:39:21", "throughput": 5683.73, "total_tokens": 5203520}
|
|
{"current_steps": 10590, "total_steps": 37885, "loss": 0.0006, "lr": 1.8100555674683524e-06, "epoch": 1.3976507852712154, "percentage": 27.95, "elapsed_time": "0:15:15", "remaining_time": "0:39:20", "throughput": 5684.54, "total_tokens": 5206144}
|
|
{"current_steps": 10595, "total_steps": 37885, "loss": 0.0002, "lr": 1.8097853497322188e-06, "epoch": 1.398310677048964, "percentage": 27.97, "elapsed_time": "0:15:16", "remaining_time": "0:39:19", "throughput": 5685.33, "total_tokens": 5208768}
|
|
{"current_steps": 10600, "total_steps": 37885, "loss": 0.0942, "lr": 1.8095149601250088e-06, "epoch": 1.3989705688267124, "percentage": 27.98, "elapsed_time": "0:15:16", "remaining_time": "0:39:19", "throughput": 5685.87, "total_tokens": 5211136}
|
|
{"current_steps": 10605, "total_steps": 37885, "loss": 0.066, "lr": 1.8092443987041104e-06, "epoch": 1.399630460604461, "percentage": 27.99, "elapsed_time": "0:15:16", "remaining_time": "0:39:18", "throughput": 5686.43, "total_tokens": 5213504}
|
|
{"current_steps": 10610, "total_steps": 37885, "loss": 0.0834, "lr": 1.8089736655269486e-06, "epoch": 1.4002903523822092, "percentage": 28.01, "elapsed_time": "0:15:17", "remaining_time": "0:39:17", "throughput": 5687.11, "total_tokens": 5216000}
|
|
{"current_steps": 10615, "total_steps": 37885, "loss": 0.14, "lr": 1.8087027606509842e-06, "epoch": 1.4009502441599577, "percentage": 28.02, "elapsed_time": "0:15:17", "remaining_time": "0:39:17", "throughput": 5687.97, "total_tokens": 5218688}
|
|
{"current_steps": 10620, "total_steps": 37885, "loss": 0.0022, "lr": 1.808431684133715e-06, "epoch": 1.4016101359377062, "percentage": 28.03, "elapsed_time": "0:15:17", "remaining_time": "0:39:16", "throughput": 5688.9, "total_tokens": 5221440}
|
|
{"current_steps": 10625, "total_steps": 37885, "loss": 0.1496, "lr": 1.8081604360326753e-06, "epoch": 1.4022700277154547, "percentage": 28.05, "elapsed_time": "0:15:18", "remaining_time": "0:39:15", "throughput": 5689.23, "total_tokens": 5223616}
|
|
{"current_steps": 10630, "total_steps": 37885, "loss": 0.0769, "lr": 1.807889016405435e-06, "epoch": 1.4029299194932032, "percentage": 28.06, "elapsed_time": "0:15:18", "remaining_time": "0:39:14", "throughput": 5689.97, "total_tokens": 5226176}
|
|
{"current_steps": 10635, "total_steps": 37885, "loss": 0.0013, "lr": 1.8076174253096014e-06, "epoch": 1.4035898112709515, "percentage": 28.07, "elapsed_time": "0:15:18", "remaining_time": "0:39:14", "throughput": 5690.45, "total_tokens": 5228480}
|
|
{"current_steps": 10640, "total_steps": 37885, "loss": 0.1115, "lr": 1.8073456628028177e-06, "epoch": 1.4042497030487, "percentage": 28.08, "elapsed_time": "0:15:19", "remaining_time": "0:39:13", "throughput": 5691.08, "total_tokens": 5230912}
|
|
{"current_steps": 10645, "total_steps": 37885, "loss": 0.0883, "lr": 1.8070737289427631e-06, "epoch": 1.4049095948264485, "percentage": 28.1, "elapsed_time": "0:15:19", "remaining_time": "0:39:12", "throughput": 5691.87, "total_tokens": 5233536}
|
|
{"current_steps": 10650, "total_steps": 37885, "loss": 0.0003, "lr": 1.8068016237871541e-06, "epoch": 1.4055694866041968, "percentage": 28.11, "elapsed_time": "0:15:19", "remaining_time": "0:39:12", "throughput": 5692.61, "total_tokens": 5236096}
|
|
{"current_steps": 10655, "total_steps": 37885, "loss": 0.1611, "lr": 1.8065293473937429e-06, "epoch": 1.4062293783819453, "percentage": 28.12, "elapsed_time": "0:15:20", "remaining_time": "0:39:11", "throughput": 5693.16, "total_tokens": 5238464}
|
|
{"current_steps": 10660, "total_steps": 37885, "loss": 0.3064, "lr": 1.806256899820318e-06, "epoch": 1.4068892701596938, "percentage": 28.14, "elapsed_time": "0:15:20", "remaining_time": "0:39:10", "throughput": 5693.96, "total_tokens": 5241088}
|
|
{"current_steps": 10665, "total_steps": 37885, "loss": 0.2462, "lr": 1.8059842811247048e-06, "epoch": 1.4075491619374423, "percentage": 28.15, "elapsed_time": "0:15:20", "remaining_time": "0:39:10", "throughput": 5694.63, "total_tokens": 5243584}
|
|
{"current_steps": 10670, "total_steps": 37885, "loss": 0.0303, "lr": 1.805711491364764e-06, "epoch": 1.4082090537151908, "percentage": 28.16, "elapsed_time": "0:15:21", "remaining_time": "0:39:09", "throughput": 5695.23, "total_tokens": 5246016}
|
|
{"current_steps": 10675, "total_steps": 37885, "loss": 0.0026, "lr": 1.8054385305983942e-06, "epoch": 1.408868945492939, "percentage": 28.18, "elapsed_time": "0:15:21", "remaining_time": "0:39:08", "throughput": 5695.58, "total_tokens": 5248192}
|
|
{"current_steps": 10680, "total_steps": 37885, "loss": 0.1616, "lr": 1.8051653988835284e-06, "epoch": 1.4095288372706876, "percentage": 28.19, "elapsed_time": "0:15:21", "remaining_time": "0:39:08", "throughput": 5696.31, "total_tokens": 5250752}
|
|
{"current_steps": 10685, "total_steps": 37885, "loss": 0.1854, "lr": 1.8048920962781372e-06, "epoch": 1.410188729048436, "percentage": 28.2, "elapsed_time": "0:15:22", "remaining_time": "0:39:07", "throughput": 5696.87, "total_tokens": 5253120}
|
|
{"current_steps": 10690, "total_steps": 37885, "loss": 0.0559, "lr": 1.8046186228402273e-06, "epoch": 1.4108486208261846, "percentage": 28.22, "elapsed_time": "0:15:22", "remaining_time": "0:39:06", "throughput": 5697.72, "total_tokens": 5255808}
|
|
{"current_steps": 10695, "total_steps": 37885, "loss": 0.0009, "lr": 1.8043449786278413e-06, "epoch": 1.411508512603933, "percentage": 28.23, "elapsed_time": "0:15:22", "remaining_time": "0:39:05", "throughput": 5698.19, "total_tokens": 5258112}
|
|
{"current_steps": 10700, "total_steps": 37885, "loss": 0.0902, "lr": 1.8040711636990581e-06, "epoch": 1.4121684043816813, "percentage": 28.24, "elapsed_time": "0:15:23", "remaining_time": "0:39:05", "throughput": 5699.05, "total_tokens": 5260800}
|
|
{"current_steps": 10705, "total_steps": 37885, "loss": 0.0008, "lr": 1.8037971781119931e-06, "epoch": 1.4128282961594298, "percentage": 28.26, "elapsed_time": "0:15:23", "remaining_time": "0:39:04", "throughput": 5699.52, "total_tokens": 5263104}
|
|
{"current_steps": 10710, "total_steps": 37885, "loss": 0.204, "lr": 1.8035230219247977e-06, "epoch": 1.4134881879371783, "percentage": 28.27, "elapsed_time": "0:15:23", "remaining_time": "0:39:03", "throughput": 5700.05, "total_tokens": 5265472}
|
|
{"current_steps": 10715, "total_steps": 37885, "loss": 0.0555, "lr": 1.8032486951956596e-06, "epoch": 1.4141480797149266, "percentage": 28.28, "elapsed_time": "0:15:24", "remaining_time": "0:39:03", "throughput": 5700.91, "total_tokens": 5268160}
|
|
{"current_steps": 10720, "total_steps": 37885, "loss": 0.115, "lr": 1.8029741979828026e-06, "epoch": 1.4148079714926751, "percentage": 28.3, "elapsed_time": "0:15:24", "remaining_time": "0:39:02", "throughput": 5701.33, "total_tokens": 5270400}
|
|
{"current_steps": 10725, "total_steps": 37885, "loss": 0.0623, "lr": 1.8026995303444867e-06, "epoch": 1.4154678632704236, "percentage": 28.31, "elapsed_time": "0:15:24", "remaining_time": "0:39:01", "throughput": 5701.88, "total_tokens": 5272768}
|
|
{"current_steps": 10730, "total_steps": 37885, "loss": 0.0005, "lr": 1.802424692339008e-06, "epoch": 1.4161277550481721, "percentage": 28.32, "elapsed_time": "0:15:25", "remaining_time": "0:39:01", "throughput": 5702.88, "total_tokens": 5275584}
|
|
{"current_steps": 10735, "total_steps": 37885, "loss": 0.0371, "lr": 1.8021496840246994e-06, "epoch": 1.4167876468259206, "percentage": 28.34, "elapsed_time": "0:15:25", "remaining_time": "0:39:00", "throughput": 5703.29, "total_tokens": 5277824}
|
|
{"current_steps": 10740, "total_steps": 37885, "loss": 0.0004, "lr": 1.8018745054599292e-06, "epoch": 1.417447538603669, "percentage": 28.35, "elapsed_time": "0:15:25", "remaining_time": "0:38:59", "throughput": 5704.17, "total_tokens": 5280512}
|
|
{"current_steps": 10745, "total_steps": 37885, "loss": 0.0006, "lr": 1.8015991567031015e-06, "epoch": 1.4181074303814174, "percentage": 28.36, "elapsed_time": "0:15:26", "remaining_time": "0:38:59", "throughput": 5704.93, "total_tokens": 5283136}
|
|
{"current_steps": 10750, "total_steps": 37885, "loss": 0.0802, "lr": 1.8013236378126577e-06, "epoch": 1.418767322159166, "percentage": 28.38, "elapsed_time": "0:15:26", "remaining_time": "0:38:58", "throughput": 5705.52, "total_tokens": 5285568}
|
|
{"current_steps": 10755, "total_steps": 37885, "loss": 0.0573, "lr": 1.8010479488470743e-06, "epoch": 1.4194272139369144, "percentage": 28.39, "elapsed_time": "0:15:26", "remaining_time": "0:38:57", "throughput": 5706.07, "total_tokens": 5287936}
|
|
{"current_steps": 10760, "total_steps": 37885, "loss": 0.0006, "lr": 1.8007720898648645e-06, "epoch": 1.420087105714663, "percentage": 28.4, "elapsed_time": "0:15:27", "remaining_time": "0:38:57", "throughput": 5706.27, "total_tokens": 5289984}
|
|
{"current_steps": 10765, "total_steps": 37885, "loss": 0.1727, "lr": 1.8004960609245778e-06, "epoch": 1.4207469974924112, "percentage": 28.41, "elapsed_time": "0:15:27", "remaining_time": "0:38:56", "throughput": 5706.8, "total_tokens": 5292352}
|
|
{"current_steps": 10770, "total_steps": 37885, "loss": 0.0008, "lr": 1.8002198620847988e-06, "epoch": 1.4214068892701597, "percentage": 28.43, "elapsed_time": "0:15:27", "remaining_time": "0:38:55", "throughput": 5707.35, "total_tokens": 5294720}
|
|
{"current_steps": 10775, "total_steps": 37885, "loss": 0.0007, "lr": 1.7999434934041485e-06, "epoch": 1.4220667810479082, "percentage": 28.44, "elapsed_time": "0:15:28", "remaining_time": "0:38:54", "throughput": 5707.81, "total_tokens": 5297024}
|
|
{"current_steps": 10780, "total_steps": 37885, "loss": 0.0005, "lr": 1.7996669549412847e-06, "epoch": 1.4227266728256565, "percentage": 28.45, "elapsed_time": "0:15:28", "remaining_time": "0:38:54", "throughput": 5708.53, "total_tokens": 5299584}
|
|
{"current_steps": 10785, "total_steps": 37885, "loss": 0.1082, "lr": 1.7993902467549002e-06, "epoch": 1.4233865646034052, "percentage": 28.47, "elapsed_time": "0:15:28", "remaining_time": "0:38:53", "throughput": 5709.01, "total_tokens": 5301888}
|
|
{"current_steps": 10790, "total_steps": 37885, "loss": 0.1271, "lr": 1.7991133689037247e-06, "epoch": 1.4240464563811535, "percentage": 28.48, "elapsed_time": "0:15:29", "remaining_time": "0:38:52", "throughput": 5709.56, "total_tokens": 5304256}
|
|
{"current_steps": 10795, "total_steps": 37885, "loss": 0.1573, "lr": 1.7988363214465233e-06, "epoch": 1.424706348158902, "percentage": 28.49, "elapsed_time": "0:15:29", "remaining_time": "0:38:52", "throughput": 5710.16, "total_tokens": 5306688}
|
|
{"current_steps": 10800, "total_steps": 37885, "loss": 0.0624, "lr": 1.7985591044420975e-06, "epoch": 1.4253662399366505, "percentage": 28.51, "elapsed_time": "0:15:29", "remaining_time": "0:38:51", "throughput": 5710.89, "total_tokens": 5309248}
|
|
{"current_steps": 10805, "total_steps": 37885, "loss": 0.001, "lr": 1.7982817179492847e-06, "epoch": 1.4260261317143987, "percentage": 28.52, "elapsed_time": "0:15:29", "remaining_time": "0:38:50", "throughput": 5711.36, "total_tokens": 5311552}
|
|
{"current_steps": 10810, "total_steps": 37885, "loss": 0.0589, "lr": 1.7980041620269577e-06, "epoch": 1.4266860234921472, "percentage": 28.53, "elapsed_time": "0:15:30", "remaining_time": "0:38:50", "throughput": 5712.01, "total_tokens": 5314048}
|
|
{"current_steps": 10815, "total_steps": 37885, "loss": 0.0665, "lr": 1.7977264367340262e-06, "epoch": 1.4273459152698957, "percentage": 28.55, "elapsed_time": "0:15:30", "remaining_time": "0:38:49", "throughput": 5712.58, "total_tokens": 5316480}
|
|
{"current_steps": 10820, "total_steps": 37885, "loss": 0.0012, "lr": 1.7974485421294347e-06, "epoch": 1.4280058070476442, "percentage": 28.56, "elapsed_time": "0:15:30", "remaining_time": "0:38:48", "throughput": 5712.93, "total_tokens": 5318720}
|
|
{"current_steps": 10825, "total_steps": 37885, "loss": 0.0552, "lr": 1.7971704782721652e-06, "epoch": 1.4286656988253927, "percentage": 28.57, "elapsed_time": "0:15:31", "remaining_time": "0:38:48", "throughput": 5713.68, "total_tokens": 5321344}
|
|
{"current_steps": 10830, "total_steps": 37885, "loss": 0.1752, "lr": 1.7968922452212342e-06, "epoch": 1.429325590603141, "percentage": 28.59, "elapsed_time": "0:15:31", "remaining_time": "0:38:47", "throughput": 5714.05, "total_tokens": 5323584}
|
|
{"current_steps": 10835, "total_steps": 37885, "loss": 0.2167, "lr": 1.796613843035695e-06, "epoch": 1.4299854823808895, "percentage": 28.6, "elapsed_time": "0:15:32", "remaining_time": "0:38:46", "throughput": 5714.8, "total_tokens": 5326208}
|
|
{"current_steps": 10840, "total_steps": 37885, "loss": 0.0647, "lr": 1.796335271774636e-06, "epoch": 1.430645374158638, "percentage": 28.61, "elapsed_time": "0:15:32", "remaining_time": "0:38:46", "throughput": 5715.48, "total_tokens": 5328768}
|
|
{"current_steps": 10845, "total_steps": 37885, "loss": 0.1761, "lr": 1.7960565314971823e-06, "epoch": 1.4313052659363863, "percentage": 28.63, "elapsed_time": "0:15:32", "remaining_time": "0:38:45", "throughput": 5716.06, "total_tokens": 5331264}
|
|
{"current_steps": 10850, "total_steps": 37885, "loss": 0.0024, "lr": 1.7957776222624946e-06, "epoch": 1.431965157714135, "percentage": 28.64, "elapsed_time": "0:15:33", "remaining_time": "0:38:44", "throughput": 5716.53, "total_tokens": 5333632}
|
|
{"current_steps": 10855, "total_steps": 37885, "loss": 0.001, "lr": 1.7954985441297684e-06, "epoch": 1.4326250494918833, "percentage": 28.65, "elapsed_time": "0:15:33", "remaining_time": "0:38:44", "throughput": 5717.19, "total_tokens": 5336192}
|
|
{"current_steps": 10860, "total_steps": 37885, "loss": 0.0111, "lr": 1.7952192971582374e-06, "epoch": 1.4332849412696318, "percentage": 28.67, "elapsed_time": "0:15:33", "remaining_time": "0:38:43", "throughput": 5717.58, "total_tokens": 5338496}
|
|
{"current_steps": 10865, "total_steps": 37885, "loss": 0.0531, "lr": 1.794939881407169e-06, "epoch": 1.4339448330473803, "percentage": 28.68, "elapsed_time": "0:15:34", "remaining_time": "0:38:42", "throughput": 5718.16, "total_tokens": 5340992}
|
|
{"current_steps": 10870, "total_steps": 37885, "loss": 0.0015, "lr": 1.7946602969358673e-06, "epoch": 1.4346047248251286, "percentage": 28.69, "elapsed_time": "0:15:34", "remaining_time": "0:38:42", "throughput": 5718.79, "total_tokens": 5343552}
|
|
{"current_steps": 10875, "total_steps": 37885, "loss": 0.0022, "lr": 1.7943805438036718e-06, "epoch": 1.435264616602877, "percentage": 28.71, "elapsed_time": "0:15:34", "remaining_time": "0:38:41", "throughput": 5719.48, "total_tokens": 5346176}
|
|
{"current_steps": 10880, "total_steps": 37885, "loss": 0.0216, "lr": 1.7941006220699588e-06, "epoch": 1.4359245083806256, "percentage": 28.72, "elapsed_time": "0:15:35", "remaining_time": "0:38:40", "throughput": 5720.2, "total_tokens": 5348800}
|
|
{"current_steps": 10885, "total_steps": 37885, "loss": 0.0736, "lr": 1.7938205317941386e-06, "epoch": 1.436584400158374, "percentage": 28.73, "elapsed_time": "0:15:35", "remaining_time": "0:38:40", "throughput": 5720.91, "total_tokens": 5351424}
|
|
{"current_steps": 10890, "total_steps": 37885, "loss": 0.0003, "lr": 1.7935402730356594e-06, "epoch": 1.4372442919361226, "percentage": 28.74, "elapsed_time": "0:15:35", "remaining_time": "0:38:39", "throughput": 5721.63, "total_tokens": 5354048}
|
|
{"current_steps": 10895, "total_steps": 37885, "loss": 0.0753, "lr": 1.7932598458540036e-06, "epoch": 1.4379041837138709, "percentage": 28.76, "elapsed_time": "0:15:36", "remaining_time": "0:38:38", "throughput": 5722.09, "total_tokens": 5356416}
|
|
{"current_steps": 10900, "total_steps": 37885, "loss": 0.03, "lr": 1.7929792503086897e-06, "epoch": 1.4385640754916194, "percentage": 28.77, "elapsed_time": "0:15:36", "remaining_time": "0:38:38", "throughput": 5722.65, "total_tokens": 5358848}
|
|
{"current_steps": 10905, "total_steps": 37885, "loss": 0.0655, "lr": 1.792698486459272e-06, "epoch": 1.4392239672693679, "percentage": 28.78, "elapsed_time": "0:15:36", "remaining_time": "0:38:37", "throughput": 5723.27, "total_tokens": 5361344}
|
|
{"current_steps": 10910, "total_steps": 37885, "loss": 0.0519, "lr": 1.7924175543653411e-06, "epoch": 1.4398838590471164, "percentage": 28.8, "elapsed_time": "0:15:37", "remaining_time": "0:38:36", "throughput": 5723.94, "total_tokens": 5363904}
|
|
{"current_steps": 10915, "total_steps": 37885, "loss": 0.0833, "lr": 1.7921364540865224e-06, "epoch": 1.4405437508248649, "percentage": 28.81, "elapsed_time": "0:15:37", "remaining_time": "0:38:36", "throughput": 5724.3, "total_tokens": 5366144}
|
|
{"current_steps": 10920, "total_steps": 37885, "loss": 0.0007, "lr": 1.7918551856824776e-06, "epoch": 1.4412036426026131, "percentage": 28.82, "elapsed_time": "0:15:37", "remaining_time": "0:38:35", "throughput": 5724.72, "total_tokens": 5368448}
|
|
{"current_steps": 10925, "total_steps": 37885, "loss": 0.1, "lr": 1.7915737492129037e-06, "epoch": 1.4418635343803616, "percentage": 28.84, "elapsed_time": "0:15:38", "remaining_time": "0:38:34", "throughput": 5724.99, "total_tokens": 5370624}
|
|
{"current_steps": 10930, "total_steps": 37885, "loss": 0.0986, "lr": 1.7912921447375338e-06, "epoch": 1.4425234261581101, "percentage": 28.85, "elapsed_time": "0:15:38", "remaining_time": "0:38:34", "throughput": 5725.83, "total_tokens": 5373376}
|
|
{"current_steps": 10935, "total_steps": 37885, "loss": 0.1395, "lr": 1.7910103723161362e-06, "epoch": 1.4431833179358584, "percentage": 28.86, "elapsed_time": "0:15:38", "remaining_time": "0:38:33", "throughput": 5726.59, "total_tokens": 5376064}
|
|
{"current_steps": 10940, "total_steps": 37885, "loss": 0.0167, "lr": 1.7907284320085153e-06, "epoch": 1.443843209713607, "percentage": 28.88, "elapsed_time": "0:15:39", "remaining_time": "0:38:33", "throughput": 5727.24, "total_tokens": 5378624}
|
|
{"current_steps": 10945, "total_steps": 37885, "loss": 0.1895, "lr": 1.7904463238745105e-06, "epoch": 1.4445031014913554, "percentage": 28.89, "elapsed_time": "0:15:39", "remaining_time": "0:38:32", "throughput": 5728.0, "total_tokens": 5381312}
|
|
{"current_steps": 10950, "total_steps": 37885, "loss": 0.1904, "lr": 1.7901640479739974e-06, "epoch": 1.445162993269104, "percentage": 28.9, "elapsed_time": "0:15:39", "remaining_time": "0:38:31", "throughput": 5728.4, "total_tokens": 5383616}
|
|
{"current_steps": 10955, "total_steps": 37885, "loss": 0.1212, "lr": 1.789881604366887e-06, "epoch": 1.4458228850468524, "percentage": 28.92, "elapsed_time": "0:15:40", "remaining_time": "0:38:31", "throughput": 5729.24, "total_tokens": 5386368}
|
|
{"current_steps": 10960, "total_steps": 37885, "loss": 0.1056, "lr": 1.7895989931131262e-06, "epoch": 1.4464827768246007, "percentage": 28.93, "elapsed_time": "0:15:40", "remaining_time": "0:38:30", "throughput": 5729.7, "total_tokens": 5388736}
|
|
{"current_steps": 10965, "total_steps": 37885, "loss": 0.0437, "lr": 1.7893162142726967e-06, "epoch": 1.4471426686023492, "percentage": 28.94, "elapsed_time": "0:15:40", "remaining_time": "0:38:29", "throughput": 5730.27, "total_tokens": 5391232}
|
|
{"current_steps": 10970, "total_steps": 37885, "loss": 0.0009, "lr": 1.7890332679056165e-06, "epoch": 1.4478025603800977, "percentage": 28.96, "elapsed_time": "0:15:41", "remaining_time": "0:38:29", "throughput": 5730.89, "total_tokens": 5393792}
|
|
{"current_steps": 10975, "total_steps": 37885, "loss": 0.1911, "lr": 1.7887501540719389e-06, "epoch": 1.4484624521578462, "percentage": 28.97, "elapsed_time": "0:15:41", "remaining_time": "0:38:28", "throughput": 5731.6, "total_tokens": 5396416}
|
|
{"current_steps": 10980, "total_steps": 37885, "loss": 0.0005, "lr": 1.7884668728317531e-06, "epoch": 1.4491223439355947, "percentage": 28.98, "elapsed_time": "0:15:41", "remaining_time": "0:38:27", "throughput": 5732.49, "total_tokens": 5399232}
|
|
{"current_steps": 10985, "total_steps": 37885, "loss": 0.0615, "lr": 1.7881834242451829e-06, "epoch": 1.449782235713343, "percentage": 29.0, "elapsed_time": "0:15:42", "remaining_time": "0:38:27", "throughput": 5733.01, "total_tokens": 5401664}
|
|
{"current_steps": 10990, "total_steps": 37885, "loss": 0.001, "lr": 1.7878998083723883e-06, "epoch": 1.4504421274910915, "percentage": 29.01, "elapsed_time": "0:15:42", "remaining_time": "0:38:26", "throughput": 5733.67, "total_tokens": 5404224}
|
|
{"current_steps": 10995, "total_steps": 37885, "loss": 0.1332, "lr": 1.7876160252735652e-06, "epoch": 1.45110201926884, "percentage": 29.02, "elapsed_time": "0:15:42", "remaining_time": "0:38:25", "throughput": 5733.87, "total_tokens": 5406336}
|
|
{"current_steps": 11000, "total_steps": 37885, "loss": 0.0553, "lr": 1.7873320750089443e-06, "epoch": 1.4517619110465882, "percentage": 29.04, "elapsed_time": "0:15:43", "remaining_time": "0:38:25", "throughput": 5734.46, "total_tokens": 5408832}
|
|
{"current_steps": 11005, "total_steps": 37885, "loss": 0.0357, "lr": 1.7870479576387916e-06, "epoch": 1.4524218028243367, "percentage": 29.05, "elapsed_time": "0:15:43", "remaining_time": "0:38:24", "throughput": 5734.84, "total_tokens": 5411136}
|
|
{"current_steps": 11010, "total_steps": 37885, "loss": 0.1594, "lr": 1.7867636732234094e-06, "epoch": 1.4530816946020852, "percentage": 29.06, "elapsed_time": "0:15:43", "remaining_time": "0:38:24", "throughput": 5735.16, "total_tokens": 5413376}
|
|
{"current_steps": 11015, "total_steps": 37885, "loss": 0.0837, "lr": 1.7864792218231348e-06, "epoch": 1.4537415863798338, "percentage": 29.07, "elapsed_time": "0:15:44", "remaining_time": "0:38:23", "throughput": 5735.54, "total_tokens": 5415680}
|
|
{"current_steps": 11020, "total_steps": 37885, "loss": 0.066, "lr": 1.7861946034983406e-06, "epoch": 1.4544014781575823, "percentage": 29.09, "elapsed_time": "0:15:44", "remaining_time": "0:38:22", "throughput": 5736.05, "total_tokens": 5418112}
|
|
{"current_steps": 11025, "total_steps": 37885, "loss": 0.0883, "lr": 1.785909818309435e-06, "epoch": 1.4550613699353305, "percentage": 29.1, "elapsed_time": "0:15:44", "remaining_time": "0:38:22", "throughput": 5736.36, "total_tokens": 5420352}
|
|
{"current_steps": 11030, "total_steps": 37885, "loss": 0.1528, "lr": 1.7856248663168616e-06, "epoch": 1.455721261713079, "percentage": 29.11, "elapsed_time": "0:15:45", "remaining_time": "0:38:21", "throughput": 5736.81, "total_tokens": 5422720}
|
|
{"current_steps": 11035, "total_steps": 37885, "loss": 0.1301, "lr": 1.7853397475810995e-06, "epoch": 1.4563811534908275, "percentage": 29.13, "elapsed_time": "0:15:45", "remaining_time": "0:38:20", "throughput": 5737.26, "total_tokens": 5425024}
|
|
{"current_steps": 11040, "total_steps": 37885, "loss": 0.0646, "lr": 1.7850544621626626e-06, "epoch": 1.457041045268576, "percentage": 29.14, "elapsed_time": "0:15:45", "remaining_time": "0:38:20", "throughput": 5737.95, "total_tokens": 5427584}
|
|
{"current_steps": 11045, "total_steps": 37885, "loss": 0.2122, "lr": 1.7847690101221011e-06, "epoch": 1.4577009370463245, "percentage": 29.15, "elapsed_time": "0:15:46", "remaining_time": "0:38:19", "throughput": 5738.9, "total_tokens": 5430400}
|
|
{"current_steps": 11050, "total_steps": 37885, "loss": 0.0005, "lr": 1.7844833915200001e-06, "epoch": 1.4583608288240728, "percentage": 29.17, "elapsed_time": "0:15:46", "remaining_time": "0:38:18", "throughput": 5739.6, "total_tokens": 5432960}
|
|
{"current_steps": 11055, "total_steps": 37885, "loss": 0.0658, "lr": 1.7841976064169803e-06, "epoch": 1.4590207206018213, "percentage": 29.18, "elapsed_time": "0:15:46", "remaining_time": "0:38:18", "throughput": 5740.28, "total_tokens": 5435520}
|
|
{"current_steps": 11060, "total_steps": 37885, "loss": 0.1915, "lr": 1.7839116548736972e-06, "epoch": 1.4596806123795698, "percentage": 29.19, "elapsed_time": "0:15:47", "remaining_time": "0:38:17", "throughput": 5740.91, "total_tokens": 5438016}
|
|
{"current_steps": 11065, "total_steps": 37885, "loss": 0.0019, "lr": 1.7836255369508418e-06, "epoch": 1.460340504157318, "percentage": 29.21, "elapsed_time": "0:15:47", "remaining_time": "0:38:16", "throughput": 5741.44, "total_tokens": 5440384}
|
|
{"current_steps": 11070, "total_steps": 37885, "loss": 0.0514, "lr": 1.7833392527091409e-06, "epoch": 1.4610003959350666, "percentage": 29.22, "elapsed_time": "0:15:47", "remaining_time": "0:38:16", "throughput": 5742.24, "total_tokens": 5443072}
|
|
{"current_steps": 11075, "total_steps": 37885, "loss": 0.1296, "lr": 1.7830528022093559e-06, "epoch": 1.461660287712815, "percentage": 29.23, "elapsed_time": "0:15:48", "remaining_time": "0:38:15", "throughput": 5743.06, "total_tokens": 5445760}
|
|
{"current_steps": 11080, "total_steps": 37885, "loss": 0.0006, "lr": 1.7827661855122842e-06, "epoch": 1.4623201794905636, "percentage": 29.25, "elapsed_time": "0:15:48", "remaining_time": "0:38:14", "throughput": 5743.62, "total_tokens": 5448192}
|
|
{"current_steps": 11085, "total_steps": 37885, "loss": 0.0479, "lr": 1.7824794026787577e-06, "epoch": 1.462980071268312, "percentage": 29.26, "elapsed_time": "0:15:48", "remaining_time": "0:38:14", "throughput": 5744.31, "total_tokens": 5450752}
|
|
{"current_steps": 11090, "total_steps": 37885, "loss": 0.0038, "lr": 1.7821924537696447e-06, "epoch": 1.4636399630460604, "percentage": 29.27, "elapsed_time": "0:15:49", "remaining_time": "0:38:13", "throughput": 5744.76, "total_tokens": 5453056}
|
|
{"current_steps": 11095, "total_steps": 37885, "loss": 0.0636, "lr": 1.7819053388458474e-06, "epoch": 1.4642998548238089, "percentage": 29.29, "elapsed_time": "0:15:49", "remaining_time": "0:38:12", "throughput": 5745.63, "total_tokens": 5455808}
|
|
{"current_steps": 11100, "total_steps": 37885, "loss": 0.1838, "lr": 1.781618057968304e-06, "epoch": 1.4649597466015574, "percentage": 29.3, "elapsed_time": "0:15:49", "remaining_time": "0:38:12", "throughput": 5746.18, "total_tokens": 5458240}
|
|
{"current_steps": 11105, "total_steps": 37885, "loss": 0.0005, "lr": 1.7813306111979878e-06, "epoch": 1.4656196383793059, "percentage": 29.31, "elapsed_time": "0:15:50", "remaining_time": "0:38:11", "throughput": 5746.57, "total_tokens": 5460480}
|
|
{"current_steps": 11110, "total_steps": 37885, "loss": 0.1695, "lr": 1.7810429985959075e-06, "epoch": 1.4662795301570544, "percentage": 29.33, "elapsed_time": "0:15:50", "remaining_time": "0:38:10", "throughput": 5746.8, "total_tokens": 5462592}
|
|
{"current_steps": 11115, "total_steps": 37885, "loss": 0.3442, "lr": 1.7807552202231065e-06, "epoch": 1.4669394219348026, "percentage": 29.34, "elapsed_time": "0:15:50", "remaining_time": "0:38:10", "throughput": 5747.23, "total_tokens": 5464896}
|
|
{"current_steps": 11120, "total_steps": 37885, "loss": 0.1338, "lr": 1.7804672761406636e-06, "epoch": 1.4675993137125511, "percentage": 29.35, "elapsed_time": "0:15:51", "remaining_time": "0:38:09", "throughput": 5747.73, "total_tokens": 5467264}
|
|
{"current_steps": 11125, "total_steps": 37885, "loss": 0.0805, "lr": 1.7801791664096933e-06, "epoch": 1.4682592054902996, "percentage": 29.37, "elapsed_time": "0:15:51", "remaining_time": "0:38:08", "throughput": 5748.29, "total_tokens": 5469696}
|
|
{"current_steps": 11130, "total_steps": 37885, "loss": 0.0636, "lr": 1.7798908910913444e-06, "epoch": 1.468919097268048, "percentage": 29.38, "elapsed_time": "0:15:51", "remaining_time": "0:38:08", "throughput": 5749.21, "total_tokens": 5472512}
|
|
{"current_steps": 11135, "total_steps": 37885, "loss": 0.0954, "lr": 1.7796024502468015e-06, "epoch": 1.4695789890457964, "percentage": 29.39, "elapsed_time": "0:15:52", "remaining_time": "0:38:07", "throughput": 5750.01, "total_tokens": 5475200}
|
|
{"current_steps": 11140, "total_steps": 37885, "loss": 0.1199, "lr": 1.7793138439372839e-06, "epoch": 1.470238880823545, "percentage": 29.4, "elapsed_time": "0:15:52", "remaining_time": "0:38:06", "throughput": 5750.51, "total_tokens": 5477568}
|
|
{"current_steps": 11145, "total_steps": 37885, "loss": 0.0413, "lr": 1.7790250722240463e-06, "epoch": 1.4708987726012934, "percentage": 29.42, "elapsed_time": "0:15:52", "remaining_time": "0:38:06", "throughput": 5751.06, "total_tokens": 5480000}
|
|
{"current_steps": 11150, "total_steps": 37885, "loss": 0.0579, "lr": 1.7787361351683784e-06, "epoch": 1.471558664379042, "percentage": 29.43, "elapsed_time": "0:15:53", "remaining_time": "0:38:05", "throughput": 5751.67, "total_tokens": 5482496}
|
|
{"current_steps": 11155, "total_steps": 37885, "loss": 0.1602, "lr": 1.7784470328316048e-06, "epoch": 1.4722185561567902, "percentage": 29.44, "elapsed_time": "0:15:53", "remaining_time": "0:38:04", "throughput": 5752.23, "total_tokens": 5484928}
|
|
{"current_steps": 11160, "total_steps": 37885, "loss": 0.0452, "lr": 1.7781577652750858e-06, "epoch": 1.4728784479345387, "percentage": 29.46, "elapsed_time": "0:15:53", "remaining_time": "0:38:04", "throughput": 5752.74, "total_tokens": 5487296}
|
|
{"current_steps": 11165, "total_steps": 37885, "loss": 0.0005, "lr": 1.777868332560216e-06, "epoch": 1.4735383397122872, "percentage": 29.47, "elapsed_time": "0:15:54", "remaining_time": "0:38:03", "throughput": 5753.42, "total_tokens": 5489856}
|
|
{"current_steps": 11170, "total_steps": 37885, "loss": 0.0447, "lr": 1.7775787347484255e-06, "epoch": 1.4741982314900357, "percentage": 29.48, "elapsed_time": "0:15:54", "remaining_time": "0:38:02", "throughput": 5754.04, "total_tokens": 5492352}
|
|
{"current_steps": 11175, "total_steps": 37885, "loss": 0.0842, "lr": 1.7772889719011793e-06, "epoch": 1.4748581232677842, "percentage": 29.5, "elapsed_time": "0:15:54", "remaining_time": "0:38:02", "throughput": 5754.72, "total_tokens": 5494912}
|
|
{"current_steps": 11180, "total_steps": 37885, "loss": 0.0015, "lr": 1.7769990440799775e-06, "epoch": 1.4755180150455325, "percentage": 29.51, "elapsed_time": "0:15:55", "remaining_time": "0:38:01", "throughput": 5755.52, "total_tokens": 5497600}
|
|
{"current_steps": 11185, "total_steps": 37885, "loss": 0.0355, "lr": 1.7767089513463552e-06, "epoch": 1.476177906823281, "percentage": 29.52, "elapsed_time": "0:15:55", "remaining_time": "0:38:00", "throughput": 5756.38, "total_tokens": 5500352}
|
|
{"current_steps": 11190, "total_steps": 37885, "loss": 0.1102, "lr": 1.7764186937618826e-06, "epoch": 1.4768377986010295, "percentage": 29.54, "elapsed_time": "0:15:55", "remaining_time": "0:38:00", "throughput": 5756.74, "total_tokens": 5502592}
|
|
{"current_steps": 11195, "total_steps": 37885, "loss": 0.2163, "lr": 1.7761282713881645e-06, "epoch": 1.4774976903787778, "percentage": 29.55, "elapsed_time": "0:15:56", "remaining_time": "0:37:59", "throughput": 5757.56, "total_tokens": 5505280}
|
|
{"current_steps": 11200, "total_steps": 37885, "loss": 0.0617, "lr": 1.775837684286841e-06, "epoch": 1.4781575821565263, "percentage": 29.56, "elapsed_time": "0:15:56", "remaining_time": "0:37:58", "throughput": 5758.35, "total_tokens": 5507968}
|
|
{"current_steps": 11205, "total_steps": 37885, "loss": 0.2514, "lr": 1.7755469325195871e-06, "epoch": 1.4788174739342748, "percentage": 29.58, "elapsed_time": "0:15:56", "remaining_time": "0:37:58", "throughput": 5759.11, "total_tokens": 5510592}
|
|
{"current_steps": 11210, "total_steps": 37885, "loss": 0.054, "lr": 1.7752560161481131e-06, "epoch": 1.4794773657120233, "percentage": 29.59, "elapsed_time": "0:15:57", "remaining_time": "0:37:57", "throughput": 5759.52, "total_tokens": 5512896}
|
|
{"current_steps": 11215, "total_steps": 37885, "loss": 0.0291, "lr": 1.7749649352341636e-06, "epoch": 1.4801372574897718, "percentage": 29.6, "elapsed_time": "0:15:57", "remaining_time": "0:37:57", "throughput": 5760.32, "total_tokens": 5515584}
|
|
{"current_steps": 11220, "total_steps": 37885, "loss": 0.1157, "lr": 1.7746736898395182e-06, "epoch": 1.48079714926752, "percentage": 29.62, "elapsed_time": "0:15:57", "remaining_time": "0:37:56", "throughput": 5760.75, "total_tokens": 5517888}
|
|
{"current_steps": 11225, "total_steps": 37885, "loss": 0.1393, "lr": 1.7743822800259923e-06, "epoch": 1.4814570410452685, "percentage": 29.63, "elapsed_time": "0:15:58", "remaining_time": "0:37:55", "throughput": 5761.3, "total_tokens": 5520320}
|
|
{"current_steps": 11230, "total_steps": 37885, "loss": 0.1536, "lr": 1.7740907058554348e-06, "epoch": 1.482116932823017, "percentage": 29.64, "elapsed_time": "0:15:58", "remaining_time": "0:37:55", "throughput": 5761.73, "total_tokens": 5522624}
|
|
{"current_steps": 11235, "total_steps": 37885, "loss": 0.2235, "lr": 1.7737989673897307e-06, "epoch": 1.4827768246007655, "percentage": 29.66, "elapsed_time": "0:15:58", "remaining_time": "0:37:54", "throughput": 5762.1, "total_tokens": 5524864}
|
|
{"current_steps": 11240, "total_steps": 37885, "loss": 0.1008, "lr": 1.7735070646907988e-06, "epoch": 1.483436716378514, "percentage": 29.67, "elapsed_time": "0:15:59", "remaining_time": "0:37:53", "throughput": 5762.86, "total_tokens": 5527488}
|
|
{"current_steps": 11245, "total_steps": 37885, "loss": 0.0975, "lr": 1.773214997820594e-06, "epoch": 1.4840966081562623, "percentage": 29.68, "elapsed_time": "0:15:59", "remaining_time": "0:37:53", "throughput": 5763.35, "total_tokens": 5529856}
|
|
{"current_steps": 11250, "total_steps": 37885, "loss": 0.0849, "lr": 1.772922766841105e-06, "epoch": 1.4847564999340108, "percentage": 29.7, "elapsed_time": "0:15:59", "remaining_time": "0:37:52", "throughput": 5763.97, "total_tokens": 5532352}
|
|
{"current_steps": 11255, "total_steps": 37885, "loss": 0.0448, "lr": 1.772630371814356e-06, "epoch": 1.4854163917117593, "percentage": 29.71, "elapsed_time": "0:16:00", "remaining_time": "0:37:51", "throughput": 5764.71, "total_tokens": 5534976}
|
|
{"current_steps": 11260, "total_steps": 37885, "loss": 0.0766, "lr": 1.7723378128024056e-06, "epoch": 1.4860762834895076, "percentage": 29.72, "elapsed_time": "0:16:00", "remaining_time": "0:37:51", "throughput": 5765.27, "total_tokens": 5537408}
|
|
{"current_steps": 11265, "total_steps": 37885, "loss": 0.0378, "lr": 1.7720450898673468e-06, "epoch": 1.486736175267256, "percentage": 29.73, "elapsed_time": "0:16:00", "remaining_time": "0:37:50", "throughput": 5766.2, "total_tokens": 5540224}
|
|
{"current_steps": 11270, "total_steps": 37885, "loss": 0.2048, "lr": 1.7717522030713088e-06, "epoch": 1.4873960670450046, "percentage": 29.75, "elapsed_time": "0:16:01", "remaining_time": "0:37:49", "throughput": 5766.89, "total_tokens": 5542784}
|
|
{"current_steps": 11275, "total_steps": 37885, "loss": 0.0836, "lr": 1.771459152476454e-06, "epoch": 1.488055958822753, "percentage": 29.76, "elapsed_time": "0:16:01", "remaining_time": "0:37:49", "throughput": 5767.12, "total_tokens": 5544896}
|
|
{"current_steps": 11280, "total_steps": 37885, "loss": 0.0574, "lr": 1.7711659381449807e-06, "epoch": 1.4887158506005016, "percentage": 29.77, "elapsed_time": "0:16:01", "remaining_time": "0:37:48", "throughput": 5767.86, "total_tokens": 5547520}
|
|
{"current_steps": 11285, "total_steps": 37885, "loss": 0.1081, "lr": 1.7708725601391214e-06, "epoch": 1.4893757423782499, "percentage": 29.79, "elapsed_time": "0:16:02", "remaining_time": "0:37:47", "throughput": 5768.41, "total_tokens": 5549952}
|
|
{"current_steps": 11290, "total_steps": 37885, "loss": 0.0337, "lr": 1.7705790185211433e-06, "epoch": 1.4900356341559984, "percentage": 29.8, "elapsed_time": "0:16:02", "remaining_time": "0:37:47", "throughput": 5769.32, "total_tokens": 5552768}
|
|
{"current_steps": 11295, "total_steps": 37885, "loss": 0.0916, "lr": 1.770285313353349e-06, "epoch": 1.4906955259337469, "percentage": 29.81, "elapsed_time": "0:16:02", "remaining_time": "0:37:46", "throughput": 5770.06, "total_tokens": 5555392}
|
|
{"current_steps": 11300, "total_steps": 37885, "loss": 0.1468, "lr": 1.7699914446980745e-06, "epoch": 1.4913554177114954, "percentage": 29.83, "elapsed_time": "0:16:03", "remaining_time": "0:37:45", "throughput": 5770.57, "total_tokens": 5557760}
|
|
{"current_steps": 11305, "total_steps": 37885, "loss": 0.0169, "lr": 1.7696974126176917e-06, "epoch": 1.4920153094892439, "percentage": 29.84, "elapsed_time": "0:16:03", "remaining_time": "0:37:45", "throughput": 5771.14, "total_tokens": 5560192}
|
|
{"current_steps": 11310, "total_steps": 37885, "loss": 0.042, "lr": 1.769403217174607e-06, "epoch": 1.4926752012669922, "percentage": 29.85, "elapsed_time": "0:16:03", "remaining_time": "0:37:44", "throughput": 5771.57, "total_tokens": 5562496}
|
|
{"current_steps": 11315, "total_steps": 37885, "loss": 0.128, "lr": 1.7691088584312608e-06, "epoch": 1.4933350930447407, "percentage": 29.87, "elapsed_time": "0:16:04", "remaining_time": "0:37:43", "throughput": 5772.19, "total_tokens": 5564992}
|
|
{"current_steps": 11320, "total_steps": 37885, "loss": 0.0005, "lr": 1.7688143364501292e-06, "epoch": 1.4939949848224892, "percentage": 29.88, "elapsed_time": "0:16:04", "remaining_time": "0:37:43", "throughput": 5772.93, "total_tokens": 5567616}
|
|
{"current_steps": 11325, "total_steps": 37885, "loss": 0.001, "lr": 1.7685196512937217e-06, "epoch": 1.4946548766002374, "percentage": 29.89, "elapsed_time": "0:16:04", "remaining_time": "0:37:42", "throughput": 5773.42, "total_tokens": 5569984}
|
|
{"current_steps": 11330, "total_steps": 37885, "loss": 0.1185, "lr": 1.7682248030245836e-06, "epoch": 1.495314768377986, "percentage": 29.91, "elapsed_time": "0:16:05", "remaining_time": "0:37:41", "throughput": 5773.73, "total_tokens": 5572160}
|
|
{"current_steps": 11335, "total_steps": 37885, "loss": 0.0006, "lr": 1.7679297917052939e-06, "epoch": 1.4959746601557344, "percentage": 29.92, "elapsed_time": "0:16:05", "remaining_time": "0:37:41", "throughput": 5774.1, "total_tokens": 5574400}
|
|
{"current_steps": 11340, "total_steps": 37885, "loss": 0.0005, "lr": 1.7676346173984669e-06, "epoch": 1.496634551933483, "percentage": 29.93, "elapsed_time": "0:16:05", "remaining_time": "0:37:40", "throughput": 5774.89, "total_tokens": 5577088}
|
|
{"current_steps": 11345, "total_steps": 37885, "loss": 0.0004, "lr": 1.7673392801667513e-06, "epoch": 1.4972944437112314, "percentage": 29.95, "elapsed_time": "0:16:06", "remaining_time": "0:37:39", "throughput": 5775.51, "total_tokens": 5579584}
|
|
{"current_steps": 11350, "total_steps": 37885, "loss": 0.2305, "lr": 1.7670437800728298e-06, "epoch": 1.4979543354889797, "percentage": 29.96, "elapsed_time": "0:16:06", "remaining_time": "0:37:39", "throughput": 5775.99, "total_tokens": 5581952}
|
|
{"current_steps": 11355, "total_steps": 37885, "loss": 0.0002, "lr": 1.7667481171794205e-06, "epoch": 1.4986142272667282, "percentage": 29.97, "elapsed_time": "0:16:06", "remaining_time": "0:37:38", "throughput": 5776.63, "total_tokens": 5584448}
|
|
{"current_steps": 11360, "total_steps": 37885, "loss": 0.0414, "lr": 1.7664522915492759e-06, "epoch": 1.4992741190444767, "percentage": 29.99, "elapsed_time": "0:16:07", "remaining_time": "0:37:38", "throughput": 5777.3, "total_tokens": 5587008}
|
|
{"current_steps": 11365, "total_steps": 37885, "loss": 0.0593, "lr": 1.7661563032451827e-06, "epoch": 1.4999340108222252, "percentage": 30.0, "elapsed_time": "0:16:07", "remaining_time": "0:37:37", "throughput": 5777.69, "total_tokens": 5589312}
|
|
{"current_steps": 11370, "total_steps": 37885, "loss": 0.1073, "lr": 1.7658601523299619e-06, "epoch": 1.5005939025999737, "percentage": 30.01, "elapsed_time": "0:16:07", "remaining_time": "0:37:36", "throughput": 5778.15, "total_tokens": 5591680}
|
|
{"current_steps": 11370, "total_steps": 37885, "eval_loss": 0.12565796077251434, "epoch": 1.5005939025999737, "percentage": 30.01, "elapsed_time": "0:16:15", "remaining_time": "0:37:55", "throughput": 5731.02, "total_tokens": 5591680}
|
|
{"current_steps": 11375, "total_steps": 37885, "loss": 0.0011, "lr": 1.7655638388664698e-06, "epoch": 1.501253794377722, "percentage": 30.03, "elapsed_time": "0:16:53", "remaining_time": "0:39:22", "throughput": 5518.42, "total_tokens": 5594176}
|
|
{"current_steps": 11380, "total_steps": 37885, "loss": 0.0044, "lr": 1.765267362917597e-06, "epoch": 1.5019136861554705, "percentage": 30.04, "elapsed_time": "0:16:54", "remaining_time": "0:39:21", "throughput": 5519.39, "total_tokens": 5597056}
|
|
{"current_steps": 11385, "total_steps": 37885, "loss": 0.0555, "lr": 1.7649707245462678e-06, "epoch": 1.502573577933219, "percentage": 30.05, "elapsed_time": "0:16:54", "remaining_time": "0:39:21", "throughput": 5519.94, "total_tokens": 5599488}
|
|
{"current_steps": 11390, "total_steps": 37885, "loss": 0.0716, "lr": 1.7646739238154416e-06, "epoch": 1.5032334697109673, "percentage": 30.06, "elapsed_time": "0:16:54", "remaining_time": "0:39:20", "throughput": 5520.45, "total_tokens": 5601856}
|
|
{"current_steps": 11395, "total_steps": 37885, "loss": 0.1088, "lr": 1.7643769607881126e-06, "epoch": 1.503893361488716, "percentage": 30.08, "elapsed_time": "0:16:55", "remaining_time": "0:39:19", "throughput": 5521.42, "total_tokens": 5604736}
|
|
{"current_steps": 11400, "total_steps": 37885, "loss": 0.0004, "lr": 1.7640798355273087e-06, "epoch": 1.5045532532664643, "percentage": 30.09, "elapsed_time": "0:16:55", "remaining_time": "0:39:19", "throughput": 5521.99, "total_tokens": 5607168}
|
|
{"current_steps": 11405, "total_steps": 37885, "loss": 0.0018, "lr": 1.7637825480960929e-06, "epoch": 1.5052131450442128, "percentage": 30.1, "elapsed_time": "0:16:55", "remaining_time": "0:39:18", "throughput": 5522.57, "total_tokens": 5609600}
|
|
{"current_steps": 11410, "total_steps": 37885, "loss": 0.2186, "lr": 1.7634850985575623e-06, "epoch": 1.5058730368219613, "percentage": 30.12, "elapsed_time": "0:16:56", "remaining_time": "0:39:17", "throughput": 5523.14, "total_tokens": 5612032}
|
|
{"current_steps": 11415, "total_steps": 37885, "loss": 0.0788, "lr": 1.7631874869748477e-06, "epoch": 1.5065329285997096, "percentage": 30.13, "elapsed_time": "0:16:56", "remaining_time": "0:39:16", "throughput": 5523.88, "total_tokens": 5614656}
|
|
{"current_steps": 11420, "total_steps": 37885, "loss": 0.128, "lr": 1.7628897134111163e-06, "epoch": 1.507192820377458, "percentage": 30.14, "elapsed_time": "0:16:56", "remaining_time": "0:39:16", "throughput": 5524.15, "total_tokens": 5616768}
|
|
{"current_steps": 11425, "total_steps": 37885, "loss": 0.1947, "lr": 1.762591777929567e-06, "epoch": 1.5078527121552066, "percentage": 30.16, "elapsed_time": "0:16:57", "remaining_time": "0:39:15", "throughput": 5524.57, "total_tokens": 5619008}
|
|
{"current_steps": 11430, "total_steps": 37885, "loss": 0.1306, "lr": 1.7622936805934355e-06, "epoch": 1.5085126039329548, "percentage": 30.17, "elapsed_time": "0:16:57", "remaining_time": "0:39:14", "throughput": 5525.16, "total_tokens": 5621440}
|
|
{"current_steps": 11435, "total_steps": 37885, "loss": 0.0579, "lr": 1.7619954214659901e-06, "epoch": 1.5091724957107036, "percentage": 30.18, "elapsed_time": "0:16:57", "remaining_time": "0:39:14", "throughput": 5525.76, "total_tokens": 5623872}
|
|
{"current_steps": 11440, "total_steps": 37885, "loss": 0.0584, "lr": 1.7616970006105347e-06, "epoch": 1.5098323874884518, "percentage": 30.2, "elapsed_time": "0:16:58", "remaining_time": "0:39:13", "throughput": 5526.3, "total_tokens": 5626240}
|
|
{"current_steps": 11445, "total_steps": 37885, "loss": 0.1526, "lr": 1.7613984180904065e-06, "epoch": 1.5104922792662003, "percentage": 30.21, "elapsed_time": "0:16:58", "remaining_time": "0:39:12", "throughput": 5526.78, "total_tokens": 5628544}
|
|
{"current_steps": 11450, "total_steps": 37885, "loss": 0.116, "lr": 1.7610996739689779e-06, "epoch": 1.5111521710439488, "percentage": 30.22, "elapsed_time": "0:16:58", "remaining_time": "0:39:11", "throughput": 5527.33, "total_tokens": 5630912}
|
|
{"current_steps": 11455, "total_steps": 37885, "loss": 0.1417, "lr": 1.7608007683096547e-06, "epoch": 1.5118120628216971, "percentage": 30.24, "elapsed_time": "0:16:59", "remaining_time": "0:39:11", "throughput": 5528.03, "total_tokens": 5633472}
|
|
{"current_steps": 11460, "total_steps": 37885, "loss": 0.1697, "lr": 1.7605017011758778e-06, "epoch": 1.5124719545994458, "percentage": 30.25, "elapsed_time": "0:16:59", "remaining_time": "0:39:10", "throughput": 5528.45, "total_tokens": 5635712}
|
|
{"current_steps": 11465, "total_steps": 37885, "loss": 0.0017, "lr": 1.7602024726311219e-06, "epoch": 1.5131318463771941, "percentage": 30.26, "elapsed_time": "0:16:59", "remaining_time": "0:39:09", "throughput": 5529.09, "total_tokens": 5638208}
|
|
{"current_steps": 11470, "total_steps": 37885, "loss": 0.0026, "lr": 1.7599030827388963e-06, "epoch": 1.5137917381549426, "percentage": 30.28, "elapsed_time": "0:17:00", "remaining_time": "0:39:09", "throughput": 5529.83, "total_tokens": 5640832}
|
|
{"current_steps": 11475, "total_steps": 37885, "loss": 0.0513, "lr": 1.7596035315627442e-06, "epoch": 1.5144516299326911, "percentage": 30.29, "elapsed_time": "0:17:00", "remaining_time": "0:39:08", "throughput": 5530.35, "total_tokens": 5643200}
|
|
{"current_steps": 11480, "total_steps": 37885, "loss": 0.0494, "lr": 1.7593038191662427e-06, "epoch": 1.5151115217104394, "percentage": 30.3, "elapsed_time": "0:17:00", "remaining_time": "0:39:07", "throughput": 5531.17, "total_tokens": 5645888}
|
|
{"current_steps": 11485, "total_steps": 37885, "loss": 0.0006, "lr": 1.7590039456130046e-06, "epoch": 1.515771413488188, "percentage": 30.32, "elapsed_time": "0:17:01", "remaining_time": "0:39:07", "throughput": 5531.59, "total_tokens": 5648128}
|
|
{"current_steps": 11490, "total_steps": 37885, "loss": 0.0801, "lr": 1.758703910966675e-06, "epoch": 1.5164313052659364, "percentage": 30.33, "elapsed_time": "0:17:01", "remaining_time": "0:39:06", "throughput": 5532.25, "total_tokens": 5650624}
|
|
{"current_steps": 11495, "total_steps": 37885, "loss": 0.1932, "lr": 1.7584037152909344e-06, "epoch": 1.5170911970436847, "percentage": 30.34, "elapsed_time": "0:17:01", "remaining_time": "0:39:05", "throughput": 5532.83, "total_tokens": 5653056}
|
|
{"current_steps": 11500, "total_steps": 37885, "loss": 0.0716, "lr": 1.7581033586494973e-06, "epoch": 1.5177510888214334, "percentage": 30.36, "elapsed_time": "0:17:02", "remaining_time": "0:39:04", "throughput": 5533.48, "total_tokens": 5655552}
|
|
{"current_steps": 11505, "total_steps": 37885, "loss": 0.1644, "lr": 1.757802841106112e-06, "epoch": 1.5184109805991817, "percentage": 30.37, "elapsed_time": "0:17:02", "remaining_time": "0:39:04", "throughput": 5534.19, "total_tokens": 5658112}
|
|
{"current_steps": 11510, "total_steps": 37885, "loss": 0.0823, "lr": 1.7575021627245612e-06, "epoch": 1.5190708723769302, "percentage": 30.38, "elapsed_time": "0:17:02", "remaining_time": "0:39:03", "throughput": 5534.73, "total_tokens": 5660480}
|
|
{"current_steps": 11515, "total_steps": 37885, "loss": 0.1695, "lr": 1.7572013235686618e-06, "epoch": 1.5197307641546787, "percentage": 30.39, "elapsed_time": "0:17:03", "remaining_time": "0:39:02", "throughput": 5535.27, "total_tokens": 5662848}
|
|
{"current_steps": 11520, "total_steps": 37885, "loss": 0.0332, "lr": 1.7569003237022647e-06, "epoch": 1.520390655932427, "percentage": 30.41, "elapsed_time": "0:17:03", "remaining_time": "0:39:02", "throughput": 5536.14, "total_tokens": 5665600}
|
|
{"current_steps": 11525, "total_steps": 37885, "loss": 0.0245, "lr": 1.756599163189255e-06, "epoch": 1.5210505477101757, "percentage": 30.42, "elapsed_time": "0:17:03", "remaining_time": "0:39:01", "throughput": 5536.49, "total_tokens": 5667776}
|
|
{"current_steps": 11530, "total_steps": 37885, "loss": 0.0693, "lr": 1.7562978420935516e-06, "epoch": 1.521710439487924, "percentage": 30.43, "elapsed_time": "0:17:04", "remaining_time": "0:39:00", "throughput": 5537.24, "total_tokens": 5670400}
|
|
{"current_steps": 11535, "total_steps": 37885, "loss": 0.0025, "lr": 1.755996360479108e-06, "epoch": 1.5223703312656724, "percentage": 30.45, "elapsed_time": "0:17:04", "remaining_time": "0:39:00", "throughput": 5538.12, "total_tokens": 5673152}
|
|
{"current_steps": 11540, "total_steps": 37885, "loss": 0.0894, "lr": 1.7556947184099115e-06, "epoch": 1.523030223043421, "percentage": 30.46, "elapsed_time": "0:17:04", "remaining_time": "0:38:59", "throughput": 5538.74, "total_tokens": 5675648}
|
|
{"current_steps": 11545, "total_steps": 37885, "loss": 0.0006, "lr": 1.7553929159499832e-06, "epoch": 1.5236901148211692, "percentage": 30.47, "elapsed_time": "0:17:05", "remaining_time": "0:38:58", "throughput": 5539.41, "total_tokens": 5678144}
|
|
{"current_steps": 11550, "total_steps": 37885, "loss": 0.0004, "lr": 1.755090953163379e-06, "epoch": 1.5243500065989177, "percentage": 30.49, "elapsed_time": "0:17:05", "remaining_time": "0:38:57", "throughput": 5539.87, "total_tokens": 5680448}
|
|
{"current_steps": 11555, "total_steps": 37885, "loss": 0.0007, "lr": 1.754788830114187e-06, "epoch": 1.5250098983766662, "percentage": 30.5, "elapsed_time": "0:17:05", "remaining_time": "0:38:57", "throughput": 5540.56, "total_tokens": 5683008}
|
|
{"current_steps": 11560, "total_steps": 37885, "loss": 0.1216, "lr": 1.7544865468665325e-06, "epoch": 1.5256697901544147, "percentage": 30.51, "elapsed_time": "0:17:06", "remaining_time": "0:38:56", "throughput": 5541.3, "total_tokens": 5685632}
|
|
{"current_steps": 11565, "total_steps": 37885, "loss": 0.0914, "lr": 1.7541841034845714e-06, "epoch": 1.5263296819321632, "percentage": 30.53, "elapsed_time": "0:17:06", "remaining_time": "0:38:55", "throughput": 5541.76, "total_tokens": 5687936}
|
|
{"current_steps": 11570, "total_steps": 37885, "loss": 0.0879, "lr": 1.753881500032496e-06, "epoch": 1.5269895737099115, "percentage": 30.54, "elapsed_time": "0:17:06", "remaining_time": "0:38:55", "throughput": 5542.53, "total_tokens": 5690560}
|
|
{"current_steps": 11575, "total_steps": 37885, "loss": 0.0659, "lr": 1.7535787365745314e-06, "epoch": 1.52764946548766, "percentage": 30.55, "elapsed_time": "0:17:07", "remaining_time": "0:38:54", "throughput": 5543.03, "total_tokens": 5692928}
|
|
{"current_steps": 11580, "total_steps": 37885, "loss": 0.1278, "lr": 1.7532758131749367e-06, "epoch": 1.5283093572654085, "percentage": 30.57, "elapsed_time": "0:17:07", "remaining_time": "0:38:53", "throughput": 5543.51, "total_tokens": 5695232}
|
|
{"current_steps": 11585, "total_steps": 37885, "loss": 0.0222, "lr": 1.7529727298980058e-06, "epoch": 1.5289692490431568, "percentage": 30.58, "elapsed_time": "0:17:07", "remaining_time": "0:38:53", "throughput": 5544.28, "total_tokens": 5697856}
|
|
{"current_steps": 11590, "total_steps": 37885, "loss": 0.1518, "lr": 1.7526694868080654e-06, "epoch": 1.5296291408209055, "percentage": 30.59, "elapsed_time": "0:17:08", "remaining_time": "0:38:52", "throughput": 5545.1, "total_tokens": 5700544}
|
|
{"current_steps": 11595, "total_steps": 37885, "loss": 0.0011, "lr": 1.752366083969477e-06, "epoch": 1.5302890325986538, "percentage": 30.61, "elapsed_time": "0:17:08", "remaining_time": "0:38:51", "throughput": 5545.71, "total_tokens": 5702976}
|
|
{"current_steps": 11600, "total_steps": 37885, "loss": 0.0432, "lr": 1.7520625214466352e-06, "epoch": 1.5309489243764023, "percentage": 30.62, "elapsed_time": "0:17:08", "remaining_time": "0:38:50", "throughput": 5546.47, "total_tokens": 5705600}
|
|
{"current_steps": 11605, "total_steps": 37885, "loss": 0.157, "lr": 1.7517587993039693e-06, "epoch": 1.5316088161541508, "percentage": 30.63, "elapsed_time": "0:17:09", "remaining_time": "0:38:50", "throughput": 5547.01, "total_tokens": 5707968}
|
|
{"current_steps": 11610, "total_steps": 37885, "loss": 0.1714, "lr": 1.751454917605942e-06, "epoch": 1.532268707931899, "percentage": 30.65, "elapsed_time": "0:17:09", "remaining_time": "0:38:49", "throughput": 5547.83, "total_tokens": 5710656}
|
|
{"current_steps": 11615, "total_steps": 37885, "loss": 0.1965, "lr": 1.7511508764170502e-06, "epoch": 1.5329285997096476, "percentage": 30.66, "elapsed_time": "0:17:09", "remaining_time": "0:38:48", "throughput": 5548.29, "total_tokens": 5712960}
|
|
{"current_steps": 11620, "total_steps": 37885, "loss": 0.1463, "lr": 1.7508466758018243e-06, "epoch": 1.533588491487396, "percentage": 30.67, "elapsed_time": "0:17:10", "remaining_time": "0:38:48", "throughput": 5548.94, "total_tokens": 5715456}
|
|
{"current_steps": 11625, "total_steps": 37885, "loss": 0.1403, "lr": 1.7505423158248285e-06, "epoch": 1.5342483832651446, "percentage": 30.68, "elapsed_time": "0:17:10", "remaining_time": "0:38:47", "throughput": 5549.69, "total_tokens": 5718080}
|
|
{"current_steps": 11630, "total_steps": 37885, "loss": 0.1222, "lr": 1.750237796550661e-06, "epoch": 1.534908275042893, "percentage": 30.7, "elapsed_time": "0:17:10", "remaining_time": "0:38:46", "throughput": 5550.22, "total_tokens": 5720448}
|
|
{"current_steps": 11635, "total_steps": 37885, "loss": 0.004, "lr": 1.7499331180439545e-06, "epoch": 1.5355681668206413, "percentage": 30.71, "elapsed_time": "0:17:10", "remaining_time": "0:38:46", "throughput": 5550.74, "total_tokens": 5722816}
|
|
{"current_steps": 11640, "total_steps": 37885, "loss": 0.0761, "lr": 1.749628280369374e-06, "epoch": 1.5362280585983898, "percentage": 30.72, "elapsed_time": "0:17:11", "remaining_time": "0:38:45", "throughput": 5551.26, "total_tokens": 5725184}
|
|
{"current_steps": 11645, "total_steps": 37885, "loss": 0.0645, "lr": 1.7493232835916195e-06, "epoch": 1.5368879503761383, "percentage": 30.74, "elapsed_time": "0:17:11", "remaining_time": "0:38:44", "throughput": 5552.07, "total_tokens": 5727872}
|
|
{"current_steps": 11650, "total_steps": 37885, "loss": 0.0513, "lr": 1.7490181277754238e-06, "epoch": 1.5375478421538866, "percentage": 30.75, "elapsed_time": "0:17:12", "remaining_time": "0:38:44", "throughput": 5552.85, "total_tokens": 5730560}
|
|
{"current_steps": 11655, "total_steps": 37885, "loss": 0.0013, "lr": 1.748712812985555e-06, "epoch": 1.5382077339316353, "percentage": 30.76, "elapsed_time": "0:17:12", "remaining_time": "0:38:43", "throughput": 5553.49, "total_tokens": 5733056}
|
|
{"current_steps": 11660, "total_steps": 37885, "loss": 0.0553, "lr": 1.7484073392868133e-06, "epoch": 1.5388676257093836, "percentage": 30.78, "elapsed_time": "0:17:12", "remaining_time": "0:38:42", "throughput": 5554.31, "total_tokens": 5735744}
|
|
{"current_steps": 11665, "total_steps": 37885, "loss": 0.2724, "lr": 1.7481017067440332e-06, "epoch": 1.5395275174871321, "percentage": 30.79, "elapsed_time": "0:17:13", "remaining_time": "0:38:41", "throughput": 5554.8, "total_tokens": 5738112}
|
|
{"current_steps": 11670, "total_steps": 37885, "loss": 0.0778, "lr": 1.7477959154220834e-06, "epoch": 1.5401874092648806, "percentage": 30.8, "elapsed_time": "0:17:13", "remaining_time": "0:38:41", "throughput": 5555.26, "total_tokens": 5740480}
|
|
{"current_steps": 11675, "total_steps": 37885, "loss": 0.1881, "lr": 1.7474899653858651e-06, "epoch": 1.540847301042629, "percentage": 30.82, "elapsed_time": "0:17:13", "remaining_time": "0:38:40", "throughput": 5555.59, "total_tokens": 5742720}
|
|
{"current_steps": 11680, "total_steps": 37885, "loss": 0.0014, "lr": 1.7471838567003153e-06, "epoch": 1.5415071928203774, "percentage": 30.83, "elapsed_time": "0:17:14", "remaining_time": "0:38:39", "throughput": 5556.1, "total_tokens": 5745088}
|
|
{"current_steps": 11685, "total_steps": 37885, "loss": 0.0763, "lr": 1.746877589430402e-06, "epoch": 1.542167084598126, "percentage": 30.84, "elapsed_time": "0:17:14", "remaining_time": "0:38:39", "throughput": 5556.45, "total_tokens": 5747328}
|
|
{"current_steps": 11690, "total_steps": 37885, "loss": 0.1275, "lr": 1.7465711636411288e-06, "epoch": 1.5428269763758744, "percentage": 30.86, "elapsed_time": "0:17:14", "remaining_time": "0:38:38", "throughput": 5557.17, "total_tokens": 5749952}
|
|
{"current_steps": 11695, "total_steps": 37885, "loss": 0.2444, "lr": 1.746264579397533e-06, "epoch": 1.543486868153623, "percentage": 30.87, "elapsed_time": "0:17:15", "remaining_time": "0:38:37", "throughput": 5557.82, "total_tokens": 5752512}
|
|
{"current_steps": 11700, "total_steps": 37885, "loss": 0.0617, "lr": 1.7459578367646836e-06, "epoch": 1.5441467599313712, "percentage": 30.88, "elapsed_time": "0:17:15", "remaining_time": "0:38:37", "throughput": 5558.54, "total_tokens": 5755136}
|
|
{"current_steps": 11705, "total_steps": 37885, "loss": 0.0409, "lr": 1.7456509358076854e-06, "epoch": 1.5448066517091197, "percentage": 30.9, "elapsed_time": "0:17:15", "remaining_time": "0:38:36", "throughput": 5559.12, "total_tokens": 5757568}
|
|
{"current_steps": 11710, "total_steps": 37885, "loss": 0.0776, "lr": 1.7453438765916758e-06, "epoch": 1.5454665434868682, "percentage": 30.91, "elapsed_time": "0:17:16", "remaining_time": "0:38:35", "throughput": 5559.67, "total_tokens": 5760000}
|
|
{"current_steps": 11715, "total_steps": 37885, "loss": 0.0011, "lr": 1.7450366591818255e-06, "epoch": 1.5461264352646165, "percentage": 30.92, "elapsed_time": "0:17:16", "remaining_time": "0:38:35", "throughput": 5560.28, "total_tokens": 5762496}
|
|
{"current_steps": 11720, "total_steps": 37885, "loss": 0.0578, "lr": 1.7447292836433393e-06, "epoch": 1.5467863270423652, "percentage": 30.94, "elapsed_time": "0:17:16", "remaining_time": "0:38:34", "throughput": 5560.87, "total_tokens": 5764992}
|
|
{"current_steps": 11725, "total_steps": 37885, "loss": 0.2353, "lr": 1.744421750041456e-06, "epoch": 1.5474462188201135, "percentage": 30.95, "elapsed_time": "0:17:17", "remaining_time": "0:38:33", "throughput": 5561.53, "total_tokens": 5767552}
|
|
{"current_steps": 11730, "total_steps": 37885, "loss": 0.0704, "lr": 1.7441140584414466e-06, "epoch": 1.548106110597862, "percentage": 30.96, "elapsed_time": "0:17:17", "remaining_time": "0:38:33", "throughput": 5562.56, "total_tokens": 5770496}
|
|
{"current_steps": 11735, "total_steps": 37885, "loss": 0.0033, "lr": 1.7438062089086167e-06, "epoch": 1.5487660023756105, "percentage": 30.98, "elapsed_time": "0:17:17", "remaining_time": "0:38:32", "throughput": 5563.04, "total_tokens": 5772864}
|
|
{"current_steps": 11740, "total_steps": 37885, "loss": 0.13, "lr": 1.7434982015083056e-06, "epoch": 1.5494258941533587, "percentage": 30.99, "elapsed_time": "0:17:18", "remaining_time": "0:38:31", "throughput": 5563.66, "total_tokens": 5775360}
|
|
{"current_steps": 11745, "total_steps": 37885, "loss": 0.0007, "lr": 1.743190036305885e-06, "epoch": 1.5500857859311075, "percentage": 31.0, "elapsed_time": "0:17:18", "remaining_time": "0:38:31", "throughput": 5564.15, "total_tokens": 5777728}
|
|
{"current_steps": 11750, "total_steps": 37885, "loss": 0.138, "lr": 1.7428817133667607e-06, "epoch": 1.5507456777088557, "percentage": 31.01, "elapsed_time": "0:17:18", "remaining_time": "0:38:30", "throughput": 5564.72, "total_tokens": 5780160}
|
|
{"current_steps": 11755, "total_steps": 37885, "loss": 0.0649, "lr": 1.7425732327563724e-06, "epoch": 1.5514055694866042, "percentage": 31.03, "elapsed_time": "0:17:19", "remaining_time": "0:38:29", "throughput": 5565.34, "total_tokens": 5782656}
|
|
{"current_steps": 11760, "total_steps": 37885, "loss": 0.0009, "lr": 1.742264594540193e-06, "epoch": 1.5520654612643527, "percentage": 31.04, "elapsed_time": "0:17:19", "remaining_time": "0:38:28", "throughput": 5565.67, "total_tokens": 5784832}
|
|
{"current_steps": 11765, "total_steps": 37885, "loss": 0.0474, "lr": 1.7419557987837282e-06, "epoch": 1.552725353042101, "percentage": 31.05, "elapsed_time": "0:17:19", "remaining_time": "0:38:28", "throughput": 5566.33, "total_tokens": 5787392}
|
|
{"current_steps": 11770, "total_steps": 37885, "loss": 0.0006, "lr": 1.7416468455525179e-06, "epoch": 1.5533852448198495, "percentage": 31.07, "elapsed_time": "0:17:20", "remaining_time": "0:38:27", "throughput": 5567.12, "total_tokens": 5790080}
|
|
{"current_steps": 11775, "total_steps": 37885, "loss": 0.1212, "lr": 1.7413377349121353e-06, "epoch": 1.554045136597598, "percentage": 31.08, "elapsed_time": "0:17:20", "remaining_time": "0:38:26", "throughput": 5567.72, "total_tokens": 5792576}
|
|
{"current_steps": 11780, "total_steps": 37885, "loss": 0.0603, "lr": 1.7410284669281868e-06, "epoch": 1.5547050283753463, "percentage": 31.09, "elapsed_time": "0:17:20", "remaining_time": "0:38:26", "throughput": 5568.37, "total_tokens": 5795136}
|
|
{"current_steps": 11785, "total_steps": 37885, "loss": 0.1844, "lr": 1.7407190416663124e-06, "epoch": 1.555364920153095, "percentage": 31.11, "elapsed_time": "0:17:21", "remaining_time": "0:38:25", "throughput": 5568.92, "total_tokens": 5797568}
|
|
{"current_steps": 11790, "total_steps": 37885, "loss": 0.068, "lr": 1.7404094591921852e-06, "epoch": 1.5560248119308433, "percentage": 31.12, "elapsed_time": "0:17:21", "remaining_time": "0:38:24", "throughput": 5569.31, "total_tokens": 5799808}
|
|
{"current_steps": 11795, "total_steps": 37885, "loss": 0.0521, "lr": 1.740099719571512e-06, "epoch": 1.5566847037085918, "percentage": 31.13, "elapsed_time": "0:17:21", "remaining_time": "0:38:24", "throughput": 5569.89, "total_tokens": 5802240}
|
|
{"current_steps": 11800, "total_steps": 37885, "loss": 0.0007, "lr": 1.7397898228700324e-06, "epoch": 1.5573445954863403, "percentage": 31.15, "elapsed_time": "0:17:22", "remaining_time": "0:38:23", "throughput": 5570.26, "total_tokens": 5804480}
|
|
{"current_steps": 11805, "total_steps": 37885, "loss": 0.1066, "lr": 1.7394797691535203e-06, "epoch": 1.5580044872640886, "percentage": 31.16, "elapsed_time": "0:17:22", "remaining_time": "0:38:22", "throughput": 5570.82, "total_tokens": 5806912}
|
|
{"current_steps": 11810, "total_steps": 37885, "loss": 0.0599, "lr": 1.739169558487782e-06, "epoch": 1.5586643790418373, "percentage": 31.17, "elapsed_time": "0:17:22", "remaining_time": "0:38:22", "throughput": 5571.19, "total_tokens": 5809152}
|
|
{"current_steps": 11815, "total_steps": 37885, "loss": 0.033, "lr": 1.7388591909386575e-06, "epoch": 1.5593242708195856, "percentage": 31.19, "elapsed_time": "0:17:23", "remaining_time": "0:38:21", "throughput": 5571.85, "total_tokens": 5811712}
|
|
{"current_steps": 11820, "total_steps": 37885, "loss": 0.1292, "lr": 1.7385486665720203e-06, "epoch": 1.559984162597334, "percentage": 31.2, "elapsed_time": "0:17:23", "remaining_time": "0:38:20", "throughput": 5572.39, "total_tokens": 5814144}
|
|
{"current_steps": 11825, "total_steps": 37885, "loss": 0.0006, "lr": 1.7382379854537767e-06, "epoch": 1.5606440543750826, "percentage": 31.21, "elapsed_time": "0:17:23", "remaining_time": "0:38:20", "throughput": 5572.88, "total_tokens": 5816512}
|
|
{"current_steps": 11830, "total_steps": 37885, "loss": 0.17, "lr": 1.7379271476498666e-06, "epoch": 1.5613039461528309, "percentage": 31.23, "elapsed_time": "0:17:24", "remaining_time": "0:38:19", "throughput": 5573.52, "total_tokens": 5819072}
|
|
{"current_steps": 11835, "total_steps": 37885, "loss": 0.3636, "lr": 1.737616153226263e-06, "epoch": 1.5619638379305794, "percentage": 31.24, "elapsed_time": "0:17:24", "remaining_time": "0:38:18", "throughput": 5574.16, "total_tokens": 5821632}
|
|
{"current_steps": 11840, "total_steps": 37885, "loss": 0.0012, "lr": 1.7373050022489722e-06, "epoch": 1.5626237297083279, "percentage": 31.25, "elapsed_time": "0:17:24", "remaining_time": "0:38:18", "throughput": 5574.64, "total_tokens": 5824000}
|
|
{"current_steps": 11845, "total_steps": 37885, "loss": 0.1184, "lr": 1.736993694784034e-06, "epoch": 1.5632836214860761, "percentage": 31.27, "elapsed_time": "0:17:25", "remaining_time": "0:38:17", "throughput": 5575.07, "total_tokens": 5826304}
|
|
{"current_steps": 11850, "total_steps": 37885, "loss": 0.0028, "lr": 1.736682230897521e-06, "epoch": 1.5639435132638249, "percentage": 31.28, "elapsed_time": "0:17:25", "remaining_time": "0:38:16", "throughput": 5575.48, "total_tokens": 5828608}
|
|
{"current_steps": 11855, "total_steps": 37885, "loss": 0.0607, "lr": 1.7363706106555388e-06, "epoch": 1.5646034050415731, "percentage": 31.29, "elapsed_time": "0:17:25", "remaining_time": "0:38:16", "throughput": 5576.18, "total_tokens": 5831232}
|
|
{"current_steps": 11860, "total_steps": 37885, "loss": 0.1293, "lr": 1.7360588341242273e-06, "epoch": 1.5652632968193216, "percentage": 31.31, "elapsed_time": "0:17:26", "remaining_time": "0:38:15", "throughput": 5576.73, "total_tokens": 5833664}
|
|
{"current_steps": 11865, "total_steps": 37885, "loss": 0.047, "lr": 1.7357469013697582e-06, "epoch": 1.5659231885970701, "percentage": 31.32, "elapsed_time": "0:17:26", "remaining_time": "0:38:14", "throughput": 5577.15, "total_tokens": 5835968}
|
|
{"current_steps": 11870, "total_steps": 37885, "loss": 0.05, "lr": 1.735434812458337e-06, "epoch": 1.5665830803748184, "percentage": 31.33, "elapsed_time": "0:17:26", "remaining_time": "0:38:14", "throughput": 5577.75, "total_tokens": 5838464}
|
|
{"current_steps": 11875, "total_steps": 37885, "loss": 0.0951, "lr": 1.7351225674562023e-06, "epoch": 1.5672429721525671, "percentage": 31.34, "elapsed_time": "0:17:27", "remaining_time": "0:38:13", "throughput": 5578.17, "total_tokens": 5840768}
|
|
{"current_steps": 11880, "total_steps": 37885, "loss": 0.0417, "lr": 1.7348101664296265e-06, "epoch": 1.5679028639303154, "percentage": 31.36, "elapsed_time": "0:17:27", "remaining_time": "0:38:12", "throughput": 5578.84, "total_tokens": 5843328}
|
|
{"current_steps": 11885, "total_steps": 37885, "loss": 0.0526, "lr": 1.7344976094449138e-06, "epoch": 1.568562755708064, "percentage": 31.37, "elapsed_time": "0:17:27", "remaining_time": "0:38:12", "throughput": 5579.44, "total_tokens": 5845824}
|
|
{"current_steps": 11890, "total_steps": 37885, "loss": 0.0011, "lr": 1.734184896568402e-06, "epoch": 1.5692226474858124, "percentage": 31.38, "elapsed_time": "0:17:28", "remaining_time": "0:38:11", "throughput": 5579.76, "total_tokens": 5848000}
|
|
{"current_steps": 11895, "total_steps": 37885, "loss": 0.1243, "lr": 1.7338720278664627e-06, "epoch": 1.5698825392635607, "percentage": 31.4, "elapsed_time": "0:17:28", "remaining_time": "0:38:10", "throughput": 5580.3, "total_tokens": 5850432}
|
|
{"current_steps": 11900, "total_steps": 37885, "loss": 0.0003, "lr": 1.7335590034054997e-06, "epoch": 1.5705424310413092, "percentage": 31.41, "elapsed_time": "0:17:28", "remaining_time": "0:38:10", "throughput": 5580.78, "total_tokens": 5852800}
|
|
{"current_steps": 11905, "total_steps": 37885, "loss": 0.0568, "lr": 1.7332458232519502e-06, "epoch": 1.5712023228190577, "percentage": 31.42, "elapsed_time": "0:17:29", "remaining_time": "0:38:09", "throughput": 5581.21, "total_tokens": 5855104}
|
|
{"current_steps": 11910, "total_steps": 37885, "loss": 0.0006, "lr": 1.7329324874722846e-06, "epoch": 1.571862214596806, "percentage": 31.44, "elapsed_time": "0:17:29", "remaining_time": "0:38:08", "throughput": 5581.86, "total_tokens": 5857664}
|
|
{"current_steps": 11915, "total_steps": 37885, "loss": 0.0007, "lr": 1.7326189961330058e-06, "epoch": 1.5725221063745547, "percentage": 31.45, "elapsed_time": "0:17:29", "remaining_time": "0:38:08", "throughput": 5582.22, "total_tokens": 5859904}
|
|
{"current_steps": 11920, "total_steps": 37885, "loss": 0.0809, "lr": 1.7323053493006505e-06, "epoch": 1.573181998152303, "percentage": 31.46, "elapsed_time": "0:17:30", "remaining_time": "0:38:07", "throughput": 5582.53, "total_tokens": 5862080}
|
|
{"current_steps": 11925, "total_steps": 37885, "loss": 0.0004, "lr": 1.7319915470417876e-06, "epoch": 1.5738418899300515, "percentage": 31.48, "elapsed_time": "0:17:30", "remaining_time": "0:38:06", "throughput": 5582.96, "total_tokens": 5864384}
|
|
{"current_steps": 11930, "total_steps": 37885, "loss": 0.191, "lr": 1.7316775894230197e-06, "epoch": 1.5745017817078, "percentage": 31.49, "elapsed_time": "0:17:30", "remaining_time": "0:38:05", "throughput": 5583.44, "total_tokens": 5866752}
|
|
{"current_steps": 11935, "total_steps": 37885, "loss": 0.0737, "lr": 1.7313634765109816e-06, "epoch": 1.5751616734855483, "percentage": 31.5, "elapsed_time": "0:17:31", "remaining_time": "0:38:05", "throughput": 5584.06, "total_tokens": 5869248}
|
|
{"current_steps": 11940, "total_steps": 37885, "loss": 0.0554, "lr": 1.731049208372342e-06, "epoch": 1.575821565263297, "percentage": 31.52, "elapsed_time": "0:17:31", "remaining_time": "0:38:04", "throughput": 5584.76, "total_tokens": 5871872}
|
|
{"current_steps": 11945, "total_steps": 37885, "loss": 0.1984, "lr": 1.7307347850738014e-06, "epoch": 1.5764814570410453, "percentage": 31.53, "elapsed_time": "0:17:31", "remaining_time": "0:38:03", "throughput": 5585.17, "total_tokens": 5874176}
|
|
{"current_steps": 11950, "total_steps": 37885, "loss": 0.0005, "lr": 1.7304202066820945e-06, "epoch": 1.5771413488187938, "percentage": 31.54, "elapsed_time": "0:17:32", "remaining_time": "0:38:03", "throughput": 5585.58, "total_tokens": 5876480}
|
|
{"current_steps": 11955, "total_steps": 37885, "loss": 0.0588, "lr": 1.7301054732639882e-06, "epoch": 1.5778012405965423, "percentage": 31.56, "elapsed_time": "0:17:32", "remaining_time": "0:38:02", "throughput": 5586.26, "total_tokens": 5879104}
|
|
{"current_steps": 11960, "total_steps": 37885, "loss": 0.1775, "lr": 1.729790584886282e-06, "epoch": 1.5784611323742905, "percentage": 31.57, "elapsed_time": "0:17:32", "remaining_time": "0:38:02", "throughput": 5587.09, "total_tokens": 5881856}
|
|
{"current_steps": 11965, "total_steps": 37885, "loss": 0.0426, "lr": 1.7294755416158089e-06, "epoch": 1.579121024152039, "percentage": 31.58, "elapsed_time": "0:17:33", "remaining_time": "0:38:01", "throughput": 5587.72, "total_tokens": 5884416}
|
|
{"current_steps": 11970, "total_steps": 37885, "loss": 0.1012, "lr": 1.7291603435194344e-06, "epoch": 1.5797809159297875, "percentage": 31.6, "elapsed_time": "0:17:33", "remaining_time": "0:38:00", "throughput": 5588.41, "total_tokens": 5887040}
|
|
{"current_steps": 11975, "total_steps": 37885, "loss": 0.001, "lr": 1.7288449906640571e-06, "epoch": 1.5804408077075358, "percentage": 31.61, "elapsed_time": "0:17:33", "remaining_time": "0:38:00", "throughput": 5588.98, "total_tokens": 5889536}
|
|
{"current_steps": 11980, "total_steps": 37885, "loss": 0.1094, "lr": 1.7285294831166087e-06, "epoch": 1.5811006994852845, "percentage": 31.62, "elapsed_time": "0:17:34", "remaining_time": "0:37:59", "throughput": 5589.28, "total_tokens": 5891712}
|
|
{"current_steps": 11985, "total_steps": 37885, "loss": 0.0588, "lr": 1.728213820944053e-06, "epoch": 1.5817605912630328, "percentage": 31.64, "elapsed_time": "0:17:34", "remaining_time": "0:37:58", "throughput": 5589.68, "total_tokens": 5894016}
|
|
{"current_steps": 11990, "total_steps": 37885, "loss": 0.0014, "lr": 1.727898004213387e-06, "epoch": 1.5824204830407813, "percentage": 31.65, "elapsed_time": "0:17:34", "remaining_time": "0:37:58", "throughput": 5590.12, "total_tokens": 5896320}
|
|
{"current_steps": 11995, "total_steps": 37885, "loss": 0.1045, "lr": 1.7275820329916408e-06, "epoch": 1.5830803748185298, "percentage": 31.66, "elapsed_time": "0:17:35", "remaining_time": "0:37:57", "throughput": 5590.77, "total_tokens": 5898880}
|
|
{"current_steps": 12000, "total_steps": 37885, "loss": 0.1356, "lr": 1.7272659073458766e-06, "epoch": 1.583740266596278, "percentage": 31.67, "elapsed_time": "0:17:35", "remaining_time": "0:37:56", "throughput": 5591.6, "total_tokens": 5901632}
|
|
{"current_steps": 12005, "total_steps": 37885, "loss": 0.0449, "lr": 1.7269496273431903e-06, "epoch": 1.5844001583740268, "percentage": 31.69, "elapsed_time": "0:17:35", "remaining_time": "0:37:56", "throughput": 5592.0, "total_tokens": 5903936}
|
|
{"current_steps": 12010, "total_steps": 37885, "loss": 0.0552, "lr": 1.7266331930507097e-06, "epoch": 1.585060050151775, "percentage": 31.7, "elapsed_time": "0:17:36", "remaining_time": "0:37:55", "throughput": 5592.38, "total_tokens": 5906176}
|
|
{"current_steps": 12015, "total_steps": 37885, "loss": 0.0456, "lr": 1.7263166045355954e-06, "epoch": 1.5857199419295236, "percentage": 31.71, "elapsed_time": "0:17:36", "remaining_time": "0:37:54", "throughput": 5593.17, "total_tokens": 5908864}
|
|
{"current_steps": 12020, "total_steps": 37885, "loss": 0.0356, "lr": 1.7259998618650418e-06, "epoch": 1.586379833707272, "percentage": 31.73, "elapsed_time": "0:17:36", "remaining_time": "0:37:53", "throughput": 5593.84, "total_tokens": 5911424}
|
|
{"current_steps": 12025, "total_steps": 37885, "loss": 0.0003, "lr": 1.7256829651062745e-06, "epoch": 1.5870397254850204, "percentage": 31.74, "elapsed_time": "0:17:37", "remaining_time": "0:37:53", "throughput": 5594.47, "total_tokens": 5913920}
|
|
{"current_steps": 12030, "total_steps": 37885, "loss": 0.2519, "lr": 1.725365914326553e-06, "epoch": 1.5876996172627689, "percentage": 31.75, "elapsed_time": "0:17:37", "remaining_time": "0:37:52", "throughput": 5594.86, "total_tokens": 5916160}
|
|
{"current_steps": 12035, "total_steps": 37885, "loss": 0.0035, "lr": 1.7250487095931687e-06, "epoch": 1.5883595090405174, "percentage": 31.77, "elapsed_time": "0:17:37", "remaining_time": "0:37:51", "throughput": 5595.49, "total_tokens": 5918656}
|
|
{"current_steps": 12040, "total_steps": 37885, "loss": 0.0008, "lr": 1.7247313509734465e-06, "epoch": 1.5890194008182656, "percentage": 31.78, "elapsed_time": "0:17:38", "remaining_time": "0:37:51", "throughput": 5596.05, "total_tokens": 5921088}
|
|
{"current_steps": 12045, "total_steps": 37885, "loss": 0.0678, "lr": 1.7244138385347429e-06, "epoch": 1.5896792925960144, "percentage": 31.79, "elapsed_time": "0:17:38", "remaining_time": "0:37:50", "throughput": 5596.57, "total_tokens": 5923456}
|
|
{"current_steps": 12050, "total_steps": 37885, "loss": 0.0005, "lr": 1.7240961723444479e-06, "epoch": 1.5903391843737626, "percentage": 31.81, "elapsed_time": "0:17:38", "remaining_time": "0:37:49", "throughput": 5597.13, "total_tokens": 5925888}
|
|
{"current_steps": 12055, "total_steps": 37885, "loss": 0.0975, "lr": 1.7237783524699836e-06, "epoch": 1.5909990761515111, "percentage": 31.82, "elapsed_time": "0:17:39", "remaining_time": "0:37:49", "throughput": 5597.76, "total_tokens": 5928384}
|
|
{"current_steps": 12060, "total_steps": 37885, "loss": 0.0006, "lr": 1.7234603789788054e-06, "epoch": 1.5916589679292596, "percentage": 31.83, "elapsed_time": "0:17:39", "remaining_time": "0:37:48", "throughput": 5598.37, "total_tokens": 5930880}
|
|
{"current_steps": 12065, "total_steps": 37885, "loss": 0.0004, "lr": 1.7231422519384008e-06, "epoch": 1.592318859707008, "percentage": 31.85, "elapsed_time": "0:17:39", "remaining_time": "0:37:47", "throughput": 5598.82, "total_tokens": 5933184}
|
|
{"current_steps": 12070, "total_steps": 37885, "loss": 0.2659, "lr": 1.7228239714162895e-06, "epoch": 1.5929787514847566, "percentage": 31.86, "elapsed_time": "0:17:40", "remaining_time": "0:37:47", "throughput": 5599.5, "total_tokens": 5935744}
|
|
{"current_steps": 12075, "total_steps": 37885, "loss": 0.0666, "lr": 1.7225055374800249e-06, "epoch": 1.593638643262505, "percentage": 31.87, "elapsed_time": "0:17:40", "remaining_time": "0:37:46", "throughput": 5600.17, "total_tokens": 5938304}
|
|
{"current_steps": 12080, "total_steps": 37885, "loss": 0.0895, "lr": 1.7221869501971917e-06, "epoch": 1.5942985350402534, "percentage": 31.89, "elapsed_time": "0:17:40", "remaining_time": "0:37:45", "throughput": 5600.96, "total_tokens": 5940992}
|
|
{"current_steps": 12085, "total_steps": 37885, "loss": 0.0722, "lr": 1.721868209635408e-06, "epoch": 1.594958426818002, "percentage": 31.9, "elapsed_time": "0:17:41", "remaining_time": "0:37:45", "throughput": 5601.62, "total_tokens": 5943552}
|
|
{"current_steps": 12090, "total_steps": 37885, "loss": 0.0022, "lr": 1.7215493158623242e-06, "epoch": 1.5956183185957502, "percentage": 31.91, "elapsed_time": "0:17:41", "remaining_time": "0:37:44", "throughput": 5602.35, "total_tokens": 5946176}
|
|
{"current_steps": 12095, "total_steps": 37885, "loss": 0.0013, "lr": 1.7212302689456234e-06, "epoch": 1.5962782103734987, "percentage": 31.93, "elapsed_time": "0:17:41", "remaining_time": "0:37:43", "throughput": 5603.07, "total_tokens": 5948800}
|
|
{"current_steps": 12100, "total_steps": 37885, "loss": 0.0489, "lr": 1.72091106895302e-06, "epoch": 1.5969381021512472, "percentage": 31.94, "elapsed_time": "0:17:42", "remaining_time": "0:37:43", "throughput": 5603.68, "total_tokens": 5951296}
|
|
{"current_steps": 12105, "total_steps": 37885, "loss": 0.0734, "lr": 1.7205917159522635e-06, "epoch": 1.5975979939289955, "percentage": 31.95, "elapsed_time": "0:17:42", "remaining_time": "0:37:42", "throughput": 5604.13, "total_tokens": 5953600}
|
|
{"current_steps": 12110, "total_steps": 37885, "loss": 0.0671, "lr": 1.7202722100111328e-06, "epoch": 1.5982578857067442, "percentage": 31.97, "elapsed_time": "0:17:42", "remaining_time": "0:37:41", "throughput": 5604.84, "total_tokens": 5956224}
|
|
{"current_steps": 12115, "total_steps": 37885, "loss": 0.1586, "lr": 1.7199525511974417e-06, "epoch": 1.5989177774844925, "percentage": 31.98, "elapsed_time": "0:17:43", "remaining_time": "0:37:41", "throughput": 5605.34, "total_tokens": 5958656}
|
|
{"current_steps": 12120, "total_steps": 37885, "loss": 0.0006, "lr": 1.7196327395790352e-06, "epoch": 1.599577669262241, "percentage": 31.99, "elapsed_time": "0:17:43", "remaining_time": "0:37:40", "throughput": 5605.84, "total_tokens": 5961088}
|
|
{"current_steps": 12125, "total_steps": 37885, "loss": 0.1002, "lr": 1.7193127752237906e-06, "epoch": 1.6002375610399895, "percentage": 32.0, "elapsed_time": "0:17:43", "remaining_time": "0:37:39", "throughput": 5606.33, "total_tokens": 5963520}
|
|
{"current_steps": 12130, "total_steps": 37885, "loss": 0.0006, "lr": 1.7189926581996184e-06, "epoch": 1.6008974528177378, "percentage": 32.02, "elapsed_time": "0:17:44", "remaining_time": "0:37:39", "throughput": 5606.78, "total_tokens": 5965888}
|
|
{"current_steps": 12135, "total_steps": 37885, "loss": 0.0669, "lr": 1.7186723885744609e-06, "epoch": 1.6015573445954865, "percentage": 32.03, "elapsed_time": "0:17:44", "remaining_time": "0:37:38", "throughput": 5607.06, "total_tokens": 5968064}
|
|
{"current_steps": 12140, "total_steps": 37885, "loss": 0.138, "lr": 1.7183519664162934e-06, "epoch": 1.6022172363732348, "percentage": 32.04, "elapsed_time": "0:17:44", "remaining_time": "0:37:37", "throughput": 5607.62, "total_tokens": 5970560}
|
|
{"current_steps": 12145, "total_steps": 37885, "loss": 0.1324, "lr": 1.7180313917931226e-06, "epoch": 1.6028771281509833, "percentage": 32.06, "elapsed_time": "0:17:45", "remaining_time": "0:37:37", "throughput": 5608.35, "total_tokens": 5973248}
|
|
{"current_steps": 12150, "total_steps": 37885, "loss": 0.0511, "lr": 1.717710664772989e-06, "epoch": 1.6035370199287318, "percentage": 32.07, "elapsed_time": "0:17:45", "remaining_time": "0:37:36", "throughput": 5608.95, "total_tokens": 5975808}
|
|
{"current_steps": 12155, "total_steps": 37885, "loss": 0.0011, "lr": 1.7173897854239635e-06, "epoch": 1.60419691170648, "percentage": 32.08, "elapsed_time": "0:17:45", "remaining_time": "0:37:35", "throughput": 5609.38, "total_tokens": 5978176}
|
|
{"current_steps": 12160, "total_steps": 37885, "loss": 0.0115, "lr": 1.7170687538141512e-06, "epoch": 1.6048568034842285, "percentage": 32.1, "elapsed_time": "0:17:46", "remaining_time": "0:37:35", "throughput": 5609.88, "total_tokens": 5980608}
|
|
{"current_steps": 12165, "total_steps": 37885, "loss": 0.0712, "lr": 1.7167475700116882e-06, "epoch": 1.605516695261977, "percentage": 32.11, "elapsed_time": "0:17:46", "remaining_time": "0:37:34", "throughput": 5610.72, "total_tokens": 5983424}
|
|
{"current_steps": 12170, "total_steps": 37885, "loss": 0.1335, "lr": 1.7164262340847442e-06, "epoch": 1.6061765870397253, "percentage": 32.12, "elapsed_time": "0:17:46", "remaining_time": "0:37:34", "throughput": 5611.1, "total_tokens": 5985728}
|
|
{"current_steps": 12175, "total_steps": 37885, "loss": 0.0008, "lr": 1.71610474610152e-06, "epoch": 1.606836478817474, "percentage": 32.14, "elapsed_time": "0:17:47", "remaining_time": "0:37:33", "throughput": 5611.71, "total_tokens": 5988288}
|
|
{"current_steps": 12180, "total_steps": 37885, "loss": 0.0775, "lr": 1.7157831061302485e-06, "epoch": 1.6074963705952223, "percentage": 32.15, "elapsed_time": "0:17:47", "remaining_time": "0:37:32", "throughput": 5612.63, "total_tokens": 5991232}
|
|
{"current_steps": 12185, "total_steps": 37885, "loss": 0.0946, "lr": 1.7154613142391968e-06, "epoch": 1.6081562623729708, "percentage": 32.16, "elapsed_time": "0:17:47", "remaining_time": "0:37:32", "throughput": 5613.17, "total_tokens": 5993728}
|
|
{"current_steps": 12190, "total_steps": 37885, "loss": 0.0603, "lr": 1.7151393704966616e-06, "epoch": 1.6088161541507193, "percentage": 32.18, "elapsed_time": "0:17:48", "remaining_time": "0:37:31", "throughput": 5613.77, "total_tokens": 5996288}
|
|
{"current_steps": 12195, "total_steps": 37885, "loss": 0.1577, "lr": 1.7148172749709736e-06, "epoch": 1.6094760459284676, "percentage": 32.19, "elapsed_time": "0:17:48", "remaining_time": "0:37:30", "throughput": 5614.4, "total_tokens": 5998848}
|
|
{"current_steps": 12200, "total_steps": 37885, "loss": 0.0006, "lr": 1.7144950277304955e-06, "epoch": 1.6101359377062163, "percentage": 32.2, "elapsed_time": "0:17:48", "remaining_time": "0:37:30", "throughput": 5615.28, "total_tokens": 6001728}
|
|
{"current_steps": 12205, "total_steps": 37885, "loss": 0.0347, "lr": 1.7141726288436216e-06, "epoch": 1.6107958294839646, "percentage": 32.22, "elapsed_time": "0:17:49", "remaining_time": "0:37:29", "throughput": 5616.07, "total_tokens": 6004480}
|
|
{"current_steps": 12210, "total_steps": 37885, "loss": 0.0302, "lr": 1.713850078378779e-06, "epoch": 1.611455721261713, "percentage": 32.23, "elapsed_time": "0:17:49", "remaining_time": "0:37:28", "throughput": 5616.58, "total_tokens": 6006912}
|
|
{"current_steps": 12215, "total_steps": 37885, "loss": 0.1457, "lr": 1.7135273764044262e-06, "epoch": 1.6121156130394616, "percentage": 32.24, "elapsed_time": "0:17:49", "remaining_time": "0:37:28", "throughput": 5617.1, "total_tokens": 6009344}
|
|
{"current_steps": 12220, "total_steps": 37885, "loss": 0.0659, "lr": 1.7132045229890552e-06, "epoch": 1.6127755048172099, "percentage": 32.26, "elapsed_time": "0:17:50", "remaining_time": "0:37:27", "throughput": 5617.6, "total_tokens": 6011776}
|
|
{"current_steps": 12225, "total_steps": 37885, "loss": 0.1002, "lr": 1.7128815182011886e-06, "epoch": 1.6134353965949584, "percentage": 32.27, "elapsed_time": "0:17:50", "remaining_time": "0:37:26", "throughput": 5617.97, "total_tokens": 6014080}
|
|
{"current_steps": 12230, "total_steps": 37885, "loss": 0.1825, "lr": 1.7125583621093819e-06, "epoch": 1.6140952883727069, "percentage": 32.28, "elapsed_time": "0:17:50", "remaining_time": "0:37:26", "throughput": 5618.56, "total_tokens": 6016640}
|
|
{"current_steps": 12235, "total_steps": 37885, "loss": 0.0623, "lr": 1.712235054782223e-06, "epoch": 1.6147551801504552, "percentage": 32.3, "elapsed_time": "0:17:51", "remaining_time": "0:37:25", "throughput": 5618.87, "total_tokens": 6018816}
|
|
{"current_steps": 12240, "total_steps": 37885, "loss": 0.001, "lr": 1.7119115962883313e-06, "epoch": 1.6154150719282039, "percentage": 32.31, "elapsed_time": "0:17:51", "remaining_time": "0:37:24", "throughput": 5619.48, "total_tokens": 6021312}
|
|
{"current_steps": 12245, "total_steps": 37885, "loss": 0.0006, "lr": 1.7115879866963586e-06, "epoch": 1.6160749637059522, "percentage": 32.32, "elapsed_time": "0:17:51", "remaining_time": "0:37:24", "throughput": 5619.92, "total_tokens": 6023616}
|
|
{"current_steps": 12250, "total_steps": 37885, "loss": 0.1983, "lr": 1.7112642260749885e-06, "epoch": 1.6167348554837007, "percentage": 32.33, "elapsed_time": "0:17:52", "remaining_time": "0:37:23", "throughput": 5620.52, "total_tokens": 6026112}
|
|
{"current_steps": 12255, "total_steps": 37885, "loss": 0.0004, "lr": 1.7109403144929369e-06, "epoch": 1.6173947472614492, "percentage": 32.35, "elapsed_time": "0:17:52", "remaining_time": "0:37:23", "throughput": 5621.05, "total_tokens": 6028544}
|
|
{"current_steps": 12260, "total_steps": 37885, "loss": 0.0435, "lr": 1.7106162520189522e-06, "epoch": 1.6180546390391974, "percentage": 32.36, "elapsed_time": "0:17:52", "remaining_time": "0:37:22", "throughput": 5621.69, "total_tokens": 6031104}
|
|
{"current_steps": 12265, "total_steps": 37885, "loss": 0.1353, "lr": 1.7102920387218136e-06, "epoch": 1.6187145308169462, "percentage": 32.37, "elapsed_time": "0:17:53", "remaining_time": "0:37:21", "throughput": 5622.38, "total_tokens": 6033728}
|
|
{"current_steps": 12270, "total_steps": 37885, "loss": 0.2136, "lr": 1.7099676746703332e-06, "epoch": 1.6193744225946944, "percentage": 32.39, "elapsed_time": "0:17:53", "remaining_time": "0:37:21", "throughput": 5623.09, "total_tokens": 6036352}
|
|
{"current_steps": 12275, "total_steps": 37885, "loss": 0.1958, "lr": 1.7096431599333552e-06, "epoch": 1.620034314372443, "percentage": 32.4, "elapsed_time": "0:17:53", "remaining_time": "0:37:20", "throughput": 5623.72, "total_tokens": 6038912}
|
|
{"current_steps": 12280, "total_steps": 37885, "loss": 0.0934, "lr": 1.709318494579755e-06, "epoch": 1.6206942061501914, "percentage": 32.41, "elapsed_time": "0:17:54", "remaining_time": "0:37:19", "throughput": 5624.36, "total_tokens": 6041472}
|
|
{"current_steps": 12285, "total_steps": 37885, "loss": 0.0024, "lr": 1.7089936786784414e-06, "epoch": 1.6213540979279397, "percentage": 32.43, "elapsed_time": "0:17:54", "remaining_time": "0:37:19", "throughput": 5624.62, "total_tokens": 6043584}
|
|
{"current_steps": 12290, "total_steps": 37885, "loss": 0.059, "lr": 1.708668712298353e-06, "epoch": 1.6220139897056882, "percentage": 32.44, "elapsed_time": "0:17:54", "remaining_time": "0:37:18", "throughput": 5625.26, "total_tokens": 6046144}
|
|
{"current_steps": 12295, "total_steps": 37885, "loss": 0.0014, "lr": 1.7083435955084627e-06, "epoch": 1.6226738814834367, "percentage": 32.45, "elapsed_time": "0:17:55", "remaining_time": "0:37:17", "throughput": 5625.74, "total_tokens": 6048512}
|
|
{"current_steps": 12300, "total_steps": 37885, "loss": 0.0526, "lr": 1.7080183283777733e-06, "epoch": 1.6233337732611852, "percentage": 32.47, "elapsed_time": "0:17:55", "remaining_time": "0:37:17", "throughput": 5625.91, "total_tokens": 6050560}
|
|
{"current_steps": 12305, "total_steps": 37885, "loss": 0.0014, "lr": 1.707692910975321e-06, "epoch": 1.6239936650389337, "percentage": 32.48, "elapsed_time": "0:17:55", "remaining_time": "0:37:16", "throughput": 5626.51, "total_tokens": 6053056}
|
|
{"current_steps": 12310, "total_steps": 37885, "loss": 0.0247, "lr": 1.7073673433701733e-06, "epoch": 1.624653556816682, "percentage": 32.49, "elapsed_time": "0:17:56", "remaining_time": "0:37:15", "throughput": 5627.17, "total_tokens": 6055616}
|
|
{"current_steps": 12315, "total_steps": 37885, "loss": 0.0355, "lr": 1.7070416256314286e-06, "epoch": 1.6253134485944305, "percentage": 32.51, "elapsed_time": "0:17:56", "remaining_time": "0:37:15", "throughput": 5627.42, "total_tokens": 6057728}
|
|
{"current_steps": 12320, "total_steps": 37885, "loss": 0.0661, "lr": 1.7067157578282195e-06, "epoch": 1.625973340372179, "percentage": 32.52, "elapsed_time": "0:17:56", "remaining_time": "0:37:14", "throughput": 5628.08, "total_tokens": 6060288}
|
|
{"current_steps": 12325, "total_steps": 37885, "loss": 0.0912, "lr": 1.7063897400297083e-06, "epoch": 1.6266332321499273, "percentage": 32.53, "elapsed_time": "0:17:57", "remaining_time": "0:37:13", "throughput": 5628.55, "total_tokens": 6062656}
|
|
{"current_steps": 12330, "total_steps": 37885, "loss": 0.0712, "lr": 1.7060635723050899e-06, "epoch": 1.627293123927676, "percentage": 32.55, "elapsed_time": "0:17:57", "remaining_time": "0:37:13", "throughput": 5629.08, "total_tokens": 6065088}
|
|
{"current_steps": 12335, "total_steps": 37885, "loss": 0.0004, "lr": 1.705737254723591e-06, "epoch": 1.6279530157054243, "percentage": 32.56, "elapsed_time": "0:17:57", "remaining_time": "0:37:12", "throughput": 5629.51, "total_tokens": 6067392}
|
|
{"current_steps": 12340, "total_steps": 37885, "loss": 0.1991, "lr": 1.7054107873544704e-06, "epoch": 1.6286129074831728, "percentage": 32.57, "elapsed_time": "0:17:58", "remaining_time": "0:37:11", "throughput": 5630.01, "total_tokens": 6069760}
|
|
{"current_steps": 12345, "total_steps": 37885, "loss": 0.0003, "lr": 1.7050841702670188e-06, "epoch": 1.6292727992609213, "percentage": 32.59, "elapsed_time": "0:17:58", "remaining_time": "0:37:11", "throughput": 5630.6, "total_tokens": 6072256}
|
|
{"current_steps": 12350, "total_steps": 37885, "loss": 0.0482, "lr": 1.7047574035305576e-06, "epoch": 1.6299326910386696, "percentage": 32.6, "elapsed_time": "0:17:58", "remaining_time": "0:37:10", "throughput": 5631.14, "total_tokens": 6074688}
|
|
{"current_steps": 12355, "total_steps": 37885, "loss": 0.0002, "lr": 1.704430487214441e-06, "epoch": 1.630592582816418, "percentage": 32.61, "elapsed_time": "0:17:59", "remaining_time": "0:37:09", "throughput": 5631.74, "total_tokens": 6077184}
|
|
{"current_steps": 12360, "total_steps": 37885, "loss": 0.001, "lr": 1.7041034213880545e-06, "epoch": 1.6312524745941666, "percentage": 32.63, "elapsed_time": "0:17:59", "remaining_time": "0:37:09", "throughput": 5632.1, "total_tokens": 6079424}
|
|
{"current_steps": 12365, "total_steps": 37885, "loss": 0.0385, "lr": 1.7037762061208157e-06, "epoch": 1.631912366371915, "percentage": 32.64, "elapsed_time": "0:17:59", "remaining_time": "0:37:08", "throughput": 5632.7, "total_tokens": 6081920}
|
|
{"current_steps": 12370, "total_steps": 37885, "loss": 0.115, "lr": 1.7034488414821734e-06, "epoch": 1.6325722581496636, "percentage": 32.65, "elapsed_time": "0:18:00", "remaining_time": "0:37:07", "throughput": 5633.24, "total_tokens": 6084352}
|
|
{"current_steps": 12375, "total_steps": 37885, "loss": 0.0007, "lr": 1.7031213275416083e-06, "epoch": 1.6332321499274118, "percentage": 32.66, "elapsed_time": "0:18:00", "remaining_time": "0:37:07", "throughput": 5633.83, "total_tokens": 6086848}
|
|
{"current_steps": 12380, "total_steps": 37885, "loss": 0.1038, "lr": 1.702793664368633e-06, "epoch": 1.6338920417051603, "percentage": 32.68, "elapsed_time": "0:18:00", "remaining_time": "0:37:06", "throughput": 5634.42, "total_tokens": 6089344}
|
|
{"current_steps": 12385, "total_steps": 37885, "loss": 0.0722, "lr": 1.702465852032792e-06, "epoch": 1.6345519334829088, "percentage": 32.69, "elapsed_time": "0:18:01", "remaining_time": "0:37:05", "throughput": 5635.01, "total_tokens": 6091840}
|
|
{"current_steps": 12390, "total_steps": 37885, "loss": 0.0003, "lr": 1.7021378906036607e-06, "epoch": 1.6352118252606571, "percentage": 32.7, "elapsed_time": "0:18:01", "remaining_time": "0:37:05", "throughput": 5635.44, "total_tokens": 6094144}
|
|
{"current_steps": 12395, "total_steps": 37885, "loss": 0.0003, "lr": 1.7018097801508467e-06, "epoch": 1.6358717170384058, "percentage": 32.72, "elapsed_time": "0:18:01", "remaining_time": "0:37:04", "throughput": 5635.85, "total_tokens": 6096448}
|
|
{"current_steps": 12400, "total_steps": 37885, "loss": 0.1521, "lr": 1.7014815207439884e-06, "epoch": 1.6365316088161541, "percentage": 32.73, "elapsed_time": "0:18:02", "remaining_time": "0:37:03", "throughput": 5636.31, "total_tokens": 6098816}
|
|
{"current_steps": 12405, "total_steps": 37885, "loss": 0.1457, "lr": 1.7011531124527578e-06, "epoch": 1.6371915005939026, "percentage": 32.74, "elapsed_time": "0:18:02", "remaining_time": "0:37:03", "throughput": 5636.9, "total_tokens": 6101312}
|
|
{"current_steps": 12410, "total_steps": 37885, "loss": 0.0004, "lr": 1.7008245553468559e-06, "epoch": 1.6378513923716511, "percentage": 32.76, "elapsed_time": "0:18:02", "remaining_time": "0:37:02", "throughput": 5637.61, "total_tokens": 6103936}
|
|
{"current_steps": 12415, "total_steps": 37885, "loss": 0.0008, "lr": 1.7004958494960173e-06, "epoch": 1.6385112841493994, "percentage": 32.77, "elapsed_time": "0:18:03", "remaining_time": "0:37:01", "throughput": 5638.33, "total_tokens": 6106624}
|
|
{"current_steps": 12420, "total_steps": 37885, "loss": 0.0675, "lr": 1.7001669949700073e-06, "epoch": 1.639171175927148, "percentage": 32.78, "elapsed_time": "0:18:03", "remaining_time": "0:37:01", "throughput": 5639.18, "total_tokens": 6109440}
|
|
{"current_steps": 12425, "total_steps": 37885, "loss": 0.0088, "lr": 1.6998379918386228e-06, "epoch": 1.6398310677048964, "percentage": 32.8, "elapsed_time": "0:18:03", "remaining_time": "0:37:00", "throughput": 5639.54, "total_tokens": 6111680}
|
|
{"current_steps": 12430, "total_steps": 37885, "loss": 0.1804, "lr": 1.6995088401716922e-06, "epoch": 1.640490959482645, "percentage": 32.81, "elapsed_time": "0:18:04", "remaining_time": "0:36:59", "throughput": 5640.07, "total_tokens": 6114112}
|
|
{"current_steps": 12435, "total_steps": 37885, "loss": 0.1808, "lr": 1.6991795400390756e-06, "epoch": 1.6411508512603934, "percentage": 32.82, "elapsed_time": "0:18:04", "remaining_time": "0:36:59", "throughput": 5640.6, "total_tokens": 6116544}
|
|
{"current_steps": 12440, "total_steps": 37885, "loss": 0.001, "lr": 1.698850091510665e-06, "epoch": 1.6418107430381417, "percentage": 32.84, "elapsed_time": "0:18:04", "remaining_time": "0:36:58", "throughput": 5641.22, "total_tokens": 6119104}
|
|
{"current_steps": 12445, "total_steps": 37885, "loss": 0.1505, "lr": 1.6985204946563831e-06, "epoch": 1.6424706348158902, "percentage": 32.85, "elapsed_time": "0:18:05", "remaining_time": "0:36:58", "throughput": 5641.64, "total_tokens": 6121408}
|
|
{"current_steps": 12450, "total_steps": 37885, "loss": 0.0506, "lr": 1.6981907495461845e-06, "epoch": 1.6431305265936387, "percentage": 32.86, "elapsed_time": "0:18:05", "remaining_time": "0:36:57", "throughput": 5642.37, "total_tokens": 6124096}
|
|
{"current_steps": 12455, "total_steps": 37885, "loss": 0.001, "lr": 1.697860856250055e-06, "epoch": 1.643790418371387, "percentage": 32.88, "elapsed_time": "0:18:05", "remaining_time": "0:36:56", "throughput": 5642.6, "total_tokens": 6126208}
|
|
{"current_steps": 12460, "total_steps": 37885, "loss": 0.1091, "lr": 1.6975308148380125e-06, "epoch": 1.6444503101491357, "percentage": 32.89, "elapsed_time": "0:18:06", "remaining_time": "0:36:56", "throughput": 5643.19, "total_tokens": 6128704}
|
|
{"current_steps": 12465, "total_steps": 37885, "loss": 0.0021, "lr": 1.6972006253801055e-06, "epoch": 1.645110201926884, "percentage": 32.9, "elapsed_time": "0:18:06", "remaining_time": "0:36:55", "throughput": 5643.55, "total_tokens": 6130944}
|
|
{"current_steps": 12470, "total_steps": 37885, "loss": 0.1985, "lr": 1.6968702879464148e-06, "epoch": 1.6457700937046325, "percentage": 32.92, "elapsed_time": "0:18:06", "remaining_time": "0:36:54", "throughput": 5643.96, "total_tokens": 6133248}
|
|
{"current_steps": 12475, "total_steps": 37885, "loss": 0.2054, "lr": 1.6965398026070517e-06, "epoch": 1.646429985482381, "percentage": 32.93, "elapsed_time": "0:18:07", "remaining_time": "0:36:54", "throughput": 5644.56, "total_tokens": 6135744}
|
|
{"current_steps": 12480, "total_steps": 37885, "loss": 0.0839, "lr": 1.6962091694321595e-06, "epoch": 1.6470898772601292, "percentage": 32.94, "elapsed_time": "0:18:07", "remaining_time": "0:36:53", "throughput": 5645.03, "total_tokens": 6138112}
|
|
{"current_steps": 12485, "total_steps": 37885, "loss": 0.1114, "lr": 1.6958783884919124e-06, "epoch": 1.647749769037878, "percentage": 32.95, "elapsed_time": "0:18:07", "remaining_time": "0:36:52", "throughput": 5645.61, "total_tokens": 6140608}
|
|
{"current_steps": 12490, "total_steps": 37885, "loss": 0.0034, "lr": 1.6955474598565163e-06, "epoch": 1.6484096608156262, "percentage": 32.97, "elapsed_time": "0:18:08", "remaining_time": "0:36:52", "throughput": 5646.18, "total_tokens": 6143104}
|
|
{"current_steps": 12495, "total_steps": 37885, "loss": 0.0456, "lr": 1.695216383596209e-06, "epoch": 1.6490695525933747, "percentage": 32.98, "elapsed_time": "0:18:08", "remaining_time": "0:36:51", "throughput": 5646.72, "total_tokens": 6145536}
|
|
{"current_steps": 12500, "total_steps": 37885, "loss": 0.0014, "lr": 1.6948851597812586e-06, "epoch": 1.6497294443711232, "percentage": 32.99, "elapsed_time": "0:18:08", "remaining_time": "0:36:50", "throughput": 5647.35, "total_tokens": 6148096}
|
|
{"current_steps": 12505, "total_steps": 37885, "loss": 0.1411, "lr": 1.694553788481965e-06, "epoch": 1.6503893361488715, "percentage": 33.01, "elapsed_time": "0:18:09", "remaining_time": "0:36:50", "throughput": 5648.25, "total_tokens": 6150976}
|
|
{"current_steps": 12510, "total_steps": 37885, "loss": 0.1109, "lr": 1.6942222697686593e-06, "epoch": 1.65104922792662, "percentage": 33.02, "elapsed_time": "0:18:09", "remaining_time": "0:36:49", "throughput": 5648.79, "total_tokens": 6153408}
|
|
{"current_steps": 12515, "total_steps": 37885, "loss": 0.0007, "lr": 1.6938906037117039e-06, "epoch": 1.6517091197043685, "percentage": 33.03, "elapsed_time": "0:18:09", "remaining_time": "0:36:48", "throughput": 5649.47, "total_tokens": 6156032}
|
|
{"current_steps": 12520, "total_steps": 37885, "loss": 0.003, "lr": 1.6935587903814926e-06, "epoch": 1.6523690114821168, "percentage": 33.05, "elapsed_time": "0:18:10", "remaining_time": "0:36:48", "throughput": 5650.25, "total_tokens": 6158784}
|
|
{"current_steps": 12525, "total_steps": 37885, "loss": 0.0005, "lr": 1.6932268298484508e-06, "epoch": 1.6530289032598655, "percentage": 33.06, "elapsed_time": "0:18:10", "remaining_time": "0:36:47", "throughput": 5650.92, "total_tokens": 6161408}
|
|
{"current_steps": 12530, "total_steps": 37885, "loss": 0.0756, "lr": 1.692894722183034e-06, "epoch": 1.6536887950376138, "percentage": 33.07, "elapsed_time": "0:18:10", "remaining_time": "0:36:47", "throughput": 5651.35, "total_tokens": 6163712}
|
|
{"current_steps": 12535, "total_steps": 37885, "loss": 0.0003, "lr": 1.6925624674557298e-06, "epoch": 1.6543486868153623, "percentage": 33.09, "elapsed_time": "0:18:10", "remaining_time": "0:36:46", "throughput": 5651.71, "total_tokens": 6165952}
|
|
{"current_steps": 12540, "total_steps": 37885, "loss": 0.0918, "lr": 1.6922300657370573e-06, "epoch": 1.6550085785931108, "percentage": 33.1, "elapsed_time": "0:18:11", "remaining_time": "0:36:45", "throughput": 5652.08, "total_tokens": 6168192}
|
|
{"current_steps": 12545, "total_steps": 37885, "loss": 0.0002, "lr": 1.691897517097566e-06, "epoch": 1.655668470370859, "percentage": 33.11, "elapsed_time": "0:18:11", "remaining_time": "0:36:45", "throughput": 5652.49, "total_tokens": 6170496}
|
|
{"current_steps": 12550, "total_steps": 37885, "loss": 0.0005, "lr": 1.6915648216078374e-06, "epoch": 1.6563283621486078, "percentage": 33.13, "elapsed_time": "0:18:11", "remaining_time": "0:36:44", "throughput": 5653.23, "total_tokens": 6173184}
|
|
{"current_steps": 12555, "total_steps": 37885, "loss": 0.0884, "lr": 1.691231979338483e-06, "epoch": 1.656988253926356, "percentage": 33.14, "elapsed_time": "0:18:12", "remaining_time": "0:36:43", "throughput": 5653.71, "total_tokens": 6175552}
|
|
{"current_steps": 12560, "total_steps": 37885, "loss": 0.1239, "lr": 1.690898990360146e-06, "epoch": 1.6576481457041046, "percentage": 33.15, "elapsed_time": "0:18:12", "remaining_time": "0:36:43", "throughput": 5654.28, "total_tokens": 6178048}
|
|
{"current_steps": 12565, "total_steps": 37885, "loss": 0.1286, "lr": 1.690565854743502e-06, "epoch": 1.658308037481853, "percentage": 33.17, "elapsed_time": "0:18:12", "remaining_time": "0:36:42", "throughput": 5654.85, "total_tokens": 6180544}
|
|
{"current_steps": 12570, "total_steps": 37885, "loss": 0.0003, "lr": 1.690232572559256e-06, "epoch": 1.6589679292596013, "percentage": 33.18, "elapsed_time": "0:18:13", "remaining_time": "0:36:41", "throughput": 5655.44, "total_tokens": 6183040}
|
|
{"current_steps": 12575, "total_steps": 37885, "loss": 0.1679, "lr": 1.6898991438781445e-06, "epoch": 1.6596278210373498, "percentage": 33.19, "elapsed_time": "0:18:13", "remaining_time": "0:36:41", "throughput": 5656.37, "total_tokens": 6185984}
|
|
{"current_steps": 12580, "total_steps": 37885, "loss": 0.0464, "lr": 1.6895655687709356e-06, "epoch": 1.6602877128150983, "percentage": 33.21, "elapsed_time": "0:18:13", "remaining_time": "0:36:40", "throughput": 5656.97, "total_tokens": 6188480}
|
|
{"current_steps": 12585, "total_steps": 37885, "loss": 0.071, "lr": 1.6892318473084283e-06, "epoch": 1.6609476045928466, "percentage": 33.22, "elapsed_time": "0:18:14", "remaining_time": "0:36:39", "throughput": 5657.65, "total_tokens": 6191104}
|
|
{"current_steps": 12590, "total_steps": 37885, "loss": 0.404, "lr": 1.6888979795614524e-06, "epoch": 1.6616074963705953, "percentage": 33.23, "elapsed_time": "0:18:14", "remaining_time": "0:36:39", "throughput": 5658.28, "total_tokens": 6193664}
|
|
{"current_steps": 12595, "total_steps": 37885, "loss": 0.1, "lr": 1.688563965600869e-06, "epoch": 1.6622673881483436, "percentage": 33.25, "elapsed_time": "0:18:14", "remaining_time": "0:36:38", "throughput": 5659.13, "total_tokens": 6196480}
|
|
{"current_steps": 12600, "total_steps": 37885, "loss": 0.1222, "lr": 1.68822980549757e-06, "epoch": 1.6629272799260921, "percentage": 33.26, "elapsed_time": "0:18:15", "remaining_time": "0:36:37", "throughput": 5659.81, "total_tokens": 6199104}
|
|
{"current_steps": 12605, "total_steps": 37885, "loss": 0.0012, "lr": 1.6878954993224786e-06, "epoch": 1.6635871717038406, "percentage": 33.27, "elapsed_time": "0:18:15", "remaining_time": "0:36:37", "throughput": 5660.11, "total_tokens": 6201280}
|
|
{"current_steps": 12610, "total_steps": 37885, "loss": 0.0029, "lr": 1.687561047146549e-06, "epoch": 1.664247063481589, "percentage": 33.28, "elapsed_time": "0:18:15", "remaining_time": "0:36:36", "throughput": 5660.69, "total_tokens": 6203776}
|
|
{"current_steps": 12615, "total_steps": 37885, "loss": 0.1658, "lr": 1.6872264490407656e-06, "epoch": 1.6649069552593376, "percentage": 33.3, "elapsed_time": "0:18:16", "remaining_time": "0:36:36", "throughput": 5661.43, "total_tokens": 6206464}
|
|
{"current_steps": 12620, "total_steps": 37885, "loss": 0.003, "lr": 1.686891705076145e-06, "epoch": 1.665566847037086, "percentage": 33.31, "elapsed_time": "0:18:16", "remaining_time": "0:36:35", "throughput": 5661.94, "total_tokens": 6208896}
|
|
{"current_steps": 12625, "total_steps": 37885, "loss": 0.061, "lr": 1.6865568153237343e-06, "epoch": 1.6662267388148344, "percentage": 33.32, "elapsed_time": "0:18:16", "remaining_time": "0:36:34", "throughput": 5662.3, "total_tokens": 6211136}
|
|
{"current_steps": 12630, "total_steps": 37885, "loss": 0.0895, "lr": 1.6862217798546115e-06, "epoch": 1.666886630592583, "percentage": 33.34, "elapsed_time": "0:18:17", "remaining_time": "0:36:34", "throughput": 5662.82, "total_tokens": 6213568}
|
|
{"current_steps": 12635, "total_steps": 37885, "loss": 0.2668, "lr": 1.6858865987398847e-06, "epoch": 1.6675465223703312, "percentage": 33.35, "elapsed_time": "0:18:17", "remaining_time": "0:36:33", "throughput": 5663.3, "total_tokens": 6215936}
|
|
{"current_steps": 12640, "total_steps": 37885, "loss": 0.0006, "lr": 1.6855512720506941e-06, "epoch": 1.6682064141480797, "percentage": 33.36, "elapsed_time": "0:18:17", "remaining_time": "0:36:32", "throughput": 5663.99, "total_tokens": 6218560}
|
|
{"current_steps": 12645, "total_steps": 37885, "loss": 0.1121, "lr": 1.6852157998582106e-06, "epoch": 1.6688663059258282, "percentage": 33.38, "elapsed_time": "0:18:18", "remaining_time": "0:36:32", "throughput": 5664.68, "total_tokens": 6221184}
|
|
{"current_steps": 12650, "total_steps": 37885, "loss": 0.0836, "lr": 1.6848801822336355e-06, "epoch": 1.6695261977035765, "percentage": 33.39, "elapsed_time": "0:18:18", "remaining_time": "0:36:31", "throughput": 5665.15, "total_tokens": 6223552}
|
|
{"current_steps": 12655, "total_steps": 37885, "loss": 0.0497, "lr": 1.684544419248201e-06, "epoch": 1.6701860894813252, "percentage": 33.4, "elapsed_time": "0:18:18", "remaining_time": "0:36:30", "throughput": 5665.92, "total_tokens": 6226304}
|
|
{"current_steps": 12660, "total_steps": 37885, "loss": 0.1245, "lr": 1.6842085109731708e-06, "epoch": 1.6708459812590735, "percentage": 33.42, "elapsed_time": "0:18:19", "remaining_time": "0:36:30", "throughput": 5666.53, "total_tokens": 6228864}
|
|
{"current_steps": 12665, "total_steps": 37885, "loss": 0.0763, "lr": 1.6838724574798387e-06, "epoch": 1.671505873036822, "percentage": 33.43, "elapsed_time": "0:18:19", "remaining_time": "0:36:29", "throughput": 5667.25, "total_tokens": 6231552}
|
|
{"current_steps": 12670, "total_steps": 37885, "loss": 0.0017, "lr": 1.6835362588395298e-06, "epoch": 1.6721657648145705, "percentage": 33.44, "elapsed_time": "0:18:19", "remaining_time": "0:36:28", "throughput": 5667.65, "total_tokens": 6233856}
|
|
{"current_steps": 12675, "total_steps": 37885, "loss": 0.0827, "lr": 1.6831999151235995e-06, "epoch": 1.6728256565923187, "percentage": 33.46, "elapsed_time": "0:18:20", "remaining_time": "0:36:28", "throughput": 5668.68, "total_tokens": 6236928}
|
|
{"current_steps": 12680, "total_steps": 37885, "loss": 0.0704, "lr": 1.682863426403435e-06, "epoch": 1.6734855483700675, "percentage": 33.47, "elapsed_time": "0:18:20", "remaining_time": "0:36:27", "throughput": 5669.35, "total_tokens": 6239552}
|
|
{"current_steps": 12685, "total_steps": 37885, "loss": 0.0288, "lr": 1.682526792750453e-06, "epoch": 1.6741454401478157, "percentage": 33.48, "elapsed_time": "0:18:20", "remaining_time": "0:36:27", "throughput": 5669.81, "total_tokens": 6241920}
|
|
{"current_steps": 12690, "total_steps": 37885, "loss": 0.2077, "lr": 1.6821900142361015e-06, "epoch": 1.6748053319255642, "percentage": 33.5, "elapsed_time": "0:18:21", "remaining_time": "0:36:26", "throughput": 5670.16, "total_tokens": 6244160}
|
|
{"current_steps": 12695, "total_steps": 37885, "loss": 0.2284, "lr": 1.6818530909318595e-06, "epoch": 1.6754652237033127, "percentage": 33.51, "elapsed_time": "0:18:21", "remaining_time": "0:36:25", "throughput": 5670.79, "total_tokens": 6246720}
|
|
{"current_steps": 12700, "total_steps": 37885, "loss": 0.0043, "lr": 1.6815160229092367e-06, "epoch": 1.676125115481061, "percentage": 33.52, "elapsed_time": "0:18:21", "remaining_time": "0:36:25", "throughput": 5671.26, "total_tokens": 6249088}
|
|
{"current_steps": 12705, "total_steps": 37885, "loss": 0.2497, "lr": 1.6811788102397733e-06, "epoch": 1.6767850072588095, "percentage": 33.54, "elapsed_time": "0:18:22", "remaining_time": "0:36:24", "throughput": 5671.73, "total_tokens": 6251456}
|
|
{"current_steps": 12710, "total_steps": 37885, "loss": 0.0581, "lr": 1.68084145299504e-06, "epoch": 1.677444899036558, "percentage": 33.55, "elapsed_time": "0:18:22", "remaining_time": "0:36:23", "throughput": 5672.54, "total_tokens": 6254272}
|
|
{"current_steps": 12715, "total_steps": 37885, "loss": 0.0853, "lr": 1.6805039512466385e-06, "epoch": 1.6781047908143063, "percentage": 33.56, "elapsed_time": "0:18:22", "remaining_time": "0:36:23", "throughput": 5673.05, "total_tokens": 6256704}
|
|
{"current_steps": 12720, "total_steps": 37885, "loss": 0.0476, "lr": 1.6801663050662012e-06, "epoch": 1.678764682592055, "percentage": 33.58, "elapsed_time": "0:18:23", "remaining_time": "0:36:22", "throughput": 5673.52, "total_tokens": 6259072}
|
|
{"current_steps": 12725, "total_steps": 37885, "loss": 0.0021, "lr": 1.6798285145253907e-06, "epoch": 1.6794245743698033, "percentage": 33.59, "elapsed_time": "0:18:23", "remaining_time": "0:36:21", "throughput": 5674.13, "total_tokens": 6261632}
|
|
{"current_steps": 12730, "total_steps": 37885, "loss": 0.047, "lr": 1.6794905796959017e-06, "epoch": 1.6800844661475518, "percentage": 33.6, "elapsed_time": "0:18:23", "remaining_time": "0:36:21", "throughput": 5674.76, "total_tokens": 6264192}
|
|
{"current_steps": 12735, "total_steps": 37885, "loss": 0.0419, "lr": 1.6791525006494572e-06, "epoch": 1.6807443579253003, "percentage": 33.61, "elapsed_time": "0:18:24", "remaining_time": "0:36:20", "throughput": 5675.27, "total_tokens": 6266624}
|
|
{"current_steps": 12740, "total_steps": 37885, "loss": 0.3103, "lr": 1.6788142774578126e-06, "epoch": 1.6814042497030486, "percentage": 33.63, "elapsed_time": "0:18:24", "remaining_time": "0:36:20", "throughput": 5675.78, "total_tokens": 6269056}
|
|
{"current_steps": 12745, "total_steps": 37885, "loss": 0.0926, "lr": 1.678475910192753e-06, "epoch": 1.6820641414807973, "percentage": 33.64, "elapsed_time": "0:18:24", "remaining_time": "0:36:19", "throughput": 5676.13, "total_tokens": 6271296}
|
|
{"current_steps": 12750, "total_steps": 37885, "loss": 0.2126, "lr": 1.6781373989260948e-06, "epoch": 1.6827240332585456, "percentage": 33.65, "elapsed_time": "0:18:25", "remaining_time": "0:36:18", "throughput": 5676.53, "total_tokens": 6273600}
|
|
{"current_steps": 12755, "total_steps": 37885, "loss": 0.0928, "lr": 1.6777987437296842e-06, "epoch": 1.683383925036294, "percentage": 33.67, "elapsed_time": "0:18:25", "remaining_time": "0:36:18", "throughput": 5677.14, "total_tokens": 6276160}
|
|
{"current_steps": 12760, "total_steps": 37885, "loss": 0.0657, "lr": 1.6774599446753984e-06, "epoch": 1.6840438168140426, "percentage": 33.68, "elapsed_time": "0:18:25", "remaining_time": "0:36:17", "throughput": 5677.77, "total_tokens": 6278720}
|
|
{"current_steps": 12765, "total_steps": 37885, "loss": 0.2448, "lr": 1.6771210018351453e-06, "epoch": 1.6847037085917909, "percentage": 33.69, "elapsed_time": "0:18:26", "remaining_time": "0:36:16", "throughput": 5678.29, "total_tokens": 6281152}
|
|
{"current_steps": 12770, "total_steps": 37885, "loss": 0.0394, "lr": 1.6767819152808627e-06, "epoch": 1.6853636003695394, "percentage": 33.71, "elapsed_time": "0:18:26", "remaining_time": "0:36:16", "throughput": 5678.65, "total_tokens": 6283392}
|
|
{"current_steps": 12775, "total_steps": 37885, "loss": 0.064, "lr": 1.6764426850845194e-06, "epoch": 1.6860234921472879, "percentage": 33.72, "elapsed_time": "0:18:26", "remaining_time": "0:36:15", "throughput": 5678.88, "total_tokens": 6285504}
|
|
{"current_steps": 12780, "total_steps": 37885, "loss": 0.306, "lr": 1.676103311318115e-06, "epoch": 1.6866833839250361, "percentage": 33.73, "elapsed_time": "0:18:27", "remaining_time": "0:36:14", "throughput": 5679.38, "total_tokens": 6287936}
|
|
{"current_steps": 12785, "total_steps": 37885, "loss": 0.0701, "lr": 1.6757637940536787e-06, "epoch": 1.6873432757027849, "percentage": 33.75, "elapsed_time": "0:18:27", "remaining_time": "0:36:14", "throughput": 5679.99, "total_tokens": 6290496}
|
|
{"current_steps": 12790, "total_steps": 37885, "loss": 0.0025, "lr": 1.6754241333632705e-06, "epoch": 1.6880031674805331, "percentage": 33.76, "elapsed_time": "0:18:27", "remaining_time": "0:36:13", "throughput": 5680.6, "total_tokens": 6293056}
|
|
{"current_steps": 12795, "total_steps": 37885, "loss": 0.0408, "lr": 1.6750843293189806e-06, "epoch": 1.6886630592582816, "percentage": 33.77, "elapsed_time": "0:18:28", "remaining_time": "0:36:12", "throughput": 5681.1, "total_tokens": 6295488}
|
|
{"current_steps": 12800, "total_steps": 37885, "loss": 0.0907, "lr": 1.674744381992931e-06, "epoch": 1.6893229510360301, "percentage": 33.79, "elapsed_time": "0:18:28", "remaining_time": "0:36:12", "throughput": 5681.55, "total_tokens": 6297856}
|
|
{"current_steps": 12805, "total_steps": 37885, "loss": 0.18, "lr": 1.674404291457272e-06, "epoch": 1.6899828428137784, "percentage": 33.8, "elapsed_time": "0:18:28", "remaining_time": "0:36:11", "throughput": 5681.95, "total_tokens": 6300160}
|
|
{"current_steps": 12810, "total_steps": 37885, "loss": 0.1037, "lr": 1.6740640577841862e-06, "epoch": 1.6906427345915271, "percentage": 33.81, "elapsed_time": "0:18:29", "remaining_time": "0:36:11", "throughput": 5682.77, "total_tokens": 6302976}
|
|
{"current_steps": 12815, "total_steps": 37885, "loss": 0.003, "lr": 1.673723681045885e-06, "epoch": 1.6913026263692754, "percentage": 33.83, "elapsed_time": "0:18:29", "remaining_time": "0:36:10", "throughput": 5683.27, "total_tokens": 6305408}
|
|
{"current_steps": 12820, "total_steps": 37885, "loss": 0.001, "lr": 1.6733831613146113e-06, "epoch": 1.691962518147024, "percentage": 33.84, "elapsed_time": "0:18:29", "remaining_time": "0:36:09", "throughput": 5684.05, "total_tokens": 6308160}
|
|
{"current_steps": 12825, "total_steps": 37885, "loss": 0.101, "lr": 1.673042498662638e-06, "epoch": 1.6926224099247724, "percentage": 33.85, "elapsed_time": "0:18:30", "remaining_time": "0:36:09", "throughput": 5684.62, "total_tokens": 6310656}
|
|
{"current_steps": 12830, "total_steps": 37885, "loss": 0.0167, "lr": 1.672701693162268e-06, "epoch": 1.6932823017025207, "percentage": 33.87, "elapsed_time": "0:18:30", "remaining_time": "0:36:08", "throughput": 5685.07, "total_tokens": 6313024}
|
|
{"current_steps": 12835, "total_steps": 37885, "loss": 0.0606, "lr": 1.672360744885835e-06, "epoch": 1.6939421934802692, "percentage": 33.88, "elapsed_time": "0:18:30", "remaining_time": "0:36:07", "throughput": 5685.67, "total_tokens": 6315584}
|
|
{"current_steps": 12840, "total_steps": 37885, "loss": 0.0008, "lr": 1.6720196539057025e-06, "epoch": 1.6946020852580177, "percentage": 33.89, "elapsed_time": "0:18:31", "remaining_time": "0:36:07", "throughput": 5686.18, "total_tokens": 6318016}
|
|
{"current_steps": 12845, "total_steps": 37885, "loss": 0.2314, "lr": 1.671678420294265e-06, "epoch": 1.695261977035766, "percentage": 33.91, "elapsed_time": "0:18:31", "remaining_time": "0:36:06", "throughput": 5687.04, "total_tokens": 6320896}
|
|
{"current_steps": 12850, "total_steps": 37885, "loss": 0.0503, "lr": 1.6713370441239469e-06, "epoch": 1.6959218688135147, "percentage": 33.92, "elapsed_time": "0:18:31", "remaining_time": "0:36:06", "throughput": 5687.55, "total_tokens": 6323328}
|
|
{"current_steps": 12855, "total_steps": 37885, "loss": 0.1382, "lr": 1.6709955254672026e-06, "epoch": 1.696581760591263, "percentage": 33.93, "elapsed_time": "0:18:32", "remaining_time": "0:36:05", "throughput": 5688.06, "total_tokens": 6325760}
|
|
{"current_steps": 12860, "total_steps": 37885, "loss": 0.066, "lr": 1.670653864396517e-06, "epoch": 1.6972416523690115, "percentage": 33.94, "elapsed_time": "0:18:32", "remaining_time": "0:36:04", "throughput": 5688.6, "total_tokens": 6328256}
|
|
{"current_steps": 12865, "total_steps": 37885, "loss": 0.1217, "lr": 1.670312060984405e-06, "epoch": 1.69790154414676, "percentage": 33.96, "elapsed_time": "0:18:32", "remaining_time": "0:36:04", "throughput": 5689.21, "total_tokens": 6330816}
|
|
{"current_steps": 12870, "total_steps": 37885, "loss": 0.1156, "lr": 1.669970115303412e-06, "epoch": 1.6985614359245083, "percentage": 33.97, "elapsed_time": "0:18:33", "remaining_time": "0:36:03", "throughput": 5689.65, "total_tokens": 6333184}
|
|
{"current_steps": 12875, "total_steps": 37885, "loss": 0.0019, "lr": 1.6696280274261137e-06, "epoch": 1.699221327702257, "percentage": 33.98, "elapsed_time": "0:18:33", "remaining_time": "0:36:02", "throughput": 5690.24, "total_tokens": 6335744}
|
|
{"current_steps": 12880, "total_steps": 37885, "loss": 0.1754, "lr": 1.6692857974251156e-06, "epoch": 1.6998812194800053, "percentage": 34.0, "elapsed_time": "0:18:33", "remaining_time": "0:36:02", "throughput": 5690.81, "total_tokens": 6338240}
|
|
{"current_steps": 12885, "total_steps": 37885, "loss": 0.0014, "lr": 1.668943425373054e-06, "epoch": 1.7005411112577538, "percentage": 34.01, "elapsed_time": "0:18:34", "remaining_time": "0:36:02", "throughput": 5689.98, "total_tokens": 6340672}
|
|
{"current_steps": 12890, "total_steps": 37885, "loss": 0.0742, "lr": 1.668600911342594e-06, "epoch": 1.7012010030355023, "percentage": 34.02, "elapsed_time": "0:18:34", "remaining_time": "0:36:01", "throughput": 5690.49, "total_tokens": 6343104}
|
|
{"current_steps": 12895, "total_steps": 37885, "loss": 0.0573, "lr": 1.668258255406432e-06, "epoch": 1.7018608948132505, "percentage": 34.04, "elapsed_time": "0:18:35", "remaining_time": "0:36:00", "throughput": 5691.25, "total_tokens": 6345856}
|
|
{"current_steps": 12900, "total_steps": 37885, "loss": 0.1826, "lr": 1.6679154576372949e-06, "epoch": 1.702520786590999, "percentage": 34.05, "elapsed_time": "0:18:35", "remaining_time": "0:36:00", "throughput": 5691.67, "total_tokens": 6348224}
|
|
{"current_steps": 12905, "total_steps": 37885, "loss": 0.1554, "lr": 1.6675725181079384e-06, "epoch": 1.7031806783687475, "percentage": 34.06, "elapsed_time": "0:18:35", "remaining_time": "0:35:59", "throughput": 5692.27, "total_tokens": 6350784}
|
|
{"current_steps": 12910, "total_steps": 37885, "loss": 0.0764, "lr": 1.6672294368911493e-06, "epoch": 1.7038405701464958, "percentage": 34.08, "elapsed_time": "0:18:36", "remaining_time": "0:35:58", "throughput": 5692.86, "total_tokens": 6353344}
|
|
{"current_steps": 12915, "total_steps": 37885, "loss": 0.0498, "lr": 1.6668862140597434e-06, "epoch": 1.7045004619242445, "percentage": 34.09, "elapsed_time": "0:18:36", "remaining_time": "0:35:58", "throughput": 5693.2, "total_tokens": 6355584}
|
|
{"current_steps": 12920, "total_steps": 37885, "loss": 0.0021, "lr": 1.6665428496865684e-06, "epoch": 1.7051603537019928, "percentage": 34.1, "elapsed_time": "0:18:36", "remaining_time": "0:35:57", "throughput": 5693.84, "total_tokens": 6358208}
|
|
{"current_steps": 12925, "total_steps": 37885, "loss": 0.1555, "lr": 1.6661993438445e-06, "epoch": 1.7058202454797413, "percentage": 34.12, "elapsed_time": "0:18:37", "remaining_time": "0:35:57", "throughput": 5694.28, "total_tokens": 6360576}
|
|
{"current_steps": 12930, "total_steps": 37885, "loss": 0.0649, "lr": 1.665855696606445e-06, "epoch": 1.7064801372574898, "percentage": 34.13, "elapsed_time": "0:18:37", "remaining_time": "0:35:56", "throughput": 5694.73, "total_tokens": 6362944}
|
|
{"current_steps": 12935, "total_steps": 37885, "loss": 0.1484, "lr": 1.6655119080453402e-06, "epoch": 1.707140029035238, "percentage": 34.14, "elapsed_time": "0:18:37", "remaining_time": "0:35:55", "throughput": 5695.29, "total_tokens": 6365440}
|
|
{"current_steps": 12940, "total_steps": 37885, "loss": 0.1357, "lr": 1.6651679782341524e-06, "epoch": 1.7077999208129868, "percentage": 34.16, "elapsed_time": "0:18:37", "remaining_time": "0:35:55", "throughput": 5695.75, "total_tokens": 6367808}
|
|
{"current_steps": 12945, "total_steps": 37885, "loss": 0.1377, "lr": 1.6648239072458777e-06, "epoch": 1.708459812590735, "percentage": 34.17, "elapsed_time": "0:18:38", "remaining_time": "0:35:54", "throughput": 5696.3, "total_tokens": 6370304}
|
|
{"current_steps": 12950, "total_steps": 37885, "loss": 0.0023, "lr": 1.6644796951535432e-06, "epoch": 1.7091197043684836, "percentage": 34.18, "elapsed_time": "0:18:38", "remaining_time": "0:35:53", "throughput": 5696.64, "total_tokens": 6372544}
|
|
{"current_steps": 12955, "total_steps": 37885, "loss": 0.0224, "lr": 1.664135342030205e-06, "epoch": 1.709779596146232, "percentage": 34.2, "elapsed_time": "0:18:38", "remaining_time": "0:35:53", "throughput": 5697.36, "total_tokens": 6375232}
|
|
{"current_steps": 12960, "total_steps": 37885, "loss": 0.0619, "lr": 1.6637908479489496e-06, "epoch": 1.7104394879239804, "percentage": 34.21, "elapsed_time": "0:18:39", "remaining_time": "0:35:52", "throughput": 5697.86, "total_tokens": 6377664}
|
|
{"current_steps": 12965, "total_steps": 37885, "loss": 0.1494, "lr": 1.6634462129828938e-06, "epoch": 1.7110993797017289, "percentage": 34.22, "elapsed_time": "0:18:39", "remaining_time": "0:35:52", "throughput": 5698.29, "total_tokens": 6380032}
|
|
{"current_steps": 12970, "total_steps": 37885, "loss": 0.0607, "lr": 1.6631014372051836e-06, "epoch": 1.7117592714794774, "percentage": 34.24, "elapsed_time": "0:18:39", "remaining_time": "0:35:51", "throughput": 5698.78, "total_tokens": 6382464}
|
|
{"current_steps": 12975, "total_steps": 37885, "loss": 0.1611, "lr": 1.6627565206889953e-06, "epoch": 1.7124191632572257, "percentage": 34.25, "elapsed_time": "0:18:40", "remaining_time": "0:35:50", "throughput": 5698.95, "total_tokens": 6384512}
|
|
{"current_steps": 12980, "total_steps": 37885, "loss": 0.0027, "lr": 1.6624114635075344e-06, "epoch": 1.7130790550349744, "percentage": 34.26, "elapsed_time": "0:18:40", "remaining_time": "0:35:50", "throughput": 5699.56, "total_tokens": 6387072}
|
|
{"current_steps": 12985, "total_steps": 37885, "loss": 0.0497, "lr": 1.6620662657340371e-06, "epoch": 1.7137389468127227, "percentage": 34.27, "elapsed_time": "0:18:40", "remaining_time": "0:35:49", "throughput": 5700.21, "total_tokens": 6389696}
|
|
{"current_steps": 12990, "total_steps": 37885, "loss": 0.0436, "lr": 1.66172092744177e-06, "epoch": 1.7143988385904712, "percentage": 34.29, "elapsed_time": "0:18:41", "remaining_time": "0:35:48", "throughput": 5700.67, "total_tokens": 6392064}
|
|
{"current_steps": 12995, "total_steps": 37885, "loss": 0.0485, "lr": 1.661375448704027e-06, "epoch": 1.7150587303682197, "percentage": 34.3, "elapsed_time": "0:18:41", "remaining_time": "0:35:48", "throughput": 5701.42, "total_tokens": 6394816}
|
|
{"current_steps": 13000, "total_steps": 37885, "loss": 0.1353, "lr": 1.6610298295941347e-06, "epoch": 1.715718622145968, "percentage": 34.31, "elapsed_time": "0:18:41", "remaining_time": "0:35:47", "throughput": 5702.02, "total_tokens": 6397376}
|
|
{"current_steps": 13005, "total_steps": 37885, "loss": 0.1308, "lr": 1.6606840701854476e-06, "epoch": 1.7163785139237167, "percentage": 34.33, "elapsed_time": "0:18:42", "remaining_time": "0:35:47", "throughput": 5702.61, "total_tokens": 6399936}
|
|
{"current_steps": 13010, "total_steps": 37885, "loss": 0.1169, "lr": 1.660338170551351e-06, "epoch": 1.717038405701465, "percentage": 34.34, "elapsed_time": "0:18:42", "remaining_time": "0:35:46", "throughput": 5703.25, "total_tokens": 6402560}
|
|
{"current_steps": 13015, "total_steps": 37885, "loss": 0.081, "lr": 1.6599921307652598e-06, "epoch": 1.7176982974792134, "percentage": 34.35, "elapsed_time": "0:18:42", "remaining_time": "0:35:45", "throughput": 5703.69, "total_tokens": 6404928}
|
|
{"current_steps": 13020, "total_steps": 37885, "loss": 0.0065, "lr": 1.659645950900618e-06, "epoch": 1.718358189256962, "percentage": 34.37, "elapsed_time": "0:18:43", "remaining_time": "0:35:45", "throughput": 5704.35, "total_tokens": 6407552}
|
|
{"current_steps": 13025, "total_steps": 37885, "loss": 0.1286, "lr": 1.6592996310308997e-06, "epoch": 1.7190180810347102, "percentage": 34.38, "elapsed_time": "0:18:43", "remaining_time": "0:35:44", "throughput": 5705.04, "total_tokens": 6410240}
|
|
{"current_steps": 13030, "total_steps": 37885, "loss": 0.1372, "lr": 1.658953171229609e-06, "epoch": 1.7196779728124587, "percentage": 34.39, "elapsed_time": "0:18:43", "remaining_time": "0:35:43", "throughput": 5705.37, "total_tokens": 6412480}
|
|
{"current_steps": 13035, "total_steps": 37885, "loss": 0.0805, "lr": 1.6586065715702797e-06, "epoch": 1.7203378645902072, "percentage": 34.41, "elapsed_time": "0:18:44", "remaining_time": "0:35:43", "throughput": 5706.01, "total_tokens": 6415104}
|
|
{"current_steps": 13040, "total_steps": 37885, "loss": 0.04, "lr": 1.658259832126475e-06, "epoch": 1.7209977563679555, "percentage": 34.42, "elapsed_time": "0:18:44", "remaining_time": "0:35:42", "throughput": 5706.66, "total_tokens": 6417728}
|
|
{"current_steps": 13045, "total_steps": 37885, "loss": 0.1079, "lr": 1.6579129529717872e-06, "epoch": 1.7216576481457042, "percentage": 34.43, "elapsed_time": "0:18:44", "remaining_time": "0:35:42", "throughput": 5707.45, "total_tokens": 6420544}
|
|
{"current_steps": 13050, "total_steps": 37885, "loss": 0.0819, "lr": 1.6575659341798396e-06, "epoch": 1.7223175399234525, "percentage": 34.45, "elapsed_time": "0:18:45", "remaining_time": "0:35:41", "throughput": 5707.95, "total_tokens": 6422976}
|
|
{"current_steps": 13055, "total_steps": 37885, "loss": 0.1217, "lr": 1.6572187758242842e-06, "epoch": 1.722977431701201, "percentage": 34.46, "elapsed_time": "0:18:45", "remaining_time": "0:35:40", "throughput": 5708.27, "total_tokens": 6425216}
|
|
{"current_steps": 13060, "total_steps": 37885, "loss": 0.3001, "lr": 1.6568714779788024e-06, "epoch": 1.7236373234789495, "percentage": 34.47, "elapsed_time": "0:18:45", "remaining_time": "0:35:40", "throughput": 5708.58, "total_tokens": 6427456}
|
|
{"current_steps": 13065, "total_steps": 37885, "loss": 0.0018, "lr": 1.6565240407171067e-06, "epoch": 1.7242972152566978, "percentage": 34.49, "elapsed_time": "0:18:46", "remaining_time": "0:35:39", "throughput": 5709.03, "total_tokens": 6429824}
|
|
{"current_steps": 13070, "total_steps": 37885, "loss": 0.1615, "lr": 1.6561764641129371e-06, "epoch": 1.7249571070344465, "percentage": 34.5, "elapsed_time": "0:18:46", "remaining_time": "0:35:38", "throughput": 5709.41, "total_tokens": 6432128}
|
|
{"current_steps": 13075, "total_steps": 37885, "loss": 0.1621, "lr": 1.655828748240065e-06, "epoch": 1.7256169988121948, "percentage": 34.51, "elapsed_time": "0:18:46", "remaining_time": "0:35:38", "throughput": 5709.9, "total_tokens": 6434560}
|
|
{"current_steps": 13080, "total_steps": 37885, "loss": 0.0018, "lr": 1.6554808931722902e-06, "epoch": 1.7262768905899433, "percentage": 34.53, "elapsed_time": "0:18:47", "remaining_time": "0:35:37", "throughput": 5710.5, "total_tokens": 6437120}
|
|
{"current_steps": 13085, "total_steps": 37885, "loss": 0.113, "lr": 1.6551328989834423e-06, "epoch": 1.7269367823676918, "percentage": 34.54, "elapsed_time": "0:18:47", "remaining_time": "0:35:37", "throughput": 5711.04, "total_tokens": 6439616}
|
|
{"current_steps": 13090, "total_steps": 37885, "loss": 0.0015, "lr": 1.6547847657473805e-06, "epoch": 1.72759667414544, "percentage": 34.55, "elapsed_time": "0:18:47", "remaining_time": "0:35:36", "throughput": 5711.67, "total_tokens": 6442240}
|
|
{"current_steps": 13095, "total_steps": 37885, "loss": 0.0015, "lr": 1.654436493537994e-06, "epoch": 1.7282565659231885, "percentage": 34.57, "elapsed_time": "0:18:48", "remaining_time": "0:35:35", "throughput": 5712.3, "total_tokens": 6444864}
|
|
{"current_steps": 13100, "total_steps": 37885, "loss": 0.0301, "lr": 1.6540880824292008e-06, "epoch": 1.728916457700937, "percentage": 34.58, "elapsed_time": "0:18:48", "remaining_time": "0:35:35", "throughput": 5712.8, "total_tokens": 6447296}
|
|
{"current_steps": 13105, "total_steps": 37885, "loss": 0.1013, "lr": 1.6537395324949489e-06, "epoch": 1.7295763494786855, "percentage": 34.59, "elapsed_time": "0:18:48", "remaining_time": "0:35:34", "throughput": 5713.02, "total_tokens": 6449408}
|
|
{"current_steps": 13110, "total_steps": 37885, "loss": 0.0005, "lr": 1.6533908438092149e-06, "epoch": 1.730236241256434, "percentage": 34.6, "elapsed_time": "0:18:49", "remaining_time": "0:35:33", "throughput": 5713.61, "total_tokens": 6451968}
|
|
{"current_steps": 13115, "total_steps": 37885, "loss": 0.0571, "lr": 1.6530420164460055e-06, "epoch": 1.7308961330341823, "percentage": 34.62, "elapsed_time": "0:18:49", "remaining_time": "0:35:33", "throughput": 5714.01, "total_tokens": 6454272}
|
|
{"current_steps": 13120, "total_steps": 37885, "loss": 0.0762, "lr": 1.6526930504793576e-06, "epoch": 1.7315560248119308, "percentage": 34.63, "elapsed_time": "0:18:49", "remaining_time": "0:35:32", "throughput": 5714.44, "total_tokens": 6456640}
|
|
{"current_steps": 13125, "total_steps": 37885, "loss": 0.001, "lr": 1.6523439459833357e-06, "epoch": 1.7322159165896793, "percentage": 34.64, "elapsed_time": "0:18:50", "remaining_time": "0:35:32", "throughput": 5714.98, "total_tokens": 6459136}
|
|
{"current_steps": 13130, "total_steps": 37885, "loss": 0.0997, "lr": 1.6519947030320356e-06, "epoch": 1.7328758083674276, "percentage": 34.66, "elapsed_time": "0:18:50", "remaining_time": "0:35:31", "throughput": 5715.31, "total_tokens": 6461376}
|
|
{"current_steps": 13135, "total_steps": 37885, "loss": 0.0785, "lr": 1.651645321699581e-06, "epoch": 1.7335357001451763, "percentage": 34.67, "elapsed_time": "0:18:50", "remaining_time": "0:35:30", "throughput": 5715.9, "total_tokens": 6463936}
|
|
{"current_steps": 13140, "total_steps": 37885, "loss": 0.0607, "lr": 1.6512958020601256e-06, "epoch": 1.7341955919229246, "percentage": 34.68, "elapsed_time": "0:18:51", "remaining_time": "0:35:30", "throughput": 5716.46, "total_tokens": 6466432}
|
|
{"current_steps": 13145, "total_steps": 37885, "loss": 0.0482, "lr": 1.6509461441878527e-06, "epoch": 1.734855483700673, "percentage": 34.7, "elapsed_time": "0:18:51", "remaining_time": "0:35:29", "throughput": 5716.9, "total_tokens": 6468800}
|
|
{"current_steps": 13150, "total_steps": 37885, "loss": 0.0163, "lr": 1.6505963481569745e-06, "epoch": 1.7355153754784216, "percentage": 34.71, "elapsed_time": "0:18:51", "remaining_time": "0:35:28", "throughput": 5717.5, "total_tokens": 6471360}
|
|
{"current_steps": 13155, "total_steps": 37885, "loss": 0.0005, "lr": 1.6502464140417326e-06, "epoch": 1.7361752672561699, "percentage": 34.72, "elapsed_time": "0:18:52", "remaining_time": "0:35:28", "throughput": 5718.04, "total_tokens": 6473856}
|
|
{"current_steps": 13160, "total_steps": 37885, "loss": 0.2147, "lr": 1.6498963419163978e-06, "epoch": 1.7368351590339184, "percentage": 34.74, "elapsed_time": "0:18:52", "remaining_time": "0:35:27", "throughput": 5718.53, "total_tokens": 6476288}
|
|
{"current_steps": 13165, "total_steps": 37885, "loss": 0.1285, "lr": 1.6495461318552708e-06, "epoch": 1.7374950508116669, "percentage": 34.75, "elapsed_time": "0:18:52", "remaining_time": "0:35:27", "throughput": 5718.87, "total_tokens": 6478528}
|
|
{"current_steps": 13170, "total_steps": 37885, "loss": 0.0915, "lr": 1.6491957839326812e-06, "epoch": 1.7381549425894154, "percentage": 34.76, "elapsed_time": "0:18:53", "remaining_time": "0:35:26", "throughput": 5719.42, "total_tokens": 6481024}
|
|
{"current_steps": 13175, "total_steps": 37885, "loss": 0.0941, "lr": 1.6488452982229873e-06, "epoch": 1.7388148343671639, "percentage": 34.78, "elapsed_time": "0:18:53", "remaining_time": "0:35:25", "throughput": 5719.63, "total_tokens": 6483136}
|
|
{"current_steps": 13180, "total_steps": 37885, "loss": 0.127, "lr": 1.6484946748005773e-06, "epoch": 1.7394747261449122, "percentage": 34.79, "elapsed_time": "0:18:53", "remaining_time": "0:35:25", "throughput": 5720.33, "total_tokens": 6485824}
|
|
{"current_steps": 13185, "total_steps": 37885, "loss": 0.0833, "lr": 1.6481439137398688e-06, "epoch": 1.7401346179226607, "percentage": 34.8, "elapsed_time": "0:18:54", "remaining_time": "0:35:24", "throughput": 5720.9, "total_tokens": 6488384}
|
|
{"current_steps": 13190, "total_steps": 37885, "loss": 0.0005, "lr": 1.6477930151153078e-06, "epoch": 1.7407945097004092, "percentage": 34.82, "elapsed_time": "0:18:54", "remaining_time": "0:35:24", "throughput": 5721.59, "total_tokens": 6491072}
|
|
{"current_steps": 13195, "total_steps": 37885, "loss": 0.0576, "lr": 1.6474419790013707e-06, "epoch": 1.7414544014781574, "percentage": 34.83, "elapsed_time": "0:18:54", "remaining_time": "0:35:23", "throughput": 5722.13, "total_tokens": 6493568}
|
|
{"current_steps": 13200, "total_steps": 37885, "loss": 0.0509, "lr": 1.6470908054725617e-06, "epoch": 1.7421142932559062, "percentage": 34.84, "elapsed_time": "0:18:55", "remaining_time": "0:35:22", "throughput": 5722.87, "total_tokens": 6496320}
|
|
{"current_steps": 13205, "total_steps": 37885, "loss": 0.0011, "lr": 1.6467394946034152e-06, "epoch": 1.7427741850336544, "percentage": 34.86, "elapsed_time": "0:18:55", "remaining_time": "0:35:22", "throughput": 5723.19, "total_tokens": 6498560}
|
|
{"current_steps": 13210, "total_steps": 37885, "loss": 0.0763, "lr": 1.6463880464684942e-06, "epoch": 1.743434076811403, "percentage": 34.87, "elapsed_time": "0:18:55", "remaining_time": "0:35:21", "throughput": 5723.62, "total_tokens": 6500928}
|
|
{"current_steps": 13215, "total_steps": 37885, "loss": 0.069, "lr": 1.6460364611423911e-06, "epoch": 1.7440939685891514, "percentage": 34.88, "elapsed_time": "0:18:56", "remaining_time": "0:35:20", "throughput": 5724.15, "total_tokens": 6503424}
|
|
{"current_steps": 13220, "total_steps": 37885, "loss": 0.1996, "lr": 1.6456847386997277e-06, "epoch": 1.7447538603668997, "percentage": 34.9, "elapsed_time": "0:18:56", "remaining_time": "0:35:20", "throughput": 5724.57, "total_tokens": 6505792}
|
|
{"current_steps": 13225, "total_steps": 37885, "loss": 0.0445, "lr": 1.6453328792151537e-06, "epoch": 1.7454137521446482, "percentage": 34.91, "elapsed_time": "0:18:56", "remaining_time": "0:35:19", "throughput": 5725.0, "total_tokens": 6508160}
|
|
{"current_steps": 13230, "total_steps": 37885, "loss": 0.0011, "lr": 1.6449808827633497e-06, "epoch": 1.7460736439223967, "percentage": 34.92, "elapsed_time": "0:18:57", "remaining_time": "0:35:19", "throughput": 5725.8, "total_tokens": 6510976}
|
|
{"current_steps": 13235, "total_steps": 37885, "loss": 0.0272, "lr": 1.6446287494190237e-06, "epoch": 1.7467335357001452, "percentage": 34.93, "elapsed_time": "0:18:57", "remaining_time": "0:35:18", "throughput": 5726.13, "total_tokens": 6513216}
|
|
{"current_steps": 13240, "total_steps": 37885, "loss": 0.0498, "lr": 1.6442764792569136e-06, "epoch": 1.7473934274778937, "percentage": 34.95, "elapsed_time": "0:18:57", "remaining_time": "0:35:17", "throughput": 5726.82, "total_tokens": 6515904}
|
|
{"current_steps": 13245, "total_steps": 37885, "loss": 0.1935, "lr": 1.6439240723517862e-06, "epoch": 1.748053319255642, "percentage": 34.96, "elapsed_time": "0:18:58", "remaining_time": "0:35:17", "throughput": 5727.47, "total_tokens": 6518528}
|
|
{"current_steps": 13250, "total_steps": 37885, "loss": 0.0038, "lr": 1.6435715287784375e-06, "epoch": 1.7487132110333905, "percentage": 34.97, "elapsed_time": "0:18:58", "remaining_time": "0:35:16", "throughput": 5727.96, "total_tokens": 6520960}
|
|
{"current_steps": 13255, "total_steps": 37885, "loss": 0.0558, "lr": 1.643218848611692e-06, "epoch": 1.749373102811139, "percentage": 34.99, "elapsed_time": "0:18:58", "remaining_time": "0:35:16", "throughput": 5728.55, "total_tokens": 6523520}
|
|
{"current_steps": 13260, "total_steps": 37885, "loss": 0.2054, "lr": 1.642866031926404e-06, "epoch": 1.7500329945888873, "percentage": 35.0, "elapsed_time": "0:18:59", "remaining_time": "0:35:15", "throughput": 5729.14, "total_tokens": 6526080}
|
|
{"current_steps": 13265, "total_steps": 37885, "loss": 0.342, "lr": 1.6425130787974558e-06, "epoch": 1.750692886366636, "percentage": 35.01, "elapsed_time": "0:18:59", "remaining_time": "0:35:14", "throughput": 5729.57, "total_tokens": 6528448}
|
|
{"current_steps": 13265, "total_steps": 37885, "eval_loss": 0.11515690386295319, "epoch": 1.750692886366636, "percentage": 35.01, "elapsed_time": "0:19:07", "remaining_time": "0:35:29", "throughput": 5690.44, "total_tokens": 6528448}
|
|
{"current_steps": 13270, "total_steps": 37885, "loss": 0.0822, "lr": 1.6421599892997596e-06, "epoch": 1.7513527781443843, "percentage": 35.03, "elapsed_time": "0:19:44", "remaining_time": "0:36:38", "throughput": 5511.56, "total_tokens": 6531136}
|
|
{"current_steps": 13275, "total_steps": 37885, "loss": 0.2525, "lr": 1.6418067635082555e-06, "epoch": 1.7520126699221328, "percentage": 35.04, "elapsed_time": "0:19:45", "remaining_time": "0:36:37", "throughput": 5512.27, "total_tokens": 6533824}
|
|
{"current_steps": 13280, "total_steps": 37885, "loss": 0.1643, "lr": 1.6414534014979138e-06, "epoch": 1.7526725616998813, "percentage": 35.05, "elapsed_time": "0:19:45", "remaining_time": "0:36:36", "throughput": 5512.78, "total_tokens": 6536256}
|
|
{"current_steps": 13285, "total_steps": 37885, "loss": 0.0185, "lr": 1.6410999033437323e-06, "epoch": 1.7533324534776296, "percentage": 35.07, "elapsed_time": "0:19:45", "remaining_time": "0:36:36", "throughput": 5513.29, "total_tokens": 6538688}
|
|
{"current_steps": 13290, "total_steps": 37885, "loss": 0.0024, "lr": 1.640746269120739e-06, "epoch": 1.7539923452553783, "percentage": 35.08, "elapsed_time": "0:19:46", "remaining_time": "0:36:35", "throughput": 5514.0, "total_tokens": 6541376}
|
|
{"current_steps": 13295, "total_steps": 37885, "loss": 0.001, "lr": 1.6403924989039899e-06, "epoch": 1.7546522370331266, "percentage": 35.09, "elapsed_time": "0:19:46", "remaining_time": "0:36:34", "throughput": 5514.45, "total_tokens": 6543744}
|
|
{"current_steps": 13300, "total_steps": 37885, "loss": 0.0786, "lr": 1.6400385927685706e-06, "epoch": 1.755312128810875, "percentage": 35.11, "elapsed_time": "0:19:46", "remaining_time": "0:36:34", "throughput": 5514.91, "total_tokens": 6546112}
|
|
{"current_steps": 13305, "total_steps": 37885, "loss": 0.0008, "lr": 1.6396845507895942e-06, "epoch": 1.7559720205886236, "percentage": 35.12, "elapsed_time": "0:19:47", "remaining_time": "0:36:33", "throughput": 5515.43, "total_tokens": 6548544}
|
|
{"current_steps": 13310, "total_steps": 37885, "loss": 0.0397, "lr": 1.6393303730422046e-06, "epoch": 1.7566319123663718, "percentage": 35.13, "elapsed_time": "0:19:47", "remaining_time": "0:36:32", "throughput": 5515.94, "total_tokens": 6550976}
|
|
{"current_steps": 13315, "total_steps": 37885, "loss": 0.027, "lr": 1.6389760596015727e-06, "epoch": 1.7572918041441203, "percentage": 35.15, "elapsed_time": "0:19:47", "remaining_time": "0:36:32", "throughput": 5516.56, "total_tokens": 6553536}
|
|
{"current_steps": 13320, "total_steps": 37885, "loss": 0.0348, "lr": 1.6386216105428993e-06, "epoch": 1.7579516959218688, "percentage": 35.16, "elapsed_time": "0:19:48", "remaining_time": "0:36:31", "throughput": 5517.22, "total_tokens": 6556160}
|
|
{"current_steps": 13325, "total_steps": 37885, "loss": 0.0428, "lr": 1.6382670259414138e-06, "epoch": 1.7586115876996171, "percentage": 35.17, "elapsed_time": "0:19:48", "remaining_time": "0:36:30", "throughput": 5517.53, "total_tokens": 6558336}
|
|
{"current_steps": 13330, "total_steps": 37885, "loss": 0.1086, "lr": 1.637912305872374e-06, "epoch": 1.7592714794773658, "percentage": 35.19, "elapsed_time": "0:19:48", "remaining_time": "0:36:30", "throughput": 5518.17, "total_tokens": 6560960}
|
|
{"current_steps": 13335, "total_steps": 37885, "loss": 0.0529, "lr": 1.6375574504110664e-06, "epoch": 1.7599313712551141, "percentage": 35.2, "elapsed_time": "0:19:49", "remaining_time": "0:36:29", "throughput": 5518.53, "total_tokens": 6563200}
|
|
{"current_steps": 13340, "total_steps": 37885, "loss": 0.0012, "lr": 1.637202459632807e-06, "epoch": 1.7605912630328626, "percentage": 35.21, "elapsed_time": "0:19:49", "remaining_time": "0:36:28", "throughput": 5519.42, "total_tokens": 6566144}
|
|
{"current_steps": 13345, "total_steps": 37885, "loss": 0.1122, "lr": 1.6368473336129395e-06, "epoch": 1.7612511548106111, "percentage": 35.23, "elapsed_time": "0:19:49", "remaining_time": "0:36:28", "throughput": 5519.89, "total_tokens": 6568512}
|
|
{"current_steps": 13350, "total_steps": 37885, "loss": 0.1335, "lr": 1.6364920724268374e-06, "epoch": 1.7619110465883594, "percentage": 35.24, "elapsed_time": "0:19:50", "remaining_time": "0:36:27", "throughput": 5520.2, "total_tokens": 6570688}
|
|
{"current_steps": 13355, "total_steps": 37885, "loss": 0.2444, "lr": 1.6361366761499023e-06, "epoch": 1.7625709383661081, "percentage": 35.25, "elapsed_time": "0:19:50", "remaining_time": "0:36:26", "throughput": 5520.91, "total_tokens": 6573376}
|
|
{"current_steps": 13360, "total_steps": 37885, "loss": 0.0877, "lr": 1.6357811448575638e-06, "epoch": 1.7632308301438564, "percentage": 35.26, "elapsed_time": "0:19:50", "remaining_time": "0:36:26", "throughput": 5521.15, "total_tokens": 6575488}
|
|
{"current_steps": 13365, "total_steps": 37885, "loss": 0.0697, "lr": 1.6354254786252813e-06, "epoch": 1.763890721921605, "percentage": 35.28, "elapsed_time": "0:19:51", "remaining_time": "0:36:25", "throughput": 5521.62, "total_tokens": 6577856}
|
|
{"current_steps": 13370, "total_steps": 37885, "loss": 0.0228, "lr": 1.6350696775285425e-06, "epoch": 1.7645506136993534, "percentage": 35.29, "elapsed_time": "0:19:51", "remaining_time": "0:36:24", "throughput": 5521.97, "total_tokens": 6580096}
|
|
{"current_steps": 13375, "total_steps": 37885, "loss": 0.0148, "lr": 1.6347137416428637e-06, "epoch": 1.7652105054771017, "percentage": 35.3, "elapsed_time": "0:19:51", "remaining_time": "0:36:24", "throughput": 5522.52, "total_tokens": 6582592}
|
|
{"current_steps": 13380, "total_steps": 37885, "loss": 0.1398, "lr": 1.634357671043789e-06, "epoch": 1.7658703972548502, "percentage": 35.32, "elapsed_time": "0:19:52", "remaining_time": "0:36:23", "throughput": 5523.19, "total_tokens": 6585216}
|
|
{"current_steps": 13385, "total_steps": 37885, "loss": 0.0012, "lr": 1.6340014658068923e-06, "epoch": 1.7665302890325987, "percentage": 35.33, "elapsed_time": "0:19:52", "remaining_time": "0:36:22", "throughput": 5523.98, "total_tokens": 6588032}
|
|
{"current_steps": 13390, "total_steps": 37885, "loss": 0.0724, "lr": 1.6336451260077757e-06, "epoch": 1.767190180810347, "percentage": 35.34, "elapsed_time": "0:19:52", "remaining_time": "0:36:22", "throughput": 5524.54, "total_tokens": 6590528}
|
|
{"current_steps": 13395, "total_steps": 37885, "loss": 0.1463, "lr": 1.6332886517220694e-06, "epoch": 1.7678500725880957, "percentage": 35.36, "elapsed_time": "0:19:53", "remaining_time": "0:36:21", "throughput": 5525.03, "total_tokens": 6592960}
|
|
{"current_steps": 13400, "total_steps": 37885, "loss": 0.0916, "lr": 1.632932043025433e-06, "epoch": 1.768509964365844, "percentage": 35.37, "elapsed_time": "0:19:53", "remaining_time": "0:36:21", "throughput": 5525.33, "total_tokens": 6595136}
|
|
{"current_steps": 13405, "total_steps": 37885, "loss": 0.2149, "lr": 1.6325752999935539e-06, "epoch": 1.7691698561435925, "percentage": 35.38, "elapsed_time": "0:19:53", "remaining_time": "0:36:20", "throughput": 5526.03, "total_tokens": 6597824}
|
|
{"current_steps": 13410, "total_steps": 37885, "loss": 0.0036, "lr": 1.6322184227021479e-06, "epoch": 1.769829747921341, "percentage": 35.4, "elapsed_time": "0:19:54", "remaining_time": "0:36:19", "throughput": 5526.45, "total_tokens": 6600128}
|
|
{"current_steps": 13415, "total_steps": 37885, "loss": 0.0014, "lr": 1.6318614112269598e-06, "epoch": 1.7704896396990892, "percentage": 35.41, "elapsed_time": "0:19:54", "remaining_time": "0:36:19", "throughput": 5527.26, "total_tokens": 6602944}
|
|
{"current_steps": 13420, "total_steps": 37885, "loss": 0.1428, "lr": 1.631504265643763e-06, "epoch": 1.771149531476838, "percentage": 35.42, "elapsed_time": "0:19:54", "remaining_time": "0:36:18", "throughput": 5527.62, "total_tokens": 6605184}
|
|
{"current_steps": 13425, "total_steps": 37885, "loss": 0.0775, "lr": 1.6311469860283584e-06, "epoch": 1.7718094232545862, "percentage": 35.44, "elapsed_time": "0:19:55", "remaining_time": "0:36:17", "throughput": 5528.19, "total_tokens": 6607680}
|
|
{"current_steps": 13430, "total_steps": 37885, "loss": 0.0653, "lr": 1.6307895724565768e-06, "epoch": 1.7724693150323347, "percentage": 35.45, "elapsed_time": "0:19:55", "remaining_time": "0:36:17", "throughput": 5528.64, "total_tokens": 6610048}
|
|
{"current_steps": 13435, "total_steps": 37885, "loss": 0.0665, "lr": 1.6304320250042761e-06, "epoch": 1.7731292068100832, "percentage": 35.46, "elapsed_time": "0:19:55", "remaining_time": "0:36:16", "throughput": 5529.16, "total_tokens": 6612480}
|
|
{"current_steps": 13440, "total_steps": 37885, "loss": 0.0553, "lr": 1.6300743437473434e-06, "epoch": 1.7737890985878315, "percentage": 35.48, "elapsed_time": "0:19:56", "remaining_time": "0:36:15", "throughput": 5529.57, "total_tokens": 6614784}
|
|
{"current_steps": 13445, "total_steps": 37885, "loss": 0.0466, "lr": 1.6297165287616936e-06, "epoch": 1.77444899036558, "percentage": 35.49, "elapsed_time": "0:19:56", "remaining_time": "0:36:15", "throughput": 5530.03, "total_tokens": 6617152}
|
|
{"current_steps": 13450, "total_steps": 37885, "loss": 0.2752, "lr": 1.629358580123271e-06, "epoch": 1.7751088821433285, "percentage": 35.5, "elapsed_time": "0:19:56", "remaining_time": "0:36:14", "throughput": 5530.56, "total_tokens": 6619648}
|
|
{"current_steps": 13455, "total_steps": 37885, "loss": 0.0518, "lr": 1.6290004979080473e-06, "epoch": 1.7757687739210768, "percentage": 35.52, "elapsed_time": "0:19:57", "remaining_time": "0:36:13", "throughput": 5531.02, "total_tokens": 6622016}
|
|
{"current_steps": 13460, "total_steps": 37885, "loss": 0.1611, "lr": 1.6286422821920222e-06, "epoch": 1.7764286656988255, "percentage": 35.53, "elapsed_time": "0:19:57", "remaining_time": "0:36:13", "throughput": 5531.46, "total_tokens": 6624384}
|
|
{"current_steps": 13465, "total_steps": 37885, "loss": 0.1648, "lr": 1.6282839330512252e-06, "epoch": 1.7770885574765738, "percentage": 35.54, "elapsed_time": "0:19:57", "remaining_time": "0:36:12", "throughput": 5532.03, "total_tokens": 6626880}
|
|
{"current_steps": 13470, "total_steps": 37885, "loss": 0.0707, "lr": 1.6279254505617134e-06, "epoch": 1.7777484492543223, "percentage": 35.55, "elapsed_time": "0:19:58", "remaining_time": "0:36:11", "throughput": 5532.88, "total_tokens": 6629760}
|
|
{"current_steps": 13475, "total_steps": 37885, "loss": 0.0006, "lr": 1.6275668347995714e-06, "epoch": 1.7784083410320708, "percentage": 35.57, "elapsed_time": "0:19:58", "remaining_time": "0:36:11", "throughput": 5533.67, "total_tokens": 6632576}
|
|
{"current_steps": 13480, "total_steps": 37885, "loss": 0.0429, "lr": 1.6272080858409138e-06, "epoch": 1.779068232809819, "percentage": 35.58, "elapsed_time": "0:19:58", "remaining_time": "0:36:10", "throughput": 5534.28, "total_tokens": 6635136}
|
|
{"current_steps": 13485, "total_steps": 37885, "loss": 0.0012, "lr": 1.6268492037618815e-06, "epoch": 1.7797281245875678, "percentage": 35.59, "elapsed_time": "0:19:59", "remaining_time": "0:36:09", "throughput": 5534.68, "total_tokens": 6637440}
|
|
{"current_steps": 13490, "total_steps": 37885, "loss": 0.0592, "lr": 1.6264901886386448e-06, "epoch": 1.780388016365316, "percentage": 35.61, "elapsed_time": "0:19:59", "remaining_time": "0:36:09", "throughput": 5535.53, "total_tokens": 6640320}
|
|
{"current_steps": 13495, "total_steps": 37885, "loss": 0.1364, "lr": 1.6261310405474022e-06, "epoch": 1.7810479081430646, "percentage": 35.62, "elapsed_time": "0:19:59", "remaining_time": "0:36:08", "throughput": 5536.32, "total_tokens": 6643136}
|
|
{"current_steps": 13500, "total_steps": 37885, "loss": 0.0181, "lr": 1.6257717595643807e-06, "epoch": 1.781707799920813, "percentage": 35.63, "elapsed_time": "0:20:00", "remaining_time": "0:36:07", "throughput": 5536.85, "total_tokens": 6645568}
|
|
{"current_steps": 13505, "total_steps": 37885, "loss": 0.1195, "lr": 1.6254123457658346e-06, "epoch": 1.7823676916985614, "percentage": 35.65, "elapsed_time": "0:20:00", "remaining_time": "0:36:07", "throughput": 5537.47, "total_tokens": 6648128}
|
|
{"current_steps": 13510, "total_steps": 37885, "loss": 0.0976, "lr": 1.625052799228047e-06, "epoch": 1.7830275834763099, "percentage": 35.66, "elapsed_time": "0:20:00", "remaining_time": "0:36:06", "throughput": 5538.23, "total_tokens": 6650880}
|
|
{"current_steps": 13515, "total_steps": 37885, "loss": 0.0987, "lr": 1.624693120027329e-06, "epoch": 1.7836874752540584, "percentage": 35.67, "elapsed_time": "0:20:01", "remaining_time": "0:36:06", "throughput": 5538.86, "total_tokens": 6653504}
|
|
{"current_steps": 13520, "total_steps": 37885, "loss": 0.0598, "lr": 1.6243333082400197e-06, "epoch": 1.7843473670318066, "percentage": 35.69, "elapsed_time": "0:20:01", "remaining_time": "0:36:05", "throughput": 5539.37, "total_tokens": 6655936}
|
|
{"current_steps": 13525, "total_steps": 37885, "loss": 0.0493, "lr": 1.623973363942487e-06, "epoch": 1.7850072588095554, "percentage": 35.7, "elapsed_time": "0:20:01", "remaining_time": "0:36:04", "throughput": 5539.89, "total_tokens": 6658432}
|
|
{"current_steps": 13530, "total_steps": 37885, "loss": 0.0007, "lr": 1.6236132872111266e-06, "epoch": 1.7856671505873036, "percentage": 35.71, "elapsed_time": "0:20:02", "remaining_time": "0:36:04", "throughput": 5540.3, "total_tokens": 6660800}
|
|
{"current_steps": 13535, "total_steps": 37885, "loss": 0.0938, "lr": 1.6232530781223613e-06, "epoch": 1.7863270423650521, "percentage": 35.73, "elapsed_time": "0:20:02", "remaining_time": "0:36:03", "throughput": 5540.77, "total_tokens": 6663232}
|
|
{"current_steps": 13540, "total_steps": 37885, "loss": 0.1012, "lr": 1.6228927367526437e-06, "epoch": 1.7869869341428006, "percentage": 35.74, "elapsed_time": "0:20:02", "remaining_time": "0:36:02", "throughput": 5541.34, "total_tokens": 6665792}
|
|
{"current_steps": 13545, "total_steps": 37885, "loss": 0.0525, "lr": 1.6225322631784533e-06, "epoch": 1.787646825920549, "percentage": 35.75, "elapsed_time": "0:20:03", "remaining_time": "0:36:02", "throughput": 5541.88, "total_tokens": 6668352}
|
|
{"current_steps": 13550, "total_steps": 37885, "loss": 0.0015, "lr": 1.622171657476298e-06, "epoch": 1.7883067176982976, "percentage": 35.77, "elapsed_time": "0:20:03", "remaining_time": "0:36:01", "throughput": 5542.48, "total_tokens": 6670976}
|
|
{"current_steps": 13555, "total_steps": 37885, "loss": 0.0724, "lr": 1.621810919722714e-06, "epoch": 1.788966609476046, "percentage": 35.78, "elapsed_time": "0:20:03", "remaining_time": "0:36:01", "throughput": 5542.91, "total_tokens": 6673472}
|
|
{"current_steps": 13560, "total_steps": 37885, "loss": 0.0431, "lr": 1.6214500499942649e-06, "epoch": 1.7896265012537944, "percentage": 35.79, "elapsed_time": "0:20:04", "remaining_time": "0:36:00", "throughput": 5543.35, "total_tokens": 6675904}
|
|
{"current_steps": 13565, "total_steps": 37885, "loss": 0.1878, "lr": 1.6210890483675427e-06, "epoch": 1.790286393031543, "percentage": 35.81, "elapsed_time": "0:20:04", "remaining_time": "0:35:59", "throughput": 5543.68, "total_tokens": 6678208}
|
|
{"current_steps": 13570, "total_steps": 37885, "loss": 0.0459, "lr": 1.620727914919168e-06, "epoch": 1.7909462848092912, "percentage": 35.82, "elapsed_time": "0:20:04", "remaining_time": "0:35:59", "throughput": 5544.19, "total_tokens": 6680704}
|
|
{"current_steps": 13575, "total_steps": 37885, "loss": 0.0964, "lr": 1.620366649725788e-06, "epoch": 1.7916061765870397, "percentage": 35.83, "elapsed_time": "0:20:05", "remaining_time": "0:35:58", "throughput": 5544.64, "total_tokens": 6683136}
|
|
{"current_steps": 13580, "total_steps": 37885, "loss": 0.0529, "lr": 1.6200052528640792e-06, "epoch": 1.7922660683647882, "percentage": 35.85, "elapsed_time": "0:20:05", "remaining_time": "0:35:57", "throughput": 5545.14, "total_tokens": 6685632}
|
|
{"current_steps": 13585, "total_steps": 37885, "loss": 0.1235, "lr": 1.619643724410745e-06, "epoch": 1.7929259601425365, "percentage": 35.86, "elapsed_time": "0:20:06", "remaining_time": "0:35:57", "throughput": 5545.44, "total_tokens": 6687872}
|
|
{"current_steps": 13590, "total_steps": 37885, "loss": 0.002, "lr": 1.6192820644425176e-06, "epoch": 1.7935858519202852, "percentage": 35.87, "elapsed_time": "0:20:06", "remaining_time": "0:35:56", "throughput": 5545.93, "total_tokens": 6690368}
|
|
{"current_steps": 13595, "total_steps": 37885, "loss": 0.0692, "lr": 1.6189202730361563e-06, "epoch": 1.7942457436980335, "percentage": 35.88, "elapsed_time": "0:20:06", "remaining_time": "0:35:55", "throughput": 5546.56, "total_tokens": 6692992}
|
|
{"current_steps": 13600, "total_steps": 37885, "loss": 0.0568, "lr": 1.618558350268449e-06, "epoch": 1.794905635475782, "percentage": 35.9, "elapsed_time": "0:20:07", "remaining_time": "0:35:55", "throughput": 5546.81, "total_tokens": 6695168}
|
|
{"current_steps": 13605, "total_steps": 37885, "loss": 0.1914, "lr": 1.618196296216211e-06, "epoch": 1.7955655272535305, "percentage": 35.91, "elapsed_time": "0:20:07", "remaining_time": "0:35:54", "throughput": 5547.23, "total_tokens": 6697536}
|
|
{"current_steps": 13610, "total_steps": 37885, "loss": 0.073, "lr": 1.6178341109562859e-06, "epoch": 1.7962254190312787, "percentage": 35.92, "elapsed_time": "0:20:07", "remaining_time": "0:35:54", "throughput": 5547.96, "total_tokens": 6700288}
|
|
{"current_steps": 13615, "total_steps": 37885, "loss": 0.0581, "lr": 1.6174717945655446e-06, "epoch": 1.7968853108090275, "percentage": 35.94, "elapsed_time": "0:20:08", "remaining_time": "0:35:53", "throughput": 5548.57, "total_tokens": 6702912}
|
|
{"current_steps": 13620, "total_steps": 37885, "loss": 0.083, "lr": 1.6171093471208863e-06, "epoch": 1.7975452025867757, "percentage": 35.95, "elapsed_time": "0:20:08", "remaining_time": "0:35:52", "throughput": 5549.11, "total_tokens": 6705408}
|
|
{"current_steps": 13625, "total_steps": 37885, "loss": 0.0008, "lr": 1.616746768699238e-06, "epoch": 1.7982050943645242, "percentage": 35.96, "elapsed_time": "0:20:08", "remaining_time": "0:35:52", "throughput": 5549.63, "total_tokens": 6707904}
|
|
{"current_steps": 13630, "total_steps": 37885, "loss": 0.0738, "lr": 1.616384059377554e-06, "epoch": 1.7988649861422727, "percentage": 35.98, "elapsed_time": "0:20:09", "remaining_time": "0:35:51", "throughput": 5550.14, "total_tokens": 6710400}
|
|
{"current_steps": 13635, "total_steps": 37885, "loss": 0.0009, "lr": 1.616021219232817e-06, "epoch": 1.799524877920021, "percentage": 35.99, "elapsed_time": "0:20:09", "remaining_time": "0:35:50", "throughput": 5550.68, "total_tokens": 6712896}
|
|
{"current_steps": 13640, "total_steps": 37885, "loss": 0.0371, "lr": 1.6156582483420374e-06, "epoch": 1.8001847696977695, "percentage": 36.0, "elapsed_time": "0:20:09", "remaining_time": "0:35:50", "throughput": 5551.28, "total_tokens": 6715520}
|
|
{"current_steps": 13645, "total_steps": 37885, "loss": 0.1065, "lr": 1.6152951467822523e-06, "epoch": 1.800844661475518, "percentage": 36.02, "elapsed_time": "0:20:10", "remaining_time": "0:35:49", "throughput": 5551.85, "total_tokens": 6718080}
|
|
{"current_steps": 13650, "total_steps": 37885, "loss": 0.1254, "lr": 1.614931914630528e-06, "epoch": 1.8015045532532663, "percentage": 36.03, "elapsed_time": "0:20:10", "remaining_time": "0:35:49", "throughput": 5552.16, "total_tokens": 6720320}
|
|
{"current_steps": 13655, "total_steps": 37885, "loss": 0.0896, "lr": 1.6145685519639577e-06, "epoch": 1.802164445031015, "percentage": 36.04, "elapsed_time": "0:20:10", "remaining_time": "0:35:48", "throughput": 5552.68, "total_tokens": 6722816}
|
|
{"current_steps": 13660, "total_steps": 37885, "loss": 0.0551, "lr": 1.6142050588596631e-06, "epoch": 1.8028243368087633, "percentage": 36.06, "elapsed_time": "0:20:11", "remaining_time": "0:35:47", "throughput": 5553.05, "total_tokens": 6725120}
|
|
{"current_steps": 13665, "total_steps": 37885, "loss": 0.0597, "lr": 1.6138414353947923e-06, "epoch": 1.8034842285865118, "percentage": 36.07, "elapsed_time": "0:20:11", "remaining_time": "0:35:47", "throughput": 5553.55, "total_tokens": 6727616}
|
|
{"current_steps": 13670, "total_steps": 37885, "loss": 0.0805, "lr": 1.613477681646522e-06, "epoch": 1.8041441203642603, "percentage": 36.08, "elapsed_time": "0:20:11", "remaining_time": "0:35:46", "throughput": 5554.2, "total_tokens": 6730240}
|
|
{"current_steps": 13675, "total_steps": 37885, "loss": 0.0583, "lr": 1.6131137976920556e-06, "epoch": 1.8048040121420086, "percentage": 36.1, "elapsed_time": "0:20:12", "remaining_time": "0:35:45", "throughput": 5554.64, "total_tokens": 6732608}
|
|
{"current_steps": 13680, "total_steps": 37885, "loss": 0.1257, "lr": 1.612749783608626e-06, "epoch": 1.8054639039197573, "percentage": 36.11, "elapsed_time": "0:20:12", "remaining_time": "0:35:45", "throughput": 5555.22, "total_tokens": 6735168}
|
|
{"current_steps": 13685, "total_steps": 37885, "loss": 0.1853, "lr": 1.612385639473492e-06, "epoch": 1.8061237956975056, "percentage": 36.12, "elapsed_time": "0:20:12", "remaining_time": "0:35:44", "throughput": 5555.76, "total_tokens": 6737664}
|
|
{"current_steps": 13690, "total_steps": 37885, "loss": 0.0448, "lr": 1.6120213653639407e-06, "epoch": 1.806783687475254, "percentage": 36.14, "elapsed_time": "0:20:13", "remaining_time": "0:35:43", "throughput": 5556.33, "total_tokens": 6740224}
|
|
{"current_steps": 13695, "total_steps": 37885, "loss": 0.0295, "lr": 1.6116569613572861e-06, "epoch": 1.8074435792530026, "percentage": 36.15, "elapsed_time": "0:20:13", "remaining_time": "0:35:43", "throughput": 5556.96, "total_tokens": 6742848}
|
|
{"current_steps": 13700, "total_steps": 37885, "loss": 0.1216, "lr": 1.611292427530871e-06, "epoch": 1.8081034710307509, "percentage": 36.16, "elapsed_time": "0:20:13", "remaining_time": "0:35:42", "throughput": 5557.59, "total_tokens": 6745472}
|
|
{"current_steps": 13705, "total_steps": 37885, "loss": 0.0853, "lr": 1.6109277639620648e-06, "epoch": 1.8087633628084994, "percentage": 36.18, "elapsed_time": "0:20:14", "remaining_time": "0:35:42", "throughput": 5558.07, "total_tokens": 6747904}
|
|
{"current_steps": 13710, "total_steps": 37885, "loss": 0.1297, "lr": 1.6105629707282645e-06, "epoch": 1.8094232545862479, "percentage": 36.19, "elapsed_time": "0:20:14", "remaining_time": "0:35:41", "throughput": 5558.77, "total_tokens": 6750592}
|
|
{"current_steps": 13715, "total_steps": 37885, "loss": 0.0997, "lr": 1.6101980479068954e-06, "epoch": 1.8100831463639961, "percentage": 36.2, "elapsed_time": "0:20:14", "remaining_time": "0:35:40", "throughput": 5559.24, "total_tokens": 6753024}
|
|
{"current_steps": 13720, "total_steps": 37885, "loss": 0.0846, "lr": 1.609832995575409e-06, "epoch": 1.8107430381417449, "percentage": 36.21, "elapsed_time": "0:20:15", "remaining_time": "0:35:40", "throughput": 5559.67, "total_tokens": 6755392}
|
|
{"current_steps": 13725, "total_steps": 37885, "loss": 0.0217, "lr": 1.6094678138112854e-06, "epoch": 1.8114029299194931, "percentage": 36.23, "elapsed_time": "0:20:15", "remaining_time": "0:35:39", "throughput": 5560.05, "total_tokens": 6757696}
|
|
{"current_steps": 13730, "total_steps": 37885, "loss": 0.0975, "lr": 1.6091025026920316e-06, "epoch": 1.8120628216972416, "percentage": 36.24, "elapsed_time": "0:20:15", "remaining_time": "0:35:38", "throughput": 5560.52, "total_tokens": 6760128}
|
|
{"current_steps": 13735, "total_steps": 37885, "loss": 0.122, "lr": 1.6087370622951824e-06, "epoch": 1.8127227134749901, "percentage": 36.25, "elapsed_time": "0:20:16", "remaining_time": "0:35:38", "throughput": 5560.95, "total_tokens": 6762496}
|
|
{"current_steps": 13740, "total_steps": 37885, "loss": 0.0623, "lr": 1.6083714926983004e-06, "epoch": 1.8133826052527384, "percentage": 36.27, "elapsed_time": "0:20:16", "remaining_time": "0:35:37", "throughput": 5561.56, "total_tokens": 6765120}
|
|
{"current_steps": 13745, "total_steps": 37885, "loss": 0.0799, "lr": 1.608005793978974e-06, "epoch": 1.8140424970304871, "percentage": 36.28, "elapsed_time": "0:20:16", "remaining_time": "0:35:36", "throughput": 5562.23, "total_tokens": 6767808}
|
|
{"current_steps": 13750, "total_steps": 37885, "loss": 0.1322, "lr": 1.6076399662148207e-06, "epoch": 1.8147023888082354, "percentage": 36.29, "elapsed_time": "0:20:17", "remaining_time": "0:35:36", "throughput": 5562.7, "total_tokens": 6770240}
|
|
{"current_steps": 13755, "total_steps": 37885, "loss": 0.0825, "lr": 1.6072740094834848e-06, "epoch": 1.815362280585984, "percentage": 36.31, "elapsed_time": "0:20:17", "remaining_time": "0:35:35", "throughput": 5563.1, "total_tokens": 6772608}
|
|
{"current_steps": 13760, "total_steps": 37885, "loss": 0.1762, "lr": 1.606907923862638e-06, "epoch": 1.8160221723637324, "percentage": 36.32, "elapsed_time": "0:20:17", "remaining_time": "0:35:35", "throughput": 5563.51, "total_tokens": 6774976}
|
|
{"current_steps": 13765, "total_steps": 37885, "loss": 0.0425, "lr": 1.6065417094299793e-06, "epoch": 1.8166820641414807, "percentage": 36.33, "elapsed_time": "0:20:18", "remaining_time": "0:35:34", "throughput": 5563.99, "total_tokens": 6777408}
|
|
{"current_steps": 13770, "total_steps": 37885, "loss": 0.0008, "lr": 1.6061753662632352e-06, "epoch": 1.8173419559192292, "percentage": 36.35, "elapsed_time": "0:20:18", "remaining_time": "0:35:33", "throughput": 5564.25, "total_tokens": 6779584}
|
|
{"current_steps": 13775, "total_steps": 37885, "loss": 0.0566, "lr": 1.6058088944401586e-06, "epoch": 1.8180018476969777, "percentage": 36.36, "elapsed_time": "0:20:18", "remaining_time": "0:35:33", "throughput": 5564.7, "total_tokens": 6782016}
|
|
{"current_steps": 13780, "total_steps": 37885, "loss": 0.07, "lr": 1.6054422940385315e-06, "epoch": 1.818661739474726, "percentage": 36.37, "elapsed_time": "0:20:19", "remaining_time": "0:35:32", "throughput": 5565.27, "total_tokens": 6784576}
|
|
{"current_steps": 13785, "total_steps": 37885, "loss": 0.0423, "lr": 1.6050755651361617e-06, "epoch": 1.8193216312524747, "percentage": 36.39, "elapsed_time": "0:20:19", "remaining_time": "0:35:31", "throughput": 5565.77, "total_tokens": 6787072}
|
|
{"current_steps": 13790, "total_steps": 37885, "loss": 0.1433, "lr": 1.6047087078108848e-06, "epoch": 1.819981523030223, "percentage": 36.4, "elapsed_time": "0:20:19", "remaining_time": "0:35:31", "throughput": 5566.15, "total_tokens": 6789376}
|
|
{"current_steps": 13795, "total_steps": 37885, "loss": 0.0027, "lr": 1.6043417221405636e-06, "epoch": 1.8206414148079715, "percentage": 36.41, "elapsed_time": "0:20:20", "remaining_time": "0:35:30", "throughput": 5566.77, "total_tokens": 6792000}
|
|
{"current_steps": 13800, "total_steps": 37885, "loss": 0.0624, "lr": 1.6039746082030878e-06, "epoch": 1.82130130658572, "percentage": 36.43, "elapsed_time": "0:20:20", "remaining_time": "0:35:30", "throughput": 5567.18, "total_tokens": 6794368}
|
|
{"current_steps": 13805, "total_steps": 37885, "loss": 0.0008, "lr": 1.6036073660763755e-06, "epoch": 1.8219611983634683, "percentage": 36.44, "elapsed_time": "0:20:20", "remaining_time": "0:35:29", "throughput": 5567.66, "total_tokens": 6796800}
|
|
{"current_steps": 13810, "total_steps": 37885, "loss": 0.0646, "lr": 1.6032399958383706e-06, "epoch": 1.822621090141217, "percentage": 36.45, "elapsed_time": "0:20:21", "remaining_time": "0:35:28", "throughput": 5568.14, "total_tokens": 6799232}
|
|
{"current_steps": 13815, "total_steps": 37885, "loss": 0.1131, "lr": 1.6028724975670454e-06, "epoch": 1.8232809819189653, "percentage": 36.47, "elapsed_time": "0:20:21", "remaining_time": "0:35:28", "throughput": 5568.71, "total_tokens": 6801792}
|
|
{"current_steps": 13820, "total_steps": 37885, "loss": 0.1451, "lr": 1.6025048713403977e-06, "epoch": 1.8239408736967138, "percentage": 36.48, "elapsed_time": "0:20:21", "remaining_time": "0:35:27", "throughput": 5569.32, "total_tokens": 6804416}
|
|
{"current_steps": 13825, "total_steps": 37885, "loss": 0.0421, "lr": 1.6021371172364543e-06, "epoch": 1.8246007654744623, "percentage": 36.49, "elapsed_time": "0:20:22", "remaining_time": "0:35:26", "throughput": 5569.81, "total_tokens": 6806912}
|
|
{"current_steps": 13830, "total_steps": 37885, "loss": 0.0012, "lr": 1.6017692353332676e-06, "epoch": 1.8252606572522105, "percentage": 36.51, "elapsed_time": "0:20:22", "remaining_time": "0:35:26", "throughput": 5570.25, "total_tokens": 6809280}
|
|
{"current_steps": 13835, "total_steps": 37885, "loss": 0.0015, "lr": 1.6014012257089186e-06, "epoch": 1.825920549029959, "percentage": 36.52, "elapsed_time": "0:20:22", "remaining_time": "0:35:25", "throughput": 5570.8, "total_tokens": 6811776}
|
|
{"current_steps": 13840, "total_steps": 37885, "loss": 0.0004, "lr": 1.6010330884415146e-06, "epoch": 1.8265804408077075, "percentage": 36.53, "elapsed_time": "0:20:23", "remaining_time": "0:35:24", "throughput": 5571.17, "total_tokens": 6814080}
|
|
{"current_steps": 13845, "total_steps": 37885, "loss": 0.088, "lr": 1.6006648236091903e-06, "epoch": 1.827240332585456, "percentage": 36.54, "elapsed_time": "0:20:23", "remaining_time": "0:35:24", "throughput": 5571.63, "total_tokens": 6816512}
|
|
{"current_steps": 13850, "total_steps": 37885, "loss": 0.0002, "lr": 1.600296431290106e-06, "epoch": 1.8279002243632045, "percentage": 36.56, "elapsed_time": "0:20:23", "remaining_time": "0:35:23", "throughput": 5571.94, "total_tokens": 6818752}
|
|
{"current_steps": 13855, "total_steps": 37885, "loss": 0.1294, "lr": 1.5999279115624517e-06, "epoch": 1.8285601161409528, "percentage": 36.57, "elapsed_time": "0:20:24", "remaining_time": "0:35:23", "throughput": 5572.4, "total_tokens": 6821248}
|
|
{"current_steps": 13860, "total_steps": 37885, "loss": 0.1328, "lr": 1.5995592645044424e-06, "epoch": 1.8292200079187013, "percentage": 36.58, "elapsed_time": "0:20:24", "remaining_time": "0:35:22", "throughput": 5572.96, "total_tokens": 6823808}
|
|
{"current_steps": 13865, "total_steps": 37885, "loss": 0.1448, "lr": 1.599190490194321e-06, "epoch": 1.8298798996964498, "percentage": 36.6, "elapsed_time": "0:20:24", "remaining_time": "0:35:21", "throughput": 5573.57, "total_tokens": 6826432}
|
|
{"current_steps": 13870, "total_steps": 37885, "loss": 0.0097, "lr": 1.5988215887103568e-06, "epoch": 1.830539791474198, "percentage": 36.61, "elapsed_time": "0:20:25", "remaining_time": "0:35:21", "throughput": 5574.0, "total_tokens": 6828800}
|
|
{"current_steps": 13875, "total_steps": 37885, "loss": 0.1828, "lr": 1.598452560130847e-06, "epoch": 1.8311996832519468, "percentage": 36.62, "elapsed_time": "0:20:25", "remaining_time": "0:35:20", "throughput": 5574.58, "total_tokens": 6831360}
|
|
{"current_steps": 13880, "total_steps": 37885, "loss": 0.1433, "lr": 1.598083404534115e-06, "epoch": 1.831859575029695, "percentage": 36.64, "elapsed_time": "0:20:25", "remaining_time": "0:35:19", "throughput": 5574.97, "total_tokens": 6833664}
|
|
{"current_steps": 13885, "total_steps": 37885, "loss": 0.065, "lr": 1.597714121998511e-06, "epoch": 1.8325194668074436, "percentage": 36.65, "elapsed_time": "0:20:26", "remaining_time": "0:35:19", "throughput": 5575.31, "total_tokens": 6835904}
|
|
{"current_steps": 13890, "total_steps": 37885, "loss": 0.1494, "lr": 1.5973447126024131e-06, "epoch": 1.833179358585192, "percentage": 36.66, "elapsed_time": "0:20:26", "remaining_time": "0:35:18", "throughput": 5575.8, "total_tokens": 6838336}
|
|
{"current_steps": 13895, "total_steps": 37885, "loss": 0.2852, "lr": 1.596975176424226e-06, "epoch": 1.8338392503629404, "percentage": 36.68, "elapsed_time": "0:20:26", "remaining_time": "0:35:18", "throughput": 5576.47, "total_tokens": 6841024}
|
|
{"current_steps": 13900, "total_steps": 37885, "loss": 0.1151, "lr": 1.5966055135423798e-06, "epoch": 1.8344991421406889, "percentage": 36.69, "elapsed_time": "0:20:27", "remaining_time": "0:35:17", "throughput": 5576.76, "total_tokens": 6843200}
|
|
{"current_steps": 13905, "total_steps": 37885, "loss": 0.004, "lr": 1.5962357240353342e-06, "epoch": 1.8351590339184374, "percentage": 36.7, "elapsed_time": "0:20:27", "remaining_time": "0:35:16", "throughput": 5577.21, "total_tokens": 6845568}
|
|
{"current_steps": 13910, "total_steps": 37885, "loss": 0.0585, "lr": 1.5958658079815737e-06, "epoch": 1.8358189256961859, "percentage": 36.72, "elapsed_time": "0:20:27", "remaining_time": "0:35:16", "throughput": 5577.7, "total_tokens": 6848000}
|
|
{"current_steps": 13915, "total_steps": 37885, "loss": 0.0423, "lr": 1.5954957654596102e-06, "epoch": 1.8364788174739344, "percentage": 36.73, "elapsed_time": "0:20:28", "remaining_time": "0:35:15", "throughput": 5578.39, "total_tokens": 6850688}
|
|
{"current_steps": 13920, "total_steps": 37885, "loss": 0.0121, "lr": 1.595125596547983e-06, "epoch": 1.8371387092516827, "percentage": 36.74, "elapsed_time": "0:20:28", "remaining_time": "0:35:14", "throughput": 5578.74, "total_tokens": 6852928}
|
|
{"current_steps": 13925, "total_steps": 37885, "loss": 0.0006, "lr": 1.5947553013252572e-06, "epoch": 1.8377986010294312, "percentage": 36.76, "elapsed_time": "0:20:28", "remaining_time": "0:35:14", "throughput": 5579.28, "total_tokens": 6855424}
|
|
{"current_steps": 13930, "total_steps": 37885, "loss": 0.0626, "lr": 1.594384879870026e-06, "epoch": 1.8384584928071797, "percentage": 36.77, "elapsed_time": "0:20:29", "remaining_time": "0:35:13", "throughput": 5579.92, "total_tokens": 6858048}
|
|
{"current_steps": 13935, "total_steps": 37885, "loss": 0.0881, "lr": 1.594014332260908e-06, "epoch": 1.839118384584928, "percentage": 36.78, "elapsed_time": "0:20:29", "remaining_time": "0:35:12", "throughput": 5580.55, "total_tokens": 6860672}
|
|
{"current_steps": 13940, "total_steps": 37885, "loss": 0.0854, "lr": 1.5936436585765493e-06, "epoch": 1.8397782763626767, "percentage": 36.8, "elapsed_time": "0:20:29", "remaining_time": "0:35:12", "throughput": 5581.04, "total_tokens": 6863104}
|
|
{"current_steps": 13945, "total_steps": 37885, "loss": 0.1164, "lr": 1.5932728588956233e-06, "epoch": 1.840438168140425, "percentage": 36.81, "elapsed_time": "0:20:30", "remaining_time": "0:35:11", "throughput": 5581.49, "total_tokens": 6865472}
|
|
{"current_steps": 13950, "total_steps": 37885, "loss": 0.0017, "lr": 1.5929019332968285e-06, "epoch": 1.8410980599181734, "percentage": 36.82, "elapsed_time": "0:20:30", "remaining_time": "0:35:11", "throughput": 5581.92, "total_tokens": 6867840}
|
|
{"current_steps": 13955, "total_steps": 37885, "loss": 0.0018, "lr": 1.5925308818588926e-06, "epoch": 1.841757951695922, "percentage": 36.84, "elapsed_time": "0:20:30", "remaining_time": "0:35:10", "throughput": 5582.35, "total_tokens": 6870208}
|
|
{"current_steps": 13960, "total_steps": 37885, "loss": 0.055, "lr": 1.5921597046605672e-06, "epoch": 1.8424178434736702, "percentage": 36.85, "elapsed_time": "0:20:31", "remaining_time": "0:35:09", "throughput": 5582.92, "total_tokens": 6872768}
|
|
{"current_steps": 13965, "total_steps": 37885, "loss": 0.046, "lr": 1.5917884017806327e-06, "epoch": 1.8430777352514187, "percentage": 36.86, "elapsed_time": "0:20:31", "remaining_time": "0:35:09", "throughput": 5583.49, "total_tokens": 6875328}
|
|
{"current_steps": 13970, "total_steps": 37885, "loss": 0.1396, "lr": 1.5914169732978957e-06, "epoch": 1.8437376270291672, "percentage": 36.87, "elapsed_time": "0:20:31", "remaining_time": "0:35:08", "throughput": 5583.84, "total_tokens": 6877632}
|
|
{"current_steps": 13975, "total_steps": 37885, "loss": 0.1319, "lr": 1.5910454192911883e-06, "epoch": 1.8443975188069157, "percentage": 36.89, "elapsed_time": "0:20:32", "remaining_time": "0:35:07", "throughput": 5584.17, "total_tokens": 6879872}
|
|
{"current_steps": 13980, "total_steps": 37885, "loss": 0.092, "lr": 1.590673739839371e-06, "epoch": 1.8450574105846642, "percentage": 36.9, "elapsed_time": "0:20:32", "remaining_time": "0:35:07", "throughput": 5584.9, "total_tokens": 6882688}
|
|
{"current_steps": 13985, "total_steps": 37885, "loss": 0.0631, "lr": 1.5903019350213293e-06, "epoch": 1.8457173023624125, "percentage": 36.91, "elapsed_time": "0:20:32", "remaining_time": "0:35:06", "throughput": 5585.44, "total_tokens": 6885248}
|
|
{"current_steps": 13990, "total_steps": 37885, "loss": 0.0017, "lr": 1.589930004915977e-06, "epoch": 1.846377194140161, "percentage": 36.93, "elapsed_time": "0:20:33", "remaining_time": "0:35:06", "throughput": 5585.82, "total_tokens": 6887552}
|
|
{"current_steps": 13995, "total_steps": 37885, "loss": 0.0009, "lr": 1.5895579496022532e-06, "epoch": 1.8470370859179095, "percentage": 36.94, "elapsed_time": "0:20:33", "remaining_time": "0:35:05", "throughput": 5586.31, "total_tokens": 6889984}
|
|
{"current_steps": 14000, "total_steps": 37885, "loss": 0.3151, "lr": 1.5891857691591235e-06, "epoch": 1.8476969776956578, "percentage": 36.95, "elapsed_time": "0:20:33", "remaining_time": "0:35:04", "throughput": 5586.84, "total_tokens": 6892480}
|
|
{"current_steps": 14005, "total_steps": 37885, "loss": 0.0002, "lr": 1.588813463665581e-06, "epoch": 1.8483568694734065, "percentage": 36.97, "elapsed_time": "0:20:34", "remaining_time": "0:35:04", "throughput": 5587.33, "total_tokens": 6894912}
|
|
{"current_steps": 14010, "total_steps": 37885, "loss": 0.1177, "lr": 1.5884410332006443e-06, "epoch": 1.8490167612511548, "percentage": 36.98, "elapsed_time": "0:20:34", "remaining_time": "0:35:03", "throughput": 5587.86, "total_tokens": 6897408}
|
|
{"current_steps": 14015, "total_steps": 37885, "loss": 0.0403, "lr": 1.58806847784336e-06, "epoch": 1.8496766530289033, "percentage": 36.99, "elapsed_time": "0:20:34", "remaining_time": "0:35:02", "throughput": 5588.25, "total_tokens": 6899712}
|
|
{"current_steps": 14020, "total_steps": 37885, "loss": 0.1096, "lr": 1.5876957976727993e-06, "epoch": 1.8503365448066518, "percentage": 37.01, "elapsed_time": "0:20:35", "remaining_time": "0:35:02", "throughput": 5588.64, "total_tokens": 6902016}
|
|
{"current_steps": 14025, "total_steps": 37885, "loss": 0.1389, "lr": 1.5873229927680617e-06, "epoch": 1.8509964365844, "percentage": 37.02, "elapsed_time": "0:20:35", "remaining_time": "0:35:01", "throughput": 5589.07, "total_tokens": 6904384}
|
|
{"current_steps": 14030, "total_steps": 37885, "loss": 0.0433, "lr": 1.5869500632082717e-06, "epoch": 1.8516563283621488, "percentage": 37.03, "elapsed_time": "0:20:35", "remaining_time": "0:35:00", "throughput": 5589.56, "total_tokens": 6906816}
|
|
{"current_steps": 14035, "total_steps": 37885, "loss": 0.0012, "lr": 1.586577009072581e-06, "epoch": 1.852316220139897, "percentage": 37.05, "elapsed_time": "0:20:35", "remaining_time": "0:35:00", "throughput": 5590.13, "total_tokens": 6909376}
|
|
{"current_steps": 14040, "total_steps": 37885, "loss": 0.0512, "lr": 1.5862038304401682e-06, "epoch": 1.8529761119176456, "percentage": 37.06, "elapsed_time": "0:20:36", "remaining_time": "0:34:59", "throughput": 5590.52, "total_tokens": 6911680}
|
|
{"current_steps": 14045, "total_steps": 37885, "loss": 0.0724, "lr": 1.585830527390237e-06, "epoch": 1.853636003695394, "percentage": 37.07, "elapsed_time": "0:20:36", "remaining_time": "0:34:59", "throughput": 5590.86, "total_tokens": 6913920}
|
|
{"current_steps": 14050, "total_steps": 37885, "loss": 0.189, "lr": 1.585457100002019e-06, "epoch": 1.8542958954731423, "percentage": 37.09, "elapsed_time": "0:20:36", "remaining_time": "0:34:58", "throughput": 5591.38, "total_tokens": 6916416}
|
|
{"current_steps": 14055, "total_steps": 37885, "loss": 0.0629, "lr": 1.5850835483547705e-06, "epoch": 1.8549557872508908, "percentage": 37.1, "elapsed_time": "0:20:37", "remaining_time": "0:34:57", "throughput": 5591.86, "total_tokens": 6918848}
|
|
{"current_steps": 14060, "total_steps": 37885, "loss": 0.0666, "lr": 1.5847098725277763e-06, "epoch": 1.8556156790286393, "percentage": 37.11, "elapsed_time": "0:20:37", "remaining_time": "0:34:57", "throughput": 5592.52, "total_tokens": 6921536}
|
|
{"current_steps": 14065, "total_steps": 37885, "loss": 0.0795, "lr": 1.5843360726003454e-06, "epoch": 1.8562755708063876, "percentage": 37.13, "elapsed_time": "0:20:37", "remaining_time": "0:34:56", "throughput": 5593.09, "total_tokens": 6924096}
|
|
{"current_steps": 14070, "total_steps": 37885, "loss": 0.0008, "lr": 1.5839621486518144e-06, "epoch": 1.8569354625841363, "percentage": 37.14, "elapsed_time": "0:20:38", "remaining_time": "0:34:55", "throughput": 5593.47, "total_tokens": 6926400}
|
|
{"current_steps": 14075, "total_steps": 37885, "loss": 0.2018, "lr": 1.5835881007615466e-06, "epoch": 1.8575953543618846, "percentage": 37.15, "elapsed_time": "0:20:38", "remaining_time": "0:34:55", "throughput": 5594.0, "total_tokens": 6928896}
|
|
{"current_steps": 14080, "total_steps": 37885, "loss": 0.184, "lr": 1.5832139290089302e-06, "epoch": 1.858255246139633, "percentage": 37.17, "elapsed_time": "0:20:38", "remaining_time": "0:34:54", "throughput": 5594.53, "total_tokens": 6931392}
|
|
{"current_steps": 14085, "total_steps": 37885, "loss": 0.0733, "lr": 1.5828396334733807e-06, "epoch": 1.8589151379173816, "percentage": 37.18, "elapsed_time": "0:20:39", "remaining_time": "0:34:54", "throughput": 5595.2, "total_tokens": 6934080}
|
|
{"current_steps": 14090, "total_steps": 37885, "loss": 0.1321, "lr": 1.5824652142343394e-06, "epoch": 1.8595750296951299, "percentage": 37.19, "elapsed_time": "0:20:39", "remaining_time": "0:34:53", "throughput": 5595.74, "total_tokens": 6936576}
|
|
{"current_steps": 14095, "total_steps": 37885, "loss": 0.1625, "lr": 1.582090671371274e-06, "epoch": 1.8602349214728786, "percentage": 37.2, "elapsed_time": "0:20:39", "remaining_time": "0:34:52", "throughput": 5596.11, "total_tokens": 6938880}
|
|
{"current_steps": 14100, "total_steps": 37885, "loss": 0.1393, "lr": 1.5817160049636792e-06, "epoch": 1.860894813250627, "percentage": 37.22, "elapsed_time": "0:20:40", "remaining_time": "0:34:52", "throughput": 5596.74, "total_tokens": 6941504}
|
|
{"current_steps": 14105, "total_steps": 37885, "loss": 0.0703, "lr": 1.5813412150910748e-06, "epoch": 1.8615547050283754, "percentage": 37.23, "elapsed_time": "0:20:40", "remaining_time": "0:34:51", "throughput": 5597.27, "total_tokens": 6944000}
|
|
{"current_steps": 14110, "total_steps": 37885, "loss": 0.1415, "lr": 1.580966301833007e-06, "epoch": 1.862214596806124, "percentage": 37.24, "elapsed_time": "0:20:40", "remaining_time": "0:34:50", "throughput": 5597.8, "total_tokens": 6946496}
|
|
{"current_steps": 14115, "total_steps": 37885, "loss": 0.0015, "lr": 1.580591265269049e-06, "epoch": 1.8628744885838722, "percentage": 37.26, "elapsed_time": "0:20:41", "remaining_time": "0:34:50", "throughput": 5598.22, "total_tokens": 6948864}
|
|
{"current_steps": 14120, "total_steps": 37885, "loss": 0.0012, "lr": 1.580216105478799e-06, "epoch": 1.8635343803616207, "percentage": 37.27, "elapsed_time": "0:20:41", "remaining_time": "0:34:49", "throughput": 5598.75, "total_tokens": 6951360}
|
|
{"current_steps": 14125, "total_steps": 37885, "loss": 0.0014, "lr": 1.5798408225418825e-06, "epoch": 1.8641942721393692, "percentage": 37.28, "elapsed_time": "0:20:41", "remaining_time": "0:34:49", "throughput": 5599.28, "total_tokens": 6953856}
|
|
{"current_steps": 14130, "total_steps": 37885, "loss": 0.0719, "lr": 1.57946541653795e-06, "epoch": 1.8648541639171174, "percentage": 37.3, "elapsed_time": "0:20:42", "remaining_time": "0:34:48", "throughput": 5599.61, "total_tokens": 6956096}
|
|
{"current_steps": 14135, "total_steps": 37885, "loss": 0.0571, "lr": 1.579089887546679e-06, "epoch": 1.8655140556948662, "percentage": 37.31, "elapsed_time": "0:20:42", "remaining_time": "0:34:47", "throughput": 5600.08, "total_tokens": 6958528}
|
|
{"current_steps": 14140, "total_steps": 37885, "loss": 0.1446, "lr": 1.578714235647773e-06, "epoch": 1.8661739474726144, "percentage": 37.32, "elapsed_time": "0:20:42", "remaining_time": "0:34:47", "throughput": 5600.37, "total_tokens": 6960704}
|
|
{"current_steps": 14145, "total_steps": 37885, "loss": 0.1826, "lr": 1.5783384609209609e-06, "epoch": 1.866833839250363, "percentage": 37.34, "elapsed_time": "0:20:43", "remaining_time": "0:34:46", "throughput": 5600.8, "total_tokens": 6963072}
|
|
{"current_steps": 14150, "total_steps": 37885, "loss": 0.0619, "lr": 1.577962563445999e-06, "epoch": 1.8674937310281114, "percentage": 37.35, "elapsed_time": "0:20:43", "remaining_time": "0:34:45", "throughput": 5601.33, "total_tokens": 6965568}
|
|
{"current_steps": 14155, "total_steps": 37885, "loss": 0.0009, "lr": 1.5775865433026679e-06, "epoch": 1.8681536228058597, "percentage": 37.36, "elapsed_time": "0:20:43", "remaining_time": "0:34:45", "throughput": 5601.75, "total_tokens": 6967936}
|
|
{"current_steps": 14160, "total_steps": 37885, "loss": 0.0008, "lr": 1.5772104005707756e-06, "epoch": 1.8688135145836084, "percentage": 37.38, "elapsed_time": "0:20:44", "remaining_time": "0:34:44", "throughput": 5602.28, "total_tokens": 6970432}
|
|
{"current_steps": 14165, "total_steps": 37885, "loss": 0.1136, "lr": 1.5768341353301554e-06, "epoch": 1.8694734063613567, "percentage": 37.39, "elapsed_time": "0:20:44", "remaining_time": "0:34:44", "throughput": 5602.94, "total_tokens": 6973120}
|
|
{"current_steps": 14170, "total_steps": 37885, "loss": 0.0006, "lr": 1.5764577476606673e-06, "epoch": 1.8701332981391052, "percentage": 37.4, "elapsed_time": "0:20:44", "remaining_time": "0:34:43", "throughput": 5603.22, "total_tokens": 6975296}
|
|
{"current_steps": 14175, "total_steps": 37885, "loss": 0.0891, "lr": 1.5760812376421965e-06, "epoch": 1.8707931899168537, "percentage": 37.42, "elapsed_time": "0:20:45", "remaining_time": "0:34:42", "throughput": 5603.7, "total_tokens": 6977728}
|
|
{"current_steps": 14180, "total_steps": 37885, "loss": 0.1602, "lr": 1.5757046053546547e-06, "epoch": 1.871453081694602, "percentage": 37.43, "elapsed_time": "0:20:45", "remaining_time": "0:34:42", "throughput": 5604.18, "total_tokens": 6980160}
|
|
{"current_steps": 14185, "total_steps": 37885, "loss": 0.081, "lr": 1.5753278508779797e-06, "epoch": 1.8721129734723505, "percentage": 37.44, "elapsed_time": "0:20:45", "remaining_time": "0:34:41", "throughput": 5604.75, "total_tokens": 6982720}
|
|
{"current_steps": 14190, "total_steps": 37885, "loss": 0.2397, "lr": 1.574950974292134e-06, "epoch": 1.872772865250099, "percentage": 37.46, "elapsed_time": "0:20:46", "remaining_time": "0:34:40", "throughput": 5605.35, "total_tokens": 6985344}
|
|
{"current_steps": 14195, "total_steps": 37885, "loss": 0.0029, "lr": 1.5745739756771078e-06, "epoch": 1.8734327570278473, "percentage": 37.47, "elapsed_time": "0:20:46", "remaining_time": "0:34:40", "throughput": 5605.81, "total_tokens": 6987776}
|
|
{"current_steps": 14200, "total_steps": 37885, "loss": 0.0016, "lr": 1.574196855112916e-06, "epoch": 1.874092648805596, "percentage": 37.48, "elapsed_time": "0:20:46", "remaining_time": "0:34:39", "throughput": 5606.23, "total_tokens": 6990144}
|
|
{"current_steps": 14205, "total_steps": 37885, "loss": 0.0731, "lr": 1.5738196126795998e-06, "epoch": 1.8747525405833443, "percentage": 37.5, "elapsed_time": "0:20:47", "remaining_time": "0:34:39", "throughput": 5606.9, "total_tokens": 6992832}
|
|
{"current_steps": 14210, "total_steps": 37885, "loss": 0.0601, "lr": 1.5734422484572258e-06, "epoch": 1.8754124323610928, "percentage": 37.51, "elapsed_time": "0:20:47", "remaining_time": "0:34:38", "throughput": 5607.36, "total_tokens": 6995264}
|
|
{"current_steps": 14215, "total_steps": 37885, "loss": 0.0583, "lr": 1.573064762525887e-06, "epoch": 1.8760723241388413, "percentage": 37.52, "elapsed_time": "0:20:47", "remaining_time": "0:34:37", "throughput": 5607.68, "total_tokens": 6997504}
|
|
{"current_steps": 14220, "total_steps": 37885, "loss": 0.0743, "lr": 1.5726871549657027e-06, "epoch": 1.8767322159165896, "percentage": 37.53, "elapsed_time": "0:20:48", "remaining_time": "0:34:37", "throughput": 5608.11, "total_tokens": 6999872}
|
|
{"current_steps": 14225, "total_steps": 37885, "loss": 0.0004, "lr": 1.5723094258568161e-06, "epoch": 1.8773921076943383, "percentage": 37.55, "elapsed_time": "0:20:48", "remaining_time": "0:34:36", "throughput": 5608.68, "total_tokens": 7002432}
|
|
{"current_steps": 14230, "total_steps": 37885, "loss": 0.087, "lr": 1.571931575279399e-06, "epoch": 1.8780519994720866, "percentage": 37.56, "elapsed_time": "0:20:48", "remaining_time": "0:34:35", "throughput": 5608.9, "total_tokens": 7004544}
|
|
{"current_steps": 14235, "total_steps": 37885, "loss": 0.0305, "lr": 1.5715536033136462e-06, "epoch": 1.878711891249835, "percentage": 37.57, "elapsed_time": "0:20:49", "remaining_time": "0:34:35", "throughput": 5609.32, "total_tokens": 7006912}
|
|
{"current_steps": 14240, "total_steps": 37885, "loss": 0.1398, "lr": 1.5711755100397798e-06, "epoch": 1.8793717830275836, "percentage": 37.59, "elapsed_time": "0:20:49", "remaining_time": "0:34:34", "throughput": 5609.85, "total_tokens": 7009408}
|
|
{"current_steps": 14245, "total_steps": 37885, "loss": 0.088, "lr": 1.570797295538048e-06, "epoch": 1.8800316748053318, "percentage": 37.6, "elapsed_time": "0:20:49", "remaining_time": "0:34:34", "throughput": 5610.38, "total_tokens": 7011904}
|
|
{"current_steps": 14250, "total_steps": 37885, "loss": 0.0514, "lr": 1.5704189598887232e-06, "epoch": 1.8806915665830803, "percentage": 37.61, "elapsed_time": "0:20:50", "remaining_time": "0:34:33", "throughput": 5610.66, "total_tokens": 7014080}
|
|
{"current_steps": 14255, "total_steps": 37885, "loss": 0.0358, "lr": 1.570040503172105e-06, "epoch": 1.8813514583608288, "percentage": 37.63, "elapsed_time": "0:20:50", "remaining_time": "0:34:32", "throughput": 5611.03, "total_tokens": 7016384}
|
|
{"current_steps": 14260, "total_steps": 37885, "loss": 0.246, "lr": 1.569661925468518e-06, "epoch": 1.8820113501385771, "percentage": 37.64, "elapsed_time": "0:20:50", "remaining_time": "0:34:32", "throughput": 5611.32, "total_tokens": 7018560}
|
|
{"current_steps": 14265, "total_steps": 37885, "loss": 0.0017, "lr": 1.5692832268583126e-06, "epoch": 1.8826712419163258, "percentage": 37.65, "elapsed_time": "0:20:51", "remaining_time": "0:34:31", "throughput": 5611.74, "total_tokens": 7020928}
|
|
{"current_steps": 14270, "total_steps": 37885, "loss": 0.2468, "lr": 1.5689044074218643e-06, "epoch": 1.8833311336940741, "percentage": 37.67, "elapsed_time": "0:20:51", "remaining_time": "0:34:30", "throughput": 5612.49, "total_tokens": 7023744}
|
|
{"current_steps": 14275, "total_steps": 37885, "loss": 0.0838, "lr": 1.5685254672395753e-06, "epoch": 1.8839910254718226, "percentage": 37.68, "elapsed_time": "0:20:51", "remaining_time": "0:34:30", "throughput": 5613.0, "total_tokens": 7026240}
|
|
{"current_steps": 14280, "total_steps": 37885, "loss": 0.0016, "lr": 1.568146406391873e-06, "epoch": 1.8846509172495711, "percentage": 37.69, "elapsed_time": "0:20:52", "remaining_time": "0:34:29", "throughput": 5613.38, "total_tokens": 7028544}
|
|
{"current_steps": 14285, "total_steps": 37885, "loss": 0.0511, "lr": 1.5677672249592101e-06, "epoch": 1.8853108090273194, "percentage": 37.71, "elapsed_time": "0:20:52", "remaining_time": "0:34:29", "throughput": 5613.9, "total_tokens": 7031040}
|
|
{"current_steps": 14290, "total_steps": 37885, "loss": 0.1894, "lr": 1.567387923022065e-06, "epoch": 1.8859707008050681, "percentage": 37.72, "elapsed_time": "0:20:52", "remaining_time": "0:34:28", "throughput": 5614.16, "total_tokens": 7033216}
|
|
{"current_steps": 14295, "total_steps": 37885, "loss": 0.0037, "lr": 1.567008500660942e-06, "epoch": 1.8866305925828164, "percentage": 37.73, "elapsed_time": "0:20:53", "remaining_time": "0:34:27", "throughput": 5614.65, "total_tokens": 7035712}
|
|
{"current_steps": 14300, "total_steps": 37885, "loss": 0.0039, "lr": 1.5666289579563708e-06, "epoch": 1.887290484360565, "percentage": 37.75, "elapsed_time": "0:20:53", "remaining_time": "0:34:27", "throughput": 5615.34, "total_tokens": 7038464}
|
|
{"current_steps": 14305, "total_steps": 37885, "loss": 0.142, "lr": 1.5662492949889065e-06, "epoch": 1.8879503761383134, "percentage": 37.76, "elapsed_time": "0:20:53", "remaining_time": "0:34:26", "throughput": 5615.74, "total_tokens": 7040832}
|
|
{"current_steps": 14310, "total_steps": 37885, "loss": 0.002, "lr": 1.5658695118391299e-06, "epoch": 1.8886102679160617, "percentage": 37.77, "elapsed_time": "0:20:54", "remaining_time": "0:34:26", "throughput": 5616.33, "total_tokens": 7043456}
|
|
{"current_steps": 14315, "total_steps": 37885, "loss": 0.0934, "lr": 1.5654896085876468e-06, "epoch": 1.8892701596938102, "percentage": 37.79, "elapsed_time": "0:20:54", "remaining_time": "0:34:25", "throughput": 5616.88, "total_tokens": 7046016}
|
|
{"current_steps": 14320, "total_steps": 37885, "loss": 0.0622, "lr": 1.5651095853150893e-06, "epoch": 1.8899300514715587, "percentage": 37.8, "elapsed_time": "0:20:54", "remaining_time": "0:34:24", "throughput": 5617.54, "total_tokens": 7048704}
|
|
{"current_steps": 14325, "total_steps": 37885, "loss": 0.1235, "lr": 1.5647294421021144e-06, "epoch": 1.890589943249307, "percentage": 37.81, "elapsed_time": "0:20:55", "remaining_time": "0:34:24", "throughput": 5617.86, "total_tokens": 7050944}
|
|
{"current_steps": 14330, "total_steps": 37885, "loss": 0.0469, "lr": 1.5643491790294054e-06, "epoch": 1.8912498350270557, "percentage": 37.82, "elapsed_time": "0:20:55", "remaining_time": "0:34:23", "throughput": 5618.28, "total_tokens": 7053312}
|
|
{"current_steps": 14335, "total_steps": 37885, "loss": 0.0867, "lr": 1.5639687961776695e-06, "epoch": 1.891909726804804, "percentage": 37.84, "elapsed_time": "0:20:55", "remaining_time": "0:34:22", "throughput": 5618.89, "total_tokens": 7055936}
|
|
{"current_steps": 14340, "total_steps": 37885, "loss": 0.0129, "lr": 1.5635882936276403e-06, "epoch": 1.8925696185825525, "percentage": 37.85, "elapsed_time": "0:20:56", "remaining_time": "0:34:22", "throughput": 5619.58, "total_tokens": 7058688}
|
|
{"current_steps": 14345, "total_steps": 37885, "loss": 0.0382, "lr": 1.5632076714600773e-06, "epoch": 1.893229510360301, "percentage": 37.86, "elapsed_time": "0:20:56", "remaining_time": "0:34:21", "throughput": 5620.17, "total_tokens": 7061312}
|
|
{"current_steps": 14350, "total_steps": 37885, "loss": 0.0011, "lr": 1.5628269297557644e-06, "epoch": 1.8938894021380492, "percentage": 37.88, "elapsed_time": "0:20:56", "remaining_time": "0:34:21", "throughput": 5620.68, "total_tokens": 7063808}
|
|
{"current_steps": 14355, "total_steps": 37885, "loss": 0.1335, "lr": 1.5624460685955115e-06, "epoch": 1.894549293915798, "percentage": 37.89, "elapsed_time": "0:20:57", "remaining_time": "0:34:20", "throughput": 5621.25, "total_tokens": 7066368}
|
|
{"current_steps": 14360, "total_steps": 37885, "loss": 0.1012, "lr": 1.562065088060153e-06, "epoch": 1.8952091856935462, "percentage": 37.9, "elapsed_time": "0:20:57", "remaining_time": "0:34:19", "throughput": 5621.85, "total_tokens": 7068992}
|
|
{"current_steps": 14365, "total_steps": 37885, "loss": 0.0007, "lr": 1.5616839882305498e-06, "epoch": 1.8958690774712947, "percentage": 37.92, "elapsed_time": "0:20:57", "remaining_time": "0:34:19", "throughput": 5622.22, "total_tokens": 7071296}
|
|
{"current_steps": 14370, "total_steps": 37885, "loss": 0.0008, "lr": 1.5613027691875877e-06, "epoch": 1.8965289692490432, "percentage": 37.93, "elapsed_time": "0:20:58", "remaining_time": "0:34:18", "throughput": 5622.53, "total_tokens": 7073536}
|
|
{"current_steps": 14375, "total_steps": 37885, "loss": 0.1195, "lr": 1.5609214310121775e-06, "epoch": 1.8971888610267915, "percentage": 37.94, "elapsed_time": "0:20:58", "remaining_time": "0:34:18", "throughput": 5623.18, "total_tokens": 7076224}
|
|
{"current_steps": 14380, "total_steps": 37885, "loss": 0.0009, "lr": 1.5605399737852554e-06, "epoch": 1.89784875280454, "percentage": 37.96, "elapsed_time": "0:20:58", "remaining_time": "0:34:17", "throughput": 5623.5, "total_tokens": 7078464}
|
|
{"current_steps": 14385, "total_steps": 37885, "loss": 0.0817, "lr": 1.560158397587783e-06, "epoch": 1.8985086445822885, "percentage": 37.97, "elapsed_time": "0:20:59", "remaining_time": "0:34:16", "throughput": 5624.2, "total_tokens": 7081216}
|
|
{"current_steps": 14390, "total_steps": 37885, "loss": 0.1318, "lr": 1.559776702500747e-06, "epoch": 1.8991685363600368, "percentage": 37.98, "elapsed_time": "0:20:59", "remaining_time": "0:34:16", "throughput": 5624.65, "total_tokens": 7083648}
|
|
{"current_steps": 14395, "total_steps": 37885, "loss": 0.0004, "lr": 1.5593948886051592e-06, "epoch": 1.8998284281377855, "percentage": 38.0, "elapsed_time": "0:20:59", "remaining_time": "0:34:15", "throughput": 5625.02, "total_tokens": 7085952}
|
|
{"current_steps": 14400, "total_steps": 37885, "loss": 0.1321, "lr": 1.5590129559820575e-06, "epoch": 1.9004883199155338, "percentage": 38.01, "elapsed_time": "0:21:00", "remaining_time": "0:34:15", "throughput": 5625.67, "total_tokens": 7088640}
|
|
{"current_steps": 14405, "total_steps": 37885, "loss": 0.0028, "lr": 1.5586309047125039e-06, "epoch": 1.9011482116932823, "percentage": 38.02, "elapsed_time": "0:21:00", "remaining_time": "0:34:14", "throughput": 5626.42, "total_tokens": 7091456}
|
|
{"current_steps": 14410, "total_steps": 37885, "loss": 0.0625, "lr": 1.5582487348775862e-06, "epoch": 1.9018081034710308, "percentage": 38.04, "elapsed_time": "0:21:00", "remaining_time": "0:34:13", "throughput": 5626.93, "total_tokens": 7093952}
|
|
{"current_steps": 14415, "total_steps": 37885, "loss": 0.0348, "lr": 1.5578664465584168e-06, "epoch": 1.902467995248779, "percentage": 38.05, "elapsed_time": "0:21:01", "remaining_time": "0:34:13", "throughput": 5627.31, "total_tokens": 7096256}
|
|
{"current_steps": 14420, "total_steps": 37885, "loss": 0.1055, "lr": 1.5574840398361339e-06, "epoch": 1.9031278870265278, "percentage": 38.06, "elapsed_time": "0:21:01", "remaining_time": "0:34:12", "throughput": 5627.86, "total_tokens": 7098816}
|
|
{"current_steps": 14425, "total_steps": 37885, "loss": 0.0709, "lr": 1.5571015147919005e-06, "epoch": 1.903787778804276, "percentage": 38.08, "elapsed_time": "0:21:01", "remaining_time": "0:34:11", "throughput": 5628.19, "total_tokens": 7101056}
|
|
{"current_steps": 14430, "total_steps": 37885, "loss": 0.1621, "lr": 1.5567188715069048e-06, "epoch": 1.9044476705820246, "percentage": 38.09, "elapsed_time": "0:21:02", "remaining_time": "0:34:11", "throughput": 5628.66, "total_tokens": 7103488}
|
|
{"current_steps": 14435, "total_steps": 37885, "loss": 0.0179, "lr": 1.5563361100623604e-06, "epoch": 1.905107562359773, "percentage": 38.1, "elapsed_time": "0:21:02", "remaining_time": "0:34:10", "throughput": 5629.03, "total_tokens": 7105792}
|
|
{"current_steps": 14440, "total_steps": 37885, "loss": 0.0417, "lr": 1.555953230539505e-06, "epoch": 1.9057674541375214, "percentage": 38.12, "elapsed_time": "0:21:02", "remaining_time": "0:34:10", "throughput": 5629.31, "total_tokens": 7107968}
|
|
{"current_steps": 14445, "total_steps": 37885, "loss": 0.0521, "lr": 1.5555702330196021e-06, "epoch": 1.9064273459152699, "percentage": 38.13, "elapsed_time": "0:21:03", "remaining_time": "0:34:09", "throughput": 5629.91, "total_tokens": 7110592}
|
|
{"current_steps": 14450, "total_steps": 37885, "loss": 0.2691, "lr": 1.5551871175839406e-06, "epoch": 1.9070872376930184, "percentage": 38.14, "elapsed_time": "0:21:03", "remaining_time": "0:34:08", "throughput": 5630.13, "total_tokens": 7112704}
|
|
{"current_steps": 14455, "total_steps": 37885, "loss": 0.0585, "lr": 1.5548038843138338e-06, "epoch": 1.9077471294707666, "percentage": 38.15, "elapsed_time": "0:21:03", "remaining_time": "0:34:08", "throughput": 5630.64, "total_tokens": 7115200}
|
|
{"current_steps": 14460, "total_steps": 37885, "loss": 0.1164, "lr": 1.5544205332906201e-06, "epoch": 1.9084070212485154, "percentage": 38.17, "elapsed_time": "0:21:03", "remaining_time": "0:34:07", "throughput": 5631.11, "total_tokens": 7117632}
|
|
{"current_steps": 14465, "total_steps": 37885, "loss": 0.221, "lr": 1.554037064595663e-06, "epoch": 1.9090669130262636, "percentage": 38.18, "elapsed_time": "0:21:04", "remaining_time": "0:34:07", "throughput": 5631.58, "total_tokens": 7120064}
|
|
{"current_steps": 14470, "total_steps": 37885, "loss": 0.1352, "lr": 1.553653478310351e-06, "epoch": 1.9097268048040121, "percentage": 38.19, "elapsed_time": "0:21:04", "remaining_time": "0:34:06", "throughput": 5632.14, "total_tokens": 7122624}
|
|
{"current_steps": 14475, "total_steps": 37885, "loss": 0.0808, "lr": 1.5532697745160972e-06, "epoch": 1.9103866965817606, "percentage": 38.21, "elapsed_time": "0:21:04", "remaining_time": "0:34:05", "throughput": 5632.55, "total_tokens": 7124992}
|
|
{"current_steps": 14480, "total_steps": 37885, "loss": 0.0018, "lr": 1.5528859532943405e-06, "epoch": 1.911046588359509, "percentage": 38.22, "elapsed_time": "0:21:05", "remaining_time": "0:34:05", "throughput": 5633.01, "total_tokens": 7127424}
|
|
{"current_steps": 14485, "total_steps": 37885, "loss": 0.0209, "lr": 1.552502014726544e-06, "epoch": 1.9117064801372576, "percentage": 38.23, "elapsed_time": "0:21:05", "remaining_time": "0:34:04", "throughput": 5633.52, "total_tokens": 7129920}
|
|
{"current_steps": 14490, "total_steps": 37885, "loss": 0.0735, "lr": 1.5521179588941956e-06, "epoch": 1.912366371915006, "percentage": 38.25, "elapsed_time": "0:21:05", "remaining_time": "0:34:03", "throughput": 5633.97, "total_tokens": 7132352}
|
|
{"current_steps": 14495, "total_steps": 37885, "loss": 0.0481, "lr": 1.5517337858788087e-06, "epoch": 1.9130262636927544, "percentage": 38.26, "elapsed_time": "0:21:06", "remaining_time": "0:34:03", "throughput": 5634.48, "total_tokens": 7134848}
|
|
{"current_steps": 14500, "total_steps": 37885, "loss": 0.0019, "lr": 1.551349495761921e-06, "epoch": 1.913686155470503, "percentage": 38.27, "elapsed_time": "0:21:06", "remaining_time": "0:34:02", "throughput": 5634.96, "total_tokens": 7137344}
|
|
{"current_steps": 14505, "total_steps": 37885, "loss": 0.1998, "lr": 1.550965088625095e-06, "epoch": 1.9143460472482512, "percentage": 38.29, "elapsed_time": "0:21:06", "remaining_time": "0:34:02", "throughput": 5635.55, "total_tokens": 7139968}
|
|
{"current_steps": 14510, "total_steps": 37885, "loss": 0.0009, "lr": 1.5505805645499192e-06, "epoch": 1.9150059390259997, "percentage": 38.3, "elapsed_time": "0:21:07", "remaining_time": "0:34:01", "throughput": 5636.05, "total_tokens": 7142464}
|
|
{"current_steps": 14515, "total_steps": 37885, "loss": 0.0447, "lr": 1.5501959236180053e-06, "epoch": 1.9156658308037482, "percentage": 38.31, "elapsed_time": "0:21:07", "remaining_time": "0:34:00", "throughput": 5636.64, "total_tokens": 7145088}
|
|
{"current_steps": 14520, "total_steps": 37885, "loss": 0.0005, "lr": 1.5498111659109908e-06, "epoch": 1.9163257225814965, "percentage": 38.33, "elapsed_time": "0:21:07", "remaining_time": "0:34:00", "throughput": 5637.26, "total_tokens": 7147712}
|
|
{"current_steps": 14525, "total_steps": 37885, "loss": 0.1215, "lr": 1.549426291510538e-06, "epoch": 1.9169856143592452, "percentage": 38.34, "elapsed_time": "0:21:08", "remaining_time": "0:33:59", "throughput": 5637.73, "total_tokens": 7150144}
|
|
{"current_steps": 14530, "total_steps": 37885, "loss": 0.215, "lr": 1.5490413004983334e-06, "epoch": 1.9176455061369935, "percentage": 38.35, "elapsed_time": "0:21:08", "remaining_time": "0:33:59", "throughput": 5638.24, "total_tokens": 7152640}
|
|
{"current_steps": 14535, "total_steps": 37885, "loss": 0.1271, "lr": 1.5486561929560887e-06, "epoch": 1.918305397914742, "percentage": 38.37, "elapsed_time": "0:21:08", "remaining_time": "0:33:58", "throughput": 5638.78, "total_tokens": 7155200}
|
|
{"current_steps": 14540, "total_steps": 37885, "loss": 0.0219, "lr": 1.5482709689655398e-06, "epoch": 1.9189652896924905, "percentage": 38.38, "elapsed_time": "0:21:09", "remaining_time": "0:33:57", "throughput": 5639.04, "total_tokens": 7157376}
|
|
{"current_steps": 14545, "total_steps": 37885, "loss": 0.0957, "lr": 1.5478856286084483e-06, "epoch": 1.9196251814702388, "percentage": 38.39, "elapsed_time": "0:21:09", "remaining_time": "0:33:57", "throughput": 5639.64, "total_tokens": 7160000}
|
|
{"current_steps": 14550, "total_steps": 37885, "loss": 0.1187, "lr": 1.5475001719665994e-06, "epoch": 1.9202850732479875, "percentage": 38.41, "elapsed_time": "0:21:09", "remaining_time": "0:33:56", "throughput": 5640.11, "total_tokens": 7162432}
|
|
{"current_steps": 14555, "total_steps": 37885, "loss": 0.256, "lr": 1.5471145991218037e-06, "epoch": 1.9209449650257358, "percentage": 38.42, "elapsed_time": "0:21:10", "remaining_time": "0:33:56", "throughput": 5640.47, "total_tokens": 7164736}
|
|
{"current_steps": 14560, "total_steps": 37885, "loss": 0.0013, "lr": 1.5467289101558962e-06, "epoch": 1.9216048568034843, "percentage": 38.43, "elapsed_time": "0:21:10", "remaining_time": "0:33:55", "throughput": 5640.87, "total_tokens": 7167104}
|
|
{"current_steps": 14565, "total_steps": 37885, "loss": 0.0016, "lr": 1.5463431051507368e-06, "epoch": 1.9222647485812328, "percentage": 38.45, "elapsed_time": "0:21:10", "remaining_time": "0:33:54", "throughput": 5641.6, "total_tokens": 7169920}
|
|
{"current_steps": 14570, "total_steps": 37885, "loss": 0.0322, "lr": 1.5459571841882095e-06, "epoch": 1.922924640358981, "percentage": 38.46, "elapsed_time": "0:21:11", "remaining_time": "0:33:54", "throughput": 5641.97, "total_tokens": 7172224}
|
|
{"current_steps": 14575, "total_steps": 37885, "loss": 0.0529, "lr": 1.5455711473502233e-06, "epoch": 1.9235845321367295, "percentage": 38.47, "elapsed_time": "0:21:11", "remaining_time": "0:33:53", "throughput": 5642.57, "total_tokens": 7174848}
|
|
{"current_steps": 14580, "total_steps": 37885, "loss": 0.1527, "lr": 1.5451849947187121e-06, "epoch": 1.924244423914478, "percentage": 38.48, "elapsed_time": "0:21:11", "remaining_time": "0:33:53", "throughput": 5643.21, "total_tokens": 7177536}
|
|
{"current_steps": 14585, "total_steps": 37885, "loss": 0.1494, "lr": 1.5447987263756335e-06, "epoch": 1.9249043156922263, "percentage": 38.5, "elapsed_time": "0:21:12", "remaining_time": "0:33:52", "throughput": 5643.49, "total_tokens": 7179712}
|
|
{"current_steps": 14590, "total_steps": 37885, "loss": 0.0758, "lr": 1.5444123424029703e-06, "epoch": 1.925564207469975, "percentage": 38.51, "elapsed_time": "0:21:12", "remaining_time": "0:33:51", "throughput": 5643.85, "total_tokens": 7182016}
|
|
{"current_steps": 14595, "total_steps": 37885, "loss": 0.0535, "lr": 1.5440258428827298e-06, "epoch": 1.9262240992477233, "percentage": 38.52, "elapsed_time": "0:21:12", "remaining_time": "0:33:51", "throughput": 5644.44, "total_tokens": 7184640}
|
|
{"current_steps": 14600, "total_steps": 37885, "loss": 0.0964, "lr": 1.5436392278969438e-06, "epoch": 1.9268839910254718, "percentage": 38.54, "elapsed_time": "0:21:13", "remaining_time": "0:33:50", "throughput": 5644.89, "total_tokens": 7187072}
|
|
{"current_steps": 14605, "total_steps": 37885, "loss": 0.0184, "lr": 1.5432524975276681e-06, "epoch": 1.9275438828032203, "percentage": 38.55, "elapsed_time": "0:21:13", "remaining_time": "0:33:49", "throughput": 5645.21, "total_tokens": 7189312}
|
|
{"current_steps": 14610, "total_steps": 37885, "loss": 0.0008, "lr": 1.5428656518569838e-06, "epoch": 1.9282037745809686, "percentage": 38.56, "elapsed_time": "0:21:13", "remaining_time": "0:33:49", "throughput": 5645.63, "total_tokens": 7191680}
|
|
{"current_steps": 14615, "total_steps": 37885, "loss": 0.2284, "lr": 1.5424786909669962e-06, "epoch": 1.9288636663587173, "percentage": 38.58, "elapsed_time": "0:21:14", "remaining_time": "0:33:48", "throughput": 5646.04, "total_tokens": 7194048}
|
|
{"current_steps": 14620, "total_steps": 37885, "loss": 0.0006, "lr": 1.5420916149398346e-06, "epoch": 1.9295235581364656, "percentage": 38.59, "elapsed_time": "0:21:14", "remaining_time": "0:33:48", "throughput": 5646.72, "total_tokens": 7196800}
|
|
{"current_steps": 14625, "total_steps": 37885, "loss": 0.0009, "lr": 1.5417044238576533e-06, "epoch": 1.930183449914214, "percentage": 38.6, "elapsed_time": "0:21:14", "remaining_time": "0:33:47", "throughput": 5647.16, "total_tokens": 7199232}
|
|
{"current_steps": 14630, "total_steps": 37885, "loss": 0.1908, "lr": 1.5413171178026308e-06, "epoch": 1.9308433416919626, "percentage": 38.62, "elapsed_time": "0:21:15", "remaining_time": "0:33:46", "throughput": 5647.71, "total_tokens": 7201792}
|
|
{"current_steps": 14635, "total_steps": 37885, "loss": 0.073, "lr": 1.5409296968569698e-06, "epoch": 1.9315032334697109, "percentage": 38.63, "elapsed_time": "0:21:15", "remaining_time": "0:33:46", "throughput": 5648.25, "total_tokens": 7204352}
|
|
{"current_steps": 14640, "total_steps": 37885, "loss": 0.0874, "lr": 1.540542161102898e-06, "epoch": 1.9321631252474594, "percentage": 38.64, "elapsed_time": "0:21:15", "remaining_time": "0:33:45", "throughput": 5648.78, "total_tokens": 7206912}
|
|
{"current_steps": 14645, "total_steps": 37885, "loss": 0.0365, "lr": 1.5401545106226665e-06, "epoch": 1.9328230170252079, "percentage": 38.66, "elapsed_time": "0:21:16", "remaining_time": "0:33:45", "throughput": 5649.09, "total_tokens": 7209152}
|
|
{"current_steps": 14650, "total_steps": 37885, "loss": 0.1238, "lr": 1.539766745498552e-06, "epoch": 1.9334829088029564, "percentage": 38.67, "elapsed_time": "0:21:16", "remaining_time": "0:33:44", "throughput": 5649.65, "total_tokens": 7211712}
|
|
{"current_steps": 14655, "total_steps": 37885, "loss": 0.0643, "lr": 1.5393788658128542e-06, "epoch": 1.9341428005807049, "percentage": 38.68, "elapsed_time": "0:21:16", "remaining_time": "0:33:43", "throughput": 5650.1, "total_tokens": 7214144}
|
|
{"current_steps": 14660, "total_steps": 37885, "loss": 0.0293, "lr": 1.538990871647898e-06, "epoch": 1.9348026923584531, "percentage": 38.7, "elapsed_time": "0:21:17", "remaining_time": "0:33:43", "throughput": 5650.69, "total_tokens": 7216768}
|
|
{"current_steps": 14665, "total_steps": 37885, "loss": 0.05, "lr": 1.5386027630860324e-06, "epoch": 1.9354625841362016, "percentage": 38.71, "elapsed_time": "0:21:17", "remaining_time": "0:33:42", "throughput": 5651.14, "total_tokens": 7219200}
|
|
{"current_steps": 14670, "total_steps": 37885, "loss": 0.05, "lr": 1.5382145402096307e-06, "epoch": 1.9361224759139501, "percentage": 38.72, "elapsed_time": "0:21:17", "remaining_time": "0:33:42", "throughput": 5651.68, "total_tokens": 7221760}
|
|
{"current_steps": 14675, "total_steps": 37885, "loss": 0.0418, "lr": 1.53782620310109e-06, "epoch": 1.9367823676916984, "percentage": 38.74, "elapsed_time": "0:21:18", "remaining_time": "0:33:41", "throughput": 5652.13, "total_tokens": 7224192}
|
|
{"current_steps": 14680, "total_steps": 37885, "loss": 0.0005, "lr": 1.5374377518428324e-06, "epoch": 1.9374422594694471, "percentage": 38.75, "elapsed_time": "0:21:18", "remaining_time": "0:33:40", "throughput": 5652.53, "total_tokens": 7226560}
|
|
{"current_steps": 14685, "total_steps": 37885, "loss": 0.0937, "lr": 1.5370491865173042e-06, "epoch": 1.9381021512471954, "percentage": 38.76, "elapsed_time": "0:21:18", "remaining_time": "0:33:40", "throughput": 5653.02, "total_tokens": 7229056}
|
|
{"current_steps": 14690, "total_steps": 37885, "loss": 0.0438, "lr": 1.5366605072069747e-06, "epoch": 1.938762043024944, "percentage": 38.78, "elapsed_time": "0:21:19", "remaining_time": "0:33:39", "throughput": 5653.73, "total_tokens": 7231872}
|
|
{"current_steps": 14695, "total_steps": 37885, "loss": 0.0753, "lr": 1.5362717139943392e-06, "epoch": 1.9394219348026924, "percentage": 38.79, "elapsed_time": "0:21:19", "remaining_time": "0:33:39", "throughput": 5654.14, "total_tokens": 7234240}
|
|
{"current_steps": 14700, "total_steps": 37885, "loss": 0.2984, "lr": 1.5358828069619155e-06, "epoch": 1.9400818265804407, "percentage": 38.8, "elapsed_time": "0:21:19", "remaining_time": "0:33:38", "throughput": 5654.68, "total_tokens": 7236800}
|
|
{"current_steps": 14705, "total_steps": 37885, "loss": 0.0489, "lr": 1.5354937861922463e-06, "epoch": 1.9407417183581892, "percentage": 38.81, "elapsed_time": "0:21:20", "remaining_time": "0:33:37", "throughput": 5655.25, "total_tokens": 7239424}
|
|
{"current_steps": 14710, "total_steps": 37885, "loss": 0.2381, "lr": 1.5351046517678989e-06, "epoch": 1.9414016101359377, "percentage": 38.83, "elapsed_time": "0:21:20", "remaining_time": "0:33:37", "throughput": 5655.75, "total_tokens": 7241920}
|
|
{"current_steps": 14715, "total_steps": 37885, "loss": 0.1093, "lr": 1.534715403771464e-06, "epoch": 1.9420615019136862, "percentage": 38.84, "elapsed_time": "0:21:20", "remaining_time": "0:33:36", "throughput": 5656.06, "total_tokens": 7244160}
|
|
{"current_steps": 14720, "total_steps": 37885, "loss": 0.0006, "lr": 1.5343260422855573e-06, "epoch": 1.9427213936914347, "percentage": 38.85, "elapsed_time": "0:21:21", "remaining_time": "0:33:36", "throughput": 5656.51, "total_tokens": 7246592}
|
|
{"current_steps": 14725, "total_steps": 37885, "loss": 0.0014, "lr": 1.5339365673928168e-06, "epoch": 1.943381285469183, "percentage": 38.87, "elapsed_time": "0:21:21", "remaining_time": "0:33:35", "throughput": 5657.14, "total_tokens": 7249280}
|
|
{"current_steps": 14730, "total_steps": 37885, "loss": 0.0014, "lr": 1.5335469791759068e-06, "epoch": 1.9440411772469315, "percentage": 38.88, "elapsed_time": "0:21:21", "remaining_time": "0:33:34", "throughput": 5657.61, "total_tokens": 7251712}
|
|
{"current_steps": 14735, "total_steps": 37885, "loss": 0.0009, "lr": 1.5331572777175137e-06, "epoch": 1.94470106902468, "percentage": 38.89, "elapsed_time": "0:21:22", "remaining_time": "0:33:34", "throughput": 5658.06, "total_tokens": 7254144}
|
|
{"current_steps": 14740, "total_steps": 37885, "loss": 0.0006, "lr": 1.5327674631003493e-06, "epoch": 1.9453609608024283, "percentage": 38.91, "elapsed_time": "0:21:22", "remaining_time": "0:33:33", "throughput": 5658.56, "total_tokens": 7256640}
|
|
{"current_steps": 14745, "total_steps": 37885, "loss": 0.2108, "lr": 1.5323775354071491e-06, "epoch": 1.946020852580177, "percentage": 38.92, "elapsed_time": "0:21:22", "remaining_time": "0:33:33", "throughput": 5659.14, "total_tokens": 7259264}
|
|
{"current_steps": 14750, "total_steps": 37885, "loss": 0.0767, "lr": 1.531987494720672e-06, "epoch": 1.9466807443579253, "percentage": 38.93, "elapsed_time": "0:21:23", "remaining_time": "0:33:32", "throughput": 5659.64, "total_tokens": 7261760}
|
|
{"current_steps": 14755, "total_steps": 37885, "loss": 0.1239, "lr": 1.5315973411237016e-06, "epoch": 1.9473406361356738, "percentage": 38.95, "elapsed_time": "0:21:23", "remaining_time": "0:33:31", "throughput": 5660.04, "total_tokens": 7264128}
|
|
{"current_steps": 14760, "total_steps": 37885, "loss": 0.0343, "lr": 1.531207074699045e-06, "epoch": 1.9480005279134223, "percentage": 38.96, "elapsed_time": "0:21:23", "remaining_time": "0:33:31", "throughput": 5660.4, "total_tokens": 7266432}
|
|
{"current_steps": 14765, "total_steps": 37885, "loss": 0.2684, "lr": 1.5308166955295334e-06, "epoch": 1.9486604196911705, "percentage": 38.97, "elapsed_time": "0:21:24", "remaining_time": "0:33:30", "throughput": 5660.8, "total_tokens": 7268800}
|
|
{"current_steps": 14770, "total_steps": 37885, "loss": 0.0569, "lr": 1.5304262036980221e-06, "epoch": 1.949320311468919, "percentage": 38.99, "elapsed_time": "0:21:24", "remaining_time": "0:33:30", "throughput": 5661.42, "total_tokens": 7271488}
|
|
{"current_steps": 14775, "total_steps": 37885, "loss": 0.0474, "lr": 1.5300355992873903e-06, "epoch": 1.9499802032466675, "percentage": 39.0, "elapsed_time": "0:21:24", "remaining_time": "0:33:29", "throughput": 5661.9, "total_tokens": 7273984}
|
|
{"current_steps": 14780, "total_steps": 37885, "loss": 0.0888, "lr": 1.5296448823805407e-06, "epoch": 1.950640095024416, "percentage": 39.01, "elapsed_time": "0:21:25", "remaining_time": "0:33:28", "throughput": 5662.63, "total_tokens": 7276800}
|
|
{"current_steps": 14785, "total_steps": 37885, "loss": 0.001, "lr": 1.5292540530603998e-06, "epoch": 1.9512999868021645, "percentage": 39.03, "elapsed_time": "0:21:25", "remaining_time": "0:33:28", "throughput": 5663.33, "total_tokens": 7279616}
|
|
{"current_steps": 14790, "total_steps": 37885, "loss": 0.0512, "lr": 1.5288631114099193e-06, "epoch": 1.9519598785799128, "percentage": 39.04, "elapsed_time": "0:21:25", "remaining_time": "0:33:27", "throughput": 5663.64, "total_tokens": 7281856}
|
|
{"current_steps": 14795, "total_steps": 37885, "loss": 0.0006, "lr": 1.528472057512073e-06, "epoch": 1.9526197703576613, "percentage": 39.05, "elapsed_time": "0:21:26", "remaining_time": "0:33:27", "throughput": 5664.25, "total_tokens": 7284544}
|
|
{"current_steps": 14800, "total_steps": 37885, "loss": 0.0009, "lr": 1.5280808914498593e-06, "epoch": 1.9532796621354098, "percentage": 39.07, "elapsed_time": "0:21:26", "remaining_time": "0:33:26", "throughput": 5664.75, "total_tokens": 7287040}
|
|
{"current_steps": 14805, "total_steps": 37885, "loss": 0.0468, "lr": 1.5276896133063e-06, "epoch": 1.953939553913158, "percentage": 39.08, "elapsed_time": "0:21:26", "remaining_time": "0:33:25", "throughput": 5665.47, "total_tokens": 7289856}
|
|
{"current_steps": 14810, "total_steps": 37885, "loss": 0.1041, "lr": 1.5272982231644421e-06, "epoch": 1.9545994456909068, "percentage": 39.09, "elapsed_time": "0:21:27", "remaining_time": "0:33:25", "throughput": 5665.77, "total_tokens": 7292096}
|
|
{"current_steps": 14815, "total_steps": 37885, "loss": 0.1564, "lr": 1.5269067211073545e-06, "epoch": 1.955259337468655, "percentage": 39.11, "elapsed_time": "0:21:27", "remaining_time": "0:33:24", "throughput": 5666.35, "total_tokens": 7294720}
|
|
{"current_steps": 14820, "total_steps": 37885, "loss": 0.2679, "lr": 1.5265151072181309e-06, "epoch": 1.9559192292464036, "percentage": 39.12, "elapsed_time": "0:21:27", "remaining_time": "0:33:24", "throughput": 5666.94, "total_tokens": 7297344}
|
|
{"current_steps": 14825, "total_steps": 37885, "loss": 0.1056, "lr": 1.5261233815798886e-06, "epoch": 1.956579121024152, "percentage": 39.13, "elapsed_time": "0:21:28", "remaining_time": "0:33:23", "throughput": 5667.38, "total_tokens": 7299776}
|
|
{"current_steps": 14830, "total_steps": 37885, "loss": 0.0649, "lr": 1.5257315442757682e-06, "epoch": 1.9572390128019004, "percentage": 39.14, "elapsed_time": "0:21:28", "remaining_time": "0:33:22", "throughput": 5667.88, "total_tokens": 7302272}
|
|
{"current_steps": 14835, "total_steps": 37885, "loss": 0.0355, "lr": 1.5253395953889349e-06, "epoch": 1.957898904579649, "percentage": 39.16, "elapsed_time": "0:21:28", "remaining_time": "0:33:22", "throughput": 5668.22, "total_tokens": 7304576}
|
|
{"current_steps": 14840, "total_steps": 37885, "loss": 0.0764, "lr": 1.5249475350025764e-06, "epoch": 1.9585587963573974, "percentage": 39.17, "elapsed_time": "0:21:29", "remaining_time": "0:33:21", "throughput": 5668.97, "total_tokens": 7307456}
|
|
{"current_steps": 14845, "total_steps": 37885, "loss": 0.0557, "lr": 1.5245553631999054e-06, "epoch": 1.9592186881351459, "percentage": 39.18, "elapsed_time": "0:21:29", "remaining_time": "0:33:21", "throughput": 5669.36, "total_tokens": 7309824}
|
|
{"current_steps": 14850, "total_steps": 37885, "loss": 0.0007, "lr": 1.5241630800641567e-06, "epoch": 1.9598785799128944, "percentage": 39.2, "elapsed_time": "0:21:29", "remaining_time": "0:33:20", "throughput": 5669.77, "total_tokens": 7312192}
|
|
{"current_steps": 14855, "total_steps": 37885, "loss": 0.0836, "lr": 1.5237706856785898e-06, "epoch": 1.9605384716906427, "percentage": 39.21, "elapsed_time": "0:21:30", "remaining_time": "0:33:19", "throughput": 5670.07, "total_tokens": 7314432}
|
|
{"current_steps": 14860, "total_steps": 37885, "loss": 0.0521, "lr": 1.523378180126488e-06, "epoch": 1.9611983634683912, "percentage": 39.22, "elapsed_time": "0:21:30", "remaining_time": "0:33:19", "throughput": 5670.56, "total_tokens": 7316928}
|
|
{"current_steps": 14865, "total_steps": 37885, "loss": 0.0006, "lr": 1.5229855634911575e-06, "epoch": 1.9618582552461397, "percentage": 39.24, "elapsed_time": "0:21:30", "remaining_time": "0:33:18", "throughput": 5671.18, "total_tokens": 7319616}
|
|
{"current_steps": 14870, "total_steps": 37885, "loss": 0.002, "lr": 1.5225928358559283e-06, "epoch": 1.962518147023888, "percentage": 39.25, "elapsed_time": "0:21:30", "remaining_time": "0:33:18", "throughput": 5671.47, "total_tokens": 7321856}
|
|
{"current_steps": 14875, "total_steps": 37885, "loss": 0.083, "lr": 1.522199997304154e-06, "epoch": 1.9631780388016367, "percentage": 39.26, "elapsed_time": "0:21:31", "remaining_time": "0:33:17", "throughput": 5671.86, "total_tokens": 7324224}
|
|
{"current_steps": 14880, "total_steps": 37885, "loss": 0.047, "lr": 1.5218070479192118e-06, "epoch": 1.963837930579385, "percentage": 39.28, "elapsed_time": "0:21:31", "remaining_time": "0:33:16", "throughput": 5672.31, "total_tokens": 7326656}
|
|
{"current_steps": 14885, "total_steps": 37885, "loss": 0.3865, "lr": 1.521413987784502e-06, "epoch": 1.9644978223571334, "percentage": 39.29, "elapsed_time": "0:21:31", "remaining_time": "0:33:16", "throughput": 5672.89, "total_tokens": 7329280}
|
|
{"current_steps": 14890, "total_steps": 37885, "loss": 0.1041, "lr": 1.5210208169834496e-06, "epoch": 1.965157714134882, "percentage": 39.3, "elapsed_time": "0:21:32", "remaining_time": "0:33:15", "throughput": 5673.29, "total_tokens": 7331648}
|
|
{"current_steps": 14895, "total_steps": 37885, "loss": 0.0013, "lr": 1.5206275355995013e-06, "epoch": 1.9658176059126302, "percentage": 39.32, "elapsed_time": "0:21:32", "remaining_time": "0:33:15", "throughput": 5673.78, "total_tokens": 7334144}
|
|
{"current_steps": 14900, "total_steps": 37885, "loss": 0.0005, "lr": 1.5202341437161288e-06, "epoch": 1.966477497690379, "percentage": 39.33, "elapsed_time": "0:21:32", "remaining_time": "0:33:14", "throughput": 5674.27, "total_tokens": 7336640}
|
|
{"current_steps": 14905, "total_steps": 37885, "loss": 0.0005, "lr": 1.5198406414168266e-06, "epoch": 1.9671373894681272, "percentage": 39.34, "elapsed_time": "0:21:33", "remaining_time": "0:33:13", "throughput": 5674.71, "total_tokens": 7339072}
|
|
{"current_steps": 14910, "total_steps": 37885, "loss": 0.0003, "lr": 1.5194470287851124e-06, "epoch": 1.9677972812458757, "percentage": 39.36, "elapsed_time": "0:21:33", "remaining_time": "0:33:13", "throughput": 5675.15, "total_tokens": 7341504}
|
|
{"current_steps": 14915, "total_steps": 37885, "loss": 0.149, "lr": 1.5190533059045284e-06, "epoch": 1.9684571730236242, "percentage": 39.37, "elapsed_time": "0:21:33", "remaining_time": "0:33:12", "throughput": 5675.59, "total_tokens": 7343936}
|
|
{"current_steps": 14920, "total_steps": 37885, "loss": 0.1659, "lr": 1.5186594728586383e-06, "epoch": 1.9691170648013725, "percentage": 39.38, "elapsed_time": "0:21:34", "remaining_time": "0:33:12", "throughput": 5676.03, "total_tokens": 7346368}
|
|
{"current_steps": 14925, "total_steps": 37885, "loss": 0.0452, "lr": 1.518265529731031e-06, "epoch": 1.969776956579121, "percentage": 39.4, "elapsed_time": "0:21:34", "remaining_time": "0:33:11", "throughput": 5676.47, "total_tokens": 7348800}
|
|
{"current_steps": 14930, "total_steps": 37885, "loss": 0.1882, "lr": 1.5178714766053185e-06, "epoch": 1.9704368483568695, "percentage": 39.41, "elapsed_time": "0:21:34", "remaining_time": "0:33:10", "throughput": 5676.63, "total_tokens": 7350848}
|
|
{"current_steps": 14935, "total_steps": 37885, "loss": 0.1838, "lr": 1.5174773135651347e-06, "epoch": 1.9710967401346178, "percentage": 39.42, "elapsed_time": "0:21:35", "remaining_time": "0:33:10", "throughput": 5677.1, "total_tokens": 7353344}
|
|
{"current_steps": 14940, "total_steps": 37885, "loss": 0.0747, "lr": 1.5170830406941386e-06, "epoch": 1.9717566319123665, "percentage": 39.44, "elapsed_time": "0:21:35", "remaining_time": "0:33:09", "throughput": 5677.53, "total_tokens": 7355776}
|
|
{"current_steps": 14945, "total_steps": 37885, "loss": 0.003, "lr": 1.5166886580760114e-06, "epoch": 1.9724165236901148, "percentage": 39.45, "elapsed_time": "0:21:35", "remaining_time": "0:33:09", "throughput": 5678.33, "total_tokens": 7358720}
|
|
{"current_steps": 14950, "total_steps": 37885, "loss": 0.0494, "lr": 1.5162941657944584e-06, "epoch": 1.9730764154678633, "percentage": 39.46, "elapsed_time": "0:21:36", "remaining_time": "0:33:08", "throughput": 5678.81, "total_tokens": 7361216}
|
|
{"current_steps": 14955, "total_steps": 37885, "loss": 0.0556, "lr": 1.5158995639332073e-06, "epoch": 1.9737363072456118, "percentage": 39.47, "elapsed_time": "0:21:36", "remaining_time": "0:33:08", "throughput": 5679.43, "total_tokens": 7363904}
|
|
{"current_steps": 14960, "total_steps": 37885, "loss": 0.1777, "lr": 1.5155048525760095e-06, "epoch": 1.97439619902336, "percentage": 39.49, "elapsed_time": "0:21:36", "remaining_time": "0:33:07", "throughput": 5679.91, "total_tokens": 7366400}
|
|
{"current_steps": 14965, "total_steps": 37885, "loss": 0.0443, "lr": 1.5151100318066396e-06, "epoch": 1.9750560908011088, "percentage": 39.5, "elapsed_time": "0:21:37", "remaining_time": "0:33:06", "throughput": 5680.39, "total_tokens": 7368896}
|
|
{"current_steps": 14970, "total_steps": 37885, "loss": 0.1217, "lr": 1.5147151017088958e-06, "epoch": 1.975715982578857, "percentage": 39.51, "elapsed_time": "0:21:37", "remaining_time": "0:33:06", "throughput": 5680.87, "total_tokens": 7371392}
|
|
{"current_steps": 14975, "total_steps": 37885, "loss": 0.0033, "lr": 1.514320062366599e-06, "epoch": 1.9763758743566056, "percentage": 39.53, "elapsed_time": "0:21:37", "remaining_time": "0:33:05", "throughput": 5681.35, "total_tokens": 7373888}
|
|
{"current_steps": 14980, "total_steps": 37885, "loss": 0.0015, "lr": 1.513924913863593e-06, "epoch": 1.977035766134354, "percentage": 39.54, "elapsed_time": "0:21:38", "remaining_time": "0:33:05", "throughput": 5681.86, "total_tokens": 7376448}
|
|
{"current_steps": 14985, "total_steps": 37885, "loss": 0.1575, "lr": 1.513529656283746e-06, "epoch": 1.9776956579121023, "percentage": 39.55, "elapsed_time": "0:21:38", "remaining_time": "0:33:04", "throughput": 5682.12, "total_tokens": 7378624}
|
|
{"current_steps": 14990, "total_steps": 37885, "loss": 0.0303, "lr": 1.513134289710948e-06, "epoch": 1.9783555496898508, "percentage": 39.57, "elapsed_time": "0:21:38", "remaining_time": "0:33:03", "throughput": 5682.66, "total_tokens": 7381184}
|
|
{"current_steps": 14995, "total_steps": 37885, "loss": 0.0846, "lr": 1.5127388142291126e-06, "epoch": 1.9790154414675993, "percentage": 39.58, "elapsed_time": "0:21:39", "remaining_time": "0:33:03", "throughput": 5683.0, "total_tokens": 7383488}
|
|
{"current_steps": 15000, "total_steps": 37885, "loss": 0.0485, "lr": 1.5123432299221772e-06, "epoch": 1.9796753332453476, "percentage": 39.59, "elapsed_time": "0:21:39", "remaining_time": "0:33:02", "throughput": 5683.35, "total_tokens": 7385792}
|
|
{"current_steps": 15005, "total_steps": 37885, "loss": 0.0013, "lr": 1.5119475368741013e-06, "epoch": 1.9803352250230963, "percentage": 39.61, "elapsed_time": "0:21:39", "remaining_time": "0:33:02", "throughput": 5683.91, "total_tokens": 7388416}
|
|
{"current_steps": 15010, "total_steps": 37885, "loss": 0.0646, "lr": 1.5115517351688679e-06, "epoch": 1.9809951168008446, "percentage": 39.62, "elapsed_time": "0:21:40", "remaining_time": "0:33:01", "throughput": 5684.26, "total_tokens": 7390720}
|
|
{"current_steps": 15015, "total_steps": 37885, "loss": 0.0839, "lr": 1.5111558248904829e-06, "epoch": 1.9816550085785931, "percentage": 39.63, "elapsed_time": "0:21:40", "remaining_time": "0:33:00", "throughput": 5684.7, "total_tokens": 7393152}
|
|
{"current_steps": 15020, "total_steps": 37885, "loss": 0.1332, "lr": 1.5107598061229755e-06, "epoch": 1.9823149003563416, "percentage": 39.65, "elapsed_time": "0:21:40", "remaining_time": "0:33:00", "throughput": 5684.94, "total_tokens": 7395328}
|
|
{"current_steps": 15025, "total_steps": 37885, "loss": 0.1803, "lr": 1.510363678950398e-06, "epoch": 1.98297479213409, "percentage": 39.66, "elapsed_time": "0:21:41", "remaining_time": "0:32:59", "throughput": 5685.34, "total_tokens": 7397696}
|
|
{"current_steps": 15030, "total_steps": 37885, "loss": 0.0009, "lr": 1.509967443456826e-06, "epoch": 1.9836346839118386, "percentage": 39.67, "elapsed_time": "0:21:41", "remaining_time": "0:32:59", "throughput": 5685.63, "total_tokens": 7399936}
|
|
{"current_steps": 15035, "total_steps": 37885, "loss": 0.0032, "lr": 1.5095710997263562e-06, "epoch": 1.984294575689587, "percentage": 39.69, "elapsed_time": "0:21:41", "remaining_time": "0:32:58", "throughput": 5685.83, "total_tokens": 7402048}
|
|
{"current_steps": 15040, "total_steps": 37885, "loss": 0.0739, "lr": 1.509174647843111e-06, "epoch": 1.9849544674673354, "percentage": 39.7, "elapsed_time": "0:21:42", "remaining_time": "0:32:57", "throughput": 5686.13, "total_tokens": 7404288}
|
|
{"current_steps": 15045, "total_steps": 37885, "loss": 0.0578, "lr": 1.5087780878912335e-06, "epoch": 1.985614359245084, "percentage": 39.71, "elapsed_time": "0:21:42", "remaining_time": "0:32:57", "throughput": 5686.42, "total_tokens": 7406528}
|
|
{"current_steps": 15050, "total_steps": 37885, "loss": 0.0584, "lr": 1.5083814199548912e-06, "epoch": 1.9862742510228322, "percentage": 39.73, "elapsed_time": "0:21:42", "remaining_time": "0:32:56", "throughput": 5686.76, "total_tokens": 7408832}
|
|
{"current_steps": 15055, "total_steps": 37885, "loss": 0.144, "lr": 1.5079846441182744e-06, "epoch": 1.9869341428005807, "percentage": 39.74, "elapsed_time": "0:21:43", "remaining_time": "0:32:56", "throughput": 5687.43, "total_tokens": 7411584}
|
|
{"current_steps": 15060, "total_steps": 37885, "loss": 0.0525, "lr": 1.5075877604655948e-06, "epoch": 1.9875940345783292, "percentage": 39.75, "elapsed_time": "0:21:43", "remaining_time": "0:32:55", "throughput": 5687.91, "total_tokens": 7414080}
|
|
{"current_steps": 15065, "total_steps": 37885, "loss": 0.0009, "lr": 1.5071907690810892e-06, "epoch": 1.9882539263560775, "percentage": 39.77, "elapsed_time": "0:21:43", "remaining_time": "0:32:54", "throughput": 5688.24, "total_tokens": 7416384}
|
|
{"current_steps": 15070, "total_steps": 37885, "loss": 0.1545, "lr": 1.5067936700490153e-06, "epoch": 1.9889138181338262, "percentage": 39.78, "elapsed_time": "0:21:44", "remaining_time": "0:32:54", "throughput": 5688.62, "total_tokens": 7418752}
|
|
{"current_steps": 15075, "total_steps": 37885, "loss": 0.0006, "lr": 1.5063964634536553e-06, "epoch": 1.9895737099115745, "percentage": 39.79, "elapsed_time": "0:21:44", "remaining_time": "0:32:53", "throughput": 5688.92, "total_tokens": 7420992}
|
|
{"current_steps": 15080, "total_steps": 37885, "loss": 0.2178, "lr": 1.5059991493793124e-06, "epoch": 1.990233601689323, "percentage": 39.8, "elapsed_time": "0:21:44", "remaining_time": "0:32:53", "throughput": 5689.37, "total_tokens": 7423488}
|
|
{"current_steps": 15085, "total_steps": 37885, "loss": 0.095, "lr": 1.5056017279103146e-06, "epoch": 1.9908934934670715, "percentage": 39.82, "elapsed_time": "0:21:45", "remaining_time": "0:32:52", "throughput": 5689.82, "total_tokens": 7425920}
|
|
{"current_steps": 15090, "total_steps": 37885, "loss": 0.0005, "lr": 1.505204199131011e-06, "epoch": 1.9915533852448197, "percentage": 39.83, "elapsed_time": "0:21:45", "remaining_time": "0:32:52", "throughput": 5690.42, "total_tokens": 7428608}
|
|
{"current_steps": 15095, "total_steps": 37885, "loss": 0.045, "lr": 1.5048065631257748e-06, "epoch": 1.9922132770225685, "percentage": 39.84, "elapsed_time": "0:21:45", "remaining_time": "0:32:51", "throughput": 5690.94, "total_tokens": 7431168}
|
|
{"current_steps": 15100, "total_steps": 37885, "loss": 0.0011, "lr": 1.5044088199790012e-06, "epoch": 1.9928731688003167, "percentage": 39.86, "elapsed_time": "0:21:46", "remaining_time": "0:32:50", "throughput": 5691.46, "total_tokens": 7433728}
|
|
{"current_steps": 15105, "total_steps": 37885, "loss": 0.1167, "lr": 1.5040109697751082e-06, "epoch": 1.9935330605780652, "percentage": 39.87, "elapsed_time": "0:21:46", "remaining_time": "0:32:50", "throughput": 5692.02, "total_tokens": 7436352}
|
|
{"current_steps": 15110, "total_steps": 37885, "loss": 0.1322, "lr": 1.5036130125985364e-06, "epoch": 1.9941929523558137, "percentage": 39.88, "elapsed_time": "0:21:46", "remaining_time": "0:32:49", "throughput": 5692.46, "total_tokens": 7438784}
|
|
{"current_steps": 15115, "total_steps": 37885, "loss": 0.0004, "lr": 1.5032149485337494e-06, "epoch": 1.994852844133562, "percentage": 39.9, "elapsed_time": "0:21:47", "remaining_time": "0:32:49", "throughput": 5692.85, "total_tokens": 7441152}
|
|
{"current_steps": 15120, "total_steps": 37885, "loss": 0.1143, "lr": 1.5028167776652339e-06, "epoch": 1.9955127359113105, "percentage": 39.91, "elapsed_time": "0:21:47", "remaining_time": "0:32:48", "throughput": 5693.19, "total_tokens": 7443456}
|
|
{"current_steps": 15125, "total_steps": 37885, "loss": 0.0071, "lr": 1.5024185000774984e-06, "epoch": 1.996172627689059, "percentage": 39.92, "elapsed_time": "0:21:47", "remaining_time": "0:32:47", "throughput": 5693.73, "total_tokens": 7446016}
|
|
{"current_steps": 15130, "total_steps": 37885, "loss": 0.172, "lr": 1.5020201158550745e-06, "epoch": 1.9968325194668073, "percentage": 39.94, "elapsed_time": "0:21:48", "remaining_time": "0:32:47", "throughput": 5694.17, "total_tokens": 7448448}
|
|
{"current_steps": 15135, "total_steps": 37885, "loss": 0.1166, "lr": 1.5016216250825164e-06, "epoch": 1.997492411244556, "percentage": 39.95, "elapsed_time": "0:21:48", "remaining_time": "0:32:46", "throughput": 5694.69, "total_tokens": 7451008}
|
|
{"current_steps": 15140, "total_steps": 37885, "loss": 0.0687, "lr": 1.5012230278444005e-06, "epoch": 1.9981523030223043, "percentage": 39.96, "elapsed_time": "0:21:48", "remaining_time": "0:32:46", "throughput": 5695.17, "total_tokens": 7453504}
|
|
{"current_steps": 15145, "total_steps": 37885, "loss": 0.0024, "lr": 1.5008243242253269e-06, "epoch": 1.9988121948000528, "percentage": 39.98, "elapsed_time": "0:21:49", "remaining_time": "0:32:45", "throughput": 5695.64, "total_tokens": 7456000}
|
|
{"current_steps": 15150, "total_steps": 37885, "loss": 0.0027, "lr": 1.5004255143099167e-06, "epoch": 1.9994720865778013, "percentage": 39.99, "elapsed_time": "0:21:49", "remaining_time": "0:32:44", "throughput": 5696.13, "total_tokens": 7458496}
|
|
{"current_steps": 15155, "total_steps": 37885, "loss": 0.0349, "lr": 1.5000265981828153e-06, "epoch": 2.0001319783555496, "percentage": 40.0, "elapsed_time": "0:21:49", "remaining_time": "0:32:44", "throughput": 5696.1, "total_tokens": 7460784}
|
|
{"current_steps": 15160, "total_steps": 37885, "loss": 0.0004, "lr": 1.4996275759286894e-06, "epoch": 2.0007918701332983, "percentage": 40.02, "elapsed_time": "0:21:50", "remaining_time": "0:32:43", "throughput": 5696.39, "total_tokens": 7463024}
|
|
{"current_steps": 15160, "total_steps": 37885, "eval_loss": 0.1182011216878891, "epoch": 2.0007918701332983, "percentage": 40.02, "elapsed_time": "0:21:58", "remaining_time": "0:32:55", "throughput": 5661.91, "total_tokens": 7463024}
|
|
{"current_steps": 15165, "total_steps": 37885, "loss": 0.0012, "lr": 1.4992284476322283e-06, "epoch": 2.0014517619110466, "percentage": 40.03, "elapsed_time": "0:22:38", "remaining_time": "0:33:55", "throughput": 5496.1, "total_tokens": 7465456}
|
|
{"current_steps": 15170, "total_steps": 37885, "loss": 0.0007, "lr": 1.4988292133781445e-06, "epoch": 2.002111653688795, "percentage": 40.04, "elapsed_time": "0:22:38", "remaining_time": "0:33:54", "throughput": 5496.39, "total_tokens": 7467632}
|
|
{"current_steps": 15175, "total_steps": 37885, "loss": 0.0406, "lr": 1.498429873251172e-06, "epoch": 2.0027715454665436, "percentage": 40.06, "elapsed_time": "0:22:38", "remaining_time": "0:33:53", "throughput": 5496.93, "total_tokens": 7470192}
|
|
{"current_steps": 15180, "total_steps": 37885, "loss": 0.1068, "lr": 1.4980304273360686e-06, "epoch": 2.003431437244292, "percentage": 40.07, "elapsed_time": "0:22:39", "remaining_time": "0:33:53", "throughput": 5497.25, "total_tokens": 7472432}
|
|
{"current_steps": 15185, "total_steps": 37885, "loss": 0.0002, "lr": 1.4976308757176135e-06, "epoch": 2.0040913290220406, "percentage": 40.08, "elapsed_time": "0:22:39", "remaining_time": "0:33:52", "throughput": 5497.75, "total_tokens": 7474928}
|
|
{"current_steps": 15190, "total_steps": 37885, "loss": 0.0004, "lr": 1.4972312184806084e-06, "epoch": 2.004751220799789, "percentage": 40.1, "elapsed_time": "0:22:39", "remaining_time": "0:33:51", "throughput": 5498.17, "total_tokens": 7477296}
|
|
{"current_steps": 15195, "total_steps": 37885, "loss": 0.2538, "lr": 1.496831455709878e-06, "epoch": 2.005411112577537, "percentage": 40.11, "elapsed_time": "0:22:40", "remaining_time": "0:33:51", "throughput": 5498.61, "total_tokens": 7479728}
|
|
{"current_steps": 15200, "total_steps": 37885, "loss": 0.0003, "lr": 1.4964315874902687e-06, "epoch": 2.006071004355286, "percentage": 40.12, "elapsed_time": "0:22:40", "remaining_time": "0:33:50", "throughput": 5499.02, "total_tokens": 7482096}
|
|
{"current_steps": 15205, "total_steps": 37885, "loss": 0.0712, "lr": 1.49603161390665e-06, "epoch": 2.006730896133034, "percentage": 40.13, "elapsed_time": "0:22:40", "remaining_time": "0:33:50", "throughput": 5499.38, "total_tokens": 7484400}
|
|
{"current_steps": 15210, "total_steps": 37885, "loss": 0.0005, "lr": 1.495631535043913e-06, "epoch": 2.007390787910783, "percentage": 40.15, "elapsed_time": "0:22:41", "remaining_time": "0:33:49", "throughput": 5499.76, "total_tokens": 7486704}
|
|
{"current_steps": 15215, "total_steps": 37885, "loss": 0.0001, "lr": 1.4952313509869722e-06, "epoch": 2.008050679688531, "percentage": 40.16, "elapsed_time": "0:22:41", "remaining_time": "0:33:48", "throughput": 5500.27, "total_tokens": 7489200}
|
|
{"current_steps": 15220, "total_steps": 37885, "loss": 0.0876, "lr": 1.4948310618207628e-06, "epoch": 2.0087105714662794, "percentage": 40.17, "elapsed_time": "0:22:41", "remaining_time": "0:33:48", "throughput": 5500.65, "total_tokens": 7491504}
|
|
{"current_steps": 15225, "total_steps": 37885, "loss": 0.0002, "lr": 1.4944306676302442e-06, "epoch": 2.009370463244028, "percentage": 40.19, "elapsed_time": "0:22:42", "remaining_time": "0:33:47", "throughput": 5501.07, "total_tokens": 7493872}
|
|
{"current_steps": 15230, "total_steps": 37885, "loss": 0.0005, "lr": 1.4940301685003967e-06, "epoch": 2.0100303550217764, "percentage": 40.2, "elapsed_time": "0:22:42", "remaining_time": "0:33:46", "throughput": 5501.42, "total_tokens": 7496176}
|
|
{"current_steps": 15235, "total_steps": 37885, "loss": 0.0935, "lr": 1.4936295645162232e-06, "epoch": 2.0106902467995247, "percentage": 40.21, "elapsed_time": "0:22:42", "remaining_time": "0:33:46", "throughput": 5502.05, "total_tokens": 7498864}
|
|
{"current_steps": 15240, "total_steps": 37885, "loss": 0.0003, "lr": 1.4932288557627497e-06, "epoch": 2.0113501385772734, "percentage": 40.23, "elapsed_time": "0:22:43", "remaining_time": "0:33:45", "throughput": 5502.52, "total_tokens": 7501296}
|
|
{"current_steps": 15245, "total_steps": 37885, "loss": 0.0768, "lr": 1.4928280423250228e-06, "epoch": 2.0120100303550217, "percentage": 40.24, "elapsed_time": "0:22:43", "remaining_time": "0:33:45", "throughput": 5503.06, "total_tokens": 7503856}
|
|
{"current_steps": 15250, "total_steps": 37885, "loss": 0.0711, "lr": 1.4924271242881128e-06, "epoch": 2.0126699221327704, "percentage": 40.25, "elapsed_time": "0:22:43", "remaining_time": "0:33:44", "throughput": 5503.56, "total_tokens": 7506352}
|
|
{"current_steps": 15255, "total_steps": 37885, "loss": 0.0002, "lr": 1.4920261017371116e-06, "epoch": 2.0133298139105187, "percentage": 40.27, "elapsed_time": "0:22:44", "remaining_time": "0:33:43", "throughput": 5504.1, "total_tokens": 7508912}
|
|
{"current_steps": 15260, "total_steps": 37885, "loss": 0.0002, "lr": 1.4916249747571333e-06, "epoch": 2.013989705688267, "percentage": 40.28, "elapsed_time": "0:22:44", "remaining_time": "0:33:43", "throughput": 5504.51, "total_tokens": 7511280}
|
|
{"current_steps": 15265, "total_steps": 37885, "loss": 0.0789, "lr": 1.4912237434333142e-06, "epoch": 2.0146495974660157, "percentage": 40.29, "elapsed_time": "0:22:44", "remaining_time": "0:33:42", "throughput": 5505.11, "total_tokens": 7513904}
|
|
{"current_steps": 15270, "total_steps": 37885, "loss": 0.0778, "lr": 1.4908224078508125e-06, "epoch": 2.015309489243764, "percentage": 40.31, "elapsed_time": "0:22:45", "remaining_time": "0:33:41", "throughput": 5505.81, "total_tokens": 7516720}
|
|
{"current_steps": 15275, "total_steps": 37885, "loss": 0.0883, "lr": 1.4904209680948092e-06, "epoch": 2.0159693810215127, "percentage": 40.32, "elapsed_time": "0:22:45", "remaining_time": "0:33:41", "throughput": 5506.27, "total_tokens": 7519152}
|
|
{"current_steps": 15280, "total_steps": 37885, "loss": 0.0014, "lr": 1.4900194242505067e-06, "epoch": 2.016629272799261, "percentage": 40.33, "elapsed_time": "0:22:45", "remaining_time": "0:33:40", "throughput": 5506.76, "total_tokens": 7521648}
|
|
{"current_steps": 15285, "total_steps": 37885, "loss": 0.0005, "lr": 1.48961777640313e-06, "epoch": 2.0172891645770092, "percentage": 40.35, "elapsed_time": "0:22:46", "remaining_time": "0:33:40", "throughput": 5507.27, "total_tokens": 7524144}
|
|
{"current_steps": 15290, "total_steps": 37885, "loss": 0.0538, "lr": 1.4892160246379257e-06, "epoch": 2.017949056354758, "percentage": 40.36, "elapsed_time": "0:22:46", "remaining_time": "0:33:39", "throughput": 5507.81, "total_tokens": 7526704}
|
|
{"current_steps": 15295, "total_steps": 37885, "loss": 0.0006, "lr": 1.4888141690401628e-06, "epoch": 2.0186089481325062, "percentage": 40.37, "elapsed_time": "0:22:46", "remaining_time": "0:33:38", "throughput": 5508.35, "total_tokens": 7529264}
|
|
{"current_steps": 15300, "total_steps": 37885, "loss": 0.0493, "lr": 1.488412209695132e-06, "epoch": 2.0192688399102545, "percentage": 40.39, "elapsed_time": "0:22:47", "remaining_time": "0:33:38", "throughput": 5508.81, "total_tokens": 7531696}
|
|
{"current_steps": 15305, "total_steps": 37885, "loss": 0.0108, "lr": 1.4880101466881464e-06, "epoch": 2.0199287316880032, "percentage": 40.4, "elapsed_time": "0:22:47", "remaining_time": "0:33:37", "throughput": 5509.08, "total_tokens": 7533872}
|
|
{"current_steps": 15310, "total_steps": 37885, "loss": 0.0706, "lr": 1.4876079801045418e-06, "epoch": 2.0205886234657515, "percentage": 40.41, "elapsed_time": "0:22:47", "remaining_time": "0:33:36", "throughput": 5509.27, "total_tokens": 7535920}
|
|
{"current_steps": 15315, "total_steps": 37885, "loss": 0.0015, "lr": 1.4872057100296738e-06, "epoch": 2.0212485152435002, "percentage": 40.42, "elapsed_time": "0:22:48", "remaining_time": "0:33:36", "throughput": 5509.68, "total_tokens": 7538288}
|
|
{"current_steps": 15320, "total_steps": 37885, "loss": 0.0009, "lr": 1.4868033365489222e-06, "epoch": 2.0219084070212485, "percentage": 40.44, "elapsed_time": "0:22:48", "remaining_time": "0:33:35", "throughput": 5510.0, "total_tokens": 7540528}
|
|
{"current_steps": 15325, "total_steps": 37885, "loss": 0.0521, "lr": 1.4864008597476873e-06, "epoch": 2.022568298798997, "percentage": 40.45, "elapsed_time": "0:22:48", "remaining_time": "0:33:35", "throughput": 5510.49, "total_tokens": 7543024}
|
|
{"current_steps": 15330, "total_steps": 37885, "loss": 0.0005, "lr": 1.4859982797113923e-06, "epoch": 2.0232281905767455, "percentage": 40.46, "elapsed_time": "0:22:49", "remaining_time": "0:33:34", "throughput": 5510.9, "total_tokens": 7545392}
|
|
{"current_steps": 15335, "total_steps": 37885, "loss": 0.0533, "lr": 1.4855955965254816e-06, "epoch": 2.023888082354494, "percentage": 40.48, "elapsed_time": "0:22:49", "remaining_time": "0:33:33", "throughput": 5511.21, "total_tokens": 7547632}
|
|
{"current_steps": 15340, "total_steps": 37885, "loss": 0.0006, "lr": 1.485192810275422e-06, "epoch": 2.0245479741322425, "percentage": 40.49, "elapsed_time": "0:22:49", "remaining_time": "0:33:33", "throughput": 5511.74, "total_tokens": 7550192}
|
|
{"current_steps": 15345, "total_steps": 37885, "loss": 0.096, "lr": 1.4847899210467021e-06, "epoch": 2.025207865909991, "percentage": 40.5, "elapsed_time": "0:22:50", "remaining_time": "0:33:32", "throughput": 5512.23, "total_tokens": 7552688}
|
|
{"current_steps": 15350, "total_steps": 37885, "loss": 0.1187, "lr": 1.4843869289248318e-06, "epoch": 2.025867757687739, "percentage": 40.52, "elapsed_time": "0:22:50", "remaining_time": "0:33:32", "throughput": 5512.81, "total_tokens": 7555312}
|
|
{"current_steps": 15355, "total_steps": 37885, "loss": 0.0001, "lr": 1.483983833995344e-06, "epoch": 2.026527649465488, "percentage": 40.53, "elapsed_time": "0:22:50", "remaining_time": "0:33:31", "throughput": 5513.48, "total_tokens": 7558128}
|
|
{"current_steps": 15360, "total_steps": 37885, "loss": 0.0011, "lr": 1.4835806363437915e-06, "epoch": 2.027187541243236, "percentage": 40.54, "elapsed_time": "0:22:51", "remaining_time": "0:33:30", "throughput": 5513.77, "total_tokens": 7560368}
|
|
{"current_steps": 15365, "total_steps": 37885, "loss": 0.0002, "lr": 1.4831773360557513e-06, "epoch": 2.0278474330209844, "percentage": 40.56, "elapsed_time": "0:22:51", "remaining_time": "0:33:30", "throughput": 5514.18, "total_tokens": 7562736}
|
|
{"current_steps": 15370, "total_steps": 37885, "loss": 0.0009, "lr": 1.4827739332168201e-06, "epoch": 2.028507324798733, "percentage": 40.57, "elapsed_time": "0:22:51", "remaining_time": "0:33:29", "throughput": 5514.79, "total_tokens": 7565424}
|
|
{"current_steps": 15375, "total_steps": 37885, "loss": 0.0006, "lr": 1.4823704279126172e-06, "epoch": 2.0291672165764814, "percentage": 40.58, "elapsed_time": "0:22:52", "remaining_time": "0:33:28", "throughput": 5515.23, "total_tokens": 7567856}
|
|
{"current_steps": 15380, "total_steps": 37885, "loss": 0.0433, "lr": 1.4819668202287847e-06, "epoch": 2.02982710835423, "percentage": 40.6, "elapsed_time": "0:22:52", "remaining_time": "0:33:28", "throughput": 5515.72, "total_tokens": 7570352}
|
|
{"current_steps": 15385, "total_steps": 37885, "loss": 0.0006, "lr": 1.4815631102509843e-06, "epoch": 2.0304870001319784, "percentage": 40.61, "elapsed_time": "0:22:52", "remaining_time": "0:33:27", "throughput": 5516.12, "total_tokens": 7572720}
|
|
{"current_steps": 15390, "total_steps": 37885, "loss": 0.0006, "lr": 1.4811592980649014e-06, "epoch": 2.0311468919097266, "percentage": 40.62, "elapsed_time": "0:22:53", "remaining_time": "0:33:27", "throughput": 5516.44, "total_tokens": 7574960}
|
|
{"current_steps": 15395, "total_steps": 37885, "loss": 0.0002, "lr": 1.4807553837562415e-06, "epoch": 2.0318067836874754, "percentage": 40.64, "elapsed_time": "0:22:53", "remaining_time": "0:33:26", "throughput": 5517.02, "total_tokens": 7577584}
|
|
{"current_steps": 15400, "total_steps": 37885, "loss": 0.0, "lr": 1.4803513674107325e-06, "epoch": 2.0324666754652236, "percentage": 40.65, "elapsed_time": "0:22:53", "remaining_time": "0:33:25", "throughput": 5517.48, "total_tokens": 7580016}
|
|
{"current_steps": 15405, "total_steps": 37885, "loss": 0.0846, "lr": 1.4799472491141245e-06, "epoch": 2.0331265672429724, "percentage": 40.66, "elapsed_time": "0:22:54", "remaining_time": "0:33:25", "throughput": 5517.84, "total_tokens": 7582320}
|
|
{"current_steps": 15410, "total_steps": 37885, "loss": 0.0432, "lr": 1.4795430289521885e-06, "epoch": 2.0337864590207206, "percentage": 40.68, "elapsed_time": "0:22:54", "remaining_time": "0:33:24", "throughput": 5518.45, "total_tokens": 7585008}
|
|
{"current_steps": 15415, "total_steps": 37885, "loss": 0.0021, "lr": 1.479138707010717e-06, "epoch": 2.034446350798469, "percentage": 40.69, "elapsed_time": "0:22:54", "remaining_time": "0:33:24", "throughput": 5519.11, "total_tokens": 7587760}
|
|
{"current_steps": 15420, "total_steps": 37885, "loss": 0.0004, "lr": 1.4787342833755245e-06, "epoch": 2.0351062425762176, "percentage": 40.7, "elapsed_time": "0:22:55", "remaining_time": "0:33:23", "throughput": 5519.61, "total_tokens": 7590256}
|
|
{"current_steps": 15425, "total_steps": 37885, "loss": 0.0977, "lr": 1.4783297581324472e-06, "epoch": 2.035766134353966, "percentage": 40.72, "elapsed_time": "0:22:55", "remaining_time": "0:33:22", "throughput": 5520.08, "total_tokens": 7592688}
|
|
{"current_steps": 15430, "total_steps": 37885, "loss": 0.0257, "lr": 1.4779251313673422e-06, "epoch": 2.036426026131714, "percentage": 40.73, "elapsed_time": "0:22:55", "remaining_time": "0:33:22", "throughput": 5520.51, "total_tokens": 7595120}
|
|
{"current_steps": 15435, "total_steps": 37885, "loss": 0.0004, "lr": 1.4775204031660894e-06, "epoch": 2.037085917909463, "percentage": 40.74, "elapsed_time": "0:22:56", "remaining_time": "0:33:21", "throughput": 5521.01, "total_tokens": 7597680}
|
|
{"current_steps": 15440, "total_steps": 37885, "loss": 0.0001, "lr": 1.4771155736145888e-06, "epoch": 2.037745809687211, "percentage": 40.75, "elapsed_time": "0:22:56", "remaining_time": "0:33:20", "throughput": 5521.55, "total_tokens": 7600304}
|
|
{"current_steps": 15445, "total_steps": 37885, "loss": 0.001, "lr": 1.4767106427987625e-06, "epoch": 2.03840570146496, "percentage": 40.77, "elapsed_time": "0:22:56", "remaining_time": "0:33:20", "throughput": 5522.02, "total_tokens": 7602800}
|
|
{"current_steps": 15450, "total_steps": 37885, "loss": 0.0798, "lr": 1.4763056108045549e-06, "epoch": 2.039065593242708, "percentage": 40.78, "elapsed_time": "0:22:57", "remaining_time": "0:33:19", "throughput": 5522.31, "total_tokens": 7605040}
|
|
{"current_steps": 15455, "total_steps": 37885, "loss": 0.0007, "lr": 1.4759004777179297e-06, "epoch": 2.0397254850204565, "percentage": 40.79, "elapsed_time": "0:22:57", "remaining_time": "0:33:19", "throughput": 5522.7, "total_tokens": 7607408}
|
|
{"current_steps": 15460, "total_steps": 37885, "loss": 0.0612, "lr": 1.475495243624875e-06, "epoch": 2.040385376798205, "percentage": 40.81, "elapsed_time": "0:22:57", "remaining_time": "0:33:18", "throughput": 5523.17, "total_tokens": 7609904}
|
|
{"current_steps": 15465, "total_steps": 37885, "loss": 0.0879, "lr": 1.475089908611398e-06, "epoch": 2.0410452685759535, "percentage": 40.82, "elapsed_time": "0:22:58", "remaining_time": "0:33:17", "throughput": 5523.77, "total_tokens": 7612592}
|
|
{"current_steps": 15470, "total_steps": 37885, "loss": 0.0002, "lr": 1.4746844727635282e-06, "epoch": 2.041705160353702, "percentage": 40.83, "elapsed_time": "0:22:58", "remaining_time": "0:33:17", "throughput": 5524.29, "total_tokens": 7615152}
|
|
{"current_steps": 15475, "total_steps": 37885, "loss": 0.1432, "lr": 1.474278936167316e-06, "epoch": 2.0423650521314505, "percentage": 40.85, "elapsed_time": "0:22:58", "remaining_time": "0:33:16", "throughput": 5524.71, "total_tokens": 7617584}
|
|
{"current_steps": 15480, "total_steps": 37885, "loss": 0.0, "lr": 1.4738732989088347e-06, "epoch": 2.0430249439091988, "percentage": 40.86, "elapsed_time": "0:22:59", "remaining_time": "0:33:16", "throughput": 5525.26, "total_tokens": 7620208}
|
|
{"current_steps": 15485, "total_steps": 37885, "loss": 0.0001, "lr": 1.4734675610741767e-06, "epoch": 2.0436848356869475, "percentage": 40.87, "elapsed_time": "0:22:59", "remaining_time": "0:33:15", "throughput": 5525.87, "total_tokens": 7622896}
|
|
{"current_steps": 15490, "total_steps": 37885, "loss": 0.0506, "lr": 1.4730617227494577e-06, "epoch": 2.0443447274646958, "percentage": 40.89, "elapsed_time": "0:22:59", "remaining_time": "0:33:14", "throughput": 5526.31, "total_tokens": 7625328}
|
|
{"current_steps": 15495, "total_steps": 37885, "loss": 0.0549, "lr": 1.4726557840208137e-06, "epoch": 2.045004619242444, "percentage": 40.9, "elapsed_time": "0:23:00", "remaining_time": "0:33:14", "throughput": 5527.02, "total_tokens": 7628208}
|
|
{"current_steps": 15500, "total_steps": 37885, "loss": 0.0008, "lr": 1.4722497449744022e-06, "epoch": 2.0456645110201928, "percentage": 40.91, "elapsed_time": "0:23:00", "remaining_time": "0:33:13", "throughput": 5527.46, "total_tokens": 7630640}
|
|
{"current_steps": 15505, "total_steps": 37885, "loss": 0.0009, "lr": 1.471843605696402e-06, "epoch": 2.046324402797941, "percentage": 40.93, "elapsed_time": "0:23:00", "remaining_time": "0:33:13", "throughput": 5527.89, "total_tokens": 7633072}
|
|
{"current_steps": 15510, "total_steps": 37885, "loss": 0.0002, "lr": 1.4714373662730136e-06, "epoch": 2.0469842945756898, "percentage": 40.94, "elapsed_time": "0:23:01", "remaining_time": "0:33:12", "throughput": 5528.36, "total_tokens": 7635568}
|
|
{"current_steps": 15515, "total_steps": 37885, "loss": 0.0002, "lr": 1.4710310267904578e-06, "epoch": 2.047644186353438, "percentage": 40.95, "elapsed_time": "0:23:01", "remaining_time": "0:33:11", "throughput": 5529.01, "total_tokens": 7638320}
|
|
{"current_steps": 15520, "total_steps": 37885, "loss": 0.0627, "lr": 1.4706245873349777e-06, "epoch": 2.0483040781311863, "percentage": 40.97, "elapsed_time": "0:23:01", "remaining_time": "0:33:11", "throughput": 5529.25, "total_tokens": 7640496}
|
|
{"current_steps": 15525, "total_steps": 37885, "loss": 0.0002, "lr": 1.4702180479928368e-06, "epoch": 2.048963969908935, "percentage": 40.98, "elapsed_time": "0:23:02", "remaining_time": "0:33:10", "throughput": 5529.53, "total_tokens": 7642736}
|
|
{"current_steps": 15530, "total_steps": 37885, "loss": 0.0519, "lr": 1.4698114088503203e-06, "epoch": 2.0496238616866833, "percentage": 40.99, "elapsed_time": "0:23:02", "remaining_time": "0:33:10", "throughput": 5529.91, "total_tokens": 7645104}
|
|
{"current_steps": 15535, "total_steps": 37885, "loss": 0.0002, "lr": 1.4694046699937341e-06, "epoch": 2.050283753464432, "percentage": 41.01, "elapsed_time": "0:23:02", "remaining_time": "0:33:09", "throughput": 5530.28, "total_tokens": 7647472}
|
|
{"current_steps": 15540, "total_steps": 37885, "loss": 0.0015, "lr": 1.4689978315094066e-06, "epoch": 2.0509436452421803, "percentage": 41.02, "elapsed_time": "0:23:03", "remaining_time": "0:33:08", "throughput": 5530.75, "total_tokens": 7649968}
|
|
{"current_steps": 15545, "total_steps": 37885, "loss": 0.0712, "lr": 1.468590893483685e-06, "epoch": 2.0516035370199286, "percentage": 41.03, "elapsed_time": "0:23:03", "remaining_time": "0:33:08", "throughput": 5531.29, "total_tokens": 7652592}
|
|
{"current_steps": 15550, "total_steps": 37885, "loss": 0.0006, "lr": 1.4681838560029395e-06, "epoch": 2.0522634287976773, "percentage": 41.05, "elapsed_time": "0:23:03", "remaining_time": "0:33:07", "throughput": 5531.84, "total_tokens": 7655216}
|
|
{"current_steps": 15555, "total_steps": 37885, "loss": 0.0006, "lr": 1.467776719153561e-06, "epoch": 2.0529233205754256, "percentage": 41.06, "elapsed_time": "0:23:04", "remaining_time": "0:33:07", "throughput": 5532.39, "total_tokens": 7657840}
|
|
{"current_steps": 15560, "total_steps": 37885, "loss": 0.0458, "lr": 1.4673694830219613e-06, "epoch": 2.053583212353174, "percentage": 41.07, "elapsed_time": "0:23:04", "remaining_time": "0:33:06", "throughput": 5532.71, "total_tokens": 7660144}
|
|
{"current_steps": 15565, "total_steps": 37885, "loss": 0.0001, "lr": 1.466962147694573e-06, "epoch": 2.0542431041309226, "percentage": 41.08, "elapsed_time": "0:23:04", "remaining_time": "0:33:05", "throughput": 5533.22, "total_tokens": 7662704}
|
|
{"current_steps": 15570, "total_steps": 37885, "loss": 0.093, "lr": 1.4665547132578508e-06, "epoch": 2.054902995908671, "percentage": 41.1, "elapsed_time": "0:23:05", "remaining_time": "0:33:05", "throughput": 5533.7, "total_tokens": 7665200}
|
|
{"current_steps": 15575, "total_steps": 37885, "loss": 0.0002, "lr": 1.466147179798269e-06, "epoch": 2.0555628876864196, "percentage": 41.11, "elapsed_time": "0:23:05", "remaining_time": "0:33:04", "throughput": 5533.94, "total_tokens": 7667376}
|
|
{"current_steps": 15580, "total_steps": 37885, "loss": 0.0003, "lr": 1.4657395474023237e-06, "epoch": 2.056222779464168, "percentage": 41.12, "elapsed_time": "0:23:05", "remaining_time": "0:33:04", "throughput": 5534.25, "total_tokens": 7669680}
|
|
{"current_steps": 15585, "total_steps": 37885, "loss": 0.0004, "lr": 1.4653318161565325e-06, "epoch": 2.056882671241916, "percentage": 41.14, "elapsed_time": "0:23:06", "remaining_time": "0:33:03", "throughput": 5534.72, "total_tokens": 7672176}
|
|
{"current_steps": 15590, "total_steps": 37885, "loss": 0.0523, "lr": 1.4649239861474324e-06, "epoch": 2.057542563019665, "percentage": 41.15, "elapsed_time": "0:23:06", "remaining_time": "0:33:02", "throughput": 5535.27, "total_tokens": 7674800}
|
|
{"current_steps": 15595, "total_steps": 37885, "loss": 0.1052, "lr": 1.4645160574615834e-06, "epoch": 2.058202454797413, "percentage": 41.16, "elapsed_time": "0:23:06", "remaining_time": "0:33:02", "throughput": 5535.7, "total_tokens": 7677232}
|
|
{"current_steps": 15600, "total_steps": 37885, "loss": 0.0003, "lr": 1.4641080301855648e-06, "epoch": 2.058862346575162, "percentage": 41.18, "elapsed_time": "0:23:07", "remaining_time": "0:33:01", "throughput": 5536.02, "total_tokens": 7679536}
|
|
{"current_steps": 15605, "total_steps": 37885, "loss": 0.0002, "lr": 1.4636999044059777e-06, "epoch": 2.05952223835291, "percentage": 41.19, "elapsed_time": "0:23:07", "remaining_time": "0:33:01", "throughput": 5536.37, "total_tokens": 7681840}
|
|
{"current_steps": 15610, "total_steps": 37885, "loss": 0.1445, "lr": 1.4632916802094436e-06, "epoch": 2.0601821301306584, "percentage": 41.2, "elapsed_time": "0:23:07", "remaining_time": "0:33:00", "throughput": 5536.94, "total_tokens": 7684528}
|
|
{"current_steps": 15615, "total_steps": 37885, "loss": 0.0006, "lr": 1.462883357682605e-06, "epoch": 2.060842021908407, "percentage": 41.22, "elapsed_time": "0:23:08", "remaining_time": "0:32:59", "throughput": 5537.49, "total_tokens": 7687152}
|
|
{"current_steps": 15620, "total_steps": 37885, "loss": 0.1174, "lr": 1.4624749369121265e-06, "epoch": 2.0615019136861554, "percentage": 41.23, "elapsed_time": "0:23:08", "remaining_time": "0:32:59", "throughput": 5537.99, "total_tokens": 7689712}
|
|
{"current_steps": 15625, "total_steps": 37885, "loss": 0.0001, "lr": 1.4620664179846908e-06, "epoch": 2.0621618054639037, "percentage": 41.24, "elapsed_time": "0:23:08", "remaining_time": "0:32:58", "throughput": 5538.38, "total_tokens": 7692144}
|
|
{"current_steps": 15630, "total_steps": 37885, "loss": 0.0006, "lr": 1.4616578009870044e-06, "epoch": 2.0628216972416524, "percentage": 41.26, "elapsed_time": "0:23:09", "remaining_time": "0:32:58", "throughput": 5538.62, "total_tokens": 7694320}
|
|
{"current_steps": 15635, "total_steps": 37885, "loss": 0.0008, "lr": 1.4612490860057927e-06, "epoch": 2.0634815890194007, "percentage": 41.27, "elapsed_time": "0:23:09", "remaining_time": "0:32:57", "throughput": 5539.06, "total_tokens": 7696752}
|
|
{"current_steps": 15640, "total_steps": 37885, "loss": 0.0002, "lr": 1.4608402731278022e-06, "epoch": 2.0641414807971494, "percentage": 41.28, "elapsed_time": "0:23:09", "remaining_time": "0:32:56", "throughput": 5539.48, "total_tokens": 7699184}
|
|
{"current_steps": 15645, "total_steps": 37885, "loss": 0.1332, "lr": 1.4604313624398014e-06, "epoch": 2.0648013725748977, "percentage": 41.3, "elapsed_time": "0:23:10", "remaining_time": "0:32:56", "throughput": 5539.77, "total_tokens": 7701424}
|
|
{"current_steps": 15650, "total_steps": 37885, "loss": 0.0006, "lr": 1.4600223540285778e-06, "epoch": 2.065461264352646, "percentage": 41.31, "elapsed_time": "0:23:10", "remaining_time": "0:32:55", "throughput": 5540.19, "total_tokens": 7703856}
|
|
{"current_steps": 15655, "total_steps": 37885, "loss": 0.0737, "lr": 1.459613247980941e-06, "epoch": 2.0661211561303947, "percentage": 41.32, "elapsed_time": "0:23:10", "remaining_time": "0:32:55", "throughput": 5540.58, "total_tokens": 7706224}
|
|
{"current_steps": 15660, "total_steps": 37885, "loss": 0.0006, "lr": 1.4592040443837203e-06, "epoch": 2.066781047908143, "percentage": 41.34, "elapsed_time": "0:23:11", "remaining_time": "0:32:54", "throughput": 5540.84, "total_tokens": 7708400}
|
|
{"current_steps": 15665, "total_steps": 37885, "loss": 0.0015, "lr": 1.458794743323767e-06, "epoch": 2.0674409396858917, "percentage": 41.35, "elapsed_time": "0:23:11", "remaining_time": "0:32:53", "throughput": 5541.34, "total_tokens": 7710960}
|
|
{"current_steps": 15670, "total_steps": 37885, "loss": 0.0003, "lr": 1.4583853448879513e-06, "epoch": 2.06810083146364, "percentage": 41.36, "elapsed_time": "0:23:11", "remaining_time": "0:32:53", "throughput": 5541.83, "total_tokens": 7713520}
|
|
{"current_steps": 15675, "total_steps": 37885, "loss": 0.0001, "lr": 1.4579758491631655e-06, "epoch": 2.0687607232413883, "percentage": 41.38, "elapsed_time": "0:23:12", "remaining_time": "0:32:52", "throughput": 5542.14, "total_tokens": 7715824}
|
|
{"current_steps": 15680, "total_steps": 37885, "loss": 0.0443, "lr": 1.4575662562363222e-06, "epoch": 2.069420615019137, "percentage": 41.39, "elapsed_time": "0:23:12", "remaining_time": "0:32:52", "throughput": 5542.67, "total_tokens": 7718448}
|
|
{"current_steps": 15685, "total_steps": 37885, "loss": 0.0005, "lr": 1.4571565661943542e-06, "epoch": 2.0700805067968853, "percentage": 41.4, "elapsed_time": "0:23:12", "remaining_time": "0:32:51", "throughput": 5543.13, "total_tokens": 7720944}
|
|
{"current_steps": 15690, "total_steps": 37885, "loss": 0.0007, "lr": 1.456746779124216e-06, "epoch": 2.0707403985746335, "percentage": 41.41, "elapsed_time": "0:23:13", "remaining_time": "0:32:50", "throughput": 5543.79, "total_tokens": 7723760}
|
|
{"current_steps": 15695, "total_steps": 37885, "loss": 0.0004, "lr": 1.4563368951128812e-06, "epoch": 2.0714002903523823, "percentage": 41.43, "elapsed_time": "0:23:13", "remaining_time": "0:32:50", "throughput": 5544.24, "total_tokens": 7726256}
|
|
{"current_steps": 15700, "total_steps": 37885, "loss": 0.1174, "lr": 1.4559269142473452e-06, "epoch": 2.0720601821301305, "percentage": 41.44, "elapsed_time": "0:23:13", "remaining_time": "0:32:49", "throughput": 5544.96, "total_tokens": 7729136}
|
|
{"current_steps": 15705, "total_steps": 37885, "loss": 0.0002, "lr": 1.455516836614623e-06, "epoch": 2.0727200739078793, "percentage": 41.45, "elapsed_time": "0:23:14", "remaining_time": "0:32:49", "throughput": 5545.54, "total_tokens": 7731824}
|
|
{"current_steps": 15710, "total_steps": 37885, "loss": 0.1329, "lr": 1.4551066623017505e-06, "epoch": 2.0733799656856275, "percentage": 41.47, "elapsed_time": "0:23:14", "remaining_time": "0:32:48", "throughput": 5545.87, "total_tokens": 7734128}
|
|
{"current_steps": 15715, "total_steps": 37885, "loss": 0.072, "lr": 1.4546963913957848e-06, "epoch": 2.074039857463376, "percentage": 41.48, "elapsed_time": "0:23:14", "remaining_time": "0:32:47", "throughput": 5545.97, "total_tokens": 7736112}
|
|
{"current_steps": 15720, "total_steps": 37885, "loss": 0.0002, "lr": 1.4542860239838025e-06, "epoch": 2.0746997492411245, "percentage": 41.49, "elapsed_time": "0:23:15", "remaining_time": "0:32:47", "throughput": 5546.52, "total_tokens": 7738736}
|
|
{"current_steps": 15725, "total_steps": 37885, "loss": 0.0002, "lr": 1.4538755601529018e-06, "epoch": 2.075359641018873, "percentage": 41.51, "elapsed_time": "0:23:15", "remaining_time": "0:32:46", "throughput": 5547.09, "total_tokens": 7741424}
|
|
{"current_steps": 15730, "total_steps": 37885, "loss": 0.0003, "lr": 1.4534649999901999e-06, "epoch": 2.0760195327966215, "percentage": 41.52, "elapsed_time": "0:23:15", "remaining_time": "0:32:46", "throughput": 5547.35, "total_tokens": 7743664}
|
|
{"current_steps": 15735, "total_steps": 37885, "loss": 0.0002, "lr": 1.4530543435828355e-06, "epoch": 2.07667942457437, "percentage": 41.53, "elapsed_time": "0:23:16", "remaining_time": "0:32:45", "throughput": 5547.71, "total_tokens": 7746032}
|
|
{"current_steps": 15740, "total_steps": 37885, "loss": 0.0567, "lr": 1.4526435910179674e-06, "epoch": 2.077339316352118, "percentage": 41.55, "elapsed_time": "0:23:16", "remaining_time": "0:32:44", "throughput": 5548.02, "total_tokens": 7748336}
|
|
{"current_steps": 15745, "total_steps": 37885, "loss": 0.1586, "lr": 1.4522327423827746e-06, "epoch": 2.077999208129867, "percentage": 41.56, "elapsed_time": "0:23:16", "remaining_time": "0:32:44", "throughput": 5548.56, "total_tokens": 7750960}
|
|
{"current_steps": 15750, "total_steps": 37885, "loss": 0.0005, "lr": 1.4518217977644576e-06, "epoch": 2.078659099907615, "percentage": 41.57, "elapsed_time": "0:23:17", "remaining_time": "0:32:43", "throughput": 5549.21, "total_tokens": 7753776}
|
|
{"current_steps": 15755, "total_steps": 37885, "loss": 0.0007, "lr": 1.4514107572502355e-06, "epoch": 2.079318991685364, "percentage": 41.59, "elapsed_time": "0:23:17", "remaining_time": "0:32:43", "throughput": 5549.73, "total_tokens": 7756400}
|
|
{"current_steps": 15760, "total_steps": 37885, "loss": 0.0781, "lr": 1.450999620927349e-06, "epoch": 2.079978883463112, "percentage": 41.6, "elapsed_time": "0:23:17", "remaining_time": "0:32:42", "throughput": 5550.02, "total_tokens": 7758640}
|
|
{"current_steps": 15765, "total_steps": 37885, "loss": 0.0003, "lr": 1.4505883888830591e-06, "epoch": 2.0806387752408604, "percentage": 41.61, "elapsed_time": "0:23:18", "remaining_time": "0:32:41", "throughput": 5550.43, "total_tokens": 7761072}
|
|
{"current_steps": 15770, "total_steps": 37885, "loss": 0.001, "lr": 1.4501770612046461e-06, "epoch": 2.081298667018609, "percentage": 41.63, "elapsed_time": "0:23:18", "remaining_time": "0:32:41", "throughput": 5550.97, "total_tokens": 7763696}
|
|
{"current_steps": 15775, "total_steps": 37885, "loss": 0.0007, "lr": 1.4497656379794126e-06, "epoch": 2.0819585587963574, "percentage": 41.64, "elapsed_time": "0:23:18", "remaining_time": "0:32:40", "throughput": 5551.42, "total_tokens": 7766192}
|
|
{"current_steps": 15780, "total_steps": 37885, "loss": 0.0007, "lr": 1.4493541192946785e-06, "epoch": 2.0826184505741057, "percentage": 41.65, "elapsed_time": "0:23:19", "remaining_time": "0:32:40", "throughput": 5551.92, "total_tokens": 7768752}
|
|
{"current_steps": 15785, "total_steps": 37885, "loss": 0.0007, "lr": 1.448942505237787e-06, "epoch": 2.0832783423518544, "percentage": 41.67, "elapsed_time": "0:23:19", "remaining_time": "0:32:39", "throughput": 5552.36, "total_tokens": 7771248}
|
|
{"current_steps": 15790, "total_steps": 37885, "loss": 0.0, "lr": 1.4485307958960996e-06, "epoch": 2.0839382341296027, "percentage": 41.68, "elapsed_time": "0:23:19", "remaining_time": "0:32:38", "throughput": 5553.01, "total_tokens": 7774064}
|
|
{"current_steps": 15795, "total_steps": 37885, "loss": 0.0001, "lr": 1.448118991356999e-06, "epoch": 2.0845981259073514, "percentage": 41.69, "elapsed_time": "0:23:20", "remaining_time": "0:32:38", "throughput": 5553.43, "total_tokens": 7776496}
|
|
{"current_steps": 15800, "total_steps": 37885, "loss": 0.1766, "lr": 1.4477070917078876e-06, "epoch": 2.0852580176850997, "percentage": 41.71, "elapsed_time": "0:23:20", "remaining_time": "0:32:37", "throughput": 5553.77, "total_tokens": 7778800}
|
|
{"current_steps": 15805, "total_steps": 37885, "loss": 0.0004, "lr": 1.4472950970361878e-06, "epoch": 2.085917909462848, "percentage": 41.72, "elapsed_time": "0:23:20", "remaining_time": "0:32:37", "throughput": 5554.16, "total_tokens": 7781168}
|
|
{"current_steps": 15810, "total_steps": 37885, "loss": 0.0002, "lr": 1.4468830074293425e-06, "epoch": 2.0865778012405967, "percentage": 41.73, "elapsed_time": "0:23:21", "remaining_time": "0:32:36", "throughput": 5554.72, "total_tokens": 7783792}
|
|
{"current_steps": 15815, "total_steps": 37885, "loss": 0.0104, "lr": 1.4464708229748154e-06, "epoch": 2.087237693018345, "percentage": 41.74, "elapsed_time": "0:23:21", "remaining_time": "0:32:35", "throughput": 5555.2, "total_tokens": 7786288}
|
|
{"current_steps": 15820, "total_steps": 37885, "loss": 0.0001, "lr": 1.4460585437600887e-06, "epoch": 2.087897584796093, "percentage": 41.76, "elapsed_time": "0:23:21", "remaining_time": "0:32:35", "throughput": 5555.71, "total_tokens": 7788848}
|
|
{"current_steps": 15825, "total_steps": 37885, "loss": 0.0001, "lr": 1.4456461698726666e-06, "epoch": 2.088557476573842, "percentage": 41.77, "elapsed_time": "0:23:22", "remaining_time": "0:32:34", "throughput": 5556.27, "total_tokens": 7791472}
|
|
{"current_steps": 15830, "total_steps": 37885, "loss": 0.0002, "lr": 1.445233701400072e-06, "epoch": 2.08921736835159, "percentage": 41.78, "elapsed_time": "0:23:22", "remaining_time": "0:32:34", "throughput": 5556.83, "total_tokens": 7794096}
|
|
{"current_steps": 15835, "total_steps": 37885, "loss": 0.0923, "lr": 1.4448211384298482e-06, "epoch": 2.089877260129339, "percentage": 41.8, "elapsed_time": "0:23:22", "remaining_time": "0:32:33", "throughput": 5557.23, "total_tokens": 7796464}
|
|
{"current_steps": 15840, "total_steps": 37885, "loss": 0.0005, "lr": 1.4444084810495589e-06, "epoch": 2.090537151907087, "percentage": 41.81, "elapsed_time": "0:23:23", "remaining_time": "0:32:32", "throughput": 5557.61, "total_tokens": 7798832}
|
|
{"current_steps": 15845, "total_steps": 37885, "loss": 0.2503, "lr": 1.4439957293467877e-06, "epoch": 2.0911970436848355, "percentage": 41.82, "elapsed_time": "0:23:23", "remaining_time": "0:32:32", "throughput": 5558.17, "total_tokens": 7801456}
|
|
{"current_steps": 15850, "total_steps": 37885, "loss": 0.0013, "lr": 1.4435828834091384e-06, "epoch": 2.091856935462584, "percentage": 41.84, "elapsed_time": "0:23:23", "remaining_time": "0:32:31", "throughput": 5558.68, "total_tokens": 7804016}
|
|
{"current_steps": 15855, "total_steps": 37885, "loss": 0.0461, "lr": 1.443169943324234e-06, "epoch": 2.0925168272403325, "percentage": 41.85, "elapsed_time": "0:23:24", "remaining_time": "0:32:31", "throughput": 5559.07, "total_tokens": 7806384}
|
|
{"current_steps": 15860, "total_steps": 37885, "loss": 0.0001, "lr": 1.4427569091797182e-06, "epoch": 2.0931767190180812, "percentage": 41.86, "elapsed_time": "0:23:24", "remaining_time": "0:32:30", "throughput": 5559.29, "total_tokens": 7808496}
|
|
{"current_steps": 15865, "total_steps": 37885, "loss": 0.0003, "lr": 1.442343781063255e-06, "epoch": 2.0938366107958295, "percentage": 41.88, "elapsed_time": "0:23:24", "remaining_time": "0:32:29", "throughput": 5559.76, "total_tokens": 7810992}
|
|
{"current_steps": 15870, "total_steps": 37885, "loss": 0.0001, "lr": 1.441930559062527e-06, "epoch": 2.094496502573578, "percentage": 41.89, "elapsed_time": "0:23:25", "remaining_time": "0:32:29", "throughput": 5560.35, "total_tokens": 7813680}
|
|
{"current_steps": 15875, "total_steps": 37885, "loss": 0.0673, "lr": 1.4415172432652385e-06, "epoch": 2.0951563943513265, "percentage": 41.9, "elapsed_time": "0:23:25", "remaining_time": "0:32:28", "throughput": 5560.91, "total_tokens": 7816304}
|
|
{"current_steps": 15880, "total_steps": 37885, "loss": 0.0001, "lr": 1.441103833759112e-06, "epoch": 2.095816286129075, "percentage": 41.92, "elapsed_time": "0:23:25", "remaining_time": "0:32:28", "throughput": 5561.43, "total_tokens": 7818864}
|
|
{"current_steps": 15885, "total_steps": 37885, "loss": 0.0004, "lr": 1.4406903306318913e-06, "epoch": 2.0964761779068235, "percentage": 41.93, "elapsed_time": "0:23:26", "remaining_time": "0:32:27", "throughput": 5561.76, "total_tokens": 7821168}
|
|
{"current_steps": 15890, "total_steps": 37885, "loss": 0.0002, "lr": 1.440276733971339e-06, "epoch": 2.097136069684572, "percentage": 41.94, "elapsed_time": "0:23:26", "remaining_time": "0:32:26", "throughput": 5562.4, "total_tokens": 7823920}
|
|
{"current_steps": 15895, "total_steps": 37885, "loss": 0.0001, "lr": 1.439863043865238e-06, "epoch": 2.09779596146232, "percentage": 41.96, "elapsed_time": "0:23:26", "remaining_time": "0:32:26", "throughput": 5562.74, "total_tokens": 7826224}
|
|
{"current_steps": 15900, "total_steps": 37885, "loss": 0.0, "lr": 1.4394492604013914e-06, "epoch": 2.098455853240069, "percentage": 41.97, "elapsed_time": "0:23:27", "remaining_time": "0:32:25", "throughput": 5563.22, "total_tokens": 7828720}
|
|
{"current_steps": 15905, "total_steps": 37885, "loss": 0.0554, "lr": 1.4390353836676217e-06, "epoch": 2.099115745017817, "percentage": 41.98, "elapsed_time": "0:23:27", "remaining_time": "0:32:25", "throughput": 5563.76, "total_tokens": 7831344}
|
|
{"current_steps": 15910, "total_steps": 37885, "loss": 0.0002, "lr": 1.4386214137517707e-06, "epoch": 2.0997756367955653, "percentage": 42.0, "elapsed_time": "0:23:27", "remaining_time": "0:32:24", "throughput": 5564.24, "total_tokens": 7833840}
|
|
{"current_steps": 15915, "total_steps": 37885, "loss": 0.0659, "lr": 1.438207350741701e-06, "epoch": 2.100435528573314, "percentage": 42.01, "elapsed_time": "0:23:28", "remaining_time": "0:32:23", "throughput": 5564.5, "total_tokens": 7836016}
|
|
{"current_steps": 15920, "total_steps": 37885, "loss": 0.0295, "lr": 1.4377931947252943e-06, "epoch": 2.1010954203510623, "percentage": 42.02, "elapsed_time": "0:23:28", "remaining_time": "0:32:23", "throughput": 5565.13, "total_tokens": 7838768}
|
|
{"current_steps": 15925, "total_steps": 37885, "loss": 0.0581, "lr": 1.4373789457904522e-06, "epoch": 2.101755312128811, "percentage": 42.04, "elapsed_time": "0:23:28", "remaining_time": "0:32:22", "throughput": 5565.65, "total_tokens": 7841328}
|
|
{"current_steps": 15930, "total_steps": 37885, "loss": 0.0002, "lr": 1.4369646040250962e-06, "epoch": 2.1024152039065593, "percentage": 42.05, "elapsed_time": "0:23:29", "remaining_time": "0:32:22", "throughput": 5566.09, "total_tokens": 7843760}
|
|
{"current_steps": 15935, "total_steps": 37885, "loss": 0.0908, "lr": 1.4365501695171673e-06, "epoch": 2.1030750956843076, "percentage": 42.06, "elapsed_time": "0:23:29", "remaining_time": "0:32:21", "throughput": 5566.72, "total_tokens": 7846512}
|
|
{"current_steps": 15940, "total_steps": 37885, "loss": 0.0002, "lr": 1.436135642354626e-06, "epoch": 2.1037349874620563, "percentage": 42.07, "elapsed_time": "0:23:29", "remaining_time": "0:32:21", "throughput": 5567.23, "total_tokens": 7849072}
|
|
{"current_steps": 15945, "total_steps": 37885, "loss": 0.088, "lr": 1.4357210226254533e-06, "epoch": 2.1043948792398046, "percentage": 42.09, "elapsed_time": "0:23:30", "remaining_time": "0:32:20", "throughput": 5567.75, "total_tokens": 7851632}
|
|
{"current_steps": 15950, "total_steps": 37885, "loss": 0.0004, "lr": 1.435306310417648e-06, "epoch": 2.105054771017553, "percentage": 42.1, "elapsed_time": "0:23:30", "remaining_time": "0:32:19", "throughput": 5568.17, "total_tokens": 7854064}
|
|
{"current_steps": 15955, "total_steps": 37885, "loss": 0.0478, "lr": 1.4348915058192316e-06, "epoch": 2.1057146627953016, "percentage": 42.11, "elapsed_time": "0:23:30", "remaining_time": "0:32:19", "throughput": 5568.77, "total_tokens": 7856752}
|
|
{"current_steps": 15960, "total_steps": 37885, "loss": 0.0468, "lr": 1.4344766089182416e-06, "epoch": 2.10637455457305, "percentage": 42.13, "elapsed_time": "0:23:31", "remaining_time": "0:32:18", "throughput": 5569.32, "total_tokens": 7859376}
|
|
{"current_steps": 15965, "total_steps": 37885, "loss": 0.0001, "lr": 1.4340616198027377e-06, "epoch": 2.1070344463507986, "percentage": 42.14, "elapsed_time": "0:23:31", "remaining_time": "0:32:18", "throughput": 5569.85, "total_tokens": 7862000}
|
|
{"current_steps": 15970, "total_steps": 37885, "loss": 0.0612, "lr": 1.4336465385607982e-06, "epoch": 2.107694338128547, "percentage": 42.15, "elapsed_time": "0:23:31", "remaining_time": "0:32:17", "throughput": 5570.19, "total_tokens": 7864304}
|
|
{"current_steps": 15975, "total_steps": 37885, "loss": 0.1067, "lr": 1.433231365280521e-06, "epoch": 2.108354229906295, "percentage": 42.17, "elapsed_time": "0:23:32", "remaining_time": "0:32:16", "throughput": 5570.52, "total_tokens": 7866608}
|
|
{"current_steps": 15980, "total_steps": 37885, "loss": 0.0596, "lr": 1.432816100050024e-06, "epoch": 2.109014121684044, "percentage": 42.18, "elapsed_time": "0:23:32", "remaining_time": "0:32:16", "throughput": 5570.95, "total_tokens": 7869040}
|
|
{"current_steps": 15985, "total_steps": 37885, "loss": 0.0001, "lr": 1.432400742957444e-06, "epoch": 2.109674013461792, "percentage": 42.19, "elapsed_time": "0:23:32", "remaining_time": "0:32:15", "throughput": 5571.58, "total_tokens": 7871792}
|
|
{"current_steps": 15990, "total_steps": 37885, "loss": 0.001, "lr": 1.4319852940909377e-06, "epoch": 2.110333905239541, "percentage": 42.21, "elapsed_time": "0:23:33", "remaining_time": "0:32:15", "throughput": 5571.96, "total_tokens": 7874160}
|
|
{"current_steps": 15995, "total_steps": 37885, "loss": 0.0029, "lr": 1.4315697535386804e-06, "epoch": 2.110993797017289, "percentage": 42.22, "elapsed_time": "0:23:33", "remaining_time": "0:32:14", "throughput": 5572.43, "total_tokens": 7876656}
|
|
{"current_steps": 16000, "total_steps": 37885, "loss": 0.0551, "lr": 1.4311541213888682e-06, "epoch": 2.1116536887950375, "percentage": 42.23, "elapsed_time": "0:23:33", "remaining_time": "0:32:13", "throughput": 5572.99, "total_tokens": 7879280}
|
|
{"current_steps": 16005, "total_steps": 37885, "loss": 0.0002, "lr": 1.430738397729716e-06, "epoch": 2.112313580572786, "percentage": 42.25, "elapsed_time": "0:23:34", "remaining_time": "0:32:13", "throughput": 5573.42, "total_tokens": 7881712}
|
|
{"current_steps": 16010, "total_steps": 37885, "loss": 0.0004, "lr": 1.4303225826494583e-06, "epoch": 2.1129734723505345, "percentage": 42.26, "elapsed_time": "0:23:34", "remaining_time": "0:32:12", "throughput": 5573.71, "total_tokens": 7883952}
|
|
{"current_steps": 16015, "total_steps": 37885, "loss": 0.0488, "lr": 1.4299066762363484e-06, "epoch": 2.113633364128283, "percentage": 42.27, "elapsed_time": "0:23:34", "remaining_time": "0:32:12", "throughput": 5574.09, "total_tokens": 7886320}
|
|
{"current_steps": 16020, "total_steps": 37885, "loss": 0.0273, "lr": 1.4294906785786593e-06, "epoch": 2.1142932559060315, "percentage": 42.29, "elapsed_time": "0:23:35", "remaining_time": "0:32:11", "throughput": 5574.69, "total_tokens": 7889008}
|
|
{"current_steps": 16025, "total_steps": 37885, "loss": 0.0751, "lr": 1.429074589764684e-06, "epoch": 2.1149531476837797, "percentage": 42.3, "elapsed_time": "0:23:35", "remaining_time": "0:32:10", "throughput": 5575.07, "total_tokens": 7891376}
|
|
{"current_steps": 16030, "total_steps": 37885, "loss": 0.0536, "lr": 1.4286584098827343e-06, "epoch": 2.1156130394615285, "percentage": 42.31, "elapsed_time": "0:23:35", "remaining_time": "0:32:10", "throughput": 5575.38, "total_tokens": 7893616}
|
|
{"current_steps": 16035, "total_steps": 37885, "loss": 0.0007, "lr": 1.4282421390211411e-06, "epoch": 2.1162729312392767, "percentage": 42.33, "elapsed_time": "0:23:36", "remaining_time": "0:32:09", "throughput": 5575.76, "total_tokens": 7895984}
|
|
{"current_steps": 16040, "total_steps": 37885, "loss": 0.0001, "lr": 1.4278257772682548e-06, "epoch": 2.116932823017025, "percentage": 42.34, "elapsed_time": "0:23:36", "remaining_time": "0:32:09", "throughput": 5576.06, "total_tokens": 7898224}
|
|
{"current_steps": 16045, "total_steps": 37885, "loss": 0.0001, "lr": 1.4274093247124456e-06, "epoch": 2.1175927147947737, "percentage": 42.35, "elapsed_time": "0:23:36", "remaining_time": "0:32:08", "throughput": 5576.49, "total_tokens": 7900656}
|
|
{"current_steps": 16050, "total_steps": 37885, "loss": 0.0001, "lr": 1.4269927814421023e-06, "epoch": 2.118252606572522, "percentage": 42.37, "elapsed_time": "0:23:37", "remaining_time": "0:32:07", "throughput": 5576.95, "total_tokens": 7903152}
|
|
{"current_steps": 16055, "total_steps": 37885, "loss": 0.0001, "lr": 1.426576147545633e-06, "epoch": 2.1189124983502707, "percentage": 42.38, "elapsed_time": "0:23:37", "remaining_time": "0:32:07", "throughput": 5577.33, "total_tokens": 7905520}
|
|
{"current_steps": 16060, "total_steps": 37885, "loss": 0.0009, "lr": 1.4261594231114658e-06, "epoch": 2.119572390128019, "percentage": 42.39, "elapsed_time": "0:23:37", "remaining_time": "0:32:06", "throughput": 5577.75, "total_tokens": 7907952}
|
|
{"current_steps": 16065, "total_steps": 37885, "loss": 0.1114, "lr": 1.4257426082280466e-06, "epoch": 2.1202322819057673, "percentage": 42.4, "elapsed_time": "0:23:38", "remaining_time": "0:32:06", "throughput": 5578.15, "total_tokens": 7910384}
|
|
{"current_steps": 16070, "total_steps": 37885, "loss": 0.009, "lr": 1.4253257029838419e-06, "epoch": 2.120892173683516, "percentage": 42.42, "elapsed_time": "0:23:38", "remaining_time": "0:32:05", "throughput": 5578.6, "total_tokens": 7912880}
|
|
{"current_steps": 16075, "total_steps": 37885, "loss": 0.1324, "lr": 1.4249087074673367e-06, "epoch": 2.1215520654612643, "percentage": 42.43, "elapsed_time": "0:23:38", "remaining_time": "0:32:04", "throughput": 5578.94, "total_tokens": 7915184}
|
|
{"current_steps": 16080, "total_steps": 37885, "loss": 0.0003, "lr": 1.4244916217670352e-06, "epoch": 2.122211957239013, "percentage": 42.44, "elapsed_time": "0:23:39", "remaining_time": "0:32:04", "throughput": 5579.24, "total_tokens": 7917424}
|
|
{"current_steps": 16085, "total_steps": 37885, "loss": 0.0015, "lr": 1.4240744459714612e-06, "epoch": 2.1228718490167613, "percentage": 42.46, "elapsed_time": "0:23:39", "remaining_time": "0:32:03", "throughput": 5579.49, "total_tokens": 7919600}
|
|
{"current_steps": 16090, "total_steps": 37885, "loss": 0.0001, "lr": 1.4236571801691568e-06, "epoch": 2.1235317407945096, "percentage": 42.47, "elapsed_time": "0:23:39", "remaining_time": "0:32:03", "throughput": 5580.05, "total_tokens": 7922224}
|
|
{"current_steps": 16095, "total_steps": 37885, "loss": 0.0682, "lr": 1.4232398244486835e-06, "epoch": 2.1241916325722583, "percentage": 42.48, "elapsed_time": "0:23:40", "remaining_time": "0:32:02", "throughput": 5580.35, "total_tokens": 7924464}
|
|
{"current_steps": 16100, "total_steps": 37885, "loss": 0.0001, "lr": 1.4228223788986226e-06, "epoch": 2.1248515243500066, "percentage": 42.5, "elapsed_time": "0:23:40", "remaining_time": "0:32:01", "throughput": 5580.9, "total_tokens": 7927088}
|
|
{"current_steps": 16105, "total_steps": 37885, "loss": 0.1128, "lr": 1.4224048436075738e-06, "epoch": 2.125511416127755, "percentage": 42.51, "elapsed_time": "0:23:40", "remaining_time": "0:32:01", "throughput": 5581.4, "total_tokens": 7929648}
|
|
{"current_steps": 16110, "total_steps": 37885, "loss": 0.0001, "lr": 1.4219872186641557e-06, "epoch": 2.1261713079055036, "percentage": 42.52, "elapsed_time": "0:23:41", "remaining_time": "0:32:00", "throughput": 5581.78, "total_tokens": 7932016}
|
|
{"current_steps": 16115, "total_steps": 37885, "loss": 0.0535, "lr": 1.421569504157006e-06, "epoch": 2.126831199683252, "percentage": 42.54, "elapsed_time": "0:23:41", "remaining_time": "0:32:00", "throughput": 5582.28, "total_tokens": 7934576}
|
|
{"current_steps": 16120, "total_steps": 37885, "loss": 0.0491, "lr": 1.4211517001747818e-06, "epoch": 2.1274910914610006, "percentage": 42.55, "elapsed_time": "0:23:41", "remaining_time": "0:31:59", "throughput": 5582.61, "total_tokens": 7936880}
|
|
{"current_steps": 16125, "total_steps": 37885, "loss": 0.0007, "lr": 1.420733806806159e-06, "epoch": 2.128150983238749, "percentage": 42.56, "elapsed_time": "0:23:42", "remaining_time": "0:31:58", "throughput": 5582.99, "total_tokens": 7939248}
|
|
{"current_steps": 16130, "total_steps": 37885, "loss": 0.1307, "lr": 1.4203158241398329e-06, "epoch": 2.128810875016497, "percentage": 42.58, "elapsed_time": "0:23:42", "remaining_time": "0:31:58", "throughput": 5583.58, "total_tokens": 7941936}
|
|
{"current_steps": 16135, "total_steps": 37885, "loss": 0.1063, "lr": 1.4198977522645162e-06, "epoch": 2.129470766794246, "percentage": 42.59, "elapsed_time": "0:23:42", "remaining_time": "0:31:57", "throughput": 5583.97, "total_tokens": 7944304}
|
|
{"current_steps": 16140, "total_steps": 37885, "loss": 0.1061, "lr": 1.4194795912689426e-06, "epoch": 2.130130658571994, "percentage": 42.6, "elapsed_time": "0:23:43", "remaining_time": "0:31:57", "throughput": 5584.27, "total_tokens": 7946544}
|
|
{"current_steps": 16145, "total_steps": 37885, "loss": 0.0001, "lr": 1.419061341241863e-06, "epoch": 2.130790550349743, "percentage": 42.62, "elapsed_time": "0:23:43", "remaining_time": "0:31:56", "throughput": 5584.97, "total_tokens": 7949424}
|
|
{"current_steps": 16150, "total_steps": 37885, "loss": 0.0001, "lr": 1.4186430022720488e-06, "epoch": 2.131450442127491, "percentage": 42.63, "elapsed_time": "0:23:43", "remaining_time": "0:31:56", "throughput": 5585.4, "total_tokens": 7951856}
|
|
{"current_steps": 16155, "total_steps": 37885, "loss": 0.0002, "lr": 1.4182245744482886e-06, "epoch": 2.1321103339052394, "percentage": 42.64, "elapsed_time": "0:23:44", "remaining_time": "0:31:55", "throughput": 5585.6, "total_tokens": 7953968}
|
|
{"current_steps": 16160, "total_steps": 37885, "loss": 0.1079, "lr": 1.4178060578593912e-06, "epoch": 2.132770225682988, "percentage": 42.66, "elapsed_time": "0:23:44", "remaining_time": "0:31:54", "throughput": 5586.06, "total_tokens": 7956464}
|
|
{"current_steps": 16165, "total_steps": 37885, "loss": 0.0001, "lr": 1.4173874525941836e-06, "epoch": 2.1334301174607364, "percentage": 42.67, "elapsed_time": "0:23:44", "remaining_time": "0:31:54", "throughput": 5586.47, "total_tokens": 7958896}
|
|
{"current_steps": 16170, "total_steps": 37885, "loss": 0.0126, "lr": 1.4169687587415114e-06, "epoch": 2.1340900092384847, "percentage": 42.68, "elapsed_time": "0:23:45", "remaining_time": "0:31:53", "throughput": 5586.89, "total_tokens": 7961328}
|
|
{"current_steps": 16175, "total_steps": 37885, "loss": 0.0691, "lr": 1.4165499763902399e-06, "epoch": 2.1347499010162334, "percentage": 42.69, "elapsed_time": "0:23:45", "remaining_time": "0:31:53", "throughput": 5587.4, "total_tokens": 7963888}
|
|
{"current_steps": 16180, "total_steps": 37885, "loss": 0.0001, "lr": 1.416131105629252e-06, "epoch": 2.1354097927939817, "percentage": 42.71, "elapsed_time": "0:23:45", "remaining_time": "0:31:52", "throughput": 5587.95, "total_tokens": 7966512}
|
|
{"current_steps": 16185, "total_steps": 37885, "loss": 0.0002, "lr": 1.4157121465474504e-06, "epoch": 2.1360696845717304, "percentage": 42.72, "elapsed_time": "0:23:45", "remaining_time": "0:31:51", "throughput": 5588.37, "total_tokens": 7968944}
|
|
{"current_steps": 16190, "total_steps": 37885, "loss": 0.0018, "lr": 1.4152930992337562e-06, "epoch": 2.1367295763494787, "percentage": 42.73, "elapsed_time": "0:23:46", "remaining_time": "0:31:51", "throughput": 5588.83, "total_tokens": 7971440}
|
|
{"current_steps": 16195, "total_steps": 37885, "loss": 0.0001, "lr": 1.4148739637771088e-06, "epoch": 2.137389468127227, "percentage": 42.75, "elapsed_time": "0:23:46", "remaining_time": "0:31:50", "throughput": 5589.16, "total_tokens": 7973744}
|
|
{"current_steps": 16200, "total_steps": 37885, "loss": 0.0523, "lr": 1.4144547402664674e-06, "epoch": 2.1380493599049757, "percentage": 42.76, "elapsed_time": "0:23:46", "remaining_time": "0:31:50", "throughput": 5589.39, "total_tokens": 7975920}
|
|
{"current_steps": 16205, "total_steps": 37885, "loss": 0.0004, "lr": 1.4140354287908079e-06, "epoch": 2.138709251682724, "percentage": 42.77, "elapsed_time": "0:23:47", "remaining_time": "0:31:49", "throughput": 5589.74, "total_tokens": 7978224}
|
|
{"current_steps": 16210, "total_steps": 37885, "loss": 0.0001, "lr": 1.4136160294391272e-06, "epoch": 2.1393691434604727, "percentage": 42.79, "elapsed_time": "0:23:47", "remaining_time": "0:31:48", "throughput": 5590.11, "total_tokens": 7980592}
|
|
{"current_steps": 16215, "total_steps": 37885, "loss": 0.0001, "lr": 1.4131965423004394e-06, "epoch": 2.140029035238221, "percentage": 42.8, "elapsed_time": "0:23:47", "remaining_time": "0:31:48", "throughput": 5590.44, "total_tokens": 7982896}
|
|
{"current_steps": 16220, "total_steps": 37885, "loss": 0.0297, "lr": 1.4127769674637777e-06, "epoch": 2.1406889270159692, "percentage": 42.81, "elapsed_time": "0:23:48", "remaining_time": "0:31:47", "throughput": 5590.69, "total_tokens": 7985072}
|
|
{"current_steps": 16225, "total_steps": 37885, "loss": 0.1126, "lr": 1.4123573050181937e-06, "epoch": 2.141348818793718, "percentage": 42.83, "elapsed_time": "0:23:48", "remaining_time": "0:31:47", "throughput": 5591.31, "total_tokens": 7987824}
|
|
{"current_steps": 16230, "total_steps": 37885, "loss": 0.0004, "lr": 1.4119375550527578e-06, "epoch": 2.1420087105714662, "percentage": 42.84, "elapsed_time": "0:23:48", "remaining_time": "0:31:46", "throughput": 5591.73, "total_tokens": 7990256}
|
|
{"current_steps": 16235, "total_steps": 37885, "loss": 0.0001, "lr": 1.4115177176565587e-06, "epoch": 2.1426686023492145, "percentage": 42.85, "elapsed_time": "0:23:49", "remaining_time": "0:31:45", "throughput": 5592.31, "total_tokens": 7992944}
|
|
{"current_steps": 16240, "total_steps": 37885, "loss": 0.0803, "lr": 1.4110977929187042e-06, "epoch": 2.1433284941269632, "percentage": 42.87, "elapsed_time": "0:23:49", "remaining_time": "0:31:45", "throughput": 5592.77, "total_tokens": 7995440}
|
|
{"current_steps": 16245, "total_steps": 37885, "loss": 0.0956, "lr": 1.41067778092832e-06, "epoch": 2.1439883859047115, "percentage": 42.88, "elapsed_time": "0:23:49", "remaining_time": "0:31:44", "throughput": 5593.14, "total_tokens": 7997808}
|
|
{"current_steps": 16250, "total_steps": 37885, "loss": 0.0798, "lr": 1.4102576817745506e-06, "epoch": 2.1446482776824602, "percentage": 42.89, "elapsed_time": "0:23:50", "remaining_time": "0:31:44", "throughput": 5593.6, "total_tokens": 8000304}
|
|
{"current_steps": 16255, "total_steps": 37885, "loss": 0.1273, "lr": 1.4098374955465592e-06, "epoch": 2.1453081694602085, "percentage": 42.91, "elapsed_time": "0:23:50", "remaining_time": "0:31:43", "throughput": 5594.18, "total_tokens": 8002992}
|
|
{"current_steps": 16260, "total_steps": 37885, "loss": 0.0005, "lr": 1.409417222333527e-06, "epoch": 2.145968061237957, "percentage": 42.92, "elapsed_time": "0:23:50", "remaining_time": "0:31:43", "throughput": 5594.63, "total_tokens": 8005488}
|
|
{"current_steps": 16265, "total_steps": 37885, "loss": 0.0005, "lr": 1.4089968622246543e-06, "epoch": 2.1466279530157055, "percentage": 42.93, "elapsed_time": "0:23:51", "remaining_time": "0:31:42", "throughput": 5595.26, "total_tokens": 8008240}
|
|
{"current_steps": 16270, "total_steps": 37885, "loss": 0.1245, "lr": 1.4085764153091595e-06, "epoch": 2.147287844793454, "percentage": 42.95, "elapsed_time": "0:23:51", "remaining_time": "0:31:41", "throughput": 5595.8, "total_tokens": 8010864}
|
|
{"current_steps": 16275, "total_steps": 37885, "loss": 0.0894, "lr": 1.4081558816762788e-06, "epoch": 2.1479477365712025, "percentage": 42.96, "elapsed_time": "0:23:51", "remaining_time": "0:31:41", "throughput": 5596.22, "total_tokens": 8013296}
|
|
{"current_steps": 16280, "total_steps": 37885, "loss": 0.0005, "lr": 1.4077352614152683e-06, "epoch": 2.148607628348951, "percentage": 42.97, "elapsed_time": "0:23:52", "remaining_time": "0:31:40", "throughput": 5596.84, "total_tokens": 8016048}
|
|
{"current_steps": 16285, "total_steps": 37885, "loss": 0.0012, "lr": 1.407314554615401e-06, "epoch": 2.149267520126699, "percentage": 42.99, "elapsed_time": "0:23:52", "remaining_time": "0:31:40", "throughput": 5597.25, "total_tokens": 8018480}
|
|
{"current_steps": 16290, "total_steps": 37885, "loss": 0.0413, "lr": 1.406893761365969e-06, "epoch": 2.149927411904448, "percentage": 43.0, "elapsed_time": "0:23:52", "remaining_time": "0:31:39", "throughput": 5597.75, "total_tokens": 8021040}
|
|
{"current_steps": 16295, "total_steps": 37885, "loss": 0.0005, "lr": 1.4064728817562825e-06, "epoch": 2.150587303682196, "percentage": 43.01, "elapsed_time": "0:23:53", "remaining_time": "0:31:38", "throughput": 5598.28, "total_tokens": 8023664}
|
|
{"current_steps": 16300, "total_steps": 37885, "loss": 0.0003, "lr": 1.4060519158756702e-06, "epoch": 2.1512471954599444, "percentage": 43.02, "elapsed_time": "0:23:53", "remaining_time": "0:31:38", "throughput": 5598.45, "total_tokens": 8025712}
|
|
{"current_steps": 16305, "total_steps": 37885, "loss": 0.0002, "lr": 1.4056308638134794e-06, "epoch": 2.151907087237693, "percentage": 43.04, "elapsed_time": "0:23:53", "remaining_time": "0:31:37", "throughput": 5598.91, "total_tokens": 8028208}
|
|
{"current_steps": 16310, "total_steps": 37885, "loss": 0.0002, "lr": 1.4052097256590752e-06, "epoch": 2.1525669790154414, "percentage": 43.05, "elapsed_time": "0:23:54", "remaining_time": "0:31:37", "throughput": 5599.31, "total_tokens": 8030640}
|
|
{"current_steps": 16315, "total_steps": 37885, "loss": 0.0613, "lr": 1.4047885015018407e-06, "epoch": 2.15322687079319, "percentage": 43.06, "elapsed_time": "0:23:54", "remaining_time": "0:31:36", "throughput": 5599.77, "total_tokens": 8033136}
|
|
{"current_steps": 16320, "total_steps": 37885, "loss": 0.054, "lr": 1.4043671914311785e-06, "epoch": 2.1538867625709384, "percentage": 43.08, "elapsed_time": "0:23:54", "remaining_time": "0:31:36", "throughput": 5600.27, "total_tokens": 8035696}
|
|
{"current_steps": 16325, "total_steps": 37885, "loss": 0.0348, "lr": 1.4039457955365077e-06, "epoch": 2.1545466543486866, "percentage": 43.09, "elapsed_time": "0:23:55", "remaining_time": "0:31:35", "throughput": 5600.89, "total_tokens": 8038448}
|
|
{"current_steps": 16330, "total_steps": 37885, "loss": 0.1972, "lr": 1.403524313907267e-06, "epoch": 2.1552065461264354, "percentage": 43.1, "elapsed_time": "0:23:55", "remaining_time": "0:31:34", "throughput": 5601.34, "total_tokens": 8040944}
|
|
{"current_steps": 16335, "total_steps": 37885, "loss": 0.0004, "lr": 1.403102746632913e-06, "epoch": 2.1558664379041836, "percentage": 43.12, "elapsed_time": "0:23:55", "remaining_time": "0:31:34", "throughput": 5601.71, "total_tokens": 8043312}
|
|
{"current_steps": 16340, "total_steps": 37885, "loss": 0.0002, "lr": 1.4026810938029197e-06, "epoch": 2.1565263296819324, "percentage": 43.13, "elapsed_time": "0:23:56", "remaining_time": "0:31:33", "throughput": 5602.21, "total_tokens": 8045872}
|
|
{"current_steps": 16345, "total_steps": 37885, "loss": 0.0644, "lr": 1.4022593555067804e-06, "epoch": 2.1571862214596806, "percentage": 43.14, "elapsed_time": "0:23:56", "remaining_time": "0:31:33", "throughput": 5602.83, "total_tokens": 8048624}
|
|
{"current_steps": 16350, "total_steps": 37885, "loss": 0.1225, "lr": 1.401837531834006e-06, "epoch": 2.157846113237429, "percentage": 43.16, "elapsed_time": "0:23:56", "remaining_time": "0:31:32", "throughput": 5603.1, "total_tokens": 8050864}
|
|
{"current_steps": 16355, "total_steps": 37885, "loss": 0.0007, "lr": 1.401415622874125e-06, "epoch": 2.1585060050151776, "percentage": 43.17, "elapsed_time": "0:23:57", "remaining_time": "0:31:31", "throughput": 5603.72, "total_tokens": 8053616}
|
|
{"current_steps": 16360, "total_steps": 37885, "loss": 0.0002, "lr": 1.400993628716685e-06, "epoch": 2.159165896792926, "percentage": 43.18, "elapsed_time": "0:23:57", "remaining_time": "0:31:31", "throughput": 5604.1, "total_tokens": 8056048}
|
|
{"current_steps": 16365, "total_steps": 37885, "loss": 0.0399, "lr": 1.400571549451251e-06, "epoch": 2.159825788570674, "percentage": 43.2, "elapsed_time": "0:23:57", "remaining_time": "0:31:30", "throughput": 5604.4, "total_tokens": 8058288}
|
|
{"current_steps": 16370, "total_steps": 37885, "loss": 0.0002, "lr": 1.4001493851674066e-06, "epoch": 2.160485680348423, "percentage": 43.21, "elapsed_time": "0:23:58", "remaining_time": "0:31:30", "throughput": 5604.73, "total_tokens": 8060592}
|
|
{"current_steps": 16375, "total_steps": 37885, "loss": 0.0229, "lr": 1.3997271359547529e-06, "epoch": 2.161145572126171, "percentage": 43.22, "elapsed_time": "0:23:58", "remaining_time": "0:31:29", "throughput": 5605.1, "total_tokens": 8062960}
|
|
{"current_steps": 16380, "total_steps": 37885, "loss": 0.0017, "lr": 1.3993048019029088e-06, "epoch": 2.16180546390392, "percentage": 43.24, "elapsed_time": "0:23:58", "remaining_time": "0:31:29", "throughput": 5605.63, "total_tokens": 8065584}
|
|
{"current_steps": 16385, "total_steps": 37885, "loss": 0.0704, "lr": 1.3988823831015125e-06, "epoch": 2.162465355681668, "percentage": 43.25, "elapsed_time": "0:23:59", "remaining_time": "0:31:28", "throughput": 5606.12, "total_tokens": 8068144}
|
|
{"current_steps": 16390, "total_steps": 37885, "loss": 0.0004, "lr": 1.3984598796402183e-06, "epoch": 2.1631252474594165, "percentage": 43.26, "elapsed_time": "0:23:59", "remaining_time": "0:31:27", "throughput": 5606.41, "total_tokens": 8070384}
|
|
{"current_steps": 16395, "total_steps": 37885, "loss": 0.0002, "lr": 1.3980372916087006e-06, "epoch": 2.163785139237165, "percentage": 43.28, "elapsed_time": "0:23:59", "remaining_time": "0:31:27", "throughput": 5606.83, "total_tokens": 8072816}
|
|
{"current_steps": 16400, "total_steps": 37885, "loss": 0.0014, "lr": 1.3976146190966498e-06, "epoch": 2.1644450310149135, "percentage": 43.29, "elapsed_time": "0:24:00", "remaining_time": "0:31:26", "throughput": 5607.2, "total_tokens": 8075184}
|
|
{"current_steps": 16405, "total_steps": 37885, "loss": 0.0581, "lr": 1.3971918621937756e-06, "epoch": 2.165104922792662, "percentage": 43.3, "elapsed_time": "0:24:00", "remaining_time": "0:31:26", "throughput": 5607.48, "total_tokens": 8077424}
|
|
{"current_steps": 16410, "total_steps": 37885, "loss": 0.0001, "lr": 1.3967690209898046e-06, "epoch": 2.1657648145704105, "percentage": 43.32, "elapsed_time": "0:24:00", "remaining_time": "0:31:25", "throughput": 5608.02, "total_tokens": 8080048}
|
|
{"current_steps": 16415, "total_steps": 37885, "loss": 0.0478, "lr": 1.3963460955744824e-06, "epoch": 2.1664247063481588, "percentage": 43.33, "elapsed_time": "0:24:01", "remaining_time": "0:31:24", "throughput": 5608.39, "total_tokens": 8082416}
|
|
{"current_steps": 16420, "total_steps": 37885, "loss": 0.0001, "lr": 1.3959230860375716e-06, "epoch": 2.1670845981259075, "percentage": 43.34, "elapsed_time": "0:24:01", "remaining_time": "0:31:24", "throughput": 5608.97, "total_tokens": 8085104}
|
|
{"current_steps": 16425, "total_steps": 37885, "loss": 0.1084, "lr": 1.3954999924688522e-06, "epoch": 2.1677444899036558, "percentage": 43.35, "elapsed_time": "0:24:01", "remaining_time": "0:31:23", "throughput": 5609.29, "total_tokens": 8087408}
|
|
{"current_steps": 16430, "total_steps": 37885, "loss": 0.0489, "lr": 1.395076814958124e-06, "epoch": 2.1684043816814045, "percentage": 43.37, "elapsed_time": "0:24:02", "remaining_time": "0:31:23", "throughput": 5609.45, "total_tokens": 8089456}
|
|
{"current_steps": 16435, "total_steps": 37885, "loss": 0.0002, "lr": 1.3946535535952024e-06, "epoch": 2.1690642734591528, "percentage": 43.38, "elapsed_time": "0:24:02", "remaining_time": "0:31:22", "throughput": 5610.13, "total_tokens": 8092336}
|
|
{"current_steps": 16440, "total_steps": 37885, "loss": 0.0369, "lr": 1.394230208469922e-06, "epoch": 2.169724165236901, "percentage": 43.39, "elapsed_time": "0:24:02", "remaining_time": "0:31:22", "throughput": 5610.45, "total_tokens": 8094640}
|
|
{"current_steps": 16445, "total_steps": 37885, "loss": 0.0711, "lr": 1.3938067796721349e-06, "epoch": 2.1703840570146498, "percentage": 43.41, "elapsed_time": "0:24:03", "remaining_time": "0:31:21", "throughput": 5610.87, "total_tokens": 8097072}
|
|
{"current_steps": 16450, "total_steps": 37885, "loss": 0.0007, "lr": 1.3933832672917101e-06, "epoch": 2.171043948792398, "percentage": 43.42, "elapsed_time": "0:24:03", "remaining_time": "0:31:20", "throughput": 5611.27, "total_tokens": 8099504}
|
|
{"current_steps": 16455, "total_steps": 37885, "loss": 0.0004, "lr": 1.3929596714185357e-06, "epoch": 2.1717038405701463, "percentage": 43.43, "elapsed_time": "0:24:03", "remaining_time": "0:31:20", "throughput": 5611.5, "total_tokens": 8101680}
|
|
{"current_steps": 16460, "total_steps": 37885, "loss": 0.1802, "lr": 1.3925359921425166e-06, "epoch": 2.172363732347895, "percentage": 43.45, "elapsed_time": "0:24:04", "remaining_time": "0:31:19", "throughput": 5612.03, "total_tokens": 8104304}
|
|
{"current_steps": 16465, "total_steps": 37885, "loss": 0.0001, "lr": 1.3921122295535756e-06, "epoch": 2.1730236241256433, "percentage": 43.46, "elapsed_time": "0:24:04", "remaining_time": "0:31:19", "throughput": 5612.4, "total_tokens": 8106672}
|
|
{"current_steps": 16470, "total_steps": 37885, "loss": 0.091, "lr": 1.3916883837416536e-06, "epoch": 2.173683515903392, "percentage": 43.47, "elapsed_time": "0:24:04", "remaining_time": "0:31:18", "throughput": 5612.73, "total_tokens": 8108976}
|
|
{"current_steps": 16475, "total_steps": 37885, "loss": 0.004, "lr": 1.3912644547967085e-06, "epoch": 2.1743434076811403, "percentage": 43.49, "elapsed_time": "0:24:05", "remaining_time": "0:31:17", "throughput": 5613.12, "total_tokens": 8111408}
|
|
{"current_steps": 16480, "total_steps": 37885, "loss": 0.0458, "lr": 1.390840442808716e-06, "epoch": 2.1750032994588886, "percentage": 43.5, "elapsed_time": "0:24:05", "remaining_time": "0:31:17", "throughput": 5613.57, "total_tokens": 8113904}
|
|
{"current_steps": 16485, "total_steps": 37885, "loss": 0.1022, "lr": 1.3904163478676698e-06, "epoch": 2.1756631912366373, "percentage": 43.51, "elapsed_time": "0:24:05", "remaining_time": "0:31:16", "throughput": 5613.97, "total_tokens": 8116336}
|
|
{"current_steps": 16490, "total_steps": 37885, "loss": 0.0318, "lr": 1.3899921700635808e-06, "epoch": 2.1763230830143856, "percentage": 43.53, "elapsed_time": "0:24:06", "remaining_time": "0:31:16", "throughput": 5614.3, "total_tokens": 8118640}
|
|
{"current_steps": 16495, "total_steps": 37885, "loss": 0.0517, "lr": 1.389567909486478e-06, "epoch": 2.176982974792134, "percentage": 43.54, "elapsed_time": "0:24:06", "remaining_time": "0:31:15", "throughput": 5614.67, "total_tokens": 8121008}
|
|
{"current_steps": 16500, "total_steps": 37885, "loss": 0.065, "lr": 1.3891435662264077e-06, "epoch": 2.1776428665698826, "percentage": 43.55, "elapsed_time": "0:24:06", "remaining_time": "0:31:15", "throughput": 5615.19, "total_tokens": 8123632}
|
|
{"current_steps": 16505, "total_steps": 37885, "loss": 0.0667, "lr": 1.3887191403734328e-06, "epoch": 2.178302758347631, "percentage": 43.57, "elapsed_time": "0:24:07", "remaining_time": "0:31:14", "throughput": 5615.72, "total_tokens": 8126256}
|
|
{"current_steps": 16510, "total_steps": 37885, "loss": 0.0536, "lr": 1.3882946320176358e-06, "epoch": 2.1789626501253796, "percentage": 43.58, "elapsed_time": "0:24:07", "remaining_time": "0:31:13", "throughput": 5616.34, "total_tokens": 8129072}
|
|
{"current_steps": 16515, "total_steps": 37885, "loss": 0.0269, "lr": 1.3878700412491147e-06, "epoch": 2.179622541903128, "percentage": 43.59, "elapsed_time": "0:24:07", "remaining_time": "0:31:13", "throughput": 5616.83, "total_tokens": 8131632}
|
|
{"current_steps": 16520, "total_steps": 37885, "loss": 0.0001, "lr": 1.3874453681579861e-06, "epoch": 2.180282433680876, "percentage": 43.61, "elapsed_time": "0:24:08", "remaining_time": "0:31:12", "throughput": 5617.32, "total_tokens": 8134192}
|
|
{"current_steps": 16525, "total_steps": 37885, "loss": 0.0014, "lr": 1.3870206128343838e-06, "epoch": 2.180942325458625, "percentage": 43.62, "elapsed_time": "0:24:08", "remaining_time": "0:31:12", "throughput": 5617.59, "total_tokens": 8136432}
|
|
{"current_steps": 16530, "total_steps": 37885, "loss": 0.0003, "lr": 1.386595775368459e-06, "epoch": 2.181602217236373, "percentage": 43.63, "elapsed_time": "0:24:08", "remaining_time": "0:31:11", "throughput": 5617.79, "total_tokens": 8138544}
|
|
{"current_steps": 16535, "total_steps": 37885, "loss": 0.0551, "lr": 1.3861708558503804e-06, "epoch": 2.182262109014122, "percentage": 43.65, "elapsed_time": "0:24:09", "remaining_time": "0:31:10", "throughput": 5618.19, "total_tokens": 8140976}
|
|
{"current_steps": 16540, "total_steps": 37885, "loss": 0.0008, "lr": 1.385745854370334e-06, "epoch": 2.18292200079187, "percentage": 43.66, "elapsed_time": "0:24:09", "remaining_time": "0:31:10", "throughput": 5618.56, "total_tokens": 8143344}
|
|
{"current_steps": 16545, "total_steps": 37885, "loss": 0.0352, "lr": 1.3853207710185233e-06, "epoch": 2.1835818925696184, "percentage": 43.67, "elapsed_time": "0:24:09", "remaining_time": "0:31:09", "throughput": 5618.71, "total_tokens": 8145392}
|
|
{"current_steps": 16550, "total_steps": 37885, "loss": 0.0001, "lr": 1.3848956058851695e-06, "epoch": 2.184241784347367, "percentage": 43.68, "elapsed_time": "0:24:10", "remaining_time": "0:31:09", "throughput": 5619.27, "total_tokens": 8148080}
|
|
{"current_steps": 16555, "total_steps": 37885, "loss": 0.0695, "lr": 1.3844703590605105e-06, "epoch": 2.1849016761251154, "percentage": 43.7, "elapsed_time": "0:24:10", "remaining_time": "0:31:08", "throughput": 5619.63, "total_tokens": 8150448}
|
|
{"current_steps": 16560, "total_steps": 37885, "loss": 0.109, "lr": 1.3840450306348017e-06, "epoch": 2.185561567902864, "percentage": 43.71, "elapsed_time": "0:24:10", "remaining_time": "0:31:08", "throughput": 5620.03, "total_tokens": 8152880}
|
|
{"current_steps": 16565, "total_steps": 37885, "loss": 0.0972, "lr": 1.3836196206983162e-06, "epoch": 2.1862214596806124, "percentage": 43.72, "elapsed_time": "0:24:11", "remaining_time": "0:31:07", "throughput": 5620.4, "total_tokens": 8155248}
|
|
{"current_steps": 16570, "total_steps": 37885, "loss": 0.0592, "lr": 1.3831941293413443e-06, "epoch": 2.1868813514583607, "percentage": 43.74, "elapsed_time": "0:24:11", "remaining_time": "0:31:06", "throughput": 5620.88, "total_tokens": 8157808}
|
|
{"current_steps": 16575, "total_steps": 37885, "loss": 0.0009, "lr": 1.3827685566541934e-06, "epoch": 2.1875412432361094, "percentage": 43.75, "elapsed_time": "0:24:11", "remaining_time": "0:31:06", "throughput": 5621.36, "total_tokens": 8160368}
|
|
{"current_steps": 16580, "total_steps": 37885, "loss": 0.0492, "lr": 1.382342902727188e-06, "epoch": 2.1882011350138577, "percentage": 43.76, "elapsed_time": "0:24:11", "remaining_time": "0:31:05", "throughput": 5621.6, "total_tokens": 8162544}
|
|
{"current_steps": 16585, "total_steps": 37885, "loss": 0.0004, "lr": 1.38191716765067e-06, "epoch": 2.188861026791606, "percentage": 43.78, "elapsed_time": "0:24:12", "remaining_time": "0:31:05", "throughput": 5622.13, "total_tokens": 8165168}
|
|
{"current_steps": 16590, "total_steps": 37885, "loss": 0.0538, "lr": 1.381491351514999e-06, "epoch": 2.1895209185693547, "percentage": 43.79, "elapsed_time": "0:24:12", "remaining_time": "0:31:04", "throughput": 5622.45, "total_tokens": 8167472}
|
|
{"current_steps": 16595, "total_steps": 37885, "loss": 0.0013, "lr": 1.3810654544105512e-06, "epoch": 2.190180810347103, "percentage": 43.8, "elapsed_time": "0:24:12", "remaining_time": "0:31:04", "throughput": 5622.82, "total_tokens": 8169840}
|
|
{"current_steps": 16600, "total_steps": 37885, "loss": 0.0007, "lr": 1.38063947642772e-06, "epoch": 2.1908407021248517, "percentage": 43.82, "elapsed_time": "0:24:13", "remaining_time": "0:31:03", "throughput": 5623.21, "total_tokens": 8172272}
|
|
{"current_steps": 16605, "total_steps": 37885, "loss": 0.0001, "lr": 1.3802134176569166e-06, "epoch": 2.1915005939026, "percentage": 43.83, "elapsed_time": "0:24:13", "remaining_time": "0:31:02", "throughput": 5623.58, "total_tokens": 8174640}
|
|
{"current_steps": 16610, "total_steps": 37885, "loss": 0.0298, "lr": 1.3797872781885685e-06, "epoch": 2.1921604856803483, "percentage": 43.84, "elapsed_time": "0:24:13", "remaining_time": "0:31:02", "throughput": 5624.1, "total_tokens": 8177264}
|
|
{"current_steps": 16615, "total_steps": 37885, "loss": 0.0001, "lr": 1.3793610581131207e-06, "epoch": 2.192820377458097, "percentage": 43.86, "elapsed_time": "0:24:14", "remaining_time": "0:31:01", "throughput": 5624.38, "total_tokens": 8179504}
|
|
{"current_steps": 16620, "total_steps": 37885, "loss": 0.0782, "lr": 1.3789347575210352e-06, "epoch": 2.1934802692358453, "percentage": 43.87, "elapsed_time": "0:24:14", "remaining_time": "0:31:01", "throughput": 5624.95, "total_tokens": 8182192}
|
|
{"current_steps": 16625, "total_steps": 37885, "loss": 0.0031, "lr": 1.3785083765027919e-06, "epoch": 2.1941401610135935, "percentage": 43.88, "elapsed_time": "0:24:14", "remaining_time": "0:31:00", "throughput": 5625.27, "total_tokens": 8184496}
|
|
{"current_steps": 16630, "total_steps": 37885, "loss": 0.0002, "lr": 1.3780819151488865e-06, "epoch": 2.1948000527913423, "percentage": 43.9, "elapsed_time": "0:24:15", "remaining_time": "0:31:00", "throughput": 5625.64, "total_tokens": 8186864}
|
|
{"current_steps": 16635, "total_steps": 37885, "loss": 0.2403, "lr": 1.3776553735498321e-06, "epoch": 2.1954599445690905, "percentage": 43.91, "elapsed_time": "0:24:15", "remaining_time": "0:30:59", "throughput": 5625.96, "total_tokens": 8189168}
|
|
{"current_steps": 16640, "total_steps": 37885, "loss": 0.0315, "lr": 1.37722875179616e-06, "epoch": 2.1961198363468393, "percentage": 43.92, "elapsed_time": "0:24:15", "remaining_time": "0:30:58", "throughput": 5626.3, "total_tokens": 8191536}
|
|
{"current_steps": 16645, "total_steps": 37885, "loss": 0.0002, "lr": 1.3768020499784165e-06, "epoch": 2.1967797281245875, "percentage": 43.94, "elapsed_time": "0:24:16", "remaining_time": "0:30:58", "throughput": 5626.94, "total_tokens": 8194352}
|
|
{"current_steps": 16650, "total_steps": 37885, "loss": 0.0444, "lr": 1.3763752681871669e-06, "epoch": 2.197439619902336, "percentage": 43.95, "elapsed_time": "0:24:16", "remaining_time": "0:30:57", "throughput": 5627.35, "total_tokens": 8196784}
|
|
{"current_steps": 16655, "total_steps": 37885, "loss": 0.0195, "lr": 1.375948406512992e-06, "epoch": 2.1980995116800846, "percentage": 43.96, "elapsed_time": "0:24:16", "remaining_time": "0:30:57", "throughput": 5627.75, "total_tokens": 8199216}
|
|
{"current_steps": 16660, "total_steps": 37885, "loss": 0.071, "lr": 1.3755214650464903e-06, "epoch": 2.198759403457833, "percentage": 43.98, "elapsed_time": "0:24:17", "remaining_time": "0:30:56", "throughput": 5628.03, "total_tokens": 8201456}
|
|
{"current_steps": 16665, "total_steps": 37885, "loss": 0.0002, "lr": 1.3750944438782769e-06, "epoch": 2.1994192952355816, "percentage": 43.99, "elapsed_time": "0:24:17", "remaining_time": "0:30:55", "throughput": 5628.22, "total_tokens": 8203568}
|
|
{"current_steps": 16670, "total_steps": 37885, "loss": 0.0002, "lr": 1.374667343098984e-06, "epoch": 2.20007918701333, "percentage": 44.0, "elapsed_time": "0:24:17", "remaining_time": "0:30:55", "throughput": 5628.54, "total_tokens": 8205872}
|
|
{"current_steps": 16675, "total_steps": 37885, "loss": 0.0201, "lr": 1.3742401627992604e-06, "epoch": 2.200739078791078, "percentage": 44.01, "elapsed_time": "0:24:18", "remaining_time": "0:30:54", "throughput": 5629.03, "total_tokens": 8208432}
|
|
{"current_steps": 16680, "total_steps": 37885, "loss": 0.2684, "lr": 1.3738129030697724e-06, "epoch": 2.201398970568827, "percentage": 44.03, "elapsed_time": "0:24:18", "remaining_time": "0:30:54", "throughput": 5629.47, "total_tokens": 8210928}
|
|
{"current_steps": 16685, "total_steps": 37885, "loss": 0.0003, "lr": 1.3733855640012028e-06, "epoch": 2.202058862346575, "percentage": 44.04, "elapsed_time": "0:24:18", "remaining_time": "0:30:53", "throughput": 5629.75, "total_tokens": 8213168}
|
|
{"current_steps": 16690, "total_steps": 37885, "loss": 0.1604, "lr": 1.372958145684251e-06, "epoch": 2.202718754124324, "percentage": 44.05, "elapsed_time": "0:24:19", "remaining_time": "0:30:53", "throughput": 5630.1, "total_tokens": 8215536}
|
|
{"current_steps": 16695, "total_steps": 37885, "loss": 0.0022, "lr": 1.3725306482096337e-06, "epoch": 2.203378645902072, "percentage": 44.07, "elapsed_time": "0:24:19", "remaining_time": "0:30:52", "throughput": 5630.46, "total_tokens": 8217904}
|
|
{"current_steps": 16700, "total_steps": 37885, "loss": 0.0005, "lr": 1.3721030716680835e-06, "epoch": 2.2040385376798204, "percentage": 44.08, "elapsed_time": "0:24:19", "remaining_time": "0:30:51", "throughput": 5630.78, "total_tokens": 8220208}
|
|
{"current_steps": 16705, "total_steps": 37885, "loss": 0.0322, "lr": 1.3716754161503514e-06, "epoch": 2.204698429457569, "percentage": 44.09, "elapsed_time": "0:24:20", "remaining_time": "0:30:51", "throughput": 5631.29, "total_tokens": 8222832}
|
|
{"current_steps": 16710, "total_steps": 37885, "loss": 0.0001, "lr": 1.3712476817472037e-06, "epoch": 2.2053583212353174, "percentage": 44.11, "elapsed_time": "0:24:20", "remaining_time": "0:30:50", "throughput": 5631.69, "total_tokens": 8225264}
|
|
{"current_steps": 16715, "total_steps": 37885, "loss": 0.1097, "lr": 1.3708198685494234e-06, "epoch": 2.2060182130130657, "percentage": 44.12, "elapsed_time": "0:24:20", "remaining_time": "0:30:50", "throughput": 5632.05, "total_tokens": 8227632}
|
|
{"current_steps": 16720, "total_steps": 37885, "loss": 0.0618, "lr": 1.3703919766478116e-06, "epoch": 2.2066781047908144, "percentage": 44.13, "elapsed_time": "0:24:21", "remaining_time": "0:30:49", "throughput": 5632.69, "total_tokens": 8230448}
|
|
{"current_steps": 16725, "total_steps": 37885, "loss": 0.1022, "lr": 1.369964006133185e-06, "epoch": 2.2073379965685627, "percentage": 44.15, "elapsed_time": "0:24:21", "remaining_time": "0:30:49", "throughput": 5633.17, "total_tokens": 8233008}
|
|
{"current_steps": 16730, "total_steps": 37885, "loss": 0.0281, "lr": 1.3695359570963772e-06, "epoch": 2.2079978883463114, "percentage": 44.16, "elapsed_time": "0:24:21", "remaining_time": "0:30:48", "throughput": 5633.64, "total_tokens": 8235568}
|
|
{"current_steps": 16735, "total_steps": 37885, "loss": 0.0354, "lr": 1.3691078296282383e-06, "epoch": 2.2086577801240597, "percentage": 44.17, "elapsed_time": "0:24:22", "remaining_time": "0:30:47", "throughput": 5633.87, "total_tokens": 8237744}
|
|
{"current_steps": 16740, "total_steps": 37885, "loss": 0.0026, "lr": 1.3686796238196357e-06, "epoch": 2.209317671901808, "percentage": 44.19, "elapsed_time": "0:24:22", "remaining_time": "0:30:47", "throughput": 5634.39, "total_tokens": 8240368}
|
|
{"current_steps": 16745, "total_steps": 37885, "loss": 0.127, "lr": 1.3682513397614522e-06, "epoch": 2.2099775636795567, "percentage": 44.2, "elapsed_time": "0:24:22", "remaining_time": "0:30:46", "throughput": 5634.78, "total_tokens": 8242800}
|
|
{"current_steps": 16750, "total_steps": 37885, "loss": 0.0009, "lr": 1.367822977544589e-06, "epoch": 2.210637455457305, "percentage": 44.21, "elapsed_time": "0:24:23", "remaining_time": "0:30:46", "throughput": 5635.17, "total_tokens": 8245232}
|
|
{"current_steps": 16755, "total_steps": 37885, "loss": 0.0009, "lr": 1.3673945372599623e-06, "epoch": 2.2112973472350532, "percentage": 44.23, "elapsed_time": "0:24:23", "remaining_time": "0:30:45", "throughput": 5635.68, "total_tokens": 8247856}
|
|
{"current_steps": 16760, "total_steps": 37885, "loss": 0.0835, "lr": 1.366966018998505e-06, "epoch": 2.211957239012802, "percentage": 44.24, "elapsed_time": "0:24:23", "remaining_time": "0:30:45", "throughput": 5636.12, "total_tokens": 8250352}
|
|
{"current_steps": 16765, "total_steps": 37885, "loss": 0.0053, "lr": 1.3665374228511681e-06, "epoch": 2.2126171307905502, "percentage": 44.25, "elapsed_time": "0:24:24", "remaining_time": "0:30:44", "throughput": 5636.48, "total_tokens": 8252720}
|
|
{"current_steps": 16770, "total_steps": 37885, "loss": 0.0008, "lr": 1.366108748908917e-06, "epoch": 2.213277022568299, "percentage": 44.27, "elapsed_time": "0:24:24", "remaining_time": "0:30:43", "throughput": 5637.0, "total_tokens": 8255344}
|
|
{"current_steps": 16775, "total_steps": 37885, "loss": 0.0782, "lr": 1.3656799972627355e-06, "epoch": 2.2139369143460472, "percentage": 44.28, "elapsed_time": "0:24:24", "remaining_time": "0:30:43", "throughput": 5637.31, "total_tokens": 8257648}
|
|
{"current_steps": 16780, "total_steps": 37885, "loss": 0.0472, "lr": 1.3652511680036227e-06, "epoch": 2.2145968061237955, "percentage": 44.29, "elapsed_time": "0:24:25", "remaining_time": "0:30:42", "throughput": 5637.87, "total_tokens": 8260336}
|
|
{"current_steps": 16785, "total_steps": 37885, "loss": 0.0985, "lr": 1.3648222612225941e-06, "epoch": 2.2152566979015442, "percentage": 44.31, "elapsed_time": "0:24:25", "remaining_time": "0:30:42", "throughput": 5638.49, "total_tokens": 8263152}
|
|
{"current_steps": 16790, "total_steps": 37885, "loss": 0.0595, "lr": 1.3643932770106824e-06, "epoch": 2.2159165896792925, "percentage": 44.32, "elapsed_time": "0:24:25", "remaining_time": "0:30:41", "throughput": 5638.88, "total_tokens": 8265584}
|
|
{"current_steps": 16795, "total_steps": 37885, "loss": 0.0005, "lr": 1.3639642154589365e-06, "epoch": 2.2165764814570412, "percentage": 44.33, "elapsed_time": "0:24:26", "remaining_time": "0:30:41", "throughput": 5639.11, "total_tokens": 8267760}
|
|
{"current_steps": 16800, "total_steps": 37885, "loss": 0.0002, "lr": 1.3635350766584217e-06, "epoch": 2.2172363732347895, "percentage": 44.34, "elapsed_time": "0:24:26", "remaining_time": "0:30:40", "throughput": 5639.55, "total_tokens": 8270256}
|
|
{"current_steps": 16805, "total_steps": 37885, "loss": 0.0003, "lr": 1.363105860700219e-06, "epoch": 2.217896265012538, "percentage": 44.36, "elapsed_time": "0:24:26", "remaining_time": "0:30:39", "throughput": 5640.18, "total_tokens": 8273072}
|
|
{"current_steps": 16810, "total_steps": 37885, "loss": 0.194, "lr": 1.3626765676754274e-06, "epoch": 2.2185561567902865, "percentage": 44.37, "elapsed_time": "0:24:27", "remaining_time": "0:30:39", "throughput": 5640.49, "total_tokens": 8275376}
|
|
{"current_steps": 16815, "total_steps": 37885, "loss": 0.0007, "lr": 1.3622471976751599e-06, "epoch": 2.219216048568035, "percentage": 44.38, "elapsed_time": "0:24:27", "remaining_time": "0:30:38", "throughput": 5640.93, "total_tokens": 8277872}
|
|
{"current_steps": 16820, "total_steps": 37885, "loss": 0.0477, "lr": 1.3618177507905484e-06, "epoch": 2.2198759403457835, "percentage": 44.4, "elapsed_time": "0:24:27", "remaining_time": "0:30:38", "throughput": 5641.4, "total_tokens": 8280432}
|
|
{"current_steps": 16825, "total_steps": 37885, "loss": 0.0001, "lr": 1.361388227112739e-06, "epoch": 2.220535832123532, "percentage": 44.41, "elapsed_time": "0:24:28", "remaining_time": "0:30:37", "throughput": 5641.8, "total_tokens": 8282864}
|
|
{"current_steps": 16830, "total_steps": 37885, "loss": 0.0597, "lr": 1.3609586267328955e-06, "epoch": 2.22119572390128, "percentage": 44.42, "elapsed_time": "0:24:28", "remaining_time": "0:30:37", "throughput": 5642.23, "total_tokens": 8285360}
|
|
{"current_steps": 16835, "total_steps": 37885, "loss": 0.0002, "lr": 1.3605289497421974e-06, "epoch": 2.221855615679029, "percentage": 44.44, "elapsed_time": "0:24:28", "remaining_time": "0:30:36", "throughput": 5642.58, "total_tokens": 8287728}
|
|
{"current_steps": 16840, "total_steps": 37885, "loss": 0.1485, "lr": 1.3600991962318403e-06, "epoch": 2.222515507456777, "percentage": 44.45, "elapsed_time": "0:24:29", "remaining_time": "0:30:35", "throughput": 5643.06, "total_tokens": 8290288}
|
|
{"current_steps": 16845, "total_steps": 37885, "loss": 0.0736, "lr": 1.3596693662930365e-06, "epoch": 2.2231753992345253, "percentage": 44.46, "elapsed_time": "0:24:29", "remaining_time": "0:30:35", "throughput": 5643.45, "total_tokens": 8292720}
|
|
{"current_steps": 16850, "total_steps": 37885, "loss": 0.0003, "lr": 1.3592394600170142e-06, "epoch": 2.223835291012274, "percentage": 44.48, "elapsed_time": "0:24:29", "remaining_time": "0:30:34", "throughput": 5643.93, "total_tokens": 8295280}
|
|
{"current_steps": 16855, "total_steps": 37885, "loss": 0.0475, "lr": 1.3588094774950181e-06, "epoch": 2.2244951827900223, "percentage": 44.49, "elapsed_time": "0:24:30", "remaining_time": "0:30:34", "throughput": 5644.29, "total_tokens": 8297648}
|
|
{"current_steps": 16860, "total_steps": 37885, "loss": 0.0073, "lr": 1.3583794188183087e-06, "epoch": 2.225155074567771, "percentage": 44.5, "elapsed_time": "0:24:30", "remaining_time": "0:30:33", "throughput": 5644.65, "total_tokens": 8300016}
|
|
{"current_steps": 16865, "total_steps": 37885, "loss": 0.0011, "lr": 1.3579492840781625e-06, "epoch": 2.2258149663455193, "percentage": 44.52, "elapsed_time": "0:24:30", "remaining_time": "0:30:33", "throughput": 5645.09, "total_tokens": 8302512}
|
|
{"current_steps": 16870, "total_steps": 37885, "loss": 0.1018, "lr": 1.357519073365873e-06, "epoch": 2.2264748581232676, "percentage": 44.53, "elapsed_time": "0:24:31", "remaining_time": "0:30:32", "throughput": 5645.59, "total_tokens": 8305136}
|
|
{"current_steps": 16875, "total_steps": 37885, "loss": 0.0002, "lr": 1.357088786772749e-06, "epoch": 2.2271347499010163, "percentage": 44.54, "elapsed_time": "0:24:31", "remaining_time": "0:30:31", "throughput": 5646.06, "total_tokens": 8307696}
|
|
{"current_steps": 16880, "total_steps": 37885, "loss": 0.0004, "lr": 1.3566584243901163e-06, "epoch": 2.2277946416787646, "percentage": 44.56, "elapsed_time": "0:24:31", "remaining_time": "0:30:31", "throughput": 5646.36, "total_tokens": 8310000}
|
|
{"current_steps": 16885, "total_steps": 37885, "loss": 0.0002, "lr": 1.3562279863093154e-06, "epoch": 2.228454533456513, "percentage": 44.57, "elapsed_time": "0:24:32", "remaining_time": "0:30:30", "throughput": 5646.67, "total_tokens": 8312304}
|
|
{"current_steps": 16890, "total_steps": 37885, "loss": 0.0001, "lr": 1.3557974726217041e-06, "epoch": 2.2291144252342616, "percentage": 44.58, "elapsed_time": "0:24:32", "remaining_time": "0:30:30", "throughput": 5647.02, "total_tokens": 8314672}
|
|
{"current_steps": 16895, "total_steps": 37885, "loss": 0.0003, "lr": 1.3553668834186556e-06, "epoch": 2.22977431701201, "percentage": 44.6, "elapsed_time": "0:24:32", "remaining_time": "0:30:29", "throughput": 5647.45, "total_tokens": 8317168}
|
|
{"current_steps": 16900, "total_steps": 37885, "loss": 0.0642, "lr": 1.3549362187915593e-06, "epoch": 2.2304342087897586, "percentage": 44.61, "elapsed_time": "0:24:33", "remaining_time": "0:30:29", "throughput": 5647.97, "total_tokens": 8319792}
|
|
{"current_steps": 16905, "total_steps": 37885, "loss": 0.0002, "lr": 1.3545054788318212e-06, "epoch": 2.231094100567507, "percentage": 44.62, "elapsed_time": "0:24:33", "remaining_time": "0:30:28", "throughput": 5648.44, "total_tokens": 8322352}
|
|
{"current_steps": 16910, "total_steps": 37885, "loss": 0.1334, "lr": 1.3540746636308623e-06, "epoch": 2.231753992345255, "percentage": 44.64, "elapsed_time": "0:24:33", "remaining_time": "0:30:27", "throughput": 5648.88, "total_tokens": 8324848}
|
|
{"current_steps": 16915, "total_steps": 37885, "loss": 0.0002, "lr": 1.3536437732801198e-06, "epoch": 2.232413884123004, "percentage": 44.65, "elapsed_time": "0:24:34", "remaining_time": "0:30:27", "throughput": 5649.15, "total_tokens": 8327088}
|
|
{"current_steps": 16920, "total_steps": 37885, "loss": 0.0014, "lr": 1.3532128078710474e-06, "epoch": 2.233073775900752, "percentage": 44.66, "elapsed_time": "0:24:34", "remaining_time": "0:30:26", "throughput": 5649.66, "total_tokens": 8329712}
|
|
{"current_steps": 16925, "total_steps": 37885, "loss": 0.0088, "lr": 1.3527817674951143e-06, "epoch": 2.233733667678501, "percentage": 44.67, "elapsed_time": "0:24:34", "remaining_time": "0:30:26", "throughput": 5650.18, "total_tokens": 8332336}
|
|
{"current_steps": 16930, "total_steps": 37885, "loss": 0.0002, "lr": 1.3523506522438056e-06, "epoch": 2.234393559456249, "percentage": 44.69, "elapsed_time": "0:24:35", "remaining_time": "0:30:25", "throughput": 5650.49, "total_tokens": 8334640}
|
|
{"current_steps": 16935, "total_steps": 37885, "loss": 0.0535, "lr": 1.3519194622086227e-06, "epoch": 2.2350534512339975, "percentage": 44.7, "elapsed_time": "0:24:35", "remaining_time": "0:30:25", "throughput": 5650.89, "total_tokens": 8337072}
|
|
{"current_steps": 16940, "total_steps": 37885, "loss": 0.0806, "lr": 1.3514881974810823e-06, "epoch": 2.235713343011746, "percentage": 44.71, "elapsed_time": "0:24:35", "remaining_time": "0:30:24", "throughput": 5651.19, "total_tokens": 8339376}
|
|
{"current_steps": 16945, "total_steps": 37885, "loss": 0.1198, "lr": 1.3510568581527171e-06, "epoch": 2.2363732347894945, "percentage": 44.73, "elapsed_time": "0:24:36", "remaining_time": "0:30:24", "throughput": 5651.45, "total_tokens": 8341616}
|
|
{"current_steps": 16950, "total_steps": 37885, "loss": 0.0794, "lr": 1.3506254443150761e-06, "epoch": 2.237033126567243, "percentage": 44.74, "elapsed_time": "0:24:36", "remaining_time": "0:30:23", "throughput": 5651.93, "total_tokens": 8344176}
|
|
{"current_steps": 16955, "total_steps": 37885, "loss": 0.0985, "lr": 1.3501939560597233e-06, "epoch": 2.2376930183449915, "percentage": 44.75, "elapsed_time": "0:24:36", "remaining_time": "0:30:22", "throughput": 5652.32, "total_tokens": 8346608}
|
|
{"current_steps": 16960, "total_steps": 37885, "loss": 0.0123, "lr": 1.3497623934782397e-06, "epoch": 2.2383529101227397, "percentage": 44.77, "elapsed_time": "0:24:37", "remaining_time": "0:30:22", "throughput": 5652.95, "total_tokens": 8349424}
|
|
{"current_steps": 16965, "total_steps": 37885, "loss": 0.0019, "lr": 1.3493307566622204e-06, "epoch": 2.2390128019004885, "percentage": 44.78, "elapsed_time": "0:24:37", "remaining_time": "0:30:21", "throughput": 5653.26, "total_tokens": 8351728}
|
|
{"current_steps": 16970, "total_steps": 37885, "loss": 0.0007, "lr": 1.3488990457032778e-06, "epoch": 2.2396726936782367, "percentage": 44.79, "elapsed_time": "0:24:37", "remaining_time": "0:30:21", "throughput": 5653.65, "total_tokens": 8354160}
|
|
{"current_steps": 16975, "total_steps": 37885, "loss": 0.0096, "lr": 1.3484672606930393e-06, "epoch": 2.240332585455985, "percentage": 44.81, "elapsed_time": "0:24:37", "remaining_time": "0:30:20", "throughput": 5653.83, "total_tokens": 8356272}
|
|
{"current_steps": 16980, "total_steps": 37885, "loss": 0.0001, "lr": 1.3480354017231483e-06, "epoch": 2.2409924772337337, "percentage": 44.82, "elapsed_time": "0:24:38", "remaining_time": "0:30:20", "throughput": 5654.14, "total_tokens": 8358576}
|
|
{"current_steps": 16985, "total_steps": 37885, "loss": 0.0025, "lr": 1.3476034688852633e-06, "epoch": 2.241652369011482, "percentage": 44.83, "elapsed_time": "0:24:38", "remaining_time": "0:30:19", "throughput": 5654.53, "total_tokens": 8361008}
|
|
{"current_steps": 16990, "total_steps": 37885, "loss": 0.056, "lr": 1.3471714622710595e-06, "epoch": 2.2423122607892307, "percentage": 44.85, "elapsed_time": "0:24:38", "remaining_time": "0:30:18", "throughput": 5654.96, "total_tokens": 8363504}
|
|
{"current_steps": 16995, "total_steps": 37885, "loss": 0.0, "lr": 1.3467393819722265e-06, "epoch": 2.242972152566979, "percentage": 44.86, "elapsed_time": "0:24:39", "remaining_time": "0:30:18", "throughput": 5655.18, "total_tokens": 8365680}
|
|
{"current_steps": 17000, "total_steps": 37885, "loss": 0.0002, "lr": 1.3463072280804708e-06, "epoch": 2.2436320443447273, "percentage": 44.87, "elapsed_time": "0:24:39", "remaining_time": "0:30:17", "throughput": 5655.76, "total_tokens": 8368432}
|
|
{"current_steps": 17005, "total_steps": 37885, "loss": 0.0004, "lr": 1.3458750006875134e-06, "epoch": 2.244291936122476, "percentage": 44.89, "elapsed_time": "0:24:39", "remaining_time": "0:30:17", "throughput": 5656.08, "total_tokens": 8370736}
|
|
{"current_steps": 17010, "total_steps": 37885, "loss": 0.0002, "lr": 1.3454426998850919e-06, "epoch": 2.2449518279002243, "percentage": 44.9, "elapsed_time": "0:24:40", "remaining_time": "0:30:16", "throughput": 5656.66, "total_tokens": 8373488}
|
|
{"current_steps": 17015, "total_steps": 37885, "loss": 0.0001, "lr": 1.345010325764959e-06, "epoch": 2.245611719677973, "percentage": 44.91, "elapsed_time": "0:24:40", "remaining_time": "0:30:16", "throughput": 5657.09, "total_tokens": 8375984}
|
|
{"current_steps": 17020, "total_steps": 37885, "loss": 0.0002, "lr": 1.3445778784188828e-06, "epoch": 2.2462716114557213, "percentage": 44.93, "elapsed_time": "0:24:40", "remaining_time": "0:30:15", "throughput": 5657.52, "total_tokens": 8378480}
|
|
{"current_steps": 17025, "total_steps": 37885, "loss": 0.0004, "lr": 1.3441453579386468e-06, "epoch": 2.2469315032334696, "percentage": 44.94, "elapsed_time": "0:24:41", "remaining_time": "0:30:14", "throughput": 5657.73, "total_tokens": 8380656}
|
|
{"current_steps": 17030, "total_steps": 37885, "loss": 0.0447, "lr": 1.343712764416051e-06, "epoch": 2.2475913950112183, "percentage": 44.95, "elapsed_time": "0:24:41", "remaining_time": "0:30:14", "throughput": 5658.31, "total_tokens": 8383408}
|
|
{"current_steps": 17035, "total_steps": 37885, "loss": 0.1017, "lr": 1.3432800979429097e-06, "epoch": 2.2482512867889666, "percentage": 44.97, "elapsed_time": "0:24:41", "remaining_time": "0:30:13", "throughput": 5658.73, "total_tokens": 8385904}
|
|
{"current_steps": 17040, "total_steps": 37885, "loss": 0.0847, "lr": 1.3428473586110537e-06, "epoch": 2.248911178566715, "percentage": 44.98, "elapsed_time": "0:24:42", "remaining_time": "0:30:13", "throughput": 5659.17, "total_tokens": 8388400}
|
|
{"current_steps": 17045, "total_steps": 37885, "loss": 0.0001, "lr": 1.3424145465123286e-06, "epoch": 2.2495710703444636, "percentage": 44.99, "elapsed_time": "0:24:42", "remaining_time": "0:30:12", "throughput": 5659.44, "total_tokens": 8390640}
|
|
{"current_steps": 17050, "total_steps": 37885, "loss": 0.0, "lr": 1.3419816617385953e-06, "epoch": 2.250230962122212, "percentage": 45.0, "elapsed_time": "0:24:42", "remaining_time": "0:30:12", "throughput": 5659.89, "total_tokens": 8393200}
|
|
{"current_steps": 17055, "total_steps": 37885, "loss": 0.0556, "lr": 1.3415487043817311e-06, "epoch": 2.2508908538999606, "percentage": 45.02, "elapsed_time": "0:24:43", "remaining_time": "0:30:11", "throughput": 5660.28, "total_tokens": 8395632}
|
|
{"current_steps": 17055, "total_steps": 37885, "eval_loss": 0.1500292271375656, "epoch": 2.2508908538999606, "percentage": 45.02, "elapsed_time": "0:24:51", "remaining_time": "0:30:21", "throughput": 5630.56, "total_tokens": 8395632}
|
|
{"current_steps": 17060, "total_steps": 37885, "loss": 0.0, "lr": 1.3411156745336272e-06, "epoch": 2.251550745677709, "percentage": 45.03, "elapsed_time": "0:25:28", "remaining_time": "0:31:05", "throughput": 5495.23, "total_tokens": 8397872}
|
|
{"current_steps": 17065, "total_steps": 37885, "loss": 0.0002, "lr": 1.3406825722861921e-06, "epoch": 2.252210637455457, "percentage": 45.04, "elapsed_time": "0:25:28", "remaining_time": "0:31:04", "throughput": 5495.71, "total_tokens": 8400432}
|
|
{"current_steps": 17070, "total_steps": 37885, "loss": 0.0627, "lr": 1.3402493977313476e-06, "epoch": 2.252870529233206, "percentage": 45.06, "elapsed_time": "0:25:28", "remaining_time": "0:31:04", "throughput": 5495.94, "total_tokens": 8402608}
|
|
{"current_steps": 17075, "total_steps": 37885, "loss": 0.0002, "lr": 1.3398161509610324e-06, "epoch": 2.253530421010954, "percentage": 45.07, "elapsed_time": "0:25:29", "remaining_time": "0:31:03", "throughput": 5496.21, "total_tokens": 8404848}
|
|
{"current_steps": 17080, "total_steps": 37885, "loss": 0.0001, "lr": 1.3393828320672e-06, "epoch": 2.254190312788703, "percentage": 45.08, "elapsed_time": "0:25:29", "remaining_time": "0:31:03", "throughput": 5496.54, "total_tokens": 8407216}
|
|
{"current_steps": 17085, "total_steps": 37885, "loss": 0.0001, "lr": 1.3389494411418192e-06, "epoch": 2.254850204566451, "percentage": 45.1, "elapsed_time": "0:25:29", "remaining_time": "0:31:02", "throughput": 5496.91, "total_tokens": 8409648}
|
|
{"current_steps": 17090, "total_steps": 37885, "loss": 0.0001, "lr": 1.3385159782768738e-06, "epoch": 2.2555100963441994, "percentage": 45.11, "elapsed_time": "0:25:30", "remaining_time": "0:31:01", "throughput": 5497.26, "total_tokens": 8412016}
|
|
{"current_steps": 17095, "total_steps": 37885, "loss": 0.0763, "lr": 1.3380824435643633e-06, "epoch": 2.256169988121948, "percentage": 45.12, "elapsed_time": "0:25:30", "remaining_time": "0:31:01", "throughput": 5497.62, "total_tokens": 8414448}
|
|
{"current_steps": 17100, "total_steps": 37885, "loss": 0.1161, "lr": 1.3376488370963027e-06, "epoch": 2.2568298798996964, "percentage": 45.14, "elapsed_time": "0:25:30", "remaining_time": "0:31:00", "throughput": 5497.91, "total_tokens": 8416752}
|
|
{"current_steps": 17105, "total_steps": 37885, "loss": 0.0001, "lr": 1.3372151589647212e-06, "epoch": 2.257489771677445, "percentage": 45.15, "elapsed_time": "0:25:31", "remaining_time": "0:31:00", "throughput": 5498.24, "total_tokens": 8419120}
|
|
{"current_steps": 17110, "total_steps": 37885, "loss": 0.0876, "lr": 1.3367814092616642e-06, "epoch": 2.2581496634551934, "percentage": 45.16, "elapsed_time": "0:25:31", "remaining_time": "0:30:59", "throughput": 5498.45, "total_tokens": 8421296}
|
|
{"current_steps": 17115, "total_steps": 37885, "loss": 0.0389, "lr": 1.336347588079192e-06, "epoch": 2.2588095552329417, "percentage": 45.18, "elapsed_time": "0:25:31", "remaining_time": "0:30:59", "throughput": 5498.69, "total_tokens": 8423536}
|
|
{"current_steps": 17120, "total_steps": 37885, "loss": 0.0001, "lr": 1.3359136955093798e-06, "epoch": 2.2594694470106904, "percentage": 45.19, "elapsed_time": "0:25:32", "remaining_time": "0:30:58", "throughput": 5499.15, "total_tokens": 8426096}
|
|
{"current_steps": 17125, "total_steps": 37885, "loss": 0.0383, "lr": 1.335479731644318e-06, "epoch": 2.2601293387884387, "percentage": 45.2, "elapsed_time": "0:25:32", "remaining_time": "0:30:57", "throughput": 5499.5, "total_tokens": 8428464}
|
|
{"current_steps": 17130, "total_steps": 37885, "loss": 0.0004, "lr": 1.3350456965761127e-06, "epoch": 2.260789230566187, "percentage": 45.22, "elapsed_time": "0:25:32", "remaining_time": "0:30:57", "throughput": 5499.99, "total_tokens": 8431088}
|
|
{"current_steps": 17135, "total_steps": 37885, "loss": 0.0001, "lr": 1.3346115903968845e-06, "epoch": 2.2614491223439357, "percentage": 45.23, "elapsed_time": "0:25:33", "remaining_time": "0:30:56", "throughput": 5500.25, "total_tokens": 8433328}
|
|
{"current_steps": 17140, "total_steps": 37885, "loss": 0.0002, "lr": 1.3341774131987694e-06, "epoch": 2.262109014121684, "percentage": 45.24, "elapsed_time": "0:25:33", "remaining_time": "0:30:56", "throughput": 5500.62, "total_tokens": 8435760}
|
|
{"current_steps": 17145, "total_steps": 37885, "loss": 0.0319, "lr": 1.333743165073918e-06, "epoch": 2.2627689058994322, "percentage": 45.26, "elapsed_time": "0:25:33", "remaining_time": "0:30:55", "throughput": 5500.84, "total_tokens": 8437936}
|
|
{"current_steps": 17150, "total_steps": 37885, "loss": 0.0011, "lr": 1.3333088461144967e-06, "epoch": 2.263428797677181, "percentage": 45.27, "elapsed_time": "0:25:34", "remaining_time": "0:30:54", "throughput": 5501.28, "total_tokens": 8440496}
|
|
{"current_steps": 17155, "total_steps": 37885, "loss": 0.0517, "lr": 1.3328744564126868e-06, "epoch": 2.2640886894549292, "percentage": 45.28, "elapsed_time": "0:25:34", "remaining_time": "0:30:54", "throughput": 5501.51, "total_tokens": 8442736}
|
|
{"current_steps": 17160, "total_steps": 37885, "loss": 0.1567, "lr": 1.3324399960606835e-06, "epoch": 2.264748581232678, "percentage": 45.29, "elapsed_time": "0:25:34", "remaining_time": "0:30:53", "throughput": 5502.03, "total_tokens": 8445424}
|
|
{"current_steps": 17165, "total_steps": 37885, "loss": 0.0549, "lr": 1.3320054651506985e-06, "epoch": 2.2654084730104262, "percentage": 45.31, "elapsed_time": "0:25:35", "remaining_time": "0:30:53", "throughput": 5502.53, "total_tokens": 8448048}
|
|
{"current_steps": 17170, "total_steps": 37885, "loss": 0.0001, "lr": 1.331570863774958e-06, "epoch": 2.2660683647881745, "percentage": 45.32, "elapsed_time": "0:25:35", "remaining_time": "0:30:52", "throughput": 5502.79, "total_tokens": 8450288}
|
|
{"current_steps": 17175, "total_steps": 37885, "loss": 0.0566, "lr": 1.3311361920257024e-06, "epoch": 2.2667282565659232, "percentage": 45.33, "elapsed_time": "0:25:35", "remaining_time": "0:30:52", "throughput": 5503.09, "total_tokens": 8452592}
|
|
{"current_steps": 17180, "total_steps": 37885, "loss": 0.0006, "lr": 1.3307014499951882e-06, "epoch": 2.2673881483436715, "percentage": 45.35, "elapsed_time": "0:25:36", "remaining_time": "0:30:51", "throughput": 5503.42, "total_tokens": 8454960}
|
|
{"current_steps": 17185, "total_steps": 37885, "loss": 0.0003, "lr": 1.3302666377756859e-06, "epoch": 2.2680480401214203, "percentage": 45.36, "elapsed_time": "0:25:36", "remaining_time": "0:30:50", "throughput": 5503.76, "total_tokens": 8457328}
|
|
{"current_steps": 17190, "total_steps": 37885, "loss": 0.083, "lr": 1.3298317554594813e-06, "epoch": 2.2687079318991685, "percentage": 45.37, "elapsed_time": "0:25:36", "remaining_time": "0:30:50", "throughput": 5504.18, "total_tokens": 8459824}
|
|
{"current_steps": 17195, "total_steps": 37885, "loss": 0.0001, "lr": 1.3293968031388752e-06, "epoch": 2.269367823676917, "percentage": 45.39, "elapsed_time": "0:25:37", "remaining_time": "0:30:49", "throughput": 5504.66, "total_tokens": 8462448}
|
|
{"current_steps": 17200, "total_steps": 37885, "loss": 0.1421, "lr": 1.3289617809061827e-06, "epoch": 2.2700277154546655, "percentage": 45.4, "elapsed_time": "0:25:37", "remaining_time": "0:30:49", "throughput": 5504.96, "total_tokens": 8464752}
|
|
{"current_steps": 17205, "total_steps": 37885, "loss": 0.0642, "lr": 1.3285266888537346e-06, "epoch": 2.270687607232414, "percentage": 45.41, "elapsed_time": "0:25:37", "remaining_time": "0:30:48", "throughput": 5505.34, "total_tokens": 8467184}
|
|
{"current_steps": 17210, "total_steps": 37885, "loss": 0.1786, "lr": 1.3280915270738754e-06, "epoch": 2.2713474990101625, "percentage": 45.43, "elapsed_time": "0:25:38", "remaining_time": "0:30:48", "throughput": 5505.76, "total_tokens": 8469680}
|
|
{"current_steps": 17215, "total_steps": 37885, "loss": 0.0017, "lr": 1.3276562956589656e-06, "epoch": 2.272007390787911, "percentage": 45.44, "elapsed_time": "0:25:38", "remaining_time": "0:30:47", "throughput": 5506.01, "total_tokens": 8471920}
|
|
{"current_steps": 17220, "total_steps": 37885, "loss": 0.0002, "lr": 1.32722099470138e-06, "epoch": 2.272667282565659, "percentage": 45.45, "elapsed_time": "0:25:39", "remaining_time": "0:30:46", "throughput": 5506.53, "total_tokens": 8474608}
|
|
{"current_steps": 17225, "total_steps": 37885, "loss": 0.0253, "lr": 1.3267856242935076e-06, "epoch": 2.273327174343408, "percentage": 45.47, "elapsed_time": "0:25:39", "remaining_time": "0:30:46", "throughput": 5506.77, "total_tokens": 8476848}
|
|
{"current_steps": 17230, "total_steps": 37885, "loss": 0.0448, "lr": 1.3263501845277528e-06, "epoch": 2.273987066121156, "percentage": 45.48, "elapsed_time": "0:25:39", "remaining_time": "0:30:45", "throughput": 5507.14, "total_tokens": 8479280}
|
|
{"current_steps": 17235, "total_steps": 37885, "loss": 0.0008, "lr": 1.3259146754965346e-06, "epoch": 2.274646957898905, "percentage": 45.49, "elapsed_time": "0:25:40", "remaining_time": "0:30:45", "throughput": 5507.57, "total_tokens": 8481776}
|
|
{"current_steps": 17240, "total_steps": 37885, "loss": 0.0031, "lr": 1.3254790972922867e-06, "epoch": 2.275306849676653, "percentage": 45.51, "elapsed_time": "0:25:40", "remaining_time": "0:30:44", "throughput": 5507.98, "total_tokens": 8484208}
|
|
{"current_steps": 17245, "total_steps": 37885, "loss": 0.047, "lr": 1.3250434500074574e-06, "epoch": 2.2759667414544014, "percentage": 45.52, "elapsed_time": "0:25:40", "remaining_time": "0:30:43", "throughput": 5508.48, "total_tokens": 8486832}
|
|
{"current_steps": 17250, "total_steps": 37885, "loss": 0.0611, "lr": 1.3246077337345097e-06, "epoch": 2.27662663323215, "percentage": 45.53, "elapsed_time": "0:25:41", "remaining_time": "0:30:43", "throughput": 5508.91, "total_tokens": 8489328}
|
|
{"current_steps": 17255, "total_steps": 37885, "loss": 0.0427, "lr": 1.3241719485659206e-06, "epoch": 2.2772865250098984, "percentage": 45.55, "elapsed_time": "0:25:41", "remaining_time": "0:30:42", "throughput": 5509.28, "total_tokens": 8491696}
|
|
{"current_steps": 17260, "total_steps": 37885, "loss": 0.0004, "lr": 1.3237360945941834e-06, "epoch": 2.2779464167876466, "percentage": 45.56, "elapsed_time": "0:25:41", "remaining_time": "0:30:42", "throughput": 5509.8, "total_tokens": 8494320}
|
|
{"current_steps": 17265, "total_steps": 37885, "loss": 0.0442, "lr": 1.3233001719118043e-06, "epoch": 2.2786063085653954, "percentage": 45.57, "elapsed_time": "0:25:42", "remaining_time": "0:30:41", "throughput": 5510.08, "total_tokens": 8496560}
|
|
{"current_steps": 17270, "total_steps": 37885, "loss": 0.1099, "lr": 1.3228641806113047e-06, "epoch": 2.2792662003431436, "percentage": 45.59, "elapsed_time": "0:25:42", "remaining_time": "0:30:41", "throughput": 5510.46, "total_tokens": 8498928}
|
|
{"current_steps": 17275, "total_steps": 37885, "loss": 0.0006, "lr": 1.3224281207852213e-06, "epoch": 2.2799260921208924, "percentage": 45.6, "elapsed_time": "0:25:42", "remaining_time": "0:30:40", "throughput": 5510.97, "total_tokens": 8501552}
|
|
{"current_steps": 17280, "total_steps": 37885, "loss": 0.0854, "lr": 1.3219919925261034e-06, "epoch": 2.2805859838986406, "percentage": 45.61, "elapsed_time": "0:25:42", "remaining_time": "0:30:39", "throughput": 5511.26, "total_tokens": 8503792}
|
|
{"current_steps": 17285, "total_steps": 37885, "loss": 0.0475, "lr": 1.321555795926517e-06, "epoch": 2.281245875676389, "percentage": 45.62, "elapsed_time": "0:25:43", "remaining_time": "0:30:39", "throughput": 5511.38, "total_tokens": 8505776}
|
|
{"current_steps": 17290, "total_steps": 37885, "loss": 0.0001, "lr": 1.3211195310790415e-06, "epoch": 2.2819057674541376, "percentage": 45.64, "elapsed_time": "0:25:43", "remaining_time": "0:30:38", "throughput": 5511.82, "total_tokens": 8508272}
|
|
{"current_steps": 17295, "total_steps": 37885, "loss": 0.0002, "lr": 1.3206831980762712e-06, "epoch": 2.282565659231886, "percentage": 45.65, "elapsed_time": "0:25:43", "remaining_time": "0:30:38", "throughput": 5512.26, "total_tokens": 8510768}
|
|
{"current_steps": 17300, "total_steps": 37885, "loss": 0.0001, "lr": 1.320246797010814e-06, "epoch": 2.283225551009634, "percentage": 45.66, "elapsed_time": "0:25:44", "remaining_time": "0:30:37", "throughput": 5512.85, "total_tokens": 8513520}
|
|
{"current_steps": 17305, "total_steps": 37885, "loss": 0.0009, "lr": 1.319810327975293e-06, "epoch": 2.283885442787383, "percentage": 45.68, "elapsed_time": "0:25:44", "remaining_time": "0:30:36", "throughput": 5513.33, "total_tokens": 8516080}
|
|
{"current_steps": 17310, "total_steps": 37885, "loss": 0.0675, "lr": 1.3193737910623462e-06, "epoch": 2.284545334565131, "percentage": 45.69, "elapsed_time": "0:25:44", "remaining_time": "0:30:36", "throughput": 5513.7, "total_tokens": 8518448}
|
|
{"current_steps": 17315, "total_steps": 37885, "loss": 0.0, "lr": 1.3189371863646246e-06, "epoch": 2.28520522634288, "percentage": 45.7, "elapsed_time": "0:25:45", "remaining_time": "0:30:35", "throughput": 5513.94, "total_tokens": 8520624}
|
|
{"current_steps": 17320, "total_steps": 37885, "loss": 0.1649, "lr": 1.318500513974795e-06, "epoch": 2.285865118120628, "percentage": 45.72, "elapsed_time": "0:25:45", "remaining_time": "0:30:35", "throughput": 5514.45, "total_tokens": 8523248}
|
|
{"current_steps": 17325, "total_steps": 37885, "loss": 0.0001, "lr": 1.3180637739855376e-06, "epoch": 2.2865250098983765, "percentage": 45.73, "elapsed_time": "0:25:45", "remaining_time": "0:30:34", "throughput": 5514.76, "total_tokens": 8525552}
|
|
{"current_steps": 17330, "total_steps": 37885, "loss": 0.0008, "lr": 1.3176269664895476e-06, "epoch": 2.287184901676125, "percentage": 45.74, "elapsed_time": "0:25:46", "remaining_time": "0:30:34", "throughput": 5515.18, "total_tokens": 8528048}
|
|
{"current_steps": 17335, "total_steps": 37885, "loss": 0.0557, "lr": 1.3171900915795338e-06, "epoch": 2.2878447934538735, "percentage": 45.76, "elapsed_time": "0:25:46", "remaining_time": "0:30:33", "throughput": 5515.58, "total_tokens": 8530480}
|
|
{"current_steps": 17340, "total_steps": 37885, "loss": 0.0002, "lr": 1.31675314934822e-06, "epoch": 2.288504685231622, "percentage": 45.77, "elapsed_time": "0:25:46", "remaining_time": "0:30:32", "throughput": 5516.05, "total_tokens": 8533040}
|
|
{"current_steps": 17345, "total_steps": 37885, "loss": 0.1025, "lr": 1.316316139888344e-06, "epoch": 2.2891645770093705, "percentage": 45.78, "elapsed_time": "0:25:47", "remaining_time": "0:30:32", "throughput": 5516.49, "total_tokens": 8535536}
|
|
{"current_steps": 17350, "total_steps": 37885, "loss": 0.0009, "lr": 1.3158790632926579e-06, "epoch": 2.2898244687871188, "percentage": 45.8, "elapsed_time": "0:25:47", "remaining_time": "0:30:31", "throughput": 5516.93, "total_tokens": 8538032}
|
|
{"current_steps": 17355, "total_steps": 37885, "loss": 0.0595, "lr": 1.3154419196539281e-06, "epoch": 2.2904843605648675, "percentage": 45.81, "elapsed_time": "0:25:47", "remaining_time": "0:30:31", "throughput": 5517.36, "total_tokens": 8540528}
|
|
{"current_steps": 17360, "total_steps": 37885, "loss": 0.0001, "lr": 1.315004709064935e-06, "epoch": 2.2911442523426158, "percentage": 45.82, "elapsed_time": "0:25:48", "remaining_time": "0:30:30", "throughput": 5517.8, "total_tokens": 8543024}
|
|
{"current_steps": 17365, "total_steps": 37885, "loss": 0.0565, "lr": 1.3145674316184736e-06, "epoch": 2.2918041441203645, "percentage": 45.84, "elapsed_time": "0:25:48", "remaining_time": "0:30:29", "throughput": 5518.22, "total_tokens": 8545520}
|
|
{"current_steps": 17370, "total_steps": 37885, "loss": 0.0457, "lr": 1.3141300874073524e-06, "epoch": 2.2924640358981128, "percentage": 45.85, "elapsed_time": "0:25:48", "remaining_time": "0:30:29", "throughput": 5518.65, "total_tokens": 8548016}
|
|
{"current_steps": 17375, "total_steps": 37885, "loss": 0.0002, "lr": 1.3136926765243955e-06, "epoch": 2.293123927675861, "percentage": 45.86, "elapsed_time": "0:25:49", "remaining_time": "0:30:28", "throughput": 5519.07, "total_tokens": 8550512}
|
|
{"current_steps": 17380, "total_steps": 37885, "loss": 0.055, "lr": 1.3132551990624392e-06, "epoch": 2.2937838194536098, "percentage": 45.88, "elapsed_time": "0:25:49", "remaining_time": "0:30:28", "throughput": 5519.38, "total_tokens": 8552816}
|
|
{"current_steps": 17385, "total_steps": 37885, "loss": 0.06, "lr": 1.3128176551143352e-06, "epoch": 2.294443711231358, "percentage": 45.89, "elapsed_time": "0:25:49", "remaining_time": "0:30:27", "throughput": 5519.81, "total_tokens": 8555312}
|
|
{"current_steps": 17390, "total_steps": 37885, "loss": 0.0493, "lr": 1.3123800447729497e-06, "epoch": 2.2951036030091063, "percentage": 45.9, "elapsed_time": "0:25:50", "remaining_time": "0:30:27", "throughput": 5520.08, "total_tokens": 8557552}
|
|
{"current_steps": 17395, "total_steps": 37885, "loss": 0.0001, "lr": 1.3119423681311612e-06, "epoch": 2.295763494786855, "percentage": 45.92, "elapsed_time": "0:25:50", "remaining_time": "0:30:26", "throughput": 5520.45, "total_tokens": 8559920}
|
|
{"current_steps": 17400, "total_steps": 37885, "loss": 0.0001, "lr": 1.3115046252818644e-06, "epoch": 2.2964233865646033, "percentage": 45.93, "elapsed_time": "0:25:50", "remaining_time": "0:30:25", "throughput": 5520.94, "total_tokens": 8562544}
|
|
{"current_steps": 17405, "total_steps": 37885, "loss": 0.0001, "lr": 1.3110668163179664e-06, "epoch": 2.297083278342352, "percentage": 45.94, "elapsed_time": "0:25:51", "remaining_time": "0:30:25", "throughput": 5521.43, "total_tokens": 8565168}
|
|
{"current_steps": 17410, "total_steps": 37885, "loss": 0.0382, "lr": 1.3106289413323891e-06, "epoch": 2.2977431701201003, "percentage": 45.95, "elapsed_time": "0:25:51", "remaining_time": "0:30:24", "throughput": 5521.88, "total_tokens": 8567664}
|
|
{"current_steps": 17415, "total_steps": 37885, "loss": 0.006, "lr": 1.3101910004180685e-06, "epoch": 2.2984030618978486, "percentage": 45.97, "elapsed_time": "0:25:51", "remaining_time": "0:30:24", "throughput": 5522.06, "total_tokens": 8569776}
|
|
{"current_steps": 17420, "total_steps": 37885, "loss": 0.0004, "lr": 1.3097529936679545e-06, "epoch": 2.2990629536755973, "percentage": 45.98, "elapsed_time": "0:25:52", "remaining_time": "0:30:23", "throughput": 5522.29, "total_tokens": 8571952}
|
|
{"current_steps": 17425, "total_steps": 37885, "loss": 0.0001, "lr": 1.3093149211750105e-06, "epoch": 2.2997228454533456, "percentage": 45.99, "elapsed_time": "0:25:52", "remaining_time": "0:30:22", "throughput": 5522.68, "total_tokens": 8574384}
|
|
{"current_steps": 17430, "total_steps": 37885, "loss": 0.0007, "lr": 1.3088767830322142e-06, "epoch": 2.300382737231094, "percentage": 46.01, "elapsed_time": "0:25:52", "remaining_time": "0:30:22", "throughput": 5523.08, "total_tokens": 8576816}
|
|
{"current_steps": 17435, "total_steps": 37885, "loss": 0.0389, "lr": 1.3084385793325575e-06, "epoch": 2.3010426290088426, "percentage": 46.02, "elapsed_time": "0:25:53", "remaining_time": "0:30:21", "throughput": 5523.43, "total_tokens": 8579184}
|
|
{"current_steps": 17440, "total_steps": 37885, "loss": 0.0005, "lr": 1.308000310169046e-06, "epoch": 2.301702520786591, "percentage": 46.03, "elapsed_time": "0:25:53", "remaining_time": "0:30:21", "throughput": 5523.83, "total_tokens": 8581616}
|
|
{"current_steps": 17445, "total_steps": 37885, "loss": 0.0001, "lr": 1.307561975634699e-06, "epoch": 2.3023624125643396, "percentage": 46.05, "elapsed_time": "0:25:53", "remaining_time": "0:30:20", "throughput": 5524.23, "total_tokens": 8584048}
|
|
{"current_steps": 17450, "total_steps": 37885, "loss": 0.0, "lr": 1.3071235758225497e-06, "epoch": 2.303022304342088, "percentage": 46.06, "elapsed_time": "0:25:54", "remaining_time": "0:30:20", "throughput": 5524.51, "total_tokens": 8586288}
|
|
{"current_steps": 17455, "total_steps": 37885, "loss": 0.0002, "lr": 1.3066851108256457e-06, "epoch": 2.303682196119836, "percentage": 46.07, "elapsed_time": "0:25:54", "remaining_time": "0:30:19", "throughput": 5524.94, "total_tokens": 8588784}
|
|
{"current_steps": 17460, "total_steps": 37885, "loss": 0.0577, "lr": 1.3062465807370475e-06, "epoch": 2.304342087897585, "percentage": 46.09, "elapsed_time": "0:25:54", "remaining_time": "0:30:18", "throughput": 5525.32, "total_tokens": 8591216}
|
|
{"current_steps": 17465, "total_steps": 37885, "loss": 0.1142, "lr": 1.3058079856498302e-06, "epoch": 2.305001979675333, "percentage": 46.1, "elapsed_time": "0:25:55", "remaining_time": "0:30:18", "throughput": 5525.85, "total_tokens": 8593904}
|
|
{"current_steps": 17470, "total_steps": 37885, "loss": 0.0, "lr": 1.3053693256570829e-06, "epoch": 2.305661871453082, "percentage": 46.11, "elapsed_time": "0:25:55", "remaining_time": "0:30:17", "throughput": 5526.16, "total_tokens": 8596208}
|
|
{"current_steps": 17475, "total_steps": 37885, "loss": 0.085, "lr": 1.304930600851907e-06, "epoch": 2.30632176323083, "percentage": 46.13, "elapsed_time": "0:25:55", "remaining_time": "0:30:17", "throughput": 5526.61, "total_tokens": 8598768}
|
|
{"current_steps": 17480, "total_steps": 37885, "loss": 0.0002, "lr": 1.3044918113274195e-06, "epoch": 2.3069816550085784, "percentage": 46.14, "elapsed_time": "0:25:56", "remaining_time": "0:30:16", "throughput": 5526.88, "total_tokens": 8601008}
|
|
{"current_steps": 17485, "total_steps": 37885, "loss": 0.127, "lr": 1.3040529571767498e-06, "epoch": 2.307641546786327, "percentage": 46.15, "elapsed_time": "0:25:56", "remaining_time": "0:30:16", "throughput": 5527.38, "total_tokens": 8603632}
|
|
{"current_steps": 17490, "total_steps": 37885, "loss": 0.0004, "lr": 1.3036140384930416e-06, "epoch": 2.3083014385640754, "percentage": 46.17, "elapsed_time": "0:25:56", "remaining_time": "0:30:15", "throughput": 5527.65, "total_tokens": 8605872}
|
|
{"current_steps": 17495, "total_steps": 37885, "loss": 0.0001, "lr": 1.3031750553694528e-06, "epoch": 2.308961330341824, "percentage": 46.18, "elapsed_time": "0:25:57", "remaining_time": "0:30:14", "throughput": 5528.12, "total_tokens": 8608432}
|
|
{"current_steps": 17500, "total_steps": 37885, "loss": 0.0001, "lr": 1.3027360078991535e-06, "epoch": 2.3096212221195724, "percentage": 46.19, "elapsed_time": "0:25:57", "remaining_time": "0:30:14", "throughput": 5528.42, "total_tokens": 8610736}
|
|
{"current_steps": 17505, "total_steps": 37885, "loss": 0.0782, "lr": 1.302296896175329e-06, "epoch": 2.3102811138973207, "percentage": 46.21, "elapsed_time": "0:25:57", "remaining_time": "0:30:13", "throughput": 5529.06, "total_tokens": 8613616}
|
|
{"current_steps": 17510, "total_steps": 37885, "loss": 0.0256, "lr": 1.3018577202911774e-06, "epoch": 2.3109410056750694, "percentage": 46.22, "elapsed_time": "0:25:58", "remaining_time": "0:30:13", "throughput": 5529.44, "total_tokens": 8616048}
|
|
{"current_steps": 17515, "total_steps": 37885, "loss": 0.1737, "lr": 1.3014184803399104e-06, "epoch": 2.3116008974528177, "percentage": 46.23, "elapsed_time": "0:25:58", "remaining_time": "0:30:12", "throughput": 5529.66, "total_tokens": 8618224}
|
|
{"current_steps": 17520, "total_steps": 37885, "loss": 0.0467, "lr": 1.3009791764147537e-06, "epoch": 2.312260789230566, "percentage": 46.25, "elapsed_time": "0:25:58", "remaining_time": "0:30:12", "throughput": 5530.1, "total_tokens": 8620784}
|
|
{"current_steps": 17525, "total_steps": 37885, "loss": 0.0, "lr": 1.3005398086089462e-06, "epoch": 2.3129206810083147, "percentage": 46.26, "elapsed_time": "0:25:59", "remaining_time": "0:30:11", "throughput": 5530.42, "total_tokens": 8623152}
|
|
{"current_steps": 17530, "total_steps": 37885, "loss": 0.034, "lr": 1.3001003770157409e-06, "epoch": 2.313580572786063, "percentage": 46.27, "elapsed_time": "0:25:59", "remaining_time": "0:30:10", "throughput": 5530.72, "total_tokens": 8625456}
|
|
{"current_steps": 17535, "total_steps": 37885, "loss": 0.0005, "lr": 1.2996608817284033e-06, "epoch": 2.3142404645638117, "percentage": 46.28, "elapsed_time": "0:25:59", "remaining_time": "0:30:10", "throughput": 5531.14, "total_tokens": 8627952}
|
|
{"current_steps": 17540, "total_steps": 37885, "loss": 0.0003, "lr": 1.2992213228402142e-06, "epoch": 2.31490035634156, "percentage": 46.3, "elapsed_time": "0:26:00", "remaining_time": "0:30:09", "throughput": 5531.68, "total_tokens": 8630640}
|
|
{"current_steps": 17545, "total_steps": 37885, "loss": 0.0008, "lr": 1.2987817004444654e-06, "epoch": 2.3155602481193083, "percentage": 46.31, "elapsed_time": "0:26:00", "remaining_time": "0:30:09", "throughput": 5532.33, "total_tokens": 8633520}
|
|
{"current_steps": 17550, "total_steps": 37885, "loss": 0.0799, "lr": 1.2983420146344648e-06, "epoch": 2.316220139897057, "percentage": 46.32, "elapsed_time": "0:26:00", "remaining_time": "0:30:08", "throughput": 5532.84, "total_tokens": 8636208}
|
|
{"current_steps": 17555, "total_steps": 37885, "loss": 0.0427, "lr": 1.297902265503532e-06, "epoch": 2.3168800316748053, "percentage": 46.34, "elapsed_time": "0:26:01", "remaining_time": "0:30:08", "throughput": 5533.12, "total_tokens": 8638512}
|
|
{"current_steps": 17560, "total_steps": 37885, "loss": 0.1341, "lr": 1.2974624531450003e-06, "epoch": 2.3175399234525536, "percentage": 46.35, "elapsed_time": "0:26:01", "remaining_time": "0:30:07", "throughput": 5533.47, "total_tokens": 8640944}
|
|
{"current_steps": 17565, "total_steps": 37885, "loss": 0.1493, "lr": 1.2970225776522172e-06, "epoch": 2.3181998152303023, "percentage": 46.36, "elapsed_time": "0:26:01", "remaining_time": "0:30:06", "throughput": 5534.0, "total_tokens": 8643632}
|
|
{"current_steps": 17570, "total_steps": 37885, "loss": 0.018, "lr": 1.2965826391185425e-06, "epoch": 2.3188597070080506, "percentage": 46.38, "elapsed_time": "0:26:02", "remaining_time": "0:30:06", "throughput": 5534.36, "total_tokens": 8646064}
|
|
{"current_steps": 17575, "total_steps": 37885, "loss": 0.0023, "lr": 1.2961426376373507e-06, "epoch": 2.3195195987857993, "percentage": 46.39, "elapsed_time": "0:26:02", "remaining_time": "0:30:05", "throughput": 5534.76, "total_tokens": 8648560}
|
|
{"current_steps": 17580, "total_steps": 37885, "loss": 0.0002, "lr": 1.2957025733020285e-06, "epoch": 2.3201794905635476, "percentage": 46.4, "elapsed_time": "0:26:02", "remaining_time": "0:30:05", "throughput": 5535.17, "total_tokens": 8651056}
|
|
{"current_steps": 17585, "total_steps": 37885, "loss": 0.0002, "lr": 1.2952624462059767e-06, "epoch": 2.320839382341296, "percentage": 46.42, "elapsed_time": "0:26:03", "remaining_time": "0:30:04", "throughput": 5535.56, "total_tokens": 8653552}
|
|
{"current_steps": 17590, "total_steps": 37885, "loss": 0.0009, "lr": 1.2948222564426083e-06, "epoch": 2.3214992741190446, "percentage": 46.43, "elapsed_time": "0:26:03", "remaining_time": "0:30:04", "throughput": 5535.97, "total_tokens": 8656048}
|
|
{"current_steps": 17595, "total_steps": 37885, "loss": 0.0003, "lr": 1.2943820041053512e-06, "epoch": 2.322159165896793, "percentage": 46.44, "elapsed_time": "0:26:03", "remaining_time": "0:30:03", "throughput": 5536.25, "total_tokens": 8658352}
|
|
{"current_steps": 17600, "total_steps": 37885, "loss": 0.0001, "lr": 1.2939416892876451e-06, "epoch": 2.3228190576745416, "percentage": 46.46, "elapsed_time": "0:26:04", "remaining_time": "0:30:02", "throughput": 5536.57, "total_tokens": 8660720}
|
|
{"current_steps": 17605, "total_steps": 37885, "loss": 0.0005, "lr": 1.2935013120829443e-06, "epoch": 2.32347894945229, "percentage": 46.47, "elapsed_time": "0:26:04", "remaining_time": "0:30:02", "throughput": 5536.85, "total_tokens": 8663024}
|
|
{"current_steps": 17610, "total_steps": 37885, "loss": 0.0411, "lr": 1.2930608725847156e-06, "epoch": 2.324138841230038, "percentage": 46.48, "elapsed_time": "0:26:04", "remaining_time": "0:30:01", "throughput": 5537.2, "total_tokens": 8665392}
|
|
{"current_steps": 17615, "total_steps": 37885, "loss": 0.0001, "lr": 1.2926203708864385e-06, "epoch": 2.324798733007787, "percentage": 46.5, "elapsed_time": "0:26:05", "remaining_time": "0:30:01", "throughput": 5537.56, "total_tokens": 8667824}
|
|
{"current_steps": 17620, "total_steps": 37885, "loss": 0.1861, "lr": 1.2921798070816068e-06, "epoch": 2.325458624785535, "percentage": 46.51, "elapsed_time": "0:26:05", "remaining_time": "0:30:00", "throughput": 5538.03, "total_tokens": 8670448}
|
|
{"current_steps": 17625, "total_steps": 37885, "loss": 0.0, "lr": 1.2917391812637269e-06, "epoch": 2.326118516563284, "percentage": 46.52, "elapsed_time": "0:26:05", "remaining_time": "0:30:00", "throughput": 5538.46, "total_tokens": 8672944}
|
|
{"current_steps": 17630, "total_steps": 37885, "loss": 0.0659, "lr": 1.2912984935263183e-06, "epoch": 2.326778408341032, "percentage": 46.54, "elapsed_time": "0:26:06", "remaining_time": "0:29:59", "throughput": 5538.77, "total_tokens": 8675248}
|
|
{"current_steps": 17635, "total_steps": 37885, "loss": 0.0007, "lr": 1.290857743962914e-06, "epoch": 2.3274383001187804, "percentage": 46.55, "elapsed_time": "0:26:06", "remaining_time": "0:29:58", "throughput": 5539.15, "total_tokens": 8677680}
|
|
{"current_steps": 17640, "total_steps": 37885, "loss": 0.0005, "lr": 1.2904169326670596e-06, "epoch": 2.328098191896529, "percentage": 46.56, "elapsed_time": "0:26:06", "remaining_time": "0:29:58", "throughput": 5539.48, "total_tokens": 8680048}
|
|
{"current_steps": 17645, "total_steps": 37885, "loss": 0.0002, "lr": 1.2899760597323144e-06, "epoch": 2.3287580836742774, "percentage": 46.58, "elapsed_time": "0:26:07", "remaining_time": "0:29:57", "throughput": 5539.69, "total_tokens": 8682224}
|
|
{"current_steps": 17650, "total_steps": 37885, "loss": 0.0956, "lr": 1.2895351252522502e-06, "epoch": 2.329417975452026, "percentage": 46.59, "elapsed_time": "0:26:07", "remaining_time": "0:29:57", "throughput": 5540.12, "total_tokens": 8684784}
|
|
{"current_steps": 17655, "total_steps": 37885, "loss": 0.0431, "lr": 1.2890941293204525e-06, "epoch": 2.3300778672297744, "percentage": 46.6, "elapsed_time": "0:26:07", "remaining_time": "0:29:56", "throughput": 5540.41, "total_tokens": 8687088}
|
|
{"current_steps": 17660, "total_steps": 37885, "loss": 0.0472, "lr": 1.2886530720305193e-06, "epoch": 2.3307377590075227, "percentage": 46.61, "elapsed_time": "0:26:08", "remaining_time": "0:29:56", "throughput": 5540.61, "total_tokens": 8689264}
|
|
{"current_steps": 17665, "total_steps": 37885, "loss": 0.147, "lr": 1.2882119534760618e-06, "epoch": 2.3313976507852714, "percentage": 46.63, "elapsed_time": "0:26:08", "remaining_time": "0:29:55", "throughput": 5541.03, "total_tokens": 8691760}
|
|
{"current_steps": 17670, "total_steps": 37885, "loss": 0.0008, "lr": 1.2877707737507043e-06, "epoch": 2.3320575425630197, "percentage": 46.64, "elapsed_time": "0:26:08", "remaining_time": "0:29:54", "throughput": 5541.37, "total_tokens": 8694128}
|
|
{"current_steps": 17675, "total_steps": 37885, "loss": 0.0005, "lr": 1.2873295329480837e-06, "epoch": 2.332717434340768, "percentage": 46.65, "elapsed_time": "0:26:09", "remaining_time": "0:29:54", "throughput": 5541.82, "total_tokens": 8696688}
|
|
{"current_steps": 17680, "total_steps": 37885, "loss": 0.1152, "lr": 1.2868882311618505e-06, "epoch": 2.3333773261185167, "percentage": 46.67, "elapsed_time": "0:26:09", "remaining_time": "0:29:53", "throughput": 5542.19, "total_tokens": 8699120}
|
|
{"current_steps": 17685, "total_steps": 37885, "loss": 0.0001, "lr": 1.286446868485668e-06, "epoch": 2.334037217896265, "percentage": 46.68, "elapsed_time": "0:26:09", "remaining_time": "0:29:53", "throughput": 5542.58, "total_tokens": 8701552}
|
|
{"current_steps": 17690, "total_steps": 37885, "loss": 0.0001, "lr": 1.2860054450132116e-06, "epoch": 2.3346971096740132, "percentage": 46.69, "elapsed_time": "0:26:10", "remaining_time": "0:29:52", "throughput": 5543.01, "total_tokens": 8704048}
|
|
{"current_steps": 17695, "total_steps": 37885, "loss": 0.0014, "lr": 1.2855639608381706e-06, "epoch": 2.335357001451762, "percentage": 46.71, "elapsed_time": "0:26:10", "remaining_time": "0:29:52", "throughput": 5543.4, "total_tokens": 8706480}
|
|
{"current_steps": 17700, "total_steps": 37885, "loss": 0.0017, "lr": 1.2851224160542472e-06, "epoch": 2.3360168932295102, "percentage": 46.72, "elapsed_time": "0:26:10", "remaining_time": "0:29:51", "throughput": 5543.86, "total_tokens": 8709040}
|
|
{"current_steps": 17705, "total_steps": 37885, "loss": 0.0613, "lr": 1.2846808107551553e-06, "epoch": 2.336676785007259, "percentage": 46.73, "elapsed_time": "0:26:11", "remaining_time": "0:29:50", "throughput": 5544.25, "total_tokens": 8711472}
|
|
{"current_steps": 17710, "total_steps": 37885, "loss": 0.0097, "lr": 1.2842391450346228e-06, "epoch": 2.3373366767850072, "percentage": 46.75, "elapsed_time": "0:26:11", "remaining_time": "0:29:50", "throughput": 5544.64, "total_tokens": 8713904}
|
|
{"current_steps": 17715, "total_steps": 37885, "loss": 0.1691, "lr": 1.2837974189863902e-06, "epoch": 2.3379965685627555, "percentage": 46.76, "elapsed_time": "0:26:11", "remaining_time": "0:29:49", "throughput": 5544.92, "total_tokens": 8716144}
|
|
{"current_steps": 17720, "total_steps": 37885, "loss": 0.0004, "lr": 1.2833556327042105e-06, "epoch": 2.3386564603405042, "percentage": 46.77, "elapsed_time": "0:26:12", "remaining_time": "0:29:49", "throughput": 5545.23, "total_tokens": 8718448}
|
|
{"current_steps": 17725, "total_steps": 37885, "loss": 0.114, "lr": 1.2829137862818496e-06, "epoch": 2.3393163521182525, "percentage": 46.79, "elapsed_time": "0:26:12", "remaining_time": "0:29:48", "throughput": 5545.45, "total_tokens": 8720624}
|
|
{"current_steps": 17730, "total_steps": 37885, "loss": 0.0005, "lr": 1.2824718798130862e-06, "epoch": 2.3399762438960012, "percentage": 46.8, "elapsed_time": "0:26:12", "remaining_time": "0:29:48", "throughput": 5545.98, "total_tokens": 8723312}
|
|
{"current_steps": 17735, "total_steps": 37885, "loss": 0.1246, "lr": 1.2820299133917122e-06, "epoch": 2.3406361356737495, "percentage": 46.81, "elapsed_time": "0:26:13", "remaining_time": "0:29:47", "throughput": 5546.34, "total_tokens": 8725680}
|
|
{"current_steps": 17740, "total_steps": 37885, "loss": 0.0008, "lr": 1.281587887111531e-06, "epoch": 2.341296027451498, "percentage": 46.83, "elapsed_time": "0:26:13", "remaining_time": "0:29:46", "throughput": 5546.66, "total_tokens": 8727984}
|
|
{"current_steps": 17745, "total_steps": 37885, "loss": 0.0371, "lr": 1.28114580106636e-06, "epoch": 2.3419559192292465, "percentage": 46.84, "elapsed_time": "0:26:13", "remaining_time": "0:29:46", "throughput": 5547.04, "total_tokens": 8730416}
|
|
{"current_steps": 17750, "total_steps": 37885, "loss": 0.0004, "lr": 1.2807036553500286e-06, "epoch": 2.342615811006995, "percentage": 46.85, "elapsed_time": "0:26:14", "remaining_time": "0:29:45", "throughput": 5547.58, "total_tokens": 8733104}
|
|
{"current_steps": 17755, "total_steps": 37885, "loss": 0.0003, "lr": 1.280261450056379e-06, "epoch": 2.3432757027847435, "percentage": 46.87, "elapsed_time": "0:26:14", "remaining_time": "0:29:45", "throughput": 5548.01, "total_tokens": 8735600}
|
|
{"current_steps": 17760, "total_steps": 37885, "loss": 0.0001, "lr": 1.2798191852792662e-06, "epoch": 2.343935594562492, "percentage": 46.88, "elapsed_time": "0:26:14", "remaining_time": "0:29:44", "throughput": 5548.39, "total_tokens": 8738032}
|
|
{"current_steps": 17765, "total_steps": 37885, "loss": 0.0002, "lr": 1.2793768611125576e-06, "epoch": 2.34459548634024, "percentage": 46.89, "elapsed_time": "0:26:15", "remaining_time": "0:29:44", "throughput": 5548.76, "total_tokens": 8740464}
|
|
{"current_steps": 17770, "total_steps": 37885, "loss": 0.0731, "lr": 1.2789344776501333e-06, "epoch": 2.345255378117989, "percentage": 46.91, "elapsed_time": "0:26:15", "remaining_time": "0:29:43", "throughput": 5549.19, "total_tokens": 8742960}
|
|
{"current_steps": 17775, "total_steps": 37885, "loss": 0.0001, "lr": 1.2784920349858858e-06, "epoch": 2.345915269895737, "percentage": 46.92, "elapsed_time": "0:26:15", "remaining_time": "0:29:42", "throughput": 5549.71, "total_tokens": 8745648}
|
|
{"current_steps": 17780, "total_steps": 37885, "loss": 0.0027, "lr": 1.278049533213721e-06, "epoch": 2.346575161673486, "percentage": 46.93, "elapsed_time": "0:26:16", "remaining_time": "0:29:42", "throughput": 5550.21, "total_tokens": 8748272}
|
|
{"current_steps": 17785, "total_steps": 37885, "loss": 0.0004, "lr": 1.2776069724275557e-06, "epoch": 2.347235053451234, "percentage": 46.94, "elapsed_time": "0:26:16", "remaining_time": "0:29:41", "throughput": 5550.67, "total_tokens": 8750832}
|
|
{"current_steps": 17790, "total_steps": 37885, "loss": 0.1713, "lr": 1.277164352721321e-06, "epoch": 2.3478949452289823, "percentage": 46.96, "elapsed_time": "0:26:16", "remaining_time": "0:29:41", "throughput": 5551.02, "total_tokens": 8753200}
|
|
{"current_steps": 17795, "total_steps": 37885, "loss": 0.0009, "lr": 1.27672167418896e-06, "epoch": 2.348554837006731, "percentage": 46.97, "elapsed_time": "0:26:17", "remaining_time": "0:29:40", "throughput": 5551.52, "total_tokens": 8755824}
|
|
{"current_steps": 17800, "total_steps": 37885, "loss": 0.0133, "lr": 1.276278936924427e-06, "epoch": 2.3492147287844793, "percentage": 46.98, "elapsed_time": "0:26:17", "remaining_time": "0:29:40", "throughput": 5551.82, "total_tokens": 8758128}
|
|
{"current_steps": 17805, "total_steps": 37885, "loss": 0.0009, "lr": 1.2758361410216902e-06, "epoch": 2.3498746205622276, "percentage": 47.0, "elapsed_time": "0:26:17", "remaining_time": "0:29:39", "throughput": 5552.21, "total_tokens": 8760624}
|
|
{"current_steps": 17810, "total_steps": 37885, "loss": 0.0004, "lr": 1.2753932865747302e-06, "epoch": 2.3505345123399763, "percentage": 47.01, "elapsed_time": "0:26:18", "remaining_time": "0:29:38", "throughput": 5552.48, "total_tokens": 8762864}
|
|
{"current_steps": 17815, "total_steps": 37885, "loss": 0.0598, "lr": 1.2749503736775395e-06, "epoch": 2.3511944041177246, "percentage": 47.02, "elapsed_time": "0:26:18", "remaining_time": "0:29:38", "throughput": 5552.93, "total_tokens": 8765424}
|
|
{"current_steps": 17820, "total_steps": 37885, "loss": 0.0, "lr": 1.2745074024241227e-06, "epoch": 2.351854295895473, "percentage": 47.04, "elapsed_time": "0:26:18", "remaining_time": "0:29:37", "throughput": 5553.42, "total_tokens": 8768048}
|
|
{"current_steps": 17825, "total_steps": 37885, "loss": 0.0296, "lr": 1.2740643729084974e-06, "epoch": 2.3525141876732216, "percentage": 47.05, "elapsed_time": "0:26:19", "remaining_time": "0:29:37", "throughput": 5553.91, "total_tokens": 8770672}
|
|
{"current_steps": 17830, "total_steps": 37885, "loss": 0.0406, "lr": 1.273621285224694e-06, "epoch": 2.35317407945097, "percentage": 47.06, "elapsed_time": "0:26:19", "remaining_time": "0:29:36", "throughput": 5554.49, "total_tokens": 8773424}
|
|
{"current_steps": 17835, "total_steps": 37885, "loss": 0.0001, "lr": 1.2731781394667538e-06, "epoch": 2.3538339712287186, "percentage": 47.08, "elapsed_time": "0:26:19", "remaining_time": "0:29:36", "throughput": 5554.84, "total_tokens": 8775792}
|
|
{"current_steps": 17840, "total_steps": 37885, "loss": 0.0003, "lr": 1.2727349357287322e-06, "epoch": 2.354493863006467, "percentage": 47.09, "elapsed_time": "0:26:20", "remaining_time": "0:29:35", "throughput": 5555.27, "total_tokens": 8778288}
|
|
{"current_steps": 17845, "total_steps": 37885, "loss": 0.0001, "lr": 1.2722916741046951e-06, "epoch": 2.355153754784215, "percentage": 47.1, "elapsed_time": "0:26:20", "remaining_time": "0:29:34", "throughput": 5555.73, "total_tokens": 8780848}
|
|
{"current_steps": 17850, "total_steps": 37885, "loss": 0.0007, "lr": 1.2718483546887222e-06, "epoch": 2.355813646561964, "percentage": 47.12, "elapsed_time": "0:26:20", "remaining_time": "0:29:34", "throughput": 5556.14, "total_tokens": 8783344}
|
|
{"current_steps": 17855, "total_steps": 37885, "loss": 0.0002, "lr": 1.2714049775749043e-06, "epoch": 2.356473538339712, "percentage": 47.13, "elapsed_time": "0:26:21", "remaining_time": "0:29:33", "throughput": 5556.53, "total_tokens": 8785776}
|
|
{"current_steps": 17860, "total_steps": 37885, "loss": 0.1, "lr": 1.2709615428573454e-06, "epoch": 2.357133430117461, "percentage": 47.14, "elapsed_time": "0:26:21", "remaining_time": "0:29:33", "throughput": 5556.76, "total_tokens": 8787952}
|
|
{"current_steps": 17865, "total_steps": 37885, "loss": 0.2573, "lr": 1.2705180506301614e-06, "epoch": 2.357793321895209, "percentage": 47.16, "elapsed_time": "0:26:21", "remaining_time": "0:29:32", "throughput": 5557.21, "total_tokens": 8790512}
|
|
{"current_steps": 17870, "total_steps": 37885, "loss": 0.0, "lr": 1.2700745009874799e-06, "epoch": 2.3584532136729575, "percentage": 47.17, "elapsed_time": "0:26:22", "remaining_time": "0:29:32", "throughput": 5557.51, "total_tokens": 8792816}
|
|
{"current_steps": 17875, "total_steps": 37885, "loss": 0.0752, "lr": 1.2696308940234414e-06, "epoch": 2.359113105450706, "percentage": 47.18, "elapsed_time": "0:26:22", "remaining_time": "0:29:31", "throughput": 5557.85, "total_tokens": 8795184}
|
|
{"current_steps": 17880, "total_steps": 37885, "loss": 0.1042, "lr": 1.2691872298321978e-06, "epoch": 2.3597729972284545, "percentage": 47.2, "elapsed_time": "0:26:22", "remaining_time": "0:29:30", "throughput": 5558.35, "total_tokens": 8797808}
|
|
{"current_steps": 17885, "total_steps": 37885, "loss": 0.0013, "lr": 1.2687435085079143e-06, "epoch": 2.360432889006203, "percentage": 47.21, "elapsed_time": "0:26:23", "remaining_time": "0:29:30", "throughput": 5558.82, "total_tokens": 8800368}
|
|
{"current_steps": 17890, "total_steps": 37885, "loss": 0.2316, "lr": 1.2682997301447671e-06, "epoch": 2.3610927807839515, "percentage": 47.22, "elapsed_time": "0:26:23", "remaining_time": "0:29:29", "throughput": 5559.31, "total_tokens": 8802992}
|
|
{"current_steps": 17895, "total_steps": 37885, "loss": 0.0001, "lr": 1.267855894836945e-06, "epoch": 2.3617526725616997, "percentage": 47.24, "elapsed_time": "0:26:23", "remaining_time": "0:29:29", "throughput": 5559.87, "total_tokens": 8805744}
|
|
{"current_steps": 17900, "total_steps": 37885, "loss": 0.0021, "lr": 1.267412002678649e-06, "epoch": 2.3624125643394485, "percentage": 47.25, "elapsed_time": "0:26:24", "remaining_time": "0:29:28", "throughput": 5560.15, "total_tokens": 8807984}
|
|
{"current_steps": 17905, "total_steps": 37885, "loss": 0.0008, "lr": 1.2669680537640916e-06, "epoch": 2.3630724561171967, "percentage": 47.26, "elapsed_time": "0:26:24", "remaining_time": "0:29:28", "throughput": 5560.57, "total_tokens": 8810480}
|
|
{"current_steps": 17910, "total_steps": 37885, "loss": 0.001, "lr": 1.2665240481874986e-06, "epoch": 2.3637323478949455, "percentage": 47.27, "elapsed_time": "0:26:24", "remaining_time": "0:29:27", "throughput": 5560.87, "total_tokens": 8812784}
|
|
{"current_steps": 17915, "total_steps": 37885, "loss": 0.0002, "lr": 1.266079986043106e-06, "epoch": 2.3643922396726937, "percentage": 47.29, "elapsed_time": "0:26:25", "remaining_time": "0:29:26", "throughput": 5561.32, "total_tokens": 8815344}
|
|
{"current_steps": 17920, "total_steps": 37885, "loss": 0.1032, "lr": 1.2656358674251633e-06, "epoch": 2.365052131450442, "percentage": 47.3, "elapsed_time": "0:26:25", "remaining_time": "0:29:26", "throughput": 5561.71, "total_tokens": 8817776}
|
|
{"current_steps": 17925, "total_steps": 37885, "loss": 0.0633, "lr": 1.2651916924279311e-06, "epoch": 2.3657120232281907, "percentage": 47.31, "elapsed_time": "0:26:25", "remaining_time": "0:29:25", "throughput": 5562.23, "total_tokens": 8820464}
|
|
{"current_steps": 17930, "total_steps": 37885, "loss": 0.1189, "lr": 1.2647474611456827e-06, "epoch": 2.366371915005939, "percentage": 47.33, "elapsed_time": "0:26:26", "remaining_time": "0:29:25", "throughput": 5562.84, "total_tokens": 8823280}
|
|
{"current_steps": 17935, "total_steps": 37885, "loss": 0.127, "lr": 1.2643031736727029e-06, "epoch": 2.3670318067836873, "percentage": 47.34, "elapsed_time": "0:26:26", "remaining_time": "0:29:24", "throughput": 5563.26, "total_tokens": 8825776}
|
|
{"current_steps": 17940, "total_steps": 37885, "loss": 0.1263, "lr": 1.2638588301032883e-06, "epoch": 2.367691698561436, "percentage": 47.35, "elapsed_time": "0:26:26", "remaining_time": "0:29:24", "throughput": 5563.53, "total_tokens": 8828016}
|
|
{"current_steps": 17945, "total_steps": 37885, "loss": 0.0705, "lr": 1.2634144305317479e-06, "epoch": 2.3683515903391843, "percentage": 47.37, "elapsed_time": "0:26:27", "remaining_time": "0:29:23", "throughput": 5563.76, "total_tokens": 8830192}
|
|
{"current_steps": 17950, "total_steps": 37885, "loss": 0.0828, "lr": 1.2629699750524017e-06, "epoch": 2.3690114821169326, "percentage": 47.38, "elapsed_time": "0:26:27", "remaining_time": "0:29:22", "throughput": 5564.14, "total_tokens": 8832624}
|
|
{"current_steps": 17955, "total_steps": 37885, "loss": 0.0549, "lr": 1.2625254637595829e-06, "epoch": 2.3696713738946813, "percentage": 47.39, "elapsed_time": "0:26:27", "remaining_time": "0:29:22", "throughput": 5564.63, "total_tokens": 8835248}
|
|
{"current_steps": 17960, "total_steps": 37885, "loss": 0.0008, "lr": 1.2620808967476352e-06, "epoch": 2.3703312656724296, "percentage": 47.41, "elapsed_time": "0:26:28", "remaining_time": "0:29:21", "throughput": 5565.19, "total_tokens": 8838000}
|
|
{"current_steps": 17965, "total_steps": 37885, "loss": 0.0022, "lr": 1.2616362741109154e-06, "epoch": 2.3709911574501783, "percentage": 47.42, "elapsed_time": "0:26:28", "remaining_time": "0:29:21", "throughput": 5565.71, "total_tokens": 8840688}
|
|
{"current_steps": 17970, "total_steps": 37885, "loss": 0.1106, "lr": 1.2611915959437908e-06, "epoch": 2.3716510492279266, "percentage": 47.43, "elapsed_time": "0:26:28", "remaining_time": "0:29:20", "throughput": 5566.09, "total_tokens": 8843120}
|
|
{"current_steps": 17975, "total_steps": 37885, "loss": 0.0799, "lr": 1.2607468623406415e-06, "epoch": 2.372310941005675, "percentage": 47.45, "elapsed_time": "0:26:29", "remaining_time": "0:29:20", "throughput": 5566.51, "total_tokens": 8845616}
|
|
{"current_steps": 17980, "total_steps": 37885, "loss": 0.002, "lr": 1.2603020733958588e-06, "epoch": 2.3729708327834236, "percentage": 47.46, "elapsed_time": "0:26:29", "remaining_time": "0:29:19", "throughput": 5566.78, "total_tokens": 8847856}
|
|
{"current_steps": 17985, "total_steps": 37885, "loss": 0.0003, "lr": 1.2598572292038459e-06, "epoch": 2.373630724561172, "percentage": 47.47, "elapsed_time": "0:26:29", "remaining_time": "0:29:19", "throughput": 5567.28, "total_tokens": 8850480}
|
|
{"current_steps": 17990, "total_steps": 37885, "loss": 0.0006, "lr": 1.2594123298590177e-06, "epoch": 2.3742906163389206, "percentage": 47.49, "elapsed_time": "0:26:30", "remaining_time": "0:29:18", "throughput": 5567.84, "total_tokens": 8853232}
|
|
{"current_steps": 17995, "total_steps": 37885, "loss": 0.0675, "lr": 1.2589673754558014e-06, "epoch": 2.374950508116669, "percentage": 47.5, "elapsed_time": "0:26:30", "remaining_time": "0:29:17", "throughput": 5568.21, "total_tokens": 8855664}
|
|
{"current_steps": 18000, "total_steps": 37885, "loss": 0.0001, "lr": 1.2585223660886347e-06, "epoch": 2.375610399894417, "percentage": 47.51, "elapsed_time": "0:26:30", "remaining_time": "0:29:17", "throughput": 5568.62, "total_tokens": 8858160}
|
|
{"current_steps": 18005, "total_steps": 37885, "loss": 0.0011, "lr": 1.258077301851968e-06, "epoch": 2.376270291672166, "percentage": 47.53, "elapsed_time": "0:26:31", "remaining_time": "0:29:16", "throughput": 5568.93, "total_tokens": 8860464}
|
|
{"current_steps": 18010, "total_steps": 37885, "loss": 0.0613, "lr": 1.2576321828402627e-06, "epoch": 2.376930183449914, "percentage": 47.54, "elapsed_time": "0:26:31", "remaining_time": "0:29:16", "throughput": 5569.32, "total_tokens": 8862896}
|
|
{"current_steps": 18015, "total_steps": 37885, "loss": 0.0488, "lr": 1.2571870091479921e-06, "epoch": 2.377590075227663, "percentage": 47.55, "elapsed_time": "0:26:31", "remaining_time": "0:29:15", "throughput": 5569.65, "total_tokens": 8865264}
|
|
{"current_steps": 18020, "total_steps": 37885, "loss": 0.0703, "lr": 1.2567417808696416e-06, "epoch": 2.378249967005411, "percentage": 47.56, "elapsed_time": "0:26:32", "remaining_time": "0:29:15", "throughput": 5570.06, "total_tokens": 8867760}
|
|
{"current_steps": 18025, "total_steps": 37885, "loss": 0.0002, "lr": 1.2562964980997072e-06, "epoch": 2.3789098587831594, "percentage": 47.58, "elapsed_time": "0:26:32", "remaining_time": "0:29:14", "throughput": 5570.58, "total_tokens": 8870448}
|
|
{"current_steps": 18030, "total_steps": 37885, "loss": 0.046, "lr": 1.2558511609326968e-06, "epoch": 2.379569750560908, "percentage": 47.59, "elapsed_time": "0:26:32", "remaining_time": "0:29:13", "throughput": 5571.1, "total_tokens": 8873136}
|
|
{"current_steps": 18035, "total_steps": 37885, "loss": 0.0041, "lr": 1.2554057694631302e-06, "epoch": 2.3802296423386564, "percentage": 47.6, "elapsed_time": "0:26:33", "remaining_time": "0:29:13", "throughput": 5571.51, "total_tokens": 8875632}
|
|
{"current_steps": 18040, "total_steps": 37885, "loss": 0.0006, "lr": 1.2549603237855386e-06, "epoch": 2.380889534116405, "percentage": 47.62, "elapsed_time": "0:26:33", "remaining_time": "0:29:12", "throughput": 5572.08, "total_tokens": 8878384}
|
|
{"current_steps": 18045, "total_steps": 37885, "loss": 0.0615, "lr": 1.2545148239944644e-06, "epoch": 2.3815494258941534, "percentage": 47.63, "elapsed_time": "0:26:33", "remaining_time": "0:29:12", "throughput": 5572.54, "total_tokens": 8880944}
|
|
{"current_steps": 18050, "total_steps": 37885, "loss": 0.0002, "lr": 1.2540692701844625e-06, "epoch": 2.3822093176719017, "percentage": 47.64, "elapsed_time": "0:26:34", "remaining_time": "0:29:11", "throughput": 5573.03, "total_tokens": 8883568}
|
|
{"current_steps": 18055, "total_steps": 37885, "loss": 0.052, "lr": 1.253623662450097e-06, "epoch": 2.3828692094496504, "percentage": 47.66, "elapsed_time": "0:26:34", "remaining_time": "0:29:11", "throughput": 5573.43, "total_tokens": 8886064}
|
|
{"current_steps": 18060, "total_steps": 37885, "loss": 0.0003, "lr": 1.2531780008859464e-06, "epoch": 2.3835291012273987, "percentage": 47.67, "elapsed_time": "0:26:34", "remaining_time": "0:29:10", "throughput": 5573.99, "total_tokens": 8888816}
|
|
{"current_steps": 18065, "total_steps": 37885, "loss": 0.0738, "lr": 1.252732285586598e-06, "epoch": 2.384188993005147, "percentage": 47.68, "elapsed_time": "0:26:35", "remaining_time": "0:29:09", "throughput": 5574.37, "total_tokens": 8891248}
|
|
{"current_steps": 18070, "total_steps": 37885, "loss": 0.0487, "lr": 1.2522865166466528e-06, "epoch": 2.3848488847828957, "percentage": 47.7, "elapsed_time": "0:26:35", "remaining_time": "0:29:09", "throughput": 5574.82, "total_tokens": 8893808}
|
|
{"current_steps": 18075, "total_steps": 37885, "loss": 0.0383, "lr": 1.2518406941607207e-06, "epoch": 2.385508776560644, "percentage": 47.71, "elapsed_time": "0:26:35", "remaining_time": "0:29:08", "throughput": 5575.24, "total_tokens": 8896304}
|
|
{"current_steps": 18080, "total_steps": 37885, "loss": 0.0004, "lr": 1.2513948182234253e-06, "epoch": 2.3861686683383927, "percentage": 47.72, "elapsed_time": "0:26:36", "remaining_time": "0:29:08", "throughput": 5575.57, "total_tokens": 8898672}
|
|
{"current_steps": 18085, "total_steps": 37885, "loss": 0.0002, "lr": 1.2509488889293998e-06, "epoch": 2.386828560116141, "percentage": 47.74, "elapsed_time": "0:26:36", "remaining_time": "0:29:07", "throughput": 5575.99, "total_tokens": 8901168}
|
|
{"current_steps": 18090, "total_steps": 37885, "loss": 0.0005, "lr": 1.2505029063732898e-06, "epoch": 2.3874884518938893, "percentage": 47.75, "elapsed_time": "0:26:36", "remaining_time": "0:29:07", "throughput": 5576.37, "total_tokens": 8903600}
|
|
{"current_steps": 18095, "total_steps": 37885, "loss": 0.0954, "lr": 1.2500568706497526e-06, "epoch": 2.388148343671638, "percentage": 47.76, "elapsed_time": "0:26:36", "remaining_time": "0:29:06", "throughput": 5576.75, "total_tokens": 8906032}
|
|
{"current_steps": 18100, "total_steps": 37885, "loss": 0.0035, "lr": 1.2496107818534548e-06, "epoch": 2.3888082354493863, "percentage": 47.78, "elapsed_time": "0:26:37", "remaining_time": "0:29:06", "throughput": 5577.08, "total_tokens": 8908400}
|
|
{"current_steps": 18105, "total_steps": 37885, "loss": 0.0573, "lr": 1.2491646400790766e-06, "epoch": 2.3894681272271345, "percentage": 47.79, "elapsed_time": "0:26:37", "remaining_time": "0:29:05", "throughput": 5577.46, "total_tokens": 8910832}
|
|
{"current_steps": 18110, "total_steps": 37885, "loss": 0.0613, "lr": 1.2487184454213073e-06, "epoch": 2.3901280190048833, "percentage": 47.8, "elapsed_time": "0:26:37", "remaining_time": "0:29:04", "throughput": 5577.8, "total_tokens": 8913200}
|
|
{"current_steps": 18115, "total_steps": 37885, "loss": 0.0002, "lr": 1.2482721979748494e-06, "epoch": 2.3907879107826315, "percentage": 47.82, "elapsed_time": "0:26:38", "remaining_time": "0:29:04", "throughput": 5578.13, "total_tokens": 8915568}
|
|
{"current_steps": 18120, "total_steps": 37885, "loss": 0.0691, "lr": 1.2478258978344149e-06, "epoch": 2.3914478025603803, "percentage": 47.83, "elapsed_time": "0:26:38", "remaining_time": "0:29:03", "throughput": 5578.32, "total_tokens": 8917680}
|
|
{"current_steps": 18125, "total_steps": 37885, "loss": 0.0109, "lr": 1.2473795450947287e-06, "epoch": 2.3921076943381285, "percentage": 47.84, "elapsed_time": "0:26:38", "remaining_time": "0:29:03", "throughput": 5578.69, "total_tokens": 8920112}
|
|
{"current_steps": 18130, "total_steps": 37885, "loss": 0.0002, "lr": 1.2469331398505254e-06, "epoch": 2.392767586115877, "percentage": 47.86, "elapsed_time": "0:26:39", "remaining_time": "0:29:02", "throughput": 5579.08, "total_tokens": 8922544}
|
|
{"current_steps": 18135, "total_steps": 37885, "loss": 0.094, "lr": 1.246486682196551e-06, "epoch": 2.3934274778936255, "percentage": 47.87, "elapsed_time": "0:26:39", "remaining_time": "0:29:02", "throughput": 5579.49, "total_tokens": 8925040}
|
|
{"current_steps": 18140, "total_steps": 37885, "loss": 0.0004, "lr": 1.2460401722275633e-06, "epoch": 2.394087369671374, "percentage": 47.88, "elapsed_time": "0:26:39", "remaining_time": "0:29:01", "throughput": 5579.83, "total_tokens": 8927408}
|
|
{"current_steps": 18145, "total_steps": 37885, "loss": 0.1117, "lr": 1.2455936100383309e-06, "epoch": 2.3947472614491225, "percentage": 47.89, "elapsed_time": "0:26:40", "remaining_time": "0:29:00", "throughput": 5580.2, "total_tokens": 8929840}
|
|
{"current_steps": 18150, "total_steps": 37885, "loss": 0.0003, "lr": 1.2451469957236334e-06, "epoch": 2.395407153226871, "percentage": 47.91, "elapsed_time": "0:26:40", "remaining_time": "0:29:00", "throughput": 5580.56, "total_tokens": 8932272}
|
|
{"current_steps": 18155, "total_steps": 37885, "loss": 0.0591, "lr": 1.2447003293782607e-06, "epoch": 2.396067045004619, "percentage": 47.92, "elapsed_time": "0:26:40", "remaining_time": "0:28:59", "throughput": 5580.91, "total_tokens": 8934640}
|
|
{"current_steps": 18160, "total_steps": 37885, "loss": 0.0025, "lr": 1.2442536110970152e-06, "epoch": 2.396726936782368, "percentage": 47.93, "elapsed_time": "0:26:41", "remaining_time": "0:28:59", "throughput": 5581.35, "total_tokens": 8937200}
|
|
{"current_steps": 18165, "total_steps": 37885, "loss": 0.0002, "lr": 1.2438068409747097e-06, "epoch": 2.397386828560116, "percentage": 47.95, "elapsed_time": "0:26:41", "remaining_time": "0:28:58", "throughput": 5581.69, "total_tokens": 8939568}
|
|
{"current_steps": 18170, "total_steps": 37885, "loss": 0.0413, "lr": 1.2433600191061677e-06, "epoch": 2.398046720337865, "percentage": 47.96, "elapsed_time": "0:26:41", "remaining_time": "0:28:58", "throughput": 5582.06, "total_tokens": 8942000}
|
|
{"current_steps": 18175, "total_steps": 37885, "loss": 0.0088, "lr": 1.242913145586224e-06, "epoch": 2.398706612115613, "percentage": 47.97, "elapsed_time": "0:26:42", "remaining_time": "0:28:57", "throughput": 5582.58, "total_tokens": 8944688}
|
|
{"current_steps": 18180, "total_steps": 37885, "loss": 0.0345, "lr": 1.2424662205097241e-06, "epoch": 2.3993665038933614, "percentage": 47.99, "elapsed_time": "0:26:42", "remaining_time": "0:28:57", "throughput": 5583.05, "total_tokens": 8947312}
|
|
{"current_steps": 18185, "total_steps": 37885, "loss": 0.0002, "lr": 1.2420192439715247e-06, "epoch": 2.40002639567111, "percentage": 48.0, "elapsed_time": "0:26:42", "remaining_time": "0:28:56", "throughput": 5583.45, "total_tokens": 8949808}
|
|
{"current_steps": 18190, "total_steps": 37885, "loss": 0.0003, "lr": 1.2415722160664933e-06, "epoch": 2.4006862874488584, "percentage": 48.01, "elapsed_time": "0:26:43", "remaining_time": "0:28:55", "throughput": 5583.76, "total_tokens": 8952112}
|
|
{"current_steps": 18195, "total_steps": 37885, "loss": 0.1239, "lr": 1.2411251368895085e-06, "epoch": 2.4013461792266066, "percentage": 48.03, "elapsed_time": "0:26:43", "remaining_time": "0:28:55", "throughput": 5584.28, "total_tokens": 8954800}
|
|
{"current_steps": 18200, "total_steps": 37885, "loss": 0.0001, "lr": 1.2406780065354592e-06, "epoch": 2.4020060710043554, "percentage": 48.04, "elapsed_time": "0:26:43", "remaining_time": "0:28:54", "throughput": 5584.73, "total_tokens": 8957360}
|
|
{"current_steps": 18205, "total_steps": 37885, "loss": 0.0001, "lr": 1.240230825099246e-06, "epoch": 2.4026659627821036, "percentage": 48.05, "elapsed_time": "0:26:44", "remaining_time": "0:28:54", "throughput": 5585.28, "total_tokens": 8960112}
|
|
{"current_steps": 18210, "total_steps": 37885, "loss": 0.0006, "lr": 1.2397835926757798e-06, "epoch": 2.4033258545598524, "percentage": 48.07, "elapsed_time": "0:26:44", "remaining_time": "0:28:53", "throughput": 5585.69, "total_tokens": 8962608}
|
|
{"current_steps": 18215, "total_steps": 37885, "loss": 0.133, "lr": 1.2393363093599823e-06, "epoch": 2.4039857463376006, "percentage": 48.08, "elapsed_time": "0:26:44", "remaining_time": "0:28:53", "throughput": 5586.06, "total_tokens": 8965040}
|
|
{"current_steps": 18220, "total_steps": 37885, "loss": 0.0659, "lr": 1.2388889752467867e-06, "epoch": 2.404645638115349, "percentage": 48.09, "elapsed_time": "0:26:45", "remaining_time": "0:28:52", "throughput": 5586.29, "total_tokens": 8967216}
|
|
{"current_steps": 18225, "total_steps": 37885, "loss": 0.0, "lr": 1.2384415904311357e-06, "epoch": 2.4053055298930976, "percentage": 48.11, "elapsed_time": "0:26:45", "remaining_time": "0:28:51", "throughput": 5586.69, "total_tokens": 8969712}
|
|
{"current_steps": 18230, "total_steps": 37885, "loss": 0.0001, "lr": 1.2379941550079836e-06, "epoch": 2.405965421670846, "percentage": 48.12, "elapsed_time": "0:26:45", "remaining_time": "0:28:51", "throughput": 5587.11, "total_tokens": 8972208}
|
|
{"current_steps": 18235, "total_steps": 37885, "loss": 0.105, "lr": 1.2375466690722957e-06, "epoch": 2.406625313448594, "percentage": 48.13, "elapsed_time": "0:26:46", "remaining_time": "0:28:50", "throughput": 5587.55, "total_tokens": 8974768}
|
|
{"current_steps": 18240, "total_steps": 37885, "loss": 0.0736, "lr": 1.2370991327190473e-06, "epoch": 2.407285205226343, "percentage": 48.15, "elapsed_time": "0:26:46", "remaining_time": "0:28:50", "throughput": 5587.92, "total_tokens": 8977200}
|
|
{"current_steps": 18245, "total_steps": 37885, "loss": 0.0, "lr": 1.2366515460432255e-06, "epoch": 2.407945097004091, "percentage": 48.16, "elapsed_time": "0:26:46", "remaining_time": "0:28:49", "throughput": 5588.25, "total_tokens": 8979568}
|
|
{"current_steps": 18250, "total_steps": 37885, "loss": 0.0907, "lr": 1.2362039091398259e-06, "epoch": 2.40860498878184, "percentage": 48.17, "elapsed_time": "0:26:47", "remaining_time": "0:28:49", "throughput": 5588.58, "total_tokens": 8981936}
|
|
{"current_steps": 18255, "total_steps": 37885, "loss": 0.1141, "lr": 1.235756222103858e-06, "epoch": 2.409264880559588, "percentage": 48.19, "elapsed_time": "0:26:47", "remaining_time": "0:28:48", "throughput": 5588.95, "total_tokens": 8984368}
|
|
{"current_steps": 18260, "total_steps": 37885, "loss": 0.1377, "lr": 1.2353084850303386e-06, "epoch": 2.4099247723373365, "percentage": 48.2, "elapsed_time": "0:26:47", "remaining_time": "0:28:48", "throughput": 5589.29, "total_tokens": 8986736}
|
|
{"current_steps": 18265, "total_steps": 37885, "loss": 0.1191, "lr": 1.2348606980142973e-06, "epoch": 2.410584664115085, "percentage": 48.21, "elapsed_time": "0:26:48", "remaining_time": "0:28:47", "throughput": 5589.39, "total_tokens": 8988720}
|
|
{"current_steps": 18270, "total_steps": 37885, "loss": 0.0002, "lr": 1.2344128611507733e-06, "epoch": 2.4112445558928335, "percentage": 48.22, "elapsed_time": "0:26:48", "remaining_time": "0:28:46", "throughput": 5589.66, "total_tokens": 8990960}
|
|
{"current_steps": 18275, "total_steps": 37885, "loss": 0.0002, "lr": 1.2339649745348176e-06, "epoch": 2.411904447670582, "percentage": 48.24, "elapsed_time": "0:26:48", "remaining_time": "0:28:46", "throughput": 5589.99, "total_tokens": 8993328}
|
|
{"current_steps": 18280, "total_steps": 37885, "loss": 0.0004, "lr": 1.23351703826149e-06, "epoch": 2.4125643394483305, "percentage": 48.25, "elapsed_time": "0:26:49", "remaining_time": "0:28:45", "throughput": 5590.29, "total_tokens": 8995632}
|
|
{"current_steps": 18285, "total_steps": 37885, "loss": 0.046, "lr": 1.2330690524258618e-06, "epoch": 2.4132242312260788, "percentage": 48.26, "elapsed_time": "0:26:49", "remaining_time": "0:28:45", "throughput": 5590.77, "total_tokens": 8998256}
|
|
{"current_steps": 18290, "total_steps": 37885, "loss": 0.1114, "lr": 1.2326210171230152e-06, "epoch": 2.4138841230038275, "percentage": 48.28, "elapsed_time": "0:26:49", "remaining_time": "0:28:44", "throughput": 5590.95, "total_tokens": 9000368}
|
|
{"current_steps": 18295, "total_steps": 37885, "loss": 0.0758, "lr": 1.2321729324480422e-06, "epoch": 2.4145440147815758, "percentage": 48.29, "elapsed_time": "0:26:50", "remaining_time": "0:28:44", "throughput": 5591.31, "total_tokens": 9002800}
|
|
{"current_steps": 18300, "total_steps": 37885, "loss": 0.0675, "lr": 1.2317247984960455e-06, "epoch": 2.4152039065593245, "percentage": 48.3, "elapsed_time": "0:26:50", "remaining_time": "0:28:43", "throughput": 5591.68, "total_tokens": 9005232}
|
|
{"current_steps": 18305, "total_steps": 37885, "loss": 0.0001, "lr": 1.2312766153621383e-06, "epoch": 2.4158637983370728, "percentage": 48.32, "elapsed_time": "0:26:50", "remaining_time": "0:28:42", "throughput": 5592.2, "total_tokens": 9007920}
|
|
{"current_steps": 18310, "total_steps": 37885, "loss": 0.0005, "lr": 1.2308283831414444e-06, "epoch": 2.416523690114821, "percentage": 48.33, "elapsed_time": "0:26:51", "remaining_time": "0:28:42", "throughput": 5592.62, "total_tokens": 9010416}
|
|
{"current_steps": 18315, "total_steps": 37885, "loss": 0.0003, "lr": 1.2303801019290978e-06, "epoch": 2.4171835818925698, "percentage": 48.34, "elapsed_time": "0:26:51", "remaining_time": "0:28:41", "throughput": 5593.16, "total_tokens": 9013168}
|
|
{"current_steps": 18320, "total_steps": 37885, "loss": 0.0476, "lr": 1.2299317718202424e-06, "epoch": 2.417843473670318, "percentage": 48.36, "elapsed_time": "0:26:51", "remaining_time": "0:28:41", "throughput": 5593.59, "total_tokens": 9015728}
|
|
{"current_steps": 18325, "total_steps": 37885, "loss": 0.0326, "lr": 1.229483392910034e-06, "epoch": 2.4185033654480668, "percentage": 48.37, "elapsed_time": "0:26:52", "remaining_time": "0:28:40", "throughput": 5594.1, "total_tokens": 9018416}
|
|
{"current_steps": 18330, "total_steps": 37885, "loss": 0.0002, "lr": 1.229034965293637e-06, "epoch": 2.419163257225815, "percentage": 48.38, "elapsed_time": "0:26:52", "remaining_time": "0:28:40", "throughput": 5594.53, "total_tokens": 9020976}
|
|
{"current_steps": 18335, "total_steps": 37885, "loss": 0.0002, "lr": 1.2285864890662272e-06, "epoch": 2.4198231490035633, "percentage": 48.4, "elapsed_time": "0:26:52", "remaining_time": "0:28:39", "throughput": 5595.09, "total_tokens": 9023728}
|
|
{"current_steps": 18340, "total_steps": 37885, "loss": 0.0007, "lr": 1.2281379643229904e-06, "epoch": 2.420483040781312, "percentage": 48.41, "elapsed_time": "0:26:53", "remaining_time": "0:28:39", "throughput": 5595.43, "total_tokens": 9026096}
|
|
{"current_steps": 18345, "total_steps": 37885, "loss": 0.0509, "lr": 1.2276893911591226e-06, "epoch": 2.4211429325590603, "percentage": 48.42, "elapsed_time": "0:26:53", "remaining_time": "0:28:38", "throughput": 5595.85, "total_tokens": 9028656}
|
|
{"current_steps": 18350, "total_steps": 37885, "loss": 0.0002, "lr": 1.2272407696698303e-06, "epoch": 2.4218028243368086, "percentage": 48.44, "elapsed_time": "0:26:53", "remaining_time": "0:28:38", "throughput": 5596.44, "total_tokens": 9031472}
|
|
{"current_steps": 18355, "total_steps": 37885, "loss": 0.0005, "lr": 1.2267920999503302e-06, "epoch": 2.4224627161145573, "percentage": 48.45, "elapsed_time": "0:26:54", "remaining_time": "0:28:37", "throughput": 5597.05, "total_tokens": 9034352}
|
|
{"current_steps": 18360, "total_steps": 37885, "loss": 0.0001, "lr": 1.2263433820958494e-06, "epoch": 2.4231226078923056, "percentage": 48.46, "elapsed_time": "0:26:54", "remaining_time": "0:28:36", "throughput": 5597.39, "total_tokens": 9036720}
|
|
{"current_steps": 18365, "total_steps": 37885, "loss": 0.0003, "lr": 1.2258946162016247e-06, "epoch": 2.423782499670054, "percentage": 48.48, "elapsed_time": "0:26:54", "remaining_time": "0:28:36", "throughput": 5597.8, "total_tokens": 9039216}
|
|
{"current_steps": 18370, "total_steps": 37885, "loss": 0.0595, "lr": 1.2254458023629035e-06, "epoch": 2.4244423914478026, "percentage": 48.49, "elapsed_time": "0:26:55", "remaining_time": "0:28:35", "throughput": 5598.12, "total_tokens": 9041584}
|
|
{"current_steps": 18375, "total_steps": 37885, "loss": 0.0442, "lr": 1.2249969406749432e-06, "epoch": 2.425102283225551, "percentage": 48.5, "elapsed_time": "0:26:55", "remaining_time": "0:28:35", "throughput": 5598.41, "total_tokens": 9043888}
|
|
{"current_steps": 18380, "total_steps": 37885, "loss": 0.0003, "lr": 1.2245480312330117e-06, "epoch": 2.4257621750032996, "percentage": 48.52, "elapsed_time": "0:26:55", "remaining_time": "0:28:34", "throughput": 5598.78, "total_tokens": 9046320}
|
|
{"current_steps": 18385, "total_steps": 37885, "loss": 0.0675, "lr": 1.2240990741323867e-06, "epoch": 2.426422066781048, "percentage": 48.53, "elapsed_time": "0:26:56", "remaining_time": "0:28:34", "throughput": 5599.22, "total_tokens": 9048880}
|
|
{"current_steps": 18390, "total_steps": 37885, "loss": 0.0, "lr": 1.2236500694683555e-06, "epoch": 2.427081958558796, "percentage": 48.54, "elapsed_time": "0:26:56", "remaining_time": "0:28:33", "throughput": 5599.58, "total_tokens": 9051312}
|
|
{"current_steps": 18395, "total_steps": 37885, "loss": 0.1489, "lr": 1.223201017336217e-06, "epoch": 2.427741850336545, "percentage": 48.55, "elapsed_time": "0:26:56", "remaining_time": "0:28:33", "throughput": 5600.09, "total_tokens": 9054000}
|
|
{"current_steps": 18400, "total_steps": 37885, "loss": 0.001, "lr": 1.222751917831279e-06, "epoch": 2.428401742114293, "percentage": 48.57, "elapsed_time": "0:26:57", "remaining_time": "0:28:32", "throughput": 5600.34, "total_tokens": 9056240}
|
|
{"current_steps": 18405, "total_steps": 37885, "loss": 0.1132, "lr": 1.2223027710488591e-06, "epoch": 2.429061633892042, "percentage": 48.58, "elapsed_time": "0:26:57", "remaining_time": "0:28:31", "throughput": 5600.71, "total_tokens": 9058672}
|
|
{"current_steps": 18410, "total_steps": 37885, "loss": 0.0001, "lr": 1.221853577084286e-06, "epoch": 2.42972152566979, "percentage": 48.59, "elapsed_time": "0:26:57", "remaining_time": "0:28:31", "throughput": 5601.07, "total_tokens": 9061104}
|
|
{"current_steps": 18415, "total_steps": 37885, "loss": 0.1807, "lr": 1.221404336032898e-06, "epoch": 2.4303814174475384, "percentage": 48.61, "elapsed_time": "0:26:58", "remaining_time": "0:28:30", "throughput": 5601.29, "total_tokens": 9063280}
|
|
{"current_steps": 18420, "total_steps": 37885, "loss": 0.0442, "lr": 1.2209550479900425e-06, "epoch": 2.431041309225287, "percentage": 48.62, "elapsed_time": "0:26:58", "remaining_time": "0:28:30", "throughput": 5601.75, "total_tokens": 9065840}
|
|
{"current_steps": 18425, "total_steps": 37885, "loss": 0.0, "lr": 1.2205057130510783e-06, "epoch": 2.4317012010030354, "percentage": 48.63, "elapsed_time": "0:26:58", "remaining_time": "0:28:29", "throughput": 5601.96, "total_tokens": 9068016}
|
|
{"current_steps": 18430, "total_steps": 37885, "loss": 0.0813, "lr": 1.2200563313113732e-06, "epoch": 2.432361092780784, "percentage": 48.65, "elapsed_time": "0:26:59", "remaining_time": "0:28:29", "throughput": 5602.33, "total_tokens": 9070448}
|
|
{"current_steps": 18435, "total_steps": 37885, "loss": 0.0473, "lr": 1.2196069028663057e-06, "epoch": 2.4330209845585324, "percentage": 48.66, "elapsed_time": "0:26:59", "remaining_time": "0:28:28", "throughput": 5602.69, "total_tokens": 9072880}
|
|
{"current_steps": 18440, "total_steps": 37885, "loss": 0.0002, "lr": 1.219157427811263e-06, "epoch": 2.4336808763362807, "percentage": 48.67, "elapsed_time": "0:26:59", "remaining_time": "0:28:27", "throughput": 5603.01, "total_tokens": 9075248}
|
|
{"current_steps": 18445, "total_steps": 37885, "loss": 0.0627, "lr": 1.218707906241643e-06, "epoch": 2.4343407681140294, "percentage": 48.69, "elapsed_time": "0:27:00", "remaining_time": "0:28:27", "throughput": 5603.41, "total_tokens": 9077744}
|
|
{"current_steps": 18450, "total_steps": 37885, "loss": 0.0003, "lr": 1.2182583382528543e-06, "epoch": 2.4350006598917777, "percentage": 48.7, "elapsed_time": "0:27:00", "remaining_time": "0:28:26", "throughput": 5603.99, "total_tokens": 9080560}
|
|
{"current_steps": 18455, "total_steps": 37885, "loss": 0.0109, "lr": 1.2178087239403133e-06, "epoch": 2.4356605516695264, "percentage": 48.71, "elapsed_time": "0:27:00", "remaining_time": "0:28:26", "throughput": 5604.36, "total_tokens": 9082992}
|
|
{"current_steps": 18460, "total_steps": 37885, "loss": 0.0844, "lr": 1.2173590633994479e-06, "epoch": 2.4363204434472747, "percentage": 48.73, "elapsed_time": "0:27:01", "remaining_time": "0:28:25", "throughput": 5604.77, "total_tokens": 9085552}
|
|
{"current_steps": 18465, "total_steps": 37885, "loss": 0.0001, "lr": 1.2169093567256955e-06, "epoch": 2.436980335225023, "percentage": 48.74, "elapsed_time": "0:27:01", "remaining_time": "0:28:25", "throughput": 5604.96, "total_tokens": 9087728}
|
|
{"current_steps": 18470, "total_steps": 37885, "loss": 0.0004, "lr": 1.2164596040145028e-06, "epoch": 2.4376402270027717, "percentage": 48.75, "elapsed_time": "0:27:01", "remaining_time": "0:28:24", "throughput": 5605.21, "total_tokens": 9089968}
|
|
{"current_steps": 18475, "total_steps": 37885, "loss": 0.0011, "lr": 1.2160098053613267e-06, "epoch": 2.43830011878052, "percentage": 48.77, "elapsed_time": "0:27:02", "remaining_time": "0:28:24", "throughput": 5605.6, "total_tokens": 9092464}
|
|
{"current_steps": 18480, "total_steps": 37885, "loss": 0.0345, "lr": 1.2155599608616331e-06, "epoch": 2.4389600105582683, "percentage": 48.78, "elapsed_time": "0:27:02", "remaining_time": "0:28:23", "throughput": 5606.05, "total_tokens": 9095088}
|
|
{"current_steps": 18485, "total_steps": 37885, "loss": 0.0006, "lr": 1.2151100706108996e-06, "epoch": 2.439619902336017, "percentage": 48.79, "elapsed_time": "0:27:02", "remaining_time": "0:28:23", "throughput": 5606.51, "total_tokens": 9097712}
|
|
{"current_steps": 18490, "total_steps": 37885, "loss": 0.0643, "lr": 1.2146601347046107e-06, "epoch": 2.4402797941137653, "percentage": 48.81, "elapsed_time": "0:27:03", "remaining_time": "0:28:22", "throughput": 5606.98, "total_tokens": 9100336}
|
|
{"current_steps": 18495, "total_steps": 37885, "loss": 0.0002, "lr": 1.214210153238263e-06, "epoch": 2.4409396858915136, "percentage": 48.82, "elapsed_time": "0:27:03", "remaining_time": "0:28:21", "throughput": 5607.47, "total_tokens": 9103024}
|
|
{"current_steps": 18500, "total_steps": 37885, "loss": 0.0001, "lr": 1.2137601263073613e-06, "epoch": 2.4415995776692623, "percentage": 48.83, "elapsed_time": "0:27:03", "remaining_time": "0:28:21", "throughput": 5607.86, "total_tokens": 9105520}
|
|
{"current_steps": 18505, "total_steps": 37885, "loss": 0.0689, "lr": 1.2133100540074206e-06, "epoch": 2.4422594694470106, "percentage": 48.85, "elapsed_time": "0:27:04", "remaining_time": "0:28:20", "throughput": 5608.25, "total_tokens": 9108016}
|
|
{"current_steps": 18510, "total_steps": 37885, "loss": 0.0003, "lr": 1.2128599364339663e-06, "epoch": 2.4429193612247593, "percentage": 48.86, "elapsed_time": "0:27:04", "remaining_time": "0:28:20", "throughput": 5608.52, "total_tokens": 9110320}
|
|
{"current_steps": 18515, "total_steps": 37885, "loss": 0.0113, "lr": 1.212409773682531e-06, "epoch": 2.4435792530025076, "percentage": 48.87, "elapsed_time": "0:27:04", "remaining_time": "0:28:19", "throughput": 5608.8, "total_tokens": 9112624}
|
|
{"current_steps": 18520, "total_steps": 37885, "loss": 0.0001, "lr": 1.2119595658486599e-06, "epoch": 2.444239144780256, "percentage": 48.88, "elapsed_time": "0:27:05", "remaining_time": "0:28:19", "throughput": 5609.19, "total_tokens": 9115120}
|
|
{"current_steps": 18525, "total_steps": 37885, "loss": 0.111, "lr": 1.2115093130279055e-06, "epoch": 2.4448990365580046, "percentage": 48.9, "elapsed_time": "0:27:05", "remaining_time": "0:28:18", "throughput": 5609.6, "total_tokens": 9117680}
|
|
{"current_steps": 18530, "total_steps": 37885, "loss": 0.1346, "lr": 1.2110590153158313e-06, "epoch": 2.445558928335753, "percentage": 48.91, "elapsed_time": "0:27:05", "remaining_time": "0:28:18", "throughput": 5609.94, "total_tokens": 9120112}
|
|
{"current_steps": 18535, "total_steps": 37885, "loss": 0.0004, "lr": 1.2106086728080095e-06, "epoch": 2.4462188201135016, "percentage": 48.92, "elapsed_time": "0:27:06", "remaining_time": "0:28:17", "throughput": 5610.42, "total_tokens": 9122800}
|
|
{"current_steps": 18540, "total_steps": 37885, "loss": 0.0473, "lr": 1.2101582856000219e-06, "epoch": 2.44687871189125, "percentage": 48.94, "elapsed_time": "0:27:06", "remaining_time": "0:28:16", "throughput": 5610.6, "total_tokens": 9124976}
|
|
{"current_steps": 18545, "total_steps": 37885, "loss": 0.0337, "lr": 1.20970785378746e-06, "epoch": 2.447538603668998, "percentage": 48.95, "elapsed_time": "0:27:06", "remaining_time": "0:28:16", "throughput": 5610.97, "total_tokens": 9127472}
|
|
{"current_steps": 18550, "total_steps": 37885, "loss": 0.0005, "lr": 1.2092573774659247e-06, "epoch": 2.448198495446747, "percentage": 48.96, "elapsed_time": "0:27:07", "remaining_time": "0:28:15", "throughput": 5611.52, "total_tokens": 9130288}
|
|
{"current_steps": 18555, "total_steps": 37885, "loss": 0.0011, "lr": 1.2088068567310266e-06, "epoch": 2.448858387224495, "percentage": 48.98, "elapsed_time": "0:27:07", "remaining_time": "0:28:15", "throughput": 5611.74, "total_tokens": 9132528}
|
|
{"current_steps": 18560, "total_steps": 37885, "loss": 0.197, "lr": 1.2083562916783852e-06, "epoch": 2.449518279002244, "percentage": 48.99, "elapsed_time": "0:27:07", "remaining_time": "0:28:14", "throughput": 5612.18, "total_tokens": 9135152}
|
|
{"current_steps": 18565, "total_steps": 37885, "loss": 0.088, "lr": 1.2079056824036294e-06, "epoch": 2.450178170779992, "percentage": 49.0, "elapsed_time": "0:27:08", "remaining_time": "0:28:14", "throughput": 5612.77, "total_tokens": 9138032}
|
|
{"current_steps": 18570, "total_steps": 37885, "loss": 0.0004, "lr": 1.207455029002398e-06, "epoch": 2.4508380625577404, "percentage": 49.02, "elapsed_time": "0:27:08", "remaining_time": "0:28:13", "throughput": 5613.12, "total_tokens": 9140528}
|
|
{"current_steps": 18575, "total_steps": 37885, "loss": 0.0495, "lr": 1.207004331570339e-06, "epoch": 2.451497954335489, "percentage": 49.03, "elapsed_time": "0:27:08", "remaining_time": "0:28:13", "throughput": 5613.35, "total_tokens": 9142768}
|
|
{"current_steps": 18580, "total_steps": 37885, "loss": 0.0004, "lr": 1.2065535902031098e-06, "epoch": 2.4521578461132374, "percentage": 49.04, "elapsed_time": "0:27:09", "remaining_time": "0:28:12", "throughput": 5613.78, "total_tokens": 9145392}
|
|
{"current_steps": 18585, "total_steps": 37885, "loss": 0.1586, "lr": 1.206102804996377e-06, "epoch": 2.452817737890986, "percentage": 49.06, "elapsed_time": "0:27:09", "remaining_time": "0:28:12", "throughput": 5614.04, "total_tokens": 9147696}
|
|
{"current_steps": 18590, "total_steps": 37885, "loss": 0.0845, "lr": 1.2056519760458162e-06, "epoch": 2.4534776296687344, "percentage": 49.07, "elapsed_time": "0:27:09", "remaining_time": "0:28:11", "throughput": 5614.49, "total_tokens": 9150320}
|
|
{"current_steps": 18595, "total_steps": 37885, "loss": 0.0212, "lr": 1.2052011034471123e-06, "epoch": 2.4541375214464827, "percentage": 49.08, "elapsed_time": "0:27:10", "remaining_time": "0:28:11", "throughput": 5615.01, "total_tokens": 9153072}
|
|
{"current_steps": 18600, "total_steps": 37885, "loss": 0.0005, "lr": 1.2047501872959606e-06, "epoch": 2.4547974132242314, "percentage": 49.1, "elapsed_time": "0:27:10", "remaining_time": "0:28:10", "throughput": 5615.34, "total_tokens": 9155504}
|
|
{"current_steps": 18605, "total_steps": 37885, "loss": 0.0002, "lr": 1.204299227688064e-06, "epoch": 2.4554573050019797, "percentage": 49.11, "elapsed_time": "0:27:10", "remaining_time": "0:28:09", "throughput": 5615.75, "total_tokens": 9158064}
|
|
{"current_steps": 18610, "total_steps": 37885, "loss": 0.1095, "lr": 1.203848224719136e-06, "epoch": 2.456117196779728, "percentage": 49.12, "elapsed_time": "0:27:11", "remaining_time": "0:28:09", "throughput": 5616.19, "total_tokens": 9160688}
|
|
{"current_steps": 18615, "total_steps": 37885, "loss": 0.0001, "lr": 1.2033971784848985e-06, "epoch": 2.4567770885574767, "percentage": 49.14, "elapsed_time": "0:27:11", "remaining_time": "0:28:08", "throughput": 5616.5, "total_tokens": 9163056}
|
|
{"current_steps": 18620, "total_steps": 37885, "loss": 0.0004, "lr": 1.2029460890810826e-06, "epoch": 2.457436980335225, "percentage": 49.15, "elapsed_time": "0:27:11", "remaining_time": "0:28:08", "throughput": 5616.94, "total_tokens": 9165680}
|
|
{"current_steps": 18625, "total_steps": 37885, "loss": 0.0215, "lr": 1.202494956603429e-06, "epoch": 2.4580968721129732, "percentage": 49.16, "elapsed_time": "0:27:12", "remaining_time": "0:28:07", "throughput": 5617.2, "total_tokens": 9167984}
|
|
{"current_steps": 18630, "total_steps": 37885, "loss": 0.0013, "lr": 1.2020437811476872e-06, "epoch": 2.458756763890722, "percentage": 49.18, "elapsed_time": "0:27:12", "remaining_time": "0:28:07", "throughput": 5617.65, "total_tokens": 9170608}
|
|
{"current_steps": 18635, "total_steps": 37885, "loss": 0.0002, "lr": 1.2015925628096157e-06, "epoch": 2.4594166556684702, "percentage": 49.19, "elapsed_time": "0:27:12", "remaining_time": "0:28:06", "throughput": 5617.94, "total_tokens": 9172976}
|
|
{"current_steps": 18640, "total_steps": 37885, "loss": 0.0003, "lr": 1.2011413016849829e-06, "epoch": 2.460076547446219, "percentage": 49.2, "elapsed_time": "0:27:13", "remaining_time": "0:28:06", "throughput": 5618.38, "total_tokens": 9175600}
|
|
{"current_steps": 18645, "total_steps": 37885, "loss": 0.0007, "lr": 1.2006899978695653e-06, "epoch": 2.4607364392239672, "percentage": 49.21, "elapsed_time": "0:27:13", "remaining_time": "0:28:05", "throughput": 5618.64, "total_tokens": 9177904}
|
|
{"current_steps": 18650, "total_steps": 37885, "loss": 0.0005, "lr": 1.200238651459149e-06, "epoch": 2.4613963310017155, "percentage": 49.23, "elapsed_time": "0:27:13", "remaining_time": "0:28:05", "throughput": 5619.01, "total_tokens": 9180400}
|
|
{"current_steps": 18655, "total_steps": 37885, "loss": 0.0782, "lr": 1.1997872625495284e-06, "epoch": 2.4620562227794642, "percentage": 49.24, "elapsed_time": "0:27:14", "remaining_time": "0:28:04", "throughput": 5619.36, "total_tokens": 9182896}
|
|
{"current_steps": 18660, "total_steps": 37885, "loss": 0.0021, "lr": 1.1993358312365087e-06, "epoch": 2.4627161145572125, "percentage": 49.25, "elapsed_time": "0:27:14", "remaining_time": "0:28:03", "throughput": 5619.7, "total_tokens": 9185328}
|
|
{"current_steps": 18665, "total_steps": 37885, "loss": 0.0001, "lr": 1.198884357615902e-06, "epoch": 2.4633760063349612, "percentage": 49.27, "elapsed_time": "0:27:14", "remaining_time": "0:28:03", "throughput": 5620.03, "total_tokens": 9187760}
|
|
{"current_steps": 18670, "total_steps": 37885, "loss": 0.0844, "lr": 1.1984328417835307e-06, "epoch": 2.4640358981127095, "percentage": 49.28, "elapsed_time": "0:27:15", "remaining_time": "0:28:02", "throughput": 5620.18, "total_tokens": 9189872}
|
|
{"current_steps": 18675, "total_steps": 37885, "loss": 0.1752, "lr": 1.1979812838352257e-06, "epoch": 2.464695789890458, "percentage": 49.29, "elapsed_time": "0:27:15", "remaining_time": "0:28:02", "throughput": 5620.45, "total_tokens": 9192176}
|
|
{"current_steps": 18680, "total_steps": 37885, "loss": 0.0296, "lr": 1.1975296838668266e-06, "epoch": 2.4653556816682065, "percentage": 49.31, "elapsed_time": "0:27:15", "remaining_time": "0:28:01", "throughput": 5620.73, "total_tokens": 9194480}
|
|
{"current_steps": 18685, "total_steps": 37885, "loss": 0.0712, "lr": 1.1970780419741828e-06, "epoch": 2.466015573445955, "percentage": 49.32, "elapsed_time": "0:27:16", "remaining_time": "0:28:01", "throughput": 5621.12, "total_tokens": 9196976}
|
|
{"current_steps": 18690, "total_steps": 37885, "loss": 0.0004, "lr": 1.1966263582531517e-06, "epoch": 2.4666754652237035, "percentage": 49.33, "elapsed_time": "0:27:16", "remaining_time": "0:28:00", "throughput": 5621.36, "total_tokens": 9199216}
|
|
{"current_steps": 18695, "total_steps": 37885, "loss": 0.0064, "lr": 1.1961746327996e-06, "epoch": 2.467335357001452, "percentage": 49.35, "elapsed_time": "0:27:16", "remaining_time": "0:28:00", "throughput": 5621.71, "total_tokens": 9201648}
|
|
{"current_steps": 18700, "total_steps": 37885, "loss": 0.0253, "lr": 1.1957228657094027e-06, "epoch": 2.4679952487792, "percentage": 49.36, "elapsed_time": "0:27:17", "remaining_time": "0:27:59", "throughput": 5621.88, "total_tokens": 9203760}
|
|
{"current_steps": 18705, "total_steps": 37885, "loss": 0.0004, "lr": 1.1952710570784447e-06, "epoch": 2.468655140556949, "percentage": 49.37, "elapsed_time": "0:27:17", "remaining_time": "0:27:59", "throughput": 5622.12, "total_tokens": 9206000}
|
|
{"current_steps": 18710, "total_steps": 37885, "loss": 0.1172, "lr": 1.194819207002619e-06, "epoch": 2.469315032334697, "percentage": 49.39, "elapsed_time": "0:27:17", "remaining_time": "0:27:58", "throughput": 5622.52, "total_tokens": 9208496}
|
|
{"current_steps": 18715, "total_steps": 37885, "loss": 0.1238, "lr": 1.194367315577827e-06, "epoch": 2.469974924112446, "percentage": 49.4, "elapsed_time": "0:27:18", "remaining_time": "0:27:57", "throughput": 5622.98, "total_tokens": 9211120}
|
|
{"current_steps": 18720, "total_steps": 37885, "loss": 0.1403, "lr": 1.1939153828999801e-06, "epoch": 2.470634815890194, "percentage": 49.41, "elapsed_time": "0:27:18", "remaining_time": "0:27:57", "throughput": 5623.44, "total_tokens": 9213744}
|
|
{"current_steps": 18725, "total_steps": 37885, "loss": 0.0015, "lr": 1.1934634090649973e-06, "epoch": 2.4712947076679423, "percentage": 49.43, "elapsed_time": "0:27:18", "remaining_time": "0:27:56", "throughput": 5623.62, "total_tokens": 9215856}
|
|
{"current_steps": 18730, "total_steps": 37885, "loss": 0.1295, "lr": 1.1930113941688072e-06, "epoch": 2.471954599445691, "percentage": 49.44, "elapsed_time": "0:27:19", "remaining_time": "0:27:56", "throughput": 5623.9, "total_tokens": 9218160}
|
|
{"current_steps": 18735, "total_steps": 37885, "loss": 0.0516, "lr": 1.1925593383073458e-06, "epoch": 2.4726144912234393, "percentage": 49.45, "elapsed_time": "0:27:19", "remaining_time": "0:27:55", "throughput": 5624.4, "total_tokens": 9220848}
|
|
{"current_steps": 18740, "total_steps": 37885, "loss": 0.0631, "lr": 1.1921072415765595e-06, "epoch": 2.4732743830011876, "percentage": 49.47, "elapsed_time": "0:27:19", "remaining_time": "0:27:55", "throughput": 5624.78, "total_tokens": 9223344}
|
|
{"current_steps": 18745, "total_steps": 37885, "loss": 0.0004, "lr": 1.1916551040724026e-06, "epoch": 2.4739342747789363, "percentage": 49.48, "elapsed_time": "0:27:20", "remaining_time": "0:27:54", "throughput": 5625.07, "total_tokens": 9225648}
|
|
{"current_steps": 18750, "total_steps": 37885, "loss": 0.0413, "lr": 1.191202925890837e-06, "epoch": 2.4745941665566846, "percentage": 49.49, "elapsed_time": "0:27:20", "remaining_time": "0:27:54", "throughput": 5625.39, "total_tokens": 9228016}
|
|
{"current_steps": 18755, "total_steps": 37885, "loss": 0.0693, "lr": 1.1907507071278358e-06, "epoch": 2.475254058334433, "percentage": 49.51, "elapsed_time": "0:27:20", "remaining_time": "0:27:53", "throughput": 5625.61, "total_tokens": 9230192}
|
|
{"current_steps": 18760, "total_steps": 37885, "loss": 0.0002, "lr": 1.1902984478793776e-06, "epoch": 2.4759139501121816, "percentage": 49.52, "elapsed_time": "0:27:21", "remaining_time": "0:27:53", "throughput": 5625.95, "total_tokens": 9232624}
|
|
{"current_steps": 18765, "total_steps": 37885, "loss": 0.0532, "lr": 1.1898461482414524e-06, "epoch": 2.47657384188993, "percentage": 49.53, "elapsed_time": "0:27:21", "remaining_time": "0:27:52", "throughput": 5626.27, "total_tokens": 9234992}
|
|
{"current_steps": 18770, "total_steps": 37885, "loss": 0.0014, "lr": 1.1893938083100568e-06, "epoch": 2.4772337336676786, "percentage": 49.54, "elapsed_time": "0:27:21", "remaining_time": "0:27:51", "throughput": 5626.59, "total_tokens": 9237360}
|
|
{"current_steps": 18775, "total_steps": 37885, "loss": 0.0253, "lr": 1.188941428181197e-06, "epoch": 2.477893625445427, "percentage": 49.56, "elapsed_time": "0:27:22", "remaining_time": "0:27:51", "throughput": 5626.88, "total_tokens": 9239664}
|
|
{"current_steps": 18780, "total_steps": 37885, "loss": 0.0016, "lr": 1.188489007950887e-06, "epoch": 2.478553517223175, "percentage": 49.57, "elapsed_time": "0:27:22", "remaining_time": "0:27:50", "throughput": 5627.16, "total_tokens": 9241968}
|
|
{"current_steps": 18785, "total_steps": 37885, "loss": 0.0, "lr": 1.1880365477151501e-06, "epoch": 2.479213409000924, "percentage": 49.58, "elapsed_time": "0:27:22", "remaining_time": "0:27:50", "throughput": 5627.49, "total_tokens": 9244336}
|
|
{"current_steps": 18790, "total_steps": 37885, "loss": 0.0007, "lr": 1.1875840475700175e-06, "epoch": 2.479873300778672, "percentage": 49.6, "elapsed_time": "0:27:23", "remaining_time": "0:27:49", "throughput": 5627.95, "total_tokens": 9246960}
|
|
{"current_steps": 18795, "total_steps": 37885, "loss": 0.0004, "lr": 1.1871315076115293e-06, "epoch": 2.480533192556421, "percentage": 49.61, "elapsed_time": "0:27:23", "remaining_time": "0:27:49", "throughput": 5628.24, "total_tokens": 9249264}
|
|
{"current_steps": 18800, "total_steps": 37885, "loss": 0.0001, "lr": 1.186678927935734e-06, "epoch": 2.481193084334169, "percentage": 49.62, "elapsed_time": "0:27:23", "remaining_time": "0:27:48", "throughput": 5628.48, "total_tokens": 9251504}
|
|
{"current_steps": 18805, "total_steps": 37885, "loss": 0.0001, "lr": 1.1862263086386875e-06, "epoch": 2.4818529761119175, "percentage": 49.64, "elapsed_time": "0:27:24", "remaining_time": "0:27:48", "throughput": 5628.62, "total_tokens": 9253552}
|
|
{"current_steps": 18810, "total_steps": 37885, "loss": 0.0002, "lr": 1.1857736498164559e-06, "epoch": 2.482512867889666, "percentage": 49.65, "elapsed_time": "0:27:24", "remaining_time": "0:27:47", "throughput": 5628.97, "total_tokens": 9255984}
|
|
{"current_steps": 18815, "total_steps": 37885, "loss": 0.0003, "lr": 1.1853209515651122e-06, "epoch": 2.4831727596674145, "percentage": 49.66, "elapsed_time": "0:27:24", "remaining_time": "0:27:46", "throughput": 5629.3, "total_tokens": 9258352}
|
|
{"current_steps": 18820, "total_steps": 37885, "loss": 0.0001, "lr": 1.1848682139807387e-06, "epoch": 2.483832651445163, "percentage": 49.68, "elapsed_time": "0:27:25", "remaining_time": "0:27:46", "throughput": 5629.64, "total_tokens": 9260784}
|
|
{"current_steps": 18825, "total_steps": 37885, "loss": 0.0008, "lr": 1.1844154371594254e-06, "epoch": 2.4844925432229115, "percentage": 49.69, "elapsed_time": "0:27:25", "remaining_time": "0:27:45", "throughput": 5630.11, "total_tokens": 9263408}
|
|
{"current_steps": 18830, "total_steps": 37885, "loss": 0.0673, "lr": 1.183962621197271e-06, "epoch": 2.4851524350006597, "percentage": 49.7, "elapsed_time": "0:27:25", "remaining_time": "0:27:45", "throughput": 5630.32, "total_tokens": 9265584}
|
|
{"current_steps": 18835, "total_steps": 37885, "loss": 0.1876, "lr": 1.1835097661903826e-06, "epoch": 2.4858123267784085, "percentage": 49.72, "elapsed_time": "0:27:25", "remaining_time": "0:27:44", "throughput": 5630.6, "total_tokens": 9267888}
|
|
{"current_steps": 18840, "total_steps": 37885, "loss": 0.0001, "lr": 1.1830568722348748e-06, "epoch": 2.4864722185561567, "percentage": 49.73, "elapsed_time": "0:27:26", "remaining_time": "0:27:44", "throughput": 5630.91, "total_tokens": 9270256}
|
|
{"current_steps": 18845, "total_steps": 37885, "loss": 0.0006, "lr": 1.182603939426872e-06, "epoch": 2.4871321103339055, "percentage": 49.74, "elapsed_time": "0:27:26", "remaining_time": "0:27:43", "throughput": 5631.41, "total_tokens": 9272944}
|
|
{"current_steps": 18850, "total_steps": 37885, "loss": 0.0002, "lr": 1.1821509678625048e-06, "epoch": 2.4877920021116537, "percentage": 49.76, "elapsed_time": "0:27:26", "remaining_time": "0:27:43", "throughput": 5631.77, "total_tokens": 9275376}
|
|
{"current_steps": 18855, "total_steps": 37885, "loss": 0.0368, "lr": 1.181697957637914e-06, "epoch": 2.488451893889402, "percentage": 49.77, "elapsed_time": "0:27:27", "remaining_time": "0:27:42", "throughput": 5632.04, "total_tokens": 9277680}
|
|
{"current_steps": 18860, "total_steps": 37885, "loss": 0.2397, "lr": 1.1812449088492474e-06, "epoch": 2.4891117856671507, "percentage": 49.78, "elapsed_time": "0:27:27", "remaining_time": "0:27:42", "throughput": 5632.39, "total_tokens": 9280112}
|
|
{"current_steps": 18865, "total_steps": 37885, "loss": 0.0019, "lr": 1.1807918215926614e-06, "epoch": 2.489771677444899, "percentage": 49.8, "elapsed_time": "0:27:27", "remaining_time": "0:27:41", "throughput": 5632.75, "total_tokens": 9282544}
|
|
{"current_steps": 18870, "total_steps": 37885, "loss": 0.1361, "lr": 1.1803386959643204e-06, "epoch": 2.4904315692226473, "percentage": 49.81, "elapsed_time": "0:27:28", "remaining_time": "0:27:40", "throughput": 5633.11, "total_tokens": 9284976}
|
|
{"current_steps": 18875, "total_steps": 37885, "loss": 0.0009, "lr": 1.179885532060397e-06, "epoch": 2.491091461000396, "percentage": 49.82, "elapsed_time": "0:27:28", "remaining_time": "0:27:40", "throughput": 5633.5, "total_tokens": 9287472}
|
|
{"current_steps": 18880, "total_steps": 37885, "loss": 0.0829, "lr": 1.1794323299770724e-06, "epoch": 2.4917513527781443, "percentage": 49.84, "elapsed_time": "0:27:28", "remaining_time": "0:27:39", "throughput": 5633.96, "total_tokens": 9290096}
|
|
{"current_steps": 18885, "total_steps": 37885, "loss": 0.0012, "lr": 1.1789790898105346e-06, "epoch": 2.492411244555893, "percentage": 49.85, "elapsed_time": "0:27:29", "remaining_time": "0:27:39", "throughput": 5634.29, "total_tokens": 9292464}
|
|
{"current_steps": 18890, "total_steps": 37885, "loss": 0.0004, "lr": 1.1785258116569816e-06, "epoch": 2.4930711363336413, "percentage": 49.86, "elapsed_time": "0:27:29", "remaining_time": "0:27:38", "throughput": 5634.64, "total_tokens": 9294896}
|
|
{"current_steps": 18895, "total_steps": 37885, "loss": 0.0015, "lr": 1.1780724956126173e-06, "epoch": 2.4937310281113896, "percentage": 49.87, "elapsed_time": "0:27:29", "remaining_time": "0:27:38", "throughput": 5635.06, "total_tokens": 9297456}
|
|
{"current_steps": 18900, "total_steps": 37885, "loss": 0.1238, "lr": 1.1776191417736558e-06, "epoch": 2.4943909198891383, "percentage": 49.89, "elapsed_time": "0:27:30", "remaining_time": "0:27:37", "throughput": 5635.43, "total_tokens": 9299888}
|
|
{"current_steps": 18905, "total_steps": 37885, "loss": 0.0385, "lr": 1.1771657502363175e-06, "epoch": 2.4950508116668866, "percentage": 49.9, "elapsed_time": "0:27:30", "remaining_time": "0:27:37", "throughput": 5635.96, "total_tokens": 9302640}
|
|
{"current_steps": 18910, "total_steps": 37885, "loss": 0.0002, "lr": 1.1767123210968315e-06, "epoch": 2.495710703444635, "percentage": 49.91, "elapsed_time": "0:27:30", "remaining_time": "0:27:36", "throughput": 5636.49, "total_tokens": 9305392}
|
|
{"current_steps": 18915, "total_steps": 37885, "loss": 0.0991, "lr": 1.1762588544514352e-06, "epoch": 2.4963705952223836, "percentage": 49.93, "elapsed_time": "0:27:31", "remaining_time": "0:27:36", "throughput": 5636.88, "total_tokens": 9307888}
|
|
{"current_steps": 18920, "total_steps": 37885, "loss": 0.0002, "lr": 1.1758053503963733e-06, "epoch": 2.497030487000132, "percentage": 49.94, "elapsed_time": "0:27:31", "remaining_time": "0:27:35", "throughput": 5637.17, "total_tokens": 9310192}
|
|
{"current_steps": 18925, "total_steps": 37885, "loss": 0.1931, "lr": 1.1753518090278991e-06, "epoch": 2.4976903787778806, "percentage": 49.95, "elapsed_time": "0:27:31", "remaining_time": "0:27:34", "throughput": 5637.57, "total_tokens": 9312688}
|
|
{"current_steps": 18930, "total_steps": 37885, "loss": 0.0463, "lr": 1.1748982304422729e-06, "epoch": 2.498350270555629, "percentage": 49.97, "elapsed_time": "0:27:32", "remaining_time": "0:27:34", "throughput": 5637.98, "total_tokens": 9315248}
|
|
{"current_steps": 18935, "total_steps": 37885, "loss": 0.0443, "lr": 1.174444614735764e-06, "epoch": 2.499010162333377, "percentage": 49.98, "elapsed_time": "0:27:32", "remaining_time": "0:27:33", "throughput": 5638.5, "total_tokens": 9318000}
|
|
{"current_steps": 18940, "total_steps": 37885, "loss": 0.0663, "lr": 1.1739909620046485e-06, "epoch": 2.499670054111126, "percentage": 49.99, "elapsed_time": "0:27:32", "remaining_time": "0:27:33", "throughput": 5639.03, "total_tokens": 9320752}
|
|
{"current_steps": 18945, "total_steps": 37885, "loss": 0.0352, "lr": 1.1735372723452114e-06, "epoch": 2.500329945888874, "percentage": 50.01, "elapsed_time": "0:27:33", "remaining_time": "0:27:32", "throughput": 5639.64, "total_tokens": 9323632}
|
|
{"current_steps": 18950, "total_steps": 37885, "loss": 0.0962, "lr": 1.1730835458537454e-06, "epoch": 2.500989837666623, "percentage": 50.02, "elapsed_time": "0:27:33", "remaining_time": "0:27:32", "throughput": 5640.09, "total_tokens": 9326256}
|
|
{"current_steps": 18950, "total_steps": 37885, "eval_loss": 0.11418119072914124, "epoch": 2.500989837666623, "percentage": 50.02, "elapsed_time": "0:27:41", "remaining_time": "0:27:40", "throughput": 5613.2, "total_tokens": 9326256}
|
|
{"current_steps": 18955, "total_steps": 37885, "loss": 0.0332, "lr": 1.1726297826265497e-06, "epoch": 2.501649729444371, "percentage": 50.03, "elapsed_time": "0:28:15", "remaining_time": "0:28:13", "throughput": 5500.73, "total_tokens": 9328688}
|
|
{"current_steps": 18960, "total_steps": 37885, "loss": 0.0693, "lr": 1.1721759827599326e-06, "epoch": 2.5023096212221194, "percentage": 50.05, "elapsed_time": "0:28:16", "remaining_time": "0:28:13", "throughput": 5501.2, "total_tokens": 9331312}
|
|
{"current_steps": 18965, "total_steps": 37885, "loss": 0.0561, "lr": 1.1717221463502102e-06, "epoch": 2.502969512999868, "percentage": 50.06, "elapsed_time": "0:28:16", "remaining_time": "0:28:12", "throughput": 5501.65, "total_tokens": 9333872}
|
|
{"current_steps": 18970, "total_steps": 37885, "loss": 0.0002, "lr": 1.1712682734937058e-06, "epoch": 2.5036294047776164, "percentage": 50.07, "elapsed_time": "0:28:16", "remaining_time": "0:28:11", "throughput": 5501.94, "total_tokens": 9336176}
|
|
{"current_steps": 18975, "total_steps": 37885, "loss": 0.0008, "lr": 1.1708143642867506e-06, "epoch": 2.504289296555365, "percentage": 50.09, "elapsed_time": "0:28:17", "remaining_time": "0:28:11", "throughput": 5502.43, "total_tokens": 9338800}
|
|
{"current_steps": 18980, "total_steps": 37885, "loss": 0.0002, "lr": 1.1703604188256833e-06, "epoch": 2.5049491883331134, "percentage": 50.1, "elapsed_time": "0:28:17", "remaining_time": "0:28:10", "throughput": 5502.8, "total_tokens": 9341232}
|
|
{"current_steps": 18985, "total_steps": 37885, "loss": 0.0045, "lr": 1.169906437206851e-06, "epoch": 2.5056090801108617, "percentage": 50.11, "elapsed_time": "0:28:17", "remaining_time": "0:28:10", "throughput": 5503.18, "total_tokens": 9343664}
|
|
{"current_steps": 18990, "total_steps": 37885, "loss": 0.0005, "lr": 1.1694524195266077e-06, "epoch": 2.5062689718886104, "percentage": 50.13, "elapsed_time": "0:28:18", "remaining_time": "0:28:09", "throughput": 5503.52, "total_tokens": 9346096}
|
|
{"current_steps": 18995, "total_steps": 37885, "loss": 0.1795, "lr": 1.1689983658813152e-06, "epoch": 2.5069288636663587, "percentage": 50.14, "elapsed_time": "0:28:18", "remaining_time": "0:28:09", "throughput": 5503.92, "total_tokens": 9348592}
|
|
{"current_steps": 19000, "total_steps": 37885, "loss": 0.0014, "lr": 1.1685442763673436e-06, "epoch": 2.5075887554441074, "percentage": 50.15, "elapsed_time": "0:28:18", "remaining_time": "0:28:08", "throughput": 5504.31, "total_tokens": 9351088}
|
|
{"current_steps": 19005, "total_steps": 37885, "loss": 0.0008, "lr": 1.16809015108107e-06, "epoch": 2.5082486472218557, "percentage": 50.16, "elapsed_time": "0:28:19", "remaining_time": "0:28:08", "throughput": 5504.74, "total_tokens": 9353648}
|
|
{"current_steps": 19010, "total_steps": 37885, "loss": 0.0019, "lr": 1.1676359901188785e-06, "epoch": 2.508908538999604, "percentage": 50.18, "elapsed_time": "0:28:19", "remaining_time": "0:28:07", "throughput": 5505.17, "total_tokens": 9356208}
|
|
{"current_steps": 19015, "total_steps": 37885, "loss": 0.0006, "lr": 1.1671817935771623e-06, "epoch": 2.5095684307773523, "percentage": 50.19, "elapsed_time": "0:28:19", "remaining_time": "0:28:06", "throughput": 5505.36, "total_tokens": 9358320}
|
|
{"current_steps": 19020, "total_steps": 37885, "loss": 0.046, "lr": 1.166727561552321e-06, "epoch": 2.510228322555101, "percentage": 50.2, "elapsed_time": "0:28:20", "remaining_time": "0:28:06", "throughput": 5505.69, "total_tokens": 9360688}
|
|
{"current_steps": 19025, "total_steps": 37885, "loss": 0.0188, "lr": 1.1662732941407625e-06, "epoch": 2.5108882143328493, "percentage": 50.22, "elapsed_time": "0:28:20", "remaining_time": "0:28:05", "throughput": 5506.13, "total_tokens": 9363248}
|
|
{"current_steps": 19030, "total_steps": 37885, "loss": 0.0165, "lr": 1.165818991438901e-06, "epoch": 2.511548106110598, "percentage": 50.23, "elapsed_time": "0:28:20", "remaining_time": "0:28:05", "throughput": 5506.6, "total_tokens": 9365872}
|
|
{"current_steps": 19035, "total_steps": 37885, "loss": 0.0002, "lr": 1.1653646535431593e-06, "epoch": 2.5122079978883463, "percentage": 50.24, "elapsed_time": "0:28:21", "remaining_time": "0:28:04", "throughput": 5507.0, "total_tokens": 9368368}
|
|
{"current_steps": 19040, "total_steps": 37885, "loss": 0.0004, "lr": 1.1649102805499676e-06, "epoch": 2.5128678896660945, "percentage": 50.26, "elapsed_time": "0:28:21", "remaining_time": "0:28:04", "throughput": 5507.5, "total_tokens": 9371056}
|
|
{"current_steps": 19045, "total_steps": 37885, "loss": 0.0001, "lr": 1.1644558725557627e-06, "epoch": 2.5135277814438433, "percentage": 50.27, "elapsed_time": "0:28:21", "remaining_time": "0:28:03", "throughput": 5507.97, "total_tokens": 9373680}
|
|
{"current_steps": 19050, "total_steps": 37885, "loss": 0.0, "lr": 1.16400142965699e-06, "epoch": 2.5141876732215915, "percentage": 50.28, "elapsed_time": "0:28:22", "remaining_time": "0:28:02", "throughput": 5508.23, "total_tokens": 9375920}
|
|
{"current_steps": 19055, "total_steps": 37885, "loss": 0.1645, "lr": 1.1635469519501015e-06, "epoch": 2.5148475649993403, "percentage": 50.3, "elapsed_time": "0:28:22", "remaining_time": "0:28:02", "throughput": 5508.73, "total_tokens": 9378608}
|
|
{"current_steps": 19060, "total_steps": 37885, "loss": 0.0565, "lr": 1.1630924395315565e-06, "epoch": 2.5155074567770885, "percentage": 50.31, "elapsed_time": "0:28:22", "remaining_time": "0:28:01", "throughput": 5508.95, "total_tokens": 9380784}
|
|
{"current_steps": 19065, "total_steps": 37885, "loss": 0.0412, "lr": 1.1626378924978223e-06, "epoch": 2.516167348554837, "percentage": 50.32, "elapsed_time": "0:28:23", "remaining_time": "0:28:01", "throughput": 5509.32, "total_tokens": 9383216}
|
|
{"current_steps": 19070, "total_steps": 37885, "loss": 0.0311, "lr": 1.1621833109453734e-06, "epoch": 2.5168272403325855, "percentage": 50.34, "elapsed_time": "0:28:23", "remaining_time": "0:28:00", "throughput": 5509.71, "total_tokens": 9385712}
|
|
{"current_steps": 19075, "total_steps": 37885, "loss": 0.0527, "lr": 1.161728694970691e-06, "epoch": 2.517487132110334, "percentage": 50.35, "elapsed_time": "0:28:23", "remaining_time": "0:28:00", "throughput": 5509.93, "total_tokens": 9387888}
|
|
{"current_steps": 19080, "total_steps": 37885, "loss": 0.0464, "lr": 1.1612740446702645e-06, "epoch": 2.5181470238880825, "percentage": 50.36, "elapsed_time": "0:28:24", "remaining_time": "0:27:59", "throughput": 5510.22, "total_tokens": 9390192}
|
|
{"current_steps": 19085, "total_steps": 37885, "loss": 0.0352, "lr": 1.1608193601405894e-06, "epoch": 2.518806915665831, "percentage": 50.38, "elapsed_time": "0:28:24", "remaining_time": "0:27:59", "throughput": 5510.69, "total_tokens": 9392816}
|
|
{"current_steps": 19090, "total_steps": 37885, "loss": 0.0001, "lr": 1.1603646414781701e-06, "epoch": 2.519466807443579, "percentage": 50.39, "elapsed_time": "0:28:24", "remaining_time": "0:27:58", "throughput": 5511.01, "total_tokens": 9395184}
|
|
{"current_steps": 19095, "total_steps": 37885, "loss": 0.0002, "lr": 1.1599098887795164e-06, "epoch": 2.520126699221328, "percentage": 50.4, "elapsed_time": "0:28:25", "remaining_time": "0:27:57", "throughput": 5511.16, "total_tokens": 9397232}
|
|
{"current_steps": 19100, "total_steps": 37885, "loss": 0.0002, "lr": 1.1594551021411473e-06, "epoch": 2.520786590999076, "percentage": 50.42, "elapsed_time": "0:28:25", "remaining_time": "0:27:57", "throughput": 5511.53, "total_tokens": 9399664}
|
|
{"current_steps": 19105, "total_steps": 37885, "loss": 0.0001, "lr": 1.1590002816595874e-06, "epoch": 2.521446482776825, "percentage": 50.43, "elapsed_time": "0:28:25", "remaining_time": "0:27:56", "throughput": 5511.81, "total_tokens": 9401968}
|
|
{"current_steps": 19110, "total_steps": 37885, "loss": 0.1127, "lr": 1.158545427431369e-06, "epoch": 2.522106374554573, "percentage": 50.44, "elapsed_time": "0:28:26", "remaining_time": "0:27:56", "throughput": 5512.18, "total_tokens": 9404400}
|
|
{"current_steps": 19115, "total_steps": 37885, "loss": 0.073, "lr": 1.1580905395530317e-06, "epoch": 2.5227662663323214, "percentage": 50.46, "elapsed_time": "0:28:26", "remaining_time": "0:27:55", "throughput": 5512.53, "total_tokens": 9406832}
|
|
{"current_steps": 19120, "total_steps": 37885, "loss": 0.0006, "lr": 1.1576356181211223e-06, "epoch": 2.52342615811007, "percentage": 50.47, "elapsed_time": "0:28:26", "remaining_time": "0:27:55", "throughput": 5512.89, "total_tokens": 9409264}
|
|
{"current_steps": 19125, "total_steps": 37885, "loss": 0.1069, "lr": 1.1571806632321941e-06, "epoch": 2.5240860498878184, "percentage": 50.48, "elapsed_time": "0:28:27", "remaining_time": "0:27:54", "throughput": 5513.32, "total_tokens": 9411824}
|
|
{"current_steps": 19130, "total_steps": 37885, "loss": 0.0614, "lr": 1.1567256749828088e-06, "epoch": 2.524745941665567, "percentage": 50.49, "elapsed_time": "0:28:27", "remaining_time": "0:27:53", "throughput": 5513.71, "total_tokens": 9414320}
|
|
{"current_steps": 19135, "total_steps": 37885, "loss": 0.0487, "lr": 1.1562706534695337e-06, "epoch": 2.5254058334433154, "percentage": 50.51, "elapsed_time": "0:28:27", "remaining_time": "0:27:53", "throughput": 5514.03, "total_tokens": 9416688}
|
|
{"current_steps": 19140, "total_steps": 37885, "loss": 0.0504, "lr": 1.1558155987889437e-06, "epoch": 2.5260657252210637, "percentage": 50.52, "elapsed_time": "0:28:28", "remaining_time": "0:27:52", "throughput": 5514.38, "total_tokens": 9419120}
|
|
{"current_steps": 19145, "total_steps": 37885, "loss": 0.0001, "lr": 1.1553605110376216e-06, "epoch": 2.526725616998812, "percentage": 50.53, "elapsed_time": "0:28:28", "remaining_time": "0:27:52", "throughput": 5514.75, "total_tokens": 9421552}
|
|
{"current_steps": 19150, "total_steps": 37885, "loss": 0.0725, "lr": 1.154905390312156e-06, "epoch": 2.5273855087765607, "percentage": 50.55, "elapsed_time": "0:28:28", "remaining_time": "0:27:51", "throughput": 5515.18, "total_tokens": 9424112}
|
|
{"current_steps": 19155, "total_steps": 37885, "loss": 0.0382, "lr": 1.1544502367091428e-06, "epoch": 2.528045400554309, "percentage": 50.56, "elapsed_time": "0:28:29", "remaining_time": "0:27:51", "throughput": 5515.44, "total_tokens": 9426352}
|
|
{"current_steps": 19160, "total_steps": 37885, "loss": 0.0905, "lr": 1.1539950503251858e-06, "epoch": 2.5287052923320577, "percentage": 50.57, "elapsed_time": "0:28:29", "remaining_time": "0:27:50", "throughput": 5515.94, "total_tokens": 9429040}
|
|
{"current_steps": 19165, "total_steps": 37885, "loss": 0.0015, "lr": 1.153539831256894e-06, "epoch": 2.529365184109806, "percentage": 50.59, "elapsed_time": "0:28:29", "remaining_time": "0:27:50", "throughput": 5516.2, "total_tokens": 9431280}
|
|
{"current_steps": 19170, "total_steps": 37885, "loss": 0.023, "lr": 1.1530845796008853e-06, "epoch": 2.530025075887554, "percentage": 50.6, "elapsed_time": "0:28:30", "remaining_time": "0:27:49", "throughput": 5516.53, "total_tokens": 9433648}
|
|
{"current_steps": 19175, "total_steps": 37885, "loss": 0.0277, "lr": 1.1526292954537827e-06, "epoch": 2.530684967665303, "percentage": 50.61, "elapsed_time": "0:28:30", "remaining_time": "0:27:48", "throughput": 5517.06, "total_tokens": 9436400}
|
|
{"current_steps": 19180, "total_steps": 37885, "loss": 0.0384, "lr": 1.1521739789122179e-06, "epoch": 2.531344859443051, "percentage": 50.63, "elapsed_time": "0:28:30", "remaining_time": "0:27:48", "throughput": 5517.46, "total_tokens": 9438896}
|
|
{"current_steps": 19185, "total_steps": 37885, "loss": 0.0001, "lr": 1.1517186300728276e-06, "epoch": 2.5320047512208, "percentage": 50.64, "elapsed_time": "0:28:31", "remaining_time": "0:27:47", "throughput": 5517.71, "total_tokens": 9441136}
|
|
{"current_steps": 19190, "total_steps": 37885, "loss": 0.0023, "lr": 1.151263249032257e-06, "epoch": 2.532664642998548, "percentage": 50.65, "elapsed_time": "0:28:31", "remaining_time": "0:27:47", "throughput": 5517.98, "total_tokens": 9443376}
|
|
{"current_steps": 19195, "total_steps": 37885, "loss": 0.049, "lr": 1.150807835887157e-06, "epoch": 2.5333245347762965, "percentage": 50.67, "elapsed_time": "0:28:31", "remaining_time": "0:27:46", "throughput": 5518.34, "total_tokens": 9445808}
|
|
{"current_steps": 19200, "total_steps": 37885, "loss": 0.1468, "lr": 1.1503523907341858e-06, "epoch": 2.533984426554045, "percentage": 50.68, "elapsed_time": "0:28:32", "remaining_time": "0:27:46", "throughput": 5518.56, "total_tokens": 9447984}
|
|
{"current_steps": 19205, "total_steps": 37885, "loss": 0.0019, "lr": 1.1498969136700087e-06, "epoch": 2.5346443183317935, "percentage": 50.69, "elapsed_time": "0:28:32", "remaining_time": "0:27:45", "throughput": 5518.89, "total_tokens": 9450352}
|
|
{"current_steps": 19210, "total_steps": 37885, "loss": 0.0008, "lr": 1.1494414047912967e-06, "epoch": 2.535304210109542, "percentage": 50.71, "elapsed_time": "0:28:32", "remaining_time": "0:27:44", "throughput": 5519.35, "total_tokens": 9452976}
|
|
{"current_steps": 19215, "total_steps": 37885, "loss": 0.0011, "lr": 1.1489858641947292e-06, "epoch": 2.5359641018872905, "percentage": 50.72, "elapsed_time": "0:28:33", "remaining_time": "0:27:44", "throughput": 5519.79, "total_tokens": 9455536}
|
|
{"current_steps": 19220, "total_steps": 37885, "loss": 0.0336, "lr": 1.1485302919769906e-06, "epoch": 2.5366239936650388, "percentage": 50.73, "elapsed_time": "0:28:33", "remaining_time": "0:27:43", "throughput": 5520.28, "total_tokens": 9458224}
|
|
{"current_steps": 19225, "total_steps": 37885, "loss": 0.0001, "lr": 1.1480746882347733e-06, "epoch": 2.5372838854427875, "percentage": 50.75, "elapsed_time": "0:28:33", "remaining_time": "0:27:43", "throughput": 5520.6, "total_tokens": 9460592}
|
|
{"current_steps": 19230, "total_steps": 37885, "loss": 0.0004, "lr": 1.1476190530647754e-06, "epoch": 2.5379437772205358, "percentage": 50.76, "elapsed_time": "0:28:34", "remaining_time": "0:27:42", "throughput": 5521.04, "total_tokens": 9463152}
|
|
{"current_steps": 19235, "total_steps": 37885, "loss": 0.077, "lr": 1.1471633865637027e-06, "epoch": 2.5386036689982845, "percentage": 50.77, "elapsed_time": "0:28:34", "remaining_time": "0:27:42", "throughput": 5521.47, "total_tokens": 9465712}
|
|
{"current_steps": 19240, "total_steps": 37885, "loss": 0.0003, "lr": 1.146707688828267e-06, "epoch": 2.5392635607760328, "percentage": 50.79, "elapsed_time": "0:28:34", "remaining_time": "0:27:41", "throughput": 5521.96, "total_tokens": 9468400}
|
|
{"current_steps": 19245, "total_steps": 37885, "loss": 0.0, "lr": 1.1462519599551864e-06, "epoch": 2.539923452553781, "percentage": 50.8, "elapsed_time": "0:28:35", "remaining_time": "0:27:41", "throughput": 5522.24, "total_tokens": 9470704}
|
|
{"current_steps": 19250, "total_steps": 37885, "loss": 0.2087, "lr": 1.1457962000411864e-06, "epoch": 2.5405833443315298, "percentage": 50.81, "elapsed_time": "0:28:35", "remaining_time": "0:27:40", "throughput": 5522.5, "total_tokens": 9472944}
|
|
{"current_steps": 19255, "total_steps": 37885, "loss": 0.0457, "lr": 1.1453404091829987e-06, "epoch": 2.541243236109278, "percentage": 50.82, "elapsed_time": "0:28:35", "remaining_time": "0:27:39", "throughput": 5522.89, "total_tokens": 9475440}
|
|
{"current_steps": 19260, "total_steps": 37885, "loss": 0.0014, "lr": 1.1448845874773623e-06, "epoch": 2.5419031278870268, "percentage": 50.84, "elapsed_time": "0:28:35", "remaining_time": "0:27:39", "throughput": 5523.2, "total_tokens": 9477808}
|
|
{"current_steps": 19265, "total_steps": 37885, "loss": 0.0019, "lr": 1.1444287350210208e-06, "epoch": 2.542563019664775, "percentage": 50.85, "elapsed_time": "0:28:36", "remaining_time": "0:27:38", "throughput": 5523.63, "total_tokens": 9480368}
|
|
{"current_steps": 19270, "total_steps": 37885, "loss": 0.0003, "lr": 1.143972851910726e-06, "epoch": 2.5432229114425233, "percentage": 50.86, "elapsed_time": "0:28:36", "remaining_time": "0:27:38", "throughput": 5524.01, "total_tokens": 9482864}
|
|
{"current_steps": 19275, "total_steps": 37885, "loss": 0.0, "lr": 1.143516938243236e-06, "epoch": 2.5438828032202716, "percentage": 50.88, "elapsed_time": "0:28:36", "remaining_time": "0:27:37", "throughput": 5524.47, "total_tokens": 9485488}
|
|
{"current_steps": 19280, "total_steps": 37885, "loss": 0.0001, "lr": 1.1430609941153154e-06, "epoch": 2.5445426949980203, "percentage": 50.89, "elapsed_time": "0:28:37", "remaining_time": "0:27:37", "throughput": 5524.79, "total_tokens": 9487856}
|
|
{"current_steps": 19285, "total_steps": 37885, "loss": 0.0015, "lr": 1.1426050196237347e-06, "epoch": 2.545202586775769, "percentage": 50.9, "elapsed_time": "0:28:37", "remaining_time": "0:27:36", "throughput": 5524.96, "total_tokens": 9489968}
|
|
{"current_steps": 19290, "total_steps": 37885, "loss": 0.0698, "lr": 1.142149014865271e-06, "epoch": 2.5458624785535173, "percentage": 50.92, "elapsed_time": "0:28:37", "remaining_time": "0:27:36", "throughput": 5525.36, "total_tokens": 9492464}
|
|
{"current_steps": 19295, "total_steps": 37885, "loss": 0.0744, "lr": 1.1416929799367086e-06, "epoch": 2.5465223703312656, "percentage": 50.93, "elapsed_time": "0:28:38", "remaining_time": "0:27:35", "throughput": 5525.76, "total_tokens": 9495024}
|
|
{"current_steps": 19300, "total_steps": 37885, "loss": 0.0001, "lr": 1.141236914934837e-06, "epoch": 2.547182262109014, "percentage": 50.94, "elapsed_time": "0:28:38", "remaining_time": "0:27:34", "throughput": 5526.22, "total_tokens": 9497648}
|
|
{"current_steps": 19305, "total_steps": 37885, "loss": 0.0009, "lr": 1.1407808199564532e-06, "epoch": 2.5478421538867626, "percentage": 50.96, "elapsed_time": "0:28:38", "remaining_time": "0:27:34", "throughput": 5526.45, "total_tokens": 9499824}
|
|
{"current_steps": 19310, "total_steps": 37885, "loss": 0.1192, "lr": 1.1403246950983598e-06, "epoch": 2.548502045664511, "percentage": 50.97, "elapsed_time": "0:28:39", "remaining_time": "0:27:33", "throughput": 5526.7, "total_tokens": 9502064}
|
|
{"current_steps": 19315, "total_steps": 37885, "loss": 0.0003, "lr": 1.1398685404573657e-06, "epoch": 2.5491619374422596, "percentage": 50.98, "elapsed_time": "0:28:39", "remaining_time": "0:27:33", "throughput": 5527.08, "total_tokens": 9504560}
|
|
{"current_steps": 19320, "total_steps": 37885, "loss": 0.0002, "lr": 1.139412356130287e-06, "epoch": 2.549821829220008, "percentage": 51.0, "elapsed_time": "0:28:39", "remaining_time": "0:27:32", "throughput": 5527.49, "total_tokens": 9507120}
|
|
{"current_steps": 19325, "total_steps": 37885, "loss": 0.0831, "lr": 1.138956142213945e-06, "epoch": 2.550481720997756, "percentage": 51.01, "elapsed_time": "0:28:40", "remaining_time": "0:27:32", "throughput": 5527.85, "total_tokens": 9509552}
|
|
{"current_steps": 19330, "total_steps": 37885, "loss": 0.0001, "lr": 1.1384998988051684e-06, "epoch": 2.551141612775505, "percentage": 51.02, "elapsed_time": "0:28:40", "remaining_time": "0:27:31", "throughput": 5528.37, "total_tokens": 9512304}
|
|
{"current_steps": 19335, "total_steps": 37885, "loss": 0.0749, "lr": 1.1380436260007914e-06, "epoch": 2.551801504553253, "percentage": 51.04, "elapsed_time": "0:28:40", "remaining_time": "0:27:31", "throughput": 5528.73, "total_tokens": 9514736}
|
|
{"current_steps": 19340, "total_steps": 37885, "loss": 0.0802, "lr": 1.1375873238976542e-06, "epoch": 2.552461396331002, "percentage": 51.05, "elapsed_time": "0:28:41", "remaining_time": "0:27:30", "throughput": 5529.1, "total_tokens": 9517232}
|
|
{"current_steps": 19345, "total_steps": 37885, "loss": 0.0005, "lr": 1.1371309925926034e-06, "epoch": 2.55312128810875, "percentage": 51.06, "elapsed_time": "0:28:41", "remaining_time": "0:27:29", "throughput": 5529.34, "total_tokens": 9519472}
|
|
{"current_steps": 19350, "total_steps": 37885, "loss": 0.0006, "lr": 1.1366746321824928e-06, "epoch": 2.5537811798864984, "percentage": 51.08, "elapsed_time": "0:28:41", "remaining_time": "0:27:29", "throughput": 5529.61, "total_tokens": 9521776}
|
|
{"current_steps": 19355, "total_steps": 37885, "loss": 0.0014, "lr": 1.1362182427641812e-06, "epoch": 2.554441071664247, "percentage": 51.09, "elapsed_time": "0:28:42", "remaining_time": "0:27:28", "throughput": 5529.96, "total_tokens": 9524208}
|
|
{"current_steps": 19360, "total_steps": 37885, "loss": 0.0002, "lr": 1.135761824434534e-06, "epoch": 2.5551009634419954, "percentage": 51.1, "elapsed_time": "0:28:42", "remaining_time": "0:27:28", "throughput": 5530.36, "total_tokens": 9526768}
|
|
{"current_steps": 19365, "total_steps": 37885, "loss": 0.0013, "lr": 1.135305377290423e-06, "epoch": 2.555760855219744, "percentage": 51.12, "elapsed_time": "0:28:42", "remaining_time": "0:27:27", "throughput": 5530.7, "total_tokens": 9529200}
|
|
{"current_steps": 19370, "total_steps": 37885, "loss": 0.1098, "lr": 1.1348489014287248e-06, "epoch": 2.5564207469974924, "percentage": 51.13, "elapsed_time": "0:28:43", "remaining_time": "0:27:27", "throughput": 5531.13, "total_tokens": 9531824}
|
|
{"current_steps": 19375, "total_steps": 37885, "loss": 0.0411, "lr": 1.1343923969463243e-06, "epoch": 2.5570806387752407, "percentage": 51.14, "elapsed_time": "0:28:43", "remaining_time": "0:27:26", "throughput": 5531.44, "total_tokens": 9534192}
|
|
{"current_steps": 19380, "total_steps": 37885, "loss": 0.0, "lr": 1.1339358639401103e-06, "epoch": 2.5577405305529894, "percentage": 51.15, "elapsed_time": "0:28:43", "remaining_time": "0:27:26", "throughput": 5531.82, "total_tokens": 9536688}
|
|
{"current_steps": 19385, "total_steps": 37885, "loss": 0.0505, "lr": 1.1334793025069794e-06, "epoch": 2.5584004223307377, "percentage": 51.17, "elapsed_time": "0:28:44", "remaining_time": "0:27:25", "throughput": 5532.23, "total_tokens": 9539248}
|
|
{"current_steps": 19390, "total_steps": 37885, "loss": 0.0818, "lr": 1.1330227127438332e-06, "epoch": 2.5590603141084864, "percentage": 51.18, "elapsed_time": "0:28:44", "remaining_time": "0:27:25", "throughput": 5532.7, "total_tokens": 9541936}
|
|
{"current_steps": 19395, "total_steps": 37885, "loss": 0.087, "lr": 1.1325660947475792e-06, "epoch": 2.5597202058862347, "percentage": 51.19, "elapsed_time": "0:28:44", "remaining_time": "0:27:24", "throughput": 5532.96, "total_tokens": 9544240}
|
|
{"current_steps": 19400, "total_steps": 37885, "loss": 0.0565, "lr": 1.1321094486151317e-06, "epoch": 2.560380097663983, "percentage": 51.21, "elapsed_time": "0:28:45", "remaining_time": "0:27:23", "throughput": 5533.27, "total_tokens": 9546608}
|
|
{"current_steps": 19405, "total_steps": 37885, "loss": 0.0001, "lr": 1.1316527744434104e-06, "epoch": 2.5610399894417313, "percentage": 51.22, "elapsed_time": "0:28:45", "remaining_time": "0:27:23", "throughput": 5533.62, "total_tokens": 9549040}
|
|
{"current_steps": 19410, "total_steps": 37885, "loss": 0.0177, "lr": 1.131196072329341e-06, "epoch": 2.56169988121948, "percentage": 51.23, "elapsed_time": "0:28:45", "remaining_time": "0:27:22", "throughput": 5534.12, "total_tokens": 9551792}
|
|
{"current_steps": 19415, "total_steps": 37885, "loss": 0.0007, "lr": 1.1307393423698555e-06, "epoch": 2.5623597729972287, "percentage": 51.25, "elapsed_time": "0:28:46", "remaining_time": "0:27:22", "throughput": 5534.57, "total_tokens": 9554480}
|
|
{"current_steps": 19420, "total_steps": 37885, "loss": 0.0, "lr": 1.1302825846618912e-06, "epoch": 2.563019664774977, "percentage": 51.26, "elapsed_time": "0:28:46", "remaining_time": "0:27:21", "throughput": 5534.97, "total_tokens": 9557040}
|
|
{"current_steps": 19425, "total_steps": 37885, "loss": 0.0352, "lr": 1.1298257993023917e-06, "epoch": 2.5636795565527253, "percentage": 51.27, "elapsed_time": "0:28:47", "remaining_time": "0:27:21", "throughput": 5535.37, "total_tokens": 9559600}
|
|
{"current_steps": 19430, "total_steps": 37885, "loss": 0.0736, "lr": 1.1293689863883062e-06, "epoch": 2.5643394483304736, "percentage": 51.29, "elapsed_time": "0:28:47", "remaining_time": "0:27:20", "throughput": 5535.74, "total_tokens": 9562096}
|
|
{"current_steps": 19435, "total_steps": 37885, "loss": 0.0001, "lr": 1.1289121460165907e-06, "epoch": 2.5649993401082223, "percentage": 51.3, "elapsed_time": "0:28:47", "remaining_time": "0:27:20", "throughput": 5536.01, "total_tokens": 9564400}
|
|
{"current_steps": 19440, "total_steps": 37885, "loss": 0.0911, "lr": 1.1284552782842054e-06, "epoch": 2.5656592318859706, "percentage": 51.31, "elapsed_time": "0:28:47", "remaining_time": "0:27:19", "throughput": 5536.33, "total_tokens": 9566768}
|
|
{"current_steps": 19445, "total_steps": 37885, "loss": 0.0004, "lr": 1.1279983832881174e-06, "epoch": 2.5663191236637193, "percentage": 51.33, "elapsed_time": "0:28:48", "remaining_time": "0:27:19", "throughput": 5536.91, "total_tokens": 9569648}
|
|
{"current_steps": 19450, "total_steps": 37885, "loss": 0.0241, "lr": 1.1275414611252996e-06, "epoch": 2.5669790154414676, "percentage": 51.34, "elapsed_time": "0:28:48", "remaining_time": "0:27:18", "throughput": 5537.48, "total_tokens": 9572528}
|
|
{"current_steps": 19455, "total_steps": 37885, "loss": 0.0002, "lr": 1.1270845118927304e-06, "epoch": 2.567638907219216, "percentage": 51.35, "elapsed_time": "0:28:49", "remaining_time": "0:27:17", "throughput": 5537.92, "total_tokens": 9575152}
|
|
{"current_steps": 19460, "total_steps": 37885, "loss": 0.0428, "lr": 1.1266275356873933e-06, "epoch": 2.5682987989969646, "percentage": 51.37, "elapsed_time": "0:28:49", "remaining_time": "0:27:17", "throughput": 5538.32, "total_tokens": 9577712}
|
|
{"current_steps": 19465, "total_steps": 37885, "loss": 0.001, "lr": 1.1261705326062792e-06, "epoch": 2.568958690774713, "percentage": 51.38, "elapsed_time": "0:28:49", "remaining_time": "0:27:16", "throughput": 5538.55, "total_tokens": 9579952}
|
|
{"current_steps": 19470, "total_steps": 37885, "loss": 0.1439, "lr": 1.1257135027463831e-06, "epoch": 2.5696185825524616, "percentage": 51.39, "elapsed_time": "0:28:50", "remaining_time": "0:27:16", "throughput": 5538.96, "total_tokens": 9582512}
|
|
{"current_steps": 19475, "total_steps": 37885, "loss": 0.0001, "lr": 1.1252564462047063e-06, "epoch": 2.57027847433021, "percentage": 51.41, "elapsed_time": "0:28:50", "remaining_time": "0:27:15", "throughput": 5539.12, "total_tokens": 9584624}
|
|
{"current_steps": 19480, "total_steps": 37885, "loss": 0.0707, "lr": 1.124799363078256e-06, "epoch": 2.570938366107958, "percentage": 51.42, "elapsed_time": "0:28:50", "remaining_time": "0:27:15", "throughput": 5539.45, "total_tokens": 9587056}
|
|
{"current_steps": 19485, "total_steps": 37885, "loss": 0.0002, "lr": 1.1243422534640443e-06, "epoch": 2.571598257885707, "percentage": 51.43, "elapsed_time": "0:28:51", "remaining_time": "0:27:14", "throughput": 5539.65, "total_tokens": 9589232}
|
|
{"current_steps": 19490, "total_steps": 37885, "loss": 0.0706, "lr": 1.12388511745909e-06, "epoch": 2.572258149663455, "percentage": 51.45, "elapsed_time": "0:28:51", "remaining_time": "0:27:14", "throughput": 5540.08, "total_tokens": 9591792}
|
|
{"current_steps": 19495, "total_steps": 37885, "loss": 0.0012, "lr": 1.1234279551604164e-06, "epoch": 2.572918041441204, "percentage": 51.46, "elapsed_time": "0:28:51", "remaining_time": "0:27:13", "throughput": 5540.47, "total_tokens": 9594352}
|
|
{"current_steps": 19500, "total_steps": 37885, "loss": 0.0584, "lr": 1.1229707666650531e-06, "epoch": 2.573577933218952, "percentage": 51.47, "elapsed_time": "0:28:52", "remaining_time": "0:27:12", "throughput": 5541.0, "total_tokens": 9597168}
|
|
{"current_steps": 19505, "total_steps": 37885, "loss": 0.0311, "lr": 1.1225135520700355e-06, "epoch": 2.5742378249967004, "percentage": 51.48, "elapsed_time": "0:28:52", "remaining_time": "0:27:12", "throughput": 5541.4, "total_tokens": 9599728}
|
|
{"current_steps": 19510, "total_steps": 37885, "loss": 0.0561, "lr": 1.122056311472403e-06, "epoch": 2.574897716774449, "percentage": 51.5, "elapsed_time": "0:28:52", "remaining_time": "0:27:11", "throughput": 5541.71, "total_tokens": 9602096}
|
|
{"current_steps": 19515, "total_steps": 37885, "loss": 0.0667, "lr": 1.121599044969203e-06, "epoch": 2.5755576085521974, "percentage": 51.51, "elapsed_time": "0:28:53", "remaining_time": "0:27:11", "throughput": 5542.01, "total_tokens": 9604464}
|
|
{"current_steps": 19520, "total_steps": 37885, "loss": 0.0006, "lr": 1.1211417526574858e-06, "epoch": 2.576217500329946, "percentage": 51.52, "elapsed_time": "0:28:53", "remaining_time": "0:27:10", "throughput": 5542.35, "total_tokens": 9606896}
|
|
{"current_steps": 19525, "total_steps": 37885, "loss": 0.0013, "lr": 1.1206844346343089e-06, "epoch": 2.5768773921076944, "percentage": 51.54, "elapsed_time": "0:28:53", "remaining_time": "0:27:10", "throughput": 5542.78, "total_tokens": 9609520}
|
|
{"current_steps": 19530, "total_steps": 37885, "loss": 0.0006, "lr": 1.1202270909967347e-06, "epoch": 2.5775372838854427, "percentage": 51.55, "elapsed_time": "0:28:54", "remaining_time": "0:27:09", "throughput": 5543.32, "total_tokens": 9612336}
|
|
{"current_steps": 19535, "total_steps": 37885, "loss": 0.0861, "lr": 1.119769721841831e-06, "epoch": 2.5781971756631914, "percentage": 51.56, "elapsed_time": "0:28:54", "remaining_time": "0:27:09", "throughput": 5543.42, "total_tokens": 9614320}
|
|
{"current_steps": 19540, "total_steps": 37885, "loss": 0.0004, "lr": 1.119312327266671e-06, "epoch": 2.5788570674409397, "percentage": 51.58, "elapsed_time": "0:28:54", "remaining_time": "0:27:08", "throughput": 5543.95, "total_tokens": 9617136}
|
|
{"current_steps": 19545, "total_steps": 37885, "loss": 0.0001, "lr": 1.1188549073683338e-06, "epoch": 2.5795169592186884, "percentage": 51.59, "elapsed_time": "0:28:55", "remaining_time": "0:27:08", "throughput": 5544.38, "total_tokens": 9619760}
|
|
{"current_steps": 19550, "total_steps": 37885, "loss": 0.0, "lr": 1.1183974622439032e-06, "epoch": 2.5801768509964367, "percentage": 51.6, "elapsed_time": "0:28:55", "remaining_time": "0:27:07", "throughput": 5544.81, "total_tokens": 9622320}
|
|
{"current_steps": 19555, "total_steps": 37885, "loss": 0.0006, "lr": 1.1179399919904683e-06, "epoch": 2.580836742774185, "percentage": 51.62, "elapsed_time": "0:28:55", "remaining_time": "0:27:06", "throughput": 5545.22, "total_tokens": 9624880}
|
|
{"current_steps": 19560, "total_steps": 37885, "loss": 0.0987, "lr": 1.1174824967051244e-06, "epoch": 2.5814966345519332, "percentage": 51.63, "elapsed_time": "0:28:56", "remaining_time": "0:27:06", "throughput": 5545.58, "total_tokens": 9627312}
|
|
{"current_steps": 19565, "total_steps": 37885, "loss": 0.0, "lr": 1.117024976484971e-06, "epoch": 2.582156526329682, "percentage": 51.64, "elapsed_time": "0:28:56", "remaining_time": "0:27:05", "throughput": 5546.07, "total_tokens": 9630000}
|
|
{"current_steps": 19570, "total_steps": 37885, "loss": 0.1392, "lr": 1.1165674314271142e-06, "epoch": 2.5828164181074302, "percentage": 51.66, "elapsed_time": "0:28:56", "remaining_time": "0:27:05", "throughput": 5546.42, "total_tokens": 9632432}
|
|
{"current_steps": 19575, "total_steps": 37885, "loss": 0.1115, "lr": 1.1161098616286641e-06, "epoch": 2.583476309885179, "percentage": 51.67, "elapsed_time": "0:28:57", "remaining_time": "0:27:04", "throughput": 5546.85, "total_tokens": 9634992}
|
|
{"current_steps": 19580, "total_steps": 37885, "loss": 0.0023, "lr": 1.1156522671867366e-06, "epoch": 2.5841362016629272, "percentage": 51.68, "elapsed_time": "0:28:57", "remaining_time": "0:27:04", "throughput": 5547.14, "total_tokens": 9637296}
|
|
{"current_steps": 19585, "total_steps": 37885, "loss": 0.0005, "lr": 1.1151946481984528e-06, "epoch": 2.5847960934406755, "percentage": 51.7, "elapsed_time": "0:28:57", "remaining_time": "0:27:03", "throughput": 5547.47, "total_tokens": 9639664}
|
|
{"current_steps": 19590, "total_steps": 37885, "loss": 0.0971, "lr": 1.1147370047609391e-06, "epoch": 2.5854559852184242, "percentage": 51.71, "elapsed_time": "0:28:58", "remaining_time": "0:27:03", "throughput": 5547.88, "total_tokens": 9642224}
|
|
{"current_steps": 19595, "total_steps": 37885, "loss": 0.1041, "lr": 1.1142793369713273e-06, "epoch": 2.5861158769961725, "percentage": 51.72, "elapsed_time": "0:28:58", "remaining_time": "0:27:02", "throughput": 5548.2, "total_tokens": 9644592}
|
|
{"current_steps": 19600, "total_steps": 37885, "loss": 0.0555, "lr": 1.1138216449267536e-06, "epoch": 2.5867757687739212, "percentage": 51.74, "elapsed_time": "0:28:58", "remaining_time": "0:27:02", "throughput": 5548.62, "total_tokens": 9647152}
|
|
{"current_steps": 19605, "total_steps": 37885, "loss": 0.0006, "lr": 1.11336392872436e-06, "epoch": 2.5874356605516695, "percentage": 51.75, "elapsed_time": "0:28:58", "remaining_time": "0:27:01", "throughput": 5548.98, "total_tokens": 9649584}
|
|
{"current_steps": 19610, "total_steps": 37885, "loss": 0.0612, "lr": 1.112906188461293e-06, "epoch": 2.588095552329418, "percentage": 51.76, "elapsed_time": "0:28:59", "remaining_time": "0:27:00", "throughput": 5549.35, "total_tokens": 9652080}
|
|
{"current_steps": 19615, "total_steps": 37885, "loss": 0.0002, "lr": 1.1124484242347055e-06, "epoch": 2.5887554441071665, "percentage": 51.78, "elapsed_time": "0:28:59", "remaining_time": "0:27:00", "throughput": 5549.67, "total_tokens": 9654448}
|
|
{"current_steps": 19620, "total_steps": 37885, "loss": 0.0007, "lr": 1.1119906361417544e-06, "epoch": 2.589415335884915, "percentage": 51.79, "elapsed_time": "0:28:59", "remaining_time": "0:26:59", "throughput": 5550.09, "total_tokens": 9657008}
|
|
{"current_steps": 19625, "total_steps": 37885, "loss": 0.0618, "lr": 1.1115328242796017e-06, "epoch": 2.5900752276626635, "percentage": 51.8, "elapsed_time": "0:29:00", "remaining_time": "0:26:59", "throughput": 5550.41, "total_tokens": 9659376}
|
|
{"current_steps": 19630, "total_steps": 37885, "loss": 0.0002, "lr": 1.1110749887454146e-06, "epoch": 2.590735119440412, "percentage": 51.81, "elapsed_time": "0:29:00", "remaining_time": "0:26:58", "throughput": 5550.9, "total_tokens": 9662064}
|
|
{"current_steps": 19635, "total_steps": 37885, "loss": 0.0002, "lr": 1.110617129636365e-06, "epoch": 2.59139501121816, "percentage": 51.83, "elapsed_time": "0:29:00", "remaining_time": "0:26:58", "throughput": 5551.03, "total_tokens": 9664112}
|
|
{"current_steps": 19640, "total_steps": 37885, "loss": 0.0611, "lr": 1.1101592470496315e-06, "epoch": 2.592054902995909, "percentage": 51.84, "elapsed_time": "0:29:01", "remaining_time": "0:26:57", "throughput": 5551.23, "total_tokens": 9666288}
|
|
{"current_steps": 19645, "total_steps": 37885, "loss": 0.0007, "lr": 1.1097013410823952e-06, "epoch": 2.592714794773657, "percentage": 51.85, "elapsed_time": "0:29:01", "remaining_time": "0:26:57", "throughput": 5551.48, "total_tokens": 9668528}
|
|
{"current_steps": 19650, "total_steps": 37885, "loss": 0.0945, "lr": 1.1092434118318435e-06, "epoch": 2.593374686551406, "percentage": 51.87, "elapsed_time": "0:29:01", "remaining_time": "0:26:56", "throughput": 5551.77, "total_tokens": 9670832}
|
|
{"current_steps": 19655, "total_steps": 37885, "loss": 0.0826, "lr": 1.1087854593951688e-06, "epoch": 2.594034578329154, "percentage": 51.88, "elapsed_time": "0:29:02", "remaining_time": "0:26:55", "throughput": 5551.98, "total_tokens": 9673008}
|
|
{"current_steps": 19660, "total_steps": 37885, "loss": 0.2127, "lr": 1.108327483869568e-06, "epoch": 2.5946944701069024, "percentage": 51.89, "elapsed_time": "0:29:02", "remaining_time": "0:26:55", "throughput": 5551.57, "total_tokens": 9675568}
|
|
{"current_steps": 19665, "total_steps": 37885, "loss": 0.0008, "lr": 1.1078694853522435e-06, "epoch": 2.595354361884651, "percentage": 51.91, "elapsed_time": "0:29:03", "remaining_time": "0:26:55", "throughput": 5552.01, "total_tokens": 9678192}
|
|
{"current_steps": 19670, "total_steps": 37885, "loss": 0.0001, "lr": 1.1074114639404015e-06, "epoch": 2.5960142536623994, "percentage": 51.92, "elapsed_time": "0:29:03", "remaining_time": "0:26:54", "throughput": 5552.36, "total_tokens": 9680624}
|
|
{"current_steps": 19675, "total_steps": 37885, "loss": 0.0003, "lr": 1.1069534197312544e-06, "epoch": 2.596674145440148, "percentage": 51.93, "elapsed_time": "0:29:03", "remaining_time": "0:26:53", "throughput": 5552.71, "total_tokens": 9683056}
|
|
{"current_steps": 19680, "total_steps": 37885, "loss": 0.091, "lr": 1.1064953528220181e-06, "epoch": 2.5973340372178964, "percentage": 51.95, "elapsed_time": "0:29:04", "remaining_time": "0:26:53", "throughput": 5553.12, "total_tokens": 9685616}
|
|
{"current_steps": 19685, "total_steps": 37885, "loss": 0.0002, "lr": 1.1060372633099146e-06, "epoch": 2.5979939289956446, "percentage": 51.96, "elapsed_time": "0:29:04", "remaining_time": "0:26:52", "throughput": 5553.63, "total_tokens": 9688368}
|
|
{"current_steps": 19690, "total_steps": 37885, "loss": 0.0001, "lr": 1.10557915129217e-06, "epoch": 2.598653820773393, "percentage": 51.97, "elapsed_time": "0:29:04", "remaining_time": "0:26:52", "throughput": 5553.91, "total_tokens": 9690672}
|
|
{"current_steps": 19695, "total_steps": 37885, "loss": 0.0007, "lr": 1.1051210168660146e-06, "epoch": 2.5993137125511416, "percentage": 51.99, "elapsed_time": "0:29:05", "remaining_time": "0:26:51", "throughput": 5554.08, "total_tokens": 9692784}
|
|
{"current_steps": 19700, "total_steps": 37885, "loss": 0.0424, "lr": 1.1046628601286852e-06, "epoch": 2.59997360432889, "percentage": 52.0, "elapsed_time": "0:29:05", "remaining_time": "0:26:51", "throughput": 5554.43, "total_tokens": 9695216}
|
|
{"current_steps": 19705, "total_steps": 37885, "loss": 0.0569, "lr": 1.1042046811774213e-06, "epoch": 2.6006334961066386, "percentage": 52.01, "elapsed_time": "0:29:05", "remaining_time": "0:26:50", "throughput": 5554.67, "total_tokens": 9697456}
|
|
{"current_steps": 19710, "total_steps": 37885, "loss": 0.0568, "lr": 1.1037464801094684e-06, "epoch": 2.601293387884387, "percentage": 52.03, "elapsed_time": "0:29:06", "remaining_time": "0:26:50", "throughput": 5554.95, "total_tokens": 9699760}
|
|
{"current_steps": 19715, "total_steps": 37885, "loss": 0.0536, "lr": 1.1032882570220764e-06, "epoch": 2.601953279662135, "percentage": 52.04, "elapsed_time": "0:29:06", "remaining_time": "0:26:49", "throughput": 5555.25, "total_tokens": 9702128}
|
|
{"current_steps": 19720, "total_steps": 37885, "loss": 0.0003, "lr": 1.1028300120124997e-06, "epoch": 2.602613171439884, "percentage": 52.05, "elapsed_time": "0:29:06", "remaining_time": "0:26:49", "throughput": 5555.83, "total_tokens": 9705008}
|
|
{"current_steps": 19725, "total_steps": 37885, "loss": 0.0, "lr": 1.1023717451779977e-06, "epoch": 2.603273063217632, "percentage": 52.07, "elapsed_time": "0:29:07", "remaining_time": "0:26:48", "throughput": 5556.11, "total_tokens": 9707312}
|
|
{"current_steps": 19730, "total_steps": 37885, "loss": 0.0002, "lr": 1.1019134566158341e-06, "epoch": 2.603932954995381, "percentage": 52.08, "elapsed_time": "0:29:07", "remaining_time": "0:26:47", "throughput": 5556.56, "total_tokens": 9709936}
|
|
{"current_steps": 19735, "total_steps": 37885, "loss": 0.0001, "lr": 1.1014551464232773e-06, "epoch": 2.604592846773129, "percentage": 52.09, "elapsed_time": "0:29:07", "remaining_time": "0:26:47", "throughput": 5556.86, "total_tokens": 9712304}
|
|
{"current_steps": 19740, "total_steps": 37885, "loss": 0.0626, "lr": 1.1009968146976003e-06, "epoch": 2.6052527385508775, "percentage": 52.11, "elapsed_time": "0:29:08", "remaining_time": "0:26:46", "throughput": 5557.22, "total_tokens": 9714736}
|
|
{"current_steps": 19745, "total_steps": 37885, "loss": 0.1001, "lr": 1.100538461536081e-06, "epoch": 2.605912630328626, "percentage": 52.12, "elapsed_time": "0:29:08", "remaining_time": "0:26:46", "throughput": 5557.65, "total_tokens": 9717360}
|
|
{"current_steps": 19750, "total_steps": 37885, "loss": 0.0181, "lr": 1.1000800870360012e-06, "epoch": 2.6065725221063745, "percentage": 52.13, "elapsed_time": "0:29:08", "remaining_time": "0:26:45", "throughput": 5558.1, "total_tokens": 9719984}
|
|
{"current_steps": 19755, "total_steps": 37885, "loss": 0.0023, "lr": 1.0996216912946472e-06, "epoch": 2.607232413884123, "percentage": 52.14, "elapsed_time": "0:29:09", "remaining_time": "0:26:45", "throughput": 5558.41, "total_tokens": 9722352}
|
|
{"current_steps": 19760, "total_steps": 37885, "loss": 0.0001, "lr": 1.099163274409311e-06, "epoch": 2.6078923056618715, "percentage": 52.16, "elapsed_time": "0:29:09", "remaining_time": "0:26:44", "throughput": 5558.79, "total_tokens": 9724848}
|
|
{"current_steps": 19765, "total_steps": 37885, "loss": 0.1899, "lr": 1.098704836477288e-06, "epoch": 2.6085521974396197, "percentage": 52.17, "elapsed_time": "0:29:09", "remaining_time": "0:26:44", "throughput": 5559.24, "total_tokens": 9727472}
|
|
{"current_steps": 19770, "total_steps": 37885, "loss": 0.0, "lr": 1.098246377595878e-06, "epoch": 2.6092120892173685, "percentage": 52.18, "elapsed_time": "0:29:10", "remaining_time": "0:26:43", "throughput": 5559.55, "total_tokens": 9729840}
|
|
{"current_steps": 19775, "total_steps": 37885, "loss": 0.0006, "lr": 1.097787897862386e-06, "epoch": 2.6098719809951167, "percentage": 52.2, "elapsed_time": "0:29:10", "remaining_time": "0:26:43", "throughput": 5560.06, "total_tokens": 9732592}
|
|
{"current_steps": 19780, "total_steps": 37885, "loss": 0.1143, "lr": 1.097329397374121e-06, "epoch": 2.6105318727728655, "percentage": 52.21, "elapsed_time": "0:29:10", "remaining_time": "0:26:42", "throughput": 5560.37, "total_tokens": 9734960}
|
|
{"current_steps": 19785, "total_steps": 37885, "loss": 0.0001, "lr": 1.0968708762283955e-06, "epoch": 2.6111917645506137, "percentage": 52.22, "elapsed_time": "0:29:11", "remaining_time": "0:26:41", "throughput": 5560.69, "total_tokens": 9737328}
|
|
{"current_steps": 19790, "total_steps": 37885, "loss": 0.1288, "lr": 1.0964123345225285e-06, "epoch": 2.611851656328362, "percentage": 52.24, "elapsed_time": "0:29:11", "remaining_time": "0:26:41", "throughput": 5561.1, "total_tokens": 9739888}
|
|
{"current_steps": 19795, "total_steps": 37885, "loss": 0.1315, "lr": 1.0959537723538414e-06, "epoch": 2.6125115481061107, "percentage": 52.25, "elapsed_time": "0:29:11", "remaining_time": "0:26:40", "throughput": 5561.58, "total_tokens": 9742576}
|
|
{"current_steps": 19800, "total_steps": 37885, "loss": 0.014, "lr": 1.0954951898196614e-06, "epoch": 2.613171439883859, "percentage": 52.26, "elapsed_time": "0:29:12", "remaining_time": "0:26:40", "throughput": 5562.06, "total_tokens": 9745264}
|
|
{"current_steps": 19805, "total_steps": 37885, "loss": 0.1164, "lr": 1.0950365870173186e-06, "epoch": 2.6138313316616077, "percentage": 52.28, "elapsed_time": "0:29:12", "remaining_time": "0:26:39", "throughput": 5562.33, "total_tokens": 9747568}
|
|
{"current_steps": 19810, "total_steps": 37885, "loss": 0.0801, "lr": 1.0945779640441484e-06, "epoch": 2.614491223439356, "percentage": 52.29, "elapsed_time": "0:29:12", "remaining_time": "0:26:39", "throughput": 5562.78, "total_tokens": 9750192}
|
|
{"current_steps": 19815, "total_steps": 37885, "loss": 0.0024, "lr": 1.0941193209974902e-06, "epoch": 2.6151511152171043, "percentage": 52.3, "elapsed_time": "0:29:13", "remaining_time": "0:26:38", "throughput": 5563.2, "total_tokens": 9752752}
|
|
{"current_steps": 19820, "total_steps": 37885, "loss": 0.0005, "lr": 1.0936606579746877e-06, "epoch": 2.6158110069948526, "percentage": 52.32, "elapsed_time": "0:29:13", "remaining_time": "0:26:38", "throughput": 5563.71, "total_tokens": 9755504}
|
|
{"current_steps": 19825, "total_steps": 37885, "loss": 0.0253, "lr": 1.0932019750730888e-06, "epoch": 2.6164708987726013, "percentage": 52.33, "elapsed_time": "0:29:13", "remaining_time": "0:26:37", "throughput": 5564.06, "total_tokens": 9757936}
|
|
{"current_steps": 19830, "total_steps": 37885, "loss": 0.0, "lr": 1.0927432723900455e-06, "epoch": 2.6171307905503496, "percentage": 52.34, "elapsed_time": "0:29:14", "remaining_time": "0:26:37", "throughput": 5564.41, "total_tokens": 9760368}
|
|
{"current_steps": 19835, "total_steps": 37885, "loss": 0.0281, "lr": 1.0922845500229143e-06, "epoch": 2.6177906823280983, "percentage": 52.36, "elapsed_time": "0:29:14", "remaining_time": "0:26:36", "throughput": 5564.69, "total_tokens": 9762672}
|
|
{"current_steps": 19840, "total_steps": 37885, "loss": 0.0326, "lr": 1.0918258080690557e-06, "epoch": 2.6184505741058466, "percentage": 52.37, "elapsed_time": "0:29:14", "remaining_time": "0:26:35", "throughput": 5565.01, "total_tokens": 9765040}
|
|
{"current_steps": 19845, "total_steps": 37885, "loss": 0.0002, "lr": 1.0913670466258343e-06, "epoch": 2.619110465883595, "percentage": 52.38, "elapsed_time": "0:29:15", "remaining_time": "0:26:35", "throughput": 5565.38, "total_tokens": 9767536}
|
|
{"current_steps": 19850, "total_steps": 37885, "loss": 0.0271, "lr": 1.090908265790619e-06, "epoch": 2.6197703576613436, "percentage": 52.4, "elapsed_time": "0:29:15", "remaining_time": "0:26:34", "throughput": 5565.77, "total_tokens": 9770032}
|
|
{"current_steps": 19855, "total_steps": 37885, "loss": 0.0798, "lr": 1.0904494656607824e-06, "epoch": 2.620430249439092, "percentage": 52.41, "elapsed_time": "0:29:15", "remaining_time": "0:26:34", "throughput": 5566.2, "total_tokens": 9772656}
|
|
{"current_steps": 19860, "total_steps": 37885, "loss": 0.0459, "lr": 1.0899906463337016e-06, "epoch": 2.6210901412168406, "percentage": 52.42, "elapsed_time": "0:29:16", "remaining_time": "0:26:33", "throughput": 5566.71, "total_tokens": 9775408}
|
|
{"current_steps": 19865, "total_steps": 37885, "loss": 0.0001, "lr": 1.0895318079067576e-06, "epoch": 2.621750032994589, "percentage": 52.44, "elapsed_time": "0:29:16", "remaining_time": "0:26:33", "throughput": 5566.99, "total_tokens": 9777712}
|
|
{"current_steps": 19870, "total_steps": 37885, "loss": 0.1895, "lr": 1.0890729504773359e-06, "epoch": 2.622409924772337, "percentage": 52.45, "elapsed_time": "0:29:16", "remaining_time": "0:26:32", "throughput": 5567.33, "total_tokens": 9780144}
|
|
{"current_steps": 19875, "total_steps": 37885, "loss": 0.0005, "lr": 1.0886140741428257e-06, "epoch": 2.623069816550086, "percentage": 52.46, "elapsed_time": "0:29:17", "remaining_time": "0:26:32", "throughput": 5567.87, "total_tokens": 9782960}
|
|
{"current_steps": 19880, "total_steps": 37885, "loss": 0.0887, "lr": 1.08815517900062e-06, "epoch": 2.623729708327834, "percentage": 52.47, "elapsed_time": "0:29:17", "remaining_time": "0:26:31", "throughput": 5568.08, "total_tokens": 9785136}
|
|
{"current_steps": 19885, "total_steps": 37885, "loss": 0.0035, "lr": 1.0876962651481159e-06, "epoch": 2.624389600105583, "percentage": 52.49, "elapsed_time": "0:29:17", "remaining_time": "0:26:31", "throughput": 5568.5, "total_tokens": 9787696}
|
|
{"current_steps": 19890, "total_steps": 37885, "loss": 0.0009, "lr": 1.0872373326827143e-06, "epoch": 2.625049491883331, "percentage": 52.5, "elapsed_time": "0:29:18", "remaining_time": "0:26:30", "throughput": 5568.88, "total_tokens": 9790192}
|
|
{"current_steps": 19895, "total_steps": 37885, "loss": 0.0338, "lr": 1.0867783817018207e-06, "epoch": 2.6257093836610794, "percentage": 52.51, "elapsed_time": "0:29:18", "remaining_time": "0:26:29", "throughput": 5569.28, "total_tokens": 9792752}
|
|
{"current_steps": 19900, "total_steps": 37885, "loss": 0.1192, "lr": 1.086319412302844e-06, "epoch": 2.626369275438828, "percentage": 52.53, "elapsed_time": "0:29:18", "remaining_time": "0:26:29", "throughput": 5569.73, "total_tokens": 9795376}
|
|
{"current_steps": 19905, "total_steps": 37885, "loss": 0.0018, "lr": 1.085860424583197e-06, "epoch": 2.6270291672165764, "percentage": 52.54, "elapsed_time": "0:29:19", "remaining_time": "0:26:28", "throughput": 5570.1, "total_tokens": 9797872}
|
|
{"current_steps": 19910, "total_steps": 37885, "loss": 0.0488, "lr": 1.0854014186402968e-06, "epoch": 2.627689058994325, "percentage": 52.55, "elapsed_time": "0:29:19", "remaining_time": "0:26:28", "throughput": 5570.5, "total_tokens": 9800432}
|
|
{"current_steps": 19915, "total_steps": 37885, "loss": 0.0001, "lr": 1.0849423945715637e-06, "epoch": 2.6283489507720734, "percentage": 52.57, "elapsed_time": "0:29:19", "remaining_time": "0:26:27", "throughput": 5570.9, "total_tokens": 9802992}
|
|
{"current_steps": 19920, "total_steps": 37885, "loss": 0.0881, "lr": 1.0844833524744226e-06, "epoch": 2.6290088425498217, "percentage": 52.58, "elapsed_time": "0:29:20", "remaining_time": "0:26:27", "throughput": 5571.24, "total_tokens": 9805424}
|
|
{"current_steps": 19925, "total_steps": 37885, "loss": 0.0001, "lr": 1.0840242924463016e-06, "epoch": 2.6296687343275704, "percentage": 52.59, "elapsed_time": "0:29:20", "remaining_time": "0:26:26", "throughput": 5571.68, "total_tokens": 9808048}
|
|
{"current_steps": 19930, "total_steps": 37885, "loss": 0.0007, "lr": 1.0835652145846335e-06, "epoch": 2.6303286261053187, "percentage": 52.61, "elapsed_time": "0:29:20", "remaining_time": "0:26:26", "throughput": 5572.08, "total_tokens": 9810608}
|
|
{"current_steps": 19935, "total_steps": 37885, "loss": 0.1537, "lr": 1.0831061189868531e-06, "epoch": 2.6309885178830674, "percentage": 52.62, "elapsed_time": "0:29:21", "remaining_time": "0:26:25", "throughput": 5572.31, "total_tokens": 9812848}
|
|
{"current_steps": 19940, "total_steps": 37885, "loss": 0.1011, "lr": 1.0826470057504008e-06, "epoch": 2.6316484096608157, "percentage": 52.63, "elapsed_time": "0:29:21", "remaining_time": "0:26:25", "throughput": 5572.55, "total_tokens": 9815088}
|
|
{"current_steps": 19945, "total_steps": 37885, "loss": 0.0767, "lr": 1.0821878749727204e-06, "epoch": 2.632308301438564, "percentage": 52.65, "elapsed_time": "0:29:21", "remaining_time": "0:26:24", "throughput": 5572.89, "total_tokens": 9817520}
|
|
{"current_steps": 19950, "total_steps": 37885, "loss": 0.0003, "lr": 1.0817287267512583e-06, "epoch": 2.6329681932163123, "percentage": 52.66, "elapsed_time": "0:29:21", "remaining_time": "0:26:24", "throughput": 5573.29, "total_tokens": 9820080}
|
|
{"current_steps": 19955, "total_steps": 37885, "loss": 0.1129, "lr": 1.0812695611834664e-06, "epoch": 2.633628084994061, "percentage": 52.67, "elapsed_time": "0:29:22", "remaining_time": "0:26:23", "throughput": 5573.53, "total_tokens": 9822320}
|
|
{"current_steps": 19960, "total_steps": 37885, "loss": 0.0007, "lr": 1.0808103783667981e-06, "epoch": 2.6342879767718093, "percentage": 52.69, "elapsed_time": "0:29:22", "remaining_time": "0:26:22", "throughput": 5573.68, "total_tokens": 9824432}
|
|
{"current_steps": 19965, "total_steps": 37885, "loss": 0.0001, "lr": 1.0803511783987122e-06, "epoch": 2.634947868549558, "percentage": 52.7, "elapsed_time": "0:29:22", "remaining_time": "0:26:22", "throughput": 5574.08, "total_tokens": 9826992}
|
|
{"current_steps": 19970, "total_steps": 37885, "loss": 0.0016, "lr": 1.0798919613766707e-06, "epoch": 2.6356077603273063, "percentage": 52.71, "elapsed_time": "0:29:23", "remaining_time": "0:26:21", "throughput": 5574.41, "total_tokens": 9829424}
|
|
{"current_steps": 19975, "total_steps": 37885, "loss": 0.1057, "lr": 1.079432727398139e-06, "epoch": 2.6362676521050545, "percentage": 52.73, "elapsed_time": "0:29:23", "remaining_time": "0:26:21", "throughput": 5574.86, "total_tokens": 9832048}
|
|
{"current_steps": 19980, "total_steps": 37885, "loss": 0.0002, "lr": 1.078973476560586e-06, "epoch": 2.6369275438828033, "percentage": 52.74, "elapsed_time": "0:29:23", "remaining_time": "0:26:20", "throughput": 5575.13, "total_tokens": 9834352}
|
|
{"current_steps": 19985, "total_steps": 37885, "loss": 0.0002, "lr": 1.0785142089614843e-06, "epoch": 2.6375874356605515, "percentage": 52.75, "elapsed_time": "0:29:24", "remaining_time": "0:26:20", "throughput": 5575.47, "total_tokens": 9836784}
|
|
{"current_steps": 19990, "total_steps": 37885, "loss": 0.0017, "lr": 1.0780549246983105e-06, "epoch": 2.6382473274383003, "percentage": 52.76, "elapsed_time": "0:29:24", "remaining_time": "0:26:19", "throughput": 5575.77, "total_tokens": 9839152}
|
|
{"current_steps": 19995, "total_steps": 37885, "loss": 0.0005, "lr": 1.077595623868544e-06, "epoch": 2.6389072192160485, "percentage": 52.78, "elapsed_time": "0:29:24", "remaining_time": "0:26:19", "throughput": 5576.18, "total_tokens": 9841712}
|
|
{"current_steps": 20000, "total_steps": 37885, "loss": 0.1022, "lr": 1.0771363065696684e-06, "epoch": 2.639567110993797, "percentage": 52.79, "elapsed_time": "0:29:25", "remaining_time": "0:26:18", "throughput": 5576.52, "total_tokens": 9844144}
|
|
{"current_steps": 20005, "total_steps": 37885, "loss": 0.0979, "lr": 1.0766769728991705e-06, "epoch": 2.6402270027715455, "percentage": 52.8, "elapsed_time": "0:29:25", "remaining_time": "0:26:18", "throughput": 5576.84, "total_tokens": 9846512}
|
|
{"current_steps": 20010, "total_steps": 37885, "loss": 0.0752, "lr": 1.0762176229545398e-06, "epoch": 2.640886894549294, "percentage": 52.82, "elapsed_time": "0:29:25", "remaining_time": "0:26:17", "throughput": 5577.28, "total_tokens": 9849136}
|
|
{"current_steps": 20015, "total_steps": 37885, "loss": 0.0312, "lr": 1.0757582568332711e-06, "epoch": 2.6415467863270425, "percentage": 52.83, "elapsed_time": "0:29:26", "remaining_time": "0:26:16", "throughput": 5577.59, "total_tokens": 9851504}
|
|
{"current_steps": 20020, "total_steps": 37885, "loss": 0.0831, "lr": 1.0752988746328607e-06, "epoch": 2.642206678104791, "percentage": 52.84, "elapsed_time": "0:29:26", "remaining_time": "0:26:16", "throughput": 5577.89, "total_tokens": 9853872}
|
|
{"current_steps": 20025, "total_steps": 37885, "loss": 0.0416, "lr": 1.0748394764508095e-06, "epoch": 2.642866569882539, "percentage": 52.86, "elapsed_time": "0:29:26", "remaining_time": "0:26:15", "throughput": 5578.2, "total_tokens": 9856240}
|
|
{"current_steps": 20030, "total_steps": 37885, "loss": 0.0012, "lr": 1.0743800623846213e-06, "epoch": 2.643526461660288, "percentage": 52.87, "elapsed_time": "0:29:27", "remaining_time": "0:26:15", "throughput": 5578.45, "total_tokens": 9858480}
|
|
{"current_steps": 20035, "total_steps": 37885, "loss": 0.0004, "lr": 1.0739206325318038e-06, "epoch": 2.644186353438036, "percentage": 52.88, "elapsed_time": "0:29:27", "remaining_time": "0:26:14", "throughput": 5578.73, "total_tokens": 9860784}
|
|
{"current_steps": 20040, "total_steps": 37885, "loss": 0.0001, "lr": 1.0734611869898668e-06, "epoch": 2.644846245215785, "percentage": 52.9, "elapsed_time": "0:29:27", "remaining_time": "0:26:14", "throughput": 5579.17, "total_tokens": 9863408}
|
|
{"current_steps": 20045, "total_steps": 37885, "loss": 0.0003, "lr": 1.0730017258563253e-06, "epoch": 2.645506136993533, "percentage": 52.91, "elapsed_time": "0:29:28", "remaining_time": "0:26:13", "throughput": 5579.52, "total_tokens": 9865840}
|
|
{"current_steps": 20050, "total_steps": 37885, "loss": 0.0004, "lr": 1.0725422492286957e-06, "epoch": 2.6461660287712814, "percentage": 52.92, "elapsed_time": "0:29:28", "remaining_time": "0:26:13", "throughput": 5579.84, "total_tokens": 9868208}
|
|
{"current_steps": 20055, "total_steps": 37885, "loss": 0.0818, "lr": 1.0720827572044995e-06, "epoch": 2.64682592054903, "percentage": 52.94, "elapsed_time": "0:29:28", "remaining_time": "0:26:12", "throughput": 5580.01, "total_tokens": 9870320}
|
|
{"current_steps": 20060, "total_steps": 37885, "loss": 0.0457, "lr": 1.0716232498812598e-06, "epoch": 2.6474858123267784, "percentage": 52.95, "elapsed_time": "0:29:29", "remaining_time": "0:26:12", "throughput": 5580.35, "total_tokens": 9872752}
|
|
{"current_steps": 20065, "total_steps": 37885, "loss": 0.0002, "lr": 1.0711637273565037e-06, "epoch": 2.648145704104527, "percentage": 52.96, "elapsed_time": "0:29:29", "remaining_time": "0:26:11", "throughput": 5580.79, "total_tokens": 9875376}
|
|
{"current_steps": 20070, "total_steps": 37885, "loss": 0.002, "lr": 1.0707041897277623e-06, "epoch": 2.6488055958822754, "percentage": 52.98, "elapsed_time": "0:29:29", "remaining_time": "0:26:11", "throughput": 5581.06, "total_tokens": 9877680}
|
|
{"current_steps": 20075, "total_steps": 37885, "loss": 0.1485, "lr": 1.0702446370925682e-06, "epoch": 2.6494654876600237, "percentage": 52.99, "elapsed_time": "0:29:30", "remaining_time": "0:26:10", "throughput": 5581.43, "total_tokens": 9880176}
|
|
{"current_steps": 20080, "total_steps": 37885, "loss": 0.0, "lr": 1.069785069548459e-06, "epoch": 2.650125379437772, "percentage": 53.0, "elapsed_time": "0:29:30", "remaining_time": "0:26:09", "throughput": 5581.81, "total_tokens": 9882672}
|
|
{"current_steps": 20085, "total_steps": 37885, "loss": 0.0884, "lr": 1.0693254871929737e-06, "epoch": 2.6507852712155207, "percentage": 53.02, "elapsed_time": "0:29:30", "remaining_time": "0:26:09", "throughput": 5582.19, "total_tokens": 9885168}
|
|
{"current_steps": 20090, "total_steps": 37885, "loss": 0.0369, "lr": 1.068865890123656e-06, "epoch": 2.6514451629932694, "percentage": 53.03, "elapsed_time": "0:29:31", "remaining_time": "0:26:08", "throughput": 5582.59, "total_tokens": 9887728}
|
|
{"current_steps": 20095, "total_steps": 37885, "loss": 0.0975, "lr": 1.068406278438052e-06, "epoch": 2.6521050547710177, "percentage": 53.04, "elapsed_time": "0:29:31", "remaining_time": "0:26:08", "throughput": 5582.8, "total_tokens": 9889904}
|
|
{"current_steps": 20100, "total_steps": 37885, "loss": 0.0004, "lr": 1.0679466522337102e-06, "epoch": 2.652764946548766, "percentage": 53.06, "elapsed_time": "0:29:31", "remaining_time": "0:26:07", "throughput": 5583.1, "total_tokens": 9892272}
|
|
{"current_steps": 20105, "total_steps": 37885, "loss": 0.0012, "lr": 1.0674870116081838e-06, "epoch": 2.653424838326514, "percentage": 53.07, "elapsed_time": "0:29:32", "remaining_time": "0:26:07", "throughput": 5583.51, "total_tokens": 9894832}
|
|
{"current_steps": 20110, "total_steps": 37885, "loss": 0.0145, "lr": 1.067027356659028e-06, "epoch": 2.654084730104263, "percentage": 53.08, "elapsed_time": "0:29:32", "remaining_time": "0:26:06", "throughput": 5583.97, "total_tokens": 9897520}
|
|
{"current_steps": 20115, "total_steps": 37885, "loss": 0.0002, "lr": 1.066567687483801e-06, "epoch": 2.654744621882011, "percentage": 53.09, "elapsed_time": "0:29:32", "remaining_time": "0:26:06", "throughput": 5584.17, "total_tokens": 9899696}
|
|
{"current_steps": 20120, "total_steps": 37885, "loss": 0.0003, "lr": 1.0661080041800642e-06, "epoch": 2.65540451365976, "percentage": 53.11, "elapsed_time": "0:29:33", "remaining_time": "0:26:05", "throughput": 5584.68, "total_tokens": 9902448}
|
|
{"current_steps": 20125, "total_steps": 37885, "loss": 0.0975, "lr": 1.0656483068453828e-06, "epoch": 2.656064405437508, "percentage": 53.12, "elapsed_time": "0:29:33", "remaining_time": "0:26:05", "throughput": 5585.01, "total_tokens": 9904880}
|
|
{"current_steps": 20130, "total_steps": 37885, "loss": 0.0077, "lr": 1.065188595577323e-06, "epoch": 2.6567242972152565, "percentage": 53.13, "elapsed_time": "0:29:33", "remaining_time": "0:26:04", "throughput": 5585.39, "total_tokens": 9907376}
|
|
{"current_steps": 20135, "total_steps": 37885, "loss": 0.0001, "lr": 1.0647288704734563e-06, "epoch": 2.657384188993005, "percentage": 53.15, "elapsed_time": "0:29:34", "remaining_time": "0:26:03", "throughput": 5585.83, "total_tokens": 9910000}
|
|
{"current_steps": 20140, "total_steps": 37885, "loss": 0.0893, "lr": 1.0642691316313556e-06, "epoch": 2.6580440807707535, "percentage": 53.16, "elapsed_time": "0:29:34", "remaining_time": "0:26:03", "throughput": 5586.14, "total_tokens": 9912368}
|
|
{"current_steps": 20145, "total_steps": 37885, "loss": 0.057, "lr": 1.0638093791485964e-06, "epoch": 2.658703972548502, "percentage": 53.17, "elapsed_time": "0:29:34", "remaining_time": "0:26:02", "throughput": 5586.41, "total_tokens": 9914672}
|
|
{"current_steps": 20150, "total_steps": 37885, "loss": 0.0003, "lr": 1.0633496131227593e-06, "epoch": 2.6593638643262505, "percentage": 53.19, "elapsed_time": "0:29:35", "remaining_time": "0:26:02", "throughput": 5586.75, "total_tokens": 9917104}
|
|
{"current_steps": 20155, "total_steps": 37885, "loss": 0.0001, "lr": 1.0628898336514252e-06, "epoch": 2.6600237561039988, "percentage": 53.2, "elapsed_time": "0:29:35", "remaining_time": "0:26:01", "throughput": 5587.08, "total_tokens": 9919536}
|
|
{"current_steps": 20160, "total_steps": 37885, "loss": 0.0844, "lr": 1.0624300408321795e-06, "epoch": 2.6606836478817475, "percentage": 53.21, "elapsed_time": "0:29:35", "remaining_time": "0:26:01", "throughput": 5587.28, "total_tokens": 9921712}
|
|
{"current_steps": 20165, "total_steps": 37885, "loss": 0.0596, "lr": 1.0619702347626098e-06, "epoch": 2.6613435396594958, "percentage": 53.23, "elapsed_time": "0:29:36", "remaining_time": "0:26:00", "throughput": 5587.61, "total_tokens": 9924144}
|
|
{"current_steps": 20170, "total_steps": 37885, "loss": 0.0002, "lr": 1.0615104155403063e-06, "epoch": 2.6620034314372445, "percentage": 53.24, "elapsed_time": "0:29:36", "remaining_time": "0:26:00", "throughput": 5587.98, "total_tokens": 9926640}
|
|
{"current_steps": 20175, "total_steps": 37885, "loss": 0.1212, "lr": 1.0610505832628626e-06, "epoch": 2.6626633232149928, "percentage": 53.25, "elapsed_time": "0:29:36", "remaining_time": "0:25:59", "throughput": 5588.32, "total_tokens": 9929072}
|
|
{"current_steps": 20180, "total_steps": 37885, "loss": 0.0673, "lr": 1.0605907380278745e-06, "epoch": 2.663323214992741, "percentage": 53.27, "elapsed_time": "0:29:37", "remaining_time": "0:25:59", "throughput": 5588.6, "total_tokens": 9931376}
|
|
{"current_steps": 20185, "total_steps": 37885, "loss": 0.0001, "lr": 1.0601308799329413e-06, "epoch": 2.6639831067704898, "percentage": 53.28, "elapsed_time": "0:29:37", "remaining_time": "0:25:58", "throughput": 5588.8, "total_tokens": 9933552}
|
|
{"current_steps": 20190, "total_steps": 37885, "loss": 0.1238, "lr": 1.0596710090756641e-06, "epoch": 2.664642998548238, "percentage": 53.29, "elapsed_time": "0:29:37", "remaining_time": "0:25:58", "throughput": 5589.01, "total_tokens": 9935728}
|
|
{"current_steps": 20195, "total_steps": 37885, "loss": 0.0167, "lr": 1.0592111255536478e-06, "epoch": 2.6653028903259868, "percentage": 53.31, "elapsed_time": "0:29:38", "remaining_time": "0:25:57", "throughput": 5589.25, "total_tokens": 9937968}
|
|
{"current_steps": 20200, "total_steps": 37885, "loss": 0.1255, "lr": 1.0587512294644982e-06, "epoch": 2.665962782103735, "percentage": 53.32, "elapsed_time": "0:29:38", "remaining_time": "0:25:56", "throughput": 5589.53, "total_tokens": 9940272}
|
|
{"current_steps": 20205, "total_steps": 37885, "loss": 0.0168, "lr": 1.0582913209058257e-06, "epoch": 2.6666226738814833, "percentage": 53.33, "elapsed_time": "0:29:38", "remaining_time": "0:25:56", "throughput": 5589.9, "total_tokens": 9942768}
|
|
{"current_steps": 20210, "total_steps": 37885, "loss": 0.2539, "lr": 1.0578313999752427e-06, "epoch": 2.667282565659232, "percentage": 53.35, "elapsed_time": "0:29:39", "remaining_time": "0:25:55", "throughput": 5590.38, "total_tokens": 9945456}
|
|
{"current_steps": 20215, "total_steps": 37885, "loss": 0.001, "lr": 1.0573714667703638e-06, "epoch": 2.6679424574369803, "percentage": 53.36, "elapsed_time": "0:29:39", "remaining_time": "0:25:55", "throughput": 5590.84, "total_tokens": 9948144}
|
|
{"current_steps": 20220, "total_steps": 37885, "loss": 0.0813, "lr": 1.0569115213888067e-06, "epoch": 2.668602349214729, "percentage": 53.37, "elapsed_time": "0:29:39", "remaining_time": "0:25:54", "throughput": 5591.31, "total_tokens": 9950832}
|
|
{"current_steps": 20225, "total_steps": 37885, "loss": 0.1883, "lr": 1.0564515639281911e-06, "epoch": 2.6692622409924773, "percentage": 53.39, "elapsed_time": "0:29:40", "remaining_time": "0:25:54", "throughput": 5591.71, "total_tokens": 9953392}
|
|
{"current_steps": 20230, "total_steps": 37885, "loss": 0.0004, "lr": 1.0559915944861397e-06, "epoch": 2.6699221327702256, "percentage": 53.4, "elapsed_time": "0:29:40", "remaining_time": "0:25:53", "throughput": 5592.15, "total_tokens": 9956016}
|
|
{"current_steps": 20235, "total_steps": 37885, "loss": 0.0942, "lr": 1.0555316131602778e-06, "epoch": 2.670582024547974, "percentage": 53.41, "elapsed_time": "0:29:40", "remaining_time": "0:25:53", "throughput": 5592.52, "total_tokens": 9958512}
|
|
{"current_steps": 20240, "total_steps": 37885, "loss": 0.0016, "lr": 1.0550716200482335e-06, "epoch": 2.6712419163257226, "percentage": 53.42, "elapsed_time": "0:29:41", "remaining_time": "0:25:52", "throughput": 5592.89, "total_tokens": 9961008}
|
|
{"current_steps": 20245, "total_steps": 37885, "loss": 0.0428, "lr": 1.0546116152476366e-06, "epoch": 2.671901808103471, "percentage": 53.44, "elapsed_time": "0:29:41", "remaining_time": "0:25:52", "throughput": 5593.28, "total_tokens": 9963568}
|
|
{"current_steps": 20250, "total_steps": 37885, "loss": 0.1266, "lr": 1.0541515988561195e-06, "epoch": 2.6725616998812196, "percentage": 53.45, "elapsed_time": "0:29:41", "remaining_time": "0:25:51", "throughput": 5593.52, "total_tokens": 9965808}
|
|
{"current_steps": 20255, "total_steps": 37885, "loss": 0.0373, "lr": 1.053691570971318e-06, "epoch": 2.673221591658968, "percentage": 53.46, "elapsed_time": "0:29:42", "remaining_time": "0:25:51", "throughput": 5593.88, "total_tokens": 9968304}
|
|
{"current_steps": 20260, "total_steps": 37885, "loss": 0.0505, "lr": 1.0532315316908691e-06, "epoch": 2.673881483436716, "percentage": 53.48, "elapsed_time": "0:29:42", "remaining_time": "0:25:50", "throughput": 5594.16, "total_tokens": 9970608}
|
|
{"current_steps": 20265, "total_steps": 37885, "loss": 0.0933, "lr": 1.0527714811124132e-06, "epoch": 2.674541375214465, "percentage": 53.49, "elapsed_time": "0:29:42", "remaining_time": "0:25:49", "throughput": 5594.46, "total_tokens": 9972976}
|
|
{"current_steps": 20270, "total_steps": 37885, "loss": 0.0012, "lr": 1.0523114193335926e-06, "epoch": 2.675201266992213, "percentage": 53.5, "elapsed_time": "0:29:42", "remaining_time": "0:25:49", "throughput": 5594.83, "total_tokens": 9975472}
|
|
{"current_steps": 20275, "total_steps": 37885, "loss": 0.1191, "lr": 1.051851346452052e-06, "epoch": 2.675861158769962, "percentage": 53.52, "elapsed_time": "0:29:43", "remaining_time": "0:25:48", "throughput": 5595.13, "total_tokens": 9977840}
|
|
{"current_steps": 20280, "total_steps": 37885, "loss": 0.0011, "lr": 1.0513912625654386e-06, "epoch": 2.67652105054771, "percentage": 53.53, "elapsed_time": "0:29:43", "remaining_time": "0:25:48", "throughput": 5595.36, "total_tokens": 9980080}
|
|
{"current_steps": 20285, "total_steps": 37885, "loss": 0.0008, "lr": 1.0509311677714016e-06, "epoch": 2.6771809423254584, "percentage": 53.54, "elapsed_time": "0:29:43", "remaining_time": "0:25:47", "throughput": 5595.63, "total_tokens": 9982384}
|
|
{"current_steps": 20290, "total_steps": 37885, "loss": 0.0005, "lr": 1.050471062167594e-06, "epoch": 2.677840834103207, "percentage": 53.56, "elapsed_time": "0:29:44", "remaining_time": "0:25:47", "throughput": 5596.14, "total_tokens": 9985136}
|
|
{"current_steps": 20295, "total_steps": 37885, "loss": 0.1043, "lr": 1.050010945851668e-06, "epoch": 2.6785007258809554, "percentage": 53.57, "elapsed_time": "0:29:44", "remaining_time": "0:25:46", "throughput": 5596.57, "total_tokens": 9987760}
|
|
{"current_steps": 20300, "total_steps": 37885, "loss": 0.0001, "lr": 1.049550818921281e-06, "epoch": 2.679160617658704, "percentage": 53.58, "elapsed_time": "0:29:44", "remaining_time": "0:25:46", "throughput": 5596.97, "total_tokens": 9990320}
|
|
{"current_steps": 20305, "total_steps": 37885, "loss": 0.0004, "lr": 1.0490906814740916e-06, "epoch": 2.6798205094364524, "percentage": 53.6, "elapsed_time": "0:29:45", "remaining_time": "0:25:45", "throughput": 5597.33, "total_tokens": 9992816}
|
|
{"current_steps": 20310, "total_steps": 37885, "loss": 0.0006, "lr": 1.0486305336077609e-06, "epoch": 2.6804804012142007, "percentage": 53.61, "elapsed_time": "0:29:45", "remaining_time": "0:25:45", "throughput": 5597.59, "total_tokens": 9995120}
|
|
{"current_steps": 20315, "total_steps": 37885, "loss": 0.1659, "lr": 1.0481703754199513e-06, "epoch": 2.6811402929919494, "percentage": 53.62, "elapsed_time": "0:29:45", "remaining_time": "0:25:44", "throughput": 5597.89, "total_tokens": 9997488}
|
|
{"current_steps": 20320, "total_steps": 37885, "loss": 0.0007, "lr": 1.047710207008328e-06, "epoch": 2.6818001847696977, "percentage": 53.64, "elapsed_time": "0:29:46", "remaining_time": "0:25:44", "throughput": 5598.23, "total_tokens": 9999920}
|
|
{"current_steps": 20325, "total_steps": 37885, "loss": 0.1558, "lr": 1.0472500284705595e-06, "epoch": 2.6824600765474464, "percentage": 53.65, "elapsed_time": "0:29:46", "remaining_time": "0:25:43", "throughput": 5598.57, "total_tokens": 10002352}
|
|
{"current_steps": 20330, "total_steps": 37885, "loss": 0.0008, "lr": 1.046789839904314e-06, "epoch": 2.6831199683251947, "percentage": 53.66, "elapsed_time": "0:29:46", "remaining_time": "0:25:43", "throughput": 5598.8, "total_tokens": 10004592}
|
|
{"current_steps": 20335, "total_steps": 37885, "loss": 0.038, "lr": 1.0463296414072641e-06, "epoch": 2.683779860102943, "percentage": 53.68, "elapsed_time": "0:29:47", "remaining_time": "0:25:42", "throughput": 5599.14, "total_tokens": 10007024}
|
|
{"current_steps": 20340, "total_steps": 37885, "loss": 0.0877, "lr": 1.0458694330770832e-06, "epoch": 2.6844397518806917, "percentage": 53.69, "elapsed_time": "0:29:47", "remaining_time": "0:25:41", "throughput": 5599.61, "total_tokens": 10009712}
|
|
{"current_steps": 20345, "total_steps": 37885, "loss": 0.0001, "lr": 1.0454092150114473e-06, "epoch": 2.68509964365844, "percentage": 53.7, "elapsed_time": "0:29:47", "remaining_time": "0:25:41", "throughput": 5600.08, "total_tokens": 10012400}
|
|
{"current_steps": 20350, "total_steps": 37885, "loss": 0.0013, "lr": 1.0449489873080344e-06, "epoch": 2.6857595354361887, "percentage": 53.72, "elapsed_time": "0:29:48", "remaining_time": "0:25:40", "throughput": 5600.32, "total_tokens": 10014640}
|
|
{"current_steps": 20355, "total_steps": 37885, "loss": 0.0002, "lr": 1.0444887500645244e-06, "epoch": 2.686419427213937, "percentage": 53.73, "elapsed_time": "0:29:48", "remaining_time": "0:25:40", "throughput": 5600.72, "total_tokens": 10017200}
|
|
{"current_steps": 20360, "total_steps": 37885, "loss": 0.0006, "lr": 1.0440285033785994e-06, "epoch": 2.6870793189916853, "percentage": 53.74, "elapsed_time": "0:29:48", "remaining_time": "0:25:39", "throughput": 5601.18, "total_tokens": 10019888}
|
|
{"current_steps": 20365, "total_steps": 37885, "loss": 0.0873, "lr": 1.0435682473479433e-06, "epoch": 2.6877392107694336, "percentage": 53.75, "elapsed_time": "0:29:49", "remaining_time": "0:25:39", "throughput": 5601.38, "total_tokens": 10022064}
|
|
{"current_steps": 20370, "total_steps": 37885, "loss": 0.0001, "lr": 1.0431079820702425e-06, "epoch": 2.6883991025471823, "percentage": 53.77, "elapsed_time": "0:29:49", "remaining_time": "0:25:38", "throughput": 5601.72, "total_tokens": 10024496}
|
|
{"current_steps": 20375, "total_steps": 37885, "loss": 0.0003, "lr": 1.042647707643184e-06, "epoch": 2.6890589943249306, "percentage": 53.78, "elapsed_time": "0:29:49", "remaining_time": "0:25:38", "throughput": 5602.13, "total_tokens": 10027056}
|
|
{"current_steps": 20380, "total_steps": 37885, "loss": 0.0535, "lr": 1.0421874241644591e-06, "epoch": 2.6897188861026793, "percentage": 53.79, "elapsed_time": "0:29:50", "remaining_time": "0:25:37", "throughput": 5602.53, "total_tokens": 10029616}
|
|
{"current_steps": 20385, "total_steps": 37885, "loss": 0.0001, "lr": 1.0417271317317585e-06, "epoch": 2.6903787778804276, "percentage": 53.81, "elapsed_time": "0:29:50", "remaining_time": "0:25:37", "throughput": 5603.0, "total_tokens": 10032304}
|
|
{"current_steps": 20390, "total_steps": 37885, "loss": 0.0001, "lr": 1.0412668304427766e-06, "epoch": 2.691038669658176, "percentage": 53.82, "elapsed_time": "0:29:50", "remaining_time": "0:25:36", "throughput": 5603.36, "total_tokens": 10034800}
|
|
{"current_steps": 20395, "total_steps": 37885, "loss": 0.0005, "lr": 1.0408065203952086e-06, "epoch": 2.6916985614359246, "percentage": 53.83, "elapsed_time": "0:29:51", "remaining_time": "0:25:36", "throughput": 5603.8, "total_tokens": 10037424}
|
|
{"current_steps": 20400, "total_steps": 37885, "loss": 0.0002, "lr": 1.040346201686752e-06, "epoch": 2.692358453213673, "percentage": 53.85, "elapsed_time": "0:29:51", "remaining_time": "0:25:35", "throughput": 5604.2, "total_tokens": 10039984}
|
|
{"current_steps": 20405, "total_steps": 37885, "loss": 0.0596, "lr": 1.0398858744151067e-06, "epoch": 2.6930183449914216, "percentage": 53.86, "elapsed_time": "0:29:51", "remaining_time": "0:25:34", "throughput": 5604.65, "total_tokens": 10042672}
|
|
{"current_steps": 20410, "total_steps": 37885, "loss": 0.0004, "lr": 1.0394255386779728e-06, "epoch": 2.69367823676917, "percentage": 53.87, "elapsed_time": "0:29:52", "remaining_time": "0:25:34", "throughput": 5604.88, "total_tokens": 10044912}
|
|
{"current_steps": 20415, "total_steps": 37885, "loss": 0.0016, "lr": 1.0389651945730545e-06, "epoch": 2.694338128546918, "percentage": 53.89, "elapsed_time": "0:29:52", "remaining_time": "0:25:33", "throughput": 5605.15, "total_tokens": 10047216}
|
|
{"current_steps": 20420, "total_steps": 37885, "loss": 0.1896, "lr": 1.0385048421980554e-06, "epoch": 2.694998020324667, "percentage": 53.9, "elapsed_time": "0:29:52", "remaining_time": "0:25:33", "throughput": 5605.48, "total_tokens": 10049648}
|
|
{"current_steps": 20425, "total_steps": 37885, "loss": 0.0583, "lr": 1.0380444816506822e-06, "epoch": 2.695657912102415, "percentage": 53.91, "elapsed_time": "0:29:53", "remaining_time": "0:25:32", "throughput": 5605.88, "total_tokens": 10052208}
|
|
{"current_steps": 20430, "total_steps": 37885, "loss": 0.0242, "lr": 1.0375841130286436e-06, "epoch": 2.696317803880164, "percentage": 53.93, "elapsed_time": "0:29:53", "remaining_time": "0:25:32", "throughput": 5606.21, "total_tokens": 10054640}
|
|
{"current_steps": 20435, "total_steps": 37885, "loss": 0.1204, "lr": 1.0371237364296491e-06, "epoch": 2.696977695657912, "percentage": 53.94, "elapsed_time": "0:29:53", "remaining_time": "0:25:31", "throughput": 5606.54, "total_tokens": 10057072}
|
|
{"current_steps": 20440, "total_steps": 37885, "loss": 0.0581, "lr": 1.0366633519514104e-06, "epoch": 2.6976375874356604, "percentage": 53.95, "elapsed_time": "0:29:54", "remaining_time": "0:25:31", "throughput": 5606.81, "total_tokens": 10059376}
|
|
{"current_steps": 20445, "total_steps": 37885, "loss": 0.0596, "lr": 1.0362029596916407e-06, "epoch": 2.698297479213409, "percentage": 53.97, "elapsed_time": "0:29:54", "remaining_time": "0:25:30", "throughput": 5607.21, "total_tokens": 10061936}
|
|
{"current_steps": 20450, "total_steps": 37885, "loss": 0.0001, "lr": 1.0357425597480548e-06, "epoch": 2.6989573709911574, "percentage": 53.98, "elapsed_time": "0:29:54", "remaining_time": "0:25:30", "throughput": 5607.48, "total_tokens": 10064240}
|
|
{"current_steps": 20455, "total_steps": 37885, "loss": 0.0458, "lr": 1.0352821522183697e-06, "epoch": 2.699617262768906, "percentage": 53.99, "elapsed_time": "0:29:55", "remaining_time": "0:25:29", "throughput": 5607.78, "total_tokens": 10066608}
|
|
{"current_steps": 20460, "total_steps": 37885, "loss": 0.0203, "lr": 1.0348217372003032e-06, "epoch": 2.7002771545466544, "percentage": 54.01, "elapsed_time": "0:29:55", "remaining_time": "0:25:29", "throughput": 5608.01, "total_tokens": 10068848}
|
|
{"current_steps": 20465, "total_steps": 37885, "loss": 0.0227, "lr": 1.0343613147915748e-06, "epoch": 2.7009370463244027, "percentage": 54.02, "elapsed_time": "0:29:55", "remaining_time": "0:25:28", "throughput": 5608.27, "total_tokens": 10071152}
|
|
{"current_steps": 20470, "total_steps": 37885, "loss": 0.0001, "lr": 1.0339008850899067e-06, "epoch": 2.7015969381021514, "percentage": 54.03, "elapsed_time": "0:29:56", "remaining_time": "0:25:28", "throughput": 5608.66, "total_tokens": 10073712}
|
|
{"current_steps": 20475, "total_steps": 37885, "loss": 0.0004, "lr": 1.033440448193021e-06, "epoch": 2.7022568298798997, "percentage": 54.05, "elapsed_time": "0:29:56", "remaining_time": "0:25:27", "throughput": 5609.06, "total_tokens": 10076272}
|
|
{"current_steps": 20480, "total_steps": 37885, "loss": 0.0001, "lr": 1.0329800041986423e-06, "epoch": 2.7029167216576484, "percentage": 54.06, "elapsed_time": "0:29:56", "remaining_time": "0:25:26", "throughput": 5609.25, "total_tokens": 10078448}
|
|
{"current_steps": 20485, "total_steps": 37885, "loss": 0.0009, "lr": 1.0325195532044966e-06, "epoch": 2.7035766134353967, "percentage": 54.07, "elapsed_time": "0:29:57", "remaining_time": "0:25:26", "throughput": 5609.64, "total_tokens": 10081008}
|
|
{"current_steps": 20490, "total_steps": 37885, "loss": 0.0001, "lr": 1.032059095308311e-06, "epoch": 2.704236505213145, "percentage": 54.08, "elapsed_time": "0:29:57", "remaining_time": "0:25:25", "throughput": 5609.91, "total_tokens": 10083312}
|
|
{"current_steps": 20495, "total_steps": 37885, "loss": 0.0, "lr": 1.0315986306078149e-06, "epoch": 2.7048963969908932, "percentage": 54.1, "elapsed_time": "0:29:57", "remaining_time": "0:25:25", "throughput": 5610.45, "total_tokens": 10086192}
|
|
{"current_steps": 20500, "total_steps": 37885, "loss": 0.097, "lr": 1.031138159200738e-06, "epoch": 2.705556288768642, "percentage": 54.11, "elapsed_time": "0:29:58", "remaining_time": "0:25:24", "throughput": 5610.68, "total_tokens": 10088432}
|
|
{"current_steps": 20505, "total_steps": 37885, "loss": 0.0813, "lr": 1.0306776811848124e-06, "epoch": 2.7062161805463902, "percentage": 54.12, "elapsed_time": "0:29:58", "remaining_time": "0:25:24", "throughput": 5611.1, "total_tokens": 10091056}
|
|
{"current_steps": 20510, "total_steps": 37885, "loss": 0.0367, "lr": 1.030217196657771e-06, "epoch": 2.706876072324139, "percentage": 54.14, "elapsed_time": "0:29:58", "remaining_time": "0:25:23", "throughput": 5611.47, "total_tokens": 10093552}
|
|
{"current_steps": 20515, "total_steps": 37885, "loss": 0.0882, "lr": 1.0297567057173486e-06, "epoch": 2.7075359641018872, "percentage": 54.15, "elapsed_time": "0:29:59", "remaining_time": "0:25:23", "throughput": 5611.82, "total_tokens": 10096048}
|
|
{"current_steps": 20520, "total_steps": 37885, "loss": 0.0012, "lr": 1.0292962084612808e-06, "epoch": 2.7081958558796355, "percentage": 54.16, "elapsed_time": "0:29:59", "remaining_time": "0:25:22", "throughput": 5612.14, "total_tokens": 10098480}
|
|
{"current_steps": 20525, "total_steps": 37885, "loss": 0.0012, "lr": 1.0288357049873051e-06, "epoch": 2.7088557476573842, "percentage": 54.18, "elapsed_time": "0:29:59", "remaining_time": "0:25:22", "throughput": 5612.69, "total_tokens": 10101360}
|
|
{"current_steps": 20530, "total_steps": 37885, "loss": 0.0799, "lr": 1.0283751953931595e-06, "epoch": 2.7095156394351325, "percentage": 54.19, "elapsed_time": "0:30:00", "remaining_time": "0:25:21", "throughput": 5613.05, "total_tokens": 10103856}
|
|
{"current_steps": 20535, "total_steps": 37885, "loss": 0.1165, "lr": 1.0279146797765845e-06, "epoch": 2.7101755312128812, "percentage": 54.2, "elapsed_time": "0:30:00", "remaining_time": "0:25:21", "throughput": 5613.24, "total_tokens": 10106032}
|
|
{"current_steps": 20540, "total_steps": 37885, "loss": 0.1146, "lr": 1.0274541582353204e-06, "epoch": 2.7108354229906295, "percentage": 54.22, "elapsed_time": "0:30:00", "remaining_time": "0:25:20", "throughput": 5613.5, "total_tokens": 10108336}
|
|
{"current_steps": 20545, "total_steps": 37885, "loss": 0.0007, "lr": 1.0269936308671106e-06, "epoch": 2.711495314768378, "percentage": 54.23, "elapsed_time": "0:30:01", "remaining_time": "0:25:20", "throughput": 5613.99, "total_tokens": 10111088}
|
|
{"current_steps": 20550, "total_steps": 37885, "loss": 0.0595, "lr": 1.0265330977696977e-06, "epoch": 2.7121552065461265, "percentage": 54.24, "elapsed_time": "0:30:01", "remaining_time": "0:25:19", "throughput": 5614.35, "total_tokens": 10113584}
|
|
{"current_steps": 20555, "total_steps": 37885, "loss": 0.0, "lr": 1.0260725590408273e-06, "epoch": 2.712815098323875, "percentage": 54.26, "elapsed_time": "0:30:01", "remaining_time": "0:25:19", "throughput": 5614.68, "total_tokens": 10116016}
|
|
{"current_steps": 20560, "total_steps": 37885, "loss": 0.0612, "lr": 1.0256120147782445e-06, "epoch": 2.7134749901016235, "percentage": 54.27, "elapsed_time": "0:30:02", "remaining_time": "0:25:18", "throughput": 5615.16, "total_tokens": 10118768}
|
|
{"current_steps": 20565, "total_steps": 37885, "loss": 0.0013, "lr": 1.0251514650796975e-06, "epoch": 2.714134881879372, "percentage": 54.28, "elapsed_time": "0:30:02", "remaining_time": "0:25:17", "throughput": 5615.39, "total_tokens": 10121008}
|
|
{"current_steps": 20570, "total_steps": 37885, "loss": 0.003, "lr": 1.024690910042934e-06, "epoch": 2.71479477365712, "percentage": 54.3, "elapsed_time": "0:30:02", "remaining_time": "0:25:17", "throughput": 5615.88, "total_tokens": 10123760}
|
|
{"current_steps": 20575, "total_steps": 37885, "loss": 0.0534, "lr": 1.0242303497657038e-06, "epoch": 2.715454665434869, "percentage": 54.31, "elapsed_time": "0:30:03", "remaining_time": "0:25:16", "throughput": 5616.18, "total_tokens": 10126128}
|
|
{"current_steps": 20580, "total_steps": 37885, "loss": 0.0001, "lr": 1.023769784345757e-06, "epoch": 2.716114557212617, "percentage": 54.32, "elapsed_time": "0:30:03", "remaining_time": "0:25:16", "throughput": 5616.52, "total_tokens": 10128560}
|
|
{"current_steps": 20585, "total_steps": 37885, "loss": 0.0412, "lr": 1.0233092138808457e-06, "epoch": 2.716774448990366, "percentage": 54.34, "elapsed_time": "0:30:03", "remaining_time": "0:25:15", "throughput": 5616.84, "total_tokens": 10130992}
|
|
{"current_steps": 20590, "total_steps": 37885, "loss": 0.0032, "lr": 1.0228486384687226e-06, "epoch": 2.717434340768114, "percentage": 54.35, "elapsed_time": "0:30:04", "remaining_time": "0:25:15", "throughput": 5617.33, "total_tokens": 10133744}
|
|
{"current_steps": 20595, "total_steps": 37885, "loss": 0.0358, "lr": 1.0223880582071413e-06, "epoch": 2.7180942325458624, "percentage": 54.36, "elapsed_time": "0:30:04", "remaining_time": "0:25:14", "throughput": 5617.61, "total_tokens": 10136112}
|
|
{"current_steps": 20600, "total_steps": 37885, "loss": 0.0007, "lr": 1.0219274731938574e-06, "epoch": 2.718754124323611, "percentage": 54.38, "elapsed_time": "0:30:04", "remaining_time": "0:25:14", "throughput": 5617.84, "total_tokens": 10138352}
|
|
{"current_steps": 20605, "total_steps": 37885, "loss": 0.0695, "lr": 1.0214668835266255e-06, "epoch": 2.7194140161013594, "percentage": 54.39, "elapsed_time": "0:30:04", "remaining_time": "0:25:13", "throughput": 5618.14, "total_tokens": 10140720}
|
|
{"current_steps": 20610, "total_steps": 37885, "loss": 0.0, "lr": 1.021006289303203e-06, "epoch": 2.720073907879108, "percentage": 54.4, "elapsed_time": "0:30:05", "remaining_time": "0:25:13", "throughput": 5618.39, "total_tokens": 10143024}
|
|
{"current_steps": 20615, "total_steps": 37885, "loss": 0.0383, "lr": 1.020545690621348e-06, "epoch": 2.7207337996568564, "percentage": 54.41, "elapsed_time": "0:30:05", "remaining_time": "0:25:12", "throughput": 5618.72, "total_tokens": 10145456}
|
|
{"current_steps": 20620, "total_steps": 37885, "loss": 0.0006, "lr": 1.0200850875788187e-06, "epoch": 2.7213936914346046, "percentage": 54.43, "elapsed_time": "0:30:05", "remaining_time": "0:25:12", "throughput": 5618.81, "total_tokens": 10147440}
|
|
{"current_steps": 20625, "total_steps": 37885, "loss": 0.0003, "lr": 1.0196244802733752e-06, "epoch": 2.722053583212353, "percentage": 54.44, "elapsed_time": "0:30:06", "remaining_time": "0:25:11", "throughput": 5619.1, "total_tokens": 10149808}
|
|
{"current_steps": 20630, "total_steps": 37885, "loss": 0.0002, "lr": 1.0191638688027777e-06, "epoch": 2.7227134749901016, "percentage": 54.45, "elapsed_time": "0:30:06", "remaining_time": "0:25:11", "throughput": 5619.43, "total_tokens": 10152240}
|
|
{"current_steps": 20635, "total_steps": 37885, "loss": 0.0, "lr": 1.0187032532647881e-06, "epoch": 2.72337336676785, "percentage": 54.47, "elapsed_time": "0:30:06", "remaining_time": "0:25:10", "throughput": 5619.82, "total_tokens": 10154800}
|
|
{"current_steps": 20640, "total_steps": 37885, "loss": 0.0, "lr": 1.018242633757168e-06, "epoch": 2.7240332585455986, "percentage": 54.48, "elapsed_time": "0:30:07", "remaining_time": "0:25:10", "throughput": 5620.08, "total_tokens": 10157104}
|
|
{"current_steps": 20645, "total_steps": 37885, "loss": 0.1595, "lr": 1.0177820103776814e-06, "epoch": 2.724693150323347, "percentage": 54.49, "elapsed_time": "0:30:07", "remaining_time": "0:25:09", "throughput": 5620.5, "total_tokens": 10159728}
|
|
{"current_steps": 20650, "total_steps": 37885, "loss": 0.0002, "lr": 1.0173213832240918e-06, "epoch": 2.725353042101095, "percentage": 54.51, "elapsed_time": "0:30:07", "remaining_time": "0:25:08", "throughput": 5620.88, "total_tokens": 10162288}
|
|
{"current_steps": 20655, "total_steps": 37885, "loss": 0.0004, "lr": 1.0168607523941637e-06, "epoch": 2.726012933878844, "percentage": 54.52, "elapsed_time": "0:30:08", "remaining_time": "0:25:08", "throughput": 5621.24, "total_tokens": 10164784}
|
|
{"current_steps": 20660, "total_steps": 37885, "loss": 0.0767, "lr": 1.0164001179856635e-06, "epoch": 2.726672825656592, "percentage": 54.53, "elapsed_time": "0:30:08", "remaining_time": "0:25:07", "throughput": 5621.64, "total_tokens": 10167344}
|
|
{"current_steps": 20665, "total_steps": 37885, "loss": 0.0, "lr": 1.0159394800963565e-06, "epoch": 2.727332717434341, "percentage": 54.55, "elapsed_time": "0:30:08", "remaining_time": "0:25:07", "throughput": 5622.06, "total_tokens": 10169968}
|
|
{"current_steps": 20670, "total_steps": 37885, "loss": 0.086, "lr": 1.0154788388240105e-06, "epoch": 2.727992609212089, "percentage": 54.56, "elapsed_time": "0:30:09", "remaining_time": "0:25:06", "throughput": 5622.38, "total_tokens": 10172400}
|
|
{"current_steps": 20675, "total_steps": 37885, "loss": 0.0003, "lr": 1.015018194266393e-06, "epoch": 2.7286525009898375, "percentage": 54.57, "elapsed_time": "0:30:09", "remaining_time": "0:25:06", "throughput": 5622.67, "total_tokens": 10174768}
|
|
{"current_steps": 20680, "total_steps": 37885, "loss": 0.0002, "lr": 1.0145575465212727e-06, "epoch": 2.729312392767586, "percentage": 54.59, "elapsed_time": "0:30:09", "remaining_time": "0:25:05", "throughput": 5622.97, "total_tokens": 10177136}
|
|
{"current_steps": 20685, "total_steps": 37885, "loss": 0.0355, "lr": 1.0140968956864186e-06, "epoch": 2.7299722845453345, "percentage": 54.6, "elapsed_time": "0:30:10", "remaining_time": "0:25:05", "throughput": 5623.16, "total_tokens": 10179312}
|
|
{"current_steps": 20690, "total_steps": 37885, "loss": 0.0001, "lr": 1.0136362418596004e-06, "epoch": 2.730632176323083, "percentage": 54.61, "elapsed_time": "0:30:10", "remaining_time": "0:25:04", "throughput": 5623.55, "total_tokens": 10181872}
|
|
{"current_steps": 20695, "total_steps": 37885, "loss": 0.0874, "lr": 1.0131755851385883e-06, "epoch": 2.7312920681008315, "percentage": 54.63, "elapsed_time": "0:30:10", "remaining_time": "0:25:04", "throughput": 5623.85, "total_tokens": 10184240}
|
|
{"current_steps": 20700, "total_steps": 37885, "loss": 0.0445, "lr": 1.012714925621154e-06, "epoch": 2.7319519598785797, "percentage": 54.64, "elapsed_time": "0:30:11", "remaining_time": "0:25:03", "throughput": 5624.1, "total_tokens": 10186544}
|
|
{"current_steps": 20705, "total_steps": 37885, "loss": 0.0517, "lr": 1.012254263405069e-06, "epoch": 2.7326118516563285, "percentage": 54.65, "elapsed_time": "0:30:11", "remaining_time": "0:25:03", "throughput": 5624.59, "total_tokens": 10189296}
|
|
{"current_steps": 20710, "total_steps": 37885, "loss": 0.08, "lr": 1.0117935985881048e-06, "epoch": 2.7332717434340768, "percentage": 54.67, "elapsed_time": "0:30:11", "remaining_time": "0:25:02", "throughput": 5625.03, "total_tokens": 10191984}
|
|
{"current_steps": 20715, "total_steps": 37885, "loss": 0.0001, "lr": 1.0113329312680352e-06, "epoch": 2.7339316352118255, "percentage": 54.68, "elapsed_time": "0:30:12", "remaining_time": "0:25:02", "throughput": 5625.44, "total_tokens": 10194608}
|
|
{"current_steps": 20720, "total_steps": 37885, "loss": 0.0008, "lr": 1.0108722615426326e-06, "epoch": 2.7345915269895738, "percentage": 54.69, "elapsed_time": "0:30:12", "remaining_time": "0:25:01", "throughput": 5625.8, "total_tokens": 10197104}
|
|
{"current_steps": 20725, "total_steps": 37885, "loss": 0.0003, "lr": 1.0104115895096715e-06, "epoch": 2.735251418767322, "percentage": 54.71, "elapsed_time": "0:30:12", "remaining_time": "0:25:01", "throughput": 5626.12, "total_tokens": 10199536}
|
|
{"current_steps": 20730, "total_steps": 37885, "loss": 0.0002, "lr": 1.0099509152669257e-06, "epoch": 2.7359113105450708, "percentage": 54.72, "elapsed_time": "0:30:13", "remaining_time": "0:25:00", "throughput": 5626.51, "total_tokens": 10202096}
|
|
{"current_steps": 20735, "total_steps": 37885, "loss": 0.0002, "lr": 1.0094902389121702e-06, "epoch": 2.736571202322819, "percentage": 54.73, "elapsed_time": "0:30:13", "remaining_time": "0:24:59", "throughput": 5626.81, "total_tokens": 10204464}
|
|
{"current_steps": 20740, "total_steps": 37885, "loss": 0.0001, "lr": 1.0090295605431805e-06, "epoch": 2.7372310941005678, "percentage": 54.74, "elapsed_time": "0:30:13", "remaining_time": "0:24:59", "throughput": 5627.2, "total_tokens": 10207024}
|
|
{"current_steps": 20745, "total_steps": 37885, "loss": 0.0, "lr": 1.0085688802577315e-06, "epoch": 2.737890985878316, "percentage": 54.76, "elapsed_time": "0:30:14", "remaining_time": "0:24:58", "throughput": 5627.65, "total_tokens": 10209712}
|
|
{"current_steps": 20750, "total_steps": 37885, "loss": 0.0011, "lr": 1.0081081981536001e-06, "epoch": 2.7385508776560643, "percentage": 54.77, "elapsed_time": "0:30:14", "remaining_time": "0:24:58", "throughput": 5627.98, "total_tokens": 10212144}
|
|
{"current_steps": 20755, "total_steps": 37885, "loss": 0.0938, "lr": 1.0076475143285623e-06, "epoch": 2.7392107694338126, "percentage": 54.78, "elapsed_time": "0:30:14", "remaining_time": "0:24:57", "throughput": 5628.43, "total_tokens": 10214832}
|
|
{"current_steps": 20760, "total_steps": 37885, "loss": 0.0229, "lr": 1.0071868288803948e-06, "epoch": 2.7398706612115613, "percentage": 54.8, "elapsed_time": "0:30:15", "remaining_time": "0:24:57", "throughput": 5628.78, "total_tokens": 10217328}
|
|
{"current_steps": 20765, "total_steps": 37885, "loss": 0.0003, "lr": 1.006726141906875e-06, "epoch": 2.7405305529893096, "percentage": 54.81, "elapsed_time": "0:30:15", "remaining_time": "0:24:56", "throughput": 5629.08, "total_tokens": 10219696}
|
|
{"current_steps": 20770, "total_steps": 37885, "loss": 0.0504, "lr": 1.0062654535057805e-06, "epoch": 2.7411904447670583, "percentage": 54.82, "elapsed_time": "0:30:15", "remaining_time": "0:24:56", "throughput": 5629.36, "total_tokens": 10222064}
|
|
{"current_steps": 20775, "total_steps": 37885, "loss": 0.0955, "lr": 1.0058047637748886e-06, "epoch": 2.7418503365448066, "percentage": 54.84, "elapsed_time": "0:30:16", "remaining_time": "0:24:55", "throughput": 5629.81, "total_tokens": 10224752}
|
|
{"current_steps": 20780, "total_steps": 37885, "loss": 0.0611, "lr": 1.0053440728119778e-06, "epoch": 2.742510228322555, "percentage": 54.85, "elapsed_time": "0:30:16", "remaining_time": "0:24:55", "throughput": 5630.17, "total_tokens": 10227248}
|
|
{"current_steps": 20785, "total_steps": 37885, "loss": 0.0001, "lr": 1.0048833807148263e-06, "epoch": 2.7431701201003036, "percentage": 54.86, "elapsed_time": "0:30:16", "remaining_time": "0:24:54", "throughput": 5630.53, "total_tokens": 10229744}
|
|
{"current_steps": 20790, "total_steps": 37885, "loss": 0.0001, "lr": 1.004422687581212e-06, "epoch": 2.743830011878052, "percentage": 54.88, "elapsed_time": "0:30:17", "remaining_time": "0:24:54", "throughput": 5630.85, "total_tokens": 10232176}
|
|
{"current_steps": 20795, "total_steps": 37885, "loss": 0.179, "lr": 1.0039619935089149e-06, "epoch": 2.7444899036558006, "percentage": 54.89, "elapsed_time": "0:30:17", "remaining_time": "0:24:53", "throughput": 5631.17, "total_tokens": 10234608}
|
|
{"current_steps": 20800, "total_steps": 37885, "loss": 0.0004, "lr": 1.0035012985957132e-06, "epoch": 2.745149795433549, "percentage": 54.9, "elapsed_time": "0:30:17", "remaining_time": "0:24:53", "throughput": 5631.5, "total_tokens": 10237040}
|
|
{"current_steps": 20805, "total_steps": 37885, "loss": 0.0003, "lr": 1.0030406029393863e-06, "epoch": 2.745809687211297, "percentage": 54.92, "elapsed_time": "0:30:18", "remaining_time": "0:24:52", "throughput": 5631.79, "total_tokens": 10239408}
|
|
{"current_steps": 20810, "total_steps": 37885, "loss": 0.1464, "lr": 1.0025799066377134e-06, "epoch": 2.746469578989046, "percentage": 54.93, "elapsed_time": "0:30:18", "remaining_time": "0:24:52", "throughput": 5632.11, "total_tokens": 10241840}
|
|
{"current_steps": 20815, "total_steps": 37885, "loss": 0.0002, "lr": 1.0021192097884738e-06, "epoch": 2.747129470766794, "percentage": 54.94, "elapsed_time": "0:30:18", "remaining_time": "0:24:51", "throughput": 5632.43, "total_tokens": 10244272}
|
|
{"current_steps": 20820, "total_steps": 37885, "loss": 0.0029, "lr": 1.0016585124894478e-06, "epoch": 2.747789362544543, "percentage": 54.96, "elapsed_time": "0:30:19", "remaining_time": "0:24:51", "throughput": 5632.87, "total_tokens": 10246960}
|
|
{"current_steps": 20825, "total_steps": 37885, "loss": 0.0008, "lr": 1.0011978148384137e-06, "epoch": 2.748449254322291, "percentage": 54.97, "elapsed_time": "0:30:19", "remaining_time": "0:24:50", "throughput": 5633.36, "total_tokens": 10249712}
|
|
{"current_steps": 20830, "total_steps": 37885, "loss": 0.0627, "lr": 1.0007371169331527e-06, "epoch": 2.7491091461000394, "percentage": 54.98, "elapsed_time": "0:30:19", "remaining_time": "0:24:49", "throughput": 5633.81, "total_tokens": 10252400}
|
|
{"current_steps": 20835, "total_steps": 37885, "loss": 0.0152, "lr": 1.0002764188714438e-06, "epoch": 2.749769037877788, "percentage": 55.0, "elapsed_time": "0:30:20", "remaining_time": "0:24:49", "throughput": 5634.22, "total_tokens": 10255024}
|
|
{"current_steps": 20840, "total_steps": 37885, "loss": 0.0001, "lr": 9.99815720751067e-07, "epoch": 2.7504289296555364, "percentage": 55.01, "elapsed_time": "0:30:20", "remaining_time": "0:24:48", "throughput": 5634.52, "total_tokens": 10257392}
|
|
{"current_steps": 20845, "total_steps": 37885, "loss": 0.0429, "lr": 9.993550226698021e-07, "epoch": 2.751088821433285, "percentage": 55.02, "elapsed_time": "0:30:20", "remaining_time": "0:24:48", "throughput": 5634.68, "total_tokens": 10259504}
|
|
{"current_steps": 20845, "total_steps": 37885, "eval_loss": 0.16027498245239258, "epoch": 2.751088821433285, "percentage": 55.02, "elapsed_time": "0:30:28", "remaining_time": "0:24:54", "throughput": 5610.55, "total_tokens": 10259504}
|
|
{"current_steps": 20850, "total_steps": 37885, "loss": 0.0397, "lr": 9.988943247254293e-07, "epoch": 2.7517487132110334, "percentage": 55.03, "elapsed_time": "0:31:03", "remaining_time": "0:25:22", "throughput": 5508.12, "total_tokens": 10261808}
|
|
{"current_steps": 20855, "total_steps": 37885, "loss": 0.038, "lr": 9.984336270157277e-07, "epoch": 2.7524086049887817, "percentage": 55.05, "elapsed_time": "0:31:03", "remaining_time": "0:25:21", "throughput": 5508.45, "total_tokens": 10264240}
|
|
{"current_steps": 20860, "total_steps": 37885, "loss": 0.0004, "lr": 9.979729296384775e-07, "epoch": 2.7530684967665304, "percentage": 55.06, "elapsed_time": "0:31:03", "remaining_time": "0:25:21", "throughput": 5508.81, "total_tokens": 10266736}
|
|
{"current_steps": 20865, "total_steps": 37885, "loss": 0.2016, "lr": 9.97512232691458e-07, "epoch": 2.7537283885442787, "percentage": 55.07, "elapsed_time": "0:31:04", "remaining_time": "0:25:20", "throughput": 5509.29, "total_tokens": 10269488}
|
|
{"current_steps": 20870, "total_steps": 37885, "loss": 0.0143, "lr": 9.970515362724497e-07, "epoch": 2.7543882803220274, "percentage": 55.09, "elapsed_time": "0:31:04", "remaining_time": "0:25:19", "throughput": 5509.61, "total_tokens": 10271920}
|
|
{"current_steps": 20875, "total_steps": 37885, "loss": 0.1161, "lr": 9.965908404792313e-07, "epoch": 2.7550481720997757, "percentage": 55.1, "elapsed_time": "0:31:04", "remaining_time": "0:25:19", "throughput": 5510.08, "total_tokens": 10274672}
|
|
{"current_steps": 20880, "total_steps": 37885, "loss": 0.0524, "lr": 9.96130145409582e-07, "epoch": 2.755708063877524, "percentage": 55.11, "elapsed_time": "0:31:05", "remaining_time": "0:25:18", "throughput": 5510.55, "total_tokens": 10277424}
|
|
{"current_steps": 20885, "total_steps": 37885, "loss": 0.0002, "lr": 9.956694511612817e-07, "epoch": 2.7563679556552723, "percentage": 55.13, "elapsed_time": "0:31:05", "remaining_time": "0:25:18", "throughput": 5510.89, "total_tokens": 10279920}
|
|
{"current_steps": 20890, "total_steps": 37885, "loss": 0.0058, "lr": 9.952087578321086e-07, "epoch": 2.757027847433021, "percentage": 55.14, "elapsed_time": "0:31:05", "remaining_time": "0:25:17", "throughput": 5511.25, "total_tokens": 10282480}
|
|
{"current_steps": 20895, "total_steps": 37885, "loss": 0.1002, "lr": 9.947480655198423e-07, "epoch": 2.7576877392107697, "percentage": 55.15, "elapsed_time": "0:31:06", "remaining_time": "0:25:17", "throughput": 5511.59, "total_tokens": 10284976}
|
|
{"current_steps": 20900, "total_steps": 37885, "loss": 0.0384, "lr": 9.94287374322261e-07, "epoch": 2.758347630988518, "percentage": 55.17, "elapsed_time": "0:31:06", "remaining_time": "0:25:16", "throughput": 5511.85, "total_tokens": 10287344}
|
|
{"current_steps": 20905, "total_steps": 37885, "loss": 0.0002, "lr": 9.93826684337143e-07, "epoch": 2.7590075227662663, "percentage": 55.18, "elapsed_time": "0:31:06", "remaining_time": "0:25:16", "throughput": 5512.07, "total_tokens": 10289648}
|
|
{"current_steps": 20910, "total_steps": 37885, "loss": 0.0342, "lr": 9.933659956622668e-07, "epoch": 2.7596674145440145, "percentage": 55.19, "elapsed_time": "0:31:07", "remaining_time": "0:25:15", "throughput": 5512.29, "total_tokens": 10291952}
|
|
{"current_steps": 20915, "total_steps": 37885, "loss": 0.0798, "lr": 9.929053083954096e-07, "epoch": 2.7603273063217633, "percentage": 55.21, "elapsed_time": "0:31:07", "remaining_time": "0:25:15", "throughput": 5512.74, "total_tokens": 10294704}
|
|
{"current_steps": 20920, "total_steps": 37885, "loss": 0.0007, "lr": 9.924446226343496e-07, "epoch": 2.7609871980995115, "percentage": 55.22, "elapsed_time": "0:31:07", "remaining_time": "0:25:14", "throughput": 5513.1, "total_tokens": 10297264}
|
|
{"current_steps": 20925, "total_steps": 37885, "loss": 0.077, "lr": 9.91983938476864e-07, "epoch": 2.7616470898772603, "percentage": 55.23, "elapsed_time": "0:31:08", "remaining_time": "0:25:14", "throughput": 5513.2, "total_tokens": 10299312}
|
|
{"current_steps": 20930, "total_steps": 37885, "loss": 0.0904, "lr": 9.915232560207288e-07, "epoch": 2.7623069816550085, "percentage": 55.25, "elapsed_time": "0:31:08", "remaining_time": "0:25:13", "throughput": 5513.42, "total_tokens": 10301616}
|
|
{"current_steps": 20935, "total_steps": 37885, "loss": 0.0017, "lr": 9.910625753637215e-07, "epoch": 2.762966873432757, "percentage": 55.26, "elapsed_time": "0:31:08", "remaining_time": "0:25:13", "throughput": 5513.71, "total_tokens": 10303984}
|
|
{"current_steps": 20940, "total_steps": 37885, "loss": 0.075, "lr": 9.906018966036177e-07, "epoch": 2.7636267652105055, "percentage": 55.27, "elapsed_time": "0:31:09", "remaining_time": "0:25:12", "throughput": 5514.13, "total_tokens": 10306608}
|
|
{"current_steps": 20945, "total_steps": 37885, "loss": 0.0004, "lr": 9.901412198381935e-07, "epoch": 2.764286656988254, "percentage": 55.29, "elapsed_time": "0:31:09", "remaining_time": "0:25:11", "throughput": 5514.46, "total_tokens": 10309040}
|
|
{"current_steps": 20950, "total_steps": 37885, "loss": 0.0003, "lr": 9.89680545165224e-07, "epoch": 2.7649465487660025, "percentage": 55.3, "elapsed_time": "0:31:09", "remaining_time": "0:25:11", "throughput": 5514.68, "total_tokens": 10311280}
|
|
{"current_steps": 20955, "total_steps": 37885, "loss": 0.0475, "lr": 9.892198726824835e-07, "epoch": 2.765606440543751, "percentage": 55.31, "elapsed_time": "0:31:10", "remaining_time": "0:25:10", "throughput": 5515.03, "total_tokens": 10313776}
|
|
{"current_steps": 20960, "total_steps": 37885, "loss": 0.0412, "lr": 9.887592024877478e-07, "epoch": 2.766266332321499, "percentage": 55.33, "elapsed_time": "0:31:10", "remaining_time": "0:25:10", "throughput": 5515.45, "total_tokens": 10316400}
|
|
{"current_steps": 20965, "total_steps": 37885, "loss": 0.0002, "lr": 9.882985346787892e-07, "epoch": 2.766926224099248, "percentage": 55.34, "elapsed_time": "0:31:10", "remaining_time": "0:25:09", "throughput": 5515.86, "total_tokens": 10319024}
|
|
{"current_steps": 20970, "total_steps": 37885, "loss": 0.0068, "lr": 9.878378693533825e-07, "epoch": 2.767586115876996, "percentage": 55.35, "elapsed_time": "0:31:11", "remaining_time": "0:25:09", "throughput": 5516.25, "total_tokens": 10321584}
|
|
{"current_steps": 20975, "total_steps": 37885, "loss": 0.0035, "lr": 9.873772066092998e-07, "epoch": 2.768246007654745, "percentage": 55.36, "elapsed_time": "0:31:11", "remaining_time": "0:25:08", "throughput": 5516.55, "total_tokens": 10323952}
|
|
{"current_steps": 20980, "total_steps": 37885, "loss": 0.0556, "lr": 9.869165465443132e-07, "epoch": 2.768905899432493, "percentage": 55.38, "elapsed_time": "0:31:11", "remaining_time": "0:25:08", "throughput": 5516.87, "total_tokens": 10326384}
|
|
{"current_steps": 20985, "total_steps": 37885, "loss": 0.0006, "lr": 9.864558892561955e-07, "epoch": 2.7695657912102414, "percentage": 55.39, "elapsed_time": "0:31:12", "remaining_time": "0:25:07", "throughput": 5517.13, "total_tokens": 10328688}
|
|
{"current_steps": 20990, "total_steps": 37885, "loss": 0.0582, "lr": 9.859952348427167e-07, "epoch": 2.77022568298799, "percentage": 55.4, "elapsed_time": "0:31:12", "remaining_time": "0:25:07", "throughput": 5517.56, "total_tokens": 10331312}
|
|
{"current_steps": 20995, "total_steps": 37885, "loss": 0.0782, "lr": 9.855345834016481e-07, "epoch": 2.7708855747657384, "percentage": 55.42, "elapsed_time": "0:31:12", "remaining_time": "0:25:06", "throughput": 5517.84, "total_tokens": 10333680}
|
|
{"current_steps": 21000, "total_steps": 37885, "loss": 0.0438, "lr": 9.850739350307595e-07, "epoch": 2.771545466543487, "percentage": 55.43, "elapsed_time": "0:31:13", "remaining_time": "0:25:06", "throughput": 5518.23, "total_tokens": 10336240}
|
|
{"current_steps": 21005, "total_steps": 37885, "loss": 0.0004, "lr": 9.846132898278198e-07, "epoch": 2.7722053583212354, "percentage": 55.44, "elapsed_time": "0:31:13", "remaining_time": "0:25:05", "throughput": 5518.52, "total_tokens": 10338608}
|
|
{"current_steps": 21010, "total_steps": 37885, "loss": 0.1489, "lr": 9.84152647890598e-07, "epoch": 2.7728652500989837, "percentage": 55.46, "elapsed_time": "0:31:13", "remaining_time": "0:25:04", "throughput": 5518.95, "total_tokens": 10341296}
|
|
{"current_steps": 21015, "total_steps": 37885, "loss": 0.0001, "lr": 9.83692009316862e-07, "epoch": 2.7735251418767324, "percentage": 55.47, "elapsed_time": "0:31:14", "remaining_time": "0:25:04", "throughput": 5519.43, "total_tokens": 10344048}
|
|
{"current_steps": 21020, "total_steps": 37885, "loss": 0.0002, "lr": 9.832313742043792e-07, "epoch": 2.7741850336544807, "percentage": 55.48, "elapsed_time": "0:31:14", "remaining_time": "0:25:03", "throughput": 5519.69, "total_tokens": 10346352}
|
|
{"current_steps": 21025, "total_steps": 37885, "loss": 0.0472, "lr": 9.827707426509155e-07, "epoch": 2.7748449254322294, "percentage": 55.5, "elapsed_time": "0:31:14", "remaining_time": "0:25:03", "throughput": 5520.01, "total_tokens": 10348784}
|
|
{"current_steps": 21030, "total_steps": 37885, "loss": 0.0002, "lr": 9.823101147542368e-07, "epoch": 2.7755048172099777, "percentage": 55.51, "elapsed_time": "0:31:15", "remaining_time": "0:25:02", "throughput": 5520.42, "total_tokens": 10351344}
|
|
{"current_steps": 21035, "total_steps": 37885, "loss": 0.0003, "lr": 9.818494906121084e-07, "epoch": 2.776164708987726, "percentage": 55.52, "elapsed_time": "0:31:15", "remaining_time": "0:25:02", "throughput": 5520.88, "total_tokens": 10354032}
|
|
{"current_steps": 21040, "total_steps": 37885, "loss": 0.0003, "lr": 9.813888703222938e-07, "epoch": 2.776824600765474, "percentage": 55.54, "elapsed_time": "0:31:15", "remaining_time": "0:25:01", "throughput": 5521.3, "total_tokens": 10356656}
|
|
{"current_steps": 21045, "total_steps": 37885, "loss": 0.0059, "lr": 9.809282539825573e-07, "epoch": 2.777484492543223, "percentage": 55.55, "elapsed_time": "0:31:16", "remaining_time": "0:25:01", "throughput": 5521.72, "total_tokens": 10359280}
|
|
{"current_steps": 21050, "total_steps": 37885, "loss": 0.0612, "lr": 9.804676416906605e-07, "epoch": 2.778144384320971, "percentage": 55.56, "elapsed_time": "0:31:16", "remaining_time": "0:25:00", "throughput": 5522.05, "total_tokens": 10361712}
|
|
{"current_steps": 21055, "total_steps": 37885, "loss": 0.1814, "lr": 9.800070335443651e-07, "epoch": 2.77880427609872, "percentage": 55.58, "elapsed_time": "0:31:16", "remaining_time": "0:25:00", "throughput": 5522.49, "total_tokens": 10364400}
|
|
{"current_steps": 21060, "total_steps": 37885, "loss": 0.024, "lr": 9.795464296414323e-07, "epoch": 2.779464167876468, "percentage": 55.59, "elapsed_time": "0:31:17", "remaining_time": "0:24:59", "throughput": 5522.92, "total_tokens": 10367024}
|
|
{"current_steps": 21065, "total_steps": 37885, "loss": 0.0338, "lr": 9.790858300796214e-07, "epoch": 2.7801240596542165, "percentage": 55.6, "elapsed_time": "0:31:17", "remaining_time": "0:24:59", "throughput": 5523.28, "total_tokens": 10369520}
|
|
{"current_steps": 21070, "total_steps": 37885, "loss": 0.004, "lr": 9.78625234956692e-07, "epoch": 2.780783951431965, "percentage": 55.62, "elapsed_time": "0:31:17", "remaining_time": "0:24:58", "throughput": 5523.79, "total_tokens": 10372336}
|
|
{"current_steps": 21075, "total_steps": 37885, "loss": 0.0019, "lr": 9.781646443704014e-07, "epoch": 2.7814438432097135, "percentage": 55.63, "elapsed_time": "0:31:18", "remaining_time": "0:24:58", "throughput": 5524.25, "total_tokens": 10375024}
|
|
{"current_steps": 21080, "total_steps": 37885, "loss": 0.0002, "lr": 9.777040584185072e-07, "epoch": 2.782103734987462, "percentage": 55.64, "elapsed_time": "0:31:18", "remaining_time": "0:24:57", "throughput": 5524.71, "total_tokens": 10377712}
|
|
{"current_steps": 21085, "total_steps": 37885, "loss": 0.0001, "lr": 9.772434771987652e-07, "epoch": 2.7827636267652105, "percentage": 55.66, "elapsed_time": "0:31:18", "remaining_time": "0:24:56", "throughput": 5524.94, "total_tokens": 10379952}
|
|
{"current_steps": 21090, "total_steps": 37885, "loss": 0.112, "lr": 9.7678290080893e-07, "epoch": 2.7834235185429588, "percentage": 55.67, "elapsed_time": "0:31:19", "remaining_time": "0:24:56", "throughput": 5525.3, "total_tokens": 10382448}
|
|
{"current_steps": 21095, "total_steps": 37885, "loss": 0.0793, "lr": 9.76322329346756e-07, "epoch": 2.7840834103207075, "percentage": 55.68, "elapsed_time": "0:31:19", "remaining_time": "0:24:55", "throughput": 5525.53, "total_tokens": 10384688}
|
|
{"current_steps": 21100, "total_steps": 37885, "loss": 0.1091, "lr": 9.758617629099961e-07, "epoch": 2.7847433020984558, "percentage": 55.69, "elapsed_time": "0:31:19", "remaining_time": "0:24:55", "throughput": 5525.84, "total_tokens": 10387120}
|
|
{"current_steps": 21105, "total_steps": 37885, "loss": 0.0003, "lr": 9.754012015964027e-07, "epoch": 2.7854031938762045, "percentage": 55.71, "elapsed_time": "0:31:20", "remaining_time": "0:24:54", "throughput": 5526.13, "total_tokens": 10389488}
|
|
{"current_steps": 21110, "total_steps": 37885, "loss": 0.0003, "lr": 9.749406455037262e-07, "epoch": 2.7860630856539528, "percentage": 55.72, "elapsed_time": "0:31:20", "remaining_time": "0:24:54", "throughput": 5526.53, "total_tokens": 10392048}
|
|
{"current_steps": 21115, "total_steps": 37885, "loss": 0.0001, "lr": 9.744800947297154e-07, "epoch": 2.786722977431701, "percentage": 55.73, "elapsed_time": "0:31:20", "remaining_time": "0:24:53", "throughput": 5526.83, "total_tokens": 10394416}
|
|
{"current_steps": 21120, "total_steps": 37885, "loss": 0.0831, "lr": 9.740195493721204e-07, "epoch": 2.7873828692094498, "percentage": 55.75, "elapsed_time": "0:31:21", "remaining_time": "0:24:53", "throughput": 5527.17, "total_tokens": 10396912}
|
|
{"current_steps": 21125, "total_steps": 37885, "loss": 0.0001, "lr": 9.735590095286874e-07, "epoch": 2.788042760987198, "percentage": 55.76, "elapsed_time": "0:31:21", "remaining_time": "0:24:52", "throughput": 5527.45, "total_tokens": 10399280}
|
|
{"current_steps": 21130, "total_steps": 37885, "loss": 0.0007, "lr": 9.730984752971634e-07, "epoch": 2.7887026527649468, "percentage": 55.77, "elapsed_time": "0:31:21", "remaining_time": "0:24:52", "throughput": 5527.9, "total_tokens": 10401968}
|
|
{"current_steps": 21135, "total_steps": 37885, "loss": 0.0, "lr": 9.726379467752937e-07, "epoch": 2.789362544542695, "percentage": 55.79, "elapsed_time": "0:31:22", "remaining_time": "0:24:51", "throughput": 5528.46, "total_tokens": 10404912}
|
|
{"current_steps": 21140, "total_steps": 37885, "loss": 0.111, "lr": 9.721774240608208e-07, "epoch": 2.7900224363204433, "percentage": 55.8, "elapsed_time": "0:31:22", "remaining_time": "0:24:51", "throughput": 5528.89, "total_tokens": 10407600}
|
|
{"current_steps": 21145, "total_steps": 37885, "loss": 0.0037, "lr": 9.71716907251489e-07, "epoch": 2.790682328098192, "percentage": 55.81, "elapsed_time": "0:31:22", "remaining_time": "0:24:50", "throughput": 5529.24, "total_tokens": 10410096}
|
|
{"current_steps": 21150, "total_steps": 37885, "loss": 0.0089, "lr": 9.712563964450378e-07, "epoch": 2.7913422198759403, "percentage": 55.83, "elapsed_time": "0:31:23", "remaining_time": "0:24:49", "throughput": 5529.67, "total_tokens": 10412720}
|
|
{"current_steps": 21155, "total_steps": 37885, "loss": 0.0001, "lr": 9.707958917392094e-07, "epoch": 2.792002111653689, "percentage": 55.84, "elapsed_time": "0:31:23", "remaining_time": "0:24:49", "throughput": 5529.95, "total_tokens": 10415088}
|
|
{"current_steps": 21160, "total_steps": 37885, "loss": 0.0325, "lr": 9.70335393231741e-07, "epoch": 2.7926620034314373, "percentage": 55.85, "elapsed_time": "0:31:23", "remaining_time": "0:24:48", "throughput": 5530.32, "total_tokens": 10417648}
|
|
{"current_steps": 21165, "total_steps": 37885, "loss": 0.0001, "lr": 9.698749010203704e-07, "epoch": 2.7933218952091856, "percentage": 55.87, "elapsed_time": "0:31:24", "remaining_time": "0:24:48", "throughput": 5530.61, "total_tokens": 10420016}
|
|
{"current_steps": 21170, "total_steps": 37885, "loss": 0.0985, "lr": 9.694144152028342e-07, "epoch": 2.793981786986934, "percentage": 55.88, "elapsed_time": "0:31:24", "remaining_time": "0:24:47", "throughput": 5531.04, "total_tokens": 10422704}
|
|
{"current_steps": 21175, "total_steps": 37885, "loss": 0.0004, "lr": 9.689539358768668e-07, "epoch": 2.7946416787646826, "percentage": 55.89, "elapsed_time": "0:31:24", "remaining_time": "0:24:47", "throughput": 5531.17, "total_tokens": 10424752}
|
|
{"current_steps": 21180, "total_steps": 37885, "loss": 0.0004, "lr": 9.684934631402016e-07, "epoch": 2.795301570542431, "percentage": 55.91, "elapsed_time": "0:31:25", "remaining_time": "0:24:46", "throughput": 5531.55, "total_tokens": 10427312}
|
|
{"current_steps": 21185, "total_steps": 37885, "loss": 0.0, "lr": 9.68032997090571e-07, "epoch": 2.7959614623201796, "percentage": 55.92, "elapsed_time": "0:31:25", "remaining_time": "0:24:46", "throughput": 5531.9, "total_tokens": 10429808}
|
|
{"current_steps": 21190, "total_steps": 37885, "loss": 0.0, "lr": 9.675725378257047e-07, "epoch": 2.796621354097928, "percentage": 55.93, "elapsed_time": "0:31:25", "remaining_time": "0:24:45", "throughput": 5532.28, "total_tokens": 10432368}
|
|
{"current_steps": 21195, "total_steps": 37885, "loss": 0.0902, "lr": 9.67112085443333e-07, "epoch": 2.797281245875676, "percentage": 55.95, "elapsed_time": "0:31:26", "remaining_time": "0:24:45", "throughput": 5532.52, "total_tokens": 10434672}
|
|
{"current_steps": 21200, "total_steps": 37885, "loss": 0.0641, "lr": 9.666516400411826e-07, "epoch": 2.797941137653425, "percentage": 55.96, "elapsed_time": "0:31:26", "remaining_time": "0:24:44", "throughput": 5532.84, "total_tokens": 10437168}
|
|
{"current_steps": 21205, "total_steps": 37885, "loss": 0.0, "lr": 9.661912017169803e-07, "epoch": 2.798601029431173, "percentage": 55.97, "elapsed_time": "0:31:26", "remaining_time": "0:24:44", "throughput": 5533.09, "total_tokens": 10439472}
|
|
{"current_steps": 21210, "total_steps": 37885, "loss": 0.0, "lr": 9.657307705684507e-07, "epoch": 2.799260921208922, "percentage": 55.99, "elapsed_time": "0:31:27", "remaining_time": "0:24:43", "throughput": 5533.38, "total_tokens": 10441840}
|
|
{"current_steps": 21215, "total_steps": 37885, "loss": 0.0975, "lr": 9.652703466933167e-07, "epoch": 2.79992081298667, "percentage": 56.0, "elapsed_time": "0:31:27", "remaining_time": "0:24:43", "throughput": 5533.68, "total_tokens": 10444272}
|
|
{"current_steps": 21220, "total_steps": 37885, "loss": 0.0003, "lr": 9.648099301893003e-07, "epoch": 2.8005807047644184, "percentage": 56.01, "elapsed_time": "0:31:27", "remaining_time": "0:24:42", "throughput": 5534.04, "total_tokens": 10446832}
|
|
{"current_steps": 21225, "total_steps": 37885, "loss": 0.135, "lr": 9.643495211541212e-07, "epoch": 2.801240596542167, "percentage": 56.02, "elapsed_time": "0:31:28", "remaining_time": "0:24:41", "throughput": 5534.28, "total_tokens": 10449136}
|
|
{"current_steps": 21230, "total_steps": 37885, "loss": 0.0473, "lr": 9.63889119685498e-07, "epoch": 2.8019004883199154, "percentage": 56.04, "elapsed_time": "0:31:28", "remaining_time": "0:24:41", "throughput": 5534.69, "total_tokens": 10451760}
|
|
{"current_steps": 21235, "total_steps": 37885, "loss": 0.0, "lr": 9.634287258811481e-07, "epoch": 2.802560380097664, "percentage": 56.05, "elapsed_time": "0:31:28", "remaining_time": "0:24:40", "throughput": 5534.9, "total_tokens": 10454000}
|
|
{"current_steps": 21240, "total_steps": 37885, "loss": 0.0016, "lr": 9.62968339838786e-07, "epoch": 2.8032202718754125, "percentage": 56.06, "elapsed_time": "0:31:29", "remaining_time": "0:24:40", "throughput": 5535.15, "total_tokens": 10456304}
|
|
{"current_steps": 21245, "total_steps": 37885, "loss": 0.0027, "lr": 9.625079616561256e-07, "epoch": 2.8038801636531607, "percentage": 56.08, "elapsed_time": "0:31:29", "remaining_time": "0:24:39", "throughput": 5535.5, "total_tokens": 10458800}
|
|
{"current_steps": 21250, "total_steps": 37885, "loss": 0.0001, "lr": 9.620475914308787e-07, "epoch": 2.8045400554309095, "percentage": 56.09, "elapsed_time": "0:31:29", "remaining_time": "0:24:39", "throughput": 5535.8, "total_tokens": 10461232}
|
|
{"current_steps": 21255, "total_steps": 37885, "loss": 0.2071, "lr": 9.615872292607559e-07, "epoch": 2.8051999472086577, "percentage": 56.1, "elapsed_time": "0:31:30", "remaining_time": "0:24:38", "throughput": 5536.04, "total_tokens": 10463536}
|
|
{"current_steps": 21260, "total_steps": 37885, "loss": 0.2321, "lr": 9.611268752434658e-07, "epoch": 2.8058598389864065, "percentage": 56.12, "elapsed_time": "0:31:30", "remaining_time": "0:24:38", "throughput": 5536.32, "total_tokens": 10465904}
|
|
{"current_steps": 21265, "total_steps": 37885, "loss": 0.0004, "lr": 9.606665294767144e-07, "epoch": 2.8065197307641547, "percentage": 56.13, "elapsed_time": "0:31:30", "remaining_time": "0:24:37", "throughput": 5536.61, "total_tokens": 10468272}
|
|
{"current_steps": 21270, "total_steps": 37885, "loss": 0.0368, "lr": 9.602061920582076e-07, "epoch": 2.807179622541903, "percentage": 56.14, "elapsed_time": "0:31:31", "remaining_time": "0:24:37", "throughput": 5536.87, "total_tokens": 10470576}
|
|
{"current_steps": 21275, "total_steps": 37885, "loss": 0.0004, "lr": 9.59745863085648e-07, "epoch": 2.8078395143196517, "percentage": 56.16, "elapsed_time": "0:31:31", "remaining_time": "0:24:36", "throughput": 5537.15, "total_tokens": 10472944}
|
|
{"current_steps": 21280, "total_steps": 37885, "loss": 0.0004, "lr": 9.59285542656738e-07, "epoch": 2.8084994060974, "percentage": 56.17, "elapsed_time": "0:31:31", "remaining_time": "0:24:36", "throughput": 5537.4, "total_tokens": 10475248}
|
|
{"current_steps": 21285, "total_steps": 37885, "loss": 0.0534, "lr": 9.588252308691768e-07, "epoch": 2.8091592978751487, "percentage": 56.18, "elapsed_time": "0:31:32", "remaining_time": "0:24:35", "throughput": 5537.77, "total_tokens": 10477808}
|
|
{"current_steps": 21290, "total_steps": 37885, "loss": 0.0402, "lr": 9.583649278206616e-07, "epoch": 2.809819189652897, "percentage": 56.2, "elapsed_time": "0:31:32", "remaining_time": "0:24:35", "throughput": 5538.05, "total_tokens": 10480176}
|
|
{"current_steps": 21295, "total_steps": 37885, "loss": 0.0887, "lr": 9.579046336088894e-07, "epoch": 2.8104790814306453, "percentage": 56.21, "elapsed_time": "0:31:32", "remaining_time": "0:24:34", "throughput": 5538.24, "total_tokens": 10482352}
|
|
{"current_steps": 21300, "total_steps": 37885, "loss": 0.2153, "lr": 9.574443483315533e-07, "epoch": 2.8111389732083936, "percentage": 56.22, "elapsed_time": "0:31:33", "remaining_time": "0:24:34", "throughput": 5538.6, "total_tokens": 10484912}
|
|
{"current_steps": 21305, "total_steps": 37885, "loss": 0.2879, "lr": 9.569840720863469e-07, "epoch": 2.8117988649861423, "percentage": 56.24, "elapsed_time": "0:31:33", "remaining_time": "0:24:33", "throughput": 5538.82, "total_tokens": 10487216}
|
|
{"current_steps": 21310, "total_steps": 37885, "loss": 0.0255, "lr": 9.565238049709596e-07, "epoch": 2.8124587567638906, "percentage": 56.25, "elapsed_time": "0:31:33", "remaining_time": "0:24:32", "throughput": 5539.21, "total_tokens": 10489840}
|
|
{"current_steps": 21315, "total_steps": 37885, "loss": 0.0002, "lr": 9.560635470830794e-07, "epoch": 2.8131186485416393, "percentage": 56.26, "elapsed_time": "0:31:34", "remaining_time": "0:24:32", "throughput": 5539.53, "total_tokens": 10492272}
|
|
{"current_steps": 21320, "total_steps": 37885, "loss": 0.0089, "lr": 9.556032985203934e-07, "epoch": 2.8137785403193876, "percentage": 56.28, "elapsed_time": "0:31:34", "remaining_time": "0:24:31", "throughput": 5539.85, "total_tokens": 10494768}
|
|
{"current_steps": 21325, "total_steps": 37885, "loss": 0.0758, "lr": 9.551430593805854e-07, "epoch": 2.814438432097136, "percentage": 56.29, "elapsed_time": "0:31:34", "remaining_time": "0:24:31", "throughput": 5540.22, "total_tokens": 10497328}
|
|
{"current_steps": 21330, "total_steps": 37885, "loss": 0.0628, "lr": 9.546828297613389e-07, "epoch": 2.8150983238748846, "percentage": 56.3, "elapsed_time": "0:31:35", "remaining_time": "0:24:30", "throughput": 5540.64, "total_tokens": 10500016}
|
|
{"current_steps": 21335, "total_steps": 37885, "loss": 0.1013, "lr": 9.542226097603335e-07, "epoch": 2.815758215652633, "percentage": 56.32, "elapsed_time": "0:31:35", "remaining_time": "0:24:30", "throughput": 5540.95, "total_tokens": 10502448}
|
|
{"current_steps": 21340, "total_steps": 37885, "loss": 0.0005, "lr": 9.537623994752473e-07, "epoch": 2.8164181074303816, "percentage": 56.33, "elapsed_time": "0:31:35", "remaining_time": "0:24:29", "throughput": 5541.31, "total_tokens": 10504944}
|
|
{"current_steps": 21345, "total_steps": 37885, "loss": 0.0752, "lr": 9.533021990037572e-07, "epoch": 2.81707799920813, "percentage": 56.34, "elapsed_time": "0:31:36", "remaining_time": "0:24:29", "throughput": 5541.62, "total_tokens": 10507440}
|
|
{"current_steps": 21350, "total_steps": 37885, "loss": 0.0179, "lr": 9.52842008443537e-07, "epoch": 2.817737890985878, "percentage": 56.35, "elapsed_time": "0:31:36", "remaining_time": "0:24:28", "throughput": 5541.85, "total_tokens": 10509680}
|
|
{"current_steps": 21355, "total_steps": 37885, "loss": 0.002, "lr": 9.523818278922593e-07, "epoch": 2.818397782763627, "percentage": 56.37, "elapsed_time": "0:31:36", "remaining_time": "0:24:28", "throughput": 5542.15, "total_tokens": 10512112}
|
|
{"current_steps": 21360, "total_steps": 37885, "loss": 0.0933, "lr": 9.519216574475937e-07, "epoch": 2.819057674541375, "percentage": 56.38, "elapsed_time": "0:31:37", "remaining_time": "0:24:27", "throughput": 5542.44, "total_tokens": 10514480}
|
|
{"current_steps": 21365, "total_steps": 37885, "loss": 0.0934, "lr": 9.514614972072082e-07, "epoch": 2.819717566319124, "percentage": 56.39, "elapsed_time": "0:31:37", "remaining_time": "0:24:27", "throughput": 5542.81, "total_tokens": 10517040}
|
|
{"current_steps": 21370, "total_steps": 37885, "loss": 0.0255, "lr": 9.510013472687683e-07, "epoch": 2.820377458096872, "percentage": 56.41, "elapsed_time": "0:31:37", "remaining_time": "0:24:26", "throughput": 5543.19, "total_tokens": 10519600}
|
|
{"current_steps": 21375, "total_steps": 37885, "loss": 0.0002, "lr": 9.505412077299377e-07, "epoch": 2.8210373498746204, "percentage": 56.42, "elapsed_time": "0:31:38", "remaining_time": "0:24:26", "throughput": 5543.62, "total_tokens": 10522288}
|
|
{"current_steps": 21380, "total_steps": 37885, "loss": 0.0009, "lr": 9.500810786883776e-07, "epoch": 2.821697241652369, "percentage": 56.43, "elapsed_time": "0:31:38", "remaining_time": "0:24:25", "throughput": 5544.04, "total_tokens": 10524976}
|
|
{"current_steps": 21385, "total_steps": 37885, "loss": 0.034, "lr": 9.496209602417472e-07, "epoch": 2.8223571334301174, "percentage": 56.45, "elapsed_time": "0:31:38", "remaining_time": "0:24:25", "throughput": 5544.45, "total_tokens": 10527600}
|
|
{"current_steps": 21390, "total_steps": 37885, "loss": 0.0767, "lr": 9.49160852487703e-07, "epoch": 2.823017025207866, "percentage": 56.46, "elapsed_time": "0:31:39", "remaining_time": "0:24:24", "throughput": 5544.72, "total_tokens": 10529968}
|
|
{"current_steps": 21395, "total_steps": 37885, "loss": 0.076, "lr": 9.487007555238997e-07, "epoch": 2.8236769169856144, "percentage": 56.47, "elapsed_time": "0:31:39", "remaining_time": "0:24:23", "throughput": 5544.95, "total_tokens": 10532272}
|
|
{"current_steps": 21400, "total_steps": 37885, "loss": 0.0648, "lr": 9.482406694479895e-07, "epoch": 2.8243368087633627, "percentage": 56.49, "elapsed_time": "0:31:39", "remaining_time": "0:24:23", "throughput": 5545.48, "total_tokens": 10535152}
|
|
{"current_steps": 21405, "total_steps": 37885, "loss": 0.1232, "lr": 9.477805943576226e-07, "epoch": 2.8249967005411114, "percentage": 56.5, "elapsed_time": "0:31:40", "remaining_time": "0:24:22", "throughput": 5545.84, "total_tokens": 10537712}
|
|
{"current_steps": 21410, "total_steps": 37885, "loss": 0.0002, "lr": 9.473205303504463e-07, "epoch": 2.8256565923188597, "percentage": 56.51, "elapsed_time": "0:31:40", "remaining_time": "0:24:22", "throughput": 5546.07, "total_tokens": 10540016}
|
|
{"current_steps": 21415, "total_steps": 37885, "loss": 0.1321, "lr": 9.468604775241061e-07, "epoch": 2.8263164840966084, "percentage": 56.53, "elapsed_time": "0:31:40", "remaining_time": "0:24:21", "throughput": 5546.4, "total_tokens": 10542512}
|
|
{"current_steps": 21420, "total_steps": 37885, "loss": 0.0401, "lr": 9.464004359762445e-07, "epoch": 2.8269763758743567, "percentage": 56.54, "elapsed_time": "0:31:41", "remaining_time": "0:24:21", "throughput": 5546.77, "total_tokens": 10545136}
|
|
{"current_steps": 21425, "total_steps": 37885, "loss": 0.0018, "lr": 9.459404058045023e-07, "epoch": 2.827636267652105, "percentage": 56.55, "elapsed_time": "0:31:41", "remaining_time": "0:24:20", "throughput": 5547.16, "total_tokens": 10547760}
|
|
{"current_steps": 21430, "total_steps": 37885, "loss": 0.0399, "lr": 9.454803871065176e-07, "epoch": 2.8282961594298532, "percentage": 56.57, "elapsed_time": "0:31:41", "remaining_time": "0:24:20", "throughput": 5547.34, "total_tokens": 10549936}
|
|
{"current_steps": 21435, "total_steps": 37885, "loss": 0.0011, "lr": 9.450203799799258e-07, "epoch": 2.828956051207602, "percentage": 56.58, "elapsed_time": "0:31:42", "remaining_time": "0:24:19", "throughput": 5547.53, "total_tokens": 10552176}
|
|
{"current_steps": 21440, "total_steps": 37885, "loss": 0.0415, "lr": 9.445603845223603e-07, "epoch": 2.8296159429853502, "percentage": 56.59, "elapsed_time": "0:31:42", "remaining_time": "0:24:19", "throughput": 5547.89, "total_tokens": 10554736}
|
|
{"current_steps": 21445, "total_steps": 37885, "loss": 0.1261, "lr": 9.44100400831452e-07, "epoch": 2.830275834763099, "percentage": 56.61, "elapsed_time": "0:31:42", "remaining_time": "0:24:18", "throughput": 5548.25, "total_tokens": 10557296}
|
|
{"current_steps": 21450, "total_steps": 37885, "loss": 0.0783, "lr": 9.436404290048282e-07, "epoch": 2.8309357265408472, "percentage": 56.62, "elapsed_time": "0:31:43", "remaining_time": "0:24:18", "throughput": 5548.66, "total_tokens": 10559984}
|
|
{"current_steps": 21455, "total_steps": 37885, "loss": 0.1771, "lr": 9.43180469140116e-07, "epoch": 2.8315956183185955, "percentage": 56.63, "elapsed_time": "0:31:43", "remaining_time": "0:24:17", "throughput": 5548.95, "total_tokens": 10562416}
|
|
{"current_steps": 21460, "total_steps": 37885, "loss": 0.0008, "lr": 9.427205213349369e-07, "epoch": 2.8322555100963442, "percentage": 56.65, "elapsed_time": "0:31:43", "remaining_time": "0:24:17", "throughput": 5549.31, "total_tokens": 10564976}
|
|
{"current_steps": 21465, "total_steps": 37885, "loss": 0.0006, "lr": 9.422605856869129e-07, "epoch": 2.8329154018740925, "percentage": 56.66, "elapsed_time": "0:31:44", "remaining_time": "0:24:16", "throughput": 5549.76, "total_tokens": 10567728}
|
|
{"current_steps": 21470, "total_steps": 37885, "loss": 0.0002, "lr": 9.418006622936618e-07, "epoch": 2.8335752936518412, "percentage": 56.67, "elapsed_time": "0:31:44", "remaining_time": "0:24:16", "throughput": 5550.19, "total_tokens": 10570416}
|
|
{"current_steps": 21475, "total_steps": 37885, "loss": 0.1179, "lr": 9.413407512527977e-07, "epoch": 2.8342351854295895, "percentage": 56.68, "elapsed_time": "0:31:44", "remaining_time": "0:24:15", "throughput": 5550.46, "total_tokens": 10572784}
|
|
{"current_steps": 21480, "total_steps": 37885, "loss": 0.0001, "lr": 9.408808526619352e-07, "epoch": 2.834895077207338, "percentage": 56.7, "elapsed_time": "0:31:45", "remaining_time": "0:24:15", "throughput": 5550.75, "total_tokens": 10575152}
|
|
{"current_steps": 21485, "total_steps": 37885, "loss": 0.0002, "lr": 9.404209666186831e-07, "epoch": 2.8355549689850865, "percentage": 56.71, "elapsed_time": "0:31:45", "remaining_time": "0:24:14", "throughput": 5551.09, "total_tokens": 10577648}
|
|
{"current_steps": 21490, "total_steps": 37885, "loss": 0.0001, "lr": 9.3996109322065e-07, "epoch": 2.836214860762835, "percentage": 56.72, "elapsed_time": "0:31:45", "remaining_time": "0:24:13", "throughput": 5551.47, "total_tokens": 10580208}
|
|
{"current_steps": 21495, "total_steps": 37885, "loss": 0.0355, "lr": 9.395012325654398e-07, "epoch": 2.8368747525405835, "percentage": 56.74, "elapsed_time": "0:31:46", "remaining_time": "0:24:13", "throughput": 5551.72, "total_tokens": 10582512}
|
|
{"current_steps": 21500, "total_steps": 37885, "loss": 0.0001, "lr": 9.390413847506547e-07, "epoch": 2.837534644318332, "percentage": 56.75, "elapsed_time": "0:31:46", "remaining_time": "0:24:12", "throughput": 5552.03, "total_tokens": 10584944}
|
|
{"current_steps": 21505, "total_steps": 37885, "loss": 0.049, "lr": 9.385815498738944e-07, "epoch": 2.83819453609608, "percentage": 56.76, "elapsed_time": "0:31:46", "remaining_time": "0:24:12", "throughput": 5552.28, "total_tokens": 10587248}
|
|
{"current_steps": 21510, "total_steps": 37885, "loss": 0.0809, "lr": 9.381217280327552e-07, "epoch": 2.838854427873829, "percentage": 56.78, "elapsed_time": "0:31:47", "remaining_time": "0:24:11", "throughput": 5552.75, "total_tokens": 10590000}
|
|
{"current_steps": 21515, "total_steps": 37885, "loss": 0.0, "lr": 9.376619193248314e-07, "epoch": 2.839514319651577, "percentage": 56.79, "elapsed_time": "0:31:47", "remaining_time": "0:24:11", "throughput": 5553.1, "total_tokens": 10592496}
|
|
{"current_steps": 21520, "total_steps": 37885, "loss": 0.0767, "lr": 9.372021238477138e-07, "epoch": 2.840174211429326, "percentage": 56.8, "elapsed_time": "0:31:47", "remaining_time": "0:24:10", "throughput": 5553.54, "total_tokens": 10595184}
|
|
{"current_steps": 21525, "total_steps": 37885, "loss": 0.001, "lr": 9.367423416989905e-07, "epoch": 2.840834103207074, "percentage": 56.82, "elapsed_time": "0:31:48", "remaining_time": "0:24:10", "throughput": 5553.82, "total_tokens": 10597552}
|
|
{"current_steps": 21530, "total_steps": 37885, "loss": 0.1518, "lr": 9.362825729762472e-07, "epoch": 2.8414939949848224, "percentage": 56.83, "elapsed_time": "0:31:48", "remaining_time": "0:24:09", "throughput": 5554.26, "total_tokens": 10600240}
|
|
{"current_steps": 21535, "total_steps": 37885, "loss": 0.1066, "lr": 9.358228177770663e-07, "epoch": 2.842153886762571, "percentage": 56.84, "elapsed_time": "0:31:48", "remaining_time": "0:24:09", "throughput": 5554.55, "total_tokens": 10602608}
|
|
{"current_steps": 21540, "total_steps": 37885, "loss": 0.0717, "lr": 9.353630761990276e-07, "epoch": 2.8428137785403194, "percentage": 56.86, "elapsed_time": "0:31:49", "remaining_time": "0:24:08", "throughput": 5554.9, "total_tokens": 10605104}
|
|
{"current_steps": 21545, "total_steps": 37885, "loss": 0.0023, "lr": 9.349033483397082e-07, "epoch": 2.843473670318068, "percentage": 56.87, "elapsed_time": "0:31:49", "remaining_time": "0:24:08", "throughput": 5555.25, "total_tokens": 10607600}
|
|
{"current_steps": 21550, "total_steps": 37885, "loss": 0.0004, "lr": 9.344436342966812e-07, "epoch": 2.8441335620958164, "percentage": 56.88, "elapsed_time": "0:31:49", "remaining_time": "0:24:07", "throughput": 5555.63, "total_tokens": 10610160}
|
|
{"current_steps": 21555, "total_steps": 37885, "loss": 0.0421, "lr": 9.339839341675185e-07, "epoch": 2.8447934538735646, "percentage": 56.9, "elapsed_time": "0:31:50", "remaining_time": "0:24:07", "throughput": 5555.85, "total_tokens": 10612400}
|
|
{"current_steps": 21560, "total_steps": 37885, "loss": 0.0003, "lr": 9.335242480497876e-07, "epoch": 2.845453345651313, "percentage": 56.91, "elapsed_time": "0:31:50", "remaining_time": "0:24:06", "throughput": 5556.3, "total_tokens": 10615088}
|
|
{"current_steps": 21565, "total_steps": 37885, "loss": 0.0002, "lr": 9.330645760410537e-07, "epoch": 2.8461132374290616, "percentage": 56.92, "elapsed_time": "0:31:50", "remaining_time": "0:24:06", "throughput": 5556.68, "total_tokens": 10617648}
|
|
{"current_steps": 21570, "total_steps": 37885, "loss": 0.0006, "lr": 9.326049182388789e-07, "epoch": 2.8467731292068104, "percentage": 56.94, "elapsed_time": "0:31:51", "remaining_time": "0:24:05", "throughput": 5557.12, "total_tokens": 10620336}
|
|
{"current_steps": 21575, "total_steps": 37885, "loss": 0.0001, "lr": 9.32145274740822e-07, "epoch": 2.8474330209845586, "percentage": 56.95, "elapsed_time": "0:31:51", "remaining_time": "0:24:04", "throughput": 5557.42, "total_tokens": 10622704}
|
|
{"current_steps": 21580, "total_steps": 37885, "loss": 0.0407, "lr": 9.316856456444392e-07, "epoch": 2.848092912762307, "percentage": 56.96, "elapsed_time": "0:31:51", "remaining_time": "0:24:04", "throughput": 5557.78, "total_tokens": 10625264}
|
|
{"current_steps": 21585, "total_steps": 37885, "loss": 0.0736, "lr": 9.312260310472833e-07, "epoch": 2.848752804540055, "percentage": 56.98, "elapsed_time": "0:31:52", "remaining_time": "0:24:03", "throughput": 5558.25, "total_tokens": 10628016}
|
|
{"current_steps": 21590, "total_steps": 37885, "loss": 0.0009, "lr": 9.307664310469046e-07, "epoch": 2.849412696317804, "percentage": 56.99, "elapsed_time": "0:31:52", "remaining_time": "0:24:03", "throughput": 5558.53, "total_tokens": 10630384}
|
|
{"current_steps": 21595, "total_steps": 37885, "loss": 0.0016, "lr": 9.303068457408497e-07, "epoch": 2.850072588095552, "percentage": 57.0, "elapsed_time": "0:31:52", "remaining_time": "0:24:02", "throughput": 5558.77, "total_tokens": 10632688}
|
|
{"current_steps": 21600, "total_steps": 37885, "loss": 0.0518, "lr": 9.298472752266615e-07, "epoch": 2.850732479873301, "percentage": 57.01, "elapsed_time": "0:31:53", "remaining_time": "0:24:02", "throughput": 5558.9, "total_tokens": 10634800}
|
|
{"current_steps": 21605, "total_steps": 37885, "loss": 0.0001, "lr": 9.293877196018816e-07, "epoch": 2.851392371651049, "percentage": 57.03, "elapsed_time": "0:31:53", "remaining_time": "0:24:01", "throughput": 5559.14, "total_tokens": 10637104}
|
|
{"current_steps": 21610, "total_steps": 37885, "loss": 0.0002, "lr": 9.289281789640465e-07, "epoch": 2.8520522634287975, "percentage": 57.04, "elapsed_time": "0:31:53", "remaining_time": "0:24:01", "throughput": 5559.36, "total_tokens": 10639408}
|
|
{"current_steps": 21615, "total_steps": 37885, "loss": 0.274, "lr": 9.28468653410691e-07, "epoch": 2.852712155206546, "percentage": 57.05, "elapsed_time": "0:31:54", "remaining_time": "0:24:00", "throughput": 5559.51, "total_tokens": 10641584}
|
|
{"current_steps": 21620, "total_steps": 37885, "loss": 0.0001, "lr": 9.280091430393462e-07, "epoch": 2.8533720469842945, "percentage": 57.07, "elapsed_time": "0:31:54", "remaining_time": "0:24:00", "throughput": 5559.73, "total_tokens": 10643888}
|
|
{"current_steps": 21625, "total_steps": 37885, "loss": 0.0001, "lr": 9.275496479475386e-07, "epoch": 2.854031938762043, "percentage": 57.08, "elapsed_time": "0:31:54", "remaining_time": "0:23:59", "throughput": 5560.11, "total_tokens": 10646512}
|
|
{"current_steps": 21630, "total_steps": 37885, "loss": 0.001, "lr": 9.270901682327945e-07, "epoch": 2.8546918305397915, "percentage": 57.09, "elapsed_time": "0:31:55", "remaining_time": "0:23:59", "throughput": 5560.37, "total_tokens": 10648880}
|
|
{"current_steps": 21635, "total_steps": 37885, "loss": 0.0012, "lr": 9.266307039926333e-07, "epoch": 2.8553517223175398, "percentage": 57.11, "elapsed_time": "0:31:55", "remaining_time": "0:23:58", "throughput": 5560.71, "total_tokens": 10651440}
|
|
{"current_steps": 21640, "total_steps": 37885, "loss": 0.0001, "lr": 9.261712553245747e-07, "epoch": 2.8560116140952885, "percentage": 57.12, "elapsed_time": "0:31:55", "remaining_time": "0:23:58", "throughput": 5561.06, "total_tokens": 10654000}
|
|
{"current_steps": 21645, "total_steps": 37885, "loss": 0.202, "lr": 9.257118223261323e-07, "epoch": 2.8566715058730368, "percentage": 57.13, "elapsed_time": "0:31:56", "remaining_time": "0:23:57", "throughput": 5561.39, "total_tokens": 10656560}
|
|
{"current_steps": 21650, "total_steps": 37885, "loss": 0.0427, "lr": 9.252524050948174e-07, "epoch": 2.8573313976507855, "percentage": 57.15, "elapsed_time": "0:31:56", "remaining_time": "0:23:57", "throughput": 5561.63, "total_tokens": 10658928}
|
|
{"current_steps": 21655, "total_steps": 37885, "loss": 0.0675, "lr": 9.247930037281385e-07, "epoch": 2.8579912894285338, "percentage": 57.16, "elapsed_time": "0:31:56", "remaining_time": "0:23:56", "throughput": 5561.92, "total_tokens": 10661360}
|
|
{"current_steps": 21660, "total_steps": 37885, "loss": 0.0338, "lr": 9.243336183235995e-07, "epoch": 2.858651181206282, "percentage": 57.17, "elapsed_time": "0:31:57", "remaining_time": "0:23:56", "throughput": 5562.27, "total_tokens": 10663920}
|
|
{"current_steps": 21665, "total_steps": 37885, "loss": 0.1112, "lr": 9.238742489787027e-07, "epoch": 2.8593110729840308, "percentage": 57.19, "elapsed_time": "0:31:57", "remaining_time": "0:23:55", "throughput": 5562.58, "total_tokens": 10666416}
|
|
{"current_steps": 21670, "total_steps": 37885, "loss": 0.0335, "lr": 9.234148957909451e-07, "epoch": 2.859970964761779, "percentage": 57.2, "elapsed_time": "0:31:57", "remaining_time": "0:23:55", "throughput": 5562.77, "total_tokens": 10668656}
|
|
{"current_steps": 21675, "total_steps": 37885, "loss": 0.1067, "lr": 9.229555588578211e-07, "epoch": 2.8606308565395278, "percentage": 57.21, "elapsed_time": "0:31:58", "remaining_time": "0:23:54", "throughput": 5563.09, "total_tokens": 10671152}
|
|
{"current_steps": 21680, "total_steps": 37885, "loss": 0.0001, "lr": 9.22496238276822e-07, "epoch": 2.861290748317276, "percentage": 57.23, "elapsed_time": "0:31:58", "remaining_time": "0:23:54", "throughput": 5563.28, "total_tokens": 10673392}
|
|
{"current_steps": 21685, "total_steps": 37885, "loss": 0.0005, "lr": 9.220369341454348e-07, "epoch": 2.8619506400950243, "percentage": 57.24, "elapsed_time": "0:31:58", "remaining_time": "0:23:53", "throughput": 5563.5, "total_tokens": 10675696}
|
|
{"current_steps": 21690, "total_steps": 37885, "loss": 0.0005, "lr": 9.215776465611441e-07, "epoch": 2.8626105318727726, "percentage": 57.25, "elapsed_time": "0:31:59", "remaining_time": "0:23:52", "throughput": 5563.69, "total_tokens": 10677936}
|
|
{"current_steps": 21695, "total_steps": 37885, "loss": 0.0022, "lr": 9.2111837562143e-07, "epoch": 2.8632704236505213, "percentage": 57.27, "elapsed_time": "0:31:59", "remaining_time": "0:23:52", "throughput": 5564.23, "total_tokens": 10680880}
|
|
{"current_steps": 21700, "total_steps": 37885, "loss": 0.0003, "lr": 9.206591214237692e-07, "epoch": 2.86393031542827, "percentage": 57.28, "elapsed_time": "0:31:59", "remaining_time": "0:23:51", "throughput": 5564.39, "total_tokens": 10683056}
|
|
{"current_steps": 21705, "total_steps": 37885, "loss": 0.1564, "lr": 9.201998840656355e-07, "epoch": 2.8645902072060183, "percentage": 57.29, "elapsed_time": "0:32:00", "remaining_time": "0:23:51", "throughput": 5564.69, "total_tokens": 10685552}
|
|
{"current_steps": 21710, "total_steps": 37885, "loss": 0.0002, "lr": 9.197406636444984e-07, "epoch": 2.8652500989837666, "percentage": 57.31, "elapsed_time": "0:32:00", "remaining_time": "0:23:50", "throughput": 5564.84, "total_tokens": 10687728}
|
|
{"current_steps": 21715, "total_steps": 37885, "loss": 0.0323, "lr": 9.192814602578245e-07, "epoch": 2.865909990761515, "percentage": 57.32, "elapsed_time": "0:32:00", "remaining_time": "0:23:50", "throughput": 5565.24, "total_tokens": 10690352}
|
|
{"current_steps": 21720, "total_steps": 37885, "loss": 0.1149, "lr": 9.188222740030759e-07, "epoch": 2.8665698825392636, "percentage": 57.33, "elapsed_time": "0:32:01", "remaining_time": "0:23:49", "throughput": 5565.73, "total_tokens": 10693168}
|
|
{"current_steps": 21725, "total_steps": 37885, "loss": 0.0007, "lr": 9.18363104977712e-07, "epoch": 2.867229774317012, "percentage": 57.34, "elapsed_time": "0:32:01", "remaining_time": "0:23:49", "throughput": 5566.04, "total_tokens": 10695600}
|
|
{"current_steps": 21730, "total_steps": 37885, "loss": 0.0816, "lr": 9.179039532791879e-07, "epoch": 2.8678896660947606, "percentage": 57.36, "elapsed_time": "0:32:01", "remaining_time": "0:23:48", "throughput": 5566.35, "total_tokens": 10698032}
|
|
{"current_steps": 21735, "total_steps": 37885, "loss": 0.0805, "lr": 9.174448190049551e-07, "epoch": 2.868549557872509, "percentage": 57.37, "elapsed_time": "0:32:02", "remaining_time": "0:23:48", "throughput": 5566.57, "total_tokens": 10700272}
|
|
{"current_steps": 21740, "total_steps": 37885, "loss": 0.0385, "lr": 9.169857022524616e-07, "epoch": 2.869209449650257, "percentage": 57.38, "elapsed_time": "0:32:02", "remaining_time": "0:23:47", "throughput": 5566.84, "total_tokens": 10702640}
|
|
{"current_steps": 21745, "total_steps": 37885, "loss": 0.1961, "lr": 9.165266031191518e-07, "epoch": 2.869869341428006, "percentage": 57.4, "elapsed_time": "0:32:02", "remaining_time": "0:23:47", "throughput": 5567.17, "total_tokens": 10705136}
|
|
{"current_steps": 21750, "total_steps": 37885, "loss": 0.0003, "lr": 9.160675217024659e-07, "epoch": 2.870529233205754, "percentage": 57.41, "elapsed_time": "0:32:03", "remaining_time": "0:23:46", "throughput": 5567.61, "total_tokens": 10707824}
|
|
{"current_steps": 21755, "total_steps": 37885, "loss": 0.1185, "lr": 9.156084580998409e-07, "epoch": 2.871189124983503, "percentage": 57.42, "elapsed_time": "0:32:03", "remaining_time": "0:23:46", "throughput": 5567.83, "total_tokens": 10710064}
|
|
{"current_steps": 21760, "total_steps": 37885, "loss": 0.0926, "lr": 9.151494124087093e-07, "epoch": 2.871849016761251, "percentage": 57.44, "elapsed_time": "0:32:03", "remaining_time": "0:23:45", "throughput": 5568.1, "total_tokens": 10712432}
|
|
{"current_steps": 21765, "total_steps": 37885, "loss": 0.0872, "lr": 9.146903847265008e-07, "epoch": 2.8725089085389994, "percentage": 57.45, "elapsed_time": "0:32:04", "remaining_time": "0:23:45", "throughput": 5568.31, "total_tokens": 10714672}
|
|
{"current_steps": 21770, "total_steps": 37885, "loss": 0.0009, "lr": 9.142313751506401e-07, "epoch": 2.873168800316748, "percentage": 57.46, "elapsed_time": "0:32:04", "remaining_time": "0:23:44", "throughput": 5568.53, "total_tokens": 10716912}
|
|
{"current_steps": 21775, "total_steps": 37885, "loss": 0.0009, "lr": 9.137723837785491e-07, "epoch": 2.8738286920944964, "percentage": 57.48, "elapsed_time": "0:32:04", "remaining_time": "0:23:44", "throughput": 5568.96, "total_tokens": 10719600}
|
|
{"current_steps": 21780, "total_steps": 37885, "loss": 0.0015, "lr": 9.133134107076455e-07, "epoch": 2.874488583872245, "percentage": 57.49, "elapsed_time": "0:32:05", "remaining_time": "0:23:43", "throughput": 5569.2, "total_tokens": 10721904}
|
|
{"current_steps": 21785, "total_steps": 37885, "loss": 0.0006, "lr": 9.12854456035342e-07, "epoch": 2.8751484756499934, "percentage": 57.5, "elapsed_time": "0:32:05", "remaining_time": "0:23:43", "throughput": 5569.7, "total_tokens": 10724720}
|
|
{"current_steps": 21790, "total_steps": 37885, "loss": 0.0011, "lr": 9.123955198590498e-07, "epoch": 2.8758083674277417, "percentage": 57.52, "elapsed_time": "0:32:05", "remaining_time": "0:23:42", "throughput": 5570.05, "total_tokens": 10727216}
|
|
{"current_steps": 21795, "total_steps": 37885, "loss": 0.0004, "lr": 9.119366022761736e-07, "epoch": 2.8764682592054904, "percentage": 57.53, "elapsed_time": "0:32:06", "remaining_time": "0:23:42", "throughput": 5570.37, "total_tokens": 10729648}
|
|
{"current_steps": 21800, "total_steps": 37885, "loss": 0.0005, "lr": 9.114777033841162e-07, "epoch": 2.8771281509832387, "percentage": 57.54, "elapsed_time": "0:32:06", "remaining_time": "0:23:41", "throughput": 5570.64, "total_tokens": 10732016}
|
|
{"current_steps": 21805, "total_steps": 37885, "loss": 0.058, "lr": 9.110188232802756e-07, "epoch": 2.8777880427609874, "percentage": 57.56, "elapsed_time": "0:32:06", "remaining_time": "0:23:40", "throughput": 5570.89, "total_tokens": 10734320}
|
|
{"current_steps": 21810, "total_steps": 37885, "loss": 0.0001, "lr": 9.105599620620446e-07, "epoch": 2.8784479345387357, "percentage": 57.57, "elapsed_time": "0:32:07", "remaining_time": "0:23:40", "throughput": 5571.32, "total_tokens": 10737008}
|
|
{"current_steps": 21815, "total_steps": 37885, "loss": 0.0938, "lr": 9.101011198268146e-07, "epoch": 2.879107826316484, "percentage": 57.58, "elapsed_time": "0:32:07", "remaining_time": "0:23:39", "throughput": 5571.72, "total_tokens": 10739632}
|
|
{"current_steps": 21820, "total_steps": 37885, "loss": 0.0004, "lr": 9.096422966719704e-07, "epoch": 2.8797677180942327, "percentage": 57.6, "elapsed_time": "0:32:07", "remaining_time": "0:23:39", "throughput": 5572.18, "total_tokens": 10742384}
|
|
{"current_steps": 21825, "total_steps": 37885, "loss": 0.1499, "lr": 9.091834926948949e-07, "epoch": 2.880427609871981, "percentage": 57.61, "elapsed_time": "0:32:08", "remaining_time": "0:23:38", "throughput": 5572.52, "total_tokens": 10744880}
|
|
{"current_steps": 21830, "total_steps": 37885, "loss": 0.0004, "lr": 9.087247079929654e-07, "epoch": 2.8810875016497297, "percentage": 57.62, "elapsed_time": "0:32:08", "remaining_time": "0:23:38", "throughput": 5572.97, "total_tokens": 10747632}
|
|
{"current_steps": 21835, "total_steps": 37885, "loss": 0.0001, "lr": 9.082659426635554e-07, "epoch": 2.881747393427478, "percentage": 57.63, "elapsed_time": "0:32:08", "remaining_time": "0:23:37", "throughput": 5573.31, "total_tokens": 10750128}
|
|
{"current_steps": 21840, "total_steps": 37885, "loss": 0.1334, "lr": 9.07807196804035e-07, "epoch": 2.8824072852052263, "percentage": 57.65, "elapsed_time": "0:32:09", "remaining_time": "0:23:37", "throughput": 5573.77, "total_tokens": 10752880}
|
|
{"current_steps": 21845, "total_steps": 37885, "loss": 0.1001, "lr": 9.073484705117691e-07, "epoch": 2.8830671769829745, "percentage": 57.66, "elapsed_time": "0:32:09", "remaining_time": "0:23:36", "throughput": 5574.17, "total_tokens": 10755504}
|
|
{"current_steps": 21850, "total_steps": 37885, "loss": 0.0001, "lr": 9.068897638841197e-07, "epoch": 2.8837270687607233, "percentage": 57.67, "elapsed_time": "0:32:09", "remaining_time": "0:23:36", "throughput": 5574.43, "total_tokens": 10757808}
|
|
{"current_steps": 21855, "total_steps": 37885, "loss": 0.0008, "lr": 9.064310770184438e-07, "epoch": 2.8843869605384715, "percentage": 57.69, "elapsed_time": "0:32:10", "remaining_time": "0:23:35", "throughput": 5574.83, "total_tokens": 10760432}
|
|
{"current_steps": 21860, "total_steps": 37885, "loss": 0.0008, "lr": 9.059724100120939e-07, "epoch": 2.8850468523162203, "percentage": 57.7, "elapsed_time": "0:32:10", "remaining_time": "0:23:35", "throughput": 5575.13, "total_tokens": 10762864}
|
|
{"current_steps": 21865, "total_steps": 37885, "loss": 0.0027, "lr": 9.055137629624194e-07, "epoch": 2.8857067440939685, "percentage": 57.71, "elapsed_time": "0:32:10", "remaining_time": "0:23:34", "throughput": 5575.42, "total_tokens": 10765232}
|
|
{"current_steps": 21870, "total_steps": 37885, "loss": 0.0253, "lr": 9.05055135966764e-07, "epoch": 2.886366635871717, "percentage": 57.73, "elapsed_time": "0:32:11", "remaining_time": "0:23:34", "throughput": 5575.7, "total_tokens": 10767600}
|
|
{"current_steps": 21875, "total_steps": 37885, "loss": 0.0001, "lr": 9.04596529122469e-07, "epoch": 2.8870265276494655, "percentage": 57.74, "elapsed_time": "0:32:11", "remaining_time": "0:23:33", "throughput": 5575.85, "total_tokens": 10769712}
|
|
{"current_steps": 21880, "total_steps": 37885, "loss": 0.0, "lr": 9.041379425268697e-07, "epoch": 2.887686419427214, "percentage": 57.75, "elapsed_time": "0:32:11", "remaining_time": "0:23:33", "throughput": 5576.07, "total_tokens": 10771952}
|
|
{"current_steps": 21885, "total_steps": 37885, "loss": 0.0676, "lr": 9.036793762772977e-07, "epoch": 2.8883463112049625, "percentage": 57.77, "elapsed_time": "0:32:12", "remaining_time": "0:23:32", "throughput": 5576.44, "total_tokens": 10774512}
|
|
{"current_steps": 21890, "total_steps": 37885, "loss": 0.0001, "lr": 9.032208304710808e-07, "epoch": 2.889006202982711, "percentage": 57.78, "elapsed_time": "0:32:12", "remaining_time": "0:23:32", "throughput": 5576.76, "total_tokens": 10776944}
|
|
{"current_steps": 21895, "total_steps": 37885, "loss": 0.0007, "lr": 9.027623052055417e-07, "epoch": 2.889666094760459, "percentage": 57.79, "elapsed_time": "0:32:12", "remaining_time": "0:23:31", "throughput": 5577.16, "total_tokens": 10779568}
|
|
{"current_steps": 21900, "total_steps": 37885, "loss": 0.0041, "lr": 9.023038005779992e-07, "epoch": 2.890325986538208, "percentage": 57.81, "elapsed_time": "0:32:13", "remaining_time": "0:23:31", "throughput": 5577.71, "total_tokens": 10782512}
|
|
{"current_steps": 21905, "total_steps": 37885, "loss": 0.0001, "lr": 9.018453166857677e-07, "epoch": 2.890985878315956, "percentage": 57.82, "elapsed_time": "0:32:13", "remaining_time": "0:23:30", "throughput": 5577.95, "total_tokens": 10784816}
|
|
{"current_steps": 21910, "total_steps": 37885, "loss": 0.0001, "lr": 9.013868536261566e-07, "epoch": 2.891645770093705, "percentage": 57.83, "elapsed_time": "0:32:13", "remaining_time": "0:23:29", "throughput": 5578.39, "total_tokens": 10787504}
|
|
{"current_steps": 21915, "total_steps": 37885, "loss": 0.0, "lr": 9.009284114964721e-07, "epoch": 2.892305661871453, "percentage": 57.85, "elapsed_time": "0:32:14", "remaining_time": "0:23:29", "throughput": 5578.7, "total_tokens": 10789936}
|
|
{"current_steps": 21920, "total_steps": 37885, "loss": 0.0004, "lr": 9.004699903940146e-07, "epoch": 2.8929655536492014, "percentage": 57.86, "elapsed_time": "0:32:14", "remaining_time": "0:23:28", "throughput": 5579.07, "total_tokens": 10792496}
|
|
{"current_steps": 21925, "total_steps": 37885, "loss": 0.1505, "lr": 9.000115904160811e-07, "epoch": 2.89362544542695, "percentage": 57.87, "elapsed_time": "0:32:14", "remaining_time": "0:23:28", "throughput": 5579.34, "total_tokens": 10794864}
|
|
{"current_steps": 21930, "total_steps": 37885, "loss": 0.0036, "lr": 8.995532116599636e-07, "epoch": 2.8942853372046984, "percentage": 57.89, "elapsed_time": "0:32:15", "remaining_time": "0:23:27", "throughput": 5579.62, "total_tokens": 10797232}
|
|
{"current_steps": 21935, "total_steps": 37885, "loss": 0.0001, "lr": 8.99094854222949e-07, "epoch": 2.894945228982447, "percentage": 57.9, "elapsed_time": "0:32:15", "remaining_time": "0:23:27", "throughput": 5579.96, "total_tokens": 10799728}
|
|
{"current_steps": 21940, "total_steps": 37885, "loss": 0.0537, "lr": 8.986365182023212e-07, "epoch": 2.8956051207601954, "percentage": 57.91, "elapsed_time": "0:32:15", "remaining_time": "0:23:26", "throughput": 5580.3, "total_tokens": 10802224}
|
|
{"current_steps": 21945, "total_steps": 37885, "loss": 0.0036, "lr": 8.981782036953583e-07, "epoch": 2.8962650125379437, "percentage": 57.93, "elapsed_time": "0:32:16", "remaining_time": "0:23:26", "throughput": 5580.58, "total_tokens": 10804592}
|
|
{"current_steps": 21950, "total_steps": 37885, "loss": 0.0397, "lr": 8.977199107993345e-07, "epoch": 2.8969249043156924, "percentage": 57.94, "elapsed_time": "0:32:16", "remaining_time": "0:23:25", "throughput": 5580.81, "total_tokens": 10806896}
|
|
{"current_steps": 21955, "total_steps": 37885, "loss": 0.0016, "lr": 8.972616396115194e-07, "epoch": 2.8975847960934407, "percentage": 57.95, "elapsed_time": "0:32:16", "remaining_time": "0:23:25", "throughput": 5581.12, "total_tokens": 10809328}
|
|
{"current_steps": 21960, "total_steps": 37885, "loss": 0.0631, "lr": 8.968033902291764e-07, "epoch": 2.8982446878711894, "percentage": 57.96, "elapsed_time": "0:32:17", "remaining_time": "0:23:24", "throughput": 5581.51, "total_tokens": 10811952}
|
|
{"current_steps": 21965, "total_steps": 37885, "loss": 0.0689, "lr": 8.963451627495673e-07, "epoch": 2.8989045796489377, "percentage": 57.98, "elapsed_time": "0:32:17", "remaining_time": "0:23:24", "throughput": 5581.76, "total_tokens": 10814256}
|
|
{"current_steps": 21970, "total_steps": 37885, "loss": 0.0617, "lr": 8.95886957269946e-07, "epoch": 2.899564471426686, "percentage": 57.99, "elapsed_time": "0:32:17", "remaining_time": "0:23:23", "throughput": 5582.03, "total_tokens": 10816624}
|
|
{"current_steps": 21975, "total_steps": 37885, "loss": 0.0001, "lr": 8.954287738875649e-07, "epoch": 2.900224363204434, "percentage": 58.0, "elapsed_time": "0:32:18", "remaining_time": "0:23:23", "throughput": 5582.4, "total_tokens": 10819184}
|
|
{"current_steps": 21980, "total_steps": 37885, "loss": 0.0006, "lr": 8.94970612699669e-07, "epoch": 2.900884254982183, "percentage": 58.02, "elapsed_time": "0:32:18", "remaining_time": "0:23:22", "throughput": 5582.71, "total_tokens": 10821616}
|
|
{"current_steps": 21985, "total_steps": 37885, "loss": 0.0308, "lr": 8.945124738034998e-07, "epoch": 2.901544146759931, "percentage": 58.03, "elapsed_time": "0:32:18", "remaining_time": "0:23:22", "throughput": 5582.96, "total_tokens": 10823920}
|
|
{"current_steps": 21990, "total_steps": 37885, "loss": 0.0003, "lr": 8.940543572962944e-07, "epoch": 2.90220403853768, "percentage": 58.04, "elapsed_time": "0:32:19", "remaining_time": "0:23:21", "throughput": 5583.25, "total_tokens": 10826288}
|
|
{"current_steps": 21995, "total_steps": 37885, "loss": 0.0, "lr": 8.93596263275284e-07, "epoch": 2.902863930315428, "percentage": 58.06, "elapsed_time": "0:32:19", "remaining_time": "0:23:21", "throughput": 5583.43, "total_tokens": 10828464}
|
|
{"current_steps": 22000, "total_steps": 37885, "loss": 0.1661, "lr": 8.931381918376969e-07, "epoch": 2.9035238220931765, "percentage": 58.07, "elapsed_time": "0:32:19", "remaining_time": "0:23:20", "throughput": 5583.78, "total_tokens": 10830960}
|
|
{"current_steps": 22005, "total_steps": 37885, "loss": 0.0767, "lr": 8.926801430807545e-07, "epoch": 2.904183713870925, "percentage": 58.08, "elapsed_time": "0:32:20", "remaining_time": "0:23:20", "throughput": 5583.96, "total_tokens": 10833136}
|
|
{"current_steps": 22010, "total_steps": 37885, "loss": 0.0001, "lr": 8.922221171016744e-07, "epoch": 2.9048436056486735, "percentage": 58.1, "elapsed_time": "0:32:20", "remaining_time": "0:23:19", "throughput": 5584.3, "total_tokens": 10835632}
|
|
{"current_steps": 22015, "total_steps": 37885, "loss": 0.0003, "lr": 8.917641139976697e-07, "epoch": 2.905503497426422, "percentage": 58.11, "elapsed_time": "0:32:20", "remaining_time": "0:23:18", "throughput": 5584.61, "total_tokens": 10838064}
|
|
{"current_steps": 22020, "total_steps": 37885, "loss": 0.0798, "lr": 8.913061338659478e-07, "epoch": 2.9061633892041705, "percentage": 58.12, "elapsed_time": "0:32:21", "remaining_time": "0:23:18", "throughput": 5584.85, "total_tokens": 10840368}
|
|
{"current_steps": 22025, "total_steps": 37885, "loss": 0.0282, "lr": 8.908481768037119e-07, "epoch": 2.9068232809819188, "percentage": 58.14, "elapsed_time": "0:32:21", "remaining_time": "0:23:17", "throughput": 5585.17, "total_tokens": 10842800}
|
|
{"current_steps": 22030, "total_steps": 37885, "loss": 0.0005, "lr": 8.903902429081603e-07, "epoch": 2.9074831727596675, "percentage": 58.15, "elapsed_time": "0:32:21", "remaining_time": "0:23:17", "throughput": 5585.56, "total_tokens": 10845424}
|
|
{"current_steps": 22035, "total_steps": 37885, "loss": 0.0004, "lr": 8.899323322764857e-07, "epoch": 2.908143064537416, "percentage": 58.16, "elapsed_time": "0:32:22", "remaining_time": "0:23:16", "throughput": 5585.9, "total_tokens": 10847920}
|
|
{"current_steps": 22040, "total_steps": 37885, "loss": 0.0001, "lr": 8.894744450058767e-07, "epoch": 2.9088029563151645, "percentage": 58.18, "elapsed_time": "0:32:22", "remaining_time": "0:23:16", "throughput": 5586.12, "total_tokens": 10850160}
|
|
{"current_steps": 22045, "total_steps": 37885, "loss": 0.0661, "lr": 8.890165811935161e-07, "epoch": 2.909462848092913, "percentage": 58.19, "elapsed_time": "0:32:22", "remaining_time": "0:23:15", "throughput": 5586.33, "total_tokens": 10852400}
|
|
{"current_steps": 22050, "total_steps": 37885, "loss": 0.1177, "lr": 8.885587409365826e-07, "epoch": 2.910122739870661, "percentage": 58.2, "elapsed_time": "0:32:23", "remaining_time": "0:23:15", "throughput": 5586.63, "total_tokens": 10854832}
|
|
{"current_steps": 22055, "total_steps": 37885, "loss": 0.0018, "lr": 8.881009243322493e-07, "epoch": 2.91078263164841, "percentage": 58.22, "elapsed_time": "0:32:23", "remaining_time": "0:23:14", "throughput": 5587.04, "total_tokens": 10857456}
|
|
{"current_steps": 22060, "total_steps": 37885, "loss": 0.0011, "lr": 8.876431314776847e-07, "epoch": 2.911442523426158, "percentage": 58.23, "elapsed_time": "0:32:23", "remaining_time": "0:23:14", "throughput": 5587.43, "total_tokens": 10860080}
|
|
{"current_steps": 22065, "total_steps": 37885, "loss": 0.0001, "lr": 8.871853624700517e-07, "epoch": 2.912102415203907, "percentage": 58.24, "elapsed_time": "0:32:23", "remaining_time": "0:23:13", "throughput": 5587.81, "total_tokens": 10862640}
|
|
{"current_steps": 22070, "total_steps": 37885, "loss": 0.0494, "lr": 8.867276174065085e-07, "epoch": 2.912762306981655, "percentage": 58.26, "elapsed_time": "0:32:24", "remaining_time": "0:23:13", "throughput": 5588.03, "total_tokens": 10864880}
|
|
{"current_steps": 22075, "total_steps": 37885, "loss": 0.0501, "lr": 8.862698963842084e-07, "epoch": 2.9134221987594033, "percentage": 58.27, "elapsed_time": "0:32:24", "remaining_time": "0:23:12", "throughput": 5588.21, "total_tokens": 10867056}
|
|
{"current_steps": 22080, "total_steps": 37885, "loss": 0.0003, "lr": 8.85812199500299e-07, "epoch": 2.914082090537152, "percentage": 58.28, "elapsed_time": "0:32:24", "remaining_time": "0:23:12", "throughput": 5588.49, "total_tokens": 10869424}
|
|
{"current_steps": 22085, "total_steps": 37885, "loss": 0.0876, "lr": 8.853545268519235e-07, "epoch": 2.9147419823149003, "percentage": 58.29, "elapsed_time": "0:32:25", "remaining_time": "0:23:11", "throughput": 5588.86, "total_tokens": 10871984}
|
|
{"current_steps": 22090, "total_steps": 37885, "loss": 0.0002, "lr": 8.848968785362196e-07, "epoch": 2.915401874092649, "percentage": 58.31, "elapsed_time": "0:32:25", "remaining_time": "0:23:11", "throughput": 5589.13, "total_tokens": 10874352}
|
|
{"current_steps": 22095, "total_steps": 37885, "loss": 0.0004, "lr": 8.844392546503195e-07, "epoch": 2.9160617658703973, "percentage": 58.32, "elapsed_time": "0:32:25", "remaining_time": "0:23:10", "throughput": 5589.35, "total_tokens": 10876592}
|
|
{"current_steps": 22100, "total_steps": 37885, "loss": 0.0537, "lr": 8.83981655291351e-07, "epoch": 2.9167216576481456, "percentage": 58.33, "elapsed_time": "0:32:26", "remaining_time": "0:23:10", "throughput": 5589.66, "total_tokens": 10879024}
|
|
{"current_steps": 22105, "total_steps": 37885, "loss": 0.0757, "lr": 8.835240805564358e-07, "epoch": 2.917381549425894, "percentage": 58.35, "elapsed_time": "0:32:26", "remaining_time": "0:23:09", "throughput": 5590.02, "total_tokens": 10881584}
|
|
{"current_steps": 22110, "total_steps": 37885, "loss": 0.0, "lr": 8.830665305426914e-07, "epoch": 2.9180414412036426, "percentage": 58.36, "elapsed_time": "0:32:26", "remaining_time": "0:23:09", "throughput": 5590.39, "total_tokens": 10884144}
|
|
{"current_steps": 22115, "total_steps": 37885, "loss": 0.1362, "lr": 8.826090053472291e-07, "epoch": 2.918701332981391, "percentage": 58.37, "elapsed_time": "0:32:27", "remaining_time": "0:23:08", "throughput": 5590.82, "total_tokens": 10886832}
|
|
{"current_steps": 22120, "total_steps": 37885, "loss": 0.0011, "lr": 8.821515050671547e-07, "epoch": 2.9193612247591396, "percentage": 58.39, "elapsed_time": "0:32:27", "remaining_time": "0:23:08", "throughput": 5590.94, "total_tokens": 10888880}
|
|
{"current_steps": 22125, "total_steps": 37885, "loss": 0.0005, "lr": 8.816940297995705e-07, "epoch": 2.920021116536888, "percentage": 58.4, "elapsed_time": "0:32:27", "remaining_time": "0:23:07", "throughput": 5591.22, "total_tokens": 10891248}
|
|
{"current_steps": 22130, "total_steps": 37885, "loss": 0.0001, "lr": 8.812365796415715e-07, "epoch": 2.920681008314636, "percentage": 58.41, "elapsed_time": "0:32:28", "remaining_time": "0:23:07", "throughput": 5591.48, "total_tokens": 10893552}
|
|
{"current_steps": 22135, "total_steps": 37885, "loss": 0.0004, "lr": 8.807791546902488e-07, "epoch": 2.921340900092385, "percentage": 58.43, "elapsed_time": "0:32:28", "remaining_time": "0:23:06", "throughput": 5591.76, "total_tokens": 10895920}
|
|
{"current_steps": 22140, "total_steps": 37885, "loss": 0.0001, "lr": 8.803217550426873e-07, "epoch": 2.922000791870133, "percentage": 58.44, "elapsed_time": "0:32:28", "remaining_time": "0:23:05", "throughput": 5592.17, "total_tokens": 10898608}
|
|
{"current_steps": 22145, "total_steps": 37885, "loss": 0.0001, "lr": 8.79864380795966e-07, "epoch": 2.922660683647882, "percentage": 58.45, "elapsed_time": "0:32:29", "remaining_time": "0:23:05", "throughput": 5592.48, "total_tokens": 10901040}
|
|
{"current_steps": 22150, "total_steps": 37885, "loss": 0.094, "lr": 8.794070320471605e-07, "epoch": 2.92332057542563, "percentage": 58.47, "elapsed_time": "0:32:29", "remaining_time": "0:23:04", "throughput": 5592.81, "total_tokens": 10903536}
|
|
{"current_steps": 22155, "total_steps": 37885, "loss": 0.1084, "lr": 8.789497088933386e-07, "epoch": 2.9239804672033785, "percentage": 58.48, "elapsed_time": "0:32:29", "remaining_time": "0:23:04", "throughput": 5593.11, "total_tokens": 10905968}
|
|
{"current_steps": 22160, "total_steps": 37885, "loss": 0.1881, "lr": 8.78492411431565e-07, "epoch": 2.924640358981127, "percentage": 58.49, "elapsed_time": "0:32:30", "remaining_time": "0:23:03", "throughput": 5593.54, "total_tokens": 10908656}
|
|
{"current_steps": 22165, "total_steps": 37885, "loss": 0.0003, "lr": 8.78035139758897e-07, "epoch": 2.9253002507588755, "percentage": 58.51, "elapsed_time": "0:32:30", "remaining_time": "0:23:03", "throughput": 5593.78, "total_tokens": 10910960}
|
|
{"current_steps": 22170, "total_steps": 37885, "loss": 0.0003, "lr": 8.775778939723874e-07, "epoch": 2.925960142536624, "percentage": 58.52, "elapsed_time": "0:32:30", "remaining_time": "0:23:02", "throughput": 5594.17, "total_tokens": 10913584}
|
|
{"current_steps": 22175, "total_steps": 37885, "loss": 0.0009, "lr": 8.771206741690832e-07, "epoch": 2.9266200343143725, "percentage": 58.53, "elapsed_time": "0:32:31", "remaining_time": "0:23:02", "throughput": 5594.57, "total_tokens": 10916208}
|
|
{"current_steps": 22180, "total_steps": 37885, "loss": 0.0003, "lr": 8.76663480446026e-07, "epoch": 2.9272799260921207, "percentage": 58.55, "elapsed_time": "0:32:31", "remaining_time": "0:23:01", "throughput": 5594.9, "total_tokens": 10918704}
|
|
{"current_steps": 22185, "total_steps": 37885, "loss": 0.0007, "lr": 8.762063129002521e-07, "epoch": 2.9279398178698695, "percentage": 58.56, "elapsed_time": "0:32:31", "remaining_time": "0:23:01", "throughput": 5595.23, "total_tokens": 10921200}
|
|
{"current_steps": 22190, "total_steps": 37885, "loss": 0.0567, "lr": 8.757491716287919e-07, "epoch": 2.9285997096476177, "percentage": 58.57, "elapsed_time": "0:32:32", "remaining_time": "0:23:00", "throughput": 5595.51, "total_tokens": 10923568}
|
|
{"current_steps": 22195, "total_steps": 37885, "loss": 0.0356, "lr": 8.752920567286701e-07, "epoch": 2.9292596014253665, "percentage": 58.59, "elapsed_time": "0:32:32", "remaining_time": "0:23:00", "throughput": 5595.76, "total_tokens": 10925872}
|
|
{"current_steps": 22200, "total_steps": 37885, "loss": 0.0273, "lr": 8.748349682969063e-07, "epoch": 2.9299194932031147, "percentage": 58.6, "elapsed_time": "0:32:32", "remaining_time": "0:22:59", "throughput": 5596.15, "total_tokens": 10928496}
|
|
{"current_steps": 22205, "total_steps": 37885, "loss": 0.0001, "lr": 8.743779064305139e-07, "epoch": 2.930579384980863, "percentage": 58.61, "elapsed_time": "0:32:33", "remaining_time": "0:22:59", "throughput": 5596.33, "total_tokens": 10930672}
|
|
{"current_steps": 22210, "total_steps": 37885, "loss": 0.0002, "lr": 8.739208712265015e-07, "epoch": 2.9312392767586117, "percentage": 58.62, "elapsed_time": "0:32:33", "remaining_time": "0:22:58", "throughput": 5596.66, "total_tokens": 10933168}
|
|
{"current_steps": 22215, "total_steps": 37885, "loss": 0.1551, "lr": 8.734638627818711e-07, "epoch": 2.93189916853636, "percentage": 58.64, "elapsed_time": "0:32:33", "remaining_time": "0:22:58", "throughput": 5596.9, "total_tokens": 10935472}
|
|
{"current_steps": 22220, "total_steps": 37885, "loss": 0.1055, "lr": 8.730068811936194e-07, "epoch": 2.9325590603141087, "percentage": 58.65, "elapsed_time": "0:32:34", "remaining_time": "0:22:57", "throughput": 5597.37, "total_tokens": 10938288}
|
|
{"current_steps": 22225, "total_steps": 37885, "loss": 0.052, "lr": 8.725499265587376e-07, "epoch": 2.933218952091857, "percentage": 58.66, "elapsed_time": "0:32:34", "remaining_time": "0:22:57", "throughput": 5597.61, "total_tokens": 10940592}
|
|
{"current_steps": 22230, "total_steps": 37885, "loss": 0.0675, "lr": 8.720929989742108e-07, "epoch": 2.9338788438696053, "percentage": 58.68, "elapsed_time": "0:32:34", "remaining_time": "0:22:56", "throughput": 5597.82, "total_tokens": 10942832}
|
|
{"current_steps": 22235, "total_steps": 37885, "loss": 0.0006, "lr": 8.71636098537019e-07, "epoch": 2.9345387356473536, "percentage": 58.69, "elapsed_time": "0:32:35", "remaining_time": "0:22:56", "throughput": 5598.21, "total_tokens": 10945456}
|
|
{"current_steps": 22240, "total_steps": 37885, "loss": 0.001, "lr": 8.711792253441358e-07, "epoch": 2.9351986274251023, "percentage": 58.7, "elapsed_time": "0:32:35", "remaining_time": "0:22:55", "throughput": 5598.54, "total_tokens": 10947952}
|
|
{"current_steps": 22245, "total_steps": 37885, "loss": 0.0002, "lr": 8.70722379492529e-07, "epoch": 2.9358585192028506, "percentage": 58.72, "elapsed_time": "0:32:35", "remaining_time": "0:22:55", "throughput": 5598.9, "total_tokens": 10950512}
|
|
{"current_steps": 22250, "total_steps": 37885, "loss": 0.0006, "lr": 8.70265561079161e-07, "epoch": 2.9365184109805993, "percentage": 58.73, "elapsed_time": "0:32:36", "remaining_time": "0:22:54", "throughput": 5599.29, "total_tokens": 10953136}
|
|
{"current_steps": 22255, "total_steps": 37885, "loss": 0.0665, "lr": 8.698087702009882e-07, "epoch": 2.9371783027583476, "percentage": 58.74, "elapsed_time": "0:32:36", "remaining_time": "0:22:54", "throughput": 5599.54, "total_tokens": 10955440}
|
|
{"current_steps": 22260, "total_steps": 37885, "loss": 0.0892, "lr": 8.693520069549612e-07, "epoch": 2.937838194536096, "percentage": 58.76, "elapsed_time": "0:32:36", "remaining_time": "0:22:53", "throughput": 5599.94, "total_tokens": 10958064}
|
|
{"current_steps": 22265, "total_steps": 37885, "loss": 0.0043, "lr": 8.688952714380247e-07, "epoch": 2.9384980863138446, "percentage": 58.77, "elapsed_time": "0:32:37", "remaining_time": "0:22:53", "throughput": 5600.33, "total_tokens": 10960688}
|
|
{"current_steps": 22270, "total_steps": 37885, "loss": 0.0382, "lr": 8.684385637471173e-07, "epoch": 2.939157978091593, "percentage": 58.78, "elapsed_time": "0:32:37", "remaining_time": "0:22:52", "throughput": 5600.64, "total_tokens": 10963120}
|
|
{"current_steps": 22275, "total_steps": 37885, "loss": 0.3035, "lr": 8.679818839791721e-07, "epoch": 2.9398178698693416, "percentage": 58.8, "elapsed_time": "0:32:37", "remaining_time": "0:22:52", "throughput": 5600.96, "total_tokens": 10965616}
|
|
{"current_steps": 22280, "total_steps": 37885, "loss": 0.0004, "lr": 8.675252322311161e-07, "epoch": 2.94047776164709, "percentage": 58.81, "elapsed_time": "0:32:38", "remaining_time": "0:22:51", "throughput": 5601.3, "total_tokens": 10968112}
|
|
{"current_steps": 22285, "total_steps": 37885, "loss": 0.0023, "lr": 8.670686085998702e-07, "epoch": 2.941137653424838, "percentage": 58.82, "elapsed_time": "0:32:38", "remaining_time": "0:22:50", "throughput": 5601.61, "total_tokens": 10970544}
|
|
{"current_steps": 22290, "total_steps": 37885, "loss": 0.0015, "lr": 8.666120131823499e-07, "epoch": 2.941797545202587, "percentage": 58.84, "elapsed_time": "0:32:38", "remaining_time": "0:22:50", "throughput": 5601.93, "total_tokens": 10973040}
|
|
{"current_steps": 22295, "total_steps": 37885, "loss": 0.1724, "lr": 8.661554460754631e-07, "epoch": 2.942457436980335, "percentage": 58.85, "elapsed_time": "0:32:39", "remaining_time": "0:22:49", "throughput": 5602.38, "total_tokens": 10975792}
|
|
{"current_steps": 22300, "total_steps": 37885, "loss": 0.1864, "lr": 8.656989073761144e-07, "epoch": 2.943117328758084, "percentage": 58.86, "elapsed_time": "0:32:39", "remaining_time": "0:22:49", "throughput": 5602.77, "total_tokens": 10978416}
|
|
{"current_steps": 22305, "total_steps": 37885, "loss": 0.0476, "lr": 8.652423971811992e-07, "epoch": 2.943777220535832, "percentage": 58.88, "elapsed_time": "0:32:39", "remaining_time": "0:22:48", "throughput": 5603.29, "total_tokens": 10981296}
|
|
{"current_steps": 22310, "total_steps": 37885, "loss": 0.1013, "lr": 8.647859155876103e-07, "epoch": 2.9444371123135804, "percentage": 58.89, "elapsed_time": "0:32:40", "remaining_time": "0:22:48", "throughput": 5603.59, "total_tokens": 10983728}
|
|
{"current_steps": 22315, "total_steps": 37885, "loss": 0.0019, "lr": 8.643294626922314e-07, "epoch": 2.945097004091329, "percentage": 58.9, "elapsed_time": "0:32:40", "remaining_time": "0:22:47", "throughput": 5603.98, "total_tokens": 10986352}
|
|
{"current_steps": 22320, "total_steps": 37885, "loss": 0.0014, "lr": 8.638730385919411e-07, "epoch": 2.9457568958690774, "percentage": 58.92, "elapsed_time": "0:32:40", "remaining_time": "0:22:47", "throughput": 5604.4, "total_tokens": 10989040}
|
|
{"current_steps": 22325, "total_steps": 37885, "loss": 0.0004, "lr": 8.634166433836132e-07, "epoch": 2.946416787646826, "percentage": 58.93, "elapsed_time": "0:32:41", "remaining_time": "0:22:46", "throughput": 5604.65, "total_tokens": 10991344}
|
|
{"current_steps": 22330, "total_steps": 37885, "loss": 0.0746, "lr": 8.629602771641131e-07, "epoch": 2.9470766794245744, "percentage": 58.94, "elapsed_time": "0:32:41", "remaining_time": "0:22:46", "throughput": 5604.91, "total_tokens": 10993712}
|
|
{"current_steps": 22335, "total_steps": 37885, "loss": 0.1069, "lr": 8.625039400303025e-07, "epoch": 2.9477365712023227, "percentage": 58.95, "elapsed_time": "0:32:41", "remaining_time": "0:22:45", "throughput": 5605.15, "total_tokens": 10996016}
|
|
{"current_steps": 22340, "total_steps": 37885, "loss": 0.1457, "lr": 8.620476320790346e-07, "epoch": 2.9483964629800714, "percentage": 58.97, "elapsed_time": "0:32:42", "remaining_time": "0:22:45", "throughput": 5605.49, "total_tokens": 10998512}
|
|
{"current_steps": 22345, "total_steps": 37885, "loss": 0.0385, "lr": 8.615913534071577e-07, "epoch": 2.9490563547578197, "percentage": 58.98, "elapsed_time": "0:32:42", "remaining_time": "0:22:44", "throughput": 5605.9, "total_tokens": 11001200}
|
|
{"current_steps": 22350, "total_steps": 37885, "loss": 0.0011, "lr": 8.61135104111514e-07, "epoch": 2.9497162465355684, "percentage": 58.99, "elapsed_time": "0:32:42", "remaining_time": "0:22:44", "throughput": 5606.19, "total_tokens": 11003632}
|
|
{"current_steps": 22355, "total_steps": 37885, "loss": 0.0005, "lr": 8.606788842889387e-07, "epoch": 2.9503761383133167, "percentage": 59.01, "elapsed_time": "0:32:43", "remaining_time": "0:22:43", "throughput": 5606.43, "total_tokens": 11005936}
|
|
{"current_steps": 22360, "total_steps": 37885, "loss": 0.0006, "lr": 8.602226940362615e-07, "epoch": 2.951036030091065, "percentage": 59.02, "elapsed_time": "0:32:43", "remaining_time": "0:22:43", "throughput": 5606.85, "total_tokens": 11008624}
|
|
{"current_steps": 22365, "total_steps": 37885, "loss": 0.0893, "lr": 8.59766533450305e-07, "epoch": 2.9516959218688132, "percentage": 59.03, "elapsed_time": "0:32:43", "remaining_time": "0:22:42", "throughput": 5607.12, "total_tokens": 11010992}
|
|
{"current_steps": 22370, "total_steps": 37885, "loss": 0.2024, "lr": 8.593104026278866e-07, "epoch": 2.952355813646562, "percentage": 59.05, "elapsed_time": "0:32:44", "remaining_time": "0:22:42", "throughput": 5607.54, "total_tokens": 11013680}
|
|
{"current_steps": 22375, "total_steps": 37885, "loss": 0.0256, "lr": 8.588543016658164e-07, "epoch": 2.9530157054243107, "percentage": 59.06, "elapsed_time": "0:32:44", "remaining_time": "0:22:41", "throughput": 5607.78, "total_tokens": 11015984}
|
|
{"current_steps": 22380, "total_steps": 37885, "loss": 0.0559, "lr": 8.583982306608984e-07, "epoch": 2.953675597202059, "percentage": 59.07, "elapsed_time": "0:32:44", "remaining_time": "0:22:41", "throughput": 5607.98, "total_tokens": 11018224}
|
|
{"current_steps": 22385, "total_steps": 37885, "loss": 0.0007, "lr": 8.579421897099307e-07, "epoch": 2.9543354889798072, "percentage": 59.09, "elapsed_time": "0:32:45", "remaining_time": "0:22:40", "throughput": 5608.21, "total_tokens": 11020528}
|
|
{"current_steps": 22390, "total_steps": 37885, "loss": 0.0417, "lr": 8.574861789097043e-07, "epoch": 2.9549953807575555, "percentage": 59.1, "elapsed_time": "0:32:45", "remaining_time": "0:22:40", "throughput": 5608.57, "total_tokens": 11023088}
|
|
{"current_steps": 22395, "total_steps": 37885, "loss": 0.0681, "lr": 8.570301983570048e-07, "epoch": 2.9556552725353042, "percentage": 59.11, "elapsed_time": "0:32:45", "remaining_time": "0:22:39", "throughput": 5609.02, "total_tokens": 11025840}
|
|
{"current_steps": 22400, "total_steps": 37885, "loss": 0.0693, "lr": 8.565742481486102e-07, "epoch": 2.9563151643130525, "percentage": 59.13, "elapsed_time": "0:32:46", "remaining_time": "0:22:39", "throughput": 5609.19, "total_tokens": 11028016}
|
|
{"current_steps": 22405, "total_steps": 37885, "loss": 0.0002, "lr": 8.561183283812928e-07, "epoch": 2.9569750560908012, "percentage": 59.14, "elapsed_time": "0:32:46", "remaining_time": "0:22:38", "throughput": 5609.37, "total_tokens": 11030192}
|
|
{"current_steps": 22410, "total_steps": 37885, "loss": 0.0005, "lr": 8.556624391518182e-07, "epoch": 2.9576349478685495, "percentage": 59.15, "elapsed_time": "0:32:46", "remaining_time": "0:22:38", "throughput": 5609.7, "total_tokens": 11032688}
|
|
{"current_steps": 22415, "total_steps": 37885, "loss": 0.0344, "lr": 8.552065805569457e-07, "epoch": 2.958294839646298, "percentage": 59.17, "elapsed_time": "0:32:47", "remaining_time": "0:22:37", "throughput": 5610.06, "total_tokens": 11035248}
|
|
{"current_steps": 22420, "total_steps": 37885, "loss": 0.0848, "lr": 8.547507526934281e-07, "epoch": 2.9589547314240465, "percentage": 59.18, "elapsed_time": "0:32:47", "remaining_time": "0:22:37", "throughput": 5610.4, "total_tokens": 11037808}
|
|
{"current_steps": 22425, "total_steps": 37885, "loss": 0.0006, "lr": 8.542949556580114e-07, "epoch": 2.959614623201795, "percentage": 59.19, "elapsed_time": "0:32:47", "remaining_time": "0:22:36", "throughput": 5610.57, "total_tokens": 11039984}
|
|
{"current_steps": 22430, "total_steps": 37885, "loss": 0.0662, "lr": 8.538391895474353e-07, "epoch": 2.9602745149795435, "percentage": 59.21, "elapsed_time": "0:32:48", "remaining_time": "0:22:36", "throughput": 5610.94, "total_tokens": 11042544}
|
|
{"current_steps": 22435, "total_steps": 37885, "loss": 0.0357, "lr": 8.533834544584327e-07, "epoch": 2.960934406757292, "percentage": 59.22, "elapsed_time": "0:32:48", "remaining_time": "0:22:35", "throughput": 5611.33, "total_tokens": 11045168}
|
|
{"current_steps": 22440, "total_steps": 37885, "loss": 0.043, "lr": 8.529277504877301e-07, "epoch": 2.96159429853504, "percentage": 59.23, "elapsed_time": "0:32:48", "remaining_time": "0:22:35", "throughput": 5611.72, "total_tokens": 11047792}
|
|
{"current_steps": 22445, "total_steps": 37885, "loss": 0.1004, "lr": 8.524720777320476e-07, "epoch": 2.962254190312789, "percentage": 59.25, "elapsed_time": "0:32:49", "remaining_time": "0:22:34", "throughput": 5611.98, "total_tokens": 11050160}
|
|
{"current_steps": 22450, "total_steps": 37885, "loss": 0.0013, "lr": 8.520164362880986e-07, "epoch": 2.962914082090537, "percentage": 59.26, "elapsed_time": "0:32:49", "remaining_time": "0:22:33", "throughput": 5612.35, "total_tokens": 11052720}
|
|
{"current_steps": 22455, "total_steps": 37885, "loss": 0.0006, "lr": 8.515608262525886e-07, "epoch": 2.963573973868286, "percentage": 59.27, "elapsed_time": "0:32:49", "remaining_time": "0:22:33", "throughput": 5612.55, "total_tokens": 11054960}
|
|
{"current_steps": 22460, "total_steps": 37885, "loss": 0.0002, "lr": 8.511052477222189e-07, "epoch": 2.964233865646034, "percentage": 59.28, "elapsed_time": "0:32:50", "remaining_time": "0:22:32", "throughput": 5613.0, "total_tokens": 11057712}
|
|
{"current_steps": 22465, "total_steps": 37885, "loss": 0.0526, "lr": 8.50649700793682e-07, "epoch": 2.9648937574237824, "percentage": 59.3, "elapsed_time": "0:32:50", "remaining_time": "0:22:32", "throughput": 5613.42, "total_tokens": 11060400}
|
|
{"current_steps": 22470, "total_steps": 37885, "loss": 0.0001, "lr": 8.501941855636645e-07, "epoch": 2.965553649201531, "percentage": 59.31, "elapsed_time": "0:32:50", "remaining_time": "0:22:31", "throughput": 5613.87, "total_tokens": 11063152}
|
|
{"current_steps": 22475, "total_steps": 37885, "loss": 0.0368, "lr": 8.497387021288468e-07, "epoch": 2.9662135409792794, "percentage": 59.32, "elapsed_time": "0:32:51", "remaining_time": "0:22:31", "throughput": 5614.31, "total_tokens": 11065904}
|
|
{"current_steps": 22480, "total_steps": 37885, "loss": 0.0007, "lr": 8.492832505859007e-07, "epoch": 2.966873432757028, "percentage": 59.34, "elapsed_time": "0:32:51", "remaining_time": "0:22:30", "throughput": 5614.56, "total_tokens": 11068272}
|
|
{"current_steps": 22485, "total_steps": 37885, "loss": 0.0004, "lr": 8.488278310314939e-07, "epoch": 2.9675333245347764, "percentage": 59.35, "elapsed_time": "0:32:51", "remaining_time": "0:22:30", "throughput": 5614.83, "total_tokens": 11070640}
|
|
{"current_steps": 22490, "total_steps": 37885, "loss": 0.0015, "lr": 8.483724435622847e-07, "epoch": 2.9681932163125246, "percentage": 59.36, "elapsed_time": "0:32:52", "remaining_time": "0:22:29", "throughput": 5615.16, "total_tokens": 11073136}
|
|
{"current_steps": 22495, "total_steps": 37885, "loss": 0.0001, "lr": 8.479170882749269e-07, "epoch": 2.968853108090273, "percentage": 59.38, "elapsed_time": "0:32:52", "remaining_time": "0:22:29", "throughput": 5615.6, "total_tokens": 11075888}
|
|
{"current_steps": 22500, "total_steps": 37885, "loss": 0.0995, "lr": 8.474617652660657e-07, "epoch": 2.9695129998680216, "percentage": 59.39, "elapsed_time": "0:32:52", "remaining_time": "0:22:28", "throughput": 5615.97, "total_tokens": 11078448}
|
|
{"current_steps": 22505, "total_steps": 37885, "loss": 0.0001, "lr": 8.470064746323399e-07, "epoch": 2.9701728916457704, "percentage": 59.4, "elapsed_time": "0:32:52", "remaining_time": "0:22:28", "throughput": 5616.36, "total_tokens": 11081072}
|
|
{"current_steps": 22510, "total_steps": 37885, "loss": 0.0007, "lr": 8.465512164703823e-07, "epoch": 2.9708327834235186, "percentage": 59.42, "elapsed_time": "0:32:53", "remaining_time": "0:22:27", "throughput": 5616.65, "total_tokens": 11083504}
|
|
{"current_steps": 22515, "total_steps": 37885, "loss": 0.0006, "lr": 8.460959908768173e-07, "epoch": 2.971492675201267, "percentage": 59.43, "elapsed_time": "0:32:53", "remaining_time": "0:22:27", "throughput": 5616.92, "total_tokens": 11085872}
|
|
{"current_steps": 22520, "total_steps": 37885, "loss": 0.0751, "lr": 8.456407979482645e-07, "epoch": 2.972152566979015, "percentage": 59.44, "elapsed_time": "0:32:53", "remaining_time": "0:22:26", "throughput": 5617.24, "total_tokens": 11088368}
|
|
{"current_steps": 22525, "total_steps": 37885, "loss": 0.0005, "lr": 8.451856377813342e-07, "epoch": 2.972812458756764, "percentage": 59.46, "elapsed_time": "0:32:54", "remaining_time": "0:22:26", "throughput": 5617.58, "total_tokens": 11090864}
|
|
{"current_steps": 22530, "total_steps": 37885, "loss": 0.0004, "lr": 8.44730510472631e-07, "epoch": 2.973472350534512, "percentage": 59.47, "elapsed_time": "0:32:54", "remaining_time": "0:22:25", "throughput": 5617.96, "total_tokens": 11093488}
|
|
{"current_steps": 22535, "total_steps": 37885, "loss": 0.0001, "lr": 8.442754161187528e-07, "epoch": 2.974132242312261, "percentage": 59.48, "elapsed_time": "0:32:54", "remaining_time": "0:22:25", "throughput": 5618.19, "total_tokens": 11095792}
|
|
{"current_steps": 22540, "total_steps": 37885, "loss": 0.0001, "lr": 8.438203548162898e-07, "epoch": 2.974792134090009, "percentage": 59.5, "elapsed_time": "0:32:55", "remaining_time": "0:22:24", "throughput": 5618.52, "total_tokens": 11098288}
|
|
{"current_steps": 22545, "total_steps": 37885, "loss": 0.0257, "lr": 8.433653266618255e-07, "epoch": 2.9754520258677575, "percentage": 59.51, "elapsed_time": "0:32:55", "remaining_time": "0:22:24", "throughput": 5618.74, "total_tokens": 11100528}
|
|
{"current_steps": 22550, "total_steps": 37885, "loss": 0.0707, "lr": 8.429103317519366e-07, "epoch": 2.976111917645506, "percentage": 59.52, "elapsed_time": "0:32:55", "remaining_time": "0:22:23", "throughput": 5619.12, "total_tokens": 11103152}
|
|
{"current_steps": 22555, "total_steps": 37885, "loss": 0.0073, "lr": 8.424553701831919e-07, "epoch": 2.9767718094232545, "percentage": 59.54, "elapsed_time": "0:32:56", "remaining_time": "0:22:23", "throughput": 5619.53, "total_tokens": 11105840}
|
|
{"current_steps": 22560, "total_steps": 37885, "loss": 0.0646, "lr": 8.420004420521542e-07, "epoch": 2.977431701201003, "percentage": 59.55, "elapsed_time": "0:32:56", "remaining_time": "0:22:22", "throughput": 5619.68, "total_tokens": 11107952}
|
|
{"current_steps": 22565, "total_steps": 37885, "loss": 0.0, "lr": 8.415455474553784e-07, "epoch": 2.9780915929787515, "percentage": 59.56, "elapsed_time": "0:32:56", "remaining_time": "0:22:22", "throughput": 5619.98, "total_tokens": 11110384}
|
|
{"current_steps": 22570, "total_steps": 37885, "loss": 0.0783, "lr": 8.41090686489413e-07, "epoch": 2.9787514847564998, "percentage": 59.58, "elapsed_time": "0:32:57", "remaining_time": "0:22:21", "throughput": 5620.34, "total_tokens": 11112944}
|
|
{"current_steps": 22575, "total_steps": 37885, "loss": 0.1547, "lr": 8.406358592507985e-07, "epoch": 2.9794113765342485, "percentage": 59.59, "elapsed_time": "0:32:57", "remaining_time": "0:22:21", "throughput": 5620.61, "total_tokens": 11115312}
|
|
{"current_steps": 22580, "total_steps": 37885, "loss": 0.1548, "lr": 8.401810658360686e-07, "epoch": 2.9800712683119968, "percentage": 59.6, "elapsed_time": "0:32:57", "remaining_time": "0:22:20", "throughput": 5620.96, "total_tokens": 11117872}
|
|
{"current_steps": 22585, "total_steps": 37885, "loss": 0.0782, "lr": 8.397263063417506e-07, "epoch": 2.9807311600897455, "percentage": 59.61, "elapsed_time": "0:32:58", "remaining_time": "0:22:20", "throughput": 5621.41, "total_tokens": 11120624}
|
|
{"current_steps": 22590, "total_steps": 37885, "loss": 0.0001, "lr": 8.39271580864363e-07, "epoch": 2.9813910518674938, "percentage": 59.63, "elapsed_time": "0:32:58", "remaining_time": "0:22:19", "throughput": 5621.74, "total_tokens": 11123120}
|
|
{"current_steps": 22595, "total_steps": 37885, "loss": 0.071, "lr": 8.388168895004189e-07, "epoch": 2.982050943645242, "percentage": 59.64, "elapsed_time": "0:32:58", "remaining_time": "0:22:19", "throughput": 5622.04, "total_tokens": 11125552}
|
|
{"current_steps": 22600, "total_steps": 37885, "loss": 0.1256, "lr": 8.383622323464226e-07, "epoch": 2.9827108354229908, "percentage": 59.65, "elapsed_time": "0:32:59", "remaining_time": "0:22:18", "throughput": 5622.42, "total_tokens": 11128176}
|
|
{"current_steps": 22605, "total_steps": 37885, "loss": 0.0751, "lr": 8.379076094988718e-07, "epoch": 2.983370727200739, "percentage": 59.67, "elapsed_time": "0:32:59", "remaining_time": "0:22:18", "throughput": 5622.66, "total_tokens": 11130480}
|
|
{"current_steps": 22610, "total_steps": 37885, "loss": 0.1028, "lr": 8.374530210542575e-07, "epoch": 2.9840306189784878, "percentage": 59.68, "elapsed_time": "0:32:59", "remaining_time": "0:22:17", "throughput": 5622.94, "total_tokens": 11132848}
|
|
{"current_steps": 22615, "total_steps": 37885, "loss": 0.0007, "lr": 8.369984671090621e-07, "epoch": 2.984690510756236, "percentage": 59.69, "elapsed_time": "0:33:00", "remaining_time": "0:22:17", "throughput": 5623.19, "total_tokens": 11135152}
|
|
{"current_steps": 22620, "total_steps": 37885, "loss": 0.019, "lr": 8.365439477597619e-07, "epoch": 2.9853504025339843, "percentage": 59.71, "elapsed_time": "0:33:00", "remaining_time": "0:22:16", "throughput": 5623.51, "total_tokens": 11137648}
|
|
{"current_steps": 22625, "total_steps": 37885, "loss": 0.0387, "lr": 8.360894631028254e-07, "epoch": 2.986010294311733, "percentage": 59.72, "elapsed_time": "0:33:00", "remaining_time": "0:22:16", "throughput": 5623.7, "total_tokens": 11139888}
|
|
{"current_steps": 22630, "total_steps": 37885, "loss": 0.0004, "lr": 8.356350132347127e-07, "epoch": 2.9866701860894813, "percentage": 59.73, "elapsed_time": "0:33:01", "remaining_time": "0:22:15", "throughput": 5624.09, "total_tokens": 11142512}
|
|
{"current_steps": 22635, "total_steps": 37885, "loss": 0.0013, "lr": 8.351805982518788e-07, "epoch": 2.98733007786723, "percentage": 59.75, "elapsed_time": "0:33:01", "remaining_time": "0:22:15", "throughput": 5624.33, "total_tokens": 11144816}
|
|
{"current_steps": 22640, "total_steps": 37885, "loss": 0.0283, "lr": 8.347262182507688e-07, "epoch": 2.9879899696449783, "percentage": 59.76, "elapsed_time": "0:33:01", "remaining_time": "0:22:14", "throughput": 5624.66, "total_tokens": 11147312}
|
|
{"current_steps": 22645, "total_steps": 37885, "loss": 0.0003, "lr": 8.342718733278228e-07, "epoch": 2.9886498614227266, "percentage": 59.77, "elapsed_time": "0:33:02", "remaining_time": "0:22:14", "throughput": 5624.86, "total_tokens": 11149552}
|
|
{"current_steps": 22650, "total_steps": 37885, "loss": 0.0004, "lr": 8.338175635794713e-07, "epoch": 2.989309753200475, "percentage": 59.79, "elapsed_time": "0:33:02", "remaining_time": "0:22:13", "throughput": 5625.16, "total_tokens": 11151984}
|
|
{"current_steps": 22655, "total_steps": 37885, "loss": 0.1013, "lr": 8.333632891021383e-07, "epoch": 2.9899696449782236, "percentage": 59.8, "elapsed_time": "0:33:02", "remaining_time": "0:22:12", "throughput": 5625.52, "total_tokens": 11154544}
|
|
{"current_steps": 22660, "total_steps": 37885, "loss": 0.0648, "lr": 8.32909049992241e-07, "epoch": 2.990629536755972, "percentage": 59.81, "elapsed_time": "0:33:03", "remaining_time": "0:22:12", "throughput": 5625.84, "total_tokens": 11157040}
|
|
{"current_steps": 22665, "total_steps": 37885, "loss": 0.1119, "lr": 8.324548463461871e-07, "epoch": 2.9912894285337206, "percentage": 59.83, "elapsed_time": "0:33:03", "remaining_time": "0:22:11", "throughput": 5626.11, "total_tokens": 11159408}
|
|
{"current_steps": 22670, "total_steps": 37885, "loss": 0.0001, "lr": 8.320006782603797e-07, "epoch": 2.991949320311469, "percentage": 59.84, "elapsed_time": "0:33:03", "remaining_time": "0:22:11", "throughput": 5626.46, "total_tokens": 11161968}
|
|
{"current_steps": 22675, "total_steps": 37885, "loss": 0.0006, "lr": 8.315465458312114e-07, "epoch": 2.992609212089217, "percentage": 59.85, "elapsed_time": "0:33:04", "remaining_time": "0:22:10", "throughput": 5626.73, "total_tokens": 11164336}
|
|
{"current_steps": 22680, "total_steps": 37885, "loss": 0.0551, "lr": 8.310924491550688e-07, "epoch": 2.993269103866966, "percentage": 59.87, "elapsed_time": "0:33:04", "remaining_time": "0:22:10", "throughput": 5627.06, "total_tokens": 11166832}
|
|
{"current_steps": 22685, "total_steps": 37885, "loss": 0.0014, "lr": 8.306383883283308e-07, "epoch": 2.993928995644714, "percentage": 59.88, "elapsed_time": "0:33:04", "remaining_time": "0:22:09", "throughput": 5627.18, "total_tokens": 11168880}
|
|
{"current_steps": 22690, "total_steps": 37885, "loss": 0.0004, "lr": 8.301843634473683e-07, "epoch": 2.994588887422463, "percentage": 59.89, "elapsed_time": "0:33:05", "remaining_time": "0:22:09", "throughput": 5627.41, "total_tokens": 11171184}
|
|
{"current_steps": 22695, "total_steps": 37885, "loss": 0.0201, "lr": 8.297303746085452e-07, "epoch": 2.995248779200211, "percentage": 59.9, "elapsed_time": "0:33:05", "remaining_time": "0:22:08", "throughput": 5627.74, "total_tokens": 11173680}
|
|
{"current_steps": 22700, "total_steps": 37885, "loss": 0.0418, "lr": 8.292764219082168e-07, "epoch": 2.9959086709779594, "percentage": 59.92, "elapsed_time": "0:33:05", "remaining_time": "0:22:08", "throughput": 5628.09, "total_tokens": 11176240}
|
|
{"current_steps": 22705, "total_steps": 37885, "loss": 0.0009, "lr": 8.28822505442732e-07, "epoch": 2.996568562755708, "percentage": 59.93, "elapsed_time": "0:33:06", "remaining_time": "0:22:07", "throughput": 5628.36, "total_tokens": 11178608}
|
|
{"current_steps": 22710, "total_steps": 37885, "loss": 0.0041, "lr": 8.283686253084306e-07, "epoch": 2.9972284545334564, "percentage": 59.94, "elapsed_time": "0:33:06", "remaining_time": "0:22:07", "throughput": 5628.8, "total_tokens": 11181360}
|
|
{"current_steps": 22715, "total_steps": 37885, "loss": 0.0644, "lr": 8.279147816016455e-07, "epoch": 2.997888346311205, "percentage": 59.96, "elapsed_time": "0:33:06", "remaining_time": "0:22:06", "throughput": 5629.13, "total_tokens": 11183856}
|
|
{"current_steps": 22720, "total_steps": 37885, "loss": 0.0427, "lr": 8.274609744187021e-07, "epoch": 2.9985482380889534, "percentage": 59.97, "elapsed_time": "0:33:07", "remaining_time": "0:22:06", "throughput": 5629.57, "total_tokens": 11186608}
|
|
{"current_steps": 22725, "total_steps": 37885, "loss": 0.0002, "lr": 8.270072038559172e-07, "epoch": 2.9992081298667017, "percentage": 59.98, "elapsed_time": "0:33:07", "remaining_time": "0:22:05", "throughput": 5629.8, "total_tokens": 11188912}
|
|
{"current_steps": 22730, "total_steps": 37885, "loss": 0.0001, "lr": 8.265534700096008e-07, "epoch": 2.9998680216444504, "percentage": 60.0, "elapsed_time": "0:33:07", "remaining_time": "0:22:05", "throughput": 5630.14, "total_tokens": 11191408}
|
|
{"current_steps": 22735, "total_steps": 37885, "loss": 0.0006, "lr": 8.260997729760544e-07, "epoch": 3.0005279134221987, "percentage": 60.01, "elapsed_time": "0:33:08", "remaining_time": "0:22:04", "throughput": 5630.07, "total_tokens": 11193728}
|
|
{"current_steps": 22740, "total_steps": 37885, "loss": 0.0352, "lr": 8.256461128515717e-07, "epoch": 3.001187805199947, "percentage": 60.02, "elapsed_time": "0:33:08", "remaining_time": "0:22:04", "throughput": 5630.33, "total_tokens": 11196096}
|
|
{"current_steps": 22740, "total_steps": 37885, "eval_loss": 0.14833371341228485, "epoch": 3.001187805199947, "percentage": 60.02, "elapsed_time": "0:33:16", "remaining_time": "0:22:09", "throughput": 5607.95, "total_tokens": 11196096}
|
|
{"current_steps": 22745, "total_steps": 37885, "loss": 0.0003, "lr": 8.251924897324392e-07, "epoch": 3.0018476969776957, "percentage": 60.04, "elapsed_time": "0:33:48", "remaining_time": "0:22:30", "throughput": 5521.45, "total_tokens": 11198656}
|
|
{"current_steps": 22750, "total_steps": 37885, "loss": 0.0, "lr": 8.247389037149346e-07, "epoch": 3.002507588755444, "percentage": 60.05, "elapsed_time": "0:33:48", "remaining_time": "0:22:29", "throughput": 5521.75, "total_tokens": 11201088}
|
|
{"current_steps": 22755, "total_steps": 37885, "loss": 0.0, "lr": 8.242853548953288e-07, "epoch": 3.0031674805331927, "percentage": 60.06, "elapsed_time": "0:33:48", "remaining_time": "0:22:29", "throughput": 5522.12, "total_tokens": 11203648}
|
|
{"current_steps": 22760, "total_steps": 37885, "loss": 0.0, "lr": 8.238318433698841e-07, "epoch": 3.003827372310941, "percentage": 60.08, "elapsed_time": "0:33:49", "remaining_time": "0:22:28", "throughput": 5522.57, "total_tokens": 11206400}
|
|
{"current_steps": 22765, "total_steps": 37885, "loss": 0.0, "lr": 8.233783692348546e-07, "epoch": 3.0044872640886893, "percentage": 60.09, "elapsed_time": "0:33:49", "remaining_time": "0:22:27", "throughput": 5522.9, "total_tokens": 11208896}
|
|
{"current_steps": 22770, "total_steps": 37885, "loss": 0.0016, "lr": 8.229249325864874e-07, "epoch": 3.005147155866438, "percentage": 60.1, "elapsed_time": "0:33:49", "remaining_time": "0:22:27", "throughput": 5523.21, "total_tokens": 11211328}
|
|
{"current_steps": 22775, "total_steps": 37885, "loss": 0.0581, "lr": 8.224715335210208e-07, "epoch": 3.0058070476441863, "percentage": 60.12, "elapsed_time": "0:33:50", "remaining_time": "0:22:26", "throughput": 5523.66, "total_tokens": 11214080}
|
|
{"current_steps": 22780, "total_steps": 37885, "loss": 0.0001, "lr": 8.22018172134686e-07, "epoch": 3.006466939421935, "percentage": 60.13, "elapsed_time": "0:33:50", "remaining_time": "0:22:26", "throughput": 5523.88, "total_tokens": 11216320}
|
|
{"current_steps": 22785, "total_steps": 37885, "loss": 0.0502, "lr": 8.215648485237054e-07, "epoch": 3.0071268311996833, "percentage": 60.14, "elapsed_time": "0:33:50", "remaining_time": "0:22:25", "throughput": 5524.24, "total_tokens": 11218880}
|
|
{"current_steps": 22790, "total_steps": 37885, "loss": 0.0004, "lr": 8.211115627842931e-07, "epoch": 3.0077867229774315, "percentage": 60.16, "elapsed_time": "0:33:51", "remaining_time": "0:22:25", "throughput": 5524.54, "total_tokens": 11221312}
|
|
{"current_steps": 22795, "total_steps": 37885, "loss": 0.0, "lr": 8.206583150126564e-07, "epoch": 3.0084466147551803, "percentage": 60.17, "elapsed_time": "0:33:51", "remaining_time": "0:22:24", "throughput": 5524.73, "total_tokens": 11223488}
|
|
{"current_steps": 22800, "total_steps": 37885, "loss": 0.0001, "lr": 8.202051053049936e-07, "epoch": 3.0091065065329285, "percentage": 60.18, "elapsed_time": "0:33:51", "remaining_time": "0:22:24", "throughput": 5524.94, "total_tokens": 11225728}
|
|
{"current_steps": 22805, "total_steps": 37885, "loss": 0.0006, "lr": 8.197519337574953e-07, "epoch": 3.009766398310677, "percentage": 60.2, "elapsed_time": "0:33:52", "remaining_time": "0:22:23", "throughput": 5525.12, "total_tokens": 11227904}
|
|
{"current_steps": 22810, "total_steps": 37885, "loss": 0.0, "lr": 8.192988004663442e-07, "epoch": 3.0104262900884255, "percentage": 60.21, "elapsed_time": "0:33:52", "remaining_time": "0:22:23", "throughput": 5525.48, "total_tokens": 11230464}
|
|
{"current_steps": 22815, "total_steps": 37885, "loss": 0.0004, "lr": 8.188457055277133e-07, "epoch": 3.011086181866174, "percentage": 60.22, "elapsed_time": "0:33:52", "remaining_time": "0:22:22", "throughput": 5525.81, "total_tokens": 11232960}
|
|
{"current_steps": 22820, "total_steps": 37885, "loss": 0.0001, "lr": 8.183926490377703e-07, "epoch": 3.0117460736439225, "percentage": 60.23, "elapsed_time": "0:33:53", "remaining_time": "0:22:22", "throughput": 5526.14, "total_tokens": 11235456}
|
|
{"current_steps": 22825, "total_steps": 37885, "loss": 0.0007, "lr": 8.179396310926719e-07, "epoch": 3.012405965421671, "percentage": 60.25, "elapsed_time": "0:33:53", "remaining_time": "0:22:21", "throughput": 5526.45, "total_tokens": 11237888}
|
|
{"current_steps": 22830, "total_steps": 37885, "loss": 0.0003, "lr": 8.17486651788569e-07, "epoch": 3.013065857199419, "percentage": 60.26, "elapsed_time": "0:33:53", "remaining_time": "0:22:21", "throughput": 5526.87, "total_tokens": 11240576}
|
|
{"current_steps": 22835, "total_steps": 37885, "loss": 0.0001, "lr": 8.170337112216023e-07, "epoch": 3.013725748977168, "percentage": 60.27, "elapsed_time": "0:33:54", "remaining_time": "0:22:20", "throughput": 5527.05, "total_tokens": 11242752}
|
|
{"current_steps": 22840, "total_steps": 37885, "loss": 0.0, "lr": 8.165808094879054e-07, "epoch": 3.014385640754916, "percentage": 60.29, "elapsed_time": "0:33:54", "remaining_time": "0:22:20", "throughput": 5527.36, "total_tokens": 11245184}
|
|
{"current_steps": 22845, "total_steps": 37885, "loss": 0.0, "lr": 8.161279466836036e-07, "epoch": 3.015045532532665, "percentage": 60.3, "elapsed_time": "0:33:54", "remaining_time": "0:22:19", "throughput": 5527.63, "total_tokens": 11247552}
|
|
{"current_steps": 22850, "total_steps": 37885, "loss": 0.0005, "lr": 8.156751229048132e-07, "epoch": 3.015705424310413, "percentage": 60.31, "elapsed_time": "0:33:55", "remaining_time": "0:22:19", "throughput": 5527.88, "total_tokens": 11249856}
|
|
{"current_steps": 22855, "total_steps": 37885, "loss": 0.0, "lr": 8.152223382476438e-07, "epoch": 3.0163653160881614, "percentage": 60.33, "elapsed_time": "0:33:55", "remaining_time": "0:22:18", "throughput": 5528.32, "total_tokens": 11252608}
|
|
{"current_steps": 22860, "total_steps": 37885, "loss": 0.0, "lr": 8.14769592808195e-07, "epoch": 3.01702520786591, "percentage": 60.34, "elapsed_time": "0:33:55", "remaining_time": "0:22:18", "throughput": 5528.61, "total_tokens": 11255040}
|
|
{"current_steps": 22865, "total_steps": 37885, "loss": 0.0565, "lr": 8.143168866825583e-07, "epoch": 3.0176850996436584, "percentage": 60.35, "elapsed_time": "0:33:56", "remaining_time": "0:22:17", "throughput": 5528.97, "total_tokens": 11257600}
|
|
{"current_steps": 22870, "total_steps": 37885, "loss": 0.0, "lr": 8.138642199668183e-07, "epoch": 3.018344991421407, "percentage": 60.37, "elapsed_time": "0:33:56", "remaining_time": "0:22:16", "throughput": 5529.21, "total_tokens": 11259904}
|
|
{"current_steps": 22875, "total_steps": 37885, "loss": 0.1032, "lr": 8.134115927570493e-07, "epoch": 3.0190048831991554, "percentage": 60.38, "elapsed_time": "0:33:56", "remaining_time": "0:22:16", "throughput": 5529.48, "total_tokens": 11262272}
|
|
{"current_steps": 22880, "total_steps": 37885, "loss": 0.0014, "lr": 8.129590051493189e-07, "epoch": 3.0196647749769037, "percentage": 60.39, "elapsed_time": "0:33:57", "remaining_time": "0:22:15", "throughput": 5529.69, "total_tokens": 11264512}
|
|
{"current_steps": 22885, "total_steps": 37885, "loss": 0.0036, "lr": 8.125064572396851e-07, "epoch": 3.0203246667546524, "percentage": 60.41, "elapsed_time": "0:33:57", "remaining_time": "0:22:15", "throughput": 5530.02, "total_tokens": 11267008}
|
|
{"current_steps": 22890, "total_steps": 37885, "loss": 0.0001, "lr": 8.12053949124198e-07, "epoch": 3.0209845585324007, "percentage": 60.42, "elapsed_time": "0:33:57", "remaining_time": "0:22:14", "throughput": 5530.38, "total_tokens": 11269568}
|
|
{"current_steps": 22895, "total_steps": 37885, "loss": 0.0002, "lr": 8.116014808988993e-07, "epoch": 3.021644450310149, "percentage": 60.43, "elapsed_time": "0:33:58", "remaining_time": "0:22:14", "throughput": 5530.76, "total_tokens": 11272192}
|
|
{"current_steps": 22900, "total_steps": 37885, "loss": 0.0002, "lr": 8.111490526598217e-07, "epoch": 3.0223043420878977, "percentage": 60.45, "elapsed_time": "0:33:58", "remaining_time": "0:22:13", "throughput": 5531.23, "total_tokens": 11275008}
|
|
{"current_steps": 22905, "total_steps": 37885, "loss": 0.0367, "lr": 8.106966645029905e-07, "epoch": 3.022964233865646, "percentage": 60.46, "elapsed_time": "0:33:58", "remaining_time": "0:22:13", "throughput": 5531.49, "total_tokens": 11277376}
|
|
{"current_steps": 22910, "total_steps": 37885, "loss": 0.0626, "lr": 8.102443165244213e-07, "epoch": 3.0236241256433947, "percentage": 60.47, "elapsed_time": "0:33:59", "remaining_time": "0:22:12", "throughput": 5531.84, "total_tokens": 11279936}
|
|
{"current_steps": 22915, "total_steps": 37885, "loss": 0.0, "lr": 8.097920088201216e-07, "epoch": 3.024284017421143, "percentage": 60.49, "elapsed_time": "0:33:59", "remaining_time": "0:22:12", "throughput": 5532.15, "total_tokens": 11282432}
|
|
{"current_steps": 22920, "total_steps": 37885, "loss": 0.0323, "lr": 8.09339741486091e-07, "epoch": 3.0249439091988912, "percentage": 60.5, "elapsed_time": "0:33:59", "remaining_time": "0:22:11", "throughput": 5532.59, "total_tokens": 11285184}
|
|
{"current_steps": 22925, "total_steps": 37885, "loss": 0.0, "lr": 8.088875146183192e-07, "epoch": 3.02560380097664, "percentage": 60.51, "elapsed_time": "0:34:00", "remaining_time": "0:22:11", "throughput": 5532.95, "total_tokens": 11287744}
|
|
{"current_steps": 22930, "total_steps": 37885, "loss": 0.0, "lr": 8.084353283127889e-07, "epoch": 3.0262636927543882, "percentage": 60.53, "elapsed_time": "0:34:00", "remaining_time": "0:22:10", "throughput": 5533.17, "total_tokens": 11289984}
|
|
{"current_steps": 22935, "total_steps": 37885, "loss": 0.0457, "lr": 8.079831826654729e-07, "epoch": 3.026923584532137, "percentage": 60.54, "elapsed_time": "0:34:00", "remaining_time": "0:22:10", "throughput": 5533.47, "total_tokens": 11292416}
|
|
{"current_steps": 22940, "total_steps": 37885, "loss": 0.0001, "lr": 8.075310777723357e-07, "epoch": 3.0275834763098852, "percentage": 60.55, "elapsed_time": "0:34:01", "remaining_time": "0:22:09", "throughput": 5533.89, "total_tokens": 11295104}
|
|
{"current_steps": 22945, "total_steps": 37885, "loss": 0.0, "lr": 8.070790137293338e-07, "epoch": 3.0282433680876335, "percentage": 60.56, "elapsed_time": "0:34:01", "remaining_time": "0:22:09", "throughput": 5534.08, "total_tokens": 11297280}
|
|
{"current_steps": 22950, "total_steps": 37885, "loss": 0.0783, "lr": 8.066269906324138e-07, "epoch": 3.0289032598653822, "percentage": 60.58, "elapsed_time": "0:34:01", "remaining_time": "0:22:08", "throughput": 5534.36, "total_tokens": 11299648}
|
|
{"current_steps": 22955, "total_steps": 37885, "loss": 0.0002, "lr": 8.061750085775151e-07, "epoch": 3.0295631516431305, "percentage": 60.59, "elapsed_time": "0:34:02", "remaining_time": "0:22:08", "throughput": 5534.66, "total_tokens": 11302080}
|
|
{"current_steps": 22960, "total_steps": 37885, "loss": 0.0538, "lr": 8.057230676605673e-07, "epoch": 3.030223043420879, "percentage": 60.6, "elapsed_time": "0:34:02", "remaining_time": "0:22:07", "throughput": 5535.13, "total_tokens": 11304896}
|
|
{"current_steps": 22965, "total_steps": 37885, "loss": 0.0, "lr": 8.05271167977491e-07, "epoch": 3.0308829351986275, "percentage": 60.62, "elapsed_time": "0:34:02", "remaining_time": "0:22:07", "throughput": 5535.43, "total_tokens": 11307328}
|
|
{"current_steps": 22970, "total_steps": 37885, "loss": 0.0, "lr": 8.048193096241999e-07, "epoch": 3.031542826976376, "percentage": 60.63, "elapsed_time": "0:34:03", "remaining_time": "0:22:06", "throughput": 5535.59, "total_tokens": 11309440}
|
|
{"current_steps": 22975, "total_steps": 37885, "loss": 0.0001, "lr": 8.043674926965962e-07, "epoch": 3.0322027187541245, "percentage": 60.64, "elapsed_time": "0:34:03", "remaining_time": "0:22:06", "throughput": 5535.92, "total_tokens": 11311936}
|
|
{"current_steps": 22980, "total_steps": 37885, "loss": 0.0834, "lr": 8.039157172905762e-07, "epoch": 3.032862610531873, "percentage": 60.66, "elapsed_time": "0:34:03", "remaining_time": "0:22:05", "throughput": 5536.31, "total_tokens": 11314560}
|
|
{"current_steps": 22985, "total_steps": 37885, "loss": 0.0001, "lr": 8.034639835020251e-07, "epoch": 3.033522502309621, "percentage": 60.67, "elapsed_time": "0:34:04", "remaining_time": "0:22:05", "throughput": 5536.58, "total_tokens": 11316992}
|
|
{"current_steps": 22990, "total_steps": 37885, "loss": 0.0002, "lr": 8.030122914268198e-07, "epoch": 3.03418239408737, "percentage": 60.68, "elapsed_time": "0:34:04", "remaining_time": "0:22:04", "throughput": 5536.96, "total_tokens": 11319616}
|
|
{"current_steps": 22995, "total_steps": 37885, "loss": 0.0005, "lr": 8.025606411608299e-07, "epoch": 3.034842285865118, "percentage": 60.7, "elapsed_time": "0:34:04", "remaining_time": "0:22:04", "throughput": 5537.27, "total_tokens": 11322112}
|
|
{"current_steps": 23000, "total_steps": 37885, "loss": 0.0002, "lr": 8.021090327999135e-07, "epoch": 3.035502177642867, "percentage": 60.71, "elapsed_time": "0:34:05", "remaining_time": "0:22:03", "throughput": 5537.53, "total_tokens": 11324480}
|
|
{"current_steps": 23005, "total_steps": 37885, "loss": 0.0004, "lr": 8.016574664399225e-07, "epoch": 3.036162069420615, "percentage": 60.72, "elapsed_time": "0:34:05", "remaining_time": "0:22:02", "throughput": 5537.87, "total_tokens": 11327040}
|
|
{"current_steps": 23010, "total_steps": 37885, "loss": 0.0001, "lr": 8.012059421766972e-07, "epoch": 3.0368219611983633, "percentage": 60.74, "elapsed_time": "0:34:05", "remaining_time": "0:22:02", "throughput": 5538.14, "total_tokens": 11329408}
|
|
{"current_steps": 23015, "total_steps": 37885, "loss": 0.0, "lr": 8.007544601060719e-07, "epoch": 3.037481852976112, "percentage": 60.75, "elapsed_time": "0:34:06", "remaining_time": "0:22:01", "throughput": 5538.52, "total_tokens": 11332032}
|
|
{"current_steps": 23020, "total_steps": 37885, "loss": 0.0002, "lr": 8.003030203238694e-07, "epoch": 3.0381417447538603, "percentage": 60.76, "elapsed_time": "0:34:06", "remaining_time": "0:22:01", "throughput": 5538.82, "total_tokens": 11334528}
|
|
{"current_steps": 23025, "total_steps": 37885, "loss": 0.0004, "lr": 7.998516229259045e-07, "epoch": 3.0388016365316086, "percentage": 60.78, "elapsed_time": "0:34:06", "remaining_time": "0:22:00", "throughput": 5539.17, "total_tokens": 11337088}
|
|
{"current_steps": 23030, "total_steps": 37885, "loss": 0.0, "lr": 7.994002680079835e-07, "epoch": 3.0394615283093573, "percentage": 60.79, "elapsed_time": "0:34:07", "remaining_time": "0:22:00", "throughput": 5539.49, "total_tokens": 11339584}
|
|
{"current_steps": 23035, "total_steps": 37885, "loss": 0.0, "lr": 7.989489556659028e-07, "epoch": 3.0401214200871056, "percentage": 60.8, "elapsed_time": "0:34:07", "remaining_time": "0:21:59", "throughput": 5539.84, "total_tokens": 11342144}
|
|
{"current_steps": 23040, "total_steps": 37885, "loss": 0.0001, "lr": 7.984976859954506e-07, "epoch": 3.0407813118648543, "percentage": 60.82, "elapsed_time": "0:34:07", "remaining_time": "0:21:59", "throughput": 5540.31, "total_tokens": 11345024}
|
|
{"current_steps": 23045, "total_steps": 37885, "loss": 0.0001, "lr": 7.980464590924054e-07, "epoch": 3.0414412036426026, "percentage": 60.83, "elapsed_time": "0:34:08", "remaining_time": "0:21:58", "throughput": 5540.6, "total_tokens": 11347456}
|
|
{"current_steps": 23050, "total_steps": 37885, "loss": 0.02, "lr": 7.975952750525366e-07, "epoch": 3.042101095420351, "percentage": 60.84, "elapsed_time": "0:34:08", "remaining_time": "0:21:58", "throughput": 5540.84, "total_tokens": 11349760}
|
|
{"current_steps": 23055, "total_steps": 37885, "loss": 0.0, "lr": 7.97144133971605e-07, "epoch": 3.0427609871980996, "percentage": 60.86, "elapsed_time": "0:34:08", "remaining_time": "0:21:57", "throughput": 5541.26, "total_tokens": 11352512}
|
|
{"current_steps": 23060, "total_steps": 37885, "loss": 0.0, "lr": 7.966930359453619e-07, "epoch": 3.043420878975848, "percentage": 60.87, "elapsed_time": "0:34:09", "remaining_time": "0:21:57", "throughput": 5541.55, "total_tokens": 11354944}
|
|
{"current_steps": 23065, "total_steps": 37885, "loss": 0.0003, "lr": 7.9624198106955e-07, "epoch": 3.0440807707535966, "percentage": 60.88, "elapsed_time": "0:34:09", "remaining_time": "0:21:56", "throughput": 5541.81, "total_tokens": 11357312}
|
|
{"current_steps": 23070, "total_steps": 37885, "loss": 0.1689, "lr": 7.957909694399019e-07, "epoch": 3.044740662531345, "percentage": 60.89, "elapsed_time": "0:34:09", "remaining_time": "0:21:56", "throughput": 5542.18, "total_tokens": 11359936}
|
|
{"current_steps": 23075, "total_steps": 37885, "loss": 0.0, "lr": 7.953400011521417e-07, "epoch": 3.045400554309093, "percentage": 60.91, "elapsed_time": "0:34:10", "remaining_time": "0:21:55", "throughput": 5542.39, "total_tokens": 11362240}
|
|
{"current_steps": 23080, "total_steps": 37885, "loss": 0.0002, "lr": 7.948890763019845e-07, "epoch": 3.046060446086842, "percentage": 60.92, "elapsed_time": "0:34:10", "remaining_time": "0:21:55", "throughput": 5542.64, "total_tokens": 11364608}
|
|
{"current_steps": 23085, "total_steps": 37885, "loss": 0.0, "lr": 7.944381949851353e-07, "epoch": 3.04672033786459, "percentage": 60.93, "elapsed_time": "0:34:10", "remaining_time": "0:21:54", "throughput": 5542.9, "total_tokens": 11366976}
|
|
{"current_steps": 23090, "total_steps": 37885, "loss": 0.0002, "lr": 7.939873572972908e-07, "epoch": 3.0473802296423385, "percentage": 60.95, "elapsed_time": "0:34:11", "remaining_time": "0:21:54", "throughput": 5543.21, "total_tokens": 11369408}
|
|
{"current_steps": 23095, "total_steps": 37885, "loss": 0.0001, "lr": 7.93536563334138e-07, "epoch": 3.048040121420087, "percentage": 60.96, "elapsed_time": "0:34:11", "remaining_time": "0:21:53", "throughput": 5543.53, "total_tokens": 11371904}
|
|
{"current_steps": 23100, "total_steps": 37885, "loss": 0.0001, "lr": 7.930858131913541e-07, "epoch": 3.0487000131978355, "percentage": 60.97, "elapsed_time": "0:34:11", "remaining_time": "0:21:53", "throughput": 5543.97, "total_tokens": 11374656}
|
|
{"current_steps": 23105, "total_steps": 37885, "loss": 0.0001, "lr": 7.926351069646084e-07, "epoch": 3.049359904975584, "percentage": 60.99, "elapsed_time": "0:34:12", "remaining_time": "0:21:52", "throughput": 5544.14, "total_tokens": 11376832}
|
|
{"current_steps": 23110, "total_steps": 37885, "loss": 0.0002, "lr": 7.921844447495594e-07, "epoch": 3.0500197967533325, "percentage": 61.0, "elapsed_time": "0:34:12", "remaining_time": "0:21:52", "throughput": 5544.44, "total_tokens": 11379264}
|
|
{"current_steps": 23115, "total_steps": 37885, "loss": 0.0001, "lr": 7.917338266418573e-07, "epoch": 3.0506796885310807, "percentage": 61.01, "elapsed_time": "0:34:12", "remaining_time": "0:21:51", "throughput": 5544.64, "total_tokens": 11381504}
|
|
{"current_steps": 23120, "total_steps": 37885, "loss": 0.0006, "lr": 7.912832527371426e-07, "epoch": 3.0513395803088295, "percentage": 61.03, "elapsed_time": "0:34:13", "remaining_time": "0:21:51", "throughput": 5545.1, "total_tokens": 11384320}
|
|
{"current_steps": 23125, "total_steps": 37885, "loss": 0.0, "lr": 7.908327231310454e-07, "epoch": 3.0519994720865777, "percentage": 61.04, "elapsed_time": "0:34:13", "remaining_time": "0:21:50", "throughput": 5545.39, "total_tokens": 11386752}
|
|
{"current_steps": 23130, "total_steps": 37885, "loss": 0.0001, "lr": 7.903822379191885e-07, "epoch": 3.0526593638643265, "percentage": 61.05, "elapsed_time": "0:34:13", "remaining_time": "0:21:50", "throughput": 5545.63, "total_tokens": 11389120}
|
|
{"current_steps": 23135, "total_steps": 37885, "loss": 0.0002, "lr": 7.899317971971835e-07, "epoch": 3.0533192556420747, "percentage": 61.07, "elapsed_time": "0:34:14", "remaining_time": "0:21:49", "throughput": 5545.98, "total_tokens": 11391680}
|
|
{"current_steps": 23140, "total_steps": 37885, "loss": 0.0, "lr": 7.894814010606336e-07, "epoch": 3.053979147419823, "percentage": 61.08, "elapsed_time": "0:34:14", "remaining_time": "0:21:49", "throughput": 5546.3, "total_tokens": 11394176}
|
|
{"current_steps": 23145, "total_steps": 37885, "loss": 0.0, "lr": 7.890310496051319e-07, "epoch": 3.0546390391975717, "percentage": 61.09, "elapsed_time": "0:34:14", "remaining_time": "0:21:48", "throughput": 5546.51, "total_tokens": 11396480}
|
|
{"current_steps": 23150, "total_steps": 37885, "loss": 0.0, "lr": 7.885807429262616e-07, "epoch": 3.05529893097532, "percentage": 61.11, "elapsed_time": "0:34:15", "remaining_time": "0:21:48", "throughput": 5546.88, "total_tokens": 11399104}
|
|
{"current_steps": 23155, "total_steps": 37885, "loss": 0.0007, "lr": 7.881304811195985e-07, "epoch": 3.0559588227530683, "percentage": 61.12, "elapsed_time": "0:34:15", "remaining_time": "0:21:47", "throughput": 5547.22, "total_tokens": 11401664}
|
|
{"current_steps": 23160, "total_steps": 37885, "loss": 0.0, "lr": 7.876802642807056e-07, "epoch": 3.056618714530817, "percentage": 61.13, "elapsed_time": "0:34:15", "remaining_time": "0:21:47", "throughput": 5547.45, "total_tokens": 11403968}
|
|
{"current_steps": 23165, "total_steps": 37885, "loss": 0.0, "lr": 7.8723009250514e-07, "epoch": 3.0572786063085653, "percentage": 61.15, "elapsed_time": "0:34:16", "remaining_time": "0:21:46", "throughput": 5547.87, "total_tokens": 11406720}
|
|
{"current_steps": 23170, "total_steps": 37885, "loss": 0.0054, "lr": 7.867799658884462e-07, "epoch": 3.057938498086314, "percentage": 61.16, "elapsed_time": "0:34:16", "remaining_time": "0:21:45", "throughput": 5548.3, "total_tokens": 11409472}
|
|
{"current_steps": 23175, "total_steps": 37885, "loss": 0.0196, "lr": 7.863298845261603e-07, "epoch": 3.0585983898640623, "percentage": 61.17, "elapsed_time": "0:34:16", "remaining_time": "0:21:45", "throughput": 5548.69, "total_tokens": 11412160}
|
|
{"current_steps": 23180, "total_steps": 37885, "loss": 0.0, "lr": 7.858798485138095e-07, "epoch": 3.0592582816418106, "percentage": 61.19, "elapsed_time": "0:34:17", "remaining_time": "0:21:44", "throughput": 5548.94, "total_tokens": 11414528}
|
|
{"current_steps": 23185, "total_steps": 37885, "loss": 0.0, "lr": 7.854298579469099e-07, "epoch": 3.0599181734195593, "percentage": 61.2, "elapsed_time": "0:34:17", "remaining_time": "0:21:44", "throughput": 5549.22, "total_tokens": 11416960}
|
|
{"current_steps": 23190, "total_steps": 37885, "loss": 0.0002, "lr": 7.849799129209697e-07, "epoch": 3.0605780651973076, "percentage": 61.21, "elapsed_time": "0:34:17", "remaining_time": "0:21:43", "throughput": 5549.58, "total_tokens": 11419584}
|
|
{"current_steps": 23195, "total_steps": 37885, "loss": 0.0, "lr": 7.845300135314857e-07, "epoch": 3.0612379569750563, "percentage": 61.22, "elapsed_time": "0:34:18", "remaining_time": "0:21:43", "throughput": 5549.87, "total_tokens": 11422016}
|
|
{"current_steps": 23200, "total_steps": 37885, "loss": 0.0002, "lr": 7.840801598739459e-07, "epoch": 3.0618978487528046, "percentage": 61.24, "elapsed_time": "0:34:18", "remaining_time": "0:21:42", "throughput": 5550.18, "total_tokens": 11424512}
|
|
{"current_steps": 23205, "total_steps": 37885, "loss": 0.0374, "lr": 7.836303520438288e-07, "epoch": 3.062557740530553, "percentage": 61.25, "elapsed_time": "0:34:18", "remaining_time": "0:21:42", "throughput": 5550.47, "total_tokens": 11426944}
|
|
{"current_steps": 23210, "total_steps": 37885, "loss": 0.0549, "lr": 7.831805901366025e-07, "epoch": 3.0632176323083016, "percentage": 61.26, "elapsed_time": "0:34:19", "remaining_time": "0:21:41", "throughput": 5550.69, "total_tokens": 11429248}
|
|
{"current_steps": 23215, "total_steps": 37885, "loss": 0.0, "lr": 7.827308742477259e-07, "epoch": 3.06387752408605, "percentage": 61.28, "elapsed_time": "0:34:19", "remaining_time": "0:21:41", "throughput": 5551.07, "total_tokens": 11431872}
|
|
{"current_steps": 23220, "total_steps": 37885, "loss": 0.0, "lr": 7.822812044726479e-07, "epoch": 3.064537415863798, "percentage": 61.29, "elapsed_time": "0:34:19", "remaining_time": "0:21:40", "throughput": 5551.39, "total_tokens": 11434368}
|
|
{"current_steps": 23225, "total_steps": 37885, "loss": 0.1095, "lr": 7.818315809068076e-07, "epoch": 3.065197307641547, "percentage": 61.3, "elapsed_time": "0:34:20", "remaining_time": "0:21:40", "throughput": 5551.66, "total_tokens": 11436800}
|
|
{"current_steps": 23230, "total_steps": 37885, "loss": 0.0, "lr": 7.813820036456344e-07, "epoch": 3.065857199419295, "percentage": 61.32, "elapsed_time": "0:34:20", "remaining_time": "0:21:39", "throughput": 5551.99, "total_tokens": 11439360}
|
|
{"current_steps": 23235, "total_steps": 37885, "loss": 0.0001, "lr": 7.809324727845478e-07, "epoch": 3.066517091197044, "percentage": 61.33, "elapsed_time": "0:34:20", "remaining_time": "0:21:39", "throughput": 5552.24, "total_tokens": 11441728}
|
|
{"current_steps": 23240, "total_steps": 37885, "loss": 0.0, "lr": 7.804829884189576e-07, "epoch": 3.067176982974792, "percentage": 61.34, "elapsed_time": "0:34:21", "remaining_time": "0:21:38", "throughput": 5552.67, "total_tokens": 11444480}
|
|
{"current_steps": 23245, "total_steps": 37885, "loss": 0.0, "lr": 7.800335506442635e-07, "epoch": 3.0678368747525404, "percentage": 61.36, "elapsed_time": "0:34:21", "remaining_time": "0:21:38", "throughput": 5553.07, "total_tokens": 11447168}
|
|
{"current_steps": 23250, "total_steps": 37885, "loss": 0.0, "lr": 7.795841595558554e-07, "epoch": 3.068496766530289, "percentage": 61.37, "elapsed_time": "0:34:21", "remaining_time": "0:21:37", "throughput": 5553.47, "total_tokens": 11449856}
|
|
{"current_steps": 23255, "total_steps": 37885, "loss": 0.0007, "lr": 7.791348152491133e-07, "epoch": 3.0691566583080374, "percentage": 61.38, "elapsed_time": "0:34:22", "remaining_time": "0:21:37", "throughput": 5553.72, "total_tokens": 11452224}
|
|
{"current_steps": 23260, "total_steps": 37885, "loss": 0.0, "lr": 7.78685517819407e-07, "epoch": 3.069816550085786, "percentage": 61.4, "elapsed_time": "0:34:22", "remaining_time": "0:21:36", "throughput": 5554.09, "total_tokens": 11454848}
|
|
{"current_steps": 23265, "total_steps": 37885, "loss": 0.0, "lr": 7.782362673620972e-07, "epoch": 3.0704764418635344, "percentage": 61.41, "elapsed_time": "0:34:22", "remaining_time": "0:21:36", "throughput": 5554.26, "total_tokens": 11457088}
|
|
{"current_steps": 23270, "total_steps": 37885, "loss": 0.0001, "lr": 7.777870639725339e-07, "epoch": 3.0711363336412827, "percentage": 61.42, "elapsed_time": "0:34:23", "remaining_time": "0:21:35", "throughput": 5554.54, "total_tokens": 11459520}
|
|
{"current_steps": 23275, "total_steps": 37885, "loss": 0.0001, "lr": 7.773379077460569e-07, "epoch": 3.0717962254190314, "percentage": 61.44, "elapsed_time": "0:34:23", "remaining_time": "0:21:35", "throughput": 5554.8, "total_tokens": 11461952}
|
|
{"current_steps": 23280, "total_steps": 37885, "loss": 0.0003, "lr": 7.768887987779966e-07, "epoch": 3.0724561171967797, "percentage": 61.45, "elapsed_time": "0:34:23", "remaining_time": "0:21:34", "throughput": 5555.13, "total_tokens": 11464512}
|
|
{"current_steps": 23285, "total_steps": 37885, "loss": 0.0, "lr": 7.764397371636731e-07, "epoch": 3.073116008974528, "percentage": 61.46, "elapsed_time": "0:34:24", "remaining_time": "0:21:34", "throughput": 5555.44, "total_tokens": 11467008}
|
|
{"current_steps": 23290, "total_steps": 37885, "loss": 0.0, "lr": 7.759907229983967e-07, "epoch": 3.0737759007522767, "percentage": 61.48, "elapsed_time": "0:34:24", "remaining_time": "0:21:33", "throughput": 5555.56, "total_tokens": 11469120}
|
|
{"current_steps": 23295, "total_steps": 37885, "loss": 0.0, "lr": 7.755417563774673e-07, "epoch": 3.074435792530025, "percentage": 61.49, "elapsed_time": "0:34:24", "remaining_time": "0:21:33", "throughput": 5555.93, "total_tokens": 11471744}
|
|
{"current_steps": 23300, "total_steps": 37885, "loss": 0.0756, "lr": 7.75092837396174e-07, "epoch": 3.0750956843077737, "percentage": 61.5, "elapsed_time": "0:34:25", "remaining_time": "0:21:32", "throughput": 5556.2, "total_tokens": 11474112}
|
|
{"current_steps": 23305, "total_steps": 37885, "loss": 0.0, "lr": 7.746439661497981e-07, "epoch": 3.075755576085522, "percentage": 61.52, "elapsed_time": "0:34:25", "remaining_time": "0:21:32", "throughput": 5556.56, "total_tokens": 11476736}
|
|
{"current_steps": 23310, "total_steps": 37885, "loss": 0.0813, "lr": 7.741951427336078e-07, "epoch": 3.0764154678632702, "percentage": 61.53, "elapsed_time": "0:34:25", "remaining_time": "0:21:31", "throughput": 5556.83, "total_tokens": 11479168}
|
|
{"current_steps": 23315, "total_steps": 37885, "loss": 0.0, "lr": 7.737463672428638e-07, "epoch": 3.077075359641019, "percentage": 61.54, "elapsed_time": "0:34:26", "remaining_time": "0:21:31", "throughput": 5557.14, "total_tokens": 11481664}
|
|
{"current_steps": 23320, "total_steps": 37885, "loss": 0.0005, "lr": 7.732976397728151e-07, "epoch": 3.0777352514187672, "percentage": 61.55, "elapsed_time": "0:34:26", "remaining_time": "0:21:30", "throughput": 5557.44, "total_tokens": 11484160}
|
|
{"current_steps": 23325, "total_steps": 37885, "loss": 0.0001, "lr": 7.728489604187001e-07, "epoch": 3.078395143196516, "percentage": 61.57, "elapsed_time": "0:34:26", "remaining_time": "0:21:30", "throughput": 5557.72, "total_tokens": 11486592}
|
|
{"current_steps": 23330, "total_steps": 37885, "loss": 0.0, "lr": 7.72400329275749e-07, "epoch": 3.0790550349742642, "percentage": 61.58, "elapsed_time": "0:34:27", "remaining_time": "0:21:29", "throughput": 5558.03, "total_tokens": 11489088}
|
|
{"current_steps": 23335, "total_steps": 37885, "loss": 0.0252, "lr": 7.719517464391791e-07, "epoch": 3.0797149267520125, "percentage": 61.59, "elapsed_time": "0:34:27", "remaining_time": "0:21:29", "throughput": 5558.27, "total_tokens": 11491392}
|
|
{"current_steps": 23340, "total_steps": 37885, "loss": 0.0016, "lr": 7.715032120042004e-07, "epoch": 3.0803748185297612, "percentage": 61.61, "elapsed_time": "0:34:27", "remaining_time": "0:21:28", "throughput": 5558.54, "total_tokens": 11493760}
|
|
{"current_steps": 23345, "total_steps": 37885, "loss": 0.0001, "lr": 7.710547260660096e-07, "epoch": 3.0810347103075095, "percentage": 61.62, "elapsed_time": "0:34:28", "remaining_time": "0:21:28", "throughput": 5558.89, "total_tokens": 11496320}
|
|
{"current_steps": 23350, "total_steps": 37885, "loss": 0.0, "lr": 7.706062887197959e-07, "epoch": 3.081694602085258, "percentage": 61.63, "elapsed_time": "0:34:28", "remaining_time": "0:21:27", "throughput": 5559.16, "total_tokens": 11498688}
|
|
{"current_steps": 23355, "total_steps": 37885, "loss": 0.0, "lr": 7.701579000607362e-07, "epoch": 3.0823544938630065, "percentage": 61.65, "elapsed_time": "0:34:28", "remaining_time": "0:21:27", "throughput": 5559.51, "total_tokens": 11501248}
|
|
{"current_steps": 23360, "total_steps": 37885, "loss": 0.0, "lr": 7.697095601839975e-07, "epoch": 3.083014385640755, "percentage": 61.66, "elapsed_time": "0:34:29", "remaining_time": "0:21:26", "throughput": 5559.81, "total_tokens": 11503680}
|
|
{"current_steps": 23365, "total_steps": 37885, "loss": 0.0, "lr": 7.692612691847373e-07, "epoch": 3.0836742774185035, "percentage": 61.67, "elapsed_time": "0:34:29", "remaining_time": "0:21:26", "throughput": 5560.13, "total_tokens": 11506176}
|
|
{"current_steps": 23370, "total_steps": 37885, "loss": 0.0673, "lr": 7.688130271581015e-07, "epoch": 3.084334169196252, "percentage": 61.69, "elapsed_time": "0:34:29", "remaining_time": "0:21:25", "throughput": 5560.62, "total_tokens": 11509056}
|
|
{"current_steps": 23375, "total_steps": 37885, "loss": 0.0, "lr": 7.68364834199227e-07, "epoch": 3.084994060974, "percentage": 61.7, "elapsed_time": "0:34:30", "remaining_time": "0:21:24", "throughput": 5560.82, "total_tokens": 11511296}
|
|
{"current_steps": 23380, "total_steps": 37885, "loss": 0.0, "lr": 7.679166904032389e-07, "epoch": 3.085653952751749, "percentage": 61.71, "elapsed_time": "0:34:30", "remaining_time": "0:21:24", "throughput": 5561.16, "total_tokens": 11513856}
|
|
{"current_steps": 23385, "total_steps": 37885, "loss": 0.0002, "lr": 7.674685958652525e-07, "epoch": 3.086313844529497, "percentage": 61.73, "elapsed_time": "0:34:30", "remaining_time": "0:21:23", "throughput": 5561.4, "total_tokens": 11516160}
|
|
{"current_steps": 23390, "total_steps": 37885, "loss": 0.0252, "lr": 7.67020550680373e-07, "epoch": 3.086973736307246, "percentage": 61.74, "elapsed_time": "0:34:31", "remaining_time": "0:21:23", "throughput": 5561.61, "total_tokens": 11518400}
|
|
{"current_steps": 23395, "total_steps": 37885, "loss": 0.1016, "lr": 7.665725549436942e-07, "epoch": 3.087633628084994, "percentage": 61.75, "elapsed_time": "0:34:31", "remaining_time": "0:21:22", "throughput": 5562.04, "total_tokens": 11521152}
|
|
{"current_steps": 23400, "total_steps": 37885, "loss": 0.0427, "lr": 7.661246087503006e-07, "epoch": 3.0882935198627424, "percentage": 61.77, "elapsed_time": "0:34:31", "remaining_time": "0:21:22", "throughput": 5562.42, "total_tokens": 11523776}
|
|
{"current_steps": 23405, "total_steps": 37885, "loss": 0.0001, "lr": 7.656767121952651e-07, "epoch": 3.088953411640491, "percentage": 61.78, "elapsed_time": "0:34:32", "remaining_time": "0:21:21", "throughput": 5562.71, "total_tokens": 11526208}
|
|
{"current_steps": 23410, "total_steps": 37885, "loss": 0.0002, "lr": 7.652288653736504e-07, "epoch": 3.0896133034182394, "percentage": 61.79, "elapsed_time": "0:34:32", "remaining_time": "0:21:21", "throughput": 5563.02, "total_tokens": 11528704}
|
|
{"current_steps": 23415, "total_steps": 37885, "loss": 0.0, "lr": 7.647810683805091e-07, "epoch": 3.0902731951959876, "percentage": 61.81, "elapsed_time": "0:34:32", "remaining_time": "0:21:20", "throughput": 5563.45, "total_tokens": 11531456}
|
|
{"current_steps": 23420, "total_steps": 37885, "loss": 0.117, "lr": 7.643333213108827e-07, "epoch": 3.0909330869737364, "percentage": 61.82, "elapsed_time": "0:34:33", "remaining_time": "0:21:20", "throughput": 5563.73, "total_tokens": 11533824}
|
|
{"current_steps": 23425, "total_steps": 37885, "loss": 0.0, "lr": 7.638856242598024e-07, "epoch": 3.0915929787514846, "percentage": 61.83, "elapsed_time": "0:34:33", "remaining_time": "0:21:19", "throughput": 5564.08, "total_tokens": 11536384}
|
|
{"current_steps": 23430, "total_steps": 37885, "loss": 0.002, "lr": 7.634379773222885e-07, "epoch": 3.0922528705292334, "percentage": 61.85, "elapsed_time": "0:34:33", "remaining_time": "0:21:19", "throughput": 5564.43, "total_tokens": 11538944}
|
|
{"current_steps": 23435, "total_steps": 37885, "loss": 0.0, "lr": 7.629903805933506e-07, "epoch": 3.0929127623069816, "percentage": 61.86, "elapsed_time": "0:34:34", "remaining_time": "0:21:18", "throughput": 5564.72, "total_tokens": 11541376}
|
|
{"current_steps": 23440, "total_steps": 37885, "loss": 0.0, "lr": 7.625428341679885e-07, "epoch": 3.09357265408473, "percentage": 61.87, "elapsed_time": "0:34:34", "remaining_time": "0:21:18", "throughput": 5565.04, "total_tokens": 11543872}
|
|
{"current_steps": 23445, "total_steps": 37885, "loss": 0.0456, "lr": 7.6209533814119e-07, "epoch": 3.0942325458624786, "percentage": 61.88, "elapsed_time": "0:34:34", "remaining_time": "0:21:17", "throughput": 5565.34, "total_tokens": 11546368}
|
|
{"current_steps": 23450, "total_steps": 37885, "loss": 0.0, "lr": 7.616478926079335e-07, "epoch": 3.094892437640227, "percentage": 61.9, "elapsed_time": "0:34:35", "remaining_time": "0:21:17", "throughput": 5565.7, "total_tokens": 11548928}
|
|
{"current_steps": 23455, "total_steps": 37885, "loss": 0.0, "lr": 7.612004976631857e-07, "epoch": 3.0955523294179756, "percentage": 61.91, "elapsed_time": "0:34:35", "remaining_time": "0:21:16", "throughput": 5566.12, "total_tokens": 11551680}
|
|
{"current_steps": 23460, "total_steps": 37885, "loss": 0.0, "lr": 7.607531534019028e-07, "epoch": 3.096212221195724, "percentage": 61.92, "elapsed_time": "0:34:35", "remaining_time": "0:21:16", "throughput": 5566.39, "total_tokens": 11554048}
|
|
{"current_steps": 23465, "total_steps": 37885, "loss": 0.0002, "lr": 7.60305859919031e-07, "epoch": 3.096872112973472, "percentage": 61.94, "elapsed_time": "0:34:36", "remaining_time": "0:21:15", "throughput": 5566.65, "total_tokens": 11556416}
|
|
{"current_steps": 23470, "total_steps": 37885, "loss": 0.0, "lr": 7.598586173095043e-07, "epoch": 3.097532004751221, "percentage": 61.95, "elapsed_time": "0:34:36", "remaining_time": "0:21:15", "throughput": 5566.96, "total_tokens": 11558912}
|
|
{"current_steps": 23475, "total_steps": 37885, "loss": 0.0, "lr": 7.594114256682473e-07, "epoch": 3.098191896528969, "percentage": 61.96, "elapsed_time": "0:34:36", "remaining_time": "0:21:14", "throughput": 5567.3, "total_tokens": 11561472}
|
|
{"current_steps": 23480, "total_steps": 37885, "loss": 0.0719, "lr": 7.589642850901733e-07, "epoch": 3.0988517883067175, "percentage": 61.98, "elapsed_time": "0:34:37", "remaining_time": "0:21:14", "throughput": 5567.57, "total_tokens": 11563840}
|
|
{"current_steps": 23485, "total_steps": 37885, "loss": 0.001, "lr": 7.585171956701837e-07, "epoch": 3.099511680084466, "percentage": 61.99, "elapsed_time": "0:34:37", "remaining_time": "0:21:13", "throughput": 5567.97, "total_tokens": 11566528}
|
|
{"current_steps": 23490, "total_steps": 37885, "loss": 0.0, "lr": 7.580701575031713e-07, "epoch": 3.1001715718622145, "percentage": 62.0, "elapsed_time": "0:34:37", "remaining_time": "0:21:13", "throughput": 5568.12, "total_tokens": 11568640}
|
|
{"current_steps": 23495, "total_steps": 37885, "loss": 0.0009, "lr": 7.576231706840154e-07, "epoch": 3.100831463639963, "percentage": 62.02, "elapsed_time": "0:34:37", "remaining_time": "0:21:12", "throughput": 5568.44, "total_tokens": 11571136}
|
|
{"current_steps": 23500, "total_steps": 37885, "loss": 0.0, "lr": 7.571762353075869e-07, "epoch": 3.1014913554177115, "percentage": 62.03, "elapsed_time": "0:34:38", "remaining_time": "0:21:12", "throughput": 5568.73, "total_tokens": 11573568}
|
|
{"current_steps": 23505, "total_steps": 37885, "loss": 0.0, "lr": 7.56729351468744e-07, "epoch": 3.1021512471954598, "percentage": 62.04, "elapsed_time": "0:34:38", "remaining_time": "0:21:11", "throughput": 5569.05, "total_tokens": 11576064}
|
|
{"current_steps": 23510, "total_steps": 37885, "loss": 0.0722, "lr": 7.562825192623341e-07, "epoch": 3.1028111389732085, "percentage": 62.06, "elapsed_time": "0:34:38", "remaining_time": "0:21:11", "throughput": 5569.33, "total_tokens": 11578496}
|
|
{"current_steps": 23515, "total_steps": 37885, "loss": 0.0, "lr": 7.558357387831953e-07, "epoch": 3.1034710307509568, "percentage": 62.07, "elapsed_time": "0:34:39", "remaining_time": "0:21:10", "throughput": 5569.66, "total_tokens": 11580992}
|
|
{"current_steps": 23520, "total_steps": 37885, "loss": 0.0164, "lr": 7.553890101261522e-07, "epoch": 3.1041309225287055, "percentage": 62.08, "elapsed_time": "0:34:39", "remaining_time": "0:21:10", "throughput": 5569.98, "total_tokens": 11583488}
|
|
{"current_steps": 23525, "total_steps": 37885, "loss": 0.0, "lr": 7.54942333386021e-07, "epoch": 3.1047908143064538, "percentage": 62.1, "elapsed_time": "0:34:39", "remaining_time": "0:21:09", "throughput": 5570.41, "total_tokens": 11586240}
|
|
{"current_steps": 23530, "total_steps": 37885, "loss": 0.0, "lr": 7.544957086576049e-07, "epoch": 3.105450706084202, "percentage": 62.11, "elapsed_time": "0:34:40", "remaining_time": "0:21:09", "throughput": 5570.86, "total_tokens": 11589056}
|
|
{"current_steps": 23535, "total_steps": 37885, "loss": 0.0, "lr": 7.540491360356965e-07, "epoch": 3.1061105978619508, "percentage": 62.12, "elapsed_time": "0:34:40", "remaining_time": "0:21:08", "throughput": 5571.07, "total_tokens": 11591296}
|
|
{"current_steps": 23540, "total_steps": 37885, "loss": 0.0001, "lr": 7.53602615615078e-07, "epoch": 3.106770489639699, "percentage": 62.14, "elapsed_time": "0:34:40", "remaining_time": "0:21:08", "throughput": 5571.42, "total_tokens": 11593856}
|
|
{"current_steps": 23545, "total_steps": 37885, "loss": 0.0, "lr": 7.5315614749052e-07, "epoch": 3.1074303814174478, "percentage": 62.15, "elapsed_time": "0:34:41", "remaining_time": "0:21:07", "throughput": 5571.63, "total_tokens": 11596096}
|
|
{"current_steps": 23550, "total_steps": 37885, "loss": 0.0, "lr": 7.527097317567824e-07, "epoch": 3.108090273195196, "percentage": 62.16, "elapsed_time": "0:34:41", "remaining_time": "0:21:07", "throughput": 5571.94, "total_tokens": 11598592}
|
|
{"current_steps": 23555, "total_steps": 37885, "loss": 0.063, "lr": 7.522633685086135e-07, "epoch": 3.1087501649729443, "percentage": 62.18, "elapsed_time": "0:34:41", "remaining_time": "0:21:06", "throughput": 5572.25, "total_tokens": 11601088}
|
|
{"current_steps": 23560, "total_steps": 37885, "loss": 0.0026, "lr": 7.518170578407505e-07, "epoch": 3.109410056750693, "percentage": 62.19, "elapsed_time": "0:34:42", "remaining_time": "0:21:06", "throughput": 5572.64, "total_tokens": 11603712}
|
|
{"current_steps": 23565, "total_steps": 37885, "loss": 0.0003, "lr": 7.513707998479199e-07, "epoch": 3.1100699485284413, "percentage": 62.2, "elapsed_time": "0:34:42", "remaining_time": "0:21:05", "throughput": 5572.9, "total_tokens": 11606080}
|
|
{"current_steps": 23570, "total_steps": 37885, "loss": 0.0, "lr": 7.509245946248363e-07, "epoch": 3.1107298403061896, "percentage": 62.21, "elapsed_time": "0:34:42", "remaining_time": "0:21:05", "throughput": 5573.04, "total_tokens": 11608192}
|
|
{"current_steps": 23575, "total_steps": 37885, "loss": 0.0001, "lr": 7.504784422662042e-07, "epoch": 3.1113897320839383, "percentage": 62.23, "elapsed_time": "0:34:43", "remaining_time": "0:21:04", "throughput": 5573.27, "total_tokens": 11610496}
|
|
{"current_steps": 23580, "total_steps": 37885, "loss": 0.0004, "lr": 7.500323428667159e-07, "epoch": 3.1120496238616866, "percentage": 62.24, "elapsed_time": "0:34:43", "remaining_time": "0:21:04", "throughput": 5573.65, "total_tokens": 11613120}
|
|
{"current_steps": 23585, "total_steps": 37885, "loss": 0.0337, "lr": 7.495862965210525e-07, "epoch": 3.1127095156394353, "percentage": 62.25, "elapsed_time": "0:34:43", "remaining_time": "0:21:03", "throughput": 5573.81, "total_tokens": 11615296}
|
|
{"current_steps": 23590, "total_steps": 37885, "loss": 0.0008, "lr": 7.491403033238844e-07, "epoch": 3.1133694074171836, "percentage": 62.27, "elapsed_time": "0:34:44", "remaining_time": "0:21:02", "throughput": 5574.05, "total_tokens": 11617600}
|
|
{"current_steps": 23595, "total_steps": 37885, "loss": 0.0323, "lr": 7.4869436336987e-07, "epoch": 3.114029299194932, "percentage": 62.28, "elapsed_time": "0:34:44", "remaining_time": "0:21:02", "throughput": 5574.3, "total_tokens": 11619968}
|
|
{"current_steps": 23600, "total_steps": 37885, "loss": 0.0002, "lr": 7.482484767536576e-07, "epoch": 3.1146891909726806, "percentage": 62.29, "elapsed_time": "0:34:44", "remaining_time": "0:21:01", "throughput": 5574.56, "total_tokens": 11622336}
|
|
{"current_steps": 23605, "total_steps": 37885, "loss": 0.0001, "lr": 7.478026435698827e-07, "epoch": 3.115349082750429, "percentage": 62.31, "elapsed_time": "0:34:45", "remaining_time": "0:21:01", "throughput": 5574.91, "total_tokens": 11624896}
|
|
{"current_steps": 23610, "total_steps": 37885, "loss": 0.0005, "lr": 7.473568639131706e-07, "epoch": 3.116008974528177, "percentage": 62.32, "elapsed_time": "0:34:45", "remaining_time": "0:21:00", "throughput": 5575.16, "total_tokens": 11627264}
|
|
{"current_steps": 23615, "total_steps": 37885, "loss": 0.0001, "lr": 7.469111378781346e-07, "epoch": 3.116668866305926, "percentage": 62.33, "elapsed_time": "0:34:45", "remaining_time": "0:21:00", "throughput": 5575.59, "total_tokens": 11630016}
|
|
{"current_steps": 23620, "total_steps": 37885, "loss": 0.1031, "lr": 7.464654655593767e-07, "epoch": 3.117328758083674, "percentage": 62.35, "elapsed_time": "0:34:46", "remaining_time": "0:20:59", "throughput": 5575.89, "total_tokens": 11632448}
|
|
{"current_steps": 23625, "total_steps": 37885, "loss": 0.0, "lr": 7.46019847051488e-07, "epoch": 3.117988649861423, "percentage": 62.36, "elapsed_time": "0:34:46", "remaining_time": "0:20:59", "throughput": 5576.2, "total_tokens": 11634944}
|
|
{"current_steps": 23630, "total_steps": 37885, "loss": 0.0002, "lr": 7.455742824490477e-07, "epoch": 3.118648541639171, "percentage": 62.37, "elapsed_time": "0:34:46", "remaining_time": "0:20:58", "throughput": 5576.61, "total_tokens": 11637632}
|
|
{"current_steps": 23635, "total_steps": 37885, "loss": 0.0, "lr": 7.45128771846623e-07, "epoch": 3.1193084334169194, "percentage": 62.39, "elapsed_time": "0:34:47", "remaining_time": "0:20:58", "throughput": 5576.96, "total_tokens": 11640192}
|
|
{"current_steps": 23640, "total_steps": 37885, "loss": 0.0, "lr": 7.446833153387714e-07, "epoch": 3.119968325194668, "percentage": 62.4, "elapsed_time": "0:34:47", "remaining_time": "0:20:57", "throughput": 5577.35, "total_tokens": 11642880}
|
|
{"current_steps": 23645, "total_steps": 37885, "loss": 0.0, "lr": 7.442379130200369e-07, "epoch": 3.1206282169724164, "percentage": 62.41, "elapsed_time": "0:34:47", "remaining_time": "0:20:57", "throughput": 5577.58, "total_tokens": 11645184}
|
|
{"current_steps": 23650, "total_steps": 37885, "loss": 0.0, "lr": 7.437925649849534e-07, "epoch": 3.121288108750165, "percentage": 62.43, "elapsed_time": "0:34:48", "remaining_time": "0:20:56", "throughput": 5577.84, "total_tokens": 11647552}
|
|
{"current_steps": 23655, "total_steps": 37885, "loss": 0.0613, "lr": 7.433472713280426e-07, "epoch": 3.1219480005279134, "percentage": 62.44, "elapsed_time": "0:34:48", "remaining_time": "0:20:56", "throughput": 5578.23, "total_tokens": 11650240}
|
|
{"current_steps": 23660, "total_steps": 37885, "loss": 0.0004, "lr": 7.42902032143815e-07, "epoch": 3.1226078923056617, "percentage": 62.45, "elapsed_time": "0:34:48", "remaining_time": "0:20:55", "throughput": 5578.51, "total_tokens": 11652672}
|
|
{"current_steps": 23665, "total_steps": 37885, "loss": 0.0, "lr": 7.424568475267697e-07, "epoch": 3.1232677840834104, "percentage": 62.47, "elapsed_time": "0:34:49", "remaining_time": "0:20:55", "throughput": 5578.69, "total_tokens": 11654848}
|
|
{"current_steps": 23670, "total_steps": 37885, "loss": 0.0, "lr": 7.42011717571393e-07, "epoch": 3.1239276758611587, "percentage": 62.48, "elapsed_time": "0:34:49", "remaining_time": "0:20:54", "throughput": 5578.8, "total_tokens": 11656896}
|
|
{"current_steps": 23675, "total_steps": 37885, "loss": 0.0, "lr": 7.415666423721613e-07, "epoch": 3.1245875676389074, "percentage": 62.49, "elapsed_time": "0:34:49", "remaining_time": "0:20:54", "throughput": 5579.06, "total_tokens": 11659264}
|
|
{"current_steps": 23680, "total_steps": 37885, "loss": 0.0, "lr": 7.411216220235381e-07, "epoch": 3.1252474594166557, "percentage": 62.5, "elapsed_time": "0:34:50", "remaining_time": "0:20:53", "throughput": 5579.38, "total_tokens": 11661760}
|
|
{"current_steps": 23685, "total_steps": 37885, "loss": 0.0891, "lr": 7.406766566199762e-07, "epoch": 3.125907351194404, "percentage": 62.52, "elapsed_time": "0:34:50", "remaining_time": "0:20:53", "throughput": 5579.57, "total_tokens": 11664000}
|
|
{"current_steps": 23690, "total_steps": 37885, "loss": 0.0001, "lr": 7.402317462559163e-07, "epoch": 3.1265672429721527, "percentage": 62.53, "elapsed_time": "0:34:50", "remaining_time": "0:20:52", "throughput": 5579.94, "total_tokens": 11666624}
|
|
{"current_steps": 23695, "total_steps": 37885, "loss": 0.0, "lr": 7.397868910257865e-07, "epoch": 3.127227134749901, "percentage": 62.54, "elapsed_time": "0:34:51", "remaining_time": "0:20:52", "throughput": 5580.37, "total_tokens": 11669376}
|
|
{"current_steps": 23700, "total_steps": 37885, "loss": 0.0564, "lr": 7.393420910240054e-07, "epoch": 3.1278870265276493, "percentage": 62.56, "elapsed_time": "0:34:51", "remaining_time": "0:20:51", "throughput": 5580.79, "total_tokens": 11672128}
|
|
{"current_steps": 23705, "total_steps": 37885, "loss": 0.0, "lr": 7.388973463449773e-07, "epoch": 3.128546918305398, "percentage": 62.57, "elapsed_time": "0:34:51", "remaining_time": "0:20:51", "throughput": 5581.05, "total_tokens": 11674496}
|
|
{"current_steps": 23710, "total_steps": 37885, "loss": 0.0, "lr": 7.384526570830972e-07, "epoch": 3.1292068100831463, "percentage": 62.58, "elapsed_time": "0:34:52", "remaining_time": "0:20:50", "throughput": 5581.37, "total_tokens": 11676992}
|
|
{"current_steps": 23715, "total_steps": 37885, "loss": 0.0004, "lr": 7.380080233327466e-07, "epoch": 3.129866701860895, "percentage": 62.6, "elapsed_time": "0:34:52", "remaining_time": "0:20:50", "throughput": 5581.79, "total_tokens": 11679744}
|
|
{"current_steps": 23720, "total_steps": 37885, "loss": 0.0087, "lr": 7.375634451882956e-07, "epoch": 3.1305265936386433, "percentage": 62.61, "elapsed_time": "0:34:52", "remaining_time": "0:20:49", "throughput": 5582.02, "total_tokens": 11682048}
|
|
{"current_steps": 23725, "total_steps": 37885, "loss": 0.0213, "lr": 7.371189227441031e-07, "epoch": 3.1311864854163916, "percentage": 62.62, "elapsed_time": "0:34:53", "remaining_time": "0:20:49", "throughput": 5582.36, "total_tokens": 11684608}
|
|
{"current_steps": 23730, "total_steps": 37885, "loss": 0.0552, "lr": 7.366744560945155e-07, "epoch": 3.1318463771941403, "percentage": 62.64, "elapsed_time": "0:34:53", "remaining_time": "0:20:48", "throughput": 5582.62, "total_tokens": 11686976}
|
|
{"current_steps": 23735, "total_steps": 37885, "loss": 0.0001, "lr": 7.362300453338679e-07, "epoch": 3.1325062689718886, "percentage": 62.65, "elapsed_time": "0:34:53", "remaining_time": "0:20:48", "throughput": 5582.85, "total_tokens": 11689280}
|
|
{"current_steps": 23740, "total_steps": 37885, "loss": 0.0, "lr": 7.357856905564832e-07, "epoch": 3.133166160749637, "percentage": 62.66, "elapsed_time": "0:34:54", "remaining_time": "0:20:47", "throughput": 5583.17, "total_tokens": 11691776}
|
|
{"current_steps": 23745, "total_steps": 37885, "loss": 0.0, "lr": 7.353413918566721e-07, "epoch": 3.1338260525273856, "percentage": 62.68, "elapsed_time": "0:34:54", "remaining_time": "0:20:47", "throughput": 5583.4, "total_tokens": 11694080}
|
|
{"current_steps": 23750, "total_steps": 37885, "loss": 0.0, "lr": 7.348971493287342e-07, "epoch": 3.134485944305134, "percentage": 62.69, "elapsed_time": "0:34:54", "remaining_time": "0:20:46", "throughput": 5583.74, "total_tokens": 11696640}
|
|
{"current_steps": 23755, "total_steps": 37885, "loss": 0.0239, "lr": 7.344529630669565e-07, "epoch": 3.1351458360828826, "percentage": 62.7, "elapsed_time": "0:34:55", "remaining_time": "0:20:46", "throughput": 5584.04, "total_tokens": 11699136}
|
|
{"current_steps": 23760, "total_steps": 37885, "loss": 0.0005, "lr": 7.340088331656147e-07, "epoch": 3.135805727860631, "percentage": 62.72, "elapsed_time": "0:34:55", "remaining_time": "0:20:45", "throughput": 5584.36, "total_tokens": 11701632}
|
|
{"current_steps": 23765, "total_steps": 37885, "loss": 0.0322, "lr": 7.33564759718972e-07, "epoch": 3.136465619638379, "percentage": 62.73, "elapsed_time": "0:34:55", "remaining_time": "0:20:45", "throughput": 5584.62, "total_tokens": 11704000}
|
|
{"current_steps": 23770, "total_steps": 37885, "loss": 0.0472, "lr": 7.331207428212792e-07, "epoch": 3.137125511416128, "percentage": 62.74, "elapsed_time": "0:34:56", "remaining_time": "0:20:44", "throughput": 5584.98, "total_tokens": 11706624}
|
|
{"current_steps": 23775, "total_steps": 37885, "loss": 0.0004, "lr": 7.326767825667766e-07, "epoch": 3.137785403193876, "percentage": 62.76, "elapsed_time": "0:34:56", "remaining_time": "0:20:44", "throughput": 5585.18, "total_tokens": 11708864}
|
|
{"current_steps": 23780, "total_steps": 37885, "loss": 0.0215, "lr": 7.322328790496908e-07, "epoch": 3.138445294971625, "percentage": 62.77, "elapsed_time": "0:34:56", "remaining_time": "0:20:43", "throughput": 5585.46, "total_tokens": 11711296}
|
|
{"current_steps": 23785, "total_steps": 37885, "loss": 0.0001, "lr": 7.317890323642375e-07, "epoch": 3.139105186749373, "percentage": 62.78, "elapsed_time": "0:34:57", "remaining_time": "0:20:43", "throughput": 5585.69, "total_tokens": 11713600}
|
|
{"current_steps": 23790, "total_steps": 37885, "loss": 0.0, "lr": 7.3134524260462e-07, "epoch": 3.1397650785271214, "percentage": 62.8, "elapsed_time": "0:34:57", "remaining_time": "0:20:42", "throughput": 5585.98, "total_tokens": 11716032}
|
|
{"current_steps": 23795, "total_steps": 37885, "loss": 0.0, "lr": 7.30901509865029e-07, "epoch": 3.14042497030487, "percentage": 62.81, "elapsed_time": "0:34:57", "remaining_time": "0:20:42", "throughput": 5586.32, "total_tokens": 11718592}
|
|
{"current_steps": 23800, "total_steps": 37885, "loss": 0.0001, "lr": 7.304578342396441e-07, "epoch": 3.1410848620826184, "percentage": 62.82, "elapsed_time": "0:34:58", "remaining_time": "0:20:41", "throughput": 5586.72, "total_tokens": 11721280}
|
|
{"current_steps": 23805, "total_steps": 37885, "loss": 0.0014, "lr": 7.300142158226319e-07, "epoch": 3.141744753860367, "percentage": 62.83, "elapsed_time": "0:34:58", "remaining_time": "0:20:41", "throughput": 5587.09, "total_tokens": 11723904}
|
|
{"current_steps": 23810, "total_steps": 37885, "loss": 0.0, "lr": 7.295706547081475e-07, "epoch": 3.1424046456381154, "percentage": 62.85, "elapsed_time": "0:34:58", "remaining_time": "0:20:40", "throughput": 5587.36, "total_tokens": 11726336}
|
|
{"current_steps": 23815, "total_steps": 37885, "loss": 0.0, "lr": 7.291271509903334e-07, "epoch": 3.1430645374158637, "percentage": 62.86, "elapsed_time": "0:34:59", "remaining_time": "0:20:40", "throughput": 5587.59, "total_tokens": 11728640}
|
|
{"current_steps": 23820, "total_steps": 37885, "loss": 0.0001, "lr": 7.286837047633195e-07, "epoch": 3.1437244291936124, "percentage": 62.87, "elapsed_time": "0:34:59", "remaining_time": "0:20:39", "throughput": 5587.88, "total_tokens": 11731072}
|
|
{"current_steps": 23825, "total_steps": 37885, "loss": 0.0001, "lr": 7.282403161212251e-07, "epoch": 3.1443843209713607, "percentage": 62.89, "elapsed_time": "0:34:59", "remaining_time": "0:20:39", "throughput": 5588.2, "total_tokens": 11733568}
|
|
{"current_steps": 23830, "total_steps": 37885, "loss": 0.1047, "lr": 7.277969851581551e-07, "epoch": 3.145044212749109, "percentage": 62.9, "elapsed_time": "0:35:00", "remaining_time": "0:20:38", "throughput": 5588.51, "total_tokens": 11736064}
|
|
{"current_steps": 23835, "total_steps": 37885, "loss": 0.0001, "lr": 7.273537119682045e-07, "epoch": 3.1457041045268577, "percentage": 62.91, "elapsed_time": "0:35:00", "remaining_time": "0:20:38", "throughput": 5588.77, "total_tokens": 11738432}
|
|
{"current_steps": 23840, "total_steps": 37885, "loss": 0.0, "lr": 7.26910496645454e-07, "epoch": 3.146363996304606, "percentage": 62.93, "elapsed_time": "0:35:00", "remaining_time": "0:20:37", "throughput": 5589.16, "total_tokens": 11741120}
|
|
{"current_steps": 23845, "total_steps": 37885, "loss": 0.0, "lr": 7.264673392839726e-07, "epoch": 3.1470238880823547, "percentage": 62.94, "elapsed_time": "0:35:01", "remaining_time": "0:20:37", "throughput": 5589.33, "total_tokens": 11743296}
|
|
{"current_steps": 23850, "total_steps": 37885, "loss": 0.0411, "lr": 7.260242399778183e-07, "epoch": 3.147683779860103, "percentage": 62.95, "elapsed_time": "0:35:01", "remaining_time": "0:20:36", "throughput": 5589.64, "total_tokens": 11745792}
|
|
{"current_steps": 23855, "total_steps": 37885, "loss": 0.0252, "lr": 7.255811988210343e-07, "epoch": 3.1483436716378512, "percentage": 62.97, "elapsed_time": "0:35:01", "remaining_time": "0:20:36", "throughput": 5589.81, "total_tokens": 11747968}
|
|
{"current_steps": 23860, "total_steps": 37885, "loss": 0.0896, "lr": 7.251382159076544e-07, "epoch": 3.1490035634156, "percentage": 62.98, "elapsed_time": "0:35:02", "remaining_time": "0:20:35", "throughput": 5589.97, "total_tokens": 11750144}
|
|
{"current_steps": 23865, "total_steps": 37885, "loss": 0.1151, "lr": 7.246952913316977e-07, "epoch": 3.1496634551933482, "percentage": 62.99, "elapsed_time": "0:35:02", "remaining_time": "0:20:35", "throughput": 5590.31, "total_tokens": 11752704}
|
|
{"current_steps": 23870, "total_steps": 37885, "loss": 0.0766, "lr": 7.242524251871714e-07, "epoch": 3.1503233469710965, "percentage": 63.01, "elapsed_time": "0:35:02", "remaining_time": "0:20:34", "throughput": 5590.56, "total_tokens": 11755072}
|
|
{"current_steps": 23875, "total_steps": 37885, "loss": 0.0001, "lr": 7.238096175680714e-07, "epoch": 3.1509832387488452, "percentage": 63.02, "elapsed_time": "0:35:02", "remaining_time": "0:20:34", "throughput": 5590.84, "total_tokens": 11757504}
|
|
{"current_steps": 23880, "total_steps": 37885, "loss": 0.0004, "lr": 7.233668685683798e-07, "epoch": 3.1516431305265935, "percentage": 63.03, "elapsed_time": "0:35:03", "remaining_time": "0:20:33", "throughput": 5591.04, "total_tokens": 11759744}
|
|
{"current_steps": 23885, "total_steps": 37885, "loss": 0.0907, "lr": 7.229241782820673e-07, "epoch": 3.1523030223043422, "percentage": 63.05, "elapsed_time": "0:35:03", "remaining_time": "0:20:33", "throughput": 5591.33, "total_tokens": 11762176}
|
|
{"current_steps": 23890, "total_steps": 37885, "loss": 0.0, "lr": 7.224815468030916e-07, "epoch": 3.1529629140820905, "percentage": 63.06, "elapsed_time": "0:35:03", "remaining_time": "0:20:32", "throughput": 5591.63, "total_tokens": 11764672}
|
|
{"current_steps": 23895, "total_steps": 37885, "loss": 0.0, "lr": 7.220389742253978e-07, "epoch": 3.153622805859839, "percentage": 63.07, "elapsed_time": "0:35:04", "remaining_time": "0:20:32", "throughput": 5591.94, "total_tokens": 11767168}
|
|
{"current_steps": 23900, "total_steps": 37885, "loss": 0.0025, "lr": 7.21596460642919e-07, "epoch": 3.1542826976375875, "percentage": 63.09, "elapsed_time": "0:35:04", "remaining_time": "0:20:31", "throughput": 5592.24, "total_tokens": 11769664}
|
|
{"current_steps": 23905, "total_steps": 37885, "loss": 0.0, "lr": 7.211540061495751e-07, "epoch": 3.154942589415336, "percentage": 63.1, "elapsed_time": "0:35:04", "remaining_time": "0:20:31", "throughput": 5592.63, "total_tokens": 11772352}
|
|
{"current_steps": 23910, "total_steps": 37885, "loss": 0.0087, "lr": 7.207116108392746e-07, "epoch": 3.1556024811930845, "percentage": 63.11, "elapsed_time": "0:35:05", "remaining_time": "0:20:30", "throughput": 5592.86, "total_tokens": 11774656}
|
|
{"current_steps": 23915, "total_steps": 37885, "loss": 0.0001, "lr": 7.202692748059121e-07, "epoch": 3.156262372970833, "percentage": 63.13, "elapsed_time": "0:35:05", "remaining_time": "0:20:30", "throughput": 5593.09, "total_tokens": 11776960}
|
|
{"current_steps": 23920, "total_steps": 37885, "loss": 0.061, "lr": 7.1982699814337e-07, "epoch": 3.156922264748581, "percentage": 63.14, "elapsed_time": "0:35:05", "remaining_time": "0:20:29", "throughput": 5593.4, "total_tokens": 11779456}
|
|
{"current_steps": 23925, "total_steps": 37885, "loss": 0.0001, "lr": 7.193847809455192e-07, "epoch": 3.15758215652633, "percentage": 63.15, "elapsed_time": "0:35:06", "remaining_time": "0:20:28", "throughput": 5593.74, "total_tokens": 11782016}
|
|
{"current_steps": 23930, "total_steps": 37885, "loss": 0.0, "lr": 7.189426233062161e-07, "epoch": 3.158242048304078, "percentage": 63.16, "elapsed_time": "0:35:06", "remaining_time": "0:20:28", "throughput": 5594.02, "total_tokens": 11784448}
|
|
{"current_steps": 23935, "total_steps": 37885, "loss": 0.0, "lr": 7.185005253193064e-07, "epoch": 3.158901940081827, "percentage": 63.18, "elapsed_time": "0:35:06", "remaining_time": "0:20:27", "throughput": 5594.28, "total_tokens": 11786816}
|
|
{"current_steps": 23940, "total_steps": 37885, "loss": 0.0299, "lr": 7.180584870786217e-07, "epoch": 3.159561831859575, "percentage": 63.19, "elapsed_time": "0:35:07", "remaining_time": "0:20:27", "throughput": 5594.51, "total_tokens": 11789120}
|
|
{"current_steps": 23945, "total_steps": 37885, "loss": 0.0052, "lr": 7.17616508677981e-07, "epoch": 3.1602217236373233, "percentage": 63.2, "elapsed_time": "0:35:07", "remaining_time": "0:20:26", "throughput": 5594.93, "total_tokens": 11791872}
|
|
{"current_steps": 23950, "total_steps": 37885, "loss": 0.0961, "lr": 7.171745902111919e-07, "epoch": 3.160881615415072, "percentage": 63.22, "elapsed_time": "0:35:07", "remaining_time": "0:20:26", "throughput": 5595.33, "total_tokens": 11794560}
|
|
{"current_steps": 23955, "total_steps": 37885, "loss": 0.0, "lr": 7.167327317720479e-07, "epoch": 3.1615415071928203, "percentage": 63.23, "elapsed_time": "0:35:08", "remaining_time": "0:20:25", "throughput": 5595.67, "total_tokens": 11797120}
|
|
{"current_steps": 23960, "total_steps": 37885, "loss": 0.0001, "lr": 7.162909334543303e-07, "epoch": 3.1622013989705686, "percentage": 63.24, "elapsed_time": "0:35:08", "remaining_time": "0:20:25", "throughput": 5596.01, "total_tokens": 11799680}
|
|
{"current_steps": 23965, "total_steps": 37885, "loss": 0.0, "lr": 7.158491953518079e-07, "epoch": 3.1628612907483173, "percentage": 63.26, "elapsed_time": "0:35:08", "remaining_time": "0:20:24", "throughput": 5596.27, "total_tokens": 11802048}
|
|
{"current_steps": 23970, "total_steps": 37885, "loss": 0.0431, "lr": 7.154075175582355e-07, "epoch": 3.1635211825260656, "percentage": 63.27, "elapsed_time": "0:35:09", "remaining_time": "0:20:24", "throughput": 5596.57, "total_tokens": 11804544}
|
|
{"current_steps": 23975, "total_steps": 37885, "loss": 0.0, "lr": 7.149659001673572e-07, "epoch": 3.1641810743038143, "percentage": 63.28, "elapsed_time": "0:35:09", "remaining_time": "0:20:23", "throughput": 5596.85, "total_tokens": 11806976}
|
|
{"current_steps": 23980, "total_steps": 37885, "loss": 0.0021, "lr": 7.14524343272902e-07, "epoch": 3.1648409660815626, "percentage": 63.3, "elapsed_time": "0:35:09", "remaining_time": "0:20:23", "throughput": 5597.11, "total_tokens": 11809344}
|
|
{"current_steps": 23985, "total_steps": 37885, "loss": 0.0001, "lr": 7.14082846968588e-07, "epoch": 3.165500857859311, "percentage": 63.31, "elapsed_time": "0:35:10", "remaining_time": "0:20:22", "throughput": 5597.42, "total_tokens": 11811840}
|
|
{"current_steps": 23990, "total_steps": 37885, "loss": 0.0001, "lr": 7.136414113481191e-07, "epoch": 3.1661607496370596, "percentage": 63.32, "elapsed_time": "0:35:10", "remaining_time": "0:20:22", "throughput": 5597.67, "total_tokens": 11814208}
|
|
{"current_steps": 23995, "total_steps": 37885, "loss": 0.0, "lr": 7.132000365051873e-07, "epoch": 3.166820641414808, "percentage": 63.34, "elapsed_time": "0:35:10", "remaining_time": "0:20:21", "throughput": 5598.0, "total_tokens": 11816768}
|
|
{"current_steps": 24000, "total_steps": 37885, "loss": 0.0002, "lr": 7.127587225334712e-07, "epoch": 3.1674805331925566, "percentage": 63.35, "elapsed_time": "0:35:11", "remaining_time": "0:20:21", "throughput": 5598.4, "total_tokens": 11819456}
|
|
{"current_steps": 24005, "total_steps": 37885, "loss": 0.0001, "lr": 7.123174695266354e-07, "epoch": 3.168140424970305, "percentage": 63.36, "elapsed_time": "0:35:11", "remaining_time": "0:20:20", "throughput": 5598.62, "total_tokens": 11821760}
|
|
{"current_steps": 24010, "total_steps": 37885, "loss": 0.0396, "lr": 7.11876277578334e-07, "epoch": 3.168800316748053, "percentage": 63.38, "elapsed_time": "0:35:11", "remaining_time": "0:20:20", "throughput": 5598.9, "total_tokens": 11824192}
|
|
{"current_steps": 24015, "total_steps": 37885, "loss": 0.0, "lr": 7.114351467822058e-07, "epoch": 3.169460208525802, "percentage": 63.39, "elapsed_time": "0:35:12", "remaining_time": "0:20:19", "throughput": 5599.21, "total_tokens": 11826688}
|
|
{"current_steps": 24020, "total_steps": 37885, "loss": 0.0, "lr": 7.109940772318787e-07, "epoch": 3.17012010030355, "percentage": 63.4, "elapsed_time": "0:35:12", "remaining_time": "0:20:19", "throughput": 5599.37, "total_tokens": 11828864}
|
|
{"current_steps": 24025, "total_steps": 37885, "loss": 0.0001, "lr": 7.105530690209656e-07, "epoch": 3.1707799920812985, "percentage": 63.42, "elapsed_time": "0:35:12", "remaining_time": "0:20:18", "throughput": 5599.6, "total_tokens": 11831168}
|
|
{"current_steps": 24030, "total_steps": 37885, "loss": 0.0626, "lr": 7.101121222430675e-07, "epoch": 3.171439883859047, "percentage": 63.43, "elapsed_time": "0:35:13", "remaining_time": "0:20:18", "throughput": 5600.11, "total_tokens": 11834176}
|
|
{"current_steps": 24035, "total_steps": 37885, "loss": 0.0, "lr": 7.096712369917724e-07, "epoch": 3.1720997756367955, "percentage": 63.44, "elapsed_time": "0:35:13", "remaining_time": "0:20:17", "throughput": 5600.25, "total_tokens": 11836288}
|
|
{"current_steps": 24040, "total_steps": 37885, "loss": 0.0008, "lr": 7.092304133606544e-07, "epoch": 3.172759667414544, "percentage": 63.46, "elapsed_time": "0:35:13", "remaining_time": "0:20:17", "throughput": 5600.66, "total_tokens": 11839040}
|
|
{"current_steps": 24045, "total_steps": 37885, "loss": 0.0, "lr": 7.087896514432762e-07, "epoch": 3.1734195591922925, "percentage": 63.47, "elapsed_time": "0:35:14", "remaining_time": "0:20:16", "throughput": 5600.86, "total_tokens": 11841280}
|
|
{"current_steps": 24050, "total_steps": 37885, "loss": 0.0511, "lr": 7.083489513331855e-07, "epoch": 3.1740794509700407, "percentage": 63.48, "elapsed_time": "0:35:14", "remaining_time": "0:20:16", "throughput": 5601.21, "total_tokens": 11843904}
|
|
{"current_steps": 24055, "total_steps": 37885, "loss": 0.0128, "lr": 7.079083131239177e-07, "epoch": 3.1747393427477895, "percentage": 63.49, "elapsed_time": "0:35:14", "remaining_time": "0:20:15", "throughput": 5601.49, "total_tokens": 11846336}
|
|
{"current_steps": 24060, "total_steps": 37885, "loss": 0.0008, "lr": 7.074677369089955e-07, "epoch": 3.1753992345255377, "percentage": 63.51, "elapsed_time": "0:35:15", "remaining_time": "0:20:15", "throughput": 5601.69, "total_tokens": 11848576}
|
|
{"current_steps": 24065, "total_steps": 37885, "loss": 0.0009, "lr": 7.070272227819276e-07, "epoch": 3.1760591263032865, "percentage": 63.52, "elapsed_time": "0:35:15", "remaining_time": "0:20:14", "throughput": 5601.82, "total_tokens": 11850688}
|
|
{"current_steps": 24070, "total_steps": 37885, "loss": 0.0, "lr": 7.065867708362103e-07, "epoch": 3.1767190180810347, "percentage": 63.53, "elapsed_time": "0:35:15", "remaining_time": "0:20:14", "throughput": 5602.05, "total_tokens": 11852992}
|
|
{"current_steps": 24075, "total_steps": 37885, "loss": 0.0001, "lr": 7.061463811653261e-07, "epoch": 3.177378909858783, "percentage": 63.55, "elapsed_time": "0:35:16", "remaining_time": "0:20:13", "throughput": 5602.46, "total_tokens": 11855744}
|
|
{"current_steps": 24080, "total_steps": 37885, "loss": 0.0001, "lr": 7.057060538627445e-07, "epoch": 3.1780388016365317, "percentage": 63.56, "elapsed_time": "0:35:16", "remaining_time": "0:20:13", "throughput": 5602.71, "total_tokens": 11858112}
|
|
{"current_steps": 24085, "total_steps": 37885, "loss": 0.0366, "lr": 7.05265789021922e-07, "epoch": 3.17869869341428, "percentage": 63.57, "elapsed_time": "0:35:16", "remaining_time": "0:20:12", "throughput": 5602.79, "total_tokens": 11860096}
|
|
{"current_steps": 24090, "total_steps": 37885, "loss": 0.0, "lr": 7.048255867363014e-07, "epoch": 3.1793585851920287, "percentage": 63.59, "elapsed_time": "0:35:17", "remaining_time": "0:20:12", "throughput": 5603.16, "total_tokens": 11862720}
|
|
{"current_steps": 24095, "total_steps": 37885, "loss": 0.0682, "lr": 7.043854470993125e-07, "epoch": 3.180018476969777, "percentage": 63.6, "elapsed_time": "0:35:17", "remaining_time": "0:20:11", "throughput": 5603.41, "total_tokens": 11865088}
|
|
{"current_steps": 24100, "total_steps": 37885, "loss": 0.1339, "lr": 7.039453702043719e-07, "epoch": 3.1806783687475253, "percentage": 63.61, "elapsed_time": "0:35:17", "remaining_time": "0:20:11", "throughput": 5603.77, "total_tokens": 11867712}
|
|
{"current_steps": 24105, "total_steps": 37885, "loss": 0.0034, "lr": 7.035053561448825e-07, "epoch": 3.181338260525274, "percentage": 63.63, "elapsed_time": "0:35:18", "remaining_time": "0:20:10", "throughput": 5604.11, "total_tokens": 11870272}
|
|
{"current_steps": 24110, "total_steps": 37885, "loss": 0.0, "lr": 7.030654050142341e-07, "epoch": 3.1819981523030223, "percentage": 63.64, "elapsed_time": "0:35:18", "remaining_time": "0:20:10", "throughput": 5604.47, "total_tokens": 11872896}
|
|
{"current_steps": 24115, "total_steps": 37885, "loss": 0.0706, "lr": 7.026255169058035e-07, "epoch": 3.1826580440807706, "percentage": 63.65, "elapsed_time": "0:35:18", "remaining_time": "0:20:09", "throughput": 5604.78, "total_tokens": 11875392}
|
|
{"current_steps": 24120, "total_steps": 37885, "loss": 0.0114, "lr": 7.021856919129534e-07, "epoch": 3.1833179358585193, "percentage": 63.67, "elapsed_time": "0:35:19", "remaining_time": "0:20:09", "throughput": 5605.0, "total_tokens": 11877696}
|
|
{"current_steps": 24125, "total_steps": 37885, "loss": 0.0308, "lr": 7.017459301290337e-07, "epoch": 3.1839778276362676, "percentage": 63.68, "elapsed_time": "0:35:19", "remaining_time": "0:20:08", "throughput": 5605.39, "total_tokens": 11880384}
|
|
{"current_steps": 24130, "total_steps": 37885, "loss": 0.0813, "lr": 7.013062316473803e-07, "epoch": 3.1846377194140163, "percentage": 63.69, "elapsed_time": "0:35:19", "remaining_time": "0:20:08", "throughput": 5605.73, "total_tokens": 11882944}
|
|
{"current_steps": 24135, "total_steps": 37885, "loss": 0.0, "lr": 7.008665965613165e-07, "epoch": 3.1852976111917646, "percentage": 63.71, "elapsed_time": "0:35:20", "remaining_time": "0:20:07", "throughput": 5606.03, "total_tokens": 11885440}
|
|
{"current_steps": 24140, "total_steps": 37885, "loss": 0.0001, "lr": 7.004270249641513e-07, "epoch": 3.185957502969513, "percentage": 63.72, "elapsed_time": "0:35:20", "remaining_time": "0:20:07", "throughput": 5606.23, "total_tokens": 11887680}
|
|
{"current_steps": 24145, "total_steps": 37885, "loss": 0.0009, "lr": 6.999875169491808e-07, "epoch": 3.1866173947472616, "percentage": 63.73, "elapsed_time": "0:35:20", "remaining_time": "0:20:06", "throughput": 5606.45, "total_tokens": 11889984}
|
|
{"current_steps": 24150, "total_steps": 37885, "loss": 0.0, "lr": 6.995480726096875e-07, "epoch": 3.18727728652501, "percentage": 63.75, "elapsed_time": "0:35:21", "remaining_time": "0:20:06", "throughput": 5606.65, "total_tokens": 11892224}
|
|
{"current_steps": 24155, "total_steps": 37885, "loss": 0.0441, "lr": 6.991086920389395e-07, "epoch": 3.187937178302758, "percentage": 63.76, "elapsed_time": "0:35:21", "remaining_time": "0:20:05", "throughput": 5606.93, "total_tokens": 11894656}
|
|
{"current_steps": 24160, "total_steps": 37885, "loss": 0.1136, "lr": 6.986693753301934e-07, "epoch": 3.188597070080507, "percentage": 63.77, "elapsed_time": "0:35:21", "remaining_time": "0:20:05", "throughput": 5607.27, "total_tokens": 11897216}
|
|
{"current_steps": 24165, "total_steps": 37885, "loss": 0.0016, "lr": 6.982301225766897e-07, "epoch": 3.189256961858255, "percentage": 63.79, "elapsed_time": "0:35:22", "remaining_time": "0:20:04", "throughput": 5607.58, "total_tokens": 11899712}
|
|
{"current_steps": 24170, "total_steps": 37885, "loss": 0.0239, "lr": 6.977909338716578e-07, "epoch": 3.189916853636004, "percentage": 63.8, "elapsed_time": "0:35:22", "remaining_time": "0:20:04", "throughput": 5607.86, "total_tokens": 11902144}
|
|
{"current_steps": 24175, "total_steps": 37885, "loss": 0.099, "lr": 6.973518093083116e-07, "epoch": 3.190576745413752, "percentage": 63.81, "elapsed_time": "0:35:22", "remaining_time": "0:20:03", "throughput": 5608.16, "total_tokens": 11904640}
|
|
{"current_steps": 24180, "total_steps": 37885, "loss": 0.0008, "lr": 6.969127489798519e-07, "epoch": 3.1912366371915004, "percentage": 63.82, "elapsed_time": "0:35:23", "remaining_time": "0:20:03", "throughput": 5608.46, "total_tokens": 11907136}
|
|
{"current_steps": 24185, "total_steps": 37885, "loss": 0.0013, "lr": 6.964737529794669e-07, "epoch": 3.191896528969249, "percentage": 63.84, "elapsed_time": "0:35:23", "remaining_time": "0:20:02", "throughput": 5608.79, "total_tokens": 11909696}
|
|
{"current_steps": 24190, "total_steps": 37885, "loss": 0.0, "lr": 6.960348214003294e-07, "epoch": 3.1925564207469974, "percentage": 63.85, "elapsed_time": "0:35:23", "remaining_time": "0:20:02", "throughput": 5609.04, "total_tokens": 11912064}
|
|
{"current_steps": 24195, "total_steps": 37885, "loss": 0.0, "lr": 6.955959543356005e-07, "epoch": 3.193216312524746, "percentage": 63.86, "elapsed_time": "0:35:24", "remaining_time": "0:20:01", "throughput": 5609.26, "total_tokens": 11914368}
|
|
{"current_steps": 24200, "total_steps": 37885, "loss": 0.0001, "lr": 6.951571518784257e-07, "epoch": 3.1938762043024944, "percentage": 63.88, "elapsed_time": "0:35:24", "remaining_time": "0:20:01", "throughput": 5609.51, "total_tokens": 11916736}
|
|
{"current_steps": 24205, "total_steps": 37885, "loss": 0.0, "lr": 6.947184141219378e-07, "epoch": 3.1945360960802427, "percentage": 63.89, "elapsed_time": "0:35:24", "remaining_time": "0:20:00", "throughput": 5609.68, "total_tokens": 11918912}
|
|
{"current_steps": 24210, "total_steps": 37885, "loss": 0.0, "lr": 6.94279741159256e-07, "epoch": 3.1951959878579914, "percentage": 63.9, "elapsed_time": "0:35:25", "remaining_time": "0:20:00", "throughput": 5609.88, "total_tokens": 11921152}
|
|
{"current_steps": 24215, "total_steps": 37885, "loss": 0.0001, "lr": 6.93841133083485e-07, "epoch": 3.1958558796357397, "percentage": 63.92, "elapsed_time": "0:35:25", "remaining_time": "0:19:59", "throughput": 5610.07, "total_tokens": 11923392}
|
|
{"current_steps": 24220, "total_steps": 37885, "loss": 0.0738, "lr": 6.934025899877167e-07, "epoch": 3.1965157714134884, "percentage": 63.93, "elapsed_time": "0:35:25", "remaining_time": "0:19:59", "throughput": 5610.4, "total_tokens": 11925952}
|
|
{"current_steps": 24225, "total_steps": 37885, "loss": 0.0, "lr": 6.929641119650286e-07, "epoch": 3.1971756631912367, "percentage": 63.94, "elapsed_time": "0:35:26", "remaining_time": "0:19:58", "throughput": 5610.77, "total_tokens": 11928576}
|
|
{"current_steps": 24230, "total_steps": 37885, "loss": 0.0002, "lr": 6.92525699108484e-07, "epoch": 3.197835554968985, "percentage": 63.96, "elapsed_time": "0:35:26", "remaining_time": "0:19:58", "throughput": 5610.99, "total_tokens": 11930880}
|
|
{"current_steps": 24235, "total_steps": 37885, "loss": 0.0001, "lr": 6.920873515111336e-07, "epoch": 3.1984954467467337, "percentage": 63.97, "elapsed_time": "0:35:26", "remaining_time": "0:19:57", "throughput": 5611.27, "total_tokens": 11933312}
|
|
{"current_steps": 24240, "total_steps": 37885, "loss": 0.0142, "lr": 6.916490692660127e-07, "epoch": 3.199155338524482, "percentage": 63.98, "elapsed_time": "0:35:27", "remaining_time": "0:19:57", "throughput": 5611.62, "total_tokens": 11935936}
|
|
{"current_steps": 24245, "total_steps": 37885, "loss": 0.043, "lr": 6.912108524661443e-07, "epoch": 3.1998152303022303, "percentage": 64.0, "elapsed_time": "0:35:27", "remaining_time": "0:19:56", "throughput": 5611.76, "total_tokens": 11938048}
|
|
{"current_steps": 24250, "total_steps": 37885, "loss": 0.0, "lr": 6.907727012045363e-07, "epoch": 3.200475122079979, "percentage": 64.01, "elapsed_time": "0:35:27", "remaining_time": "0:19:56", "throughput": 5612.03, "total_tokens": 11940480}
|
|
{"current_steps": 24255, "total_steps": 37885, "loss": 0.0372, "lr": 6.903346155741831e-07, "epoch": 3.2011350138577273, "percentage": 64.02, "elapsed_time": "0:35:27", "remaining_time": "0:19:55", "throughput": 5612.29, "total_tokens": 11942848}
|
|
{"current_steps": 24260, "total_steps": 37885, "loss": 0.0, "lr": 6.898965956680655e-07, "epoch": 3.201794905635476, "percentage": 64.04, "elapsed_time": "0:35:28", "remaining_time": "0:19:55", "throughput": 5612.57, "total_tokens": 11945280}
|
|
{"current_steps": 24265, "total_steps": 37885, "loss": 0.0002, "lr": 6.894586415791497e-07, "epoch": 3.2024547974132243, "percentage": 64.05, "elapsed_time": "0:35:28", "remaining_time": "0:19:54", "throughput": 5612.98, "total_tokens": 11948032}
|
|
{"current_steps": 24270, "total_steps": 37885, "loss": 0.0007, "lr": 6.890207534003884e-07, "epoch": 3.2031146891909725, "percentage": 64.06, "elapsed_time": "0:35:28", "remaining_time": "0:19:54", "throughput": 5613.26, "total_tokens": 11950464}
|
|
{"current_steps": 24275, "total_steps": 37885, "loss": 0.0, "lr": 6.885829312247207e-07, "epoch": 3.2037745809687213, "percentage": 64.08, "elapsed_time": "0:35:29", "remaining_time": "0:19:53", "throughput": 5613.67, "total_tokens": 11953216}
|
|
{"current_steps": 24280, "total_steps": 37885, "loss": 0.0, "lr": 6.881451751450702e-07, "epoch": 3.2044344727464695, "percentage": 64.09, "elapsed_time": "0:35:29", "remaining_time": "0:19:53", "throughput": 5613.88, "total_tokens": 11955520}
|
|
{"current_steps": 24285, "total_steps": 37885, "loss": 0.0308, "lr": 6.877074852543483e-07, "epoch": 3.205094364524218, "percentage": 64.1, "elapsed_time": "0:35:29", "remaining_time": "0:19:52", "throughput": 5614.19, "total_tokens": 11958016}
|
|
{"current_steps": 24290, "total_steps": 37885, "loss": 0.0, "lr": 6.872698616454511e-07, "epoch": 3.2057542563019665, "percentage": 64.12, "elapsed_time": "0:35:30", "remaining_time": "0:19:52", "throughput": 5614.5, "total_tokens": 11960512}
|
|
{"current_steps": 24295, "total_steps": 37885, "loss": 0.0282, "lr": 6.868323044112612e-07, "epoch": 3.206414148079715, "percentage": 64.13, "elapsed_time": "0:35:30", "remaining_time": "0:19:51", "throughput": 5614.78, "total_tokens": 11962944}
|
|
{"current_steps": 24300, "total_steps": 37885, "loss": 0.0, "lr": 6.863948136446468e-07, "epoch": 3.2070740398574635, "percentage": 64.14, "elapsed_time": "0:35:30", "remaining_time": "0:19:51", "throughput": 5615.17, "total_tokens": 11965632}
|
|
{"current_steps": 24305, "total_steps": 37885, "loss": 0.075, "lr": 6.859573894384625e-07, "epoch": 3.207733931635212, "percentage": 64.15, "elapsed_time": "0:35:31", "remaining_time": "0:19:50", "throughput": 5615.36, "total_tokens": 11967872}
|
|
{"current_steps": 24310, "total_steps": 37885, "loss": 0.0236, "lr": 6.855200318855483e-07, "epoch": 3.20839382341296, "percentage": 64.17, "elapsed_time": "0:35:31", "remaining_time": "0:19:50", "throughput": 5615.52, "total_tokens": 11970048}
|
|
{"current_steps": 24315, "total_steps": 37885, "loss": 0.0001, "lr": 6.850827410787295e-07, "epoch": 3.209053715190709, "percentage": 64.18, "elapsed_time": "0:35:31", "remaining_time": "0:19:49", "throughput": 5615.74, "total_tokens": 11972352}
|
|
{"current_steps": 24320, "total_steps": 37885, "loss": 0.0001, "lr": 6.846455171108187e-07, "epoch": 3.209713606968457, "percentage": 64.19, "elapsed_time": "0:35:32", "remaining_time": "0:19:49", "throughput": 5616.03, "total_tokens": 11974784}
|
|
{"current_steps": 24325, "total_steps": 37885, "loss": 0.0, "lr": 6.842083600746131e-07, "epoch": 3.210373498746206, "percentage": 64.21, "elapsed_time": "0:35:32", "remaining_time": "0:19:48", "throughput": 5616.35, "total_tokens": 11977344}
|
|
{"current_steps": 24330, "total_steps": 37885, "loss": 0.0, "lr": 6.837712700628967e-07, "epoch": 3.211033390523954, "percentage": 64.22, "elapsed_time": "0:35:32", "remaining_time": "0:19:48", "throughput": 5616.69, "total_tokens": 11979904}
|
|
{"current_steps": 24335, "total_steps": 37885, "loss": 0.0001, "lr": 6.833342471684383e-07, "epoch": 3.2116932823017024, "percentage": 64.23, "elapsed_time": "0:35:33", "remaining_time": "0:19:47", "throughput": 5616.9, "total_tokens": 11982208}
|
|
{"current_steps": 24340, "total_steps": 37885, "loss": 0.0238, "lr": 6.828972914839924e-07, "epoch": 3.212353174079451, "percentage": 64.25, "elapsed_time": "0:35:33", "remaining_time": "0:19:47", "throughput": 5617.26, "total_tokens": 11984832}
|
|
{"current_steps": 24345, "total_steps": 37885, "loss": 0.0863, "lr": 6.824604031023005e-07, "epoch": 3.2130130658571994, "percentage": 64.26, "elapsed_time": "0:35:33", "remaining_time": "0:19:46", "throughput": 5617.58, "total_tokens": 11987392}
|
|
{"current_steps": 24350, "total_steps": 37885, "loss": 0.0002, "lr": 6.820235821160881e-07, "epoch": 3.213672957634948, "percentage": 64.27, "elapsed_time": "0:35:34", "remaining_time": "0:19:46", "throughput": 5617.78, "total_tokens": 11989632}
|
|
{"current_steps": 24355, "total_steps": 37885, "loss": 0.0001, "lr": 6.815868286180683e-07, "epoch": 3.2143328494126964, "percentage": 64.29, "elapsed_time": "0:35:34", "remaining_time": "0:19:45", "throughput": 5618.04, "total_tokens": 11992064}
|
|
{"current_steps": 24360, "total_steps": 37885, "loss": 0.001, "lr": 6.811501427009383e-07, "epoch": 3.2149927411904446, "percentage": 64.3, "elapsed_time": "0:35:34", "remaining_time": "0:19:45", "throughput": 5618.4, "total_tokens": 11994688}
|
|
{"current_steps": 24365, "total_steps": 37885, "loss": 0.0003, "lr": 6.807135244573814e-07, "epoch": 3.2156526329681934, "percentage": 64.31, "elapsed_time": "0:35:35", "remaining_time": "0:19:44", "throughput": 5618.67, "total_tokens": 11997120}
|
|
{"current_steps": 24370, "total_steps": 37885, "loss": 0.0005, "lr": 6.802769739800669e-07, "epoch": 3.2163125247459416, "percentage": 64.33, "elapsed_time": "0:35:35", "remaining_time": "0:19:44", "throughput": 5618.97, "total_tokens": 11999616}
|
|
{"current_steps": 24375, "total_steps": 37885, "loss": 0.0, "lr": 6.798404913616491e-07, "epoch": 3.21697241652369, "percentage": 64.34, "elapsed_time": "0:35:35", "remaining_time": "0:19:43", "throughput": 5619.3, "total_tokens": 12002176}
|
|
{"current_steps": 24380, "total_steps": 37885, "loss": 0.0487, "lr": 6.794040766947693e-07, "epoch": 3.2176323083014386, "percentage": 64.35, "elapsed_time": "0:35:36", "remaining_time": "0:19:43", "throughput": 5619.57, "total_tokens": 12004608}
|
|
{"current_steps": 24385, "total_steps": 37885, "loss": 0.0, "lr": 6.789677300720522e-07, "epoch": 3.218292200079187, "percentage": 64.37, "elapsed_time": "0:35:36", "remaining_time": "0:19:42", "throughput": 5619.88, "total_tokens": 12007104}
|
|
{"current_steps": 24390, "total_steps": 37885, "loss": 0.0, "lr": 6.785314515861096e-07, "epoch": 3.2189520918569356, "percentage": 64.38, "elapsed_time": "0:35:36", "remaining_time": "0:19:42", "throughput": 5620.21, "total_tokens": 12009664}
|
|
{"current_steps": 24395, "total_steps": 37885, "loss": 0.0, "lr": 6.780952413295387e-07, "epoch": 3.219611983634684, "percentage": 64.39, "elapsed_time": "0:35:37", "remaining_time": "0:19:41", "throughput": 5620.46, "total_tokens": 12012032}
|
|
{"current_steps": 24400, "total_steps": 37885, "loss": 0.0001, "lr": 6.776590993949217e-07, "epoch": 3.220271875412432, "percentage": 64.41, "elapsed_time": "0:35:37", "remaining_time": "0:19:41", "throughput": 5620.62, "total_tokens": 12014208}
|
|
{"current_steps": 24405, "total_steps": 37885, "loss": 0.0004, "lr": 6.772230258748266e-07, "epoch": 3.220931767190181, "percentage": 64.42, "elapsed_time": "0:35:37", "remaining_time": "0:19:40", "throughput": 5620.86, "total_tokens": 12016576}
|
|
{"current_steps": 24410, "total_steps": 37885, "loss": 0.0006, "lr": 6.767870208618071e-07, "epoch": 3.221591658967929, "percentage": 64.43, "elapsed_time": "0:35:38", "remaining_time": "0:19:40", "throughput": 5621.24, "total_tokens": 12019264}
|
|
{"current_steps": 24415, "total_steps": 37885, "loss": 0.0213, "lr": 6.763510844484015e-07, "epoch": 3.2222515507456775, "percentage": 64.45, "elapsed_time": "0:35:38", "remaining_time": "0:19:39", "throughput": 5621.5, "total_tokens": 12021632}
|
|
{"current_steps": 24420, "total_steps": 37885, "loss": 0.0283, "lr": 6.759152167271349e-07, "epoch": 3.222911442523426, "percentage": 64.46, "elapsed_time": "0:35:38", "remaining_time": "0:19:39", "throughput": 5621.75, "total_tokens": 12024000}
|
|
{"current_steps": 24425, "total_steps": 37885, "loss": 0.0, "lr": 6.754794177905165e-07, "epoch": 3.2235713343011745, "percentage": 64.47, "elapsed_time": "0:35:39", "remaining_time": "0:19:38", "throughput": 5622.02, "total_tokens": 12026432}
|
|
{"current_steps": 24430, "total_steps": 37885, "loss": 0.0, "lr": 6.750436877310418e-07, "epoch": 3.224231226078923, "percentage": 64.48, "elapsed_time": "0:35:39", "remaining_time": "0:19:38", "throughput": 5622.21, "total_tokens": 12028672}
|
|
{"current_steps": 24435, "total_steps": 37885, "loss": 0.0004, "lr": 6.746080266411913e-07, "epoch": 3.2248911178566715, "percentage": 64.5, "elapsed_time": "0:35:39", "remaining_time": "0:19:37", "throughput": 5622.64, "total_tokens": 12031488}
|
|
{"current_steps": 24440, "total_steps": 37885, "loss": 0.0001, "lr": 6.741724346134306e-07, "epoch": 3.2255510096344198, "percentage": 64.51, "elapsed_time": "0:35:40", "remaining_time": "0:19:37", "throughput": 5622.9, "total_tokens": 12033920}
|
|
{"current_steps": 24445, "total_steps": 37885, "loss": 0.0044, "lr": 6.737369117402114e-07, "epoch": 3.2262109014121685, "percentage": 64.52, "elapsed_time": "0:35:40", "remaining_time": "0:19:36", "throughput": 5623.12, "total_tokens": 12036224}
|
|
{"current_steps": 24450, "total_steps": 37885, "loss": 0.115, "lr": 6.733014581139699e-07, "epoch": 3.2268707931899168, "percentage": 64.54, "elapsed_time": "0:35:40", "remaining_time": "0:19:36", "throughput": 5623.34, "total_tokens": 12038528}
|
|
{"current_steps": 24455, "total_steps": 37885, "loss": 0.0, "lr": 6.728660738271283e-07, "epoch": 3.2275306849676655, "percentage": 64.55, "elapsed_time": "0:35:41", "remaining_time": "0:19:35", "throughput": 5623.58, "total_tokens": 12040896}
|
|
{"current_steps": 24460, "total_steps": 37885, "loss": 0.0, "lr": 6.724307589720936e-07, "epoch": 3.2281905767454138, "percentage": 64.56, "elapsed_time": "0:35:41", "remaining_time": "0:19:35", "throughput": 5623.72, "total_tokens": 12043008}
|
|
{"current_steps": 24465, "total_steps": 37885, "loss": 0.0023, "lr": 6.719955136412582e-07, "epoch": 3.228850468523162, "percentage": 64.58, "elapsed_time": "0:35:41", "remaining_time": "0:19:34", "throughput": 5624.01, "total_tokens": 12045504}
|
|
{"current_steps": 24470, "total_steps": 37885, "loss": 0.0, "lr": 6.715603379269998e-07, "epoch": 3.2295103603009108, "percentage": 64.59, "elapsed_time": "0:35:42", "remaining_time": "0:19:34", "throughput": 5624.23, "total_tokens": 12047808}
|
|
{"current_steps": 24475, "total_steps": 37885, "loss": 0.0338, "lr": 6.711252319216814e-07, "epoch": 3.230170252078659, "percentage": 64.6, "elapsed_time": "0:35:42", "remaining_time": "0:19:33", "throughput": 5624.61, "total_tokens": 12050496}
|
|
{"current_steps": 24480, "total_steps": 37885, "loss": 0.0, "lr": 6.70690195717651e-07, "epoch": 3.2308301438564078, "percentage": 64.62, "elapsed_time": "0:35:42", "remaining_time": "0:19:33", "throughput": 5624.86, "total_tokens": 12052864}
|
|
{"current_steps": 24485, "total_steps": 37885, "loss": 0.0, "lr": 6.70255229407242e-07, "epoch": 3.231490035634156, "percentage": 64.63, "elapsed_time": "0:35:43", "remaining_time": "0:19:32", "throughput": 5625.02, "total_tokens": 12055040}
|
|
{"current_steps": 24490, "total_steps": 37885, "loss": 0.0, "lr": 6.698203330827722e-07, "epoch": 3.2321499274119043, "percentage": 64.64, "elapsed_time": "0:35:43", "remaining_time": "0:19:32", "throughput": 5625.37, "total_tokens": 12057664}
|
|
{"current_steps": 24495, "total_steps": 37885, "loss": 0.1253, "lr": 6.693855068365464e-07, "epoch": 3.232809819189653, "percentage": 64.66, "elapsed_time": "0:35:43", "remaining_time": "0:19:31", "throughput": 5625.51, "total_tokens": 12059776}
|
|
{"current_steps": 24500, "total_steps": 37885, "loss": 0.0, "lr": 6.689507507608518e-07, "epoch": 3.2334697109674013, "percentage": 64.67, "elapsed_time": "0:35:44", "remaining_time": "0:19:31", "throughput": 5625.84, "total_tokens": 12062336}
|
|
{"current_steps": 24505, "total_steps": 37885, "loss": 0.0, "lr": 6.685160649479638e-07, "epoch": 3.2341296027451496, "percentage": 64.68, "elapsed_time": "0:35:44", "remaining_time": "0:19:30", "throughput": 5626.0, "total_tokens": 12064512}
|
|
{"current_steps": 24510, "total_steps": 37885, "loss": 0.0, "lr": 6.680814494901406e-07, "epoch": 3.2347894945228983, "percentage": 64.7, "elapsed_time": "0:35:44", "remaining_time": "0:19:30", "throughput": 5626.33, "total_tokens": 12067072}
|
|
{"current_steps": 24515, "total_steps": 37885, "loss": 0.0, "lr": 6.676469044796258e-07, "epoch": 3.2354493863006466, "percentage": 64.71, "elapsed_time": "0:35:45", "remaining_time": "0:19:29", "throughput": 5626.54, "total_tokens": 12069376}
|
|
{"current_steps": 24520, "total_steps": 37885, "loss": 0.0001, "lr": 6.672124300086492e-07, "epoch": 3.2361092780783953, "percentage": 64.72, "elapsed_time": "0:35:45", "remaining_time": "0:19:29", "throughput": 5626.85, "total_tokens": 12071872}
|
|
{"current_steps": 24525, "total_steps": 37885, "loss": 0.0548, "lr": 6.667780261694239e-07, "epoch": 3.2367691698561436, "percentage": 64.74, "elapsed_time": "0:35:45", "remaining_time": "0:19:28", "throughput": 5627.17, "total_tokens": 12074432}
|
|
{"current_steps": 24530, "total_steps": 37885, "loss": 0.0, "lr": 6.663436930541502e-07, "epoch": 3.237429061633892, "percentage": 64.75, "elapsed_time": "0:35:46", "remaining_time": "0:19:28", "throughput": 5627.36, "total_tokens": 12076672}
|
|
{"current_steps": 24535, "total_steps": 37885, "loss": 0.0, "lr": 6.659094307550112e-07, "epoch": 3.2380889534116406, "percentage": 64.76, "elapsed_time": "0:35:46", "remaining_time": "0:19:27", "throughput": 5627.66, "total_tokens": 12079168}
|
|
{"current_steps": 24540, "total_steps": 37885, "loss": 0.0, "lr": 6.654752393641763e-07, "epoch": 3.238748845189389, "percentage": 64.77, "elapsed_time": "0:35:46", "remaining_time": "0:19:27", "throughput": 5627.99, "total_tokens": 12081728}
|
|
{"current_steps": 24545, "total_steps": 37885, "loss": 0.0, "lr": 6.650411189737993e-07, "epoch": 3.239408736967137, "percentage": 64.79, "elapsed_time": "0:35:47", "remaining_time": "0:19:26", "throughput": 5628.26, "total_tokens": 12084160}
|
|
{"current_steps": 24550, "total_steps": 37885, "loss": 0.028, "lr": 6.646070696760192e-07, "epoch": 3.240068628744886, "percentage": 64.8, "elapsed_time": "0:35:47", "remaining_time": "0:19:26", "throughput": 5628.56, "total_tokens": 12086656}
|
|
{"current_steps": 24555, "total_steps": 37885, "loss": 0.0, "lr": 6.6417309156296e-07, "epoch": 3.240728520522634, "percentage": 64.81, "elapsed_time": "0:35:47", "remaining_time": "0:19:25", "throughput": 5628.81, "total_tokens": 12089024}
|
|
{"current_steps": 24560, "total_steps": 37885, "loss": 0.0, "lr": 6.637391847267302e-07, "epoch": 3.241388412300383, "percentage": 64.83, "elapsed_time": "0:35:48", "remaining_time": "0:19:25", "throughput": 5629.08, "total_tokens": 12091456}
|
|
{"current_steps": 24565, "total_steps": 37885, "loss": 0.0, "lr": 6.633053492594232e-07, "epoch": 3.242048304078131, "percentage": 64.84, "elapsed_time": "0:35:48", "remaining_time": "0:19:24", "throughput": 5629.41, "total_tokens": 12094016}
|
|
{"current_steps": 24570, "total_steps": 37885, "loss": 0.0008, "lr": 6.628715852531179e-07, "epoch": 3.2427081958558794, "percentage": 64.85, "elapsed_time": "0:35:48", "remaining_time": "0:19:24", "throughput": 5629.68, "total_tokens": 12096448}
|
|
{"current_steps": 24575, "total_steps": 37885, "loss": 0.0, "lr": 6.624378927998773e-07, "epoch": 3.243368087633628, "percentage": 64.87, "elapsed_time": "0:35:49", "remaining_time": "0:19:23", "throughput": 5630.01, "total_tokens": 12099008}
|
|
{"current_steps": 24580, "total_steps": 37885, "loss": 0.0898, "lr": 6.620042719917495e-07, "epoch": 3.2440279794113764, "percentage": 64.88, "elapsed_time": "0:35:49", "remaining_time": "0:19:23", "throughput": 5630.2, "total_tokens": 12101248}
|
|
{"current_steps": 24585, "total_steps": 37885, "loss": 0.0, "lr": 6.615707229207674e-07, "epoch": 3.244687871189125, "percentage": 64.89, "elapsed_time": "0:35:49", "remaining_time": "0:19:22", "throughput": 5630.51, "total_tokens": 12103744}
|
|
{"current_steps": 24590, "total_steps": 37885, "loss": 0.0, "lr": 6.611372456789486e-07, "epoch": 3.2453477629668734, "percentage": 64.91, "elapsed_time": "0:35:50", "remaining_time": "0:19:22", "throughput": 5630.92, "total_tokens": 12106496}
|
|
{"current_steps": 24595, "total_steps": 37885, "loss": 0.0004, "lr": 6.607038403582956e-07, "epoch": 3.2460076547446217, "percentage": 64.92, "elapsed_time": "0:35:50", "remaining_time": "0:19:21", "throughput": 5631.19, "total_tokens": 12108928}
|
|
{"current_steps": 24600, "total_steps": 37885, "loss": 0.0, "lr": 6.602705070507954e-07, "epoch": 3.2466675465223704, "percentage": 64.93, "elapsed_time": "0:35:50", "remaining_time": "0:19:21", "throughput": 5631.52, "total_tokens": 12111488}
|
|
{"current_steps": 24605, "total_steps": 37885, "loss": 0.0, "lr": 6.598372458484202e-07, "epoch": 3.2473274383001187, "percentage": 64.95, "elapsed_time": "0:35:50", "remaining_time": "0:19:20", "throughput": 5631.87, "total_tokens": 12114112}
|
|
{"current_steps": 24610, "total_steps": 37885, "loss": 0.0266, "lr": 6.594040568431262e-07, "epoch": 3.2479873300778674, "percentage": 64.96, "elapsed_time": "0:35:51", "remaining_time": "0:19:20", "throughput": 5632.06, "total_tokens": 12116352}
|
|
{"current_steps": 24615, "total_steps": 37885, "loss": 0.0909, "lr": 6.589709401268546e-07, "epoch": 3.2486472218556157, "percentage": 64.97, "elapsed_time": "0:35:51", "remaining_time": "0:19:19", "throughput": 5632.42, "total_tokens": 12118976}
|
|
{"current_steps": 24620, "total_steps": 37885, "loss": 0.0822, "lr": 6.585378957915315e-07, "epoch": 3.249307113633364, "percentage": 64.99, "elapsed_time": "0:35:51", "remaining_time": "0:19:19", "throughput": 5632.61, "total_tokens": 12121216}
|
|
{"current_steps": 24625, "total_steps": 37885, "loss": 0.0, "lr": 6.581049239290672e-07, "epoch": 3.2499670054111127, "percentage": 65.0, "elapsed_time": "0:35:52", "remaining_time": "0:19:18", "throughput": 5632.91, "total_tokens": 12123712}
|
|
{"current_steps": 24630, "total_steps": 37885, "loss": 0.0, "lr": 6.576720246313572e-07, "epoch": 3.250626897188861, "percentage": 65.01, "elapsed_time": "0:35:52", "remaining_time": "0:19:18", "throughput": 5633.13, "total_tokens": 12126016}
|
|
{"current_steps": 24635, "total_steps": 37885, "loss": 0.0352, "lr": 6.57239197990281e-07, "epoch": 3.2512867889666097, "percentage": 65.03, "elapsed_time": "0:35:52", "remaining_time": "0:19:17", "throughput": 5633.4, "total_tokens": 12128448}
|
|
{"current_steps": 24635, "total_steps": 37885, "eval_loss": 0.18092882633209229, "epoch": 3.2512867889666097, "percentage": 65.03, "elapsed_time": "0:36:00", "remaining_time": "0:19:22", "throughput": 5612.96, "total_tokens": 12128448}
|
|
{"current_steps": 24640, "total_steps": 37885, "loss": 0.0434, "lr": 6.568064440977028e-07, "epoch": 3.251946680744358, "percentage": 65.04, "elapsed_time": "0:36:35", "remaining_time": "0:19:40", "throughput": 5524.63, "total_tokens": 12130880}
|
|
{"current_steps": 24645, "total_steps": 37885, "loss": 0.0001, "lr": 6.563737630454719e-07, "epoch": 3.2526065725221063, "percentage": 65.05, "elapsed_time": "0:36:36", "remaining_time": "0:19:39", "throughput": 5524.86, "total_tokens": 12133248}
|
|
{"current_steps": 24650, "total_steps": 37885, "loss": 0.0, "lr": 6.559411549254211e-07, "epoch": 3.253266464299855, "percentage": 65.07, "elapsed_time": "0:36:36", "remaining_time": "0:19:39", "throughput": 5525.04, "total_tokens": 12135488}
|
|
{"current_steps": 24655, "total_steps": 37885, "loss": 0.0661, "lr": 6.55508619829369e-07, "epoch": 3.2539263560776033, "percentage": 65.08, "elapsed_time": "0:36:36", "remaining_time": "0:19:38", "throughput": 5525.31, "total_tokens": 12137920}
|
|
{"current_steps": 24660, "total_steps": 37885, "loss": 0.0001, "lr": 6.550761578491175e-07, "epoch": 3.2545862478553516, "percentage": 65.09, "elapsed_time": "0:36:37", "remaining_time": "0:19:38", "throughput": 5525.6, "total_tokens": 12140416}
|
|
{"current_steps": 24665, "total_steps": 37885, "loss": 0.0338, "lr": 6.546437690764539e-07, "epoch": 3.2552461396331003, "percentage": 65.1, "elapsed_time": "0:36:37", "remaining_time": "0:19:37", "throughput": 5525.94, "total_tokens": 12143040}
|
|
{"current_steps": 24670, "total_steps": 37885, "loss": 0.0002, "lr": 6.542114536031498e-07, "epoch": 3.2559060314108486, "percentage": 65.12, "elapsed_time": "0:36:37", "remaining_time": "0:19:37", "throughput": 5526.12, "total_tokens": 12145280}
|
|
{"current_steps": 24675, "total_steps": 37885, "loss": 0.0611, "lr": 6.537792115209599e-07, "epoch": 3.256565923188597, "percentage": 65.13, "elapsed_time": "0:36:38", "remaining_time": "0:19:36", "throughput": 5526.4, "total_tokens": 12147776}
|
|
{"current_steps": 24680, "total_steps": 37885, "loss": 0.0, "lr": 6.533470429216258e-07, "epoch": 3.2572258149663456, "percentage": 65.14, "elapsed_time": "0:36:38", "remaining_time": "0:19:36", "throughput": 5526.69, "total_tokens": 12150272}
|
|
{"current_steps": 24685, "total_steps": 37885, "loss": 0.0004, "lr": 6.529149478968709e-07, "epoch": 3.257885706744094, "percentage": 65.16, "elapsed_time": "0:36:38", "remaining_time": "0:19:35", "throughput": 5526.99, "total_tokens": 12152768}
|
|
{"current_steps": 24690, "total_steps": 37885, "loss": 0.0018, "lr": 6.524829265384058e-07, "epoch": 3.2585455985218426, "percentage": 65.17, "elapsed_time": "0:36:39", "remaining_time": "0:19:35", "throughput": 5527.2, "total_tokens": 12155072}
|
|
{"current_steps": 24695, "total_steps": 37885, "loss": 0.0355, "lr": 6.520509789379227e-07, "epoch": 3.259205490299591, "percentage": 65.18, "elapsed_time": "0:36:39", "remaining_time": "0:19:34", "throughput": 5527.41, "total_tokens": 12157376}
|
|
{"current_steps": 24700, "total_steps": 37885, "loss": 0.0019, "lr": 6.516191051870992e-07, "epoch": 3.259865382077339, "percentage": 65.2, "elapsed_time": "0:36:39", "remaining_time": "0:19:34", "throughput": 5527.58, "total_tokens": 12159616}
|
|
{"current_steps": 24705, "total_steps": 37885, "loss": 0.0296, "lr": 6.511873053775985e-07, "epoch": 3.260525273855088, "percentage": 65.21, "elapsed_time": "0:36:40", "remaining_time": "0:19:33", "throughput": 5527.78, "total_tokens": 12161920}
|
|
{"current_steps": 24710, "total_steps": 37885, "loss": 0.0564, "lr": 6.507555796010658e-07, "epoch": 3.261185165632836, "percentage": 65.22, "elapsed_time": "0:36:40", "remaining_time": "0:19:33", "throughput": 5527.95, "total_tokens": 12164160}
|
|
{"current_steps": 24715, "total_steps": 37885, "loss": 0.0615, "lr": 6.503239279491328e-07, "epoch": 3.261845057410585, "percentage": 65.24, "elapsed_time": "0:36:40", "remaining_time": "0:19:32", "throughput": 5528.17, "total_tokens": 12166464}
|
|
{"current_steps": 24720, "total_steps": 37885, "loss": 0.0311, "lr": 6.498923505134138e-07, "epoch": 3.262504949188333, "percentage": 65.25, "elapsed_time": "0:36:41", "remaining_time": "0:19:32", "throughput": 5528.47, "total_tokens": 12168960}
|
|
{"current_steps": 24725, "total_steps": 37885, "loss": 0.0202, "lr": 6.494608473855079e-07, "epoch": 3.2631648409660814, "percentage": 65.26, "elapsed_time": "0:36:41", "remaining_time": "0:19:31", "throughput": 5528.85, "total_tokens": 12171648}
|
|
{"current_steps": 24730, "total_steps": 37885, "loss": 0.0, "lr": 6.490294186569989e-07, "epoch": 3.26382473274383, "percentage": 65.28, "elapsed_time": "0:36:41", "remaining_time": "0:19:31", "throughput": 5529.26, "total_tokens": 12174400}
|
|
{"current_steps": 24735, "total_steps": 37885, "loss": 0.0045, "lr": 6.485980644194541e-07, "epoch": 3.2644846245215784, "percentage": 65.29, "elapsed_time": "0:36:42", "remaining_time": "0:19:30", "throughput": 5529.48, "total_tokens": 12176704}
|
|
{"current_steps": 24740, "total_steps": 37885, "loss": 0.0608, "lr": 6.481667847644256e-07, "epoch": 3.265144516299327, "percentage": 65.3, "elapsed_time": "0:36:42", "remaining_time": "0:19:30", "throughput": 5529.71, "total_tokens": 12179008}
|
|
{"current_steps": 24745, "total_steps": 37885, "loss": 0.0, "lr": 6.477355797834494e-07, "epoch": 3.2658044080770754, "percentage": 65.32, "elapsed_time": "0:36:42", "remaining_time": "0:19:29", "throughput": 5530.07, "total_tokens": 12181632}
|
|
{"current_steps": 24750, "total_steps": 37885, "loss": 0.0, "lr": 6.473044495680451e-07, "epoch": 3.2664642998548237, "percentage": 65.33, "elapsed_time": "0:36:43", "remaining_time": "0:19:29", "throughput": 5530.33, "total_tokens": 12184000}
|
|
{"current_steps": 24755, "total_steps": 37885, "loss": 0.0241, "lr": 6.468733942097178e-07, "epoch": 3.2671241916325724, "percentage": 65.34, "elapsed_time": "0:36:43", "remaining_time": "0:19:28", "throughput": 5530.58, "total_tokens": 12186368}
|
|
{"current_steps": 24760, "total_steps": 37885, "loss": 0.0, "lr": 6.464424137999551e-07, "epoch": 3.2677840834103207, "percentage": 65.36, "elapsed_time": "0:36:43", "remaining_time": "0:19:28", "throughput": 5530.81, "total_tokens": 12188672}
|
|
{"current_steps": 24765, "total_steps": 37885, "loss": 0.0017, "lr": 6.4601150843023e-07, "epoch": 3.2684439751880694, "percentage": 65.37, "elapsed_time": "0:36:44", "remaining_time": "0:19:27", "throughput": 5530.95, "total_tokens": 12190784}
|
|
{"current_steps": 24770, "total_steps": 37885, "loss": 0.0001, "lr": 6.455806781919988e-07, "epoch": 3.2691038669658177, "percentage": 65.38, "elapsed_time": "0:36:44", "remaining_time": "0:19:27", "throughput": 5531.23, "total_tokens": 12193216}
|
|
{"current_steps": 24775, "total_steps": 37885, "loss": 0.0, "lr": 6.451499231767021e-07, "epoch": 3.269763758743566, "percentage": 65.4, "elapsed_time": "0:36:44", "remaining_time": "0:19:26", "throughput": 5531.53, "total_tokens": 12195712}
|
|
{"current_steps": 24780, "total_steps": 37885, "loss": 0.0023, "lr": 6.447192434757647e-07, "epoch": 3.2704236505213147, "percentage": 65.41, "elapsed_time": "0:36:45", "remaining_time": "0:19:26", "throughput": 5531.75, "total_tokens": 12198016}
|
|
{"current_steps": 24785, "total_steps": 37885, "loss": 0.0, "lr": 6.442886391805948e-07, "epoch": 3.271083542299063, "percentage": 65.42, "elapsed_time": "0:36:45", "remaining_time": "0:19:25", "throughput": 5532.06, "total_tokens": 12200512}
|
|
{"current_steps": 24790, "total_steps": 37885, "loss": 0.0002, "lr": 6.438581103825858e-07, "epoch": 3.2717434340768112, "percentage": 65.43, "elapsed_time": "0:36:45", "remaining_time": "0:19:25", "throughput": 5532.44, "total_tokens": 12203200}
|
|
{"current_steps": 24795, "total_steps": 37885, "loss": 0.0, "lr": 6.434276571731139e-07, "epoch": 3.27240332585456, "percentage": 65.45, "elapsed_time": "0:36:46", "remaining_time": "0:19:24", "throughput": 5532.83, "total_tokens": 12205888}
|
|
{"current_steps": 24800, "total_steps": 37885, "loss": 0.0006, "lr": 6.429972796435392e-07, "epoch": 3.2730632176323082, "percentage": 65.46, "elapsed_time": "0:36:46", "remaining_time": "0:19:24", "throughput": 5533.16, "total_tokens": 12208448}
|
|
{"current_steps": 24805, "total_steps": 37885, "loss": 0.0005, "lr": 6.425669778852072e-07, "epoch": 3.2737231094100565, "percentage": 65.47, "elapsed_time": "0:36:46", "remaining_time": "0:19:23", "throughput": 5533.4, "total_tokens": 12210816}
|
|
{"current_steps": 24810, "total_steps": 37885, "loss": 0.0, "lr": 6.421367519894454e-07, "epoch": 3.2743830011878052, "percentage": 65.49, "elapsed_time": "0:36:47", "remaining_time": "0:19:23", "throughput": 5533.73, "total_tokens": 12213376}
|
|
{"current_steps": 24815, "total_steps": 37885, "loss": 0.0019, "lr": 6.417066020475669e-07, "epoch": 3.2750428929655535, "percentage": 65.5, "elapsed_time": "0:36:47", "remaining_time": "0:19:22", "throughput": 5534.14, "total_tokens": 12216128}
|
|
{"current_steps": 24820, "total_steps": 37885, "loss": 0.0002, "lr": 6.412765281508677e-07, "epoch": 3.2757027847433022, "percentage": 65.51, "elapsed_time": "0:36:47", "remaining_time": "0:19:22", "throughput": 5534.36, "total_tokens": 12218432}
|
|
{"current_steps": 24825, "total_steps": 37885, "loss": 0.0, "lr": 6.408465303906271e-07, "epoch": 3.2763626765210505, "percentage": 65.53, "elapsed_time": "0:36:48", "remaining_time": "0:19:21", "throughput": 5534.82, "total_tokens": 12221312}
|
|
{"current_steps": 24830, "total_steps": 37885, "loss": 0.0, "lr": 6.404166088581102e-07, "epoch": 3.277022568298799, "percentage": 65.54, "elapsed_time": "0:36:48", "remaining_time": "0:19:21", "throughput": 5535.07, "total_tokens": 12223680}
|
|
{"current_steps": 24835, "total_steps": 37885, "loss": 0.0487, "lr": 6.399867636445637e-07, "epoch": 3.2776824600765475, "percentage": 65.55, "elapsed_time": "0:36:48", "remaining_time": "0:19:20", "throughput": 5535.45, "total_tokens": 12226368}
|
|
{"current_steps": 24840, "total_steps": 37885, "loss": 0.0002, "lr": 6.395569948412198e-07, "epoch": 3.278342351854296, "percentage": 65.57, "elapsed_time": "0:36:49", "remaining_time": "0:19:20", "throughput": 5535.7, "total_tokens": 12228736}
|
|
{"current_steps": 24845, "total_steps": 37885, "loss": 0.0559, "lr": 6.39127302539294e-07, "epoch": 3.2790022436320445, "percentage": 65.58, "elapsed_time": "0:36:49", "remaining_time": "0:19:19", "throughput": 5536.05, "total_tokens": 12231360}
|
|
{"current_steps": 24850, "total_steps": 37885, "loss": 0.2746, "lr": 6.386976868299844e-07, "epoch": 3.279662135409793, "percentage": 65.59, "elapsed_time": "0:36:49", "remaining_time": "0:19:19", "throughput": 5536.49, "total_tokens": 12234176}
|
|
{"current_steps": 24855, "total_steps": 37885, "loss": 0.0001, "lr": 6.382681478044749e-07, "epoch": 3.280322027187541, "percentage": 65.61, "elapsed_time": "0:36:50", "remaining_time": "0:19:18", "throughput": 5536.75, "total_tokens": 12236544}
|
|
{"current_steps": 24860, "total_steps": 37885, "loss": 0.0001, "lr": 6.378386855539311e-07, "epoch": 3.28098191896529, "percentage": 65.62, "elapsed_time": "0:36:50", "remaining_time": "0:19:18", "throughput": 5537.06, "total_tokens": 12239040}
|
|
{"current_steps": 24865, "total_steps": 37885, "loss": 0.08, "lr": 6.374093001695042e-07, "epoch": 3.281641810743038, "percentage": 65.63, "elapsed_time": "0:36:50", "remaining_time": "0:19:17", "throughput": 5537.31, "total_tokens": 12241408}
|
|
{"current_steps": 24870, "total_steps": 37885, "loss": 0.0337, "lr": 6.369799917423277e-07, "epoch": 3.282301702520787, "percentage": 65.65, "elapsed_time": "0:36:51", "remaining_time": "0:19:17", "throughput": 5537.59, "total_tokens": 12243840}
|
|
{"current_steps": 24875, "total_steps": 37885, "loss": 0.0001, "lr": 6.365507603635188e-07, "epoch": 3.282961594298535, "percentage": 65.66, "elapsed_time": "0:36:51", "remaining_time": "0:19:16", "throughput": 5537.76, "total_tokens": 12246016}
|
|
{"current_steps": 24880, "total_steps": 37885, "loss": 0.0006, "lr": 6.361216061241792e-07, "epoch": 3.2836214860762833, "percentage": 65.67, "elapsed_time": "0:36:51", "remaining_time": "0:19:16", "throughput": 5537.99, "total_tokens": 12248320}
|
|
{"current_steps": 24885, "total_steps": 37885, "loss": 0.0292, "lr": 6.356925291153936e-07, "epoch": 3.284281377854032, "percentage": 65.69, "elapsed_time": "0:36:52", "remaining_time": "0:19:15", "throughput": 5538.39, "total_tokens": 12251072}
|
|
{"current_steps": 24890, "total_steps": 37885, "loss": 0.0011, "lr": 6.352635294282309e-07, "epoch": 3.2849412696317803, "percentage": 65.7, "elapsed_time": "0:36:52", "remaining_time": "0:19:15", "throughput": 5538.72, "total_tokens": 12253632}
|
|
{"current_steps": 24895, "total_steps": 37885, "loss": 0.0001, "lr": 6.348346071537427e-07, "epoch": 3.285601161409529, "percentage": 65.71, "elapsed_time": "0:36:52", "remaining_time": "0:19:14", "throughput": 5539.03, "total_tokens": 12256128}
|
|
{"current_steps": 24900, "total_steps": 37885, "loss": 0.0, "lr": 6.344057623829648e-07, "epoch": 3.2862610531872773, "percentage": 65.73, "elapsed_time": "0:36:53", "remaining_time": "0:19:14", "throughput": 5539.36, "total_tokens": 12258688}
|
|
{"current_steps": 24905, "total_steps": 37885, "loss": 0.0, "lr": 6.339769952069165e-07, "epoch": 3.2869209449650256, "percentage": 65.74, "elapsed_time": "0:36:53", "remaining_time": "0:19:13", "throughput": 5539.69, "total_tokens": 12261312}
|
|
{"current_steps": 24910, "total_steps": 37885, "loss": 0.0, "lr": 6.335483057166002e-07, "epoch": 3.2875808367427743, "percentage": 65.75, "elapsed_time": "0:36:53", "remaining_time": "0:19:13", "throughput": 5539.91, "total_tokens": 12263616}
|
|
{"current_steps": 24915, "total_steps": 37885, "loss": 0.0, "lr": 6.331196940030026e-07, "epoch": 3.2882407285205226, "percentage": 65.76, "elapsed_time": "0:36:54", "remaining_time": "0:19:12", "throughput": 5540.29, "total_tokens": 12266304}
|
|
{"current_steps": 24920, "total_steps": 37885, "loss": 0.0, "lr": 6.326911601570933e-07, "epoch": 3.288900620298271, "percentage": 65.78, "elapsed_time": "0:36:54", "remaining_time": "0:19:12", "throughput": 5540.5, "total_tokens": 12268608}
|
|
{"current_steps": 24925, "total_steps": 37885, "loss": 0.0, "lr": 6.322627042698251e-07, "epoch": 3.2895605120760196, "percentage": 65.79, "elapsed_time": "0:36:54", "remaining_time": "0:19:11", "throughput": 5540.88, "total_tokens": 12271296}
|
|
{"current_steps": 24930, "total_steps": 37885, "loss": 0.0, "lr": 6.318343264321352e-07, "epoch": 3.290220403853768, "percentage": 65.8, "elapsed_time": "0:36:55", "remaining_time": "0:19:11", "throughput": 5541.12, "total_tokens": 12273664}
|
|
{"current_steps": 24935, "total_steps": 37885, "loss": 0.0, "lr": 6.314060267349432e-07, "epoch": 3.290880295631516, "percentage": 65.82, "elapsed_time": "0:36:55", "remaining_time": "0:19:10", "throughput": 5541.44, "total_tokens": 12276224}
|
|
{"current_steps": 24940, "total_steps": 37885, "loss": 0.0551, "lr": 6.309778052691532e-07, "epoch": 3.291540187409265, "percentage": 65.83, "elapsed_time": "0:36:55", "remaining_time": "0:19:10", "throughput": 5541.7, "total_tokens": 12278656}
|
|
{"current_steps": 24945, "total_steps": 37885, "loss": 0.0, "lr": 6.305496621256516e-07, "epoch": 3.292200079187013, "percentage": 65.84, "elapsed_time": "0:36:56", "remaining_time": "0:19:09", "throughput": 5541.92, "total_tokens": 12280960}
|
|
{"current_steps": 24950, "total_steps": 37885, "loss": 0.0, "lr": 6.30121597395309e-07, "epoch": 3.292859970964762, "percentage": 65.86, "elapsed_time": "0:36:56", "remaining_time": "0:19:09", "throughput": 5542.34, "total_tokens": 12283776}
|
|
{"current_steps": 24955, "total_steps": 37885, "loss": 0.0, "lr": 6.296936111689789e-07, "epoch": 3.29351986274251, "percentage": 65.87, "elapsed_time": "0:36:56", "remaining_time": "0:19:08", "throughput": 5542.5, "total_tokens": 12285952}
|
|
{"current_steps": 24960, "total_steps": 37885, "loss": 0.0, "lr": 6.292657035374981e-07, "epoch": 3.2941797545202585, "percentage": 65.88, "elapsed_time": "0:36:57", "remaining_time": "0:19:08", "throughput": 5542.85, "total_tokens": 12288576}
|
|
{"current_steps": 24965, "total_steps": 37885, "loss": 0.028, "lr": 6.288378745916873e-07, "epoch": 3.294839646298007, "percentage": 65.9, "elapsed_time": "0:36:57", "remaining_time": "0:19:07", "throughput": 5543.19, "total_tokens": 12291200}
|
|
{"current_steps": 24970, "total_steps": 37885, "loss": 0.0, "lr": 6.284101244223497e-07, "epoch": 3.2954995380757555, "percentage": 65.91, "elapsed_time": "0:36:57", "remaining_time": "0:19:07", "throughput": 5543.42, "total_tokens": 12293568}
|
|
{"current_steps": 24975, "total_steps": 37885, "loss": 0.0001, "lr": 6.279824531202725e-07, "epoch": 3.296159429853504, "percentage": 65.92, "elapsed_time": "0:36:58", "remaining_time": "0:19:06", "throughput": 5543.71, "total_tokens": 12296064}
|
|
{"current_steps": 24980, "total_steps": 37885, "loss": 0.0266, "lr": 6.275548607762255e-07, "epoch": 3.2968193216312525, "percentage": 65.94, "elapsed_time": "0:36:58", "remaining_time": "0:19:06", "throughput": 5544.05, "total_tokens": 12298688}
|
|
{"current_steps": 24985, "total_steps": 37885, "loss": 0.0001, "lr": 6.271273474809624e-07, "epoch": 3.2974792134090007, "percentage": 65.95, "elapsed_time": "0:36:58", "remaining_time": "0:19:05", "throughput": 5544.25, "total_tokens": 12300992}
|
|
{"current_steps": 24990, "total_steps": 37885, "loss": 0.0, "lr": 6.266999133252196e-07, "epoch": 3.2981391051867495, "percentage": 65.96, "elapsed_time": "0:36:59", "remaining_time": "0:19:05", "throughput": 5544.62, "total_tokens": 12303680}
|
|
{"current_steps": 24995, "total_steps": 37885, "loss": 0.0, "lr": 6.262725583997169e-07, "epoch": 3.2987989969644977, "percentage": 65.98, "elapsed_time": "0:36:59", "remaining_time": "0:19:04", "throughput": 5544.72, "total_tokens": 12305728}
|
|
{"current_steps": 25000, "total_steps": 37885, "loss": 0.0, "lr": 6.258452827951576e-07, "epoch": 3.2994588887422465, "percentage": 65.99, "elapsed_time": "0:36:59", "remaining_time": "0:19:04", "throughput": 5544.95, "total_tokens": 12308096}
|
|
{"current_steps": 25005, "total_steps": 37885, "loss": 0.0004, "lr": 6.254180866022278e-07, "epoch": 3.3001187805199947, "percentage": 66.0, "elapsed_time": "0:37:00", "remaining_time": "0:19:03", "throughput": 5545.27, "total_tokens": 12310656}
|
|
{"current_steps": 25010, "total_steps": 37885, "loss": 0.0009, "lr": 6.249909699115958e-07, "epoch": 3.300778672297743, "percentage": 66.02, "elapsed_time": "0:37:00", "remaining_time": "0:19:03", "throughput": 5545.73, "total_tokens": 12313600}
|
|
{"current_steps": 25015, "total_steps": 37885, "loss": 0.0266, "lr": 6.245639328139156e-07, "epoch": 3.3014385640754917, "percentage": 66.03, "elapsed_time": "0:37:00", "remaining_time": "0:19:02", "throughput": 5545.91, "total_tokens": 12315840}
|
|
{"current_steps": 25020, "total_steps": 37885, "loss": 0.0, "lr": 6.241369753998213e-07, "epoch": 3.30209845585324, "percentage": 66.04, "elapsed_time": "0:37:01", "remaining_time": "0:19:02", "throughput": 5546.39, "total_tokens": 12318784}
|
|
{"current_steps": 25025, "total_steps": 37885, "loss": 0.0, "lr": 6.23710097759933e-07, "epoch": 3.3027583476309887, "percentage": 66.06, "elapsed_time": "0:37:01", "remaining_time": "0:19:01", "throughput": 5546.61, "total_tokens": 12321152}
|
|
{"current_steps": 25030, "total_steps": 37885, "loss": 0.0045, "lr": 6.232832999848511e-07, "epoch": 3.303418239408737, "percentage": 66.07, "elapsed_time": "0:37:01", "remaining_time": "0:19:01", "throughput": 5546.93, "total_tokens": 12323712}
|
|
{"current_steps": 25035, "total_steps": 37885, "loss": 0.0, "lr": 6.228565821651606e-07, "epoch": 3.3040781311864853, "percentage": 66.08, "elapsed_time": "0:37:02", "remaining_time": "0:19:00", "throughput": 5547.24, "total_tokens": 12326272}
|
|
{"current_steps": 25040, "total_steps": 37885, "loss": 0.0, "lr": 6.224299443914301e-07, "epoch": 3.304738022964234, "percentage": 66.09, "elapsed_time": "0:37:02", "remaining_time": "0:19:00", "throughput": 5547.58, "total_tokens": 12328896}
|
|
{"current_steps": 25045, "total_steps": 37885, "loss": 0.0, "lr": 6.22003386754209e-07, "epoch": 3.3053979147419823, "percentage": 66.11, "elapsed_time": "0:37:02", "remaining_time": "0:18:59", "throughput": 5547.84, "total_tokens": 12331328}
|
|
{"current_steps": 25050, "total_steps": 37885, "loss": 0.0323, "lr": 6.215769093440325e-07, "epoch": 3.3060578065197306, "percentage": 66.12, "elapsed_time": "0:37:03", "remaining_time": "0:18:59", "throughput": 5548.0, "total_tokens": 12333568}
|
|
{"current_steps": 25055, "total_steps": 37885, "loss": 0.0003, "lr": 6.211505122514165e-07, "epoch": 3.3067176982974793, "percentage": 66.13, "elapsed_time": "0:37:03", "remaining_time": "0:18:58", "throughput": 5548.28, "total_tokens": 12336064}
|
|
{"current_steps": 25060, "total_steps": 37885, "loss": 0.0002, "lr": 6.207241955668605e-07, "epoch": 3.3073775900752276, "percentage": 66.15, "elapsed_time": "0:37:03", "remaining_time": "0:18:58", "throughput": 5548.65, "total_tokens": 12338752}
|
|
{"current_steps": 25065, "total_steps": 37885, "loss": 0.0, "lr": 6.202979593808478e-07, "epoch": 3.3080374818529763, "percentage": 66.16, "elapsed_time": "0:37:04", "remaining_time": "0:18:57", "throughput": 5548.9, "total_tokens": 12341184}
|
|
{"current_steps": 25070, "total_steps": 37885, "loss": 0.0533, "lr": 6.198718037838435e-07, "epoch": 3.3086973736307246, "percentage": 66.17, "elapsed_time": "0:37:04", "remaining_time": "0:18:57", "throughput": 5549.1, "total_tokens": 12343488}
|
|
{"current_steps": 25075, "total_steps": 37885, "loss": 0.0465, "lr": 6.194457288662963e-07, "epoch": 3.309357265408473, "percentage": 66.19, "elapsed_time": "0:37:04", "remaining_time": "0:18:56", "throughput": 5549.46, "total_tokens": 12346176}
|
|
{"current_steps": 25080, "total_steps": 37885, "loss": 0.0, "lr": 6.190197347186374e-07, "epoch": 3.3100171571862216, "percentage": 66.2, "elapsed_time": "0:37:05", "remaining_time": "0:18:56", "throughput": 5549.68, "total_tokens": 12348480}
|
|
{"current_steps": 25085, "total_steps": 37885, "loss": 0.0001, "lr": 6.185938214312808e-07, "epoch": 3.31067704896397, "percentage": 66.21, "elapsed_time": "0:37:05", "remaining_time": "0:18:55", "throughput": 5549.9, "total_tokens": 12350848}
|
|
{"current_steps": 25090, "total_steps": 37885, "loss": 0.0, "lr": 6.181679890946238e-07, "epoch": 3.311336940741718, "percentage": 66.23, "elapsed_time": "0:37:05", "remaining_time": "0:18:55", "throughput": 5550.23, "total_tokens": 12353472}
|
|
{"current_steps": 25095, "total_steps": 37885, "loss": 0.0, "lr": 6.17742237799046e-07, "epoch": 3.311996832519467, "percentage": 66.24, "elapsed_time": "0:37:06", "remaining_time": "0:18:54", "throughput": 5550.61, "total_tokens": 12356224}
|
|
{"current_steps": 25100, "total_steps": 37885, "loss": 0.0049, "lr": 6.173165676349102e-07, "epoch": 3.312656724297215, "percentage": 66.25, "elapsed_time": "0:37:06", "remaining_time": "0:18:54", "throughput": 5550.93, "total_tokens": 12358784}
|
|
{"current_steps": 25105, "total_steps": 37885, "loss": 0.0, "lr": 6.168909786925619e-07, "epoch": 3.313316616074964, "percentage": 66.27, "elapsed_time": "0:37:06", "remaining_time": "0:18:53", "throughput": 5551.14, "total_tokens": 12361088}
|
|
{"current_steps": 25110, "total_steps": 37885, "loss": 0.0, "lr": 6.164654710623289e-07, "epoch": 3.313976507852712, "percentage": 66.28, "elapsed_time": "0:37:07", "remaining_time": "0:18:53", "throughput": 5551.37, "total_tokens": 12363456}
|
|
{"current_steps": 25115, "total_steps": 37885, "loss": 0.0001, "lr": 6.160400448345224e-07, "epoch": 3.3146363996304604, "percentage": 66.29, "elapsed_time": "0:37:07", "remaining_time": "0:18:52", "throughput": 5551.68, "total_tokens": 12366016}
|
|
{"current_steps": 25120, "total_steps": 37885, "loss": 0.0005, "lr": 6.156147000994358e-07, "epoch": 3.315296291408209, "percentage": 66.31, "elapsed_time": "0:37:07", "remaining_time": "0:18:52", "throughput": 5552.0, "total_tokens": 12368576}
|
|
{"current_steps": 25125, "total_steps": 37885, "loss": 0.0002, "lr": 6.151894369473459e-07, "epoch": 3.3159561831859574, "percentage": 66.32, "elapsed_time": "0:37:08", "remaining_time": "0:18:51", "throughput": 5552.27, "total_tokens": 12371008}
|
|
{"current_steps": 25130, "total_steps": 37885, "loss": 0.0, "lr": 6.147642554685112e-07, "epoch": 3.316616074963706, "percentage": 66.33, "elapsed_time": "0:37:08", "remaining_time": "0:18:51", "throughput": 5552.49, "total_tokens": 12373376}
|
|
{"current_steps": 25135, "total_steps": 37885, "loss": 0.0, "lr": 6.143391557531738e-07, "epoch": 3.3172759667414544, "percentage": 66.35, "elapsed_time": "0:37:08", "remaining_time": "0:18:50", "throughput": 5552.84, "total_tokens": 12376064}
|
|
{"current_steps": 25140, "total_steps": 37885, "loss": 0.0061, "lr": 6.139141378915578e-07, "epoch": 3.3179358585192027, "percentage": 66.36, "elapsed_time": "0:37:09", "remaining_time": "0:18:50", "throughput": 5553.13, "total_tokens": 12378560}
|
|
{"current_steps": 25145, "total_steps": 37885, "loss": 0.0, "lr": 6.1348920197387e-07, "epoch": 3.3185957502969514, "percentage": 66.37, "elapsed_time": "0:37:09", "remaining_time": "0:18:49", "throughput": 5553.34, "total_tokens": 12380928}
|
|
{"current_steps": 25150, "total_steps": 37885, "loss": 0.0002, "lr": 6.130643480903005e-07, "epoch": 3.3192556420746997, "percentage": 66.39, "elapsed_time": "0:37:09", "remaining_time": "0:18:49", "throughput": 5553.6, "total_tokens": 12383360}
|
|
{"current_steps": 25155, "total_steps": 37885, "loss": 0.0082, "lr": 6.126395763310213e-07, "epoch": 3.3199155338524484, "percentage": 66.4, "elapsed_time": "0:37:10", "remaining_time": "0:18:48", "throughput": 5553.92, "total_tokens": 12385920}
|
|
{"current_steps": 25160, "total_steps": 37885, "loss": 0.0308, "lr": 6.122148867861864e-07, "epoch": 3.3205754256301967, "percentage": 66.41, "elapsed_time": "0:37:10", "remaining_time": "0:18:48", "throughput": 5554.21, "total_tokens": 12388416}
|
|
{"current_steps": 25165, "total_steps": 37885, "loss": 0.02, "lr": 6.117902795459342e-07, "epoch": 3.321235317407945, "percentage": 66.42, "elapsed_time": "0:37:10", "remaining_time": "0:18:47", "throughput": 5554.51, "total_tokens": 12390976}
|
|
{"current_steps": 25170, "total_steps": 37885, "loss": 0.0, "lr": 6.113657547003834e-07, "epoch": 3.3218952091856937, "percentage": 66.44, "elapsed_time": "0:37:11", "remaining_time": "0:18:47", "throughput": 5554.79, "total_tokens": 12393472}
|
|
{"current_steps": 25175, "total_steps": 37885, "loss": 0.0, "lr": 6.109413123396374e-07, "epoch": 3.322555100963442, "percentage": 66.45, "elapsed_time": "0:37:11", "remaining_time": "0:18:46", "throughput": 5555.18, "total_tokens": 12396224}
|
|
{"current_steps": 25180, "total_steps": 37885, "loss": 0.0266, "lr": 6.105169525537805e-07, "epoch": 3.3232149927411903, "percentage": 66.46, "elapsed_time": "0:37:11", "remaining_time": "0:18:46", "throughput": 5555.44, "total_tokens": 12398656}
|
|
{"current_steps": 25185, "total_steps": 37885, "loss": 0.0, "lr": 6.100926754328797e-07, "epoch": 3.323874884518939, "percentage": 66.48, "elapsed_time": "0:37:12", "remaining_time": "0:18:45", "throughput": 5555.64, "total_tokens": 12400960}
|
|
{"current_steps": 25190, "total_steps": 37885, "loss": 0.0, "lr": 6.096684810669855e-07, "epoch": 3.3245347762966873, "percentage": 66.49, "elapsed_time": "0:37:12", "remaining_time": "0:18:45", "throughput": 5555.97, "total_tokens": 12403584}
|
|
{"current_steps": 25195, "total_steps": 37885, "loss": 0.0352, "lr": 6.092443695461289e-07, "epoch": 3.325194668074436, "percentage": 66.5, "elapsed_time": "0:37:12", "remaining_time": "0:18:44", "throughput": 5556.28, "total_tokens": 12406144}
|
|
{"current_steps": 25200, "total_steps": 37885, "loss": 0.0, "lr": 6.08820340960326e-07, "epoch": 3.3258545598521843, "percentage": 66.52, "elapsed_time": "0:37:13", "remaining_time": "0:18:44", "throughput": 5556.5, "total_tokens": 12408512}
|
|
{"current_steps": 25205, "total_steps": 37885, "loss": 0.0, "lr": 6.083963953995728e-07, "epoch": 3.3265144516299325, "percentage": 66.53, "elapsed_time": "0:37:13", "remaining_time": "0:18:43", "throughput": 5556.83, "total_tokens": 12411136}
|
|
{"current_steps": 25210, "total_steps": 37885, "loss": 0.0005, "lr": 6.079725329538486e-07, "epoch": 3.3271743434076813, "percentage": 66.54, "elapsed_time": "0:37:13", "remaining_time": "0:18:43", "throughput": 5556.98, "total_tokens": 12413312}
|
|
{"current_steps": 25215, "total_steps": 37885, "loss": 0.028, "lr": 6.075487537131158e-07, "epoch": 3.3278342351854295, "percentage": 66.56, "elapsed_time": "0:37:14", "remaining_time": "0:18:42", "throughput": 5557.24, "total_tokens": 12415744}
|
|
{"current_steps": 25220, "total_steps": 37885, "loss": 0.0006, "lr": 6.071250577673179e-07, "epoch": 3.328494126963178, "percentage": 66.57, "elapsed_time": "0:37:14", "remaining_time": "0:18:42", "throughput": 5557.47, "total_tokens": 12418112}
|
|
{"current_steps": 25225, "total_steps": 37885, "loss": 0.0, "lr": 6.067014452063816e-07, "epoch": 3.3291540187409265, "percentage": 66.58, "elapsed_time": "0:37:14", "remaining_time": "0:18:41", "throughput": 5557.77, "total_tokens": 12420672}
|
|
{"current_steps": 25230, "total_steps": 37885, "loss": 0.0, "lr": 6.062779161202156e-07, "epoch": 3.329813910518675, "percentage": 66.6, "elapsed_time": "0:37:15", "remaining_time": "0:18:41", "throughput": 5557.91, "total_tokens": 12422848}
|
|
{"current_steps": 25235, "total_steps": 37885, "loss": 0.0, "lr": 6.058544705987105e-07, "epoch": 3.3304738022964235, "percentage": 66.61, "elapsed_time": "0:37:15", "remaining_time": "0:18:40", "throughput": 5558.17, "total_tokens": 12425280}
|
|
{"current_steps": 25240, "total_steps": 37885, "loss": 0.0366, "lr": 6.0543110873174e-07, "epoch": 3.331133694074172, "percentage": 66.62, "elapsed_time": "0:37:15", "remaining_time": "0:18:40", "throughput": 5558.43, "total_tokens": 12427712}
|
|
{"current_steps": 25245, "total_steps": 37885, "loss": 0.0, "lr": 6.050078306091595e-07, "epoch": 3.33179358585192, "percentage": 66.64, "elapsed_time": "0:37:16", "remaining_time": "0:18:39", "throughput": 5558.84, "total_tokens": 12430528}
|
|
{"current_steps": 25250, "total_steps": 37885, "loss": 0.0001, "lr": 6.045846363208066e-07, "epoch": 3.332453477629669, "percentage": 66.65, "elapsed_time": "0:37:16", "remaining_time": "0:18:39", "throughput": 5559.01, "total_tokens": 12432768}
|
|
{"current_steps": 25255, "total_steps": 37885, "loss": 0.0, "lr": 6.041615259565014e-07, "epoch": 3.333113369407417, "percentage": 66.66, "elapsed_time": "0:37:16", "remaining_time": "0:18:38", "throughput": 5559.34, "total_tokens": 12435392}
|
|
{"current_steps": 25260, "total_steps": 37885, "loss": 0.0, "lr": 6.037384996060455e-07, "epoch": 3.333773261185166, "percentage": 66.68, "elapsed_time": "0:37:17", "remaining_time": "0:18:38", "throughput": 5559.5, "total_tokens": 12437568}
|
|
{"current_steps": 25265, "total_steps": 37885, "loss": 0.0266, "lr": 6.033155573592239e-07, "epoch": 3.334433152962914, "percentage": 66.69, "elapsed_time": "0:37:17", "remaining_time": "0:18:37", "throughput": 5559.66, "total_tokens": 12439744}
|
|
{"current_steps": 25270, "total_steps": 37885, "loss": 0.0252, "lr": 6.028926993058026e-07, "epoch": 3.3350930447406624, "percentage": 66.7, "elapsed_time": "0:37:17", "remaining_time": "0:18:37", "throughput": 5559.87, "total_tokens": 12442048}
|
|
{"current_steps": 25275, "total_steps": 37885, "loss": 0.0001, "lr": 6.024699255355302e-07, "epoch": 3.335752936518411, "percentage": 66.72, "elapsed_time": "0:37:18", "remaining_time": "0:18:36", "throughput": 5560.35, "total_tokens": 12444992}
|
|
{"current_steps": 25280, "total_steps": 37885, "loss": 0.0002, "lr": 6.020472361381374e-07, "epoch": 3.3364128282961594, "percentage": 66.73, "elapsed_time": "0:37:18", "remaining_time": "0:18:36", "throughput": 5560.57, "total_tokens": 12447296}
|
|
{"current_steps": 25285, "total_steps": 37885, "loss": 0.0, "lr": 6.016246312033371e-07, "epoch": 3.337072720073908, "percentage": 66.74, "elapsed_time": "0:37:18", "remaining_time": "0:18:35", "throughput": 5560.91, "total_tokens": 12449920}
|
|
{"current_steps": 25290, "total_steps": 37885, "loss": 0.0294, "lr": 6.01202110820824e-07, "epoch": 3.3377326118516564, "percentage": 66.75, "elapsed_time": "0:37:19", "remaining_time": "0:18:35", "throughput": 5561.2, "total_tokens": 12452416}
|
|
{"current_steps": 25295, "total_steps": 37885, "loss": 0.0736, "lr": 6.007796750802748e-07, "epoch": 3.3383925036294047, "percentage": 66.77, "elapsed_time": "0:37:19", "remaining_time": "0:18:34", "throughput": 5561.45, "total_tokens": 12454784}
|
|
{"current_steps": 25300, "total_steps": 37885, "loss": 0.0, "lr": 6.003573240713489e-07, "epoch": 3.3390523954071534, "percentage": 66.78, "elapsed_time": "0:37:19", "remaining_time": "0:18:34", "throughput": 5561.85, "total_tokens": 12457536}
|
|
{"current_steps": 25305, "total_steps": 37885, "loss": 0.0004, "lr": 5.999350578836868e-07, "epoch": 3.3397122871849017, "percentage": 66.79, "elapsed_time": "0:37:20", "remaining_time": "0:18:33", "throughput": 5562.15, "total_tokens": 12460032}
|
|
{"current_steps": 25310, "total_steps": 37885, "loss": 0.0, "lr": 5.995128766069118e-07, "epoch": 3.3403721789626504, "percentage": 66.81, "elapsed_time": "0:37:20", "remaining_time": "0:18:33", "throughput": 5562.37, "total_tokens": 12462336}
|
|
{"current_steps": 25315, "total_steps": 37885, "loss": 0.0813, "lr": 5.990907803306286e-07, "epoch": 3.3410320707403987, "percentage": 66.82, "elapsed_time": "0:37:20", "remaining_time": "0:18:32", "throughput": 5562.72, "total_tokens": 12464960}
|
|
{"current_steps": 25320, "total_steps": 37885, "loss": 0.0001, "lr": 5.986687691444239e-07, "epoch": 3.341691962518147, "percentage": 66.83, "elapsed_time": "0:37:21", "remaining_time": "0:18:32", "throughput": 5563.1, "total_tokens": 12467648}
|
|
{"current_steps": 25325, "total_steps": 37885, "loss": 0.0, "lr": 5.98246843137867e-07, "epoch": 3.3423518542958957, "percentage": 66.85, "elapsed_time": "0:37:21", "remaining_time": "0:18:31", "throughput": 5563.4, "total_tokens": 12470144}
|
|
{"current_steps": 25330, "total_steps": 37885, "loss": 0.0001, "lr": 5.978250024005082e-07, "epoch": 3.343011746073644, "percentage": 66.86, "elapsed_time": "0:37:21", "remaining_time": "0:18:31", "throughput": 5563.64, "total_tokens": 12472512}
|
|
{"current_steps": 25335, "total_steps": 37885, "loss": 0.0, "lr": 5.974032470218804e-07, "epoch": 3.343671637851392, "percentage": 66.87, "elapsed_time": "0:37:22", "remaining_time": "0:18:30", "throughput": 5563.89, "total_tokens": 12474880}
|
|
{"current_steps": 25340, "total_steps": 37885, "loss": 0.0502, "lr": 5.969815770914983e-07, "epoch": 3.344331529629141, "percentage": 66.89, "elapsed_time": "0:37:22", "remaining_time": "0:18:30", "throughput": 5564.28, "total_tokens": 12477632}
|
|
{"current_steps": 25345, "total_steps": 37885, "loss": 0.0, "lr": 5.965599926988575e-07, "epoch": 3.344991421406889, "percentage": 66.9, "elapsed_time": "0:37:22", "remaining_time": "0:18:29", "throughput": 5564.81, "total_tokens": 12480704}
|
|
{"current_steps": 25350, "total_steps": 37885, "loss": 0.0003, "lr": 5.961384939334373e-07, "epoch": 3.3456513131846375, "percentage": 66.91, "elapsed_time": "0:37:23", "remaining_time": "0:18:29", "throughput": 5565.11, "total_tokens": 12483200}
|
|
{"current_steps": 25355, "total_steps": 37885, "loss": 0.0011, "lr": 5.957170808846968e-07, "epoch": 3.346311204962386, "percentage": 66.93, "elapsed_time": "0:37:23", "remaining_time": "0:18:28", "throughput": 5565.32, "total_tokens": 12485504}
|
|
{"current_steps": 25360, "total_steps": 37885, "loss": 0.0, "lr": 5.952957536420786e-07, "epoch": 3.3469710967401345, "percentage": 66.94, "elapsed_time": "0:37:23", "remaining_time": "0:18:28", "throughput": 5565.57, "total_tokens": 12487872}
|
|
{"current_steps": 25365, "total_steps": 37885, "loss": 0.0, "lr": 5.948745122950061e-07, "epoch": 3.347630988517883, "percentage": 66.95, "elapsed_time": "0:37:24", "remaining_time": "0:18:27", "throughput": 5565.71, "total_tokens": 12489984}
|
|
{"current_steps": 25370, "total_steps": 37885, "loss": 0.0, "lr": 5.944533569328841e-07, "epoch": 3.3482908802956315, "percentage": 66.97, "elapsed_time": "0:37:24", "remaining_time": "0:18:27", "throughput": 5566.03, "total_tokens": 12492544}
|
|
{"current_steps": 25375, "total_steps": 37885, "loss": 0.0267, "lr": 5.940322876451009e-07, "epoch": 3.3489507720733798, "percentage": 66.98, "elapsed_time": "0:37:24", "remaining_time": "0:18:26", "throughput": 5566.13, "total_tokens": 12494592}
|
|
{"current_steps": 25380, "total_steps": 37885, "loss": 0.0, "lr": 5.936113045210245e-07, "epoch": 3.3496106638511285, "percentage": 66.99, "elapsed_time": "0:37:25", "remaining_time": "0:18:26", "throughput": 5566.34, "total_tokens": 12496896}
|
|
{"current_steps": 25385, "total_steps": 37885, "loss": 0.1251, "lr": 5.931904076500062e-07, "epoch": 3.3502705556288768, "percentage": 67.01, "elapsed_time": "0:37:25", "remaining_time": "0:18:25", "throughput": 5566.74, "total_tokens": 12499648}
|
|
{"current_steps": 25390, "total_steps": 37885, "loss": 0.002, "lr": 5.927695971213781e-07, "epoch": 3.3509304474066255, "percentage": 67.02, "elapsed_time": "0:37:25", "remaining_time": "0:18:25", "throughput": 5567.09, "total_tokens": 12502272}
|
|
{"current_steps": 25395, "total_steps": 37885, "loss": 0.0, "lr": 5.923488730244537e-07, "epoch": 3.3515903391843738, "percentage": 67.03, "elapsed_time": "0:37:26", "remaining_time": "0:18:24", "throughput": 5567.46, "total_tokens": 12504960}
|
|
{"current_steps": 25400, "total_steps": 37885, "loss": 0.0413, "lr": 5.919282354485293e-07, "epoch": 3.352250230962122, "percentage": 67.05, "elapsed_time": "0:37:26", "remaining_time": "0:18:24", "throughput": 5567.76, "total_tokens": 12507456}
|
|
{"current_steps": 25405, "total_steps": 37885, "loss": 0.0002, "lr": 5.915076844828817e-07, "epoch": 3.3529101227398708, "percentage": 67.06, "elapsed_time": "0:37:26", "remaining_time": "0:18:23", "throughput": 5568.0, "total_tokens": 12509824}
|
|
{"current_steps": 25410, "total_steps": 37885, "loss": 0.0, "lr": 5.910872202167701e-07, "epoch": 3.353570014517619, "percentage": 67.07, "elapsed_time": "0:37:27", "remaining_time": "0:18:23", "throughput": 5568.4, "total_tokens": 12512576}
|
|
{"current_steps": 25415, "total_steps": 37885, "loss": 0.0, "lr": 5.90666842739435e-07, "epoch": 3.3542299062953678, "percentage": 67.08, "elapsed_time": "0:37:27", "remaining_time": "0:18:22", "throughput": 5568.69, "total_tokens": 12515072}
|
|
{"current_steps": 25420, "total_steps": 37885, "loss": 0.0025, "lr": 5.902465521400982e-07, "epoch": 3.354889798073116, "percentage": 67.1, "elapsed_time": "0:37:27", "remaining_time": "0:18:22", "throughput": 5569.01, "total_tokens": 12517632}
|
|
{"current_steps": 25425, "total_steps": 37885, "loss": 0.0032, "lr": 5.898263485079636e-07, "epoch": 3.3555496898508643, "percentage": 67.11, "elapsed_time": "0:37:28", "remaining_time": "0:18:21", "throughput": 5569.2, "total_tokens": 12519872}
|
|
{"current_steps": 25430, "total_steps": 37885, "loss": 0.0, "lr": 5.89406231932216e-07, "epoch": 3.356209581628613, "percentage": 67.12, "elapsed_time": "0:37:28", "remaining_time": "0:18:21", "throughput": 5569.47, "total_tokens": 12522304}
|
|
{"current_steps": 25435, "total_steps": 37885, "loss": 0.0922, "lr": 5.889862025020227e-07, "epoch": 3.3568694734063613, "percentage": 67.14, "elapsed_time": "0:37:28", "remaining_time": "0:18:20", "throughput": 5569.63, "total_tokens": 12524480}
|
|
{"current_steps": 25440, "total_steps": 37885, "loss": 0.0001, "lr": 5.885662603065316e-07, "epoch": 3.35752936518411, "percentage": 67.15, "elapsed_time": "0:37:29", "remaining_time": "0:18:20", "throughput": 5569.95, "total_tokens": 12527040}
|
|
{"current_steps": 25445, "total_steps": 37885, "loss": 0.0, "lr": 5.881464054348721e-07, "epoch": 3.3581892569618583, "percentage": 67.16, "elapsed_time": "0:37:29", "remaining_time": "0:18:19", "throughput": 5570.12, "total_tokens": 12529216}
|
|
{"current_steps": 25450, "total_steps": 37885, "loss": 0.0, "lr": 5.877266379761561e-07, "epoch": 3.3588491487396066, "percentage": 67.18, "elapsed_time": "0:37:29", "remaining_time": "0:18:19", "throughput": 5570.37, "total_tokens": 12531584}
|
|
{"current_steps": 25455, "total_steps": 37885, "loss": 0.0, "lr": 5.873069580194753e-07, "epoch": 3.3595090405173553, "percentage": 67.19, "elapsed_time": "0:37:30", "remaining_time": "0:18:18", "throughput": 5570.69, "total_tokens": 12534144}
|
|
{"current_steps": 25460, "total_steps": 37885, "loss": 0.0337, "lr": 5.868873656539044e-07, "epoch": 3.3601689322951036, "percentage": 67.2, "elapsed_time": "0:37:30", "remaining_time": "0:18:18", "throughput": 5570.85, "total_tokens": 12536320}
|
|
{"current_steps": 25465, "total_steps": 37885, "loss": 0.0002, "lr": 5.864678609684986e-07, "epoch": 3.360828824072852, "percentage": 67.22, "elapsed_time": "0:37:30", "remaining_time": "0:18:17", "throughput": 5571.07, "total_tokens": 12538624}
|
|
{"current_steps": 25470, "total_steps": 37885, "loss": 0.0007, "lr": 5.860484440522946e-07, "epoch": 3.3614887158506006, "percentage": 67.23, "elapsed_time": "0:37:30", "remaining_time": "0:18:17", "throughput": 5571.36, "total_tokens": 12541120}
|
|
{"current_steps": 25475, "total_steps": 37885, "loss": 0.1057, "lr": 5.856291149943109e-07, "epoch": 3.362148607628349, "percentage": 67.24, "elapsed_time": "0:37:31", "remaining_time": "0:18:16", "throughput": 5571.58, "total_tokens": 12543424}
|
|
{"current_steps": 25480, "total_steps": 37885, "loss": 0.0891, "lr": 5.852098738835467e-07, "epoch": 3.362808499406097, "percentage": 67.26, "elapsed_time": "0:37:31", "remaining_time": "0:18:16", "throughput": 5571.91, "total_tokens": 12545984}
|
|
{"current_steps": 25485, "total_steps": 37885, "loss": 0.0404, "lr": 5.847907208089834e-07, "epoch": 3.363468391183846, "percentage": 67.27, "elapsed_time": "0:37:31", "remaining_time": "0:18:15", "throughput": 5572.25, "total_tokens": 12548608}
|
|
{"current_steps": 25490, "total_steps": 37885, "loss": 0.0, "lr": 5.843716558595831e-07, "epoch": 3.364128282961594, "percentage": 67.28, "elapsed_time": "0:37:32", "remaining_time": "0:18:15", "throughput": 5572.54, "total_tokens": 12551104}
|
|
{"current_steps": 25495, "total_steps": 37885, "loss": 0.0276, "lr": 5.839526791242883e-07, "epoch": 3.364788174739343, "percentage": 67.3, "elapsed_time": "0:37:32", "remaining_time": "0:18:14", "throughput": 5572.84, "total_tokens": 12553600}
|
|
{"current_steps": 25500, "total_steps": 37885, "loss": 0.0009, "lr": 5.835337906920253e-07, "epoch": 3.365448066517091, "percentage": 67.31, "elapsed_time": "0:37:32", "remaining_time": "0:18:14", "throughput": 5573.1, "total_tokens": 12556032}
|
|
{"current_steps": 25505, "total_steps": 37885, "loss": 0.0394, "lr": 5.831149906516989e-07, "epoch": 3.3661079582948394, "percentage": 67.32, "elapsed_time": "0:37:33", "remaining_time": "0:18:13", "throughput": 5573.36, "total_tokens": 12558464}
|
|
{"current_steps": 25510, "total_steps": 37885, "loss": 0.0, "lr": 5.826962790921974e-07, "epoch": 3.366767850072588, "percentage": 67.34, "elapsed_time": "0:37:33", "remaining_time": "0:18:13", "throughput": 5573.55, "total_tokens": 12560704}
|
|
{"current_steps": 25515, "total_steps": 37885, "loss": 0.0337, "lr": 5.822776561023885e-07, "epoch": 3.3674277418503364, "percentage": 67.35, "elapsed_time": "0:37:33", "remaining_time": "0:18:12", "throughput": 5573.82, "total_tokens": 12563136}
|
|
{"current_steps": 25520, "total_steps": 37885, "loss": 0.0001, "lr": 5.81859121771122e-07, "epoch": 3.368087633628085, "percentage": 67.36, "elapsed_time": "0:37:34", "remaining_time": "0:18:12", "throughput": 5574.0, "total_tokens": 12565376}
|
|
{"current_steps": 25525, "total_steps": 37885, "loss": 0.0, "lr": 5.814406761872294e-07, "epoch": 3.3687475254058334, "percentage": 67.37, "elapsed_time": "0:37:34", "remaining_time": "0:18:11", "throughput": 5574.22, "total_tokens": 12567680}
|
|
{"current_steps": 25530, "total_steps": 37885, "loss": 0.0018, "lr": 5.810223194395221e-07, "epoch": 3.3694074171835817, "percentage": 67.39, "elapsed_time": "0:37:34", "remaining_time": "0:18:11", "throughput": 5574.51, "total_tokens": 12570176}
|
|
{"current_steps": 25535, "total_steps": 37885, "loss": 0.0631, "lr": 5.806040516167933e-07, "epoch": 3.3700673089613304, "percentage": 67.4, "elapsed_time": "0:37:35", "remaining_time": "0:18:10", "throughput": 5574.81, "total_tokens": 12572672}
|
|
{"current_steps": 25540, "total_steps": 37885, "loss": 0.0364, "lr": 5.801858728078179e-07, "epoch": 3.3707272007390787, "percentage": 67.41, "elapsed_time": "0:37:35", "remaining_time": "0:18:10", "throughput": 5575.15, "total_tokens": 12575296}
|
|
{"current_steps": 25545, "total_steps": 37885, "loss": 0.076, "lr": 5.797677831013506e-07, "epoch": 3.3713870925168274, "percentage": 67.43, "elapsed_time": "0:37:35", "remaining_time": "0:18:09", "throughput": 5575.47, "total_tokens": 12577856}
|
|
{"current_steps": 25550, "total_steps": 37885, "loss": 0.0607, "lr": 5.793497825861283e-07, "epoch": 3.3720469842945757, "percentage": 67.44, "elapsed_time": "0:37:36", "remaining_time": "0:18:09", "throughput": 5575.7, "total_tokens": 12580224}
|
|
{"current_steps": 25555, "total_steps": 37885, "loss": 0.055, "lr": 5.789318713508686e-07, "epoch": 3.372706876072324, "percentage": 67.45, "elapsed_time": "0:37:36", "remaining_time": "0:18:08", "throughput": 5575.89, "total_tokens": 12582464}
|
|
{"current_steps": 25560, "total_steps": 37885, "loss": 0.0001, "lr": 5.785140494842704e-07, "epoch": 3.3733667678500727, "percentage": 67.47, "elapsed_time": "0:37:36", "remaining_time": "0:18:08", "throughput": 5576.1, "total_tokens": 12584768}
|
|
{"current_steps": 25565, "total_steps": 37885, "loss": 0.0001, "lr": 5.780963170750129e-07, "epoch": 3.374026659627821, "percentage": 67.48, "elapsed_time": "0:37:37", "remaining_time": "0:18:07", "throughput": 5576.4, "total_tokens": 12587264}
|
|
{"current_steps": 25570, "total_steps": 37885, "loss": 0.0001, "lr": 5.776786742117564e-07, "epoch": 3.3746865514055697, "percentage": 67.49, "elapsed_time": "0:37:37", "remaining_time": "0:18:07", "throughput": 5576.68, "total_tokens": 12589760}
|
|
{"current_steps": 25575, "total_steps": 37885, "loss": 0.0678, "lr": 5.772611209831436e-07, "epoch": 3.375346443183318, "percentage": 67.51, "elapsed_time": "0:37:37", "remaining_time": "0:18:06", "throughput": 5576.99, "total_tokens": 12592320}
|
|
{"current_steps": 25580, "total_steps": 37885, "loss": 0.0014, "lr": 5.768436574777964e-07, "epoch": 3.3760063349610663, "percentage": 67.52, "elapsed_time": "0:37:38", "remaining_time": "0:18:06", "throughput": 5577.34, "total_tokens": 12594944}
|
|
{"current_steps": 25585, "total_steps": 37885, "loss": 0.0001, "lr": 5.764262837843186e-07, "epoch": 3.376666226738815, "percentage": 67.53, "elapsed_time": "0:37:38", "remaining_time": "0:18:05", "throughput": 5577.58, "total_tokens": 12597312}
|
|
{"current_steps": 25590, "total_steps": 37885, "loss": 0.0004, "lr": 5.760089999912947e-07, "epoch": 3.3773261185165633, "percentage": 67.55, "elapsed_time": "0:37:38", "remaining_time": "0:18:05", "throughput": 5577.85, "total_tokens": 12599744}
|
|
{"current_steps": 25595, "total_steps": 37885, "loss": 0.0473, "lr": 5.755918061872907e-07, "epoch": 3.3779860102943116, "percentage": 67.56, "elapsed_time": "0:37:39", "remaining_time": "0:18:04", "throughput": 5578.25, "total_tokens": 12602496}
|
|
{"current_steps": 25600, "total_steps": 37885, "loss": 0.0001, "lr": 5.751747024608527e-07, "epoch": 3.3786459020720603, "percentage": 67.57, "elapsed_time": "0:37:39", "remaining_time": "0:18:04", "throughput": 5578.44, "total_tokens": 12604736}
|
|
{"current_steps": 25605, "total_steps": 37885, "loss": 0.0474, "lr": 5.747576889005068e-07, "epoch": 3.3793057938498086, "percentage": 67.59, "elapsed_time": "0:37:39", "remaining_time": "0:18:03", "throughput": 5578.7, "total_tokens": 12607168}
|
|
{"current_steps": 25610, "total_steps": 37885, "loss": 0.0322, "lr": 5.743407655947627e-07, "epoch": 3.379965685627557, "percentage": 67.6, "elapsed_time": "0:37:40", "remaining_time": "0:18:03", "throughput": 5579.09, "total_tokens": 12609920}
|
|
{"current_steps": 25615, "total_steps": 37885, "loss": 0.0329, "lr": 5.739239326321086e-07, "epoch": 3.3806255774053056, "percentage": 67.61, "elapsed_time": "0:37:40", "remaining_time": "0:18:02", "throughput": 5579.28, "total_tokens": 12612160}
|
|
{"current_steps": 25620, "total_steps": 37885, "loss": 0.0, "lr": 5.735071901010146e-07, "epoch": 3.381285469183054, "percentage": 67.63, "elapsed_time": "0:37:40", "remaining_time": "0:18:02", "throughput": 5579.59, "total_tokens": 12614720}
|
|
{"current_steps": 25625, "total_steps": 37885, "loss": 0.0001, "lr": 5.730905380899309e-07, "epoch": 3.3819453609608026, "percentage": 67.64, "elapsed_time": "0:37:41", "remaining_time": "0:18:01", "throughput": 5579.98, "total_tokens": 12617472}
|
|
{"current_steps": 25630, "total_steps": 37885, "loss": 0.0, "lr": 5.72673976687289e-07, "epoch": 3.382605252738551, "percentage": 67.65, "elapsed_time": "0:37:41", "remaining_time": "0:18:01", "throughput": 5580.27, "total_tokens": 12619968}
|
|
{"current_steps": 25635, "total_steps": 37885, "loss": 0.0022, "lr": 5.722575059815014e-07, "epoch": 3.383265144516299, "percentage": 67.67, "elapsed_time": "0:37:41", "remaining_time": "0:18:00", "throughput": 5580.49, "total_tokens": 12622272}
|
|
{"current_steps": 25640, "total_steps": 37885, "loss": 0.0005, "lr": 5.718411260609599e-07, "epoch": 3.383925036294048, "percentage": 67.68, "elapsed_time": "0:37:42", "remaining_time": "0:18:00", "throughput": 5580.7, "total_tokens": 12624576}
|
|
{"current_steps": 25645, "total_steps": 37885, "loss": 0.0251, "lr": 5.714248370140397e-07, "epoch": 3.384584928071796, "percentage": 67.69, "elapsed_time": "0:37:42", "remaining_time": "0:17:59", "throughput": 5581.06, "total_tokens": 12627264}
|
|
{"current_steps": 25650, "total_steps": 37885, "loss": 0.004, "lr": 5.710086389290945e-07, "epoch": 3.385244819849545, "percentage": 67.7, "elapsed_time": "0:37:42", "remaining_time": "0:17:59", "throughput": 5581.43, "total_tokens": 12629952}
|
|
{"current_steps": 25655, "total_steps": 37885, "loss": 0.0, "lr": 5.705925318944585e-07, "epoch": 3.385904711627293, "percentage": 67.72, "elapsed_time": "0:37:43", "remaining_time": "0:17:58", "throughput": 5581.85, "total_tokens": 12632768}
|
|
{"current_steps": 25660, "total_steps": 37885, "loss": 0.0, "lr": 5.701765159984483e-07, "epoch": 3.3865646034050414, "percentage": 67.73, "elapsed_time": "0:37:43", "remaining_time": "0:17:58", "throughput": 5582.04, "total_tokens": 12635008}
|
|
{"current_steps": 25665, "total_steps": 37885, "loss": 0.0, "lr": 5.6976059132936e-07, "epoch": 3.38722449518279, "percentage": 67.74, "elapsed_time": "0:37:43", "remaining_time": "0:17:57", "throughput": 5582.36, "total_tokens": 12637568}
|
|
{"current_steps": 25670, "total_steps": 37885, "loss": 0.0675, "lr": 5.69344757975471e-07, "epoch": 3.3878843869605384, "percentage": 67.76, "elapsed_time": "0:37:44", "remaining_time": "0:17:57", "throughput": 5582.7, "total_tokens": 12640192}
|
|
{"current_steps": 25675, "total_steps": 37885, "loss": 0.1155, "lr": 5.689290160250382e-07, "epoch": 3.388544278738287, "percentage": 67.77, "elapsed_time": "0:37:44", "remaining_time": "0:17:56", "throughput": 5582.99, "total_tokens": 12642688}
|
|
{"current_steps": 25680, "total_steps": 37885, "loss": 0.0025, "lr": 5.685133655663001e-07, "epoch": 3.3892041705160354, "percentage": 67.78, "elapsed_time": "0:37:44", "remaining_time": "0:17:56", "throughput": 5583.35, "total_tokens": 12645376}
|
|
{"current_steps": 25685, "total_steps": 37885, "loss": 0.1427, "lr": 5.68097806687476e-07, "epoch": 3.3898640622937837, "percentage": 67.8, "elapsed_time": "0:37:45", "remaining_time": "0:17:55", "throughput": 5583.7, "total_tokens": 12648000}
|
|
{"current_steps": 25690, "total_steps": 37885, "loss": 0.0001, "lr": 5.676823394767644e-07, "epoch": 3.3905239540715324, "percentage": 67.81, "elapsed_time": "0:37:45", "remaining_time": "0:17:55", "throughput": 5584.12, "total_tokens": 12650816}
|
|
{"current_steps": 25695, "total_steps": 37885, "loss": 0.0044, "lr": 5.672669640223458e-07, "epoch": 3.3911838458492807, "percentage": 67.82, "elapsed_time": "0:37:45", "remaining_time": "0:17:54", "throughput": 5584.43, "total_tokens": 12653376}
|
|
{"current_steps": 25700, "total_steps": 37885, "loss": 0.0, "lr": 5.668516804123808e-07, "epoch": 3.3918437376270294, "percentage": 67.84, "elapsed_time": "0:37:46", "remaining_time": "0:17:54", "throughput": 5584.77, "total_tokens": 12656000}
|
|
{"current_steps": 25705, "total_steps": 37885, "loss": 0.0018, "lr": 5.664364887350097e-07, "epoch": 3.3925036294047777, "percentage": 67.85, "elapsed_time": "0:37:46", "remaining_time": "0:17:53", "throughput": 5585.07, "total_tokens": 12658496}
|
|
{"current_steps": 25710, "total_steps": 37885, "loss": 0.0, "lr": 5.660213890783541e-07, "epoch": 3.393163521182526, "percentage": 67.86, "elapsed_time": "0:37:46", "remaining_time": "0:17:53", "throughput": 5585.31, "total_tokens": 12660864}
|
|
{"current_steps": 25715, "total_steps": 37885, "loss": 0.0, "lr": 5.656063815305161e-07, "epoch": 3.3938234129602747, "percentage": 67.88, "elapsed_time": "0:37:47", "remaining_time": "0:17:52", "throughput": 5585.62, "total_tokens": 12663424}
|
|
{"current_steps": 25720, "total_steps": 37885, "loss": 0.0, "lr": 5.651914661795785e-07, "epoch": 3.394483304738023, "percentage": 67.89, "elapsed_time": "0:37:47", "remaining_time": "0:17:52", "throughput": 5585.89, "total_tokens": 12665856}
|
|
{"current_steps": 25725, "total_steps": 37885, "loss": 0.0001, "lr": 5.64776643113603e-07, "epoch": 3.3951431965157712, "percentage": 67.9, "elapsed_time": "0:37:47", "remaining_time": "0:17:51", "throughput": 5586.18, "total_tokens": 12668352}
|
|
{"current_steps": 25730, "total_steps": 37885, "loss": 0.0, "lr": 5.643619124206333e-07, "epoch": 3.39580308829352, "percentage": 67.92, "elapsed_time": "0:37:48", "remaining_time": "0:17:51", "throughput": 5586.37, "total_tokens": 12670592}
|
|
{"current_steps": 25735, "total_steps": 37885, "loss": 0.0518, "lr": 5.639472741886937e-07, "epoch": 3.3964629800712682, "percentage": 67.93, "elapsed_time": "0:37:48", "remaining_time": "0:17:50", "throughput": 5586.64, "total_tokens": 12673024}
|
|
{"current_steps": 25740, "total_steps": 37885, "loss": 0.0003, "lr": 5.635327285057869e-07, "epoch": 3.3971228718490165, "percentage": 67.94, "elapsed_time": "0:37:48", "remaining_time": "0:17:50", "throughput": 5586.9, "total_tokens": 12675456}
|
|
{"current_steps": 25745, "total_steps": 37885, "loss": 0.0, "lr": 5.63118275459898e-07, "epoch": 3.3977827636267652, "percentage": 67.96, "elapsed_time": "0:37:49", "remaining_time": "0:17:49", "throughput": 5587.24, "total_tokens": 12678080}
|
|
{"current_steps": 25750, "total_steps": 37885, "loss": 0.0595, "lr": 5.627039151389917e-07, "epoch": 3.3984426554045135, "percentage": 67.97, "elapsed_time": "0:37:49", "remaining_time": "0:17:49", "throughput": 5587.43, "total_tokens": 12680320}
|
|
{"current_steps": 25755, "total_steps": 37885, "loss": 0.0, "lr": 5.622896476310125e-07, "epoch": 3.3991025471822622, "percentage": 67.98, "elapsed_time": "0:37:49", "remaining_time": "0:17:49", "throughput": 5587.63, "total_tokens": 12682624}
|
|
{"current_steps": 25760, "total_steps": 37885, "loss": 0.0, "lr": 5.618754730238863e-07, "epoch": 3.3997624389600105, "percentage": 68.0, "elapsed_time": "0:37:50", "remaining_time": "0:17:48", "throughput": 5587.92, "total_tokens": 12685120}
|
|
{"current_steps": 25765, "total_steps": 37885, "loss": 0.2906, "lr": 5.614613914055175e-07, "epoch": 3.400422330737759, "percentage": 68.01, "elapsed_time": "0:37:50", "remaining_time": "0:17:48", "throughput": 5588.28, "total_tokens": 12687808}
|
|
{"current_steps": 25770, "total_steps": 37885, "loss": 0.0, "lr": 5.610474028637935e-07, "epoch": 3.4010822225155075, "percentage": 68.02, "elapsed_time": "0:37:50", "remaining_time": "0:17:47", "throughput": 5588.54, "total_tokens": 12690240}
|
|
{"current_steps": 25775, "total_steps": 37885, "loss": 0.0, "lr": 5.606335074865795e-07, "epoch": 3.401742114293256, "percentage": 68.03, "elapsed_time": "0:37:51", "remaining_time": "0:17:47", "throughput": 5588.76, "total_tokens": 12692544}
|
|
{"current_steps": 25780, "total_steps": 37885, "loss": 0.0, "lr": 5.602197053617214e-07, "epoch": 3.4024020060710045, "percentage": 68.05, "elapsed_time": "0:37:51", "remaining_time": "0:17:46", "throughput": 5589.07, "total_tokens": 12695104}
|
|
{"current_steps": 25785, "total_steps": 37885, "loss": 0.0, "lr": 5.598059965770468e-07, "epoch": 3.403061897848753, "percentage": 68.06, "elapsed_time": "0:37:51", "remaining_time": "0:17:46", "throughput": 5589.33, "total_tokens": 12697536}
|
|
{"current_steps": 25790, "total_steps": 37885, "loss": 0.0002, "lr": 5.593923812203613e-07, "epoch": 3.403721789626501, "percentage": 68.07, "elapsed_time": "0:37:52", "remaining_time": "0:17:45", "throughput": 5589.55, "total_tokens": 12699840}
|
|
{"current_steps": 25795, "total_steps": 37885, "loss": 0.0, "lr": 5.589788593794529e-07, "epoch": 3.40438168140425, "percentage": 68.09, "elapsed_time": "0:37:52", "remaining_time": "0:17:45", "throughput": 5589.82, "total_tokens": 12702336}
|
|
{"current_steps": 25800, "total_steps": 37885, "loss": 0.0, "lr": 5.585654311420873e-07, "epoch": 3.405041573181998, "percentage": 68.1, "elapsed_time": "0:37:52", "remaining_time": "0:17:44", "throughput": 5590.03, "total_tokens": 12704640}
|
|
{"current_steps": 25805, "total_steps": 37885, "loss": 0.0001, "lr": 5.581520965960125e-07, "epoch": 3.405701464959747, "percentage": 68.11, "elapsed_time": "0:37:53", "remaining_time": "0:17:44", "throughput": 5590.37, "total_tokens": 12707264}
|
|
{"current_steps": 25810, "total_steps": 37885, "loss": 0.028, "lr": 5.57738855828956e-07, "epoch": 3.406361356737495, "percentage": 68.13, "elapsed_time": "0:37:53", "remaining_time": "0:17:43", "throughput": 5590.7, "total_tokens": 12709888}
|
|
{"current_steps": 25815, "total_steps": 37885, "loss": 0.0, "lr": 5.573257089286243e-07, "epoch": 3.4070212485152434, "percentage": 68.14, "elapsed_time": "0:37:53", "remaining_time": "0:17:43", "throughput": 5590.91, "total_tokens": 12712192}
|
|
{"current_steps": 25820, "total_steps": 37885, "loss": 0.0454, "lr": 5.569126559827053e-07, "epoch": 3.407681140292992, "percentage": 68.15, "elapsed_time": "0:37:54", "remaining_time": "0:17:42", "throughput": 5591.15, "total_tokens": 12714560}
|
|
{"current_steps": 25825, "total_steps": 37885, "loss": 0.0, "lr": 5.564996970788667e-07, "epoch": 3.4083410320707404, "percentage": 68.17, "elapsed_time": "0:37:54", "remaining_time": "0:17:42", "throughput": 5591.44, "total_tokens": 12717056}
|
|
{"current_steps": 25830, "total_steps": 37885, "loss": 0.0059, "lr": 5.560868323047556e-07, "epoch": 3.409000923848489, "percentage": 68.18, "elapsed_time": "0:37:54", "remaining_time": "0:17:41", "throughput": 5591.73, "total_tokens": 12719552}
|
|
{"current_steps": 25835, "total_steps": 37885, "loss": 0.0, "lr": 5.556740617479998e-07, "epoch": 3.4096608156262374, "percentage": 68.19, "elapsed_time": "0:37:55", "remaining_time": "0:17:41", "throughput": 5591.96, "total_tokens": 12721920}
|
|
{"current_steps": 25840, "total_steps": 37885, "loss": 0.0352, "lr": 5.552613854962067e-07, "epoch": 3.4103207074039856, "percentage": 68.21, "elapsed_time": "0:37:55", "remaining_time": "0:17:40", "throughput": 5592.32, "total_tokens": 12724608}
|
|
{"current_steps": 25845, "total_steps": 37885, "loss": 0.0472, "lr": 5.548488036369645e-07, "epoch": 3.4109805991817344, "percentage": 68.22, "elapsed_time": "0:37:55", "remaining_time": "0:17:40", "throughput": 5592.63, "total_tokens": 12727168}
|
|
{"current_steps": 25850, "total_steps": 37885, "loss": 0.0, "lr": 5.544363162578399e-07, "epoch": 3.4116404909594826, "percentage": 68.23, "elapsed_time": "0:37:56", "remaining_time": "0:17:39", "throughput": 5592.81, "total_tokens": 12729408}
|
|
{"current_steps": 25855, "total_steps": 37885, "loss": 0.0, "lr": 5.540239234463804e-07, "epoch": 3.412300382737231, "percentage": 68.25, "elapsed_time": "0:37:56", "remaining_time": "0:17:39", "throughput": 5593.2, "total_tokens": 12732160}
|
|
{"current_steps": 25860, "total_steps": 37885, "loss": 0.0, "lr": 5.536116252901142e-07, "epoch": 3.4129602745149796, "percentage": 68.26, "elapsed_time": "0:37:56", "remaining_time": "0:17:38", "throughput": 5593.48, "total_tokens": 12734656}
|
|
{"current_steps": 25865, "total_steps": 37885, "loss": 0.0213, "lr": 5.531994218765477e-07, "epoch": 3.413620166292728, "percentage": 68.27, "elapsed_time": "0:37:57", "remaining_time": "0:17:38", "throughput": 5593.78, "total_tokens": 12737216}
|
|
{"current_steps": 25870, "total_steps": 37885, "loss": 0.0, "lr": 5.527873132931682e-07, "epoch": 3.4142800580704766, "percentage": 68.29, "elapsed_time": "0:37:57", "remaining_time": "0:17:37", "throughput": 5594.17, "total_tokens": 12739968}
|
|
{"current_steps": 25875, "total_steps": 37885, "loss": 0.0366, "lr": 5.523752996274435e-07, "epoch": 3.414939949848225, "percentage": 68.3, "elapsed_time": "0:37:57", "remaining_time": "0:17:37", "throughput": 5594.35, "total_tokens": 12742208}
|
|
{"current_steps": 25880, "total_steps": 37885, "loss": 0.0003, "lr": 5.519633809668197e-07, "epoch": 3.415599841625973, "percentage": 68.31, "elapsed_time": "0:37:58", "remaining_time": "0:17:36", "throughput": 5594.62, "total_tokens": 12744640}
|
|
{"current_steps": 25885, "total_steps": 37885, "loss": 0.0007, "lr": 5.515515573987238e-07, "epoch": 3.416259733403722, "percentage": 68.33, "elapsed_time": "0:37:58", "remaining_time": "0:17:36", "throughput": 5594.85, "total_tokens": 12747008}
|
|
{"current_steps": 25890, "total_steps": 37885, "loss": 0.1469, "lr": 5.511398290105625e-07, "epoch": 3.41691962518147, "percentage": 68.34, "elapsed_time": "0:37:58", "remaining_time": "0:17:35", "throughput": 5595.16, "total_tokens": 12749568}
|
|
{"current_steps": 25895, "total_steps": 37885, "loss": 0.0004, "lr": 5.507281958897224e-07, "epoch": 3.4175795169592185, "percentage": 68.35, "elapsed_time": "0:37:59", "remaining_time": "0:17:35", "throughput": 5595.47, "total_tokens": 12752128}
|
|
{"current_steps": 25900, "total_steps": 37885, "loss": 0.0002, "lr": 5.503166581235694e-07, "epoch": 3.418239408736967, "percentage": 68.36, "elapsed_time": "0:37:59", "remaining_time": "0:17:34", "throughput": 5595.73, "total_tokens": 12754560}
|
|
{"current_steps": 25905, "total_steps": 37885, "loss": 0.0938, "lr": 5.499052157994486e-07, "epoch": 3.4188993005147155, "percentage": 68.38, "elapsed_time": "0:37:59", "remaining_time": "0:17:34", "throughput": 5596.1, "total_tokens": 12757248}
|
|
{"current_steps": 25910, "total_steps": 37885, "loss": 0.0352, "lr": 5.49493869004687e-07, "epoch": 3.419559192292464, "percentage": 68.39, "elapsed_time": "0:38:00", "remaining_time": "0:17:33", "throughput": 5596.43, "total_tokens": 12759872}
|
|
{"current_steps": 25915, "total_steps": 37885, "loss": 0.0001, "lr": 5.490826178265893e-07, "epoch": 3.4202190840702125, "percentage": 68.4, "elapsed_time": "0:38:00", "remaining_time": "0:17:33", "throughput": 5596.66, "total_tokens": 12762240}
|
|
{"current_steps": 25920, "total_steps": 37885, "loss": 0.0026, "lr": 5.486714623524405e-07, "epoch": 3.4208789758479607, "percentage": 68.42, "elapsed_time": "0:38:00", "remaining_time": "0:17:32", "throughput": 5596.86, "total_tokens": 12764544}
|
|
{"current_steps": 25925, "total_steps": 37885, "loss": 0.0, "lr": 5.482604026695057e-07, "epoch": 3.4215388676257095, "percentage": 68.43, "elapsed_time": "0:38:00", "remaining_time": "0:17:32", "throughput": 5597.25, "total_tokens": 12767296}
|
|
{"current_steps": 25930, "total_steps": 37885, "loss": 0.1487, "lr": 5.478494388650295e-07, "epoch": 3.4221987594034577, "percentage": 68.44, "elapsed_time": "0:38:01", "remaining_time": "0:17:31", "throughput": 5597.59, "total_tokens": 12769920}
|
|
{"current_steps": 25935, "total_steps": 37885, "loss": 0.0041, "lr": 5.474385710262357e-07, "epoch": 3.4228586511812065, "percentage": 68.46, "elapsed_time": "0:38:01", "remaining_time": "0:17:31", "throughput": 5597.87, "total_tokens": 12772416}
|
|
{"current_steps": 25940, "total_steps": 37885, "loss": 0.0268, "lr": 5.470277992403271e-07, "epoch": 3.4235185429589547, "percentage": 68.47, "elapsed_time": "0:38:01", "remaining_time": "0:17:30", "throughput": 5598.09, "total_tokens": 12774720}
|
|
{"current_steps": 25945, "total_steps": 37885, "loss": 0.0657, "lr": 5.466171235944889e-07, "epoch": 3.424178434736703, "percentage": 68.48, "elapsed_time": "0:38:02", "remaining_time": "0:17:30", "throughput": 5598.35, "total_tokens": 12777152}
|
|
{"current_steps": 25950, "total_steps": 37885, "loss": 0.0, "lr": 5.462065441758826e-07, "epoch": 3.4248383265144517, "percentage": 68.5, "elapsed_time": "0:38:02", "remaining_time": "0:17:29", "throughput": 5598.68, "total_tokens": 12779776}
|
|
{"current_steps": 25955, "total_steps": 37885, "loss": 0.0984, "lr": 5.457960610716515e-07, "epoch": 3.4254982182922, "percentage": 68.51, "elapsed_time": "0:38:02", "remaining_time": "0:17:29", "throughput": 5598.96, "total_tokens": 12782272}
|
|
{"current_steps": 25960, "total_steps": 37885, "loss": 0.0387, "lr": 5.453856743689172e-07, "epoch": 3.4261581100699487, "percentage": 68.52, "elapsed_time": "0:38:03", "remaining_time": "0:17:28", "throughput": 5599.36, "total_tokens": 12785088}
|
|
{"current_steps": 25965, "total_steps": 37885, "loss": 0.0009, "lr": 5.449753841547811e-07, "epoch": 3.426818001847697, "percentage": 68.54, "elapsed_time": "0:38:03", "remaining_time": "0:17:28", "throughput": 5599.57, "total_tokens": 12787392}
|
|
{"current_steps": 25970, "total_steps": 37885, "loss": 0.0017, "lr": 5.445651905163253e-07, "epoch": 3.4274778936254453, "percentage": 68.55, "elapsed_time": "0:38:03", "remaining_time": "0:17:27", "throughput": 5599.88, "total_tokens": 12789952}
|
|
{"current_steps": 25975, "total_steps": 37885, "loss": 0.02, "lr": 5.441550935406091e-07, "epoch": 3.428137785403194, "percentage": 68.56, "elapsed_time": "0:38:04", "remaining_time": "0:17:27", "throughput": 5600.24, "total_tokens": 12792640}
|
|
{"current_steps": 25980, "total_steps": 37885, "loss": 0.0001, "lr": 5.43745093314674e-07, "epoch": 3.4287976771809423, "percentage": 68.58, "elapsed_time": "0:38:04", "remaining_time": "0:17:26", "throughput": 5600.53, "total_tokens": 12795136}
|
|
{"current_steps": 25985, "total_steps": 37885, "loss": 0.0, "lr": 5.433351899255389e-07, "epoch": 3.4294575689586906, "percentage": 68.59, "elapsed_time": "0:38:04", "remaining_time": "0:17:26", "throughput": 5600.9, "total_tokens": 12797824}
|
|
{"current_steps": 25990, "total_steps": 37885, "loss": 0.0013, "lr": 5.429253834602025e-07, "epoch": 3.4301174607364393, "percentage": 68.6, "elapsed_time": "0:38:05", "remaining_time": "0:17:25", "throughput": 5601.31, "total_tokens": 12800640}
|
|
{"current_steps": 25995, "total_steps": 37885, "loss": 0.0, "lr": 5.425156740056436e-07, "epoch": 3.4307773525141876, "percentage": 68.62, "elapsed_time": "0:38:05", "remaining_time": "0:17:25", "throughput": 5601.62, "total_tokens": 12803200}
|
|
{"current_steps": 26000, "total_steps": 37885, "loss": 0.0001, "lr": 5.4210606164882e-07, "epoch": 3.4314372442919363, "percentage": 68.63, "elapsed_time": "0:38:05", "remaining_time": "0:17:24", "throughput": 5601.91, "total_tokens": 12805696}
|
|
{"current_steps": 26005, "total_steps": 37885, "loss": 0.1064, "lr": 5.416965464766694e-07, "epoch": 3.4320971360696846, "percentage": 68.64, "elapsed_time": "0:38:06", "remaining_time": "0:17:24", "throughput": 5602.37, "total_tokens": 12808640}
|
|
{"current_steps": 26010, "total_steps": 37885, "loss": 0.0001, "lr": 5.412871285761076e-07, "epoch": 3.432757027847433, "percentage": 68.66, "elapsed_time": "0:38:06", "remaining_time": "0:17:23", "throughput": 5602.73, "total_tokens": 12811328}
|
|
{"current_steps": 26015, "total_steps": 37885, "loss": 0.0701, "lr": 5.408778080340311e-07, "epoch": 3.4334169196251816, "percentage": 68.67, "elapsed_time": "0:38:06", "remaining_time": "0:17:23", "throughput": 5603.14, "total_tokens": 12814144}
|
|
{"current_steps": 26020, "total_steps": 37885, "loss": 0.0001, "lr": 5.404685849373154e-07, "epoch": 3.43407681140293, "percentage": 68.68, "elapsed_time": "0:38:07", "remaining_time": "0:17:22", "throughput": 5603.37, "total_tokens": 12816512}
|
|
{"current_steps": 26025, "total_steps": 37885, "loss": 0.0001, "lr": 5.400594593728146e-07, "epoch": 3.434736703180678, "percentage": 68.69, "elapsed_time": "0:38:07", "remaining_time": "0:17:22", "throughput": 5603.55, "total_tokens": 12818752}
|
|
{"current_steps": 26030, "total_steps": 37885, "loss": 0.0, "lr": 5.396504314273629e-07, "epoch": 3.435396594958427, "percentage": 68.71, "elapsed_time": "0:38:07", "remaining_time": "0:17:22", "throughput": 5603.87, "total_tokens": 12821312}
|
|
{"current_steps": 26035, "total_steps": 37885, "loss": 0.0, "lr": 5.39241501187774e-07, "epoch": 3.436056486736175, "percentage": 68.72, "elapsed_time": "0:38:08", "remaining_time": "0:17:21", "throughput": 5604.18, "total_tokens": 12823872}
|
|
{"current_steps": 26040, "total_steps": 37885, "loss": 0.0004, "lr": 5.388326687408395e-07, "epoch": 3.436716378513924, "percentage": 68.73, "elapsed_time": "0:38:08", "remaining_time": "0:17:21", "throughput": 5604.54, "total_tokens": 12826560}
|
|
{"current_steps": 26045, "total_steps": 37885, "loss": 0.0, "lr": 5.384239341733314e-07, "epoch": 3.437376270291672, "percentage": 68.75, "elapsed_time": "0:38:08", "remaining_time": "0:17:20", "throughput": 5604.9, "total_tokens": 12829248}
|
|
{"current_steps": 26050, "total_steps": 37885, "loss": 0.0, "lr": 5.38015297572001e-07, "epoch": 3.4380361620694204, "percentage": 68.76, "elapsed_time": "0:38:09", "remaining_time": "0:17:20", "throughput": 5605.12, "total_tokens": 12831616}
|
|
{"current_steps": 26055, "total_steps": 37885, "loss": 0.0, "lr": 5.376067590235786e-07, "epoch": 3.438696053847169, "percentage": 68.77, "elapsed_time": "0:38:09", "remaining_time": "0:17:19", "throughput": 5605.46, "total_tokens": 12834240}
|
|
{"current_steps": 26060, "total_steps": 37885, "loss": 0.0, "lr": 5.371983186147729e-07, "epoch": 3.4393559456249174, "percentage": 68.79, "elapsed_time": "0:38:09", "remaining_time": "0:17:19", "throughput": 5605.81, "total_tokens": 12836928}
|
|
{"current_steps": 26065, "total_steps": 37885, "loss": 0.0016, "lr": 5.367899764322725e-07, "epoch": 3.440015837402666, "percentage": 68.8, "elapsed_time": "0:38:10", "remaining_time": "0:17:18", "throughput": 5606.25, "total_tokens": 12839808}
|
|
{"current_steps": 26070, "total_steps": 37885, "loss": 0.0673, "lr": 5.363817325627458e-07, "epoch": 3.4406757291804144, "percentage": 68.81, "elapsed_time": "0:38:10", "remaining_time": "0:17:18", "throughput": 5606.48, "total_tokens": 12842176}
|
|
{"current_steps": 26075, "total_steps": 37885, "loss": 0.0, "lr": 5.359735870928388e-07, "epoch": 3.4413356209581627, "percentage": 68.83, "elapsed_time": "0:38:10", "remaining_time": "0:17:17", "throughput": 5606.77, "total_tokens": 12844672}
|
|
{"current_steps": 26080, "total_steps": 37885, "loss": 0.0938, "lr": 5.355655401091776e-07, "epoch": 3.4419955127359114, "percentage": 68.84, "elapsed_time": "0:38:11", "remaining_time": "0:17:17", "throughput": 5606.98, "total_tokens": 12846976}
|
|
{"current_steps": 26085, "total_steps": 37885, "loss": 0.0001, "lr": 5.351575916983677e-07, "epoch": 3.4426554045136597, "percentage": 68.85, "elapsed_time": "0:38:11", "remaining_time": "0:17:16", "throughput": 5607.31, "total_tokens": 12849600}
|
|
{"current_steps": 26090, "total_steps": 37885, "loss": 0.0457, "lr": 5.347497419469926e-07, "epoch": 3.4433152962914084, "percentage": 68.87, "elapsed_time": "0:38:11", "remaining_time": "0:17:16", "throughput": 5607.46, "total_tokens": 12851776}
|
|
{"current_steps": 26095, "total_steps": 37885, "loss": 0.0, "lr": 5.34341990941616e-07, "epoch": 3.4439751880691567, "percentage": 68.88, "elapsed_time": "0:38:12", "remaining_time": "0:17:15", "throughput": 5607.7, "total_tokens": 12854144}
|
|
{"current_steps": 26100, "total_steps": 37885, "loss": 0.0001, "lr": 5.33934338768779e-07, "epoch": 3.444635079846905, "percentage": 68.89, "elapsed_time": "0:38:12", "remaining_time": "0:17:15", "throughput": 5607.99, "total_tokens": 12856640}
|
|
{"current_steps": 26105, "total_steps": 37885, "loss": 0.0, "lr": 5.335267855150045e-07, "epoch": 3.4452949716246537, "percentage": 68.91, "elapsed_time": "0:38:12", "remaining_time": "0:17:14", "throughput": 5608.25, "total_tokens": 12859072}
|
|
{"current_steps": 26110, "total_steps": 37885, "loss": 0.0, "lr": 5.331193312667916e-07, "epoch": 3.445954863402402, "percentage": 68.92, "elapsed_time": "0:38:13", "remaining_time": "0:17:14", "throughput": 5608.64, "total_tokens": 12861824}
|
|
{"current_steps": 26115, "total_steps": 37885, "loss": 0.0298, "lr": 5.327119761106193e-07, "epoch": 3.4466147551801507, "percentage": 68.93, "elapsed_time": "0:38:13", "remaining_time": "0:17:13", "throughput": 5608.82, "total_tokens": 12864064}
|
|
{"current_steps": 26120, "total_steps": 37885, "loss": 0.0036, "lr": 5.323047201329468e-07, "epoch": 3.447274646957899, "percentage": 68.95, "elapsed_time": "0:38:13", "remaining_time": "0:17:13", "throughput": 5609.16, "total_tokens": 12866688}
|
|
{"current_steps": 26125, "total_steps": 37885, "loss": 0.0441, "lr": 5.318975634202103e-07, "epoch": 3.4479345387356473, "percentage": 68.96, "elapsed_time": "0:38:14", "remaining_time": "0:17:12", "throughput": 5609.38, "total_tokens": 12869056}
|
|
{"current_steps": 26130, "total_steps": 37885, "loss": 0.0, "lr": 5.314905060588266e-07, "epoch": 3.448594430513396, "percentage": 68.97, "elapsed_time": "0:38:14", "remaining_time": "0:17:12", "throughput": 5609.74, "total_tokens": 12871744}
|
|
{"current_steps": 26135, "total_steps": 37885, "loss": 0.0, "lr": 5.310835481351901e-07, "epoch": 3.4492543222911443, "percentage": 68.99, "elapsed_time": "0:38:14", "remaining_time": "0:17:11", "throughput": 5610.0, "total_tokens": 12874176}
|
|
{"current_steps": 26140, "total_steps": 37885, "loss": 0.0, "lr": 5.306766897356747e-07, "epoch": 3.4499142140688925, "percentage": 69.0, "elapsed_time": "0:38:15", "remaining_time": "0:17:11", "throughput": 5610.28, "total_tokens": 12876672}
|
|
{"current_steps": 26145, "total_steps": 37885, "loss": 0.0013, "lr": 5.302699309466338e-07, "epoch": 3.4505741058466413, "percentage": 69.01, "elapsed_time": "0:38:15", "remaining_time": "0:17:10", "throughput": 5610.59, "total_tokens": 12879232}
|
|
{"current_steps": 26150, "total_steps": 37885, "loss": 0.072, "lr": 5.298632718543981e-07, "epoch": 3.4512339976243895, "percentage": 69.02, "elapsed_time": "0:38:15", "remaining_time": "0:17:10", "throughput": 5610.92, "total_tokens": 12881856}
|
|
{"current_steps": 26155, "total_steps": 37885, "loss": 0.0009, "lr": 5.294567125452785e-07, "epoch": 3.451893889402138, "percentage": 69.04, "elapsed_time": "0:38:16", "remaining_time": "0:17:09", "throughput": 5611.12, "total_tokens": 12884160}
|
|
{"current_steps": 26160, "total_steps": 37885, "loss": 0.0, "lr": 5.290502531055648e-07, "epoch": 3.4525537811798865, "percentage": 69.05, "elapsed_time": "0:38:16", "remaining_time": "0:17:09", "throughput": 5611.38, "total_tokens": 12886592}
|
|
{"current_steps": 26165, "total_steps": 37885, "loss": 0.0308, "lr": 5.286438936215239e-07, "epoch": 3.453213672957635, "percentage": 69.06, "elapsed_time": "0:38:16", "remaining_time": "0:17:08", "throughput": 5611.56, "total_tokens": 12888832}
|
|
{"current_steps": 26170, "total_steps": 37885, "loss": 0.0875, "lr": 5.282376341794033e-07, "epoch": 3.4538735647353835, "percentage": 69.08, "elapsed_time": "0:38:17", "remaining_time": "0:17:08", "throughput": 5611.79, "total_tokens": 12891200}
|
|
{"current_steps": 26175, "total_steps": 37885, "loss": 0.0, "lr": 5.278314748654287e-07, "epoch": 3.454533456513132, "percentage": 69.09, "elapsed_time": "0:38:17", "remaining_time": "0:17:07", "throughput": 5612.18, "total_tokens": 12893952}
|
|
{"current_steps": 26180, "total_steps": 37885, "loss": 0.0025, "lr": 5.274254157658048e-07, "epoch": 3.45519334829088, "percentage": 69.1, "elapsed_time": "0:38:17", "remaining_time": "0:17:07", "throughput": 5612.48, "total_tokens": 12896512}
|
|
{"current_steps": 26185, "total_steps": 37885, "loss": 0.0, "lr": 5.270194569667139e-07, "epoch": 3.455853240068629, "percentage": 69.12, "elapsed_time": "0:38:18", "remaining_time": "0:17:06", "throughput": 5612.71, "total_tokens": 12898880}
|
|
{"current_steps": 26190, "total_steps": 37885, "loss": 0.0949, "lr": 5.266135985543181e-07, "epoch": 3.456513131846377, "percentage": 69.13, "elapsed_time": "0:38:18", "remaining_time": "0:17:06", "throughput": 5613.09, "total_tokens": 12901632}
|
|
{"current_steps": 26195, "total_steps": 37885, "loss": 0.0, "lr": 5.262078406147585e-07, "epoch": 3.457173023624126, "percentage": 69.14, "elapsed_time": "0:38:18", "remaining_time": "0:17:05", "throughput": 5613.24, "total_tokens": 12903808}
|
|
{"current_steps": 26200, "total_steps": 37885, "loss": 0.0, "lr": 5.258021832341534e-07, "epoch": 3.457832915401874, "percentage": 69.16, "elapsed_time": "0:38:19", "remaining_time": "0:17:05", "throughput": 5613.45, "total_tokens": 12906112}
|
|
{"current_steps": 26205, "total_steps": 37885, "loss": 0.0007, "lr": 5.25396626498601e-07, "epoch": 3.4584928071796224, "percentage": 69.17, "elapsed_time": "0:38:19", "remaining_time": "0:17:04", "throughput": 5613.66, "total_tokens": 12908416}
|
|
{"current_steps": 26210, "total_steps": 37885, "loss": 0.0, "lr": 5.249911704941782e-07, "epoch": 3.459152698957371, "percentage": 69.18, "elapsed_time": "0:38:19", "remaining_time": "0:17:04", "throughput": 5613.91, "total_tokens": 12910848}
|
|
{"current_steps": 26215, "total_steps": 37885, "loss": 0.0, "lr": 5.245858153069394e-07, "epoch": 3.4598125907351194, "percentage": 69.2, "elapsed_time": "0:38:20", "remaining_time": "0:17:03", "throughput": 5614.21, "total_tokens": 12913408}
|
|
{"current_steps": 26220, "total_steps": 37885, "loss": 0.0016, "lr": 5.241805610229185e-07, "epoch": 3.460472482512868, "percentage": 69.21, "elapsed_time": "0:38:20", "remaining_time": "0:17:03", "throughput": 5614.54, "total_tokens": 12916032}
|
|
{"current_steps": 26225, "total_steps": 37885, "loss": 0.0252, "lr": 5.23775407728128e-07, "epoch": 3.4611323742906164, "percentage": 69.22, "elapsed_time": "0:38:20", "remaining_time": "0:17:02", "throughput": 5614.82, "total_tokens": 12918528}
|
|
{"current_steps": 26230, "total_steps": 37885, "loss": 0.0, "lr": 5.23370355508559e-07, "epoch": 3.4617922660683647, "percentage": 69.24, "elapsed_time": "0:38:21", "remaining_time": "0:17:02", "throughput": 5615.08, "total_tokens": 12920960}
|
|
{"current_steps": 26235, "total_steps": 37885, "loss": 0.0, "lr": 5.229654044501802e-07, "epoch": 3.4624521578461134, "percentage": 69.25, "elapsed_time": "0:38:21", "remaining_time": "0:17:01", "throughput": 5615.35, "total_tokens": 12923456}
|
|
{"current_steps": 26240, "total_steps": 37885, "loss": 0.0, "lr": 5.2256055463894e-07, "epoch": 3.4631120496238617, "percentage": 69.26, "elapsed_time": "0:38:21", "remaining_time": "0:17:01", "throughput": 5615.55, "total_tokens": 12925760}
|
|
{"current_steps": 26245, "total_steps": 37885, "loss": 0.0, "lr": 5.221558061607649e-07, "epoch": 3.4637719414016104, "percentage": 69.28, "elapsed_time": "0:38:22", "remaining_time": "0:17:01", "throughput": 5615.89, "total_tokens": 12928384}
|
|
{"current_steps": 26250, "total_steps": 37885, "loss": 0.0, "lr": 5.217511591015595e-07, "epoch": 3.4644318331793587, "percentage": 69.29, "elapsed_time": "0:38:22", "remaining_time": "0:17:00", "throughput": 5616.14, "total_tokens": 12930816}
|
|
{"current_steps": 26255, "total_steps": 37885, "loss": 0.0066, "lr": 5.213466135472072e-07, "epoch": 3.465091724957107, "percentage": 69.3, "elapsed_time": "0:38:22", "remaining_time": "0:17:00", "throughput": 5616.52, "total_tokens": 12933568}
|
|
{"current_steps": 26260, "total_steps": 37885, "loss": 0.0005, "lr": 5.209421695835701e-07, "epoch": 3.4657516167348557, "percentage": 69.32, "elapsed_time": "0:38:23", "remaining_time": "0:16:59", "throughput": 5616.72, "total_tokens": 12935872}
|
|
{"current_steps": 26265, "total_steps": 37885, "loss": 0.0, "lr": 5.205378272964889e-07, "epoch": 3.466411508512604, "percentage": 69.33, "elapsed_time": "0:38:23", "remaining_time": "0:16:59", "throughput": 5616.91, "total_tokens": 12938176}
|
|
{"current_steps": 26270, "total_steps": 37885, "loss": 0.0396, "lr": 5.201335867717818e-07, "epoch": 3.467071400290352, "percentage": 69.34, "elapsed_time": "0:38:23", "remaining_time": "0:16:58", "throughput": 5617.09, "total_tokens": 12940416}
|
|
{"current_steps": 26275, "total_steps": 37885, "loss": 0.0072, "lr": 5.197294480952452e-07, "epoch": 3.467731292068101, "percentage": 69.35, "elapsed_time": "0:38:24", "remaining_time": "0:16:58", "throughput": 5617.42, "total_tokens": 12943040}
|
|
{"current_steps": 26280, "total_steps": 37885, "loss": 0.0001, "lr": 5.193254113526561e-07, "epoch": 3.468391183845849, "percentage": 69.37, "elapsed_time": "0:38:24", "remaining_time": "0:16:57", "throughput": 5617.64, "total_tokens": 12945408}
|
|
{"current_steps": 26285, "total_steps": 37885, "loss": 0.0396, "lr": 5.189214766297675e-07, "epoch": 3.4690510756235975, "percentage": 69.38, "elapsed_time": "0:38:24", "remaining_time": "0:16:57", "throughput": 5617.89, "total_tokens": 12947840}
|
|
{"current_steps": 26290, "total_steps": 37885, "loss": 0.0, "lr": 5.18517644012312e-07, "epoch": 3.469710967401346, "percentage": 69.39, "elapsed_time": "0:38:25", "remaining_time": "0:16:56", "throughput": 5618.05, "total_tokens": 12950016}
|
|
{"current_steps": 26295, "total_steps": 37885, "loss": 0.0017, "lr": 5.181139135859996e-07, "epoch": 3.4703708591790945, "percentage": 69.41, "elapsed_time": "0:38:25", "remaining_time": "0:16:56", "throughput": 5618.45, "total_tokens": 12952832}
|
|
{"current_steps": 26300, "total_steps": 37885, "loss": 0.0648, "lr": 5.177102854365196e-07, "epoch": 3.471030750956843, "percentage": 69.42, "elapsed_time": "0:38:25", "remaining_time": "0:16:55", "throughput": 5618.67, "total_tokens": 12955200}
|
|
{"current_steps": 26305, "total_steps": 37885, "loss": 0.0, "lr": 5.173067596495393e-07, "epoch": 3.4716906427345915, "percentage": 69.43, "elapsed_time": "0:38:26", "remaining_time": "0:16:55", "throughput": 5618.9, "total_tokens": 12957568}
|
|
{"current_steps": 26310, "total_steps": 37885, "loss": 0.0, "lr": 5.16903336310703e-07, "epoch": 3.4723505345123398, "percentage": 69.45, "elapsed_time": "0:38:26", "remaining_time": "0:16:54", "throughput": 5619.16, "total_tokens": 12960000}
|
|
{"current_steps": 26315, "total_steps": 37885, "loss": 0.0, "lr": 5.165000155056363e-07, "epoch": 3.4730104262900885, "percentage": 69.46, "elapsed_time": "0:38:26", "remaining_time": "0:16:54", "throughput": 5619.39, "total_tokens": 12962368}
|
|
{"current_steps": 26320, "total_steps": 37885, "loss": 0.0007, "lr": 5.1609679731994e-07, "epoch": 3.4736703180678368, "percentage": 69.47, "elapsed_time": "0:38:27", "remaining_time": "0:16:53", "throughput": 5619.67, "total_tokens": 12964864}
|
|
{"current_steps": 26325, "total_steps": 37885, "loss": 0.147, "lr": 5.156936818391937e-07, "epoch": 3.4743302098455855, "percentage": 69.49, "elapsed_time": "0:38:27", "remaining_time": "0:16:53", "throughput": 5620.0, "total_tokens": 12967488}
|
|
{"current_steps": 26330, "total_steps": 37885, "loss": 0.0, "lr": 5.152906691489566e-07, "epoch": 3.4749901016233338, "percentage": 69.5, "elapsed_time": "0:38:27", "remaining_time": "0:16:52", "throughput": 5620.33, "total_tokens": 12970112}
|
|
{"current_steps": 26335, "total_steps": 37885, "loss": 0.0147, "lr": 5.148877593347649e-07, "epoch": 3.475649993401082, "percentage": 69.51, "elapsed_time": "0:38:28", "remaining_time": "0:16:52", "throughput": 5620.66, "total_tokens": 12972736}
|
|
{"current_steps": 26340, "total_steps": 37885, "loss": 0.0, "lr": 5.144849524821337e-07, "epoch": 3.4763098851788308, "percentage": 69.53, "elapsed_time": "0:38:28", "remaining_time": "0:16:51", "throughput": 5620.96, "total_tokens": 12975296}
|
|
{"current_steps": 26345, "total_steps": 37885, "loss": 0.0002, "lr": 5.140822486765552e-07, "epoch": 3.476969776956579, "percentage": 69.54, "elapsed_time": "0:38:28", "remaining_time": "0:16:51", "throughput": 5621.3, "total_tokens": 12977920}
|
|
{"current_steps": 26350, "total_steps": 37885, "loss": 0.0, "lr": 5.136796480035007e-07, "epoch": 3.4776296687343278, "percentage": 69.55, "elapsed_time": "0:38:29", "remaining_time": "0:16:50", "throughput": 5621.6, "total_tokens": 12980480}
|
|
{"current_steps": 26355, "total_steps": 37885, "loss": 0.0, "lr": 5.132771505484197e-07, "epoch": 3.478289560512076, "percentage": 69.57, "elapsed_time": "0:38:29", "remaining_time": "0:16:50", "throughput": 5621.95, "total_tokens": 12983168}
|
|
{"current_steps": 26360, "total_steps": 37885, "loss": 0.0004, "lr": 5.128747563967384e-07, "epoch": 3.4789494522898243, "percentage": 69.58, "elapsed_time": "0:38:29", "remaining_time": "0:16:49", "throughput": 5622.2, "total_tokens": 12985600}
|
|
{"current_steps": 26365, "total_steps": 37885, "loss": 0.0, "lr": 5.124724656338626e-07, "epoch": 3.479609344067573, "percentage": 69.59, "elapsed_time": "0:38:30", "remaining_time": "0:16:49", "throughput": 5622.6, "total_tokens": 12988416}
|
|
{"current_steps": 26370, "total_steps": 37885, "loss": 0.0, "lr": 5.12070278345176e-07, "epoch": 3.4802692358453213, "percentage": 69.61, "elapsed_time": "0:38:30", "remaining_time": "0:16:48", "throughput": 5622.78, "total_tokens": 12990656}
|
|
{"current_steps": 26375, "total_steps": 37885, "loss": 0.0, "lr": 5.116681946160391e-07, "epoch": 3.48092912762307, "percentage": 69.62, "elapsed_time": "0:38:30", "remaining_time": "0:16:48", "throughput": 5623.16, "total_tokens": 12993408}
|
|
{"current_steps": 26380, "total_steps": 37885, "loss": 0.0323, "lr": 5.112662145317917e-07, "epoch": 3.4815890194008183, "percentage": 69.63, "elapsed_time": "0:38:31", "remaining_time": "0:16:47", "throughput": 5623.28, "total_tokens": 12995520}
|
|
{"current_steps": 26385, "total_steps": 37885, "loss": 0.1026, "lr": 5.108643381777511e-07, "epoch": 3.4822489111785666, "percentage": 69.64, "elapsed_time": "0:38:31", "remaining_time": "0:16:47", "throughput": 5623.48, "total_tokens": 12997824}
|
|
{"current_steps": 26390, "total_steps": 37885, "loss": 0.0431, "lr": 5.104625656392132e-07, "epoch": 3.4829088029563153, "percentage": 69.66, "elapsed_time": "0:38:31", "remaining_time": "0:16:46", "throughput": 5623.69, "total_tokens": 13000128}
|
|
{"current_steps": 26395, "total_steps": 37885, "loss": 0.0, "lr": 5.100608970014501e-07, "epoch": 3.4835686947340636, "percentage": 69.67, "elapsed_time": "0:38:32", "remaining_time": "0:16:46", "throughput": 5623.97, "total_tokens": 13002624}
|
|
{"current_steps": 26400, "total_steps": 37885, "loss": 0.0002, "lr": 5.09659332349714e-07, "epoch": 3.484228586511812, "percentage": 69.68, "elapsed_time": "0:38:32", "remaining_time": "0:16:45", "throughput": 5624.23, "total_tokens": 13005120}
|
|
{"current_steps": 26405, "total_steps": 37885, "loss": 0.0, "lr": 5.092578717692341e-07, "epoch": 3.4848884782895606, "percentage": 69.7, "elapsed_time": "0:38:32", "remaining_time": "0:16:45", "throughput": 5624.59, "total_tokens": 13007808}
|
|
{"current_steps": 26410, "total_steps": 37885, "loss": 0.0523, "lr": 5.088565153452171e-07, "epoch": 3.485548370067309, "percentage": 69.71, "elapsed_time": "0:38:32", "remaining_time": "0:16:44", "throughput": 5624.77, "total_tokens": 13010048}
|
|
{"current_steps": 26415, "total_steps": 37885, "loss": 0.0, "lr": 5.084552631628479e-07, "epoch": 3.486208261845057, "percentage": 69.72, "elapsed_time": "0:38:33", "remaining_time": "0:16:44", "throughput": 5625.08, "total_tokens": 13012608}
|
|
{"current_steps": 26420, "total_steps": 37885, "loss": 0.0472, "lr": 5.080541153072902e-07, "epoch": 3.486868153622806, "percentage": 69.74, "elapsed_time": "0:38:33", "remaining_time": "0:16:44", "throughput": 5625.33, "total_tokens": 13015040}
|
|
{"current_steps": 26425, "total_steps": 37885, "loss": 0.0, "lr": 5.076530718636834e-07, "epoch": 3.487528045400554, "percentage": 69.75, "elapsed_time": "0:38:33", "remaining_time": "0:16:43", "throughput": 5625.48, "total_tokens": 13017216}
|
|
{"current_steps": 26430, "total_steps": 37885, "loss": 0.0001, "lr": 5.07252132917147e-07, "epoch": 3.488187937178303, "percentage": 69.76, "elapsed_time": "0:38:34", "remaining_time": "0:16:43", "throughput": 5625.73, "total_tokens": 13019648}
|
|
{"current_steps": 26435, "total_steps": 37885, "loss": 0.0309, "lr": 5.068512985527773e-07, "epoch": 3.488847828956051, "percentage": 69.78, "elapsed_time": "0:38:34", "remaining_time": "0:16:42", "throughput": 5625.89, "total_tokens": 13021824}
|
|
{"current_steps": 26440, "total_steps": 37885, "loss": 0.0554, "lr": 5.064505688556486e-07, "epoch": 3.4895077207337994, "percentage": 69.79, "elapsed_time": "0:38:34", "remaining_time": "0:16:42", "throughput": 5626.11, "total_tokens": 13024192}
|
|
{"current_steps": 26445, "total_steps": 37885, "loss": 0.0002, "lr": 5.060499439108127e-07, "epoch": 3.490167612511548, "percentage": 69.8, "elapsed_time": "0:38:35", "remaining_time": "0:16:41", "throughput": 5626.35, "total_tokens": 13026624}
|
|
{"current_steps": 26450, "total_steps": 37885, "loss": 0.0, "lr": 5.056494238032985e-07, "epoch": 3.4908275042892964, "percentage": 69.82, "elapsed_time": "0:38:35", "remaining_time": "0:16:41", "throughput": 5626.65, "total_tokens": 13029184}
|
|
{"current_steps": 26455, "total_steps": 37885, "loss": 0.0001, "lr": 5.052490086181151e-07, "epoch": 3.491487396067045, "percentage": 69.83, "elapsed_time": "0:38:35", "remaining_time": "0:16:40", "throughput": 5626.91, "total_tokens": 13031616}
|
|
{"current_steps": 26460, "total_steps": 37885, "loss": 0.0, "lr": 5.048486984402467e-07, "epoch": 3.4921472878447934, "percentage": 69.84, "elapsed_time": "0:38:36", "remaining_time": "0:16:40", "throughput": 5627.16, "total_tokens": 13034048}
|
|
{"current_steps": 26465, "total_steps": 37885, "loss": 0.0032, "lr": 5.044484933546565e-07, "epoch": 3.4928071796225417, "percentage": 69.86, "elapsed_time": "0:38:36", "remaining_time": "0:16:39", "throughput": 5627.41, "total_tokens": 13036480}
|
|
{"current_steps": 26470, "total_steps": 37885, "loss": 0.0, "lr": 5.040483934462849e-07, "epoch": 3.4934670714002904, "percentage": 69.87, "elapsed_time": "0:38:36", "remaining_time": "0:16:39", "throughput": 5627.74, "total_tokens": 13039104}
|
|
{"current_steps": 26475, "total_steps": 37885, "loss": 0.0004, "lr": 5.036483988000504e-07, "epoch": 3.4941269631780387, "percentage": 69.88, "elapsed_time": "0:38:37", "remaining_time": "0:16:38", "throughput": 5628.07, "total_tokens": 13041728}
|
|
{"current_steps": 26480, "total_steps": 37885, "loss": 0.0001, "lr": 5.032485095008494e-07, "epoch": 3.4947868549557874, "percentage": 69.9, "elapsed_time": "0:38:37", "remaining_time": "0:16:38", "throughput": 5628.38, "total_tokens": 13044288}
|
|
{"current_steps": 26485, "total_steps": 37885, "loss": 0.0, "lr": 5.028487256335541e-07, "epoch": 3.4954467467335357, "percentage": 69.91, "elapsed_time": "0:38:37", "remaining_time": "0:16:37", "throughput": 5628.66, "total_tokens": 13046784}
|
|
{"current_steps": 26490, "total_steps": 37885, "loss": 0.0, "lr": 5.024490472830176e-07, "epoch": 3.496106638511284, "percentage": 69.92, "elapsed_time": "0:38:38", "remaining_time": "0:16:37", "throughput": 5629.03, "total_tokens": 13049536}
|
|
{"current_steps": 26495, "total_steps": 37885, "loss": 0.0004, "lr": 5.020494745340677e-07, "epoch": 3.4967665302890327, "percentage": 69.94, "elapsed_time": "0:38:38", "remaining_time": "0:16:36", "throughput": 5629.29, "total_tokens": 13051968}
|
|
{"current_steps": 26500, "total_steps": 37885, "loss": 0.0899, "lr": 5.016500074715108e-07, "epoch": 3.497426422066781, "percentage": 69.95, "elapsed_time": "0:38:38", "remaining_time": "0:16:36", "throughput": 5629.61, "total_tokens": 13054592}
|
|
{"current_steps": 26505, "total_steps": 37885, "loss": 0.0003, "lr": 5.01250646180131e-07, "epoch": 3.4980863138445297, "percentage": 69.96, "elapsed_time": "0:38:39", "remaining_time": "0:16:35", "throughput": 5629.89, "total_tokens": 13057088}
|
|
{"current_steps": 26510, "total_steps": 37885, "loss": 0.0, "lr": 5.008513907446898e-07, "epoch": 3.498746205622278, "percentage": 69.97, "elapsed_time": "0:38:39", "remaining_time": "0:16:35", "throughput": 5630.22, "total_tokens": 13059712}
|
|
{"current_steps": 26515, "total_steps": 37885, "loss": 0.0688, "lr": 5.004522412499267e-07, "epoch": 3.4994060974000263, "percentage": 69.99, "elapsed_time": "0:38:40", "remaining_time": "0:16:34", "throughput": 5629.9, "total_tokens": 13062336}
|
|
{"current_steps": 26520, "total_steps": 37885, "loss": 0.0, "lr": 5.000531977805575e-07, "epoch": 3.500065989177775, "percentage": 70.0, "elapsed_time": "0:38:40", "remaining_time": "0:16:34", "throughput": 5630.1, "total_tokens": 13064640}
|
|
{"current_steps": 26525, "total_steps": 37885, "loss": 0.0441, "lr": 4.99654260421277e-07, "epoch": 3.5007258809555233, "percentage": 70.01, "elapsed_time": "0:38:40", "remaining_time": "0:16:33", "throughput": 5630.45, "total_tokens": 13067328}
|
|
{"current_steps": 26530, "total_steps": 37885, "loss": 0.0, "lr": 4.992554292567568e-07, "epoch": 3.501385772733272, "percentage": 70.03, "elapsed_time": "0:38:41", "remaining_time": "0:16:33", "throughput": 5630.72, "total_tokens": 13069824}
|
|
{"current_steps": 26530, "total_steps": 37885, "eval_loss": 0.18093986809253693, "epoch": 3.501385772733272, "percentage": 70.03, "elapsed_time": "0:38:49", "remaining_time": "0:16:36", "throughput": 5611.57, "total_tokens": 13069824}
|
|
{"current_steps": 26535, "total_steps": 37885, "loss": 0.0002, "lr": 4.988567043716452e-07, "epoch": 3.5020456645110203, "percentage": 70.04, "elapsed_time": "0:39:23", "remaining_time": "0:16:50", "throughput": 5531.44, "total_tokens": 13072000}
|
|
{"current_steps": 26540, "total_steps": 37885, "loss": 0.0002, "lr": 4.984580858505691e-07, "epoch": 3.5027055562887686, "percentage": 70.05, "elapsed_time": "0:39:23", "remaining_time": "0:16:50", "throughput": 5531.64, "total_tokens": 13074304}
|
|
{"current_steps": 26545, "total_steps": 37885, "loss": 0.0001, "lr": 4.980595737781328e-07, "epoch": 3.503365448066517, "percentage": 70.07, "elapsed_time": "0:39:23", "remaining_time": "0:16:49", "throughput": 5531.97, "total_tokens": 13076928}
|
|
{"current_steps": 26550, "total_steps": 37885, "loss": 0.0012, "lr": 4.976611682389168e-07, "epoch": 3.5040253398442656, "percentage": 70.08, "elapsed_time": "0:39:24", "remaining_time": "0:16:49", "throughput": 5532.22, "total_tokens": 13079360}
|
|
{"current_steps": 26555, "total_steps": 37885, "loss": 0.0032, "lr": 4.972628693174802e-07, "epoch": 3.504685231622014, "percentage": 70.09, "elapsed_time": "0:39:24", "remaining_time": "0:16:48", "throughput": 5532.48, "total_tokens": 13081792}
|
|
{"current_steps": 26560, "total_steps": 37885, "loss": 0.0, "lr": 4.96864677098359e-07, "epoch": 3.5053451233997626, "percentage": 70.11, "elapsed_time": "0:39:24", "remaining_time": "0:16:48", "throughput": 5532.79, "total_tokens": 13084352}
|
|
{"current_steps": 26565, "total_steps": 37885, "loss": 0.0611, "lr": 4.964665916660671e-07, "epoch": 3.506005015177511, "percentage": 70.12, "elapsed_time": "0:39:25", "remaining_time": "0:16:47", "throughput": 5533.04, "total_tokens": 13086784}
|
|
{"current_steps": 26570, "total_steps": 37885, "loss": 0.0001, "lr": 4.960686131050945e-07, "epoch": 3.506664906955259, "percentage": 70.13, "elapsed_time": "0:39:25", "remaining_time": "0:16:47", "throughput": 5533.35, "total_tokens": 13089344}
|
|
{"current_steps": 26575, "total_steps": 37885, "loss": 0.0, "lr": 4.956707414999095e-07, "epoch": 3.507324798733008, "percentage": 70.15, "elapsed_time": "0:39:25", "remaining_time": "0:16:46", "throughput": 5533.58, "total_tokens": 13091712}
|
|
{"current_steps": 26580, "total_steps": 37885, "loss": 0.0, "lr": 4.95272976934958e-07, "epoch": 3.507984690510756, "percentage": 70.16, "elapsed_time": "0:39:26", "remaining_time": "0:16:46", "throughput": 5533.86, "total_tokens": 13094208}
|
|
{"current_steps": 26585, "total_steps": 37885, "loss": 0.0759, "lr": 4.948753194946617e-07, "epoch": 3.508644582288505, "percentage": 70.17, "elapsed_time": "0:39:26", "remaining_time": "0:16:45", "throughput": 5534.17, "total_tokens": 13096768}
|
|
{"current_steps": 26590, "total_steps": 37885, "loss": 0.0011, "lr": 4.944777692634211e-07, "epoch": 3.509304474066253, "percentage": 70.19, "elapsed_time": "0:39:26", "remaining_time": "0:16:45", "throughput": 5534.46, "total_tokens": 13099264}
|
|
{"current_steps": 26595, "total_steps": 37885, "loss": 0.0011, "lr": 4.940803263256133e-07, "epoch": 3.5099643658440014, "percentage": 70.2, "elapsed_time": "0:39:27", "remaining_time": "0:16:44", "throughput": 5534.81, "total_tokens": 13101952}
|
|
{"current_steps": 26600, "total_steps": 37885, "loss": 0.0, "lr": 4.936829907655929e-07, "epoch": 3.51062425762175, "percentage": 70.21, "elapsed_time": "0:39:27", "remaining_time": "0:16:44", "throughput": 5535.19, "total_tokens": 13104704}
|
|
{"current_steps": 26605, "total_steps": 37885, "loss": 0.0, "lr": 4.932857626676914e-07, "epoch": 3.5112841493994984, "percentage": 70.23, "elapsed_time": "0:39:27", "remaining_time": "0:16:43", "throughput": 5535.42, "total_tokens": 13107072}
|
|
{"current_steps": 26610, "total_steps": 37885, "loss": 0.0846, "lr": 4.928886421162166e-07, "epoch": 3.511944041177247, "percentage": 70.24, "elapsed_time": "0:39:28", "remaining_time": "0:16:43", "throughput": 5535.63, "total_tokens": 13109376}
|
|
{"current_steps": 26615, "total_steps": 37885, "loss": 0.0518, "lr": 4.924916291954561e-07, "epoch": 3.5126039329549954, "percentage": 70.25, "elapsed_time": "0:39:28", "remaining_time": "0:16:42", "throughput": 5535.94, "total_tokens": 13111936}
|
|
{"current_steps": 26620, "total_steps": 37885, "loss": 0.0, "lr": 4.920947239896717e-07, "epoch": 3.5132638247327437, "percentage": 70.27, "elapsed_time": "0:39:28", "remaining_time": "0:16:42", "throughput": 5536.21, "total_tokens": 13114432}
|
|
{"current_steps": 26625, "total_steps": 37885, "loss": 0.0, "lr": 4.916979265831043e-07, "epoch": 3.5139237165104924, "percentage": 70.28, "elapsed_time": "0:39:29", "remaining_time": "0:16:41", "throughput": 5536.45, "total_tokens": 13116800}
|
|
{"current_steps": 26630, "total_steps": 37885, "loss": 0.0003, "lr": 4.913012370599715e-07, "epoch": 3.5145836082882407, "percentage": 70.29, "elapsed_time": "0:39:29", "remaining_time": "0:16:41", "throughput": 5536.73, "total_tokens": 13119296}
|
|
{"current_steps": 26635, "total_steps": 37885, "loss": 0.0001, "lr": 4.909046555044672e-07, "epoch": 3.5152435000659894, "percentage": 70.3, "elapsed_time": "0:39:29", "remaining_time": "0:16:40", "throughput": 5536.96, "total_tokens": 13121664}
|
|
{"current_steps": 26640, "total_steps": 37885, "loss": 0.0035, "lr": 4.905081820007634e-07, "epoch": 3.5159033918437377, "percentage": 70.32, "elapsed_time": "0:39:30", "remaining_time": "0:16:40", "throughput": 5537.29, "total_tokens": 13124288}
|
|
{"current_steps": 26645, "total_steps": 37885, "loss": 0.0002, "lr": 4.901118166330077e-07, "epoch": 3.516563283621486, "percentage": 70.33, "elapsed_time": "0:39:30", "remaining_time": "0:16:39", "throughput": 5537.57, "total_tokens": 13126784}
|
|
{"current_steps": 26650, "total_steps": 37885, "loss": 0.0004, "lr": 4.897155594853275e-07, "epoch": 3.5172231753992347, "percentage": 70.34, "elapsed_time": "0:39:30", "remaining_time": "0:16:39", "throughput": 5537.81, "total_tokens": 13129216}
|
|
{"current_steps": 26655, "total_steps": 37885, "loss": 0.0004, "lr": 4.893194106418246e-07, "epoch": 3.517883067176983, "percentage": 70.36, "elapsed_time": "0:39:31", "remaining_time": "0:16:38", "throughput": 5537.98, "total_tokens": 13131456}
|
|
{"current_steps": 26660, "total_steps": 37885, "loss": 0.0004, "lr": 4.889233701865782e-07, "epoch": 3.5185429589547317, "percentage": 70.37, "elapsed_time": "0:39:31", "remaining_time": "0:16:38", "throughput": 5538.19, "total_tokens": 13133824}
|
|
{"current_steps": 26665, "total_steps": 37885, "loss": 0.0104, "lr": 4.885274382036457e-07, "epoch": 3.51920285073248, "percentage": 70.38, "elapsed_time": "0:39:31", "remaining_time": "0:16:38", "throughput": 5538.42, "total_tokens": 13136256}
|
|
{"current_steps": 26670, "total_steps": 37885, "loss": 0.0014, "lr": 4.881316147770607e-07, "epoch": 3.5198627425102282, "percentage": 70.4, "elapsed_time": "0:39:32", "remaining_time": "0:16:37", "throughput": 5538.75, "total_tokens": 13138944}
|
|
{"current_steps": 26675, "total_steps": 37885, "loss": 0.0, "lr": 4.877358999908339e-07, "epoch": 3.5205226342879765, "percentage": 70.41, "elapsed_time": "0:39:32", "remaining_time": "0:16:37", "throughput": 5539.04, "total_tokens": 13141504}
|
|
{"current_steps": 26680, "total_steps": 37885, "loss": 0.0001, "lr": 4.873402939289527e-07, "epoch": 3.5211825260657252, "percentage": 70.42, "elapsed_time": "0:39:32", "remaining_time": "0:16:36", "throughput": 5539.12, "total_tokens": 13143552}
|
|
{"current_steps": 26685, "total_steps": 37885, "loss": 0.0, "lr": 4.869447966753816e-07, "epoch": 3.5218424178434735, "percentage": 70.44, "elapsed_time": "0:39:33", "remaining_time": "0:16:36", "throughput": 5539.38, "total_tokens": 13146048}
|
|
{"current_steps": 26690, "total_steps": 37885, "loss": 0.0, "lr": 4.865494083140627e-07, "epoch": 3.5225023096212222, "percentage": 70.45, "elapsed_time": "0:39:33", "remaining_time": "0:16:35", "throughput": 5539.53, "total_tokens": 13148288}
|
|
{"current_steps": 26695, "total_steps": 37885, "loss": 0.0001, "lr": 4.861541289289131e-07, "epoch": 3.5231622013989705, "percentage": 70.46, "elapsed_time": "0:39:33", "remaining_time": "0:16:35", "throughput": 5539.77, "total_tokens": 13150720}
|
|
{"current_steps": 26700, "total_steps": 37885, "loss": 0.0381, "lr": 4.857589586038289e-07, "epoch": 3.523822093176719, "percentage": 70.48, "elapsed_time": "0:39:34", "remaining_time": "0:16:34", "throughput": 5540.11, "total_tokens": 13153344}
|
|
{"current_steps": 26705, "total_steps": 37885, "loss": 0.0044, "lr": 4.853638974226822e-07, "epoch": 3.5244819849544675, "percentage": 70.49, "elapsed_time": "0:39:34", "remaining_time": "0:16:34", "throughput": 5540.39, "total_tokens": 13155840}
|
|
{"current_steps": 26710, "total_steps": 37885, "loss": 0.0003, "lr": 4.849689454693212e-07, "epoch": 3.525141876732216, "percentage": 70.5, "elapsed_time": "0:39:34", "remaining_time": "0:16:33", "throughput": 5540.64, "total_tokens": 13158272}
|
|
{"current_steps": 26715, "total_steps": 37885, "loss": 0.0004, "lr": 4.845741028275719e-07, "epoch": 3.5258017685099645, "percentage": 70.52, "elapsed_time": "0:39:35", "remaining_time": "0:16:33", "throughput": 5540.88, "total_tokens": 13160640}
|
|
{"current_steps": 26720, "total_steps": 37885, "loss": 0.0001, "lr": 4.841793695812369e-07, "epoch": 3.526461660287713, "percentage": 70.53, "elapsed_time": "0:39:35", "remaining_time": "0:16:32", "throughput": 5541.11, "total_tokens": 13163008}
|
|
{"current_steps": 26725, "total_steps": 37885, "loss": 0.075, "lr": 4.837847458140959e-07, "epoch": 3.527121552065461, "percentage": 70.54, "elapsed_time": "0:39:35", "remaining_time": "0:16:32", "throughput": 5541.37, "total_tokens": 13165440}
|
|
{"current_steps": 26730, "total_steps": 37885, "loss": 0.0, "lr": 4.833902316099039e-07, "epoch": 3.52778144384321, "percentage": 70.56, "elapsed_time": "0:39:36", "remaining_time": "0:16:31", "throughput": 5541.55, "total_tokens": 13167680}
|
|
{"current_steps": 26735, "total_steps": 37885, "loss": 0.0, "lr": 4.829958270523944e-07, "epoch": 3.528441335620958, "percentage": 70.57, "elapsed_time": "0:39:36", "remaining_time": "0:16:31", "throughput": 5541.66, "total_tokens": 13169728}
|
|
{"current_steps": 26740, "total_steps": 37885, "loss": 0.0626, "lr": 4.82601532225277e-07, "epoch": 3.529101227398707, "percentage": 70.58, "elapsed_time": "0:39:36", "remaining_time": "0:16:30", "throughput": 5541.89, "total_tokens": 13172096}
|
|
{"current_steps": 26745, "total_steps": 37885, "loss": 0.0001, "lr": 4.822073472122374e-07, "epoch": 3.529761119176455, "percentage": 70.6, "elapsed_time": "0:39:37", "remaining_time": "0:16:30", "throughput": 5542.15, "total_tokens": 13174528}
|
|
{"current_steps": 26750, "total_steps": 37885, "loss": 0.0, "lr": 4.818132720969387e-07, "epoch": 3.5304210109542034, "percentage": 70.61, "elapsed_time": "0:39:37", "remaining_time": "0:16:29", "throughput": 5542.4, "total_tokens": 13176960}
|
|
{"current_steps": 26755, "total_steps": 37885, "loss": 0.002, "lr": 4.814193069630211e-07, "epoch": 3.531080902731952, "percentage": 70.62, "elapsed_time": "0:39:37", "remaining_time": "0:16:29", "throughput": 5542.63, "total_tokens": 13179328}
|
|
{"current_steps": 26760, "total_steps": 37885, "loss": 0.0, "lr": 4.810254518941e-07, "epoch": 3.5317407945097004, "percentage": 70.63, "elapsed_time": "0:39:38", "remaining_time": "0:16:28", "throughput": 5542.9, "total_tokens": 13181824}
|
|
{"current_steps": 26765, "total_steps": 37885, "loss": 0.0, "lr": 4.806317069737684e-07, "epoch": 3.532400686287449, "percentage": 70.65, "elapsed_time": "0:39:38", "remaining_time": "0:16:28", "throughput": 5543.15, "total_tokens": 13184256}
|
|
{"current_steps": 26770, "total_steps": 37885, "loss": 0.0, "lr": 4.802380722855961e-07, "epoch": 3.5330605780651974, "percentage": 70.66, "elapsed_time": "0:39:38", "remaining_time": "0:16:27", "throughput": 5543.36, "total_tokens": 13186560}
|
|
{"current_steps": 26775, "total_steps": 37885, "loss": 0.0, "lr": 4.798445479131295e-07, "epoch": 3.5337204698429456, "percentage": 70.67, "elapsed_time": "0:39:39", "remaining_time": "0:16:27", "throughput": 5543.64, "total_tokens": 13189120}
|
|
{"current_steps": 26780, "total_steps": 37885, "loss": 0.0, "lr": 4.794511339398911e-07, "epoch": 3.5343803616206944, "percentage": 70.69, "elapsed_time": "0:39:39", "remaining_time": "0:16:26", "throughput": 5543.89, "total_tokens": 13191552}
|
|
{"current_steps": 26785, "total_steps": 37885, "loss": 0.0001, "lr": 4.790578304493791e-07, "epoch": 3.5350402533984426, "percentage": 70.7, "elapsed_time": "0:39:39", "remaining_time": "0:16:26", "throughput": 5544.09, "total_tokens": 13193856}
|
|
{"current_steps": 26790, "total_steps": 37885, "loss": 0.0891, "lr": 4.786646375250711e-07, "epoch": 3.5357001451761914, "percentage": 70.71, "elapsed_time": "0:39:40", "remaining_time": "0:16:25", "throughput": 5544.35, "total_tokens": 13196288}
|
|
{"current_steps": 26795, "total_steps": 37885, "loss": 0.0003, "lr": 4.78271555250418e-07, "epoch": 3.5363600369539396, "percentage": 70.73, "elapsed_time": "0:39:40", "remaining_time": "0:16:25", "throughput": 5544.59, "total_tokens": 13198720}
|
|
{"current_steps": 26800, "total_steps": 37885, "loss": 0.0001, "lr": 4.778785837088497e-07, "epoch": 3.537019928731688, "percentage": 70.74, "elapsed_time": "0:39:40", "remaining_time": "0:16:24", "throughput": 5544.84, "total_tokens": 13201152}
|
|
{"current_steps": 26805, "total_steps": 37885, "loss": 0.0001, "lr": 4.774857229837708e-07, "epoch": 3.537679820509436, "percentage": 70.75, "elapsed_time": "0:39:41", "remaining_time": "0:16:24", "throughput": 5545.08, "total_tokens": 13203584}
|
|
{"current_steps": 26810, "total_steps": 37885, "loss": 0.0, "lr": 4.770929731585634e-07, "epoch": 3.538339712287185, "percentage": 70.77, "elapsed_time": "0:39:41", "remaining_time": "0:16:23", "throughput": 5545.34, "total_tokens": 13206016}
|
|
{"current_steps": 26815, "total_steps": 37885, "loss": 0.0797, "lr": 4.7670033431658605e-07, "epoch": 3.538999604064933, "percentage": 70.78, "elapsed_time": "0:39:41", "remaining_time": "0:16:23", "throughput": 5545.51, "total_tokens": 13208256}
|
|
{"current_steps": 26820, "total_steps": 37885, "loss": 0.0, "lr": 4.7630780654117273e-07, "epoch": 3.539659495842682, "percentage": 70.79, "elapsed_time": "0:39:42", "remaining_time": "0:16:22", "throughput": 5545.83, "total_tokens": 13210880}
|
|
{"current_steps": 26825, "total_steps": 37885, "loss": 0.0001, "lr": 4.7591538991563594e-07, "epoch": 3.54031938762043, "percentage": 70.81, "elapsed_time": "0:39:42", "remaining_time": "0:16:22", "throughput": 5546.06, "total_tokens": 13213248}
|
|
{"current_steps": 26830, "total_steps": 37885, "loss": 0.0, "lr": 4.755230845232625e-07, "epoch": 3.5409792793981785, "percentage": 70.82, "elapsed_time": "0:39:42", "remaining_time": "0:16:21", "throughput": 5546.28, "total_tokens": 13215616}
|
|
{"current_steps": 26835, "total_steps": 37885, "loss": 0.0213, "lr": 4.7513089044731603e-07, "epoch": 3.541639171175927, "percentage": 70.83, "elapsed_time": "0:39:43", "remaining_time": "0:16:21", "throughput": 5546.49, "total_tokens": 13217920}
|
|
{"current_steps": 26840, "total_steps": 37885, "loss": 0.0, "lr": 4.7473880777103725e-07, "epoch": 3.5422990629536755, "percentage": 70.85, "elapsed_time": "0:39:43", "remaining_time": "0:16:20", "throughput": 5546.72, "total_tokens": 13220288}
|
|
{"current_steps": 26845, "total_steps": 37885, "loss": 0.0487, "lr": 4.74346836577643e-07, "epoch": 3.542958954731424, "percentage": 70.86, "elapsed_time": "0:39:43", "remaining_time": "0:16:20", "throughput": 5547.09, "total_tokens": 13223040}
|
|
{"current_steps": 26850, "total_steps": 37885, "loss": 0.0017, "lr": 4.7395497695032637e-07, "epoch": 3.5436188465091725, "percentage": 70.87, "elapsed_time": "0:39:44", "remaining_time": "0:16:19", "throughput": 5547.21, "total_tokens": 13225152}
|
|
{"current_steps": 26855, "total_steps": 37885, "loss": 0.0, "lr": 4.735632289722563e-07, "epoch": 3.5442787382869207, "percentage": 70.89, "elapsed_time": "0:39:44", "remaining_time": "0:16:19", "throughput": 5547.49, "total_tokens": 13227648}
|
|
{"current_steps": 26860, "total_steps": 37885, "loss": 0.0, "lr": 4.731715927265787e-07, "epoch": 3.5449386300646695, "percentage": 70.9, "elapsed_time": "0:39:44", "remaining_time": "0:16:18", "throughput": 5547.73, "total_tokens": 13230080}
|
|
{"current_steps": 26865, "total_steps": 37885, "loss": 0.1657, "lr": 4.727800682964159e-07, "epoch": 3.5455985218424177, "percentage": 70.91, "elapsed_time": "0:39:45", "remaining_time": "0:16:18", "throughput": 5548.07, "total_tokens": 13232768}
|
|
{"current_steps": 26870, "total_steps": 37885, "loss": 0.0, "lr": 4.723886557648655e-07, "epoch": 3.5462584136201665, "percentage": 70.93, "elapsed_time": "0:39:45", "remaining_time": "0:16:17", "throughput": 5548.25, "total_tokens": 13235008}
|
|
{"current_steps": 26875, "total_steps": 37885, "loss": 0.0839, "lr": 4.719973552150022e-07, "epoch": 3.5469183053979148, "percentage": 70.94, "elapsed_time": "0:39:45", "remaining_time": "0:16:17", "throughput": 5548.59, "total_tokens": 13237696}
|
|
{"current_steps": 26880, "total_steps": 37885, "loss": 0.0, "lr": 4.7160616672987674e-07, "epoch": 3.547578197175663, "percentage": 70.95, "elapsed_time": "0:39:46", "remaining_time": "0:16:16", "throughput": 5548.84, "total_tokens": 13240192}
|
|
{"current_steps": 26885, "total_steps": 37885, "loss": 0.0, "lr": 4.712150903925165e-07, "epoch": 3.5482380889534118, "percentage": 70.96, "elapsed_time": "0:39:46", "remaining_time": "0:16:16", "throughput": 5549.05, "total_tokens": 13242496}
|
|
{"current_steps": 26890, "total_steps": 37885, "loss": 0.0, "lr": 4.708241262859237e-07, "epoch": 3.54889798073116, "percentage": 70.98, "elapsed_time": "0:39:46", "remaining_time": "0:16:15", "throughput": 5549.26, "total_tokens": 13244864}
|
|
{"current_steps": 26895, "total_steps": 37885, "loss": 0.0, "lr": 4.7043327449307813e-07, "epoch": 3.5495578725089088, "percentage": 70.99, "elapsed_time": "0:39:47", "remaining_time": "0:16:15", "throughput": 5549.43, "total_tokens": 13247104}
|
|
{"current_steps": 26900, "total_steps": 37885, "loss": 0.0, "lr": 4.700425350969357e-07, "epoch": 3.550217764286657, "percentage": 71.0, "elapsed_time": "0:39:47", "remaining_time": "0:16:14", "throughput": 5549.67, "total_tokens": 13249536}
|
|
{"current_steps": 26905, "total_steps": 37885, "loss": 0.001, "lr": 4.696519081804271e-07, "epoch": 3.5508776560644053, "percentage": 71.02, "elapsed_time": "0:39:47", "remaining_time": "0:16:14", "throughput": 5549.89, "total_tokens": 13251904}
|
|
{"current_steps": 26910, "total_steps": 37885, "loss": 0.0, "lr": 4.6926139382646045e-07, "epoch": 3.551537547842154, "percentage": 71.03, "elapsed_time": "0:39:48", "remaining_time": "0:16:13", "throughput": 5550.13, "total_tokens": 13254336}
|
|
{"current_steps": 26915, "total_steps": 37885, "loss": 0.0, "lr": 4.6887099211792016e-07, "epoch": 3.5521974396199023, "percentage": 71.04, "elapsed_time": "0:39:48", "remaining_time": "0:16:13", "throughput": 5550.5, "total_tokens": 13257088}
|
|
{"current_steps": 26920, "total_steps": 37885, "loss": 0.0427, "lr": 4.6848070313766507e-07, "epoch": 3.552857331397651, "percentage": 71.06, "elapsed_time": "0:39:48", "remaining_time": "0:16:12", "throughput": 5550.77, "total_tokens": 13259584}
|
|
{"current_steps": 26925, "total_steps": 37885, "loss": 0.0, "lr": 4.68090526968532e-07, "epoch": 3.5535172231753993, "percentage": 71.07, "elapsed_time": "0:39:49", "remaining_time": "0:16:12", "throughput": 5551.07, "total_tokens": 13262208}
|
|
{"current_steps": 26930, "total_steps": 37885, "loss": 0.052, "lr": 4.677004636933327e-07, "epoch": 3.5541771149531476, "percentage": 71.08, "elapsed_time": "0:39:49", "remaining_time": "0:16:12", "throughput": 5551.33, "total_tokens": 13264704}
|
|
{"current_steps": 26935, "total_steps": 37885, "loss": 0.0, "lr": 4.673105133948557e-07, "epoch": 3.554837006730896, "percentage": 71.1, "elapsed_time": "0:39:49", "remaining_time": "0:16:11", "throughput": 5551.45, "total_tokens": 13266816}
|
|
{"current_steps": 26940, "total_steps": 37885, "loss": 0.0001, "lr": 4.6692067615586493e-07, "epoch": 3.5554968985086446, "percentage": 71.11, "elapsed_time": "0:39:50", "remaining_time": "0:16:11", "throughput": 5551.63, "total_tokens": 13269120}
|
|
{"current_steps": 26945, "total_steps": 37885, "loss": 0.0001, "lr": 4.6653095205909955e-07, "epoch": 3.556156790286393, "percentage": 71.12, "elapsed_time": "0:39:50", "remaining_time": "0:16:10", "throughput": 5551.73, "total_tokens": 13271232}
|
|
{"current_steps": 26950, "total_steps": 37885, "loss": 0.0, "lr": 4.661413411872772e-07, "epoch": 3.5568166820641416, "percentage": 71.14, "elapsed_time": "0:39:50", "remaining_time": "0:16:10", "throughput": 5551.91, "total_tokens": 13273536}
|
|
{"current_steps": 26955, "total_steps": 37885, "loss": 0.0281, "lr": 4.6575184362308904e-07, "epoch": 3.55747657384189, "percentage": 71.15, "elapsed_time": "0:39:51", "remaining_time": "0:16:09", "throughput": 5552.22, "total_tokens": 13276160}
|
|
{"current_steps": 26960, "total_steps": 37885, "loss": 0.0, "lr": 4.653624594492033e-07, "epoch": 3.558136465619638, "percentage": 71.16, "elapsed_time": "0:39:51", "remaining_time": "0:16:09", "throughput": 5552.35, "total_tokens": 13278336}
|
|
{"current_steps": 26965, "total_steps": 37885, "loss": 0.0001, "lr": 4.649731887482644e-07, "epoch": 3.558796357397387, "percentage": 71.18, "elapsed_time": "0:39:51", "remaining_time": "0:16:08", "throughput": 5552.6, "total_tokens": 13280832}
|
|
{"current_steps": 26970, "total_steps": 37885, "loss": 0.0, "lr": 4.645840316028914e-07, "epoch": 3.559456249175135, "percentage": 71.19, "elapsed_time": "0:39:52", "remaining_time": "0:16:08", "throughput": 5552.86, "total_tokens": 13283328}
|
|
{"current_steps": 26975, "total_steps": 37885, "loss": 0.002, "lr": 4.641949880956809e-07, "epoch": 3.560116140952884, "percentage": 71.2, "elapsed_time": "0:39:52", "remaining_time": "0:16:07", "throughput": 5553.04, "total_tokens": 13285632}
|
|
{"current_steps": 26980, "total_steps": 37885, "loss": 0.0003, "lr": 4.638060583092035e-07, "epoch": 3.560776032730632, "percentage": 71.22, "elapsed_time": "0:39:52", "remaining_time": "0:16:07", "throughput": 5553.2, "total_tokens": 13287872}
|
|
{"current_steps": 26985, "total_steps": 37885, "loss": 0.069, "lr": 4.634172423260081e-07, "epoch": 3.5614359245083804, "percentage": 71.23, "elapsed_time": "0:39:53", "remaining_time": "0:16:06", "throughput": 5553.52, "total_tokens": 13290560}
|
|
{"current_steps": 26990, "total_steps": 37885, "loss": 0.0322, "lr": 4.6302854022861735e-07, "epoch": 3.562095816286129, "percentage": 71.24, "elapsed_time": "0:39:53", "remaining_time": "0:16:06", "throughput": 5553.77, "total_tokens": 13293056}
|
|
{"current_steps": 26995, "total_steps": 37885, "loss": 0.0, "lr": 4.6263995209953024e-07, "epoch": 3.5627557080638774, "percentage": 71.26, "elapsed_time": "0:39:53", "remaining_time": "0:16:05", "throughput": 5554.0, "total_tokens": 13295488}
|
|
{"current_steps": 27000, "total_steps": 37885, "loss": 0.0, "lr": 4.622514780212219e-07, "epoch": 3.563415599841626, "percentage": 71.27, "elapsed_time": "0:39:54", "remaining_time": "0:16:05", "throughput": 5554.22, "total_tokens": 13297856}
|
|
{"current_steps": 27005, "total_steps": 37885, "loss": 0.0322, "lr": 4.618631180761434e-07, "epoch": 3.5640754916193744, "percentage": 71.28, "elapsed_time": "0:39:54", "remaining_time": "0:16:04", "throughput": 5554.51, "total_tokens": 13300416}
|
|
{"current_steps": 27010, "total_steps": 37885, "loss": 0.0, "lr": 4.6147487234672156e-07, "epoch": 3.5647353833971227, "percentage": 71.29, "elapsed_time": "0:39:54", "remaining_time": "0:16:04", "throughput": 5554.77, "total_tokens": 13302848}
|
|
{"current_steps": 27015, "total_steps": 37885, "loss": 0.0, "lr": 4.6108674091535795e-07, "epoch": 3.5653952751748714, "percentage": 71.31, "elapsed_time": "0:39:55", "remaining_time": "0:16:03", "throughput": 5555.04, "total_tokens": 13305344}
|
|
{"current_steps": 27020, "total_steps": 37885, "loss": 0.0, "lr": 4.6069872386443107e-07, "epoch": 3.5660551669526197, "percentage": 71.32, "elapsed_time": "0:39:55", "remaining_time": "0:16:03", "throughput": 5555.32, "total_tokens": 13307840}
|
|
{"current_steps": 27025, "total_steps": 37885, "loss": 0.0323, "lr": 4.6031082127629514e-07, "epoch": 3.5667150587303684, "percentage": 71.33, "elapsed_time": "0:39:55", "remaining_time": "0:16:02", "throughput": 5555.54, "total_tokens": 13310208}
|
|
{"current_steps": 27030, "total_steps": 37885, "loss": 0.0001, "lr": 4.5992303323327885e-07, "epoch": 3.5673749505081167, "percentage": 71.35, "elapsed_time": "0:39:56", "remaining_time": "0:16:02", "throughput": 5555.76, "total_tokens": 13312576}
|
|
{"current_steps": 27035, "total_steps": 37885, "loss": 0.0004, "lr": 4.5953535981768786e-07, "epoch": 3.568034842285865, "percentage": 71.36, "elapsed_time": "0:39:56", "remaining_time": "0:16:01", "throughput": 5555.9, "total_tokens": 13314752}
|
|
{"current_steps": 27040, "total_steps": 37885, "loss": 0.0, "lr": 4.591478011118034e-07, "epoch": 3.5686947340636137, "percentage": 71.37, "elapsed_time": "0:39:56", "remaining_time": "0:16:01", "throughput": 5556.14, "total_tokens": 13317184}
|
|
{"current_steps": 27045, "total_steps": 37885, "loss": 0.0585, "lr": 4.5876035719788133e-07, "epoch": 3.569354625841362, "percentage": 71.39, "elapsed_time": "0:39:57", "remaining_time": "0:16:00", "throughput": 5556.58, "total_tokens": 13320128}
|
|
{"current_steps": 27050, "total_steps": 37885, "loss": 0.0002, "lr": 4.5837302815815394e-07, "epoch": 3.5700145176191107, "percentage": 71.4, "elapsed_time": "0:39:57", "remaining_time": "0:16:00", "throughput": 5556.92, "total_tokens": 13322816}
|
|
{"current_steps": 27055, "total_steps": 37885, "loss": 0.0, "lr": 4.5798581407482927e-07, "epoch": 3.570674409396859, "percentage": 71.41, "elapsed_time": "0:39:57", "remaining_time": "0:15:59", "throughput": 5557.17, "total_tokens": 13325248}
|
|
{"current_steps": 27060, "total_steps": 37885, "loss": 0.0693, "lr": 4.5759871503009097e-07, "epoch": 3.5713343011746073, "percentage": 71.43, "elapsed_time": "0:39:58", "remaining_time": "0:15:59", "throughput": 5557.42, "total_tokens": 13327680}
|
|
{"current_steps": 27065, "total_steps": 37885, "loss": 0.0, "lr": 4.572117311060972e-07, "epoch": 3.5719941929523555, "percentage": 71.44, "elapsed_time": "0:39:58", "remaining_time": "0:15:58", "throughput": 5557.61, "total_tokens": 13329984}
|
|
{"current_steps": 27070, "total_steps": 37885, "loss": 0.0719, "lr": 4.56824862384983e-07, "epoch": 3.5726540847301043, "percentage": 71.45, "elapsed_time": "0:39:58", "remaining_time": "0:15:58", "throughput": 5557.81, "total_tokens": 13332288}
|
|
{"current_steps": 27075, "total_steps": 37885, "loss": 0.0176, "lr": 4.564381089488587e-07, "epoch": 3.573313976507853, "percentage": 71.47, "elapsed_time": "0:39:59", "remaining_time": "0:15:57", "throughput": 5557.93, "total_tokens": 13334400}
|
|
{"current_steps": 27080, "total_steps": 37885, "loss": 0.1063, "lr": 4.560514708798093e-07, "epoch": 3.5739738682856013, "percentage": 71.48, "elapsed_time": "0:39:59", "remaining_time": "0:15:57", "throughput": 5558.24, "total_tokens": 13337024}
|
|
{"current_steps": 27085, "total_steps": 37885, "loss": 0.0, "lr": 4.556649482598962e-07, "epoch": 3.5746337600633495, "percentage": 71.49, "elapsed_time": "0:39:59", "remaining_time": "0:15:56", "throughput": 5558.42, "total_tokens": 13339328}
|
|
{"current_steps": 27090, "total_steps": 37885, "loss": 0.0412, "lr": 4.552785411711565e-07, "epoch": 3.575293651841098, "percentage": 71.51, "elapsed_time": "0:40:00", "remaining_time": "0:15:56", "throughput": 5558.62, "total_tokens": 13341632}
|
|
{"current_steps": 27095, "total_steps": 37885, "loss": 0.0011, "lr": 4.548922496956015e-07, "epoch": 3.5759535436188465, "percentage": 71.52, "elapsed_time": "0:40:00", "remaining_time": "0:15:55", "throughput": 5558.82, "total_tokens": 13343936}
|
|
{"current_steps": 27100, "total_steps": 37885, "loss": 0.0, "lr": 4.54506073915219e-07, "epoch": 3.576613435396595, "percentage": 71.53, "elapsed_time": "0:40:00", "remaining_time": "0:15:55", "throughput": 5559.19, "total_tokens": 13346688}
|
|
{"current_steps": 27105, "total_steps": 37885, "loss": 0.0001, "lr": 4.541200139119723e-07, "epoch": 3.5772733271743435, "percentage": 71.55, "elapsed_time": "0:40:01", "remaining_time": "0:15:54", "throughput": 5559.53, "total_tokens": 13349376}
|
|
{"current_steps": 27110, "total_steps": 37885, "loss": 0.0673, "lr": 4.537340697678e-07, "epoch": 3.577933218952092, "percentage": 71.56, "elapsed_time": "0:40:01", "remaining_time": "0:15:54", "throughput": 5559.72, "total_tokens": 13351680}
|
|
{"current_steps": 27115, "total_steps": 37885, "loss": 0.0003, "lr": 4.533482415646157e-07, "epoch": 3.57859311072984, "percentage": 71.57, "elapsed_time": "0:40:01", "remaining_time": "0:15:54", "throughput": 5559.94, "total_tokens": 13354048}
|
|
{"current_steps": 27120, "total_steps": 37885, "loss": 0.004, "lr": 4.529625293843078e-07, "epoch": 3.579253002507589, "percentage": 71.59, "elapsed_time": "0:40:02", "remaining_time": "0:15:53", "throughput": 5560.16, "total_tokens": 13356416}
|
|
{"current_steps": 27125, "total_steps": 37885, "loss": 0.0, "lr": 4.525769333087425e-07, "epoch": 3.579912894285337, "percentage": 71.6, "elapsed_time": "0:40:02", "remaining_time": "0:15:53", "throughput": 5560.32, "total_tokens": 13358592}
|
|
{"current_steps": 27130, "total_steps": 37885, "loss": 0.0, "lr": 4.521914534197585e-07, "epoch": 3.580572786063086, "percentage": 71.61, "elapsed_time": "0:40:02", "remaining_time": "0:15:52", "throughput": 5560.65, "total_tokens": 13361216}
|
|
{"current_steps": 27135, "total_steps": 37885, "loss": 0.0, "lr": 4.518060897991721e-07, "epoch": 3.581232677840834, "percentage": 71.62, "elapsed_time": "0:40:03", "remaining_time": "0:15:52", "throughput": 5560.79, "total_tokens": 13363392}
|
|
{"current_steps": 27140, "total_steps": 37885, "loss": 0.0, "lr": 4.51420842528773e-07, "epoch": 3.5818925696185824, "percentage": 71.64, "elapsed_time": "0:40:03", "remaining_time": "0:15:51", "throughput": 5561.02, "total_tokens": 13365760}
|
|
{"current_steps": 27145, "total_steps": 37885, "loss": 0.0, "lr": 4.510357116903275e-07, "epoch": 3.582552461396331, "percentage": 71.65, "elapsed_time": "0:40:03", "remaining_time": "0:15:51", "throughput": 5561.2, "total_tokens": 13368000}
|
|
{"current_steps": 27150, "total_steps": 37885, "loss": 0.0, "lr": 4.5065069736557737e-07, "epoch": 3.5832123531740794, "percentage": 71.66, "elapsed_time": "0:40:04", "remaining_time": "0:15:50", "throughput": 5561.43, "total_tokens": 13370368}
|
|
{"current_steps": 27155, "total_steps": 37885, "loss": 0.0, "lr": 4.502657996362379e-07, "epoch": 3.583872244951828, "percentage": 71.68, "elapsed_time": "0:40:04", "remaining_time": "0:15:50", "throughput": 5561.85, "total_tokens": 13373248}
|
|
{"current_steps": 27160, "total_steps": 37885, "loss": 0.0005, "lr": 4.498810185840023e-07, "epoch": 3.5845321367295764, "percentage": 71.69, "elapsed_time": "0:40:04", "remaining_time": "0:15:49", "throughput": 5562.03, "total_tokens": 13375488}
|
|
{"current_steps": 27165, "total_steps": 37885, "loss": 0.0873, "lr": 4.494963542905369e-07, "epoch": 3.5851920285073247, "percentage": 71.7, "elapsed_time": "0:40:05", "remaining_time": "0:15:49", "throughput": 5562.26, "total_tokens": 13377856}
|
|
{"current_steps": 27170, "total_steps": 37885, "loss": 0.0007, "lr": 4.491118068374835e-07, "epoch": 3.5858519202850734, "percentage": 71.72, "elapsed_time": "0:40:05", "remaining_time": "0:15:48", "throughput": 5562.61, "total_tokens": 13380544}
|
|
{"current_steps": 27175, "total_steps": 37885, "loss": 0.0, "lr": 4.4872737630645984e-07, "epoch": 3.5865118120628217, "percentage": 71.73, "elapsed_time": "0:40:05", "remaining_time": "0:15:48", "throughput": 5562.84, "total_tokens": 13382912}
|
|
{"current_steps": 27180, "total_steps": 37885, "loss": 0.0001, "lr": 4.4834306277905855e-07, "epoch": 3.5871717038405704, "percentage": 71.74, "elapsed_time": "0:40:06", "remaining_time": "0:15:47", "throughput": 5563.02, "total_tokens": 13385152}
|
|
{"current_steps": 27185, "total_steps": 37885, "loss": 0.0, "lr": 4.4795886633684776e-07, "epoch": 3.5878315956183187, "percentage": 71.76, "elapsed_time": "0:40:06", "remaining_time": "0:15:47", "throughput": 5563.2, "total_tokens": 13387392}
|
|
{"current_steps": 27190, "total_steps": 37885, "loss": 0.0472, "lr": 4.4757478706136974e-07, "epoch": 3.588491487396067, "percentage": 71.77, "elapsed_time": "0:40:06", "remaining_time": "0:15:46", "throughput": 5563.4, "total_tokens": 13389696}
|
|
{"current_steps": 27195, "total_steps": 37885, "loss": 0.0004, "lr": 4.4719082503414273e-07, "epoch": 3.5891513791738157, "percentage": 71.78, "elapsed_time": "0:40:07", "remaining_time": "0:15:46", "throughput": 5563.56, "total_tokens": 13391872}
|
|
{"current_steps": 27200, "total_steps": 37885, "loss": 0.0, "lr": 4.468069803366604e-07, "epoch": 3.589811270951564, "percentage": 71.8, "elapsed_time": "0:40:07", "remaining_time": "0:15:45", "throughput": 5563.71, "total_tokens": 13394048}
|
|
{"current_steps": 27205, "total_steps": 37885, "loss": 0.0, "lr": 4.464232530503902e-07, "epoch": 3.5904711627293127, "percentage": 71.81, "elapsed_time": "0:40:07", "remaining_time": "0:15:45", "throughput": 5564.03, "total_tokens": 13396672}
|
|
{"current_steps": 27210, "total_steps": 37885, "loss": 0.1157, "lr": 4.460396432567759e-07, "epoch": 3.591131054507061, "percentage": 71.82, "elapsed_time": "0:40:08", "remaining_time": "0:15:44", "throughput": 5564.34, "total_tokens": 13399232}
|
|
{"current_steps": 27215, "total_steps": 37885, "loss": 0.0, "lr": 4.456561510372358e-07, "epoch": 3.591790946284809, "percentage": 71.84, "elapsed_time": "0:40:08", "remaining_time": "0:15:44", "throughput": 5564.57, "total_tokens": 13401600}
|
|
{"current_steps": 27220, "total_steps": 37885, "loss": 0.0, "lr": 4.4527277647316375e-07, "epoch": 3.5924508380625575, "percentage": 71.85, "elapsed_time": "0:40:08", "remaining_time": "0:15:43", "throughput": 5564.87, "total_tokens": 13404160}
|
|
{"current_steps": 27225, "total_steps": 37885, "loss": 0.0016, "lr": 4.448895196459275e-07, "epoch": 3.593110729840306, "percentage": 71.86, "elapsed_time": "0:40:09", "remaining_time": "0:15:43", "throughput": 5565.12, "total_tokens": 13406592}
|
|
{"current_steps": 27230, "total_steps": 37885, "loss": 0.0012, "lr": 4.4450638063687094e-07, "epoch": 3.5937706216180545, "percentage": 71.88, "elapsed_time": "0:40:09", "remaining_time": "0:15:42", "throughput": 5565.42, "total_tokens": 13409152}
|
|
{"current_steps": 27235, "total_steps": 37885, "loss": 0.0001, "lr": 4.4412335952731284e-07, "epoch": 3.594430513395803, "percentage": 71.89, "elapsed_time": "0:40:09", "remaining_time": "0:15:42", "throughput": 5565.75, "total_tokens": 13411776}
|
|
{"current_steps": 27240, "total_steps": 37885, "loss": 0.0009, "lr": 4.437404563985461e-07, "epoch": 3.5950904051735515, "percentage": 71.9, "elapsed_time": "0:40:10", "remaining_time": "0:15:41", "throughput": 5566.02, "total_tokens": 13414272}
|
|
{"current_steps": 27245, "total_steps": 37885, "loss": 0.0169, "lr": 4.4335767133183923e-07, "epoch": 3.5957502969512998, "percentage": 71.92, "elapsed_time": "0:40:10", "remaining_time": "0:15:41", "throughput": 5566.32, "total_tokens": 13416832}
|
|
{"current_steps": 27250, "total_steps": 37885, "loss": 0.075, "lr": 4.4297500440843616e-07, "epoch": 3.5964101887290485, "percentage": 71.93, "elapsed_time": "0:40:10", "remaining_time": "0:15:40", "throughput": 5566.53, "total_tokens": 13419136}
|
|
{"current_steps": 27255, "total_steps": 37885, "loss": 0.0004, "lr": 4.4259245570955437e-07, "epoch": 3.5970700805067968, "percentage": 71.94, "elapsed_time": "0:40:11", "remaining_time": "0:15:40", "throughput": 5566.8, "total_tokens": 13421632}
|
|
{"current_steps": 27260, "total_steps": 37885, "loss": 0.0001, "lr": 4.422100253163874e-07, "epoch": 3.5977299722845455, "percentage": 71.95, "elapsed_time": "0:40:11", "remaining_time": "0:15:39", "throughput": 5567.02, "total_tokens": 13424000}
|
|
{"current_steps": 27265, "total_steps": 37885, "loss": 0.0337, "lr": 4.4182771331010347e-07, "epoch": 3.5983898640622938, "percentage": 71.97, "elapsed_time": "0:40:11", "remaining_time": "0:15:39", "throughput": 5567.24, "total_tokens": 13426368}
|
|
{"current_steps": 27270, "total_steps": 37885, "loss": 0.002, "lr": 4.414455197718457e-07, "epoch": 3.599049755840042, "percentage": 71.98, "elapsed_time": "0:40:12", "remaining_time": "0:15:38", "throughput": 5567.42, "total_tokens": 13428608}
|
|
{"current_steps": 27275, "total_steps": 37885, "loss": 0.0002, "lr": 4.410634447827316e-07, "epoch": 3.5997096476177908, "percentage": 71.99, "elapsed_time": "0:40:12", "remaining_time": "0:15:38", "throughput": 5567.59, "total_tokens": 13430848}
|
|
{"current_steps": 27280, "total_steps": 37885, "loss": 0.1103, "lr": 4.406814884238532e-07, "epoch": 3.600369539395539, "percentage": 72.01, "elapsed_time": "0:40:12", "remaining_time": "0:15:37", "throughput": 5567.84, "total_tokens": 13433280}
|
|
{"current_steps": 27285, "total_steps": 37885, "loss": 0.0383, "lr": 4.4029965077627927e-07, "epoch": 3.6010294311732878, "percentage": 72.02, "elapsed_time": "0:40:12", "remaining_time": "0:15:37", "throughput": 5568.04, "total_tokens": 13435584}
|
|
{"current_steps": 27290, "total_steps": 37885, "loss": 0.0, "lr": 4.399179319210511e-07, "epoch": 3.601689322951036, "percentage": 72.03, "elapsed_time": "0:40:13", "remaining_time": "0:15:36", "throughput": 5568.32, "total_tokens": 13438080}
|
|
{"current_steps": 27295, "total_steps": 37885, "loss": 0.0, "lr": 4.3953633193918606e-07, "epoch": 3.6023492147287843, "percentage": 72.05, "elapsed_time": "0:40:13", "remaining_time": "0:15:36", "throughput": 5568.69, "total_tokens": 13440832}
|
|
{"current_steps": 27300, "total_steps": 37885, "loss": 0.1113, "lr": 4.3915485091167647e-07, "epoch": 3.603009106506533, "percentage": 72.06, "elapsed_time": "0:40:13", "remaining_time": "0:15:35", "throughput": 5569.03, "total_tokens": 13443520}
|
|
{"current_steps": 27305, "total_steps": 37885, "loss": 0.0, "lr": 4.3877348891948794e-07, "epoch": 3.6036689982842813, "percentage": 72.07, "elapsed_time": "0:40:14", "remaining_time": "0:15:35", "throughput": 5569.23, "total_tokens": 13445824}
|
|
{"current_steps": 27310, "total_steps": 37885, "loss": 0.0001, "lr": 4.3839224604356274e-07, "epoch": 3.60432889006203, "percentage": 72.09, "elapsed_time": "0:40:14", "remaining_time": "0:15:34", "throughput": 5569.57, "total_tokens": 13448512}
|
|
{"current_steps": 27315, "total_steps": 37885, "loss": 0.0, "lr": 4.3801112236481575e-07, "epoch": 3.6049887818397783, "percentage": 72.1, "elapsed_time": "0:40:14", "remaining_time": "0:15:34", "throughput": 5569.82, "total_tokens": 13450944}
|
|
{"current_steps": 27320, "total_steps": 37885, "loss": 0.0001, "lr": 4.3763011796413915e-07, "epoch": 3.6056486736175266, "percentage": 72.11, "elapsed_time": "0:40:15", "remaining_time": "0:15:34", "throughput": 5570.07, "total_tokens": 13453376}
|
|
{"current_steps": 27325, "total_steps": 37885, "loss": 0.0001, "lr": 4.372492329223977e-07, "epoch": 3.6063085653952753, "percentage": 72.13, "elapsed_time": "0:40:15", "remaining_time": "0:15:33", "throughput": 5570.37, "total_tokens": 13455936}
|
|
{"current_steps": 27330, "total_steps": 37885, "loss": 0.0281, "lr": 4.3686846732043105e-07, "epoch": 3.6069684571730236, "percentage": 72.14, "elapsed_time": "0:40:15", "remaining_time": "0:15:33", "throughput": 5570.69, "total_tokens": 13458560}
|
|
{"current_steps": 27335, "total_steps": 37885, "loss": 0.0626, "lr": 4.3648782123905424e-07, "epoch": 3.6076283489507723, "percentage": 72.15, "elapsed_time": "0:40:16", "remaining_time": "0:15:32", "throughput": 5570.89, "total_tokens": 13460864}
|
|
{"current_steps": 27340, "total_steps": 37885, "loss": 0.0001, "lr": 4.361072947590568e-07, "epoch": 3.6082882407285206, "percentage": 72.17, "elapsed_time": "0:40:16", "remaining_time": "0:15:32", "throughput": 5571.17, "total_tokens": 13463360}
|
|
{"current_steps": 27345, "total_steps": 37885, "loss": 0.0001, "lr": 4.3572688796120307e-07, "epoch": 3.608948132506269, "percentage": 72.18, "elapsed_time": "0:40:16", "remaining_time": "0:15:31", "throughput": 5571.53, "total_tokens": 13466112}
|
|
{"current_steps": 27350, "total_steps": 37885, "loss": 0.001, "lr": 4.353466009262309e-07, "epoch": 3.609608024284017, "percentage": 72.19, "elapsed_time": "0:40:17", "remaining_time": "0:15:31", "throughput": 5571.88, "total_tokens": 13468800}
|
|
{"current_steps": 27355, "total_steps": 37885, "loss": 0.0226, "lr": 4.3496643373485367e-07, "epoch": 3.610267916061766, "percentage": 72.21, "elapsed_time": "0:40:17", "remaining_time": "0:15:30", "throughput": 5572.15, "total_tokens": 13471296}
|
|
{"current_steps": 27360, "total_steps": 37885, "loss": 0.0, "lr": 4.345863864677596e-07, "epoch": 3.610927807839514, "percentage": 72.22, "elapsed_time": "0:40:17", "remaining_time": "0:15:30", "throughput": 5572.39, "total_tokens": 13473728}
|
|
{"current_steps": 27365, "total_steps": 37885, "loss": 0.0009, "lr": 4.342064592056103e-07, "epoch": 3.611587699617263, "percentage": 72.23, "elapsed_time": "0:40:18", "remaining_time": "0:15:29", "throughput": 5572.6, "total_tokens": 13476032}
|
|
{"current_steps": 27370, "total_steps": 37885, "loss": 0.0, "lr": 4.338266520290428e-07, "epoch": 3.612247591395011, "percentage": 72.24, "elapsed_time": "0:40:18", "remaining_time": "0:15:29", "throughput": 5572.9, "total_tokens": 13478592}
|
|
{"current_steps": 27375, "total_steps": 37885, "loss": 0.0688, "lr": 4.3344696501866893e-07, "epoch": 3.6129074831727594, "percentage": 72.26, "elapsed_time": "0:40:18", "remaining_time": "0:15:28", "throughput": 5573.18, "total_tokens": 13481088}
|
|
{"current_steps": 27380, "total_steps": 37885, "loss": 0.0001, "lr": 4.330673982550738e-07, "epoch": 3.613567374950508, "percentage": 72.27, "elapsed_time": "0:40:19", "remaining_time": "0:15:28", "throughput": 5573.35, "total_tokens": 13483328}
|
|
{"current_steps": 27385, "total_steps": 37885, "loss": 0.0, "lr": 4.326879518188178e-07, "epoch": 3.6142272667282564, "percentage": 72.28, "elapsed_time": "0:40:19", "remaining_time": "0:15:27", "throughput": 5573.66, "total_tokens": 13485888}
|
|
{"current_steps": 27390, "total_steps": 37885, "loss": 0.0, "lr": 4.323086257904359e-07, "epoch": 3.614887158506005, "percentage": 72.3, "elapsed_time": "0:40:19", "remaining_time": "0:15:27", "throughput": 5573.98, "total_tokens": 13488512}
|
|
{"current_steps": 27395, "total_steps": 37885, "loss": 0.0, "lr": 4.319294202504378e-07, "epoch": 3.6155470502837534, "percentage": 72.31, "elapsed_time": "0:40:20", "remaining_time": "0:15:26", "throughput": 5574.13, "total_tokens": 13490688}
|
|
{"current_steps": 27400, "total_steps": 37885, "loss": 0.0, "lr": 4.3155033527930606e-07, "epoch": 3.6162069420615017, "percentage": 72.32, "elapsed_time": "0:40:20", "remaining_time": "0:15:26", "throughput": 5574.32, "total_tokens": 13492992}
|
|
{"current_steps": 27405, "total_steps": 37885, "loss": 0.0201, "lr": 4.3117137095749945e-07, "epoch": 3.6168668338392505, "percentage": 72.34, "elapsed_time": "0:40:20", "remaining_time": "0:15:25", "throughput": 5574.55, "total_tokens": 13495360}
|
|
{"current_steps": 27410, "total_steps": 37885, "loss": 0.0018, "lr": 4.307925273654505e-07, "epoch": 3.6175267256169987, "percentage": 72.35, "elapsed_time": "0:40:21", "remaining_time": "0:15:25", "throughput": 5574.8, "total_tokens": 13497792}
|
|
{"current_steps": 27415, "total_steps": 37885, "loss": 0.0, "lr": 4.3041380458356534e-07, "epoch": 3.6181866173947475, "percentage": 72.36, "elapsed_time": "0:40:21", "remaining_time": "0:15:24", "throughput": 5575.04, "total_tokens": 13500224}
|
|
{"current_steps": 27420, "total_steps": 37885, "loss": 0.0, "lr": 4.3003520269222557e-07, "epoch": 3.6188465091724957, "percentage": 72.38, "elapsed_time": "0:40:21", "remaining_time": "0:15:24", "throughput": 5575.19, "total_tokens": 13502400}
|
|
{"current_steps": 27425, "total_steps": 37885, "loss": 0.0533, "lr": 4.29656721771787e-07, "epoch": 3.619506400950244, "percentage": 72.39, "elapsed_time": "0:40:22", "remaining_time": "0:15:23", "throughput": 5575.59, "total_tokens": 13505216}
|
|
{"current_steps": 27430, "total_steps": 37885, "loss": 0.0549, "lr": 4.292783619025788e-07, "epoch": 3.6201662927279927, "percentage": 72.4, "elapsed_time": "0:40:22", "remaining_time": "0:15:23", "throughput": 5575.79, "total_tokens": 13507520}
|
|
{"current_steps": 27435, "total_steps": 37885, "loss": 0.0176, "lr": 4.289001231649054e-07, "epoch": 3.620826184505741, "percentage": 72.42, "elapsed_time": "0:40:22", "remaining_time": "0:15:22", "throughput": 5576.11, "total_tokens": 13510144}
|
|
{"current_steps": 27440, "total_steps": 37885, "loss": 0.0595, "lr": 4.285220056390454e-07, "epoch": 3.6214860762834897, "percentage": 72.43, "elapsed_time": "0:40:23", "remaining_time": "0:15:22", "throughput": 5576.39, "total_tokens": 13512640}
|
|
{"current_steps": 27445, "total_steps": 37885, "loss": 0.0001, "lr": 4.2814400940525164e-07, "epoch": 3.622145968061238, "percentage": 72.44, "elapsed_time": "0:40:23", "remaining_time": "0:15:21", "throughput": 5576.68, "total_tokens": 13515200}
|
|
{"current_steps": 27450, "total_steps": 37885, "loss": 0.0, "lr": 4.2776613454375087e-07, "epoch": 3.6228058598389863, "percentage": 72.46, "elapsed_time": "0:40:23", "remaining_time": "0:15:21", "throughput": 5576.9, "total_tokens": 13517568}
|
|
{"current_steps": 27455, "total_steps": 37885, "loss": 0.0004, "lr": 4.2738838113474353e-07, "epoch": 3.623465751616735, "percentage": 72.47, "elapsed_time": "0:40:24", "remaining_time": "0:15:20", "throughput": 5577.17, "total_tokens": 13520064}
|
|
{"current_steps": 27460, "total_steps": 37885, "loss": 0.0002, "lr": 4.2701074925840643e-07, "epoch": 3.6241256433944833, "percentage": 72.48, "elapsed_time": "0:40:24", "remaining_time": "0:15:20", "throughput": 5577.5, "total_tokens": 13522688}
|
|
{"current_steps": 27465, "total_steps": 37885, "loss": 0.0, "lr": 4.266332389948882e-07, "epoch": 3.624785535172232, "percentage": 72.5, "elapsed_time": "0:40:24", "remaining_time": "0:15:19", "throughput": 5577.84, "total_tokens": 13525376}
|
|
{"current_steps": 27470, "total_steps": 37885, "loss": 0.0, "lr": 4.2625585042431347e-07, "epoch": 3.6254454269499803, "percentage": 72.51, "elapsed_time": "0:40:25", "remaining_time": "0:15:19", "throughput": 5578.04, "total_tokens": 13527680}
|
|
{"current_steps": 27475, "total_steps": 37885, "loss": 0.0, "lr": 4.258785836267792e-07, "epoch": 3.6261053187277286, "percentage": 72.52, "elapsed_time": "0:40:25", "remaining_time": "0:15:18", "throughput": 5578.28, "total_tokens": 13530112}
|
|
{"current_steps": 27480, "total_steps": 37885, "loss": 0.0002, "lr": 4.255014386823582e-07, "epoch": 3.626765210505477, "percentage": 72.54, "elapsed_time": "0:40:25", "remaining_time": "0:15:18", "throughput": 5578.48, "total_tokens": 13532416}
|
|
{"current_steps": 27485, "total_steps": 37885, "loss": 0.0, "lr": 4.25124415671097e-07, "epoch": 3.6274251022832256, "percentage": 72.55, "elapsed_time": "0:40:26", "remaining_time": "0:15:18", "throughput": 5578.8, "total_tokens": 13535040}
|
|
{"current_steps": 27490, "total_steps": 37885, "loss": 0.0008, "lr": 4.24747514673015e-07, "epoch": 3.628084994060974, "percentage": 72.56, "elapsed_time": "0:40:26", "remaining_time": "0:15:17", "throughput": 5579.02, "total_tokens": 13537408}
|
|
{"current_steps": 27495, "total_steps": 37885, "loss": 0.0457, "lr": 4.24370735768108e-07, "epoch": 3.6287448858387226, "percentage": 72.57, "elapsed_time": "0:40:26", "remaining_time": "0:15:17", "throughput": 5579.17, "total_tokens": 13539584}
|
|
{"current_steps": 27500, "total_steps": 37885, "loss": 0.0004, "lr": 4.23994079036344e-07, "epoch": 3.629404777616471, "percentage": 72.59, "elapsed_time": "0:40:27", "remaining_time": "0:15:16", "throughput": 5579.47, "total_tokens": 13542144}
|
|
{"current_steps": 27505, "total_steps": 37885, "loss": 0.0005, "lr": 4.2361754455766517e-07, "epoch": 3.630064669394219, "percentage": 72.6, "elapsed_time": "0:40:27", "remaining_time": "0:15:16", "throughput": 5579.72, "total_tokens": 13544576}
|
|
{"current_steps": 27510, "total_steps": 37885, "loss": 0.0, "lr": 4.232411324119888e-07, "epoch": 3.630724561171968, "percentage": 72.61, "elapsed_time": "0:40:27", "remaining_time": "0:15:15", "throughput": 5579.91, "total_tokens": 13546880}
|
|
{"current_steps": 27515, "total_steps": 37885, "loss": 0.0736, "lr": 4.228648426792054e-07, "epoch": 3.631384452949716, "percentage": 72.63, "elapsed_time": "0:40:28", "remaining_time": "0:15:15", "throughput": 5580.21, "total_tokens": 13549440}
|
|
{"current_steps": 27520, "total_steps": 37885, "loss": 0.0, "lr": 4.224886754391803e-07, "epoch": 3.632044344727465, "percentage": 72.64, "elapsed_time": "0:40:28", "remaining_time": "0:15:14", "throughput": 5580.5, "total_tokens": 13552000}
|
|
{"current_steps": 27525, "total_steps": 37885, "loss": 0.001, "lr": 4.2211263077175144e-07, "epoch": 3.632704236505213, "percentage": 72.65, "elapsed_time": "0:40:28", "remaining_time": "0:15:14", "throughput": 5580.84, "total_tokens": 13554688}
|
|
{"current_steps": 27530, "total_steps": 37885, "loss": 0.0611, "lr": 4.2173670875673197e-07, "epoch": 3.6333641282829614, "percentage": 72.67, "elapsed_time": "0:40:29", "remaining_time": "0:15:13", "throughput": 5581.25, "total_tokens": 13557568}
|
|
{"current_steps": 27535, "total_steps": 37885, "loss": 0.0, "lr": 4.213609094739089e-07, "epoch": 3.63402402006071, "percentage": 72.68, "elapsed_time": "0:40:29", "remaining_time": "0:15:13", "throughput": 5581.54, "total_tokens": 13560128}
|
|
{"current_steps": 27540, "total_steps": 37885, "loss": 0.0005, "lr": 4.2098523300304236e-07, "epoch": 3.6346839118384584, "percentage": 72.69, "elapsed_time": "0:40:29", "remaining_time": "0:15:12", "throughput": 5581.8, "total_tokens": 13562560}
|
|
{"current_steps": 27545, "total_steps": 37885, "loss": 0.0001, "lr": 4.2060967942386715e-07, "epoch": 3.635343803616207, "percentage": 72.71, "elapsed_time": "0:40:30", "remaining_time": "0:15:12", "throughput": 5582.02, "total_tokens": 13564928}
|
|
{"current_steps": 27550, "total_steps": 37885, "loss": 0.0001, "lr": 4.2023424881609195e-07, "epoch": 3.6360036953939554, "percentage": 72.72, "elapsed_time": "0:40:30", "remaining_time": "0:15:11", "throughput": 5582.27, "total_tokens": 13567360}
|
|
{"current_steps": 27555, "total_steps": 37885, "loss": 0.0002, "lr": 4.1985894125939947e-07, "epoch": 3.6366635871717037, "percentage": 72.73, "elapsed_time": "0:40:30", "remaining_time": "0:15:11", "throughput": 5582.57, "total_tokens": 13569920}
|
|
{"current_steps": 27560, "total_steps": 37885, "loss": 0.0065, "lr": 4.194837568334452e-07, "epoch": 3.6373234789494524, "percentage": 72.75, "elapsed_time": "0:40:31", "remaining_time": "0:15:10", "throughput": 5582.79, "total_tokens": 13572288}
|
|
{"current_steps": 27565, "total_steps": 37885, "loss": 0.0, "lr": 4.191086956178598e-07, "epoch": 3.6379833707272007, "percentage": 72.76, "elapsed_time": "0:40:31", "remaining_time": "0:15:10", "throughput": 5583.04, "total_tokens": 13574720}
|
|
{"current_steps": 27570, "total_steps": 37885, "loss": 0.0, "lr": 4.187337576922476e-07, "epoch": 3.6386432625049494, "percentage": 72.77, "elapsed_time": "0:40:31", "remaining_time": "0:15:09", "throughput": 5583.28, "total_tokens": 13577152}
|
|
{"current_steps": 27575, "total_steps": 37885, "loss": 0.0028, "lr": 4.1835894313618593e-07, "epoch": 3.6393031542826977, "percentage": 72.79, "elapsed_time": "0:40:32", "remaining_time": "0:15:09", "throughput": 5583.53, "total_tokens": 13579584}
|
|
{"current_steps": 27580, "total_steps": 37885, "loss": 0.0, "lr": 4.179842520292265e-07, "epoch": 3.639963046060446, "percentage": 72.8, "elapsed_time": "0:40:32", "remaining_time": "0:15:08", "throughput": 5583.78, "total_tokens": 13582016}
|
|
{"current_steps": 27585, "total_steps": 37885, "loss": 0.0, "lr": 4.176096844508954e-07, "epoch": 3.6406229378381947, "percentage": 72.81, "elapsed_time": "0:40:32", "remaining_time": "0:15:08", "throughput": 5583.93, "total_tokens": 13584192}
|
|
{"current_steps": 27590, "total_steps": 37885, "loss": 0.0002, "lr": 4.17235240480691e-07, "epoch": 3.641282829615943, "percentage": 72.83, "elapsed_time": "0:40:33", "remaining_time": "0:15:07", "throughput": 5584.18, "total_tokens": 13586624}
|
|
{"current_steps": 27595, "total_steps": 37885, "loss": 0.0016, "lr": 4.1686092019808685e-07, "epoch": 3.6419427213936917, "percentage": 72.84, "elapsed_time": "0:40:33", "remaining_time": "0:15:07", "throughput": 5584.35, "total_tokens": 13588864}
|
|
{"current_steps": 27600, "total_steps": 37885, "loss": 0.0487, "lr": 4.164867236825296e-07, "epoch": 3.64260261317144, "percentage": 72.85, "elapsed_time": "0:40:33", "remaining_time": "0:15:06", "throughput": 5584.69, "total_tokens": 13591552}
|
|
{"current_steps": 27605, "total_steps": 37885, "loss": 0.028, "lr": 4.1611265101344005e-07, "epoch": 3.6432625049491882, "percentage": 72.87, "elapsed_time": "0:40:34", "remaining_time": "0:15:06", "throughput": 5584.91, "total_tokens": 13593920}
|
|
{"current_steps": 27610, "total_steps": 37885, "loss": 0.066, "lr": 4.1573870227021224e-07, "epoch": 3.6439223967269365, "percentage": 72.88, "elapsed_time": "0:40:34", "remaining_time": "0:15:05", "throughput": 5585.13, "total_tokens": 13596288}
|
|
{"current_steps": 27615, "total_steps": 37885, "loss": 0.0, "lr": 4.153648775322132e-07, "epoch": 3.6445822885046852, "percentage": 72.89, "elapsed_time": "0:40:34", "remaining_time": "0:15:05", "throughput": 5585.28, "total_tokens": 13598464}
|
|
{"current_steps": 27620, "total_steps": 37885, "loss": 0.0014, "lr": 4.1499117687878606e-07, "epoch": 3.6452421802824335, "percentage": 72.9, "elapsed_time": "0:40:35", "remaining_time": "0:15:04", "throughput": 5585.46, "total_tokens": 13600704}
|
|
{"current_steps": 27625, "total_steps": 37885, "loss": 0.0487, "lr": 4.1461760038924496e-07, "epoch": 3.6459020720601822, "percentage": 72.92, "elapsed_time": "0:40:35", "remaining_time": "0:15:04", "throughput": 5585.71, "total_tokens": 13603136}
|
|
{"current_steps": 27630, "total_steps": 37885, "loss": 0.0, "lr": 4.142441481428792e-07, "epoch": 3.6465619638379305, "percentage": 72.93, "elapsed_time": "0:40:35", "remaining_time": "0:15:04", "throughput": 5585.89, "total_tokens": 13605440}
|
|
{"current_steps": 27635, "total_steps": 37885, "loss": 0.0, "lr": 4.138708202189516e-07, "epoch": 3.647221855615679, "percentage": 72.94, "elapsed_time": "0:40:36", "remaining_time": "0:15:03", "throughput": 5586.09, "total_tokens": 13607744}
|
|
{"current_steps": 27640, "total_steps": 37885, "loss": 0.0, "lr": 4.134976166966977e-07, "epoch": 3.6478817473934275, "percentage": 72.96, "elapsed_time": "0:40:36", "remaining_time": "0:15:03", "throughput": 5586.36, "total_tokens": 13610240}
|
|
{"current_steps": 27645, "total_steps": 37885, "loss": 0.0754, "lr": 4.131245376553278e-07, "epoch": 3.648541639171176, "percentage": 72.97, "elapsed_time": "0:40:36", "remaining_time": "0:15:02", "throughput": 5586.53, "total_tokens": 13612480}
|
|
{"current_steps": 27650, "total_steps": 37885, "loss": 0.0028, "lr": 4.1275158317402436e-07, "epoch": 3.6492015309489245, "percentage": 72.98, "elapsed_time": "0:40:36", "remaining_time": "0:15:02", "throughput": 5586.85, "total_tokens": 13615104}
|
|
{"current_steps": 27655, "total_steps": 37885, "loss": 0.0, "lr": 4.123787533319455e-07, "epoch": 3.649861422726673, "percentage": 73.0, "elapsed_time": "0:40:37", "remaining_time": "0:15:01", "throughput": 5587.09, "total_tokens": 13617536}
|
|
{"current_steps": 27660, "total_steps": 37885, "loss": 0.0018, "lr": 4.1200604820822103e-07, "epoch": 3.650521314504421, "percentage": 73.01, "elapsed_time": "0:40:37", "remaining_time": "0:15:01", "throughput": 5587.31, "total_tokens": 13619904}
|
|
{"current_steps": 27665, "total_steps": 37885, "loss": 0.0, "lr": 4.1163346788195465e-07, "epoch": 3.65118120628217, "percentage": 73.02, "elapsed_time": "0:40:37", "remaining_time": "0:15:00", "throughput": 5587.6, "total_tokens": 13622464}
|
|
{"current_steps": 27670, "total_steps": 37885, "loss": 0.0018, "lr": 4.11261012432224e-07, "epoch": 3.651841098059918, "percentage": 73.04, "elapsed_time": "0:40:38", "remaining_time": "0:15:00", "throughput": 5587.94, "total_tokens": 13625152}
|
|
{"current_steps": 27675, "total_steps": 37885, "loss": 0.0004, "lr": 4.1088868193808023e-07, "epoch": 3.652500989837667, "percentage": 73.05, "elapsed_time": "0:40:38", "remaining_time": "0:14:59", "throughput": 5588.22, "total_tokens": 13627712}
|
|
{"current_steps": 27680, "total_steps": 37885, "loss": 0.0, "lr": 4.10516476478548e-07, "epoch": 3.653160881615415, "percentage": 73.06, "elapsed_time": "0:40:38", "remaining_time": "0:14:59", "throughput": 5588.39, "total_tokens": 13629952}
|
|
{"current_steps": 27685, "total_steps": 37885, "loss": 0.0688, "lr": 4.101443961326245e-07, "epoch": 3.6538207733931634, "percentage": 73.08, "elapsed_time": "0:40:39", "remaining_time": "0:14:58", "throughput": 5588.72, "total_tokens": 13632576}
|
|
{"current_steps": 27690, "total_steps": 37885, "loss": 0.0, "lr": 4.0977244097928164e-07, "epoch": 3.654480665170912, "percentage": 73.09, "elapsed_time": "0:40:39", "remaining_time": "0:14:58", "throughput": 5588.93, "total_tokens": 13634944}
|
|
{"current_steps": 27695, "total_steps": 37885, "loss": 0.0, "lr": 4.094006110974645e-07, "epoch": 3.6551405569486604, "percentage": 73.1, "elapsed_time": "0:40:39", "remaining_time": "0:14:57", "throughput": 5589.13, "total_tokens": 13637248}
|
|
{"current_steps": 27700, "total_steps": 37885, "loss": 0.0001, "lr": 4.0902890656609044e-07, "epoch": 3.655800448726409, "percentage": 73.12, "elapsed_time": "0:40:40", "remaining_time": "0:14:57", "throughput": 5589.39, "total_tokens": 13639744}
|
|
{"current_steps": 27705, "total_steps": 37885, "loss": 0.1378, "lr": 4.0865732746405145e-07, "epoch": 3.6564603405041574, "percentage": 73.13, "elapsed_time": "0:40:40", "remaining_time": "0:14:56", "throughput": 5589.66, "total_tokens": 13642240}
|
|
{"current_steps": 27710, "total_steps": 37885, "loss": 0.0, "lr": 4.08285873870213e-07, "epoch": 3.6571202322819056, "percentage": 73.14, "elapsed_time": "0:40:40", "remaining_time": "0:14:56", "throughput": 5589.91, "total_tokens": 13644672}
|
|
{"current_steps": 27715, "total_steps": 37885, "loss": 0.0004, "lr": 4.079145458634125e-07, "epoch": 3.6577801240596544, "percentage": 73.16, "elapsed_time": "0:40:41", "remaining_time": "0:14:55", "throughput": 5590.13, "total_tokens": 13647040}
|
|
{"current_steps": 27720, "total_steps": 37885, "loss": 0.0003, "lr": 4.075433435224621e-07, "epoch": 3.6584400158374026, "percentage": 73.17, "elapsed_time": "0:40:41", "remaining_time": "0:14:55", "throughput": 5590.43, "total_tokens": 13649600}
|
|
{"current_steps": 27725, "total_steps": 37885, "loss": 0.0072, "lr": 4.071722669261468e-07, "epoch": 3.6590999076151514, "percentage": 73.18, "elapsed_time": "0:40:41", "remaining_time": "0:14:54", "throughput": 5590.79, "total_tokens": 13652352}
|
|
{"current_steps": 27730, "total_steps": 37885, "loss": 0.0, "lr": 4.068013161532253e-07, "epoch": 3.6597597993928996, "percentage": 73.2, "elapsed_time": "0:40:42", "remaining_time": "0:14:54", "throughput": 5591.11, "total_tokens": 13654976}
|
|
{"current_steps": 27735, "total_steps": 37885, "loss": 0.0007, "lr": 4.064304912824286e-07, "epoch": 3.660419691170648, "percentage": 73.21, "elapsed_time": "0:40:42", "remaining_time": "0:14:53", "throughput": 5591.35, "total_tokens": 13657408}
|
|
{"current_steps": 27740, "total_steps": 37885, "loss": 0.0004, "lr": 4.0605979239246166e-07, "epoch": 3.661079582948396, "percentage": 73.22, "elapsed_time": "0:40:42", "remaining_time": "0:14:53", "throughput": 5591.42, "total_tokens": 13659392}
|
|
{"current_steps": 27745, "total_steps": 37885, "loss": 0.0, "lr": 4.056892195620032e-07, "epoch": 3.661739474726145, "percentage": 73.23, "elapsed_time": "0:40:43", "remaining_time": "0:14:52", "throughput": 5591.67, "total_tokens": 13661824}
|
|
{"current_steps": 27750, "total_steps": 37885, "loss": 0.0, "lr": 4.0531877286970397e-07, "epoch": 3.6623993665038936, "percentage": 73.25, "elapsed_time": "0:40:43", "remaining_time": "0:14:52", "throughput": 5591.96, "total_tokens": 13664384}
|
|
{"current_steps": 27755, "total_steps": 37885, "loss": 0.0001, "lr": 4.0494845239418873e-07, "epoch": 3.663059258281642, "percentage": 73.26, "elapsed_time": "0:40:43", "remaining_time": "0:14:51", "throughput": 5592.16, "total_tokens": 13666688}
|
|
{"current_steps": 27760, "total_steps": 37885, "loss": 0.0844, "lr": 4.045782582140559e-07, "epoch": 3.66371915005939, "percentage": 73.27, "elapsed_time": "0:40:44", "remaining_time": "0:14:51", "throughput": 5592.38, "total_tokens": 13669056}
|
|
{"current_steps": 27765, "total_steps": 37885, "loss": 0.1125, "lr": 4.042081904078757e-07, "epoch": 3.6643790418371385, "percentage": 73.29, "elapsed_time": "0:40:44", "remaining_time": "0:14:51", "throughput": 5592.7, "total_tokens": 13671680}
|
|
{"current_steps": 27770, "total_steps": 37885, "loss": 0.0, "lr": 4.0383824905419263e-07, "epoch": 3.665038933614887, "percentage": 73.3, "elapsed_time": "0:40:44", "remaining_time": "0:14:50", "throughput": 5592.99, "total_tokens": 13674240}
|
|
{"current_steps": 27775, "total_steps": 37885, "loss": 0.0549, "lr": 4.034684342315241e-07, "epoch": 3.6656988253926355, "percentage": 73.31, "elapsed_time": "0:40:45", "remaining_time": "0:14:50", "throughput": 5593.23, "total_tokens": 13676672}
|
|
{"current_steps": 27780, "total_steps": 37885, "loss": 0.0, "lr": 4.0309874601836114e-07, "epoch": 3.666358717170384, "percentage": 73.33, "elapsed_time": "0:40:45", "remaining_time": "0:14:49", "throughput": 5593.43, "total_tokens": 13678976}
|
|
{"current_steps": 27785, "total_steps": 37885, "loss": 0.0626, "lr": 4.0272918449316684e-07, "epoch": 3.6670186089481325, "percentage": 73.34, "elapsed_time": "0:40:45", "remaining_time": "0:14:49", "throughput": 5593.64, "total_tokens": 13681344}
|
|
{"current_steps": 27790, "total_steps": 37885, "loss": 0.0, "lr": 4.0235974973437735e-07, "epoch": 3.6676785007258808, "percentage": 73.35, "elapsed_time": "0:40:46", "remaining_time": "0:14:48", "throughput": 5593.77, "total_tokens": 13683456}
|
|
{"current_steps": 27795, "total_steps": 37885, "loss": 0.0, "lr": 4.0199044182040385e-07, "epoch": 3.6683383925036295, "percentage": 73.37, "elapsed_time": "0:40:46", "remaining_time": "0:14:48", "throughput": 5594.15, "total_tokens": 13686272}
|
|
{"current_steps": 27800, "total_steps": 37885, "loss": 0.1113, "lr": 4.016212608296284e-07, "epoch": 3.6689982842813778, "percentage": 73.38, "elapsed_time": "0:40:46", "remaining_time": "0:14:47", "throughput": 5594.47, "total_tokens": 13688896}
|
|
{"current_steps": 27805, "total_steps": 37885, "loss": 0.0472, "lr": 4.012522068404075e-07, "epoch": 3.6696581760591265, "percentage": 73.39, "elapsed_time": "0:40:47", "remaining_time": "0:14:47", "throughput": 5594.67, "total_tokens": 13691200}
|
|
{"current_steps": 27810, "total_steps": 37885, "loss": 0.0, "lr": 4.0088327993106964e-07, "epoch": 3.6703180678368748, "percentage": 73.41, "elapsed_time": "0:40:47", "remaining_time": "0:14:46", "throughput": 5595.0, "total_tokens": 13693888}
|
|
{"current_steps": 27815, "total_steps": 37885, "loss": 0.0308, "lr": 4.005144801799171e-07, "epoch": 3.670977959614623, "percentage": 73.42, "elapsed_time": "0:40:47", "remaining_time": "0:14:46", "throughput": 5595.22, "total_tokens": 13696256}
|
|
{"current_steps": 27820, "total_steps": 37885, "loss": 0.1735, "lr": 4.001458076652253e-07, "epoch": 3.6716378513923718, "percentage": 73.43, "elapsed_time": "0:40:48", "remaining_time": "0:14:45", "throughput": 5595.49, "total_tokens": 13698752}
|
|
{"current_steps": 27825, "total_steps": 37885, "loss": 0.0006, "lr": 3.9977726246524133e-07, "epoch": 3.67229774317012, "percentage": 73.45, "elapsed_time": "0:40:48", "remaining_time": "0:14:45", "throughput": 5595.8, "total_tokens": 13701376}
|
|
{"current_steps": 27830, "total_steps": 37885, "loss": 0.0, "lr": 3.994088446581877e-07, "epoch": 3.6729576349478688, "percentage": 73.46, "elapsed_time": "0:40:48", "remaining_time": "0:14:44", "throughput": 5596.04, "total_tokens": 13703808}
|
|
{"current_steps": 27835, "total_steps": 37885, "loss": 0.0, "lr": 3.990405543222576e-07, "epoch": 3.673617526725617, "percentage": 73.47, "elapsed_time": "0:40:49", "remaining_time": "0:14:44", "throughput": 5596.25, "total_tokens": 13706176}
|
|
{"current_steps": 27840, "total_steps": 37885, "loss": 0.0079, "lr": 3.9867239153561774e-07, "epoch": 3.6742774185033653, "percentage": 73.49, "elapsed_time": "0:40:49", "remaining_time": "0:14:43", "throughput": 5596.45, "total_tokens": 13708480}
|
|
{"current_steps": 27845, "total_steps": 37885, "loss": 0.0003, "lr": 3.9830435637640825e-07, "epoch": 3.674937310281114, "percentage": 73.5, "elapsed_time": "0:40:49", "remaining_time": "0:14:43", "throughput": 5596.66, "total_tokens": 13710848}
|
|
{"current_steps": 27850, "total_steps": 37885, "loss": 0.0, "lr": 3.979364489227419e-07, "epoch": 3.6755972020588623, "percentage": 73.51, "elapsed_time": "0:40:50", "remaining_time": "0:14:42", "throughput": 5596.8, "total_tokens": 13713024}
|
|
{"current_steps": 27855, "total_steps": 37885, "loss": 0.0005, "lr": 3.9756866925270494e-07, "epoch": 3.676257093836611, "percentage": 73.53, "elapsed_time": "0:40:50", "remaining_time": "0:14:42", "throughput": 5597.17, "total_tokens": 13715776}
|
|
{"current_steps": 27860, "total_steps": 37885, "loss": 0.0004, "lr": 3.972010174443551e-07, "epoch": 3.6769169856143593, "percentage": 73.54, "elapsed_time": "0:40:50", "remaining_time": "0:14:41", "throughput": 5597.46, "total_tokens": 13718336}
|
|
{"current_steps": 27865, "total_steps": 37885, "loss": 0.0, "lr": 3.9683349357572417e-07, "epoch": 3.6775768773921076, "percentage": 73.55, "elapsed_time": "0:40:51", "remaining_time": "0:14:41", "throughput": 5597.75, "total_tokens": 13720896}
|
|
{"current_steps": 27870, "total_steps": 37885, "loss": 0.0, "lr": 3.9646609772481677e-07, "epoch": 3.678236769169856, "percentage": 73.56, "elapsed_time": "0:40:51", "remaining_time": "0:14:40", "throughput": 5597.91, "total_tokens": 13723136}
|
|
{"current_steps": 27875, "total_steps": 37885, "loss": 0.0005, "lr": 3.960988299696094e-07, "epoch": 3.6788966609476046, "percentage": 73.58, "elapsed_time": "0:40:51", "remaining_time": "0:14:40", "throughput": 5598.16, "total_tokens": 13725568}
|
|
{"current_steps": 27880, "total_steps": 37885, "loss": 0.0, "lr": 3.957316903880522e-07, "epoch": 3.6795565527253533, "percentage": 73.59, "elapsed_time": "0:40:52", "remaining_time": "0:14:39", "throughput": 5598.37, "total_tokens": 13727936}
|
|
{"current_steps": 27885, "total_steps": 37885, "loss": 0.0, "lr": 3.953646790580679e-07, "epoch": 3.6802164445031016, "percentage": 73.6, "elapsed_time": "0:40:52", "remaining_time": "0:14:39", "throughput": 5598.56, "total_tokens": 13730240}
|
|
{"current_steps": 27890, "total_steps": 37885, "loss": 0.0001, "lr": 3.949977960575525e-07, "epoch": 3.68087633628085, "percentage": 73.62, "elapsed_time": "0:40:52", "remaining_time": "0:14:39", "throughput": 5598.89, "total_tokens": 13732928}
|
|
{"current_steps": 27895, "total_steps": 37885, "loss": 0.0109, "lr": 3.946310414643734e-07, "epoch": 3.681536228058598, "percentage": 73.63, "elapsed_time": "0:40:53", "remaining_time": "0:14:38", "throughput": 5599.23, "total_tokens": 13735616}
|
|
{"current_steps": 27900, "total_steps": 37885, "loss": 0.0487, "lr": 3.94264415356372e-07, "epoch": 3.682196119836347, "percentage": 73.64, "elapsed_time": "0:40:53", "remaining_time": "0:14:38", "throughput": 5599.48, "total_tokens": 13738048}
|
|
{"current_steps": 27905, "total_steps": 37885, "loss": 0.121, "lr": 3.938979178113625e-07, "epoch": 3.682856011614095, "percentage": 73.66, "elapsed_time": "0:40:53", "remaining_time": "0:14:37", "throughput": 5599.74, "total_tokens": 13740544}
|
|
{"current_steps": 27910, "total_steps": 37885, "loss": 0.0005, "lr": 3.9353154890713037e-07, "epoch": 3.683515903391844, "percentage": 73.67, "elapsed_time": "0:40:54", "remaining_time": "0:14:37", "throughput": 5600.05, "total_tokens": 13743168}
|
|
{"current_steps": 27915, "total_steps": 37885, "loss": 0.0281, "lr": 3.9316530872143537e-07, "epoch": 3.684175795169592, "percentage": 73.68, "elapsed_time": "0:40:54", "remaining_time": "0:14:36", "throughput": 5600.22, "total_tokens": 13745408}
|
|
{"current_steps": 27920, "total_steps": 37885, "loss": 0.0337, "lr": 3.927991973320096e-07, "epoch": 3.6848356869473404, "percentage": 73.7, "elapsed_time": "0:40:54", "remaining_time": "0:14:36", "throughput": 5600.49, "total_tokens": 13747904}
|
|
{"current_steps": 27925, "total_steps": 37885, "loss": 0.0579, "lr": 3.924332148165569e-07, "epoch": 3.685495578725089, "percentage": 73.71, "elapsed_time": "0:40:55", "remaining_time": "0:14:35", "throughput": 5600.75, "total_tokens": 13750400}
|
|
{"current_steps": 27930, "total_steps": 37885, "loss": 0.0, "lr": 3.9206736125275463e-07, "epoch": 3.6861554705028374, "percentage": 73.72, "elapsed_time": "0:40:55", "remaining_time": "0:14:35", "throughput": 5601.06, "total_tokens": 13753024}
|
|
{"current_steps": 27935, "total_steps": 37885, "loss": 0.0, "lr": 3.9170163671825265e-07, "epoch": 3.686815362280586, "percentage": 73.74, "elapsed_time": "0:40:55", "remaining_time": "0:14:34", "throughput": 5601.32, "total_tokens": 13755520}
|
|
{"current_steps": 27940, "total_steps": 37885, "loss": 0.001, "lr": 3.9133604129067364e-07, "epoch": 3.6874752540583344, "percentage": 73.75, "elapsed_time": "0:40:56", "remaining_time": "0:14:34", "throughput": 5601.7, "total_tokens": 13758336}
|
|
{"current_steps": 27945, "total_steps": 37885, "loss": 0.0, "lr": 3.9097057504761234e-07, "epoch": 3.6881351458360827, "percentage": 73.76, "elapsed_time": "0:40:56", "remaining_time": "0:14:33", "throughput": 5602.02, "total_tokens": 13760960}
|
|
{"current_steps": 27950, "total_steps": 37885, "loss": 0.0, "lr": 3.9060523806663556e-07, "epoch": 3.6887950376138314, "percentage": 73.78, "elapsed_time": "0:40:56", "remaining_time": "0:14:33", "throughput": 5602.3, "total_tokens": 13763520}
|
|
{"current_steps": 27955, "total_steps": 37885, "loss": 0.0, "lr": 3.9024003042528474e-07, "epoch": 3.6894549293915797, "percentage": 73.79, "elapsed_time": "0:40:57", "remaining_time": "0:14:32", "throughput": 5602.62, "total_tokens": 13766144}
|
|
{"current_steps": 27960, "total_steps": 37885, "loss": 0.0, "lr": 3.898749522010716e-07, "epoch": 3.6901148211693284, "percentage": 73.8, "elapsed_time": "0:40:57", "remaining_time": "0:14:32", "throughput": 5602.93, "total_tokens": 13768768}
|
|
{"current_steps": 27965, "total_steps": 37885, "loss": 0.0674, "lr": 3.895100034714817e-07, "epoch": 3.6907747129470767, "percentage": 73.82, "elapsed_time": "0:40:57", "remaining_time": "0:14:31", "throughput": 5603.19, "total_tokens": 13771264}
|
|
{"current_steps": 27970, "total_steps": 37885, "loss": 0.0, "lr": 3.8914518431397305e-07, "epoch": 3.691434604724825, "percentage": 73.83, "elapsed_time": "0:40:58", "remaining_time": "0:14:31", "throughput": 5603.33, "total_tokens": 13773440}
|
|
{"current_steps": 27975, "total_steps": 37885, "loss": 0.0007, "lr": 3.887804948059752e-07, "epoch": 3.6920944965025737, "percentage": 73.84, "elapsed_time": "0:40:58", "remaining_time": "0:14:30", "throughput": 5603.58, "total_tokens": 13775872}
|
|
{"current_steps": 27980, "total_steps": 37885, "loss": 0.0, "lr": 3.8841593502489155e-07, "epoch": 3.692754388280322, "percentage": 73.86, "elapsed_time": "0:40:58", "remaining_time": "0:14:30", "throughput": 5603.75, "total_tokens": 13778112}
|
|
{"current_steps": 27985, "total_steps": 37885, "loss": 0.0, "lr": 3.880515050480964e-07, "epoch": 3.6934142800580707, "percentage": 73.87, "elapsed_time": "0:40:59", "remaining_time": "0:14:29", "throughput": 5603.99, "total_tokens": 13780544}
|
|
{"current_steps": 27990, "total_steps": 37885, "loss": 0.0012, "lr": 3.876872049529385e-07, "epoch": 3.694074171835819, "percentage": 73.88, "elapsed_time": "0:40:59", "remaining_time": "0:14:29", "throughput": 5604.23, "total_tokens": 13782976}
|
|
{"current_steps": 27995, "total_steps": 37885, "loss": 0.0, "lr": 3.8732303481673733e-07, "epoch": 3.6947340636135673, "percentage": 73.89, "elapsed_time": "0:40:59", "remaining_time": "0:14:28", "throughput": 5604.49, "total_tokens": 13785472}
|
|
{"current_steps": 28000, "total_steps": 37885, "loss": 0.0673, "lr": 3.869589947167851e-07, "epoch": 3.695393955391316, "percentage": 73.91, "elapsed_time": "0:41:00", "remaining_time": "0:14:28", "throughput": 5604.78, "total_tokens": 13788032}
|
|
{"current_steps": 28005, "total_steps": 37885, "loss": 0.0, "lr": 3.8659508473034684e-07, "epoch": 3.6960538471690643, "percentage": 73.92, "elapsed_time": "0:41:00", "remaining_time": "0:14:28", "throughput": 5605.05, "total_tokens": 13790528}
|
|
{"current_steps": 28010, "total_steps": 37885, "loss": 0.0029, "lr": 3.8623130493465994e-07, "epoch": 3.696713738946813, "percentage": 73.93, "elapsed_time": "0:41:00", "remaining_time": "0:14:27", "throughput": 5605.38, "total_tokens": 13793216}
|
|
{"current_steps": 28015, "total_steps": 37885, "loss": 0.0001, "lr": 3.8586765540693434e-07, "epoch": 3.6973736307245613, "percentage": 73.95, "elapsed_time": "0:41:01", "remaining_time": "0:14:27", "throughput": 5605.6, "total_tokens": 13795584}
|
|
{"current_steps": 28020, "total_steps": 37885, "loss": 0.0, "lr": 3.855041362243514e-07, "epoch": 3.6980335225023095, "percentage": 73.96, "elapsed_time": "0:41:01", "remaining_time": "0:14:26", "throughput": 5605.82, "total_tokens": 13797952}
|
|
{"current_steps": 28025, "total_steps": 37885, "loss": 0.0, "lr": 3.8514074746406566e-07, "epoch": 3.698693414280058, "percentage": 73.97, "elapsed_time": "0:41:01", "remaining_time": "0:14:26", "throughput": 5606.13, "total_tokens": 13800576}
|
|
{"current_steps": 28030, "total_steps": 37885, "loss": 0.0, "lr": 3.847774892032042e-07, "epoch": 3.6993533060578065, "percentage": 73.99, "elapsed_time": "0:41:02", "remaining_time": "0:14:25", "throughput": 5606.42, "total_tokens": 13803136}
|
|
{"current_steps": 28035, "total_steps": 37885, "loss": 0.0004, "lr": 3.844143615188652e-07, "epoch": 3.700013197835555, "percentage": 74.0, "elapsed_time": "0:41:02", "remaining_time": "0:14:25", "throughput": 5606.54, "total_tokens": 13805248}
|
|
{"current_steps": 28040, "total_steps": 37885, "loss": 0.0, "lr": 3.8405136448812023e-07, "epoch": 3.7006730896133035, "percentage": 74.01, "elapsed_time": "0:41:02", "remaining_time": "0:14:24", "throughput": 5606.69, "total_tokens": 13807424}
|
|
{"current_steps": 28045, "total_steps": 37885, "loss": 0.058, "lr": 3.8368849818801317e-07, "epoch": 3.701332981391052, "percentage": 74.03, "elapsed_time": "0:41:03", "remaining_time": "0:14:24", "throughput": 5607.09, "total_tokens": 13810304}
|
|
{"current_steps": 28050, "total_steps": 37885, "loss": 0.0, "lr": 3.8332576269555906e-07, "epoch": 3.7019928731688, "percentage": 74.04, "elapsed_time": "0:41:03", "remaining_time": "0:14:23", "throughput": 5607.25, "total_tokens": 13812544}
|
|
{"current_steps": 28055, "total_steps": 37885, "loss": 0.0, "lr": 3.8296315808774616e-07, "epoch": 3.702652764946549, "percentage": 74.05, "elapsed_time": "0:41:03", "remaining_time": "0:14:23", "throughput": 5607.52, "total_tokens": 13815040}
|
|
{"current_steps": 28060, "total_steps": 37885, "loss": 0.0, "lr": 3.826006844415347e-07, "epoch": 3.703312656724297, "percentage": 74.07, "elapsed_time": "0:41:03", "remaining_time": "0:14:22", "throughput": 5607.78, "total_tokens": 13817536}
|
|
{"current_steps": 28065, "total_steps": 37885, "loss": 0.0, "lr": 3.822383418338576e-07, "epoch": 3.703972548502046, "percentage": 74.08, "elapsed_time": "0:41:04", "remaining_time": "0:14:22", "throughput": 5607.96, "total_tokens": 13819840}
|
|
{"current_steps": 28070, "total_steps": 37885, "loss": 0.0, "lr": 3.8187613034161847e-07, "epoch": 3.704632440279794, "percentage": 74.09, "elapsed_time": "0:41:04", "remaining_time": "0:14:21", "throughput": 5608.18, "total_tokens": 13822208}
|
|
{"current_steps": 28075, "total_steps": 37885, "loss": 0.0001, "lr": 3.815140500416947e-07, "epoch": 3.7052923320575424, "percentage": 74.11, "elapsed_time": "0:41:04", "remaining_time": "0:14:21", "throughput": 5608.37, "total_tokens": 13824512}
|
|
{"current_steps": 28080, "total_steps": 37885, "loss": 0.0008, "lr": 3.811521010109353e-07, "epoch": 3.705952223835291, "percentage": 74.12, "elapsed_time": "0:41:05", "remaining_time": "0:14:20", "throughput": 5608.56, "total_tokens": 13826816}
|
|
{"current_steps": 28085, "total_steps": 37885, "loss": 0.0, "lr": 3.807902833261609e-07, "epoch": 3.7066121156130394, "percentage": 74.13, "elapsed_time": "0:41:05", "remaining_time": "0:14:20", "throughput": 5608.82, "total_tokens": 13829312}
|
|
{"current_steps": 28090, "total_steps": 37885, "loss": 0.0518, "lr": 3.804285970641649e-07, "epoch": 3.707272007390788, "percentage": 74.15, "elapsed_time": "0:41:05", "remaining_time": "0:14:19", "throughput": 5609.04, "total_tokens": 13831680}
|
|
{"current_steps": 28095, "total_steps": 37885, "loss": 0.0533, "lr": 3.800670423017128e-07, "epoch": 3.7079318991685364, "percentage": 74.16, "elapsed_time": "0:41:06", "remaining_time": "0:14:19", "throughput": 5609.33, "total_tokens": 13834240}
|
|
{"current_steps": 28100, "total_steps": 37885, "loss": 0.0718, "lr": 3.7970561911554143e-07, "epoch": 3.7085917909462847, "percentage": 74.17, "elapsed_time": "0:41:06", "remaining_time": "0:14:18", "throughput": 5609.44, "total_tokens": 13836352}
|
|
{"current_steps": 28105, "total_steps": 37885, "loss": 0.0, "lr": 3.793443275823607e-07, "epoch": 3.7092516827240334, "percentage": 74.19, "elapsed_time": "0:41:06", "remaining_time": "0:14:18", "throughput": 5609.75, "total_tokens": 13838976}
|
|
{"current_steps": 28110, "total_steps": 37885, "loss": 0.0011, "lr": 3.7898316777885195e-07, "epoch": 3.7099115745017817, "percentage": 74.2, "elapsed_time": "0:41:07", "remaining_time": "0:14:17", "throughput": 5610.01, "total_tokens": 13841472}
|
|
{"current_steps": 28115, "total_steps": 37885, "loss": 0.0003, "lr": 3.786221397816691e-07, "epoch": 3.7105714662795304, "percentage": 74.21, "elapsed_time": "0:41:07", "remaining_time": "0:14:17", "throughput": 5610.18, "total_tokens": 13843712}
|
|
{"current_steps": 28120, "total_steps": 37885, "loss": 0.0487, "lr": 3.782612436674375e-07, "epoch": 3.7112313580572787, "percentage": 74.22, "elapsed_time": "0:41:07", "remaining_time": "0:14:17", "throughput": 5610.43, "total_tokens": 13846208}
|
|
{"current_steps": 28125, "total_steps": 37885, "loss": 0.0401, "lr": 3.7790047951275394e-07, "epoch": 3.711891249835027, "percentage": 74.24, "elapsed_time": "0:41:08", "remaining_time": "0:14:16", "throughput": 5610.6, "total_tokens": 13848448}
|
|
{"current_steps": 28130, "total_steps": 37885, "loss": 0.0001, "lr": 3.7753984739418945e-07, "epoch": 3.7125511416127757, "percentage": 74.25, "elapsed_time": "0:41:08", "remaining_time": "0:14:16", "throughput": 5610.84, "total_tokens": 13850880}
|
|
{"current_steps": 28135, "total_steps": 37885, "loss": 0.0025, "lr": 3.771793473882844e-07, "epoch": 3.713211033390524, "percentage": 74.26, "elapsed_time": "0:41:08", "remaining_time": "0:14:15", "throughput": 5611.12, "total_tokens": 13853440}
|
|
{"current_steps": 28140, "total_steps": 37885, "loss": 0.0912, "lr": 3.768189795715532e-07, "epoch": 3.7138709251682727, "percentage": 74.28, "elapsed_time": "0:41:09", "remaining_time": "0:14:15", "throughput": 5611.41, "total_tokens": 13856000}
|
|
{"current_steps": 28145, "total_steps": 37885, "loss": 0.0, "lr": 3.764587440204804e-07, "epoch": 3.714530816946021, "percentage": 74.29, "elapsed_time": "0:41:09", "remaining_time": "0:14:14", "throughput": 5611.62, "total_tokens": 13858368}
|
|
{"current_steps": 28150, "total_steps": 37885, "loss": 0.0002, "lr": 3.7609864081152387e-07, "epoch": 3.715190708723769, "percentage": 74.3, "elapsed_time": "0:41:09", "remaining_time": "0:14:14", "throughput": 5611.95, "total_tokens": 13861056}
|
|
{"current_steps": 28155, "total_steps": 37885, "loss": 0.0, "lr": 3.7573867002111324e-07, "epoch": 3.7158506005015175, "percentage": 74.32, "elapsed_time": "0:41:10", "remaining_time": "0:14:13", "throughput": 5612.21, "total_tokens": 13863552}
|
|
{"current_steps": 28160, "total_steps": 37885, "loss": 0.0, "lr": 3.753788317256488e-07, "epoch": 3.716510492279266, "percentage": 74.33, "elapsed_time": "0:41:10", "remaining_time": "0:14:13", "throughput": 5612.53, "total_tokens": 13866240}
|
|
{"current_steps": 28165, "total_steps": 37885, "loss": 0.0, "lr": 3.7501912600150474e-07, "epoch": 3.7171703840570145, "percentage": 74.34, "elapsed_time": "0:41:10", "remaining_time": "0:14:12", "throughput": 5612.68, "total_tokens": 13868480}
|
|
{"current_steps": 28170, "total_steps": 37885, "loss": 0.0, "lr": 3.7465955292502505e-07, "epoch": 3.717830275834763, "percentage": 74.36, "elapsed_time": "0:41:11", "remaining_time": "0:14:12", "throughput": 5612.79, "total_tokens": 13870592}
|
|
{"current_steps": 28175, "total_steps": 37885, "loss": 0.0308, "lr": 3.7430011257252735e-07, "epoch": 3.7184901676125115, "percentage": 74.37, "elapsed_time": "0:41:11", "remaining_time": "0:14:11", "throughput": 5612.88, "total_tokens": 13872704}
|
|
{"current_steps": 28180, "total_steps": 37885, "loss": 0.0003, "lr": 3.7394080502029934e-07, "epoch": 3.7191500593902598, "percentage": 74.38, "elapsed_time": "0:41:11", "remaining_time": "0:14:11", "throughput": 5613.01, "total_tokens": 13874880}
|
|
{"current_steps": 28185, "total_steps": 37885, "loss": 0.0302, "lr": 3.73581630344602e-07, "epoch": 3.7198099511680085, "percentage": 74.4, "elapsed_time": "0:41:12", "remaining_time": "0:14:10", "throughput": 5613.2, "total_tokens": 13877248}
|
|
{"current_steps": 28190, "total_steps": 37885, "loss": 0.0, "lr": 3.732225886216678e-07, "epoch": 3.7204698429457568, "percentage": 74.41, "elapsed_time": "0:41:12", "remaining_time": "0:14:10", "throughput": 5613.44, "total_tokens": 13879744}
|
|
{"current_steps": 28195, "total_steps": 37885, "loss": 0.0001, "lr": 3.7286367992769994e-07, "epoch": 3.7211297347235055, "percentage": 74.42, "elapsed_time": "0:41:12", "remaining_time": "0:14:09", "throughput": 5613.64, "total_tokens": 13882112}
|
|
{"current_steps": 28200, "total_steps": 37885, "loss": 0.0471, "lr": 3.7250490433887473e-07, "epoch": 3.721789626501254, "percentage": 74.44, "elapsed_time": "0:41:13", "remaining_time": "0:14:09", "throughput": 5613.81, "total_tokens": 13884416}
|
|
{"current_steps": 28205, "total_steps": 37885, "loss": 0.0548, "lr": 3.7214626193133993e-07, "epoch": 3.722449518279002, "percentage": 74.45, "elapsed_time": "0:41:13", "remaining_time": "0:14:08", "throughput": 5613.96, "total_tokens": 13886656}
|
|
{"current_steps": 28210, "total_steps": 37885, "loss": 0.1484, "lr": 3.717877527812141e-07, "epoch": 3.723109410056751, "percentage": 74.46, "elapsed_time": "0:41:13", "remaining_time": "0:14:08", "throughput": 5614.18, "total_tokens": 13889088}
|
|
{"current_steps": 28215, "total_steps": 37885, "loss": 0.0813, "lr": 3.714293769645886e-07, "epoch": 3.723769301834499, "percentage": 74.48, "elapsed_time": "0:41:14", "remaining_time": "0:14:07", "throughput": 5614.37, "total_tokens": 13891456}
|
|
{"current_steps": 28220, "total_steps": 37885, "loss": 0.0005, "lr": 3.710711345575261e-07, "epoch": 3.724429193612248, "percentage": 74.49, "elapsed_time": "0:41:14", "remaining_time": "0:14:07", "throughput": 5614.64, "total_tokens": 13894016}
|
|
{"current_steps": 28225, "total_steps": 37885, "loss": 0.0, "lr": 3.707130256360614e-07, "epoch": 3.725089085389996, "percentage": 74.5, "elapsed_time": "0:41:14", "remaining_time": "0:14:07", "throughput": 5614.89, "total_tokens": 13896512}
|
|
{"current_steps": 28230, "total_steps": 37885, "loss": 0.0181, "lr": 3.7035505027619964e-07, "epoch": 3.7257489771677443, "percentage": 74.51, "elapsed_time": "0:41:15", "remaining_time": "0:14:06", "throughput": 5615.13, "total_tokens": 13899008}
|
|
{"current_steps": 28235, "total_steps": 37885, "loss": 0.0411, "lr": 3.6999720855391893e-07, "epoch": 3.726408868945493, "percentage": 74.53, "elapsed_time": "0:41:15", "remaining_time": "0:14:06", "throughput": 5615.42, "total_tokens": 13901632}
|
|
{"current_steps": 28240, "total_steps": 37885, "loss": 0.0704, "lr": 3.696395005451689e-07, "epoch": 3.7270687607232413, "percentage": 74.54, "elapsed_time": "0:41:15", "remaining_time": "0:14:05", "throughput": 5615.59, "total_tokens": 13903936}
|
|
{"current_steps": 28245, "total_steps": 37885, "loss": 0.0001, "lr": 3.6928192632586986e-07, "epoch": 3.72772865250099, "percentage": 74.55, "elapsed_time": "0:41:16", "remaining_time": "0:14:05", "throughput": 5615.81, "total_tokens": 13906368}
|
|
{"current_steps": 28250, "total_steps": 37885, "loss": 0.0881, "lr": 3.6892448597191463e-07, "epoch": 3.7283885442787383, "percentage": 74.57, "elapsed_time": "0:41:16", "remaining_time": "0:14:04", "throughput": 5616.1, "total_tokens": 13908992}
|
|
{"current_steps": 28255, "total_steps": 37885, "loss": 0.0367, "lr": 3.685671795591677e-07, "epoch": 3.7290484360564866, "percentage": 74.58, "elapsed_time": "0:41:16", "remaining_time": "0:14:04", "throughput": 5616.43, "total_tokens": 13911744}
|
|
{"current_steps": 28260, "total_steps": 37885, "loss": 0.0018, "lr": 3.682100071634642e-07, "epoch": 3.7297083278342353, "percentage": 74.59, "elapsed_time": "0:41:17", "remaining_time": "0:14:03", "throughput": 5616.68, "total_tokens": 13914240}
|
|
{"current_steps": 28265, "total_steps": 37885, "loss": 0.0003, "lr": 3.6785296886061144e-07, "epoch": 3.7303682196119836, "percentage": 74.61, "elapsed_time": "0:41:17", "remaining_time": "0:14:03", "throughput": 5617.07, "total_tokens": 13917120}
|
|
{"current_steps": 28270, "total_steps": 37885, "loss": 0.0, "lr": 3.674960647263885e-07, "epoch": 3.7310281113897323, "percentage": 74.62, "elapsed_time": "0:41:17", "remaining_time": "0:14:02", "throughput": 5617.32, "total_tokens": 13919616}
|
|
{"current_steps": 28275, "total_steps": 37885, "loss": 0.0004, "lr": 3.671392948365458e-07, "epoch": 3.7316880031674806, "percentage": 74.63, "elapsed_time": "0:41:18", "remaining_time": "0:14:02", "throughput": 5617.72, "total_tokens": 13922560}
|
|
{"current_steps": 28280, "total_steps": 37885, "loss": 0.0002, "lr": 3.667826592668052e-07, "epoch": 3.732347894945229, "percentage": 74.65, "elapsed_time": "0:41:18", "remaining_time": "0:14:01", "throughput": 5618.08, "total_tokens": 13925376}
|
|
{"current_steps": 28285, "total_steps": 37885, "loss": 0.0006, "lr": 3.664261580928589e-07, "epoch": 3.733007786722977, "percentage": 74.66, "elapsed_time": "0:41:19", "remaining_time": "0:14:01", "throughput": 5618.35, "total_tokens": 13927936}
|
|
{"current_steps": 28290, "total_steps": 37885, "loss": 0.0044, "lr": 3.660697913903733e-07, "epoch": 3.733667678500726, "percentage": 74.67, "elapsed_time": "0:41:19", "remaining_time": "0:14:00", "throughput": 5618.5, "total_tokens": 13930176}
|
|
{"current_steps": 28295, "total_steps": 37885, "loss": 0.0001, "lr": 3.6571355923498346e-07, "epoch": 3.734327570278474, "percentage": 74.69, "elapsed_time": "0:41:19", "remaining_time": "0:14:00", "throughput": 5618.79, "total_tokens": 13932800}
|
|
{"current_steps": 28300, "total_steps": 37885, "loss": 0.0049, "lr": 3.6535746170229777e-07, "epoch": 3.734987462056223, "percentage": 74.7, "elapsed_time": "0:41:20", "remaining_time": "0:13:59", "throughput": 5619.07, "total_tokens": 13935424}
|
|
{"current_steps": 28305, "total_steps": 37885, "loss": 0.0039, "lr": 3.6500149886789524e-07, "epoch": 3.735647353833971, "percentage": 74.71, "elapsed_time": "0:41:20", "remaining_time": "0:13:59", "throughput": 5619.4, "total_tokens": 13938176}
|
|
{"current_steps": 28310, "total_steps": 37885, "loss": 0.0075, "lr": 3.64645670807326e-07, "epoch": 3.7363072456117195, "percentage": 74.73, "elapsed_time": "0:41:20", "remaining_time": "0:13:59", "throughput": 5619.64, "total_tokens": 13940672}
|
|
{"current_steps": 28315, "total_steps": 37885, "loss": 0.0013, "lr": 3.642899775961127e-07, "epoch": 3.736967137389468, "percentage": 74.74, "elapsed_time": "0:41:21", "remaining_time": "0:13:58", "throughput": 5619.91, "total_tokens": 13943232}
|
|
{"current_steps": 28320, "total_steps": 37885, "loss": 0.0224, "lr": 3.6393441930974734e-07, "epoch": 3.7376270291672165, "percentage": 74.75, "elapsed_time": "0:41:21", "remaining_time": "0:13:58", "throughput": 5620.04, "total_tokens": 13945472}
|
|
{"current_steps": 28325, "total_steps": 37885, "loss": 0.1003, "lr": 3.6357899602369626e-07, "epoch": 3.738286920944965, "percentage": 74.77, "elapsed_time": "0:41:21", "remaining_time": "0:13:57", "throughput": 5620.41, "total_tokens": 13948288}
|
|
{"current_steps": 28330, "total_steps": 37885, "loss": 0.0, "lr": 3.632237078133946e-07, "epoch": 3.7389468127227135, "percentage": 74.78, "elapsed_time": "0:41:22", "remaining_time": "0:13:57", "throughput": 5620.52, "total_tokens": 13950464}
|
|
{"current_steps": 28335, "total_steps": 37885, "loss": 0.0087, "lr": 3.628685547542496e-07, "epoch": 3.7396067045004617, "percentage": 74.79, "elapsed_time": "0:41:22", "remaining_time": "0:13:56", "throughput": 5620.65, "total_tokens": 13952640}
|
|
{"current_steps": 28340, "total_steps": 37885, "loss": 0.0, "lr": 3.6251353692164e-07, "epoch": 3.7402665962782105, "percentage": 74.81, "elapsed_time": "0:41:22", "remaining_time": "0:13:56", "throughput": 5620.81, "total_tokens": 13954944}
|
|
{"current_steps": 28345, "total_steps": 37885, "loss": 0.0097, "lr": 3.6215865439091587e-07, "epoch": 3.7409264880559587, "percentage": 74.82, "elapsed_time": "0:41:23", "remaining_time": "0:13:55", "throughput": 5620.95, "total_tokens": 13957184}
|
|
{"current_steps": 28350, "total_steps": 37885, "loss": 0.0, "lr": 3.6180390723739883e-07, "epoch": 3.7415863798337075, "percentage": 74.83, "elapsed_time": "0:41:23", "remaining_time": "0:13:55", "throughput": 5621.14, "total_tokens": 13959552}
|
|
{"current_steps": 28355, "total_steps": 37885, "loss": 0.0004, "lr": 3.614492955363806e-07, "epoch": 3.7422462716114557, "percentage": 74.84, "elapsed_time": "0:41:23", "remaining_time": "0:13:54", "throughput": 5621.46, "total_tokens": 13962240}
|
|
{"current_steps": 28360, "total_steps": 37885, "loss": 0.0844, "lr": 3.610948193631255e-07, "epoch": 3.742906163389204, "percentage": 74.86, "elapsed_time": "0:41:24", "remaining_time": "0:13:54", "throughput": 5621.63, "total_tokens": 13964544}
|
|
{"current_steps": 28365, "total_steps": 37885, "loss": 0.0, "lr": 3.607404787928686e-07, "epoch": 3.7435660551669527, "percentage": 74.87, "elapsed_time": "0:41:24", "remaining_time": "0:13:53", "throughput": 5621.87, "total_tokens": 13967040}
|
|
{"current_steps": 28370, "total_steps": 37885, "loss": 0.0, "lr": 3.6038627390081567e-07, "epoch": 3.744225946944701, "percentage": 74.88, "elapsed_time": "0:41:24", "remaining_time": "0:13:53", "throughput": 5622.19, "total_tokens": 13969728}
|
|
{"current_steps": 28375, "total_steps": 37885, "loss": 0.0, "lr": 3.6003220476214445e-07, "epoch": 3.7448858387224497, "percentage": 74.9, "elapsed_time": "0:41:25", "remaining_time": "0:13:52", "throughput": 5622.5, "total_tokens": 13972416}
|
|
{"current_steps": 28380, "total_steps": 37885, "loss": 0.0004, "lr": 3.596782714520037e-07, "epoch": 3.745545730500198, "percentage": 74.91, "elapsed_time": "0:41:25", "remaining_time": "0:13:52", "throughput": 5622.79, "total_tokens": 13975040}
|
|
{"current_steps": 28385, "total_steps": 37885, "loss": 0.0004, "lr": 3.593244740455127e-07, "epoch": 3.7462056222779463, "percentage": 74.92, "elapsed_time": "0:41:25", "remaining_time": "0:13:51", "throughput": 5623.02, "total_tokens": 13977472}
|
|
{"current_steps": 28390, "total_steps": 37885, "loss": 0.0176, "lr": 3.5897081261776275e-07, "epoch": 3.746865514055695, "percentage": 74.94, "elapsed_time": "0:41:26", "remaining_time": "0:13:51", "throughput": 5623.19, "total_tokens": 13979776}
|
|
{"current_steps": 28395, "total_steps": 37885, "loss": 0.0001, "lr": 3.586172872438158e-07, "epoch": 3.7475254058334433, "percentage": 74.95, "elapsed_time": "0:41:26", "remaining_time": "0:13:50", "throughput": 5623.46, "total_tokens": 13982336}
|
|
{"current_steps": 28400, "total_steps": 37885, "loss": 0.0352, "lr": 3.582638979987054e-07, "epoch": 3.748185297611192, "percentage": 74.96, "elapsed_time": "0:41:26", "remaining_time": "0:13:50", "throughput": 5623.68, "total_tokens": 13984768}
|
|
{"current_steps": 28405, "total_steps": 37885, "loss": 0.0521, "lr": 3.579106449574353e-07, "epoch": 3.7488451893889403, "percentage": 74.98, "elapsed_time": "0:41:27", "remaining_time": "0:13:50", "throughput": 5623.77, "total_tokens": 13986880}
|
|
{"current_steps": 28410, "total_steps": 37885, "loss": 0.0, "lr": 3.5755752819498107e-07, "epoch": 3.7495050811666886, "percentage": 74.99, "elapsed_time": "0:41:27", "remaining_time": "0:13:49", "throughput": 5624.13, "total_tokens": 13989696}
|
|
{"current_steps": 28415, "total_steps": 37885, "loss": 0.0109, "lr": 3.572045477862896e-07, "epoch": 3.750164972944437, "percentage": 75.0, "elapsed_time": "0:41:27", "remaining_time": "0:13:49", "throughput": 5624.27, "total_tokens": 13991936}
|
|
{"current_steps": 28420, "total_steps": 37885, "loss": 0.0003, "lr": 3.568517038062778e-07, "epoch": 3.7508248647221856, "percentage": 75.02, "elapsed_time": "0:41:28", "remaining_time": "0:13:48", "throughput": 5624.54, "total_tokens": 13994496}
|
|
{"current_steps": 28425, "total_steps": 37885, "loss": 0.0243, "lr": 3.564989963298346e-07, "epoch": 3.751484756499934, "percentage": 75.03, "elapsed_time": "0:41:28", "remaining_time": "0:13:48", "throughput": 5624.65, "total_tokens": 13996672}
|
|
{"current_steps": 28425, "total_steps": 37885, "eval_loss": 0.20364880561828613, "epoch": 3.751484756499934, "percentage": 75.03, "elapsed_time": "0:41:36", "remaining_time": "0:13:50", "throughput": 5606.82, "total_tokens": 13996672}
|
|
{"current_steps": 28430, "total_steps": 37885, "loss": 0.1141, "lr": 3.5614642543181996e-07, "epoch": 3.7521446482776826, "percentage": 75.04, "elapsed_time": "0:42:12", "remaining_time": "0:14:02", "throughput": 5526.95, "total_tokens": 13998976}
|
|
{"current_steps": 28435, "total_steps": 37885, "loss": 0.0002, "lr": 3.5579399118706364e-07, "epoch": 3.752804540055431, "percentage": 75.06, "elapsed_time": "0:42:13", "remaining_time": "0:14:01", "throughput": 5527.1, "total_tokens": 14001152}
|
|
{"current_steps": 28440, "total_steps": 37885, "loss": 0.0294, "lr": 3.5544169367036783e-07, "epoch": 3.753464431833179, "percentage": 75.07, "elapsed_time": "0:42:13", "remaining_time": "0:14:01", "throughput": 5527.31, "total_tokens": 14003520}
|
|
{"current_steps": 28445, "total_steps": 37885, "loss": 0.0, "lr": 3.550895329565049e-07, "epoch": 3.754124323610928, "percentage": 75.08, "elapsed_time": "0:42:13", "remaining_time": "0:14:00", "throughput": 5527.51, "total_tokens": 14005824}
|
|
{"current_steps": 28450, "total_steps": 37885, "loss": 0.0, "lr": 3.5473750912021894e-07, "epoch": 3.754784215388676, "percentage": 75.1, "elapsed_time": "0:42:14", "remaining_time": "0:14:00", "throughput": 5527.7, "total_tokens": 14008128}
|
|
{"current_steps": 28455, "total_steps": 37885, "loss": 0.0096, "lr": 3.543856222362239e-07, "epoch": 3.755444107166425, "percentage": 75.11, "elapsed_time": "0:42:14", "remaining_time": "0:13:59", "throughput": 5527.95, "total_tokens": 14010560}
|
|
{"current_steps": 28460, "total_steps": 37885, "loss": 0.0, "lr": 3.540338723792049e-07, "epoch": 3.756103998944173, "percentage": 75.12, "elapsed_time": "0:42:14", "remaining_time": "0:13:59", "throughput": 5528.25, "total_tokens": 14013184}
|
|
{"current_steps": 28465, "total_steps": 37885, "loss": 0.0, "lr": 3.5368225962381924e-07, "epoch": 3.7567638907219214, "percentage": 75.14, "elapsed_time": "0:42:15", "remaining_time": "0:13:58", "throughput": 5528.47, "total_tokens": 14015552}
|
|
{"current_steps": 28470, "total_steps": 37885, "loss": 0.0002, "lr": 3.533307840446935e-07, "epoch": 3.75742378249967, "percentage": 75.15, "elapsed_time": "0:42:15", "remaining_time": "0:13:58", "throughput": 5528.75, "total_tokens": 14018112}
|
|
{"current_steps": 28475, "total_steps": 37885, "loss": 0.0, "lr": 3.529794457164265e-07, "epoch": 3.7580836742774184, "percentage": 75.16, "elapsed_time": "0:42:15", "remaining_time": "0:13:58", "throughput": 5529.07, "total_tokens": 14020736}
|
|
{"current_steps": 28480, "total_steps": 37885, "loss": 0.0088, "lr": 3.526282447135862e-07, "epoch": 3.758743566055167, "percentage": 75.17, "elapsed_time": "0:42:16", "remaining_time": "0:13:57", "throughput": 5529.29, "total_tokens": 14023104}
|
|
{"current_steps": 28485, "total_steps": 37885, "loss": 0.0457, "lr": 3.5227718111071316e-07, "epoch": 3.7594034578329154, "percentage": 75.19, "elapsed_time": "0:42:16", "remaining_time": "0:13:57", "throughput": 5529.58, "total_tokens": 14025664}
|
|
{"current_steps": 28490, "total_steps": 37885, "loss": 0.0175, "lr": 3.519262549823183e-07, "epoch": 3.7600633496106637, "percentage": 75.2, "elapsed_time": "0:42:16", "remaining_time": "0:13:56", "throughput": 5529.7, "total_tokens": 14027776}
|
|
{"current_steps": 28495, "total_steps": 37885, "loss": 0.0004, "lr": 3.5157546640288227e-07, "epoch": 3.7607232413884124, "percentage": 75.21, "elapsed_time": "0:42:17", "remaining_time": "0:13:56", "throughput": 5529.92, "total_tokens": 14030144}
|
|
{"current_steps": 28500, "total_steps": 37885, "loss": 0.0067, "lr": 3.5122481544685857e-07, "epoch": 3.7613831331661607, "percentage": 75.23, "elapsed_time": "0:42:17", "remaining_time": "0:13:55", "throughput": 5530.15, "total_tokens": 14032576}
|
|
{"current_steps": 28505, "total_steps": 37885, "loss": 0.0166, "lr": 3.5087430218866945e-07, "epoch": 3.7620430249439094, "percentage": 75.24, "elapsed_time": "0:42:17", "remaining_time": "0:13:55", "throughput": 5530.37, "total_tokens": 14034944}
|
|
{"current_steps": 28510, "total_steps": 37885, "loss": 0.0, "lr": 3.505239267027094e-07, "epoch": 3.7627029167216577, "percentage": 75.25, "elapsed_time": "0:42:18", "remaining_time": "0:13:54", "throughput": 5530.58, "total_tokens": 14037312}
|
|
{"current_steps": 28515, "total_steps": 37885, "loss": 0.0208, "lr": 3.5017368906334235e-07, "epoch": 3.763362808499406, "percentage": 75.27, "elapsed_time": "0:42:18", "remaining_time": "0:13:54", "throughput": 5530.87, "total_tokens": 14039872}
|
|
{"current_steps": 28520, "total_steps": 37885, "loss": 0.0324, "lr": 3.498235893449042e-07, "epoch": 3.7640227002771547, "percentage": 75.28, "elapsed_time": "0:42:18", "remaining_time": "0:13:53", "throughput": 5531.08, "total_tokens": 14042240}
|
|
{"current_steps": 28525, "total_steps": 37885, "loss": 0.0005, "lr": 3.494736276217013e-07, "epoch": 3.764682592054903, "percentage": 75.29, "elapsed_time": "0:42:19", "remaining_time": "0:13:53", "throughput": 5531.32, "total_tokens": 14044672}
|
|
{"current_steps": 28530, "total_steps": 37885, "loss": 0.0003, "lr": 3.4912380396800987e-07, "epoch": 3.7653424838326517, "percentage": 75.31, "elapsed_time": "0:42:19", "remaining_time": "0:13:52", "throughput": 5531.53, "total_tokens": 14047040}
|
|
{"current_steps": 28535, "total_steps": 37885, "loss": 0.0735, "lr": 3.4877411845807783e-07, "epoch": 3.7660023756104, "percentage": 75.32, "elapsed_time": "0:42:19", "remaining_time": "0:13:52", "throughput": 5531.91, "total_tokens": 14049856}
|
|
{"current_steps": 28540, "total_steps": 37885, "loss": 0.0352, "lr": 3.4842457116612365e-07, "epoch": 3.7666622673881482, "percentage": 75.33, "elapsed_time": "0:42:20", "remaining_time": "0:13:51", "throughput": 5532.15, "total_tokens": 14052288}
|
|
{"current_steps": 28545, "total_steps": 37885, "loss": 0.0001, "lr": 3.4807516216633557e-07, "epoch": 3.7673221591658965, "percentage": 75.35, "elapsed_time": "0:42:20", "remaining_time": "0:13:51", "throughput": 5532.31, "total_tokens": 14054528}
|
|
{"current_steps": 28550, "total_steps": 37885, "loss": 0.0, "lr": 3.477258915328735e-07, "epoch": 3.7679820509436452, "percentage": 75.36, "elapsed_time": "0:42:20", "remaining_time": "0:13:50", "throughput": 5532.52, "total_tokens": 14056896}
|
|
{"current_steps": 28555, "total_steps": 37885, "loss": 0.0, "lr": 3.4737675933986744e-07, "epoch": 3.768641942721394, "percentage": 75.37, "elapsed_time": "0:42:21", "remaining_time": "0:13:50", "throughput": 5532.78, "total_tokens": 14059392}
|
|
{"current_steps": 28560, "total_steps": 37885, "loss": 0.0, "lr": 3.4702776566141864e-07, "epoch": 3.7693018344991422, "percentage": 75.39, "elapsed_time": "0:42:21", "remaining_time": "0:13:49", "throughput": 5532.97, "total_tokens": 14061696}
|
|
{"current_steps": 28565, "total_steps": 37885, "loss": 0.0002, "lr": 3.4667891057159784e-07, "epoch": 3.7699617262768905, "percentage": 75.4, "elapsed_time": "0:42:21", "remaining_time": "0:13:49", "throughput": 5533.06, "total_tokens": 14063744}
|
|
{"current_steps": 28570, "total_steps": 37885, "loss": 0.0906, "lr": 3.463301941444473e-07, "epoch": 3.770621618054639, "percentage": 75.41, "elapsed_time": "0:42:22", "remaining_time": "0:13:48", "throughput": 5533.32, "total_tokens": 14066240}
|
|
{"current_steps": 28575, "total_steps": 37885, "loss": 0.0, "lr": 3.459816164539798e-07, "epoch": 3.7712815098323875, "percentage": 75.43, "elapsed_time": "0:42:22", "remaining_time": "0:13:48", "throughput": 5533.58, "total_tokens": 14068736}
|
|
{"current_steps": 28580, "total_steps": 37885, "loss": 0.1, "lr": 3.456331775741779e-07, "epoch": 3.771941401610136, "percentage": 75.44, "elapsed_time": "0:42:22", "remaining_time": "0:13:47", "throughput": 5533.84, "total_tokens": 14071232}
|
|
{"current_steps": 28585, "total_steps": 37885, "loss": 0.0, "lr": 3.452848775789955e-07, "epoch": 3.7726012933878845, "percentage": 75.45, "elapsed_time": "0:42:23", "remaining_time": "0:13:47", "throughput": 5534.08, "total_tokens": 14073664}
|
|
{"current_steps": 28590, "total_steps": 37885, "loss": 0.0002, "lr": 3.449367165423571e-07, "epoch": 3.773261185165633, "percentage": 75.47, "elapsed_time": "0:42:23", "remaining_time": "0:13:46", "throughput": 5534.24, "total_tokens": 14075904}
|
|
{"current_steps": 28595, "total_steps": 37885, "loss": 0.002, "lr": 3.4458869453815674e-07, "epoch": 3.773921076943381, "percentage": 75.48, "elapsed_time": "0:42:23", "remaining_time": "0:13:46", "throughput": 5534.43, "total_tokens": 14078208}
|
|
{"current_steps": 28600, "total_steps": 37885, "loss": 0.0446, "lr": 3.4424081164025976e-07, "epoch": 3.77458096872113, "percentage": 75.49, "elapsed_time": "0:42:24", "remaining_time": "0:13:45", "throughput": 5534.69, "total_tokens": 14080704}
|
|
{"current_steps": 28605, "total_steps": 37885, "loss": 0.0, "lr": 3.4389306792250194e-07, "epoch": 3.775240860498878, "percentage": 75.5, "elapsed_time": "0:42:24", "remaining_time": "0:13:45", "throughput": 5534.91, "total_tokens": 14083072}
|
|
{"current_steps": 28610, "total_steps": 37885, "loss": 0.0, "lr": 3.435454634586896e-07, "epoch": 3.775900752276627, "percentage": 75.52, "elapsed_time": "0:42:24", "remaining_time": "0:13:44", "throughput": 5535.04, "total_tokens": 14085248}
|
|
{"current_steps": 28615, "total_steps": 37885, "loss": 0.0001, "lr": 3.431979983225987e-07, "epoch": 3.776560644054375, "percentage": 75.53, "elapsed_time": "0:42:25", "remaining_time": "0:13:44", "throughput": 5535.37, "total_tokens": 14087936}
|
|
{"current_steps": 28620, "total_steps": 37885, "loss": 0.0266, "lr": 3.4285067258797626e-07, "epoch": 3.7772205358321234, "percentage": 75.54, "elapsed_time": "0:42:25", "remaining_time": "0:13:44", "throughput": 5535.61, "total_tokens": 14090368}
|
|
{"current_steps": 28625, "total_steps": 37885, "loss": 0.0001, "lr": 3.425034863285404e-07, "epoch": 3.777880427609872, "percentage": 75.56, "elapsed_time": "0:42:25", "remaining_time": "0:13:43", "throughput": 5536.12, "total_tokens": 14093568}
|
|
{"current_steps": 28630, "total_steps": 37885, "loss": 0.0341, "lr": 3.42156439617978e-07, "epoch": 3.7785403193876204, "percentage": 75.57, "elapsed_time": "0:42:26", "remaining_time": "0:13:43", "throughput": 5536.29, "total_tokens": 14095808}
|
|
{"current_steps": 28635, "total_steps": 37885, "loss": 0.0, "lr": 3.418095325299475e-07, "epoch": 3.779200211165369, "percentage": 75.58, "elapsed_time": "0:42:26", "remaining_time": "0:13:42", "throughput": 5536.58, "total_tokens": 14098368}
|
|
{"current_steps": 28640, "total_steps": 37885, "loss": 0.0001, "lr": 3.414627651380778e-07, "epoch": 3.7798601029431174, "percentage": 75.6, "elapsed_time": "0:42:26", "remaining_time": "0:13:42", "throughput": 5536.8, "total_tokens": 14100736}
|
|
{"current_steps": 28645, "total_steps": 37885, "loss": 0.0, "lr": 3.4111613751596725e-07, "epoch": 3.7805199947208656, "percentage": 75.61, "elapsed_time": "0:42:27", "remaining_time": "0:13:41", "throughput": 5537.01, "total_tokens": 14103104}
|
|
{"current_steps": 28650, "total_steps": 37885, "loss": 0.0004, "lr": 3.407696497371855e-07, "epoch": 3.7811798864986144, "percentage": 75.62, "elapsed_time": "0:42:27", "remaining_time": "0:13:41", "throughput": 5537.28, "total_tokens": 14105600}
|
|
{"current_steps": 28655, "total_steps": 37885, "loss": 0.0, "lr": 3.40423301875271e-07, "epoch": 3.7818397782763626, "percentage": 75.64, "elapsed_time": "0:42:27", "remaining_time": "0:13:40", "throughput": 5537.4, "total_tokens": 14107712}
|
|
{"current_steps": 28660, "total_steps": 37885, "loss": 0.0008, "lr": 3.400770940037353e-07, "epoch": 3.7824996700541114, "percentage": 75.65, "elapsed_time": "0:42:28", "remaining_time": "0:13:40", "throughput": 5537.62, "total_tokens": 14110080}
|
|
{"current_steps": 28665, "total_steps": 37885, "loss": 0.0054, "lr": 3.3973102619605753e-07, "epoch": 3.7831595618318596, "percentage": 75.66, "elapsed_time": "0:42:28", "remaining_time": "0:13:39", "throughput": 5537.87, "total_tokens": 14112512}
|
|
{"current_steps": 28670, "total_steps": 37885, "loss": 0.0, "lr": 3.3938509852568773e-07, "epoch": 3.783819453609608, "percentage": 75.68, "elapsed_time": "0:42:28", "remaining_time": "0:13:39", "throughput": 5537.99, "total_tokens": 14114624}
|
|
{"current_steps": 28675, "total_steps": 37885, "loss": 0.0, "lr": 3.390393110660471e-07, "epoch": 3.784479345387356, "percentage": 75.69, "elapsed_time": "0:42:29", "remaining_time": "0:13:38", "throughput": 5538.19, "total_tokens": 14116928}
|
|
{"current_steps": 28680, "total_steps": 37885, "loss": 0.0, "lr": 3.386936638905263e-07, "epoch": 3.785139237165105, "percentage": 75.7, "elapsed_time": "0:42:29", "remaining_time": "0:13:38", "throughput": 5538.41, "total_tokens": 14119296}
|
|
{"current_steps": 28685, "total_steps": 37885, "loss": 0.0, "lr": 3.38348157072487e-07, "epoch": 3.7857991289428536, "percentage": 75.72, "elapsed_time": "0:42:29", "remaining_time": "0:13:37", "throughput": 5538.6, "total_tokens": 14121600}
|
|
{"current_steps": 28690, "total_steps": 37885, "loss": 0.0426, "lr": 3.380027906852596e-07, "epoch": 3.786459020720602, "percentage": 75.73, "elapsed_time": "0:42:29", "remaining_time": "0:13:37", "throughput": 5538.76, "total_tokens": 14123840}
|
|
{"current_steps": 28695, "total_steps": 37885, "loss": 0.0, "lr": 3.3765756480214616e-07, "epoch": 3.78711891249835, "percentage": 75.74, "elapsed_time": "0:42:30", "remaining_time": "0:13:36", "throughput": 5538.97, "total_tokens": 14126208}
|
|
{"current_steps": 28700, "total_steps": 37885, "loss": 0.0, "lr": 3.373124794964185e-07, "epoch": 3.7877788042760985, "percentage": 75.76, "elapsed_time": "0:42:30", "remaining_time": "0:13:36", "throughput": 5539.21, "total_tokens": 14128640}
|
|
{"current_steps": 28705, "total_steps": 37885, "loss": 0.0004, "lr": 3.36967534841318e-07, "epoch": 3.788438696053847, "percentage": 75.77, "elapsed_time": "0:42:30", "remaining_time": "0:13:35", "throughput": 5539.39, "total_tokens": 14130944}
|
|
{"current_steps": 28710, "total_steps": 37885, "loss": 0.0002, "lr": 3.3662273091005687e-07, "epoch": 3.7890985878315955, "percentage": 75.78, "elapsed_time": "0:42:31", "remaining_time": "0:13:35", "throughput": 5539.68, "total_tokens": 14133504}
|
|
{"current_steps": 28715, "total_steps": 37885, "loss": 0.0011, "lr": 3.3627806777581777e-07, "epoch": 3.789758479609344, "percentage": 75.8, "elapsed_time": "0:42:31", "remaining_time": "0:13:34", "throughput": 5539.98, "total_tokens": 14136128}
|
|
{"current_steps": 28720, "total_steps": 37885, "loss": 0.0919, "lr": 3.35933545511752e-07, "epoch": 3.7904183713870925, "percentage": 75.81, "elapsed_time": "0:42:31", "remaining_time": "0:13:34", "throughput": 5540.16, "total_tokens": 14138432}
|
|
{"current_steps": 28725, "total_steps": 37885, "loss": 0.0203, "lr": 3.3558916419098247e-07, "epoch": 3.7910782631648408, "percentage": 75.82, "elapsed_time": "0:42:32", "remaining_time": "0:13:33", "throughput": 5540.43, "total_tokens": 14140928}
|
|
{"current_steps": 28730, "total_steps": 37885, "loss": 0.0014, "lr": 3.3524492388660166e-07, "epoch": 3.7917381549425895, "percentage": 75.83, "elapsed_time": "0:42:32", "remaining_time": "0:13:33", "throughput": 5540.65, "total_tokens": 14143296}
|
|
{"current_steps": 28735, "total_steps": 37885, "loss": 0.0003, "lr": 3.349008246716721e-07, "epoch": 3.7923980467203378, "percentage": 75.85, "elapsed_time": "0:42:32", "remaining_time": "0:13:32", "throughput": 5540.95, "total_tokens": 14145920}
|
|
{"current_steps": 28740, "total_steps": 37885, "loss": 0.0854, "lr": 3.345568666192261e-07, "epoch": 3.7930579384980865, "percentage": 75.86, "elapsed_time": "0:42:33", "remaining_time": "0:13:32", "throughput": 5541.23, "total_tokens": 14148480}
|
|
{"current_steps": 28745, "total_steps": 37885, "loss": 0.0, "lr": 3.3421304980226627e-07, "epoch": 3.7937178302758348, "percentage": 75.87, "elapsed_time": "0:42:33", "remaining_time": "0:13:31", "throughput": 5541.48, "total_tokens": 14150976}
|
|
{"current_steps": 28750, "total_steps": 37885, "loss": 0.0001, "lr": 3.338693742937657e-07, "epoch": 3.794377722053583, "percentage": 75.89, "elapsed_time": "0:42:33", "remaining_time": "0:13:31", "throughput": 5541.81, "total_tokens": 14153728}
|
|
{"current_steps": 28755, "total_steps": 37885, "loss": 0.0002, "lr": 3.3352584016666654e-07, "epoch": 3.7950376138313318, "percentage": 75.9, "elapsed_time": "0:42:34", "remaining_time": "0:13:31", "throughput": 5542.1, "total_tokens": 14156288}
|
|
{"current_steps": 28760, "total_steps": 37885, "loss": 0.0006, "lr": 3.3318244749388136e-07, "epoch": 3.79569750560908, "percentage": 75.91, "elapsed_time": "0:42:34", "remaining_time": "0:13:30", "throughput": 5542.41, "total_tokens": 14158976}
|
|
{"current_steps": 28765, "total_steps": 37885, "loss": 0.0048, "lr": 3.328391963482934e-07, "epoch": 3.7963573973868288, "percentage": 75.93, "elapsed_time": "0:42:34", "remaining_time": "0:13:30", "throughput": 5542.67, "total_tokens": 14161472}
|
|
{"current_steps": 28770, "total_steps": 37885, "loss": 0.0, "lr": 3.3249608680275455e-07, "epoch": 3.797017289164577, "percentage": 75.94, "elapsed_time": "0:42:35", "remaining_time": "0:13:29", "throughput": 5542.92, "total_tokens": 14163968}
|
|
{"current_steps": 28775, "total_steps": 37885, "loss": 0.0007, "lr": 3.3215311893008744e-07, "epoch": 3.7976771809423253, "percentage": 75.95, "elapsed_time": "0:42:35", "remaining_time": "0:13:29", "throughput": 5543.21, "total_tokens": 14166592}
|
|
{"current_steps": 28780, "total_steps": 37885, "loss": 0.0502, "lr": 3.318102928030848e-07, "epoch": 3.798337072720074, "percentage": 75.97, "elapsed_time": "0:42:36", "remaining_time": "0:13:28", "throughput": 5543.56, "total_tokens": 14169344}
|
|
{"current_steps": 28785, "total_steps": 37885, "loss": 0.0001, "lr": 3.3146760849450916e-07, "epoch": 3.7989969644978223, "percentage": 75.98, "elapsed_time": "0:42:36", "remaining_time": "0:13:28", "throughput": 5543.84, "total_tokens": 14171904}
|
|
{"current_steps": 28790, "total_steps": 37885, "loss": 0.0, "lr": 3.3112506607709246e-07, "epoch": 3.799656856275571, "percentage": 75.99, "elapsed_time": "0:42:36", "remaining_time": "0:13:27", "throughput": 5544.07, "total_tokens": 14174336}
|
|
{"current_steps": 28795, "total_steps": 37885, "loss": 0.0001, "lr": 3.307826656235363e-07, "epoch": 3.8003167480533193, "percentage": 76.01, "elapsed_time": "0:42:37", "remaining_time": "0:13:27", "throughput": 5544.25, "total_tokens": 14176640}
|
|
{"current_steps": 28800, "total_steps": 37885, "loss": 0.028, "lr": 3.304404072065139e-07, "epoch": 3.8009766398310676, "percentage": 76.02, "elapsed_time": "0:42:37", "remaining_time": "0:13:26", "throughput": 5544.42, "total_tokens": 14178944}
|
|
{"current_steps": 28805, "total_steps": 37885, "loss": 0.0, "lr": 3.30098290898666e-07, "epoch": 3.8016365316088163, "percentage": 76.03, "elapsed_time": "0:42:37", "remaining_time": "0:13:26", "throughput": 5544.71, "total_tokens": 14181568}
|
|
{"current_steps": 28810, "total_steps": 37885, "loss": 0.0001, "lr": 3.2975631677260505e-07, "epoch": 3.8022964233865646, "percentage": 76.05, "elapsed_time": "0:42:38", "remaining_time": "0:13:25", "throughput": 5544.99, "total_tokens": 14184128}
|
|
{"current_steps": 28815, "total_steps": 37885, "loss": 0.0001, "lr": 3.294144849009122e-07, "epoch": 3.8029563151643133, "percentage": 76.06, "elapsed_time": "0:42:38", "remaining_time": "0:13:25", "throughput": 5545.21, "total_tokens": 14186560}
|
|
{"current_steps": 28820, "total_steps": 37885, "loss": 0.0382, "lr": 3.290727953561393e-07, "epoch": 3.8036162069420616, "percentage": 76.07, "elapsed_time": "0:42:38", "remaining_time": "0:13:24", "throughput": 5545.5, "total_tokens": 14189184}
|
|
{"current_steps": 28825, "total_steps": 37885, "loss": 0.0, "lr": 3.287312482108071e-07, "epoch": 3.80427609871981, "percentage": 76.09, "elapsed_time": "0:42:39", "remaining_time": "0:13:24", "throughput": 5545.71, "total_tokens": 14191616}
|
|
{"current_steps": 28830, "total_steps": 37885, "loss": 0.0738, "lr": 3.2838984353740593e-07, "epoch": 3.804935990497558, "percentage": 76.1, "elapsed_time": "0:42:39", "remaining_time": "0:13:23", "throughput": 5546.07, "total_tokens": 14194432}
|
|
{"current_steps": 28835, "total_steps": 37885, "loss": 0.0114, "lr": 3.2804858140839764e-07, "epoch": 3.805595882275307, "percentage": 76.11, "elapsed_time": "0:42:39", "remaining_time": "0:13:23", "throughput": 5546.39, "total_tokens": 14197120}
|
|
{"current_steps": 28840, "total_steps": 37885, "loss": 0.0564, "lr": 3.277074618962117e-07, "epoch": 3.806255774053055, "percentage": 76.13, "elapsed_time": "0:42:40", "remaining_time": "0:13:22", "throughput": 5546.56, "total_tokens": 14199424}
|
|
{"current_steps": 28845, "total_steps": 37885, "loss": 0.086, "lr": 3.2736648507324903e-07, "epoch": 3.806915665830804, "percentage": 76.14, "elapsed_time": "0:42:40", "remaining_time": "0:13:22", "throughput": 5546.75, "total_tokens": 14201792}
|
|
{"current_steps": 28850, "total_steps": 37885, "loss": 0.0, "lr": 3.270256510118786e-07, "epoch": 3.807575557608552, "percentage": 76.15, "elapsed_time": "0:42:40", "remaining_time": "0:13:21", "throughput": 5547.04, "total_tokens": 14204416}
|
|
{"current_steps": 28855, "total_steps": 37885, "loss": 0.0016, "lr": 3.2668495978444065e-07, "epoch": 3.8082354493863004, "percentage": 76.16, "elapsed_time": "0:42:41", "remaining_time": "0:13:21", "throughput": 5547.35, "total_tokens": 14207104}
|
|
{"current_steps": 28860, "total_steps": 37885, "loss": 0.063, "lr": 3.2634441146324445e-07, "epoch": 3.808895341164049, "percentage": 76.18, "elapsed_time": "0:42:41", "remaining_time": "0:13:20", "throughput": 5547.58, "total_tokens": 14209600}
|
|
{"current_steps": 28865, "total_steps": 37885, "loss": 0.0611, "lr": 3.26004006120568e-07, "epoch": 3.8095552329417974, "percentage": 76.19, "elapsed_time": "0:42:41", "remaining_time": "0:13:20", "throughput": 5547.73, "total_tokens": 14211840}
|
|
{"current_steps": 28870, "total_steps": 37885, "loss": 0.094, "lr": 3.256637438286612e-07, "epoch": 3.810215124719546, "percentage": 76.2, "elapsed_time": "0:42:42", "remaining_time": "0:13:20", "throughput": 5547.97, "total_tokens": 14214336}
|
|
{"current_steps": 28875, "total_steps": 37885, "loss": 0.0, "lr": 3.253236246597417e-07, "epoch": 3.8108750164972944, "percentage": 76.22, "elapsed_time": "0:42:42", "remaining_time": "0:13:19", "throughput": 5548.13, "total_tokens": 14216640}
|
|
{"current_steps": 28880, "total_steps": 37885, "loss": 0.0006, "lr": 3.2498364868599683e-07, "epoch": 3.8115349082750427, "percentage": 76.23, "elapsed_time": "0:42:42", "remaining_time": "0:13:19", "throughput": 5548.43, "total_tokens": 14219264}
|
|
{"current_steps": 28885, "total_steps": 37885, "loss": 0.0352, "lr": 3.2464381597958444e-07, "epoch": 3.8121948000527914, "percentage": 76.24, "elapsed_time": "0:42:43", "remaining_time": "0:13:18", "throughput": 5548.58, "total_tokens": 14221504}
|
|
{"current_steps": 28890, "total_steps": 37885, "loss": 0.0, "lr": 3.243041266126316e-07, "epoch": 3.8128546918305397, "percentage": 76.26, "elapsed_time": "0:42:43", "remaining_time": "0:13:18", "throughput": 5548.74, "total_tokens": 14223744}
|
|
{"current_steps": 28895, "total_steps": 37885, "loss": 0.0, "lr": 3.239645806572352e-07, "epoch": 3.8135145836082884, "percentage": 76.27, "elapsed_time": "0:42:43", "remaining_time": "0:13:17", "throughput": 5549.01, "total_tokens": 14226304}
|
|
{"current_steps": 28900, "total_steps": 37885, "loss": 0.0213, "lr": 3.2362517818546085e-07, "epoch": 3.8141744753860367, "percentage": 76.28, "elapsed_time": "0:42:44", "remaining_time": "0:13:17", "throughput": 5549.22, "total_tokens": 14228672}
|
|
{"current_steps": 28905, "total_steps": 37885, "loss": 0.0001, "lr": 3.2328591926934446e-07, "epoch": 3.814834367163785, "percentage": 76.3, "elapsed_time": "0:42:44", "remaining_time": "0:13:16", "throughput": 5549.53, "total_tokens": 14231360}
|
|
{"current_steps": 28910, "total_steps": 37885, "loss": 0.0001, "lr": 3.229468039808916e-07, "epoch": 3.8154942589415337, "percentage": 76.31, "elapsed_time": "0:42:44", "remaining_time": "0:13:16", "throughput": 5549.77, "total_tokens": 14233856}
|
|
{"current_steps": 28915, "total_steps": 37885, "loss": 0.0, "lr": 3.2260783239207644e-07, "epoch": 3.816154150719282, "percentage": 76.32, "elapsed_time": "0:42:45", "remaining_time": "0:13:15", "throughput": 5550.04, "total_tokens": 14236416}
|
|
{"current_steps": 28920, "total_steps": 37885, "loss": 0.0, "lr": 3.2226900457484354e-07, "epoch": 3.8168140424970307, "percentage": 76.34, "elapsed_time": "0:42:45", "remaining_time": "0:13:15", "throughput": 5550.26, "total_tokens": 14238848}
|
|
{"current_steps": 28925, "total_steps": 37885, "loss": 0.0217, "lr": 3.21930320601107e-07, "epoch": 3.817473934274779, "percentage": 76.35, "elapsed_time": "0:42:45", "remaining_time": "0:13:14", "throughput": 5550.64, "total_tokens": 14241728}
|
|
{"current_steps": 28930, "total_steps": 37885, "loss": 0.0001, "lr": 3.215917805427495e-07, "epoch": 3.8181338260525273, "percentage": 76.36, "elapsed_time": "0:42:46", "remaining_time": "0:13:14", "throughput": 5550.77, "total_tokens": 14243904}
|
|
{"current_steps": 28935, "total_steps": 37885, "loss": 0.0, "lr": 3.2125338447162386e-07, "epoch": 3.818793717830276, "percentage": 76.38, "elapsed_time": "0:42:46", "remaining_time": "0:13:13", "throughput": 5551.01, "total_tokens": 14246336}
|
|
{"current_steps": 28940, "total_steps": 37885, "loss": 0.0003, "lr": 3.209151324595523e-07, "epoch": 3.8194536096080243, "percentage": 76.39, "elapsed_time": "0:42:46", "remaining_time": "0:13:13", "throughput": 5551.14, "total_tokens": 14248512}
|
|
{"current_steps": 28945, "total_steps": 37885, "loss": 0.0657, "lr": 3.205770245783267e-07, "epoch": 3.820113501385773, "percentage": 76.4, "elapsed_time": "0:42:47", "remaining_time": "0:13:12", "throughput": 5551.36, "total_tokens": 14250944}
|
|
{"current_steps": 28950, "total_steps": 37885, "loss": 0.1313, "lr": 3.202390608997072e-07, "epoch": 3.8207733931635213, "percentage": 76.42, "elapsed_time": "0:42:47", "remaining_time": "0:13:12", "throughput": 5551.66, "total_tokens": 14253568}
|
|
{"current_steps": 28955, "total_steps": 37885, "loss": 0.0, "lr": 3.1990124149542465e-07, "epoch": 3.8214332849412695, "percentage": 76.43, "elapsed_time": "0:42:47", "remaining_time": "0:13:11", "throughput": 5551.91, "total_tokens": 14256064}
|
|
{"current_steps": 28960, "total_steps": 37885, "loss": 0.1208, "lr": 3.1956356643717896e-07, "epoch": 3.822093176719018, "percentage": 76.44, "elapsed_time": "0:42:48", "remaining_time": "0:13:11", "throughput": 5552.06, "total_tokens": 14258304}
|
|
{"current_steps": 28965, "total_steps": 37885, "loss": 0.0001, "lr": 3.1922603579663877e-07, "epoch": 3.8227530684967665, "percentage": 76.46, "elapsed_time": "0:42:48", "remaining_time": "0:13:10", "throughput": 5552.24, "total_tokens": 14260608}
|
|
{"current_steps": 28970, "total_steps": 37885, "loss": 0.0001, "lr": 3.188886496454426e-07, "epoch": 3.823412960274515, "percentage": 76.47, "elapsed_time": "0:42:48", "remaining_time": "0:13:10", "throughput": 5552.47, "total_tokens": 14263040}
|
|
{"current_steps": 28975, "total_steps": 37885, "loss": 0.0844, "lr": 3.185514080551986e-07, "epoch": 3.8240728520522635, "percentage": 76.48, "elapsed_time": "0:42:49", "remaining_time": "0:13:10", "throughput": 5552.63, "total_tokens": 14265344}
|
|
{"current_steps": 28980, "total_steps": 37885, "loss": 0.0, "lr": 3.1821431109748344e-07, "epoch": 3.824732743830012, "percentage": 76.49, "elapsed_time": "0:42:49", "remaining_time": "0:13:09", "throughput": 5552.9, "total_tokens": 14267904}
|
|
{"current_steps": 28985, "total_steps": 37885, "loss": 0.1095, "lr": 3.178773588438438e-07, "epoch": 3.82539263560776, "percentage": 76.51, "elapsed_time": "0:42:49", "remaining_time": "0:13:09", "throughput": 5553.13, "total_tokens": 14270400}
|
|
{"current_steps": 28990, "total_steps": 37885, "loss": 0.0, "lr": 3.1754055136579463e-07, "epoch": 3.826052527385509, "percentage": 76.52, "elapsed_time": "0:42:50", "remaining_time": "0:13:08", "throughput": 5553.32, "total_tokens": 14272768}
|
|
{"current_steps": 28995, "total_steps": 37885, "loss": 0.0001, "lr": 3.172038887348221e-07, "epoch": 3.826712419163257, "percentage": 76.53, "elapsed_time": "0:42:50", "remaining_time": "0:13:08", "throughput": 5553.51, "total_tokens": 14275136}
|
|
{"current_steps": 29000, "total_steps": 37885, "loss": 0.0001, "lr": 3.168673710223797e-07, "epoch": 3.827372310941006, "percentage": 76.55, "elapsed_time": "0:42:50", "remaining_time": "0:13:07", "throughput": 5553.77, "total_tokens": 14277696}
|
|
{"current_steps": 29005, "total_steps": 37885, "loss": 0.0001, "lr": 3.165309982998903e-07, "epoch": 3.828032202718754, "percentage": 76.56, "elapsed_time": "0:42:51", "remaining_time": "0:13:07", "throughput": 5553.9, "total_tokens": 14279872}
|
|
{"current_steps": 29010, "total_steps": 37885, "loss": 0.0001, "lr": 3.161947706387479e-07, "epoch": 3.8286920944965024, "percentage": 76.57, "elapsed_time": "0:42:51", "remaining_time": "0:13:06", "throughput": 5554.16, "total_tokens": 14282432}
|
|
{"current_steps": 29015, "total_steps": 37885, "loss": 0.0004, "lr": 3.1585868811031337e-07, "epoch": 3.829351986274251, "percentage": 76.59, "elapsed_time": "0:42:51", "remaining_time": "0:13:06", "throughput": 5554.37, "total_tokens": 14284864}
|
|
{"current_steps": 29020, "total_steps": 37885, "loss": 0.0003, "lr": 3.155227507859185e-07, "epoch": 3.8300118780519994, "percentage": 76.6, "elapsed_time": "0:42:52", "remaining_time": "0:13:05", "throughput": 5554.59, "total_tokens": 14287296}
|
|
{"current_steps": 29025, "total_steps": 37885, "loss": 0.0674, "lr": 3.1518695873686285e-07, "epoch": 3.830671769829748, "percentage": 76.61, "elapsed_time": "0:42:52", "remaining_time": "0:13:05", "throughput": 5554.88, "total_tokens": 14289920}
|
|
{"current_steps": 29030, "total_steps": 37885, "loss": 0.0001, "lr": 3.1485131203441605e-07, "epoch": 3.8313316616074964, "percentage": 76.63, "elapsed_time": "0:42:52", "remaining_time": "0:13:04", "throughput": 5555.12, "total_tokens": 14292416}
|
|
{"current_steps": 29035, "total_steps": 37885, "loss": 0.0065, "lr": 3.1451581074981726e-07, "epoch": 3.8319915533852447, "percentage": 76.64, "elapsed_time": "0:42:53", "remaining_time": "0:13:04", "throughput": 5555.23, "total_tokens": 14294592}
|
|
{"current_steps": 29040, "total_steps": 37885, "loss": 0.0003, "lr": 3.141804549542735e-07, "epoch": 3.8326514451629934, "percentage": 76.65, "elapsed_time": "0:42:53", "remaining_time": "0:13:03", "throughput": 5555.48, "total_tokens": 14297088}
|
|
{"current_steps": 29045, "total_steps": 37885, "loss": 0.1579, "lr": 3.138452447189617e-07, "epoch": 3.8333113369407417, "percentage": 76.67, "elapsed_time": "0:42:53", "remaining_time": "0:13:03", "throughput": 5555.76, "total_tokens": 14299712}
|
|
{"current_steps": 29050, "total_steps": 37885, "loss": 0.0003, "lr": 3.1351018011502837e-07, "epoch": 3.8339712287184904, "percentage": 76.68, "elapsed_time": "0:42:54", "remaining_time": "0:13:02", "throughput": 5555.89, "total_tokens": 14301888}
|
|
{"current_steps": 29055, "total_steps": 37885, "loss": 0.0567, "lr": 3.1317526121358785e-07, "epoch": 3.8346311204962387, "percentage": 76.69, "elapsed_time": "0:42:54", "remaining_time": "0:13:02", "throughput": 5556.09, "total_tokens": 14304256}
|
|
{"current_steps": 29060, "total_steps": 37885, "loss": 0.0001, "lr": 3.128404880857244e-07, "epoch": 3.835291012273987, "percentage": 76.71, "elapsed_time": "0:42:54", "remaining_time": "0:13:01", "throughput": 5556.34, "total_tokens": 14306752}
|
|
{"current_steps": 29065, "total_steps": 37885, "loss": 0.0005, "lr": 3.125058608024914e-07, "epoch": 3.8359509040517357, "percentage": 76.72, "elapsed_time": "0:42:55", "remaining_time": "0:13:01", "throughput": 5556.59, "total_tokens": 14309248}
|
|
{"current_steps": 29070, "total_steps": 37885, "loss": 0.0164, "lr": 3.1217137943491144e-07, "epoch": 3.836610795829484, "percentage": 76.73, "elapsed_time": "0:42:55", "remaining_time": "0:13:00", "throughput": 5556.88, "total_tokens": 14311872}
|
|
{"current_steps": 29075, "total_steps": 37885, "loss": 0.0001, "lr": 3.1183704405397494e-07, "epoch": 3.8372706876072327, "percentage": 76.75, "elapsed_time": "0:42:55", "remaining_time": "0:13:00", "throughput": 5557.13, "total_tokens": 14314368}
|
|
{"current_steps": 29080, "total_steps": 37885, "loss": 0.0381, "lr": 3.1150285473064255e-07, "epoch": 3.837930579384981, "percentage": 76.76, "elapsed_time": "0:42:56", "remaining_time": "0:13:00", "throughput": 5557.37, "total_tokens": 14316864}
|
|
{"current_steps": 29085, "total_steps": 37885, "loss": 0.0239, "lr": 3.1116881153584387e-07, "epoch": 3.8385904711627292, "percentage": 76.77, "elapsed_time": "0:42:56", "remaining_time": "0:12:59", "throughput": 5557.61, "total_tokens": 14319360}
|
|
{"current_steps": 29090, "total_steps": 37885, "loss": 0.0, "lr": 3.108349145404764e-07, "epoch": 3.8392503629404775, "percentage": 76.79, "elapsed_time": "0:42:56", "remaining_time": "0:12:59", "throughput": 5557.93, "total_tokens": 14322048}
|
|
{"current_steps": 29095, "total_steps": 37885, "loss": 0.0382, "lr": 3.1050116381540793e-07, "epoch": 3.8399102547182262, "percentage": 76.8, "elapsed_time": "0:42:57", "remaining_time": "0:12:58", "throughput": 5558.17, "total_tokens": 14324480}
|
|
{"current_steps": 29100, "total_steps": 37885, "loss": 0.0021, "lr": 3.101675594314747e-07, "epoch": 3.8405701464959745, "percentage": 76.81, "elapsed_time": "0:42:57", "remaining_time": "0:12:58", "throughput": 5558.42, "total_tokens": 14326976}
|
|
{"current_steps": 29105, "total_steps": 37885, "loss": 0.0002, "lr": 3.098341014594813e-07, "epoch": 3.8412300382737232, "percentage": 76.82, "elapsed_time": "0:42:57", "remaining_time": "0:12:57", "throughput": 5558.73, "total_tokens": 14329600}
|
|
{"current_steps": 29110, "total_steps": 37885, "loss": 0.0001, "lr": 3.0950078997020214e-07, "epoch": 3.8418899300514715, "percentage": 76.84, "elapsed_time": "0:42:58", "remaining_time": "0:12:57", "throughput": 5558.94, "total_tokens": 14331968}
|
|
{"current_steps": 29115, "total_steps": 37885, "loss": 0.0719, "lr": 3.0916762503438e-07, "epoch": 3.84254982182922, "percentage": 76.85, "elapsed_time": "0:42:58", "remaining_time": "0:12:56", "throughput": 5559.28, "total_tokens": 14334720}
|
|
{"current_steps": 29120, "total_steps": 37885, "loss": 0.0002, "lr": 3.0883460672272724e-07, "epoch": 3.8432097136069685, "percentage": 76.86, "elapsed_time": "0:42:58", "remaining_time": "0:12:56", "throughput": 5559.48, "total_tokens": 14337088}
|
|
{"current_steps": 29125, "total_steps": 37885, "loss": 0.001, "lr": 3.0850173510592415e-07, "epoch": 3.843869605384717, "percentage": 76.88, "elapsed_time": "0:42:59", "remaining_time": "0:12:55", "throughput": 5559.61, "total_tokens": 14339264}
|
|
{"current_steps": 29130, "total_steps": 37885, "loss": 0.0442, "lr": 3.0816901025461974e-07, "epoch": 3.8445294971624655, "percentage": 76.89, "elapsed_time": "0:42:59", "remaining_time": "0:12:55", "throughput": 5559.8, "total_tokens": 14341632}
|
|
{"current_steps": 29135, "total_steps": 37885, "loss": 0.0, "lr": 3.0783643223943367e-07, "epoch": 3.845189388940214, "percentage": 76.9, "elapsed_time": "0:42:59", "remaining_time": "0:12:54", "throughput": 5559.95, "total_tokens": 14343872}
|
|
{"current_steps": 29140, "total_steps": 37885, "loss": 0.0003, "lr": 3.075040011309522e-07, "epoch": 3.845849280717962, "percentage": 76.92, "elapsed_time": "0:43:00", "remaining_time": "0:12:54", "throughput": 5560.14, "total_tokens": 14346240}
|
|
{"current_steps": 29145, "total_steps": 37885, "loss": 0.0001, "lr": 3.0717171699973197e-07, "epoch": 3.846509172495711, "percentage": 76.93, "elapsed_time": "0:43:00", "remaining_time": "0:12:53", "throughput": 5560.31, "total_tokens": 14348544}
|
|
{"current_steps": 29150, "total_steps": 37885, "loss": 0.0, "lr": 3.068395799162976e-07, "epoch": 3.847169064273459, "percentage": 76.94, "elapsed_time": "0:43:00", "remaining_time": "0:12:53", "throughput": 5560.45, "total_tokens": 14350784}
|
|
{"current_steps": 29155, "total_steps": 37885, "loss": 0.0, "lr": 3.0650758995114335e-07, "epoch": 3.847828956051208, "percentage": 76.96, "elapsed_time": "0:43:01", "remaining_time": "0:12:52", "throughput": 5560.74, "total_tokens": 14353408}
|
|
{"current_steps": 29160, "total_steps": 37885, "loss": 0.0, "lr": 3.061757471747313e-07, "epoch": 3.848488847828956, "percentage": 76.97, "elapsed_time": "0:43:01", "remaining_time": "0:12:52", "throughput": 5560.89, "total_tokens": 14355712}
|
|
{"current_steps": 29165, "total_steps": 37885, "loss": 0.0089, "lr": 3.058440516574918e-07, "epoch": 3.8491487396067043, "percentage": 76.98, "elapsed_time": "0:43:01", "remaining_time": "0:12:51", "throughput": 5561.06, "total_tokens": 14358016}
|
|
{"current_steps": 29170, "total_steps": 37885, "loss": 0.0337, "lr": 3.055125034698265e-07, "epoch": 3.849808631384453, "percentage": 77.0, "elapsed_time": "0:43:02", "remaining_time": "0:12:51", "throughput": 5561.31, "total_tokens": 14360576}
|
|
{"current_steps": 29175, "total_steps": 37885, "loss": 0.0004, "lr": 3.051811026821027e-07, "epoch": 3.8504685231622013, "percentage": 77.01, "elapsed_time": "0:43:02", "remaining_time": "0:12:51", "throughput": 5561.53, "total_tokens": 14363008}
|
|
{"current_steps": 29180, "total_steps": 37885, "loss": 0.0003, "lr": 3.04849849364659e-07, "epoch": 3.85112841493995, "percentage": 77.02, "elapsed_time": "0:43:02", "remaining_time": "0:12:50", "throughput": 5561.73, "total_tokens": 14365376}
|
|
{"current_steps": 29185, "total_steps": 37885, "loss": 0.0001, "lr": 3.045187435878003e-07, "epoch": 3.8517883067176983, "percentage": 77.04, "elapsed_time": "0:43:03", "remaining_time": "0:12:50", "throughput": 5561.96, "total_tokens": 14367872}
|
|
{"current_steps": 29190, "total_steps": 37885, "loss": 0.0001, "lr": 3.041877854218021e-07, "epoch": 3.8524481984954466, "percentage": 77.05, "elapsed_time": "0:43:03", "remaining_time": "0:12:49", "throughput": 5562.18, "total_tokens": 14370304}
|
|
{"current_steps": 29195, "total_steps": 37885, "loss": 0.0, "lr": 3.0385697493690807e-07, "epoch": 3.8531080902731953, "percentage": 77.06, "elapsed_time": "0:43:03", "remaining_time": "0:12:49", "throughput": 5562.48, "total_tokens": 14372928}
|
|
{"current_steps": 29200, "total_steps": 37885, "loss": 0.0004, "lr": 3.0352631220332945e-07, "epoch": 3.8537679820509436, "percentage": 77.08, "elapsed_time": "0:43:04", "remaining_time": "0:12:48", "throughput": 5562.71, "total_tokens": 14375360}
|
|
{"current_steps": 29205, "total_steps": 37885, "loss": 0.1616, "lr": 3.031957972912482e-07, "epoch": 3.8544278738286923, "percentage": 77.09, "elapsed_time": "0:43:04", "remaining_time": "0:12:48", "throughput": 5562.98, "total_tokens": 14377920}
|
|
{"current_steps": 29210, "total_steps": 37885, "loss": 0.0, "lr": 3.028654302708131e-07, "epoch": 3.8550877656064406, "percentage": 77.1, "elapsed_time": "0:43:04", "remaining_time": "0:12:47", "throughput": 5563.22, "total_tokens": 14380352}
|
|
{"current_steps": 29215, "total_steps": 37885, "loss": 0.0007, "lr": 3.025352112121419e-07, "epoch": 3.855747657384189, "percentage": 77.11, "elapsed_time": "0:43:05", "remaining_time": "0:12:47", "throughput": 5563.49, "total_tokens": 14382912}
|
|
{"current_steps": 29220, "total_steps": 37885, "loss": 0.0, "lr": 3.022051401853214e-07, "epoch": 3.856407549161937, "percentage": 77.13, "elapsed_time": "0:43:05", "remaining_time": "0:12:46", "throughput": 5563.73, "total_tokens": 14385344}
|
|
{"current_steps": 29225, "total_steps": 37885, "loss": 0.0001, "lr": 3.018752172604069e-07, "epoch": 3.857067440939686, "percentage": 77.14, "elapsed_time": "0:43:05", "remaining_time": "0:12:46", "throughput": 5564.0, "total_tokens": 14387840}
|
|
{"current_steps": 29230, "total_steps": 37885, "loss": 0.0002, "lr": 3.015454425074224e-07, "epoch": 3.857727332717434, "percentage": 77.15, "elapsed_time": "0:43:06", "remaining_time": "0:12:45", "throughput": 5564.14, "total_tokens": 14390016}
|
|
{"current_steps": 29235, "total_steps": 37885, "loss": 0.0, "lr": 3.0121581599635973e-07, "epoch": 3.858387224495183, "percentage": 77.17, "elapsed_time": "0:43:06", "remaining_time": "0:12:45", "throughput": 5564.35, "total_tokens": 14392384}
|
|
{"current_steps": 29240, "total_steps": 37885, "loss": 0.0, "lr": 3.0088633779717975e-07, "epoch": 3.859047116272931, "percentage": 77.18, "elapsed_time": "0:43:06", "remaining_time": "0:12:44", "throughput": 5564.56, "total_tokens": 14394752}
|
|
{"current_steps": 29245, "total_steps": 37885, "loss": 0.0004, "lr": 3.0055700797981244e-07, "epoch": 3.8597070080506795, "percentage": 77.19, "elapsed_time": "0:43:07", "remaining_time": "0:12:44", "throughput": 5564.79, "total_tokens": 14397184}
|
|
{"current_steps": 29250, "total_steps": 37885, "loss": 0.0, "lr": 3.002278266141548e-07, "epoch": 3.860366899828428, "percentage": 77.21, "elapsed_time": "0:43:07", "remaining_time": "0:12:43", "throughput": 5565.07, "total_tokens": 14399744}
|
|
{"current_steps": 29255, "total_steps": 37885, "loss": 0.0, "lr": 2.9989879377007375e-07, "epoch": 3.8610267916061765, "percentage": 77.22, "elapsed_time": "0:43:07", "remaining_time": "0:12:43", "throughput": 5565.29, "total_tokens": 14402112}
|
|
{"current_steps": 29260, "total_steps": 37885, "loss": 0.0, "lr": 2.995699095174041e-07, "epoch": 3.861686683383925, "percentage": 77.23, "elapsed_time": "0:43:08", "remaining_time": "0:12:42", "throughput": 5565.52, "total_tokens": 14404544}
|
|
{"current_steps": 29265, "total_steps": 37885, "loss": 0.0, "lr": 2.9924117392594893e-07, "epoch": 3.8623465751616735, "percentage": 77.25, "elapsed_time": "0:43:08", "remaining_time": "0:12:42", "throughput": 5565.66, "total_tokens": 14406720}
|
|
{"current_steps": 29270, "total_steps": 37885, "loss": 0.0, "lr": 2.9891258706547997e-07, "epoch": 3.8630064669394217, "percentage": 77.26, "elapsed_time": "0:43:08", "remaining_time": "0:12:41", "throughput": 5566.01, "total_tokens": 14409472}
|
|
{"current_steps": 29275, "total_steps": 37885, "loss": 0.0366, "lr": 2.9858414900573757e-07, "epoch": 3.8636663587171705, "percentage": 77.27, "elapsed_time": "0:43:09", "remaining_time": "0:12:41", "throughput": 5566.24, "total_tokens": 14411904}
|
|
{"current_steps": 29280, "total_steps": 37885, "loss": 0.0411, "lr": 2.9825585981643064e-07, "epoch": 3.8643262504949187, "percentage": 77.29, "elapsed_time": "0:43:09", "remaining_time": "0:12:41", "throughput": 5566.5, "total_tokens": 14414400}
|
|
{"current_steps": 29285, "total_steps": 37885, "loss": 0.0, "lr": 2.9792771956723537e-07, "epoch": 3.8649861422726675, "percentage": 77.3, "elapsed_time": "0:43:09", "remaining_time": "0:12:40", "throughput": 5566.75, "total_tokens": 14416896}
|
|
{"current_steps": 29290, "total_steps": 37885, "loss": 0.0, "lr": 2.9759972832779776e-07, "epoch": 3.8656460340504157, "percentage": 77.31, "elapsed_time": "0:43:10", "remaining_time": "0:12:40", "throughput": 5566.98, "total_tokens": 14419328}
|
|
{"current_steps": 29295, "total_steps": 37885, "loss": 0.0002, "lr": 2.972718861677317e-07, "epoch": 3.866305925828164, "percentage": 77.33, "elapsed_time": "0:43:10", "remaining_time": "0:12:39", "throughput": 5567.19, "total_tokens": 14421696}
|
|
{"current_steps": 29300, "total_steps": 37885, "loss": 0.0, "lr": 2.969441931566188e-07, "epoch": 3.8669658176059127, "percentage": 77.34, "elapsed_time": "0:43:10", "remaining_time": "0:12:39", "throughput": 5567.35, "total_tokens": 14423936}
|
|
{"current_steps": 29305, "total_steps": 37885, "loss": 0.0001, "lr": 2.9661664936400964e-07, "epoch": 3.867625709383661, "percentage": 77.35, "elapsed_time": "0:43:11", "remaining_time": "0:12:38", "throughput": 5567.61, "total_tokens": 14426432}
|
|
{"current_steps": 29310, "total_steps": 37885, "loss": 0.0, "lr": 2.9628925485942357e-07, "epoch": 3.8682856011614097, "percentage": 77.37, "elapsed_time": "0:43:11", "remaining_time": "0:12:38", "throughput": 5567.77, "total_tokens": 14428672}
|
|
{"current_steps": 29315, "total_steps": 37885, "loss": 0.0001, "lr": 2.9596200971234687e-07, "epoch": 3.868945492939158, "percentage": 77.38, "elapsed_time": "0:43:11", "remaining_time": "0:12:37", "throughput": 5567.98, "total_tokens": 14431040}
|
|
{"current_steps": 29320, "total_steps": 37885, "loss": 0.0657, "lr": 2.956349139922357e-07, "epoch": 3.8696053847169063, "percentage": 77.39, "elapsed_time": "0:43:12", "remaining_time": "0:12:37", "throughput": 5568.22, "total_tokens": 14433472}
|
|
{"current_steps": 29325, "total_steps": 37885, "loss": 0.0001, "lr": 2.9530796776851283e-07, "epoch": 3.870265276494655, "percentage": 77.41, "elapsed_time": "0:43:12", "remaining_time": "0:12:36", "throughput": 5568.48, "total_tokens": 14435968}
|
|
{"current_steps": 29330, "total_steps": 37885, "loss": 0.0, "lr": 2.9498117111057155e-07, "epoch": 3.8709251682724033, "percentage": 77.42, "elapsed_time": "0:43:12", "remaining_time": "0:12:36", "throughput": 5568.69, "total_tokens": 14438336}
|
|
{"current_steps": 29335, "total_steps": 37885, "loss": 0.0001, "lr": 2.9465452408777126e-07, "epoch": 3.871585060050152, "percentage": 77.43, "elapsed_time": "0:43:13", "remaining_time": "0:12:35", "throughput": 5568.96, "total_tokens": 14440896}
|
|
{"current_steps": 29340, "total_steps": 37885, "loss": 0.0, "lr": 2.943280267694399e-07, "epoch": 3.8722449518279003, "percentage": 77.44, "elapsed_time": "0:43:13", "remaining_time": "0:12:35", "throughput": 5569.22, "total_tokens": 14443392}
|
|
{"current_steps": 29345, "total_steps": 37885, "loss": 0.0009, "lr": 2.940016792248754e-07, "epoch": 3.8729048436056486, "percentage": 77.46, "elapsed_time": "0:43:13", "remaining_time": "0:12:34", "throughput": 5569.5, "total_tokens": 14445952}
|
|
{"current_steps": 29350, "total_steps": 37885, "loss": 0.0611, "lr": 2.936754815233417e-07, "epoch": 3.873564735383397, "percentage": 77.47, "elapsed_time": "0:43:14", "remaining_time": "0:12:34", "throughput": 5569.69, "total_tokens": 14448256}
|
|
{"current_steps": 29355, "total_steps": 37885, "loss": 0.0, "lr": 2.933494337340726e-07, "epoch": 3.8742246271611456, "percentage": 77.48, "elapsed_time": "0:43:14", "remaining_time": "0:12:33", "throughput": 5569.9, "total_tokens": 14450624}
|
|
{"current_steps": 29360, "total_steps": 37885, "loss": 0.0, "lr": 2.930235359262687e-07, "epoch": 3.8748845189388943, "percentage": 77.5, "elapsed_time": "0:43:14", "remaining_time": "0:12:33", "throughput": 5570.13, "total_tokens": 14453056}
|
|
{"current_steps": 29365, "total_steps": 37885, "loss": 0.0, "lr": 2.9269778816909985e-07, "epoch": 3.8755444107166426, "percentage": 77.51, "elapsed_time": "0:43:15", "remaining_time": "0:12:32", "throughput": 5570.41, "total_tokens": 14455616}
|
|
{"current_steps": 29370, "total_steps": 37885, "loss": 0.0, "lr": 2.9237219053170383e-07, "epoch": 3.876204302494391, "percentage": 77.52, "elapsed_time": "0:43:15", "remaining_time": "0:12:32", "throughput": 5570.54, "total_tokens": 14457792}
|
|
{"current_steps": 29375, "total_steps": 37885, "loss": 0.0, "lr": 2.920467430831858e-07, "epoch": 3.876864194272139, "percentage": 77.54, "elapsed_time": "0:43:15", "remaining_time": "0:12:31", "throughput": 5570.73, "total_tokens": 14460096}
|
|
{"current_steps": 29380, "total_steps": 37885, "loss": 0.0019, "lr": 2.917214458926199e-07, "epoch": 3.877524086049888, "percentage": 77.55, "elapsed_time": "0:43:16", "remaining_time": "0:12:31", "throughput": 5571.14, "total_tokens": 14463040}
|
|
{"current_steps": 29385, "total_steps": 37885, "loss": 0.0, "lr": 2.913962990290486e-07, "epoch": 3.878183977827636, "percentage": 77.56, "elapsed_time": "0:43:16", "remaining_time": "0:12:31", "throughput": 5571.37, "total_tokens": 14465472}
|
|
{"current_steps": 29390, "total_steps": 37885, "loss": 0.0891, "lr": 2.910713025614812e-07, "epoch": 3.878843869605385, "percentage": 77.58, "elapsed_time": "0:43:16", "remaining_time": "0:12:30", "throughput": 5571.63, "total_tokens": 14467968}
|
|
{"current_steps": 29395, "total_steps": 37885, "loss": 0.0, "lr": 2.9074645655889604e-07, "epoch": 3.879503761383133, "percentage": 77.59, "elapsed_time": "0:43:17", "remaining_time": "0:12:30", "throughput": 5571.95, "total_tokens": 14470656}
|
|
{"current_steps": 29400, "total_steps": 37885, "loss": 0.0004, "lr": 2.904217610902396e-07, "epoch": 3.8801636531608814, "percentage": 77.6, "elapsed_time": "0:43:17", "remaining_time": "0:12:29", "throughput": 5572.04, "total_tokens": 14472704}
|
|
{"current_steps": 29405, "total_steps": 37885, "loss": 0.0, "lr": 2.900972162244263e-07, "epoch": 3.88082354493863, "percentage": 77.62, "elapsed_time": "0:43:17", "remaining_time": "0:12:29", "throughput": 5572.27, "total_tokens": 14475136}
|
|
{"current_steps": 29410, "total_steps": 37885, "loss": 0.0491, "lr": 2.897728220303378e-07, "epoch": 3.8814834367163784, "percentage": 77.63, "elapsed_time": "0:43:18", "remaining_time": "0:12:28", "throughput": 5572.48, "total_tokens": 14477504}
|
|
{"current_steps": 29415, "total_steps": 37885, "loss": 0.0239, "lr": 2.894485785768248e-07, "epoch": 3.882143328494127, "percentage": 77.64, "elapsed_time": "0:43:18", "remaining_time": "0:12:28", "throughput": 5572.71, "total_tokens": 14479936}
|
|
{"current_steps": 29420, "total_steps": 37885, "loss": 0.1459, "lr": 2.891244859327059e-07, "epoch": 3.8828032202718754, "percentage": 77.66, "elapsed_time": "0:43:18", "remaining_time": "0:12:27", "throughput": 5572.94, "total_tokens": 14482368}
|
|
{"current_steps": 29425, "total_steps": 37885, "loss": 0.0, "lr": 2.888005441667668e-07, "epoch": 3.8834631120496237, "percentage": 77.67, "elapsed_time": "0:43:19", "remaining_time": "0:12:27", "throughput": 5573.14, "total_tokens": 14484736}
|
|
{"current_steps": 29430, "total_steps": 37885, "loss": 0.0049, "lr": 2.88476753347762e-07, "epoch": 3.8841230038273724, "percentage": 77.68, "elapsed_time": "0:43:19", "remaining_time": "0:12:26", "throughput": 5573.42, "total_tokens": 14487296}
|
|
{"current_steps": 29435, "total_steps": 37885, "loss": 0.0, "lr": 2.881531135444143e-07, "epoch": 3.8847828956051207, "percentage": 77.7, "elapsed_time": "0:43:19", "remaining_time": "0:12:26", "throughput": 5573.5, "total_tokens": 14489344}
|
|
{"current_steps": 29440, "total_steps": 37885, "loss": 0.0386, "lr": 2.878296248254131e-07, "epoch": 3.8854427873828694, "percentage": 77.71, "elapsed_time": "0:43:20", "remaining_time": "0:12:25", "throughput": 5573.84, "total_tokens": 14492096}
|
|
{"current_steps": 29445, "total_steps": 37885, "loss": 0.0065, "lr": 2.8750628725941685e-07, "epoch": 3.8861026791606177, "percentage": 77.72, "elapsed_time": "0:43:20", "remaining_time": "0:12:25", "throughput": 5574.13, "total_tokens": 14494720}
|
|
{"current_steps": 29450, "total_steps": 37885, "loss": 0.0, "lr": 2.8718310091505173e-07, "epoch": 3.886762570938366, "percentage": 77.74, "elapsed_time": "0:43:20", "remaining_time": "0:12:24", "throughput": 5574.41, "total_tokens": 14497280}
|
|
{"current_steps": 29455, "total_steps": 37885, "loss": 0.0001, "lr": 2.8686006586091183e-07, "epoch": 3.8874224627161147, "percentage": 77.75, "elapsed_time": "0:43:21", "remaining_time": "0:12:24", "throughput": 5574.7, "total_tokens": 14499904}
|
|
{"current_steps": 29460, "total_steps": 37885, "loss": 0.0854, "lr": 2.8653718216555854e-07, "epoch": 3.888082354493863, "percentage": 77.76, "elapsed_time": "0:43:21", "remaining_time": "0:12:23", "throughput": 5575.07, "total_tokens": 14502784}
|
|
{"current_steps": 29465, "total_steps": 37885, "loss": 0.0, "lr": 2.8621444989752184e-07, "epoch": 3.8887422462716117, "percentage": 77.77, "elapsed_time": "0:43:21", "remaining_time": "0:12:23", "throughput": 5575.2, "total_tokens": 14504960}
|
|
{"current_steps": 29470, "total_steps": 37885, "loss": 0.0, "lr": 2.858918691252997e-07, "epoch": 3.88940213804936, "percentage": 77.79, "elapsed_time": "0:43:22", "remaining_time": "0:12:22", "throughput": 5575.48, "total_tokens": 14507520}
|
|
{"current_steps": 29475, "total_steps": 37885, "loss": 0.0, "lr": 2.855694399173568e-07, "epoch": 3.8900620298271082, "percentage": 77.8, "elapsed_time": "0:43:22", "remaining_time": "0:12:22", "throughput": 5575.73, "total_tokens": 14510016}
|
|
{"current_steps": 29480, "total_steps": 37885, "loss": 0.0009, "lr": 2.8524716234212684e-07, "epoch": 3.890721921604857, "percentage": 77.81, "elapsed_time": "0:43:22", "remaining_time": "0:12:22", "throughput": 5575.98, "total_tokens": 14512512}
|
|
{"current_steps": 29485, "total_steps": 37885, "loss": 0.0, "lr": 2.849250364680108e-07, "epoch": 3.8913818133826052, "percentage": 77.83, "elapsed_time": "0:43:23", "remaining_time": "0:12:21", "throughput": 5576.09, "total_tokens": 14514624}
|
|
{"current_steps": 29490, "total_steps": 37885, "loss": 0.0, "lr": 2.846030623633778e-07, "epoch": 3.892041705160354, "percentage": 77.84, "elapsed_time": "0:43:23", "remaining_time": "0:12:21", "throughput": 5576.28, "total_tokens": 14516928}
|
|
{"current_steps": 29495, "total_steps": 37885, "loss": 0.0, "lr": 2.842812400965645e-07, "epoch": 3.8927015969381022, "percentage": 77.85, "elapsed_time": "0:43:23", "remaining_time": "0:12:20", "throughput": 5576.49, "total_tokens": 14519296}
|
|
{"current_steps": 29500, "total_steps": 37885, "loss": 0.1298, "lr": 2.839595697358744e-07, "epoch": 3.8933614887158505, "percentage": 77.87, "elapsed_time": "0:43:23", "remaining_time": "0:12:20", "throughput": 5576.72, "total_tokens": 14521728}
|
|
{"current_steps": 29505, "total_steps": 37885, "loss": 0.0023, "lr": 2.836380513495812e-07, "epoch": 3.894021380493599, "percentage": 77.88, "elapsed_time": "0:43:24", "remaining_time": "0:12:19", "throughput": 5576.98, "total_tokens": 14524224}
|
|
{"current_steps": 29510, "total_steps": 37885, "loss": 0.0001, "lr": 2.8331668500592374e-07, "epoch": 3.8946812722713475, "percentage": 77.89, "elapsed_time": "0:43:24", "remaining_time": "0:12:19", "throughput": 5577.3, "total_tokens": 14526912}
|
|
{"current_steps": 29515, "total_steps": 37885, "loss": 0.0003, "lr": 2.829954707731104e-07, "epoch": 3.895341164049096, "percentage": 77.91, "elapsed_time": "0:43:24", "remaining_time": "0:12:18", "throughput": 5577.51, "total_tokens": 14529280}
|
|
{"current_steps": 29520, "total_steps": 37885, "loss": 0.0008, "lr": 2.826744087193159e-07, "epoch": 3.8960010558268445, "percentage": 77.92, "elapsed_time": "0:43:25", "remaining_time": "0:12:18", "throughput": 5577.76, "total_tokens": 14531776}
|
|
{"current_steps": 29525, "total_steps": 37885, "loss": 0.0009, "lr": 2.823534989126838e-07, "epoch": 3.896660947604593, "percentage": 77.93, "elapsed_time": "0:43:25", "remaining_time": "0:12:17", "throughput": 5577.89, "total_tokens": 14533952}
|
|
{"current_steps": 29530, "total_steps": 37885, "loss": 0.0, "lr": 2.820327414213249e-07, "epoch": 3.897320839382341, "percentage": 77.95, "elapsed_time": "0:43:25", "remaining_time": "0:12:17", "throughput": 5578.03, "total_tokens": 14536128}
|
|
{"current_steps": 29535, "total_steps": 37885, "loss": 0.024, "lr": 2.8171213631331714e-07, "epoch": 3.89798073116009, "percentage": 77.96, "elapsed_time": "0:43:26", "remaining_time": "0:12:16", "throughput": 5578.44, "total_tokens": 14539072}
|
|
{"current_steps": 29540, "total_steps": 37885, "loss": 0.0002, "lr": 2.813916836567074e-07, "epoch": 3.898640622937838, "percentage": 77.97, "elapsed_time": "0:43:26", "remaining_time": "0:12:16", "throughput": 5578.71, "total_tokens": 14541632}
|
|
{"current_steps": 29545, "total_steps": 37885, "loss": 0.2078, "lr": 2.810713835195092e-07, "epoch": 3.899300514715587, "percentage": 77.99, "elapsed_time": "0:43:26", "remaining_time": "0:12:15", "throughput": 5578.8, "total_tokens": 14543680}
|
|
{"current_steps": 29550, "total_steps": 37885, "loss": 0.0, "lr": 2.807512359697034e-07, "epoch": 3.899960406493335, "percentage": 78.0, "elapsed_time": "0:43:27", "remaining_time": "0:12:15", "throughput": 5579.01, "total_tokens": 14546048}
|
|
{"current_steps": 29555, "total_steps": 37885, "loss": 0.0412, "lr": 2.8043124107523943e-07, "epoch": 3.9006202982710834, "percentage": 78.01, "elapsed_time": "0:43:27", "remaining_time": "0:12:14", "throughput": 5579.24, "total_tokens": 14548480}
|
|
{"current_steps": 29560, "total_steps": 37885, "loss": 0.0004, "lr": 2.801113989040338e-07, "epoch": 3.901280190048832, "percentage": 78.03, "elapsed_time": "0:43:27", "remaining_time": "0:12:14", "throughput": 5579.5, "total_tokens": 14550976}
|
|
{"current_steps": 29565, "total_steps": 37885, "loss": 0.0, "lr": 2.7979170952397103e-07, "epoch": 3.9019400818265804, "percentage": 78.04, "elapsed_time": "0:43:28", "remaining_time": "0:12:14", "throughput": 5579.79, "total_tokens": 14553600}
|
|
{"current_steps": 29570, "total_steps": 37885, "loss": 0.0056, "lr": 2.7947217300290225e-07, "epoch": 3.902599973604329, "percentage": 78.05, "elapsed_time": "0:43:28", "remaining_time": "0:12:13", "throughput": 5580.07, "total_tokens": 14556160}
|
|
{"current_steps": 29575, "total_steps": 37885, "loss": 0.0337, "lr": 2.791527894086472e-07, "epoch": 3.9032598653820774, "percentage": 78.07, "elapsed_time": "0:43:28", "remaining_time": "0:12:13", "throughput": 5580.41, "total_tokens": 14558912}
|
|
{"current_steps": 29580, "total_steps": 37885, "loss": 0.002, "lr": 2.7883355880899286e-07, "epoch": 3.9039197571598256, "percentage": 78.08, "elapsed_time": "0:43:29", "remaining_time": "0:12:12", "throughput": 5580.67, "total_tokens": 14561408}
|
|
{"current_steps": 29585, "total_steps": 37885, "loss": 0.0005, "lr": 2.78514481271693e-07, "epoch": 3.9045796489375744, "percentage": 78.09, "elapsed_time": "0:43:29", "remaining_time": "0:12:12", "throughput": 5580.83, "total_tokens": 14563648}
|
|
{"current_steps": 29590, "total_steps": 37885, "loss": 0.0, "lr": 2.7819555686447004e-07, "epoch": 3.9052395407153226, "percentage": 78.1, "elapsed_time": "0:43:29", "remaining_time": "0:12:11", "throughput": 5580.99, "total_tokens": 14565888}
|
|
{"current_steps": 29595, "total_steps": 37885, "loss": 0.008, "lr": 2.7787678565501347e-07, "epoch": 3.9058994324930714, "percentage": 78.12, "elapsed_time": "0:43:30", "remaining_time": "0:12:11", "throughput": 5581.24, "total_tokens": 14568384}
|
|
{"current_steps": 29600, "total_steps": 37885, "loss": 0.0, "lr": 2.7755816771097963e-07, "epoch": 3.9065593242708196, "percentage": 78.13, "elapsed_time": "0:43:30", "remaining_time": "0:12:10", "throughput": 5581.33, "total_tokens": 14570432}
|
|
{"current_steps": 29605, "total_steps": 37885, "loss": 0.0154, "lr": 2.7723970309999324e-07, "epoch": 3.907219216048568, "percentage": 78.14, "elapsed_time": "0:43:30", "remaining_time": "0:12:10", "throughput": 5581.56, "total_tokens": 14572864}
|
|
{"current_steps": 29610, "total_steps": 37885, "loss": 0.0, "lr": 2.7692139188964594e-07, "epoch": 3.9078791078263166, "percentage": 78.16, "elapsed_time": "0:43:31", "remaining_time": "0:12:09", "throughput": 5581.73, "total_tokens": 14575104}
|
|
{"current_steps": 29615, "total_steps": 37885, "loss": 0.0083, "lr": 2.766032341474975e-07, "epoch": 3.908538999604065, "percentage": 78.17, "elapsed_time": "0:43:31", "remaining_time": "0:12:09", "throughput": 5582.0, "total_tokens": 14577664}
|
|
{"current_steps": 29620, "total_steps": 37885, "loss": 0.0004, "lr": 2.762852299410738e-07, "epoch": 3.9091988913818136, "percentage": 78.18, "elapsed_time": "0:43:31", "remaining_time": "0:12:08", "throughput": 5582.32, "total_tokens": 14580352}
|
|
{"current_steps": 29625, "total_steps": 37885, "loss": 0.0, "lr": 2.759673793378694e-07, "epoch": 3.909858783159562, "percentage": 78.2, "elapsed_time": "0:43:32", "remaining_time": "0:12:08", "throughput": 5582.55, "total_tokens": 14582784}
|
|
{"current_steps": 29630, "total_steps": 37885, "loss": 0.0001, "lr": 2.7564968240534594e-07, "epoch": 3.91051867493731, "percentage": 78.21, "elapsed_time": "0:43:32", "remaining_time": "0:12:07", "throughput": 5582.77, "total_tokens": 14585216}
|
|
{"current_steps": 29635, "total_steps": 37885, "loss": 0.0611, "lr": 2.753321392109318e-07, "epoch": 3.9111785667150585, "percentage": 78.22, "elapsed_time": "0:43:32", "remaining_time": "0:12:07", "throughput": 5582.98, "total_tokens": 14587584}
|
|
{"current_steps": 29640, "total_steps": 37885, "loss": 0.0001, "lr": 2.7501474982202345e-07, "epoch": 3.911838458492807, "percentage": 78.24, "elapsed_time": "0:43:33", "remaining_time": "0:12:06", "throughput": 5583.19, "total_tokens": 14589952}
|
|
{"current_steps": 29645, "total_steps": 37885, "loss": 0.0001, "lr": 2.7469751430598486e-07, "epoch": 3.9124983502705555, "percentage": 78.25, "elapsed_time": "0:43:33", "remaining_time": "0:12:06", "throughput": 5583.4, "total_tokens": 14592320}
|
|
{"current_steps": 29650, "total_steps": 37885, "loss": 0.0266, "lr": 2.743804327301462e-07, "epoch": 3.913158242048304, "percentage": 78.26, "elapsed_time": "0:43:33", "remaining_time": "0:12:05", "throughput": 5583.56, "total_tokens": 14594560}
|
|
{"current_steps": 29655, "total_steps": 37885, "loss": 0.0725, "lr": 2.7406350516180666e-07, "epoch": 3.9138181338260525, "percentage": 78.28, "elapsed_time": "0:43:34", "remaining_time": "0:12:05", "throughput": 5583.89, "total_tokens": 14597248}
|
|
{"current_steps": 29660, "total_steps": 37885, "loss": 0.0, "lr": 2.7374673166823057e-07, "epoch": 3.9144780256038008, "percentage": 78.29, "elapsed_time": "0:43:34", "remaining_time": "0:12:05", "throughput": 5584.04, "total_tokens": 14599488}
|
|
{"current_steps": 29665, "total_steps": 37885, "loss": 0.0, "lr": 2.7343011231665227e-07, "epoch": 3.9151379173815495, "percentage": 78.3, "elapsed_time": "0:43:34", "remaining_time": "0:12:04", "throughput": 5584.2, "total_tokens": 14601728}
|
|
{"current_steps": 29670, "total_steps": 37885, "loss": 0.0, "lr": 2.731136471742712e-07, "epoch": 3.9157978091592978, "percentage": 78.32, "elapsed_time": "0:43:35", "remaining_time": "0:12:04", "throughput": 5584.43, "total_tokens": 14604160}
|
|
{"current_steps": 29675, "total_steps": 37885, "loss": 0.0, "lr": 2.7279733630825417e-07, "epoch": 3.9164577009370465, "percentage": 78.33, "elapsed_time": "0:43:35", "remaining_time": "0:12:03", "throughput": 5584.66, "total_tokens": 14606592}
|
|
{"current_steps": 29680, "total_steps": 37885, "loss": 0.001, "lr": 2.7248117978573725e-07, "epoch": 3.9171175927147948, "percentage": 78.34, "elapsed_time": "0:43:35", "remaining_time": "0:12:03", "throughput": 5584.88, "total_tokens": 14609024}
|
|
{"current_steps": 29685, "total_steps": 37885, "loss": 0.1096, "lr": 2.721651776738212e-07, "epoch": 3.917777484492543, "percentage": 78.36, "elapsed_time": "0:43:36", "remaining_time": "0:12:02", "throughput": 5585.09, "total_tokens": 14611392}
|
|
{"current_steps": 29690, "total_steps": 37885, "loss": 0.0004, "lr": 2.71849330039576e-07, "epoch": 3.9184373762702918, "percentage": 78.37, "elapsed_time": "0:43:36", "remaining_time": "0:12:02", "throughput": 5585.3, "total_tokens": 14613760}
|
|
{"current_steps": 29695, "total_steps": 37885, "loss": 0.0, "lr": 2.715336369500374e-07, "epoch": 3.91909726804804, "percentage": 78.38, "elapsed_time": "0:43:36", "remaining_time": "0:12:01", "throughput": 5585.5, "total_tokens": 14616128}
|
|
{"current_steps": 29700, "total_steps": 37885, "loss": 0.0441, "lr": 2.712180984722091e-07, "epoch": 3.9197571598257888, "percentage": 78.4, "elapsed_time": "0:43:37", "remaining_time": "0:12:01", "throughput": 5585.82, "total_tokens": 14618816}
|
|
{"current_steps": 29705, "total_steps": 37885, "loss": 0.0002, "lr": 2.7090271467306235e-07, "epoch": 3.920417051603537, "percentage": 78.41, "elapsed_time": "0:43:37", "remaining_time": "0:12:00", "throughput": 5586.04, "total_tokens": 14621184}
|
|
{"current_steps": 29710, "total_steps": 37885, "loss": 0.0, "lr": 2.705874856195344e-07, "epoch": 3.9210769433812853, "percentage": 78.42, "elapsed_time": "0:43:37", "remaining_time": "0:12:00", "throughput": 5586.37, "total_tokens": 14623936}
|
|
{"current_steps": 29715, "total_steps": 37885, "loss": 0.0797, "lr": 2.702724113785305e-07, "epoch": 3.921736835159034, "percentage": 78.43, "elapsed_time": "0:43:38", "remaining_time": "0:11:59", "throughput": 5586.53, "total_tokens": 14626176}
|
|
{"current_steps": 29720, "total_steps": 37885, "loss": 0.0506, "lr": 2.6995749201692353e-07, "epoch": 3.9223967269367823, "percentage": 78.45, "elapsed_time": "0:43:38", "remaining_time": "0:11:59", "throughput": 5586.77, "total_tokens": 14628608}
|
|
{"current_steps": 29725, "total_steps": 37885, "loss": 0.0011, "lr": 2.696427276015518e-07, "epoch": 3.923056618714531, "percentage": 78.46, "elapsed_time": "0:43:38", "remaining_time": "0:11:58", "throughput": 5587.12, "total_tokens": 14631424}
|
|
{"current_steps": 29730, "total_steps": 37885, "loss": 0.0049, "lr": 2.693281181992225e-07, "epoch": 3.9237165104922793, "percentage": 78.47, "elapsed_time": "0:43:39", "remaining_time": "0:11:58", "throughput": 5587.33, "total_tokens": 14633792}
|
|
{"current_steps": 29735, "total_steps": 37885, "loss": 0.0009, "lr": 2.6901366387670885e-07, "epoch": 3.9243764022700276, "percentage": 78.49, "elapsed_time": "0:43:39", "remaining_time": "0:11:57", "throughput": 5587.61, "total_tokens": 14636352}
|
|
{"current_steps": 29740, "total_steps": 37885, "loss": 0.0001, "lr": 2.6869936470075214e-07, "epoch": 3.9250362940477763, "percentage": 78.5, "elapsed_time": "0:43:39", "remaining_time": "0:11:57", "throughput": 5587.84, "total_tokens": 14638784}
|
|
{"current_steps": 29745, "total_steps": 37885, "loss": 0.0, "lr": 2.6838522073805915e-07, "epoch": 3.9256961858255246, "percentage": 78.51, "elapsed_time": "0:43:40", "remaining_time": "0:11:57", "throughput": 5588.13, "total_tokens": 14641408}
|
|
{"current_steps": 29750, "total_steps": 37885, "loss": 0.0, "lr": 2.6807123205530523e-07, "epoch": 3.9263560776032733, "percentage": 78.53, "elapsed_time": "0:43:40", "remaining_time": "0:11:56", "throughput": 5588.31, "total_tokens": 14643712}
|
|
{"current_steps": 29755, "total_steps": 37885, "loss": 0.0412, "lr": 2.677573987191323e-07, "epoch": 3.9270159693810216, "percentage": 78.54, "elapsed_time": "0:43:40", "remaining_time": "0:11:56", "throughput": 5588.61, "total_tokens": 14646336}
|
|
{"current_steps": 29760, "total_steps": 37885, "loss": 0.0008, "lr": 2.674437207961487e-07, "epoch": 3.92767586115877, "percentage": 78.55, "elapsed_time": "0:43:41", "remaining_time": "0:11:55", "throughput": 5588.86, "total_tokens": 14648832}
|
|
{"current_steps": 29765, "total_steps": 37885, "loss": 0.0, "lr": 2.671301983529307e-07, "epoch": 3.928335752936518, "percentage": 78.57, "elapsed_time": "0:43:41", "remaining_time": "0:11:55", "throughput": 5589.04, "total_tokens": 14651136}
|
|
{"current_steps": 29770, "total_steps": 37885, "loss": 0.0823, "lr": 2.668168314560213e-07, "epoch": 3.928995644714267, "percentage": 78.58, "elapsed_time": "0:43:41", "remaining_time": "0:11:54", "throughput": 5589.26, "total_tokens": 14653568}
|
|
{"current_steps": 29775, "total_steps": 37885, "loss": 0.0239, "lr": 2.6650362017192986e-07, "epoch": 3.929655536492015, "percentage": 78.59, "elapsed_time": "0:43:42", "remaining_time": "0:11:54", "throughput": 5589.5, "total_tokens": 14656000}
|
|
{"current_steps": 29780, "total_steps": 37885, "loss": 0.0001, "lr": 2.661905645671335e-07, "epoch": 3.930315428269764, "percentage": 78.61, "elapsed_time": "0:43:42", "remaining_time": "0:11:53", "throughput": 5589.72, "total_tokens": 14658432}
|
|
{"current_steps": 29785, "total_steps": 37885, "loss": 0.0035, "lr": 2.658776647080759e-07, "epoch": 3.930975320047512, "percentage": 78.62, "elapsed_time": "0:43:42", "remaining_time": "0:11:53", "throughput": 5590.02, "total_tokens": 14661056}
|
|
{"current_steps": 29790, "total_steps": 37885, "loss": 0.0337, "lr": 2.655649206611683e-07, "epoch": 3.9316352118252604, "percentage": 78.63, "elapsed_time": "0:43:43", "remaining_time": "0:11:52", "throughput": 5590.2, "total_tokens": 14663360}
|
|
{"current_steps": 29795, "total_steps": 37885, "loss": 0.0002, "lr": 2.652523324927876e-07, "epoch": 3.932295103603009, "percentage": 78.65, "elapsed_time": "0:43:43", "remaining_time": "0:11:52", "throughput": 5590.45, "total_tokens": 14665856}
|
|
{"current_steps": 29800, "total_steps": 37885, "loss": 0.0, "lr": 2.649399002692786e-07, "epoch": 3.9329549953807574, "percentage": 78.66, "elapsed_time": "0:43:43", "remaining_time": "0:11:51", "throughput": 5590.65, "total_tokens": 14668224}
|
|
{"current_steps": 29805, "total_steps": 37885, "loss": 0.0, "lr": 2.6462762405695314e-07, "epoch": 3.933614887158506, "percentage": 78.67, "elapsed_time": "0:43:44", "remaining_time": "0:11:51", "throughput": 5590.81, "total_tokens": 14670464}
|
|
{"current_steps": 29810, "total_steps": 37885, "loss": 0.0352, "lr": 2.6431550392208924e-07, "epoch": 3.9342747789362544, "percentage": 78.69, "elapsed_time": "0:43:44", "remaining_time": "0:11:50", "throughput": 5591.11, "total_tokens": 14673088}
|
|
{"current_steps": 29815, "total_steps": 37885, "loss": 0.0, "lr": 2.6400353993093205e-07, "epoch": 3.9349346707140027, "percentage": 78.7, "elapsed_time": "0:43:44", "remaining_time": "0:11:50", "throughput": 5591.36, "total_tokens": 14675584}
|
|
{"current_steps": 29820, "total_steps": 37885, "loss": 0.0, "lr": 2.636917321496939e-07, "epoch": 3.9355945624917514, "percentage": 78.71, "elapsed_time": "0:43:45", "remaining_time": "0:11:49", "throughput": 5591.69, "total_tokens": 14678336}
|
|
{"current_steps": 29825, "total_steps": 37885, "loss": 0.0, "lr": 2.6338008064455395e-07, "epoch": 3.9362544542694997, "percentage": 78.73, "elapsed_time": "0:43:45", "remaining_time": "0:11:49", "throughput": 5591.96, "total_tokens": 14680896}
|
|
{"current_steps": 29830, "total_steps": 37885, "loss": 0.0008, "lr": 2.6306858548165776e-07, "epoch": 3.9369143460472484, "percentage": 78.74, "elapsed_time": "0:43:45", "remaining_time": "0:11:49", "throughput": 5592.14, "total_tokens": 14683200}
|
|
{"current_steps": 29835, "total_steps": 37885, "loss": 0.0, "lr": 2.627572467271172e-07, "epoch": 3.9375742378249967, "percentage": 78.75, "elapsed_time": "0:43:46", "remaining_time": "0:11:48", "throughput": 5592.41, "total_tokens": 14685760}
|
|
{"current_steps": 29840, "total_steps": 37885, "loss": 0.0, "lr": 2.62446064447013e-07, "epoch": 3.938234129602745, "percentage": 78.76, "elapsed_time": "0:43:46", "remaining_time": "0:11:48", "throughput": 5592.66, "total_tokens": 14688256}
|
|
{"current_steps": 29845, "total_steps": 37885, "loss": 0.0617, "lr": 2.621350387073903e-07, "epoch": 3.9388940213804937, "percentage": 78.78, "elapsed_time": "0:43:46", "remaining_time": "0:11:47", "throughput": 5592.82, "total_tokens": 14690496}
|
|
{"current_steps": 29850, "total_steps": 37885, "loss": 0.0, "lr": 2.618241695742628e-07, "epoch": 3.939553913158242, "percentage": 78.79, "elapsed_time": "0:43:47", "remaining_time": "0:11:47", "throughput": 5593.06, "total_tokens": 14692992}
|
|
{"current_steps": 29855, "total_steps": 37885, "loss": 0.0001, "lr": 2.615134571136095e-07, "epoch": 3.9402138049359907, "percentage": 78.8, "elapsed_time": "0:43:47", "remaining_time": "0:11:46", "throughput": 5593.2, "total_tokens": 14695168}
|
|
{"current_steps": 29860, "total_steps": 37885, "loss": 0.1172, "lr": 2.6120290139137726e-07, "epoch": 3.940873696713739, "percentage": 78.82, "elapsed_time": "0:43:47", "remaining_time": "0:11:46", "throughput": 5593.45, "total_tokens": 14697664}
|
|
{"current_steps": 29865, "total_steps": 37885, "loss": 0.0, "lr": 2.608925024734795e-07, "epoch": 3.9415335884914873, "percentage": 78.83, "elapsed_time": "0:43:47", "remaining_time": "0:11:45", "throughput": 5593.8, "total_tokens": 14700480}
|
|
{"current_steps": 29870, "total_steps": 37885, "loss": 0.0001, "lr": 2.605822604257953e-07, "epoch": 3.942193480269236, "percentage": 78.84, "elapsed_time": "0:43:48", "remaining_time": "0:11:45", "throughput": 5594.16, "total_tokens": 14703296}
|
|
{"current_steps": 29875, "total_steps": 37885, "loss": 0.0, "lr": 2.6027217531417256e-07, "epoch": 3.9428533720469843, "percentage": 78.86, "elapsed_time": "0:43:48", "remaining_time": "0:11:44", "throughput": 5594.27, "total_tokens": 14705408}
|
|
{"current_steps": 29880, "total_steps": 37885, "loss": 0.0001, "lr": 2.5996224720442394e-07, "epoch": 3.943513263824733, "percentage": 78.87, "elapsed_time": "0:43:48", "remaining_time": "0:11:44", "throughput": 5594.45, "total_tokens": 14707712}
|
|
{"current_steps": 29885, "total_steps": 37885, "loss": 0.0, "lr": 2.59652476162329e-07, "epoch": 3.9441731556024813, "percentage": 78.88, "elapsed_time": "0:43:49", "remaining_time": "0:11:43", "throughput": 5594.7, "total_tokens": 14710208}
|
|
{"current_steps": 29890, "total_steps": 37885, "loss": 0.0001, "lr": 2.593428622536349e-07, "epoch": 3.9448330473802296, "percentage": 78.9, "elapsed_time": "0:43:49", "remaining_time": "0:11:43", "throughput": 5594.92, "total_tokens": 14712640}
|
|
{"current_steps": 29895, "total_steps": 37885, "loss": 0.061, "lr": 2.5903340554405485e-07, "epoch": 3.945492939157978, "percentage": 78.91, "elapsed_time": "0:43:49", "remaining_time": "0:11:42", "throughput": 5595.17, "total_tokens": 14715136}
|
|
{"current_steps": 29900, "total_steps": 37885, "loss": 0.0, "lr": 2.587241060992691e-07, "epoch": 3.9461528309357266, "percentage": 78.92, "elapsed_time": "0:43:50", "remaining_time": "0:11:42", "throughput": 5595.48, "total_tokens": 14717824}
|
|
{"current_steps": 29905, "total_steps": 37885, "loss": 0.0, "lr": 2.5841496398492366e-07, "epoch": 3.946812722713475, "percentage": 78.94, "elapsed_time": "0:43:50", "remaining_time": "0:11:41", "throughput": 5595.73, "total_tokens": 14720320}
|
|
{"current_steps": 29910, "total_steps": 37885, "loss": 0.0, "lr": 2.5810597926663205e-07, "epoch": 3.9474726144912236, "percentage": 78.95, "elapsed_time": "0:43:50", "remaining_time": "0:11:41", "throughput": 5595.93, "total_tokens": 14722688}
|
|
{"current_steps": 29915, "total_steps": 37885, "loss": 0.1273, "lr": 2.577971520099741e-07, "epoch": 3.948132506268972, "percentage": 78.96, "elapsed_time": "0:43:51", "remaining_time": "0:11:41", "throughput": 5596.09, "total_tokens": 14724928}
|
|
{"current_steps": 29920, "total_steps": 37885, "loss": 0.0, "lr": 2.574884822804958e-07, "epoch": 3.94879239804672, "percentage": 78.98, "elapsed_time": "0:43:51", "remaining_time": "0:11:40", "throughput": 5596.32, "total_tokens": 14727360}
|
|
{"current_steps": 29925, "total_steps": 37885, "loss": 0.0001, "lr": 2.571799701437103e-07, "epoch": 3.949452289824469, "percentage": 78.99, "elapsed_time": "0:43:51", "remaining_time": "0:11:40", "throughput": 5596.56, "total_tokens": 14729856}
|
|
{"current_steps": 29930, "total_steps": 37885, "loss": 0.0003, "lr": 2.568716156650974e-07, "epoch": 3.950112181602217, "percentage": 79.0, "elapsed_time": "0:43:52", "remaining_time": "0:11:39", "throughput": 5596.75, "total_tokens": 14732224}
|
|
{"current_steps": 29935, "total_steps": 37885, "loss": 0.028, "lr": 2.5656341891010236e-07, "epoch": 3.950772073379966, "percentage": 79.02, "elapsed_time": "0:43:52", "remaining_time": "0:11:39", "throughput": 5597.07, "total_tokens": 14734912}
|
|
{"current_steps": 29940, "total_steps": 37885, "loss": 0.0, "lr": 2.5625537994413825e-07, "epoch": 3.951431965157714, "percentage": 79.03, "elapsed_time": "0:43:52", "remaining_time": "0:11:38", "throughput": 5597.25, "total_tokens": 14737216}
|
|
{"current_steps": 29945, "total_steps": 37885, "loss": 0.0523, "lr": 2.559474988325838e-07, "epoch": 3.9520918569354624, "percentage": 79.04, "elapsed_time": "0:43:53", "remaining_time": "0:11:38", "throughput": 5597.48, "total_tokens": 14739648}
|
|
{"current_steps": 29950, "total_steps": 37885, "loss": 0.1603, "lr": 2.556397756407852e-07, "epoch": 3.952751748713211, "percentage": 79.06, "elapsed_time": "0:43:53", "remaining_time": "0:11:37", "throughput": 5597.81, "total_tokens": 14742400}
|
|
{"current_steps": 29955, "total_steps": 37885, "loss": 0.0001, "lr": 2.5533221043405364e-07, "epoch": 3.9534116404909594, "percentage": 79.07, "elapsed_time": "0:43:53", "remaining_time": "0:11:37", "throughput": 5598.03, "total_tokens": 14744832}
|
|
{"current_steps": 29960, "total_steps": 37885, "loss": 0.0003, "lr": 2.5502480327766785e-07, "epoch": 3.954071532268708, "percentage": 79.08, "elapsed_time": "0:43:54", "remaining_time": "0:11:36", "throughput": 5598.31, "total_tokens": 14747392}
|
|
{"current_steps": 29965, "total_steps": 37885, "loss": 0.02, "lr": 2.5471755423687326e-07, "epoch": 3.9547314240464564, "percentage": 79.09, "elapsed_time": "0:43:54", "remaining_time": "0:11:36", "throughput": 5598.57, "total_tokens": 14749952}
|
|
{"current_steps": 29970, "total_steps": 37885, "loss": 0.0, "lr": 2.5441046337688053e-07, "epoch": 3.9553913158242047, "percentage": 79.11, "elapsed_time": "0:43:54", "remaining_time": "0:11:35", "throughput": 5598.8, "total_tokens": 14752384}
|
|
{"current_steps": 29975, "total_steps": 37885, "loss": 0.0849, "lr": 2.541035307628678e-07, "epoch": 3.9560512076019534, "percentage": 79.12, "elapsed_time": "0:43:55", "remaining_time": "0:11:35", "throughput": 5599.05, "total_tokens": 14754880}
|
|
{"current_steps": 29980, "total_steps": 37885, "loss": 0.0002, "lr": 2.5379675645997965e-07, "epoch": 3.9567110993797017, "percentage": 79.13, "elapsed_time": "0:43:55", "remaining_time": "0:11:34", "throughput": 5599.23, "total_tokens": 14757184}
|
|
{"current_steps": 29985, "total_steps": 37885, "loss": 0.0546, "lr": 2.5349014053332604e-07, "epoch": 3.9573709911574504, "percentage": 79.15, "elapsed_time": "0:43:55", "remaining_time": "0:11:34", "throughput": 5599.5, "total_tokens": 14759744}
|
|
{"current_steps": 29990, "total_steps": 37885, "loss": 0.0, "lr": 2.5318368304798464e-07, "epoch": 3.9580308829351987, "percentage": 79.16, "elapsed_time": "0:43:56", "remaining_time": "0:11:33", "throughput": 5599.7, "total_tokens": 14762112}
|
|
{"current_steps": 29995, "total_steps": 37885, "loss": 0.0, "lr": 2.5287738406899783e-07, "epoch": 3.958690774712947, "percentage": 79.17, "elapsed_time": "0:43:56", "remaining_time": "0:11:33", "throughput": 5599.95, "total_tokens": 14764608}
|
|
{"current_steps": 30000, "total_steps": 37885, "loss": 0.0015, "lr": 2.525712436613767e-07, "epoch": 3.9593506664906957, "percentage": 79.19, "elapsed_time": "0:43:56", "remaining_time": "0:11:33", "throughput": 5600.19, "total_tokens": 14767104}
|
|
{"current_steps": 30005, "total_steps": 37885, "loss": 0.0012, "lr": 2.5226526189009656e-07, "epoch": 3.960010558268444, "percentage": 79.2, "elapsed_time": "0:43:57", "remaining_time": "0:11:32", "throughput": 5600.51, "total_tokens": 14769792}
|
|
{"current_steps": 30010, "total_steps": 37885, "loss": 0.0001, "lr": 2.519594388200994e-07, "epoch": 3.9606704500461927, "percentage": 79.21, "elapsed_time": "0:43:57", "remaining_time": "0:11:32", "throughput": 5600.73, "total_tokens": 14772224}
|
|
{"current_steps": 30015, "total_steps": 37885, "loss": 0.0007, "lr": 2.51653774516295e-07, "epoch": 3.961330341823941, "percentage": 79.23, "elapsed_time": "0:43:57", "remaining_time": "0:11:31", "throughput": 5601.01, "total_tokens": 14774784}
|
|
{"current_steps": 30020, "total_steps": 37885, "loss": 0.0472, "lr": 2.5134826904355767e-07, "epoch": 3.9619902336016892, "percentage": 79.24, "elapsed_time": "0:43:58", "remaining_time": "0:11:31", "throughput": 5601.19, "total_tokens": 14777088}
|
|
{"current_steps": 30025, "total_steps": 37885, "loss": 0.0, "lr": 2.510429224667291e-07, "epoch": 3.9626501253794375, "percentage": 79.25, "elapsed_time": "0:43:58", "remaining_time": "0:11:30", "throughput": 5601.32, "total_tokens": 14779264}
|
|
{"current_steps": 30030, "total_steps": 37885, "loss": 0.0, "lr": 2.5073773485061645e-07, "epoch": 3.9633100171571862, "percentage": 79.27, "elapsed_time": "0:43:58", "remaining_time": "0:11:30", "throughput": 5601.55, "total_tokens": 14781696}
|
|
{"current_steps": 30035, "total_steps": 37885, "loss": 0.0704, "lr": 2.504327062599939e-07, "epoch": 3.9639699089349345, "percentage": 79.28, "elapsed_time": "0:43:59", "remaining_time": "0:11:29", "throughput": 5601.87, "total_tokens": 14784384}
|
|
{"current_steps": 30040, "total_steps": 37885, "loss": 0.0657, "lr": 2.501278367596017e-07, "epoch": 3.9646298007126832, "percentage": 79.29, "elapsed_time": "0:43:59", "remaining_time": "0:11:29", "throughput": 5602.07, "total_tokens": 14786752}
|
|
{"current_steps": 30045, "total_steps": 37885, "loss": 0.0, "lr": 2.498231264141458e-07, "epoch": 3.9652896924904315, "percentage": 79.31, "elapsed_time": "0:43:59", "remaining_time": "0:11:28", "throughput": 5602.34, "total_tokens": 14789312}
|
|
{"current_steps": 30050, "total_steps": 37885, "loss": 0.0004, "lr": 2.495185752882989e-07, "epoch": 3.96594958426818, "percentage": 79.32, "elapsed_time": "0:44:00", "remaining_time": "0:11:28", "throughput": 5602.47, "total_tokens": 14791488}
|
|
{"current_steps": 30055, "total_steps": 37885, "loss": 0.0, "lr": 2.492141834467002e-07, "epoch": 3.9666094760459285, "percentage": 79.33, "elapsed_time": "0:44:00", "remaining_time": "0:11:27", "throughput": 5602.67, "total_tokens": 14793856}
|
|
{"current_steps": 30060, "total_steps": 37885, "loss": 0.0518, "lr": 2.4890995095395397e-07, "epoch": 3.967269367823677, "percentage": 79.35, "elapsed_time": "0:44:00", "remaining_time": "0:11:27", "throughput": 5602.92, "total_tokens": 14796352}
|
|
{"current_steps": 30065, "total_steps": 37885, "loss": 0.0, "lr": 2.486058778746316e-07, "epoch": 3.9679292596014255, "percentage": 79.36, "elapsed_time": "0:44:01", "remaining_time": "0:11:26", "throughput": 5603.21, "total_tokens": 14798976}
|
|
{"current_steps": 30070, "total_steps": 37885, "loss": 0.0518, "lr": 2.4830196427327056e-07, "epoch": 3.968589151379174, "percentage": 79.37, "elapsed_time": "0:44:01", "remaining_time": "0:11:26", "throughput": 5603.46, "total_tokens": 14801472}
|
|
{"current_steps": 30075, "total_steps": 37885, "loss": 0.0003, "lr": 2.4799821021437463e-07, "epoch": 3.969249043156922, "percentage": 79.38, "elapsed_time": "0:44:01", "remaining_time": "0:11:26", "throughput": 5603.79, "total_tokens": 14804224}
|
|
{"current_steps": 30080, "total_steps": 37885, "loss": 0.0, "lr": 2.476946157624126e-07, "epoch": 3.969908934934671, "percentage": 79.4, "elapsed_time": "0:44:02", "remaining_time": "0:11:25", "throughput": 5603.95, "total_tokens": 14806464}
|
|
{"current_steps": 30085, "total_steps": 37885, "loss": 0.0001, "lr": 2.4739118098182055e-07, "epoch": 3.970568826712419, "percentage": 79.41, "elapsed_time": "0:44:02", "remaining_time": "0:11:25", "throughput": 5604.29, "total_tokens": 14809216}
|
|
{"current_steps": 30090, "total_steps": 37885, "loss": 0.0, "lr": 2.470879059370008e-07, "epoch": 3.971228718490168, "percentage": 79.42, "elapsed_time": "0:44:02", "remaining_time": "0:11:24", "throughput": 5604.41, "total_tokens": 14811392}
|
|
{"current_steps": 30095, "total_steps": 37885, "loss": 0.0, "lr": 2.467847906923205e-07, "epoch": 3.971888610267916, "percentage": 79.44, "elapsed_time": "0:44:03", "remaining_time": "0:11:24", "throughput": 5604.64, "total_tokens": 14813824}
|
|
{"current_steps": 30100, "total_steps": 37885, "loss": 0.0001, "lr": 2.4648183531211397e-07, "epoch": 3.9725485020456643, "percentage": 79.45, "elapsed_time": "0:44:03", "remaining_time": "0:11:23", "throughput": 5604.78, "total_tokens": 14816000}
|
|
{"current_steps": 30105, "total_steps": 37885, "loss": 0.0005, "lr": 2.4617903986068146e-07, "epoch": 3.973208393823413, "percentage": 79.46, "elapsed_time": "0:44:03", "remaining_time": "0:11:23", "throughput": 5604.98, "total_tokens": 14818368}
|
|
{"current_steps": 30110, "total_steps": 37885, "loss": 0.0939, "lr": 2.458764044022892e-07, "epoch": 3.9738682856011613, "percentage": 79.48, "elapsed_time": "0:44:04", "remaining_time": "0:11:22", "throughput": 5605.12, "total_tokens": 14820544}
|
|
{"current_steps": 30115, "total_steps": 37885, "loss": 0.0003, "lr": 2.455739290011689e-07, "epoch": 3.97452817737891, "percentage": 79.49, "elapsed_time": "0:44:04", "remaining_time": "0:11:22", "throughput": 5605.32, "total_tokens": 14822912}
|
|
{"current_steps": 30120, "total_steps": 37885, "loss": 0.0626, "lr": 2.452716137215191e-07, "epoch": 3.9751880691566583, "percentage": 79.5, "elapsed_time": "0:44:04", "remaining_time": "0:11:21", "throughput": 5605.48, "total_tokens": 14825152}
|
|
{"current_steps": 30125, "total_steps": 37885, "loss": 0.0, "lr": 2.449694586275042e-07, "epoch": 3.9758479609344066, "percentage": 79.52, "elapsed_time": "0:44:05", "remaining_time": "0:11:21", "throughput": 5605.77, "total_tokens": 14827776}
|
|
{"current_steps": 30130, "total_steps": 37885, "loss": 0.0001, "lr": 2.4466746378325384e-07, "epoch": 3.9765078527121553, "percentage": 79.53, "elapsed_time": "0:44:05", "remaining_time": "0:11:20", "throughput": 5606.04, "total_tokens": 14830336}
|
|
{"current_steps": 30135, "total_steps": 37885, "loss": 0.0005, "lr": 2.4436562925286473e-07, "epoch": 3.9771677444899036, "percentage": 79.54, "elapsed_time": "0:44:05", "remaining_time": "0:11:20", "throughput": 5606.31, "total_tokens": 14832896}
|
|
{"current_steps": 30140, "total_steps": 37885, "loss": 0.0213, "lr": 2.440639551003992e-07, "epoch": 3.9778276362676523, "percentage": 79.56, "elapsed_time": "0:44:06", "remaining_time": "0:11:19", "throughput": 5606.46, "total_tokens": 14835136}
|
|
{"current_steps": 30145, "total_steps": 37885, "loss": 0.0001, "lr": 2.437624413898849e-07, "epoch": 3.9784875280454006, "percentage": 79.57, "elapsed_time": "0:44:06", "remaining_time": "0:11:19", "throughput": 5606.64, "total_tokens": 14837440}
|
|
{"current_steps": 30150, "total_steps": 37885, "loss": 0.0549, "lr": 2.4346108818531605e-07, "epoch": 3.979147419823149, "percentage": 79.58, "elapsed_time": "0:44:06", "remaining_time": "0:11:19", "throughput": 5606.95, "total_tokens": 14840128}
|
|
{"current_steps": 30155, "total_steps": 37885, "loss": 0.0001, "lr": 2.4315989555065284e-07, "epoch": 3.979807311600897, "percentage": 79.6, "elapsed_time": "0:44:07", "remaining_time": "0:11:18", "throughput": 5607.17, "total_tokens": 14842560}
|
|
{"current_steps": 30160, "total_steps": 37885, "loss": 0.0, "lr": 2.428588635498215e-07, "epoch": 3.980467203378646, "percentage": 79.61, "elapsed_time": "0:44:07", "remaining_time": "0:11:18", "throughput": 5607.37, "total_tokens": 14844928}
|
|
{"current_steps": 30165, "total_steps": 37885, "loss": 0.0, "lr": 2.425579922467137e-07, "epoch": 3.9811270951563946, "percentage": 79.62, "elapsed_time": "0:44:07", "remaining_time": "0:11:17", "throughput": 5607.5, "total_tokens": 14847104}
|
|
{"current_steps": 30170, "total_steps": 37885, "loss": 0.0, "lr": 2.4225728170518636e-07, "epoch": 3.981786986934143, "percentage": 79.64, "elapsed_time": "0:44:08", "remaining_time": "0:11:17", "throughput": 5607.76, "total_tokens": 14849664}
|
|
{"current_steps": 30175, "total_steps": 37885, "loss": 0.0518, "lr": 2.419567319890645e-07, "epoch": 3.982446878711891, "percentage": 79.65, "elapsed_time": "0:44:08", "remaining_time": "0:11:16", "throughput": 5607.94, "total_tokens": 14851968}
|
|
{"current_steps": 30180, "total_steps": 37885, "loss": 0.0, "lr": 2.416563431621366e-07, "epoch": 3.9831067704896395, "percentage": 79.66, "elapsed_time": "0:44:08", "remaining_time": "0:11:16", "throughput": 5608.16, "total_tokens": 14854400}
|
|
{"current_steps": 30185, "total_steps": 37885, "loss": 0.0024, "lr": 2.413561152881587e-07, "epoch": 3.983766662267388, "percentage": 79.68, "elapsed_time": "0:44:09", "remaining_time": "0:11:15", "throughput": 5608.45, "total_tokens": 14857024}
|
|
{"current_steps": 30190, "total_steps": 37885, "loss": 0.0, "lr": 2.410560484308514e-07, "epoch": 3.9844265540451365, "percentage": 79.69, "elapsed_time": "0:44:09", "remaining_time": "0:11:15", "throughput": 5608.6, "total_tokens": 14859264}
|
|
{"current_steps": 30195, "total_steps": 37885, "loss": 0.0, "lr": 2.407561426539019e-07, "epoch": 3.985086445822885, "percentage": 79.7, "elapsed_time": "0:44:09", "remaining_time": "0:11:14", "throughput": 5608.87, "total_tokens": 14861824}
|
|
{"current_steps": 30200, "total_steps": 37885, "loss": 0.0037, "lr": 2.404563980209634e-07, "epoch": 3.9857463376006335, "percentage": 79.71, "elapsed_time": "0:44:10", "remaining_time": "0:11:14", "throughput": 5609.08, "total_tokens": 14864256}
|
|
{"current_steps": 30205, "total_steps": 37885, "loss": 0.0001, "lr": 2.401568145956537e-07, "epoch": 3.9864062293783817, "percentage": 79.73, "elapsed_time": "0:44:10", "remaining_time": "0:11:13", "throughput": 5609.21, "total_tokens": 14866432}
|
|
{"current_steps": 30210, "total_steps": 37885, "loss": 0.0001, "lr": 2.398573924415583e-07, "epoch": 3.9870661211561305, "percentage": 79.74, "elapsed_time": "0:44:10", "remaining_time": "0:11:13", "throughput": 5609.56, "total_tokens": 14869248}
|
|
{"current_steps": 30215, "total_steps": 37885, "loss": 0.0, "lr": 2.395581316222269e-07, "epoch": 3.9877260129338787, "percentage": 79.75, "elapsed_time": "0:44:11", "remaining_time": "0:11:12", "throughput": 5609.74, "total_tokens": 14871552}
|
|
{"current_steps": 30220, "total_steps": 37885, "loss": 0.0503, "lr": 2.3925903220117506e-07, "epoch": 3.9883859047116275, "percentage": 79.77, "elapsed_time": "0:44:11", "remaining_time": "0:11:12", "throughput": 5610.12, "total_tokens": 14874432}
|
|
{"current_steps": 30225, "total_steps": 37885, "loss": 0.0253, "lr": 2.389600942418848e-07, "epoch": 3.9890457964893757, "percentage": 79.78, "elapsed_time": "0:44:11", "remaining_time": "0:11:12", "throughput": 5610.31, "total_tokens": 14876800}
|
|
{"current_steps": 30230, "total_steps": 37885, "loss": 0.0, "lr": 2.386613178078035e-07, "epoch": 3.989705688267124, "percentage": 79.79, "elapsed_time": "0:44:12", "remaining_time": "0:11:11", "throughput": 5610.51, "total_tokens": 14879168}
|
|
{"current_steps": 30235, "total_steps": 37885, "loss": 0.0, "lr": 2.3836270296234463e-07, "epoch": 3.9903655800448727, "percentage": 79.81, "elapsed_time": "0:44:12", "remaining_time": "0:11:11", "throughput": 5610.77, "total_tokens": 14881728}
|
|
{"current_steps": 30240, "total_steps": 37885, "loss": 0.0001, "lr": 2.3806424976888639e-07, "epoch": 3.991025471822621, "percentage": 79.82, "elapsed_time": "0:44:12", "remaining_time": "0:11:10", "throughput": 5611.03, "total_tokens": 14884288}
|
|
{"current_steps": 30245, "total_steps": 37885, "loss": 0.0472, "lr": 2.3776595829077362e-07, "epoch": 3.9916853636003697, "percentage": 79.83, "elapsed_time": "0:44:13", "remaining_time": "0:11:10", "throughput": 5611.34, "total_tokens": 14886976}
|
|
{"current_steps": 30250, "total_steps": 37885, "loss": 0.0006, "lr": 2.3746782859131685e-07, "epoch": 3.992345255378118, "percentage": 79.85, "elapsed_time": "0:44:13", "remaining_time": "0:11:09", "throughput": 5611.62, "total_tokens": 14889600}
|
|
{"current_steps": 30255, "total_steps": 37885, "loss": 0.0565, "lr": 2.371698607337913e-07, "epoch": 3.9930051471558663, "percentage": 79.86, "elapsed_time": "0:44:13", "remaining_time": "0:11:09", "throughput": 5611.91, "total_tokens": 14892224}
|
|
{"current_steps": 30260, "total_steps": 37885, "loss": 0.0001, "lr": 2.368720547814389e-07, "epoch": 3.993665038933615, "percentage": 79.87, "elapsed_time": "0:44:14", "remaining_time": "0:11:08", "throughput": 5612.1, "total_tokens": 14894592}
|
|
{"current_steps": 30265, "total_steps": 37885, "loss": 0.0, "lr": 2.3657441079746698e-07, "epoch": 3.9943249307113633, "percentage": 79.89, "elapsed_time": "0:44:14", "remaining_time": "0:11:08", "throughput": 5612.43, "total_tokens": 14897344}
|
|
{"current_steps": 30270, "total_steps": 37885, "loss": 0.0411, "lr": 2.362769288450478e-07, "epoch": 3.994984822489112, "percentage": 79.9, "elapsed_time": "0:44:14", "remaining_time": "0:11:07", "throughput": 5612.69, "total_tokens": 14899904}
|
|
{"current_steps": 30275, "total_steps": 37885, "loss": 0.0, "lr": 2.3597960898731995e-07, "epoch": 3.9956447142668603, "percentage": 79.91, "elapsed_time": "0:44:15", "remaining_time": "0:11:07", "throughput": 5612.94, "total_tokens": 14902400}
|
|
{"current_steps": 30280, "total_steps": 37885, "loss": 0.0001, "lr": 2.356824512873876e-07, "epoch": 3.9963046060446086, "percentage": 79.93, "elapsed_time": "0:44:15", "remaining_time": "0:11:06", "throughput": 5613.11, "total_tokens": 14904704}
|
|
{"current_steps": 30285, "total_steps": 37885, "loss": 0.0009, "lr": 2.3538545580832047e-07, "epoch": 3.9969644978223573, "percentage": 79.94, "elapsed_time": "0:44:15", "remaining_time": "0:11:06", "throughput": 5613.46, "total_tokens": 14907520}
|
|
{"current_steps": 30290, "total_steps": 37885, "loss": 0.0414, "lr": 2.350886226131531e-07, "epoch": 3.9976243896001056, "percentage": 79.95, "elapsed_time": "0:44:16", "remaining_time": "0:11:05", "throughput": 5613.69, "total_tokens": 14909952}
|
|
{"current_steps": 30295, "total_steps": 37885, "loss": 0.0, "lr": 2.3479195176488664e-07, "epoch": 3.9982842813778543, "percentage": 79.97, "elapsed_time": "0:44:16", "remaining_time": "0:11:05", "throughput": 5613.99, "total_tokens": 14912640}
|
|
{"current_steps": 30300, "total_steps": 37885, "loss": 0.0626, "lr": 2.344954433264874e-07, "epoch": 3.9989441731556026, "percentage": 79.98, "elapsed_time": "0:44:16", "remaining_time": "0:11:05", "throughput": 5614.23, "total_tokens": 14915136}
|
|
{"current_steps": 30305, "total_steps": 37885, "loss": 0.0, "lr": 2.3419909736088672e-07, "epoch": 3.999604064933351, "percentage": 79.99, "elapsed_time": "0:44:16", "remaining_time": "0:11:04", "throughput": 5614.43, "total_tokens": 14917504}
|
|
{"current_steps": 30310, "total_steps": 37885, "loss": 0.0, "lr": 2.3390291393098215e-07, "epoch": 4.000263956711099, "percentage": 80.01, "elapsed_time": "0:44:17", "remaining_time": "0:11:04", "throughput": 5614.45, "total_tokens": 14919888}
|
|
{"current_steps": 30315, "total_steps": 37885, "loss": 0.0003, "lr": 2.3360689309963666e-07, "epoch": 4.000923848488847, "percentage": 80.02, "elapsed_time": "0:44:17", "remaining_time": "0:11:03", "throughput": 5614.77, "total_tokens": 14922640}
|
|
{"current_steps": 30320, "total_steps": 37885, "loss": 0.0002, "lr": 2.333110349296782e-07, "epoch": 4.001583740266597, "percentage": 80.03, "elapsed_time": "0:44:18", "remaining_time": "0:11:03", "throughput": 5614.95, "total_tokens": 14924944}
|
|
{"current_steps": 30320, "total_steps": 37885, "eval_loss": 0.1816491037607193, "epoch": 4.001583740266597, "percentage": 80.03, "elapsed_time": "0:44:25", "remaining_time": "0:11:05", "throughput": 5598.42, "total_tokens": 14924944}
|
|
{"current_steps": 30325, "total_steps": 37885, "loss": 0.0, "lr": 2.3301533948390072e-07, "epoch": 4.002243632044345, "percentage": 80.04, "elapsed_time": "0:45:03", "remaining_time": "0:11:13", "throughput": 5522.25, "total_tokens": 14927632}
|
|
{"current_steps": 30330, "total_steps": 37885, "loss": 0.0001, "lr": 2.3271980682506297e-07, "epoch": 4.002903523822093, "percentage": 80.06, "elapsed_time": "0:45:03", "remaining_time": "0:11:13", "throughput": 5522.49, "total_tokens": 14930128}
|
|
{"current_steps": 30335, "total_steps": 37885, "loss": 0.0, "lr": 2.3242443701589054e-07, "epoch": 4.003563415599841, "percentage": 80.07, "elapsed_time": "0:45:03", "remaining_time": "0:11:12", "throughput": 5522.76, "total_tokens": 14932688}
|
|
{"current_steps": 30340, "total_steps": 37885, "loss": 0.0002, "lr": 2.3212923011907305e-07, "epoch": 4.00422330737759, "percentage": 80.08, "elapsed_time": "0:45:04", "remaining_time": "0:11:12", "throughput": 5522.98, "total_tokens": 14935120}
|
|
{"current_steps": 30345, "total_steps": 37885, "loss": 0.0, "lr": 2.3183418619726523e-07, "epoch": 4.004883199155339, "percentage": 80.1, "elapsed_time": "0:45:04", "remaining_time": "0:11:12", "throughput": 5523.17, "total_tokens": 14937488}
|
|
{"current_steps": 30350, "total_steps": 37885, "loss": 0.0176, "lr": 2.3153930531308952e-07, "epoch": 4.005543090933087, "percentage": 80.11, "elapsed_time": "0:45:04", "remaining_time": "0:11:11", "throughput": 5523.42, "total_tokens": 14939984}
|
|
{"current_steps": 30355, "total_steps": 37885, "loss": 0.0, "lr": 2.3124458752913123e-07, "epoch": 4.006202982710835, "percentage": 80.12, "elapsed_time": "0:45:05", "remaining_time": "0:11:11", "throughput": 5523.64, "total_tokens": 14942416}
|
|
{"current_steps": 30360, "total_steps": 37885, "loss": 0.0006, "lr": 2.3095003290794258e-07, "epoch": 4.006862874488584, "percentage": 80.14, "elapsed_time": "0:45:05", "remaining_time": "0:11:10", "throughput": 5523.8, "total_tokens": 14944656}
|
|
{"current_steps": 30365, "total_steps": 37885, "loss": 0.0018, "lr": 2.306556415120401e-07, "epoch": 4.007522766266332, "percentage": 80.15, "elapsed_time": "0:45:05", "remaining_time": "0:11:10", "throughput": 5524.1, "total_tokens": 14947344}
|
|
{"current_steps": 30370, "total_steps": 37885, "loss": 0.0014, "lr": 2.3036141340390657e-07, "epoch": 4.008182658044081, "percentage": 80.16, "elapsed_time": "0:45:06", "remaining_time": "0:11:09", "throughput": 5524.28, "total_tokens": 14949648}
|
|
{"current_steps": 30375, "total_steps": 37885, "loss": 0.0001, "lr": 2.3006734864599008e-07, "epoch": 4.008842549821829, "percentage": 80.18, "elapsed_time": "0:45:06", "remaining_time": "0:11:09", "throughput": 5524.63, "total_tokens": 14952464}
|
|
{"current_steps": 30380, "total_steps": 37885, "loss": 0.0004, "lr": 2.2977344730070314e-07, "epoch": 4.009502441599578, "percentage": 80.19, "elapsed_time": "0:45:06", "remaining_time": "0:11:08", "throughput": 5524.86, "total_tokens": 14954896}
|
|
{"current_steps": 30385, "total_steps": 37885, "loss": 0.0504, "lr": 2.294797094304244e-07, "epoch": 4.010162333377326, "percentage": 80.2, "elapsed_time": "0:45:07", "remaining_time": "0:11:08", "throughput": 5525.13, "total_tokens": 14957456}
|
|
{"current_steps": 30390, "total_steps": 37885, "loss": 0.0, "lr": 2.2918613509749795e-07, "epoch": 4.010822225155074, "percentage": 80.22, "elapsed_time": "0:45:07", "remaining_time": "0:11:07", "throughput": 5525.28, "total_tokens": 14959696}
|
|
{"current_steps": 30395, "total_steps": 37885, "loss": 0.0001, "lr": 2.2889272436423233e-07, "epoch": 4.011482116932823, "percentage": 80.23, "elapsed_time": "0:45:07", "remaining_time": "0:11:07", "throughput": 5525.53, "total_tokens": 14962192}
|
|
{"current_steps": 30400, "total_steps": 37885, "loss": 0.0, "lr": 2.2859947729290207e-07, "epoch": 4.012142008710572, "percentage": 80.24, "elapsed_time": "0:45:08", "remaining_time": "0:11:06", "throughput": 5525.69, "total_tokens": 14964432}
|
|
{"current_steps": 30405, "total_steps": 37885, "loss": 0.0, "lr": 2.2830639394574657e-07, "epoch": 4.01280190048832, "percentage": 80.26, "elapsed_time": "0:45:08", "remaining_time": "0:11:06", "throughput": 5525.98, "total_tokens": 14967056}
|
|
{"current_steps": 30410, "total_steps": 37885, "loss": 0.0, "lr": 2.280134743849712e-07, "epoch": 4.013461792266068, "percentage": 80.27, "elapsed_time": "0:45:08", "remaining_time": "0:11:05", "throughput": 5526.14, "total_tokens": 14969296}
|
|
{"current_steps": 30415, "total_steps": 37885, "loss": 0.056, "lr": 2.2772071867274524e-07, "epoch": 4.0141216840438165, "percentage": 80.28, "elapsed_time": "0:45:09", "remaining_time": "0:11:05", "throughput": 5526.32, "total_tokens": 14971600}
|
|
{"current_steps": 30420, "total_steps": 37885, "loss": 0.0006, "lr": 2.2742812687120438e-07, "epoch": 4.014781575821566, "percentage": 80.3, "elapsed_time": "0:45:09", "remaining_time": "0:11:04", "throughput": 5526.49, "total_tokens": 14973904}
|
|
{"current_steps": 30425, "total_steps": 37885, "loss": 0.001, "lr": 2.2713569904244934e-07, "epoch": 4.015441467599314, "percentage": 80.31, "elapsed_time": "0:45:09", "remaining_time": "0:11:04", "throughput": 5526.81, "total_tokens": 14976592}
|
|
{"current_steps": 30430, "total_steps": 37885, "loss": 0.0005, "lr": 2.268434352485452e-07, "epoch": 4.016101359377062, "percentage": 80.32, "elapsed_time": "0:45:10", "remaining_time": "0:11:03", "throughput": 5527.16, "total_tokens": 14979408}
|
|
{"current_steps": 30435, "total_steps": 37885, "loss": 0.0003, "lr": 2.265513355515233e-07, "epoch": 4.0167612511548105, "percentage": 80.34, "elapsed_time": "0:45:10", "remaining_time": "0:11:03", "throughput": 5527.36, "total_tokens": 14981776}
|
|
{"current_steps": 30440, "total_steps": 37885, "loss": 0.0, "lr": 2.262594000133795e-07, "epoch": 4.017421142932559, "percentage": 80.35, "elapsed_time": "0:45:10", "remaining_time": "0:11:03", "throughput": 5527.59, "total_tokens": 14984208}
|
|
{"current_steps": 30445, "total_steps": 37885, "loss": 0.0007, "lr": 2.2596762869607521e-07, "epoch": 4.018081034710307, "percentage": 80.36, "elapsed_time": "0:45:11", "remaining_time": "0:11:02", "throughput": 5527.83, "total_tokens": 14986704}
|
|
{"current_steps": 30450, "total_steps": 37885, "loss": 0.0, "lr": 2.2567602166153653e-07, "epoch": 4.018740926488056, "percentage": 80.37, "elapsed_time": "0:45:11", "remaining_time": "0:11:02", "throughput": 5528.11, "total_tokens": 14989328}
|
|
{"current_steps": 30455, "total_steps": 37885, "loss": 0.0001, "lr": 2.2538457897165498e-07, "epoch": 4.0194008182658045, "percentage": 80.39, "elapsed_time": "0:45:11", "remaining_time": "0:11:01", "throughput": 5528.26, "total_tokens": 14991568}
|
|
{"current_steps": 30460, "total_steps": 37885, "loss": 0.028, "lr": 2.2509330068828748e-07, "epoch": 4.020060710043553, "percentage": 80.4, "elapsed_time": "0:45:12", "remaining_time": "0:11:01", "throughput": 5528.37, "total_tokens": 14993680}
|
|
{"current_steps": 30465, "total_steps": 37885, "loss": 0.0, "lr": 2.2480218687325515e-07, "epoch": 4.020720601821301, "percentage": 80.41, "elapsed_time": "0:45:12", "remaining_time": "0:11:00", "throughput": 5528.57, "total_tokens": 14996048}
|
|
{"current_steps": 30470, "total_steps": 37885, "loss": 0.0, "lr": 2.2451123758834512e-07, "epoch": 4.021380493599049, "percentage": 80.43, "elapsed_time": "0:45:12", "remaining_time": "0:11:00", "throughput": 5528.81, "total_tokens": 14998544}
|
|
{"current_steps": 30475, "total_steps": 37885, "loss": 0.0337, "lr": 2.2422045289530967e-07, "epoch": 4.0220403853767985, "percentage": 80.44, "elapsed_time": "0:45:13", "remaining_time": "0:10:59", "throughput": 5529.04, "total_tokens": 15000976}
|
|
{"current_steps": 30480, "total_steps": 37885, "loss": 0.0, "lr": 2.2392983285586487e-07, "epoch": 4.022700277154547, "percentage": 80.45, "elapsed_time": "0:45:13", "remaining_time": "0:10:59", "throughput": 5529.25, "total_tokens": 15003408}
|
|
{"current_steps": 30485, "total_steps": 37885, "loss": 0.0383, "lr": 2.2363937753169338e-07, "epoch": 4.023360168932295, "percentage": 80.47, "elapsed_time": "0:45:13", "remaining_time": "0:10:58", "throughput": 5529.49, "total_tokens": 15005904}
|
|
{"current_steps": 30490, "total_steps": 37885, "loss": 0.0239, "lr": 2.2334908698444188e-07, "epoch": 4.024020060710043, "percentage": 80.48, "elapsed_time": "0:45:14", "remaining_time": "0:10:58", "throughput": 5529.73, "total_tokens": 15008400}
|
|
{"current_steps": 30495, "total_steps": 37885, "loss": 0.0, "lr": 2.23058961275723e-07, "epoch": 4.024679952487792, "percentage": 80.49, "elapsed_time": "0:45:14", "remaining_time": "0:10:57", "throughput": 5529.99, "total_tokens": 15010960}
|
|
{"current_steps": 30500, "total_steps": 37885, "loss": 0.0, "lr": 2.2276900046711334e-07, "epoch": 4.025339844265541, "percentage": 80.51, "elapsed_time": "0:45:14", "remaining_time": "0:10:57", "throughput": 5530.21, "total_tokens": 15013392}
|
|
{"current_steps": 30505, "total_steps": 37885, "loss": 0.0005, "lr": 2.2247920462015458e-07, "epoch": 4.025999736043289, "percentage": 80.52, "elapsed_time": "0:45:15", "remaining_time": "0:10:56", "throughput": 5530.52, "total_tokens": 15016080}
|
|
{"current_steps": 30510, "total_steps": 37885, "loss": 0.0, "lr": 2.2218957379635483e-07, "epoch": 4.026659627821037, "percentage": 80.53, "elapsed_time": "0:45:15", "remaining_time": "0:10:56", "throughput": 5530.67, "total_tokens": 15018320}
|
|
{"current_steps": 30515, "total_steps": 37885, "loss": 0.0, "lr": 2.2190010805718528e-07, "epoch": 4.027319519598786, "percentage": 80.55, "elapsed_time": "0:45:15", "remaining_time": "0:10:55", "throughput": 5530.98, "total_tokens": 15021008}
|
|
{"current_steps": 30520, "total_steps": 37885, "loss": 0.0, "lr": 2.2161080746408345e-07, "epoch": 4.027979411376534, "percentage": 80.56, "elapsed_time": "0:45:16", "remaining_time": "0:10:55", "throughput": 5531.16, "total_tokens": 15023312}
|
|
{"current_steps": 30525, "total_steps": 37885, "loss": 0.0023, "lr": 2.2132167207845087e-07, "epoch": 4.028639303154283, "percentage": 80.57, "elapsed_time": "0:45:16", "remaining_time": "0:10:54", "throughput": 5531.31, "total_tokens": 15025552}
|
|
{"current_steps": 30530, "total_steps": 37885, "loss": 0.0, "lr": 2.2103270196165468e-07, "epoch": 4.029299194932031, "percentage": 80.59, "elapsed_time": "0:45:16", "remaining_time": "0:10:54", "throughput": 5531.59, "total_tokens": 15028176}
|
|
{"current_steps": 30535, "total_steps": 37885, "loss": 0.0, "lr": 2.2074389717502695e-07, "epoch": 4.02995908670978, "percentage": 80.6, "elapsed_time": "0:45:17", "remaining_time": "0:10:54", "throughput": 5531.76, "total_tokens": 15030416}
|
|
{"current_steps": 30540, "total_steps": 37885, "loss": 0.0008, "lr": 2.204552577798635e-07, "epoch": 4.030618978487528, "percentage": 80.61, "elapsed_time": "0:45:17", "remaining_time": "0:10:53", "throughput": 5531.94, "total_tokens": 15032720}
|
|
{"current_steps": 30545, "total_steps": 37885, "loss": 0.0004, "lr": 2.2016678383742714e-07, "epoch": 4.031278870265276, "percentage": 80.63, "elapsed_time": "0:45:17", "remaining_time": "0:10:53", "throughput": 5532.19, "total_tokens": 15035216}
|
|
{"current_steps": 30550, "total_steps": 37885, "loss": 0.0, "lr": 2.1987847540894378e-07, "epoch": 4.031938762043025, "percentage": 80.64, "elapsed_time": "0:45:18", "remaining_time": "0:10:52", "throughput": 5532.42, "total_tokens": 15037648}
|
|
{"current_steps": 30555, "total_steps": 37885, "loss": 0.0001, "lr": 2.1959033255560455e-07, "epoch": 4.032598653820774, "percentage": 80.65, "elapsed_time": "0:45:18", "remaining_time": "0:10:52", "throughput": 5532.62, "total_tokens": 15040016}
|
|
{"current_steps": 30560, "total_steps": 37885, "loss": 0.0009, "lr": 2.19302355338566e-07, "epoch": 4.033258545598522, "percentage": 80.67, "elapsed_time": "0:45:18", "remaining_time": "0:10:51", "throughput": 5532.94, "total_tokens": 15042768}
|
|
{"current_steps": 30565, "total_steps": 37885, "loss": 0.028, "lr": 2.1901454381894914e-07, "epoch": 4.03391843737627, "percentage": 80.68, "elapsed_time": "0:45:19", "remaining_time": "0:10:51", "throughput": 5533.11, "total_tokens": 15045008}
|
|
{"current_steps": 30570, "total_steps": 37885, "loss": 0.0, "lr": 2.1872689805784007e-07, "epoch": 4.0345783291540185, "percentage": 80.69, "elapsed_time": "0:45:19", "remaining_time": "0:10:50", "throughput": 5533.31, "total_tokens": 15047376}
|
|
{"current_steps": 30575, "total_steps": 37885, "loss": 0.0008, "lr": 2.1843941811628918e-07, "epoch": 4.035238220931767, "percentage": 80.7, "elapsed_time": "0:45:19", "remaining_time": "0:10:50", "throughput": 5533.62, "total_tokens": 15050064}
|
|
{"current_steps": 30580, "total_steps": 37885, "loss": 0.0, "lr": 2.1815210405531214e-07, "epoch": 4.035898112709516, "percentage": 80.72, "elapsed_time": "0:45:20", "remaining_time": "0:10:49", "throughput": 5533.86, "total_tokens": 15052560}
|
|
{"current_steps": 30585, "total_steps": 37885, "loss": 0.0, "lr": 2.1786495593588972e-07, "epoch": 4.036558004487264, "percentage": 80.73, "elapsed_time": "0:45:20", "remaining_time": "0:10:49", "throughput": 5534.1, "total_tokens": 15055056}
|
|
{"current_steps": 30590, "total_steps": 37885, "loss": 0.0, "lr": 2.1757797381896625e-07, "epoch": 4.0372178962650125, "percentage": 80.74, "elapsed_time": "0:45:20", "remaining_time": "0:10:48", "throughput": 5534.2, "total_tokens": 15057168}
|
|
{"current_steps": 30595, "total_steps": 37885, "loss": 0.0, "lr": 2.1729115776545192e-07, "epoch": 4.037877788042761, "percentage": 80.76, "elapsed_time": "0:45:21", "remaining_time": "0:10:48", "throughput": 5534.35, "total_tokens": 15059408}
|
|
{"current_steps": 30600, "total_steps": 37885, "loss": 0.0, "lr": 2.170045078362218e-07, "epoch": 4.038537679820509, "percentage": 80.77, "elapsed_time": "0:45:21", "remaining_time": "0:10:47", "throughput": 5534.63, "total_tokens": 15062032}
|
|
{"current_steps": 30605, "total_steps": 37885, "loss": 0.001, "lr": 2.167180240921145e-07, "epoch": 4.039197571598258, "percentage": 80.78, "elapsed_time": "0:45:21", "remaining_time": "0:10:47", "throughput": 5534.87, "total_tokens": 15064528}
|
|
{"current_steps": 30610, "total_steps": 37885, "loss": 0.0, "lr": 2.1643170659393461e-07, "epoch": 4.0398574633760065, "percentage": 80.8, "elapsed_time": "0:45:22", "remaining_time": "0:10:46", "throughput": 5535.01, "total_tokens": 15066704}
|
|
{"current_steps": 30615, "total_steps": 37885, "loss": 0.0014, "lr": 2.1614555540245083e-07, "epoch": 4.040517355153755, "percentage": 80.81, "elapsed_time": "0:45:22", "remaining_time": "0:10:46", "throughput": 5535.24, "total_tokens": 15069200}
|
|
{"current_steps": 30620, "total_steps": 37885, "loss": 0.0, "lr": 2.1585957057839688e-07, "epoch": 4.041177246931503, "percentage": 80.82, "elapsed_time": "0:45:22", "remaining_time": "0:10:46", "throughput": 5535.4, "total_tokens": 15071440}
|
|
{"current_steps": 30625, "total_steps": 37885, "loss": 0.0001, "lr": 2.1557375218247053e-07, "epoch": 4.041837138709251, "percentage": 80.84, "elapsed_time": "0:45:23", "remaining_time": "0:10:45", "throughput": 5535.72, "total_tokens": 15074192}
|
|
{"current_steps": 30630, "total_steps": 37885, "loss": 0.0, "lr": 2.1528810027533495e-07, "epoch": 4.0424970304870005, "percentage": 80.85, "elapsed_time": "0:45:23", "remaining_time": "0:10:45", "throughput": 5535.95, "total_tokens": 15076624}
|
|
{"current_steps": 30635, "total_steps": 37885, "loss": 0.0, "lr": 2.1500261491761796e-07, "epoch": 4.043156922264749, "percentage": 80.86, "elapsed_time": "0:45:23", "remaining_time": "0:10:44", "throughput": 5536.22, "total_tokens": 15079248}
|
|
{"current_steps": 30640, "total_steps": 37885, "loss": 0.0005, "lr": 2.1471729616991107e-07, "epoch": 4.043816814042497, "percentage": 80.88, "elapsed_time": "0:45:24", "remaining_time": "0:10:44", "throughput": 5536.38, "total_tokens": 15081488}
|
|
{"current_steps": 30645, "total_steps": 37885, "loss": 0.0, "lr": 2.1443214409277154e-07, "epoch": 4.044476705820245, "percentage": 80.89, "elapsed_time": "0:45:24", "remaining_time": "0:10:43", "throughput": 5536.58, "total_tokens": 15083856}
|
|
{"current_steps": 30650, "total_steps": 37885, "loss": 0.0004, "lr": 2.1414715874672117e-07, "epoch": 4.045136597597994, "percentage": 80.9, "elapsed_time": "0:45:24", "remaining_time": "0:10:43", "throughput": 5536.74, "total_tokens": 15086160}
|
|
{"current_steps": 30655, "total_steps": 37885, "loss": 0.0, "lr": 2.1386234019224525e-07, "epoch": 4.045796489375743, "percentage": 80.92, "elapsed_time": "0:45:25", "remaining_time": "0:10:42", "throughput": 5536.88, "total_tokens": 15088336}
|
|
{"current_steps": 30660, "total_steps": 37885, "loss": 0.0, "lr": 2.1357768848979518e-07, "epoch": 4.046456381153491, "percentage": 80.93, "elapsed_time": "0:45:25", "remaining_time": "0:10:42", "throughput": 5537.12, "total_tokens": 15090832}
|
|
{"current_steps": 30665, "total_steps": 37885, "loss": 0.0162, "lr": 2.1329320369978532e-07, "epoch": 4.047116272931239, "percentage": 80.94, "elapsed_time": "0:45:25", "remaining_time": "0:10:41", "throughput": 5537.38, "total_tokens": 15093392}
|
|
{"current_steps": 30670, "total_steps": 37885, "loss": 0.0, "lr": 2.130088858825967e-07, "epoch": 4.047776164708988, "percentage": 80.96, "elapsed_time": "0:45:26", "remaining_time": "0:10:41", "throughput": 5537.71, "total_tokens": 15096144}
|
|
{"current_steps": 30675, "total_steps": 37885, "loss": 0.0028, "lr": 2.1272473509857313e-07, "epoch": 4.048436056486736, "percentage": 80.97, "elapsed_time": "0:45:26", "remaining_time": "0:10:40", "throughput": 5537.89, "total_tokens": 15098512}
|
|
{"current_steps": 30680, "total_steps": 37885, "loss": 0.0188, "lr": 2.1244075140802298e-07, "epoch": 4.049095948264485, "percentage": 80.98, "elapsed_time": "0:45:26", "remaining_time": "0:10:40", "throughput": 5538.13, "total_tokens": 15101008}
|
|
{"current_steps": 30685, "total_steps": 37885, "loss": 0.0352, "lr": 2.1215693487122078e-07, "epoch": 4.049755840042233, "percentage": 81.0, "elapsed_time": "0:45:27", "remaining_time": "0:10:39", "throughput": 5538.41, "total_tokens": 15103632}
|
|
{"current_steps": 30690, "total_steps": 37885, "loss": 0.0001, "lr": 2.118732855484038e-07, "epoch": 4.050415731819982, "percentage": 81.01, "elapsed_time": "0:45:27", "remaining_time": "0:10:39", "throughput": 5538.65, "total_tokens": 15106128}
|
|
{"current_steps": 30695, "total_steps": 37885, "loss": 0.0564, "lr": 2.1158980349977496e-07, "epoch": 4.05107562359773, "percentage": 81.02, "elapsed_time": "0:45:27", "remaining_time": "0:10:38", "throughput": 5538.84, "total_tokens": 15108496}
|
|
{"current_steps": 30700, "total_steps": 37885, "loss": 0.0, "lr": 2.1130648878550095e-07, "epoch": 4.051735515375478, "percentage": 81.03, "elapsed_time": "0:45:28", "remaining_time": "0:10:38", "throughput": 5539.14, "total_tokens": 15111184}
|
|
{"current_steps": 30705, "total_steps": 37885, "loss": 0.0468, "lr": 2.1102334146571342e-07, "epoch": 4.052395407153226, "percentage": 81.05, "elapsed_time": "0:45:28", "remaining_time": "0:10:38", "throughput": 5539.47, "total_tokens": 15114000}
|
|
{"current_steps": 30710, "total_steps": 37885, "loss": 0.0, "lr": 2.1074036160050867e-07, "epoch": 4.053055298930976, "percentage": 81.06, "elapsed_time": "0:45:28", "remaining_time": "0:10:37", "throughput": 5539.62, "total_tokens": 15116240}
|
|
{"current_steps": 30715, "total_steps": 37885, "loss": 0.0352, "lr": 2.104575492499464e-07, "epoch": 4.053715190708724, "percentage": 81.07, "elapsed_time": "0:45:29", "remaining_time": "0:10:37", "throughput": 5539.89, "total_tokens": 15118864}
|
|
{"current_steps": 30720, "total_steps": 37885, "loss": 0.0, "lr": 2.1017490447405195e-07, "epoch": 4.054375082486472, "percentage": 81.09, "elapsed_time": "0:45:29", "remaining_time": "0:10:36", "throughput": 5540.19, "total_tokens": 15121552}
|
|
{"current_steps": 30725, "total_steps": 37885, "loss": 0.0, "lr": 2.0989242733281486e-07, "epoch": 4.05503497426422, "percentage": 81.1, "elapsed_time": "0:45:29", "remaining_time": "0:10:36", "throughput": 5540.33, "total_tokens": 15123792}
|
|
{"current_steps": 30730, "total_steps": 37885, "loss": 0.0002, "lr": 2.0961011788618833e-07, "epoch": 4.055694866041969, "percentage": 81.11, "elapsed_time": "0:45:30", "remaining_time": "0:10:35", "throughput": 5540.53, "total_tokens": 15126224}
|
|
{"current_steps": 30735, "total_steps": 37885, "loss": 0.0, "lr": 2.0932797619409058e-07, "epoch": 4.056354757819718, "percentage": 81.13, "elapsed_time": "0:45:30", "remaining_time": "0:10:35", "throughput": 5540.84, "total_tokens": 15128912}
|
|
{"current_steps": 30740, "total_steps": 37885, "loss": 0.0, "lr": 2.0904600231640435e-07, "epoch": 4.057014649597466, "percentage": 81.14, "elapsed_time": "0:45:30", "remaining_time": "0:10:34", "throughput": 5541.08, "total_tokens": 15131472}
|
|
{"current_steps": 30745, "total_steps": 37885, "loss": 0.0, "lr": 2.0876419631297682e-07, "epoch": 4.057674541375214, "percentage": 81.15, "elapsed_time": "0:45:31", "remaining_time": "0:10:34", "throughput": 5541.25, "total_tokens": 15133776}
|
|
{"current_steps": 30750, "total_steps": 37885, "loss": 0.0004, "lr": 2.084825582436186e-07, "epoch": 4.058334433152963, "percentage": 81.17, "elapsed_time": "0:45:31", "remaining_time": "0:10:33", "throughput": 5541.52, "total_tokens": 15136400}
|
|
{"current_steps": 30755, "total_steps": 37885, "loss": 0.0, "lr": 2.0820108816810565e-07, "epoch": 4.058994324930711, "percentage": 81.18, "elapsed_time": "0:45:31", "remaining_time": "0:10:33", "throughput": 5541.72, "total_tokens": 15138832}
|
|
{"current_steps": 30760, "total_steps": 37885, "loss": 0.0, "lr": 2.0791978614617834e-07, "epoch": 4.05965421670846, "percentage": 81.19, "elapsed_time": "0:45:32", "remaining_time": "0:10:32", "throughput": 5542.01, "total_tokens": 15141520}
|
|
{"current_steps": 30765, "total_steps": 37885, "loss": 0.0, "lr": 2.0763865223754028e-07, "epoch": 4.060314108486208, "percentage": 81.21, "elapsed_time": "0:45:32", "remaining_time": "0:10:32", "throughput": 5542.14, "total_tokens": 15143760}
|
|
{"current_steps": 30770, "total_steps": 37885, "loss": 0.0, "lr": 2.0735768650186058e-07, "epoch": 4.060974000263957, "percentage": 81.22, "elapsed_time": "0:45:32", "remaining_time": "0:10:31", "throughput": 5542.3, "total_tokens": 15146128}
|
|
{"current_steps": 30775, "total_steps": 37885, "loss": 0.0032, "lr": 2.0707688899877195e-07, "epoch": 4.061633892041705, "percentage": 81.23, "elapsed_time": "0:45:33", "remaining_time": "0:10:31", "throughput": 5542.57, "total_tokens": 15148752}
|
|
{"current_steps": 30780, "total_steps": 37885, "loss": 0.0164, "lr": 2.0679625978787196e-07, "epoch": 4.062293783819453, "percentage": 81.25, "elapsed_time": "0:45:33", "remaining_time": "0:10:30", "throughput": 5542.66, "total_tokens": 15150928}
|
|
{"current_steps": 30785, "total_steps": 37885, "loss": 0.0, "lr": 2.0651579892872173e-07, "epoch": 4.062953675597202, "percentage": 81.26, "elapsed_time": "0:45:33", "remaining_time": "0:10:30", "throughput": 5542.87, "total_tokens": 15153424}
|
|
{"current_steps": 30790, "total_steps": 37885, "loss": 0.0, "lr": 2.0623550648084719e-07, "epoch": 4.063613567374951, "percentage": 81.27, "elapsed_time": "0:45:34", "remaining_time": "0:10:30", "throughput": 5543.14, "total_tokens": 15156112}
|
|
{"current_steps": 30795, "total_steps": 37885, "loss": 0.0, "lr": 2.0595538250373868e-07, "epoch": 4.064273459152699, "percentage": 81.29, "elapsed_time": "0:45:34", "remaining_time": "0:10:29", "throughput": 5543.35, "total_tokens": 15158608}
|
|
{"current_steps": 30800, "total_steps": 37885, "loss": 0.0, "lr": 2.0567542705684992e-07, "epoch": 4.064933350930447, "percentage": 81.3, "elapsed_time": "0:45:34", "remaining_time": "0:10:29", "throughput": 5543.55, "total_tokens": 15161040}
|
|
{"current_steps": 30805, "total_steps": 37885, "loss": 0.0, "lr": 2.0539564019959965e-07, "epoch": 4.0655932427081956, "percentage": 81.31, "elapsed_time": "0:45:35", "remaining_time": "0:10:28", "throughput": 5543.84, "total_tokens": 15163792}
|
|
{"current_steps": 30810, "total_steps": 37885, "loss": 0.0, "lr": 2.05116021991371e-07, "epoch": 4.066253134485945, "percentage": 81.33, "elapsed_time": "0:45:35", "remaining_time": "0:10:28", "throughput": 5544.08, "total_tokens": 15166352}
|
|
{"current_steps": 30815, "total_steps": 37885, "loss": 0.0, "lr": 2.0483657249151043e-07, "epoch": 4.066913026263693, "percentage": 81.34, "elapsed_time": "0:45:35", "remaining_time": "0:10:27", "throughput": 5544.2, "total_tokens": 15168592}
|
|
{"current_steps": 30820, "total_steps": 37885, "loss": 0.0002, "lr": 2.045572917593291e-07, "epoch": 4.067572918041441, "percentage": 81.35, "elapsed_time": "0:45:36", "remaining_time": "0:10:27", "throughput": 5544.51, "total_tokens": 15171344}
|
|
{"current_steps": 30825, "total_steps": 37885, "loss": 0.0, "lr": 2.0427817985410245e-07, "epoch": 4.0682328098191896, "percentage": 81.36, "elapsed_time": "0:45:36", "remaining_time": "0:10:26", "throughput": 5544.7, "total_tokens": 15173776}
|
|
{"current_steps": 30830, "total_steps": 37885, "loss": 0.0007, "lr": 2.0399923683507026e-07, "epoch": 4.068892701596938, "percentage": 81.38, "elapsed_time": "0:45:36", "remaining_time": "0:10:26", "throughput": 5544.9, "total_tokens": 15176208}
|
|
{"current_steps": 30835, "total_steps": 37885, "loss": 0.0, "lr": 2.0372046276143596e-07, "epoch": 4.069552593374686, "percentage": 81.39, "elapsed_time": "0:45:37", "remaining_time": "0:10:25", "throughput": 5545.07, "total_tokens": 15178576}
|
|
{"current_steps": 30840, "total_steps": 37885, "loss": 0.0, "lr": 2.0344185769236654e-07, "epoch": 4.070212485152435, "percentage": 81.4, "elapsed_time": "0:45:37", "remaining_time": "0:10:25", "throughput": 5545.17, "total_tokens": 15180752}
|
|
{"current_steps": 30845, "total_steps": 37885, "loss": 0.0001, "lr": 2.0316342168699517e-07, "epoch": 4.070872376930184, "percentage": 81.42, "elapsed_time": "0:45:37", "remaining_time": "0:10:24", "throughput": 5545.38, "total_tokens": 15183248}
|
|
{"current_steps": 30850, "total_steps": 37885, "loss": 0.0001, "lr": 2.0288515480441714e-07, "epoch": 4.071532268707932, "percentage": 81.43, "elapsed_time": "0:45:38", "remaining_time": "0:10:24", "throughput": 5545.67, "total_tokens": 15185936}
|
|
{"current_steps": 30855, "total_steps": 37885, "loss": 0.061, "lr": 2.0260705710369296e-07, "epoch": 4.07219216048568, "percentage": 81.44, "elapsed_time": "0:45:38", "remaining_time": "0:10:23", "throughput": 5545.82, "total_tokens": 15188176}
|
|
{"current_steps": 30860, "total_steps": 37885, "loss": 0.0, "lr": 2.0232912864384644e-07, "epoch": 4.072852052263428, "percentage": 81.46, "elapsed_time": "0:45:39", "remaining_time": "0:10:23", "throughput": 5545.95, "total_tokens": 15190416}
|
|
{"current_steps": 30865, "total_steps": 37885, "loss": 0.0003, "lr": 2.0205136948386604e-07, "epoch": 4.073511944041178, "percentage": 81.47, "elapsed_time": "0:45:39", "remaining_time": "0:10:23", "throughput": 5546.16, "total_tokens": 15192848}
|
|
{"current_steps": 30870, "total_steps": 37885, "loss": 0.0, "lr": 2.0177377968270438e-07, "epoch": 4.074171835818926, "percentage": 81.48, "elapsed_time": "0:45:39", "remaining_time": "0:10:22", "throughput": 5546.52, "total_tokens": 15195728}
|
|
{"current_steps": 30875, "total_steps": 37885, "loss": 0.0, "lr": 2.0149635929927723e-07, "epoch": 4.074831727596674, "percentage": 81.5, "elapsed_time": "0:45:40", "remaining_time": "0:10:22", "throughput": 5546.81, "total_tokens": 15198416}
|
|
{"current_steps": 30880, "total_steps": 37885, "loss": 0.0, "lr": 2.0121910839246593e-07, "epoch": 4.075491619374422, "percentage": 81.51, "elapsed_time": "0:45:40", "remaining_time": "0:10:21", "throughput": 5547.04, "total_tokens": 15200912}
|
|
{"current_steps": 30885, "total_steps": 37885, "loss": 0.0, "lr": 2.0094202702111462e-07, "epoch": 4.076151511152171, "percentage": 81.52, "elapsed_time": "0:45:40", "remaining_time": "0:10:21", "throughput": 5547.21, "total_tokens": 15203280}
|
|
{"current_steps": 30890, "total_steps": 37885, "loss": 0.0, "lr": 2.006651152440315e-07, "epoch": 4.07681140292992, "percentage": 81.54, "elapsed_time": "0:45:41", "remaining_time": "0:10:20", "throughput": 5547.46, "total_tokens": 15205840}
|
|
{"current_steps": 30895, "total_steps": 37885, "loss": 0.0, "lr": 2.0038837311998945e-07, "epoch": 4.077471294707668, "percentage": 81.55, "elapsed_time": "0:45:41", "remaining_time": "0:10:20", "throughput": 5547.64, "total_tokens": 15208208}
|
|
{"current_steps": 30900, "total_steps": 37885, "loss": 0.0, "lr": 2.0011180070772472e-07, "epoch": 4.078131186485416, "percentage": 81.56, "elapsed_time": "0:45:41", "remaining_time": "0:10:19", "throughput": 5547.82, "total_tokens": 15210576}
|
|
{"current_steps": 30905, "total_steps": 37885, "loss": 0.0001, "lr": 1.998353980659383e-07, "epoch": 4.078791078263165, "percentage": 81.58, "elapsed_time": "0:45:42", "remaining_time": "0:10:19", "throughput": 5548.04, "total_tokens": 15213072}
|
|
{"current_steps": 30910, "total_steps": 37885, "loss": 0.0, "lr": 1.9955916525329396e-07, "epoch": 4.079450970040913, "percentage": 81.59, "elapsed_time": "0:45:42", "remaining_time": "0:10:18", "throughput": 5548.25, "total_tokens": 15215504}
|
|
{"current_steps": 30915, "total_steps": 37885, "loss": 0.0013, "lr": 1.992831023284205e-07, "epoch": 4.080110861818662, "percentage": 81.6, "elapsed_time": "0:45:42", "remaining_time": "0:10:18", "throughput": 5548.37, "total_tokens": 15217680}
|
|
{"current_steps": 30920, "total_steps": 37885, "loss": 0.0, "lr": 1.9900720934991055e-07, "epoch": 4.08077075359641, "percentage": 81.62, "elapsed_time": "0:45:43", "remaining_time": "0:10:17", "throughput": 5548.6, "total_tokens": 15220176}
|
|
{"current_steps": 30925, "total_steps": 37885, "loss": 0.0, "lr": 1.9873148637631977e-07, "epoch": 4.081430645374159, "percentage": 81.63, "elapsed_time": "0:45:43", "remaining_time": "0:10:17", "throughput": 5548.81, "total_tokens": 15222608}
|
|
{"current_steps": 30930, "total_steps": 37885, "loss": 0.13, "lr": 1.9845593346616861e-07, "epoch": 4.082090537151907, "percentage": 81.64, "elapsed_time": "0:45:43", "remaining_time": "0:10:16", "throughput": 5548.97, "total_tokens": 15224912}
|
|
{"current_steps": 30935, "total_steps": 37885, "loss": 0.0, "lr": 1.981805506779416e-07, "epoch": 4.082750428929655, "percentage": 81.66, "elapsed_time": "0:45:44", "remaining_time": "0:10:16", "throughput": 5549.15, "total_tokens": 15227280}
|
|
{"current_steps": 30940, "total_steps": 37885, "loss": 0.0, "lr": 1.9790533807008613e-07, "epoch": 4.083410320707404, "percentage": 81.67, "elapsed_time": "0:45:44", "remaining_time": "0:10:16", "throughput": 5549.3, "total_tokens": 15229520}
|
|
{"current_steps": 30945, "total_steps": 37885, "loss": 0.0, "lr": 1.976302957010143e-07, "epoch": 4.084070212485153, "percentage": 81.68, "elapsed_time": "0:45:44", "remaining_time": "0:10:15", "throughput": 5549.52, "total_tokens": 15232016}
|
|
{"current_steps": 30950, "total_steps": 37885, "loss": 0.0188, "lr": 1.9735542362910197e-07, "epoch": 4.084730104262901, "percentage": 81.69, "elapsed_time": "0:45:45", "remaining_time": "0:10:15", "throughput": 5549.69, "total_tokens": 15234320}
|
|
{"current_steps": 30955, "total_steps": 37885, "loss": 0.0998, "lr": 1.9708072191268886e-07, "epoch": 4.085389996040649, "percentage": 81.71, "elapsed_time": "0:45:45", "remaining_time": "0:10:14", "throughput": 5549.91, "total_tokens": 15236752}
|
|
{"current_steps": 30960, "total_steps": 37885, "loss": 0.0001, "lr": 1.9680619061007796e-07, "epoch": 4.0860498878183975, "percentage": 81.72, "elapsed_time": "0:45:45", "remaining_time": "0:10:14", "throughput": 5550.14, "total_tokens": 15239248}
|
|
{"current_steps": 30965, "total_steps": 37885, "loss": 0.0005, "lr": 1.9653182977953699e-07, "epoch": 4.086709779596147, "percentage": 81.73, "elapsed_time": "0:45:46", "remaining_time": "0:10:13", "throughput": 5550.36, "total_tokens": 15241680}
|
|
{"current_steps": 30970, "total_steps": 37885, "loss": 0.0001, "lr": 1.9625763947929698e-07, "epoch": 4.087369671373895, "percentage": 81.75, "elapsed_time": "0:45:46", "remaining_time": "0:10:13", "throughput": 5550.6, "total_tokens": 15244176}
|
|
{"current_steps": 30975, "total_steps": 37885, "loss": 0.0, "lr": 1.9598361976755252e-07, "epoch": 4.088029563151643, "percentage": 81.76, "elapsed_time": "0:45:46", "remaining_time": "0:10:12", "throughput": 5550.75, "total_tokens": 15246416}
|
|
{"current_steps": 30980, "total_steps": 37885, "loss": 0.0, "lr": 1.9570977070246254e-07, "epoch": 4.0886894549293915, "percentage": 81.77, "elapsed_time": "0:45:47", "remaining_time": "0:10:12", "throughput": 5550.9, "total_tokens": 15248656}
|
|
{"current_steps": 30985, "total_steps": 37885, "loss": 0.0066, "lr": 1.9543609234214987e-07, "epoch": 4.08934934670714, "percentage": 81.79, "elapsed_time": "0:45:47", "remaining_time": "0:10:11", "throughput": 5551.16, "total_tokens": 15251216}
|
|
{"current_steps": 30990, "total_steps": 37885, "loss": 0.0, "lr": 1.9516258474470005e-07, "epoch": 4.090009238484888, "percentage": 81.8, "elapsed_time": "0:45:47", "remaining_time": "0:10:11", "throughput": 5551.44, "total_tokens": 15253840}
|
|
{"current_steps": 30995, "total_steps": 37885, "loss": 0.0, "lr": 1.948892479681634e-07, "epoch": 4.090669130262637, "percentage": 81.81, "elapsed_time": "0:45:48", "remaining_time": "0:10:10", "throughput": 5551.7, "total_tokens": 15256400}
|
|
{"current_steps": 31000, "total_steps": 37885, "loss": 0.0, "lr": 1.946160820705538e-07, "epoch": 4.0913290220403855, "percentage": 81.83, "elapsed_time": "0:45:48", "remaining_time": "0:10:10", "throughput": 5551.86, "total_tokens": 15258640}
|
|
{"current_steps": 31005, "total_steps": 37885, "loss": 0.0176, "lr": 1.9434308710984893e-07, "epoch": 4.091988913818134, "percentage": 81.84, "elapsed_time": "0:45:48", "remaining_time": "0:10:09", "throughput": 5552.14, "total_tokens": 15261264}
|
|
{"current_steps": 31010, "total_steps": 37885, "loss": 0.0, "lr": 1.9407026314398966e-07, "epoch": 4.092648805595882, "percentage": 81.85, "elapsed_time": "0:45:49", "remaining_time": "0:10:09", "throughput": 5552.36, "total_tokens": 15263696}
|
|
{"current_steps": 31015, "total_steps": 37885, "loss": 0.0066, "lr": 1.9379761023088047e-07, "epoch": 4.09330869737363, "percentage": 81.87, "elapsed_time": "0:45:49", "remaining_time": "0:10:09", "throughput": 5552.62, "total_tokens": 15266256}
|
|
{"current_steps": 31020, "total_steps": 37885, "loss": 0.0, "lr": 1.9352512842839096e-07, "epoch": 4.0939685891513795, "percentage": 81.88, "elapsed_time": "0:45:49", "remaining_time": "0:10:08", "throughput": 5552.88, "total_tokens": 15268816}
|
|
{"current_steps": 31025, "total_steps": 37885, "loss": 0.0322, "lr": 1.9325281779435265e-07, "epoch": 4.094628480929128, "percentage": 81.89, "elapsed_time": "0:45:50", "remaining_time": "0:10:08", "throughput": 5553.1, "total_tokens": 15271248}
|
|
{"current_steps": 31030, "total_steps": 37885, "loss": 0.0, "lr": 1.9298067838656196e-07, "epoch": 4.095288372706876, "percentage": 81.91, "elapsed_time": "0:45:50", "remaining_time": "0:10:07", "throughput": 5553.41, "total_tokens": 15273936}
|
|
{"current_steps": 31035, "total_steps": 37885, "loss": 0.0, "lr": 1.9270871026277812e-07, "epoch": 4.095948264484624, "percentage": 81.92, "elapsed_time": "0:45:50", "remaining_time": "0:10:07", "throughput": 5553.69, "total_tokens": 15276560}
|
|
{"current_steps": 31040, "total_steps": 37885, "loss": 0.0, "lr": 1.9243691348072454e-07, "epoch": 4.096608156262373, "percentage": 81.93, "elapsed_time": "0:45:51", "remaining_time": "0:10:06", "throughput": 5553.97, "total_tokens": 15279184}
|
|
{"current_steps": 31045, "total_steps": 37885, "loss": 0.0, "lr": 1.9216528809808841e-07, "epoch": 4.097268048040122, "percentage": 81.95, "elapsed_time": "0:45:51", "remaining_time": "0:10:06", "throughput": 5554.13, "total_tokens": 15281424}
|
|
{"current_steps": 31050, "total_steps": 37885, "loss": 0.0, "lr": 1.918938341725198e-07, "epoch": 4.09792793981787, "percentage": 81.96, "elapsed_time": "0:45:51", "remaining_time": "0:10:05", "throughput": 5554.39, "total_tokens": 15283984}
|
|
{"current_steps": 31055, "total_steps": 37885, "loss": 0.0004, "lr": 1.91622551761633e-07, "epoch": 4.098587831595618, "percentage": 81.97, "elapsed_time": "0:45:52", "remaining_time": "0:10:05", "throughput": 5554.66, "total_tokens": 15286544}
|
|
{"current_steps": 31060, "total_steps": 37885, "loss": 0.0001, "lr": 1.9135144092300604e-07, "epoch": 4.099247723373367, "percentage": 81.98, "elapsed_time": "0:45:52", "remaining_time": "0:10:04", "throughput": 5554.9, "total_tokens": 15289040}
|
|
{"current_steps": 31065, "total_steps": 37885, "loss": 0.0, "lr": 1.9108050171417967e-07, "epoch": 4.099907615151115, "percentage": 82.0, "elapsed_time": "0:45:52", "remaining_time": "0:10:04", "throughput": 5555.2, "total_tokens": 15291728}
|
|
{"current_steps": 31070, "total_steps": 37885, "loss": 0.0, "lr": 1.9080973419265922e-07, "epoch": 4.100567506928864, "percentage": 82.01, "elapsed_time": "0:45:53", "remaining_time": "0:10:03", "throughput": 5555.42, "total_tokens": 15294160}
|
|
{"current_steps": 31075, "total_steps": 37885, "loss": 0.0095, "lr": 1.9053913841591285e-07, "epoch": 4.101227398706612, "percentage": 82.02, "elapsed_time": "0:45:53", "remaining_time": "0:10:03", "throughput": 5555.61, "total_tokens": 15296528}
|
|
{"current_steps": 31080, "total_steps": 37885, "loss": 0.0, "lr": 1.9026871444137306e-07, "epoch": 4.101887290484361, "percentage": 82.04, "elapsed_time": "0:45:53", "remaining_time": "0:10:02", "throughput": 5555.81, "total_tokens": 15298896}
|
|
{"current_steps": 31085, "total_steps": 37885, "loss": 0.0, "lr": 1.8999846232643468e-07, "epoch": 4.102547182262109, "percentage": 82.05, "elapsed_time": "0:45:54", "remaining_time": "0:10:02", "throughput": 5556.09, "total_tokens": 15301584}
|
|
{"current_steps": 31090, "total_steps": 37885, "loss": 0.0, "lr": 1.897283821284571e-07, "epoch": 4.103207074039857, "percentage": 82.06, "elapsed_time": "0:45:54", "remaining_time": "0:10:01", "throughput": 5556.36, "total_tokens": 15304208}
|
|
{"current_steps": 31095, "total_steps": 37885, "loss": 0.0226, "lr": 1.894584739047631e-07, "epoch": 4.103866965817606, "percentage": 82.08, "elapsed_time": "0:45:54", "remaining_time": "0:10:01", "throughput": 5556.61, "total_tokens": 15306768}
|
|
{"current_steps": 31100, "total_steps": 37885, "loss": 0.0, "lr": 1.8918873771263842e-07, "epoch": 4.104526857595355, "percentage": 82.09, "elapsed_time": "0:45:55", "remaining_time": "0:10:01", "throughput": 5556.81, "total_tokens": 15309200}
|
|
{"current_steps": 31105, "total_steps": 37885, "loss": 0.0, "lr": 1.8891917360933262e-07, "epoch": 4.105186749373103, "percentage": 82.1, "elapsed_time": "0:45:55", "remaining_time": "0:10:00", "throughput": 5557.01, "total_tokens": 15311632}
|
|
{"current_steps": 31110, "total_steps": 37885, "loss": 0.0, "lr": 1.8864978165205892e-07, "epoch": 4.105846641150851, "percentage": 82.12, "elapsed_time": "0:45:55", "remaining_time": "0:10:00", "throughput": 5557.17, "total_tokens": 15313936}
|
|
{"current_steps": 31115, "total_steps": 37885, "loss": 0.0, "lr": 1.8838056189799388e-07, "epoch": 4.1065065329285995, "percentage": 82.13, "elapsed_time": "0:45:56", "remaining_time": "0:09:59", "throughput": 5557.37, "total_tokens": 15316368}
|
|
{"current_steps": 31120, "total_steps": 37885, "loss": 0.0004, "lr": 1.881115144042771e-07, "epoch": 4.107166424706348, "percentage": 82.14, "elapsed_time": "0:45:56", "remaining_time": "0:09:59", "throughput": 5557.55, "total_tokens": 15318736}
|
|
{"current_steps": 31125, "total_steps": 37885, "loss": 0.0001, "lr": 1.8784263922801212e-07, "epoch": 4.107826316484097, "percentage": 82.16, "elapsed_time": "0:45:56", "remaining_time": "0:09:58", "throughput": 5557.81, "total_tokens": 15321360}
|
|
{"current_steps": 31130, "total_steps": 37885, "loss": 0.0001, "lr": 1.8757393642626606e-07, "epoch": 4.108486208261845, "percentage": 82.17, "elapsed_time": "0:45:57", "remaining_time": "0:09:58", "throughput": 5557.97, "total_tokens": 15323664}
|
|
{"current_steps": 31135, "total_steps": 37885, "loss": 0.0, "lr": 1.873054060560686e-07, "epoch": 4.1091461000395935, "percentage": 82.18, "elapsed_time": "0:45:57", "remaining_time": "0:09:57", "throughput": 5558.11, "total_tokens": 15325904}
|
|
{"current_steps": 31140, "total_steps": 37885, "loss": 0.0, "lr": 1.870370481744137e-07, "epoch": 4.109805991817342, "percentage": 82.2, "elapsed_time": "0:45:57", "remaining_time": "0:09:57", "throughput": 5558.27, "total_tokens": 15328208}
|
|
{"current_steps": 31145, "total_steps": 37885, "loss": 0.0, "lr": 1.8676886283825843e-07, "epoch": 4.11046588359509, "percentage": 82.21, "elapsed_time": "0:45:58", "remaining_time": "0:09:56", "throughput": 5558.5, "total_tokens": 15330704}
|
|
{"current_steps": 31150, "total_steps": 37885, "loss": 0.0, "lr": 1.8650085010452288e-07, "epoch": 4.111125775372839, "percentage": 82.22, "elapsed_time": "0:45:58", "remaining_time": "0:09:56", "throughput": 5558.67, "total_tokens": 15333072}
|
|
{"current_steps": 31155, "total_steps": 37885, "loss": 0.0011, "lr": 1.8623301003009106e-07, "epoch": 4.1117856671505875, "percentage": 82.24, "elapsed_time": "0:45:58", "remaining_time": "0:09:55", "throughput": 5558.84, "total_tokens": 15335440}
|
|
{"current_steps": 31160, "total_steps": 37885, "loss": 0.0001, "lr": 1.8596534267180998e-07, "epoch": 4.112445558928336, "percentage": 82.25, "elapsed_time": "0:45:59", "remaining_time": "0:09:55", "throughput": 5559.19, "total_tokens": 15338320}
|
|
{"current_steps": 31165, "total_steps": 37885, "loss": 0.0, "lr": 1.8569784808649035e-07, "epoch": 4.113105450706084, "percentage": 82.26, "elapsed_time": "0:45:59", "remaining_time": "0:09:55", "throughput": 5559.49, "total_tokens": 15341072}
|
|
{"current_steps": 31170, "total_steps": 37885, "loss": 0.0294, "lr": 1.8543052633090582e-07, "epoch": 4.113765342483832, "percentage": 82.28, "elapsed_time": "0:45:59", "remaining_time": "0:09:54", "throughput": 5559.68, "total_tokens": 15343504}
|
|
{"current_steps": 31175, "total_steps": 37885, "loss": 0.0266, "lr": 1.8516337746179288e-07, "epoch": 4.1144252342615815, "percentage": 82.29, "elapsed_time": "0:46:00", "remaining_time": "0:09:54", "throughput": 5559.94, "total_tokens": 15346128}
|
|
{"current_steps": 31180, "total_steps": 37885, "loss": 0.0, "lr": 1.8489640153585296e-07, "epoch": 4.11508512603933, "percentage": 82.3, "elapsed_time": "0:46:00", "remaining_time": "0:09:53", "throughput": 5560.21, "total_tokens": 15348752}
|
|
{"current_steps": 31185, "total_steps": 37885, "loss": 0.0, "lr": 1.8462959860974914e-07, "epoch": 4.115745017817078, "percentage": 82.31, "elapsed_time": "0:46:00", "remaining_time": "0:09:53", "throughput": 5560.34, "total_tokens": 15350992}
|
|
{"current_steps": 31190, "total_steps": 37885, "loss": 0.0, "lr": 1.843629687401085e-07, "epoch": 4.116404909594826, "percentage": 82.33, "elapsed_time": "0:46:01", "remaining_time": "0:09:52", "throughput": 5560.51, "total_tokens": 15353360}
|
|
{"current_steps": 31195, "total_steps": 37885, "loss": 0.0, "lr": 1.840965119835216e-07, "epoch": 4.117064801372575, "percentage": 82.34, "elapsed_time": "0:46:01", "remaining_time": "0:09:52", "throughput": 5560.73, "total_tokens": 15355856}
|
|
{"current_steps": 31200, "total_steps": 37885, "loss": 0.0, "lr": 1.838302283965415e-07, "epoch": 4.117724693150324, "percentage": 82.35, "elapsed_time": "0:46:01", "remaining_time": "0:09:51", "throughput": 5560.93, "total_tokens": 15358288}
|
|
{"current_steps": 31205, "total_steps": 37885, "loss": 0.0, "lr": 1.835641180356855e-07, "epoch": 4.118384584928072, "percentage": 82.37, "elapsed_time": "0:46:02", "remaining_time": "0:09:51", "throughput": 5561.09, "total_tokens": 15360592}
|
|
{"current_steps": 31210, "total_steps": 37885, "loss": 0.0001, "lr": 1.8329818095743265e-07, "epoch": 4.11904447670582, "percentage": 82.38, "elapsed_time": "0:46:02", "remaining_time": "0:09:50", "throughput": 5561.24, "total_tokens": 15362896}
|
|
{"current_steps": 31215, "total_steps": 37885, "loss": 0.0, "lr": 1.8303241721822737e-07, "epoch": 4.119704368483569, "percentage": 82.39, "elapsed_time": "0:46:02", "remaining_time": "0:09:50", "throughput": 5561.46, "total_tokens": 15365328}
|
|
{"current_steps": 31220, "total_steps": 37885, "loss": 0.0426, "lr": 1.8276682687447553e-07, "epoch": 4.120364260261317, "percentage": 82.41, "elapsed_time": "0:46:03", "remaining_time": "0:09:49", "throughput": 5561.64, "total_tokens": 15367632}
|
|
{"current_steps": 31225, "total_steps": 37885, "loss": 0.0, "lr": 1.825014099825466e-07, "epoch": 4.121024152039066, "percentage": 82.42, "elapsed_time": "0:46:03", "remaining_time": "0:09:49", "throughput": 5561.88, "total_tokens": 15370128}
|
|
{"current_steps": 31230, "total_steps": 37885, "loss": 0.0, "lr": 1.822361665987734e-07, "epoch": 4.121684043816814, "percentage": 82.43, "elapsed_time": "0:46:03", "remaining_time": "0:09:48", "throughput": 5562.14, "total_tokens": 15372688}
|
|
{"current_steps": 31235, "total_steps": 37885, "loss": 0.0, "lr": 1.819710967794521e-07, "epoch": 4.122343935594563, "percentage": 82.45, "elapsed_time": "0:46:04", "remaining_time": "0:09:48", "throughput": 5562.34, "total_tokens": 15375056}
|
|
{"current_steps": 31240, "total_steps": 37885, "loss": 0.0, "lr": 1.8170620058084208e-07, "epoch": 4.123003827372311, "percentage": 82.46, "elapsed_time": "0:46:04", "remaining_time": "0:09:48", "throughput": 5562.58, "total_tokens": 15377552}
|
|
{"current_steps": 31245, "total_steps": 37885, "loss": 0.0, "lr": 1.814414780591651e-07, "epoch": 4.123663719150059, "percentage": 82.47, "elapsed_time": "0:46:04", "remaining_time": "0:09:47", "throughput": 5562.78, "total_tokens": 15379920}
|
|
{"current_steps": 31250, "total_steps": 37885, "loss": 0.0, "lr": 1.811769292706068e-07, "epoch": 4.124323610927807, "percentage": 82.49, "elapsed_time": "0:46:05", "remaining_time": "0:09:47", "throughput": 5562.96, "total_tokens": 15382224}
|
|
{"current_steps": 31255, "total_steps": 37885, "loss": 0.0, "lr": 1.8091255427131614e-07, "epoch": 4.124983502705557, "percentage": 82.5, "elapsed_time": "0:46:05", "remaining_time": "0:09:46", "throughput": 5563.26, "total_tokens": 15384912}
|
|
{"current_steps": 31260, "total_steps": 37885, "loss": 0.0, "lr": 1.8064835311740422e-07, "epoch": 4.125643394483305, "percentage": 82.51, "elapsed_time": "0:46:05", "remaining_time": "0:09:46", "throughput": 5563.43, "total_tokens": 15387216}
|
|
{"current_steps": 31265, "total_steps": 37885, "loss": 0.0035, "lr": 1.80384325864946e-07, "epoch": 4.126303286261053, "percentage": 82.53, "elapsed_time": "0:46:06", "remaining_time": "0:09:45", "throughput": 5563.64, "total_tokens": 15389648}
|
|
{"current_steps": 31270, "total_steps": 37885, "loss": 0.0001, "lr": 1.8012047256997977e-07, "epoch": 4.126963178038801, "percentage": 82.54, "elapsed_time": "0:46:06", "remaining_time": "0:09:45", "throughput": 5563.92, "total_tokens": 15392272}
|
|
{"current_steps": 31275, "total_steps": 37885, "loss": 0.0, "lr": 1.798567932885059e-07, "epoch": 4.12762306981655, "percentage": 82.55, "elapsed_time": "0:46:06", "remaining_time": "0:09:44", "throughput": 5564.2, "total_tokens": 15394896}
|
|
{"current_steps": 31280, "total_steps": 37885, "loss": 0.0343, "lr": 1.7959328807648856e-07, "epoch": 4.128282961594299, "percentage": 82.57, "elapsed_time": "0:46:07", "remaining_time": "0:09:44", "throughput": 5564.51, "total_tokens": 15397584}
|
|
{"current_steps": 31285, "total_steps": 37885, "loss": 0.0004, "lr": 1.7932995698985486e-07, "epoch": 4.128942853372047, "percentage": 82.58, "elapsed_time": "0:46:07", "remaining_time": "0:09:43", "throughput": 5564.77, "total_tokens": 15400144}
|
|
{"current_steps": 31290, "total_steps": 37885, "loss": 0.0, "lr": 1.7906680008449536e-07, "epoch": 4.129602745149795, "percentage": 82.59, "elapsed_time": "0:46:07", "remaining_time": "0:09:43", "throughput": 5565.07, "total_tokens": 15402832}
|
|
{"current_steps": 31295, "total_steps": 37885, "loss": 0.0002, "lr": 1.788038174162625e-07, "epoch": 4.130262636927544, "percentage": 82.61, "elapsed_time": "0:46:08", "remaining_time": "0:09:42", "throughput": 5565.3, "total_tokens": 15405328}
|
|
{"current_steps": 31300, "total_steps": 37885, "loss": 0.0005, "lr": 1.785410090409727e-07, "epoch": 4.130922528705292, "percentage": 82.62, "elapsed_time": "0:46:08", "remaining_time": "0:09:42", "throughput": 5565.57, "total_tokens": 15407952}
|
|
{"current_steps": 31305, "total_steps": 37885, "loss": 0.0001, "lr": 1.7827837501440556e-07, "epoch": 4.131582420483041, "percentage": 82.63, "elapsed_time": "0:46:08", "remaining_time": "0:09:41", "throughput": 5565.77, "total_tokens": 15410320}
|
|
{"current_steps": 31310, "total_steps": 37885, "loss": 0.0001, "lr": 1.7801591539230255e-07, "epoch": 4.132242312260789, "percentage": 82.64, "elapsed_time": "0:46:09", "remaining_time": "0:09:41", "throughput": 5565.97, "total_tokens": 15412688}
|
|
{"current_steps": 31315, "total_steps": 37885, "loss": 0.0338, "lr": 1.7775363023036916e-07, "epoch": 4.132902204038538, "percentage": 82.66, "elapsed_time": "0:46:09", "remaining_time": "0:09:41", "throughput": 5566.16, "total_tokens": 15415056}
|
|
{"current_steps": 31320, "total_steps": 37885, "loss": 0.0, "lr": 1.7749151958427379e-07, "epoch": 4.133562095816286, "percentage": 82.67, "elapsed_time": "0:46:09", "remaining_time": "0:09:40", "throughput": 5566.37, "total_tokens": 15417488}
|
|
{"current_steps": 31325, "total_steps": 37885, "loss": 0.0, "lr": 1.77229583509647e-07, "epoch": 4.134221987594034, "percentage": 82.68, "elapsed_time": "0:46:10", "remaining_time": "0:09:40", "throughput": 5566.55, "total_tokens": 15419792}
|
|
{"current_steps": 31330, "total_steps": 37885, "loss": 0.0, "lr": 1.7696782206208306e-07, "epoch": 4.134881879371783, "percentage": 82.7, "elapsed_time": "0:46:10", "remaining_time": "0:09:39", "throughput": 5566.85, "total_tokens": 15422480}
|
|
{"current_steps": 31335, "total_steps": 37885, "loss": 0.0001, "lr": 1.767062352971389e-07, "epoch": 4.135541771149532, "percentage": 82.71, "elapsed_time": "0:46:10", "remaining_time": "0:09:39", "throughput": 5567.02, "total_tokens": 15424784}
|
|
{"current_steps": 31340, "total_steps": 37885, "loss": 0.0, "lr": 1.7644482327033484e-07, "epoch": 4.13620166292728, "percentage": 82.72, "elapsed_time": "0:46:11", "remaining_time": "0:09:38", "throughput": 5567.29, "total_tokens": 15427344}
|
|
{"current_steps": 31345, "total_steps": 37885, "loss": 0.0, "lr": 1.761835860371532e-07, "epoch": 4.136861554705028, "percentage": 82.74, "elapsed_time": "0:46:11", "remaining_time": "0:09:38", "throughput": 5567.61, "total_tokens": 15430096}
|
|
{"current_steps": 31350, "total_steps": 37885, "loss": 0.0, "lr": 1.759225236530394e-07, "epoch": 4.1375214464827765, "percentage": 82.75, "elapsed_time": "0:46:11", "remaining_time": "0:09:37", "throughput": 5567.91, "total_tokens": 15432784}
|
|
{"current_steps": 31355, "total_steps": 37885, "loss": 0.0, "lr": 1.756616361734029e-07, "epoch": 4.138181338260526, "percentage": 82.76, "elapsed_time": "0:46:12", "remaining_time": "0:09:37", "throughput": 5567.99, "total_tokens": 15434832}
|
|
{"current_steps": 31360, "total_steps": 37885, "loss": 0.0205, "lr": 1.754009236536146e-07, "epoch": 4.138841230038274, "percentage": 82.78, "elapsed_time": "0:46:12", "remaining_time": "0:09:36", "throughput": 5568.21, "total_tokens": 15437264}
|
|
{"current_steps": 31365, "total_steps": 37885, "loss": 0.0, "lr": 1.7514038614900905e-07, "epoch": 4.139501121816022, "percentage": 82.79, "elapsed_time": "0:46:12", "remaining_time": "0:09:36", "throughput": 5568.52, "total_tokens": 15439952}
|
|
{"current_steps": 31370, "total_steps": 37885, "loss": 0.0, "lr": 1.748800237148833e-07, "epoch": 4.1401610135937705, "percentage": 82.8, "elapsed_time": "0:46:13", "remaining_time": "0:09:35", "throughput": 5568.67, "total_tokens": 15442192}
|
|
{"current_steps": 31375, "total_steps": 37885, "loss": 0.0, "lr": 1.7461983640649736e-07, "epoch": 4.140820905371519, "percentage": 82.82, "elapsed_time": "0:46:13", "remaining_time": "0:09:35", "throughput": 5568.86, "total_tokens": 15444560}
|
|
{"current_steps": 31380, "total_steps": 37885, "loss": 0.0, "lr": 1.7435982427907446e-07, "epoch": 4.141480797149267, "percentage": 82.83, "elapsed_time": "0:46:13", "remaining_time": "0:09:34", "throughput": 5569.1, "total_tokens": 15447056}
|
|
{"current_steps": 31385, "total_steps": 37885, "loss": 0.0, "lr": 1.7409998738779962e-07, "epoch": 4.142140688927016, "percentage": 82.84, "elapsed_time": "0:46:14", "remaining_time": "0:09:34", "throughput": 5569.38, "total_tokens": 15449680}
|
|
{"current_steps": 31390, "total_steps": 37885, "loss": 0.0, "lr": 1.7384032578782216e-07, "epoch": 4.1428005807047645, "percentage": 82.86, "elapsed_time": "0:46:14", "remaining_time": "0:09:34", "throughput": 5569.57, "total_tokens": 15452048}
|
|
{"current_steps": 31395, "total_steps": 37885, "loss": 0.0, "lr": 1.7358083953425306e-07, "epoch": 4.143460472482513, "percentage": 82.87, "elapsed_time": "0:46:14", "remaining_time": "0:09:33", "throughput": 5569.87, "total_tokens": 15454736}
|
|
{"current_steps": 31400, "total_steps": 37885, "loss": 0.0001, "lr": 1.7332152868216598e-07, "epoch": 4.144120364260261, "percentage": 82.88, "elapsed_time": "0:46:15", "remaining_time": "0:09:33", "throughput": 5570.11, "total_tokens": 15457232}
|
|
{"current_steps": 31405, "total_steps": 37885, "loss": 0.0, "lr": 1.7306239328659822e-07, "epoch": 4.144780256038009, "percentage": 82.9, "elapsed_time": "0:46:15", "remaining_time": "0:09:32", "throughput": 5570.34, "total_tokens": 15459728}
|
|
{"current_steps": 31410, "total_steps": 37885, "loss": 0.0, "lr": 1.728034334025491e-07, "epoch": 4.1454401478157585, "percentage": 82.91, "elapsed_time": "0:46:15", "remaining_time": "0:09:32", "throughput": 5570.54, "total_tokens": 15462096}
|
|
{"current_steps": 31415, "total_steps": 37885, "loss": 0.0511, "lr": 1.7254464908498156e-07, "epoch": 4.146100039593507, "percentage": 82.92, "elapsed_time": "0:46:16", "remaining_time": "0:09:31", "throughput": 5570.82, "total_tokens": 15464720}
|
|
{"current_steps": 31420, "total_steps": 37885, "loss": 0.0003, "lr": 1.7228604038882e-07, "epoch": 4.146759931371255, "percentage": 82.94, "elapsed_time": "0:46:16", "remaining_time": "0:09:31", "throughput": 5571.0, "total_tokens": 15467024}
|
|
{"current_steps": 31425, "total_steps": 37885, "loss": 0.0, "lr": 1.720276073689525e-07, "epoch": 4.147419823149003, "percentage": 82.95, "elapsed_time": "0:46:16", "remaining_time": "0:09:30", "throughput": 5571.23, "total_tokens": 15469520}
|
|
{"current_steps": 31430, "total_steps": 37885, "loss": 0.0411, "lr": 1.7176935008022986e-07, "epoch": 4.148079714926752, "percentage": 82.96, "elapsed_time": "0:46:17", "remaining_time": "0:09:30", "throughput": 5571.4, "total_tokens": 15471824}
|
|
{"current_steps": 31435, "total_steps": 37885, "loss": 0.0001, "lr": 1.715112685774649e-07, "epoch": 4.148739606704501, "percentage": 82.97, "elapsed_time": "0:46:17", "remaining_time": "0:09:29", "throughput": 5571.53, "total_tokens": 15474000}
|
|
{"current_steps": 31440, "total_steps": 37885, "loss": 0.0, "lr": 1.7125336291543368e-07, "epoch": 4.149399498482249, "percentage": 82.99, "elapsed_time": "0:46:17", "remaining_time": "0:09:29", "throughput": 5571.78, "total_tokens": 15476560}
|
|
{"current_steps": 31445, "total_steps": 37885, "loss": 0.0426, "lr": 1.7099563314887498e-07, "epoch": 4.150059390259997, "percentage": 83.0, "elapsed_time": "0:46:17", "remaining_time": "0:09:28", "throughput": 5571.91, "total_tokens": 15478736}
|
|
{"current_steps": 31450, "total_steps": 37885, "loss": 0.0, "lr": 1.7073807933249008e-07, "epoch": 4.150719282037746, "percentage": 83.01, "elapsed_time": "0:46:18", "remaining_time": "0:09:28", "throughput": 5572.05, "total_tokens": 15480976}
|
|
{"current_steps": 31455, "total_steps": 37885, "loss": 0.0595, "lr": 1.7048070152094263e-07, "epoch": 4.151379173815494, "percentage": 83.03, "elapsed_time": "0:46:18", "remaining_time": "0:09:28", "throughput": 5572.3, "total_tokens": 15483536}
|
|
{"current_steps": 31460, "total_steps": 37885, "loss": 0.0001, "lr": 1.7022349976885941e-07, "epoch": 4.152039065593243, "percentage": 83.04, "elapsed_time": "0:46:18", "remaining_time": "0:09:27", "throughput": 5572.53, "total_tokens": 15486032}
|
|
{"current_steps": 31465, "total_steps": 37885, "loss": 0.0519, "lr": 1.6996647413082977e-07, "epoch": 4.152698957370991, "percentage": 83.05, "elapsed_time": "0:46:19", "remaining_time": "0:09:27", "throughput": 5572.88, "total_tokens": 15488912}
|
|
{"current_steps": 31470, "total_steps": 37885, "loss": 0.0, "lr": 1.6970962466140514e-07, "epoch": 4.15335884914874, "percentage": 83.07, "elapsed_time": "0:46:19", "remaining_time": "0:09:26", "throughput": 5573.1, "total_tokens": 15491408}
|
|
{"current_steps": 31475, "total_steps": 37885, "loss": 0.0, "lr": 1.6945295141510018e-07, "epoch": 4.154018740926488, "percentage": 83.08, "elapsed_time": "0:46:20", "remaining_time": "0:09:26", "throughput": 5573.29, "total_tokens": 15493776}
|
|
{"current_steps": 31480, "total_steps": 37885, "loss": 0.0, "lr": 1.691964544463922e-07, "epoch": 4.154678632704236, "percentage": 83.09, "elapsed_time": "0:46:20", "remaining_time": "0:09:25", "throughput": 5573.52, "total_tokens": 15496272}
|
|
{"current_steps": 31485, "total_steps": 37885, "loss": 0.0, "lr": 1.6894013380972028e-07, "epoch": 4.155338524481985, "percentage": 83.11, "elapsed_time": "0:46:20", "remaining_time": "0:09:25", "throughput": 5573.66, "total_tokens": 15498512}
|
|
{"current_steps": 31490, "total_steps": 37885, "loss": 0.0, "lr": 1.6868398955948693e-07, "epoch": 4.155998416259734, "percentage": 83.12, "elapsed_time": "0:46:21", "remaining_time": "0:09:24", "throughput": 5573.89, "total_tokens": 15501008}
|
|
{"current_steps": 31495, "total_steps": 37885, "loss": 0.0, "lr": 1.684280217500569e-07, "epoch": 4.156658308037482, "percentage": 83.13, "elapsed_time": "0:46:21", "remaining_time": "0:09:24", "throughput": 5574.05, "total_tokens": 15503312}
|
|
{"current_steps": 31500, "total_steps": 37885, "loss": 0.0005, "lr": 1.6817223043575768e-07, "epoch": 4.15731819981523, "percentage": 83.15, "elapsed_time": "0:46:21", "remaining_time": "0:09:23", "throughput": 5574.34, "total_tokens": 15506000}
|
|
{"current_steps": 31505, "total_steps": 37885, "loss": 0.0253, "lr": 1.6791661567087888e-07, "epoch": 4.1579780915929785, "percentage": 83.16, "elapsed_time": "0:46:22", "remaining_time": "0:09:23", "throughput": 5574.65, "total_tokens": 15508752}
|
|
{"current_steps": 31510, "total_steps": 37885, "loss": 0.0, "lr": 1.6766117750967244e-07, "epoch": 4.158637983370728, "percentage": 83.17, "elapsed_time": "0:46:22", "remaining_time": "0:09:22", "throughput": 5574.94, "total_tokens": 15511440}
|
|
{"current_steps": 31515, "total_steps": 37885, "loss": 0.0, "lr": 1.6740591600635433e-07, "epoch": 4.159297875148476, "percentage": 83.19, "elapsed_time": "0:46:22", "remaining_time": "0:09:22", "throughput": 5575.13, "total_tokens": 15513808}
|
|
{"current_steps": 31520, "total_steps": 37885, "loss": 0.0, "lr": 1.671508312151011e-07, "epoch": 4.159957766926224, "percentage": 83.2, "elapsed_time": "0:46:23", "remaining_time": "0:09:21", "throughput": 5575.42, "total_tokens": 15516496}
|
|
{"current_steps": 31525, "total_steps": 37885, "loss": 0.0645, "lr": 1.6689592319005296e-07, "epoch": 4.1606176587039725, "percentage": 83.21, "elapsed_time": "0:46:23", "remaining_time": "0:09:21", "throughput": 5575.66, "total_tokens": 15519056}
|
|
{"current_steps": 31530, "total_steps": 37885, "loss": 0.0001, "lr": 1.6664119198531245e-07, "epoch": 4.161277550481721, "percentage": 83.23, "elapsed_time": "0:46:23", "remaining_time": "0:09:21", "throughput": 5575.73, "total_tokens": 15521104}
|
|
{"current_steps": 31535, "total_steps": 37885, "loss": 0.0294, "lr": 1.6638663765494398e-07, "epoch": 4.161937442259469, "percentage": 83.24, "elapsed_time": "0:46:24", "remaining_time": "0:09:20", "throughput": 5575.88, "total_tokens": 15523344}
|
|
{"current_steps": 31540, "total_steps": 37885, "loss": 0.0, "lr": 1.6613226025297545e-07, "epoch": 4.162597334037218, "percentage": 83.25, "elapsed_time": "0:46:24", "remaining_time": "0:09:20", "throughput": 5576.11, "total_tokens": 15525840}
|
|
{"current_steps": 31545, "total_steps": 37885, "loss": 0.0, "lr": 1.6587805983339564e-07, "epoch": 4.1632572258149665, "percentage": 83.27, "elapsed_time": "0:46:24", "remaining_time": "0:09:19", "throughput": 5576.27, "total_tokens": 15528144}
|
|
{"current_steps": 31550, "total_steps": 37885, "loss": 0.0, "lr": 1.65624036450158e-07, "epoch": 4.163917117592715, "percentage": 83.28, "elapsed_time": "0:46:25", "remaining_time": "0:09:19", "throughput": 5576.46, "total_tokens": 15530512}
|
|
{"current_steps": 31555, "total_steps": 37885, "loss": 0.0, "lr": 1.6537019015717647e-07, "epoch": 4.164577009370463, "percentage": 83.29, "elapsed_time": "0:46:25", "remaining_time": "0:09:18", "throughput": 5576.64, "total_tokens": 15532880}
|
|
{"current_steps": 31560, "total_steps": 37885, "loss": 0.0, "lr": 1.6511652100832797e-07, "epoch": 4.165236901148211, "percentage": 83.3, "elapsed_time": "0:46:25", "remaining_time": "0:09:18", "throughput": 5576.89, "total_tokens": 15535440}
|
|
{"current_steps": 31565, "total_steps": 37885, "loss": 0.0, "lr": 1.648630290574522e-07, "epoch": 4.1658967929259605, "percentage": 83.32, "elapsed_time": "0:46:26", "remaining_time": "0:09:17", "throughput": 5577.13, "total_tokens": 15538000}
|
|
{"current_steps": 31570, "total_steps": 37885, "loss": 0.02, "lr": 1.646097143583508e-07, "epoch": 4.166556684703709, "percentage": 83.33, "elapsed_time": "0:46:26", "remaining_time": "0:09:17", "throughput": 5577.42, "total_tokens": 15540688}
|
|
{"current_steps": 31575, "total_steps": 37885, "loss": 0.0252, "lr": 1.6435657696478844e-07, "epoch": 4.167216576481457, "percentage": 83.34, "elapsed_time": "0:46:26", "remaining_time": "0:09:16", "throughput": 5577.61, "total_tokens": 15543120}
|
|
{"current_steps": 31580, "total_steps": 37885, "loss": 0.0112, "lr": 1.6410361693049114e-07, "epoch": 4.167876468259205, "percentage": 83.36, "elapsed_time": "0:46:27", "remaining_time": "0:09:16", "throughput": 5577.69, "total_tokens": 15545232}
|
|
{"current_steps": 31585, "total_steps": 37885, "loss": 0.0, "lr": 1.6385083430914792e-07, "epoch": 4.168536360036954, "percentage": 83.37, "elapsed_time": "0:46:27", "remaining_time": "0:09:15", "throughput": 5577.97, "total_tokens": 15547920}
|
|
{"current_steps": 31590, "total_steps": 37885, "loss": 0.0456, "lr": 1.6359822915441058e-07, "epoch": 4.169196251814703, "percentage": 83.38, "elapsed_time": "0:46:27", "remaining_time": "0:09:15", "throughput": 5578.13, "total_tokens": 15550224}
|
|
{"current_steps": 31595, "total_steps": 37885, "loss": 0.0, "lr": 1.6334580151989207e-07, "epoch": 4.169856143592451, "percentage": 83.4, "elapsed_time": "0:46:28", "remaining_time": "0:09:15", "throughput": 5578.32, "total_tokens": 15552656}
|
|
{"current_steps": 31600, "total_steps": 37885, "loss": 0.0, "lr": 1.630935514591686e-07, "epoch": 4.170516035370199, "percentage": 83.41, "elapsed_time": "0:46:28", "remaining_time": "0:09:14", "throughput": 5578.58, "total_tokens": 15555280}
|
|
{"current_steps": 31605, "total_steps": 37885, "loss": 0.0, "lr": 1.6284147902577872e-07, "epoch": 4.171175927147948, "percentage": 83.42, "elapsed_time": "0:46:28", "remaining_time": "0:09:14", "throughput": 5578.79, "total_tokens": 15557776}
|
|
{"current_steps": 31610, "total_steps": 37885, "loss": 0.0001, "lr": 1.6258958427322234e-07, "epoch": 4.171835818925696, "percentage": 83.44, "elapsed_time": "0:46:29", "remaining_time": "0:09:13", "throughput": 5578.99, "total_tokens": 15560208}
|
|
{"current_steps": 31615, "total_steps": 37885, "loss": 0.0002, "lr": 1.623378672549628e-07, "epoch": 4.172495710703445, "percentage": 83.45, "elapsed_time": "0:46:29", "remaining_time": "0:09:13", "throughput": 5579.23, "total_tokens": 15562768}
|
|
{"current_steps": 31620, "total_steps": 37885, "loss": 0.0, "lr": 1.620863280244249e-07, "epoch": 4.173155602481193, "percentage": 83.46, "elapsed_time": "0:46:29", "remaining_time": "0:09:12", "throughput": 5579.47, "total_tokens": 15565328}
|
|
{"current_steps": 31625, "total_steps": 37885, "loss": 0.0005, "lr": 1.6183496663499652e-07, "epoch": 4.173815494258942, "percentage": 83.48, "elapsed_time": "0:46:30", "remaining_time": "0:09:12", "throughput": 5579.62, "total_tokens": 15567632}
|
|
{"current_steps": 31630, "total_steps": 37885, "loss": 0.0, "lr": 1.6158378314002673e-07, "epoch": 4.17447538603669, "percentage": 83.49, "elapsed_time": "0:46:30", "remaining_time": "0:09:11", "throughput": 5579.82, "total_tokens": 15570064}
|
|
{"current_steps": 31635, "total_steps": 37885, "loss": 0.0, "lr": 1.613327775928276e-07, "epoch": 4.175135277814438, "percentage": 83.5, "elapsed_time": "0:46:30", "remaining_time": "0:09:11", "throughput": 5580.05, "total_tokens": 15572624}
|
|
{"current_steps": 31640, "total_steps": 37885, "loss": 0.0, "lr": 1.6108195004667357e-07, "epoch": 4.175795169592186, "percentage": 83.52, "elapsed_time": "0:46:31", "remaining_time": "0:09:10", "throughput": 5580.12, "total_tokens": 15574672}
|
|
{"current_steps": 31645, "total_steps": 37885, "loss": 0.0002, "lr": 1.6083130055480033e-07, "epoch": 4.176455061369936, "percentage": 83.53, "elapsed_time": "0:46:31", "remaining_time": "0:09:10", "throughput": 5580.44, "total_tokens": 15577488}
|
|
{"current_steps": 31650, "total_steps": 37885, "loss": 0.0, "lr": 1.6058082917040682e-07, "epoch": 4.177114953147684, "percentage": 83.54, "elapsed_time": "0:46:31", "remaining_time": "0:09:09", "throughput": 5580.64, "total_tokens": 15579920}
|
|
{"current_steps": 31655, "total_steps": 37885, "loss": 0.0, "lr": 1.6033053594665402e-07, "epoch": 4.177774844925432, "percentage": 83.56, "elapsed_time": "0:46:32", "remaining_time": "0:09:09", "throughput": 5580.81, "total_tokens": 15582224}
|
|
{"current_steps": 31660, "total_steps": 37885, "loss": 0.0, "lr": 1.6008042093666428e-07, "epoch": 4.17843473670318, "percentage": 83.57, "elapsed_time": "0:46:32", "remaining_time": "0:09:09", "throughput": 5581.02, "total_tokens": 15584656}
|
|
{"current_steps": 31665, "total_steps": 37885, "loss": 0.0, "lr": 1.5983048419352297e-07, "epoch": 4.179094628480929, "percentage": 83.58, "elapsed_time": "0:46:32", "remaining_time": "0:09:08", "throughput": 5581.21, "total_tokens": 15587024}
|
|
{"current_steps": 31670, "total_steps": 37885, "loss": 0.0002, "lr": 1.5958072577027738e-07, "epoch": 4.179754520258678, "percentage": 83.6, "elapsed_time": "0:46:33", "remaining_time": "0:09:08", "throughput": 5581.48, "total_tokens": 15589648}
|
|
{"current_steps": 31675, "total_steps": 37885, "loss": 0.0, "lr": 1.5933114571993712e-07, "epoch": 4.180414412036426, "percentage": 83.61, "elapsed_time": "0:46:33", "remaining_time": "0:09:07", "throughput": 5581.81, "total_tokens": 15592464}
|
|
{"current_steps": 31680, "total_steps": 37885, "loss": 0.0381, "lr": 1.5908174409547347e-07, "epoch": 4.181074303814174, "percentage": 83.62, "elapsed_time": "0:46:33", "remaining_time": "0:09:07", "throughput": 5582.07, "total_tokens": 15595024}
|
|
{"current_steps": 31685, "total_steps": 37885, "loss": 0.0677, "lr": 1.588325209498198e-07, "epoch": 4.181734195591923, "percentage": 83.63, "elapsed_time": "0:46:34", "remaining_time": "0:09:06", "throughput": 5582.34, "total_tokens": 15597648}
|
|
{"current_steps": 31690, "total_steps": 37885, "loss": 0.0, "lr": 1.5858347633587277e-07, "epoch": 4.182394087369671, "percentage": 83.65, "elapsed_time": "0:46:34", "remaining_time": "0:09:06", "throughput": 5582.59, "total_tokens": 15600208}
|
|
{"current_steps": 31695, "total_steps": 37885, "loss": 0.0, "lr": 1.5833461030648954e-07, "epoch": 4.18305397914742, "percentage": 83.66, "elapsed_time": "0:46:34", "remaining_time": "0:09:05", "throughput": 5582.84, "total_tokens": 15602768}
|
|
{"current_steps": 31700, "total_steps": 37885, "loss": 0.0207, "lr": 1.5808592291449074e-07, "epoch": 4.183713870925168, "percentage": 83.67, "elapsed_time": "0:46:35", "remaining_time": "0:09:05", "throughput": 5583.14, "total_tokens": 15605456}
|
|
{"current_steps": 31705, "total_steps": 37885, "loss": 0.0003, "lr": 1.5783741421265784e-07, "epoch": 4.184373762702917, "percentage": 83.69, "elapsed_time": "0:46:35", "remaining_time": "0:09:04", "throughput": 5583.4, "total_tokens": 15608016}
|
|
{"current_steps": 31710, "total_steps": 37885, "loss": 0.0, "lr": 1.575890842537353e-07, "epoch": 4.185033654480665, "percentage": 83.7, "elapsed_time": "0:46:35", "remaining_time": "0:09:04", "throughput": 5583.55, "total_tokens": 15610256}
|
|
{"current_steps": 31715, "total_steps": 37885, "loss": 0.0, "lr": 1.573409330904296e-07, "epoch": 4.185693546258413, "percentage": 83.71, "elapsed_time": "0:46:36", "remaining_time": "0:09:03", "throughput": 5583.76, "total_tokens": 15612688}
|
|
{"current_steps": 31720, "total_steps": 37885, "loss": 0.0579, "lr": 1.5709296077540835e-07, "epoch": 4.1863534380361624, "percentage": 83.73, "elapsed_time": "0:46:36", "remaining_time": "0:09:03", "throughput": 5584.06, "total_tokens": 15615376}
|
|
{"current_steps": 31725, "total_steps": 37885, "loss": 0.0441, "lr": 1.5684516736130283e-07, "epoch": 4.187013329813911, "percentage": 83.74, "elapsed_time": "0:46:36", "remaining_time": "0:09:03", "throughput": 5584.22, "total_tokens": 15617680}
|
|
{"current_steps": 31730, "total_steps": 37885, "loss": 0.0, "lr": 1.5659755290070453e-07, "epoch": 4.187673221591659, "percentage": 83.75, "elapsed_time": "0:46:37", "remaining_time": "0:09:02", "throughput": 5584.54, "total_tokens": 15620432}
|
|
{"current_steps": 31735, "total_steps": 37885, "loss": 0.0, "lr": 1.5635011744616854e-07, "epoch": 4.188333113369407, "percentage": 83.77, "elapsed_time": "0:46:37", "remaining_time": "0:09:02", "throughput": 5584.71, "total_tokens": 15622736}
|
|
{"current_steps": 31740, "total_steps": 37885, "loss": 0.0, "lr": 1.5610286105021063e-07, "epoch": 4.188993005147156, "percentage": 83.78, "elapsed_time": "0:46:37", "remaining_time": "0:09:01", "throughput": 5585.0, "total_tokens": 15625424}
|
|
{"current_steps": 31745, "total_steps": 37885, "loss": 0.0003, "lr": 1.5585578376530938e-07, "epoch": 4.189652896924905, "percentage": 83.79, "elapsed_time": "0:46:38", "remaining_time": "0:09:01", "throughput": 5585.23, "total_tokens": 15627920}
|
|
{"current_steps": 31750, "total_steps": 37885, "loss": 0.0, "lr": 1.556088856439055e-07, "epoch": 4.190312788702653, "percentage": 83.81, "elapsed_time": "0:46:38", "remaining_time": "0:09:00", "throughput": 5585.44, "total_tokens": 15630352}
|
|
{"current_steps": 31755, "total_steps": 37885, "loss": 0.0518, "lr": 1.5536216673840084e-07, "epoch": 4.190972680480401, "percentage": 83.82, "elapsed_time": "0:46:38", "remaining_time": "0:09:00", "throughput": 5585.68, "total_tokens": 15632848}
|
|
{"current_steps": 31760, "total_steps": 37885, "loss": 0.0, "lr": 1.551156271011599e-07, "epoch": 4.19163257225815, "percentage": 83.83, "elapsed_time": "0:46:39", "remaining_time": "0:08:59", "throughput": 5585.91, "total_tokens": 15635344}
|
|
{"current_steps": 31765, "total_steps": 37885, "loss": 0.0, "lr": 1.5486926678450907e-07, "epoch": 4.192292464035898, "percentage": 83.85, "elapsed_time": "0:46:39", "remaining_time": "0:08:59", "throughput": 5586.14, "total_tokens": 15637840}
|
|
{"current_steps": 31770, "total_steps": 37885, "loss": 0.0, "lr": 1.5462308584073625e-07, "epoch": 4.192952355813647, "percentage": 83.86, "elapsed_time": "0:46:39", "remaining_time": "0:08:58", "throughput": 5586.35, "total_tokens": 15640272}
|
|
{"current_steps": 31775, "total_steps": 37885, "loss": 0.0, "lr": 1.5437708432209174e-07, "epoch": 4.193612247591395, "percentage": 83.87, "elapsed_time": "0:46:40", "remaining_time": "0:08:58", "throughput": 5586.6, "total_tokens": 15642832}
|
|
{"current_steps": 31780, "total_steps": 37885, "loss": 0.0, "lr": 1.5413126228078755e-07, "epoch": 4.194272139369144, "percentage": 83.89, "elapsed_time": "0:46:40", "remaining_time": "0:08:57", "throughput": 5586.78, "total_tokens": 15645136}
|
|
{"current_steps": 31785, "total_steps": 37885, "loss": 0.0, "lr": 1.5388561976899784e-07, "epoch": 4.194932031146892, "percentage": 83.9, "elapsed_time": "0:46:40", "remaining_time": "0:08:57", "throughput": 5586.92, "total_tokens": 15647376}
|
|
{"current_steps": 31790, "total_steps": 37885, "loss": 0.0, "lr": 1.53640156838858e-07, "epoch": 4.19559192292464, "percentage": 83.91, "elapsed_time": "0:46:41", "remaining_time": "0:08:57", "throughput": 5587.07, "total_tokens": 15649616}
|
|
{"current_steps": 31795, "total_steps": 37885, "loss": 0.0, "lr": 1.5339487354246605e-07, "epoch": 4.196251814702388, "percentage": 83.93, "elapsed_time": "0:46:41", "remaining_time": "0:08:56", "throughput": 5587.28, "total_tokens": 15652048}
|
|
{"current_steps": 31800, "total_steps": 37885, "loss": 0.001, "lr": 1.5314976993188177e-07, "epoch": 4.196911706480138, "percentage": 83.94, "elapsed_time": "0:46:41", "remaining_time": "0:08:56", "throughput": 5587.43, "total_tokens": 15654288}
|
|
{"current_steps": 31805, "total_steps": 37885, "loss": 0.0, "lr": 1.5290484605912624e-07, "epoch": 4.197571598257886, "percentage": 83.95, "elapsed_time": "0:46:42", "remaining_time": "0:08:55", "throughput": 5587.66, "total_tokens": 15656784}
|
|
{"current_steps": 31810, "total_steps": 37885, "loss": 0.0, "lr": 1.5266010197618296e-07, "epoch": 4.198231490035634, "percentage": 83.96, "elapsed_time": "0:46:42", "remaining_time": "0:08:55", "throughput": 5587.97, "total_tokens": 15659536}
|
|
{"current_steps": 31815, "total_steps": 37885, "loss": 0.001, "lr": 1.5241553773499727e-07, "epoch": 4.198891381813382, "percentage": 83.98, "elapsed_time": "0:46:42", "remaining_time": "0:08:54", "throughput": 5588.12, "total_tokens": 15661776}
|
|
{"current_steps": 31820, "total_steps": 37885, "loss": 0.0, "lr": 1.5217115338747577e-07, "epoch": 4.199551273591131, "percentage": 83.99, "elapsed_time": "0:46:43", "remaining_time": "0:08:54", "throughput": 5588.33, "total_tokens": 15664208}
|
|
{"current_steps": 31825, "total_steps": 37885, "loss": 0.0132, "lr": 1.5192694898548742e-07, "epoch": 4.20021116536888, "percentage": 84.0, "elapsed_time": "0:46:43", "remaining_time": "0:08:53", "throughput": 5588.51, "total_tokens": 15666576}
|
|
{"current_steps": 31830, "total_steps": 37885, "loss": 0.0, "lr": 1.5168292458086286e-07, "epoch": 4.200871057146628, "percentage": 84.02, "elapsed_time": "0:46:43", "remaining_time": "0:08:53", "throughput": 5588.68, "total_tokens": 15668880}
|
|
{"current_steps": 31835, "total_steps": 37885, "loss": 0.028, "lr": 1.5143908022539487e-07, "epoch": 4.201530948924376, "percentage": 84.03, "elapsed_time": "0:46:44", "remaining_time": "0:08:52", "throughput": 5588.83, "total_tokens": 15671120}
|
|
{"current_steps": 31840, "total_steps": 37885, "loss": 0.0001, "lr": 1.5119541597083718e-07, "epoch": 4.202190840702125, "percentage": 84.04, "elapsed_time": "0:46:44", "remaining_time": "0:08:52", "throughput": 5588.99, "total_tokens": 15673424}
|
|
{"current_steps": 31845, "total_steps": 37885, "loss": 0.0, "lr": 1.5095193186890554e-07, "epoch": 4.202850732479873, "percentage": 84.06, "elapsed_time": "0:46:44", "remaining_time": "0:08:51", "throughput": 5589.29, "total_tokens": 15676112}
|
|
{"current_steps": 31850, "total_steps": 37885, "loss": 0.0006, "lr": 1.5070862797127847e-07, "epoch": 4.203510624257622, "percentage": 84.07, "elapsed_time": "0:46:45", "remaining_time": "0:08:51", "throughput": 5589.52, "total_tokens": 15678608}
|
|
{"current_steps": 31855, "total_steps": 37885, "loss": 0.0074, "lr": 1.504655043295948e-07, "epoch": 4.20417051603537, "percentage": 84.08, "elapsed_time": "0:46:45", "remaining_time": "0:08:51", "throughput": 5589.7, "total_tokens": 15680976}
|
|
{"current_steps": 31860, "total_steps": 37885, "loss": 0.0, "lr": 1.5022256099545594e-07, "epoch": 4.204830407813119, "percentage": 84.1, "elapsed_time": "0:46:45", "remaining_time": "0:08:50", "throughput": 5589.88, "total_tokens": 15683280}
|
|
{"current_steps": 31865, "total_steps": 37885, "loss": 0.0, "lr": 1.4997979802042515e-07, "epoch": 4.205490299590867, "percentage": 84.11, "elapsed_time": "0:46:45", "remaining_time": "0:08:50", "throughput": 5590.06, "total_tokens": 15685648}
|
|
{"current_steps": 31870, "total_steps": 37885, "loss": 0.0, "lr": 1.4973721545602668e-07, "epoch": 4.206150191368615, "percentage": 84.12, "elapsed_time": "0:46:46", "remaining_time": "0:08:49", "throughput": 5590.34, "total_tokens": 15688272}
|
|
{"current_steps": 31875, "total_steps": 37885, "loss": 0.0001, "lr": 1.4949481335374736e-07, "epoch": 4.206810083146364, "percentage": 84.14, "elapsed_time": "0:46:46", "remaining_time": "0:08:49", "throughput": 5590.56, "total_tokens": 15690768}
|
|
{"current_steps": 31880, "total_steps": 37885, "loss": 0.0, "lr": 1.4925259176503446e-07, "epoch": 4.207469974924113, "percentage": 84.15, "elapsed_time": "0:46:46", "remaining_time": "0:08:48", "throughput": 5590.85, "total_tokens": 15693456}
|
|
{"current_steps": 31885, "total_steps": 37885, "loss": 0.0, "lr": 1.4901055074129888e-07, "epoch": 4.208129866701861, "percentage": 84.16, "elapsed_time": "0:46:47", "remaining_time": "0:08:48", "throughput": 5591.06, "total_tokens": 15695888}
|
|
{"current_steps": 31890, "total_steps": 37885, "loss": 0.0, "lr": 1.487686903339115e-07, "epoch": 4.208789758479609, "percentage": 84.18, "elapsed_time": "0:46:47", "remaining_time": "0:08:47", "throughput": 5591.19, "total_tokens": 15698064}
|
|
{"current_steps": 31895, "total_steps": 37885, "loss": 0.0, "lr": 1.4852701059420526e-07, "epoch": 4.2094496502573575, "percentage": 84.19, "elapsed_time": "0:46:47", "remaining_time": "0:08:47", "throughput": 5591.36, "total_tokens": 15700368}
|
|
{"current_steps": 31900, "total_steps": 37885, "loss": 0.0, "lr": 1.4828551157347514e-07, "epoch": 4.210109542035106, "percentage": 84.2, "elapsed_time": "0:46:48", "remaining_time": "0:08:46", "throughput": 5591.59, "total_tokens": 15702864}
|
|
{"current_steps": 31905, "total_steps": 37885, "loss": 0.0, "lr": 1.4804419332297746e-07, "epoch": 4.210769433812855, "percentage": 84.22, "elapsed_time": "0:46:48", "remaining_time": "0:08:46", "throughput": 5591.74, "total_tokens": 15705104}
|
|
{"current_steps": 31910, "total_steps": 37885, "loss": 0.0, "lr": 1.478030558939307e-07, "epoch": 4.211429325590603, "percentage": 84.23, "elapsed_time": "0:46:48", "remaining_time": "0:08:45", "throughput": 5591.89, "total_tokens": 15707344}
|
|
{"current_steps": 31915, "total_steps": 37885, "loss": 0.0, "lr": 1.4756209933751396e-07, "epoch": 4.2120892173683515, "percentage": 84.24, "elapsed_time": "0:46:49", "remaining_time": "0:08:45", "throughput": 5592.14, "total_tokens": 15709904}
|
|
{"current_steps": 31920, "total_steps": 37885, "loss": 0.0, "lr": 1.4732132370486872e-07, "epoch": 4.2127491091461, "percentage": 84.25, "elapsed_time": "0:46:49", "remaining_time": "0:08:45", "throughput": 5592.33, "total_tokens": 15712272}
|
|
{"current_steps": 31925, "total_steps": 37885, "loss": 0.0, "lr": 1.4708072904709812e-07, "epoch": 4.213409000923848, "percentage": 84.27, "elapsed_time": "0:46:49", "remaining_time": "0:08:44", "throughput": 5592.6, "total_tokens": 15714896}
|
|
{"current_steps": 31930, "total_steps": 37885, "loss": 0.0011, "lr": 1.468403154152663e-07, "epoch": 4.214068892701597, "percentage": 84.28, "elapsed_time": "0:46:50", "remaining_time": "0:08:44", "throughput": 5592.85, "total_tokens": 15717456}
|
|
{"current_steps": 31935, "total_steps": 37885, "loss": 0.0113, "lr": 1.4660008286039937e-07, "epoch": 4.2147287844793455, "percentage": 84.29, "elapsed_time": "0:46:50", "remaining_time": "0:08:43", "throughput": 5593.1, "total_tokens": 15720016}
|
|
{"current_steps": 31940, "total_steps": 37885, "loss": 0.0, "lr": 1.4636003143348518e-07, "epoch": 4.215388676257094, "percentage": 84.31, "elapsed_time": "0:46:50", "remaining_time": "0:08:43", "throughput": 5593.27, "total_tokens": 15722320}
|
|
{"current_steps": 31945, "total_steps": 37885, "loss": 0.0, "lr": 1.4612016118547265e-07, "epoch": 4.216048568034842, "percentage": 84.32, "elapsed_time": "0:46:51", "remaining_time": "0:08:42", "throughput": 5593.51, "total_tokens": 15724816}
|
|
{"current_steps": 31950, "total_steps": 37885, "loss": 0.0396, "lr": 1.4588047216727251e-07, "epoch": 4.21670845981259, "percentage": 84.33, "elapsed_time": "0:46:51", "remaining_time": "0:08:42", "throughput": 5593.79, "total_tokens": 15727440}
|
|
{"current_steps": 31955, "total_steps": 37885, "loss": 0.0, "lr": 1.4564096442975715e-07, "epoch": 4.2173683515903395, "percentage": 84.35, "elapsed_time": "0:46:51", "remaining_time": "0:08:41", "throughput": 5593.96, "total_tokens": 15729744}
|
|
{"current_steps": 31960, "total_steps": 37885, "loss": 0.0, "lr": 1.454016380237605e-07, "epoch": 4.218028243368088, "percentage": 84.36, "elapsed_time": "0:46:52", "remaining_time": "0:08:41", "throughput": 5594.21, "total_tokens": 15732304}
|
|
{"current_steps": 31965, "total_steps": 37885, "loss": 0.0, "lr": 1.4516249300007743e-07, "epoch": 4.218688135145836, "percentage": 84.37, "elapsed_time": "0:46:52", "remaining_time": "0:08:40", "throughput": 5594.38, "total_tokens": 15734608}
|
|
{"current_steps": 31970, "total_steps": 37885, "loss": 0.0, "lr": 1.4492352940946506e-07, "epoch": 4.219348026923584, "percentage": 84.39, "elapsed_time": "0:46:52", "remaining_time": "0:08:40", "throughput": 5594.57, "total_tokens": 15736976}
|
|
{"current_steps": 31975, "total_steps": 37885, "loss": 0.0019, "lr": 1.4468474730264168e-07, "epoch": 4.220007918701333, "percentage": 84.4, "elapsed_time": "0:46:53", "remaining_time": "0:08:39", "throughput": 5594.86, "total_tokens": 15739664}
|
|
{"current_steps": 31980, "total_steps": 37885, "loss": 0.0, "lr": 1.4444614673028687e-07, "epoch": 4.220667810479082, "percentage": 84.41, "elapsed_time": "0:46:53", "remaining_time": "0:08:39", "throughput": 5595.07, "total_tokens": 15742096}
|
|
{"current_steps": 31985, "total_steps": 37885, "loss": 0.0, "lr": 1.442077277430419e-07, "epoch": 4.22132770225683, "percentage": 84.43, "elapsed_time": "0:46:53", "remaining_time": "0:08:39", "throughput": 5595.26, "total_tokens": 15744464}
|
|
{"current_steps": 31990, "total_steps": 37885, "loss": 0.0, "lr": 1.4396949039150984e-07, "epoch": 4.221987594034578, "percentage": 84.44, "elapsed_time": "0:46:54", "remaining_time": "0:08:38", "throughput": 5595.46, "total_tokens": 15746896}
|
|
{"current_steps": 31995, "total_steps": 37885, "loss": 0.0, "lr": 1.4373143472625438e-07, "epoch": 4.222647485812327, "percentage": 84.45, "elapsed_time": "0:46:54", "remaining_time": "0:08:38", "throughput": 5595.63, "total_tokens": 15749200}
|
|
{"current_steps": 32000, "total_steps": 37885, "loss": 0.0, "lr": 1.4349356079780116e-07, "epoch": 4.223307377590075, "percentage": 84.47, "elapsed_time": "0:46:54", "remaining_time": "0:08:37", "throughput": 5595.86, "total_tokens": 15751696}
|
|
{"current_steps": 32005, "total_steps": 37885, "loss": 0.0308, "lr": 1.432558686566374e-07, "epoch": 4.223967269367824, "percentage": 84.48, "elapsed_time": "0:46:55", "remaining_time": "0:08:37", "throughput": 5596.11, "total_tokens": 15754256}
|
|
{"current_steps": 32010, "total_steps": 37885, "loss": 0.0323, "lr": 1.4301835835321175e-07, "epoch": 4.224627161145572, "percentage": 84.49, "elapsed_time": "0:46:55", "remaining_time": "0:08:36", "throughput": 5596.42, "total_tokens": 15757008}
|
|
{"current_steps": 32015, "total_steps": 37885, "loss": 0.0, "lr": 1.4278102993793362e-07, "epoch": 4.225287052923321, "percentage": 84.51, "elapsed_time": "0:46:55", "remaining_time": "0:08:36", "throughput": 5596.59, "total_tokens": 15759312}
|
|
{"current_steps": 32020, "total_steps": 37885, "loss": 0.0, "lr": 1.4254388346117408e-07, "epoch": 4.225946944701069, "percentage": 84.52, "elapsed_time": "0:46:56", "remaining_time": "0:08:35", "throughput": 5596.76, "total_tokens": 15761616}
|
|
{"current_steps": 32025, "total_steps": 37885, "loss": 0.0, "lr": 1.423069189732664e-07, "epoch": 4.226606836478817, "percentage": 84.53, "elapsed_time": "0:46:56", "remaining_time": "0:08:35", "throughput": 5597.01, "total_tokens": 15764176}
|
|
{"current_steps": 32030, "total_steps": 37885, "loss": 0.0042, "lr": 1.4207013652450405e-07, "epoch": 4.227266728256566, "percentage": 84.55, "elapsed_time": "0:46:56", "remaining_time": "0:08:34", "throughput": 5597.26, "total_tokens": 15766736}
|
|
{"current_steps": 32035, "total_steps": 37885, "loss": 0.0023, "lr": 1.4183353616514293e-07, "epoch": 4.227926620034315, "percentage": 84.56, "elapsed_time": "0:46:57", "remaining_time": "0:08:34", "throughput": 5597.55, "total_tokens": 15769424}
|
|
{"current_steps": 32040, "total_steps": 37885, "loss": 0.0, "lr": 1.415971179453991e-07, "epoch": 4.228586511812063, "percentage": 84.57, "elapsed_time": "0:46:57", "remaining_time": "0:08:33", "throughput": 5597.88, "total_tokens": 15772240}
|
|
{"current_steps": 32045, "total_steps": 37885, "loss": 0.0001, "lr": 1.4136088191545083e-07, "epoch": 4.229246403589811, "percentage": 84.58, "elapsed_time": "0:46:57", "remaining_time": "0:08:33", "throughput": 5598.07, "total_tokens": 15774608}
|
|
{"current_steps": 32050, "total_steps": 37885, "loss": 0.0, "lr": 1.411248281254379e-07, "epoch": 4.2299062953675595, "percentage": 84.6, "elapsed_time": "0:46:58", "remaining_time": "0:08:33", "throughput": 5598.29, "total_tokens": 15777040}
|
|
{"current_steps": 32055, "total_steps": 37885, "loss": 0.0, "lr": 1.408889566254603e-07, "epoch": 4.230566187145308, "percentage": 84.61, "elapsed_time": "0:46:58", "remaining_time": "0:08:32", "throughput": 5598.49, "total_tokens": 15779472}
|
|
{"current_steps": 32060, "total_steps": 37885, "loss": 0.0, "lr": 1.4065326746558092e-07, "epoch": 4.231226078923057, "percentage": 84.62, "elapsed_time": "0:46:58", "remaining_time": "0:08:32", "throughput": 5598.7, "total_tokens": 15781904}
|
|
{"current_steps": 32065, "total_steps": 37885, "loss": 0.0, "lr": 1.4041776069582233e-07, "epoch": 4.231885970700805, "percentage": 84.64, "elapsed_time": "0:46:59", "remaining_time": "0:08:31", "throughput": 5599.0, "total_tokens": 15784592}
|
|
{"current_steps": 32070, "total_steps": 37885, "loss": 0.0, "lr": 1.4018243636616967e-07, "epoch": 4.2325458624785535, "percentage": 84.65, "elapsed_time": "0:46:59", "remaining_time": "0:08:31", "throughput": 5599.2, "total_tokens": 15787024}
|
|
{"current_steps": 32075, "total_steps": 37885, "loss": 0.0, "lr": 1.399472945265684e-07, "epoch": 4.233205754256302, "percentage": 84.66, "elapsed_time": "0:46:59", "remaining_time": "0:08:30", "throughput": 5599.41, "total_tokens": 15789456}
|
|
{"current_steps": 32080, "total_steps": 37885, "loss": 0.0176, "lr": 1.397123352269257e-07, "epoch": 4.23386564603405, "percentage": 84.68, "elapsed_time": "0:47:00", "remaining_time": "0:08:30", "throughput": 5599.62, "total_tokens": 15791888}
|
|
{"current_steps": 32085, "total_steps": 37885, "loss": 0.0002, "lr": 1.3947755851711053e-07, "epoch": 4.234525537811799, "percentage": 84.69, "elapsed_time": "0:47:00", "remaining_time": "0:08:29", "throughput": 5599.77, "total_tokens": 15794128}
|
|
{"current_steps": 32090, "total_steps": 37885, "loss": 0.0, "lr": 1.3924296444695194e-07, "epoch": 4.2351854295895475, "percentage": 84.7, "elapsed_time": "0:47:00", "remaining_time": "0:08:29", "throughput": 5599.89, "total_tokens": 15796304}
|
|
{"current_steps": 32095, "total_steps": 37885, "loss": 0.0, "lr": 1.3900855306624093e-07, "epoch": 4.235845321367296, "percentage": 84.72, "elapsed_time": "0:47:01", "remaining_time": "0:08:28", "throughput": 5600.12, "total_tokens": 15798800}
|
|
{"current_steps": 32100, "total_steps": 37885, "loss": 0.0007, "lr": 1.387743244247299e-07, "epoch": 4.236505213145044, "percentage": 84.73, "elapsed_time": "0:47:01", "remaining_time": "0:08:28", "throughput": 5600.38, "total_tokens": 15801424}
|
|
{"current_steps": 32105, "total_steps": 37885, "loss": 0.0, "lr": 1.385402785721319e-07, "epoch": 4.237165104922792, "percentage": 84.74, "elapsed_time": "0:47:01", "remaining_time": "0:08:28", "throughput": 5600.71, "total_tokens": 15804240}
|
|
{"current_steps": 32110, "total_steps": 37885, "loss": 0.0, "lr": 1.3830641555812162e-07, "epoch": 4.2378249967005415, "percentage": 84.76, "elapsed_time": "0:47:02", "remaining_time": "0:08:27", "throughput": 5600.88, "total_tokens": 15806544}
|
|
{"current_steps": 32115, "total_steps": 37885, "loss": 0.0268, "lr": 1.3807273543233466e-07, "epoch": 4.23848488847829, "percentage": 84.77, "elapsed_time": "0:47:02", "remaining_time": "0:08:27", "throughput": 5601.27, "total_tokens": 15809552}
|
|
{"current_steps": 32120, "total_steps": 37885, "loss": 0.0001, "lr": 1.3783923824436817e-07, "epoch": 4.239144780256038, "percentage": 84.78, "elapsed_time": "0:47:02", "remaining_time": "0:08:26", "throughput": 5601.48, "total_tokens": 15811984}
|
|
{"current_steps": 32125, "total_steps": 37885, "loss": 0.0, "lr": 1.3760592404377991e-07, "epoch": 4.239804672033786, "percentage": 84.8, "elapsed_time": "0:47:03", "remaining_time": "0:08:26", "throughput": 5601.75, "total_tokens": 15814608}
|
|
{"current_steps": 32130, "total_steps": 37885, "loss": 0.1054, "lr": 1.373727928800894e-07, "epoch": 4.240464563811535, "percentage": 84.81, "elapsed_time": "0:47:03", "remaining_time": "0:08:25", "throughput": 5601.96, "total_tokens": 15817040}
|
|
{"current_steps": 32135, "total_steps": 37885, "loss": 0.0323, "lr": 1.3713984480277708e-07, "epoch": 4.241124455589284, "percentage": 84.82, "elapsed_time": "0:47:03", "remaining_time": "0:08:25", "throughput": 5602.21, "total_tokens": 15819600}
|
|
{"current_steps": 32140, "total_steps": 37885, "loss": 0.0, "lr": 1.3690707986128414e-07, "epoch": 4.241784347367032, "percentage": 84.84, "elapsed_time": "0:47:04", "remaining_time": "0:08:24", "throughput": 5602.6, "total_tokens": 15822608}
|
|
{"current_steps": 32145, "total_steps": 37885, "loss": 0.0, "lr": 1.3667449810501353e-07, "epoch": 4.24244423914478, "percentage": 84.85, "elapsed_time": "0:47:04", "remaining_time": "0:08:24", "throughput": 5602.91, "total_tokens": 15825360}
|
|
{"current_steps": 32150, "total_steps": 37885, "loss": 0.0, "lr": 1.3644209958332908e-07, "epoch": 4.243104130922529, "percentage": 84.86, "elapsed_time": "0:47:04", "remaining_time": "0:08:23", "throughput": 5603.12, "total_tokens": 15827792}
|
|
{"current_steps": 32155, "total_steps": 37885, "loss": 0.0253, "lr": 1.3620988434555546e-07, "epoch": 4.243764022700277, "percentage": 84.88, "elapsed_time": "0:47:05", "remaining_time": "0:08:23", "throughput": 5603.32, "total_tokens": 15830224}
|
|
{"current_steps": 32160, "total_steps": 37885, "loss": 0.0381, "lr": 1.3597785244097882e-07, "epoch": 4.244423914478026, "percentage": 84.89, "elapsed_time": "0:47:05", "remaining_time": "0:08:22", "throughput": 5603.54, "total_tokens": 15832720}
|
|
{"current_steps": 32165, "total_steps": 37885, "loss": 0.0, "lr": 1.3574600391884627e-07, "epoch": 4.245083806255774, "percentage": 84.9, "elapsed_time": "0:47:05", "remaining_time": "0:08:22", "throughput": 5603.75, "total_tokens": 15835152}
|
|
{"current_steps": 32170, "total_steps": 37885, "loss": 0.0, "lr": 1.3551433882836615e-07, "epoch": 4.245743698033523, "percentage": 84.91, "elapsed_time": "0:47:06", "remaining_time": "0:08:22", "throughput": 5603.98, "total_tokens": 15837648}
|
|
{"current_steps": 32175, "total_steps": 37885, "loss": 0.0, "lr": 1.3528285721870747e-07, "epoch": 4.246403589811271, "percentage": 84.93, "elapsed_time": "0:47:06", "remaining_time": "0:08:21", "throughput": 5604.13, "total_tokens": 15839888}
|
|
{"current_steps": 32180, "total_steps": 37885, "loss": 0.0, "lr": 1.3505155913900012e-07, "epoch": 4.247063481589019, "percentage": 84.94, "elapsed_time": "0:47:06", "remaining_time": "0:08:21", "throughput": 5604.44, "total_tokens": 15842640}
|
|
{"current_steps": 32185, "total_steps": 37885, "loss": 0.0411, "lr": 1.3482044463833632e-07, "epoch": 4.247723373366767, "percentage": 84.95, "elapsed_time": "0:47:07", "remaining_time": "0:08:20", "throughput": 5604.65, "total_tokens": 15845072}
|
|
{"current_steps": 32190, "total_steps": 37885, "loss": 0.0046, "lr": 1.3458951376576778e-07, "epoch": 4.248383265144517, "percentage": 84.97, "elapsed_time": "0:47:07", "remaining_time": "0:08:20", "throughput": 5604.85, "total_tokens": 15847504}
|
|
{"current_steps": 32195, "total_steps": 37885, "loss": 0.0, "lr": 1.343587665703082e-07, "epoch": 4.249043156922265, "percentage": 84.98, "elapsed_time": "0:47:07", "remaining_time": "0:08:19", "throughput": 5605.1, "total_tokens": 15850064}
|
|
{"current_steps": 32200, "total_steps": 37885, "loss": 0.0, "lr": 1.341282031009321e-07, "epoch": 4.249703048700013, "percentage": 84.99, "elapsed_time": "0:47:08", "remaining_time": "0:08:19", "throughput": 5605.39, "total_tokens": 15852752}
|
|
{"current_steps": 32205, "total_steps": 37885, "loss": 0.0442, "lr": 1.338978234065745e-07, "epoch": 4.250362940477761, "percentage": 85.01, "elapsed_time": "0:47:08", "remaining_time": "0:08:18", "throughput": 5605.56, "total_tokens": 15855056}
|
|
{"current_steps": 32210, "total_steps": 37885, "loss": 0.0143, "lr": 1.3366762753613236e-07, "epoch": 4.25102283225551, "percentage": 85.02, "elapsed_time": "0:47:08", "remaining_time": "0:08:18", "throughput": 5605.77, "total_tokens": 15857488}
|
|
{"current_steps": 32215, "total_steps": 37885, "loss": 0.0087, "lr": 1.3343761553846222e-07, "epoch": 4.251682724033259, "percentage": 85.03, "elapsed_time": "0:47:09", "remaining_time": "0:08:17", "throughput": 5605.98, "total_tokens": 15859920}
|
|
{"current_steps": 32215, "total_steps": 37885, "eval_loss": 0.24730534851551056, "epoch": 4.251682724033259, "percentage": 85.03, "elapsed_time": "0:47:16", "remaining_time": "0:08:19", "throughput": 5590.54, "total_tokens": 15859920}
|
|
{"current_steps": 32220, "total_steps": 37885, "loss": 0.0, "lr": 1.332077874623836e-07, "epoch": 4.252342615811007, "percentage": 85.05, "elapsed_time": "0:47:51", "remaining_time": "0:08:24", "throughput": 5523.6, "total_tokens": 15862480}
|
|
{"current_steps": 32225, "total_steps": 37885, "loss": 0.0577, "lr": 1.3297814335667523e-07, "epoch": 4.253002507588755, "percentage": 85.06, "elapsed_time": "0:47:52", "remaining_time": "0:08:24", "throughput": 5523.93, "total_tokens": 15865296}
|
|
{"current_steps": 32230, "total_steps": 37885, "loss": 0.0548, "lr": 1.3274868327007715e-07, "epoch": 4.253662399366504, "percentage": 85.07, "elapsed_time": "0:47:52", "remaining_time": "0:08:23", "throughput": 5524.1, "total_tokens": 15867600}
|
|
{"current_steps": 32235, "total_steps": 37885, "loss": 0.0122, "lr": 1.3251940725129108e-07, "epoch": 4.254322291144252, "percentage": 85.09, "elapsed_time": "0:47:52", "remaining_time": "0:08:23", "throughput": 5524.31, "total_tokens": 15870032}
|
|
{"current_steps": 32240, "total_steps": 37885, "loss": 0.0, "lr": 1.3229031534897882e-07, "epoch": 4.254982182922001, "percentage": 85.1, "elapsed_time": "0:47:53", "remaining_time": "0:08:23", "throughput": 5524.51, "total_tokens": 15872464}
|
|
{"current_steps": 32245, "total_steps": 37885, "loss": 0.0, "lr": 1.320614076117641e-07, "epoch": 4.255642074699749, "percentage": 85.11, "elapsed_time": "0:47:53", "remaining_time": "0:08:22", "throughput": 5524.67, "total_tokens": 15874768}
|
|
{"current_steps": 32250, "total_steps": 37885, "loss": 0.0, "lr": 1.318326840882301e-07, "epoch": 4.256301966477498, "percentage": 85.13, "elapsed_time": "0:47:53", "remaining_time": "0:08:22", "throughput": 5524.84, "total_tokens": 15877136}
|
|
{"current_steps": 32255, "total_steps": 37885, "loss": 0.0, "lr": 1.3160414482692217e-07, "epoch": 4.256961858255246, "percentage": 85.14, "elapsed_time": "0:47:54", "remaining_time": "0:08:21", "throughput": 5524.95, "total_tokens": 15879312}
|
|
{"current_steps": 32260, "total_steps": 37885, "loss": 0.0, "lr": 1.3137578987634635e-07, "epoch": 4.257621750032994, "percentage": 85.15, "elapsed_time": "0:47:54", "remaining_time": "0:08:21", "throughput": 5525.21, "total_tokens": 15881936}
|
|
{"current_steps": 32265, "total_steps": 37885, "loss": 0.0, "lr": 1.3114761928496875e-07, "epoch": 4.258281641810743, "percentage": 85.17, "elapsed_time": "0:47:54", "remaining_time": "0:08:20", "throughput": 5525.37, "total_tokens": 15884240}
|
|
{"current_steps": 32270, "total_steps": 37885, "loss": 0.001, "lr": 1.3091963310121734e-07, "epoch": 4.258941533588492, "percentage": 85.18, "elapsed_time": "0:47:55", "remaining_time": "0:08:20", "throughput": 5525.59, "total_tokens": 15886736}
|
|
{"current_steps": 32275, "total_steps": 37885, "loss": 0.0, "lr": 1.306918313734805e-07, "epoch": 4.25960142536624, "percentage": 85.19, "elapsed_time": "0:47:55", "remaining_time": "0:08:19", "throughput": 5525.72, "total_tokens": 15888976}
|
|
{"current_steps": 32280, "total_steps": 37885, "loss": 0.0001, "lr": 1.3046421415010732e-07, "epoch": 4.260261317143988, "percentage": 85.21, "elapsed_time": "0:47:55", "remaining_time": "0:08:19", "throughput": 5525.81, "total_tokens": 15891088}
|
|
{"current_steps": 32285, "total_steps": 37885, "loss": 0.0, "lr": 1.3023678147940797e-07, "epoch": 4.2609212089217365, "percentage": 85.22, "elapsed_time": "0:47:56", "remaining_time": "0:08:18", "throughput": 5526.07, "total_tokens": 15893712}
|
|
{"current_steps": 32290, "total_steps": 37885, "loss": 0.0213, "lr": 1.3000953340965336e-07, "epoch": 4.261581100699486, "percentage": 85.23, "elapsed_time": "0:47:56", "remaining_time": "0:08:18", "throughput": 5526.28, "total_tokens": 15896144}
|
|
{"current_steps": 32295, "total_steps": 37885, "loss": 0.0, "lr": 1.297824699890756e-07, "epoch": 4.262240992477234, "percentage": 85.24, "elapsed_time": "0:47:56", "remaining_time": "0:08:17", "throughput": 5526.5, "total_tokens": 15898640}
|
|
{"current_steps": 32300, "total_steps": 37885, "loss": 0.0, "lr": 1.2955559126586667e-07, "epoch": 4.262900884254982, "percentage": 85.26, "elapsed_time": "0:47:57", "remaining_time": "0:08:17", "throughput": 5526.67, "total_tokens": 15901008}
|
|
{"current_steps": 32305, "total_steps": 37885, "loss": 0.0, "lr": 1.293288972881803e-07, "epoch": 4.2635607760327305, "percentage": 85.27, "elapsed_time": "0:47:57", "remaining_time": "0:08:17", "throughput": 5526.95, "total_tokens": 15903696}
|
|
{"current_steps": 32310, "total_steps": 37885, "loss": 0.0, "lr": 1.2910238810413075e-07, "epoch": 4.264220667810479, "percentage": 85.28, "elapsed_time": "0:47:57", "remaining_time": "0:08:16", "throughput": 5527.14, "total_tokens": 15906128}
|
|
{"current_steps": 32315, "total_steps": 37885, "loss": 0.0, "lr": 1.2887606376179262e-07, "epoch": 4.264880559588228, "percentage": 85.3, "elapsed_time": "0:47:58", "remaining_time": "0:08:16", "throughput": 5527.35, "total_tokens": 15908624}
|
|
{"current_steps": 32320, "total_steps": 37885, "loss": 0.0001, "lr": 1.2864992430920164e-07, "epoch": 4.265540451365976, "percentage": 85.31, "elapsed_time": "0:47:58", "remaining_time": "0:08:15", "throughput": 5527.48, "total_tokens": 15910864}
|
|
{"current_steps": 32325, "total_steps": 37885, "loss": 0.0004, "lr": 1.2842396979435476e-07, "epoch": 4.2662003431437245, "percentage": 85.32, "elapsed_time": "0:47:58", "remaining_time": "0:08:15", "throughput": 5527.66, "total_tokens": 15913296}
|
|
{"current_steps": 32330, "total_steps": 37885, "loss": 0.0, "lr": 1.2819820026520856e-07, "epoch": 4.266860234921473, "percentage": 85.34, "elapsed_time": "0:47:59", "remaining_time": "0:08:14", "throughput": 5527.88, "total_tokens": 15915792}
|
|
{"current_steps": 32335, "total_steps": 37885, "loss": 0.0, "lr": 1.2797261576968133e-07, "epoch": 4.267520126699221, "percentage": 85.35, "elapsed_time": "0:47:59", "remaining_time": "0:08:14", "throughput": 5527.98, "total_tokens": 15917968}
|
|
{"current_steps": 32340, "total_steps": 37885, "loss": 0.0, "lr": 1.2774721635565156e-07, "epoch": 4.268180018476969, "percentage": 85.36, "elapsed_time": "0:47:59", "remaining_time": "0:08:13", "throughput": 5528.26, "total_tokens": 15920656}
|
|
{"current_steps": 32345, "total_steps": 37885, "loss": 0.0, "lr": 1.275220020709591e-07, "epoch": 4.2688399102547185, "percentage": 85.38, "elapsed_time": "0:48:00", "remaining_time": "0:08:13", "throughput": 5528.43, "total_tokens": 15923024}
|
|
{"current_steps": 32350, "total_steps": 37885, "loss": 0.0503, "lr": 1.2729697296340358e-07, "epoch": 4.269499802032467, "percentage": 85.39, "elapsed_time": "0:48:00", "remaining_time": "0:08:12", "throughput": 5528.59, "total_tokens": 15925328}
|
|
{"current_steps": 32355, "total_steps": 37885, "loss": 0.0, "lr": 1.270721290807456e-07, "epoch": 4.270159693810215, "percentage": 85.4, "elapsed_time": "0:48:00", "remaining_time": "0:08:12", "throughput": 5528.79, "total_tokens": 15927760}
|
|
{"current_steps": 32360, "total_steps": 37885, "loss": 0.0, "lr": 1.268474704707073e-07, "epoch": 4.270819585587963, "percentage": 85.42, "elapsed_time": "0:48:01", "remaining_time": "0:08:11", "throughput": 5528.99, "total_tokens": 15930192}
|
|
{"current_steps": 32365, "total_steps": 37885, "loss": 0.0747, "lr": 1.2662299718097036e-07, "epoch": 4.271479477365712, "percentage": 85.43, "elapsed_time": "0:48:01", "remaining_time": "0:08:11", "throughput": 5529.1, "total_tokens": 15932368}
|
|
{"current_steps": 32370, "total_steps": 37885, "loss": 0.0, "lr": 1.2639870925917805e-07, "epoch": 4.272139369143461, "percentage": 85.44, "elapsed_time": "0:48:01", "remaining_time": "0:08:10", "throughput": 5529.35, "total_tokens": 15934928}
|
|
{"current_steps": 32375, "total_steps": 37885, "loss": 0.0, "lr": 1.2617460675293312e-07, "epoch": 4.272799260921209, "percentage": 85.46, "elapsed_time": "0:48:02", "remaining_time": "0:08:10", "throughput": 5529.51, "total_tokens": 15937232}
|
|
{"current_steps": 32380, "total_steps": 37885, "loss": 0.0, "lr": 1.259506897098005e-07, "epoch": 4.273459152698957, "percentage": 85.47, "elapsed_time": "0:48:02", "remaining_time": "0:08:10", "throughput": 5529.86, "total_tokens": 15940176}
|
|
{"current_steps": 32385, "total_steps": 37885, "loss": 0.0005, "lr": 1.2572695817730473e-07, "epoch": 4.274119044476706, "percentage": 85.48, "elapsed_time": "0:48:02", "remaining_time": "0:08:09", "throughput": 5530.05, "total_tokens": 15942608}
|
|
{"current_steps": 32390, "total_steps": 37885, "loss": 0.0, "lr": 1.2550341220293059e-07, "epoch": 4.274778936254454, "percentage": 85.5, "elapsed_time": "0:48:03", "remaining_time": "0:08:09", "throughput": 5530.32, "total_tokens": 15945296}
|
|
{"current_steps": 32395, "total_steps": 37885, "loss": 0.0, "lr": 1.2528005183412503e-07, "epoch": 4.275438828032203, "percentage": 85.51, "elapsed_time": "0:48:03", "remaining_time": "0:08:08", "throughput": 5530.58, "total_tokens": 15947920}
|
|
{"current_steps": 32400, "total_steps": 37885, "loss": 0.0, "lr": 1.2505687711829417e-07, "epoch": 4.276098719809951, "percentage": 85.52, "elapsed_time": "0:48:03", "remaining_time": "0:08:08", "throughput": 5530.86, "total_tokens": 15950672}
|
|
{"current_steps": 32405, "total_steps": 37885, "loss": 0.0016, "lr": 1.2483388810280538e-07, "epoch": 4.2767586115877, "percentage": 85.54, "elapsed_time": "0:48:04", "remaining_time": "0:08:07", "throughput": 5531.2, "total_tokens": 15953552}
|
|
{"current_steps": 32410, "total_steps": 37885, "loss": 0.0007, "lr": 1.2461108483498617e-07, "epoch": 4.277418503365448, "percentage": 85.55, "elapsed_time": "0:48:04", "remaining_time": "0:08:07", "throughput": 5531.38, "total_tokens": 15955920}
|
|
{"current_steps": 32415, "total_steps": 37885, "loss": 0.0, "lr": 1.2438846736212516e-07, "epoch": 4.278078395143196, "percentage": 85.56, "elapsed_time": "0:48:04", "remaining_time": "0:08:06", "throughput": 5531.63, "total_tokens": 15958544}
|
|
{"current_steps": 32420, "total_steps": 37885, "loss": 0.0, "lr": 1.2416603573147155e-07, "epoch": 4.278738286920945, "percentage": 85.57, "elapsed_time": "0:48:05", "remaining_time": "0:08:06", "throughput": 5531.89, "total_tokens": 15961168}
|
|
{"current_steps": 32425, "total_steps": 37885, "loss": 0.0, "lr": 1.2394378999023426e-07, "epoch": 4.279398178698694, "percentage": 85.59, "elapsed_time": "0:48:05", "remaining_time": "0:08:05", "throughput": 5532.02, "total_tokens": 15963408}
|
|
{"current_steps": 32430, "total_steps": 37885, "loss": 0.0, "lr": 1.2372173018558373e-07, "epoch": 4.280058070476442, "percentage": 85.6, "elapsed_time": "0:48:05", "remaining_time": "0:08:05", "throughput": 5532.34, "total_tokens": 15966224}
|
|
{"current_steps": 32435, "total_steps": 37885, "loss": 0.0002, "lr": 1.2349985636465054e-07, "epoch": 4.28071796225419, "percentage": 85.61, "elapsed_time": "0:48:06", "remaining_time": "0:08:04", "throughput": 5532.46, "total_tokens": 15968464}
|
|
{"current_steps": 32440, "total_steps": 37885, "loss": 0.0, "lr": 1.2327816857452567e-07, "epoch": 4.2813778540319385, "percentage": 85.63, "elapsed_time": "0:48:06", "remaining_time": "0:08:04", "throughput": 5532.77, "total_tokens": 15971280}
|
|
{"current_steps": 32445, "total_steps": 37885, "loss": 0.0, "lr": 1.230566668622607e-07, "epoch": 4.282037745809687, "percentage": 85.64, "elapsed_time": "0:48:07", "remaining_time": "0:08:04", "throughput": 5532.9, "total_tokens": 15973520}
|
|
{"current_steps": 32450, "total_steps": 37885, "loss": 0.0, "lr": 1.2283535127486789e-07, "epoch": 4.282697637587436, "percentage": 85.65, "elapsed_time": "0:48:07", "remaining_time": "0:08:03", "throughput": 5533.11, "total_tokens": 15976016}
|
|
{"current_steps": 32455, "total_steps": 37885, "loss": 0.0, "lr": 1.2261422185932003e-07, "epoch": 4.283357529365184, "percentage": 85.67, "elapsed_time": "0:48:07", "remaining_time": "0:08:03", "throughput": 5533.27, "total_tokens": 15978320}
|
|
{"current_steps": 32460, "total_steps": 37885, "loss": 0.0, "lr": 1.223932786625499e-07, "epoch": 4.2840174211429325, "percentage": 85.68, "elapsed_time": "0:48:08", "remaining_time": "0:08:02", "throughput": 5533.5, "total_tokens": 15980880}
|
|
{"current_steps": 32465, "total_steps": 37885, "loss": 0.0, "lr": 1.221725217314512e-07, "epoch": 4.284677312920681, "percentage": 85.69, "elapsed_time": "0:48:08", "remaining_time": "0:08:02", "throughput": 5533.69, "total_tokens": 15983312}
|
|
{"current_steps": 32470, "total_steps": 37885, "loss": 0.0017, "lr": 1.2195195111287827e-07, "epoch": 4.285337204698429, "percentage": 85.71, "elapsed_time": "0:48:08", "remaining_time": "0:08:01", "throughput": 5533.92, "total_tokens": 15985872}
|
|
{"current_steps": 32475, "total_steps": 37885, "loss": 0.0, "lr": 1.2173156685364516e-07, "epoch": 4.285997096476178, "percentage": 85.72, "elapsed_time": "0:48:09", "remaining_time": "0:08:01", "throughput": 5534.1, "total_tokens": 15988304}
|
|
{"current_steps": 32480, "total_steps": 37885, "loss": 0.024, "lr": 1.2151136900052706e-07, "epoch": 4.2866569882539265, "percentage": 85.73, "elapsed_time": "0:48:09", "remaining_time": "0:08:00", "throughput": 5534.28, "total_tokens": 15990672}
|
|
{"current_steps": 32485, "total_steps": 37885, "loss": 0.0008, "lr": 1.2129135760025955e-07, "epoch": 4.287316880031675, "percentage": 85.75, "elapsed_time": "0:48:09", "remaining_time": "0:08:00", "throughput": 5534.44, "total_tokens": 15993040}
|
|
{"current_steps": 32490, "total_steps": 37885, "loss": 0.0007, "lr": 1.2107153269953818e-07, "epoch": 4.287976771809423, "percentage": 85.76, "elapsed_time": "0:48:10", "remaining_time": "0:07:59", "throughput": 5534.72, "total_tokens": 15995792}
|
|
{"current_steps": 32495, "total_steps": 37885, "loss": 0.0, "lr": 1.208518943450192e-07, "epoch": 4.288636663587171, "percentage": 85.77, "elapsed_time": "0:48:10", "remaining_time": "0:07:59", "throughput": 5534.94, "total_tokens": 15998288}
|
|
{"current_steps": 32500, "total_steps": 37885, "loss": 0.0001, "lr": 1.2063244258331938e-07, "epoch": 4.2892965553649205, "percentage": 85.79, "elapsed_time": "0:48:10", "remaining_time": "0:07:58", "throughput": 5535.18, "total_tokens": 16000912}
|
|
{"current_steps": 32505, "total_steps": 37885, "loss": 0.0, "lr": 1.2041317746101599e-07, "epoch": 4.289956447142669, "percentage": 85.8, "elapsed_time": "0:48:11", "remaining_time": "0:07:58", "throughput": 5535.3, "total_tokens": 16003088}
|
|
{"current_steps": 32510, "total_steps": 37885, "loss": 0.0, "lr": 1.2019409902464616e-07, "epoch": 4.290616338920417, "percentage": 85.81, "elapsed_time": "0:48:11", "remaining_time": "0:07:58", "throughput": 5535.56, "total_tokens": 16005776}
|
|
{"current_steps": 32515, "total_steps": 37885, "loss": 0.0, "lr": 1.1997520732070742e-07, "epoch": 4.291276230698165, "percentage": 85.83, "elapsed_time": "0:48:11", "remaining_time": "0:07:57", "throughput": 5535.73, "total_tokens": 16008144}
|
|
{"current_steps": 32520, "total_steps": 37885, "loss": 0.0, "lr": 1.197565023956586e-07, "epoch": 4.291936122475914, "percentage": 85.84, "elapsed_time": "0:48:12", "remaining_time": "0:07:57", "throughput": 5535.98, "total_tokens": 16010768}
|
|
{"current_steps": 32525, "total_steps": 37885, "loss": 0.0, "lr": 1.1953798429591778e-07, "epoch": 4.292596014253663, "percentage": 85.85, "elapsed_time": "0:48:12", "remaining_time": "0:07:56", "throughput": 5536.16, "total_tokens": 16013200}
|
|
{"current_steps": 32530, "total_steps": 37885, "loss": 0.0002, "lr": 1.1931965306786396e-07, "epoch": 4.293255906031411, "percentage": 85.87, "elapsed_time": "0:48:12", "remaining_time": "0:07:56", "throughput": 5536.41, "total_tokens": 16015824}
|
|
{"current_steps": 32535, "total_steps": 37885, "loss": 0.0, "lr": 1.1910150875783664e-07, "epoch": 4.293915797809159, "percentage": 85.88, "elapsed_time": "0:48:13", "remaining_time": "0:07:55", "throughput": 5536.52, "total_tokens": 16018064}
|
|
{"current_steps": 32540, "total_steps": 37885, "loss": 0.0, "lr": 1.1888355141213491e-07, "epoch": 4.294575689586908, "percentage": 85.89, "elapsed_time": "0:48:13", "remaining_time": "0:07:55", "throughput": 5536.68, "total_tokens": 16020432}
|
|
{"current_steps": 32545, "total_steps": 37885, "loss": 0.0001, "lr": 1.1866578107701897e-07, "epoch": 4.295235581364656, "percentage": 85.9, "elapsed_time": "0:48:13", "remaining_time": "0:07:54", "throughput": 5536.93, "total_tokens": 16023056}
|
|
{"current_steps": 32550, "total_steps": 37885, "loss": 0.0001, "lr": 1.1844819779870862e-07, "epoch": 4.295895473142405, "percentage": 85.92, "elapsed_time": "0:48:14", "remaining_time": "0:07:54", "throughput": 5537.07, "total_tokens": 16025360}
|
|
{"current_steps": 32555, "total_steps": 37885, "loss": 0.0, "lr": 1.1823080162338483e-07, "epoch": 4.296555364920153, "percentage": 85.93, "elapsed_time": "0:48:14", "remaining_time": "0:07:53", "throughput": 5537.3, "total_tokens": 16027920}
|
|
{"current_steps": 32560, "total_steps": 37885, "loss": 0.0, "lr": 1.1801359259718823e-07, "epoch": 4.297215256697902, "percentage": 85.94, "elapsed_time": "0:48:14", "remaining_time": "0:07:53", "throughput": 5537.5, "total_tokens": 16030416}
|
|
{"current_steps": 32565, "total_steps": 37885, "loss": 0.0001, "lr": 1.1779657076621951e-07, "epoch": 4.29787514847565, "percentage": 85.96, "elapsed_time": "0:48:15", "remaining_time": "0:07:52", "throughput": 5537.66, "total_tokens": 16032784}
|
|
{"current_steps": 32570, "total_steps": 37885, "loss": 0.0, "lr": 1.1757973617654027e-07, "epoch": 4.298535040253398, "percentage": 85.97, "elapsed_time": "0:48:15", "remaining_time": "0:07:52", "throughput": 5537.86, "total_tokens": 16035216}
|
|
{"current_steps": 32575, "total_steps": 37885, "loss": 0.0, "lr": 1.1736308887417201e-07, "epoch": 4.299194932031147, "percentage": 85.98, "elapsed_time": "0:48:15", "remaining_time": "0:07:52", "throughput": 5538.02, "total_tokens": 16037584}
|
|
{"current_steps": 32580, "total_steps": 37885, "loss": 0.0001, "lr": 1.1714662890509685e-07, "epoch": 4.299854823808896, "percentage": 86.0, "elapsed_time": "0:48:16", "remaining_time": "0:07:51", "throughput": 5538.21, "total_tokens": 16040016}
|
|
{"current_steps": 32585, "total_steps": 37885, "loss": 0.0, "lr": 1.1693035631525628e-07, "epoch": 4.300514715586644, "percentage": 86.01, "elapsed_time": "0:48:16", "remaining_time": "0:07:51", "throughput": 5538.46, "total_tokens": 16042640}
|
|
{"current_steps": 32590, "total_steps": 37885, "loss": 0.0, "lr": 1.1671427115055299e-07, "epoch": 4.301174607364392, "percentage": 86.02, "elapsed_time": "0:48:16", "remaining_time": "0:07:50", "throughput": 5538.66, "total_tokens": 16045136}
|
|
{"current_steps": 32595, "total_steps": 37885, "loss": 0.0006, "lr": 1.1649837345684954e-07, "epoch": 4.3018344991421404, "percentage": 86.04, "elapsed_time": "0:48:17", "remaining_time": "0:07:50", "throughput": 5538.89, "total_tokens": 16047696}
|
|
{"current_steps": 32600, "total_steps": 37885, "loss": 0.0004, "lr": 1.1628266327996827e-07, "epoch": 4.302494390919889, "percentage": 86.05, "elapsed_time": "0:48:17", "remaining_time": "0:07:49", "throughput": 5539.04, "total_tokens": 16050000}
|
|
{"current_steps": 32605, "total_steps": 37885, "loss": 0.0003, "lr": 1.1606714066569235e-07, "epoch": 4.303154282697638, "percentage": 86.06, "elapsed_time": "0:48:17", "remaining_time": "0:07:49", "throughput": 5539.29, "total_tokens": 16052624}
|
|
{"current_steps": 32610, "total_steps": 37885, "loss": 0.0361, "lr": 1.1585180565976515e-07, "epoch": 4.303814174475386, "percentage": 86.08, "elapsed_time": "0:48:18", "remaining_time": "0:07:48", "throughput": 5539.42, "total_tokens": 16054864}
|
|
{"current_steps": 32615, "total_steps": 37885, "loss": 0.0, "lr": 1.1563665830788948e-07, "epoch": 4.3044740662531344, "percentage": 86.09, "elapsed_time": "0:48:18", "remaining_time": "0:07:48", "throughput": 5539.53, "total_tokens": 16057104}
|
|
{"current_steps": 32620, "total_steps": 37885, "loss": 0.0001, "lr": 1.1542169865572904e-07, "epoch": 4.305133958030883, "percentage": 86.1, "elapsed_time": "0:48:18", "remaining_time": "0:07:47", "throughput": 5539.7, "total_tokens": 16059472}
|
|
{"current_steps": 32625, "total_steps": 37885, "loss": 0.0002, "lr": 1.1520692674890741e-07, "epoch": 4.305793849808631, "percentage": 86.12, "elapsed_time": "0:48:19", "remaining_time": "0:07:47", "throughput": 5539.82, "total_tokens": 16061712}
|
|
{"current_steps": 32630, "total_steps": 37885, "loss": 0.0001, "lr": 1.149923426330086e-07, "epoch": 4.30645374158638, "percentage": 86.13, "elapsed_time": "0:48:19", "remaining_time": "0:07:46", "throughput": 5539.96, "total_tokens": 16064016}
|
|
{"current_steps": 32635, "total_steps": 37885, "loss": 0.0001, "lr": 1.1477794635357618e-07, "epoch": 4.3071136333641284, "percentage": 86.14, "elapsed_time": "0:48:19", "remaining_time": "0:07:46", "throughput": 5540.07, "total_tokens": 16066192}
|
|
{"current_steps": 32640, "total_steps": 37885, "loss": 0.0, "lr": 1.145637379561144e-07, "epoch": 4.307773525141877, "percentage": 86.16, "elapsed_time": "0:48:20", "remaining_time": "0:07:46", "throughput": 5540.18, "total_tokens": 16068368}
|
|
{"current_steps": 32645, "total_steps": 37885, "loss": 0.0002, "lr": 1.1434971748608757e-07, "epoch": 4.308433416919625, "percentage": 86.17, "elapsed_time": "0:48:20", "remaining_time": "0:07:45", "throughput": 5540.23, "total_tokens": 16070416}
|
|
{"current_steps": 32650, "total_steps": 37885, "loss": 0.0, "lr": 1.1413588498891957e-07, "epoch": 4.309093308697373, "percentage": 86.18, "elapsed_time": "0:48:21", "remaining_time": "0:07:45", "throughput": 5540.4, "total_tokens": 16072784}
|
|
{"current_steps": 32655, "total_steps": 37885, "loss": 0.0001, "lr": 1.139222405099951e-07, "epoch": 4.3097532004751224, "percentage": 86.2, "elapsed_time": "0:48:21", "remaining_time": "0:07:44", "throughput": 5540.62, "total_tokens": 16075280}
|
|
{"current_steps": 32660, "total_steps": 37885, "loss": 0.0, "lr": 1.137087840946589e-07, "epoch": 4.310413092252871, "percentage": 86.21, "elapsed_time": "0:48:21", "remaining_time": "0:07:44", "throughput": 5540.91, "total_tokens": 16078032}
|
|
{"current_steps": 32665, "total_steps": 37885, "loss": 0.0133, "lr": 1.1349551578821493e-07, "epoch": 4.311072984030619, "percentage": 86.22, "elapsed_time": "0:48:22", "remaining_time": "0:07:43", "throughput": 5541.09, "total_tokens": 16080464}
|
|
{"current_steps": 32670, "total_steps": 37885, "loss": 0.0, "lr": 1.1328243563592831e-07, "epoch": 4.311732875808367, "percentage": 86.23, "elapsed_time": "0:48:22", "remaining_time": "0:07:43", "throughput": 5541.29, "total_tokens": 16082960}
|
|
{"current_steps": 32675, "total_steps": 37885, "loss": 0.0, "lr": 1.1306954368302357e-07, "epoch": 4.312392767586116, "percentage": 86.25, "elapsed_time": "0:48:22", "remaining_time": "0:07:42", "throughput": 5541.5, "total_tokens": 16085456}
|
|
{"current_steps": 32680, "total_steps": 37885, "loss": 0.0015, "lr": 1.1285683997468564e-07, "epoch": 4.313052659363865, "percentage": 86.26, "elapsed_time": "0:48:23", "remaining_time": "0:07:42", "throughput": 5541.56, "total_tokens": 16087504}
|
|
{"current_steps": 32685, "total_steps": 37885, "loss": 0.0, "lr": 1.1264432455605933e-07, "epoch": 4.313712551141613, "percentage": 86.27, "elapsed_time": "0:48:23", "remaining_time": "0:07:41", "throughput": 5541.75, "total_tokens": 16089936}
|
|
{"current_steps": 32690, "total_steps": 37885, "loss": 0.0, "lr": 1.1243199747224897e-07, "epoch": 4.314372442919361, "percentage": 86.29, "elapsed_time": "0:48:23", "remaining_time": "0:07:41", "throughput": 5541.85, "total_tokens": 16092112}
|
|
{"current_steps": 32695, "total_steps": 37885, "loss": 0.0, "lr": 1.122198587683203e-07, "epoch": 4.31503233469711, "percentage": 86.3, "elapsed_time": "0:48:24", "remaining_time": "0:07:40", "throughput": 5542.04, "total_tokens": 16094544}
|
|
{"current_steps": 32700, "total_steps": 37885, "loss": 0.0239, "lr": 1.1200790848929764e-07, "epoch": 4.315692226474858, "percentage": 86.31, "elapsed_time": "0:48:24", "remaining_time": "0:07:40", "throughput": 5542.19, "total_tokens": 16096848}
|
|
{"current_steps": 32705, "total_steps": 37885, "loss": 0.0, "lr": 1.1179614668016624e-07, "epoch": 4.316352118252606, "percentage": 86.33, "elapsed_time": "0:48:24", "remaining_time": "0:07:40", "throughput": 5542.29, "total_tokens": 16099024}
|
|
{"current_steps": 32710, "total_steps": 37885, "loss": 0.0144, "lr": 1.1158457338587047e-07, "epoch": 4.317012010030355, "percentage": 86.34, "elapsed_time": "0:48:25", "remaining_time": "0:07:39", "throughput": 5542.59, "total_tokens": 16101776}
|
|
{"current_steps": 32715, "total_steps": 37885, "loss": 0.0, "lr": 1.1137318865131595e-07, "epoch": 4.317671901808104, "percentage": 86.35, "elapsed_time": "0:48:25", "remaining_time": "0:07:39", "throughput": 5542.75, "total_tokens": 16104144}
|
|
{"current_steps": 32720, "total_steps": 37885, "loss": 0.0, "lr": 1.1116199252136727e-07, "epoch": 4.318331793585852, "percentage": 86.37, "elapsed_time": "0:48:25", "remaining_time": "0:07:38", "throughput": 5542.91, "total_tokens": 16106512}
|
|
{"current_steps": 32725, "total_steps": 37885, "loss": 0.0427, "lr": 1.1095098504084877e-07, "epoch": 4.3189916853636, "percentage": 86.38, "elapsed_time": "0:48:26", "remaining_time": "0:07:38", "throughput": 5543.09, "total_tokens": 16108944}
|
|
{"current_steps": 32730, "total_steps": 37885, "loss": 0.0001, "lr": 1.1074016625454607e-07, "epoch": 4.319651577141348, "percentage": 86.39, "elapsed_time": "0:48:26", "remaining_time": "0:07:37", "throughput": 5543.26, "total_tokens": 16111312}
|
|
{"current_steps": 32735, "total_steps": 37885, "loss": 0.028, "lr": 1.1052953620720351e-07, "epoch": 4.320311468919098, "percentage": 86.41, "elapsed_time": "0:48:26", "remaining_time": "0:07:37", "throughput": 5543.53, "total_tokens": 16114000}
|
|
{"current_steps": 32740, "total_steps": 37885, "loss": 0.0322, "lr": 1.1031909494352588e-07, "epoch": 4.320971360696846, "percentage": 86.42, "elapsed_time": "0:48:27", "remaining_time": "0:07:36", "throughput": 5543.61, "total_tokens": 16116112}
|
|
{"current_steps": 32745, "total_steps": 37885, "loss": 0.0533, "lr": 1.1010884250817765e-07, "epoch": 4.321631252474594, "percentage": 86.43, "elapsed_time": "0:48:27", "remaining_time": "0:07:36", "throughput": 5543.79, "total_tokens": 16118544}
|
|
{"current_steps": 32750, "total_steps": 37885, "loss": 0.0, "lr": 1.098987789457836e-07, "epoch": 4.322291144252342, "percentage": 86.45, "elapsed_time": "0:48:27", "remaining_time": "0:07:35", "throughput": 5543.98, "total_tokens": 16120976}
|
|
{"current_steps": 32755, "total_steps": 37885, "loss": 0.0096, "lr": 1.0968890430092825e-07, "epoch": 4.322951036030091, "percentage": 86.46, "elapsed_time": "0:48:28", "remaining_time": "0:07:35", "throughput": 5544.23, "total_tokens": 16123600}
|
|
{"current_steps": 32760, "total_steps": 37885, "loss": 0.0, "lr": 1.0947921861815557e-07, "epoch": 4.32361092780784, "percentage": 86.47, "elapsed_time": "0:48:28", "remaining_time": "0:07:35", "throughput": 5544.35, "total_tokens": 16125840}
|
|
{"current_steps": 32765, "total_steps": 37885, "loss": 0.0518, "lr": 1.0926972194197015e-07, "epoch": 4.324270819585588, "percentage": 86.49, "elapsed_time": "0:48:28", "remaining_time": "0:07:34", "throughput": 5544.56, "total_tokens": 16128336}
|
|
{"current_steps": 32770, "total_steps": 37885, "loss": 0.0, "lr": 1.0906041431683632e-07, "epoch": 4.324930711363336, "percentage": 86.5, "elapsed_time": "0:48:29", "remaining_time": "0:07:34", "throughput": 5544.82, "total_tokens": 16131024}
|
|
{"current_steps": 32775, "total_steps": 37885, "loss": 0.0, "lr": 1.0885129578717767e-07, "epoch": 4.325590603141085, "percentage": 86.51, "elapsed_time": "0:48:29", "remaining_time": "0:07:33", "throughput": 5545.09, "total_tokens": 16133712}
|
|
{"current_steps": 32780, "total_steps": 37885, "loss": 0.0, "lr": 1.0864236639737823e-07, "epoch": 4.326250494918833, "percentage": 86.53, "elapsed_time": "0:48:29", "remaining_time": "0:07:33", "throughput": 5545.29, "total_tokens": 16136208}
|
|
{"current_steps": 32785, "total_steps": 37885, "loss": 0.0, "lr": 1.0843362619178187e-07, "epoch": 4.326910386696582, "percentage": 86.54, "elapsed_time": "0:48:30", "remaining_time": "0:07:32", "throughput": 5545.45, "total_tokens": 16138576}
|
|
{"current_steps": 32790, "total_steps": 37885, "loss": 0.0014, "lr": 1.0822507521469227e-07, "epoch": 4.32757027847433, "percentage": 86.55, "elapsed_time": "0:48:30", "remaining_time": "0:07:32", "throughput": 5545.66, "total_tokens": 16141072}
|
|
{"current_steps": 32795, "total_steps": 37885, "loss": 0.0, "lr": 1.0801671351037255e-07, "epoch": 4.328230170252079, "percentage": 86.56, "elapsed_time": "0:48:30", "remaining_time": "0:07:31", "throughput": 5545.88, "total_tokens": 16143632}
|
|
{"current_steps": 32800, "total_steps": 37885, "loss": 0.0018, "lr": 1.0780854112304626e-07, "epoch": 4.328890062029827, "percentage": 86.58, "elapsed_time": "0:48:31", "remaining_time": "0:07:31", "throughput": 5546.13, "total_tokens": 16146320}
|
|
{"current_steps": 32805, "total_steps": 37885, "loss": 0.0, "lr": 1.076005580968965e-07, "epoch": 4.329549953807575, "percentage": 86.59, "elapsed_time": "0:48:31", "remaining_time": "0:07:30", "throughput": 5546.4, "total_tokens": 16149008}
|
|
{"current_steps": 32810, "total_steps": 37885, "loss": 0.0, "lr": 1.0739276447606582e-07, "epoch": 4.330209845585324, "percentage": 86.6, "elapsed_time": "0:48:31", "remaining_time": "0:07:30", "throughput": 5546.59, "total_tokens": 16151504}
|
|
{"current_steps": 32815, "total_steps": 37885, "loss": 0.0, "lr": 1.0718516030465708e-07, "epoch": 4.330869737363073, "percentage": 86.62, "elapsed_time": "0:48:32", "remaining_time": "0:07:29", "throughput": 5546.91, "total_tokens": 16154320}
|
|
{"current_steps": 32820, "total_steps": 37885, "loss": 0.0, "lr": 1.0697774562673312e-07, "epoch": 4.331529629140821, "percentage": 86.63, "elapsed_time": "0:48:32", "remaining_time": "0:07:29", "throughput": 5547.11, "total_tokens": 16156816}
|
|
{"current_steps": 32825, "total_steps": 37885, "loss": 0.0, "lr": 1.0677052048631563e-07, "epoch": 4.332189520918569, "percentage": 86.64, "elapsed_time": "0:48:32", "remaining_time": "0:07:29", "throughput": 5547.22, "total_tokens": 16158992}
|
|
{"current_steps": 32830, "total_steps": 37885, "loss": 0.0, "lr": 1.0656348492738687e-07, "epoch": 4.3328494126963175, "percentage": 86.66, "elapsed_time": "0:48:33", "remaining_time": "0:07:28", "throughput": 5547.37, "total_tokens": 16161296}
|
|
{"current_steps": 32835, "total_steps": 37885, "loss": 0.0, "lr": 1.0635663899388881e-07, "epoch": 4.333509304474067, "percentage": 86.67, "elapsed_time": "0:48:33", "remaining_time": "0:07:28", "throughput": 5547.53, "total_tokens": 16163664}
|
|
{"current_steps": 32840, "total_steps": 37885, "loss": 0.0533, "lr": 1.0614998272972298e-07, "epoch": 4.334169196251815, "percentage": 86.68, "elapsed_time": "0:48:34", "remaining_time": "0:07:27", "throughput": 5547.64, "total_tokens": 16165840}
|
|
{"current_steps": 32845, "total_steps": 37885, "loss": 0.0683, "lr": 1.0594351617875053e-07, "epoch": 4.334829088029563, "percentage": 86.7, "elapsed_time": "0:48:34", "remaining_time": "0:07:27", "throughput": 5547.8, "total_tokens": 16168208}
|
|
{"current_steps": 32850, "total_steps": 37885, "loss": 0.0, "lr": 1.0573723938479217e-07, "epoch": 4.3354889798073115, "percentage": 86.71, "elapsed_time": "0:48:34", "remaining_time": "0:07:26", "throughput": 5547.99, "total_tokens": 16170640}
|
|
{"current_steps": 32855, "total_steps": 37885, "loss": 0.0, "lr": 1.0553115239162935e-07, "epoch": 4.33614887158506, "percentage": 86.72, "elapsed_time": "0:48:35", "remaining_time": "0:07:26", "throughput": 5548.11, "total_tokens": 16172880}
|
|
{"current_steps": 32860, "total_steps": 37885, "loss": 0.0, "lr": 1.0532525524300206e-07, "epoch": 4.336808763362809, "percentage": 86.74, "elapsed_time": "0:48:35", "remaining_time": "0:07:25", "throughput": 5548.28, "total_tokens": 16175248}
|
|
{"current_steps": 32865, "total_steps": 37885, "loss": 0.0, "lr": 1.0511954798261058e-07, "epoch": 4.337468655140557, "percentage": 86.75, "elapsed_time": "0:48:35", "remaining_time": "0:07:25", "throughput": 5548.48, "total_tokens": 16177680}
|
|
{"current_steps": 32870, "total_steps": 37885, "loss": 0.0472, "lr": 1.0491403065411508e-07, "epoch": 4.3381285469183055, "percentage": 86.76, "elapsed_time": "0:48:36", "remaining_time": "0:07:24", "throughput": 5548.65, "total_tokens": 16180048}
|
|
{"current_steps": 32875, "total_steps": 37885, "loss": 0.0, "lr": 1.0470870330113457e-07, "epoch": 4.338788438696054, "percentage": 86.78, "elapsed_time": "0:48:36", "remaining_time": "0:07:24", "throughput": 5548.82, "total_tokens": 16182416}
|
|
{"current_steps": 32880, "total_steps": 37885, "loss": 0.1348, "lr": 1.0450356596724886e-07, "epoch": 4.339448330473802, "percentage": 86.79, "elapsed_time": "0:48:36", "remaining_time": "0:07:23", "throughput": 5549.02, "total_tokens": 16184848}
|
|
{"current_steps": 32885, "total_steps": 37885, "loss": 0.0, "lr": 1.0429861869599622e-07, "epoch": 4.34010822225155, "percentage": 86.8, "elapsed_time": "0:48:37", "remaining_time": "0:07:23", "throughput": 5549.21, "total_tokens": 16187280}
|
|
{"current_steps": 32890, "total_steps": 37885, "loss": 0.0004, "lr": 1.0409386153087596e-07, "epoch": 4.3407681140292995, "percentage": 86.82, "elapsed_time": "0:48:37", "remaining_time": "0:07:23", "throughput": 5549.35, "total_tokens": 16189584}
|
|
{"current_steps": 32895, "total_steps": 37885, "loss": 0.0, "lr": 1.0388929451534601e-07, "epoch": 4.341428005807048, "percentage": 86.83, "elapsed_time": "0:48:37", "remaining_time": "0:07:22", "throughput": 5549.46, "total_tokens": 16191760}
|
|
{"current_steps": 32900, "total_steps": 37885, "loss": 0.0, "lr": 1.0368491769282395e-07, "epoch": 4.342087897584796, "percentage": 86.84, "elapsed_time": "0:48:38", "remaining_time": "0:07:22", "throughput": 5549.64, "total_tokens": 16194128}
|
|
{"current_steps": 32905, "total_steps": 37885, "loss": 0.0, "lr": 1.0348073110668743e-07, "epoch": 4.342747789362544, "percentage": 86.85, "elapsed_time": "0:48:38", "remaining_time": "0:07:21", "throughput": 5549.89, "total_tokens": 16196752}
|
|
{"current_steps": 32910, "total_steps": 37885, "loss": 0.0, "lr": 1.0327673480027377e-07, "epoch": 4.343407681140293, "percentage": 86.87, "elapsed_time": "0:48:38", "remaining_time": "0:07:21", "throughput": 5550.1, "total_tokens": 16199248}
|
|
{"current_steps": 32915, "total_steps": 37885, "loss": 0.0002, "lr": 1.0307292881687968e-07, "epoch": 4.344067572918042, "percentage": 86.88, "elapsed_time": "0:48:39", "remaining_time": "0:07:20", "throughput": 5550.34, "total_tokens": 16201808}
|
|
{"current_steps": 32920, "total_steps": 37885, "loss": 0.0, "lr": 1.0286931319976133e-07, "epoch": 4.34472746469579, "percentage": 86.89, "elapsed_time": "0:48:39", "remaining_time": "0:07:20", "throughput": 5550.55, "total_tokens": 16204304}
|
|
{"current_steps": 32925, "total_steps": 37885, "loss": 0.0, "lr": 1.026658879921346e-07, "epoch": 4.345387356473538, "percentage": 86.91, "elapsed_time": "0:48:39", "remaining_time": "0:07:19", "throughput": 5550.78, "total_tokens": 16206864}
|
|
{"current_steps": 32930, "total_steps": 37885, "loss": 0.0061, "lr": 1.024626532371755e-07, "epoch": 4.346047248251287, "percentage": 86.92, "elapsed_time": "0:48:40", "remaining_time": "0:07:19", "throughput": 5550.9, "total_tokens": 16209104}
|
|
{"current_steps": 32935, "total_steps": 37885, "loss": 0.0001, "lr": 1.0225960897801856e-07, "epoch": 4.346707140029035, "percentage": 86.93, "elapsed_time": "0:48:40", "remaining_time": "0:07:18", "throughput": 5551.09, "total_tokens": 16211536}
|
|
{"current_steps": 32940, "total_steps": 37885, "loss": 0.0, "lr": 1.0205675525775858e-07, "epoch": 4.347367031806784, "percentage": 86.95, "elapsed_time": "0:48:40", "remaining_time": "0:07:18", "throughput": 5551.23, "total_tokens": 16213840}
|
|
{"current_steps": 32945, "total_steps": 37885, "loss": 0.0, "lr": 1.0185409211945017e-07, "epoch": 4.348026923584532, "percentage": 86.96, "elapsed_time": "0:48:41", "remaining_time": "0:07:18", "throughput": 5551.38, "total_tokens": 16216144}
|
|
{"current_steps": 32950, "total_steps": 37885, "loss": 0.0, "lr": 1.0165161960610669e-07, "epoch": 4.348686815362281, "percentage": 86.97, "elapsed_time": "0:48:41", "remaining_time": "0:07:17", "throughput": 5551.55, "total_tokens": 16218512}
|
|
{"current_steps": 32955, "total_steps": 37885, "loss": 0.0, "lr": 1.0144933776070163e-07, "epoch": 4.349346707140029, "percentage": 86.99, "elapsed_time": "0:48:41", "remaining_time": "0:07:17", "throughput": 5551.82, "total_tokens": 16221200}
|
|
{"current_steps": 32960, "total_steps": 37885, "loss": 0.0, "lr": 1.012472466261678e-07, "epoch": 4.350006598917777, "percentage": 87.0, "elapsed_time": "0:48:42", "remaining_time": "0:07:16", "throughput": 5552.01, "total_tokens": 16223632}
|
|
{"current_steps": 32965, "total_steps": 37885, "loss": 0.001, "lr": 1.0104534624539785e-07, "epoch": 4.3506664906955255, "percentage": 87.01, "elapsed_time": "0:48:42", "remaining_time": "0:07:16", "throughput": 5552.24, "total_tokens": 16226192}
|
|
{"current_steps": 32970, "total_steps": 37885, "loss": 0.0002, "lr": 1.0084363666124318e-07, "epoch": 4.351326382473275, "percentage": 87.03, "elapsed_time": "0:48:42", "remaining_time": "0:07:15", "throughput": 5552.36, "total_tokens": 16228432}
|
|
{"current_steps": 32975, "total_steps": 37885, "loss": 0.0, "lr": 1.0064211791651544e-07, "epoch": 4.351986274251023, "percentage": 87.04, "elapsed_time": "0:48:43", "remaining_time": "0:07:15", "throughput": 5552.51, "total_tokens": 16230736}
|
|
{"current_steps": 32980, "total_steps": 37885, "loss": 0.0001, "lr": 1.0044079005398576e-07, "epoch": 4.352646166028771, "percentage": 87.05, "elapsed_time": "0:48:43", "remaining_time": "0:07:14", "throughput": 5552.62, "total_tokens": 16232976}
|
|
{"current_steps": 32985, "total_steps": 37885, "loss": 0.0, "lr": 1.0023965311638415e-07, "epoch": 4.3533060578065195, "percentage": 87.07, "elapsed_time": "0:48:43", "remaining_time": "0:07:14", "throughput": 5552.81, "total_tokens": 16235408}
|
|
{"current_steps": 32990, "total_steps": 37885, "loss": 0.0, "lr": 1.0003870714640061e-07, "epoch": 4.353965949584268, "percentage": 87.08, "elapsed_time": "0:48:44", "remaining_time": "0:07:13", "throughput": 5553.06, "total_tokens": 16238032}
|
|
{"current_steps": 32995, "total_steps": 37885, "loss": 0.002, "lr": 9.983795218668456e-08, "epoch": 4.354625841362017, "percentage": 87.09, "elapsed_time": "0:48:44", "remaining_time": "0:07:13", "throughput": 5553.41, "total_tokens": 16240976}
|
|
{"current_steps": 33000, "total_steps": 37885, "loss": 0.0384, "lr": 9.963738827984458e-08, "epoch": 4.355285733139765, "percentage": 87.11, "elapsed_time": "0:48:44", "remaining_time": "0:07:12", "throughput": 5553.48, "total_tokens": 16243088}
|
|
{"current_steps": 33005, "total_steps": 37885, "loss": 0.0, "lr": 9.943701546844906e-08, "epoch": 4.3559456249175135, "percentage": 87.12, "elapsed_time": "0:48:45", "remaining_time": "0:07:12", "throughput": 5553.67, "total_tokens": 16245520}
|
|
{"current_steps": 33010, "total_steps": 37885, "loss": 0.0337, "lr": 9.923683379502557e-08, "epoch": 4.356605516695262, "percentage": 87.13, "elapsed_time": "0:48:45", "remaining_time": "0:07:12", "throughput": 5553.88, "total_tokens": 16248016}
|
|
{"current_steps": 33015, "total_steps": 37885, "loss": 0.0, "lr": 9.903684330206152e-08, "epoch": 4.35726540847301, "percentage": 87.15, "elapsed_time": "0:48:45", "remaining_time": "0:07:11", "throughput": 5554.02, "total_tokens": 16250320}
|
|
{"current_steps": 33020, "total_steps": 37885, "loss": 0.0, "lr": 9.8837044032003e-08, "epoch": 4.357925300250759, "percentage": 87.16, "elapsed_time": "0:48:46", "remaining_time": "0:07:11", "throughput": 5554.31, "total_tokens": 16253072}
|
|
{"current_steps": 33025, "total_steps": 37885, "loss": 0.0, "lr": 9.863743602725627e-08, "epoch": 4.3585851920285075, "percentage": 87.17, "elapsed_time": "0:48:46", "remaining_time": "0:07:10", "throughput": 5554.56, "total_tokens": 16255696}
|
|
{"current_steps": 33030, "total_steps": 37885, "loss": 0.0, "lr": 9.843801933018669e-08, "epoch": 4.359245083806256, "percentage": 87.18, "elapsed_time": "0:48:46", "remaining_time": "0:07:10", "throughput": 5554.78, "total_tokens": 16258256}
|
|
{"current_steps": 33035, "total_steps": 37885, "loss": 0.0, "lr": 9.823879398311874e-08, "epoch": 4.359904975584004, "percentage": 87.2, "elapsed_time": "0:48:47", "remaining_time": "0:07:09", "throughput": 5554.99, "total_tokens": 16260752}
|
|
{"current_steps": 33040, "total_steps": 37885, "loss": 0.0226, "lr": 9.803976002833692e-08, "epoch": 4.360564867361752, "percentage": 87.21, "elapsed_time": "0:48:47", "remaining_time": "0:07:09", "throughput": 5555.26, "total_tokens": 16263440}
|
|
{"current_steps": 33045, "total_steps": 37885, "loss": 0.0009, "lr": 9.78409175080841e-08, "epoch": 4.3612247591395015, "percentage": 87.22, "elapsed_time": "0:48:47", "remaining_time": "0:07:08", "throughput": 5555.49, "total_tokens": 16266000}
|
|
{"current_steps": 33050, "total_steps": 37885, "loss": 0.0, "lr": 9.764226646456408e-08, "epoch": 4.36188465091725, "percentage": 87.24, "elapsed_time": "0:48:48", "remaining_time": "0:07:08", "throughput": 5555.73, "total_tokens": 16268624}
|
|
{"current_steps": 33055, "total_steps": 37885, "loss": 0.0, "lr": 9.744380693993858e-08, "epoch": 4.362544542694998, "percentage": 87.25, "elapsed_time": "0:48:48", "remaining_time": "0:07:07", "throughput": 5555.89, "total_tokens": 16270992}
|
|
{"current_steps": 33060, "total_steps": 37885, "loss": 0.0, "lr": 9.724553897632893e-08, "epoch": 4.363204434472746, "percentage": 87.26, "elapsed_time": "0:48:48", "remaining_time": "0:07:07", "throughput": 5556.08, "total_tokens": 16273424}
|
|
{"current_steps": 33065, "total_steps": 37885, "loss": 0.0441, "lr": 9.704746261581675e-08, "epoch": 4.363864326250495, "percentage": 87.28, "elapsed_time": "0:48:49", "remaining_time": "0:07:07", "throughput": 5556.23, "total_tokens": 16275728}
|
|
{"current_steps": 33070, "total_steps": 37885, "loss": 0.0, "lr": 9.684957790044179e-08, "epoch": 4.364524218028244, "percentage": 87.29, "elapsed_time": "0:48:49", "remaining_time": "0:07:06", "throughput": 5556.33, "total_tokens": 16277904}
|
|
{"current_steps": 33075, "total_steps": 37885, "loss": 0.0001, "lr": 9.665188487220399e-08, "epoch": 4.365184109805992, "percentage": 87.3, "elapsed_time": "0:48:49", "remaining_time": "0:07:06", "throughput": 5556.64, "total_tokens": 16280720}
|
|
{"current_steps": 33080, "total_steps": 37885, "loss": 0.0, "lr": 9.64543835730619e-08, "epoch": 4.36584400158374, "percentage": 87.32, "elapsed_time": "0:48:50", "remaining_time": "0:07:05", "throughput": 5556.8, "total_tokens": 16283088}
|
|
{"current_steps": 33085, "total_steps": 37885, "loss": 0.0, "lr": 9.625707404493399e-08, "epoch": 4.366503893361489, "percentage": 87.33, "elapsed_time": "0:48:50", "remaining_time": "0:07:05", "throughput": 5556.99, "total_tokens": 16285520}
|
|
{"current_steps": 33090, "total_steps": 37885, "loss": 0.0001, "lr": 9.605995632969787e-08, "epoch": 4.367163785139237, "percentage": 87.34, "elapsed_time": "0:48:50", "remaining_time": "0:07:04", "throughput": 5557.16, "total_tokens": 16287888}
|
|
{"current_steps": 33095, "total_steps": 37885, "loss": 0.0226, "lr": 9.586303046919008e-08, "epoch": 4.367823676916986, "percentage": 87.36, "elapsed_time": "0:48:51", "remaining_time": "0:07:04", "throughput": 5557.32, "total_tokens": 16290256}
|
|
{"current_steps": 33100, "total_steps": 37885, "loss": 0.0, "lr": 9.566629650520675e-08, "epoch": 4.368483568694734, "percentage": 87.37, "elapsed_time": "0:48:51", "remaining_time": "0:07:03", "throughput": 5557.45, "total_tokens": 16292496}
|
|
{"current_steps": 33105, "total_steps": 37885, "loss": 0.0001, "lr": 9.546975447950345e-08, "epoch": 4.369143460472483, "percentage": 87.38, "elapsed_time": "0:48:51", "remaining_time": "0:07:03", "throughput": 5557.64, "total_tokens": 16294864}
|
|
{"current_steps": 33110, "total_steps": 37885, "loss": 0.0, "lr": 9.527340443379461e-08, "epoch": 4.369803352250231, "percentage": 87.4, "elapsed_time": "0:48:52", "remaining_time": "0:07:02", "throughput": 5557.94, "total_tokens": 16297616}
|
|
{"current_steps": 33115, "total_steps": 37885, "loss": 0.0, "lr": 9.507724640975412e-08, "epoch": 4.370463244027979, "percentage": 87.41, "elapsed_time": "0:48:52", "remaining_time": "0:07:02", "throughput": 5558.14, "total_tokens": 16300048}
|
|
{"current_steps": 33120, "total_steps": 37885, "loss": 0.0, "lr": 9.488128044901511e-08, "epoch": 4.371123135805728, "percentage": 87.42, "elapsed_time": "0:48:52", "remaining_time": "0:07:01", "throughput": 5558.39, "total_tokens": 16302608}
|
|
{"current_steps": 33125, "total_steps": 37885, "loss": 0.0715, "lr": 9.468550659317009e-08, "epoch": 4.371783027583477, "percentage": 87.44, "elapsed_time": "0:48:53", "remaining_time": "0:07:01", "throughput": 5558.66, "total_tokens": 16305232}
|
|
{"current_steps": 33130, "total_steps": 37885, "loss": 0.028, "lr": 9.44899248837705e-08, "epoch": 4.372442919361225, "percentage": 87.45, "elapsed_time": "0:48:53", "remaining_time": "0:07:01", "throughput": 5558.83, "total_tokens": 16307536}
|
|
{"current_steps": 33135, "total_steps": 37885, "loss": 0.0003, "lr": 9.4294535362327e-08, "epoch": 4.373102811138973, "percentage": 87.46, "elapsed_time": "0:48:53", "remaining_time": "0:07:00", "throughput": 5559.07, "total_tokens": 16310160}
|
|
{"current_steps": 33140, "total_steps": 37885, "loss": 0.0001, "lr": 9.409933807031012e-08, "epoch": 4.373762702916721, "percentage": 87.48, "elapsed_time": "0:48:54", "remaining_time": "0:07:00", "throughput": 5559.39, "total_tokens": 16312976}
|
|
{"current_steps": 33145, "total_steps": 37885, "loss": 0.0, "lr": 9.390433304914846e-08, "epoch": 4.37442259469447, "percentage": 87.49, "elapsed_time": "0:48:54", "remaining_time": "0:06:59", "throughput": 5559.54, "total_tokens": 16315216}
|
|
{"current_steps": 33150, "total_steps": 37885, "loss": 0.0502, "lr": 9.370952034023061e-08, "epoch": 4.375082486472219, "percentage": 87.5, "elapsed_time": "0:48:54", "remaining_time": "0:06:59", "throughput": 5559.73, "total_tokens": 16317584}
|
|
{"current_steps": 33155, "total_steps": 37885, "loss": 0.0, "lr": 9.351489998490447e-08, "epoch": 4.375742378249967, "percentage": 87.51, "elapsed_time": "0:48:55", "remaining_time": "0:06:58", "throughput": 5559.92, "total_tokens": 16319952}
|
|
{"current_steps": 33160, "total_steps": 37885, "loss": 0.0, "lr": 9.332047202447635e-08, "epoch": 4.376402270027715, "percentage": 87.53, "elapsed_time": "0:48:55", "remaining_time": "0:06:58", "throughput": 5560.18, "total_tokens": 16322576}
|
|
{"current_steps": 33165, "total_steps": 37885, "loss": 0.0, "lr": 9.312623650021245e-08, "epoch": 4.377062161805464, "percentage": 87.54, "elapsed_time": "0:48:55", "remaining_time": "0:06:57", "throughput": 5560.48, "total_tokens": 16325328}
|
|
{"current_steps": 33170, "total_steps": 37885, "loss": 0.0001, "lr": 9.29321934533378e-08, "epoch": 4.377722053583212, "percentage": 87.55, "elapsed_time": "0:48:56", "remaining_time": "0:06:57", "throughput": 5560.63, "total_tokens": 16327568}
|
|
{"current_steps": 33175, "total_steps": 37885, "loss": 0.0, "lr": 9.273834292503668e-08, "epoch": 4.378381945360961, "percentage": 87.57, "elapsed_time": "0:48:56", "remaining_time": "0:06:56", "throughput": 5560.95, "total_tokens": 16330384}
|
|
{"current_steps": 33180, "total_steps": 37885, "loss": 0.0, "lr": 9.254468495645251e-08, "epoch": 4.379041837138709, "percentage": 87.58, "elapsed_time": "0:48:56", "remaining_time": "0:06:56", "throughput": 5561.1, "total_tokens": 16332624}
|
|
{"current_steps": 33185, "total_steps": 37885, "loss": 0.0, "lr": 9.235121958868731e-08, "epoch": 4.379701728916458, "percentage": 87.59, "elapsed_time": "0:48:57", "remaining_time": "0:06:56", "throughput": 5561.27, "total_tokens": 16334928}
|
|
{"current_steps": 33190, "total_steps": 37885, "loss": 0.0004, "lr": 9.215794686280343e-08, "epoch": 4.380361620694206, "percentage": 87.61, "elapsed_time": "0:48:57", "remaining_time": "0:06:55", "throughput": 5561.53, "total_tokens": 16337552}
|
|
{"current_steps": 33195, "total_steps": 37885, "loss": 0.0, "lr": 9.196486681982096e-08, "epoch": 4.381021512471954, "percentage": 87.62, "elapsed_time": "0:48:57", "remaining_time": "0:06:55", "throughput": 5561.78, "total_tokens": 16340112}
|
|
{"current_steps": 33200, "total_steps": 37885, "loss": 0.0001, "lr": 9.177197950072012e-08, "epoch": 4.381681404249703, "percentage": 87.63, "elapsed_time": "0:48:58", "remaining_time": "0:06:54", "throughput": 5561.95, "total_tokens": 16342416}
|
|
{"current_steps": 33205, "total_steps": 37885, "loss": 0.0366, "lr": 9.157928494644007e-08, "epoch": 4.382341296027452, "percentage": 87.65, "elapsed_time": "0:48:58", "remaining_time": "0:06:54", "throughput": 5561.69, "total_tokens": 16344912}
|
|
{"current_steps": 33210, "total_steps": 37885, "loss": 0.0001, "lr": 9.138678319787818e-08, "epoch": 4.3830011878052, "percentage": 87.66, "elapsed_time": "0:48:59", "remaining_time": "0:06:53", "throughput": 5562.01, "total_tokens": 16347728}
|
|
{"current_steps": 33215, "total_steps": 37885, "loss": 0.0, "lr": 9.119447429589212e-08, "epoch": 4.383661079582948, "percentage": 87.67, "elapsed_time": "0:48:59", "remaining_time": "0:06:53", "throughput": 5562.28, "total_tokens": 16350352}
|
|
{"current_steps": 33220, "total_steps": 37885, "loss": 0.0, "lr": 9.100235828129743e-08, "epoch": 4.3843209713606965, "percentage": 87.69, "elapsed_time": "0:48:59", "remaining_time": "0:06:52", "throughput": 5562.49, "total_tokens": 16352784}
|
|
{"current_steps": 33225, "total_steps": 37885, "loss": 0.0066, "lr": 9.08104351948702e-08, "epoch": 4.384980863138446, "percentage": 87.7, "elapsed_time": "0:49:00", "remaining_time": "0:06:52", "throughput": 5562.73, "total_tokens": 16355344}
|
|
{"current_steps": 33230, "total_steps": 37885, "loss": 0.0003, "lr": 9.061870507734426e-08, "epoch": 4.385640754916194, "percentage": 87.71, "elapsed_time": "0:49:00", "remaining_time": "0:06:51", "throughput": 5562.92, "total_tokens": 16357712}
|
|
{"current_steps": 33235, "total_steps": 37885, "loss": 0.0, "lr": 9.042716796941275e-08, "epoch": 4.386300646693942, "percentage": 87.73, "elapsed_time": "0:49:00", "remaining_time": "0:06:51", "throughput": 5563.12, "total_tokens": 16360144}
|
|
{"current_steps": 33240, "total_steps": 37885, "loss": 0.0, "lr": 9.023582391172813e-08, "epoch": 4.3869605384716905, "percentage": 87.74, "elapsed_time": "0:49:01", "remaining_time": "0:06:50", "throughput": 5563.33, "total_tokens": 16362576}
|
|
{"current_steps": 33245, "total_steps": 37885, "loss": 0.0, "lr": 9.004467294490203e-08, "epoch": 4.387620430249439, "percentage": 87.75, "elapsed_time": "0:49:01", "remaining_time": "0:06:50", "throughput": 5563.55, "total_tokens": 16365072}
|
|
{"current_steps": 33250, "total_steps": 37885, "loss": 0.0, "lr": 8.98537151095048e-08, "epoch": 4.388280322027187, "percentage": 87.77, "elapsed_time": "0:49:01", "remaining_time": "0:06:50", "throughput": 5563.77, "total_tokens": 16367568}
|
|
{"current_steps": 33255, "total_steps": 37885, "loss": 0.0, "lr": 8.966295044606565e-08, "epoch": 4.388940213804936, "percentage": 87.78, "elapsed_time": "0:49:02", "remaining_time": "0:06:49", "throughput": 5564.02, "total_tokens": 16370128}
|
|
{"current_steps": 33260, "total_steps": 37885, "loss": 0.0, "lr": 8.94723789950731e-08, "epoch": 4.3896001055826845, "percentage": 87.79, "elapsed_time": "0:49:02", "remaining_time": "0:06:49", "throughput": 5564.26, "total_tokens": 16372688}
|
|
{"current_steps": 33265, "total_steps": 37885, "loss": 0.0, "lr": 8.928200079697479e-08, "epoch": 4.390259997360433, "percentage": 87.81, "elapsed_time": "0:49:02", "remaining_time": "0:06:48", "throughput": 5564.47, "total_tokens": 16375120}
|
|
{"current_steps": 33270, "total_steps": 37885, "loss": 0.0006, "lr": 8.909181589217674e-08, "epoch": 4.390919889138181, "percentage": 87.82, "elapsed_time": "0:49:03", "remaining_time": "0:06:48", "throughput": 5564.7, "total_tokens": 16377616}
|
|
{"current_steps": 33275, "total_steps": 37885, "loss": 0.0, "lr": 8.890182432104443e-08, "epoch": 4.391579780915929, "percentage": 87.83, "elapsed_time": "0:49:03", "remaining_time": "0:06:47", "throughput": 5564.93, "total_tokens": 16380112}
|
|
{"current_steps": 33280, "total_steps": 37885, "loss": 0.0, "lr": 8.871202612390249e-08, "epoch": 4.3922396726936785, "percentage": 87.84, "elapsed_time": "0:49:03", "remaining_time": "0:06:47", "throughput": 5565.13, "total_tokens": 16382544}
|
|
{"current_steps": 33285, "total_steps": 37885, "loss": 0.0, "lr": 8.852242134103383e-08, "epoch": 4.392899564471427, "percentage": 87.86, "elapsed_time": "0:49:04", "remaining_time": "0:06:46", "throughput": 5565.37, "total_tokens": 16385104}
|
|
{"current_steps": 33290, "total_steps": 37885, "loss": 0.0, "lr": 8.833301001268078e-08, "epoch": 4.393559456249175, "percentage": 87.87, "elapsed_time": "0:49:04", "remaining_time": "0:06:46", "throughput": 5565.58, "total_tokens": 16387536}
|
|
{"current_steps": 33295, "total_steps": 37885, "loss": 0.0188, "lr": 8.814379217904455e-08, "epoch": 4.394219348026923, "percentage": 87.88, "elapsed_time": "0:49:04", "remaining_time": "0:06:45", "throughput": 5565.75, "total_tokens": 16389840}
|
|
{"current_steps": 33300, "total_steps": 37885, "loss": 0.0153, "lr": 8.795476788028555e-08, "epoch": 4.394879239804672, "percentage": 87.9, "elapsed_time": "0:49:05", "remaining_time": "0:06:45", "throughput": 5565.9, "total_tokens": 16392080}
|
|
{"current_steps": 33305, "total_steps": 37885, "loss": 0.0001, "lr": 8.776593715652226e-08, "epoch": 4.395539131582421, "percentage": 87.91, "elapsed_time": "0:49:05", "remaining_time": "0:06:45", "throughput": 5566.06, "total_tokens": 16394384}
|
|
{"current_steps": 33310, "total_steps": 37885, "loss": 0.002, "lr": 8.757730004783303e-08, "epoch": 4.396199023360169, "percentage": 87.92, "elapsed_time": "0:49:05", "remaining_time": "0:06:44", "throughput": 5566.35, "total_tokens": 16397072}
|
|
{"current_steps": 33315, "total_steps": 37885, "loss": 0.0626, "lr": 8.738885659425477e-08, "epoch": 4.396858915137917, "percentage": 87.94, "elapsed_time": "0:49:06", "remaining_time": "0:06:44", "throughput": 5566.61, "total_tokens": 16399696}
|
|
{"current_steps": 33320, "total_steps": 37885, "loss": 0.0, "lr": 8.72006068357829e-08, "epoch": 4.397518806915666, "percentage": 87.95, "elapsed_time": "0:49:06", "remaining_time": "0:06:43", "throughput": 5566.86, "total_tokens": 16402256}
|
|
{"current_steps": 33325, "total_steps": 37885, "loss": 0.0001, "lr": 8.701255081237225e-08, "epoch": 4.398178698693414, "percentage": 87.96, "elapsed_time": "0:49:06", "remaining_time": "0:06:43", "throughput": 5567.14, "total_tokens": 16404944}
|
|
{"current_steps": 33330, "total_steps": 37885, "loss": 0.0, "lr": 8.682468856393654e-08, "epoch": 4.398838590471163, "percentage": 87.98, "elapsed_time": "0:49:07", "remaining_time": "0:06:42", "throughput": 5567.31, "total_tokens": 16407248}
|
|
{"current_steps": 33335, "total_steps": 37885, "loss": 0.0, "lr": 8.66370201303478e-08, "epoch": 4.399498482248911, "percentage": 87.99, "elapsed_time": "0:49:07", "remaining_time": "0:06:42", "throughput": 5567.43, "total_tokens": 16409424}
|
|
{"current_steps": 33340, "total_steps": 37885, "loss": 0.0004, "lr": 8.644954555143757e-08, "epoch": 4.40015837402666, "percentage": 88.0, "elapsed_time": "0:49:07", "remaining_time": "0:06:41", "throughput": 5567.7, "total_tokens": 16412048}
|
|
{"current_steps": 33345, "total_steps": 37885, "loss": 0.0002, "lr": 8.626226486699573e-08, "epoch": 4.400818265804408, "percentage": 88.02, "elapsed_time": "0:49:08", "remaining_time": "0:06:41", "throughput": 5567.98, "total_tokens": 16414736}
|
|
{"current_steps": 33350, "total_steps": 37885, "loss": 0.0, "lr": 8.607517811677168e-08, "epoch": 4.401478157582156, "percentage": 88.03, "elapsed_time": "0:49:08", "remaining_time": "0:06:40", "throughput": 5568.21, "total_tokens": 16417232}
|
|
{"current_steps": 33355, "total_steps": 37885, "loss": 0.0188, "lr": 8.588828534047276e-08, "epoch": 4.402138049359905, "percentage": 88.04, "elapsed_time": "0:49:08", "remaining_time": "0:06:40", "throughput": 5568.43, "total_tokens": 16419728}
|
|
{"current_steps": 33360, "total_steps": 37885, "loss": 0.0, "lr": 8.570158657776582e-08, "epoch": 4.402797941137654, "percentage": 88.06, "elapsed_time": "0:49:09", "remaining_time": "0:06:40", "throughput": 5568.67, "total_tokens": 16422288}
|
|
{"current_steps": 33365, "total_steps": 37885, "loss": 0.0782, "lr": 8.551508186827639e-08, "epoch": 4.403457832915402, "percentage": 88.07, "elapsed_time": "0:49:09", "remaining_time": "0:06:39", "throughput": 5568.89, "total_tokens": 16424784}
|
|
{"current_steps": 33370, "total_steps": 37885, "loss": 0.0, "lr": 8.532877125158854e-08, "epoch": 4.40411772469315, "percentage": 88.08, "elapsed_time": "0:49:09", "remaining_time": "0:06:39", "throughput": 5569.11, "total_tokens": 16427280}
|
|
{"current_steps": 33375, "total_steps": 37885, "loss": 0.0366, "lr": 8.514265476724547e-08, "epoch": 4.4047776164708985, "percentage": 88.1, "elapsed_time": "0:49:10", "remaining_time": "0:06:38", "throughput": 5569.35, "total_tokens": 16429840}
|
|
{"current_steps": 33380, "total_steps": 37885, "loss": 0.0, "lr": 8.49567324547491e-08, "epoch": 4.405437508248648, "percentage": 88.11, "elapsed_time": "0:49:10", "remaining_time": "0:06:38", "throughput": 5569.54, "total_tokens": 16432208}
|
|
{"current_steps": 33385, "total_steps": 37885, "loss": 0.0001, "lr": 8.47710043535601e-08, "epoch": 4.406097400026396, "percentage": 88.12, "elapsed_time": "0:49:10", "remaining_time": "0:06:37", "throughput": 5569.84, "total_tokens": 16434960}
|
|
{"current_steps": 33390, "total_steps": 37885, "loss": 0.0, "lr": 8.458547050309794e-08, "epoch": 4.406757291804144, "percentage": 88.14, "elapsed_time": "0:49:11", "remaining_time": "0:06:37", "throughput": 5570.1, "total_tokens": 16437584}
|
|
{"current_steps": 33395, "total_steps": 37885, "loss": 0.0, "lr": 8.440013094274035e-08, "epoch": 4.4074171835818925, "percentage": 88.15, "elapsed_time": "0:49:11", "remaining_time": "0:06:36", "throughput": 5570.35, "total_tokens": 16440144}
|
|
{"current_steps": 33400, "total_steps": 37885, "loss": 0.0004, "lr": 8.421498571182517e-08, "epoch": 4.408077075359641, "percentage": 88.16, "elapsed_time": "0:49:11", "remaining_time": "0:06:36", "throughput": 5570.59, "total_tokens": 16442704}
|
|
{"current_steps": 33405, "total_steps": 37885, "loss": 0.0, "lr": 8.403003484964743e-08, "epoch": 4.40873696713739, "percentage": 88.17, "elapsed_time": "0:49:12", "remaining_time": "0:06:35", "throughput": 5570.75, "total_tokens": 16445008}
|
|
{"current_steps": 33410, "total_steps": 37885, "loss": 0.0, "lr": 8.384527839546196e-08, "epoch": 4.409396858915138, "percentage": 88.19, "elapsed_time": "0:49:12", "remaining_time": "0:06:35", "throughput": 5570.9, "total_tokens": 16447248}
|
|
{"current_steps": 33415, "total_steps": 37885, "loss": 0.0, "lr": 8.366071638848183e-08, "epoch": 4.4100567506928865, "percentage": 88.2, "elapsed_time": "0:49:12", "remaining_time": "0:06:34", "throughput": 5571.23, "total_tokens": 16450128}
|
|
{"current_steps": 33420, "total_steps": 37885, "loss": 0.028, "lr": 8.347634886787901e-08, "epoch": 4.410716642470635, "percentage": 88.21, "elapsed_time": "0:49:13", "remaining_time": "0:06:34", "throughput": 5571.49, "total_tokens": 16452752}
|
|
{"current_steps": 33425, "total_steps": 37885, "loss": 0.0, "lr": 8.329217587278437e-08, "epoch": 4.411376534248383, "percentage": 88.23, "elapsed_time": "0:49:13", "remaining_time": "0:06:34", "throughput": 5571.71, "total_tokens": 16455248}
|
|
{"current_steps": 33430, "total_steps": 37885, "loss": 0.0, "lr": 8.310819744228691e-08, "epoch": 4.412036426026131, "percentage": 88.24, "elapsed_time": "0:49:13", "remaining_time": "0:06:33", "throughput": 5571.9, "total_tokens": 16457616}
|
|
{"current_steps": 33435, "total_steps": 37885, "loss": 0.0001, "lr": 8.29244136154349e-08, "epoch": 4.4126963178038805, "percentage": 88.25, "elapsed_time": "0:49:14", "remaining_time": "0:06:33", "throughput": 5572.08, "total_tokens": 16459984}
|
|
{"current_steps": 33440, "total_steps": 37885, "loss": 0.0, "lr": 8.274082443123543e-08, "epoch": 4.413356209581629, "percentage": 88.27, "elapsed_time": "0:49:14", "remaining_time": "0:06:32", "throughput": 5572.3, "total_tokens": 16462480}
|
|
{"current_steps": 33445, "total_steps": 37885, "loss": 0.0308, "lr": 8.255742992865356e-08, "epoch": 4.414016101359377, "percentage": 88.28, "elapsed_time": "0:49:14", "remaining_time": "0:06:32", "throughput": 5572.55, "total_tokens": 16465040}
|
|
{"current_steps": 33450, "total_steps": 37885, "loss": 0.0, "lr": 8.237423014661348e-08, "epoch": 4.414675993137125, "percentage": 88.29, "elapsed_time": "0:49:15", "remaining_time": "0:06:31", "throughput": 5572.83, "total_tokens": 16467728}
|
|
{"current_steps": 33455, "total_steps": 37885, "loss": 0.0, "lr": 8.219122512399813e-08, "epoch": 4.415335884914874, "percentage": 88.31, "elapsed_time": "0:49:15", "remaining_time": "0:06:31", "throughput": 5572.97, "total_tokens": 16469968}
|
|
{"current_steps": 33460, "total_steps": 37885, "loss": 0.0002, "lr": 8.200841489964927e-08, "epoch": 4.415995776692623, "percentage": 88.32, "elapsed_time": "0:49:15", "remaining_time": "0:06:30", "throughput": 5573.23, "total_tokens": 16472592}
|
|
{"current_steps": 33465, "total_steps": 37885, "loss": 0.0, "lr": 8.182579951236657e-08, "epoch": 4.416655668470371, "percentage": 88.33, "elapsed_time": "0:49:15", "remaining_time": "0:06:30", "throughput": 5573.43, "total_tokens": 16475024}
|
|
{"current_steps": 33470, "total_steps": 37885, "loss": 0.0, "lr": 8.164337900090901e-08, "epoch": 4.417315560248119, "percentage": 88.35, "elapsed_time": "0:49:16", "remaining_time": "0:06:29", "throughput": 5573.65, "total_tokens": 16477520}
|
|
{"current_steps": 33475, "total_steps": 37885, "loss": 0.0003, "lr": 8.146115340399418e-08, "epoch": 4.417975452025868, "percentage": 88.36, "elapsed_time": "0:49:16", "remaining_time": "0:06:29", "throughput": 5573.88, "total_tokens": 16480016}
|
|
{"current_steps": 33480, "total_steps": 37885, "loss": 0.0, "lr": 8.127912276029781e-08, "epoch": 4.418635343803616, "percentage": 88.37, "elapsed_time": "0:49:16", "remaining_time": "0:06:29", "throughput": 5574.02, "total_tokens": 16482256}
|
|
{"current_steps": 33485, "total_steps": 37885, "loss": 0.0, "lr": 8.109728710845488e-08, "epoch": 4.419295235581365, "percentage": 88.39, "elapsed_time": "0:49:17", "remaining_time": "0:06:28", "throughput": 5574.16, "total_tokens": 16484496}
|
|
{"current_steps": 33490, "total_steps": 37885, "loss": 0.0, "lr": 8.091564648705874e-08, "epoch": 4.419955127359113, "percentage": 88.4, "elapsed_time": "0:49:17", "remaining_time": "0:06:28", "throughput": 5574.34, "total_tokens": 16486864}
|
|
{"current_steps": 33495, "total_steps": 37885, "loss": 0.0006, "lr": 8.073420093466087e-08, "epoch": 4.420615019136862, "percentage": 88.41, "elapsed_time": "0:49:17", "remaining_time": "0:06:27", "throughput": 5574.51, "total_tokens": 16489168}
|
|
{"current_steps": 33500, "total_steps": 37885, "loss": 0.0426, "lr": 8.055295048977218e-08, "epoch": 4.42127491091461, "percentage": 88.43, "elapsed_time": "0:49:18", "remaining_time": "0:06:27", "throughput": 5574.77, "total_tokens": 16491792}
|
|
{"current_steps": 33505, "total_steps": 37885, "loss": 0.0472, "lr": 8.037189519086163e-08, "epoch": 4.421934802692358, "percentage": 88.44, "elapsed_time": "0:49:18", "remaining_time": "0:06:26", "throughput": 5574.93, "total_tokens": 16494096}
|
|
{"current_steps": 33510, "total_steps": 37885, "loss": 0.0001, "lr": 8.019103507635704e-08, "epoch": 4.4225946944701064, "percentage": 88.45, "elapsed_time": "0:49:18", "remaining_time": "0:06:26", "throughput": 5575.2, "total_tokens": 16496720}
|
|
{"current_steps": 33515, "total_steps": 37885, "loss": 0.0, "lr": 8.00103701846443e-08, "epoch": 4.423254586247856, "percentage": 88.47, "elapsed_time": "0:49:19", "remaining_time": "0:06:25", "throughput": 5575.4, "total_tokens": 16499152}
|
|
{"current_steps": 33520, "total_steps": 37885, "loss": 0.0025, "lr": 7.982990055406846e-08, "epoch": 4.423914478025604, "percentage": 88.48, "elapsed_time": "0:49:19", "remaining_time": "0:06:25", "throughput": 5575.58, "total_tokens": 16501520}
|
|
{"current_steps": 33525, "total_steps": 37885, "loss": 0.0001, "lr": 7.964962622293314e-08, "epoch": 4.424574369803352, "percentage": 88.49, "elapsed_time": "0:49:19", "remaining_time": "0:06:24", "throughput": 5575.74, "total_tokens": 16503824}
|
|
{"current_steps": 33530, "total_steps": 37885, "loss": 0.0, "lr": 7.946954722949972e-08, "epoch": 4.4252342615811004, "percentage": 88.5, "elapsed_time": "0:49:20", "remaining_time": "0:06:24", "throughput": 5576.03, "total_tokens": 16506512}
|
|
{"current_steps": 33535, "total_steps": 37885, "loss": 0.0, "lr": 7.928966361198897e-08, "epoch": 4.425894153358849, "percentage": 88.52, "elapsed_time": "0:49:20", "remaining_time": "0:06:24", "throughput": 5576.2, "total_tokens": 16508880}
|
|
{"current_steps": 33540, "total_steps": 37885, "loss": 0.0001, "lr": 7.910997540858011e-08, "epoch": 4.426554045136598, "percentage": 88.53, "elapsed_time": "0:49:20", "remaining_time": "0:06:23", "throughput": 5576.35, "total_tokens": 16511120}
|
|
{"current_steps": 33545, "total_steps": 37885, "loss": 0.0, "lr": 7.89304826574102e-08, "epoch": 4.427213936914346, "percentage": 88.54, "elapsed_time": "0:49:21", "remaining_time": "0:06:23", "throughput": 5576.51, "total_tokens": 16513424}
|
|
{"current_steps": 33550, "total_steps": 37885, "loss": 0.0, "lr": 7.875118539657566e-08, "epoch": 4.4278738286920944, "percentage": 88.56, "elapsed_time": "0:49:21", "remaining_time": "0:06:22", "throughput": 5576.65, "total_tokens": 16515664}
|
|
{"current_steps": 33555, "total_steps": 37885, "loss": 0.0, "lr": 7.857208366413048e-08, "epoch": 4.428533720469843, "percentage": 88.57, "elapsed_time": "0:49:21", "remaining_time": "0:06:22", "throughput": 5576.88, "total_tokens": 16518224}
|
|
{"current_steps": 33560, "total_steps": 37885, "loss": 0.0006, "lr": 7.839317749808838e-08, "epoch": 4.429193612247591, "percentage": 88.58, "elapsed_time": "0:49:22", "remaining_time": "0:06:21", "throughput": 5577.05, "total_tokens": 16520528}
|
|
{"current_steps": 33565, "total_steps": 37885, "loss": 0.0, "lr": 7.821446693642064e-08, "epoch": 4.42985350402534, "percentage": 88.6, "elapsed_time": "0:49:22", "remaining_time": "0:06:21", "throughput": 5577.23, "total_tokens": 16522896}
|
|
{"current_steps": 33570, "total_steps": 37885, "loss": 0.0, "lr": 7.803595201705692e-08, "epoch": 4.4305133958030885, "percentage": 88.61, "elapsed_time": "0:49:22", "remaining_time": "0:06:20", "throughput": 5577.46, "total_tokens": 16525392}
|
|
{"current_steps": 33575, "total_steps": 37885, "loss": 0.0001, "lr": 7.785763277788648e-08, "epoch": 4.431173287580837, "percentage": 88.62, "elapsed_time": "0:49:23", "remaining_time": "0:06:20", "throughput": 5577.69, "total_tokens": 16527952}
|
|
{"current_steps": 33580, "total_steps": 37885, "loss": 0.0, "lr": 7.767950925675559e-08, "epoch": 4.431833179358585, "percentage": 88.64, "elapsed_time": "0:49:23", "remaining_time": "0:06:19", "throughput": 5577.9, "total_tokens": 16530384}
|
|
{"current_steps": 33585, "total_steps": 37885, "loss": 0.0, "lr": 7.750158149147012e-08, "epoch": 4.432493071136333, "percentage": 88.65, "elapsed_time": "0:49:23", "remaining_time": "0:06:19", "throughput": 5578.08, "total_tokens": 16532752}
|
|
{"current_steps": 33590, "total_steps": 37885, "loss": 0.0, "lr": 7.732384951979354e-08, "epoch": 4.4331529629140825, "percentage": 88.66, "elapsed_time": "0:49:24", "remaining_time": "0:06:19", "throughput": 5578.31, "total_tokens": 16535248}
|
|
{"current_steps": 33595, "total_steps": 37885, "loss": 0.0, "lr": 7.714631337944854e-08, "epoch": 4.433812854691831, "percentage": 88.68, "elapsed_time": "0:49:24", "remaining_time": "0:06:18", "throughput": 5578.51, "total_tokens": 16537680}
|
|
{"current_steps": 33600, "total_steps": 37885, "loss": 0.0, "lr": 7.696897310811579e-08, "epoch": 4.434472746469579, "percentage": 88.69, "elapsed_time": "0:49:24", "remaining_time": "0:06:18", "throughput": 5578.77, "total_tokens": 16540304}
|
|
{"current_steps": 33605, "total_steps": 37885, "loss": 0.092, "lr": 7.679182874343437e-08, "epoch": 4.435132638247327, "percentage": 88.7, "elapsed_time": "0:49:25", "remaining_time": "0:06:17", "throughput": 5579.06, "total_tokens": 16542992}
|
|
{"current_steps": 33610, "total_steps": 37885, "loss": 0.0072, "lr": 7.66148803230019e-08, "epoch": 4.435792530025076, "percentage": 88.72, "elapsed_time": "0:49:25", "remaining_time": "0:06:17", "throughput": 5579.32, "total_tokens": 16545616}
|
|
{"current_steps": 33615, "total_steps": 37885, "loss": 0.0002, "lr": 7.643812788437454e-08, "epoch": 4.436452421802825, "percentage": 88.73, "elapsed_time": "0:49:25", "remaining_time": "0:06:16", "throughput": 5579.52, "total_tokens": 16548048}
|
|
{"current_steps": 33620, "total_steps": 37885, "loss": 0.0, "lr": 7.626157146506651e-08, "epoch": 4.437112313580573, "percentage": 88.74, "elapsed_time": "0:49:26", "remaining_time": "0:06:16", "throughput": 5579.67, "total_tokens": 16550288}
|
|
{"current_steps": 33625, "total_steps": 37885, "loss": 0.008, "lr": 7.608521110255084e-08, "epoch": 4.437772205358321, "percentage": 88.76, "elapsed_time": "0:49:26", "remaining_time": "0:06:15", "throughput": 5579.88, "total_tokens": 16552720}
|
|
{"current_steps": 33630, "total_steps": 37885, "loss": 0.0, "lr": 7.590904683425858e-08, "epoch": 4.43843209713607, "percentage": 88.77, "elapsed_time": "0:49:26", "remaining_time": "0:06:15", "throughput": 5580.04, "total_tokens": 16555024}
|
|
{"current_steps": 33635, "total_steps": 37885, "loss": 0.0, "lr": 7.57330786975795e-08, "epoch": 4.439091988913818, "percentage": 88.78, "elapsed_time": "0:49:27", "remaining_time": "0:06:14", "throughput": 5580.26, "total_tokens": 16557520}
|
|
{"current_steps": 33640, "total_steps": 37885, "loss": 0.0, "lr": 7.555730672986138e-08, "epoch": 4.439751880691567, "percentage": 88.8, "elapsed_time": "0:49:27", "remaining_time": "0:06:14", "throughput": 5580.43, "total_tokens": 16559824}
|
|
{"current_steps": 33645, "total_steps": 37885, "loss": 0.0, "lr": 7.53817309684106e-08, "epoch": 4.440411772469315, "percentage": 88.81, "elapsed_time": "0:49:27", "remaining_time": "0:06:14", "throughput": 5580.62, "total_tokens": 16562256}
|
|
{"current_steps": 33650, "total_steps": 37885, "loss": 0.0001, "lr": 7.520635145049193e-08, "epoch": 4.441071664247064, "percentage": 88.82, "elapsed_time": "0:49:28", "remaining_time": "0:06:13", "throughput": 5580.83, "total_tokens": 16564688}
|
|
{"current_steps": 33655, "total_steps": 37885, "loss": 0.028, "lr": 7.503116821332834e-08, "epoch": 4.441731556024812, "percentage": 88.83, "elapsed_time": "0:49:28", "remaining_time": "0:06:13", "throughput": 5580.97, "total_tokens": 16566928}
|
|
{"current_steps": 33660, "total_steps": 37885, "loss": 0.0, "lr": 7.485618129410109e-08, "epoch": 4.44239144780256, "percentage": 88.85, "elapsed_time": "0:49:28", "remaining_time": "0:06:12", "throughput": 5581.15, "total_tokens": 16569296}
|
|
{"current_steps": 33665, "total_steps": 37885, "loss": 0.0, "lr": 7.468139072994994e-08, "epoch": 4.443051339580309, "percentage": 88.86, "elapsed_time": "0:49:29", "remaining_time": "0:06:12", "throughput": 5581.35, "total_tokens": 16571728}
|
|
{"current_steps": 33670, "total_steps": 37885, "loss": 0.0015, "lr": 7.450679655797321e-08, "epoch": 4.443711231358058, "percentage": 88.87, "elapsed_time": "0:49:29", "remaining_time": "0:06:11", "throughput": 5581.55, "total_tokens": 16574160}
|
|
{"current_steps": 33675, "total_steps": 37885, "loss": 0.0018, "lr": 7.433239881522691e-08, "epoch": 4.444371123135806, "percentage": 88.89, "elapsed_time": "0:49:29", "remaining_time": "0:06:11", "throughput": 5581.67, "total_tokens": 16576336}
|
|
{"current_steps": 33680, "total_steps": 37885, "loss": 0.0001, "lr": 7.415819753872576e-08, "epoch": 4.445031014913554, "percentage": 88.9, "elapsed_time": "0:49:30", "remaining_time": "0:06:10", "throughput": 5581.88, "total_tokens": 16578768}
|
|
{"current_steps": 33685, "total_steps": 37885, "loss": 0.0, "lr": 7.398419276544287e-08, "epoch": 4.445690906691302, "percentage": 88.91, "elapsed_time": "0:49:30", "remaining_time": "0:06:10", "throughput": 5582.06, "total_tokens": 16581136}
|
|
{"current_steps": 33690, "total_steps": 37885, "loss": 0.0049, "lr": 7.381038453230925e-08, "epoch": 4.446350798469051, "percentage": 88.93, "elapsed_time": "0:49:30", "remaining_time": "0:06:09", "throughput": 5582.26, "total_tokens": 16583568}
|
|
{"current_steps": 33695, "total_steps": 37885, "loss": 0.028, "lr": 7.363677287621462e-08, "epoch": 4.4470106902468, "percentage": 88.94, "elapsed_time": "0:49:31", "remaining_time": "0:06:09", "throughput": 5582.47, "total_tokens": 16586000}
|
|
{"current_steps": 33700, "total_steps": 37885, "loss": 0.0, "lr": 7.346335783400693e-08, "epoch": 4.447670582024548, "percentage": 88.95, "elapsed_time": "0:49:31", "remaining_time": "0:06:09", "throughput": 5582.65, "total_tokens": 16588368}
|
|
{"current_steps": 33705, "total_steps": 37885, "loss": 0.0, "lr": 7.329013944249186e-08, "epoch": 4.448330473802296, "percentage": 88.97, "elapsed_time": "0:49:31", "remaining_time": "0:06:08", "throughput": 5582.83, "total_tokens": 16590736}
|
|
{"current_steps": 33710, "total_steps": 37885, "loss": 0.0, "lr": 7.311711773843399e-08, "epoch": 4.448990365580045, "percentage": 88.98, "elapsed_time": "0:49:32", "remaining_time": "0:06:08", "throughput": 5583.04, "total_tokens": 16593168}
|
|
{"current_steps": 33715, "total_steps": 37885, "loss": 0.0294, "lr": 7.294429275855596e-08, "epoch": 4.449650257357793, "percentage": 88.99, "elapsed_time": "0:49:32", "remaining_time": "0:06:07", "throughput": 5583.2, "total_tokens": 16595472}
|
|
{"current_steps": 33720, "total_steps": 37885, "loss": 0.0, "lr": 7.277166453953865e-08, "epoch": 4.450310149135542, "percentage": 89.01, "elapsed_time": "0:49:32", "remaining_time": "0:06:07", "throughput": 5583.3, "total_tokens": 16597584}
|
|
{"current_steps": 33725, "total_steps": 37885, "loss": 0.0005, "lr": 7.259923311802119e-08, "epoch": 4.45097004091329, "percentage": 89.02, "elapsed_time": "0:49:33", "remaining_time": "0:06:06", "throughput": 5583.53, "total_tokens": 16600080}
|
|
{"current_steps": 33730, "total_steps": 37885, "loss": 0.0, "lr": 7.242699853060041e-08, "epoch": 4.451629932691039, "percentage": 89.03, "elapsed_time": "0:49:33", "remaining_time": "0:06:06", "throughput": 5583.75, "total_tokens": 16602576}
|
|
{"current_steps": 33735, "total_steps": 37885, "loss": 0.0001, "lr": 7.225496081383264e-08, "epoch": 4.452289824468787, "percentage": 89.05, "elapsed_time": "0:49:33", "remaining_time": "0:06:05", "throughput": 5584.01, "total_tokens": 16605200}
|
|
{"current_steps": 33740, "total_steps": 37885, "loss": 0.0003, "lr": 7.2083120004231e-08, "epoch": 4.452949716246535, "percentage": 89.06, "elapsed_time": "0:49:34", "remaining_time": "0:06:05", "throughput": 5584.19, "total_tokens": 16607568}
|
|
{"current_steps": 33745, "total_steps": 37885, "loss": 0.0, "lr": 7.191147613826787e-08, "epoch": 4.453609608024284, "percentage": 89.07, "elapsed_time": "0:49:34", "remaining_time": "0:06:04", "throughput": 5584.53, "total_tokens": 16610448}
|
|
{"current_steps": 33750, "total_steps": 37885, "loss": 0.0001, "lr": 7.17400292523731e-08, "epoch": 4.454269499802033, "percentage": 89.09, "elapsed_time": "0:49:34", "remaining_time": "0:06:04", "throughput": 5584.81, "total_tokens": 16613136}
|
|
{"current_steps": 33755, "total_steps": 37885, "loss": 0.0, "lr": 7.156877938293515e-08, "epoch": 4.454929391579781, "percentage": 89.1, "elapsed_time": "0:49:35", "remaining_time": "0:06:04", "throughput": 5585.03, "total_tokens": 16615632}
|
|
{"current_steps": 33760, "total_steps": 37885, "loss": 0.0, "lr": 7.139772656630083e-08, "epoch": 4.455589283357529, "percentage": 89.11, "elapsed_time": "0:49:35", "remaining_time": "0:06:03", "throughput": 5585.27, "total_tokens": 16618192}
|
|
{"current_steps": 33765, "total_steps": 37885, "loss": 0.0, "lr": 7.122687083877422e-08, "epoch": 4.4562491751352775, "percentage": 89.12, "elapsed_time": "0:49:35", "remaining_time": "0:06:03", "throughput": 5585.43, "total_tokens": 16620496}
|
|
{"current_steps": 33770, "total_steps": 37885, "loss": 0.0381, "lr": 7.105621223661906e-08, "epoch": 4.456909066913026, "percentage": 89.14, "elapsed_time": "0:49:36", "remaining_time": "0:06:02", "throughput": 5585.61, "total_tokens": 16622864}
|
|
{"current_steps": 33775, "total_steps": 37885, "loss": 0.0, "lr": 7.088575079605585e-08, "epoch": 4.457568958690775, "percentage": 89.15, "elapsed_time": "0:49:36", "remaining_time": "0:06:02", "throughput": 5585.82, "total_tokens": 16625360}
|
|
{"current_steps": 33780, "total_steps": 37885, "loss": 0.0, "lr": 7.071548655326387e-08, "epoch": 4.458228850468523, "percentage": 89.16, "elapsed_time": "0:49:36", "remaining_time": "0:06:01", "throughput": 5586.05, "total_tokens": 16627856}
|
|
{"current_steps": 33785, "total_steps": 37885, "loss": 0.0, "lr": 7.054541954438053e-08, "epoch": 4.4588887422462715, "percentage": 89.18, "elapsed_time": "0:49:37", "remaining_time": "0:06:01", "throughput": 5586.33, "total_tokens": 16630544}
|
|
{"current_steps": 33790, "total_steps": 37885, "loss": 0.0, "lr": 7.03755498055012e-08, "epoch": 4.45954863402402, "percentage": 89.19, "elapsed_time": "0:49:37", "remaining_time": "0:06:00", "throughput": 5586.57, "total_tokens": 16633104}
|
|
{"current_steps": 33795, "total_steps": 37885, "loss": 0.0386, "lr": 7.02058773726798e-08, "epoch": 4.460208525801768, "percentage": 89.2, "elapsed_time": "0:49:37", "remaining_time": "0:06:00", "throughput": 5586.83, "total_tokens": 16635728}
|
|
{"current_steps": 33800, "total_steps": 37885, "loss": 0.0, "lr": 7.003640228192775e-08, "epoch": 4.460868417579517, "percentage": 89.22, "elapsed_time": "0:49:37", "remaining_time": "0:05:59", "throughput": 5586.95, "total_tokens": 16637904}
|
|
{"current_steps": 33805, "total_steps": 37885, "loss": 0.0, "lr": 6.986712456921506e-08, "epoch": 4.4615283093572655, "percentage": 89.23, "elapsed_time": "0:49:38", "remaining_time": "0:05:59", "throughput": 5587.12, "total_tokens": 16640208}
|
|
{"current_steps": 33810, "total_steps": 37885, "loss": 0.0, "lr": 6.969804427046988e-08, "epoch": 4.462188201135014, "percentage": 89.24, "elapsed_time": "0:49:38", "remaining_time": "0:05:59", "throughput": 5587.32, "total_tokens": 16642640}
|
|
{"current_steps": 33815, "total_steps": 37885, "loss": 0.0239, "lr": 6.952916142157783e-08, "epoch": 4.462848092912762, "percentage": 89.26, "elapsed_time": "0:49:38", "remaining_time": "0:05:58", "throughput": 5587.55, "total_tokens": 16645136}
|
|
{"current_steps": 33820, "total_steps": 37885, "loss": 0.0, "lr": 6.936047605838347e-08, "epoch": 4.46350798469051, "percentage": 89.27, "elapsed_time": "0:49:39", "remaining_time": "0:05:58", "throughput": 5587.69, "total_tokens": 16647376}
|
|
{"current_steps": 33825, "total_steps": 37885, "loss": 0.0, "lr": 6.919198821668892e-08, "epoch": 4.4641678764682595, "percentage": 89.28, "elapsed_time": "0:49:39", "remaining_time": "0:05:57", "throughput": 5587.83, "total_tokens": 16649616}
|
|
{"current_steps": 33830, "total_steps": 37885, "loss": 0.0, "lr": 6.902369793225437e-08, "epoch": 4.464827768246008, "percentage": 89.3, "elapsed_time": "0:49:39", "remaining_time": "0:05:57", "throughput": 5588.04, "total_tokens": 16652048}
|
|
{"current_steps": 33835, "total_steps": 37885, "loss": 0.0, "lr": 6.885560524079837e-08, "epoch": 4.465487660023756, "percentage": 89.31, "elapsed_time": "0:49:40", "remaining_time": "0:05:56", "throughput": 5588.26, "total_tokens": 16654544}
|
|
{"current_steps": 33840, "total_steps": 37885, "loss": 0.001, "lr": 6.868771017799735e-08, "epoch": 4.466147551801504, "percentage": 89.32, "elapsed_time": "0:49:40", "remaining_time": "0:05:56", "throughput": 5588.5, "total_tokens": 16657104}
|
|
{"current_steps": 33845, "total_steps": 37885, "loss": 0.0366, "lr": 6.852001277948593e-08, "epoch": 4.466807443579253, "percentage": 89.34, "elapsed_time": "0:49:40", "remaining_time": "0:05:55", "throughput": 5588.71, "total_tokens": 16659600}
|
|
{"current_steps": 33850, "total_steps": 37885, "loss": 0.0, "lr": 6.835251308085644e-08, "epoch": 4.467467335357002, "percentage": 89.35, "elapsed_time": "0:49:41", "remaining_time": "0:05:55", "throughput": 5589.01, "total_tokens": 16662352}
|
|
{"current_steps": 33855, "total_steps": 37885, "loss": 0.0, "lr": 6.818521111765952e-08, "epoch": 4.46812722713475, "percentage": 89.36, "elapsed_time": "0:49:41", "remaining_time": "0:05:54", "throughput": 5589.15, "total_tokens": 16664592}
|
|
{"current_steps": 33860, "total_steps": 37885, "loss": 0.0, "lr": 6.801810692540411e-08, "epoch": 4.468787118912498, "percentage": 89.38, "elapsed_time": "0:49:41", "remaining_time": "0:05:54", "throughput": 5589.4, "total_tokens": 16667216}
|
|
{"current_steps": 33865, "total_steps": 37885, "loss": 0.0001, "lr": 6.78512005395564e-08, "epoch": 4.469447010690247, "percentage": 89.39, "elapsed_time": "0:49:42", "remaining_time": "0:05:54", "throughput": 5589.64, "total_tokens": 16669776}
|
|
{"current_steps": 33870, "total_steps": 37885, "loss": 0.0, "lr": 6.768449199554127e-08, "epoch": 4.470106902467995, "percentage": 89.4, "elapsed_time": "0:49:42", "remaining_time": "0:05:53", "throughput": 5589.84, "total_tokens": 16672208}
|
|
{"current_steps": 33875, "total_steps": 37885, "loss": 0.0, "lr": 6.751798132874154e-08, "epoch": 4.470766794245744, "percentage": 89.42, "elapsed_time": "0:49:42", "remaining_time": "0:05:53", "throughput": 5590.0, "total_tokens": 16674512}
|
|
{"current_steps": 33880, "total_steps": 37885, "loss": 0.0002, "lr": 6.73516685744977e-08, "epoch": 4.471426686023492, "percentage": 89.43, "elapsed_time": "0:49:43", "remaining_time": "0:05:52", "throughput": 5590.16, "total_tokens": 16676816}
|
|
{"current_steps": 33885, "total_steps": 37885, "loss": 0.0192, "lr": 6.718555376810864e-08, "epoch": 4.472086577801241, "percentage": 89.44, "elapsed_time": "0:49:43", "remaining_time": "0:05:52", "throughput": 5590.4, "total_tokens": 16679376}
|
|
{"current_steps": 33890, "total_steps": 37885, "loss": 0.0, "lr": 6.70196369448306e-08, "epoch": 4.472746469578989, "percentage": 89.45, "elapsed_time": "0:49:43", "remaining_time": "0:05:51", "throughput": 5590.61, "total_tokens": 16681808}
|
|
{"current_steps": 33895, "total_steps": 37885, "loss": 0.0, "lr": 6.685391813987873e-08, "epoch": 4.473406361356737, "percentage": 89.47, "elapsed_time": "0:49:44", "remaining_time": "0:05:51", "throughput": 5590.9, "total_tokens": 16684560}
|
|
{"current_steps": 33900, "total_steps": 37885, "loss": 0.0239, "lr": 6.668839738842547e-08, "epoch": 4.474066253134486, "percentage": 89.48, "elapsed_time": "0:49:44", "remaining_time": "0:05:50", "throughput": 5591.12, "total_tokens": 16687056}
|
|
{"current_steps": 33905, "total_steps": 37885, "loss": 0.0666, "lr": 6.652307472560103e-08, "epoch": 4.474726144912235, "percentage": 89.49, "elapsed_time": "0:49:44", "remaining_time": "0:05:50", "throughput": 5591.3, "total_tokens": 16689424}
|
|
{"current_steps": 33910, "total_steps": 37885, "loss": 0.0, "lr": 6.635795018649459e-08, "epoch": 4.475386036689983, "percentage": 89.51, "elapsed_time": "0:49:45", "remaining_time": "0:05:49", "throughput": 5591.5, "total_tokens": 16691856}
|
|
{"current_steps": 33915, "total_steps": 37885, "loss": 0.0266, "lr": 6.61930238061521e-08, "epoch": 4.476045928467731, "percentage": 89.52, "elapsed_time": "0:49:45", "remaining_time": "0:05:49", "throughput": 5591.69, "total_tokens": 16694288}
|
|
{"current_steps": 33920, "total_steps": 37885, "loss": 0.0395, "lr": 6.602829561957846e-08, "epoch": 4.4767058202454795, "percentage": 89.53, "elapsed_time": "0:49:45", "remaining_time": "0:05:49", "throughput": 5591.96, "total_tokens": 16696976}
|
|
{"current_steps": 33925, "total_steps": 37885, "loss": 0.0, "lr": 6.586376566173556e-08, "epoch": 4.477365712023229, "percentage": 89.55, "elapsed_time": "0:49:46", "remaining_time": "0:05:48", "throughput": 5592.2, "total_tokens": 16699536}
|
|
{"current_steps": 33930, "total_steps": 37885, "loss": 0.0009, "lr": 6.569943396754396e-08, "epoch": 4.478025603800977, "percentage": 89.56, "elapsed_time": "0:49:46", "remaining_time": "0:05:48", "throughput": 5592.38, "total_tokens": 16701904}
|
|
{"current_steps": 33935, "total_steps": 37885, "loss": 0.0, "lr": 6.553530057188206e-08, "epoch": 4.478685495578725, "percentage": 89.57, "elapsed_time": "0:49:46", "remaining_time": "0:05:47", "throughput": 5592.56, "total_tokens": 16704272}
|
|
{"current_steps": 33940, "total_steps": 37885, "loss": 0.0, "lr": 6.537136550958545e-08, "epoch": 4.4793453873564735, "percentage": 89.59, "elapsed_time": "0:49:47", "remaining_time": "0:05:47", "throughput": 5592.82, "total_tokens": 16706896}
|
|
{"current_steps": 33945, "total_steps": 37885, "loss": 0.0016, "lr": 6.52076288154485e-08, "epoch": 4.480005279134222, "percentage": 89.6, "elapsed_time": "0:49:47", "remaining_time": "0:05:46", "throughput": 5592.92, "total_tokens": 16709008}
|
|
{"current_steps": 33950, "total_steps": 37885, "loss": 0.007, "lr": 6.504409052422332e-08, "epoch": 4.48066517091197, "percentage": 89.61, "elapsed_time": "0:49:47", "remaining_time": "0:05:46", "throughput": 5593.12, "total_tokens": 16711440}
|
|
{"current_steps": 33955, "total_steps": 37885, "loss": 0.0969, "lr": 6.488075067061927e-08, "epoch": 4.481325062689719, "percentage": 89.63, "elapsed_time": "0:49:48", "remaining_time": "0:05:45", "throughput": 5593.4, "total_tokens": 16714128}
|
|
{"current_steps": 33960, "total_steps": 37885, "loss": 0.0, "lr": 6.471760928930436e-08, "epoch": 4.4819849544674675, "percentage": 89.64, "elapsed_time": "0:49:48", "remaining_time": "0:05:45", "throughput": 5593.59, "total_tokens": 16716560}
|
|
{"current_steps": 33965, "total_steps": 37885, "loss": 0.0, "lr": 6.455466641490403e-08, "epoch": 4.482644846245216, "percentage": 89.65, "elapsed_time": "0:49:48", "remaining_time": "0:05:44", "throughput": 5593.83, "total_tokens": 16719120}
|
|
{"current_steps": 33970, "total_steps": 37885, "loss": 0.0, "lr": 6.439192208200195e-08, "epoch": 4.483304738022964, "percentage": 89.67, "elapsed_time": "0:49:49", "remaining_time": "0:05:44", "throughput": 5594.03, "total_tokens": 16721552}
|
|
{"current_steps": 33975, "total_steps": 37885, "loss": 0.0, "lr": 6.422937632513914e-08, "epoch": 4.483964629800712, "percentage": 89.68, "elapsed_time": "0:49:49", "remaining_time": "0:05:44", "throughput": 5594.33, "total_tokens": 16724304}
|
|
{"current_steps": 33980, "total_steps": 37885, "loss": 0.0, "lr": 6.40670291788149e-08, "epoch": 4.4846245215784615, "percentage": 89.69, "elapsed_time": "0:49:49", "remaining_time": "0:05:43", "throughput": 5594.6, "total_tokens": 16726992}
|
|
{"current_steps": 33985, "total_steps": 37885, "loss": 0.0, "lr": 6.390488067748634e-08, "epoch": 4.48528441335621, "percentage": 89.71, "elapsed_time": "0:49:50", "remaining_time": "0:05:43", "throughput": 5594.82, "total_tokens": 16729488}
|
|
{"current_steps": 33990, "total_steps": 37885, "loss": 0.0, "lr": 6.374293085556814e-08, "epoch": 4.485944305133958, "percentage": 89.72, "elapsed_time": "0:49:50", "remaining_time": "0:05:42", "throughput": 5595.02, "total_tokens": 16731920}
|
|
{"current_steps": 33995, "total_steps": 37885, "loss": 0.0, "lr": 6.358117974743293e-08, "epoch": 4.486604196911706, "percentage": 89.73, "elapsed_time": "0:49:50", "remaining_time": "0:05:42", "throughput": 5595.24, "total_tokens": 16734416}
|
|
{"current_steps": 34000, "total_steps": 37885, "loss": 0.0, "lr": 6.341962738741125e-08, "epoch": 4.487264088689455, "percentage": 89.75, "elapsed_time": "0:49:51", "remaining_time": "0:05:41", "throughput": 5595.51, "total_tokens": 16737104}
|
|
{"current_steps": 34005, "total_steps": 37885, "loss": 0.0, "lr": 6.325827380979176e-08, "epoch": 4.487923980467204, "percentage": 89.76, "elapsed_time": "0:49:51", "remaining_time": "0:05:41", "throughput": 5595.71, "total_tokens": 16739536}
|
|
{"current_steps": 34010, "total_steps": 37885, "loss": 0.0, "lr": 6.309711904882009e-08, "epoch": 4.488583872244952, "percentage": 89.77, "elapsed_time": "0:49:51", "remaining_time": "0:05:40", "throughput": 5595.83, "total_tokens": 16741712}
|
|
{"current_steps": 34015, "total_steps": 37885, "loss": 0.0, "lr": 6.293616313870032e-08, "epoch": 4.4892437640227, "percentage": 89.78, "elapsed_time": "0:49:52", "remaining_time": "0:05:40", "throughput": 5595.92, "total_tokens": 16743824}
|
|
{"current_steps": 34020, "total_steps": 37885, "loss": 0.0, "lr": 6.277540611359445e-08, "epoch": 4.489903655800449, "percentage": 89.8, "elapsed_time": "0:49:52", "remaining_time": "0:05:39", "throughput": 5596.12, "total_tokens": 16746256}
|
|
{"current_steps": 34025, "total_steps": 37885, "loss": 0.0, "lr": 6.261484800762163e-08, "epoch": 4.490563547578197, "percentage": 89.81, "elapsed_time": "0:49:52", "remaining_time": "0:05:39", "throughput": 5596.3, "total_tokens": 16748624}
|
|
{"current_steps": 34030, "total_steps": 37885, "loss": 0.0001, "lr": 6.245448885485938e-08, "epoch": 4.491223439355946, "percentage": 89.82, "elapsed_time": "0:49:53", "remaining_time": "0:05:39", "throughput": 5596.56, "total_tokens": 16751248}
|
|
{"current_steps": 34035, "total_steps": 37885, "loss": 0.0, "lr": 6.229432868934281e-08, "epoch": 4.491883331133694, "percentage": 89.84, "elapsed_time": "0:49:53", "remaining_time": "0:05:38", "throughput": 5596.75, "total_tokens": 16753680}
|
|
{"current_steps": 34040, "total_steps": 37885, "loss": 0.0001, "lr": 6.21343675450644e-08, "epoch": 4.492543222911443, "percentage": 89.85, "elapsed_time": "0:49:53", "remaining_time": "0:05:38", "throughput": 5596.99, "total_tokens": 16756240}
|
|
{"current_steps": 34045, "total_steps": 37885, "loss": 0.0001, "lr": 6.19746054559751e-08, "epoch": 4.493203114689191, "percentage": 89.86, "elapsed_time": "0:49:54", "remaining_time": "0:05:37", "throughput": 5597.19, "total_tokens": 16758672}
|
|
{"current_steps": 34050, "total_steps": 37885, "loss": 0.0, "lr": 6.181504245598312e-08, "epoch": 4.493863006466939, "percentage": 89.88, "elapsed_time": "0:49:54", "remaining_time": "0:05:37", "throughput": 5597.31, "total_tokens": 16760848}
|
|
{"current_steps": 34055, "total_steps": 37885, "loss": 0.0, "lr": 6.165567857895471e-08, "epoch": 4.494522898244687, "percentage": 89.89, "elapsed_time": "0:49:54", "remaining_time": "0:05:36", "throughput": 5597.53, "total_tokens": 16763344}
|
|
{"current_steps": 34060, "total_steps": 37885, "loss": 0.0005, "lr": 6.149651385871358e-08, "epoch": 4.495182790022437, "percentage": 89.9, "elapsed_time": "0:49:55", "remaining_time": "0:05:36", "throughput": 5597.76, "total_tokens": 16765904}
|
|
{"current_steps": 34065, "total_steps": 37885, "loss": 0.0, "lr": 6.133754832904092e-08, "epoch": 4.495842681800185, "percentage": 89.92, "elapsed_time": "0:49:55", "remaining_time": "0:05:35", "throughput": 5597.97, "total_tokens": 16768336}
|
|
{"current_steps": 34070, "total_steps": 37885, "loss": 0.0213, "lr": 6.117878202367677e-08, "epoch": 4.496502573577933, "percentage": 89.93, "elapsed_time": "0:49:55", "remaining_time": "0:05:35", "throughput": 5598.18, "total_tokens": 16770832}
|
|
{"current_steps": 34075, "total_steps": 37885, "loss": 0.0001, "lr": 6.102021497631749e-08, "epoch": 4.497162465355681, "percentage": 89.94, "elapsed_time": "0:49:56", "remaining_time": "0:05:34", "throughput": 5598.38, "total_tokens": 16773264}
|
|
{"current_steps": 34080, "total_steps": 37885, "loss": 0.0103, "lr": 6.086184722061826e-08, "epoch": 4.49782235713343, "percentage": 89.96, "elapsed_time": "0:49:56", "remaining_time": "0:05:34", "throughput": 5598.62, "total_tokens": 16775824}
|
|
{"current_steps": 34085, "total_steps": 37885, "loss": 0.0014, "lr": 6.070367879019101e-08, "epoch": 4.498482248911179, "percentage": 89.97, "elapsed_time": "0:49:56", "remaining_time": "0:05:34", "throughput": 5598.75, "total_tokens": 16778064}
|
|
{"current_steps": 34090, "total_steps": 37885, "loss": 0.0007, "lr": 6.054570971860618e-08, "epoch": 4.499142140688927, "percentage": 89.98, "elapsed_time": "0:49:57", "remaining_time": "0:05:33", "throughput": 5598.99, "total_tokens": 16780624}
|
|
{"current_steps": 34095, "total_steps": 37885, "loss": 0.0242, "lr": 6.038794003939151e-08, "epoch": 4.499802032466675, "percentage": 90.0, "elapsed_time": "0:49:57", "remaining_time": "0:05:33", "throughput": 5599.25, "total_tokens": 16783248}
|
|
{"current_steps": 34100, "total_steps": 37885, "loss": 0.0016, "lr": 6.023036978603213e-08, "epoch": 4.500461924244424, "percentage": 90.01, "elapsed_time": "0:49:57", "remaining_time": "0:05:32", "throughput": 5599.4, "total_tokens": 16785552}
|
|
{"current_steps": 34105, "total_steps": 37885, "loss": 0.0, "lr": 6.007299899197194e-08, "epoch": 4.501121816022172, "percentage": 90.02, "elapsed_time": "0:49:58", "remaining_time": "0:05:32", "throughput": 5599.52, "total_tokens": 16787728}
|
|
{"current_steps": 34110, "total_steps": 37885, "loss": 0.0, "lr": 5.991582769061121e-08, "epoch": 4.501781707799921, "percentage": 90.04, "elapsed_time": "0:49:58", "remaining_time": "0:05:31", "throughput": 5599.75, "total_tokens": 16790288}
|
|
{"current_steps": 34110, "total_steps": 37885, "eval_loss": 0.2763582170009613, "epoch": 4.501781707799921, "percentage": 90.04, "elapsed_time": "0:50:06", "remaining_time": "0:05:32", "throughput": 5585.2, "total_tokens": 16790288}
|
|
{"current_steps": 34115, "total_steps": 37885, "loss": 0.0, "lr": 5.975885591530827e-08, "epoch": 4.502441599577669, "percentage": 90.05, "elapsed_time": "0:50:42", "remaining_time": "0:05:36", "throughput": 5519.2, "total_tokens": 16792848}
|
|
{"current_steps": 34120, "total_steps": 37885, "loss": 0.0518, "lr": 5.9602083699379577e-08, "epoch": 4.503101491355418, "percentage": 90.06, "elapsed_time": "0:50:42", "remaining_time": "0:05:35", "throughput": 5519.44, "total_tokens": 16795408}
|
|
{"current_steps": 34125, "total_steps": 37885, "loss": 0.028, "lr": 5.9445511076098745e-08, "epoch": 4.503761383133166, "percentage": 90.08, "elapsed_time": "0:50:43", "remaining_time": "0:05:35", "throughput": 5519.72, "total_tokens": 16798096}
|
|
{"current_steps": 34130, "total_steps": 37885, "loss": 0.0, "lr": 5.92891380786974e-08, "epoch": 4.504421274910914, "percentage": 90.09, "elapsed_time": "0:50:43", "remaining_time": "0:05:34", "throughput": 5519.92, "total_tokens": 16800528}
|
|
{"current_steps": 34135, "total_steps": 37885, "loss": 0.0, "lr": 5.913296474036422e-08, "epoch": 4.505081166688663, "percentage": 90.1, "elapsed_time": "0:50:43", "remaining_time": "0:05:34", "throughput": 5520.14, "total_tokens": 16803024}
|
|
{"current_steps": 34140, "total_steps": 37885, "loss": 0.0, "lr": 5.8976991094246034e-08, "epoch": 4.505741058466412, "percentage": 90.11, "elapsed_time": "0:50:44", "remaining_time": "0:05:33", "throughput": 5520.34, "total_tokens": 16805456}
|
|
{"current_steps": 34145, "total_steps": 37885, "loss": 0.0005, "lr": 5.882121717344735e-08, "epoch": 4.50640095024416, "percentage": 90.13, "elapsed_time": "0:50:44", "remaining_time": "0:05:33", "throughput": 5520.46, "total_tokens": 16807632}
|
|
{"current_steps": 34150, "total_steps": 37885, "loss": 0.0, "lr": 5.866564301102972e-08, "epoch": 4.507060842021908, "percentage": 90.14, "elapsed_time": "0:50:44", "remaining_time": "0:05:33", "throughput": 5520.71, "total_tokens": 16810256}
|
|
{"current_steps": 34155, "total_steps": 37885, "loss": 0.0047, "lr": 5.851026864001263e-08, "epoch": 4.5077207337996565, "percentage": 90.15, "elapsed_time": "0:50:45", "remaining_time": "0:05:32", "throughput": 5521.01, "total_tokens": 16813008}
|
|
{"current_steps": 34160, "total_steps": 37885, "loss": 0.0294, "lr": 5.835509409337358e-08, "epoch": 4.508380625577406, "percentage": 90.17, "elapsed_time": "0:50:45", "remaining_time": "0:05:32", "throughput": 5521.18, "total_tokens": 16815376}
|
|
{"current_steps": 34165, "total_steps": 37885, "loss": 0.0, "lr": 5.820011940404668e-08, "epoch": 4.509040517355154, "percentage": 90.18, "elapsed_time": "0:50:45", "remaining_time": "0:05:31", "throughput": 5521.34, "total_tokens": 16817680}
|
|
{"current_steps": 34170, "total_steps": 37885, "loss": 0.0, "lr": 5.804534460492449e-08, "epoch": 4.509700409132902, "percentage": 90.19, "elapsed_time": "0:50:46", "remaining_time": "0:05:31", "throughput": 5521.62, "total_tokens": 16820368}
|
|
{"current_steps": 34175, "total_steps": 37885, "loss": 0.0, "lr": 5.789076972885687e-08, "epoch": 4.5103603009106505, "percentage": 90.21, "elapsed_time": "0:50:46", "remaining_time": "0:05:30", "throughput": 5521.78, "total_tokens": 16822672}
|
|
{"current_steps": 34180, "total_steps": 37885, "loss": 0.0, "lr": 5.7736394808651226e-08, "epoch": 4.511020192688399, "percentage": 90.22, "elapsed_time": "0:50:46", "remaining_time": "0:05:30", "throughput": 5521.94, "total_tokens": 16824976}
|
|
{"current_steps": 34185, "total_steps": 37885, "loss": 0.0, "lr": 5.758221987707235e-08, "epoch": 4.511680084466148, "percentage": 90.23, "elapsed_time": "0:50:47", "remaining_time": "0:05:29", "throughput": 5522.16, "total_tokens": 16827472}
|
|
{"current_steps": 34190, "total_steps": 37885, "loss": 0.0, "lr": 5.742824496684284e-08, "epoch": 4.512339976243896, "percentage": 90.25, "elapsed_time": "0:50:47", "remaining_time": "0:05:29", "throughput": 5522.34, "total_tokens": 16829840}
|
|
{"current_steps": 34195, "total_steps": 37885, "loss": 0.0, "lr": 5.72744701106429e-08, "epoch": 4.5129998680216445, "percentage": 90.26, "elapsed_time": "0:50:47", "remaining_time": "0:05:28", "throughput": 5522.58, "total_tokens": 16832400}
|
|
{"current_steps": 34200, "total_steps": 37885, "loss": 0.0016, "lr": 5.7120895341109864e-08, "epoch": 4.513659759799393, "percentage": 90.27, "elapsed_time": "0:50:48", "remaining_time": "0:05:28", "throughput": 5522.78, "total_tokens": 16834832}
|
|
{"current_steps": 34205, "total_steps": 37885, "loss": 0.0066, "lr": 5.696752069083899e-08, "epoch": 4.514319651577141, "percentage": 90.29, "elapsed_time": "0:50:48", "remaining_time": "0:05:27", "throughput": 5522.96, "total_tokens": 16837200}
|
|
{"current_steps": 34210, "total_steps": 37885, "loss": 0.0, "lr": 5.6814346192383125e-08, "epoch": 4.51497954335489, "percentage": 90.3, "elapsed_time": "0:50:48", "remaining_time": "0:05:27", "throughput": 5523.15, "total_tokens": 16839632}
|
|
{"current_steps": 34215, "total_steps": 37885, "loss": 0.0, "lr": 5.666137187825204e-08, "epoch": 4.5156394351326385, "percentage": 90.31, "elapsed_time": "0:50:49", "remaining_time": "0:05:27", "throughput": 5523.37, "total_tokens": 16842128}
|
|
{"current_steps": 34220, "total_steps": 37885, "loss": 0.0002, "lr": 5.650859778091388e-08, "epoch": 4.516299326910387, "percentage": 90.33, "elapsed_time": "0:50:49", "remaining_time": "0:05:26", "throughput": 5523.47, "total_tokens": 16844240}
|
|
{"current_steps": 34225, "total_steps": 37885, "loss": 0.0, "lr": 5.635602393279326e-08, "epoch": 4.516959218688135, "percentage": 90.34, "elapsed_time": "0:50:49", "remaining_time": "0:05:26", "throughput": 5523.57, "total_tokens": 16846352}
|
|
{"current_steps": 34230, "total_steps": 37885, "loss": 0.0, "lr": 5.62036503662735e-08, "epoch": 4.517619110465883, "percentage": 90.35, "elapsed_time": "0:50:50", "remaining_time": "0:05:25", "throughput": 5523.78, "total_tokens": 16848784}
|
|
{"current_steps": 34235, "total_steps": 37885, "loss": 0.0, "lr": 5.6051477113694625e-08, "epoch": 4.518279002243632, "percentage": 90.37, "elapsed_time": "0:50:50", "remaining_time": "0:05:25", "throughput": 5523.9, "total_tokens": 16850960}
|
|
{"current_steps": 34240, "total_steps": 37885, "loss": 0.0, "lr": 5.589950420735379e-08, "epoch": 4.518938894021381, "percentage": 90.38, "elapsed_time": "0:50:50", "remaining_time": "0:05:24", "throughput": 5524.27, "total_tokens": 16853968}
|
|
{"current_steps": 34245, "total_steps": 37885, "loss": 0.045, "lr": 5.574773167950697e-08, "epoch": 4.519598785799129, "percentage": 90.39, "elapsed_time": "0:50:51", "remaining_time": "0:05:24", "throughput": 5524.52, "total_tokens": 16856592}
|
|
{"current_steps": 34250, "total_steps": 37885, "loss": 0.0, "lr": 5.5596159562366076e-08, "epoch": 4.520258677576877, "percentage": 90.41, "elapsed_time": "0:50:51", "remaining_time": "0:05:23", "throughput": 5524.72, "total_tokens": 16859024}
|
|
{"current_steps": 34255, "total_steps": 37885, "loss": 0.0518, "lr": 5.5444787888101696e-08, "epoch": 4.520918569354626, "percentage": 90.42, "elapsed_time": "0:50:51", "remaining_time": "0:05:23", "throughput": 5524.86, "total_tokens": 16861264}
|
|
{"current_steps": 34260, "total_steps": 37885, "loss": 0.0002, "lr": 5.529361668884103e-08, "epoch": 4.521578461132374, "percentage": 90.43, "elapsed_time": "0:50:52", "remaining_time": "0:05:22", "throughput": 5525.06, "total_tokens": 16863696}
|
|
{"current_steps": 34265, "total_steps": 37885, "loss": 0.0, "lr": 5.514264599666918e-08, "epoch": 4.522238352910123, "percentage": 90.44, "elapsed_time": "0:50:52", "remaining_time": "0:05:22", "throughput": 5525.24, "total_tokens": 16866064}
|
|
{"current_steps": 34270, "total_steps": 37885, "loss": 0.0, "lr": 5.4991875843628745e-08, "epoch": 4.522898244687871, "percentage": 90.46, "elapsed_time": "0:50:52", "remaining_time": "0:05:22", "throughput": 5525.5, "total_tokens": 16868688}
|
|
{"current_steps": 34275, "total_steps": 37885, "loss": 0.0005, "lr": 5.484130626171923e-08, "epoch": 4.52355813646562, "percentage": 90.47, "elapsed_time": "0:50:53", "remaining_time": "0:05:21", "throughput": 5525.6, "total_tokens": 16870800}
|
|
{"current_steps": 34280, "total_steps": 37885, "loss": 0.0, "lr": 5.46909372828982e-08, "epoch": 4.524218028243368, "percentage": 90.48, "elapsed_time": "0:50:53", "remaining_time": "0:05:21", "throughput": 5525.89, "total_tokens": 16873552}
|
|
{"current_steps": 34285, "total_steps": 37885, "loss": 0.0008, "lr": 5.454076893908055e-08, "epoch": 4.524877920021116, "percentage": 90.5, "elapsed_time": "0:50:53", "remaining_time": "0:05:20", "throughput": 5526.09, "total_tokens": 16875984}
|
|
{"current_steps": 34290, "total_steps": 37885, "loss": 0.0003, "lr": 5.439080126213802e-08, "epoch": 4.5255378117988645, "percentage": 90.51, "elapsed_time": "0:50:54", "remaining_time": "0:05:20", "throughput": 5526.33, "total_tokens": 16878544}
|
|
{"current_steps": 34295, "total_steps": 37885, "loss": 0.0, "lr": 5.4241034283900364e-08, "epoch": 4.526197703576614, "percentage": 90.52, "elapsed_time": "0:50:54", "remaining_time": "0:05:19", "throughput": 5526.59, "total_tokens": 16881168}
|
|
{"current_steps": 34300, "total_steps": 37885, "loss": 0.0, "lr": 5.40914680361545e-08, "epoch": 4.526857595354362, "percentage": 90.54, "elapsed_time": "0:50:54", "remaining_time": "0:05:19", "throughput": 5526.75, "total_tokens": 16883472}
|
|
{"current_steps": 34305, "total_steps": 37885, "loss": 0.0, "lr": 5.394210255064502e-08, "epoch": 4.52751748713211, "percentage": 90.55, "elapsed_time": "0:50:55", "remaining_time": "0:05:18", "throughput": 5526.85, "total_tokens": 16885648}
|
|
{"current_steps": 34310, "total_steps": 37885, "loss": 0.0, "lr": 5.379293785907335e-08, "epoch": 4.5281773789098585, "percentage": 90.56, "elapsed_time": "0:50:55", "remaining_time": "0:05:18", "throughput": 5526.99, "total_tokens": 16887888}
|
|
{"current_steps": 34315, "total_steps": 37885, "loss": 0.0005, "lr": 5.364397399309861e-08, "epoch": 4.528837270687607, "percentage": 90.58, "elapsed_time": "0:50:55", "remaining_time": "0:05:17", "throughput": 5527.12, "total_tokens": 16890128}
|
|
{"current_steps": 34320, "total_steps": 37885, "loss": 0.0, "lr": 5.349521098433762e-08, "epoch": 4.529497162465356, "percentage": 90.59, "elapsed_time": "0:50:56", "remaining_time": "0:05:17", "throughput": 5527.3, "total_tokens": 16892496}
|
|
{"current_steps": 34325, "total_steps": 37885, "loss": 0.0415, "lr": 5.334664886436391e-08, "epoch": 4.530157054243104, "percentage": 90.6, "elapsed_time": "0:50:56", "remaining_time": "0:05:17", "throughput": 5527.41, "total_tokens": 16894608}
|
|
{"current_steps": 34330, "total_steps": 37885, "loss": 0.0, "lr": 5.3198287664708907e-08, "epoch": 4.5308169460208525, "percentage": 90.62, "elapsed_time": "0:50:56", "remaining_time": "0:05:16", "throughput": 5527.78, "total_tokens": 16897616}
|
|
{"current_steps": 34335, "total_steps": 37885, "loss": 0.0, "lr": 5.3050127416861104e-08, "epoch": 4.531476837798601, "percentage": 90.63, "elapsed_time": "0:50:57", "remaining_time": "0:05:16", "throughput": 5527.98, "total_tokens": 16900048}
|
|
{"current_steps": 34340, "total_steps": 37885, "loss": 0.0165, "lr": 5.290216815226656e-08, "epoch": 4.532136729576349, "percentage": 90.64, "elapsed_time": "0:50:57", "remaining_time": "0:05:15", "throughput": 5528.16, "total_tokens": 16902416}
|
|
{"current_steps": 34345, "total_steps": 37885, "loss": 0.0003, "lr": 5.275440990232838e-08, "epoch": 4.532796621354098, "percentage": 90.66, "elapsed_time": "0:50:57", "remaining_time": "0:05:15", "throughput": 5528.31, "total_tokens": 16904656}
|
|
{"current_steps": 34350, "total_steps": 37885, "loss": 0.0, "lr": 5.2606852698407367e-08, "epoch": 4.5334565131318465, "percentage": 90.67, "elapsed_time": "0:50:58", "remaining_time": "0:05:14", "throughput": 5528.55, "total_tokens": 16907216}
|
|
{"current_steps": 34355, "total_steps": 37885, "loss": 0.0381, "lr": 5.245949657182136e-08, "epoch": 4.534116404909595, "percentage": 90.68, "elapsed_time": "0:50:58", "remaining_time": "0:05:14", "throughput": 5528.81, "total_tokens": 16909840}
|
|
{"current_steps": 34360, "total_steps": 37885, "loss": 0.0003, "lr": 5.231234155384567e-08, "epoch": 4.534776296687343, "percentage": 90.7, "elapsed_time": "0:50:58", "remaining_time": "0:05:13", "throughput": 5529.07, "total_tokens": 16912464}
|
|
{"current_steps": 34365, "total_steps": 37885, "loss": 0.0, "lr": 5.216538767571277e-08, "epoch": 4.535436188465091, "percentage": 90.71, "elapsed_time": "0:50:59", "remaining_time": "0:05:13", "throughput": 5529.33, "total_tokens": 16915088}
|
|
{"current_steps": 34370, "total_steps": 37885, "loss": 0.002, "lr": 5.201863496861292e-08, "epoch": 4.5360960802428405, "percentage": 90.72, "elapsed_time": "0:50:59", "remaining_time": "0:05:12", "throughput": 5529.55, "total_tokens": 16917584}
|
|
{"current_steps": 34375, "total_steps": 37885, "loss": 0.0, "lr": 5.187208346369276e-08, "epoch": 4.536755972020589, "percentage": 90.74, "elapsed_time": "0:50:59", "remaining_time": "0:05:12", "throughput": 5529.77, "total_tokens": 16920080}
|
|
{"current_steps": 34380, "total_steps": 37885, "loss": 0.0, "lr": 5.17257331920572e-08, "epoch": 4.537415863798337, "percentage": 90.75, "elapsed_time": "0:51:00", "remaining_time": "0:05:11", "throughput": 5530.01, "total_tokens": 16922640}
|
|
{"current_steps": 34385, "total_steps": 37885, "loss": 0.0001, "lr": 5.157958418476793e-08, "epoch": 4.538075755576085, "percentage": 90.76, "elapsed_time": "0:51:00", "remaining_time": "0:05:11", "throughput": 5530.25, "total_tokens": 16925200}
|
|
{"current_steps": 34390, "total_steps": 37885, "loss": 0.0123, "lr": 5.1433636472844045e-08, "epoch": 4.538735647353834, "percentage": 90.77, "elapsed_time": "0:51:00", "remaining_time": "0:05:11", "throughput": 5530.4, "total_tokens": 16927504}
|
|
{"current_steps": 34395, "total_steps": 37885, "loss": 0.0079, "lr": 5.1287890087261864e-08, "epoch": 4.539395539131583, "percentage": 90.79, "elapsed_time": "0:51:01", "remaining_time": "0:05:10", "throughput": 5530.58, "total_tokens": 16929872}
|
|
{"current_steps": 34400, "total_steps": 37885, "loss": 0.0001, "lr": 5.114234505895465e-08, "epoch": 4.540055430909331, "percentage": 90.8, "elapsed_time": "0:51:01", "remaining_time": "0:05:10", "throughput": 5530.63, "total_tokens": 16931856}
|
|
{"current_steps": 34405, "total_steps": 37885, "loss": 0.0, "lr": 5.0997001418814025e-08, "epoch": 4.540715322687079, "percentage": 90.81, "elapsed_time": "0:51:01", "remaining_time": "0:05:09", "throughput": 5530.81, "total_tokens": 16934224}
|
|
{"current_steps": 34410, "total_steps": 37885, "loss": 0.0, "lr": 5.085185919768742e-08, "epoch": 4.541375214464828, "percentage": 90.83, "elapsed_time": "0:51:02", "remaining_time": "0:05:09", "throughput": 5530.98, "total_tokens": 16936592}
|
|
{"current_steps": 34415, "total_steps": 37885, "loss": 0.0, "lr": 5.0706918426380754e-08, "epoch": 4.542035106242576, "percentage": 90.84, "elapsed_time": "0:51:02", "remaining_time": "0:05:08", "throughput": 5531.18, "total_tokens": 16939024}
|
|
{"current_steps": 34420, "total_steps": 37885, "loss": 0.0366, "lr": 5.056217913565619e-08, "epoch": 4.542694998020325, "percentage": 90.85, "elapsed_time": "0:51:02", "remaining_time": "0:05:08", "throughput": 5531.38, "total_tokens": 16941456}
|
|
{"current_steps": 34425, "total_steps": 37885, "loss": 0.0, "lr": 5.0417641356233943e-08, "epoch": 4.543354889798073, "percentage": 90.87, "elapsed_time": "0:51:03", "remaining_time": "0:05:07", "throughput": 5531.49, "total_tokens": 16943632}
|
|
{"current_steps": 34430, "total_steps": 37885, "loss": 0.0, "lr": 5.027330511879102e-08, "epoch": 4.544014781575822, "percentage": 90.88, "elapsed_time": "0:51:03", "remaining_time": "0:05:07", "throughput": 5531.67, "total_tokens": 16946000}
|
|
{"current_steps": 34435, "total_steps": 37885, "loss": 0.0, "lr": 5.012917045396148e-08, "epoch": 4.54467467335357, "percentage": 90.89, "elapsed_time": "0:51:03", "remaining_time": "0:05:06", "throughput": 5531.9, "total_tokens": 16948560}
|
|
{"current_steps": 34440, "total_steps": 37885, "loss": 0.0001, "lr": 4.998523739233729e-08, "epoch": 4.545334565131318, "percentage": 90.91, "elapsed_time": "0:51:04", "remaining_time": "0:05:06", "throughput": 5532.07, "total_tokens": 16950928}
|
|
{"current_steps": 34445, "total_steps": 37885, "loss": 0.0005, "lr": 4.984150596446701e-08, "epoch": 4.545994456909067, "percentage": 90.92, "elapsed_time": "0:51:04", "remaining_time": "0:05:06", "throughput": 5532.26, "total_tokens": 16953360}
|
|
{"current_steps": 34450, "total_steps": 37885, "loss": 0.0, "lr": 4.9697976200856584e-08, "epoch": 4.546654348686816, "percentage": 90.93, "elapsed_time": "0:51:04", "remaining_time": "0:05:05", "throughput": 5532.47, "total_tokens": 16955856}
|
|
{"current_steps": 34455, "total_steps": 37885, "loss": 0.0, "lr": 4.955464813196897e-08, "epoch": 4.547314240464564, "percentage": 90.95, "elapsed_time": "0:51:05", "remaining_time": "0:05:05", "throughput": 5532.62, "total_tokens": 16958160}
|
|
{"current_steps": 34460, "total_steps": 37885, "loss": 0.0001, "lr": 4.941152178822483e-08, "epoch": 4.547974132242312, "percentage": 90.96, "elapsed_time": "0:51:05", "remaining_time": "0:05:04", "throughput": 5532.81, "total_tokens": 16960592}
|
|
{"current_steps": 34465, "total_steps": 37885, "loss": 0.0, "lr": 4.926859720000165e-08, "epoch": 4.5486340240200605, "percentage": 90.97, "elapsed_time": "0:51:05", "remaining_time": "0:05:04", "throughput": 5533.04, "total_tokens": 16963152}
|
|
{"current_steps": 34470, "total_steps": 37885, "loss": 0.0, "lr": 4.912587439763394e-08, "epoch": 4.54929391579781, "percentage": 90.99, "elapsed_time": "0:51:06", "remaining_time": "0:05:03", "throughput": 5533.22, "total_tokens": 16965584}
|
|
{"current_steps": 34475, "total_steps": 37885, "loss": 0.0, "lr": 4.898335341141369e-08, "epoch": 4.549953807575558, "percentage": 91.0, "elapsed_time": "0:51:06", "remaining_time": "0:05:03", "throughput": 5533.37, "total_tokens": 16967888}
|
|
{"current_steps": 34480, "total_steps": 37885, "loss": 0.0, "lr": 4.884103427159014e-08, "epoch": 4.550613699353306, "percentage": 91.01, "elapsed_time": "0:51:06", "remaining_time": "0:05:02", "throughput": 5533.54, "total_tokens": 16970256}
|
|
{"current_steps": 34485, "total_steps": 37885, "loss": 0.0615, "lr": 4.8698917008369144e-08, "epoch": 4.5512735911310545, "percentage": 91.03, "elapsed_time": "0:51:07", "remaining_time": "0:05:02", "throughput": 5533.88, "total_tokens": 16973200}
|
|
{"current_steps": 34490, "total_steps": 37885, "loss": 0.0, "lr": 4.855700165191423e-08, "epoch": 4.551933482908803, "percentage": 91.04, "elapsed_time": "0:51:07", "remaining_time": "0:05:01", "throughput": 5534.05, "total_tokens": 16975568}
|
|
{"current_steps": 34495, "total_steps": 37885, "loss": 0.0, "lr": 4.841528823234609e-08, "epoch": 4.552593374686552, "percentage": 91.05, "elapsed_time": "0:51:07", "remaining_time": "0:05:01", "throughput": 5534.28, "total_tokens": 16978128}
|
|
{"current_steps": 34500, "total_steps": 37885, "loss": 0.0, "lr": 4.8273776779741984e-08, "epoch": 4.5532532664643, "percentage": 91.07, "elapsed_time": "0:51:08", "remaining_time": "0:05:01", "throughput": 5534.48, "total_tokens": 16980560}
|
|
{"current_steps": 34505, "total_steps": 37885, "loss": 0.0, "lr": 4.8132467324136894e-08, "epoch": 4.5539131582420485, "percentage": 91.08, "elapsed_time": "0:51:08", "remaining_time": "0:05:00", "throughput": 5534.73, "total_tokens": 16983184}
|
|
{"current_steps": 34510, "total_steps": 37885, "loss": 0.0, "lr": 4.799135989552272e-08, "epoch": 4.554573050019797, "percentage": 91.09, "elapsed_time": "0:51:08", "remaining_time": "0:05:00", "throughput": 5534.87, "total_tokens": 16985488}
|
|
{"current_steps": 34515, "total_steps": 37885, "loss": 0.0019, "lr": 4.7850454523848725e-08, "epoch": 4.555232941797545, "percentage": 91.1, "elapsed_time": "0:51:09", "remaining_time": "0:04:59", "throughput": 5535.08, "total_tokens": 16987984}
|
|
{"current_steps": 34520, "total_steps": 37885, "loss": 0.0, "lr": 4.770975123902066e-08, "epoch": 4.555892833575293, "percentage": 91.12, "elapsed_time": "0:51:09", "remaining_time": "0:04:59", "throughput": 5535.23, "total_tokens": 16990288}
|
|
{"current_steps": 34525, "total_steps": 37885, "loss": 0.0, "lr": 4.756925007090185e-08, "epoch": 4.5565527253530425, "percentage": 91.13, "elapsed_time": "0:51:09", "remaining_time": "0:04:58", "throughput": 5535.48, "total_tokens": 16992912}
|
|
{"current_steps": 34530, "total_steps": 37885, "loss": 0.0294, "lr": 4.7428951049312996e-08, "epoch": 4.557212617130791, "percentage": 91.14, "elapsed_time": "0:51:10", "remaining_time": "0:04:58", "throughput": 5535.59, "total_tokens": 16995088}
|
|
{"current_steps": 34535, "total_steps": 37885, "loss": 0.0, "lr": 4.728885420403117e-08, "epoch": 4.557872508908539, "percentage": 91.16, "elapsed_time": "0:51:10", "remaining_time": "0:04:57", "throughput": 5535.79, "total_tokens": 16997520}
|
|
{"current_steps": 34540, "total_steps": 37885, "loss": 0.0, "lr": 4.714895956479104e-08, "epoch": 4.558532400686287, "percentage": 91.17, "elapsed_time": "0:51:10", "remaining_time": "0:04:57", "throughput": 5535.89, "total_tokens": 16999696}
|
|
{"current_steps": 34545, "total_steps": 37885, "loss": 0.0337, "lr": 4.700926716128428e-08, "epoch": 4.559192292464036, "percentage": 91.18, "elapsed_time": "0:51:11", "remaining_time": "0:04:56", "throughput": 5536.11, "total_tokens": 17002256}
|
|
{"current_steps": 34550, "total_steps": 37885, "loss": 0.0, "lr": 4.686977702315953e-08, "epoch": 4.559852184241785, "percentage": 91.2, "elapsed_time": "0:51:11", "remaining_time": "0:04:56", "throughput": 5536.35, "total_tokens": 17004880}
|
|
{"current_steps": 34555, "total_steps": 37885, "loss": 0.0719, "lr": 4.673048918002265e-08, "epoch": 4.560512076019533, "percentage": 91.21, "elapsed_time": "0:51:11", "remaining_time": "0:04:56", "throughput": 5536.56, "total_tokens": 17007376}
|
|
{"current_steps": 34560, "total_steps": 37885, "loss": 0.0, "lr": 4.659140366143621e-08, "epoch": 4.561171967797281, "percentage": 91.22, "elapsed_time": "0:51:12", "remaining_time": "0:04:55", "throughput": 5536.75, "total_tokens": 17009808}
|
|
{"current_steps": 34565, "total_steps": 37885, "loss": 0.0, "lr": 4.64525204969205e-08, "epoch": 4.56183185957503, "percentage": 91.24, "elapsed_time": "0:51:12", "remaining_time": "0:04:55", "throughput": 5537.09, "total_tokens": 17012752}
|
|
{"current_steps": 34570, "total_steps": 37885, "loss": 0.0, "lr": 4.631383971595226e-08, "epoch": 4.562491751352778, "percentage": 91.25, "elapsed_time": "0:51:12", "remaining_time": "0:04:54", "throughput": 5537.23, "total_tokens": 17015056}
|
|
{"current_steps": 34575, "total_steps": 37885, "loss": 0.0007, "lr": 4.617536134796529e-08, "epoch": 4.563151643130526, "percentage": 91.26, "elapsed_time": "0:51:13", "remaining_time": "0:04:54", "throughput": 5537.4, "total_tokens": 17017424}
|
|
{"current_steps": 34580, "total_steps": 37885, "loss": 0.0, "lr": 4.6037085422351077e-08, "epoch": 4.563811534908275, "percentage": 91.28, "elapsed_time": "0:51:13", "remaining_time": "0:04:53", "throughput": 5537.62, "total_tokens": 17019984}
|
|
{"current_steps": 34585, "total_steps": 37885, "loss": 0.0, "lr": 4.5899011968457244e-08, "epoch": 4.564471426686024, "percentage": 91.29, "elapsed_time": "0:51:13", "remaining_time": "0:04:53", "throughput": 5537.94, "total_tokens": 17022864}
|
|
{"current_steps": 34590, "total_steps": 37885, "loss": 0.0, "lr": 4.576114101558914e-08, "epoch": 4.565131318463772, "percentage": 91.3, "elapsed_time": "0:51:14", "remaining_time": "0:04:52", "throughput": 5538.1, "total_tokens": 17025168}
|
|
{"current_steps": 34595, "total_steps": 37885, "loss": 0.0, "lr": 4.562347259300881e-08, "epoch": 4.56579121024152, "percentage": 91.32, "elapsed_time": "0:51:14", "remaining_time": "0:04:52", "throughput": 5538.32, "total_tokens": 17027728}
|
|
{"current_steps": 34600, "total_steps": 37885, "loss": 0.0308, "lr": 4.54860067299353e-08, "epoch": 4.566451102019268, "percentage": 91.33, "elapsed_time": "0:51:14", "remaining_time": "0:04:51", "throughput": 5538.49, "total_tokens": 17030096}
|
|
{"current_steps": 34605, "total_steps": 37885, "loss": 0.0253, "lr": 4.534874345554496e-08, "epoch": 4.567110993797018, "percentage": 91.34, "elapsed_time": "0:51:15", "remaining_time": "0:04:51", "throughput": 5538.79, "total_tokens": 17032912}
|
|
{"current_steps": 34610, "total_steps": 37885, "loss": 0.0, "lr": 4.521168279897058e-08, "epoch": 4.567770885574766, "percentage": 91.36, "elapsed_time": "0:51:15", "remaining_time": "0:04:51", "throughput": 5539.05, "total_tokens": 17035600}
|
|
{"current_steps": 34615, "total_steps": 37885, "loss": 0.087, "lr": 4.507482478930258e-08, "epoch": 4.568430777352514, "percentage": 91.37, "elapsed_time": "0:51:15", "remaining_time": "0:04:50", "throughput": 5539.25, "total_tokens": 17038096}
|
|
{"current_steps": 34620, "total_steps": 37885, "loss": 0.0398, "lr": 4.493816945558815e-08, "epoch": 4.569090669130262, "percentage": 91.38, "elapsed_time": "0:51:16", "remaining_time": "0:04:50", "throughput": 5539.5, "total_tokens": 17040720}
|
|
{"current_steps": 34625, "total_steps": 37885, "loss": 0.0105, "lr": 4.480171682683098e-08, "epoch": 4.569750560908011, "percentage": 91.4, "elapsed_time": "0:51:16", "remaining_time": "0:04:49", "throughput": 5539.72, "total_tokens": 17043280}
|
|
{"current_steps": 34630, "total_steps": 37885, "loss": 0.0226, "lr": 4.466546693199247e-08, "epoch": 4.57041045268576, "percentage": 91.41, "elapsed_time": "0:51:16", "remaining_time": "0:04:49", "throughput": 5539.8, "total_tokens": 17045392}
|
|
{"current_steps": 34635, "total_steps": 37885, "loss": 0.0, "lr": 4.4529419799990695e-08, "epoch": 4.571070344463508, "percentage": 91.42, "elapsed_time": "0:51:17", "remaining_time": "0:04:48", "throughput": 5540.01, "total_tokens": 17047888}
|
|
{"current_steps": 34640, "total_steps": 37885, "loss": 0.0016, "lr": 4.439357545970068e-08, "epoch": 4.571730236241256, "percentage": 91.43, "elapsed_time": "0:51:17", "remaining_time": "0:04:48", "throughput": 5540.19, "total_tokens": 17050320}
|
|
{"current_steps": 34645, "total_steps": 37885, "loss": 0.0, "lr": 4.425793393995414e-08, "epoch": 4.572390128019005, "percentage": 91.45, "elapsed_time": "0:51:17", "remaining_time": "0:04:47", "throughput": 5540.44, "total_tokens": 17052944}
|
|
{"current_steps": 34650, "total_steps": 37885, "loss": 0.0, "lr": 4.412249526954015e-08, "epoch": 4.573050019796753, "percentage": 91.46, "elapsed_time": "0:51:18", "remaining_time": "0:04:47", "throughput": 5540.59, "total_tokens": 17055248}
|
|
{"current_steps": 34655, "total_steps": 37885, "loss": 0.0, "lr": 4.398725947720483e-08, "epoch": 4.573709911574502, "percentage": 91.47, "elapsed_time": "0:51:18", "remaining_time": "0:04:46", "throughput": 5540.83, "total_tokens": 17057872}
|
|
{"current_steps": 34660, "total_steps": 37885, "loss": 0.0, "lr": 4.385222659165067e-08, "epoch": 4.57436980335225, "percentage": 91.49, "elapsed_time": "0:51:18", "remaining_time": "0:04:46", "throughput": 5541.01, "total_tokens": 17060304}
|
|
{"current_steps": 34665, "total_steps": 37885, "loss": 0.0, "lr": 4.3717396641537395e-08, "epoch": 4.575029695129999, "percentage": 91.5, "elapsed_time": "0:51:19", "remaining_time": "0:04:46", "throughput": 5541.25, "total_tokens": 17062928}
|
|
{"current_steps": 34670, "total_steps": 37885, "loss": 0.0, "lr": 4.358276965548202e-08, "epoch": 4.575689586907747, "percentage": 91.51, "elapsed_time": "0:51:19", "remaining_time": "0:04:45", "throughput": 5541.44, "total_tokens": 17065360}
|
|
{"current_steps": 34675, "total_steps": 37885, "loss": 0.0001, "lr": 4.344834566205802e-08, "epoch": 4.576349478685495, "percentage": 91.53, "elapsed_time": "0:51:19", "remaining_time": "0:04:45", "throughput": 5541.62, "total_tokens": 17067792}
|
|
{"current_steps": 34680, "total_steps": 37885, "loss": 0.0, "lr": 4.331412468979567e-08, "epoch": 4.577009370463244, "percentage": 91.54, "elapsed_time": "0:51:20", "remaining_time": "0:04:44", "throughput": 5541.67, "total_tokens": 17069776}
|
|
{"current_steps": 34685, "total_steps": 37885, "loss": 0.0, "lr": 4.318010676718254e-08, "epoch": 4.577669262240993, "percentage": 91.55, "elapsed_time": "0:51:20", "remaining_time": "0:04:44", "throughput": 5541.87, "total_tokens": 17072272}
|
|
{"current_steps": 34690, "total_steps": 37885, "loss": 0.0, "lr": 4.304629192266318e-08, "epoch": 4.578329154018741, "percentage": 91.57, "elapsed_time": "0:51:20", "remaining_time": "0:04:43", "throughput": 5542.1, "total_tokens": 17074832}
|
|
{"current_steps": 34695, "total_steps": 37885, "loss": 0.0004, "lr": 4.2912680184638564e-08, "epoch": 4.578989045796489, "percentage": 91.58, "elapsed_time": "0:51:21", "remaining_time": "0:04:43", "throughput": 5542.44, "total_tokens": 17077776}
|
|
{"current_steps": 34700, "total_steps": 37885, "loss": 0.0657, "lr": 4.277927158146688e-08, "epoch": 4.5796489375742375, "percentage": 91.59, "elapsed_time": "0:51:21", "remaining_time": "0:04:42", "throughput": 5542.66, "total_tokens": 17080336}
|
|
{"current_steps": 34705, "total_steps": 37885, "loss": 0.0066, "lr": 4.264606614146327e-08, "epoch": 4.580308829351987, "percentage": 91.61, "elapsed_time": "0:51:21", "remaining_time": "0:04:42", "throughput": 5542.77, "total_tokens": 17082576}
|
|
{"current_steps": 34710, "total_steps": 37885, "loss": 0.0, "lr": 4.251306389289944e-08, "epoch": 4.580968721129735, "percentage": 91.62, "elapsed_time": "0:51:22", "remaining_time": "0:04:41", "throughput": 5542.93, "total_tokens": 17084880}
|
|
{"current_steps": 34715, "total_steps": 37885, "loss": 0.0, "lr": 4.2380264864004143e-08, "epoch": 4.581628612907483, "percentage": 91.63, "elapsed_time": "0:51:22", "remaining_time": "0:04:41", "throughput": 5543.15, "total_tokens": 17087440}
|
|
{"current_steps": 34720, "total_steps": 37885, "loss": 0.0, "lr": 4.2247669082963065e-08, "epoch": 4.5822885046852315, "percentage": 91.65, "elapsed_time": "0:51:22", "remaining_time": "0:04:41", "throughput": 5543.31, "total_tokens": 17089808}
|
|
{"current_steps": 34725, "total_steps": 37885, "loss": 0.0, "lr": 4.211527657791891e-08, "epoch": 4.58294839646298, "percentage": 91.66, "elapsed_time": "0:51:23", "remaining_time": "0:04:40", "throughput": 5543.44, "total_tokens": 17092048}
|
|
{"current_steps": 34730, "total_steps": 37885, "loss": 0.0, "lr": 4.198308737697087e-08, "epoch": 4.583608288240729, "percentage": 91.67, "elapsed_time": "0:51:23", "remaining_time": "0:04:40", "throughput": 5543.65, "total_tokens": 17094544}
|
|
{"current_steps": 34735, "total_steps": 37885, "loss": 0.0001, "lr": 4.1851101508174834e-08, "epoch": 4.584268180018477, "percentage": 91.69, "elapsed_time": "0:51:23", "remaining_time": "0:04:39", "throughput": 5543.77, "total_tokens": 17096784}
|
|
{"current_steps": 34740, "total_steps": 37885, "loss": 0.0001, "lr": 4.171931899954439e-08, "epoch": 4.5849280717962255, "percentage": 91.7, "elapsed_time": "0:51:24", "remaining_time": "0:04:39", "throughput": 5543.94, "total_tokens": 17099152}
|
|
{"current_steps": 34745, "total_steps": 37885, "loss": 0.0, "lr": 4.1587739879049067e-08, "epoch": 4.585587963573974, "percentage": 91.71, "elapsed_time": "0:51:24", "remaining_time": "0:04:38", "throughput": 5544.03, "total_tokens": 17101264}
|
|
{"current_steps": 34750, "total_steps": 37885, "loss": 0.0, "lr": 4.145636417461573e-08, "epoch": 4.586247855351722, "percentage": 91.72, "elapsed_time": "0:51:24", "remaining_time": "0:04:38", "throughput": 5544.16, "total_tokens": 17103504}
|
|
{"current_steps": 34755, "total_steps": 37885, "loss": 0.0, "lr": 4.132519191412787e-08, "epoch": 4.586907747129471, "percentage": 91.74, "elapsed_time": "0:51:25", "remaining_time": "0:04:37", "throughput": 5544.3, "total_tokens": 17105744}
|
|
{"current_steps": 34760, "total_steps": 37885, "loss": 0.0176, "lr": 4.1194223125425753e-08, "epoch": 4.5875676389072195, "percentage": 91.75, "elapsed_time": "0:51:25", "remaining_time": "0:04:37", "throughput": 5544.53, "total_tokens": 17108304}
|
|
{"current_steps": 34765, "total_steps": 37885, "loss": 0.0, "lr": 4.1063457836306716e-08, "epoch": 4.588227530684968, "percentage": 91.76, "elapsed_time": "0:51:25", "remaining_time": "0:04:36", "throughput": 5544.66, "total_tokens": 17110544}
|
|
{"current_steps": 34770, "total_steps": 37885, "loss": 0.0, "lr": 4.0932896074524546e-08, "epoch": 4.588887422462716, "percentage": 91.78, "elapsed_time": "0:51:26", "remaining_time": "0:04:36", "throughput": 5544.89, "total_tokens": 17113104}
|
|
{"current_steps": 34775, "total_steps": 37885, "loss": 0.0226, "lr": 4.080253786779042e-08, "epoch": 4.589547314240464, "percentage": 91.79, "elapsed_time": "0:51:26", "remaining_time": "0:04:36", "throughput": 5545.07, "total_tokens": 17115472}
|
|
{"current_steps": 34780, "total_steps": 37885, "loss": 0.0033, "lr": 4.0672383243771643e-08, "epoch": 4.590207206018213, "percentage": 91.8, "elapsed_time": "0:51:26", "remaining_time": "0:04:35", "throughput": 5545.29, "total_tokens": 17118032}
|
|
{"current_steps": 34785, "total_steps": 37885, "loss": 0.0, "lr": 4.054243223009246e-08, "epoch": 4.590867097795962, "percentage": 91.82, "elapsed_time": "0:51:27", "remaining_time": "0:04:35", "throughput": 5545.53, "total_tokens": 17120592}
|
|
{"current_steps": 34790, "total_steps": 37885, "loss": 0.0, "lr": 4.041268485433413e-08, "epoch": 4.59152698957371, "percentage": 91.83, "elapsed_time": "0:51:27", "remaining_time": "0:04:34", "throughput": 5545.69, "total_tokens": 17122896}
|
|
{"current_steps": 34795, "total_steps": 37885, "loss": 0.0, "lr": 4.028314114403475e-08, "epoch": 4.592186881351458, "percentage": 91.84, "elapsed_time": "0:51:27", "remaining_time": "0:04:34", "throughput": 5545.92, "total_tokens": 17125456}
|
|
{"current_steps": 34800, "total_steps": 37885, "loss": 0.0, "lr": 4.015380112668909e-08, "epoch": 4.592846773129207, "percentage": 91.86, "elapsed_time": "0:51:28", "remaining_time": "0:04:33", "throughput": 5546.16, "total_tokens": 17128016}
|
|
{"current_steps": 34805, "total_steps": 37885, "loss": 0.0, "lr": 4.002466482974831e-08, "epoch": 4.593506664906955, "percentage": 91.87, "elapsed_time": "0:51:28", "remaining_time": "0:04:33", "throughput": 5546.38, "total_tokens": 17130512}
|
|
{"current_steps": 34810, "total_steps": 37885, "loss": 0.0364, "lr": 3.989573228062082e-08, "epoch": 4.594166556684704, "percentage": 91.88, "elapsed_time": "0:51:28", "remaining_time": "0:04:32", "throughput": 5546.57, "total_tokens": 17132944}
|
|
{"current_steps": 34815, "total_steps": 37885, "loss": 0.0, "lr": 3.976700350667173e-08, "epoch": 4.594826448462452, "percentage": 91.9, "elapsed_time": "0:51:29", "remaining_time": "0:04:32", "throughput": 5546.79, "total_tokens": 17135440}
|
|
{"current_steps": 34820, "total_steps": 37885, "loss": 0.0, "lr": 3.963847853522262e-08, "epoch": 4.595486340240201, "percentage": 91.91, "elapsed_time": "0:51:29", "remaining_time": "0:04:31", "throughput": 5546.99, "total_tokens": 17137872}
|
|
{"current_steps": 34825, "total_steps": 37885, "loss": 0.0, "lr": 3.951015739355201e-08, "epoch": 4.596146232017949, "percentage": 91.92, "elapsed_time": "0:51:29", "remaining_time": "0:04:31", "throughput": 5547.15, "total_tokens": 17140176}
|
|
{"current_steps": 34830, "total_steps": 37885, "loss": 0.0, "lr": 3.9382040108895344e-08, "epoch": 4.596806123795697, "percentage": 91.94, "elapsed_time": "0:51:30", "remaining_time": "0:04:31", "throughput": 5547.37, "total_tokens": 17142672}
|
|
{"current_steps": 34835, "total_steps": 37885, "loss": 0.0004, "lr": 3.925412670844419e-08, "epoch": 4.5974660155734455, "percentage": 91.95, "elapsed_time": "0:51:30", "remaining_time": "0:04:30", "throughput": 5547.6, "total_tokens": 17145232}
|
|
{"current_steps": 34840, "total_steps": 37885, "loss": 0.0, "lr": 3.9126417219347506e-08, "epoch": 4.598125907351195, "percentage": 91.96, "elapsed_time": "0:51:30", "remaining_time": "0:04:30", "throughput": 5547.79, "total_tokens": 17147600}
|
|
{"current_steps": 34845, "total_steps": 37885, "loss": 0.0, "lr": 3.899891166871072e-08, "epoch": 4.598785799128943, "percentage": 91.98, "elapsed_time": "0:51:31", "remaining_time": "0:04:29", "throughput": 5547.98, "total_tokens": 17150032}
|
|
{"current_steps": 34850, "total_steps": 37885, "loss": 0.0003, "lr": 3.8871610083595965e-08, "epoch": 4.599445690906691, "percentage": 91.99, "elapsed_time": "0:51:31", "remaining_time": "0:04:29", "throughput": 5548.16, "total_tokens": 17152400}
|
|
{"current_steps": 34855, "total_steps": 37885, "loss": 0.0214, "lr": 3.874451249102195e-08, "epoch": 4.6001055826844395, "percentage": 92.0, "elapsed_time": "0:51:31", "remaining_time": "0:04:28", "throughput": 5548.36, "total_tokens": 17154896}
|
|
{"current_steps": 34860, "total_steps": 37885, "loss": 0.0002, "lr": 3.861761891796433e-08, "epoch": 4.600765474462188, "percentage": 92.02, "elapsed_time": "0:51:32", "remaining_time": "0:04:28", "throughput": 5548.54, "total_tokens": 17157264}
|
|
{"current_steps": 34865, "total_steps": 37885, "loss": 0.0, "lr": 3.8490929391355345e-08, "epoch": 4.601425366239937, "percentage": 92.03, "elapsed_time": "0:51:32", "remaining_time": "0:04:27", "throughput": 5548.72, "total_tokens": 17159632}
|
|
{"current_steps": 34870, "total_steps": 37885, "loss": 0.0, "lr": 3.83644439380838e-08, "epoch": 4.602085258017685, "percentage": 92.04, "elapsed_time": "0:51:32", "remaining_time": "0:04:27", "throughput": 5548.9, "total_tokens": 17162000}
|
|
{"current_steps": 34875, "total_steps": 37885, "loss": 0.0, "lr": 3.823816258499546e-08, "epoch": 4.6027451497954335, "percentage": 92.05, "elapsed_time": "0:51:33", "remaining_time": "0:04:26", "throughput": 5549.06, "total_tokens": 17164304}
|
|
{"current_steps": 34880, "total_steps": 37885, "loss": 0.0105, "lr": 3.811208535889265e-08, "epoch": 4.603405041573182, "percentage": 92.07, "elapsed_time": "0:51:33", "remaining_time": "0:04:26", "throughput": 5549.33, "total_tokens": 17166992}
|
|
{"current_steps": 34885, "total_steps": 37885, "loss": 0.0411, "lr": 3.79862122865342e-08, "epoch": 4.60406493335093, "percentage": 92.08, "elapsed_time": "0:51:33", "remaining_time": "0:04:26", "throughput": 5549.56, "total_tokens": 17169552}
|
|
{"current_steps": 34890, "total_steps": 37885, "loss": 0.0, "lr": 3.786054339463596e-08, "epoch": 4.604724825128679, "percentage": 92.09, "elapsed_time": "0:51:34", "remaining_time": "0:04:25", "throughput": 5549.82, "total_tokens": 17172176}
|
|
{"current_steps": 34895, "total_steps": 37885, "loss": 0.0, "lr": 3.7735078709869804e-08, "epoch": 4.6053847169064275, "percentage": 92.11, "elapsed_time": "0:51:34", "remaining_time": "0:04:25", "throughput": 5549.96, "total_tokens": 17174416}
|
|
{"current_steps": 34900, "total_steps": 37885, "loss": 0.0, "lr": 3.760981825886533e-08, "epoch": 4.606044608684176, "percentage": 92.12, "elapsed_time": "0:51:34", "remaining_time": "0:04:24", "throughput": 5550.11, "total_tokens": 17176720}
|
|
{"current_steps": 34905, "total_steps": 37885, "loss": 0.0239, "lr": 3.748476206820783e-08, "epoch": 4.606704500461924, "percentage": 92.13, "elapsed_time": "0:51:35", "remaining_time": "0:04:24", "throughput": 5550.33, "total_tokens": 17179216}
|
|
{"current_steps": 34910, "total_steps": 37885, "loss": 0.0016, "lr": 3.735991016443929e-08, "epoch": 4.607364392239672, "percentage": 92.15, "elapsed_time": "0:51:35", "remaining_time": "0:04:23", "throughput": 5550.53, "total_tokens": 17181648}
|
|
{"current_steps": 34915, "total_steps": 37885, "loss": 0.0337, "lr": 3.723526257405929e-08, "epoch": 4.6080242840174215, "percentage": 92.16, "elapsed_time": "0:51:35", "remaining_time": "0:04:23", "throughput": 5550.79, "total_tokens": 17184272}
|
|
{"current_steps": 34920, "total_steps": 37885, "loss": 0.0, "lr": 3.711081932352278e-08, "epoch": 4.60868417579517, "percentage": 92.17, "elapsed_time": "0:51:36", "remaining_time": "0:04:22", "throughput": 5550.98, "total_tokens": 17186704}
|
|
{"current_steps": 34925, "total_steps": 37885, "loss": 0.0005, "lr": 3.698658043924241e-08, "epoch": 4.609344067572918, "percentage": 92.19, "elapsed_time": "0:51:36", "remaining_time": "0:04:22", "throughput": 5551.2, "total_tokens": 17189200}
|
|
{"current_steps": 34930, "total_steps": 37885, "loss": 0.0, "lr": 3.686254594758653e-08, "epoch": 4.610003959350666, "percentage": 92.2, "elapsed_time": "0:51:36", "remaining_time": "0:04:21", "throughput": 5551.47, "total_tokens": 17191888}
|
|
{"current_steps": 34935, "total_steps": 37885, "loss": 0.0, "lr": 3.673871587488076e-08, "epoch": 4.610663851128415, "percentage": 92.21, "elapsed_time": "0:51:37", "remaining_time": "0:04:21", "throughput": 5551.72, "total_tokens": 17194448}
|
|
{"current_steps": 34940, "total_steps": 37885, "loss": 0.028, "lr": 3.661509024740739e-08, "epoch": 4.611323742906164, "percentage": 92.23, "elapsed_time": "0:51:37", "remaining_time": "0:04:21", "throughput": 5551.99, "total_tokens": 17197136}
|
|
{"current_steps": 34945, "total_steps": 37885, "loss": 0.0, "lr": 3.6491669091404553e-08, "epoch": 4.611983634683912, "percentage": 92.24, "elapsed_time": "0:51:37", "remaining_time": "0:04:20", "throughput": 5552.16, "total_tokens": 17199504}
|
|
{"current_steps": 34950, "total_steps": 37885, "loss": 0.0, "lr": 3.636845243306785e-08, "epoch": 4.61264352646166, "percentage": 92.25, "elapsed_time": "0:51:38", "remaining_time": "0:04:20", "throughput": 5552.32, "total_tokens": 17201808}
|
|
{"current_steps": 34955, "total_steps": 37885, "loss": 0.0001, "lr": 3.624544029854914e-08, "epoch": 4.613303418239409, "percentage": 92.27, "elapsed_time": "0:51:38", "remaining_time": "0:04:19", "throughput": 5552.56, "total_tokens": 17204432}
|
|
{"current_steps": 34960, "total_steps": 37885, "loss": 0.0001, "lr": 3.6122632713956766e-08, "epoch": 4.613963310017157, "percentage": 92.28, "elapsed_time": "0:51:38", "remaining_time": "0:04:19", "throughput": 5552.77, "total_tokens": 17206928}
|
|
{"current_steps": 34965, "total_steps": 37885, "loss": 0.0001, "lr": 3.600002970535565e-08, "epoch": 4.614623201794906, "percentage": 92.29, "elapsed_time": "0:51:39", "remaining_time": "0:04:18", "throughput": 5553.02, "total_tokens": 17209552}
|
|
{"current_steps": 34970, "total_steps": 37885, "loss": 0.0, "lr": 3.587763129876753e-08, "epoch": 4.615283093572654, "percentage": 92.31, "elapsed_time": "0:51:39", "remaining_time": "0:04:18", "throughput": 5553.24, "total_tokens": 17212048}
|
|
{"current_steps": 34975, "total_steps": 37885, "loss": 0.0, "lr": 3.575543752017063e-08, "epoch": 4.615942985350403, "percentage": 92.32, "elapsed_time": "0:51:39", "remaining_time": "0:04:17", "throughput": 5553.33, "total_tokens": 17214160}
|
|
{"current_steps": 34980, "total_steps": 37885, "loss": 0.0003, "lr": 3.563344839549942e-08, "epoch": 4.616602877128151, "percentage": 92.33, "elapsed_time": "0:51:40", "remaining_time": "0:04:17", "throughput": 5553.55, "total_tokens": 17216656}
|
|
{"current_steps": 34985, "total_steps": 37885, "loss": 0.0001, "lr": 3.5511663950645534e-08, "epoch": 4.617262768905899, "percentage": 92.35, "elapsed_time": "0:51:40", "remaining_time": "0:04:17", "throughput": 5553.72, "total_tokens": 17219024}
|
|
{"current_steps": 34990, "total_steps": 37885, "loss": 0.0, "lr": 3.539008421145673e-08, "epoch": 4.617922660683648, "percentage": 92.36, "elapsed_time": "0:51:40", "remaining_time": "0:04:16", "throughput": 5553.97, "total_tokens": 17221648}
|
|
{"current_steps": 34995, "total_steps": 37885, "loss": 0.0001, "lr": 3.526870920373726e-08, "epoch": 4.618582552461397, "percentage": 92.37, "elapsed_time": "0:51:41", "remaining_time": "0:04:16", "throughput": 5554.13, "total_tokens": 17223952}
|
|
{"current_steps": 35000, "total_steps": 37885, "loss": 0.0, "lr": 3.514753895324829e-08, "epoch": 4.619242444239145, "percentage": 92.38, "elapsed_time": "0:51:41", "remaining_time": "0:04:15", "throughput": 5554.34, "total_tokens": 17226448}
|
|
{"current_steps": 35005, "total_steps": 37885, "loss": 0.0, "lr": 3.5026573485707253e-08, "epoch": 4.619902336016893, "percentage": 92.4, "elapsed_time": "0:51:41", "remaining_time": "0:04:15", "throughput": 5554.56, "total_tokens": 17228944}
|
|
{"current_steps": 35010, "total_steps": 37885, "loss": 0.0, "lr": 3.4905812826788285e-08, "epoch": 4.620562227794641, "percentage": 92.41, "elapsed_time": "0:51:42", "remaining_time": "0:04:14", "throughput": 5554.76, "total_tokens": 17231376}
|
|
{"current_steps": 35015, "total_steps": 37885, "loss": 0.0411, "lr": 3.478525700212176e-08, "epoch": 4.621222119572391, "percentage": 92.42, "elapsed_time": "0:51:42", "remaining_time": "0:04:14", "throughput": 5555.12, "total_tokens": 17234384}
|
|
{"current_steps": 35020, "total_steps": 37885, "loss": 0.004, "lr": 3.4664906037294996e-08, "epoch": 4.621882011350139, "percentage": 92.44, "elapsed_time": "0:51:42", "remaining_time": "0:04:13", "throughput": 5555.32, "total_tokens": 17236816}
|
|
{"current_steps": 35025, "total_steps": 37885, "loss": 0.0, "lr": 3.4544759957851553e-08, "epoch": 4.622541903127887, "percentage": 92.45, "elapsed_time": "0:51:43", "remaining_time": "0:04:13", "throughput": 5555.44, "total_tokens": 17238992}
|
|
{"current_steps": 35030, "total_steps": 37885, "loss": 0.0001, "lr": 3.4424818789291373e-08, "epoch": 4.623201794905635, "percentage": 92.46, "elapsed_time": "0:51:43", "remaining_time": "0:04:12", "throughput": 5555.69, "total_tokens": 17241616}
|
|
{"current_steps": 35035, "total_steps": 37885, "loss": 0.0, "lr": 3.4305082557071316e-08, "epoch": 4.623861686683384, "percentage": 92.48, "elapsed_time": "0:51:43", "remaining_time": "0:04:12", "throughput": 5555.81, "total_tokens": 17243792}
|
|
{"current_steps": 35040, "total_steps": 37885, "loss": 0.0, "lr": 3.418555128660461e-08, "epoch": 4.624521578461132, "percentage": 92.49, "elapsed_time": "0:51:44", "remaining_time": "0:04:12", "throughput": 5556.02, "total_tokens": 17246288}
|
|
{"current_steps": 35045, "total_steps": 37885, "loss": 0.0, "lr": 3.406622500326062e-08, "epoch": 4.625181470238881, "percentage": 92.5, "elapsed_time": "0:51:44", "remaining_time": "0:04:11", "throughput": 5556.31, "total_tokens": 17249040}
|
|
{"current_steps": 35050, "total_steps": 37885, "loss": 0.0018, "lr": 3.3947103732365646e-08, "epoch": 4.625841362016629, "percentage": 92.52, "elapsed_time": "0:51:44", "remaining_time": "0:04:11", "throughput": 5556.56, "total_tokens": 17251664}
|
|
{"current_steps": 35055, "total_steps": 37885, "loss": 0.0, "lr": 3.382818749920224e-08, "epoch": 4.626501253794378, "percentage": 92.53, "elapsed_time": "0:51:45", "remaining_time": "0:04:10", "throughput": 5556.77, "total_tokens": 17254160}
|
|
{"current_steps": 35060, "total_steps": 37885, "loss": 0.0239, "lr": 3.370947632900978e-08, "epoch": 4.627161145572126, "percentage": 92.54, "elapsed_time": "0:51:45", "remaining_time": "0:04:10", "throughput": 5557.03, "total_tokens": 17256784}
|
|
{"current_steps": 35065, "total_steps": 37885, "loss": 0.0, "lr": 3.3590970246983654e-08, "epoch": 4.627821037349874, "percentage": 92.56, "elapsed_time": "0:51:45", "remaining_time": "0:04:09", "throughput": 5557.18, "total_tokens": 17259088}
|
|
{"current_steps": 35070, "total_steps": 37885, "loss": 0.0, "lr": 3.3472669278275637e-08, "epoch": 4.628480929127623, "percentage": 92.57, "elapsed_time": "0:51:46", "remaining_time": "0:04:09", "throughput": 5557.41, "total_tokens": 17261648}
|
|
{"current_steps": 35075, "total_steps": 37885, "loss": 0.0035, "lr": 3.3354573447994637e-08, "epoch": 4.629140820905372, "percentage": 92.58, "elapsed_time": "0:51:46", "remaining_time": "0:04:08", "throughput": 5557.68, "total_tokens": 17264336}
|
|
{"current_steps": 35080, "total_steps": 37885, "loss": 0.0, "lr": 3.3236682781205616e-08, "epoch": 4.62980071268312, "percentage": 92.6, "elapsed_time": "0:51:46", "remaining_time": "0:04:08", "throughput": 5557.82, "total_tokens": 17266576}
|
|
{"current_steps": 35085, "total_steps": 37885, "loss": 0.0, "lr": 3.311899730292989e-08, "epoch": 4.630460604460868, "percentage": 92.61, "elapsed_time": "0:51:47", "remaining_time": "0:04:07", "throughput": 5558.0, "total_tokens": 17268944}
|
|
{"current_steps": 35090, "total_steps": 37885, "loss": 0.0, "lr": 3.3001517038145356e-08, "epoch": 4.6311204962386165, "percentage": 92.62, "elapsed_time": "0:51:47", "remaining_time": "0:04:07", "throughput": 5558.19, "total_tokens": 17271376}
|
|
{"current_steps": 35095, "total_steps": 37885, "loss": 0.0, "lr": 3.28842420117863e-08, "epoch": 4.631780388016365, "percentage": 92.64, "elapsed_time": "0:51:47", "remaining_time": "0:04:07", "throughput": 5558.38, "total_tokens": 17273808}
|
|
{"current_steps": 35100, "total_steps": 37885, "loss": 0.0, "lr": 3.27671722487437e-08, "epoch": 4.632440279794114, "percentage": 92.65, "elapsed_time": "0:51:48", "remaining_time": "0:04:06", "throughput": 5558.54, "total_tokens": 17276112}
|
|
{"current_steps": 35105, "total_steps": 37885, "loss": 0.0005, "lr": 3.265030777386446e-08, "epoch": 4.633100171571862, "percentage": 92.66, "elapsed_time": "0:51:48", "remaining_time": "0:04:06", "throughput": 5558.71, "total_tokens": 17278480}
|
|
{"current_steps": 35110, "total_steps": 37885, "loss": 0.0252, "lr": 3.2533648611952623e-08, "epoch": 4.6337600633496105, "percentage": 92.68, "elapsed_time": "0:51:48", "remaining_time": "0:04:05", "throughput": 5559.02, "total_tokens": 17281296}
|
|
{"current_steps": 35115, "total_steps": 37885, "loss": 0.0, "lr": 3.241719478776805e-08, "epoch": 4.634419955127359, "percentage": 92.69, "elapsed_time": "0:51:49", "remaining_time": "0:04:05", "throughput": 5559.29, "total_tokens": 17283984}
|
|
{"current_steps": 35120, "total_steps": 37885, "loss": 0.0, "lr": 3.230094632602698e-08, "epoch": 4.635079846905107, "percentage": 92.7, "elapsed_time": "0:51:49", "remaining_time": "0:04:04", "throughput": 5559.47, "total_tokens": 17286352}
|
|
{"current_steps": 35125, "total_steps": 37885, "loss": 0.0, "lr": 3.218490325140266e-08, "epoch": 4.635739738682856, "percentage": 92.71, "elapsed_time": "0:51:49", "remaining_time": "0:04:04", "throughput": 5559.74, "total_tokens": 17289040}
|
|
{"current_steps": 35130, "total_steps": 37885, "loss": 0.0, "lr": 3.206906558852418e-08, "epoch": 4.6363996304606045, "percentage": 92.73, "elapsed_time": "0:51:50", "remaining_time": "0:04:03", "throughput": 5559.95, "total_tokens": 17291536}
|
|
{"current_steps": 35135, "total_steps": 37885, "loss": 0.0, "lr": 3.195343336197742e-08, "epoch": 4.637059522238353, "percentage": 92.74, "elapsed_time": "0:51:50", "remaining_time": "0:04:03", "throughput": 5560.2, "total_tokens": 17294160}
|
|
{"current_steps": 35140, "total_steps": 37885, "loss": 0.0755, "lr": 3.183800659630431e-08, "epoch": 4.637719414016101, "percentage": 92.75, "elapsed_time": "0:51:50", "remaining_time": "0:04:02", "throughput": 5560.49, "total_tokens": 17296912}
|
|
{"current_steps": 35145, "total_steps": 37885, "loss": 0.0, "lr": 3.1722785316003475e-08, "epoch": 4.638379305793849, "percentage": 92.77, "elapsed_time": "0:51:51", "remaining_time": "0:04:02", "throughput": 5560.68, "total_tokens": 17299344}
|
|
{"current_steps": 35150, "total_steps": 37885, "loss": 0.0011, "lr": 3.160776954552979e-08, "epoch": 4.6390391975715985, "percentage": 92.78, "elapsed_time": "0:51:51", "remaining_time": "0:04:02", "throughput": 5560.9, "total_tokens": 17301840}
|
|
{"current_steps": 35155, "total_steps": 37885, "loss": 0.0518, "lr": 3.149295930929441e-08, "epoch": 4.639699089349347, "percentage": 92.79, "elapsed_time": "0:51:51", "remaining_time": "0:04:01", "throughput": 5561.1, "total_tokens": 17304336}
|
|
{"current_steps": 35160, "total_steps": 37885, "loss": 0.0016, "lr": 3.137835463166494e-08, "epoch": 4.640358981127095, "percentage": 92.81, "elapsed_time": "0:51:52", "remaining_time": "0:04:01", "throughput": 5561.3, "total_tokens": 17306768}
|
|
{"current_steps": 35165, "total_steps": 37885, "loss": 0.0305, "lr": 3.12639555369657e-08, "epoch": 4.641018872904843, "percentage": 92.82, "elapsed_time": "0:51:52", "remaining_time": "0:04:00", "throughput": 5561.37, "total_tokens": 17308816}
|
|
{"current_steps": 35170, "total_steps": 37885, "loss": 0.0226, "lr": 3.1149762049476724e-08, "epoch": 4.641678764682592, "percentage": 92.83, "elapsed_time": "0:51:52", "remaining_time": "0:04:00", "throughput": 5561.69, "total_tokens": 17311696}
|
|
{"current_steps": 35175, "total_steps": 37885, "loss": 0.0, "lr": 3.103577419343484e-08, "epoch": 4.642338656460341, "percentage": 92.85, "elapsed_time": "0:51:52", "remaining_time": "0:03:59", "throughput": 5561.84, "total_tokens": 17314000}
|
|
{"current_steps": 35180, "total_steps": 37885, "loss": 0.0, "lr": 3.092199199303325e-08, "epoch": 4.642998548238089, "percentage": 92.86, "elapsed_time": "0:51:53", "remaining_time": "0:03:59", "throughput": 5562.01, "total_tokens": 17316368}
|
|
{"current_steps": 35185, "total_steps": 37885, "loss": 0.0, "lr": 3.0808415472421413e-08, "epoch": 4.643658440015837, "percentage": 92.87, "elapsed_time": "0:51:53", "remaining_time": "0:03:58", "throughput": 5562.2, "total_tokens": 17318800}
|
|
{"current_steps": 35190, "total_steps": 37885, "loss": 0.036, "lr": 3.069504465570505e-08, "epoch": 4.644318331793586, "percentage": 92.89, "elapsed_time": "0:51:53", "remaining_time": "0:03:58", "throughput": 5562.42, "total_tokens": 17321296}
|
|
{"current_steps": 35195, "total_steps": 37885, "loss": 0.0, "lr": 3.0581879566946243e-08, "epoch": 4.644978223571334, "percentage": 92.9, "elapsed_time": "0:51:54", "remaining_time": "0:03:58", "throughput": 5562.53, "total_tokens": 17323472}
|
|
{"current_steps": 35200, "total_steps": 37885, "loss": 0.0, "lr": 3.046892023016356e-08, "epoch": 4.645638115349083, "percentage": 92.91, "elapsed_time": "0:51:54", "remaining_time": "0:03:57", "throughput": 5562.67, "total_tokens": 17325712}
|
|
{"current_steps": 35205, "total_steps": 37885, "loss": 0.0, "lr": 3.035616666933183e-08, "epoch": 4.646298007126831, "percentage": 92.93, "elapsed_time": "0:51:54", "remaining_time": "0:03:57", "throughput": 5562.96, "total_tokens": 17328464}
|
|
{"current_steps": 35210, "total_steps": 37885, "loss": 0.0, "lr": 3.024361890838201e-08, "epoch": 4.64695789890458, "percentage": 92.94, "elapsed_time": "0:51:55", "remaining_time": "0:03:56", "throughput": 5563.17, "total_tokens": 17330960}
|
|
{"current_steps": 35215, "total_steps": 37885, "loss": 0.0001, "lr": 3.013127697120166e-08, "epoch": 4.647617790682328, "percentage": 92.95, "elapsed_time": "0:51:55", "remaining_time": "0:03:56", "throughput": 5563.47, "total_tokens": 17333776}
|
|
{"current_steps": 35220, "total_steps": 37885, "loss": 0.0028, "lr": 3.00191408816346e-08, "epoch": 4.648277682460076, "percentage": 92.97, "elapsed_time": "0:51:55", "remaining_time": "0:03:55", "throughput": 5563.64, "total_tokens": 17336144}
|
|
{"current_steps": 35225, "total_steps": 37885, "loss": 0.0, "lr": 2.99072106634809e-08, "epoch": 4.648937574237825, "percentage": 92.98, "elapsed_time": "0:51:56", "remaining_time": "0:03:55", "throughput": 5563.86, "total_tokens": 17338640}
|
|
{"current_steps": 35230, "total_steps": 37885, "loss": 0.0, "lr": 2.9795486340496557e-08, "epoch": 4.649597466015574, "percentage": 92.99, "elapsed_time": "0:51:56", "remaining_time": "0:03:54", "throughput": 5564.06, "total_tokens": 17341136}
|
|
{"current_steps": 35235, "total_steps": 37885, "loss": 0.0, "lr": 2.968396793639494e-08, "epoch": 4.650257357793322, "percentage": 93.01, "elapsed_time": "0:51:56", "remaining_time": "0:03:54", "throughput": 5564.15, "total_tokens": 17343248}
|
|
{"current_steps": 35240, "total_steps": 37885, "loss": 0.0, "lr": 2.9572655474844555e-08, "epoch": 4.65091724957107, "percentage": 93.02, "elapsed_time": "0:51:57", "remaining_time": "0:03:53", "throughput": 5564.37, "total_tokens": 17345744}
|
|
{"current_steps": 35245, "total_steps": 37885, "loss": 0.0, "lr": 2.9461548979470507e-08, "epoch": 4.6515771413488185, "percentage": 93.03, "elapsed_time": "0:51:57", "remaining_time": "0:03:53", "throughput": 5564.63, "total_tokens": 17348432}
|
|
{"current_steps": 35250, "total_steps": 37885, "loss": 0.0001, "lr": 2.9350648473854933e-08, "epoch": 4.652237033126568, "percentage": 93.04, "elapsed_time": "0:51:57", "remaining_time": "0:03:53", "throughput": 5564.82, "total_tokens": 17350864}
|
|
{"current_steps": 35255, "total_steps": 37885, "loss": 0.0, "lr": 2.9239953981535116e-08, "epoch": 4.652896924904316, "percentage": 93.06, "elapsed_time": "0:51:58", "remaining_time": "0:03:52", "throughput": 5565.04, "total_tokens": 17353360}
|
|
{"current_steps": 35260, "total_steps": 37885, "loss": 0.0, "lr": 2.9129465526005592e-08, "epoch": 4.653556816682064, "percentage": 93.07, "elapsed_time": "0:51:58", "remaining_time": "0:03:52", "throughput": 5565.19, "total_tokens": 17355664}
|
|
{"current_steps": 35265, "total_steps": 37885, "loss": 0.0, "lr": 2.9019183130716386e-08, "epoch": 4.6542167084598125, "percentage": 93.08, "elapsed_time": "0:51:58", "remaining_time": "0:03:51", "throughput": 5565.44, "total_tokens": 17358288}
|
|
{"current_steps": 35270, "total_steps": 37885, "loss": 0.0032, "lr": 2.8909106819074214e-08, "epoch": 4.654876600237561, "percentage": 93.1, "elapsed_time": "0:51:59", "remaining_time": "0:03:51", "throughput": 5565.54, "total_tokens": 17360400}
|
|
{"current_steps": 35275, "total_steps": 37885, "loss": 0.0, "lr": 2.8799236614442168e-08, "epoch": 4.65553649201531, "percentage": 93.11, "elapsed_time": "0:51:59", "remaining_time": "0:03:50", "throughput": 5565.84, "total_tokens": 17363216}
|
|
{"current_steps": 35280, "total_steps": 37885, "loss": 0.0, "lr": 2.868957254013915e-08, "epoch": 4.656196383793058, "percentage": 93.12, "elapsed_time": "0:51:59", "remaining_time": "0:03:50", "throughput": 5566.01, "total_tokens": 17365584}
|
|
{"current_steps": 35285, "total_steps": 37885, "loss": 0.0, "lr": 2.8580114619440655e-08, "epoch": 4.6568562755708065, "percentage": 93.14, "elapsed_time": "0:52:00", "remaining_time": "0:03:49", "throughput": 5566.14, "total_tokens": 17367824}
|
|
{"current_steps": 35290, "total_steps": 37885, "loss": 0.0, "lr": 2.8470862875578427e-08, "epoch": 4.657516167348555, "percentage": 93.15, "elapsed_time": "0:52:00", "remaining_time": "0:03:49", "throughput": 5566.28, "total_tokens": 17370064}
|
|
{"current_steps": 35295, "total_steps": 37885, "loss": 0.0, "lr": 2.836181733174037e-08, "epoch": 4.658176059126303, "percentage": 93.16, "elapsed_time": "0:52:00", "remaining_time": "0:03:49", "throughput": 5566.53, "total_tokens": 17372688}
|
|
{"current_steps": 35300, "total_steps": 37885, "loss": 0.0, "lr": 2.8252978011070404e-08, "epoch": 4.658835950904052, "percentage": 93.18, "elapsed_time": "0:52:01", "remaining_time": "0:03:48", "throughput": 5566.78, "total_tokens": 17375312}
|
|
{"current_steps": 35305, "total_steps": 37885, "loss": 0.0, "lr": 2.8144344936669062e-08, "epoch": 4.6594958426818005, "percentage": 93.19, "elapsed_time": "0:52:01", "remaining_time": "0:03:48", "throughput": 5566.98, "total_tokens": 17377744}
|
|
{"current_steps": 35310, "total_steps": 37885, "loss": 0.0, "lr": 2.8035918131592895e-08, "epoch": 4.660155734459549, "percentage": 93.2, "elapsed_time": "0:52:01", "remaining_time": "0:03:47", "throughput": 5567.18, "total_tokens": 17380240}
|
|
{"current_steps": 35315, "total_steps": 37885, "loss": 0.0132, "lr": 2.792769761885472e-08, "epoch": 4.660815626237297, "percentage": 93.22, "elapsed_time": "0:52:02", "remaining_time": "0:03:47", "throughput": 5567.35, "total_tokens": 17382608}
|
|
{"current_steps": 35320, "total_steps": 37885, "loss": 0.0, "lr": 2.781968342142349e-08, "epoch": 4.661475518015045, "percentage": 93.23, "elapsed_time": "0:52:02", "remaining_time": "0:03:46", "throughput": 5567.56, "total_tokens": 17385104}
|
|
{"current_steps": 35325, "total_steps": 37885, "loss": 0.0, "lr": 2.771187556222454e-08, "epoch": 4.662135409792794, "percentage": 93.24, "elapsed_time": "0:52:02", "remaining_time": "0:03:46", "throughput": 5567.8, "total_tokens": 17387664}
|
|
{"current_steps": 35330, "total_steps": 37885, "loss": 0.0011, "lr": 2.7604274064139123e-08, "epoch": 4.662795301570543, "percentage": 93.26, "elapsed_time": "0:52:03", "remaining_time": "0:03:45", "throughput": 5567.87, "total_tokens": 17389712}
|
|
{"current_steps": 35335, "total_steps": 37885, "loss": 0.0, "lr": 2.7496878950005077e-08, "epoch": 4.663455193348291, "percentage": 93.27, "elapsed_time": "0:52:03", "remaining_time": "0:03:45", "throughput": 5568.13, "total_tokens": 17392400}
|
|
{"current_steps": 35340, "total_steps": 37885, "loss": 0.0, "lr": 2.738969024261606e-08, "epoch": 4.664115085126039, "percentage": 93.28, "elapsed_time": "0:52:03", "remaining_time": "0:03:44", "throughput": 5568.4, "total_tokens": 17395088}
|
|
{"current_steps": 35345, "total_steps": 37885, "loss": 0.0, "lr": 2.7282707964722427e-08, "epoch": 4.664774976903788, "percentage": 93.3, "elapsed_time": "0:52:04", "remaining_time": "0:03:44", "throughput": 5568.6, "total_tokens": 17397520}
|
|
{"current_steps": 35350, "total_steps": 37885, "loss": 0.0008, "lr": 2.7175932139030022e-08, "epoch": 4.665434868681536, "percentage": 93.31, "elapsed_time": "0:52:04", "remaining_time": "0:03:44", "throughput": 5568.75, "total_tokens": 17399824}
|
|
{"current_steps": 35355, "total_steps": 37885, "loss": 0.0, "lr": 2.7069362788201267e-08, "epoch": 4.666094760459285, "percentage": 93.32, "elapsed_time": "0:52:04", "remaining_time": "0:03:43", "throughput": 5568.98, "total_tokens": 17402384}
|
|
{"current_steps": 35360, "total_steps": 37885, "loss": 0.0039, "lr": 2.6962999934855068e-08, "epoch": 4.666754652237033, "percentage": 93.34, "elapsed_time": "0:52:05", "remaining_time": "0:03:43", "throughput": 5569.15, "total_tokens": 17404752}
|
|
{"current_steps": 35365, "total_steps": 37885, "loss": 0.0, "lr": 2.6856843601565816e-08, "epoch": 4.667414544014782, "percentage": 93.35, "elapsed_time": "0:52:05", "remaining_time": "0:03:42", "throughput": 5569.34, "total_tokens": 17407184}
|
|
{"current_steps": 35370, "total_steps": 37885, "loss": 0.0, "lr": 2.6750893810864596e-08, "epoch": 4.66807443579253, "percentage": 93.36, "elapsed_time": "0:52:05", "remaining_time": "0:03:42", "throughput": 5569.56, "total_tokens": 17409680}
|
|
{"current_steps": 35375, "total_steps": 37885, "loss": 0.0381, "lr": 2.6645150585238528e-08, "epoch": 4.668734327570278, "percentage": 93.37, "elapsed_time": "0:52:06", "remaining_time": "0:03:41", "throughput": 5569.81, "total_tokens": 17412304}
|
|
{"current_steps": 35380, "total_steps": 37885, "loss": 0.0005, "lr": 2.653961394713067e-08, "epoch": 4.6693942193480265, "percentage": 93.39, "elapsed_time": "0:52:06", "remaining_time": "0:03:41", "throughput": 5570.0, "total_tokens": 17414736}
|
|
{"current_steps": 35385, "total_steps": 37885, "loss": 0.0002, "lr": 2.6434283918940424e-08, "epoch": 4.670054111125776, "percentage": 93.4, "elapsed_time": "0:52:06", "remaining_time": "0:03:40", "throughput": 5570.21, "total_tokens": 17417232}
|
|
{"current_steps": 35390, "total_steps": 37885, "loss": 0.0294, "lr": 2.6329160523023587e-08, "epoch": 4.670714002903524, "percentage": 93.41, "elapsed_time": "0:52:07", "remaining_time": "0:03:40", "throughput": 5570.57, "total_tokens": 17420240}
|
|
{"current_steps": 35395, "total_steps": 37885, "loss": 0.0, "lr": 2.6224243781691636e-08, "epoch": 4.671373894681272, "percentage": 93.43, "elapsed_time": "0:52:07", "remaining_time": "0:03:40", "throughput": 5570.83, "total_tokens": 17422928}
|
|
{"current_steps": 35400, "total_steps": 37885, "loss": 0.0, "lr": 2.6119533717212428e-08, "epoch": 4.6720337864590205, "percentage": 93.44, "elapsed_time": "0:52:07", "remaining_time": "0:03:39", "throughput": 5571.04, "total_tokens": 17425424}
|
|
{"current_steps": 35405, "total_steps": 37885, "loss": 0.0, "lr": 2.601503035180963e-08, "epoch": 4.672693678236769, "percentage": 93.45, "elapsed_time": "0:52:08", "remaining_time": "0:03:39", "throughput": 5571.25, "total_tokens": 17427920}
|
|
{"current_steps": 35410, "total_steps": 37885, "loss": 0.0, "lr": 2.5910733707663947e-08, "epoch": 4.673353570014518, "percentage": 93.47, "elapsed_time": "0:52:08", "remaining_time": "0:03:38", "throughput": 5571.47, "total_tokens": 17430416}
|
|
{"current_steps": 35415, "total_steps": 37885, "loss": 0.0, "lr": 2.5806643806910998e-08, "epoch": 4.674013461792266, "percentage": 93.48, "elapsed_time": "0:52:08", "remaining_time": "0:03:38", "throughput": 5571.65, "total_tokens": 17432784}
|
|
{"current_steps": 35420, "total_steps": 37885, "loss": 0.0, "lr": 2.5702760671643455e-08, "epoch": 4.6746733535700145, "percentage": 93.49, "elapsed_time": "0:52:09", "remaining_time": "0:03:37", "throughput": 5571.74, "total_tokens": 17434896}
|
|
{"current_steps": 35425, "total_steps": 37885, "loss": 0.0, "lr": 2.559908432390967e-08, "epoch": 4.675333245347763, "percentage": 93.51, "elapsed_time": "0:52:09", "remaining_time": "0:03:37", "throughput": 5571.9, "total_tokens": 17437200}
|
|
{"current_steps": 35430, "total_steps": 37885, "loss": 0.0219, "lr": 2.5495614785714047e-08, "epoch": 4.675993137125511, "percentage": 93.52, "elapsed_time": "0:52:09", "remaining_time": "0:03:36", "throughput": 5572.05, "total_tokens": 17439504}
|
|
{"current_steps": 35435, "total_steps": 37885, "loss": 0.0, "lr": 2.5392352079017576e-08, "epoch": 4.67665302890326, "percentage": 93.53, "elapsed_time": "0:52:10", "remaining_time": "0:03:36", "throughput": 5572.25, "total_tokens": 17442000}
|
|
{"current_steps": 35440, "total_steps": 37885, "loss": 0.0, "lr": 2.528929622573661e-08, "epoch": 4.6773129206810085, "percentage": 93.55, "elapsed_time": "0:52:10", "remaining_time": "0:03:35", "throughput": 5572.39, "total_tokens": 17444240}
|
|
{"current_steps": 35445, "total_steps": 37885, "loss": 0.0, "lr": 2.5186447247744436e-08, "epoch": 4.677972812458757, "percentage": 93.56, "elapsed_time": "0:52:10", "remaining_time": "0:03:35", "throughput": 5572.58, "total_tokens": 17446672}
|
|
{"current_steps": 35450, "total_steps": 37885, "loss": 0.0, "lr": 2.5083805166869698e-08, "epoch": 4.678632704236505, "percentage": 93.57, "elapsed_time": "0:52:11", "remaining_time": "0:03:35", "throughput": 5572.81, "total_tokens": 17449232}
|
|
{"current_steps": 35455, "total_steps": 37885, "loss": 0.0657, "lr": 2.4981370004897527e-08, "epoch": 4.679292596014253, "percentage": 93.59, "elapsed_time": "0:52:11", "remaining_time": "0:03:34", "throughput": 5573.08, "total_tokens": 17451920}
|
|
{"current_steps": 35460, "total_steps": 37885, "loss": 0.0, "lr": 2.487914178356898e-08, "epoch": 4.6799524877920025, "percentage": 93.6, "elapsed_time": "0:52:11", "remaining_time": "0:03:34", "throughput": 5573.23, "total_tokens": 17454224}
|
|
{"current_steps": 35465, "total_steps": 37885, "loss": 0.0, "lr": 2.4777120524581364e-08, "epoch": 4.680612379569751, "percentage": 93.61, "elapsed_time": "0:52:12", "remaining_time": "0:03:33", "throughput": 5573.46, "total_tokens": 17456784}
|
|
{"current_steps": 35470, "total_steps": 37885, "loss": 0.0487, "lr": 2.4675306249587912e-08, "epoch": 4.681272271347499, "percentage": 93.63, "elapsed_time": "0:52:12", "remaining_time": "0:03:33", "throughput": 5573.61, "total_tokens": 17459088}
|
|
{"current_steps": 35475, "total_steps": 37885, "loss": 0.0411, "lr": 2.45736989801979e-08, "epoch": 4.681932163125247, "percentage": 93.64, "elapsed_time": "0:52:12", "remaining_time": "0:03:32", "throughput": 5573.78, "total_tokens": 17461456}
|
|
{"current_steps": 35480, "total_steps": 37885, "loss": 0.0, "lr": 2.4472298737976848e-08, "epoch": 4.682592054902996, "percentage": 93.65, "elapsed_time": "0:52:13", "remaining_time": "0:03:32", "throughput": 5573.93, "total_tokens": 17463760}
|
|
{"current_steps": 35485, "total_steps": 37885, "loss": 0.0595, "lr": 2.4371105544446323e-08, "epoch": 4.683251946680745, "percentage": 93.67, "elapsed_time": "0:52:13", "remaining_time": "0:03:31", "throughput": 5574.1, "total_tokens": 17466128}
|
|
{"current_steps": 35490, "total_steps": 37885, "loss": 0.0002, "lr": 2.427011942108348e-08, "epoch": 4.683911838458493, "percentage": 93.68, "elapsed_time": "0:52:13", "remaining_time": "0:03:31", "throughput": 5574.31, "total_tokens": 17468624}
|
|
{"current_steps": 35495, "total_steps": 37885, "loss": 0.0595, "lr": 2.416934038932217e-08, "epoch": 4.684571730236241, "percentage": 93.69, "elapsed_time": "0:52:14", "remaining_time": "0:03:31", "throughput": 5574.46, "total_tokens": 17470928}
|
|
{"current_steps": 35500, "total_steps": 37885, "loss": 0.0095, "lr": 2.406876847055206e-08, "epoch": 4.68523162201399, "percentage": 93.7, "elapsed_time": "0:52:14", "remaining_time": "0:03:30", "throughput": 5574.71, "total_tokens": 17473552}
|
|
{"current_steps": 35505, "total_steps": 37885, "loss": 0.0, "lr": 2.396840368611852e-08, "epoch": 4.685891513791738, "percentage": 93.72, "elapsed_time": "0:52:14", "remaining_time": "0:03:30", "throughput": 5574.78, "total_tokens": 17475600}
|
|
{"current_steps": 35510, "total_steps": 37885, "loss": 0.001, "lr": 2.3868246057323515e-08, "epoch": 4.686551405569487, "percentage": 93.73, "elapsed_time": "0:52:15", "remaining_time": "0:03:29", "throughput": 5575.03, "total_tokens": 17478224}
|
|
{"current_steps": 35515, "total_steps": 37885, "loss": 0.1067, "lr": 2.3768295605424703e-08, "epoch": 4.687211297347235, "percentage": 93.74, "elapsed_time": "0:52:15", "remaining_time": "0:03:29", "throughput": 5575.22, "total_tokens": 17480656}
|
|
{"current_steps": 35520, "total_steps": 37885, "loss": 0.0, "lr": 2.3668552351635896e-08, "epoch": 4.687871189124984, "percentage": 93.76, "elapsed_time": "0:52:15", "remaining_time": "0:03:28", "throughput": 5575.37, "total_tokens": 17482960}
|
|
{"current_steps": 35525, "total_steps": 37885, "loss": 0.0, "lr": 2.356901631712671e-08, "epoch": 4.688531080902732, "percentage": 93.77, "elapsed_time": "0:52:16", "remaining_time": "0:03:28", "throughput": 5575.64, "total_tokens": 17485648}
|
|
{"current_steps": 35530, "total_steps": 37885, "loss": 0.0518, "lr": 2.346968752302303e-08, "epoch": 4.68919097268048, "percentage": 93.78, "elapsed_time": "0:52:16", "remaining_time": "0:03:27", "throughput": 5575.86, "total_tokens": 17488208}
|
|
{"current_steps": 35535, "total_steps": 37885, "loss": 0.0, "lr": 2.3370565990406877e-08, "epoch": 4.689850864458229, "percentage": 93.8, "elapsed_time": "0:52:16", "remaining_time": "0:03:27", "throughput": 5576.07, "total_tokens": 17490704}
|
|
{"current_steps": 35540, "total_steps": 37885, "loss": 0.0, "lr": 2.3271651740315755e-08, "epoch": 4.690510756235978, "percentage": 93.81, "elapsed_time": "0:52:17", "remaining_time": "0:03:26", "throughput": 5576.23, "total_tokens": 17493008}
|
|
{"current_steps": 35545, "total_steps": 37885, "loss": 0.0188, "lr": 2.3172944793743653e-08, "epoch": 4.691170648013726, "percentage": 93.82, "elapsed_time": "0:52:17", "remaining_time": "0:03:26", "throughput": 5576.4, "total_tokens": 17495376}
|
|
{"current_steps": 35550, "total_steps": 37885, "loss": 0.0, "lr": 2.3074445171640366e-08, "epoch": 4.691830539791474, "percentage": 93.84, "elapsed_time": "0:52:17", "remaining_time": "0:03:26", "throughput": 5576.53, "total_tokens": 17497616}
|
|
{"current_steps": 35555, "total_steps": 37885, "loss": 0.0252, "lr": 2.2976152894911838e-08, "epoch": 4.692490431569222, "percentage": 93.85, "elapsed_time": "0:52:18", "remaining_time": "0:03:25", "throughput": 5576.82, "total_tokens": 17500368}
|
|
{"current_steps": 35560, "total_steps": 37885, "loss": 0.0261, "lr": 2.2878067984419825e-08, "epoch": 4.693150323346972, "percentage": 93.86, "elapsed_time": "0:52:18", "remaining_time": "0:03:25", "throughput": 5576.99, "total_tokens": 17502736}
|
|
{"current_steps": 35565, "total_steps": 37885, "loss": 0.0, "lr": 2.2780190460981896e-08, "epoch": 4.69381021512472, "percentage": 93.88, "elapsed_time": "0:52:18", "remaining_time": "0:03:24", "throughput": 5577.2, "total_tokens": 17505232}
|
|
{"current_steps": 35570, "total_steps": 37885, "loss": 0.0, "lr": 2.2682520345372325e-08, "epoch": 4.694470106902468, "percentage": 93.89, "elapsed_time": "0:52:19", "remaining_time": "0:03:24", "throughput": 5577.37, "total_tokens": 17507600}
|
|
{"current_steps": 35575, "total_steps": 37885, "loss": 0.0003, "lr": 2.258505765832064e-08, "epoch": 4.695129998680216, "percentage": 93.9, "elapsed_time": "0:52:19", "remaining_time": "0:03:23", "throughput": 5577.56, "total_tokens": 17510032}
|
|
{"current_steps": 35580, "total_steps": 37885, "loss": 0.0001, "lr": 2.248780242051229e-08, "epoch": 4.695789890457965, "percentage": 93.92, "elapsed_time": "0:52:19", "remaining_time": "0:03:23", "throughput": 5577.86, "total_tokens": 17512848}
|
|
{"current_steps": 35585, "total_steps": 37885, "loss": 0.0, "lr": 2.239075465258966e-08, "epoch": 4.696449782235713, "percentage": 93.93, "elapsed_time": "0:52:20", "remaining_time": "0:03:22", "throughput": 5578.07, "total_tokens": 17515344}
|
|
{"current_steps": 35590, "total_steps": 37885, "loss": 0.0, "lr": 2.2293914375149824e-08, "epoch": 4.697109674013462, "percentage": 93.94, "elapsed_time": "0:52:20", "remaining_time": "0:03:22", "throughput": 5578.26, "total_tokens": 17517776}
|
|
{"current_steps": 35595, "total_steps": 37885, "loss": 0.0, "lr": 2.2197281608746787e-08, "epoch": 4.69776956579121, "percentage": 93.96, "elapsed_time": "0:52:20", "remaining_time": "0:03:22", "throughput": 5578.46, "total_tokens": 17520272}
|
|
{"current_steps": 35600, "total_steps": 37885, "loss": 0.0062, "lr": 2.210085637388992e-08, "epoch": 4.698429457568959, "percentage": 93.97, "elapsed_time": "0:52:21", "remaining_time": "0:03:21", "throughput": 5578.6, "total_tokens": 17522512}
|
|
{"current_steps": 35605, "total_steps": 37885, "loss": 0.0, "lr": 2.2004638691044962e-08, "epoch": 4.699089349346707, "percentage": 93.98, "elapsed_time": "0:52:21", "remaining_time": "0:03:21", "throughput": 5578.88, "total_tokens": 17525264}
|
|
{"current_steps": 35610, "total_steps": 37885, "loss": 0.0338, "lr": 2.190862858063347e-08, "epoch": 4.699749241124455, "percentage": 93.99, "elapsed_time": "0:52:21", "remaining_time": "0:03:20", "throughput": 5579.03, "total_tokens": 17527568}
|
|
{"current_steps": 35615, "total_steps": 37885, "loss": 0.0, "lr": 2.1812826063032584e-08, "epoch": 4.700409132902204, "percentage": 94.01, "elapsed_time": "0:52:22", "remaining_time": "0:03:20", "throughput": 5579.24, "total_tokens": 17530064}
|
|
{"current_steps": 35620, "total_steps": 37885, "loss": 0.0, "lr": 2.1717231158576045e-08, "epoch": 4.701069024679953, "percentage": 94.02, "elapsed_time": "0:52:22", "remaining_time": "0:03:19", "throughput": 5579.48, "total_tokens": 17532688}
|
|
{"current_steps": 35625, "total_steps": 37885, "loss": 0.0, "lr": 2.1621843887552948e-08, "epoch": 4.701728916457701, "percentage": 94.03, "elapsed_time": "0:52:22", "remaining_time": "0:03:19", "throughput": 5579.68, "total_tokens": 17535120}
|
|
{"current_steps": 35630, "total_steps": 37885, "loss": 0.0, "lr": 2.1526664270208662e-08, "epoch": 4.702388808235449, "percentage": 94.05, "elapsed_time": "0:52:23", "remaining_time": "0:03:18", "throughput": 5579.85, "total_tokens": 17537488}
|
|
{"current_steps": 35635, "total_steps": 37885, "loss": 0.0, "lr": 2.1431692326744244e-08, "epoch": 4.7030487000131975, "percentage": 94.06, "elapsed_time": "0:52:23", "remaining_time": "0:03:18", "throughput": 5580.11, "total_tokens": 17540176}
|
|
{"current_steps": 35640, "total_steps": 37885, "loss": 0.0, "lr": 2.1336928077317017e-08, "epoch": 4.703708591790946, "percentage": 94.07, "elapsed_time": "0:52:23", "remaining_time": "0:03:18", "throughput": 5580.32, "total_tokens": 17542672}
|
|
{"current_steps": 35645, "total_steps": 37885, "loss": 0.0016, "lr": 2.1242371542039893e-08, "epoch": 4.704368483568695, "percentage": 94.09, "elapsed_time": "0:52:23", "remaining_time": "0:03:17", "throughput": 5580.43, "total_tokens": 17544848}
|
|
{"current_steps": 35650, "total_steps": 37885, "loss": 0.0, "lr": 2.1148022740981708e-08, "epoch": 4.705028375346443, "percentage": 94.1, "elapsed_time": "0:52:24", "remaining_time": "0:03:17", "throughput": 5580.64, "total_tokens": 17547344}
|
|
{"current_steps": 35655, "total_steps": 37885, "loss": 0.0, "lr": 2.1053881694167442e-08, "epoch": 4.7056882671241915, "percentage": 94.11, "elapsed_time": "0:52:24", "remaining_time": "0:03:16", "throughput": 5580.88, "total_tokens": 17549968}
|
|
{"current_steps": 35660, "total_steps": 37885, "loss": 0.02, "lr": 2.095994842157789e-08, "epoch": 4.70634815890194, "percentage": 94.13, "elapsed_time": "0:52:24", "remaining_time": "0:03:16", "throughput": 5581.03, "total_tokens": 17552272}
|
|
{"current_steps": 35665, "total_steps": 37885, "loss": 0.0002, "lr": 2.086622294314955e-08, "epoch": 4.707008050679688, "percentage": 94.14, "elapsed_time": "0:52:25", "remaining_time": "0:03:15", "throughput": 5581.24, "total_tokens": 17554768}
|
|
{"current_steps": 35670, "total_steps": 37885, "loss": 0.0, "lr": 2.077270527877495e-08, "epoch": 4.707667942457437, "percentage": 94.15, "elapsed_time": "0:52:25", "remaining_time": "0:03:15", "throughput": 5581.41, "total_tokens": 17557136}
|
|
{"current_steps": 35675, "total_steps": 37885, "loss": 0.0164, "lr": 2.067939544830277e-08, "epoch": 4.7083278342351855, "percentage": 94.17, "elapsed_time": "0:52:25", "remaining_time": "0:03:14", "throughput": 5581.64, "total_tokens": 17559696}
|
|
{"current_steps": 35680, "total_steps": 37885, "loss": 0.0, "lr": 2.0586293471537287e-08, "epoch": 4.708987726012934, "percentage": 94.18, "elapsed_time": "0:52:26", "remaining_time": "0:03:14", "throughput": 5581.83, "total_tokens": 17562128}
|
|
{"current_steps": 35685, "total_steps": 37885, "loss": 0.0295, "lr": 2.0493399368238573e-08, "epoch": 4.709647617790682, "percentage": 94.19, "elapsed_time": "0:52:26", "remaining_time": "0:03:13", "throughput": 5582.18, "total_tokens": 17565136}
|
|
{"current_steps": 35690, "total_steps": 37885, "loss": 0.0001, "lr": 2.0400713158122863e-08, "epoch": 4.71030750956843, "percentage": 94.21, "elapsed_time": "0:52:26", "remaining_time": "0:03:13", "throughput": 5582.48, "total_tokens": 17567952}
|
|
{"current_steps": 35695, "total_steps": 37885, "loss": 0.0, "lr": 2.0308234860862084e-08, "epoch": 4.7109674013461795, "percentage": 94.22, "elapsed_time": "0:52:27", "remaining_time": "0:03:13", "throughput": 5582.62, "total_tokens": 17570256}
|
|
{"current_steps": 35700, "total_steps": 37885, "loss": 0.0, "lr": 2.021596449608409e-08, "epoch": 4.711627293123928, "percentage": 94.23, "elapsed_time": "0:52:27", "remaining_time": "0:03:12", "throughput": 5582.79, "total_tokens": 17572624}
|
|
{"current_steps": 35705, "total_steps": 37885, "loss": 0.0, "lr": 2.0123902083372557e-08, "epoch": 4.712287184901676, "percentage": 94.25, "elapsed_time": "0:52:27", "remaining_time": "0:03:12", "throughput": 5582.9, "total_tokens": 17574800}
|
|
{"current_steps": 35710, "total_steps": 37885, "loss": 0.0766, "lr": 2.003204764226718e-08, "epoch": 4.712947076679424, "percentage": 94.26, "elapsed_time": "0:52:28", "remaining_time": "0:03:11", "throughput": 5583.13, "total_tokens": 17577360}
|
|
{"current_steps": 35715, "total_steps": 37885, "loss": 0.0, "lr": 1.9940401192263146e-08, "epoch": 4.713606968457173, "percentage": 94.27, "elapsed_time": "0:52:28", "remaining_time": "0:03:11", "throughput": 5583.33, "total_tokens": 17579856}
|
|
{"current_steps": 35720, "total_steps": 37885, "loss": 0.0, "lr": 1.9848962752812006e-08, "epoch": 4.714266860234922, "percentage": 94.29, "elapsed_time": "0:52:28", "remaining_time": "0:03:10", "throughput": 5583.5, "total_tokens": 17582224}
|
|
{"current_steps": 35725, "total_steps": 37885, "loss": 0.0, "lr": 1.9757732343320898e-08, "epoch": 4.71492675201267, "percentage": 94.3, "elapsed_time": "0:52:29", "remaining_time": "0:03:10", "throughput": 5583.69, "total_tokens": 17584656}
|
|
{"current_steps": 35730, "total_steps": 37885, "loss": 0.0, "lr": 1.9666709983152674e-08, "epoch": 4.715586643790418, "percentage": 94.31, "elapsed_time": "0:52:29", "remaining_time": "0:03:09", "throughput": 5583.91, "total_tokens": 17587152}
|
|
{"current_steps": 35735, "total_steps": 37885, "loss": 0.0, "lr": 1.957589569162632e-08, "epoch": 4.716246535568167, "percentage": 94.32, "elapsed_time": "0:52:29", "remaining_time": "0:03:09", "throughput": 5584.08, "total_tokens": 17589520}
|
|
{"current_steps": 35740, "total_steps": 37885, "loss": 0.0579, "lr": 1.948528948801631e-08, "epoch": 4.716906427345915, "percentage": 94.34, "elapsed_time": "0:52:30", "remaining_time": "0:03:09", "throughput": 5584.23, "total_tokens": 17591824}
|
|
{"current_steps": 35745, "total_steps": 37885, "loss": 0.0, "lr": 1.939489139155337e-08, "epoch": 4.717566319123664, "percentage": 94.35, "elapsed_time": "0:52:30", "remaining_time": "0:03:08", "throughput": 5584.49, "total_tokens": 17594512}
|
|
{"current_steps": 35750, "total_steps": 37885, "loss": 0.0, "lr": 1.9304701421423707e-08, "epoch": 4.718226210901412, "percentage": 94.36, "elapsed_time": "0:52:30", "remaining_time": "0:03:08", "throughput": 5584.66, "total_tokens": 17596880}
|
|
{"current_steps": 35755, "total_steps": 37885, "loss": 0.0, "lr": 1.921471959676957e-08, "epoch": 4.718886102679161, "percentage": 94.38, "elapsed_time": "0:52:31", "remaining_time": "0:03:07", "throughput": 5584.9, "total_tokens": 17599504}
|
|
{"current_steps": 35760, "total_steps": 37885, "loss": 0.02, "lr": 1.9124945936688896e-08, "epoch": 4.719545994456909, "percentage": 94.39, "elapsed_time": "0:52:31", "remaining_time": "0:03:07", "throughput": 5585.07, "total_tokens": 17601872}
|
|
{"current_steps": 35765, "total_steps": 37885, "loss": 0.0016, "lr": 1.903538046023545e-08, "epoch": 4.720205886234657, "percentage": 94.4, "elapsed_time": "0:52:31", "remaining_time": "0:03:06", "throughput": 5585.33, "total_tokens": 17604560}
|
|
{"current_steps": 35770, "total_steps": 37885, "loss": 0.0001, "lr": 1.8946023186419025e-08, "epoch": 4.720865778012406, "percentage": 94.42, "elapsed_time": "0:52:32", "remaining_time": "0:03:06", "throughput": 5585.44, "total_tokens": 17606736}
|
|
{"current_steps": 35775, "total_steps": 37885, "loss": 0.0, "lr": 1.885687413420478e-08, "epoch": 4.721525669790155, "percentage": 94.43, "elapsed_time": "0:52:32", "remaining_time": "0:03:05", "throughput": 5585.68, "total_tokens": 17609360}
|
|
{"current_steps": 35780, "total_steps": 37885, "loss": 0.0, "lr": 1.876793332251425e-08, "epoch": 4.722185561567903, "percentage": 94.44, "elapsed_time": "0:52:32", "remaining_time": "0:03:05", "throughput": 5585.81, "total_tokens": 17611600}
|
|
{"current_steps": 35785, "total_steps": 37885, "loss": 0.0, "lr": 1.8679200770224445e-08, "epoch": 4.722845453345651, "percentage": 94.46, "elapsed_time": "0:52:33", "remaining_time": "0:03:05", "throughput": 5586.05, "total_tokens": 17614224}
|
|
{"current_steps": 35790, "total_steps": 37885, "loss": 0.1016, "lr": 1.859067649616797e-08, "epoch": 4.7235053451233995, "percentage": 94.47, "elapsed_time": "0:52:33", "remaining_time": "0:03:04", "throughput": 5586.24, "total_tokens": 17616656}
|
|
{"current_steps": 35795, "total_steps": 37885, "loss": 0.0009, "lr": 1.8502360519133564e-08, "epoch": 4.724165236901149, "percentage": 94.48, "elapsed_time": "0:52:33", "remaining_time": "0:03:04", "throughput": 5586.57, "total_tokens": 17619600}
|
|
{"current_steps": 35800, "total_steps": 37885, "loss": 0.0, "lr": 1.8414252857865688e-08, "epoch": 4.724825128678897, "percentage": 94.5, "elapsed_time": "0:52:34", "remaining_time": "0:03:03", "throughput": 5586.8, "total_tokens": 17622160}
|
|
{"current_steps": 35805, "total_steps": 37885, "loss": 0.0, "lr": 1.8326353531064708e-08, "epoch": 4.725485020456645, "percentage": 94.51, "elapsed_time": "0:52:34", "remaining_time": "0:03:03", "throughput": 5587.02, "total_tokens": 17624720}
|
|
{"current_steps": 35810, "total_steps": 37885, "loss": 0.0, "lr": 1.8238662557386262e-08, "epoch": 4.7261449122343935, "percentage": 94.52, "elapsed_time": "0:52:34", "remaining_time": "0:03:02", "throughput": 5587.24, "total_tokens": 17627280}
|
|
{"current_steps": 35815, "total_steps": 37885, "loss": 0.0, "lr": 1.8151179955442463e-08, "epoch": 4.726804804012142, "percentage": 94.54, "elapsed_time": "0:52:35", "remaining_time": "0:03:02", "throughput": 5587.52, "total_tokens": 17630032}
|
|
{"current_steps": 35820, "total_steps": 37885, "loss": 0.0337, "lr": 1.806390574380079e-08, "epoch": 4.727464695789891, "percentage": 94.55, "elapsed_time": "0:52:35", "remaining_time": "0:03:01", "throughput": 5587.78, "total_tokens": 17632720}
|
|
{"current_steps": 35825, "total_steps": 37885, "loss": 0.0, "lr": 1.797683994098431e-08, "epoch": 4.728124587567639, "percentage": 94.56, "elapsed_time": "0:52:35", "remaining_time": "0:03:01", "throughput": 5588.01, "total_tokens": 17635280}
|
|
{"current_steps": 35830, "total_steps": 37885, "loss": 0.0, "lr": 1.7889982565472473e-08, "epoch": 4.7287844793453875, "percentage": 94.58, "elapsed_time": "0:52:36", "remaining_time": "0:03:01", "throughput": 5588.24, "total_tokens": 17637840}
|
|
{"current_steps": 35835, "total_steps": 37885, "loss": 0.0001, "lr": 1.780333363569986e-08, "epoch": 4.729444371123136, "percentage": 94.59, "elapsed_time": "0:52:36", "remaining_time": "0:03:00", "throughput": 5588.41, "total_tokens": 17640208}
|
|
{"current_steps": 35840, "total_steps": 37885, "loss": 0.0, "lr": 1.77168931700572e-08, "epoch": 4.730104262900884, "percentage": 94.6, "elapsed_time": "0:52:36", "remaining_time": "0:03:00", "throughput": 5588.63, "total_tokens": 17642768}
|
|
{"current_steps": 35845, "total_steps": 37885, "loss": 0.0647, "lr": 1.7630661186890827e-08, "epoch": 4.730764154678632, "percentage": 94.62, "elapsed_time": "0:52:37", "remaining_time": "0:02:59", "throughput": 5588.79, "total_tokens": 17645136}
|
|
{"current_steps": 35850, "total_steps": 37885, "loss": 0.0, "lr": 1.7544637704502875e-08, "epoch": 4.7314240464563815, "percentage": 94.63, "elapsed_time": "0:52:37", "remaining_time": "0:02:59", "throughput": 5588.97, "total_tokens": 17647504}
|
|
{"current_steps": 35855, "total_steps": 37885, "loss": 0.0, "lr": 1.745882274115118e-08, "epoch": 4.73208393823413, "percentage": 94.64, "elapsed_time": "0:52:37", "remaining_time": "0:02:58", "throughput": 5589.12, "total_tokens": 17649808}
|
|
{"current_steps": 35860, "total_steps": 37885, "loss": 0.0, "lr": 1.7373216315049288e-08, "epoch": 4.732743830011878, "percentage": 94.65, "elapsed_time": "0:52:38", "remaining_time": "0:02:58", "throughput": 5589.42, "total_tokens": 17652624}
|
|
{"current_steps": 35865, "total_steps": 37885, "loss": 0.0, "lr": 1.7287818444366663e-08, "epoch": 4.733403721789626, "percentage": 94.67, "elapsed_time": "0:52:38", "remaining_time": "0:02:57", "throughput": 5589.66, "total_tokens": 17655248}
|
|
{"current_steps": 35870, "total_steps": 37885, "loss": 0.0032, "lr": 1.7202629147228365e-08, "epoch": 4.734063613567375, "percentage": 94.68, "elapsed_time": "0:52:38", "remaining_time": "0:02:57", "throughput": 5589.87, "total_tokens": 17657744}
|
|
{"current_steps": 35875, "total_steps": 37885, "loss": 0.0266, "lr": 1.711764844171515e-08, "epoch": 4.734723505345124, "percentage": 94.69, "elapsed_time": "0:52:39", "remaining_time": "0:02:57", "throughput": 5590.13, "total_tokens": 17660432}
|
|
{"current_steps": 35880, "total_steps": 37885, "loss": 0.0023, "lr": 1.7032876345863588e-08, "epoch": 4.735383397122872, "percentage": 94.71, "elapsed_time": "0:52:39", "remaining_time": "0:02:56", "throughput": 5590.27, "total_tokens": 17662736}
|
|
{"current_steps": 35885, "total_steps": 37885, "loss": 0.0, "lr": 1.694831287766596e-08, "epoch": 4.73604328890062, "percentage": 94.72, "elapsed_time": "0:52:39", "remaining_time": "0:02:56", "throughput": 5590.43, "total_tokens": 17665040}
|
|
{"current_steps": 35890, "total_steps": 37885, "loss": 0.0149, "lr": 1.6863958055070126e-08, "epoch": 4.736703180678369, "percentage": 94.73, "elapsed_time": "0:52:40", "remaining_time": "0:02:55", "throughput": 5590.5, "total_tokens": 17667088}
|
|
{"current_steps": 35895, "total_steps": 37885, "loss": 0.0, "lr": 1.677981189597988e-08, "epoch": 4.737363072456117, "percentage": 94.75, "elapsed_time": "0:52:40", "remaining_time": "0:02:55", "throughput": 5590.67, "total_tokens": 17669456}
|
|
{"current_steps": 35900, "total_steps": 37885, "loss": 0.0, "lr": 1.6695874418254707e-08, "epoch": 4.738022964233865, "percentage": 94.76, "elapsed_time": "0:52:40", "remaining_time": "0:02:54", "throughput": 5590.82, "total_tokens": 17671760}
|
|
{"current_steps": 35905, "total_steps": 37885, "loss": 0.0, "lr": 1.6612145639709696e-08, "epoch": 4.738682856011614, "percentage": 94.77, "elapsed_time": "0:52:41", "remaining_time": "0:02:54", "throughput": 5591.04, "total_tokens": 17674320}
|
|
{"current_steps": 35910, "total_steps": 37885, "loss": 0.0239, "lr": 1.652862557811563e-08, "epoch": 4.739342747789363, "percentage": 94.79, "elapsed_time": "0:52:41", "remaining_time": "0:02:53", "throughput": 5591.21, "total_tokens": 17676688}
|
|
{"current_steps": 35915, "total_steps": 37885, "loss": 0.0, "lr": 1.6445314251198884e-08, "epoch": 4.740002639567111, "percentage": 94.8, "elapsed_time": "0:52:41", "remaining_time": "0:02:53", "throughput": 5591.3, "total_tokens": 17678800}
|
|
{"current_steps": 35920, "total_steps": 37885, "loss": 0.0001, "lr": 1.636221167664209e-08, "epoch": 4.740662531344859, "percentage": 94.81, "elapsed_time": "0:52:42", "remaining_time": "0:02:52", "throughput": 5591.5, "total_tokens": 17681296}
|
|
{"current_steps": 35925, "total_steps": 37885, "loss": 0.0, "lr": 1.6279317872082697e-08, "epoch": 4.741322423122607, "percentage": 94.83, "elapsed_time": "0:52:42", "remaining_time": "0:02:52", "throughput": 5591.69, "total_tokens": 17683728}
|
|
{"current_steps": 35930, "total_steps": 37885, "loss": 0.0, "lr": 1.6196632855114745e-08, "epoch": 4.741982314900357, "percentage": 94.84, "elapsed_time": "0:52:42", "remaining_time": "0:02:52", "throughput": 5591.86, "total_tokens": 17686096}
|
|
{"current_steps": 35935, "total_steps": 37885, "loss": 0.0001, "lr": 1.611415664328708e-08, "epoch": 4.742642206678105, "percentage": 94.85, "elapsed_time": "0:52:43", "remaining_time": "0:02:51", "throughput": 5592.05, "total_tokens": 17688528}
|
|
{"current_steps": 35940, "total_steps": 37885, "loss": 0.0, "lr": 1.6031889254105148e-08, "epoch": 4.743302098455853, "percentage": 94.87, "elapsed_time": "0:52:43", "remaining_time": "0:02:51", "throughput": 5592.27, "total_tokens": 17691088}
|
|
{"current_steps": 35945, "total_steps": 37885, "loss": 0.0381, "lr": 1.594983070502942e-08, "epoch": 4.743961990233601, "percentage": 94.88, "elapsed_time": "0:52:43", "remaining_time": "0:02:50", "throughput": 5592.43, "total_tokens": 17693392}
|
|
{"current_steps": 35950, "total_steps": 37885, "loss": 0.0, "lr": 1.5867981013475974e-08, "epoch": 4.74462188201135, "percentage": 94.89, "elapsed_time": "0:52:44", "remaining_time": "0:02:50", "throughput": 5592.61, "total_tokens": 17695824}
|
|
{"current_steps": 35955, "total_steps": 37885, "loss": 0.0177, "lr": 1.5786340196817127e-08, "epoch": 4.745281773789099, "percentage": 94.91, "elapsed_time": "0:52:44", "remaining_time": "0:02:49", "throughput": 5592.73, "total_tokens": 17698000}
|
|
{"current_steps": 35960, "total_steps": 37885, "loss": 0.0, "lr": 1.570490827238047e-08, "epoch": 4.745941665566847, "percentage": 94.92, "elapsed_time": "0:52:44", "remaining_time": "0:02:49", "throughput": 5592.84, "total_tokens": 17700176}
|
|
{"current_steps": 35965, "total_steps": 37885, "loss": 0.0001, "lr": 1.562368525744939e-08, "epoch": 4.746601557344595, "percentage": 94.93, "elapsed_time": "0:52:45", "remaining_time": "0:02:48", "throughput": 5593.1, "total_tokens": 17702864}
|
|
{"current_steps": 35970, "total_steps": 37885, "loss": 0.0032, "lr": 1.5542671169262667e-08, "epoch": 4.747261449122344, "percentage": 94.95, "elapsed_time": "0:52:45", "remaining_time": "0:02:48", "throughput": 5593.28, "total_tokens": 17705296}
|
|
{"current_steps": 35975, "total_steps": 37885, "loss": 0.0, "lr": 1.5461866025015202e-08, "epoch": 4.747921340900092, "percentage": 94.96, "elapsed_time": "0:52:45", "remaining_time": "0:02:48", "throughput": 5593.52, "total_tokens": 17707920}
|
|
{"current_steps": 35980, "total_steps": 37885, "loss": 0.0, "lr": 1.5381269841857282e-08, "epoch": 4.748581232677841, "percentage": 94.97, "elapsed_time": "0:52:46", "remaining_time": "0:02:47", "throughput": 5593.71, "total_tokens": 17710352}
|
|
{"current_steps": 35985, "total_steps": 37885, "loss": 0.0, "lr": 1.5300882636894662e-08, "epoch": 4.749241124455589, "percentage": 94.98, "elapsed_time": "0:52:46", "remaining_time": "0:02:47", "throughput": 5593.8, "total_tokens": 17712464}
|
|
{"current_steps": 35990, "total_steps": 37885, "loss": 0.0426, "lr": 1.5220704427189145e-08, "epoch": 4.749901016233338, "percentage": 95.0, "elapsed_time": "0:52:46", "remaining_time": "0:02:46", "throughput": 5593.96, "total_tokens": 17714832}
|
|
{"current_steps": 35995, "total_steps": 37885, "loss": 0.075, "lr": 1.5140735229757893e-08, "epoch": 4.750560908011086, "percentage": 95.01, "elapsed_time": "0:52:47", "remaining_time": "0:02:46", "throughput": 5594.13, "total_tokens": 17717200}
|
|
{"current_steps": 36000, "total_steps": 37885, "loss": 0.0, "lr": 1.5060975061573777e-08, "epoch": 4.751220799788834, "percentage": 95.02, "elapsed_time": "0:52:47", "remaining_time": "0:02:45", "throughput": 5594.26, "total_tokens": 17719440}
|
|
{"current_steps": 36005, "total_steps": 37885, "loss": 0.0, "lr": 1.4981423939565364e-08, "epoch": 4.751880691566583, "percentage": 95.04, "elapsed_time": "0:52:47", "remaining_time": "0:02:45", "throughput": 5594.41, "total_tokens": 17721744}
|
|
{"current_steps": 36005, "total_steps": 37885, "eval_loss": 0.2836270332336426, "epoch": 4.751880691566583, "percentage": 95.04, "elapsed_time": "0:52:55", "remaining_time": "0:02:45", "throughput": 5580.67, "total_tokens": 17721744}
|
|
{"current_steps": 36010, "total_steps": 37885, "loss": 0.0, "lr": 1.49020818806167e-08, "epoch": 4.752540583344332, "percentage": 95.05, "elapsed_time": "0:53:33", "remaining_time": "0:02:47", "throughput": 5515.1, "total_tokens": 17724048}
|
|
{"current_steps": 36015, "total_steps": 37885, "loss": 0.0, "lr": 1.4822948901567767e-08, "epoch": 4.75320047512208, "percentage": 95.06, "elapsed_time": "0:53:34", "remaining_time": "0:02:46", "throughput": 5515.34, "total_tokens": 17726672}
|
|
{"current_steps": 36020, "total_steps": 37885, "loss": 0.006, "lr": 1.474402501921368e-08, "epoch": 4.753860366899828, "percentage": 95.08, "elapsed_time": "0:53:34", "remaining_time": "0:02:46", "throughput": 5515.55, "total_tokens": 17729168}
|
|
{"current_steps": 36025, "total_steps": 37885, "loss": 0.0001, "lr": 1.4665310250305708e-08, "epoch": 4.7545202586775765, "percentage": 95.09, "elapsed_time": "0:53:34", "remaining_time": "0:02:45", "throughput": 5515.76, "total_tokens": 17731664}
|
|
{"current_steps": 36030, "total_steps": 37885, "loss": 0.001, "lr": 1.4586804611550484e-08, "epoch": 4.755180150455326, "percentage": 95.1, "elapsed_time": "0:53:35", "remaining_time": "0:02:45", "throughput": 5515.98, "total_tokens": 17734224}
|
|
{"current_steps": 36035, "total_steps": 37885, "loss": 0.0, "lr": 1.4508508119610019e-08, "epoch": 4.755840042233074, "percentage": 95.12, "elapsed_time": "0:53:35", "remaining_time": "0:02:45", "throughput": 5516.18, "total_tokens": 17736656}
|
|
{"current_steps": 36040, "total_steps": 37885, "loss": 0.0, "lr": 1.4430420791102461e-08, "epoch": 4.756499934010822, "percentage": 95.13, "elapsed_time": "0:53:35", "remaining_time": "0:02:44", "throughput": 5516.29, "total_tokens": 17738832}
|
|
{"current_steps": 36045, "total_steps": 37885, "loss": 0.0, "lr": 1.4352542642601106e-08, "epoch": 4.7571598257885706, "percentage": 95.14, "elapsed_time": "0:53:36", "remaining_time": "0:02:44", "throughput": 5516.5, "total_tokens": 17741328}
|
|
{"current_steps": 36050, "total_steps": 37885, "loss": 0.0001, "lr": 1.427487369063507e-08, "epoch": 4.757819717566319, "percentage": 95.16, "elapsed_time": "0:53:36", "remaining_time": "0:02:43", "throughput": 5516.74, "total_tokens": 17743952}
|
|
{"current_steps": 36055, "total_steps": 37885, "loss": 0.0001, "lr": 1.4197413951689052e-08, "epoch": 4.758479609344068, "percentage": 95.17, "elapsed_time": "0:53:36", "remaining_time": "0:02:43", "throughput": 5516.97, "total_tokens": 17746512}
|
|
{"current_steps": 36060, "total_steps": 37885, "loss": 0.0, "lr": 1.4120163442203237e-08, "epoch": 4.759139501121816, "percentage": 95.18, "elapsed_time": "0:53:37", "remaining_time": "0:02:42", "throughput": 5517.21, "total_tokens": 17749072}
|
|
{"current_steps": 36065, "total_steps": 37885, "loss": 0.0, "lr": 1.404312217857373e-08, "epoch": 4.7597993928995646, "percentage": 95.2, "elapsed_time": "0:53:37", "remaining_time": "0:02:42", "throughput": 5517.56, "total_tokens": 17752080}
|
|
{"current_steps": 36070, "total_steps": 37885, "loss": 0.0, "lr": 1.3966290177151674e-08, "epoch": 4.760459284677313, "percentage": 95.21, "elapsed_time": "0:53:37", "remaining_time": "0:02:41", "throughput": 5517.81, "total_tokens": 17754704}
|
|
{"current_steps": 36075, "total_steps": 37885, "loss": 0.0, "lr": 1.3889667454244136e-08, "epoch": 4.761119176455061, "percentage": 95.22, "elapsed_time": "0:53:38", "remaining_time": "0:02:41", "throughput": 5518.09, "total_tokens": 17757456}
|
|
{"current_steps": 36080, "total_steps": 37885, "loss": 0.0, "lr": 1.3813254026113997e-08, "epoch": 4.76177906823281, "percentage": 95.24, "elapsed_time": "0:53:38", "remaining_time": "0:02:41", "throughput": 5518.33, "total_tokens": 17760080}
|
|
{"current_steps": 36085, "total_steps": 37885, "loss": 0.0007, "lr": 1.373704990897917e-08, "epoch": 4.7624389600105586, "percentage": 95.25, "elapsed_time": "0:53:38", "remaining_time": "0:02:40", "throughput": 5518.54, "total_tokens": 17762576}
|
|
{"current_steps": 36090, "total_steps": 37885, "loss": 0.0, "lr": 1.3661055119013608e-08, "epoch": 4.763098851788307, "percentage": 95.26, "elapsed_time": "0:53:39", "remaining_time": "0:02:40", "throughput": 5518.71, "total_tokens": 17764944}
|
|
{"current_steps": 36095, "total_steps": 37885, "loss": 0.0, "lr": 1.3585269672346633e-08, "epoch": 4.763758743566055, "percentage": 95.28, "elapsed_time": "0:53:39", "remaining_time": "0:02:39", "throughput": 5518.92, "total_tokens": 17767440}
|
|
{"current_steps": 36100, "total_steps": 37885, "loss": 0.0, "lr": 1.3509693585063042e-08, "epoch": 4.764418635343803, "percentage": 95.29, "elapsed_time": "0:53:39", "remaining_time": "0:02:39", "throughput": 5519.11, "total_tokens": 17769872}
|
|
{"current_steps": 36105, "total_steps": 37885, "loss": 0.0, "lr": 1.3434326873203449e-08, "epoch": 4.7650785271215526, "percentage": 95.3, "elapsed_time": "0:53:40", "remaining_time": "0:02:38", "throughput": 5519.28, "total_tokens": 17772240}
|
|
{"current_steps": 36110, "total_steps": 37885, "loss": 0.0, "lr": 1.3359169552763727e-08, "epoch": 4.765738418899301, "percentage": 95.31, "elapsed_time": "0:53:40", "remaining_time": "0:02:38", "throughput": 5519.45, "total_tokens": 17774608}
|
|
{"current_steps": 36115, "total_steps": 37885, "loss": 0.0, "lr": 1.328422163969567e-08, "epoch": 4.766398310677049, "percentage": 95.33, "elapsed_time": "0:53:40", "remaining_time": "0:02:37", "throughput": 5519.62, "total_tokens": 17776976}
|
|
{"current_steps": 36120, "total_steps": 37885, "loss": 0.0, "lr": 1.320948314990633e-08, "epoch": 4.767058202454797, "percentage": 95.34, "elapsed_time": "0:53:41", "remaining_time": "0:02:37", "throughput": 5519.77, "total_tokens": 17779280}
|
|
{"current_steps": 36125, "total_steps": 37885, "loss": 0.0, "lr": 1.3134954099258466e-08, "epoch": 4.767718094232546, "percentage": 95.35, "elapsed_time": "0:53:41", "remaining_time": "0:02:36", "throughput": 5519.97, "total_tokens": 17781712}
|
|
{"current_steps": 36130, "total_steps": 37885, "loss": 0.0, "lr": 1.306063450357009e-08, "epoch": 4.768377986010294, "percentage": 95.37, "elapsed_time": "0:53:41", "remaining_time": "0:02:36", "throughput": 5520.18, "total_tokens": 17784208}
|
|
{"current_steps": 36135, "total_steps": 37885, "loss": 0.0, "lr": 1.298652437861536e-08, "epoch": 4.769037877788043, "percentage": 95.38, "elapsed_time": "0:53:42", "remaining_time": "0:02:36", "throughput": 5520.27, "total_tokens": 17786320}
|
|
{"current_steps": 36140, "total_steps": 37885, "loss": 0.0, "lr": 1.2912623740123362e-08, "epoch": 4.769697769565791, "percentage": 95.39, "elapsed_time": "0:53:42", "remaining_time": "0:02:35", "throughput": 5520.44, "total_tokens": 17788688}
|
|
{"current_steps": 36145, "total_steps": 37885, "loss": 0.0, "lr": 1.2838932603779107e-08, "epoch": 4.77035766134354, "percentage": 95.41, "elapsed_time": "0:53:42", "remaining_time": "0:02:35", "throughput": 5520.68, "total_tokens": 17791312}
|
|
{"current_steps": 36150, "total_steps": 37885, "loss": 0.0, "lr": 1.2765450985222859e-08, "epoch": 4.771017553121288, "percentage": 95.42, "elapsed_time": "0:53:42", "remaining_time": "0:02:34", "throughput": 5520.91, "total_tokens": 17793872}
|
|
{"current_steps": 36155, "total_steps": 37885, "loss": 0.0, "lr": 1.269217890005081e-08, "epoch": 4.771677444899036, "percentage": 95.43, "elapsed_time": "0:53:43", "remaining_time": "0:02:34", "throughput": 5521.13, "total_tokens": 17796432}
|
|
{"current_steps": 36160, "total_steps": 37885, "loss": 0.0, "lr": 1.2619116363814075e-08, "epoch": 4.772337336676785, "percentage": 95.45, "elapsed_time": "0:53:43", "remaining_time": "0:02:33", "throughput": 5521.32, "total_tokens": 17798864}
|
|
{"current_steps": 36165, "total_steps": 37885, "loss": 0.0, "lr": 1.2546263392019917e-08, "epoch": 4.772997228454534, "percentage": 95.46, "elapsed_time": "0:53:43", "remaining_time": "0:02:33", "throughput": 5521.47, "total_tokens": 17801168}
|
|
{"current_steps": 36170, "total_steps": 37885, "loss": 0.0, "lr": 1.2473620000130858e-08, "epoch": 4.773657120232282, "percentage": 95.47, "elapsed_time": "0:53:44", "remaining_time": "0:02:32", "throughput": 5521.58, "total_tokens": 17803344}
|
|
{"current_steps": 36175, "total_steps": 37885, "loss": 0.0, "lr": 1.2401186203564784e-08, "epoch": 4.77431701201003, "percentage": 95.49, "elapsed_time": "0:53:44", "remaining_time": "0:02:32", "throughput": 5521.79, "total_tokens": 17805840}
|
|
{"current_steps": 36180, "total_steps": 37885, "loss": 0.0213, "lr": 1.2328962017695288e-08, "epoch": 4.7749769037877785, "percentage": 95.5, "elapsed_time": "0:53:44", "remaining_time": "0:02:31", "throughput": 5521.99, "total_tokens": 17808336}
|
|
{"current_steps": 36185, "total_steps": 37885, "loss": 0.0, "lr": 1.225694745785144e-08, "epoch": 4.775636795565527, "percentage": 95.51, "elapsed_time": "0:53:45", "remaining_time": "0:02:31", "throughput": 5522.22, "total_tokens": 17810960}
|
|
{"current_steps": 36190, "total_steps": 37885, "loss": 0.0, "lr": 1.2185142539317905e-08, "epoch": 4.776296687343276, "percentage": 95.53, "elapsed_time": "0:53:45", "remaining_time": "0:02:31", "throughput": 5522.37, "total_tokens": 17813328}
|
|
{"current_steps": 36195, "total_steps": 37885, "loss": 0.0, "lr": 1.21135472773346e-08, "epoch": 4.776956579121024, "percentage": 95.54, "elapsed_time": "0:53:45", "remaining_time": "0:02:30", "throughput": 5522.55, "total_tokens": 17815760}
|
|
{"current_steps": 36200, "total_steps": 37885, "loss": 0.0, "lr": 1.2042161687097152e-08, "epoch": 4.7776164708987725, "percentage": 95.55, "elapsed_time": "0:53:46", "remaining_time": "0:02:30", "throughput": 5522.78, "total_tokens": 17818384}
|
|
{"current_steps": 36205, "total_steps": 37885, "loss": 0.0, "lr": 1.197098578375677e-08, "epoch": 4.778276362676521, "percentage": 95.57, "elapsed_time": "0:53:46", "remaining_time": "0:02:29", "throughput": 5522.94, "total_tokens": 17820752}
|
|
{"current_steps": 36210, "total_steps": 37885, "loss": 0.0079, "lr": 1.1900019582419818e-08, "epoch": 4.778936254454269, "percentage": 95.58, "elapsed_time": "0:53:47", "remaining_time": "0:02:29", "throughput": 5523.14, "total_tokens": 17823248}
|
|
{"current_steps": 36215, "total_steps": 37885, "loss": 0.0, "lr": 1.1829263098148357e-08, "epoch": 4.779596146232018, "percentage": 95.59, "elapsed_time": "0:53:47", "remaining_time": "0:02:28", "throughput": 5523.32, "total_tokens": 17825680}
|
|
{"current_steps": 36220, "total_steps": 37885, "loss": 0.0, "lr": 1.1758716345960263e-08, "epoch": 4.7802560380097665, "percentage": 95.61, "elapsed_time": "0:53:47", "remaining_time": "0:02:28", "throughput": 5523.5, "total_tokens": 17828112}
|
|
{"current_steps": 36225, "total_steps": 37885, "loss": 0.0, "lr": 1.1688379340828224e-08, "epoch": 4.780915929787515, "percentage": 95.62, "elapsed_time": "0:53:48", "remaining_time": "0:02:27", "throughput": 5523.69, "total_tokens": 17830544}
|
|
{"current_steps": 36230, "total_steps": 37885, "loss": 0.0, "lr": 1.1618252097680858e-08, "epoch": 4.781575821565263, "percentage": 95.63, "elapsed_time": "0:53:48", "remaining_time": "0:02:27", "throughput": 5523.93, "total_tokens": 17833168}
|
|
{"current_steps": 36235, "total_steps": 37885, "loss": 0.0, "lr": 1.1548334631402146e-08, "epoch": 4.782235713343011, "percentage": 95.64, "elapsed_time": "0:53:48", "remaining_time": "0:02:27", "throughput": 5524.09, "total_tokens": 17835536}
|
|
{"current_steps": 36240, "total_steps": 37885, "loss": 0.0, "lr": 1.1478626956831771e-08, "epoch": 4.7828956051207605, "percentage": 95.66, "elapsed_time": "0:53:49", "remaining_time": "0:02:26", "throughput": 5524.19, "total_tokens": 17837712}
|
|
{"current_steps": 36245, "total_steps": 37885, "loss": 0.0, "lr": 1.1409129088764346e-08, "epoch": 4.783555496898509, "percentage": 95.67, "elapsed_time": "0:53:49", "remaining_time": "0:02:26", "throughput": 5524.49, "total_tokens": 17840528}
|
|
{"current_steps": 36250, "total_steps": 37885, "loss": 0.0, "lr": 1.1339841041950516e-08, "epoch": 4.784215388676257, "percentage": 95.68, "elapsed_time": "0:53:49", "remaining_time": "0:02:25", "throughput": 5524.65, "total_tokens": 17842896}
|
|
{"current_steps": 36255, "total_steps": 37885, "loss": 0.0, "lr": 1.1270762831096182e-08, "epoch": 4.784875280454005, "percentage": 95.7, "elapsed_time": "0:53:50", "remaining_time": "0:02:25", "throughput": 5524.89, "total_tokens": 17845520}
|
|
{"current_steps": 36260, "total_steps": 37885, "loss": 0.0, "lr": 1.1201894470862504e-08, "epoch": 4.785535172231754, "percentage": 95.71, "elapsed_time": "0:53:50", "remaining_time": "0:02:24", "throughput": 5525.14, "total_tokens": 17848144}
|
|
{"current_steps": 36265, "total_steps": 37885, "loss": 0.0, "lr": 1.1133235975866572e-08, "epoch": 4.786195064009503, "percentage": 95.72, "elapsed_time": "0:53:50", "remaining_time": "0:02:24", "throughput": 5525.24, "total_tokens": 17850320}
|
|
{"current_steps": 36270, "total_steps": 37885, "loss": 0.0, "lr": 1.1064787360680282e-08, "epoch": 4.786854955787251, "percentage": 95.74, "elapsed_time": "0:53:51", "remaining_time": "0:02:23", "throughput": 5525.45, "total_tokens": 17852816}
|
|
{"current_steps": 36275, "total_steps": 37885, "loss": 0.0, "lr": 1.0996548639831793e-08, "epoch": 4.787514847564999, "percentage": 95.75, "elapsed_time": "0:53:51", "remaining_time": "0:02:23", "throughput": 5525.62, "total_tokens": 17855248}
|
|
{"current_steps": 36280, "total_steps": 37885, "loss": 0.0, "lr": 1.0928519827803961e-08, "epoch": 4.788174739342748, "percentage": 95.76, "elapsed_time": "0:53:51", "remaining_time": "0:02:22", "throughput": 5525.78, "total_tokens": 17857616}
|
|
{"current_steps": 36285, "total_steps": 37885, "loss": 0.0, "lr": 1.086070093903535e-08, "epoch": 4.788834631120496, "percentage": 95.78, "elapsed_time": "0:53:52", "remaining_time": "0:02:22", "throughput": 5526.02, "total_tokens": 17860240}
|
|
{"current_steps": 36290, "total_steps": 37885, "loss": 0.0, "lr": 1.0793091987920444e-08, "epoch": 4.789494522898245, "percentage": 95.79, "elapsed_time": "0:53:52", "remaining_time": "0:02:22", "throughput": 5526.25, "total_tokens": 17862800}
|
|
{"current_steps": 36295, "total_steps": 37885, "loss": 0.0, "lr": 1.0725692988808322e-08, "epoch": 4.790154414675993, "percentage": 95.8, "elapsed_time": "0:53:52", "remaining_time": "0:02:21", "throughput": 5526.39, "total_tokens": 17865168}
|
|
{"current_steps": 36300, "total_steps": 37885, "loss": 0.0, "lr": 1.0658503956004206e-08, "epoch": 4.790814306453742, "percentage": 95.82, "elapsed_time": "0:53:53", "remaining_time": "0:02:21", "throughput": 5526.59, "total_tokens": 17867664}
|
|
{"current_steps": 36305, "total_steps": 37885, "loss": 0.0, "lr": 1.0591524903768245e-08, "epoch": 4.79147419823149, "percentage": 95.83, "elapsed_time": "0:53:53", "remaining_time": "0:02:20", "throughput": 5526.78, "total_tokens": 17870160}
|
|
{"current_steps": 36310, "total_steps": 37885, "loss": 0.0, "lr": 1.0524755846316402e-08, "epoch": 4.792134090009238, "percentage": 95.84, "elapsed_time": "0:53:53", "remaining_time": "0:02:20", "throughput": 5526.97, "total_tokens": 17872656}
|
|
{"current_steps": 36315, "total_steps": 37885, "loss": 0.0, "lr": 1.0458196797820007e-08, "epoch": 4.792793981786987, "percentage": 95.86, "elapsed_time": "0:53:54", "remaining_time": "0:02:19", "throughput": 5527.21, "total_tokens": 17875280}
|
|
{"current_steps": 36320, "total_steps": 37885, "loss": 0.0001, "lr": 1.039184777240565e-08, "epoch": 4.793453873564736, "percentage": 95.87, "elapsed_time": "0:53:54", "remaining_time": "0:02:19", "throughput": 5527.41, "total_tokens": 17877776}
|
|
{"current_steps": 36325, "total_steps": 37885, "loss": 0.0, "lr": 1.0325708784155396e-08, "epoch": 4.794113765342484, "percentage": 95.88, "elapsed_time": "0:53:54", "remaining_time": "0:02:18", "throughput": 5527.52, "total_tokens": 17879952}
|
|
{"current_steps": 36330, "total_steps": 37885, "loss": 0.0, "lr": 1.0259779847106798e-08, "epoch": 4.794773657120232, "percentage": 95.9, "elapsed_time": "0:53:55", "remaining_time": "0:02:18", "throughput": 5527.75, "total_tokens": 17882512}
|
|
{"current_steps": 36335, "total_steps": 37885, "loss": 0.0, "lr": 1.0194060975252772e-08, "epoch": 4.7954335488979805, "percentage": 95.91, "elapsed_time": "0:53:55", "remaining_time": "0:02:18", "throughput": 5527.96, "total_tokens": 17885072}
|
|
{"current_steps": 36340, "total_steps": 37885, "loss": 0.0, "lr": 1.0128552182541606e-08, "epoch": 4.79609344067573, "percentage": 95.92, "elapsed_time": "0:53:55", "remaining_time": "0:02:17", "throughput": 5528.16, "total_tokens": 17887568}
|
|
{"current_steps": 36345, "total_steps": 37885, "loss": 0.0, "lr": 1.0063253482877287e-08, "epoch": 4.796753332453478, "percentage": 95.94, "elapsed_time": "0:53:56", "remaining_time": "0:02:17", "throughput": 5528.41, "total_tokens": 17890192}
|
|
{"current_steps": 36350, "total_steps": 37885, "loss": 0.0005, "lr": 9.998164890118844e-09, "epoch": 4.797413224231226, "percentage": 95.95, "elapsed_time": "0:53:56", "remaining_time": "0:02:16", "throughput": 5528.66, "total_tokens": 17892880}
|
|
{"current_steps": 36355, "total_steps": 37885, "loss": 0.0, "lr": 9.933286418080778e-09, "epoch": 4.7980731160089745, "percentage": 95.96, "elapsed_time": "0:53:56", "remaining_time": "0:02:16", "throughput": 5528.83, "total_tokens": 17895376}
|
|
{"current_steps": 36360, "total_steps": 37885, "loss": 0.0012, "lr": 9.868618080533298e-09, "epoch": 4.798733007786723, "percentage": 95.97, "elapsed_time": "0:53:57", "remaining_time": "0:02:15", "throughput": 5528.98, "total_tokens": 17897680}
|
|
{"current_steps": 36365, "total_steps": 37885, "loss": 0.0011, "lr": 9.804159891201536e-09, "epoch": 4.799392899564472, "percentage": 95.99, "elapsed_time": "0:53:57", "remaining_time": "0:02:15", "throughput": 5529.23, "total_tokens": 17900368}
|
|
{"current_steps": 36370, "total_steps": 37885, "loss": 0.0, "lr": 9.739911863766548e-09, "epoch": 4.80005279134222, "percentage": 96.0, "elapsed_time": "0:53:57", "remaining_time": "0:02:14", "throughput": 5529.44, "total_tokens": 17902928}
|
|
{"current_steps": 36375, "total_steps": 37885, "loss": 0.0, "lr": 9.675874011864205e-09, "epoch": 4.8007126831199685, "percentage": 96.01, "elapsed_time": "0:53:58", "remaining_time": "0:02:14", "throughput": 5529.65, "total_tokens": 17905488}
|
|
{"current_steps": 36380, "total_steps": 37885, "loss": 0.0, "lr": 9.612046349086411e-09, "epoch": 4.801372574897717, "percentage": 96.03, "elapsed_time": "0:53:58", "remaining_time": "0:02:13", "throughput": 5529.82, "total_tokens": 17907920}
|
|
{"current_steps": 36385, "total_steps": 37885, "loss": 0.0396, "lr": 9.548428888979775e-09, "epoch": 4.802032466675465, "percentage": 96.04, "elapsed_time": "0:53:58", "remaining_time": "0:02:13", "throughput": 5530.05, "total_tokens": 17910544}
|
|
{"current_steps": 36390, "total_steps": 37885, "loss": 0.0054, "lr": 9.485021645046941e-09, "epoch": 4.802692358453213, "percentage": 96.05, "elapsed_time": "0:53:59", "remaining_time": "0:02:13", "throughput": 5530.22, "total_tokens": 17912976}
|
|
{"current_steps": 36395, "total_steps": 37885, "loss": 0.0, "lr": 9.421824630745478e-09, "epoch": 4.8033522502309625, "percentage": 96.07, "elapsed_time": "0:53:59", "remaining_time": "0:02:12", "throughput": 5530.37, "total_tokens": 17915280}
|
|
{"current_steps": 36400, "total_steps": 37885, "loss": 0.0, "lr": 9.358837859488544e-09, "epoch": 4.804012142008711, "percentage": 96.08, "elapsed_time": "0:53:59", "remaining_time": "0:02:12", "throughput": 5530.52, "total_tokens": 17917648}
|
|
{"current_steps": 36405, "total_steps": 37885, "loss": 0.0, "lr": 9.296061344644667e-09, "epoch": 4.804672033786459, "percentage": 96.09, "elapsed_time": "0:54:00", "remaining_time": "0:02:11", "throughput": 5530.65, "total_tokens": 17919952}
|
|
{"current_steps": 36410, "total_steps": 37885, "loss": 0.0035, "lr": 9.233495099537525e-09, "epoch": 4.805331925564207, "percentage": 96.11, "elapsed_time": "0:54:00", "remaining_time": "0:02:11", "throughput": 5530.85, "total_tokens": 17922512}
|
|
{"current_steps": 36415, "total_steps": 37885, "loss": 0.0005, "lr": 9.171139137446605e-09, "epoch": 4.805991817341956, "percentage": 96.12, "elapsed_time": "0:54:00", "remaining_time": "0:02:10", "throughput": 5531.02, "total_tokens": 17924944}
|
|
{"current_steps": 36420, "total_steps": 37885, "loss": 0.0, "lr": 9.10899347160632e-09, "epoch": 4.806651709119705, "percentage": 96.13, "elapsed_time": "0:54:01", "remaining_time": "0:02:10", "throughput": 5531.17, "total_tokens": 17927312}
|
|
{"current_steps": 36425, "total_steps": 37885, "loss": 0.0, "lr": 9.047058115206674e-09, "epoch": 4.807311600897453, "percentage": 96.15, "elapsed_time": "0:54:01", "remaining_time": "0:02:09", "throughput": 5531.34, "total_tokens": 17929744}
|
|
{"current_steps": 36430, "total_steps": 37885, "loss": 0.028, "lr": 8.985333081393154e-09, "epoch": 4.807971492675201, "percentage": 96.16, "elapsed_time": "0:54:01", "remaining_time": "0:02:09", "throughput": 5531.49, "total_tokens": 17932112}
|
|
{"current_steps": 36435, "total_steps": 37885, "loss": 0.0, "lr": 8.923818383266169e-09, "epoch": 4.80863138445295, "percentage": 96.17, "elapsed_time": "0:54:02", "remaining_time": "0:02:09", "throughput": 5531.65, "total_tokens": 17934480}
|
|
{"current_steps": 36440, "total_steps": 37885, "loss": 0.0, "lr": 8.862514033882051e-09, "epoch": 4.809291276230698, "percentage": 96.19, "elapsed_time": "0:54:02", "remaining_time": "0:02:08", "throughput": 5531.83, "total_tokens": 17936912}
|
|
{"current_steps": 36445, "total_steps": 37885, "loss": 0.0, "lr": 8.80142004625195e-09, "epoch": 4.809951168008446, "percentage": 96.2, "elapsed_time": "0:54:02", "remaining_time": "0:02:08", "throughput": 5532.05, "total_tokens": 17939536}
|
|
{"current_steps": 36450, "total_steps": 37885, "loss": 0.0001, "lr": 8.740536433342826e-09, "epoch": 4.810611059786195, "percentage": 96.21, "elapsed_time": "0:54:03", "remaining_time": "0:02:07", "throughput": 5532.14, "total_tokens": 17941712}
|
|
{"current_steps": 36455, "total_steps": 37885, "loss": 0.0, "lr": 8.679863208076787e-09, "epoch": 4.811270951563944, "percentage": 96.23, "elapsed_time": "0:54:03", "remaining_time": "0:02:07", "throughput": 5532.28, "total_tokens": 17944016}
|
|
{"current_steps": 36460, "total_steps": 37885, "loss": 0.0, "lr": 8.619400383331088e-09, "epoch": 4.811930843341692, "percentage": 96.24, "elapsed_time": "0:54:03", "remaining_time": "0:02:06", "throughput": 5532.42, "total_tokens": 17946320}
|
|
{"current_steps": 36465, "total_steps": 37885, "loss": 0.0, "lr": 8.559147971938574e-09, "epoch": 4.81259073511944, "percentage": 96.25, "elapsed_time": "0:54:04", "remaining_time": "0:02:06", "throughput": 5532.6, "total_tokens": 17948752}
|
|
{"current_steps": 36470, "total_steps": 37885, "loss": 0.0001, "lr": 8.499105986687572e-09, "epoch": 4.813250626897188, "percentage": 96.27, "elapsed_time": "0:54:04", "remaining_time": "0:02:05", "throughput": 5532.81, "total_tokens": 17951376}
|
|
{"current_steps": 36475, "total_steps": 37885, "loss": 0.0, "lr": 8.439274440321442e-09, "epoch": 4.813910518674938, "percentage": 96.28, "elapsed_time": "0:54:04", "remaining_time": "0:02:05", "throughput": 5532.93, "total_tokens": 17953616}
|
|
{"current_steps": 36480, "total_steps": 37885, "loss": 0.0, "lr": 8.379653345538918e-09, "epoch": 4.814570410452686, "percentage": 96.29, "elapsed_time": "0:54:05", "remaining_time": "0:02:04", "throughput": 5533.03, "total_tokens": 17955792}
|
|
{"current_steps": 36485, "total_steps": 37885, "loss": 0.0, "lr": 8.320242714994319e-09, "epoch": 4.815230302230434, "percentage": 96.3, "elapsed_time": "0:54:05", "remaining_time": "0:02:04", "throughput": 5533.22, "total_tokens": 17958288}
|
|
{"current_steps": 36490, "total_steps": 37885, "loss": 0.0001, "lr": 8.261042561297004e-09, "epoch": 4.815890194008182, "percentage": 96.32, "elapsed_time": "0:54:05", "remaining_time": "0:02:04", "throughput": 5533.5, "total_tokens": 17961104}
|
|
{"current_steps": 36495, "total_steps": 37885, "loss": 0.0, "lr": 8.202052897011702e-09, "epoch": 4.816550085785931, "percentage": 96.33, "elapsed_time": "0:54:06", "remaining_time": "0:02:03", "throughput": 5533.69, "total_tokens": 17963600}
|
|
{"current_steps": 36500, "total_steps": 37885, "loss": 0.0, "lr": 8.143273734658729e-09, "epoch": 4.81720997756368, "percentage": 96.34, "elapsed_time": "0:54:06", "remaining_time": "0:02:03", "throughput": 5533.89, "total_tokens": 17966096}
|
|
{"current_steps": 36505, "total_steps": 37885, "loss": 0.0, "lr": 8.084705086713439e-09, "epoch": 4.817869869341428, "percentage": 96.36, "elapsed_time": "0:54:06", "remaining_time": "0:02:02", "throughput": 5534.06, "total_tokens": 17968592}
|
|
{"current_steps": 36510, "total_steps": 37885, "loss": 0.0412, "lr": 8.026346965606556e-09, "epoch": 4.818529761119176, "percentage": 96.37, "elapsed_time": "0:54:07", "remaining_time": "0:02:02", "throughput": 5534.19, "total_tokens": 17970832}
|
|
{"current_steps": 36515, "total_steps": 37885, "loss": 0.0, "lr": 7.968199383724283e-09, "epoch": 4.819189652896925, "percentage": 96.38, "elapsed_time": "0:54:07", "remaining_time": "0:02:01", "throughput": 5534.33, "total_tokens": 17973136}
|
|
{"current_steps": 36520, "total_steps": 37885, "loss": 0.0, "lr": 7.91026235340786e-09, "epoch": 4.819849544674673, "percentage": 96.4, "elapsed_time": "0:54:07", "remaining_time": "0:02:01", "throughput": 5534.53, "total_tokens": 17975632}
|
|
{"current_steps": 36525, "total_steps": 37885, "loss": 0.0, "lr": 7.852535886954225e-09, "epoch": 4.820509436452422, "percentage": 96.41, "elapsed_time": "0:54:08", "remaining_time": "0:02:00", "throughput": 5534.72, "total_tokens": 17978128}
|
|
{"current_steps": 36530, "total_steps": 37885, "loss": 0.0, "lr": 7.795019996615249e-09, "epoch": 4.82116932823017, "percentage": 96.42, "elapsed_time": "0:54:08", "remaining_time": "0:02:00", "throughput": 5534.95, "total_tokens": 17980752}
|
|
{"current_steps": 36535, "total_steps": 37885, "loss": 0.0, "lr": 7.737714694598274e-09, "epoch": 4.821829220007919, "percentage": 96.44, "elapsed_time": "0:54:08", "remaining_time": "0:02:00", "throughput": 5535.21, "total_tokens": 17983504}
|
|
{"current_steps": 36540, "total_steps": 37885, "loss": 0.0, "lr": 7.680619993065906e-09, "epoch": 4.822489111785667, "percentage": 96.45, "elapsed_time": "0:54:09", "remaining_time": "0:01:59", "throughput": 5535.36, "total_tokens": 17985872}
|
|
{"current_steps": 36545, "total_steps": 37885, "loss": 0.0, "lr": 7.62373590413623e-09, "epoch": 4.823149003563415, "percentage": 96.46, "elapsed_time": "0:54:09", "remaining_time": "0:01:59", "throughput": 5535.6, "total_tokens": 17988560}
|
|
{"current_steps": 36550, "total_steps": 37885, "loss": 0.0001, "lr": 7.567062439882254e-09, "epoch": 4.823808895341164, "percentage": 96.48, "elapsed_time": "0:54:09", "remaining_time": "0:01:58", "throughput": 5535.77, "total_tokens": 17990928}
|
|
{"current_steps": 36555, "total_steps": 37885, "loss": 0.0025, "lr": 7.510599612332801e-09, "epoch": 4.824468787118913, "percentage": 96.49, "elapsed_time": "0:54:10", "remaining_time": "0:01:58", "throughput": 5535.92, "total_tokens": 17993296}
|
|
{"current_steps": 36560, "total_steps": 37885, "loss": 0.0, "lr": 7.454347433471397e-09, "epoch": 4.825128678896661, "percentage": 96.5, "elapsed_time": "0:54:10", "remaining_time": "0:01:57", "throughput": 5536.11, "total_tokens": 17995792}
|
|
{"current_steps": 36565, "total_steps": 37885, "loss": 0.028, "lr": 7.398305915237379e-09, "epoch": 4.825788570674409, "percentage": 96.52, "elapsed_time": "0:54:10", "remaining_time": "0:01:57", "throughput": 5536.24, "total_tokens": 17998096}
|
|
{"current_steps": 36570, "total_steps": 37885, "loss": 0.0, "lr": 7.342475069525012e-09, "epoch": 4.8264484624521575, "percentage": 96.53, "elapsed_time": "0:54:11", "remaining_time": "0:01:56", "throughput": 5536.41, "total_tokens": 18000528}
|
|
{"current_steps": 36575, "total_steps": 37885, "loss": 0.0, "lr": 7.2868549081841476e-09, "epoch": 4.827108354229907, "percentage": 96.54, "elapsed_time": "0:54:11", "remaining_time": "0:01:56", "throughput": 5536.6, "total_tokens": 18003024}
|
|
{"current_steps": 36580, "total_steps": 37885, "loss": 0.0, "lr": 7.2314454430195685e-09, "epoch": 4.827768246007655, "percentage": 96.56, "elapsed_time": "0:54:11", "remaining_time": "0:01:56", "throughput": 5536.84, "total_tokens": 18005712}
|
|
{"current_steps": 36585, "total_steps": 37885, "loss": 0.0915, "lr": 7.176246685791754e-09, "epoch": 4.828428137785403, "percentage": 96.57, "elapsed_time": "0:54:12", "remaining_time": "0:01:55", "throughput": 5537.0, "total_tokens": 18008144}
|
|
{"current_steps": 36590, "total_steps": 37885, "loss": 0.0239, "lr": 7.121258648216e-09, "epoch": 4.8290880295631515, "percentage": 96.58, "elapsed_time": "0:54:12", "remaining_time": "0:01:55", "throughput": 5537.07, "total_tokens": 18010256}
|
|
{"current_steps": 36595, "total_steps": 37885, "loss": 0.0, "lr": 7.066481341963304e-09, "epoch": 4.8297479213409, "percentage": 96.59, "elapsed_time": "0:54:13", "remaining_time": "0:01:54", "throughput": 5537.26, "total_tokens": 18012752}
|
|
{"current_steps": 36600, "total_steps": 37885, "loss": 0.0, "lr": 7.0119147786597e-09, "epoch": 4.830407813118649, "percentage": 96.61, "elapsed_time": "0:54:13", "remaining_time": "0:01:54", "throughput": 5537.38, "total_tokens": 18014992}
|
|
{"current_steps": 36605, "total_steps": 37885, "loss": 0.0004, "lr": 6.957558969886368e-09, "epoch": 4.831067704896397, "percentage": 96.62, "elapsed_time": "0:54:13", "remaining_time": "0:01:53", "throughput": 5537.6, "total_tokens": 18017552}
|
|
{"current_steps": 36610, "total_steps": 37885, "loss": 0.0, "lr": 6.9034139271803015e-09, "epoch": 4.8317275966741455, "percentage": 96.63, "elapsed_time": "0:54:14", "remaining_time": "0:01:53", "throughput": 5537.79, "total_tokens": 18020048}
|
|
{"current_steps": 36615, "total_steps": 37885, "loss": 0.0002, "lr": 6.849479662033086e-09, "epoch": 4.832387488451894, "percentage": 96.65, "elapsed_time": "0:54:14", "remaining_time": "0:01:52", "throughput": 5537.97, "total_tokens": 18022480}
|
|
{"current_steps": 36620, "total_steps": 37885, "loss": 0.0487, "lr": 6.795756185891899e-09, "epoch": 4.833047380229642, "percentage": 96.66, "elapsed_time": "0:54:14", "remaining_time": "0:01:52", "throughput": 5538.12, "total_tokens": 18024848}
|
|
{"current_steps": 36625, "total_steps": 37885, "loss": 0.0, "lr": 6.742243510159396e-09, "epoch": 4.833707272007391, "percentage": 96.67, "elapsed_time": "0:54:15", "remaining_time": "0:01:51", "throughput": 5538.26, "total_tokens": 18027152}
|
|
{"current_steps": 36630, "total_steps": 37885, "loss": 0.0, "lr": 6.688941646193047e-09, "epoch": 4.8343671637851395, "percentage": 96.69, "elapsed_time": "0:54:15", "remaining_time": "0:01:51", "throughput": 5538.44, "total_tokens": 18029584}
|
|
{"current_steps": 36635, "total_steps": 37885, "loss": 0.0, "lr": 6.635850605305804e-09, "epoch": 4.835027055562888, "percentage": 96.7, "elapsed_time": "0:54:15", "remaining_time": "0:01:51", "throughput": 5538.6, "total_tokens": 18031952}
|
|
{"current_steps": 36640, "total_steps": 37885, "loss": 0.0, "lr": 6.582970398765986e-09, "epoch": 4.835686947340636, "percentage": 96.71, "elapsed_time": "0:54:16", "remaining_time": "0:01:50", "throughput": 5538.82, "total_tokens": 18034512}
|
|
{"current_steps": 36645, "total_steps": 37885, "loss": 0.0, "lr": 6.530301037796837e-09, "epoch": 4.836346839118384, "percentage": 96.73, "elapsed_time": "0:54:16", "remaining_time": "0:01:50", "throughput": 5539.08, "total_tokens": 18037200}
|
|
{"current_steps": 36650, "total_steps": 37885, "loss": 0.0, "lr": 6.477842533577194e-09, "epoch": 4.8370067308961335, "percentage": 96.74, "elapsed_time": "0:54:16", "remaining_time": "0:01:49", "throughput": 5539.35, "total_tokens": 18039952}
|
|
{"current_steps": 36655, "total_steps": 37885, "loss": 0.0337, "lr": 6.4255948972409265e-09, "epoch": 4.837666622673882, "percentage": 96.75, "elapsed_time": "0:54:17", "remaining_time": "0:01:49", "throughput": 5539.52, "total_tokens": 18042320}
|
|
{"current_steps": 36660, "total_steps": 37885, "loss": 0.0, "lr": 6.3735581398772775e-09, "epoch": 4.83832651445163, "percentage": 96.77, "elapsed_time": "0:54:17", "remaining_time": "0:01:48", "throughput": 5539.71, "total_tokens": 18044752}
|
|
{"current_steps": 36665, "total_steps": 37885, "loss": 0.0, "lr": 6.321732272530633e-09, "epoch": 4.838986406229378, "percentage": 96.78, "elapsed_time": "0:54:17", "remaining_time": "0:01:48", "throughput": 5539.96, "total_tokens": 18047440}
|
|
{"current_steps": 36670, "total_steps": 37885, "loss": 0.0502, "lr": 6.2701173062006396e-09, "epoch": 4.839646298007127, "percentage": 96.79, "elapsed_time": "0:54:18", "remaining_time": "0:01:47", "throughput": 5540.09, "total_tokens": 18049680}
|
|
{"current_steps": 36675, "total_steps": 37885, "loss": 0.0381, "lr": 6.2187132518422004e-09, "epoch": 4.840306189784875, "percentage": 96.81, "elapsed_time": "0:54:18", "remaining_time": "0:01:47", "throughput": 5540.3, "total_tokens": 18052176}
|
|
{"current_steps": 36680, "total_steps": 37885, "loss": 0.0, "lr": 6.167520120365477e-09, "epoch": 4.840966081562624, "percentage": 96.82, "elapsed_time": "0:54:18", "remaining_time": "0:01:47", "throughput": 5540.6, "total_tokens": 18054992}
|
|
{"current_steps": 36685, "total_steps": 37885, "loss": 0.0001, "lr": 6.1165379226358895e-09, "epoch": 4.841625973340372, "percentage": 96.83, "elapsed_time": "0:54:19", "remaining_time": "0:01:46", "throughput": 5540.8, "total_tokens": 18057488}
|
|
{"current_steps": 36690, "total_steps": 37885, "loss": 0.0, "lr": 6.065766669474004e-09, "epoch": 4.842285865118121, "percentage": 96.85, "elapsed_time": "0:54:19", "remaining_time": "0:01:46", "throughput": 5541.0, "total_tokens": 18059984}
|
|
{"current_steps": 36695, "total_steps": 37885, "loss": 0.0, "lr": 6.015206371655535e-09, "epoch": 4.842945756895869, "percentage": 96.86, "elapsed_time": "0:54:19", "remaining_time": "0:01:45", "throughput": 5541.17, "total_tokens": 18062352}
|
|
{"current_steps": 36700, "total_steps": 37885, "loss": 0.0, "lr": 5.964857039911786e-09, "epoch": 4.843605648673617, "percentage": 96.87, "elapsed_time": "0:54:19", "remaining_time": "0:01:45", "throughput": 5541.26, "total_tokens": 18064464}
|
|
{"current_steps": 36705, "total_steps": 37885, "loss": 0.001, "lr": 5.914718684928766e-09, "epoch": 4.8442655404513655, "percentage": 96.89, "elapsed_time": "0:54:20", "remaining_time": "0:01:44", "throughput": 5541.45, "total_tokens": 18066896}
|
|
{"current_steps": 36710, "total_steps": 37885, "loss": 0.0009, "lr": 5.864791317348183e-09, "epoch": 4.844925432229115, "percentage": 96.9, "elapsed_time": "0:54:20", "remaining_time": "0:01:44", "throughput": 5541.63, "total_tokens": 18069328}
|
|
{"current_steps": 36715, "total_steps": 37885, "loss": 0.0, "lr": 5.815074947766674e-09, "epoch": 4.845585324006863, "percentage": 96.91, "elapsed_time": "0:54:20", "remaining_time": "0:01:43", "throughput": 5541.86, "total_tokens": 18071888}
|
|
{"current_steps": 36720, "total_steps": 37885, "loss": 0.0, "lr": 5.76556958673613e-09, "epoch": 4.846245215784611, "percentage": 96.92, "elapsed_time": "0:54:21", "remaining_time": "0:01:43", "throughput": 5542.08, "total_tokens": 18074448}
|
|
{"current_steps": 36725, "total_steps": 37885, "loss": 0.0, "lr": 5.716275244763813e-09, "epoch": 4.8469051075623595, "percentage": 96.94, "elapsed_time": "0:54:21", "remaining_time": "0:01:43", "throughput": 5542.3, "total_tokens": 18077008}
|
|
{"current_steps": 36730, "total_steps": 37885, "loss": 0.0, "lr": 5.667191932312021e-09, "epoch": 4.847564999340108, "percentage": 96.95, "elapsed_time": "0:54:21", "remaining_time": "0:01:42", "throughput": 5542.62, "total_tokens": 18079952}
|
|
{"current_steps": 36735, "total_steps": 37885, "loss": 0.0, "lr": 5.61831965979831e-09, "epoch": 4.848224891117857, "percentage": 96.96, "elapsed_time": "0:54:22", "remaining_time": "0:01:42", "throughput": 5542.81, "total_tokens": 18082384}
|
|
{"current_steps": 36740, "total_steps": 37885, "loss": 0.0, "lr": 5.5696584375956036e-09, "epoch": 4.848884782895605, "percentage": 96.98, "elapsed_time": "0:54:22", "remaining_time": "0:01:41", "throughput": 5543.0, "total_tokens": 18084816}
|
|
{"current_steps": 36745, "total_steps": 37885, "loss": 0.0, "lr": 5.5212082760316415e-09, "epoch": 4.8495446746733535, "percentage": 96.99, "elapsed_time": "0:54:22", "remaining_time": "0:01:41", "throughput": 5543.14, "total_tokens": 18087120}
|
|
{"current_steps": 36750, "total_steps": 37885, "loss": 0.0, "lr": 5.472969185389975e-09, "epoch": 4.850204566451102, "percentage": 97.0, "elapsed_time": "0:54:23", "remaining_time": "0:01:40", "throughput": 5543.38, "total_tokens": 18089744}
|
|
{"current_steps": 36755, "total_steps": 37885, "loss": 0.0, "lr": 5.424941175908637e-09, "epoch": 4.85086445822885, "percentage": 97.02, "elapsed_time": "0:54:23", "remaining_time": "0:01:40", "throughput": 5543.62, "total_tokens": 18092368}
|
|
{"current_steps": 36760, "total_steps": 37885, "loss": 0.0, "lr": 5.377124257781473e-09, "epoch": 4.851524350006599, "percentage": 97.03, "elapsed_time": "0:54:23", "remaining_time": "0:01:39", "throughput": 5543.84, "total_tokens": 18094928}
|
|
{"current_steps": 36765, "total_steps": 37885, "loss": 0.0, "lr": 5.329518441157144e-09, "epoch": 4.8521842417843475, "percentage": 97.04, "elapsed_time": "0:54:24", "remaining_time": "0:01:39", "throughput": 5543.92, "total_tokens": 18097040}
|
|
{"current_steps": 36770, "total_steps": 37885, "loss": 0.0003, "lr": 5.282123736139677e-09, "epoch": 4.852844133562096, "percentage": 97.06, "elapsed_time": "0:54:24", "remaining_time": "0:01:38", "throughput": 5544.06, "total_tokens": 18099280}
|
|
{"current_steps": 36775, "total_steps": 37885, "loss": 0.0, "lr": 5.234940152788358e-09, "epoch": 4.853504025339844, "percentage": 97.07, "elapsed_time": "0:54:24", "remaining_time": "0:01:38", "throughput": 5544.19, "total_tokens": 18101520}
|
|
{"current_steps": 36780, "total_steps": 37885, "loss": 0.0, "lr": 5.187967701117401e-09, "epoch": 4.854163917117592, "percentage": 97.08, "elapsed_time": "0:54:25", "remaining_time": "0:01:38", "throughput": 5544.28, "total_tokens": 18103632}
|
|
{"current_steps": 36785, "total_steps": 37885, "loss": 0.0, "lr": 5.141206391096387e-09, "epoch": 4.8548238088953415, "percentage": 97.1, "elapsed_time": "0:54:25", "remaining_time": "0:01:37", "throughput": 5544.45, "total_tokens": 18106000}
|
|
{"current_steps": 36790, "total_steps": 37885, "loss": 0.0066, "lr": 5.094656232650263e-09, "epoch": 4.85548370067309, "percentage": 97.11, "elapsed_time": "0:54:25", "remaining_time": "0:01:37", "throughput": 5544.69, "total_tokens": 18108624}
|
|
{"current_steps": 36795, "total_steps": 37885, "loss": 0.0, "lr": 5.0483172356586835e-09, "epoch": 4.856143592450838, "percentage": 97.12, "elapsed_time": "0:54:26", "remaining_time": "0:01:36", "throughput": 5544.9, "total_tokens": 18111120}
|
|
{"current_steps": 36800, "total_steps": 37885, "loss": 0.0, "lr": 5.002189409956892e-09, "epoch": 4.856803484228586, "percentage": 97.14, "elapsed_time": "0:54:26", "remaining_time": "0:01:36", "throughput": 5545.04, "total_tokens": 18113424}
|
|
{"current_steps": 36805, "total_steps": 37885, "loss": 0.0, "lr": 4.956272765335278e-09, "epoch": 4.857463376006335, "percentage": 97.15, "elapsed_time": "0:54:26", "remaining_time": "0:01:35", "throughput": 5545.2, "total_tokens": 18115792}
|
|
{"current_steps": 36810, "total_steps": 37885, "loss": 0.0039, "lr": 4.91056731153916e-09, "epoch": 4.858123267784084, "percentage": 97.16, "elapsed_time": "0:54:27", "remaining_time": "0:01:35", "throughput": 5545.45, "total_tokens": 18118416}
|
|
{"current_steps": 36815, "total_steps": 37885, "loss": 0.0, "lr": 4.865073058269331e-09, "epoch": 4.858783159561832, "percentage": 97.18, "elapsed_time": "0:54:27", "remaining_time": "0:01:34", "throughput": 5545.52, "total_tokens": 18120464}
|
|
{"current_steps": 36820, "total_steps": 37885, "loss": 0.0, "lr": 4.819790015181513e-09, "epoch": 4.85944305133958, "percentage": 97.19, "elapsed_time": "0:54:27", "remaining_time": "0:01:34", "throughput": 5545.72, "total_tokens": 18122960}
|
|
{"current_steps": 36825, "total_steps": 37885, "loss": 0.0657, "lr": 4.774718191886684e-09, "epoch": 4.860102943117329, "percentage": 97.2, "elapsed_time": "0:54:28", "remaining_time": "0:01:34", "throughput": 5545.94, "total_tokens": 18125520}
|
|
{"current_steps": 36830, "total_steps": 37885, "loss": 0.0, "lr": 4.729857597951081e-09, "epoch": 4.860762834895077, "percentage": 97.22, "elapsed_time": "0:54:28", "remaining_time": "0:01:33", "throughput": 5546.17, "total_tokens": 18128080}
|
|
{"current_steps": 36835, "total_steps": 37885, "loss": 0.0, "lr": 4.685208242896088e-09, "epoch": 4.861422726672826, "percentage": 97.23, "elapsed_time": "0:54:28", "remaining_time": "0:01:33", "throughput": 5546.22, "total_tokens": 18130064}
|
|
{"current_steps": 36840, "total_steps": 37885, "loss": 0.0, "lr": 4.6407701361981246e-09, "epoch": 4.862082618450574, "percentage": 97.24, "elapsed_time": "0:54:29", "remaining_time": "0:01:32", "throughput": 5546.46, "total_tokens": 18132688}
|
|
{"current_steps": 36845, "total_steps": 37885, "loss": 0.001, "lr": 4.5965432872888675e-09, "epoch": 4.862742510228323, "percentage": 97.25, "elapsed_time": "0:54:29", "remaining_time": "0:01:32", "throughput": 5546.69, "total_tokens": 18135248}
|
|
{"current_steps": 36850, "total_steps": 37885, "loss": 0.0, "lr": 4.552527705555032e-09, "epoch": 4.863402402006071, "percentage": 97.27, "elapsed_time": "0:54:29", "remaining_time": "0:01:31", "throughput": 5546.91, "total_tokens": 18137808}
|
|
{"current_steps": 36855, "total_steps": 37885, "loss": 0.001, "lr": 4.5087234003388094e-09, "epoch": 4.864062293783819, "percentage": 97.28, "elapsed_time": "0:54:30", "remaining_time": "0:01:31", "throughput": 5547.02, "total_tokens": 18139984}
|
|
{"current_steps": 36860, "total_steps": 37885, "loss": 0.0, "lr": 4.465130380937321e-09, "epoch": 4.864722185561568, "percentage": 97.29, "elapsed_time": "0:54:30", "remaining_time": "0:01:30", "throughput": 5547.16, "total_tokens": 18142288}
|
|
{"current_steps": 36865, "total_steps": 37885, "loss": 0.0, "lr": 4.42174865660283e-09, "epoch": 4.865382077339317, "percentage": 97.31, "elapsed_time": "0:54:30", "remaining_time": "0:01:30", "throughput": 5547.4, "total_tokens": 18144912}
|
|
{"current_steps": 36870, "total_steps": 37885, "loss": 0.0, "lr": 4.37857823654264e-09, "epoch": 4.866041969117065, "percentage": 97.32, "elapsed_time": "0:54:31", "remaining_time": "0:01:30", "throughput": 5547.68, "total_tokens": 18147664}
|
|
{"current_steps": 36875, "total_steps": 37885, "loss": 0.0061, "lr": 4.335619129919643e-09, "epoch": 4.866701860894813, "percentage": 97.33, "elapsed_time": "0:54:31", "remaining_time": "0:01:29", "throughput": 5547.88, "total_tokens": 18150160}
|
|
{"current_steps": 36880, "total_steps": 37885, "loss": 0.0001, "lr": 4.292871345851323e-09, "epoch": 4.867361752672561, "percentage": 97.35, "elapsed_time": "0:54:31", "remaining_time": "0:01:29", "throughput": 5548.08, "total_tokens": 18152656}
|
|
{"current_steps": 36885, "total_steps": 37885, "loss": 0.0, "lr": 4.250334893410867e-09, "epoch": 4.868021644450311, "percentage": 97.36, "elapsed_time": "0:54:32", "remaining_time": "0:01:28", "throughput": 5548.25, "total_tokens": 18155024}
|
|
{"current_steps": 36890, "total_steps": 37885, "loss": 0.0, "lr": 4.208009781626054e-09, "epoch": 4.868681536228059, "percentage": 97.37, "elapsed_time": "0:54:32", "remaining_time": "0:01:28", "throughput": 5548.45, "total_tokens": 18157584}
|
|
{"current_steps": 36895, "total_steps": 37885, "loss": 0.0133, "lr": 4.165896019480253e-09, "epoch": 4.869341428005807, "percentage": 97.39, "elapsed_time": "0:54:32", "remaining_time": "0:01:27", "throughput": 5548.66, "total_tokens": 18160080}
|
|
{"current_steps": 36900, "total_steps": 37885, "loss": 0.0, "lr": 4.123993615911759e-09, "epoch": 4.870001319783555, "percentage": 97.4, "elapsed_time": "0:54:33", "remaining_time": "0:01:27", "throughput": 5548.81, "total_tokens": 18162384}
|
|
{"current_steps": 36905, "total_steps": 37885, "loss": 0.0, "lr": 4.082302579814012e-09, "epoch": 4.870661211561304, "percentage": 97.41, "elapsed_time": "0:54:33", "remaining_time": "0:01:26", "throughput": 5549.05, "total_tokens": 18165008}
|
|
{"current_steps": 36910, "total_steps": 37885, "loss": 0.0054, "lr": 4.040822920035713e-09, "epoch": 4.871321103339053, "percentage": 97.43, "elapsed_time": "0:54:33", "remaining_time": "0:01:26", "throughput": 5549.17, "total_tokens": 18167248}
|
|
{"current_steps": 36915, "total_steps": 37885, "loss": 0.0, "lr": 3.999554645380487e-09, "epoch": 4.871980995116801, "percentage": 97.44, "elapsed_time": "0:54:34", "remaining_time": "0:01:26", "throughput": 5549.36, "total_tokens": 18169680}
|
|
{"current_steps": 36920, "total_steps": 37885, "loss": 0.0, "lr": 3.958497764607438e-09, "epoch": 4.872640886894549, "percentage": 97.45, "elapsed_time": "0:54:34", "remaining_time": "0:01:25", "throughput": 5549.49, "total_tokens": 18171920}
|
|
{"current_steps": 36925, "total_steps": 37885, "loss": 0.0, "lr": 3.917652286430484e-09, "epoch": 4.873300778672298, "percentage": 97.47, "elapsed_time": "0:54:34", "remaining_time": "0:01:25", "throughput": 5549.67, "total_tokens": 18174352}
|
|
{"current_steps": 36930, "total_steps": 37885, "loss": 0.0549, "lr": 3.87701821951869e-09, "epoch": 4.873960670450046, "percentage": 97.48, "elapsed_time": "0:54:35", "remaining_time": "0:01:24", "throughput": 5549.87, "total_tokens": 18176848}
|
|
{"current_steps": 36935, "total_steps": 37885, "loss": 0.0, "lr": 3.836595572496493e-09, "epoch": 4.874620562227794, "percentage": 97.49, "elapsed_time": "0:54:35", "remaining_time": "0:01:24", "throughput": 5550.05, "total_tokens": 18179280}
|
|
{"current_steps": 36940, "total_steps": 37885, "loss": 0.0, "lr": 3.796384353943138e-09, "epoch": 4.875280454005543, "percentage": 97.51, "elapsed_time": "0:54:35", "remaining_time": "0:01:23", "throughput": 5550.25, "total_tokens": 18181776}
|
|
{"current_steps": 36945, "total_steps": 37885, "loss": 0.0001, "lr": 3.756384572393357e-09, "epoch": 4.875940345783292, "percentage": 97.52, "elapsed_time": "0:54:36", "remaining_time": "0:01:23", "throughput": 5550.52, "total_tokens": 18184528}
|
|
{"current_steps": 36950, "total_steps": 37885, "loss": 0.0, "lr": 3.7165962363366888e-09, "epoch": 4.87660023756104, "percentage": 97.53, "elapsed_time": "0:54:36", "remaining_time": "0:01:22", "throughput": 5550.83, "total_tokens": 18187408}
|
|
{"current_steps": 36955, "total_steps": 37885, "loss": 0.0, "lr": 3.677019354217936e-09, "epoch": 4.877260129338788, "percentage": 97.55, "elapsed_time": "0:54:36", "remaining_time": "0:01:22", "throughput": 5551.05, "total_tokens": 18189968}
|
|
{"current_steps": 36960, "total_steps": 37885, "loss": 0.0, "lr": 3.637653934437046e-09, "epoch": 4.8779200211165366, "percentage": 97.56, "elapsed_time": "0:54:37", "remaining_time": "0:01:22", "throughput": 5551.24, "total_tokens": 18192400}
|
|
{"current_steps": 36965, "total_steps": 37885, "loss": 0.0, "lr": 3.5984999853490017e-09, "epoch": 4.878579912894286, "percentage": 97.57, "elapsed_time": "0:54:37", "remaining_time": "0:01:21", "throughput": 5551.47, "total_tokens": 18195024}
|
|
{"current_steps": 36970, "total_steps": 37885, "loss": 0.0, "lr": 3.5595575152639333e-09, "epoch": 4.879239804672034, "percentage": 97.58, "elapsed_time": "0:54:37", "remaining_time": "0:01:21", "throughput": 5551.66, "total_tokens": 18197456}
|
|
{"current_steps": 36975, "total_steps": 37885, "loss": 0.0, "lr": 3.5208265324472297e-09, "epoch": 4.879899696449782, "percentage": 97.6, "elapsed_time": "0:54:38", "remaining_time": "0:01:20", "throughput": 5551.95, "total_tokens": 18200272}
|
|
{"current_steps": 36980, "total_steps": 37885, "loss": 0.0, "lr": 3.4823070451190926e-09, "epoch": 4.8805595882275306, "percentage": 97.61, "elapsed_time": "0:54:38", "remaining_time": "0:01:20", "throughput": 5552.21, "total_tokens": 18202960}
|
|
{"current_steps": 36985, "total_steps": 37885, "loss": 0.0, "lr": 3.443999061455094e-09, "epoch": 4.881219480005279, "percentage": 97.62, "elapsed_time": "0:54:38", "remaining_time": "0:01:19", "throughput": 5552.3, "total_tokens": 18205136}
|
|
{"current_steps": 36990, "total_steps": 37885, "loss": 0.0, "lr": 3.4059025895857295e-09, "epoch": 4.881879371783027, "percentage": 97.64, "elapsed_time": "0:54:39", "remaining_time": "0:01:19", "throughput": 5552.42, "total_tokens": 18207376}
|
|
{"current_steps": 36995, "total_steps": 37885, "loss": 0.0, "lr": 3.368017637596865e-09, "epoch": 4.882539263560776, "percentage": 97.65, "elapsed_time": "0:54:39", "remaining_time": "0:01:18", "throughput": 5552.66, "total_tokens": 18210000}
|
|
{"current_steps": 37000, "total_steps": 37885, "loss": 0.0, "lr": 3.330344213529179e-09, "epoch": 4.8831991553385246, "percentage": 97.66, "elapsed_time": "0:54:39", "remaining_time": "0:01:18", "throughput": 5552.88, "total_tokens": 18212560}
|
|
{"current_steps": 37005, "total_steps": 37885, "loss": 0.0, "lr": 3.29288232537861e-09, "epoch": 4.883859047116273, "percentage": 97.68, "elapsed_time": "0:54:40", "remaining_time": "0:01:18", "throughput": 5553.01, "total_tokens": 18214800}
|
|
{"current_steps": 37010, "total_steps": 37885, "loss": 0.0, "lr": 3.2556319810961297e-09, "epoch": 4.884518938894021, "percentage": 97.69, "elapsed_time": "0:54:40", "remaining_time": "0:01:17", "throughput": 5553.17, "total_tokens": 18217168}
|
|
{"current_steps": 37015, "total_steps": 37885, "loss": 0.0, "lr": 3.21859318858797e-09, "epoch": 4.885178830671769, "percentage": 97.7, "elapsed_time": "0:54:40", "remaining_time": "0:01:17", "throughput": 5553.45, "total_tokens": 18219920}
|
|
{"current_steps": 37020, "total_steps": 37885, "loss": 0.0, "lr": 3.1817659557152876e-09, "epoch": 4.885838722449519, "percentage": 97.72, "elapsed_time": "0:54:41", "remaining_time": "0:01:16", "throughput": 5553.62, "total_tokens": 18222352}
|
|
{"current_steps": 37025, "total_steps": 37885, "loss": 0.0, "lr": 3.1451502902943848e-09, "epoch": 4.886498614227267, "percentage": 97.73, "elapsed_time": "0:54:41", "remaining_time": "0:01:16", "throughput": 5553.82, "total_tokens": 18224848}
|
|
{"current_steps": 37030, "total_steps": 37885, "loss": 0.0782, "lr": 3.1087462000967124e-09, "epoch": 4.887158506005015, "percentage": 97.74, "elapsed_time": "0:54:41", "remaining_time": "0:01:15", "throughput": 5554.01, "total_tokens": 18227280}
|
|
{"current_steps": 37035, "total_steps": 37885, "loss": 0.001, "lr": 3.0725536928486452e-09, "epoch": 4.887818397782763, "percentage": 97.76, "elapsed_time": "0:54:42", "remaining_time": "0:01:15", "throughput": 5554.19, "total_tokens": 18229712}
|
|
{"current_steps": 37040, "total_steps": 37885, "loss": 0.0, "lr": 3.036572776231927e-09, "epoch": 4.888478289560512, "percentage": 97.77, "elapsed_time": "0:54:42", "remaining_time": "0:01:14", "throughput": 5554.44, "total_tokens": 18232400}
|
|
{"current_steps": 37045, "total_steps": 37885, "loss": 0.0213, "lr": 3.0008034578832274e-09, "epoch": 4.889138181338261, "percentage": 97.78, "elapsed_time": "0:54:42", "remaining_time": "0:01:14", "throughput": 5554.64, "total_tokens": 18234896}
|
|
{"current_steps": 37050, "total_steps": 37885, "loss": 0.0, "lr": 2.9652457453942515e-09, "epoch": 4.889798073116009, "percentage": 97.8, "elapsed_time": "0:54:43", "remaining_time": "0:01:13", "throughput": 5554.84, "total_tokens": 18237392}
|
|
{"current_steps": 37055, "total_steps": 37885, "loss": 0.0, "lr": 2.9298996463119618e-09, "epoch": 4.890457964893757, "percentage": 97.81, "elapsed_time": "0:54:43", "remaining_time": "0:01:13", "throughput": 5555.09, "total_tokens": 18240080}
|
|
{"current_steps": 37060, "total_steps": 37885, "loss": 0.0, "lr": 2.894765168138247e-09, "epoch": 4.891117856671506, "percentage": 97.82, "elapsed_time": "0:54:43", "remaining_time": "0:01:13", "throughput": 5555.25, "total_tokens": 18242448}
|
|
{"current_steps": 37065, "total_steps": 37885, "loss": 0.0164, "lr": 2.85984231833003e-09, "epoch": 4.891777748449254, "percentage": 97.84, "elapsed_time": "0:54:44", "remaining_time": "0:01:12", "throughput": 5555.44, "total_tokens": 18244880}
|
|
{"current_steps": 37070, "total_steps": 37885, "loss": 0.0466, "lr": 2.825131104299716e-09, "epoch": 4.892437640227003, "percentage": 97.85, "elapsed_time": "0:54:44", "remaining_time": "0:01:12", "throughput": 5555.6, "total_tokens": 18247248}
|
|
{"current_steps": 37075, "total_steps": 37885, "loss": 0.0, "lr": 2.7906315334143004e-09, "epoch": 4.893097532004751, "percentage": 97.86, "elapsed_time": "0:54:44", "remaining_time": "0:01:11", "throughput": 5555.79, "total_tokens": 18249680}
|
|
{"current_steps": 37080, "total_steps": 37885, "loss": 0.0072, "lr": 2.756343612996148e-09, "epoch": 4.8937574237825, "percentage": 97.88, "elapsed_time": "0:54:45", "remaining_time": "0:01:11", "throughput": 5555.92, "total_tokens": 18251920}
|
|
{"current_steps": 37085, "total_steps": 37885, "loss": 0.0, "lr": 2.722267350322549e-09, "epoch": 4.894417315560248, "percentage": 97.89, "elapsed_time": "0:54:45", "remaining_time": "0:01:10", "throughput": 5556.18, "total_tokens": 18254608}
|
|
{"current_steps": 37090, "total_steps": 37885, "loss": 0.0, "lr": 2.6884027526259403e-09, "epoch": 4.895077207337996, "percentage": 97.9, "elapsed_time": "0:54:45", "remaining_time": "0:01:10", "throughput": 5556.36, "total_tokens": 18257040}
|
|
{"current_steps": 37095, "total_steps": 37885, "loss": 0.0, "lr": 2.654749827093905e-09, "epoch": 4.895737099115745, "percentage": 97.91, "elapsed_time": "0:54:46", "remaining_time": "0:01:09", "throughput": 5556.54, "total_tokens": 18259472}
|
|
{"current_steps": 37100, "total_steps": 37885, "loss": 0.0352, "lr": 2.6213085808691747e-09, "epoch": 4.896396990893494, "percentage": 97.93, "elapsed_time": "0:54:46", "remaining_time": "0:01:09", "throughput": 5556.71, "total_tokens": 18261840}
|
|
{"current_steps": 37105, "total_steps": 37885, "loss": 0.0, "lr": 2.588079021049072e-09, "epoch": 4.897056882671242, "percentage": 97.94, "elapsed_time": "0:54:46", "remaining_time": "0:01:09", "throughput": 5556.94, "total_tokens": 18264464}
|
|
{"current_steps": 37110, "total_steps": 37885, "loss": 0.0, "lr": 2.5550611546866217e-09, "epoch": 4.89771677444899, "percentage": 97.95, "elapsed_time": "0:54:47", "remaining_time": "0:01:08", "throughput": 5557.1, "total_tokens": 18266832}
|
|
{"current_steps": 37115, "total_steps": 37885, "loss": 0.0, "lr": 2.5222549887893295e-09, "epoch": 4.8983766662267385, "percentage": 97.97, "elapsed_time": "0:54:47", "remaining_time": "0:01:08", "throughput": 5557.28, "total_tokens": 18269264}
|
|
{"current_steps": 37120, "total_steps": 37885, "loss": 0.0, "lr": 2.4896605303204034e-09, "epoch": 4.899036558004488, "percentage": 97.98, "elapsed_time": "0:54:47", "remaining_time": "0:01:07", "throughput": 5557.48, "total_tokens": 18271760}
|
|
{"current_steps": 37125, "total_steps": 37885, "loss": 0.0, "lr": 2.4572777861976425e-09, "epoch": 4.899696449782236, "percentage": 97.99, "elapsed_time": "0:54:48", "remaining_time": "0:01:07", "throughput": 5557.6, "total_tokens": 18274000}
|
|
{"current_steps": 37130, "total_steps": 37885, "loss": 0.0, "lr": 2.425106763293994e-09, "epoch": 4.900356341559984, "percentage": 98.01, "elapsed_time": "0:54:48", "remaining_time": "0:01:06", "throughput": 5557.77, "total_tokens": 18276368}
|
|
{"current_steps": 37135, "total_steps": 37885, "loss": 0.0, "lr": 2.393147468437551e-09, "epoch": 4.9010162333377325, "percentage": 98.02, "elapsed_time": "0:54:48", "remaining_time": "0:01:06", "throughput": 5557.95, "total_tokens": 18278800}
|
|
{"current_steps": 37140, "total_steps": 37885, "loss": 0.031, "lr": 2.3613999084114434e-09, "epoch": 4.901676125115481, "percentage": 98.03, "elapsed_time": "0:54:49", "remaining_time": "0:01:05", "throughput": 5558.13, "total_tokens": 18281232}
|
|
{"current_steps": 37145, "total_steps": 37885, "loss": 0.0, "lr": 2.329864089953837e-09, "epoch": 4.90233601689323, "percentage": 98.05, "elapsed_time": "0:54:49", "remaining_time": "0:01:05", "throughput": 5558.28, "total_tokens": 18283536}
|
|
{"current_steps": 37150, "total_steps": 37885, "loss": 0.0352, "lr": 2.298540019758155e-09, "epoch": 4.902995908670978, "percentage": 98.06, "elapsed_time": "0:54:49", "remaining_time": "0:01:05", "throughput": 5558.45, "total_tokens": 18285904}
|
|
{"current_steps": 37155, "total_steps": 37885, "loss": 0.0, "lr": 2.2674277044724134e-09, "epoch": 4.9036558004487265, "percentage": 98.07, "elapsed_time": "0:54:50", "remaining_time": "0:01:04", "throughput": 5558.63, "total_tokens": 18288336}
|
|
{"current_steps": 37160, "total_steps": 37885, "loss": 0.0001, "lr": 2.236527150700218e-09, "epoch": 4.904315692226475, "percentage": 98.09, "elapsed_time": "0:54:50", "remaining_time": "0:01:04", "throughput": 5558.83, "total_tokens": 18290832}
|
|
{"current_steps": 37165, "total_steps": 37885, "loss": 0.0, "lr": 2.205838364999879e-09, "epoch": 4.904975584004223, "percentage": 98.1, "elapsed_time": "0:54:50", "remaining_time": "0:01:03", "throughput": 5559.01, "total_tokens": 18293264}
|
|
{"current_steps": 37170, "total_steps": 37885, "loss": 0.0, "lr": 2.1753613538849636e-09, "epoch": 4.905635475781972, "percentage": 98.11, "elapsed_time": "0:54:51", "remaining_time": "0:01:03", "throughput": 5559.18, "total_tokens": 18295632}
|
|
{"current_steps": 37175, "total_steps": 37885, "loss": 0.0007, "lr": 2.145096123823853e-09, "epoch": 4.9062953675597205, "percentage": 98.13, "elapsed_time": "0:54:51", "remaining_time": "0:01:02", "throughput": 5559.41, "total_tokens": 18298256}
|
|
{"current_steps": 37180, "total_steps": 37885, "loss": 0.0, "lr": 2.1150426812401866e-09, "epoch": 4.906955259337469, "percentage": 98.14, "elapsed_time": "0:54:51", "remaining_time": "0:01:02", "throughput": 5559.49, "total_tokens": 18300304}
|
|
{"current_steps": 37185, "total_steps": 37885, "loss": 0.0006, "lr": 2.0852010325125293e-09, "epoch": 4.907615151115217, "percentage": 98.15, "elapsed_time": "0:54:52", "remaining_time": "0:01:01", "throughput": 5559.74, "total_tokens": 18302992}
|
|
{"current_steps": 37190, "total_steps": 37885, "loss": 0.0, "lr": 2.0555711839747026e-09, "epoch": 4.908275042892965, "percentage": 98.17, "elapsed_time": "0:54:52", "remaining_time": "0:01:01", "throughput": 5559.92, "total_tokens": 18305424}
|
|
{"current_steps": 37195, "total_steps": 37885, "loss": 0.0, "lr": 2.0261531419153433e-09, "epoch": 4.908934934670714, "percentage": 98.18, "elapsed_time": "0:54:52", "remaining_time": "0:01:01", "throughput": 5560.03, "total_tokens": 18307664}
|
|
{"current_steps": 37200, "total_steps": 37885, "loss": 0.0004, "lr": 1.9969469125782346e-09, "epoch": 4.909594826448463, "percentage": 98.19, "elapsed_time": "0:54:53", "remaining_time": "0:01:00", "throughput": 5560.25, "total_tokens": 18310288}
|
|
{"current_steps": 37205, "total_steps": 37885, "loss": 0.0, "lr": 1.9679525021621955e-09, "epoch": 4.910254718226211, "percentage": 98.21, "elapsed_time": "0:54:53", "remaining_time": "0:01:00", "throughput": 5560.42, "total_tokens": 18312720}
|
|
{"current_steps": 37210, "total_steps": 37885, "loss": 0.0, "lr": 1.939169916820971e-09, "epoch": 4.910914610003959, "percentage": 98.22, "elapsed_time": "0:54:53", "remaining_time": "0:00:59", "throughput": 5560.69, "total_tokens": 18315536}
|
|
{"current_steps": 37215, "total_steps": 37885, "loss": 0.0, "lr": 1.910599162663673e-09, "epoch": 4.911574501781708, "percentage": 98.23, "elapsed_time": "0:54:54", "remaining_time": "0:00:59", "throughput": 5560.86, "total_tokens": 18317968}
|
|
{"current_steps": 37220, "total_steps": 37885, "loss": 0.0001, "lr": 1.8822402457540075e-09, "epoch": 4.912234393559456, "percentage": 98.24, "elapsed_time": "0:54:54", "remaining_time": "0:00:58", "throughput": 5561.07, "total_tokens": 18320528}
|
|
{"current_steps": 37225, "total_steps": 37885, "loss": 0.0, "lr": 1.8540931721110487e-09, "epoch": 4.912894285337205, "percentage": 98.26, "elapsed_time": "0:54:54", "remaining_time": "0:00:58", "throughput": 5561.24, "total_tokens": 18322960}
|
|
{"current_steps": 37230, "total_steps": 37885, "loss": 0.0001, "lr": 1.8261579477087951e-09, "epoch": 4.913554177114953, "percentage": 98.27, "elapsed_time": "0:54:55", "remaining_time": "0:00:57", "throughput": 5561.48, "total_tokens": 18325584}
|
|
{"current_steps": 37235, "total_steps": 37885, "loss": 0.0, "lr": 1.7984345784763932e-09, "epoch": 4.914214068892702, "percentage": 98.28, "elapsed_time": "0:54:55", "remaining_time": "0:00:57", "throughput": 5561.65, "total_tokens": 18328016}
|
|
{"current_steps": 37240, "total_steps": 37885, "loss": 0.0, "lr": 1.770923070297803e-09, "epoch": 4.91487396067045, "percentage": 98.3, "elapsed_time": "0:54:55", "remaining_time": "0:00:57", "throughput": 5561.78, "total_tokens": 18330256}
|
|
{"current_steps": 37245, "total_steps": 37885, "loss": 0.0533, "lr": 1.743623429012131e-09, "epoch": 4.915533852448198, "percentage": 98.31, "elapsed_time": "0:54:56", "remaining_time": "0:00:56", "throughput": 5562.06, "total_tokens": 18333072}
|
|
{"current_steps": 37250, "total_steps": 37885, "loss": 0.0, "lr": 1.7165356604136317e-09, "epoch": 4.9161937442259465, "percentage": 98.32, "elapsed_time": "0:54:56", "remaining_time": "0:00:56", "throughput": 5562.22, "total_tokens": 18335440}
|
|
{"current_steps": 37255, "total_steps": 37885, "loss": 0.0, "lr": 1.6896597702514837e-09, "epoch": 4.916853636003696, "percentage": 98.34, "elapsed_time": "0:54:56", "remaining_time": "0:00:55", "throughput": 5562.35, "total_tokens": 18337680}
|
|
{"current_steps": 37260, "total_steps": 37885, "loss": 0.0028, "lr": 1.6629957642297908e-09, "epoch": 4.917513527781444, "percentage": 98.35, "elapsed_time": "0:54:57", "remaining_time": "0:00:55", "throughput": 5562.64, "total_tokens": 18340496}
|
|
{"current_steps": 37265, "total_steps": 37885, "loss": 0.0, "lr": 1.6365436480079153e-09, "epoch": 4.918173419559192, "percentage": 98.36, "elapsed_time": "0:54:57", "remaining_time": "0:00:54", "throughput": 5562.82, "total_tokens": 18342928}
|
|
{"current_steps": 37270, "total_steps": 37885, "loss": 0.0, "lr": 1.6103034272000326e-09, "epoch": 4.9188333113369405, "percentage": 98.38, "elapsed_time": "0:54:57", "remaining_time": "0:00:54", "throughput": 5563.03, "total_tokens": 18345488}
|
|
{"current_steps": 37275, "total_steps": 37885, "loss": 0.0, "lr": 1.5842751073753546e-09, "epoch": 4.919493203114689, "percentage": 98.39, "elapsed_time": "0:54:58", "remaining_time": "0:00:53", "throughput": 5563.26, "total_tokens": 18348112}
|
|
{"current_steps": 37280, "total_steps": 37885, "loss": 0.0, "lr": 1.5584586940584622e-09, "epoch": 4.920153094892438, "percentage": 98.4, "elapsed_time": "0:54:58", "remaining_time": "0:00:53", "throughput": 5563.41, "total_tokens": 18350416}
|
|
{"current_steps": 37285, "total_steps": 37885, "loss": 0.028, "lr": 1.5328541927286387e-09, "epoch": 4.920812986670186, "percentage": 98.42, "elapsed_time": "0:54:58", "remaining_time": "0:00:53", "throughput": 5563.6, "total_tokens": 18352912}
|
|
{"current_steps": 37290, "total_steps": 37885, "loss": 0.0, "lr": 1.507461608819982e-09, "epoch": 4.9214728784479345, "percentage": 98.43, "elapsed_time": "0:54:59", "remaining_time": "0:00:52", "throughput": 5563.84, "total_tokens": 18355536}
|
|
{"current_steps": 37295, "total_steps": 37885, "loss": 0.0, "lr": 1.4822809477222919e-09, "epoch": 4.922132770225683, "percentage": 98.44, "elapsed_time": "0:54:59", "remaining_time": "0:00:52", "throughput": 5563.99, "total_tokens": 18357840}
|
|
{"current_steps": 37300, "total_steps": 37885, "loss": 0.0004, "lr": 1.457312214779627e-09, "epoch": 4.922792662003431, "percentage": 98.46, "elapsed_time": "0:54:59", "remaining_time": "0:00:51", "throughput": 5564.19, "total_tokens": 18360336}
|
|
{"current_steps": 37305, "total_steps": 37885, "loss": 0.0, "lr": 1.4325554152916364e-09, "epoch": 4.92345255378118, "percentage": 98.47, "elapsed_time": "0:55:00", "remaining_time": "0:00:51", "throughput": 5564.49, "total_tokens": 18363216}
|
|
{"current_steps": 37310, "total_steps": 37885, "loss": 0.0, "lr": 1.408010554512673e-09, "epoch": 4.9241124455589285, "percentage": 98.48, "elapsed_time": "0:55:00", "remaining_time": "0:00:50", "throughput": 5564.76, "total_tokens": 18365968}
|
|
{"current_steps": 37315, "total_steps": 37885, "loss": 0.0, "lr": 1.3836776376522364e-09, "epoch": 4.924772337336677, "percentage": 98.5, "elapsed_time": "0:55:00", "remaining_time": "0:00:50", "throughput": 5564.95, "total_tokens": 18368400}
|
|
{"current_steps": 37320, "total_steps": 37885, "loss": 0.0, "lr": 1.3595566698748617e-09, "epoch": 4.925432229114425, "percentage": 98.51, "elapsed_time": "0:55:01", "remaining_time": "0:00:49", "throughput": 5565.1, "total_tokens": 18370704}
|
|
{"current_steps": 37325, "total_steps": 37885, "loss": 0.0, "lr": 1.3356476562998986e-09, "epoch": 4.926092120892173, "percentage": 98.52, "elapsed_time": "0:55:01", "remaining_time": "0:00:49", "throughput": 5565.32, "total_tokens": 18373264}
|
|
{"current_steps": 37330, "total_steps": 37885, "loss": 0.0001, "lr": 1.3119506020020653e-09, "epoch": 4.9267520126699225, "percentage": 98.54, "elapsed_time": "0:55:01", "remaining_time": "0:00:49", "throughput": 5565.48, "total_tokens": 18375632}
|
|
{"current_steps": 37335, "total_steps": 37885, "loss": 0.0226, "lr": 1.2884655120107835e-09, "epoch": 4.927411904447671, "percentage": 98.55, "elapsed_time": "0:55:02", "remaining_time": "0:00:48", "throughput": 5565.69, "total_tokens": 18378192}
|
|
{"current_steps": 37340, "total_steps": 37885, "loss": 0.0, "lr": 1.26519239131051e-09, "epoch": 4.928071796225419, "percentage": 98.56, "elapsed_time": "0:55:02", "remaining_time": "0:00:48", "throughput": 5565.85, "total_tokens": 18380560}
|
|
{"current_steps": 37345, "total_steps": 37885, "loss": 0.0, "lr": 1.2421312448408494e-09, "epoch": 4.928731688003167, "percentage": 98.57, "elapsed_time": "0:55:02", "remaining_time": "0:00:47", "throughput": 5566.1, "total_tokens": 18383248}
|
|
{"current_steps": 37350, "total_steps": 37885, "loss": 0.0014, "lr": 1.2192820774965529e-09, "epoch": 4.929391579780916, "percentage": 98.59, "elapsed_time": "0:55:03", "remaining_time": "0:00:47", "throughput": 5566.27, "total_tokens": 18385616}
|
|
{"current_steps": 37355, "total_steps": 37885, "loss": 0.0, "lr": 1.1966448941269635e-09, "epoch": 4.930051471558665, "percentage": 98.6, "elapsed_time": "0:55:03", "remaining_time": "0:00:46", "throughput": 5566.36, "total_tokens": 18387728}
|
|
{"current_steps": 37360, "total_steps": 37885, "loss": 0.0, "lr": 1.1742196995366827e-09, "epoch": 4.930711363336413, "percentage": 98.61, "elapsed_time": "0:55:03", "remaining_time": "0:00:46", "throughput": 5566.53, "total_tokens": 18390096}
|
|
{"current_steps": 37365, "total_steps": 37885, "loss": 0.0, "lr": 1.1520064984853473e-09, "epoch": 4.931371255114161, "percentage": 98.63, "elapsed_time": "0:55:04", "remaining_time": "0:00:45", "throughput": 5566.74, "total_tokens": 18392656}
|
|
{"current_steps": 37370, "total_steps": 37885, "loss": 0.0, "lr": 1.1300052956876304e-09, "epoch": 4.93203114689191, "percentage": 98.64, "elapsed_time": "0:55:04", "remaining_time": "0:00:45", "throughput": 5566.93, "total_tokens": 18395088}
|
|
{"current_steps": 37375, "total_steps": 37885, "loss": 0.0, "lr": 1.1082160958129082e-09, "epoch": 4.932691038669658, "percentage": 98.65, "elapsed_time": "0:55:04", "remaining_time": "0:00:45", "throughput": 5567.09, "total_tokens": 18397456}
|
|
{"current_steps": 37380, "total_steps": 37885, "loss": 0.0, "lr": 1.0866389034860368e-09, "epoch": 4.933350930447407, "percentage": 98.67, "elapsed_time": "0:55:05", "remaining_time": "0:00:44", "throughput": 5567.3, "total_tokens": 18400016}
|
|
{"current_steps": 37385, "total_steps": 37885, "loss": 0.0, "lr": 1.0652737232864639e-09, "epoch": 4.934010822225155, "percentage": 98.68, "elapsed_time": "0:55:05", "remaining_time": "0:00:44", "throughput": 5567.53, "total_tokens": 18402640}
|
|
{"current_steps": 37390, "total_steps": 37885, "loss": 0.0, "lr": 1.0441205597487845e-09, "epoch": 4.934670714002904, "percentage": 98.69, "elapsed_time": "0:55:05", "remaining_time": "0:00:43", "throughput": 5567.7, "total_tokens": 18405072}
|
|
{"current_steps": 37395, "total_steps": 37885, "loss": 0.0001, "lr": 1.0231794173626296e-09, "epoch": 4.935330605780652, "percentage": 98.71, "elapsed_time": "0:55:06", "remaining_time": "0:00:43", "throughput": 5567.85, "total_tokens": 18407440}
|
|
{"current_steps": 37400, "total_steps": 37885, "loss": 0.0003, "lr": 1.002450300572666e-09, "epoch": 4.9359904975584, "percentage": 98.72, "elapsed_time": "0:55:06", "remaining_time": "0:00:42", "throughput": 5568.03, "total_tokens": 18409872}
|
|
{"current_steps": 37405, "total_steps": 37885, "loss": 0.0, "lr": 9.819332137784853e-10, "epoch": 4.936650389336149, "percentage": 98.73, "elapsed_time": "0:55:06", "remaining_time": "0:00:42", "throughput": 5568.23, "total_tokens": 18412432}
|
|
{"current_steps": 37410, "total_steps": 37885, "loss": 0.0016, "lr": 9.616281613347155e-10, "epoch": 4.937310281113898, "percentage": 98.75, "elapsed_time": "0:55:07", "remaining_time": "0:00:41", "throughput": 5568.33, "total_tokens": 18414608}
|
|
{"current_steps": 37415, "total_steps": 37885, "loss": 0.0044, "lr": 9.415351475507982e-10, "epoch": 4.937970172891646, "percentage": 98.76, "elapsed_time": "0:55:07", "remaining_time": "0:00:41", "throughput": 5568.61, "total_tokens": 18417424}
|
|
{"current_steps": 37420, "total_steps": 37885, "loss": 0.0, "lr": 9.216541766914332e-10, "epoch": 4.938630064669394, "percentage": 98.77, "elapsed_time": "0:55:07", "remaining_time": "0:00:41", "throughput": 5568.73, "total_tokens": 18419664}
|
|
{"current_steps": 37425, "total_steps": 37885, "loss": 0.0001, "lr": 9.019852529762451e-10, "epoch": 4.939289956447142, "percentage": 98.79, "elapsed_time": "0:55:08", "remaining_time": "0:00:40", "throughput": 5568.97, "total_tokens": 18422352}
|
|
{"current_steps": 37430, "total_steps": 37885, "loss": 0.0, "lr": 8.825283805796724e-10, "epoch": 4.939949848224892, "percentage": 98.8, "elapsed_time": "0:55:08", "remaining_time": "0:00:40", "throughput": 5569.19, "total_tokens": 18424912}
|
|
{"current_steps": 37435, "total_steps": 37885, "loss": 0.0, "lr": 8.632835636315227e-10, "epoch": 4.94060974000264, "percentage": 98.81, "elapsed_time": "0:55:08", "remaining_time": "0:00:39", "throughput": 5569.43, "total_tokens": 18427600}
|
|
{"current_steps": 37440, "total_steps": 37885, "loss": 0.0, "lr": 8.442508062163068e-10, "epoch": 4.941269631780388, "percentage": 98.83, "elapsed_time": "0:55:09", "remaining_time": "0:00:39", "throughput": 5569.61, "total_tokens": 18430032}
|
|
{"current_steps": 37445, "total_steps": 37885, "loss": 0.0, "lr": 8.254301123734597e-10, "epoch": 4.941929523558136, "percentage": 98.84, "elapsed_time": "0:55:09", "remaining_time": "0:00:38", "throughput": 5569.77, "total_tokens": 18432400}
|
|
{"current_steps": 37450, "total_steps": 37885, "loss": 0.0322, "lr": 8.068214860976752e-10, "epoch": 4.942589415335885, "percentage": 98.85, "elapsed_time": "0:55:09", "remaining_time": "0:00:38", "throughput": 5569.93, "total_tokens": 18434768}
|
|
{"current_steps": 37455, "total_steps": 37885, "loss": 0.0861, "lr": 7.884249313383495e-10, "epoch": 4.943249307113634, "percentage": 98.86, "elapsed_time": "0:55:10", "remaining_time": "0:00:38", "throughput": 5570.03, "total_tokens": 18436944}
|
|
{"current_steps": 37460, "total_steps": 37885, "loss": 0.0308, "lr": 7.702404520002481e-10, "epoch": 4.943909198891382, "percentage": 98.88, "elapsed_time": "0:55:10", "remaining_time": "0:00:37", "throughput": 5570.16, "total_tokens": 18439248}
|
|
{"current_steps": 37465, "total_steps": 37885, "loss": 0.0001, "lr": 7.522680519426173e-10, "epoch": 4.94456909066913, "percentage": 98.89, "elapsed_time": "0:55:10", "remaining_time": "0:00:37", "throughput": 5570.34, "total_tokens": 18441680}
|
|
{"current_steps": 37470, "total_steps": 37885, "loss": 0.0, "lr": 7.345077349801832e-10, "epoch": 4.945228982446879, "percentage": 98.9, "elapsed_time": "0:55:11", "remaining_time": "0:00:36", "throughput": 5570.43, "total_tokens": 18443856}
|
|
{"current_steps": 37475, "total_steps": 37885, "loss": 0.0, "lr": 7.169595048823751e-10, "epoch": 4.945888874224627, "percentage": 98.92, "elapsed_time": "0:55:11", "remaining_time": "0:00:36", "throughput": 5570.61, "total_tokens": 18446288}
|
|
{"current_steps": 37480, "total_steps": 37885, "loss": 0.0496, "lr": 6.996233653736583e-10, "epoch": 4.946548766002375, "percentage": 98.93, "elapsed_time": "0:55:11", "remaining_time": "0:00:35", "throughput": 5570.7, "total_tokens": 18448464}
|
|
{"current_steps": 37485, "total_steps": 37885, "loss": 0.0, "lr": 6.824993201334228e-10, "epoch": 4.947208657780124, "percentage": 98.94, "elapsed_time": "0:55:12", "remaining_time": "0:00:35", "throughput": 5570.84, "total_tokens": 18450768}
|
|
{"current_steps": 37490, "total_steps": 37885, "loss": 0.0, "lr": 6.655873727963168e-10, "epoch": 4.947868549557873, "percentage": 98.96, "elapsed_time": "0:55:12", "remaining_time": "0:00:34", "throughput": 5571.0, "total_tokens": 18453136}
|
|
{"current_steps": 37495, "total_steps": 37885, "loss": 0.0, "lr": 6.488875269516914e-10, "epoch": 4.948528441335621, "percentage": 98.97, "elapsed_time": "0:55:12", "remaining_time": "0:00:34", "throughput": 5571.15, "total_tokens": 18455504}
|
|
{"current_steps": 37500, "total_steps": 37885, "loss": 0.0, "lr": 6.323997861439334e-10, "epoch": 4.949188333113369, "percentage": 98.98, "elapsed_time": "0:55:13", "remaining_time": "0:00:34", "throughput": 5571.37, "total_tokens": 18458064}
|
|
{"current_steps": 37505, "total_steps": 37885, "loss": 0.0, "lr": 6.16124153872466e-10, "epoch": 4.9498482248911175, "percentage": 99.0, "elapsed_time": "0:55:13", "remaining_time": "0:00:33", "throughput": 5571.59, "total_tokens": 18460624}
|
|
{"current_steps": 37510, "total_steps": 37885, "loss": 0.0, "lr": 6.00060633591748e-10, "epoch": 4.950508116668866, "percentage": 99.01, "elapsed_time": "0:55:13", "remaining_time": "0:00:33", "throughput": 5571.79, "total_tokens": 18463120}
|
|
{"current_steps": 37515, "total_steps": 37885, "loss": 0.0012, "lr": 5.842092287109412e-10, "epoch": 4.951168008446615, "percentage": 99.02, "elapsed_time": "0:55:14", "remaining_time": "0:00:32", "throughput": 5571.99, "total_tokens": 18465616}
|
|
{"current_steps": 37520, "total_steps": 37885, "loss": 0.0, "lr": 5.685699425945767e-10, "epoch": 4.951827900224363, "percentage": 99.04, "elapsed_time": "0:55:14", "remaining_time": "0:00:32", "throughput": 5572.15, "total_tokens": 18467984}
|
|
{"current_steps": 37525, "total_steps": 37885, "loss": 0.0, "lr": 5.531427785619991e-10, "epoch": 4.9524877920021115, "percentage": 99.05, "elapsed_time": "0:55:14", "remaining_time": "0:00:31", "throughput": 5572.22, "total_tokens": 18470032}
|
|
{"current_steps": 37530, "total_steps": 37885, "loss": 0.0, "lr": 5.379277398873671e-10, "epoch": 4.95314768377986, "percentage": 99.06, "elapsed_time": "0:55:14", "remaining_time": "0:00:31", "throughput": 5572.36, "total_tokens": 18472336}
|
|
{"current_steps": 37535, "total_steps": 37885, "loss": 0.0022, "lr": 5.229248298000976e-10, "epoch": 4.953807575557608, "percentage": 99.08, "elapsed_time": "0:55:15", "remaining_time": "0:00:30", "throughput": 5572.64, "total_tokens": 18475088}
|
|
{"current_steps": 37540, "total_steps": 37885, "loss": 0.0252, "lr": 5.081340514843102e-10, "epoch": 4.954467467335357, "percentage": 99.09, "elapsed_time": "0:55:15", "remaining_time": "0:00:30", "throughput": 5572.88, "total_tokens": 18477776}
|
|
{"current_steps": 37545, "total_steps": 37885, "loss": 0.061, "lr": 4.935554080793825e-10, "epoch": 4.9551273591131055, "percentage": 99.1, "elapsed_time": "0:55:15", "remaining_time": "0:00:30", "throughput": 5573.1, "total_tokens": 18480336}
|
|
{"current_steps": 37550, "total_steps": 37885, "loss": 0.0, "lr": 4.791889026793949e-10, "epoch": 4.955787250890854, "percentage": 99.12, "elapsed_time": "0:55:16", "remaining_time": "0:00:29", "throughput": 5573.22, "total_tokens": 18482640}
|
|
{"current_steps": 37555, "total_steps": 37885, "loss": 0.0, "lr": 4.6503453833368623e-10, "epoch": 4.956447142668602, "percentage": 99.13, "elapsed_time": "0:55:16", "remaining_time": "0:00:29", "throughput": 5573.35, "total_tokens": 18484880}
|
|
{"current_steps": 37560, "total_steps": 37885, "loss": 0.0, "lr": 4.5109231804629776e-10, "epoch": 4.95710703444635, "percentage": 99.14, "elapsed_time": "0:55:16", "remaining_time": "0:00:28", "throughput": 5573.64, "total_tokens": 18487696}
|
|
{"current_steps": 37565, "total_steps": 37885, "loss": 0.0473, "lr": 4.37362244776307e-10, "epoch": 4.9577669262240995, "percentage": 99.16, "elapsed_time": "0:55:17", "remaining_time": "0:00:28", "throughput": 5573.89, "total_tokens": 18490384}
|
|
{"current_steps": 37570, "total_steps": 37885, "loss": 0.0, "lr": 4.238443214380494e-10, "epoch": 4.958426818001848, "percentage": 99.17, "elapsed_time": "0:55:17", "remaining_time": "0:00:27", "throughput": 5574.06, "total_tokens": 18492816}
|
|
{"current_steps": 37575, "total_steps": 37885, "loss": 0.0, "lr": 4.105385509004522e-10, "epoch": 4.959086709779596, "percentage": 99.18, "elapsed_time": "0:55:17", "remaining_time": "0:00:27", "throughput": 5574.28, "total_tokens": 18495376}
|
|
{"current_steps": 37580, "total_steps": 37885, "loss": 0.0, "lr": 3.974449359875898e-10, "epoch": 4.959746601557344, "percentage": 99.19, "elapsed_time": "0:55:18", "remaining_time": "0:00:26", "throughput": 5574.4, "total_tokens": 18497616}
|
|
{"current_steps": 37585, "total_steps": 37885, "loss": 0.0, "lr": 3.8456347947835034e-10, "epoch": 4.960406493335093, "percentage": 99.21, "elapsed_time": "0:55:18", "remaining_time": "0:00:26", "throughput": 5574.58, "total_tokens": 18500048}
|
|
{"current_steps": 37590, "total_steps": 37885, "loss": 0.0, "lr": 3.7189418410699114e-10, "epoch": 4.961066385112842, "percentage": 99.22, "elapsed_time": "0:55:18", "remaining_time": "0:00:26", "throughput": 5574.79, "total_tokens": 18502608}
|
|
{"current_steps": 37595, "total_steps": 37885, "loss": 0.0044, "lr": 3.5943705256236136e-10, "epoch": 4.96172627689059, "percentage": 99.23, "elapsed_time": "0:55:19", "remaining_time": "0:00:25", "throughput": 5574.99, "total_tokens": 18505104}
|
|
{"current_steps": 37600, "total_steps": 37885, "loss": 0.0, "lr": 3.4719208748834607e-10, "epoch": 4.962386168668338, "percentage": 99.25, "elapsed_time": "0:55:19", "remaining_time": "0:00:25", "throughput": 5575.16, "total_tokens": 18507536}
|
|
{"current_steps": 37605, "total_steps": 37885, "loss": 0.0001, "lr": 3.3515929148397737e-10, "epoch": 4.963046060446087, "percentage": 99.26, "elapsed_time": "0:55:19", "remaining_time": "0:00:24", "throughput": 5575.36, "total_tokens": 18510032}
|
|
{"current_steps": 37610, "total_steps": 37885, "loss": 0.0, "lr": 3.2333866710299027e-10, "epoch": 4.963705952223835, "percentage": 99.27, "elapsed_time": "0:55:20", "remaining_time": "0:00:24", "throughput": 5575.56, "total_tokens": 18512592}
|
|
{"current_steps": 37615, "total_steps": 37885, "loss": 0.0, "lr": 3.1173021685426684e-10, "epoch": 4.964365844001584, "percentage": 99.29, "elapsed_time": "0:55:20", "remaining_time": "0:00:23", "throughput": 5575.65, "total_tokens": 18514704}
|
|
{"current_steps": 37620, "total_steps": 37885, "loss": 0.0095, "lr": 3.003339432016139e-10, "epoch": 4.965025735779332, "percentage": 99.3, "elapsed_time": "0:55:20", "remaining_time": "0:00:23", "throughput": 5575.87, "total_tokens": 18517328}
|
|
{"current_steps": 37625, "total_steps": 37885, "loss": 0.0006, "lr": 2.891498485638744e-10, "epoch": 4.965685627557081, "percentage": 99.31, "elapsed_time": "0:55:21", "remaining_time": "0:00:22", "throughput": 5576.14, "total_tokens": 18520080}
|
|
{"current_steps": 37630, "total_steps": 37885, "loss": 0.0, "lr": 2.781779353147051e-10, "epoch": 4.966345519334829, "percentage": 99.33, "elapsed_time": "0:55:21", "remaining_time": "0:00:22", "throughput": 5576.28, "total_tokens": 18522384}
|
|
{"current_steps": 37635, "total_steps": 37885, "loss": 0.0, "lr": 2.6741820578290997e-10, "epoch": 4.967005411112577, "percentage": 99.34, "elapsed_time": "0:55:21", "remaining_time": "0:00:22", "throughput": 5576.61, "total_tokens": 18525392}
|
|
{"current_steps": 37640, "total_steps": 37885, "loss": 0.0322, "lr": 2.568706622519956e-10, "epoch": 4.967665302890326, "percentage": 99.35, "elapsed_time": "0:55:22", "remaining_time": "0:00:21", "throughput": 5576.77, "total_tokens": 18527760}
|
|
{"current_steps": 37645, "total_steps": 37885, "loss": 0.0002, "lr": 2.465353069608378e-10, "epoch": 4.968325194668075, "percentage": 99.37, "elapsed_time": "0:55:22", "remaining_time": "0:00:21", "throughput": 5576.88, "total_tokens": 18530000}
|
|
{"current_steps": 37650, "total_steps": 37885, "loss": 0.0836, "lr": 2.3641214210279314e-10, "epoch": 4.968985086445823, "percentage": 99.38, "elapsed_time": "0:55:22", "remaining_time": "0:00:20", "throughput": 5577.11, "total_tokens": 18532624}
|
|
{"current_steps": 37655, "total_steps": 37885, "loss": 0.0, "lr": 2.265011698266983e-10, "epoch": 4.969644978223571, "percentage": 99.39, "elapsed_time": "0:55:23", "remaining_time": "0:00:20", "throughput": 5577.29, "total_tokens": 18535056}
|
|
{"current_steps": 37660, "total_steps": 37885, "loss": 0.0, "lr": 2.168023922357598e-10, "epoch": 4.9703048700013195, "percentage": 99.41, "elapsed_time": "0:55:23", "remaining_time": "0:00:19", "throughput": 5577.47, "total_tokens": 18537488}
|
|
{"current_steps": 37665, "total_steps": 37885, "loss": 0.0, "lr": 2.0731581138877518e-10, "epoch": 4.970964761779069, "percentage": 99.42, "elapsed_time": "0:55:23", "remaining_time": "0:00:19", "throughput": 5577.62, "total_tokens": 18539792}
|
|
{"current_steps": 37670, "total_steps": 37885, "loss": 0.0, "lr": 1.980414292990229e-10, "epoch": 4.971624653556817, "percentage": 99.43, "elapsed_time": "0:55:24", "remaining_time": "0:00:18", "throughput": 5577.8, "total_tokens": 18542224}
|
|
{"current_steps": 37675, "total_steps": 37885, "loss": 0.0, "lr": 1.889792479350394e-10, "epoch": 4.972284545334565, "percentage": 99.45, "elapsed_time": "0:55:24", "remaining_time": "0:00:18", "throughput": 5578.05, "total_tokens": 18544912}
|
|
{"current_steps": 37680, "total_steps": 37885, "loss": 0.0, "lr": 1.8012926922017502e-10, "epoch": 4.9729444371123135, "percentage": 99.46, "elapsed_time": "0:55:24", "remaining_time": "0:00:18", "throughput": 5578.21, "total_tokens": 18547280}
|
|
{"current_steps": 37685, "total_steps": 37885, "loss": 0.0, "lr": 1.714914950327051e-10, "epoch": 4.973604328890062, "percentage": 99.47, "elapsed_time": "0:55:25", "remaining_time": "0:00:17", "throughput": 5578.43, "total_tokens": 18549840}
|
|
{"current_steps": 37690, "total_steps": 37885, "loss": 0.0, "lr": 1.6306592720594093e-10, "epoch": 4.974264220667811, "percentage": 99.49, "elapsed_time": "0:55:25", "remaining_time": "0:00:17", "throughput": 5578.59, "total_tokens": 18552208}
|
|
{"current_steps": 37695, "total_steps": 37885, "loss": 0.0233, "lr": 1.5485256752822973e-10, "epoch": 4.974924112445559, "percentage": 99.5, "elapsed_time": "0:55:25", "remaining_time": "0:00:16", "throughput": 5578.78, "total_tokens": 18554640}
|
|
{"current_steps": 37700, "total_steps": 37885, "loss": 0.0, "lr": 1.4685141774273268e-10, "epoch": 4.9755840042233075, "percentage": 99.51, "elapsed_time": "0:55:26", "remaining_time": "0:00:16", "throughput": 5579.03, "total_tokens": 18557392}
|
|
{"current_steps": 37705, "total_steps": 37885, "loss": 0.0, "lr": 1.3906247954764694e-10, "epoch": 4.976243896001056, "percentage": 99.52, "elapsed_time": "0:55:26", "remaining_time": "0:00:15", "throughput": 5579.17, "total_tokens": 18559696}
|
|
{"current_steps": 37710, "total_steps": 37885, "loss": 0.0, "lr": 1.3148575459609457e-10, "epoch": 4.976903787778804, "percentage": 99.54, "elapsed_time": "0:55:26", "remaining_time": "0:00:15", "throughput": 5579.3, "total_tokens": 18561936}
|
|
{"current_steps": 37715, "total_steps": 37885, "loss": 0.0, "lr": 1.241212444962336e-10, "epoch": 4.977563679556553, "percentage": 99.55, "elapsed_time": "0:55:27", "remaining_time": "0:00:14", "throughput": 5579.5, "total_tokens": 18564432}
|
|
{"current_steps": 37720, "total_steps": 37885, "loss": 0.0, "lr": 1.169689508111471e-10, "epoch": 4.9782235713343015, "percentage": 99.56, "elapsed_time": "0:55:27", "remaining_time": "0:00:14", "throughput": 5579.7, "total_tokens": 18566928}
|
|
{"current_steps": 37725, "total_steps": 37885, "loss": 0.0, "lr": 1.1002887505873193e-10, "epoch": 4.97888346311205, "percentage": 99.58, "elapsed_time": "0:55:27", "remaining_time": "0:00:14", "throughput": 5579.98, "total_tokens": 18569744}
|
|
{"current_steps": 37730, "total_steps": 37885, "loss": 0.0001, "lr": 1.0330101871214303e-10, "epoch": 4.979543354889798, "percentage": 99.59, "elapsed_time": "0:55:28", "remaining_time": "0:00:13", "throughput": 5580.12, "total_tokens": 18572048}
|
|
{"current_steps": 37735, "total_steps": 37885, "loss": 0.0, "lr": 9.678538319923824e-11, "epoch": 4.980203246667546, "percentage": 99.6, "elapsed_time": "0:55:28", "remaining_time": "0:00:13", "throughput": 5580.31, "total_tokens": 18574544}
|
|
{"current_steps": 37740, "total_steps": 37885, "loss": 0.0005, "lr": 9.048196990280033e-11, "epoch": 4.980863138445295, "percentage": 99.62, "elapsed_time": "0:55:28", "remaining_time": "0:00:12", "throughput": 5580.58, "total_tokens": 18577296}
|
|
{"current_steps": 37745, "total_steps": 37885, "loss": 0.0, "lr": 8.439078016087009e-11, "epoch": 4.981523030223044, "percentage": 99.63, "elapsed_time": "0:55:29", "remaining_time": "0:00:12", "throughput": 5580.72, "total_tokens": 18579600}
|
|
{"current_steps": 37750, "total_steps": 37885, "loss": 0.0997, "lr": 7.851181526619122e-11, "epoch": 4.982182922000792, "percentage": 99.64, "elapsed_time": "0:55:29", "remaining_time": "0:00:11", "throughput": 5580.95, "total_tokens": 18582224}
|
|
{"current_steps": 37755, "total_steps": 37885, "loss": 0.0, "lr": 7.284507646654336e-11, "epoch": 4.98284281377854, "percentage": 99.66, "elapsed_time": "0:55:29", "remaining_time": "0:00:11", "throughput": 5581.03, "total_tokens": 18584336}
|
|
{"current_steps": 37760, "total_steps": 37885, "loss": 0.0472, "lr": 6.739056496452011e-11, "epoch": 4.983502705556289, "percentage": 99.67, "elapsed_time": "0:55:30", "remaining_time": "0:00:11", "throughput": 5581.23, "total_tokens": 18586896}
|
|
{"current_steps": 37765, "total_steps": 37885, "loss": 0.0001, "lr": 6.214828191797305e-11, "epoch": 4.984162597334037, "percentage": 99.68, "elapsed_time": "0:55:30", "remaining_time": "0:00:10", "throughput": 5581.46, "total_tokens": 18589520}
|
|
{"current_steps": 37770, "total_steps": 37885, "loss": 0.0324, "lr": 5.711822843945669e-11, "epoch": 4.984822489111786, "percentage": 99.7, "elapsed_time": "0:55:30", "remaining_time": "0:00:10", "throughput": 5581.64, "total_tokens": 18591952}
|
|
{"current_steps": 37775, "total_steps": 37885, "loss": 0.0, "lr": 5.230040559656146e-11, "epoch": 4.985482380889534, "percentage": 99.71, "elapsed_time": "0:55:31", "remaining_time": "0:00:09", "throughput": 5581.81, "total_tokens": 18594384}
|
|
{"current_steps": 37780, "total_steps": 37885, "loss": 0.0019, "lr": 4.769481441191381e-11, "epoch": 4.986142272667283, "percentage": 99.72, "elapsed_time": "0:55:31", "remaining_time": "0:00:09", "throughput": 5582.0, "total_tokens": 18596880}
|
|
{"current_steps": 37785, "total_steps": 37885, "loss": 0.0038, "lr": 4.330145586284306e-11, "epoch": 4.986802164445031, "percentage": 99.74, "elapsed_time": "0:55:31", "remaining_time": "0:00:08", "throughput": 5582.26, "total_tokens": 18599632}
|
|
{"current_steps": 37790, "total_steps": 37885, "loss": 0.0, "lr": 3.912033088204758e-11, "epoch": 4.987462056222779, "percentage": 99.75, "elapsed_time": "0:55:32", "remaining_time": "0:00:08", "throughput": 5582.46, "total_tokens": 18602128}
|
|
{"current_steps": 37795, "total_steps": 37885, "loss": 0.0, "lr": 3.515144035670658e-11, "epoch": 4.988121948000527, "percentage": 99.76, "elapsed_time": "0:55:32", "remaining_time": "0:00:07", "throughput": 5582.61, "total_tokens": 18604496}
|
|
{"current_steps": 37800, "total_steps": 37885, "loss": 0.0, "lr": 3.139478512936833e-11, "epoch": 4.988781839778277, "percentage": 99.78, "elapsed_time": "0:55:32", "remaining_time": "0:00:07", "throughput": 5582.79, "total_tokens": 18606992}
|
|
{"current_steps": 37805, "total_steps": 37885, "loss": 0.0, "lr": 2.7850365997283963e-11, "epoch": 4.989441731556025, "percentage": 99.79, "elapsed_time": "0:55:33", "remaining_time": "0:00:07", "throughput": 5582.91, "total_tokens": 18609232}
|
|
{"current_steps": 37810, "total_steps": 37885, "loss": 0.0, "lr": 2.4518183712740615e-11, "epoch": 4.990101623333773, "percentage": 99.8, "elapsed_time": "0:55:33", "remaining_time": "0:00:06", "throughput": 5583.03, "total_tokens": 18611472}
|
|
{"current_steps": 37815, "total_steps": 37885, "loss": 0.0, "lr": 2.1398238982839324e-11, "epoch": 4.990761515111521, "percentage": 99.82, "elapsed_time": "0:55:33", "remaining_time": "0:00:06", "throughput": 5583.17, "total_tokens": 18613776}
|
|
{"current_steps": 37820, "total_steps": 37885, "loss": 0.0, "lr": 1.8490532470050168e-11, "epoch": 4.99142140688927, "percentage": 99.83, "elapsed_time": "0:55:34", "remaining_time": "0:00:05", "throughput": 5583.37, "total_tokens": 18616272}
|
|
{"current_steps": 37825, "total_steps": 37885, "loss": 0.028, "lr": 1.5795064791213065e-11, "epoch": 4.992081298667019, "percentage": 99.84, "elapsed_time": "0:55:34", "remaining_time": "0:00:05", "throughput": 5583.57, "total_tokens": 18618768}
|
|
{"current_steps": 37830, "total_steps": 37885, "loss": 0.0, "lr": 1.3311836518647978e-11, "epoch": 4.992741190444767, "percentage": 99.85, "elapsed_time": "0:55:34", "remaining_time": "0:00:04", "throughput": 5583.78, "total_tokens": 18621328}
|
|
{"current_steps": 37835, "total_steps": 37885, "loss": 0.0, "lr": 1.104084817926676e-11, "epoch": 4.993401082222515, "percentage": 99.87, "elapsed_time": "0:55:35", "remaining_time": "0:00:04", "throughput": 5583.9, "total_tokens": 18623568}
|
|
{"current_steps": 37840, "total_steps": 37885, "loss": 0.0, "lr": 8.982100255128244e-12, "epoch": 4.994060974000264, "percentage": 99.88, "elapsed_time": "0:55:35", "remaining_time": "0:00:03", "throughput": 5584.08, "total_tokens": 18626000}
|
|
{"current_steps": 37845, "total_steps": 37885, "loss": 0.0, "lr": 7.135593183216215e-12, "epoch": 4.994720865778012, "percentage": 99.89, "elapsed_time": "0:55:35", "remaining_time": "0:00:03", "throughput": 5584.27, "total_tokens": 18628496}
|
|
{"current_steps": 37850, "total_steps": 37885, "loss": 0.0, "lr": 5.501327355328378e-12, "epoch": 4.995380757555761, "percentage": 99.91, "elapsed_time": "0:55:36", "remaining_time": "0:00:03", "throughput": 5584.45, "total_tokens": 18630928}
|
|
{"current_steps": 37855, "total_steps": 37885, "loss": 0.0001, "lr": 4.0793031184094275e-12, "epoch": 4.996040649333509, "percentage": 99.92, "elapsed_time": "0:55:36", "remaining_time": "0:00:02", "throughput": 5584.66, "total_tokens": 18633488}
|
|
{"current_steps": 37860, "total_steps": 37885, "loss": 0.0, "lr": 2.8695207742179816e-12, "epoch": 4.996700541111258, "percentage": 99.93, "elapsed_time": "0:55:36", "remaining_time": "0:00:02", "throughput": 5584.85, "total_tokens": 18635984}
|
|
{"current_steps": 37865, "total_steps": 37885, "loss": 0.0, "lr": 1.8719805796596487e-12, "epoch": 4.997360432889006, "percentage": 99.95, "elapsed_time": "0:55:37", "remaining_time": "0:00:01", "throughput": 5584.99, "total_tokens": 18638288}
|
|
{"current_steps": 37870, "total_steps": 37885, "loss": 0.0, "lr": 1.086682746231915e-12, "epoch": 4.998020324666754, "percentage": 99.96, "elapsed_time": "0:55:37", "remaining_time": "0:00:01", "throughput": 5585.17, "total_tokens": 18640720}
|
|
{"current_steps": 37875, "total_steps": 37885, "loss": 0.0, "lr": 5.136274408013008e-13, "epoch": 4.998680216444503, "percentage": 99.97, "elapsed_time": "0:55:37", "remaining_time": "0:00:00", "throughput": 5585.29, "total_tokens": 18642960}
|
|
{"current_steps": 37880, "total_steps": 37885, "loss": 0.0, "lr": 1.5281478493722745e-13, "epoch": 4.999340108222252, "percentage": 99.99, "elapsed_time": "0:55:38", "remaining_time": "0:00:00", "throughput": 5585.35, "total_tokens": 18645008}
|
|
{"current_steps": 37885, "total_steps": 37885, "loss": 0.0001, "lr": 4.244855245083556e-15, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:55:38", "remaining_time": "0:00:00", "throughput": 5585.43, "total_tokens": 18647328}
|
|
{"current_steps": 37885, "total_steps": 37885, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:56:14", "remaining_time": "0:00:00", "throughput": 5526.12, "total_tokens": 18647328}
|