1536 lines
365 KiB
JSON
1536 lines
365 KiB
JSON
{"current_steps": 5, "total_steps": 7577, "loss": 1.4317, "lr": 1.0554089709762531e-08, "epoch": 0.0006598917777484492, "percentage": 0.07, "elapsed_time": "0:00:00", "remaining_time": "0:16:38", "throughput": 3397.34, "total_tokens": 2240}
|
|
{"current_steps": 10, "total_steps": 7577, "loss": 1.5404, "lr": 2.3746701846965696e-08, "epoch": 0.0013197835554968984, "percentage": 0.13, "elapsed_time": "0:00:00", "remaining_time": "0:12:35", "throughput": 4678.08, "total_tokens": 4672}
|
|
{"current_steps": 15, "total_steps": 7577, "loss": 1.4488, "lr": 3.6939313984168866e-08, "epoch": 0.0019796753332453477, "percentage": 0.2, "elapsed_time": "0:00:01", "remaining_time": "0:11:09", "throughput": 5300.01, "total_tokens": 7040}
|
|
{"current_steps": 20, "total_steps": 7577, "loss": 1.469, "lr": 5.013192612137203e-08, "epoch": 0.002639567110993797, "percentage": 0.26, "elapsed_time": "0:00:01", "remaining_time": "0:10:26", "throughput": 5794.13, "total_tokens": 9600}
|
|
{"current_steps": 25, "total_steps": 7577, "loss": 1.3406, "lr": 6.33245382585752e-08, "epoch": 0.0032994588887422464, "percentage": 0.33, "elapsed_time": "0:00:01", "remaining_time": "0:09:59", "throughput": 6132.03, "total_tokens": 12160}
|
|
{"current_steps": 30, "total_steps": 7577, "loss": 1.421, "lr": 7.651715039577835e-08, "epoch": 0.0039593506664906955, "percentage": 0.4, "elapsed_time": "0:00:02", "remaining_time": "0:09:41", "throughput": 6289.1, "total_tokens": 14528}
|
|
{"current_steps": 35, "total_steps": 7577, "loss": 1.1582, "lr": 8.970976253298153e-08, "epoch": 0.004619242444239145, "percentage": 0.46, "elapsed_time": "0:00:02", "remaining_time": "0:09:27", "throughput": 6362.66, "total_tokens": 16768}
|
|
{"current_steps": 40, "total_steps": 7577, "loss": 1.1864, "lr": 1.0290237467018468e-07, "epoch": 0.005279134221987594, "percentage": 0.53, "elapsed_time": "0:00:02", "remaining_time": "0:09:18", "throughput": 6494.9, "total_tokens": 19264}
|
|
{"current_steps": 45, "total_steps": 7577, "loss": 0.8458, "lr": 1.1609498680738786e-07, "epoch": 0.005939025999736044, "percentage": 0.59, "elapsed_time": "0:00:03", "remaining_time": "0:09:12", "throughput": 6550.58, "total_tokens": 21632}
|
|
{"current_steps": 50, "total_steps": 7577, "loss": 0.7602, "lr": 1.29287598944591e-07, "epoch": 0.006598917777484493, "percentage": 0.66, "elapsed_time": "0:00:03", "remaining_time": "0:09:06", "throughput": 6615.85, "total_tokens": 24000}
|
|
{"current_steps": 55, "total_steps": 7577, "loss": 0.7195, "lr": 1.424802110817942e-07, "epoch": 0.007258809555232942, "percentage": 0.73, "elapsed_time": "0:00:03", "remaining_time": "0:09:01", "throughput": 6696.39, "total_tokens": 26496}
|
|
{"current_steps": 60, "total_steps": 7577, "loss": 0.3928, "lr": 1.5567282321899736e-07, "epoch": 0.007918701332981391, "percentage": 0.79, "elapsed_time": "0:00:04", "remaining_time": "0:08:57", "throughput": 6789.9, "total_tokens": 29120}
|
|
{"current_steps": 65, "total_steps": 7577, "loss": 0.3093, "lr": 1.688654353562005e-07, "epoch": 0.008578593110729841, "percentage": 0.86, "elapsed_time": "0:00:04", "remaining_time": "0:08:53", "throughput": 6870.9, "total_tokens": 31744}
|
|
{"current_steps": 70, "total_steps": 7577, "loss": 0.2986, "lr": 1.820580474934037e-07, "epoch": 0.00923848488847829, "percentage": 0.92, "elapsed_time": "0:00:04", "remaining_time": "0:08:50", "throughput": 6911.09, "total_tokens": 34176}
|
|
{"current_steps": 75, "total_steps": 7577, "loss": 0.2998, "lr": 1.9525065963060686e-07, "epoch": 0.009898376666226739, "percentage": 0.99, "elapsed_time": "0:00:05", "remaining_time": "0:08:47", "throughput": 6985.09, "total_tokens": 36864}
|
|
{"current_steps": 80, "total_steps": 7577, "loss": 0.2555, "lr": 2.0844327176781002e-07, "epoch": 0.010558268443975187, "percentage": 1.06, "elapsed_time": "0:00:05", "remaining_time": "0:08:45", "throughput": 7026.85, "total_tokens": 39424}
|
|
{"current_steps": 85, "total_steps": 7577, "loss": 0.2595, "lr": 2.2163588390501316e-07, "epoch": 0.011218160221723637, "percentage": 1.12, "elapsed_time": "0:00:05", "remaining_time": "0:08:43", "throughput": 7084.01, "total_tokens": 42112}
|
|
{"current_steps": 90, "total_steps": 7577, "loss": 0.2567, "lr": 2.3482849604221635e-07, "epoch": 0.011878051999472087, "percentage": 1.19, "elapsed_time": "0:00:06", "remaining_time": "0:08:41", "throughput": 7100.45, "total_tokens": 44544}
|
|
{"current_steps": 95, "total_steps": 7577, "loss": 0.1981, "lr": 2.480211081794195e-07, "epoch": 0.012537943777220536, "percentage": 1.25, "elapsed_time": "0:00:06", "remaining_time": "0:08:40", "throughput": 7122.92, "total_tokens": 47104}
|
|
{"current_steps": 100, "total_steps": 7577, "loss": 0.1626, "lr": 2.612137203166227e-07, "epoch": 0.013197835554968985, "percentage": 1.32, "elapsed_time": "0:00:06", "remaining_time": "0:08:39", "throughput": 7151.4, "total_tokens": 49664}
|
|
{"current_steps": 105, "total_steps": 7577, "loss": 0.1359, "lr": 2.744063324538258e-07, "epoch": 0.013857727332717434, "percentage": 1.39, "elapsed_time": "0:00:07", "remaining_time": "0:08:37", "throughput": 7197.13, "total_tokens": 52352}
|
|
{"current_steps": 110, "total_steps": 7577, "loss": 0.1025, "lr": 2.8759894459102903e-07, "epoch": 0.014517619110465884, "percentage": 1.45, "elapsed_time": "0:00:07", "remaining_time": "0:08:35", "throughput": 7201.47, "total_tokens": 54720}
|
|
{"current_steps": 115, "total_steps": 7577, "loss": 0.1435, "lr": 3.007915567282322e-07, "epoch": 0.015177510888214334, "percentage": 1.52, "elapsed_time": "0:00:07", "remaining_time": "0:08:34", "throughput": 7209.52, "total_tokens": 57152}
|
|
{"current_steps": 120, "total_steps": 7577, "loss": 0.2027, "lr": 3.139841688654353e-07, "epoch": 0.015837402665962782, "percentage": 1.58, "elapsed_time": "0:00:08", "remaining_time": "0:08:33", "throughput": 7237.62, "total_tokens": 59776}
|
|
{"current_steps": 125, "total_steps": 7577, "loss": 0.1344, "lr": 3.271767810026385e-07, "epoch": 0.01649729444371123, "percentage": 1.65, "elapsed_time": "0:00:08", "remaining_time": "0:08:32", "throughput": 7269.42, "total_tokens": 62464}
|
|
{"current_steps": 130, "total_steps": 7577, "loss": 0.1188, "lr": 3.403693931398417e-07, "epoch": 0.017157186221459682, "percentage": 1.72, "elapsed_time": "0:00:08", "remaining_time": "0:08:31", "throughput": 7292.2, "total_tokens": 65088}
|
|
{"current_steps": 135, "total_steps": 7577, "loss": 0.2073, "lr": 3.5356200527704485e-07, "epoch": 0.01781707799920813, "percentage": 1.78, "elapsed_time": "0:00:09", "remaining_time": "0:08:30", "throughput": 7323.53, "total_tokens": 67776}
|
|
{"current_steps": 140, "total_steps": 7577, "loss": 0.2589, "lr": 3.66754617414248e-07, "epoch": 0.01847696977695658, "percentage": 1.85, "elapsed_time": "0:00:09", "remaining_time": "0:08:29", "throughput": 7344.53, "total_tokens": 70400}
|
|
{"current_steps": 145, "total_steps": 7577, "loss": 0.1435, "lr": 3.7994722955145113e-07, "epoch": 0.01913686155470503, "percentage": 1.91, "elapsed_time": "0:00:09", "remaining_time": "0:08:28", "throughput": 7326.01, "total_tokens": 72704}
|
|
{"current_steps": 150, "total_steps": 7577, "loss": 0.3408, "lr": 3.9313984168865435e-07, "epoch": 0.019796753332453478, "percentage": 1.98, "elapsed_time": "0:00:10", "remaining_time": "0:08:27", "throughput": 7330.81, "total_tokens": 75136}
|
|
{"current_steps": 155, "total_steps": 7577, "loss": 0.0612, "lr": 4.063324538258575e-07, "epoch": 0.020456645110201926, "percentage": 2.05, "elapsed_time": "0:00:10", "remaining_time": "0:08:26", "throughput": 7340.14, "total_tokens": 77632}
|
|
{"current_steps": 160, "total_steps": 7577, "loss": 0.2462, "lr": 4.195250659630606e-07, "epoch": 0.021116536887950375, "percentage": 2.11, "elapsed_time": "0:00:10", "remaining_time": "0:08:25", "throughput": 7360.74, "total_tokens": 80320}
|
|
{"current_steps": 165, "total_steps": 7577, "loss": 0.1542, "lr": 4.3271767810026384e-07, "epoch": 0.021776428665698826, "percentage": 2.18, "elapsed_time": "0:00:11", "remaining_time": "0:08:24", "throughput": 7363.11, "total_tokens": 82752}
|
|
{"current_steps": 170, "total_steps": 7577, "loss": 0.3088, "lr": 4.45910290237467e-07, "epoch": 0.022436320443447275, "percentage": 2.24, "elapsed_time": "0:00:11", "remaining_time": "0:08:23", "throughput": 7370.45, "total_tokens": 85248}
|
|
{"current_steps": 175, "total_steps": 7577, "loss": 0.2887, "lr": 4.5910290237467017e-07, "epoch": 0.023096212221195723, "percentage": 2.31, "elapsed_time": "0:00:11", "remaining_time": "0:08:23", "throughput": 7384.71, "total_tokens": 87872}
|
|
{"current_steps": 180, "total_steps": 7577, "loss": 0.1364, "lr": 4.7229551451187333e-07, "epoch": 0.023756103998944175, "percentage": 2.38, "elapsed_time": "0:00:12", "remaining_time": "0:08:22", "throughput": 7391.26, "total_tokens": 90368}
|
|
{"current_steps": 185, "total_steps": 7577, "loss": 0.0589, "lr": 4.854881266490765e-07, "epoch": 0.024415995776692623, "percentage": 2.44, "elapsed_time": "0:00:12", "remaining_time": "0:08:21", "throughput": 7402.07, "total_tokens": 92928}
|
|
{"current_steps": 190, "total_steps": 7577, "loss": 0.1639, "lr": 4.986807387862796e-07, "epoch": 0.02507588755444107, "percentage": 2.51, "elapsed_time": "0:00:12", "remaining_time": "0:08:20", "throughput": 7397.88, "total_tokens": 95296}
|
|
{"current_steps": 195, "total_steps": 7577, "loss": 0.1103, "lr": 5.118733509234829e-07, "epoch": 0.02573577933218952, "percentage": 2.57, "elapsed_time": "0:00:13", "remaining_time": "0:08:20", "throughput": 7414.35, "total_tokens": 97984}
|
|
{"current_steps": 200, "total_steps": 7577, "loss": 0.2017, "lr": 5.250659630606859e-07, "epoch": 0.02639567110993797, "percentage": 2.64, "elapsed_time": "0:00:13", "remaining_time": "0:08:19", "throughput": 7411.98, "total_tokens": 100352}
|
|
{"current_steps": 205, "total_steps": 7577, "loss": 0.1383, "lr": 5.382585751978892e-07, "epoch": 0.02705556288768642, "percentage": 2.71, "elapsed_time": "0:00:13", "remaining_time": "0:08:18", "throughput": 7391.3, "total_tokens": 102464}
|
|
{"current_steps": 210, "total_steps": 7577, "loss": 0.1266, "lr": 5.514511873350924e-07, "epoch": 0.027715454665434867, "percentage": 2.77, "elapsed_time": "0:00:14", "remaining_time": "0:08:17", "throughput": 7403.67, "total_tokens": 105088}
|
|
{"current_steps": 215, "total_steps": 7577, "loss": 0.3019, "lr": 5.646437994722954e-07, "epoch": 0.02837534644318332, "percentage": 2.84, "elapsed_time": "0:00:14", "remaining_time": "0:08:17", "throughput": 7411.44, "total_tokens": 107648}
|
|
{"current_steps": 220, "total_steps": 7577, "loss": 0.2375, "lr": 5.778364116094987e-07, "epoch": 0.029035238220931767, "percentage": 2.9, "elapsed_time": "0:00:14", "remaining_time": "0:08:16", "throughput": 7415.87, "total_tokens": 110144}
|
|
{"current_steps": 225, "total_steps": 7577, "loss": 0.1332, "lr": 5.910290237467019e-07, "epoch": 0.029695129998680216, "percentage": 2.97, "elapsed_time": "0:00:15", "remaining_time": "0:08:15", "throughput": 7416.39, "total_tokens": 112576}
|
|
{"current_steps": 230, "total_steps": 7577, "loss": 0.2314, "lr": 6.042216358839049e-07, "epoch": 0.030355021776428667, "percentage": 3.04, "elapsed_time": "0:00:15", "remaining_time": "0:08:15", "throughput": 7430.31, "total_tokens": 115264}
|
|
{"current_steps": 235, "total_steps": 7577, "loss": 0.0848, "lr": 6.174142480211082e-07, "epoch": 0.031014913554177116, "percentage": 3.1, "elapsed_time": "0:00:15", "remaining_time": "0:08:14", "throughput": 7441.01, "total_tokens": 117888}
|
|
{"current_steps": 240, "total_steps": 7577, "loss": 0.0189, "lr": 6.306068601583114e-07, "epoch": 0.031674805331925564, "percentage": 3.17, "elapsed_time": "0:00:16", "remaining_time": "0:08:14", "throughput": 7442.18, "total_tokens": 120320}
|
|
{"current_steps": 245, "total_steps": 7577, "loss": 0.1826, "lr": 6.437994722955144e-07, "epoch": 0.032334697109674015, "percentage": 3.23, "elapsed_time": "0:00:16", "remaining_time": "0:08:13", "throughput": 7439.68, "total_tokens": 122688}
|
|
{"current_steps": 250, "total_steps": 7577, "loss": 0.2716, "lr": 6.569920844327177e-07, "epoch": 0.03299458888742246, "percentage": 3.3, "elapsed_time": "0:00:16", "remaining_time": "0:08:12", "throughput": 7446.95, "total_tokens": 125248}
|
|
{"current_steps": 255, "total_steps": 7577, "loss": 0.203, "lr": 6.701846965699208e-07, "epoch": 0.03365448066517091, "percentage": 3.37, "elapsed_time": "0:00:17", "remaining_time": "0:08:12", "throughput": 7447.33, "total_tokens": 127680}
|
|
{"current_steps": 260, "total_steps": 7577, "loss": 0.083, "lr": 6.833773087071239e-07, "epoch": 0.034314372442919364, "percentage": 3.43, "elapsed_time": "0:00:17", "remaining_time": "0:08:11", "throughput": 7464.92, "total_tokens": 130496}
|
|
{"current_steps": 265, "total_steps": 7577, "loss": 0.1837, "lr": 6.965699208443272e-07, "epoch": 0.03497426422066781, "percentage": 3.5, "elapsed_time": "0:00:17", "remaining_time": "0:08:11", "throughput": 7468.93, "total_tokens": 132992}
|
|
{"current_steps": 270, "total_steps": 7577, "loss": 0.1372, "lr": 7.097625329815303e-07, "epoch": 0.03563415599841626, "percentage": 3.56, "elapsed_time": "0:00:18", "remaining_time": "0:08:10", "throughput": 7449.51, "total_tokens": 135040}
|
|
{"current_steps": 275, "total_steps": 7577, "loss": 0.258, "lr": 7.229551451187335e-07, "epoch": 0.03629404777616471, "percentage": 3.63, "elapsed_time": "0:00:18", "remaining_time": "0:08:10", "throughput": 7455.64, "total_tokens": 137600}
|
|
{"current_steps": 280, "total_steps": 7577, "loss": 0.1103, "lr": 7.361477572559367e-07, "epoch": 0.03695393955391316, "percentage": 3.7, "elapsed_time": "0:00:18", "remaining_time": "0:08:09", "throughput": 7448.91, "total_tokens": 139904}
|
|
{"current_steps": 285, "total_steps": 7577, "loss": 0.1476, "lr": 7.493403693931398e-07, "epoch": 0.03761383133166161, "percentage": 3.76, "elapsed_time": "0:00:19", "remaining_time": "0:08:08", "throughput": 7433.88, "total_tokens": 142016}
|
|
{"current_steps": 290, "total_steps": 7577, "loss": 0.0087, "lr": 7.62532981530343e-07, "epoch": 0.03827372310941006, "percentage": 3.83, "elapsed_time": "0:00:19", "remaining_time": "0:08:08", "throughput": 7439.86, "total_tokens": 144576}
|
|
{"current_steps": 295, "total_steps": 7577, "loss": 0.085, "lr": 7.757255936675461e-07, "epoch": 0.038933614887158505, "percentage": 3.89, "elapsed_time": "0:00:19", "remaining_time": "0:08:07", "throughput": 7434.64, "total_tokens": 146880}
|
|
{"current_steps": 300, "total_steps": 7577, "loss": 0.3963, "lr": 7.889182058047493e-07, "epoch": 0.039593506664906956, "percentage": 3.96, "elapsed_time": "0:00:20", "remaining_time": "0:08:07", "throughput": 7429.61, "total_tokens": 149184}
|
|
{"current_steps": 305, "total_steps": 7577, "loss": 0.221, "lr": 8.021108179419525e-07, "epoch": 0.0402533984426554, "percentage": 4.03, "elapsed_time": "0:00:20", "remaining_time": "0:08:06", "throughput": 7416.06, "total_tokens": 151296}
|
|
{"current_steps": 310, "total_steps": 7577, "loss": 0.2076, "lr": 8.153034300791555e-07, "epoch": 0.04091329022040385, "percentage": 4.09, "elapsed_time": "0:00:20", "remaining_time": "0:08:05", "throughput": 7413.55, "total_tokens": 153664}
|
|
{"current_steps": 315, "total_steps": 7577, "loss": 0.2588, "lr": 8.284960422163588e-07, "epoch": 0.041573181998152305, "percentage": 4.16, "elapsed_time": "0:00:21", "remaining_time": "0:08:05", "throughput": 7411.06, "total_tokens": 156032}
|
|
{"current_steps": 320, "total_steps": 7577, "loss": 0.227, "lr": 8.41688654353562e-07, "epoch": 0.04223307377590075, "percentage": 4.22, "elapsed_time": "0:00:21", "remaining_time": "0:08:04", "throughput": 7414.32, "total_tokens": 158528}
|
|
{"current_steps": 325, "total_steps": 7577, "loss": 0.12, "lr": 8.54881266490765e-07, "epoch": 0.0428929655536492, "percentage": 4.29, "elapsed_time": "0:00:21", "remaining_time": "0:08:04", "throughput": 7403.3, "total_tokens": 160704}
|
|
{"current_steps": 330, "total_steps": 7577, "loss": 0.1931, "lr": 8.680738786279683e-07, "epoch": 0.04355285733139765, "percentage": 4.36, "elapsed_time": "0:00:22", "remaining_time": "0:08:03", "throughput": 7401.67, "total_tokens": 163072}
|
|
{"current_steps": 335, "total_steps": 7577, "loss": 0.1389, "lr": 8.812664907651715e-07, "epoch": 0.0442127491091461, "percentage": 4.42, "elapsed_time": "0:00:22", "remaining_time": "0:08:03", "throughput": 7404.99, "total_tokens": 165568}
|
|
{"current_steps": 340, "total_steps": 7577, "loss": 0.2857, "lr": 8.944591029023745e-07, "epoch": 0.04487264088689455, "percentage": 4.49, "elapsed_time": "0:00:22", "remaining_time": "0:08:02", "throughput": 7403.5, "total_tokens": 167936}
|
|
{"current_steps": 345, "total_steps": 7577, "loss": 0.0616, "lr": 9.076517150395778e-07, "epoch": 0.045532532664643, "percentage": 4.55, "elapsed_time": "0:00:23", "remaining_time": "0:08:02", "throughput": 7397.17, "total_tokens": 170176}
|
|
{"current_steps": 350, "total_steps": 7577, "loss": 0.149, "lr": 9.20844327176781e-07, "epoch": 0.046192424442391446, "percentage": 4.62, "elapsed_time": "0:00:23", "remaining_time": "0:08:01", "throughput": 7388.53, "total_tokens": 172352}
|
|
{"current_steps": 355, "total_steps": 7577, "loss": 0.2579, "lr": 9.340369393139841e-07, "epoch": 0.0468523162201399, "percentage": 4.69, "elapsed_time": "0:00:23", "remaining_time": "0:08:01", "throughput": 7402.17, "total_tokens": 175168}
|
|
{"current_steps": 360, "total_steps": 7577, "loss": 0.1596, "lr": 9.472295514511873e-07, "epoch": 0.04751220799788835, "percentage": 4.75, "elapsed_time": "0:00:23", "remaining_time": "0:08:01", "throughput": 7412.04, "total_tokens": 177856}
|
|
{"current_steps": 365, "total_steps": 7577, "loss": 0.2923, "lr": 9.604221635883904e-07, "epoch": 0.048172099775636794, "percentage": 4.82, "elapsed_time": "0:00:24", "remaining_time": "0:08:00", "throughput": 7408.35, "total_tokens": 180160}
|
|
{"current_steps": 370, "total_steps": 7577, "loss": 0.2137, "lr": 9.736147757255936e-07, "epoch": 0.048831991553385246, "percentage": 4.88, "elapsed_time": "0:00:24", "remaining_time": "0:08:00", "throughput": 7415.44, "total_tokens": 182784}
|
|
{"current_steps": 375, "total_steps": 7577, "loss": 0.0578, "lr": 9.86807387862797e-07, "epoch": 0.0494918833311337, "percentage": 4.95, "elapsed_time": "0:00:24", "remaining_time": "0:07:59", "throughput": 7408.63, "total_tokens": 185024}
|
|
{"current_steps": 379, "total_steps": 7577, "eval_loss": 0.16884121298789978, "epoch": 0.05001979675333245, "percentage": 5.0, "elapsed_time": "0:00:33", "remaining_time": "0:10:28", "throughput": 5655.07, "total_tokens": 187072}
|
|
{"current_steps": 380, "total_steps": 7577, "loss": 0.1527, "lr": 1e-06, "epoch": 0.05015177510888214, "percentage": 5.02, "elapsed_time": "0:00:59", "remaining_time": "0:18:47", "throughput": 3152.89, "total_tokens": 187712}
|
|
{"current_steps": 385, "total_steps": 7577, "loss": 0.2528, "lr": 1.0131926121372032e-06, "epoch": 0.050811666886630594, "percentage": 5.08, "elapsed_time": "0:00:59", "remaining_time": "0:18:38", "throughput": 3180.18, "total_tokens": 190400}
|
|
{"current_steps": 390, "total_steps": 7577, "loss": 0.1124, "lr": 1.0263852242744063e-06, "epoch": 0.05147155866437904, "percentage": 5.15, "elapsed_time": "0:01:00", "remaining_time": "0:18:29", "throughput": 3209.98, "total_tokens": 193280}
|
|
{"current_steps": 395, "total_steps": 7577, "loss": 0.1545, "lr": 1.0395778364116096e-06, "epoch": 0.05213145044212749, "percentage": 5.21, "elapsed_time": "0:01:00", "remaining_time": "0:18:20", "throughput": 3230.38, "total_tokens": 195584}
|
|
{"current_steps": 400, "total_steps": 7577, "loss": 0.0824, "lr": 1.0527704485488126e-06, "epoch": 0.05279134221987594, "percentage": 5.28, "elapsed_time": "0:01:00", "remaining_time": "0:18:12", "throughput": 3255.16, "total_tokens": 198208}
|
|
{"current_steps": 405, "total_steps": 7577, "loss": 0.255, "lr": 1.0659630606860157e-06, "epoch": 0.05345123399762439, "percentage": 5.35, "elapsed_time": "0:01:01", "remaining_time": "0:18:04", "throughput": 3278.13, "total_tokens": 200704}
|
|
{"current_steps": 410, "total_steps": 7577, "loss": 0.2055, "lr": 1.079155672823219e-06, "epoch": 0.05411112577537284, "percentage": 5.41, "elapsed_time": "0:01:01", "remaining_time": "0:17:56", "throughput": 3299.92, "total_tokens": 203136}
|
|
{"current_steps": 415, "total_steps": 7577, "loss": 0.1496, "lr": 1.0923482849604222e-06, "epoch": 0.05477101755312129, "percentage": 5.48, "elapsed_time": "0:01:01", "remaining_time": "0:17:48", "throughput": 3320.62, "total_tokens": 205504}
|
|
{"current_steps": 420, "total_steps": 7577, "loss": 0.2539, "lr": 1.1055408970976253e-06, "epoch": 0.055430909330869735, "percentage": 5.54, "elapsed_time": "0:01:02", "remaining_time": "0:17:40", "throughput": 3343.21, "total_tokens": 208000}
|
|
{"current_steps": 425, "total_steps": 7577, "loss": 0.1106, "lr": 1.1187335092348285e-06, "epoch": 0.056090801108618187, "percentage": 5.61, "elapsed_time": "0:01:02", "remaining_time": "0:17:32", "throughput": 3366.51, "total_tokens": 210560}
|
|
{"current_steps": 430, "total_steps": 7577, "loss": 0.1771, "lr": 1.1319261213720316e-06, "epoch": 0.05675069288636664, "percentage": 5.68, "elapsed_time": "0:01:02", "remaining_time": "0:17:25", "throughput": 3388.53, "total_tokens": 213056}
|
|
{"current_steps": 435, "total_steps": 7577, "loss": 0.1155, "lr": 1.1451187335092347e-06, "epoch": 0.05741058466411508, "percentage": 5.74, "elapsed_time": "0:01:03", "remaining_time": "0:17:17", "throughput": 3406.56, "total_tokens": 215296}
|
|
{"current_steps": 440, "total_steps": 7577, "loss": 0.0203, "lr": 1.158311345646438e-06, "epoch": 0.058070476441863535, "percentage": 5.81, "elapsed_time": "0:01:03", "remaining_time": "0:17:10", "throughput": 3423.41, "total_tokens": 217472}
|
|
{"current_steps": 445, "total_steps": 7577, "loss": 0.0346, "lr": 1.1715039577836412e-06, "epoch": 0.058730368219611986, "percentage": 5.87, "elapsed_time": "0:01:03", "remaining_time": "0:17:03", "throughput": 3444.88, "total_tokens": 219968}
|
|
{"current_steps": 450, "total_steps": 7577, "loss": 0.0218, "lr": 1.1846965699208443e-06, "epoch": 0.05939025999736043, "percentage": 5.94, "elapsed_time": "0:01:04", "remaining_time": "0:16:56", "throughput": 3467.95, "total_tokens": 222592}
|
|
{"current_steps": 455, "total_steps": 7577, "loss": 0.522, "lr": 1.1978891820580475e-06, "epoch": 0.06005015177510888, "percentage": 6.01, "elapsed_time": "0:01:04", "remaining_time": "0:16:49", "throughput": 3484.28, "total_tokens": 224768}
|
|
{"current_steps": 460, "total_steps": 7577, "loss": 0.2349, "lr": 1.2110817941952508e-06, "epoch": 0.060710043552857335, "percentage": 6.07, "elapsed_time": "0:01:04", "remaining_time": "0:16:43", "throughput": 3505.04, "total_tokens": 227264}
|
|
{"current_steps": 465, "total_steps": 7577, "loss": 0.2363, "lr": 1.2242744063324536e-06, "epoch": 0.06136993533060578, "percentage": 6.14, "elapsed_time": "0:01:05", "remaining_time": "0:16:36", "throughput": 3525.75, "total_tokens": 229760}
|
|
{"current_steps": 470, "total_steps": 7577, "loss": 0.1719, "lr": 1.237467018469657e-06, "epoch": 0.06202982710835423, "percentage": 6.2, "elapsed_time": "0:01:05", "remaining_time": "0:16:30", "throughput": 3542.62, "total_tokens": 232000}
|
|
{"current_steps": 475, "total_steps": 7577, "loss": 0.1564, "lr": 1.2506596306068602e-06, "epoch": 0.06268971888610268, "percentage": 6.27, "elapsed_time": "0:01:05", "remaining_time": "0:16:24", "throughput": 3558.15, "total_tokens": 234176}
|
|
{"current_steps": 480, "total_steps": 7577, "loss": 0.0993, "lr": 1.2638522427440632e-06, "epoch": 0.06334961066385113, "percentage": 6.33, "elapsed_time": "0:01:06", "remaining_time": "0:16:17", "throughput": 3579.16, "total_tokens": 236736}
|
|
{"current_steps": 485, "total_steps": 7577, "loss": 0.006, "lr": 1.2770448548812665e-06, "epoch": 0.06400950244159957, "percentage": 6.4, "elapsed_time": "0:01:06", "remaining_time": "0:16:11", "throughput": 3597.4, "total_tokens": 239104}
|
|
{"current_steps": 490, "total_steps": 7577, "loss": 0.3751, "lr": 1.2902374670184698e-06, "epoch": 0.06466939421934803, "percentage": 6.47, "elapsed_time": "0:01:06", "remaining_time": "0:16:06", "throughput": 3617.13, "total_tokens": 241600}
|
|
{"current_steps": 495, "total_steps": 7577, "loss": 0.31, "lr": 1.3034300791556726e-06, "epoch": 0.06532928599709648, "percentage": 6.53, "elapsed_time": "0:01:07", "remaining_time": "0:16:00", "throughput": 3637.68, "total_tokens": 244160}
|
|
{"current_steps": 500, "total_steps": 7577, "loss": 0.1775, "lr": 1.316622691292876e-06, "epoch": 0.06598917777484492, "percentage": 6.6, "elapsed_time": "0:01:07", "remaining_time": "0:15:54", "throughput": 3654.33, "total_tokens": 246464}
|
|
{"current_steps": 505, "total_steps": 7577, "loss": 0.0849, "lr": 1.3298153034300792e-06, "epoch": 0.06664906955259338, "percentage": 6.66, "elapsed_time": "0:01:07", "remaining_time": "0:15:49", "throughput": 3673.41, "total_tokens": 248960}
|
|
{"current_steps": 510, "total_steps": 7577, "loss": 0.1716, "lr": 1.3430079155672822e-06, "epoch": 0.06730896133034182, "percentage": 6.73, "elapsed_time": "0:01:08", "remaining_time": "0:15:43", "throughput": 3691.66, "total_tokens": 251392}
|
|
{"current_steps": 515, "total_steps": 7577, "loss": 0.2321, "lr": 1.3562005277044855e-06, "epoch": 0.06796885310809027, "percentage": 6.8, "elapsed_time": "0:01:08", "remaining_time": "0:15:38", "throughput": 3710.57, "total_tokens": 253888}
|
|
{"current_steps": 520, "total_steps": 7577, "loss": 0.1398, "lr": 1.3693931398416888e-06, "epoch": 0.06862874488583873, "percentage": 6.86, "elapsed_time": "0:01:08", "remaining_time": "0:15:32", "throughput": 3729.35, "total_tokens": 256384}
|
|
{"current_steps": 525, "total_steps": 7577, "loss": 0.1866, "lr": 1.3825857519788916e-06, "epoch": 0.06928863666358717, "percentage": 6.93, "elapsed_time": "0:01:09", "remaining_time": "0:15:27", "throughput": 3742.54, "total_tokens": 258496}
|
|
{"current_steps": 530, "total_steps": 7577, "loss": 0.1035, "lr": 1.3957783641160949e-06, "epoch": 0.06994852844133562, "percentage": 6.99, "elapsed_time": "0:01:09", "remaining_time": "0:15:22", "throughput": 3759.15, "total_tokens": 260864}
|
|
{"current_steps": 535, "total_steps": 7577, "loss": 0.0497, "lr": 1.4089709762532982e-06, "epoch": 0.07060842021908408, "percentage": 7.06, "elapsed_time": "0:01:09", "remaining_time": "0:15:17", "throughput": 3777.26, "total_tokens": 263360}
|
|
{"current_steps": 540, "total_steps": 7577, "loss": 0.3041, "lr": 1.4221635883905012e-06, "epoch": 0.07126831199683252, "percentage": 7.13, "elapsed_time": "0:01:10", "remaining_time": "0:15:12", "throughput": 3798.61, "total_tokens": 266112}
|
|
{"current_steps": 545, "total_steps": 7577, "loss": 0.2543, "lr": 1.4353562005277045e-06, "epoch": 0.07192820377458096, "percentage": 7.19, "elapsed_time": "0:01:10", "remaining_time": "0:15:08", "throughput": 3815.52, "total_tokens": 268544}
|
|
{"current_steps": 550, "total_steps": 7577, "loss": 0.4179, "lr": 1.4485488126649078e-06, "epoch": 0.07258809555232942, "percentage": 7.26, "elapsed_time": "0:01:10", "remaining_time": "0:15:03", "throughput": 3831.45, "total_tokens": 270912}
|
|
{"current_steps": 555, "total_steps": 7577, "loss": 0.0028, "lr": 1.4617414248021108e-06, "epoch": 0.07324798733007787, "percentage": 7.32, "elapsed_time": "0:01:11", "remaining_time": "0:14:58", "throughput": 3852.04, "total_tokens": 273664}
|
|
{"current_steps": 560, "total_steps": 7577, "loss": 0.1789, "lr": 1.4749340369393139e-06, "epoch": 0.07390787910782631, "percentage": 7.39, "elapsed_time": "0:01:11", "remaining_time": "0:14:54", "throughput": 3869.31, "total_tokens": 276160}
|
|
{"current_steps": 565, "total_steps": 7577, "loss": 0.0054, "lr": 1.4881266490765171e-06, "epoch": 0.07456777088557477, "percentage": 7.46, "elapsed_time": "0:01:11", "remaining_time": "0:14:49", "throughput": 3887.99, "total_tokens": 278784}
|
|
{"current_steps": 570, "total_steps": 7577, "loss": 0.2245, "lr": 1.5013192612137202e-06, "epoch": 0.07522766266332322, "percentage": 7.52, "elapsed_time": "0:01:12", "remaining_time": "0:14:45", "throughput": 3903.37, "total_tokens": 281152}
|
|
{"current_steps": 575, "total_steps": 7577, "loss": 0.2359, "lr": 1.5145118733509235e-06, "epoch": 0.07588755444107166, "percentage": 7.59, "elapsed_time": "0:01:12", "remaining_time": "0:14:41", "throughput": 3917.72, "total_tokens": 283456}
|
|
{"current_steps": 580, "total_steps": 7577, "loss": 0.0789, "lr": 1.5277044854881265e-06, "epoch": 0.07654744621882012, "percentage": 7.65, "elapsed_time": "0:01:12", "remaining_time": "0:14:36", "throughput": 3935.08, "total_tokens": 286016}
|
|
{"current_steps": 585, "total_steps": 7577, "loss": 0.1872, "lr": 1.5408970976253298e-06, "epoch": 0.07720733799656856, "percentage": 7.72, "elapsed_time": "0:01:13", "remaining_time": "0:14:32", "throughput": 3950.83, "total_tokens": 288448}
|
|
{"current_steps": 590, "total_steps": 7577, "loss": 0.1543, "lr": 1.5540897097625329e-06, "epoch": 0.07786722977431701, "percentage": 7.79, "elapsed_time": "0:01:13", "remaining_time": "0:14:28", "throughput": 3965.56, "total_tokens": 290816}
|
|
{"current_steps": 595, "total_steps": 7577, "loss": 0.2174, "lr": 1.567282321899736e-06, "epoch": 0.07852712155206547, "percentage": 7.85, "elapsed_time": "0:01:13", "remaining_time": "0:14:24", "throughput": 3984.19, "total_tokens": 293504}
|
|
{"current_steps": 600, "total_steps": 7577, "loss": 0.1323, "lr": 1.5804749340369392e-06, "epoch": 0.07918701332981391, "percentage": 7.92, "elapsed_time": "0:01:13", "remaining_time": "0:14:20", "throughput": 3997.1, "total_tokens": 295744}
|
|
{"current_steps": 605, "total_steps": 7577, "loss": 0.0865, "lr": 1.5936675461741425e-06, "epoch": 0.07984690510756236, "percentage": 7.98, "elapsed_time": "0:01:14", "remaining_time": "0:14:16", "throughput": 4011.34, "total_tokens": 298112}
|
|
{"current_steps": 610, "total_steps": 7577, "loss": 0.1034, "lr": 1.6068601583113455e-06, "epoch": 0.0805067968853108, "percentage": 8.05, "elapsed_time": "0:01:14", "remaining_time": "0:14:12", "throughput": 4027.06, "total_tokens": 300608}
|
|
{"current_steps": 615, "total_steps": 7577, "loss": 0.0048, "lr": 1.6200527704485488e-06, "epoch": 0.08116668866305926, "percentage": 8.12, "elapsed_time": "0:01:14", "remaining_time": "0:14:08", "throughput": 4045.74, "total_tokens": 303360}
|
|
{"current_steps": 620, "total_steps": 7577, "loss": 0.0325, "lr": 1.633245382585752e-06, "epoch": 0.0818265804408077, "percentage": 8.18, "elapsed_time": "0:01:15", "remaining_time": "0:14:05", "throughput": 4061.99, "total_tokens": 305920}
|
|
{"current_steps": 625, "total_steps": 7577, "loss": 0.0698, "lr": 1.646437994722955e-06, "epoch": 0.08248647221855615, "percentage": 8.25, "elapsed_time": "0:01:15", "remaining_time": "0:14:01", "throughput": 4077.33, "total_tokens": 308416}
|
|
{"current_steps": 630, "total_steps": 7577, "loss": 0.1542, "lr": 1.6596306068601582e-06, "epoch": 0.08314636399630461, "percentage": 8.31, "elapsed_time": "0:01:15", "remaining_time": "0:13:57", "throughput": 4091.92, "total_tokens": 310848}
|
|
{"current_steps": 635, "total_steps": 7577, "loss": 0.225, "lr": 1.6728232189973614e-06, "epoch": 0.08380625577405305, "percentage": 8.38, "elapsed_time": "0:01:16", "remaining_time": "0:13:54", "throughput": 4107.84, "total_tokens": 313408}
|
|
{"current_steps": 640, "total_steps": 7577, "loss": 0.0946, "lr": 1.6860158311345645e-06, "epoch": 0.0844661475518015, "percentage": 8.45, "elapsed_time": "0:01:16", "remaining_time": "0:13:50", "throughput": 4122.6, "total_tokens": 315904}
|
|
{"current_steps": 645, "total_steps": 7577, "loss": 0.2463, "lr": 1.6992084432717678e-06, "epoch": 0.08512603932954996, "percentage": 8.51, "elapsed_time": "0:01:16", "remaining_time": "0:13:47", "throughput": 4133.35, "total_tokens": 318080}
|
|
{"current_steps": 650, "total_steps": 7577, "loss": 0.0751, "lr": 1.712401055408971e-06, "epoch": 0.0857859311072984, "percentage": 8.58, "elapsed_time": "0:01:17", "remaining_time": "0:13:43", "throughput": 4143.77, "total_tokens": 320256}
|
|
{"current_steps": 655, "total_steps": 7577, "loss": 0.0116, "lr": 1.7255936675461739e-06, "epoch": 0.08644582288504685, "percentage": 8.64, "elapsed_time": "0:01:17", "remaining_time": "0:13:40", "throughput": 4154.73, "total_tokens": 322496}
|
|
{"current_steps": 660, "total_steps": 7577, "loss": 0.1616, "lr": 1.7387862796833772e-06, "epoch": 0.0871057146627953, "percentage": 8.71, "elapsed_time": "0:01:17", "remaining_time": "0:13:37", "throughput": 4170.4, "total_tokens": 325120}
|
|
{"current_steps": 665, "total_steps": 7577, "loss": 0.0802, "lr": 1.7519788918205804e-06, "epoch": 0.08776560644054375, "percentage": 8.78, "elapsed_time": "0:01:18", "remaining_time": "0:13:33", "throughput": 4180.87, "total_tokens": 327296}
|
|
{"current_steps": 670, "total_steps": 7577, "loss": 0.4002, "lr": 1.7651715039577835e-06, "epoch": 0.0884254982182922, "percentage": 8.84, "elapsed_time": "0:01:18", "remaining_time": "0:13:30", "throughput": 4193.44, "total_tokens": 329664}
|
|
{"current_steps": 675, "total_steps": 7577, "loss": 0.0953, "lr": 1.7783641160949868e-06, "epoch": 0.08908538999604065, "percentage": 8.91, "elapsed_time": "0:01:18", "remaining_time": "0:13:27", "throughput": 4209.8, "total_tokens": 332416}
|
|
{"current_steps": 680, "total_steps": 7577, "loss": 0.0779, "lr": 1.79155672823219e-06, "epoch": 0.0897452817737891, "percentage": 8.97, "elapsed_time": "0:01:19", "remaining_time": "0:13:24", "throughput": 4224.17, "total_tokens": 334976}
|
|
{"current_steps": 685, "total_steps": 7577, "loss": 0.0366, "lr": 1.8047493403693929e-06, "epoch": 0.09040517355153754, "percentage": 9.04, "elapsed_time": "0:01:19", "remaining_time": "0:13:21", "throughput": 4237.58, "total_tokens": 337472}
|
|
{"current_steps": 690, "total_steps": 7577, "loss": 0.1887, "lr": 1.8179419525065961e-06, "epoch": 0.091065065329286, "percentage": 9.11, "elapsed_time": "0:01:19", "remaining_time": "0:13:18", "throughput": 4250.27, "total_tokens": 339904}
|
|
{"current_steps": 695, "total_steps": 7577, "loss": 0.2216, "lr": 1.8311345646437994e-06, "epoch": 0.09172495710703445, "percentage": 9.17, "elapsed_time": "0:01:20", "remaining_time": "0:13:15", "throughput": 4262.42, "total_tokens": 342272}
|
|
{"current_steps": 700, "total_steps": 7577, "loss": 0.197, "lr": 1.8443271767810025e-06, "epoch": 0.09238484888478289, "percentage": 9.24, "elapsed_time": "0:01:20", "remaining_time": "0:13:12", "throughput": 4274.6, "total_tokens": 344640}
|
|
{"current_steps": 705, "total_steps": 7577, "loss": 0.0457, "lr": 1.8575197889182057e-06, "epoch": 0.09304474066253135, "percentage": 9.3, "elapsed_time": "0:01:20", "remaining_time": "0:13:09", "throughput": 4287.28, "total_tokens": 347072}
|
|
{"current_steps": 710, "total_steps": 7577, "loss": 0.1862, "lr": 1.870712401055409e-06, "epoch": 0.0937046324402798, "percentage": 9.37, "elapsed_time": "0:01:21", "remaining_time": "0:13:06", "throughput": 4302.16, "total_tokens": 349696}
|
|
{"current_steps": 715, "total_steps": 7577, "loss": 0.0762, "lr": 1.883905013192612e-06, "epoch": 0.09436452421802824, "percentage": 9.44, "elapsed_time": "0:01:21", "remaining_time": "0:13:03", "throughput": 4316.18, "total_tokens": 352256}
|
|
{"current_steps": 720, "total_steps": 7577, "loss": 0.1502, "lr": 1.8970976253298151e-06, "epoch": 0.0950244159957767, "percentage": 9.5, "elapsed_time": "0:01:21", "remaining_time": "0:13:00", "throughput": 4332.17, "total_tokens": 355008}
|
|
{"current_steps": 725, "total_steps": 7577, "loss": 0.2955, "lr": 1.9102902374670186e-06, "epoch": 0.09568430777352514, "percentage": 9.57, "elapsed_time": "0:01:22", "remaining_time": "0:12:57", "throughput": 4343.82, "total_tokens": 357376}
|
|
{"current_steps": 730, "total_steps": 7577, "loss": 0.1803, "lr": 1.9234828496042215e-06, "epoch": 0.09634419955127359, "percentage": 9.63, "elapsed_time": "0:01:22", "remaining_time": "0:12:54", "throughput": 4354.69, "total_tokens": 359680}
|
|
{"current_steps": 735, "total_steps": 7577, "loss": 0.1343, "lr": 1.9366754617414247e-06, "epoch": 0.09700409132902205, "percentage": 9.7, "elapsed_time": "0:01:22", "remaining_time": "0:12:51", "throughput": 4367.4, "total_tokens": 362176}
|
|
{"current_steps": 740, "total_steps": 7577, "loss": 0.0823, "lr": 1.949868073878628e-06, "epoch": 0.09766398310677049, "percentage": 9.77, "elapsed_time": "0:01:23", "remaining_time": "0:12:49", "throughput": 4384.26, "total_tokens": 365056}
|
|
{"current_steps": 745, "total_steps": 7577, "loss": 0.3164, "lr": 1.963060686015831e-06, "epoch": 0.09832387488451894, "percentage": 9.83, "elapsed_time": "0:01:23", "remaining_time": "0:12:46", "throughput": 4396.17, "total_tokens": 367488}
|
|
{"current_steps": 750, "total_steps": 7577, "loss": 0.3437, "lr": 1.976253298153034e-06, "epoch": 0.0989837666622674, "percentage": 9.9, "elapsed_time": "0:01:23", "remaining_time": "0:12:43", "throughput": 4406.74, "total_tokens": 369792}
|
|
{"current_steps": 755, "total_steps": 7577, "loss": 0.1097, "lr": 1.9894459102902374e-06, "epoch": 0.09964365844001584, "percentage": 9.96, "elapsed_time": "0:01:24", "remaining_time": "0:12:41", "throughput": 4417.76, "total_tokens": 372160}
|
|
{"current_steps": 758, "total_steps": 7577, "eval_loss": 0.13667261600494385, "epoch": 0.1000395935066649, "percentage": 10.0, "elapsed_time": "0:01:32", "remaining_time": "0:13:50", "throughput": 4045.07, "total_tokens": 373504}
|
|
{"current_steps": 760, "total_steps": 7577, "loss": 0.1005, "lr": 1.9999998938723955e-06, "epoch": 0.10030355021776428, "percentage": 10.03, "elapsed_time": "0:01:59", "remaining_time": "0:17:51", "throughput": 3133.36, "total_tokens": 374272}
|
|
{"current_steps": 765, "total_steps": 7577, "loss": 0.0727, "lr": 1.9999961794086063e-06, "epoch": 0.10096344199551274, "percentage": 10.1, "elapsed_time": "0:01:59", "remaining_time": "0:17:46", "throughput": 3145.09, "total_tokens": 376704}
|
|
{"current_steps": 770, "total_steps": 7577, "loss": 0.287, "lr": 1.999987158587122e-06, "epoch": 0.10162333377326119, "percentage": 10.16, "elapsed_time": "0:02:00", "remaining_time": "0:17:41", "throughput": 3156.71, "total_tokens": 379136}
|
|
{"current_steps": 775, "total_steps": 7577, "loss": 0.1199, "lr": 1.9999728314558114e-06, "epoch": 0.10228322555100963, "percentage": 10.23, "elapsed_time": "0:02:00", "remaining_time": "0:17:37", "throughput": 3168.31, "total_tokens": 381568}
|
|
{"current_steps": 780, "total_steps": 7577, "loss": 0.2967, "lr": 1.9999531980906988e-06, "epoch": 0.10294311732875808, "percentage": 10.29, "elapsed_time": "0:02:00", "remaining_time": "0:17:32", "throughput": 3180.81, "total_tokens": 384128}
|
|
{"current_steps": 785, "total_steps": 7577, "loss": 0.227, "lr": 1.999928258595967e-06, "epoch": 0.10360300910650654, "percentage": 10.36, "elapsed_time": "0:02:01", "remaining_time": "0:17:27", "throughput": 3190.19, "total_tokens": 386304}
|
|
{"current_steps": 790, "total_steps": 7577, "loss": 0.3706, "lr": 1.9998980131039534e-06, "epoch": 0.10426290088425498, "percentage": 10.43, "elapsed_time": "0:02:01", "remaining_time": "0:17:23", "throughput": 3202.59, "total_tokens": 388864}
|
|
{"current_steps": 795, "total_steps": 7577, "loss": 0.0609, "lr": 1.999862461775153e-06, "epoch": 0.10492279266200343, "percentage": 10.49, "elapsed_time": "0:02:01", "remaining_time": "0:17:18", "throughput": 3212.4, "total_tokens": 391104}
|
|
{"current_steps": 800, "total_steps": 7577, "loss": 0.1122, "lr": 1.999821604798214e-06, "epoch": 0.10558268443975188, "percentage": 10.56, "elapsed_time": "0:02:02", "remaining_time": "0:17:14", "throughput": 3226.19, "total_tokens": 393856}
|
|
{"current_steps": 805, "total_steps": 7577, "loss": 0.2655, "lr": 1.999775442389939e-06, "epoch": 0.10624257621750033, "percentage": 10.62, "elapsed_time": "0:02:02", "remaining_time": "0:17:09", "throughput": 3237.95, "total_tokens": 396352}
|
|
{"current_steps": 810, "total_steps": 7577, "loss": 0.1068, "lr": 1.9997239747952843e-06, "epoch": 0.10690246799524877, "percentage": 10.69, "elapsed_time": "0:02:02", "remaining_time": "0:17:05", "throughput": 3247.63, "total_tokens": 398592}
|
|
{"current_steps": 815, "total_steps": 7577, "loss": 0.0669, "lr": 1.9996672022873546e-06, "epoch": 0.10756235977299723, "percentage": 10.76, "elapsed_time": "0:02:03", "remaining_time": "0:17:01", "throughput": 3259.32, "total_tokens": 401088}
|
|
{"current_steps": 820, "total_steps": 7577, "loss": 0.1053, "lr": 1.9996051251674073e-06, "epoch": 0.10822225155074568, "percentage": 10.82, "elapsed_time": "0:02:03", "remaining_time": "0:16:56", "throughput": 3269.92, "total_tokens": 403456}
|
|
{"current_steps": 825, "total_steps": 7577, "loss": 0.3053, "lr": 1.999537743764847e-06, "epoch": 0.10888214332849412, "percentage": 10.89, "elapsed_time": "0:02:03", "remaining_time": "0:16:52", "throughput": 3279.53, "total_tokens": 405696}
|
|
{"current_steps": 830, "total_steps": 7577, "loss": 0.4762, "lr": 1.999465058437225e-06, "epoch": 0.10954203510624258, "percentage": 10.95, "elapsed_time": "0:02:04", "remaining_time": "0:16:48", "throughput": 3290.54, "total_tokens": 408128}
|
|
{"current_steps": 835, "total_steps": 7577, "loss": 0.0134, "lr": 1.9993870695702364e-06, "epoch": 0.11020192688399102, "percentage": 11.02, "elapsed_time": "0:02:04", "remaining_time": "0:16:44", "throughput": 3304.8, "total_tokens": 411008}
|
|
{"current_steps": 840, "total_steps": 7577, "loss": 0.3147, "lr": 1.9993037775777206e-06, "epoch": 0.11086181866173947, "percentage": 11.09, "elapsed_time": "0:02:04", "remaining_time": "0:16:40", "throughput": 3314.73, "total_tokens": 413312}
|
|
{"current_steps": 845, "total_steps": 7577, "loss": 0.1734, "lr": 1.999215182901656e-06, "epoch": 0.11152171043948793, "percentage": 11.15, "elapsed_time": "0:02:05", "remaining_time": "0:16:35", "throughput": 3324.6, "total_tokens": 415616}
|
|
{"current_steps": 850, "total_steps": 7577, "loss": 0.23, "lr": 1.9991212860121587e-06, "epoch": 0.11218160221723637, "percentage": 11.22, "elapsed_time": "0:02:05", "remaining_time": "0:16:31", "throughput": 3337.74, "total_tokens": 418368}
|
|
{"current_steps": 855, "total_steps": 7577, "loss": 0.0658, "lr": 1.999022087407482e-06, "epoch": 0.11284149399498482, "percentage": 11.28, "elapsed_time": "0:02:05", "remaining_time": "0:16:28", "throughput": 3348.94, "total_tokens": 420864}
|
|
{"current_steps": 860, "total_steps": 7577, "loss": 0.2781, "lr": 1.998917587614011e-06, "epoch": 0.11350138577273328, "percentage": 11.35, "elapsed_time": "0:02:05", "remaining_time": "0:16:24", "throughput": 3357.7, "total_tokens": 423040}
|
|
{"current_steps": 865, "total_steps": 7577, "loss": 0.3602, "lr": 1.9988077871862615e-06, "epoch": 0.11416127755048172, "percentage": 11.42, "elapsed_time": "0:02:06", "remaining_time": "0:16:20", "throughput": 3367.39, "total_tokens": 425344}
|
|
{"current_steps": 870, "total_steps": 7577, "loss": 0.0011, "lr": 1.9986926867068752e-06, "epoch": 0.11482116932823017, "percentage": 11.48, "elapsed_time": "0:02:06", "remaining_time": "0:16:16", "throughput": 3379.33, "total_tokens": 427968}
|
|
{"current_steps": 875, "total_steps": 7577, "loss": 0.3594, "lr": 1.998572286786619e-06, "epoch": 0.11548106110597862, "percentage": 11.55, "elapsed_time": "0:02:06", "remaining_time": "0:16:12", "throughput": 3391.15, "total_tokens": 430592}
|
|
{"current_steps": 880, "total_steps": 7577, "loss": 0.2425, "lr": 1.9984465880643807e-06, "epoch": 0.11614095288372707, "percentage": 11.61, "elapsed_time": "0:02:07", "remaining_time": "0:16:08", "throughput": 3402.56, "total_tokens": 433152}
|
|
{"current_steps": 885, "total_steps": 7577, "loss": 0.0489, "lr": 1.998315591207165e-06, "epoch": 0.11680084466147551, "percentage": 11.68, "elapsed_time": "0:02:07", "remaining_time": "0:16:05", "throughput": 3412.04, "total_tokens": 435456}
|
|
{"current_steps": 890, "total_steps": 7577, "loss": 0.1876, "lr": 1.9981792969100912e-06, "epoch": 0.11746073643922397, "percentage": 11.75, "elapsed_time": "0:02:07", "remaining_time": "0:16:01", "throughput": 3423.78, "total_tokens": 438080}
|
|
{"current_steps": 895, "total_steps": 7577, "loss": 0.2044, "lr": 1.9980377058963875e-06, "epoch": 0.11812062821697242, "percentage": 11.81, "elapsed_time": "0:02:08", "remaining_time": "0:15:57", "throughput": 3434.97, "total_tokens": 440640}
|
|
{"current_steps": 900, "total_steps": 7577, "loss": 0.028, "lr": 1.99789081891739e-06, "epoch": 0.11878051999472086, "percentage": 11.88, "elapsed_time": "0:02:08", "remaining_time": "0:15:54", "throughput": 3444.78, "total_tokens": 443008}
|
|
{"current_steps": 905, "total_steps": 7577, "loss": 0.1773, "lr": 1.997738636752536e-06, "epoch": 0.11944041177246932, "percentage": 11.94, "elapsed_time": "0:02:08", "remaining_time": "0:15:50", "throughput": 3454.0, "total_tokens": 445312}
|
|
{"current_steps": 910, "total_steps": 7577, "loss": 0.014, "lr": 1.9975811602093624e-06, "epoch": 0.12010030355021777, "percentage": 12.01, "elapsed_time": "0:02:09", "remaining_time": "0:15:46", "throughput": 3463.72, "total_tokens": 447680}
|
|
{"current_steps": 915, "total_steps": 7577, "loss": 0.2574, "lr": 1.9974183901234984e-06, "epoch": 0.12076019532796621, "percentage": 12.08, "elapsed_time": "0:02:09", "remaining_time": "0:15:43", "throughput": 3475.58, "total_tokens": 450368}
|
|
{"current_steps": 920, "total_steps": 7577, "loss": 0.1599, "lr": 1.997250327358664e-06, "epoch": 0.12142008710571467, "percentage": 12.14, "elapsed_time": "0:02:09", "remaining_time": "0:15:39", "throughput": 3485.61, "total_tokens": 452800}
|
|
{"current_steps": 925, "total_steps": 7577, "loss": 0.023, "lr": 1.997076972806664e-06, "epoch": 0.12207997888346311, "percentage": 12.21, "elapsed_time": "0:02:10", "remaining_time": "0:15:36", "throughput": 3499.19, "total_tokens": 455744}
|
|
{"current_steps": 930, "total_steps": 7577, "loss": 0.201, "lr": 1.9968983273873827e-06, "epoch": 0.12273987066121156, "percentage": 12.27, "elapsed_time": "0:02:10", "remaining_time": "0:15:33", "throughput": 3509.11, "total_tokens": 458176}
|
|
{"current_steps": 935, "total_steps": 7577, "loss": 0.0808, "lr": 1.99671439204878e-06, "epoch": 0.12339976243896002, "percentage": 12.34, "elapsed_time": "0:02:10", "remaining_time": "0:15:29", "throughput": 3518.09, "total_tokens": 460480}
|
|
{"current_steps": 940, "total_steps": 7577, "loss": 0.1766, "lr": 1.9965251677668873e-06, "epoch": 0.12405965421670846, "percentage": 12.41, "elapsed_time": "0:02:11", "remaining_time": "0:15:26", "throughput": 3526.1, "total_tokens": 462656}
|
|
{"current_steps": 945, "total_steps": 7577, "loss": 0.3062, "lr": 1.9963306555458e-06, "epoch": 0.1247195459944569, "percentage": 12.47, "elapsed_time": "0:02:11", "remaining_time": "0:15:23", "throughput": 3537.67, "total_tokens": 465344}
|
|
{"current_steps": 950, "total_steps": 7577, "loss": 0.4272, "lr": 1.9961308564176723e-06, "epoch": 0.12537943777220537, "percentage": 12.54, "elapsed_time": "0:02:11", "remaining_time": "0:15:19", "throughput": 3546.92, "total_tokens": 467712}
|
|
{"current_steps": 955, "total_steps": 7577, "loss": 0.223, "lr": 1.9959257714427147e-06, "epoch": 0.1260393295499538, "percentage": 12.6, "elapsed_time": "0:02:12", "remaining_time": "0:15:16", "throughput": 3556.18, "total_tokens": 470080}
|
|
{"current_steps": 960, "total_steps": 7577, "loss": 0.1762, "lr": 1.995715401709186e-06, "epoch": 0.12669922132770225, "percentage": 12.67, "elapsed_time": "0:02:12", "remaining_time": "0:15:13", "throughput": 3565.81, "total_tokens": 472512}
|
|
{"current_steps": 965, "total_steps": 7577, "loss": 0.1271, "lr": 1.995499748333387e-06, "epoch": 0.1273591131054507, "percentage": 12.74, "elapsed_time": "0:02:12", "remaining_time": "0:15:10", "throughput": 3574.05, "total_tokens": 474752}
|
|
{"current_steps": 970, "total_steps": 7577, "loss": 0.1233, "lr": 1.9952788124596555e-06, "epoch": 0.12801900488319914, "percentage": 12.8, "elapsed_time": "0:02:13", "remaining_time": "0:15:07", "throughput": 3585.31, "total_tokens": 477440}
|
|
{"current_steps": 975, "total_steps": 7577, "loss": 0.1272, "lr": 1.9950525952603617e-06, "epoch": 0.12867889666094762, "percentage": 12.87, "elapsed_time": "0:02:13", "remaining_time": "0:15:03", "throughput": 3595.66, "total_tokens": 480000}
|
|
{"current_steps": 980, "total_steps": 7577, "loss": 0.1456, "lr": 1.994821097935899e-06, "epoch": 0.12933878843869606, "percentage": 12.93, "elapsed_time": "0:02:13", "remaining_time": "0:15:00", "throughput": 3604.62, "total_tokens": 482368}
|
|
{"current_steps": 985, "total_steps": 7577, "loss": 0.0077, "lr": 1.9945843217146804e-06, "epoch": 0.1299986802164445, "percentage": 13.0, "elapsed_time": "0:02:14", "remaining_time": "0:14:57", "throughput": 3612.14, "total_tokens": 484544}
|
|
{"current_steps": 990, "total_steps": 7577, "loss": 0.0852, "lr": 1.9943422678531293e-06, "epoch": 0.13065857199419295, "percentage": 13.07, "elapsed_time": "0:02:14", "remaining_time": "0:14:54", "throughput": 3619.72, "total_tokens": 486720}
|
|
{"current_steps": 995, "total_steps": 7577, "loss": 0.0741, "lr": 1.994094937635675e-06, "epoch": 0.1313184637719414, "percentage": 13.13, "elapsed_time": "0:02:14", "remaining_time": "0:14:51", "throughput": 3630.28, "total_tokens": 489344}
|
|
{"current_steps": 1000, "total_steps": 7577, "loss": 0.2262, "lr": 1.9938423323747457e-06, "epoch": 0.13197835554968984, "percentage": 13.2, "elapsed_time": "0:02:15", "remaining_time": "0:14:48", "throughput": 3639.51, "total_tokens": 491776}
|
|
{"current_steps": 1005, "total_steps": 7577, "loss": 0.1184, "lr": 1.99358445341076e-06, "epoch": 0.1326382473274383, "percentage": 13.26, "elapsed_time": "0:02:15", "remaining_time": "0:14:45", "throughput": 3646.96, "total_tokens": 493952}
|
|
{"current_steps": 1010, "total_steps": 7577, "loss": 0.2204, "lr": 1.993321302112121e-06, "epoch": 0.13329813910518676, "percentage": 13.33, "elapsed_time": "0:02:15", "remaining_time": "0:14:42", "throughput": 3655.59, "total_tokens": 496320}
|
|
{"current_steps": 1015, "total_steps": 7577, "loss": 0.0399, "lr": 1.993052879875209e-06, "epoch": 0.1339580308829352, "percentage": 13.4, "elapsed_time": "0:02:16", "remaining_time": "0:14:39", "throughput": 3662.89, "total_tokens": 498496}
|
|
{"current_steps": 1020, "total_steps": 7577, "loss": 0.2953, "lr": 1.992779188124374e-06, "epoch": 0.13461792266068365, "percentage": 13.46, "elapsed_time": "0:02:16", "remaining_time": "0:14:36", "throughput": 3672.83, "total_tokens": 501056}
|
|
{"current_steps": 1025, "total_steps": 7577, "loss": 0.1176, "lr": 1.992500228311928e-06, "epoch": 0.1352778144384321, "percentage": 13.53, "elapsed_time": "0:02:16", "remaining_time": "0:14:34", "throughput": 3680.58, "total_tokens": 503296}
|
|
{"current_steps": 1030, "total_steps": 7577, "loss": 0.2695, "lr": 1.9922160019181372e-06, "epoch": 0.13593770621618054, "percentage": 13.59, "elapsed_time": "0:02:17", "remaining_time": "0:14:31", "throughput": 3690.49, "total_tokens": 505856}
|
|
{"current_steps": 1035, "total_steps": 7577, "loss": 0.0981, "lr": 1.9919265104512138e-06, "epoch": 0.13659759799392898, "percentage": 13.66, "elapsed_time": "0:02:17", "remaining_time": "0:14:28", "throughput": 3700.3, "total_tokens": 508416}
|
|
{"current_steps": 1040, "total_steps": 7577, "loss": 0.2986, "lr": 1.9916317554473094e-06, "epoch": 0.13725748977167745, "percentage": 13.73, "elapsed_time": "0:02:17", "remaining_time": "0:14:25", "throughput": 3710.44, "total_tokens": 511040}
|
|
{"current_steps": 1045, "total_steps": 7577, "loss": 0.115, "lr": 1.9913317384705052e-06, "epoch": 0.1379173815494259, "percentage": 13.79, "elapsed_time": "0:02:18", "remaining_time": "0:14:22", "throughput": 3717.57, "total_tokens": 513216}
|
|
{"current_steps": 1050, "total_steps": 7577, "loss": 0.0893, "lr": 1.991026461112805e-06, "epoch": 0.13857727332717434, "percentage": 13.86, "elapsed_time": "0:02:18", "remaining_time": "0:14:20", "throughput": 3725.14, "total_tokens": 515456}
|
|
{"current_steps": 1055, "total_steps": 7577, "loss": 0.1591, "lr": 1.9907159249941257e-06, "epoch": 0.1392371651049228, "percentage": 13.92, "elapsed_time": "0:02:18", "remaining_time": "0:14:17", "throughput": 3733.3, "total_tokens": 517824}
|
|
{"current_steps": 1060, "total_steps": 7577, "loss": 0.0627, "lr": 1.990400131762289e-06, "epoch": 0.13989705688267123, "percentage": 13.99, "elapsed_time": "0:02:19", "remaining_time": "0:14:14", "throughput": 3742.5, "total_tokens": 520320}
|
|
{"current_steps": 1065, "total_steps": 7577, "loss": 0.1068, "lr": 1.9900790830930134e-06, "epoch": 0.14055694866041968, "percentage": 14.06, "elapsed_time": "0:02:19", "remaining_time": "0:14:12", "throughput": 3751.22, "total_tokens": 522752}
|
|
{"current_steps": 1070, "total_steps": 7577, "loss": 0.1066, "lr": 1.9897527806899047e-06, "epoch": 0.14121684043816815, "percentage": 14.12, "elapsed_time": "0:02:19", "remaining_time": "0:14:09", "throughput": 3761.09, "total_tokens": 525376}
|
|
{"current_steps": 1075, "total_steps": 7577, "loss": 0.2544, "lr": 1.9894212262844465e-06, "epoch": 0.1418767322159166, "percentage": 14.19, "elapsed_time": "0:02:20", "remaining_time": "0:14:06", "throughput": 3769.79, "total_tokens": 527808}
|
|
{"current_steps": 1080, "total_steps": 7577, "loss": 0.1578, "lr": 1.989084421635992e-06, "epoch": 0.14253662399366504, "percentage": 14.25, "elapsed_time": "0:02:20", "remaining_time": "0:14:04", "throughput": 3778.82, "total_tokens": 530304}
|
|
{"current_steps": 1085, "total_steps": 7577, "loss": 0.2733, "lr": 1.988742368531754e-06, "epoch": 0.14319651577141349, "percentage": 14.32, "elapsed_time": "0:02:20", "remaining_time": "0:14:01", "throughput": 3785.63, "total_tokens": 532480}
|
|
{"current_steps": 1090, "total_steps": 7577, "loss": 0.0745, "lr": 1.9883950687867947e-06, "epoch": 0.14385640754916193, "percentage": 14.39, "elapsed_time": "0:02:20", "remaining_time": "0:13:59", "throughput": 3795.87, "total_tokens": 535168}
|
|
{"current_steps": 1095, "total_steps": 7577, "loss": 0.1377, "lr": 1.9880425242440187e-06, "epoch": 0.14451629932691037, "percentage": 14.45, "elapsed_time": "0:02:21", "remaining_time": "0:13:56", "throughput": 3804.28, "total_tokens": 537600}
|
|
{"current_steps": 1100, "total_steps": 7577, "loss": 0.0585, "lr": 1.9876847367741607e-06, "epoch": 0.14517619110465885, "percentage": 14.52, "elapsed_time": "0:02:21", "remaining_time": "0:13:54", "throughput": 3813.14, "total_tokens": 540096}
|
|
{"current_steps": 1105, "total_steps": 7577, "loss": 0.1719, "lr": 1.987321708275776e-06, "epoch": 0.1458360828824073, "percentage": 14.58, "elapsed_time": "0:02:21", "remaining_time": "0:13:51", "throughput": 3821.95, "total_tokens": 542592}
|
|
{"current_steps": 1110, "total_steps": 7577, "loss": 0.0895, "lr": 1.986953440675231e-06, "epoch": 0.14649597466015574, "percentage": 14.65, "elapsed_time": "0:02:22", "remaining_time": "0:13:49", "throughput": 3829.86, "total_tokens": 544960}
|
|
{"current_steps": 1115, "total_steps": 7577, "loss": 0.1572, "lr": 1.9865799359266925e-06, "epoch": 0.14715586643790418, "percentage": 14.72, "elapsed_time": "0:02:22", "remaining_time": "0:13:46", "throughput": 3836.51, "total_tokens": 547136}
|
|
{"current_steps": 1120, "total_steps": 7577, "loss": 0.1272, "lr": 1.986201196012118e-06, "epoch": 0.14781575821565263, "percentage": 14.78, "elapsed_time": "0:02:22", "remaining_time": "0:13:44", "throughput": 3843.95, "total_tokens": 549440}
|
|
{"current_steps": 1125, "total_steps": 7577, "loss": 0.2212, "lr": 1.985817222941245e-06, "epoch": 0.14847564999340107, "percentage": 14.85, "elapsed_time": "0:02:23", "remaining_time": "0:13:41", "throughput": 3853.41, "total_tokens": 552064}
|
|
{"current_steps": 1130, "total_steps": 7577, "loss": 0.0754, "lr": 1.9854280187515794e-06, "epoch": 0.14913554177114954, "percentage": 14.91, "elapsed_time": "0:02:23", "remaining_time": "0:13:39", "throughput": 3861.16, "total_tokens": 554432}
|
|
{"current_steps": 1135, "total_steps": 7577, "loss": 0.0564, "lr": 1.985033585508386e-06, "epoch": 0.149795433548898, "percentage": 14.98, "elapsed_time": "0:02:23", "remaining_time": "0:13:36", "throughput": 3868.96, "total_tokens": 556800}
|
|
{"current_steps": 1137, "total_steps": 7577, "eval_loss": 0.16564705967903137, "epoch": 0.15005939025999737, "percentage": 15.01, "elapsed_time": "0:02:31", "remaining_time": "0:14:20", "throughput": 3673.32, "total_tokens": 557824}
|
|
{"current_steps": 1140, "total_steps": 7577, "loss": 0.5119, "lr": 1.9846339253046766e-06, "epoch": 0.15045532532664643, "percentage": 15.05, "elapsed_time": "0:03:44", "remaining_time": "0:21:07", "throughput": 2492.38, "total_tokens": 559296}
|
|
{"current_steps": 1145, "total_steps": 7577, "loss": 0.163, "lr": 1.984229040261199e-06, "epoch": 0.15111521710439488, "percentage": 15.11, "elapsed_time": "0:03:44", "remaining_time": "0:21:02", "throughput": 2501.16, "total_tokens": 562112}
|
|
{"current_steps": 1150, "total_steps": 7577, "loss": 0.1812, "lr": 1.9838189325264263e-06, "epoch": 0.15177510888214332, "percentage": 15.18, "elapsed_time": "0:03:45", "remaining_time": "0:20:57", "throughput": 2507.16, "total_tokens": 564288}
|
|
{"current_steps": 1155, "total_steps": 7577, "loss": 0.033, "lr": 1.983403604276546e-06, "epoch": 0.15243500065989177, "percentage": 15.24, "elapsed_time": "0:03:45", "remaining_time": "0:20:53", "throughput": 2514.85, "total_tokens": 566848}
|
|
{"current_steps": 1160, "total_steps": 7577, "loss": 0.3099, "lr": 1.9829830577154457e-06, "epoch": 0.15309489243764024, "percentage": 15.31, "elapsed_time": "0:03:45", "remaining_time": "0:20:48", "throughput": 2521.4, "total_tokens": 569152}
|
|
{"current_steps": 1165, "total_steps": 7577, "loss": 0.0496, "lr": 1.982557295074705e-06, "epoch": 0.15375478421538868, "percentage": 15.38, "elapsed_time": "0:03:46", "remaining_time": "0:20:44", "throughput": 2527.97, "total_tokens": 571456}
|
|
{"current_steps": 1170, "total_steps": 7577, "loss": 0.0902, "lr": 1.982126318613581e-06, "epoch": 0.15441467599313713, "percentage": 15.44, "elapsed_time": "0:03:46", "remaining_time": "0:20:39", "throughput": 2534.81, "total_tokens": 573824}
|
|
{"current_steps": 1175, "total_steps": 7577, "loss": 0.0556, "lr": 1.9816901306189977e-06, "epoch": 0.15507456777088557, "percentage": 15.51, "elapsed_time": "0:03:46", "remaining_time": "0:20:35", "throughput": 2541.32, "total_tokens": 576128}
|
|
{"current_steps": 1180, "total_steps": 7577, "loss": 0.1048, "lr": 1.9812487334055342e-06, "epoch": 0.15573445954863402, "percentage": 15.57, "elapsed_time": "0:03:47", "remaining_time": "0:20:30", "throughput": 2547.85, "total_tokens": 578432}
|
|
{"current_steps": 1185, "total_steps": 7577, "loss": 0.1522, "lr": 1.98080212931541e-06, "epoch": 0.15639435132638246, "percentage": 15.64, "elapsed_time": "0:03:47", "remaining_time": "0:20:26", "throughput": 2554.34, "total_tokens": 580736}
|
|
{"current_steps": 1190, "total_steps": 7577, "loss": 0.0609, "lr": 1.980350320718476e-06, "epoch": 0.15705424310413094, "percentage": 15.71, "elapsed_time": "0:03:47", "remaining_time": "0:20:21", "throughput": 2560.82, "total_tokens": 583040}
|
|
{"current_steps": 1195, "total_steps": 7577, "loss": 0.0032, "lr": 1.9798933100121985e-06, "epoch": 0.15771413488187938, "percentage": 15.77, "elapsed_time": "0:03:48", "remaining_time": "0:20:17", "throughput": 2567.28, "total_tokens": 585344}
|
|
{"current_steps": 1200, "total_steps": 7577, "loss": 0.0896, "lr": 1.97943109962165e-06, "epoch": 0.15837402665962783, "percentage": 15.84, "elapsed_time": "0:03:48", "remaining_time": "0:20:13", "throughput": 2574.78, "total_tokens": 587904}
|
|
{"current_steps": 1205, "total_steps": 7577, "loss": 0.1423, "lr": 1.978963691999493e-06, "epoch": 0.15903391843737627, "percentage": 15.9, "elapsed_time": "0:03:48", "remaining_time": "0:20:09", "throughput": 2581.2, "total_tokens": 590208}
|
|
{"current_steps": 1210, "total_steps": 7577, "loss": 0.2118, "lr": 1.978491089625969e-06, "epoch": 0.15969381021512472, "percentage": 15.97, "elapsed_time": "0:03:48", "remaining_time": "0:20:04", "throughput": 2587.62, "total_tokens": 592512}
|
|
{"current_steps": 1215, "total_steps": 7577, "loss": 0.2427, "lr": 1.9780132950088854e-06, "epoch": 0.16035370199287316, "percentage": 16.04, "elapsed_time": "0:03:49", "remaining_time": "0:20:00", "throughput": 2595.04, "total_tokens": 595072}
|
|
{"current_steps": 1220, "total_steps": 7577, "loss": 0.3134, "lr": 1.9775303106836e-06, "epoch": 0.1610135937706216, "percentage": 16.1, "elapsed_time": "0:03:49", "remaining_time": "0:19:56", "throughput": 2602.45, "total_tokens": 597632}
|
|
{"current_steps": 1225, "total_steps": 7577, "loss": 0.117, "lr": 1.977042139213011e-06, "epoch": 0.16167348554837008, "percentage": 16.17, "elapsed_time": "0:03:49", "remaining_time": "0:19:52", "throughput": 2609.85, "total_tokens": 600192}
|
|
{"current_steps": 1230, "total_steps": 7577, "loss": 0.1205, "lr": 1.9765487831875404e-06, "epoch": 0.16233337732611852, "percentage": 16.23, "elapsed_time": "0:03:50", "remaining_time": "0:19:48", "throughput": 2615.38, "total_tokens": 602304}
|
|
{"current_steps": 1235, "total_steps": 7577, "loss": 0.1485, "lr": 1.9760502452251217e-06, "epoch": 0.16299326910386697, "percentage": 16.3, "elapsed_time": "0:03:50", "remaining_time": "0:19:44", "throughput": 2621.71, "total_tokens": 604608}
|
|
{"current_steps": 1240, "total_steps": 7577, "loss": 0.1371, "lr": 1.975546527971186e-06, "epoch": 0.1636531608816154, "percentage": 16.37, "elapsed_time": "0:03:50", "remaining_time": "0:19:40", "throughput": 2628.21, "total_tokens": 606976}
|
|
{"current_steps": 1245, "total_steps": 7577, "loss": 0.0801, "lr": 1.9750376340986472e-06, "epoch": 0.16431305265936386, "percentage": 16.43, "elapsed_time": "0:03:51", "remaining_time": "0:19:36", "throughput": 2635.73, "total_tokens": 609600}
|
|
{"current_steps": 1250, "total_steps": 7577, "loss": 0.2009, "lr": 1.974523566307889e-06, "epoch": 0.1649729444371123, "percentage": 16.5, "elapsed_time": "0:03:51", "remaining_time": "0:19:32", "throughput": 2641.72, "total_tokens": 611840}
|
|
{"current_steps": 1255, "total_steps": 7577, "loss": 0.1902, "lr": 1.9740043273267487e-06, "epoch": 0.16563283621486077, "percentage": 16.56, "elapsed_time": "0:03:51", "remaining_time": "0:19:28", "throughput": 2649.53, "total_tokens": 614528}
|
|
{"current_steps": 1260, "total_steps": 7577, "loss": 0.0022, "lr": 1.973479919910505e-06, "epoch": 0.16629272799260922, "percentage": 16.63, "elapsed_time": "0:03:52", "remaining_time": "0:19:24", "throughput": 2656.55, "total_tokens": 617024}
|
|
{"current_steps": 1265, "total_steps": 7577, "loss": 0.1153, "lr": 1.972950346841862e-06, "epoch": 0.16695261977035766, "percentage": 16.7, "elapsed_time": "0:03:52", "remaining_time": "0:19:20", "throughput": 2663.01, "total_tokens": 619392}
|
|
{"current_steps": 1270, "total_steps": 7577, "loss": 0.1353, "lr": 1.972415610930934e-06, "epoch": 0.1676125115481061, "percentage": 16.76, "elapsed_time": "0:03:52", "remaining_time": "0:19:16", "throughput": 2669.96, "total_tokens": 621888}
|
|
{"current_steps": 1275, "total_steps": 7577, "loss": 0.1096, "lr": 1.9718757150152324e-06, "epoch": 0.16827240332585455, "percentage": 16.83, "elapsed_time": "0:03:53", "remaining_time": "0:19:12", "throughput": 2676.14, "total_tokens": 624192}
|
|
{"current_steps": 1280, "total_steps": 7577, "loss": 0.0658, "lr": 1.9713306619596488e-06, "epoch": 0.168932295103603, "percentage": 16.89, "elapsed_time": "0:03:53", "remaining_time": "0:19:09", "throughput": 2682.76, "total_tokens": 626624}
|
|
{"current_steps": 1285, "total_steps": 7577, "loss": 0.0575, "lr": 1.9707804546564407e-06, "epoch": 0.16959218688135147, "percentage": 16.96, "elapsed_time": "0:03:53", "remaining_time": "0:19:05", "throughput": 2688.86, "total_tokens": 628928}
|
|
{"current_steps": 1290, "total_steps": 7577, "loss": 0.1254, "lr": 1.9702250960252164e-06, "epoch": 0.17025207865909991, "percentage": 17.03, "elapsed_time": "0:03:54", "remaining_time": "0:19:01", "throughput": 2696.5, "total_tokens": 631616}
|
|
{"current_steps": 1295, "total_steps": 7577, "loss": 0.0568, "lr": 1.969664589012918e-06, "epoch": 0.17091197043684836, "percentage": 17.09, "elapsed_time": "0:03:54", "remaining_time": "0:18:57", "throughput": 2703.27, "total_tokens": 634112}
|
|
{"current_steps": 1300, "total_steps": 7577, "loss": 0.1854, "lr": 1.9690989365938077e-06, "epoch": 0.1715718622145968, "percentage": 17.16, "elapsed_time": "0:03:54", "remaining_time": "0:18:54", "throughput": 2709.35, "total_tokens": 636416}
|
|
{"current_steps": 1305, "total_steps": 7577, "loss": 0.0006, "lr": 1.9685281417694513e-06, "epoch": 0.17223175399234525, "percentage": 17.22, "elapsed_time": "0:03:55", "remaining_time": "0:18:50", "throughput": 2715.91, "total_tokens": 638848}
|
|
{"current_steps": 1310, "total_steps": 7577, "loss": 0.12, "lr": 1.967952207568702e-06, "epoch": 0.1728916457700937, "percentage": 17.29, "elapsed_time": "0:03:55", "remaining_time": "0:18:46", "throughput": 2722.21, "total_tokens": 641216}
|
|
{"current_steps": 1315, "total_steps": 7577, "loss": 0.0008, "lr": 1.967371137047685e-06, "epoch": 0.17355153754784217, "percentage": 17.36, "elapsed_time": "0:03:55", "remaining_time": "0:18:43", "throughput": 2730.29, "total_tokens": 644032}
|
|
{"current_steps": 1320, "total_steps": 7577, "loss": 0.0887, "lr": 1.966784933289778e-06, "epoch": 0.1742114293255906, "percentage": 17.42, "elapsed_time": "0:03:56", "remaining_time": "0:18:39", "throughput": 2737.01, "total_tokens": 646528}
|
|
{"current_steps": 1325, "total_steps": 7577, "loss": 0.1307, "lr": 1.9661935994056014e-06, "epoch": 0.17487132110333906, "percentage": 17.49, "elapsed_time": "0:03:56", "remaining_time": "0:18:36", "throughput": 2743.98, "total_tokens": 649088}
|
|
{"current_steps": 1330, "total_steps": 7577, "loss": 0.1035, "lr": 1.965597138532996e-06, "epoch": 0.1755312128810875, "percentage": 17.55, "elapsed_time": "0:03:56", "remaining_time": "0:18:32", "throughput": 2750.42, "total_tokens": 651520}
|
|
{"current_steps": 1335, "total_steps": 7577, "loss": 0.0123, "lr": 1.964995553837009e-06, "epoch": 0.17619110465883595, "percentage": 17.62, "elapsed_time": "0:03:57", "remaining_time": "0:18:29", "throughput": 2757.15, "total_tokens": 654016}
|
|
{"current_steps": 1340, "total_steps": 7577, "loss": 0.0924, "lr": 1.964388848509875e-06, "epoch": 0.1768509964365844, "percentage": 17.69, "elapsed_time": "0:03:57", "remaining_time": "0:18:25", "throughput": 2763.1, "total_tokens": 656320}
|
|
{"current_steps": 1345, "total_steps": 7577, "loss": 0.1818, "lr": 1.9637770257710026e-06, "epoch": 0.17751088821433286, "percentage": 17.75, "elapsed_time": "0:03:57", "remaining_time": "0:18:22", "throughput": 2770.06, "total_tokens": 658880}
|
|
{"current_steps": 1350, "total_steps": 7577, "loss": 0.001, "lr": 1.9631600888669545e-06, "epoch": 0.1781707799920813, "percentage": 17.82, "elapsed_time": "0:03:58", "remaining_time": "0:18:18", "throughput": 2775.92, "total_tokens": 661184}
|
|
{"current_steps": 1355, "total_steps": 7577, "loss": 0.0885, "lr": 1.962538041071431e-06, "epoch": 0.17883067176982975, "percentage": 17.88, "elapsed_time": "0:03:58", "remaining_time": "0:18:15", "throughput": 2782.56, "total_tokens": 663680}
|
|
{"current_steps": 1360, "total_steps": 7577, "loss": 0.0377, "lr": 1.961910885685253e-06, "epoch": 0.1794905635475782, "percentage": 17.95, "elapsed_time": "0:03:58", "remaining_time": "0:18:11", "throughput": 2788.7, "total_tokens": 666048}
|
|
{"current_steps": 1365, "total_steps": 7577, "loss": 0.0748, "lr": 1.9612786260363436e-06, "epoch": 0.18015045532532664, "percentage": 18.02, "elapsed_time": "0:03:59", "remaining_time": "0:18:08", "throughput": 2795.05, "total_tokens": 668480}
|
|
{"current_steps": 1370, "total_steps": 7577, "loss": 0.1292, "lr": 1.9606412654797116e-06, "epoch": 0.1808103471030751, "percentage": 18.08, "elapsed_time": "0:03:59", "remaining_time": "0:18:05", "throughput": 2803.57, "total_tokens": 671488}
|
|
{"current_steps": 1375, "total_steps": 7577, "loss": 0.1406, "lr": 1.9599988073974332e-06, "epoch": 0.18147023888082353, "percentage": 18.15, "elapsed_time": "0:03:59", "remaining_time": "0:18:01", "throughput": 2809.9, "total_tokens": 673920}
|
|
{"current_steps": 1380, "total_steps": 7577, "loss": 0.0929, "lr": 1.959351255198634e-06, "epoch": 0.182130130658572, "percentage": 18.21, "elapsed_time": "0:04:00", "remaining_time": "0:17:58", "throughput": 2816.42, "total_tokens": 676416}
|
|
{"current_steps": 1385, "total_steps": 7577, "loss": 0.0006, "lr": 1.9586986123194704e-06, "epoch": 0.18279002243632045, "percentage": 18.28, "elapsed_time": "0:04:00", "remaining_time": "0:17:55", "throughput": 2823.42, "total_tokens": 679040}
|
|
{"current_steps": 1390, "total_steps": 7577, "loss": 0.1902, "lr": 1.958040882223112e-06, "epoch": 0.1834499142140689, "percentage": 18.34, "elapsed_time": "0:04:00", "remaining_time": "0:17:52", "throughput": 2831.36, "total_tokens": 681920}
|
|
{"current_steps": 1395, "total_steps": 7577, "loss": 0.048, "lr": 1.9573780683997235e-06, "epoch": 0.18410980599181734, "percentage": 18.41, "elapsed_time": "0:04:01", "remaining_time": "0:17:48", "throughput": 2837.82, "total_tokens": 684416}
|
|
{"current_steps": 1400, "total_steps": 7577, "loss": 0.0828, "lr": 1.956710174366445e-06, "epoch": 0.18476969776956578, "percentage": 18.48, "elapsed_time": "0:04:01", "remaining_time": "0:17:45", "throughput": 2844.49, "total_tokens": 686976}
|
|
{"current_steps": 1405, "total_steps": 7577, "loss": 0.2272, "lr": 1.9560372036673764e-06, "epoch": 0.18542958954731423, "percentage": 18.54, "elapsed_time": "0:04:01", "remaining_time": "0:17:42", "throughput": 2850.71, "total_tokens": 689408}
|
|
{"current_steps": 1410, "total_steps": 7577, "loss": 0.0649, "lr": 1.955359159873553e-06, "epoch": 0.1860894813250627, "percentage": 18.61, "elapsed_time": "0:04:02", "remaining_time": "0:17:39", "throughput": 2856.37, "total_tokens": 691712}
|
|
{"current_steps": 1415, "total_steps": 7577, "loss": 0.066, "lr": 1.954676046582932e-06, "epoch": 0.18674937310281114, "percentage": 18.67, "elapsed_time": "0:04:02", "remaining_time": "0:17:36", "throughput": 2862.17, "total_tokens": 694080}
|
|
{"current_steps": 1420, "total_steps": 7577, "loss": 0.2175, "lr": 1.9539878674203706e-06, "epoch": 0.1874092648805596, "percentage": 18.74, "elapsed_time": "0:04:02", "remaining_time": "0:17:32", "throughput": 2868.8, "total_tokens": 696640}
|
|
{"current_steps": 1425, "total_steps": 7577, "loss": 0.0003, "lr": 1.9532946260376076e-06, "epoch": 0.18806915665830803, "percentage": 18.81, "elapsed_time": "0:04:03", "remaining_time": "0:17:29", "throughput": 2875.13, "total_tokens": 699136}
|
|
{"current_steps": 1430, "total_steps": 7577, "loss": 0.4092, "lr": 1.952596326113244e-06, "epoch": 0.18872904843605648, "percentage": 18.87, "elapsed_time": "0:04:03", "remaining_time": "0:17:26", "throughput": 2881.74, "total_tokens": 701696}
|
|
{"current_steps": 1435, "total_steps": 7577, "loss": 0.2349, "lr": 1.9518929713527226e-06, "epoch": 0.18938894021380492, "percentage": 18.94, "elapsed_time": "0:04:03", "remaining_time": "0:17:23", "throughput": 2888.81, "total_tokens": 704384}
|
|
{"current_steps": 1440, "total_steps": 7577, "loss": 0.0082, "lr": 1.9511845654883097e-06, "epoch": 0.1900488319915534, "percentage": 19.0, "elapsed_time": "0:04:04", "remaining_time": "0:17:20", "throughput": 2893.87, "total_tokens": 706560}
|
|
{"current_steps": 1445, "total_steps": 7577, "loss": 0.1221, "lr": 1.9504711122790754e-06, "epoch": 0.19070872376930184, "percentage": 19.07, "elapsed_time": "0:04:04", "remaining_time": "0:17:17", "throughput": 2900.94, "total_tokens": 709248}
|
|
{"current_steps": 1450, "total_steps": 7577, "loss": 0.08, "lr": 1.949752615510871e-06, "epoch": 0.19136861554705029, "percentage": 19.14, "elapsed_time": "0:04:04", "remaining_time": "0:17:14", "throughput": 2905.48, "total_tokens": 711296}
|
|
{"current_steps": 1455, "total_steps": 7577, "loss": 0.0655, "lr": 1.949029078996313e-06, "epoch": 0.19202850732479873, "percentage": 19.2, "elapsed_time": "0:04:05", "remaining_time": "0:17:11", "throughput": 2911.53, "total_tokens": 713728}
|
|
{"current_steps": 1460, "total_steps": 7577, "loss": 0.1876, "lr": 1.9483005065747584e-06, "epoch": 0.19268839910254718, "percentage": 19.27, "elapsed_time": "0:04:05", "remaining_time": "0:17:08", "throughput": 2917.82, "total_tokens": 716224}
|
|
{"current_steps": 1465, "total_steps": 7577, "loss": 0.3791, "lr": 1.947566902112289e-06, "epoch": 0.19334829088029562, "percentage": 19.33, "elapsed_time": "0:04:05", "remaining_time": "0:17:05", "throughput": 2921.86, "total_tokens": 718528}
|
|
{"current_steps": 1470, "total_steps": 7577, "loss": 0.2271, "lr": 1.9468282695016863e-06, "epoch": 0.1940081826580441, "percentage": 19.4, "elapsed_time": "0:04:06", "remaining_time": "0:17:02", "throughput": 2927.84, "total_tokens": 720960}
|
|
{"current_steps": 1475, "total_steps": 7577, "loss": 0.1805, "lr": 1.946084612662415e-06, "epoch": 0.19466807443579254, "percentage": 19.47, "elapsed_time": "0:04:06", "remaining_time": "0:17:00", "throughput": 2933.04, "total_tokens": 723200}
|
|
{"current_steps": 1480, "total_steps": 7577, "loss": 0.2018, "lr": 1.9453359355405987e-06, "epoch": 0.19532796621354098, "percentage": 19.53, "elapsed_time": "0:04:06", "remaining_time": "0:16:57", "throughput": 2939.99, "total_tokens": 725888}
|
|
{"current_steps": 1485, "total_steps": 7577, "loss": 0.0364, "lr": 1.944582242109002e-06, "epoch": 0.19598785799128943, "percentage": 19.6, "elapsed_time": "0:04:07", "remaining_time": "0:16:54", "throughput": 2945.7, "total_tokens": 728256}
|
|
{"current_steps": 1490, "total_steps": 7577, "loss": 0.1441, "lr": 1.943823536367006e-06, "epoch": 0.19664774976903787, "percentage": 19.66, "elapsed_time": "0:04:07", "remaining_time": "0:16:51", "throughput": 2951.66, "total_tokens": 730688}
|
|
{"current_steps": 1495, "total_steps": 7577, "loss": 0.1269, "lr": 1.9430598223405913e-06, "epoch": 0.19730764154678632, "percentage": 19.73, "elapsed_time": "0:04:07", "remaining_time": "0:16:48", "throughput": 2957.08, "total_tokens": 732992}
|
|
{"current_steps": 1500, "total_steps": 7577, "loss": 0.1163, "lr": 1.9422911040823125e-06, "epoch": 0.1979675333245348, "percentage": 19.8, "elapsed_time": "0:04:08", "remaining_time": "0:16:45", "throughput": 2962.98, "total_tokens": 735424}
|
|
{"current_steps": 1505, "total_steps": 7577, "loss": 0.1995, "lr": 1.941517385671279e-06, "epoch": 0.19862742510228323, "percentage": 19.86, "elapsed_time": "0:04:08", "remaining_time": "0:16:42", "throughput": 2968.15, "total_tokens": 737664}
|
|
{"current_steps": 1510, "total_steps": 7577, "loss": 0.0888, "lr": 1.940738671213134e-06, "epoch": 0.19928731688003168, "percentage": 19.93, "elapsed_time": "0:04:08", "remaining_time": "0:16:39", "throughput": 2974.01, "total_tokens": 740096}
|
|
{"current_steps": 1515, "total_steps": 7577, "loss": 0.1883, "lr": 1.93995496484003e-06, "epoch": 0.19994720865778012, "percentage": 19.99, "elapsed_time": "0:04:09", "remaining_time": "0:16:37", "throughput": 2981.27, "total_tokens": 742912}
|
|
{"current_steps": 1516, "total_steps": 7577, "eval_loss": 0.10974650084972382, "epoch": 0.2000791870133298, "percentage": 20.01, "elapsed_time": "0:04:17", "remaining_time": "0:17:08", "throughput": 2890.65, "total_tokens": 743424}
|
|
{"current_steps": 1520, "total_steps": 7577, "loss": 0.1544, "lr": 1.9391662707106092e-06, "epoch": 0.20060710043552857, "percentage": 20.06, "elapsed_time": "0:04:41", "remaining_time": "0:18:43", "throughput": 2644.36, "total_tokens": 745536}
|
|
{"current_steps": 1525, "total_steps": 7577, "loss": 0.0053, "lr": 1.9383725930099814e-06, "epoch": 0.201266992213277, "percentage": 20.13, "elapsed_time": "0:04:42", "remaining_time": "0:18:40", "throughput": 2649.83, "total_tokens": 747968}
|
|
{"current_steps": 1530, "total_steps": 7577, "loss": 0.2261, "lr": 1.9375739359497e-06, "epoch": 0.20192688399102549, "percentage": 20.19, "elapsed_time": "0:04:42", "remaining_time": "0:18:36", "throughput": 2655.45, "total_tokens": 750464}
|
|
{"current_steps": 1535, "total_steps": 7577, "loss": 0.2511, "lr": 1.936770303767741e-06, "epoch": 0.20258677576877393, "percentage": 20.26, "elapsed_time": "0:04:42", "remaining_time": "0:18:33", "throughput": 2660.93, "total_tokens": 752896}
|
|
{"current_steps": 1540, "total_steps": 7577, "loss": 0.1788, "lr": 1.9359617007284815e-06, "epoch": 0.20324666754652237, "percentage": 20.32, "elapsed_time": "0:04:43", "remaining_time": "0:18:30", "throughput": 2667.45, "total_tokens": 755648}
|
|
{"current_steps": 1545, "total_steps": 7577, "loss": 0.2098, "lr": 1.9351481311226738e-06, "epoch": 0.20390655932427082, "percentage": 20.39, "elapsed_time": "0:04:43", "remaining_time": "0:18:27", "throughput": 2673.08, "total_tokens": 758144}
|
|
{"current_steps": 1550, "total_steps": 7577, "loss": 0.0831, "lr": 1.934329599267426e-06, "epoch": 0.20456645110201926, "percentage": 20.46, "elapsed_time": "0:04:43", "remaining_time": "0:18:24", "throughput": 2678.96, "total_tokens": 760704}
|
|
{"current_steps": 1555, "total_steps": 7577, "loss": 0.0039, "lr": 1.933506109506178e-06, "epoch": 0.2052263428797677, "percentage": 20.52, "elapsed_time": "0:04:44", "remaining_time": "0:18:20", "throughput": 2684.35, "total_tokens": 763136}
|
|
{"current_steps": 1560, "total_steps": 7577, "loss": 0.0802, "lr": 1.9326776662086765e-06, "epoch": 0.20588623465751615, "percentage": 20.59, "elapsed_time": "0:04:44", "remaining_time": "0:18:17", "throughput": 2691.25, "total_tokens": 766016}
|
|
{"current_steps": 1565, "total_steps": 7577, "loss": 0.2901, "lr": 1.9318442737709565e-06, "epoch": 0.20654612643526463, "percentage": 20.65, "elapsed_time": "0:04:44", "remaining_time": "0:18:14", "throughput": 2696.82, "total_tokens": 768512}
|
|
{"current_steps": 1570, "total_steps": 7577, "loss": 0.1689, "lr": 1.9310059366153116e-06, "epoch": 0.20720601821301307, "percentage": 20.72, "elapsed_time": "0:04:45", "remaining_time": "0:18:11", "throughput": 2701.77, "total_tokens": 770816}
|
|
{"current_steps": 1575, "total_steps": 7577, "loss": 0.248, "lr": 1.930162659190277e-06, "epoch": 0.20786590999076152, "percentage": 20.79, "elapsed_time": "0:04:45", "remaining_time": "0:18:08", "throughput": 2707.33, "total_tokens": 773312}
|
|
{"current_steps": 1580, "total_steps": 7577, "loss": 0.0688, "lr": 1.9293144459706007e-06, "epoch": 0.20852580176850996, "percentage": 20.85, "elapsed_time": "0:04:45", "remaining_time": "0:18:05", "throughput": 2712.45, "total_tokens": 775680}
|
|
{"current_steps": 1585, "total_steps": 7577, "loss": 0.1075, "lr": 1.928461301457223e-06, "epoch": 0.2091856935462584, "percentage": 20.92, "elapsed_time": "0:04:46", "remaining_time": "0:18:02", "throughput": 2717.6, "total_tokens": 778048}
|
|
{"current_steps": 1590, "total_steps": 7577, "loss": 0.2795, "lr": 1.92760323017725e-06, "epoch": 0.20984558532400685, "percentage": 20.98, "elapsed_time": "0:04:46", "remaining_time": "0:17:59", "throughput": 2723.58, "total_tokens": 780672}
|
|
{"current_steps": 1595, "total_steps": 7577, "loss": 0.2294, "lr": 1.9267402366839338e-06, "epoch": 0.21050547710175532, "percentage": 21.05, "elapsed_time": "0:04:46", "remaining_time": "0:17:56", "throughput": 2729.71, "total_tokens": 783360}
|
|
{"current_steps": 1600, "total_steps": 7577, "loss": 0.1843, "lr": 1.9258723255566433e-06, "epoch": 0.21116536887950377, "percentage": 21.12, "elapsed_time": "0:04:47", "remaining_time": "0:17:53", "throughput": 2735.24, "total_tokens": 785856}
|
|
{"current_steps": 1605, "total_steps": 7577, "loss": 0.1321, "lr": 1.924999501400843e-06, "epoch": 0.2118252606572522, "percentage": 21.18, "elapsed_time": "0:04:47", "remaining_time": "0:17:50", "throughput": 2741.19, "total_tokens": 788480}
|
|
{"current_steps": 1610, "total_steps": 7577, "loss": 0.1504, "lr": 1.924121768848068e-06, "epoch": 0.21248515243500066, "percentage": 21.25, "elapsed_time": "0:04:47", "remaining_time": "0:17:47", "throughput": 2746.92, "total_tokens": 791040}
|
|
{"current_steps": 1615, "total_steps": 7577, "loss": 0.1462, "lr": 1.923239132555899e-06, "epoch": 0.2131450442127491, "percentage": 21.31, "elapsed_time": "0:04:48", "remaining_time": "0:17:44", "throughput": 2752.63, "total_tokens": 793600}
|
|
{"current_steps": 1620, "total_steps": 7577, "loss": 0.1516, "lr": 1.9223515972079378e-06, "epoch": 0.21380493599049755, "percentage": 21.38, "elapsed_time": "0:04:48", "remaining_time": "0:17:41", "throughput": 2757.71, "total_tokens": 795968}
|
|
{"current_steps": 1625, "total_steps": 7577, "loss": 0.1019, "lr": 1.9214591675137813e-06, "epoch": 0.21446482776824602, "percentage": 21.45, "elapsed_time": "0:04:48", "remaining_time": "0:17:38", "throughput": 2762.56, "total_tokens": 798272}
|
|
{"current_steps": 1630, "total_steps": 7577, "loss": 0.1059, "lr": 1.9205618482090003e-06, "epoch": 0.21512471954599446, "percentage": 21.51, "elapsed_time": "0:04:49", "remaining_time": "0:17:35", "throughput": 2768.9, "total_tokens": 801024}
|
|
{"current_steps": 1635, "total_steps": 7577, "loss": 0.1862, "lr": 1.91965964405511e-06, "epoch": 0.2157846113237429, "percentage": 21.58, "elapsed_time": "0:04:49", "remaining_time": "0:17:32", "throughput": 2774.59, "total_tokens": 803584}
|
|
{"current_steps": 1640, "total_steps": 7577, "loss": 0.0438, "lr": 1.9187525598395457e-06, "epoch": 0.21644450310149135, "percentage": 21.64, "elapsed_time": "0:04:49", "remaining_time": "0:17:29", "throughput": 2779.67, "total_tokens": 805952}
|
|
{"current_steps": 1645, "total_steps": 7577, "loss": 0.1326, "lr": 1.9178406003756396e-06, "epoch": 0.2171043948792398, "percentage": 21.71, "elapsed_time": "0:04:50", "remaining_time": "0:17:26", "throughput": 2785.27, "total_tokens": 808512}
|
|
{"current_steps": 1650, "total_steps": 7577, "loss": 0.1071, "lr": 1.9169237705025936e-06, "epoch": 0.21776428665698824, "percentage": 21.78, "elapsed_time": "0:04:50", "remaining_time": "0:17:23", "throughput": 2791.11, "total_tokens": 811136}
|
|
{"current_steps": 1655, "total_steps": 7577, "loss": 0.0347, "lr": 1.9160020750854533e-06, "epoch": 0.21842417843473672, "percentage": 21.84, "elapsed_time": "0:04:50", "remaining_time": "0:17:21", "throughput": 2795.66, "total_tokens": 813376}
|
|
{"current_steps": 1660, "total_steps": 7577, "loss": 0.1731, "lr": 1.915075519015083e-06, "epoch": 0.21908407021248516, "percentage": 21.91, "elapsed_time": "0:04:51", "remaining_time": "0:17:18", "throughput": 2801.05, "total_tokens": 815872}
|
|
{"current_steps": 1665, "total_steps": 7577, "loss": 0.0142, "lr": 1.914144107208139e-06, "epoch": 0.2197439619902336, "percentage": 21.97, "elapsed_time": "0:04:51", "remaining_time": "0:17:15", "throughput": 2805.98, "total_tokens": 818240}
|
|
{"current_steps": 1670, "total_steps": 7577, "loss": 0.0476, "lr": 1.913207844607045e-06, "epoch": 0.22040385376798205, "percentage": 22.04, "elapsed_time": "0:04:51", "remaining_time": "0:17:12", "throughput": 2811.37, "total_tokens": 820736}
|
|
{"current_steps": 1675, "total_steps": 7577, "loss": 0.3054, "lr": 1.912266736179964e-06, "epoch": 0.2210637455457305, "percentage": 22.11, "elapsed_time": "0:04:52", "remaining_time": "0:17:09", "throughput": 2817.97, "total_tokens": 823616}
|
|
{"current_steps": 1680, "total_steps": 7577, "loss": 0.2016, "lr": 1.9113207869207727e-06, "epoch": 0.22172363732347894, "percentage": 22.17, "elapsed_time": "0:04:52", "remaining_time": "0:17:07", "throughput": 2823.31, "total_tokens": 826112}
|
|
{"current_steps": 1685, "total_steps": 7577, "loss": 0.1342, "lr": 1.9103700018490365e-06, "epoch": 0.2223835291012274, "percentage": 22.24, "elapsed_time": "0:04:52", "remaining_time": "0:17:04", "throughput": 2828.82, "total_tokens": 828672}
|
|
{"current_steps": 1690, "total_steps": 7577, "loss": 0.163, "lr": 1.9094143860099787e-06, "epoch": 0.22304342087897586, "percentage": 22.3, "elapsed_time": "0:04:53", "remaining_time": "0:17:01", "throughput": 2834.6, "total_tokens": 831296}
|
|
{"current_steps": 1695, "total_steps": 7577, "loss": 0.1052, "lr": 1.9084539444744594e-06, "epoch": 0.2237033126567243, "percentage": 22.37, "elapsed_time": "0:04:53", "remaining_time": "0:16:58", "throughput": 2840.12, "total_tokens": 833856}
|
|
{"current_steps": 1700, "total_steps": 7577, "loss": 0.1483, "lr": 1.907488682338944e-06, "epoch": 0.22436320443447275, "percentage": 22.44, "elapsed_time": "0:04:53", "remaining_time": "0:16:56", "throughput": 2845.85, "total_tokens": 836480}
|
|
{"current_steps": 1705, "total_steps": 7577, "loss": 0.0834, "lr": 1.9065186047254782e-06, "epoch": 0.2250230962122212, "percentage": 22.5, "elapsed_time": "0:04:54", "remaining_time": "0:16:53", "throughput": 2851.14, "total_tokens": 838976}
|
|
{"current_steps": 1710, "total_steps": 7577, "loss": 0.1598, "lr": 1.9055437167816604e-06, "epoch": 0.22568298798996964, "percentage": 22.57, "elapsed_time": "0:04:54", "remaining_time": "0:16:50", "throughput": 2857.25, "total_tokens": 841728}
|
|
{"current_steps": 1715, "total_steps": 7577, "loss": 0.0044, "lr": 1.9045640236806149e-06, "epoch": 0.22634287976771808, "percentage": 22.63, "elapsed_time": "0:04:54", "remaining_time": "0:16:48", "throughput": 2861.72, "total_tokens": 843968}
|
|
{"current_steps": 1720, "total_steps": 7577, "loss": 0.4454, "lr": 1.903579530620963e-06, "epoch": 0.22700277154546655, "percentage": 22.7, "elapsed_time": "0:04:55", "remaining_time": "0:16:45", "throughput": 2867.01, "total_tokens": 846464}
|
|
{"current_steps": 1725, "total_steps": 7577, "loss": 0.1272, "lr": 1.9025902428267975e-06, "epoch": 0.227662663323215, "percentage": 22.77, "elapsed_time": "0:04:55", "remaining_time": "0:16:42", "throughput": 2872.69, "total_tokens": 849088}
|
|
{"current_steps": 1730, "total_steps": 7577, "loss": 0.0506, "lr": 1.901596165547653e-06, "epoch": 0.22832255510096344, "percentage": 22.83, "elapsed_time": "0:04:55", "remaining_time": "0:16:40", "throughput": 2878.35, "total_tokens": 851712}
|
|
{"current_steps": 1735, "total_steps": 7577, "loss": 0.1537, "lr": 1.9005973040584796e-06, "epoch": 0.2289824468787119, "percentage": 22.9, "elapsed_time": "0:04:56", "remaining_time": "0:16:37", "throughput": 2883.59, "total_tokens": 854208}
|
|
{"current_steps": 1740, "total_steps": 7577, "loss": 0.1147, "lr": 1.8995936636596138e-06, "epoch": 0.22964233865646033, "percentage": 22.96, "elapsed_time": "0:04:56", "remaining_time": "0:16:34", "throughput": 2888.38, "total_tokens": 856576}
|
|
{"current_steps": 1745, "total_steps": 7577, "loss": 0.1347, "lr": 1.8985852496767504e-06, "epoch": 0.23030223043420878, "percentage": 23.03, "elapsed_time": "0:04:56", "remaining_time": "0:16:32", "throughput": 2893.4, "total_tokens": 859008}
|
|
{"current_steps": 1750, "total_steps": 7577, "loss": 0.146, "lr": 1.897572067460916e-06, "epoch": 0.23096212221195725, "percentage": 23.1, "elapsed_time": "0:04:57", "remaining_time": "0:16:29", "throughput": 2898.42, "total_tokens": 861440}
|
|
{"current_steps": 1755, "total_steps": 7577, "loss": 0.1079, "lr": 1.8965541223884377e-06, "epoch": 0.2316220139897057, "percentage": 23.16, "elapsed_time": "0:04:57", "remaining_time": "0:16:27", "throughput": 2903.61, "total_tokens": 863936}
|
|
{"current_steps": 1760, "total_steps": 7577, "loss": 0.0547, "lr": 1.8955314198609171e-06, "epoch": 0.23228190576745414, "percentage": 23.23, "elapsed_time": "0:04:57", "remaining_time": "0:16:24", "throughput": 2908.0, "total_tokens": 866176}
|
|
{"current_steps": 1765, "total_steps": 7577, "loss": 0.0747, "lr": 1.8945039653052005e-06, "epoch": 0.23294179754520258, "percentage": 23.29, "elapsed_time": "0:04:58", "remaining_time": "0:16:21", "throughput": 2912.58, "total_tokens": 868480}
|
|
{"current_steps": 1770, "total_steps": 7577, "loss": 0.1454, "lr": 1.8934717641733498e-06, "epoch": 0.23360168932295103, "percentage": 23.36, "elapsed_time": "0:04:58", "remaining_time": "0:16:19", "throughput": 2917.72, "total_tokens": 870976}
|
|
{"current_steps": 1775, "total_steps": 7577, "loss": 0.2544, "lr": 1.8924348219426143e-06, "epoch": 0.23426158110069947, "percentage": 23.43, "elapsed_time": "0:04:58", "remaining_time": "0:16:16", "throughput": 2921.54, "total_tokens": 873088}
|
|
{"current_steps": 1780, "total_steps": 7577, "loss": 0.2463, "lr": 1.8913931441154016e-06, "epoch": 0.23492147287844795, "percentage": 23.49, "elapsed_time": "0:04:59", "remaining_time": "0:16:14", "throughput": 2926.5, "total_tokens": 875520}
|
|
{"current_steps": 1785, "total_steps": 7577, "loss": 0.0807, "lr": 1.8903467362192482e-06, "epoch": 0.2355813646561964, "percentage": 23.56, "elapsed_time": "0:04:59", "remaining_time": "0:16:11", "throughput": 2930.43, "total_tokens": 877632}
|
|
{"current_steps": 1790, "total_steps": 7577, "loss": 0.1002, "lr": 1.8892956038067895e-06, "epoch": 0.23624125643394484, "percentage": 23.62, "elapsed_time": "0:04:59", "remaining_time": "0:16:09", "throughput": 2935.15, "total_tokens": 880000}
|
|
{"current_steps": 1795, "total_steps": 7577, "loss": 0.0187, "lr": 1.8882397524557317e-06, "epoch": 0.23690114821169328, "percentage": 23.69, "elapsed_time": "0:05:00", "remaining_time": "0:16:06", "throughput": 2939.26, "total_tokens": 882176}
|
|
{"current_steps": 1800, "total_steps": 7577, "loss": 0.0792, "lr": 1.8871791877688208e-06, "epoch": 0.23756103998944172, "percentage": 23.76, "elapsed_time": "0:05:00", "remaining_time": "0:16:04", "throughput": 2944.79, "total_tokens": 884800}
|
|
{"current_steps": 1805, "total_steps": 7577, "loss": 0.1225, "lr": 1.8861139153738143e-06, "epoch": 0.23822093176719017, "percentage": 23.82, "elapsed_time": "0:05:00", "remaining_time": "0:16:01", "throughput": 2949.27, "total_tokens": 887104}
|
|
{"current_steps": 1810, "total_steps": 7577, "loss": 0.0388, "lr": 1.8850439409234498e-06, "epoch": 0.23888082354493864, "percentage": 23.89, "elapsed_time": "0:05:01", "remaining_time": "0:15:59", "throughput": 2953.75, "total_tokens": 889408}
|
|
{"current_steps": 1815, "total_steps": 7577, "loss": 0.1219, "lr": 1.8839692700954161e-06, "epoch": 0.2395407153226871, "percentage": 23.95, "elapsed_time": "0:05:01", "remaining_time": "0:15:56", "throughput": 2958.04, "total_tokens": 891648}
|
|
{"current_steps": 1820, "total_steps": 7577, "loss": 0.2359, "lr": 1.8828899085923234e-06, "epoch": 0.24020060710043553, "percentage": 24.02, "elapsed_time": "0:05:01", "remaining_time": "0:15:54", "throughput": 2963.3, "total_tokens": 894208}
|
|
{"current_steps": 1825, "total_steps": 7577, "loss": 0.0859, "lr": 1.881805862141671e-06, "epoch": 0.24086049887818398, "percentage": 24.09, "elapsed_time": "0:05:02", "remaining_time": "0:15:52", "throughput": 2968.37, "total_tokens": 896704}
|
|
{"current_steps": 1830, "total_steps": 7577, "loss": 0.0579, "lr": 1.8807171364958196e-06, "epoch": 0.24152039065593242, "percentage": 24.15, "elapsed_time": "0:05:02", "remaining_time": "0:15:49", "throughput": 2973.58, "total_tokens": 899264}
|
|
{"current_steps": 1835, "total_steps": 7577, "loss": 0.0186, "lr": 1.879623737431959e-06, "epoch": 0.24218028243368087, "percentage": 24.22, "elapsed_time": "0:05:02", "remaining_time": "0:15:47", "throughput": 2978.58, "total_tokens": 901760}
|
|
{"current_steps": 1840, "total_steps": 7577, "loss": 0.1353, "lr": 1.8785256707520778e-06, "epoch": 0.24284017421142934, "percentage": 24.28, "elapsed_time": "0:05:03", "remaining_time": "0:15:44", "throughput": 2982.4, "total_tokens": 903872}
|
|
{"current_steps": 1845, "total_steps": 7577, "loss": 0.0463, "lr": 1.8774229422829325e-06, "epoch": 0.24350006598917778, "percentage": 24.35, "elapsed_time": "0:05:03", "remaining_time": "0:15:42", "throughput": 2987.41, "total_tokens": 906368}
|
|
{"current_steps": 1850, "total_steps": 7577, "loss": 0.0658, "lr": 1.8763155578760181e-06, "epoch": 0.24415995776692623, "percentage": 24.42, "elapsed_time": "0:05:03", "remaining_time": "0:15:40", "throughput": 2992.41, "total_tokens": 908864}
|
|
{"current_steps": 1855, "total_steps": 7577, "loss": 0.163, "lr": 1.8752035234075336e-06, "epoch": 0.24481984954467467, "percentage": 24.48, "elapsed_time": "0:05:04", "remaining_time": "0:15:37", "throughput": 2996.39, "total_tokens": 911040}
|
|
{"current_steps": 1860, "total_steps": 7577, "loss": 0.1979, "lr": 1.8740868447783554e-06, "epoch": 0.24547974132242312, "percentage": 24.55, "elapsed_time": "0:05:04", "remaining_time": "0:15:35", "throughput": 3000.98, "total_tokens": 913408}
|
|
{"current_steps": 1865, "total_steps": 7577, "loss": 0.2382, "lr": 1.8729655279140012e-06, "epoch": 0.24613963310017156, "percentage": 24.61, "elapsed_time": "0:05:04", "remaining_time": "0:15:33", "throughput": 3006.13, "total_tokens": 915968}
|
|
{"current_steps": 1870, "total_steps": 7577, "loss": 0.1639, "lr": 1.8718395787646029e-06, "epoch": 0.24679952487792003, "percentage": 24.68, "elapsed_time": "0:05:05", "remaining_time": "0:15:30", "throughput": 3011.28, "total_tokens": 918528}
|
|
{"current_steps": 1875, "total_steps": 7577, "loss": 0.0099, "lr": 1.870709003304872e-06, "epoch": 0.24745941665566848, "percentage": 24.75, "elapsed_time": "0:05:05", "remaining_time": "0:15:28", "throughput": 3016.62, "total_tokens": 921152}
|
|
{"current_steps": 1880, "total_steps": 7577, "loss": 0.0006, "lr": 1.8695738075340693e-06, "epoch": 0.24811930843341692, "percentage": 24.81, "elapsed_time": "0:05:05", "remaining_time": "0:15:26", "throughput": 3021.16, "total_tokens": 923520}
|
|
{"current_steps": 1885, "total_steps": 7577, "loss": 0.0253, "lr": 1.8684339974759723e-06, "epoch": 0.24877920021116537, "percentage": 24.88, "elapsed_time": "0:05:06", "remaining_time": "0:15:24", "throughput": 3025.69, "total_tokens": 925888}
|
|
{"current_steps": 1890, "total_steps": 7577, "loss": 0.0925, "lr": 1.8672895791788445e-06, "epoch": 0.2494390919889138, "percentage": 24.94, "elapsed_time": "0:05:06", "remaining_time": "0:15:21", "throughput": 3031.57, "total_tokens": 928704}
|
|
{"current_steps": 1895, "total_steps": 7577, "loss": 0.2857, "lr": 1.8661405587154017e-06, "epoch": 0.2500989837666623, "percentage": 25.01, "elapsed_time": "0:05:06", "remaining_time": "0:15:19", "throughput": 3035.68, "total_tokens": 930944}
|
|
{"current_steps": 1895, "total_steps": 7577, "eval_loss": 0.13187885284423828, "epoch": 0.2500989837666623, "percentage": 25.01, "elapsed_time": "0:05:14", "remaining_time": "0:15:43", "throughput": 2959.81, "total_tokens": 930944}
|
|
{"current_steps": 1900, "total_steps": 7577, "loss": 0.3692, "lr": 1.8649869421827808e-06, "epoch": 0.25075887554441073, "percentage": 25.08, "elapsed_time": "0:06:05", "remaining_time": "0:18:12", "throughput": 2553.64, "total_tokens": 933376}
|
|
{"current_steps": 1905, "total_steps": 7577, "loss": 0.0896, "lr": 1.863828735702507e-06, "epoch": 0.2514187673221592, "percentage": 25.14, "elapsed_time": "0:06:05", "remaining_time": "0:18:09", "throughput": 2558.54, "total_tokens": 936000}
|
|
{"current_steps": 1910, "total_steps": 7577, "loss": 0.1121, "lr": 1.862665945420462e-06, "epoch": 0.2520786590999076, "percentage": 25.21, "elapsed_time": "0:06:06", "remaining_time": "0:18:06", "throughput": 2562.89, "total_tokens": 938432}
|
|
{"current_steps": 1915, "total_steps": 7577, "loss": 0.1862, "lr": 1.8614985775068498e-06, "epoch": 0.25273855087765607, "percentage": 25.27, "elapsed_time": "0:06:06", "remaining_time": "0:18:03", "throughput": 2568.37, "total_tokens": 941312}
|
|
{"current_steps": 1920, "total_steps": 7577, "loss": 0.0727, "lr": 1.860326638156167e-06, "epoch": 0.2533984426554045, "percentage": 25.34, "elapsed_time": "0:06:06", "remaining_time": "0:18:00", "throughput": 2571.84, "total_tokens": 943488}
|
|
{"current_steps": 1925, "total_steps": 7577, "loss": 0.083, "lr": 1.8591501335871653e-06, "epoch": 0.25405833443315295, "percentage": 25.41, "elapsed_time": "0:06:07", "remaining_time": "0:17:58", "throughput": 2575.99, "total_tokens": 945856}
|
|
{"current_steps": 1930, "total_steps": 7577, "loss": 0.2212, "lr": 1.857969070042824e-06, "epoch": 0.2547182262109014, "percentage": 25.47, "elapsed_time": "0:06:07", "remaining_time": "0:17:55", "throughput": 2580.45, "total_tokens": 948352}
|
|
{"current_steps": 1935, "total_steps": 7577, "loss": 0.08, "lr": 1.8567834537903116e-06, "epoch": 0.25537811798864984, "percentage": 25.54, "elapsed_time": "0:06:07", "remaining_time": "0:17:52", "throughput": 2585.26, "total_tokens": 950976}
|
|
{"current_steps": 1940, "total_steps": 7577, "loss": 0.1289, "lr": 1.8555932911209565e-06, "epoch": 0.2560380097663983, "percentage": 25.6, "elapsed_time": "0:06:08", "remaining_time": "0:17:49", "throughput": 2589.04, "total_tokens": 953216}
|
|
{"current_steps": 1945, "total_steps": 7577, "loss": 0.0221, "lr": 1.8543985883502119e-06, "epoch": 0.25669790154414673, "percentage": 25.67, "elapsed_time": "0:06:08", "remaining_time": "0:17:47", "throughput": 2593.33, "total_tokens": 955648}
|
|
{"current_steps": 1950, "total_steps": 7577, "loss": 0.0906, "lr": 1.8531993518176216e-06, "epoch": 0.25735779332189523, "percentage": 25.74, "elapsed_time": "0:06:08", "remaining_time": "0:17:44", "throughput": 2597.09, "total_tokens": 957888}
|
|
{"current_steps": 1955, "total_steps": 7577, "loss": 0.3163, "lr": 1.8519955878867889e-06, "epoch": 0.2580176850996437, "percentage": 25.8, "elapsed_time": "0:06:09", "remaining_time": "0:17:41", "throughput": 2600.84, "total_tokens": 960128}
|
|
{"current_steps": 1960, "total_steps": 7577, "loss": 0.1495, "lr": 1.8507873029453392e-06, "epoch": 0.2586775768773921, "percentage": 25.87, "elapsed_time": "0:06:09", "remaining_time": "0:17:38", "throughput": 2604.92, "total_tokens": 962496}
|
|
{"current_steps": 1965, "total_steps": 7577, "loss": 0.1628, "lr": 1.8495745034048896e-06, "epoch": 0.25933746865514057, "percentage": 25.93, "elapsed_time": "0:06:09", "remaining_time": "0:17:36", "throughput": 2609.68, "total_tokens": 965120}
|
|
{"current_steps": 1970, "total_steps": 7577, "loss": 0.045, "lr": 1.8483571957010127e-06, "epoch": 0.259997360432889, "percentage": 26.0, "elapsed_time": "0:06:10", "remaining_time": "0:17:33", "throughput": 2614.07, "total_tokens": 967616}
|
|
{"current_steps": 1975, "total_steps": 7577, "loss": 0.0574, "lr": 1.8471353862932035e-06, "epoch": 0.26065725221063746, "percentage": 26.07, "elapsed_time": "0:06:10", "remaining_time": "0:17:30", "throughput": 2618.79, "total_tokens": 970240}
|
|
{"current_steps": 1980, "total_steps": 7577, "loss": 0.0719, "lr": 1.8459090816648444e-06, "epoch": 0.2613171439883859, "percentage": 26.13, "elapsed_time": "0:06:10", "remaining_time": "0:17:28", "throughput": 2622.68, "total_tokens": 972544}
|
|
{"current_steps": 1985, "total_steps": 7577, "loss": 0.2544, "lr": 1.8446782883231713e-06, "epoch": 0.26197703576613435, "percentage": 26.2, "elapsed_time": "0:06:11", "remaining_time": "0:17:25", "throughput": 2626.76, "total_tokens": 974912}
|
|
{"current_steps": 1990, "total_steps": 7577, "loss": 0.4055, "lr": 1.8434430127992387e-06, "epoch": 0.2626369275438828, "percentage": 26.26, "elapsed_time": "0:06:11", "remaining_time": "0:17:22", "throughput": 2630.34, "total_tokens": 977088}
|
|
{"current_steps": 1995, "total_steps": 7577, "loss": 0.1845, "lr": 1.8422032616478857e-06, "epoch": 0.26329681932163124, "percentage": 26.33, "elapsed_time": "0:06:11", "remaining_time": "0:17:20", "throughput": 2634.87, "total_tokens": 979648}
|
|
{"current_steps": 2000, "total_steps": 7577, "loss": 0.1184, "lr": 1.8409590414477001e-06, "epoch": 0.2639567110993797, "percentage": 26.4, "elapsed_time": "0:06:12", "remaining_time": "0:17:17", "throughput": 2639.7, "total_tokens": 982336}
|
|
{"current_steps": 2005, "total_steps": 7577, "loss": 0.0071, "lr": 1.839710358800985e-06, "epoch": 0.2646166028771281, "percentage": 26.46, "elapsed_time": "0:06:12", "remaining_time": "0:17:15", "throughput": 2643.86, "total_tokens": 984768}
|
|
{"current_steps": 2010, "total_steps": 7577, "loss": 0.049, "lr": 1.8384572203337224e-06, "epoch": 0.2652764946548766, "percentage": 26.53, "elapsed_time": "0:06:12", "remaining_time": "0:17:12", "throughput": 2647.88, "total_tokens": 987136}
|
|
{"current_steps": 2015, "total_steps": 7577, "loss": 0.1358, "lr": 1.837199632695538e-06, "epoch": 0.26593638643262507, "percentage": 26.59, "elapsed_time": "0:06:13", "remaining_time": "0:17:09", "throughput": 2652.68, "total_tokens": 989824}
|
|
{"current_steps": 2020, "total_steps": 7577, "loss": 0.2944, "lr": 1.8359376025596682e-06, "epoch": 0.2665962782103735, "percentage": 26.66, "elapsed_time": "0:06:13", "remaining_time": "0:17:07", "throughput": 2656.35, "total_tokens": 992064}
|
|
{"current_steps": 2025, "total_steps": 7577, "loss": 0.1288, "lr": 1.8346711366229215e-06, "epoch": 0.26725616998812196, "percentage": 26.73, "elapsed_time": "0:06:13", "remaining_time": "0:17:04", "throughput": 2660.19, "total_tokens": 994368}
|
|
{"current_steps": 2030, "total_steps": 7577, "loss": 0.181, "lr": 1.8334002416056442e-06, "epoch": 0.2679160617658704, "percentage": 26.79, "elapsed_time": "0:06:14", "remaining_time": "0:17:02", "throughput": 2664.53, "total_tokens": 996864}
|
|
{"current_steps": 2035, "total_steps": 7577, "loss": 0.2285, "lr": 1.8321249242516865e-06, "epoch": 0.26857595354361885, "percentage": 26.86, "elapsed_time": "0:06:14", "remaining_time": "0:16:59", "throughput": 2668.86, "total_tokens": 999360}
|
|
{"current_steps": 2040, "total_steps": 7577, "loss": 0.0873, "lr": 1.8308451913283638e-06, "epoch": 0.2692358453213673, "percentage": 26.92, "elapsed_time": "0:06:14", "remaining_time": "0:16:57", "throughput": 2673.35, "total_tokens": 1001920}
|
|
{"current_steps": 2045, "total_steps": 7577, "loss": 0.0305, "lr": 1.8295610496264229e-06, "epoch": 0.26989573709911574, "percentage": 26.99, "elapsed_time": "0:06:15", "remaining_time": "0:16:54", "throughput": 2677.13, "total_tokens": 1004224}
|
|
{"current_steps": 2050, "total_steps": 7577, "loss": 0.0393, "lr": 1.828272505960005e-06, "epoch": 0.2705556288768642, "percentage": 27.06, "elapsed_time": "0:06:15", "remaining_time": "0:16:52", "throughput": 2680.92, "total_tokens": 1006528}
|
|
{"current_steps": 2055, "total_steps": 7577, "loss": 0.1813, "lr": 1.8269795671666098e-06, "epoch": 0.27121552065461263, "percentage": 27.12, "elapsed_time": "0:06:15", "remaining_time": "0:16:49", "throughput": 2684.9, "total_tokens": 1008896}
|
|
{"current_steps": 2060, "total_steps": 7577, "loss": 0.1234, "lr": 1.8256822401070591e-06, "epoch": 0.2718754124323611, "percentage": 27.19, "elapsed_time": "0:06:16", "remaining_time": "0:16:47", "throughput": 2689.82, "total_tokens": 1011648}
|
|
{"current_steps": 2065, "total_steps": 7577, "loss": 0.0522, "lr": 1.8243805316654611e-06, "epoch": 0.2725353042101095, "percentage": 27.25, "elapsed_time": "0:06:16", "remaining_time": "0:16:44", "throughput": 2694.27, "total_tokens": 1014208}
|
|
{"current_steps": 2070, "total_steps": 7577, "loss": 0.1725, "lr": 1.823074448749172e-06, "epoch": 0.27319519598785796, "percentage": 27.32, "elapsed_time": "0:06:16", "remaining_time": "0:16:42", "throughput": 2698.36, "total_tokens": 1016640}
|
|
{"current_steps": 2075, "total_steps": 7577, "loss": 0.063, "lr": 1.8217639982887623e-06, "epoch": 0.27385508776560646, "percentage": 27.39, "elapsed_time": "0:06:17", "remaining_time": "0:16:39", "throughput": 2703.03, "total_tokens": 1019328}
|
|
{"current_steps": 2080, "total_steps": 7577, "loss": 0.0781, "lr": 1.8204491872379769e-06, "epoch": 0.2745149795433549, "percentage": 27.45, "elapsed_time": "0:06:17", "remaining_time": "0:16:37", "throughput": 2706.82, "total_tokens": 1021696}
|
|
{"current_steps": 2085, "total_steps": 7577, "loss": 0.09, "lr": 1.8191300225737e-06, "epoch": 0.27517487132110335, "percentage": 27.52, "elapsed_time": "0:06:17", "remaining_time": "0:16:35", "throughput": 2711.22, "total_tokens": 1024256}
|
|
{"current_steps": 2090, "total_steps": 7577, "loss": 0.2074, "lr": 1.8178065112959184e-06, "epoch": 0.2758347630988518, "percentage": 27.58, "elapsed_time": "0:06:18", "remaining_time": "0:16:32", "throughput": 2714.93, "total_tokens": 1026560}
|
|
{"current_steps": 2095, "total_steps": 7577, "loss": 0.2426, "lr": 1.8164786604276832e-06, "epoch": 0.27649465487660024, "percentage": 27.65, "elapsed_time": "0:06:18", "remaining_time": "0:16:30", "throughput": 2719.48, "total_tokens": 1029184}
|
|
{"current_steps": 2100, "total_steps": 7577, "loss": 0.1464, "lr": 1.8151464770150727e-06, "epoch": 0.2771545466543487, "percentage": 27.72, "elapsed_time": "0:06:18", "remaining_time": "0:16:27", "throughput": 2723.88, "total_tokens": 1031744}
|
|
{"current_steps": 2105, "total_steps": 7577, "loss": 0.1528, "lr": 1.8138099681271558e-06, "epoch": 0.27781443843209713, "percentage": 27.78, "elapsed_time": "0:06:19", "remaining_time": "0:16:25", "throughput": 2727.63, "total_tokens": 1034048}
|
|
{"current_steps": 2110, "total_steps": 7577, "loss": 0.1601, "lr": 1.8124691408559536e-06, "epoch": 0.2784743302098456, "percentage": 27.85, "elapsed_time": "0:06:19", "remaining_time": "0:16:23", "throughput": 2731.86, "total_tokens": 1036544}
|
|
{"current_steps": 2115, "total_steps": 7577, "loss": 0.1008, "lr": 1.8111240023164023e-06, "epoch": 0.279134221987594, "percentage": 27.91, "elapsed_time": "0:06:19", "remaining_time": "0:16:20", "throughput": 2735.59, "total_tokens": 1038848}
|
|
{"current_steps": 2120, "total_steps": 7577, "loss": 0.0632, "lr": 1.809774559646316e-06, "epoch": 0.27979411376534247, "percentage": 27.98, "elapsed_time": "0:06:20", "remaining_time": "0:16:18", "throughput": 2739.31, "total_tokens": 1041152}
|
|
{"current_steps": 2125, "total_steps": 7577, "loss": 0.0935, "lr": 1.8084208200063469e-06, "epoch": 0.2804540055430909, "percentage": 28.05, "elapsed_time": "0:06:20", "remaining_time": "0:16:16", "throughput": 2744.29, "total_tokens": 1043968}
|
|
{"current_steps": 2130, "total_steps": 7577, "loss": 0.2149, "lr": 1.8070627905799496e-06, "epoch": 0.28111389732083936, "percentage": 28.11, "elapsed_time": "0:06:20", "remaining_time": "0:16:13", "throughput": 2747.99, "total_tokens": 1046272}
|
|
{"current_steps": 2135, "total_steps": 7577, "loss": 0.0862, "lr": 1.8057004785733413e-06, "epoch": 0.28177378909858786, "percentage": 28.18, "elapsed_time": "0:06:21", "remaining_time": "0:16:11", "throughput": 2751.37, "total_tokens": 1048448}
|
|
{"current_steps": 2140, "total_steps": 7577, "loss": 0.1758, "lr": 1.8043338912154647e-06, "epoch": 0.2824336808763363, "percentage": 28.24, "elapsed_time": "0:06:21", "remaining_time": "0:16:08", "throughput": 2755.87, "total_tokens": 1051072}
|
|
{"current_steps": 2145, "total_steps": 7577, "loss": 0.0486, "lr": 1.8029630357579486e-06, "epoch": 0.28309357265408475, "percentage": 28.31, "elapsed_time": "0:06:21", "remaining_time": "0:16:06", "throughput": 2759.41, "total_tokens": 1053312}
|
|
{"current_steps": 2150, "total_steps": 7577, "loss": 0.0795, "lr": 1.8015879194750702e-06, "epoch": 0.2837534644318332, "percentage": 28.38, "elapsed_time": "0:06:22", "remaining_time": "0:16:04", "throughput": 2763.24, "total_tokens": 1055680}
|
|
{"current_steps": 2155, "total_steps": 7577, "loss": 0.187, "lr": 1.8002085496637165e-06, "epoch": 0.28441335620958164, "percentage": 28.44, "elapsed_time": "0:06:22", "remaining_time": "0:16:02", "throughput": 2766.9, "total_tokens": 1057984}
|
|
{"current_steps": 2160, "total_steps": 7577, "loss": 0.1492, "lr": 1.7988249336433448e-06, "epoch": 0.2850732479873301, "percentage": 28.51, "elapsed_time": "0:06:22", "remaining_time": "0:15:59", "throughput": 2771.66, "total_tokens": 1060736}
|
|
{"current_steps": 2165, "total_steps": 7577, "loss": 0.1319, "lr": 1.7974370787559447e-06, "epoch": 0.2857331397650785, "percentage": 28.57, "elapsed_time": "0:06:23", "remaining_time": "0:15:57", "throughput": 2776.28, "total_tokens": 1063424}
|
|
{"current_steps": 2170, "total_steps": 7577, "loss": 0.0575, "lr": 1.796044992365999e-06, "epoch": 0.28639303154282697, "percentage": 28.64, "elapsed_time": "0:06:23", "remaining_time": "0:15:55", "throughput": 2779.93, "total_tokens": 1065728}
|
|
{"current_steps": 2175, "total_steps": 7577, "loss": 0.0009, "lr": 1.794648681860444e-06, "epoch": 0.2870529233205754, "percentage": 28.71, "elapsed_time": "0:06:23", "remaining_time": "0:15:52", "throughput": 2783.9, "total_tokens": 1068160}
|
|
{"current_steps": 2180, "total_steps": 7577, "loss": 0.2734, "lr": 1.7932481546486312e-06, "epoch": 0.28771281509832386, "percentage": 28.77, "elapsed_time": "0:06:24", "remaining_time": "0:15:50", "throughput": 2787.86, "total_tokens": 1070592}
|
|
{"current_steps": 2185, "total_steps": 7577, "loss": 0.1758, "lr": 1.791843418162287e-06, "epoch": 0.2883727068760723, "percentage": 28.84, "elapsed_time": "0:06:24", "remaining_time": "0:15:48", "throughput": 2792.42, "total_tokens": 1073280}
|
|
{"current_steps": 2190, "total_steps": 7577, "loss": 0.0031, "lr": 1.7904344798554748e-06, "epoch": 0.28903259865382075, "percentage": 28.9, "elapsed_time": "0:06:24", "remaining_time": "0:15:46", "throughput": 2796.04, "total_tokens": 1075584}
|
|
{"current_steps": 2195, "total_steps": 7577, "loss": 0.0927, "lr": 1.789021347204553e-06, "epoch": 0.28969249043156925, "percentage": 28.97, "elapsed_time": "0:06:25", "remaining_time": "0:15:44", "throughput": 2799.97, "total_tokens": 1078016}
|
|
{"current_steps": 2200, "total_steps": 7577, "loss": 0.1631, "lr": 1.7876040277081381e-06, "epoch": 0.2903523822093177, "percentage": 29.04, "elapsed_time": "0:06:25", "remaining_time": "0:15:41", "throughput": 2804.07, "total_tokens": 1080512}
|
|
{"current_steps": 2205, "total_steps": 7577, "loss": 0.0382, "lr": 1.7861825288870632e-06, "epoch": 0.29101227398706614, "percentage": 29.1, "elapsed_time": "0:06:25", "remaining_time": "0:15:39", "throughput": 2807.51, "total_tokens": 1082752}
|
|
{"current_steps": 2210, "total_steps": 7577, "loss": 0.3717, "lr": 1.7847568582843376e-06, "epoch": 0.2916721657648146, "percentage": 29.17, "elapsed_time": "0:06:25", "remaining_time": "0:15:37", "throughput": 2811.43, "total_tokens": 1085184}
|
|
{"current_steps": 2215, "total_steps": 7577, "loss": 0.1799, "lr": 1.7833270234651088e-06, "epoch": 0.29233205754256303, "percentage": 29.23, "elapsed_time": "0:06:26", "remaining_time": "0:15:35", "throughput": 2814.66, "total_tokens": 1087360}
|
|
{"current_steps": 2220, "total_steps": 7577, "loss": 0.0596, "lr": 1.781893032016621e-06, "epoch": 0.2929919493203115, "percentage": 29.3, "elapsed_time": "0:06:26", "remaining_time": "0:15:33", "throughput": 2819.02, "total_tokens": 1089984}
|
|
{"current_steps": 2225, "total_steps": 7577, "loss": 0.0264, "lr": 1.7804548915481746e-06, "epoch": 0.2936518410980599, "percentage": 29.37, "elapsed_time": "0:06:26", "remaining_time": "0:15:30", "throughput": 2823.37, "total_tokens": 1092608}
|
|
{"current_steps": 2230, "total_steps": 7577, "loss": 0.1052, "lr": 1.7790126096910865e-06, "epoch": 0.29431173287580836, "percentage": 29.43, "elapsed_time": "0:06:27", "remaining_time": "0:15:28", "throughput": 2827.25, "total_tokens": 1095040}
|
|
{"current_steps": 2235, "total_steps": 7577, "loss": 0.1063, "lr": 1.7775661940986492e-06, "epoch": 0.2949716246535568, "percentage": 29.5, "elapsed_time": "0:06:27", "remaining_time": "0:15:26", "throughput": 2831.76, "total_tokens": 1097728}
|
|
{"current_steps": 2240, "total_steps": 7577, "loss": 0.1857, "lr": 1.776115652446091e-06, "epoch": 0.29563151643130525, "percentage": 29.56, "elapsed_time": "0:06:27", "remaining_time": "0:15:24", "throughput": 2835.45, "total_tokens": 1100096}
|
|
{"current_steps": 2245, "total_steps": 7577, "loss": 0.1076, "lr": 1.7746609924305336e-06, "epoch": 0.2962914082090537, "percentage": 29.63, "elapsed_time": "0:06:28", "remaining_time": "0:15:22", "throughput": 2839.04, "total_tokens": 1102400}
|
|
{"current_steps": 2250, "total_steps": 7577, "loss": 0.0825, "lr": 1.7732022217709534e-06, "epoch": 0.29695129998680214, "percentage": 29.7, "elapsed_time": "0:06:28", "remaining_time": "0:15:20", "throughput": 2843.22, "total_tokens": 1104960}
|
|
{"current_steps": 2255, "total_steps": 7577, "loss": 0.1648, "lr": 1.7717393482081384e-06, "epoch": 0.2976111917645506, "percentage": 29.76, "elapsed_time": "0:06:28", "remaining_time": "0:15:17", "throughput": 2847.4, "total_tokens": 1107520}
|
|
{"current_steps": 2260, "total_steps": 7577, "loss": 0.1223, "lr": 1.7702723795046492e-06, "epoch": 0.2982710835422991, "percentage": 29.83, "elapsed_time": "0:06:29", "remaining_time": "0:15:15", "throughput": 2851.26, "total_tokens": 1109952}
|
|
{"current_steps": 2265, "total_steps": 7577, "loss": 0.0026, "lr": 1.7688013234447757e-06, "epoch": 0.29893097532004753, "percentage": 29.89, "elapsed_time": "0:06:29", "remaining_time": "0:15:13", "throughput": 2854.5, "total_tokens": 1112128}
|
|
{"current_steps": 2270, "total_steps": 7577, "loss": 0.1387, "lr": 1.7673261878344973e-06, "epoch": 0.299590867097796, "percentage": 29.96, "elapsed_time": "0:06:29", "remaining_time": "0:15:11", "throughput": 2858.68, "total_tokens": 1114688}
|
|
{"current_steps": 2274, "total_steps": 7577, "eval_loss": 0.12411058694124222, "epoch": 0.30011878051999474, "percentage": 30.01, "elapsed_time": "0:06:38", "remaining_time": "0:15:28", "throughput": 2804.9, "total_tokens": 1116800}
|
|
{"current_steps": 2275, "total_steps": 7577, "loss": 0.1185, "lr": 1.7658469805014414e-06, "epoch": 0.3002507588755444, "percentage": 30.03, "elapsed_time": "0:07:25", "remaining_time": "0:17:19", "throughput": 2506.0, "total_tokens": 1117248}
|
|
{"current_steps": 2280, "total_steps": 7577, "loss": 0.1096, "lr": 1.7643637092948415e-06, "epoch": 0.30091065065329287, "percentage": 30.09, "elapsed_time": "0:07:26", "remaining_time": "0:17:16", "throughput": 2509.88, "total_tokens": 1119808}
|
|
{"current_steps": 2285, "total_steps": 7577, "loss": 0.2572, "lr": 1.7628763820854948e-06, "epoch": 0.3015705424310413, "percentage": 30.16, "elapsed_time": "0:07:26", "remaining_time": "0:17:14", "throughput": 2513.23, "total_tokens": 1122112}
|
|
{"current_steps": 2290, "total_steps": 7577, "loss": 0.0884, "lr": 1.7613850067657216e-06, "epoch": 0.30223043420878976, "percentage": 30.22, "elapsed_time": "0:07:26", "remaining_time": "0:17:11", "throughput": 2516.85, "total_tokens": 1124544}
|
|
{"current_steps": 2295, "total_steps": 7577, "loss": 0.1247, "lr": 1.7598895912493232e-06, "epoch": 0.3028903259865382, "percentage": 30.29, "elapsed_time": "0:07:27", "remaining_time": "0:17:09", "throughput": 2520.72, "total_tokens": 1127104}
|
|
{"current_steps": 2300, "total_steps": 7577, "loss": 0.0773, "lr": 1.7583901434715397e-06, "epoch": 0.30355021776428665, "percentage": 30.36, "elapsed_time": "0:07:27", "remaining_time": "0:17:06", "throughput": 2524.32, "total_tokens": 1129536}
|
|
{"current_steps": 2305, "total_steps": 7577, "loss": 0.1009, "lr": 1.7568866713890074e-06, "epoch": 0.3042101095420351, "percentage": 30.42, "elapsed_time": "0:07:27", "remaining_time": "0:17:04", "throughput": 2527.65, "total_tokens": 1131840}
|
|
{"current_steps": 2310, "total_steps": 7577, "loss": 0.1243, "lr": 1.7553791829797175e-06, "epoch": 0.30487000131978353, "percentage": 30.49, "elapsed_time": "0:07:28", "remaining_time": "0:17:01", "throughput": 2531.38, "total_tokens": 1134336}
|
|
{"current_steps": 2315, "total_steps": 7577, "loss": 0.3449, "lr": 1.7538676862429737e-06, "epoch": 0.305529893097532, "percentage": 30.55, "elapsed_time": "0:07:28", "remaining_time": "0:16:59", "throughput": 2534.67, "total_tokens": 1136640}
|
|
{"current_steps": 2320, "total_steps": 7577, "loss": 0.1248, "lr": 1.7523521891993486e-06, "epoch": 0.3061897848752805, "percentage": 30.62, "elapsed_time": "0:07:28", "remaining_time": "0:16:56", "throughput": 2538.39, "total_tokens": 1139136}
|
|
{"current_steps": 2325, "total_steps": 7577, "loss": 0.0367, "lr": 1.7508326998906422e-06, "epoch": 0.3068496766530289, "percentage": 30.68, "elapsed_time": "0:07:29", "remaining_time": "0:16:54", "throughput": 2541.97, "total_tokens": 1141568}
|
|
{"current_steps": 2330, "total_steps": 7577, "loss": 0.0023, "lr": 1.7493092263798394e-06, "epoch": 0.30750956843077737, "percentage": 30.75, "elapsed_time": "0:07:29", "remaining_time": "0:16:52", "throughput": 2545.4, "total_tokens": 1143936}
|
|
{"current_steps": 2335, "total_steps": 7577, "loss": 0.0188, "lr": 1.7477817767510664e-06, "epoch": 0.3081694602085258, "percentage": 30.82, "elapsed_time": "0:07:29", "remaining_time": "0:16:49", "throughput": 2549.49, "total_tokens": 1146624}
|
|
{"current_steps": 2340, "total_steps": 7577, "loss": 0.0209, "lr": 1.7462503591095484e-06, "epoch": 0.30882935198627426, "percentage": 30.88, "elapsed_time": "0:07:30", "remaining_time": "0:16:47", "throughput": 2553.2, "total_tokens": 1149120}
|
|
{"current_steps": 2345, "total_steps": 7577, "loss": 0.1152, "lr": 1.7447149815815659e-06, "epoch": 0.3094892437640227, "percentage": 30.95, "elapsed_time": "0:07:30", "remaining_time": "0:16:44", "throughput": 2556.58, "total_tokens": 1151488}
|
|
{"current_steps": 2350, "total_steps": 7577, "loss": 0.1426, "lr": 1.7431756523144126e-06, "epoch": 0.31014913554177115, "percentage": 31.01, "elapsed_time": "0:07:30", "remaining_time": "0:16:42", "throughput": 2559.44, "total_tokens": 1153600}
|
|
{"current_steps": 2355, "total_steps": 7577, "loss": 0.0021, "lr": 1.7416323794763512e-06, "epoch": 0.3108090273195196, "percentage": 31.08, "elapsed_time": "0:07:31", "remaining_time": "0:16:40", "throughput": 2563.37, "total_tokens": 1156224}
|
|
{"current_steps": 2360, "total_steps": 7577, "loss": 0.234, "lr": 1.7400851712565707e-06, "epoch": 0.31146891909726804, "percentage": 31.15, "elapsed_time": "0:07:31", "remaining_time": "0:16:37", "throughput": 2566.85, "total_tokens": 1158656}
|
|
{"current_steps": 2365, "total_steps": 7577, "loss": 0.1276, "lr": 1.7385340358651432e-06, "epoch": 0.3121288108750165, "percentage": 31.21, "elapsed_time": "0:07:31", "remaining_time": "0:16:35", "throughput": 2570.99, "total_tokens": 1161408}
|
|
{"current_steps": 2370, "total_steps": 7577, "loss": 0.0223, "lr": 1.736978981532979e-06, "epoch": 0.3127887026527649, "percentage": 31.28, "elapsed_time": "0:07:32", "remaining_time": "0:16:33", "throughput": 2574.62, "total_tokens": 1163904}
|
|
{"current_steps": 2375, "total_steps": 7577, "loss": 0.2808, "lr": 1.7354200165117838e-06, "epoch": 0.31344859443051337, "percentage": 31.34, "elapsed_time": "0:07:32", "remaining_time": "0:16:30", "throughput": 2577.81, "total_tokens": 1166208}
|
|
{"current_steps": 2380, "total_steps": 7577, "loss": 0.2086, "lr": 1.733857149074016e-06, "epoch": 0.3141084862082619, "percentage": 31.41, "elapsed_time": "0:07:32", "remaining_time": "0:16:28", "throughput": 2581.0, "total_tokens": 1168512}
|
|
{"current_steps": 2385, "total_steps": 7577, "loss": 0.2359, "lr": 1.7322903875128402e-06, "epoch": 0.3147683779860103, "percentage": 31.48, "elapsed_time": "0:07:33", "remaining_time": "0:16:26", "throughput": 2584.77, "total_tokens": 1171072}
|
|
{"current_steps": 2390, "total_steps": 7577, "loss": 0.0071, "lr": 1.7307197401420858e-06, "epoch": 0.31542826976375876, "percentage": 31.54, "elapsed_time": "0:07:33", "remaining_time": "0:16:23", "throughput": 2587.87, "total_tokens": 1173312}
|
|
{"current_steps": 2395, "total_steps": 7577, "loss": 0.1059, "lr": 1.7291452152962018e-06, "epoch": 0.3160881615415072, "percentage": 31.61, "elapsed_time": "0:07:33", "remaining_time": "0:16:21", "throughput": 2591.37, "total_tokens": 1175744}
|
|
{"current_steps": 2400, "total_steps": 7577, "loss": 0.2033, "lr": 1.7275668213302116e-06, "epoch": 0.31674805331925565, "percentage": 31.67, "elapsed_time": "0:07:34", "remaining_time": "0:16:19", "throughput": 2594.73, "total_tokens": 1178112}
|
|
{"current_steps": 2405, "total_steps": 7577, "loss": 0.0488, "lr": 1.72598456661967e-06, "epoch": 0.3174079450970041, "percentage": 31.74, "elapsed_time": "0:07:34", "remaining_time": "0:16:17", "throughput": 2597.81, "total_tokens": 1180352}
|
|
{"current_steps": 2410, "total_steps": 7577, "loss": 0.1241, "lr": 1.7243984595606191e-06, "epoch": 0.31806783687475254, "percentage": 31.81, "elapsed_time": "0:07:34", "remaining_time": "0:16:14", "throughput": 2600.76, "total_tokens": 1182528}
|
|
{"current_steps": 2415, "total_steps": 7577, "loss": 0.0697, "lr": 1.722808508569542e-06, "epoch": 0.318727728652501, "percentage": 31.87, "elapsed_time": "0:07:35", "remaining_time": "0:16:12", "throughput": 2604.91, "total_tokens": 1185280}
|
|
{"current_steps": 2420, "total_steps": 7577, "loss": 0.0766, "lr": 1.72121472208332e-06, "epoch": 0.31938762043024943, "percentage": 31.94, "elapsed_time": "0:07:35", "remaining_time": "0:16:10", "throughput": 2609.04, "total_tokens": 1188032}
|
|
{"current_steps": 2425, "total_steps": 7577, "loss": 0.1857, "lr": 1.7196171085591864e-06, "epoch": 0.3200475122079979, "percentage": 32.0, "elapsed_time": "0:07:35", "remaining_time": "0:16:08", "throughput": 2612.52, "total_tokens": 1190464}
|
|
{"current_steps": 2430, "total_steps": 7577, "loss": 0.2532, "lr": 1.7180156764746824e-06, "epoch": 0.3207074039857463, "percentage": 32.07, "elapsed_time": "0:07:36", "remaining_time": "0:16:05", "throughput": 2616.14, "total_tokens": 1192960}
|
|
{"current_steps": 2435, "total_steps": 7577, "loss": 0.0694, "lr": 1.7164104343276113e-06, "epoch": 0.32136729576349476, "percentage": 32.14, "elapsed_time": "0:07:36", "remaining_time": "0:16:03", "throughput": 2618.93, "total_tokens": 1195072}
|
|
{"current_steps": 2440, "total_steps": 7577, "loss": 0.0516, "lr": 1.714801390635996e-06, "epoch": 0.3220271875412432, "percentage": 32.2, "elapsed_time": "0:07:36", "remaining_time": "0:16:01", "throughput": 2622.12, "total_tokens": 1197376}
|
|
{"current_steps": 2445, "total_steps": 7577, "loss": 0.0735, "lr": 1.7131885539380297e-06, "epoch": 0.3226870793189917, "percentage": 32.27, "elapsed_time": "0:07:36", "remaining_time": "0:15:59", "throughput": 2625.82, "total_tokens": 1199936}
|
|
{"current_steps": 2450, "total_steps": 7577, "loss": 0.141, "lr": 1.7115719327920335e-06, "epoch": 0.32334697109674015, "percentage": 32.33, "elapsed_time": "0:07:37", "remaining_time": "0:15:56", "throughput": 2629.26, "total_tokens": 1202368}
|
|
{"current_steps": 2455, "total_steps": 7577, "loss": 0.0172, "lr": 1.70995153577641e-06, "epoch": 0.3240068628744886, "percentage": 32.4, "elapsed_time": "0:07:37", "remaining_time": "0:15:54", "throughput": 2632.68, "total_tokens": 1204800}
|
|
{"current_steps": 2460, "total_steps": 7577, "loss": 0.0639, "lr": 1.7083273714895991e-06, "epoch": 0.32466675465223704, "percentage": 32.47, "elapsed_time": "0:07:37", "remaining_time": "0:15:52", "throughput": 2636.77, "total_tokens": 1207552}
|
|
{"current_steps": 2465, "total_steps": 7577, "loss": 0.1031, "lr": 1.7066994485500298e-06, "epoch": 0.3253266464299855, "percentage": 32.53, "elapsed_time": "0:07:38", "remaining_time": "0:15:50", "throughput": 2639.91, "total_tokens": 1209856}
|
|
{"current_steps": 2470, "total_steps": 7577, "loss": 0.1125, "lr": 1.7050677755960762e-06, "epoch": 0.32598653820773393, "percentage": 32.6, "elapsed_time": "0:07:38", "remaining_time": "0:15:48", "throughput": 2643.47, "total_tokens": 1212352}
|
|
{"current_steps": 2475, "total_steps": 7577, "loss": 0.0832, "lr": 1.7034323612860124e-06, "epoch": 0.3266464299854824, "percentage": 32.66, "elapsed_time": "0:07:38", "remaining_time": "0:15:46", "throughput": 2647.15, "total_tokens": 1214912}
|
|
{"current_steps": 2480, "total_steps": 7577, "loss": 0.1275, "lr": 1.7017932142979645e-06, "epoch": 0.3273063217632308, "percentage": 32.73, "elapsed_time": "0:07:39", "remaining_time": "0:15:43", "throughput": 2649.95, "total_tokens": 1217088}
|
|
{"current_steps": 2485, "total_steps": 7577, "loss": 0.2478, "lr": 1.700150343329866e-06, "epoch": 0.32796621354097927, "percentage": 32.8, "elapsed_time": "0:07:39", "remaining_time": "0:15:41", "throughput": 2653.47, "total_tokens": 1219584}
|
|
{"current_steps": 2490, "total_steps": 7577, "loss": 0.1182, "lr": 1.6985037570994113e-06, "epoch": 0.3286261053187277, "percentage": 32.86, "elapsed_time": "0:07:39", "remaining_time": "0:15:39", "throughput": 2657.55, "total_tokens": 1222336}
|
|
{"current_steps": 2495, "total_steps": 7577, "loss": 0.0913, "lr": 1.6968534643440088e-06, "epoch": 0.32928599709647616, "percentage": 32.93, "elapsed_time": "0:07:40", "remaining_time": "0:15:37", "throughput": 2661.07, "total_tokens": 1224832}
|
|
{"current_steps": 2500, "total_steps": 7577, "loss": 0.1998, "lr": 1.6951994738207364e-06, "epoch": 0.3299458888742246, "percentage": 32.99, "elapsed_time": "0:07:40", "remaining_time": "0:15:35", "throughput": 2664.73, "total_tokens": 1227392}
|
|
{"current_steps": 2505, "total_steps": 7577, "loss": 0.2007, "lr": 1.6935417943062928e-06, "epoch": 0.3306057806519731, "percentage": 33.06, "elapsed_time": "0:07:40", "remaining_time": "0:15:33", "throughput": 2668.38, "total_tokens": 1229952}
|
|
{"current_steps": 2510, "total_steps": 7577, "loss": 0.0615, "lr": 1.6918804345969516e-06, "epoch": 0.33126567242972155, "percentage": 33.13, "elapsed_time": "0:07:41", "remaining_time": "0:15:31", "throughput": 2672.28, "total_tokens": 1232640}
|
|
{"current_steps": 2515, "total_steps": 7577, "loss": 0.0479, "lr": 1.6902154035085156e-06, "epoch": 0.33192556420747, "percentage": 33.19, "elapsed_time": "0:07:41", "remaining_time": "0:15:29", "throughput": 2675.9, "total_tokens": 1235200}
|
|
{"current_steps": 2520, "total_steps": 7577, "loss": 0.1243, "lr": 1.688546709876269e-06, "epoch": 0.33258545598521844, "percentage": 33.26, "elapsed_time": "0:07:41", "remaining_time": "0:15:26", "throughput": 2679.28, "total_tokens": 1237632}
|
|
{"current_steps": 2525, "total_steps": 7577, "loss": 0.0662, "lr": 1.6868743625549314e-06, "epoch": 0.3332453477629669, "percentage": 33.32, "elapsed_time": "0:07:42", "remaining_time": "0:15:24", "throughput": 2682.38, "total_tokens": 1239936}
|
|
{"current_steps": 2530, "total_steps": 7577, "loss": 0.0455, "lr": 1.6851983704186092e-06, "epoch": 0.3339052395407153, "percentage": 33.39, "elapsed_time": "0:07:42", "remaining_time": "0:15:22", "throughput": 2685.61, "total_tokens": 1242304}
|
|
{"current_steps": 2535, "total_steps": 7577, "loss": 0.0018, "lr": 1.6835187423607503e-06, "epoch": 0.33456513131846377, "percentage": 33.46, "elapsed_time": "0:07:42", "remaining_time": "0:15:20", "throughput": 2688.97, "total_tokens": 1244736}
|
|
{"current_steps": 2540, "total_steps": 7577, "loss": 0.1863, "lr": 1.681835487294096e-06, "epoch": 0.3352250230962122, "percentage": 33.52, "elapsed_time": "0:07:43", "remaining_time": "0:15:18", "throughput": 2692.97, "total_tokens": 1247488}
|
|
{"current_steps": 2545, "total_steps": 7577, "loss": 0.229, "lr": 1.6801486141506342e-06, "epoch": 0.33588491487396066, "percentage": 33.59, "elapsed_time": "0:07:43", "remaining_time": "0:15:16", "throughput": 2696.58, "total_tokens": 1250048}
|
|
{"current_steps": 2550, "total_steps": 7577, "loss": 0.3541, "lr": 1.6784581318815514e-06, "epoch": 0.3365448066517091, "percentage": 33.65, "elapsed_time": "0:07:43", "remaining_time": "0:15:14", "throughput": 2700.83, "total_tokens": 1252928}
|
|
{"current_steps": 2555, "total_steps": 7577, "loss": 0.1596, "lr": 1.6767640494571849e-06, "epoch": 0.33720469842945755, "percentage": 33.72, "elapsed_time": "0:07:44", "remaining_time": "0:15:12", "throughput": 2704.45, "total_tokens": 1255488}
|
|
{"current_steps": 2560, "total_steps": 7577, "loss": 0.3643, "lr": 1.6750663758669767e-06, "epoch": 0.337864590207206, "percentage": 33.79, "elapsed_time": "0:07:44", "remaining_time": "0:15:10", "throughput": 2707.88, "total_tokens": 1257984}
|
|
{"current_steps": 2565, "total_steps": 7577, "loss": 0.1098, "lr": 1.6733651201194245e-06, "epoch": 0.3385244819849545, "percentage": 33.85, "elapsed_time": "0:07:44", "remaining_time": "0:15:08", "throughput": 2711.21, "total_tokens": 1260416}
|
|
{"current_steps": 2570, "total_steps": 7577, "loss": 0.1186, "lr": 1.6716602912420342e-06, "epoch": 0.33918437376270294, "percentage": 33.92, "elapsed_time": "0:07:45", "remaining_time": "0:15:06", "throughput": 2715.18, "total_tokens": 1263168}
|
|
{"current_steps": 2575, "total_steps": 7577, "loss": 0.1177, "lr": 1.6699518982812726e-06, "epoch": 0.3398442655404514, "percentage": 33.98, "elapsed_time": "0:07:45", "remaining_time": "0:15:04", "throughput": 2718.5, "total_tokens": 1265600}
|
|
{"current_steps": 2580, "total_steps": 7577, "loss": 0.0041, "lr": 1.6682399503025183e-06, "epoch": 0.34050415731819983, "percentage": 34.05, "elapsed_time": "0:07:45", "remaining_time": "0:15:02", "throughput": 2721.82, "total_tokens": 1268032}
|
|
{"current_steps": 2585, "total_steps": 7577, "loss": 0.1831, "lr": 1.666524456390014e-06, "epoch": 0.3411640490959483, "percentage": 34.12, "elapsed_time": "0:07:46", "remaining_time": "0:15:00", "throughput": 2724.86, "total_tokens": 1270336}
|
|
{"current_steps": 2590, "total_steps": 7577, "loss": 0.0639, "lr": 1.664805425646819e-06, "epoch": 0.3418239408736967, "percentage": 34.18, "elapsed_time": "0:07:46", "remaining_time": "0:14:58", "throughput": 2728.83, "total_tokens": 1273088}
|
|
{"current_steps": 2595, "total_steps": 7577, "loss": 0.2223, "lr": 1.6630828671947606e-06, "epoch": 0.34248383265144516, "percentage": 34.25, "elapsed_time": "0:07:46", "remaining_time": "0:14:56", "throughput": 2731.98, "total_tokens": 1275456}
|
|
{"current_steps": 2600, "total_steps": 7577, "loss": 0.0484, "lr": 1.6613567901743842e-06, "epoch": 0.3431437244291936, "percentage": 34.31, "elapsed_time": "0:07:47", "remaining_time": "0:14:54", "throughput": 2735.29, "total_tokens": 1277888}
|
|
{"current_steps": 2605, "total_steps": 7577, "loss": 0.0021, "lr": 1.6596272037449075e-06, "epoch": 0.34380361620694205, "percentage": 34.38, "elapsed_time": "0:07:47", "remaining_time": "0:14:52", "throughput": 2738.71, "total_tokens": 1280384}
|
|
{"current_steps": 2610, "total_steps": 7577, "loss": 0.0633, "lr": 1.6578941170841696e-06, "epoch": 0.3444635079846905, "percentage": 34.45, "elapsed_time": "0:07:47", "remaining_time": "0:14:50", "throughput": 2742.24, "total_tokens": 1282944}
|
|
{"current_steps": 2615, "total_steps": 7577, "loss": 0.0451, "lr": 1.6561575393885833e-06, "epoch": 0.34512339976243894, "percentage": 34.51, "elapsed_time": "0:07:48", "remaining_time": "0:14:48", "throughput": 2745.12, "total_tokens": 1285184}
|
|
{"current_steps": 2620, "total_steps": 7577, "loss": 0.2038, "lr": 1.6544174798730864e-06, "epoch": 0.3457832915401874, "percentage": 34.58, "elapsed_time": "0:07:48", "remaining_time": "0:14:46", "throughput": 2748.79, "total_tokens": 1287808}
|
|
{"current_steps": 2625, "total_steps": 7577, "loss": 0.1932, "lr": 1.6526739477710923e-06, "epoch": 0.34644318331793583, "percentage": 34.64, "elapsed_time": "0:07:48", "remaining_time": "0:14:44", "throughput": 2752.45, "total_tokens": 1290432}
|
|
{"current_steps": 2630, "total_steps": 7577, "loss": 0.2289, "lr": 1.650926952334441e-06, "epoch": 0.34710307509568433, "percentage": 34.71, "elapsed_time": "0:07:49", "remaining_time": "0:14:42", "throughput": 2755.45, "total_tokens": 1292736}
|
|
{"current_steps": 2635, "total_steps": 7577, "loss": 0.2947, "lr": 1.6491765028333516e-06, "epoch": 0.3477629668734328, "percentage": 34.78, "elapsed_time": "0:07:49", "remaining_time": "0:14:40", "throughput": 2758.58, "total_tokens": 1295104}
|
|
{"current_steps": 2640, "total_steps": 7577, "loss": 0.0444, "lr": 1.6474226085563693e-06, "epoch": 0.3484228586511812, "percentage": 34.84, "elapsed_time": "0:07:49", "remaining_time": "0:14:38", "throughput": 2761.97, "total_tokens": 1297600}
|
|
{"current_steps": 2645, "total_steps": 7577, "loss": 0.0057, "lr": 1.6456652788103215e-06, "epoch": 0.34908275042892967, "percentage": 34.91, "elapsed_time": "0:07:50", "remaining_time": "0:14:36", "throughput": 2765.62, "total_tokens": 1300224}
|
|
{"current_steps": 2650, "total_steps": 7577, "loss": 0.0494, "lr": 1.6439045229202631e-06, "epoch": 0.3497426422066781, "percentage": 34.97, "elapsed_time": "0:07:50", "remaining_time": "0:14:34", "throughput": 2768.6, "total_tokens": 1302528}
|
|
{"current_steps": 2653, "total_steps": 7577, "eval_loss": 0.13837853074073792, "epoch": 0.3501385772733272, "percentage": 35.01, "elapsed_time": "0:07:58", "remaining_time": "0:14:48", "throughput": 2724.81, "total_tokens": 1303872}
|
|
{"current_steps": 2655, "total_steps": 7577, "loss": 0.1541, "lr": 1.6421403502294307e-06, "epoch": 0.35040253398442656, "percentage": 35.04, "elapsed_time": "0:08:44", "remaining_time": "0:16:12", "throughput": 2487.41, "total_tokens": 1305024}
|
|
{"current_steps": 2660, "total_steps": 7577, "loss": 0.1878, "lr": 1.6403727700991915e-06, "epoch": 0.351062425762175, "percentage": 35.11, "elapsed_time": "0:08:44", "remaining_time": "0:16:10", "throughput": 2490.32, "total_tokens": 1307392}
|
|
{"current_steps": 2665, "total_steps": 7577, "loss": 0.273, "lr": 1.6386017919089933e-06, "epoch": 0.35172231753992345, "percentage": 35.17, "elapsed_time": "0:08:45", "remaining_time": "0:16:08", "throughput": 2493.7, "total_tokens": 1310016}
|
|
{"current_steps": 2670, "total_steps": 7577, "loss": 0.0471, "lr": 1.636827425056316e-06, "epoch": 0.3523822093176719, "percentage": 35.24, "elapsed_time": "0:08:45", "remaining_time": "0:16:06", "throughput": 2496.99, "total_tokens": 1312576}
|
|
{"current_steps": 2675, "total_steps": 7577, "loss": 0.1356, "lr": 1.635049678956621e-06, "epoch": 0.35304210109542034, "percentage": 35.3, "elapsed_time": "0:08:45", "remaining_time": "0:16:03", "throughput": 2500.17, "total_tokens": 1315072}
|
|
{"current_steps": 2680, "total_steps": 7577, "loss": 0.125, "lr": 1.633268563043301e-06, "epoch": 0.3537019928731688, "percentage": 35.37, "elapsed_time": "0:08:46", "remaining_time": "0:16:01", "throughput": 2503.24, "total_tokens": 1317504}
|
|
{"current_steps": 2685, "total_steps": 7577, "loss": 0.0021, "lr": 1.63148408676763e-06, "epoch": 0.3543618846509172, "percentage": 35.44, "elapsed_time": "0:08:46", "remaining_time": "0:15:59", "throughput": 2505.84, "total_tokens": 1319680}
|
|
{"current_steps": 2690, "total_steps": 7577, "loss": 0.001, "lr": 1.6296962595987141e-06, "epoch": 0.3550217764286657, "percentage": 35.5, "elapsed_time": "0:08:46", "remaining_time": "0:15:57", "throughput": 2509.13, "total_tokens": 1322240}
|
|
{"current_steps": 2695, "total_steps": 7577, "loss": 0.1102, "lr": 1.6279050910234392e-06, "epoch": 0.35568166820641417, "percentage": 35.57, "elapsed_time": "0:08:47", "remaining_time": "0:15:55", "throughput": 2512.31, "total_tokens": 1324736}
|
|
{"current_steps": 2700, "total_steps": 7577, "loss": 0.0617, "lr": 1.626110590546423e-06, "epoch": 0.3563415599841626, "percentage": 35.63, "elapsed_time": "0:08:47", "remaining_time": "0:15:53", "throughput": 2515.26, "total_tokens": 1327104}
|
|
{"current_steps": 2705, "total_steps": 7577, "loss": 0.2021, "lr": 1.6243127676899635e-06, "epoch": 0.35700145176191106, "percentage": 35.7, "elapsed_time": "0:08:47", "remaining_time": "0:15:50", "throughput": 2519.0, "total_tokens": 1329920}
|
|
{"current_steps": 2710, "total_steps": 7577, "loss": 0.2292, "lr": 1.6225116319939884e-06, "epoch": 0.3576613435396595, "percentage": 35.77, "elapsed_time": "0:08:48", "remaining_time": "0:15:48", "throughput": 2522.03, "total_tokens": 1332352}
|
|
{"current_steps": 2715, "total_steps": 7577, "loss": 0.1136, "lr": 1.6207071930160044e-06, "epoch": 0.35832123531740795, "percentage": 35.83, "elapsed_time": "0:08:48", "remaining_time": "0:15:46", "throughput": 2525.52, "total_tokens": 1335040}
|
|
{"current_steps": 2720, "total_steps": 7577, "loss": 0.0427, "lr": 1.6188994603310468e-06, "epoch": 0.3589811270951564, "percentage": 35.9, "elapsed_time": "0:08:48", "remaining_time": "0:15:44", "throughput": 2528.54, "total_tokens": 1337472}
|
|
{"current_steps": 2725, "total_steps": 7577, "loss": 0.158, "lr": 1.617088443531628e-06, "epoch": 0.35964101887290484, "percentage": 35.96, "elapsed_time": "0:08:49", "remaining_time": "0:15:42", "throughput": 2531.24, "total_tokens": 1339712}
|
|
{"current_steps": 2730, "total_steps": 7577, "loss": 0.0092, "lr": 1.6152741522276882e-06, "epoch": 0.3603009106506533, "percentage": 36.03, "elapsed_time": "0:08:49", "remaining_time": "0:15:40", "throughput": 2534.27, "total_tokens": 1342144}
|
|
{"current_steps": 2735, "total_steps": 7577, "loss": 0.0433, "lr": 1.6134565960465425e-06, "epoch": 0.36096080242840173, "percentage": 36.1, "elapsed_time": "0:08:49", "remaining_time": "0:15:38", "throughput": 2537.19, "total_tokens": 1344512}
|
|
{"current_steps": 2740, "total_steps": 7577, "loss": 0.1562, "lr": 1.6116357846328312e-06, "epoch": 0.3616206942061502, "percentage": 36.16, "elapsed_time": "0:08:50", "remaining_time": "0:15:36", "throughput": 2540.07, "total_tokens": 1346880}
|
|
{"current_steps": 2745, "total_steps": 7577, "loss": 0.0123, "lr": 1.609811727648468e-06, "epoch": 0.3622805859838986, "percentage": 36.23, "elapsed_time": "0:08:50", "remaining_time": "0:15:33", "throughput": 2542.6, "total_tokens": 1349056}
|
|
{"current_steps": 2750, "total_steps": 7577, "loss": 0.0425, "lr": 1.6079844347725882e-06, "epoch": 0.36294047776164706, "percentage": 36.29, "elapsed_time": "0:08:50", "remaining_time": "0:15:31", "throughput": 2545.59, "total_tokens": 1351488}
|
|
{"current_steps": 2755, "total_steps": 7577, "loss": 0.0654, "lr": 1.6061539157014987e-06, "epoch": 0.36360036953939556, "percentage": 36.36, "elapsed_time": "0:08:51", "remaining_time": "0:15:29", "throughput": 2548.57, "total_tokens": 1353920}
|
|
{"current_steps": 2760, "total_steps": 7577, "loss": 0.2615, "lr": 1.6043201801486257e-06, "epoch": 0.364260261317144, "percentage": 36.43, "elapsed_time": "0:08:51", "remaining_time": "0:15:27", "throughput": 2551.56, "total_tokens": 1356352}
|
|
{"current_steps": 2765, "total_steps": 7577, "loss": 0.1949, "lr": 1.6024832378444628e-06, "epoch": 0.36492015309489245, "percentage": 36.49, "elapsed_time": "0:08:51", "remaining_time": "0:15:25", "throughput": 2555.14, "total_tokens": 1359104}
|
|
{"current_steps": 2770, "total_steps": 7577, "loss": 0.3066, "lr": 1.6006430985365204e-06, "epoch": 0.3655800448726409, "percentage": 36.56, "elapsed_time": "0:08:52", "remaining_time": "0:15:23", "throughput": 2558.11, "total_tokens": 1361536}
|
|
{"current_steps": 2775, "total_steps": 7577, "loss": 0.2606, "lr": 1.5987997719892735e-06, "epoch": 0.36623993665038934, "percentage": 36.62, "elapsed_time": "0:08:52", "remaining_time": "0:15:21", "throughput": 2561.44, "total_tokens": 1364160}
|
|
{"current_steps": 2780, "total_steps": 7577, "loss": 0.005, "lr": 1.5969532679841088e-06, "epoch": 0.3668998284281378, "percentage": 36.69, "elapsed_time": "0:08:52", "remaining_time": "0:15:19", "throughput": 2564.54, "total_tokens": 1366656}
|
|
{"current_steps": 2785, "total_steps": 7577, "loss": 0.0512, "lr": 1.5951035963192752e-06, "epoch": 0.36755972020588623, "percentage": 36.76, "elapsed_time": "0:08:53", "remaining_time": "0:15:17", "throughput": 2567.76, "total_tokens": 1369216}
|
|
{"current_steps": 2790, "total_steps": 7577, "loss": 0.2619, "lr": 1.593250766809829e-06, "epoch": 0.3682196119836347, "percentage": 36.82, "elapsed_time": "0:08:53", "remaining_time": "0:15:15", "throughput": 2570.85, "total_tokens": 1371712}
|
|
{"current_steps": 2795, "total_steps": 7577, "loss": 0.1739, "lr": 1.5913947892875842e-06, "epoch": 0.3688795037613831, "percentage": 36.89, "elapsed_time": "0:08:53", "remaining_time": "0:15:13", "throughput": 2573.71, "total_tokens": 1374080}
|
|
{"current_steps": 2800, "total_steps": 7577, "loss": 0.1383, "lr": 1.589535673601059e-06, "epoch": 0.36953939553913157, "percentage": 36.95, "elapsed_time": "0:08:54", "remaining_time": "0:15:11", "throughput": 2577.59, "total_tokens": 1377024}
|
|
{"current_steps": 2805, "total_steps": 7577, "loss": 0.0587, "lr": 1.587673429615424e-06, "epoch": 0.37019928731688, "percentage": 37.02, "elapsed_time": "0:08:54", "remaining_time": "0:15:09", "throughput": 2580.44, "total_tokens": 1379392}
|
|
{"current_steps": 2810, "total_steps": 7577, "loss": 0.1913, "lr": 1.5858080672124495e-06, "epoch": 0.37085917909462845, "percentage": 37.09, "elapsed_time": "0:08:54", "remaining_time": "0:15:07", "throughput": 2583.3, "total_tokens": 1381760}
|
|
{"current_steps": 2815, "total_steps": 7577, "loss": 0.0931, "lr": 1.5839395962904536e-06, "epoch": 0.37151907087237696, "percentage": 37.15, "elapsed_time": "0:08:55", "remaining_time": "0:15:05", "throughput": 2586.15, "total_tokens": 1384128}
|
|
{"current_steps": 2820, "total_steps": 7577, "loss": 0.0094, "lr": 1.5820680267642494e-06, "epoch": 0.3721789626501254, "percentage": 37.22, "elapsed_time": "0:08:55", "remaining_time": "0:15:03", "throughput": 2588.98, "total_tokens": 1386496}
|
|
{"current_steps": 2825, "total_steps": 7577, "loss": 0.0489, "lr": 1.5801933685650917e-06, "epoch": 0.37283885442787384, "percentage": 37.28, "elapsed_time": "0:08:55", "remaining_time": "0:15:01", "throughput": 2591.6, "total_tokens": 1388736}
|
|
{"current_steps": 2830, "total_steps": 7577, "loss": 0.0478, "lr": 1.5783156316406259e-06, "epoch": 0.3734987462056223, "percentage": 37.35, "elapsed_time": "0:08:56", "remaining_time": "0:14:59", "throughput": 2594.33, "total_tokens": 1391040}
|
|
{"current_steps": 2835, "total_steps": 7577, "loss": 0.2375, "lr": 1.5764348259548334e-06, "epoch": 0.37415863798337073, "percentage": 37.42, "elapsed_time": "0:08:56", "remaining_time": "0:14:57", "throughput": 2597.06, "total_tokens": 1393344}
|
|
{"current_steps": 2840, "total_steps": 7577, "loss": 0.0726, "lr": 1.5745509614879806e-06, "epoch": 0.3748185297611192, "percentage": 37.48, "elapsed_time": "0:08:56", "remaining_time": "0:14:55", "throughput": 2599.78, "total_tokens": 1395648}
|
|
{"current_steps": 2845, "total_steps": 7577, "loss": 0.3055, "lr": 1.572664048236564e-06, "epoch": 0.3754784215388676, "percentage": 37.55, "elapsed_time": "0:08:57", "remaining_time": "0:14:53", "throughput": 2603.06, "total_tokens": 1398272}
|
|
{"current_steps": 2850, "total_steps": 7577, "loss": 0.0405, "lr": 1.570774096213259e-06, "epoch": 0.37613831331661607, "percentage": 37.61, "elapsed_time": "0:08:57", "remaining_time": "0:14:51", "throughput": 2605.75, "total_tokens": 1400576}
|
|
{"current_steps": 2855, "total_steps": 7577, "loss": 0.0015, "lr": 1.5688811154468649e-06, "epoch": 0.3767982050943645, "percentage": 37.68, "elapsed_time": "0:08:57", "remaining_time": "0:14:49", "throughput": 2608.91, "total_tokens": 1403136}
|
|
{"current_steps": 2860, "total_steps": 7577, "loss": 0.1948, "lr": 1.5669851159822532e-06, "epoch": 0.37745809687211296, "percentage": 37.75, "elapsed_time": "0:08:58", "remaining_time": "0:14:47", "throughput": 2611.72, "total_tokens": 1405504}
|
|
{"current_steps": 2865, "total_steps": 7577, "loss": 0.1405, "lr": 1.5650861078803137e-06, "epoch": 0.3781179886498614, "percentage": 37.81, "elapsed_time": "0:08:58", "remaining_time": "0:14:45", "throughput": 2614.42, "total_tokens": 1407808}
|
|
{"current_steps": 2870, "total_steps": 7577, "loss": 0.0803, "lr": 1.5631841012179013e-06, "epoch": 0.37877788042760985, "percentage": 37.88, "elapsed_time": "0:08:58", "remaining_time": "0:14:43", "throughput": 2617.45, "total_tokens": 1410304}
|
|
{"current_steps": 2875, "total_steps": 7577, "loss": 0.0026, "lr": 1.5612791060877818e-06, "epoch": 0.37943777220535835, "percentage": 37.94, "elapsed_time": "0:08:59", "remaining_time": "0:14:41", "throughput": 2620.38, "total_tokens": 1412736}
|
|
{"current_steps": 2880, "total_steps": 7577, "loss": 0.0403, "lr": 1.5593711325985801e-06, "epoch": 0.3800976639831068, "percentage": 38.01, "elapsed_time": "0:08:59", "remaining_time": "0:14:39", "throughput": 2623.87, "total_tokens": 1415488}
|
|
{"current_steps": 2885, "total_steps": 7577, "loss": 0.213, "lr": 1.5574601908747245e-06, "epoch": 0.38075755576085524, "percentage": 38.08, "elapsed_time": "0:08:59", "remaining_time": "0:14:37", "throughput": 2626.66, "total_tokens": 1417856}
|
|
{"current_steps": 2890, "total_steps": 7577, "loss": 0.077, "lr": 1.5555462910563936e-06, "epoch": 0.3814174475386037, "percentage": 38.14, "elapsed_time": "0:09:00", "remaining_time": "0:14:35", "throughput": 2629.23, "total_tokens": 1420096}
|
|
{"current_steps": 2895, "total_steps": 7577, "loss": 0.2334, "lr": 1.5536294432994636e-06, "epoch": 0.3820773393163521, "percentage": 38.21, "elapsed_time": "0:09:00", "remaining_time": "0:14:34", "throughput": 2632.37, "total_tokens": 1422656}
|
|
{"current_steps": 2900, "total_steps": 7577, "loss": 0.0744, "lr": 1.5517096577754528e-06, "epoch": 0.38273723109410057, "percentage": 38.27, "elapsed_time": "0:09:00", "remaining_time": "0:14:32", "throughput": 2635.39, "total_tokens": 1425152}
|
|
{"current_steps": 2905, "total_steps": 7577, "loss": 0.0393, "lr": 1.5497869446714695e-06, "epoch": 0.383397122871849, "percentage": 38.34, "elapsed_time": "0:09:01", "remaining_time": "0:14:30", "throughput": 2638.74, "total_tokens": 1427840}
|
|
{"current_steps": 2910, "total_steps": 7577, "loss": 0.0065, "lr": 1.5478613141901558e-06, "epoch": 0.38405701464959746, "percentage": 38.41, "elapsed_time": "0:09:01", "remaining_time": "0:14:28", "throughput": 2641.41, "total_tokens": 1430144}
|
|
{"current_steps": 2915, "total_steps": 7577, "loss": 0.1329, "lr": 1.5459327765496348e-06, "epoch": 0.3847169064273459, "percentage": 38.47, "elapsed_time": "0:09:01", "remaining_time": "0:14:26", "throughput": 2644.06, "total_tokens": 1432448}
|
|
{"current_steps": 2920, "total_steps": 7577, "loss": 0.0276, "lr": 1.5440013419834563e-06, "epoch": 0.38537679820509435, "percentage": 38.54, "elapsed_time": "0:09:02", "remaining_time": "0:14:24", "throughput": 2646.73, "total_tokens": 1434752}
|
|
{"current_steps": 2925, "total_steps": 7577, "loss": 0.0011, "lr": 1.5420670207405419e-06, "epoch": 0.3860366899828428, "percentage": 38.6, "elapsed_time": "0:09:02", "remaining_time": "0:14:22", "throughput": 2649.62, "total_tokens": 1437184}
|
|
{"current_steps": 2930, "total_steps": 7577, "loss": 0.0935, "lr": 1.5401298230851314e-06, "epoch": 0.38669658176059124, "percentage": 38.67, "elapsed_time": "0:09:02", "remaining_time": "0:14:20", "throughput": 2653.16, "total_tokens": 1440000}
|
|
{"current_steps": 2935, "total_steps": 7577, "loss": 0.0716, "lr": 1.5381897592967275e-06, "epoch": 0.3873564735383397, "percentage": 38.74, "elapsed_time": "0:09:03", "remaining_time": "0:14:18", "throughput": 2656.36, "total_tokens": 1442624}
|
|
{"current_steps": 2940, "total_steps": 7577, "loss": 0.0019, "lr": 1.5362468396700426e-06, "epoch": 0.3880163653160882, "percentage": 38.8, "elapsed_time": "0:09:03", "remaining_time": "0:14:17", "throughput": 2659.46, "total_tokens": 1445184}
|
|
{"current_steps": 2945, "total_steps": 7577, "loss": 0.2755, "lr": 1.5343010745149418e-06, "epoch": 0.38867625709383663, "percentage": 38.87, "elapsed_time": "0:09:03", "remaining_time": "0:14:15", "throughput": 2662.32, "total_tokens": 1447616}
|
|
{"current_steps": 2950, "total_steps": 7577, "loss": 0.0004, "lr": 1.532352474156391e-06, "epoch": 0.3893361488715851, "percentage": 38.93, "elapsed_time": "0:09:04", "remaining_time": "0:14:13", "throughput": 2665.39, "total_tokens": 1450176}
|
|
{"current_steps": 2955, "total_steps": 7577, "loss": 0.5725, "lr": 1.5304010489343995e-06, "epoch": 0.3899960406493335, "percentage": 39.0, "elapsed_time": "0:09:04", "remaining_time": "0:14:11", "throughput": 2668.37, "total_tokens": 1452672}
|
|
{"current_steps": 2960, "total_steps": 7577, "loss": 0.2012, "lr": 1.528446809203968e-06, "epoch": 0.39065593242708196, "percentage": 39.07, "elapsed_time": "0:09:04", "remaining_time": "0:14:09", "throughput": 2671.46, "total_tokens": 1455232}
|
|
{"current_steps": 2965, "total_steps": 7577, "loss": 0.1031, "lr": 1.526489765335031e-06, "epoch": 0.3913158242048304, "percentage": 39.13, "elapsed_time": "0:09:05", "remaining_time": "0:14:07", "throughput": 2674.52, "total_tokens": 1457792}
|
|
{"current_steps": 2970, "total_steps": 7577, "loss": 0.1237, "lr": 1.5245299277124026e-06, "epoch": 0.39197571598257885, "percentage": 39.2, "elapsed_time": "0:09:05", "remaining_time": "0:14:06", "throughput": 2677.26, "total_tokens": 1460160}
|
|
{"current_steps": 2975, "total_steps": 7577, "loss": 0.1705, "lr": 1.5225673067357218e-06, "epoch": 0.3926356077603273, "percentage": 39.26, "elapsed_time": "0:09:05", "remaining_time": "0:14:04", "throughput": 2679.77, "total_tokens": 1462400}
|
|
{"current_steps": 2980, "total_steps": 7577, "loss": 0.1399, "lr": 1.5206019128193981e-06, "epoch": 0.39329549953807574, "percentage": 39.33, "elapsed_time": "0:09:06", "remaining_time": "0:14:02", "throughput": 2683.06, "total_tokens": 1465088}
|
|
{"current_steps": 2985, "total_steps": 7577, "loss": 0.167, "lr": 1.5186337563925538e-06, "epoch": 0.3939553913158242, "percentage": 39.4, "elapsed_time": "0:09:06", "remaining_time": "0:14:00", "throughput": 2685.8, "total_tokens": 1467456}
|
|
{"current_steps": 2990, "total_steps": 7577, "loss": 0.001, "lr": 1.516662847898971e-06, "epoch": 0.39461528309357263, "percentage": 39.46, "elapsed_time": "0:09:06", "remaining_time": "0:13:58", "throughput": 2688.85, "total_tokens": 1470016}
|
|
{"current_steps": 2995, "total_steps": 7577, "loss": 0.0936, "lr": 1.5146891977970349e-06, "epoch": 0.3952751748713211, "percentage": 39.53, "elapsed_time": "0:09:07", "remaining_time": "0:13:56", "throughput": 2691.7, "total_tokens": 1472448}
|
|
{"current_steps": 3000, "total_steps": 7577, "loss": 0.0271, "lr": 1.5127128165596794e-06, "epoch": 0.3959350666490696, "percentage": 39.59, "elapsed_time": "0:09:07", "remaining_time": "0:13:55", "throughput": 2694.86, "total_tokens": 1475072}
|
|
{"current_steps": 3005, "total_steps": 7577, "loss": 0.0571, "lr": 1.51073371467433e-06, "epoch": 0.396594958426818, "percentage": 39.66, "elapsed_time": "0:09:07", "remaining_time": "0:13:53", "throughput": 2697.58, "total_tokens": 1477440}
|
|
{"current_steps": 3010, "total_steps": 7577, "loss": 0.027, "lr": 1.5087519026428498e-06, "epoch": 0.39725485020456647, "percentage": 39.73, "elapsed_time": "0:09:08", "remaining_time": "0:13:51", "throughput": 2700.36, "total_tokens": 1479872}
|
|
{"current_steps": 3015, "total_steps": 7577, "loss": 0.2218, "lr": 1.5067673909814818e-06, "epoch": 0.3979147419823149, "percentage": 39.79, "elapsed_time": "0:09:08", "remaining_time": "0:13:49", "throughput": 2702.51, "total_tokens": 1481920}
|
|
{"current_steps": 3020, "total_steps": 7577, "loss": 0.1557, "lr": 1.5047801902207953e-06, "epoch": 0.39857463376006336, "percentage": 39.86, "elapsed_time": "0:09:08", "remaining_time": "0:13:47", "throughput": 2706.43, "total_tokens": 1484992}
|
|
{"current_steps": 3025, "total_steps": 7577, "loss": 0.2134, "lr": 1.5027903109056288e-06, "epoch": 0.3992345255378118, "percentage": 39.92, "elapsed_time": "0:09:09", "remaining_time": "0:13:46", "throughput": 2708.9, "total_tokens": 1487232}
|
|
{"current_steps": 3030, "total_steps": 7577, "loss": 0.1412, "lr": 1.5007977635950336e-06, "epoch": 0.39989441731556025, "percentage": 39.99, "elapsed_time": "0:09:09", "remaining_time": "0:13:44", "throughput": 2711.82, "total_tokens": 1489728}
|
|
{"current_steps": 3032, "total_steps": 7577, "eval_loss": 0.13093648850917816, "epoch": 0.4001583740266596, "percentage": 40.02, "elapsed_time": "0:09:17", "remaining_time": "0:13:55", "throughput": 2674.27, "total_tokens": 1490688}
|
|
{"current_steps": 3035, "total_steps": 7577, "loss": 0.2057, "lr": 1.498802558862219e-06, "epoch": 0.4005543090933087, "percentage": 40.06, "elapsed_time": "0:09:40", "remaining_time": "0:14:28", "throughput": 2571.02, "total_tokens": 1491968}
|
|
{"current_steps": 3040, "total_steps": 7577, "loss": 0.1003, "lr": 1.496804707294496e-06, "epoch": 0.40121420087105714, "percentage": 40.12, "elapsed_time": "0:09:40", "remaining_time": "0:14:26", "throughput": 2573.61, "total_tokens": 1494336}
|
|
{"current_steps": 3045, "total_steps": 7577, "loss": 0.065, "lr": 1.4948042194932195e-06, "epoch": 0.4018740926488056, "percentage": 40.19, "elapsed_time": "0:09:40", "remaining_time": "0:14:24", "throughput": 2577.47, "total_tokens": 1497472}
|
|
{"current_steps": 3050, "total_steps": 7577, "loss": 0.0242, "lr": 1.4928011060737341e-06, "epoch": 0.402533984426554, "percentage": 40.25, "elapsed_time": "0:09:41", "remaining_time": "0:14:22", "throughput": 2580.29, "total_tokens": 1499968}
|
|
{"current_steps": 3055, "total_steps": 7577, "loss": 0.0811, "lr": 1.4907953776653171e-06, "epoch": 0.40319387620430247, "percentage": 40.32, "elapsed_time": "0:09:41", "remaining_time": "0:14:20", "throughput": 2582.91, "total_tokens": 1502336}
|
|
{"current_steps": 3060, "total_steps": 7577, "loss": 0.1462, "lr": 1.4887870449111206e-06, "epoch": 0.40385376798205097, "percentage": 40.39, "elapsed_time": "0:09:41", "remaining_time": "0:14:19", "throughput": 2585.31, "total_tokens": 1504576}
|
|
{"current_steps": 3065, "total_steps": 7577, "loss": 0.2155, "lr": 1.486776118468118e-06, "epoch": 0.4045136597597994, "percentage": 40.45, "elapsed_time": "0:09:42", "remaining_time": "0:14:17", "throughput": 2588.24, "total_tokens": 1507136}
|
|
{"current_steps": 3070, "total_steps": 7577, "loss": 0.0479, "lr": 1.4847626090070451e-06, "epoch": 0.40517355153754786, "percentage": 40.52, "elapsed_time": "0:09:42", "remaining_time": "0:14:15", "throughput": 2591.18, "total_tokens": 1509696}
|
|
{"current_steps": 3075, "total_steps": 7577, "loss": 0.3045, "lr": 1.4827465272123439e-06, "epoch": 0.4058334433152963, "percentage": 40.58, "elapsed_time": "0:09:42", "remaining_time": "0:14:13", "throughput": 2594.01, "total_tokens": 1512192}
|
|
{"current_steps": 3080, "total_steps": 7577, "loss": 0.0134, "lr": 1.4807278837821063e-06, "epoch": 0.40649333509304475, "percentage": 40.65, "elapsed_time": "0:09:43", "remaining_time": "0:14:11", "throughput": 2596.94, "total_tokens": 1514752}
|
|
{"current_steps": 3085, "total_steps": 7577, "loss": 0.3, "lr": 1.4787066894280178e-06, "epoch": 0.4071532268707932, "percentage": 40.72, "elapsed_time": "0:09:43", "remaining_time": "0:14:09", "throughput": 2600.06, "total_tokens": 1517440}
|
|
{"current_steps": 3090, "total_steps": 7577, "loss": 0.0679, "lr": 1.476682954875299e-06, "epoch": 0.40781311864854164, "percentage": 40.78, "elapsed_time": "0:09:43", "remaining_time": "0:14:07", "throughput": 2602.57, "total_tokens": 1519744}
|
|
{"current_steps": 3095, "total_steps": 7577, "loss": 0.1745, "lr": 1.4746566908626506e-06, "epoch": 0.4084730104262901, "percentage": 40.85, "elapsed_time": "0:09:44", "remaining_time": "0:14:06", "throughput": 2605.26, "total_tokens": 1522176}
|
|
{"current_steps": 3100, "total_steps": 7577, "loss": 0.0519, "lr": 1.4726279081421956e-06, "epoch": 0.40913290220403853, "percentage": 40.91, "elapsed_time": "0:09:44", "remaining_time": "0:14:04", "throughput": 2607.56, "total_tokens": 1524352}
|
|
{"current_steps": 3105, "total_steps": 7577, "loss": 0.2616, "lr": 1.4705966174794216e-06, "epoch": 0.409792793981787, "percentage": 40.98, "elapsed_time": "0:09:44", "remaining_time": "0:14:02", "throughput": 2610.53, "total_tokens": 1526976}
|
|
{"current_steps": 3110, "total_steps": 7577, "loss": 0.1047, "lr": 1.4685628296531248e-06, "epoch": 0.4104526857595354, "percentage": 41.05, "elapsed_time": "0:09:45", "remaining_time": "0:14:00", "throughput": 2612.78, "total_tokens": 1529152}
|
|
{"current_steps": 3115, "total_steps": 7577, "loss": 0.0288, "lr": 1.466526555455352e-06, "epoch": 0.41111257753728386, "percentage": 41.11, "elapsed_time": "0:09:45", "remaining_time": "0:13:58", "throughput": 2615.53, "total_tokens": 1531648}
|
|
{"current_steps": 3120, "total_steps": 7577, "loss": 0.0023, "lr": 1.4644878056913432e-06, "epoch": 0.4117724693150323, "percentage": 41.18, "elapsed_time": "0:09:45", "remaining_time": "0:13:57", "throughput": 2617.96, "total_tokens": 1533952}
|
|
{"current_steps": 3125, "total_steps": 7577, "loss": 0.165, "lr": 1.4624465911794764e-06, "epoch": 0.4124323610927808, "percentage": 41.24, "elapsed_time": "0:09:46", "remaining_time": "0:13:55", "throughput": 2621.04, "total_tokens": 1536640}
|
|
{"current_steps": 3130, "total_steps": 7577, "loss": 0.0024, "lr": 1.4604029227512062e-06, "epoch": 0.41309225287052925, "percentage": 41.31, "elapsed_time": "0:09:46", "remaining_time": "0:13:53", "throughput": 2623.92, "total_tokens": 1539200}
|
|
{"current_steps": 3135, "total_steps": 7577, "loss": 0.1984, "lr": 1.4583568112510108e-06, "epoch": 0.4137521446482777, "percentage": 41.38, "elapsed_time": "0:09:46", "remaining_time": "0:13:51", "throughput": 2626.61, "total_tokens": 1541632}
|
|
{"current_steps": 3140, "total_steps": 7577, "loss": 0.0697, "lr": 1.4563082675363302e-06, "epoch": 0.41441203642602614, "percentage": 41.44, "elapsed_time": "0:09:47", "remaining_time": "0:13:49", "throughput": 2629.4, "total_tokens": 1544128}
|
|
{"current_steps": 3145, "total_steps": 7577, "loss": 0.0009, "lr": 1.4542573024775122e-06, "epoch": 0.4150719282037746, "percentage": 41.51, "elapsed_time": "0:09:47", "remaining_time": "0:13:48", "throughput": 2631.75, "total_tokens": 1546368}
|
|
{"current_steps": 3150, "total_steps": 7577, "loss": 0.3214, "lr": 1.4522039269577521e-06, "epoch": 0.41573181998152303, "percentage": 41.57, "elapsed_time": "0:09:47", "remaining_time": "0:13:46", "throughput": 2634.32, "total_tokens": 1548736}
|
|
{"current_steps": 3155, "total_steps": 7577, "loss": 0.223, "lr": 1.4501481518730372e-06, "epoch": 0.4163917117592715, "percentage": 41.64, "elapsed_time": "0:09:48", "remaining_time": "0:13:44", "throughput": 2636.99, "total_tokens": 1551168}
|
|
{"current_steps": 3160, "total_steps": 7577, "loss": 0.0751, "lr": 1.4480899881320868e-06, "epoch": 0.4170516035370199, "percentage": 41.71, "elapsed_time": "0:09:48", "remaining_time": "0:13:42", "throughput": 2639.75, "total_tokens": 1553664}
|
|
{"current_steps": 3165, "total_steps": 7577, "loss": 0.1681, "lr": 1.4460294466562956e-06, "epoch": 0.41771149531476837, "percentage": 41.77, "elapsed_time": "0:09:48", "remaining_time": "0:13:40", "throughput": 2642.21, "total_tokens": 1555968}
|
|
{"current_steps": 3170, "total_steps": 7577, "loss": 0.0974, "lr": 1.4439665383796756e-06, "epoch": 0.4183713870925168, "percentage": 41.84, "elapsed_time": "0:09:49", "remaining_time": "0:13:39", "throughput": 2644.57, "total_tokens": 1558208}
|
|
{"current_steps": 3175, "total_steps": 7577, "loss": 0.003, "lr": 1.4419012742487972e-06, "epoch": 0.41903127887026526, "percentage": 41.9, "elapsed_time": "0:09:49", "remaining_time": "0:13:37", "throughput": 2647.22, "total_tokens": 1560640}
|
|
{"current_steps": 3180, "total_steps": 7577, "loss": 0.0019, "lr": 1.4398336652227335e-06, "epoch": 0.4196911706480137, "percentage": 41.97, "elapsed_time": "0:09:49", "remaining_time": "0:13:35", "throughput": 2650.3, "total_tokens": 1563328}
|
|
{"current_steps": 3185, "total_steps": 7577, "loss": 0.1522, "lr": 1.4377637222729986e-06, "epoch": 0.4203510624257622, "percentage": 42.04, "elapsed_time": "0:09:50", "remaining_time": "0:13:33", "throughput": 2652.83, "total_tokens": 1565696}
|
|
{"current_steps": 3190, "total_steps": 7577, "loss": 0.1285, "lr": 1.435691456383493e-06, "epoch": 0.42101095420351065, "percentage": 42.1, "elapsed_time": "0:09:50", "remaining_time": "0:13:32", "throughput": 2656.3, "total_tokens": 1568640}
|
|
{"current_steps": 3195, "total_steps": 7577, "loss": 0.128, "lr": 1.433616878550442e-06, "epoch": 0.4216708459812591, "percentage": 42.17, "elapsed_time": "0:09:50", "remaining_time": "0:13:30", "throughput": 2659.34, "total_tokens": 1571328}
|
|
{"current_steps": 3200, "total_steps": 7577, "loss": 0.3784, "lr": 1.4315399997823403e-06, "epoch": 0.42233073775900754, "percentage": 42.23, "elapsed_time": "0:09:51", "remaining_time": "0:13:28", "throughput": 2662.38, "total_tokens": 1574016}
|
|
{"current_steps": 3205, "total_steps": 7577, "loss": 0.2654, "lr": 1.429460831099891e-06, "epoch": 0.422990629536756, "percentage": 42.3, "elapsed_time": "0:09:51", "remaining_time": "0:13:26", "throughput": 2664.9, "total_tokens": 1576384}
|
|
{"current_steps": 3210, "total_steps": 7577, "loss": 0.2098, "lr": 1.4273793835359492e-06, "epoch": 0.4236505213145044, "percentage": 42.37, "elapsed_time": "0:09:51", "remaining_time": "0:13:25", "throughput": 2668.13, "total_tokens": 1579200}
|
|
{"current_steps": 3215, "total_steps": 7577, "loss": 0.1142, "lr": 1.4252956681354631e-06, "epoch": 0.42431041309225287, "percentage": 42.43, "elapsed_time": "0:09:52", "remaining_time": "0:13:23", "throughput": 2670.75, "total_tokens": 1581632}
|
|
{"current_steps": 3220, "total_steps": 7577, "loss": 0.0092, "lr": 1.4232096959554135e-06, "epoch": 0.4249703048700013, "percentage": 42.5, "elapsed_time": "0:09:52", "remaining_time": "0:13:21", "throughput": 2673.37, "total_tokens": 1584064}
|
|
{"current_steps": 3225, "total_steps": 7577, "loss": 0.0457, "lr": 1.4211214780647572e-06, "epoch": 0.42563019664774976, "percentage": 42.56, "elapsed_time": "0:09:52", "remaining_time": "0:13:20", "throughput": 2676.41, "total_tokens": 1586752}
|
|
{"current_steps": 3230, "total_steps": 7577, "loss": 0.0764, "lr": 1.4190310255443676e-06, "epoch": 0.4262900884254982, "percentage": 42.63, "elapsed_time": "0:09:53", "remaining_time": "0:13:18", "throughput": 2679.15, "total_tokens": 1589248}
|
|
{"current_steps": 3235, "total_steps": 7577, "loss": 0.0141, "lr": 1.4169383494869764e-06, "epoch": 0.42694998020324665, "percentage": 42.69, "elapsed_time": "0:09:53", "remaining_time": "0:13:16", "throughput": 2681.56, "total_tokens": 1591552}
|
|
{"current_steps": 3240, "total_steps": 7577, "loss": 0.051, "lr": 1.414843460997113e-06, "epoch": 0.4276098719809951, "percentage": 42.76, "elapsed_time": "0:09:53", "remaining_time": "0:13:14", "throughput": 2684.28, "total_tokens": 1594048}
|
|
{"current_steps": 3245, "total_steps": 7577, "loss": 0.1432, "lr": 1.4127463711910483e-06, "epoch": 0.4282697637587436, "percentage": 42.83, "elapsed_time": "0:09:54", "remaining_time": "0:13:13", "throughput": 2686.97, "total_tokens": 1596544}
|
|
{"current_steps": 3250, "total_steps": 7577, "loss": 0.1601, "lr": 1.410647091196733e-06, "epoch": 0.42892965553649204, "percentage": 42.89, "elapsed_time": "0:09:54", "remaining_time": "0:13:11", "throughput": 2689.8, "total_tokens": 1599104}
|
|
{"current_steps": 3255, "total_steps": 7577, "loss": 0.1324, "lr": 1.4085456321537402e-06, "epoch": 0.4295895473142405, "percentage": 42.96, "elapsed_time": "0:09:54", "remaining_time": "0:13:09", "throughput": 2692.1, "total_tokens": 1601344}
|
|
{"current_steps": 3260, "total_steps": 7577, "loss": 0.2152, "lr": 1.4064420052132056e-06, "epoch": 0.43024943909198893, "percentage": 43.02, "elapsed_time": "0:09:55", "remaining_time": "0:13:08", "throughput": 2695.01, "total_tokens": 1603968}
|
|
{"current_steps": 3265, "total_steps": 7577, "loss": 0.0317, "lr": 1.4043362215377696e-06, "epoch": 0.4309093308697374, "percentage": 43.09, "elapsed_time": "0:09:55", "remaining_time": "0:13:06", "throughput": 2697.63, "total_tokens": 1606400}
|
|
{"current_steps": 3270, "total_steps": 7577, "loss": 0.1258, "lr": 1.4022282923015158e-06, "epoch": 0.4315692226474858, "percentage": 43.16, "elapsed_time": "0:09:55", "remaining_time": "0:13:04", "throughput": 2700.42, "total_tokens": 1608960}
|
|
{"current_steps": 3275, "total_steps": 7577, "loss": 0.0026, "lr": 1.4001182286899136e-06, "epoch": 0.43222911442523426, "percentage": 43.22, "elapsed_time": "0:09:56", "remaining_time": "0:13:03", "throughput": 2703.14, "total_tokens": 1611456}
|
|
{"current_steps": 3280, "total_steps": 7577, "loss": 0.0702, "lr": 1.398006041899758e-06, "epoch": 0.4328890062029827, "percentage": 43.29, "elapsed_time": "0:09:56", "remaining_time": "0:13:01", "throughput": 2705.84, "total_tokens": 1613952}
|
|
{"current_steps": 3285, "total_steps": 7577, "loss": 0.1748, "lr": 1.3958917431391102e-06, "epoch": 0.43354889798073115, "percentage": 43.35, "elapsed_time": "0:09:56", "remaining_time": "0:12:59", "throughput": 2708.35, "total_tokens": 1616320}
|
|
{"current_steps": 3290, "total_steps": 7577, "loss": 0.1856, "lr": 1.3937753436272388e-06, "epoch": 0.4342087897584796, "percentage": 43.42, "elapsed_time": "0:09:57", "remaining_time": "0:12:58", "throughput": 2711.55, "total_tokens": 1619136}
|
|
{"current_steps": 3295, "total_steps": 7577, "loss": 0.118, "lr": 1.3916568545945597e-06, "epoch": 0.43486868153622804, "percentage": 43.49, "elapsed_time": "0:09:57", "remaining_time": "0:12:56", "throughput": 2714.25, "total_tokens": 1621632}
|
|
{"current_steps": 3300, "total_steps": 7577, "loss": 0.1392, "lr": 1.3895362872825764e-06, "epoch": 0.4355285733139765, "percentage": 43.55, "elapsed_time": "0:09:57", "remaining_time": "0:12:54", "throughput": 2716.84, "total_tokens": 1624064}
|
|
{"current_steps": 3305, "total_steps": 7577, "loss": 0.0494, "lr": 1.3874136529438205e-06, "epoch": 0.43618846509172493, "percentage": 43.62, "elapsed_time": "0:09:58", "remaining_time": "0:12:53", "throughput": 2719.44, "total_tokens": 1626496}
|
|
{"current_steps": 3310, "total_steps": 7577, "loss": 0.1045, "lr": 1.3852889628417918e-06, "epoch": 0.43684835686947343, "percentage": 43.68, "elapsed_time": "0:09:58", "remaining_time": "0:12:51", "throughput": 2721.82, "total_tokens": 1628800}
|
|
{"current_steps": 3315, "total_steps": 7577, "loss": 0.104, "lr": 1.3831622282508994e-06, "epoch": 0.4375082486472219, "percentage": 43.75, "elapsed_time": "0:09:58", "remaining_time": "0:12:49", "throughput": 2724.41, "total_tokens": 1631232}
|
|
{"current_steps": 3320, "total_steps": 7577, "loss": 0.0011, "lr": 1.3810334604564007e-06, "epoch": 0.4381681404249703, "percentage": 43.82, "elapsed_time": "0:09:59", "remaining_time": "0:12:48", "throughput": 2727.09, "total_tokens": 1633728}
|
|
{"current_steps": 3325, "total_steps": 7577, "loss": 0.0735, "lr": 1.3789026707543423e-06, "epoch": 0.43882803220271877, "percentage": 43.88, "elapsed_time": "0:09:59", "remaining_time": "0:12:46", "throughput": 2729.78, "total_tokens": 1636224}
|
|
{"current_steps": 3330, "total_steps": 7577, "loss": 0.063, "lr": 1.3767698704514998e-06, "epoch": 0.4394879239804672, "percentage": 43.95, "elapsed_time": "0:09:59", "remaining_time": "0:12:44", "throughput": 2731.71, "total_tokens": 1638272}
|
|
{"current_steps": 3335, "total_steps": 7577, "loss": 0.1941, "lr": 1.3746350708653175e-06, "epoch": 0.44014781575821565, "percentage": 44.01, "elapsed_time": "0:10:00", "remaining_time": "0:12:43", "throughput": 2733.96, "total_tokens": 1640512}
|
|
{"current_steps": 3340, "total_steps": 7577, "loss": 0.2224, "lr": 1.3724982833238495e-06, "epoch": 0.4408077075359641, "percentage": 44.08, "elapsed_time": "0:10:00", "remaining_time": "0:12:41", "throughput": 2736.52, "total_tokens": 1642944}
|
|
{"current_steps": 3345, "total_steps": 7577, "loss": 0.1334, "lr": 1.370359519165697e-06, "epoch": 0.44146759931371254, "percentage": 44.15, "elapsed_time": "0:10:00", "remaining_time": "0:12:39", "throughput": 2739.08, "total_tokens": 1645376}
|
|
{"current_steps": 3350, "total_steps": 7577, "loss": 0.0429, "lr": 1.368218789739952e-06, "epoch": 0.442127491091461, "percentage": 44.21, "elapsed_time": "0:10:01", "remaining_time": "0:12:38", "throughput": 2741.86, "total_tokens": 1647936}
|
|
{"current_steps": 3355, "total_steps": 7577, "loss": 0.1404, "lr": 1.3660761064061337e-06, "epoch": 0.44278738286920943, "percentage": 44.28, "elapsed_time": "0:10:01", "remaining_time": "0:12:36", "throughput": 2744.6, "total_tokens": 1650496}
|
|
{"current_steps": 3360, "total_steps": 7577, "loss": 0.1065, "lr": 1.3639314805341297e-06, "epoch": 0.4434472746469579, "percentage": 44.34, "elapsed_time": "0:10:01", "remaining_time": "0:12:35", "throughput": 2747.25, "total_tokens": 1652992}
|
|
{"current_steps": 3365, "total_steps": 7577, "loss": 0.0935, "lr": 1.3617849235041355e-06, "epoch": 0.4441071664247063, "percentage": 44.41, "elapsed_time": "0:10:02", "remaining_time": "0:12:33", "throughput": 2749.9, "total_tokens": 1655488}
|
|
{"current_steps": 3370, "total_steps": 7577, "loss": 0.1084, "lr": 1.3596364467065938e-06, "epoch": 0.4447670582024548, "percentage": 44.48, "elapsed_time": "0:10:02", "remaining_time": "0:12:31", "throughput": 2752.54, "total_tokens": 1657984}
|
|
{"current_steps": 3375, "total_steps": 7577, "loss": 0.2514, "lr": 1.3574860615421346e-06, "epoch": 0.44542694998020327, "percentage": 44.54, "elapsed_time": "0:10:02", "remaining_time": "0:12:30", "throughput": 2755.56, "total_tokens": 1660736}
|
|
{"current_steps": 3380, "total_steps": 7577, "loss": 0.1354, "lr": 1.3553337794215147e-06, "epoch": 0.4460868417579517, "percentage": 44.61, "elapsed_time": "0:10:03", "remaining_time": "0:12:28", "throughput": 2757.99, "total_tokens": 1663104}
|
|
{"current_steps": 3385, "total_steps": 7577, "loss": 0.1002, "lr": 1.3531796117655565e-06, "epoch": 0.44674673353570016, "percentage": 44.67, "elapsed_time": "0:10:03", "remaining_time": "0:12:27", "throughput": 2760.19, "total_tokens": 1665344}
|
|
{"current_steps": 3390, "total_steps": 7577, "loss": 0.156, "lr": 1.3510235700050873e-06, "epoch": 0.4474066253134486, "percentage": 44.74, "elapsed_time": "0:10:03", "remaining_time": "0:12:25", "throughput": 2763.22, "total_tokens": 1668096}
|
|
{"current_steps": 3395, "total_steps": 7577, "loss": 0.0959, "lr": 1.3488656655808801e-06, "epoch": 0.44806651709119705, "percentage": 44.81, "elapsed_time": "0:10:04", "remaining_time": "0:12:24", "throughput": 2765.34, "total_tokens": 1670272}
|
|
{"current_steps": 3400, "total_steps": 7577, "loss": 0.0913, "lr": 1.3467059099435912e-06, "epoch": 0.4487264088689455, "percentage": 44.87, "elapsed_time": "0:10:04", "remaining_time": "0:12:22", "throughput": 2767.44, "total_tokens": 1672448}
|
|
{"current_steps": 3405, "total_steps": 7577, "loss": 0.0055, "lr": 1.3445443145537002e-06, "epoch": 0.44938630064669394, "percentage": 44.94, "elapsed_time": "0:10:04", "remaining_time": "0:12:20", "throughput": 2770.45, "total_tokens": 1675200}
|
|
{"current_steps": 3410, "total_steps": 7577, "loss": 0.0509, "lr": 1.3423808908814494e-06, "epoch": 0.4500461924244424, "percentage": 45.0, "elapsed_time": "0:10:05", "remaining_time": "0:12:19", "throughput": 2773.05, "total_tokens": 1677696}
|
|
{"current_steps": 3411, "total_steps": 7577, "eval_loss": 0.11581598222255707, "epoch": 0.45017817077999206, "percentage": 45.02, "elapsed_time": "0:10:12", "remaining_time": "0:12:28", "throughput": 2738.16, "total_tokens": 1678208}
|
|
{"current_steps": 3415, "total_steps": 7577, "loss": 0.0982, "lr": 1.3402156504067826e-06, "epoch": 0.4507060842021908, "percentage": 45.07, "elapsed_time": "0:11:10", "remaining_time": "0:13:37", "throughput": 2505.28, "total_tokens": 1680256}
|
|
{"current_steps": 3420, "total_steps": 7577, "loss": 0.0816, "lr": 1.338048604619284e-06, "epoch": 0.45136597597993927, "percentage": 45.14, "elapsed_time": "0:11:11", "remaining_time": "0:13:35", "throughput": 2507.57, "total_tokens": 1682624}
|
|
{"current_steps": 3425, "total_steps": 7577, "loss": 0.0548, "lr": 1.3358797650181178e-06, "epoch": 0.4520258677576877, "percentage": 45.2, "elapsed_time": "0:11:11", "remaining_time": "0:13:33", "throughput": 2509.96, "total_tokens": 1685056}
|
|
{"current_steps": 3430, "total_steps": 7577, "loss": 0.1743, "lr": 1.3337091431119662e-06, "epoch": 0.45268575953543616, "percentage": 45.27, "elapsed_time": "0:11:11", "remaining_time": "0:13:32", "throughput": 2511.89, "total_tokens": 1687168}
|
|
{"current_steps": 3435, "total_steps": 7577, "loss": 0.263, "lr": 1.3315367504189698e-06, "epoch": 0.45334565131318466, "percentage": 45.33, "elapsed_time": "0:11:11", "remaining_time": "0:13:30", "throughput": 2513.72, "total_tokens": 1689216}
|
|
{"current_steps": 3440, "total_steps": 7577, "loss": 0.0704, "lr": 1.3293625984666656e-06, "epoch": 0.4540055430909331, "percentage": 45.4, "elapsed_time": "0:11:12", "remaining_time": "0:13:28", "throughput": 2516.27, "total_tokens": 1691776}
|
|
{"current_steps": 3445, "total_steps": 7577, "loss": 0.0008, "lr": 1.3271866987919254e-06, "epoch": 0.45466543486868155, "percentage": 45.47, "elapsed_time": "0:11:12", "remaining_time": "0:13:26", "throughput": 2518.84, "total_tokens": 1694336}
|
|
{"current_steps": 3450, "total_steps": 7577, "loss": 0.2066, "lr": 1.325009062940895e-06, "epoch": 0.45532532664643, "percentage": 45.53, "elapsed_time": "0:11:12", "remaining_time": "0:13:25", "throughput": 2521.03, "total_tokens": 1696640}
|
|
{"current_steps": 3455, "total_steps": 7577, "loss": 0.0826, "lr": 1.3228297024689336e-06, "epoch": 0.45598521842417844, "percentage": 45.6, "elapsed_time": "0:11:13", "remaining_time": "0:13:23", "throughput": 2523.13, "total_tokens": 1698880}
|
|
{"current_steps": 3460, "total_steps": 7577, "loss": 0.1221, "lr": 1.3206486289405519e-06, "epoch": 0.4566451102019269, "percentage": 45.66, "elapsed_time": "0:11:13", "remaining_time": "0:13:21", "throughput": 2525.51, "total_tokens": 1701312}
|
|
{"current_steps": 3465, "total_steps": 7577, "loss": 0.1446, "lr": 1.3184658539293496e-06, "epoch": 0.45730500197967533, "percentage": 45.73, "elapsed_time": "0:11:13", "remaining_time": "0:13:19", "throughput": 2527.98, "total_tokens": 1703808}
|
|
{"current_steps": 3470, "total_steps": 7577, "loss": 0.132, "lr": 1.3162813890179564e-06, "epoch": 0.4579648937574238, "percentage": 45.8, "elapsed_time": "0:11:14", "remaining_time": "0:13:18", "throughput": 2530.45, "total_tokens": 1706304}
|
|
{"current_steps": 3475, "total_steps": 7577, "loss": 0.236, "lr": 1.314095245797969e-06, "epoch": 0.4586247855351722, "percentage": 45.86, "elapsed_time": "0:11:14", "remaining_time": "0:13:16", "throughput": 2532.84, "total_tokens": 1708736}
|
|
{"current_steps": 3480, "total_steps": 7577, "loss": 0.1477, "lr": 1.3119074358698891e-06, "epoch": 0.45928467731292066, "percentage": 45.93, "elapsed_time": "0:11:14", "remaining_time": "0:13:14", "throughput": 2535.3, "total_tokens": 1711232}
|
|
{"current_steps": 3485, "total_steps": 7577, "loss": 0.003, "lr": 1.3097179708430634e-06, "epoch": 0.4599445690906691, "percentage": 45.99, "elapsed_time": "0:11:15", "remaining_time": "0:13:12", "throughput": 2537.57, "total_tokens": 1713600}
|
|
{"current_steps": 3490, "total_steps": 7577, "loss": 0.1998, "lr": 1.3075268623356214e-06, "epoch": 0.46060446086841755, "percentage": 46.06, "elapsed_time": "0:11:15", "remaining_time": "0:13:11", "throughput": 2540.22, "total_tokens": 1716224}
|
|
{"current_steps": 3495, "total_steps": 7577, "loss": 0.1462, "lr": 1.305334121974412e-06, "epoch": 0.46126435264616605, "percentage": 46.13, "elapsed_time": "0:11:15", "remaining_time": "0:13:09", "throughput": 2542.69, "total_tokens": 1718720}
|
|
{"current_steps": 3500, "total_steps": 7577, "loss": 0.0858, "lr": 1.3031397613949448e-06, "epoch": 0.4619242444239145, "percentage": 46.19, "elapsed_time": "0:11:16", "remaining_time": "0:13:07", "throughput": 2545.24, "total_tokens": 1721280}
|
|
{"current_steps": 3505, "total_steps": 7577, "loss": 0.1099, "lr": 1.3009437922413266e-06, "epoch": 0.46258413620166294, "percentage": 46.26, "elapsed_time": "0:11:16", "remaining_time": "0:13:06", "throughput": 2547.61, "total_tokens": 1723712}
|
|
{"current_steps": 3510, "total_steps": 7577, "loss": 0.1115, "lr": 1.2987462261661994e-06, "epoch": 0.4632440279794114, "percentage": 46.32, "elapsed_time": "0:11:16", "remaining_time": "0:13:04", "throughput": 2549.7, "total_tokens": 1725952}
|
|
{"current_steps": 3515, "total_steps": 7577, "loss": 0.0017, "lr": 1.2965470748306798e-06, "epoch": 0.46390391975715983, "percentage": 46.39, "elapsed_time": "0:11:17", "remaining_time": "0:13:02", "throughput": 2552.23, "total_tokens": 1728512}
|
|
{"current_steps": 3520, "total_steps": 7577, "loss": 0.1338, "lr": 1.2943463499042957e-06, "epoch": 0.4645638115349083, "percentage": 46.46, "elapsed_time": "0:11:17", "remaining_time": "0:13:00", "throughput": 2554.68, "total_tokens": 1731008}
|
|
{"current_steps": 3525, "total_steps": 7577, "loss": 0.0485, "lr": 1.2921440630649257e-06, "epoch": 0.4652237033126567, "percentage": 46.52, "elapsed_time": "0:11:17", "remaining_time": "0:12:59", "throughput": 2557.38, "total_tokens": 1733696}
|
|
{"current_steps": 3530, "total_steps": 7577, "loss": 0.1013, "lr": 1.2899402259987355e-06, "epoch": 0.46588359509040517, "percentage": 46.59, "elapsed_time": "0:11:18", "remaining_time": "0:12:57", "throughput": 2559.91, "total_tokens": 1736256}
|
|
{"current_steps": 3535, "total_steps": 7577, "loss": 0.2473, "lr": 1.287734850400118e-06, "epoch": 0.4665434868681536, "percentage": 46.65, "elapsed_time": "0:11:18", "remaining_time": "0:12:55", "throughput": 2562.6, "total_tokens": 1738944}
|
|
{"current_steps": 3540, "total_steps": 7577, "loss": 0.3237, "lr": 1.2855279479716297e-06, "epoch": 0.46720337864590206, "percentage": 46.72, "elapsed_time": "0:11:18", "remaining_time": "0:12:54", "throughput": 2565.21, "total_tokens": 1741568}
|
|
{"current_steps": 3545, "total_steps": 7577, "loss": 0.0027, "lr": 1.283319530423929e-06, "epoch": 0.4678632704236505, "percentage": 46.79, "elapsed_time": "0:11:19", "remaining_time": "0:12:52", "throughput": 2567.28, "total_tokens": 1743808}
|
|
{"current_steps": 3550, "total_steps": 7577, "loss": 0.0016, "lr": 1.2811096094757144e-06, "epoch": 0.46852316220139895, "percentage": 46.85, "elapsed_time": "0:11:19", "remaining_time": "0:12:50", "throughput": 2569.54, "total_tokens": 1746176}
|
|
{"current_steps": 3555, "total_steps": 7577, "loss": 0.2019, "lr": 1.2788981968536612e-06, "epoch": 0.46918305397914745, "percentage": 46.92, "elapsed_time": "0:11:19", "remaining_time": "0:12:49", "throughput": 2571.86, "total_tokens": 1748608}
|
|
{"current_steps": 3560, "total_steps": 7577, "loss": 0.1542, "lr": 1.2766853042923607e-06, "epoch": 0.4698429457568959, "percentage": 46.98, "elapsed_time": "0:11:20", "remaining_time": "0:12:47", "throughput": 2574.2, "total_tokens": 1751040}
|
|
{"current_steps": 3565, "total_steps": 7577, "loss": 0.0623, "lr": 1.2744709435342573e-06, "epoch": 0.47050283753464434, "percentage": 47.05, "elapsed_time": "0:11:20", "remaining_time": "0:12:45", "throughput": 2576.24, "total_tokens": 1753280}
|
|
{"current_steps": 3570, "total_steps": 7577, "loss": 0.2919, "lr": 1.2722551263295864e-06, "epoch": 0.4711627293123928, "percentage": 47.12, "elapsed_time": "0:11:20", "remaining_time": "0:12:44", "throughput": 2578.58, "total_tokens": 1755712}
|
|
{"current_steps": 3575, "total_steps": 7577, "loss": 0.1294, "lr": 1.2700378644363114e-06, "epoch": 0.4718226210901412, "percentage": 47.18, "elapsed_time": "0:11:21", "remaining_time": "0:12:42", "throughput": 2580.62, "total_tokens": 1757952}
|
|
{"current_steps": 3580, "total_steps": 7577, "loss": 0.0883, "lr": 1.2678191696200621e-06, "epoch": 0.47248251286788967, "percentage": 47.25, "elapsed_time": "0:11:21", "remaining_time": "0:12:40", "throughput": 2582.94, "total_tokens": 1760384}
|
|
{"current_steps": 3585, "total_steps": 7577, "loss": 0.0025, "lr": 1.2655990536540717e-06, "epoch": 0.4731424046456381, "percentage": 47.31, "elapsed_time": "0:11:21", "remaining_time": "0:12:39", "throughput": 2585.42, "total_tokens": 1762944}
|
|
{"current_steps": 3590, "total_steps": 7577, "loss": 0.2308, "lr": 1.2633775283191144e-06, "epoch": 0.47380229642338656, "percentage": 47.38, "elapsed_time": "0:11:22", "remaining_time": "0:12:37", "throughput": 2587.93, "total_tokens": 1765504}
|
|
{"current_steps": 3595, "total_steps": 7577, "loss": 0.0084, "lr": 1.2611546054034436e-06, "epoch": 0.474462188201135, "percentage": 47.45, "elapsed_time": "0:11:22", "remaining_time": "0:12:36", "throughput": 2590.52, "total_tokens": 1768128}
|
|
{"current_steps": 3600, "total_steps": 7577, "loss": 0.1521, "lr": 1.2589302967027285e-06, "epoch": 0.47512207997888345, "percentage": 47.51, "elapsed_time": "0:11:22", "remaining_time": "0:12:34", "throughput": 2592.92, "total_tokens": 1770624}
|
|
{"current_steps": 3605, "total_steps": 7577, "loss": 0.3261, "lr": 1.2567046140199914e-06, "epoch": 0.4757819717566319, "percentage": 47.58, "elapsed_time": "0:11:23", "remaining_time": "0:12:32", "throughput": 2595.51, "total_tokens": 1773248}
|
|
{"current_steps": 3610, "total_steps": 7577, "loss": 0.1219, "lr": 1.2544775691655463e-06, "epoch": 0.47644186353438034, "percentage": 47.64, "elapsed_time": "0:11:23", "remaining_time": "0:12:31", "throughput": 2597.56, "total_tokens": 1775488}
|
|
{"current_steps": 3615, "total_steps": 7577, "loss": 0.138, "lr": 1.2522491739569346e-06, "epoch": 0.4771017553121288, "percentage": 47.71, "elapsed_time": "0:11:23", "remaining_time": "0:12:29", "throughput": 2599.68, "total_tokens": 1777792}
|
|
{"current_steps": 3620, "total_steps": 7577, "loss": 0.1041, "lr": 1.250019440218864e-06, "epoch": 0.4777616470898773, "percentage": 47.78, "elapsed_time": "0:11:24", "remaining_time": "0:12:27", "throughput": 2602.15, "total_tokens": 1780352}
|
|
{"current_steps": 3625, "total_steps": 7577, "loss": 0.1707, "lr": 1.247788379783144e-06, "epoch": 0.47842153886762573, "percentage": 47.84, "elapsed_time": "0:11:24", "remaining_time": "0:12:26", "throughput": 2604.98, "total_tokens": 1783168}
|
|
{"current_steps": 3630, "total_steps": 7577, "loss": 0.0886, "lr": 1.2455560044886248e-06, "epoch": 0.4790814306453742, "percentage": 47.91, "elapsed_time": "0:11:24", "remaining_time": "0:12:24", "throughput": 2607.73, "total_tokens": 1785920}
|
|
{"current_steps": 3635, "total_steps": 7577, "loss": 0.1456, "lr": 1.2433223261811337e-06, "epoch": 0.4797413224231226, "percentage": 47.97, "elapsed_time": "0:11:25", "remaining_time": "0:12:23", "throughput": 2610.12, "total_tokens": 1788416}
|
|
{"current_steps": 3640, "total_steps": 7577, "loss": 0.0141, "lr": 1.2410873567134115e-06, "epoch": 0.48040121420087106, "percentage": 48.04, "elapsed_time": "0:11:25", "remaining_time": "0:12:21", "throughput": 2612.42, "total_tokens": 1790848}
|
|
{"current_steps": 3645, "total_steps": 7577, "loss": 0.0342, "lr": 1.238851107945051e-06, "epoch": 0.4810611059786195, "percentage": 48.11, "elapsed_time": "0:11:25", "remaining_time": "0:12:19", "throughput": 2614.7, "total_tokens": 1793280}
|
|
{"current_steps": 3650, "total_steps": 7577, "loss": 0.1088, "lr": 1.2366135917424341e-06, "epoch": 0.48172099775636795, "percentage": 48.17, "elapsed_time": "0:11:26", "remaining_time": "0:12:18", "throughput": 2616.89, "total_tokens": 1795648}
|
|
{"current_steps": 3655, "total_steps": 7577, "loss": 0.1796, "lr": 1.2343748199786665e-06, "epoch": 0.4823808895341164, "percentage": 48.24, "elapsed_time": "0:11:26", "remaining_time": "0:12:16", "throughput": 2618.99, "total_tokens": 1797952}
|
|
{"current_steps": 3660, "total_steps": 7577, "loss": 0.0595, "lr": 1.2321348045335182e-06, "epoch": 0.48304078131186484, "percentage": 48.3, "elapsed_time": "0:11:26", "remaining_time": "0:12:15", "throughput": 2621.01, "total_tokens": 1800192}
|
|
{"current_steps": 3665, "total_steps": 7577, "loss": 0.1465, "lr": 1.2298935572933575e-06, "epoch": 0.4837006730896133, "percentage": 48.37, "elapsed_time": "0:11:27", "remaining_time": "0:12:13", "throughput": 2623.2, "total_tokens": 1802560}
|
|
{"current_steps": 3670, "total_steps": 7577, "loss": 0.1584, "lr": 1.2276510901510892e-06, "epoch": 0.48436056486736173, "percentage": 48.44, "elapsed_time": "0:11:27", "remaining_time": "0:12:11", "throughput": 2625.57, "total_tokens": 1805056}
|
|
{"current_steps": 3675, "total_steps": 7577, "loss": 0.1682, "lr": 1.2254074150060915e-06, "epoch": 0.4850204566451102, "percentage": 48.5, "elapsed_time": "0:11:27", "remaining_time": "0:12:10", "throughput": 2628.21, "total_tokens": 1807744}
|
|
{"current_steps": 3680, "total_steps": 7577, "loss": 0.1522, "lr": 1.2231625437641535e-06, "epoch": 0.4856803484228587, "percentage": 48.57, "elapsed_time": "0:11:28", "remaining_time": "0:12:08", "throughput": 2630.75, "total_tokens": 1810368}
|
|
{"current_steps": 3685, "total_steps": 7577, "loss": 0.045, "lr": 1.2209164883374096e-06, "epoch": 0.4863402402006071, "percentage": 48.63, "elapsed_time": "0:11:28", "remaining_time": "0:12:07", "throughput": 2633.38, "total_tokens": 1813056}
|
|
{"current_steps": 3690, "total_steps": 7577, "loss": 0.22, "lr": 1.2186692606442793e-06, "epoch": 0.48700013197835557, "percentage": 48.7, "elapsed_time": "0:11:28", "remaining_time": "0:12:05", "throughput": 2635.45, "total_tokens": 1815360}
|
|
{"current_steps": 3695, "total_steps": 7577, "loss": 0.1641, "lr": 1.216420872609402e-06, "epoch": 0.487660023756104, "percentage": 48.77, "elapsed_time": "0:11:29", "remaining_time": "0:12:04", "throughput": 2637.9, "total_tokens": 1817920}
|
|
{"current_steps": 3700, "total_steps": 7577, "loss": 0.0635, "lr": 1.2141713361635739e-06, "epoch": 0.48831991553385246, "percentage": 48.83, "elapsed_time": "0:11:29", "remaining_time": "0:12:02", "throughput": 2640.03, "total_tokens": 1820288}
|
|
{"current_steps": 3705, "total_steps": 7577, "loss": 0.1703, "lr": 1.2119206632436864e-06, "epoch": 0.4889798073116009, "percentage": 48.9, "elapsed_time": "0:11:29", "remaining_time": "0:12:00", "throughput": 2642.21, "total_tokens": 1822656}
|
|
{"current_steps": 3710, "total_steps": 7577, "loss": 0.0631, "lr": 1.209668865792661e-06, "epoch": 0.48963969908934935, "percentage": 48.96, "elapsed_time": "0:11:30", "remaining_time": "0:11:59", "throughput": 2644.12, "total_tokens": 1824832}
|
|
{"current_steps": 3715, "total_steps": 7577, "loss": 0.0998, "lr": 1.207415955759385e-06, "epoch": 0.4902995908670978, "percentage": 49.03, "elapsed_time": "0:11:30", "remaining_time": "0:11:57", "throughput": 2646.29, "total_tokens": 1827200}
|
|
{"current_steps": 3720, "total_steps": 7577, "loss": 0.1581, "lr": 1.2051619450986514e-06, "epoch": 0.49095948264484623, "percentage": 49.1, "elapsed_time": "0:11:30", "remaining_time": "0:11:56", "throughput": 2648.56, "total_tokens": 1829632}
|
|
{"current_steps": 3725, "total_steps": 7577, "loss": 0.1271, "lr": 1.2029068457710923e-06, "epoch": 0.4916193744225947, "percentage": 49.16, "elapsed_time": "0:11:31", "remaining_time": "0:11:54", "throughput": 2651.0, "total_tokens": 1832192}
|
|
{"current_steps": 3730, "total_steps": 7577, "loss": 0.091, "lr": 1.200650669743117e-06, "epoch": 0.4922792662003431, "percentage": 49.23, "elapsed_time": "0:11:31", "remaining_time": "0:11:53", "throughput": 2653.43, "total_tokens": 1834752}
|
|
{"current_steps": 3735, "total_steps": 7577, "loss": 0.116, "lr": 1.1983934289868488e-06, "epoch": 0.49293915797809157, "percentage": 49.29, "elapsed_time": "0:11:31", "remaining_time": "0:11:51", "throughput": 2655.78, "total_tokens": 1837248}
|
|
{"current_steps": 3740, "total_steps": 7577, "loss": 0.1431, "lr": 1.1961351354800595e-06, "epoch": 0.49359904975584007, "percentage": 49.36, "elapsed_time": "0:11:32", "remaining_time": "0:11:50", "throughput": 2658.03, "total_tokens": 1839680}
|
|
{"current_steps": 3745, "total_steps": 7577, "loss": 0.1399, "lr": 1.193875801206109e-06, "epoch": 0.4942589415335885, "percentage": 49.43, "elapsed_time": "0:11:32", "remaining_time": "0:11:48", "throughput": 2660.55, "total_tokens": 1842304}
|
|
{"current_steps": 3750, "total_steps": 7577, "loss": 0.0471, "lr": 1.1916154381538786e-06, "epoch": 0.49491883331133696, "percentage": 49.49, "elapsed_time": "0:11:32", "remaining_time": "0:11:47", "throughput": 2662.43, "total_tokens": 1844480}
|
|
{"current_steps": 3755, "total_steps": 7577, "loss": 0.1858, "lr": 1.1893540583177083e-06, "epoch": 0.4955787250890854, "percentage": 49.56, "elapsed_time": "0:11:33", "remaining_time": "0:11:45", "throughput": 2664.65, "total_tokens": 1846912}
|
|
{"current_steps": 3760, "total_steps": 7577, "loss": 0.0801, "lr": 1.187091673697335e-06, "epoch": 0.49623861686683385, "percentage": 49.62, "elapsed_time": "0:11:33", "remaining_time": "0:11:43", "throughput": 2666.46, "total_tokens": 1849024}
|
|
{"current_steps": 3765, "total_steps": 7577, "loss": 0.0691, "lr": 1.184828296297826e-06, "epoch": 0.4968985086445823, "percentage": 49.69, "elapsed_time": "0:11:33", "remaining_time": "0:11:42", "throughput": 2669.05, "total_tokens": 1851712}
|
|
{"current_steps": 3770, "total_steps": 7577, "loss": 0.0697, "lr": 1.182563938129518e-06, "epoch": 0.49755840042233074, "percentage": 49.76, "elapsed_time": "0:11:34", "remaining_time": "0:11:40", "throughput": 2671.38, "total_tokens": 1854208}
|
|
{"current_steps": 3775, "total_steps": 7577, "loss": 0.0893, "lr": 1.1802986112079507e-06, "epoch": 0.4982182922000792, "percentage": 49.82, "elapsed_time": "0:11:34", "remaining_time": "0:11:39", "throughput": 2673.71, "total_tokens": 1856704}
|
|
{"current_steps": 3780, "total_steps": 7577, "loss": 0.0831, "lr": 1.1780323275538056e-06, "epoch": 0.4988781839778276, "percentage": 49.89, "elapsed_time": "0:11:34", "remaining_time": "0:11:37", "throughput": 2675.68, "total_tokens": 1858944}
|
|
{"current_steps": 3785, "total_steps": 7577, "loss": 0.2059, "lr": 1.1757650991928393e-06, "epoch": 0.49953807575557607, "percentage": 49.95, "elapsed_time": "0:11:35", "remaining_time": "0:11:36", "throughput": 2678.33, "total_tokens": 1861696}
|
|
{"current_steps": 3790, "total_steps": 7577, "loss": 0.179, "lr": 1.1734969381558235e-06, "epoch": 0.5001979675333246, "percentage": 50.02, "elapsed_time": "0:11:35", "remaining_time": "0:11:34", "throughput": 2680.52, "total_tokens": 1864128}
|
|
{"current_steps": 3790, "total_steps": 7577, "eval_loss": 0.12760794162750244, "epoch": 0.5001979675333246, "percentage": 50.02, "elapsed_time": "0:11:43", "remaining_time": "0:11:42", "throughput": 2650.24, "total_tokens": 1864128}
|
|
{"current_steps": 3795, "total_steps": 7577, "loss": 0.002, "lr": 1.1712278564784774e-06, "epoch": 0.500857859311073, "percentage": 50.09, "elapsed_time": "0:12:25", "remaining_time": "0:12:23", "throughput": 2502.47, "total_tokens": 1866432}
|
|
{"current_steps": 3800, "total_steps": 7577, "loss": 0.0598, "lr": 1.1689578662014064e-06, "epoch": 0.5015177510888215, "percentage": 50.15, "elapsed_time": "0:12:26", "remaining_time": "0:12:21", "throughput": 2504.45, "total_tokens": 1868736}
|
|
{"current_steps": 3805, "total_steps": 7577, "loss": 0.3077, "lr": 1.1666869793700362e-06, "epoch": 0.5021776428665699, "percentage": 50.22, "elapsed_time": "0:12:26", "remaining_time": "0:12:20", "throughput": 2506.86, "total_tokens": 1871360}
|
|
{"current_steps": 3810, "total_steps": 7577, "loss": 0.0016, "lr": 1.1644152080345515e-06, "epoch": 0.5028375346443184, "percentage": 50.28, "elapsed_time": "0:12:26", "remaining_time": "0:12:18", "throughput": 2508.69, "total_tokens": 1873536}
|
|
{"current_steps": 3815, "total_steps": 7577, "loss": 0.2264, "lr": 1.1621425642498289e-06, "epoch": 0.5034974264220667, "percentage": 50.35, "elapsed_time": "0:12:27", "remaining_time": "0:12:16", "throughput": 2510.77, "total_tokens": 1875904}
|
|
{"current_steps": 3820, "total_steps": 7577, "loss": 0.2478, "lr": 1.1598690600753759e-06, "epoch": 0.5041573181998152, "percentage": 50.42, "elapsed_time": "0:12:27", "remaining_time": "0:12:15", "throughput": 2513.09, "total_tokens": 1878464}
|
|
{"current_steps": 3825, "total_steps": 7577, "loss": 0.2181, "lr": 1.1575947075752644e-06, "epoch": 0.5048172099775636, "percentage": 50.48, "elapsed_time": "0:12:27", "remaining_time": "0:12:13", "throughput": 2514.93, "total_tokens": 1880640}
|
|
{"current_steps": 3830, "total_steps": 7577, "loss": 0.1516, "lr": 1.1553195188180691e-06, "epoch": 0.5054771017553121, "percentage": 50.55, "elapsed_time": "0:12:28", "remaining_time": "0:12:11", "throughput": 2516.92, "total_tokens": 1882944}
|
|
{"current_steps": 3835, "total_steps": 7577, "loss": 0.0694, "lr": 1.1530435058768008e-06, "epoch": 0.5061369935330606, "percentage": 50.61, "elapsed_time": "0:12:28", "remaining_time": "0:12:10", "throughput": 2518.92, "total_tokens": 1885248}
|
|
{"current_steps": 3840, "total_steps": 7577, "loss": 0.0841, "lr": 1.150766680828845e-06, "epoch": 0.506796885310809, "percentage": 50.68, "elapsed_time": "0:12:28", "remaining_time": "0:12:08", "throughput": 2521.32, "total_tokens": 1887872}
|
|
{"current_steps": 3845, "total_steps": 7577, "loss": 0.0024, "lr": 1.1484890557558955e-06, "epoch": 0.5074567770885575, "percentage": 50.75, "elapsed_time": "0:12:29", "remaining_time": "0:12:07", "throughput": 2523.8, "total_tokens": 1890560}
|
|
{"current_steps": 3850, "total_steps": 7577, "loss": 0.076, "lr": 1.146210642743892e-06, "epoch": 0.5081166688663059, "percentage": 50.81, "elapsed_time": "0:12:29", "remaining_time": "0:12:05", "throughput": 2526.02, "total_tokens": 1893056}
|
|
{"current_steps": 3855, "total_steps": 7577, "loss": 0.0353, "lr": 1.1439314538829554e-06, "epoch": 0.5087765606440544, "percentage": 50.88, "elapsed_time": "0:12:29", "remaining_time": "0:12:03", "throughput": 2528.01, "total_tokens": 1895360}
|
|
{"current_steps": 3860, "total_steps": 7577, "loss": 0.0793, "lr": 1.141651501267323e-06, "epoch": 0.5094364524218028, "percentage": 50.94, "elapsed_time": "0:12:30", "remaining_time": "0:12:02", "throughput": 2529.98, "total_tokens": 1897664}
|
|
{"current_steps": 3865, "total_steps": 7577, "loss": 0.1073, "lr": 1.1393707969952847e-06, "epoch": 0.5100963441995513, "percentage": 51.01, "elapsed_time": "0:12:30", "remaining_time": "0:12:00", "throughput": 2532.37, "total_tokens": 1900288}
|
|
{"current_steps": 3870, "total_steps": 7577, "loss": 0.0796, "lr": 1.13708935316912e-06, "epoch": 0.5107562359772997, "percentage": 51.08, "elapsed_time": "0:12:30", "remaining_time": "0:11:59", "throughput": 2534.91, "total_tokens": 1903040}
|
|
{"current_steps": 3875, "total_steps": 7577, "loss": 0.0173, "lr": 1.134807181895032e-06, "epoch": 0.5114161277550482, "percentage": 51.14, "elapsed_time": "0:12:31", "remaining_time": "0:11:57", "throughput": 2537.05, "total_tokens": 1905472}
|
|
{"current_steps": 3880, "total_steps": 7577, "loss": 0.0637, "lr": 1.132524295283084e-06, "epoch": 0.5120760195327966, "percentage": 51.21, "elapsed_time": "0:12:31", "remaining_time": "0:11:55", "throughput": 2538.94, "total_tokens": 1907712}
|
|
{"current_steps": 3885, "total_steps": 7577, "loss": 0.0912, "lr": 1.1302407054471355e-06, "epoch": 0.5127359113105451, "percentage": 51.27, "elapsed_time": "0:12:31", "remaining_time": "0:11:54", "throughput": 2541.0, "total_tokens": 1910080}
|
|
{"current_steps": 3890, "total_steps": 7577, "loss": 0.2374, "lr": 1.1279564245047767e-06, "epoch": 0.5133958030882935, "percentage": 51.34, "elapsed_time": "0:12:32", "remaining_time": "0:11:52", "throughput": 2543.13, "total_tokens": 1912512}
|
|
{"current_steps": 3895, "total_steps": 7577, "loss": 0.1336, "lr": 1.1256714645772662e-06, "epoch": 0.514055694866042, "percentage": 51.41, "elapsed_time": "0:12:32", "remaining_time": "0:11:51", "throughput": 2545.02, "total_tokens": 1914752}
|
|
{"current_steps": 3900, "total_steps": 7577, "loss": 0.0021, "lr": 1.1233858377894647e-06, "epoch": 0.5147155866437905, "percentage": 51.47, "elapsed_time": "0:12:32", "remaining_time": "0:11:49", "throughput": 2547.06, "total_tokens": 1917120}
|
|
{"current_steps": 3905, "total_steps": 7577, "loss": 0.014, "lr": 1.1210995562697722e-06, "epoch": 0.5153754784215389, "percentage": 51.54, "elapsed_time": "0:12:32", "remaining_time": "0:11:48", "throughput": 2548.79, "total_tokens": 1919232}
|
|
{"current_steps": 3910, "total_steps": 7577, "loss": 0.0559, "lr": 1.1188126321500621e-06, "epoch": 0.5160353701992874, "percentage": 51.6, "elapsed_time": "0:12:33", "remaining_time": "0:11:46", "throughput": 2551.15, "total_tokens": 1921856}
|
|
{"current_steps": 3915, "total_steps": 7577, "loss": 0.1515, "lr": 1.1165250775656188e-06, "epoch": 0.5166952619770357, "percentage": 51.67, "elapsed_time": "0:12:33", "remaining_time": "0:11:44", "throughput": 2553.18, "total_tokens": 1924224}
|
|
{"current_steps": 3920, "total_steps": 7577, "loss": 0.0373, "lr": 1.1142369046550708e-06, "epoch": 0.5173551537547842, "percentage": 51.74, "elapsed_time": "0:12:33", "remaining_time": "0:11:43", "throughput": 2555.06, "total_tokens": 1926464}
|
|
{"current_steps": 3925, "total_steps": 7577, "loss": 0.1665, "lr": 1.1119481255603289e-06, "epoch": 0.5180150455325326, "percentage": 51.8, "elapsed_time": "0:12:34", "remaining_time": "0:11:41", "throughput": 2557.19, "total_tokens": 1928896}
|
|
{"current_steps": 3930, "total_steps": 7577, "loss": 0.115, "lr": 1.1096587524265197e-06, "epoch": 0.5186749373102811, "percentage": 51.87, "elapsed_time": "0:12:34", "remaining_time": "0:11:40", "throughput": 2559.15, "total_tokens": 1931200}
|
|
{"current_steps": 3935, "total_steps": 7577, "loss": 0.0542, "lr": 1.107368797401923e-06, "epoch": 0.5193348290880295, "percentage": 51.93, "elapsed_time": "0:12:34", "remaining_time": "0:11:38", "throughput": 2561.28, "total_tokens": 1933632}
|
|
{"current_steps": 3940, "total_steps": 7577, "loss": 0.0019, "lr": 1.1050782726379054e-06, "epoch": 0.519994720865778, "percentage": 52.0, "elapsed_time": "0:12:35", "remaining_time": "0:11:37", "throughput": 2563.13, "total_tokens": 1935872}
|
|
{"current_steps": 3945, "total_steps": 7577, "loss": 0.0951, "lr": 1.1027871902888566e-06, "epoch": 0.5206546126435264, "percentage": 52.07, "elapsed_time": "0:12:35", "remaining_time": "0:11:35", "throughput": 2564.9, "total_tokens": 1938048}
|
|
{"current_steps": 3950, "total_steps": 7577, "loss": 0.056, "lr": 1.1004955625121257e-06, "epoch": 0.5213145044212749, "percentage": 52.13, "elapsed_time": "0:12:35", "remaining_time": "0:11:34", "throughput": 2567.17, "total_tokens": 1940608}
|
|
{"current_steps": 3955, "total_steps": 7577, "loss": 0.2829, "lr": 1.0982034014679561e-06, "epoch": 0.5219743961990233, "percentage": 52.2, "elapsed_time": "0:12:36", "remaining_time": "0:11:32", "throughput": 2569.24, "total_tokens": 1943040}
|
|
{"current_steps": 3960, "total_steps": 7577, "loss": 0.2808, "lr": 1.0959107193194206e-06, "epoch": 0.5226342879767718, "percentage": 52.26, "elapsed_time": "0:12:36", "remaining_time": "0:11:31", "throughput": 2571.59, "total_tokens": 1945664}
|
|
{"current_steps": 3965, "total_steps": 7577, "loss": 0.0046, "lr": 1.0936175282323575e-06, "epoch": 0.5232941797545203, "percentage": 52.33, "elapsed_time": "0:12:36", "remaining_time": "0:11:29", "throughput": 2573.63, "total_tokens": 1948032}
|
|
{"current_steps": 3970, "total_steps": 7577, "loss": 0.001, "lr": 1.091323840375305e-06, "epoch": 0.5239540715322687, "percentage": 52.4, "elapsed_time": "0:12:37", "remaining_time": "0:11:28", "throughput": 2575.41, "total_tokens": 1950208}
|
|
{"current_steps": 3975, "total_steps": 7577, "loss": 0.2119, "lr": 1.0890296679194378e-06, "epoch": 0.5246139633100172, "percentage": 52.46, "elapsed_time": "0:12:37", "remaining_time": "0:11:26", "throughput": 2577.84, "total_tokens": 1952896}
|
|
{"current_steps": 3980, "total_steps": 7577, "loss": 0.068, "lr": 1.086735023038502e-06, "epoch": 0.5252738550877656, "percentage": 52.53, "elapsed_time": "0:12:37", "remaining_time": "0:11:24", "throughput": 2579.77, "total_tokens": 1955200}
|
|
{"current_steps": 3985, "total_steps": 7577, "loss": 0.0953, "lr": 1.0844399179087512e-06, "epoch": 0.5259337468655141, "percentage": 52.59, "elapsed_time": "0:12:38", "remaining_time": "0:11:23", "throughput": 2581.55, "total_tokens": 1957376}
|
|
{"current_steps": 3990, "total_steps": 7577, "loss": 0.2058, "lr": 1.0821443647088802e-06, "epoch": 0.5265936386432625, "percentage": 52.66, "elapsed_time": "0:12:38", "remaining_time": "0:11:21", "throughput": 2583.96, "total_tokens": 1960064}
|
|
{"current_steps": 3995, "total_steps": 7577, "loss": 0.0652, "lr": 1.0798483756199623e-06, "epoch": 0.527253530421011, "percentage": 52.73, "elapsed_time": "0:12:38", "remaining_time": "0:11:20", "throughput": 2586.2, "total_tokens": 1962624}
|
|
{"current_steps": 4000, "total_steps": 7577, "loss": 0.0704, "lr": 1.0775519628253833e-06, "epoch": 0.5279134221987594, "percentage": 52.79, "elapsed_time": "0:12:39", "remaining_time": "0:11:18", "throughput": 2588.29, "total_tokens": 1965056}
|
|
{"current_steps": 4005, "total_steps": 7577, "loss": 0.1949, "lr": 1.0752551385107772e-06, "epoch": 0.5285733139765079, "percentage": 52.86, "elapsed_time": "0:12:39", "remaining_time": "0:11:17", "throughput": 2590.29, "total_tokens": 1967424}
|
|
{"current_steps": 4010, "total_steps": 7577, "loss": 0.1433, "lr": 1.0729579148639621e-06, "epoch": 0.5292332057542563, "percentage": 52.92, "elapsed_time": "0:12:39", "remaining_time": "0:11:15", "throughput": 2592.38, "total_tokens": 1969856}
|
|
{"current_steps": 4015, "total_steps": 7577, "loss": 0.0517, "lr": 1.0706603040748747e-06, "epoch": 0.5298930975320048, "percentage": 52.99, "elapsed_time": "0:12:40", "remaining_time": "0:11:14", "throughput": 2594.77, "total_tokens": 1972544}
|
|
{"current_steps": 4020, "total_steps": 7577, "loss": 0.0448, "lr": 1.0683623183355071e-06, "epoch": 0.5305529893097533, "percentage": 53.06, "elapsed_time": "0:12:40", "remaining_time": "0:11:12", "throughput": 2596.74, "total_tokens": 1974912}
|
|
{"current_steps": 4025, "total_steps": 7577, "loss": 0.1542, "lr": 1.0660639698398392e-06, "epoch": 0.5312128810875016, "percentage": 53.12, "elapsed_time": "0:12:40", "remaining_time": "0:11:11", "throughput": 2598.65, "total_tokens": 1977216}
|
|
{"current_steps": 4030, "total_steps": 7577, "loss": 0.1436, "lr": 1.0637652707837773e-06, "epoch": 0.5318727728652501, "percentage": 53.19, "elapsed_time": "0:12:41", "remaining_time": "0:11:09", "throughput": 2600.73, "total_tokens": 1979648}
|
|
{"current_steps": 4035, "total_steps": 7577, "loss": 0.1039, "lr": 1.0614662333650876e-06, "epoch": 0.5325326646429985, "percentage": 53.25, "elapsed_time": "0:12:41", "remaining_time": "0:11:08", "throughput": 2602.57, "total_tokens": 1981888}
|
|
{"current_steps": 4040, "total_steps": 7577, "loss": 0.2167, "lr": 1.0591668697833311e-06, "epoch": 0.533192556420747, "percentage": 53.32, "elapsed_time": "0:12:41", "remaining_time": "0:11:06", "throughput": 2604.81, "total_tokens": 1984448}
|
|
{"current_steps": 4045, "total_steps": 7577, "loss": 0.1567, "lr": 1.0568671922398005e-06, "epoch": 0.5338524481984954, "percentage": 53.39, "elapsed_time": "0:12:42", "remaining_time": "0:11:05", "throughput": 2607.11, "total_tokens": 1987072}
|
|
{"current_steps": 4050, "total_steps": 7577, "loss": 0.1618, "lr": 1.054567212937454e-06, "epoch": 0.5345123399762439, "percentage": 53.45, "elapsed_time": "0:12:42", "remaining_time": "0:11:04", "throughput": 2609.33, "total_tokens": 1989632}
|
|
{"current_steps": 4055, "total_steps": 7577, "loss": 0.0367, "lr": 1.0522669440808508e-06, "epoch": 0.5351722317539923, "percentage": 53.52, "elapsed_time": "0:12:42", "remaining_time": "0:11:02", "throughput": 2611.56, "total_tokens": 1992192}
|
|
{"current_steps": 4060, "total_steps": 7577, "loss": 0.2607, "lr": 1.0499663978760871e-06, "epoch": 0.5358321235317408, "percentage": 53.58, "elapsed_time": "0:12:43", "remaining_time": "0:11:01", "throughput": 2613.63, "total_tokens": 1994624}
|
|
{"current_steps": 4065, "total_steps": 7577, "loss": 0.0821, "lr": 1.0476655865307308e-06, "epoch": 0.5364920153094892, "percentage": 53.65, "elapsed_time": "0:12:43", "remaining_time": "0:10:59", "throughput": 2615.65, "total_tokens": 1997056}
|
|
{"current_steps": 4070, "total_steps": 7577, "loss": 0.0918, "lr": 1.0453645222537556e-06, "epoch": 0.5371519070872377, "percentage": 53.72, "elapsed_time": "0:12:43", "remaining_time": "0:10:58", "throughput": 2617.56, "total_tokens": 1999360}
|
|
{"current_steps": 4075, "total_steps": 7577, "loss": 0.0861, "lr": 1.0430632172554796e-06, "epoch": 0.5378117988649861, "percentage": 53.78, "elapsed_time": "0:12:44", "remaining_time": "0:10:56", "throughput": 2619.7, "total_tokens": 2001856}
|
|
{"current_steps": 4080, "total_steps": 7577, "loss": 0.1321, "lr": 1.0407616837474963e-06, "epoch": 0.5384716906427346, "percentage": 53.85, "elapsed_time": "0:12:44", "remaining_time": "0:10:55", "throughput": 2621.76, "total_tokens": 2004288}
|
|
{"current_steps": 4085, "total_steps": 7577, "loss": 0.0784, "lr": 1.038459933942612e-06, "epoch": 0.5391315824204831, "percentage": 53.91, "elapsed_time": "0:12:44", "remaining_time": "0:10:53", "throughput": 2624.14, "total_tokens": 2006976}
|
|
{"current_steps": 4090, "total_steps": 7577, "loss": 0.0603, "lr": 1.036157980054782e-06, "epoch": 0.5397914741982315, "percentage": 53.98, "elapsed_time": "0:12:45", "remaining_time": "0:10:52", "throughput": 2626.01, "total_tokens": 2009280}
|
|
{"current_steps": 4095, "total_steps": 7577, "loss": 0.0969, "lr": 1.0338558342990431e-06, "epoch": 0.54045136597598, "percentage": 54.05, "elapsed_time": "0:12:45", "remaining_time": "0:10:50", "throughput": 2628.15, "total_tokens": 2011776}
|
|
{"current_steps": 4100, "total_steps": 7577, "loss": 0.2581, "lr": 1.0315535088914508e-06, "epoch": 0.5411112577537284, "percentage": 54.11, "elapsed_time": "0:12:45", "remaining_time": "0:10:49", "throughput": 2630.35, "total_tokens": 2014336}
|
|
{"current_steps": 4105, "total_steps": 7577, "loss": 0.1642, "lr": 1.0292510160490146e-06, "epoch": 0.5417711495314769, "percentage": 54.18, "elapsed_time": "0:12:46", "remaining_time": "0:10:47", "throughput": 2632.88, "total_tokens": 2017152}
|
|
{"current_steps": 4110, "total_steps": 7577, "loss": 0.0998, "lr": 1.0269483679896308e-06, "epoch": 0.5424310413092253, "percentage": 54.24, "elapsed_time": "0:12:46", "remaining_time": "0:10:46", "throughput": 2634.84, "total_tokens": 2019520}
|
|
{"current_steps": 4115, "total_steps": 7577, "loss": 0.162, "lr": 1.0246455769320211e-06, "epoch": 0.5430909330869738, "percentage": 54.31, "elapsed_time": "0:12:46", "remaining_time": "0:10:45", "throughput": 2636.47, "total_tokens": 2021632}
|
|
{"current_steps": 4120, "total_steps": 7577, "loss": 0.1261, "lr": 1.0223426550956647e-06, "epoch": 0.5437508248647221, "percentage": 54.38, "elapsed_time": "0:12:47", "remaining_time": "0:10:43", "throughput": 2638.1, "total_tokens": 2023744}
|
|
{"current_steps": 4125, "total_steps": 7577, "loss": 0.0762, "lr": 1.0200396147007354e-06, "epoch": 0.5444107166424706, "percentage": 54.44, "elapsed_time": "0:12:47", "remaining_time": "0:10:42", "throughput": 2639.99, "total_tokens": 2026048}
|
|
{"current_steps": 4130, "total_steps": 7577, "loss": 0.132, "lr": 1.0177364679680367e-06, "epoch": 0.545070608420219, "percentage": 54.51, "elapsed_time": "0:12:47", "remaining_time": "0:10:40", "throughput": 2641.87, "total_tokens": 2028352}
|
|
{"current_steps": 4135, "total_steps": 7577, "loss": 0.0602, "lr": 1.015433227118935e-06, "epoch": 0.5457305001979675, "percentage": 54.57, "elapsed_time": "0:12:48", "remaining_time": "0:10:39", "throughput": 2643.98, "total_tokens": 2030848}
|
|
{"current_steps": 4140, "total_steps": 7577, "loss": 0.1337, "lr": 1.0131299043752967e-06, "epoch": 0.5463903919757159, "percentage": 54.64, "elapsed_time": "0:12:48", "remaining_time": "0:10:37", "throughput": 2646.09, "total_tokens": 2033344}
|
|
{"current_steps": 4145, "total_steps": 7577, "loss": 0.0548, "lr": 1.0108265119594233e-06, "epoch": 0.5470502837534644, "percentage": 54.71, "elapsed_time": "0:12:48", "remaining_time": "0:10:36", "throughput": 2647.9, "total_tokens": 2035584}
|
|
{"current_steps": 4150, "total_steps": 7577, "loss": 0.0713, "lr": 1.0085230620939853e-06, "epoch": 0.5477101755312129, "percentage": 54.77, "elapsed_time": "0:12:49", "remaining_time": "0:10:35", "throughput": 2650.24, "total_tokens": 2038272}
|
|
{"current_steps": 4155, "total_steps": 7577, "loss": 0.0909, "lr": 1.0062195670019583e-06, "epoch": 0.5483700673089613, "percentage": 54.84, "elapsed_time": "0:12:49", "remaining_time": "0:10:33", "throughput": 2652.33, "total_tokens": 2040768}
|
|
{"current_steps": 4160, "total_steps": 7577, "loss": 0.1428, "lr": 1.0039160389065582e-06, "epoch": 0.5490299590867098, "percentage": 54.9, "elapsed_time": "0:12:49", "remaining_time": "0:10:32", "throughput": 2654.21, "total_tokens": 2043072}
|
|
{"current_steps": 4165, "total_steps": 7577, "loss": 0.1555, "lr": 1.0016124900311755e-06, "epoch": 0.5496898508644582, "percentage": 54.97, "elapsed_time": "0:12:50", "remaining_time": "0:10:30", "throughput": 2656.64, "total_tokens": 2045824}
|
|
{"current_steps": 4169, "total_steps": 7577, "eval_loss": 0.09937019646167755, "epoch": 0.550217764286657, "percentage": 55.02, "elapsed_time": "0:12:58", "remaining_time": "0:10:36", "throughput": 2631.16, "total_tokens": 2047552}
|
|
{"current_steps": 4170, "total_steps": 7577, "loss": 0.2405, "lr": 9.99308932599311e-07, "epoch": 0.5503497426422067, "percentage": 55.03, "elapsed_time": "0:13:55", "remaining_time": "0:11:22", "throughput": 2451.77, "total_tokens": 2048064}
|
|
{"current_steps": 4175, "total_steps": 7577, "loss": 0.0424, "lr": 9.970053788345112e-07, "epoch": 0.5510096344199551, "percentage": 55.1, "elapsed_time": "0:13:55", "remaining_time": "0:11:20", "throughput": 2453.62, "total_tokens": 2050432}
|
|
{"current_steps": 4180, "total_steps": 7577, "loss": 0.0537, "lr": 9.947018409603036e-07, "epoch": 0.5516695261977036, "percentage": 55.17, "elapsed_time": "0:13:56", "remaining_time": "0:11:19", "throughput": 2455.64, "total_tokens": 2052928}
|
|
{"current_steps": 4185, "total_steps": 7577, "loss": 0.1485, "lr": 9.923983312001304e-07, "epoch": 0.552329417975452, "percentage": 55.23, "elapsed_time": "0:13:56", "remaining_time": "0:11:17", "throughput": 2457.66, "total_tokens": 2055424}
|
|
{"current_steps": 4190, "total_steps": 7577, "loss": 0.1457, "lr": 9.900948617772846e-07, "epoch": 0.5529893097532005, "percentage": 55.3, "elapsed_time": "0:13:56", "remaining_time": "0:11:16", "throughput": 2459.23, "total_tokens": 2057536}
|
|
{"current_steps": 4195, "total_steps": 7577, "loss": 0.1713, "lr": 9.877914449148462e-07, "epoch": 0.5536492015309489, "percentage": 55.36, "elapsed_time": "0:13:56", "remaining_time": "0:11:14", "throughput": 2460.99, "total_tokens": 2059840}
|
|
{"current_steps": 4200, "total_steps": 7577, "loss": 0.2209, "lr": 9.854880928356157e-07, "epoch": 0.5543090933086974, "percentage": 55.43, "elapsed_time": "0:13:57", "remaining_time": "0:11:13", "throughput": 2463.35, "total_tokens": 2062656}
|
|
{"current_steps": 4205, "total_steps": 7577, "loss": 0.1398, "lr": 9.831848177620493e-07, "epoch": 0.5549689850864459, "percentage": 55.5, "elapsed_time": "0:13:57", "remaining_time": "0:11:11", "throughput": 2465.11, "total_tokens": 2064960}
|
|
{"current_steps": 4210, "total_steps": 7577, "loss": 0.2412, "lr": 9.808816319161961e-07, "epoch": 0.5556288768641943, "percentage": 55.56, "elapsed_time": "0:13:58", "remaining_time": "0:11:10", "throughput": 2466.58, "total_tokens": 2067008}
|
|
{"current_steps": 4215, "total_steps": 7577, "loss": 0.0009, "lr": 9.785785475196298e-07, "epoch": 0.5562887686419428, "percentage": 55.63, "elapsed_time": "0:13:58", "remaining_time": "0:11:08", "throughput": 2468.79, "total_tokens": 2069696}
|
|
{"current_steps": 4220, "total_steps": 7577, "loss": 0.0507, "lr": 9.76275576793387e-07, "epoch": 0.5569486604196912, "percentage": 55.69, "elapsed_time": "0:13:58", "remaining_time": "0:11:07", "throughput": 2470.92, "total_tokens": 2072320}
|
|
{"current_steps": 4225, "total_steps": 7577, "loss": 0.0016, "lr": 9.739727319579007e-07, "epoch": 0.5576085521974397, "percentage": 55.76, "elapsed_time": "0:13:59", "remaining_time": "0:11:05", "throughput": 2472.83, "total_tokens": 2074752}
|
|
{"current_steps": 4230, "total_steps": 7577, "loss": 0.1143, "lr": 9.716700252329361e-07, "epoch": 0.558268443975188, "percentage": 55.83, "elapsed_time": "0:13:59", "remaining_time": "0:11:04", "throughput": 2475.01, "total_tokens": 2077440}
|
|
{"current_steps": 4235, "total_steps": 7577, "loss": 0.1856, "lr": 9.693674688375254e-07, "epoch": 0.5589283357529365, "percentage": 55.89, "elapsed_time": "0:13:59", "remaining_time": "0:11:02", "throughput": 2477.07, "total_tokens": 2080000}
|
|
{"current_steps": 4240, "total_steps": 7577, "loss": 0.0667, "lr": 9.67065074989903e-07, "epoch": 0.5595882275306849, "percentage": 55.96, "elapsed_time": "0:14:00", "remaining_time": "0:11:01", "throughput": 2479.13, "total_tokens": 2082560}
|
|
{"current_steps": 4245, "total_steps": 7577, "loss": 0.0822, "lr": 9.647628559074415e-07, "epoch": 0.5602481193084334, "percentage": 56.02, "elapsed_time": "0:14:00", "remaining_time": "0:10:59", "throughput": 2480.89, "total_tokens": 2084864}
|
|
{"current_steps": 4250, "total_steps": 7577, "loss": 0.1228, "lr": 9.62460823806585e-07, "epoch": 0.5609080110861818, "percentage": 56.09, "elapsed_time": "0:14:00", "remaining_time": "0:10:58", "throughput": 2482.95, "total_tokens": 2087424}
|
|
{"current_steps": 4255, "total_steps": 7577, "loss": 0.2208, "lr": 9.601589909027857e-07, "epoch": 0.5615679028639303, "percentage": 56.16, "elapsed_time": "0:14:01", "remaining_time": "0:10:56", "throughput": 2485.08, "total_tokens": 2090048}
|
|
{"current_steps": 4260, "total_steps": 7577, "loss": 0.0689, "lr": 9.578573694104394e-07, "epoch": 0.5622277946416787, "percentage": 56.22, "elapsed_time": "0:14:01", "remaining_time": "0:10:55", "throughput": 2486.94, "total_tokens": 2092416}
|
|
{"current_steps": 4265, "total_steps": 7577, "loss": 0.0788, "lr": 9.555559715428199e-07, "epoch": 0.5628876864194272, "percentage": 56.29, "elapsed_time": "0:14:01", "remaining_time": "0:10:53", "throughput": 2488.65, "total_tokens": 2094656}
|
|
{"current_steps": 4270, "total_steps": 7577, "loss": 0.069, "lr": 9.532548095120134e-07, "epoch": 0.5635475781971757, "percentage": 56.35, "elapsed_time": "0:14:02", "remaining_time": "0:10:52", "throughput": 2490.5, "total_tokens": 2097024}
|
|
{"current_steps": 4275, "total_steps": 7577, "loss": 0.0597, "lr": 9.509538955288564e-07, "epoch": 0.5642074699749241, "percentage": 56.42, "elapsed_time": "0:14:02", "remaining_time": "0:10:50", "throughput": 2492.36, "total_tokens": 2099392}
|
|
{"current_steps": 4280, "total_steps": 7577, "loss": 0.09, "lr": 9.486532418028672e-07, "epoch": 0.5648673617526726, "percentage": 56.49, "elapsed_time": "0:14:02", "remaining_time": "0:10:49", "throughput": 2494.49, "total_tokens": 2102016}
|
|
{"current_steps": 4285, "total_steps": 7577, "loss": 0.0804, "lr": 9.463528605421844e-07, "epoch": 0.565527253530421, "percentage": 56.55, "elapsed_time": "0:14:02", "remaining_time": "0:10:47", "throughput": 2496.27, "total_tokens": 2104320}
|
|
{"current_steps": 4290, "total_steps": 7577, "loss": 0.1016, "lr": 9.440527639535004e-07, "epoch": 0.5661871453081695, "percentage": 56.62, "elapsed_time": "0:14:03", "remaining_time": "0:10:46", "throughput": 2498.62, "total_tokens": 2107136}
|
|
{"current_steps": 4295, "total_steps": 7577, "loss": 0.0533, "lr": 9.417529642419971e-07, "epoch": 0.5668470370859179, "percentage": 56.68, "elapsed_time": "0:14:03", "remaining_time": "0:10:44", "throughput": 2500.9, "total_tokens": 2109888}
|
|
{"current_steps": 4300, "total_steps": 7577, "loss": 0.1565, "lr": 9.394534736112815e-07, "epoch": 0.5675069288636664, "percentage": 56.75, "elapsed_time": "0:14:03", "remaining_time": "0:10:43", "throughput": 2502.67, "total_tokens": 2112192}
|
|
{"current_steps": 4305, "total_steps": 7577, "loss": 0.1197, "lr": 9.371543042633192e-07, "epoch": 0.5681668206414148, "percentage": 56.82, "elapsed_time": "0:14:04", "remaining_time": "0:10:41", "throughput": 2504.73, "total_tokens": 2114752}
|
|
{"current_steps": 4310, "total_steps": 7577, "loss": 0.141, "lr": 9.348554683983722e-07, "epoch": 0.5688267124191633, "percentage": 56.88, "elapsed_time": "0:14:04", "remaining_time": "0:10:40", "throughput": 2506.64, "total_tokens": 2117184}
|
|
{"current_steps": 4315, "total_steps": 7577, "loss": 0.1155, "lr": 9.325569782149323e-07, "epoch": 0.5694866041969117, "percentage": 56.95, "elapsed_time": "0:14:04", "remaining_time": "0:10:38", "throughput": 2508.48, "total_tokens": 2119552}
|
|
{"current_steps": 4320, "total_steps": 7577, "loss": 0.0744, "lr": 9.302588459096574e-07, "epoch": 0.5701464959746602, "percentage": 57.01, "elapsed_time": "0:14:05", "remaining_time": "0:10:37", "throughput": 2510.32, "total_tokens": 2121920}
|
|
{"current_steps": 4325, "total_steps": 7577, "loss": 0.2853, "lr": 9.279610836773064e-07, "epoch": 0.5708063877524086, "percentage": 57.08, "elapsed_time": "0:14:05", "remaining_time": "0:10:35", "throughput": 2511.93, "total_tokens": 2124096}
|
|
{"current_steps": 4330, "total_steps": 7577, "loss": 0.1079, "lr": 9.256637037106735e-07, "epoch": 0.571466279530157, "percentage": 57.15, "elapsed_time": "0:14:05", "remaining_time": "0:10:34", "throughput": 2513.82, "total_tokens": 2126528}
|
|
{"current_steps": 4335, "total_steps": 7577, "loss": 0.0589, "lr": 9.233667182005259e-07, "epoch": 0.5721261713079056, "percentage": 57.21, "elapsed_time": "0:14:06", "remaining_time": "0:10:32", "throughput": 2515.29, "total_tokens": 2128576}
|
|
{"current_steps": 4340, "total_steps": 7577, "loss": 0.0648, "lr": 9.210701393355361e-07, "epoch": 0.5727860630856539, "percentage": 57.28, "elapsed_time": "0:14:06", "remaining_time": "0:10:31", "throughput": 2516.81, "total_tokens": 2130688}
|
|
{"current_steps": 4345, "total_steps": 7577, "loss": 0.1324, "lr": 9.187739793022198e-07, "epoch": 0.5734459548634024, "percentage": 57.34, "elapsed_time": "0:14:06", "remaining_time": "0:10:29", "throughput": 2518.94, "total_tokens": 2133312}
|
|
{"current_steps": 4350, "total_steps": 7577, "loss": 0.0724, "lr": 9.164782502848702e-07, "epoch": 0.5741058466411508, "percentage": 57.41, "elapsed_time": "0:14:07", "remaining_time": "0:10:28", "throughput": 2520.77, "total_tokens": 2135680}
|
|
{"current_steps": 4355, "total_steps": 7577, "loss": 0.1448, "lr": 9.141829644654936e-07, "epoch": 0.5747657384188993, "percentage": 57.48, "elapsed_time": "0:14:07", "remaining_time": "0:10:27", "throughput": 2522.66, "total_tokens": 2138112}
|
|
{"current_steps": 4360, "total_steps": 7577, "loss": 0.4271, "lr": 9.118881340237432e-07, "epoch": 0.5754256301966477, "percentage": 57.54, "elapsed_time": "0:14:07", "remaining_time": "0:10:25", "throughput": 2524.35, "total_tokens": 2140352}
|
|
{"current_steps": 4365, "total_steps": 7577, "loss": 0.0894, "lr": 9.095937711368573e-07, "epoch": 0.5760855219743962, "percentage": 57.61, "elapsed_time": "0:14:08", "remaining_time": "0:10:24", "throughput": 2526.52, "total_tokens": 2143040}
|
|
{"current_steps": 4370, "total_steps": 7577, "loss": 0.0716, "lr": 9.072998879795923e-07, "epoch": 0.5767454137521446, "percentage": 57.67, "elapsed_time": "0:14:08", "remaining_time": "0:10:22", "throughput": 2528.19, "total_tokens": 2145280}
|
|
{"current_steps": 4375, "total_steps": 7577, "loss": 0.1048, "lr": 9.050064967241596e-07, "epoch": 0.5774053055298931, "percentage": 57.74, "elapsed_time": "0:14:08", "remaining_time": "0:10:21", "throughput": 2530.3, "total_tokens": 2147904}
|
|
{"current_steps": 4380, "total_steps": 7577, "loss": 0.0838, "lr": 9.027136095401598e-07, "epoch": 0.5780651973076415, "percentage": 57.81, "elapsed_time": "0:14:09", "remaining_time": "0:10:19", "throughput": 2532.26, "total_tokens": 2150400}
|
|
{"current_steps": 4385, "total_steps": 7577, "loss": 0.135, "lr": 9.004212385945187e-07, "epoch": 0.57872508908539, "percentage": 57.87, "elapsed_time": "0:14:09", "remaining_time": "0:10:18", "throughput": 2534.43, "total_tokens": 2153088}
|
|
{"current_steps": 4390, "total_steps": 7577, "loss": 0.0013, "lr": 8.981293960514233e-07, "epoch": 0.5793849808631385, "percentage": 57.94, "elapsed_time": "0:14:09", "remaining_time": "0:10:16", "throughput": 2536.6, "total_tokens": 2155776}
|
|
{"current_steps": 4395, "total_steps": 7577, "loss": 0.1661, "lr": 8.958380940722564e-07, "epoch": 0.5800448726408869, "percentage": 58.0, "elapsed_time": "0:14:10", "remaining_time": "0:10:15", "throughput": 2538.7, "total_tokens": 2158400}
|
|
{"current_steps": 4400, "total_steps": 7577, "loss": 0.0766, "lr": 8.935473448155326e-07, "epoch": 0.5807047644186354, "percentage": 58.07, "elapsed_time": "0:14:10", "remaining_time": "0:10:14", "throughput": 2540.45, "total_tokens": 2160704}
|
|
{"current_steps": 4405, "total_steps": 7577, "loss": 0.0469, "lr": 8.912571604368324e-07, "epoch": 0.5813646561963838, "percentage": 58.14, "elapsed_time": "0:14:10", "remaining_time": "0:10:12", "throughput": 2542.41, "total_tokens": 2163200}
|
|
{"current_steps": 4410, "total_steps": 7577, "loss": 0.1421, "lr": 8.889675530887404e-07, "epoch": 0.5820245479741323, "percentage": 58.2, "elapsed_time": "0:14:11", "remaining_time": "0:10:11", "throughput": 2544.01, "total_tokens": 2165376}
|
|
{"current_steps": 4415, "total_steps": 7577, "loss": 0.0994, "lr": 8.866785349207786e-07, "epoch": 0.5826844397518807, "percentage": 58.27, "elapsed_time": "0:14:11", "remaining_time": "0:10:09", "throughput": 2545.9, "total_tokens": 2167808}
|
|
{"current_steps": 4420, "total_steps": 7577, "loss": 0.1332, "lr": 8.843901180793423e-07, "epoch": 0.5833443315296292, "percentage": 58.33, "elapsed_time": "0:14:11", "remaining_time": "0:10:08", "throughput": 2547.64, "total_tokens": 2170112}
|
|
{"current_steps": 4425, "total_steps": 7577, "loss": 0.0005, "lr": 8.821023147076362e-07, "epoch": 0.5840042233073776, "percentage": 58.4, "elapsed_time": "0:14:12", "remaining_time": "0:10:06", "throughput": 2549.45, "total_tokens": 2172480}
|
|
{"current_steps": 4430, "total_steps": 7577, "loss": 0.0432, "lr": 8.798151369456098e-07, "epoch": 0.5846641150851261, "percentage": 58.47, "elapsed_time": "0:14:12", "remaining_time": "0:10:05", "throughput": 2551.54, "total_tokens": 2175104}
|
|
{"current_steps": 4435, "total_steps": 7577, "loss": 0.1126, "lr": 8.775285969298931e-07, "epoch": 0.5853240068628744, "percentage": 58.53, "elapsed_time": "0:14:12", "remaining_time": "0:10:04", "throughput": 2553.13, "total_tokens": 2177280}
|
|
{"current_steps": 4440, "total_steps": 7577, "loss": 0.0735, "lr": 8.752427067937312e-07, "epoch": 0.585983898640623, "percentage": 58.6, "elapsed_time": "0:14:13", "remaining_time": "0:10:02", "throughput": 2555.08, "total_tokens": 2179776}
|
|
{"current_steps": 4445, "total_steps": 7577, "loss": 0.0949, "lr": 8.729574786669214e-07, "epoch": 0.5866437904183713, "percentage": 58.66, "elapsed_time": "0:14:13", "remaining_time": "0:10:01", "throughput": 2557.16, "total_tokens": 2182400}
|
|
{"current_steps": 4450, "total_steps": 7577, "loss": 0.0655, "lr": 8.706729246757477e-07, "epoch": 0.5873036821961198, "percentage": 58.73, "elapsed_time": "0:14:13", "remaining_time": "0:09:59", "throughput": 2559.32, "total_tokens": 2185088}
|
|
{"current_steps": 4455, "total_steps": 7577, "loss": 0.0697, "lr": 8.683890569429173e-07, "epoch": 0.5879635739738683, "percentage": 58.8, "elapsed_time": "0:14:14", "remaining_time": "0:09:58", "throughput": 2561.48, "total_tokens": 2187776}
|
|
{"current_steps": 4460, "total_steps": 7577, "loss": 0.0006, "lr": 8.661058875874956e-07, "epoch": 0.5886234657516167, "percentage": 58.86, "elapsed_time": "0:14:14", "remaining_time": "0:09:57", "throughput": 2563.11, "total_tokens": 2190016}
|
|
{"current_steps": 4465, "total_steps": 7577, "loss": 0.0008, "lr": 8.638234287248423e-07, "epoch": 0.5892833575293652, "percentage": 58.93, "elapsed_time": "0:14:14", "remaining_time": "0:09:55", "throughput": 2564.8, "total_tokens": 2192320}
|
|
{"current_steps": 4470, "total_steps": 7577, "loss": 0.0355, "lr": 8.615416924665464e-07, "epoch": 0.5899432493071136, "percentage": 58.99, "elapsed_time": "0:14:15", "remaining_time": "0:09:54", "throughput": 2566.65, "total_tokens": 2194752}
|
|
{"current_steps": 4475, "total_steps": 7577, "loss": 0.0743, "lr": 8.592606909203629e-07, "epoch": 0.5906031410848621, "percentage": 59.06, "elapsed_time": "0:14:15", "remaining_time": "0:09:52", "throughput": 2568.35, "total_tokens": 2197056}
|
|
{"current_steps": 4480, "total_steps": 7577, "loss": 0.0489, "lr": 8.569804361901485e-07, "epoch": 0.5912630328626105, "percentage": 59.13, "elapsed_time": "0:14:15", "remaining_time": "0:09:51", "throughput": 2569.98, "total_tokens": 2199296}
|
|
{"current_steps": 4485, "total_steps": 7577, "loss": 0.41, "lr": 8.547009403757963e-07, "epoch": 0.591922924640359, "percentage": 59.19, "elapsed_time": "0:14:16", "remaining_time": "0:09:50", "throughput": 2571.72, "total_tokens": 2201664}
|
|
{"current_steps": 4490, "total_steps": 7577, "loss": 0.227, "lr": 8.524222155731731e-07, "epoch": 0.5925828164181074, "percentage": 59.26, "elapsed_time": "0:14:16", "remaining_time": "0:09:48", "throughput": 2573.77, "total_tokens": 2204288}
|
|
{"current_steps": 4495, "total_steps": 7577, "loss": 0.127, "lr": 8.501442738740538e-07, "epoch": 0.5932427081958559, "percentage": 59.32, "elapsed_time": "0:14:16", "remaining_time": "0:09:47", "throughput": 2575.37, "total_tokens": 2206528}
|
|
{"current_steps": 4500, "total_steps": 7577, "loss": 0.0695, "lr": 8.47867127366058e-07, "epoch": 0.5939025999736043, "percentage": 59.39, "elapsed_time": "0:14:17", "remaining_time": "0:09:46", "throughput": 2577.29, "total_tokens": 2209024}
|
|
{"current_steps": 4505, "total_steps": 7577, "loss": 0.0422, "lr": 8.455907881325858e-07, "epoch": 0.5945624917513528, "percentage": 59.46, "elapsed_time": "0:14:17", "remaining_time": "0:09:44", "throughput": 2579.28, "total_tokens": 2211584}
|
|
{"current_steps": 4510, "total_steps": 7577, "loss": 0.1233, "lr": 8.433152682527533e-07, "epoch": 0.5952223835291012, "percentage": 59.52, "elapsed_time": "0:14:17", "remaining_time": "0:09:43", "throughput": 2581.05, "total_tokens": 2213952}
|
|
{"current_steps": 4515, "total_steps": 7577, "loss": 0.0951, "lr": 8.410405798013298e-07, "epoch": 0.5958822753068497, "percentage": 59.59, "elapsed_time": "0:14:18", "remaining_time": "0:09:41", "throughput": 2582.66, "total_tokens": 2216192}
|
|
{"current_steps": 4520, "total_steps": 7577, "loss": 0.0126, "lr": 8.387667348486712e-07, "epoch": 0.5965421670845982, "percentage": 59.65, "elapsed_time": "0:14:18", "remaining_time": "0:09:40", "throughput": 2584.55, "total_tokens": 2218688}
|
|
{"current_steps": 4525, "total_steps": 7577, "loss": 0.0853, "lr": 8.364937454606585e-07, "epoch": 0.5972020588623466, "percentage": 59.72, "elapsed_time": "0:14:18", "remaining_time": "0:09:39", "throughput": 2586.16, "total_tokens": 2220928}
|
|
{"current_steps": 4530, "total_steps": 7577, "loss": 0.0013, "lr": 8.342216236986329e-07, "epoch": 0.5978619506400951, "percentage": 59.79, "elapsed_time": "0:14:19", "remaining_time": "0:09:37", "throughput": 2587.99, "total_tokens": 2223360}
|
|
{"current_steps": 4535, "total_steps": 7577, "loss": 0.1511, "lr": 8.319503816193305e-07, "epoch": 0.5985218424178435, "percentage": 59.85, "elapsed_time": "0:14:19", "remaining_time": "0:09:36", "throughput": 2589.83, "total_tokens": 2225792}
|
|
{"current_steps": 4540, "total_steps": 7577, "loss": 0.1163, "lr": 8.296800312748206e-07, "epoch": 0.599181734195592, "percentage": 59.92, "elapsed_time": "0:14:19", "remaining_time": "0:09:35", "throughput": 2591.72, "total_tokens": 2228288}
|
|
{"current_steps": 4545, "total_steps": 7577, "loss": 0.1894, "lr": 8.274105847124404e-07, "epoch": 0.5998416259733403, "percentage": 59.98, "elapsed_time": "0:14:20", "remaining_time": "0:09:33", "throughput": 2593.68, "total_tokens": 2230848}
|
|
{"current_steps": 4548, "total_steps": 7577, "eval_loss": 0.1099071353673935, "epoch": 0.6002375610399895, "percentage": 60.02, "elapsed_time": "0:14:28", "remaining_time": "0:09:38", "throughput": 2570.22, "total_tokens": 2232448}
|
|
{"current_steps": 4550, "total_steps": 7577, "loss": 0.233, "lr": 8.251420539747311e-07, "epoch": 0.6005015177510888, "percentage": 60.05, "elapsed_time": "0:15:05", "remaining_time": "0:10:02", "throughput": 2466.82, "total_tokens": 2233472}
|
|
{"current_steps": 4555, "total_steps": 7577, "loss": 0.2218, "lr": 8.228744510993742e-07, "epoch": 0.6011614095288372, "percentage": 60.12, "elapsed_time": "0:15:05", "remaining_time": "0:10:00", "throughput": 2468.82, "total_tokens": 2236096}
|
|
{"current_steps": 4560, "total_steps": 7577, "loss": 0.0831, "lr": 8.206077881191274e-07, "epoch": 0.6018213013065857, "percentage": 60.18, "elapsed_time": "0:15:06", "remaining_time": "0:09:59", "throughput": 2470.81, "total_tokens": 2238720}
|
|
{"current_steps": 4565, "total_steps": 7577, "loss": 0.1877, "lr": 8.183420770617614e-07, "epoch": 0.6024811930843341, "percentage": 60.25, "elapsed_time": "0:15:06", "remaining_time": "0:09:58", "throughput": 2472.66, "total_tokens": 2241216}
|
|
{"current_steps": 4570, "total_steps": 7577, "loss": 0.0651, "lr": 8.160773299499955e-07, "epoch": 0.6031410848620826, "percentage": 60.31, "elapsed_time": "0:15:06", "remaining_time": "0:09:56", "throughput": 2474.45, "total_tokens": 2243648}
|
|
{"current_steps": 4575, "total_steps": 7577, "loss": 0.0543, "lr": 8.138135588014339e-07, "epoch": 0.6038009766398311, "percentage": 60.38, "elapsed_time": "0:15:07", "remaining_time": "0:09:55", "throughput": 2476.24, "total_tokens": 2246080}
|
|
{"current_steps": 4580, "total_steps": 7577, "loss": 0.0016, "lr": 8.115507756285017e-07, "epoch": 0.6044608684175795, "percentage": 60.45, "elapsed_time": "0:15:07", "remaining_time": "0:09:53", "throughput": 2477.75, "total_tokens": 2248256}
|
|
{"current_steps": 4585, "total_steps": 7577, "loss": 0.07, "lr": 8.092889924383819e-07, "epoch": 0.605120760195328, "percentage": 60.51, "elapsed_time": "0:15:07", "remaining_time": "0:09:52", "throughput": 2479.54, "total_tokens": 2250688}
|
|
{"current_steps": 4590, "total_steps": 7577, "loss": 0.0872, "lr": 8.070282212329508e-07, "epoch": 0.6057806519730764, "percentage": 60.58, "elapsed_time": "0:15:08", "remaining_time": "0:09:50", "throughput": 2481.32, "total_tokens": 2253120}
|
|
{"current_steps": 4595, "total_steps": 7577, "loss": 0.1869, "lr": 8.047684740087156e-07, "epoch": 0.6064405437508249, "percentage": 60.64, "elapsed_time": "0:15:08", "remaining_time": "0:09:49", "throughput": 2482.89, "total_tokens": 2255360}
|
|
{"current_steps": 4600, "total_steps": 7577, "loss": 0.2572, "lr": 8.025097627567481e-07, "epoch": 0.6071004355285733, "percentage": 60.71, "elapsed_time": "0:15:08", "remaining_time": "0:09:48", "throughput": 2484.6, "total_tokens": 2257728}
|
|
{"current_steps": 4605, "total_steps": 7577, "loss": 0.0536, "lr": 8.002520994626247e-07, "epoch": 0.6077603273063218, "percentage": 60.78, "elapsed_time": "0:15:09", "remaining_time": "0:09:46", "throughput": 2486.45, "total_tokens": 2260224}
|
|
{"current_steps": 4610, "total_steps": 7577, "loss": 0.0372, "lr": 7.979954961063596e-07, "epoch": 0.6084202190840702, "percentage": 60.84, "elapsed_time": "0:15:09", "remaining_time": "0:09:45", "throughput": 2488.5, "total_tokens": 2262912}
|
|
{"current_steps": 4615, "total_steps": 7577, "loss": 0.2998, "lr": 7.957399646623436e-07, "epoch": 0.6090801108618187, "percentage": 60.91, "elapsed_time": "0:15:09", "remaining_time": "0:09:43", "throughput": 2490.08, "total_tokens": 2265152}
|
|
{"current_steps": 4620, "total_steps": 7577, "loss": 0.0008, "lr": 7.934855170992788e-07, "epoch": 0.6097400026395671, "percentage": 60.97, "elapsed_time": "0:15:10", "remaining_time": "0:09:42", "throughput": 2492.25, "total_tokens": 2267968}
|
|
{"current_steps": 4625, "total_steps": 7577, "loss": 0.0407, "lr": 7.912321653801161e-07, "epoch": 0.6103998944173156, "percentage": 61.04, "elapsed_time": "0:15:10", "remaining_time": "0:09:41", "throughput": 2493.96, "total_tokens": 2270336}
|
|
{"current_steps": 4630, "total_steps": 7577, "loss": 0.135, "lr": 7.889799214619919e-07, "epoch": 0.611059786195064, "percentage": 61.11, "elapsed_time": "0:15:10", "remaining_time": "0:09:39", "throughput": 2495.97, "total_tokens": 2273024}
|
|
{"current_steps": 4635, "total_steps": 7577, "loss": 0.0678, "lr": 7.867287972961629e-07, "epoch": 0.6117196779728125, "percentage": 61.17, "elapsed_time": "0:15:11", "remaining_time": "0:09:38", "throughput": 2497.53, "total_tokens": 2275264}
|
|
{"current_steps": 4640, "total_steps": 7577, "loss": 0.1219, "lr": 7.844788048279453e-07, "epoch": 0.612379569750561, "percentage": 61.24, "elapsed_time": "0:15:11", "remaining_time": "0:09:36", "throughput": 2499.5, "total_tokens": 2277888}
|
|
{"current_steps": 4645, "total_steps": 7577, "loss": 0.0005, "lr": 7.822299559966494e-07, "epoch": 0.6130394615283093, "percentage": 61.3, "elapsed_time": "0:15:11", "remaining_time": "0:09:35", "throughput": 2501.27, "total_tokens": 2280320}
|
|
{"current_steps": 4650, "total_steps": 7577, "loss": 0.0017, "lr": 7.799822627355171e-07, "epoch": 0.6136993533060578, "percentage": 61.37, "elapsed_time": "0:15:11", "remaining_time": "0:09:34", "throughput": 2502.84, "total_tokens": 2282560}
|
|
{"current_steps": 4655, "total_steps": 7577, "loss": 0.1505, "lr": 7.77735736971659e-07, "epoch": 0.6143592450838062, "percentage": 61.44, "elapsed_time": "0:15:12", "remaining_time": "0:09:32", "throughput": 2504.46, "total_tokens": 2284864}
|
|
{"current_steps": 4660, "total_steps": 7577, "loss": 0.165, "lr": 7.754903906259889e-07, "epoch": 0.6150191368615547, "percentage": 61.5, "elapsed_time": "0:15:12", "remaining_time": "0:09:31", "throughput": 2506.09, "total_tokens": 2287168}
|
|
{"current_steps": 4665, "total_steps": 7577, "loss": 0.0716, "lr": 7.732462356131637e-07, "epoch": 0.6156790286393031, "percentage": 61.57, "elapsed_time": "0:15:12", "remaining_time": "0:09:29", "throughput": 2507.85, "total_tokens": 2289600}
|
|
{"current_steps": 4670, "total_steps": 7577, "loss": 0.0317, "lr": 7.710032838415179e-07, "epoch": 0.6163389204170516, "percentage": 61.63, "elapsed_time": "0:15:13", "remaining_time": "0:09:28", "throughput": 2509.74, "total_tokens": 2292160}
|
|
{"current_steps": 4675, "total_steps": 7577, "loss": 0.0943, "lr": 7.687615472130016e-07, "epoch": 0.6169988121948, "percentage": 61.7, "elapsed_time": "0:15:13", "remaining_time": "0:09:27", "throughput": 2511.83, "total_tokens": 2294912}
|
|
{"current_steps": 4680, "total_steps": 7577, "loss": 0.1314, "lr": 7.665210376231165e-07, "epoch": 0.6176587039725485, "percentage": 61.77, "elapsed_time": "0:15:13", "remaining_time": "0:09:25", "throughput": 2513.25, "total_tokens": 2297024}
|
|
{"current_steps": 4685, "total_steps": 7577, "loss": 0.1436, "lr": 7.642817669608536e-07, "epoch": 0.6183185957502969, "percentage": 61.83, "elapsed_time": "0:15:14", "remaining_time": "0:09:24", "throughput": 2515.01, "total_tokens": 2299456}
|
|
{"current_steps": 4690, "total_steps": 7577, "loss": 0.013, "lr": 7.62043747108629e-07, "epoch": 0.6189784875280454, "percentage": 61.9, "elapsed_time": "0:15:14", "remaining_time": "0:09:23", "throughput": 2516.43, "total_tokens": 2301568}
|
|
{"current_steps": 4695, "total_steps": 7577, "loss": 0.168, "lr": 7.598069899422221e-07, "epoch": 0.6196383793057938, "percentage": 61.96, "elapsed_time": "0:15:14", "remaining_time": "0:09:21", "throughput": 2518.12, "total_tokens": 2303936}
|
|
{"current_steps": 4700, "total_steps": 7577, "loss": 0.1745, "lr": 7.575715073307119e-07, "epoch": 0.6202982710835423, "percentage": 62.03, "elapsed_time": "0:15:15", "remaining_time": "0:09:20", "throughput": 2519.68, "total_tokens": 2306176}
|
|
{"current_steps": 4705, "total_steps": 7577, "loss": 0.0901, "lr": 7.55337311136414e-07, "epoch": 0.6209581628612908, "percentage": 62.1, "elapsed_time": "0:15:15", "remaining_time": "0:09:18", "throughput": 2521.57, "total_tokens": 2308736}
|
|
{"current_steps": 4710, "total_steps": 7577, "loss": 0.1379, "lr": 7.531044132148183e-07, "epoch": 0.6216180546390392, "percentage": 62.16, "elapsed_time": "0:15:15", "remaining_time": "0:09:17", "throughput": 2523.23, "total_tokens": 2311104}
|
|
{"current_steps": 4715, "total_steps": 7577, "loss": 0.049, "lr": 7.508728254145245e-07, "epoch": 0.6222779464167877, "percentage": 62.23, "elapsed_time": "0:15:16", "remaining_time": "0:09:16", "throughput": 2524.97, "total_tokens": 2313536}
|
|
{"current_steps": 4720, "total_steps": 7577, "loss": 0.1208, "lr": 7.486425595771817e-07, "epoch": 0.6229378381945361, "percentage": 62.29, "elapsed_time": "0:15:16", "remaining_time": "0:09:14", "throughput": 2526.77, "total_tokens": 2316032}
|
|
{"current_steps": 4725, "total_steps": 7577, "loss": 0.205, "lr": 7.464136275374223e-07, "epoch": 0.6235977299722846, "percentage": 62.36, "elapsed_time": "0:15:16", "remaining_time": "0:09:13", "throughput": 2528.72, "total_tokens": 2318656}
|
|
{"current_steps": 4730, "total_steps": 7577, "loss": 0.1395, "lr": 7.441860411228029e-07, "epoch": 0.624257621750033, "percentage": 62.43, "elapsed_time": "0:15:17", "remaining_time": "0:09:12", "throughput": 2530.6, "total_tokens": 2321216}
|
|
{"current_steps": 4735, "total_steps": 7577, "loss": 0.1494, "lr": 7.419598121537387e-07, "epoch": 0.6249175135277815, "percentage": 62.49, "elapsed_time": "0:15:17", "remaining_time": "0:09:10", "throughput": 2532.33, "total_tokens": 2323648}
|
|
{"current_steps": 4740, "total_steps": 7577, "loss": 0.165, "lr": 7.397349524434424e-07, "epoch": 0.6255774053055299, "percentage": 62.56, "elapsed_time": "0:15:17", "remaining_time": "0:09:09", "throughput": 2534.07, "total_tokens": 2326080}
|
|
{"current_steps": 4745, "total_steps": 7577, "loss": 0.0572, "lr": 7.375114737978605e-07, "epoch": 0.6262372970832784, "percentage": 62.62, "elapsed_time": "0:15:18", "remaining_time": "0:09:08", "throughput": 2535.81, "total_tokens": 2328512}
|
|
{"current_steps": 4750, "total_steps": 7577, "loss": 0.1462, "lr": 7.352893880156106e-07, "epoch": 0.6268971888610267, "percentage": 62.69, "elapsed_time": "0:15:18", "remaining_time": "0:09:06", "throughput": 2537.62, "total_tokens": 2331008}
|
|
{"current_steps": 4755, "total_steps": 7577, "loss": 0.0943, "lr": 7.330687068879202e-07, "epoch": 0.6275570806387752, "percentage": 62.76, "elapsed_time": "0:15:18", "remaining_time": "0:09:05", "throughput": 2539.29, "total_tokens": 2333376}
|
|
{"current_steps": 4760, "total_steps": 7577, "loss": 0.2447, "lr": 7.308494421985626e-07, "epoch": 0.6282169724165237, "percentage": 62.82, "elapsed_time": "0:15:19", "remaining_time": "0:09:04", "throughput": 2541.1, "total_tokens": 2335872}
|
|
{"current_steps": 4765, "total_steps": 7577, "loss": 0.0016, "lr": 7.286316057237951e-07, "epoch": 0.6288768641942721, "percentage": 62.89, "elapsed_time": "0:15:19", "remaining_time": "0:09:02", "throughput": 2542.97, "total_tokens": 2338432}
|
|
{"current_steps": 4770, "total_steps": 7577, "loss": 0.1152, "lr": 7.264152092322963e-07, "epoch": 0.6295367559720206, "percentage": 62.95, "elapsed_time": "0:15:19", "remaining_time": "0:09:01", "throughput": 2544.77, "total_tokens": 2340928}
|
|
{"current_steps": 4775, "total_steps": 7577, "loss": 0.1148, "lr": 7.242002644851035e-07, "epoch": 0.630196647749769, "percentage": 63.02, "elapsed_time": "0:15:20", "remaining_time": "0:08:59", "throughput": 2546.84, "total_tokens": 2343680}
|
|
{"current_steps": 4780, "total_steps": 7577, "loss": 0.0689, "lr": 7.219867832355508e-07, "epoch": 0.6308565395275175, "percentage": 63.09, "elapsed_time": "0:15:20", "remaining_time": "0:08:58", "throughput": 2548.71, "total_tokens": 2346240}
|
|
{"current_steps": 4785, "total_steps": 7577, "loss": 0.0841, "lr": 7.197747772292071e-07, "epoch": 0.6315164313052659, "percentage": 63.15, "elapsed_time": "0:15:20", "remaining_time": "0:08:57", "throughput": 2550.3, "total_tokens": 2348544}
|
|
{"current_steps": 4790, "total_steps": 7577, "loss": 0.2099, "lr": 7.17564258203811e-07, "epoch": 0.6321763230830144, "percentage": 63.22, "elapsed_time": "0:15:21", "remaining_time": "0:08:55", "throughput": 2552.03, "total_tokens": 2350976}
|
|
{"current_steps": 4795, "total_steps": 7577, "loss": 0.115, "lr": 7.153552378892128e-07, "epoch": 0.6328362148607628, "percentage": 63.28, "elapsed_time": "0:15:21", "remaining_time": "0:08:54", "throughput": 2553.56, "total_tokens": 2353216}
|
|
{"current_steps": 4800, "total_steps": 7577, "loss": 0.1212, "lr": 7.131477280073091e-07, "epoch": 0.6334961066385113, "percentage": 63.35, "elapsed_time": "0:15:21", "remaining_time": "0:08:53", "throughput": 2555.21, "total_tokens": 2355584}
|
|
{"current_steps": 4805, "total_steps": 7577, "loss": 0.0817, "lr": 7.109417402719813e-07, "epoch": 0.6341559984162597, "percentage": 63.42, "elapsed_time": "0:15:22", "remaining_time": "0:08:52", "throughput": 2557.06, "total_tokens": 2358144}
|
|
{"current_steps": 4810, "total_steps": 7577, "loss": 0.029, "lr": 7.087372863890346e-07, "epoch": 0.6348158901940082, "percentage": 63.48, "elapsed_time": "0:15:22", "remaining_time": "0:08:50", "throughput": 2559.12, "total_tokens": 2360896}
|
|
{"current_steps": 4815, "total_steps": 7577, "loss": 0.2463, "lr": 7.065343780561344e-07, "epoch": 0.6354757819717566, "percentage": 63.55, "elapsed_time": "0:15:22", "remaining_time": "0:08:49", "throughput": 2560.77, "total_tokens": 2363264}
|
|
{"current_steps": 4820, "total_steps": 7577, "loss": 0.0744, "lr": 7.043330269627448e-07, "epoch": 0.6361356737495051, "percentage": 63.61, "elapsed_time": "0:15:23", "remaining_time": "0:08:48", "throughput": 2562.42, "total_tokens": 2365632}
|
|
{"current_steps": 4825, "total_steps": 7577, "loss": 0.0018, "lr": 7.021332447900671e-07, "epoch": 0.6367955655272536, "percentage": 63.68, "elapsed_time": "0:15:23", "remaining_time": "0:08:46", "throughput": 2564.06, "total_tokens": 2368000}
|
|
{"current_steps": 4830, "total_steps": 7577, "loss": 0.1116, "lr": 6.999350432109766e-07, "epoch": 0.637455457305002, "percentage": 63.75, "elapsed_time": "0:15:23", "remaining_time": "0:08:45", "throughput": 2565.91, "total_tokens": 2370560}
|
|
{"current_steps": 4835, "total_steps": 7577, "loss": 0.0011, "lr": 6.977384338899617e-07, "epoch": 0.6381153490827505, "percentage": 63.81, "elapsed_time": "0:15:24", "remaining_time": "0:08:44", "throughput": 2567.76, "total_tokens": 2373120}
|
|
{"current_steps": 4840, "total_steps": 7577, "loss": 0.033, "lr": 6.955434284830619e-07, "epoch": 0.6387752408604989, "percentage": 63.88, "elapsed_time": "0:15:24", "remaining_time": "0:08:42", "throughput": 2569.81, "total_tokens": 2375872}
|
|
{"current_steps": 4845, "total_steps": 7577, "loss": 0.2167, "lr": 6.933500386378056e-07, "epoch": 0.6394351326382474, "percentage": 63.94, "elapsed_time": "0:15:24", "remaining_time": "0:08:41", "throughput": 2571.65, "total_tokens": 2378432}
|
|
{"current_steps": 4850, "total_steps": 7577, "loss": 0.213, "lr": 6.911582759931482e-07, "epoch": 0.6400950244159958, "percentage": 64.01, "elapsed_time": "0:15:25", "remaining_time": "0:08:40", "throughput": 2573.31, "total_tokens": 2380800}
|
|
{"current_steps": 4855, "total_steps": 7577, "loss": 0.2747, "lr": 6.889681521794109e-07, "epoch": 0.6407549161937443, "percentage": 64.08, "elapsed_time": "0:15:25", "remaining_time": "0:08:38", "throughput": 2575.55, "total_tokens": 2383744}
|
|
{"current_steps": 4860, "total_steps": 7577, "loss": 0.0758, "lr": 6.867796788182181e-07, "epoch": 0.6414148079714926, "percentage": 64.14, "elapsed_time": "0:15:25", "remaining_time": "0:08:37", "throughput": 2577.2, "total_tokens": 2386112}
|
|
{"current_steps": 4865, "total_steps": 7577, "loss": 0.1529, "lr": 6.845928675224366e-07, "epoch": 0.6420746997492411, "percentage": 64.21, "elapsed_time": "0:15:26", "remaining_time": "0:08:36", "throughput": 2579.12, "total_tokens": 2388736}
|
|
{"current_steps": 4870, "total_steps": 7577, "loss": 0.0713, "lr": 6.82407729896114e-07, "epoch": 0.6427345915269895, "percentage": 64.27, "elapsed_time": "0:15:26", "remaining_time": "0:08:35", "throughput": 2580.77, "total_tokens": 2391104}
|
|
{"current_steps": 4875, "total_steps": 7577, "loss": 0.1204, "lr": 6.802242775344163e-07, "epoch": 0.643394483304738, "percentage": 64.34, "elapsed_time": "0:15:26", "remaining_time": "0:08:33", "throughput": 2582.68, "total_tokens": 2393728}
|
|
{"current_steps": 4880, "total_steps": 7577, "loss": 0.1797, "lr": 6.780425220235674e-07, "epoch": 0.6440543750824864, "percentage": 64.41, "elapsed_time": "0:15:27", "remaining_time": "0:08:32", "throughput": 2584.72, "total_tokens": 2396480}
|
|
{"current_steps": 4885, "total_steps": 7577, "loss": 0.0011, "lr": 6.758624749407859e-07, "epoch": 0.6447142668602349, "percentage": 64.47, "elapsed_time": "0:15:27", "remaining_time": "0:08:31", "throughput": 2586.62, "total_tokens": 2399104}
|
|
{"current_steps": 4890, "total_steps": 7577, "loss": 0.0861, "lr": 6.736841478542264e-07, "epoch": 0.6453741586379834, "percentage": 64.54, "elapsed_time": "0:15:27", "remaining_time": "0:08:29", "throughput": 2588.46, "total_tokens": 2401664}
|
|
{"current_steps": 4895, "total_steps": 7577, "loss": 0.0316, "lr": 6.715075523229151e-07, "epoch": 0.6460340504157318, "percentage": 64.6, "elapsed_time": "0:15:28", "remaining_time": "0:08:28", "throughput": 2590.24, "total_tokens": 2404160}
|
|
{"current_steps": 4900, "total_steps": 7577, "loss": 0.1035, "lr": 6.693326998966909e-07, "epoch": 0.6466939421934803, "percentage": 64.67, "elapsed_time": "0:15:28", "remaining_time": "0:08:27", "throughput": 2591.95, "total_tokens": 2406592}
|
|
{"current_steps": 4905, "total_steps": 7577, "loss": 0.0014, "lr": 6.671596021161431e-07, "epoch": 0.6473538339712287, "percentage": 64.74, "elapsed_time": "0:15:28", "remaining_time": "0:08:25", "throughput": 2593.71, "total_tokens": 2409088}
|
|
{"current_steps": 4910, "total_steps": 7577, "loss": 0.1158, "lr": 6.649882705125494e-07, "epoch": 0.6480137257489772, "percentage": 64.8, "elapsed_time": "0:15:29", "remaining_time": "0:08:24", "throughput": 2595.49, "total_tokens": 2411584}
|
|
{"current_steps": 4915, "total_steps": 7577, "loss": 0.472, "lr": 6.628187166078163e-07, "epoch": 0.6486736175267256, "percentage": 64.87, "elapsed_time": "0:15:29", "remaining_time": "0:08:23", "throughput": 2597.57, "total_tokens": 2414400}
|
|
{"current_steps": 4920, "total_steps": 7577, "loss": 0.0382, "lr": 6.606509519144166e-07, "epoch": 0.6493335093044741, "percentage": 64.93, "elapsed_time": "0:15:29", "remaining_time": "0:08:22", "throughput": 2599.08, "total_tokens": 2416640}
|
|
{"current_steps": 4925, "total_steps": 7577, "loss": 0.1297, "lr": 6.584849879353289e-07, "epoch": 0.6499934010822225, "percentage": 65.0, "elapsed_time": "0:15:30", "remaining_time": "0:08:20", "throughput": 2600.84, "total_tokens": 2419136}
|
|
{"current_steps": 4927, "total_steps": 7577, "eval_loss": 0.09970784932374954, "epoch": 0.6502573577933219, "percentage": 65.03, "elapsed_time": "0:15:38", "remaining_time": "0:08:24", "throughput": 2579.64, "total_tokens": 2420096}
|
|
{"current_steps": 4930, "total_steps": 7577, "loss": 0.0423, "lr": 6.563208361639772e-07, "epoch": 0.650653292859971, "percentage": 65.07, "elapsed_time": "0:16:23", "remaining_time": "0:08:48", "throughput": 2462.08, "total_tokens": 2421440}
|
|
{"current_steps": 4935, "total_steps": 7577, "loss": 0.0187, "lr": 6.541585080841687e-07, "epoch": 0.6513131846377194, "percentage": 65.13, "elapsed_time": "0:16:23", "remaining_time": "0:08:46", "throughput": 2463.85, "total_tokens": 2424000}
|
|
{"current_steps": 4940, "total_steps": 7577, "loss": 0.1785, "lr": 6.519980151700332e-07, "epoch": 0.6519730764154679, "percentage": 65.2, "elapsed_time": "0:16:24", "remaining_time": "0:08:45", "throughput": 2465.31, "total_tokens": 2426240}
|
|
{"current_steps": 4945, "total_steps": 7577, "loss": 0.1136, "lr": 6.498393688859629e-07, "epoch": 0.6526329681932164, "percentage": 65.26, "elapsed_time": "0:16:24", "remaining_time": "0:08:43", "throughput": 2467.14, "total_tokens": 2428864}
|
|
{"current_steps": 4950, "total_steps": 7577, "loss": 0.0011, "lr": 6.47682580686551e-07, "epoch": 0.6532928599709648, "percentage": 65.33, "elapsed_time": "0:16:24", "remaining_time": "0:08:42", "throughput": 2468.8, "total_tokens": 2431296}
|
|
{"current_steps": 4955, "total_steps": 7577, "loss": 0.0017, "lr": 6.455276620165307e-07, "epoch": 0.6539527517487133, "percentage": 65.4, "elapsed_time": "0:16:25", "remaining_time": "0:08:41", "throughput": 2470.68, "total_tokens": 2433984}
|
|
{"current_steps": 4960, "total_steps": 7577, "loss": 0.4128, "lr": 6.433746243107152e-07, "epoch": 0.6546126435264616, "percentage": 65.46, "elapsed_time": "0:16:25", "remaining_time": "0:08:39", "throughput": 2472.13, "total_tokens": 2436224}
|
|
{"current_steps": 4965, "total_steps": 7577, "loss": 0.1516, "lr": 6.412234789939359e-07, "epoch": 0.6552725353042101, "percentage": 65.53, "elapsed_time": "0:16:25", "remaining_time": "0:08:38", "throughput": 2473.84, "total_tokens": 2438720}
|
|
{"current_steps": 4970, "total_steps": 7577, "loss": 0.0589, "lr": 6.390742374809832e-07, "epoch": 0.6559324270819585, "percentage": 65.59, "elapsed_time": "0:16:26", "remaining_time": "0:08:37", "throughput": 2475.29, "total_tokens": 2440960}
|
|
{"current_steps": 4975, "total_steps": 7577, "loss": 0.0597, "lr": 6.369269111765454e-07, "epoch": 0.656592318859707, "percentage": 65.66, "elapsed_time": "0:16:26", "remaining_time": "0:08:35", "throughput": 2476.86, "total_tokens": 2443328}
|
|
{"current_steps": 4980, "total_steps": 7577, "loss": 0.1664, "lr": 6.347815114751465e-07, "epoch": 0.6572522106374554, "percentage": 65.73, "elapsed_time": "0:16:26", "remaining_time": "0:08:34", "throughput": 2478.68, "total_tokens": 2445952}
|
|
{"current_steps": 4985, "total_steps": 7577, "loss": 0.1165, "lr": 6.326380497610886e-07, "epoch": 0.6579121024152039, "percentage": 65.79, "elapsed_time": "0:16:27", "remaining_time": "0:08:33", "throughput": 2480.5, "total_tokens": 2448576}
|
|
{"current_steps": 4990, "total_steps": 7577, "loss": 0.3173, "lr": 6.304965374083899e-07, "epoch": 0.6585719941929523, "percentage": 65.86, "elapsed_time": "0:16:27", "remaining_time": "0:08:31", "throughput": 2482.26, "total_tokens": 2451136}
|
|
{"current_steps": 4995, "total_steps": 7577, "loss": 0.002, "lr": 6.283569857807245e-07, "epoch": 0.6592318859707008, "percentage": 65.92, "elapsed_time": "0:16:27", "remaining_time": "0:08:30", "throughput": 2483.96, "total_tokens": 2453632}
|
|
{"current_steps": 5000, "total_steps": 7577, "loss": 0.0644, "lr": 6.262194062313615e-07, "epoch": 0.6598917777484492, "percentage": 65.99, "elapsed_time": "0:16:28", "remaining_time": "0:08:29", "throughput": 2485.72, "total_tokens": 2456192}
|
|
{"current_steps": 5005, "total_steps": 7577, "loss": 0.0013, "lr": 6.240838101031063e-07, "epoch": 0.6605516695261977, "percentage": 66.06, "elapsed_time": "0:16:28", "remaining_time": "0:08:27", "throughput": 2487.34, "total_tokens": 2458624}
|
|
{"current_steps": 5010, "total_steps": 7577, "loss": 0.0896, "lr": 6.21950208728239e-07, "epoch": 0.6612115613039462, "percentage": 66.12, "elapsed_time": "0:16:28", "remaining_time": "0:08:26", "throughput": 2488.85, "total_tokens": 2460928}
|
|
{"current_steps": 5015, "total_steps": 7577, "loss": 0.1054, "lr": 6.198186134284554e-07, "epoch": 0.6618714530816946, "percentage": 66.19, "elapsed_time": "0:16:29", "remaining_time": "0:08:25", "throughput": 2490.66, "total_tokens": 2463552}
|
|
{"current_steps": 5020, "total_steps": 7577, "loss": 0.0028, "lr": 6.176890355148049e-07, "epoch": 0.6625313448594431, "percentage": 66.25, "elapsed_time": "0:16:29", "remaining_time": "0:08:23", "throughput": 2492.17, "total_tokens": 2465856}
|
|
{"current_steps": 5025, "total_steps": 7577, "loss": 0.0981, "lr": 6.155614862876335e-07, "epoch": 0.6631912366371915, "percentage": 66.32, "elapsed_time": "0:16:29", "remaining_time": "0:08:22", "throughput": 2493.79, "total_tokens": 2468288}
|
|
{"current_steps": 5030, "total_steps": 7577, "loss": 0.1368, "lr": 6.134359770365214e-07, "epoch": 0.66385112841494, "percentage": 66.39, "elapsed_time": "0:16:30", "remaining_time": "0:08:21", "throughput": 2495.58, "total_tokens": 2470912}
|
|
{"current_steps": 5035, "total_steps": 7577, "loss": 0.0711, "lr": 6.11312519040224e-07, "epoch": 0.6645110201926884, "percentage": 66.45, "elapsed_time": "0:16:30", "remaining_time": "0:08:20", "throughput": 2497.39, "total_tokens": 2473536}
|
|
{"current_steps": 5040, "total_steps": 7577, "loss": 0.0009, "lr": 6.091911235666125e-07, "epoch": 0.6651709119704369, "percentage": 66.52, "elapsed_time": "0:16:30", "remaining_time": "0:08:18", "throughput": 2499.07, "total_tokens": 2476032}
|
|
{"current_steps": 5045, "total_steps": 7577, "loss": 0.1034, "lr": 6.070718018726124e-07, "epoch": 0.6658308037481853, "percentage": 66.58, "elapsed_time": "0:16:31", "remaining_time": "0:08:17", "throughput": 2500.44, "total_tokens": 2478208}
|
|
{"current_steps": 5050, "total_steps": 7577, "loss": 0.1196, "lr": 6.049545652041459e-07, "epoch": 0.6664906955259338, "percentage": 66.65, "elapsed_time": "0:16:31", "remaining_time": "0:08:16", "throughput": 2501.93, "total_tokens": 2480512}
|
|
{"current_steps": 5055, "total_steps": 7577, "loss": 0.1824, "lr": 6.028394247960709e-07, "epoch": 0.6671505873036822, "percentage": 66.72, "elapsed_time": "0:16:31", "remaining_time": "0:08:14", "throughput": 2503.61, "total_tokens": 2483008}
|
|
{"current_steps": 5060, "total_steps": 7577, "loss": 0.1399, "lr": 6.007263918721221e-07, "epoch": 0.6678104790814307, "percentage": 66.78, "elapsed_time": "0:16:32", "remaining_time": "0:08:13", "throughput": 2505.18, "total_tokens": 2485376}
|
|
{"current_steps": 5065, "total_steps": 7577, "loss": 0.07, "lr": 5.986154776448507e-07, "epoch": 0.668470370859179, "percentage": 66.85, "elapsed_time": "0:16:32", "remaining_time": "0:08:12", "throughput": 2507.04, "total_tokens": 2488064}
|
|
{"current_steps": 5070, "total_steps": 7577, "loss": 0.0635, "lr": 5.965066933155656e-07, "epoch": 0.6691302626369275, "percentage": 66.91, "elapsed_time": "0:16:32", "remaining_time": "0:08:10", "throughput": 2508.78, "total_tokens": 2490624}
|
|
{"current_steps": 5075, "total_steps": 7577, "loss": 0.2914, "lr": 5.944000500742735e-07, "epoch": 0.669790154414676, "percentage": 66.98, "elapsed_time": "0:16:33", "remaining_time": "0:08:09", "throughput": 2510.58, "total_tokens": 2493248}
|
|
{"current_steps": 5080, "total_steps": 7577, "loss": 0.2126, "lr": 5.922955590996195e-07, "epoch": 0.6704500461924244, "percentage": 67.05, "elapsed_time": "0:16:33", "remaining_time": "0:08:08", "throughput": 2512.26, "total_tokens": 2495744}
|
|
{"current_steps": 5085, "total_steps": 7577, "loss": 0.0014, "lr": 5.901932315588281e-07, "epoch": 0.6711099379701729, "percentage": 67.11, "elapsed_time": "0:16:33", "remaining_time": "0:08:07", "throughput": 2513.88, "total_tokens": 2498176}
|
|
{"current_steps": 5090, "total_steps": 7577, "loss": 0.1766, "lr": 5.880930786076441e-07, "epoch": 0.6717698297479213, "percentage": 67.18, "elapsed_time": "0:16:34", "remaining_time": "0:08:05", "throughput": 2515.32, "total_tokens": 2500416}
|
|
{"current_steps": 5095, "total_steps": 7577, "loss": 0.0912, "lr": 5.859951113902728e-07, "epoch": 0.6724297215256698, "percentage": 67.24, "elapsed_time": "0:16:34", "remaining_time": "0:08:04", "throughput": 2516.92, "total_tokens": 2502848}
|
|
{"current_steps": 5100, "total_steps": 7577, "loss": 0.1217, "lr": 5.83899341039321e-07, "epoch": 0.6730896133034182, "percentage": 67.31, "elapsed_time": "0:16:34", "remaining_time": "0:08:03", "throughput": 2518.42, "total_tokens": 2505152}
|
|
{"current_steps": 5105, "total_steps": 7577, "loss": 0.1365, "lr": 5.818057786757386e-07, "epoch": 0.6737495050811667, "percentage": 67.37, "elapsed_time": "0:16:35", "remaining_time": "0:08:01", "throughput": 2520.09, "total_tokens": 2507648}
|
|
{"current_steps": 5110, "total_steps": 7577, "loss": 0.1131, "lr": 5.797144354087588e-07, "epoch": 0.6744093968589151, "percentage": 67.44, "elapsed_time": "0:16:35", "remaining_time": "0:08:00", "throughput": 2521.74, "total_tokens": 2510144}
|
|
{"current_steps": 5115, "total_steps": 7577, "loss": 0.0198, "lr": 5.77625322335839e-07, "epoch": 0.6750692886366636, "percentage": 67.51, "elapsed_time": "0:16:35", "remaining_time": "0:07:59", "throughput": 2523.77, "total_tokens": 2513024}
|
|
{"current_steps": 5120, "total_steps": 7577, "loss": 0.0733, "lr": 5.755384505426032e-07, "epoch": 0.675729180414412, "percentage": 67.57, "elapsed_time": "0:16:36", "remaining_time": "0:07:57", "throughput": 2524.99, "total_tokens": 2515072}
|
|
{"current_steps": 5125, "total_steps": 7577, "loss": 0.0013, "lr": 5.734538311027819e-07, "epoch": 0.6763890721921605, "percentage": 67.64, "elapsed_time": "0:16:36", "remaining_time": "0:07:56", "throughput": 2526.47, "total_tokens": 2517376}
|
|
{"current_steps": 5130, "total_steps": 7577, "loss": 0.003, "lr": 5.713714750781533e-07, "epoch": 0.677048963969909, "percentage": 67.7, "elapsed_time": "0:16:36", "remaining_time": "0:07:55", "throughput": 2528.32, "total_tokens": 2520064}
|
|
{"current_steps": 5135, "total_steps": 7577, "loss": 0.0716, "lr": 5.692913935184862e-07, "epoch": 0.6777088557476574, "percentage": 67.77, "elapsed_time": "0:16:37", "remaining_time": "0:07:54", "throughput": 2530.12, "total_tokens": 2522688}
|
|
{"current_steps": 5140, "total_steps": 7577, "loss": 0.0165, "lr": 5.672135974614794e-07, "epoch": 0.6783687475254059, "percentage": 67.84, "elapsed_time": "0:16:37", "remaining_time": "0:07:52", "throughput": 2531.78, "total_tokens": 2525184}
|
|
{"current_steps": 5145, "total_steps": 7577, "loss": 0.0008, "lr": 5.651380979327034e-07, "epoch": 0.6790286393031543, "percentage": 67.9, "elapsed_time": "0:16:37", "remaining_time": "0:07:51", "throughput": 2533.33, "total_tokens": 2527552}
|
|
{"current_steps": 5150, "total_steps": 7577, "loss": 0.0435, "lr": 5.630649059455444e-07, "epoch": 0.6796885310809028, "percentage": 67.97, "elapsed_time": "0:16:38", "remaining_time": "0:07:50", "throughput": 2535.18, "total_tokens": 2530240}
|
|
{"current_steps": 5155, "total_steps": 7577, "loss": 0.0116, "lr": 5.609940325011413e-07, "epoch": 0.6803484228586512, "percentage": 68.03, "elapsed_time": "0:16:38", "remaining_time": "0:07:49", "throughput": 2536.6, "total_tokens": 2532480}
|
|
{"current_steps": 5160, "total_steps": 7577, "loss": 0.0006, "lr": 5.589254885883325e-07, "epoch": 0.6810083146363997, "percentage": 68.1, "elapsed_time": "0:16:38", "remaining_time": "0:07:47", "throughput": 2538.19, "total_tokens": 2534912}
|
|
{"current_steps": 5165, "total_steps": 7577, "loss": 0.0753, "lr": 5.568592851835936e-07, "epoch": 0.681668206414148, "percentage": 68.17, "elapsed_time": "0:16:39", "remaining_time": "0:07:46", "throughput": 2539.86, "total_tokens": 2537408}
|
|
{"current_steps": 5170, "total_steps": 7577, "loss": 0.2943, "lr": 5.547954332509805e-07, "epoch": 0.6823280981918965, "percentage": 68.23, "elapsed_time": "0:16:39", "remaining_time": "0:07:45", "throughput": 2541.4, "total_tokens": 2539776}
|
|
{"current_steps": 5175, "total_steps": 7577, "loss": 0.001, "lr": 5.527339437420717e-07, "epoch": 0.6829879899696449, "percentage": 68.3, "elapsed_time": "0:16:39", "remaining_time": "0:07:44", "throughput": 2543.0, "total_tokens": 2542208}
|
|
{"current_steps": 5180, "total_steps": 7577, "loss": 0.0691, "lr": 5.506748275959094e-07, "epoch": 0.6836478817473934, "percentage": 68.36, "elapsed_time": "0:16:40", "remaining_time": "0:07:42", "throughput": 2544.65, "total_tokens": 2544704}
|
|
{"current_steps": 5185, "total_steps": 7577, "loss": 0.0527, "lr": 5.48618095738943e-07, "epoch": 0.6843077735251418, "percentage": 68.43, "elapsed_time": "0:16:40", "remaining_time": "0:07:41", "throughput": 2546.18, "total_tokens": 2547072}
|
|
{"current_steps": 5190, "total_steps": 7577, "loss": 0.121, "lr": 5.465637590849681e-07, "epoch": 0.6849676653028903, "percentage": 68.5, "elapsed_time": "0:16:40", "remaining_time": "0:07:40", "throughput": 2547.7, "total_tokens": 2549440}
|
|
{"current_steps": 5195, "total_steps": 7577, "loss": 0.1132, "lr": 5.445118285350723e-07, "epoch": 0.6856275570806388, "percentage": 68.56, "elapsed_time": "0:16:41", "remaining_time": "0:07:38", "throughput": 2549.54, "total_tokens": 2552128}
|
|
{"current_steps": 5200, "total_steps": 7577, "loss": 0.0694, "lr": 5.424623149775745e-07, "epoch": 0.6862874488583872, "percentage": 68.63, "elapsed_time": "0:16:41", "remaining_time": "0:07:37", "throughput": 2550.94, "total_tokens": 2554368}
|
|
{"current_steps": 5205, "total_steps": 7577, "loss": 0.1189, "lr": 5.404152292879676e-07, "epoch": 0.6869473406361357, "percentage": 68.69, "elapsed_time": "0:16:41", "remaining_time": "0:07:36", "throughput": 2552.65, "total_tokens": 2556928}
|
|
{"current_steps": 5210, "total_steps": 7577, "loss": 0.1717, "lr": 5.38370582328863e-07, "epoch": 0.6876072324138841, "percentage": 68.76, "elapsed_time": "0:16:42", "remaining_time": "0:07:35", "throughput": 2554.24, "total_tokens": 2559360}
|
|
{"current_steps": 5215, "total_steps": 7577, "loss": 0.1758, "lr": 5.363283849499293e-07, "epoch": 0.6882671241916326, "percentage": 68.83, "elapsed_time": "0:16:42", "remaining_time": "0:07:33", "throughput": 2555.88, "total_tokens": 2561856}
|
|
{"current_steps": 5220, "total_steps": 7577, "loss": 0.1671, "lr": 5.342886479878387e-07, "epoch": 0.688927015969381, "percentage": 68.89, "elapsed_time": "0:16:42", "remaining_time": "0:07:32", "throughput": 2557.52, "total_tokens": 2564352}
|
|
{"current_steps": 5225, "total_steps": 7577, "loss": 0.0526, "lr": 5.32251382266206e-07, "epoch": 0.6895869077471295, "percentage": 68.96, "elapsed_time": "0:16:43", "remaining_time": "0:07:31", "throughput": 2559.1, "total_tokens": 2566784}
|
|
{"current_steps": 5230, "total_steps": 7577, "loss": 0.1115, "lr": 5.302165985955327e-07, "epoch": 0.6902467995248779, "percentage": 69.02, "elapsed_time": "0:16:43", "remaining_time": "0:07:30", "throughput": 2560.62, "total_tokens": 2569152}
|
|
{"current_steps": 5235, "total_steps": 7577, "loss": 0.0649, "lr": 5.281843077731511e-07, "epoch": 0.6909066913026264, "percentage": 69.09, "elapsed_time": "0:16:43", "remaining_time": "0:07:29", "throughput": 2562.14, "total_tokens": 2571520}
|
|
{"current_steps": 5240, "total_steps": 7577, "loss": 0.0934, "lr": 5.26154520583163e-07, "epoch": 0.6915665830803748, "percentage": 69.16, "elapsed_time": "0:16:43", "remaining_time": "0:07:27", "throughput": 2563.85, "total_tokens": 2574080}
|
|
{"current_steps": 5245, "total_steps": 7577, "loss": 0.1196, "lr": 5.241272477963877e-07, "epoch": 0.6922264748581233, "percentage": 69.22, "elapsed_time": "0:16:44", "remaining_time": "0:07:26", "throughput": 2565.26, "total_tokens": 2576320}
|
|
{"current_steps": 5250, "total_steps": 7577, "loss": 0.0494, "lr": 5.221025001703e-07, "epoch": 0.6928863666358717, "percentage": 69.29, "elapsed_time": "0:16:44", "remaining_time": "0:07:25", "throughput": 2566.85, "total_tokens": 2578752}
|
|
{"current_steps": 5255, "total_steps": 7577, "loss": 0.1279, "lr": 5.200802884489768e-07, "epoch": 0.6935462584136202, "percentage": 69.35, "elapsed_time": "0:16:44", "remaining_time": "0:07:24", "throughput": 2568.43, "total_tokens": 2581184}
|
|
{"current_steps": 5260, "total_steps": 7577, "loss": 0.2016, "lr": 5.180606233630374e-07, "epoch": 0.6942061501913687, "percentage": 69.42, "elapsed_time": "0:16:45", "remaining_time": "0:07:22", "throughput": 2570.24, "total_tokens": 2583872}
|
|
{"current_steps": 5265, "total_steps": 7577, "loss": 0.1809, "lr": 5.160435156295879e-07, "epoch": 0.694866041969117, "percentage": 69.49, "elapsed_time": "0:16:45", "remaining_time": "0:07:21", "throughput": 2571.8, "total_tokens": 2586304}
|
|
{"current_steps": 5270, "total_steps": 7577, "loss": 0.0593, "lr": 5.14028975952165e-07, "epoch": 0.6955259337468656, "percentage": 69.55, "elapsed_time": "0:16:45", "remaining_time": "0:07:20", "throughput": 2573.67, "total_tokens": 2589056}
|
|
{"current_steps": 5275, "total_steps": 7577, "loss": 0.0839, "lr": 5.120170150206768e-07, "epoch": 0.6961858255246139, "percentage": 69.62, "elapsed_time": "0:16:46", "remaining_time": "0:07:19", "throughput": 2575.24, "total_tokens": 2591488}
|
|
{"current_steps": 5280, "total_steps": 7577, "loss": 0.0112, "lr": 5.100076435113496e-07, "epoch": 0.6968457173023624, "percentage": 69.68, "elapsed_time": "0:16:46", "remaining_time": "0:07:17", "throughput": 2576.69, "total_tokens": 2593792}
|
|
{"current_steps": 5285, "total_steps": 7577, "loss": 0.1564, "lr": 5.080008720866673e-07, "epoch": 0.6975056090801108, "percentage": 69.75, "elapsed_time": "0:16:46", "remaining_time": "0:07:16", "throughput": 2578.01, "total_tokens": 2595968}
|
|
{"current_steps": 5290, "total_steps": 7577, "loss": 0.2333, "lr": 5.059967113953173e-07, "epoch": 0.6981655008578593, "percentage": 69.82, "elapsed_time": "0:16:47", "remaining_time": "0:07:15", "throughput": 2579.34, "total_tokens": 2598144}
|
|
{"current_steps": 5295, "total_steps": 7577, "loss": 0.0942, "lr": 5.039951720721349e-07, "epoch": 0.6988253926356077, "percentage": 69.88, "elapsed_time": "0:16:47", "remaining_time": "0:07:14", "throughput": 2580.78, "total_tokens": 2600448}
|
|
{"current_steps": 5300, "total_steps": 7577, "loss": 0.0922, "lr": 5.019962647380429e-07, "epoch": 0.6994852844133562, "percentage": 69.95, "elapsed_time": "0:16:47", "remaining_time": "0:07:13", "throughput": 2582.4, "total_tokens": 2602944}
|
|
{"current_steps": 5305, "total_steps": 7577, "loss": 0.1829, "lr": 5.000000000000002e-07, "epoch": 0.7001451761911046, "percentage": 70.01, "elapsed_time": "0:16:48", "remaining_time": "0:07:11", "throughput": 2583.72, "total_tokens": 2605120}
|
|
{"current_steps": 5306, "total_steps": 7577, "eval_loss": 0.09362584352493286, "epoch": 0.7002771545466544, "percentage": 70.03, "elapsed_time": "0:16:56", "remaining_time": "0:07:15", "throughput": 2563.57, "total_tokens": 2605504}
|
|
{"current_steps": 5310, "total_steps": 7577, "loss": 0.0412, "lr": 4.980063884509414e-07, "epoch": 0.7008050679688531, "percentage": 70.08, "elapsed_time": "0:17:19", "remaining_time": "0:07:23", "throughput": 2508.21, "total_tokens": 2607296}
|
|
{"current_steps": 5315, "total_steps": 7577, "loss": 0.0804, "lr": 4.960154406697229e-07, "epoch": 0.7014649597466015, "percentage": 70.15, "elapsed_time": "0:17:19", "remaining_time": "0:07:22", "throughput": 2509.73, "total_tokens": 2609728}
|
|
{"current_steps": 5320, "total_steps": 7577, "loss": 0.2263, "lr": 4.940271672210667e-07, "epoch": 0.70212485152435, "percentage": 70.21, "elapsed_time": "0:17:20", "remaining_time": "0:07:21", "throughput": 2511.33, "total_tokens": 2612224}
|
|
{"current_steps": 5325, "total_steps": 7577, "loss": 0.0293, "lr": 4.920415786555025e-07, "epoch": 0.7027847433020985, "percentage": 70.28, "elapsed_time": "0:17:20", "remaining_time": "0:07:20", "throughput": 2512.93, "total_tokens": 2614720}
|
|
{"current_steps": 5330, "total_steps": 7577, "loss": 0.2921, "lr": 4.900586855093144e-07, "epoch": 0.7034446350798469, "percentage": 70.34, "elapsed_time": "0:17:20", "remaining_time": "0:07:18", "throughput": 2514.64, "total_tokens": 2617344}
|
|
{"current_steps": 5335, "total_steps": 7577, "loss": 0.1245, "lr": 4.880784983044827e-07, "epoch": 0.7041045268575954, "percentage": 70.41, "elapsed_time": "0:17:21", "remaining_time": "0:07:17", "throughput": 2515.99, "total_tokens": 2619584}
|
|
{"current_steps": 5340, "total_steps": 7577, "loss": 0.0299, "lr": 4.861010275486284e-07, "epoch": 0.7047644186353438, "percentage": 70.48, "elapsed_time": "0:17:21", "remaining_time": "0:07:16", "throughput": 2517.39, "total_tokens": 2621888}
|
|
{"current_steps": 5345, "total_steps": 7577, "loss": 0.0976, "lr": 4.8412628373496e-07, "epoch": 0.7054243104130923, "percentage": 70.54, "elapsed_time": "0:17:21", "remaining_time": "0:07:15", "throughput": 2519.1, "total_tokens": 2624512}
|
|
{"current_steps": 5350, "total_steps": 7577, "loss": 0.0023, "lr": 4.821542773422136e-07, "epoch": 0.7060842021908407, "percentage": 70.61, "elapsed_time": "0:17:22", "remaining_time": "0:07:13", "throughput": 2520.69, "total_tokens": 2627008}
|
|
{"current_steps": 5355, "total_steps": 7577, "loss": 0.0019, "lr": 4.801850188346012e-07, "epoch": 0.7067440939685892, "percentage": 70.67, "elapsed_time": "0:17:22", "remaining_time": "0:07:12", "throughput": 2522.21, "total_tokens": 2629440}
|
|
{"current_steps": 5360, "total_steps": 7577, "loss": 0.1053, "lr": 4.782185186617523e-07, "epoch": 0.7074039857463376, "percentage": 70.74, "elapsed_time": "0:17:22", "remaining_time": "0:07:11", "throughput": 2523.73, "total_tokens": 2631872}
|
|
{"current_steps": 5365, "total_steps": 7577, "loss": 0.0618, "lr": 4.762547872586603e-07, "epoch": 0.7080638775240861, "percentage": 70.81, "elapsed_time": "0:17:23", "remaining_time": "0:07:10", "throughput": 2525.48, "total_tokens": 2634560}
|
|
{"current_steps": 5370, "total_steps": 7577, "loss": 0.1344, "lr": 4.7429383504562605e-07, "epoch": 0.7087237693018344, "percentage": 70.87, "elapsed_time": "0:17:23", "remaining_time": "0:07:08", "throughput": 2527.12, "total_tokens": 2637120}
|
|
{"current_steps": 5375, "total_steps": 7577, "loss": 0.0227, "lr": 4.723356724282029e-07, "epoch": 0.709383661079583, "percentage": 70.94, "elapsed_time": "0:17:23", "remaining_time": "0:07:07", "throughput": 2528.64, "total_tokens": 2639552}
|
|
{"current_steps": 5380, "total_steps": 7577, "loss": 0.128, "lr": 4.703803097971426e-07, "epoch": 0.7100435528573315, "percentage": 71.0, "elapsed_time": "0:17:24", "remaining_time": "0:07:06", "throughput": 2530.17, "total_tokens": 2641984}
|
|
{"current_steps": 5385, "total_steps": 7577, "loss": 0.0719, "lr": 4.6842775752833763e-07, "epoch": 0.7107034446350798, "percentage": 71.07, "elapsed_time": "0:17:24", "remaining_time": "0:07:05", "throughput": 2531.64, "total_tokens": 2644352}
|
|
{"current_steps": 5390, "total_steps": 7577, "loss": 0.0018, "lr": 4.664780259827689e-07, "epoch": 0.7113633364128283, "percentage": 71.14, "elapsed_time": "0:17:24", "remaining_time": "0:07:03", "throughput": 2533.4, "total_tokens": 2647040}
|
|
{"current_steps": 5395, "total_steps": 7577, "loss": 0.001, "lr": 4.6453112550644857e-07, "epoch": 0.7120232281905767, "percentage": 71.2, "elapsed_time": "0:17:25", "remaining_time": "0:07:02", "throughput": 2534.92, "total_tokens": 2649472}
|
|
{"current_steps": 5400, "total_steps": 7577, "loss": 0.0643, "lr": 4.625870664303663e-07, "epoch": 0.7126831199683252, "percentage": 71.27, "elapsed_time": "0:17:25", "remaining_time": "0:07:01", "throughput": 2536.39, "total_tokens": 2651840}
|
|
{"current_steps": 5405, "total_steps": 7577, "loss": 0.0024, "lr": 4.6064585907043486e-07, "epoch": 0.7133430117460736, "percentage": 71.33, "elapsed_time": "0:17:25", "remaining_time": "0:07:00", "throughput": 2538.09, "total_tokens": 2654464}
|
|
{"current_steps": 5410, "total_steps": 7577, "loss": 0.0598, "lr": 4.587075137274334e-07, "epoch": 0.7140029035238221, "percentage": 71.4, "elapsed_time": "0:17:26", "remaining_time": "0:06:59", "throughput": 2539.33, "total_tokens": 2656576}
|
|
{"current_steps": 5415, "total_steps": 7577, "loss": 0.0527, "lr": 4.5677204068695597e-07, "epoch": 0.7146627953015705, "percentage": 71.47, "elapsed_time": "0:17:26", "remaining_time": "0:06:57", "throughput": 2540.85, "total_tokens": 2659008}
|
|
{"current_steps": 5420, "total_steps": 7577, "loss": 0.037, "lr": 4.5483945021935356e-07, "epoch": 0.715322687079319, "percentage": 71.53, "elapsed_time": "0:17:26", "remaining_time": "0:06:56", "throughput": 2542.54, "total_tokens": 2661632}
|
|
{"current_steps": 5425, "total_steps": 7577, "loss": 0.1053, "lr": 4.5290975257968155e-07, "epoch": 0.7159825788570674, "percentage": 71.6, "elapsed_time": "0:17:27", "remaining_time": "0:06:55", "throughput": 2544.18, "total_tokens": 2664192}
|
|
{"current_steps": 5430, "total_steps": 7577, "loss": 0.185, "lr": 4.509829580076452e-07, "epoch": 0.7166424706348159, "percentage": 71.66, "elapsed_time": "0:17:27", "remaining_time": "0:06:54", "throughput": 2545.7, "total_tokens": 2666624}
|
|
{"current_steps": 5435, "total_steps": 7577, "loss": 0.1415, "lr": 4.490590767275442e-07, "epoch": 0.7173023624125643, "percentage": 71.73, "elapsed_time": "0:17:27", "remaining_time": "0:06:52", "throughput": 2547.29, "total_tokens": 2669120}
|
|
{"current_steps": 5440, "total_steps": 7577, "loss": 0.1221, "lr": 4.4713811894822064e-07, "epoch": 0.7179622541903128, "percentage": 71.8, "elapsed_time": "0:17:28", "remaining_time": "0:06:51", "throughput": 2548.82, "total_tokens": 2671552}
|
|
{"current_steps": 5445, "total_steps": 7577, "loss": 0.0782, "lr": 4.4522009486300204e-07, "epoch": 0.7186221459680613, "percentage": 71.86, "elapsed_time": "0:17:28", "remaining_time": "0:06:50", "throughput": 2550.57, "total_tokens": 2674240}
|
|
{"current_steps": 5450, "total_steps": 7577, "loss": 0.0865, "lr": 4.43305014649649e-07, "epoch": 0.7192820377458097, "percentage": 71.93, "elapsed_time": "0:17:28", "remaining_time": "0:06:49", "throughput": 2551.96, "total_tokens": 2676544}
|
|
{"current_steps": 5455, "total_steps": 7577, "loss": 0.0004, "lr": 4.4139288847030155e-07, "epoch": 0.7199419295235582, "percentage": 71.99, "elapsed_time": "0:17:29", "remaining_time": "0:06:48", "throughput": 2553.41, "total_tokens": 2678912}
|
|
{"current_steps": 5460, "total_steps": 7577, "loss": 0.0538, "lr": 4.394837264714233e-07, "epoch": 0.7206018213013066, "percentage": 72.06, "elapsed_time": "0:17:29", "remaining_time": "0:06:46", "throughput": 2554.93, "total_tokens": 2681344}
|
|
{"current_steps": 5465, "total_steps": 7577, "loss": 0.0013, "lr": 4.3757753878375005e-07, "epoch": 0.7212617130790551, "percentage": 72.13, "elapsed_time": "0:17:29", "remaining_time": "0:06:45", "throughput": 2556.43, "total_tokens": 2683776}
|
|
{"current_steps": 5470, "total_steps": 7577, "loss": 0.0553, "lr": 4.3567433552223375e-07, "epoch": 0.7219216048568035, "percentage": 72.19, "elapsed_time": "0:17:30", "remaining_time": "0:06:44", "throughput": 2557.77, "total_tokens": 2686016}
|
|
{"current_steps": 5475, "total_steps": 7577, "loss": 0.1592, "lr": 4.3377412678599e-07, "epoch": 0.722581496634552, "percentage": 72.26, "elapsed_time": "0:17:30", "remaining_time": "0:06:43", "throughput": 2558.98, "total_tokens": 2688128}
|
|
{"current_steps": 5480, "total_steps": 7577, "loss": 0.1678, "lr": 4.318769226582454e-07, "epoch": 0.7232413884123003, "percentage": 72.32, "elapsed_time": "0:17:30", "remaining_time": "0:06:42", "throughput": 2560.33, "total_tokens": 2690368}
|
|
{"current_steps": 5485, "total_steps": 7577, "loss": 0.0807, "lr": 4.299827332062811e-07, "epoch": 0.7239012801900488, "percentage": 72.39, "elapsed_time": "0:17:31", "remaining_time": "0:06:40", "throughput": 2562.02, "total_tokens": 2692992}
|
|
{"current_steps": 5490, "total_steps": 7577, "loss": 0.0951, "lr": 4.2809156848138363e-07, "epoch": 0.7245611719677972, "percentage": 72.46, "elapsed_time": "0:17:31", "remaining_time": "0:06:39", "throughput": 2563.54, "total_tokens": 2695424}
|
|
{"current_steps": 5495, "total_steps": 7577, "loss": 0.1498, "lr": 4.2620343851878616e-07, "epoch": 0.7252210637455457, "percentage": 72.52, "elapsed_time": "0:17:31", "remaining_time": "0:06:38", "throughput": 2565.06, "total_tokens": 2697856}
|
|
{"current_steps": 5500, "total_steps": 7577, "loss": 0.0717, "lr": 4.2431835333762123e-07, "epoch": 0.7258809555232941, "percentage": 72.59, "elapsed_time": "0:17:32", "remaining_time": "0:06:37", "throughput": 2566.86, "total_tokens": 2700608}
|
|
{"current_steps": 5505, "total_steps": 7577, "loss": 0.0003, "lr": 4.224363229408628e-07, "epoch": 0.7265408473010426, "percentage": 72.65, "elapsed_time": "0:17:32", "remaining_time": "0:06:36", "throughput": 2568.43, "total_tokens": 2703104}
|
|
{"current_steps": 5510, "total_steps": 7577, "loss": 0.2012, "lr": 4.205573573152753e-07, "epoch": 0.7272007390787911, "percentage": 72.72, "elapsed_time": "0:17:32", "remaining_time": "0:06:34", "throughput": 2569.78, "total_tokens": 2705344}
|
|
{"current_steps": 5515, "total_steps": 7577, "loss": 0.0556, "lr": 4.18681466431361e-07, "epoch": 0.7278606308565395, "percentage": 72.79, "elapsed_time": "0:17:33", "remaining_time": "0:06:33", "throughput": 2571.06, "total_tokens": 2707520}
|
|
{"current_steps": 5520, "total_steps": 7577, "loss": 0.0709, "lr": 4.168086602433055e-07, "epoch": 0.728520522634288, "percentage": 72.85, "elapsed_time": "0:17:33", "remaining_time": "0:06:32", "throughput": 2572.52, "total_tokens": 2709888}
|
|
{"current_steps": 5525, "total_steps": 7577, "loss": 0.2017, "lr": 4.1493894868892676e-07, "epoch": 0.7291804144120364, "percentage": 72.92, "elapsed_time": "0:17:33", "remaining_time": "0:06:31", "throughput": 2573.92, "total_tokens": 2712192}
|
|
{"current_steps": 5530, "total_steps": 7577, "loss": 0.0165, "lr": 4.1307234168962093e-07, "epoch": 0.7298403061897849, "percentage": 72.98, "elapsed_time": "0:17:34", "remaining_time": "0:06:30", "throughput": 2575.2, "total_tokens": 2714368}
|
|
{"current_steps": 5535, "total_steps": 7577, "loss": 0.0494, "lr": 4.112088491503095e-07, "epoch": 0.7305001979675333, "percentage": 73.05, "elapsed_time": "0:17:34", "remaining_time": "0:06:28", "throughput": 2576.54, "total_tokens": 2716608}
|
|
{"current_steps": 5540, "total_steps": 7577, "loss": 0.0011, "lr": 4.0934848095938937e-07, "epoch": 0.7311600897452818, "percentage": 73.12, "elapsed_time": "0:17:34", "remaining_time": "0:06:27", "throughput": 2577.7, "total_tokens": 2718656}
|
|
{"current_steps": 5545, "total_steps": 7577, "loss": 0.097, "lr": 4.074912469886763e-07, "epoch": 0.7318199815230302, "percentage": 73.18, "elapsed_time": "0:17:35", "remaining_time": "0:06:26", "throughput": 2579.27, "total_tokens": 2721152}
|
|
{"current_steps": 5550, "total_steps": 7577, "loss": 0.0014, "lr": 4.0563715709335657e-07, "epoch": 0.7324798733007787, "percentage": 73.25, "elapsed_time": "0:17:35", "remaining_time": "0:06:25", "throughput": 2580.48, "total_tokens": 2723264}
|
|
{"current_steps": 5555, "total_steps": 7577, "loss": 0.198, "lr": 4.037862211119315e-07, "epoch": 0.7331397650785271, "percentage": 73.31, "elapsed_time": "0:17:35", "remaining_time": "0:06:24", "throughput": 2581.87, "total_tokens": 2725568}
|
|
{"current_steps": 5560, "total_steps": 7577, "loss": 0.0634, "lr": 4.0193844886616715e-07, "epoch": 0.7337996568562756, "percentage": 73.38, "elapsed_time": "0:17:35", "remaining_time": "0:06:23", "throughput": 2583.55, "total_tokens": 2728192}
|
|
{"current_steps": 5565, "total_steps": 7577, "loss": 0.2335, "lr": 4.0009385016104137e-07, "epoch": 0.7344595486340241, "percentage": 73.45, "elapsed_time": "0:17:36", "remaining_time": "0:06:21", "throughput": 2585.43, "total_tokens": 2731072}
|
|
{"current_steps": 5570, "total_steps": 7577, "loss": 0.0761, "lr": 3.9825243478469164e-07, "epoch": 0.7351194404117725, "percentage": 73.51, "elapsed_time": "0:17:36", "remaining_time": "0:06:20", "throughput": 2586.85, "total_tokens": 2733440}
|
|
{"current_steps": 5575, "total_steps": 7577, "loss": 0.0718, "lr": 3.9641421250836484e-07, "epoch": 0.735779332189521, "percentage": 73.58, "elapsed_time": "0:17:36", "remaining_time": "0:06:19", "throughput": 2588.53, "total_tokens": 2736064}
|
|
{"current_steps": 5580, "total_steps": 7577, "loss": 0.0312, "lr": 3.945791930863622e-07, "epoch": 0.7364392239672694, "percentage": 73.64, "elapsed_time": "0:17:37", "remaining_time": "0:06:18", "throughput": 2590.03, "total_tokens": 2738496}
|
|
{"current_steps": 5585, "total_steps": 7577, "loss": 0.0019, "lr": 3.9274738625599137e-07, "epoch": 0.7370991157450179, "percentage": 73.71, "elapsed_time": "0:17:37", "remaining_time": "0:06:17", "throughput": 2591.42, "total_tokens": 2740800}
|
|
{"current_steps": 5590, "total_steps": 7577, "loss": 0.0637, "lr": 3.909188017375112e-07, "epoch": 0.7377590075227662, "percentage": 73.78, "elapsed_time": "0:17:37", "remaining_time": "0:06:16", "throughput": 2592.8, "total_tokens": 2743104}
|
|
{"current_steps": 5595, "total_steps": 7577, "loss": 0.137, "lr": 3.890934492340819e-07, "epoch": 0.7384188993005147, "percentage": 73.84, "elapsed_time": "0:17:38", "remaining_time": "0:06:14", "throughput": 2594.13, "total_tokens": 2745344}
|
|
{"current_steps": 5600, "total_steps": 7577, "loss": 0.0649, "lr": 3.872713384317147e-07, "epoch": 0.7390787910782631, "percentage": 73.91, "elapsed_time": "0:17:38", "remaining_time": "0:06:13", "throughput": 2595.4, "total_tokens": 2747520}
|
|
{"current_steps": 5605, "total_steps": 7577, "loss": 0.1226, "lr": 3.8545247899921776e-07, "epoch": 0.7397386828560116, "percentage": 73.97, "elapsed_time": "0:17:38", "remaining_time": "0:06:12", "throughput": 2596.96, "total_tokens": 2750016}
|
|
{"current_steps": 5610, "total_steps": 7577, "loss": 0.123, "lr": 3.8363688058814614e-07, "epoch": 0.74039857463376, "percentage": 74.04, "elapsed_time": "0:17:39", "remaining_time": "0:06:11", "throughput": 2598.68, "total_tokens": 2752704}
|
|
{"current_steps": 5615, "total_steps": 7577, "loss": 0.1293, "lr": 3.818245528327526e-07, "epoch": 0.7410584664115085, "percentage": 74.11, "elapsed_time": "0:17:39", "remaining_time": "0:06:10", "throughput": 2600.34, "total_tokens": 2755328}
|
|
{"current_steps": 5620, "total_steps": 7577, "loss": 0.0808, "lr": 3.8001550534993164e-07, "epoch": 0.7417183581892569, "percentage": 74.17, "elapsed_time": "0:17:39", "remaining_time": "0:06:09", "throughput": 2601.72, "total_tokens": 2757632}
|
|
{"current_steps": 5625, "total_steps": 7577, "loss": 0.0508, "lr": 3.7820974773917413e-07, "epoch": 0.7423782499670054, "percentage": 74.24, "elapsed_time": "0:17:40", "remaining_time": "0:06:07", "throughput": 2603.33, "total_tokens": 2760192}
|
|
{"current_steps": 5630, "total_steps": 7577, "loss": 0.0011, "lr": 3.764072895825117e-07, "epoch": 0.7430381417447539, "percentage": 74.3, "elapsed_time": "0:17:40", "remaining_time": "0:06:06", "throughput": 2604.99, "total_tokens": 2762816}
|
|
{"current_steps": 5635, "total_steps": 7577, "loss": 0.0735, "lr": 3.7460814044446934e-07, "epoch": 0.7436980335225023, "percentage": 74.37, "elapsed_time": "0:17:40", "remaining_time": "0:06:05", "throughput": 2606.37, "total_tokens": 2765120}
|
|
{"current_steps": 5640, "total_steps": 7577, "loss": 0.1235, "lr": 3.72812309872012e-07, "epoch": 0.7443579253002508, "percentage": 74.44, "elapsed_time": "0:17:41", "remaining_time": "0:06:04", "throughput": 2608.08, "total_tokens": 2767808}
|
|
{"current_steps": 5645, "total_steps": 7577, "loss": 0.0109, "lr": 3.71019807394495e-07, "epoch": 0.7450178170779992, "percentage": 74.5, "elapsed_time": "0:17:41", "remaining_time": "0:06:03", "throughput": 2609.51, "total_tokens": 2770176}
|
|
{"current_steps": 5650, "total_steps": 7577, "loss": 0.1873, "lr": 3.6923064252361505e-07, "epoch": 0.7456777088557477, "percentage": 74.57, "elapsed_time": "0:17:41", "remaining_time": "0:06:02", "throughput": 2611.06, "total_tokens": 2772672}
|
|
{"current_steps": 5655, "total_steps": 7577, "loss": 0.1134, "lr": 3.674448247533561e-07, "epoch": 0.7463376006334961, "percentage": 74.63, "elapsed_time": "0:17:42", "remaining_time": "0:06:01", "throughput": 2612.56, "total_tokens": 2775104}
|
|
{"current_steps": 5660, "total_steps": 7577, "loss": 0.2252, "lr": 3.656623635599432e-07, "epoch": 0.7469974924112446, "percentage": 74.7, "elapsed_time": "0:17:42", "remaining_time": "0:05:59", "throughput": 2614.27, "total_tokens": 2777792}
|
|
{"current_steps": 5665, "total_steps": 7577, "loss": 0.1396, "lr": 3.6388326840178865e-07, "epoch": 0.747657384188993, "percentage": 74.77, "elapsed_time": "0:17:42", "remaining_time": "0:05:58", "throughput": 2615.93, "total_tokens": 2780416}
|
|
{"current_steps": 5670, "total_steps": 7577, "loss": 0.0013, "lr": 3.621075487194435e-07, "epoch": 0.7483172759667415, "percentage": 74.83, "elapsed_time": "0:17:43", "remaining_time": "0:05:57", "throughput": 2617.76, "total_tokens": 2783232}
|
|
{"current_steps": 5675, "total_steps": 7577, "loss": 0.1226, "lr": 3.603352139355483e-07, "epoch": 0.7489771677444899, "percentage": 74.9, "elapsed_time": "0:17:43", "remaining_time": "0:05:56", "throughput": 2619.25, "total_tokens": 2785664}
|
|
{"current_steps": 5680, "total_steps": 7577, "loss": 0.0986, "lr": 3.58566273454781e-07, "epoch": 0.7496370595222384, "percentage": 74.96, "elapsed_time": "0:17:43", "remaining_time": "0:05:55", "throughput": 2620.84, "total_tokens": 2788224}
|
|
{"current_steps": 5685, "total_steps": 7577, "loss": 0.0314, "lr": 3.5680073666380817e-07, "epoch": 0.7502969512999867, "percentage": 75.03, "elapsed_time": "0:17:44", "remaining_time": "0:05:54", "throughput": 2622.33, "total_tokens": 2790656}
|
|
{"current_steps": 5685, "total_steps": 7577, "eval_loss": 0.10055803507566452, "epoch": 0.7502969512999867, "percentage": 75.03, "elapsed_time": "0:17:52", "remaining_time": "0:05:56", "throughput": 2603.19, "total_tokens": 2790656}
|
|
{"current_steps": 5690, "total_steps": 7577, "loss": 0.1421, "lr": 3.5503861293123514e-07, "epoch": 0.7509568430777352, "percentage": 75.1, "elapsed_time": "0:18:54", "remaining_time": "0:06:16", "throughput": 2461.44, "total_tokens": 2792960}
|
|
{"current_steps": 5695, "total_steps": 7577, "loss": 0.092, "lr": 3.532799116075571e-07, "epoch": 0.7516167348554837, "percentage": 75.16, "elapsed_time": "0:18:55", "remaining_time": "0:06:15", "throughput": 2463.05, "total_tokens": 2795648}
|
|
{"current_steps": 5700, "total_steps": 7577, "loss": 0.104, "lr": 3.5152464202510777e-07, "epoch": 0.7522766266332321, "percentage": 75.23, "elapsed_time": "0:18:55", "remaining_time": "0:06:13", "throughput": 2464.13, "total_tokens": 2797696}
|
|
{"current_steps": 5705, "total_steps": 7577, "loss": 0.1443, "lr": 3.4977281349801056e-07, "epoch": 0.7529365184109806, "percentage": 75.29, "elapsed_time": "0:18:55", "remaining_time": "0:06:12", "throughput": 2465.61, "total_tokens": 2800192}
|
|
{"current_steps": 5710, "total_steps": 7577, "loss": 0.0584, "lr": 3.4802443532213056e-07, "epoch": 0.753596410188729, "percentage": 75.36, "elapsed_time": "0:18:56", "remaining_time": "0:06:11", "throughput": 2466.37, "total_tokens": 2802560}
|
|
{"current_steps": 5715, "total_steps": 7577, "loss": 0.2492, "lr": 3.4627951677502233e-07, "epoch": 0.7542563019664775, "percentage": 75.43, "elapsed_time": "0:18:56", "remaining_time": "0:06:10", "throughput": 2467.79, "total_tokens": 2804992}
|
|
{"current_steps": 5720, "total_steps": 7577, "loss": 0.0061, "lr": 3.4453806711588397e-07, "epoch": 0.7549161937442259, "percentage": 75.49, "elapsed_time": "0:18:56", "remaining_time": "0:06:09", "throughput": 2469.11, "total_tokens": 2807296}
|
|
{"current_steps": 5725, "total_steps": 7577, "loss": 0.0309, "lr": 3.428000955855054e-07, "epoch": 0.7555760855219744, "percentage": 75.56, "elapsed_time": "0:18:57", "remaining_time": "0:06:07", "throughput": 2470.75, "total_tokens": 2809984}
|
|
{"current_steps": 5730, "total_steps": 7577, "loss": 0.0018, "lr": 3.4106561140621983e-07, "epoch": 0.7562359772997228, "percentage": 75.62, "elapsed_time": "0:18:57", "remaining_time": "0:06:06", "throughput": 2472.44, "total_tokens": 2812736}
|
|
{"current_steps": 5735, "total_steps": 7577, "loss": 0.173, "lr": 3.393346237818567e-07, "epoch": 0.7568958690774713, "percentage": 75.69, "elapsed_time": "0:18:57", "remaining_time": "0:06:05", "throughput": 2473.75, "total_tokens": 2815040}
|
|
{"current_steps": 5740, "total_steps": 7577, "loss": 0.1591, "lr": 3.3760714189769015e-07, "epoch": 0.7575557608552197, "percentage": 75.76, "elapsed_time": "0:18:58", "remaining_time": "0:06:04", "throughput": 2475.06, "total_tokens": 2817344}
|
|
{"current_steps": 5745, "total_steps": 7577, "loss": 0.024, "lr": 3.3588317492039266e-07, "epoch": 0.7582156526329682, "percentage": 75.82, "elapsed_time": "0:18:58", "remaining_time": "0:06:03", "throughput": 2476.36, "total_tokens": 2819648}
|
|
{"current_steps": 5750, "total_steps": 7577, "loss": 0.1302, "lr": 3.341627319979834e-07, "epoch": 0.7588755444107167, "percentage": 75.89, "elapsed_time": "0:18:58", "remaining_time": "0:06:01", "throughput": 2478.09, "total_tokens": 2822464}
|
|
{"current_steps": 5755, "total_steps": 7577, "loss": 0.2033, "lr": 3.324458222597839e-07, "epoch": 0.7595354361884651, "percentage": 75.95, "elapsed_time": "0:18:59", "remaining_time": "0:06:00", "throughput": 2479.5, "total_tokens": 2824896}
|
|
{"current_steps": 5760, "total_steps": 7577, "loss": 0.0718, "lr": 3.307324548163657e-07, "epoch": 0.7601953279662136, "percentage": 76.02, "elapsed_time": "0:18:59", "remaining_time": "0:05:59", "throughput": 2481.19, "total_tokens": 2827648}
|
|
{"current_steps": 5765, "total_steps": 7577, "loss": 0.0575, "lr": 3.2902263875950374e-07, "epoch": 0.760855219743962, "percentage": 76.09, "elapsed_time": "0:18:59", "remaining_time": "0:05:58", "throughput": 2482.82, "total_tokens": 2830336}
|
|
{"current_steps": 5770, "total_steps": 7577, "loss": 0.0498, "lr": 3.2731638316212894e-07, "epoch": 0.7615151115217105, "percentage": 76.15, "elapsed_time": "0:19:00", "remaining_time": "0:05:57", "throughput": 2484.14, "total_tokens": 2832640}
|
|
{"current_steps": 5775, "total_steps": 7577, "loss": 0.086, "lr": 3.256136970782782e-07, "epoch": 0.7621750032994589, "percentage": 76.22, "elapsed_time": "0:19:00", "remaining_time": "0:05:55", "throughput": 2485.4, "total_tokens": 2834880}
|
|
{"current_steps": 5780, "total_steps": 7577, "loss": 0.1474, "lr": 3.23914589543047e-07, "epoch": 0.7628348950772074, "percentage": 76.28, "elapsed_time": "0:19:00", "remaining_time": "0:05:54", "throughput": 2486.91, "total_tokens": 2837440}
|
|
{"current_steps": 5785, "total_steps": 7577, "loss": 0.0621, "lr": 3.2221906957254276e-07, "epoch": 0.7634947868549558, "percentage": 76.35, "elapsed_time": "0:19:01", "remaining_time": "0:05:53", "throughput": 2488.27, "total_tokens": 2839808}
|
|
{"current_steps": 5790, "total_steps": 7577, "loss": 0.1094, "lr": 3.205271461638346e-07, "epoch": 0.7641546786327043, "percentage": 76.42, "elapsed_time": "0:19:01", "remaining_time": "0:05:52", "throughput": 2489.85, "total_tokens": 2842432}
|
|
{"current_steps": 5795, "total_steps": 7577, "loss": 0.1338, "lr": 3.188388282949085e-07, "epoch": 0.7648145704104526, "percentage": 76.48, "elapsed_time": "0:19:01", "remaining_time": "0:05:51", "throughput": 2491.49, "total_tokens": 2845120}
|
|
{"current_steps": 5800, "total_steps": 7577, "loss": 0.1247, "lr": 3.171541249246166e-07, "epoch": 0.7654744621882011, "percentage": 76.55, "elapsed_time": "0:19:02", "remaining_time": "0:05:49", "throughput": 2493.27, "total_tokens": 2848000}
|
|
{"current_steps": 5805, "total_steps": 7577, "loss": 0.1788, "lr": 3.154730449926316e-07, "epoch": 0.7661343539659495, "percentage": 76.61, "elapsed_time": "0:19:02", "remaining_time": "0:05:48", "throughput": 2494.84, "total_tokens": 2850624}
|
|
{"current_steps": 5810, "total_steps": 7577, "loss": 0.1394, "lr": 3.137955974194e-07, "epoch": 0.766794245743698, "percentage": 76.68, "elapsed_time": "0:19:02", "remaining_time": "0:05:47", "throughput": 2496.19, "total_tokens": 2852992}
|
|
{"current_steps": 5815, "total_steps": 7577, "loss": 0.0413, "lr": 3.1212179110609125e-07, "epoch": 0.7674541375214465, "percentage": 76.75, "elapsed_time": "0:19:03", "remaining_time": "0:05:46", "throughput": 2497.59, "total_tokens": 2855424}
|
|
{"current_steps": 5820, "total_steps": 7577, "loss": 0.1604, "lr": 3.104516349345553e-07, "epoch": 0.7681140292991949, "percentage": 76.81, "elapsed_time": "0:19:03", "remaining_time": "0:05:45", "throughput": 2499.1, "total_tokens": 2857984}
|
|
{"current_steps": 5825, "total_steps": 7577, "loss": 0.0702, "lr": 3.0878513776727144e-07, "epoch": 0.7687739210769434, "percentage": 76.88, "elapsed_time": "0:19:03", "remaining_time": "0:05:44", "throughput": 2500.72, "total_tokens": 2860672}
|
|
{"current_steps": 5830, "total_steps": 7577, "loss": 0.1228, "lr": 3.0712230844730414e-07, "epoch": 0.7694338128546918, "percentage": 76.94, "elapsed_time": "0:19:04", "remaining_time": "0:05:42", "throughput": 2502.07, "total_tokens": 2863040}
|
|
{"current_steps": 5835, "total_steps": 7577, "loss": 0.1412, "lr": 3.054631557982539e-07, "epoch": 0.7700937046324403, "percentage": 77.01, "elapsed_time": "0:19:04", "remaining_time": "0:05:41", "throughput": 2503.81, "total_tokens": 2865856}
|
|
{"current_steps": 5840, "total_steps": 7577, "loss": 0.108, "lr": 3.0380768862421156e-07, "epoch": 0.7707535964101887, "percentage": 77.08, "elapsed_time": "0:19:04", "remaining_time": "0:05:40", "throughput": 2505.05, "total_tokens": 2868096}
|
|
{"current_steps": 5845, "total_steps": 7577, "loss": 0.001, "lr": 3.0215591570971234e-07, "epoch": 0.7714134881879372, "percentage": 77.14, "elapsed_time": "0:19:05", "remaining_time": "0:05:39", "throughput": 2506.67, "total_tokens": 2870784}
|
|
{"current_steps": 5850, "total_steps": 7577, "loss": 0.0313, "lr": 3.005078458196868e-07, "epoch": 0.7720733799656856, "percentage": 77.21, "elapsed_time": "0:19:05", "remaining_time": "0:05:38", "throughput": 2508.07, "total_tokens": 2873216}
|
|
{"current_steps": 5855, "total_steps": 7577, "loss": 0.0011, "lr": 2.988634876994175e-07, "epoch": 0.7727332717434341, "percentage": 77.27, "elapsed_time": "0:19:05", "remaining_time": "0:05:37", "throughput": 2509.58, "total_tokens": 2875776}
|
|
{"current_steps": 5860, "total_steps": 7577, "loss": 0.0322, "lr": 2.972228500744898e-07, "epoch": 0.7733931635211825, "percentage": 77.34, "elapsed_time": "0:19:06", "remaining_time": "0:05:35", "throughput": 2511.09, "total_tokens": 2878336}
|
|
{"current_steps": 5865, "total_steps": 7577, "loss": 0.1583, "lr": 2.955859416507467e-07, "epoch": 0.774053055298931, "percentage": 77.41, "elapsed_time": "0:19:06", "remaining_time": "0:05:34", "throughput": 2512.6, "total_tokens": 2880896}
|
|
{"current_steps": 5870, "total_steps": 7577, "loss": 0.0666, "lr": 2.9395277111424357e-07, "epoch": 0.7747129470766794, "percentage": 77.47, "elapsed_time": "0:19:06", "remaining_time": "0:05:33", "throughput": 2514.25, "total_tokens": 2883648}
|
|
{"current_steps": 5875, "total_steps": 7577, "loss": 0.0659, "lr": 2.9232334713120035e-07, "epoch": 0.7753728388544279, "percentage": 77.54, "elapsed_time": "0:19:07", "remaining_time": "0:05:32", "throughput": 2515.53, "total_tokens": 2885952}
|
|
{"current_steps": 5880, "total_steps": 7577, "loss": 0.0666, "lr": 2.9069767834795655e-07, "epoch": 0.7760327306321764, "percentage": 77.6, "elapsed_time": "0:19:07", "remaining_time": "0:05:31", "throughput": 2517.07, "total_tokens": 2888576}
|
|
{"current_steps": 5885, "total_steps": 7577, "loss": 0.139, "lr": 2.8907577339092483e-07, "epoch": 0.7766926224099248, "percentage": 77.67, "elapsed_time": "0:19:07", "remaining_time": "0:05:30", "throughput": 2518.57, "total_tokens": 2891136}
|
|
{"current_steps": 5890, "total_steps": 7577, "loss": 0.0791, "lr": 2.8745764086654654e-07, "epoch": 0.7773525141876733, "percentage": 77.74, "elapsed_time": "0:19:08", "remaining_time": "0:05:28", "throughput": 2520.07, "total_tokens": 2893696}
|
|
{"current_steps": 5895, "total_steps": 7577, "loss": 0.0014, "lr": 2.8584328936124424e-07, "epoch": 0.7780124059654216, "percentage": 77.8, "elapsed_time": "0:19:08", "remaining_time": "0:05:27", "throughput": 2521.78, "total_tokens": 2896512}
|
|
{"current_steps": 5900, "total_steps": 7577, "loss": 0.1734, "lr": 2.8423272744137674e-07, "epoch": 0.7786722977431701, "percentage": 77.87, "elapsed_time": "0:19:08", "remaining_time": "0:05:26", "throughput": 2523.22, "total_tokens": 2899008}
|
|
{"current_steps": 5905, "total_steps": 7577, "loss": 0.0576, "lr": 2.82625963653195e-07, "epoch": 0.7793321895209185, "percentage": 77.93, "elapsed_time": "0:19:09", "remaining_time": "0:05:25", "throughput": 2524.56, "total_tokens": 2901376}
|
|
{"current_steps": 5910, "total_steps": 7577, "loss": 0.192, "lr": 2.810230065227944e-07, "epoch": 0.779992081298667, "percentage": 78.0, "elapsed_time": "0:19:09", "remaining_time": "0:05:24", "throughput": 2525.99, "total_tokens": 2903872}
|
|
{"current_steps": 5915, "total_steps": 7577, "loss": 0.0012, "lr": 2.7942386455607203e-07, "epoch": 0.7806519730764154, "percentage": 78.07, "elapsed_time": "0:19:09", "remaining_time": "0:05:23", "throughput": 2527.32, "total_tokens": 2906240}
|
|
{"current_steps": 5920, "total_steps": 7577, "loss": 0.068, "lr": 2.77828546238679e-07, "epoch": 0.7813118648541639, "percentage": 78.13, "elapsed_time": "0:19:10", "remaining_time": "0:05:21", "throughput": 2528.77, "total_tokens": 2908736}
|
|
{"current_steps": 5925, "total_steps": 7577, "loss": 0.1235, "lr": 2.762370600359774e-07, "epoch": 0.7819717566319123, "percentage": 78.2, "elapsed_time": "0:19:10", "remaining_time": "0:05:20", "throughput": 2530.1, "total_tokens": 2911104}
|
|
{"current_steps": 5930, "total_steps": 7577, "loss": 0.0383, "lr": 2.7464941439299484e-07, "epoch": 0.7826316484096608, "percentage": 78.26, "elapsed_time": "0:19:10", "remaining_time": "0:05:19", "throughput": 2531.42, "total_tokens": 2913472}
|
|
{"current_steps": 5935, "total_steps": 7577, "loss": 0.1809, "lr": 2.7306561773437887e-07, "epoch": 0.7832915401874093, "percentage": 78.33, "elapsed_time": "0:19:11", "remaining_time": "0:05:18", "throughput": 2532.74, "total_tokens": 2915840}
|
|
{"current_steps": 5940, "total_steps": 7577, "loss": 0.0033, "lr": 2.714856784643533e-07, "epoch": 0.7839514319651577, "percentage": 78.4, "elapsed_time": "0:19:11", "remaining_time": "0:05:17", "throughput": 2534.02, "total_tokens": 2918144}
|
|
{"current_steps": 5945, "total_steps": 7577, "loss": 0.1415, "lr": 2.6990960496667313e-07, "epoch": 0.7846113237429062, "percentage": 78.46, "elapsed_time": "0:19:11", "remaining_time": "0:05:16", "throughput": 2535.56, "total_tokens": 2920768}
|
|
{"current_steps": 5950, "total_steps": 7577, "loss": 0.0698, "lr": 2.6833740560457976e-07, "epoch": 0.7852712155206546, "percentage": 78.53, "elapsed_time": "0:19:12", "remaining_time": "0:05:15", "throughput": 2536.89, "total_tokens": 2923136}
|
|
{"current_steps": 5955, "total_steps": 7577, "loss": 0.0759, "lr": 2.6676908872075757e-07, "epoch": 0.7859311072984031, "percentage": 78.59, "elapsed_time": "0:19:12", "remaining_time": "0:05:13", "throughput": 2538.27, "total_tokens": 2925568}
|
|
{"current_steps": 5960, "total_steps": 7577, "loss": 0.0742, "lr": 2.6520466263728836e-07, "epoch": 0.7865909990761515, "percentage": 78.66, "elapsed_time": "0:19:12", "remaining_time": "0:05:12", "throughput": 2539.71, "total_tokens": 2928064}
|
|
{"current_steps": 5965, "total_steps": 7577, "loss": 0.2564, "lr": 2.636441356556087e-07, "epoch": 0.7872508908539, "percentage": 78.73, "elapsed_time": "0:19:13", "remaining_time": "0:05:11", "throughput": 2540.99, "total_tokens": 2930368}
|
|
{"current_steps": 5970, "total_steps": 7577, "loss": 0.1123, "lr": 2.620875160564645e-07, "epoch": 0.7879107826316484, "percentage": 78.79, "elapsed_time": "0:19:13", "remaining_time": "0:05:10", "throughput": 2542.47, "total_tokens": 2932928}
|
|
{"current_steps": 5975, "total_steps": 7577, "loss": 0.3593, "lr": 2.6053481209986715e-07, "epoch": 0.7885706744093969, "percentage": 78.86, "elapsed_time": "0:19:13", "remaining_time": "0:05:09", "throughput": 2543.85, "total_tokens": 2935360}
|
|
{"current_steps": 5980, "total_steps": 7577, "loss": 0.059, "lr": 2.5898603202505155e-07, "epoch": 0.7892305661871453, "percentage": 78.92, "elapsed_time": "0:19:14", "remaining_time": "0:05:08", "throughput": 2545.32, "total_tokens": 2937920}
|
|
{"current_steps": 5985, "total_steps": 7577, "loss": 0.0823, "lr": 2.5744118405042923e-07, "epoch": 0.7898904579648938, "percentage": 78.99, "elapsed_time": "0:19:14", "remaining_time": "0:05:07", "throughput": 2546.59, "total_tokens": 2940224}
|
|
{"current_steps": 5990, "total_steps": 7577, "loss": 0.0012, "lr": 2.559002763735485e-07, "epoch": 0.7905503497426422, "percentage": 79.06, "elapsed_time": "0:19:14", "remaining_time": "0:05:05", "throughput": 2548.12, "total_tokens": 2942848}
|
|
{"current_steps": 5995, "total_steps": 7577, "loss": 0.0604, "lr": 2.543633171710472e-07, "epoch": 0.7912102415203907, "percentage": 79.12, "elapsed_time": "0:19:15", "remaining_time": "0:05:04", "throughput": 2549.56, "total_tokens": 2945344}
|
|
{"current_steps": 6000, "total_steps": 7577, "loss": 0.008, "lr": 2.5283031459861205e-07, "epoch": 0.7918701332981392, "percentage": 79.19, "elapsed_time": "0:19:15", "remaining_time": "0:05:03", "throughput": 2550.99, "total_tokens": 2947840}
|
|
{"current_steps": 6005, "total_steps": 7577, "loss": 0.0374, "lr": 2.5130127679093396e-07, "epoch": 0.7925300250758875, "percentage": 79.25, "elapsed_time": "0:19:15", "remaining_time": "0:05:02", "throughput": 2552.26, "total_tokens": 2950144}
|
|
{"current_steps": 6010, "total_steps": 7577, "loss": 0.0336, "lr": 2.497762118616652e-07, "epoch": 0.793189916853636, "percentage": 79.32, "elapsed_time": "0:19:16", "remaining_time": "0:05:01", "throughput": 2553.48, "total_tokens": 2952384}
|
|
{"current_steps": 6015, "total_steps": 7577, "loss": 0.071, "lr": 2.4825512790337745e-07, "epoch": 0.7938498086313844, "percentage": 79.38, "elapsed_time": "0:19:16", "remaining_time": "0:05:00", "throughput": 2555.12, "total_tokens": 2955136}
|
|
{"current_steps": 6020, "total_steps": 7577, "loss": 0.0369, "lr": 2.467380329875163e-07, "epoch": 0.7945097004091329, "percentage": 79.45, "elapsed_time": "0:19:16", "remaining_time": "0:04:59", "throughput": 2556.7, "total_tokens": 2957824}
|
|
{"current_steps": 6025, "total_steps": 7577, "loss": 0.0014, "lr": 2.452249351643615e-07, "epoch": 0.7951695921868813, "percentage": 79.52, "elapsed_time": "0:19:17", "remaining_time": "0:04:58", "throughput": 2558.08, "total_tokens": 2960256}
|
|
{"current_steps": 6030, "total_steps": 7577, "loss": 0.1182, "lr": 2.437158424629817e-07, "epoch": 0.7958294839646298, "percentage": 79.58, "elapsed_time": "0:19:17", "remaining_time": "0:04:56", "throughput": 2559.67, "total_tokens": 2962944}
|
|
{"current_steps": 6035, "total_steps": 7577, "loss": 0.2256, "lr": 2.422107628911929e-07, "epoch": 0.7964893757423782, "percentage": 79.65, "elapsed_time": "0:19:17", "remaining_time": "0:04:55", "throughput": 2561.15, "total_tokens": 2965504}
|
|
{"current_steps": 6040, "total_steps": 7577, "loss": 0.274, "lr": 2.4070970443551673e-07, "epoch": 0.7971492675201267, "percentage": 79.71, "elapsed_time": "0:19:18", "remaining_time": "0:04:54", "throughput": 2562.37, "total_tokens": 2967744}
|
|
{"current_steps": 6045, "total_steps": 7577, "loss": 0.0032, "lr": 2.392126750611362e-07, "epoch": 0.7978091592978751, "percentage": 79.78, "elapsed_time": "0:19:18", "remaining_time": "0:04:53", "throughput": 2563.79, "total_tokens": 2970240}
|
|
{"current_steps": 6050, "total_steps": 7577, "loss": 0.1761, "lr": 2.3771968271185538e-07, "epoch": 0.7984690510756236, "percentage": 79.85, "elapsed_time": "0:19:18", "remaining_time": "0:04:52", "throughput": 2565.35, "total_tokens": 2972928}
|
|
{"current_steps": 6055, "total_steps": 7577, "loss": 0.1485, "lr": 2.3623073531005579e-07, "epoch": 0.799128942853372, "percentage": 79.91, "elapsed_time": "0:19:19", "remaining_time": "0:04:51", "throughput": 2566.56, "total_tokens": 2975168}
|
|
{"current_steps": 6060, "total_steps": 7577, "loss": 0.1396, "lr": 2.3474584075665493e-07, "epoch": 0.7997888346311205, "percentage": 79.98, "elapsed_time": "0:19:19", "remaining_time": "0:04:50", "throughput": 2567.76, "total_tokens": 2977408}
|
|
{"current_steps": 6064, "total_steps": 7577, "eval_loss": 0.09634685516357422, "epoch": 0.8003167480533192, "percentage": 80.03, "elapsed_time": "0:19:27", "remaining_time": "0:04:51", "throughput": 2551.28, "total_tokens": 2979456}
|
|
{"current_steps": 6065, "total_steps": 7577, "loss": 0.0051, "lr": 2.3326500693106533e-07, "epoch": 0.800448726408869, "percentage": 80.04, "elapsed_time": "0:20:12", "remaining_time": "0:05:02", "throughput": 2458.32, "total_tokens": 2979968}
|
|
{"current_steps": 6070, "total_steps": 7577, "loss": 0.2232, "lr": 2.3178824169114975e-07, "epoch": 0.8011086181866174, "percentage": 80.11, "elapsed_time": "0:20:12", "remaining_time": "0:05:01", "throughput": 2459.76, "total_tokens": 2982528}
|
|
{"current_steps": 6075, "total_steps": 7577, "loss": 0.0557, "lr": 2.303155528731837e-07, "epoch": 0.8017685099643659, "percentage": 80.18, "elapsed_time": "0:20:12", "remaining_time": "0:04:59", "throughput": 2461.0, "total_tokens": 2984832}
|
|
{"current_steps": 6080, "total_steps": 7577, "loss": 0.0009, "lr": 2.2884694829181016e-07, "epoch": 0.8024284017421143, "percentage": 80.24, "elapsed_time": "0:20:13", "remaining_time": "0:04:58", "throughput": 2462.39, "total_tokens": 2987328}
|
|
{"current_steps": 6085, "total_steps": 7577, "loss": 0.0703, "lr": 2.273824357400005e-07, "epoch": 0.8030882935198628, "percentage": 80.31, "elapsed_time": "0:20:13", "remaining_time": "0:04:57", "throughput": 2463.72, "total_tokens": 2989760}
|
|
{"current_steps": 6090, "total_steps": 7577, "loss": 0.0457, "lr": 2.2592202298901174e-07, "epoch": 0.8037481852976112, "percentage": 80.37, "elapsed_time": "0:20:13", "remaining_time": "0:04:56", "throughput": 2465.16, "total_tokens": 2992320}
|
|
{"current_steps": 6095, "total_steps": 7577, "loss": 0.0022, "lr": 2.2446571778834555e-07, "epoch": 0.8044080770753597, "percentage": 80.44, "elapsed_time": "0:20:14", "remaining_time": "0:04:55", "throughput": 2466.8, "total_tokens": 2995136}
|
|
{"current_steps": 6100, "total_steps": 7577, "loss": 0.0008, "lr": 2.2301352786570827e-07, "epoch": 0.805067968853108, "percentage": 80.51, "elapsed_time": "0:20:14", "remaining_time": "0:04:54", "throughput": 2468.48, "total_tokens": 2998016}
|
|
{"current_steps": 6105, "total_steps": 7577, "loss": 0.1533, "lr": 2.215654609269685e-07, "epoch": 0.8057278606308566, "percentage": 80.57, "elapsed_time": "0:20:14", "remaining_time": "0:04:52", "throughput": 2470.12, "total_tokens": 3000832}
|
|
{"current_steps": 6110, "total_steps": 7577, "loss": 0.1716, "lr": 2.201215246561161e-07, "epoch": 0.8063877524086049, "percentage": 80.64, "elapsed_time": "0:20:15", "remaining_time": "0:04:51", "throughput": 2471.71, "total_tokens": 3003584}
|
|
{"current_steps": 6115, "total_steps": 7577, "loss": 0.0731, "lr": 2.1868172671522357e-07, "epoch": 0.8070476441863534, "percentage": 80.7, "elapsed_time": "0:20:15", "remaining_time": "0:04:50", "throughput": 2473.39, "total_tokens": 3006464}
|
|
{"current_steps": 6120, "total_steps": 7577, "loss": 0.107, "lr": 2.1724607474440216e-07, "epoch": 0.8077075359641019, "percentage": 80.77, "elapsed_time": "0:20:15", "remaining_time": "0:04:49", "throughput": 2474.73, "total_tokens": 3008896}
|
|
{"current_steps": 6125, "total_steps": 7577, "loss": 0.1277, "lr": 2.158145763617646e-07, "epoch": 0.8083674277418503, "percentage": 80.84, "elapsed_time": "0:20:16", "remaining_time": "0:04:48", "throughput": 2476.11, "total_tokens": 3011392}
|
|
{"current_steps": 6130, "total_steps": 7577, "loss": 0.2135, "lr": 2.1438723916338198e-07, "epoch": 0.8090273195195988, "percentage": 80.9, "elapsed_time": "0:20:16", "remaining_time": "0:04:47", "throughput": 2477.59, "total_tokens": 3014016}
|
|
{"current_steps": 6135, "total_steps": 7577, "loss": 0.1745, "lr": 2.1296407072324495e-07, "epoch": 0.8096872112973472, "percentage": 80.97, "elapsed_time": "0:20:16", "remaining_time": "0:04:46", "throughput": 2479.03, "total_tokens": 3016576}
|
|
{"current_steps": 6140, "total_steps": 7577, "loss": 0.0661, "lr": 2.1154507859322336e-07, "epoch": 0.8103471030750957, "percentage": 81.03, "elapsed_time": "0:20:17", "remaining_time": "0:04:44", "throughput": 2480.36, "total_tokens": 3019008}
|
|
{"current_steps": 6145, "total_steps": 7577, "loss": 0.1033, "lr": 2.101302703030252e-07, "epoch": 0.8110069948528441, "percentage": 81.1, "elapsed_time": "0:20:17", "remaining_time": "0:04:43", "throughput": 2481.74, "total_tokens": 3021504}
|
|
{"current_steps": 6150, "total_steps": 7577, "loss": 0.0688, "lr": 2.0871965336015885e-07, "epoch": 0.8116668866305926, "percentage": 81.17, "elapsed_time": "0:20:17", "remaining_time": "0:04:42", "throughput": 2482.77, "total_tokens": 3023552}
|
|
{"current_steps": 6155, "total_steps": 7577, "loss": 0.0896, "lr": 2.0731323524989031e-07, "epoch": 0.812326778408341, "percentage": 81.23, "elapsed_time": "0:20:18", "remaining_time": "0:04:41", "throughput": 2484.0, "total_tokens": 3025856}
|
|
{"current_steps": 6160, "total_steps": 7577, "loss": 0.1915, "lr": 2.0591102343520616e-07, "epoch": 0.8129866701860895, "percentage": 81.3, "elapsed_time": "0:20:18", "remaining_time": "0:04:40", "throughput": 2485.18, "total_tokens": 3028096}
|
|
{"current_steps": 6165, "total_steps": 7577, "loss": 0.1774, "lr": 2.0451302535677206e-07, "epoch": 0.8136465619638379, "percentage": 81.36, "elapsed_time": "0:20:18", "remaining_time": "0:04:39", "throughput": 2486.49, "total_tokens": 3030528}
|
|
{"current_steps": 6170, "total_steps": 7577, "loss": 0.129, "lr": 2.0311924843289396e-07, "epoch": 0.8143064537415864, "percentage": 81.43, "elapsed_time": "0:20:19", "remaining_time": "0:04:38", "throughput": 2487.9, "total_tokens": 3033088}
|
|
{"current_steps": 6175, "total_steps": 7577, "loss": 0.0907, "lr": 2.017297000594794e-07, "epoch": 0.8149663455193348, "percentage": 81.5, "elapsed_time": "0:20:19", "remaining_time": "0:04:36", "throughput": 2488.97, "total_tokens": 3035200}
|
|
{"current_steps": 6180, "total_steps": 7577, "loss": 0.048, "lr": 2.0034438760999696e-07, "epoch": 0.8156262372970833, "percentage": 81.56, "elapsed_time": "0:20:19", "remaining_time": "0:04:35", "throughput": 2490.33, "total_tokens": 3037696}
|
|
{"current_steps": 6185, "total_steps": 7577, "loss": 0.1593, "lr": 1.9896331843543856e-07, "epoch": 0.8162861290748318, "percentage": 81.63, "elapsed_time": "0:20:20", "remaining_time": "0:04:34", "throughput": 2491.65, "total_tokens": 3040128}
|
|
{"current_steps": 6190, "total_steps": 7577, "loss": 0.1511, "lr": 1.975864998642789e-07, "epoch": 0.8169460208525802, "percentage": 81.69, "elapsed_time": "0:20:20", "remaining_time": "0:04:33", "throughput": 2492.97, "total_tokens": 3042560}
|
|
{"current_steps": 6195, "total_steps": 7577, "loss": 0.2786, "lr": 1.9621393920243767e-07, "epoch": 0.8176059126303287, "percentage": 81.76, "elapsed_time": "0:20:20", "remaining_time": "0:04:32", "throughput": 2494.12, "total_tokens": 3044800}
|
|
{"current_steps": 6200, "total_steps": 7577, "loss": 0.062, "lr": 1.9484564373324074e-07, "epoch": 0.8182658044080771, "percentage": 81.83, "elapsed_time": "0:20:21", "remaining_time": "0:04:31", "throughput": 2495.28, "total_tokens": 3047040}
|
|
{"current_steps": 6205, "total_steps": 7577, "loss": 0.0633, "lr": 1.934816207173805e-07, "epoch": 0.8189256961858256, "percentage": 81.89, "elapsed_time": "0:20:21", "remaining_time": "0:04:30", "throughput": 2496.7, "total_tokens": 3049600}
|
|
{"current_steps": 6210, "total_steps": 7577, "loss": 0.1436, "lr": 1.9212187739287943e-07, "epoch": 0.819585587963574, "percentage": 81.96, "elapsed_time": "0:20:21", "remaining_time": "0:04:28", "throughput": 2498.3, "total_tokens": 3052416}
|
|
{"current_steps": 6215, "total_steps": 7577, "loss": 0.0028, "lr": 1.907664209750488e-07, "epoch": 0.8202454797413224, "percentage": 82.02, "elapsed_time": "0:20:22", "remaining_time": "0:04:27", "throughput": 2499.77, "total_tokens": 3055040}
|
|
{"current_steps": 6220, "total_steps": 7577, "loss": 0.0553, "lr": 1.8941525865645336e-07, "epoch": 0.8209053715190708, "percentage": 82.09, "elapsed_time": "0:20:22", "remaining_time": "0:04:26", "throughput": 2501.39, "total_tokens": 3057856}
|
|
{"current_steps": 6225, "total_steps": 7577, "loss": 0.209, "lr": 1.8806839760687076e-07, "epoch": 0.8215652632968193, "percentage": 82.16, "elapsed_time": "0:20:22", "remaining_time": "0:04:25", "throughput": 2502.61, "total_tokens": 3060160}
|
|
{"current_steps": 6230, "total_steps": 7577, "loss": 0.1147, "lr": 1.867258449732545e-07, "epoch": 0.8222251550745677, "percentage": 82.22, "elapsed_time": "0:20:23", "remaining_time": "0:04:24", "throughput": 2503.93, "total_tokens": 3062592}
|
|
{"current_steps": 6235, "total_steps": 7577, "loss": 0.0562, "lr": 1.8538760787969676e-07, "epoch": 0.8228850468523162, "percentage": 82.29, "elapsed_time": "0:20:23", "remaining_time": "0:04:23", "throughput": 2505.31, "total_tokens": 3065088}
|
|
{"current_steps": 6240, "total_steps": 7577, "loss": 0.0017, "lr": 1.8405369342738907e-07, "epoch": 0.8235449386300646, "percentage": 82.35, "elapsed_time": "0:20:23", "remaining_time": "0:04:22", "throughput": 2506.77, "total_tokens": 3067712}
|
|
{"current_steps": 6245, "total_steps": 7577, "loss": 0.0602, "lr": 1.8272410869458598e-07, "epoch": 0.8242048304078131, "percentage": 82.42, "elapsed_time": "0:20:24", "remaining_time": "0:04:21", "throughput": 2508.09, "total_tokens": 3070144}
|
|
{"current_steps": 6250, "total_steps": 7577, "loss": 0.2637, "lr": 1.8139886073656653e-07, "epoch": 0.8248647221855616, "percentage": 82.49, "elapsed_time": "0:20:24", "remaining_time": "0:04:19", "throughput": 2509.31, "total_tokens": 3072448}
|
|
{"current_steps": 6255, "total_steps": 7577, "loss": 0.2488, "lr": 1.800779565855971e-07, "epoch": 0.82552461396331, "percentage": 82.55, "elapsed_time": "0:20:24", "remaining_time": "0:04:18", "throughput": 2510.77, "total_tokens": 3075072}
|
|
{"current_steps": 6260, "total_steps": 7577, "loss": 0.0066, "lr": 1.7876140325089463e-07, "epoch": 0.8261845057410585, "percentage": 82.62, "elapsed_time": "0:20:25", "remaining_time": "0:04:17", "throughput": 2511.98, "total_tokens": 3077376}
|
|
{"current_steps": 6265, "total_steps": 7577, "loss": 0.1657, "lr": 1.774492077185883e-07, "epoch": 0.8268443975188069, "percentage": 82.68, "elapsed_time": "0:20:25", "remaining_time": "0:04:16", "throughput": 2513.29, "total_tokens": 3079808}
|
|
{"current_steps": 6270, "total_steps": 7577, "loss": 0.0009, "lr": 1.7614137695168408e-07, "epoch": 0.8275042892965554, "percentage": 82.75, "elapsed_time": "0:20:25", "remaining_time": "0:04:15", "throughput": 2514.85, "total_tokens": 3082560}
|
|
{"current_steps": 6275, "total_steps": 7577, "loss": 0.0715, "lr": 1.748379178900261e-07, "epoch": 0.8281641810743038, "percentage": 82.82, "elapsed_time": "0:20:26", "remaining_time": "0:04:14", "throughput": 2515.86, "total_tokens": 3084608}
|
|
{"current_steps": 6280, "total_steps": 7577, "loss": 0.229, "lr": 1.7353883745026055e-07, "epoch": 0.8288240728520523, "percentage": 82.88, "elapsed_time": "0:20:26", "remaining_time": "0:04:13", "throughput": 2517.22, "total_tokens": 3087104}
|
|
{"current_steps": 6285, "total_steps": 7577, "loss": 0.1264, "lr": 1.722441425257999e-07, "epoch": 0.8294839646298007, "percentage": 82.95, "elapsed_time": "0:20:26", "remaining_time": "0:04:12", "throughput": 2518.43, "total_tokens": 3089408}
|
|
{"current_steps": 6290, "total_steps": 7577, "loss": 0.0197, "lr": 1.7095383998678402e-07, "epoch": 0.8301438564075492, "percentage": 83.01, "elapsed_time": "0:20:27", "remaining_time": "0:04:11", "throughput": 2519.69, "total_tokens": 3091776}
|
|
{"current_steps": 6295, "total_steps": 7577, "loss": 0.113, "lr": 1.6966793668004653e-07, "epoch": 0.8308037481852976, "percentage": 83.08, "elapsed_time": "0:20:27", "remaining_time": "0:04:09", "throughput": 2520.99, "total_tokens": 3094208}
|
|
{"current_steps": 6300, "total_steps": 7577, "loss": 0.006, "lr": 1.6838643942907625e-07, "epoch": 0.8314636399630461, "percentage": 83.15, "elapsed_time": "0:20:27", "remaining_time": "0:04:08", "throughput": 2522.4, "total_tokens": 3096768}
|
|
{"current_steps": 6305, "total_steps": 7577, "loss": 0.0423, "lr": 1.671093550339815e-07, "epoch": 0.8321235317407946, "percentage": 83.21, "elapsed_time": "0:20:28", "remaining_time": "0:04:07", "throughput": 2523.91, "total_tokens": 3099456}
|
|
{"current_steps": 6310, "total_steps": 7577, "loss": 0.0422, "lr": 1.6583669027145542e-07, "epoch": 0.832783423518543, "percentage": 83.28, "elapsed_time": "0:20:28", "remaining_time": "0:04:06", "throughput": 2525.46, "total_tokens": 3102208}
|
|
{"current_steps": 6315, "total_steps": 7577, "loss": 0.0013, "lr": 1.6456845189473767e-07, "epoch": 0.8334433152962915, "percentage": 83.34, "elapsed_time": "0:20:28", "remaining_time": "0:04:05", "throughput": 2526.97, "total_tokens": 3104896}
|
|
{"current_steps": 6320, "total_steps": 7577, "loss": 0.0676, "lr": 1.6330464663358123e-07, "epoch": 0.8341032070740398, "percentage": 83.41, "elapsed_time": "0:20:29", "remaining_time": "0:04:04", "throughput": 2528.42, "total_tokens": 3107520}
|
|
{"current_steps": 6325, "total_steps": 7577, "loss": 0.0017, "lr": 1.6204528119421346e-07, "epoch": 0.8347630988517883, "percentage": 83.48, "elapsed_time": "0:20:29", "remaining_time": "0:04:03", "throughput": 2529.88, "total_tokens": 3110144}
|
|
{"current_steps": 6330, "total_steps": 7577, "loss": 0.0483, "lr": 1.607903622593042e-07, "epoch": 0.8354229906295367, "percentage": 83.54, "elapsed_time": "0:20:29", "remaining_time": "0:04:02", "throughput": 2531.34, "total_tokens": 3112768}
|
|
{"current_steps": 6335, "total_steps": 7577, "loss": 0.0005, "lr": 1.5953989648792743e-07, "epoch": 0.8360828824072852, "percentage": 83.61, "elapsed_time": "0:20:30", "remaining_time": "0:04:01", "throughput": 2532.75, "total_tokens": 3115328}
|
|
{"current_steps": 6340, "total_steps": 7577, "loss": 0.0166, "lr": 1.5829389051552678e-07, "epoch": 0.8367427741850336, "percentage": 83.67, "elapsed_time": "0:20:30", "remaining_time": "0:04:00", "throughput": 2534.16, "total_tokens": 3117888}
|
|
{"current_steps": 6345, "total_steps": 7577, "loss": 0.0081, "lr": 1.5705235095388136e-07, "epoch": 0.8374026659627821, "percentage": 83.74, "elapsed_time": "0:20:30", "remaining_time": "0:03:58", "throughput": 2535.51, "total_tokens": 3120384}
|
|
{"current_steps": 6350, "total_steps": 7577, "loss": 0.0288, "lr": 1.5581528439106907e-07, "epoch": 0.8380625577405305, "percentage": 83.81, "elapsed_time": "0:20:31", "remaining_time": "0:03:57", "throughput": 2536.96, "total_tokens": 3123008}
|
|
{"current_steps": 6355, "total_steps": 7577, "loss": 0.0373, "lr": 1.5458269739143292e-07, "epoch": 0.838722449518279, "percentage": 83.87, "elapsed_time": "0:20:31", "remaining_time": "0:03:56", "throughput": 2538.32, "total_tokens": 3125504}
|
|
{"current_steps": 6360, "total_steps": 7577, "loss": 0.0028, "lr": 1.5335459649554538e-07, "epoch": 0.8393823412960274, "percentage": 83.94, "elapsed_time": "0:20:31", "remaining_time": "0:03:55", "throughput": 2539.47, "total_tokens": 3127744}
|
|
{"current_steps": 6365, "total_steps": 7577, "loss": 0.1164, "lr": 1.5213098822017357e-07, "epoch": 0.8400422330737759, "percentage": 84.0, "elapsed_time": "0:20:31", "remaining_time": "0:03:54", "throughput": 2540.68, "total_tokens": 3130048}
|
|
{"current_steps": 6370, "total_steps": 7577, "loss": 0.0148, "lr": 1.50911879058246e-07, "epoch": 0.8407021248515244, "percentage": 84.07, "elapsed_time": "0:20:32", "remaining_time": "0:03:53", "throughput": 2541.98, "total_tokens": 3132480}
|
|
{"current_steps": 6375, "total_steps": 7577, "loss": 0.0826, "lr": 1.4969727547881628e-07, "epoch": 0.8413620166292728, "percentage": 84.14, "elapsed_time": "0:20:32", "remaining_time": "0:03:52", "throughput": 2543.43, "total_tokens": 3135104}
|
|
{"current_steps": 6380, "total_steps": 7577, "loss": 0.1787, "lr": 1.4848718392703052e-07, "epoch": 0.8420219084070213, "percentage": 84.2, "elapsed_time": "0:20:32", "remaining_time": "0:03:51", "throughput": 2544.58, "total_tokens": 3137344}
|
|
{"current_steps": 6385, "total_steps": 7577, "loss": 0.1727, "lr": 1.472816108240915e-07, "epoch": 0.8426818001847697, "percentage": 84.27, "elapsed_time": "0:20:33", "remaining_time": "0:03:50", "throughput": 2546.13, "total_tokens": 3140096}
|
|
{"current_steps": 6390, "total_steps": 7577, "loss": 0.0868, "lr": 1.46080562567226e-07, "epoch": 0.8433416919625182, "percentage": 84.33, "elapsed_time": "0:20:33", "remaining_time": "0:03:49", "throughput": 2547.33, "total_tokens": 3142400}
|
|
{"current_steps": 6395, "total_steps": 7577, "loss": 0.0302, "lr": 1.4488404552964993e-07, "epoch": 0.8440015837402666, "percentage": 84.4, "elapsed_time": "0:20:33", "remaining_time": "0:03:48", "throughput": 2548.38, "total_tokens": 3144512}
|
|
{"current_steps": 6400, "total_steps": 7577, "loss": 0.0434, "lr": 1.4369206606053463e-07, "epoch": 0.8446614755180151, "percentage": 84.47, "elapsed_time": "0:20:34", "remaining_time": "0:03:46", "throughput": 2549.68, "total_tokens": 3146944}
|
|
{"current_steps": 6405, "total_steps": 7577, "loss": 0.0951, "lr": 1.425046304849742e-07, "epoch": 0.8453213672957635, "percentage": 84.53, "elapsed_time": "0:20:34", "remaining_time": "0:03:45", "throughput": 2550.98, "total_tokens": 3149376}
|
|
{"current_steps": 6410, "total_steps": 7577, "loss": 0.1082, "lr": 1.4132174510395024e-07, "epoch": 0.845981259073512, "percentage": 84.6, "elapsed_time": "0:20:34", "remaining_time": "0:03:44", "throughput": 2552.23, "total_tokens": 3151744}
|
|
{"current_steps": 6415, "total_steps": 7577, "loss": 0.0054, "lr": 1.4014341619430003e-07, "epoch": 0.8466411508512603, "percentage": 84.66, "elapsed_time": "0:20:35", "remaining_time": "0:03:43", "throughput": 2553.47, "total_tokens": 3154112}
|
|
{"current_steps": 6420, "total_steps": 7577, "loss": 0.0044, "lr": 1.3896965000868188e-07, "epoch": 0.8473010426290088, "percentage": 84.73, "elapsed_time": "0:20:35", "remaining_time": "0:03:42", "throughput": 2554.72, "total_tokens": 3156480}
|
|
{"current_steps": 6425, "total_steps": 7577, "loss": 0.1166, "lr": 1.3780045277554276e-07, "epoch": 0.8479609344067572, "percentage": 84.8, "elapsed_time": "0:20:35", "remaining_time": "0:03:41", "throughput": 2555.92, "total_tokens": 3158784}
|
|
{"current_steps": 6430, "total_steps": 7577, "loss": 0.1708, "lr": 1.3663583069908535e-07, "epoch": 0.8486208261845057, "percentage": 84.86, "elapsed_time": "0:20:36", "remaining_time": "0:03:40", "throughput": 2557.16, "total_tokens": 3161152}
|
|
{"current_steps": 6435, "total_steps": 7577, "loss": 0.0101, "lr": 1.3547578995923447e-07, "epoch": 0.8492807179622542, "percentage": 84.93, "elapsed_time": "0:20:36", "remaining_time": "0:03:39", "throughput": 2558.6, "total_tokens": 3163776}
|
|
{"current_steps": 6440, "total_steps": 7577, "loss": 0.171, "lr": 1.3432033671160458e-07, "epoch": 0.8499406097400026, "percentage": 84.99, "elapsed_time": "0:20:36", "remaining_time": "0:03:38", "throughput": 2559.95, "total_tokens": 3166272}
|
|
{"current_steps": 6443, "total_steps": 7577, "eval_loss": 0.10069070011377335, "epoch": 0.8503365448066517, "percentage": 85.03, "elapsed_time": "0:20:44", "remaining_time": "0:03:39", "throughput": 2544.33, "total_tokens": 3167488}
|
|
{"current_steps": 6445, "total_steps": 7577, "loss": 0.0712, "lr": 1.3316947708746762e-07, "epoch": 0.8506005015177511, "percentage": 85.06, "elapsed_time": "0:21:43", "remaining_time": "0:03:48", "throughput": 2430.55, "total_tokens": 3168640}
|
|
{"current_steps": 6450, "total_steps": 7577, "loss": 0.0922, "lr": 1.3202321719371967e-07, "epoch": 0.8512603932954995, "percentage": 85.13, "elapsed_time": "0:21:43", "remaining_time": "0:03:47", "throughput": 2431.76, "total_tokens": 3171008}
|
|
{"current_steps": 6455, "total_steps": 7577, "loss": 0.0878, "lr": 1.3088156311284893e-07, "epoch": 0.851920285073248, "percentage": 85.19, "elapsed_time": "0:21:44", "remaining_time": "0:03:46", "throughput": 2432.92, "total_tokens": 3173312}
|
|
{"current_steps": 6460, "total_steps": 7577, "loss": 0.2677, "lr": 1.2974452090290322e-07, "epoch": 0.8525801768509964, "percentage": 85.26, "elapsed_time": "0:21:44", "remaining_time": "0:03:45", "throughput": 2434.22, "total_tokens": 3175808}
|
|
{"current_steps": 6465, "total_steps": 7577, "loss": 0.0748, "lr": 1.2861209659745865e-07, "epoch": 0.8532400686287449, "percentage": 85.32, "elapsed_time": "0:21:44", "remaining_time": "0:03:44", "throughput": 2435.34, "total_tokens": 3178048}
|
|
{"current_steps": 6470, "total_steps": 7577, "loss": 0.1044, "lr": 1.2748429620558654e-07, "epoch": 0.8538999604064933, "percentage": 85.39, "elapsed_time": "0:21:45", "remaining_time": "0:03:43", "throughput": 2436.64, "total_tokens": 3180544}
|
|
{"current_steps": 6475, "total_steps": 7577, "loss": 0.1888, "lr": 1.2636112571182167e-07, "epoch": 0.8545598521842418, "percentage": 85.46, "elapsed_time": "0:21:45", "remaining_time": "0:03:42", "throughput": 2437.94, "total_tokens": 3183040}
|
|
{"current_steps": 6480, "total_steps": 7577, "loss": 0.1613, "lr": 1.2524259107613178e-07, "epoch": 0.8552197439619902, "percentage": 85.52, "elapsed_time": "0:21:45", "remaining_time": "0:03:41", "throughput": 2439.33, "total_tokens": 3185664}
|
|
{"current_steps": 6485, "total_steps": 7577, "loss": 0.1552, "lr": 1.2412869823388382e-07, "epoch": 0.8558796357397387, "percentage": 85.59, "elapsed_time": "0:21:46", "remaining_time": "0:03:39", "throughput": 2440.99, "total_tokens": 3188672}
|
|
{"current_steps": 6490, "total_steps": 7577, "loss": 0.0598, "lr": 1.2301945309581486e-07, "epoch": 0.8565395275174872, "percentage": 85.65, "elapsed_time": "0:21:46", "remaining_time": "0:03:38", "throughput": 2442.29, "total_tokens": 3191168}
|
|
{"current_steps": 6495, "total_steps": 7577, "loss": 0.116, "lr": 1.2191486154799846e-07, "epoch": 0.8571994192952356, "percentage": 85.72, "elapsed_time": "0:21:46", "remaining_time": "0:03:37", "throughput": 2443.58, "total_tokens": 3193664}
|
|
{"current_steps": 6500, "total_steps": 7577, "loss": 0.001, "lr": 1.208149294518147e-07, "epoch": 0.8578593110729841, "percentage": 85.79, "elapsed_time": "0:21:47", "remaining_time": "0:03:36", "throughput": 2444.92, "total_tokens": 3196224}
|
|
{"current_steps": 6505, "total_steps": 7577, "loss": 0.1906, "lr": 1.1971966264391954e-07, "epoch": 0.8585192028507325, "percentage": 85.85, "elapsed_time": "0:21:47", "remaining_time": "0:03:35", "throughput": 2446.26, "total_tokens": 3198784}
|
|
{"current_steps": 6510, "total_steps": 7577, "loss": 0.1289, "lr": 1.1862906693621233e-07, "epoch": 0.859179094628481, "percentage": 85.92, "elapsed_time": "0:21:47", "remaining_time": "0:03:34", "throughput": 2447.69, "total_tokens": 3201472}
|
|
{"current_steps": 6515, "total_steps": 7577, "loss": 0.1718, "lr": 1.1754314811580623e-07, "epoch": 0.8598389864062294, "percentage": 85.98, "elapsed_time": "0:21:48", "remaining_time": "0:03:33", "throughput": 2448.7, "total_tokens": 3203584}
|
|
{"current_steps": 6520, "total_steps": 7577, "loss": 0.0728, "lr": 1.1646191194499655e-07, "epoch": 0.8604988781839779, "percentage": 86.05, "elapsed_time": "0:21:48", "remaining_time": "0:03:32", "throughput": 2449.85, "total_tokens": 3205888}
|
|
{"current_steps": 6525, "total_steps": 7577, "loss": 0.1805, "lr": 1.1538536416123168e-07, "epoch": 0.8611587699617262, "percentage": 86.12, "elapsed_time": "0:21:48", "remaining_time": "0:03:31", "throughput": 2450.86, "total_tokens": 3208000}
|
|
{"current_steps": 6530, "total_steps": 7577, "loss": 0.0119, "lr": 1.1431351047708072e-07, "epoch": 0.8618186617394747, "percentage": 86.18, "elapsed_time": "0:21:49", "remaining_time": "0:03:29", "throughput": 2451.96, "total_tokens": 3210240}
|
|
{"current_steps": 6535, "total_steps": 7577, "loss": 0.1218, "lr": 1.1324635658020432e-07, "epoch": 0.8624785535172231, "percentage": 86.25, "elapsed_time": "0:21:49", "remaining_time": "0:03:28", "throughput": 2453.2, "total_tokens": 3212672}
|
|
{"current_steps": 6540, "total_steps": 7577, "loss": 0.1695, "lr": 1.1218390813332479e-07, "epoch": 0.8631384452949716, "percentage": 86.31, "elapsed_time": "0:21:49", "remaining_time": "0:03:27", "throughput": 2454.62, "total_tokens": 3215360}
|
|
{"current_steps": 6545, "total_steps": 7577, "loss": 0.1279, "lr": 1.1112617077419472e-07, "epoch": 0.86379833707272, "percentage": 86.38, "elapsed_time": "0:21:50", "remaining_time": "0:03:26", "throughput": 2456.09, "total_tokens": 3218112}
|
|
{"current_steps": 6550, "total_steps": 7577, "loss": 0.0386, "lr": 1.1007315011556884e-07, "epoch": 0.8644582288504685, "percentage": 86.45, "elapsed_time": "0:21:50", "remaining_time": "0:03:25", "throughput": 2457.14, "total_tokens": 3220288}
|
|
{"current_steps": 6555, "total_steps": 7577, "loss": 0.0013, "lr": 1.0902485174517251e-07, "epoch": 0.865118120628217, "percentage": 86.51, "elapsed_time": "0:21:50", "remaining_time": "0:03:24", "throughput": 2458.56, "total_tokens": 3222976}
|
|
{"current_steps": 6560, "total_steps": 7577, "loss": 0.0532, "lr": 1.0798128122567285e-07, "epoch": 0.8657780124059654, "percentage": 86.58, "elapsed_time": "0:21:51", "remaining_time": "0:03:23", "throughput": 2459.84, "total_tokens": 3225472}
|
|
{"current_steps": 6565, "total_steps": 7577, "loss": 0.1791, "lr": 1.0694244409464992e-07, "epoch": 0.8664379041837139, "percentage": 86.64, "elapsed_time": "0:21:51", "remaining_time": "0:03:22", "throughput": 2461.23, "total_tokens": 3228096}
|
|
{"current_steps": 6570, "total_steps": 7577, "loss": 0.1216, "lr": 1.0590834586456577e-07, "epoch": 0.8670977959614623, "percentage": 86.71, "elapsed_time": "0:21:51", "remaining_time": "0:03:21", "throughput": 2462.61, "total_tokens": 3230720}
|
|
{"current_steps": 6575, "total_steps": 7577, "loss": 0.1303, "lr": 1.0487899202273708e-07, "epoch": 0.8677576877392108, "percentage": 86.78, "elapsed_time": "0:21:52", "remaining_time": "0:03:19", "throughput": 2463.8, "total_tokens": 3233088}
|
|
{"current_steps": 6580, "total_steps": 7577, "loss": 0.1208, "lr": 1.0385438803130364e-07, "epoch": 0.8684175795169592, "percentage": 86.84, "elapsed_time": "0:21:52", "remaining_time": "0:03:18", "throughput": 2465.18, "total_tokens": 3235712}
|
|
{"current_steps": 6585, "total_steps": 7577, "loss": 0.1107, "lr": 1.0283453932720199e-07, "epoch": 0.8690774712947077, "percentage": 86.91, "elapsed_time": "0:21:52", "remaining_time": "0:03:17", "throughput": 2466.7, "total_tokens": 3238528}
|
|
{"current_steps": 6590, "total_steps": 7577, "loss": 0.0808, "lr": 1.0181945132213476e-07, "epoch": 0.8697373630724561, "percentage": 86.97, "elapsed_time": "0:21:53", "remaining_time": "0:03:16", "throughput": 2467.89, "total_tokens": 3240896}
|
|
{"current_steps": 6595, "total_steps": 7577, "loss": 0.0017, "lr": 1.0080912940254227e-07, "epoch": 0.8703972548502046, "percentage": 87.04, "elapsed_time": "0:21:53", "remaining_time": "0:03:15", "throughput": 2469.15, "total_tokens": 3243392}
|
|
{"current_steps": 6600, "total_steps": 7577, "loss": 0.0143, "lr": 9.980357892957492e-08, "epoch": 0.871057146627953, "percentage": 87.11, "elapsed_time": "0:21:53", "remaining_time": "0:03:14", "throughput": 2470.38, "total_tokens": 3245824}
|
|
{"current_steps": 6605, "total_steps": 7577, "loss": 0.0015, "lr": 9.880280523906337e-08, "epoch": 0.8717170384057015, "percentage": 87.17, "elapsed_time": "0:21:54", "remaining_time": "0:03:13", "throughput": 2471.51, "total_tokens": 3248128}
|
|
{"current_steps": 6610, "total_steps": 7577, "loss": 0.1882, "lr": 9.780681364149091e-08, "epoch": 0.8723769301834499, "percentage": 87.24, "elapsed_time": "0:21:54", "remaining_time": "0:03:12", "throughput": 2472.78, "total_tokens": 3250624}
|
|
{"current_steps": 6615, "total_steps": 7577, "loss": 0.108, "lr": 9.681560942196587e-08, "epoch": 0.8730368219611984, "percentage": 87.3, "elapsed_time": "0:21:54", "remaining_time": "0:03:11", "throughput": 2474.2, "total_tokens": 3253312}
|
|
{"current_steps": 6620, "total_steps": 7577, "loss": 0.1163, "lr": 9.582919784019194e-08, "epoch": 0.8736967137389469, "percentage": 87.37, "elapsed_time": "0:21:55", "remaining_time": "0:03:10", "throughput": 2475.25, "total_tokens": 3255488}
|
|
{"current_steps": 6625, "total_steps": 7577, "loss": 0.0734, "lr": 9.484758413044236e-08, "epoch": 0.8743566055166953, "percentage": 87.44, "elapsed_time": "0:21:55", "remaining_time": "0:03:09", "throughput": 2476.29, "total_tokens": 3257664}
|
|
{"current_steps": 6630, "total_steps": 7577, "loss": 0.099, "lr": 9.387077350153017e-08, "epoch": 0.8750164972944438, "percentage": 87.5, "elapsed_time": "0:21:55", "remaining_time": "0:03:07", "throughput": 2477.56, "total_tokens": 3260160}
|
|
{"current_steps": 6635, "total_steps": 7577, "loss": 0.0644, "lr": 9.289877113678168e-08, "epoch": 0.8756763890721921, "percentage": 87.57, "elapsed_time": "0:21:56", "remaining_time": "0:03:06", "throughput": 2478.74, "total_tokens": 3262528}
|
|
{"current_steps": 6640, "total_steps": 7577, "loss": 0.0668, "lr": 9.19315821940092e-08, "epoch": 0.8763362808499406, "percentage": 87.63, "elapsed_time": "0:21:56", "remaining_time": "0:03:05", "throughput": 2480.02, "total_tokens": 3265024}
|
|
{"current_steps": 6645, "total_steps": 7577, "loss": 0.157, "lr": 9.096921180548234e-08, "epoch": 0.876996172627689, "percentage": 87.7, "elapsed_time": "0:21:56", "remaining_time": "0:03:04", "throughput": 2481.24, "total_tokens": 3267456}
|
|
{"current_steps": 6650, "total_steps": 7577, "loss": 0.1047, "lr": 9.001166507790259e-08, "epoch": 0.8776560644054375, "percentage": 87.77, "elapsed_time": "0:21:57", "remaining_time": "0:03:03", "throughput": 2482.7, "total_tokens": 3270208}
|
|
{"current_steps": 6655, "total_steps": 7577, "loss": 0.1139, "lr": 8.905894709237427e-08, "epoch": 0.8783159561831859, "percentage": 87.83, "elapsed_time": "0:21:57", "remaining_time": "0:03:02", "throughput": 2484.16, "total_tokens": 3272960}
|
|
{"current_steps": 6660, "total_steps": 7577, "loss": 0.083, "lr": 8.811106290437975e-08, "epoch": 0.8789758479609344, "percentage": 87.9, "elapsed_time": "0:21:57", "remaining_time": "0:03:01", "throughput": 2485.2, "total_tokens": 3275136}
|
|
{"current_steps": 6665, "total_steps": 7577, "loss": 0.0826, "lr": 8.716801754375036e-08, "epoch": 0.8796357397386828, "percentage": 87.96, "elapsed_time": "0:21:58", "remaining_time": "0:03:00", "throughput": 2486.52, "total_tokens": 3277696}
|
|
{"current_steps": 6670, "total_steps": 7577, "loss": 0.1297, "lr": 8.62298160146413e-08, "epoch": 0.8802956315164313, "percentage": 88.03, "elapsed_time": "0:21:58", "remaining_time": "0:02:59", "throughput": 2487.7, "total_tokens": 3280064}
|
|
{"current_steps": 6675, "total_steps": 7577, "loss": 0.0166, "lr": 8.529646329550466e-08, "epoch": 0.8809555232941798, "percentage": 88.1, "elapsed_time": "0:21:58", "remaining_time": "0:02:58", "throughput": 2488.78, "total_tokens": 3282304}
|
|
{"current_steps": 6680, "total_steps": 7577, "loss": 0.03, "lr": 8.436796433906235e-08, "epoch": 0.8816154150719282, "percentage": 88.16, "elapsed_time": "0:21:59", "remaining_time": "0:02:57", "throughput": 2490.0, "total_tokens": 3284736}
|
|
{"current_steps": 6685, "total_steps": 7577, "loss": 0.0714, "lr": 8.344432407228141e-08, "epoch": 0.8822753068496767, "percentage": 88.23, "elapsed_time": "0:21:59", "remaining_time": "0:02:56", "throughput": 2491.23, "total_tokens": 3287168}
|
|
{"current_steps": 6690, "total_steps": 7577, "loss": 0.0623, "lr": 8.252554739634577e-08, "epoch": 0.8829351986274251, "percentage": 88.29, "elapsed_time": "0:21:59", "remaining_time": "0:02:54", "throughput": 2492.45, "total_tokens": 3289600}
|
|
{"current_steps": 6695, "total_steps": 7577, "loss": 0.0589, "lr": 8.16116391866316e-08, "epoch": 0.8835950904051736, "percentage": 88.36, "elapsed_time": "0:22:00", "remaining_time": "0:02:53", "throughput": 2493.76, "total_tokens": 3292160}
|
|
{"current_steps": 6700, "total_steps": 7577, "loss": 0.1393, "lr": 8.070260429268172e-08, "epoch": 0.884254982182922, "percentage": 88.43, "elapsed_time": "0:22:00", "remaining_time": "0:02:52", "throughput": 2494.97, "total_tokens": 3294592}
|
|
{"current_steps": 6705, "total_steps": 7577, "loss": 0.0539, "lr": 7.979844753817855e-08, "epoch": 0.8849148739606705, "percentage": 88.49, "elapsed_time": "0:22:00", "remaining_time": "0:02:51", "throughput": 2496.14, "total_tokens": 3296960}
|
|
{"current_steps": 6710, "total_steps": 7577, "loss": 0.0709, "lr": 7.889917372091982e-08, "epoch": 0.8855747657384189, "percentage": 88.56, "elapsed_time": "0:22:01", "remaining_time": "0:02:50", "throughput": 2497.23, "total_tokens": 3299200}
|
|
{"current_steps": 6715, "total_steps": 7577, "loss": 0.2047, "lr": 7.800478761279183e-08, "epoch": 0.8862346575161674, "percentage": 88.62, "elapsed_time": "0:22:01", "remaining_time": "0:02:49", "throughput": 2498.4, "total_tokens": 3301568}
|
|
{"current_steps": 6720, "total_steps": 7577, "loss": 0.1478, "lr": 7.711529395974592e-08, "epoch": 0.8868945492939158, "percentage": 88.69, "elapsed_time": "0:22:01", "remaining_time": "0:02:48", "throughput": 2499.66, "total_tokens": 3304064}
|
|
{"current_steps": 6725, "total_steps": 7577, "loss": 0.1855, "lr": 7.623069748177135e-08, "epoch": 0.8875544410716643, "percentage": 88.76, "elapsed_time": "0:22:02", "remaining_time": "0:02:47", "throughput": 2500.83, "total_tokens": 3306432}
|
|
{"current_steps": 6730, "total_steps": 7577, "loss": 0.0866, "lr": 7.535100287287111e-08, "epoch": 0.8882143328494126, "percentage": 88.82, "elapsed_time": "0:22:02", "remaining_time": "0:02:46", "throughput": 2501.95, "total_tokens": 3308736}
|
|
{"current_steps": 6735, "total_steps": 7577, "loss": 0.0024, "lr": 7.447621480103783e-08, "epoch": 0.8888742246271611, "percentage": 88.89, "elapsed_time": "0:22:02", "remaining_time": "0:02:45", "throughput": 2503.17, "total_tokens": 3311168}
|
|
{"current_steps": 6740, "total_steps": 7577, "loss": 0.2758, "lr": 7.360633790822713e-08, "epoch": 0.8895341164049096, "percentage": 88.95, "elapsed_time": "0:22:03", "remaining_time": "0:02:44", "throughput": 2504.43, "total_tokens": 3313664}
|
|
{"current_steps": 6745, "total_steps": 7577, "loss": 0.044, "lr": 7.274137681033498e-08, "epoch": 0.890194008182658, "percentage": 89.02, "elapsed_time": "0:22:03", "remaining_time": "0:02:43", "throughput": 2505.75, "total_tokens": 3316224}
|
|
{"current_steps": 6750, "total_steps": 7577, "loss": 0.0791, "lr": 7.188133609717184e-08, "epoch": 0.8908538999604065, "percentage": 89.09, "elapsed_time": "0:22:03", "remaining_time": "0:02:42", "throughput": 2506.81, "total_tokens": 3318464}
|
|
{"current_steps": 6755, "total_steps": 7577, "loss": 0.0008, "lr": 7.102622033243843e-08, "epoch": 0.8915137917381549, "percentage": 89.15, "elapsed_time": "0:22:04", "remaining_time": "0:02:41", "throughput": 2508.02, "total_tokens": 3320896}
|
|
{"current_steps": 6760, "total_steps": 7577, "loss": 0.1241, "lr": 7.017603405370276e-08, "epoch": 0.8921736835159034, "percentage": 89.22, "elapsed_time": "0:22:04", "remaining_time": "0:02:40", "throughput": 2509.46, "total_tokens": 3323648}
|
|
{"current_steps": 6765, "total_steps": 7577, "loss": 0.1568, "lr": 6.933078177237429e-08, "epoch": 0.8928335752936518, "percentage": 89.28, "elapsed_time": "0:22:04", "remaining_time": "0:02:39", "throughput": 2510.77, "total_tokens": 3326208}
|
|
{"current_steps": 6770, "total_steps": 7577, "loss": 0.0287, "lr": 6.849046797368108e-08, "epoch": 0.8934934670714003, "percentage": 89.35, "elapsed_time": "0:22:05", "remaining_time": "0:02:37", "throughput": 2511.94, "total_tokens": 3328576}
|
|
{"current_steps": 6775, "total_steps": 7577, "loss": 0.0018, "lr": 6.765509711664574e-08, "epoch": 0.8941533588491487, "percentage": 89.42, "elapsed_time": "0:22:05", "remaining_time": "0:02:36", "throughput": 2513.52, "total_tokens": 3331520}
|
|
{"current_steps": 6780, "total_steps": 7577, "loss": 0.0488, "lr": 6.682467363406174e-08, "epoch": 0.8948132506268972, "percentage": 89.48, "elapsed_time": "0:22:05", "remaining_time": "0:02:35", "throughput": 2515.0, "total_tokens": 3334336}
|
|
{"current_steps": 6785, "total_steps": 7577, "loss": 0.0703, "lr": 6.59992019324701e-08, "epoch": 0.8954731424046456, "percentage": 89.55, "elapsed_time": "0:22:06", "remaining_time": "0:02:34", "throughput": 2516.3, "total_tokens": 3336896}
|
|
{"current_steps": 6790, "total_steps": 7577, "loss": 0.0708, "lr": 6.517868639213553e-08, "epoch": 0.8961330341823941, "percentage": 89.61, "elapsed_time": "0:22:06", "remaining_time": "0:02:33", "throughput": 2517.5, "total_tokens": 3339328}
|
|
{"current_steps": 6795, "total_steps": 7577, "loss": 0.0182, "lr": 6.436313136702387e-08, "epoch": 0.8967929259601425, "percentage": 89.68, "elapsed_time": "0:22:06", "remaining_time": "0:02:32", "throughput": 2518.69, "total_tokens": 3341760}
|
|
{"current_steps": 6800, "total_steps": 7577, "loss": 0.0659, "lr": 6.355254118477815e-08, "epoch": 0.897452817737891, "percentage": 89.75, "elapsed_time": "0:22:07", "remaining_time": "0:02:31", "throughput": 2520.06, "total_tokens": 3344448}
|
|
{"current_steps": 6805, "total_steps": 7577, "loss": 0.001, "lr": 6.274692014669602e-08, "epoch": 0.8981127095156395, "percentage": 89.81, "elapsed_time": "0:22:07", "remaining_time": "0:02:30", "throughput": 2521.35, "total_tokens": 3347008}
|
|
{"current_steps": 6810, "total_steps": 7577, "loss": 0.0006, "lr": 6.194627252770768e-08, "epoch": 0.8987726012933879, "percentage": 89.88, "elapsed_time": "0:22:07", "remaining_time": "0:02:29", "throughput": 2522.83, "total_tokens": 3349824}
|
|
{"current_steps": 6815, "total_steps": 7577, "loss": 0.0903, "lr": 6.115060257635174e-08, "epoch": 0.8994324930711364, "percentage": 89.94, "elapsed_time": "0:22:08", "remaining_time": "0:02:28", "throughput": 2524.07, "total_tokens": 3352320}
|
|
{"current_steps": 6820, "total_steps": 7577, "loss": 0.0013, "lr": 6.035991451475375e-08, "epoch": 0.9000923848488848, "percentage": 90.01, "elapsed_time": "0:22:08", "remaining_time": "0:02:27", "throughput": 2525.23, "total_tokens": 3354688}
|
|
{"current_steps": 6822, "total_steps": 7577, "eval_loss": 0.09756959229707718, "epoch": 0.9003563415599841, "percentage": 90.04, "elapsed_time": "0:22:16", "remaining_time": "0:02:27", "throughput": 2510.45, "total_tokens": 3355520}
|
|
{"current_steps": 6825, "total_steps": 7577, "loss": 0.0663, "lr": 5.9574212538603505e-08, "epoch": 0.9007522766266333, "percentage": 90.08, "elapsed_time": "0:22:41", "remaining_time": "0:02:29", "throughput": 2466.49, "total_tokens": 3357056}
|
|
{"current_steps": 6830, "total_steps": 7577, "loss": 0.066, "lr": 5.879350081713252e-08, "epoch": 0.9014121684043817, "percentage": 90.14, "elapsed_time": "0:22:41", "remaining_time": "0:02:28", "throughput": 2467.67, "total_tokens": 3359488}
|
|
{"current_steps": 6835, "total_steps": 7577, "loss": 0.2437, "lr": 5.8017783493092386e-08, "epoch": 0.9020720601821302, "percentage": 90.21, "elapsed_time": "0:22:41", "remaining_time": "0:02:27", "throughput": 2468.86, "total_tokens": 3361920}
|
|
{"current_steps": 6840, "total_steps": 7577, "loss": 0.0012, "lr": 5.7247064682732104e-08, "epoch": 0.9027319519598785, "percentage": 90.27, "elapsed_time": "0:22:42", "remaining_time": "0:02:26", "throughput": 2470.1, "total_tokens": 3364416}
|
|
{"current_steps": 6845, "total_steps": 7577, "loss": 0.0617, "lr": 5.6481348475777566e-08, "epoch": 0.903391843737627, "percentage": 90.34, "elapsed_time": "0:22:42", "remaining_time": "0:02:25", "throughput": 2471.35, "total_tokens": 3366912}
|
|
{"current_steps": 6850, "total_steps": 7577, "loss": 0.0012, "lr": 5.5720638935407796e-08, "epoch": 0.9040517355153754, "percentage": 90.41, "elapsed_time": "0:22:42", "remaining_time": "0:02:24", "throughput": 2472.36, "total_tokens": 3369088}
|
|
{"current_steps": 6855, "total_steps": 7577, "loss": 0.1105, "lr": 5.49649400982356e-08, "epoch": 0.9047116272931239, "percentage": 90.47, "elapsed_time": "0:22:43", "remaining_time": "0:02:23", "throughput": 2473.55, "total_tokens": 3371520}
|
|
{"current_steps": 6860, "total_steps": 7577, "loss": 0.0005, "lr": 5.421425597428442e-08, "epoch": 0.9053715190708723, "percentage": 90.54, "elapsed_time": "0:22:43", "remaining_time": "0:02:22", "throughput": 2474.83, "total_tokens": 3374080}
|
|
{"current_steps": 6865, "total_steps": 7577, "loss": 0.1294, "lr": 5.346859054696784e-08, "epoch": 0.9060314108486208, "percentage": 90.6, "elapsed_time": "0:22:43", "remaining_time": "0:02:21", "throughput": 2476.1, "total_tokens": 3376640}
|
|
{"current_steps": 6870, "total_steps": 7577, "loss": 0.0709, "lr": 5.2727947773068773e-08, "epoch": 0.9066913026263693, "percentage": 90.67, "elapsed_time": "0:22:44", "remaining_time": "0:02:20", "throughput": 2477.29, "total_tokens": 3379072}
|
|
{"current_steps": 6875, "total_steps": 7577, "loss": 0.069, "lr": 5.199233158271732e-08, "epoch": 0.9073511944041177, "percentage": 90.74, "elapsed_time": "0:22:44", "remaining_time": "0:02:19", "throughput": 2478.62, "total_tokens": 3381696}
|
|
{"current_steps": 6880, "total_steps": 7577, "loss": 0.2041, "lr": 5.126174587937149e-08, "epoch": 0.9080110861818662, "percentage": 90.8, "elapsed_time": "0:22:44", "remaining_time": "0:02:18", "throughput": 2479.76, "total_tokens": 3384064}
|
|
{"current_steps": 6885, "total_steps": 7577, "loss": 0.0024, "lr": 5.053619453979485e-08, "epoch": 0.9086709779596146, "percentage": 90.87, "elapsed_time": "0:22:44", "remaining_time": "0:02:17", "throughput": 2480.81, "total_tokens": 3386304}
|
|
{"current_steps": 6890, "total_steps": 7577, "loss": 0.1257, "lr": 4.9815681414037025e-08, "epoch": 0.9093308697373631, "percentage": 90.93, "elapsed_time": "0:22:45", "remaining_time": "0:02:16", "throughput": 2482.04, "total_tokens": 3388800}
|
|
{"current_steps": 6895, "total_steps": 7577, "loss": 0.097, "lr": 4.910021032541334e-08, "epoch": 0.9099907615151115, "percentage": 91.0, "elapsed_time": "0:22:45", "remaining_time": "0:02:15", "throughput": 2483.22, "total_tokens": 3391232}
|
|
{"current_steps": 6900, "total_steps": 7577, "loss": 0.1039, "lr": 4.838978507048319e-08, "epoch": 0.91065065329286, "percentage": 91.07, "elapsed_time": "0:22:45", "remaining_time": "0:02:14", "throughput": 2484.39, "total_tokens": 3393664}
|
|
{"current_steps": 6905, "total_steps": 7577, "loss": 0.0018, "lr": 4.768440941903207e-08, "epoch": 0.9113105450706084, "percentage": 91.13, "elapsed_time": "0:22:46", "remaining_time": "0:02:12", "throughput": 2485.48, "total_tokens": 3395968}
|
|
{"current_steps": 6910, "total_steps": 7577, "loss": 0.0407, "lr": 4.698408711404944e-08, "epoch": 0.9119704368483569, "percentage": 91.2, "elapsed_time": "0:22:46", "remaining_time": "0:02:11", "throughput": 2486.57, "total_tokens": 3398272}
|
|
{"current_steps": 6915, "total_steps": 7577, "loss": 0.0894, "lr": 4.628882187171046e-08, "epoch": 0.9126303286261053, "percentage": 91.26, "elapsed_time": "0:22:46", "remaining_time": "0:02:10", "throughput": 2487.94, "total_tokens": 3400960}
|
|
{"current_steps": 6920, "total_steps": 7577, "loss": 0.1429, "lr": 4.559861738135506e-08, "epoch": 0.9132902204038538, "percentage": 91.33, "elapsed_time": "0:22:47", "remaining_time": "0:02:09", "throughput": 2489.21, "total_tokens": 3403520}
|
|
{"current_steps": 6925, "total_steps": 7577, "loss": 0.2526, "lr": 4.491347730546913e-08, "epoch": 0.9139501121816023, "percentage": 91.4, "elapsed_time": "0:22:47", "remaining_time": "0:02:08", "throughput": 2490.39, "total_tokens": 3405952}
|
|
{"current_steps": 6930, "total_steps": 7577, "loss": 0.1429, "lr": 4.423340527966512e-08, "epoch": 0.9146100039593507, "percentage": 91.46, "elapsed_time": "0:22:47", "remaining_time": "0:02:07", "throughput": 2491.53, "total_tokens": 3408320}
|
|
{"current_steps": 6935, "total_steps": 7577, "loss": 0.0675, "lr": 4.355840491266205e-08, "epoch": 0.9152698957370992, "percentage": 91.53, "elapsed_time": "0:22:48", "remaining_time": "0:02:06", "throughput": 2492.79, "total_tokens": 3410880}
|
|
{"current_steps": 6940, "total_steps": 7577, "loss": 0.0589, "lr": 4.288847978626686e-08, "epoch": 0.9159297875148475, "percentage": 91.59, "elapsed_time": "0:22:48", "remaining_time": "0:02:05", "throughput": 2494.06, "total_tokens": 3413440}
|
|
{"current_steps": 6945, "total_steps": 7577, "loss": 0.0656, "lr": 4.222363345535585e-08, "epoch": 0.916589679292596, "percentage": 91.66, "elapsed_time": "0:22:48", "remaining_time": "0:02:04", "throughput": 2495.33, "total_tokens": 3416000}
|
|
{"current_steps": 6950, "total_steps": 7577, "loss": 0.1151, "lr": 4.1563869447854505e-08, "epoch": 0.9172495710703444, "percentage": 91.72, "elapsed_time": "0:22:49", "remaining_time": "0:02:03", "throughput": 2496.37, "total_tokens": 3418240}
|
|
{"current_steps": 6955, "total_steps": 7577, "loss": 0.1535, "lr": 4.090919126472048e-08, "epoch": 0.9179094628480929, "percentage": 91.79, "elapsed_time": "0:22:49", "remaining_time": "0:02:02", "throughput": 2497.56, "total_tokens": 3420672}
|
|
{"current_steps": 6960, "total_steps": 7577, "loss": 0.0099, "lr": 4.025960237992332e-08, "epoch": 0.9185693546258413, "percentage": 91.86, "elapsed_time": "0:22:49", "remaining_time": "0:02:01", "throughput": 2498.6, "total_tokens": 3422912}
|
|
{"current_steps": 6965, "total_steps": 7577, "loss": 0.0129, "lr": 3.961510624042741e-08, "epoch": 0.9192292464035898, "percentage": 91.92, "elapsed_time": "0:22:50", "remaining_time": "0:02:00", "throughput": 2499.82, "total_tokens": 3425408}
|
|
{"current_steps": 6970, "total_steps": 7577, "loss": 0.1038, "lr": 3.8975706266172636e-08, "epoch": 0.9198891381813382, "percentage": 91.99, "elapsed_time": "0:22:50", "remaining_time": "0:01:59", "throughput": 2500.95, "total_tokens": 3427776}
|
|
{"current_steps": 6975, "total_steps": 7577, "loss": 0.1456, "lr": 3.834140585005696e-08, "epoch": 0.9205490299590867, "percentage": 92.05, "elapsed_time": "0:22:50", "remaining_time": "0:01:58", "throughput": 2502.21, "total_tokens": 3430336}
|
|
{"current_steps": 6980, "total_steps": 7577, "loss": 0.192, "lr": 3.771220835791844e-08, "epoch": 0.9212089217368351, "percentage": 92.12, "elapsed_time": "0:22:51", "remaining_time": "0:01:57", "throughput": 2503.47, "total_tokens": 3432896}
|
|
{"current_steps": 6985, "total_steps": 7577, "loss": 0.023, "lr": 3.708811712851634e-08, "epoch": 0.9218688135145836, "percentage": 92.19, "elapsed_time": "0:22:51", "remaining_time": "0:01:56", "throughput": 2504.51, "total_tokens": 3435136}
|
|
{"current_steps": 6990, "total_steps": 7577, "loss": 0.0816, "lr": 3.6469135473514936e-08, "epoch": 0.9225287052923321, "percentage": 92.25, "elapsed_time": "0:22:51", "remaining_time": "0:01:55", "throughput": 2505.87, "total_tokens": 3437824}
|
|
{"current_steps": 6995, "total_steps": 7577, "loss": 0.0381, "lr": 3.5855266677464744e-08, "epoch": 0.9231885970700805, "percentage": 92.32, "elapsed_time": "0:22:52", "remaining_time": "0:01:54", "throughput": 2507.08, "total_tokens": 3440320}
|
|
{"current_steps": 7000, "total_steps": 7577, "loss": 0.0565, "lr": 3.524651399778555e-08, "epoch": 0.923848488847829, "percentage": 92.38, "elapsed_time": "0:22:52", "remaining_time": "0:01:53", "throughput": 2508.34, "total_tokens": 3442880}
|
|
{"current_steps": 7005, "total_steps": 7577, "loss": 0.0016, "lr": 3.4642880664749296e-08, "epoch": 0.9245083806255774, "percentage": 92.45, "elapsed_time": "0:22:52", "remaining_time": "0:01:52", "throughput": 2509.38, "total_tokens": 3445120}
|
|
{"current_steps": 7010, "total_steps": 7577, "loss": 0.1966, "lr": 3.404436988146242e-08, "epoch": 0.9251682724033259, "percentage": 92.52, "elapsed_time": "0:22:53", "remaining_time": "0:01:51", "throughput": 2510.46, "total_tokens": 3447424}
|
|
{"current_steps": 7015, "total_steps": 7577, "loss": 0.0537, "lr": 3.345098482384956e-08, "epoch": 0.9258281641810743, "percentage": 92.58, "elapsed_time": "0:22:53", "remaining_time": "0:01:50", "throughput": 2511.68, "total_tokens": 3449920}
|
|
{"current_steps": 7020, "total_steps": 7577, "loss": 0.0494, "lr": 3.2862728640636105e-08, "epoch": 0.9264880559588228, "percentage": 92.65, "elapsed_time": "0:22:53", "remaining_time": "0:01:49", "throughput": 2512.89, "total_tokens": 3452416}
|
|
{"current_steps": 7025, "total_steps": 7577, "loss": 0.1016, "lr": 3.227960445333155e-08, "epoch": 0.9271479477365712, "percentage": 92.71, "elapsed_time": "0:22:54", "remaining_time": "0:01:47", "throughput": 2514.11, "total_tokens": 3454912}
|
|
{"current_steps": 7030, "total_steps": 7577, "loss": 0.0633, "lr": 3.1701615356213295e-08, "epoch": 0.9278078395143197, "percentage": 92.78, "elapsed_time": "0:22:54", "remaining_time": "0:01:46", "throughput": 2515.38, "total_tokens": 3457472}
|
|
{"current_steps": 7035, "total_steps": 7577, "loss": 0.0683, "lr": 3.112876441630985e-08, "epoch": 0.928467731292068, "percentage": 92.85, "elapsed_time": "0:22:54", "remaining_time": "0:01:45", "throughput": 2516.41, "total_tokens": 3459712}
|
|
{"current_steps": 7040, "total_steps": 7577, "loss": 0.0577, "lr": 3.05610546733851e-08, "epoch": 0.9291276230698166, "percentage": 92.91, "elapsed_time": "0:22:55", "remaining_time": "0:01:44", "throughput": 2517.58, "total_tokens": 3462144}
|
|
{"current_steps": 7045, "total_steps": 7577, "loss": 0.2195, "lr": 2.99984891399212e-08, "epoch": 0.9297875148475649, "percentage": 92.98, "elapsed_time": "0:22:55", "remaining_time": "0:01:43", "throughput": 2518.71, "total_tokens": 3464512}
|
|
{"current_steps": 7050, "total_steps": 7577, "loss": 0.0013, "lr": 2.9441070801103808e-08, "epoch": 0.9304474066253134, "percentage": 93.04, "elapsed_time": "0:22:55", "remaining_time": "0:01:42", "throughput": 2519.84, "total_tokens": 3466880}
|
|
{"current_steps": 7055, "total_steps": 7577, "loss": 0.1014, "lr": 2.8888802614805085e-08, "epoch": 0.931107298403062, "percentage": 93.11, "elapsed_time": "0:22:56", "remaining_time": "0:01:41", "throughput": 2520.95, "total_tokens": 3469248}
|
|
{"current_steps": 7060, "total_steps": 7577, "loss": 0.2786, "lr": 2.8341687511568734e-08, "epoch": 0.9317671901808103, "percentage": 93.18, "elapsed_time": "0:22:56", "remaining_time": "0:01:40", "throughput": 2522.07, "total_tokens": 3471616}
|
|
{"current_steps": 7065, "total_steps": 7577, "loss": 0.082, "lr": 2.7799728394594547e-08, "epoch": 0.9324270819585588, "percentage": 93.24, "elapsed_time": "0:22:56", "remaining_time": "0:01:39", "throughput": 2523.37, "total_tokens": 3474240}
|
|
{"current_steps": 7070, "total_steps": 7577, "loss": 0.0789, "lr": 2.7262928139722198e-08, "epoch": 0.9330869737363072, "percentage": 93.31, "elapsed_time": "0:22:57", "remaining_time": "0:01:38", "throughput": 2524.61, "total_tokens": 3476800}
|
|
{"current_steps": 7075, "total_steps": 7577, "loss": 0.0807, "lr": 2.673128959541693e-08, "epoch": 0.9337468655140557, "percentage": 93.37, "elapsed_time": "0:22:57", "remaining_time": "0:01:37", "throughput": 2525.94, "total_tokens": 3479488}
|
|
{"current_steps": 7080, "total_steps": 7577, "loss": 0.0005, "lr": 2.620481558275367e-08, "epoch": 0.9344067572918041, "percentage": 93.44, "elapsed_time": "0:22:57", "remaining_time": "0:01:36", "throughput": 2527.29, "total_tokens": 3482176}
|
|
{"current_steps": 7085, "total_steps": 7577, "loss": 0.0278, "lr": 2.5683508895402382e-08, "epoch": 0.9350666490695526, "percentage": 93.51, "elapsed_time": "0:22:58", "remaining_time": "0:01:35", "throughput": 2528.59, "total_tokens": 3484800}
|
|
{"current_steps": 7090, "total_steps": 7577, "loss": 0.1436, "lr": 2.5167372299613853e-08, "epoch": 0.935726540847301, "percentage": 93.57, "elapsed_time": "0:22:58", "remaining_time": "0:01:34", "throughput": 2529.92, "total_tokens": 3487488}
|
|
{"current_steps": 7095, "total_steps": 7577, "loss": 0.2508, "lr": 2.4656408534203365e-08, "epoch": 0.9363864326250495, "percentage": 93.64, "elapsed_time": "0:22:58", "remaining_time": "0:01:33", "throughput": 2530.96, "total_tokens": 3489728}
|
|
{"current_steps": 7100, "total_steps": 7577, "loss": 0.2469, "lr": 2.4150620310538273e-08, "epoch": 0.9370463244027979, "percentage": 93.7, "elapsed_time": "0:22:59", "remaining_time": "0:01:32", "throughput": 2531.94, "total_tokens": 3491904}
|
|
{"current_steps": 7105, "total_steps": 7577, "loss": 0.0832, "lr": 2.3650010312521673e-08, "epoch": 0.9377062161805464, "percentage": 93.77, "elapsed_time": "0:22:59", "remaining_time": "0:01:31", "throughput": 2533.28, "total_tokens": 3494592}
|
|
{"current_steps": 7110, "total_steps": 7577, "loss": 0.1798, "lr": 2.3154581196579648e-08, "epoch": 0.9383661079582949, "percentage": 93.84, "elapsed_time": "0:22:59", "remaining_time": "0:01:30", "throughput": 2534.48, "total_tokens": 3497088}
|
|
{"current_steps": 7115, "total_steps": 7577, "loss": 0.0559, "lr": 2.2664335591646377e-08, "epoch": 0.9390259997360433, "percentage": 93.9, "elapsed_time": "0:23:00", "remaining_time": "0:01:29", "throughput": 2535.65, "total_tokens": 3499520}
|
|
{"current_steps": 7120, "total_steps": 7577, "loss": 0.2113, "lr": 2.2179276099150158e-08, "epoch": 0.9396858915137918, "percentage": 93.97, "elapsed_time": "0:23:00", "remaining_time": "0:01:28", "throughput": 2536.98, "total_tokens": 3502208}
|
|
{"current_steps": 7125, "total_steps": 7577, "loss": 0.1584, "lr": 2.1699405293000182e-08, "epoch": 0.9403457832915402, "percentage": 94.03, "elapsed_time": "0:23:00", "remaining_time": "0:01:27", "throughput": 2538.14, "total_tokens": 3504640}
|
|
{"current_steps": 7130, "total_steps": 7577, "loss": 0.0448, "lr": 2.1224725719572235e-08, "epoch": 0.9410056750692887, "percentage": 94.1, "elapsed_time": "0:23:01", "remaining_time": "0:01:26", "throughput": 2539.21, "total_tokens": 3506944}
|
|
{"current_steps": 7135, "total_steps": 7577, "loss": 0.0454, "lr": 2.0755239897695453e-08, "epoch": 0.9416655668470371, "percentage": 94.17, "elapsed_time": "0:23:01", "remaining_time": "0:01:25", "throughput": 2540.37, "total_tokens": 3509376}
|
|
{"current_steps": 7140, "total_steps": 7577, "loss": 0.1705, "lr": 2.0290950318639256e-08, "epoch": 0.9423254586247856, "percentage": 94.23, "elapsed_time": "0:23:01", "remaining_time": "0:01:24", "throughput": 2541.44, "total_tokens": 3511680}
|
|
{"current_steps": 7145, "total_steps": 7577, "loss": 0.0576, "lr": 1.983185944609944e-08, "epoch": 0.942985350402534, "percentage": 94.3, "elapsed_time": "0:23:02", "remaining_time": "0:01:23", "throughput": 2542.6, "total_tokens": 3514112}
|
|
{"current_steps": 7150, "total_steps": 7577, "loss": 0.0709, "lr": 1.9377969716185994e-08, "epoch": 0.9436452421802825, "percentage": 94.36, "elapsed_time": "0:23:02", "remaining_time": "0:01:22", "throughput": 2543.71, "total_tokens": 3516480}
|
|
{"current_steps": 7155, "total_steps": 7577, "loss": 0.1222, "lr": 1.8929283537408968e-08, "epoch": 0.9443051339580308, "percentage": 94.43, "elapsed_time": "0:23:02", "remaining_time": "0:01:21", "throughput": 2544.74, "total_tokens": 3518720}
|
|
{"current_steps": 7160, "total_steps": 7577, "loss": 0.0012, "lr": 1.848580329066718e-08, "epoch": 0.9449650257357793, "percentage": 94.5, "elapsed_time": "0:23:03", "remaining_time": "0:01:20", "throughput": 2545.94, "total_tokens": 3521216}
|
|
{"current_steps": 7165, "total_steps": 7577, "loss": 0.4089, "lr": 1.804753132923431e-08, "epoch": 0.9456249175135277, "percentage": 94.56, "elapsed_time": "0:23:03", "remaining_time": "0:01:19", "throughput": 2547.18, "total_tokens": 3523776}
|
|
{"current_steps": 7170, "total_steps": 7577, "loss": 0.0013, "lr": 1.7614469978746827e-08, "epoch": 0.9462848092912762, "percentage": 94.63, "elapsed_time": "0:23:03", "remaining_time": "0:01:18", "throughput": 2548.38, "total_tokens": 3526272}
|
|
{"current_steps": 7175, "total_steps": 7577, "loss": 0.0095, "lr": 1.7186621537192304e-08, "epoch": 0.9469447010690247, "percentage": 94.69, "elapsed_time": "0:23:04", "remaining_time": "0:01:17", "throughput": 2549.45, "total_tokens": 3528576}
|
|
{"current_steps": 7180, "total_steps": 7577, "loss": 0.0009, "lr": 1.6763988274896003e-08, "epoch": 0.9476045928467731, "percentage": 94.76, "elapsed_time": "0:23:04", "remaining_time": "0:01:16", "throughput": 2550.69, "total_tokens": 3531136}
|
|
{"current_steps": 7185, "total_steps": 7577, "loss": 0.159, "lr": 1.6346572434509876e-08, "epoch": 0.9482644846245216, "percentage": 94.83, "elapsed_time": "0:23:04", "remaining_time": "0:01:15", "throughput": 2551.93, "total_tokens": 3533696}
|
|
{"current_steps": 7190, "total_steps": 7577, "loss": 0.1343, "lr": 1.5934376231000248e-08, "epoch": 0.94892437640227, "percentage": 94.89, "elapsed_time": "0:23:05", "remaining_time": "0:01:14", "throughput": 2553.04, "total_tokens": 3536064}
|
|
{"current_steps": 7195, "total_steps": 7577, "loss": 0.0016, "lr": 1.55274018516357e-08, "epoch": 0.9495842681800185, "percentage": 94.96, "elapsed_time": "0:23:05", "remaining_time": "0:01:13", "throughput": 2554.14, "total_tokens": 3538432}
|
|
{"current_steps": 7200, "total_steps": 7577, "loss": 0.0683, "lr": 1.512565145597633e-08, "epoch": 0.9502441599577669, "percentage": 95.02, "elapsed_time": "0:23:05", "remaining_time": "0:01:12", "throughput": 2555.47, "total_tokens": 3541120}
|
|
{"current_steps": 7201, "total_steps": 7577, "eval_loss": 0.09811879694461823, "epoch": 0.9503761383133166, "percentage": 95.04, "elapsed_time": "0:23:13", "remaining_time": "0:01:12", "throughput": 2541.26, "total_tokens": 3541632}
|
|
{"current_steps": 7205, "total_steps": 7577, "loss": 0.0802, "lr": 1.47291271758615e-08, "epoch": 0.9509040517355154, "percentage": 95.09, "elapsed_time": "0:23:57", "remaining_time": "0:01:14", "throughput": 2465.41, "total_tokens": 3543680}
|
|
{"current_steps": 7210, "total_steps": 7577, "loss": 0.1379, "lr": 1.4337831115398991e-08, "epoch": 0.9515639435132638, "percentage": 95.16, "elapsed_time": "0:23:57", "remaining_time": "0:01:13", "throughput": 2466.44, "total_tokens": 3545984}
|
|
{"current_steps": 7215, "total_steps": 7577, "loss": 0.1338, "lr": 1.3951765350953548e-08, "epoch": 0.9522238352910123, "percentage": 95.22, "elapsed_time": "0:23:58", "remaining_time": "0:01:12", "throughput": 2467.64, "total_tokens": 3548544}
|
|
{"current_steps": 7220, "total_steps": 7577, "loss": 0.0956, "lr": 1.3570931931136009e-08, "epoch": 0.9528837270687607, "percentage": 95.29, "elapsed_time": "0:23:58", "remaining_time": "0:01:11", "throughput": 2468.78, "total_tokens": 3551040}
|
|
{"current_steps": 7225, "total_steps": 7577, "loss": 0.0867, "lr": 1.3195332876792532e-08, "epoch": 0.9535436188465092, "percentage": 95.35, "elapsed_time": "0:23:58", "remaining_time": "0:01:10", "throughput": 2469.93, "total_tokens": 3553536}
|
|
{"current_steps": 7230, "total_steps": 7577, "loss": 0.1628, "lr": 1.2824970180993488e-08, "epoch": 0.9542035106242576, "percentage": 95.42, "elapsed_time": "0:23:59", "remaining_time": "0:01:09", "throughput": 2470.87, "total_tokens": 3555712}
|
|
{"current_steps": 7235, "total_steps": 7577, "loss": 0.1277, "lr": 1.2459845809023484e-08, "epoch": 0.9548634024020061, "percentage": 95.49, "elapsed_time": "0:23:59", "remaining_time": "0:01:08", "throughput": 2471.95, "total_tokens": 3558080}
|
|
{"current_steps": 7240, "total_steps": 7577, "loss": 0.3486, "lr": 1.2099961698370353e-08, "epoch": 0.9555232941797546, "percentage": 95.55, "elapsed_time": "0:23:59", "remaining_time": "0:01:07", "throughput": 2473.16, "total_tokens": 3560640}
|
|
{"current_steps": 7245, "total_steps": 7577, "loss": 0.0935, "lr": 1.1745319758715288e-08, "epoch": 0.956183185957503, "percentage": 95.62, "elapsed_time": "0:24:00", "remaining_time": "0:01:05", "throughput": 2474.49, "total_tokens": 3563392}
|
|
{"current_steps": 7250, "total_steps": 7577, "loss": 0.1487, "lr": 1.1395921871922509e-08, "epoch": 0.9568430777352515, "percentage": 95.68, "elapsed_time": "0:24:00", "remaining_time": "0:01:04", "throughput": 2475.61, "total_tokens": 3565824}
|
|
{"current_steps": 7255, "total_steps": 7577, "loss": 0.0008, "lr": 1.105176989202905e-08, "epoch": 0.9575029695129998, "percentage": 95.75, "elapsed_time": "0:24:00", "remaining_time": "0:01:03", "throughput": 2476.72, "total_tokens": 3568256}
|
|
{"current_steps": 7260, "total_steps": 7577, "loss": 0.0075, "lr": 1.0712865645235659e-08, "epoch": 0.9581628612907483, "percentage": 95.82, "elapsed_time": "0:24:01", "remaining_time": "0:01:02", "throughput": 2477.89, "total_tokens": 3570752}
|
|
{"current_steps": 7265, "total_steps": 7577, "loss": 0.0681, "lr": 1.0379210929896131e-08, "epoch": 0.9588227530684967, "percentage": 95.88, "elapsed_time": "0:24:01", "remaining_time": "0:01:01", "throughput": 2478.82, "total_tokens": 3572928}
|
|
{"current_steps": 7270, "total_steps": 7577, "loss": 0.1822, "lr": 1.0050807516508553e-08, "epoch": 0.9594826448462452, "percentage": 95.95, "elapsed_time": "0:24:01", "remaining_time": "0:01:00", "throughput": 2479.89, "total_tokens": 3575296}
|
|
{"current_steps": 7275, "total_steps": 7577, "loss": 0.0006, "lr": 9.727657147705737e-09, "epoch": 0.9601425366239936, "percentage": 96.01, "elapsed_time": "0:24:02", "remaining_time": "0:00:59", "throughput": 2480.96, "total_tokens": 3577664}
|
|
{"current_steps": 7280, "total_steps": 7577, "loss": 0.2031, "lr": 9.409761538245575e-09, "epoch": 0.9608024284017421, "percentage": 96.08, "elapsed_time": "0:24:02", "remaining_time": "0:00:58", "throughput": 2482.11, "total_tokens": 3580160}
|
|
{"current_steps": 7285, "total_steps": 7577, "loss": 0.1012, "lr": 9.097122375002264e-09, "epoch": 0.9614623201794905, "percentage": 96.15, "elapsed_time": "0:24:02", "remaining_time": "0:00:57", "throughput": 2483.13, "total_tokens": 3582464}
|
|
{"current_steps": 7290, "total_steps": 7577, "loss": 0.1369, "lr": 8.789741316957312e-09, "epoch": 0.962122211957239, "percentage": 96.21, "elapsed_time": "0:24:03", "remaining_time": "0:00:56", "throughput": 2484.24, "total_tokens": 3584896}
|
|
{"current_steps": 7295, "total_steps": 7577, "loss": 0.0152, "lr": 8.487619995190986e-09, "epoch": 0.9627821037349875, "percentage": 96.28, "elapsed_time": "0:24:03", "remaining_time": "0:00:55", "throughput": 2485.52, "total_tokens": 3587584}
|
|
{"current_steps": 7300, "total_steps": 7577, "loss": 0.1341, "lr": 8.19076001287311e-09, "epoch": 0.9634419955127359, "percentage": 96.34, "elapsed_time": "0:24:03", "remaining_time": "0:00:54", "throughput": 2486.72, "total_tokens": 3590144}
|
|
{"current_steps": 7305, "total_steps": 7577, "loss": 0.001, "lr": 7.899162945254945e-09, "epoch": 0.9641018872904844, "percentage": 96.41, "elapsed_time": "0:24:04", "remaining_time": "0:00:53", "throughput": 2488.0, "total_tokens": 3592832}
|
|
{"current_steps": 7310, "total_steps": 7577, "loss": 0.0999, "lr": 7.612830339660758e-09, "epoch": 0.9647617790682328, "percentage": 96.48, "elapsed_time": "0:24:04", "remaining_time": "0:00:52", "throughput": 2489.23, "total_tokens": 3595456}
|
|
{"current_steps": 7315, "total_steps": 7577, "loss": 0.1011, "lr": 7.3317637154796105e-09, "epoch": 0.9654216708459813, "percentage": 96.54, "elapsed_time": "0:24:04", "remaining_time": "0:00:51", "throughput": 2490.35, "total_tokens": 3597888}
|
|
{"current_steps": 7320, "total_steps": 7577, "loss": 0.0699, "lr": 7.0559645641572465e-09, "epoch": 0.9660815626237297, "percentage": 96.61, "elapsed_time": "0:24:05", "remaining_time": "0:00:50", "throughput": 2491.51, "total_tokens": 3600384}
|
|
{"current_steps": 7325, "total_steps": 7577, "loss": 0.1521, "lr": 6.785434349188102e-09, "epoch": 0.9667414544014782, "percentage": 96.67, "elapsed_time": "0:24:05", "remaining_time": "0:00:49", "throughput": 2492.67, "total_tokens": 3602880}
|
|
{"current_steps": 7330, "total_steps": 7577, "loss": 0.045, "lr": 6.520174506107867e-09, "epoch": 0.9674013461792266, "percentage": 96.74, "elapsed_time": "0:24:05", "remaining_time": "0:00:48", "throughput": 2493.74, "total_tokens": 3605248}
|
|
{"current_steps": 7335, "total_steps": 7577, "loss": 0.001, "lr": 6.260186442485494e-09, "epoch": 0.9680612379569751, "percentage": 96.81, "elapsed_time": "0:24:06", "remaining_time": "0:00:47", "throughput": 2494.94, "total_tokens": 3607808}
|
|
{"current_steps": 7340, "total_steps": 7577, "loss": 0.1172, "lr": 6.005471537915863e-09, "epoch": 0.9687211297347235, "percentage": 96.87, "elapsed_time": "0:24:06", "remaining_time": "0:00:46", "throughput": 2495.95, "total_tokens": 3610112}
|
|
{"current_steps": 7345, "total_steps": 7577, "loss": 0.0319, "lr": 5.756031144012685e-09, "epoch": 0.969381021512472, "percentage": 96.94, "elapsed_time": "0:24:06", "remaining_time": "0:00:45", "throughput": 2496.93, "total_tokens": 3612352}
|
|
{"current_steps": 7350, "total_steps": 7577, "loss": 0.1853, "lr": 5.511866584400837e-09, "epoch": 0.9700409132902204, "percentage": 97.0, "elapsed_time": "0:24:07", "remaining_time": "0:00:44", "throughput": 2498.08, "total_tokens": 3614848}
|
|
{"current_steps": 7355, "total_steps": 7577, "loss": 0.0038, "lr": 5.2729791547097e-09, "epoch": 0.9707008050679689, "percentage": 97.07, "elapsed_time": "0:24:07", "remaining_time": "0:00:43", "throughput": 2499.28, "total_tokens": 3617408}
|
|
{"current_steps": 7360, "total_steps": 7577, "loss": 0.1072, "lr": 5.039370122566389e-09, "epoch": 0.9713606968457174, "percentage": 97.14, "elapsed_time": "0:24:07", "remaining_time": "0:00:42", "throughput": 2500.47, "total_tokens": 3619968}
|
|
{"current_steps": 7365, "total_steps": 7577, "loss": 0.099, "lr": 4.811040727588755e-09, "epoch": 0.9720205886234657, "percentage": 97.2, "elapsed_time": "0:24:08", "remaining_time": "0:00:41", "throughput": 2501.32, "total_tokens": 3622016}
|
|
{"current_steps": 7370, "total_steps": 7577, "loss": 0.1695, "lr": 4.58799218137873e-09, "epoch": 0.9726804804012142, "percentage": 97.27, "elapsed_time": "0:24:08", "remaining_time": "0:00:40", "throughput": 2502.26, "total_tokens": 3624192}
|
|
{"current_steps": 7375, "total_steps": 7577, "loss": 0.0006, "lr": 4.370225667516325e-09, "epoch": 0.9733403721789626, "percentage": 97.33, "elapsed_time": "0:24:08", "remaining_time": "0:00:39", "throughput": 2503.36, "total_tokens": 3626624}
|
|
{"current_steps": 7380, "total_steps": 7577, "loss": 0.1853, "lr": 4.157742341552861e-09, "epoch": 0.9740002639567111, "percentage": 97.4, "elapsed_time": "0:24:09", "remaining_time": "0:00:38", "throughput": 2504.38, "total_tokens": 3628928}
|
|
{"current_steps": 7385, "total_steps": 7577, "loss": 0.1044, "lr": 3.950543331005307e-09, "epoch": 0.9746601557344595, "percentage": 97.47, "elapsed_time": "0:24:09", "remaining_time": "0:00:37", "throughput": 2505.62, "total_tokens": 3631552}
|
|
{"current_steps": 7390, "total_steps": 7577, "loss": 0.0016, "lr": 3.748629735349839e-09, "epoch": 0.975320047512208, "percentage": 97.53, "elapsed_time": "0:24:09", "remaining_time": "0:00:36", "throughput": 2506.72, "total_tokens": 3633984}
|
|
{"current_steps": 7395, "total_steps": 7577, "loss": 0.2102, "lr": 3.552002626016293e-09, "epoch": 0.9759799392899564, "percentage": 97.6, "elapsed_time": "0:24:10", "remaining_time": "0:00:35", "throughput": 2507.7, "total_tokens": 3636224}
|
|
{"current_steps": 7400, "total_steps": 7577, "loss": 0.2375, "lr": 3.3606630463824947e-09, "epoch": 0.9766398310677049, "percentage": 97.66, "elapsed_time": "0:24:10", "remaining_time": "0:00:34", "throughput": 2508.8, "total_tokens": 3638656}
|
|
{"current_steps": 7405, "total_steps": 7577, "loss": 0.0009, "lr": 3.174612011768607e-09, "epoch": 0.9772997228454533, "percentage": 97.73, "elapsed_time": "0:24:10", "remaining_time": "0:00:33", "throughput": 2510.11, "total_tokens": 3641408}
|
|
{"current_steps": 7410, "total_steps": 7577, "loss": 0.1826, "lr": 2.9938505094316834e-09, "epoch": 0.9779596146232018, "percentage": 97.8, "elapsed_time": "0:24:11", "remaining_time": "0:00:32", "throughput": 2511.19, "total_tokens": 3643840}
|
|
{"current_steps": 7415, "total_steps": 7577, "loss": 0.0801, "lr": 2.8183794985605637e-09, "epoch": 0.9786195064009502, "percentage": 97.86, "elapsed_time": "0:24:11", "remaining_time": "0:00:31", "throughput": 2512.3, "total_tokens": 3646336}
|
|
{"current_steps": 7420, "total_steps": 7577, "loss": 0.0731, "lr": 2.6481999102707654e-09, "epoch": 0.9792793981786987, "percentage": 97.93, "elapsed_time": "0:24:11", "remaining_time": "0:00:30", "throughput": 2513.52, "total_tokens": 3648960}
|
|
{"current_steps": 7425, "total_steps": 7577, "loss": 0.0011, "lr": 2.4833126475994894e-09, "epoch": 0.9799392899564472, "percentage": 97.99, "elapsed_time": "0:24:12", "remaining_time": "0:00:29", "throughput": 2514.49, "total_tokens": 3651200}
|
|
{"current_steps": 7430, "total_steps": 7577, "loss": 0.0073, "lr": 2.3237185855008443e-09, "epoch": 0.9805991817341956, "percentage": 98.06, "elapsed_time": "0:24:12", "remaining_time": "0:00:28", "throughput": 2515.5, "total_tokens": 3653504}
|
|
{"current_steps": 7435, "total_steps": 7577, "loss": 0.2482, "lr": 2.1694185708414083e-09, "epoch": 0.9812590735119441, "percentage": 98.13, "elapsed_time": "0:24:12", "remaining_time": "0:00:27", "throughput": 2516.68, "total_tokens": 3656064}
|
|
{"current_steps": 7440, "total_steps": 7577, "loss": 0.2875, "lr": 2.0204134223952284e-09, "epoch": 0.9819189652896925, "percentage": 98.19, "elapsed_time": "0:24:13", "remaining_time": "0:00:26", "throughput": 2517.53, "total_tokens": 3658112}
|
|
{"current_steps": 7445, "total_steps": 7577, "loss": 0.1509, "lr": 1.87670393083994e-09, "epoch": 0.982578857067441, "percentage": 98.26, "elapsed_time": "0:24:13", "remaining_time": "0:00:25", "throughput": 2518.88, "total_tokens": 3660928}
|
|
{"current_steps": 7450, "total_steps": 7577, "loss": 0.0779, "lr": 1.7382908587525447e-09, "epoch": 0.9832387488451894, "percentage": 98.32, "elapsed_time": "0:24:13", "remaining_time": "0:00:24", "throughput": 2519.9, "total_tokens": 3663232}
|
|
{"current_steps": 7455, "total_steps": 7577, "loss": 0.0016, "lr": 1.6051749406049697e-09, "epoch": 0.9838986406229379, "percentage": 98.39, "elapsed_time": "0:24:14", "remaining_time": "0:00:23", "throughput": 2520.95, "total_tokens": 3665600}
|
|
{"current_steps": 7460, "total_steps": 7577, "loss": 0.0008, "lr": 1.4773568827607386e-09, "epoch": 0.9845585324006862, "percentage": 98.46, "elapsed_time": "0:24:14", "remaining_time": "0:00:22", "throughput": 2522.09, "total_tokens": 3668096}
|
|
{"current_steps": 7465, "total_steps": 7577, "loss": 0.0016, "lr": 1.354837363470529e-09, "epoch": 0.9852184241784347, "percentage": 98.52, "elapsed_time": "0:24:14", "remaining_time": "0:00:21", "throughput": 2523.28, "total_tokens": 3670656}
|
|
{"current_steps": 7470, "total_steps": 7577, "loss": 0.0971, "lr": 1.23761703286962e-09, "epoch": 0.9858783159561831, "percentage": 98.59, "elapsed_time": "0:24:15", "remaining_time": "0:00:20", "throughput": 2524.33, "total_tokens": 3673024}
|
|
{"current_steps": 7475, "total_steps": 7577, "loss": 0.0012, "lr": 1.1256965129730068e-09, "epoch": 0.9865382077339316, "percentage": 98.65, "elapsed_time": "0:24:15", "remaining_time": "0:00:19", "throughput": 2525.59, "total_tokens": 3675712}
|
|
{"current_steps": 7480, "total_steps": 7577, "loss": 0.0803, "lr": 1.0190763976734018e-09, "epoch": 0.9871980995116801, "percentage": 98.72, "elapsed_time": "0:24:15", "remaining_time": "0:00:18", "throughput": 2526.65, "total_tokens": 3678080}
|
|
{"current_steps": 7485, "total_steps": 7577, "loss": 0.114, "lr": 9.177572527375721e-10, "epoch": 0.9878579912894285, "percentage": 98.79, "elapsed_time": "0:24:16", "remaining_time": "0:00:17", "throughput": 2527.71, "total_tokens": 3680448}
|
|
{"current_steps": 7490, "total_steps": 7577, "loss": 0.0938, "lr": 8.217396158030076e-10, "epoch": 0.988517883067177, "percentage": 98.85, "elapsed_time": "0:24:16", "remaining_time": "0:00:16", "throughput": 2528.72, "total_tokens": 3682752}
|
|
{"current_steps": 7495, "total_steps": 7577, "loss": 0.108, "lr": 7.310239963755904e-10, "epoch": 0.9891777748449254, "percentage": 98.92, "elapsed_time": "0:24:16", "remaining_time": "0:00:15", "throughput": 2529.93, "total_tokens": 3685376}
|
|
{"current_steps": 7500, "total_steps": 7577, "loss": 0.0012, "lr": 6.456108758268186e-10, "epoch": 0.9898376666226739, "percentage": 98.98, "elapsed_time": "0:24:17", "remaining_time": "0:00:14", "throughput": 2530.97, "total_tokens": 3687744}
|
|
{"current_steps": 7505, "total_steps": 7577, "loss": 0.1473, "lr": 5.655007073909202e-10, "epoch": 0.9904975584004223, "percentage": 99.05, "elapsed_time": "0:24:17", "remaining_time": "0:00:13", "throughput": 2532.1, "total_tokens": 3690240}
|
|
{"current_steps": 7510, "total_steps": 7577, "loss": 0.0775, "lr": 4.906939161627432e-10, "epoch": 0.9911574501781708, "percentage": 99.12, "elapsed_time": "0:24:17", "remaining_time": "0:00:13", "throughput": 2533.24, "total_tokens": 3692736}
|
|
{"current_steps": 7515, "total_steps": 7577, "loss": 0.2057, "lr": 4.2119089909542495e-10, "epoch": 0.9918173419559192, "percentage": 99.18, "elapsed_time": "0:24:18", "remaining_time": "0:00:12", "throughput": 2534.46, "total_tokens": 3695360}
|
|
{"current_steps": 7520, "total_steps": 7577, "loss": 0.0823, "lr": 3.569920249981706e-10, "epoch": 0.9924772337336677, "percentage": 99.25, "elapsed_time": "0:24:18", "remaining_time": "0:00:11", "throughput": 2535.59, "total_tokens": 3697856}
|
|
{"current_steps": 7525, "total_steps": 7577, "loss": 0.0409, "lr": 2.980976345344777e-10, "epoch": 0.9931371255114161, "percentage": 99.31, "elapsed_time": "0:24:18", "remaining_time": "0:00:10", "throughput": 2536.64, "total_tokens": 3700224}
|
|
{"current_steps": 7530, "total_steps": 7577, "loss": 0.0481, "lr": 2.445080402202482e-10, "epoch": 0.9937970172891646, "percentage": 99.38, "elapsed_time": "0:24:19", "remaining_time": "0:00:09", "throughput": 2537.69, "total_tokens": 3702592}
|
|
{"current_steps": 7535, "total_steps": 7577, "loss": 0.1701, "lr": 1.962235264222345e-10, "epoch": 0.994456909066913, "percentage": 99.45, "elapsed_time": "0:24:19", "remaining_time": "0:00:08", "throughput": 2538.7, "total_tokens": 3704896}
|
|
{"current_steps": 7540, "total_steps": 7577, "loss": 0.0378, "lr": 1.5324434935615195e-10, "epoch": 0.9951168008446615, "percentage": 99.51, "elapsed_time": "0:24:19", "remaining_time": "0:00:07", "throughput": 2539.72, "total_tokens": 3707264}
|
|
{"current_steps": 7545, "total_steps": 7577, "loss": 0.1023, "lr": 1.1557073708579057e-10, "epoch": 0.99577669262241, "percentage": 99.58, "elapsed_time": "0:24:20", "remaining_time": "0:00:06", "throughput": 2540.89, "total_tokens": 3709824}
|
|
{"current_steps": 7550, "total_steps": 7577, "loss": 0.0658, "lr": 8.320288952168297e-11, "epoch": 0.9964365844001584, "percentage": 99.64, "elapsed_time": "0:24:20", "remaining_time": "0:00:05", "throughput": 2541.92, "total_tokens": 3712192}
|
|
{"current_steps": 7555, "total_steps": 7577, "loss": 0.1099, "lr": 5.614097841988297e-11, "epoch": 0.9970964761779069, "percentage": 99.71, "elapsed_time": "0:24:20", "remaining_time": "0:00:04", "throughput": 2543.17, "total_tokens": 3714880}
|
|
{"current_steps": 7560, "total_steps": 7577, "loss": 0.0216, "lr": 3.43851473808554e-11, "epoch": 0.9977563679556553, "percentage": 99.78, "elapsed_time": "0:24:21", "remaining_time": "0:00:03", "throughput": 2544.16, "total_tokens": 3717184}
|
|
{"current_steps": 7565, "total_steps": 7577, "loss": 0.0521, "lr": 1.7935511849587192e-11, "epoch": 0.9984162597334038, "percentage": 99.84, "elapsed_time": "0:24:21", "remaining_time": "0:00:02", "throughput": 2545.13, "total_tokens": 3719424}
|
|
{"current_steps": 7570, "total_steps": 7577, "loss": 0.1418, "lr": 6.792159113921947e-12, "epoch": 0.9990761515111521, "percentage": 99.91, "elapsed_time": "0:24:21", "remaining_time": "0:00:01", "throughput": 2546.26, "total_tokens": 3721920}
|
|
{"current_steps": 7575, "total_steps": 7577, "loss": 0.0767, "lr": 9.55148304560005e-13, "epoch": 0.9997360432889006, "percentage": 99.97, "elapsed_time": "0:24:22", "remaining_time": "0:00:00", "throughput": 2547.31, "total_tokens": 3724288}
|
|
{"current_steps": 7577, "total_steps": 7577, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:25:13", "remaining_time": "0:00:00", "throughput": 2461.79, "total_tokens": 3725120}
|