983 lines
232 KiB
JSON
983 lines
232 KiB
JSON
{"current_steps": 5, "total_steps": 4810, "loss": 1.2917, "lr": 4.158004158004159e-08, "epoch": 0.005197505197505198, "percentage": 0.1, "elapsed_time": "0:00:00", "remaining_time": "0:10:09", "throughput": 3231.57, "total_tokens": 2048}
|
|
{"current_steps": 10, "total_steps": 4810, "loss": 1.2491, "lr": 9.355509355509357e-08, "epoch": 0.010395010395010396, "percentage": 0.21, "elapsed_time": "0:00:00", "remaining_time": "0:07:44", "throughput": 4366.27, "total_tokens": 4224}
|
|
{"current_steps": 15, "total_steps": 4810, "loss": 1.3117, "lr": 1.4553014553014554e-07, "epoch": 0.015592515592515593, "percentage": 0.31, "elapsed_time": "0:00:01", "remaining_time": "0:06:52", "throughput": 4863.9, "total_tokens": 6272}
|
|
{"current_steps": 20, "total_steps": 4810, "loss": 1.1366, "lr": 1.9750519750519752e-07, "epoch": 0.02079002079002079, "percentage": 0.42, "elapsed_time": "0:00:01", "remaining_time": "0:06:26", "throughput": 5199.14, "total_tokens": 8384}
|
|
{"current_steps": 25, "total_steps": 4810, "loss": 0.7792, "lr": 2.494802494802495e-07, "epoch": 0.02598752598752599, "percentage": 0.52, "elapsed_time": "0:00:01", "remaining_time": "0:06:10", "throughput": 5421.16, "total_tokens": 10496}
|
|
{"current_steps": 30, "total_steps": 4810, "loss": 0.523, "lr": 3.014553014553015e-07, "epoch": 0.031185031185031187, "percentage": 0.62, "elapsed_time": "0:00:02", "remaining_time": "0:05:59", "throughput": 5553.26, "total_tokens": 12544}
|
|
{"current_steps": 35, "total_steps": 4810, "loss": 0.3194, "lr": 3.534303534303535e-07, "epoch": 0.036382536382536385, "percentage": 0.73, "elapsed_time": "0:00:02", "remaining_time": "0:05:51", "throughput": 5631.11, "total_tokens": 14528}
|
|
{"current_steps": 40, "total_steps": 4810, "loss": 0.3482, "lr": 4.0540540540540546e-07, "epoch": 0.04158004158004158, "percentage": 0.83, "elapsed_time": "0:00:02", "remaining_time": "0:05:46", "throughput": 5711.68, "total_tokens": 16576}
|
|
{"current_steps": 45, "total_steps": 4810, "loss": 0.377, "lr": 4.5738045738045745e-07, "epoch": 0.04677754677754678, "percentage": 0.94, "elapsed_time": "0:00:03", "remaining_time": "0:05:41", "throughput": 5757.88, "total_tokens": 18560}
|
|
{"current_steps": 50, "total_steps": 4810, "loss": 0.2695, "lr": 5.093555093555094e-07, "epoch": 0.05197505197505198, "percentage": 1.04, "elapsed_time": "0:00:03", "remaining_time": "0:05:37", "throughput": 5813.34, "total_tokens": 20608}
|
|
{"current_steps": 55, "total_steps": 4810, "loss": 0.2431, "lr": 5.613305613305614e-07, "epoch": 0.057172557172557176, "percentage": 1.14, "elapsed_time": "0:00:03", "remaining_time": "0:05:34", "throughput": 5859.47, "total_tokens": 22656}
|
|
{"current_steps": 60, "total_steps": 4810, "loss": 0.3162, "lr": 6.133056133056134e-07, "epoch": 0.062370062370062374, "percentage": 1.25, "elapsed_time": "0:00:04", "remaining_time": "0:05:31", "throughput": 5883.2, "total_tokens": 24640}
|
|
{"current_steps": 65, "total_steps": 4810, "loss": 0.3171, "lr": 6.652806652806654e-07, "epoch": 0.06756756756756757, "percentage": 1.35, "elapsed_time": "0:00:04", "remaining_time": "0:05:29", "throughput": 5928.15, "total_tokens": 26752}
|
|
{"current_steps": 70, "total_steps": 4810, "loss": 0.4943, "lr": 7.172557172557173e-07, "epoch": 0.07276507276507277, "percentage": 1.46, "elapsed_time": "0:00:04", "remaining_time": "0:05:28", "throughput": 5898.9, "total_tokens": 28608}
|
|
{"current_steps": 75, "total_steps": 4810, "loss": 0.3067, "lr": 7.692307692307694e-07, "epoch": 0.07796257796257797, "percentage": 1.56, "elapsed_time": "0:00:05", "remaining_time": "0:05:27", "throughput": 5957.08, "total_tokens": 30912}
|
|
{"current_steps": 80, "total_steps": 4810, "loss": 0.5316, "lr": 8.212058212058213e-07, "epoch": 0.08316008316008316, "percentage": 1.66, "elapsed_time": "0:00:05", "remaining_time": "0:05:25", "throughput": 5967.54, "total_tokens": 32896}
|
|
{"current_steps": 85, "total_steps": 4810, "loss": 0.3228, "lr": 8.731808731808733e-07, "epoch": 0.08835758835758836, "percentage": 1.77, "elapsed_time": "0:00:05", "remaining_time": "0:05:24", "throughput": 5965.9, "total_tokens": 34816}
|
|
{"current_steps": 90, "total_steps": 4810, "loss": 0.3339, "lr": 9.251559251559253e-07, "epoch": 0.09355509355509356, "percentage": 1.87, "elapsed_time": "0:00:06", "remaining_time": "0:05:22", "throughput": 5966.22, "total_tokens": 36736}
|
|
{"current_steps": 95, "total_steps": 4810, "loss": 0.2951, "lr": 9.771309771309773e-07, "epoch": 0.09875259875259876, "percentage": 1.98, "elapsed_time": "0:00:06", "remaining_time": "0:05:21", "throughput": 5974.7, "total_tokens": 38720}
|
|
{"current_steps": 100, "total_steps": 4810, "loss": 0.2279, "lr": 1.0291060291060292e-06, "epoch": 0.10395010395010396, "percentage": 2.08, "elapsed_time": "0:00:06", "remaining_time": "0:05:20", "throughput": 5971.58, "total_tokens": 40640}
|
|
{"current_steps": 105, "total_steps": 4810, "loss": 0.2562, "lr": 1.0810810810810812e-06, "epoch": 0.10914760914760915, "percentage": 2.18, "elapsed_time": "0:00:07", "remaining_time": "0:05:19", "throughput": 5987.29, "total_tokens": 42688}
|
|
{"current_steps": 110, "total_steps": 4810, "loss": 0.2794, "lr": 1.1330561330561333e-06, "epoch": 0.11434511434511435, "percentage": 2.29, "elapsed_time": "0:00:07", "remaining_time": "0:05:18", "throughput": 5978.08, "total_tokens": 44544}
|
|
{"current_steps": 115, "total_steps": 4810, "loss": 0.2511, "lr": 1.1850311850311852e-06, "epoch": 0.11954261954261955, "percentage": 2.39, "elapsed_time": "0:00:07", "remaining_time": "0:05:17", "throughput": 5969.5, "total_tokens": 46400}
|
|
{"current_steps": 120, "total_steps": 4810, "loss": 0.2503, "lr": 1.2370062370062372e-06, "epoch": 0.12474012474012475, "percentage": 2.49, "elapsed_time": "0:00:08", "remaining_time": "0:05:16", "throughput": 5983.08, "total_tokens": 48448}
|
|
{"current_steps": 125, "total_steps": 4810, "loss": 0.3088, "lr": 1.288981288981289e-06, "epoch": 0.12993762993762994, "percentage": 2.6, "elapsed_time": "0:00:08", "remaining_time": "0:05:15", "throughput": 5996.41, "total_tokens": 50496}
|
|
{"current_steps": 130, "total_steps": 4810, "loss": 0.2409, "lr": 1.340956340956341e-06, "epoch": 0.13513513513513514, "percentage": 2.7, "elapsed_time": "0:00:08", "remaining_time": "0:05:14", "throughput": 5994.04, "total_tokens": 52416}
|
|
{"current_steps": 135, "total_steps": 4810, "loss": 0.2575, "lr": 1.3929313929313932e-06, "epoch": 0.14033264033264034, "percentage": 2.81, "elapsed_time": "0:00:09", "remaining_time": "0:05:14", "throughput": 6006.12, "total_tokens": 54464}
|
|
{"current_steps": 140, "total_steps": 4810, "loss": 0.2283, "lr": 1.4449064449064451e-06, "epoch": 0.14553014553014554, "percentage": 2.91, "elapsed_time": "0:00:09", "remaining_time": "0:05:13", "throughput": 6010.95, "total_tokens": 56448}
|
|
{"current_steps": 145, "total_steps": 4810, "loss": 0.2004, "lr": 1.496881496881497e-06, "epoch": 0.15072765072765074, "percentage": 3.01, "elapsed_time": "0:00:09", "remaining_time": "0:05:12", "throughput": 6009.87, "total_tokens": 58368}
|
|
{"current_steps": 150, "total_steps": 4810, "loss": 0.3166, "lr": 1.548856548856549e-06, "epoch": 0.15592515592515593, "percentage": 3.12, "elapsed_time": "0:00:10", "remaining_time": "0:05:11", "throughput": 6032.8, "total_tokens": 60544}
|
|
{"current_steps": 155, "total_steps": 4810, "loss": 0.3419, "lr": 1.6008316008316011e-06, "epoch": 0.16112266112266113, "percentage": 3.22, "elapsed_time": "0:00:10", "remaining_time": "0:05:11", "throughput": 6042.22, "total_tokens": 62592}
|
|
{"current_steps": 160, "total_steps": 4810, "loss": 0.2605, "lr": 1.652806652806653e-06, "epoch": 0.16632016632016633, "percentage": 3.33, "elapsed_time": "0:00:10", "remaining_time": "0:05:10", "throughput": 6045.74, "total_tokens": 64576}
|
|
{"current_steps": 165, "total_steps": 4810, "loss": 0.2617, "lr": 1.704781704781705e-06, "epoch": 0.17151767151767153, "percentage": 3.43, "elapsed_time": "0:00:11", "remaining_time": "0:05:09", "throughput": 6059.71, "total_tokens": 66688}
|
|
{"current_steps": 170, "total_steps": 4810, "loss": 0.2505, "lr": 1.756756756756757e-06, "epoch": 0.17671517671517672, "percentage": 3.53, "elapsed_time": "0:00:11", "remaining_time": "0:05:09", "throughput": 6051.99, "total_tokens": 68544}
|
|
{"current_steps": 175, "total_steps": 4810, "loss": 0.2672, "lr": 1.808731808731809e-06, "epoch": 0.18191268191268192, "percentage": 3.64, "elapsed_time": "0:00:11", "remaining_time": "0:05:08", "throughput": 6059.76, "total_tokens": 70592}
|
|
{"current_steps": 180, "total_steps": 4810, "loss": 0.2488, "lr": 1.860706860706861e-06, "epoch": 0.18711018711018712, "percentage": 3.74, "elapsed_time": "0:00:11", "remaining_time": "0:05:07", "throughput": 6062.52, "total_tokens": 72576}
|
|
{"current_steps": 185, "total_steps": 4810, "loss": 0.1863, "lr": 1.912681912681913e-06, "epoch": 0.19230769230769232, "percentage": 3.85, "elapsed_time": "0:00:12", "remaining_time": "0:05:07", "throughput": 6070.09, "total_tokens": 74624}
|
|
{"current_steps": 190, "total_steps": 4810, "loss": 0.1639, "lr": 1.964656964656965e-06, "epoch": 0.19750519750519752, "percentage": 3.95, "elapsed_time": "0:00:12", "remaining_time": "0:05:06", "throughput": 6072.27, "total_tokens": 76608}
|
|
{"current_steps": 195, "total_steps": 4810, "loss": 0.3799, "lr": 2.016632016632017e-06, "epoch": 0.20270270270270271, "percentage": 4.05, "elapsed_time": "0:00:12", "remaining_time": "0:05:06", "throughput": 6083.92, "total_tokens": 78720}
|
|
{"current_steps": 200, "total_steps": 4810, "loss": 0.455, "lr": 2.0686070686070687e-06, "epoch": 0.2079002079002079, "percentage": 4.16, "elapsed_time": "0:00:13", "remaining_time": "0:05:06", "throughput": 6112.65, "total_tokens": 81152}
|
|
{"current_steps": 205, "total_steps": 4810, "loss": 0.2303, "lr": 2.120582120582121e-06, "epoch": 0.2130977130977131, "percentage": 4.26, "elapsed_time": "0:00:13", "remaining_time": "0:05:05", "throughput": 6118.26, "total_tokens": 83200}
|
|
{"current_steps": 210, "total_steps": 4810, "loss": 0.2985, "lr": 2.172557172557173e-06, "epoch": 0.2182952182952183, "percentage": 4.37, "elapsed_time": "0:00:13", "remaining_time": "0:05:04", "throughput": 6118.97, "total_tokens": 85184}
|
|
{"current_steps": 215, "total_steps": 4810, "loss": 0.1462, "lr": 2.2245322245322247e-06, "epoch": 0.2234927234927235, "percentage": 4.47, "elapsed_time": "0:00:14", "remaining_time": "0:05:04", "throughput": 6123.62, "total_tokens": 87232}
|
|
{"current_steps": 220, "total_steps": 4810, "loss": 0.2323, "lr": 2.276507276507277e-06, "epoch": 0.2286902286902287, "percentage": 4.57, "elapsed_time": "0:00:14", "remaining_time": "0:05:03", "throughput": 6120.1, "total_tokens": 89152}
|
|
{"current_steps": 225, "total_steps": 4810, "loss": 0.3453, "lr": 2.3284823284823286e-06, "epoch": 0.2338877338877339, "percentage": 4.68, "elapsed_time": "0:00:14", "remaining_time": "0:05:03", "throughput": 6132.76, "total_tokens": 91328}
|
|
{"current_steps": 230, "total_steps": 4810, "loss": 0.1965, "lr": 2.3804573804573807e-06, "epoch": 0.2390852390852391, "percentage": 4.78, "elapsed_time": "0:00:15", "remaining_time": "0:05:02", "throughput": 6133.17, "total_tokens": 93312}
|
|
{"current_steps": 235, "total_steps": 4810, "loss": 0.186, "lr": 2.432432432432433e-06, "epoch": 0.2442827442827443, "percentage": 4.89, "elapsed_time": "0:00:15", "remaining_time": "0:05:02", "throughput": 6133.43, "total_tokens": 95296}
|
|
{"current_steps": 240, "total_steps": 4810, "loss": 0.2021, "lr": 2.4844074844074846e-06, "epoch": 0.2494802494802495, "percentage": 4.99, "elapsed_time": "0:00:15", "remaining_time": "0:05:01", "throughput": 6129.99, "total_tokens": 97216}
|
|
{"current_steps": 241, "total_steps": 4810, "eval_loss": 0.2780425250530243, "epoch": 0.2505197505197505, "percentage": 5.01, "elapsed_time": "0:00:16", "remaining_time": "0:05:20", "throughput": 5768.67, "total_tokens": 97664}
|
|
{"current_steps": 245, "total_steps": 4810, "loss": 0.2316, "lr": 2.5363825363825367e-06, "epoch": 0.25467775467775466, "percentage": 5.09, "elapsed_time": "0:01:58", "remaining_time": "0:36:52", "throughput": 835.93, "total_tokens": 99264}
|
|
{"current_steps": 250, "total_steps": 4810, "loss": 0.2509, "lr": 2.5883575883575885e-06, "epoch": 0.2598752598752599, "percentage": 5.2, "elapsed_time": "0:01:59", "remaining_time": "0:36:11", "throughput": 849.79, "total_tokens": 101184}
|
|
{"current_steps": 255, "total_steps": 4810, "loss": 0.1696, "lr": 2.6403326403326406e-06, "epoch": 0.26507276507276506, "percentage": 5.3, "elapsed_time": "0:01:59", "remaining_time": "0:35:32", "throughput": 865.18, "total_tokens": 103296}
|
|
{"current_steps": 260, "total_steps": 4810, "loss": 0.3451, "lr": 2.6923076923076923e-06, "epoch": 0.2702702702702703, "percentage": 5.41, "elapsed_time": "0:01:59", "remaining_time": "0:34:55", "throughput": 879.95, "total_tokens": 105344}
|
|
{"current_steps": 265, "total_steps": 4810, "loss": 0.1851, "lr": 2.7442827442827445e-06, "epoch": 0.27546777546777546, "percentage": 5.51, "elapsed_time": "0:02:00", "remaining_time": "0:34:18", "throughput": 894.65, "total_tokens": 107392}
|
|
{"current_steps": 270, "total_steps": 4810, "loss": 0.2177, "lr": 2.796257796257796e-06, "epoch": 0.2806652806652807, "percentage": 5.61, "elapsed_time": "0:02:00", "remaining_time": "0:33:43", "throughput": 909.27, "total_tokens": 109440}
|
|
{"current_steps": 275, "total_steps": 4810, "loss": 0.2767, "lr": 2.8482328482328488e-06, "epoch": 0.28586278586278585, "percentage": 5.72, "elapsed_time": "0:02:00", "remaining_time": "0:33:10", "throughput": 923.27, "total_tokens": 111424}
|
|
{"current_steps": 280, "total_steps": 4810, "loss": 0.3802, "lr": 2.9002079002079005e-06, "epoch": 0.2910602910602911, "percentage": 5.82, "elapsed_time": "0:02:01", "remaining_time": "0:32:37", "throughput": 937.2, "total_tokens": 113408}
|
|
{"current_steps": 285, "total_steps": 4810, "loss": 0.2173, "lr": 2.9521829521829526e-06, "epoch": 0.29625779625779625, "percentage": 5.93, "elapsed_time": "0:02:01", "remaining_time": "0:32:06", "throughput": 951.06, "total_tokens": 115392}
|
|
{"current_steps": 290, "total_steps": 4810, "loss": 0.2155, "lr": 3.0041580041580043e-06, "epoch": 0.30145530145530147, "percentage": 6.03, "elapsed_time": "0:02:01", "remaining_time": "0:31:36", "throughput": 965.36, "total_tokens": 117440}
|
|
{"current_steps": 295, "total_steps": 4810, "loss": 0.2116, "lr": 3.0561330561330565e-06, "epoch": 0.30665280665280664, "percentage": 6.13, "elapsed_time": "0:02:01", "remaining_time": "0:31:06", "throughput": 979.05, "total_tokens": 119424}
|
|
{"current_steps": 300, "total_steps": 4810, "loss": 0.2158, "lr": 3.1081081081081082e-06, "epoch": 0.31185031185031187, "percentage": 6.24, "elapsed_time": "0:02:02", "remaining_time": "0:30:38", "throughput": 992.17, "total_tokens": 121344}
|
|
{"current_steps": 305, "total_steps": 4810, "loss": 0.2048, "lr": 3.1600831600831604e-06, "epoch": 0.31704781704781704, "percentage": 6.34, "elapsed_time": "0:02:02", "remaining_time": "0:30:11", "throughput": 1005.21, "total_tokens": 123264}
|
|
{"current_steps": 310, "total_steps": 4810, "loss": 0.2194, "lr": 3.212058212058212e-06, "epoch": 0.32224532224532226, "percentage": 6.44, "elapsed_time": "0:02:02", "remaining_time": "0:29:44", "throughput": 1018.19, "total_tokens": 125184}
|
|
{"current_steps": 315, "total_steps": 4810, "loss": 0.1868, "lr": 3.2640332640332646e-06, "epoch": 0.32744282744282743, "percentage": 6.55, "elapsed_time": "0:02:03", "remaining_time": "0:29:19", "throughput": 1032.65, "total_tokens": 127296}
|
|
{"current_steps": 320, "total_steps": 4810, "loss": 0.217, "lr": 3.3160083160083164e-06, "epoch": 0.33264033264033266, "percentage": 6.65, "elapsed_time": "0:02:03", "remaining_time": "0:28:54", "throughput": 1047.03, "total_tokens": 129408}
|
|
{"current_steps": 325, "total_steps": 4810, "loss": 0.179, "lr": 3.3679833679833685e-06, "epoch": 0.33783783783783783, "percentage": 6.76, "elapsed_time": "0:02:03", "remaining_time": "0:28:30", "throughput": 1061.33, "total_tokens": 131520}
|
|
{"current_steps": 330, "total_steps": 4810, "loss": 0.2437, "lr": 3.4199584199584202e-06, "epoch": 0.34303534303534305, "percentage": 6.86, "elapsed_time": "0:02:04", "remaining_time": "0:28:06", "throughput": 1075.06, "total_tokens": 133568}
|
|
{"current_steps": 335, "total_steps": 4810, "loss": 0.1981, "lr": 3.4719334719334724e-06, "epoch": 0.3482328482328482, "percentage": 6.96, "elapsed_time": "0:02:04", "remaining_time": "0:27:43", "throughput": 1088.72, "total_tokens": 135616}
|
|
{"current_steps": 340, "total_steps": 4810, "loss": 0.3892, "lr": 3.523908523908524e-06, "epoch": 0.35343035343035345, "percentage": 7.07, "elapsed_time": "0:02:04", "remaining_time": "0:27:21", "throughput": 1102.31, "total_tokens": 137664}
|
|
{"current_steps": 345, "total_steps": 4810, "loss": 0.1327, "lr": 3.5758835758835762e-06, "epoch": 0.3586278586278586, "percentage": 7.17, "elapsed_time": "0:02:05", "remaining_time": "0:27:00", "throughput": 1114.82, "total_tokens": 139584}
|
|
{"current_steps": 350, "total_steps": 4810, "loss": 0.2165, "lr": 3.627858627858628e-06, "epoch": 0.36382536382536385, "percentage": 7.28, "elapsed_time": "0:02:05", "remaining_time": "0:26:39", "throughput": 1127.25, "total_tokens": 141504}
|
|
{"current_steps": 355, "total_steps": 4810, "loss": 0.2907, "lr": 3.6798336798336805e-06, "epoch": 0.369022869022869, "percentage": 7.38, "elapsed_time": "0:02:05", "remaining_time": "0:26:19", "throughput": 1140.64, "total_tokens": 143552}
|
|
{"current_steps": 360, "total_steps": 4810, "loss": 0.3012, "lr": 3.7318087318087322e-06, "epoch": 0.37422037422037424, "percentage": 7.48, "elapsed_time": "0:02:06", "remaining_time": "0:25:59", "throughput": 1153.46, "total_tokens": 145536}
|
|
{"current_steps": 365, "total_steps": 4810, "loss": 0.2293, "lr": 3.7837837837837844e-06, "epoch": 0.3794178794178794, "percentage": 7.59, "elapsed_time": "0:02:06", "remaining_time": "0:25:40", "throughput": 1165.73, "total_tokens": 147456}
|
|
{"current_steps": 370, "total_steps": 4810, "loss": 0.1782, "lr": 3.835758835758836e-06, "epoch": 0.38461538461538464, "percentage": 7.69, "elapsed_time": "0:02:06", "remaining_time": "0:25:21", "throughput": 1178.44, "total_tokens": 149440}
|
|
{"current_steps": 375, "total_steps": 4810, "loss": 0.3696, "lr": 3.887733887733889e-06, "epoch": 0.3898128898128898, "percentage": 7.8, "elapsed_time": "0:02:07", "remaining_time": "0:25:03", "throughput": 1190.6, "total_tokens": 151360}
|
|
{"current_steps": 380, "total_steps": 4810, "loss": 0.309, "lr": 3.9397089397089396e-06, "epoch": 0.39501039501039503, "percentage": 7.9, "elapsed_time": "0:02:07", "remaining_time": "0:24:45", "throughput": 1203.19, "total_tokens": 153344}
|
|
{"current_steps": 385, "total_steps": 4810, "loss": 0.2413, "lr": 3.991683991683992e-06, "epoch": 0.4002079002079002, "percentage": 8.0, "elapsed_time": "0:02:07", "remaining_time": "0:24:28", "throughput": 1215.23, "total_tokens": 155264}
|
|
{"current_steps": 390, "total_steps": 4810, "loss": 0.3064, "lr": 4.043659043659044e-06, "epoch": 0.40540540540540543, "percentage": 8.11, "elapsed_time": "0:02:08", "remaining_time": "0:24:11", "throughput": 1227.69, "total_tokens": 157248}
|
|
{"current_steps": 395, "total_steps": 4810, "loss": 0.2798, "lr": 4.095634095634096e-06, "epoch": 0.4106029106029106, "percentage": 8.21, "elapsed_time": "0:02:08", "remaining_time": "0:23:55", "throughput": 1240.58, "total_tokens": 159296}
|
|
{"current_steps": 400, "total_steps": 4810, "loss": 0.3488, "lr": 4.147609147609148e-06, "epoch": 0.4158004158004158, "percentage": 8.32, "elapsed_time": "0:02:08", "remaining_time": "0:23:39", "throughput": 1253.41, "total_tokens": 161344}
|
|
{"current_steps": 405, "total_steps": 4810, "loss": 0.2072, "lr": 4.1995841995842e-06, "epoch": 0.420997920997921, "percentage": 8.42, "elapsed_time": "0:02:09", "remaining_time": "0:23:23", "throughput": 1265.68, "total_tokens": 163328}
|
|
{"current_steps": 410, "total_steps": 4810, "loss": 0.1704, "lr": 4.2515592515592516e-06, "epoch": 0.4261954261954262, "percentage": 8.52, "elapsed_time": "0:02:09", "remaining_time": "0:23:08", "throughput": 1277.89, "total_tokens": 165312}
|
|
{"current_steps": 415, "total_steps": 4810, "loss": 0.0573, "lr": 4.303534303534304e-06, "epoch": 0.4313929313929314, "percentage": 8.63, "elapsed_time": "0:02:09", "remaining_time": "0:22:53", "throughput": 1290.54, "total_tokens": 167360}
|
|
{"current_steps": 420, "total_steps": 4810, "loss": 0.9576, "lr": 4.355509355509356e-06, "epoch": 0.4365904365904366, "percentage": 8.73, "elapsed_time": "0:02:10", "remaining_time": "0:22:38", "throughput": 1302.64, "total_tokens": 169344}
|
|
{"current_steps": 425, "total_steps": 4810, "loss": 0.3222, "lr": 4.4074844074844084e-06, "epoch": 0.4417879417879418, "percentage": 8.84, "elapsed_time": "0:02:10", "remaining_time": "0:22:24", "throughput": 1315.63, "total_tokens": 171456}
|
|
{"current_steps": 430, "total_steps": 4810, "loss": 0.3442, "lr": 4.45945945945946e-06, "epoch": 0.446985446985447, "percentage": 8.94, "elapsed_time": "0:02:10", "remaining_time": "0:22:10", "throughput": 1328.57, "total_tokens": 173568}
|
|
{"current_steps": 435, "total_steps": 4810, "loss": 0.1851, "lr": 4.511434511434512e-06, "epoch": 0.4521829521829522, "percentage": 9.04, "elapsed_time": "0:02:10", "remaining_time": "0:21:57", "throughput": 1340.46, "total_tokens": 175552}
|
|
{"current_steps": 440, "total_steps": 4810, "loss": 0.2573, "lr": 4.563409563409564e-06, "epoch": 0.4573804573804574, "percentage": 9.15, "elapsed_time": "0:02:11", "remaining_time": "0:21:43", "throughput": 1352.29, "total_tokens": 177536}
|
|
{"current_steps": 445, "total_steps": 4810, "loss": 0.2972, "lr": 4.615384615384616e-06, "epoch": 0.4625779625779626, "percentage": 9.25, "elapsed_time": "0:02:11", "remaining_time": "0:21:30", "throughput": 1364.54, "total_tokens": 179584}
|
|
{"current_steps": 450, "total_steps": 4810, "loss": 0.2247, "lr": 4.667359667359668e-06, "epoch": 0.4677754677754678, "percentage": 9.36, "elapsed_time": "0:02:11", "remaining_time": "0:21:18", "throughput": 1376.29, "total_tokens": 181568}
|
|
{"current_steps": 455, "total_steps": 4810, "loss": 0.2355, "lr": 4.71933471933472e-06, "epoch": 0.47297297297297297, "percentage": 9.46, "elapsed_time": "0:02:12", "remaining_time": "0:21:05", "throughput": 1387.97, "total_tokens": 183552}
|
|
{"current_steps": 460, "total_steps": 4810, "loss": 0.1821, "lr": 4.771309771309771e-06, "epoch": 0.4781704781704782, "percentage": 9.56, "elapsed_time": "0:02:12", "remaining_time": "0:20:53", "throughput": 1400.06, "total_tokens": 185600}
|
|
{"current_steps": 465, "total_steps": 4810, "loss": 0.1938, "lr": 4.823284823284824e-06, "epoch": 0.48336798336798337, "percentage": 9.67, "elapsed_time": "0:02:12", "remaining_time": "0:20:41", "throughput": 1411.62, "total_tokens": 187584}
|
|
{"current_steps": 470, "total_steps": 4810, "loss": 0.2747, "lr": 4.875259875259876e-06, "epoch": 0.4885654885654886, "percentage": 9.77, "elapsed_time": "0:02:13", "remaining_time": "0:20:30", "throughput": 1423.13, "total_tokens": 189568}
|
|
{"current_steps": 475, "total_steps": 4810, "loss": 0.2394, "lr": 4.927234927234928e-06, "epoch": 0.49376299376299376, "percentage": 9.88, "elapsed_time": "0:02:13", "remaining_time": "0:20:18", "throughput": 1435.53, "total_tokens": 191680}
|
|
{"current_steps": 480, "total_steps": 4810, "loss": 0.2402, "lr": 4.97920997920998e-06, "epoch": 0.498960498960499, "percentage": 9.98, "elapsed_time": "0:02:13", "remaining_time": "0:20:07", "throughput": 1447.4, "total_tokens": 193728}
|
|
{"current_steps": 482, "total_steps": 4810, "eval_loss": 0.20022711157798767, "epoch": 0.501039501039501, "percentage": 10.02, "elapsed_time": "0:02:14", "remaining_time": "0:20:12", "throughput": 1441.25, "total_tokens": 194560}
|
|
{"current_steps": 485, "total_steps": 4810, "loss": 0.1873, "lr": 4.999994075155936e-06, "epoch": 0.5041580041580042, "percentage": 10.08, "elapsed_time": "0:03:02", "remaining_time": "0:27:06", "throughput": 1073.4, "total_tokens": 195776}
|
|
{"current_steps": 490, "total_steps": 4810, "loss": 0.1905, "lr": 4.999957867877242e-06, "epoch": 0.5093555093555093, "percentage": 10.19, "elapsed_time": "0:03:02", "remaining_time": "0:26:50", "throughput": 1082.04, "total_tokens": 197696}
|
|
{"current_steps": 495, "total_steps": 4810, "loss": 0.1952, "lr": 4.999888745376028e-06, "epoch": 0.5145530145530145, "percentage": 10.29, "elapsed_time": "0:03:03", "remaining_time": "0:26:35", "throughput": 1091.0, "total_tokens": 199680}
|
|
{"current_steps": 500, "total_steps": 4810, "loss": 0.2149, "lr": 4.999786708562382e-06, "epoch": 0.5197505197505198, "percentage": 10.4, "elapsed_time": "0:03:03", "remaining_time": "0:26:20", "throughput": 1100.61, "total_tokens": 201792}
|
|
{"current_steps": 505, "total_steps": 4810, "loss": 0.2066, "lr": 4.999651758779753e-06, "epoch": 0.524948024948025, "percentage": 10.5, "elapsed_time": "0:03:03", "remaining_time": "0:26:05", "throughput": 1109.81, "total_tokens": 203840}
|
|
{"current_steps": 510, "total_steps": 4810, "loss": 0.2161, "lr": 4.999483897804933e-06, "epoch": 0.5301455301455301, "percentage": 10.6, "elapsed_time": "0:03:03", "remaining_time": "0:25:51", "throughput": 1118.63, "total_tokens": 205824}
|
|
{"current_steps": 515, "total_steps": 4810, "loss": 0.1777, "lr": 4.999283127848029e-06, "epoch": 0.5353430353430353, "percentage": 10.71, "elapsed_time": "0:03:04", "remaining_time": "0:25:37", "throughput": 1128.13, "total_tokens": 207936}
|
|
{"current_steps": 520, "total_steps": 4810, "loss": 0.1931, "lr": 4.999049451552443e-06, "epoch": 0.5405405405405406, "percentage": 10.81, "elapsed_time": "0:03:04", "remaining_time": "0:25:23", "throughput": 1137.24, "total_tokens": 209984}
|
|
{"current_steps": 525, "total_steps": 4810, "loss": 0.3235, "lr": 4.998782871994828e-06, "epoch": 0.5457380457380457, "percentage": 10.91, "elapsed_time": "0:03:04", "remaining_time": "0:25:09", "throughput": 1146.67, "total_tokens": 212096}
|
|
{"current_steps": 530, "total_steps": 4810, "loss": 0.2083, "lr": 4.998483392685055e-06, "epoch": 0.5509355509355509, "percentage": 11.02, "elapsed_time": "0:03:05", "remaining_time": "0:24:56", "throughput": 1155.38, "total_tokens": 214080}
|
|
{"current_steps": 535, "total_steps": 4810, "loss": 0.2592, "lr": 4.9981510175661606e-06, "epoch": 0.5561330561330561, "percentage": 11.12, "elapsed_time": "0:03:05", "remaining_time": "0:24:43", "throughput": 1164.4, "total_tokens": 216128}
|
|
{"current_steps": 540, "total_steps": 4810, "loss": 0.2199, "lr": 4.9977857510143e-06, "epoch": 0.5613305613305614, "percentage": 11.23, "elapsed_time": "0:03:05", "remaining_time": "0:24:30", "throughput": 1173.38, "total_tokens": 218176}
|
|
{"current_steps": 545, "total_steps": 4810, "loss": 0.1414, "lr": 4.997387597838684e-06, "epoch": 0.5665280665280665, "percentage": 11.33, "elapsed_time": "0:03:06", "remaining_time": "0:24:17", "throughput": 1181.66, "total_tokens": 220096}
|
|
{"current_steps": 550, "total_steps": 4810, "loss": 0.1874, "lr": 4.996956563281524e-06, "epoch": 0.5717255717255717, "percentage": 11.43, "elapsed_time": "0:03:06", "remaining_time": "0:24:05", "throughput": 1190.24, "total_tokens": 222080}
|
|
{"current_steps": 555, "total_steps": 4810, "loss": 0.2643, "lr": 4.996492653017953e-06, "epoch": 0.5769230769230769, "percentage": 11.54, "elapsed_time": "0:03:06", "remaining_time": "0:23:52", "throughput": 1198.47, "total_tokens": 224000}
|
|
{"current_steps": 560, "total_steps": 4810, "loss": 0.2975, "lr": 4.995995873155958e-06, "epoch": 0.5821205821205822, "percentage": 11.64, "elapsed_time": "0:03:07", "remaining_time": "0:23:40", "throughput": 1207.01, "total_tokens": 225984}
|
|
{"current_steps": 565, "total_steps": 4810, "loss": 0.1955, "lr": 4.995466230236298e-06, "epoch": 0.5873180873180873, "percentage": 11.75, "elapsed_time": "0:03:07", "remaining_time": "0:23:29", "throughput": 1214.83, "total_tokens": 227840}
|
|
{"current_steps": 570, "total_steps": 4810, "loss": 0.2476, "lr": 4.994903731232415e-06, "epoch": 0.5925155925155925, "percentage": 11.85, "elapsed_time": "0:03:07", "remaining_time": "0:23:17", "throughput": 1223.3, "total_tokens": 229824}
|
|
{"current_steps": 575, "total_steps": 4810, "loss": 0.213, "lr": 4.994308383550347e-06, "epoch": 0.5977130977130977, "percentage": 11.95, "elapsed_time": "0:03:08", "remaining_time": "0:23:06", "throughput": 1232.09, "total_tokens": 231872}
|
|
{"current_steps": 580, "total_steps": 4810, "loss": 0.2039, "lr": 4.993680195028626e-06, "epoch": 0.6029106029106029, "percentage": 12.06, "elapsed_time": "0:03:08", "remaining_time": "0:22:54", "throughput": 1240.84, "total_tokens": 233920}
|
|
{"current_steps": 585, "total_steps": 4810, "loss": 0.2036, "lr": 4.993019173938178e-06, "epoch": 0.6081081081081081, "percentage": 12.16, "elapsed_time": "0:03:08", "remaining_time": "0:22:43", "throughput": 1248.89, "total_tokens": 235840}
|
|
{"current_steps": 590, "total_steps": 4810, "loss": 0.2111, "lr": 4.992325328982212e-06, "epoch": 0.6133056133056133, "percentage": 12.27, "elapsed_time": "0:03:09", "remaining_time": "0:22:33", "throughput": 1258.25, "total_tokens": 238016}
|
|
{"current_steps": 595, "total_steps": 4810, "loss": 0.1706, "lr": 4.991598669296105e-06, "epoch": 0.6185031185031185, "percentage": 12.37, "elapsed_time": "0:03:09", "remaining_time": "0:22:22", "throughput": 1266.9, "total_tokens": 240064}
|
|
{"current_steps": 600, "total_steps": 4810, "loss": 0.2236, "lr": 4.990839204447287e-06, "epoch": 0.6237006237006237, "percentage": 12.47, "elapsed_time": "0:03:09", "remaining_time": "0:22:11", "throughput": 1275.19, "total_tokens": 242048}
|
|
{"current_steps": 605, "total_steps": 4810, "loss": 0.1908, "lr": 4.990046944435105e-06, "epoch": 0.6288981288981289, "percentage": 12.58, "elapsed_time": "0:03:10", "remaining_time": "0:22:01", "throughput": 1283.14, "total_tokens": 243968}
|
|
{"current_steps": 610, "total_steps": 4810, "loss": 0.2409, "lr": 4.989221899690704e-06, "epoch": 0.6340956340956341, "percentage": 12.68, "elapsed_time": "0:03:10", "remaining_time": "0:21:51", "throughput": 1291.71, "total_tokens": 246016}
|
|
{"current_steps": 615, "total_steps": 4810, "loss": 0.2135, "lr": 4.988364081076877e-06, "epoch": 0.6392931392931392, "percentage": 12.79, "elapsed_time": "0:03:10", "remaining_time": "0:21:41", "throughput": 1299.93, "total_tokens": 248000}
|
|
{"current_steps": 620, "total_steps": 4810, "loss": 0.203, "lr": 4.987473499887932e-06, "epoch": 0.6444906444906445, "percentage": 12.89, "elapsed_time": "0:03:11", "remaining_time": "0:21:31", "throughput": 1308.44, "total_tokens": 250048}
|
|
{"current_steps": 625, "total_steps": 4810, "loss": 0.1867, "lr": 4.986550167849538e-06, "epoch": 0.6496881496881497, "percentage": 12.99, "elapsed_time": "0:03:11", "remaining_time": "0:21:21", "throughput": 1316.93, "total_tokens": 252096}
|
|
{"current_steps": 630, "total_steps": 4810, "loss": 0.1162, "lr": 4.9855940971185705e-06, "epoch": 0.6548856548856549, "percentage": 13.1, "elapsed_time": "0:03:11", "remaining_time": "0:21:12", "throughput": 1325.39, "total_tokens": 254144}
|
|
{"current_steps": 635, "total_steps": 4810, "loss": 0.2562, "lr": 4.984605300282955e-06, "epoch": 0.66008316008316, "percentage": 13.2, "elapsed_time": "0:03:12", "remaining_time": "0:21:02", "throughput": 1333.5, "total_tokens": 256128}
|
|
{"current_steps": 640, "total_steps": 4810, "loss": 0.1389, "lr": 4.983583790361497e-06, "epoch": 0.6652806652806653, "percentage": 13.31, "elapsed_time": "0:03:12", "remaining_time": "0:20:53", "throughput": 1341.24, "total_tokens": 258048}
|
|
{"current_steps": 645, "total_steps": 4810, "loss": 0.3054, "lr": 4.982529580803714e-06, "epoch": 0.6704781704781705, "percentage": 13.41, "elapsed_time": "0:03:12", "remaining_time": "0:20:44", "throughput": 1350.93, "total_tokens": 260352}
|
|
{"current_steps": 650, "total_steps": 4810, "loss": 0.2884, "lr": 4.981442685489659e-06, "epoch": 0.6756756756756757, "percentage": 13.51, "elapsed_time": "0:03:13", "remaining_time": "0:20:35", "throughput": 1358.61, "total_tokens": 262272}
|
|
{"current_steps": 655, "total_steps": 4810, "loss": 0.1599, "lr": 4.9803231187297305e-06, "epoch": 0.6808731808731808, "percentage": 13.62, "elapsed_time": "0:03:13", "remaining_time": "0:20:26", "throughput": 1366.93, "total_tokens": 264320}
|
|
{"current_steps": 660, "total_steps": 4810, "loss": 0.1946, "lr": 4.979170895264494e-06, "epoch": 0.6860706860706861, "percentage": 13.72, "elapsed_time": "0:03:13", "remaining_time": "0:20:17", "throughput": 1374.57, "total_tokens": 266240}
|
|
{"current_steps": 665, "total_steps": 4810, "loss": 0.2128, "lr": 4.977986030264483e-06, "epoch": 0.6912681912681913, "percentage": 13.83, "elapsed_time": "0:03:14", "remaining_time": "0:20:09", "throughput": 1382.51, "total_tokens": 268224}
|
|
{"current_steps": 670, "total_steps": 4810, "loss": 0.2326, "lr": 4.9767685393299946e-06, "epoch": 0.6964656964656964, "percentage": 13.93, "elapsed_time": "0:03:14", "remaining_time": "0:20:00", "throughput": 1390.75, "total_tokens": 270272}
|
|
{"current_steps": 675, "total_steps": 4810, "loss": 0.2276, "lr": 4.975518438490897e-06, "epoch": 0.7016632016632016, "percentage": 14.03, "elapsed_time": "0:03:14", "remaining_time": "0:19:52", "throughput": 1398.64, "total_tokens": 272256}
|
|
{"current_steps": 680, "total_steps": 4810, "loss": 0.1786, "lr": 4.974235744206405e-06, "epoch": 0.7068607068607069, "percentage": 14.14, "elapsed_time": "0:03:14", "remaining_time": "0:19:44", "throughput": 1406.5, "total_tokens": 274240}
|
|
{"current_steps": 685, "total_steps": 4810, "loss": 0.1923, "lr": 4.972920473364869e-06, "epoch": 0.7120582120582121, "percentage": 14.24, "elapsed_time": "0:03:15", "remaining_time": "0:19:36", "throughput": 1414.64, "total_tokens": 276288}
|
|
{"current_steps": 690, "total_steps": 4810, "loss": 0.1661, "lr": 4.971572643283557e-06, "epoch": 0.7172557172557172, "percentage": 14.35, "elapsed_time": "0:03:15", "remaining_time": "0:19:28", "throughput": 1422.45, "total_tokens": 278272}
|
|
{"current_steps": 695, "total_steps": 4810, "loss": 0.1867, "lr": 4.970192271708416e-06, "epoch": 0.7224532224532224, "percentage": 14.45, "elapsed_time": "0:03:15", "remaining_time": "0:19:20", "throughput": 1430.88, "total_tokens": 280384}
|
|
{"current_steps": 700, "total_steps": 4810, "loss": 0.3333, "lr": 4.968779376813849e-06, "epoch": 0.7276507276507277, "percentage": 14.55, "elapsed_time": "0:03:16", "remaining_time": "0:19:12", "throughput": 1438.63, "total_tokens": 282368}
|
|
{"current_steps": 705, "total_steps": 4810, "loss": 0.1327, "lr": 4.967333977202469e-06, "epoch": 0.7328482328482329, "percentage": 14.66, "elapsed_time": "0:03:16", "remaining_time": "0:19:04", "throughput": 1446.68, "total_tokens": 284416}
|
|
{"current_steps": 710, "total_steps": 4810, "loss": 0.2235, "lr": 4.965856091904855e-06, "epoch": 0.738045738045738, "percentage": 14.76, "elapsed_time": "0:03:16", "remaining_time": "0:18:57", "throughput": 1454.7, "total_tokens": 286464}
|
|
{"current_steps": 715, "total_steps": 4810, "loss": 0.3413, "lr": 4.964345740379307e-06, "epoch": 0.7432432432432432, "percentage": 14.86, "elapsed_time": "0:03:17", "remaining_time": "0:18:49", "throughput": 1462.39, "total_tokens": 288448}
|
|
{"current_steps": 720, "total_steps": 4810, "loss": 0.1906, "lr": 4.962802942511582e-06, "epoch": 0.7484407484407485, "percentage": 14.97, "elapsed_time": "0:03:17", "remaining_time": "0:18:42", "throughput": 1470.35, "total_tokens": 290496}
|
|
{"current_steps": 723, "total_steps": 4810, "eval_loss": 0.20943090319633484, "epoch": 0.7515592515592515, "percentage": 15.03, "elapsed_time": "0:03:18", "remaining_time": "0:18:43", "throughput": 1467.63, "total_tokens": 291712}
|
|
{"current_steps": 725, "total_steps": 4810, "loss": 0.2576, "lr": 4.961227718614634e-06, "epoch": 0.7536382536382537, "percentage": 15.07, "elapsed_time": "0:04:03", "remaining_time": "0:22:52", "throughput": 1200.99, "total_tokens": 292480}
|
|
{"current_steps": 730, "total_steps": 4810, "loss": 0.2352, "lr": 4.959620089428354e-06, "epoch": 0.7588357588357588, "percentage": 15.18, "elapsed_time": "0:04:03", "remaining_time": "0:22:42", "throughput": 1207.48, "total_tokens": 294464}
|
|
{"current_steps": 735, "total_steps": 4810, "loss": 0.2617, "lr": 4.957980076119285e-06, "epoch": 0.764033264033264, "percentage": 15.28, "elapsed_time": "0:04:04", "remaining_time": "0:22:33", "throughput": 1213.96, "total_tokens": 296448}
|
|
{"current_steps": 740, "total_steps": 4810, "loss": 0.2079, "lr": 4.956307700280354e-06, "epoch": 0.7692307692307693, "percentage": 15.38, "elapsed_time": "0:04:04", "remaining_time": "0:22:24", "throughput": 1220.42, "total_tokens": 298432}
|
|
{"current_steps": 745, "total_steps": 4810, "loss": 0.2712, "lr": 4.954602983930581e-06, "epoch": 0.7744282744282744, "percentage": 15.49, "elapsed_time": "0:04:04", "remaining_time": "0:22:16", "throughput": 1227.12, "total_tokens": 300480}
|
|
{"current_steps": 750, "total_steps": 4810, "loss": 0.2211, "lr": 4.95286594951479e-06, "epoch": 0.7796257796257796, "percentage": 15.59, "elapsed_time": "0:04:05", "remaining_time": "0:22:07", "throughput": 1233.29, "total_tokens": 302400}
|
|
{"current_steps": 755, "total_steps": 4810, "loss": 0.2161, "lr": 4.951096619903317e-06, "epoch": 0.7848232848232848, "percentage": 15.7, "elapsed_time": "0:04:05", "remaining_time": "0:21:58", "throughput": 1239.43, "total_tokens": 304320}
|
|
{"current_steps": 760, "total_steps": 4810, "loss": 0.1828, "lr": 4.949295018391707e-06, "epoch": 0.7900207900207901, "percentage": 15.8, "elapsed_time": "0:04:05", "remaining_time": "0:21:50", "throughput": 1245.58, "total_tokens": 306240}
|
|
{"current_steps": 765, "total_steps": 4810, "loss": 0.2155, "lr": 4.9474611687004025e-06, "epoch": 0.7952182952182952, "percentage": 15.9, "elapsed_time": "0:04:06", "remaining_time": "0:21:41", "throughput": 1251.17, "total_tokens": 308032}
|
|
{"current_steps": 770, "total_steps": 4810, "loss": 0.2009, "lr": 4.945595094974442e-06, "epoch": 0.8004158004158004, "percentage": 16.01, "elapsed_time": "0:04:06", "remaining_time": "0:21:33", "throughput": 1257.29, "total_tokens": 309952}
|
|
{"current_steps": 775, "total_steps": 4810, "loss": 0.1813, "lr": 4.94369682178313e-06, "epoch": 0.8056133056133056, "percentage": 16.11, "elapsed_time": "0:04:06", "remaining_time": "0:21:25", "throughput": 1263.69, "total_tokens": 311936}
|
|
{"current_steps": 780, "total_steps": 4810, "loss": 0.1603, "lr": 4.941766374119724e-06, "epoch": 0.8108108108108109, "percentage": 16.22, "elapsed_time": "0:04:07", "remaining_time": "0:21:17", "throughput": 1270.07, "total_tokens": 313920}
|
|
{"current_steps": 785, "total_steps": 4810, "loss": 0.2613, "lr": 4.939803777401096e-06, "epoch": 0.816008316008316, "percentage": 16.32, "elapsed_time": "0:04:07", "remaining_time": "0:21:08", "throughput": 1276.7, "total_tokens": 315968}
|
|
{"current_steps": 790, "total_steps": 4810, "loss": 0.2641, "lr": 4.937809057467404e-06, "epoch": 0.8212058212058212, "percentage": 16.42, "elapsed_time": "0:04:07", "remaining_time": "0:21:01", "throughput": 1283.05, "total_tokens": 317952}
|
|
{"current_steps": 795, "total_steps": 4810, "loss": 0.1934, "lr": 4.935782240581753e-06, "epoch": 0.8264033264033264, "percentage": 16.53, "elapsed_time": "0:04:08", "remaining_time": "0:20:53", "throughput": 1289.13, "total_tokens": 319872}
|
|
{"current_steps": 800, "total_steps": 4810, "loss": 0.2498, "lr": 4.933723353429842e-06, "epoch": 0.8316008316008316, "percentage": 16.63, "elapsed_time": "0:04:08", "remaining_time": "0:20:45", "throughput": 1295.44, "total_tokens": 321856}
|
|
{"current_steps": 805, "total_steps": 4810, "loss": 0.1671, "lr": 4.931632423119621e-06, "epoch": 0.8367983367983368, "percentage": 16.74, "elapsed_time": "0:04:08", "remaining_time": "0:20:37", "throughput": 1302.26, "total_tokens": 323968}
|
|
{"current_steps": 810, "total_steps": 4810, "loss": 0.2092, "lr": 4.929509477180929e-06, "epoch": 0.841995841995842, "percentage": 16.84, "elapsed_time": "0:04:09", "remaining_time": "0:20:30", "throughput": 1308.54, "total_tokens": 325952}
|
|
{"current_steps": 815, "total_steps": 4810, "loss": 0.0581, "lr": 4.927354543565131e-06, "epoch": 0.8471933471933472, "percentage": 16.94, "elapsed_time": "0:04:09", "remaining_time": "0:20:22", "throughput": 1315.07, "total_tokens": 328000}
|
|
{"current_steps": 820, "total_steps": 4810, "loss": 0.1592, "lr": 4.925167650644752e-06, "epoch": 0.8523908523908524, "percentage": 17.05, "elapsed_time": "0:04:09", "remaining_time": "0:20:15", "throughput": 1321.33, "total_tokens": 329984}
|
|
{"current_steps": 825, "total_steps": 4810, "loss": 0.4462, "lr": 4.922948827213107e-06, "epoch": 0.8575883575883576, "percentage": 17.15, "elapsed_time": "0:04:10", "remaining_time": "0:20:07", "throughput": 1327.32, "total_tokens": 331904}
|
|
{"current_steps": 830, "total_steps": 4810, "loss": 0.4518, "lr": 4.920698102483913e-06, "epoch": 0.8627858627858628, "percentage": 17.26, "elapsed_time": "0:04:10", "remaining_time": "0:20:00", "throughput": 1333.55, "total_tokens": 333888}
|
|
{"current_steps": 835, "total_steps": 4810, "loss": 0.2671, "lr": 4.9184155060909115e-06, "epoch": 0.867983367983368, "percentage": 17.36, "elapsed_time": "0:04:10", "remaining_time": "0:19:53", "throughput": 1339.76, "total_tokens": 335872}
|
|
{"current_steps": 840, "total_steps": 4810, "loss": 0.3681, "lr": 4.916101068087477e-06, "epoch": 0.8731808731808732, "percentage": 17.46, "elapsed_time": "0:04:11", "remaining_time": "0:19:46", "throughput": 1345.96, "total_tokens": 337856}
|
|
{"current_steps": 845, "total_steps": 4810, "loss": 0.2011, "lr": 4.9137548189462185e-06, "epoch": 0.8783783783783784, "percentage": 17.57, "elapsed_time": "0:04:11", "remaining_time": "0:19:39", "throughput": 1351.89, "total_tokens": 339776}
|
|
{"current_steps": 850, "total_steps": 4810, "loss": 0.1852, "lr": 4.911376789558584e-06, "epoch": 0.8835758835758836, "percentage": 17.67, "elapsed_time": "0:04:11", "remaining_time": "0:19:32", "throughput": 1358.05, "total_tokens": 341760}
|
|
{"current_steps": 855, "total_steps": 4810, "loss": 0.3553, "lr": 4.908967011234446e-06, "epoch": 0.8887733887733887, "percentage": 17.78, "elapsed_time": "0:04:11", "remaining_time": "0:19:25", "throughput": 1363.96, "total_tokens": 343680}
|
|
{"current_steps": 860, "total_steps": 4810, "loss": 0.2092, "lr": 4.9065255157016955e-06, "epoch": 0.893970893970894, "percentage": 17.88, "elapsed_time": "0:04:12", "remaining_time": "0:19:18", "throughput": 1369.84, "total_tokens": 345600}
|
|
{"current_steps": 865, "total_steps": 4810, "loss": 0.2165, "lr": 4.904052335105822e-06, "epoch": 0.8991683991683992, "percentage": 17.98, "elapsed_time": "0:04:12", "remaining_time": "0:19:12", "throughput": 1375.72, "total_tokens": 347520}
|
|
{"current_steps": 870, "total_steps": 4810, "loss": 0.1773, "lr": 4.90154750200949e-06, "epoch": 0.9043659043659044, "percentage": 18.09, "elapsed_time": "0:04:12", "remaining_time": "0:19:05", "throughput": 1382.07, "total_tokens": 349568}
|
|
{"current_steps": 875, "total_steps": 4810, "loss": 0.1146, "lr": 4.899011049392111e-06, "epoch": 0.9095634095634095, "percentage": 18.19, "elapsed_time": "0:04:13", "remaining_time": "0:18:58", "throughput": 1388.16, "total_tokens": 351552}
|
|
{"current_steps": 880, "total_steps": 4810, "loss": 0.1213, "lr": 4.896443010649408e-06, "epoch": 0.9147609147609148, "percentage": 18.3, "elapsed_time": "0:04:13", "remaining_time": "0:18:52", "throughput": 1393.98, "total_tokens": 353472}
|
|
{"current_steps": 885, "total_steps": 4810, "loss": 0.123, "lr": 4.893843419592977e-06, "epoch": 0.91995841995842, "percentage": 18.4, "elapsed_time": "0:04:13", "remaining_time": "0:18:46", "throughput": 1399.78, "total_tokens": 355392}
|
|
{"current_steps": 890, "total_steps": 4810, "loss": 0.1794, "lr": 4.891212310449845e-06, "epoch": 0.9251559251559252, "percentage": 18.5, "elapsed_time": "0:04:14", "remaining_time": "0:18:39", "throughput": 1406.08, "total_tokens": 357440}
|
|
{"current_steps": 895, "total_steps": 4810, "loss": 0.1822, "lr": 4.88854971786201e-06, "epoch": 0.9303534303534303, "percentage": 18.61, "elapsed_time": "0:04:14", "remaining_time": "0:18:33", "throughput": 1412.35, "total_tokens": 359488}
|
|
{"current_steps": 900, "total_steps": 4810, "loss": 0.282, "lr": 4.885855676885995e-06, "epoch": 0.9355509355509356, "percentage": 18.71, "elapsed_time": "0:04:14", "remaining_time": "0:18:27", "throughput": 1418.11, "total_tokens": 361408}
|
|
{"current_steps": 905, "total_steps": 4810, "loss": 0.1931, "lr": 4.88313022299238e-06, "epoch": 0.9407484407484408, "percentage": 18.81, "elapsed_time": "0:04:15", "remaining_time": "0:18:21", "throughput": 1424.11, "total_tokens": 363392}
|
|
{"current_steps": 910, "total_steps": 4810, "loss": 0.318, "lr": 4.880373392065339e-06, "epoch": 0.9459459459459459, "percentage": 18.92, "elapsed_time": "0:04:15", "remaining_time": "0:18:14", "throughput": 1430.35, "total_tokens": 365440}
|
|
{"current_steps": 915, "total_steps": 4810, "loss": 0.1793, "lr": 4.877585220402167e-06, "epoch": 0.9511434511434511, "percentage": 19.02, "elapsed_time": "0:04:15", "remaining_time": "0:18:08", "throughput": 1437.05, "total_tokens": 367616}
|
|
{"current_steps": 920, "total_steps": 4810, "loss": 0.1164, "lr": 4.874765744712796e-06, "epoch": 0.9563409563409564, "percentage": 19.13, "elapsed_time": "0:04:16", "remaining_time": "0:18:02", "throughput": 1443.0, "total_tokens": 369600}
|
|
{"current_steps": 925, "total_steps": 4810, "loss": 0.2515, "lr": 4.8719150021193206e-06, "epoch": 0.9615384615384616, "percentage": 19.23, "elapsed_time": "0:04:16", "remaining_time": "0:17:57", "throughput": 1448.7, "total_tokens": 371520}
|
|
{"current_steps": 930, "total_steps": 4810, "loss": 0.3492, "lr": 4.869033030155504e-06, "epoch": 0.9667359667359667, "percentage": 19.33, "elapsed_time": "0:04:16", "remaining_time": "0:17:51", "throughput": 1454.86, "total_tokens": 373568}
|
|
{"current_steps": 935, "total_steps": 4810, "loss": 0.1902, "lr": 4.866119866766286e-06, "epoch": 0.9719334719334719, "percentage": 19.44, "elapsed_time": "0:04:17", "remaining_time": "0:17:45", "throughput": 1460.53, "total_tokens": 375488}
|
|
{"current_steps": 940, "total_steps": 4810, "loss": 0.2238, "lr": 4.86317555030728e-06, "epoch": 0.9771309771309772, "percentage": 19.54, "elapsed_time": "0:04:17", "remaining_time": "0:17:39", "throughput": 1467.39, "total_tokens": 377728}
|
|
{"current_steps": 945, "total_steps": 4810, "loss": 0.11, "lr": 4.860200119544273e-06, "epoch": 0.9823284823284824, "percentage": 19.65, "elapsed_time": "0:04:17", "remaining_time": "0:17:34", "throughput": 1473.75, "total_tokens": 379840}
|
|
{"current_steps": 950, "total_steps": 4810, "loss": 0.2154, "lr": 4.857193613652711e-06, "epoch": 0.9875259875259875, "percentage": 19.75, "elapsed_time": "0:04:18", "remaining_time": "0:17:28", "throughput": 1479.37, "total_tokens": 381760}
|
|
{"current_steps": 955, "total_steps": 4810, "loss": 0.1666, "lr": 4.854156072217185e-06, "epoch": 0.9927234927234927, "percentage": 19.85, "elapsed_time": "0:04:18", "remaining_time": "0:17:22", "throughput": 1485.47, "total_tokens": 383808}
|
|
{"current_steps": 960, "total_steps": 4810, "loss": 0.2397, "lr": 4.851087535230911e-06, "epoch": 0.997920997920998, "percentage": 19.96, "elapsed_time": "0:04:18", "remaining_time": "0:17:17", "throughput": 1491.56, "total_tokens": 385856}
|
|
{"current_steps": 964, "total_steps": 4810, "eval_loss": 0.17627178132534027, "epoch": 1.002079002079002, "percentage": 20.04, "elapsed_time": "0:04:20", "remaining_time": "0:17:17", "throughput": 1490.01, "total_tokens": 387464}
|
|
{"current_steps": 965, "total_steps": 4810, "loss": 0.176, "lr": 4.8479880430952e-06, "epoch": 1.003118503118503, "percentage": 20.06, "elapsed_time": "0:05:53", "remaining_time": "0:23:28", "throughput": 1096.8, "total_tokens": 387848}
|
|
{"current_steps": 970, "total_steps": 4810, "loss": 0.0833, "lr": 4.844857636618928e-06, "epoch": 1.0083160083160083, "percentage": 20.17, "elapsed_time": "0:05:53", "remaining_time": "0:23:21", "throughput": 1100.85, "total_tokens": 389640}
|
|
{"current_steps": 975, "total_steps": 4810, "loss": 0.1134, "lr": 4.841696357018003e-06, "epoch": 1.0135135135135136, "percentage": 20.27, "elapsed_time": "0:05:54", "remaining_time": "0:23:13", "throughput": 1105.44, "total_tokens": 391624}
|
|
{"current_steps": 980, "total_steps": 4810, "loss": 0.0776, "lr": 4.838504245914812e-06, "epoch": 1.0187110187110187, "percentage": 20.37, "elapsed_time": "0:05:54", "remaining_time": "0:23:05", "throughput": 1110.2, "total_tokens": 393672}
|
|
{"current_steps": 985, "total_steps": 4810, "loss": 0.0266, "lr": 4.835281345337684e-06, "epoch": 1.023908523908524, "percentage": 20.48, "elapsed_time": "0:05:54", "remaining_time": "0:22:58", "throughput": 1115.14, "total_tokens": 395784}
|
|
{"current_steps": 990, "total_steps": 4810, "loss": 0.2075, "lr": 4.832027697720329e-06, "epoch": 1.0291060291060292, "percentage": 20.58, "elapsed_time": "0:05:55", "remaining_time": "0:22:50", "throughput": 1119.7, "total_tokens": 397768}
|
|
{"current_steps": 995, "total_steps": 4810, "loss": 0.4063, "lr": 4.828743345901285e-06, "epoch": 1.0343035343035343, "percentage": 20.69, "elapsed_time": "0:05:55", "remaining_time": "0:22:43", "throughput": 1124.44, "total_tokens": 399816}
|
|
{"current_steps": 1000, "total_steps": 4810, "loss": 0.1017, "lr": 4.825428333123346e-06, "epoch": 1.0395010395010396, "percentage": 20.79, "elapsed_time": "0:05:55", "remaining_time": "0:22:35", "throughput": 1129.35, "total_tokens": 401928}
|
|
{"current_steps": 1005, "total_steps": 4810, "loss": 0.0338, "lr": 4.822082703033003e-06, "epoch": 1.0446985446985446, "percentage": 20.89, "elapsed_time": "0:05:56", "remaining_time": "0:22:28", "throughput": 1133.89, "total_tokens": 403912}
|
|
{"current_steps": 1010, "total_steps": 4810, "loss": 0.1392, "lr": 4.818706499679862e-06, "epoch": 1.04989604989605, "percentage": 21.0, "elapsed_time": "0:05:56", "remaining_time": "0:22:21", "throughput": 1138.24, "total_tokens": 405832}
|
|
{"current_steps": 1015, "total_steps": 4810, "loss": 0.1168, "lr": 4.815299767516065e-06, "epoch": 1.0550935550935552, "percentage": 21.1, "elapsed_time": "0:05:56", "remaining_time": "0:22:14", "throughput": 1142.95, "total_tokens": 407880}
|
|
{"current_steps": 1020, "total_steps": 4810, "loss": 0.1001, "lr": 4.811862551395707e-06, "epoch": 1.0602910602910602, "percentage": 21.21, "elapsed_time": "0:05:57", "remaining_time": "0:22:07", "throughput": 1148.18, "total_tokens": 410120}
|
|
{"current_steps": 1025, "total_steps": 4810, "loss": 0.0944, "lr": 4.808394896574246e-06, "epoch": 1.0654885654885655, "percentage": 21.31, "elapsed_time": "0:05:57", "remaining_time": "0:22:00", "throughput": 1152.87, "total_tokens": 412168}
|
|
{"current_steps": 1030, "total_steps": 4810, "loss": 0.1433, "lr": 4.8048968487079e-06, "epoch": 1.0706860706860706, "percentage": 21.41, "elapsed_time": "0:05:57", "remaining_time": "0:21:53", "throughput": 1158.24, "total_tokens": 414472}
|
|
{"current_steps": 1035, "total_steps": 4810, "loss": 0.3131, "lr": 4.801368453853057e-06, "epoch": 1.0758835758835759, "percentage": 21.52, "elapsed_time": "0:05:58", "remaining_time": "0:21:46", "throughput": 1162.91, "total_tokens": 416520}
|
|
{"current_steps": 1040, "total_steps": 4810, "loss": 0.171, "lr": 4.79780975846566e-06, "epoch": 1.0810810810810811, "percentage": 21.62, "elapsed_time": "0:05:58", "remaining_time": "0:21:39", "throughput": 1167.57, "total_tokens": 418568}
|
|
{"current_steps": 1045, "total_steps": 4810, "loss": 0.1287, "lr": 4.7942208094006e-06, "epoch": 1.0862785862785862, "percentage": 21.73, "elapsed_time": "0:05:58", "remaining_time": "0:21:32", "throughput": 1171.86, "total_tokens": 420488}
|
|
{"current_steps": 1050, "total_steps": 4810, "loss": 0.1098, "lr": 4.790601653911094e-06, "epoch": 1.0914760914760915, "percentage": 21.83, "elapsed_time": "0:05:59", "remaining_time": "0:21:26", "throughput": 1176.33, "total_tokens": 422472}
|
|
{"current_steps": 1055, "total_steps": 4810, "loss": 0.297, "lr": 4.786952339648071e-06, "epoch": 1.0966735966735968, "percentage": 21.93, "elapsed_time": "0:05:59", "remaining_time": "0:21:19", "throughput": 1180.79, "total_tokens": 424456}
|
|
{"current_steps": 1060, "total_steps": 4810, "loss": 0.3308, "lr": 4.783272914659535e-06, "epoch": 1.1018711018711018, "percentage": 22.04, "elapsed_time": "0:05:59", "remaining_time": "0:21:12", "throughput": 1185.59, "total_tokens": 426568}
|
|
{"current_steps": 1065, "total_steps": 4810, "loss": 0.1061, "lr": 4.77956342738994e-06, "epoch": 1.107068607068607, "percentage": 22.14, "elapsed_time": "0:06:00", "remaining_time": "0:21:06", "throughput": 1190.03, "total_tokens": 428552}
|
|
{"current_steps": 1070, "total_steps": 4810, "loss": 0.0996, "lr": 4.775823926679549e-06, "epoch": 1.1122661122661124, "percentage": 22.25, "elapsed_time": "0:06:00", "remaining_time": "0:20:59", "throughput": 1194.29, "total_tokens": 430472}
|
|
{"current_steps": 1075, "total_steps": 4810, "loss": 0.1315, "lr": 4.77205446176379e-06, "epoch": 1.1174636174636174, "percentage": 22.35, "elapsed_time": "0:06:00", "remaining_time": "0:20:53", "throughput": 1198.36, "total_tokens": 432328}
|
|
{"current_steps": 1080, "total_steps": 4810, "loss": 0.2841, "lr": 4.768255082272612e-06, "epoch": 1.1226611226611227, "percentage": 22.45, "elapsed_time": "0:06:01", "remaining_time": "0:20:47", "throughput": 1203.13, "total_tokens": 434440}
|
|
{"current_steps": 1085, "total_steps": 4810, "loss": 0.0783, "lr": 4.764425838229823e-06, "epoch": 1.1278586278586278, "percentage": 22.56, "elapsed_time": "0:06:01", "remaining_time": "0:20:40", "throughput": 1207.72, "total_tokens": 436488}
|
|
{"current_steps": 1090, "total_steps": 4810, "loss": 0.346, "lr": 4.760566780052445e-06, "epoch": 1.133056133056133, "percentage": 22.66, "elapsed_time": "0:06:01", "remaining_time": "0:20:34", "throughput": 1212.12, "total_tokens": 438472}
|
|
{"current_steps": 1095, "total_steps": 4810, "loss": 0.4155, "lr": 4.756677958550035e-06, "epoch": 1.1382536382536383, "percentage": 22.77, "elapsed_time": "0:06:02", "remaining_time": "0:20:28", "throughput": 1216.52, "total_tokens": 440456}
|
|
{"current_steps": 1100, "total_steps": 4810, "loss": 0.1236, "lr": 4.752759424924026e-06, "epoch": 1.1434511434511434, "percentage": 22.87, "elapsed_time": "0:06:02", "remaining_time": "0:20:22", "throughput": 1220.9, "total_tokens": 442440}
|
|
{"current_steps": 1105, "total_steps": 4810, "loss": 0.099, "lr": 4.7488112307670515e-06, "epoch": 1.1486486486486487, "percentage": 22.97, "elapsed_time": "0:06:02", "remaining_time": "0:20:16", "throughput": 1225.29, "total_tokens": 444424}
|
|
{"current_steps": 1110, "total_steps": 4810, "loss": 0.1891, "lr": 4.7448334280622624e-06, "epoch": 1.1538461538461537, "percentage": 23.08, "elapsed_time": "0:06:03", "remaining_time": "0:20:10", "throughput": 1229.3, "total_tokens": 446280}
|
|
{"current_steps": 1115, "total_steps": 4810, "loss": 0.1802, "lr": 4.740826069182645e-06, "epoch": 1.159043659043659, "percentage": 23.18, "elapsed_time": "0:06:03", "remaining_time": "0:20:04", "throughput": 1233.66, "total_tokens": 448264}
|
|
{"current_steps": 1120, "total_steps": 4810, "loss": 0.2325, "lr": 4.736789206890332e-06, "epoch": 1.1642411642411643, "percentage": 23.28, "elapsed_time": "0:06:03", "remaining_time": "0:19:58", "throughput": 1238.37, "total_tokens": 450376}
|
|
{"current_steps": 1125, "total_steps": 4810, "loss": 0.1142, "lr": 4.732722894335909e-06, "epoch": 1.1694386694386694, "percentage": 23.39, "elapsed_time": "0:06:04", "remaining_time": "0:19:52", "throughput": 1243.24, "total_tokens": 452552}
|
|
{"current_steps": 1130, "total_steps": 4810, "loss": 0.1432, "lr": 4.728627185057711e-06, "epoch": 1.1746361746361746, "percentage": 23.49, "elapsed_time": "0:06:04", "remaining_time": "0:19:46", "throughput": 1247.75, "total_tokens": 454600}
|
|
{"current_steps": 1135, "total_steps": 4810, "loss": 0.1061, "lr": 4.724502132981119e-06, "epoch": 1.17983367983368, "percentage": 23.6, "elapsed_time": "0:06:04", "remaining_time": "0:19:40", "throughput": 1252.26, "total_tokens": 456648}
|
|
{"current_steps": 1140, "total_steps": 4810, "loss": 0.078, "lr": 4.720347792417851e-06, "epoch": 1.185031185031185, "percentage": 23.7, "elapsed_time": "0:06:04", "remaining_time": "0:19:34", "throughput": 1256.59, "total_tokens": 458632}
|
|
{"current_steps": 1145, "total_steps": 4810, "loss": 0.1068, "lr": 4.716164218065246e-06, "epoch": 1.1902286902286903, "percentage": 23.8, "elapsed_time": "0:06:05", "remaining_time": "0:19:29", "throughput": 1261.08, "total_tokens": 460680}
|
|
{"current_steps": 1150, "total_steps": 4810, "loss": 0.2177, "lr": 4.711951465005548e-06, "epoch": 1.1954261954261955, "percentage": 23.91, "elapsed_time": "0:06:05", "remaining_time": "0:19:23", "throughput": 1265.56, "total_tokens": 462728}
|
|
{"current_steps": 1155, "total_steps": 4810, "loss": 0.058, "lr": 4.707709588705169e-06, "epoch": 1.2006237006237006, "percentage": 24.01, "elapsed_time": "0:06:05", "remaining_time": "0:19:18", "throughput": 1270.03, "total_tokens": 464776}
|
|
{"current_steps": 1160, "total_steps": 4810, "loss": 0.3544, "lr": 4.7034386450139735e-06, "epoch": 1.2058212058212059, "percentage": 24.12, "elapsed_time": "0:06:06", "remaining_time": "0:19:12", "throughput": 1274.14, "total_tokens": 466696}
|
|
{"current_steps": 1165, "total_steps": 4810, "loss": 0.1744, "lr": 4.699138690164533e-06, "epoch": 1.211018711018711, "percentage": 24.22, "elapsed_time": "0:06:06", "remaining_time": "0:19:07", "throughput": 1278.26, "total_tokens": 468616}
|
|
{"current_steps": 1170, "total_steps": 4810, "loss": 0.1842, "lr": 4.694809780771391e-06, "epoch": 1.2162162162162162, "percentage": 24.32, "elapsed_time": "0:06:06", "remaining_time": "0:19:01", "throughput": 1282.89, "total_tokens": 470728}
|
|
{"current_steps": 1175, "total_steps": 4810, "loss": 0.1067, "lr": 4.690451973830314e-06, "epoch": 1.2214137214137215, "percentage": 24.43, "elapsed_time": "0:06:07", "remaining_time": "0:18:56", "throughput": 1287.34, "total_tokens": 472776}
|
|
{"current_steps": 1180, "total_steps": 4810, "loss": 0.177, "lr": 4.6860653267175425e-06, "epoch": 1.2266112266112266, "percentage": 24.53, "elapsed_time": "0:06:07", "remaining_time": "0:18:50", "throughput": 1291.76, "total_tokens": 474824}
|
|
{"current_steps": 1185, "total_steps": 4810, "loss": 0.2562, "lr": 4.681649897189036e-06, "epoch": 1.2318087318087318, "percentage": 24.64, "elapsed_time": "0:06:07", "remaining_time": "0:18:45", "throughput": 1295.83, "total_tokens": 476744}
|
|
{"current_steps": 1190, "total_steps": 4810, "loss": 0.053, "lr": 4.677205743379714e-06, "epoch": 1.237006237006237, "percentage": 24.74, "elapsed_time": "0:06:08", "remaining_time": "0:18:40", "throughput": 1300.4, "total_tokens": 478856}
|
|
{"current_steps": 1195, "total_steps": 4810, "loss": 0.1686, "lr": 4.672732923802685e-06, "epoch": 1.2422037422037422, "percentage": 24.84, "elapsed_time": "0:06:08", "remaining_time": "0:18:34", "throughput": 1304.45, "total_tokens": 480776}
|
|
{"current_steps": 1200, "total_steps": 4810, "loss": 0.0292, "lr": 4.6682314973484844e-06, "epoch": 1.2474012474012475, "percentage": 24.95, "elapsed_time": "0:06:08", "remaining_time": "0:18:29", "throughput": 1309.19, "total_tokens": 482952}
|
|
{"current_steps": 1205, "total_steps": 4810, "loss": 0.0622, "lr": 4.663701523284291e-06, "epoch": 1.2525987525987525, "percentage": 25.05, "elapsed_time": "0:06:09", "remaining_time": "0:18:24", "throughput": 1314.1, "total_tokens": 485192}
|
|
{"current_steps": 1205, "total_steps": 4810, "eval_loss": 0.26757940649986267, "epoch": 1.2525987525987525, "percentage": 25.05, "elapsed_time": "0:06:10", "remaining_time": "0:18:27", "throughput": 1310.35, "total_tokens": 485192}
|
|
{"current_steps": 1210, "total_steps": 4810, "loss": 0.1299, "lr": 4.659143061253152e-06, "epoch": 1.2577962577962578, "percentage": 25.16, "elapsed_time": "0:07:02", "remaining_time": "0:20:55", "throughput": 1153.92, "total_tokens": 487112}
|
|
{"current_steps": 1215, "total_steps": 4810, "loss": 0.2685, "lr": 4.654556171273196e-06, "epoch": 1.262993762993763, "percentage": 25.26, "elapsed_time": "0:07:02", "remaining_time": "0:20:49", "throughput": 1157.89, "total_tokens": 489160}
|
|
{"current_steps": 1220, "total_steps": 4810, "loss": 0.2017, "lr": 4.649940913736841e-06, "epoch": 1.2681912681912682, "percentage": 25.36, "elapsed_time": "0:07:02", "remaining_time": "0:20:44", "throughput": 1161.54, "total_tokens": 491080}
|
|
{"current_steps": 1225, "total_steps": 4810, "loss": 0.0607, "lr": 4.645297349410005e-06, "epoch": 1.2733887733887734, "percentage": 25.47, "elapsed_time": "0:07:03", "remaining_time": "0:20:38", "throughput": 1165.34, "total_tokens": 493064}
|
|
{"current_steps": 1230, "total_steps": 4810, "loss": 0.1537, "lr": 4.640625539431298e-06, "epoch": 1.2785862785862787, "percentage": 25.57, "elapsed_time": "0:07:03", "remaining_time": "0:20:32", "throughput": 1168.98, "total_tokens": 494984}
|
|
{"current_steps": 1235, "total_steps": 4810, "loss": 0.2946, "lr": 4.635925545311224e-06, "epoch": 1.2837837837837838, "percentage": 25.68, "elapsed_time": "0:07:03", "remaining_time": "0:20:26", "throughput": 1172.76, "total_tokens": 496968}
|
|
{"current_steps": 1240, "total_steps": 4810, "loss": 0.0799, "lr": 4.631197428931365e-06, "epoch": 1.288981288981289, "percentage": 25.78, "elapsed_time": "0:07:04", "remaining_time": "0:20:20", "throughput": 1176.24, "total_tokens": 498824}
|
|
{"current_steps": 1245, "total_steps": 4810, "loss": 0.0804, "lr": 4.626441252543572e-06, "epoch": 1.2941787941787941, "percentage": 25.88, "elapsed_time": "0:07:04", "remaining_time": "0:20:15", "throughput": 1180.02, "total_tokens": 500808}
|
|
{"current_steps": 1250, "total_steps": 4810, "loss": 0.251, "lr": 4.621657078769143e-06, "epoch": 1.2993762993762994, "percentage": 25.99, "elapsed_time": "0:07:04", "remaining_time": "0:20:09", "throughput": 1183.94, "total_tokens": 502856}
|
|
{"current_steps": 1255, "total_steps": 4810, "loss": 0.0856, "lr": 4.616844970597996e-06, "epoch": 1.3045738045738045, "percentage": 26.09, "elapsed_time": "0:07:05", "remaining_time": "0:20:04", "throughput": 1187.4, "total_tokens": 504712}
|
|
{"current_steps": 1260, "total_steps": 4810, "loss": 0.3719, "lr": 4.612004991387843e-06, "epoch": 1.3097713097713097, "percentage": 26.2, "elapsed_time": "0:07:05", "remaining_time": "0:19:58", "throughput": 1191.16, "total_tokens": 506696}
|
|
{"current_steps": 1265, "total_steps": 4810, "loss": 0.0936, "lr": 4.607137204863356e-06, "epoch": 1.314968814968815, "percentage": 26.3, "elapsed_time": "0:07:05", "remaining_time": "0:19:53", "throughput": 1194.85, "total_tokens": 508680}
|
|
{"current_steps": 1270, "total_steps": 4810, "loss": 0.1072, "lr": 4.602241675115326e-06, "epoch": 1.32016632016632, "percentage": 26.4, "elapsed_time": "0:07:06", "remaining_time": "0:19:47", "throughput": 1198.75, "total_tokens": 510728}
|
|
{"current_steps": 1275, "total_steps": 4810, "loss": 0.0841, "lr": 4.597318466599819e-06, "epoch": 1.3253638253638254, "percentage": 26.51, "elapsed_time": "0:07:06", "remaining_time": "0:19:42", "throughput": 1202.49, "total_tokens": 512712}
|
|
{"current_steps": 1280, "total_steps": 4810, "loss": 0.1067, "lr": 4.592367644137329e-06, "epoch": 1.3305613305613306, "percentage": 26.61, "elapsed_time": "0:07:06", "remaining_time": "0:19:36", "throughput": 1206.21, "total_tokens": 514696}
|
|
{"current_steps": 1285, "total_steps": 4810, "loss": 0.1895, "lr": 4.587389272911923e-06, "epoch": 1.3357588357588357, "percentage": 26.72, "elapsed_time": "0:07:07", "remaining_time": "0:19:31", "throughput": 1210.23, "total_tokens": 516808}
|
|
{"current_steps": 1290, "total_steps": 4810, "loss": 0.2118, "lr": 4.582383418470386e-06, "epoch": 1.340956340956341, "percentage": 26.82, "elapsed_time": "0:07:07", "remaining_time": "0:19:26", "throughput": 1213.95, "total_tokens": 518792}
|
|
{"current_steps": 1295, "total_steps": 4810, "loss": 0.1325, "lr": 4.5773501467213525e-06, "epoch": 1.3461538461538463, "percentage": 26.92, "elapsed_time": "0:07:07", "remaining_time": "0:19:20", "throughput": 1217.82, "total_tokens": 520840}
|
|
{"current_steps": 1300, "total_steps": 4810, "loss": 0.0526, "lr": 4.572289523934444e-06, "epoch": 1.3513513513513513, "percentage": 27.03, "elapsed_time": "0:07:08", "remaining_time": "0:19:15", "throughput": 1221.39, "total_tokens": 522760}
|
|
{"current_steps": 1305, "total_steps": 4810, "loss": 0.2152, "lr": 4.567201616739393e-06, "epoch": 1.3565488565488566, "percentage": 27.13, "elapsed_time": "0:07:08", "remaining_time": "0:19:10", "throughput": 1225.4, "total_tokens": 524872}
|
|
{"current_steps": 1310, "total_steps": 4810, "loss": 0.1978, "lr": 4.562086492125167e-06, "epoch": 1.3617463617463619, "percentage": 27.23, "elapsed_time": "0:07:08", "remaining_time": "0:19:05", "throughput": 1229.26, "total_tokens": 526920}
|
|
{"current_steps": 1315, "total_steps": 4810, "loss": 0.1374, "lr": 4.5569442174390885e-06, "epoch": 1.366943866943867, "percentage": 27.34, "elapsed_time": "0:07:08", "remaining_time": "0:19:00", "throughput": 1233.11, "total_tokens": 528968}
|
|
{"current_steps": 1320, "total_steps": 4810, "loss": 0.0818, "lr": 4.551774860385944e-06, "epoch": 1.3721413721413722, "percentage": 27.44, "elapsed_time": "0:07:09", "remaining_time": "0:18:55", "throughput": 1236.66, "total_tokens": 530888}
|
|
{"current_steps": 1325, "total_steps": 4810, "loss": 0.1644, "lr": 4.546578489027095e-06, "epoch": 1.3773388773388773, "percentage": 27.55, "elapsed_time": "0:07:09", "remaining_time": "0:18:49", "throughput": 1240.36, "total_tokens": 532872}
|
|
{"current_steps": 1330, "total_steps": 4810, "loss": 0.118, "lr": 4.541355171779582e-06, "epoch": 1.3825363825363826, "percentage": 27.65, "elapsed_time": "0:07:09", "remaining_time": "0:18:44", "throughput": 1244.19, "total_tokens": 534920}
|
|
{"current_steps": 1335, "total_steps": 4810, "loss": 0.0039, "lr": 4.536104977415225e-06, "epoch": 1.3877338877338876, "percentage": 27.75, "elapsed_time": "0:07:10", "remaining_time": "0:18:39", "throughput": 1247.73, "total_tokens": 536840}
|
|
{"current_steps": 1340, "total_steps": 4810, "loss": 0.3705, "lr": 4.530827975059715e-06, "epoch": 1.392931392931393, "percentage": 27.86, "elapsed_time": "0:07:10", "remaining_time": "0:18:34", "throughput": 1251.26, "total_tokens": 538760}
|
|
{"current_steps": 1345, "total_steps": 4810, "loss": 0.2364, "lr": 4.525524234191705e-06, "epoch": 1.3981288981288982, "percentage": 27.96, "elapsed_time": "0:07:10", "remaining_time": "0:18:30", "throughput": 1254.78, "total_tokens": 540680}
|
|
{"current_steps": 1350, "total_steps": 4810, "loss": 0.1405, "lr": 4.520193824641898e-06, "epoch": 1.4033264033264032, "percentage": 28.07, "elapsed_time": "0:07:11", "remaining_time": "0:18:25", "throughput": 1258.44, "total_tokens": 542664}
|
|
{"current_steps": 1355, "total_steps": 4810, "loss": 0.1596, "lr": 4.51483681659212e-06, "epoch": 1.4085239085239085, "percentage": 28.17, "elapsed_time": "0:07:11", "remaining_time": "0:18:20", "throughput": 1262.25, "total_tokens": 544712}
|
|
{"current_steps": 1360, "total_steps": 4810, "loss": 0.2662, "lr": 4.5094532805744075e-06, "epoch": 1.4137214137214138, "percentage": 28.27, "elapsed_time": "0:07:11", "remaining_time": "0:18:15", "throughput": 1266.2, "total_tokens": 546824}
|
|
{"current_steps": 1365, "total_steps": 4810, "loss": 0.0791, "lr": 4.504043287470068e-06, "epoch": 1.4189189189189189, "percentage": 28.38, "elapsed_time": "0:07:12", "remaining_time": "0:18:10", "throughput": 1270.12, "total_tokens": 548936}
|
|
{"current_steps": 1370, "total_steps": 4810, "loss": 0.1218, "lr": 4.498606908508754e-06, "epoch": 1.4241164241164241, "percentage": 28.48, "elapsed_time": "0:07:12", "remaining_time": "0:18:06", "throughput": 1273.73, "total_tokens": 550920}
|
|
{"current_steps": 1375, "total_steps": 4810, "loss": 0.0307, "lr": 4.493144215267519e-06, "epoch": 1.4293139293139294, "percentage": 28.59, "elapsed_time": "0:07:12", "remaining_time": "0:18:01", "throughput": 1277.37, "total_tokens": 552904}
|
|
{"current_steps": 1380, "total_steps": 4810, "loss": 0.1616, "lr": 4.4876552796698814e-06, "epoch": 1.4345114345114345, "percentage": 28.69, "elapsed_time": "0:07:13", "remaining_time": "0:17:56", "throughput": 1280.84, "total_tokens": 554824}
|
|
{"current_steps": 1385, "total_steps": 4810, "loss": 0.214, "lr": 4.482140173984875e-06, "epoch": 1.4397089397089398, "percentage": 28.79, "elapsed_time": "0:07:13", "remaining_time": "0:17:51", "throughput": 1284.62, "total_tokens": 556872}
|
|
{"current_steps": 1390, "total_steps": 4810, "loss": 0.1453, "lr": 4.476598970826093e-06, "epoch": 1.444906444906445, "percentage": 28.9, "elapsed_time": "0:07:13", "remaining_time": "0:17:47", "throughput": 1288.53, "total_tokens": 558984}
|
|
{"current_steps": 1395, "total_steps": 4810, "loss": 0.2061, "lr": 4.471031743150744e-06, "epoch": 1.45010395010395, "percentage": 29.0, "elapsed_time": "0:07:14", "remaining_time": "0:17:42", "throughput": 1292.14, "total_tokens": 560968}
|
|
{"current_steps": 1400, "total_steps": 4810, "loss": 0.2358, "lr": 4.465438564258673e-06, "epoch": 1.4553014553014554, "percentage": 29.11, "elapsed_time": "0:07:14", "remaining_time": "0:17:38", "throughput": 1295.74, "total_tokens": 562952}
|
|
{"current_steps": 1405, "total_steps": 4810, "loss": 0.0357, "lr": 4.459819507791415e-06, "epoch": 1.4604989604989604, "percentage": 29.21, "elapsed_time": "0:07:14", "remaining_time": "0:17:33", "throughput": 1299.63, "total_tokens": 565064}
|
|
{"current_steps": 1410, "total_steps": 4810, "loss": 0.1194, "lr": 4.454174647731213e-06, "epoch": 1.4656964656964657, "percentage": 29.31, "elapsed_time": "0:07:15", "remaining_time": "0:17:29", "throughput": 1303.37, "total_tokens": 567112}
|
|
{"current_steps": 1415, "total_steps": 4810, "loss": 0.2261, "lr": 4.448504058400052e-06, "epoch": 1.4708939708939708, "percentage": 29.42, "elapsed_time": "0:07:15", "remaining_time": "0:17:24", "throughput": 1307.11, "total_tokens": 569160}
|
|
{"current_steps": 1420, "total_steps": 4810, "loss": 0.1794, "lr": 4.4428078144586715e-06, "epoch": 1.476091476091476, "percentage": 29.52, "elapsed_time": "0:07:15", "remaining_time": "0:17:20", "throughput": 1311.13, "total_tokens": 571336}
|
|
{"current_steps": 1425, "total_steps": 4810, "loss": 0.2622, "lr": 4.437085990905591e-06, "epoch": 1.4812889812889813, "percentage": 29.63, "elapsed_time": "0:07:16", "remaining_time": "0:17:15", "throughput": 1314.86, "total_tokens": 573384}
|
|
{"current_steps": 1430, "total_steps": 4810, "loss": 0.1625, "lr": 4.431338663076119e-06, "epoch": 1.4864864864864864, "percentage": 29.73, "elapsed_time": "0:07:16", "remaining_time": "0:17:11", "throughput": 1318.3, "total_tokens": 575304}
|
|
{"current_steps": 1435, "total_steps": 4810, "loss": 0.0647, "lr": 4.42556590664136e-06, "epoch": 1.4916839916839917, "percentage": 29.83, "elapsed_time": "0:07:16", "remaining_time": "0:17:07", "throughput": 1321.57, "total_tokens": 577160}
|
|
{"current_steps": 1440, "total_steps": 4810, "loss": 0.11, "lr": 4.41976779760722e-06, "epoch": 1.496881496881497, "percentage": 29.94, "elapsed_time": "0:07:17", "remaining_time": "0:17:02", "throughput": 1325.26, "total_tokens": 579208}
|
|
{"current_steps": 1445, "total_steps": 4810, "loss": 0.0911, "lr": 4.413944412313405e-06, "epoch": 1.502079002079002, "percentage": 30.04, "elapsed_time": "0:07:17", "remaining_time": "0:16:58", "throughput": 1328.96, "total_tokens": 581256}
|
|
{"current_steps": 1446, "total_steps": 4810, "eval_loss": 0.3145564794540405, "epoch": 1.503118503118503, "percentage": 30.06, "elapsed_time": "0:07:18", "remaining_time": "0:17:00", "throughput": 1326.51, "total_tokens": 581704}
|
|
{"current_steps": 1450, "total_steps": 4810, "loss": 0.1191, "lr": 4.408095827432416e-06, "epoch": 1.5072765072765073, "percentage": 30.15, "elapsed_time": "0:08:14", "remaining_time": "0:19:04", "throughput": 1180.65, "total_tokens": 583304}
|
|
{"current_steps": 1455, "total_steps": 4810, "loss": 0.3479, "lr": 4.40222211996854e-06, "epoch": 1.5124740124740126, "percentage": 30.25, "elapsed_time": "0:08:14", "remaining_time": "0:18:59", "throughput": 1183.77, "total_tokens": 585224}
|
|
{"current_steps": 1460, "total_steps": 4810, "loss": 0.2617, "lr": 4.396323367256836e-06, "epoch": 1.5176715176715176, "percentage": 30.35, "elapsed_time": "0:08:14", "remaining_time": "0:18:55", "throughput": 1187.13, "total_tokens": 587272}
|
|
{"current_steps": 1465, "total_steps": 4810, "loss": 0.1985, "lr": 4.390399646962117e-06, "epoch": 1.5228690228690227, "percentage": 30.46, "elapsed_time": "0:08:15", "remaining_time": "0:18:50", "throughput": 1190.49, "total_tokens": 589320}
|
|
{"current_steps": 1470, "total_steps": 4810, "loss": 0.1369, "lr": 4.384451037077924e-06, "epoch": 1.5280665280665282, "percentage": 30.56, "elapsed_time": "0:08:15", "remaining_time": "0:18:45", "throughput": 1193.72, "total_tokens": 591304}
|
|
{"current_steps": 1475, "total_steps": 4810, "loss": 0.1433, "lr": 4.378477615925506e-06, "epoch": 1.5332640332640333, "percentage": 30.67, "elapsed_time": "0:08:15", "remaining_time": "0:18:40", "throughput": 1196.82, "total_tokens": 593224}
|
|
{"current_steps": 1480, "total_steps": 4810, "loss": 0.1273, "lr": 4.372479462152781e-06, "epoch": 1.5384615384615383, "percentage": 30.77, "elapsed_time": "0:08:15", "remaining_time": "0:18:35", "throughput": 1200.29, "total_tokens": 595336}
|
|
{"current_steps": 1485, "total_steps": 4810, "loss": 0.2715, "lr": 4.366456654733308e-06, "epoch": 1.5436590436590436, "percentage": 30.87, "elapsed_time": "0:08:16", "remaining_time": "0:18:31", "throughput": 1203.38, "total_tokens": 597256}
|
|
{"current_steps": 1490, "total_steps": 4810, "loss": 0.1859, "lr": 4.360409272965242e-06, "epoch": 1.5488565488565489, "percentage": 30.98, "elapsed_time": "0:08:16", "remaining_time": "0:18:26", "throughput": 1206.72, "total_tokens": 599304}
|
|
{"current_steps": 1495, "total_steps": 4810, "loss": 0.0745, "lr": 4.354337396470291e-06, "epoch": 1.554054054054054, "percentage": 31.08, "elapsed_time": "0:08:16", "remaining_time": "0:18:21", "throughput": 1209.93, "total_tokens": 601288}
|
|
{"current_steps": 1500, "total_steps": 4810, "loss": 0.1641, "lr": 4.348241105192668e-06, "epoch": 1.5592515592515592, "percentage": 31.19, "elapsed_time": "0:08:17", "remaining_time": "0:18:17", "throughput": 1213.13, "total_tokens": 603272}
|
|
{"current_steps": 1505, "total_steps": 4810, "loss": 0.1365, "lr": 4.34212047939804e-06, "epoch": 1.5644490644490645, "percentage": 31.29, "elapsed_time": "0:08:17", "remaining_time": "0:18:12", "throughput": 1216.33, "total_tokens": 605256}
|
|
{"current_steps": 1510, "total_steps": 4810, "loss": 0.0868, "lr": 4.335975599672469e-06, "epoch": 1.5696465696465696, "percentage": 31.39, "elapsed_time": "0:08:17", "remaining_time": "0:18:08", "throughput": 1219.66, "total_tokens": 607304}
|
|
{"current_steps": 1515, "total_steps": 4810, "loss": 0.1281, "lr": 4.329806546921354e-06, "epoch": 1.5748440748440748, "percentage": 31.5, "elapsed_time": "0:08:18", "remaining_time": "0:18:03", "throughput": 1222.72, "total_tokens": 609224}
|
|
{"current_steps": 1520, "total_steps": 4810, "loss": 0.0465, "lr": 4.3236134023683565e-06, "epoch": 1.5800415800415801, "percentage": 31.6, "elapsed_time": "0:08:18", "remaining_time": "0:17:59", "throughput": 1226.16, "total_tokens": 611336}
|
|
{"current_steps": 1525, "total_steps": 4810, "loss": 0.1156, "lr": 4.3173962475543475e-06, "epoch": 1.5852390852390852, "percentage": 31.7, "elapsed_time": "0:08:18", "remaining_time": "0:17:54", "throughput": 1229.34, "total_tokens": 613320}
|
|
{"current_steps": 1530, "total_steps": 4810, "loss": 0.2405, "lr": 4.311155164336318e-06, "epoch": 1.5904365904365905, "percentage": 31.81, "elapsed_time": "0:08:19", "remaining_time": "0:17:50", "throughput": 1232.26, "total_tokens": 615176}
|
|
{"current_steps": 1535, "total_steps": 4810, "loss": 0.1673, "lr": 4.3048902348863116e-06, "epoch": 1.5956340956340958, "percentage": 31.91, "elapsed_time": "0:08:19", "remaining_time": "0:17:45", "throughput": 1235.56, "total_tokens": 617224}
|
|
{"current_steps": 1540, "total_steps": 4810, "loss": 0.1683, "lr": 4.298601541690336e-06, "epoch": 1.6008316008316008, "percentage": 32.02, "elapsed_time": "0:08:19", "remaining_time": "0:17:41", "throughput": 1238.73, "total_tokens": 619208}
|
|
{"current_steps": 1545, "total_steps": 4810, "loss": 0.221, "lr": 4.292289167547281e-06, "epoch": 1.6060291060291059, "percentage": 32.12, "elapsed_time": "0:08:20", "remaining_time": "0:17:37", "throughput": 1241.9, "total_tokens": 621192}
|
|
{"current_steps": 1550, "total_steps": 4810, "loss": 0.1458, "lr": 4.285953195567827e-06, "epoch": 1.6112266112266114, "percentage": 32.22, "elapsed_time": "0:08:20", "remaining_time": "0:17:32", "throughput": 1245.06, "total_tokens": 623176}
|
|
{"current_steps": 1555, "total_steps": 4810, "loss": 0.246, "lr": 4.279593709173352e-06, "epoch": 1.6164241164241164, "percentage": 32.33, "elapsed_time": "0:08:20", "remaining_time": "0:17:28", "throughput": 1248.21, "total_tokens": 625160}
|
|
{"current_steps": 1560, "total_steps": 4810, "loss": 0.1381, "lr": 4.27321079209483e-06, "epoch": 1.6216216216216215, "percentage": 32.43, "elapsed_time": "0:08:21", "remaining_time": "0:17:24", "throughput": 1251.36, "total_tokens": 627144}
|
|
{"current_steps": 1565, "total_steps": 4810, "loss": 0.1634, "lr": 4.266804528371732e-06, "epoch": 1.6268191268191268, "percentage": 32.54, "elapsed_time": "0:08:21", "remaining_time": "0:17:19", "throughput": 1254.63, "total_tokens": 629192}
|
|
{"current_steps": 1570, "total_steps": 4810, "loss": 0.1174, "lr": 4.260375002350917e-06, "epoch": 1.632016632016632, "percentage": 32.64, "elapsed_time": "0:08:21", "remaining_time": "0:17:15", "throughput": 1257.91, "total_tokens": 631240}
|
|
{"current_steps": 1575, "total_steps": 4810, "loss": 0.2274, "lr": 4.253922298685525e-06, "epoch": 1.637214137214137, "percentage": 32.74, "elapsed_time": "0:08:22", "remaining_time": "0:17:11", "throughput": 1261.05, "total_tokens": 633224}
|
|
{"current_steps": 1580, "total_steps": 4810, "loss": 0.1367, "lr": 4.2474465023338586e-06, "epoch": 1.6424116424116424, "percentage": 32.85, "elapsed_time": "0:08:22", "remaining_time": "0:17:07", "throughput": 1264.19, "total_tokens": 635208}
|
|
{"current_steps": 1585, "total_steps": 4810, "loss": 0.1048, "lr": 4.2409476985582645e-06, "epoch": 1.6476091476091477, "percentage": 32.95, "elapsed_time": "0:08:22", "remaining_time": "0:17:03", "throughput": 1267.45, "total_tokens": 637256}
|
|
{"current_steps": 1590, "total_steps": 4810, "loss": 0.0156, "lr": 4.234425972924014e-06, "epoch": 1.6528066528066527, "percentage": 33.06, "elapsed_time": "0:08:23", "remaining_time": "0:16:58", "throughput": 1270.46, "total_tokens": 639176}
|
|
{"current_steps": 1595, "total_steps": 4810, "loss": 0.1551, "lr": 4.227881411298175e-06, "epoch": 1.658004158004158, "percentage": 33.16, "elapsed_time": "0:08:23", "remaining_time": "0:16:54", "throughput": 1273.71, "total_tokens": 641224}
|
|
{"current_steps": 1600, "total_steps": 4810, "loss": 0.1125, "lr": 4.221314099848481e-06, "epoch": 1.6632016632016633, "percentage": 33.26, "elapsed_time": "0:08:23", "remaining_time": "0:16:50", "throughput": 1276.7, "total_tokens": 643144}
|
|
{"current_steps": 1605, "total_steps": 4810, "loss": 0.1457, "lr": 4.214724125042195e-06, "epoch": 1.6683991683991684, "percentage": 33.37, "elapsed_time": "0:08:24", "remaining_time": "0:16:46", "throughput": 1279.44, "total_tokens": 644936}
|
|
{"current_steps": 1610, "total_steps": 4810, "loss": 0.1623, "lr": 4.208111573644975e-06, "epoch": 1.6735966735966736, "percentage": 33.47, "elapsed_time": "0:08:24", "remaining_time": "0:16:42", "throughput": 1282.68, "total_tokens": 646984}
|
|
{"current_steps": 1615, "total_steps": 4810, "loss": 0.2052, "lr": 4.2014765327197285e-06, "epoch": 1.678794178794179, "percentage": 33.58, "elapsed_time": "0:08:24", "remaining_time": "0:16:38", "throughput": 1285.92, "total_tokens": 649032}
|
|
{"current_steps": 1620, "total_steps": 4810, "loss": 0.2047, "lr": 4.194819089625466e-06, "epoch": 1.683991683991684, "percentage": 33.68, "elapsed_time": "0:08:25", "remaining_time": "0:16:34", "throughput": 1289.15, "total_tokens": 651080}
|
|
{"current_steps": 1625, "total_steps": 4810, "loss": 0.2123, "lr": 4.188139332016154e-06, "epoch": 1.689189189189189, "percentage": 33.78, "elapsed_time": "0:08:25", "remaining_time": "0:16:30", "throughput": 1292.13, "total_tokens": 653000}
|
|
{"current_steps": 1630, "total_steps": 4810, "loss": 0.2089, "lr": 4.181437347839559e-06, "epoch": 1.6943866943866945, "percentage": 33.89, "elapsed_time": "0:08:25", "remaining_time": "0:16:26", "throughput": 1295.11, "total_tokens": 654920}
|
|
{"current_steps": 1635, "total_steps": 4810, "loss": 0.1685, "lr": 4.174713225336087e-06, "epoch": 1.6995841995841996, "percentage": 33.99, "elapsed_time": "0:08:26", "remaining_time": "0:16:22", "throughput": 1298.2, "total_tokens": 656904}
|
|
{"current_steps": 1640, "total_steps": 4810, "loss": 0.105, "lr": 4.167967053037625e-06, "epoch": 1.7047817047817047, "percentage": 34.1, "elapsed_time": "0:08:26", "remaining_time": "0:16:18", "throughput": 1301.42, "total_tokens": 658952}
|
|
{"current_steps": 1645, "total_steps": 4810, "loss": 0.0899, "lr": 4.161198919766375e-06, "epoch": 1.70997920997921, "percentage": 34.2, "elapsed_time": "0:08:26", "remaining_time": "0:16:14", "throughput": 1304.39, "total_tokens": 660872}
|
|
{"current_steps": 1650, "total_steps": 4810, "loss": 0.2054, "lr": 4.154408914633685e-06, "epoch": 1.7151767151767152, "percentage": 34.3, "elapsed_time": "0:08:26", "remaining_time": "0:16:10", "throughput": 1307.47, "total_tokens": 662856}
|
|
{"current_steps": 1655, "total_steps": 4810, "loss": 0.2025, "lr": 4.147597127038873e-06, "epoch": 1.7203742203742203, "percentage": 34.41, "elapsed_time": "0:08:27", "remaining_time": "0:16:07", "throughput": 1310.67, "total_tokens": 664904}
|
|
{"current_steps": 1660, "total_steps": 4810, "loss": 0.141, "lr": 4.140763646668051e-06, "epoch": 1.7255717255717256, "percentage": 34.51, "elapsed_time": "0:08:27", "remaining_time": "0:16:03", "throughput": 1313.75, "total_tokens": 666888}
|
|
{"current_steps": 1665, "total_steps": 4810, "loss": 0.0252, "lr": 4.133908563492949e-06, "epoch": 1.7307692307692308, "percentage": 34.62, "elapsed_time": "0:08:27", "remaining_time": "0:15:59", "throughput": 1316.95, "total_tokens": 668936}
|
|
{"current_steps": 1670, "total_steps": 4810, "loss": 0.2066, "lr": 4.12703196776972e-06, "epoch": 1.735966735966736, "percentage": 34.72, "elapsed_time": "0:08:28", "remaining_time": "0:15:55", "throughput": 1319.89, "total_tokens": 670856}
|
|
{"current_steps": 1675, "total_steps": 4810, "loss": 0.3627, "lr": 4.120133950037763e-06, "epoch": 1.7411642411642412, "percentage": 34.82, "elapsed_time": "0:08:28", "remaining_time": "0:15:51", "throughput": 1322.95, "total_tokens": 672840}
|
|
{"current_steps": 1680, "total_steps": 4810, "loss": 0.2218, "lr": 4.113214601118523e-06, "epoch": 1.7463617463617465, "percentage": 34.93, "elapsed_time": "0:08:28", "remaining_time": "0:15:48", "throughput": 1326.01, "total_tokens": 674824}
|
|
{"current_steps": 1685, "total_steps": 4810, "loss": 0.1042, "lr": 4.106274012114302e-06, "epoch": 1.7515592515592515, "percentage": 35.03, "elapsed_time": "0:08:29", "remaining_time": "0:15:44", "throughput": 1329.06, "total_tokens": 676808}
|
|
{"current_steps": 1687, "total_steps": 4810, "eval_loss": 0.2114141583442688, "epoch": 1.7536382536382535, "percentage": 35.07, "elapsed_time": "0:08:30", "remaining_time": "0:15:44", "throughput": 1327.52, "total_tokens": 677576}
|
|
{"current_steps": 1690, "total_steps": 4810, "loss": 0.1712, "lr": 4.099312274407049e-06, "epoch": 1.7567567567567568, "percentage": 35.14, "elapsed_time": "0:10:08", "remaining_time": "0:18:42", "throughput": 1115.94, "total_tokens": 678728}
|
|
{"current_steps": 1695, "total_steps": 4810, "loss": 0.1031, "lr": 4.092329479657168e-06, "epoch": 1.761954261954262, "percentage": 35.24, "elapsed_time": "0:10:08", "remaining_time": "0:18:38", "throughput": 1118.71, "total_tokens": 680776}
|
|
{"current_steps": 1700, "total_steps": 4810, "loss": 0.1288, "lr": 4.085325719802307e-06, "epoch": 1.7671517671517671, "percentage": 35.34, "elapsed_time": "0:10:08", "remaining_time": "0:18:33", "throughput": 1121.79, "total_tokens": 683016}
|
|
{"current_steps": 1705, "total_steps": 4810, "loss": 0.0556, "lr": 4.0783010870561445e-06, "epoch": 1.7723492723492722, "percentage": 35.45, "elapsed_time": "0:10:09", "remaining_time": "0:18:29", "throughput": 1124.87, "total_tokens": 685256}
|
|
{"current_steps": 1710, "total_steps": 4810, "loss": 0.3125, "lr": 4.07125567390718e-06, "epoch": 1.7775467775467777, "percentage": 35.55, "elapsed_time": "0:10:09", "remaining_time": "0:18:24", "throughput": 1127.63, "total_tokens": 687304}
|
|
{"current_steps": 1715, "total_steps": 4810, "loss": 0.2158, "lr": 4.064189573117512e-06, "epoch": 1.7827442827442828, "percentage": 35.65, "elapsed_time": "0:10:09", "remaining_time": "0:18:20", "throughput": 1130.18, "total_tokens": 689224}
|
|
{"current_steps": 1720, "total_steps": 4810, "loss": 0.1701, "lr": 4.057102877721621e-06, "epoch": 1.7879417879417878, "percentage": 35.76, "elapsed_time": "0:10:10", "remaining_time": "0:18:16", "throughput": 1133.14, "total_tokens": 691400}
|
|
{"current_steps": 1725, "total_steps": 4810, "loss": 0.1154, "lr": 4.049995681025143e-06, "epoch": 1.793139293139293, "percentage": 35.86, "elapsed_time": "0:10:10", "remaining_time": "0:18:11", "throughput": 1135.68, "total_tokens": 693320}
|
|
{"current_steps": 1730, "total_steps": 4810, "loss": 0.1654, "lr": 4.0428680766036386e-06, "epoch": 1.7983367983367984, "percentage": 35.97, "elapsed_time": "0:10:10", "remaining_time": "0:18:07", "throughput": 1138.53, "total_tokens": 695432}
|
|
{"current_steps": 1735, "total_steps": 4810, "loss": 0.2169, "lr": 4.035720158301363e-06, "epoch": 1.8035343035343034, "percentage": 36.07, "elapsed_time": "0:10:11", "remaining_time": "0:18:03", "throughput": 1141.38, "total_tokens": 697544}
|
|
{"current_steps": 1740, "total_steps": 4810, "loss": 0.1438, "lr": 4.028552020230031e-06, "epoch": 1.8087318087318087, "percentage": 36.17, "elapsed_time": "0:10:11", "remaining_time": "0:17:58", "throughput": 1144.11, "total_tokens": 699592}
|
|
{"current_steps": 1745, "total_steps": 4810, "loss": 0.2247, "lr": 4.021363756767577e-06, "epoch": 1.813929313929314, "percentage": 36.28, "elapsed_time": "0:10:11", "remaining_time": "0:17:54", "throughput": 1146.74, "total_tokens": 701576}
|
|
{"current_steps": 1750, "total_steps": 4810, "loss": 0.2586, "lr": 4.014155462556913e-06, "epoch": 1.819126819126819, "percentage": 36.38, "elapsed_time": "0:10:12", "remaining_time": "0:17:50", "throughput": 1149.58, "total_tokens": 703688}
|
|
{"current_steps": 1755, "total_steps": 4810, "loss": 0.2187, "lr": 4.006927232504682e-06, "epoch": 1.8243243243243243, "percentage": 36.49, "elapsed_time": "0:10:12", "remaining_time": "0:17:46", "throughput": 1152.31, "total_tokens": 705736}
|
|
{"current_steps": 1760, "total_steps": 4810, "loss": 0.043, "lr": 3.999679161780006e-06, "epoch": 1.8295218295218296, "percentage": 36.59, "elapsed_time": "0:10:12", "remaining_time": "0:17:41", "throughput": 1154.94, "total_tokens": 707720}
|
|
{"current_steps": 1765, "total_steps": 4810, "loss": 0.08, "lr": 3.99241134581324e-06, "epoch": 1.8347193347193347, "percentage": 36.69, "elapsed_time": "0:10:13", "remaining_time": "0:17:37", "throughput": 1157.88, "total_tokens": 709896}
|
|
{"current_steps": 1770, "total_steps": 4810, "loss": 0.1669, "lr": 3.985123880294708e-06, "epoch": 1.83991683991684, "percentage": 36.8, "elapsed_time": "0:10:13", "remaining_time": "0:17:33", "throughput": 1160.61, "total_tokens": 711944}
|
|
{"current_steps": 1775, "total_steps": 4810, "loss": 0.1912, "lr": 3.977816861173446e-06, "epoch": 1.8451143451143452, "percentage": 36.9, "elapsed_time": "0:10:13", "remaining_time": "0:17:29", "throughput": 1163.33, "total_tokens": 713992}
|
|
{"current_steps": 1780, "total_steps": 4810, "loss": 0.1846, "lr": 3.970490384655939e-06, "epoch": 1.8503118503118503, "percentage": 37.01, "elapsed_time": "0:10:14", "remaining_time": "0:17:25", "throughput": 1165.95, "total_tokens": 715976}
|
|
{"current_steps": 1785, "total_steps": 4810, "loss": 0.105, "lr": 3.963144547204856e-06, "epoch": 1.8555093555093554, "percentage": 37.11, "elapsed_time": "0:10:14", "remaining_time": "0:17:21", "throughput": 1168.66, "total_tokens": 718024}
|
|
{"current_steps": 1790, "total_steps": 4810, "loss": 0.2342, "lr": 3.955779445537776e-06, "epoch": 1.8607068607068609, "percentage": 37.21, "elapsed_time": "0:10:14", "remaining_time": "0:17:17", "throughput": 1171.38, "total_tokens": 720072}
|
|
{"current_steps": 1795, "total_steps": 4810, "loss": 0.2314, "lr": 3.948395176625918e-06, "epoch": 1.865904365904366, "percentage": 37.32, "elapsed_time": "0:10:15", "remaining_time": "0:17:13", "throughput": 1174.09, "total_tokens": 722120}
|
|
{"current_steps": 1800, "total_steps": 4810, "loss": 0.1187, "lr": 3.940991837692861e-06, "epoch": 1.871101871101871, "percentage": 37.42, "elapsed_time": "0:10:15", "remaining_time": "0:17:09", "throughput": 1176.8, "total_tokens": 724168}
|
|
{"current_steps": 1805, "total_steps": 4810, "loss": 0.1292, "lr": 3.933569526213268e-06, "epoch": 1.8762993762993763, "percentage": 37.53, "elapsed_time": "0:10:15", "remaining_time": "0:17:05", "throughput": 1179.61, "total_tokens": 726280}
|
|
{"current_steps": 1810, "total_steps": 4810, "loss": 0.0843, "lr": 3.926128339911599e-06, "epoch": 1.8814968814968815, "percentage": 37.63, "elapsed_time": "0:10:16", "remaining_time": "0:17:01", "throughput": 1182.21, "total_tokens": 728264}
|
|
{"current_steps": 1815, "total_steps": 4810, "loss": 0.1791, "lr": 3.918668376760827e-06, "epoch": 1.8866943866943866, "percentage": 37.73, "elapsed_time": "0:10:16", "remaining_time": "0:16:57", "throughput": 1184.91, "total_tokens": 730312}
|
|
{"current_steps": 1820, "total_steps": 4810, "loss": 0.1365, "lr": 3.9111897349811455e-06, "epoch": 1.8918918918918919, "percentage": 37.84, "elapsed_time": "0:10:16", "remaining_time": "0:16:53", "throughput": 1187.51, "total_tokens": 732296}
|
|
{"current_steps": 1825, "total_steps": 4810, "loss": 0.1369, "lr": 3.903692513038677e-06, "epoch": 1.8970893970893972, "percentage": 37.94, "elapsed_time": "0:10:16", "remaining_time": "0:16:49", "throughput": 1189.79, "total_tokens": 734088}
|
|
{"current_steps": 1830, "total_steps": 4810, "loss": 0.2305, "lr": 3.896176809644178e-06, "epoch": 1.9022869022869022, "percentage": 38.05, "elapsed_time": "0:10:17", "remaining_time": "0:16:45", "throughput": 1192.37, "total_tokens": 736072}
|
|
{"current_steps": 1835, "total_steps": 4810, "loss": 0.2062, "lr": 3.8886427237517345e-06, "epoch": 1.9074844074844075, "percentage": 38.15, "elapsed_time": "0:10:17", "remaining_time": "0:16:41", "throughput": 1195.06, "total_tokens": 738120}
|
|
{"current_steps": 1840, "total_steps": 4810, "loss": 0.2077, "lr": 3.881090354557463e-06, "epoch": 1.9126819126819128, "percentage": 38.25, "elapsed_time": "0:10:17", "remaining_time": "0:16:37", "throughput": 1197.75, "total_tokens": 740168}
|
|
{"current_steps": 1845, "total_steps": 4810, "loss": 0.1425, "lr": 3.8735198014982066e-06, "epoch": 1.9178794178794178, "percentage": 38.36, "elapsed_time": "0:10:18", "remaining_time": "0:16:33", "throughput": 1200.54, "total_tokens": 742280}
|
|
{"current_steps": 1850, "total_steps": 4810, "loss": 0.0702, "lr": 3.865931164250219e-06, "epoch": 1.9230769230769231, "percentage": 38.46, "elapsed_time": "0:10:18", "remaining_time": "0:16:29", "throughput": 1203.22, "total_tokens": 744328}
|
|
{"current_steps": 1855, "total_steps": 4810, "loss": 0.1732, "lr": 3.858324542727859e-06, "epoch": 1.9282744282744284, "percentage": 38.57, "elapsed_time": "0:10:18", "remaining_time": "0:16:25", "throughput": 1206.0, "total_tokens": 746440}
|
|
{"current_steps": 1860, "total_steps": 4810, "loss": 0.1543, "lr": 3.8507000370822675e-06, "epoch": 1.9334719334719335, "percentage": 38.67, "elapsed_time": "0:10:19", "remaining_time": "0:16:22", "throughput": 1208.68, "total_tokens": 748488}
|
|
{"current_steps": 1865, "total_steps": 4810, "loss": 0.1298, "lr": 3.84305774770006e-06, "epoch": 1.9386694386694385, "percentage": 38.77, "elapsed_time": "0:10:19", "remaining_time": "0:16:18", "throughput": 1211.04, "total_tokens": 750344}
|
|
{"current_steps": 1870, "total_steps": 4810, "loss": 0.0507, "lr": 3.835397775201991e-06, "epoch": 1.943866943866944, "percentage": 38.88, "elapsed_time": "0:10:19", "remaining_time": "0:16:14", "throughput": 1213.61, "total_tokens": 752328}
|
|
{"current_steps": 1875, "total_steps": 4810, "loss": 0.2625, "lr": 3.827720220441642e-06, "epoch": 1.949064449064449, "percentage": 38.98, "elapsed_time": "0:10:20", "remaining_time": "0:16:10", "throughput": 1216.17, "total_tokens": 754312}
|
|
{"current_steps": 1880, "total_steps": 4810, "loss": 0.4145, "lr": 3.820025184504085e-06, "epoch": 1.9542619542619541, "percentage": 39.09, "elapsed_time": "0:10:20", "remaining_time": "0:16:07", "throughput": 1218.63, "total_tokens": 756232}
|
|
{"current_steps": 1885, "total_steps": 4810, "loss": 0.2626, "lr": 3.812312768704557e-06, "epoch": 1.9594594594594594, "percentage": 39.19, "elapsed_time": "0:10:20", "remaining_time": "0:16:03", "throughput": 1221.29, "total_tokens": 758280}
|
|
{"current_steps": 1890, "total_steps": 4810, "loss": 0.1128, "lr": 3.80458307458712e-06, "epoch": 1.9646569646569647, "percentage": 39.29, "elapsed_time": "0:10:21", "remaining_time": "0:15:59", "throughput": 1223.95, "total_tokens": 760328}
|
|
{"current_steps": 1895, "total_steps": 4810, "loss": 0.1213, "lr": 3.7968362039233315e-06, "epoch": 1.9698544698544698, "percentage": 39.4, "elapsed_time": "0:10:21", "remaining_time": "0:15:56", "throughput": 1226.4, "total_tokens": 762248}
|
|
{"current_steps": 1900, "total_steps": 4810, "loss": 0.077, "lr": 3.7890722587108985e-06, "epoch": 1.975051975051975, "percentage": 39.5, "elapsed_time": "0:10:21", "remaining_time": "0:15:52", "throughput": 1228.84, "total_tokens": 764168}
|
|
{"current_steps": 1905, "total_steps": 4810, "loss": 0.0655, "lr": 3.7812913411723377e-06, "epoch": 1.9802494802494803, "percentage": 39.6, "elapsed_time": "0:10:22", "remaining_time": "0:15:48", "throughput": 1231.49, "total_tokens": 766216}
|
|
{"current_steps": 1910, "total_steps": 4810, "loss": 0.0962, "lr": 3.773493553753628e-06, "epoch": 1.9854469854469854, "percentage": 39.71, "elapsed_time": "0:10:22", "remaining_time": "0:15:45", "throughput": 1234.14, "total_tokens": 768264}
|
|
{"current_steps": 1915, "total_steps": 4810, "loss": 0.0219, "lr": 3.7656789991228638e-06, "epoch": 1.9906444906444907, "percentage": 39.81, "elapsed_time": "0:10:22", "remaining_time": "0:15:41", "throughput": 1236.58, "total_tokens": 770184}
|
|
{"current_steps": 1920, "total_steps": 4810, "loss": 0.1279, "lr": 3.7578477801689e-06, "epoch": 1.995841995841996, "percentage": 39.92, "elapsed_time": "0:10:23", "remaining_time": "0:15:37", "throughput": 1239.12, "total_tokens": 772168}
|
|
{"current_steps": 1925, "total_steps": 4810, "loss": 0.096, "lr": 3.7500000000000005e-06, "epoch": 2.001039501039501, "percentage": 40.02, "elapsed_time": "0:10:23", "remaining_time": "0:15:34", "throughput": 1241.5, "total_tokens": 774160}
|
|
{"current_steps": 1928, "total_steps": 4810, "eval_loss": 0.3561875522136688, "epoch": 2.004158004158004, "percentage": 40.08, "elapsed_time": "0:10:24", "remaining_time": "0:15:33", "throughput": 1240.92, "total_tokens": 775312}
|
|
{"current_steps": 1930, "total_steps": 4810, "loss": 0.0698, "lr": 3.7421357619424793e-06, "epoch": 2.006237006237006, "percentage": 40.12, "elapsed_time": "0:11:14", "remaining_time": "0:16:46", "throughput": 1150.84, "total_tokens": 776144}
|
|
{"current_steps": 1935, "total_steps": 4810, "loss": 0.0941, "lr": 3.7342551695393375e-06, "epoch": 2.0114345114345116, "percentage": 40.23, "elapsed_time": "0:11:14", "remaining_time": "0:16:42", "throughput": 1153.24, "total_tokens": 778128}
|
|
{"current_steps": 1940, "total_steps": 4810, "loss": 0.0863, "lr": 3.7263583265489077e-06, "epoch": 2.0166320166320166, "percentage": 40.33, "elapsed_time": "0:11:15", "remaining_time": "0:16:38", "throughput": 1155.72, "total_tokens": 780176}
|
|
{"current_steps": 1945, "total_steps": 4810, "loss": 0.0572, "lr": 3.718445336943478e-06, "epoch": 2.0218295218295217, "percentage": 40.44, "elapsed_time": "0:11:15", "remaining_time": "0:16:34", "throughput": 1158.11, "total_tokens": 782160}
|
|
{"current_steps": 1950, "total_steps": 4810, "loss": 0.0675, "lr": 3.7105163049079305e-06, "epoch": 2.027027027027027, "percentage": 40.54, "elapsed_time": "0:11:15", "remaining_time": "0:16:31", "throughput": 1160.59, "total_tokens": 784208}
|
|
{"current_steps": 1955, "total_steps": 4810, "loss": 0.0002, "lr": 3.702571334838365e-06, "epoch": 2.0322245322245323, "percentage": 40.64, "elapsed_time": "0:11:16", "remaining_time": "0:16:27", "throughput": 1163.07, "total_tokens": 786256}
|
|
{"current_steps": 1960, "total_steps": 4810, "loss": 0.1288, "lr": 3.6946105313407287e-06, "epoch": 2.0374220374220373, "percentage": 40.75, "elapsed_time": "0:11:16", "remaining_time": "0:16:23", "throughput": 1165.46, "total_tokens": 788240}
|
|
{"current_steps": 1965, "total_steps": 4810, "loss": 0.1179, "lr": 3.6866339992294347e-06, "epoch": 2.042619542619543, "percentage": 40.85, "elapsed_time": "0:11:16", "remaining_time": "0:16:19", "throughput": 1167.93, "total_tokens": 790288}
|
|
{"current_steps": 1970, "total_steps": 4810, "loss": 0.0768, "lr": 3.678641843525986e-06, "epoch": 2.047817047817048, "percentage": 40.96, "elapsed_time": "0:11:16", "remaining_time": "0:16:15", "throughput": 1170.31, "total_tokens": 792272}
|
|
{"current_steps": 1975, "total_steps": 4810, "loss": 0.0297, "lr": 3.670634169457587e-06, "epoch": 2.053014553014553, "percentage": 41.06, "elapsed_time": "0:11:17", "remaining_time": "0:16:12", "throughput": 1172.88, "total_tokens": 794384}
|
|
{"current_steps": 1980, "total_steps": 4810, "loss": 0.1305, "lr": 3.662611082455766e-06, "epoch": 2.0582120582120584, "percentage": 41.16, "elapsed_time": "0:11:17", "remaining_time": "0:16:08", "throughput": 1175.25, "total_tokens": 796368}
|
|
{"current_steps": 1985, "total_steps": 4810, "loss": 0.0029, "lr": 3.6545726881549792e-06, "epoch": 2.0634095634095635, "percentage": 41.27, "elapsed_time": "0:11:17", "remaining_time": "0:16:04", "throughput": 1177.81, "total_tokens": 798480}
|
|
{"current_steps": 1990, "total_steps": 4810, "loss": 0.0937, "lr": 3.6465190923912275e-06, "epoch": 2.0686070686070686, "percentage": 41.37, "elapsed_time": "0:11:18", "remaining_time": "0:16:01", "throughput": 1180.28, "total_tokens": 800528}
|
|
{"current_steps": 1995, "total_steps": 4810, "loss": 0.1904, "lr": 3.6384504012006544e-06, "epoch": 2.0738045738045736, "percentage": 41.48, "elapsed_time": "0:11:18", "remaining_time": "0:15:57", "throughput": 1183.02, "total_tokens": 802768}
|
|
{"current_steps": 2000, "total_steps": 4810, "loss": 0.1647, "lr": 3.6303667208181576e-06, "epoch": 2.079002079002079, "percentage": 41.58, "elapsed_time": "0:11:18", "remaining_time": "0:15:53", "throughput": 1185.38, "total_tokens": 804752}
|
|
{"current_steps": 2005, "total_steps": 4810, "loss": 0.0649, "lr": 3.622268157675986e-06, "epoch": 2.084199584199584, "percentage": 41.68, "elapsed_time": "0:11:19", "remaining_time": "0:15:50", "throughput": 1187.65, "total_tokens": 806672}
|
|
{"current_steps": 2010, "total_steps": 4810, "loss": 0.0186, "lr": 3.614154818402339e-06, "epoch": 2.0893970893970892, "percentage": 41.79, "elapsed_time": "0:11:19", "remaining_time": "0:15:46", "throughput": 1190.01, "total_tokens": 808656}
|
|
{"current_steps": 2015, "total_steps": 4810, "loss": 0.0494, "lr": 3.6060268098199656e-06, "epoch": 2.0945945945945947, "percentage": 41.89, "elapsed_time": "0:11:19", "remaining_time": "0:15:43", "throughput": 1192.37, "total_tokens": 810640}
|
|
{"current_steps": 2020, "total_steps": 4810, "loss": 0.0657, "lr": 3.5978842389447523e-06, "epoch": 2.0997920997921, "percentage": 42.0, "elapsed_time": "0:11:20", "remaining_time": "0:15:39", "throughput": 1194.82, "total_tokens": 812688}
|
|
{"current_steps": 2025, "total_steps": 4810, "loss": 0.0206, "lr": 3.5897272129843198e-06, "epoch": 2.104989604989605, "percentage": 42.1, "elapsed_time": "0:11:20", "remaining_time": "0:15:35", "throughput": 1197.36, "total_tokens": 814800}
|
|
{"current_steps": 2030, "total_steps": 4810, "loss": 0.0252, "lr": 3.5815558393366064e-06, "epoch": 2.1101871101871104, "percentage": 42.2, "elapsed_time": "0:11:20", "remaining_time": "0:15:32", "throughput": 1199.9, "total_tokens": 816912}
|
|
{"current_steps": 2035, "total_steps": 4810, "loss": 0.1156, "lr": 3.57337022558846e-06, "epoch": 2.1153846153846154, "percentage": 42.31, "elapsed_time": "0:11:21", "remaining_time": "0:15:28", "throughput": 1202.24, "total_tokens": 818896}
|
|
{"current_steps": 2040, "total_steps": 4810, "loss": 0.0855, "lr": 3.5651704795142137e-06, "epoch": 2.1205821205821205, "percentage": 42.41, "elapsed_time": "0:11:21", "remaining_time": "0:15:25", "throughput": 1204.59, "total_tokens": 820880}
|
|
{"current_steps": 2045, "total_steps": 4810, "loss": 0.1594, "lr": 3.5569567090742763e-06, "epoch": 2.125779625779626, "percentage": 42.52, "elapsed_time": "0:11:21", "remaining_time": "0:15:21", "throughput": 1206.94, "total_tokens": 822864}
|
|
{"current_steps": 2050, "total_steps": 4810, "loss": 0.0265, "lr": 3.548729022413701e-06, "epoch": 2.130977130977131, "percentage": 42.62, "elapsed_time": "0:11:22", "remaining_time": "0:15:18", "throughput": 1209.56, "total_tokens": 825040}
|
|
{"current_steps": 2055, "total_steps": 4810, "loss": 0.0995, "lr": 3.5404875278607693e-06, "epoch": 2.136174636174636, "percentage": 42.72, "elapsed_time": "0:11:22", "remaining_time": "0:15:14", "throughput": 1211.9, "total_tokens": 827024}
|
|
{"current_steps": 2060, "total_steps": 4810, "loss": 0.072, "lr": 3.5322323339255602e-06, "epoch": 2.141372141372141, "percentage": 42.83, "elapsed_time": "0:11:22", "remaining_time": "0:15:11", "throughput": 1214.42, "total_tokens": 829136}
|
|
{"current_steps": 2065, "total_steps": 4810, "loss": 0.0483, "lr": 3.5239635492985248e-06, "epoch": 2.1465696465696467, "percentage": 42.93, "elapsed_time": "0:11:23", "remaining_time": "0:15:07", "throughput": 1216.85, "total_tokens": 831184}
|
|
{"current_steps": 2070, "total_steps": 4810, "loss": 0.0007, "lr": 3.5156812828490507e-06, "epoch": 2.1517671517671517, "percentage": 43.04, "elapsed_time": "0:11:23", "remaining_time": "0:15:04", "throughput": 1219.18, "total_tokens": 833168}
|
|
{"current_steps": 2075, "total_steps": 4810, "loss": 0.0685, "lr": 3.5073856436240335e-06, "epoch": 2.156964656964657, "percentage": 43.14, "elapsed_time": "0:11:23", "remaining_time": "0:15:01", "throughput": 1221.61, "total_tokens": 835216}
|
|
{"current_steps": 2080, "total_steps": 4810, "loss": 0.0004, "lr": 3.4990767408464383e-06, "epoch": 2.1621621621621623, "percentage": 43.24, "elapsed_time": "0:11:24", "remaining_time": "0:14:57", "throughput": 1223.84, "total_tokens": 837136}
|
|
{"current_steps": 2085, "total_steps": 4810, "loss": 0.1832, "lr": 3.4907546839138627e-06, "epoch": 2.1673596673596673, "percentage": 43.35, "elapsed_time": "0:11:24", "remaining_time": "0:14:54", "throughput": 1226.17, "total_tokens": 839120}
|
|
{"current_steps": 2090, "total_steps": 4810, "loss": 0.0608, "lr": 3.4824195823970954e-06, "epoch": 2.1725571725571724, "percentage": 43.45, "elapsed_time": "0:11:24", "remaining_time": "0:14:51", "throughput": 1228.49, "total_tokens": 841104}
|
|
{"current_steps": 2095, "total_steps": 4810, "loss": 0.0894, "lr": 3.4740715460386732e-06, "epoch": 2.177754677754678, "percentage": 43.56, "elapsed_time": "0:11:24", "remaining_time": "0:14:47", "throughput": 1230.91, "total_tokens": 843152}
|
|
{"current_steps": 2100, "total_steps": 4810, "loss": 0.0972, "lr": 3.46571068475144e-06, "epoch": 2.182952182952183, "percentage": 43.66, "elapsed_time": "0:11:25", "remaining_time": "0:14:44", "throughput": 1233.22, "total_tokens": 845136}
|
|
{"current_steps": 2105, "total_steps": 4810, "loss": 0.1887, "lr": 3.457337108617094e-06, "epoch": 2.188149688149688, "percentage": 43.76, "elapsed_time": "0:11:25", "remaining_time": "0:14:41", "throughput": 1235.47, "total_tokens": 847120}
|
|
{"current_steps": 2110, "total_steps": 4810, "loss": 0.2052, "lr": 3.4489509278847415e-06, "epoch": 2.1933471933471935, "percentage": 43.87, "elapsed_time": "0:11:25", "remaining_time": "0:14:37", "throughput": 1237.87, "total_tokens": 849168}
|
|
{"current_steps": 2115, "total_steps": 4810, "loss": 0.0731, "lr": 3.440552252969446e-06, "epoch": 2.1985446985446986, "percentage": 43.97, "elapsed_time": "0:11:26", "remaining_time": "0:14:34", "throughput": 1240.18, "total_tokens": 851152}
|
|
{"current_steps": 2120, "total_steps": 4810, "loss": 0.0078, "lr": 3.432141194450772e-06, "epoch": 2.2037422037422036, "percentage": 44.07, "elapsed_time": "0:11:26", "remaining_time": "0:14:31", "throughput": 1242.29, "total_tokens": 853008}
|
|
{"current_steps": 2125, "total_steps": 4810, "loss": 0.0651, "lr": 3.4237178630713312e-06, "epoch": 2.208939708939709, "percentage": 44.18, "elapsed_time": "0:11:26", "remaining_time": "0:14:27", "throughput": 1244.78, "total_tokens": 855120}
|
|
{"current_steps": 2130, "total_steps": 4810, "loss": 0.1599, "lr": 3.4152823697353237e-06, "epoch": 2.214137214137214, "percentage": 44.28, "elapsed_time": "0:11:27", "remaining_time": "0:14:24", "throughput": 1247.27, "total_tokens": 857232}
|
|
{"current_steps": 2135, "total_steps": 4810, "loss": 0.057, "lr": 3.4068348255070764e-06, "epoch": 2.2193347193347193, "percentage": 44.39, "elapsed_time": "0:11:27", "remaining_time": "0:14:21", "throughput": 1249.75, "total_tokens": 859344}
|
|
{"current_steps": 2140, "total_steps": 4810, "loss": 0.0868, "lr": 3.3983753416095844e-06, "epoch": 2.2245322245322248, "percentage": 44.49, "elapsed_time": "0:11:27", "remaining_time": "0:14:18", "throughput": 1252.05, "total_tokens": 861328}
|
|
{"current_steps": 2145, "total_steps": 4810, "loss": 0.2098, "lr": 3.3899040294230413e-06, "epoch": 2.22972972972973, "percentage": 44.59, "elapsed_time": "0:11:28", "remaining_time": "0:14:15", "throughput": 1254.45, "total_tokens": 863376}
|
|
{"current_steps": 2150, "total_steps": 4810, "loss": 0.0096, "lr": 3.381421000483378e-06, "epoch": 2.234927234927235, "percentage": 44.7, "elapsed_time": "0:11:28", "remaining_time": "0:14:11", "throughput": 1256.84, "total_tokens": 865424}
|
|
{"current_steps": 2155, "total_steps": 4810, "loss": 0.0981, "lr": 3.37292636648079e-06, "epoch": 2.24012474012474, "percentage": 44.8, "elapsed_time": "0:11:28", "remaining_time": "0:14:08", "throughput": 1259.22, "total_tokens": 867472}
|
|
{"current_steps": 2160, "total_steps": 4810, "loss": 0.1542, "lr": 3.3644202392582703e-06, "epoch": 2.2453222453222454, "percentage": 44.91, "elapsed_time": "0:11:29", "remaining_time": "0:14:05", "throughput": 1261.7, "total_tokens": 869584}
|
|
{"current_steps": 2165, "total_steps": 4810, "loss": 0.0094, "lr": 3.3559027308101344e-06, "epoch": 2.2505197505197505, "percentage": 45.01, "elapsed_time": "0:11:29", "remaining_time": "0:14:02", "throughput": 1263.99, "total_tokens": 871568}
|
|
{"current_steps": 2169, "total_steps": 4810, "eval_loss": 0.30345332622528076, "epoch": 2.2546777546777546, "percentage": 45.09, "elapsed_time": "0:11:30", "remaining_time": "0:14:01", "throughput": 1263.84, "total_tokens": 873104}
|
|
{"current_steps": 2170, "total_steps": 4810, "loss": 0.0945, "lr": 3.3473739532805464e-06, "epoch": 2.2557172557172556, "percentage": 45.11, "elapsed_time": "0:12:17", "remaining_time": "0:14:57", "throughput": 1184.44, "total_tokens": 873488}
|
|
{"current_steps": 2175, "total_steps": 4810, "loss": 0.1038, "lr": 3.3388340189620427e-06, "epoch": 2.260914760914761, "percentage": 45.22, "elapsed_time": "0:12:17", "remaining_time": "0:14:53", "throughput": 1186.61, "total_tokens": 875472}
|
|
{"current_steps": 2180, "total_steps": 4810, "loss": 0.0275, "lr": 3.3302830402940534e-06, "epoch": 2.266112266112266, "percentage": 45.32, "elapsed_time": "0:12:18", "remaining_time": "0:14:50", "throughput": 1188.7, "total_tokens": 877392}
|
|
{"current_steps": 2185, "total_steps": 4810, "loss": 0.1037, "lr": 3.3217211298614225e-06, "epoch": 2.271309771309771, "percentage": 45.43, "elapsed_time": "0:12:18", "remaining_time": "0:14:47", "throughput": 1191.04, "total_tokens": 879504}
|
|
{"current_steps": 2190, "total_steps": 4810, "loss": 0.0551, "lr": 3.313148400392925e-06, "epoch": 2.2765072765072767, "percentage": 45.53, "elapsed_time": "0:12:18", "remaining_time": "0:14:43", "throughput": 1193.04, "total_tokens": 881360}
|
|
{"current_steps": 2195, "total_steps": 4810, "loss": 0.094, "lr": 3.3045649647597814e-06, "epoch": 2.2817047817047817, "percentage": 45.63, "elapsed_time": "0:12:19", "remaining_time": "0:14:40", "throughput": 1195.12, "total_tokens": 883280}
|
|
{"current_steps": 2200, "total_steps": 4810, "loss": 0.0053, "lr": 3.2959709359741743e-06, "epoch": 2.286902286902287, "percentage": 45.74, "elapsed_time": "0:12:19", "remaining_time": "0:14:37", "throughput": 1197.37, "total_tokens": 885328}
|
|
{"current_steps": 2205, "total_steps": 4810, "loss": 0.0732, "lr": 3.2873664271877588e-06, "epoch": 2.2920997920997923, "percentage": 45.84, "elapsed_time": "0:12:19", "remaining_time": "0:14:33", "throughput": 1199.53, "total_tokens": 887312}
|
|
{"current_steps": 2210, "total_steps": 4810, "loss": 0.0574, "lr": 3.2787515516901717e-06, "epoch": 2.2972972972972974, "percentage": 45.95, "elapsed_time": "0:12:20", "remaining_time": "0:14:30", "throughput": 1201.69, "total_tokens": 889296}
|
|
{"current_steps": 2215, "total_steps": 4810, "loss": 0.0007, "lr": 3.2701264229075443e-06, "epoch": 2.3024948024948024, "percentage": 46.05, "elapsed_time": "0:12:20", "remaining_time": "0:14:27", "throughput": 1204.02, "total_tokens": 891408}
|
|
{"current_steps": 2220, "total_steps": 4810, "loss": 0.001, "lr": 3.261491154401001e-06, "epoch": 2.3076923076923075, "percentage": 46.15, "elapsed_time": "0:12:20", "remaining_time": "0:14:24", "throughput": 1206.18, "total_tokens": 893392}
|
|
{"current_steps": 2225, "total_steps": 4810, "loss": 0.0047, "lr": 3.2528458598651735e-06, "epoch": 2.312889812889813, "percentage": 46.26, "elapsed_time": "0:12:21", "remaining_time": "0:14:20", "throughput": 1208.42, "total_tokens": 895440}
|
|
{"current_steps": 2230, "total_steps": 4810, "loss": 0.1493, "lr": 3.2441906531266963e-06, "epoch": 2.318087318087318, "percentage": 46.36, "elapsed_time": "0:12:21", "remaining_time": "0:14:17", "throughput": 1210.83, "total_tokens": 897616}
|
|
{"current_steps": 2235, "total_steps": 4810, "loss": 0.0359, "lr": 3.2355256481427145e-06, "epoch": 2.323284823284823, "percentage": 46.47, "elapsed_time": "0:12:21", "remaining_time": "0:14:14", "throughput": 1212.89, "total_tokens": 899536}
|
|
{"current_steps": 2240, "total_steps": 4810, "loss": 0.0408, "lr": 3.2268509589993745e-06, "epoch": 2.3284823284823286, "percentage": 46.57, "elapsed_time": "0:12:21", "remaining_time": "0:14:11", "throughput": 1215.21, "total_tokens": 901648}
|
|
{"current_steps": 2245, "total_steps": 4810, "loss": 0.1054, "lr": 3.218166699910332e-06, "epoch": 2.3336798336798337, "percentage": 46.67, "elapsed_time": "0:12:22", "remaining_time": "0:14:08", "throughput": 1217.45, "total_tokens": 903696}
|
|
{"current_steps": 2250, "total_steps": 4810, "loss": 0.1455, "lr": 3.209472985215243e-06, "epoch": 2.3388773388773387, "percentage": 46.78, "elapsed_time": "0:12:22", "remaining_time": "0:14:04", "throughput": 1219.43, "total_tokens": 905552}
|
|
{"current_steps": 2255, "total_steps": 4810, "loss": 0.0118, "lr": 3.2007699293782557e-06, "epoch": 2.3440748440748442, "percentage": 46.88, "elapsed_time": "0:12:22", "remaining_time": "0:14:01", "throughput": 1221.49, "total_tokens": 907472}
|
|
{"current_steps": 2260, "total_steps": 4810, "loss": 0.1043, "lr": 3.1920576469865115e-06, "epoch": 2.3492723492723493, "percentage": 46.99, "elapsed_time": "0:12:23", "remaining_time": "0:13:58", "throughput": 1223.8, "total_tokens": 909584}
|
|
{"current_steps": 2265, "total_steps": 4810, "loss": 0.0544, "lr": 3.183336252748627e-06, "epoch": 2.3544698544698544, "percentage": 47.09, "elapsed_time": "0:12:23", "remaining_time": "0:13:55", "throughput": 1226.03, "total_tokens": 911632}
|
|
{"current_steps": 2270, "total_steps": 4810, "loss": 0.0396, "lr": 3.1746058614931918e-06, "epoch": 2.35966735966736, "percentage": 47.19, "elapsed_time": "0:12:23", "remaining_time": "0:13:52", "throughput": 1228.17, "total_tokens": 913616}
|
|
{"current_steps": 2275, "total_steps": 4810, "loss": 0.0559, "lr": 3.16586658816725e-06, "epoch": 2.364864864864865, "percentage": 47.3, "elapsed_time": "0:12:24", "remaining_time": "0:13:49", "throughput": 1230.47, "total_tokens": 915728}
|
|
{"current_steps": 2280, "total_steps": 4810, "loss": 0.1154, "lr": 3.157118547834793e-06, "epoch": 2.37006237006237, "percentage": 47.4, "elapsed_time": "0:12:24", "remaining_time": "0:13:46", "throughput": 1232.7, "total_tokens": 917776}
|
|
{"current_steps": 2285, "total_steps": 4810, "loss": 0.1803, "lr": 3.1483618556752373e-06, "epoch": 2.375259875259875, "percentage": 47.51, "elapsed_time": "0:12:24", "remaining_time": "0:13:43", "throughput": 1235.08, "total_tokens": 919952}
|
|
{"current_steps": 2290, "total_steps": 4810, "loss": 0.0648, "lr": 3.139596626981916e-06, "epoch": 2.3804573804573805, "percentage": 47.61, "elapsed_time": "0:12:25", "remaining_time": "0:13:40", "throughput": 1237.12, "total_tokens": 921872}
|
|
{"current_steps": 2295, "total_steps": 4810, "loss": 0.1079, "lr": 3.1308229771605546e-06, "epoch": 2.3856548856548856, "percentage": 47.71, "elapsed_time": "0:12:25", "remaining_time": "0:13:36", "throughput": 1239.22, "total_tokens": 923856}
|
|
{"current_steps": 2300, "total_steps": 4810, "loss": 0.1516, "lr": 3.1220410217277546e-06, "epoch": 2.390852390852391, "percentage": 47.82, "elapsed_time": "0:12:25", "remaining_time": "0:13:33", "throughput": 1241.51, "total_tokens": 925968}
|
|
{"current_steps": 2305, "total_steps": 4810, "loss": 0.0496, "lr": 3.1132508763094715e-06, "epoch": 2.396049896049896, "percentage": 47.92, "elapsed_time": "0:12:26", "remaining_time": "0:13:30", "throughput": 1243.55, "total_tokens": 927888}
|
|
{"current_steps": 2310, "total_steps": 4810, "loss": 0.0691, "lr": 3.1044526566394924e-06, "epoch": 2.401247401247401, "percentage": 48.02, "elapsed_time": "0:12:26", "remaining_time": "0:13:27", "throughput": 1245.57, "total_tokens": 929808}
|
|
{"current_steps": 2315, "total_steps": 4810, "loss": 0.0009, "lr": 3.0956464785579125e-06, "epoch": 2.4064449064449063, "percentage": 48.13, "elapsed_time": "0:12:26", "remaining_time": "0:13:24", "throughput": 1247.61, "total_tokens": 931728}
|
|
{"current_steps": 2320, "total_steps": 4810, "loss": 0.0309, "lr": 3.0868324580096113e-06, "epoch": 2.4116424116424118, "percentage": 48.23, "elapsed_time": "0:12:27", "remaining_time": "0:13:21", "throughput": 1249.9, "total_tokens": 933840}
|
|
{"current_steps": 2325, "total_steps": 4810, "loss": 0.1115, "lr": 3.078010711042723e-06, "epoch": 2.416839916839917, "percentage": 48.34, "elapsed_time": "0:12:27", "remaining_time": "0:13:18", "throughput": 1252.02, "total_tokens": 935824}
|
|
{"current_steps": 2330, "total_steps": 4810, "loss": 0.043, "lr": 3.069181353807111e-06, "epoch": 2.422037422037422, "percentage": 48.44, "elapsed_time": "0:12:27", "remaining_time": "0:13:15", "throughput": 1254.22, "total_tokens": 937872}
|
|
{"current_steps": 2335, "total_steps": 4810, "loss": 0.098, "lr": 3.0603445025528377e-06, "epoch": 2.4272349272349274, "percentage": 48.54, "elapsed_time": "0:12:28", "remaining_time": "0:13:12", "throughput": 1256.5, "total_tokens": 939984}
|
|
{"current_steps": 2340, "total_steps": 4810, "loss": 0.0482, "lr": 3.051500273628633e-06, "epoch": 2.4324324324324325, "percentage": 48.65, "elapsed_time": "0:12:28", "remaining_time": "0:13:09", "throughput": 1258.61, "total_tokens": 941968}
|
|
{"current_steps": 2345, "total_steps": 4810, "loss": 0.0265, "lr": 3.042648783480366e-06, "epoch": 2.4376299376299375, "percentage": 48.75, "elapsed_time": "0:12:28", "remaining_time": "0:13:07", "throughput": 1260.72, "total_tokens": 943952}
|
|
{"current_steps": 2350, "total_steps": 4810, "loss": 0.0727, "lr": 3.0337901486495073e-06, "epoch": 2.442827442827443, "percentage": 48.86, "elapsed_time": "0:12:29", "remaining_time": "0:13:04", "throughput": 1262.74, "total_tokens": 945872}
|
|
{"current_steps": 2355, "total_steps": 4810, "loss": 0.1045, "lr": 3.0249244857715977e-06, "epoch": 2.448024948024948, "percentage": 48.96, "elapsed_time": "0:12:29", "remaining_time": "0:13:01", "throughput": 1264.85, "total_tokens": 947856}
|
|
{"current_steps": 2360, "total_steps": 4810, "loss": 0.0835, "lr": 3.01605191157471e-06, "epoch": 2.453222453222453, "percentage": 49.06, "elapsed_time": "0:12:29", "remaining_time": "0:12:58", "throughput": 1266.95, "total_tokens": 949840}
|
|
{"current_steps": 2365, "total_steps": 4810, "loss": 0.0307, "lr": 3.0071725428779152e-06, "epoch": 2.4584199584199586, "percentage": 49.17, "elapsed_time": "0:12:30", "remaining_time": "0:12:55", "throughput": 1268.97, "total_tokens": 951760}
|
|
{"current_steps": 2370, "total_steps": 4810, "loss": 0.0294, "lr": 2.9982864965897423e-06, "epoch": 2.4636174636174637, "percentage": 49.27, "elapsed_time": "0:12:30", "remaining_time": "0:12:52", "throughput": 1270.99, "total_tokens": 953680}
|
|
{"current_steps": 2375, "total_steps": 4810, "loss": 0.0349, "lr": 2.9893938897066392e-06, "epoch": 2.4688149688149688, "percentage": 49.38, "elapsed_time": "0:12:30", "remaining_time": "0:12:49", "throughput": 1273.0, "total_tokens": 955600}
|
|
{"current_steps": 2380, "total_steps": 4810, "loss": 0.2071, "lr": 2.9804948393114325e-06, "epoch": 2.474012474012474, "percentage": 49.48, "elapsed_time": "0:12:30", "remaining_time": "0:12:46", "throughput": 1274.93, "total_tokens": 957456}
|
|
{"current_steps": 2385, "total_steps": 4810, "loss": 0.0055, "lr": 2.9715894625717868e-06, "epoch": 2.4792099792099793, "percentage": 49.58, "elapsed_time": "0:12:31", "remaining_time": "0:12:43", "throughput": 1277.1, "total_tokens": 959504}
|
|
{"current_steps": 2390, "total_steps": 4810, "loss": 0.0277, "lr": 2.9626778767386604e-06, "epoch": 2.4844074844074844, "percentage": 49.69, "elapsed_time": "0:12:31", "remaining_time": "0:12:41", "throughput": 1279.18, "total_tokens": 961488}
|
|
{"current_steps": 2395, "total_steps": 4810, "loss": 0.1288, "lr": 2.953760199144764e-06, "epoch": 2.4896049896049894, "percentage": 49.79, "elapsed_time": "0:12:31", "remaining_time": "0:12:38", "throughput": 1281.19, "total_tokens": 963408}
|
|
{"current_steps": 2400, "total_steps": 4810, "loss": 0.0595, "lr": 2.9448365472030116e-06, "epoch": 2.494802494802495, "percentage": 49.9, "elapsed_time": "0:12:32", "remaining_time": "0:12:35", "throughput": 1283.27, "total_tokens": 965392}
|
|
{"current_steps": 2405, "total_steps": 4810, "loss": 0.0664, "lr": 2.935907038404981e-06, "epoch": 2.5, "percentage": 50.0, "elapsed_time": "0:12:32", "remaining_time": "0:12:32", "throughput": 1285.44, "total_tokens": 967440}
|
|
{"current_steps": 2410, "total_steps": 4810, "loss": 0.0894, "lr": 2.9269717903193603e-06, "epoch": 2.505197505197505, "percentage": 50.1, "elapsed_time": "0:12:32", "remaining_time": "0:12:29", "throughput": 1287.43, "total_tokens": 969360}
|
|
{"current_steps": 2410, "total_steps": 4810, "eval_loss": 0.3648892641067505, "epoch": 2.505197505197505, "percentage": 50.1, "elapsed_time": "0:12:34", "remaining_time": "0:12:30", "throughput": 1285.59, "total_tokens": 969360}
|
|
{"current_steps": 2415, "total_steps": 4810, "loss": 0.0082, "lr": 2.918030920590403e-06, "epoch": 2.51039501039501, "percentage": 50.21, "elapsed_time": "0:13:11", "remaining_time": "0:13:05", "throughput": 1226.85, "total_tokens": 971472}
|
|
{"current_steps": 2420, "total_steps": 4810, "loss": 0.0006, "lr": 2.9090845469363804e-06, "epoch": 2.5155925155925156, "percentage": 50.31, "elapsed_time": "0:13:12", "remaining_time": "0:13:02", "throughput": 1228.86, "total_tokens": 973456}
|
|
{"current_steps": 2425, "total_steps": 4810, "loss": 0.0004, "lr": 2.9001327871480296e-06, "epoch": 2.5207900207900207, "percentage": 50.42, "elapsed_time": "0:13:12", "remaining_time": "0:12:59", "throughput": 1230.94, "total_tokens": 975504}
|
|
{"current_steps": 2430, "total_steps": 4810, "loss": 0.0019, "lr": 2.8911757590870028e-06, "epoch": 2.525987525987526, "percentage": 50.52, "elapsed_time": "0:13:12", "remaining_time": "0:12:56", "throughput": 1233.03, "total_tokens": 977552}
|
|
{"current_steps": 2435, "total_steps": 4810, "loss": 0.1355, "lr": 2.8822135806843156e-06, "epoch": 2.5311850311850312, "percentage": 50.62, "elapsed_time": "0:13:13", "remaining_time": "0:12:53", "throughput": 1235.03, "total_tokens": 979536}
|
|
{"current_steps": 2440, "total_steps": 4810, "loss": 0.084, "lr": 2.873246369938797e-06, "epoch": 2.5363825363825363, "percentage": 50.73, "elapsed_time": "0:13:13", "remaining_time": "0:12:50", "throughput": 1237.11, "total_tokens": 981584}
|
|
{"current_steps": 2445, "total_steps": 4810, "loss": 0.0365, "lr": 2.8642742449155287e-06, "epoch": 2.5415800415800414, "percentage": 50.83, "elapsed_time": "0:13:13", "remaining_time": "0:12:47", "throughput": 1239.19, "total_tokens": 983632}
|
|
{"current_steps": 2450, "total_steps": 4810, "loss": 0.1776, "lr": 2.855297323744301e-06, "epoch": 2.546777546777547, "percentage": 50.94, "elapsed_time": "0:13:14", "remaining_time": "0:12:44", "throughput": 1241.27, "total_tokens": 985680}
|
|
{"current_steps": 2455, "total_steps": 4810, "loss": 0.0731, "lr": 2.8463157246180465e-06, "epoch": 2.551975051975052, "percentage": 51.04, "elapsed_time": "0:13:14", "remaining_time": "0:12:42", "throughput": 1243.26, "total_tokens": 987664}
|
|
{"current_steps": 2460, "total_steps": 4810, "loss": 0.0002, "lr": 2.8373295657912947e-06, "epoch": 2.5571725571725574, "percentage": 51.14, "elapsed_time": "0:13:14", "remaining_time": "0:12:39", "throughput": 1245.26, "total_tokens": 989648}
|
|
{"current_steps": 2465, "total_steps": 4810, "loss": 0.0005, "lr": 2.828338965578603e-06, "epoch": 2.5623700623700625, "percentage": 51.25, "elapsed_time": "0:13:15", "remaining_time": "0:12:36", "throughput": 1247.33, "total_tokens": 991696}
|
|
{"current_steps": 2470, "total_steps": 4810, "loss": 0.2142, "lr": 2.8193440423530117e-06, "epoch": 2.5675675675675675, "percentage": 51.35, "elapsed_time": "0:13:15", "remaining_time": "0:12:33", "throughput": 1249.24, "total_tokens": 993616}
|
|
{"current_steps": 2475, "total_steps": 4810, "loss": 0.0503, "lr": 2.810344914544475e-06, "epoch": 2.5727650727650726, "percentage": 51.46, "elapsed_time": "0:13:15", "remaining_time": "0:12:30", "throughput": 1251.3, "total_tokens": 995664}
|
|
{"current_steps": 2480, "total_steps": 4810, "loss": 0.1017, "lr": 2.8013417006383078e-06, "epoch": 2.577962577962578, "percentage": 51.56, "elapsed_time": "0:13:16", "remaining_time": "0:12:27", "throughput": 1253.29, "total_tokens": 997648}
|
|
{"current_steps": 2485, "total_steps": 4810, "loss": 0.0796, "lr": 2.792334519173624e-06, "epoch": 2.583160083160083, "percentage": 51.66, "elapsed_time": "0:13:16", "remaining_time": "0:12:25", "throughput": 1255.35, "total_tokens": 999696}
|
|
{"current_steps": 2490, "total_steps": 4810, "loss": 0.1002, "lr": 2.7833234887417745e-06, "epoch": 2.5883575883575882, "percentage": 51.77, "elapsed_time": "0:13:16", "remaining_time": "0:12:22", "throughput": 1257.33, "total_tokens": 1001680}
|
|
{"current_steps": 2495, "total_steps": 4810, "loss": 0.0836, "lr": 2.774308727984787e-06, "epoch": 2.5935550935550937, "percentage": 51.87, "elapsed_time": "0:13:16", "remaining_time": "0:12:19", "throughput": 1259.39, "total_tokens": 1003728}
|
|
{"current_steps": 2500, "total_steps": 4810, "loss": 0.0495, "lr": 2.7652903555938047e-06, "epoch": 2.598752598752599, "percentage": 51.98, "elapsed_time": "0:13:17", "remaining_time": "0:12:16", "throughput": 1261.21, "total_tokens": 1005584}
|
|
{"current_steps": 2505, "total_steps": 4810, "loss": 0.061, "lr": 2.756268490307524e-06, "epoch": 2.603950103950104, "percentage": 52.08, "elapsed_time": "0:13:17", "remaining_time": "0:12:13", "throughput": 1263.34, "total_tokens": 1007696}
|
|
{"current_steps": 2510, "total_steps": 4810, "loss": 0.2945, "lr": 2.747243250910625e-06, "epoch": 2.609147609147609, "percentage": 52.18, "elapsed_time": "0:13:17", "remaining_time": "0:12:11", "throughput": 1265.32, "total_tokens": 1009680}
|
|
{"current_steps": 2515, "total_steps": 4810, "loss": 0.0414, "lr": 2.7382147562322175e-06, "epoch": 2.6143451143451144, "percentage": 52.29, "elapsed_time": "0:13:18", "remaining_time": "0:12:08", "throughput": 1267.38, "total_tokens": 1011728}
|
|
{"current_steps": 2520, "total_steps": 4810, "loss": 0.0023, "lr": 2.729183125144269e-06, "epoch": 2.6195426195426195, "percentage": 52.39, "elapsed_time": "0:13:18", "remaining_time": "0:12:05", "throughput": 1269.51, "total_tokens": 1013840}
|
|
{"current_steps": 2525, "total_steps": 4810, "loss": 0.1403, "lr": 2.7201484765600426e-06, "epoch": 2.624740124740125, "percentage": 52.49, "elapsed_time": "0:13:18", "remaining_time": "0:12:02", "throughput": 1271.48, "total_tokens": 1015824}
|
|
{"current_steps": 2530, "total_steps": 4810, "loss": 0.1792, "lr": 2.71111092943253e-06, "epoch": 2.62993762993763, "percentage": 52.6, "elapsed_time": "0:13:19", "remaining_time": "0:12:00", "throughput": 1273.38, "total_tokens": 1017744}
|
|
{"current_steps": 2535, "total_steps": 4810, "loss": 0.0616, "lr": 2.702070602752887e-06, "epoch": 2.635135135135135, "percentage": 52.7, "elapsed_time": "0:13:19", "remaining_time": "0:11:57", "throughput": 1275.35, "total_tokens": 1019728}
|
|
{"current_steps": 2540, "total_steps": 4810, "loss": 0.0836, "lr": 2.693027615548864e-06, "epoch": 2.64033264033264, "percentage": 52.81, "elapsed_time": "0:13:19", "remaining_time": "0:11:54", "throughput": 1277.48, "total_tokens": 1021840}
|
|
{"current_steps": 2545, "total_steps": 4810, "loss": 0.0909, "lr": 2.6839820868832433e-06, "epoch": 2.6455301455301456, "percentage": 52.91, "elapsed_time": "0:13:20", "remaining_time": "0:11:52", "throughput": 1279.44, "total_tokens": 1023824}
|
|
{"current_steps": 2550, "total_steps": 4810, "loss": 0.0143, "lr": 2.6749341358522675e-06, "epoch": 2.6507276507276507, "percentage": 53.01, "elapsed_time": "0:13:20", "remaining_time": "0:11:49", "throughput": 1281.17, "total_tokens": 1025616}
|
|
{"current_steps": 2555, "total_steps": 4810, "loss": 0.0105, "lr": 2.665883881584072e-06, "epoch": 2.6559251559251558, "percentage": 53.12, "elapsed_time": "0:13:20", "remaining_time": "0:11:46", "throughput": 1283.22, "total_tokens": 1027664}
|
|
{"current_steps": 2560, "total_steps": 4810, "loss": 0.0167, "lr": 2.6568314432371183e-06, "epoch": 2.6611226611226613, "percentage": 53.22, "elapsed_time": "0:13:21", "remaining_time": "0:11:44", "throughput": 1285.18, "total_tokens": 1029648}
|
|
{"current_steps": 2565, "total_steps": 4810, "loss": 0.0354, "lr": 2.647776939998625e-06, "epoch": 2.6663201663201663, "percentage": 53.33, "elapsed_time": "0:13:21", "remaining_time": "0:11:41", "throughput": 1287.15, "total_tokens": 1031632}
|
|
{"current_steps": 2570, "total_steps": 4810, "loss": 0.0416, "lr": 2.6387204910829954e-06, "epoch": 2.6715176715176714, "percentage": 53.43, "elapsed_time": "0:13:21", "remaining_time": "0:11:38", "throughput": 1288.95, "total_tokens": 1033488}
|
|
{"current_steps": 2575, "total_steps": 4810, "loss": 0.0011, "lr": 2.629662215730253e-06, "epoch": 2.6767151767151764, "percentage": 53.53, "elapsed_time": "0:13:22", "remaining_time": "0:11:36", "throughput": 1290.99, "total_tokens": 1035536}
|
|
{"current_steps": 2580, "total_steps": 4810, "loss": 0.0636, "lr": 2.620602233204467e-06, "epoch": 2.681912681912682, "percentage": 53.64, "elapsed_time": "0:13:22", "remaining_time": "0:11:33", "throughput": 1293.02, "total_tokens": 1037584}
|
|
{"current_steps": 2585, "total_steps": 4810, "loss": 0.1506, "lr": 2.6115406627921823e-06, "epoch": 2.687110187110187, "percentage": 53.74, "elapsed_time": "0:13:22", "remaining_time": "0:11:30", "throughput": 1294.98, "total_tokens": 1039568}
|
|
{"current_steps": 2590, "total_steps": 4810, "loss": 0.0269, "lr": 2.6024776238008543e-06, "epoch": 2.6923076923076925, "percentage": 53.85, "elapsed_time": "0:13:23", "remaining_time": "0:11:28", "throughput": 1297.01, "total_tokens": 1041616}
|
|
{"current_steps": 2595, "total_steps": 4810, "loss": 0.1038, "lr": 2.5934132355572713e-06, "epoch": 2.6975051975051976, "percentage": 53.95, "elapsed_time": "0:13:23", "remaining_time": "0:11:25", "throughput": 1299.04, "total_tokens": 1043664}
|
|
{"current_steps": 2600, "total_steps": 4810, "loss": 0.159, "lr": 2.5843476174059874e-06, "epoch": 2.7027027027027026, "percentage": 54.05, "elapsed_time": "0:13:23", "remaining_time": "0:11:23", "throughput": 1300.84, "total_tokens": 1045520}
|
|
{"current_steps": 2605, "total_steps": 4810, "loss": 0.1412, "lr": 2.575280888707748e-06, "epoch": 2.7079002079002077, "percentage": 54.16, "elapsed_time": "0:13:24", "remaining_time": "0:11:20", "throughput": 1302.63, "total_tokens": 1047376}
|
|
{"current_steps": 2610, "total_steps": 4810, "loss": 0.0029, "lr": 2.5662131688379244e-06, "epoch": 2.713097713097713, "percentage": 54.26, "elapsed_time": "0:13:24", "remaining_time": "0:11:18", "throughput": 1304.57, "total_tokens": 1049360}
|
|
{"current_steps": 2615, "total_steps": 4810, "loss": 0.054, "lr": 2.557144577184933e-06, "epoch": 2.7182952182952183, "percentage": 54.37, "elapsed_time": "0:13:24", "remaining_time": "0:11:15", "throughput": 1306.52, "total_tokens": 1051344}
|
|
{"current_steps": 2620, "total_steps": 4810, "loss": 0.0051, "lr": 2.5480752331486742e-06, "epoch": 2.7234927234927238, "percentage": 54.47, "elapsed_time": "0:13:25", "remaining_time": "0:11:12", "throughput": 1308.39, "total_tokens": 1053264}
|
|
{"current_steps": 2625, "total_steps": 4810, "loss": 0.0494, "lr": 2.539005256138948e-06, "epoch": 2.728690228690229, "percentage": 54.57, "elapsed_time": "0:13:25", "remaining_time": "0:11:10", "throughput": 1310.33, "total_tokens": 1055248}
|
|
{"current_steps": 2630, "total_steps": 4810, "loss": 0.0155, "lr": 2.529934765573893e-06, "epoch": 2.733887733887734, "percentage": 54.68, "elapsed_time": "0:13:25", "remaining_time": "0:11:07", "throughput": 1312.12, "total_tokens": 1057104}
|
|
{"current_steps": 2635, "total_steps": 4810, "loss": 0.0379, "lr": 2.520863880878408e-06, "epoch": 2.739085239085239, "percentage": 54.78, "elapsed_time": "0:13:25", "remaining_time": "0:11:05", "throughput": 1313.98, "total_tokens": 1059024}
|
|
{"current_steps": 2640, "total_steps": 4810, "loss": 0.2379, "lr": 2.511792721482581e-06, "epoch": 2.7442827442827444, "percentage": 54.89, "elapsed_time": "0:13:26", "remaining_time": "0:11:02", "throughput": 1315.84, "total_tokens": 1060944}
|
|
{"current_steps": 2645, "total_steps": 4810, "loss": 0.038, "lr": 2.502721406820116e-06, "epoch": 2.7494802494802495, "percentage": 54.99, "elapsed_time": "0:13:26", "remaining_time": "0:11:00", "throughput": 1317.86, "total_tokens": 1062992}
|
|
{"current_steps": 2650, "total_steps": 4810, "loss": 0.0705, "lr": 2.493650056326763e-06, "epoch": 2.7546777546777546, "percentage": 55.09, "elapsed_time": "0:13:26", "remaining_time": "0:10:57", "throughput": 1319.64, "total_tokens": 1064848}
|
|
{"current_steps": 2651, "total_steps": 4810, "eval_loss": 0.306118369102478, "epoch": 2.7557172557172556, "percentage": 55.11, "elapsed_time": "0:13:28", "remaining_time": "0:10:58", "throughput": 1318.26, "total_tokens": 1065232}
|
|
{"current_steps": 2655, "total_steps": 4810, "loss": 0.2106, "lr": 2.4845787894387427e-06, "epoch": 2.75987525987526, "percentage": 55.2, "elapsed_time": "0:15:04", "remaining_time": "0:12:14", "throughput": 1179.21, "total_tokens": 1066832}
|
|
{"current_steps": 2660, "total_steps": 4810, "loss": 0.0032, "lr": 2.4755077255911746e-06, "epoch": 2.765072765072765, "percentage": 55.3, "elapsed_time": "0:15:05", "remaining_time": "0:12:11", "throughput": 1181.05, "total_tokens": 1068880}
|
|
{"current_steps": 2665, "total_steps": 4810, "loss": 0.151, "lr": 2.466436984216507e-06, "epoch": 2.77027027027027, "percentage": 55.41, "elapsed_time": "0:15:05", "remaining_time": "0:12:08", "throughput": 1182.82, "total_tokens": 1070864}
|
|
{"current_steps": 2670, "total_steps": 4810, "loss": 0.1102, "lr": 2.4573666847429383e-06, "epoch": 2.7754677754677752, "percentage": 55.51, "elapsed_time": "0:15:05", "remaining_time": "0:12:05", "throughput": 1184.59, "total_tokens": 1072848}
|
|
{"current_steps": 2675, "total_steps": 4810, "loss": 0.0628, "lr": 2.4482969465928545e-06, "epoch": 2.7806652806652807, "percentage": 55.61, "elapsed_time": "0:15:05", "remaining_time": "0:12:03", "throughput": 1186.36, "total_tokens": 1074832}
|
|
{"current_steps": 2680, "total_steps": 4810, "loss": 0.002, "lr": 2.4392278891812457e-06, "epoch": 2.785862785862786, "percentage": 55.72, "elapsed_time": "0:15:06", "remaining_time": "0:12:00", "throughput": 1188.27, "total_tokens": 1076944}
|
|
{"current_steps": 2685, "total_steps": 4810, "loss": 0.0233, "lr": 2.430159631914141e-06, "epoch": 2.7910602910602913, "percentage": 55.82, "elapsed_time": "0:15:06", "remaining_time": "0:11:57", "throughput": 1189.89, "total_tokens": 1078800}
|
|
{"current_steps": 2690, "total_steps": 4810, "loss": 0.1463, "lr": 2.421092294187037e-06, "epoch": 2.7962577962577964, "percentage": 55.93, "elapsed_time": "0:15:06", "remaining_time": "0:11:54", "throughput": 1191.8, "total_tokens": 1080912}
|
|
{"current_steps": 2695, "total_steps": 4810, "loss": 0.0068, "lr": 2.41202599538332e-06, "epoch": 2.8014553014553014, "percentage": 56.03, "elapsed_time": "0:15:07", "remaining_time": "0:11:52", "throughput": 1193.63, "total_tokens": 1082960}
|
|
{"current_steps": 2700, "total_steps": 4810, "loss": 0.0591, "lr": 2.402960854872697e-06, "epoch": 2.8066528066528065, "percentage": 56.13, "elapsed_time": "0:15:07", "remaining_time": "0:11:49", "throughput": 1195.46, "total_tokens": 1085008}
|
|
{"current_steps": 2705, "total_steps": 4810, "loss": 0.0729, "lr": 2.39389699200963e-06, "epoch": 2.811850311850312, "percentage": 56.24, "elapsed_time": "0:15:07", "remaining_time": "0:11:46", "throughput": 1197.43, "total_tokens": 1087184}
|
|
{"current_steps": 2710, "total_steps": 4810, "loss": 0.0013, "lr": 2.3848345261317523e-06, "epoch": 2.817047817047817, "percentage": 56.34, "elapsed_time": "0:15:08", "remaining_time": "0:11:43", "throughput": 1199.12, "total_tokens": 1089104}
|
|
{"current_steps": 2715, "total_steps": 4810, "loss": 0.1587, "lr": 2.3757735765583083e-06, "epoch": 2.822245322245322, "percentage": 56.44, "elapsed_time": "0:15:08", "remaining_time": "0:11:41", "throughput": 1200.81, "total_tokens": 1091024}
|
|
{"current_steps": 2720, "total_steps": 4810, "loss": 0.0685, "lr": 2.3667142625885774e-06, "epoch": 2.8274428274428276, "percentage": 56.55, "elapsed_time": "0:15:08", "remaining_time": "0:11:38", "throughput": 1202.56, "total_tokens": 1093008}
|
|
{"current_steps": 2725, "total_steps": 4810, "loss": 0.0005, "lr": 2.357656703500303e-06, "epoch": 2.8326403326403327, "percentage": 56.65, "elapsed_time": "0:15:09", "remaining_time": "0:11:35", "throughput": 1204.32, "total_tokens": 1094992}
|
|
{"current_steps": 2730, "total_steps": 4810, "loss": 0.0003, "lr": 2.3486010185481247e-06, "epoch": 2.8378378378378377, "percentage": 56.76, "elapsed_time": "0:15:09", "remaining_time": "0:11:32", "throughput": 1206.15, "total_tokens": 1097040}
|
|
{"current_steps": 2735, "total_steps": 4810, "loss": 0.1532, "lr": 2.3395473269620055e-06, "epoch": 2.8430353430353428, "percentage": 56.86, "elapsed_time": "0:15:09", "remaining_time": "0:11:30", "throughput": 1207.82, "total_tokens": 1098960}
|
|
{"current_steps": 2740, "total_steps": 4810, "loss": 0.0005, "lr": 2.330495747945665e-06, "epoch": 2.8482328482328483, "percentage": 56.96, "elapsed_time": "0:15:10", "remaining_time": "0:11:27", "throughput": 1209.85, "total_tokens": 1101200}
|
|
{"current_steps": 2745, "total_steps": 4810, "loss": 0.1635, "lr": 2.321446400675005e-06, "epoch": 2.8534303534303533, "percentage": 57.07, "elapsed_time": "0:15:10", "remaining_time": "0:11:24", "throughput": 1211.53, "total_tokens": 1103120}
|
|
{"current_steps": 2750, "total_steps": 4810, "loss": 0.0648, "lr": 2.3123994042965454e-06, "epoch": 2.858627858627859, "percentage": 57.17, "elapsed_time": "0:15:10", "remaining_time": "0:11:22", "throughput": 1213.35, "total_tokens": 1105168}
|
|
{"current_steps": 2755, "total_steps": 4810, "loss": 0.0463, "lr": 2.3033548779258535e-06, "epoch": 2.863825363825364, "percentage": 57.28, "elapsed_time": "0:15:11", "remaining_time": "0:11:19", "throughput": 1215.1, "total_tokens": 1107152}
|
|
{"current_steps": 2760, "total_steps": 4810, "loss": 0.2765, "lr": 2.2943129406459754e-06, "epoch": 2.869022869022869, "percentage": 57.38, "elapsed_time": "0:15:11", "remaining_time": "0:11:17", "throughput": 1216.91, "total_tokens": 1109200}
|
|
{"current_steps": 2765, "total_steps": 4810, "loss": 0.2216, "lr": 2.2852737115058684e-06, "epoch": 2.874220374220374, "percentage": 57.48, "elapsed_time": "0:15:11", "remaining_time": "0:11:14", "throughput": 1218.72, "total_tokens": 1111248}
|
|
{"current_steps": 2770, "total_steps": 4810, "loss": 0.1188, "lr": 2.2762373095188344e-06, "epoch": 2.8794178794178795, "percentage": 57.59, "elapsed_time": "0:15:12", "remaining_time": "0:11:11", "throughput": 1220.47, "total_tokens": 1113232}
|
|
{"current_steps": 2775, "total_steps": 4810, "loss": 0.0557, "lr": 2.2672038536609487e-06, "epoch": 2.8846153846153846, "percentage": 57.69, "elapsed_time": "0:15:12", "remaining_time": "0:11:09", "throughput": 1222.21, "total_tokens": 1115216}
|
|
{"current_steps": 2780, "total_steps": 4810, "loss": 0.0011, "lr": 2.2581734628695034e-06, "epoch": 2.88981288981289, "percentage": 57.8, "elapsed_time": "0:15:12", "remaining_time": "0:11:06", "throughput": 1224.02, "total_tokens": 1117264}
|
|
{"current_steps": 2785, "total_steps": 4810, "loss": 0.1068, "lr": 2.2491462560414287e-06, "epoch": 2.895010395010395, "percentage": 57.9, "elapsed_time": "0:15:13", "remaining_time": "0:11:03", "throughput": 1225.9, "total_tokens": 1119376}
|
|
{"current_steps": 2790, "total_steps": 4810, "loss": 0.1178, "lr": 2.2401223520317363e-06, "epoch": 2.9002079002079, "percentage": 58.0, "elapsed_time": "0:15:13", "remaining_time": "0:11:01", "throughput": 1227.71, "total_tokens": 1121424}
|
|
{"current_steps": 2795, "total_steps": 4810, "loss": 0.0582, "lr": 2.2311018696519532e-06, "epoch": 2.9054054054054053, "percentage": 58.11, "elapsed_time": "0:15:13", "remaining_time": "0:10:58", "throughput": 1229.52, "total_tokens": 1123472}
|
|
{"current_steps": 2800, "total_steps": 4810, "loss": 0.0007, "lr": 2.2220849276685533e-06, "epoch": 2.9106029106029108, "percentage": 58.21, "elapsed_time": "0:15:14", "remaining_time": "0:10:56", "throughput": 1231.4, "total_tokens": 1125584}
|
|
{"current_steps": 2805, "total_steps": 4810, "loss": 0.0783, "lr": 2.2130716448014e-06, "epoch": 2.915800415800416, "percentage": 58.32, "elapsed_time": "0:15:14", "remaining_time": "0:10:53", "throughput": 1233.13, "total_tokens": 1127568}
|
|
{"current_steps": 2810, "total_steps": 4810, "loss": 0.0946, "lr": 2.2040621397221762e-06, "epoch": 2.920997920997921, "percentage": 58.42, "elapsed_time": "0:15:14", "remaining_time": "0:10:51", "throughput": 1234.87, "total_tokens": 1129552}
|
|
{"current_steps": 2815, "total_steps": 4810, "loss": 0.0011, "lr": 2.1950565310528264e-06, "epoch": 2.9261954261954264, "percentage": 58.52, "elapsed_time": "0:15:15", "remaining_time": "0:10:48", "throughput": 1236.54, "total_tokens": 1131472}
|
|
{"current_steps": 2820, "total_steps": 4810, "loss": 0.0005, "lr": 2.186054937363996e-06, "epoch": 2.9313929313929314, "percentage": 58.63, "elapsed_time": "0:15:15", "remaining_time": "0:10:45", "throughput": 1238.21, "total_tokens": 1133392}
|
|
{"current_steps": 2825, "total_steps": 4810, "loss": 0.0004, "lr": 2.1770574771734644e-06, "epoch": 2.9365904365904365, "percentage": 58.73, "elapsed_time": "0:15:15", "remaining_time": "0:10:43", "throughput": 1240.01, "total_tokens": 1135440}
|
|
{"current_steps": 2830, "total_steps": 4810, "loss": 0.0037, "lr": 2.168064268944591e-06, "epoch": 2.9417879417879416, "percentage": 58.84, "elapsed_time": "0:15:15", "remaining_time": "0:10:40", "throughput": 1241.74, "total_tokens": 1137424}
|
|
{"current_steps": 2835, "total_steps": 4810, "loss": 0.018, "lr": 2.1590754310847513e-06, "epoch": 2.946985446985447, "percentage": 58.94, "elapsed_time": "0:15:16", "remaining_time": "0:10:38", "throughput": 1243.48, "total_tokens": 1139408}
|
|
{"current_steps": 2840, "total_steps": 4810, "loss": 0.1722, "lr": 2.150091081943777e-06, "epoch": 2.952182952182952, "percentage": 59.04, "elapsed_time": "0:15:16", "remaining_time": "0:10:35", "throughput": 1245.28, "total_tokens": 1141456}
|
|
{"current_steps": 2845, "total_steps": 4810, "loss": 0.1002, "lr": 2.141111339812405e-06, "epoch": 2.9573804573804576, "percentage": 59.15, "elapsed_time": "0:15:16", "remaining_time": "0:10:33", "throughput": 1247.01, "total_tokens": 1143440}
|
|
{"current_steps": 2850, "total_steps": 4810, "loss": 0.0783, "lr": 2.1321363229207097e-06, "epoch": 2.9625779625779627, "percentage": 59.25, "elapsed_time": "0:15:17", "remaining_time": "0:10:30", "throughput": 1248.67, "total_tokens": 1145360}
|
|
{"current_steps": 2855, "total_steps": 4810, "loss": 0.1061, "lr": 2.123166149436556e-06, "epoch": 2.9677754677754677, "percentage": 59.36, "elapsed_time": "0:15:17", "remaining_time": "0:10:28", "throughput": 1250.33, "total_tokens": 1147280}
|
|
{"current_steps": 2860, "total_steps": 4810, "loss": 0.1705, "lr": 2.114200937464035e-06, "epoch": 2.972972972972973, "percentage": 59.46, "elapsed_time": "0:15:17", "remaining_time": "0:10:25", "throughput": 1251.99, "total_tokens": 1149200}
|
|
{"current_steps": 2865, "total_steps": 4810, "loss": 0.003, "lr": 2.1052408050419153e-06, "epoch": 2.9781704781704783, "percentage": 59.56, "elapsed_time": "0:15:18", "remaining_time": "0:10:23", "throughput": 1253.71, "total_tokens": 1151184}
|
|
{"current_steps": 2870, "total_steps": 4810, "loss": 0.0952, "lr": 2.0962858701420867e-06, "epoch": 2.9833679833679834, "percentage": 59.67, "elapsed_time": "0:15:18", "remaining_time": "0:10:20", "throughput": 1255.51, "total_tokens": 1153232}
|
|
{"current_steps": 2875, "total_steps": 4810, "loss": 0.1992, "lr": 2.087336250668006e-06, "epoch": 2.9885654885654884, "percentage": 59.77, "elapsed_time": "0:15:18", "remaining_time": "0:10:18", "throughput": 1257.23, "total_tokens": 1155216}
|
|
{"current_steps": 2880, "total_steps": 4810, "loss": 0.1408, "lr": 2.0783920644531443e-06, "epoch": 2.993762993762994, "percentage": 59.88, "elapsed_time": "0:15:19", "remaining_time": "0:10:15", "throughput": 1259.02, "total_tokens": 1157264}
|
|
{"current_steps": 2885, "total_steps": 4810, "loss": 0.2101, "lr": 2.069453429259439e-06, "epoch": 2.998960498960499, "percentage": 59.98, "elapsed_time": "0:15:19", "remaining_time": "0:10:13", "throughput": 1260.81, "total_tokens": 1159312}
|
|
{"current_steps": 2890, "total_steps": 4810, "loss": 0.0016, "lr": 2.06052046277574e-06, "epoch": 3.004158004158004, "percentage": 60.08, "elapsed_time": "0:15:20", "remaining_time": "0:10:11", "throughput": 1261.75, "total_tokens": 1161248}
|
|
{"current_steps": 2892, "total_steps": 4810, "eval_loss": 0.2698093056678772, "epoch": 3.006237006237006, "percentage": 60.12, "elapsed_time": "0:15:21", "remaining_time": "0:10:11", "throughput": 1261.01, "total_tokens": 1162016}
|
|
{"current_steps": 2895, "total_steps": 4810, "loss": 0.0012, "lr": 2.051593282616262e-06, "epoch": 3.0093555093555096, "percentage": 60.19, "elapsed_time": "0:16:08", "remaining_time": "0:10:40", "throughput": 1201.34, "total_tokens": 1163168}
|
|
{"current_steps": 2900, "total_steps": 4810, "loss": 0.0559, "lr": 2.0426720063190335e-06, "epoch": 3.0145530145530146, "percentage": 60.29, "elapsed_time": "0:16:08", "remaining_time": "0:10:37", "throughput": 1202.92, "total_tokens": 1165088}
|
|
{"current_steps": 2905, "total_steps": 4810, "loss": 0.0012, "lr": 2.0337567513443518e-06, "epoch": 3.0197505197505197, "percentage": 60.4, "elapsed_time": "0:16:08", "remaining_time": "0:10:35", "throughput": 1204.64, "total_tokens": 1167136}
|
|
{"current_steps": 2910, "total_steps": 4810, "loss": 0.046, "lr": 2.0248476350732368e-06, "epoch": 3.024948024948025, "percentage": 60.5, "elapsed_time": "0:16:09", "remaining_time": "0:10:32", "throughput": 1206.28, "total_tokens": 1169120}
|
|
{"current_steps": 2915, "total_steps": 4810, "loss": 0.0235, "lr": 2.0159447748058803e-06, "epoch": 3.0301455301455302, "percentage": 60.6, "elapsed_time": "0:16:09", "remaining_time": "0:10:30", "throughput": 1207.87, "total_tokens": 1171040}
|
|
{"current_steps": 2920, "total_steps": 4810, "loss": 0.1135, "lr": 2.007048287760113e-06, "epoch": 3.0353430353430353, "percentage": 60.71, "elapsed_time": "0:16:09", "remaining_time": "0:10:27", "throughput": 1209.51, "total_tokens": 1173024}
|
|
{"current_steps": 2925, "total_steps": 4810, "loss": 0.0007, "lr": 1.998158291069845e-06, "epoch": 3.0405405405405403, "percentage": 60.81, "elapsed_time": "0:16:10", "remaining_time": "0:10:25", "throughput": 1211.09, "total_tokens": 1174944}
|
|
{"current_steps": 2930, "total_steps": 4810, "loss": 0.009, "lr": 1.989274901783538e-06, "epoch": 3.045738045738046, "percentage": 60.91, "elapsed_time": "0:16:10", "remaining_time": "0:10:22", "throughput": 1212.86, "total_tokens": 1177056}
|
|
{"current_steps": 2935, "total_steps": 4810, "loss": 0.0004, "lr": 1.9803982368626582e-06, "epoch": 3.050935550935551, "percentage": 61.02, "elapsed_time": "0:16:10", "remaining_time": "0:10:20", "throughput": 1214.44, "total_tokens": 1178976}
|
|
{"current_steps": 2940, "total_steps": 4810, "loss": 0.0007, "lr": 1.9715284131801353e-06, "epoch": 3.056133056133056, "percentage": 61.12, "elapsed_time": "0:16:11", "remaining_time": "0:10:17", "throughput": 1216.15, "total_tokens": 1181024}
|
|
{"current_steps": 2945, "total_steps": 4810, "loss": 0.0003, "lr": 1.9626655475188237e-06, "epoch": 3.0613305613305615, "percentage": 61.23, "elapsed_time": "0:16:11", "remaining_time": "0:10:15", "throughput": 1217.79, "total_tokens": 1183008}
|
|
{"current_steps": 2950, "total_steps": 4810, "loss": 0.0003, "lr": 1.953809756569971e-06, "epoch": 3.0665280665280665, "percentage": 61.33, "elapsed_time": "0:16:11", "remaining_time": "0:10:12", "throughput": 1219.49, "total_tokens": 1185056}
|
|
{"current_steps": 2955, "total_steps": 4810, "loss": 0.0623, "lr": 1.9449611569316716e-06, "epoch": 3.0717255717255716, "percentage": 61.43, "elapsed_time": "0:16:12", "remaining_time": "0:10:10", "throughput": 1221.07, "total_tokens": 1186976}
|
|
{"current_steps": 2960, "total_steps": 4810, "loss": 0.1065, "lr": 1.936119865107341e-06, "epoch": 3.076923076923077, "percentage": 61.54, "elapsed_time": "0:16:12", "remaining_time": "0:10:07", "throughput": 1222.7, "total_tokens": 1188960}
|
|
{"current_steps": 2965, "total_steps": 4810, "loss": 0.0002, "lr": 1.9272859975041757e-06, "epoch": 3.082120582120582, "percentage": 61.64, "elapsed_time": "0:16:12", "remaining_time": "0:10:05", "throughput": 1224.34, "total_tokens": 1190944}
|
|
{"current_steps": 2970, "total_steps": 4810, "loss": 0.0381, "lr": 1.918459670431622e-06, "epoch": 3.087318087318087, "percentage": 61.75, "elapsed_time": "0:16:13", "remaining_time": "0:10:02", "throughput": 1225.98, "total_tokens": 1192928}
|
|
{"current_steps": 2975, "total_steps": 4810, "loss": 0.0045, "lr": 1.9096410000998478e-06, "epoch": 3.0925155925155927, "percentage": 61.85, "elapsed_time": "0:16:13", "remaining_time": "0:10:00", "throughput": 1227.55, "total_tokens": 1194848}
|
|
{"current_steps": 2980, "total_steps": 4810, "loss": 0.0019, "lr": 1.9008301026182064e-06, "epoch": 3.0977130977130978, "percentage": 61.95, "elapsed_time": "0:16:13", "remaining_time": "0:09:57", "throughput": 1229.12, "total_tokens": 1196768}
|
|
{"current_steps": 2985, "total_steps": 4810, "loss": 0.0002, "lr": 1.892027093993716e-06, "epoch": 3.102910602910603, "percentage": 62.06, "elapsed_time": "0:16:14", "remaining_time": "0:09:55", "throughput": 1230.68, "total_tokens": 1198688}
|
|
{"current_steps": 2990, "total_steps": 4810, "loss": 0.0002, "lr": 1.883232090129523e-06, "epoch": 3.108108108108108, "percentage": 62.16, "elapsed_time": "0:16:14", "remaining_time": "0:09:53", "throughput": 1232.31, "total_tokens": 1200672}
|
|
{"current_steps": 2995, "total_steps": 4810, "loss": 0.0713, "lr": 1.8744452068233826e-06, "epoch": 3.1133056133056134, "percentage": 62.27, "elapsed_time": "0:16:14", "remaining_time": "0:09:50", "throughput": 1234.01, "total_tokens": 1202720}
|
|
{"current_steps": 3000, "total_steps": 4810, "loss": 0.0002, "lr": 1.8656665597661334e-06, "epoch": 3.1185031185031185, "percentage": 62.37, "elapsed_time": "0:16:14", "remaining_time": "0:09:48", "throughput": 1235.7, "total_tokens": 1204768}
|
|
{"current_steps": 3005, "total_steps": 4810, "loss": 0.0001, "lr": 1.8568962645401702e-06, "epoch": 3.1237006237006235, "percentage": 62.47, "elapsed_time": "0:16:15", "remaining_time": "0:09:45", "throughput": 1237.53, "total_tokens": 1206944}
|
|
{"current_steps": 3010, "total_steps": 4810, "loss": 0.095, "lr": 1.8481344366179284e-06, "epoch": 3.128898128898129, "percentage": 62.58, "elapsed_time": "0:16:15", "remaining_time": "0:09:43", "throughput": 1239.28, "total_tokens": 1209056}
|
|
{"current_steps": 3015, "total_steps": 4810, "loss": 0.0002, "lr": 1.8393811913603583e-06, "epoch": 3.134095634095634, "percentage": 62.68, "elapsed_time": "0:16:15", "remaining_time": "0:09:41", "throughput": 1240.84, "total_tokens": 1210976}
|
|
{"current_steps": 3020, "total_steps": 4810, "loss": 0.0002, "lr": 1.8306366440154067e-06, "epoch": 3.139293139293139, "percentage": 62.79, "elapsed_time": "0:16:16", "remaining_time": "0:09:38", "throughput": 1242.53, "total_tokens": 1213024}
|
|
{"current_steps": 3025, "total_steps": 4810, "loss": 0.0302, "lr": 1.8219009097165042e-06, "epoch": 3.1444906444906446, "percentage": 62.89, "elapsed_time": "0:16:16", "remaining_time": "0:09:36", "throughput": 1244.29, "total_tokens": 1215136}
|
|
{"current_steps": 3030, "total_steps": 4810, "loss": 0.0004, "lr": 1.8131741034810436e-06, "epoch": 3.1496881496881497, "percentage": 62.99, "elapsed_time": "0:16:16", "remaining_time": "0:09:33", "throughput": 1245.85, "total_tokens": 1217056}
|
|
{"current_steps": 3035, "total_steps": 4810, "loss": 0.0003, "lr": 1.8044563402088686e-06, "epoch": 3.1548856548856548, "percentage": 63.1, "elapsed_time": "0:16:17", "remaining_time": "0:09:31", "throughput": 1247.6, "total_tokens": 1219168}
|
|
{"current_steps": 3040, "total_steps": 4810, "loss": 0.0002, "lr": 1.7957477346807622e-06, "epoch": 3.1600831600831603, "percentage": 63.2, "elapsed_time": "0:16:17", "remaining_time": "0:09:29", "throughput": 1249.16, "total_tokens": 1221088}
|
|
{"current_steps": 3045, "total_steps": 4810, "loss": 0.0002, "lr": 1.7870484015569306e-06, "epoch": 3.1652806652806653, "percentage": 63.31, "elapsed_time": "0:16:17", "remaining_time": "0:09:26", "throughput": 1250.97, "total_tokens": 1223264}
|
|
{"current_steps": 3050, "total_steps": 4810, "loss": 0.0002, "lr": 1.7783584553755007e-06, "epoch": 3.1704781704781704, "percentage": 63.41, "elapsed_time": "0:16:18", "remaining_time": "0:09:24", "throughput": 1252.78, "total_tokens": 1225440}
|
|
{"current_steps": 3055, "total_steps": 4810, "loss": 0.0864, "lr": 1.769678010551003e-06, "epoch": 3.175675675675676, "percentage": 63.51, "elapsed_time": "0:16:18", "remaining_time": "0:09:22", "throughput": 1254.4, "total_tokens": 1227424}
|
|
{"current_steps": 3060, "total_steps": 4810, "loss": 0.0793, "lr": 1.7610071813728741e-06, "epoch": 3.180873180873181, "percentage": 63.62, "elapsed_time": "0:16:18", "remaining_time": "0:09:19", "throughput": 1255.95, "total_tokens": 1229344}
|
|
{"current_steps": 3065, "total_steps": 4810, "loss": 0.0974, "lr": 1.7523460820039466e-06, "epoch": 3.186070686070686, "percentage": 63.72, "elapsed_time": "0:16:19", "remaining_time": "0:09:17", "throughput": 1257.7, "total_tokens": 1231456}
|
|
{"current_steps": 3070, "total_steps": 4810, "loss": 0.0003, "lr": 1.7436948264789465e-06, "epoch": 3.1912681912681915, "percentage": 63.83, "elapsed_time": "0:16:19", "remaining_time": "0:09:15", "throughput": 1259.31, "total_tokens": 1233440}
|
|
{"current_steps": 3075, "total_steps": 4810, "loss": 0.0779, "lr": 1.7350535287029957e-06, "epoch": 3.1964656964656966, "percentage": 63.93, "elapsed_time": "0:16:19", "remaining_time": "0:09:12", "throughput": 1261.05, "total_tokens": 1235552}
|
|
{"current_steps": 3080, "total_steps": 4810, "loss": 0.152, "lr": 1.7264223024501064e-06, "epoch": 3.2016632016632016, "percentage": 64.03, "elapsed_time": "0:16:20", "remaining_time": "0:09:10", "throughput": 1262.67, "total_tokens": 1237536}
|
|
{"current_steps": 3085, "total_steps": 4810, "loss": 0.0004, "lr": 1.717801261361685e-06, "epoch": 3.2068607068607067, "percentage": 64.14, "elapsed_time": "0:16:20", "remaining_time": "0:09:08", "throughput": 1264.34, "total_tokens": 1239584}
|
|
{"current_steps": 3090, "total_steps": 4810, "loss": 0.0013, "lr": 1.7091905189450425e-06, "epoch": 3.212058212058212, "percentage": 64.24, "elapsed_time": "0:16:20", "remaining_time": "0:09:05", "throughput": 1265.89, "total_tokens": 1241504}
|
|
{"current_steps": 3095, "total_steps": 4810, "loss": 0.0375, "lr": 1.700590188571887e-06, "epoch": 3.2172557172557172, "percentage": 64.35, "elapsed_time": "0:16:21", "remaining_time": "0:09:03", "throughput": 1267.56, "total_tokens": 1243552}
|
|
{"current_steps": 3100, "total_steps": 4810, "loss": 0.0002, "lr": 1.6920003834768438e-06, "epoch": 3.2224532224532223, "percentage": 64.45, "elapsed_time": "0:16:21", "remaining_time": "0:09:01", "throughput": 1269.23, "total_tokens": 1245600}
|
|
{"current_steps": 3105, "total_steps": 4810, "loss": 0.0002, "lr": 1.6834212167559578e-06, "epoch": 3.227650727650728, "percentage": 64.55, "elapsed_time": "0:16:21", "remaining_time": "0:08:59", "throughput": 1270.97, "total_tokens": 1247712}
|
|
{"current_steps": 3110, "total_steps": 4810, "loss": 0.031, "lr": 1.6748528013652032e-06, "epoch": 3.232848232848233, "percentage": 64.66, "elapsed_time": "0:16:22", "remaining_time": "0:08:56", "throughput": 1272.57, "total_tokens": 1249696}
|
|
{"current_steps": 3115, "total_steps": 4810, "loss": 0.0647, "lr": 1.6662952501190032e-06, "epoch": 3.238045738045738, "percentage": 64.76, "elapsed_time": "0:16:22", "remaining_time": "0:08:54", "throughput": 1274.31, "total_tokens": 1251808}
|
|
{"current_steps": 3120, "total_steps": 4810, "loss": 0.0462, "lr": 1.6577486756887376e-06, "epoch": 3.2432432432432434, "percentage": 64.86, "elapsed_time": "0:16:22", "remaining_time": "0:08:52", "throughput": 1275.85, "total_tokens": 1253728}
|
|
{"current_steps": 3125, "total_steps": 4810, "loss": 0.0289, "lr": 1.6492131906012608e-06, "epoch": 3.2484407484407485, "percentage": 64.97, "elapsed_time": "0:16:22", "remaining_time": "0:08:50", "throughput": 1277.58, "total_tokens": 1255840}
|
|
{"current_steps": 3130, "total_steps": 4810, "loss": 0.0469, "lr": 1.640688907237425e-06, "epoch": 3.2536382536382535, "percentage": 65.07, "elapsed_time": "0:16:23", "remaining_time": "0:08:47", "throughput": 1279.24, "total_tokens": 1257888}
|
|
{"current_steps": 3133, "total_steps": 4810, "eval_loss": 0.36025160551071167, "epoch": 3.2567567567567566, "percentage": 65.14, "elapsed_time": "0:16:24", "remaining_time": "0:08:47", "throughput": 1278.91, "total_tokens": 1259168}
|
|
{"current_steps": 3135, "total_steps": 4810, "loss": 0.0767, "lr": 1.632175937830594e-06, "epoch": 3.258835758835759, "percentage": 65.18, "elapsed_time": "0:16:58", "remaining_time": "0:09:04", "throughput": 1237.0, "total_tokens": 1259936}
|
|
{"current_steps": 3140, "total_steps": 4810, "loss": 0.0504, "lr": 1.6236743944651703e-06, "epoch": 3.264033264033264, "percentage": 65.28, "elapsed_time": "0:16:58", "remaining_time": "0:09:01", "throughput": 1238.74, "total_tokens": 1262112}
|
|
{"current_steps": 3145, "total_steps": 4810, "loss": 0.0185, "lr": 1.6151843890751172e-06, "epoch": 3.269230769230769, "percentage": 65.38, "elapsed_time": "0:16:59", "remaining_time": "0:08:59", "throughput": 1240.11, "total_tokens": 1263904}
|
|
{"current_steps": 3150, "total_steps": 4810, "loss": 0.0131, "lr": 1.6067060334424836e-06, "epoch": 3.274428274428274, "percentage": 65.49, "elapsed_time": "0:16:59", "remaining_time": "0:08:57", "throughput": 1241.73, "total_tokens": 1265952}
|
|
{"current_steps": 3155, "total_steps": 4810, "loss": 0.0002, "lr": 1.5982394391959382e-06, "epoch": 3.2796257796257797, "percentage": 65.59, "elapsed_time": "0:16:59", "remaining_time": "0:08:54", "throughput": 1243.21, "total_tokens": 1267872}
|
|
{"current_steps": 3160, "total_steps": 4810, "loss": 0.0937, "lr": 1.5897847178092902e-06, "epoch": 3.284823284823285, "percentage": 65.7, "elapsed_time": "0:17:00", "remaining_time": "0:08:52", "throughput": 1244.71, "total_tokens": 1269792}
|
|
{"current_steps": 3165, "total_steps": 4810, "loss": 0.0014, "lr": 1.5813419806000329e-06, "epoch": 3.29002079002079, "percentage": 65.8, "elapsed_time": "0:17:00", "remaining_time": "0:08:50", "throughput": 1246.26, "total_tokens": 1271776}
|
|
{"current_steps": 3170, "total_steps": 4810, "loss": 0.0785, "lr": 1.5729113387278675e-06, "epoch": 3.2952182952182953, "percentage": 65.9, "elapsed_time": "0:17:00", "remaining_time": "0:08:48", "throughput": 1247.81, "total_tokens": 1273760}
|
|
{"current_steps": 3175, "total_steps": 4810, "loss": 0.1213, "lr": 1.5644929031932455e-06, "epoch": 3.3004158004158004, "percentage": 66.01, "elapsed_time": "0:17:01", "remaining_time": "0:08:45", "throughput": 1249.42, "total_tokens": 1275808}
|
|
{"current_steps": 3180, "total_steps": 4810, "loss": 0.0576, "lr": 1.556086784835908e-06, "epoch": 3.3056133056133055, "percentage": 66.11, "elapsed_time": "0:17:01", "remaining_time": "0:08:43", "throughput": 1250.97, "total_tokens": 1277792}
|
|
{"current_steps": 3185, "total_steps": 4810, "loss": 0.0004, "lr": 1.547693094333421e-06, "epoch": 3.310810810810811, "percentage": 66.22, "elapsed_time": "0:17:01", "remaining_time": "0:08:41", "throughput": 1252.52, "total_tokens": 1279776}
|
|
{"current_steps": 3190, "total_steps": 4810, "loss": 0.1482, "lr": 1.5393119421997252e-06, "epoch": 3.316008316008316, "percentage": 66.32, "elapsed_time": "0:17:02", "remaining_time": "0:08:39", "throughput": 1254.07, "total_tokens": 1281760}
|
|
{"current_steps": 3195, "total_steps": 4810, "loss": 0.0042, "lr": 1.5309434387836737e-06, "epoch": 3.321205821205821, "percentage": 66.42, "elapsed_time": "0:17:02", "remaining_time": "0:08:36", "throughput": 1255.61, "total_tokens": 1283744}
|
|
{"current_steps": 3200, "total_steps": 4810, "loss": 0.0005, "lr": 1.5225876942675844e-06, "epoch": 3.3264033264033266, "percentage": 66.53, "elapsed_time": "0:17:02", "remaining_time": "0:08:34", "throughput": 1257.22, "total_tokens": 1285792}
|
|
{"current_steps": 3205, "total_steps": 4810, "loss": 0.0525, "lr": 1.514244818665788e-06, "epoch": 3.3316008316008316, "percentage": 66.63, "elapsed_time": "0:17:03", "remaining_time": "0:08:32", "throughput": 1258.77, "total_tokens": 1287776}
|
|
{"current_steps": 3210, "total_steps": 4810, "loss": 0.0002, "lr": 1.505914921823178e-06, "epoch": 3.3367983367983367, "percentage": 66.74, "elapsed_time": "0:17:03", "remaining_time": "0:08:30", "throughput": 1260.25, "total_tokens": 1289696}
|
|
{"current_steps": 3215, "total_steps": 4810, "loss": 0.0006, "lr": 1.497598113413766e-06, "epoch": 3.3419958419958418, "percentage": 66.84, "elapsed_time": "0:17:03", "remaining_time": "0:08:27", "throughput": 1261.79, "total_tokens": 1291680}
|
|
{"current_steps": 3220, "total_steps": 4810, "loss": 0.0003, "lr": 1.489294502939238e-06, "epoch": 3.3471933471933473, "percentage": 66.94, "elapsed_time": "0:17:04", "remaining_time": "0:08:25", "throughput": 1263.21, "total_tokens": 1293536}
|
|
{"current_steps": 3225, "total_steps": 4810, "loss": 0.0003, "lr": 1.4810041997275094e-06, "epoch": 3.3523908523908523, "percentage": 67.05, "elapsed_time": "0:17:04", "remaining_time": "0:08:23", "throughput": 1264.94, "total_tokens": 1295712}
|
|
{"current_steps": 3230, "total_steps": 4810, "loss": 0.0008, "lr": 1.4727273129312918e-06, "epoch": 3.357588357588358, "percentage": 67.15, "elapsed_time": "0:17:04", "remaining_time": "0:08:21", "throughput": 1266.54, "total_tokens": 1297760}
|
|
{"current_steps": 3235, "total_steps": 4810, "loss": 0.0001, "lr": 1.4644639515266484e-06, "epoch": 3.362785862785863, "percentage": 67.26, "elapsed_time": "0:17:04", "remaining_time": "0:08:19", "throughput": 1268.14, "total_tokens": 1299808}
|
|
{"current_steps": 3240, "total_steps": 4810, "loss": 0.0002, "lr": 1.4562142243115646e-06, "epoch": 3.367983367983368, "percentage": 67.36, "elapsed_time": "0:17:05", "remaining_time": "0:08:16", "throughput": 1269.8, "total_tokens": 1301920}
|
|
{"current_steps": 3245, "total_steps": 4810, "loss": 0.0054, "lr": 1.4479782399045152e-06, "epoch": 3.373180873180873, "percentage": 67.46, "elapsed_time": "0:17:05", "remaining_time": "0:08:14", "throughput": 1271.33, "total_tokens": 1303904}
|
|
{"current_steps": 3250, "total_steps": 4810, "loss": 0.0424, "lr": 1.43975610674303e-06, "epoch": 3.3783783783783785, "percentage": 67.57, "elapsed_time": "0:17:05", "remaining_time": "0:08:12", "throughput": 1272.87, "total_tokens": 1305888}
|
|
{"current_steps": 3255, "total_steps": 4810, "loss": 0.1061, "lr": 1.4315479330822711e-06, "epoch": 3.3835758835758836, "percentage": 67.67, "elapsed_time": "0:17:06", "remaining_time": "0:08:10", "throughput": 1274.59, "total_tokens": 1308064}
|
|
{"current_steps": 3260, "total_steps": 4810, "loss": 0.0016, "lr": 1.4233538269936042e-06, "epoch": 3.3887733887733886, "percentage": 67.78, "elapsed_time": "0:17:06", "remaining_time": "0:08:08", "throughput": 1276.12, "total_tokens": 1310048}
|
|
{"current_steps": 3265, "total_steps": 4810, "loss": 0.1162, "lr": 1.415173896363178e-06, "epoch": 3.393970893970894, "percentage": 67.88, "elapsed_time": "0:17:06", "remaining_time": "0:08:05", "throughput": 1277.59, "total_tokens": 1311968}
|
|
{"current_steps": 3270, "total_steps": 4810, "loss": 0.0003, "lr": 1.4070082488905034e-06, "epoch": 3.399168399168399, "percentage": 67.98, "elapsed_time": "0:17:07", "remaining_time": "0:08:03", "throughput": 1279.06, "total_tokens": 1313888}
|
|
{"current_steps": 3275, "total_steps": 4810, "loss": 0.0648, "lr": 1.3988569920870315e-06, "epoch": 3.4043659043659042, "percentage": 68.09, "elapsed_time": "0:17:07", "remaining_time": "0:08:01", "throughput": 1280.78, "total_tokens": 1316064}
|
|
{"current_steps": 3280, "total_steps": 4810, "loss": 0.0011, "lr": 1.3907202332747454e-06, "epoch": 3.4095634095634098, "percentage": 68.19, "elapsed_time": "0:17:07", "remaining_time": "0:07:59", "throughput": 1282.37, "total_tokens": 1318112}
|
|
{"current_steps": 3285, "total_steps": 4810, "loss": 0.0003, "lr": 1.3825980795847401e-06, "epoch": 3.414760914760915, "percentage": 68.3, "elapsed_time": "0:17:08", "remaining_time": "0:07:57", "throughput": 1283.78, "total_tokens": 1319968}
|
|
{"current_steps": 3290, "total_steps": 4810, "loss": 0.038, "lr": 1.3744906379558165e-06, "epoch": 3.41995841995842, "percentage": 68.4, "elapsed_time": "0:17:08", "remaining_time": "0:07:55", "throughput": 1285.37, "total_tokens": 1322016}
|
|
{"current_steps": 3295, "total_steps": 4810, "loss": 0.0009, "lr": 1.3663980151330734e-06, "epoch": 3.4251559251559254, "percentage": 68.5, "elapsed_time": "0:17:08", "remaining_time": "0:07:53", "throughput": 1286.83, "total_tokens": 1323936}
|
|
{"current_steps": 3300, "total_steps": 4810, "loss": 0.0241, "lr": 1.358320317666496e-06, "epoch": 3.4303534303534304, "percentage": 68.61, "elapsed_time": "0:17:09", "remaining_time": "0:07:50", "throughput": 1288.36, "total_tokens": 1325920}
|
|
{"current_steps": 3305, "total_steps": 4810, "loss": 0.0668, "lr": 1.350257651909562e-06, "epoch": 3.4355509355509355, "percentage": 68.71, "elapsed_time": "0:17:09", "remaining_time": "0:07:48", "throughput": 1289.82, "total_tokens": 1327840}
|
|
{"current_steps": 3310, "total_steps": 4810, "loss": 0.0001, "lr": 1.3422101240178365e-06, "epoch": 3.4407484407484406, "percentage": 68.81, "elapsed_time": "0:17:09", "remaining_time": "0:07:46", "throughput": 1291.29, "total_tokens": 1329760}
|
|
{"current_steps": 3315, "total_steps": 4810, "loss": 0.0002, "lr": 1.3341778399475714e-06, "epoch": 3.445945945945946, "percentage": 68.92, "elapsed_time": "0:17:10", "remaining_time": "0:07:44", "throughput": 1292.81, "total_tokens": 1331744}
|
|
{"current_steps": 3320, "total_steps": 4810, "loss": 0.0278, "lr": 1.3261609054543178e-06, "epoch": 3.451143451143451, "percentage": 69.02, "elapsed_time": "0:17:10", "remaining_time": "0:07:42", "throughput": 1294.4, "total_tokens": 1333792}
|
|
{"current_steps": 3325, "total_steps": 4810, "loss": 0.0412, "lr": 1.3181594260915263e-06, "epoch": 3.456340956340956, "percentage": 69.13, "elapsed_time": "0:17:10", "remaining_time": "0:07:40", "throughput": 1295.92, "total_tokens": 1335776}
|
|
{"current_steps": 3330, "total_steps": 4810, "loss": 0.0003, "lr": 1.3101735072091624e-06, "epoch": 3.4615384615384617, "percentage": 69.23, "elapsed_time": "0:17:11", "remaining_time": "0:07:38", "throughput": 1297.5, "total_tokens": 1337824}
|
|
{"current_steps": 3335, "total_steps": 4810, "loss": 0.0311, "lr": 1.3022032539523177e-06, "epoch": 3.4667359667359667, "percentage": 69.33, "elapsed_time": "0:17:11", "remaining_time": "0:07:36", "throughput": 1299.08, "total_tokens": 1339872}
|
|
{"current_steps": 3340, "total_steps": 4810, "loss": 0.0937, "lr": 1.2942487712598234e-06, "epoch": 3.471933471933472, "percentage": 69.44, "elapsed_time": "0:17:11", "remaining_time": "0:07:34", "throughput": 1300.66, "total_tokens": 1341920}
|
|
{"current_steps": 3345, "total_steps": 4810, "loss": 0.0176, "lr": 1.2863101638628716e-06, "epoch": 3.4771309771309773, "percentage": 69.54, "elapsed_time": "0:17:12", "remaining_time": "0:07:31", "throughput": 1302.18, "total_tokens": 1343904}
|
|
{"current_steps": 3350, "total_steps": 4810, "loss": 0.0738, "lr": 1.2783875362836373e-06, "epoch": 3.4823284823284824, "percentage": 69.65, "elapsed_time": "0:17:12", "remaining_time": "0:07:29", "throughput": 1303.76, "total_tokens": 1345952}
|
|
{"current_steps": 3355, "total_steps": 4810, "loss": 0.0394, "lr": 1.2704809928338957e-06, "epoch": 3.4875259875259874, "percentage": 69.75, "elapsed_time": "0:17:12", "remaining_time": "0:07:27", "throughput": 1305.46, "total_tokens": 1348128}
|
|
{"current_steps": 3360, "total_steps": 4810, "loss": 0.0012, "lr": 1.2625906376136582e-06, "epoch": 3.492723492723493, "percentage": 69.85, "elapsed_time": "0:17:13", "remaining_time": "0:07:25", "throughput": 1306.92, "total_tokens": 1350048}
|
|
{"current_steps": 3365, "total_steps": 4810, "loss": 0.1121, "lr": 1.2547165745097927e-06, "epoch": 3.497920997920998, "percentage": 69.96, "elapsed_time": "0:17:13", "remaining_time": "0:07:23", "throughput": 1308.37, "total_tokens": 1351968}
|
|
{"current_steps": 3370, "total_steps": 4810, "loss": 0.0682, "lr": 1.2468589071946632e-06, "epoch": 3.503118503118503, "percentage": 70.06, "elapsed_time": "0:17:13", "remaining_time": "0:07:21", "throughput": 1309.88, "total_tokens": 1353952}
|
|
{"current_steps": 3374, "total_steps": 4810, "eval_loss": 0.4127735495567322, "epoch": 3.507276507276507, "percentage": 70.15, "elapsed_time": "0:17:14", "remaining_time": "0:07:20", "throughput": 1309.81, "total_tokens": 1355552}
|
|
{"current_steps": 3375, "total_steps": 4810, "loss": 0.0726, "lr": 1.2390177391247616e-06, "epoch": 3.508316008316008, "percentage": 70.17, "elapsed_time": "0:18:52", "remaining_time": "0:08:01", "throughput": 1197.82, "total_tokens": 1356000}
|
|
{"current_steps": 3380, "total_steps": 4810, "loss": 0.1161, "lr": 1.2311931735393417e-06, "epoch": 3.5135135135135136, "percentage": 70.27, "elapsed_time": "0:18:52", "remaining_time": "0:07:59", "throughput": 1199.23, "total_tokens": 1357984}
|
|
{"current_steps": 3385, "total_steps": 4810, "loss": 0.0002, "lr": 1.2233853134590698e-06, "epoch": 3.5187110187110187, "percentage": 70.37, "elapsed_time": "0:18:52", "remaining_time": "0:07:56", "throughput": 1200.59, "total_tokens": 1359904}
|
|
{"current_steps": 3390, "total_steps": 4810, "loss": 0.0385, "lr": 1.2155942616846562e-06, "epoch": 3.523908523908524, "percentage": 70.48, "elapsed_time": "0:18:53", "remaining_time": "0:07:54", "throughput": 1202.06, "total_tokens": 1361952}
|
|
{"current_steps": 3395, "total_steps": 4810, "loss": 0.1318, "lr": 1.2078201207955122e-06, "epoch": 3.529106029106029, "percentage": 70.58, "elapsed_time": "0:18:53", "remaining_time": "0:07:52", "throughput": 1203.52, "total_tokens": 1364000}
|
|
{"current_steps": 3400, "total_steps": 4810, "loss": 0.0008, "lr": 1.2000629931483947e-06, "epoch": 3.5343035343035343, "percentage": 70.69, "elapsed_time": "0:18:53", "remaining_time": "0:07:50", "throughput": 1205.05, "total_tokens": 1366112}
|
|
{"current_steps": 3405, "total_steps": 4810, "loss": 0.0016, "lr": 1.1923229808760565e-06, "epoch": 3.5395010395010393, "percentage": 70.79, "elapsed_time": "0:18:53", "remaining_time": "0:07:47", "throughput": 1206.45, "total_tokens": 1368096}
|
|
{"current_steps": 3410, "total_steps": 4810, "loss": 0.0661, "lr": 1.1846001858859054e-06, "epoch": 3.544698544698545, "percentage": 70.89, "elapsed_time": "0:18:54", "remaining_time": "0:07:45", "throughput": 1207.97, "total_tokens": 1370208}
|
|
{"current_steps": 3415, "total_steps": 4810, "loss": 0.0004, "lr": 1.1768947098586628e-06, "epoch": 3.54989604989605, "percentage": 71.0, "elapsed_time": "0:18:54", "remaining_time": "0:07:43", "throughput": 1209.37, "total_tokens": 1372192}
|
|
{"current_steps": 3420, "total_steps": 4810, "loss": 0.0171, "lr": 1.1692066542470202e-06, "epoch": 3.555093555093555, "percentage": 71.1, "elapsed_time": "0:18:54", "remaining_time": "0:07:41", "throughput": 1210.83, "total_tokens": 1374240}
|
|
{"current_steps": 3425, "total_steps": 4810, "loss": 0.0003, "lr": 1.1615361202743088e-06, "epoch": 3.5602910602910605, "percentage": 71.21, "elapsed_time": "0:18:55", "remaining_time": "0:07:39", "throughput": 1212.18, "total_tokens": 1376160}
|
|
{"current_steps": 3430, "total_steps": 4810, "loss": 0.0008, "lr": 1.1538832089331628e-06, "epoch": 3.5654885654885655, "percentage": 71.31, "elapsed_time": "0:18:55", "remaining_time": "0:07:36", "throughput": 1213.64, "total_tokens": 1378208}
|
|
{"current_steps": 3435, "total_steps": 4810, "loss": 0.0007, "lr": 1.1462480209841928e-06, "epoch": 3.5706860706860706, "percentage": 71.41, "elapsed_time": "0:18:55", "remaining_time": "0:07:34", "throughput": 1215.04, "total_tokens": 1380192}
|
|
{"current_steps": 3440, "total_steps": 4810, "loss": 0.0491, "lr": 1.1386306569546578e-06, "epoch": 3.5758835758835756, "percentage": 71.52, "elapsed_time": "0:18:56", "remaining_time": "0:07:32", "throughput": 1216.61, "total_tokens": 1382368}
|
|
{"current_steps": 3445, "total_steps": 4810, "loss": 0.0002, "lr": 1.1310312171371394e-06, "epoch": 3.581081081081081, "percentage": 71.62, "elapsed_time": "0:18:56", "remaining_time": "0:07:30", "throughput": 1218.23, "total_tokens": 1384608}
|
|
{"current_steps": 3450, "total_steps": 4810, "loss": 0.1426, "lr": 1.123449801588226e-06, "epoch": 3.586278586278586, "percentage": 71.73, "elapsed_time": "0:18:56", "remaining_time": "0:07:28", "throughput": 1219.63, "total_tokens": 1386592}
|
|
{"current_steps": 3455, "total_steps": 4810, "loss": 0.098, "lr": 1.1158865101271906e-06, "epoch": 3.5914760914760917, "percentage": 71.83, "elapsed_time": "0:18:57", "remaining_time": "0:07:26", "throughput": 1220.91, "total_tokens": 1388448}
|
|
{"current_steps": 3460, "total_steps": 4810, "loss": 0.0001, "lr": 1.1083414423346807e-06, "epoch": 3.5966735966735968, "percentage": 71.93, "elapsed_time": "0:18:57", "remaining_time": "0:07:23", "throughput": 1222.41, "total_tokens": 1390560}
|
|
{"current_steps": 3465, "total_steps": 4810, "loss": 0.0977, "lr": 1.100814697551406e-06, "epoch": 3.601871101871102, "percentage": 72.04, "elapsed_time": "0:18:57", "remaining_time": "0:07:21", "throughput": 1223.98, "total_tokens": 1392736}
|
|
{"current_steps": 3470, "total_steps": 4810, "loss": 0.1036, "lr": 1.0933063748768254e-06, "epoch": 3.607068607068607, "percentage": 72.14, "elapsed_time": "0:18:58", "remaining_time": "0:07:19", "throughput": 1225.37, "total_tokens": 1394720}
|
|
{"current_steps": 3475, "total_steps": 4810, "loss": 0.0001, "lr": 1.0858165731678514e-06, "epoch": 3.6122661122661124, "percentage": 72.25, "elapsed_time": "0:18:58", "remaining_time": "0:07:17", "throughput": 1226.71, "total_tokens": 1396640}
|
|
{"current_steps": 3480, "total_steps": 4810, "loss": 0.0528, "lr": 1.0783453910375423e-06, "epoch": 3.6174636174636174, "percentage": 72.35, "elapsed_time": "0:18:58", "remaining_time": "0:07:15", "throughput": 1228.22, "total_tokens": 1398752}
|
|
{"current_steps": 3485, "total_steps": 4810, "loss": 0.0787, "lr": 1.0708929268538034e-06, "epoch": 3.6226611226611225, "percentage": 72.45, "elapsed_time": "0:18:59", "remaining_time": "0:07:13", "throughput": 1229.66, "total_tokens": 1400800}
|
|
{"current_steps": 3490, "total_steps": 4810, "loss": 0.0007, "lr": 1.0634592787380964e-06, "epoch": 3.627858627858628, "percentage": 72.56, "elapsed_time": "0:18:59", "remaining_time": "0:07:10", "throughput": 1231.0, "total_tokens": 1402720}
|
|
{"current_steps": 3495, "total_steps": 4810, "loss": 0.0827, "lr": 1.0560445445641423e-06, "epoch": 3.633056133056133, "percentage": 72.66, "elapsed_time": "0:18:59", "remaining_time": "0:07:08", "throughput": 1232.39, "total_tokens": 1404704}
|
|
{"current_steps": 3500, "total_steps": 4810, "loss": 0.0002, "lr": 1.048648821956637e-06, "epoch": 3.638253638253638, "percentage": 72.77, "elapsed_time": "0:19:00", "remaining_time": "0:07:06", "throughput": 1233.66, "total_tokens": 1406560}
|
|
{"current_steps": 3505, "total_steps": 4810, "loss": 0.0586, "lr": 1.0412722082899647e-06, "epoch": 3.643451143451143, "percentage": 72.87, "elapsed_time": "0:19:00", "remaining_time": "0:07:04", "throughput": 1235.05, "total_tokens": 1408544}
|
|
{"current_steps": 3510, "total_steps": 4810, "loss": 0.0003, "lr": 1.033914800686912e-06, "epoch": 3.6486486486486487, "percentage": 72.97, "elapsed_time": "0:19:00", "remaining_time": "0:07:02", "throughput": 1236.38, "total_tokens": 1410464}
|
|
{"current_steps": 3515, "total_steps": 4810, "loss": 0.0001, "lr": 1.0265766960173964e-06, "epoch": 3.6538461538461537, "percentage": 73.08, "elapsed_time": "0:19:01", "remaining_time": "0:07:00", "throughput": 1237.76, "total_tokens": 1412448}
|
|
{"current_steps": 3520, "total_steps": 4810, "loss": 0.042, "lr": 1.019257990897185e-06, "epoch": 3.6590436590436592, "percentage": 73.18, "elapsed_time": "0:19:01", "remaining_time": "0:06:58", "throughput": 1239.37, "total_tokens": 1414688}
|
|
{"current_steps": 3525, "total_steps": 4810, "loss": 0.0036, "lr": 1.0119587816866258e-06, "epoch": 3.6642411642411643, "percentage": 73.28, "elapsed_time": "0:19:01", "remaining_time": "0:06:56", "throughput": 1240.76, "total_tokens": 1416672}
|
|
{"current_steps": 3530, "total_steps": 4810, "loss": 0.0002, "lr": 1.0046791644893757e-06, "epoch": 3.6694386694386694, "percentage": 73.39, "elapsed_time": "0:19:02", "remaining_time": "0:06:54", "throughput": 1242.09, "total_tokens": 1418592}
|
|
{"current_steps": 3535, "total_steps": 4810, "loss": 0.0004, "lr": 9.97419235151137e-07, "epoch": 3.6746361746361744, "percentage": 73.49, "elapsed_time": "0:19:02", "remaining_time": "0:06:52", "throughput": 1243.48, "total_tokens": 1420576}
|
|
{"current_steps": 3540, "total_steps": 4810, "loss": 0.0005, "lr": 9.901790892583973e-07, "epoch": 3.67983367983368, "percentage": 73.6, "elapsed_time": "0:19:02", "remaining_time": "0:06:49", "throughput": 1244.86, "total_tokens": 1422560}
|
|
{"current_steps": 3545, "total_steps": 4810, "loss": 0.0001, "lr": 9.829588221371694e-07, "epoch": 3.685031185031185, "percentage": 73.7, "elapsed_time": "0:19:03", "remaining_time": "0:06:47", "throughput": 1246.3, "total_tokens": 1424608}
|
|
{"current_steps": 3550, "total_steps": 4810, "loss": 0.0002, "lr": 9.757585288517329e-07, "epoch": 3.6902286902286905, "percentage": 73.8, "elapsed_time": "0:19:03", "remaining_time": "0:06:45", "throughput": 1247.85, "total_tokens": 1426784}
|
|
{"current_steps": 3555, "total_steps": 4810, "loss": 0.0507, "lr": 9.6857830420339e-07, "epoch": 3.6954261954261955, "percentage": 73.91, "elapsed_time": "0:19:03", "remaining_time": "0:06:43", "throughput": 1249.35, "total_tokens": 1428896}
|
|
{"current_steps": 3560, "total_steps": 4810, "loss": 0.0001, "lr": 9.614182427292076e-07, "epoch": 3.7006237006237006, "percentage": 74.01, "elapsed_time": "0:19:04", "remaining_time": "0:06:41", "throughput": 1250.73, "total_tokens": 1430880}
|
|
{"current_steps": 3565, "total_steps": 4810, "loss": 0.0706, "lr": 9.54278438700785e-07, "epoch": 3.7058212058212057, "percentage": 74.12, "elapsed_time": "0:19:04", "remaining_time": "0:06:39", "throughput": 1252.11, "total_tokens": 1432864}
|
|
{"current_steps": 3570, "total_steps": 4810, "loss": 0.0558, "lr": 9.471589861229999e-07, "epoch": 3.711018711018711, "percentage": 74.22, "elapsed_time": "0:19:04", "remaining_time": "0:06:37", "throughput": 1253.55, "total_tokens": 1434912}
|
|
{"current_steps": 3575, "total_steps": 4810, "loss": 0.0451, "lr": 9.400599787327774e-07, "epoch": 3.7162162162162162, "percentage": 74.32, "elapsed_time": "0:19:05", "remaining_time": "0:06:35", "throughput": 1254.87, "total_tokens": 1436832}
|
|
{"current_steps": 3580, "total_steps": 4810, "loss": 0.0456, "lr": 9.329815099978567e-07, "epoch": 3.7214137214137213, "percentage": 74.43, "elapsed_time": "0:19:05", "remaining_time": "0:06:33", "throughput": 1256.19, "total_tokens": 1438752}
|
|
{"current_steps": 3585, "total_steps": 4810, "loss": 0.0002, "lr": 9.259236731155583e-07, "epoch": 3.726611226611227, "percentage": 74.53, "elapsed_time": "0:19:05", "remaining_time": "0:06:31", "throughput": 1257.52, "total_tokens": 1440672}
|
|
{"current_steps": 3590, "total_steps": 4810, "loss": 0.0311, "lr": 9.188865610115572e-07, "epoch": 3.731808731808732, "percentage": 74.64, "elapsed_time": "0:19:05", "remaining_time": "0:06:29", "throughput": 1259.0, "total_tokens": 1442784}
|
|
{"current_steps": 3595, "total_steps": 4810, "loss": 0.0596, "lr": 9.118702663386583e-07, "epoch": 3.737006237006237, "percentage": 74.74, "elapsed_time": "0:19:06", "remaining_time": "0:06:27", "throughput": 1260.55, "total_tokens": 1444960}
|
|
{"current_steps": 3600, "total_steps": 4810, "loss": 0.0648, "lr": 9.048748814755783e-07, "epoch": 3.742203742203742, "percentage": 74.84, "elapsed_time": "0:19:06", "remaining_time": "0:06:25", "throughput": 1261.87, "total_tokens": 1446880}
|
|
{"current_steps": 3605, "total_steps": 4810, "loss": 0.0394, "lr": 8.979004985257294e-07, "epoch": 3.7474012474012475, "percentage": 74.95, "elapsed_time": "0:19:06", "remaining_time": "0:06:23", "throughput": 1263.35, "total_tokens": 1448992}
|
|
{"current_steps": 3610, "total_steps": 4810, "loss": 0.0295, "lr": 8.909472093160066e-07, "epoch": 3.7525987525987525, "percentage": 75.05, "elapsed_time": "0:19:07", "remaining_time": "0:06:21", "throughput": 1264.73, "total_tokens": 1450976}
|
|
{"current_steps": 3615, "total_steps": 4810, "loss": 0.0128, "lr": 8.840151053955773e-07, "epoch": 3.757796257796258, "percentage": 75.16, "elapsed_time": "0:19:07", "remaining_time": "0:06:19", "throughput": 1266.21, "total_tokens": 1453088}
|
|
{"current_steps": 3615, "total_steps": 4810, "eval_loss": 0.36968719959259033, "epoch": 3.757796257796258, "percentage": 75.16, "elapsed_time": "0:19:08", "remaining_time": "0:06:19", "throughput": 1264.85, "total_tokens": 1453088}
|
|
{"current_steps": 3620, "total_steps": 4810, "loss": 0.0014, "lr": 8.771042780346767e-07, "epoch": 3.762993762993763, "percentage": 75.26, "elapsed_time": "0:19:51", "remaining_time": "0:06:31", "throughput": 1221.67, "total_tokens": 1455136}
|
|
{"current_steps": 3625, "total_steps": 4810, "loss": 0.1087, "lr": 8.702148182234043e-07, "epoch": 3.768191268191268, "percentage": 75.36, "elapsed_time": "0:19:51", "remaining_time": "0:06:29", "throughput": 1223.01, "total_tokens": 1457120}
|
|
{"current_steps": 3630, "total_steps": 4810, "loss": 0.0001, "lr": 8.633468166705336e-07, "epoch": 3.773388773388773, "percentage": 75.47, "elapsed_time": "0:19:51", "remaining_time": "0:06:27", "throughput": 1224.39, "total_tokens": 1459168}
|
|
{"current_steps": 3635, "total_steps": 4810, "loss": 0.0061, "lr": 8.565003638023065e-07, "epoch": 3.7785862785862787, "percentage": 75.57, "elapsed_time": "0:19:52", "remaining_time": "0:06:25", "throughput": 1225.73, "total_tokens": 1461152}
|
|
{"current_steps": 3640, "total_steps": 4810, "loss": 0.0002, "lr": 8.496755497612491e-07, "epoch": 3.7837837837837838, "percentage": 75.68, "elapsed_time": "0:19:52", "remaining_time": "0:06:23", "throughput": 1227.06, "total_tokens": 1463136}
|
|
{"current_steps": 3645, "total_steps": 4810, "loss": 0.0001, "lr": 8.42872464404986e-07, "epoch": 3.788981288981289, "percentage": 75.78, "elapsed_time": "0:19:52", "remaining_time": "0:06:21", "throughput": 1228.39, "total_tokens": 1465120}
|
|
{"current_steps": 3650, "total_steps": 4810, "loss": 0.0322, "lr": 8.360911973050537e-07, "epoch": 3.7941787941787943, "percentage": 75.88, "elapsed_time": "0:19:53", "remaining_time": "0:06:19", "throughput": 1229.72, "total_tokens": 1467104}
|
|
{"current_steps": 3655, "total_steps": 4810, "loss": 0.0004, "lr": 8.29331837745724e-07, "epoch": 3.7993762993762994, "percentage": 75.99, "elapsed_time": "0:19:53", "remaining_time": "0:06:17", "throughput": 1231.1, "total_tokens": 1469152}
|
|
{"current_steps": 3660, "total_steps": 4810, "loss": 0.1215, "lr": 8.225944747228257e-07, "epoch": 3.8045738045738045, "percentage": 76.09, "elapsed_time": "0:19:53", "remaining_time": "0:06:15", "throughput": 1232.54, "total_tokens": 1471264}
|
|
{"current_steps": 3665, "total_steps": 4810, "loss": 0.0868, "lr": 8.158791969425739e-07, "epoch": 3.8097713097713095, "percentage": 76.2, "elapsed_time": "0:19:54", "remaining_time": "0:06:13", "throughput": 1233.86, "total_tokens": 1473248}
|
|
{"current_steps": 3670, "total_steps": 4810, "loss": 0.0009, "lr": 8.091860928204048e-07, "epoch": 3.814968814968815, "percentage": 76.3, "elapsed_time": "0:19:54", "remaining_time": "0:06:10", "throughput": 1235.3, "total_tokens": 1475360}
|
|
{"current_steps": 3675, "total_steps": 4810, "loss": 0.0001, "lr": 8.025152504798078e-07, "epoch": 3.82016632016632, "percentage": 76.4, "elapsed_time": "0:19:54", "remaining_time": "0:06:08", "throughput": 1236.73, "total_tokens": 1477472}
|
|
{"current_steps": 3680, "total_steps": 4810, "loss": 0.0912, "lr": 7.958667577511684e-07, "epoch": 3.8253638253638256, "percentage": 76.51, "elapsed_time": "0:19:54", "remaining_time": "0:06:06", "throughput": 1237.95, "total_tokens": 1479328}
|
|
{"current_steps": 3685, "total_steps": 4810, "loss": 0.0447, "lr": 7.892407021706064e-07, "epoch": 3.8305613305613306, "percentage": 76.61, "elapsed_time": "0:19:55", "remaining_time": "0:06:04", "throughput": 1239.22, "total_tokens": 1481248}
|
|
{"current_steps": 3690, "total_steps": 4810, "loss": 0.0001, "lr": 7.826371709788314e-07, "epoch": 3.8357588357588357, "percentage": 76.72, "elapsed_time": "0:19:55", "remaining_time": "0:06:02", "throughput": 1240.49, "total_tokens": 1483168}
|
|
{"current_steps": 3695, "total_steps": 4810, "loss": 0.0007, "lr": 7.760562511199881e-07, "epoch": 3.8409563409563408, "percentage": 76.82, "elapsed_time": "0:19:55", "remaining_time": "0:06:00", "throughput": 1241.81, "total_tokens": 1485152}
|
|
{"current_steps": 3700, "total_steps": 4810, "loss": 0.0407, "lr": 7.694980292405122e-07, "epoch": 3.8461538461538463, "percentage": 76.92, "elapsed_time": "0:19:56", "remaining_time": "0:05:58", "throughput": 1243.19, "total_tokens": 1487200}
|
|
{"current_steps": 3705, "total_steps": 4810, "loss": 0.0294, "lr": 7.629625916879932e-07, "epoch": 3.8513513513513513, "percentage": 77.03, "elapsed_time": "0:19:56", "remaining_time": "0:05:56", "throughput": 1244.51, "total_tokens": 1489184}
|
|
{"current_steps": 3710, "total_steps": 4810, "loss": 0.0046, "lr": 7.564500245100326e-07, "epoch": 3.856548856548857, "percentage": 77.13, "elapsed_time": "0:19:56", "remaining_time": "0:05:54", "throughput": 1245.83, "total_tokens": 1491168}
|
|
{"current_steps": 3715, "total_steps": 4810, "loss": 0.0001, "lr": 7.49960413453115e-07, "epoch": 3.861746361746362, "percentage": 77.23, "elapsed_time": "0:19:57", "remaining_time": "0:05:52", "throughput": 1247.21, "total_tokens": 1493216}
|
|
{"current_steps": 3720, "total_steps": 4810, "loss": 0.0738, "lr": 7.434938439614781e-07, "epoch": 3.866943866943867, "percentage": 77.34, "elapsed_time": "0:19:57", "remaining_time": "0:05:50", "throughput": 1248.53, "total_tokens": 1495200}
|
|
{"current_steps": 3725, "total_steps": 4810, "loss": 0.0047, "lr": 7.370504011759855e-07, "epoch": 3.872141372141372, "percentage": 77.44, "elapsed_time": "0:19:57", "remaining_time": "0:05:48", "throughput": 1249.85, "total_tokens": 1497184}
|
|
{"current_steps": 3730, "total_steps": 4810, "loss": 0.0633, "lr": 7.306301699330065e-07, "epoch": 3.8773388773388775, "percentage": 77.55, "elapsed_time": "0:19:58", "remaining_time": "0:05:46", "throughput": 1251.06, "total_tokens": 1499040}
|
|
{"current_steps": 3735, "total_steps": 4810, "loss": 0.0354, "lr": 7.242332347633052e-07, "epoch": 3.8825363825363826, "percentage": 77.65, "elapsed_time": "0:19:58", "remaining_time": "0:05:44", "throughput": 1252.38, "total_tokens": 1501024}
|
|
{"current_steps": 3740, "total_steps": 4810, "loss": 0.042, "lr": 7.17859679890916e-07, "epoch": 3.8877338877338876, "percentage": 77.75, "elapsed_time": "0:19:58", "remaining_time": "0:05:42", "throughput": 1253.74, "total_tokens": 1503072}
|
|
{"current_steps": 3745, "total_steps": 4810, "loss": 0.0002, "lr": 7.115095892320456e-07, "epoch": 3.892931392931393, "percentage": 77.86, "elapsed_time": "0:19:59", "remaining_time": "0:05:41", "throughput": 1255.22, "total_tokens": 1505248}
|
|
{"current_steps": 3750, "total_steps": 4810, "loss": 0.0084, "lr": 7.051830463939605e-07, "epoch": 3.898128898128898, "percentage": 77.96, "elapsed_time": "0:19:59", "remaining_time": "0:05:39", "throughput": 1256.59, "total_tokens": 1507296}
|
|
{"current_steps": 3755, "total_steps": 4810, "loss": 0.0226, "lr": 6.988801346738911e-07, "epoch": 3.9033264033264032, "percentage": 78.07, "elapsed_time": "0:19:59", "remaining_time": "0:05:37", "throughput": 1257.95, "total_tokens": 1509344}
|
|
{"current_steps": 3760, "total_steps": 4810, "loss": 0.0001, "lr": 6.926009370579334e-07, "epoch": 3.9085239085239083, "percentage": 78.17, "elapsed_time": "0:20:00", "remaining_time": "0:05:35", "throughput": 1259.37, "total_tokens": 1511456}
|
|
{"current_steps": 3765, "total_steps": 4810, "loss": 0.0235, "lr": 6.863455362199542e-07, "epoch": 3.913721413721414, "percentage": 78.27, "elapsed_time": "0:20:00", "remaining_time": "0:05:33", "throughput": 1260.69, "total_tokens": 1513440}
|
|
{"current_steps": 3770, "total_steps": 4810, "loss": 0.0001, "lr": 6.801140145205071e-07, "epoch": 3.918918918918919, "percentage": 78.38, "elapsed_time": "0:20:00", "remaining_time": "0:05:31", "throughput": 1262.05, "total_tokens": 1515488}
|
|
{"current_steps": 3775, "total_steps": 4810, "loss": 0.0065, "lr": 6.739064540057425e-07, "epoch": 3.9241164241164244, "percentage": 78.48, "elapsed_time": "0:20:01", "remaining_time": "0:05:29", "throughput": 1263.31, "total_tokens": 1517408}
|
|
{"current_steps": 3780, "total_steps": 4810, "loss": 0.0335, "lr": 6.677229364063329e-07, "epoch": 3.9293139293139294, "percentage": 78.59, "elapsed_time": "0:20:01", "remaining_time": "0:05:27", "throughput": 1264.62, "total_tokens": 1519392}
|
|
{"current_steps": 3785, "total_steps": 4810, "loss": 0.0001, "lr": 6.615635431363943e-07, "epoch": 3.9345114345114345, "percentage": 78.69, "elapsed_time": "0:20:01", "remaining_time": "0:05:25", "throughput": 1265.98, "total_tokens": 1521440}
|
|
{"current_steps": 3790, "total_steps": 4810, "loss": 0.0844, "lr": 6.554283552924118e-07, "epoch": 3.9397089397089395, "percentage": 78.79, "elapsed_time": "0:20:02", "remaining_time": "0:05:23", "throughput": 1267.35, "total_tokens": 1523488}
|
|
{"current_steps": 3795, "total_steps": 4810, "loss": 0.0001, "lr": 6.493174536521768e-07, "epoch": 3.944906444906445, "percentage": 78.9, "elapsed_time": "0:20:02", "remaining_time": "0:05:21", "throughput": 1268.76, "total_tokens": 1525600}
|
|
{"current_steps": 3800, "total_steps": 4810, "loss": 0.0715, "lr": 6.43230918673721e-07, "epoch": 3.95010395010395, "percentage": 79.0, "elapsed_time": "0:20:02", "remaining_time": "0:05:19", "throughput": 1270.07, "total_tokens": 1527584}
|
|
{"current_steps": 3805, "total_steps": 4810, "loss": 0.0002, "lr": 6.371688304942544e-07, "epoch": 3.955301455301455, "percentage": 79.11, "elapsed_time": "0:20:03", "remaining_time": "0:05:17", "throughput": 1271.32, "total_tokens": 1529504}
|
|
{"current_steps": 3810, "total_steps": 4810, "loss": 0.0805, "lr": 6.311312689291166e-07, "epoch": 3.9604989604989607, "percentage": 79.21, "elapsed_time": "0:20:03", "remaining_time": "0:05:15", "throughput": 1272.58, "total_tokens": 1531424}
|
|
{"current_steps": 3815, "total_steps": 4810, "loss": 0.0, "lr": 6.251183134707183e-07, "epoch": 3.9656964656964657, "percentage": 79.31, "elapsed_time": "0:20:03", "remaining_time": "0:05:13", "throughput": 1273.88, "total_tokens": 1533408}
|
|
{"current_steps": 3820, "total_steps": 4810, "loss": 0.1432, "lr": 6.191300432875017e-07, "epoch": 3.970893970893971, "percentage": 79.42, "elapsed_time": "0:20:04", "remaining_time": "0:05:12", "throughput": 1275.19, "total_tokens": 1535392}
|
|
{"current_steps": 3825, "total_steps": 4810, "loss": 0.0178, "lr": 6.13166537222894e-07, "epoch": 3.976091476091476, "percentage": 79.52, "elapsed_time": "0:20:04", "remaining_time": "0:05:10", "throughput": 1276.45, "total_tokens": 1537312}
|
|
{"current_steps": 3830, "total_steps": 4810, "loss": 0.0611, "lr": 6.072278737942691e-07, "epoch": 3.9812889812889813, "percentage": 79.63, "elapsed_time": "0:20:04", "remaining_time": "0:05:08", "throughput": 1277.8, "total_tokens": 1539360}
|
|
{"current_steps": 3835, "total_steps": 4810, "loss": 0.0019, "lr": 6.013141311919168e-07, "epoch": 3.9864864864864864, "percentage": 79.73, "elapsed_time": "0:20:05", "remaining_time": "0:05:06", "throughput": 1279.06, "total_tokens": 1541280}
|
|
{"current_steps": 3840, "total_steps": 4810, "loss": 0.0644, "lr": 5.954253872780102e-07, "epoch": 3.991683991683992, "percentage": 79.83, "elapsed_time": "0:20:05", "remaining_time": "0:05:04", "throughput": 1280.26, "total_tokens": 1543136}
|
|
{"current_steps": 3845, "total_steps": 4810, "loss": 0.1091, "lr": 5.895617195855827e-07, "epoch": 3.996881496881497, "percentage": 79.94, "elapsed_time": "0:20:05", "remaining_time": "0:05:02", "throughput": 1281.57, "total_tokens": 1545120}
|
|
{"current_steps": 3850, "total_steps": 4810, "loss": 0.0001, "lr": 5.837232053175065e-07, "epoch": 4.002079002079002, "percentage": 80.04, "elapsed_time": "0:20:06", "remaining_time": "0:05:00", "throughput": 1282.73, "total_tokens": 1547056}
|
|
{"current_steps": 3855, "total_steps": 4810, "loss": 0.0238, "lr": 5.77909921345475e-07, "epoch": 4.007276507276507, "percentage": 80.15, "elapsed_time": "0:20:06", "remaining_time": "0:04:58", "throughput": 1283.97, "total_tokens": 1548976}
|
|
{"current_steps": 3856, "total_steps": 4810, "eval_loss": 0.3716074526309967, "epoch": 4.008316008316008, "percentage": 80.17, "elapsed_time": "0:20:07", "remaining_time": "0:04:58", "throughput": 1283.1, "total_tokens": 1549360}
|
|
{"current_steps": 3860, "total_steps": 4810, "loss": 0.0133, "lr": 5.721219442089925e-07, "epoch": 4.012474012474012, "percentage": 80.25, "elapsed_time": "0:20:35", "remaining_time": "0:05:04", "throughput": 1255.6, "total_tokens": 1550960}
|
|
{"current_steps": 3865, "total_steps": 4810, "loss": 0.011, "lr": 5.663593501143663e-07, "epoch": 4.017671517671518, "percentage": 80.35, "elapsed_time": "0:20:35", "remaining_time": "0:05:02", "throughput": 1256.87, "total_tokens": 1552944}
|
|
{"current_steps": 3870, "total_steps": 4810, "loss": 0.0378, "lr": 5.606222149337004e-07, "epoch": 4.022869022869023, "percentage": 80.46, "elapsed_time": "0:20:35", "remaining_time": "0:05:00", "throughput": 1258.2, "total_tokens": 1554992}
|
|
{"current_steps": 3875, "total_steps": 4810, "loss": 0.0001, "lr": 5.549106142039018e-07, "epoch": 4.028066528066528, "percentage": 80.56, "elapsed_time": "0:20:36", "remaining_time": "0:04:58", "throughput": 1259.57, "total_tokens": 1557104}
|
|
{"current_steps": 3880, "total_steps": 4810, "loss": 0.0008, "lr": 5.492246231256798e-07, "epoch": 4.033264033264033, "percentage": 80.67, "elapsed_time": "0:20:36", "remaining_time": "0:04:56", "throughput": 1260.84, "total_tokens": 1559088}
|
|
{"current_steps": 3885, "total_steps": 4810, "loss": 0.0001, "lr": 5.435643165625615e-07, "epoch": 4.038461538461538, "percentage": 80.77, "elapsed_time": "0:20:36", "remaining_time": "0:04:54", "throughput": 1262.07, "total_tokens": 1561008}
|
|
{"current_steps": 3890, "total_steps": 4810, "loss": 0.0007, "lr": 5.379297690399035e-07, "epoch": 4.043659043659043, "percentage": 80.87, "elapsed_time": "0:20:37", "remaining_time": "0:04:52", "throughput": 1263.4, "total_tokens": 1563056}
|
|
{"current_steps": 3895, "total_steps": 4810, "loss": 0.0001, "lr": 5.323210547439089e-07, "epoch": 4.048856548856548, "percentage": 80.98, "elapsed_time": "0:20:37", "remaining_time": "0:04:50", "throughput": 1264.67, "total_tokens": 1565040}
|
|
{"current_steps": 3900, "total_steps": 4810, "loss": 0.0001, "lr": 5.267382475206548e-07, "epoch": 4.054054054054054, "percentage": 81.08, "elapsed_time": "0:20:37", "remaining_time": "0:04:48", "throughput": 1265.94, "total_tokens": 1567024}
|
|
{"current_steps": 3905, "total_steps": 4810, "loss": 0.0003, "lr": 5.21181420875117e-07, "epoch": 4.0592515592515594, "percentage": 81.19, "elapsed_time": "0:20:38", "remaining_time": "0:04:46", "throughput": 1267.31, "total_tokens": 1569136}
|
|
{"current_steps": 3910, "total_steps": 4810, "loss": 0.0001, "lr": 5.15650647970202e-07, "epoch": 4.0644490644490645, "percentage": 81.29, "elapsed_time": "0:20:38", "remaining_time": "0:04:45", "throughput": 1268.58, "total_tokens": 1571120}
|
|
{"current_steps": 3915, "total_steps": 4810, "loss": 0.0001, "lr": 5.101460016257858e-07, "epoch": 4.06964656964657, "percentage": 81.39, "elapsed_time": "0:20:38", "remaining_time": "0:04:43", "throughput": 1269.8, "total_tokens": 1573040}
|
|
{"current_steps": 3920, "total_steps": 4810, "loss": 0.0005, "lr": 5.046675543177531e-07, "epoch": 4.074844074844075, "percentage": 81.5, "elapsed_time": "0:20:39", "remaining_time": "0:04:41", "throughput": 1270.96, "total_tokens": 1574896}
|
|
{"current_steps": 3925, "total_steps": 4810, "loss": 0.0001, "lr": 4.992153781770448e-07, "epoch": 4.08004158004158, "percentage": 81.6, "elapsed_time": "0:20:39", "remaining_time": "0:04:39", "throughput": 1272.23, "total_tokens": 1576880}
|
|
{"current_steps": 3930, "total_steps": 4810, "loss": 0.0001, "lr": 4.937895449887076e-07, "epoch": 4.085239085239086, "percentage": 81.7, "elapsed_time": "0:20:39", "remaining_time": "0:04:37", "throughput": 1273.49, "total_tokens": 1578864}
|
|
{"current_steps": 3935, "total_steps": 4810, "loss": 0.0, "lr": 4.883901261909466e-07, "epoch": 4.090436590436591, "percentage": 81.81, "elapsed_time": "0:20:40", "remaining_time": "0:04:35", "throughput": 1274.76, "total_tokens": 1580848}
|
|
{"current_steps": 3940, "total_steps": 4810, "loss": 0.0001, "lr": 4.830171928741901e-07, "epoch": 4.095634095634096, "percentage": 81.91, "elapsed_time": "0:20:40", "remaining_time": "0:04:33", "throughput": 1275.92, "total_tokens": 1582704}
|
|
{"current_steps": 3945, "total_steps": 4810, "loss": 0.0008, "lr": 4.776708157801463e-07, "epoch": 4.100831600831601, "percentage": 82.02, "elapsed_time": "0:20:40", "remaining_time": "0:04:32", "throughput": 1277.29, "total_tokens": 1584816}
|
|
{"current_steps": 3950, "total_steps": 4810, "loss": 0.0387, "lr": 4.723510653008809e-07, "epoch": 4.106029106029106, "percentage": 82.12, "elapsed_time": "0:20:41", "remaining_time": "0:04:30", "throughput": 1278.56, "total_tokens": 1586800}
|
|
{"current_steps": 3955, "total_steps": 4810, "loss": 0.081, "lr": 4.6705801147788136e-07, "epoch": 4.111226611226611, "percentage": 82.22, "elapsed_time": "0:20:41", "remaining_time": "0:04:28", "throughput": 1279.77, "total_tokens": 1588720}
|
|
{"current_steps": 3960, "total_steps": 4810, "loss": 0.0001, "lr": 4.617917240011394e-07, "epoch": 4.116424116424117, "percentage": 82.33, "elapsed_time": "0:20:41", "remaining_time": "0:04:26", "throughput": 1280.93, "total_tokens": 1590576}
|
|
{"current_steps": 3965, "total_steps": 4810, "loss": 0.0001, "lr": 4.5655227220823355e-07, "epoch": 4.121621621621622, "percentage": 82.43, "elapsed_time": "0:20:42", "remaining_time": "0:04:24", "throughput": 1282.15, "total_tokens": 1592496}
|
|
{"current_steps": 3970, "total_steps": 4810, "loss": 0.0123, "lr": 4.513397250834159e-07, "epoch": 4.126819126819127, "percentage": 82.54, "elapsed_time": "0:20:42", "remaining_time": "0:04:22", "throughput": 1283.46, "total_tokens": 1594544}
|
|
{"current_steps": 3975, "total_steps": 4810, "loss": 0.0007, "lr": 4.461541512567011e-07, "epoch": 4.132016632016632, "percentage": 82.64, "elapsed_time": "0:20:42", "remaining_time": "0:04:21", "throughput": 1284.62, "total_tokens": 1596400}
|
|
{"current_steps": 3980, "total_steps": 4810, "loss": 0.0585, "lr": 4.409956190029674e-07, "epoch": 4.137214137214137, "percentage": 82.74, "elapsed_time": "0:20:43", "remaining_time": "0:04:19", "throughput": 1285.83, "total_tokens": 1598320}
|
|
{"current_steps": 3985, "total_steps": 4810, "loss": 0.0202, "lr": 4.358641962410537e-07, "epoch": 4.142411642411642, "percentage": 82.85, "elapsed_time": "0:20:43", "remaining_time": "0:04:17", "throughput": 1287.14, "total_tokens": 1600368}
|
|
{"current_steps": 3990, "total_steps": 4810, "loss": 0.0, "lr": 4.3075995053286716e-07, "epoch": 4.147609147609147, "percentage": 82.95, "elapsed_time": "0:20:43", "remaining_time": "0:04:15", "throughput": 1288.4, "total_tokens": 1602352}
|
|
{"current_steps": 3995, "total_steps": 4810, "loss": 0.0002, "lr": 4.2568294908249486e-07, "epoch": 4.152806652806653, "percentage": 83.06, "elapsed_time": "0:20:43", "remaining_time": "0:04:13", "throughput": 1289.66, "total_tokens": 1604336}
|
|
{"current_steps": 4000, "total_steps": 4810, "loss": 0.0, "lr": 4.2063325873531485e-07, "epoch": 4.158004158004158, "percentage": 83.16, "elapsed_time": "0:20:44", "remaining_time": "0:04:11", "throughput": 1290.87, "total_tokens": 1606256}
|
|
{"current_steps": 4005, "total_steps": 4810, "loss": 0.0001, "lr": 4.156109459771215e-07, "epoch": 4.163201663201663, "percentage": 83.26, "elapsed_time": "0:20:44", "remaining_time": "0:04:10", "throughput": 1292.18, "total_tokens": 1608304}
|
|
{"current_steps": 4010, "total_steps": 4810, "loss": 0.0001, "lr": 4.106160769332443e-07, "epoch": 4.168399168399168, "percentage": 83.37, "elapsed_time": "0:20:44", "remaining_time": "0:04:08", "throughput": 1293.59, "total_tokens": 1610480}
|
|
{"current_steps": 4015, "total_steps": 4810, "loss": 0.0382, "lr": 4.056487173676843e-07, "epoch": 4.173596673596673, "percentage": 83.47, "elapsed_time": "0:20:45", "remaining_time": "0:04:06", "throughput": 1294.89, "total_tokens": 1612528}
|
|
{"current_steps": 4020, "total_steps": 4810, "loss": 0.0001, "lr": 4.0070893268224055e-07, "epoch": 4.1787941787941785, "percentage": 83.58, "elapsed_time": "0:20:45", "remaining_time": "0:04:04", "throughput": 1296.2, "total_tokens": 1614576}
|
|
{"current_steps": 4025, "total_steps": 4810, "loss": 0.0, "lr": 3.9579678791565323e-07, "epoch": 4.183991683991684, "percentage": 83.68, "elapsed_time": "0:20:45", "remaining_time": "0:04:02", "throughput": 1297.51, "total_tokens": 1616624}
|
|
{"current_steps": 4030, "total_steps": 4810, "loss": 0.0378, "lr": 3.9091234774274873e-07, "epoch": 4.1891891891891895, "percentage": 83.78, "elapsed_time": "0:20:46", "remaining_time": "0:04:01", "throughput": 1298.81, "total_tokens": 1618672}
|
|
{"current_steps": 4035, "total_steps": 4810, "loss": 0.0029, "lr": 3.8605567647358426e-07, "epoch": 4.1943866943866945, "percentage": 83.89, "elapsed_time": "0:20:46", "remaining_time": "0:03:59", "throughput": 1300.17, "total_tokens": 1620784}
|
|
{"current_steps": 4040, "total_steps": 4810, "loss": 0.0002, "lr": 3.812268380526046e-07, "epoch": 4.1995841995842, "percentage": 83.99, "elapsed_time": "0:20:46", "remaining_time": "0:03:57", "throughput": 1301.42, "total_tokens": 1622768}
|
|
{"current_steps": 4045, "total_steps": 4810, "loss": 0.0001, "lr": 3.764258960577971e-07, "epoch": 4.204781704781705, "percentage": 84.1, "elapsed_time": "0:20:47", "remaining_time": "0:03:55", "throughput": 1302.62, "total_tokens": 1624688}
|
|
{"current_steps": 4050, "total_steps": 4810, "loss": 0.0004, "lr": 3.7165291369985616e-07, "epoch": 4.20997920997921, "percentage": 84.2, "elapsed_time": "0:20:47", "remaining_time": "0:03:54", "throughput": 1303.87, "total_tokens": 1626672}
|
|
{"current_steps": 4055, "total_steps": 4810, "loss": 0.0001, "lr": 3.6690795382135184e-07, "epoch": 4.215176715176715, "percentage": 84.3, "elapsed_time": "0:20:47", "remaining_time": "0:03:52", "throughput": 1305.28, "total_tokens": 1628848}
|
|
{"current_steps": 4060, "total_steps": 4810, "loss": 0.0001, "lr": 3.6219107889590154e-07, "epoch": 4.220374220374221, "percentage": 84.41, "elapsed_time": "0:20:48", "remaining_time": "0:03:50", "throughput": 1306.53, "total_tokens": 1630832}
|
|
{"current_steps": 4065, "total_steps": 4810, "loss": 0.0007, "lr": 3.575023510273462e-07, "epoch": 4.225571725571726, "percentage": 84.51, "elapsed_time": "0:20:48", "remaining_time": "0:03:48", "throughput": 1307.83, "total_tokens": 1632880}
|
|
{"current_steps": 4070, "total_steps": 4810, "loss": 0.0001, "lr": 3.528418319489349e-07, "epoch": 4.230769230769231, "percentage": 84.62, "elapsed_time": "0:20:48", "remaining_time": "0:03:47", "throughput": 1309.18, "total_tokens": 1634992}
|
|
{"current_steps": 4075, "total_steps": 4810, "loss": 0.0001, "lr": 3.48209583022511e-07, "epoch": 4.235966735966736, "percentage": 84.72, "elapsed_time": "0:20:49", "remaining_time": "0:03:45", "throughput": 1310.38, "total_tokens": 1636912}
|
|
{"current_steps": 4080, "total_steps": 4810, "loss": 0.0, "lr": 3.436056652377043e-07, "epoch": 4.241164241164241, "percentage": 84.82, "elapsed_time": "0:20:49", "remaining_time": "0:03:43", "throughput": 1311.58, "total_tokens": 1638832}
|
|
{"current_steps": 4085, "total_steps": 4810, "loss": 0.056, "lr": 3.3903013921112753e-07, "epoch": 4.246361746361746, "percentage": 84.93, "elapsed_time": "0:20:49", "remaining_time": "0:03:41", "throughput": 1313.02, "total_tokens": 1641072}
|
|
{"current_steps": 4090, "total_steps": 4810, "loss": 0.0001, "lr": 3.3448306518557795e-07, "epoch": 4.251559251559252, "percentage": 85.03, "elapsed_time": "0:20:50", "remaining_time": "0:03:40", "throughput": 1314.22, "total_tokens": 1642992}
|
|
{"current_steps": 4095, "total_steps": 4810, "loss": 0.0, "lr": 3.299645030292467e-07, "epoch": 4.256756756756757, "percentage": 85.14, "elapsed_time": "0:20:50", "remaining_time": "0:03:38", "throughput": 1315.52, "total_tokens": 1645040}
|
|
{"current_steps": 4097, "total_steps": 4810, "eval_loss": 0.4492134153842926, "epoch": 4.258835758835759, "percentage": 85.18, "elapsed_time": "0:20:51", "remaining_time": "0:03:37", "throughput": 1314.93, "total_tokens": 1645808}
|
|
{"current_steps": 4100, "total_steps": 4810, "loss": 0.0, "lr": 3.254745122349279e-07, "epoch": 4.261954261954262, "percentage": 85.24, "elapsed_time": "0:21:37", "remaining_time": "0:03:44", "throughput": 1269.52, "total_tokens": 1647024}
|
|
{"current_steps": 4105, "total_steps": 4810, "loss": 0.0001, "lr": 3.2101315191923667e-07, "epoch": 4.267151767151767, "percentage": 85.34, "elapsed_time": "0:21:37", "remaining_time": "0:03:42", "throughput": 1270.74, "total_tokens": 1649008}
|
|
{"current_steps": 4110, "total_steps": 4810, "loss": 0.0003, "lr": 3.1658048082182926e-07, "epoch": 4.272349272349272, "percentage": 85.45, "elapsed_time": "0:21:37", "remaining_time": "0:03:41", "throughput": 1272.0, "total_tokens": 1651056}
|
|
{"current_steps": 4115, "total_steps": 4810, "loss": 0.0001, "lr": 3.1217655730463094e-07, "epoch": 4.277546777546777, "percentage": 85.55, "elapsed_time": "0:21:38", "remaining_time": "0:03:39", "throughput": 1273.27, "total_tokens": 1653104}
|
|
{"current_steps": 4120, "total_steps": 4810, "loss": 0.0001, "lr": 3.078014393510695e-07, "epoch": 4.282744282744282, "percentage": 85.65, "elapsed_time": "0:21:38", "remaining_time": "0:03:37", "throughput": 1274.67, "total_tokens": 1655344}
|
|
{"current_steps": 4125, "total_steps": 4810, "loss": 0.042, "lr": 3.0345518456530666e-07, "epoch": 4.287941787941788, "percentage": 85.76, "elapsed_time": "0:21:38", "remaining_time": "0:03:35", "throughput": 1275.94, "total_tokens": 1657392}
|
|
{"current_steps": 4130, "total_steps": 4810, "loss": 0.0002, "lr": 2.9913785017148563e-07, "epoch": 4.293139293139293, "percentage": 85.86, "elapsed_time": "0:21:39", "remaining_time": "0:03:33", "throughput": 1277.1, "total_tokens": 1659312}
|
|
{"current_steps": 4135, "total_steps": 4810, "loss": 0.0557, "lr": 2.9484949301297166e-07, "epoch": 4.298336798336798, "percentage": 85.97, "elapsed_time": "0:21:39", "remaining_time": "0:03:32", "throughput": 1278.41, "total_tokens": 1661424}
|
|
{"current_steps": 4140, "total_steps": 4810, "loss": 0.0239, "lr": 2.905901695516092e-07, "epoch": 4.303534303534303, "percentage": 86.07, "elapsed_time": "0:21:39", "remaining_time": "0:03:30", "throughput": 1279.62, "total_tokens": 1663408}
|
|
{"current_steps": 4145, "total_steps": 4810, "loss": 0.0001, "lr": 2.8635993586697555e-07, "epoch": 4.3087318087318085, "percentage": 86.17, "elapsed_time": "0:21:40", "remaining_time": "0:03:28", "throughput": 1280.77, "total_tokens": 1665328}
|
|
{"current_steps": 4150, "total_steps": 4810, "loss": 0.0001, "lr": 2.8215884765564197e-07, "epoch": 4.313929313929314, "percentage": 86.28, "elapsed_time": "0:21:40", "remaining_time": "0:03:26", "throughput": 1281.98, "total_tokens": 1667312}
|
|
{"current_steps": 4155, "total_steps": 4810, "loss": 0.0003, "lr": 2.779869602304416e-07, "epoch": 4.3191268191268195, "percentage": 86.38, "elapsed_time": "0:21:40", "remaining_time": "0:03:25", "throughput": 1283.19, "total_tokens": 1669296}
|
|
{"current_steps": 4160, "total_steps": 4810, "loss": 0.0003, "lr": 2.73844328519742e-07, "epoch": 4.324324324324325, "percentage": 86.49, "elapsed_time": "0:21:41", "remaining_time": "0:03:23", "throughput": 1284.39, "total_tokens": 1671280}
|
|
{"current_steps": 4165, "total_steps": 4810, "loss": 0.0002, "lr": 2.6973100706672e-07, "epoch": 4.32952182952183, "percentage": 86.59, "elapsed_time": "0:21:41", "remaining_time": "0:03:21", "throughput": 1285.74, "total_tokens": 1673456}
|
|
{"current_steps": 4170, "total_steps": 4810, "loss": 0.0001, "lr": 2.656470500286451e-07, "epoch": 4.334719334719335, "percentage": 86.69, "elapsed_time": "0:21:41", "remaining_time": "0:03:19", "throughput": 1286.99, "total_tokens": 1675504}
|
|
{"current_steps": 4175, "total_steps": 4810, "loss": 0.0, "lr": 2.615925111761647e-07, "epoch": 4.33991683991684, "percentage": 86.8, "elapsed_time": "0:21:42", "remaining_time": "0:03:18", "throughput": 1288.19, "total_tokens": 1677488}
|
|
{"current_steps": 4180, "total_steps": 4810, "loss": 0.0633, "lr": 2.575674438925974e-07, "epoch": 4.345114345114345, "percentage": 86.9, "elapsed_time": "0:21:42", "remaining_time": "0:03:16", "throughput": 1289.44, "total_tokens": 1679536}
|
|
{"current_steps": 4185, "total_steps": 4810, "loss": 0.0875, "lr": 2.535719011732321e-07, "epoch": 4.350311850311851, "percentage": 87.01, "elapsed_time": "0:21:42", "remaining_time": "0:03:14", "throughput": 1290.65, "total_tokens": 1681520}
|
|
{"current_steps": 4190, "total_steps": 4810, "loss": 0.0372, "lr": 2.4960593562462496e-07, "epoch": 4.355509355509356, "percentage": 87.11, "elapsed_time": "0:21:43", "remaining_time": "0:03:12", "throughput": 1291.9, "total_tokens": 1683568}
|
|
{"current_steps": 4195, "total_steps": 4810, "loss": 0.0001, "lr": 2.4566959946391246e-07, "epoch": 4.360706860706861, "percentage": 87.21, "elapsed_time": "0:21:43", "remaining_time": "0:03:11", "throughput": 1293.05, "total_tokens": 1685488}
|
|
{"current_steps": 4200, "total_steps": 4810, "loss": 0.0341, "lr": 2.4176294451811936e-07, "epoch": 4.365904365904366, "percentage": 87.32, "elapsed_time": "0:21:43", "remaining_time": "0:03:09", "throughput": 1294.2, "total_tokens": 1687408}
|
|
{"current_steps": 4205, "total_steps": 4810, "loss": 0.0001, "lr": 2.378860222234794e-07, "epoch": 4.371101871101871, "percentage": 87.42, "elapsed_time": "0:21:44", "remaining_time": "0:03:07", "throughput": 1295.49, "total_tokens": 1689520}
|
|
{"current_steps": 4210, "total_steps": 4810, "loss": 0.0003, "lr": 2.3403888362475784e-07, "epoch": 4.376299376299376, "percentage": 87.53, "elapsed_time": "0:21:44", "remaining_time": "0:03:05", "throughput": 1296.74, "total_tokens": 1691568}
|
|
{"current_steps": 4215, "total_steps": 4810, "loss": 0.0, "lr": 2.3022157937457628e-07, "epoch": 4.381496881496881, "percentage": 87.63, "elapsed_time": "0:21:44", "remaining_time": "0:03:04", "throughput": 1297.99, "total_tokens": 1693616}
|
|
{"current_steps": 4220, "total_steps": 4810, "loss": 0.0001, "lr": 2.2643415973275017e-07, "epoch": 4.386694386694387, "percentage": 87.73, "elapsed_time": "0:21:45", "remaining_time": "0:03:02", "throughput": 1299.19, "total_tokens": 1695600}
|
|
{"current_steps": 4225, "total_steps": 4810, "loss": 0.0001, "lr": 2.226766745656231e-07, "epoch": 4.391891891891892, "percentage": 87.84, "elapsed_time": "0:21:45", "remaining_time": "0:03:00", "throughput": 1300.39, "total_tokens": 1697584}
|
|
{"current_steps": 4230, "total_steps": 4810, "loss": 0.0001, "lr": 2.1894917334541355e-07, "epoch": 4.397089397089397, "percentage": 87.94, "elapsed_time": "0:21:45", "remaining_time": "0:02:59", "throughput": 1301.59, "total_tokens": 1699568}
|
|
{"current_steps": 4235, "total_steps": 4810, "loss": 0.0017, "lr": 2.15251705149562e-07, "epoch": 4.402286902286902, "percentage": 88.05, "elapsed_time": "0:21:46", "remaining_time": "0:02:57", "throughput": 1302.93, "total_tokens": 1701744}
|
|
{"current_steps": 4240, "total_steps": 4810, "loss": 0.0, "lr": 2.11584318660083e-07, "epoch": 4.407484407484407, "percentage": 88.15, "elapsed_time": "0:21:46", "remaining_time": "0:02:55", "throughput": 1304.03, "total_tokens": 1703600}
|
|
{"current_steps": 4245, "total_steps": 4810, "loss": 0.0613, "lr": 2.0794706216292815e-07, "epoch": 4.412681912681912, "percentage": 88.25, "elapsed_time": "0:21:46", "remaining_time": "0:02:53", "throughput": 1305.32, "total_tokens": 1705712}
|
|
{"current_steps": 4250, "total_steps": 4810, "loss": 0.0001, "lr": 2.043399835473475e-07, "epoch": 4.417879417879418, "percentage": 88.36, "elapsed_time": "0:21:47", "remaining_time": "0:02:52", "throughput": 1306.52, "total_tokens": 1707696}
|
|
{"current_steps": 4255, "total_steps": 4810, "loss": 0.0012, "lr": 2.0076313030525845e-07, "epoch": 4.423076923076923, "percentage": 88.46, "elapsed_time": "0:21:47", "remaining_time": "0:02:50", "throughput": 1307.76, "total_tokens": 1709744}
|
|
{"current_steps": 4260, "total_steps": 4810, "loss": 0.0001, "lr": 1.9721654953062412e-07, "epoch": 4.428274428274428, "percentage": 88.57, "elapsed_time": "0:21:47", "remaining_time": "0:02:48", "throughput": 1309.01, "total_tokens": 1711792}
|
|
{"current_steps": 4265, "total_steps": 4810, "loss": 0.0002, "lr": 1.937002879188285e-07, "epoch": 4.4334719334719335, "percentage": 88.67, "elapsed_time": "0:21:48", "remaining_time": "0:02:47", "throughput": 1310.3, "total_tokens": 1713904}
|
|
{"current_steps": 4270, "total_steps": 4810, "loss": 0.0, "lr": 1.9021439176606565e-07, "epoch": 4.4386694386694385, "percentage": 88.77, "elapsed_time": "0:21:48", "remaining_time": "0:02:45", "throughput": 1311.44, "total_tokens": 1715824}
|
|
{"current_steps": 4275, "total_steps": 4810, "loss": 0.0001, "lr": 1.8675890696872838e-07, "epoch": 4.443866943866944, "percentage": 88.88, "elapsed_time": "0:21:48", "remaining_time": "0:02:43", "throughput": 1312.64, "total_tokens": 1717808}
|
|
{"current_steps": 4280, "total_steps": 4810, "loss": 0.0326, "lr": 1.8333387902280314e-07, "epoch": 4.4490644490644495, "percentage": 88.98, "elapsed_time": "0:21:48", "remaining_time": "0:02:42", "throughput": 1313.88, "total_tokens": 1719856}
|
|
{"current_steps": 4285, "total_steps": 4810, "loss": 0.0001, "lr": 1.799393530232729e-07, "epoch": 4.454261954261955, "percentage": 89.09, "elapsed_time": "0:21:49", "remaining_time": "0:02:40", "throughput": 1315.02, "total_tokens": 1721776}
|
|
{"current_steps": 4290, "total_steps": 4810, "loss": 0.0001, "lr": 1.765753736635234e-07, "epoch": 4.45945945945946, "percentage": 89.19, "elapsed_time": "0:21:49", "remaining_time": "0:02:38", "throughput": 1316.11, "total_tokens": 1723632}
|
|
{"current_steps": 4295, "total_steps": 4810, "loss": 0.0001, "lr": 1.7324198523475111e-07, "epoch": 4.464656964656965, "percentage": 89.29, "elapsed_time": "0:21:49", "remaining_time": "0:02:37", "throughput": 1317.2, "total_tokens": 1725488}
|
|
{"current_steps": 4300, "total_steps": 4810, "loss": 0.0001, "lr": 1.6993923162538562e-07, "epoch": 4.46985446985447, "percentage": 89.4, "elapsed_time": "0:21:50", "remaining_time": "0:02:35", "throughput": 1318.49, "total_tokens": 1727600}
|
|
{"current_steps": 4305, "total_steps": 4810, "loss": 0.0462, "lr": 1.666671563205069e-07, "epoch": 4.475051975051975, "percentage": 89.5, "elapsed_time": "0:21:50", "remaining_time": "0:02:33", "throughput": 1319.77, "total_tokens": 1729712}
|
|
{"current_steps": 4310, "total_steps": 4810, "loss": 0.0, "lr": 1.6342580240127582e-07, "epoch": 4.48024948024948, "percentage": 89.6, "elapsed_time": "0:21:50", "remaining_time": "0:02:32", "throughput": 1320.96, "total_tokens": 1731696}
|
|
{"current_steps": 4315, "total_steps": 4810, "loss": 0.0169, "lr": 1.6021521254436678e-07, "epoch": 4.485446985446986, "percentage": 89.71, "elapsed_time": "0:21:51", "remaining_time": "0:02:30", "throughput": 1322.2, "total_tokens": 1733744}
|
|
{"current_steps": 4320, "total_steps": 4810, "loss": 0.0001, "lr": 1.5703542902140296e-07, "epoch": 4.490644490644491, "percentage": 89.81, "elapsed_time": "0:21:51", "remaining_time": "0:02:28", "throughput": 1323.38, "total_tokens": 1735728}
|
|
{"current_steps": 4325, "total_steps": 4810, "loss": 0.0001, "lr": 1.538864936984036e-07, "epoch": 4.495841995841996, "percentage": 89.92, "elapsed_time": "0:21:51", "remaining_time": "0:02:27", "throughput": 1324.61, "total_tokens": 1737776}
|
|
{"current_steps": 4330, "total_steps": 4810, "loss": 0.0313, "lr": 1.507684480352292e-07, "epoch": 4.501039501039501, "percentage": 90.02, "elapsed_time": "0:21:52", "remaining_time": "0:02:25", "throughput": 1325.85, "total_tokens": 1739824}
|
|
{"current_steps": 4335, "total_steps": 4810, "loss": 0.0202, "lr": 1.476813330850388e-07, "epoch": 4.506237006237006, "percentage": 90.12, "elapsed_time": "0:21:52", "remaining_time": "0:02:23", "throughput": 1326.98, "total_tokens": 1741744}
|
|
{"current_steps": 4338, "total_steps": 4810, "eval_loss": 0.43684616684913635, "epoch": 4.509355509355509, "percentage": 90.19, "elapsed_time": "0:21:53", "remaining_time": "0:02:22", "throughput": 1326.69, "total_tokens": 1742960}
|
|
{"current_steps": 4340, "total_steps": 4810, "loss": 0.0002, "lr": 1.4462518949374838e-07, "epoch": 4.511434511434511, "percentage": 90.23, "elapsed_time": "0:22:16", "remaining_time": "0:02:24", "throughput": 1304.41, "total_tokens": 1743728}
|
|
{"current_steps": 4345, "total_steps": 4810, "loss": 0.0723, "lr": 1.4160005749949328e-07, "epoch": 4.516632016632016, "percentage": 90.33, "elapsed_time": "0:22:17", "remaining_time": "0:02:23", "throughput": 1305.72, "total_tokens": 1745904}
|
|
{"current_steps": 4350, "total_steps": 4810, "loss": 0.0001, "lr": 1.386059769321027e-07, "epoch": 4.521829521829522, "percentage": 90.44, "elapsed_time": "0:22:17", "remaining_time": "0:02:21", "throughput": 1306.84, "total_tokens": 1747824}
|
|
{"current_steps": 4355, "total_steps": 4810, "loss": 0.0002, "lr": 1.3564298721257223e-07, "epoch": 4.527027027027027, "percentage": 90.54, "elapsed_time": "0:22:17", "remaining_time": "0:02:19", "throughput": 1308.05, "total_tokens": 1749872}
|
|
{"current_steps": 4360, "total_steps": 4810, "loss": 0.0, "lr": 1.32711127352545e-07, "epoch": 4.532224532224532, "percentage": 90.64, "elapsed_time": "0:22:18", "remaining_time": "0:02:18", "throughput": 1309.17, "total_tokens": 1751792}
|
|
{"current_steps": 4365, "total_steps": 4810, "loss": 0.0001, "lr": 1.2981043595380048e-07, "epoch": 4.537422037422037, "percentage": 90.75, "elapsed_time": "0:22:18", "remaining_time": "0:02:16", "throughput": 1310.33, "total_tokens": 1753776}
|
|
{"current_steps": 4370, "total_steps": 4810, "loss": 0.0, "lr": 1.269409512077427e-07, "epoch": 4.542619542619542, "percentage": 90.85, "elapsed_time": "0:22:18", "remaining_time": "0:02:14", "throughput": 1311.54, "total_tokens": 1755824}
|
|
{"current_steps": 4375, "total_steps": 4810, "loss": 0.0001, "lr": 1.241027108949e-07, "epoch": 4.547817047817047, "percentage": 90.96, "elapsed_time": "0:22:19", "remaining_time": "0:02:13", "throughput": 1312.85, "total_tokens": 1758000}
|
|
{"current_steps": 4380, "total_steps": 4810, "loss": 0.0006, "lr": 1.2129575238442715e-07, "epoch": 4.553014553014553, "percentage": 91.06, "elapsed_time": "0:22:19", "remaining_time": "0:02:11", "throughput": 1314.01, "total_tokens": 1759984}
|
|
{"current_steps": 4385, "total_steps": 4810, "loss": 0.0002, "lr": 1.1852011263361218e-07, "epoch": 4.558212058212058, "percentage": 91.16, "elapsed_time": "0:22:19", "remaining_time": "0:02:09", "throughput": 1315.18, "total_tokens": 1761968}
|
|
{"current_steps": 4390, "total_steps": 4810, "loss": 0.0, "lr": 1.1577582818739136e-07, "epoch": 4.5634095634095635, "percentage": 91.27, "elapsed_time": "0:22:20", "remaining_time": "0:02:08", "throughput": 1316.39, "total_tokens": 1764016}
|
|
{"current_steps": 4395, "total_steps": 4810, "loss": 0.0046, "lr": 1.1306293517786615e-07, "epoch": 4.5686070686070686, "percentage": 91.37, "elapsed_time": "0:22:20", "remaining_time": "0:02:06", "throughput": 1317.5, "total_tokens": 1765936}
|
|
{"current_steps": 4400, "total_steps": 4810, "loss": 0.0002, "lr": 1.1038146932383003e-07, "epoch": 4.573804573804574, "percentage": 91.48, "elapsed_time": "0:22:20", "remaining_time": "0:02:04", "throughput": 1318.71, "total_tokens": 1767984}
|
|
{"current_steps": 4405, "total_steps": 4810, "loss": 0.0266, "lr": 1.0773146593029637e-07, "epoch": 4.579002079002079, "percentage": 91.58, "elapsed_time": "0:22:21", "remaining_time": "0:02:03", "throughput": 1319.82, "total_tokens": 1769904}
|
|
{"current_steps": 4410, "total_steps": 4810, "loss": 0.0001, "lr": 1.0511295988803293e-07, "epoch": 4.584199584199585, "percentage": 91.68, "elapsed_time": "0:22:21", "remaining_time": "0:02:01", "throughput": 1320.97, "total_tokens": 1771888}
|
|
{"current_steps": 4415, "total_steps": 4810, "loss": 0.0027, "lr": 1.0252598567310451e-07, "epoch": 4.58939708939709, "percentage": 91.79, "elapsed_time": "0:22:21", "remaining_time": "0:02:00", "throughput": 1322.18, "total_tokens": 1773936}
|
|
{"current_steps": 4420, "total_steps": 4810, "loss": 0.0, "lr": 9.997057734641852e-08, "epoch": 4.594594594594595, "percentage": 91.89, "elapsed_time": "0:22:22", "remaining_time": "0:01:58", "throughput": 1323.38, "total_tokens": 1775984}
|
|
{"current_steps": 4425, "total_steps": 4810, "loss": 0.0, "lr": 9.744676855327484e-08, "epoch": 4.5997920997921, "percentage": 92.0, "elapsed_time": "0:22:22", "remaining_time": "0:01:56", "throughput": 1324.45, "total_tokens": 1777840}
|
|
{"current_steps": 4430, "total_steps": 4810, "loss": 0.0267, "lr": 9.495459252292505e-08, "epoch": 4.604989604989605, "percentage": 92.1, "elapsed_time": "0:22:22", "remaining_time": "0:01:55", "throughput": 1325.61, "total_tokens": 1779824}
|
|
{"current_steps": 4435, "total_steps": 4810, "loss": 0.0723, "lr": 9.249408206813332e-08, "epoch": 4.61018711018711, "percentage": 92.2, "elapsed_time": "0:22:22", "remaining_time": "0:01:53", "throughput": 1326.82, "total_tokens": 1781872}
|
|
{"current_steps": 4440, "total_steps": 4810, "loss": 0.0001, "lr": 9.00652695847451e-08, "epoch": 4.615384615384615, "percentage": 92.31, "elapsed_time": "0:22:23", "remaining_time": "0:01:51", "throughput": 1328.08, "total_tokens": 1783984}
|
|
{"current_steps": 4445, "total_steps": 4810, "loss": 0.0, "lr": 8.766818705126134e-08, "epoch": 4.620582120582121, "percentage": 92.41, "elapsed_time": "0:22:23", "remaining_time": "0:01:50", "throughput": 1329.28, "total_tokens": 1786032}
|
|
{"current_steps": 4450, "total_steps": 4810, "loss": 0.0058, "lr": 8.530286602841525e-08, "epoch": 4.625779625779626, "percentage": 92.52, "elapsed_time": "0:22:23", "remaining_time": "0:01:48", "throughput": 1330.44, "total_tokens": 1788016}
|
|
{"current_steps": 4455, "total_steps": 4810, "loss": 0.0001, "lr": 8.296933765875898e-08, "epoch": 4.630977130977131, "percentage": 92.62, "elapsed_time": "0:22:24", "remaining_time": "0:01:47", "throughput": 1331.65, "total_tokens": 1790064}
|
|
{"current_steps": 4460, "total_steps": 4810, "loss": 0.0003, "lr": 8.066763266625283e-08, "epoch": 4.636174636174636, "percentage": 92.72, "elapsed_time": "0:22:24", "remaining_time": "0:01:45", "throughput": 1332.76, "total_tokens": 1791984}
|
|
{"current_steps": 4465, "total_steps": 4810, "loss": 0.0321, "lr": 7.839778135586007e-08, "epoch": 4.641372141372141, "percentage": 92.83, "elapsed_time": "0:22:24", "remaining_time": "0:01:43", "throughput": 1333.87, "total_tokens": 1793904}
|
|
{"current_steps": 4470, "total_steps": 4810, "loss": 0.0003, "lr": 7.61598136131489e-08, "epoch": 4.646569646569646, "percentage": 92.93, "elapsed_time": "0:22:25", "remaining_time": "0:01:42", "throughput": 1335.02, "total_tokens": 1795888}
|
|
{"current_steps": 4475, "total_steps": 4810, "loss": 0.028, "lr": 7.3953758903898e-08, "epoch": 4.651767151767151, "percentage": 93.04, "elapsed_time": "0:22:25", "remaining_time": "0:01:40", "throughput": 1336.18, "total_tokens": 1797872}
|
|
{"current_steps": 4480, "total_steps": 4810, "loss": 0.0007, "lr": 7.177964627370999e-08, "epoch": 4.656964656964657, "percentage": 93.14, "elapsed_time": "0:22:25", "remaining_time": "0:01:39", "throughput": 1337.38, "total_tokens": 1799920}
|
|
{"current_steps": 4485, "total_steps": 4810, "loss": 0.0001, "lr": 6.963750434762745e-08, "epoch": 4.662162162162162, "percentage": 93.24, "elapsed_time": "0:22:26", "remaining_time": "0:01:37", "throughput": 1338.44, "total_tokens": 1801776}
|
|
{"current_steps": 4490, "total_steps": 4810, "loss": 0.0157, "lr": 6.752736132975696e-08, "epoch": 4.667359667359667, "percentage": 93.35, "elapsed_time": "0:22:26", "remaining_time": "0:01:35", "throughput": 1339.65, "total_tokens": 1803824}
|
|
{"current_steps": 4495, "total_steps": 4810, "loss": 0.0562, "lr": 6.544924500289789e-08, "epoch": 4.672557172557172, "percentage": 93.45, "elapsed_time": "0:22:26", "remaining_time": "0:01:34", "throughput": 1340.75, "total_tokens": 1805744}
|
|
{"current_steps": 4500, "total_steps": 4810, "loss": 0.0, "lr": 6.340318272817476e-08, "epoch": 4.6777546777546775, "percentage": 93.56, "elapsed_time": "0:22:27", "remaining_time": "0:01:32", "throughput": 1341.91, "total_tokens": 1807728}
|
|
{"current_steps": 4505, "total_steps": 4810, "loss": 0.0329, "lr": 6.138920144468124e-08, "epoch": 4.682952182952183, "percentage": 93.66, "elapsed_time": "0:22:27", "remaining_time": "0:01:31", "throughput": 1343.06, "total_tokens": 1809712}
|
|
{"current_steps": 4510, "total_steps": 4810, "loss": 0.1284, "lr": 5.940732766912011e-08, "epoch": 4.6881496881496885, "percentage": 93.76, "elapsed_time": "0:22:27", "remaining_time": "0:01:29", "throughput": 1344.17, "total_tokens": 1811632}
|
|
{"current_steps": 4515, "total_steps": 4810, "loss": 0.0, "lr": 5.745758749545749e-08, "epoch": 4.6933471933471935, "percentage": 93.87, "elapsed_time": "0:22:28", "remaining_time": "0:01:28", "throughput": 1345.27, "total_tokens": 1813552}
|
|
{"current_steps": 4520, "total_steps": 4810, "loss": 0.0, "lr": 5.554000659457881e-08, "epoch": 4.698544698544699, "percentage": 93.97, "elapsed_time": "0:22:28", "remaining_time": "0:01:26", "throughput": 1346.52, "total_tokens": 1815664}
|
|
{"current_steps": 4525, "total_steps": 4810, "loss": 0.0056, "lr": 5.365461021395096e-08, "epoch": 4.703742203742204, "percentage": 94.07, "elapsed_time": "0:22:28", "remaining_time": "0:01:24", "throughput": 1347.67, "total_tokens": 1817648}
|
|
{"current_steps": 4530, "total_steps": 4810, "loss": 0.0226, "lr": 5.1801423177288146e-08, "epoch": 4.708939708939709, "percentage": 94.18, "elapsed_time": "0:22:29", "remaining_time": "0:01:23", "throughput": 1348.87, "total_tokens": 1819696}
|
|
{"current_steps": 4535, "total_steps": 4810, "loss": 0.0, "lr": 4.998046988422767e-08, "epoch": 4.714137214137214, "percentage": 94.28, "elapsed_time": "0:22:29", "remaining_time": "0:01:21", "throughput": 1350.02, "total_tokens": 1821680}
|
|
{"current_steps": 4540, "total_steps": 4810, "loss": 0.0, "lr": 4.8191774310006045e-08, "epoch": 4.71933471933472, "percentage": 94.39, "elapsed_time": "0:22:29", "remaining_time": "0:01:20", "throughput": 1351.21, "total_tokens": 1823728}
|
|
{"current_steps": 4545, "total_steps": 4810, "loss": 0.0006, "lr": 4.6435360005145647e-08, "epoch": 4.724532224532225, "percentage": 94.49, "elapsed_time": "0:22:30", "remaining_time": "0:01:18", "throughput": 1352.36, "total_tokens": 1825712}
|
|
{"current_steps": 4550, "total_steps": 4810, "loss": 0.0258, "lr": 4.471125009514326e-08, "epoch": 4.72972972972973, "percentage": 94.59, "elapsed_time": "0:22:30", "remaining_time": "0:01:17", "throughput": 1353.55, "total_tokens": 1827760}
|
|
{"current_steps": 4555, "total_steps": 4810, "loss": 0.0, "lr": 4.30194672801662e-08, "epoch": 4.734927234927235, "percentage": 94.7, "elapsed_time": "0:22:30", "remaining_time": "0:01:15", "throughput": 1354.65, "total_tokens": 1829680}
|
|
{"current_steps": 4560, "total_steps": 4810, "loss": 0.0002, "lr": 4.136003383475251e-08, "epoch": 4.74012474012474, "percentage": 94.8, "elapsed_time": "0:22:30", "remaining_time": "0:01:14", "throughput": 1355.85, "total_tokens": 1831728}
|
|
{"current_steps": 4565, "total_steps": 4810, "loss": 0.0001, "lr": 3.9732971607519264e-08, "epoch": 4.745322245322245, "percentage": 94.91, "elapsed_time": "0:22:31", "remaining_time": "0:01:12", "throughput": 1356.94, "total_tokens": 1833648}
|
|
{"current_steps": 4570, "total_steps": 4810, "loss": 0.0, "lr": 3.813830202087338e-08, "epoch": 4.75051975051975, "percentage": 95.01, "elapsed_time": "0:22:31", "remaining_time": "0:01:10", "throughput": 1358.14, "total_tokens": 1835696}
|
|
{"current_steps": 4575, "total_steps": 4810, "loss": 0.0001, "lr": 3.6576046070730676e-08, "epoch": 4.755717255717256, "percentage": 95.11, "elapsed_time": "0:22:31", "remaining_time": "0:01:09", "throughput": 1359.37, "total_tokens": 1837808}
|
|
{"current_steps": 4579, "total_steps": 4810, "eval_loss": 0.4380520284175873, "epoch": 4.75987525987526, "percentage": 95.2, "elapsed_time": "0:22:33", "remaining_time": "0:01:08", "throughput": 1359.23, "total_tokens": 1839344}
|
|
{"current_steps": 4580, "total_steps": 4810, "loss": 0.0003, "lr": 3.504622432623811e-08, "epoch": 4.760914760914761, "percentage": 95.22, "elapsed_time": "0:23:11", "remaining_time": "0:01:09", "throughput": 1321.78, "total_tokens": 1839728}
|
|
{"current_steps": 4585, "total_steps": 4810, "loss": 0.002, "lr": 3.354885692950505e-08, "epoch": 4.766112266112266, "percentage": 95.32, "elapsed_time": "0:23:12", "remaining_time": "0:01:08", "throughput": 1322.94, "total_tokens": 1841776}
|
|
{"current_steps": 4590, "total_steps": 4810, "loss": 0.0001, "lr": 3.208396359533572e-08, "epoch": 4.771309771309771, "percentage": 95.43, "elapsed_time": "0:23:12", "remaining_time": "0:01:06", "throughput": 1324.02, "total_tokens": 1843696}
|
|
{"current_steps": 4595, "total_steps": 4810, "loss": 0.0002, "lr": 3.065156361097138e-08, "epoch": 4.776507276507276, "percentage": 95.53, "elapsed_time": "0:23:12", "remaining_time": "0:01:05", "throughput": 1325.18, "total_tokens": 1845744}
|
|
{"current_steps": 4600, "total_steps": 4810, "loss": 0.0009, "lr": 2.925167583583577e-08, "epoch": 4.781704781704782, "percentage": 95.63, "elapsed_time": "0:23:13", "remaining_time": "0:01:03", "throughput": 1326.34, "total_tokens": 1847792}
|
|
{"current_steps": 4605, "total_steps": 4810, "loss": 0.0712, "lr": 2.7884318701285883e-08, "epoch": 4.786902286902287, "percentage": 95.74, "elapsed_time": "0:23:13", "remaining_time": "0:01:02", "throughput": 1327.45, "total_tokens": 1849776}
|
|
{"current_steps": 4610, "total_steps": 4810, "loss": 0.0, "lr": 2.654951021037161e-08, "epoch": 4.792099792099792, "percentage": 95.84, "elapsed_time": "0:23:13", "remaining_time": "0:01:00", "throughput": 1328.75, "total_tokens": 1852016}
|
|
{"current_steps": 4615, "total_steps": 4810, "loss": 0.0001, "lr": 2.524726793759591e-08, "epoch": 4.797297297297297, "percentage": 95.95, "elapsed_time": "0:23:14", "remaining_time": "0:00:58", "throughput": 1329.91, "total_tokens": 1854064}
|
|
{"current_steps": 4620, "total_steps": 4810, "loss": 0.0002, "lr": 2.3977609028686123e-08, "epoch": 4.802494802494802, "percentage": 96.05, "elapsed_time": "0:23:14", "remaining_time": "0:00:57", "throughput": 1331.07, "total_tokens": 1856112}
|
|
{"current_steps": 4625, "total_steps": 4810, "loss": 0.0, "lr": 2.2740550200365528e-08, "epoch": 4.8076923076923075, "percentage": 96.15, "elapsed_time": "0:23:14", "remaining_time": "0:00:55", "throughput": 1332.18, "total_tokens": 1858096}
|
|
{"current_steps": 4630, "total_steps": 4810, "loss": 0.0, "lr": 2.153610774013548e-08, "epoch": 4.8128898128898125, "percentage": 96.26, "elapsed_time": "0:23:15", "remaining_time": "0:00:54", "throughput": 1333.43, "total_tokens": 1860272}
|
|
{"current_steps": 4635, "total_steps": 4810, "loss": 0.0001, "lr": 2.0364297506060005e-08, "epoch": 4.8180873180873185, "percentage": 96.36, "elapsed_time": "0:23:15", "remaining_time": "0:00:52", "throughput": 1334.55, "total_tokens": 1862256}
|
|
{"current_steps": 4640, "total_steps": 4810, "loss": 0.0, "lr": 1.922513492655653e-08, "epoch": 4.8232848232848236, "percentage": 96.47, "elapsed_time": "0:23:15", "remaining_time": "0:00:51", "throughput": 1335.71, "total_tokens": 1864304}
|
|
{"current_steps": 4645, "total_steps": 4810, "loss": 0.0, "lr": 1.8118635000194395e-08, "epoch": 4.828482328482329, "percentage": 96.57, "elapsed_time": "0:23:16", "remaining_time": "0:00:49", "throughput": 1336.77, "total_tokens": 1866224}
|
|
{"current_steps": 4650, "total_steps": 4810, "loss": 0.0002, "lr": 1.704481229549526e-08, "epoch": 4.833679833679834, "percentage": 96.67, "elapsed_time": "0:23:16", "remaining_time": "0:00:48", "throughput": 1337.97, "total_tokens": 1868336}
|
|
{"current_steps": 4655, "total_steps": 4810, "loss": 0.0001, "lr": 1.6003680950742728e-08, "epoch": 4.838877338877339, "percentage": 96.78, "elapsed_time": "0:23:16", "remaining_time": "0:00:46", "throughput": 1339.18, "total_tokens": 1870448}
|
|
{"current_steps": 4660, "total_steps": 4810, "loss": 0.0076, "lr": 1.499525467379581e-08, "epoch": 4.844074844074844, "percentage": 96.88, "elapsed_time": "0:23:17", "remaining_time": "0:00:44", "throughput": 1340.24, "total_tokens": 1872368}
|
|
{"current_steps": 4665, "total_steps": 4810, "loss": 0.0001, "lr": 1.4019546741908252e-08, "epoch": 4.849272349272349, "percentage": 96.99, "elapsed_time": "0:23:17", "remaining_time": "0:00:43", "throughput": 1341.44, "total_tokens": 1874480}
|
|
{"current_steps": 4670, "total_steps": 4810, "loss": 0.0214, "lr": 1.3076570001553934e-08, "epoch": 4.854469854469855, "percentage": 97.09, "elapsed_time": "0:23:17", "remaining_time": "0:00:41", "throughput": 1342.55, "total_tokens": 1876464}
|
|
{"current_steps": 4675, "total_steps": 4810, "loss": 0.0, "lr": 1.216633686825841e-08, "epoch": 4.85966735966736, "percentage": 97.19, "elapsed_time": "0:23:18", "remaining_time": "0:00:40", "throughput": 1343.66, "total_tokens": 1878448}
|
|
{"current_steps": 4680, "total_steps": 4810, "loss": 0.0426, "lr": 1.1288859326433477e-08, "epoch": 4.864864864864865, "percentage": 97.3, "elapsed_time": "0:23:18", "remaining_time": "0:00:38", "throughput": 1344.77, "total_tokens": 1880432}
|
|
{"current_steps": 4685, "total_steps": 4810, "loss": 0.0598, "lr": 1.0444148929221466e-08, "epoch": 4.87006237006237, "percentage": 97.4, "elapsed_time": "0:23:18", "remaining_time": "0:00:37", "throughput": 1345.96, "total_tokens": 1882544}
|
|
{"current_steps": 4690, "total_steps": 4810, "loss": 0.0001, "lr": 9.632216798342032e-09, "epoch": 4.875259875259875, "percentage": 97.51, "elapsed_time": "0:23:18", "remaining_time": "0:00:35", "throughput": 1347.07, "total_tokens": 1884528}
|
|
{"current_steps": 4695, "total_steps": 4810, "loss": 0.0, "lr": 8.853073623946163e-09, "epoch": 4.88045738045738, "percentage": 97.61, "elapsed_time": "0:23:19", "remaining_time": "0:00:34", "throughput": 1348.27, "total_tokens": 1886640}
|
|
{"current_steps": 4700, "total_steps": 4810, "loss": 0.0369, "lr": 8.106729664475178e-09, "epoch": 4.885654885654886, "percentage": 97.71, "elapsed_time": "0:23:19", "remaining_time": "0:00:32", "throughput": 1349.42, "total_tokens": 1888688}
|
|
{"current_steps": 4705, "total_steps": 4810, "loss": 0.0001, "lr": 7.3931947465252786e-09, "epoch": 4.890852390852391, "percentage": 97.82, "elapsed_time": "0:23:19", "remaining_time": "0:00:31", "throughput": 1350.57, "total_tokens": 1890736}
|
|
{"current_steps": 4710, "total_steps": 4810, "loss": 0.0, "lr": 6.7124782647196015e-09, "epoch": 4.896049896049896, "percentage": 97.92, "elapsed_time": "0:23:20", "remaining_time": "0:00:29", "throughput": 1351.67, "total_tokens": 1892720}
|
|
{"current_steps": 4715, "total_steps": 4810, "loss": 0.0, "lr": 6.064589181582481e-09, "epoch": 4.901247401247401, "percentage": 98.02, "elapsed_time": "0:23:20", "remaining_time": "0:00:28", "throughput": 1352.77, "total_tokens": 1894704}
|
|
{"current_steps": 4720, "total_steps": 4810, "loss": 0.0287, "lr": 5.4495360274231526e-09, "epoch": 4.906444906444906, "percentage": 98.13, "elapsed_time": "0:23:20", "remaining_time": "0:00:26", "throughput": 1353.83, "total_tokens": 1896624}
|
|
{"current_steps": 4725, "total_steps": 4810, "loss": 0.0307, "lr": 4.867326900223068e-09, "epoch": 4.911642411642411, "percentage": 98.23, "elapsed_time": "0:23:21", "remaining_time": "0:00:25", "throughput": 1354.89, "total_tokens": 1898544}
|
|
{"current_steps": 4730, "total_steps": 4810, "loss": 0.0353, "lr": 4.317969465527927e-09, "epoch": 4.916839916839917, "percentage": 98.34, "elapsed_time": "0:23:21", "remaining_time": "0:00:23", "throughput": 1356.04, "total_tokens": 1900592}
|
|
{"current_steps": 4735, "total_steps": 4810, "loss": 0.0287, "lr": 3.801470956348863e-09, "epoch": 4.922037422037422, "percentage": 98.44, "elapsed_time": "0:23:21", "remaining_time": "0:00:22", "throughput": 1357.14, "total_tokens": 1902576}
|
|
{"current_steps": 4740, "total_steps": 4810, "loss": 0.0001, "lr": 3.3178381730661345e-09, "epoch": 4.927234927234927, "percentage": 98.54, "elapsed_time": "0:23:22", "remaining_time": "0:00:20", "throughput": 1358.29, "total_tokens": 1904624}
|
|
{"current_steps": 4745, "total_steps": 4810, "loss": 0.0, "lr": 2.8670774833386427e-09, "epoch": 4.9324324324324325, "percentage": 98.65, "elapsed_time": "0:23:22", "remaining_time": "0:00:19", "throughput": 1359.48, "total_tokens": 1906736}
|
|
{"current_steps": 4750, "total_steps": 4810, "loss": 0.0, "lr": 2.449194822022327e-09, "epoch": 4.9376299376299375, "percentage": 98.75, "elapsed_time": "0:23:22", "remaining_time": "0:00:17", "throughput": 1360.49, "total_tokens": 1908592}
|
|
{"current_steps": 4755, "total_steps": 4810, "loss": 0.0006, "lr": 2.064195691089954e-09, "epoch": 4.942827442827443, "percentage": 98.86, "elapsed_time": "0:23:23", "remaining_time": "0:00:16", "throughput": 1361.59, "total_tokens": 1910576}
|
|
{"current_steps": 4760, "total_steps": 4810, "loss": 0.0, "lr": 1.7120851595597842e-09, "epoch": 4.948024948024948, "percentage": 98.96, "elapsed_time": "0:23:23", "remaining_time": "0:00:14", "throughput": 1362.74, "total_tokens": 1912624}
|
|
{"current_steps": 4765, "total_steps": 4810, "loss": 0.0283, "lr": 1.3928678634289595e-09, "epoch": 4.953222453222454, "percentage": 99.06, "elapsed_time": "0:23:23", "remaining_time": "0:00:13", "throughput": 1363.84, "total_tokens": 1914608}
|
|
{"current_steps": 4770, "total_steps": 4810, "loss": 0.0004, "lr": 1.1065480056110521e-09, "epoch": 4.958419958419959, "percentage": 99.17, "elapsed_time": "0:23:24", "remaining_time": "0:00:11", "throughput": 1364.94, "total_tokens": 1916592}
|
|
{"current_steps": 4775, "total_steps": 4810, "loss": 0.0001, "lr": 8.531293558824983e-10, "epoch": 4.963617463617464, "percentage": 99.27, "elapsed_time": "0:23:24", "remaining_time": "0:00:10", "throughput": 1366.13, "total_tokens": 1918704}
|
|
{"current_steps": 4780, "total_steps": 4810, "loss": 0.0001, "lr": 6.326152508320804e-10, "epoch": 4.968814968814969, "percentage": 99.38, "elapsed_time": "0:23:24", "remaining_time": "0:00:08", "throughput": 1367.18, "total_tokens": 1920624}
|
|
{"current_steps": 4785, "total_steps": 4810, "loss": 0.0013, "lr": 4.450085938170756e-10, "epoch": 4.974012474012474, "percentage": 99.48, "elapsed_time": "0:23:25", "remaining_time": "0:00:07", "throughput": 1368.19, "total_tokens": 1922480}
|
|
{"current_steps": 4790, "total_steps": 4810, "loss": 0.0, "lr": 2.903118549252293e-10, "epoch": 4.979209979209979, "percentage": 99.58, "elapsed_time": "0:23:25", "remaining_time": "0:00:05", "throughput": 1369.28, "total_tokens": 1924464}
|
|
{"current_steps": 4795, "total_steps": 4810, "loss": 0.0001, "lr": 1.6852707094172637e-10, "epoch": 4.984407484407484, "percentage": 99.69, "elapsed_time": "0:23:25", "remaining_time": "0:00:04", "throughput": 1370.38, "total_tokens": 1926448}
|
|
{"current_steps": 4800, "total_steps": 4810, "loss": 0.0002, "lr": 7.965584532282356e-11, "epoch": 4.98960498960499, "percentage": 99.79, "elapsed_time": "0:23:26", "remaining_time": "0:00:02", "throughput": 1371.56, "total_tokens": 1928560}
|
|
{"current_steps": 4805, "total_steps": 4810, "loss": 0.0177, "lr": 2.3699348174754943e-11, "epoch": 4.994802494802495, "percentage": 99.9, "elapsed_time": "0:23:26", "remaining_time": "0:00:01", "throughput": 1372.65, "total_tokens": 1930544}
|
|
{"current_steps": 4810, "total_steps": 4810, "loss": 0.0001, "lr": 6.583162381890162e-13, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:23:26", "remaining_time": "0:00:00", "throughput": 1373.76, "total_tokens": 1932608}
|
|
{"current_steps": 4810, "total_steps": 4810, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:23:50", "remaining_time": "0:00:00", "throughput": 1351.3, "total_tokens": 1932608}
|