983 lines
232 KiB
JSON
983 lines
232 KiB
JSON
{"current_steps": 5, "total_steps": 4810, "loss": 1.2842, "lr": 4.1580041580041583e-07, "epoch": 0.005197505197505198, "percentage": 0.1, "elapsed_time": "0:00:00", "remaining_time": "0:11:53", "throughput": 2757.56, "total_tokens": 2048}
|
|
{"current_steps": 10, "total_steps": 4810, "loss": 0.8545, "lr": 9.355509355509356e-07, "epoch": 0.010395010395010396, "percentage": 0.21, "elapsed_time": "0:00:01", "remaining_time": "0:08:39", "throughput": 3904.41, "total_tokens": 4224}
|
|
{"current_steps": 15, "total_steps": 4810, "loss": 0.3278, "lr": 1.4553014553014554e-06, "epoch": 0.015592515592515593, "percentage": 0.31, "elapsed_time": "0:00:01", "remaining_time": "0:07:30", "throughput": 4453.08, "total_tokens": 6272}
|
|
{"current_steps": 20, "total_steps": 4810, "loss": 0.365, "lr": 1.975051975051975e-06, "epoch": 0.02079002079002079, "percentage": 0.42, "elapsed_time": "0:00:01", "remaining_time": "0:06:55", "throughput": 4827.17, "total_tokens": 8384}
|
|
{"current_steps": 25, "total_steps": 4810, "loss": 0.4433, "lr": 2.494802494802495e-06, "epoch": 0.02598752598752599, "percentage": 0.52, "elapsed_time": "0:00:02", "remaining_time": "0:06:35", "throughput": 5084.42, "total_tokens": 10496}
|
|
{"current_steps": 30, "total_steps": 4810, "loss": 0.2526, "lr": 3.014553014553015e-06, "epoch": 0.031185031185031187, "percentage": 0.62, "elapsed_time": "0:00:02", "remaining_time": "0:06:21", "throughput": 5245.81, "total_tokens": 12544}
|
|
{"current_steps": 35, "total_steps": 4810, "loss": 0.3051, "lr": 3.5343035343035348e-06, "epoch": 0.036382536382536385, "percentage": 0.73, "elapsed_time": "0:00:02", "remaining_time": "0:06:10", "throughput": 5345.36, "total_tokens": 14528}
|
|
{"current_steps": 40, "total_steps": 4810, "loss": 0.5129, "lr": 4.0540540540540545e-06, "epoch": 0.04158004158004158, "percentage": 0.83, "elapsed_time": "0:00:03", "remaining_time": "0:06:03", "throughput": 5443.98, "total_tokens": 16576}
|
|
{"current_steps": 45, "total_steps": 4810, "loss": 0.283, "lr": 4.573804573804574e-06, "epoch": 0.04677754677754678, "percentage": 0.94, "elapsed_time": "0:00:03", "remaining_time": "0:05:57", "throughput": 5503.03, "total_tokens": 18560}
|
|
{"current_steps": 50, "total_steps": 4810, "loss": 0.454, "lr": 5.093555093555094e-06, "epoch": 0.05197505197505198, "percentage": 1.04, "elapsed_time": "0:00:03", "remaining_time": "0:05:52", "throughput": 5567.22, "total_tokens": 20608}
|
|
{"current_steps": 55, "total_steps": 4810, "loss": 0.2518, "lr": 5.613305613305614e-06, "epoch": 0.057172557172557176, "percentage": 1.14, "elapsed_time": "0:00:04", "remaining_time": "0:05:48", "throughput": 5624.89, "total_tokens": 22656}
|
|
{"current_steps": 60, "total_steps": 4810, "loss": 0.2538, "lr": 6.1330561330561335e-06, "epoch": 0.062370062370062374, "percentage": 1.25, "elapsed_time": "0:00:04", "remaining_time": "0:05:44", "throughput": 5658.03, "total_tokens": 24640}
|
|
{"current_steps": 65, "total_steps": 4810, "loss": 0.3571, "lr": 6.652806652806653e-06, "epoch": 0.06756756756756757, "percentage": 1.35, "elapsed_time": "0:00:04", "remaining_time": "0:05:41", "throughput": 5713.0, "total_tokens": 26752}
|
|
{"current_steps": 70, "total_steps": 4810, "loss": 0.3588, "lr": 7.172557172557173e-06, "epoch": 0.07276507276507277, "percentage": 1.46, "elapsed_time": "0:00:05", "remaining_time": "0:05:40", "throughput": 5695.8, "total_tokens": 28608}
|
|
{"current_steps": 75, "total_steps": 4810, "loss": 0.2553, "lr": 7.692307692307694e-06, "epoch": 0.07796257796257797, "percentage": 1.56, "elapsed_time": "0:00:05", "remaining_time": "0:05:38", "throughput": 5762.97, "total_tokens": 30912}
|
|
{"current_steps": 80, "total_steps": 4810, "loss": 0.3869, "lr": 8.212058212058212e-06, "epoch": 0.08316008316008316, "percentage": 1.66, "elapsed_time": "0:00:05", "remaining_time": "0:05:36", "throughput": 5779.52, "total_tokens": 32896}
|
|
{"current_steps": 85, "total_steps": 4810, "loss": 0.3812, "lr": 8.731808731808733e-06, "epoch": 0.08835758835758836, "percentage": 1.77, "elapsed_time": "0:00:06", "remaining_time": "0:05:34", "throughput": 5785.23, "total_tokens": 34816}
|
|
{"current_steps": 90, "total_steps": 4810, "loss": 0.735, "lr": 9.251559251559252e-06, "epoch": 0.09355509355509356, "percentage": 1.87, "elapsed_time": "0:00:06", "remaining_time": "0:05:32", "throughput": 5789.57, "total_tokens": 36736}
|
|
{"current_steps": 95, "total_steps": 4810, "loss": 0.4027, "lr": 9.771309771309773e-06, "epoch": 0.09875259875259876, "percentage": 1.98, "elapsed_time": "0:00:06", "remaining_time": "0:05:31", "throughput": 5803.2, "total_tokens": 38720}
|
|
{"current_steps": 100, "total_steps": 4810, "loss": 0.4605, "lr": 1.0291060291060291e-05, "epoch": 0.10395010395010396, "percentage": 2.08, "elapsed_time": "0:00:06", "remaining_time": "0:05:29", "throughput": 5806.07, "total_tokens": 40640}
|
|
{"current_steps": 105, "total_steps": 4810, "loss": 0.2999, "lr": 1.0810810810810812e-05, "epoch": 0.10914760914760915, "percentage": 2.18, "elapsed_time": "0:00:07", "remaining_time": "0:05:28", "throughput": 5825.69, "total_tokens": 42688}
|
|
{"current_steps": 110, "total_steps": 4810, "loss": 0.2839, "lr": 1.1330561330561331e-05, "epoch": 0.11434511434511435, "percentage": 2.29, "elapsed_time": "0:00:07", "remaining_time": "0:05:27", "throughput": 5819.07, "total_tokens": 44544}
|
|
{"current_steps": 115, "total_steps": 4810, "loss": 0.3329, "lr": 1.1850311850311852e-05, "epoch": 0.11954261954261955, "percentage": 2.39, "elapsed_time": "0:00:07", "remaining_time": "0:05:25", "throughput": 5813.31, "total_tokens": 46400}
|
|
{"current_steps": 120, "total_steps": 4810, "loss": 0.2089, "lr": 1.2370062370062372e-05, "epoch": 0.12474012474012475, "percentage": 2.49, "elapsed_time": "0:00:08", "remaining_time": "0:05:24", "throughput": 5831.11, "total_tokens": 48448}
|
|
{"current_steps": 125, "total_steps": 4810, "loss": 0.2829, "lr": 1.2889812889812891e-05, "epoch": 0.12993762993762994, "percentage": 2.6, "elapsed_time": "0:00:08", "remaining_time": "0:05:23", "throughput": 5847.65, "total_tokens": 50496}
|
|
{"current_steps": 130, "total_steps": 4810, "loss": 0.2434, "lr": 1.3409563409563412e-05, "epoch": 0.13513513513513514, "percentage": 2.7, "elapsed_time": "0:00:08", "remaining_time": "0:05:22", "throughput": 5848.59, "total_tokens": 52416}
|
|
{"current_steps": 135, "total_steps": 4810, "loss": 0.3621, "lr": 1.392931392931393e-05, "epoch": 0.14033264033264034, "percentage": 2.81, "elapsed_time": "0:00:09", "remaining_time": "0:05:21", "throughput": 5862.86, "total_tokens": 54464}
|
|
{"current_steps": 140, "total_steps": 4810, "loss": 0.3338, "lr": 1.4449064449064451e-05, "epoch": 0.14553014553014554, "percentage": 2.91, "elapsed_time": "0:00:09", "remaining_time": "0:05:20", "throughput": 5869.77, "total_tokens": 56448}
|
|
{"current_steps": 145, "total_steps": 4810, "loss": 0.3185, "lr": 1.496881496881497e-05, "epoch": 0.15072765072765074, "percentage": 3.01, "elapsed_time": "0:00:09", "remaining_time": "0:05:19", "throughput": 5869.29, "total_tokens": 58368}
|
|
{"current_steps": 150, "total_steps": 4810, "loss": 0.2379, "lr": 1.548856548856549e-05, "epoch": 0.15592515592515593, "percentage": 3.12, "elapsed_time": "0:00:10", "remaining_time": "0:05:19", "throughput": 5893.67, "total_tokens": 60544}
|
|
{"current_steps": 155, "total_steps": 4810, "loss": 0.3634, "lr": 1.600831600831601e-05, "epoch": 0.16112266112266113, "percentage": 3.22, "elapsed_time": "0:00:10", "remaining_time": "0:05:18", "throughput": 5905.86, "total_tokens": 62592}
|
|
{"current_steps": 160, "total_steps": 4810, "loss": 0.3093, "lr": 1.652806652806653e-05, "epoch": 0.16632016632016633, "percentage": 3.33, "elapsed_time": "0:00:10", "remaining_time": "0:05:17", "throughput": 5911.89, "total_tokens": 64576}
|
|
{"current_steps": 165, "total_steps": 4810, "loss": 0.3377, "lr": 1.704781704781705e-05, "epoch": 0.17151767151767153, "percentage": 3.43, "elapsed_time": "0:00:11", "remaining_time": "0:05:16", "throughput": 5927.46, "total_tokens": 66688}
|
|
{"current_steps": 170, "total_steps": 4810, "loss": 0.3224, "lr": 1.756756756756757e-05, "epoch": 0.17671517671517672, "percentage": 3.53, "elapsed_time": "0:00:11", "remaining_time": "0:05:16", "throughput": 5919.3, "total_tokens": 68544}
|
|
{"current_steps": 175, "total_steps": 4810, "loss": 0.3814, "lr": 1.808731808731809e-05, "epoch": 0.18191268191268192, "percentage": 3.64, "elapsed_time": "0:00:12", "remaining_time": "0:05:27", "throughput": 5710.55, "total_tokens": 70592}
|
|
{"current_steps": 180, "total_steps": 4810, "loss": 0.338, "lr": 1.8607068607068607e-05, "epoch": 0.18711018711018712, "percentage": 3.74, "elapsed_time": "0:00:12", "remaining_time": "0:05:26", "throughput": 5718.74, "total_tokens": 72576}
|
|
{"current_steps": 185, "total_steps": 4810, "loss": 0.2853, "lr": 1.9126819126819128e-05, "epoch": 0.19230769230769232, "percentage": 3.85, "elapsed_time": "0:00:13", "remaining_time": "0:05:25", "throughput": 5731.48, "total_tokens": 74624}
|
|
{"current_steps": 190, "total_steps": 4810, "loss": 0.3374, "lr": 1.964656964656965e-05, "epoch": 0.19750519750519752, "percentage": 3.95, "elapsed_time": "0:00:13", "remaining_time": "0:05:24", "throughput": 5738.89, "total_tokens": 76608}
|
|
{"current_steps": 195, "total_steps": 4810, "loss": 0.3221, "lr": 2.016632016632017e-05, "epoch": 0.20270270270270271, "percentage": 4.05, "elapsed_time": "0:00:13", "remaining_time": "0:05:23", "throughput": 5754.71, "total_tokens": 78720}
|
|
{"current_steps": 200, "total_steps": 4810, "loss": 0.3944, "lr": 2.068607068607069e-05, "epoch": 0.2079002079002079, "percentage": 4.16, "elapsed_time": "0:00:14", "remaining_time": "0:05:23", "throughput": 5787.47, "total_tokens": 81152}
|
|
{"current_steps": 205, "total_steps": 4810, "loss": 0.3755, "lr": 2.1205821205821207e-05, "epoch": 0.2130977130977131, "percentage": 4.26, "elapsed_time": "0:00:14", "remaining_time": "0:05:22", "throughput": 5796.3, "total_tokens": 83200}
|
|
{"current_steps": 210, "total_steps": 4810, "loss": 0.2949, "lr": 2.1725571725571728e-05, "epoch": 0.2182952182952183, "percentage": 4.37, "elapsed_time": "0:00:14", "remaining_time": "0:05:21", "throughput": 5799.6, "total_tokens": 85184}
|
|
{"current_steps": 215, "total_steps": 4810, "loss": 0.2174, "lr": 2.2245322245322248e-05, "epoch": 0.2234927234927235, "percentage": 4.47, "elapsed_time": "0:00:15", "remaining_time": "0:05:21", "throughput": 5807.71, "total_tokens": 87232}
|
|
{"current_steps": 220, "total_steps": 4810, "loss": 0.6706, "lr": 2.276507276507277e-05, "epoch": 0.2286902286902287, "percentage": 4.57, "elapsed_time": "0:00:15", "remaining_time": "0:05:20", "throughput": 5807.19, "total_tokens": 89152}
|
|
{"current_steps": 225, "total_steps": 4810, "loss": 0.4387, "lr": 2.3284823284823286e-05, "epoch": 0.2338877338877339, "percentage": 4.68, "elapsed_time": "0:00:15", "remaining_time": "0:05:19", "throughput": 5823.81, "total_tokens": 91328}
|
|
{"current_steps": 230, "total_steps": 4810, "loss": 0.3047, "lr": 2.3804573804573807e-05, "epoch": 0.2390852390852391, "percentage": 4.78, "elapsed_time": "0:00:16", "remaining_time": "0:05:18", "throughput": 5827.3, "total_tokens": 93312}
|
|
{"current_steps": 235, "total_steps": 4810, "loss": 0.2717, "lr": 2.4324324324324327e-05, "epoch": 0.2442827442827443, "percentage": 4.89, "elapsed_time": "0:00:16", "remaining_time": "0:05:18", "throughput": 5831.51, "total_tokens": 95296}
|
|
{"current_steps": 240, "total_steps": 4810, "loss": 0.2682, "lr": 2.4844074844074848e-05, "epoch": 0.2494802494802495, "percentage": 4.99, "elapsed_time": "0:00:16", "remaining_time": "0:05:17", "throughput": 5831.46, "total_tokens": 97216}
|
|
{"current_steps": 241, "total_steps": 4810, "eval_loss": 0.38468512892723083, "epoch": 0.2505197505197505, "percentage": 5.01, "elapsed_time": "0:00:18", "remaining_time": "0:05:47", "throughput": 5322.21, "total_tokens": 97664}
|
|
{"current_steps": 245, "total_steps": 4810, "loss": 0.344, "lr": 2.5363825363825365e-05, "epoch": 0.25467775467775466, "percentage": 5.09, "elapsed_time": "0:01:24", "remaining_time": "0:26:21", "throughput": 1169.55, "total_tokens": 99264}
|
|
{"current_steps": 250, "total_steps": 4810, "loss": 0.3993, "lr": 2.5883575883575882e-05, "epoch": 0.2598752598752599, "percentage": 5.2, "elapsed_time": "0:01:25", "remaining_time": "0:25:53", "throughput": 1187.72, "total_tokens": 101184}
|
|
{"current_steps": 255, "total_steps": 4810, "loss": 0.4414, "lr": 2.6403326403326406e-05, "epoch": 0.26507276507276506, "percentage": 5.3, "elapsed_time": "0:01:25", "remaining_time": "0:25:27", "throughput": 1207.94, "total_tokens": 103296}
|
|
{"current_steps": 260, "total_steps": 4810, "loss": 0.3322, "lr": 2.6923076923076923e-05, "epoch": 0.2702702702702703, "percentage": 5.41, "elapsed_time": "0:01:25", "remaining_time": "0:25:02", "throughput": 1227.21, "total_tokens": 105344}
|
|
{"current_steps": 265, "total_steps": 4810, "loss": 0.3314, "lr": 2.7442827442827447e-05, "epoch": 0.27546777546777546, "percentage": 5.51, "elapsed_time": "0:01:26", "remaining_time": "0:24:37", "throughput": 1246.38, "total_tokens": 107392}
|
|
{"current_steps": 270, "total_steps": 4810, "loss": 0.2951, "lr": 2.796257796257796e-05, "epoch": 0.2806652806652807, "percentage": 5.61, "elapsed_time": "0:01:26", "remaining_time": "0:24:14", "throughput": 1265.4, "total_tokens": 109440}
|
|
{"current_steps": 275, "total_steps": 4810, "loss": 0.2986, "lr": 2.8482328482328485e-05, "epoch": 0.28586278586278585, "percentage": 5.72, "elapsed_time": "0:01:26", "remaining_time": "0:23:51", "throughput": 1283.53, "total_tokens": 111424}
|
|
{"current_steps": 280, "total_steps": 4810, "loss": 0.407, "lr": 2.9002079002079002e-05, "epoch": 0.2910602910602911, "percentage": 5.82, "elapsed_time": "0:01:27", "remaining_time": "0:23:29", "throughput": 1301.55, "total_tokens": 113408}
|
|
{"current_steps": 285, "total_steps": 4810, "loss": 0.4506, "lr": 2.9521829521829526e-05, "epoch": 0.29625779625779625, "percentage": 5.93, "elapsed_time": "0:01:27", "remaining_time": "0:23:08", "throughput": 1319.43, "total_tokens": 115392}
|
|
{"current_steps": 290, "total_steps": 4810, "loss": 0.4738, "lr": 3.0041580041580043e-05, "epoch": 0.30145530145530147, "percentage": 6.03, "elapsed_time": "0:01:27", "remaining_time": "0:22:48", "throughput": 1337.92, "total_tokens": 117440}
|
|
{"current_steps": 295, "total_steps": 4810, "loss": 0.5248, "lr": 3.056133056133057e-05, "epoch": 0.30665280665280664, "percentage": 6.13, "elapsed_time": "0:01:28", "remaining_time": "0:22:28", "throughput": 1355.54, "total_tokens": 119424}
|
|
{"current_steps": 300, "total_steps": 4810, "loss": 0.3321, "lr": 3.108108108108108e-05, "epoch": 0.31185031185031187, "percentage": 6.24, "elapsed_time": "0:01:28", "remaining_time": "0:22:09", "throughput": 1372.32, "total_tokens": 121344}
|
|
{"current_steps": 305, "total_steps": 4810, "loss": 0.3707, "lr": 3.16008316008316e-05, "epoch": 0.31704781704781704, "percentage": 6.34, "elapsed_time": "0:01:28", "remaining_time": "0:21:50", "throughput": 1388.99, "total_tokens": 123264}
|
|
{"current_steps": 310, "total_steps": 4810, "loss": 0.3574, "lr": 3.212058212058212e-05, "epoch": 0.32224532224532226, "percentage": 6.44, "elapsed_time": "0:01:29", "remaining_time": "0:21:32", "throughput": 1405.52, "total_tokens": 125184}
|
|
{"current_steps": 315, "total_steps": 4810, "loss": 0.2851, "lr": 3.264033264033264e-05, "epoch": 0.32744282744282743, "percentage": 6.55, "elapsed_time": "0:01:29", "remaining_time": "0:21:15", "throughput": 1424.06, "total_tokens": 127296}
|
|
{"current_steps": 320, "total_steps": 4810, "loss": 0.2794, "lr": 3.3160083160083164e-05, "epoch": 0.33264033264033266, "percentage": 6.65, "elapsed_time": "0:01:29", "remaining_time": "0:20:58", "throughput": 1442.47, "total_tokens": 129408}
|
|
{"current_steps": 325, "total_steps": 4810, "loss": 0.3015, "lr": 3.3679833679833684e-05, "epoch": 0.33783783783783783, "percentage": 6.76, "elapsed_time": "0:01:30", "remaining_time": "0:20:42", "throughput": 1460.75, "total_tokens": 131520}
|
|
{"current_steps": 330, "total_steps": 4810, "loss": 0.2826, "lr": 3.41995841995842e-05, "epoch": 0.34303534303534305, "percentage": 6.86, "elapsed_time": "0:01:30", "remaining_time": "0:20:26", "throughput": 1478.21, "total_tokens": 133568}
|
|
{"current_steps": 335, "total_steps": 4810, "loss": 0.3704, "lr": 3.4719334719334725e-05, "epoch": 0.3482328482328482, "percentage": 6.96, "elapsed_time": "0:01:30", "remaining_time": "0:20:11", "throughput": 1495.54, "total_tokens": 135616}
|
|
{"current_steps": 340, "total_steps": 4810, "loss": 0.4519, "lr": 3.523908523908524e-05, "epoch": 0.35343035343035345, "percentage": 7.07, "elapsed_time": "0:01:31", "remaining_time": "0:19:56", "throughput": 1512.75, "total_tokens": 137664}
|
|
{"current_steps": 345, "total_steps": 4810, "loss": 0.2863, "lr": 3.575883575883576e-05, "epoch": 0.3586278586278586, "percentage": 7.17, "elapsed_time": "0:01:31", "remaining_time": "0:19:41", "throughput": 1528.46, "total_tokens": 139584}
|
|
{"current_steps": 350, "total_steps": 4810, "loss": 0.4921, "lr": 3.627858627858628e-05, "epoch": 0.36382536382536385, "percentage": 7.28, "elapsed_time": "0:01:31", "remaining_time": "0:19:27", "throughput": 1544.03, "total_tokens": 141504}
|
|
{"current_steps": 355, "total_steps": 4810, "loss": 0.3078, "lr": 3.67983367983368e-05, "epoch": 0.369022869022869, "percentage": 7.38, "elapsed_time": "0:01:31", "remaining_time": "0:19:14", "throughput": 1560.88, "total_tokens": 143552}
|
|
{"current_steps": 360, "total_steps": 4810, "loss": 0.252, "lr": 3.731808731808732e-05, "epoch": 0.37422037422037424, "percentage": 7.48, "elapsed_time": "0:01:32", "remaining_time": "0:19:00", "throughput": 1576.94, "total_tokens": 145536}
|
|
{"current_steps": 365, "total_steps": 4810, "loss": 0.2894, "lr": 3.783783783783784e-05, "epoch": 0.3794178794178794, "percentage": 7.59, "elapsed_time": "0:01:32", "remaining_time": "0:18:47", "throughput": 1592.2, "total_tokens": 147456}
|
|
{"current_steps": 370, "total_steps": 4810, "loss": 0.1779, "lr": 3.8357588357588356e-05, "epoch": 0.38461538461538464, "percentage": 7.69, "elapsed_time": "0:01:32", "remaining_time": "0:18:35", "throughput": 1608.02, "total_tokens": 149440}
|
|
{"current_steps": 375, "total_steps": 4810, "loss": 0.4928, "lr": 3.8877338877338883e-05, "epoch": 0.3898128898128898, "percentage": 7.8, "elapsed_time": "0:01:33", "remaining_time": "0:18:22", "throughput": 1623.06, "total_tokens": 151360}
|
|
{"current_steps": 380, "total_steps": 4810, "loss": 0.4363, "lr": 3.93970893970894e-05, "epoch": 0.39501039501039503, "percentage": 7.9, "elapsed_time": "0:01:33", "remaining_time": "0:18:10", "throughput": 1638.67, "total_tokens": 153344}
|
|
{"current_steps": 385, "total_steps": 4810, "loss": 0.2689, "lr": 3.991683991683992e-05, "epoch": 0.4002079002079002, "percentage": 8.0, "elapsed_time": "0:01:33", "remaining_time": "0:17:59", "throughput": 1653.51, "total_tokens": 155264}
|
|
{"current_steps": 390, "total_steps": 4810, "loss": 0.2539, "lr": 4.043659043659044e-05, "epoch": 0.40540540540540543, "percentage": 8.11, "elapsed_time": "0:01:34", "remaining_time": "0:17:47", "throughput": 1668.91, "total_tokens": 157248}
|
|
{"current_steps": 395, "total_steps": 4810, "loss": 0.332, "lr": 4.095634095634096e-05, "epoch": 0.4106029106029106, "percentage": 8.21, "elapsed_time": "0:01:34", "remaining_time": "0:17:36", "throughput": 1684.86, "total_tokens": 159296}
|
|
{"current_steps": 400, "total_steps": 4810, "loss": 0.3, "lr": 4.147609147609148e-05, "epoch": 0.4158004158004158, "percentage": 8.32, "elapsed_time": "0:01:34", "remaining_time": "0:17:25", "throughput": 1700.71, "total_tokens": 161344}
|
|
{"current_steps": 405, "total_steps": 4810, "loss": 0.2376, "lr": 4.1995841995842e-05, "epoch": 0.420997920997921, "percentage": 8.42, "elapsed_time": "0:01:35", "remaining_time": "0:17:15", "throughput": 1715.8, "total_tokens": 163328}
|
|
{"current_steps": 410, "total_steps": 4810, "loss": 0.2709, "lr": 4.2515592515592514e-05, "epoch": 0.4261954261954262, "percentage": 8.52, "elapsed_time": "0:01:35", "remaining_time": "0:17:05", "throughput": 1730.79, "total_tokens": 165312}
|
|
{"current_steps": 415, "total_steps": 4810, "loss": 0.1471, "lr": 4.303534303534304e-05, "epoch": 0.4313929313929314, "percentage": 8.63, "elapsed_time": "0:01:35", "remaining_time": "0:16:54", "throughput": 1746.32, "total_tokens": 167360}
|
|
{"current_steps": 420, "total_steps": 4810, "loss": 0.9994, "lr": 4.3555093555093555e-05, "epoch": 0.4365904365904366, "percentage": 8.73, "elapsed_time": "0:01:36", "remaining_time": "0:16:45", "throughput": 1761.1, "total_tokens": 169344}
|
|
{"current_steps": 425, "total_steps": 4810, "loss": 0.603, "lr": 4.407484407484408e-05, "epoch": 0.4417879417879418, "percentage": 8.84, "elapsed_time": "0:01:36", "remaining_time": "0:16:35", "throughput": 1777.09, "total_tokens": 171456}
|
|
{"current_steps": 430, "total_steps": 4810, "loss": 0.3054, "lr": 4.4594594594594596e-05, "epoch": 0.446985446985447, "percentage": 8.94, "elapsed_time": "0:01:36", "remaining_time": "0:16:26", "throughput": 1792.97, "total_tokens": 173568}
|
|
{"current_steps": 435, "total_steps": 4810, "loss": 0.3208, "lr": 4.511434511434512e-05, "epoch": 0.4521829521829522, "percentage": 9.04, "elapsed_time": "0:01:37", "remaining_time": "0:16:16", "throughput": 1807.43, "total_tokens": 175552}
|
|
{"current_steps": 440, "total_steps": 4810, "loss": 0.2942, "lr": 4.563409563409564e-05, "epoch": 0.4573804573804574, "percentage": 9.15, "elapsed_time": "0:01:37", "remaining_time": "0:16:07", "throughput": 1821.82, "total_tokens": 177536}
|
|
{"current_steps": 445, "total_steps": 4810, "loss": 0.3005, "lr": 4.615384615384616e-05, "epoch": 0.4625779625779626, "percentage": 9.25, "elapsed_time": "0:01:37", "remaining_time": "0:15:59", "throughput": 1836.74, "total_tokens": 179584}
|
|
{"current_steps": 450, "total_steps": 4810, "loss": 1.7773, "lr": 4.667359667359668e-05, "epoch": 0.4677754677754678, "percentage": 9.36, "elapsed_time": "0:01:38", "remaining_time": "0:15:50", "throughput": 1850.96, "total_tokens": 181568}
|
|
{"current_steps": 455, "total_steps": 4810, "loss": 0.2889, "lr": 4.71933471933472e-05, "epoch": 0.47297297297297297, "percentage": 9.46, "elapsed_time": "0:01:38", "remaining_time": "0:15:41", "throughput": 1865.07, "total_tokens": 183552}
|
|
{"current_steps": 460, "total_steps": 4810, "loss": 0.2556, "lr": 4.771309771309771e-05, "epoch": 0.4781704781704782, "percentage": 9.56, "elapsed_time": "0:01:38", "remaining_time": "0:15:33", "throughput": 1879.73, "total_tokens": 185600}
|
|
{"current_steps": 465, "total_steps": 4810, "loss": 0.2378, "lr": 4.823284823284824e-05, "epoch": 0.48336798336798337, "percentage": 9.67, "elapsed_time": "0:01:39", "remaining_time": "0:15:25", "throughput": 1893.65, "total_tokens": 187584}
|
|
{"current_steps": 470, "total_steps": 4810, "loss": 0.2859, "lr": 4.8752598752598754e-05, "epoch": 0.4885654885654886, "percentage": 9.77, "elapsed_time": "0:01:39", "remaining_time": "0:15:17", "throughput": 1907.49, "total_tokens": 189568}
|
|
{"current_steps": 475, "total_steps": 4810, "loss": 0.2438, "lr": 4.9272349272349275e-05, "epoch": 0.49376299376299376, "percentage": 9.88, "elapsed_time": "0:01:39", "remaining_time": "0:15:09", "throughput": 1922.49, "total_tokens": 191680}
|
|
{"current_steps": 480, "total_steps": 4810, "loss": 0.3383, "lr": 4.9792099792099796e-05, "epoch": 0.498960498960499, "percentage": 9.98, "elapsed_time": "0:01:40", "remaining_time": "0:15:02", "throughput": 1936.77, "total_tokens": 193728}
|
|
{"current_steps": 482, "total_steps": 4810, "eval_loss": 0.4080815315246582, "epoch": 0.501039501039501, "percentage": 10.02, "elapsed_time": "0:01:42", "remaining_time": "0:15:22", "throughput": 1893.12, "total_tokens": 194560}
|
|
{"current_steps": 485, "total_steps": 4810, "loss": 0.381, "lr": 4.999994075155936e-05, "epoch": 0.5041580041580042, "percentage": 10.08, "elapsed_time": "0:02:26", "remaining_time": "0:21:48", "throughput": 1333.87, "total_tokens": 195776}
|
|
{"current_steps": 490, "total_steps": 4810, "loss": 0.2916, "lr": 4.999957867877242e-05, "epoch": 0.5093555093555093, "percentage": 10.19, "elapsed_time": "0:02:27", "remaining_time": "0:21:36", "throughput": 1343.99, "total_tokens": 197696}
|
|
{"current_steps": 495, "total_steps": 4810, "loss": 0.2766, "lr": 4.999888745376028e-05, "epoch": 0.5145530145530145, "percentage": 10.29, "elapsed_time": "0:02:27", "remaining_time": "0:21:25", "throughput": 1354.51, "total_tokens": 199680}
|
|
{"current_steps": 500, "total_steps": 4810, "loss": 0.2596, "lr": 4.9997867085623824e-05, "epoch": 0.5197505197505198, "percentage": 10.4, "elapsed_time": "0:02:27", "remaining_time": "0:21:13", "throughput": 1365.84, "total_tokens": 201792}
|
|
{"current_steps": 505, "total_steps": 4810, "loss": 0.2689, "lr": 4.999651758779754e-05, "epoch": 0.524948024948025, "percentage": 10.5, "elapsed_time": "0:02:28", "remaining_time": "0:21:02", "throughput": 1376.7, "total_tokens": 203840}
|
|
{"current_steps": 510, "total_steps": 4810, "loss": 0.3363, "lr": 4.999483897804933e-05, "epoch": 0.5301455301455301, "percentage": 10.6, "elapsed_time": "0:02:28", "remaining_time": "0:20:51", "throughput": 1387.08, "total_tokens": 205824}
|
|
{"current_steps": 515, "total_steps": 4810, "loss": 0.2583, "lr": 4.999283127848029e-05, "epoch": 0.5353430353430353, "percentage": 10.71, "elapsed_time": "0:02:28", "remaining_time": "0:20:40", "throughput": 1398.29, "total_tokens": 207936}
|
|
{"current_steps": 520, "total_steps": 4810, "loss": 0.3915, "lr": 4.999049451552443e-05, "epoch": 0.5405405405405406, "percentage": 10.81, "elapsed_time": "0:02:29", "remaining_time": "0:20:29", "throughput": 1409.0, "total_tokens": 209984}
|
|
{"current_steps": 525, "total_steps": 4810, "loss": 0.2461, "lr": 4.9987828719948284e-05, "epoch": 0.5457380457380457, "percentage": 10.91, "elapsed_time": "0:02:29", "remaining_time": "0:20:19", "throughput": 1420.08, "total_tokens": 212096}
|
|
{"current_steps": 530, "total_steps": 4810, "loss": 0.2863, "lr": 4.998483392685055e-05, "epoch": 0.5509355509355509, "percentage": 11.02, "elapsed_time": "0:02:29", "remaining_time": "0:20:08", "throughput": 1430.25, "total_tokens": 214080}
|
|
{"current_steps": 535, "total_steps": 4810, "loss": 0.3052, "lr": 4.9981510175661606e-05, "epoch": 0.5561330561330561, "percentage": 11.12, "elapsed_time": "0:02:30", "remaining_time": "0:19:58", "throughput": 1440.81, "total_tokens": 216128}
|
|
{"current_steps": 540, "total_steps": 4810, "loss": 0.3625, "lr": 4.9977857510143e-05, "epoch": 0.5613305613305614, "percentage": 11.23, "elapsed_time": "0:02:30", "remaining_time": "0:19:48", "throughput": 1451.32, "total_tokens": 218176}
|
|
{"current_steps": 545, "total_steps": 4810, "loss": 0.2801, "lr": 4.9973875978386843e-05, "epoch": 0.5665280665280665, "percentage": 11.33, "elapsed_time": "0:02:30", "remaining_time": "0:19:38", "throughput": 1460.95, "total_tokens": 220096}
|
|
{"current_steps": 550, "total_steps": 4810, "loss": 0.3041, "lr": 4.996956563281524e-05, "epoch": 0.5717255717255717, "percentage": 11.43, "elapsed_time": "0:02:30", "remaining_time": "0:19:29", "throughput": 1470.96, "total_tokens": 222080}
|
|
{"current_steps": 555, "total_steps": 4810, "loss": 0.2498, "lr": 4.996492653017952e-05, "epoch": 0.5769230769230769, "percentage": 11.54, "elapsed_time": "0:02:31", "remaining_time": "0:19:19", "throughput": 1480.53, "total_tokens": 224000}
|
|
{"current_steps": 560, "total_steps": 4810, "loss": 0.3224, "lr": 4.995995873155958e-05, "epoch": 0.5821205821205822, "percentage": 11.64, "elapsed_time": "0:02:31", "remaining_time": "0:19:10", "throughput": 1490.44, "total_tokens": 225984}
|
|
{"current_steps": 565, "total_steps": 4810, "loss": 0.322, "lr": 4.9954662302362973e-05, "epoch": 0.5873180873180873, "percentage": 11.75, "elapsed_time": "0:02:31", "remaining_time": "0:19:01", "throughput": 1499.45, "total_tokens": 227840}
|
|
{"current_steps": 570, "total_steps": 4810, "loss": 0.302, "lr": 4.9949037312324155e-05, "epoch": 0.5925155925155925, "percentage": 11.85, "elapsed_time": "0:02:32", "remaining_time": "0:18:52", "throughput": 1509.26, "total_tokens": 229824}
|
|
{"current_steps": 575, "total_steps": 4810, "loss": 0.3875, "lr": 4.9943083835503467e-05, "epoch": 0.5977130977130977, "percentage": 11.95, "elapsed_time": "0:02:32", "remaining_time": "0:18:43", "throughput": 1519.42, "total_tokens": 231872}
|
|
{"current_steps": 580, "total_steps": 4810, "loss": 0.314, "lr": 4.993680195028626e-05, "epoch": 0.6029106029106029, "percentage": 12.06, "elapsed_time": "0:02:32", "remaining_time": "0:18:35", "throughput": 1529.47, "total_tokens": 233920}
|
|
{"current_steps": 585, "total_steps": 4810, "loss": 0.3189, "lr": 4.9930191739381775e-05, "epoch": 0.6081081081081081, "percentage": 12.16, "elapsed_time": "0:02:33", "remaining_time": "0:18:27", "throughput": 1538.53, "total_tokens": 235840}
|
|
{"current_steps": 590, "total_steps": 4810, "loss": 0.3418, "lr": 4.9923253289822116e-05, "epoch": 0.6133056133056133, "percentage": 12.27, "elapsed_time": "0:02:33", "remaining_time": "0:18:18", "throughput": 1549.37, "total_tokens": 238016}
|
|
{"current_steps": 595, "total_steps": 4810, "loss": 0.3307, "lr": 4.9915986692961045e-05, "epoch": 0.6185031185031185, "percentage": 12.37, "elapsed_time": "0:02:33", "remaining_time": "0:18:10", "throughput": 1559.36, "total_tokens": 240064}
|
|
{"current_steps": 600, "total_steps": 4810, "loss": 0.2835, "lr": 4.9908392044472865e-05, "epoch": 0.6237006237006237, "percentage": 12.47, "elapsed_time": "0:02:34", "remaining_time": "0:18:02", "throughput": 1568.82, "total_tokens": 242048}
|
|
{"current_steps": 605, "total_steps": 4810, "loss": 0.2584, "lr": 4.990046944435105e-05, "epoch": 0.6288981288981289, "percentage": 12.58, "elapsed_time": "0:02:34", "remaining_time": "0:17:54", "throughput": 1577.81, "total_tokens": 243968}
|
|
{"current_steps": 610, "total_steps": 4810, "loss": 0.2953, "lr": 4.989221899690704e-05, "epoch": 0.6340956340956341, "percentage": 12.68, "elapsed_time": "0:02:34", "remaining_time": "0:17:46", "throughput": 1587.65, "total_tokens": 246016}
|
|
{"current_steps": 615, "total_steps": 4810, "loss": 0.3214, "lr": 4.9883640810768764e-05, "epoch": 0.6392931392931392, "percentage": 12.79, "elapsed_time": "0:02:35", "remaining_time": "0:17:39", "throughput": 1597.04, "total_tokens": 248000}
|
|
{"current_steps": 620, "total_steps": 4810, "loss": 0.312, "lr": 4.9874734998879316e-05, "epoch": 0.6444906444906445, "percentage": 12.89, "elapsed_time": "0:02:35", "remaining_time": "0:17:31", "throughput": 1606.81, "total_tokens": 250048}
|
|
{"current_steps": 625, "total_steps": 4810, "loss": 0.3215, "lr": 4.9865501678495375e-05, "epoch": 0.6496881496881497, "percentage": 12.99, "elapsed_time": "0:02:35", "remaining_time": "0:17:24", "throughput": 1616.56, "total_tokens": 252096}
|
|
{"current_steps": 630, "total_steps": 4810, "loss": 0.2892, "lr": 4.98559409711857e-05, "epoch": 0.6548856548856549, "percentage": 13.1, "elapsed_time": "0:02:36", "remaining_time": "0:17:16", "throughput": 1626.24, "total_tokens": 254144}
|
|
{"current_steps": 635, "total_steps": 4810, "loss": 0.3016, "lr": 4.984605300282954e-05, "epoch": 0.66008316008316, "percentage": 13.2, "elapsed_time": "0:02:36", "remaining_time": "0:17:09", "throughput": 1635.48, "total_tokens": 256128}
|
|
{"current_steps": 640, "total_steps": 4810, "loss": 0.2596, "lr": 4.983583790361497e-05, "epoch": 0.6652806652806653, "percentage": 13.31, "elapsed_time": "0:02:36", "remaining_time": "0:17:02", "throughput": 1644.28, "total_tokens": 258048}
|
|
{"current_steps": 645, "total_steps": 4810, "loss": 0.363, "lr": 4.982529580803714e-05, "epoch": 0.6704781704781705, "percentage": 13.41, "elapsed_time": "0:02:37", "remaining_time": "0:16:55", "throughput": 1655.47, "total_tokens": 260352}
|
|
{"current_steps": 650, "total_steps": 4810, "loss": 0.2849, "lr": 4.981442685489659e-05, "epoch": 0.6756756756756757, "percentage": 13.51, "elapsed_time": "0:02:37", "remaining_time": "0:16:48", "throughput": 1664.2, "total_tokens": 262272}
|
|
{"current_steps": 655, "total_steps": 4810, "loss": 0.2969, "lr": 4.9803231187297304e-05, "epoch": 0.6808731808731808, "percentage": 13.62, "elapsed_time": "0:02:37", "remaining_time": "0:16:41", "throughput": 1673.71, "total_tokens": 264320}
|
|
{"current_steps": 660, "total_steps": 4810, "loss": 0.3962, "lr": 4.979170895264494e-05, "epoch": 0.6860706860706861, "percentage": 13.72, "elapsed_time": "0:02:38", "remaining_time": "0:16:35", "throughput": 1682.4, "total_tokens": 266240}
|
|
{"current_steps": 665, "total_steps": 4810, "loss": 0.312, "lr": 4.977986030264482e-05, "epoch": 0.6912681912681913, "percentage": 13.83, "elapsed_time": "0:02:38", "remaining_time": "0:16:28", "throughput": 1691.44, "total_tokens": 268224}
|
|
{"current_steps": 670, "total_steps": 4810, "loss": 0.2552, "lr": 4.976768539329994e-05, "epoch": 0.6964656964656964, "percentage": 13.93, "elapsed_time": "0:02:38", "remaining_time": "0:16:21", "throughput": 1700.79, "total_tokens": 270272}
|
|
{"current_steps": 675, "total_steps": 4810, "loss": 0.312, "lr": 4.975518438490897e-05, "epoch": 0.7016632016632016, "percentage": 14.03, "elapsed_time": "0:02:39", "remaining_time": "0:16:15", "throughput": 1709.69, "total_tokens": 272256}
|
|
{"current_steps": 680, "total_steps": 4810, "loss": 0.2889, "lr": 4.9742357442064045e-05, "epoch": 0.7068607068607069, "percentage": 14.14, "elapsed_time": "0:02:39", "remaining_time": "0:16:09", "throughput": 1718.52, "total_tokens": 274240}
|
|
{"current_steps": 685, "total_steps": 4810, "loss": 0.2841, "lr": 4.972920473364869e-05, "epoch": 0.7120582120582121, "percentage": 14.24, "elapsed_time": "0:02:39", "remaining_time": "0:16:02", "throughput": 1727.83, "total_tokens": 276288}
|
|
{"current_steps": 690, "total_steps": 4810, "loss": 0.3076, "lr": 4.971572643283557e-05, "epoch": 0.7172557172557172, "percentage": 14.35, "elapsed_time": "0:02:40", "remaining_time": "0:15:56", "throughput": 1736.68, "total_tokens": 278272}
|
|
{"current_steps": 695, "total_steps": 4810, "loss": 0.285, "lr": 4.970192271708416e-05, "epoch": 0.7224532224532224, "percentage": 14.45, "elapsed_time": "0:02:40", "remaining_time": "0:15:50", "throughput": 1746.31, "total_tokens": 280384}
|
|
{"current_steps": 700, "total_steps": 4810, "loss": 0.2376, "lr": 4.968779376813849e-05, "epoch": 0.7276507276507277, "percentage": 14.55, "elapsed_time": "0:02:40", "remaining_time": "0:15:44", "throughput": 1755.12, "total_tokens": 282368}
|
|
{"current_steps": 705, "total_steps": 4810, "loss": 0.2787, "lr": 4.967333977202469e-05, "epoch": 0.7328482328482329, "percentage": 14.66, "elapsed_time": "0:02:41", "remaining_time": "0:15:38", "throughput": 1764.26, "total_tokens": 284416}
|
|
{"current_steps": 710, "total_steps": 4810, "loss": 0.214, "lr": 4.965856091904855e-05, "epoch": 0.738045738045738, "percentage": 14.76, "elapsed_time": "0:02:41", "remaining_time": "0:15:32", "throughput": 1773.35, "total_tokens": 286464}
|
|
{"current_steps": 715, "total_steps": 4810, "loss": 0.2858, "lr": 4.964345740379307e-05, "epoch": 0.7432432432432432, "percentage": 14.86, "elapsed_time": "0:02:41", "remaining_time": "0:15:27", "throughput": 1782.08, "total_tokens": 288448}
|
|
{"current_steps": 720, "total_steps": 4810, "loss": 0.2962, "lr": 4.962802942511581e-05, "epoch": 0.7484407484407485, "percentage": 14.97, "elapsed_time": "0:02:42", "remaining_time": "0:15:21", "throughput": 1791.15, "total_tokens": 290496}
|
|
{"current_steps": 723, "total_steps": 4810, "eval_loss": 0.29600390791893005, "epoch": 0.7515592515592515, "percentage": 15.03, "elapsed_time": "0:02:45", "remaining_time": "0:15:36", "throughput": 1760.16, "total_tokens": 291712}
|
|
{"current_steps": 725, "total_steps": 4810, "loss": 0.3061, "lr": 4.9612277186146335e-05, "epoch": 0.7536382536382537, "percentage": 15.07, "elapsed_time": "0:03:10", "remaining_time": "0:17:54", "throughput": 1533.17, "total_tokens": 292480}
|
|
{"current_steps": 730, "total_steps": 4810, "loss": 0.281, "lr": 4.959620089428354e-05, "epoch": 0.7588357588357588, "percentage": 15.18, "elapsed_time": "0:03:11", "remaining_time": "0:17:48", "throughput": 1540.92, "total_tokens": 294464}
|
|
{"current_steps": 735, "total_steps": 4810, "loss": 0.2702, "lr": 4.957980076119285e-05, "epoch": 0.764033264033264, "percentage": 15.28, "elapsed_time": "0:03:11", "remaining_time": "0:17:41", "throughput": 1548.64, "total_tokens": 296448}
|
|
{"current_steps": 740, "total_steps": 4810, "loss": 0.3146, "lr": 4.956307700280354e-05, "epoch": 0.7692307692307693, "percentage": 15.38, "elapsed_time": "0:03:11", "remaining_time": "0:17:34", "throughput": 1556.34, "total_tokens": 298432}
|
|
{"current_steps": 745, "total_steps": 4810, "loss": 0.2567, "lr": 4.954602983930581e-05, "epoch": 0.7744282744282744, "percentage": 15.49, "elapsed_time": "0:03:12", "remaining_time": "0:17:28", "throughput": 1564.35, "total_tokens": 300480}
|
|
{"current_steps": 750, "total_steps": 4810, "loss": 0.2488, "lr": 4.95286594951479e-05, "epoch": 0.7796257796257796, "percentage": 15.59, "elapsed_time": "0:03:12", "remaining_time": "0:17:21", "throughput": 1571.67, "total_tokens": 302400}
|
|
{"current_steps": 755, "total_steps": 4810, "loss": 0.2852, "lr": 4.9510966199033174e-05, "epoch": 0.7848232848232848, "percentage": 15.7, "elapsed_time": "0:03:12", "remaining_time": "0:17:15", "throughput": 1578.95, "total_tokens": 304320}
|
|
{"current_steps": 760, "total_steps": 4810, "loss": 0.2968, "lr": 4.949295018391706e-05, "epoch": 0.7900207900207901, "percentage": 15.8, "elapsed_time": "0:03:13", "remaining_time": "0:17:08", "throughput": 1586.22, "total_tokens": 306240}
|
|
{"current_steps": 765, "total_steps": 4810, "loss": 0.2957, "lr": 4.947461168700402e-05, "epoch": 0.7952182952182952, "percentage": 15.9, "elapsed_time": "0:03:13", "remaining_time": "0:17:02", "throughput": 1592.79, "total_tokens": 308032}
|
|
{"current_steps": 770, "total_steps": 4810, "loss": 0.2531, "lr": 4.945595094974442e-05, "epoch": 0.8004158004158004, "percentage": 16.01, "elapsed_time": "0:03:13", "remaining_time": "0:16:56", "throughput": 1600.02, "total_tokens": 309952}
|
|
{"current_steps": 775, "total_steps": 4810, "loss": 0.2396, "lr": 4.94369682178313e-05, "epoch": 0.8056133056133056, "percentage": 16.11, "elapsed_time": "0:03:14", "remaining_time": "0:16:50", "throughput": 1607.56, "total_tokens": 311936}
|
|
{"current_steps": 780, "total_steps": 4810, "loss": 0.3127, "lr": 4.9417663741197236e-05, "epoch": 0.8108108108108109, "percentage": 16.22, "elapsed_time": "0:03:14", "remaining_time": "0:16:44", "throughput": 1615.06, "total_tokens": 313920}
|
|
{"current_steps": 785, "total_steps": 4810, "loss": 0.2873, "lr": 4.939803777401095e-05, "epoch": 0.816008316008316, "percentage": 16.32, "elapsed_time": "0:03:14", "remaining_time": "0:16:38", "throughput": 1622.88, "total_tokens": 315968}
|
|
{"current_steps": 790, "total_steps": 4810, "loss": 0.263, "lr": 4.937809057467404e-05, "epoch": 0.8212058212058212, "percentage": 16.42, "elapsed_time": "0:03:15", "remaining_time": "0:16:32", "throughput": 1630.33, "total_tokens": 317952}
|
|
{"current_steps": 795, "total_steps": 4810, "loss": 0.2762, "lr": 4.935782240581752e-05, "epoch": 0.8264033264033264, "percentage": 16.53, "elapsed_time": "0:03:15", "remaining_time": "0:16:26", "throughput": 1637.42, "total_tokens": 319872}
|
|
{"current_steps": 800, "total_steps": 4810, "loss": 0.2759, "lr": 4.9337233534298425e-05, "epoch": 0.8316008316008316, "percentage": 16.63, "elapsed_time": "0:03:15", "remaining_time": "0:16:20", "throughput": 1644.81, "total_tokens": 321856}
|
|
{"current_steps": 805, "total_steps": 4810, "loss": 0.2849, "lr": 4.931632423119621e-05, "epoch": 0.8367983367983368, "percentage": 16.74, "elapsed_time": "0:03:16", "remaining_time": "0:16:15", "throughput": 1652.83, "total_tokens": 323968}
|
|
{"current_steps": 810, "total_steps": 4810, "loss": 0.2775, "lr": 4.9295094771809285e-05, "epoch": 0.841995841995842, "percentage": 16.84, "elapsed_time": "0:03:16", "remaining_time": "0:16:09", "throughput": 1660.17, "total_tokens": 325952}
|
|
{"current_steps": 815, "total_steps": 4810, "loss": 0.1962, "lr": 4.92735454356513e-05, "epoch": 0.8471933471933472, "percentage": 16.94, "elapsed_time": "0:03:16", "remaining_time": "0:16:04", "throughput": 1667.81, "total_tokens": 328000}
|
|
{"current_steps": 820, "total_steps": 4810, "loss": 0.2237, "lr": 4.925167650644752e-05, "epoch": 0.8523908523908524, "percentage": 17.05, "elapsed_time": "0:03:16", "remaining_time": "0:15:58", "throughput": 1675.11, "total_tokens": 329984}
|
|
{"current_steps": 825, "total_steps": 4810, "loss": 0.3432, "lr": 4.9229488272131067e-05, "epoch": 0.8575883575883576, "percentage": 17.15, "elapsed_time": "0:03:17", "remaining_time": "0:15:55", "throughput": 1678.38, "total_tokens": 331904}
|
|
{"current_steps": 830, "total_steps": 4810, "loss": 0.3102, "lr": 4.920698102483912e-05, "epoch": 0.8627858627858628, "percentage": 17.26, "elapsed_time": "0:03:18", "remaining_time": "0:15:49", "throughput": 1685.65, "total_tokens": 333888}
|
|
{"current_steps": 835, "total_steps": 4810, "loss": 0.3035, "lr": 4.918415506090911e-05, "epoch": 0.867983367983368, "percentage": 17.36, "elapsed_time": "0:03:18", "remaining_time": "0:15:44", "throughput": 1692.85, "total_tokens": 335872}
|
|
{"current_steps": 840, "total_steps": 4810, "loss": 0.2682, "lr": 4.916101068087476e-05, "epoch": 0.8731808731808732, "percentage": 17.46, "elapsed_time": "0:03:18", "remaining_time": "0:15:39", "throughput": 1700.06, "total_tokens": 337856}
|
|
{"current_steps": 845, "total_steps": 4810, "loss": 0.2422, "lr": 4.913754818946219e-05, "epoch": 0.8783783783783784, "percentage": 17.57, "elapsed_time": "0:03:19", "remaining_time": "0:15:34", "throughput": 1706.95, "total_tokens": 339776}
|
|
{"current_steps": 850, "total_steps": 4810, "loss": 0.1949, "lr": 4.911376789558584e-05, "epoch": 0.8835758835758836, "percentage": 17.67, "elapsed_time": "0:03:19", "remaining_time": "0:15:28", "throughput": 1714.11, "total_tokens": 341760}
|
|
{"current_steps": 855, "total_steps": 4810, "loss": 0.3477, "lr": 4.9089670112344456e-05, "epoch": 0.8887733887733887, "percentage": 17.78, "elapsed_time": "0:03:19", "remaining_time": "0:15:23", "throughput": 1720.91, "total_tokens": 343680}
|
|
{"current_steps": 860, "total_steps": 4810, "loss": 0.292, "lr": 4.906525515701695e-05, "epoch": 0.893970893970894, "percentage": 17.88, "elapsed_time": "0:03:20", "remaining_time": "0:15:18", "throughput": 1727.68, "total_tokens": 345600}
|
|
{"current_steps": 865, "total_steps": 4810, "loss": 0.2896, "lr": 4.904052335105822e-05, "epoch": 0.8991683991683992, "percentage": 17.98, "elapsed_time": "0:03:20", "remaining_time": "0:15:13", "throughput": 1734.39, "total_tokens": 347520}
|
|
{"current_steps": 870, "total_steps": 4810, "loss": 0.2644, "lr": 4.90154750200949e-05, "epoch": 0.9043659043659044, "percentage": 18.09, "elapsed_time": "0:03:20", "remaining_time": "0:15:08", "throughput": 1741.73, "total_tokens": 349568}
|
|
{"current_steps": 875, "total_steps": 4810, "loss": 0.3212, "lr": 4.8990110493921105e-05, "epoch": 0.9095634095634095, "percentage": 18.19, "elapsed_time": "0:03:21", "remaining_time": "0:15:04", "throughput": 1748.72, "total_tokens": 351552}
|
|
{"current_steps": 880, "total_steps": 4810, "loss": 0.2787, "lr": 4.8964430106494075e-05, "epoch": 0.9147609147609148, "percentage": 18.3, "elapsed_time": "0:03:21", "remaining_time": "0:14:59", "throughput": 1755.37, "total_tokens": 353472}
|
|
{"current_steps": 885, "total_steps": 4810, "loss": 0.2656, "lr": 4.893843419592977e-05, "epoch": 0.91995841995842, "percentage": 18.4, "elapsed_time": "0:03:21", "remaining_time": "0:14:54", "throughput": 1762.01, "total_tokens": 355392}
|
|
{"current_steps": 890, "total_steps": 4810, "loss": 0.2593, "lr": 4.891212310449844e-05, "epoch": 0.9251559251559252, "percentage": 18.5, "elapsed_time": "0:03:22", "remaining_time": "0:14:49", "throughput": 1769.28, "total_tokens": 357440}
|
|
{"current_steps": 895, "total_steps": 4810, "loss": 0.2785, "lr": 4.8885497178620095e-05, "epoch": 0.9303534303534303, "percentage": 18.61, "elapsed_time": "0:03:22", "remaining_time": "0:14:45", "throughput": 1776.53, "total_tokens": 359488}
|
|
{"current_steps": 900, "total_steps": 4810, "loss": 0.283, "lr": 4.8858556768859944e-05, "epoch": 0.9355509355509356, "percentage": 18.71, "elapsed_time": "0:03:22", "remaining_time": "0:14:40", "throughput": 1783.12, "total_tokens": 361408}
|
|
{"current_steps": 905, "total_steps": 4810, "loss": 0.27, "lr": 4.88313022299238e-05, "epoch": 0.9407484407484408, "percentage": 18.81, "elapsed_time": "0:03:23", "remaining_time": "0:14:35", "throughput": 1790.0, "total_tokens": 363392}
|
|
{"current_steps": 910, "total_steps": 4810, "loss": 0.2987, "lr": 4.88037339206534e-05, "epoch": 0.9459459459459459, "percentage": 18.92, "elapsed_time": "0:03:23", "remaining_time": "0:14:31", "throughput": 1797.17, "total_tokens": 365440}
|
|
{"current_steps": 915, "total_steps": 4810, "loss": 0.2998, "lr": 4.8775852204021665e-05, "epoch": 0.9511434511434511, "percentage": 19.02, "elapsed_time": "0:03:23", "remaining_time": "0:14:26", "throughput": 1804.95, "total_tokens": 367616}
|
|
{"current_steps": 920, "total_steps": 4810, "loss": 0.358, "lr": 4.874765744712796e-05, "epoch": 0.9563409563409564, "percentage": 19.13, "elapsed_time": "0:03:24", "remaining_time": "0:14:22", "throughput": 1811.76, "total_tokens": 369600}
|
|
{"current_steps": 925, "total_steps": 4810, "loss": 0.2755, "lr": 4.871915002119321e-05, "epoch": 0.9615384615384616, "percentage": 19.23, "elapsed_time": "0:03:24", "remaining_time": "0:14:18", "throughput": 1818.25, "total_tokens": 371520}
|
|
{"current_steps": 930, "total_steps": 4810, "loss": 0.3085, "lr": 4.8690330301555045e-05, "epoch": 0.9667359667359667, "percentage": 19.33, "elapsed_time": "0:03:24", "remaining_time": "0:14:13", "throughput": 1825.34, "total_tokens": 373568}
|
|
{"current_steps": 935, "total_steps": 4810, "loss": 0.2548, "lr": 4.8661198667662854e-05, "epoch": 0.9719334719334719, "percentage": 19.44, "elapsed_time": "0:03:24", "remaining_time": "0:14:09", "throughput": 1831.78, "total_tokens": 375488}
|
|
{"current_steps": 940, "total_steps": 4810, "loss": 0.3274, "lr": 4.86317555030728e-05, "epoch": 0.9771309771309772, "percentage": 19.54, "elapsed_time": "0:03:25", "remaining_time": "0:14:05", "throughput": 1839.74, "total_tokens": 377728}
|
|
{"current_steps": 945, "total_steps": 4810, "loss": 0.2924, "lr": 4.8602001195442725e-05, "epoch": 0.9823284823284824, "percentage": 19.65, "elapsed_time": "0:03:25", "remaining_time": "0:14:01", "throughput": 1847.07, "total_tokens": 379840}
|
|
{"current_steps": 950, "total_steps": 4810, "loss": 0.2685, "lr": 4.857193613652711e-05, "epoch": 0.9875259875259875, "percentage": 19.75, "elapsed_time": "0:03:25", "remaining_time": "0:13:56", "throughput": 1853.45, "total_tokens": 381760}
|
|
{"current_steps": 955, "total_steps": 4810, "loss": 0.2743, "lr": 4.8541560722171855e-05, "epoch": 0.9927234927234927, "percentage": 19.85, "elapsed_time": "0:03:26", "remaining_time": "0:13:52", "throughput": 1860.42, "total_tokens": 383808}
|
|
{"current_steps": 960, "total_steps": 4810, "loss": 0.2807, "lr": 4.8510875352309106e-05, "epoch": 0.997920997920998, "percentage": 19.96, "elapsed_time": "0:03:26", "remaining_time": "0:13:48", "throughput": 1867.39, "total_tokens": 385856}
|
|
{"current_steps": 964, "total_steps": 4810, "eval_loss": 0.2738620638847351, "epoch": 1.002079002079002, "percentage": 20.04, "elapsed_time": "0:03:27", "remaining_time": "0:13:49", "throughput": 1862.86, "total_tokens": 387464}
|
|
{"current_steps": 965, "total_steps": 4810, "loss": 0.2643, "lr": 4.8479880430951995e-05, "epoch": 1.003118503118503, "percentage": 20.06, "elapsed_time": "0:04:47", "remaining_time": "0:19:04", "throughput": 1350.42, "total_tokens": 387848}
|
|
{"current_steps": 970, "total_steps": 4810, "loss": 0.2613, "lr": 4.844857636618928e-05, "epoch": 1.0083160083160083, "percentage": 20.17, "elapsed_time": "0:04:47", "remaining_time": "0:18:58", "throughput": 1355.13, "total_tokens": 389640}
|
|
{"current_steps": 975, "total_steps": 4810, "loss": 0.2824, "lr": 4.8416963570180025e-05, "epoch": 1.0135135135135136, "percentage": 20.27, "elapsed_time": "0:04:47", "remaining_time": "0:18:52", "throughput": 1360.5, "total_tokens": 391624}
|
|
{"current_steps": 980, "total_steps": 4810, "loss": 0.3008, "lr": 4.838504245914812e-05, "epoch": 1.0187110187110187, "percentage": 20.37, "elapsed_time": "0:04:48", "remaining_time": "0:18:46", "throughput": 1366.09, "total_tokens": 393672}
|
|
{"current_steps": 985, "total_steps": 4810, "loss": 0.3048, "lr": 4.8352813453376836e-05, "epoch": 1.023908523908524, "percentage": 20.48, "elapsed_time": "0:04:48", "remaining_time": "0:18:40", "throughput": 1371.87, "total_tokens": 395784}
|
|
{"current_steps": 990, "total_steps": 4810, "loss": 0.3205, "lr": 4.83202769772033e-05, "epoch": 1.0291060291060292, "percentage": 20.58, "elapsed_time": "0:04:48", "remaining_time": "0:18:34", "throughput": 1377.22, "total_tokens": 397768}
|
|
{"current_steps": 995, "total_steps": 4810, "loss": 0.2694, "lr": 4.8287433459012844e-05, "epoch": 1.0343035343035343, "percentage": 20.69, "elapsed_time": "0:04:49", "remaining_time": "0:18:28", "throughput": 1382.77, "total_tokens": 399816}
|
|
{"current_steps": 1000, "total_steps": 4810, "loss": 0.3263, "lr": 4.8254283331233464e-05, "epoch": 1.0395010395010396, "percentage": 20.79, "elapsed_time": "0:04:49", "remaining_time": "0:18:22", "throughput": 1388.51, "total_tokens": 401928}
|
|
{"current_steps": 1005, "total_steps": 4810, "loss": 0.3028, "lr": 4.822082703033003e-05, "epoch": 1.0446985446985446, "percentage": 20.89, "elapsed_time": "0:04:49", "remaining_time": "0:18:17", "throughput": 1393.82, "total_tokens": 403912}
|
|
{"current_steps": 1010, "total_steps": 4810, "loss": 0.2501, "lr": 4.818706499679862e-05, "epoch": 1.04989604989605, "percentage": 21.0, "elapsed_time": "0:04:50", "remaining_time": "0:18:11", "throughput": 1398.9, "total_tokens": 405832}
|
|
{"current_steps": 1015, "total_steps": 4810, "loss": 0.339, "lr": 4.815299767516065e-05, "epoch": 1.0550935550935552, "percentage": 21.1, "elapsed_time": "0:04:50", "remaining_time": "0:18:05", "throughput": 1404.4, "total_tokens": 407880}
|
|
{"current_steps": 1020, "total_steps": 4810, "loss": 0.2748, "lr": 4.8118625513957074e-05, "epoch": 1.0602910602910602, "percentage": 21.21, "elapsed_time": "0:04:50", "remaining_time": "0:18:00", "throughput": 1410.53, "total_tokens": 410120}
|
|
{"current_steps": 1025, "total_steps": 4810, "loss": 0.3246, "lr": 4.808394896574245e-05, "epoch": 1.0654885654885655, "percentage": 21.31, "elapsed_time": "0:04:51", "remaining_time": "0:17:54", "throughput": 1416.01, "total_tokens": 412168}
|
|
{"current_steps": 1030, "total_steps": 4810, "loss": 0.2849, "lr": 4.8048968487079e-05, "epoch": 1.0706860706860706, "percentage": 21.41, "elapsed_time": "0:04:51", "remaining_time": "0:17:49", "throughput": 1422.32, "total_tokens": 414472}
|
|
{"current_steps": 1035, "total_steps": 4810, "loss": 0.2908, "lr": 4.8013684538530565e-05, "epoch": 1.0758835758835759, "percentage": 21.52, "elapsed_time": "0:04:51", "remaining_time": "0:17:44", "throughput": 1427.78, "total_tokens": 416520}
|
|
{"current_steps": 1040, "total_steps": 4810, "loss": 0.2774, "lr": 4.79780975846566e-05, "epoch": 1.0810810810810811, "percentage": 21.62, "elapsed_time": "0:04:52", "remaining_time": "0:17:38", "throughput": 1433.21, "total_tokens": 418568}
|
|
{"current_steps": 1045, "total_steps": 4810, "loss": 0.2417, "lr": 4.7942208094006e-05, "epoch": 1.0862785862785862, "percentage": 21.73, "elapsed_time": "0:04:52", "remaining_time": "0:17:33", "throughput": 1438.21, "total_tokens": 420488}
|
|
{"current_steps": 1050, "total_steps": 4810, "loss": 0.2736, "lr": 4.790601653911094e-05, "epoch": 1.0914760914760915, "percentage": 21.83, "elapsed_time": "0:04:52", "remaining_time": "0:17:28", "throughput": 1443.4, "total_tokens": 422472}
|
|
{"current_steps": 1055, "total_steps": 4810, "loss": 0.3165, "lr": 4.786952339648071e-05, "epoch": 1.0966735966735968, "percentage": 21.93, "elapsed_time": "0:04:53", "remaining_time": "0:17:22", "throughput": 1448.59, "total_tokens": 424456}
|
|
{"current_steps": 1060, "total_steps": 4810, "loss": 0.305, "lr": 4.783272914659535e-05, "epoch": 1.1018711018711018, "percentage": 22.04, "elapsed_time": "0:04:53", "remaining_time": "0:17:17", "throughput": 1454.19, "total_tokens": 426568}
|
|
{"current_steps": 1065, "total_steps": 4810, "loss": 0.2809, "lr": 4.77956342738994e-05, "epoch": 1.107068607068607, "percentage": 22.14, "elapsed_time": "0:04:53", "remaining_time": "0:17:12", "throughput": 1459.35, "total_tokens": 428552}
|
|
{"current_steps": 1070, "total_steps": 4810, "loss": 0.2758, "lr": 4.775823926679548e-05, "epoch": 1.1122661122661124, "percentage": 22.25, "elapsed_time": "0:04:53", "remaining_time": "0:17:07", "throughput": 1464.27, "total_tokens": 430472}
|
|
{"current_steps": 1075, "total_steps": 4810, "loss": 0.2657, "lr": 4.77205446176379e-05, "epoch": 1.1174636174636174, "percentage": 22.35, "elapsed_time": "0:04:54", "remaining_time": "0:17:02", "throughput": 1468.97, "total_tokens": 432328}
|
|
{"current_steps": 1080, "total_steps": 4810, "loss": 0.2881, "lr": 4.768255082272611e-05, "epoch": 1.1226611226611227, "percentage": 22.45, "elapsed_time": "0:04:54", "remaining_time": "0:16:57", "throughput": 1474.52, "total_tokens": 434440}
|
|
{"current_steps": 1085, "total_steps": 4810, "loss": 0.2938, "lr": 4.764425838229824e-05, "epoch": 1.1278586278586278, "percentage": 22.56, "elapsed_time": "0:04:54", "remaining_time": "0:16:52", "throughput": 1479.85, "total_tokens": 436488}
|
|
{"current_steps": 1090, "total_steps": 4810, "loss": 0.4153, "lr": 4.760566780052445e-05, "epoch": 1.133056133056133, "percentage": 22.66, "elapsed_time": "0:04:55", "remaining_time": "0:16:47", "throughput": 1484.95, "total_tokens": 438472}
|
|
{"current_steps": 1095, "total_steps": 4810, "loss": 0.3296, "lr": 4.7566779585500347e-05, "epoch": 1.1382536382536383, "percentage": 22.77, "elapsed_time": "0:04:55", "remaining_time": "0:16:42", "throughput": 1490.05, "total_tokens": 440456}
|
|
{"current_steps": 1100, "total_steps": 4810, "loss": 0.3082, "lr": 4.7527594249240264e-05, "epoch": 1.1434511434511434, "percentage": 22.87, "elapsed_time": "0:04:55", "remaining_time": "0:16:38", "throughput": 1495.12, "total_tokens": 442440}
|
|
{"current_steps": 1105, "total_steps": 4810, "loss": 0.3109, "lr": 4.748811230767051e-05, "epoch": 1.1486486486486487, "percentage": 22.97, "elapsed_time": "0:04:56", "remaining_time": "0:16:33", "throughput": 1500.18, "total_tokens": 444424}
|
|
{"current_steps": 1110, "total_steps": 4810, "loss": 0.2754, "lr": 4.744833428062262e-05, "epoch": 1.1538461538461537, "percentage": 23.08, "elapsed_time": "0:04:56", "remaining_time": "0:16:28", "throughput": 1504.81, "total_tokens": 446280}
|
|
{"current_steps": 1115, "total_steps": 4810, "loss": 0.286, "lr": 4.740826069182645e-05, "epoch": 1.159043659043659, "percentage": 23.18, "elapsed_time": "0:04:56", "remaining_time": "0:16:23", "throughput": 1509.85, "total_tokens": 448264}
|
|
{"current_steps": 1120, "total_steps": 4810, "loss": 0.2806, "lr": 4.736789206890332e-05, "epoch": 1.1642411642411643, "percentage": 23.28, "elapsed_time": "0:04:57", "remaining_time": "0:16:19", "throughput": 1515.31, "total_tokens": 450376}
|
|
{"current_steps": 1125, "total_steps": 4810, "loss": 0.2575, "lr": 4.732722894335909e-05, "epoch": 1.1694386694386694, "percentage": 23.39, "elapsed_time": "0:04:57", "remaining_time": "0:16:14", "throughput": 1520.98, "total_tokens": 452552}
|
|
{"current_steps": 1130, "total_steps": 4810, "loss": 0.2989, "lr": 4.7286271850577105e-05, "epoch": 1.1746361746361746, "percentage": 23.49, "elapsed_time": "0:04:57", "remaining_time": "0:16:10", "throughput": 1526.2, "total_tokens": 454600}
|
|
{"current_steps": 1135, "total_steps": 4810, "loss": 0.5214, "lr": 4.724502132981119e-05, "epoch": 1.17983367983368, "percentage": 23.6, "elapsed_time": "0:04:58", "remaining_time": "0:16:05", "throughput": 1531.41, "total_tokens": 456648}
|
|
{"current_steps": 1140, "total_steps": 4810, "loss": 0.3325, "lr": 4.7203477924178506e-05, "epoch": 1.185031185031185, "percentage": 23.7, "elapsed_time": "0:04:58", "remaining_time": "0:16:00", "throughput": 1536.4, "total_tokens": 458632}
|
|
{"current_steps": 1145, "total_steps": 4810, "loss": 0.2795, "lr": 4.7161642180652464e-05, "epoch": 1.1902286902286903, "percentage": 23.8, "elapsed_time": "0:04:58", "remaining_time": "0:15:56", "throughput": 1541.59, "total_tokens": 460680}
|
|
{"current_steps": 1150, "total_steps": 4810, "loss": 0.2697, "lr": 4.7119514650055476e-05, "epoch": 1.1954261954261955, "percentage": 23.91, "elapsed_time": "0:04:59", "remaining_time": "0:15:52", "throughput": 1546.76, "total_tokens": 462728}
|
|
{"current_steps": 1155, "total_steps": 4810, "loss": 0.2429, "lr": 4.7077095887051686e-05, "epoch": 1.2006237006237006, "percentage": 24.01, "elapsed_time": "0:04:59", "remaining_time": "0:15:47", "throughput": 1551.93, "total_tokens": 464776}
|
|
{"current_steps": 1160, "total_steps": 4810, "loss": 0.2844, "lr": 4.7034386450139735e-05, "epoch": 1.2058212058212059, "percentage": 24.12, "elapsed_time": "0:04:59", "remaining_time": "0:15:43", "throughput": 1556.65, "total_tokens": 466696}
|
|
{"current_steps": 1165, "total_steps": 4810, "loss": 0.259, "lr": 4.699138690164533e-05, "epoch": 1.211018711018711, "percentage": 24.22, "elapsed_time": "0:05:00", "remaining_time": "0:15:39", "throughput": 1561.37, "total_tokens": 468616}
|
|
{"current_steps": 1170, "total_steps": 4810, "loss": 0.2734, "lr": 4.694809780771391e-05, "epoch": 1.2162162162162162, "percentage": 24.32, "elapsed_time": "0:05:00", "remaining_time": "0:15:34", "throughput": 1566.71, "total_tokens": 470728}
|
|
{"current_steps": 1175, "total_steps": 4810, "loss": 0.2763, "lr": 4.690451973830313e-05, "epoch": 1.2214137214137215, "percentage": 24.43, "elapsed_time": "0:05:00", "remaining_time": "0:15:30", "throughput": 1571.84, "total_tokens": 472776}
|
|
{"current_steps": 1180, "total_steps": 4810, "loss": 0.2615, "lr": 4.6860653267175416e-05, "epoch": 1.2266112266112266, "percentage": 24.53, "elapsed_time": "0:05:01", "remaining_time": "0:15:26", "throughput": 1576.95, "total_tokens": 474824}
|
|
{"current_steps": 1185, "total_steps": 4810, "loss": 0.2894, "lr": 4.681649897189036e-05, "epoch": 1.2318087318087318, "percentage": 24.64, "elapsed_time": "0:05:01", "remaining_time": "0:15:22", "throughput": 1581.63, "total_tokens": 476744}
|
|
{"current_steps": 1190, "total_steps": 4810, "loss": 0.2065, "lr": 4.677205743379713e-05, "epoch": 1.237006237006237, "percentage": 24.74, "elapsed_time": "0:05:01", "remaining_time": "0:15:17", "throughput": 1586.92, "total_tokens": 478856}
|
|
{"current_steps": 1195, "total_steps": 4810, "loss": 0.4129, "lr": 4.672732923802685e-05, "epoch": 1.2422037422037422, "percentage": 24.84, "elapsed_time": "0:05:02", "remaining_time": "0:15:13", "throughput": 1591.58, "total_tokens": 480776}
|
|
{"current_steps": 1200, "total_steps": 4810, "loss": 0.2716, "lr": 4.668231497348484e-05, "epoch": 1.2474012474012475, "percentage": 24.95, "elapsed_time": "0:05:02", "remaining_time": "0:15:09", "throughput": 1597.07, "total_tokens": 482952}
|
|
{"current_steps": 1205, "total_steps": 4810, "loss": 0.2836, "lr": 4.663701523284291e-05, "epoch": 1.2525987525987525, "percentage": 25.05, "elapsed_time": "0:05:02", "remaining_time": "0:15:05", "throughput": 1602.75, "total_tokens": 485192}
|
|
{"current_steps": 1205, "total_steps": 4810, "eval_loss": 0.2581372559070587, "epoch": 1.2525987525987525, "percentage": 25.05, "elapsed_time": "0:05:04", "remaining_time": "0:15:10", "throughput": 1594.6, "total_tokens": 485192}
|
|
{"current_steps": 1210, "total_steps": 4810, "loss": 0.2541, "lr": 4.6591430612531515e-05, "epoch": 1.2577962577962578, "percentage": 25.16, "elapsed_time": "0:05:38", "remaining_time": "0:16:45", "throughput": 1440.85, "total_tokens": 487112}
|
|
{"current_steps": 1215, "total_steps": 4810, "loss": 0.3056, "lr": 4.6545561712731954e-05, "epoch": 1.262993762993763, "percentage": 25.26, "elapsed_time": "0:05:38", "remaining_time": "0:16:41", "throughput": 1445.51, "total_tokens": 489160}
|
|
{"current_steps": 1220, "total_steps": 4810, "loss": 0.2656, "lr": 4.649940913736841e-05, "epoch": 1.2681912681912682, "percentage": 25.36, "elapsed_time": "0:05:38", "remaining_time": "0:16:36", "throughput": 1449.79, "total_tokens": 491080}
|
|
{"current_steps": 1225, "total_steps": 4810, "loss": 0.2917, "lr": 4.645297349410005e-05, "epoch": 1.2733887733887734, "percentage": 25.47, "elapsed_time": "0:05:39", "remaining_time": "0:16:32", "throughput": 1454.25, "total_tokens": 493064}
|
|
{"current_steps": 1230, "total_steps": 4810, "loss": 0.2878, "lr": 4.640625539431298e-05, "epoch": 1.2785862785862787, "percentage": 25.57, "elapsed_time": "0:05:39", "remaining_time": "0:16:27", "throughput": 1458.51, "total_tokens": 494984}
|
|
{"current_steps": 1235, "total_steps": 4810, "loss": 0.2686, "lr": 4.635925545311224e-05, "epoch": 1.2837837837837838, "percentage": 25.68, "elapsed_time": "0:05:39", "remaining_time": "0:16:23", "throughput": 1462.97, "total_tokens": 496968}
|
|
{"current_steps": 1240, "total_steps": 4810, "loss": 0.2747, "lr": 4.6311974289313646e-05, "epoch": 1.288981288981289, "percentage": 25.78, "elapsed_time": "0:05:40", "remaining_time": "0:16:18", "throughput": 1467.03, "total_tokens": 498824}
|
|
{"current_steps": 1245, "total_steps": 4810, "loss": 0.2269, "lr": 4.6264412525435716e-05, "epoch": 1.2941787941787941, "percentage": 25.88, "elapsed_time": "0:05:40", "remaining_time": "0:16:14", "throughput": 1471.46, "total_tokens": 500808}
|
|
{"current_steps": 1250, "total_steps": 4810, "loss": 0.2595, "lr": 4.6216570787691423e-05, "epoch": 1.2993762993762994, "percentage": 25.99, "elapsed_time": "0:05:40", "remaining_time": "0:16:10", "throughput": 1476.07, "total_tokens": 502856}
|
|
{"current_steps": 1255, "total_steps": 4810, "loss": 0.2367, "lr": 4.6168449705979956e-05, "epoch": 1.3045738045738045, "percentage": 26.09, "elapsed_time": "0:05:40", "remaining_time": "0:16:05", "throughput": 1480.09, "total_tokens": 504712}
|
|
{"current_steps": 1260, "total_steps": 4810, "loss": 0.3177, "lr": 4.612004991387843e-05, "epoch": 1.3097713097713097, "percentage": 26.2, "elapsed_time": "0:05:41", "remaining_time": "0:16:01", "throughput": 1484.5, "total_tokens": 506696}
|
|
{"current_steps": 1265, "total_steps": 4810, "loss": 0.2562, "lr": 4.6071372048633566e-05, "epoch": 1.314968814968815, "percentage": 26.3, "elapsed_time": "0:05:41", "remaining_time": "0:15:57", "throughput": 1488.88, "total_tokens": 508680}
|
|
{"current_steps": 1270, "total_steps": 4810, "loss": 0.291, "lr": 4.6022416751153255e-05, "epoch": 1.32016632016632, "percentage": 26.4, "elapsed_time": "0:05:41", "remaining_time": "0:15:53", "throughput": 1493.46, "total_tokens": 510728}
|
|
{"current_steps": 1275, "total_steps": 4810, "loss": 0.2441, "lr": 4.5973184665998186e-05, "epoch": 1.3253638253638254, "percentage": 26.51, "elapsed_time": "0:05:42", "remaining_time": "0:15:49", "throughput": 1497.83, "total_tokens": 512712}
|
|
{"current_steps": 1280, "total_steps": 4810, "loss": 0.2674, "lr": 4.5923676441373287e-05, "epoch": 1.3305613305613306, "percentage": 26.61, "elapsed_time": "0:05:42", "remaining_time": "0:15:44", "throughput": 1502.2, "total_tokens": 514696}
|
|
{"current_steps": 1285, "total_steps": 4810, "loss": 0.2628, "lr": 4.5873892729119225e-05, "epoch": 1.3357588357588357, "percentage": 26.72, "elapsed_time": "0:05:42", "remaining_time": "0:15:40", "throughput": 1506.93, "total_tokens": 516808}
|
|
{"current_steps": 1290, "total_steps": 4810, "loss": 0.208, "lr": 4.582383418470386e-05, "epoch": 1.340956340956341, "percentage": 26.82, "elapsed_time": "0:05:43", "remaining_time": "0:15:36", "throughput": 1511.28, "total_tokens": 518792}
|
|
{"current_steps": 1295, "total_steps": 4810, "loss": 0.2791, "lr": 4.577350146721353e-05, "epoch": 1.3461538461538463, "percentage": 26.92, "elapsed_time": "0:05:43", "remaining_time": "0:15:32", "throughput": 1515.81, "total_tokens": 520840}
|
|
{"current_steps": 1300, "total_steps": 4810, "loss": 0.2367, "lr": 4.5722895239344435e-05, "epoch": 1.3513513513513513, "percentage": 27.03, "elapsed_time": "0:05:43", "remaining_time": "0:15:28", "throughput": 1519.95, "total_tokens": 522760}
|
|
{"current_steps": 1305, "total_steps": 4810, "loss": 0.2853, "lr": 4.567201616739393e-05, "epoch": 1.3565488565488566, "percentage": 27.13, "elapsed_time": "0:05:44", "remaining_time": "0:15:24", "throughput": 1524.65, "total_tokens": 524872}
|
|
{"current_steps": 1310, "total_steps": 4810, "loss": 0.2922, "lr": 4.562086492125167e-05, "epoch": 1.3617463617463619, "percentage": 27.23, "elapsed_time": "0:05:44", "remaining_time": "0:15:20", "throughput": 1529.15, "total_tokens": 526920}
|
|
{"current_steps": 1315, "total_steps": 4810, "loss": 0.2892, "lr": 4.556944217439088e-05, "epoch": 1.366943866943867, "percentage": 27.34, "elapsed_time": "0:05:44", "remaining_time": "0:15:16", "throughput": 1533.64, "total_tokens": 528968}
|
|
{"current_steps": 1320, "total_steps": 4810, "loss": 0.2689, "lr": 4.5517748603859435e-05, "epoch": 1.3721413721413722, "percentage": 27.44, "elapsed_time": "0:05:45", "remaining_time": "0:15:12", "throughput": 1537.76, "total_tokens": 530888}
|
|
{"current_steps": 1325, "total_steps": 4810, "loss": 0.2348, "lr": 4.546578489027095e-05, "epoch": 1.3773388773388773, "percentage": 27.55, "elapsed_time": "0:05:45", "remaining_time": "0:15:08", "throughput": 1542.06, "total_tokens": 532872}
|
|
{"current_steps": 1330, "total_steps": 4810, "loss": 0.2971, "lr": 4.541355171779582e-05, "epoch": 1.3825363825363826, "percentage": 27.65, "elapsed_time": "0:05:45", "remaining_time": "0:15:05", "throughput": 1546.54, "total_tokens": 534920}
|
|
{"current_steps": 1335, "total_steps": 4810, "loss": 0.2582, "lr": 4.5361049774152256e-05, "epoch": 1.3877338877338876, "percentage": 27.75, "elapsed_time": "0:05:46", "remaining_time": "0:15:01", "throughput": 1550.64, "total_tokens": 536840}
|
|
{"current_steps": 1340, "total_steps": 4810, "loss": 0.2788, "lr": 4.530827975059715e-05, "epoch": 1.392931392931393, "percentage": 27.86, "elapsed_time": "0:05:46", "remaining_time": "0:14:57", "throughput": 1554.74, "total_tokens": 538760}
|
|
{"current_steps": 1345, "total_steps": 4810, "loss": 0.2367, "lr": 4.5255242341917055e-05, "epoch": 1.3981288981288982, "percentage": 27.96, "elapsed_time": "0:05:46", "remaining_time": "0:14:53", "throughput": 1558.83, "total_tokens": 540680}
|
|
{"current_steps": 1350, "total_steps": 4810, "loss": 0.2864, "lr": 4.5201938246418976e-05, "epoch": 1.4033264033264032, "percentage": 28.07, "elapsed_time": "0:05:47", "remaining_time": "0:14:49", "throughput": 1563.09, "total_tokens": 542664}
|
|
{"current_steps": 1355, "total_steps": 4810, "loss": 0.1771, "lr": 4.51483681659212e-05, "epoch": 1.4085239085239085, "percentage": 28.17, "elapsed_time": "0:05:47", "remaining_time": "0:14:46", "throughput": 1567.53, "total_tokens": 544712}
|
|
{"current_steps": 1360, "total_steps": 4810, "loss": 0.3864, "lr": 4.509453280574407e-05, "epoch": 1.4137214137214138, "percentage": 28.27, "elapsed_time": "0:05:47", "remaining_time": "0:14:42", "throughput": 1572.14, "total_tokens": 546824}
|
|
{"current_steps": 1365, "total_steps": 4810, "loss": 0.2952, "lr": 4.504043287470068e-05, "epoch": 1.4189189189189189, "percentage": 28.38, "elapsed_time": "0:05:48", "remaining_time": "0:14:38", "throughput": 1576.74, "total_tokens": 548936}
|
|
{"current_steps": 1370, "total_steps": 4810, "loss": 0.3433, "lr": 4.498606908508754e-05, "epoch": 1.4241164241164241, "percentage": 28.48, "elapsed_time": "0:05:48", "remaining_time": "0:14:34", "throughput": 1580.97, "total_tokens": 550920}
|
|
{"current_steps": 1375, "total_steps": 4810, "loss": 0.2757, "lr": 4.4931442152675185e-05, "epoch": 1.4293139293139294, "percentage": 28.59, "elapsed_time": "0:05:48", "remaining_time": "0:14:31", "throughput": 1585.2, "total_tokens": 552904}
|
|
{"current_steps": 1380, "total_steps": 4810, "loss": 0.3025, "lr": 4.487655279669881e-05, "epoch": 1.4345114345114345, "percentage": 28.69, "elapsed_time": "0:05:49", "remaining_time": "0:14:27", "throughput": 1589.23, "total_tokens": 554824}
|
|
{"current_steps": 1385, "total_steps": 4810, "loss": 0.2663, "lr": 4.482140173984875e-05, "epoch": 1.4397089397089398, "percentage": 28.79, "elapsed_time": "0:05:49", "remaining_time": "0:14:24", "throughput": 1593.62, "total_tokens": 556872}
|
|
{"current_steps": 1390, "total_steps": 4810, "loss": 0.2717, "lr": 4.476598970826094e-05, "epoch": 1.444906444906445, "percentage": 28.9, "elapsed_time": "0:05:49", "remaining_time": "0:14:20", "throughput": 1598.18, "total_tokens": 558984}
|
|
{"current_steps": 1395, "total_steps": 4810, "loss": 0.2791, "lr": 4.4710317431507434e-05, "epoch": 1.45010395010395, "percentage": 29.0, "elapsed_time": "0:05:50", "remaining_time": "0:14:17", "throughput": 1602.37, "total_tokens": 560968}
|
|
{"current_steps": 1400, "total_steps": 4810, "loss": 0.2617, "lr": 4.465438564258673e-05, "epoch": 1.4553014553014554, "percentage": 29.11, "elapsed_time": "0:05:50", "remaining_time": "0:14:13", "throughput": 1606.56, "total_tokens": 562952}
|
|
{"current_steps": 1405, "total_steps": 4810, "loss": 0.2452, "lr": 4.4598195077914145e-05, "epoch": 1.4604989604989604, "percentage": 29.21, "elapsed_time": "0:05:50", "remaining_time": "0:14:10", "throughput": 1611.0, "total_tokens": 565064}
|
|
{"current_steps": 1410, "total_steps": 4810, "loss": 0.2761, "lr": 4.454174647731213e-05, "epoch": 1.4656964656964657, "percentage": 29.31, "elapsed_time": "0:05:51", "remaining_time": "0:14:06", "throughput": 1615.35, "total_tokens": 567112}
|
|
{"current_steps": 1415, "total_steps": 4810, "loss": 0.2875, "lr": 4.4485040584000514e-05, "epoch": 1.4708939708939708, "percentage": 29.42, "elapsed_time": "0:05:51", "remaining_time": "0:14:03", "throughput": 1619.69, "total_tokens": 569160}
|
|
{"current_steps": 1420, "total_steps": 4810, "loss": 0.2782, "lr": 4.442807814458672e-05, "epoch": 1.476091476091476, "percentage": 29.52, "elapsed_time": "0:05:51", "remaining_time": "0:13:59", "throughput": 1624.37, "total_tokens": 571336}
|
|
{"current_steps": 1425, "total_steps": 4810, "loss": 0.25, "lr": 4.437085990905591e-05, "epoch": 1.4812889812889813, "percentage": 29.63, "elapsed_time": "0:05:52", "remaining_time": "0:13:56", "throughput": 1628.69, "total_tokens": 573384}
|
|
{"current_steps": 1430, "total_steps": 4810, "loss": 0.2596, "lr": 4.431338663076119e-05, "epoch": 1.4864864864864864, "percentage": 29.73, "elapsed_time": "0:05:52", "remaining_time": "0:13:52", "throughput": 1632.65, "total_tokens": 575304}
|
|
{"current_steps": 1435, "total_steps": 4810, "loss": 0.2788, "lr": 4.4255659066413595e-05, "epoch": 1.4916839916839917, "percentage": 29.83, "elapsed_time": "0:05:52", "remaining_time": "0:13:49", "throughput": 1636.42, "total_tokens": 577160}
|
|
{"current_steps": 1440, "total_steps": 4810, "loss": 0.2892, "lr": 4.419767797607219e-05, "epoch": 1.496881496881497, "percentage": 29.94, "elapsed_time": "0:05:53", "remaining_time": "0:13:47", "throughput": 1638.89, "total_tokens": 579208}
|
|
{"current_steps": 1445, "total_steps": 4810, "loss": 0.2936, "lr": 4.413944412313405e-05, "epoch": 1.502079002079002, "percentage": 30.04, "elapsed_time": "0:05:53", "remaining_time": "0:13:43", "throughput": 1643.18, "total_tokens": 581256}
|
|
{"current_steps": 1446, "total_steps": 4810, "eval_loss": 0.25699949264526367, "epoch": 1.503118503118503, "percentage": 30.06, "elapsed_time": "0:05:55", "remaining_time": "0:13:46", "throughput": 1636.61, "total_tokens": 581704}
|
|
{"current_steps": 1450, "total_steps": 4810, "loss": 0.2702, "lr": 4.4080958274324155e-05, "epoch": 1.5072765072765073, "percentage": 30.15, "elapsed_time": "0:07:05", "remaining_time": "0:16:24", "throughput": 1372.36, "total_tokens": 583304}
|
|
{"current_steps": 1455, "total_steps": 4810, "loss": 0.2252, "lr": 4.40222211996854e-05, "epoch": 1.5124740124740126, "percentage": 30.25, "elapsed_time": "0:07:05", "remaining_time": "0:16:20", "throughput": 1375.81, "total_tokens": 585224}
|
|
{"current_steps": 1460, "total_steps": 4810, "loss": 0.4066, "lr": 4.396323367256836e-05, "epoch": 1.5176715176715176, "percentage": 30.35, "elapsed_time": "0:07:05", "remaining_time": "0:16:16", "throughput": 1379.55, "total_tokens": 587272}
|
|
{"current_steps": 1465, "total_steps": 4810, "loss": 0.2413, "lr": 4.390399646962117e-05, "epoch": 1.5228690228690227, "percentage": 30.46, "elapsed_time": "0:07:06", "remaining_time": "0:16:12", "throughput": 1383.28, "total_tokens": 589320}
|
|
{"current_steps": 1470, "total_steps": 4810, "loss": 0.2593, "lr": 4.384451037077924e-05, "epoch": 1.5280665280665282, "percentage": 30.56, "elapsed_time": "0:07:06", "remaining_time": "0:16:08", "throughput": 1386.86, "total_tokens": 591304}
|
|
{"current_steps": 1475, "total_steps": 4810, "loss": 0.2499, "lr": 4.378477615925505e-05, "epoch": 1.5332640332640333, "percentage": 30.67, "elapsed_time": "0:07:06", "remaining_time": "0:16:04", "throughput": 1390.27, "total_tokens": 593224}
|
|
{"current_steps": 1480, "total_steps": 4810, "loss": 0.2672, "lr": 4.372479462152781e-05, "epoch": 1.5384615384615383, "percentage": 30.77, "elapsed_time": "0:07:07", "remaining_time": "0:16:00", "throughput": 1394.12, "total_tokens": 595336}
|
|
{"current_steps": 1485, "total_steps": 4810, "loss": 0.2898, "lr": 4.366456654733308e-05, "epoch": 1.5436590436590436, "percentage": 30.87, "elapsed_time": "0:07:07", "remaining_time": "0:15:56", "throughput": 1397.52, "total_tokens": 597256}
|
|
{"current_steps": 1490, "total_steps": 4810, "loss": 0.2852, "lr": 4.360409272965242e-05, "epoch": 1.5488565488565489, "percentage": 30.98, "elapsed_time": "0:07:07", "remaining_time": "0:15:53", "throughput": 1401.21, "total_tokens": 599304}
|
|
{"current_steps": 1495, "total_steps": 4810, "loss": 0.2828, "lr": 4.3543373964702907e-05, "epoch": 1.554054054054054, "percentage": 31.08, "elapsed_time": "0:07:08", "remaining_time": "0:15:49", "throughput": 1404.74, "total_tokens": 601288}
|
|
{"current_steps": 1500, "total_steps": 4810, "loss": 0.2597, "lr": 4.348241105192668e-05, "epoch": 1.5592515592515592, "percentage": 31.19, "elapsed_time": "0:07:08", "remaining_time": "0:15:45", "throughput": 1408.27, "total_tokens": 603272}
|
|
{"current_steps": 1505, "total_steps": 4810, "loss": 0.2584, "lr": 4.34212047939804e-05, "epoch": 1.5644490644490645, "percentage": 31.29, "elapsed_time": "0:07:08", "remaining_time": "0:15:41", "throughput": 1411.8, "total_tokens": 605256}
|
|
{"current_steps": 1510, "total_steps": 4810, "loss": 0.2713, "lr": 4.335975599672469e-05, "epoch": 1.5696465696465696, "percentage": 31.39, "elapsed_time": "0:07:09", "remaining_time": "0:15:37", "throughput": 1415.48, "total_tokens": 607304}
|
|
{"current_steps": 1515, "total_steps": 4810, "loss": 0.2702, "lr": 4.329806546921353e-05, "epoch": 1.5748440748440748, "percentage": 31.5, "elapsed_time": "0:07:09", "remaining_time": "0:15:33", "throughput": 1418.85, "total_tokens": 609224}
|
|
{"current_steps": 1520, "total_steps": 4810, "loss": 0.2648, "lr": 4.323613402368357e-05, "epoch": 1.5800415800415801, "percentage": 31.6, "elapsed_time": "0:07:09", "remaining_time": "0:15:30", "throughput": 1422.67, "total_tokens": 611336}
|
|
{"current_steps": 1525, "total_steps": 4810, "loss": 0.2879, "lr": 4.317396247554347e-05, "epoch": 1.5852390852390852, "percentage": 31.7, "elapsed_time": "0:07:10", "remaining_time": "0:15:26", "throughput": 1426.18, "total_tokens": 613320}
|
|
{"current_steps": 1530, "total_steps": 4810, "loss": 0.2953, "lr": 4.311155164336318e-05, "epoch": 1.5904365904365905, "percentage": 31.81, "elapsed_time": "0:07:10", "remaining_time": "0:15:22", "throughput": 1429.38, "total_tokens": 615176}
|
|
{"current_steps": 1535, "total_steps": 4810, "loss": 0.2754, "lr": 4.3048902348863116e-05, "epoch": 1.5956340956340958, "percentage": 31.91, "elapsed_time": "0:07:10", "remaining_time": "0:15:18", "throughput": 1433.04, "total_tokens": 617224}
|
|
{"current_steps": 1540, "total_steps": 4810, "loss": 0.2785, "lr": 4.298601541690336e-05, "epoch": 1.6008316008316008, "percentage": 32.02, "elapsed_time": "0:07:11", "remaining_time": "0:15:15", "throughput": 1436.53, "total_tokens": 619208}
|
|
{"current_steps": 1545, "total_steps": 4810, "loss": 0.278, "lr": 4.292289167547281e-05, "epoch": 1.6060291060291059, "percentage": 32.12, "elapsed_time": "0:07:11", "remaining_time": "0:15:11", "throughput": 1440.02, "total_tokens": 621192}
|
|
{"current_steps": 1550, "total_steps": 4810, "loss": 0.2618, "lr": 4.285953195567827e-05, "epoch": 1.6112266112266114, "percentage": 32.22, "elapsed_time": "0:07:11", "remaining_time": "0:15:07", "throughput": 1443.5, "total_tokens": 623176}
|
|
{"current_steps": 1555, "total_steps": 4810, "loss": 0.2506, "lr": 4.2795937091733515e-05, "epoch": 1.6164241164241164, "percentage": 32.33, "elapsed_time": "0:07:12", "remaining_time": "0:15:04", "throughput": 1446.97, "total_tokens": 625160}
|
|
{"current_steps": 1560, "total_steps": 4810, "loss": 0.3095, "lr": 4.27321079209483e-05, "epoch": 1.6216216216216215, "percentage": 32.43, "elapsed_time": "0:07:12", "remaining_time": "0:15:00", "throughput": 1450.45, "total_tokens": 627144}
|
|
{"current_steps": 1565, "total_steps": 4810, "loss": 0.2951, "lr": 4.266804528371732e-05, "epoch": 1.6268191268191268, "percentage": 32.54, "elapsed_time": "0:07:12", "remaining_time": "0:14:57", "throughput": 1454.07, "total_tokens": 629192}
|
|
{"current_steps": 1570, "total_steps": 4810, "loss": 0.2796, "lr": 4.260375002350917e-05, "epoch": 1.632016632016632, "percentage": 32.64, "elapsed_time": "0:07:13", "remaining_time": "0:14:53", "throughput": 1457.68, "total_tokens": 631240}
|
|
{"current_steps": 1575, "total_steps": 4810, "loss": 0.2407, "lr": 4.253922298685525e-05, "epoch": 1.637214137214137, "percentage": 32.74, "elapsed_time": "0:07:13", "remaining_time": "0:14:50", "throughput": 1461.14, "total_tokens": 633224}
|
|
{"current_steps": 1580, "total_steps": 4810, "loss": 0.2386, "lr": 4.247446502333858e-05, "epoch": 1.6424116424116424, "percentage": 32.85, "elapsed_time": "0:07:13", "remaining_time": "0:14:46", "throughput": 1464.6, "total_tokens": 635208}
|
|
{"current_steps": 1585, "total_steps": 4810, "loss": 0.2872, "lr": 4.2409476985582644e-05, "epoch": 1.6476091476091477, "percentage": 32.95, "elapsed_time": "0:07:14", "remaining_time": "0:14:43", "throughput": 1468.2, "total_tokens": 637256}
|
|
{"current_steps": 1590, "total_steps": 4810, "loss": 0.2806, "lr": 4.234425972924014e-05, "epoch": 1.6528066528066527, "percentage": 33.06, "elapsed_time": "0:07:14", "remaining_time": "0:14:39", "throughput": 1471.5, "total_tokens": 639176}
|
|
{"current_steps": 1595, "total_steps": 4810, "loss": 0.2715, "lr": 4.227881411298175e-05, "epoch": 1.658004158004158, "percentage": 33.16, "elapsed_time": "0:07:14", "remaining_time": "0:14:36", "throughput": 1475.08, "total_tokens": 641224}
|
|
{"current_steps": 1600, "total_steps": 4810, "loss": 0.2872, "lr": 4.221314099848481e-05, "epoch": 1.6632016632016633, "percentage": 33.26, "elapsed_time": "0:07:15", "remaining_time": "0:14:32", "throughput": 1478.37, "total_tokens": 643144}
|
|
{"current_steps": 1605, "total_steps": 4810, "loss": 0.2204, "lr": 4.2147241250421944e-05, "epoch": 1.6683991683991684, "percentage": 33.37, "elapsed_time": "0:07:15", "remaining_time": "0:14:29", "throughput": 1481.35, "total_tokens": 644936}
|
|
{"current_steps": 1610, "total_steps": 4810, "loss": 0.2557, "lr": 4.208111573644975e-05, "epoch": 1.6735966735966736, "percentage": 33.47, "elapsed_time": "0:07:15", "remaining_time": "0:14:25", "throughput": 1484.92, "total_tokens": 646984}
|
|
{"current_steps": 1615, "total_steps": 4810, "loss": 0.2777, "lr": 4.201476532719728e-05, "epoch": 1.678794178794179, "percentage": 33.58, "elapsed_time": "0:07:16", "remaining_time": "0:14:22", "throughput": 1488.49, "total_tokens": 649032}
|
|
{"current_steps": 1620, "total_steps": 4810, "loss": 0.2778, "lr": 4.194819089625466e-05, "epoch": 1.683991683991684, "percentage": 33.68, "elapsed_time": "0:07:16", "remaining_time": "0:14:19", "throughput": 1492.05, "total_tokens": 651080}
|
|
{"current_steps": 1625, "total_steps": 4810, "loss": 0.2953, "lr": 4.188139332016154e-05, "epoch": 1.689189189189189, "percentage": 33.78, "elapsed_time": "0:07:16", "remaining_time": "0:14:15", "throughput": 1495.31, "total_tokens": 653000}
|
|
{"current_steps": 1630, "total_steps": 4810, "loss": 0.2955, "lr": 4.1814373478395586e-05, "epoch": 1.6943866943866945, "percentage": 33.89, "elapsed_time": "0:07:17", "remaining_time": "0:14:12", "throughput": 1498.56, "total_tokens": 654920}
|
|
{"current_steps": 1635, "total_steps": 4810, "loss": 0.2599, "lr": 4.174713225336086e-05, "epoch": 1.6995841995841996, "percentage": 33.99, "elapsed_time": "0:07:17", "remaining_time": "0:14:09", "throughput": 1501.95, "total_tokens": 656904}
|
|
{"current_steps": 1640, "total_steps": 4810, "loss": 0.2986, "lr": 4.1679670530376244e-05, "epoch": 1.7047817047817047, "percentage": 34.1, "elapsed_time": "0:07:17", "remaining_time": "0:14:06", "throughput": 1505.49, "total_tokens": 658952}
|
|
{"current_steps": 1645, "total_steps": 4810, "loss": 0.264, "lr": 4.161198919766375e-05, "epoch": 1.70997920997921, "percentage": 34.2, "elapsed_time": "0:07:18", "remaining_time": "0:14:02", "throughput": 1508.73, "total_tokens": 660872}
|
|
{"current_steps": 1650, "total_steps": 4810, "loss": 0.2337, "lr": 4.154408914633685e-05, "epoch": 1.7151767151767152, "percentage": 34.3, "elapsed_time": "0:07:18", "remaining_time": "0:13:59", "throughput": 1512.11, "total_tokens": 662856}
|
|
{"current_steps": 1655, "total_steps": 4810, "loss": 0.2968, "lr": 4.147597127038873e-05, "epoch": 1.7203742203742203, "percentage": 34.41, "elapsed_time": "0:07:18", "remaining_time": "0:13:56", "throughput": 1515.63, "total_tokens": 664904}
|
|
{"current_steps": 1660, "total_steps": 4810, "loss": 0.2433, "lr": 4.140763646668052e-05, "epoch": 1.7255717255717256, "percentage": 34.51, "elapsed_time": "0:07:19", "remaining_time": "0:13:53", "throughput": 1518.99, "total_tokens": 666888}
|
|
{"current_steps": 1665, "total_steps": 4810, "loss": 0.3127, "lr": 4.1339085634929485e-05, "epoch": 1.7307692307692308, "percentage": 34.62, "elapsed_time": "0:07:19", "remaining_time": "0:13:49", "throughput": 1522.51, "total_tokens": 668936}
|
|
{"current_steps": 1670, "total_steps": 4810, "loss": 0.2454, "lr": 4.12703196776972e-05, "epoch": 1.735966735966736, "percentage": 34.72, "elapsed_time": "0:07:19", "remaining_time": "0:13:46", "throughput": 1525.72, "total_tokens": 670856}
|
|
{"current_steps": 1675, "total_steps": 4810, "loss": 0.3639, "lr": 4.120133950037763e-05, "epoch": 1.7411642411642412, "percentage": 34.82, "elapsed_time": "0:07:20", "remaining_time": "0:13:43", "throughput": 1529.07, "total_tokens": 672840}
|
|
{"current_steps": 1680, "total_steps": 4810, "loss": 0.2468, "lr": 4.113214601118524e-05, "epoch": 1.7463617463617465, "percentage": 34.93, "elapsed_time": "0:07:20", "remaining_time": "0:13:40", "throughput": 1532.42, "total_tokens": 674824}
|
|
{"current_steps": 1685, "total_steps": 4810, "loss": 0.2705, "lr": 4.1062740121143016e-05, "epoch": 1.7515592515592515, "percentage": 35.03, "elapsed_time": "0:07:20", "remaining_time": "0:13:37", "throughput": 1535.76, "total_tokens": 676808}
|
|
{"current_steps": 1687, "total_steps": 4810, "eval_loss": 0.25601524114608765, "epoch": 1.7536382536382535, "percentage": 35.07, "elapsed_time": "0:07:21", "remaining_time": "0:13:38", "throughput": 1533.39, "total_tokens": 677576}
|
|
{"current_steps": 1690, "total_steps": 4810, "loss": 0.2969, "lr": 4.099312274407048e-05, "epoch": 1.7567567567567568, "percentage": 35.14, "elapsed_time": "0:08:12", "remaining_time": "0:15:10", "throughput": 1376.94, "total_tokens": 678728}
|
|
{"current_steps": 1695, "total_steps": 4810, "loss": 0.2874, "lr": 4.0923294796571676e-05, "epoch": 1.761954261954262, "percentage": 35.24, "elapsed_time": "0:08:13", "remaining_time": "0:15:06", "throughput": 1380.18, "total_tokens": 680776}
|
|
{"current_steps": 1700, "total_steps": 4810, "loss": 0.2651, "lr": 4.085325719802307e-05, "epoch": 1.7671517671517671, "percentage": 35.34, "elapsed_time": "0:08:13", "remaining_time": "0:15:02", "throughput": 1383.8, "total_tokens": 683016}
|
|
{"current_steps": 1705, "total_steps": 4810, "loss": 0.2924, "lr": 4.078301087056144e-05, "epoch": 1.7723492723492722, "percentage": 35.45, "elapsed_time": "0:08:13", "remaining_time": "0:14:59", "throughput": 1387.41, "total_tokens": 685256}
|
|
{"current_steps": 1710, "total_steps": 4810, "loss": 0.2762, "lr": 4.0712556739071795e-05, "epoch": 1.7775467775467777, "percentage": 35.55, "elapsed_time": "0:08:14", "remaining_time": "0:14:55", "throughput": 1390.63, "total_tokens": 687304}
|
|
{"current_steps": 1715, "total_steps": 4810, "loss": 0.2888, "lr": 4.064189573117512e-05, "epoch": 1.7827442827442828, "percentage": 35.65, "elapsed_time": "0:08:14", "remaining_time": "0:14:52", "throughput": 1393.59, "total_tokens": 689224}
|
|
{"current_steps": 1720, "total_steps": 4810, "loss": 0.2282, "lr": 4.0571028777216214e-05, "epoch": 1.7879417879417878, "percentage": 35.76, "elapsed_time": "0:08:14", "remaining_time": "0:14:49", "throughput": 1397.04, "total_tokens": 691400}
|
|
{"current_steps": 1725, "total_steps": 4810, "loss": 0.187, "lr": 4.049995681025143e-05, "epoch": 1.793139293139293, "percentage": 35.86, "elapsed_time": "0:08:15", "remaining_time": "0:14:45", "throughput": 1400.0, "total_tokens": 693320}
|
|
{"current_steps": 1730, "total_steps": 4810, "loss": 0.4406, "lr": 4.0428680766036384e-05, "epoch": 1.7983367983367984, "percentage": 35.97, "elapsed_time": "0:08:15", "remaining_time": "0:14:42", "throughput": 1403.34, "total_tokens": 695432}
|
|
{"current_steps": 1735, "total_steps": 4810, "loss": 0.3552, "lr": 4.035720158301363e-05, "epoch": 1.8035343035343034, "percentage": 36.07, "elapsed_time": "0:08:15", "remaining_time": "0:14:38", "throughput": 1406.67, "total_tokens": 697544}
|
|
{"current_steps": 1740, "total_steps": 4810, "loss": 0.3263, "lr": 4.028552020230031e-05, "epoch": 1.8087318087318087, "percentage": 36.17, "elapsed_time": "0:08:16", "remaining_time": "0:14:35", "throughput": 1409.87, "total_tokens": 699592}
|
|
{"current_steps": 1745, "total_steps": 4810, "loss": 0.2859, "lr": 4.0213637567675774e-05, "epoch": 1.813929313929314, "percentage": 36.28, "elapsed_time": "0:08:16", "remaining_time": "0:14:32", "throughput": 1412.94, "total_tokens": 701576}
|
|
{"current_steps": 1750, "total_steps": 4810, "loss": 0.2657, "lr": 4.0141554625569125e-05, "epoch": 1.819126819126819, "percentage": 36.38, "elapsed_time": "0:08:16", "remaining_time": "0:14:28", "throughput": 1416.26, "total_tokens": 703688}
|
|
{"current_steps": 1755, "total_steps": 4810, "loss": 0.2842, "lr": 4.0069272325046816e-05, "epoch": 1.8243243243243243, "percentage": 36.49, "elapsed_time": "0:08:17", "remaining_time": "0:14:25", "throughput": 1419.45, "total_tokens": 705736}
|
|
{"current_steps": 1760, "total_steps": 4810, "loss": 0.2479, "lr": 3.999679161780005e-05, "epoch": 1.8295218295218296, "percentage": 36.59, "elapsed_time": "0:08:17", "remaining_time": "0:14:22", "throughput": 1422.51, "total_tokens": 707720}
|
|
{"current_steps": 1765, "total_steps": 4810, "loss": 0.3132, "lr": 3.99241134581324e-05, "epoch": 1.8347193347193347, "percentage": 36.69, "elapsed_time": "0:08:17", "remaining_time": "0:14:18", "throughput": 1425.94, "total_tokens": 709896}
|
|
{"current_steps": 1770, "total_steps": 4810, "loss": 0.2661, "lr": 3.985123880294708e-05, "epoch": 1.83991683991684, "percentage": 36.8, "elapsed_time": "0:08:18", "remaining_time": "0:14:15", "throughput": 1429.13, "total_tokens": 711944}
|
|
{"current_steps": 1775, "total_steps": 4810, "loss": 0.2664, "lr": 3.9778168611734456e-05, "epoch": 1.8451143451143452, "percentage": 36.9, "elapsed_time": "0:08:18", "remaining_time": "0:14:12", "throughput": 1432.3, "total_tokens": 713992}
|
|
{"current_steps": 1780, "total_steps": 4810, "loss": 0.2443, "lr": 3.970490384655939e-05, "epoch": 1.8503118503118503, "percentage": 37.01, "elapsed_time": "0:08:18", "remaining_time": "0:14:09", "throughput": 1435.35, "total_tokens": 715976}
|
|
{"current_steps": 1785, "total_steps": 4810, "loss": 0.2659, "lr": 3.963144547204856e-05, "epoch": 1.8555093555093554, "percentage": 37.11, "elapsed_time": "0:08:19", "remaining_time": "0:14:05", "throughput": 1438.53, "total_tokens": 718024}
|
|
{"current_steps": 1790, "total_steps": 4810, "loss": 0.2441, "lr": 3.955779445537776e-05, "epoch": 1.8607068607068609, "percentage": 37.21, "elapsed_time": "0:08:19", "remaining_time": "0:14:02", "throughput": 1441.69, "total_tokens": 720072}
|
|
{"current_steps": 1795, "total_steps": 4810, "loss": 0.279, "lr": 3.948395176625918e-05, "epoch": 1.865904365904366, "percentage": 37.32, "elapsed_time": "0:08:19", "remaining_time": "0:13:59", "throughput": 1444.85, "total_tokens": 722120}
|
|
{"current_steps": 1800, "total_steps": 4810, "loss": 0.2851, "lr": 3.9409918376928604e-05, "epoch": 1.871101871101871, "percentage": 37.42, "elapsed_time": "0:08:20", "remaining_time": "0:13:56", "throughput": 1448.02, "total_tokens": 724168}
|
|
{"current_steps": 1805, "total_steps": 4810, "loss": 0.2928, "lr": 3.933569526213268e-05, "epoch": 1.8762993762993763, "percentage": 37.53, "elapsed_time": "0:08:20", "remaining_time": "0:13:53", "throughput": 1451.3, "total_tokens": 726280}
|
|
{"current_steps": 1810, "total_steps": 4810, "loss": 0.2677, "lr": 3.926128339911599e-05, "epoch": 1.8814968814968815, "percentage": 37.63, "elapsed_time": "0:08:20", "remaining_time": "0:13:49", "throughput": 1454.33, "total_tokens": 728264}
|
|
{"current_steps": 1815, "total_steps": 4810, "loss": 0.2924, "lr": 3.918668376760827e-05, "epoch": 1.8866943866943866, "percentage": 37.73, "elapsed_time": "0:08:21", "remaining_time": "0:13:46", "throughput": 1457.48, "total_tokens": 730312}
|
|
{"current_steps": 1820, "total_steps": 4810, "loss": 0.2771, "lr": 3.9111897349811454e-05, "epoch": 1.8918918918918919, "percentage": 37.84, "elapsed_time": "0:08:21", "remaining_time": "0:13:43", "throughput": 1460.5, "total_tokens": 732296}
|
|
{"current_steps": 1825, "total_steps": 4810, "loss": 0.2412, "lr": 3.903692513038677e-05, "epoch": 1.8970893970893972, "percentage": 37.94, "elapsed_time": "0:08:21", "remaining_time": "0:13:40", "throughput": 1463.12, "total_tokens": 734088}
|
|
{"current_steps": 1830, "total_steps": 4810, "loss": 0.2897, "lr": 3.896176809644178e-05, "epoch": 1.9022869022869022, "percentage": 38.05, "elapsed_time": "0:08:22", "remaining_time": "0:13:37", "throughput": 1465.63, "total_tokens": 736072}
|
|
{"current_steps": 1835, "total_steps": 4810, "loss": 0.3063, "lr": 3.8886427237517344e-05, "epoch": 1.9074844074844075, "percentage": 38.15, "elapsed_time": "0:08:22", "remaining_time": "0:13:34", "throughput": 1468.76, "total_tokens": 738120}
|
|
{"current_steps": 1840, "total_steps": 4810, "loss": 0.3038, "lr": 3.881090354557463e-05, "epoch": 1.9126819126819128, "percentage": 38.25, "elapsed_time": "0:08:22", "remaining_time": "0:13:31", "throughput": 1471.89, "total_tokens": 740168}
|
|
{"current_steps": 1845, "total_steps": 4810, "loss": 0.2716, "lr": 3.8735198014982064e-05, "epoch": 1.9178794178794178, "percentage": 38.36, "elapsed_time": "0:08:23", "remaining_time": "0:13:28", "throughput": 1475.14, "total_tokens": 742280}
|
|
{"current_steps": 1850, "total_steps": 4810, "loss": 0.2834, "lr": 3.865931164250219e-05, "epoch": 1.9230769230769231, "percentage": 38.46, "elapsed_time": "0:08:23", "remaining_time": "0:13:25", "throughput": 1478.14, "total_tokens": 744328}
|
|
{"current_steps": 1855, "total_steps": 4810, "loss": 0.2845, "lr": 3.8583245427278584e-05, "epoch": 1.9282744282744284, "percentage": 38.57, "elapsed_time": "0:08:23", "remaining_time": "0:13:22", "throughput": 1481.39, "total_tokens": 746440}
|
|
{"current_steps": 1860, "total_steps": 4810, "loss": 0.3004, "lr": 3.850700037082268e-05, "epoch": 1.9334719334719335, "percentage": 38.67, "elapsed_time": "0:08:24", "remaining_time": "0:13:19", "throughput": 1484.5, "total_tokens": 748488}
|
|
{"current_steps": 1865, "total_steps": 4810, "loss": 0.2696, "lr": 3.8430577477000595e-05, "epoch": 1.9386694386694385, "percentage": 38.77, "elapsed_time": "0:08:24", "remaining_time": "0:13:16", "throughput": 1487.23, "total_tokens": 750344}
|
|
{"current_steps": 1870, "total_steps": 4810, "loss": 0.2567, "lr": 3.835397775201991e-05, "epoch": 1.943866943866944, "percentage": 38.88, "elapsed_time": "0:08:24", "remaining_time": "0:13:13", "throughput": 1490.21, "total_tokens": 752328}
|
|
{"current_steps": 1875, "total_steps": 4810, "loss": 0.269, "lr": 3.827720220441642e-05, "epoch": 1.949064449064449, "percentage": 38.98, "elapsed_time": "0:08:25", "remaining_time": "0:13:10", "throughput": 1493.19, "total_tokens": 754312}
|
|
{"current_steps": 1880, "total_steps": 4810, "loss": 0.2816, "lr": 3.8200251845040855e-05, "epoch": 1.9542619542619541, "percentage": 39.09, "elapsed_time": "0:08:25", "remaining_time": "0:13:07", "throughput": 1496.03, "total_tokens": 756232}
|
|
{"current_steps": 1885, "total_steps": 4810, "loss": 0.2706, "lr": 3.812312768704557e-05, "epoch": 1.9594594594594594, "percentage": 39.19, "elapsed_time": "0:08:25", "remaining_time": "0:13:04", "throughput": 1499.12, "total_tokens": 758280}
|
|
{"current_steps": 1890, "total_steps": 4810, "loss": 0.2412, "lr": 3.8045830745871195e-05, "epoch": 1.9646569646569647, "percentage": 39.29, "elapsed_time": "0:08:26", "remaining_time": "0:13:01", "throughput": 1502.21, "total_tokens": 760328}
|
|
{"current_steps": 1895, "total_steps": 4810, "loss": 0.2593, "lr": 3.7968362039233316e-05, "epoch": 1.9698544698544698, "percentage": 39.4, "elapsed_time": "0:08:26", "remaining_time": "0:12:59", "throughput": 1505.04, "total_tokens": 762248}
|
|
{"current_steps": 1900, "total_steps": 4810, "loss": 0.2765, "lr": 3.789072258710898e-05, "epoch": 1.975051975051975, "percentage": 39.5, "elapsed_time": "0:08:26", "remaining_time": "0:12:56", "throughput": 1507.87, "total_tokens": 764168}
|
|
{"current_steps": 1905, "total_steps": 4810, "loss": 0.2703, "lr": 3.781291341172338e-05, "epoch": 1.9802494802494803, "percentage": 39.6, "elapsed_time": "0:08:27", "remaining_time": "0:12:53", "throughput": 1510.95, "total_tokens": 766216}
|
|
{"current_steps": 1910, "total_steps": 4810, "loss": 0.2418, "lr": 3.7734935537536276e-05, "epoch": 1.9854469854469854, "percentage": 39.71, "elapsed_time": "0:08:27", "remaining_time": "0:12:50", "throughput": 1514.02, "total_tokens": 768264}
|
|
{"current_steps": 1915, "total_steps": 4810, "loss": 0.2502, "lr": 3.7656789991228636e-05, "epoch": 1.9906444906444907, "percentage": 39.81, "elapsed_time": "0:08:27", "remaining_time": "0:12:47", "throughput": 1516.84, "total_tokens": 770184}
|
|
{"current_steps": 1920, "total_steps": 4810, "loss": 0.2432, "lr": 3.7578477801689e-05, "epoch": 1.995841995841996, "percentage": 39.92, "elapsed_time": "0:08:28", "remaining_time": "0:12:44", "throughput": 1519.78, "total_tokens": 772168}
|
|
{"current_steps": 1925, "total_steps": 4810, "loss": 0.2243, "lr": 3.7500000000000003e-05, "epoch": 2.001039501039501, "percentage": 40.02, "elapsed_time": "0:08:28", "remaining_time": "0:12:42", "throughput": 1522.47, "total_tokens": 774160}
|
|
{"current_steps": 1928, "total_steps": 4810, "eval_loss": 0.2575376331806183, "epoch": 2.004158004158004, "percentage": 40.08, "elapsed_time": "0:08:29", "remaining_time": "0:12:41", "throughput": 1521.1, "total_tokens": 775312}
|
|
{"current_steps": 1930, "total_steps": 4810, "loss": 0.26, "lr": 3.742135761942479e-05, "epoch": 2.006237006237006, "percentage": 40.12, "elapsed_time": "0:09:37", "remaining_time": "0:14:21", "throughput": 1344.97, "total_tokens": 776144}
|
|
{"current_steps": 1935, "total_steps": 4810, "loss": 0.2814, "lr": 3.734255169539337e-05, "epoch": 2.0114345114345116, "percentage": 40.23, "elapsed_time": "0:09:37", "remaining_time": "0:14:17", "throughput": 1347.66, "total_tokens": 778128}
|
|
{"current_steps": 1940, "total_steps": 4810, "loss": 0.2911, "lr": 3.7263583265489074e-05, "epoch": 2.0166320166320166, "percentage": 40.33, "elapsed_time": "0:09:37", "remaining_time": "0:14:14", "throughput": 1350.46, "total_tokens": 780176}
|
|
{"current_steps": 1945, "total_steps": 4810, "loss": 0.2723, "lr": 3.718445336943478e-05, "epoch": 2.0218295218295217, "percentage": 40.44, "elapsed_time": "0:09:38", "remaining_time": "0:14:11", "throughput": 1353.14, "total_tokens": 782160}
|
|
{"current_steps": 1950, "total_steps": 4810, "loss": 0.3159, "lr": 3.710516304907931e-05, "epoch": 2.027027027027027, "percentage": 40.54, "elapsed_time": "0:09:38", "remaining_time": "0:14:08", "throughput": 1355.92, "total_tokens": 784208}
|
|
{"current_steps": 1955, "total_steps": 4810, "loss": 0.2713, "lr": 3.702571334838365e-05, "epoch": 2.0322245322245323, "percentage": 40.64, "elapsed_time": "0:09:38", "remaining_time": "0:14:05", "throughput": 1358.71, "total_tokens": 786256}
|
|
{"current_steps": 1960, "total_steps": 4810, "loss": 0.2491, "lr": 3.694610531340729e-05, "epoch": 2.0374220374220373, "percentage": 40.75, "elapsed_time": "0:09:38", "remaining_time": "0:14:01", "throughput": 1361.39, "total_tokens": 788240}
|
|
{"current_steps": 1965, "total_steps": 4810, "loss": 0.2663, "lr": 3.6866339992294344e-05, "epoch": 2.042619542619543, "percentage": 40.85, "elapsed_time": "0:09:39", "remaining_time": "0:13:58", "throughput": 1364.18, "total_tokens": 790288}
|
|
{"current_steps": 1970, "total_steps": 4810, "loss": 0.2681, "lr": 3.6786418435259854e-05, "epoch": 2.047817047817048, "percentage": 40.96, "elapsed_time": "0:09:39", "remaining_time": "0:13:55", "throughput": 1366.85, "total_tokens": 792272}
|
|
{"current_steps": 1975, "total_steps": 4810, "loss": 0.2757, "lr": 3.670634169457587e-05, "epoch": 2.053014553014553, "percentage": 41.06, "elapsed_time": "0:09:39", "remaining_time": "0:13:52", "throughput": 1369.73, "total_tokens": 794384}
|
|
{"current_steps": 1980, "total_steps": 4810, "loss": 0.2727, "lr": 3.662611082455766e-05, "epoch": 2.0582120582120584, "percentage": 41.16, "elapsed_time": "0:09:40", "remaining_time": "0:13:49", "throughput": 1372.4, "total_tokens": 796368}
|
|
{"current_steps": 1985, "total_steps": 4810, "loss": 0.2711, "lr": 3.654572688154979e-05, "epoch": 2.0634095634095635, "percentage": 41.27, "elapsed_time": "0:09:40", "remaining_time": "0:13:46", "throughput": 1375.28, "total_tokens": 798480}
|
|
{"current_steps": 1990, "total_steps": 4810, "loss": 0.2843, "lr": 3.646519092391227e-05, "epoch": 2.0686070686070686, "percentage": 41.37, "elapsed_time": "0:09:40", "remaining_time": "0:13:43", "throughput": 1378.04, "total_tokens": 800528}
|
|
{"current_steps": 1995, "total_steps": 4810, "loss": 0.2917, "lr": 3.6384504012006544e-05, "epoch": 2.0738045738045736, "percentage": 41.48, "elapsed_time": "0:09:41", "remaining_time": "0:13:40", "throughput": 1381.13, "total_tokens": 802768}
|
|
{"current_steps": 2000, "total_steps": 4810, "loss": 0.2846, "lr": 3.6303667208181575e-05, "epoch": 2.079002079002079, "percentage": 41.58, "elapsed_time": "0:09:41", "remaining_time": "0:13:37", "throughput": 1383.78, "total_tokens": 804752}
|
|
{"current_steps": 2005, "total_steps": 4810, "loss": 0.2932, "lr": 3.622268157675986e-05, "epoch": 2.084199584199584, "percentage": 41.68, "elapsed_time": "0:09:41", "remaining_time": "0:13:34", "throughput": 1386.32, "total_tokens": 806672}
|
|
{"current_steps": 2010, "total_steps": 4810, "loss": 0.2602, "lr": 3.614154818402339e-05, "epoch": 2.0893970893970892, "percentage": 41.79, "elapsed_time": "0:09:42", "remaining_time": "0:13:31", "throughput": 1388.97, "total_tokens": 808656}
|
|
{"current_steps": 2015, "total_steps": 4810, "loss": 0.2402, "lr": 3.606026809819966e-05, "epoch": 2.0945945945945947, "percentage": 41.89, "elapsed_time": "0:09:42", "remaining_time": "0:13:28", "throughput": 1391.62, "total_tokens": 810640}
|
|
{"current_steps": 2020, "total_steps": 4810, "loss": 0.2832, "lr": 3.597884238944752e-05, "epoch": 2.0997920997921, "percentage": 42.0, "elapsed_time": "0:09:42", "remaining_time": "0:13:25", "throughput": 1394.37, "total_tokens": 812688}
|
|
{"current_steps": 2025, "total_steps": 4810, "loss": 0.262, "lr": 3.5897272129843194e-05, "epoch": 2.104989604989605, "percentage": 42.1, "elapsed_time": "0:09:43", "remaining_time": "0:13:22", "throughput": 1397.22, "total_tokens": 814800}
|
|
{"current_steps": 2030, "total_steps": 4810, "loss": 0.2348, "lr": 3.581555839336606e-05, "epoch": 2.1101871101871104, "percentage": 42.2, "elapsed_time": "0:09:43", "remaining_time": "0:13:19", "throughput": 1400.07, "total_tokens": 816912}
|
|
{"current_steps": 2035, "total_steps": 4810, "loss": 0.2981, "lr": 3.57337022558846e-05, "epoch": 2.1153846153846154, "percentage": 42.31, "elapsed_time": "0:09:43", "remaining_time": "0:13:16", "throughput": 1402.7, "total_tokens": 818896}
|
|
{"current_steps": 2040, "total_steps": 4810, "loss": 0.2857, "lr": 3.565170479514214e-05, "epoch": 2.1205821205821205, "percentage": 42.41, "elapsed_time": "0:09:44", "remaining_time": "0:13:13", "throughput": 1405.33, "total_tokens": 820880}
|
|
{"current_steps": 2045, "total_steps": 4810, "loss": 0.2745, "lr": 3.5569567090742764e-05, "epoch": 2.125779625779626, "percentage": 42.52, "elapsed_time": "0:09:44", "remaining_time": "0:13:10", "throughput": 1407.96, "total_tokens": 822864}
|
|
{"current_steps": 2050, "total_steps": 4810, "loss": 0.2705, "lr": 3.548729022413701e-05, "epoch": 2.130977130977131, "percentage": 42.62, "elapsed_time": "0:09:44", "remaining_time": "0:13:07", "throughput": 1410.91, "total_tokens": 825040}
|
|
{"current_steps": 2055, "total_steps": 4810, "loss": 0.2397, "lr": 3.540487527860769e-05, "epoch": 2.136174636174636, "percentage": 42.72, "elapsed_time": "0:09:45", "remaining_time": "0:13:04", "throughput": 1413.53, "total_tokens": 827024}
|
|
{"current_steps": 2060, "total_steps": 4810, "loss": 0.2921, "lr": 3.53223233392556e-05, "epoch": 2.141372141372141, "percentage": 42.83, "elapsed_time": "0:09:45", "remaining_time": "0:13:01", "throughput": 1416.36, "total_tokens": 829136}
|
|
{"current_steps": 2065, "total_steps": 4810, "loss": 0.274, "lr": 3.523963549298525e-05, "epoch": 2.1465696465696467, "percentage": 42.93, "elapsed_time": "0:09:45", "remaining_time": "0:12:58", "throughput": 1419.08, "total_tokens": 831184}
|
|
{"current_steps": 2070, "total_steps": 4810, "loss": 0.3599, "lr": 3.51568128284905e-05, "epoch": 2.1517671517671517, "percentage": 43.04, "elapsed_time": "0:09:46", "remaining_time": "0:12:55", "throughput": 1421.69, "total_tokens": 833168}
|
|
{"current_steps": 2075, "total_steps": 4810, "loss": 0.2991, "lr": 3.5073856436240334e-05, "epoch": 2.156964656964657, "percentage": 43.14, "elapsed_time": "0:09:46", "remaining_time": "0:12:52", "throughput": 1424.41, "total_tokens": 835216}
|
|
{"current_steps": 2080, "total_steps": 4810, "loss": 0.2711, "lr": 3.499076740846438e-05, "epoch": 2.1621621621621623, "percentage": 43.24, "elapsed_time": "0:09:46", "remaining_time": "0:12:50", "throughput": 1426.91, "total_tokens": 837136}
|
|
{"current_steps": 2085, "total_steps": 4810, "loss": 0.263, "lr": 3.490754683913863e-05, "epoch": 2.1673596673596673, "percentage": 43.35, "elapsed_time": "0:09:46", "remaining_time": "0:12:47", "throughput": 1429.51, "total_tokens": 839120}
|
|
{"current_steps": 2090, "total_steps": 4810, "loss": 0.2114, "lr": 3.482419582397095e-05, "epoch": 2.1725571725571724, "percentage": 43.45, "elapsed_time": "0:09:47", "remaining_time": "0:12:44", "throughput": 1432.12, "total_tokens": 841104}
|
|
{"current_steps": 2095, "total_steps": 4810, "loss": 0.3437, "lr": 3.474071546038673e-05, "epoch": 2.177754677754678, "percentage": 43.56, "elapsed_time": "0:09:47", "remaining_time": "0:12:41", "throughput": 1434.83, "total_tokens": 843152}
|
|
{"current_steps": 2100, "total_steps": 4810, "loss": 0.2665, "lr": 3.46571068475144e-05, "epoch": 2.182952182952183, "percentage": 43.66, "elapsed_time": "0:09:47", "remaining_time": "0:12:38", "throughput": 1437.42, "total_tokens": 845136}
|
|
{"current_steps": 2105, "total_steps": 4810, "loss": 0.2736, "lr": 3.4573371086170936e-05, "epoch": 2.188149688149688, "percentage": 43.76, "elapsed_time": "0:09:48", "remaining_time": "0:12:35", "throughput": 1440.01, "total_tokens": 847120}
|
|
{"current_steps": 2110, "total_steps": 4810, "loss": 0.2967, "lr": 3.4489509278847414e-05, "epoch": 2.1933471933471935, "percentage": 43.87, "elapsed_time": "0:09:48", "remaining_time": "0:12:33", "throughput": 1442.71, "total_tokens": 849168}
|
|
{"current_steps": 2115, "total_steps": 4810, "loss": 0.2804, "lr": 3.4405522529694454e-05, "epoch": 2.1985446985446986, "percentage": 43.97, "elapsed_time": "0:09:48", "remaining_time": "0:12:30", "throughput": 1445.3, "total_tokens": 851152}
|
|
{"current_steps": 2120, "total_steps": 4810, "loss": 0.2789, "lr": 3.432141194450772e-05, "epoch": 2.2037422037422036, "percentage": 44.07, "elapsed_time": "0:09:49", "remaining_time": "0:12:27", "throughput": 1447.67, "total_tokens": 853008}
|
|
{"current_steps": 2125, "total_steps": 4810, "loss": 0.2593, "lr": 3.4237178630713314e-05, "epoch": 2.208939708939709, "percentage": 44.18, "elapsed_time": "0:09:49", "remaining_time": "0:12:24", "throughput": 1450.47, "total_tokens": 855120}
|
|
{"current_steps": 2130, "total_steps": 4810, "loss": 0.3184, "lr": 3.415282369735324e-05, "epoch": 2.214137214137214, "percentage": 44.28, "elapsed_time": "0:09:49", "remaining_time": "0:12:22", "throughput": 1453.26, "total_tokens": 857232}
|
|
{"current_steps": 2135, "total_steps": 4810, "loss": 0.2697, "lr": 3.4068348255070763e-05, "epoch": 2.2193347193347193, "percentage": 44.39, "elapsed_time": "0:09:50", "remaining_time": "0:12:19", "throughput": 1456.05, "total_tokens": 859344}
|
|
{"current_steps": 2140, "total_steps": 4810, "loss": 0.2666, "lr": 3.3983753416095845e-05, "epoch": 2.2245322245322248, "percentage": 44.49, "elapsed_time": "0:09:50", "remaining_time": "0:12:16", "throughput": 1458.62, "total_tokens": 861328}
|
|
{"current_steps": 2145, "total_steps": 4810, "loss": 0.2807, "lr": 3.389904029423041e-05, "epoch": 2.22972972972973, "percentage": 44.59, "elapsed_time": "0:09:50", "remaining_time": "0:12:14", "throughput": 1461.3, "total_tokens": 863376}
|
|
{"current_steps": 2150, "total_steps": 4810, "loss": 0.277, "lr": 3.381421000483378e-05, "epoch": 2.234927234927235, "percentage": 44.7, "elapsed_time": "0:09:51", "remaining_time": "0:12:11", "throughput": 1463.97, "total_tokens": 865424}
|
|
{"current_steps": 2155, "total_steps": 4810, "loss": 0.2884, "lr": 3.37292636648079e-05, "epoch": 2.24012474012474, "percentage": 44.8, "elapsed_time": "0:09:51", "remaining_time": "0:12:08", "throughput": 1466.64, "total_tokens": 867472}
|
|
{"current_steps": 2160, "total_steps": 4810, "loss": 0.259, "lr": 3.36442023925827e-05, "epoch": 2.2453222453222454, "percentage": 44.91, "elapsed_time": "0:09:51", "remaining_time": "0:12:06", "throughput": 1469.42, "total_tokens": 869584}
|
|
{"current_steps": 2165, "total_steps": 4810, "loss": 0.2477, "lr": 3.3559027308101345e-05, "epoch": 2.2505197505197505, "percentage": 45.01, "elapsed_time": "0:09:52", "remaining_time": "0:12:03", "throughput": 1471.97, "total_tokens": 871568}
|
|
{"current_steps": 2169, "total_steps": 4810, "eval_loss": 0.2923731505870819, "epoch": 2.2546777546777546, "percentage": 45.09, "elapsed_time": "0:09:54", "remaining_time": "0:12:04", "throughput": 1468.33, "total_tokens": 873104}
|
|
{"current_steps": 2170, "total_steps": 4810, "loss": 0.3482, "lr": 3.3473739532805467e-05, "epoch": 2.2557172557172556, "percentage": 45.11, "elapsed_time": "0:10:28", "remaining_time": "0:12:44", "throughput": 1390.87, "total_tokens": 873488}
|
|
{"current_steps": 2175, "total_steps": 4810, "loss": 0.26, "lr": 3.3388340189620424e-05, "epoch": 2.260914760914761, "percentage": 45.22, "elapsed_time": "0:10:28", "remaining_time": "0:12:41", "throughput": 1393.31, "total_tokens": 875472}
|
|
{"current_steps": 2180, "total_steps": 4810, "loss": 0.2554, "lr": 3.330283040294053e-05, "epoch": 2.266112266112266, "percentage": 45.32, "elapsed_time": "0:10:28", "remaining_time": "0:12:38", "throughput": 1395.65, "total_tokens": 877392}
|
|
{"current_steps": 2185, "total_steps": 4810, "loss": 0.2621, "lr": 3.321721129861422e-05, "epoch": 2.271309771309771, "percentage": 45.43, "elapsed_time": "0:10:28", "remaining_time": "0:12:35", "throughput": 1398.28, "total_tokens": 879504}
|
|
{"current_steps": 2190, "total_steps": 4810, "loss": 0.29, "lr": 3.3131484003929246e-05, "epoch": 2.2765072765072767, "percentage": 45.53, "elapsed_time": "0:10:29", "remaining_time": "0:12:32", "throughput": 1400.51, "total_tokens": 881360}
|
|
{"current_steps": 2195, "total_steps": 4810, "loss": 0.2916, "lr": 3.3045649647597815e-05, "epoch": 2.2817047817047817, "percentage": 45.63, "elapsed_time": "0:10:29", "remaining_time": "0:12:30", "throughput": 1402.84, "total_tokens": 883280}
|
|
{"current_steps": 2200, "total_steps": 4810, "loss": 0.2572, "lr": 3.2959709359741744e-05, "epoch": 2.286902286902287, "percentage": 45.74, "elapsed_time": "0:10:29", "remaining_time": "0:12:27", "throughput": 1405.37, "total_tokens": 885328}
|
|
{"current_steps": 2205, "total_steps": 4810, "loss": 0.3062, "lr": 3.2873664271877584e-05, "epoch": 2.2920997920997923, "percentage": 45.84, "elapsed_time": "0:10:30", "remaining_time": "0:12:24", "throughput": 1407.8, "total_tokens": 887312}
|
|
{"current_steps": 2210, "total_steps": 4810, "loss": 0.2672, "lr": 3.278751551690172e-05, "epoch": 2.2972972972972974, "percentage": 45.95, "elapsed_time": "0:10:30", "remaining_time": "0:12:21", "throughput": 1410.22, "total_tokens": 889296}
|
|
{"current_steps": 2215, "total_steps": 4810, "loss": 0.2647, "lr": 3.270126422907544e-05, "epoch": 2.3024948024948024, "percentage": 46.05, "elapsed_time": "0:10:30", "remaining_time": "0:12:19", "throughput": 1412.84, "total_tokens": 891408}
|
|
{"current_steps": 2220, "total_steps": 4810, "loss": 0.2884, "lr": 3.261491154401001e-05, "epoch": 2.3076923076923075, "percentage": 46.15, "elapsed_time": "0:10:31", "remaining_time": "0:12:16", "throughput": 1415.25, "total_tokens": 893392}
|
|
{"current_steps": 2225, "total_steps": 4810, "loss": 0.2719, "lr": 3.2528458598651734e-05, "epoch": 2.312889812889813, "percentage": 46.26, "elapsed_time": "0:10:31", "remaining_time": "0:12:13", "throughput": 1417.77, "total_tokens": 895440}
|
|
{"current_steps": 2230, "total_steps": 4810, "loss": 0.2718, "lr": 3.244190653126696e-05, "epoch": 2.318087318087318, "percentage": 46.36, "elapsed_time": "0:10:31", "remaining_time": "0:12:11", "throughput": 1420.48, "total_tokens": 897616}
|
|
{"current_steps": 2235, "total_steps": 4810, "loss": 0.2636, "lr": 3.2355256481427145e-05, "epoch": 2.323284823284823, "percentage": 46.47, "elapsed_time": "0:10:32", "remaining_time": "0:12:08", "throughput": 1422.78, "total_tokens": 899536}
|
|
{"current_steps": 2240, "total_steps": 4810, "loss": 0.2544, "lr": 3.226850958999375e-05, "epoch": 2.3284823284823286, "percentage": 46.57, "elapsed_time": "0:10:32", "remaining_time": "0:12:05", "throughput": 1425.39, "total_tokens": 901648}
|
|
{"current_steps": 2245, "total_steps": 4810, "loss": 0.2258, "lr": 3.2181666999103324e-05, "epoch": 2.3336798336798337, "percentage": 46.67, "elapsed_time": "0:10:32", "remaining_time": "0:12:03", "throughput": 1427.9, "total_tokens": 903696}
|
|
{"current_steps": 2250, "total_steps": 4810, "loss": 0.3054, "lr": 3.209472985215243e-05, "epoch": 2.3388773388773387, "percentage": 46.78, "elapsed_time": "0:10:33", "remaining_time": "0:12:00", "throughput": 1430.1, "total_tokens": 905552}
|
|
{"current_steps": 2255, "total_steps": 4810, "loss": 0.3446, "lr": 3.2007699293782555e-05, "epoch": 2.3440748440748442, "percentage": 46.88, "elapsed_time": "0:10:33", "remaining_time": "0:11:57", "throughput": 1432.39, "total_tokens": 907472}
|
|
{"current_steps": 2260, "total_steps": 4810, "loss": 0.2495, "lr": 3.1920576469865115e-05, "epoch": 2.3492723492723493, "percentage": 46.99, "elapsed_time": "0:10:33", "remaining_time": "0:11:55", "throughput": 1434.92, "total_tokens": 909584}
|
|
{"current_steps": 2265, "total_steps": 4810, "loss": 0.2754, "lr": 3.183336252748627e-05, "epoch": 2.3544698544698544, "percentage": 47.09, "elapsed_time": "0:10:34", "remaining_time": "0:11:52", "throughput": 1437.42, "total_tokens": 911632}
|
|
{"current_steps": 2270, "total_steps": 4810, "loss": 0.2604, "lr": 3.1746058614931916e-05, "epoch": 2.35966735966736, "percentage": 47.19, "elapsed_time": "0:10:34", "remaining_time": "0:11:50", "throughput": 1439.81, "total_tokens": 913616}
|
|
{"current_steps": 2275, "total_steps": 4810, "loss": 0.2711, "lr": 3.16586658816725e-05, "epoch": 2.364864864864865, "percentage": 47.3, "elapsed_time": "0:10:34", "remaining_time": "0:11:47", "throughput": 1442.39, "total_tokens": 915728}
|
|
{"current_steps": 2280, "total_steps": 4810, "loss": 0.2566, "lr": 3.157118547834793e-05, "epoch": 2.37006237006237, "percentage": 47.4, "elapsed_time": "0:10:35", "remaining_time": "0:11:44", "throughput": 1444.87, "total_tokens": 917776}
|
|
{"current_steps": 2285, "total_steps": 4810, "loss": 0.2684, "lr": 3.148361855675237e-05, "epoch": 2.375259875259875, "percentage": 47.51, "elapsed_time": "0:10:35", "remaining_time": "0:11:42", "throughput": 1447.56, "total_tokens": 919952}
|
|
{"current_steps": 2290, "total_steps": 4810, "loss": 0.2294, "lr": 3.139596626981916e-05, "epoch": 2.3804573804573805, "percentage": 47.61, "elapsed_time": "0:10:35", "remaining_time": "0:11:39", "throughput": 1449.84, "total_tokens": 921872}
|
|
{"current_steps": 2295, "total_steps": 4810, "loss": 0.2603, "lr": 3.130822977160554e-05, "epoch": 2.3856548856548856, "percentage": 47.71, "elapsed_time": "0:10:36", "remaining_time": "0:11:37", "throughput": 1452.23, "total_tokens": 923856}
|
|
{"current_steps": 2300, "total_steps": 4810, "loss": 0.3942, "lr": 3.122041021727755e-05, "epoch": 2.390852390852391, "percentage": 47.82, "elapsed_time": "0:10:36", "remaining_time": "0:11:34", "throughput": 1454.81, "total_tokens": 925968}
|
|
{"current_steps": 2305, "total_steps": 4810, "loss": 0.3128, "lr": 3.1132508763094715e-05, "epoch": 2.396049896049896, "percentage": 47.92, "elapsed_time": "0:10:36", "remaining_time": "0:11:32", "throughput": 1457.09, "total_tokens": 927888}
|
|
{"current_steps": 2310, "total_steps": 4810, "loss": 0.2467, "lr": 3.104452656639492e-05, "epoch": 2.401247401247401, "percentage": 48.02, "elapsed_time": "0:10:37", "remaining_time": "0:11:29", "throughput": 1459.36, "total_tokens": 929808}
|
|
{"current_steps": 2315, "total_steps": 4810, "loss": 0.1963, "lr": 3.0956464785579124e-05, "epoch": 2.4064449064449063, "percentage": 48.13, "elapsed_time": "0:10:37", "remaining_time": "0:11:27", "throughput": 1461.62, "total_tokens": 931728}
|
|
{"current_steps": 2320, "total_steps": 4810, "loss": 0.3533, "lr": 3.0868324580096114e-05, "epoch": 2.4116424116424118, "percentage": 48.23, "elapsed_time": "0:10:37", "remaining_time": "0:11:24", "throughput": 1464.19, "total_tokens": 933840}
|
|
{"current_steps": 2325, "total_steps": 4810, "loss": 0.2936, "lr": 3.078010711042723e-05, "epoch": 2.416839916839917, "percentage": 48.34, "elapsed_time": "0:10:38", "remaining_time": "0:11:22", "throughput": 1466.56, "total_tokens": 935824}
|
|
{"current_steps": 2330, "total_steps": 4810, "loss": 0.274, "lr": 3.0691813538071105e-05, "epoch": 2.422037422037422, "percentage": 48.44, "elapsed_time": "0:10:38", "remaining_time": "0:11:19", "throughput": 1469.02, "total_tokens": 937872}
|
|
{"current_steps": 2335, "total_steps": 4810, "loss": 0.3378, "lr": 3.0603445025528376e-05, "epoch": 2.4272349272349274, "percentage": 48.54, "elapsed_time": "0:10:38", "remaining_time": "0:11:17", "throughput": 1471.58, "total_tokens": 939984}
|
|
{"current_steps": 2340, "total_steps": 4810, "loss": 0.2418, "lr": 3.051500273628633e-05, "epoch": 2.4324324324324325, "percentage": 48.65, "elapsed_time": "0:10:39", "remaining_time": "0:11:14", "throughput": 1473.94, "total_tokens": 941968}
|
|
{"current_steps": 2345, "total_steps": 4810, "loss": 0.2943, "lr": 3.0426487834803657e-05, "epoch": 2.4376299376299375, "percentage": 48.75, "elapsed_time": "0:10:39", "remaining_time": "0:11:12", "throughput": 1476.3, "total_tokens": 943952}
|
|
{"current_steps": 2350, "total_steps": 4810, "loss": 0.2435, "lr": 3.0337901486495073e-05, "epoch": 2.442827442827443, "percentage": 48.86, "elapsed_time": "0:10:39", "remaining_time": "0:11:09", "throughput": 1478.55, "total_tokens": 945872}
|
|
{"current_steps": 2355, "total_steps": 4810, "loss": 0.267, "lr": 3.0249244857715976e-05, "epoch": 2.448024948024948, "percentage": 48.96, "elapsed_time": "0:10:40", "remaining_time": "0:11:07", "throughput": 1480.91, "total_tokens": 947856}
|
|
{"current_steps": 2360, "total_steps": 4810, "loss": 0.2452, "lr": 3.01605191157471e-05, "epoch": 2.453222453222453, "percentage": 49.06, "elapsed_time": "0:10:40", "remaining_time": "0:11:04", "throughput": 1483.26, "total_tokens": 949840}
|
|
{"current_steps": 2365, "total_steps": 4810, "loss": 0.2342, "lr": 3.007172542877915e-05, "epoch": 2.4584199584199586, "percentage": 49.17, "elapsed_time": "0:10:40", "remaining_time": "0:11:02", "throughput": 1485.49, "total_tokens": 951760}
|
|
{"current_steps": 2370, "total_steps": 4810, "loss": 0.3294, "lr": 2.998286496589742e-05, "epoch": 2.4636174636174637, "percentage": 49.27, "elapsed_time": "0:10:41", "remaining_time": "0:10:59", "throughput": 1487.73, "total_tokens": 953680}
|
|
{"current_steps": 2375, "total_steps": 4810, "loss": 0.2417, "lr": 2.9893938897066393e-05, "epoch": 2.4688149688149688, "percentage": 49.38, "elapsed_time": "0:10:41", "remaining_time": "0:10:57", "throughput": 1489.95, "total_tokens": 955600}
|
|
{"current_steps": 2380, "total_steps": 4810, "loss": 0.2781, "lr": 2.9804948393114324e-05, "epoch": 2.474012474012474, "percentage": 49.48, "elapsed_time": "0:10:41", "remaining_time": "0:10:55", "throughput": 1492.09, "total_tokens": 957456}
|
|
{"current_steps": 2385, "total_steps": 4810, "loss": 0.2721, "lr": 2.9715894625717866e-05, "epoch": 2.4792099792099793, "percentage": 49.58, "elapsed_time": "0:10:42", "remaining_time": "0:10:52", "throughput": 1494.52, "total_tokens": 959504}
|
|
{"current_steps": 2390, "total_steps": 4810, "loss": 0.277, "lr": 2.9626778767386604e-05, "epoch": 2.4844074844074844, "percentage": 49.69, "elapsed_time": "0:10:42", "remaining_time": "0:10:50", "throughput": 1496.85, "total_tokens": 961488}
|
|
{"current_steps": 2395, "total_steps": 4810, "loss": 0.2759, "lr": 2.953760199144764e-05, "epoch": 2.4896049896049894, "percentage": 49.79, "elapsed_time": "0:10:42", "remaining_time": "0:10:48", "throughput": 1499.07, "total_tokens": 963408}
|
|
{"current_steps": 2400, "total_steps": 4810, "loss": 0.2633, "lr": 2.9448365472030115e-05, "epoch": 2.494802494802495, "percentage": 49.9, "elapsed_time": "0:10:42", "remaining_time": "0:10:45", "throughput": 1501.4, "total_tokens": 965392}
|
|
{"current_steps": 2405, "total_steps": 4810, "loss": 0.2744, "lr": 2.935907038404981e-05, "epoch": 2.5, "percentage": 50.0, "elapsed_time": "0:10:43", "remaining_time": "0:10:43", "throughput": 1503.81, "total_tokens": 967440}
|
|
{"current_steps": 2410, "total_steps": 4810, "loss": 0.2379, "lr": 2.92697179031936e-05, "epoch": 2.505197505197505, "percentage": 50.1, "elapsed_time": "0:10:43", "remaining_time": "0:10:40", "throughput": 1506.02, "total_tokens": 969360}
|
|
{"current_steps": 2410, "total_steps": 4810, "eval_loss": 0.2576568126678467, "epoch": 2.505197505197505, "percentage": 50.1, "elapsed_time": "0:10:44", "remaining_time": "0:10:42", "throughput": 1503.49, "total_tokens": 969360}
|
|
{"current_steps": 2415, "total_steps": 4810, "loss": 0.2923, "lr": 2.9180309205904027e-05, "epoch": 2.51039501039501, "percentage": 50.21, "elapsed_time": "0:11:35", "remaining_time": "0:11:29", "throughput": 1397.45, "total_tokens": 971472}
|
|
{"current_steps": 2420, "total_steps": 4810, "loss": 0.296, "lr": 2.9090845469363805e-05, "epoch": 2.5155925155925156, "percentage": 50.31, "elapsed_time": "0:11:35", "remaining_time": "0:11:26", "throughput": 1399.64, "total_tokens": 973456}
|
|
{"current_steps": 2425, "total_steps": 4810, "loss": 0.2911, "lr": 2.9001327871480294e-05, "epoch": 2.5207900207900207, "percentage": 50.42, "elapsed_time": "0:11:35", "remaining_time": "0:11:24", "throughput": 1401.92, "total_tokens": 975504}
|
|
{"current_steps": 2430, "total_steps": 4810, "loss": 0.285, "lr": 2.8911757590870027e-05, "epoch": 2.525987525987526, "percentage": 50.52, "elapsed_time": "0:11:36", "remaining_time": "0:11:21", "throughput": 1404.2, "total_tokens": 977552}
|
|
{"current_steps": 2435, "total_steps": 4810, "loss": 0.2552, "lr": 2.8822135806843154e-05, "epoch": 2.5311850311850312, "percentage": 50.62, "elapsed_time": "0:11:36", "remaining_time": "0:11:19", "throughput": 1406.38, "total_tokens": 979536}
|
|
{"current_steps": 2440, "total_steps": 4810, "loss": 0.2906, "lr": 2.8732463699387968e-05, "epoch": 2.5363825363825363, "percentage": 50.73, "elapsed_time": "0:11:36", "remaining_time": "0:11:16", "throughput": 1408.66, "total_tokens": 981584}
|
|
{"current_steps": 2445, "total_steps": 4810, "loss": 0.2795, "lr": 2.8642742449155284e-05, "epoch": 2.5415800415800414, "percentage": 50.83, "elapsed_time": "0:11:37", "remaining_time": "0:11:14", "throughput": 1410.85, "total_tokens": 983632}
|
|
{"current_steps": 2450, "total_steps": 4810, "loss": 0.228, "lr": 2.855297323744301e-05, "epoch": 2.546777546777547, "percentage": 50.94, "elapsed_time": "0:11:37", "remaining_time": "0:11:11", "throughput": 1413.12, "total_tokens": 985680}
|
|
{"current_steps": 2455, "total_steps": 4810, "loss": 0.2414, "lr": 2.8463157246180468e-05, "epoch": 2.551975051975052, "percentage": 51.04, "elapsed_time": "0:11:37", "remaining_time": "0:11:09", "throughput": 1415.3, "total_tokens": 987664}
|
|
{"current_steps": 2460, "total_steps": 4810, "loss": 0.2636, "lr": 2.8373295657912945e-05, "epoch": 2.5571725571725574, "percentage": 51.14, "elapsed_time": "0:11:38", "remaining_time": "0:11:06", "throughput": 1417.47, "total_tokens": 989648}
|
|
{"current_steps": 2465, "total_steps": 4810, "loss": 0.2691, "lr": 2.828338965578603e-05, "epoch": 2.5623700623700625, "percentage": 51.25, "elapsed_time": "0:11:38", "remaining_time": "0:11:04", "throughput": 1419.73, "total_tokens": 991696}
|
|
{"current_steps": 2470, "total_steps": 4810, "loss": 0.2598, "lr": 2.8193440423530114e-05, "epoch": 2.5675675675675675, "percentage": 51.35, "elapsed_time": "0:11:38", "remaining_time": "0:11:02", "throughput": 1421.81, "total_tokens": 993616}
|
|
{"current_steps": 2475, "total_steps": 4810, "loss": 0.2688, "lr": 2.810344914544475e-05, "epoch": 2.5727650727650726, "percentage": 51.46, "elapsed_time": "0:11:39", "remaining_time": "0:10:59", "throughput": 1424.07, "total_tokens": 995664}
|
|
{"current_steps": 2480, "total_steps": 4810, "loss": 0.295, "lr": 2.8013417006383076e-05, "epoch": 2.577962577962578, "percentage": 51.56, "elapsed_time": "0:11:39", "remaining_time": "0:10:57", "throughput": 1426.24, "total_tokens": 997648}
|
|
{"current_steps": 2485, "total_steps": 4810, "loss": 0.2802, "lr": 2.792334519173624e-05, "epoch": 2.583160083160083, "percentage": 51.66, "elapsed_time": "0:11:39", "remaining_time": "0:10:54", "throughput": 1428.49, "total_tokens": 999696}
|
|
{"current_steps": 2490, "total_steps": 4810, "loss": 0.2897, "lr": 2.7833234887417743e-05, "epoch": 2.5883575883575882, "percentage": 51.77, "elapsed_time": "0:11:40", "remaining_time": "0:10:52", "throughput": 1430.65, "total_tokens": 1001680}
|
|
{"current_steps": 2495, "total_steps": 4810, "loss": 0.2723, "lr": 2.7743087279847868e-05, "epoch": 2.5935550935550937, "percentage": 51.87, "elapsed_time": "0:11:40", "remaining_time": "0:10:49", "throughput": 1432.9, "total_tokens": 1003728}
|
|
{"current_steps": 2500, "total_steps": 4810, "loss": 0.2874, "lr": 2.765290355593805e-05, "epoch": 2.598752598752599, "percentage": 51.98, "elapsed_time": "0:11:40", "remaining_time": "0:10:47", "throughput": 1434.87, "total_tokens": 1005584}
|
|
{"current_steps": 2505, "total_steps": 4810, "loss": 0.2405, "lr": 2.7562684903075238e-05, "epoch": 2.603950103950104, "percentage": 52.08, "elapsed_time": "0:11:41", "remaining_time": "0:10:45", "throughput": 1437.21, "total_tokens": 1007696}
|
|
{"current_steps": 2510, "total_steps": 4810, "loss": 0.2737, "lr": 2.7472432509106248e-05, "epoch": 2.609147609147609, "percentage": 52.18, "elapsed_time": "0:11:41", "remaining_time": "0:10:42", "throughput": 1439.36, "total_tokens": 1009680}
|
|
{"current_steps": 2515, "total_steps": 4810, "loss": 0.2753, "lr": 2.7382147562322174e-05, "epoch": 2.6143451143451144, "percentage": 52.29, "elapsed_time": "0:11:41", "remaining_time": "0:10:40", "throughput": 1441.61, "total_tokens": 1011728}
|
|
{"current_steps": 2520, "total_steps": 4810, "loss": 0.2553, "lr": 2.729183125144269e-05, "epoch": 2.6195426195426195, "percentage": 52.39, "elapsed_time": "0:11:42", "remaining_time": "0:10:38", "throughput": 1443.94, "total_tokens": 1013840}
|
|
{"current_steps": 2525, "total_steps": 4810, "loss": 0.2564, "lr": 2.7201484765600426e-05, "epoch": 2.624740124740125, "percentage": 52.49, "elapsed_time": "0:11:42", "remaining_time": "0:10:35", "throughput": 1446.08, "total_tokens": 1015824}
|
|
{"current_steps": 2530, "total_steps": 4810, "loss": 0.277, "lr": 2.7111109294325297e-05, "epoch": 2.62993762993763, "percentage": 52.6, "elapsed_time": "0:11:42", "remaining_time": "0:10:33", "throughput": 1448.13, "total_tokens": 1017744}
|
|
{"current_steps": 2535, "total_steps": 4810, "loss": 0.2439, "lr": 2.702070602752887e-05, "epoch": 2.635135135135135, "percentage": 52.7, "elapsed_time": "0:11:43", "remaining_time": "0:10:31", "throughput": 1450.27, "total_tokens": 1019728}
|
|
{"current_steps": 2540, "total_steps": 4810, "loss": 0.2958, "lr": 2.693027615548864e-05, "epoch": 2.64033264033264, "percentage": 52.81, "elapsed_time": "0:11:43", "remaining_time": "0:10:28", "throughput": 1452.6, "total_tokens": 1021840}
|
|
{"current_steps": 2545, "total_steps": 4810, "loss": 0.2809, "lr": 2.6839820868832433e-05, "epoch": 2.6455301455301456, "percentage": 52.91, "elapsed_time": "0:11:43", "remaining_time": "0:10:26", "throughput": 1454.73, "total_tokens": 1023824}
|
|
{"current_steps": 2550, "total_steps": 4810, "loss": 0.2583, "lr": 2.6749341358522674e-05, "epoch": 2.6507276507276507, "percentage": 53.01, "elapsed_time": "0:11:44", "remaining_time": "0:10:24", "throughput": 1456.6, "total_tokens": 1025616}
|
|
{"current_steps": 2555, "total_steps": 4810, "loss": 0.2494, "lr": 2.665883881584072e-05, "epoch": 2.6559251559251558, "percentage": 53.12, "elapsed_time": "0:11:44", "remaining_time": "0:10:21", "throughput": 1458.82, "total_tokens": 1027664}
|
|
{"current_steps": 2560, "total_steps": 4810, "loss": 0.2477, "lr": 2.6568314432371183e-05, "epoch": 2.6611226611226613, "percentage": 53.22, "elapsed_time": "0:11:44", "remaining_time": "0:10:19", "throughput": 1460.95, "total_tokens": 1029648}
|
|
{"current_steps": 2565, "total_steps": 4810, "loss": 0.2402, "lr": 2.6477769399986245e-05, "epoch": 2.6663201663201663, "percentage": 53.33, "elapsed_time": "0:11:45", "remaining_time": "0:10:17", "throughput": 1463.08, "total_tokens": 1031632}
|
|
{"current_steps": 2570, "total_steps": 4810, "loss": 0.2583, "lr": 2.6387204910829956e-05, "epoch": 2.6715176715176714, "percentage": 53.43, "elapsed_time": "0:11:45", "remaining_time": "0:10:14", "throughput": 1465.03, "total_tokens": 1033488}
|
|
{"current_steps": 2575, "total_steps": 4810, "loss": 0.2162, "lr": 2.629662215730253e-05, "epoch": 2.6767151767151764, "percentage": 53.53, "elapsed_time": "0:11:45", "remaining_time": "0:10:12", "throughput": 1467.25, "total_tokens": 1035536}
|
|
{"current_steps": 2580, "total_steps": 4810, "loss": 0.2652, "lr": 2.6206022332044667e-05, "epoch": 2.681912681912682, "percentage": 53.64, "elapsed_time": "0:11:46", "remaining_time": "0:10:10", "throughput": 1469.46, "total_tokens": 1037584}
|
|
{"current_steps": 2585, "total_steps": 4810, "loss": 0.2401, "lr": 2.6115406627921825e-05, "epoch": 2.687110187110187, "percentage": 53.74, "elapsed_time": "0:11:46", "remaining_time": "0:10:08", "throughput": 1471.58, "total_tokens": 1039568}
|
|
{"current_steps": 2590, "total_steps": 4810, "loss": 0.2416, "lr": 2.6024776238008543e-05, "epoch": 2.6923076923076925, "percentage": 53.85, "elapsed_time": "0:11:46", "remaining_time": "0:10:05", "throughput": 1473.79, "total_tokens": 1041616}
|
|
{"current_steps": 2595, "total_steps": 4810, "loss": 0.2479, "lr": 2.593413235557271e-05, "epoch": 2.6975051975051976, "percentage": 53.95, "elapsed_time": "0:11:47", "remaining_time": "0:10:03", "throughput": 1476.0, "total_tokens": 1043664}
|
|
{"current_steps": 2600, "total_steps": 4810, "loss": 0.2624, "lr": 2.5843476174059872e-05, "epoch": 2.7027027027027026, "percentage": 54.05, "elapsed_time": "0:11:47", "remaining_time": "0:10:01", "throughput": 1477.94, "total_tokens": 1045520}
|
|
{"current_steps": 2605, "total_steps": 4810, "loss": 0.2519, "lr": 2.5752808887077477e-05, "epoch": 2.7079002079002077, "percentage": 54.16, "elapsed_time": "0:11:47", "remaining_time": "0:09:59", "throughput": 1479.87, "total_tokens": 1047376}
|
|
{"current_steps": 2610, "total_steps": 4810, "loss": 0.2347, "lr": 2.5662131688379242e-05, "epoch": 2.713097713097713, "percentage": 54.26, "elapsed_time": "0:11:48", "remaining_time": "0:09:56", "throughput": 1481.98, "total_tokens": 1049360}
|
|
{"current_steps": 2615, "total_steps": 4810, "loss": 0.2785, "lr": 2.5571445771849327e-05, "epoch": 2.7182952182952183, "percentage": 54.37, "elapsed_time": "0:11:48", "remaining_time": "0:09:54", "throughput": 1484.09, "total_tokens": 1051344}
|
|
{"current_steps": 2620, "total_steps": 4810, "loss": 0.2622, "lr": 2.548075233148674e-05, "epoch": 2.7234927234927238, "percentage": 54.47, "elapsed_time": "0:11:48", "remaining_time": "0:09:52", "throughput": 1486.11, "total_tokens": 1053264}
|
|
{"current_steps": 2625, "total_steps": 4810, "loss": 0.2798, "lr": 2.5390052561389478e-05, "epoch": 2.728690228690229, "percentage": 54.57, "elapsed_time": "0:11:49", "remaining_time": "0:09:50", "throughput": 1488.21, "total_tokens": 1055248}
|
|
{"current_steps": 2630, "total_steps": 4810, "loss": 0.2568, "lr": 2.529934765573893e-05, "epoch": 2.733887733887734, "percentage": 54.68, "elapsed_time": "0:11:49", "remaining_time": "0:09:48", "throughput": 1490.13, "total_tokens": 1057104}
|
|
{"current_steps": 2635, "total_steps": 4810, "loss": 0.2622, "lr": 2.520863880878408e-05, "epoch": 2.739085239085239, "percentage": 54.78, "elapsed_time": "0:11:49", "remaining_time": "0:09:45", "throughput": 1492.14, "total_tokens": 1059024}
|
|
{"current_steps": 2640, "total_steps": 4810, "loss": 0.2707, "lr": 2.511792721482581e-05, "epoch": 2.7442827442827444, "percentage": 54.89, "elapsed_time": "0:11:50", "remaining_time": "0:09:43", "throughput": 1494.15, "total_tokens": 1060944}
|
|
{"current_steps": 2645, "total_steps": 4810, "loss": 0.2525, "lr": 2.502721406820116e-05, "epoch": 2.7494802494802495, "percentage": 54.99, "elapsed_time": "0:11:50", "remaining_time": "0:09:41", "throughput": 1496.34, "total_tokens": 1062992}
|
|
{"current_steps": 2650, "total_steps": 4810, "loss": 0.2934, "lr": 2.4936500563267627e-05, "epoch": 2.7546777546777546, "percentage": 55.09, "elapsed_time": "0:11:50", "remaining_time": "0:09:39", "throughput": 1498.26, "total_tokens": 1064848}
|
|
{"current_steps": 2651, "total_steps": 4810, "eval_loss": 0.2561495900154114, "epoch": 2.7557172557172556, "percentage": 55.11, "elapsed_time": "0:11:51", "remaining_time": "0:09:39", "throughput": 1496.46, "total_tokens": 1065232}
|
|
{"current_steps": 2655, "total_steps": 4810, "loss": 0.2493, "lr": 2.4845787894387425e-05, "epoch": 2.75987525987526, "percentage": 55.2, "elapsed_time": "0:12:35", "remaining_time": "0:10:13", "throughput": 1411.33, "total_tokens": 1066832}
|
|
{"current_steps": 2660, "total_steps": 4810, "loss": 0.2661, "lr": 2.4755077255911743e-05, "epoch": 2.765072765072765, "percentage": 55.3, "elapsed_time": "0:12:36", "remaining_time": "0:10:11", "throughput": 1413.43, "total_tokens": 1068880}
|
|
{"current_steps": 2665, "total_steps": 4810, "loss": 0.2398, "lr": 2.4664369842165068e-05, "epoch": 2.77027027027027, "percentage": 55.41, "elapsed_time": "0:12:36", "remaining_time": "0:10:08", "throughput": 1415.46, "total_tokens": 1070864}
|
|
{"current_steps": 2670, "total_steps": 4810, "loss": 0.2523, "lr": 2.4573666847429384e-05, "epoch": 2.7754677754677752, "percentage": 55.51, "elapsed_time": "0:12:36", "remaining_time": "0:10:06", "throughput": 1417.47, "total_tokens": 1072848}
|
|
{"current_steps": 2675, "total_steps": 4810, "loss": 0.3167, "lr": 2.4482969465928543e-05, "epoch": 2.7806652806652807, "percentage": 55.61, "elapsed_time": "0:12:37", "remaining_time": "0:10:04", "throughput": 1419.49, "total_tokens": 1074832}
|
|
{"current_steps": 2680, "total_steps": 4810, "loss": 0.2951, "lr": 2.4392278891812455e-05, "epoch": 2.785862785862786, "percentage": 55.72, "elapsed_time": "0:12:37", "remaining_time": "0:10:02", "throughput": 1421.67, "total_tokens": 1076944}
|
|
{"current_steps": 2685, "total_steps": 4810, "loss": 0.2844, "lr": 2.430159631914141e-05, "epoch": 2.7910602910602913, "percentage": 55.82, "elapsed_time": "0:12:37", "remaining_time": "0:09:59", "throughput": 1423.52, "total_tokens": 1078800}
|
|
{"current_steps": 2690, "total_steps": 4810, "loss": 0.2706, "lr": 2.4210922941870367e-05, "epoch": 2.7962577962577964, "percentage": 55.93, "elapsed_time": "0:12:38", "remaining_time": "0:09:57", "throughput": 1425.7, "total_tokens": 1080912}
|
|
{"current_steps": 2695, "total_steps": 4810, "loss": 0.244, "lr": 2.41202599538332e-05, "epoch": 2.8014553014553014, "percentage": 56.03, "elapsed_time": "0:12:38", "remaining_time": "0:09:55", "throughput": 1427.79, "total_tokens": 1082960}
|
|
{"current_steps": 2700, "total_steps": 4810, "loss": 0.2877, "lr": 2.402960854872697e-05, "epoch": 2.8066528066528065, "percentage": 56.13, "elapsed_time": "0:12:38", "remaining_time": "0:09:52", "throughput": 1429.89, "total_tokens": 1085008}
|
|
{"current_steps": 2705, "total_steps": 4810, "loss": 0.1818, "lr": 2.39389699200963e-05, "epoch": 2.811850311850312, "percentage": 56.24, "elapsed_time": "0:12:39", "remaining_time": "0:09:50", "throughput": 1432.15, "total_tokens": 1087184}
|
|
{"current_steps": 2710, "total_steps": 4810, "loss": 0.2384, "lr": 2.384834526131752e-05, "epoch": 2.817047817047817, "percentage": 56.34, "elapsed_time": "0:12:39", "remaining_time": "0:09:48", "throughput": 1434.07, "total_tokens": 1089104}
|
|
{"current_steps": 2715, "total_steps": 4810, "loss": 0.2707, "lr": 2.3757735765583083e-05, "epoch": 2.822245322245322, "percentage": 56.44, "elapsed_time": "0:12:39", "remaining_time": "0:09:46", "throughput": 1435.99, "total_tokens": 1091024}
|
|
{"current_steps": 2720, "total_steps": 4810, "loss": 0.2399, "lr": 2.366714262588577e-05, "epoch": 2.8274428274428276, "percentage": 56.55, "elapsed_time": "0:12:40", "remaining_time": "0:09:44", "throughput": 1437.99, "total_tokens": 1093008}
|
|
{"current_steps": 2725, "total_steps": 4810, "loss": 0.2595, "lr": 2.3576567035003027e-05, "epoch": 2.8326403326403327, "percentage": 56.65, "elapsed_time": "0:12:40", "remaining_time": "0:09:41", "throughput": 1439.99, "total_tokens": 1094992}
|
|
{"current_steps": 2730, "total_steps": 4810, "loss": 0.2918, "lr": 2.3486010185481248e-05, "epoch": 2.8378378378378377, "percentage": 56.76, "elapsed_time": "0:12:40", "remaining_time": "0:09:39", "throughput": 1442.08, "total_tokens": 1097040}
|
|
{"current_steps": 2735, "total_steps": 4810, "loss": 0.274, "lr": 2.3395473269620056e-05, "epoch": 2.8430353430353428, "percentage": 56.86, "elapsed_time": "0:12:41", "remaining_time": "0:09:37", "throughput": 1443.99, "total_tokens": 1098960}
|
|
{"current_steps": 2740, "total_steps": 4810, "loss": 0.2749, "lr": 2.330495747945665e-05, "epoch": 2.8482328482328483, "percentage": 56.96, "elapsed_time": "0:12:41", "remaining_time": "0:09:35", "throughput": 1446.31, "total_tokens": 1101200}
|
|
{"current_steps": 2745, "total_steps": 4810, "loss": 0.2766, "lr": 2.321446400675005e-05, "epoch": 2.8534303534303533, "percentage": 57.07, "elapsed_time": "0:12:41", "remaining_time": "0:09:33", "throughput": 1448.23, "total_tokens": 1103120}
|
|
{"current_steps": 2750, "total_steps": 4810, "loss": 0.255, "lr": 2.3123994042965453e-05, "epoch": 2.858627858627859, "percentage": 57.17, "elapsed_time": "0:12:42", "remaining_time": "0:09:30", "throughput": 1450.3, "total_tokens": 1105168}
|
|
{"current_steps": 2755, "total_steps": 4810, "loss": 0.2452, "lr": 2.3033548779258535e-05, "epoch": 2.863825363825364, "percentage": 57.28, "elapsed_time": "0:12:42", "remaining_time": "0:09:28", "throughput": 1452.29, "total_tokens": 1107152}
|
|
{"current_steps": 2760, "total_steps": 4810, "loss": 0.3016, "lr": 2.294312940645975e-05, "epoch": 2.869022869022869, "percentage": 57.38, "elapsed_time": "0:12:42", "remaining_time": "0:09:26", "throughput": 1454.36, "total_tokens": 1109200}
|
|
{"current_steps": 2765, "total_steps": 4810, "loss": 0.282, "lr": 2.2852737115058682e-05, "epoch": 2.874220374220374, "percentage": 57.48, "elapsed_time": "0:12:42", "remaining_time": "0:09:24", "throughput": 1456.43, "total_tokens": 1111248}
|
|
{"current_steps": 2770, "total_steps": 4810, "loss": 0.2753, "lr": 2.276237309518834e-05, "epoch": 2.8794178794178795, "percentage": 57.59, "elapsed_time": "0:12:43", "remaining_time": "0:09:22", "throughput": 1458.42, "total_tokens": 1113232}
|
|
{"current_steps": 2775, "total_steps": 4810, "loss": 0.268, "lr": 2.2672038536609487e-05, "epoch": 2.8846153846153846, "percentage": 57.69, "elapsed_time": "0:12:43", "remaining_time": "0:09:20", "throughput": 1460.4, "total_tokens": 1115216}
|
|
{"current_steps": 2780, "total_steps": 4810, "loss": 0.2816, "lr": 2.2581734628695034e-05, "epoch": 2.88981288981289, "percentage": 57.8, "elapsed_time": "0:12:43", "remaining_time": "0:09:17", "throughput": 1462.47, "total_tokens": 1117264}
|
|
{"current_steps": 2785, "total_steps": 4810, "loss": 0.2795, "lr": 2.2491462560414287e-05, "epoch": 2.895010395010395, "percentage": 57.9, "elapsed_time": "0:12:44", "remaining_time": "0:09:15", "throughput": 1464.61, "total_tokens": 1119376}
|
|
{"current_steps": 2790, "total_steps": 4810, "loss": 0.283, "lr": 2.2401223520317362e-05, "epoch": 2.9002079002079, "percentage": 58.0, "elapsed_time": "0:12:44", "remaining_time": "0:09:13", "throughput": 1466.68, "total_tokens": 1121424}
|
|
{"current_steps": 2795, "total_steps": 4810, "loss": 0.2663, "lr": 2.2311018696519532e-05, "epoch": 2.9054054054054053, "percentage": 58.11, "elapsed_time": "0:12:44", "remaining_time": "0:09:11", "throughput": 1468.74, "total_tokens": 1123472}
|
|
{"current_steps": 2800, "total_steps": 4810, "loss": 0.218, "lr": 2.222084927668553e-05, "epoch": 2.9106029106029108, "percentage": 58.21, "elapsed_time": "0:12:45", "remaining_time": "0:09:09", "throughput": 1470.88, "total_tokens": 1125584}
|
|
{"current_steps": 2805, "total_steps": 4810, "loss": 0.2474, "lr": 2.2130716448014e-05, "epoch": 2.915800415800416, "percentage": 58.32, "elapsed_time": "0:12:45", "remaining_time": "0:09:07", "throughput": 1472.85, "total_tokens": 1127568}
|
|
{"current_steps": 2810, "total_steps": 4810, "loss": 0.3308, "lr": 2.204062139722176e-05, "epoch": 2.920997920997921, "percentage": 58.42, "elapsed_time": "0:12:45", "remaining_time": "0:09:05", "throughput": 1474.83, "total_tokens": 1129552}
|
|
{"current_steps": 2815, "total_steps": 4810, "loss": 0.2914, "lr": 2.1950565310528266e-05, "epoch": 2.9261954261954264, "percentage": 58.52, "elapsed_time": "0:12:46", "remaining_time": "0:09:03", "throughput": 1476.71, "total_tokens": 1131472}
|
|
{"current_steps": 2820, "total_steps": 4810, "loss": 0.3008, "lr": 2.186054937363996e-05, "epoch": 2.9313929313929314, "percentage": 58.63, "elapsed_time": "0:12:46", "remaining_time": "0:09:00", "throughput": 1478.6, "total_tokens": 1133392}
|
|
{"current_steps": 2825, "total_steps": 4810, "loss": 0.2747, "lr": 2.1770574771734642e-05, "epoch": 2.9365904365904365, "percentage": 58.73, "elapsed_time": "0:12:46", "remaining_time": "0:08:58", "throughput": 1480.65, "total_tokens": 1135440}
|
|
{"current_steps": 2830, "total_steps": 4810, "loss": 0.2694, "lr": 2.168064268944591e-05, "epoch": 2.9417879417879416, "percentage": 58.84, "elapsed_time": "0:12:47", "remaining_time": "0:08:56", "throughput": 1482.59, "total_tokens": 1137424}
|
|
{"current_steps": 2835, "total_steps": 4810, "loss": 0.2574, "lr": 2.159075431084751e-05, "epoch": 2.946985446985447, "percentage": 58.94, "elapsed_time": "0:12:47", "remaining_time": "0:08:54", "throughput": 1484.56, "total_tokens": 1139408}
|
|
{"current_steps": 2840, "total_steps": 4810, "loss": 0.2677, "lr": 2.1500910819437766e-05, "epoch": 2.952182952182952, "percentage": 59.04, "elapsed_time": "0:12:47", "remaining_time": "0:08:52", "throughput": 1486.6, "total_tokens": 1141456}
|
|
{"current_steps": 2845, "total_steps": 4810, "loss": 0.2513, "lr": 2.141111339812405e-05, "epoch": 2.9573804573804576, "percentage": 59.15, "elapsed_time": "0:12:48", "remaining_time": "0:08:50", "throughput": 1488.56, "total_tokens": 1143440}
|
|
{"current_steps": 2850, "total_steps": 4810, "loss": 0.256, "lr": 2.1321363229207096e-05, "epoch": 2.9625779625779627, "percentage": 59.25, "elapsed_time": "0:12:48", "remaining_time": "0:08:48", "throughput": 1490.44, "total_tokens": 1145360}
|
|
{"current_steps": 2855, "total_steps": 4810, "loss": 0.2779, "lr": 2.123166149436556e-05, "epoch": 2.9677754677754677, "percentage": 59.36, "elapsed_time": "0:12:48", "remaining_time": "0:08:46", "throughput": 1492.32, "total_tokens": 1147280}
|
|
{"current_steps": 2860, "total_steps": 4810, "loss": 0.2814, "lr": 2.114200937464035e-05, "epoch": 2.972972972972973, "percentage": 59.46, "elapsed_time": "0:12:49", "remaining_time": "0:08:44", "throughput": 1494.2, "total_tokens": 1149200}
|
|
{"current_steps": 2865, "total_steps": 4810, "loss": 0.2671, "lr": 2.1052408050419152e-05, "epoch": 2.9781704781704783, "percentage": 59.56, "elapsed_time": "0:12:49", "remaining_time": "0:08:42", "throughput": 1496.15, "total_tokens": 1151184}
|
|
{"current_steps": 2870, "total_steps": 4810, "loss": 0.2391, "lr": 2.0962858701420866e-05, "epoch": 2.9833679833679834, "percentage": 59.67, "elapsed_time": "0:12:49", "remaining_time": "0:08:40", "throughput": 1498.19, "total_tokens": 1153232}
|
|
{"current_steps": 2875, "total_steps": 4810, "loss": 0.251, "lr": 2.0873362506680057e-05, "epoch": 2.9885654885654884, "percentage": 59.77, "elapsed_time": "0:12:50", "remaining_time": "0:08:38", "throughput": 1500.14, "total_tokens": 1155216}
|
|
{"current_steps": 2880, "total_steps": 4810, "loss": 0.2661, "lr": 2.078392064453144e-05, "epoch": 2.993762993762994, "percentage": 59.88, "elapsed_time": "0:12:50", "remaining_time": "0:08:36", "throughput": 1502.17, "total_tokens": 1157264}
|
|
{"current_steps": 2885, "total_steps": 4810, "loss": 0.2609, "lr": 2.0694534292594392e-05, "epoch": 2.998960498960499, "percentage": 59.98, "elapsed_time": "0:12:50", "remaining_time": "0:08:34", "throughput": 1504.21, "total_tokens": 1159312}
|
|
{"current_steps": 2890, "total_steps": 4810, "loss": 0.2209, "lr": 2.0605204627757403e-05, "epoch": 3.004158004158004, "percentage": 60.08, "elapsed_time": "0:12:51", "remaining_time": "0:08:32", "throughput": 1505.85, "total_tokens": 1161248}
|
|
{"current_steps": 2892, "total_steps": 4810, "eval_loss": 0.257083535194397, "epoch": 3.006237006237006, "percentage": 60.12, "elapsed_time": "0:12:52", "remaining_time": "0:08:32", "throughput": 1504.54, "total_tokens": 1162016}
|
|
{"current_steps": 2895, "total_steps": 4810, "loss": 0.2146, "lr": 2.051593282616262e-05, "epoch": 3.0093555093555096, "percentage": 60.19, "elapsed_time": "0:13:26", "remaining_time": "0:08:53", "throughput": 1442.55, "total_tokens": 1163168}
|
|
{"current_steps": 2900, "total_steps": 4810, "loss": 0.2637, "lr": 2.0426720063190335e-05, "epoch": 3.0145530145530146, "percentage": 60.29, "elapsed_time": "0:13:26", "remaining_time": "0:08:51", "throughput": 1444.35, "total_tokens": 1165088}
|
|
{"current_steps": 2905, "total_steps": 4810, "loss": 0.2303, "lr": 2.033756751344352e-05, "epoch": 3.0197505197505197, "percentage": 60.4, "elapsed_time": "0:13:26", "remaining_time": "0:08:49", "throughput": 1446.31, "total_tokens": 1167136}
|
|
{"current_steps": 2910, "total_steps": 4810, "loss": 0.2915, "lr": 2.0248476350732368e-05, "epoch": 3.024948024948025, "percentage": 60.5, "elapsed_time": "0:13:27", "remaining_time": "0:08:47", "throughput": 1448.19, "total_tokens": 1169120}
|
|
{"current_steps": 2915, "total_steps": 4810, "loss": 0.3333, "lr": 2.0159447748058805e-05, "epoch": 3.0301455301455302, "percentage": 60.6, "elapsed_time": "0:13:27", "remaining_time": "0:08:45", "throughput": 1449.99, "total_tokens": 1171040}
|
|
{"current_steps": 2920, "total_steps": 4810, "loss": 0.2142, "lr": 2.0070482877601127e-05, "epoch": 3.0353430353430353, "percentage": 60.71, "elapsed_time": "0:13:27", "remaining_time": "0:08:42", "throughput": 1451.87, "total_tokens": 1173024}
|
|
{"current_steps": 2925, "total_steps": 4810, "loss": 0.2912, "lr": 1.998158291069845e-05, "epoch": 3.0405405405405403, "percentage": 60.81, "elapsed_time": "0:13:28", "remaining_time": "0:08:40", "throughput": 1453.66, "total_tokens": 1174944}
|
|
{"current_steps": 2930, "total_steps": 4810, "loss": 0.2799, "lr": 1.9892749017835384e-05, "epoch": 3.045738045738046, "percentage": 60.91, "elapsed_time": "0:13:28", "remaining_time": "0:08:38", "throughput": 1455.7, "total_tokens": 1177056}
|
|
{"current_steps": 2935, "total_steps": 4810, "loss": 0.318, "lr": 1.9803982368626583e-05, "epoch": 3.050935550935551, "percentage": 61.02, "elapsed_time": "0:13:28", "remaining_time": "0:08:36", "throughput": 1457.49, "total_tokens": 1178976}
|
|
{"current_steps": 2940, "total_steps": 4810, "loss": 0.2836, "lr": 1.9715284131801353e-05, "epoch": 3.056133056133056, "percentage": 61.12, "elapsed_time": "0:13:29", "remaining_time": "0:08:34", "throughput": 1459.44, "total_tokens": 1181024}
|
|
{"current_steps": 2945, "total_steps": 4810, "loss": 0.2515, "lr": 1.9626655475188238e-05, "epoch": 3.0613305613305615, "percentage": 61.23, "elapsed_time": "0:13:29", "remaining_time": "0:08:32", "throughput": 1461.31, "total_tokens": 1183008}
|
|
{"current_steps": 2950, "total_steps": 4810, "loss": 0.2949, "lr": 1.953809756569971e-05, "epoch": 3.0665280665280665, "percentage": 61.33, "elapsed_time": "0:13:29", "remaining_time": "0:08:30", "throughput": 1463.26, "total_tokens": 1185056}
|
|
{"current_steps": 2955, "total_steps": 4810, "loss": 0.2646, "lr": 1.9449611569316717e-05, "epoch": 3.0717255717255716, "percentage": 61.43, "elapsed_time": "0:13:30", "remaining_time": "0:08:28", "throughput": 1465.05, "total_tokens": 1186976}
|
|
{"current_steps": 2960, "total_steps": 4810, "loss": 0.3027, "lr": 1.9361198651073408e-05, "epoch": 3.076923076923077, "percentage": 61.54, "elapsed_time": "0:13:30", "remaining_time": "0:08:26", "throughput": 1466.91, "total_tokens": 1188960}
|
|
{"current_steps": 2965, "total_steps": 4810, "loss": 0.2629, "lr": 1.9272859975041754e-05, "epoch": 3.082120582120582, "percentage": 61.64, "elapsed_time": "0:13:30", "remaining_time": "0:08:24", "throughput": 1468.78, "total_tokens": 1190944}
|
|
{"current_steps": 2970, "total_steps": 4810, "loss": 0.2261, "lr": 1.918459670431622e-05, "epoch": 3.087318087318087, "percentage": 61.75, "elapsed_time": "0:13:31", "remaining_time": "0:08:22", "throughput": 1470.59, "total_tokens": 1192928}
|
|
{"current_steps": 2975, "total_steps": 4810, "loss": 0.239, "lr": 1.9096410000998475e-05, "epoch": 3.0925155925155927, "percentage": 61.85, "elapsed_time": "0:13:31", "remaining_time": "0:08:20", "throughput": 1472.36, "total_tokens": 1194848}
|
|
{"current_steps": 2980, "total_steps": 4810, "loss": 0.2484, "lr": 1.900830102618206e-05, "epoch": 3.0977130977130978, "percentage": 61.95, "elapsed_time": "0:13:31", "remaining_time": "0:08:18", "throughput": 1474.14, "total_tokens": 1196768}
|
|
{"current_steps": 2985, "total_steps": 4810, "loss": 0.2739, "lr": 1.892027093993716e-05, "epoch": 3.102910602910603, "percentage": 62.06, "elapsed_time": "0:13:32", "remaining_time": "0:08:16", "throughput": 1475.91, "total_tokens": 1198688}
|
|
{"current_steps": 2990, "total_steps": 4810, "loss": 0.2701, "lr": 1.8832320901295227e-05, "epoch": 3.108108108108108, "percentage": 62.16, "elapsed_time": "0:13:32", "remaining_time": "0:08:14", "throughput": 1477.76, "total_tokens": 1200672}
|
|
{"current_steps": 2995, "total_steps": 4810, "loss": 0.29, "lr": 1.8744452068233825e-05, "epoch": 3.1133056133056134, "percentage": 62.27, "elapsed_time": "0:13:32", "remaining_time": "0:08:12", "throughput": 1479.69, "total_tokens": 1202720}
|
|
{"current_steps": 3000, "total_steps": 4810, "loss": 0.2816, "lr": 1.8656665597661333e-05, "epoch": 3.1185031185031185, "percentage": 62.37, "elapsed_time": "0:13:33", "remaining_time": "0:08:10", "throughput": 1481.62, "total_tokens": 1204768}
|
|
{"current_steps": 3005, "total_steps": 4810, "loss": 0.2677, "lr": 1.85689626454017e-05, "epoch": 3.1237006237006235, "percentage": 62.47, "elapsed_time": "0:13:33", "remaining_time": "0:08:08", "throughput": 1483.7, "total_tokens": 1206944}
|
|
{"current_steps": 3010, "total_steps": 4810, "loss": 0.2308, "lr": 1.8481344366179284e-05, "epoch": 3.128898128898129, "percentage": 62.58, "elapsed_time": "0:13:33", "remaining_time": "0:08:06", "throughput": 1485.7, "total_tokens": 1209056}
|
|
{"current_steps": 3015, "total_steps": 4810, "loss": 0.2745, "lr": 1.839381191360358e-05, "epoch": 3.134095634095634, "percentage": 62.68, "elapsed_time": "0:13:34", "remaining_time": "0:08:04", "throughput": 1487.47, "total_tokens": 1210976}
|
|
{"current_steps": 3020, "total_steps": 4810, "loss": 0.2645, "lr": 1.8306366440154066e-05, "epoch": 3.139293139293139, "percentage": 62.79, "elapsed_time": "0:13:34", "remaining_time": "0:08:02", "throughput": 1489.39, "total_tokens": 1213024}
|
|
{"current_steps": 3025, "total_steps": 4810, "loss": 0.2854, "lr": 1.821900909716504e-05, "epoch": 3.1444906444906446, "percentage": 62.89, "elapsed_time": "0:13:34", "remaining_time": "0:08:00", "throughput": 1491.39, "total_tokens": 1215136}
|
|
{"current_steps": 3030, "total_steps": 4810, "loss": 0.2667, "lr": 1.8131741034810435e-05, "epoch": 3.1496881496881497, "percentage": 62.99, "elapsed_time": "0:13:35", "remaining_time": "0:07:58", "throughput": 1493.15, "total_tokens": 1217056}
|
|
{"current_steps": 3035, "total_steps": 4810, "loss": 0.2799, "lr": 1.8044563402088684e-05, "epoch": 3.1548856548856548, "percentage": 63.1, "elapsed_time": "0:13:35", "remaining_time": "0:07:56", "throughput": 1495.14, "total_tokens": 1219168}
|
|
{"current_steps": 3040, "total_steps": 4810, "loss": 0.2724, "lr": 1.795747734680762e-05, "epoch": 3.1600831600831603, "percentage": 63.2, "elapsed_time": "0:13:35", "remaining_time": "0:07:54", "throughput": 1496.9, "total_tokens": 1221088}
|
|
{"current_steps": 3045, "total_steps": 4810, "loss": 0.2666, "lr": 1.7870484015569306e-05, "epoch": 3.1652806652806653, "percentage": 63.31, "elapsed_time": "0:13:36", "remaining_time": "0:07:53", "throughput": 1498.97, "total_tokens": 1223264}
|
|
{"current_steps": 3050, "total_steps": 4810, "loss": 0.2586, "lr": 1.7783584553755006e-05, "epoch": 3.1704781704781704, "percentage": 63.41, "elapsed_time": "0:13:36", "remaining_time": "0:07:51", "throughput": 1501.03, "total_tokens": 1225440}
|
|
{"current_steps": 3055, "total_steps": 4810, "loss": 0.2519, "lr": 1.769678010551003e-05, "epoch": 3.175675675675676, "percentage": 63.51, "elapsed_time": "0:13:36", "remaining_time": "0:07:49", "throughput": 1502.87, "total_tokens": 1227424}
|
|
{"current_steps": 3060, "total_steps": 4810, "loss": 0.241, "lr": 1.761007181372874e-05, "epoch": 3.180873180873181, "percentage": 63.62, "elapsed_time": "0:13:37", "remaining_time": "0:07:47", "throughput": 1504.63, "total_tokens": 1229344}
|
|
{"current_steps": 3065, "total_steps": 4810, "loss": 0.2406, "lr": 1.7523460820039464e-05, "epoch": 3.186070686070686, "percentage": 63.72, "elapsed_time": "0:13:37", "remaining_time": "0:07:45", "throughput": 1506.62, "total_tokens": 1231456}
|
|
{"current_steps": 3070, "total_steps": 4810, "loss": 0.3145, "lr": 1.7436948264789466e-05, "epoch": 3.1912681912681915, "percentage": 63.83, "elapsed_time": "0:13:37", "remaining_time": "0:07:43", "throughput": 1508.46, "total_tokens": 1233440}
|
|
{"current_steps": 3075, "total_steps": 4810, "loss": 0.2568, "lr": 1.7350535287029957e-05, "epoch": 3.1964656964656966, "percentage": 63.93, "elapsed_time": "0:13:38", "remaining_time": "0:07:41", "throughput": 1510.45, "total_tokens": 1235552}
|
|
{"current_steps": 3080, "total_steps": 4810, "loss": 0.2696, "lr": 1.7264223024501064e-05, "epoch": 3.2016632016632016, "percentage": 64.03, "elapsed_time": "0:13:38", "remaining_time": "0:07:39", "throughput": 1512.28, "total_tokens": 1237536}
|
|
{"current_steps": 3085, "total_steps": 4810, "loss": 0.2861, "lr": 1.717801261361685e-05, "epoch": 3.2068607068607067, "percentage": 64.14, "elapsed_time": "0:13:38", "remaining_time": "0:07:37", "throughput": 1514.19, "total_tokens": 1239584}
|
|
{"current_steps": 3090, "total_steps": 4810, "loss": 0.2501, "lr": 1.7091905189450423e-05, "epoch": 3.212058212058212, "percentage": 64.24, "elapsed_time": "0:13:38", "remaining_time": "0:07:35", "throughput": 1515.94, "total_tokens": 1241504}
|
|
{"current_steps": 3095, "total_steps": 4810, "loss": 0.2578, "lr": 1.700590188571887e-05, "epoch": 3.2172557172557172, "percentage": 64.35, "elapsed_time": "0:13:39", "remaining_time": "0:07:33", "throughput": 1517.85, "total_tokens": 1243552}
|
|
{"current_steps": 3100, "total_steps": 4810, "loss": 0.3075, "lr": 1.6920003834768438e-05, "epoch": 3.2224532224532223, "percentage": 64.45, "elapsed_time": "0:13:39", "remaining_time": "0:07:32", "throughput": 1519.75, "total_tokens": 1245600}
|
|
{"current_steps": 3105, "total_steps": 4810, "loss": 0.2389, "lr": 1.6834212167559575e-05, "epoch": 3.227650727650728, "percentage": 64.55, "elapsed_time": "0:13:39", "remaining_time": "0:07:30", "throughput": 1521.73, "total_tokens": 1247712}
|
|
{"current_steps": 3110, "total_steps": 4810, "loss": 0.26, "lr": 1.674852801365203e-05, "epoch": 3.232848232848233, "percentage": 64.66, "elapsed_time": "0:13:40", "remaining_time": "0:07:28", "throughput": 1523.55, "total_tokens": 1249696}
|
|
{"current_steps": 3115, "total_steps": 4810, "loss": 0.3027, "lr": 1.6662952501190033e-05, "epoch": 3.238045738045738, "percentage": 64.76, "elapsed_time": "0:13:40", "remaining_time": "0:07:26", "throughput": 1525.53, "total_tokens": 1251808}
|
|
{"current_steps": 3120, "total_steps": 4810, "loss": 0.2315, "lr": 1.6577486756887374e-05, "epoch": 3.2432432432432434, "percentage": 64.86, "elapsed_time": "0:13:40", "remaining_time": "0:07:24", "throughput": 1527.27, "total_tokens": 1253728}
|
|
{"current_steps": 3125, "total_steps": 4810, "loss": 0.249, "lr": 1.649213190601261e-05, "epoch": 3.2484407484407485, "percentage": 64.97, "elapsed_time": "0:13:41", "remaining_time": "0:07:22", "throughput": 1529.25, "total_tokens": 1255840}
|
|
{"current_steps": 3130, "total_steps": 4810, "loss": 0.2647, "lr": 1.640688907237425e-05, "epoch": 3.2536382536382535, "percentage": 65.07, "elapsed_time": "0:13:41", "remaining_time": "0:07:20", "throughput": 1531.14, "total_tokens": 1257888}
|
|
{"current_steps": 3133, "total_steps": 4810, "eval_loss": 0.2563324272632599, "epoch": 3.2567567567567566, "percentage": 65.14, "elapsed_time": "0:13:42", "remaining_time": "0:07:20", "throughput": 1530.42, "total_tokens": 1259168}
|
|
{"current_steps": 3135, "total_steps": 4810, "loss": 0.2584, "lr": 1.632175937830594e-05, "epoch": 3.258835758835759, "percentage": 65.18, "elapsed_time": "0:14:31", "remaining_time": "0:07:45", "throughput": 1445.42, "total_tokens": 1259936}
|
|
{"current_steps": 3140, "total_steps": 4810, "loss": 0.2355, "lr": 1.6236743944651703e-05, "epoch": 3.264033264033264, "percentage": 65.28, "elapsed_time": "0:14:31", "remaining_time": "0:07:43", "throughput": 1447.38, "total_tokens": 1262112}
|
|
{"current_steps": 3145, "total_steps": 4810, "loss": 0.2481, "lr": 1.615184389075117e-05, "epoch": 3.269230769230769, "percentage": 65.38, "elapsed_time": "0:14:32", "remaining_time": "0:07:41", "throughput": 1448.91, "total_tokens": 1263904}
|
|
{"current_steps": 3150, "total_steps": 4810, "loss": 0.2607, "lr": 1.6067060334424835e-05, "epoch": 3.274428274428274, "percentage": 65.49, "elapsed_time": "0:14:32", "remaining_time": "0:07:39", "throughput": 1450.72, "total_tokens": 1265952}
|
|
{"current_steps": 3155, "total_steps": 4810, "loss": 0.3119, "lr": 1.5982394391959382e-05, "epoch": 3.2796257796257797, "percentage": 65.59, "elapsed_time": "0:14:32", "remaining_time": "0:07:37", "throughput": 1452.39, "total_tokens": 1267872}
|
|
{"current_steps": 3160, "total_steps": 4810, "loss": 0.2925, "lr": 1.58978471780929e-05, "epoch": 3.284823284823285, "percentage": 65.7, "elapsed_time": "0:14:33", "remaining_time": "0:07:35", "throughput": 1454.05, "total_tokens": 1269792}
|
|
{"current_steps": 3165, "total_steps": 4810, "loss": 0.2586, "lr": 1.581341980600033e-05, "epoch": 3.29002079002079, "percentage": 65.8, "elapsed_time": "0:14:33", "remaining_time": "0:07:34", "throughput": 1455.79, "total_tokens": 1271776}
|
|
{"current_steps": 3170, "total_steps": 4810, "loss": 0.2671, "lr": 1.5729113387278673e-05, "epoch": 3.2952182952182953, "percentage": 65.9, "elapsed_time": "0:14:33", "remaining_time": "0:07:32", "throughput": 1457.53, "total_tokens": 1273760}
|
|
{"current_steps": 3175, "total_steps": 4810, "loss": 0.2864, "lr": 1.5644929031932454e-05, "epoch": 3.3004158004158004, "percentage": 66.01, "elapsed_time": "0:14:34", "remaining_time": "0:07:30", "throughput": 1459.34, "total_tokens": 1275808}
|
|
{"current_steps": 3180, "total_steps": 4810, "loss": 0.2666, "lr": 1.5560867848359077e-05, "epoch": 3.3056133056133055, "percentage": 66.11, "elapsed_time": "0:14:34", "remaining_time": "0:07:28", "throughput": 1461.07, "total_tokens": 1277792}
|
|
{"current_steps": 3185, "total_steps": 4810, "loss": 0.2648, "lr": 1.547693094333421e-05, "epoch": 3.310810810810811, "percentage": 66.22, "elapsed_time": "0:14:34", "remaining_time": "0:07:26", "throughput": 1462.81, "total_tokens": 1279776}
|
|
{"current_steps": 3190, "total_steps": 4810, "loss": 0.2586, "lr": 1.539311942199725e-05, "epoch": 3.316008316008316, "percentage": 66.32, "elapsed_time": "0:14:35", "remaining_time": "0:07:24", "throughput": 1464.54, "total_tokens": 1281760}
|
|
{"current_steps": 3195, "total_steps": 4810, "loss": 0.2391, "lr": 1.5309434387836735e-05, "epoch": 3.321205821205821, "percentage": 66.42, "elapsed_time": "0:14:35", "remaining_time": "0:07:22", "throughput": 1466.27, "total_tokens": 1283744}
|
|
{"current_steps": 3200, "total_steps": 4810, "loss": 0.2907, "lr": 1.5225876942675842e-05, "epoch": 3.3264033264033266, "percentage": 66.53, "elapsed_time": "0:14:35", "remaining_time": "0:07:20", "throughput": 1468.07, "total_tokens": 1285792}
|
|
{"current_steps": 3205, "total_steps": 4810, "loss": 0.2942, "lr": 1.5142448186657878e-05, "epoch": 3.3316008316008316, "percentage": 66.63, "elapsed_time": "0:14:36", "remaining_time": "0:07:18", "throughput": 1469.8, "total_tokens": 1287776}
|
|
{"current_steps": 3210, "total_steps": 4810, "loss": 0.3321, "lr": 1.505914921823178e-05, "epoch": 3.3367983367983367, "percentage": 66.74, "elapsed_time": "0:14:36", "remaining_time": "0:07:16", "throughput": 1471.45, "total_tokens": 1289696}
|
|
{"current_steps": 3215, "total_steps": 4810, "loss": 0.2486, "lr": 1.4975981134137659e-05, "epoch": 3.3419958419958418, "percentage": 66.84, "elapsed_time": "0:14:36", "remaining_time": "0:07:14", "throughput": 1473.18, "total_tokens": 1291680}
|
|
{"current_steps": 3220, "total_steps": 4810, "loss": 0.2502, "lr": 1.489294502939238e-05, "epoch": 3.3471933471933473, "percentage": 66.94, "elapsed_time": "0:14:37", "remaining_time": "0:07:13", "throughput": 1474.76, "total_tokens": 1293536}
|
|
{"current_steps": 3225, "total_steps": 4810, "loss": 0.2878, "lr": 1.4810041997275092e-05, "epoch": 3.3523908523908523, "percentage": 67.05, "elapsed_time": "0:14:37", "remaining_time": "0:07:11", "throughput": 1476.7, "total_tokens": 1295712}
|
|
{"current_steps": 3230, "total_steps": 4810, "loss": 0.2824, "lr": 1.4727273129312918e-05, "epoch": 3.357588357588358, "percentage": 67.15, "elapsed_time": "0:14:37", "remaining_time": "0:07:09", "throughput": 1478.49, "total_tokens": 1297760}
|
|
{"current_steps": 3235, "total_steps": 4810, "loss": 0.2772, "lr": 1.4644639515266483e-05, "epoch": 3.362785862785863, "percentage": 67.26, "elapsed_time": "0:14:38", "remaining_time": "0:07:07", "throughput": 1480.28, "total_tokens": 1299808}
|
|
{"current_steps": 3240, "total_steps": 4810, "loss": 0.2602, "lr": 1.4562142243115644e-05, "epoch": 3.367983367983368, "percentage": 67.36, "elapsed_time": "0:14:38", "remaining_time": "0:07:05", "throughput": 1482.15, "total_tokens": 1301920}
|
|
{"current_steps": 3245, "total_steps": 4810, "loss": 0.2737, "lr": 1.4479782399045152e-05, "epoch": 3.373180873180873, "percentage": 67.46, "elapsed_time": "0:14:38", "remaining_time": "0:07:03", "throughput": 1483.86, "total_tokens": 1303904}
|
|
{"current_steps": 3250, "total_steps": 4810, "loss": 0.2683, "lr": 1.4397561067430298e-05, "epoch": 3.3783783783783785, "percentage": 67.57, "elapsed_time": "0:14:39", "remaining_time": "0:07:01", "throughput": 1485.58, "total_tokens": 1305888}
|
|
{"current_steps": 3255, "total_steps": 4810, "loss": 0.261, "lr": 1.4315479330822712e-05, "epoch": 3.3835758835758836, "percentage": 67.67, "elapsed_time": "0:14:39", "remaining_time": "0:07:00", "throughput": 1487.51, "total_tokens": 1308064}
|
|
{"current_steps": 3260, "total_steps": 4810, "loss": 0.2702, "lr": 1.4233538269936042e-05, "epoch": 3.3887733887733886, "percentage": 67.78, "elapsed_time": "0:14:39", "remaining_time": "0:06:58", "throughput": 1489.23, "total_tokens": 1310048}
|
|
{"current_steps": 3265, "total_steps": 4810, "loss": 0.2761, "lr": 1.415173896363178e-05, "epoch": 3.393970893970894, "percentage": 67.88, "elapsed_time": "0:14:40", "remaining_time": "0:06:56", "throughput": 1490.86, "total_tokens": 1311968}
|
|
{"current_steps": 3270, "total_steps": 4810, "loss": 0.2631, "lr": 1.4070082488905034e-05, "epoch": 3.399168399168399, "percentage": 67.98, "elapsed_time": "0:14:40", "remaining_time": "0:06:54", "throughput": 1492.49, "total_tokens": 1313888}
|
|
{"current_steps": 3275, "total_steps": 4810, "loss": 0.2702, "lr": 1.3988569920870314e-05, "epoch": 3.4043659043659042, "percentage": 68.09, "elapsed_time": "0:14:40", "remaining_time": "0:06:52", "throughput": 1494.41, "total_tokens": 1316064}
|
|
{"current_steps": 3280, "total_steps": 4810, "loss": 0.2643, "lr": 1.3907202332747454e-05, "epoch": 3.4095634095634098, "percentage": 68.19, "elapsed_time": "0:14:40", "remaining_time": "0:06:50", "throughput": 1496.19, "total_tokens": 1318112}
|
|
{"current_steps": 3285, "total_steps": 4810, "loss": 0.2877, "lr": 1.3825980795847402e-05, "epoch": 3.414760914760915, "percentage": 68.3, "elapsed_time": "0:14:41", "remaining_time": "0:06:49", "throughput": 1497.74, "total_tokens": 1319968}
|
|
{"current_steps": 3290, "total_steps": 4810, "loss": 0.271, "lr": 1.3744906379558165e-05, "epoch": 3.41995841995842, "percentage": 68.4, "elapsed_time": "0:14:41", "remaining_time": "0:06:47", "throughput": 1499.52, "total_tokens": 1322016}
|
|
{"current_steps": 3295, "total_steps": 4810, "loss": 0.2729, "lr": 1.3663980151330732e-05, "epoch": 3.4251559251559254, "percentage": 68.5, "elapsed_time": "0:14:41", "remaining_time": "0:06:45", "throughput": 1501.14, "total_tokens": 1323936}
|
|
{"current_steps": 3300, "total_steps": 4810, "loss": 0.261, "lr": 1.3583203176664961e-05, "epoch": 3.4303534303534304, "percentage": 68.61, "elapsed_time": "0:14:42", "remaining_time": "0:06:43", "throughput": 1502.84, "total_tokens": 1325920}
|
|
{"current_steps": 3305, "total_steps": 4810, "loss": 0.2498, "lr": 1.350257651909562e-05, "epoch": 3.4355509355509355, "percentage": 68.71, "elapsed_time": "0:14:42", "remaining_time": "0:06:41", "throughput": 1504.47, "total_tokens": 1327840}
|
|
{"current_steps": 3310, "total_steps": 4810, "loss": 0.2384, "lr": 1.3422101240178365e-05, "epoch": 3.4407484407484406, "percentage": 68.81, "elapsed_time": "0:14:42", "remaining_time": "0:06:40", "throughput": 1506.09, "total_tokens": 1329760}
|
|
{"current_steps": 3315, "total_steps": 4810, "loss": 0.2789, "lr": 1.3341778399475713e-05, "epoch": 3.445945945945946, "percentage": 68.92, "elapsed_time": "0:14:43", "remaining_time": "0:06:38", "throughput": 1507.78, "total_tokens": 1331744}
|
|
{"current_steps": 3320, "total_steps": 4810, "loss": 0.26, "lr": 1.3261609054543179e-05, "epoch": 3.451143451143451, "percentage": 69.02, "elapsed_time": "0:14:43", "remaining_time": "0:06:36", "throughput": 1509.55, "total_tokens": 1333792}
|
|
{"current_steps": 3325, "total_steps": 4810, "loss": 0.2975, "lr": 1.3181594260915262e-05, "epoch": 3.456340956340956, "percentage": 69.13, "elapsed_time": "0:14:43", "remaining_time": "0:06:34", "throughput": 1511.24, "total_tokens": 1335776}
|
|
{"current_steps": 3330, "total_steps": 4810, "loss": 0.2479, "lr": 1.3101735072091622e-05, "epoch": 3.4615384615384617, "percentage": 69.23, "elapsed_time": "0:14:44", "remaining_time": "0:06:32", "throughput": 1513.0, "total_tokens": 1337824}
|
|
{"current_steps": 3335, "total_steps": 4810, "loss": 0.223, "lr": 1.3022032539523176e-05, "epoch": 3.4667359667359667, "percentage": 69.33, "elapsed_time": "0:14:44", "remaining_time": "0:06:31", "throughput": 1514.76, "total_tokens": 1339872}
|
|
{"current_steps": 3340, "total_steps": 4810, "loss": 0.2543, "lr": 1.2942487712598234e-05, "epoch": 3.471933471933472, "percentage": 69.44, "elapsed_time": "0:14:44", "remaining_time": "0:06:29", "throughput": 1516.52, "total_tokens": 1341920}
|
|
{"current_steps": 3345, "total_steps": 4810, "loss": 0.2449, "lr": 1.2863101638628717e-05, "epoch": 3.4771309771309773, "percentage": 69.54, "elapsed_time": "0:14:45", "remaining_time": "0:06:27", "throughput": 1518.21, "total_tokens": 1343904}
|
|
{"current_steps": 3350, "total_steps": 4810, "loss": 0.2881, "lr": 1.2783875362836373e-05, "epoch": 3.4823284823284824, "percentage": 69.65, "elapsed_time": "0:14:45", "remaining_time": "0:06:25", "throughput": 1519.96, "total_tokens": 1345952}
|
|
{"current_steps": 3355, "total_steps": 4810, "loss": 0.2574, "lr": 1.2704809928338956e-05, "epoch": 3.4875259875259874, "percentage": 69.75, "elapsed_time": "0:14:45", "remaining_time": "0:06:24", "throughput": 1521.86, "total_tokens": 1348128}
|
|
{"current_steps": 3360, "total_steps": 4810, "loss": 0.2915, "lr": 1.2625906376136581e-05, "epoch": 3.492723492723493, "percentage": 69.85, "elapsed_time": "0:14:46", "remaining_time": "0:06:22", "throughput": 1523.47, "total_tokens": 1350048}
|
|
{"current_steps": 3365, "total_steps": 4810, "loss": 0.2653, "lr": 1.2547165745097928e-05, "epoch": 3.497920997920998, "percentage": 69.96, "elapsed_time": "0:14:46", "remaining_time": "0:06:20", "throughput": 1525.08, "total_tokens": 1351968}
|
|
{"current_steps": 3370, "total_steps": 4810, "loss": 0.2795, "lr": 1.2468589071946632e-05, "epoch": 3.503118503118503, "percentage": 70.06, "elapsed_time": "0:14:46", "remaining_time": "0:06:18", "throughput": 1526.76, "total_tokens": 1353952}
|
|
{"current_steps": 3374, "total_steps": 4810, "eval_loss": 0.2641850709915161, "epoch": 3.507276507276507, "percentage": 70.15, "elapsed_time": "0:14:48", "remaining_time": "0:06:17", "throughput": 1526.35, "total_tokens": 1355552}
|
|
{"current_steps": 3375, "total_steps": 4810, "loss": 0.2673, "lr": 1.2390177391247614e-05, "epoch": 3.508316008316008, "percentage": 70.17, "elapsed_time": "0:15:15", "remaining_time": "0:06:29", "throughput": 1480.51, "total_tokens": 1356000}
|
|
{"current_steps": 3380, "total_steps": 4810, "loss": 0.2527, "lr": 1.2311931735393417e-05, "epoch": 3.5135135135135136, "percentage": 70.27, "elapsed_time": "0:15:16", "remaining_time": "0:06:27", "throughput": 1482.15, "total_tokens": 1357984}
|
|
{"current_steps": 3385, "total_steps": 4810, "loss": 0.2635, "lr": 1.2233853134590697e-05, "epoch": 3.5187110187110187, "percentage": 70.37, "elapsed_time": "0:15:16", "remaining_time": "0:06:25", "throughput": 1483.73, "total_tokens": 1359904}
|
|
{"current_steps": 3390, "total_steps": 4810, "loss": 0.2737, "lr": 1.215594261684656e-05, "epoch": 3.523908523908524, "percentage": 70.48, "elapsed_time": "0:15:16", "remaining_time": "0:06:24", "throughput": 1485.44, "total_tokens": 1361952}
|
|
{"current_steps": 3395, "total_steps": 4810, "loss": 0.2521, "lr": 1.2078201207955123e-05, "epoch": 3.529106029106029, "percentage": 70.58, "elapsed_time": "0:15:17", "remaining_time": "0:06:22", "throughput": 1487.16, "total_tokens": 1364000}
|
|
{"current_steps": 3400, "total_steps": 4810, "loss": 0.246, "lr": 1.2000629931483947e-05, "epoch": 3.5343035343035343, "percentage": 70.69, "elapsed_time": "0:15:17", "remaining_time": "0:06:20", "throughput": 1488.94, "total_tokens": 1366112}
|
|
{"current_steps": 3405, "total_steps": 4810, "loss": 0.3136, "lr": 1.1923229808760564e-05, "epoch": 3.5395010395010393, "percentage": 70.79, "elapsed_time": "0:15:17", "remaining_time": "0:06:18", "throughput": 1490.58, "total_tokens": 1368096}
|
|
{"current_steps": 3410, "total_steps": 4810, "loss": 0.2365, "lr": 1.1846001858859054e-05, "epoch": 3.544698544698545, "percentage": 70.89, "elapsed_time": "0:15:18", "remaining_time": "0:06:16", "throughput": 1492.36, "total_tokens": 1370208}
|
|
{"current_steps": 3415, "total_steps": 4810, "loss": 0.2545, "lr": 1.1768947098586628e-05, "epoch": 3.54989604989605, "percentage": 71.0, "elapsed_time": "0:15:18", "remaining_time": "0:06:15", "throughput": 1494.0, "total_tokens": 1372192}
|
|
{"current_steps": 3420, "total_steps": 4810, "loss": 0.2743, "lr": 1.1692066542470201e-05, "epoch": 3.555093555093555, "percentage": 71.1, "elapsed_time": "0:15:18", "remaining_time": "0:06:13", "throughput": 1495.71, "total_tokens": 1374240}
|
|
{"current_steps": 3425, "total_steps": 4810, "loss": 0.2821, "lr": 1.1615361202743088e-05, "epoch": 3.5602910602910605, "percentage": 71.21, "elapsed_time": "0:15:19", "remaining_time": "0:06:11", "throughput": 1497.28, "total_tokens": 1376160}
|
|
{"current_steps": 3430, "total_steps": 4810, "loss": 0.2579, "lr": 1.1538832089331628e-05, "epoch": 3.5654885654885655, "percentage": 71.31, "elapsed_time": "0:15:19", "remaining_time": "0:06:09", "throughput": 1498.99, "total_tokens": 1378208}
|
|
{"current_steps": 3435, "total_steps": 4810, "loss": 0.2526, "lr": 1.1462480209841928e-05, "epoch": 3.5706860706860706, "percentage": 71.41, "elapsed_time": "0:15:19", "remaining_time": "0:06:08", "throughput": 1500.62, "total_tokens": 1380192}
|
|
{"current_steps": 3440, "total_steps": 4810, "loss": 0.2369, "lr": 1.138630656954658e-05, "epoch": 3.5758835758835756, "percentage": 71.52, "elapsed_time": "0:15:20", "remaining_time": "0:06:06", "throughput": 1502.46, "total_tokens": 1382368}
|
|
{"current_steps": 3445, "total_steps": 4810, "loss": 0.3155, "lr": 1.1310312171371393e-05, "epoch": 3.581081081081081, "percentage": 71.62, "elapsed_time": "0:15:20", "remaining_time": "0:06:04", "throughput": 1504.37, "total_tokens": 1384608}
|
|
{"current_steps": 3450, "total_steps": 4810, "loss": 0.2615, "lr": 1.1234498015882261e-05, "epoch": 3.586278586278586, "percentage": 71.73, "elapsed_time": "0:15:20", "remaining_time": "0:06:02", "throughput": 1506.0, "total_tokens": 1386592}
|
|
{"current_steps": 3455, "total_steps": 4810, "loss": 0.2614, "lr": 1.1158865101271906e-05, "epoch": 3.5914760914760917, "percentage": 71.83, "elapsed_time": "0:15:21", "remaining_time": "0:06:01", "throughput": 1507.49, "total_tokens": 1388448}
|
|
{"current_steps": 3460, "total_steps": 4810, "loss": 0.2222, "lr": 1.1083414423346807e-05, "epoch": 3.5966735966735968, "percentage": 71.93, "elapsed_time": "0:15:21", "remaining_time": "0:05:59", "throughput": 1509.26, "total_tokens": 1390560}
|
|
{"current_steps": 3465, "total_steps": 4810, "loss": 0.3213, "lr": 1.1008146975514059e-05, "epoch": 3.601871101871102, "percentage": 72.04, "elapsed_time": "0:15:21", "remaining_time": "0:05:57", "throughput": 1511.1, "total_tokens": 1392736}
|
|
{"current_steps": 3470, "total_steps": 4810, "loss": 0.2762, "lr": 1.0933063748768254e-05, "epoch": 3.607068607068607, "percentage": 72.14, "elapsed_time": "0:15:22", "remaining_time": "0:05:56", "throughput": 1512.71, "total_tokens": 1394720}
|
|
{"current_steps": 3475, "total_steps": 4810, "loss": 0.23, "lr": 1.0858165731678513e-05, "epoch": 3.6122661122661124, "percentage": 72.25, "elapsed_time": "0:15:22", "remaining_time": "0:05:54", "throughput": 1514.27, "total_tokens": 1396640}
|
|
{"current_steps": 3480, "total_steps": 4810, "loss": 0.3322, "lr": 1.0783453910375424e-05, "epoch": 3.6174636174636174, "percentage": 72.35, "elapsed_time": "0:15:22", "remaining_time": "0:05:52", "throughput": 1516.03, "total_tokens": 1398752}
|
|
{"current_steps": 3485, "total_steps": 4810, "loss": 0.2494, "lr": 1.0708929268538034e-05, "epoch": 3.6226611226611225, "percentage": 72.45, "elapsed_time": "0:15:22", "remaining_time": "0:05:50", "throughput": 1517.72, "total_tokens": 1400800}
|
|
{"current_steps": 3490, "total_steps": 4810, "loss": 0.2596, "lr": 1.0634592787380965e-05, "epoch": 3.627858627858628, "percentage": 72.56, "elapsed_time": "0:15:23", "remaining_time": "0:05:49", "throughput": 1519.28, "total_tokens": 1402720}
|
|
{"current_steps": 3495, "total_steps": 4810, "loss": 0.2563, "lr": 1.0560445445641423e-05, "epoch": 3.633056133056133, "percentage": 72.66, "elapsed_time": "0:15:23", "remaining_time": "0:05:47", "throughput": 1520.9, "total_tokens": 1404704}
|
|
{"current_steps": 3500, "total_steps": 4810, "loss": 0.2589, "lr": 1.048648821956637e-05, "epoch": 3.638253638253638, "percentage": 72.77, "elapsed_time": "0:15:23", "remaining_time": "0:05:45", "throughput": 1522.38, "total_tokens": 1406560}
|
|
{"current_steps": 3505, "total_steps": 4810, "loss": 0.2386, "lr": 1.0412722082899644e-05, "epoch": 3.643451143451143, "percentage": 72.87, "elapsed_time": "0:15:24", "remaining_time": "0:05:44", "throughput": 1524.0, "total_tokens": 1408544}
|
|
{"current_steps": 3510, "total_steps": 4810, "loss": 0.2512, "lr": 1.033914800686912e-05, "epoch": 3.6486486486486487, "percentage": 72.97, "elapsed_time": "0:15:24", "remaining_time": "0:05:42", "throughput": 1525.55, "total_tokens": 1410464}
|
|
{"current_steps": 3515, "total_steps": 4810, "loss": 0.2277, "lr": 1.0265766960173965e-05, "epoch": 3.6538461538461537, "percentage": 73.08, "elapsed_time": "0:15:24", "remaining_time": "0:05:40", "throughput": 1527.17, "total_tokens": 1412448}
|
|
{"current_steps": 3520, "total_steps": 4810, "loss": 0.2869, "lr": 1.019257990897185e-05, "epoch": 3.6590436590436592, "percentage": 73.18, "elapsed_time": "0:15:25", "remaining_time": "0:05:39", "throughput": 1529.05, "total_tokens": 1414688}
|
|
{"current_steps": 3525, "total_steps": 4810, "loss": 0.2914, "lr": 1.0119587816866258e-05, "epoch": 3.6642411642411643, "percentage": 73.28, "elapsed_time": "0:15:25", "remaining_time": "0:05:37", "throughput": 1530.66, "total_tokens": 1416672}
|
|
{"current_steps": 3530, "total_steps": 4810, "loss": 0.2836, "lr": 1.0046791644893758e-05, "epoch": 3.6694386694386694, "percentage": 73.39, "elapsed_time": "0:15:25", "remaining_time": "0:05:35", "throughput": 1532.2, "total_tokens": 1418592}
|
|
{"current_steps": 3535, "total_steps": 4810, "loss": 0.2675, "lr": 9.974192351511368e-06, "epoch": 3.6746361746361744, "percentage": 73.49, "elapsed_time": "0:15:26", "remaining_time": "0:05:34", "throughput": 1533.8, "total_tokens": 1420576}
|
|
{"current_steps": 3540, "total_steps": 4810, "loss": 0.2679, "lr": 9.901790892583974e-06, "epoch": 3.67983367983368, "percentage": 73.6, "elapsed_time": "0:15:26", "remaining_time": "0:05:32", "throughput": 1535.41, "total_tokens": 1422560}
|
|
{"current_steps": 3545, "total_steps": 4810, "loss": 0.2697, "lr": 9.829588221371694e-06, "epoch": 3.685031185031185, "percentage": 73.7, "elapsed_time": "0:15:26", "remaining_time": "0:05:30", "throughput": 1537.08, "total_tokens": 1424608}
|
|
{"current_steps": 3550, "total_steps": 4810, "loss": 0.2612, "lr": 9.757585288517328e-06, "epoch": 3.6902286902286905, "percentage": 73.8, "elapsed_time": "0:15:27", "remaining_time": "0:05:29", "throughput": 1538.89, "total_tokens": 1426784}
|
|
{"current_steps": 3555, "total_steps": 4810, "loss": 0.2501, "lr": 9.6857830420339e-06, "epoch": 3.6954261954261955, "percentage": 73.91, "elapsed_time": "0:15:27", "remaining_time": "0:05:27", "throughput": 1540.63, "total_tokens": 1428896}
|
|
{"current_steps": 3560, "total_steps": 4810, "loss": 0.2586, "lr": 9.614182427292077e-06, "epoch": 3.7006237006237006, "percentage": 74.01, "elapsed_time": "0:15:27", "remaining_time": "0:05:25", "throughput": 1542.23, "total_tokens": 1430880}
|
|
{"current_steps": 3565, "total_steps": 4810, "loss": 0.2484, "lr": 9.54278438700785e-06, "epoch": 3.7058212058212057, "percentage": 74.12, "elapsed_time": "0:15:28", "remaining_time": "0:05:24", "throughput": 1543.83, "total_tokens": 1432864}
|
|
{"current_steps": 3570, "total_steps": 4810, "loss": 0.287, "lr": 9.471589861229998e-06, "epoch": 3.711018711018711, "percentage": 74.22, "elapsed_time": "0:15:28", "remaining_time": "0:05:22", "throughput": 1545.49, "total_tokens": 1434912}
|
|
{"current_steps": 3575, "total_steps": 4810, "loss": 0.3025, "lr": 9.400599787327773e-06, "epoch": 3.7162162162162162, "percentage": 74.32, "elapsed_time": "0:15:28", "remaining_time": "0:05:20", "throughput": 1547.02, "total_tokens": 1436832}
|
|
{"current_steps": 3580, "total_steps": 4810, "loss": 0.2433, "lr": 9.329815099978568e-06, "epoch": 3.7214137214137213, "percentage": 74.43, "elapsed_time": "0:15:29", "remaining_time": "0:05:19", "throughput": 1548.55, "total_tokens": 1438752}
|
|
{"current_steps": 3585, "total_steps": 4810, "loss": 0.3809, "lr": 9.259236731155582e-06, "epoch": 3.726611226611227, "percentage": 74.53, "elapsed_time": "0:15:29", "remaining_time": "0:05:17", "throughput": 1550.08, "total_tokens": 1440672}
|
|
{"current_steps": 3590, "total_steps": 4810, "loss": 0.2221, "lr": 9.18886561011557e-06, "epoch": 3.731808731808732, "percentage": 74.64, "elapsed_time": "0:15:29", "remaining_time": "0:05:15", "throughput": 1551.81, "total_tokens": 1442784}
|
|
{"current_steps": 3595, "total_steps": 4810, "loss": 0.2622, "lr": 9.118702663386584e-06, "epoch": 3.737006237006237, "percentage": 74.74, "elapsed_time": "0:15:30", "remaining_time": "0:05:14", "throughput": 1553.6, "total_tokens": 1444960}
|
|
{"current_steps": 3600, "total_steps": 4810, "loss": 0.3094, "lr": 9.048748814755784e-06, "epoch": 3.742203742203742, "percentage": 74.84, "elapsed_time": "0:15:30", "remaining_time": "0:05:12", "throughput": 1555.13, "total_tokens": 1446880}
|
|
{"current_steps": 3605, "total_steps": 4810, "loss": 0.2723, "lr": 8.979004985257294e-06, "epoch": 3.7474012474012475, "percentage": 74.95, "elapsed_time": "0:15:30", "remaining_time": "0:05:11", "throughput": 1556.85, "total_tokens": 1448992}
|
|
{"current_steps": 3610, "total_steps": 4810, "loss": 0.2755, "lr": 8.909472093160065e-06, "epoch": 3.7525987525987525, "percentage": 75.05, "elapsed_time": "0:15:31", "remaining_time": "0:05:09", "throughput": 1558.44, "total_tokens": 1450976}
|
|
{"current_steps": 3615, "total_steps": 4810, "loss": 0.2751, "lr": 8.840151053955773e-06, "epoch": 3.757796257796258, "percentage": 75.16, "elapsed_time": "0:15:31", "remaining_time": "0:05:07", "throughput": 1560.17, "total_tokens": 1453088}
|
|
{"current_steps": 3615, "total_steps": 4810, "eval_loss": 0.25871533155441284, "epoch": 3.757796257796258, "percentage": 75.16, "elapsed_time": "0:15:32", "remaining_time": "0:05:08", "throughput": 1558.41, "total_tokens": 1453088}
|
|
{"current_steps": 3620, "total_steps": 4810, "loss": 0.2834, "lr": 8.771042780346766e-06, "epoch": 3.762993762993763, "percentage": 75.26, "elapsed_time": "0:16:11", "remaining_time": "0:05:19", "throughput": 1497.07, "total_tokens": 1455136}
|
|
{"current_steps": 3625, "total_steps": 4810, "loss": 0.2472, "lr": 8.702148182234043e-06, "epoch": 3.768191268191268, "percentage": 75.36, "elapsed_time": "0:16:12", "remaining_time": "0:05:17", "throughput": 1498.62, "total_tokens": 1457120}
|
|
{"current_steps": 3630, "total_steps": 4810, "loss": 0.2772, "lr": 8.633468166705336e-06, "epoch": 3.773388773388773, "percentage": 75.47, "elapsed_time": "0:16:12", "remaining_time": "0:05:16", "throughput": 1500.23, "total_tokens": 1459168}
|
|
{"current_steps": 3635, "total_steps": 4810, "loss": 0.2651, "lr": 8.565003638023065e-06, "epoch": 3.7785862785862787, "percentage": 75.57, "elapsed_time": "0:16:12", "remaining_time": "0:05:14", "throughput": 1501.78, "total_tokens": 1461152}
|
|
{"current_steps": 3640, "total_steps": 4810, "loss": 0.2756, "lr": 8.496755497612492e-06, "epoch": 3.7837837837837838, "percentage": 75.68, "elapsed_time": "0:16:13", "remaining_time": "0:05:12", "throughput": 1503.32, "total_tokens": 1463136}
|
|
{"current_steps": 3645, "total_steps": 4810, "loss": 0.2693, "lr": 8.42872464404986e-06, "epoch": 3.788981288981289, "percentage": 75.78, "elapsed_time": "0:16:13", "remaining_time": "0:05:11", "throughput": 1504.86, "total_tokens": 1465120}
|
|
{"current_steps": 3650, "total_steps": 4810, "loss": 0.2816, "lr": 8.360911973050537e-06, "epoch": 3.7941787941787943, "percentage": 75.88, "elapsed_time": "0:16:13", "remaining_time": "0:05:09", "throughput": 1506.4, "total_tokens": 1467104}
|
|
{"current_steps": 3655, "total_steps": 4810, "loss": 0.2571, "lr": 8.293318377457241e-06, "epoch": 3.7993762993762994, "percentage": 75.99, "elapsed_time": "0:16:14", "remaining_time": "0:05:07", "throughput": 1508.01, "total_tokens": 1469152}
|
|
{"current_steps": 3660, "total_steps": 4810, "loss": 0.268, "lr": 8.225944747228257e-06, "epoch": 3.8045738045738045, "percentage": 76.09, "elapsed_time": "0:16:14", "remaining_time": "0:05:06", "throughput": 1509.67, "total_tokens": 1471264}
|
|
{"current_steps": 3665, "total_steps": 4810, "loss": 0.2128, "lr": 8.158791969425738e-06, "epoch": 3.8097713097713095, "percentage": 76.2, "elapsed_time": "0:16:14", "remaining_time": "0:05:04", "throughput": 1511.21, "total_tokens": 1473248}
|
|
{"current_steps": 3670, "total_steps": 4810, "loss": 0.3101, "lr": 8.091860928204049e-06, "epoch": 3.814968814968815, "percentage": 76.3, "elapsed_time": "0:16:15", "remaining_time": "0:05:02", "throughput": 1512.87, "total_tokens": 1475360}
|
|
{"current_steps": 3675, "total_steps": 4810, "loss": 0.3044, "lr": 8.025152504798078e-06, "epoch": 3.82016632016632, "percentage": 76.4, "elapsed_time": "0:16:15", "remaining_time": "0:05:01", "throughput": 1514.54, "total_tokens": 1477472}
|
|
{"current_steps": 3680, "total_steps": 4810, "loss": 0.2471, "lr": 7.958667577511683e-06, "epoch": 3.8253638253638256, "percentage": 76.51, "elapsed_time": "0:16:15", "remaining_time": "0:04:59", "throughput": 1515.93, "total_tokens": 1479328}
|
|
{"current_steps": 3685, "total_steps": 4810, "loss": 0.2552, "lr": 7.892407021706063e-06, "epoch": 3.8305613305613306, "percentage": 76.61, "elapsed_time": "0:16:16", "remaining_time": "0:04:58", "throughput": 1517.4, "total_tokens": 1481248}
|
|
{"current_steps": 3690, "total_steps": 4810, "loss": 0.3112, "lr": 7.826371709788313e-06, "epoch": 3.8357588357588357, "percentage": 76.72, "elapsed_time": "0:16:16", "remaining_time": "0:04:56", "throughput": 1518.86, "total_tokens": 1483168}
|
|
{"current_steps": 3695, "total_steps": 4810, "loss": 0.2585, "lr": 7.760562511199882e-06, "epoch": 3.8409563409563408, "percentage": 76.82, "elapsed_time": "0:16:16", "remaining_time": "0:04:54", "throughput": 1520.39, "total_tokens": 1485152}
|
|
{"current_steps": 3700, "total_steps": 4810, "loss": 0.2673, "lr": 7.694980292405122e-06, "epoch": 3.8461538461538463, "percentage": 76.92, "elapsed_time": "0:16:17", "remaining_time": "0:04:53", "throughput": 1521.98, "total_tokens": 1487200}
|
|
{"current_steps": 3705, "total_steps": 4810, "loss": 0.2763, "lr": 7.629625916879932e-06, "epoch": 3.8513513513513513, "percentage": 77.03, "elapsed_time": "0:16:17", "remaining_time": "0:04:51", "throughput": 1523.5, "total_tokens": 1489184}
|
|
{"current_steps": 3710, "total_steps": 4810, "loss": 0.278, "lr": 7.564500245100325e-06, "epoch": 3.856548856548857, "percentage": 77.13, "elapsed_time": "0:16:17", "remaining_time": "0:04:49", "throughput": 1525.03, "total_tokens": 1491168}
|
|
{"current_steps": 3715, "total_steps": 4810, "loss": 0.2727, "lr": 7.499604134531149e-06, "epoch": 3.861746361746362, "percentage": 77.23, "elapsed_time": "0:16:18", "remaining_time": "0:04:48", "throughput": 1526.62, "total_tokens": 1493216}
|
|
{"current_steps": 3720, "total_steps": 4810, "loss": 0.2667, "lr": 7.434938439614781e-06, "epoch": 3.866943866943867, "percentage": 77.34, "elapsed_time": "0:16:18", "remaining_time": "0:04:46", "throughput": 1528.14, "total_tokens": 1495200}
|
|
{"current_steps": 3725, "total_steps": 4810, "loss": 0.2707, "lr": 7.370504011759855e-06, "epoch": 3.872141372141372, "percentage": 77.44, "elapsed_time": "0:16:18", "remaining_time": "0:04:45", "throughput": 1529.66, "total_tokens": 1497184}
|
|
{"current_steps": 3730, "total_steps": 4810, "loss": 0.2656, "lr": 7.306301699330065e-06, "epoch": 3.8773388773388775, "percentage": 77.55, "elapsed_time": "0:16:19", "remaining_time": "0:04:43", "throughput": 1531.05, "total_tokens": 1499040}
|
|
{"current_steps": 3735, "total_steps": 4810, "loss": 0.2423, "lr": 7.242332347633052e-06, "epoch": 3.8825363825363826, "percentage": 77.65, "elapsed_time": "0:16:19", "remaining_time": "0:04:41", "throughput": 1532.57, "total_tokens": 1501024}
|
|
{"current_steps": 3740, "total_steps": 4810, "loss": 0.2487, "lr": 7.178596798909159e-06, "epoch": 3.8877338877338876, "percentage": 77.75, "elapsed_time": "0:16:19", "remaining_time": "0:04:40", "throughput": 1534.15, "total_tokens": 1503072}
|
|
{"current_steps": 3745, "total_steps": 4810, "loss": 0.2847, "lr": 7.115095892320456e-06, "epoch": 3.892931392931393, "percentage": 77.86, "elapsed_time": "0:16:20", "remaining_time": "0:04:38", "throughput": 1535.86, "total_tokens": 1505248}
|
|
{"current_steps": 3750, "total_steps": 4810, "loss": 0.2334, "lr": 7.051830463939604e-06, "epoch": 3.898128898128898, "percentage": 77.96, "elapsed_time": "0:16:20", "remaining_time": "0:04:37", "throughput": 1537.44, "total_tokens": 1507296}
|
|
{"current_steps": 3755, "total_steps": 4810, "loss": 0.2595, "lr": 6.98880134673891e-06, "epoch": 3.9033264033264032, "percentage": 78.07, "elapsed_time": "0:16:20", "remaining_time": "0:04:35", "throughput": 1539.02, "total_tokens": 1509344}
|
|
{"current_steps": 3760, "total_steps": 4810, "loss": 0.2098, "lr": 6.926009370579334e-06, "epoch": 3.9085239085239083, "percentage": 78.17, "elapsed_time": "0:16:21", "remaining_time": "0:04:33", "throughput": 1540.66, "total_tokens": 1511456}
|
|
{"current_steps": 3765, "total_steps": 4810, "loss": 0.2277, "lr": 6.8634553621995416e-06, "epoch": 3.913721413721414, "percentage": 78.27, "elapsed_time": "0:16:21", "remaining_time": "0:04:32", "throughput": 1542.17, "total_tokens": 1513440}
|
|
{"current_steps": 3770, "total_steps": 4810, "loss": 0.3251, "lr": 6.80114014520507e-06, "epoch": 3.918918918918919, "percentage": 78.38, "elapsed_time": "0:16:21", "remaining_time": "0:04:30", "throughput": 1543.74, "total_tokens": 1515488}
|
|
{"current_steps": 3775, "total_steps": 4810, "loss": 0.2604, "lr": 6.739064540057424e-06, "epoch": 3.9241164241164244, "percentage": 78.48, "elapsed_time": "0:16:22", "remaining_time": "0:04:29", "throughput": 1545.19, "total_tokens": 1517408}
|
|
{"current_steps": 3780, "total_steps": 4810, "loss": 0.2458, "lr": 6.677229364063328e-06, "epoch": 3.9293139293139294, "percentage": 78.59, "elapsed_time": "0:16:22", "remaining_time": "0:04:27", "throughput": 1546.7, "total_tokens": 1519392}
|
|
{"current_steps": 3785, "total_steps": 4810, "loss": 0.2596, "lr": 6.615635431363942e-06, "epoch": 3.9345114345114345, "percentage": 78.69, "elapsed_time": "0:16:22", "remaining_time": "0:04:26", "throughput": 1548.27, "total_tokens": 1521440}
|
|
{"current_steps": 3790, "total_steps": 4810, "loss": 0.2766, "lr": 6.554283552924118e-06, "epoch": 3.9397089397089395, "percentage": 78.79, "elapsed_time": "0:16:22", "remaining_time": "0:04:24", "throughput": 1549.84, "total_tokens": 1523488}
|
|
{"current_steps": 3795, "total_steps": 4810, "loss": 0.2551, "lr": 6.493174536521768e-06, "epoch": 3.944906444906445, "percentage": 78.9, "elapsed_time": "0:16:23", "remaining_time": "0:04:22", "throughput": 1551.48, "total_tokens": 1525600}
|
|
{"current_steps": 3800, "total_steps": 4810, "loss": 0.2612, "lr": 6.4323091867372095e-06, "epoch": 3.95010395010395, "percentage": 79.0, "elapsed_time": "0:16:23", "remaining_time": "0:04:21", "throughput": 1552.98, "total_tokens": 1527584}
|
|
{"current_steps": 3805, "total_steps": 4810, "loss": 0.2575, "lr": 6.371688304942544e-06, "epoch": 3.955301455301455, "percentage": 79.11, "elapsed_time": "0:16:23", "remaining_time": "0:04:19", "throughput": 1554.42, "total_tokens": 1529504}
|
|
{"current_steps": 3810, "total_steps": 4810, "loss": 0.2897, "lr": 6.311312689291166e-06, "epoch": 3.9604989604989607, "percentage": 79.21, "elapsed_time": "0:16:24", "remaining_time": "0:04:18", "throughput": 1555.86, "total_tokens": 1531424}
|
|
{"current_steps": 3815, "total_steps": 4810, "loss": 0.2366, "lr": 6.251183134707184e-06, "epoch": 3.9656964656964657, "percentage": 79.31, "elapsed_time": "0:16:24", "remaining_time": "0:04:16", "throughput": 1557.36, "total_tokens": 1533408}
|
|
{"current_steps": 3820, "total_steps": 4810, "loss": 0.2668, "lr": 6.191300432875017e-06, "epoch": 3.970893970893971, "percentage": 79.42, "elapsed_time": "0:16:24", "remaining_time": "0:04:15", "throughput": 1558.87, "total_tokens": 1535392}
|
|
{"current_steps": 3825, "total_steps": 4810, "loss": 0.1661, "lr": 6.13166537222894e-06, "epoch": 3.976091476091476, "percentage": 79.52, "elapsed_time": "0:16:25", "remaining_time": "0:04:13", "throughput": 1560.3, "total_tokens": 1537312}
|
|
{"current_steps": 3830, "total_steps": 4810, "loss": 0.28, "lr": 6.072278737942691e-06, "epoch": 3.9812889812889813, "percentage": 79.63, "elapsed_time": "0:16:25", "remaining_time": "0:04:12", "throughput": 1561.87, "total_tokens": 1539360}
|
|
{"current_steps": 3835, "total_steps": 4810, "loss": 0.3038, "lr": 6.0131413119191685e-06, "epoch": 3.9864864864864864, "percentage": 79.73, "elapsed_time": "0:16:25", "remaining_time": "0:04:10", "throughput": 1563.3, "total_tokens": 1541280}
|
|
{"current_steps": 3840, "total_steps": 4810, "loss": 0.2598, "lr": 5.954253872780102e-06, "epoch": 3.991683991683992, "percentage": 79.83, "elapsed_time": "0:16:26", "remaining_time": "0:04:09", "throughput": 1564.67, "total_tokens": 1543136}
|
|
{"current_steps": 3845, "total_steps": 4810, "loss": 0.2387, "lr": 5.8956171958558266e-06, "epoch": 3.996881496881497, "percentage": 79.94, "elapsed_time": "0:16:26", "remaining_time": "0:04:07", "throughput": 1566.17, "total_tokens": 1545120}
|
|
{"current_steps": 3850, "total_steps": 4810, "loss": 0.2258, "lr": 5.8372320531750655e-06, "epoch": 4.002079002079002, "percentage": 80.04, "elapsed_time": "0:16:26", "remaining_time": "0:04:06", "throughput": 1567.44, "total_tokens": 1547056}
|
|
{"current_steps": 3855, "total_steps": 4810, "loss": 0.279, "lr": 5.77909921345475e-06, "epoch": 4.007276507276507, "percentage": 80.15, "elapsed_time": "0:16:27", "remaining_time": "0:04:04", "throughput": 1568.87, "total_tokens": 1548976}
|
|
{"current_steps": 3856, "total_steps": 4810, "eval_loss": 0.25588732957839966, "epoch": 4.008316008316008, "percentage": 80.17, "elapsed_time": "0:16:28", "remaining_time": "0:04:04", "throughput": 1567.47, "total_tokens": 1549360}
|
|
{"current_steps": 3860, "total_steps": 4810, "loss": 0.2636, "lr": 5.721219442089926e-06, "epoch": 4.012474012474012, "percentage": 80.25, "elapsed_time": "0:16:53", "remaining_time": "0:04:09", "throughput": 1529.55, "total_tokens": 1550960}
|
|
{"current_steps": 3865, "total_steps": 4810, "loss": 0.2772, "lr": 5.663593501143663e-06, "epoch": 4.017671517671518, "percentage": 80.35, "elapsed_time": "0:16:54", "remaining_time": "0:04:08", "throughput": 1531.0, "total_tokens": 1552944}
|
|
{"current_steps": 3870, "total_steps": 4810, "loss": 0.2654, "lr": 5.6062221493370035e-06, "epoch": 4.022869022869023, "percentage": 80.46, "elapsed_time": "0:16:54", "remaining_time": "0:04:06", "throughput": 1532.51, "total_tokens": 1554992}
|
|
{"current_steps": 3875, "total_steps": 4810, "loss": 0.2682, "lr": 5.549106142039018e-06, "epoch": 4.028066528066528, "percentage": 80.56, "elapsed_time": "0:16:55", "remaining_time": "0:04:04", "throughput": 1534.07, "total_tokens": 1557104}
|
|
{"current_steps": 3880, "total_steps": 4810, "loss": 0.2818, "lr": 5.492246231256798e-06, "epoch": 4.033264033264033, "percentage": 80.67, "elapsed_time": "0:16:55", "remaining_time": "0:04:03", "throughput": 1535.52, "total_tokens": 1559088}
|
|
{"current_steps": 3885, "total_steps": 4810, "loss": 0.2739, "lr": 5.435643165625614e-06, "epoch": 4.038461538461538, "percentage": 80.77, "elapsed_time": "0:16:55", "remaining_time": "0:04:01", "throughput": 1536.9, "total_tokens": 1561008}
|
|
{"current_steps": 3890, "total_steps": 4810, "loss": 0.273, "lr": 5.379297690399035e-06, "epoch": 4.043659043659043, "percentage": 80.87, "elapsed_time": "0:16:56", "remaining_time": "0:04:00", "throughput": 1538.41, "total_tokens": 1563056}
|
|
{"current_steps": 3895, "total_steps": 4810, "loss": 0.2571, "lr": 5.3232105474390895e-06, "epoch": 4.048856548856548, "percentage": 80.98, "elapsed_time": "0:16:56", "remaining_time": "0:03:58", "throughput": 1539.85, "total_tokens": 1565040}
|
|
{"current_steps": 3900, "total_steps": 4810, "loss": 0.2529, "lr": 5.267382475206548e-06, "epoch": 4.054054054054054, "percentage": 81.08, "elapsed_time": "0:16:56", "remaining_time": "0:03:57", "throughput": 1541.29, "total_tokens": 1567024}
|
|
{"current_steps": 3905, "total_steps": 4810, "loss": 0.264, "lr": 5.2118142087511705e-06, "epoch": 4.0592515592515594, "percentage": 81.19, "elapsed_time": "0:16:57", "remaining_time": "0:03:55", "throughput": 1542.85, "total_tokens": 1569136}
|
|
{"current_steps": 3910, "total_steps": 4810, "loss": 0.2638, "lr": 5.156506479702019e-06, "epoch": 4.0644490644490645, "percentage": 81.29, "elapsed_time": "0:16:57", "remaining_time": "0:03:54", "throughput": 1544.3, "total_tokens": 1571120}
|
|
{"current_steps": 3915, "total_steps": 4810, "loss": 0.2652, "lr": 5.101460016257859e-06, "epoch": 4.06964656964657, "percentage": 81.39, "elapsed_time": "0:16:57", "remaining_time": "0:03:52", "throughput": 1545.7, "total_tokens": 1573040}
|
|
{"current_steps": 3920, "total_steps": 4810, "loss": 0.2582, "lr": 5.0466755431775316e-06, "epoch": 4.074844074844075, "percentage": 81.5, "elapsed_time": "0:16:58", "remaining_time": "0:03:51", "throughput": 1547.04, "total_tokens": 1574896}
|
|
{"current_steps": 3925, "total_steps": 4810, "loss": 0.2618, "lr": 4.992153781770448e-06, "epoch": 4.08004158004158, "percentage": 81.6, "elapsed_time": "0:16:58", "remaining_time": "0:03:49", "throughput": 1548.5, "total_tokens": 1576880}
|
|
{"current_steps": 3930, "total_steps": 4810, "loss": 0.2455, "lr": 4.937895449887075e-06, "epoch": 4.085239085239086, "percentage": 81.7, "elapsed_time": "0:16:58", "remaining_time": "0:03:48", "throughput": 1549.96, "total_tokens": 1578864}
|
|
{"current_steps": 3935, "total_steps": 4810, "loss": 0.2813, "lr": 4.883901261909465e-06, "epoch": 4.090436590436591, "percentage": 81.81, "elapsed_time": "0:16:58", "remaining_time": "0:03:46", "throughput": 1551.42, "total_tokens": 1580848}
|
|
{"current_steps": 3940, "total_steps": 4810, "loss": 0.2815, "lr": 4.8301719287419e-06, "epoch": 4.095634095634096, "percentage": 81.91, "elapsed_time": "0:16:59", "remaining_time": "0:03:45", "throughput": 1552.75, "total_tokens": 1582704}
|
|
{"current_steps": 3945, "total_steps": 4810, "loss": 0.2796, "lr": 4.776708157801463e-06, "epoch": 4.100831600831601, "percentage": 82.02, "elapsed_time": "0:16:59", "remaining_time": "0:03:43", "throughput": 1554.34, "total_tokens": 1584816}
|
|
{"current_steps": 3950, "total_steps": 4810, "loss": 0.2491, "lr": 4.7235106530088085e-06, "epoch": 4.106029106029106, "percentage": 82.12, "elapsed_time": "0:16:59", "remaining_time": "0:03:42", "throughput": 1555.79, "total_tokens": 1586800}
|
|
{"current_steps": 3955, "total_steps": 4810, "loss": 0.2528, "lr": 4.670580114778813e-06, "epoch": 4.111226611226611, "percentage": 82.22, "elapsed_time": "0:17:00", "remaining_time": "0:03:40", "throughput": 1557.19, "total_tokens": 1588720}
|
|
{"current_steps": 3960, "total_steps": 4810, "loss": 0.2732, "lr": 4.617917240011394e-06, "epoch": 4.116424116424117, "percentage": 82.33, "elapsed_time": "0:17:00", "remaining_time": "0:03:39", "throughput": 1558.52, "total_tokens": 1590576}
|
|
{"current_steps": 3965, "total_steps": 4810, "loss": 0.2573, "lr": 4.565522722082336e-06, "epoch": 4.121621621621622, "percentage": 82.43, "elapsed_time": "0:17:00", "remaining_time": "0:03:37", "throughput": 1559.91, "total_tokens": 1592496}
|
|
{"current_steps": 3970, "total_steps": 4810, "loss": 0.2638, "lr": 4.513397250834159e-06, "epoch": 4.126819126819127, "percentage": 82.54, "elapsed_time": "0:17:01", "remaining_time": "0:03:36", "throughput": 1561.43, "total_tokens": 1594544}
|
|
{"current_steps": 3975, "total_steps": 4810, "loss": 0.2925, "lr": 4.461541512567011e-06, "epoch": 4.132016632016632, "percentage": 82.64, "elapsed_time": "0:17:01", "remaining_time": "0:03:34", "throughput": 1562.76, "total_tokens": 1596400}
|
|
{"current_steps": 3980, "total_steps": 4810, "loss": 0.2786, "lr": 4.409956190029674e-06, "epoch": 4.137214137214137, "percentage": 82.74, "elapsed_time": "0:17:01", "remaining_time": "0:03:33", "throughput": 1564.15, "total_tokens": 1598320}
|
|
{"current_steps": 3985, "total_steps": 4810, "loss": 0.2286, "lr": 4.358641962410537e-06, "epoch": 4.142411642411642, "percentage": 82.85, "elapsed_time": "0:17:02", "remaining_time": "0:03:31", "throughput": 1565.65, "total_tokens": 1600368}
|
|
{"current_steps": 3990, "total_steps": 4810, "loss": 0.2986, "lr": 4.307599505328672e-06, "epoch": 4.147609147609147, "percentage": 82.95, "elapsed_time": "0:17:02", "remaining_time": "0:03:30", "throughput": 1567.1, "total_tokens": 1602352}
|
|
{"current_steps": 3995, "total_steps": 4810, "loss": 0.2363, "lr": 4.256829490824949e-06, "epoch": 4.152806652806653, "percentage": 83.06, "elapsed_time": "0:17:02", "remaining_time": "0:03:28", "throughput": 1568.54, "total_tokens": 1604336}
|
|
{"current_steps": 4000, "total_steps": 4810, "loss": 0.2652, "lr": 4.206332587353149e-06, "epoch": 4.158004158004158, "percentage": 83.16, "elapsed_time": "0:17:03", "remaining_time": "0:03:27", "throughput": 1569.92, "total_tokens": 1606256}
|
|
{"current_steps": 4005, "total_steps": 4810, "loss": 0.2641, "lr": 4.1561094597712155e-06, "epoch": 4.163201663201663, "percentage": 83.26, "elapsed_time": "0:17:03", "remaining_time": "0:03:25", "throughput": 1571.42, "total_tokens": 1608304}
|
|
{"current_steps": 4010, "total_steps": 4810, "loss": 0.2415, "lr": 4.106160769332443e-06, "epoch": 4.168399168399168, "percentage": 83.37, "elapsed_time": "0:17:03", "remaining_time": "0:03:24", "throughput": 1573.04, "total_tokens": 1610480}
|
|
{"current_steps": 4015, "total_steps": 4810, "loss": 0.2449, "lr": 4.056487173676843e-06, "epoch": 4.173596673596673, "percentage": 83.47, "elapsed_time": "0:17:04", "remaining_time": "0:03:22", "throughput": 1574.54, "total_tokens": 1612528}
|
|
{"current_steps": 4020, "total_steps": 4810, "loss": 0.2742, "lr": 4.007089326822405e-06, "epoch": 4.1787941787941785, "percentage": 83.58, "elapsed_time": "0:17:04", "remaining_time": "0:03:21", "throughput": 1576.04, "total_tokens": 1614576}
|
|
{"current_steps": 4025, "total_steps": 4810, "loss": 0.264, "lr": 3.957967879156533e-06, "epoch": 4.183991683991684, "percentage": 83.68, "elapsed_time": "0:17:04", "remaining_time": "0:03:19", "throughput": 1577.54, "total_tokens": 1616624}
|
|
{"current_steps": 4030, "total_steps": 4810, "loss": 0.2548, "lr": 3.909123477427487e-06, "epoch": 4.1891891891891895, "percentage": 83.78, "elapsed_time": "0:17:05", "remaining_time": "0:03:18", "throughput": 1579.03, "total_tokens": 1618672}
|
|
{"current_steps": 4035, "total_steps": 4810, "loss": 0.2113, "lr": 3.860556764735842e-06, "epoch": 4.1943866943866945, "percentage": 83.89, "elapsed_time": "0:17:05", "remaining_time": "0:03:16", "throughput": 1580.59, "total_tokens": 1620784}
|
|
{"current_steps": 4040, "total_steps": 4810, "loss": 0.3261, "lr": 3.812268380526046e-06, "epoch": 4.1995841995842, "percentage": 83.99, "elapsed_time": "0:17:05", "remaining_time": "0:03:15", "throughput": 1582.02, "total_tokens": 1622768}
|
|
{"current_steps": 4045, "total_steps": 4810, "loss": 0.2481, "lr": 3.764258960577971e-06, "epoch": 4.204781704781705, "percentage": 84.1, "elapsed_time": "0:17:06", "remaining_time": "0:03:14", "throughput": 1583.39, "total_tokens": 1624688}
|
|
{"current_steps": 4050, "total_steps": 4810, "loss": 0.2599, "lr": 3.7165291369985618e-06, "epoch": 4.20997920997921, "percentage": 84.2, "elapsed_time": "0:17:06", "remaining_time": "0:03:12", "throughput": 1584.82, "total_tokens": 1626672}
|
|
{"current_steps": 4055, "total_steps": 4810, "loss": 0.258, "lr": 3.6690795382135186e-06, "epoch": 4.215176715176715, "percentage": 84.3, "elapsed_time": "0:17:06", "remaining_time": "0:03:11", "throughput": 1586.43, "total_tokens": 1628848}
|
|
{"current_steps": 4060, "total_steps": 4810, "loss": 0.2809, "lr": 3.6219107889590155e-06, "epoch": 4.220374220374221, "percentage": 84.41, "elapsed_time": "0:17:07", "remaining_time": "0:03:09", "throughput": 1587.86, "total_tokens": 1630832}
|
|
{"current_steps": 4065, "total_steps": 4810, "loss": 0.2292, "lr": 3.575023510273462e-06, "epoch": 4.225571725571726, "percentage": 84.51, "elapsed_time": "0:17:07", "remaining_time": "0:03:08", "throughput": 1589.34, "total_tokens": 1632880}
|
|
{"current_steps": 4070, "total_steps": 4810, "loss": 0.2871, "lr": 3.5284183194893488e-06, "epoch": 4.230769230769231, "percentage": 84.62, "elapsed_time": "0:17:07", "remaining_time": "0:03:06", "throughput": 1590.88, "total_tokens": 1634992}
|
|
{"current_steps": 4075, "total_steps": 4810, "loss": 0.2634, "lr": 3.48209583022511e-06, "epoch": 4.235966735966736, "percentage": 84.72, "elapsed_time": "0:17:08", "remaining_time": "0:03:05", "throughput": 1592.24, "total_tokens": 1636912}
|
|
{"current_steps": 4080, "total_steps": 4810, "loss": 0.2477, "lr": 3.4360566523770426e-06, "epoch": 4.241164241164241, "percentage": 84.82, "elapsed_time": "0:17:08", "remaining_time": "0:03:04", "throughput": 1593.59, "total_tokens": 1638832}
|
|
{"current_steps": 4085, "total_steps": 4810, "loss": 0.2351, "lr": 3.3903013921112755e-06, "epoch": 4.246361746361746, "percentage": 84.93, "elapsed_time": "0:17:08", "remaining_time": "0:03:02", "throughput": 1595.25, "total_tokens": 1641072}
|
|
{"current_steps": 4090, "total_steps": 4810, "loss": 0.2808, "lr": 3.3448306518557795e-06, "epoch": 4.251559251559252, "percentage": 85.03, "elapsed_time": "0:17:09", "remaining_time": "0:03:01", "throughput": 1596.61, "total_tokens": 1642992}
|
|
{"current_steps": 4095, "total_steps": 4810, "loss": 0.2511, "lr": 3.299645030292467e-06, "epoch": 4.256756756756757, "percentage": 85.14, "elapsed_time": "0:17:09", "remaining_time": "0:02:59", "throughput": 1598.09, "total_tokens": 1645040}
|
|
{"current_steps": 4097, "total_steps": 4810, "eval_loss": 0.2517484128475189, "epoch": 4.258835758835759, "percentage": 85.18, "elapsed_time": "0:17:10", "remaining_time": "0:02:59", "throughput": 1597.01, "total_tokens": 1645808}
|
|
{"current_steps": 4100, "total_steps": 4810, "loss": 0.2495, "lr": 3.2547451223492786e-06, "epoch": 4.261954261954262, "percentage": 85.24, "elapsed_time": "0:17:59", "remaining_time": "0:03:06", "throughput": 1526.33, "total_tokens": 1647024}
|
|
{"current_steps": 4105, "total_steps": 4810, "loss": 0.2774, "lr": 3.2101315191923663e-06, "epoch": 4.267151767151767, "percentage": 85.34, "elapsed_time": "0:17:59", "remaining_time": "0:03:05", "throughput": 1527.71, "total_tokens": 1649008}
|
|
{"current_steps": 4110, "total_steps": 4810, "loss": 0.2208, "lr": 3.165804808218292e-06, "epoch": 4.272349272349272, "percentage": 85.45, "elapsed_time": "0:17:59", "remaining_time": "0:03:03", "throughput": 1529.14, "total_tokens": 1651056}
|
|
{"current_steps": 4115, "total_steps": 4810, "loss": 0.2601, "lr": 3.1217655730463093e-06, "epoch": 4.277546777546777, "percentage": 85.55, "elapsed_time": "0:18:00", "remaining_time": "0:03:02", "throughput": 1530.58, "total_tokens": 1653104}
|
|
{"current_steps": 4120, "total_steps": 4810, "loss": 0.2427, "lr": 3.078014393510695e-06, "epoch": 4.282744282744282, "percentage": 85.65, "elapsed_time": "0:18:00", "remaining_time": "0:03:00", "throughput": 1532.18, "total_tokens": 1655344}
|
|
{"current_steps": 4125, "total_steps": 4810, "loss": 0.2959, "lr": 3.0345518456530665e-06, "epoch": 4.287941787941788, "percentage": 85.76, "elapsed_time": "0:18:00", "remaining_time": "0:02:59", "throughput": 1533.62, "total_tokens": 1657392}
|
|
{"current_steps": 4130, "total_steps": 4810, "loss": 0.2807, "lr": 2.991378501714856e-06, "epoch": 4.293139293139293, "percentage": 85.86, "elapsed_time": "0:18:01", "remaining_time": "0:02:57", "throughput": 1534.93, "total_tokens": 1659312}
|
|
{"current_steps": 4135, "total_steps": 4810, "loss": 0.2579, "lr": 2.9484949301297166e-06, "epoch": 4.298336798336798, "percentage": 85.97, "elapsed_time": "0:18:01", "remaining_time": "0:02:56", "throughput": 1536.42, "total_tokens": 1661424}
|
|
{"current_steps": 4140, "total_steps": 4810, "loss": 0.2498, "lr": 2.9059016955160916e-06, "epoch": 4.303534303534303, "percentage": 86.07, "elapsed_time": "0:18:01", "remaining_time": "0:02:55", "throughput": 1537.79, "total_tokens": 1663408}
|
|
{"current_steps": 4145, "total_steps": 4810, "loss": 0.226, "lr": 2.8635993586697553e-06, "epoch": 4.3087318087318085, "percentage": 86.17, "elapsed_time": "0:18:02", "remaining_time": "0:02:53", "throughput": 1539.11, "total_tokens": 1665328}
|
|
{"current_steps": 4150, "total_steps": 4810, "loss": 0.2687, "lr": 2.8215884765564193e-06, "epoch": 4.313929313929314, "percentage": 86.28, "elapsed_time": "0:18:02", "remaining_time": "0:02:52", "throughput": 1540.48, "total_tokens": 1667312}
|
|
{"current_steps": 4155, "total_steps": 4810, "loss": 0.1693, "lr": 2.7798696023044163e-06, "epoch": 4.3191268191268195, "percentage": 86.38, "elapsed_time": "0:18:02", "remaining_time": "0:02:50", "throughput": 1541.85, "total_tokens": 1669296}
|
|
{"current_steps": 4160, "total_steps": 4810, "loss": 0.2217, "lr": 2.73844328519742e-06, "epoch": 4.324324324324325, "percentage": 86.49, "elapsed_time": "0:18:02", "remaining_time": "0:02:49", "throughput": 1543.21, "total_tokens": 1671280}
|
|
{"current_steps": 4165, "total_steps": 4810, "loss": 0.2814, "lr": 2.6973100706672e-06, "epoch": 4.32952182952183, "percentage": 86.59, "elapsed_time": "0:18:03", "remaining_time": "0:02:47", "throughput": 1544.76, "total_tokens": 1673456}
|
|
{"current_steps": 4170, "total_steps": 4810, "loss": 0.2523, "lr": 2.656470500286451e-06, "epoch": 4.334719334719335, "percentage": 86.69, "elapsed_time": "0:18:03", "remaining_time": "0:02:46", "throughput": 1546.18, "total_tokens": 1675504}
|
|
{"current_steps": 4175, "total_steps": 4810, "loss": 0.228, "lr": 2.615925111761647e-06, "epoch": 4.33991683991684, "percentage": 86.8, "elapsed_time": "0:18:03", "remaining_time": "0:02:44", "throughput": 1547.55, "total_tokens": 1677488}
|
|
{"current_steps": 4180, "total_steps": 4810, "loss": 0.2399, "lr": 2.5756744389259734e-06, "epoch": 4.345114345114345, "percentage": 86.9, "elapsed_time": "0:18:04", "remaining_time": "0:02:43", "throughput": 1548.98, "total_tokens": 1679536}
|
|
{"current_steps": 4185, "total_steps": 4810, "loss": 0.2345, "lr": 2.535719011732321e-06, "epoch": 4.350311850311851, "percentage": 87.01, "elapsed_time": "0:18:04", "remaining_time": "0:02:41", "throughput": 1550.34, "total_tokens": 1681520}
|
|
{"current_steps": 4190, "total_steps": 4810, "loss": 0.2726, "lr": 2.49605935624625e-06, "epoch": 4.355509355509356, "percentage": 87.11, "elapsed_time": "0:18:04", "remaining_time": "0:02:40", "throughput": 1551.77, "total_tokens": 1683568}
|
|
{"current_steps": 4195, "total_steps": 4810, "loss": 0.236, "lr": 2.4566959946391243e-06, "epoch": 4.360706860706861, "percentage": 87.21, "elapsed_time": "0:18:05", "remaining_time": "0:02:39", "throughput": 1553.07, "total_tokens": 1685488}
|
|
{"current_steps": 4200, "total_steps": 4810, "loss": 0.2476, "lr": 2.417629445181194e-06, "epoch": 4.365904365904366, "percentage": 87.32, "elapsed_time": "0:18:05", "remaining_time": "0:02:37", "throughput": 1554.38, "total_tokens": 1687408}
|
|
{"current_steps": 4205, "total_steps": 4810, "loss": 0.2325, "lr": 2.378860222234794e-06, "epoch": 4.371101871101871, "percentage": 87.42, "elapsed_time": "0:18:05", "remaining_time": "0:02:36", "throughput": 1555.85, "total_tokens": 1689520}
|
|
{"current_steps": 4210, "total_steps": 4810, "loss": 0.2749, "lr": 2.3403888362475782e-06, "epoch": 4.376299376299376, "percentage": 87.53, "elapsed_time": "0:18:06", "remaining_time": "0:02:34", "throughput": 1557.27, "total_tokens": 1691568}
|
|
{"current_steps": 4215, "total_steps": 4810, "loss": 0.2235, "lr": 2.3022157937457627e-06, "epoch": 4.381496881496881, "percentage": 87.63, "elapsed_time": "0:18:06", "remaining_time": "0:02:33", "throughput": 1558.69, "total_tokens": 1693616}
|
|
{"current_steps": 4220, "total_steps": 4810, "loss": 0.2369, "lr": 2.2643415973275016e-06, "epoch": 4.386694386694387, "percentage": 87.73, "elapsed_time": "0:18:06", "remaining_time": "0:02:31", "throughput": 1560.05, "total_tokens": 1695600}
|
|
{"current_steps": 4225, "total_steps": 4810, "loss": 0.285, "lr": 2.2267667456562307e-06, "epoch": 4.391891891891892, "percentage": 87.84, "elapsed_time": "0:18:07", "remaining_time": "0:02:30", "throughput": 1561.41, "total_tokens": 1697584}
|
|
{"current_steps": 4230, "total_steps": 4810, "loss": 0.2273, "lr": 2.1894917334541354e-06, "epoch": 4.397089397089397, "percentage": 87.94, "elapsed_time": "0:18:07", "remaining_time": "0:02:29", "throughput": 1562.77, "total_tokens": 1699568}
|
|
{"current_steps": 4235, "total_steps": 4810, "loss": 0.2953, "lr": 2.15251705149562e-06, "epoch": 4.402286902286902, "percentage": 88.05, "elapsed_time": "0:18:07", "remaining_time": "0:02:27", "throughput": 1564.3, "total_tokens": 1701744}
|
|
{"current_steps": 4240, "total_steps": 4810, "loss": 0.2404, "lr": 2.11584318660083e-06, "epoch": 4.407484407484407, "percentage": 88.15, "elapsed_time": "0:18:08", "remaining_time": "0:02:26", "throughput": 1565.54, "total_tokens": 1703600}
|
|
{"current_steps": 4245, "total_steps": 4810, "loss": 0.3067, "lr": 2.0794706216292813e-06, "epoch": 4.412681912681912, "percentage": 88.25, "elapsed_time": "0:18:08", "remaining_time": "0:02:24", "throughput": 1567.01, "total_tokens": 1705712}
|
|
{"current_steps": 4250, "total_steps": 4810, "loss": 0.2088, "lr": 2.043399835473475e-06, "epoch": 4.417879417879418, "percentage": 88.36, "elapsed_time": "0:18:08", "remaining_time": "0:02:23", "throughput": 1568.36, "total_tokens": 1707696}
|
|
{"current_steps": 4255, "total_steps": 4810, "loss": 0.2195, "lr": 2.0076313030525844e-06, "epoch": 4.423076923076923, "percentage": 88.46, "elapsed_time": "0:18:09", "remaining_time": "0:02:22", "throughput": 1569.77, "total_tokens": 1709744}
|
|
{"current_steps": 4260, "total_steps": 4810, "loss": 0.2399, "lr": 1.972165495306241e-06, "epoch": 4.428274428274428, "percentage": 88.57, "elapsed_time": "0:18:09", "remaining_time": "0:02:20", "throughput": 1571.18, "total_tokens": 1711792}
|
|
{"current_steps": 4265, "total_steps": 4810, "loss": 0.2491, "lr": 1.937002879188285e-06, "epoch": 4.4334719334719335, "percentage": 88.67, "elapsed_time": "0:18:09", "remaining_time": "0:02:19", "throughput": 1572.65, "total_tokens": 1713904}
|
|
{"current_steps": 4270, "total_steps": 4810, "loss": 0.2257, "lr": 1.9021439176606564e-06, "epoch": 4.4386694386694385, "percentage": 88.77, "elapsed_time": "0:18:10", "remaining_time": "0:02:17", "throughput": 1573.94, "total_tokens": 1715824}
|
|
{"current_steps": 4275, "total_steps": 4810, "loss": 0.2438, "lr": 1.8675890696872838e-06, "epoch": 4.443866943866944, "percentage": 88.88, "elapsed_time": "0:18:10", "remaining_time": "0:02:16", "throughput": 1575.29, "total_tokens": 1717808}
|
|
{"current_steps": 4280, "total_steps": 4810, "loss": 0.2773, "lr": 1.8333387902280314e-06, "epoch": 4.4490644490644495, "percentage": 88.98, "elapsed_time": "0:18:10", "remaining_time": "0:02:15", "throughput": 1576.69, "total_tokens": 1719856}
|
|
{"current_steps": 4285, "total_steps": 4810, "loss": 0.2193, "lr": 1.7993935302327292e-06, "epoch": 4.454261954261955, "percentage": 89.09, "elapsed_time": "0:18:11", "remaining_time": "0:02:13", "throughput": 1577.99, "total_tokens": 1721776}
|
|
{"current_steps": 4290, "total_steps": 4810, "loss": 0.238, "lr": 1.7657537366352338e-06, "epoch": 4.45945945945946, "percentage": 89.19, "elapsed_time": "0:18:11", "remaining_time": "0:02:12", "throughput": 1579.22, "total_tokens": 1723632}
|
|
{"current_steps": 4295, "total_steps": 4810, "loss": 0.1772, "lr": 1.732419852347511e-06, "epoch": 4.464656964656965, "percentage": 89.29, "elapsed_time": "0:18:11", "remaining_time": "0:02:10", "throughput": 1580.45, "total_tokens": 1725488}
|
|
{"current_steps": 4300, "total_steps": 4810, "loss": 0.2837, "lr": 1.699392316253856e-06, "epoch": 4.46985446985447, "percentage": 89.4, "elapsed_time": "0:18:12", "remaining_time": "0:02:09", "throughput": 1581.91, "total_tokens": 1727600}
|
|
{"current_steps": 4305, "total_steps": 4810, "loss": 0.2494, "lr": 1.666671563205069e-06, "epoch": 4.475051975051975, "percentage": 89.5, "elapsed_time": "0:18:12", "remaining_time": "0:02:08", "throughput": 1583.37, "total_tokens": 1729712}
|
|
{"current_steps": 4310, "total_steps": 4810, "loss": 0.269, "lr": 1.6342580240127582e-06, "epoch": 4.48024948024948, "percentage": 89.6, "elapsed_time": "0:18:12", "remaining_time": "0:02:06", "throughput": 1584.71, "total_tokens": 1731696}
|
|
{"current_steps": 4315, "total_steps": 4810, "loss": 0.2551, "lr": 1.6021521254436678e-06, "epoch": 4.485446985446986, "percentage": 89.71, "elapsed_time": "0:18:13", "remaining_time": "0:02:05", "throughput": 1586.11, "total_tokens": 1733744}
|
|
{"current_steps": 4320, "total_steps": 4810, "loss": 0.2408, "lr": 1.5703542902140294e-06, "epoch": 4.490644490644491, "percentage": 89.81, "elapsed_time": "0:18:13", "remaining_time": "0:02:04", "throughput": 1587.46, "total_tokens": 1735728}
|
|
{"current_steps": 4325, "total_steps": 4810, "loss": 0.1891, "lr": 1.5388649369840357e-06, "epoch": 4.495841995841996, "percentage": 89.92, "elapsed_time": "0:18:13", "remaining_time": "0:02:02", "throughput": 1588.86, "total_tokens": 1737776}
|
|
{"current_steps": 4330, "total_steps": 4810, "loss": 0.2684, "lr": 1.5076844803522922e-06, "epoch": 4.501039501039501, "percentage": 90.02, "elapsed_time": "0:18:14", "remaining_time": "0:02:01", "throughput": 1590.26, "total_tokens": 1739824}
|
|
{"current_steps": 4335, "total_steps": 4810, "loss": 0.2709, "lr": 1.476813330850388e-06, "epoch": 4.506237006237006, "percentage": 90.12, "elapsed_time": "0:18:14", "remaining_time": "0:01:59", "throughput": 1591.54, "total_tokens": 1741744}
|
|
{"current_steps": 4338, "total_steps": 4810, "eval_loss": 0.2577267587184906, "epoch": 4.509355509355509, "percentage": 90.19, "elapsed_time": "0:18:15", "remaining_time": "0:01:59", "throughput": 1590.85, "total_tokens": 1742960}
|
|
{"current_steps": 4340, "total_steps": 4810, "loss": 0.2731, "lr": 1.4462518949374838e-06, "epoch": 4.511434511434511, "percentage": 90.23, "elapsed_time": "0:18:39", "remaining_time": "0:02:01", "throughput": 1558.23, "total_tokens": 1743728}
|
|
{"current_steps": 4345, "total_steps": 4810, "loss": 0.2431, "lr": 1.4160005749949328e-06, "epoch": 4.516632016632016, "percentage": 90.33, "elapsed_time": "0:18:39", "remaining_time": "0:01:59", "throughput": 1559.72, "total_tokens": 1745904}
|
|
{"current_steps": 4350, "total_steps": 4810, "loss": 0.2649, "lr": 1.386059769321027e-06, "epoch": 4.521829521829522, "percentage": 90.44, "elapsed_time": "0:18:39", "remaining_time": "0:01:58", "throughput": 1560.96, "total_tokens": 1747824}
|
|
{"current_steps": 4355, "total_steps": 4810, "loss": 0.2569, "lr": 1.3564298721257223e-06, "epoch": 4.527027027027027, "percentage": 90.54, "elapsed_time": "0:18:40", "remaining_time": "0:01:57", "throughput": 1562.34, "total_tokens": 1749872}
|
|
{"current_steps": 4360, "total_steps": 4810, "loss": 0.1998, "lr": 1.3271112735254498e-06, "epoch": 4.532224532224532, "percentage": 90.64, "elapsed_time": "0:18:40", "remaining_time": "0:01:55", "throughput": 1563.6, "total_tokens": 1751792}
|
|
{"current_steps": 4365, "total_steps": 4810, "loss": 0.251, "lr": 1.298104359538005e-06, "epoch": 4.537422037422037, "percentage": 90.75, "elapsed_time": "0:18:40", "remaining_time": "0:01:54", "throughput": 1564.91, "total_tokens": 1753776}
|
|
{"current_steps": 4370, "total_steps": 4810, "loss": 0.2705, "lr": 1.269409512077427e-06, "epoch": 4.542619542619542, "percentage": 90.85, "elapsed_time": "0:18:41", "remaining_time": "0:01:52", "throughput": 1566.28, "total_tokens": 1755824}
|
|
{"current_steps": 4375, "total_steps": 4810, "loss": 0.202, "lr": 1.241027108949e-06, "epoch": 4.547817047817047, "percentage": 90.96, "elapsed_time": "0:18:41", "remaining_time": "0:01:51", "throughput": 1567.77, "total_tokens": 1758000}
|
|
{"current_steps": 4380, "total_steps": 4810, "loss": 0.2565, "lr": 1.2129575238442715e-06, "epoch": 4.553014553014553, "percentage": 91.06, "elapsed_time": "0:18:41", "remaining_time": "0:01:50", "throughput": 1569.08, "total_tokens": 1759984}
|
|
{"current_steps": 4385, "total_steps": 4810, "loss": 0.2607, "lr": 1.185201126336122e-06, "epoch": 4.558212058212058, "percentage": 91.16, "elapsed_time": "0:18:41", "remaining_time": "0:01:48", "throughput": 1570.38, "total_tokens": 1761968}
|
|
{"current_steps": 4390, "total_steps": 4810, "loss": 0.2392, "lr": 1.1577582818739135e-06, "epoch": 4.5634095634095635, "percentage": 91.27, "elapsed_time": "0:18:42", "remaining_time": "0:01:47", "throughput": 1571.74, "total_tokens": 1764016}
|
|
{"current_steps": 4395, "total_steps": 4810, "loss": 0.2808, "lr": 1.1306293517786614e-06, "epoch": 4.5686070686070686, "percentage": 91.37, "elapsed_time": "0:18:42", "remaining_time": "0:01:46", "throughput": 1572.99, "total_tokens": 1765936}
|
|
{"current_steps": 4400, "total_steps": 4810, "loss": 0.1891, "lr": 1.1038146932383004e-06, "epoch": 4.573804573804574, "percentage": 91.48, "elapsed_time": "0:18:42", "remaining_time": "0:01:44", "throughput": 1574.35, "total_tokens": 1767984}
|
|
{"current_steps": 4405, "total_steps": 4810, "loss": 0.2029, "lr": 1.0773146593029637e-06, "epoch": 4.579002079002079, "percentage": 91.58, "elapsed_time": "0:18:43", "remaining_time": "0:01:43", "throughput": 1575.6, "total_tokens": 1769904}
|
|
{"current_steps": 4410, "total_steps": 4810, "loss": 0.2743, "lr": 1.0511295988803294e-06, "epoch": 4.584199584199585, "percentage": 91.68, "elapsed_time": "0:18:43", "remaining_time": "0:01:41", "throughput": 1576.89, "total_tokens": 1771888}
|
|
{"current_steps": 4415, "total_steps": 4810, "loss": 0.2115, "lr": 1.0252598567310451e-06, "epoch": 4.58939708939709, "percentage": 91.79, "elapsed_time": "0:18:43", "remaining_time": "0:01:40", "throughput": 1578.25, "total_tokens": 1773936}
|
|
{"current_steps": 4420, "total_steps": 4810, "loss": 0.2778, "lr": 9.99705773464185e-07, "epoch": 4.594594594594595, "percentage": 91.89, "elapsed_time": "0:18:44", "remaining_time": "0:01:39", "throughput": 1579.61, "total_tokens": 1775984}
|
|
{"current_steps": 4425, "total_steps": 4810, "loss": 0.2428, "lr": 9.744676855327483e-07, "epoch": 4.5997920997921, "percentage": 92.0, "elapsed_time": "0:18:44", "remaining_time": "0:01:37", "throughput": 1580.79, "total_tokens": 1777840}
|
|
{"current_steps": 4430, "total_steps": 4810, "loss": 0.2124, "lr": 9.495459252292504e-07, "epoch": 4.604989604989605, "percentage": 92.1, "elapsed_time": "0:18:44", "remaining_time": "0:01:36", "throughput": 1582.08, "total_tokens": 1779824}
|
|
{"current_steps": 4435, "total_steps": 4810, "loss": 0.1939, "lr": 9.249408206813332e-07, "epoch": 4.61018711018711, "percentage": 92.2, "elapsed_time": "0:18:45", "remaining_time": "0:01:35", "throughput": 1583.43, "total_tokens": 1781872}
|
|
{"current_steps": 4440, "total_steps": 4810, "loss": 0.2364, "lr": 9.006526958474509e-07, "epoch": 4.615384615384615, "percentage": 92.31, "elapsed_time": "0:18:45", "remaining_time": "0:01:33", "throughput": 1584.84, "total_tokens": 1783984}
|
|
{"current_steps": 4445, "total_steps": 4810, "loss": 0.2023, "lr": 8.766818705126134e-07, "epoch": 4.620582120582121, "percentage": 92.41, "elapsed_time": "0:18:45", "remaining_time": "0:01:32", "throughput": 1586.18, "total_tokens": 1786032}
|
|
{"current_steps": 4450, "total_steps": 4810, "loss": 0.2455, "lr": 8.530286602841525e-07, "epoch": 4.625779625779626, "percentage": 92.52, "elapsed_time": "0:18:46", "remaining_time": "0:01:31", "throughput": 1587.47, "total_tokens": 1788016}
|
|
{"current_steps": 4455, "total_steps": 4810, "loss": 0.2154, "lr": 8.296933765875897e-07, "epoch": 4.630977130977131, "percentage": 92.62, "elapsed_time": "0:18:46", "remaining_time": "0:01:29", "throughput": 1588.82, "total_tokens": 1790064}
|
|
{"current_steps": 4460, "total_steps": 4810, "loss": 0.2046, "lr": 8.066763266625282e-07, "epoch": 4.636174636174636, "percentage": 92.72, "elapsed_time": "0:18:46", "remaining_time": "0:01:28", "throughput": 1590.05, "total_tokens": 1791984}
|
|
{"current_steps": 4465, "total_steps": 4810, "loss": 0.1884, "lr": 7.839778135586007e-07, "epoch": 4.641372141372141, "percentage": 92.83, "elapsed_time": "0:18:47", "remaining_time": "0:01:27", "throughput": 1591.29, "total_tokens": 1793904}
|
|
{"current_steps": 4470, "total_steps": 4810, "loss": 0.223, "lr": 7.615981361314889e-07, "epoch": 4.646569646569646, "percentage": 92.93, "elapsed_time": "0:18:47", "remaining_time": "0:01:25", "throughput": 1592.58, "total_tokens": 1795888}
|
|
{"current_steps": 4475, "total_steps": 4810, "loss": 0.3424, "lr": 7.3953758903898e-07, "epoch": 4.651767151767151, "percentage": 93.04, "elapsed_time": "0:18:47", "remaining_time": "0:01:24", "throughput": 1593.87, "total_tokens": 1797872}
|
|
{"current_steps": 4480, "total_steps": 4810, "loss": 0.2152, "lr": 7.177964627370997e-07, "epoch": 4.656964656964657, "percentage": 93.14, "elapsed_time": "0:18:48", "remaining_time": "0:01:23", "throughput": 1595.23, "total_tokens": 1799920}
|
|
{"current_steps": 4485, "total_steps": 4810, "loss": 0.2628, "lr": 6.963750434762745e-07, "epoch": 4.662162162162162, "percentage": 93.24, "elapsed_time": "0:18:48", "remaining_time": "0:01:21", "throughput": 1596.41, "total_tokens": 1801776}
|
|
{"current_steps": 4490, "total_steps": 4810, "loss": 0.2974, "lr": 6.752736132975696e-07, "epoch": 4.667359667359667, "percentage": 93.35, "elapsed_time": "0:18:48", "remaining_time": "0:01:20", "throughput": 1597.75, "total_tokens": 1803824}
|
|
{"current_steps": 4495, "total_steps": 4810, "loss": 0.1697, "lr": 6.54492450028979e-07, "epoch": 4.672557172557172, "percentage": 93.45, "elapsed_time": "0:18:49", "remaining_time": "0:01:19", "throughput": 1598.98, "total_tokens": 1805744}
|
|
{"current_steps": 4500, "total_steps": 4810, "loss": 0.2489, "lr": 6.340318272817474e-07, "epoch": 4.6777546777546775, "percentage": 93.56, "elapsed_time": "0:18:49", "remaining_time": "0:01:17", "throughput": 1600.27, "total_tokens": 1807728}
|
|
{"current_steps": 4505, "total_steps": 4810, "loss": 0.255, "lr": 6.138920144468124e-07, "epoch": 4.682952182952183, "percentage": 93.66, "elapsed_time": "0:18:49", "remaining_time": "0:01:16", "throughput": 1601.55, "total_tokens": 1809712}
|
|
{"current_steps": 4510, "total_steps": 4810, "loss": 0.2946, "lr": 5.94073276691201e-07, "epoch": 4.6881496881496885, "percentage": 93.76, "elapsed_time": "0:18:50", "remaining_time": "0:01:15", "throughput": 1602.78, "total_tokens": 1811632}
|
|
{"current_steps": 4515, "total_steps": 4810, "loss": 0.2011, "lr": 5.745758749545749e-07, "epoch": 4.6933471933471935, "percentage": 93.87, "elapsed_time": "0:18:50", "remaining_time": "0:01:13", "throughput": 1604.01, "total_tokens": 1813552}
|
|
{"current_steps": 4520, "total_steps": 4810, "loss": 0.2354, "lr": 5.554000659457881e-07, "epoch": 4.698544698544699, "percentage": 93.97, "elapsed_time": "0:18:50", "remaining_time": "0:01:12", "throughput": 1605.4, "total_tokens": 1815664}
|
|
{"current_steps": 4525, "total_steps": 4810, "loss": 0.2284, "lr": 5.365461021395096e-07, "epoch": 4.703742203742204, "percentage": 94.07, "elapsed_time": "0:18:51", "remaining_time": "0:01:11", "throughput": 1606.68, "total_tokens": 1817648}
|
|
{"current_steps": 4530, "total_steps": 4810, "loss": 0.2259, "lr": 5.180142317728815e-07, "epoch": 4.708939708939709, "percentage": 94.18, "elapsed_time": "0:18:51", "remaining_time": "0:01:09", "throughput": 1608.02, "total_tokens": 1819696}
|
|
{"current_steps": 4535, "total_steps": 4810, "loss": 0.269, "lr": 4.998046988422766e-07, "epoch": 4.714137214137214, "percentage": 94.28, "elapsed_time": "0:18:51", "remaining_time": "0:01:08", "throughput": 1609.3, "total_tokens": 1821680}
|
|
{"current_steps": 4540, "total_steps": 4810, "loss": 0.2786, "lr": 4.819177431000604e-07, "epoch": 4.71933471933472, "percentage": 94.39, "elapsed_time": "0:18:52", "remaining_time": "0:01:07", "throughput": 1610.64, "total_tokens": 1823728}
|
|
{"current_steps": 4545, "total_steps": 4810, "loss": 0.3228, "lr": 4.6435360005145644e-07, "epoch": 4.724532224532225, "percentage": 94.49, "elapsed_time": "0:18:52", "remaining_time": "0:01:06", "throughput": 1611.92, "total_tokens": 1825712}
|
|
{"current_steps": 4550, "total_steps": 4810, "loss": 0.2664, "lr": 4.4711250095143267e-07, "epoch": 4.72972972972973, "percentage": 94.59, "elapsed_time": "0:18:52", "remaining_time": "0:01:04", "throughput": 1613.25, "total_tokens": 1827760}
|
|
{"current_steps": 4555, "total_steps": 4810, "loss": 0.2014, "lr": 4.30194672801662e-07, "epoch": 4.734927234927235, "percentage": 94.7, "elapsed_time": "0:18:53", "remaining_time": "0:01:03", "throughput": 1614.47, "total_tokens": 1829680}
|
|
{"current_steps": 4560, "total_steps": 4810, "loss": 0.2992, "lr": 4.136003383475251e-07, "epoch": 4.74012474012474, "percentage": 94.8, "elapsed_time": "0:18:53", "remaining_time": "0:01:02", "throughput": 1615.81, "total_tokens": 1831728}
|
|
{"current_steps": 4565, "total_steps": 4810, "loss": 0.2033, "lr": 3.9732971607519265e-07, "epoch": 4.745322245322245, "percentage": 94.91, "elapsed_time": "0:18:53", "remaining_time": "0:01:00", "throughput": 1617.02, "total_tokens": 1833648}
|
|
{"current_steps": 4570, "total_steps": 4810, "loss": 0.2388, "lr": 3.8138302020873373e-07, "epoch": 4.75051975051975, "percentage": 95.01, "elapsed_time": "0:18:54", "remaining_time": "0:00:59", "throughput": 1618.36, "total_tokens": 1835696}
|
|
{"current_steps": 4575, "total_steps": 4810, "loss": 0.2582, "lr": 3.6576046070730675e-07, "epoch": 4.755717255717256, "percentage": 95.11, "elapsed_time": "0:18:54", "remaining_time": "0:00:58", "throughput": 1619.75, "total_tokens": 1837808}
|
|
{"current_steps": 4579, "total_steps": 4810, "eval_loss": 0.2604904770851135, "epoch": 4.75987525987526, "percentage": 95.2, "elapsed_time": "0:18:55", "remaining_time": "0:00:57", "throughput": 1619.22, "total_tokens": 1839344}
|
|
{"current_steps": 4580, "total_steps": 4810, "loss": 0.2365, "lr": 3.5046224326238107e-07, "epoch": 4.760914760914761, "percentage": 95.22, "elapsed_time": "0:19:38", "remaining_time": "0:00:59", "throughput": 1561.72, "total_tokens": 1839728}
|
|
{"current_steps": 4585, "total_steps": 4810, "loss": 0.2569, "lr": 3.3548856929505047e-07, "epoch": 4.766112266112266, "percentage": 95.32, "elapsed_time": "0:19:38", "remaining_time": "0:00:57", "throughput": 1563.03, "total_tokens": 1841776}
|
|
{"current_steps": 4590, "total_steps": 4810, "loss": 0.2548, "lr": 3.208396359533572e-07, "epoch": 4.771309771309771, "percentage": 95.43, "elapsed_time": "0:19:38", "remaining_time": "0:00:56", "throughput": 1564.22, "total_tokens": 1843696}
|
|
{"current_steps": 4595, "total_steps": 4810, "loss": 0.2391, "lr": 3.065156361097138e-07, "epoch": 4.776507276507276, "percentage": 95.53, "elapsed_time": "0:19:38", "remaining_time": "0:00:55", "throughput": 1565.52, "total_tokens": 1845744}
|
|
{"current_steps": 4600, "total_steps": 4810, "loss": 0.2364, "lr": 2.925167583583577e-07, "epoch": 4.781704781704782, "percentage": 95.63, "elapsed_time": "0:19:39", "remaining_time": "0:00:53", "throughput": 1566.82, "total_tokens": 1847792}
|
|
{"current_steps": 4605, "total_steps": 4810, "loss": 0.2843, "lr": 2.7884318701285885e-07, "epoch": 4.786902286902287, "percentage": 95.74, "elapsed_time": "0:19:39", "remaining_time": "0:00:52", "throughput": 1568.06, "total_tokens": 1849776}
|
|
{"current_steps": 4610, "total_steps": 4810, "loss": 0.2187, "lr": 2.6549510210371607e-07, "epoch": 4.792099792099792, "percentage": 95.84, "elapsed_time": "0:19:39", "remaining_time": "0:00:51", "throughput": 1569.52, "total_tokens": 1852016}
|
|
{"current_steps": 4615, "total_steps": 4810, "loss": 0.246, "lr": 2.524726793759591e-07, "epoch": 4.797297297297297, "percentage": 95.95, "elapsed_time": "0:19:40", "remaining_time": "0:00:49", "throughput": 1570.82, "total_tokens": 1854064}
|
|
{"current_steps": 4620, "total_steps": 4810, "loss": 0.2382, "lr": 2.397760902868612e-07, "epoch": 4.802494802494802, "percentage": 96.05, "elapsed_time": "0:19:40", "remaining_time": "0:00:48", "throughput": 1572.11, "total_tokens": 1856112}
|
|
{"current_steps": 4625, "total_steps": 4810, "loss": 0.2361, "lr": 2.274055020036553e-07, "epoch": 4.8076923076923075, "percentage": 96.15, "elapsed_time": "0:19:40", "remaining_time": "0:00:47", "throughput": 1573.35, "total_tokens": 1858096}
|
|
{"current_steps": 4630, "total_steps": 4810, "loss": 0.2671, "lr": 2.1536107740135482e-07, "epoch": 4.8128898128898125, "percentage": 96.26, "elapsed_time": "0:19:41", "remaining_time": "0:00:45", "throughput": 1574.76, "total_tokens": 1860272}
|
|
{"current_steps": 4635, "total_steps": 4810, "loss": 0.2501, "lr": 2.0364297506060003e-07, "epoch": 4.8180873180873185, "percentage": 96.36, "elapsed_time": "0:19:41", "remaining_time": "0:00:44", "throughput": 1576.0, "total_tokens": 1862256}
|
|
{"current_steps": 4640, "total_steps": 4810, "loss": 0.1984, "lr": 1.922513492655653e-07, "epoch": 4.8232848232848236, "percentage": 96.47, "elapsed_time": "0:19:41", "remaining_time": "0:00:43", "throughput": 1577.29, "total_tokens": 1864304}
|
|
{"current_steps": 4645, "total_steps": 4810, "loss": 0.2677, "lr": 1.8118635000194396e-07, "epoch": 4.828482328482329, "percentage": 96.57, "elapsed_time": "0:19:42", "remaining_time": "0:00:41", "throughput": 1578.48, "total_tokens": 1866224}
|
|
{"current_steps": 4650, "total_steps": 4810, "loss": 0.2349, "lr": 1.704481229549526e-07, "epoch": 4.833679833679834, "percentage": 96.67, "elapsed_time": "0:19:42", "remaining_time": "0:00:40", "throughput": 1579.82, "total_tokens": 1868336}
|
|
{"current_steps": 4655, "total_steps": 4810, "loss": 0.2927, "lr": 1.6003680950742728e-07, "epoch": 4.838877338877339, "percentage": 96.78, "elapsed_time": "0:19:42", "remaining_time": "0:00:39", "throughput": 1581.17, "total_tokens": 1870448}
|
|
{"current_steps": 4660, "total_steps": 4810, "loss": 0.2206, "lr": 1.4995254673795812e-07, "epoch": 4.844074844074844, "percentage": 96.88, "elapsed_time": "0:19:43", "remaining_time": "0:00:38", "throughput": 1582.35, "total_tokens": 1872368}
|
|
{"current_steps": 4665, "total_steps": 4810, "loss": 0.2374, "lr": 1.4019546741908251e-07, "epoch": 4.849272349272349, "percentage": 96.99, "elapsed_time": "0:19:43", "remaining_time": "0:00:36", "throughput": 1583.7, "total_tokens": 1874480}
|
|
{"current_steps": 4670, "total_steps": 4810, "loss": 0.2712, "lr": 1.3076570001553934e-07, "epoch": 4.854469854469855, "percentage": 97.09, "elapsed_time": "0:19:43", "remaining_time": "0:00:35", "throughput": 1584.93, "total_tokens": 1876464}
|
|
{"current_steps": 4675, "total_steps": 4810, "loss": 0.2406, "lr": 1.216633686825841e-07, "epoch": 4.85966735966736, "percentage": 97.19, "elapsed_time": "0:19:44", "remaining_time": "0:00:34", "throughput": 1586.17, "total_tokens": 1878448}
|
|
{"current_steps": 4680, "total_steps": 4810, "loss": 0.2743, "lr": 1.1288859326433477e-07, "epoch": 4.864864864864865, "percentage": 97.3, "elapsed_time": "0:19:44", "remaining_time": "0:00:32", "throughput": 1587.4, "total_tokens": 1880432}
|
|
{"current_steps": 4685, "total_steps": 4810, "loss": 0.2828, "lr": 1.0444148929221464e-07, "epoch": 4.87006237006237, "percentage": 97.4, "elapsed_time": "0:19:44", "remaining_time": "0:00:31", "throughput": 1588.74, "total_tokens": 1882544}
|
|
{"current_steps": 4690, "total_steps": 4810, "loss": 0.274, "lr": 9.63221679834203e-08, "epoch": 4.875259875259875, "percentage": 97.51, "elapsed_time": "0:19:45", "remaining_time": "0:00:30", "throughput": 1589.98, "total_tokens": 1884528}
|
|
{"current_steps": 4695, "total_steps": 4810, "loss": 0.2457, "lr": 8.853073623946162e-08, "epoch": 4.88045738045738, "percentage": 97.61, "elapsed_time": "0:19:45", "remaining_time": "0:00:29", "throughput": 1591.32, "total_tokens": 1886640}
|
|
{"current_steps": 4700, "total_steps": 4810, "loss": 0.2663, "lr": 8.106729664475176e-08, "epoch": 4.885654885654886, "percentage": 97.71, "elapsed_time": "0:19:45", "remaining_time": "0:00:27", "throughput": 1592.6, "total_tokens": 1888688}
|
|
{"current_steps": 4705, "total_steps": 4810, "loss": 0.2572, "lr": 7.393194746525279e-08, "epoch": 4.890852390852391, "percentage": 97.82, "elapsed_time": "0:19:46", "remaining_time": "0:00:26", "throughput": 1593.89, "total_tokens": 1890736}
|
|
{"current_steps": 4710, "total_steps": 4810, "loss": 0.2399, "lr": 6.712478264719601e-08, "epoch": 4.896049896049896, "percentage": 97.92, "elapsed_time": "0:19:46", "remaining_time": "0:00:25", "throughput": 1595.12, "total_tokens": 1892720}
|
|
{"current_steps": 4715, "total_steps": 4810, "loss": 0.2267, "lr": 6.064589181582481e-08, "epoch": 4.901247401247401, "percentage": 98.02, "elapsed_time": "0:19:46", "remaining_time": "0:00:23", "throughput": 1596.35, "total_tokens": 1894704}
|
|
{"current_steps": 4720, "total_steps": 4810, "loss": 0.2652, "lr": 5.4495360274231524e-08, "epoch": 4.906444906444906, "percentage": 98.13, "elapsed_time": "0:19:47", "remaining_time": "0:00:22", "throughput": 1597.52, "total_tokens": 1896624}
|
|
{"current_steps": 4725, "total_steps": 4810, "loss": 0.2254, "lr": 4.867326900223068e-08, "epoch": 4.911642411642411, "percentage": 98.23, "elapsed_time": "0:19:47", "remaining_time": "0:00:21", "throughput": 1598.7, "total_tokens": 1898544}
|
|
{"current_steps": 4730, "total_steps": 4810, "loss": 0.2342, "lr": 4.317969465527927e-08, "epoch": 4.916839916839917, "percentage": 98.34, "elapsed_time": "0:19:47", "remaining_time": "0:00:20", "throughput": 1599.98, "total_tokens": 1900592}
|
|
{"current_steps": 4735, "total_steps": 4810, "loss": 0.2151, "lr": 3.8014709563488625e-08, "epoch": 4.922037422037422, "percentage": 98.44, "elapsed_time": "0:19:48", "remaining_time": "0:00:18", "throughput": 1601.2, "total_tokens": 1902576}
|
|
{"current_steps": 4740, "total_steps": 4810, "loss": 0.2368, "lr": 3.317838173066135e-08, "epoch": 4.927234927234927, "percentage": 98.54, "elapsed_time": "0:19:48", "remaining_time": "0:00:17", "throughput": 1602.48, "total_tokens": 1904624}
|
|
{"current_steps": 4745, "total_steps": 4810, "loss": 0.2662, "lr": 2.8670774833386426e-08, "epoch": 4.9324324324324325, "percentage": 98.65, "elapsed_time": "0:19:48", "remaining_time": "0:00:16", "throughput": 1603.82, "total_tokens": 1906736}
|
|
{"current_steps": 4750, "total_steps": 4810, "loss": 0.221, "lr": 2.449194822022327e-08, "epoch": 4.9376299376299375, "percentage": 98.75, "elapsed_time": "0:19:49", "remaining_time": "0:00:15", "throughput": 1604.93, "total_tokens": 1908592}
|
|
{"current_steps": 4755, "total_steps": 4810, "loss": 0.2286, "lr": 2.064195691089954e-08, "epoch": 4.942827442827443, "percentage": 98.86, "elapsed_time": "0:19:49", "remaining_time": "0:00:13", "throughput": 1606.16, "total_tokens": 1910576}
|
|
{"current_steps": 4760, "total_steps": 4810, "loss": 0.2104, "lr": 1.712085159559784e-08, "epoch": 4.948024948024948, "percentage": 98.96, "elapsed_time": "0:19:49", "remaining_time": "0:00:12", "throughput": 1607.43, "total_tokens": 1912624}
|
|
{"current_steps": 4765, "total_steps": 4810, "loss": 0.2802, "lr": 1.3928678634289593e-08, "epoch": 4.953222453222454, "percentage": 99.06, "elapsed_time": "0:19:50", "remaining_time": "0:00:11", "throughput": 1608.66, "total_tokens": 1914608}
|
|
{"current_steps": 4770, "total_steps": 4810, "loss": 0.2125, "lr": 1.1065480056110522e-08, "epoch": 4.958419958419959, "percentage": 99.17, "elapsed_time": "0:19:50", "remaining_time": "0:00:09", "throughput": 1609.88, "total_tokens": 1916592}
|
|
{"current_steps": 4775, "total_steps": 4810, "loss": 0.2162, "lr": 8.531293558824982e-09, "epoch": 4.963617463617464, "percentage": 99.27, "elapsed_time": "0:19:50", "remaining_time": "0:00:08", "throughput": 1611.2, "total_tokens": 1918704}
|
|
{"current_steps": 4780, "total_steps": 4810, "loss": 0.2508, "lr": 6.326152508320804e-09, "epoch": 4.968814968814969, "percentage": 99.38, "elapsed_time": "0:19:51", "remaining_time": "0:00:07", "throughput": 1612.36, "total_tokens": 1920624}
|
|
{"current_steps": 4785, "total_steps": 4810, "loss": 0.2453, "lr": 4.450085938170756e-09, "epoch": 4.974012474012474, "percentage": 99.48, "elapsed_time": "0:19:51", "remaining_time": "0:00:06", "throughput": 1613.47, "total_tokens": 1922480}
|
|
{"current_steps": 4790, "total_steps": 4810, "loss": 0.2758, "lr": 2.9031185492522926e-09, "epoch": 4.979209979209979, "percentage": 99.58, "elapsed_time": "0:19:51", "remaining_time": "0:00:04", "throughput": 1614.69, "total_tokens": 1924464}
|
|
{"current_steps": 4795, "total_steps": 4810, "loss": 0.2275, "lr": 1.6852707094172636e-09, "epoch": 4.984407484407484, "percentage": 99.69, "elapsed_time": "0:19:52", "remaining_time": "0:00:03", "throughput": 1615.9, "total_tokens": 1926448}
|
|
{"current_steps": 4800, "total_steps": 4810, "loss": 0.2635, "lr": 7.965584532282355e-10, "epoch": 4.98960498960499, "percentage": 99.79, "elapsed_time": "0:19:52", "remaining_time": "0:00:02", "throughput": 1617.23, "total_tokens": 1928560}
|
|
{"current_steps": 4805, "total_steps": 4810, "loss": 0.2384, "lr": 2.3699348174754945e-10, "epoch": 4.994802494802495, "percentage": 99.9, "elapsed_time": "0:19:52", "remaining_time": "0:00:01", "throughput": 1618.45, "total_tokens": 1930544}
|
|
{"current_steps": 4810, "total_steps": 4810, "loss": 0.2943, "lr": 6.583162381890162e-12, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:19:53", "remaining_time": "0:00:00", "throughput": 1619.66, "total_tokens": 1932608}
|
|
{"current_steps": 4810, "total_steps": 4810, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:20:17", "remaining_time": "0:00:00", "throughput": 1587.52, "total_tokens": 1932608}
|