初始化项目,由ModelHub XC社区提供模型
Model: waleko/Qwen3-8B-SFT-envbench_gpt5-yellow-green Source: Original Platform
This commit is contained in:
307
trainer_log.jsonl
Normal file
307
trainer_log.jsonl
Normal file
@@ -0,0 +1,307 @@
|
||||
{"current_steps": 1, "total_steps": 295, "loss": 1.6295, "lr": 0.0, "epoch": 0.01694915254237288, "percentage": 0.34, "elapsed_time": "0:00:22", "remaining_time": "1:52:16", "throughput": 4897.75, "total_tokens": 112224}
|
||||
{"current_steps": 2, "total_steps": 295, "loss": 1.6581, "lr": 1.6666666666666667e-06, "epoch": 0.03389830508474576, "percentage": 0.68, "elapsed_time": "0:00:33", "remaining_time": "1:21:08", "throughput": 6653.28, "total_tokens": 221080}
|
||||
{"current_steps": 3, "total_steps": 295, "loss": 1.6797, "lr": 3.3333333333333333e-06, "epoch": 0.05084745762711865, "percentage": 1.02, "elapsed_time": "0:00:43", "remaining_time": "1:10:33", "throughput": 7509.73, "total_tokens": 326656}
|
||||
{"current_steps": 4, "total_steps": 295, "loss": 1.4077, "lr": 5e-06, "epoch": 0.06779661016949153, "percentage": 1.36, "elapsed_time": "0:00:53", "remaining_time": "1:05:16", "throughput": 8059.27, "total_tokens": 433832}
|
||||
{"current_steps": 5, "total_steps": 295, "loss": 1.3219, "lr": 6.666666666666667e-06, "epoch": 0.0847457627118644, "percentage": 1.69, "elapsed_time": "0:01:04", "remaining_time": "1:02:06", "throughput": 8531.25, "total_tokens": 548184}
|
||||
{"current_steps": 6, "total_steps": 295, "loss": 1.14, "lr": 8.333333333333334e-06, "epoch": 0.1016949152542373, "percentage": 2.03, "elapsed_time": "0:01:14", "remaining_time": "0:59:55", "throughput": 8784.11, "total_tokens": 655632}
|
||||
{"current_steps": 7, "total_steps": 295, "loss": 1.0531, "lr": 1e-05, "epoch": 0.11864406779661017, "percentage": 2.37, "elapsed_time": "0:01:24", "remaining_time": "0:58:03", "throughput": 8972.34, "total_tokens": 759704}
|
||||
{"current_steps": 8, "total_steps": 295, "loss": 0.9491, "lr": 1.1666666666666668e-05, "epoch": 0.13559322033898305, "percentage": 2.71, "elapsed_time": "0:01:34", "remaining_time": "0:56:42", "throughput": 9233.39, "total_tokens": 875624}
|
||||
{"current_steps": 9, "total_steps": 295, "loss": 0.9397, "lr": 1.3333333333333333e-05, "epoch": 0.15254237288135594, "percentage": 3.05, "elapsed_time": "0:01:45", "remaining_time": "0:55:43", "throughput": 9406.61, "total_tokens": 989656}
|
||||
{"current_steps": 10, "total_steps": 295, "loss": 0.8694, "lr": 1.5e-05, "epoch": 0.1694915254237288, "percentage": 3.39, "elapsed_time": "0:01:55", "remaining_time": "0:54:39", "throughput": 9489.91, "total_tokens": 1092152}
|
||||
{"current_steps": 11, "total_steps": 295, "loss": 0.8185, "lr": 1.6666666666666667e-05, "epoch": 0.1864406779661017, "percentage": 3.73, "elapsed_time": "0:02:05", "remaining_time": "0:53:58", "throughput": 9605.72, "total_tokens": 1204856}
|
||||
{"current_steps": 12, "total_steps": 295, "loss": 0.7581, "lr": 1.8333333333333333e-05, "epoch": 0.2033898305084746, "percentage": 4.07, "elapsed_time": "0:02:15", "remaining_time": "0:53:19", "throughput": 9605.33, "total_tokens": 1302968}
|
||||
{"current_steps": 13, "total_steps": 295, "loss": 0.7413, "lr": 2e-05, "epoch": 0.22033898305084745, "percentage": 4.41, "elapsed_time": "0:02:25", "remaining_time": "0:52:43", "throughput": 9636.62, "total_tokens": 1405520}
|
||||
{"current_steps": 14, "total_steps": 295, "loss": 0.719, "lr": 2.1666666666666667e-05, "epoch": 0.23728813559322035, "percentage": 4.75, "elapsed_time": "0:02:36", "remaining_time": "0:52:12", "throughput": 9677.56, "total_tokens": 1510384}
|
||||
{"current_steps": 15, "total_steps": 295, "loss": 0.7001, "lr": 2.3333333333333336e-05, "epoch": 0.2542372881355932, "percentage": 5.08, "elapsed_time": "0:02:46", "remaining_time": "0:51:43", "throughput": 9716.99, "total_tokens": 1615456}
|
||||
{"current_steps": 16, "total_steps": 295, "loss": 0.682, "lr": 2.5e-05, "epoch": 0.2711864406779661, "percentage": 5.42, "elapsed_time": "0:02:56", "remaining_time": "0:51:11", "throughput": 9767.97, "total_tokens": 1720464}
|
||||
{"current_steps": 17, "total_steps": 295, "loss": 0.6806, "lr": 2.6666666666666667e-05, "epoch": 0.288135593220339, "percentage": 5.76, "elapsed_time": "0:03:06", "remaining_time": "0:50:48", "throughput": 9796.14, "total_tokens": 1825984}
|
||||
{"current_steps": 18, "total_steps": 295, "loss": 0.6672, "lr": 2.8333333333333335e-05, "epoch": 0.3050847457627119, "percentage": 6.1, "elapsed_time": "0:03:16", "remaining_time": "0:50:24", "throughput": 9797.28, "total_tokens": 1925424}
|
||||
{"current_steps": 19, "total_steps": 295, "loss": 0.6324, "lr": 3e-05, "epoch": 0.3220338983050847, "percentage": 6.44, "elapsed_time": "0:03:26", "remaining_time": "0:50:00", "throughput": 9800.62, "total_tokens": 2024656}
|
||||
{"current_steps": 20, "total_steps": 295, "loss": 0.6849, "lr": 3.1666666666666666e-05, "epoch": 0.3389830508474576, "percentage": 6.78, "elapsed_time": "0:03:36", "remaining_time": "0:49:39", "throughput": 9801.83, "total_tokens": 2123808}
|
||||
{"current_steps": 21, "total_steps": 295, "loss": 0.6998, "lr": 3.3333333333333335e-05, "epoch": 0.3559322033898305, "percentage": 7.12, "elapsed_time": "0:03:47", "remaining_time": "0:49:22", "throughput": 9850.3, "total_tokens": 2236536}
|
||||
{"current_steps": 22, "total_steps": 295, "loss": 0.6178, "lr": 3.5e-05, "epoch": 0.3728813559322034, "percentage": 7.46, "elapsed_time": "0:03:57", "remaining_time": "0:49:02", "throughput": 9889.0, "total_tokens": 2345176}
|
||||
{"current_steps": 23, "total_steps": 295, "loss": 0.6417, "lr": 3.6666666666666666e-05, "epoch": 0.3898305084745763, "percentage": 7.8, "elapsed_time": "0:04:07", "remaining_time": "0:48:42", "throughput": 9915.97, "total_tokens": 2450632}
|
||||
{"current_steps": 24, "total_steps": 295, "loss": 0.681, "lr": 3.8333333333333334e-05, "epoch": 0.4067796610169492, "percentage": 8.14, "elapsed_time": "0:04:17", "remaining_time": "0:48:26", "throughput": 9947.59, "total_tokens": 2560968}
|
||||
{"current_steps": 25, "total_steps": 295, "loss": 0.6877, "lr": 4e-05, "epoch": 0.423728813559322, "percentage": 8.47, "elapsed_time": "0:04:27", "remaining_time": "0:48:11", "throughput": 9983.73, "total_tokens": 2673304}
|
||||
{"current_steps": 25, "total_steps": 295, "eval_loss": 0.671046793460846, "epoch": 0.423728813559322, "percentage": 8.47, "elapsed_time": "0:04:32", "remaining_time": "0:48:58", "throughput": 9823.87, "total_tokens": 2673304}
|
||||
{"current_steps": 26, "total_steps": 295, "loss": 0.6362, "lr": 4.166666666666667e-05, "epoch": 0.4406779661016949, "percentage": 8.81, "elapsed_time": "0:04:41", "remaining_time": "0:48:36", "throughput": 9823.97, "total_tokens": 2769736}
|
||||
{"current_steps": 27, "total_steps": 295, "loss": 0.6535, "lr": 4.3333333333333334e-05, "epoch": 0.4576271186440678, "percentage": 9.15, "elapsed_time": "0:04:52", "remaining_time": "0:48:19", "throughput": 9845.5, "total_tokens": 2875984}
|
||||
{"current_steps": 28, "total_steps": 295, "loss": 0.7158, "lr": 4.5e-05, "epoch": 0.4745762711864407, "percentage": 9.49, "elapsed_time": "0:05:02", "remaining_time": "0:48:03", "throughput": 9876.74, "total_tokens": 2986112}
|
||||
{"current_steps": 29, "total_steps": 295, "loss": 0.6744, "lr": 4.666666666666667e-05, "epoch": 0.4915254237288136, "percentage": 9.83, "elapsed_time": "0:05:12", "remaining_time": "0:47:48", "throughput": 9893.32, "total_tokens": 3093880}
|
||||
{"current_steps": 30, "total_steps": 295, "loss": 0.6442, "lr": 4.8333333333333334e-05, "epoch": 0.5084745762711864, "percentage": 10.17, "elapsed_time": "0:05:22", "remaining_time": "0:47:31", "throughput": 9922.4, "total_tokens": 3202720}
|
||||
{"current_steps": 31, "total_steps": 295, "loss": 0.6758, "lr": 5e-05, "epoch": 0.5254237288135594, "percentage": 10.51, "elapsed_time": "0:05:33", "remaining_time": "0:47:16", "throughput": 9942.05, "total_tokens": 3311816}
|
||||
{"current_steps": 32, "total_steps": 295, "loss": 0.7101, "lr": 4.999824323801887e-05, "epoch": 0.5423728813559322, "percentage": 10.85, "elapsed_time": "0:05:43", "remaining_time": "0:47:01", "throughput": 9948.53, "total_tokens": 3415424}
|
||||
{"current_steps": 33, "total_steps": 295, "loss": 0.6392, "lr": 4.9992973198972505e-05, "epoch": 0.559322033898305, "percentage": 11.19, "elapsed_time": "0:05:53", "remaining_time": "0:46:47", "throughput": 9965.53, "total_tokens": 3524136}
|
||||
{"current_steps": 34, "total_steps": 295, "loss": 0.5893, "lr": 4.998419062351724e-05, "epoch": 0.576271186440678, "percentage": 11.53, "elapsed_time": "0:06:03", "remaining_time": "0:46:32", "throughput": 9982.83, "total_tokens": 3631528}
|
||||
{"current_steps": 35, "total_steps": 295, "loss": 0.6018, "lr": 4.997189674596463e-05, "epoch": 0.5932203389830508, "percentage": 11.86, "elapsed_time": "0:06:14", "remaining_time": "0:46:19", "throughput": 9996.29, "total_tokens": 3740096}
|
||||
{"current_steps": 36, "total_steps": 295, "loss": 0.6345, "lr": 4.995609329410804e-05, "epoch": 0.6101694915254238, "percentage": 12.2, "elapsed_time": "0:06:24", "remaining_time": "0:46:05", "throughput": 10008.14, "total_tokens": 3846680}
|
||||
{"current_steps": 37, "total_steps": 295, "loss": 0.6499, "lr": 4.993678248897972e-05, "epoch": 0.6271186440677966, "percentage": 12.54, "elapsed_time": "0:06:34", "remaining_time": "0:45:51", "throughput": 10015.34, "total_tokens": 3951992}
|
||||
{"current_steps": 38, "total_steps": 295, "loss": 0.6397, "lr": 4.9913967044538734e-05, "epoch": 0.6440677966101694, "percentage": 12.88, "elapsed_time": "0:06:44", "remaining_time": "0:45:38", "throughput": 10023.83, "total_tokens": 4058880}
|
||||
{"current_steps": 39, "total_steps": 295, "loss": 0.6596, "lr": 4.9887650167289525e-05, "epoch": 0.6610169491525424, "percentage": 13.22, "elapsed_time": "0:06:54", "remaining_time": "0:45:23", "throughput": 10010.12, "total_tokens": 4152608}
|
||||
{"current_steps": 40, "total_steps": 295, "loss": 0.5761, "lr": 4.985783555583123e-05, "epoch": 0.6779661016949152, "percentage": 13.56, "elapsed_time": "0:07:04", "remaining_time": "0:45:08", "throughput": 10031.38, "total_tokens": 4261232}
|
||||
{"current_steps": 41, "total_steps": 295, "loss": 0.65, "lr": 4.982452740033793e-05, "epoch": 0.6949152542372882, "percentage": 13.9, "elapsed_time": "0:07:15", "remaining_time": "0:44:55", "throughput": 10040.68, "total_tokens": 4368744}
|
||||
{"current_steps": 42, "total_steps": 295, "loss": 0.6319, "lr": 4.978773038196972e-05, "epoch": 0.711864406779661, "percentage": 14.24, "elapsed_time": "0:07:25", "remaining_time": "0:44:42", "throughput": 10039.27, "total_tokens": 4470520}
|
||||
{"current_steps": 43, "total_steps": 295, "loss": 0.6236, "lr": 4.974744967221483e-05, "epoch": 0.7288135593220338, "percentage": 14.58, "elapsed_time": "0:07:35", "remaining_time": "0:44:30", "throughput": 10042.45, "total_tokens": 4575488}
|
||||
{"current_steps": 44, "total_steps": 295, "loss": 0.6215, "lr": 4.9703690932162824e-05, "epoch": 0.7457627118644068, "percentage": 14.92, "elapsed_time": "0:07:45", "remaining_time": "0:44:17", "throughput": 10047.75, "total_tokens": 4680592}
|
||||
{"current_steps": 45, "total_steps": 295, "loss": 0.6175, "lr": 4.9656460311708963e-05, "epoch": 0.7627118644067796, "percentage": 15.25, "elapsed_time": "0:07:55", "remaining_time": "0:44:03", "throughput": 10051.64, "total_tokens": 4783320}
|
||||
{"current_steps": 46, "total_steps": 295, "loss": 0.5959, "lr": 4.960576444868992e-05, "epoch": 0.7796610169491526, "percentage": 15.59, "elapsed_time": "0:08:06", "remaining_time": "0:43:51", "throughput": 10046.63, "total_tokens": 4883848}
|
||||
{"current_steps": 47, "total_steps": 295, "loss": 0.6428, "lr": 4.955161046795088e-05, "epoch": 0.7966101694915254, "percentage": 15.93, "elapsed_time": "0:08:16", "remaining_time": "0:43:39", "throughput": 10061.8, "total_tokens": 4995680}
|
||||
{"current_steps": 48, "total_steps": 295, "loss": 0.6275, "lr": 4.9494005980344194e-05, "epoch": 0.8135593220338984, "percentage": 16.27, "elapsed_time": "0:08:26", "remaining_time": "0:43:26", "throughput": 10079.28, "total_tokens": 5106208}
|
||||
{"current_steps": 49, "total_steps": 295, "loss": 0.5977, "lr": 4.943295908165977e-05, "epoch": 0.8305084745762712, "percentage": 16.61, "elapsed_time": "0:08:36", "remaining_time": "0:43:15", "throughput": 10089.32, "total_tokens": 5215272}
|
||||
{"current_steps": 50, "total_steps": 295, "loss": 0.5901, "lr": 4.936847835148725e-05, "epoch": 0.847457627118644, "percentage": 16.95, "elapsed_time": "0:08:47", "remaining_time": "0:43:03", "throughput": 10091.9, "total_tokens": 5320152}
|
||||
{"current_steps": 50, "total_steps": 295, "eval_loss": 0.5966207981109619, "epoch": 0.847457627118644, "percentage": 16.95, "elapsed_time": "0:08:51", "remaining_time": "0:43:24", "throughput": 10009.49, "total_tokens": 5320152}
|
||||
{"current_steps": 51, "total_steps": 295, "loss": 0.6284, "lr": 4.930057285201027e-05, "epoch": 0.864406779661017, "percentage": 17.29, "elapsed_time": "0:09:01", "remaining_time": "0:43:12", "throughput": 10009.91, "total_tokens": 5424336}
|
||||
{"current_steps": 52, "total_steps": 295, "loss": 0.6046, "lr": 4.9229252126732814e-05, "epoch": 0.8813559322033898, "percentage": 17.63, "elapsed_time": "0:09:12", "remaining_time": "0:43:00", "throughput": 10025.3, "total_tokens": 5536400}
|
||||
{"current_steps": 53, "total_steps": 295, "loss": 0.5333, "lr": 4.9154526199137964e-05, "epoch": 0.8983050847457628, "percentage": 17.97, "elapsed_time": "0:09:22", "remaining_time": "0:42:49", "throughput": 10037.54, "total_tokens": 5647488}
|
||||
{"current_steps": 54, "total_steps": 295, "loss": 0.6442, "lr": 4.9076405571279207e-05, "epoch": 0.9152542372881356, "percentage": 18.31, "elapsed_time": "0:09:32", "remaining_time": "0:42:36", "throughput": 10039.18, "total_tokens": 5751248}
|
||||
{"current_steps": 55, "total_steps": 295, "loss": 0.663, "lr": 4.8994901222304465e-05, "epoch": 0.9322033898305084, "percentage": 18.64, "elapsed_time": "0:09:43", "remaining_time": "0:42:25", "throughput": 10044.15, "total_tokens": 5858104}
|
||||
{"current_steps": 56, "total_steps": 295, "loss": 0.5856, "lr": 4.891002460691306e-05, "epoch": 0.9491525423728814, "percentage": 18.98, "elapsed_time": "0:09:53", "remaining_time": "0:42:12", "throughput": 10041.25, "total_tokens": 5958920}
|
||||
{"current_steps": 57, "total_steps": 295, "loss": 0.5298, "lr": 4.882178765374589e-05, "epoch": 0.9661016949152542, "percentage": 19.32, "elapsed_time": "0:10:03", "remaining_time": "0:42:01", "throughput": 10048.19, "total_tokens": 6067168}
|
||||
{"current_steps": 58, "total_steps": 295, "loss": 0.6364, "lr": 4.87302027637089e-05, "epoch": 0.9830508474576272, "percentage": 19.66, "elapsed_time": "0:10:13", "remaining_time": "0:41:48", "throughput": 10041.93, "total_tokens": 6164248}
|
||||
{"current_steps": 59, "total_steps": 295, "loss": 0.5508, "lr": 4.863528280823033e-05, "epoch": 1.0, "percentage": 20.0, "elapsed_time": "0:10:24", "remaining_time": "0:41:36", "throughput": 10046.95, "total_tokens": 6269656}
|
||||
{"current_steps": 60, "total_steps": 295, "loss": 0.5221, "lr": 4.853704112745172e-05, "epoch": 1.0169491525423728, "percentage": 20.34, "elapsed_time": "0:10:34", "remaining_time": "0:41:24", "throughput": 10051.51, "total_tokens": 6377200}
|
||||
{"current_steps": 61, "total_steps": 295, "loss": 0.5232, "lr": 4.8435491528353026e-05, "epoch": 1.0338983050847457, "percentage": 20.68, "elapsed_time": "0:10:44", "remaining_time": "0:41:12", "throughput": 10053.46, "total_tokens": 6478784}
|
||||
{"current_steps": 62, "total_steps": 295, "loss": 0.4441, "lr": 4.833064828281225e-05, "epoch": 1.0508474576271187, "percentage": 21.02, "elapsed_time": "0:10:54", "remaining_time": "0:41:00", "throughput": 10071.77, "total_tokens": 6594704}
|
||||
{"current_steps": 63, "total_steps": 295, "loss": 0.5256, "lr": 4.822252612559961e-05, "epoch": 1.0677966101694916, "percentage": 21.36, "elapsed_time": "0:11:04", "remaining_time": "0:40:45", "throughput": 10078.66, "total_tokens": 6694384}
|
||||
{"current_steps": 64, "total_steps": 295, "loss": 0.518, "lr": 4.811114025230672e-05, "epoch": 1.0847457627118644, "percentage": 21.69, "elapsed_time": "0:11:14", "remaining_time": "0:40:33", "throughput": 10077.45, "total_tokens": 6795736}
|
||||
{"current_steps": 65, "total_steps": 295, "loss": 0.5539, "lr": 4.799650631721096e-05, "epoch": 1.1016949152542372, "percentage": 22.03, "elapsed_time": "0:11:24", "remaining_time": "0:40:22", "throughput": 10083.47, "total_tokens": 6903368}
|
||||
{"current_steps": 66, "total_steps": 295, "loss": 0.4718, "lr": 4.787864043107546e-05, "epoch": 1.11864406779661, "percentage": 22.37, "elapsed_time": "0:11:34", "remaining_time": "0:40:10", "throughput": 10081.29, "total_tokens": 7002776}
|
||||
{"current_steps": 67, "total_steps": 295, "loss": 0.4699, "lr": 4.775755915888483e-05, "epoch": 1.1355932203389831, "percentage": 22.71, "elapsed_time": "0:11:44", "remaining_time": "0:39:57", "throughput": 10072.86, "total_tokens": 7095696}
|
||||
{"current_steps": 68, "total_steps": 295, "loss": 0.5381, "lr": 4.763327951751711e-05, "epoch": 1.152542372881356, "percentage": 23.05, "elapsed_time": "0:11:55", "remaining_time": "0:39:49", "throughput": 10069.96, "total_tokens": 7206656}
|
||||
{"current_steps": 69, "total_steps": 295, "loss": 0.4825, "lr": 4.750581897335222e-05, "epoch": 1.1694915254237288, "percentage": 23.39, "elapsed_time": "0:12:07", "remaining_time": "0:39:41", "throughput": 10073.64, "total_tokens": 7324712}
|
||||
{"current_steps": 70, "total_steps": 295, "loss": 0.5406, "lr": 4.737519543981721e-05, "epoch": 1.1864406779661016, "percentage": 23.73, "elapsed_time": "0:12:18", "remaining_time": "0:39:33", "throughput": 10068.92, "total_tokens": 7436408}
|
||||
{"current_steps": 71, "total_steps": 295, "loss": 0.5419, "lr": 4.724142727486869e-05, "epoch": 1.2033898305084745, "percentage": 24.07, "elapsed_time": "0:12:29", "remaining_time": "0:39:24", "throughput": 10056.58, "total_tokens": 7537992}
|
||||
{"current_steps": 72, "total_steps": 295, "loss": 0.5035, "lr": 4.7104533278412763e-05, "epoch": 1.2203389830508475, "percentage": 24.41, "elapsed_time": "0:12:41", "remaining_time": "0:39:17", "throughput": 10054.86, "total_tokens": 7651968}
|
||||
{"current_steps": 73, "total_steps": 295, "loss": 0.529, "lr": 4.696453268966291e-05, "epoch": 1.2372881355932204, "percentage": 24.75, "elapsed_time": "0:12:52", "remaining_time": "0:39:08", "throughput": 10033.37, "total_tokens": 7748848}
|
||||
{"current_steps": 74, "total_steps": 295, "loss": 0.4829, "lr": 4.6821445184436066e-05, "epoch": 1.2542372881355932, "percentage": 25.08, "elapsed_time": "0:13:02", "remaining_time": "0:38:56", "throughput": 10028.97, "total_tokens": 7845760}
|
||||
{"current_steps": 75, "total_steps": 295, "loss": 0.4792, "lr": 4.667529087238736e-05, "epoch": 1.271186440677966, "percentage": 25.42, "elapsed_time": "0:13:12", "remaining_time": "0:38:44", "throughput": 10029.44, "total_tokens": 7948872}
|
||||
{"current_steps": 75, "total_steps": 295, "eval_loss": 0.5687937140464783, "epoch": 1.271186440677966, "percentage": 25.42, "elapsed_time": "0:13:16", "remaining_time": "0:38:57", "throughput": 9974.72, "total_tokens": 7948872}
|
||||
{"current_steps": 76, "total_steps": 295, "loss": 0.4475, "lr": 4.652609029418389e-05, "epoch": 1.288135593220339, "percentage": 25.76, "elapsed_time": "0:13:27", "remaining_time": "0:38:45", "throughput": 9976.71, "total_tokens": 8053096}
|
||||
{"current_steps": 77, "total_steps": 295, "loss": 0.4814, "lr": 4.6373864418617935e-05, "epoch": 1.305084745762712, "percentage": 26.1, "elapsed_time": "0:13:37", "remaining_time": "0:38:34", "throughput": 9988.24, "total_tokens": 8165544}
|
||||
{"current_steps": 78, "total_steps": 295, "loss": 0.551, "lr": 4.6218634639659954e-05, "epoch": 1.3220338983050848, "percentage": 26.44, "elapsed_time": "0:13:47", "remaining_time": "0:38:23", "throughput": 10001.99, "total_tokens": 8280040}
|
||||
{"current_steps": 79, "total_steps": 295, "loss": 0.4689, "lr": 4.606042277345185e-05, "epoch": 1.3389830508474576, "percentage": 26.78, "elapsed_time": "0:13:57", "remaining_time": "0:38:10", "throughput": 10002.36, "total_tokens": 8380248}
|
||||
{"current_steps": 80, "total_steps": 295, "loss": 0.4489, "lr": 4.5899251055240963e-05, "epoch": 1.3559322033898304, "percentage": 27.12, "elapsed_time": "0:14:08", "remaining_time": "0:37:59", "throughput": 10009.19, "total_tokens": 8488848}
|
||||
{"current_steps": 81, "total_steps": 295, "loss": 0.593, "lr": 4.573514213625505e-05, "epoch": 1.3728813559322033, "percentage": 27.46, "elapsed_time": "0:14:18", "remaining_time": "0:37:47", "throughput": 10011.69, "total_tokens": 8593232}
|
||||
{"current_steps": 82, "total_steps": 295, "loss": 0.5531, "lr": 4.5568119080518864e-05, "epoch": 1.3898305084745763, "percentage": 27.8, "elapsed_time": "0:14:28", "remaining_time": "0:37:35", "throughput": 10008.25, "total_tokens": 8692096}
|
||||
{"current_steps": 83, "total_steps": 295, "loss": 0.4688, "lr": 4.539820536161278e-05, "epoch": 1.4067796610169492, "percentage": 28.14, "elapsed_time": "0:14:38", "remaining_time": "0:37:24", "throughput": 10015.45, "total_tokens": 8802024}
|
||||
{"current_steps": 84, "total_steps": 295, "loss": 0.4728, "lr": 4.522542485937369e-05, "epoch": 1.423728813559322, "percentage": 28.47, "elapsed_time": "0:14:49", "remaining_time": "0:37:13", "throughput": 10028.05, "total_tokens": 8915296}
|
||||
{"current_steps": 85, "total_steps": 295, "loss": 0.5405, "lr": 4.504980185653899e-05, "epoch": 1.4406779661016949, "percentage": 28.81, "elapsed_time": "0:14:59", "remaining_time": "0:37:02", "throughput": 10026.39, "total_tokens": 9018176}
|
||||
{"current_steps": 86, "total_steps": 295, "loss": 0.4356, "lr": 4.4871361035333836e-05, "epoch": 1.457627118644068, "percentage": 29.15, "elapsed_time": "0:15:09", "remaining_time": "0:36:51", "throughput": 10032.83, "total_tokens": 9127880}
|
||||
{"current_steps": 87, "total_steps": 295, "loss": 0.4982, "lr": 4.469012747400227e-05, "epoch": 1.4745762711864407, "percentage": 29.49, "elapsed_time": "0:15:20", "remaining_time": "0:36:39", "throughput": 10043.21, "total_tokens": 9241680}
|
||||
{"current_steps": 88, "total_steps": 295, "loss": 0.535, "lr": 4.450612664328271e-05, "epoch": 1.4915254237288136, "percentage": 29.83, "elapsed_time": "0:15:30", "remaining_time": "0:36:28", "throughput": 10047.77, "total_tokens": 9349080}
|
||||
{"current_steps": 89, "total_steps": 295, "loss": 0.4983, "lr": 4.431938440282828e-05, "epoch": 1.5084745762711864, "percentage": 30.17, "elapsed_time": "0:15:40", "remaining_time": "0:36:17", "throughput": 10055.42, "total_tokens": 9460424}
|
||||
{"current_steps": 90, "total_steps": 295, "loss": 0.4654, "lr": 4.412992699757244e-05, "epoch": 1.5254237288135593, "percentage": 30.51, "elapsed_time": "0:15:51", "remaining_time": "0:36:06", "throughput": 10068.26, "total_tokens": 9577240}
|
||||
{"current_steps": 91, "total_steps": 295, "loss": 0.4369, "lr": 4.3937781054040505e-05, "epoch": 1.542372881355932, "percentage": 30.85, "elapsed_time": "0:16:01", "remaining_time": "0:35:55", "throughput": 10068.3, "total_tokens": 9680760}
|
||||
{"current_steps": 92, "total_steps": 295, "loss": 0.4837, "lr": 4.374297357660756e-05, "epoch": 1.559322033898305, "percentage": 31.19, "elapsed_time": "0:16:11", "remaining_time": "0:35:44", "throughput": 10075.26, "total_tokens": 9791984}
|
||||
{"current_steps": 93, "total_steps": 295, "loss": 0.4735, "lr": 4.354553194370321e-05, "epoch": 1.576271186440678, "percentage": 31.53, "elapsed_time": "0:16:22", "remaining_time": "0:35:33", "throughput": 10075.82, "total_tokens": 9895992}
|
||||
{"current_steps": 94, "total_steps": 295, "loss": 0.4411, "lr": 4.334548390396377e-05, "epoch": 1.5932203389830508, "percentage": 31.86, "elapsed_time": "0:16:32", "remaining_time": "0:35:22", "throughput": 10083.83, "total_tokens": 10007400}
|
||||
{"current_steps": 95, "total_steps": 295, "loss": 0.4829, "lr": 4.3142857572332504e-05, "epoch": 1.6101694915254239, "percentage": 32.2, "elapsed_time": "0:16:42", "remaining_time": "0:35:10", "throughput": 10094.81, "total_tokens": 10122232}
|
||||
{"current_steps": 96, "total_steps": 295, "loss": 0.4373, "lr": 4.293768142610828e-05, "epoch": 1.6271186440677967, "percentage": 32.54, "elapsed_time": "0:16:52", "remaining_time": "0:34:58", "throughput": 10085.08, "total_tokens": 10211000}
|
||||
{"current_steps": 97, "total_steps": 295, "loss": 0.4391, "lr": 4.272998430094334e-05, "epoch": 1.6440677966101696, "percentage": 32.88, "elapsed_time": "0:17:02", "remaining_time": "0:34:47", "throughput": 10093.0, "total_tokens": 10323768}
|
||||
{"current_steps": 98, "total_steps": 295, "loss": 0.5014, "lr": 4.2519795386790716e-05, "epoch": 1.6610169491525424, "percentage": 33.22, "elapsed_time": "0:17:12", "remaining_time": "0:34:36", "throughput": 10091.8, "total_tokens": 10423272}
|
||||
{"current_steps": 99, "total_steps": 295, "loss": 0.5095, "lr": 4.23071442238019e-05, "epoch": 1.6779661016949152, "percentage": 33.56, "elapsed_time": "0:17:23", "remaining_time": "0:34:25", "throughput": 10099.01, "total_tokens": 10535696}
|
||||
{"current_steps": 100, "total_steps": 295, "loss": 0.4444, "lr": 4.209206069817513e-05, "epoch": 1.694915254237288, "percentage": 33.9, "elapsed_time": "0:17:33", "remaining_time": "0:34:14", "throughput": 10099.79, "total_tokens": 10640880}
|
||||
{"current_steps": 100, "total_steps": 295, "eval_loss": 0.5552906394004822, "epoch": 1.694915254237288, "percentage": 33.9, "elapsed_time": "0:17:37", "remaining_time": "0:34:22", "throughput": 10058.21, "total_tokens": 10640880}
|
||||
{"current_steps": 101, "total_steps": 295, "loss": 0.4959, "lr": 4.187457503795527e-05, "epoch": 1.711864406779661, "percentage": 34.24, "elapsed_time": "0:17:48", "remaining_time": "0:34:11", "throughput": 10059.28, "total_tokens": 10745624}
|
||||
{"current_steps": 102, "total_steps": 295, "loss": 0.433, "lr": 4.165471780878546e-05, "epoch": 1.7288135593220337, "percentage": 34.58, "elapsed_time": "0:17:58", "remaining_time": "0:34:00", "throughput": 10055.2, "total_tokens": 10844744}
|
||||
{"current_steps": 103, "total_steps": 295, "loss": 0.4856, "lr": 4.1432519909611415e-05, "epoch": 1.7457627118644068, "percentage": 34.92, "elapsed_time": "0:18:08", "remaining_time": "0:33:49", "throughput": 10058.55, "total_tokens": 10952176}
|
||||
{"current_steps": 104, "total_steps": 295, "loss": 0.4413, "lr": 4.120801256833887e-05, "epoch": 1.7627118644067796, "percentage": 35.25, "elapsed_time": "0:18:19", "remaining_time": "0:33:38", "throughput": 10067.71, "total_tokens": 11066704}
|
||||
{"current_steps": 105, "total_steps": 295, "loss": 0.4558, "lr": 4.098122733744475e-05, "epoch": 1.7796610169491527, "percentage": 35.59, "elapsed_time": "0:18:29", "remaining_time": "0:33:27", "throughput": 10068.16, "total_tokens": 11167664}
|
||||
{"current_steps": 106, "total_steps": 295, "loss": 0.5277, "lr": 4.075219608954278e-05, "epoch": 1.7966101694915255, "percentage": 35.93, "elapsed_time": "0:18:39", "remaining_time": "0:33:15", "throughput": 10068.54, "total_tokens": 11267192}
|
||||
{"current_steps": 107, "total_steps": 295, "loss": 0.4027, "lr": 4.052095101290406e-05, "epoch": 1.8135593220338984, "percentage": 36.27, "elapsed_time": "0:18:49", "remaining_time": "0:33:04", "throughput": 10078.12, "total_tokens": 11381440}
|
||||
{"current_steps": 108, "total_steps": 295, "loss": 0.459, "lr": 4.02875246069333e-05, "epoch": 1.8305084745762712, "percentage": 36.61, "elapsed_time": "0:18:59", "remaining_time": "0:32:52", "throughput": 10073.83, "total_tokens": 11478120}
|
||||
{"current_steps": 109, "total_steps": 295, "loss": 0.4386, "lr": 4.005194967760135e-05, "epoch": 1.847457627118644, "percentage": 36.95, "elapsed_time": "0:19:09", "remaining_time": "0:32:41", "throughput": 10077.97, "total_tokens": 11584096}
|
||||
{"current_steps": 110, "total_steps": 295, "loss": 0.3941, "lr": 3.981425933283456e-05, "epoch": 1.8644067796610169, "percentage": 37.29, "elapsed_time": "0:19:19", "remaining_time": "0:32:30", "throughput": 10083.19, "total_tokens": 11695448}
|
||||
{"current_steps": 111, "total_steps": 295, "loss": 0.4912, "lr": 3.95744869778618e-05, "epoch": 1.8813559322033897, "percentage": 37.63, "elapsed_time": "0:19:30", "remaining_time": "0:32:19", "throughput": 10088.28, "total_tokens": 11805544}
|
||||
{"current_steps": 112, "total_steps": 295, "loss": 0.4393, "lr": 3.933266631051968e-05, "epoch": 1.8983050847457628, "percentage": 37.97, "elapsed_time": "0:19:40", "remaining_time": "0:32:08", "throughput": 10087.0, "total_tokens": 11903888}
|
||||
{"current_steps": 113, "total_steps": 295, "loss": 0.4292, "lr": 3.9088831316516564e-05, "epoch": 1.9152542372881356, "percentage": 38.31, "elapsed_time": "0:19:49", "remaining_time": "0:31:56", "throughput": 10088.39, "total_tokens": 12004128}
|
||||
{"current_steps": 114, "total_steps": 295, "loss": 0.4825, "lr": 3.8843016264656215e-05, "epoch": 1.9322033898305084, "percentage": 38.64, "elapsed_time": "0:20:00", "remaining_time": "0:31:45", "throughput": 10093.89, "total_tokens": 12115840}
|
||||
{"current_steps": 115, "total_steps": 295, "loss": 0.5266, "lr": 3.8595255702021635e-05, "epoch": 1.9491525423728815, "percentage": 38.98, "elapsed_time": "0:20:10", "remaining_time": "0:31:35", "throughput": 10103.52, "total_tokens": 12232504}
|
||||
{"current_steps": 116, "total_steps": 295, "loss": 0.4424, "lr": 3.8345584449119776e-05, "epoch": 1.9661016949152543, "percentage": 39.32, "elapsed_time": "0:20:21", "remaining_time": "0:31:24", "throughput": 10106.04, "total_tokens": 12339872}
|
||||
{"current_steps": 117, "total_steps": 295, "loss": 0.4777, "lr": 3.809403759498782e-05, "epoch": 1.9830508474576272, "percentage": 39.66, "elapsed_time": "0:20:31", "remaining_time": "0:31:13", "throughput": 10103.55, "total_tokens": 12440032}
|
||||
{"current_steps": 118, "total_steps": 295, "loss": 0.4401, "lr": 3.784065049226176e-05, "epoch": 2.0, "percentage": 40.0, "elapsed_time": "0:20:41", "remaining_time": "0:31:01", "throughput": 10105.27, "total_tokens": 12542672}
|
||||
{"current_steps": 119, "total_steps": 295, "loss": 0.3796, "lr": 3.758545875220788e-05, "epoch": 2.016949152542373, "percentage": 40.34, "elapsed_time": "0:20:51", "remaining_time": "0:30:51", "throughput": 10110.87, "total_tokens": 12655008}
|
||||
{"current_steps": 120, "total_steps": 295, "loss": 0.3662, "lr": 3.732849823971793e-05, "epoch": 2.0338983050847457, "percentage": 40.68, "elapsed_time": "0:21:02", "remaining_time": "0:30:40", "throughput": 10118.7, "total_tokens": 12769960}
|
||||
{"current_steps": 121, "total_steps": 295, "loss": 0.3615, "lr": 3.706980506826863e-05, "epoch": 2.0508474576271185, "percentage": 41.02, "elapsed_time": "0:21:12", "remaining_time": "0:30:29", "throughput": 10126.89, "total_tokens": 12881984}
|
||||
{"current_steps": 122, "total_steps": 295, "loss": 0.3831, "lr": 3.6809415594846236e-05, "epoch": 2.0677966101694913, "percentage": 41.36, "elapsed_time": "0:21:22", "remaining_time": "0:30:18", "throughput": 10123.72, "total_tokens": 12981888}
|
||||
{"current_steps": 123, "total_steps": 295, "loss": 0.2879, "lr": 3.6547366414836936e-05, "epoch": 2.084745762711864, "percentage": 41.69, "elapsed_time": "0:21:32", "remaining_time": "0:30:07", "throughput": 10120.84, "total_tokens": 13082360}
|
||||
{"current_steps": 124, "total_steps": 295, "loss": 0.4776, "lr": 3.628369435688366e-05, "epoch": 2.1016949152542375, "percentage": 42.03, "elapsed_time": "0:21:42", "remaining_time": "0:29:56", "throughput": 10127.01, "total_tokens": 13195264}
|
||||
{"current_steps": 125, "total_steps": 295, "loss": 0.3788, "lr": 3.601843647771016e-05, "epoch": 2.1186440677966103, "percentage": 42.37, "elapsed_time": "0:21:53", "remaining_time": "0:29:46", "throughput": 10126.69, "total_tokens": 13298752}
|
||||
{"current_steps": 125, "total_steps": 295, "eval_loss": 0.529534637928009, "epoch": 2.1186440677966103, "percentage": 42.37, "elapsed_time": "0:21:57", "remaining_time": "0:29:51", "throughput": 10093.28, "total_tokens": 13298752}
|
||||
{"current_steps": 126, "total_steps": 295, "loss": 0.3697, "lr": 3.575163005691302e-05, "epoch": 2.135593220338983, "percentage": 42.71, "elapsed_time": "0:22:07", "remaining_time": "0:29:40", "throughput": 10089.56, "total_tokens": 13394544}
|
||||
{"current_steps": 127, "total_steps": 295, "loss": 0.3783, "lr": 3.548331259172234e-05, "epoch": 2.152542372881356, "percentage": 43.05, "elapsed_time": "0:22:17", "remaining_time": "0:29:29", "throughput": 10093.46, "total_tokens": 13503584}
|
||||
{"current_steps": 128, "total_steps": 295, "loss": 0.3652, "lr": 3.5213521791731875e-05, "epoch": 2.169491525423729, "percentage": 43.39, "elapsed_time": "0:22:28", "remaining_time": "0:29:18", "throughput": 10093.46, "total_tokens": 13607464}
|
||||
{"current_steps": 129, "total_steps": 295, "loss": 0.366, "lr": 3.4942295573599245e-05, "epoch": 2.1864406779661016, "percentage": 43.73, "elapsed_time": "0:22:38", "remaining_time": "0:29:07", "throughput": 10093.31, "total_tokens": 13708112}
|
||||
{"current_steps": 130, "total_steps": 295, "loss": 0.3134, "lr": 3.46696720557171e-05, "epoch": 2.2033898305084745, "percentage": 44.07, "elapsed_time": "0:22:48", "remaining_time": "0:28:56", "throughput": 10096.13, "total_tokens": 13815872}
|
||||
{"current_steps": 131, "total_steps": 295, "loss": 0.3162, "lr": 3.4395689552855955e-05, "epoch": 2.2203389830508473, "percentage": 44.41, "elapsed_time": "0:22:58", "remaining_time": "0:28:45", "throughput": 10099.68, "total_tokens": 13920760}
|
||||
{"current_steps": 132, "total_steps": 295, "loss": 0.3835, "lr": 3.412038657077939e-05, "epoch": 2.23728813559322, "percentage": 44.75, "elapsed_time": "0:23:07", "remaining_time": "0:28:33", "throughput": 10096.97, "total_tokens": 14014280}
|
||||
{"current_steps": 133, "total_steps": 295, "loss": 0.3628, "lr": 3.3843801800832354e-05, "epoch": 2.2542372881355934, "percentage": 45.08, "elapsed_time": "0:23:18", "remaining_time": "0:28:22", "throughput": 10092.24, "total_tokens": 14109848}
|
||||
{"current_steps": 134, "total_steps": 295, "loss": 0.3635, "lr": 3.356597411450353e-05, "epoch": 2.2711864406779663, "percentage": 45.42, "elapsed_time": "0:23:28", "remaining_time": "0:28:11", "throughput": 10096.67, "total_tokens": 14217008}
|
||||
{"current_steps": 135, "total_steps": 295, "loss": 0.3426, "lr": 3.328694255796226e-05, "epoch": 2.288135593220339, "percentage": 45.76, "elapsed_time": "0:23:38", "remaining_time": "0:28:00", "throughput": 10101.04, "total_tokens": 14326608}
|
||||
{"current_steps": 136, "total_steps": 295, "loss": 0.3817, "lr": 3.300674634657094e-05, "epoch": 2.305084745762712, "percentage": 46.1, "elapsed_time": "0:23:48", "remaining_time": "0:27:50", "throughput": 10097.39, "total_tokens": 14425192}
|
||||
{"current_steps": 137, "total_steps": 295, "loss": 0.367, "lr": 3.272542485937369e-05, "epoch": 2.3220338983050848, "percentage": 46.44, "elapsed_time": "0:23:58", "remaining_time": "0:27:38", "throughput": 10092.85, "total_tokens": 14516048}
|
||||
{"current_steps": 138, "total_steps": 295, "loss": 0.4014, "lr": 3.244301763356195e-05, "epoch": 2.3389830508474576, "percentage": 46.78, "elapsed_time": "0:24:08", "remaining_time": "0:27:27", "throughput": 10088.59, "total_tokens": 14612784}
|
||||
{"current_steps": 139, "total_steps": 295, "loss": 0.3442, "lr": 3.215956435891793e-05, "epoch": 2.3559322033898304, "percentage": 47.12, "elapsed_time": "0:24:18", "remaining_time": "0:27:17", "throughput": 10086.66, "total_tokens": 14712832}
|
||||
{"current_steps": 140, "total_steps": 295, "loss": 0.3084, "lr": 3.187510487223655e-05, "epoch": 2.3728813559322033, "percentage": 47.46, "elapsed_time": "0:24:29", "remaining_time": "0:27:06", "throughput": 10093.04, "total_tokens": 14826672}
|
||||
{"current_steps": 141, "total_steps": 295, "loss": 0.3533, "lr": 3.158967915172669e-05, "epoch": 2.389830508474576, "percentage": 47.8, "elapsed_time": "0:24:39", "remaining_time": "0:26:55", "throughput": 10094.31, "total_tokens": 14931848}
|
||||
{"current_steps": 142, "total_steps": 295, "loss": 0.3522, "lr": 3.130332731139272e-05, "epoch": 2.406779661016949, "percentage": 48.14, "elapsed_time": "0:24:49", "remaining_time": "0:26:44", "throughput": 10093.04, "total_tokens": 15033416}
|
||||
{"current_steps": 143, "total_steps": 295, "loss": 0.3409, "lr": 3.101608959539671e-05, "epoch": 2.423728813559322, "percentage": 48.47, "elapsed_time": "0:24:59", "remaining_time": "0:26:34", "throughput": 10097.64, "total_tokens": 15144040}
|
||||
{"current_steps": 144, "total_steps": 295, "loss": 0.399, "lr": 3.072800637240261e-05, "epoch": 2.440677966101695, "percentage": 48.81, "elapsed_time": "0:25:10", "remaining_time": "0:26:23", "throughput": 10101.41, "total_tokens": 15253280}
|
||||
{"current_steps": 145, "total_steps": 295, "loss": 0.2888, "lr": 3.0439118129902698e-05, "epoch": 2.457627118644068, "percentage": 49.15, "elapsed_time": "0:25:20", "remaining_time": "0:26:12", "throughput": 10104.05, "total_tokens": 15361952}
|
||||
{"current_steps": 146, "total_steps": 295, "loss": 0.4014, "lr": 3.014946546852746e-05, "epoch": 2.4745762711864407, "percentage": 49.49, "elapsed_time": "0:25:30", "remaining_time": "0:26:02", "throughput": 10099.1, "total_tokens": 15457896}
|
||||
{"current_steps": 147, "total_steps": 295, "loss": 0.3612, "lr": 2.9859089096339566e-05, "epoch": 2.4915254237288136, "percentage": 49.83, "elapsed_time": "0:25:40", "remaining_time": "0:25:51", "throughput": 10104.43, "total_tokens": 15570464}
|
||||
{"current_steps": 148, "total_steps": 295, "loss": 0.4234, "lr": 2.9568029823112688e-05, "epoch": 2.5084745762711864, "percentage": 50.17, "elapsed_time": "0:25:51", "remaining_time": "0:25:40", "throughput": 10108.34, "total_tokens": 15681264}
|
||||
{"current_steps": 149, "total_steps": 295, "loss": 0.4073, "lr": 2.9276328554596055e-05, "epoch": 2.5254237288135593, "percentage": 50.51, "elapsed_time": "0:26:01", "remaining_time": "0:25:30", "throughput": 10110.05, "total_tokens": 15788384}
|
||||
{"current_steps": 150, "total_steps": 295, "loss": 0.435, "lr": 2.8984026286765542e-05, "epoch": 2.542372881355932, "percentage": 50.85, "elapsed_time": "0:26:12", "remaining_time": "0:25:19", "throughput": 10108.77, "total_tokens": 15891024}
|
||||
{"current_steps": 150, "total_steps": 295, "eval_loss": 0.5156561136245728, "epoch": 2.542372881355932, "percentage": 50.85, "elapsed_time": "0:26:16", "remaining_time": "0:25:23", "throughput": 10080.87, "total_tokens": 15891024}
|
||||
{"current_steps": 151, "total_steps": 295, "loss": 0.4432, "lr": 2.8691164100062034e-05, "epoch": 2.559322033898305, "percentage": 51.19, "elapsed_time": "0:26:26", "remaining_time": "0:25:13", "throughput": 10082.33, "total_tokens": 15998080}
|
||||
{"current_steps": 152, "total_steps": 295, "loss": 0.4135, "lr": 2.8397783153617958e-05, "epoch": 2.576271186440678, "percentage": 51.53, "elapsed_time": "0:26:36", "remaining_time": "0:25:02", "throughput": 10089.32, "total_tokens": 16111136}
|
||||
{"current_steps": 153, "total_steps": 295, "loss": 0.3563, "lr": 2.8103924679472737e-05, "epoch": 2.593220338983051, "percentage": 51.86, "elapsed_time": "0:26:47", "remaining_time": "0:24:51", "throughput": 10086.74, "total_tokens": 16210312}
|
||||
{"current_steps": 154, "total_steps": 295, "loss": 0.3564, "lr": 2.7809629976777973e-05, "epoch": 2.610169491525424, "percentage": 52.2, "elapsed_time": "0:26:57", "remaining_time": "0:24:40", "throughput": 10088.99, "total_tokens": 16315056}
|
||||
{"current_steps": 155, "total_steps": 295, "loss": 0.3611, "lr": 2.7514940405993272e-05, "epoch": 2.6271186440677967, "percentage": 52.54, "elapsed_time": "0:27:07", "remaining_time": "0:24:29", "throughput": 10088.23, "total_tokens": 16417080}
|
||||
{"current_steps": 156, "total_steps": 295, "loss": 0.3847, "lr": 2.7219897383073373e-05, "epoch": 2.6440677966101696, "percentage": 52.88, "elapsed_time": "0:27:17", "remaining_time": "0:24:19", "throughput": 10095.02, "total_tokens": 16532576}
|
||||
{"current_steps": 157, "total_steps": 295, "loss": 0.3309, "lr": 2.6924542373647505e-05, "epoch": 2.6610169491525424, "percentage": 53.22, "elapsed_time": "0:27:28", "remaining_time": "0:24:08", "throughput": 10099.71, "total_tokens": 16644840}
|
||||
{"current_steps": 158, "total_steps": 295, "loss": 0.3207, "lr": 2.6628916887191784e-05, "epoch": 2.6779661016949152, "percentage": 53.56, "elapsed_time": "0:27:37", "remaining_time": "0:23:57", "throughput": 10101.26, "total_tokens": 16745864}
|
||||
{"current_steps": 159, "total_steps": 295, "loss": 0.3676, "lr": 2.633306247119544e-05, "epoch": 2.694915254237288, "percentage": 53.9, "elapsed_time": "0:27:48", "remaining_time": "0:23:46", "throughput": 10106.64, "total_tokens": 16858920}
|
||||
{"current_steps": 160, "total_steps": 295, "loss": 0.3098, "lr": 2.603702070532167e-05, "epoch": 2.711864406779661, "percentage": 54.24, "elapsed_time": "0:27:58", "remaining_time": "0:23:36", "throughput": 10110.11, "total_tokens": 16968168}
|
||||
{"current_steps": 161, "total_steps": 295, "loss": 0.3391, "lr": 2.5740833195563996e-05, "epoch": 2.7288135593220337, "percentage": 54.58, "elapsed_time": "0:28:08", "remaining_time": "0:23:25", "throughput": 10111.76, "total_tokens": 17075304}
|
||||
{"current_steps": 162, "total_steps": 295, "loss": 0.3637, "lr": 2.5444541568398937e-05, "epoch": 2.7457627118644066, "percentage": 54.92, "elapsed_time": "0:28:19", "remaining_time": "0:23:14", "throughput": 10119.83, "total_tokens": 17193960}
|
||||
{"current_steps": 163, "total_steps": 295, "loss": 0.3388, "lr": 2.5148187464935763e-05, "epoch": 2.7627118644067794, "percentage": 55.25, "elapsed_time": "0:28:29", "remaining_time": "0:23:04", "throughput": 10123.65, "total_tokens": 17304184}
|
||||
{"current_steps": 164, "total_steps": 295, "loss": 0.361, "lr": 2.485181253506424e-05, "epoch": 2.7796610169491527, "percentage": 55.59, "elapsed_time": "0:28:39", "remaining_time": "0:22:53", "throughput": 10125.36, "total_tokens": 17408944}
|
||||
{"current_steps": 165, "total_steps": 295, "loss": 0.3551, "lr": 2.4555458431601065e-05, "epoch": 2.7966101694915255, "percentage": 55.93, "elapsed_time": "0:28:49", "remaining_time": "0:22:42", "throughput": 10125.78, "total_tokens": 17512736}
|
||||
{"current_steps": 166, "total_steps": 295, "loss": 0.386, "lr": 2.4259166804436006e-05, "epoch": 2.8135593220338984, "percentage": 56.27, "elapsed_time": "0:28:59", "remaining_time": "0:22:31", "throughput": 10127.38, "total_tokens": 17617368}
|
||||
{"current_steps": 167, "total_steps": 295, "loss": 0.3624, "lr": 2.3962979294678337e-05, "epoch": 2.830508474576271, "percentage": 56.61, "elapsed_time": "0:29:09", "remaining_time": "0:22:21", "throughput": 10128.46, "total_tokens": 17723424}
|
||||
{"current_steps": 168, "total_steps": 295, "loss": 0.3517, "lr": 2.3666937528804563e-05, "epoch": 2.847457627118644, "percentage": 56.95, "elapsed_time": "0:29:20", "remaining_time": "0:22:10", "throughput": 10135.69, "total_tokens": 17840688}
|
||||
{"current_steps": 169, "total_steps": 295, "loss": 0.345, "lr": 2.337108311280822e-05, "epoch": 2.864406779661017, "percentage": 57.29, "elapsed_time": "0:29:30", "remaining_time": "0:22:00", "throughput": 10142.82, "total_tokens": 17958736}
|
||||
{"current_steps": 170, "total_steps": 295, "loss": 0.3491, "lr": 2.3075457626352504e-05, "epoch": 2.8813559322033897, "percentage": 57.63, "elapsed_time": "0:29:40", "remaining_time": "0:21:49", "throughput": 10141.4, "total_tokens": 18060792}
|
||||
{"current_steps": 171, "total_steps": 295, "loss": 0.3555, "lr": 2.2780102616926633e-05, "epoch": 2.898305084745763, "percentage": 57.97, "elapsed_time": "0:29:50", "remaining_time": "0:21:38", "throughput": 10141.33, "total_tokens": 18161344}
|
||||
{"current_steps": 172, "total_steps": 295, "loss": 0.3597, "lr": 2.2485059594006734e-05, "epoch": 2.915254237288136, "percentage": 58.31, "elapsed_time": "0:30:00", "remaining_time": "0:21:27", "throughput": 10143.69, "total_tokens": 18267840}
|
||||
{"current_steps": 173, "total_steps": 295, "loss": 0.3601, "lr": 2.2190370023222033e-05, "epoch": 2.9322033898305087, "percentage": 58.64, "elapsed_time": "0:30:10", "remaining_time": "0:21:17", "throughput": 10145.17, "total_tokens": 18371632}
|
||||
{"current_steps": 174, "total_steps": 295, "loss": 0.3321, "lr": 2.189607532052727e-05, "epoch": 2.9491525423728815, "percentage": 58.98, "elapsed_time": "0:30:21", "remaining_time": "0:21:06", "throughput": 10153.97, "total_tokens": 18493104}
|
||||
{"current_steps": 175, "total_steps": 295, "loss": 0.2966, "lr": 2.1602216846382048e-05, "epoch": 2.9661016949152543, "percentage": 59.32, "elapsed_time": "0:30:31", "remaining_time": "0:20:55", "throughput": 10158.86, "total_tokens": 18607368}
|
||||
{"current_steps": 175, "total_steps": 295, "eval_loss": 0.4958828091621399, "epoch": 2.9661016949152543, "percentage": 59.32, "elapsed_time": "0:30:35", "remaining_time": "0:20:58", "throughput": 10134.78, "total_tokens": 18607368}
|
||||
{"current_steps": 176, "total_steps": 295, "loss": 0.3531, "lr": 2.1308835899937972e-05, "epoch": 2.983050847457627, "percentage": 59.66, "elapsed_time": "0:30:46", "remaining_time": "0:20:48", "throughput": 10135.04, "total_tokens": 18709368}
|
||||
{"current_steps": 177, "total_steps": 295, "loss": 0.3393, "lr": 2.1015973713234464e-05, "epoch": 3.0, "percentage": 60.0, "elapsed_time": "0:30:56", "remaining_time": "0:20:37", "throughput": 10135.92, "total_tokens": 18815328}
|
||||
{"current_steps": 178, "total_steps": 295, "loss": 0.2607, "lr": 2.0723671445403954e-05, "epoch": 3.016949152542373, "percentage": 60.34, "elapsed_time": "0:31:06", "remaining_time": "0:20:26", "throughput": 10136.51, "total_tokens": 18917216}
|
||||
{"current_steps": 179, "total_steps": 295, "loss": 0.271, "lr": 2.0431970176887315e-05, "epoch": 3.0338983050847457, "percentage": 60.68, "elapsed_time": "0:31:16", "remaining_time": "0:20:15", "throughput": 10140.56, "total_tokens": 19027776}
|
||||
{"current_steps": 180, "total_steps": 295, "loss": 0.2439, "lr": 2.014091090366044e-05, "epoch": 3.0508474576271185, "percentage": 61.02, "elapsed_time": "0:31:26", "remaining_time": "0:20:05", "throughput": 10141.76, "total_tokens": 19135480}
|
||||
{"current_steps": 181, "total_steps": 295, "loss": 0.2996, "lr": 1.9850534531472546e-05, "epoch": 3.0677966101694913, "percentage": 61.36, "elapsed_time": "0:31:37", "remaining_time": "0:19:54", "throughput": 10143.56, "total_tokens": 19243480}
|
||||
{"current_steps": 182, "total_steps": 295, "loss": 0.2709, "lr": 1.9560881870097308e-05, "epoch": 3.084745762711864, "percentage": 61.69, "elapsed_time": "0:31:47", "remaining_time": "0:19:44", "throughput": 10145.77, "total_tokens": 19349232}
|
||||
{"current_steps": 183, "total_steps": 295, "loss": 0.2506, "lr": 1.9271993627597396e-05, "epoch": 3.1016949152542375, "percentage": 62.03, "elapsed_time": "0:31:57", "remaining_time": "0:19:33", "throughput": 10152.96, "total_tokens": 19468240}
|
||||
{"current_steps": 184, "total_steps": 295, "loss": 0.1991, "lr": 1.8983910404603296e-05, "epoch": 3.1186440677966103, "percentage": 62.37, "elapsed_time": "0:32:07", "remaining_time": "0:19:23", "throughput": 10156.62, "total_tokens": 19580696}
|
||||
{"current_steps": 185, "total_steps": 295, "loss": 0.2483, "lr": 1.8696672688607293e-05, "epoch": 3.135593220338983, "percentage": 62.71, "elapsed_time": "0:32:17", "remaining_time": "0:19:12", "throughput": 10158.64, "total_tokens": 19685800}
|
||||
{"current_steps": 186, "total_steps": 295, "loss": 0.2796, "lr": 1.8410320848273315e-05, "epoch": 3.152542372881356, "percentage": 63.05, "elapsed_time": "0:32:28", "remaining_time": "0:19:01", "throughput": 10157.05, "total_tokens": 19787360}
|
||||
{"current_steps": 187, "total_steps": 295, "loss": 0.2604, "lr": 1.8124895127763458e-05, "epoch": 3.169491525423729, "percentage": 63.39, "elapsed_time": "0:32:38", "remaining_time": "0:18:51", "throughput": 10155.07, "total_tokens": 19887912}
|
||||
{"current_steps": 188, "total_steps": 295, "loss": 0.2759, "lr": 1.7840435641082072e-05, "epoch": 3.1864406779661016, "percentage": 63.73, "elapsed_time": "0:32:48", "remaining_time": "0:18:40", "throughput": 10149.59, "total_tokens": 19978168}
|
||||
{"current_steps": 189, "total_steps": 295, "loss": 0.2912, "lr": 1.7556982366438053e-05, "epoch": 3.2033898305084745, "percentage": 64.07, "elapsed_time": "0:32:58", "remaining_time": "0:18:29", "throughput": 10152.62, "total_tokens": 20090288}
|
||||
{"current_steps": 190, "total_steps": 295, "loss": 0.2852, "lr": 1.7274575140626318e-05, "epoch": 3.2203389830508473, "percentage": 64.41, "elapsed_time": "0:33:09", "remaining_time": "0:18:19", "throughput": 10149.63, "total_tokens": 20188176}
|
||||
{"current_steps": 191, "total_steps": 295, "loss": 0.2625, "lr": 1.6993253653429063e-05, "epoch": 3.23728813559322, "percentage": 64.75, "elapsed_time": "0:33:19", "remaining_time": "0:18:08", "throughput": 10150.99, "total_tokens": 20294944}
|
||||
{"current_steps": 192, "total_steps": 295, "loss": 0.2443, "lr": 1.6713057442037743e-05, "epoch": 3.2542372881355934, "percentage": 65.08, "elapsed_time": "0:33:29", "remaining_time": "0:17:57", "throughput": 10148.7, "total_tokens": 20393248}
|
||||
{"current_steps": 193, "total_steps": 295, "loss": 0.2252, "lr": 1.6434025885496467e-05, "epoch": 3.2711864406779663, "percentage": 65.42, "elapsed_time": "0:33:39", "remaining_time": "0:17:47", "throughput": 10147.29, "total_tokens": 20495136}
|
||||
{"current_steps": 194, "total_steps": 295, "loss": 0.3712, "lr": 1.6156198199167655e-05, "epoch": 3.288135593220339, "percentage": 65.76, "elapsed_time": "0:33:49", "remaining_time": "0:17:36", "throughput": 10143.64, "total_tokens": 20590656}
|
||||
{"current_steps": 195, "total_steps": 295, "loss": 0.2243, "lr": 1.5879613429220626e-05, "epoch": 3.305084745762712, "percentage": 66.1, "elapsed_time": "0:34:00", "remaining_time": "0:17:26", "throughput": 10146.59, "total_tokens": 20701792}
|
||||
{"current_steps": 196, "total_steps": 295, "loss": 0.2873, "lr": 1.560431044714405e-05, "epoch": 3.3220338983050848, "percentage": 66.44, "elapsed_time": "0:34:10", "remaining_time": "0:17:15", "throughput": 10146.76, "total_tokens": 20806728}
|
||||
{"current_steps": 197, "total_steps": 295, "loss": 0.259, "lr": 1.5330327944282913e-05, "epoch": 3.3389830508474576, "percentage": 66.78, "elapsed_time": "0:34:20", "remaining_time": "0:17:05", "throughput": 10145.74, "total_tokens": 20909128}
|
||||
{"current_steps": 198, "total_steps": 295, "loss": 0.2636, "lr": 1.5057704426400767e-05, "epoch": 3.3559322033898304, "percentage": 67.12, "elapsed_time": "0:34:31", "remaining_time": "0:16:54", "throughput": 10149.93, "total_tokens": 21021888}
|
||||
{"current_steps": 199, "total_steps": 295, "loss": 0.2666, "lr": 1.4786478208268134e-05, "epoch": 3.3728813559322033, "percentage": 67.46, "elapsed_time": "0:34:41", "remaining_time": "0:16:44", "throughput": 10150.86, "total_tokens": 21127504}
|
||||
{"current_steps": 200, "total_steps": 295, "loss": 0.2524, "lr": 1.4516687408277669e-05, "epoch": 3.389830508474576, "percentage": 67.8, "elapsed_time": "0:34:51", "remaining_time": "0:16:33", "throughput": 10150.72, "total_tokens": 21230584}
|
||||
{"current_steps": 200, "total_steps": 295, "eval_loss": 0.4950821101665497, "epoch": 3.389830508474576, "percentage": 67.8, "elapsed_time": "0:34:55", "remaining_time": "0:16:35", "throughput": 10129.67, "total_tokens": 21230584}
|
||||
{"current_steps": 201, "total_steps": 295, "loss": 0.2496, "lr": 1.4248369943086998e-05, "epoch": 3.406779661016949, "percentage": 68.14, "elapsed_time": "0:35:06", "remaining_time": "0:16:25", "throughput": 10133.78, "total_tokens": 21344472}
|
||||
{"current_steps": 202, "total_steps": 295, "loss": 0.3348, "lr": 1.3981563522289848e-05, "epoch": 3.423728813559322, "percentage": 68.47, "elapsed_time": "0:35:16", "remaining_time": "0:16:14", "throughput": 10134.76, "total_tokens": 21449200}
|
||||
{"current_steps": 203, "total_steps": 295, "loss": 0.242, "lr": 1.3716305643116345e-05, "epoch": 3.440677966101695, "percentage": 68.81, "elapsed_time": "0:35:26", "remaining_time": "0:16:03", "throughput": 10132.9, "total_tokens": 21543072}
|
||||
{"current_steps": 204, "total_steps": 295, "loss": 0.2973, "lr": 1.3452633585163072e-05, "epoch": 3.457627118644068, "percentage": 69.15, "elapsed_time": "0:35:36", "remaining_time": "0:15:52", "throughput": 10138.32, "total_tokens": 21656624}
|
||||
{"current_steps": 205, "total_steps": 295, "loss": 0.2397, "lr": 1.3190584405153767e-05, "epoch": 3.4745762711864407, "percentage": 69.49, "elapsed_time": "0:35:46", "remaining_time": "0:15:42", "throughput": 10142.8, "total_tokens": 21771480}
|
||||
{"current_steps": 206, "total_steps": 295, "loss": 0.2163, "lr": 1.2930194931731382e-05, "epoch": 3.4915254237288136, "percentage": 69.83, "elapsed_time": "0:35:56", "remaining_time": "0:15:31", "throughput": 10146.6, "total_tokens": 21884760}
|
||||
{"current_steps": 207, "total_steps": 295, "loss": 0.3422, "lr": 1.2671501760282079e-05, "epoch": 3.5084745762711864, "percentage": 70.17, "elapsed_time": "0:36:07", "remaining_time": "0:15:21", "throughput": 10148.21, "total_tokens": 21991712}
|
||||
{"current_steps": 208, "total_steps": 295, "loss": 0.2829, "lr": 1.2414541247792121e-05, "epoch": 3.5254237288135593, "percentage": 70.51, "elapsed_time": "0:36:17", "remaining_time": "0:15:10", "throughput": 10152.02, "total_tokens": 22104552}
|
||||
{"current_steps": 209, "total_steps": 295, "loss": 0.2411, "lr": 1.2159349507738247e-05, "epoch": 3.542372881355932, "percentage": 70.85, "elapsed_time": "0:36:27", "remaining_time": "0:15:00", "throughput": 10152.51, "total_tokens": 22209288}
|
||||
{"current_steps": 210, "total_steps": 295, "loss": 0.2872, "lr": 1.1905962405012192e-05, "epoch": 3.559322033898305, "percentage": 71.19, "elapsed_time": "0:36:37", "remaining_time": "0:14:49", "throughput": 10151.64, "total_tokens": 22307624}
|
||||
{"current_steps": 211, "total_steps": 295, "loss": 0.3551, "lr": 1.1654415550880243e-05, "epoch": 3.576271186440678, "percentage": 71.53, "elapsed_time": "0:36:47", "remaining_time": "0:14:38", "throughput": 10150.03, "total_tokens": 22407656}
|
||||
{"current_steps": 212, "total_steps": 295, "loss": 0.2102, "lr": 1.1404744297978373e-05, "epoch": 3.593220338983051, "percentage": 71.86, "elapsed_time": "0:36:57", "remaining_time": "0:14:28", "throughput": 10152.53, "total_tokens": 22516640}
|
||||
{"current_steps": 213, "total_steps": 295, "loss": 0.2977, "lr": 1.1156983735343796e-05, "epoch": 3.610169491525424, "percentage": 72.2, "elapsed_time": "0:37:08", "remaining_time": "0:14:17", "throughput": 10152.18, "total_tokens": 22620992}
|
||||
{"current_steps": 214, "total_steps": 295, "loss": 0.2581, "lr": 1.0911168683483449e-05, "epoch": 3.6271186440677967, "percentage": 72.54, "elapsed_time": "0:37:18", "remaining_time": "0:14:07", "throughput": 10156.91, "total_tokens": 22736624}
|
||||
{"current_steps": 215, "total_steps": 295, "loss": 0.2166, "lr": 1.0667333689480322e-05, "epoch": 3.6440677966101696, "percentage": 72.88, "elapsed_time": "0:37:28", "remaining_time": "0:13:56", "throughput": 10162.77, "total_tokens": 22855144}
|
||||
{"current_steps": 216, "total_steps": 295, "loss": 0.2322, "lr": 1.0425513022138203e-05, "epoch": 3.6610169491525424, "percentage": 73.22, "elapsed_time": "0:37:39", "remaining_time": "0:13:46", "throughput": 10165.09, "total_tokens": 22965704}
|
||||
{"current_steps": 217, "total_steps": 295, "loss": 0.301, "lr": 1.0185740667165456e-05, "epoch": 3.6779661016949152, "percentage": 73.56, "elapsed_time": "0:37:49", "remaining_time": "0:13:35", "throughput": 10166.5, "total_tokens": 23070056}
|
||||
{"current_steps": 218, "total_steps": 295, "loss": 0.2224, "lr": 9.948050322398658e-06, "epoch": 3.694915254237288, "percentage": 73.9, "elapsed_time": "0:37:59", "remaining_time": "0:13:25", "throughput": 10168.42, "total_tokens": 23180184}
|
||||
{"current_steps": 219, "total_steps": 295, "loss": 0.3068, "lr": 9.712475393066705e-06, "epoch": 3.711864406779661, "percentage": 74.24, "elapsed_time": "0:38:09", "remaining_time": "0:13:14", "throughput": 10168.9, "total_tokens": 23285216}
|
||||
{"current_steps": 220, "total_steps": 295, "loss": 0.2098, "lr": 9.479048987095954e-06, "epoch": 3.7288135593220337, "percentage": 74.58, "elapsed_time": "0:38:20", "remaining_time": "0:13:04", "throughput": 10170.08, "total_tokens": 23393240}
|
||||
{"current_steps": 221, "total_steps": 295, "loss": 0.2637, "lr": 9.247803910457226e-06, "epoch": 3.7457627118644066, "percentage": 74.92, "elapsed_time": "0:38:30", "remaining_time": "0:12:53", "throughput": 10172.96, "total_tokens": 23505224}
|
||||
{"current_steps": 222, "total_steps": 295, "loss": 0.2402, "lr": 9.018772662555252e-06, "epoch": 3.7627118644067794, "percentage": 75.25, "elapsed_time": "0:38:40", "remaining_time": "0:12:43", "throughput": 10169.77, "total_tokens": 23602096}
|
||||
{"current_steps": 223, "total_steps": 295, "loss": 0.232, "lr": 8.791987431661137e-06, "epoch": 3.7796610169491527, "percentage": 75.59, "elapsed_time": "0:38:50", "remaining_time": "0:12:32", "throughput": 10167.92, "total_tokens": 23697608}
|
||||
{"current_steps": 224, "total_steps": 295, "loss": 0.298, "lr": 8.567480090388586e-06, "epoch": 3.7966101694915255, "percentage": 75.93, "elapsed_time": "0:39:00", "remaining_time": "0:12:21", "throughput": 10165.38, "total_tokens": 23795304}
|
||||
{"current_steps": 225, "total_steps": 295, "loss": 0.2689, "lr": 8.34528219121455e-06, "epoch": 3.8135593220338984, "percentage": 76.27, "elapsed_time": "0:39:11", "remaining_time": "0:12:11", "throughput": 10167.81, "total_tokens": 23905280}
|
||||
{"current_steps": 225, "total_steps": 295, "eval_loss": 0.4844910502433777, "epoch": 3.8135593220338984, "percentage": 76.27, "elapsed_time": "0:39:15", "remaining_time": "0:12:12", "throughput": 10149.11, "total_tokens": 23905280}
|
||||
{"current_steps": 226, "total_steps": 295, "loss": 0.2417, "lr": 8.125424962044742e-06, "epoch": 3.830508474576271, "percentage": 76.61, "elapsed_time": "0:39:25", "remaining_time": "0:12:02", "throughput": 10151.31, "total_tokens": 24015504}
|
||||
{"current_steps": 227, "total_steps": 295, "loss": 0.2673, "lr": 7.907939301824884e-06, "epoch": 3.847457627118644, "percentage": 76.95, "elapsed_time": "0:39:35", "remaining_time": "0:11:51", "throughput": 10155.94, "total_tokens": 24128928}
|
||||
{"current_steps": 228, "total_steps": 295, "loss": 0.2541, "lr": 7.692855776198114e-06, "epoch": 3.864406779661017, "percentage": 77.29, "elapsed_time": "0:39:46", "remaining_time": "0:11:41", "throughput": 10155.55, "total_tokens": 24232712}
|
||||
{"current_steps": 229, "total_steps": 295, "loss": 0.2341, "lr": 7.480204613209288e-06, "epoch": 3.8813559322033897, "percentage": 77.63, "elapsed_time": "0:39:56", "remaining_time": "0:11:30", "throughput": 10156.04, "total_tokens": 24337744}
|
||||
{"current_steps": 230, "total_steps": 295, "loss": 0.2309, "lr": 7.2700156990566675e-06, "epoch": 3.898305084745763, "percentage": 77.97, "elapsed_time": "0:40:06", "remaining_time": "0:11:20", "throughput": 10157.53, "total_tokens": 24446736}
|
||||
{"current_steps": 231, "total_steps": 295, "loss": 0.2718, "lr": 7.062318573891716e-06, "epoch": 3.915254237288136, "percentage": 78.31, "elapsed_time": "0:40:17", "remaining_time": "0:11:09", "throughput": 10161.84, "total_tokens": 24562728}
|
||||
{"current_steps": 232, "total_steps": 295, "loss": 0.2529, "lr": 6.85714242766749e-06, "epoch": 3.9322033898305087, "percentage": 78.64, "elapsed_time": "0:40:27", "remaining_time": "0:10:59", "throughput": 10162.84, "total_tokens": 24669264}
|
||||
{"current_steps": 233, "total_steps": 295, "loss": 0.2899, "lr": 6.654516096036231e-06, "epoch": 3.9491525423728815, "percentage": 78.98, "elapsed_time": "0:40:38", "remaining_time": "0:10:48", "throughput": 10158.56, "total_tokens": 24774256}
|
||||
{"current_steps": 234, "total_steps": 295, "loss": 0.3025, "lr": 6.4544680562968e-06, "epoch": 3.9661016949152543, "percentage": 79.32, "elapsed_time": "0:40:50", "remaining_time": "0:10:38", "throughput": 10155.56, "total_tokens": 24882752}
|
||||
{"current_steps": 235, "total_steps": 295, "loss": 0.2283, "lr": 6.25702642339244e-06, "epoch": 3.983050847457627, "percentage": 79.66, "elapsed_time": "0:41:01", "remaining_time": "0:10:28", "throughput": 10150.34, "total_tokens": 24984864}
|
||||
{"current_steps": 236, "total_steps": 295, "loss": 0.2404, "lr": 6.062218945959497e-06, "epoch": 4.0, "percentage": 80.0, "elapsed_time": "0:41:12", "remaining_time": "0:10:18", "throughput": 10145.59, "total_tokens": 25087648}
|
||||
{"current_steps": 237, "total_steps": 295, "loss": 0.1724, "lr": 5.87007300242757e-06, "epoch": 4.016949152542373, "percentage": 80.34, "elapsed_time": "0:41:24", "remaining_time": "0:10:07", "throughput": 10142.52, "total_tokens": 25195544}
|
||||
{"current_steps": 238, "total_steps": 295, "loss": 0.1724, "lr": 5.680615597171718e-06, "epoch": 4.033898305084746, "percentage": 80.68, "elapsed_time": "0:41:35", "remaining_time": "0:09:57", "throughput": 10137.89, "total_tokens": 25298896}
|
||||
{"current_steps": 239, "total_steps": 295, "loss": 0.2116, "lr": 5.493873356717288e-06, "epoch": 4.0508474576271185, "percentage": 81.02, "elapsed_time": "0:41:46", "remaining_time": "0:09:47", "throughput": 10132.61, "total_tokens": 25400872}
|
||||
{"current_steps": 240, "total_steps": 295, "loss": 0.1622, "lr": 5.309872525997736e-06, "epoch": 4.067796610169491, "percentage": 81.36, "elapsed_time": "0:41:58", "remaining_time": "0:09:37", "throughput": 10132.02, "total_tokens": 25512440}
|
||||
{"current_steps": 241, "total_steps": 295, "loss": 0.1675, "lr": 5.128638964666166e-06, "epoch": 4.084745762711864, "percentage": 81.69, "elapsed_time": "0:42:09", "remaining_time": "0:09:26", "throughput": 10132.94, "total_tokens": 25630856}
|
||||
{"current_steps": 242, "total_steps": 295, "loss": 0.177, "lr": 4.950198143461013e-06, "epoch": 4.101694915254237, "percentage": 82.03, "elapsed_time": "0:42:20", "remaining_time": "0:09:16", "throughput": 10128.79, "total_tokens": 25735072}
|
||||
{"current_steps": 243, "total_steps": 295, "loss": 0.1777, "lr": 4.7745751406263165e-06, "epoch": 4.11864406779661, "percentage": 82.37, "elapsed_time": "0:42:31", "remaining_time": "0:09:05", "throughput": 10123.13, "total_tokens": 25828728}
|
||||
{"current_steps": 244, "total_steps": 295, "loss": 0.2315, "lr": 4.601794638387219e-06, "epoch": 4.135593220338983, "percentage": 82.71, "elapsed_time": "0:42:42", "remaining_time": "0:08:55", "throughput": 10120.38, "total_tokens": 25934056}
|
||||
{"current_steps": 245, "total_steps": 295, "loss": 0.1943, "lr": 4.43188091948113e-06, "epoch": 4.1525423728813555, "percentage": 83.05, "elapsed_time": "0:42:53", "remaining_time": "0:08:45", "throughput": 10119.2, "total_tokens": 26045936}
|
||||
{"current_steps": 246, "total_steps": 295, "loss": 0.2207, "lr": 4.264857863744956e-06, "epoch": 4.169491525423728, "percentage": 83.39, "elapsed_time": "0:43:05", "remaining_time": "0:08:34", "throughput": 10115.02, "total_tokens": 26149848}
|
||||
{"current_steps": 247, "total_steps": 295, "loss": 0.1831, "lr": 4.1007489447590365e-06, "epoch": 4.186440677966102, "percentage": 83.73, "elapsed_time": "0:43:16", "remaining_time": "0:08:24", "throughput": 10112.31, "total_tokens": 26254680}
|
||||
{"current_steps": 248, "total_steps": 295, "loss": 0.2043, "lr": 3.939577226548152e-06, "epoch": 4.203389830508475, "percentage": 84.07, "elapsed_time": "0:43:27", "remaining_time": "0:08:14", "throughput": 10107.12, "total_tokens": 26355904}
|
||||
{"current_steps": 249, "total_steps": 295, "loss": 0.1638, "lr": 3.781365360340056e-06, "epoch": 4.220338983050848, "percentage": 84.41, "elapsed_time": "0:43:38", "remaining_time": "0:08:03", "throughput": 10099.29, "total_tokens": 26449120}
|
||||
{"current_steps": 250, "total_steps": 295, "loss": 0.1457, "lr": 3.6261355813820645e-06, "epoch": 4.237288135593221, "percentage": 84.75, "elapsed_time": "0:43:50", "remaining_time": "0:07:53", "throughput": 10094.4, "total_tokens": 26551272}
|
||||
{"current_steps": 250, "total_steps": 295, "eval_loss": 0.48583686351776123, "epoch": 4.237288135593221, "percentage": 84.75, "elapsed_time": "0:43:54", "remaining_time": "0:07:54", "throughput": 10077.73, "total_tokens": 26551272}
|
||||
{"current_steps": 251, "total_steps": 295, "loss": 0.213, "lr": 3.4739097058161114e-06, "epoch": 4.254237288135593, "percentage": 85.08, "elapsed_time": "0:44:06", "remaining_time": "0:07:43", "throughput": 10074.31, "total_tokens": 26656864}
|
||||
{"current_steps": 252, "total_steps": 295, "loss": 0.1551, "lr": 3.324709127612649e-06, "epoch": 4.271186440677966, "percentage": 85.42, "elapsed_time": "0:44:17", "remaining_time": "0:07:33", "throughput": 10070.74, "total_tokens": 26761368}
|
||||
{"current_steps": 253, "total_steps": 295, "loss": 0.2033, "lr": 3.1785548155639444e-06, "epoch": 4.288135593220339, "percentage": 85.76, "elapsed_time": "0:44:28", "remaining_time": "0:07:23", "throughput": 10065.02, "total_tokens": 26860000}
|
||||
{"current_steps": 254, "total_steps": 295, "loss": 0.1509, "lr": 3.035467310337095e-06, "epoch": 4.305084745762712, "percentage": 86.1, "elapsed_time": "0:44:39", "remaining_time": "0:07:12", "throughput": 10064.15, "total_tokens": 26971656}
|
||||
{"current_steps": 255, "total_steps": 295, "loss": 0.1798, "lr": 2.895466721587245e-06, "epoch": 4.322033898305085, "percentage": 86.44, "elapsed_time": "0:44:51", "remaining_time": "0:07:02", "throughput": 10059.09, "total_tokens": 27072048}
|
||||
{"current_steps": 256, "total_steps": 295, "loss": 0.1817, "lr": 2.75857272513132e-06, "epoch": 4.338983050847458, "percentage": 86.78, "elapsed_time": "0:45:02", "remaining_time": "0:06:51", "throughput": 10056.91, "total_tokens": 27181384}
|
||||
{"current_steps": 257, "total_steps": 295, "loss": 0.2302, "lr": 2.624804560182789e-06, "epoch": 4.3559322033898304, "percentage": 87.12, "elapsed_time": "0:45:14", "remaining_time": "0:06:41", "throughput": 10052.45, "total_tokens": 27283960}
|
||||
{"current_steps": 258, "total_steps": 295, "loss": 0.1795, "lr": 2.494181026647782e-06, "epoch": 4.372881355932203, "percentage": 87.46, "elapsed_time": "0:45:25", "remaining_time": "0:06:30", "throughput": 10048.49, "total_tokens": 27387568}
|
||||
{"current_steps": 259, "total_steps": 295, "loss": 0.158, "lr": 2.3667204824828953e-06, "epoch": 4.389830508474576, "percentage": 87.8, "elapsed_time": "0:45:36", "remaining_time": "0:06:20", "throughput": 10046.12, "total_tokens": 27495760}
|
||||
{"current_steps": 260, "total_steps": 295, "loss": 0.1672, "lr": 2.2424408411151704e-06, "epoch": 4.406779661016949, "percentage": 88.14, "elapsed_time": "0:45:48", "remaining_time": "0:06:09", "throughput": 10043.4, "total_tokens": 27602880}
|
||||
{"current_steps": 261, "total_steps": 295, "loss": 0.2301, "lr": 2.1213595689245386e-06, "epoch": 4.423728813559322, "percentage": 88.47, "elapsed_time": "0:45:59", "remaining_time": "0:05:59", "throughput": 10040.99, "total_tokens": 27704640}
|
||||
{"current_steps": 262, "total_steps": 295, "loss": 0.1928, "lr": 2.00349368278904e-06, "epoch": 4.440677966101695, "percentage": 88.81, "elapsed_time": "0:46:10", "remaining_time": "0:05:48", "throughput": 10038.75, "total_tokens": 27812152}
|
||||
{"current_steps": 263, "total_steps": 295, "loss": 0.218, "lr": 1.8888597476932834e-06, "epoch": 4.4576271186440675, "percentage": 89.15, "elapsed_time": "0:46:21", "remaining_time": "0:05:38", "throughput": 10035.53, "total_tokens": 27916960}
|
||||
{"current_steps": 264, "total_steps": 295, "loss": 0.1459, "lr": 1.7774738744003927e-06, "epoch": 4.47457627118644, "percentage": 89.49, "elapsed_time": "0:46:33", "remaining_time": "0:05:28", "throughput": 10035.18, "total_tokens": 28032376}
|
||||
{"current_steps": 265, "total_steps": 295, "loss": 0.1564, "lr": 1.6693517171877533e-06, "epoch": 4.491525423728813, "percentage": 89.83, "elapsed_time": "0:46:44", "remaining_time": "0:05:17", "throughput": 10031.0, "total_tokens": 28132512}
|
||||
{"current_steps": 266, "total_steps": 295, "loss": 0.134, "lr": 1.5645084716469777e-06, "epoch": 4.508474576271187, "percentage": 90.17, "elapsed_time": "0:46:55", "remaining_time": "0:05:06", "throughput": 10027.66, "total_tokens": 28234672}
|
||||
{"current_steps": 267, "total_steps": 295, "loss": 0.1599, "lr": 1.4629588725482841e-06, "epoch": 4.52542372881356, "percentage": 90.51, "elapsed_time": "0:47:06", "remaining_time": "0:04:56", "throughput": 10025.14, "total_tokens": 28338776}
|
||||
{"current_steps": 268, "total_steps": 295, "loss": 0.1864, "lr": 1.3647171917696684e-06, "epoch": 4.5423728813559325, "percentage": 90.85, "elapsed_time": "0:47:18", "remaining_time": "0:04:45", "throughput": 10026.95, "total_tokens": 28458896}
|
||||
{"current_steps": 269, "total_steps": 295, "loss": 0.2124, "lr": 1.2697972362911064e-06, "epoch": 4.559322033898305, "percentage": 91.19, "elapsed_time": "0:47:29", "remaining_time": "0:04:35", "throughput": 10025.83, "total_tokens": 28570072}
|
||||
{"current_steps": 270, "total_steps": 295, "loss": 0.1485, "lr": 1.1782123462541178e-06, "epoch": 4.576271186440678, "percentage": 91.53, "elapsed_time": "0:47:40", "remaining_time": "0:04:24", "throughput": 10022.89, "total_tokens": 28674856}
|
||||
{"current_steps": 271, "total_steps": 295, "loss": 0.2151, "lr": 1.0899753930869394e-06, "epoch": 4.593220338983051, "percentage": 91.86, "elapsed_time": "0:47:52", "remaining_time": "0:04:14", "throughput": 10020.28, "total_tokens": 28779768}
|
||||
{"current_steps": 272, "total_steps": 295, "loss": 0.2067, "lr": 1.00509877769554e-06, "epoch": 4.610169491525424, "percentage": 92.2, "elapsed_time": "0:48:03", "remaining_time": "0:04:03", "throughput": 10017.5, "total_tokens": 28885568}
|
||||
{"current_steps": 273, "total_steps": 295, "loss": 0.1291, "lr": 9.235944287207976e-07, "epoch": 4.627118644067797, "percentage": 92.54, "elapsed_time": "0:48:14", "remaining_time": "0:03:53", "throughput": 10017.68, "total_tokens": 29000280}
|
||||
{"current_steps": 274, "total_steps": 295, "loss": 0.2032, "lr": 8.454738008620456e-07, "epoch": 4.6440677966101696, "percentage": 92.88, "elapsed_time": "0:48:26", "remaining_time": "0:03:42", "throughput": 10013.95, "total_tokens": 29100816}
|
||||
{"current_steps": 275, "total_steps": 295, "loss": 0.1878, "lr": 7.707478732671941e-07, "epoch": 4.661016949152542, "percentage": 93.22, "elapsed_time": "0:48:37", "remaining_time": "0:03:32", "throughput": 10008.1, "total_tokens": 29196936}
|
||||
{"current_steps": 275, "total_steps": 295, "eval_loss": 0.48511388897895813, "epoch": 4.661016949152542, "percentage": 93.22, "elapsed_time": "0:48:41", "remaining_time": "0:03:32", "throughput": 9993.2, "total_tokens": 29196936}
|
||||
{"current_steps": 276, "total_steps": 295, "loss": 0.2355, "lr": 6.994271479897314e-07, "epoch": 4.677966101694915, "percentage": 93.56, "elapsed_time": "0:48:53", "remaining_time": "0:03:21", "throughput": 9993.06, "total_tokens": 29310880}
|
||||
{"current_steps": 277, "total_steps": 295, "loss": 0.1741, "lr": 6.315216485127506e-07, "epoch": 4.694915254237288, "percentage": 93.9, "elapsed_time": "0:49:04", "remaining_time": "0:03:11", "throughput": 9992.8, "total_tokens": 29421104}
|
||||
{"current_steps": 278, "total_steps": 295, "loss": 0.2569, "lr": 5.670409183402364e-07, "epoch": 4.711864406779661, "percentage": 94.24, "elapsed_time": "0:49:15", "remaining_time": "0:03:00", "throughput": 9989.38, "total_tokens": 29524408}
|
||||
{"current_steps": 279, "total_steps": 295, "loss": 0.2181, "lr": 5.059940196558088e-07, "epoch": 4.728813559322034, "percentage": 94.58, "elapsed_time": "0:49:26", "remaining_time": "0:02:50", "throughput": 9986.21, "total_tokens": 29624776}
|
||||
{"current_steps": 280, "total_steps": 295, "loss": 0.2237, "lr": 4.4838953204912326e-07, "epoch": 4.745762711864407, "percentage": 94.92, "elapsed_time": "0:49:37", "remaining_time": "0:02:39", "throughput": 9982.9, "total_tokens": 29728640}
|
||||
{"current_steps": 281, "total_steps": 295, "loss": 0.2027, "lr": 3.9423555131007925e-07, "epoch": 4.762711864406779, "percentage": 95.25, "elapsed_time": "0:49:49", "remaining_time": "0:02:28", "throughput": 9979.58, "total_tokens": 29832320}
|
||||
{"current_steps": 282, "total_steps": 295, "loss": 0.1866, "lr": 3.435396882910391e-07, "epoch": 4.779661016949152, "percentage": 95.59, "elapsed_time": "0:50:00", "remaining_time": "0:02:18", "throughput": 9977.21, "total_tokens": 29938136}
|
||||
{"current_steps": 283, "total_steps": 295, "loss": 0.1648, "lr": 2.963090678371805e-07, "epoch": 4.796610169491525, "percentage": 95.93, "elapsed_time": "0:50:11", "remaining_time": "0:02:07", "throughput": 9973.4, "total_tokens": 30038384}
|
||||
{"current_steps": 284, "total_steps": 295, "loss": 0.1827, "lr": 2.5255032778517264e-07, "epoch": 4.813559322033898, "percentage": 96.27, "elapsed_time": "0:50:23", "remaining_time": "0:01:57", "throughput": 9974.19, "total_tokens": 30155184}
|
||||
{"current_steps": 285, "total_steps": 295, "loss": 0.194, "lr": 2.1226961803028632e-07, "epoch": 4.830508474576272, "percentage": 96.61, "elapsed_time": "0:50:34", "remaining_time": "0:01:46", "throughput": 9971.73, "total_tokens": 30258792}
|
||||
{"current_steps": 286, "total_steps": 295, "loss": 0.197, "lr": 1.7547259966207708e-07, "epoch": 4.847457627118644, "percentage": 96.95, "elapsed_time": "0:50:45", "remaining_time": "0:01:35", "throughput": 9968.09, "total_tokens": 30360016}
|
||||
{"current_steps": 287, "total_steps": 295, "loss": 0.1479, "lr": 1.4216444416877695e-07, "epoch": 4.864406779661017, "percentage": 97.29, "elapsed_time": "0:50:57", "remaining_time": "0:01:25", "throughput": 9963.14, "total_tokens": 30457880}
|
||||
{"current_steps": 288, "total_steps": 295, "loss": 0.1922, "lr": 1.1234983271048161e-07, "epoch": 4.88135593220339, "percentage": 97.63, "elapsed_time": "0:51:08", "remaining_time": "0:01:14", "throughput": 9960.21, "total_tokens": 30561968}
|
||||
{"current_steps": 289, "total_steps": 295, "loss": 0.2168, "lr": 8.603295546126821e-08, "epoch": 4.898305084745763, "percentage": 97.97, "elapsed_time": "0:51:19", "remaining_time": "0:01:03", "throughput": 9958.05, "total_tokens": 30666960}
|
||||
{"current_steps": 290, "total_steps": 295, "loss": 0.1848, "lr": 6.321751102028595e-08, "epoch": 4.915254237288136, "percentage": 98.31, "elapsed_time": "0:51:31", "remaining_time": "0:00:53", "throughput": 9960.64, "total_tokens": 30789448}
|
||||
{"current_steps": 291, "total_steps": 295, "loss": 0.1965, "lr": 4.390670589196622e-08, "epoch": 4.932203389830509, "percentage": 98.64, "elapsed_time": "0:51:42", "remaining_time": "0:00:42", "throughput": 9963.05, "total_tokens": 30910944}
|
||||
{"current_steps": 292, "total_steps": 295, "loss": 0.2339, "lr": 2.8103254035369285e-08, "epoch": 4.9491525423728815, "percentage": 98.98, "elapsed_time": "0:51:54", "remaining_time": "0:00:31", "throughput": 9963.03, "total_tokens": 31025152}
|
||||
{"current_steps": 293, "total_steps": 295, "loss": 0.1937, "lr": 1.5809376482767147e-08, "epoch": 4.966101694915254, "percentage": 99.32, "elapsed_time": "0:52:05", "remaining_time": "0:00:21", "throughput": 9962.07, "total_tokens": 31135592}
|
||||
{"current_steps": 294, "total_steps": 295, "loss": 0.1793, "lr": 7.0268010274959775e-09, "epoch": 4.983050847457627, "percentage": 99.66, "elapsed_time": "0:52:16", "remaining_time": "0:00:10", "throughput": 9961.48, "total_tokens": 31247744}
|
||||
{"current_steps": 295, "total_steps": 295, "loss": 0.1822, "lr": 1.7567619811281744e-09, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:52:28", "remaining_time": "0:00:00", "throughput": 9961.82, "total_tokens": 31362728}
|
||||
{"current_steps": 295, "total_steps": 295, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:54:24", "remaining_time": "0:00:00", "throughput": 9606.94, "total_tokens": 31362728}
|
||||
Reference in New Issue
Block a user