Files
OpenThoughts3-greedy-groups…/trainer_log.jsonl

102 lines
18 KiB
Plaintext
Raw Normal View History

{"current_steps": 1, "total_steps": 100, "loss": 1.3158, "lr": 0.00015996052482925854, "epoch": 0.02, "percentage": 1.0, "elapsed_time": "0:02:33", "remaining_time": "4:13:49"}
{"current_steps": 2, "total_steps": 100, "loss": 3.9659, "lr": 0.00015984213827426174, "epoch": 0.04, "percentage": 2.0, "elapsed_time": "0:05:05", "remaining_time": "4:09:24"}
{"current_steps": 3, "total_steps": 100, "loss": 2.6879, "lr": 0.0001596449571682464, "epoch": 0.06, "percentage": 3.0, "elapsed_time": "0:07:36", "remaining_time": "4:05:47"}
{"current_steps": 4, "total_steps": 100, "loss": 2.0717, "lr": 0.00015936917610515826, "epoch": 0.08, "percentage": 4.0, "elapsed_time": "0:10:07", "remaining_time": "4:03:07"}
{"current_steps": 5, "total_steps": 100, "loss": 1.9221, "lr": 0.00015901506724761103, "epoch": 0.1, "percentage": 5.0, "elapsed_time": "0:12:38", "remaining_time": "4:00:14"}
{"current_steps": 6, "total_steps": 100, "loss": 1.7919, "lr": 0.00015858298005829512, "epoch": 0.12, "percentage": 6.0, "elapsed_time": "0:15:10", "remaining_time": "3:57:39"}
{"current_steps": 7, "total_steps": 100, "loss": 1.6514, "lr": 0.0001580733409550998, "epoch": 0.14, "percentage": 7.0, "elapsed_time": "0:17:40", "remaining_time": "3:54:55"}
{"current_steps": 8, "total_steps": 100, "loss": 1.561, "lr": 0.0001574866528902905, "epoch": 0.16, "percentage": 8.0, "elapsed_time": "0:20:10", "remaining_time": "3:51:59"}
{"current_steps": 9, "total_steps": 100, "loss": 1.4794, "lr": 0.00015682349485415545, "epoch": 0.18, "percentage": 9.0, "elapsed_time": "0:22:41", "remaining_time": "3:49:24"}
{"current_steps": 10, "total_steps": 100, "loss": 1.4122, "lr": 0.0001560845213036123, "epoch": 0.2, "percentage": 10.0, "elapsed_time": "0:25:11", "remaining_time": "3:46:46"}
{"current_steps": 11, "total_steps": 100, "loss": 1.4144, "lr": 0.00015527046151633805, "epoch": 0.22, "percentage": 11.0, "elapsed_time": "0:27:41", "remaining_time": "3:43:59"}
{"current_steps": 12, "total_steps": 100, "loss": 1.3658, "lr": 0.00015438211887106013, "epoch": 0.24, "percentage": 12.0, "elapsed_time": "0:30:11", "remaining_time": "3:41:24"}
{"current_steps": 13, "total_steps": 100, "loss": 1.3555, "lr": 0.0001534203700547185, "epoch": 0.26, "percentage": 13.0, "elapsed_time": "0:32:40", "remaining_time": "3:38:37"}
{"current_steps": 14, "total_steps": 100, "loss": 1.3031, "lr": 0.00015238616419728157, "epoch": 0.28, "percentage": 14.0, "elapsed_time": "0:35:08", "remaining_time": "3:35:50"}
{"current_steps": 15, "total_steps": 100, "loss": 1.2992, "lr": 0.00015128052193506944, "epoch": 0.3, "percentage": 15.0, "elapsed_time": "0:37:37", "remaining_time": "3:33:14"}
{"current_steps": 16, "total_steps": 100, "loss": 1.3053, "lr": 0.0001501045344035091, "epoch": 0.32, "percentage": 16.0, "elapsed_time": "0:40:06", "remaining_time": "3:30:33"}
{"current_steps": 17, "total_steps": 100, "loss": 1.2677, "lr": 0.0001488593621603155, "epoch": 0.34, "percentage": 17.0, "elapsed_time": "0:42:34", "remaining_time": "3:27:53"}
{"current_steps": 18, "total_steps": 100, "loss": 1.243, "lr": 0.00014754623404016122, "epoch": 0.36, "percentage": 18.0, "elapsed_time": "0:45:02", "remaining_time": "3:25:11"}
{"current_steps": 19, "total_steps": 100, "loss": 1.2565, "lr": 0.00014616644594196495, "epoch": 0.38, "percentage": 19.0, "elapsed_time": "0:47:30", "remaining_time": "3:22:32"}
{"current_steps": 20, "total_steps": 100, "loss": 1.2198, "lr": 0.00014472135954999581, "epoch": 0.4, "percentage": 20.0, "elapsed_time": "0:49:58", "remaining_time": "3:19:52"}
{"current_steps": 21, "total_steps": 100, "loss": 1.2149, "lr": 0.00014321240099005524, "epoch": 0.42, "percentage": 21.0, "elapsed_time": "0:52:24", "remaining_time": "3:17:08"}
{"current_steps": 22, "total_steps": 100, "loss": 1.2114, "lr": 0.00014164105942206316, "epoch": 0.44, "percentage": 22.0, "elapsed_time": "0:54:52", "remaining_time": "3:14:31"}
{"current_steps": 23, "total_steps": 100, "loss": 1.2158, "lr": 0.00014000888557043678, "epoch": 0.46, "percentage": 23.0, "elapsed_time": "0:57:20", "remaining_time": "3:11:58"}
{"current_steps": 24, "total_steps": 100, "loss": 1.2238, "lr": 0.00013831749019371293, "epoch": 0.48, "percentage": 24.0, "elapsed_time": "0:59:49", "remaining_time": "3:09:27"}
{"current_steps": 25, "total_steps": 100, "loss": 1.2145, "lr": 0.00013656854249492382, "epoch": 0.5, "percentage": 25.0, "elapsed_time": "1:02:17", "remaining_time": "3:06:51"}
{"current_steps": 26, "total_steps": 100, "loss": 1.2081, "lr": 0.00013476376847429511, "epoch": 0.52, "percentage": 26.0, "elapsed_time": "1:04:45", "remaining_time": "3:04:17"}
{"current_steps": 27, "total_steps": 100, "loss": 1.1975, "lr": 0.00013290494922589216, "epoch": 0.54, "percentage": 27.0, "elapsed_time": "1:07:12", "remaining_time": "3:01:43"}
{"current_steps": 28, "total_steps": 100, "loss": 1.1867, "lr": 0.0001309939191798952, "epoch": 0.56, "percentage": 28.0, "elapsed_time": "1:09:39", "remaining_time": "2:59:07"}
{"current_steps": 29, "total_steps": 100, "loss": 1.1566, "lr": 0.00012903256429223813, "epoch": 0.58, "percentage": 29.0, "elapsed_time": "1:12:04", "remaining_time": "2:56:27"}
{"current_steps": 30, "total_steps": 100, "loss": 1.1663, "lr": 0.00012702282018339786, "epoch": 0.6, "percentage": 30.0, "elapsed_time": "1:14:31", "remaining_time": "2:53:53"}
{"current_steps": 31, "total_steps": 100, "loss": 1.1654, "lr": 0.00012496667022817044, "epoch": 0.62, "percentage": 31.0, "elapsed_time": "1:16:59", "remaining_time": "2:51:22"}
{"current_steps": 32, "total_steps": 100, "loss": 1.1489, "lr": 0.00012286614359831974, "epoch": 0.64, "percentage": 32.0, "elapsed_time": "1:19:26", "remaining_time": "2:48:48"}
{"current_steps": 33, "total_steps": 100, "loss": 1.18, "lr": 0.00012072331326002972, "epoch": 0.66, "percentage": 33.0, "elapsed_time": "1:21:52", "remaining_time": "2:46:13"}
{"current_steps": 34, "total_steps": 100, "loss": 1.139, "lr": 0.00011854029392813723, "epoch": 0.68, "percentage": 34.0, "elapsed_time": "1:24:17", "remaining_time": "2:43:38"}
{"current_steps": 35, "total_steps": 100, "loss": 1.1694, "lr": 0.00011631923997916375, "epoch": 0.7, "percentage": 35.0, "elapsed_time": "1:26:45", "remaining_time": "2:41:06"}
{"current_steps": 36, "total_steps": 100, "loss": 1.1404, "lr": 0.00011406234332520582, "epoch": 0.72, "percentage": 36.0, "elapsed_time": "1:29:10", "remaining_time": "2:38:32"}
{"current_steps": 37, "total_steps": 100, "loss": 1.1311, "lr": 0.00011177183125078245, "epoch": 0.74, "percentage": 37.0, "elapsed_time": "1:31:37", "remaining_time": "2:35:59"}
{"current_steps": 38, "total_steps": 100, "loss": 1.1536, "lr": 0.00010944996421477426, "epoch": 0.76, "percentage": 38.0, "elapsed_time": "1:34:03", "remaining_time": "2:33:27"}
{"current_steps": 39, "total_steps": 100, "loss": 1.137, "lr": 0.00010709903361962333, "epoch": 0.78, "percentage": 39.0, "elapsed_time": "1:36:26", "remaining_time": "2:30:51"}
{"current_steps": 40, "total_steps": 100, "loss": 1.1247, "lr": 0.0001047213595499958, "epoch": 0.8, "percentage": 40.0, "elapsed_time": "1:38:52", "remaining_time": "2:28:18"}
{"current_steps": 41, "total_steps": 100, "loss": 1.1189, "lr": 0.00010231928848313836, "epoch": 0.82, "percentage": 41.0, "elapsed_time": "1:41:15", "remaining_time": "2:25:42"}
{"current_steps": 42, "total_steps": 100, "loss": 1.1254, "lr": 9.989519097318841e-05, "epoch": 0.84, "percentage": 42.0, "elapsed_time": "1:43:40", "remaining_time": "2:23:10"}
{"current_steps": 43, "total_steps": 100, "loss": 1.0805, "lr": 9.745145931172342e-05, "epoch": 0.86, "percentage": 43.0, "elapsed_time": "1:46:05", "remaining_time": "2:20:38"}
{"current_steps": 44, "total_steps": 100, "loss": 1.1303, "lr": 9.4990505166858e-05, "epoch": 0.88, "percentage": 44.0, "elapsed_time": "1:48:29", "remaining_time": "2:18:04"}
{"current_steps": 45, "total_steps": 100, "loss": 1.1149, "lr": 9.251475720321848e-05, "epoch": 0.9, "percentage": 45.0, "elapsed_time": "1:50:53", "remaining_time": "2:15:32"}
{"current_steps": 46, "total_steps": 100, "loss": 1.1049, "lr": 9.002665868514435e-05, "epoch": 0.92, "percentage": 46.0, "elapsed_time": "1:53:16", "remaining_time": "2:12:58"}
{"current_steps": 47, "total_steps": 100, "loss": 1.0833, "lr": 8.752866506548117e-05, "epoch": 0.94, "percentage": 47.0, "elapsed_time": "1:55:42", "remaining_time": "2:10:28"}
{"current_steps": 48, "total_steps": 100, "loss": 1.0629, "lr": 8.502324156234508e-05, "epoch": 0.96, "percentage": 48.0, "elapsed_time": "1:58:06", "remaining_time": "2:07:56"}
{"current_steps": 49, "total_steps": 100, "loss": 1.1009, "lr": 8.251286072625027e-05, "epoch": 0.98, "percentage": 49.0, "elapsed_time": "2:00:28", "remaining_time": "2:05:23"}
{"current_steps": 50, "total_steps": 100, "loss": 1.07, "lr": 8e-05, "epoch": 1.0, "percentage": 50.0, "elapsed_time": "2:02:51", "remaining_time": "2:02:51"}
{"current_steps": 51, "total_steps": 100, "loss": 1.113, "lr": 7.748713927374974e-05, "epoch": 1.02, "percentage": 51.0, "elapsed_time": "2:06:45", "remaining_time": "2:01:47"}
{"current_steps": 52, "total_steps": 100, "loss": 1.2082, "lr": 7.497675843765493e-05, "epoch": 1.04, "percentage": 52.0, "elapsed_time": "2:09:17", "remaining_time": "1:59:20"}
{"current_steps": 53, "total_steps": 100, "loss": 1.1663, "lr": 7.247133493451886e-05, "epoch": 1.06, "percentage": 53.0, "elapsed_time": "2:11:48", "remaining_time": "1:56:52"}
{"current_steps": 54, "total_steps": 100, "loss": 1.195, "lr": 6.997334131485565e-05, "epoch": 1.08, "percentage": 54.0, "elapsed_time": "2:14:20", "remaining_time": "1:54:26"}
{"current_steps": 55, "total_steps": 100, "loss": 1.1827, "lr": 6.748524279678152e-05, "epoch": 1.1, "percentage": 55.0, "elapsed_time": "2:16:52", "remaining_time": "1:51:59"}
{"current_steps": 56, "total_steps": 100, "loss": 1.2075, "lr": 6.500949483314202e-05, "epoch": 1.12, "percentage": 56.0, "elapsed_time": "2:19:24", "remaining_time": "1:49:31"}
{"current_steps": 57, "total_steps": 100, "loss": 1.1787, "lr": 6.254854068827662e-05, "epoch": 1.1400000000000001, "percentage": 57.0, "elapsed_time": "2:21:55", "remaining_time": "1:47:03"}
{"current_steps": 58, "total_steps": 100, "loss": 1.1946, "lr": 6.0104809026811634e-05, "epoch": 1.16, "percentage": 58.0, "elapsed_time": "2:24:24", "remaining_time": "1:44:34"}
{"current_steps": 59, "total_steps": 100, "loss": 1.1632, "lr": 5.7680711516861674e-05, "epoch": 1.18, "percentage": 59.0, "elapsed_time": "2:26:55", "remaining_time": "1:42:05"}
{"current_steps": 60, "total_steps": 100, "loss": 1.15, "lr": 5.5278640450004216e-05, "epoch": 1.2, "percentage": 60.0, "elapsed_time": "2:29:25", "remaining_time": "1:39:37"}
{"current_steps": 61, "total_steps": 100, "loss": 1.1817, "lr": 5.2900966380376693e-05, "epoch": 1.22, "percentage": 61.0, "elapsed_time": "2:31:54", "remaining_time": "1:37:07"}
{"current_steps": 62, "total_steps": 100, "loss": 1.1487, "lr": 5.055003578522577e-05, "epoch": 1.24, "percentage": 62.0, "elapsed_time": "2:34:25", "remaining_time": "1:34:38"}
{"current_steps": 63, "total_steps": 100, "loss": 1.1649, "lr": 4.822816874921756e-05, "epoch": 1.26, "percentage": 63.0, "elapsed_time": "2:36:53", "remaining_time": "1:32:08"}
{"current_steps": 64, "total_steps": 100, "loss": 1.1325, "lr": 4.593765667479419e-05, "epoch": 1.28, "percentage": 64.0, "elapsed_time": "2:39:21", "remaining_time": "1:29:38"}
{"current_steps": 65, "total_steps": 100, "loss": 1.1281, "lr": 4.3680760020836266e-05, "epoch": 1.3, "percentage": 65.0, "elapsed_time": "2:41:51", "remaining_time": "1:27:09"}
{"current_steps": 66, "total_steps": 100, "loss": 1.1476, "lr": 4.145970607186277e-05, "epoch": 1.32, "percentage": 66.0, "elapsed_time": "2:44:19", "remaining_time": "1:24:39"}
{"current_steps": 67, "total_steps": 100, "loss": 1.1233, "lr": 3.92766867399703e-05, "epoch": 1.34, "percentage": 67.0, "elapsed_time": "2:46:48", "remaining_time": "1:22:09"}
{"current_steps": 68, "total_steps": 100, "loss": 1.1051, "lr": 3.7133856401680256e-05, "epoch": 1.3599999999999999, "percentage": 68.0, "elapsed_time": "2:49:16", "remaining_time": "1:19:39"}
{"current_steps": 69, "total_steps": 100, "loss": 1.1266, "lr": 3.5033329771829576e-05, "epoch": 1.38, "percentage": 69.0, "elapsed_time": "2:51:44", "remaining_time": "1:17:09"}
{"current_steps": 70, "total_steps": 100, "loss": 1.0983, "lr": 3.297717981660216e-05, "epoch": 1.4, "percentage": 70.0, "elapsed_time": "2:54:12", "remaining_time": "1:14:39"}
{"current_steps": 71, "total_steps": 100, "loss": 1.0976, "lr": 3.09674357077619e-05, "epoch": 1.42, "percentage": 71.0, "elapsed_time": "2:56:38", "remaining_time": "1:12:09"}
{"current_steps": 72, "total_steps": 100, "loss": 1.0988, "lr": 2.9006080820104823e-05, "epoch": 1.44, "percentage": 72.0, "elapsed_time": "2:59:06", "remaining_time": "1:09:39"}
{"current_steps": 73, "total_steps": 100, "loss": 1.1055, "lr": 2.7095050774107867e-05, "epoch": 1.46, "percentage": 73.0, "elapsed_time": "3:01:34", "remaining_time": "1:07:09"}
{"current_steps": 74, "total_steps": 100, "loss": 1.117, "lr": 2.5236231525704902e-05, "epoch": 1.48, "percentage": 74.0, "elapsed_time": "3:04:03", "remaining_time": "1:04:40"}
{"current_steps": 75, "total_steps": 100, "loss": 1.1131, "lr": 2.3431457505076205e-05, "epoch": 1.5, "percentage": 75.0, "elapsed_time": "3:06:30", "remaining_time": "1:02:10"}
{"current_steps": 76, "total_steps": 100, "loss": 1.1111, "lr": 2.1682509806287094e-05, "epoch": 1.52, "percentage": 76.0, "elapsed_time": "3:08:58", "remaining_time": "0:59:40"}
{"current_steps": 77, "total_steps": 100, "loss": 1.1045, "lr": 1.9991114429563236e-05, "epoch": 1.54, "percentage": 77.0, "elapsed_time": "3:11:26", "remaining_time": "0:57:11"}
{"current_steps": 78, "total_steps": 100, "loss": 1.0936, "lr": 1.835894057793687e-05, "epoch": 1.56, "percentage": 78.0, "elapsed_time": "3:13:53", "remaining_time": "0:54:41"}
{"current_steps": 79, "total_steps": 100, "loss": 1.0665, "lr": 1.678759900994477e-05, "epoch": 1.58, "percentage": 79.0, "elapsed_time": "3:16:18", "remaining_time": "0:52:11"}
{"current_steps": 80, "total_steps": 100, "loss": 1.0794, "lr": 1.5278640450004213e-05, "epoch": 1.6, "percentage": 80.0, "elapsed_time": "3:18:46", "remaining_time": "0:49:41"}
{"current_steps": 81, "total_steps": 100, "loss": 1.084, "lr": 1.3833554058035045e-05, "epoch": 1.62, "percentage": 81.0, "elapsed_time": "3:21:14", "remaining_time": "0:47:12"}
{"current_steps": 82, "total_steps": 100, "loss": 1.0694, "lr": 1.2453765959838813e-05, "epoch": 1.6400000000000001, "percentage": 82.0, "elapsed_time": "3:23:41", "remaining_time": "0:44:42"}
{"current_steps": 83, "total_steps": 100, "loss": 1.0983, "lr": 1.1140637839684519e-05, "epoch": 1.6600000000000001, "percentage": 83.0, "elapsed_time": "3:26:07", "remaining_time": "0:42:13"}
{"current_steps": 84, "total_steps": 100, "loss": 1.0624, "lr": 9.895465596490931e-06, "epoch": 1.6800000000000002, "percentage": 84.0, "elapsed_time": "3:28:32", "remaining_time": "0:39:43"}
{"current_steps": 85, "total_steps": 100, "loss": 1.0937, "lr": 8.719478064930578e-06, "epoch": 1.7, "percentage": 85.0, "elapsed_time": "3:31:00", "remaining_time": "0:37:14"}
{"current_steps": 86, "total_steps": 100, "loss": 1.07, "lr": 7.613835802718452e-06, "epoch": 1.72, "percentage": 86.0, "elapsed_time": "3:33:25", "remaining_time": "0:34:44"}
{"current_steps": 87, "total_steps": 100, "loss": 1.0622, "lr": 6.579629945281509e-06, "epoch": 1.74, "percentage": 87.0, "elapsed_time": "3:35:52", "remaining_time": "0:32:15"}
{"current_steps": 88, "total_steps": 100, "loss": 1.0805, "lr": 5.6178811289398925e-06, "epoch": 1.76, "percentage": 88.0, "elapsed_time": "3:38:18", "remaining_time": "0:29:46"}
{"current_steps": 89, "total_steps": 100, "loss": 1.0685, "lr": 4.729538483661964e-06, "epoch": 1.78, "percentage": 89.0, "elapsed_time": "3:40:41", "remaining_time": "0:27:16"}
{"current_steps": 90, "total_steps": 100, "loss": 1.0612, "lr": 3.915478696387718e-06, "epoch": 1.8, "percentage": 90.0, "elapsed_time": "3:43:07", "remaining_time": "0:24:47"}
{"current_steps": 91, "total_steps": 100, "loss": 1.053, "lr": 3.176505145844555e-06, "epoch": 1.8199999999999998, "percentage": 91.0, "elapsed_time": "3:45:30", "remaining_time": "0:22:18"}
{"current_steps": 92, "total_steps": 100, "loss": 1.0649, "lr": 2.513347109709514e-06, "epoch": 1.8399999999999999, "percentage": 92.0, "elapsed_time": "3:47:55", "remaining_time": "0:19:49"}
{"current_steps": 93, "total_steps": 100, "loss": 1.024, "lr": 1.9266590449002052e-06, "epoch": 1.8599999999999999, "percentage": 93.0, "elapsed_time": "3:50:21", "remaining_time": "0:17:20"}
{"current_steps": 94, "total_steps": 100, "loss": 1.0724, "lr": 1.4170199417049114e-06, "epoch": 1.88, "percentage": 94.0, "elapsed_time": "3:52:44", "remaining_time": "0:14:51"}
{"current_steps": 95, "total_steps": 100, "loss": 1.0594, "lr": 9.849327523889873e-07, "epoch": 1.9, "percentage": 95.0, "elapsed_time": "3:55:09", "remaining_time": "0:12:22"}
{"current_steps": 96, "total_steps": 100, "loss": 1.0471, "lr": 6.308238948417788e-07, "epoch": 1.92, "percentage": 96.0, "elapsed_time": "3:57:31", "remaining_time": "0:09:53"}
{"current_steps": 97, "total_steps": 100, "loss": 1.0315, "lr": 3.550428317536003e-07, "epoch": 1.94, "percentage": 97.0, "elapsed_time": "3:59:57", "remaining_time": "0:07:25"}
{"current_steps": 98, "total_steps": 100, "loss": 1.0129, "lr": 1.578617257382753e-07, "epoch": 1.96, "percentage": 98.0, "elapsed_time": "4:02:21", "remaining_time": "0:04:56"}
{"current_steps": 99, "total_steps": 100, "loss": 1.0481, "lr": 3.9475170741472005e-08, "epoch": 1.98, "percentage": 99.0, "elapsed_time": "4:04:43", "remaining_time": "0:02:28"}
{"current_steps": 100, "total_steps": 100, "loss": 1.0204, "lr": 0.0, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "4:07:07", "remaining_time": "0:00:00"}
{"current_steps": 100, "total_steps": 100, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "4:08:03", "remaining_time": "0:00:00"}