Files
ModelHub XC 070e055bf5 初始化项目,由ModelHub XC社区提供模型
Model: divakar-yadav/transformer-1b-chat
Source: Original Platform
2026-06-20 17:27:58 +08:00

325 lines
29 KiB
JSON

{"step": 10, "epoch": 1, "loss": 2.1095, "lr": 9.000000000000001e-07, "elapsed_s": 11.3}
{"step": 20, "epoch": 1, "loss": 1.8581, "lr": 1.9e-06, "elapsed_s": 20.6}
{"step": 30, "epoch": 1, "loss": 1.6729, "lr": 2.9e-06, "elapsed_s": 30.0}
{"step": 40, "epoch": 1, "loss": 1.6325, "lr": 3.900000000000001e-06, "elapsed_s": 39.3}
{"step": 50, "epoch": 1, "loss": 1.5802, "lr": 4.9000000000000005e-06, "elapsed_s": 48.7}
{"step": 60, "epoch": 1, "loss": 1.5845, "lr": 5.9e-06, "elapsed_s": 58.1}
{"step": 70, "epoch": 1, "loss": 1.5295, "lr": 6.900000000000001e-06, "elapsed_s": 67.4}
{"step": 80, "epoch": 1, "loss": 1.5132, "lr": 7.9e-06, "elapsed_s": 76.8}
{"step": 90, "epoch": 1, "loss": 1.5379, "lr": 8.900000000000001e-06, "elapsed_s": 86.2}
{"step": 100, "epoch": 1, "loss": 1.4603, "lr": 9.9e-06, "elapsed_s": 95.5}
{"step": 110, "epoch": 1, "loss": 1.468, "lr": 1.09e-05, "elapsed_s": 104.9}
{"step": 120, "epoch": 1, "loss": 1.4928, "lr": 1.1900000000000001e-05, "elapsed_s": 114.3}
{"step": 130, "epoch": 1, "loss": 1.4656, "lr": 1.2900000000000002e-05, "elapsed_s": 123.6}
{"step": 140, "epoch": 1, "loss": 1.424, "lr": 1.3900000000000002e-05, "elapsed_s": 133.0}
{"step": 150, "epoch": 1, "loss": 1.4656, "lr": 1.4900000000000003e-05, "elapsed_s": 142.4}
{"step": 160, "epoch": 1, "loss": 1.3754, "lr": 1.59e-05, "elapsed_s": 151.7}
{"step": 170, "epoch": 1, "loss": 1.4413, "lr": 1.69e-05, "elapsed_s": 161.1}
{"step": 180, "epoch": 1, "loss": 1.4202, "lr": 1.79e-05, "elapsed_s": 170.4}
{"step": 190, "epoch": 1, "loss": 1.4522, "lr": 1.8900000000000002e-05, "elapsed_s": 179.8}
{"step": 200, "epoch": 1, "loss": 1.3809, "lr": 1.99e-05, "elapsed_s": 189.2}
{"step": 210, "epoch": 1, "loss": 1.4069, "lr": 1.9999612774242138e-05, "elapsed_s": 198.5}
{"step": 220, "epoch": 1, "loss": 1.4143, "lr": 1.9998274258845686e-05, "elapsed_s": 207.9}
{"step": 230, "epoch": 1, "loss": 1.4054, "lr": 1.9995979815408517e-05, "elapsed_s": 217.3}
{"step": 240, "epoch": 1, "loss": 1.4373, "lr": 1.9992729687679906e-05, "elapsed_s": 226.6}
{"step": 250, "epoch": 1, "loss": 1.3924, "lr": 1.9988524220935858e-05, "elapsed_s": 236.0}
{"step": 260, "epoch": 1, "loss": 1.4041, "lr": 1.9983363861942443e-05, "elapsed_s": 245.4}
{"step": 270, "epoch": 1, "loss": 1.3964, "lr": 1.997724915890832e-05, "elapsed_s": 254.7}
{"step": 280, "epoch": 1, "loss": 1.3783, "lr": 1.9970180761426505e-05, "elapsed_s": 264.1}
{"step": 290, "epoch": 1, "loss": 1.3313, "lr": 1.996215942040535e-05, "elapsed_s": 273.4}
{"step": 300, "epoch": 1, "loss": 1.3576, "lr": 1.995318598798879e-05, "elapsed_s": 282.8}
{"step": 310, "epoch": 1, "loss": 1.3684, "lr": 1.9943261417465805e-05, "elapsed_s": 292.1}
{"step": 320, "epoch": 1, "loss": 1.3309, "lr": 1.9932386763169144e-05, "elapsed_s": 301.5}
{"step": 330, "epoch": 1, "loss": 1.3728, "lr": 1.9920563180363322e-05, "elapsed_s": 310.9}
{"step": 340, "epoch": 1, "loss": 1.4069, "lr": 1.9907791925121902e-05, "elapsed_s": 320.2}
{"step": 350, "epoch": 1, "loss": 1.3658, "lr": 1.9894074354194032e-05, "elapsed_s": 329.6}
{"step": 360, "epoch": 1, "loss": 1.3596, "lr": 1.987941192486034e-05, "elapsed_s": 339.0}
{"step": 370, "epoch": 1, "loss": 1.3535, "lr": 1.986380619477809e-05, "elapsed_s": 348.3}
{"step": 380, "epoch": 1, "loss": 1.3938, "lr": 1.984725882181574e-05, "elapsed_s": 357.7}
{"step": 390, "epoch": 1, "loss": 1.3241, "lr": 1.9829771563876787e-05, "elapsed_s": 367.0}
{"step": 400, "epoch": 1, "loss": 1.3316, "lr": 1.9811346278713027e-05, "elapsed_s": 376.4}
{"step": 410, "epoch": 1, "loss": 1.3579, "lr": 1.9791984923727213e-05, "elapsed_s": 385.8}
{"step": 420, "epoch": 1, "loss": 1.3888, "lr": 1.9771689555765092e-05, "elapsed_s": 395.1}
{"step": 430, "epoch": 1, "loss": 1.3647, "lr": 1.97504623308969e-05, "elapsed_s": 404.5}
{"step": 440, "epoch": 1, "loss": 1.395, "lr": 1.9728305504188318e-05, "elapsed_s": 413.8}
{"step": 450, "epoch": 1, "loss": 1.3856, "lr": 1.9705221429460907e-05, "elapsed_s": 423.2}
{"step": 460, "epoch": 1, "loss": 1.2899, "lr": 1.9681212559042047e-05, "elapsed_s": 432.6}
{"step": 470, "epoch": 1, "loss": 1.2937, "lr": 1.9656281443504413e-05, "elapsed_s": 441.9}
{"step": 480, "epoch": 1, "loss": 1.3547, "lr": 1.963043073139502e-05, "elapsed_s": 451.3}
{"step": 490, "epoch": 1, "loss": 1.341, "lr": 1.9603663168953853e-05, "elapsed_s": 460.7}
{"step": 500, "epoch": 1, "loss": 1.3235, "lr": 1.9575981599822124e-05, "elapsed_s": 470.0}
{"step": 510, "epoch": 1, "loss": 1.3391, "lr": 1.9547388964740182e-05, "elapsed_s": 490.7}
{"step": 520, "epoch": 1, "loss": 1.3466, "lr": 1.951788830123509e-05, "elapsed_s": 500.1}
{"step": 530, "epoch": 1, "loss": 1.3823, "lr": 1.9487482743297954e-05, "elapsed_s": 509.5}
{"step": 540, "epoch": 1, "loss": 1.3413, "lr": 1.945617552105097e-05, "elapsed_s": 518.8}
{"step": 550, "epoch": 1, "loss": 1.3231, "lr": 1.9423969960404283e-05, "elapsed_s": 528.2}
{"step": 560, "epoch": 1, "loss": 1.3231, "lr": 1.939086948270265e-05, "elapsed_s": 537.5}
{"step": 570, "epoch": 1, "loss": 1.3111, "lr": 1.9356877604361987e-05, "elapsed_s": 546.9}
{"step": 580, "epoch": 1, "loss": 1.3378, "lr": 1.9321997936495792e-05, "elapsed_s": 556.2}
{"step": 590, "epoch": 1, "loss": 1.3231, "lr": 1.9286234184531536e-05, "elapsed_s": 565.6}
{"step": 600, "epoch": 1, "loss": 1.363, "lr": 1.924959014781699e-05, "elapsed_s": 575.0}
{"step": 610, "epoch": 1, "loss": 1.2943, "lr": 1.9212069719216638e-05, "elapsed_s": 584.3}
{"step": 620, "epoch": 1, "loss": 1.3628, "lr": 1.9173676884698097e-05, "elapsed_s": 593.7}
{"step": 630, "epoch": 1, "loss": 1.3413, "lr": 1.9134415722908673e-05, "elapsed_s": 603.1}
{"step": 640, "epoch": 1, "loss": 1.3496, "lr": 1.909429040474207e-05, "elapsed_s": 612.4}
{"step": 650, "epoch": 1, "loss": 1.3215, "lr": 1.9053305192895297e-05, "elapsed_s": 621.8}
{"step": 660, "epoch": 1, "loss": 1.3538, "lr": 1.901146444141583e-05, "elapsed_s": 631.2}
{"step": 670, "epoch": 1, "loss": 1.3237, "lr": 1.8968772595239035e-05, "elapsed_s": 640.5}
{"step": 680, "epoch": 1, "loss": 1.2838, "lr": 1.8925234189716e-05, "elapsed_s": 649.9}
{"step": 690, "epoch": 1, "loss": 1.3398, "lr": 1.8880853850131694e-05, "elapsed_s": 659.3}
{"step": 700, "epoch": 1, "loss": 1.3126, "lr": 1.883563629121361e-05, "elapsed_s": 668.6}
{"step": 710, "epoch": 1, "loss": 1.368, "lr": 1.8789586316630903e-05, "elapsed_s": 678.0}
{"step": 720, "epoch": 1, "loss": 1.2845, "lr": 1.874270881848407e-05, "elapsed_s": 687.4}
{"step": 730, "epoch": 1, "loss": 1.3338, "lr": 1.8695008776785244e-05, "elapsed_s": 696.7}
{"step": 740, "epoch": 1, "loss": 1.3256, "lr": 1.8646491258929136e-05, "elapsed_s": 706.1}
{"step": 750, "epoch": 1, "loss": 1.379, "lr": 1.8597161419154707e-05, "elapsed_s": 715.4}
{"step": 760, "epoch": 1, "loss": 1.3103, "lr": 1.8547024497997615e-05, "elapsed_s": 724.8}
{"step": 770, "epoch": 1, "loss": 1.3564, "lr": 1.8496085821733482e-05, "elapsed_s": 734.2}
{"step": 780, "epoch": 1, "loss": 1.3612, "lr": 1.844435080181205e-05, "elapsed_s": 743.5}
{"step": 790, "epoch": 1, "loss": 1.3408, "lr": 1.839182493428233e-05, "elapsed_s": 752.9}
{"step": 800, "epoch": 1, "loss": 1.3056, "lr": 1.8338513799208684e-05, "elapsed_s": 762.2}
{"step": 810, "epoch": 1, "loss": 1.3447, "lr": 1.8284423060078082e-05, "elapsed_s": 771.6}
{"step": 820, "epoch": 1, "loss": 1.3323, "lr": 1.8229558463198396e-05, "elapsed_s": 781.0}
{"step": 830, "epoch": 1, "loss": 1.3024, "lr": 1.8173925837087975e-05, "elapsed_s": 790.3}
{"step": 840, "epoch": 1, "loss": 1.3355, "lr": 1.8117531091856436e-05, "elapsed_s": 799.7}
{"step": 850, "epoch": 1, "loss": 1.3239, "lr": 1.8060380218576828e-05, "elapsed_s": 809.0}
{"step": 860, "epoch": 1, "loss": 1.3156, "lr": 1.8002479288649142e-05, "elapsed_s": 818.4}
{"step": 870, "epoch": 1, "loss": 1.3339, "lr": 1.794383445315534e-05, "elapsed_s": 827.8}
{"step": 880, "epoch": 1, "loss": 1.3303, "lr": 1.7884451942205902e-05, "elapsed_s": 837.1}
{"step": 890, "epoch": 1, "loss": 1.2924, "lr": 1.782433806427795e-05, "elapsed_s": 846.5}
{"step": 900, "epoch": 1, "loss": 1.3421, "lr": 1.7763499205545092e-05, "elapsed_s": 855.9}
{"step": 910, "epoch": 1, "loss": 1.2697, "lr": 1.7701941829198966e-05, "elapsed_s": 865.2}
{"step": 920, "epoch": 1, "loss": 1.3115, "lr": 1.7639672474762658e-05, "elapsed_s": 874.6}
{"step": 930, "epoch": 1, "loss": 1.2875, "lr": 1.7576697757395946e-05, "elapsed_s": 884.0}
{"step": 940, "epoch": 1, "loss": 1.2877, "lr": 1.7513024367192556e-05, "elapsed_s": 893.3}
{"step": 950, "epoch": 1, "loss": 1.3348, "lr": 1.7448659068469446e-05, "elapsed_s": 902.7}
{"step": 960, "epoch": 1, "loss": 1.3133, "lr": 1.7383608699048193e-05, "elapsed_s": 912.1}
{"step": 970, "epoch": 1, "loss": 1.3313, "lr": 1.731788016952859e-05, "elapsed_s": 921.4}
{"step": 980, "epoch": 1, "loss": 1.3189, "lr": 1.725148046255449e-05, "elapsed_s": 930.8}
{"step": 990, "epoch": 1, "loss": 1.3097, "lr": 1.7184416632072002e-05, "elapsed_s": 940.2}
{"step": 1000, "epoch": 1, "loss": 1.2952, "lr": 1.7116695802580155e-05, "elapsed_s": 949.5}
{"step": 1010, "epoch": 1, "loss": 1.3551, "lr": 1.7048325168373977e-05, "elapsed_s": 974.2}
{"step": 1020, "epoch": 1, "loss": 1.3245, "lr": 1.697931199278025e-05, "elapsed_s": 983.6}
{"step": 1030, "epoch": 1, "loss": 1.302, "lr": 1.690966360738588e-05, "elapsed_s": 993.0}
{"step": 1040, "epoch": 1, "loss": 1.2576, "lr": 1.6839387411259027e-05, "elapsed_s": 1002.3}
{"step": 1050, "epoch": 1, "loss": 1.3409, "lr": 1.676849087016308e-05, "elapsed_s": 1011.7}
{"step": 1060, "epoch": 1, "loss": 1.3087, "lr": 1.669698151576352e-05, "elapsed_s": 1021.0}
{"step": 1070, "epoch": 1, "loss": 1.3103, "lr": 1.662486694482779e-05, "elapsed_s": 1030.4}
{"step": 1080, "epoch": 1, "loss": 1.3006, "lr": 1.65521548184183e-05, "elapsed_s": 1039.7}
{"step": 1090, "epoch": 1, "loss": 1.3006, "lr": 1.6478852861078486e-05, "elapsed_s": 1049.1}
{"step": 1100, "epoch": 1, "loss": 1.3208, "lr": 1.6404968860012266e-05, "elapsed_s": 1058.5}
{"step": 1110, "epoch": 1, "loss": 1.3296, "lr": 1.633051066425673e-05, "elapsed_s": 1067.9}
{"step": 1120, "epoch": 1, "loss": 1.325, "lr": 1.6255486183848293e-05, "elapsed_s": 1077.2}
{"step": 1130, "epoch": 1, "loss": 1.2892, "lr": 1.6179903388982417e-05, "elapsed_s": 1086.6}
{"step": 1140, "epoch": 1, "loss": 1.3014, "lr": 1.6103770309166864e-05, "elapsed_s": 1095.9}
{"step": 1150, "epoch": 1, "loss": 1.2787, "lr": 1.602709503236869e-05, "elapsed_s": 1105.3}
{"step": 1160, "epoch": 1, "loss": 1.3036, "lr": 1.5949885704155044e-05, "elapsed_s": 1114.7}
{"step": 1170, "epoch": 1, "loss": 1.2744, "lr": 1.587215052682779e-05, "elapsed_s": 1124.0}
{"step": 1180, "epoch": 1, "loss": 1.2592, "lr": 1.5793897758552187e-05, "elapsed_s": 1133.3}
{"step": 1190, "epoch": 1, "loss": 1.2926, "lr": 1.571513571247954e-05, "elapsed_s": 1142.7}
{"step": 1200, "epoch": 1, "loss": 1.2902, "lr": 1.5635872755864088e-05, "elapsed_s": 1152.1}
{"step": 1210, "epoch": 1, "loss": 1.3357, "lr": 1.5556117309174085e-05, "elapsed_s": 1161.4}
{"step": 1220, "epoch": 1, "loss": 1.3249, "lr": 1.5475877845197284e-05, "elapsed_s": 1170.8}
{"step": 1230, "epoch": 1, "loss": 1.3064, "lr": 1.5395162888140815e-05, "elapsed_s": 1180.2}
{"step": 1240, "epoch": 1, "loss": 1.2959, "lr": 1.531398101272562e-05, "elapsed_s": 1189.5}
{"step": 1250, "epoch": 1, "loss": 1.2756, "lr": 1.523234084327553e-05, "elapsed_s": 1198.9}
{"step": 1260, "epoch": 1, "loss": 1.3085, "lr": 1.5150251052801055e-05, "elapsed_s": 1208.3}
{"step": 1270, "epoch": 1, "loss": 1.2786, "lr": 1.5067720362078014e-05, "elapsed_s": 1217.6}
{"step": 1280, "epoch": 1, "loss": 1.2962, "lr": 1.498475753872109e-05, "elapsed_s": 1227.0}
{"step": 1290, "epoch": 1, "loss": 1.3318, "lr": 1.4901371396252392e-05, "elapsed_s": 1236.4}
{"step": 1300, "epoch": 1, "loss": 1.3128, "lr": 1.4817570793165175e-05, "elapsed_s": 1245.7}
{"step": 1310, "epoch": 1, "loss": 1.2791, "lr": 1.473336463198275e-05, "elapsed_s": 1255.1}
{"step": 1320, "epoch": 1, "loss": 1.2507, "lr": 1.4648761858312718e-05, "elapsed_s": 1264.5}
{"step": 1330, "epoch": 1, "loss": 1.3079, "lr": 1.456377145989666e-05, "elapsed_s": 1273.8}
{"step": 1340, "epoch": 1, "loss": 1.2738, "lr": 1.4478402465655313e-05, "elapsed_s": 1283.2}
{"step": 1350, "epoch": 1, "loss": 1.2544, "lr": 1.4392663944729386e-05, "elapsed_s": 1292.5}
{"step": 1360, "epoch": 1, "loss": 1.298, "lr": 1.4306565005516104e-05, "elapsed_s": 1301.9}
{"step": 1370, "epoch": 1, "loss": 1.272, "lr": 1.4220114794701593e-05, "elapsed_s": 1311.2}
{"step": 1380, "epoch": 1, "loss": 1.2523, "lr": 1.4133322496289168e-05, "elapsed_s": 1320.6}
{"step": 1390, "epoch": 1, "loss": 1.258, "lr": 1.4046197330623684e-05, "elapsed_s": 1330.0}
{"step": 1400, "epoch": 1, "loss": 1.2995, "lr": 1.3958748553412014e-05, "elapsed_s": 1339.3}
{"step": 1410, "epoch": 1, "loss": 1.312, "lr": 1.3870985454739776e-05, "elapsed_s": 1348.7}
{"step": 1420, "epoch": 1, "loss": 1.2912, "lr": 1.37829173580844e-05, "elapsed_s": 1358.1}
{"step": 1430, "epoch": 1, "loss": 1.2625, "lr": 1.369455361932465e-05, "elapsed_s": 1367.4}
{"step": 1440, "epoch": 1, "loss": 1.2606, "lr": 1.3605903625746721e-05, "elapsed_s": 1376.8}
{"step": 1450, "epoch": 1, "loss": 1.2693, "lr": 1.3516976795046961e-05, "elapsed_s": 1386.1}
{"step": 1460, "epoch": 1, "loss": 1.2725, "lr": 1.3427782574331403e-05, "elapsed_s": 1395.5}
{"step": 1470, "epoch": 1, "loss": 1.2454, "lr": 1.3338330439112152e-05, "elapsed_s": 1404.9}
{"step": 1480, "epoch": 1, "loss": 1.2741, "lr": 1.3248629892300753e-05, "elapsed_s": 1414.2}
{"step": 1490, "epoch": 1, "loss": 1.2666, "lr": 1.3158690463198665e-05, "elapsed_s": 1423.6}
{"step": 1500, "epoch": 1, "loss": 1.3066, "lr": 1.3068521706484893e-05, "elapsed_s": 1433.0}
{"step": 1510, "epoch": 1, "loss": 1.2859, "lr": 1.2978133201200992e-05, "elapsed_s": 1454.2}
{"step": 1520, "epoch": 1, "loss": 1.3685, "lr": 1.2887534549733395e-05, "elapsed_s": 1463.5}
{"step": 1530, "epoch": 1, "loss": 1.2583, "lr": 1.279673537679335e-05, "elapsed_s": 1472.9}
{"step": 1540, "epoch": 1, "loss": 1.3214, "lr": 1.2705745328394408e-05, "elapsed_s": 1482.2}
{"step": 1550, "epoch": 1, "loss": 1.2376, "lr": 1.2614574070827704e-05, "elapsed_s": 1491.6}
{"step": 1560, "epoch": 1, "loss": 1.2253, "lr": 1.252323128963506e-05, "elapsed_s": 1501.0}
{"step": 1570, "epoch": 1, "loss": 1.2817, "lr": 1.2431726688580025e-05, "elapsed_s": 1510.3}
{"step": 1580, "epoch": 1, "loss": 1.2539, "lr": 1.234006998861704e-05, "elapsed_s": 1519.7}
{"step": 1590, "epoch": 1, "loss": 1.2711, "lr": 1.224827092685869e-05, "elapsed_s": 1529.1}
{"step": 1600, "epoch": 1, "loss": 1.3262, "lr": 1.2156339255541325e-05, "elapsed_s": 1538.4}
{"step": 1610, "epoch": 1, "loss": 1.2492, "lr": 1.2064284740989003e-05, "elapsed_s": 1547.8}
{"step": 1620, "epoch": 1, "loss": 1.2889, "lr": 1.1972117162575997e-05, "elapsed_s": 1557.2}
{"step": 1630, "epoch": 2, "loss": 1.2466, "lr": 1.1879846311687867e-05, "elapsed_s": 1568.2}
{"step": 1640, "epoch": 2, "loss": 1.2516, "lr": 1.1787481990681277e-05, "elapsed_s": 1577.5}
{"step": 1650, "epoch": 2, "loss": 1.2368, "lr": 1.1695034011842666e-05, "elapsed_s": 1586.9}
{"step": 1660, "epoch": 2, "loss": 1.2125, "lr": 1.1602512196345819e-05, "elapsed_s": 1596.2}
{"step": 1670, "epoch": 2, "loss": 1.2006, "lr": 1.150992637320853e-05, "elapsed_s": 1605.6}
{"step": 1680, "epoch": 2, "loss": 1.2363, "lr": 1.1417286378248416e-05, "elapsed_s": 1615.0}
{"step": 1690, "epoch": 2, "loss": 1.2311, "lr": 1.1324602053038026e-05, "elapsed_s": 1624.4}
{"step": 1700, "epoch": 2, "loss": 1.2346, "lr": 1.1231883243859305e-05, "elapsed_s": 1633.7}
{"step": 1710, "epoch": 2, "loss": 1.2462, "lr": 1.113913980065759e-05, "elapsed_s": 1643.1}
{"step": 1720, "epoch": 2, "loss": 1.2654, "lr": 1.104638157599521e-05, "elapsed_s": 1652.4}
{"step": 1730, "epoch": 2, "loss": 1.2545, "lr": 1.0953618424004792e-05, "elapsed_s": 1661.8}
{"step": 1740, "epoch": 2, "loss": 1.2452, "lr": 1.0860860199342411e-05, "elapsed_s": 1671.2}
{"step": 1750, "epoch": 2, "loss": 1.2416, "lr": 1.0768116756140696e-05, "elapsed_s": 1680.5}
{"step": 1760, "epoch": 2, "loss": 1.2029, "lr": 1.0675397946961972e-05, "elapsed_s": 1689.9}
{"step": 1770, "epoch": 2, "loss": 1.2086, "lr": 1.0582713621751584e-05, "elapsed_s": 1699.2}
{"step": 1780, "epoch": 2, "loss": 1.2165, "lr": 1.049007362679147e-05, "elapsed_s": 1708.6}
{"step": 1790, "epoch": 2, "loss": 1.1799, "lr": 1.039748780365418e-05, "elapsed_s": 1718.0}
{"step": 1800, "epoch": 2, "loss": 1.2598, "lr": 1.0304965988157335e-05, "elapsed_s": 1727.3}
{"step": 1810, "epoch": 2, "loss": 1.2294, "lr": 1.0212518009318725e-05, "elapsed_s": 1736.7}
{"step": 1820, "epoch": 2, "loss": 1.2215, "lr": 1.0120153688312134e-05, "elapsed_s": 1746.1}
{"step": 1830, "epoch": 2, "loss": 1.2428, "lr": 1.0027882837424002e-05, "elapsed_s": 1755.4}
{"step": 1840, "epoch": 2, "loss": 1.1979, "lr": 9.935715259010998e-06, "elapsed_s": 1764.8}
{"step": 1850, "epoch": 2, "loss": 1.2127, "lr": 9.843660744458676e-06, "elapsed_s": 1774.2}
{"step": 1860, "epoch": 2, "loss": 1.2166, "lr": 9.751729073141308e-06, "elapsed_s": 1783.6}
{"step": 1870, "epoch": 2, "loss": 1.2319, "lr": 9.659930011382963e-06, "elapsed_s": 1793.0}
{"step": 1880, "epoch": 2, "loss": 1.1878, "lr": 9.568273311419975e-06, "elapsed_s": 1802.3}
{"step": 1890, "epoch": 2, "loss": 1.2353, "lr": 9.476768710364943e-06, "elapsed_s": 1811.7}
{"step": 1900, "epoch": 2, "loss": 1.1796, "lr": 9.385425929172294e-06, "elapsed_s": 1821.1}
{"step": 1910, "epoch": 2, "loss": 1.1965, "lr": 9.294254671605594e-06, "elapsed_s": 1830.4}
{"step": 1920, "epoch": 2, "loss": 1.2011, "lr": 9.20326462320665e-06, "elapsed_s": 1839.8}
{"step": 1930, "epoch": 2, "loss": 1.207, "lr": 9.112465450266603e-06, "elapsed_s": 1849.2}
{"step": 1940, "epoch": 2, "loss": 1.2013, "lr": 9.021866798799013e-06, "elapsed_s": 1858.6}
{"step": 1950, "epoch": 2, "loss": 1.2454, "lr": 8.931478293515108e-06, "elapsed_s": 1867.9}
{"step": 1960, "epoch": 2, "loss": 1.2415, "lr": 8.841309536801337e-06, "elapsed_s": 1877.3}
{"step": 1970, "epoch": 2, "loss": 1.2149, "lr": 8.751370107699245e-06, "elapsed_s": 1886.7}
{"step": 1980, "epoch": 2, "loss": 1.2339, "lr": 8.66166956088785e-06, "elapsed_s": 1896.1}
{"step": 1990, "epoch": 2, "loss": 1.2489, "lr": 8.572217425668599e-06, "elapsed_s": 1905.4}
{"step": 2000, "epoch": 2, "loss": 1.2701, "lr": 8.48302320495304e-06, "elapsed_s": 1914.8}
{"step": 2010, "epoch": 2, "loss": 1.2353, "lr": 8.394096374253282e-06, "elapsed_s": 1935.5}
{"step": 2020, "epoch": 2, "loss": 1.2168, "lr": 8.30544638067535e-06, "elapsed_s": 1944.9}
{"step": 2030, "epoch": 2, "loss": 1.2411, "lr": 8.217082641915602e-06, "elapsed_s": 1954.3}
{"step": 2040, "epoch": 2, "loss": 1.2011, "lr": 8.129014545260226e-06, "elapsed_s": 1963.6}
{"step": 2050, "epoch": 2, "loss": 1.2018, "lr": 8.041251446587989e-06, "elapsed_s": 1973.0}
{"step": 2060, "epoch": 2, "loss": 1.2175, "lr": 7.953802669376318e-06, "elapsed_s": 1982.3}
{"step": 2070, "epoch": 2, "loss": 1.2739, "lr": 7.866677503710832e-06, "elapsed_s": 1991.7}
{"step": 2080, "epoch": 2, "loss": 1.2066, "lr": 7.779885205298407e-06, "elapsed_s": 2001.1}
{"step": 2090, "epoch": 2, "loss": 1.2177, "lr": 7.693434994483897e-06, "elapsed_s": 2010.5}
{"step": 2100, "epoch": 2, "loss": 1.2246, "lr": 7.607336055270615e-06, "elapsed_s": 2019.8}
{"step": 2110, "epoch": 2, "loss": 1.1861, "lr": 7.521597534344686e-06, "elapsed_s": 2029.2}
{"step": 2120, "epoch": 2, "loss": 1.2146, "lr": 7.436228540103342e-06, "elapsed_s": 2038.6}
{"step": 2130, "epoch": 2, "loss": 1.2034, "lr": 7.351238141687283e-06, "elapsed_s": 2048.0}
{"step": 2140, "epoch": 2, "loss": 1.2174, "lr": 7.266635368017252e-06, "elapsed_s": 2057.3}
{"step": 2150, "epoch": 2, "loss": 1.1906, "lr": 7.182429206834824e-06, "elapsed_s": 2066.7}
{"step": 2160, "epoch": 2, "loss": 1.2253, "lr": 7.0986286037476105e-06, "elapsed_s": 2076.0}
{"step": 2170, "epoch": 2, "loss": 1.2498, "lr": 7.0152424612789135e-06, "elapsed_s": 2085.4}
{"step": 2180, "epoch": 2, "loss": 1.2238, "lr": 6.932279637921987e-06, "elapsed_s": 2094.8}
{"step": 2190, "epoch": 2, "loss": 1.1924, "lr": 6.8497489471989465e-06, "elapsed_s": 2104.1}
{"step": 2200, "epoch": 2, "loss": 1.2275, "lr": 6.767659156724471e-06, "elapsed_s": 2113.5}
{"step": 2210, "epoch": 2, "loss": 1.2431, "lr": 6.686018987274381e-06, "elapsed_s": 2122.9}
{"step": 2220, "epoch": 2, "loss": 1.2693, "lr": 6.604837111859187e-06, "elapsed_s": 2132.3}
{"step": 2230, "epoch": 2, "loss": 1.2064, "lr": 6.524122154802721e-06, "elapsed_s": 2141.6}
{"step": 2240, "epoch": 2, "loss": 1.2139, "lr": 6.44388269082592e-06, "elapsed_s": 2151.0}
{"step": 2250, "epoch": 2, "loss": 1.2408, "lr": 6.3641272441359165e-06, "elapsed_s": 2160.4}
{"step": 2260, "epoch": 2, "loss": 1.2177, "lr": 6.28486428752046e-06, "elapsed_s": 2169.7}
{"step": 2270, "epoch": 2, "loss": 1.2337, "lr": 6.206102241447814e-06, "elapsed_s": 2179.1}
{"step": 2280, "epoch": 2, "loss": 1.2438, "lr": 6.127849473172208e-06, "elapsed_s": 2188.5}
{"step": 2290, "epoch": 2, "loss": 1.2196, "lr": 6.050114295844959e-06, "elapsed_s": 2197.8}
{"step": 2300, "epoch": 2, "loss": 1.2225, "lr": 5.972904967631312e-06, "elapsed_s": 2207.2}
{"step": 2310, "epoch": 2, "loss": 1.1983, "lr": 5.8962296908331385e-06, "elapsed_s": 2216.6}
{"step": 2320, "epoch": 2, "loss": 1.19, "lr": 5.820096611017584e-06, "elapsed_s": 2226.0}
{"step": 2330, "epoch": 2, "loss": 1.2123, "lr": 5.744513816151708e-06, "elapsed_s": 2235.3}
{"step": 2340, "epoch": 2, "loss": 1.2204, "lr": 5.6694893357432744e-06, "elapsed_s": 2244.7}
{"step": 2350, "epoch": 2, "loss": 1.2133, "lr": 5.595031139987734e-06, "elapsed_s": 2254.1}
{"step": 2360, "epoch": 2, "loss": 1.2689, "lr": 5.5211471389215135e-06, "elapsed_s": 2263.5}
{"step": 2370, "epoch": 2, "loss": 1.2408, "lr": 5.447845181581706e-06, "elapsed_s": 2272.9}
{"step": 2380, "epoch": 2, "loss": 1.1791, "lr": 5.37513305517221e-06, "elapsed_s": 2282.2}
{"step": 2390, "epoch": 2, "loss": 1.2051, "lr": 5.303018484236485e-06, "elapsed_s": 2291.6}
{"step": 2400, "epoch": 2, "loss": 1.227, "lr": 5.23150912983692e-06, "elapsed_s": 2301.0}
{"step": 2410, "epoch": 2, "loss": 1.2673, "lr": 5.160612588740973e-06, "elapsed_s": 2310.4}
{"step": 2420, "epoch": 2, "loss": 1.2131, "lr": 5.090336392614121e-06, "elapsed_s": 2319.7}
{"step": 2430, "epoch": 2, "loss": 1.214, "lr": 5.020688007219751e-06, "elapsed_s": 2329.1}
{"step": 2440, "epoch": 2, "loss": 1.2311, "lr": 4.951674831626027e-06, "elapsed_s": 2338.5}
{"step": 2450, "epoch": 2, "loss": 1.2143, "lr": 4.883304197419848e-06, "elapsed_s": 2347.8}
{"step": 2460, "epoch": 2, "loss": 1.2233, "lr": 4.815583367927997e-06, "elapsed_s": 2357.2}
{"step": 2470, "epoch": 2, "loss": 1.2608, "lr": 4.748519537445514e-06, "elapsed_s": 2366.6}
{"step": 2480, "epoch": 2, "loss": 1.2123, "lr": 4.682119830471411e-06, "elapsed_s": 2375.9}
{"step": 2490, "epoch": 2, "loss": 1.2159, "lr": 4.616391300951807e-06, "elapsed_s": 2385.3}
{"step": 2500, "epoch": 2, "loss": 1.2447, "lr": 4.551340931530556e-06, "elapsed_s": 2394.7}
{"step": 2510, "epoch": 2, "loss": 1.1837, "lr": 4.486975632807449e-06, "elapsed_s": 2415.2}
{"step": 2520, "epoch": 2, "loss": 1.2267, "lr": 4.423302242604059e-06, "elapsed_s": 2424.6}
{"step": 2530, "epoch": 2, "loss": 1.1857, "lr": 4.360327525237345e-06, "elapsed_s": 2433.9}
{"step": 2540, "epoch": 2, "loss": 1.2175, "lr": 4.298058170801035e-06, "elapsed_s": 2443.3}
{"step": 2550, "epoch": 2, "loss": 1.215, "lr": 4.236500794454911e-06, "elapsed_s": 2452.7}
{"step": 2560, "epoch": 2, "loss": 1.2179, "lr": 4.17566193572205e-06, "elapsed_s": 2462.1}
{"step": 2570, "epoch": 2, "loss": 1.2297, "lr": 4.1155480577940984e-06, "elapsed_s": 2471.4}
{"step": 2580, "epoch": 2, "loss": 1.245, "lr": 4.056165546844662e-06, "elapsed_s": 2480.8}
{"step": 2590, "epoch": 2, "loss": 1.1813, "lr": 3.997520711350863e-06, "elapsed_s": 2490.2}
{"step": 2600, "epoch": 2, "loss": 1.1759, "lr": 3.939619781423175e-06, "elapsed_s": 2499.6}
{"step": 2610, "epoch": 2, "loss": 1.1998, "lr": 3.882468908143565e-06, "elapsed_s": 2508.9}
{"step": 2620, "epoch": 2, "loss": 1.1735, "lr": 3.826074162912028e-06, "elapsed_s": 2518.3}
{"step": 2630, "epoch": 2, "loss": 1.2292, "lr": 3.770441536801607e-06, "elapsed_s": 2527.7}
{"step": 2640, "epoch": 2, "loss": 1.178, "lr": 3.71557693992192e-06, "elapsed_s": 2537.1}
{"step": 2650, "epoch": 2, "loss": 1.2265, "lr": 3.6614862007913155e-06, "elapsed_s": 2546.4}
{"step": 2660, "epoch": 2, "loss": 1.2151, "lr": 3.608175065717676e-06, "elapsed_s": 2555.8}
{"step": 2670, "epoch": 2, "loss": 1.1929, "lr": 3.5556491981879526e-06, "elapsed_s": 2565.1}
{"step": 2680, "epoch": 2, "loss": 1.1812, "lr": 3.503914178266523e-06, "elapsed_s": 2574.5}
{"step": 2690, "epoch": 2, "loss": 1.2171, "lr": 3.452975502002387e-06, "elapsed_s": 2583.9}
{"step": 2700, "epoch": 2, "loss": 1.2061, "lr": 3.402838580845295e-06, "elapsed_s": 2593.3}
{"step": 2710, "epoch": 2, "loss": 1.2286, "lr": 3.353508741070866e-06, "elapsed_s": 2602.6}
{"step": 2720, "epoch": 2, "loss": 1.2086, "lr": 3.3049912232147573e-06, "elapsed_s": 2612.0}
{"step": 2730, "epoch": 2, "loss": 1.1707, "lr": 3.257291181515933e-06, "elapsed_s": 2621.4}
{"step": 2740, "epoch": 2, "loss": 1.1635, "lr": 3.210413683369101e-06, "elapsed_s": 2630.8}
{"step": 2750, "epoch": 2, "loss": 1.1937, "lr": 3.164363708786394e-06, "elapsed_s": 2640.1}
{"step": 2760, "epoch": 2, "loss": 1.2802, "lr": 3.119146149868308e-06, "elapsed_s": 2649.5}
{"step": 2770, "epoch": 2, "loss": 1.2147, "lr": 3.0747658102840005e-06, "elapsed_s": 2658.9}
{"step": 2780, "epoch": 2, "loss": 1.1958, "lr": 3.0312274047609644e-06, "elapsed_s": 2668.3}
{"step": 2790, "epoch": 2, "loss": 1.1907, "lr": 2.9885355585841722e-06, "elapsed_s": 2677.6}
{"step": 2800, "epoch": 2, "loss": 1.197, "lr": 2.9466948071047043e-06, "elapsed_s": 2687.0}
{"step": 2810, "epoch": 2, "loss": 1.1978, "lr": 2.9057095952579336e-06, "elapsed_s": 2696.4}
{"step": 2820, "epoch": 2, "loss": 1.2169, "lr": 2.8655842770913302e-06, "elapsed_s": 2705.8}
{"step": 2830, "epoch": 2, "loss": 1.2068, "lr": 2.826323115301905e-06, "elapsed_s": 2715.1}
{"step": 2840, "epoch": 2, "loss": 1.2295, "lr": 2.7879302807833625e-06, "elapsed_s": 2724.5}
{"step": 2850, "epoch": 2, "loss": 1.2024, "lr": 2.7504098521830113e-06, "elapsed_s": 2733.9}
{"step": 2860, "epoch": 2, "loss": 1.1798, "lr": 2.713765815468467e-06, "elapsed_s": 2743.3}
{"step": 2870, "epoch": 2, "loss": 1.2136, "lr": 2.67800206350421e-06, "elapsed_s": 2752.6}
{"step": 2880, "epoch": 2, "loss": 1.2771, "lr": 2.6431223956380163e-06, "elapsed_s": 2762.0}
{"step": 2890, "epoch": 2, "loss": 1.2585, "lr": 2.6091305172973524e-06, "elapsed_s": 2771.4}
{"step": 2900, "epoch": 2, "loss": 1.2018, "lr": 2.5760300395957185e-06, "elapsed_s": 2780.7}
{"step": 2910, "epoch": 2, "loss": 1.2763, "lr": 2.543824478949031e-06, "elapsed_s": 2790.1}
{"step": 2920, "epoch": 2, "loss": 1.1883, "lr": 2.5125172567020476e-06, "elapsed_s": 2799.5}
{"step": 2930, "epoch": 2, "loss": 1.1856, "lr": 2.4821116987649116e-06, "elapsed_s": 2808.9}
{"step": 2940, "epoch": 2, "loss": 1.2025, "lr": 2.4526110352598214e-06, "elapsed_s": 2818.2}
{"step": 2950, "epoch": 2, "loss": 1.1775, "lr": 2.424018400177877e-06, "elapsed_s": 2827.6}
{"step": 2960, "epoch": 2, "loss": 1.2086, "lr": 2.3963368310461503e-06, "elapsed_s": 2837.0}
{"step": 2970, "epoch": 2, "loss": 1.2529, "lr": 2.3695692686049823e-06, "elapsed_s": 2846.3}
{"step": 2980, "epoch": 2, "loss": 1.2575, "lr": 2.3437185564955893e-06, "elapsed_s": 2855.7}
{"step": 2990, "epoch": 2, "loss": 1.26, "lr": 2.3187874409579548e-06, "elapsed_s": 2865.1}
{"step": 3000, "epoch": 2, "loss": 1.2286, "lr": 2.294778570539094e-06, "elapsed_s": 2874.4}
{"step": 3010, "epoch": 2, "loss": 1.2415, "lr": 2.2716944958116844e-06, "elapsed_s": 2895.6}
{"step": 3020, "epoch": 2, "loss": 1.2682, "lr": 2.2495376691031034e-06, "elapsed_s": 2905.0}
{"step": 3030, "epoch": 2, "loss": 1.2742, "lr": 2.2283104442349107e-06, "elapsed_s": 2914.4}
{"step": 3040, "epoch": 2, "loss": 1.2586, "lr": 2.208015076272787e-06, "elapsed_s": 2923.7}
{"step": 3050, "epoch": 2, "loss": 1.2062, "lr": 2.1886537212869744e-06, "elapsed_s": 2933.1}
{"step": 3060, "epoch": 2, "loss": 1.258, "lr": 2.170228436123217e-06, "elapsed_s": 2942.5}
{"step": 3070, "epoch": 2, "loss": 1.189, "lr": 2.1527411781842617e-06, "elapsed_s": 2951.9}
{"step": 3080, "epoch": 2, "loss": 1.2488, "lr": 2.1361938052219115e-06, "elapsed_s": 2961.2}
{"step": 3090, "epoch": 2, "loss": 1.2498, "lr": 2.1205880751396636e-06, "elapsed_s": 2970.6}
{"step": 3100, "epoch": 2, "loss": 1.1549, "lr": 2.105925645805969e-06, "elapsed_s": 2979.9}
{"step": 3110, "epoch": 2, "loss": 1.2015, "lr": 2.0922080748780995e-06, "elapsed_s": 2989.3}
{"step": 3120, "epoch": 2, "loss": 1.2792, "lr": 2.079436819636678e-06, "elapsed_s": 2998.7}
{"step": 3130, "epoch": 2, "loss": 1.2746, "lr": 2.0676132368308576e-06, "elapsed_s": 3008.0}
{"step": 3140, "epoch": 2, "loss": 1.2482, "lr": 2.056738582534195e-06, "elapsed_s": 3017.4}
{"step": 3150, "epoch": 2, "loss": 1.2272, "lr": 2.04681401201121e-06, "elapsed_s": 3026.8}
{"step": 3160, "epoch": 2, "loss": 1.2122, "lr": 2.037840579594651e-06, "elapsed_s": 3036.1}
{"step": 3170, "epoch": 2, "loss": 1.175, "lr": 2.0298192385734965e-06, "elapsed_s": 3045.5}
{"step": 3180, "epoch": 2, "loss": 1.1944, "lr": 2.0227508410916793e-06, "elapsed_s": 3054.8}
{"step": 3190, "epoch": 2, "loss": 1.1978, "lr": 2.016636138057557e-06, "elapsed_s": 3064.2}
{"step": 3200, "epoch": 2, "loss": 1.2078, "lr": 2.011475779064144e-06, "elapsed_s": 3073.6}
{"step": 3210, "epoch": 2, "loss": 1.2308, "lr": 2.0072703123200985e-06, "elapsed_s": 3082.9}
{"step": 3220, "epoch": 2, "loss": 1.2213, "lr": 2.0040201845914854e-06, "elapsed_s": 3092.3}
{"step": 3230, "epoch": 2, "loss": 1.1818, "lr": 2.001725741154316e-06, "elapsed_s": 3101.7}
{"step": 3240, "epoch": 2, "loss": 1.2036, "lr": 2.0003872257578625e-06, "elapsed_s": 3111.1}