初始化项目,由ModelHub XC社区提供模型

Model: IonGrozea/whisper-medium_ro-80mel
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-12 13:17:24 +08:00
commit 43dc7f97c0
30 changed files with 663840 additions and 0 deletions

530
training_log.jsonl Normal file
View File

@@ -0,0 +1,530 @@
{"epoch": 0.015445431772218495, "grad_norm": 669.9632568359375, "learning_rate": 9.800000000000001e-07, "loss": 3.2493, "step": 50}
{"epoch": 0.03089086354443699, "grad_norm": 23.059450149536133, "learning_rate": 1.98e-06, "loss": 1.7909, "step": 100}
{"epoch": 0.046336295316655486, "grad_norm": 20.541318893432617, "learning_rate": 2.9800000000000003e-06, "loss": 1.0734, "step": 150}
{"epoch": 0.06178172708887398, "grad_norm": 19.459171295166016, "learning_rate": 3.980000000000001e-06, "loss": 0.5546, "step": 200}
{"epoch": 0.07722715886109248, "grad_norm": 12.552342414855957, "learning_rate": 4.980000000000001e-06, "loss": 0.4515, "step": 250}
{"epoch": 0.09267259063331097, "grad_norm": 11.14599609375, "learning_rate": 5.98e-06, "loss": 0.4284, "step": 300}
{"epoch": 0.10811802240552947, "grad_norm": 9.141664505004883, "learning_rate": 6.98e-06, "loss": 0.4006, "step": 350}
{"epoch": 0.12356345417774796, "grad_norm": 8.795552253723145, "learning_rate": 7.980000000000002e-06, "loss": 0.3781, "step": 400}
{"epoch": 0.13900888594996647, "grad_norm": 11.979874610900879, "learning_rate": 8.98e-06, "loss": 0.372, "step": 450}
{"epoch": 0.15445431772218496, "grad_norm": 8.280326843261719, "learning_rate": 9.980000000000001e-06, "loss": 0.3506, "step": 500}
{"epoch": 0.16989974949440345, "grad_norm": 8.773298263549805, "learning_rate": 9.999908203446256e-06, "loss": 0.3346, "step": 550}
{"epoch": 0.18534518126662194, "grad_norm": 9.848203659057617, "learning_rate": 9.999625285490022e-06, "loss": 0.311, "step": 600}
{"epoch": 0.20079061303884044, "grad_norm": 8.453655242919922, "learning_rate": 9.999151218715245e-06, "loss": 0.3134, "step": 650}
{"epoch": 0.21623604481105893, "grad_norm": 8.594161033630371, "learning_rate": 9.998486021246747e-06, "loss": 0.298, "step": 700}
{"epoch": 0.23168147658327742, "grad_norm": 10.005301475524902, "learning_rate": 9.997629718516775e-06, "loss": 0.2955, "step": 750}
{"epoch": 0.24712690835549592, "grad_norm": 7.731537342071533, "learning_rate": 9.99658234326403e-06, "loss": 0.2887, "step": 800}
{"epoch": 0.2625723401277144, "grad_norm": 10.453512191772461, "learning_rate": 9.995343935532425e-06, "loss": 0.2781, "step": 850}
{"epoch": 0.27801777189993293, "grad_norm": 9.661601066589355, "learning_rate": 9.993914542669543e-06, "loss": 0.2601, "step": 900}
{"epoch": 0.2934632036721514, "grad_norm": 8.928487777709961, "learning_rate": 9.992294219324829e-06, "loss": 0.2689, "step": 950}
{"epoch": 0.3089086354443699, "grad_norm": 8.46379280090332, "learning_rate": 9.990483027447507e-06, "loss": 0.2456, "step": 1000}
{"epoch": 0.3243540672165884, "grad_norm": 7.934906005859375, "learning_rate": 9.9884810362842e-06, "loss": 0.2465, "step": 1050}
{"epoch": 0.3397994989888069, "grad_norm": 8.935380935668945, "learning_rate": 9.9862883223763e-06, "loss": 0.2506, "step": 1100}
{"epoch": 0.35524493076102537, "grad_norm": 7.77918004989624, "learning_rate": 9.983904969557017e-06, "loss": 0.2405, "step": 1150}
{"epoch": 0.3706903625332439, "grad_norm": 8.004393577575684, "learning_rate": 9.981331068948201e-06, "loss": 0.2389, "step": 1200}
{"epoch": 0.38613579430546235, "grad_norm": 6.807073593139648, "learning_rate": 9.978566718956834e-06, "loss": 0.2263, "step": 1250}
{"epoch": 0.4015812260776809, "grad_norm": 8.159706115722656, "learning_rate": 9.975612025271287e-06, "loss": 0.2292, "step": 1300}
{"epoch": 0.41702665784989934, "grad_norm": 9.060724258422852, "learning_rate": 9.972467100857266e-06, "loss": 0.22, "step": 1350}
{"epoch": 0.43247208962211786, "grad_norm": 7.030888080596924, "learning_rate": 9.969132065953499e-06, "loss": 0.2261, "step": 1400}
{"epoch": 0.4479175213943363, "grad_norm": 7.7540154457092285, "learning_rate": 9.965607048067138e-06, "loss": 0.2217, "step": 1450}
{"epoch": 0.46336295316655485, "grad_norm": 7.522276401519775, "learning_rate": 9.961892181968886e-06, "loss": 0.2139, "step": 1500}
{"epoch": 0.47880838493877337, "grad_norm": 8.068655967712402, "learning_rate": 9.95798760968784e-06, "loss": 0.2139, "step": 1550}
{"epoch": 0.49425381671099183, "grad_norm": 7.6135029792785645, "learning_rate": 9.95389348050606e-06, "loss": 0.2108, "step": 1600}
{"epoch": 0.5096992484832104, "grad_norm": 7.2439165115356445, "learning_rate": 9.949609950952872e-06, "loss": 0.2086, "step": 1650}
{"epoch": 0.5251446802554288, "grad_norm": 7.373188018798828, "learning_rate": 9.945137184798867e-06, "loss": 0.2088, "step": 1700}
{"epoch": 0.5405901120276473, "grad_norm": 7.067547798156738, "learning_rate": 9.94047535304966e-06, "loss": 0.1986, "step": 1750}
{"epoch": 0.5560355437998659, "grad_norm": 6.83748197555542, "learning_rate": 9.935624633939332e-06, "loss": 0.1976, "step": 1800}
{"epoch": 0.5714809755720843, "grad_norm": 6.874297142028809, "learning_rate": 9.930585212923625e-06, "loss": 0.2022, "step": 1850}
{"epoch": 0.5869264073443028, "grad_norm": 7.241828441619873, "learning_rate": 9.925357282672854e-06, "loss": 0.1932, "step": 1900}
{"epoch": 0.6023718391165213, "grad_norm": 7.975379467010498, "learning_rate": 9.919941043064538e-06, "loss": 0.2061, "step": 1950}
{"epoch": 0.6178172708887398, "grad_norm": 7.61032772064209, "learning_rate": 9.914336701175757e-06, "loss": 0.1905, "step": 2000}
{"epoch": 0.6332627026609583, "grad_norm": 8.128066062927246, "learning_rate": 9.908544471275232e-06, "loss": 0.1891, "step": 2050}
{"epoch": 0.6487081344331768, "grad_norm": 7.489062786102295, "learning_rate": 9.90256457481514e-06, "loss": 0.195, "step": 2100}
{"epoch": 0.6641535662053952, "grad_norm": 6.639970302581787, "learning_rate": 9.896397240422649e-06, "loss": 0.1736, "step": 2150}
{"epoch": 0.6795989979776138, "grad_norm": 8.832330703735352, "learning_rate": 9.890042703891159e-06, "loss": 0.1885, "step": 2200}
{"epoch": 0.6950444297498323, "grad_norm": 6.815544128417969, "learning_rate": 9.88350120817131e-06, "loss": 0.176, "step": 2250}
{"epoch": 0.7104898615220507, "grad_norm": 5.902234077453613, "learning_rate": 9.876773003361685e-06, "loss": 0.1797, "step": 2300}
{"epoch": 0.7259352932942693, "grad_norm": 6.814188003540039, "learning_rate": 9.869858346699239e-06, "loss": 0.1764, "step": 2350}
{"epoch": 0.7413807250664878, "grad_norm": 5.749416351318359, "learning_rate": 9.862757502549475e-06, "loss": 0.173, "step": 2400}
{"epoch": 0.7568261568387062, "grad_norm": 6.667733192443848, "learning_rate": 9.85547074239633e-06, "loss": 0.168, "step": 2450}
{"epoch": 0.7722715886109247, "grad_norm": 7.026228904724121, "learning_rate": 9.847998344831804e-06, "loss": 0.1665, "step": 2500}
{"epoch": 0.7722715886109247, "eval_cer": 0.9148, "eval_loss": 0.08388473838567734, "eval_runtime": 31716.069, "eval_samples_per_second": 0.857, "eval_steps_per_second": 0.857, "eval_wer": 0.7855, "step": 2500}
{"epoch": 0.7877170203831433, "grad_norm": 6.534465789794922, "learning_rate": 9.840340595545302e-06, "loss": 0.1625, "step": 2550}
{"epoch": 0.8031624521553618, "grad_norm": 6.152275562286377, "learning_rate": 9.832497787312708e-06, "loss": 0.1684, "step": 2600}
{"epoch": 0.8186078839275802, "grad_norm": 6.8435282707214355, "learning_rate": 9.8244702199852e-06, "loss": 0.178, "step": 2650}
{"epoch": 0.8340533156997987, "grad_norm": 7.73077392578125, "learning_rate": 9.816258200477784e-06, "loss": 0.1754, "step": 2700}
{"epoch": 0.8494987474720173, "grad_norm": 5.495504856109619, "learning_rate": 9.807862042757553e-06, "loss": 0.1717, "step": 2750}
{"epoch": 0.8649441792442357, "grad_norm": 7.388563632965088, "learning_rate": 9.79928206783169e-06, "loss": 0.1628, "step": 2800}
{"epoch": 0.8803896110164542, "grad_norm": 7.5919365882873535, "learning_rate": 9.790518603735191e-06, "loss": 0.1638, "step": 2850}
{"epoch": 0.8958350427886727, "grad_norm": 7.306176662445068, "learning_rate": 9.78157198551833e-06, "loss": 0.1553, "step": 2900}
{"epoch": 0.9112804745608912, "grad_norm": 6.015721797943115, "learning_rate": 9.772442555233842e-06, "loss": 0.164, "step": 2950}
{"epoch": 0.9267259063331097, "grad_norm": 5.933145999908447, "learning_rate": 9.763130661923846e-06, "loss": 0.1669, "step": 3000}
{"epoch": 0.9421713381053282, "grad_norm": 5.938354015350342, "learning_rate": 9.753636661606504e-06, "loss": 0.1528, "step": 3050}
{"epoch": 0.9576167698775467, "grad_norm": 7.161938667297363, "learning_rate": 9.74396091726241e-06, "loss": 0.1599, "step": 3100}
{"epoch": 0.9730622016497652, "grad_norm": 5.638302803039551, "learning_rate": 9.734103798820705e-06, "loss": 0.1588, "step": 3150}
{"epoch": 0.9885076334219837, "grad_norm": 6.934235572814941, "learning_rate": 9.724065683144942e-06, "loss": 0.1501, "step": 3200}
{"epoch": 1.0037069036253325, "grad_norm": 2.2055320739746094, "learning_rate": 9.713846954018673e-06, "loss": 0.1627, "step": 3250}
{"epoch": 1.019152335397551, "grad_norm": 1.8734009265899658, "learning_rate": 9.703448002130778e-06, "loss": 0.1327, "step": 3300}
{"epoch": 1.0345977671697695, "grad_norm": 2.080509901046753, "learning_rate": 9.692869225060527e-06, "loss": 0.129, "step": 3350}
{"epoch": 1.0500431989419878, "grad_norm": 1.7221331596374512, "learning_rate": 9.682111027262374e-06, "loss": 0.1333, "step": 3400}
{"epoch": 1.0654886307142064, "grad_norm": 1.4521435499191284, "learning_rate": 9.671173820050509e-06, "loss": 0.1289, "step": 3450}
{"epoch": 1.080934062486425, "grad_norm": 2.1676228046417236, "learning_rate": 9.660058021583117e-06, "loss": 0.1323, "step": 3500}
{"epoch": 1.0963794942586433, "grad_norm": 2.0535106658935547, "learning_rate": 9.648764056846393e-06, "loss": 0.1258, "step": 3550}
{"epoch": 1.111824926030862, "grad_norm": 1.9602024555206299, "learning_rate": 9.637292357638303e-06, "loss": 0.1216, "step": 3600}
{"epoch": 1.1272703578030805, "grad_norm": 1.7838118076324463, "learning_rate": 9.625643362552066e-06, "loss": 0.1268, "step": 3650}
{"epoch": 1.1427157895752988, "grad_norm": 1.9576209783554077, "learning_rate": 9.613817516959389e-06, "loss": 0.127, "step": 3700}
{"epoch": 1.1581612213475174, "grad_norm": 2.0949792861938477, "learning_rate": 9.60181527299344e-06, "loss": 0.1256, "step": 3750}
{"epoch": 1.173606653119736, "grad_norm": 2.0513432025909424, "learning_rate": 9.589637089531555e-06, "loss": 0.1286, "step": 3800}
{"epoch": 1.1890520848919544, "grad_norm": 2.063535690307617, "learning_rate": 9.577283432177712e-06, "loss": 0.1264, "step": 3850}
{"epoch": 1.204497516664173, "grad_norm": 2.2408931255340576, "learning_rate": 9.564754773244702e-06, "loss": 0.121, "step": 3900}
{"epoch": 1.2199429484363913, "grad_norm": 1.9545060396194458, "learning_rate": 9.552051591736094e-06, "loss": 0.127, "step": 3950}
{"epoch": 1.2353883802086099, "grad_norm": 1.8761259317398071, "learning_rate": 9.539174373327917e-06, "loss": 0.1222, "step": 4000}
{"epoch": 1.2508338119808284, "grad_norm": 2.296283006668091, "learning_rate": 9.526123610350081e-06, "loss": 0.127, "step": 4050}
{"epoch": 1.2662792437530468, "grad_norm": 2.0646207332611084, "learning_rate": 9.512899801767568e-06, "loss": 0.1262, "step": 4100}
{"epoch": 1.2817246755252654, "grad_norm": 1.7854886054992676, "learning_rate": 9.499503453161345e-06, "loss": 0.1182, "step": 4150}
{"epoch": 1.2971701072974837, "grad_norm": 1.925052285194397, "learning_rate": 9.485935076709034e-06, "loss": 0.1225, "step": 4200}
{"epoch": 1.3126155390697023, "grad_norm": 1.780465841293335, "learning_rate": 9.472195191165341e-06, "loss": 0.1156, "step": 4250}
{"epoch": 1.3280609708419209, "grad_norm": 2.034806251525879, "learning_rate": 9.45828432184221e-06, "loss": 0.1168, "step": 4300}
{"epoch": 1.3435064026141394, "grad_norm": 1.9286572933197021, "learning_rate": 9.444203000588749e-06, "loss": 0.1239, "step": 4350}
{"epoch": 1.3589518343863578, "grad_norm": 1.8748537302017212, "learning_rate": 9.429951765770889e-06, "loss": 0.1165, "step": 4400}
{"epoch": 1.3743972661585764, "grad_norm": 1.9169151782989502, "learning_rate": 9.415531162250799e-06, "loss": 0.1197, "step": 4450}
{"epoch": 1.3898426979307947, "grad_norm": 1.848450779914856, "learning_rate": 9.400941741366067e-06, "loss": 0.1212, "step": 4500}
{"epoch": 1.4052881297030133, "grad_norm": 2.0307743549346924, "learning_rate": 9.386184060908607e-06, "loss": 0.1133, "step": 4550}
{"epoch": 1.4207335614752319, "grad_norm": 2.035086154937744, "learning_rate": 9.371258685103341e-06, "loss": 0.1141, "step": 4600}
{"epoch": 1.4361789932474502, "grad_norm": 2.0341715812683105, "learning_rate": 9.356166184586627e-06, "loss": 0.1197, "step": 4650}
{"epoch": 1.4516244250196688, "grad_norm": 1.8443515300750732, "learning_rate": 9.340907136384432e-06, "loss": 0.1144, "step": 4700}
{"epoch": 1.4670698567918872, "grad_norm": 1.5683515071868896, "learning_rate": 9.325482123890286e-06, "loss": 0.1181, "step": 4750}
{"epoch": 1.4825152885641057, "grad_norm": 1.9800331592559814, "learning_rate": 9.30989173684297e-06, "loss": 0.1151, "step": 4800}
{"epoch": 1.4979607203363243, "grad_norm": 1.7654318809509277, "learning_rate": 9.294136571303972e-06, "loss": 0.1156, "step": 4850}
{"epoch": 1.513406152108543, "grad_norm": 2.1010735034942627, "learning_rate": 9.278217229634681e-06, "loss": 0.1158, "step": 4900}
{"epoch": 1.5288515838807613, "grad_norm": 2.041102647781372, "learning_rate": 9.262134320473386e-06, "loss": 0.1164, "step": 4950}
{"epoch": 1.5442970156529796, "grad_norm": 1.864942193031311, "learning_rate": 9.245888458711986e-06, "loss": 0.108, "step": 5000}
{"epoch": 1.5442970156529796, "eval_cer": 2.035, "eval_loss": 0.07222334295511246, "eval_runtime": 39286.4718, "eval_samples_per_second": 0.692, "eval_steps_per_second": 0.692, "eval_wer": 2.1169, "step": 5000}
{"epoch": 1.5597424474251982, "grad_norm": 1.7110646963119507, "learning_rate": 9.229480265472483e-06, "loss": 0.1144, "step": 5050}
{"epoch": 1.5751878791974168, "grad_norm": 2.0892207622528076, "learning_rate": 9.212910368083246e-06, "loss": 0.1125, "step": 5100}
{"epoch": 1.5906333109696353, "grad_norm": 2.1225571632385254, "learning_rate": 9.19617940005501e-06, "loss": 0.1171, "step": 5150}
{"epoch": 1.6060787427418537, "grad_norm": 1.8015507459640503, "learning_rate": 9.17928800105667e-06, "loss": 0.1103, "step": 5200}
{"epoch": 1.6215241745140723, "grad_norm": 1.6901594400405884, "learning_rate": 9.162236816890816e-06, "loss": 0.1079, "step": 5250}
{"epoch": 1.6369696062862906, "grad_norm": 1.8348067998886108, "learning_rate": 9.145026499469045e-06, "loss": 0.1132, "step": 5300}
{"epoch": 1.6524150380585092, "grad_norm": 1.5873007774353027, "learning_rate": 9.127657706787039e-06, "loss": 0.1108, "step": 5350}
{"epoch": 1.6678604698307278, "grad_norm": 1.6405525207519531, "learning_rate": 9.110131102899403e-06, "loss": 0.107, "step": 5400}
{"epoch": 1.6833059016029464, "grad_norm": 1.8404589891433716, "learning_rate": 9.092447357894283e-06, "loss": 0.1047, "step": 5450}
{"epoch": 1.6987513333751647, "grad_norm": 1.9395616054534912, "learning_rate": 9.074607147867736e-06, "loss": 0.1103, "step": 5500}
{"epoch": 1.714196765147383, "grad_norm": 2.207754373550415, "learning_rate": 9.056611154897897e-06, "loss": 0.1118, "step": 5550}
{"epoch": 1.7296421969196016, "grad_norm": 1.9293147325515747, "learning_rate": 9.038460067018885e-06, "loss": 0.1104, "step": 5600}
{"epoch": 1.7450876286918202, "grad_norm": 1.9223028421401978, "learning_rate": 9.020154578194513e-06, "loss": 0.1084, "step": 5650}
{"epoch": 1.7605330604640388, "grad_norm": 1.7569994926452637, "learning_rate": 9.00169538829174e-06, "loss": 0.1018, "step": 5700}
{"epoch": 1.7759784922362571, "grad_norm": 2.1125595569610596, "learning_rate": 8.983083203053924e-06, "loss": 0.1073, "step": 5750}
{"epoch": 1.7914239240084757, "grad_norm": 2.003296136856079, "learning_rate": 8.964318734073839e-06, "loss": 0.1003, "step": 5800}
{"epoch": 1.806869355780694, "grad_norm": 1.670918583869934, "learning_rate": 8.945402698766465e-06, "loss": 0.1085, "step": 5850}
{"epoch": 1.8223147875529127, "grad_norm": 1.5286263227462769, "learning_rate": 8.92633582034156e-06, "loss": 0.1028, "step": 5900}
{"epoch": 1.8377602193251312, "grad_norm": 1.6136894226074219, "learning_rate": 8.907118827776016e-06, "loss": 0.1047, "step": 5950}
{"epoch": 1.8532056510973498, "grad_norm": 1.49209725856781, "learning_rate": 8.887752455785978e-06, "loss": 0.1015, "step": 6000}
{"epoch": 1.8686510828695682, "grad_norm": 1.9675025939941406, "learning_rate": 8.868237444798761e-06, "loss": 0.1061, "step": 6050}
{"epoch": 1.8840965146417865, "grad_norm": 1.893781065940857, "learning_rate": 8.84857454092454e-06, "loss": 0.1073, "step": 6100}
{"epoch": 1.899541946414005, "grad_norm": 1.8742083311080933, "learning_rate": 8.82876449592782e-06, "loss": 0.1016, "step": 6150}
{"epoch": 1.9149873781862237, "grad_norm": 1.6538318395614624, "learning_rate": 8.808808067198705e-06, "loss": 0.0999, "step": 6200}
{"epoch": 1.9304328099584422, "grad_norm": 1.6403934955596924, "learning_rate": 8.788706017723928e-06, "loss": 0.0998, "step": 6250}
{"epoch": 1.9458782417306606, "grad_norm": 1.6903971433639526, "learning_rate": 8.76845911605769e-06, "loss": 0.1012, "step": 6300}
{"epoch": 1.9613236735028792, "grad_norm": 1.7559572458267212, "learning_rate": 8.74806813629227e-06, "loss": 0.1035, "step": 6350}
{"epoch": 1.9767691052750975, "grad_norm": 1.546286940574646, "learning_rate": 8.72753385802843e-06, "loss": 0.1059, "step": 6400}
{"epoch": 1.992214537047316, "grad_norm": 1.9154149293899536, "learning_rate": 8.706857066345614e-06, "loss": 0.1028, "step": 6450}
{"epoch": 2.007413807250665, "grad_norm": 1.407510757446289, "learning_rate": 8.686038551771926e-06, "loss": 0.0984, "step": 6500}
{"epoch": 2.0228592390228832, "grad_norm": 1.0669528245925903, "learning_rate": 8.665079110253908e-06, "loss": 0.0868, "step": 6550}
{"epoch": 2.038304670795102, "grad_norm": 1.7095180749893188, "learning_rate": 8.643979543126114e-06, "loss": 0.0789, "step": 6600}
{"epoch": 2.0537501025673204, "grad_norm": 1.716349482536316, "learning_rate": 8.622740657080465e-06, "loss": 0.0838, "step": 6650}
{"epoch": 2.069195534339539, "grad_norm": 1.5523982048034668, "learning_rate": 8.601363264135408e-06, "loss": 0.0857, "step": 6700}
{"epoch": 2.0846409661117575, "grad_norm": 1.8714712858200073, "learning_rate": 8.579848181604879e-06, "loss": 0.0825, "step": 6750}
{"epoch": 2.1000863978839757, "grad_norm": 1.3873988389968872, "learning_rate": 8.558196232067044e-06, "loss": 0.0813, "step": 6800}
{"epoch": 2.1155318296561942, "grad_norm": 1.1852190494537354, "learning_rate": 8.536408243332857e-06, "loss": 0.0858, "step": 6850}
{"epoch": 2.130977261428413, "grad_norm": 1.771012306213379, "learning_rate": 8.514485048414408e-06, "loss": 0.0849, "step": 6900}
{"epoch": 2.1464226932006314, "grad_norm": 1.2716418504714966, "learning_rate": 8.49242748549307e-06, "loss": 0.08, "step": 6950}
{"epoch": 2.16186812497285, "grad_norm": 2.1080219745635986, "learning_rate": 8.470236397887468e-06, "loss": 0.0819, "step": 7000}
{"epoch": 2.1773135567450685, "grad_norm": 1.855688214302063, "learning_rate": 8.447912634021219e-06, "loss": 0.0768, "step": 7050}
{"epoch": 2.1927589885172867, "grad_norm": 1.447838306427002, "learning_rate": 8.425457047390502e-06, "loss": 0.0844, "step": 7100}
{"epoch": 2.2082044202895053, "grad_norm": 1.5317448377609253, "learning_rate": 8.402870496531432e-06, "loss": 0.0801, "step": 7150}
{"epoch": 2.223649852061724, "grad_norm": 1.758915901184082, "learning_rate": 8.380153844987225e-06, "loss": 0.0814, "step": 7200}
{"epoch": 2.2390952838339424, "grad_norm": 1.8075814247131348, "learning_rate": 8.357307961275194e-06, "loss": 0.0859, "step": 7250}
{"epoch": 2.254540715606161, "grad_norm": 1.7054452896118164, "learning_rate": 8.334333718853531e-06, "loss": 0.0766, "step": 7300}
{"epoch": 2.269986147378379, "grad_norm": 1.670913815498352, "learning_rate": 8.311231996087924e-06, "loss": 0.0787, "step": 7350}
{"epoch": 2.2854315791505977, "grad_norm": 1.3846805095672607, "learning_rate": 8.288003676217972e-06, "loss": 0.0762, "step": 7400}
{"epoch": 2.3008770109228163, "grad_norm": 1.5229676961898804, "learning_rate": 8.264649647323403e-06, "loss": 0.0786, "step": 7450}
{"epoch": 2.316322442695035, "grad_norm": 1.704161524772644, "learning_rate": 8.241170802290144e-06, "loss": 0.0805, "step": 7500}
{"epoch": 2.316322442695035, "eval_cer": 2.5247, "eval_loss": 0.07180380821228027, "eval_runtime": 49611.055, "eval_samples_per_second": 0.548, "eval_steps_per_second": 0.548, "eval_wer": 2.4453, "step": 7500}
{"epoch": 2.3317678744672534, "grad_norm": 1.584214448928833, "learning_rate": 8.217568038776165e-06, "loss": 0.081, "step": 7550}
{"epoch": 2.347213306239472, "grad_norm": 1.605106234550476, "learning_rate": 8.193842259177163e-06, "loss": 0.0835, "step": 7600}
{"epoch": 2.36265873801169, "grad_norm": 2.0109360218048096, "learning_rate": 8.16999437059207e-06, "loss": 0.0869, "step": 7650}
{"epoch": 2.3781041697839087, "grad_norm": 2.1605384349823, "learning_rate": 8.146025284788362e-06, "loss": 0.0816, "step": 7700}
{"epoch": 2.3935496015561273, "grad_norm": 1.894849419593811, "learning_rate": 8.121935918167201e-06, "loss": 0.0793, "step": 7750}
{"epoch": 2.408995033328346, "grad_norm": 1.2899354696273804, "learning_rate": 8.097727191728406e-06, "loss": 0.0772, "step": 7800}
{"epoch": 2.4244404651005644, "grad_norm": 1.5185883045196533, "learning_rate": 8.07340003103523e-06, "loss": 0.079, "step": 7850}
{"epoch": 2.4398858968727826, "grad_norm": 1.8623127937316895, "learning_rate": 8.048955366178983e-06, "loss": 0.0818, "step": 7900}
{"epoch": 2.455331328645001, "grad_norm": 1.4539504051208496, "learning_rate": 8.024394131743469e-06, "loss": 0.0774, "step": 7950}
{"epoch": 2.4707767604172197, "grad_norm": 2.2928173542022705, "learning_rate": 7.999717266769246e-06, "loss": 0.0794, "step": 8000}
{"epoch": 2.4862221921894383, "grad_norm": 1.8412299156188965, "learning_rate": 7.974925714717743e-06, "loss": 0.0749, "step": 8050}
{"epoch": 2.501667623961657, "grad_norm": 1.652535319328308, "learning_rate": 7.950020423435167e-06, "loss": 0.0756, "step": 8100}
{"epoch": 2.5171130557338754, "grad_norm": 1.0609238147735596, "learning_rate": 7.92500234511628e-06, "loss": 0.0796, "step": 8150}
{"epoch": 2.5325584875060936, "grad_norm": 1.7159970998764038, "learning_rate": 7.899872436267988e-06, "loss": 0.0773, "step": 8200}
{"epoch": 2.548003919278312, "grad_norm": 1.8446630239486694, "learning_rate": 7.874631657672773e-06, "loss": 0.0766, "step": 8250}
{"epoch": 2.5634493510505307, "grad_norm": 1.4900892972946167, "learning_rate": 7.849280974351957e-06, "loss": 0.0782, "step": 8300}
{"epoch": 2.5788947828227493, "grad_norm": 1.4647912979125977, "learning_rate": 7.82382135552881e-06, "loss": 0.076, "step": 8350}
{"epoch": 2.5943402145949674, "grad_norm": 1.4272511005401611, "learning_rate": 7.798253774591492e-06, "loss": 0.0771, "step": 8400}
{"epoch": 2.609785646367186, "grad_norm": 1.7571097612380981, "learning_rate": 7.77257920905584e-06, "loss": 0.0749, "step": 8450}
{"epoch": 2.6252310781394046, "grad_norm": 1.5228626728057861, "learning_rate": 7.74679864052799e-06, "loss": 0.0788, "step": 8500}
{"epoch": 2.640676509911623, "grad_norm": 1.8134883642196655, "learning_rate": 7.720913054666854e-06, "loss": 0.0749, "step": 8550}
{"epoch": 2.6561219416838417, "grad_norm": 1.3394718170166016, "learning_rate": 7.694923441146434e-06, "loss": 0.0761, "step": 8600}
{"epoch": 2.6715673734560603, "grad_norm": 2.3067879676818848, "learning_rate": 7.668830793617976e-06, "loss": 0.0788, "step": 8650}
{"epoch": 2.687012805228279, "grad_norm": 1.2476001977920532, "learning_rate": 7.642636109671999e-06, "loss": 0.0761, "step": 8700}
{"epoch": 2.702458237000497, "grad_norm": 1.8888031244277954, "learning_rate": 7.616340390800127e-06, "loss": 0.0735, "step": 8750}
{"epoch": 2.7179036687727156, "grad_norm": 1.547959566116333, "learning_rate": 7.589944642356832e-06, "loss": 0.0745, "step": 8800}
{"epoch": 2.733349100544934, "grad_norm": 1.4970035552978516, "learning_rate": 7.563449873520963e-06, "loss": 0.0737, "step": 8850}
{"epoch": 2.7487945323171528, "grad_norm": 1.5223023891448975, "learning_rate": 7.536857097257191e-06, "loss": 0.0713, "step": 8900}
{"epoch": 2.764239964089371, "grad_norm": 1.9684793949127197, "learning_rate": 7.510167330277262e-06, "loss": 0.0674, "step": 8950}
{"epoch": 2.7796853958615895, "grad_norm": 1.468606948852539, "learning_rate": 7.483381593001137e-06, "loss": 0.0819, "step": 9000}
{"epoch": 2.795130827633808, "grad_norm": 1.3774394989013672, "learning_rate": 7.4565009095179694e-06, "loss": 0.0715, "step": 9050}
{"epoch": 2.8105762594060266, "grad_norm": 1.7880630493164062, "learning_rate": 7.429526307546957e-06, "loss": 0.0699, "step": 9100}
{"epoch": 2.826021691178245, "grad_norm": 1.7236732244491577, "learning_rate": 7.402458818398048e-06, "loss": 0.0731, "step": 9150}
{"epoch": 2.8414671229504638, "grad_norm": 1.8902820348739624, "learning_rate": 7.375299476932516e-06, "loss": 0.0753, "step": 9200}
{"epoch": 2.8569125547226824, "grad_norm": 1.6625146865844727, "learning_rate": 7.34804932152338e-06, "loss": 0.0701, "step": 9250}
{"epoch": 2.8723579864949005, "grad_norm": 1.802640438079834, "learning_rate": 7.320709394015728e-06, "loss": 0.0707, "step": 9300}
{"epoch": 2.887803418267119, "grad_norm": 1.6399686336517334, "learning_rate": 7.293280739686861e-06, "loss": 0.0754, "step": 9350}
{"epoch": 2.9032488500393376, "grad_norm": 2.1284708976745605, "learning_rate": 7.265764407206342e-06, "loss": 0.0726, "step": 9400}
{"epoch": 2.918694281811556, "grad_norm": 1.4841082096099854, "learning_rate": 7.238161448595904e-06, "loss": 0.0749, "step": 9450}
{"epoch": 2.9341397135837743, "grad_norm": 1.1791646480560303, "learning_rate": 7.21047291918922e-06, "loss": 0.0714, "step": 9500}
{"epoch": 2.949585145355993, "grad_norm": 1.7156027555465698, "learning_rate": 7.182699877591564e-06, "loss": 0.0713, "step": 9550}
{"epoch": 2.9650305771282115, "grad_norm": 1.6085469722747803, "learning_rate": 7.1548433856393284e-06, "loss": 0.0666, "step": 9600}
{"epoch": 2.98047600890043, "grad_norm": 1.685760259628296, "learning_rate": 7.126904508359438e-06, "loss": 0.0702, "step": 9650}
{"epoch": 2.9959214406726487, "grad_norm": 1.4954265356063843, "learning_rate": 7.098884313928617e-06, "loss": 0.07, "step": 9700}
{"epoch": 3.011120710875997, "grad_norm": 1.3601444959640503, "learning_rate": 7.0707838736325654e-06, "loss": 0.0634, "step": 9750}
{"epoch": 3.0265661426482158, "grad_norm": 1.1960657835006714, "learning_rate": 7.042604261824991e-06, "loss": 0.0623, "step": 9800}
{"epoch": 3.0420115744204343, "grad_norm": 0.3930482268333435, "learning_rate": 7.014346555886537e-06, "loss": 0.0546, "step": 9850}
{"epoch": 3.057457006192653, "grad_norm": 1.3976786136627197, "learning_rate": 6.986011836183589e-06, "loss": 0.0604, "step": 9900}
{"epoch": 3.0729024379648715, "grad_norm": 1.1836129426956177, "learning_rate": 6.957601186026973e-06, "loss": 0.0578, "step": 9950}
{"epoch": 3.0883478697370896, "grad_norm": 1.406906247138977, "learning_rate": 6.929115691630537e-06, "loss": 0.0589, "step": 10000}
{"epoch": 3.0883478697370896, "eval_cer": 2.8635, "eval_loss": 0.05827642232179642, "eval_runtime": 52277.5516, "eval_samples_per_second": 0.52, "eval_steps_per_second": 0.52, "eval_wer": 2.8751, "step": 10000}
{"epoch": 3.103793301509308, "grad_norm": 1.4350087642669678, "learning_rate": 6.900556442069615e-06, "loss": 0.0591, "step": 10050}
{"epoch": 3.119238733281527, "grad_norm": 1.4390084743499756, "learning_rate": 6.871924529239404e-06, "loss": 0.0586, "step": 10100}
{"epoch": 3.1346841650537454, "grad_norm": 1.3584338426589966, "learning_rate": 6.843221047813201e-06, "loss": 0.0571, "step": 10150}
{"epoch": 3.150129596825964, "grad_norm": 0.9605140089988708, "learning_rate": 6.814447095200563e-06, "loss": 0.0581, "step": 10200}
{"epoch": 3.165575028598182, "grad_norm": 1.531211256980896, "learning_rate": 6.785603771505343e-06, "loss": 0.0571, "step": 10250}
{"epoch": 3.1810204603704006, "grad_norm": 1.5958681106567383, "learning_rate": 6.756692179483635e-06, "loss": 0.0613, "step": 10300}
{"epoch": 3.1964658921426192, "grad_norm": 1.5995062589645386, "learning_rate": 6.727713424501611e-06, "loss": 0.0574, "step": 10350}
{"epoch": 3.211911323914838, "grad_norm": 1.3173878192901611, "learning_rate": 6.698668614493257e-06, "loss": 0.0554, "step": 10400}
{"epoch": 3.2273567556870564, "grad_norm": 2.648559808731079, "learning_rate": 6.6695588599180164e-06, "loss": 0.0591, "step": 10450}
{"epoch": 3.242802187459275, "grad_norm": 2.1928234100341797, "learning_rate": 6.6403852737183375e-06, "loss": 0.0612, "step": 10500}
{"epoch": 3.258247619231493, "grad_norm": 0.9929393529891968, "learning_rate": 6.611148971277116e-06, "loss": 0.0576, "step": 10550}
{"epoch": 3.2736930510037117, "grad_norm": 1.5254093408584595, "learning_rate": 6.581851070375056e-06, "loss": 0.0606, "step": 10600}
{"epoch": 3.2891384827759302, "grad_norm": 1.8986523151397705, "learning_rate": 6.552492691147933e-06, "loss": 0.0577, "step": 10650}
{"epoch": 3.304583914548149, "grad_norm": 1.5019969940185547, "learning_rate": 6.523074956043767e-06, "loss": 0.054, "step": 10700}
{"epoch": 3.3200293463203674, "grad_norm": 1.144919991493225, "learning_rate": 6.493598989779908e-06, "loss": 0.0588, "step": 10750}
{"epoch": 3.3354747780925855, "grad_norm": 1.1544852256774902, "learning_rate": 6.46406591930004e-06, "loss": 0.0616, "step": 10800}
{"epoch": 3.350920209864804, "grad_norm": 1.4365992546081543, "learning_rate": 6.434476873731088e-06, "loss": 0.0582, "step": 10850}
{"epoch": 3.3663656416370227, "grad_norm": 0.9468691945075989, "learning_rate": 6.404832984340053e-06, "loss": 0.0555, "step": 10900}
{"epoch": 3.3818110734092413, "grad_norm": 1.2298778295516968, "learning_rate": 6.375135384490758e-06, "loss": 0.0619, "step": 10950}
{"epoch": 3.39725650518146, "grad_norm": 1.4707703590393066, "learning_rate": 6.345385209600521e-06, "loss": 0.0579, "step": 11000}
{"epoch": 3.4127019369536784, "grad_norm": 1.4021185636520386, "learning_rate": 6.315583597096742e-06, "loss": 0.058, "step": 11050}
{"epoch": 3.4281473687258965, "grad_norm": 1.349861979484558, "learning_rate": 6.285731686373411e-06, "loss": 0.057, "step": 11100}
{"epoch": 3.443592800498115, "grad_norm": 1.0955054759979248, "learning_rate": 6.255830618747557e-06, "loss": 0.0581, "step": 11150}
{"epoch": 3.4590382322703337, "grad_norm": 1.0882643461227417, "learning_rate": 6.225881537415604e-06, "loss": 0.0577, "step": 11200}
{"epoch": 3.4744836640425523, "grad_norm": 0.9772672653198242, "learning_rate": 6.1958855874096655e-06, "loss": 0.0561, "step": 11250}
{"epoch": 3.489929095814771, "grad_norm": 1.2397129535675049, "learning_rate": 6.165843915553771e-06, "loss": 0.0559, "step": 11300}
{"epoch": 3.505374527586989, "grad_norm": 1.126537799835205, "learning_rate": 6.135757670420013e-06, "loss": 0.0553, "step": 11350}
{"epoch": 3.5208199593592076, "grad_norm": 1.4523117542266846, "learning_rate": 6.1056280022846405e-06, "loss": 0.0588, "step": 11400}
{"epoch": 3.536265391131426, "grad_norm": 0.8349596261978149, "learning_rate": 6.075456063084073e-06, "loss": 0.0604, "step": 11450}
{"epoch": 3.5517108229036447, "grad_norm": 1.0020091533660889, "learning_rate": 6.04524300637087e-06, "loss": 0.0564, "step": 11500}
{"epoch": 3.5671562546758633, "grad_norm": 0.9773091077804565, "learning_rate": 6.014989987269617e-06, "loss": 0.0554, "step": 11550}
{"epoch": 3.582601686448082, "grad_norm": 0.9030627012252808, "learning_rate": 5.984698162432772e-06, "loss": 0.0546, "step": 11600}
{"epoch": 3.5980471182203, "grad_norm": 1.1969341039657593, "learning_rate": 5.954368689996433e-06, "loss": 0.0582, "step": 11650}
{"epoch": 3.6134925499925186, "grad_norm": 1.4267902374267578, "learning_rate": 5.924002729536065e-06, "loss": 0.0603, "step": 11700}
{"epoch": 3.628937981764737, "grad_norm": 0.9697352051734924, "learning_rate": 5.893601442022169e-06, "loss": 0.06, "step": 11750}
{"epoch": 3.6443834135369557, "grad_norm": 1.8329150676727295, "learning_rate": 5.863165989775891e-06, "loss": 0.054, "step": 11800}
{"epoch": 3.659828845309174, "grad_norm": 1.4708176851272583, "learning_rate": 5.832697536424583e-06, "loss": 0.0584, "step": 11850}
{"epoch": 3.6752742770813924, "grad_norm": 1.0277737379074097, "learning_rate": 5.802197246857321e-06, "loss": 0.0546, "step": 11900}
{"epoch": 3.690719708853611, "grad_norm": 1.0722907781600952, "learning_rate": 5.771666287180355e-06, "loss": 0.0544, "step": 11950}
{"epoch": 3.7061651406258296, "grad_norm": 1.4933980703353882, "learning_rate": 5.741105824672541e-06, "loss": 0.0555, "step": 12000}
{"epoch": 3.721610572398048, "grad_norm": 0.7271960377693176, "learning_rate": 5.710517027740704e-06, "loss": 0.0546, "step": 12050}
{"epoch": 3.7370560041702667, "grad_norm": 1.6103026866912842, "learning_rate": 5.679901065874965e-06, "loss": 0.0594, "step": 12100}
{"epoch": 3.7525014359424853, "grad_norm": 1.29445481300354, "learning_rate": 5.649259109604037e-06, "loss": 0.0519, "step": 12150}
{"epoch": 3.7679468677147034, "grad_norm": 1.5380432605743408, "learning_rate": 5.618592330450464e-06, "loss": 0.0509, "step": 12200}
{"epoch": 3.783392299486922, "grad_norm": 1.2497230768203735, "learning_rate": 5.587901900885837e-06, "loss": 0.0539, "step": 12250}
{"epoch": 3.7988377312591406, "grad_norm": 0.5510421991348267, "learning_rate": 5.5571889942859605e-06, "loss": 0.0537, "step": 12300}
{"epoch": 3.814283163031359, "grad_norm": 0.8449147343635559, "learning_rate": 5.526454784885999e-06, "loss": 0.0573, "step": 12350}
{"epoch": 3.8297285948035773, "grad_norm": 1.4641717672348022, "learning_rate": 5.495700447735572e-06, "loss": 0.055, "step": 12400}
{"epoch": 3.845174026575796, "grad_norm": 1.134308099746704, "learning_rate": 5.46492715865385e-06, "loss": 0.0561, "step": 12450}
{"epoch": 3.8606194583480145, "grad_norm": 0.7543144226074219, "learning_rate": 5.4341360941845685e-06, "loss": 0.0516, "step": 12500}
{"epoch": 3.8606194583480145, "eval_cer": 3.1281, "eval_loss": 0.05106096714735031, "eval_runtime": 53321.6724, "eval_samples_per_second": 0.51, "eval_steps_per_second": 0.51, "eval_wer": 3.2984, "step": 12500}
{"epoch": 3.876064890120233, "grad_norm": 1.0329837799072266, "learning_rate": 5.403328431551077e-06, "loss": 0.0543, "step": 12550}
{"epoch": 3.8915103218924516, "grad_norm": 0.6294991970062256, "learning_rate": 5.372505348611306e-06, "loss": 0.0551, "step": 12600}
{"epoch": 3.90695575366467, "grad_norm": 1.4618242979049683, "learning_rate": 5.341668023812754e-06, "loss": 0.0524, "step": 12650}
{"epoch": 3.9224011854368888, "grad_norm": 1.2675455808639526, "learning_rate": 5.310817636147412e-06, "loss": 0.0554, "step": 12700}
{"epoch": 3.937846617209107, "grad_norm": 1.277928352355957, "learning_rate": 5.27995536510671e-06, "loss": 0.0582, "step": 12750}
{"epoch": 3.9532920489813255, "grad_norm": 0.9280942678451538, "learning_rate": 5.2490823906364065e-06, "loss": 0.0549, "step": 12800}
{"epoch": 3.968737480753544, "grad_norm": 0.44305741786956787, "learning_rate": 5.218199893091478e-06, "loss": 0.0529, "step": 12850}
{"epoch": 3.9841829125257626, "grad_norm": 1.999466061592102, "learning_rate": 5.187309053190994e-06, "loss": 0.0533, "step": 12900}
{"epoch": 3.9996283442979808, "grad_norm": 1.936800241470337, "learning_rate": 5.156411051972977e-06, "loss": 0.0514, "step": 12950}
{"epoch": 4.01482761450133, "grad_norm": 2.0569820404052734, "learning_rate": 5.125507070749243e-06, "loss": 0.0451, "step": 13000}
{"epoch": 4.030273046273548, "grad_norm": 2.873948335647583, "learning_rate": 5.094598291060238e-06, "loss": 0.0455, "step": 13050}
{"epoch": 4.0457184780457665, "grad_norm": 2.51173734664917, "learning_rate": 5.063685894629871e-06, "loss": 0.0476, "step": 13100}
{"epoch": 4.061163909817985, "grad_norm": 2.5149686336517334, "learning_rate": 5.032771063320323e-06, "loss": 0.0475, "step": 13150}
{"epoch": 4.076609341590204, "grad_norm": 2.095987319946289, "learning_rate": 5.001854979086871e-06, "loss": 0.047, "step": 13200}
{"epoch": 4.092054773362422, "grad_norm": 3.2679123878479004, "learning_rate": 4.970938823932691e-06, "loss": 0.0474, "step": 13250}
{"epoch": 4.107500205134641, "grad_norm": 1.7078492641448975, "learning_rate": 4.940023779863673e-06, "loss": 0.048, "step": 13300}
{"epoch": 4.122945636906859, "grad_norm": 2.072380304336548, "learning_rate": 4.909111028843226e-06, "loss": 0.0461, "step": 13350}
{"epoch": 4.138391068679078, "grad_norm": 2.5650672912597656, "learning_rate": 4.878201752747092e-06, "loss": 0.0471, "step": 13400}
{"epoch": 4.1538365004512965, "grad_norm": 1.899710774421692, "learning_rate": 4.847297133318155e-06, "loss": 0.0471, "step": 13450}
{"epoch": 4.169281932223515, "grad_norm": 2.471426010131836, "learning_rate": 4.816398352121264e-06, "loss": 0.0452, "step": 13500}
{"epoch": 4.184727363995734, "grad_norm": 2.462597370147705, "learning_rate": 4.785506590498053e-06, "loss": 0.0517, "step": 13550}
{"epoch": 4.200172795767951, "grad_norm": 0.6728315949440002, "learning_rate": 4.754623029521784e-06, "loss": 0.0456, "step": 13600}
{"epoch": 4.21561822754017, "grad_norm": 1.1088227033615112, "learning_rate": 4.723748849952186e-06, "loss": 0.0449, "step": 13650}
{"epoch": 4.2310636593123885, "grad_norm": 1.955267310142517, "learning_rate": 4.692885232190314e-06, "loss": 0.0436, "step": 13700}
{"epoch": 4.246509091084607, "grad_norm": 2.578188180923462, "learning_rate": 4.662033356233409e-06, "loss": 0.0454, "step": 13750}
{"epoch": 4.261954522856826, "grad_norm": 1.5380196571350098, "learning_rate": 4.6311944016298005e-06, "loss": 0.0447, "step": 13800}
{"epoch": 4.277399954629044, "grad_norm": 1.8381695747375488, "learning_rate": 4.6003695474337975e-06, "loss": 0.0458, "step": 13850}
{"epoch": 4.292845386401263, "grad_norm": 2.318171501159668, "learning_rate": 4.569559972160615e-06, "loss": 0.0432, "step": 13900}
{"epoch": 4.308290818173481, "grad_norm": 2.8723549842834473, "learning_rate": 4.53876685374131e-06, "loss": 0.046, "step": 13950}
{"epoch": 4.3237362499457, "grad_norm": 2.2531979084014893, "learning_rate": 4.5079913694777564e-06, "loss": 0.0484, "step": 14000}
{"epoch": 4.3391816817179185, "grad_norm": 1.8091078996658325, "learning_rate": 4.477234695997625e-06, "loss": 0.0428, "step": 14050}
{"epoch": 4.354627113490137, "grad_norm": 1.5737905502319336, "learning_rate": 4.4464980092094005e-06, "loss": 0.0452, "step": 14100}
{"epoch": 4.370072545262355, "grad_norm": 2.036376714706421, "learning_rate": 4.415782484257427e-06, "loss": 0.0488, "step": 14150}
{"epoch": 4.385517977034573, "grad_norm": 2.6779673099517822, "learning_rate": 4.385089295476971e-06, "loss": 0.0446, "step": 14200}
{"epoch": 4.400963408806792, "grad_norm": 1.663352608680725, "learning_rate": 4.354419616349333e-06, "loss": 0.0468, "step": 14250}
{"epoch": 4.4164088405790105, "grad_norm": 1.9492076635360718, "learning_rate": 4.323774619456974e-06, "loss": 0.0402, "step": 14300}
{"epoch": 4.431854272351229, "grad_norm": 2.1021711826324463, "learning_rate": 4.293155476438693e-06, "loss": 0.0443, "step": 14350}
{"epoch": 4.447299704123448, "grad_norm": 1.310408115386963, "learning_rate": 4.262563357944822e-06, "loss": 0.0443, "step": 14400}
{"epoch": 4.462745135895666, "grad_norm": 1.6177648305892944, "learning_rate": 4.231999433592476e-06, "loss": 0.0463, "step": 14450}
{"epoch": 4.478190567667885, "grad_norm": 2.0417137145996094, "learning_rate": 4.201464871920837e-06, "loss": 0.0462, "step": 14500}
{"epoch": 4.493635999440103, "grad_norm": 2.171947956085205, "learning_rate": 4.170960840346473e-06, "loss": 0.0459, "step": 14550}
{"epoch": 4.509081431212322, "grad_norm": 2.0733096599578857, "learning_rate": 4.140488505118701e-06, "loss": 0.0436, "step": 14600}
{"epoch": 4.5245268629845405, "grad_norm": 1.9364445209503174, "learning_rate": 4.110049031275012e-06, "loss": 0.044, "step": 14650}
{"epoch": 4.539972294756758, "grad_norm": 1.9756009578704834, "learning_rate": 4.079643582596513e-06, "loss": 0.0444, "step": 14700}
{"epoch": 4.555417726528977, "grad_norm": 1.4544475078582764, "learning_rate": 4.049273321563444e-06, "loss": 0.0397, "step": 14750}
{"epoch": 4.570863158301195, "grad_norm": 1.6351209878921509, "learning_rate": 4.018939409310727e-06, "loss": 0.045, "step": 14800}
{"epoch": 4.586308590073414, "grad_norm": 1.9688271284103394, "learning_rate": 3.988643005583572e-06, "loss": 0.0439, "step": 14850}
{"epoch": 4.6017540218456325, "grad_norm": 2.413205862045288, "learning_rate": 3.958385268693144e-06, "loss": 0.0426, "step": 14900}
{"epoch": 4.617199453617851, "grad_norm": 2.4667603969573975, "learning_rate": 3.92816735547227e-06, "loss": 0.0442, "step": 14950}
{"epoch": 4.63264488539007, "grad_norm": 2.2367050647735596, "learning_rate": 3.897990421231217e-06, "loss": 0.0415, "step": 15000}
{"epoch": 4.63264488539007, "eval_cer": 3.2897, "eval_loss": 0.05402516573667526, "eval_runtime": 52806.5318, "eval_samples_per_second": 0.515, "eval_steps_per_second": 0.515, "eval_wer": 3.365, "step": 15000}
{"loss": 0.0388, "grad_norm": 2.5312867164611816, "learning_rate": 3.86785561971351e-06, "epoch": 4.648090317162288, "step": 15050}
{"loss": 0.0385, "grad_norm": 1.3888521194458008, "learning_rate": 3.837764103051838e-06, "epoch": 4.663535748934507, "step": 15100}
{"loss": 0.0398, "grad_norm": 2.358097791671753, "learning_rate": 3.8077170217239944e-06, "epoch": 4.678981180706725, "step": 15150}
{"loss": 0.0397, "grad_norm": 2.1909303665161133, "learning_rate": 3.7777155245088913e-06, "epoch": 4.694426612478944, "step": 15200}
{"loss": 0.0404, "grad_norm": 1.5980212688446045, "learning_rate": 3.7477607584426362e-06, "epoch": 4.709872044251162, "step": 15250}
{"loss": 0.0404, "grad_norm": 2.0475852489471436, "learning_rate": 3.717853868774688e-06, "epoch": 4.72531747602338, "step": 15300}
{"loss": 0.0432, "grad_norm": 1.9329392910003662, "learning_rate": 3.6879959989240644e-06, "epoch": 4.740762907795599, "step": 15350}
{"loss": 0.0402, "grad_norm": 2.21789288520813, "learning_rate": 3.658188290435625e-06, "epoch": 4.756208339567817, "step": 15400}
{"loss": 0.0411, "grad_norm": 2.467203140258789, "learning_rate": 3.628431882936427e-06, "epoch": 4.771653771340036, "step": 15450}
{"loss": 0.0387, "grad_norm": 2.1761415004730225, "learning_rate": 3.5987279140921603e-06, "epoch": 4.787099203112255, "step": 15500}
{"loss": 0.0397, "grad_norm": 2.0781192779541016, "learning_rate": 3.5690775195636434e-06, "epoch": 4.802544634884473, "step": 15550}
{"loss": 0.042, "grad_norm": 2.0276036262512207, "learning_rate": 3.5394818329634097e-06, "epoch": 4.817990066656692, "step": 15600}
{"loss": 0.0393, "grad_norm": 2.5976736545562744, "learning_rate": 3.5099419858123645e-06, "epoch": 4.83343549842891, "step": 15650}
{"loss": 0.0453, "grad_norm": 2.919811487197876, "learning_rate": 3.4804591074965226e-06, "epoch": 4.848880930201129, "step": 15700}
{"loss": 0.041, "grad_norm": 0.9966694712638855, "learning_rate": 3.4510343252238327e-06, "epoch": 4.8643263619733474, "step": 15750}
{"loss": 0.041, "grad_norm": 1.4679787158966064, "learning_rate": 3.4216687639810777e-06, "epoch": 4.879771793745565, "step": 15800}
{"loss": 0.0375, "grad_norm": 2.0488719940185547, "learning_rate": 3.3923635464908666e-06, "epoch": 4.895217225517784, "step": 15850}
{"loss": 0.0398, "grad_norm": 1.5179309844970703, "learning_rate": 3.363119793168704e-06, "epoch": 4.910662657290002, "step": 15900}
{"loss": 0.0386, "grad_norm": 1.5366339683532715, "learning_rate": 3.3339386220801607e-06, "epoch": 4.926108089062221, "step": 15950}
{"loss": 0.0404, "grad_norm": 1.4580926895141602, "learning_rate": 3.3048211488981257e-06, "epoch": 4.941553520834439, "step": 16000}
{"loss": 0.0422, "grad_norm": 2.476271629333496, "learning_rate": 3.275768486860149e-06, "epoch": 4.956998952606658, "step": 16050}
{"loss": 0.0402, "grad_norm": 1.902818202972412, "learning_rate": 3.2467817467258755e-06, "epoch": 4.972444384378877, "step": 16100}
{"loss": 0.0386, "grad_norm": 1.909420371055603, "learning_rate": 3.217862036734587e-06, "epoch": 4.987889816151095, "step": 16150}
{"loss": 0.0433, "grad_norm": 1.3479013442993164, "learning_rate": 3.1890104625628258e-06, "epoch": 5.003089086354444, "step": 16200}
{"loss": 0.042, "grad_norm": 1.1648472547531128, "learning_rate": 3.1602281272821233e-06, "epoch": 5.018534518126662, "step": 16250}
{"loss": 0.0405, "grad_norm": 0.9297541975975037, "learning_rate": 3.1315161313168252e-06, "epoch": 5.033979949898881, "step": 16300}
{"loss": 0.0383, "grad_norm": 1.3858672380447388, "learning_rate": 3.1028755724020187e-06, "epoch": 5.0494253816710994, "step": 16350}
{"loss": 0.0387, "grad_norm": 1.363135576248169, "learning_rate": 3.074307545541567e-06, "epoch": 5.064870813443318, "step": 16400}
{"loss": 0.0388, "grad_norm": 1.6581306457519531, "learning_rate": 3.045813142966243e-06, "epoch": 5.080316245215537, "step": 16450}
{"loss": 0.0386, "grad_norm": 1.2532027959823608, "learning_rate": 3.017393454091967e-06, "epoch": 5.095761676987754, "step": 16500}
{"loss": 0.0378, "grad_norm": 1.1806824207305908, "learning_rate": 2.989049565478159e-06, "epoch": 5.111207108759973, "step": 16550}
{"loss": 0.0348, "grad_norm": 0.9931106567382812, "learning_rate": 2.960782560786196e-06, "epoch": 5.126652540532191, "step": 16600}
{"loss": 0.038, "grad_norm": 1.3448480367660522, "learning_rate": 2.9325935207379807e-06, "epoch": 5.14209797230441, "step": 16650}
{"loss": 0.0385, "grad_norm": 1.0759299993515015, "learning_rate": 2.9044835230746244e-06, "epoch": 5.157543404076629, "step": 16700}
{"loss": 0.039, "grad_norm": 1.4627045392990112, "learning_rate": 2.876453642515236e-06, "epoch": 5.172988835848847, "step": 16750}
{"loss": 0.0391, "grad_norm": 1.2251198291778564, "learning_rate": 2.8485049507158367e-06, "epoch": 5.188434267621066, "step": 16800}
{"loss": 0.0368, "grad_norm": 1.1165056228637695, "learning_rate": 2.8206385162283958e-06, "epoch": 5.203879699393284, "step": 16850}
{"loss": 0.0376, "grad_norm": 1.2397181987762451, "learning_rate": 2.7928554044599584e-06, "epoch": 5.219325131165503, "step": 16900}
{"loss": 0.0372, "grad_norm": 1.0362519025802612, "learning_rate": 2.7651566776319333e-06, "epoch": 5.2347705629377215, "step": 16950}
{"loss": 0.0358, "grad_norm": 1.2584264278411865, "learning_rate": 2.737543394739464e-06, "epoch": 5.25021599470994, "step": 17000}
{"loss": 0.0356, "grad_norm": 1.024266004562378, "learning_rate": 2.710016611510949e-06, "epoch": 5.265661426482158, "step": 17050}
{"loss": 0.0357, "grad_norm": 1.416258454322815, "learning_rate": 2.682577380367681e-06, "epoch": 5.281106858254376, "step": 17100}
{"loss": 0.0369, "grad_norm": 1.137723445892334, "learning_rate": 2.6552267503836016e-06, "epoch": 5.296552290026595, "step": 17150}
{"loss": 0.0372, "grad_norm": 1.2025176286697388, "learning_rate": 2.6279657672452e-06, "epoch": 5.3119977217988135, "step": 17200}
{"loss": 0.0383, "grad_norm": 1.2218643426895142, "learning_rate": 2.600795473211528e-06, "epoch": 5.327443153571032, "step": 17250}
{"loss": 0.0361, "grad_norm": 0.9973156452178955, "learning_rate": 2.5737169070743594e-06, "epoch": 5.342888585343251, "step": 17300}
{"loss": 0.0354, "grad_norm": 1.2816466093063354, "learning_rate": 2.5467311041184655e-06, "epoch": 5.358334017115469, "step": 17350}
{"loss": 0.04, "grad_norm": 1.1314411163330078, "learning_rate": 2.5198390960820355e-06, "epoch": 5.373779448887688, "step": 17400}
{"loss": 0.038, "grad_norm": 1.2395871877670288, "learning_rate": 2.4930419111172397e-06, "epoch": 5.389224880659906, "step": 17450}
{"loss": 0.0368, "grad_norm": 1.4650291204452515, "learning_rate": 2.4663405737509037e-06, "epoch": 5.404670312432125, "step": 17500}
{"eval_loss": 0.05429032817482948, "eval_wer": 3.4251, "eval_cer": 3.298, "eval_runtime": 54406.3939, "eval_samples_per_second": 0.499, "eval_steps_per_second": 0.499, "epoch": 5.404670312432125, "step": 17500}
{"loss": 0.0363, "grad_norm": 1.1124913692474365, "learning_rate": 2.4397361048453565e-06, "epoch": 5.4201157442043435, "step": 17550}
{"loss": 0.0388, "grad_norm": 1.2796242237091064, "learning_rate": 2.4132295215593842e-06, "epoch": 5.435561175976561, "step": 17600}
{"loss": 0.0332, "grad_norm": 1.504075288772583, "learning_rate": 2.38682183730935e-06, "epoch": 5.45100660774878, "step": 17650}
{"loss": 0.0352, "grad_norm": 1.079721212387085, "learning_rate": 2.3605140617304513e-06, "epoch": 5.466452039520998, "step": 17700}
{"loss": 0.0342, "grad_norm": 1.1886216402053833, "learning_rate": 2.334307200638106e-06, "epoch": 5.481897471293217, "step": 17750}
{"loss": 0.0361, "grad_norm": 1.0235615968704224, "learning_rate": 2.308202255989518e-06, "epoch": 5.4973429030654355, "step": 17800}
{"loss": 0.0362, "grad_norm": 1.072338581085205, "learning_rate": 2.2822002258453425e-06, "epoch": 5.512788334837654, "step": 17850}
{"loss": 0.0372, "grad_norm": 1.2269715070724487, "learning_rate": 2.256302104331553e-06, "epoch": 5.528233766609873, "step": 17900}
{"loss": 0.0375, "grad_norm": 1.1194442510604858, "learning_rate": 2.2305088816014243e-06, "epoch": 5.543679198382091, "step": 17950}
{"loss": 0.0358, "grad_norm": 1.06613290309906, "learning_rate": 2.204821543797668e-06, "epoch": 5.55912463015431, "step": 18000}
{"loss": 0.0359, "grad_norm": 0.9360151886940002, "learning_rate": 2.1792410730147397e-06, "epoch": 5.574570061926528, "step": 18050}
{"loss": 0.0375, "grad_norm": 1.262405276298523, "learning_rate": 2.1537684472612856e-06, "epoch": 5.590015493698747, "step": 18100}
{"loss": 0.0365, "grad_norm": 1.4639431238174438, "learning_rate": 2.1284046404227598e-06, "epoch": 5.605460925470965, "step": 18150}
{"loss": 0.0349, "grad_norm": 1.2364749908447266, "learning_rate": 2.1031506222241743e-06, "epoch": 5.620906357243183, "step": 18200}
{"loss": 0.0369, "grad_norm": 1.1730759143829346, "learning_rate": 2.0780073581930406e-06, "epoch": 5.636351789015402, "step": 18250}
{"loss": 0.0371, "grad_norm": 1.3337668180465698, "learning_rate": 2.052975809622441e-06, "epoch": 5.65179722078762, "step": 18300}
{"loss": 0.0345, "grad_norm": 1.0913158655166626, "learning_rate": 2.0280569335342843e-06, "epoch": 5.667242652559839, "step": 18350}
{"loss": 0.0352, "grad_norm": 1.01255202293396, "learning_rate": 2.0032516826427174e-06, "epoch": 5.6826880843320575, "step": 18400}
{"loss": 0.0356, "grad_norm": 1.1388089656829834, "learning_rate": 1.978561005317692e-06, "epoch": 5.698133516104276, "step": 18450}
{"loss": 0.0357, "grad_norm": 1.3929470777511597, "learning_rate": 1.9539858455487114e-06, "epoch": 5.713578947876495, "step": 18500}
{"loss": 0.0342, "grad_norm": 1.1314735412597656, "learning_rate": 1.929527142908745e-06, "epoch": 5.729024379648713, "step": 18550}
{"loss": 0.0356, "grad_norm": 1.2910658121109009, "learning_rate": 1.9051858325182925e-06, "epoch": 5.744469811420932, "step": 18600}
{"loss": 0.038, "grad_norm": 1.1597943305969238, "learning_rate": 1.8809628450096484e-06, "epoch": 5.75991524319315, "step": 18650}
{"loss": 0.0364, "grad_norm": 1.2124665975570679, "learning_rate": 1.8568591064912989e-06, "epoch": 5.775360674965368, "step": 18700}
{"loss": 0.0337, "grad_norm": 0.787893533706665, "learning_rate": 1.8328755385125414e-06, "epoch": 5.790806106737587, "step": 18750}
{"loss": 0.0347, "grad_norm": 0.9876325130462646, "learning_rate": 1.809013058028228e-06, "epoch": 5.806251538509805, "step": 18800}
{"loss": 0.0346, "grad_norm": 1.0792776346206665, "learning_rate": 1.785272577363723e-06, "epoch": 5.821696970282024, "step": 18850}
{"loss": 0.0348, "grad_norm": 1.1874064207077026, "learning_rate": 1.7616550041800195e-06, "epoch": 5.837142402054242, "step": 18900}
{"loss": 0.0327, "grad_norm": 1.037471055984497, "learning_rate": 1.7381612414390232e-06, "epoch": 5.852587833826461, "step": 18950}
{"loss": 0.0333, "grad_norm": 1.2237623929977417, "learning_rate": 1.7147921873690527e-06, "epoch": 5.8680332655986795, "step": 19000}
{"loss": 0.0341, "grad_norm": 1.2377524375915527, "learning_rate": 1.6915487354304788e-06, "epoch": 5.883478697370898, "step": 19050}
{"loss": 0.0344, "grad_norm": 1.0181424617767334, "learning_rate": 1.6684317742815786e-06, "epoch": 5.898924129143117, "step": 19100}
{"loss": 0.0328, "grad_norm": 1.0538455247879028, "learning_rate": 1.6454421877445491e-06, "epoch": 5.914369560915335, "step": 19150}
{"loss": 0.0334, "grad_norm": 1.1398926973342896, "learning_rate": 1.6225808547717209e-06, "epoch": 5.929814992687554, "step": 19200}
{"loss": 0.0335, "grad_norm": 0.8994346261024475, "learning_rate": 1.5998486494119592e-06, "epoch": 5.9452604244597715, "step": 19250}
{"loss": 0.0361, "grad_norm": 1.1772350072860718, "learning_rate": 1.5772464407772347e-06, "epoch": 5.96070585623199, "step": 19300}
{"loss": 0.0341, "grad_norm": 0.7631919384002686, "learning_rate": 1.554775093009403e-06, "epoch": 5.976151288004209, "step": 19350}
{"loss": 0.037, "grad_norm": 1.120029091835022, "learning_rate": 1.5324354652471697e-06, "epoch": 5.991596719776427, "step": 19400}
{"loss": 0.0366, "grad_norm": 1.4275178909301758, "learning_rate": 1.5102284115932297e-06, "epoch": 6.006795989979776, "step": 19450}
{"loss": 0.033, "grad_norm": 1.1705217361450195, "learning_rate": 1.4881547810816315e-06, "epoch": 6.022241421751994, "step": 19500}
{"loss": 0.0334, "grad_norm": 1.0045534372329712, "learning_rate": 1.4662154176452946e-06, "epoch": 6.037686853524213, "step": 19550}
{"loss": 0.0314, "grad_norm": 1.2588648796081543, "learning_rate": 1.4444111600837651e-06, "epoch": 6.0531322852964315, "step": 19600}
{"loss": 0.034, "grad_norm": 1.2225759029388428, "learning_rate": 1.42274284203113e-06, "epoch": 6.06857771706865, "step": 19650}
{"loss": 0.0361, "grad_norm": 0.9935839176177979, "learning_rate": 1.4012112919241562e-06, "epoch": 6.084023148840869, "step": 19700}
{"loss": 0.031, "grad_norm": 1.4447047710418701, "learning_rate": 1.3798173329706094e-06, "epoch": 6.099468580613087, "step": 19750}
{"loss": 0.0301, "grad_norm": 1.0066790580749512, "learning_rate": 1.3585617831177822e-06, "epoch": 6.114914012385306, "step": 19800}
{"loss": 0.0355, "grad_norm": 0.988544225692749, "learning_rate": 1.3374454550212291e-06, "epoch": 6.130359444157524, "step": 19850}
{"loss": 0.031, "grad_norm": 0.8867697715759277, "learning_rate": 1.316469156013685e-06, "epoch": 6.145804875929743, "step": 19900}
{"loss": 0.0293, "grad_norm": 1.514653205871582, "learning_rate": 1.2956336880742094e-06, "epoch": 6.161250307701961, "step": 19950}
{"loss": 0.0315, "grad_norm": 1.0995004177093506, "learning_rate": 1.2749398477975161e-06, "epoch": 6.176695739474179, "step": 20000}
{"eval_loss": 0.05590539053082466, "eval_wer": 3.2634, "eval_cer": 3.2452, "eval_runtime": 52645.6415, "eval_samples_per_second": 0.516, "eval_steps_per_second": 0.516, "epoch": 6.176695739474179, "step": 20000}
{"loss": 0.0314, "grad_norm": 0.8651966452598572, "learning_rate": 1.2543884263635209e-06, "epoch": 6.192141171246398, "step": 20050}
{"loss": 0.0314, "grad_norm": 0.9705133438110352, "learning_rate": 1.2339802095070975e-06, "epoch": 6.207586603018616, "step": 20100}
{"loss": 0.0308, "grad_norm": 0.9956588745117188, "learning_rate": 1.2137159774880275e-06, "epoch": 6.223032034790835, "step": 20150}
{"loss": 0.0344, "grad_norm": 1.34178626537323, "learning_rate": 1.1935965050611746e-06, "epoch": 6.238477466563054, "step": 20200}
{"loss": 0.0295, "grad_norm": 1.2686400413513184, "learning_rate": 1.1736225614468627e-06, "epoch": 6.253922898335272, "step": 20250}
{"loss": 0.0335, "grad_norm": 1.3069908618927002, "learning_rate": 1.1537949103014684e-06, "epoch": 6.269368330107491, "step": 20300}
{"loss": 0.0296, "grad_norm": 0.9910427927970886, "learning_rate": 1.1341143096882217e-06, "epoch": 6.284813761879709, "step": 20350}
{"loss": 0.0297, "grad_norm": 1.5726985931396484, "learning_rate": 1.1145815120482234e-06, "epoch": 6.300259193651928, "step": 20400}
{"loss": 0.0329, "grad_norm": 0.9690712094306946, "learning_rate": 1.0951972641716778e-06, "epoch": 6.3157046254241465, "step": 20450}
{"loss": 0.0296, "grad_norm": 0.9835613369941711, "learning_rate": 1.0759623071693415e-06, "epoch": 6.331150057196364, "step": 20500}
{"loss": 0.0296, "grad_norm": 1.2649805545806885, "learning_rate": 1.056877376444191e-06, "epoch": 6.346595488968583, "step": 20550}
{"loss": 0.0333, "grad_norm": 0.956112802028656, "learning_rate": 1.0379432016633e-06, "epoch": 6.362040920740801, "step": 20600}
{"loss": 0.0312, "grad_norm": 1.1795705556869507, "learning_rate": 1.0191605067299465e-06, "epoch": 6.37748635251302, "step": 20650}
{"loss": 0.0324, "grad_norm": 1.045459508895874, "learning_rate": 1.00053000975594e-06, "epoch": 6.3929317842852385, "step": 20700}
{"loss": 0.0272, "grad_norm": 1.12046217918396, "learning_rate": 9.820524230341566e-07, "epoch": 6.408377216057457, "step": 20750}
{"loss": 0.032, "grad_norm": 1.405485987663269, "learning_rate": 9.637284530113155e-07, "epoch": 6.423822647829676, "step": 20800}
{"loss": 0.0318, "grad_norm": 1.584161639213562, "learning_rate": 9.455588002609634e-07, "epoch": 6.439268079601894, "step": 20850}
{"loss": 0.0326, "grad_norm": 1.105044960975647, "learning_rate": 9.275441594566908e-07, "epoch": 6.454713511374113, "step": 20900}
{"loss": 0.0337, "grad_norm": 1.457451581954956, "learning_rate": 9.096852193455779e-07, "epoch": 6.470158943146331, "step": 20950}
{"loss": 0.0324, "grad_norm": 1.5277788639068604, "learning_rate": 8.919826627218536e-07, "epoch": 6.48560437491855, "step": 21000}
{"loss": 0.0325, "grad_norm": 1.1611404418945312, "learning_rate": 8.744371664008011e-07, "epoch": 6.501049806690768, "step": 21050}
{"loss": 0.0304, "grad_norm": 1.3482595682144165, "learning_rate": 8.57049401192866e-07, "epoch": 6.516495238462986, "step": 21100}
{"loss": 0.0312, "grad_norm": 1.0619451999664307, "learning_rate": 8.398200318780281e-07, "epoch": 6.531940670235205, "step": 21150}
{"loss": 0.0313, "grad_norm": 1.314241647720337, "learning_rate": 8.227497171803694e-07, "epoch": 6.547386102007423, "step": 21200}
{"loss": 0.0302, "grad_norm": 1.2173668146133423, "learning_rate": 8.058391097429008e-07, "epoch": 6.562831533779642, "step": 21250}
{"loss": 0.0291, "grad_norm": 1.204345703125, "learning_rate": 7.890888561026006e-07, "epoch": 6.5782769655518605, "step": 21300}
{"loss": 0.0279, "grad_norm": 1.122661828994751, "learning_rate": 7.724995966657001e-07, "epoch": 6.593722397324079, "step": 21350}
{"loss": 0.0336, "grad_norm": 1.1982940435409546, "learning_rate": 7.560719656832027e-07, "epoch": 6.609167829096298, "step": 21400}
{"loss": 0.0279, "grad_norm": 0.9608955979347229, "learning_rate": 7.398065912266283e-07, "epoch": 6.624613260868516, "step": 21450}
{"loss": 0.0309, "grad_norm": 1.2982630729675293, "learning_rate": 7.237040951640012e-07, "epoch": 6.640058692640735, "step": 21500}
{"loss": 0.0299, "grad_norm": 1.4781590700149536, "learning_rate": 7.077650931360824e-07, "epoch": 6.655504124412953, "step": 21550}
{"loss": 0.0305, "grad_norm": 1.2990409135818481, "learning_rate": 6.919901945328194e-07, "epoch": 6.670949556185171, "step": 21600}
{"loss": 0.0346, "grad_norm": 1.140785574913025, "learning_rate": 6.763800024700601e-07, "epoch": 6.68639498795739, "step": 21650}
{"loss": 0.0343, "grad_norm": 1.1472089290618896, "learning_rate": 6.609351137664854e-07, "epoch": 6.701840419729608, "step": 21700}
{"loss": 0.032, "grad_norm": 0.9984555840492249, "learning_rate": 6.456561189207921e-07, "epoch": 6.717285851501827, "step": 21750}
{"loss": 0.0308, "grad_norm": 1.1736177206039429, "learning_rate": 6.305436020891248e-07, "epoch": 6.732731283274045, "step": 21800}
{"loss": 0.0313, "grad_norm": 1.011386752128601, "learning_rate": 6.155981410627299e-07, "epoch": 6.748176715046264, "step": 21850}
{"loss": 0.0329, "grad_norm": 0.9934018850326538, "learning_rate": 6.008203072458757e-07, "epoch": 6.7636221468184825, "step": 21900}
{"loss": 0.0308, "grad_norm": 1.39398992061615, "learning_rate": 5.862106656339967e-07, "epoch": 6.779067578590701, "step": 21950}
{"loss": 0.0313, "grad_norm": 1.3345685005187988, "learning_rate": 5.717697747921025e-07, "epoch": 6.79451301036292, "step": 22000}
{"loss": 0.029, "grad_norm": 1.2288165092468262, "learning_rate": 5.574981868334133e-07, "epoch": 6.809958442135138, "step": 22050}
{"loss": 0.0304, "grad_norm": 0.9188125133514404, "learning_rate": 5.433964473982573e-07, "epoch": 6.825403873907357, "step": 22100}
{"loss": 0.0299, "grad_norm": 1.12663996219635, "learning_rate": 5.294650956332065e-07, "epoch": 6.8408493056795745, "step": 22150}
{"loss": 0.0276, "grad_norm": 1.1604453325271606, "learning_rate": 5.157046641704616e-07, "epoch": 6.856294737451793, "step": 22200}
{"loss": 0.0338, "grad_norm": 1.3098431825637817, "learning_rate": 5.021156791074966e-07, "epoch": 6.871740169224012, "step": 22250}
{"loss": 0.0308, "grad_norm": 1.2260301113128662, "learning_rate": 4.886986599869326e-07, "epoch": 6.88718560099623, "step": 22300}
{"loss": 0.0287, "grad_norm": 1.033446192741394, "learning_rate": 4.7545411977668596e-07, "epoch": 6.902631032768449, "step": 22350}
{"loss": 0.0295, "grad_norm": 1.4619596004486084, "learning_rate": 4.6238256485034715e-07, "epoch": 6.918076464540667, "step": 22400}
{"loss": 0.0293, "grad_norm": 1.2507656812667847, "learning_rate": 4.494844949678251e-07, "epoch": 6.933521896312886, "step": 22450}
{"loss": 0.0316, "grad_norm": 1.0726207494735718, "learning_rate": 4.367604032562417e-07, "epoch": 6.9489673280851045, "step": 22500}
{"eval_loss": 0.054116539657115936, "eval_wer": 3.6173, "eval_cer": 3.4674, "eval_runtime": 52593.9978, "eval_samples_per_second": 0.517, "eval_steps_per_second": 0.517, "epoch": 6.9489673280851045, "step": 22500}
{"loss": 0.0282, "grad_norm": 1.4471551179885864, "learning_rate": 4.2421077619107355e-07, "epoch": 6.964412759857323, "step": 22550}
{"loss": 0.0318, "grad_norm": 1.008939504623413, "learning_rate": 4.1183609357755506e-07, "epoch": 6.979858191629542, "step": 22600}
{"loss": 0.0283, "grad_norm": 1.3363741636276245, "learning_rate": 3.9963682853233763e-07, "epoch": 6.99530362340176, "step": 22650}
{"loss": 0.0333, "grad_norm": 0.7932761311531067, "learning_rate": 3.8761344746539254e-07, "epoch": 7.010502893605109, "step": 22700}
{"loss": 0.0285, "grad_norm": 1.1240192651748657, "learning_rate": 3.7576641006219105e-07, "epoch": 7.025948325377327, "step": 22750}
{"loss": 0.0287, "grad_norm": 1.375722050666809, "learning_rate": 3.640961692661138e-07, "epoch": 7.041393757149546, "step": 22800}
{"loss": 0.0301, "grad_norm": 0.16981202363967896, "learning_rate": 3.526031712611511e-07, "epoch": 7.056839188921764, "step": 22850}
{"loss": 0.0296, "grad_norm": 1.863183617591858, "learning_rate": 3.412878554548282e-07, "epoch": 7.072284620693982, "step": 22900}
{"loss": 0.0286, "grad_norm": 1.230214238166809, "learning_rate": 3.301506544614197e-07, "epoch": 7.087730052466201, "step": 22950}
{"loss": 0.0291, "grad_norm": 1.3884838819503784, "learning_rate": 3.1919199408539603e-07, "epoch": 7.103175484238419, "step": 23000}
{"loss": 0.0322, "grad_norm": 0.39363518357276917, "learning_rate": 3.084122933051542e-07, "epoch": 7.118620916010638, "step": 23050}
{"loss": 0.0279, "grad_norm": 0.7999137043952942, "learning_rate": 2.9781196425699543e-07, "epoch": 7.1340663477828565, "step": 23100}
{"loss": 0.0295, "grad_norm": 0.3544481694698334, "learning_rate": 2.873914122193655e-07, "epoch": 7.149511779555075, "step": 23150}
{"loss": 0.0268, "grad_norm": 0.8160250186920166, "learning_rate": 2.7715103559736613e-07, "epoch": 7.164957211327294, "step": 23200}
{"loss": 0.0283, "grad_norm": 0.44636547565460205, "learning_rate": 2.6709122590751643e-07, "epoch": 7.180402643099512, "step": 23250}
{"loss": 0.0311, "grad_norm": 0.7338014841079712, "learning_rate": 2.572123677627869e-07, "epoch": 7.195848074871731, "step": 23300}
{"loss": 0.0321, "grad_norm": 0.6597551703453064, "learning_rate": 2.475148388578974e-07, "epoch": 7.211293506643949, "step": 23350}
{"loss": 0.0303, "grad_norm": 1.102907657623291, "learning_rate": 2.3799900995487125e-07, "epoch": 7.226738938416167, "step": 23400}
{"loss": 0.0307, "grad_norm": 0.8434990644454956, "learning_rate": 2.2866524486886422e-07, "epoch": 7.242184370188386, "step": 23450}
{"loss": 0.0316, "grad_norm": 0.543166995048523, "learning_rate": 2.1951390045425403e-07, "epoch": 7.257629801960604, "step": 23500}
{"loss": 0.0282, "grad_norm": 0.927478551864624, "learning_rate": 2.1054532659099625e-07, "epoch": 7.273075233732823, "step": 23550}
{"loss": 0.0301, "grad_norm": 0.5592683553695679, "learning_rate": 2.0175986617124888e-07, "epoch": 7.288520665505041, "step": 23600}
{"loss": 0.0302, "grad_norm": 0.921030580997467, "learning_rate": 1.9315785508625894e-07, "epoch": 7.30396609727726, "step": 23650}
{"loss": 0.0273, "grad_norm": 0.6287786960601807, "learning_rate": 1.8473962221352505e-07, "epoch": 7.319411529049479, "step": 23700}
{"loss": 0.0306, "grad_norm": 0.8215200304985046, "learning_rate": 1.7650548940422117e-07, "epoch": 7.334856960821697, "step": 23750}
{"loss": 0.0284, "grad_norm": 0.9423342347145081, "learning_rate": 1.6845577147089288e-07, "epoch": 7.350302392593916, "step": 23800}
{"loss": 0.032, "grad_norm": 0.7084172964096069, "learning_rate": 1.6059077617541885e-07, "epoch": 7.365747824366134, "step": 23850}
{"loss": 0.0271, "grad_norm": 0.5657592415809631, "learning_rate": 1.5291080421724501e-07, "epoch": 7.381193256138353, "step": 23900}
{"loss": 0.0318, "grad_norm": 0.573272168636322, "learning_rate": 1.4541614922189084e-07, "epoch": 7.3966386879105706, "step": 23950}
{"loss": 0.0292, "grad_norm": 0.9874823689460754, "learning_rate": 1.3810709772971943e-07, "epoch": 7.412084119682789, "step": 24000}
{"loss": 0.0314, "grad_norm": 0.6263732314109802, "learning_rate": 1.3098392918498515e-07, "epoch": 7.427529551455008, "step": 24050}
{"loss": 0.0304, "grad_norm": 1.2983208894729614, "learning_rate": 1.240469159251484e-07, "epoch": 7.442974983227226, "step": 24100}
{"loss": 0.029, "grad_norm": 0.7494723200798035, "learning_rate": 1.1729632317046214e-07, "epoch": 7.458420414999445, "step": 24150}
{"loss": 0.028, "grad_norm": 0.9327051043510437, "learning_rate": 1.1073240901383564e-07, "epoch": 7.473865846771663, "step": 24200}
{"loss": 0.0261, "grad_norm": 0.5246080160140991, "learning_rate": 1.0435542441096292e-07, "epoch": 7.489311278543882, "step": 24250}
{"loss": 0.0266, "grad_norm": 0.4416763484477997, "learning_rate": 9.816561317073036e-08, "epoch": 7.504756710316101, "step": 24300}
{"loss": 0.0318, "grad_norm": 0.8656935691833496, "learning_rate": 9.216321194589428e-08, "epoch": 7.520202142088319, "step": 24350}
{"loss": 0.0321, "grad_norm": 0.7265862822532654, "learning_rate": 8.63484502240336e-08, "epoch": 7.535647573860538, "step": 24400}
{"loss": 0.0326, "grad_norm": 0.46257850527763367, "learning_rate": 8.072155031877637e-08, "epoch": 7.551093005632756, "step": 24450}
{"loss": 0.0328, "grad_norm": 0.5929704904556274, "learning_rate": 7.528272736129882e-08, "epoch": 7.566538437404974, "step": 24500}
{"loss": 0.0362, "grad_norm": 0.8801257610321045, "learning_rate": 7.003218929210077e-08, "epoch": 7.581983869177193, "step": 24550}
{"loss": 0.029, "grad_norm": 0.5928084254264832, "learning_rate": 6.497013685305586e-08, "epoch": 7.597429300949411, "step": 24600}
{"loss": 0.0285, "grad_norm": 0.984961986541748, "learning_rate": 6.009676357973782e-08, "epoch": 7.61287473272163, "step": 24650}
{"loss": 0.0299, "grad_norm": 0.5032494068145752, "learning_rate": 5.541225579401843e-08, "epoch": 7.628320164493848, "step": 24700}
{"loss": 0.0306, "grad_norm": 0.6203643083572388, "learning_rate": 5.091679259694504e-08, "epoch": 7.643765596266067, "step": 24750}
{"loss": 0.0294, "grad_norm": 0.5431063175201416, "learning_rate": 4.6610545861895394e-08, "epoch": 7.6592110280382855, "step": 24800}
{"loss": 0.0297, "grad_norm": 0.9515995979309082, "learning_rate": 4.249368022800182e-08, "epoch": 7.674656459810504, "step": 24850}
{"loss": 0.0301, "grad_norm": 1.5214015245437622, "learning_rate": 3.8566353093861805e-08, "epoch": 7.690101891582723, "step": 24900}
{"loss": 0.0279, "grad_norm": 0.4596222937107086, "learning_rate": 3.482871461151616e-08, "epoch": 7.705547323354941, "step": 24950}
{"loss": 0.0267, "grad_norm": 0.9344026446342468, "learning_rate": 3.1280907680709684e-08, "epoch": 7.72099275512716, "step": 25000}
{"eval_loss": 0.05469387769699097, "eval_wer": 3.3599, "eval_cer": 3.2115, "eval_runtime": 52575.1298, "eval_samples_per_second": 0.517, "eval_steps_per_second": 0.517, "epoch": 7.72099275512716, "step": 25000}
{"loss": 0.0298, "grad_norm": 0.9881210327148438, "learning_rate": 2.7923067943428916e-08, "epoch": 7.7364381868993775, "step": 25050}
{"loss": 0.0313, "grad_norm": 0.9921440482139587, "learning_rate": 2.475532377871459e-08, "epoch": 7.751883618671596, "step": 25100}
{"loss": 0.0321, "grad_norm": 1.006397008895874, "learning_rate": 2.177779629775445e-08, "epoch": 7.767329050443815, "step": 25150}
{"loss": 0.0277, "grad_norm": 1.3127951622009277, "learning_rate": 1.899059933925085e-08, "epoch": 7.782774482216033, "step": 25200}
{"loss": 0.0283, "grad_norm": 1.2630587816238403, "learning_rate": 1.6393839465072558e-08, "epoch": 7.798219913988252, "step": 25250}
{"loss": 0.0292, "grad_norm": 1.159556269645691, "learning_rate": 1.3987615956174705e-08, "epoch": 7.81366534576047, "step": 25300}
{"loss": 0.0286, "grad_norm": 0.7944751977920532, "learning_rate": 1.1772020808809015e-08, "epoch": 7.829110777532689, "step": 25350}
{"loss": 0.0302, "grad_norm": 1.121640682220459, "learning_rate": 9.747138731002192e-09, "epoch": 7.8445562093049075, "step": 25400}
{"loss": 0.0299, "grad_norm": 0.7246575951576233, "learning_rate": 7.913047139319618e-09, "epoch": 7.860001641077126, "step": 25450}
{"loss": 0.0273, "grad_norm": 0.7901625633239746, "learning_rate": 6.26981615590494e-09, "epoch": 7.875447072849345, "step": 25500}
{"loss": 0.0304, "grad_norm": 1.3983275890350342, "learning_rate": 4.817508605798327e-09, "epoch": 7.890892504621563, "step": 25550}
{"loss": 0.0326, "grad_norm": 0.7403478026390076, "learning_rate": 3.556180014535615e-09, "epoch": 7.906337936393781, "step": 25600}
{"loss": 0.0293, "grad_norm": 0.8732885122299194, "learning_rate": 2.48587860602445e-09, "epoch": 7.9217833681659995, "step": 25650}
{"loss": 0.0303, "grad_norm": 1.3072352409362793, "learning_rate": 1.6066453007018702e-09, "epoch": 7.937228799938218, "step": 25700}
{"loss": 0.0295, "grad_norm": 1.2954020500183105, "learning_rate": 9.185137139683387e-10, "epoch": 7.952674231710437, "step": 25750}
{"loss": 0.0322, "grad_norm": 0.371503621339798, "learning_rate": 4.2151015490432544e-10, "epoch": 7.968119663482655, "step": 25800}
{"loss": 0.0309, "grad_norm": 1.471834421157837, "learning_rate": 1.1565362526111401e-10, "epoch": 7.983565095254874, "step": 25850}
{"loss": 0.023, "grad_norm": 0.9522649645805359, "learning_rate": 9.55818739156733e-13, "epoch": 7.999010527027092, "step": 25900}
{"train_runtime": 344855.4595, "train_samples_per_second": 4.806, "train_steps_per_second": 0.075, "total_flos": 1.691599562759209e+21, "train_loss": 0.014023746631595457, "epoch": 8.0, "step": 25904}
{"eval_loss": 0.08388473838567734, "eval_wer": 0.6935, "eval_cer": 1.202, "eval_runtime": 113429.374, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.24, "epoch": 8.0, "step": 25904}