Files
ModelHub XC 6076bb9561 初始化项目,由ModelHub XC社区提供模型
Model: KasuleTrevor/cdli-whisper-ml-eng-lug-full-a40-5e-5
Source: Original Platform
2026-05-09 01:04:12 +08:00

1315 lines
31 KiB
JSON

{
"best_global_step": 2500,
"best_metric": 0.4086505663851241,
"best_model_checkpoint": "results/finetuned/ML-ENG-LUG-FULL-A40-5e-5/checkpoint-2500",
"epoch": 11.563787495482472,
"eval_steps": 250,
"global_step": 4000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07228044813877846,
"grad_norm": 5.96875,
"learning_rate": 8.000000000000001e-06,
"loss": 1.1364,
"step": 25
},
{
"epoch": 0.14456089627755692,
"grad_norm": 8.75,
"learning_rate": 1.6333333333333335e-05,
"loss": 0.8278,
"step": 50
},
{
"epoch": 0.21684134441633537,
"grad_norm": 4.3125,
"learning_rate": 2.466666666666667e-05,
"loss": 0.7449,
"step": 75
},
{
"epoch": 0.28912179255511383,
"grad_norm": 4.15625,
"learning_rate": 3.3e-05,
"loss": 0.6846,
"step": 100
},
{
"epoch": 0.3614022406938923,
"grad_norm": 3.09375,
"learning_rate": 4.133333333333333e-05,
"loss": 0.7195,
"step": 125
},
{
"epoch": 0.43368268883267075,
"grad_norm": 3.15625,
"learning_rate": 4.966666666666667e-05,
"loss": 0.5968,
"step": 150
},
{
"epoch": 0.5059631369714492,
"grad_norm": 3.8125,
"learning_rate": 4.948936170212766e-05,
"loss": 0.6925,
"step": 175
},
{
"epoch": 0.5782435851102277,
"grad_norm": 3.84375,
"learning_rate": 4.895744680851064e-05,
"loss": 0.6004,
"step": 200
},
{
"epoch": 0.6505240332490061,
"grad_norm": 3.171875,
"learning_rate": 4.842553191489362e-05,
"loss": 0.6391,
"step": 225
},
{
"epoch": 0.7228044813877846,
"grad_norm": 3.515625,
"learning_rate": 4.78936170212766e-05,
"loss": 0.6253,
"step": 250
},
{
"epoch": 0.7228044813877846,
"eval_cer": 0.2721768277530176,
"eval_loss": 0.8155665397644043,
"eval_runtime": 1120.749,
"eval_samples_per_second": 1.238,
"eval_steps_per_second": 0.31,
"eval_wer": 0.4660412216168416,
"step": 250
},
{
"epoch": 0.7950849295265631,
"grad_norm": 2.59375,
"learning_rate": 4.736170212765957e-05,
"loss": 0.5837,
"step": 275
},
{
"epoch": 0.8673653776653415,
"grad_norm": 3.078125,
"learning_rate": 4.682978723404256e-05,
"loss": 0.5739,
"step": 300
},
{
"epoch": 0.93964582580412,
"grad_norm": 2.765625,
"learning_rate": 4.6297872340425536e-05,
"loss": 0.5402,
"step": 325
},
{
"epoch": 1.0115648717022045,
"grad_norm": 2.5625,
"learning_rate": 4.576595744680851e-05,
"loss": 0.5285,
"step": 350
},
{
"epoch": 1.083845319840983,
"grad_norm": 2.734375,
"learning_rate": 4.5234042553191494e-05,
"loss": 0.4131,
"step": 375
},
{
"epoch": 1.1561257679797614,
"grad_norm": 3.3125,
"learning_rate": 4.4702127659574474e-05,
"loss": 0.3972,
"step": 400
},
{
"epoch": 1.2284062161185398,
"grad_norm": 2.171875,
"learning_rate": 4.4170212765957446e-05,
"loss": 0.4073,
"step": 425
},
{
"epoch": 1.3006866642573183,
"grad_norm": 2.671875,
"learning_rate": 4.3638297872340425e-05,
"loss": 0.3999,
"step": 450
},
{
"epoch": 1.372967112396097,
"grad_norm": 2.34375,
"learning_rate": 4.310638297872341e-05,
"loss": 0.4226,
"step": 475
},
{
"epoch": 1.4452475605348754,
"grad_norm": 2.6875,
"learning_rate": 4.2574468085106384e-05,
"loss": 0.4188,
"step": 500
},
{
"epoch": 1.4452475605348754,
"eval_cer": 0.2362257256765857,
"eval_loss": 0.8118799924850464,
"eval_runtime": 991.3995,
"eval_samples_per_second": 1.399,
"eval_steps_per_second": 0.35,
"eval_wer": 0.4247106653572799,
"step": 500
},
{
"epoch": 1.5175280086736538,
"grad_norm": 2.953125,
"learning_rate": 4.204255319148936e-05,
"loss": 0.3959,
"step": 525
},
{
"epoch": 1.5898084568124322,
"grad_norm": 2.421875,
"learning_rate": 4.151063829787234e-05,
"loss": 0.4188,
"step": 550
},
{
"epoch": 1.6620889049512106,
"grad_norm": 2.953125,
"learning_rate": 4.097872340425532e-05,
"loss": 0.4241,
"step": 575
},
{
"epoch": 1.7343693530899893,
"grad_norm": 2.53125,
"learning_rate": 4.04468085106383e-05,
"loss": 0.4018,
"step": 600
},
{
"epoch": 1.8066498012287675,
"grad_norm": 2.421875,
"learning_rate": 3.991489361702127e-05,
"loss": 0.4323,
"step": 625
},
{
"epoch": 1.8789302493675462,
"grad_norm": 2.625,
"learning_rate": 3.938297872340426e-05,
"loss": 0.4012,
"step": 650
},
{
"epoch": 1.9512106975063246,
"grad_norm": 2.328125,
"learning_rate": 3.885106382978724e-05,
"loss": 0.4,
"step": 675
},
{
"epoch": 2.023129743404409,
"grad_norm": 2.109375,
"learning_rate": 3.831914893617021e-05,
"loss": 0.3423,
"step": 700
},
{
"epoch": 2.0954101915431878,
"grad_norm": 2.09375,
"learning_rate": 3.7787234042553196e-05,
"loss": 0.2476,
"step": 725
},
{
"epoch": 2.167690639681966,
"grad_norm": 1.8984375,
"learning_rate": 3.7255319148936176e-05,
"loss": 0.2709,
"step": 750
},
{
"epoch": 2.167690639681966,
"eval_cer": 0.23517525369611372,
"eval_loss": 0.8228520750999451,
"eval_runtime": 1015.9667,
"eval_samples_per_second": 1.365,
"eval_steps_per_second": 0.342,
"eval_wer": 0.42064168739296354,
"step": 750
},
{
"epoch": 2.2399710878207446,
"grad_norm": 2.3125,
"learning_rate": 3.672340425531915e-05,
"loss": 0.2533,
"step": 775
},
{
"epoch": 2.312251535959523,
"grad_norm": 2.015625,
"learning_rate": 3.619148936170213e-05,
"loss": 0.2579,
"step": 800
},
{
"epoch": 2.3845319840983015,
"grad_norm": 2.203125,
"learning_rate": 3.565957446808511e-05,
"loss": 0.2544,
"step": 825
},
{
"epoch": 2.4568124322370797,
"grad_norm": 2.421875,
"learning_rate": 3.5127659574468086e-05,
"loss": 0.2634,
"step": 850
},
{
"epoch": 2.5290928803758583,
"grad_norm": 2.359375,
"learning_rate": 3.4595744680851065e-05,
"loss": 0.2584,
"step": 875
},
{
"epoch": 2.6013733285146365,
"grad_norm": 2.15625,
"learning_rate": 3.4063829787234044e-05,
"loss": 0.2578,
"step": 900
},
{
"epoch": 2.673653776653415,
"grad_norm": 1.75,
"learning_rate": 3.353191489361702e-05,
"loss": 0.2491,
"step": 925
},
{
"epoch": 2.745934224792194,
"grad_norm": 2.703125,
"learning_rate": 3.3e-05,
"loss": 0.2488,
"step": 950
},
{
"epoch": 2.818214672930972,
"grad_norm": 2.0,
"learning_rate": 3.2468085106382975e-05,
"loss": 0.2489,
"step": 975
},
{
"epoch": 2.8904951210697507,
"grad_norm": 2.5625,
"learning_rate": 3.193617021276596e-05,
"loss": 0.2571,
"step": 1000
},
{
"epoch": 2.8904951210697507,
"eval_cer": 0.22612252442822162,
"eval_loss": 0.814146876335144,
"eval_runtime": 916.4096,
"eval_samples_per_second": 1.514,
"eval_steps_per_second": 0.379,
"eval_wer": 0.4153135913858677,
"step": 1000
},
{
"epoch": 2.962775569208529,
"grad_norm": 2.578125,
"learning_rate": 3.140425531914894e-05,
"loss": 0.2534,
"step": 1025
},
{
"epoch": 3.0346946151066136,
"grad_norm": 1.6796875,
"learning_rate": 3.087234042553191e-05,
"loss": 0.2146,
"step": 1050
},
{
"epoch": 3.1069750632453923,
"grad_norm": 1.9140625,
"learning_rate": 3.0340425531914895e-05,
"loss": 0.1515,
"step": 1075
},
{
"epoch": 3.1792555113841705,
"grad_norm": 1.8828125,
"learning_rate": 2.9808510638297878e-05,
"loss": 0.1359,
"step": 1100
},
{
"epoch": 3.251535959522949,
"grad_norm": 1.9765625,
"learning_rate": 2.927659574468085e-05,
"loss": 0.1575,
"step": 1125
},
{
"epoch": 3.3238164076617274,
"grad_norm": 2.453125,
"learning_rate": 2.8744680851063833e-05,
"loss": 0.1416,
"step": 1150
},
{
"epoch": 3.396096855800506,
"grad_norm": 1.9296875,
"learning_rate": 2.821276595744681e-05,
"loss": 0.1545,
"step": 1175
},
{
"epoch": 3.4683773039392847,
"grad_norm": 1.9296875,
"learning_rate": 2.7680851063829788e-05,
"loss": 0.1562,
"step": 1200
},
{
"epoch": 3.540657752078063,
"grad_norm": 1.828125,
"learning_rate": 2.714893617021277e-05,
"loss": 0.1489,
"step": 1225
},
{
"epoch": 3.612938200216841,
"grad_norm": 1.8515625,
"learning_rate": 2.6617021276595743e-05,
"loss": 0.1581,
"step": 1250
},
{
"epoch": 3.612938200216841,
"eval_cer": 0.2291752867000159,
"eval_loss": 0.9097059369087219,
"eval_runtime": 904.9745,
"eval_samples_per_second": 1.533,
"eval_steps_per_second": 0.383,
"eval_wer": 0.41673846986102747,
"step": 1250
},
{
"epoch": 3.6852186483556197,
"grad_norm": 1.859375,
"learning_rate": 2.6085106382978725e-05,
"loss": 0.1481,
"step": 1275
},
{
"epoch": 3.7574990964943984,
"grad_norm": 2.40625,
"learning_rate": 2.5553191489361705e-05,
"loss": 0.1498,
"step": 1300
},
{
"epoch": 3.8297795446331766,
"grad_norm": 1.984375,
"learning_rate": 2.502127659574468e-05,
"loss": 0.1629,
"step": 1325
},
{
"epoch": 3.9020599927719553,
"grad_norm": 2.171875,
"learning_rate": 2.448936170212766e-05,
"loss": 0.1493,
"step": 1350
},
{
"epoch": 3.9743404409107335,
"grad_norm": 1.7421875,
"learning_rate": 2.395744680851064e-05,
"loss": 0.15,
"step": 1375
},
{
"epoch": 4.046259486808818,
"grad_norm": 2.0,
"learning_rate": 2.3425531914893618e-05,
"loss": 0.1028,
"step": 1400
},
{
"epoch": 4.118539934947597,
"grad_norm": 1.8203125,
"learning_rate": 2.2893617021276597e-05,
"loss": 0.0812,
"step": 1425
},
{
"epoch": 4.1908203830863755,
"grad_norm": 1.90625,
"learning_rate": 2.2361702127659576e-05,
"loss": 0.0859,
"step": 1450
},
{
"epoch": 4.263100831225153,
"grad_norm": 1.5078125,
"learning_rate": 2.1829787234042552e-05,
"loss": 0.0862,
"step": 1475
},
{
"epoch": 4.335381279363932,
"grad_norm": 1.5,
"learning_rate": 2.1297872340425535e-05,
"loss": 0.083,
"step": 1500
},
{
"epoch": 4.335381279363932,
"eval_cer": 0.2270746892469672,
"eval_loss": 0.9749350547790527,
"eval_runtime": 962.2618,
"eval_samples_per_second": 1.441,
"eval_steps_per_second": 0.361,
"eval_wer": 0.41771518722192486,
"step": 1500
},
{
"epoch": 4.407661727502711,
"grad_norm": 1.8515625,
"learning_rate": 2.076595744680851e-05,
"loss": 0.0907,
"step": 1525
},
{
"epoch": 4.479942175641489,
"grad_norm": 1.5546875,
"learning_rate": 2.023404255319149e-05,
"loss": 0.0829,
"step": 1550
},
{
"epoch": 4.552222623780267,
"grad_norm": 2.03125,
"learning_rate": 1.970212765957447e-05,
"loss": 0.0893,
"step": 1575
},
{
"epoch": 4.624503071919046,
"grad_norm": 1.46875,
"learning_rate": 1.9170212765957448e-05,
"loss": 0.0947,
"step": 1600
},
{
"epoch": 4.696783520057824,
"grad_norm": 1.96875,
"learning_rate": 1.8638297872340427e-05,
"loss": 0.0844,
"step": 1625
},
{
"epoch": 4.769063968196603,
"grad_norm": 1.5234375,
"learning_rate": 1.8106382978723403e-05,
"loss": 0.0872,
"step": 1650
},
{
"epoch": 4.841344416335382,
"grad_norm": 1.8984375,
"learning_rate": 1.7574468085106382e-05,
"loss": 0.0854,
"step": 1675
},
{
"epoch": 4.913624864474159,
"grad_norm": 1.6015625,
"learning_rate": 1.704255319148936e-05,
"loss": 0.0822,
"step": 1700
},
{
"epoch": 4.985905312612938,
"grad_norm": 1.4453125,
"learning_rate": 1.651063829787234e-05,
"loss": 0.0841,
"step": 1725
},
{
"epoch": 5.057824358511023,
"grad_norm": 1.3984375,
"learning_rate": 1.597872340425532e-05,
"loss": 0.0593,
"step": 1750
},
{
"epoch": 5.057824358511023,
"eval_cer": 0.2265957095050262,
"eval_loss": 1.0612818002700806,
"eval_runtime": 882.62,
"eval_samples_per_second": 1.571,
"eval_steps_per_second": 0.393,
"eval_wer": 0.41074975953764603,
"step": 1750
},
{
"epoch": 5.130104806649801,
"grad_norm": 1.3515625,
"learning_rate": 1.54468085106383e-05,
"loss": 0.0517,
"step": 1775
},
{
"epoch": 5.20238525478858,
"grad_norm": 1.6796875,
"learning_rate": 1.4914893617021278e-05,
"loss": 0.0462,
"step": 1800
},
{
"epoch": 5.274665702927358,
"grad_norm": 1.0703125,
"learning_rate": 1.4382978723404256e-05,
"loss": 0.0576,
"step": 1825
},
{
"epoch": 5.3469461510661365,
"grad_norm": 1.6484375,
"learning_rate": 1.3851063829787233e-05,
"loss": 0.0528,
"step": 1850
},
{
"epoch": 5.419226599204915,
"grad_norm": 1.5078125,
"learning_rate": 1.3319148936170214e-05,
"loss": 0.0537,
"step": 1875
},
{
"epoch": 5.491507047343694,
"grad_norm": 2.0625,
"learning_rate": 1.2787234042553192e-05,
"loss": 0.0546,
"step": 1900
},
{
"epoch": 5.563787495482472,
"grad_norm": 2.0,
"learning_rate": 1.225531914893617e-05,
"loss": 0.0536,
"step": 1925
},
{
"epoch": 5.63606794362125,
"grad_norm": 1.8046875,
"learning_rate": 1.172340425531915e-05,
"loss": 0.0531,
"step": 1950
},
{
"epoch": 5.708348391760029,
"grad_norm": 2.0625,
"learning_rate": 1.119148936170213e-05,
"loss": 0.0517,
"step": 1975
},
{
"epoch": 5.7806288398988075,
"grad_norm": 1.7734375,
"learning_rate": 1.0659574468085107e-05,
"loss": 0.0518,
"step": 2000
},
{
"epoch": 5.7806288398988075,
"eval_cer": 0.22345740881745507,
"eval_loss": 1.054749608039856,
"eval_runtime": 879.5621,
"eval_samples_per_second": 1.577,
"eval_steps_per_second": 0.395,
"eval_wer": 0.4107791260867282,
"step": 2000
},
{
"epoch": 5.852909288037586,
"grad_norm": 1.7890625,
"learning_rate": 1.0127659574468086e-05,
"loss": 0.0497,
"step": 2025
},
{
"epoch": 5.925189736176364,
"grad_norm": 1.5078125,
"learning_rate": 9.595744680851064e-06,
"loss": 0.0507,
"step": 2050
},
{
"epoch": 5.997470184315143,
"grad_norm": 1.7734375,
"learning_rate": 9.063829787234043e-06,
"loss": 0.0509,
"step": 2075
},
{
"epoch": 6.069389230213227,
"grad_norm": 1.203125,
"learning_rate": 8.53191489361702e-06,
"loss": 0.0415,
"step": 2100
},
{
"epoch": 6.141669678352006,
"grad_norm": 2.03125,
"learning_rate": 8.000000000000001e-06,
"loss": 0.04,
"step": 2125
},
{
"epoch": 6.213950126490785,
"grad_norm": 1.7421875,
"learning_rate": 7.468085106382979e-06,
"loss": 0.0394,
"step": 2150
},
{
"epoch": 6.286230574629562,
"grad_norm": 1.765625,
"learning_rate": 6.936170212765958e-06,
"loss": 0.042,
"step": 2175
},
{
"epoch": 6.358511022768341,
"grad_norm": 1.3125,
"learning_rate": 6.404255319148937e-06,
"loss": 0.0361,
"step": 2200
},
{
"epoch": 6.43079147090712,
"grad_norm": 1.5234375,
"learning_rate": 5.872340425531915e-06,
"loss": 0.0383,
"step": 2225
},
{
"epoch": 6.503071919045898,
"grad_norm": 1.390625,
"learning_rate": 5.340425531914894e-06,
"loss": 0.0382,
"step": 2250
},
{
"epoch": 6.503071919045898,
"eval_cer": 0.22488223459611492,
"eval_loss": 1.1097996234893799,
"eval_runtime": 877.748,
"eval_samples_per_second": 1.58,
"eval_steps_per_second": 0.395,
"eval_wer": 0.4095101068765971,
"step": 2250
},
{
"epoch": 6.575352367184676,
"grad_norm": 1.4921875,
"learning_rate": 4.808510638297872e-06,
"loss": 0.0398,
"step": 2275
},
{
"epoch": 6.647632815323455,
"grad_norm": 1.46875,
"learning_rate": 4.2765957446808515e-06,
"loss": 0.0356,
"step": 2300
},
{
"epoch": 6.719913263462233,
"grad_norm": 0.97265625,
"learning_rate": 3.7446808510638303e-06,
"loss": 0.0373,
"step": 2325
},
{
"epoch": 6.792193711601012,
"grad_norm": 1.3984375,
"learning_rate": 3.2127659574468086e-06,
"loss": 0.0347,
"step": 2350
},
{
"epoch": 6.864474159739791,
"grad_norm": 1.0703125,
"learning_rate": 2.6808510638297874e-06,
"loss": 0.0369,
"step": 2375
},
{
"epoch": 6.936754607878569,
"grad_norm": 0.9375,
"learning_rate": 2.148936170212766e-06,
"loss": 0.039,
"step": 2400
},
{
"epoch": 7.008673653776653,
"grad_norm": 1.84375,
"learning_rate": 1.6170212765957448e-06,
"loss": 0.0376,
"step": 2425
},
{
"epoch": 7.080954101915432,
"grad_norm": 2.0625,
"learning_rate": 1.0851063829787236e-06,
"loss": 0.04,
"step": 2450
},
{
"epoch": 7.1532345500542105,
"grad_norm": 1.171875,
"learning_rate": 5.531914893617021e-07,
"loss": 0.038,
"step": 2475
},
{
"epoch": 7.225514998192989,
"grad_norm": 1.8046875,
"learning_rate": 2.1276595744680853e-08,
"loss": 0.0356,
"step": 2500
},
{
"epoch": 7.225514998192989,
"eval_cer": 0.223833001387121,
"eval_loss": 1.1148525476455688,
"eval_runtime": 877.5866,
"eval_samples_per_second": 1.58,
"eval_steps_per_second": 0.395,
"eval_wer": 0.4086505663851241,
"step": 2500
},
{
"epoch": 7.297795446331767,
"grad_norm": 1.1328125,
"learning_rate": 1.916883116883117e-05,
"loss": 0.0364,
"step": 2525
},
{
"epoch": 7.370075894470546,
"grad_norm": 1.4375,
"learning_rate": 1.8844155844155846e-05,
"loss": 0.0418,
"step": 2550
},
{
"epoch": 7.442356342609324,
"grad_norm": 1.546875,
"learning_rate": 1.851948051948052e-05,
"loss": 0.0377,
"step": 2575
},
{
"epoch": 7.514636790748103,
"grad_norm": 1.4453125,
"learning_rate": 1.8194805194805195e-05,
"loss": 0.0429,
"step": 2600
},
{
"epoch": 7.5869172388868815,
"grad_norm": 1.6484375,
"learning_rate": 1.7870129870129872e-05,
"loss": 0.0387,
"step": 2625
},
{
"epoch": 7.659197687025659,
"grad_norm": 1.765625,
"learning_rate": 1.7545454545454545e-05,
"loss": 0.0463,
"step": 2650
},
{
"epoch": 7.731478135164438,
"grad_norm": 1.5234375,
"learning_rate": 1.722077922077922e-05,
"loss": 0.0421,
"step": 2675
},
{
"epoch": 7.803758583303217,
"grad_norm": 0.91015625,
"learning_rate": 1.6896103896103898e-05,
"loss": 0.0394,
"step": 2700
},
{
"epoch": 7.876039031441995,
"grad_norm": 1.59375,
"learning_rate": 1.657142857142857e-05,
"loss": 0.0415,
"step": 2725
},
{
"epoch": 7.948319479580773,
"grad_norm": 1.1796875,
"learning_rate": 1.6246753246753247e-05,
"loss": 0.0408,
"step": 2750
},
{
"epoch": 7.948319479580773,
"eval_cer": 0.22607830826054334,
"eval_loss": 1.116784930229187,
"eval_runtime": 899.4546,
"eval_samples_per_second": 1.542,
"eval_steps_per_second": 0.386,
"eval_wer": 0.41392684859068163,
"step": 2750
},
{
"epoch": 8.023129743404409,
"grad_norm": 2.171875,
"learning_rate": 1.592207792207792e-05,
"loss": 0.0422,
"step": 2775
},
{
"epoch": 8.095410191543188,
"grad_norm": 1.40625,
"learning_rate": 1.55974025974026e-05,
"loss": 0.0335,
"step": 2800
},
{
"epoch": 8.167690639681966,
"grad_norm": 1.1953125,
"learning_rate": 1.5272727272727276e-05,
"loss": 0.0312,
"step": 2825
},
{
"epoch": 8.239971087820745,
"grad_norm": 1.421875,
"learning_rate": 1.494805194805195e-05,
"loss": 0.0341,
"step": 2850
},
{
"epoch": 8.312251535959524,
"grad_norm": 1.21875,
"learning_rate": 1.4623376623376626e-05,
"loss": 0.0336,
"step": 2875
},
{
"epoch": 8.3845319840983,
"grad_norm": 1.2109375,
"learning_rate": 1.42987012987013e-05,
"loss": 0.0357,
"step": 2900
},
{
"epoch": 8.45681243223708,
"grad_norm": 1.7578125,
"learning_rate": 1.3974025974025975e-05,
"loss": 0.0343,
"step": 2925
},
{
"epoch": 8.529092880375858,
"grad_norm": 1.3984375,
"learning_rate": 1.3649350649350651e-05,
"loss": 0.0293,
"step": 2950
},
{
"epoch": 8.601373328514637,
"grad_norm": 1.15625,
"learning_rate": 1.3324675324675326e-05,
"loss": 0.0371,
"step": 2975
},
{
"epoch": 8.673653776653415,
"grad_norm": 2.09375,
"learning_rate": 1.3000000000000001e-05,
"loss": 0.0368,
"step": 3000
},
{
"epoch": 8.673653776653415,
"eval_cer": 0.2278660614765563,
"eval_loss": 1.1499484777450562,
"eval_runtime": 895.0529,
"eval_samples_per_second": 1.55,
"eval_steps_per_second": 0.388,
"eval_wer": 0.4171725128409374,
"step": 3000
},
{
"epoch": 8.745934224792194,
"grad_norm": 2.40625,
"learning_rate": 1.2675324675324676e-05,
"loss": 0.0344,
"step": 3025
},
{
"epoch": 8.818214672930972,
"grad_norm": 1.109375,
"learning_rate": 1.2350649350649352e-05,
"loss": 0.0338,
"step": 3050
},
{
"epoch": 8.890495121069751,
"grad_norm": 1.890625,
"learning_rate": 1.2025974025974027e-05,
"loss": 0.0323,
"step": 3075
},
{
"epoch": 8.96277556920853,
"grad_norm": 1.390625,
"learning_rate": 1.1701298701298701e-05,
"loss": 0.0342,
"step": 3100
},
{
"epoch": 9.034694615106615,
"grad_norm": 1.1015625,
"learning_rate": 1.1376623376623378e-05,
"loss": 0.0291,
"step": 3125
},
{
"epoch": 9.106975063245391,
"grad_norm": 2.453125,
"learning_rate": 1.1051948051948053e-05,
"loss": 0.0275,
"step": 3150
},
{
"epoch": 9.17925551138417,
"grad_norm": 0.9453125,
"learning_rate": 1.0727272727272727e-05,
"loss": 0.0245,
"step": 3175
},
{
"epoch": 9.251535959522949,
"grad_norm": 1.0546875,
"learning_rate": 1.0402597402597402e-05,
"loss": 0.0251,
"step": 3200
},
{
"epoch": 9.323816407661727,
"grad_norm": 1.25,
"learning_rate": 1.0077922077922078e-05,
"loss": 0.0214,
"step": 3225
},
{
"epoch": 9.396096855800506,
"grad_norm": 1.5390625,
"learning_rate": 9.753246753246753e-06,
"loss": 0.0271,
"step": 3250
},
{
"epoch": 9.396096855800506,
"eval_cer": 0.22706083911883618,
"eval_loss": 1.205234169960022,
"eval_runtime": 893.8344,
"eval_samples_per_second": 1.552,
"eval_steps_per_second": 0.388,
"eval_wer": 0.41319892817631626,
"step": 3250
},
{
"epoch": 9.468377303939285,
"grad_norm": 1.5078125,
"learning_rate": 9.42857142857143e-06,
"loss": 0.0272,
"step": 3275
},
{
"epoch": 9.540657752078063,
"grad_norm": 0.734375,
"learning_rate": 9.103896103896104e-06,
"loss": 0.0268,
"step": 3300
},
{
"epoch": 9.612938200216842,
"grad_norm": 1.65625,
"learning_rate": 8.77922077922078e-06,
"loss": 0.027,
"step": 3325
},
{
"epoch": 9.68521864835562,
"grad_norm": 1.359375,
"learning_rate": 8.454545454545455e-06,
"loss": 0.0249,
"step": 3350
},
{
"epoch": 9.757499096494398,
"grad_norm": 1.3671875,
"learning_rate": 8.12987012987013e-06,
"loss": 0.0249,
"step": 3375
},
{
"epoch": 9.829779544633176,
"grad_norm": 1.5390625,
"learning_rate": 7.805194805194806e-06,
"loss": 0.0257,
"step": 3400
},
{
"epoch": 9.902059992771955,
"grad_norm": 1.6015625,
"learning_rate": 7.480519480519481e-06,
"loss": 0.0245,
"step": 3425
},
{
"epoch": 9.974340440910733,
"grad_norm": 1.1484375,
"learning_rate": 7.155844155844156e-06,
"loss": 0.0251,
"step": 3450
},
{
"epoch": 10.046259486808818,
"grad_norm": 0.88671875,
"learning_rate": 6.8311688311688315e-06,
"loss": 0.0238,
"step": 3475
},
{
"epoch": 10.118539934947597,
"grad_norm": 1.1328125,
"learning_rate": 6.506493506493506e-06,
"loss": 0.0237,
"step": 3500
},
{
"epoch": 10.118539934947597,
"eval_cer": 0.2262828635227838,
"eval_loss": 1.2106597423553467,
"eval_runtime": 916.7169,
"eval_samples_per_second": 1.513,
"eval_steps_per_second": 0.379,
"eval_wer": 0.41136634469731537,
"step": 3500
},
{
"epoch": 10.190820383086376,
"grad_norm": 1.3515625,
"learning_rate": 6.181818181818183e-06,
"loss": 0.0239,
"step": 3525
},
{
"epoch": 10.263100831225154,
"grad_norm": 2.078125,
"learning_rate": 5.857142857142857e-06,
"loss": 0.0232,
"step": 3550
},
{
"epoch": 10.335381279363933,
"grad_norm": 1.5234375,
"learning_rate": 5.532467532467533e-06,
"loss": 0.021,
"step": 3575
},
{
"epoch": 10.40766172750271,
"grad_norm": 0.94921875,
"learning_rate": 5.207792207792208e-06,
"loss": 0.0239,
"step": 3600
},
{
"epoch": 10.479942175641488,
"grad_norm": 0.84375,
"learning_rate": 4.883116883116883e-06,
"loss": 0.0217,
"step": 3625
},
{
"epoch": 10.552222623780267,
"grad_norm": 1.1875,
"learning_rate": 4.558441558441559e-06,
"loss": 0.0226,
"step": 3650
},
{
"epoch": 10.624503071919046,
"grad_norm": 2.703125,
"learning_rate": 4.233766233766234e-06,
"loss": 0.0238,
"step": 3675
},
{
"epoch": 10.696783520057824,
"grad_norm": 1.6640625,
"learning_rate": 3.90909090909091e-06,
"loss": 0.0255,
"step": 3700
},
{
"epoch": 10.769063968196603,
"grad_norm": 0.84375,
"learning_rate": 3.5844155844155846e-06,
"loss": 0.0229,
"step": 3725
},
{
"epoch": 10.841344416335382,
"grad_norm": 1.0625,
"learning_rate": 3.2597402597402597e-06,
"loss": 0.0212,
"step": 3750
},
{
"epoch": 10.841344416335382,
"eval_cer": 0.22498029349070886,
"eval_loss": 1.2275042533874512,
"eval_runtime": 921.5359,
"eval_samples_per_second": 1.505,
"eval_steps_per_second": 0.377,
"eval_wer": 0.41112972866628295,
"step": 3750
},
{
"epoch": 10.91362486447416,
"grad_norm": 0.765625,
"learning_rate": 2.9350649350649353e-06,
"loss": 0.0229,
"step": 3775
},
{
"epoch": 10.985905312612939,
"grad_norm": 1.8515625,
"learning_rate": 2.6103896103896104e-06,
"loss": 0.0235,
"step": 3800
},
{
"epoch": 11.057824358511024,
"grad_norm": 0.5703125,
"learning_rate": 2.285714285714286e-06,
"loss": 0.0226,
"step": 3825
},
{
"epoch": 11.1301048066498,
"grad_norm": 1.0234375,
"learning_rate": 1.961038961038961e-06,
"loss": 0.0227,
"step": 3850
},
{
"epoch": 11.20238525478858,
"grad_norm": 1.75,
"learning_rate": 1.6363636363636367e-06,
"loss": 0.0215,
"step": 3875
},
{
"epoch": 11.274665702927358,
"grad_norm": 0.953125,
"learning_rate": 1.3116883116883118e-06,
"loss": 0.0246,
"step": 3900
},
{
"epoch": 11.346946151066136,
"grad_norm": 0.9375,
"learning_rate": 9.870129870129872e-07,
"loss": 0.0202,
"step": 3925
},
{
"epoch": 11.419226599204915,
"grad_norm": 1.015625,
"learning_rate": 6.623376623376623e-07,
"loss": 0.0233,
"step": 3950
},
{
"epoch": 11.491507047343694,
"grad_norm": 1.0078125,
"learning_rate": 3.3766233766233765e-07,
"loss": 0.0214,
"step": 3975
},
{
"epoch": 11.563787495482472,
"grad_norm": 1.390625,
"learning_rate": 1.2987012987012988e-08,
"loss": 0.0221,
"step": 4000
},
{
"epoch": 11.563787495482472,
"eval_cer": 0.2271312877577234,
"eval_loss": 1.228308916091919,
"eval_runtime": 912.0398,
"eval_samples_per_second": 1.521,
"eval_steps_per_second": 0.38,
"eval_wer": 0.4136988597675676,
"step": 4000
}
],
"logging_steps": 25,
"max_steps": 4000,
"num_input_tokens_seen": 0,
"num_train_epochs": 12,
"save_steps": 250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.347268958158848e+20,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}