2933 lines
71 KiB
JSON
2933 lines
71 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 8.456659619450317,
|
|
"eval_steps": 1000,
|
|
"global_step": 10000,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.021141649048625793,
|
|
"grad_norm": 13.565643310546875,
|
|
"learning_rate": 4.800000000000001e-07,
|
|
"loss": 1.0091,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.042283298097251586,
|
|
"grad_norm": 9.143893241882324,
|
|
"learning_rate": 9.800000000000001e-07,
|
|
"loss": 0.7687,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.06342494714587738,
|
|
"grad_norm": 6.373236179351807,
|
|
"learning_rate": 1.48e-06,
|
|
"loss": 0.3227,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.08456659619450317,
|
|
"grad_norm": 5.452936172485352,
|
|
"learning_rate": 1.98e-06,
|
|
"loss": 0.2331,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.10570824524312897,
|
|
"grad_norm": 5.6975626945495605,
|
|
"learning_rate": 2.4800000000000004e-06,
|
|
"loss": 0.2023,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.12684989429175475,
|
|
"grad_norm": 4.201228141784668,
|
|
"learning_rate": 2.9800000000000003e-06,
|
|
"loss": 0.2185,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.14799154334038056,
|
|
"grad_norm": 5.918435573577881,
|
|
"learning_rate": 3.48e-06,
|
|
"loss": 0.1956,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.16913319238900634,
|
|
"grad_norm": 4.987053871154785,
|
|
"learning_rate": 3.980000000000001e-06,
|
|
"loss": 0.1898,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.19027484143763213,
|
|
"grad_norm": 4.0125203132629395,
|
|
"learning_rate": 4.48e-06,
|
|
"loss": 0.195,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.21141649048625794,
|
|
"grad_norm": 4.057735919952393,
|
|
"learning_rate": 4.980000000000001e-06,
|
|
"loss": 0.1765,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.23255813953488372,
|
|
"grad_norm": 5.463469982147217,
|
|
"learning_rate": 5.480000000000001e-06,
|
|
"loss": 0.1882,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.2536997885835095,
|
|
"grad_norm": 4.29346227645874,
|
|
"learning_rate": 5.98e-06,
|
|
"loss": 0.1591,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.2748414376321353,
|
|
"grad_norm": 5.261208534240723,
|
|
"learning_rate": 6.480000000000001e-06,
|
|
"loss": 0.1784,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.2959830866807611,
|
|
"grad_norm": 5.871801376342773,
|
|
"learning_rate": 6.98e-06,
|
|
"loss": 0.1618,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.3171247357293869,
|
|
"grad_norm": 4.745639324188232,
|
|
"learning_rate": 7.48e-06,
|
|
"loss": 0.1639,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.3382663847780127,
|
|
"grad_norm": 5.110447406768799,
|
|
"learning_rate": 7.980000000000002e-06,
|
|
"loss": 0.1769,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.3594080338266385,
|
|
"grad_norm": 6.022309303283691,
|
|
"learning_rate": 8.48e-06,
|
|
"loss": 0.1736,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.38054968287526425,
|
|
"grad_norm": 3.8885881900787354,
|
|
"learning_rate": 8.98e-06,
|
|
"loss": 0.1723,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.40169133192389006,
|
|
"grad_norm": 5.72865104675293,
|
|
"learning_rate": 9.48e-06,
|
|
"loss": 0.1688,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.42283298097251587,
|
|
"grad_norm": 4.54589319229126,
|
|
"learning_rate": 9.980000000000001e-06,
|
|
"loss": 0.1789,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.4439746300211416,
|
|
"grad_norm": 4.388618469238281,
|
|
"learning_rate": 9.974736842105263e-06,
|
|
"loss": 0.1712,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.46511627906976744,
|
|
"grad_norm": 3.574472427368164,
|
|
"learning_rate": 9.94842105263158e-06,
|
|
"loss": 0.1549,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.48625792811839325,
|
|
"grad_norm": 4.669585227966309,
|
|
"learning_rate": 9.922105263157895e-06,
|
|
"loss": 0.1652,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.507399577167019,
|
|
"grad_norm": 4.39477014541626,
|
|
"learning_rate": 9.895789473684212e-06,
|
|
"loss": 0.1504,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.5285412262156448,
|
|
"grad_norm": 2.9595720767974854,
|
|
"learning_rate": 9.869473684210528e-06,
|
|
"loss": 0.1596,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.5496828752642706,
|
|
"grad_norm": 3.7537808418273926,
|
|
"learning_rate": 9.843157894736843e-06,
|
|
"loss": 0.1459,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.5708245243128964,
|
|
"grad_norm": 5.807609558105469,
|
|
"learning_rate": 9.816842105263158e-06,
|
|
"loss": 0.1486,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.5919661733615222,
|
|
"grad_norm": 3.948618173599243,
|
|
"learning_rate": 9.790526315789475e-06,
|
|
"loss": 0.1673,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.6131078224101479,
|
|
"grad_norm": 5.866642475128174,
|
|
"learning_rate": 9.76421052631579e-06,
|
|
"loss": 0.1667,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.6342494714587738,
|
|
"grad_norm": 3.490234375,
|
|
"learning_rate": 9.737894736842107e-06,
|
|
"loss": 0.1321,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.6553911205073996,
|
|
"grad_norm": 4.5152740478515625,
|
|
"learning_rate": 9.711578947368422e-06,
|
|
"loss": 0.1446,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.6765327695560254,
|
|
"grad_norm": 4.738922595977783,
|
|
"learning_rate": 9.685263157894738e-06,
|
|
"loss": 0.1567,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.6976744186046512,
|
|
"grad_norm": 5.560998439788818,
|
|
"learning_rate": 9.658947368421053e-06,
|
|
"loss": 0.1503,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.718816067653277,
|
|
"grad_norm": 4.573572158813477,
|
|
"learning_rate": 9.63263157894737e-06,
|
|
"loss": 0.1613,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.7399577167019028,
|
|
"grad_norm": 4.3108134269714355,
|
|
"learning_rate": 9.606315789473685e-06,
|
|
"loss": 0.148,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.7610993657505285,
|
|
"grad_norm": 4.116722583770752,
|
|
"learning_rate": 9.58e-06,
|
|
"loss": 0.1437,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.7822410147991543,
|
|
"grad_norm": 4.437409400939941,
|
|
"learning_rate": 9.553684210526316e-06,
|
|
"loss": 0.1572,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.8033826638477801,
|
|
"grad_norm": 3.4169201850891113,
|
|
"learning_rate": 9.527368421052631e-06,
|
|
"loss": 0.1631,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.8245243128964059,
|
|
"grad_norm": 5.115376949310303,
|
|
"learning_rate": 9.501052631578948e-06,
|
|
"loss": 0.1533,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.8456659619450317,
|
|
"grad_norm": 3.852684259414673,
|
|
"learning_rate": 9.474736842105265e-06,
|
|
"loss": 0.132,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.8456659619450317,
|
|
"eval_loss": 0.09629034250974655,
|
|
"eval_runtime": 803.1828,
|
|
"eval_samples_per_second": 4.533,
|
|
"eval_steps_per_second": 0.284,
|
|
"eval_wer": 0.0747134437792937,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.8668076109936576,
|
|
"grad_norm": 3.996983766555786,
|
|
"learning_rate": 9.44842105263158e-06,
|
|
"loss": 0.149,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 0.8879492600422833,
|
|
"grad_norm": 5.054391860961914,
|
|
"learning_rate": 9.422105263157896e-06,
|
|
"loss": 0.122,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.9090909090909091,
|
|
"grad_norm": 3.469428777694702,
|
|
"learning_rate": 9.395789473684211e-06,
|
|
"loss": 0.1259,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 0.9302325581395349,
|
|
"grad_norm": 4.308193206787109,
|
|
"learning_rate": 9.369473684210528e-06,
|
|
"loss": 0.1187,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.9513742071881607,
|
|
"grad_norm": 3.8684639930725098,
|
|
"learning_rate": 9.343157894736843e-06,
|
|
"loss": 0.1332,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 0.9725158562367865,
|
|
"grad_norm": 3.6519436836242676,
|
|
"learning_rate": 9.316842105263158e-06,
|
|
"loss": 0.1314,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.9936575052854123,
|
|
"grad_norm": 4.258838653564453,
|
|
"learning_rate": 9.290526315789475e-06,
|
|
"loss": 0.1244,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 1.014799154334038,
|
|
"grad_norm": 2.686086654663086,
|
|
"learning_rate": 9.265263157894737e-06,
|
|
"loss": 0.0855,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 1.0359408033826638,
|
|
"grad_norm": 3.826284646987915,
|
|
"learning_rate": 9.238947368421052e-06,
|
|
"loss": 0.0683,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 1.0570824524312896,
|
|
"grad_norm": 2.2538001537323,
|
|
"learning_rate": 9.21263157894737e-06,
|
|
"loss": 0.0571,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 1.0782241014799154,
|
|
"grad_norm": 3.7690203189849854,
|
|
"learning_rate": 9.186315789473685e-06,
|
|
"loss": 0.0692,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 1.0993657505285412,
|
|
"grad_norm": 3.0382161140441895,
|
|
"learning_rate": 9.16e-06,
|
|
"loss": 0.0604,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 1.120507399577167,
|
|
"grad_norm": 2.6046435832977295,
|
|
"learning_rate": 9.133684210526317e-06,
|
|
"loss": 0.055,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 1.1416490486257929,
|
|
"grad_norm": 2.4167184829711914,
|
|
"learning_rate": 9.107368421052632e-06,
|
|
"loss": 0.0679,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 1.1627906976744187,
|
|
"grad_norm": 3.173344850540161,
|
|
"learning_rate": 9.081052631578949e-06,
|
|
"loss": 0.0556,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 1.1839323467230445,
|
|
"grad_norm": 3.099440336227417,
|
|
"learning_rate": 9.054736842105264e-06,
|
|
"loss": 0.0707,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 1.20507399577167,
|
|
"grad_norm": 2.8110969066619873,
|
|
"learning_rate": 9.02842105263158e-06,
|
|
"loss": 0.0564,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 1.226215644820296,
|
|
"grad_norm": 2.5299370288848877,
|
|
"learning_rate": 9.002105263157895e-06,
|
|
"loss": 0.0621,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 1.2473572938689217,
|
|
"grad_norm": 1.7993814945220947,
|
|
"learning_rate": 8.97578947368421e-06,
|
|
"loss": 0.063,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 1.2684989429175475,
|
|
"grad_norm": 2.9655444622039795,
|
|
"learning_rate": 8.949473684210527e-06,
|
|
"loss": 0.0578,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 1.2896405919661733,
|
|
"grad_norm": 3.150512456893921,
|
|
"learning_rate": 8.923157894736842e-06,
|
|
"loss": 0.0676,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 1.3107822410147991,
|
|
"grad_norm": 2.2454750537872314,
|
|
"learning_rate": 8.896842105263159e-06,
|
|
"loss": 0.057,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 1.331923890063425,
|
|
"grad_norm": 2.760533332824707,
|
|
"learning_rate": 8.870526315789474e-06,
|
|
"loss": 0.0738,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 1.3530655391120507,
|
|
"grad_norm": 3.960843086242676,
|
|
"learning_rate": 8.84421052631579e-06,
|
|
"loss": 0.0641,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 1.3742071881606766,
|
|
"grad_norm": 2.070232391357422,
|
|
"learning_rate": 8.817894736842107e-06,
|
|
"loss": 0.055,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 1.3953488372093024,
|
|
"grad_norm": 2.532212734222412,
|
|
"learning_rate": 8.791578947368422e-06,
|
|
"loss": 0.0523,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 1.4164904862579282,
|
|
"grad_norm": 3.0113463401794434,
|
|
"learning_rate": 8.765263157894739e-06,
|
|
"loss": 0.0613,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 1.437632135306554,
|
|
"grad_norm": 2.228800058364868,
|
|
"learning_rate": 8.738947368421053e-06,
|
|
"loss": 0.0506,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 1.4587737843551798,
|
|
"grad_norm": 4.342855453491211,
|
|
"learning_rate": 8.712631578947368e-06,
|
|
"loss": 0.078,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 1.4799154334038054,
|
|
"grad_norm": 2.6026878356933594,
|
|
"learning_rate": 8.686315789473685e-06,
|
|
"loss": 0.0566,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 1.5010570824524314,
|
|
"grad_norm": 1.9011883735656738,
|
|
"learning_rate": 8.66e-06,
|
|
"loss": 0.0524,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 1.522198731501057,
|
|
"grad_norm": 5.088387489318848,
|
|
"learning_rate": 8.633684210526317e-06,
|
|
"loss": 0.0708,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 1.543340380549683,
|
|
"grad_norm": 2.1307568550109863,
|
|
"learning_rate": 8.607368421052632e-06,
|
|
"loss": 0.0586,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 1.5644820295983086,
|
|
"grad_norm": 2.45223069190979,
|
|
"learning_rate": 8.581052631578948e-06,
|
|
"loss": 0.0566,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 1.5856236786469344,
|
|
"grad_norm": 2.4557642936706543,
|
|
"learning_rate": 8.554736842105263e-06,
|
|
"loss": 0.0506,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 1.6067653276955602,
|
|
"grad_norm": 3.5096094608306885,
|
|
"learning_rate": 8.528421052631578e-06,
|
|
"loss": 0.0569,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 1.627906976744186,
|
|
"grad_norm": 1.774232268333435,
|
|
"learning_rate": 8.502105263157897e-06,
|
|
"loss": 0.0545,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 1.6490486257928119,
|
|
"grad_norm": 2.894585609436035,
|
|
"learning_rate": 8.475789473684212e-06,
|
|
"loss": 0.0557,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 1.6701902748414377,
|
|
"grad_norm": 2.1152944564819336,
|
|
"learning_rate": 8.449473684210527e-06,
|
|
"loss": 0.0533,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 1.6913319238900635,
|
|
"grad_norm": 2.191254138946533,
|
|
"learning_rate": 8.423157894736843e-06,
|
|
"loss": 0.0503,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 1.6913319238900635,
|
|
"eval_loss": 0.0664231926202774,
|
|
"eval_runtime": 764.9205,
|
|
"eval_samples_per_second": 4.76,
|
|
"eval_steps_per_second": 0.298,
|
|
"eval_wer": 0.05257784583971414,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 1.712473572938689,
|
|
"grad_norm": 2.3676247596740723,
|
|
"learning_rate": 8.396842105263158e-06,
|
|
"loss": 0.0624,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 1.733615221987315,
|
|
"grad_norm": 3.942397356033325,
|
|
"learning_rate": 8.370526315789475e-06,
|
|
"loss": 0.0592,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 1.7547568710359407,
|
|
"grad_norm": 3.637500047683716,
|
|
"learning_rate": 8.34421052631579e-06,
|
|
"loss": 0.0437,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 1.7758985200845667,
|
|
"grad_norm": 1.2622654438018799,
|
|
"learning_rate": 8.317894736842107e-06,
|
|
"loss": 0.0543,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 1.7970401691331923,
|
|
"grad_norm": 2.234199047088623,
|
|
"learning_rate": 8.291578947368422e-06,
|
|
"loss": 0.0571,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 1.8181818181818183,
|
|
"grad_norm": 2.8800275325775146,
|
|
"learning_rate": 8.265263157894737e-06,
|
|
"loss": 0.0475,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 1.839323467230444,
|
|
"grad_norm": 3.2477574348449707,
|
|
"learning_rate": 8.238947368421053e-06,
|
|
"loss": 0.0529,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 1.8604651162790697,
|
|
"grad_norm": 3.2765121459960938,
|
|
"learning_rate": 8.212631578947368e-06,
|
|
"loss": 0.0476,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 1.8816067653276956,
|
|
"grad_norm": 2.850074529647827,
|
|
"learning_rate": 8.186315789473685e-06,
|
|
"loss": 0.0464,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 1.9027484143763214,
|
|
"grad_norm": 2.3164877891540527,
|
|
"learning_rate": 8.16e-06,
|
|
"loss": 0.048,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 1.9238900634249472,
|
|
"grad_norm": 4.057616710662842,
|
|
"learning_rate": 8.133684210526316e-06,
|
|
"loss": 0.044,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 1.945031712473573,
|
|
"grad_norm": 2.687540292739868,
|
|
"learning_rate": 8.107368421052633e-06,
|
|
"loss": 0.0439,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 1.9661733615221988,
|
|
"grad_norm": 2.555338144302368,
|
|
"learning_rate": 8.081052631578948e-06,
|
|
"loss": 0.0563,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 1.9873150105708244,
|
|
"grad_norm": 2.214106798171997,
|
|
"learning_rate": 8.054736842105265e-06,
|
|
"loss": 0.0512,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 2.0084566596194504,
|
|
"grad_norm": 1.0390377044677734,
|
|
"learning_rate": 8.02842105263158e-06,
|
|
"loss": 0.035,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 2.029598308668076,
|
|
"grad_norm": 1.4279701709747314,
|
|
"learning_rate": 8.002105263157895e-06,
|
|
"loss": 0.0214,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 2.050739957716702,
|
|
"grad_norm": 1.2955684661865234,
|
|
"learning_rate": 7.975789473684211e-06,
|
|
"loss": 0.0221,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 2.0718816067653276,
|
|
"grad_norm": 1.0678218603134155,
|
|
"learning_rate": 7.949473684210526e-06,
|
|
"loss": 0.0185,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 2.0930232558139537,
|
|
"grad_norm": 2.0018396377563477,
|
|
"learning_rate": 7.923157894736843e-06,
|
|
"loss": 0.0201,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 2.1141649048625792,
|
|
"grad_norm": 1.0335798263549805,
|
|
"learning_rate": 7.896842105263158e-06,
|
|
"loss": 0.0156,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 2.1353065539112053,
|
|
"grad_norm": 3.049020528793335,
|
|
"learning_rate": 7.870526315789475e-06,
|
|
"loss": 0.0218,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 2.156448202959831,
|
|
"grad_norm": 0.5251482725143433,
|
|
"learning_rate": 7.84421052631579e-06,
|
|
"loss": 0.0192,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 2.177589852008457,
|
|
"grad_norm": 2.4763119220733643,
|
|
"learning_rate": 7.817894736842105e-06,
|
|
"loss": 0.0199,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 2.1987315010570825,
|
|
"grad_norm": 0.3287382423877716,
|
|
"learning_rate": 7.791578947368423e-06,
|
|
"loss": 0.0202,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 2.219873150105708,
|
|
"grad_norm": 2.3478755950927734,
|
|
"learning_rate": 7.765263157894738e-06,
|
|
"loss": 0.0172,
|
|
"step": 2625
|
|
},
|
|
{
|
|
"epoch": 2.241014799154334,
|
|
"grad_norm": 1.5515483617782593,
|
|
"learning_rate": 7.738947368421053e-06,
|
|
"loss": 0.0157,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 2.2621564482029597,
|
|
"grad_norm": 3.578237295150757,
|
|
"learning_rate": 7.71263157894737e-06,
|
|
"loss": 0.024,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 2.2832980972515857,
|
|
"grad_norm": 1.1182405948638916,
|
|
"learning_rate": 7.686315789473685e-06,
|
|
"loss": 0.0165,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 2.3044397463002113,
|
|
"grad_norm": 1.8532599210739136,
|
|
"learning_rate": 7.660000000000001e-06,
|
|
"loss": 0.0241,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 2.3255813953488373,
|
|
"grad_norm": 0.5601603984832764,
|
|
"learning_rate": 7.633684210526316e-06,
|
|
"loss": 0.0178,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 2.346723044397463,
|
|
"grad_norm": 3.074402332305908,
|
|
"learning_rate": 7.607368421052632e-06,
|
|
"loss": 0.0276,
|
|
"step": 2775
|
|
},
|
|
{
|
|
"epoch": 2.367864693446089,
|
|
"grad_norm": 1.695233702659607,
|
|
"learning_rate": 7.581052631578948e-06,
|
|
"loss": 0.0227,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 2.3890063424947146,
|
|
"grad_norm": 2.522712469100952,
|
|
"learning_rate": 7.554736842105264e-06,
|
|
"loss": 0.029,
|
|
"step": 2825
|
|
},
|
|
{
|
|
"epoch": 2.41014799154334,
|
|
"grad_norm": 0.7854623794555664,
|
|
"learning_rate": 7.5284210526315794e-06,
|
|
"loss": 0.0197,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 2.431289640591966,
|
|
"grad_norm": 0.5606433749198914,
|
|
"learning_rate": 7.502105263157895e-06,
|
|
"loss": 0.02,
|
|
"step": 2875
|
|
},
|
|
{
|
|
"epoch": 2.452431289640592,
|
|
"grad_norm": 2.8828301429748535,
|
|
"learning_rate": 7.475789473684211e-06,
|
|
"loss": 0.0198,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 2.473572938689218,
|
|
"grad_norm": 1.5144062042236328,
|
|
"learning_rate": 7.449473684210526e-06,
|
|
"loss": 0.0201,
|
|
"step": 2925
|
|
},
|
|
{
|
|
"epoch": 2.4947145877378434,
|
|
"grad_norm": 1.25803542137146,
|
|
"learning_rate": 7.4231578947368436e-06,
|
|
"loss": 0.0216,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 2.5158562367864694,
|
|
"grad_norm": 1.2275983095169067,
|
|
"learning_rate": 7.3968421052631585e-06,
|
|
"loss": 0.032,
|
|
"step": 2975
|
|
},
|
|
{
|
|
"epoch": 2.536997885835095,
|
|
"grad_norm": 1.5076512098312378,
|
|
"learning_rate": 7.370526315789474e-06,
|
|
"loss": 0.023,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 2.536997885835095,
|
|
"eval_loss": 0.06277064979076385,
|
|
"eval_runtime": 778.7671,
|
|
"eval_samples_per_second": 4.675,
|
|
"eval_steps_per_second": 0.293,
|
|
"eval_wer": 0.07271799155413244,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 2.558139534883721,
|
|
"grad_norm": 2.08162784576416,
|
|
"learning_rate": 7.34421052631579e-06,
|
|
"loss": 0.0211,
|
|
"step": 3025
|
|
},
|
|
{
|
|
"epoch": 2.5792811839323466,
|
|
"grad_norm": 1.1717668771743774,
|
|
"learning_rate": 7.317894736842106e-06,
|
|
"loss": 0.019,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 2.6004228329809727,
|
|
"grad_norm": 1.7563270330429077,
|
|
"learning_rate": 7.291578947368422e-06,
|
|
"loss": 0.0235,
|
|
"step": 3075
|
|
},
|
|
{
|
|
"epoch": 2.6215644820295982,
|
|
"grad_norm": 2.0151233673095703,
|
|
"learning_rate": 7.265263157894738e-06,
|
|
"loss": 0.0201,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 2.6427061310782243,
|
|
"grad_norm": 1.1807574033737183,
|
|
"learning_rate": 7.2389473684210534e-06,
|
|
"loss": 0.0232,
|
|
"step": 3125
|
|
},
|
|
{
|
|
"epoch": 2.66384778012685,
|
|
"grad_norm": 2.389129877090454,
|
|
"learning_rate": 7.212631578947369e-06,
|
|
"loss": 0.0189,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 2.6849894291754755,
|
|
"grad_norm": 1.0791960954666138,
|
|
"learning_rate": 7.186315789473684e-06,
|
|
"loss": 0.0138,
|
|
"step": 3175
|
|
},
|
|
{
|
|
"epoch": 2.7061310782241015,
|
|
"grad_norm": 1.2188448905944824,
|
|
"learning_rate": 7.16e-06,
|
|
"loss": 0.0177,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 2.7272727272727275,
|
|
"grad_norm": 2.194073438644409,
|
|
"learning_rate": 7.133684210526316e-06,
|
|
"loss": 0.0236,
|
|
"step": 3225
|
|
},
|
|
{
|
|
"epoch": 2.748414376321353,
|
|
"grad_norm": 3.0970866680145264,
|
|
"learning_rate": 7.107368421052632e-06,
|
|
"loss": 0.0246,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 2.7695560253699787,
|
|
"grad_norm": 1.133362054824829,
|
|
"learning_rate": 7.0810526315789475e-06,
|
|
"loss": 0.0177,
|
|
"step": 3275
|
|
},
|
|
{
|
|
"epoch": 2.7906976744186047,
|
|
"grad_norm": 2.0186614990234375,
|
|
"learning_rate": 7.054736842105264e-06,
|
|
"loss": 0.0206,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 2.8118393234672303,
|
|
"grad_norm": 1.397058367729187,
|
|
"learning_rate": 7.02842105263158e-06,
|
|
"loss": 0.0182,
|
|
"step": 3325
|
|
},
|
|
{
|
|
"epoch": 2.8329809725158563,
|
|
"grad_norm": 0.7610916495323181,
|
|
"learning_rate": 7.002105263157896e-06,
|
|
"loss": 0.0186,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 2.854122621564482,
|
|
"grad_norm": 1.2149909734725952,
|
|
"learning_rate": 6.975789473684212e-06,
|
|
"loss": 0.0244,
|
|
"step": 3375
|
|
},
|
|
{
|
|
"epoch": 2.875264270613108,
|
|
"grad_norm": 0.7443040609359741,
|
|
"learning_rate": 6.9494736842105275e-06,
|
|
"loss": 0.0204,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 2.8964059196617336,
|
|
"grad_norm": 1.4333350658416748,
|
|
"learning_rate": 6.9231578947368424e-06,
|
|
"loss": 0.0202,
|
|
"step": 3425
|
|
},
|
|
{
|
|
"epoch": 2.9175475687103596,
|
|
"grad_norm": 0.8187249898910522,
|
|
"learning_rate": 6.896842105263158e-06,
|
|
"loss": 0.0236,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 2.938689217758985,
|
|
"grad_norm": 0.5101013779640198,
|
|
"learning_rate": 6.870526315789474e-06,
|
|
"loss": 0.0265,
|
|
"step": 3475
|
|
},
|
|
{
|
|
"epoch": 2.9598308668076108,
|
|
"grad_norm": 0.9081612229347229,
|
|
"learning_rate": 6.84421052631579e-06,
|
|
"loss": 0.0213,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 2.980972515856237,
|
|
"grad_norm": 3.310152769088745,
|
|
"learning_rate": 6.817894736842106e-06,
|
|
"loss": 0.0158,
|
|
"step": 3525
|
|
},
|
|
{
|
|
"epoch": 3.0021141649048624,
|
|
"grad_norm": 0.17649300396442413,
|
|
"learning_rate": 6.7915789473684215e-06,
|
|
"loss": 0.013,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 3.0232558139534884,
|
|
"grad_norm": 1.0084452629089355,
|
|
"learning_rate": 6.765263157894737e-06,
|
|
"loss": 0.0098,
|
|
"step": 3575
|
|
},
|
|
{
|
|
"epoch": 3.044397463002114,
|
|
"grad_norm": 0.7109575271606445,
|
|
"learning_rate": 6.738947368421052e-06,
|
|
"loss": 0.0063,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 3.06553911205074,
|
|
"grad_norm": 1.4222323894500732,
|
|
"learning_rate": 6.71263157894737e-06,
|
|
"loss": 0.0104,
|
|
"step": 3625
|
|
},
|
|
{
|
|
"epoch": 3.0866807610993656,
|
|
"grad_norm": 1.3189831972122192,
|
|
"learning_rate": 6.686315789473685e-06,
|
|
"loss": 0.0071,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 3.1078224101479917,
|
|
"grad_norm": 0.26805758476257324,
|
|
"learning_rate": 6.660000000000001e-06,
|
|
"loss": 0.0069,
|
|
"step": 3675
|
|
},
|
|
{
|
|
"epoch": 3.1289640591966172,
|
|
"grad_norm": 1.9511579275131226,
|
|
"learning_rate": 6.6336842105263164e-06,
|
|
"loss": 0.0107,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 3.1501057082452433,
|
|
"grad_norm": 0.3703208863735199,
|
|
"learning_rate": 6.607368421052632e-06,
|
|
"loss": 0.0068,
|
|
"step": 3725
|
|
},
|
|
{
|
|
"epoch": 3.171247357293869,
|
|
"grad_norm": 0.5500373840332031,
|
|
"learning_rate": 6.581052631578948e-06,
|
|
"loss": 0.0055,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 3.192389006342495,
|
|
"grad_norm": 1.709043025970459,
|
|
"learning_rate": 6.554736842105264e-06,
|
|
"loss": 0.0087,
|
|
"step": 3775
|
|
},
|
|
{
|
|
"epoch": 3.2135306553911205,
|
|
"grad_norm": 1.1865346431732178,
|
|
"learning_rate": 6.52842105263158e-06,
|
|
"loss": 0.0095,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 3.234672304439746,
|
|
"grad_norm": 0.505714476108551,
|
|
"learning_rate": 6.5021052631578955e-06,
|
|
"loss": 0.0051,
|
|
"step": 3825
|
|
},
|
|
{
|
|
"epoch": 3.255813953488372,
|
|
"grad_norm": 0.646237850189209,
|
|
"learning_rate": 6.4757894736842105e-06,
|
|
"loss": 0.0109,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 3.276955602536998,
|
|
"grad_norm": 4.061963081359863,
|
|
"learning_rate": 6.449473684210526e-06,
|
|
"loss": 0.0091,
|
|
"step": 3875
|
|
},
|
|
{
|
|
"epoch": 3.2980972515856237,
|
|
"grad_norm": 0.9798858761787415,
|
|
"learning_rate": 6.423157894736842e-06,
|
|
"loss": 0.0071,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 3.3192389006342493,
|
|
"grad_norm": 0.07478220015764236,
|
|
"learning_rate": 6.396842105263158e-06,
|
|
"loss": 0.0087,
|
|
"step": 3925
|
|
},
|
|
{
|
|
"epoch": 3.3403805496828753,
|
|
"grad_norm": 0.36445191502571106,
|
|
"learning_rate": 6.370526315789474e-06,
|
|
"loss": 0.0112,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 3.361522198731501,
|
|
"grad_norm": 0.7756998538970947,
|
|
"learning_rate": 6.3442105263157904e-06,
|
|
"loss": 0.0092,
|
|
"step": 3975
|
|
},
|
|
{
|
|
"epoch": 3.382663847780127,
|
|
"grad_norm": 0.12039519846439362,
|
|
"learning_rate": 6.317894736842106e-06,
|
|
"loss": 0.011,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 3.382663847780127,
|
|
"eval_loss": 0.059302762150764465,
|
|
"eval_runtime": 760.4332,
|
|
"eval_samples_per_second": 4.788,
|
|
"eval_steps_per_second": 0.3,
|
|
"eval_wer": 0.04371432549074203,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 3.4038054968287526,
|
|
"grad_norm": 0.5047985315322876,
|
|
"learning_rate": 6.291578947368422e-06,
|
|
"loss": 0.0082,
|
|
"step": 4025
|
|
},
|
|
{
|
|
"epoch": 3.4249471458773786,
|
|
"grad_norm": 1.0148366689682007,
|
|
"learning_rate": 6.265263157894738e-06,
|
|
"loss": 0.0059,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 3.446088794926004,
|
|
"grad_norm": 2.402777910232544,
|
|
"learning_rate": 6.238947368421054e-06,
|
|
"loss": 0.0075,
|
|
"step": 4075
|
|
},
|
|
{
|
|
"epoch": 3.46723044397463,
|
|
"grad_norm": 1.122701644897461,
|
|
"learning_rate": 6.212631578947369e-06,
|
|
"loss": 0.0091,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 3.488372093023256,
|
|
"grad_norm": 1.688050627708435,
|
|
"learning_rate": 6.1863157894736845e-06,
|
|
"loss": 0.0066,
|
|
"step": 4125
|
|
},
|
|
{
|
|
"epoch": 3.5095137420718814,
|
|
"grad_norm": 0.5308606624603271,
|
|
"learning_rate": 6.16e-06,
|
|
"loss": 0.0084,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 3.5306553911205074,
|
|
"grad_norm": 0.182705819606781,
|
|
"learning_rate": 6.133684210526316e-06,
|
|
"loss": 0.01,
|
|
"step": 4175
|
|
},
|
|
{
|
|
"epoch": 3.5517970401691334,
|
|
"grad_norm": 1.427296757698059,
|
|
"learning_rate": 6.107368421052632e-06,
|
|
"loss": 0.0093,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 3.572938689217759,
|
|
"grad_norm": 0.2774752974510193,
|
|
"learning_rate": 6.081052631578948e-06,
|
|
"loss": 0.0088,
|
|
"step": 4225
|
|
},
|
|
{
|
|
"epoch": 3.5940803382663846,
|
|
"grad_norm": 1.2511072158813477,
|
|
"learning_rate": 6.054736842105264e-06,
|
|
"loss": 0.0105,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 3.6152219873150107,
|
|
"grad_norm": 0.6384909152984619,
|
|
"learning_rate": 6.0284210526315786e-06,
|
|
"loss": 0.0112,
|
|
"step": 4275
|
|
},
|
|
{
|
|
"epoch": 3.6363636363636362,
|
|
"grad_norm": 3.168208599090576,
|
|
"learning_rate": 6.002105263157896e-06,
|
|
"loss": 0.0108,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 3.6575052854122623,
|
|
"grad_norm": 1.4083985090255737,
|
|
"learning_rate": 5.975789473684212e-06,
|
|
"loss": 0.0075,
|
|
"step": 4325
|
|
},
|
|
{
|
|
"epoch": 3.678646934460888,
|
|
"grad_norm": 0.34528228640556335,
|
|
"learning_rate": 5.949473684210527e-06,
|
|
"loss": 0.0113,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 3.699788583509514,
|
|
"grad_norm": 1.5832304954528809,
|
|
"learning_rate": 5.923157894736843e-06,
|
|
"loss": 0.0183,
|
|
"step": 4375
|
|
},
|
|
{
|
|
"epoch": 3.7209302325581395,
|
|
"grad_norm": 0.5489822030067444,
|
|
"learning_rate": 5.8968421052631585e-06,
|
|
"loss": 0.0111,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 3.7420718816067655,
|
|
"grad_norm": 2.3120322227478027,
|
|
"learning_rate": 5.870526315789474e-06,
|
|
"loss": 0.0081,
|
|
"step": 4425
|
|
},
|
|
{
|
|
"epoch": 3.763213530655391,
|
|
"grad_norm": 0.9819779396057129,
|
|
"learning_rate": 5.84421052631579e-06,
|
|
"loss": 0.0111,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 3.7843551797040167,
|
|
"grad_norm": 1.0312461853027344,
|
|
"learning_rate": 5.817894736842106e-06,
|
|
"loss": 0.0087,
|
|
"step": 4475
|
|
},
|
|
{
|
|
"epoch": 3.8054968287526427,
|
|
"grad_norm": 3.042786121368408,
|
|
"learning_rate": 5.791578947368422e-06,
|
|
"loss": 0.0127,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 3.8266384778012688,
|
|
"grad_norm": 0.740356981754303,
|
|
"learning_rate": 5.765263157894737e-06,
|
|
"loss": 0.0091,
|
|
"step": 4525
|
|
},
|
|
{
|
|
"epoch": 3.8477801268498943,
|
|
"grad_norm": 0.25214338302612305,
|
|
"learning_rate": 5.7389473684210526e-06,
|
|
"loss": 0.0077,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 3.86892177589852,
|
|
"grad_norm": 0.2431076020002365,
|
|
"learning_rate": 5.712631578947368e-06,
|
|
"loss": 0.0084,
|
|
"step": 4575
|
|
},
|
|
{
|
|
"epoch": 3.890063424947146,
|
|
"grad_norm": 0.2818649113178253,
|
|
"learning_rate": 5.686315789473684e-06,
|
|
"loss": 0.0082,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 3.9112050739957716,
|
|
"grad_norm": 0.6972672343254089,
|
|
"learning_rate": 5.66e-06,
|
|
"loss": 0.01,
|
|
"step": 4625
|
|
},
|
|
{
|
|
"epoch": 3.9323467230443976,
|
|
"grad_norm": 1.6746132373809814,
|
|
"learning_rate": 5.633684210526317e-06,
|
|
"loss": 0.0097,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 3.953488372093023,
|
|
"grad_norm": 1.0258132219314575,
|
|
"learning_rate": 5.6073684210526325e-06,
|
|
"loss": 0.0104,
|
|
"step": 4675
|
|
},
|
|
{
|
|
"epoch": 3.974630021141649,
|
|
"grad_norm": 2.0761055946350098,
|
|
"learning_rate": 5.581052631578948e-06,
|
|
"loss": 0.008,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 3.995771670190275,
|
|
"grad_norm": 0.18523390591144562,
|
|
"learning_rate": 5.554736842105264e-06,
|
|
"loss": 0.0078,
|
|
"step": 4725
|
|
},
|
|
{
|
|
"epoch": 4.016913319238901,
|
|
"grad_norm": 0.3107724189758301,
|
|
"learning_rate": 5.52842105263158e-06,
|
|
"loss": 0.0042,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 4.038054968287526,
|
|
"grad_norm": 1.8015187978744507,
|
|
"learning_rate": 5.502105263157895e-06,
|
|
"loss": 0.0059,
|
|
"step": 4775
|
|
},
|
|
{
|
|
"epoch": 4.059196617336152,
|
|
"grad_norm": 0.2251424789428711,
|
|
"learning_rate": 5.475789473684211e-06,
|
|
"loss": 0.0047,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 4.080338266384778,
|
|
"grad_norm": 1.2889034748077393,
|
|
"learning_rate": 5.4494736842105266e-06,
|
|
"loss": 0.0046,
|
|
"step": 4825
|
|
},
|
|
{
|
|
"epoch": 4.101479915433404,
|
|
"grad_norm": 0.32070282101631165,
|
|
"learning_rate": 5.423157894736842e-06,
|
|
"loss": 0.0031,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 4.12262156448203,
|
|
"grad_norm": 0.26217707991600037,
|
|
"learning_rate": 5.396842105263158e-06,
|
|
"loss": 0.0063,
|
|
"step": 4875
|
|
},
|
|
{
|
|
"epoch": 4.143763213530655,
|
|
"grad_norm": 0.8193647861480713,
|
|
"learning_rate": 5.370526315789474e-06,
|
|
"loss": 0.0043,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 4.164904862579281,
|
|
"grad_norm": 0.054519519209861755,
|
|
"learning_rate": 5.34421052631579e-06,
|
|
"loss": 0.0047,
|
|
"step": 4925
|
|
},
|
|
{
|
|
"epoch": 4.186046511627907,
|
|
"grad_norm": 0.17601265013217926,
|
|
"learning_rate": 5.317894736842105e-06,
|
|
"loss": 0.0058,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 4.207188160676533,
|
|
"grad_norm": 0.08115002512931824,
|
|
"learning_rate": 5.291578947368422e-06,
|
|
"loss": 0.0015,
|
|
"step": 4975
|
|
},
|
|
{
|
|
"epoch": 4.2283298097251585,
|
|
"grad_norm": 0.0700853168964386,
|
|
"learning_rate": 5.265263157894738e-06,
|
|
"loss": 0.0033,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 4.2283298097251585,
|
|
"eval_loss": 0.057503484189510345,
|
|
"eval_runtime": 759.4854,
|
|
"eval_samples_per_second": 4.794,
|
|
"eval_steps_per_second": 0.3,
|
|
"eval_wer": 0.040697944220149426,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 4.249471458773784,
|
|
"grad_norm": 0.49534428119659424,
|
|
"learning_rate": 5.238947368421053e-06,
|
|
"loss": 0.0025,
|
|
"step": 5025
|
|
},
|
|
{
|
|
"epoch": 4.2706131078224105,
|
|
"grad_norm": 0.10419642180204391,
|
|
"learning_rate": 5.212631578947369e-06,
|
|
"loss": 0.003,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 4.291754756871036,
|
|
"grad_norm": 1.5288044214248657,
|
|
"learning_rate": 5.186315789473685e-06,
|
|
"loss": 0.0044,
|
|
"step": 5075
|
|
},
|
|
{
|
|
"epoch": 4.312896405919662,
|
|
"grad_norm": 0.3455657660961151,
|
|
"learning_rate": 5.1600000000000006e-06,
|
|
"loss": 0.0053,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 4.334038054968287,
|
|
"grad_norm": 0.32805994153022766,
|
|
"learning_rate": 5.133684210526316e-06,
|
|
"loss": 0.006,
|
|
"step": 5125
|
|
},
|
|
{
|
|
"epoch": 4.355179704016914,
|
|
"grad_norm": 0.0669274553656578,
|
|
"learning_rate": 5.107368421052632e-06,
|
|
"loss": 0.0042,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 4.376321353065539,
|
|
"grad_norm": 0.46637606620788574,
|
|
"learning_rate": 5.081052631578948e-06,
|
|
"loss": 0.0039,
|
|
"step": 5175
|
|
},
|
|
{
|
|
"epoch": 4.397463002114165,
|
|
"grad_norm": 0.06938227266073227,
|
|
"learning_rate": 5.054736842105263e-06,
|
|
"loss": 0.0045,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 4.4186046511627906,
|
|
"grad_norm": 0.25897443294525146,
|
|
"learning_rate": 5.028421052631579e-06,
|
|
"loss": 0.0019,
|
|
"step": 5225
|
|
},
|
|
{
|
|
"epoch": 4.439746300211416,
|
|
"grad_norm": 0.0884798988699913,
|
|
"learning_rate": 5.002105263157895e-06,
|
|
"loss": 0.0051,
|
|
"step": 5250
|
|
},
|
|
{
|
|
"epoch": 4.460887949260043,
|
|
"grad_norm": 2.0697453022003174,
|
|
"learning_rate": 4.976842105263158e-06,
|
|
"loss": 0.0036,
|
|
"step": 5275
|
|
},
|
|
{
|
|
"epoch": 4.482029598308668,
|
|
"grad_norm": 0.494328111410141,
|
|
"learning_rate": 4.950526315789474e-06,
|
|
"loss": 0.0044,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 4.503171247357294,
|
|
"grad_norm": 1.0127891302108765,
|
|
"learning_rate": 4.92421052631579e-06,
|
|
"loss": 0.0032,
|
|
"step": 5325
|
|
},
|
|
{
|
|
"epoch": 4.524312896405919,
|
|
"grad_norm": Infinity,
|
|
"learning_rate": 4.898947368421053e-06,
|
|
"loss": 0.0058,
|
|
"step": 5350
|
|
},
|
|
{
|
|
"epoch": 4.545454545454545,
|
|
"grad_norm": 0.9486972093582153,
|
|
"learning_rate": 4.872631578947369e-06,
|
|
"loss": 0.0072,
|
|
"step": 5375
|
|
},
|
|
{
|
|
"epoch": 4.5665961945031714,
|
|
"grad_norm": 0.37844017148017883,
|
|
"learning_rate": 4.846315789473685e-06,
|
|
"loss": 0.0043,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 4.587737843551797,
|
|
"grad_norm": 1.3698139190673828,
|
|
"learning_rate": 4.8200000000000004e-06,
|
|
"loss": 0.0046,
|
|
"step": 5425
|
|
},
|
|
{
|
|
"epoch": 4.608879492600423,
|
|
"grad_norm": 0.14651530981063843,
|
|
"learning_rate": 4.793684210526316e-06,
|
|
"loss": 0.0041,
|
|
"step": 5450
|
|
},
|
|
{
|
|
"epoch": 4.630021141649049,
|
|
"grad_norm": 1.030840277671814,
|
|
"learning_rate": 4.767368421052632e-06,
|
|
"loss": 0.0053,
|
|
"step": 5475
|
|
},
|
|
{
|
|
"epoch": 4.651162790697675,
|
|
"grad_norm": 0.837679922580719,
|
|
"learning_rate": 4.741052631578948e-06,
|
|
"loss": 0.0034,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 4.6723044397463,
|
|
"grad_norm": 0.12558767199516296,
|
|
"learning_rate": 4.714736842105264e-06,
|
|
"loss": 0.0024,
|
|
"step": 5525
|
|
},
|
|
{
|
|
"epoch": 4.693446088794926,
|
|
"grad_norm": 2.1240885257720947,
|
|
"learning_rate": 4.6884210526315795e-06,
|
|
"loss": 0.0042,
|
|
"step": 5550
|
|
},
|
|
{
|
|
"epoch": 4.7145877378435515,
|
|
"grad_norm": 0.08116896450519562,
|
|
"learning_rate": 4.662105263157895e-06,
|
|
"loss": 0.0029,
|
|
"step": 5575
|
|
},
|
|
{
|
|
"epoch": 4.735729386892178,
|
|
"grad_norm": 1.2541615962982178,
|
|
"learning_rate": 4.63578947368421e-06,
|
|
"loss": 0.0085,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 4.7568710359408035,
|
|
"grad_norm": 0.21351023018360138,
|
|
"learning_rate": 4.609473684210526e-06,
|
|
"loss": 0.0026,
|
|
"step": 5625
|
|
},
|
|
{
|
|
"epoch": 4.778012684989429,
|
|
"grad_norm": 0.967060387134552,
|
|
"learning_rate": 4.583157894736843e-06,
|
|
"loss": 0.0058,
|
|
"step": 5650
|
|
},
|
|
{
|
|
"epoch": 4.799154334038055,
|
|
"grad_norm": 0.8480948209762573,
|
|
"learning_rate": 4.556842105263159e-06,
|
|
"loss": 0.0038,
|
|
"step": 5675
|
|
},
|
|
{
|
|
"epoch": 4.82029598308668,
|
|
"grad_norm": 0.0918637365102768,
|
|
"learning_rate": 4.5305263157894744e-06,
|
|
"loss": 0.0038,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 4.841437632135307,
|
|
"grad_norm": 0.32755616307258606,
|
|
"learning_rate": 4.504210526315789e-06,
|
|
"loss": 0.0039,
|
|
"step": 5725
|
|
},
|
|
{
|
|
"epoch": 4.862579281183932,
|
|
"grad_norm": 0.1790982037782669,
|
|
"learning_rate": 4.477894736842105e-06,
|
|
"loss": 0.0025,
|
|
"step": 5750
|
|
},
|
|
{
|
|
"epoch": 4.883720930232558,
|
|
"grad_norm": 0.16250011324882507,
|
|
"learning_rate": 4.451578947368421e-06,
|
|
"loss": 0.0046,
|
|
"step": 5775
|
|
},
|
|
{
|
|
"epoch": 4.904862579281184,
|
|
"grad_norm": 0.056490566581487656,
|
|
"learning_rate": 4.425263157894737e-06,
|
|
"loss": 0.0029,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 4.92600422832981,
|
|
"grad_norm": 0.6812607645988464,
|
|
"learning_rate": 4.3989473684210535e-06,
|
|
"loss": 0.0034,
|
|
"step": 5825
|
|
},
|
|
{
|
|
"epoch": 4.947145877378436,
|
|
"grad_norm": 0.10844116657972336,
|
|
"learning_rate": 4.3726315789473685e-06,
|
|
"loss": 0.0041,
|
|
"step": 5850
|
|
},
|
|
{
|
|
"epoch": 4.968287526427061,
|
|
"grad_norm": 0.13944801688194275,
|
|
"learning_rate": 4.346315789473684e-06,
|
|
"loss": 0.0032,
|
|
"step": 5875
|
|
},
|
|
{
|
|
"epoch": 4.989429175475687,
|
|
"grad_norm": 0.1417611837387085,
|
|
"learning_rate": 4.32e-06,
|
|
"loss": 0.0021,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 5.010570824524313,
|
|
"grad_norm": 0.04675103724002838,
|
|
"learning_rate": 4.293684210526316e-06,
|
|
"loss": 0.0018,
|
|
"step": 5925
|
|
},
|
|
{
|
|
"epoch": 5.031712473572939,
|
|
"grad_norm": 0.06213444098830223,
|
|
"learning_rate": 4.267368421052632e-06,
|
|
"loss": 0.0017,
|
|
"step": 5950
|
|
},
|
|
{
|
|
"epoch": 5.052854122621564,
|
|
"grad_norm": 0.025097988545894623,
|
|
"learning_rate": 4.241052631578948e-06,
|
|
"loss": 0.0022,
|
|
"step": 5975
|
|
},
|
|
{
|
|
"epoch": 5.07399577167019,
|
|
"grad_norm": 1.2869890928268433,
|
|
"learning_rate": 4.214736842105263e-06,
|
|
"loss": 0.0017,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 5.07399577167019,
|
|
"eval_loss": 0.057429373264312744,
|
|
"eval_runtime": 764.4931,
|
|
"eval_samples_per_second": 4.763,
|
|
"eval_steps_per_second": 0.298,
|
|
"eval_wer": 0.044828066267576225,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 5.0951374207188165,
|
|
"grad_norm": 0.08272965997457504,
|
|
"learning_rate": 4.188421052631579e-06,
|
|
"loss": 0.0038,
|
|
"step": 6025
|
|
},
|
|
{
|
|
"epoch": 5.116279069767442,
|
|
"grad_norm": 0.03754328191280365,
|
|
"learning_rate": 4.162105263157895e-06,
|
|
"loss": 0.0019,
|
|
"step": 6050
|
|
},
|
|
{
|
|
"epoch": 5.137420718816068,
|
|
"grad_norm": 0.021387765184044838,
|
|
"learning_rate": 4.135789473684211e-06,
|
|
"loss": 0.0009,
|
|
"step": 6075
|
|
},
|
|
{
|
|
"epoch": 5.158562367864693,
|
|
"grad_norm": 0.8662779927253723,
|
|
"learning_rate": 4.109473684210527e-06,
|
|
"loss": 0.0013,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 5.179704016913319,
|
|
"grad_norm": 0.04760267958045006,
|
|
"learning_rate": 4.0831578947368425e-06,
|
|
"loss": 0.0037,
|
|
"step": 6125
|
|
},
|
|
{
|
|
"epoch": 5.200845665961945,
|
|
"grad_norm": 0.07986485213041306,
|
|
"learning_rate": 4.056842105263158e-06,
|
|
"loss": 0.0021,
|
|
"step": 6150
|
|
},
|
|
{
|
|
"epoch": 5.221987315010571,
|
|
"grad_norm": 0.025027699768543243,
|
|
"learning_rate": 4.030526315789474e-06,
|
|
"loss": 0.0034,
|
|
"step": 6175
|
|
},
|
|
{
|
|
"epoch": 5.2431289640591965,
|
|
"grad_norm": 0.04851312190294266,
|
|
"learning_rate": 4.00421052631579e-06,
|
|
"loss": 0.0025,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 5.264270613107822,
|
|
"grad_norm": 0.7498076558113098,
|
|
"learning_rate": 3.977894736842106e-06,
|
|
"loss": 0.0028,
|
|
"step": 6225
|
|
},
|
|
{
|
|
"epoch": 5.2854122621564485,
|
|
"grad_norm": 0.032242584973573685,
|
|
"learning_rate": 3.951578947368422e-06,
|
|
"loss": 0.0023,
|
|
"step": 6250
|
|
},
|
|
{
|
|
"epoch": 5.306553911205074,
|
|
"grad_norm": 0.10821150243282318,
|
|
"learning_rate": 3.9252631578947366e-06,
|
|
"loss": 0.0065,
|
|
"step": 6275
|
|
},
|
|
{
|
|
"epoch": 5.3276955602537,
|
|
"grad_norm": 0.05587538704276085,
|
|
"learning_rate": 3.898947368421052e-06,
|
|
"loss": 0.0011,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 5.348837209302325,
|
|
"grad_norm": 0.046281397342681885,
|
|
"learning_rate": 3.872631578947369e-06,
|
|
"loss": 0.0027,
|
|
"step": 6325
|
|
},
|
|
{
|
|
"epoch": 5.369978858350952,
|
|
"grad_norm": 0.6012808680534363,
|
|
"learning_rate": 3.846315789473685e-06,
|
|
"loss": 0.0019,
|
|
"step": 6350
|
|
},
|
|
{
|
|
"epoch": 5.391120507399577,
|
|
"grad_norm": 0.08833307027816772,
|
|
"learning_rate": 3.820000000000001e-06,
|
|
"loss": 0.0013,
|
|
"step": 6375
|
|
},
|
|
{
|
|
"epoch": 5.412262156448203,
|
|
"grad_norm": 0.02564876712858677,
|
|
"learning_rate": 3.793684210526316e-06,
|
|
"loss": 0.0021,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 5.4334038054968286,
|
|
"grad_norm": 0.06358140707015991,
|
|
"learning_rate": 3.767368421052632e-06,
|
|
"loss": 0.003,
|
|
"step": 6425
|
|
},
|
|
{
|
|
"epoch": 5.454545454545454,
|
|
"grad_norm": 0.048556044697761536,
|
|
"learning_rate": 3.7410526315789473e-06,
|
|
"loss": 0.0013,
|
|
"step": 6450
|
|
},
|
|
{
|
|
"epoch": 5.475687103594081,
|
|
"grad_norm": 0.1476634293794632,
|
|
"learning_rate": 3.714736842105263e-06,
|
|
"loss": 0.0017,
|
|
"step": 6475
|
|
},
|
|
{
|
|
"epoch": 5.496828752642706,
|
|
"grad_norm": 0.11191302537918091,
|
|
"learning_rate": 3.6884210526315794e-06,
|
|
"loss": 0.0022,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 5.517970401691332,
|
|
"grad_norm": 0.08527684956789017,
|
|
"learning_rate": 3.662105263157895e-06,
|
|
"loss": 0.0034,
|
|
"step": 6525
|
|
},
|
|
{
|
|
"epoch": 5.539112050739957,
|
|
"grad_norm": 0.04578416422009468,
|
|
"learning_rate": 3.635789473684211e-06,
|
|
"loss": 0.0028,
|
|
"step": 6550
|
|
},
|
|
{
|
|
"epoch": 5.560253699788584,
|
|
"grad_norm": 1.1695352792739868,
|
|
"learning_rate": 3.6094736842105264e-06,
|
|
"loss": 0.0017,
|
|
"step": 6575
|
|
},
|
|
{
|
|
"epoch": 5.5813953488372094,
|
|
"grad_norm": 0.02468816004693508,
|
|
"learning_rate": 3.5831578947368422e-06,
|
|
"loss": 0.0027,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 5.602536997885835,
|
|
"grad_norm": 2.5693840980529785,
|
|
"learning_rate": 3.556842105263158e-06,
|
|
"loss": 0.0038,
|
|
"step": 6625
|
|
},
|
|
{
|
|
"epoch": 5.623678646934461,
|
|
"grad_norm": 0.12754817306995392,
|
|
"learning_rate": 3.5305263157894743e-06,
|
|
"loss": 0.0047,
|
|
"step": 6650
|
|
},
|
|
{
|
|
"epoch": 5.644820295983086,
|
|
"grad_norm": 0.22026591002941132,
|
|
"learning_rate": 3.50421052631579e-06,
|
|
"loss": 0.0013,
|
|
"step": 6675
|
|
},
|
|
{
|
|
"epoch": 5.665961945031713,
|
|
"grad_norm": 0.052474796772003174,
|
|
"learning_rate": 3.4778947368421055e-06,
|
|
"loss": 0.004,
|
|
"step": 6700
|
|
},
|
|
{
|
|
"epoch": 5.687103594080338,
|
|
"grad_norm": 0.05022185668349266,
|
|
"learning_rate": 3.4515789473684213e-06,
|
|
"loss": 0.0015,
|
|
"step": 6725
|
|
},
|
|
{
|
|
"epoch": 5.708245243128964,
|
|
"grad_norm": 0.10212918370962143,
|
|
"learning_rate": 3.425263157894737e-06,
|
|
"loss": 0.0024,
|
|
"step": 6750
|
|
},
|
|
{
|
|
"epoch": 5.72938689217759,
|
|
"grad_norm": 0.09313949197530746,
|
|
"learning_rate": 3.398947368421053e-06,
|
|
"loss": 0.0048,
|
|
"step": 6775
|
|
},
|
|
{
|
|
"epoch": 5.750528541226216,
|
|
"grad_norm": 0.07640087604522705,
|
|
"learning_rate": 3.3726315789473683e-06,
|
|
"loss": 0.0026,
|
|
"step": 6800
|
|
},
|
|
{
|
|
"epoch": 5.7716701902748415,
|
|
"grad_norm": 0.05444110184907913,
|
|
"learning_rate": 3.3463157894736846e-06,
|
|
"loss": 0.0017,
|
|
"step": 6825
|
|
},
|
|
{
|
|
"epoch": 5.792811839323467,
|
|
"grad_norm": 0.03372509405016899,
|
|
"learning_rate": 3.3200000000000004e-06,
|
|
"loss": 0.0028,
|
|
"step": 6850
|
|
},
|
|
{
|
|
"epoch": 5.813953488372093,
|
|
"grad_norm": 0.04323631897568703,
|
|
"learning_rate": 3.2936842105263162e-06,
|
|
"loss": 0.0015,
|
|
"step": 6875
|
|
},
|
|
{
|
|
"epoch": 5.835095137420719,
|
|
"grad_norm": 0.6991068720817566,
|
|
"learning_rate": 3.267368421052632e-06,
|
|
"loss": 0.0027,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 5.856236786469345,
|
|
"grad_norm": 0.12359145283699036,
|
|
"learning_rate": 3.2410526315789474e-06,
|
|
"loss": 0.0039,
|
|
"step": 6925
|
|
},
|
|
{
|
|
"epoch": 5.87737843551797,
|
|
"grad_norm": 1.7748690843582153,
|
|
"learning_rate": 3.2147368421052633e-06,
|
|
"loss": 0.0021,
|
|
"step": 6950
|
|
},
|
|
{
|
|
"epoch": 5.898520084566596,
|
|
"grad_norm": 0.050521768629550934,
|
|
"learning_rate": 3.188421052631579e-06,
|
|
"loss": 0.0012,
|
|
"step": 6975
|
|
},
|
|
{
|
|
"epoch": 5.9196617336152215,
|
|
"grad_norm": 0.33487266302108765,
|
|
"learning_rate": 3.1621052631578953e-06,
|
|
"loss": 0.0013,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 5.9196617336152215,
|
|
"eval_loss": 0.05544720217585564,
|
|
"eval_runtime": 757.6669,
|
|
"eval_samples_per_second": 4.806,
|
|
"eval_steps_per_second": 0.301,
|
|
"eval_wer": 0.038563274397883894,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 5.940803382663848,
|
|
"grad_norm": 1.779146671295166,
|
|
"learning_rate": 3.135789473684211e-06,
|
|
"loss": 0.0046,
|
|
"step": 7025
|
|
},
|
|
{
|
|
"epoch": 5.961945031712474,
|
|
"grad_norm": 0.150315061211586,
|
|
"learning_rate": 3.1094736842105265e-06,
|
|
"loss": 0.0016,
|
|
"step": 7050
|
|
},
|
|
{
|
|
"epoch": 5.983086680761099,
|
|
"grad_norm": 1.2545819282531738,
|
|
"learning_rate": 3.0831578947368423e-06,
|
|
"loss": 0.0023,
|
|
"step": 7075
|
|
},
|
|
{
|
|
"epoch": 6.004228329809725,
|
|
"grad_norm": 0.5065405368804932,
|
|
"learning_rate": 3.056842105263158e-06,
|
|
"loss": 0.0017,
|
|
"step": 7100
|
|
},
|
|
{
|
|
"epoch": 6.025369978858351,
|
|
"grad_norm": 0.10508285462856293,
|
|
"learning_rate": 3.0305263157894736e-06,
|
|
"loss": 0.0015,
|
|
"step": 7125
|
|
},
|
|
{
|
|
"epoch": 6.046511627906977,
|
|
"grad_norm": 0.020149648189544678,
|
|
"learning_rate": 3.0042105263157894e-06,
|
|
"loss": 0.001,
|
|
"step": 7150
|
|
},
|
|
{
|
|
"epoch": 6.067653276955602,
|
|
"grad_norm": 0.021365733817219734,
|
|
"learning_rate": 2.9778947368421056e-06,
|
|
"loss": 0.0017,
|
|
"step": 7175
|
|
},
|
|
{
|
|
"epoch": 6.088794926004228,
|
|
"grad_norm": 0.026428379118442535,
|
|
"learning_rate": 2.9515789473684214e-06,
|
|
"loss": 0.0014,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 6.1099365750528545,
|
|
"grad_norm": 0.03086891956627369,
|
|
"learning_rate": 2.9252631578947373e-06,
|
|
"loss": 0.0011,
|
|
"step": 7225
|
|
},
|
|
{
|
|
"epoch": 6.13107822410148,
|
|
"grad_norm": 1.588483452796936,
|
|
"learning_rate": 2.8989473684210526e-06,
|
|
"loss": 0.0019,
|
|
"step": 7250
|
|
},
|
|
{
|
|
"epoch": 6.152219873150106,
|
|
"grad_norm": 1.1269233226776123,
|
|
"learning_rate": 2.8726315789473685e-06,
|
|
"loss": 0.0006,
|
|
"step": 7275
|
|
},
|
|
{
|
|
"epoch": 6.173361522198731,
|
|
"grad_norm": 0.06000743806362152,
|
|
"learning_rate": 2.8463157894736843e-06,
|
|
"loss": 0.0008,
|
|
"step": 7300
|
|
},
|
|
{
|
|
"epoch": 6.194503171247358,
|
|
"grad_norm": 0.022325266152620316,
|
|
"learning_rate": 2.82e-06,
|
|
"loss": 0.0008,
|
|
"step": 7325
|
|
},
|
|
{
|
|
"epoch": 6.215644820295983,
|
|
"grad_norm": 0.023885082453489304,
|
|
"learning_rate": 2.7936842105263163e-06,
|
|
"loss": 0.0012,
|
|
"step": 7350
|
|
},
|
|
{
|
|
"epoch": 6.236786469344609,
|
|
"grad_norm": 0.01704108528792858,
|
|
"learning_rate": 2.7673684210526317e-06,
|
|
"loss": 0.0009,
|
|
"step": 7375
|
|
},
|
|
{
|
|
"epoch": 6.2579281183932345,
|
|
"grad_norm": 0.23580221831798553,
|
|
"learning_rate": 2.7410526315789476e-06,
|
|
"loss": 0.0012,
|
|
"step": 7400
|
|
},
|
|
{
|
|
"epoch": 6.27906976744186,
|
|
"grad_norm": 0.018755685538053513,
|
|
"learning_rate": 2.7147368421052634e-06,
|
|
"loss": 0.0015,
|
|
"step": 7425
|
|
},
|
|
{
|
|
"epoch": 6.3002114164904865,
|
|
"grad_norm": 0.039177898317575455,
|
|
"learning_rate": 2.688421052631579e-06,
|
|
"loss": 0.0008,
|
|
"step": 7450
|
|
},
|
|
{
|
|
"epoch": 6.321353065539112,
|
|
"grad_norm": 1.1837704181671143,
|
|
"learning_rate": 2.6621052631578946e-06,
|
|
"loss": 0.0012,
|
|
"step": 7475
|
|
},
|
|
{
|
|
"epoch": 6.342494714587738,
|
|
"grad_norm": 0.07270597666501999,
|
|
"learning_rate": 2.635789473684211e-06,
|
|
"loss": 0.0006,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 6.363636363636363,
|
|
"grad_norm": 0.014915907755494118,
|
|
"learning_rate": 2.6094736842105267e-06,
|
|
"loss": 0.0011,
|
|
"step": 7525
|
|
},
|
|
{
|
|
"epoch": 6.38477801268499,
|
|
"grad_norm": 0.020498577505350113,
|
|
"learning_rate": 2.5831578947368425e-06,
|
|
"loss": 0.0013,
|
|
"step": 7550
|
|
},
|
|
{
|
|
"epoch": 6.405919661733615,
|
|
"grad_norm": 0.11151342839002609,
|
|
"learning_rate": 2.5568421052631583e-06,
|
|
"loss": 0.0014,
|
|
"step": 7575
|
|
},
|
|
{
|
|
"epoch": 6.427061310782241,
|
|
"grad_norm": 0.07982934266328812,
|
|
"learning_rate": 2.5305263157894737e-06,
|
|
"loss": 0.001,
|
|
"step": 7600
|
|
},
|
|
{
|
|
"epoch": 6.4482029598308666,
|
|
"grad_norm": 0.035042643547058105,
|
|
"learning_rate": 2.5042105263157895e-06,
|
|
"loss": 0.0009,
|
|
"step": 7625
|
|
},
|
|
{
|
|
"epoch": 6.469344608879492,
|
|
"grad_norm": 0.016940327361226082,
|
|
"learning_rate": 2.4778947368421053e-06,
|
|
"loss": 0.002,
|
|
"step": 7650
|
|
},
|
|
{
|
|
"epoch": 6.490486257928119,
|
|
"grad_norm": 0.017380723729729652,
|
|
"learning_rate": 2.451578947368421e-06,
|
|
"loss": 0.0006,
|
|
"step": 7675
|
|
},
|
|
{
|
|
"epoch": 6.511627906976744,
|
|
"grad_norm": 0.028508609160780907,
|
|
"learning_rate": 2.425263157894737e-06,
|
|
"loss": 0.0018,
|
|
"step": 7700
|
|
},
|
|
{
|
|
"epoch": 6.53276955602537,
|
|
"grad_norm": 0.020123794674873352,
|
|
"learning_rate": 2.3989473684210528e-06,
|
|
"loss": 0.0017,
|
|
"step": 7725
|
|
},
|
|
{
|
|
"epoch": 6.553911205073996,
|
|
"grad_norm": 0.06142396479845047,
|
|
"learning_rate": 2.3726315789473686e-06,
|
|
"loss": 0.0016,
|
|
"step": 7750
|
|
},
|
|
{
|
|
"epoch": 6.575052854122622,
|
|
"grad_norm": 0.02878367342054844,
|
|
"learning_rate": 2.3463157894736844e-06,
|
|
"loss": 0.0012,
|
|
"step": 7775
|
|
},
|
|
{
|
|
"epoch": 6.5961945031712474,
|
|
"grad_norm": 0.020016765221953392,
|
|
"learning_rate": 2.3200000000000002e-06,
|
|
"loss": 0.0023,
|
|
"step": 7800
|
|
},
|
|
{
|
|
"epoch": 6.617336152219873,
|
|
"grad_norm": 0.02607109770178795,
|
|
"learning_rate": 2.293684210526316e-06,
|
|
"loss": 0.0012,
|
|
"step": 7825
|
|
},
|
|
{
|
|
"epoch": 6.638477801268499,
|
|
"grad_norm": 0.030171332880854607,
|
|
"learning_rate": 2.267368421052632e-06,
|
|
"loss": 0.0016,
|
|
"step": 7850
|
|
},
|
|
{
|
|
"epoch": 6.659619450317125,
|
|
"grad_norm": 0.02076024003326893,
|
|
"learning_rate": 2.2410526315789473e-06,
|
|
"loss": 0.0006,
|
|
"step": 7875
|
|
},
|
|
{
|
|
"epoch": 6.680761099365751,
|
|
"grad_norm": 0.014649393036961555,
|
|
"learning_rate": 2.2147368421052635e-06,
|
|
"loss": 0.0017,
|
|
"step": 7900
|
|
},
|
|
{
|
|
"epoch": 6.701902748414376,
|
|
"grad_norm": 0.034021761268377304,
|
|
"learning_rate": 2.188421052631579e-06,
|
|
"loss": 0.0014,
|
|
"step": 7925
|
|
},
|
|
{
|
|
"epoch": 6.723044397463002,
|
|
"grad_norm": 0.01666625402867794,
|
|
"learning_rate": 2.1621052631578947e-06,
|
|
"loss": 0.0017,
|
|
"step": 7950
|
|
},
|
|
{
|
|
"epoch": 6.7441860465116275,
|
|
"grad_norm": 0.02546251006424427,
|
|
"learning_rate": 2.135789473684211e-06,
|
|
"loss": 0.0005,
|
|
"step": 7975
|
|
},
|
|
{
|
|
"epoch": 6.765327695560254,
|
|
"grad_norm": 0.39473649859428406,
|
|
"learning_rate": 2.1094736842105264e-06,
|
|
"loss": 0.002,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 6.765327695560254,
|
|
"eval_loss": 0.055500857532024384,
|
|
"eval_runtime": 764.4429,
|
|
"eval_samples_per_second": 4.763,
|
|
"eval_steps_per_second": 0.298,
|
|
"eval_wer": 0.04255417884820641,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 6.7864693446088795,
|
|
"grad_norm": 0.03211165964603424,
|
|
"learning_rate": 2.083157894736842e-06,
|
|
"loss": 0.0018,
|
|
"step": 8025
|
|
},
|
|
{
|
|
"epoch": 6.807610993657505,
|
|
"grad_norm": 0.01745191030204296,
|
|
"learning_rate": 2.056842105263158e-06,
|
|
"loss": 0.0016,
|
|
"step": 8050
|
|
},
|
|
{
|
|
"epoch": 6.828752642706131,
|
|
"grad_norm": 0.020556088536977768,
|
|
"learning_rate": 2.030526315789474e-06,
|
|
"loss": 0.0003,
|
|
"step": 8075
|
|
},
|
|
{
|
|
"epoch": 6.849894291754757,
|
|
"grad_norm": 0.01723705790936947,
|
|
"learning_rate": 2.0042105263157896e-06,
|
|
"loss": 0.0008,
|
|
"step": 8100
|
|
},
|
|
{
|
|
"epoch": 6.871035940803383,
|
|
"grad_norm": 0.02272706665098667,
|
|
"learning_rate": 1.9778947368421055e-06,
|
|
"loss": 0.0018,
|
|
"step": 8125
|
|
},
|
|
{
|
|
"epoch": 6.892177589852008,
|
|
"grad_norm": 0.015349403955042362,
|
|
"learning_rate": 1.9515789473684213e-06,
|
|
"loss": 0.0005,
|
|
"step": 8150
|
|
},
|
|
{
|
|
"epoch": 6.913319238900634,
|
|
"grad_norm": 0.9683336019515991,
|
|
"learning_rate": 1.925263157894737e-06,
|
|
"loss": 0.0008,
|
|
"step": 8175
|
|
},
|
|
{
|
|
"epoch": 6.93446088794926,
|
|
"grad_norm": 0.02504642680287361,
|
|
"learning_rate": 1.8989473684210527e-06,
|
|
"loss": 0.0022,
|
|
"step": 8200
|
|
},
|
|
{
|
|
"epoch": 6.955602536997886,
|
|
"grad_norm": 0.013240883126854897,
|
|
"learning_rate": 1.8726315789473687e-06,
|
|
"loss": 0.0004,
|
|
"step": 8225
|
|
},
|
|
{
|
|
"epoch": 6.976744186046512,
|
|
"grad_norm": 0.10656526684761047,
|
|
"learning_rate": 1.8463157894736843e-06,
|
|
"loss": 0.0008,
|
|
"step": 8250
|
|
},
|
|
{
|
|
"epoch": 6.997885835095137,
|
|
"grad_norm": 0.02900248020887375,
|
|
"learning_rate": 1.8200000000000002e-06,
|
|
"loss": 0.0015,
|
|
"step": 8275
|
|
},
|
|
{
|
|
"epoch": 7.019027484143764,
|
|
"grad_norm": 0.01494303997606039,
|
|
"learning_rate": 1.7936842105263158e-06,
|
|
"loss": 0.0007,
|
|
"step": 8300
|
|
},
|
|
{
|
|
"epoch": 7.040169133192389,
|
|
"grad_norm": 0.011092742905020714,
|
|
"learning_rate": 1.7673684210526318e-06,
|
|
"loss": 0.0006,
|
|
"step": 8325
|
|
},
|
|
{
|
|
"epoch": 7.061310782241015,
|
|
"grad_norm": 0.00914891716092825,
|
|
"learning_rate": 1.7410526315789474e-06,
|
|
"loss": 0.0005,
|
|
"step": 8350
|
|
},
|
|
{
|
|
"epoch": 7.08245243128964,
|
|
"grad_norm": 0.01832897588610649,
|
|
"learning_rate": 1.7147368421052632e-06,
|
|
"loss": 0.0002,
|
|
"step": 8375
|
|
},
|
|
{
|
|
"epoch": 7.103594080338266,
|
|
"grad_norm": 0.016141528263688087,
|
|
"learning_rate": 1.6884210526315792e-06,
|
|
"loss": 0.0004,
|
|
"step": 8400
|
|
},
|
|
{
|
|
"epoch": 7.1247357293868925,
|
|
"grad_norm": 0.01363268494606018,
|
|
"learning_rate": 1.6621052631578948e-06,
|
|
"loss": 0.0006,
|
|
"step": 8425
|
|
},
|
|
{
|
|
"epoch": 7.145877378435518,
|
|
"grad_norm": 0.7340702414512634,
|
|
"learning_rate": 1.6357894736842107e-06,
|
|
"loss": 0.0008,
|
|
"step": 8450
|
|
},
|
|
{
|
|
"epoch": 7.167019027484144,
|
|
"grad_norm": 0.01294713094830513,
|
|
"learning_rate": 1.6094736842105265e-06,
|
|
"loss": 0.0008,
|
|
"step": 8475
|
|
},
|
|
{
|
|
"epoch": 7.188160676532769,
|
|
"grad_norm": 0.009142986498773098,
|
|
"learning_rate": 1.5831578947368423e-06,
|
|
"loss": 0.0003,
|
|
"step": 8500
|
|
},
|
|
{
|
|
"epoch": 7.209302325581396,
|
|
"grad_norm": 0.017382999882102013,
|
|
"learning_rate": 1.556842105263158e-06,
|
|
"loss": 0.0009,
|
|
"step": 8525
|
|
},
|
|
{
|
|
"epoch": 7.230443974630021,
|
|
"grad_norm": 0.014677566476166248,
|
|
"learning_rate": 1.5305263157894737e-06,
|
|
"loss": 0.0002,
|
|
"step": 8550
|
|
},
|
|
{
|
|
"epoch": 7.251585623678647,
|
|
"grad_norm": 0.9460182785987854,
|
|
"learning_rate": 1.5042105263157898e-06,
|
|
"loss": 0.0014,
|
|
"step": 8575
|
|
},
|
|
{
|
|
"epoch": 7.2727272727272725,
|
|
"grad_norm": 0.9913358688354492,
|
|
"learning_rate": 1.4778947368421054e-06,
|
|
"loss": 0.0009,
|
|
"step": 8600
|
|
},
|
|
{
|
|
"epoch": 7.293868921775898,
|
|
"grad_norm": 0.017043571919202805,
|
|
"learning_rate": 1.4515789473684212e-06,
|
|
"loss": 0.0007,
|
|
"step": 8625
|
|
},
|
|
{
|
|
"epoch": 7.3150105708245245,
|
|
"grad_norm": 0.010510086081922054,
|
|
"learning_rate": 1.425263157894737e-06,
|
|
"loss": 0.0004,
|
|
"step": 8650
|
|
},
|
|
{
|
|
"epoch": 7.33615221987315,
|
|
"grad_norm": 0.015979068353772163,
|
|
"learning_rate": 1.3989473684210528e-06,
|
|
"loss": 0.0004,
|
|
"step": 8675
|
|
},
|
|
{
|
|
"epoch": 7.357293868921776,
|
|
"grad_norm": 0.017196275293827057,
|
|
"learning_rate": 1.3726315789473684e-06,
|
|
"loss": 0.0005,
|
|
"step": 8700
|
|
},
|
|
{
|
|
"epoch": 7.378435517970401,
|
|
"grad_norm": 0.009500819258391857,
|
|
"learning_rate": 1.3463157894736842e-06,
|
|
"loss": 0.0002,
|
|
"step": 8725
|
|
},
|
|
{
|
|
"epoch": 7.399577167019028,
|
|
"grad_norm": 0.019156746566295624,
|
|
"learning_rate": 1.32e-06,
|
|
"loss": 0.0016,
|
|
"step": 8750
|
|
},
|
|
{
|
|
"epoch": 7.420718816067653,
|
|
"grad_norm": 0.010634716600179672,
|
|
"learning_rate": 1.2936842105263159e-06,
|
|
"loss": 0.0012,
|
|
"step": 8775
|
|
},
|
|
{
|
|
"epoch": 7.441860465116279,
|
|
"grad_norm": 0.02140488103032112,
|
|
"learning_rate": 1.2673684210526315e-06,
|
|
"loss": 0.0006,
|
|
"step": 8800
|
|
},
|
|
{
|
|
"epoch": 7.4630021141649046,
|
|
"grad_norm": 0.01485748216509819,
|
|
"learning_rate": 1.2410526315789475e-06,
|
|
"loss": 0.0006,
|
|
"step": 8825
|
|
},
|
|
{
|
|
"epoch": 7.484143763213531,
|
|
"grad_norm": 0.015150833874940872,
|
|
"learning_rate": 1.2147368421052633e-06,
|
|
"loss": 0.0009,
|
|
"step": 8850
|
|
},
|
|
{
|
|
"epoch": 7.505285412262157,
|
|
"grad_norm": 0.018471376970410347,
|
|
"learning_rate": 1.188421052631579e-06,
|
|
"loss": 0.0006,
|
|
"step": 8875
|
|
},
|
|
{
|
|
"epoch": 7.526427061310782,
|
|
"grad_norm": 0.011308133602142334,
|
|
"learning_rate": 1.1621052631578948e-06,
|
|
"loss": 0.0005,
|
|
"step": 8900
|
|
},
|
|
{
|
|
"epoch": 7.547568710359408,
|
|
"grad_norm": 0.01056403573602438,
|
|
"learning_rate": 1.1357894736842106e-06,
|
|
"loss": 0.0014,
|
|
"step": 8925
|
|
},
|
|
{
|
|
"epoch": 7.568710359408033,
|
|
"grad_norm": 0.013321136124432087,
|
|
"learning_rate": 1.1094736842105264e-06,
|
|
"loss": 0.0009,
|
|
"step": 8950
|
|
},
|
|
{
|
|
"epoch": 7.58985200845666,
|
|
"grad_norm": 0.01472330279648304,
|
|
"learning_rate": 1.0831578947368422e-06,
|
|
"loss": 0.0005,
|
|
"step": 8975
|
|
},
|
|
{
|
|
"epoch": 7.6109936575052854,
|
|
"grad_norm": 0.01450197771191597,
|
|
"learning_rate": 1.0568421052631578e-06,
|
|
"loss": 0.0002,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 7.6109936575052854,
|
|
"eval_loss": 0.05712108314037323,
|
|
"eval_runtime": 762.8356,
|
|
"eval_samples_per_second": 4.773,
|
|
"eval_steps_per_second": 0.299,
|
|
"eval_wer": 0.042136526056893595,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 7.632135306553911,
|
|
"grad_norm": 0.014398843050003052,
|
|
"learning_rate": 1.0305263157894739e-06,
|
|
"loss": 0.0006,
|
|
"step": 9025
|
|
},
|
|
{
|
|
"epoch": 7.653276955602537,
|
|
"grad_norm": 0.035577815026044846,
|
|
"learning_rate": 1.0042105263157897e-06,
|
|
"loss": 0.0004,
|
|
"step": 9050
|
|
},
|
|
{
|
|
"epoch": 7.674418604651163,
|
|
"grad_norm": 0.012484509497880936,
|
|
"learning_rate": 9.778947368421053e-07,
|
|
"loss": 0.0003,
|
|
"step": 9075
|
|
},
|
|
{
|
|
"epoch": 7.695560253699789,
|
|
"grad_norm": 0.015736937522888184,
|
|
"learning_rate": 9.515789473684212e-07,
|
|
"loss": 0.0013,
|
|
"step": 9100
|
|
},
|
|
{
|
|
"epoch": 7.716701902748414,
|
|
"grad_norm": 0.02214883267879486,
|
|
"learning_rate": 9.252631578947368e-07,
|
|
"loss": 0.0008,
|
|
"step": 9125
|
|
},
|
|
{
|
|
"epoch": 7.73784355179704,
|
|
"grad_norm": 0.015202338807284832,
|
|
"learning_rate": 8.989473684210527e-07,
|
|
"loss": 0.0008,
|
|
"step": 9150
|
|
},
|
|
{
|
|
"epoch": 7.758985200845666,
|
|
"grad_norm": 0.01783289574086666,
|
|
"learning_rate": 8.726315789473686e-07,
|
|
"loss": 0.0002,
|
|
"step": 9175
|
|
},
|
|
{
|
|
"epoch": 7.780126849894292,
|
|
"grad_norm": 0.01469349954277277,
|
|
"learning_rate": 8.463157894736843e-07,
|
|
"loss": 0.0007,
|
|
"step": 9200
|
|
},
|
|
{
|
|
"epoch": 7.8012684989429175,
|
|
"grad_norm": 0.014686803333461285,
|
|
"learning_rate": 8.200000000000001e-07,
|
|
"loss": 0.0005,
|
|
"step": 9225
|
|
},
|
|
{
|
|
"epoch": 7.822410147991543,
|
|
"grad_norm": 0.012407947331666946,
|
|
"learning_rate": 7.936842105263158e-07,
|
|
"loss": 0.0004,
|
|
"step": 9250
|
|
},
|
|
{
|
|
"epoch": 7.843551797040169,
|
|
"grad_norm": 0.12860046327114105,
|
|
"learning_rate": 7.673684210526316e-07,
|
|
"loss": 0.0002,
|
|
"step": 9275
|
|
},
|
|
{
|
|
"epoch": 7.864693446088795,
|
|
"grad_norm": 0.007339488714933395,
|
|
"learning_rate": 7.410526315789475e-07,
|
|
"loss": 0.0007,
|
|
"step": 9300
|
|
},
|
|
{
|
|
"epoch": 7.885835095137421,
|
|
"grad_norm": 0.10270128399133682,
|
|
"learning_rate": 7.147368421052632e-07,
|
|
"loss": 0.0007,
|
|
"step": 9325
|
|
},
|
|
{
|
|
"epoch": 7.906976744186046,
|
|
"grad_norm": 0.010488270781934261,
|
|
"learning_rate": 6.884210526315791e-07,
|
|
"loss": 0.0013,
|
|
"step": 9350
|
|
},
|
|
{
|
|
"epoch": 7.928118393234672,
|
|
"grad_norm": 0.015831220895051956,
|
|
"learning_rate": 6.621052631578948e-07,
|
|
"loss": 0.0002,
|
|
"step": 9375
|
|
},
|
|
{
|
|
"epoch": 7.949260042283298,
|
|
"grad_norm": 0.0610765777528286,
|
|
"learning_rate": 6.357894736842106e-07,
|
|
"loss": 0.0005,
|
|
"step": 9400
|
|
},
|
|
{
|
|
"epoch": 7.970401691331924,
|
|
"grad_norm": 0.01717539131641388,
|
|
"learning_rate": 6.094736842105263e-07,
|
|
"loss": 0.0002,
|
|
"step": 9425
|
|
},
|
|
{
|
|
"epoch": 7.99154334038055,
|
|
"grad_norm": 0.016577402129769325,
|
|
"learning_rate": 5.831578947368421e-07,
|
|
"loss": 0.0009,
|
|
"step": 9450
|
|
},
|
|
{
|
|
"epoch": 8.012684989429175,
|
|
"grad_norm": 0.007952416315674782,
|
|
"learning_rate": 5.56842105263158e-07,
|
|
"loss": 0.0005,
|
|
"step": 9475
|
|
},
|
|
{
|
|
"epoch": 8.033826638477802,
|
|
"grad_norm": 0.0123605253174901,
|
|
"learning_rate": 5.305263157894737e-07,
|
|
"loss": 0.0002,
|
|
"step": 9500
|
|
},
|
|
{
|
|
"epoch": 8.054968287526426,
|
|
"grad_norm": 0.009480569511651993,
|
|
"learning_rate": 5.042105263157895e-07,
|
|
"loss": 0.0002,
|
|
"step": 9525
|
|
},
|
|
{
|
|
"epoch": 8.076109936575053,
|
|
"grad_norm": 0.7348806262016296,
|
|
"learning_rate": 4.778947368421053e-07,
|
|
"loss": 0.0003,
|
|
"step": 9550
|
|
},
|
|
{
|
|
"epoch": 8.09725158562368,
|
|
"grad_norm": 0.011551867239177227,
|
|
"learning_rate": 4.5157894736842107e-07,
|
|
"loss": 0.0002,
|
|
"step": 9575
|
|
},
|
|
{
|
|
"epoch": 8.118393234672304,
|
|
"grad_norm": 0.010176840238273144,
|
|
"learning_rate": 4.2526315789473684e-07,
|
|
"loss": 0.0002,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 8.13953488372093,
|
|
"grad_norm": 0.021270159631967545,
|
|
"learning_rate": 3.9894736842105266e-07,
|
|
"loss": 0.0004,
|
|
"step": 9625
|
|
},
|
|
{
|
|
"epoch": 8.160676532769555,
|
|
"grad_norm": 0.006264516618102789,
|
|
"learning_rate": 3.726315789473685e-07,
|
|
"loss": 0.0003,
|
|
"step": 9650
|
|
},
|
|
{
|
|
"epoch": 8.181818181818182,
|
|
"grad_norm": 0.013031111098825932,
|
|
"learning_rate": 3.4631578947368424e-07,
|
|
"loss": 0.0005,
|
|
"step": 9675
|
|
},
|
|
{
|
|
"epoch": 8.202959830866808,
|
|
"grad_norm": 0.011172004975378513,
|
|
"learning_rate": 3.2e-07,
|
|
"loss": 0.0002,
|
|
"step": 9700
|
|
},
|
|
{
|
|
"epoch": 8.224101479915433,
|
|
"grad_norm": 0.01253934670239687,
|
|
"learning_rate": 2.936842105263158e-07,
|
|
"loss": 0.0006,
|
|
"step": 9725
|
|
},
|
|
{
|
|
"epoch": 8.24524312896406,
|
|
"grad_norm": 0.012547343969345093,
|
|
"learning_rate": 2.6736842105263164e-07,
|
|
"loss": 0.0002,
|
|
"step": 9750
|
|
},
|
|
{
|
|
"epoch": 8.266384778012686,
|
|
"grad_norm": 0.011723175644874573,
|
|
"learning_rate": 2.410526315789474e-07,
|
|
"loss": 0.0003,
|
|
"step": 9775
|
|
},
|
|
{
|
|
"epoch": 8.28752642706131,
|
|
"grad_norm": 0.014090826734900475,
|
|
"learning_rate": 2.1473684210526317e-07,
|
|
"loss": 0.0002,
|
|
"step": 9800
|
|
},
|
|
{
|
|
"epoch": 8.308668076109937,
|
|
"grad_norm": 0.011757822707295418,
|
|
"learning_rate": 1.8842105263157897e-07,
|
|
"loss": 0.0002,
|
|
"step": 9825
|
|
},
|
|
{
|
|
"epoch": 8.329809725158562,
|
|
"grad_norm": 0.014979742467403412,
|
|
"learning_rate": 1.6210526315789476e-07,
|
|
"loss": 0.0008,
|
|
"step": 9850
|
|
},
|
|
{
|
|
"epoch": 8.350951374207188,
|
|
"grad_norm": 0.01544391643255949,
|
|
"learning_rate": 1.3578947368421055e-07,
|
|
"loss": 0.0004,
|
|
"step": 9875
|
|
},
|
|
{
|
|
"epoch": 8.372093023255815,
|
|
"grad_norm": 0.009578757919371128,
|
|
"learning_rate": 1.0947368421052632e-07,
|
|
"loss": 0.0002,
|
|
"step": 9900
|
|
},
|
|
{
|
|
"epoch": 8.39323467230444,
|
|
"grad_norm": 0.007570895832031965,
|
|
"learning_rate": 8.315789473684211e-08,
|
|
"loss": 0.0003,
|
|
"step": 9925
|
|
},
|
|
{
|
|
"epoch": 8.414376321353066,
|
|
"grad_norm": 0.014304804615676403,
|
|
"learning_rate": 5.68421052631579e-08,
|
|
"loss": 0.0008,
|
|
"step": 9950
|
|
},
|
|
{
|
|
"epoch": 8.43551797040169,
|
|
"grad_norm": 0.9690393209457397,
|
|
"learning_rate": 3.0526315789473686e-08,
|
|
"loss": 0.0004,
|
|
"step": 9975
|
|
},
|
|
{
|
|
"epoch": 8.456659619450317,
|
|
"grad_norm": 0.025486772879958153,
|
|
"learning_rate": 4.210526315789474e-09,
|
|
"loss": 0.0005,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 8.456659619450317,
|
|
"eval_loss": 0.057420697063207626,
|
|
"eval_runtime": 765.3675,
|
|
"eval_samples_per_second": 4.757,
|
|
"eval_steps_per_second": 0.298,
|
|
"eval_wer": 0.04241496125110214,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 8.456659619450317,
|
|
"step": 10000,
|
|
"total_flos": 3.264874274960179e+20,
|
|
"train_loss": 0.03444647903675213,
|
|
"train_runtime": 65926.1076,
|
|
"train_samples_per_second": 4.854,
|
|
"train_steps_per_second": 0.152
|
|
}
|
|
],
|
|
"logging_steps": 25,
|
|
"max_steps": 10000,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 9,
|
|
"save_steps": 1000,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 3.264874274960179e+20,
|
|
"train_batch_size": 16,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|