1517 lines
36 KiB
JSON
1517 lines
36 KiB
JSON
|
|
{
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 76.49968454258675,
|
||
|
|
"global_step": 40392,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.5,
|
||
|
|
"learning_rate": 0.0002971500949968334,
|
||
|
|
"loss": 28.5848,
|
||
|
|
"step": 400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5,
|
||
|
|
"eval_cer": 1.0,
|
||
|
|
"eval_loss": 6.502501964569092,
|
||
|
|
"eval_runtime": 11.659,
|
||
|
|
"eval_samples_per_second": 44.344,
|
||
|
|
"eval_steps_per_second": 5.575,
|
||
|
|
"step": 400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.01,
|
||
|
|
"learning_rate": 0.000293350221659278,
|
||
|
|
"loss": 6.3672,
|
||
|
|
"step": 800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.01,
|
||
|
|
"eval_cer": 1.0,
|
||
|
|
"eval_loss": 6.260684490203857,
|
||
|
|
"eval_runtime": 10.5591,
|
||
|
|
"eval_samples_per_second": 48.963,
|
||
|
|
"eval_steps_per_second": 6.156,
|
||
|
|
"step": 800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.51,
|
||
|
|
"learning_rate": 0.0002895503483217226,
|
||
|
|
"loss": 6.2055,
|
||
|
|
"step": 1200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.51,
|
||
|
|
"eval_cer": 1.0,
|
||
|
|
"eval_loss": 6.158237934112549,
|
||
|
|
"eval_runtime": 10.6391,
|
||
|
|
"eval_samples_per_second": 48.594,
|
||
|
|
"eval_steps_per_second": 6.11,
|
||
|
|
"step": 1200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.02,
|
||
|
|
"learning_rate": 0.00028575047498416716,
|
||
|
|
"loss": 5.7804,
|
||
|
|
"step": 1600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.02,
|
||
|
|
"eval_cer": 0.9059449866903283,
|
||
|
|
"eval_loss": 4.536555767059326,
|
||
|
|
"eval_runtime": 10.5927,
|
||
|
|
"eval_samples_per_second": 48.807,
|
||
|
|
"eval_steps_per_second": 6.136,
|
||
|
|
"step": 1600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.52,
|
||
|
|
"learning_rate": 0.00028195060164661176,
|
||
|
|
"loss": 3.9161,
|
||
|
|
"step": 2000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.52,
|
||
|
|
"eval_cer": 0.8118899733806566,
|
||
|
|
"eval_loss": 3.134929895401001,
|
||
|
|
"eval_runtime": 10.6118,
|
||
|
|
"eval_samples_per_second": 48.72,
|
||
|
|
"eval_steps_per_second": 6.125,
|
||
|
|
"step": 2000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.03,
|
||
|
|
"learning_rate": 0.00027815072830905636,
|
||
|
|
"loss": 3.0555,
|
||
|
|
"step": 2400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.03,
|
||
|
|
"eval_cer": 0.7320319432120674,
|
||
|
|
"eval_loss": 2.483675241470337,
|
||
|
|
"eval_runtime": 10.6525,
|
||
|
|
"eval_samples_per_second": 48.533,
|
||
|
|
"eval_steps_per_second": 6.102,
|
||
|
|
"step": 2400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.54,
|
||
|
|
"learning_rate": 0.00027435085497150096,
|
||
|
|
"loss": 2.4529,
|
||
|
|
"step": 2800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.54,
|
||
|
|
"eval_cer": 0.6789707187222715,
|
||
|
|
"eval_loss": 2.2075281143188477,
|
||
|
|
"eval_runtime": 10.7002,
|
||
|
|
"eval_samples_per_second": 48.317,
|
||
|
|
"eval_steps_per_second": 6.075,
|
||
|
|
"step": 2800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.04,
|
||
|
|
"learning_rate": 0.0002705509816339455,
|
||
|
|
"loss": 2.1371,
|
||
|
|
"step": 3200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.04,
|
||
|
|
"eval_cer": 0.5902395740905058,
|
||
|
|
"eval_loss": 1.737073302268982,
|
||
|
|
"eval_runtime": 10.7189,
|
||
|
|
"eval_samples_per_second": 48.232,
|
||
|
|
"eval_steps_per_second": 6.064,
|
||
|
|
"step": 3200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.55,
|
||
|
|
"learning_rate": 0.0002667511082963901,
|
||
|
|
"loss": 1.83,
|
||
|
|
"step": 3600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.55,
|
||
|
|
"eval_cer": 0.5771073646850045,
|
||
|
|
"eval_loss": 1.6357048749923706,
|
||
|
|
"eval_runtime": 10.6531,
|
||
|
|
"eval_samples_per_second": 48.53,
|
||
|
|
"eval_steps_per_second": 6.102,
|
||
|
|
"step": 3600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.05,
|
||
|
|
"learning_rate": 0.0002629512349588347,
|
||
|
|
"loss": 1.7147,
|
||
|
|
"step": 4000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.05,
|
||
|
|
"eval_cer": 0.540550133096717,
|
||
|
|
"eval_loss": 1.4679120779037476,
|
||
|
|
"eval_runtime": 10.6485,
|
||
|
|
"eval_samples_per_second": 48.551,
|
||
|
|
"eval_steps_per_second": 6.104,
|
||
|
|
"step": 4000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.56,
|
||
|
|
"learning_rate": 0.00025915136162127925,
|
||
|
|
"loss": 1.5428,
|
||
|
|
"step": 4400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.56,
|
||
|
|
"eval_cer": 0.5210292812777285,
|
||
|
|
"eval_loss": 1.4024958610534668,
|
||
|
|
"eval_runtime": 11.1389,
|
||
|
|
"eval_samples_per_second": 46.414,
|
||
|
|
"eval_steps_per_second": 5.835,
|
||
|
|
"step": 4400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.06,
|
||
|
|
"learning_rate": 0.00025535148828372385,
|
||
|
|
"loss": 1.4859,
|
||
|
|
"step": 4800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.06,
|
||
|
|
"eval_cer": 0.5094942324755989,
|
||
|
|
"eval_loss": 1.3681739568710327,
|
||
|
|
"eval_runtime": 10.7225,
|
||
|
|
"eval_samples_per_second": 48.217,
|
||
|
|
"eval_steps_per_second": 6.062,
|
||
|
|
"step": 4800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.57,
|
||
|
|
"learning_rate": 0.00025155161494616845,
|
||
|
|
"loss": 1.359,
|
||
|
|
"step": 5200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.57,
|
||
|
|
"eval_cer": 0.49955634427684115,
|
||
|
|
"eval_loss": 1.3149375915527344,
|
||
|
|
"eval_runtime": 10.7188,
|
||
|
|
"eval_samples_per_second": 48.233,
|
||
|
|
"eval_steps_per_second": 6.064,
|
||
|
|
"step": 5200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.07,
|
||
|
|
"learning_rate": 0.00024775174160861305,
|
||
|
|
"loss": 1.3425,
|
||
|
|
"step": 5600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.07,
|
||
|
|
"eval_cer": 0.4913930789707187,
|
||
|
|
"eval_loss": 1.3069249391555786,
|
||
|
|
"eval_runtime": 10.672,
|
||
|
|
"eval_samples_per_second": 48.444,
|
||
|
|
"eval_steps_per_second": 6.091,
|
||
|
|
"step": 5600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.58,
|
||
|
|
"learning_rate": 0.00024395186827105763,
|
||
|
|
"loss": 1.2121,
|
||
|
|
"step": 6000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.58,
|
||
|
|
"eval_cer": 0.49032830523513754,
|
||
|
|
"eval_loss": 1.284098744392395,
|
||
|
|
"eval_runtime": 10.6314,
|
||
|
|
"eval_samples_per_second": 48.629,
|
||
|
|
"eval_steps_per_second": 6.114,
|
||
|
|
"step": 6000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 8.08,
|
||
|
|
"learning_rate": 0.0002401519949335022,
|
||
|
|
"loss": 1.1872,
|
||
|
|
"step": 6400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 8.08,
|
||
|
|
"eval_cer": 0.4727595385980479,
|
||
|
|
"eval_loss": 1.2425189018249512,
|
||
|
|
"eval_runtime": 10.4012,
|
||
|
|
"eval_samples_per_second": 49.706,
|
||
|
|
"eval_steps_per_second": 6.249,
|
||
|
|
"step": 6400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 8.59,
|
||
|
|
"learning_rate": 0.00023635212159594677,
|
||
|
|
"loss": 1.0969,
|
||
|
|
"step": 6800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 8.59,
|
||
|
|
"eval_cer": 0.47346938775510206,
|
||
|
|
"eval_loss": 1.2218185663223267,
|
||
|
|
"eval_runtime": 10.6834,
|
||
|
|
"eval_samples_per_second": 48.393,
|
||
|
|
"eval_steps_per_second": 6.084,
|
||
|
|
"step": 6800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 9.09,
|
||
|
|
"learning_rate": 0.00023255224825839138,
|
||
|
|
"loss": 1.0807,
|
||
|
|
"step": 7200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 9.09,
|
||
|
|
"eval_cer": 0.4603371783496007,
|
||
|
|
"eval_loss": 1.2110862731933594,
|
||
|
|
"eval_runtime": 10.7448,
|
||
|
|
"eval_samples_per_second": 48.116,
|
||
|
|
"eval_steps_per_second": 6.049,
|
||
|
|
"step": 7200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 9.6,
|
||
|
|
"learning_rate": 0.00022875237492083595,
|
||
|
|
"loss": 0.9964,
|
||
|
|
"step": 7600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 9.6,
|
||
|
|
"eval_cer": 0.44986690328305234,
|
||
|
|
"eval_loss": 1.1391839981079102,
|
||
|
|
"eval_runtime": 10.7437,
|
||
|
|
"eval_samples_per_second": 48.121,
|
||
|
|
"eval_steps_per_second": 6.05,
|
||
|
|
"step": 7600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 10.1,
|
||
|
|
"learning_rate": 0.00022495250158328055,
|
||
|
|
"loss": 0.9758,
|
||
|
|
"step": 8000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 10.1,
|
||
|
|
"eval_cer": 0.4433007985803017,
|
||
|
|
"eval_loss": 1.115509033203125,
|
||
|
|
"eval_runtime": 10.6429,
|
||
|
|
"eval_samples_per_second": 48.577,
|
||
|
|
"eval_steps_per_second": 6.107,
|
||
|
|
"step": 8000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 10.61,
|
||
|
|
"learning_rate": 0.00022115262824572512,
|
||
|
|
"loss": 0.8896,
|
||
|
|
"step": 8400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 10.61,
|
||
|
|
"eval_cer": 0.4456078083407276,
|
||
|
|
"eval_loss": 1.134329080581665,
|
||
|
|
"eval_runtime": 10.6834,
|
||
|
|
"eval_samples_per_second": 48.393,
|
||
|
|
"eval_steps_per_second": 6.084,
|
||
|
|
"step": 8400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 11.11,
|
||
|
|
"learning_rate": 0.00021735275490816972,
|
||
|
|
"loss": 0.869,
|
||
|
|
"step": 8800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 11.11,
|
||
|
|
"eval_cer": 0.4413487133984028,
|
||
|
|
"eval_loss": 1.1351521015167236,
|
||
|
|
"eval_runtime": 10.6859,
|
||
|
|
"eval_samples_per_second": 48.381,
|
||
|
|
"eval_steps_per_second": 6.083,
|
||
|
|
"step": 8800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 11.62,
|
||
|
|
"learning_rate": 0.0002135528815706143,
|
||
|
|
"loss": 0.8204,
|
||
|
|
"step": 9200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 11.62,
|
||
|
|
"eval_cer": 0.4431233362910382,
|
||
|
|
"eval_loss": 1.1095759868621826,
|
||
|
|
"eval_runtime": 10.6541,
|
||
|
|
"eval_samples_per_second": 48.526,
|
||
|
|
"eval_steps_per_second": 6.101,
|
||
|
|
"step": 9200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 12.12,
|
||
|
|
"learning_rate": 0.00020975300823305887,
|
||
|
|
"loss": 0.7935,
|
||
|
|
"step": 9600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 12.12,
|
||
|
|
"eval_cer": 0.4427684117125111,
|
||
|
|
"eval_loss": 1.1288646459579468,
|
||
|
|
"eval_runtime": 10.7946,
|
||
|
|
"eval_samples_per_second": 47.894,
|
||
|
|
"eval_steps_per_second": 6.022,
|
||
|
|
"step": 9600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 12.63,
|
||
|
|
"learning_rate": 0.00020595313489550347,
|
||
|
|
"loss": 0.728,
|
||
|
|
"step": 10000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 12.63,
|
||
|
|
"eval_cer": 0.4321206743566992,
|
||
|
|
"eval_loss": 1.086965799331665,
|
||
|
|
"eval_runtime": 10.816,
|
||
|
|
"eval_samples_per_second": 47.8,
|
||
|
|
"eval_steps_per_second": 6.01,
|
||
|
|
"step": 10000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 13.13,
|
||
|
|
"learning_rate": 0.00020215326155794804,
|
||
|
|
"loss": 0.7185,
|
||
|
|
"step": 10400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 13.13,
|
||
|
|
"eval_cer": 0.42058562555456963,
|
||
|
|
"eval_loss": 1.0575684309005737,
|
||
|
|
"eval_runtime": 10.6718,
|
||
|
|
"eval_samples_per_second": 48.446,
|
||
|
|
"eval_steps_per_second": 6.091,
|
||
|
|
"step": 10400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 13.64,
|
||
|
|
"learning_rate": 0.00019835338822039264,
|
||
|
|
"loss": 0.6604,
|
||
|
|
"step": 10800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 13.64,
|
||
|
|
"eval_cer": 0.4262644188110027,
|
||
|
|
"eval_loss": 1.0773364305496216,
|
||
|
|
"eval_runtime": 10.6968,
|
||
|
|
"eval_samples_per_second": 48.332,
|
||
|
|
"eval_steps_per_second": 6.077,
|
||
|
|
"step": 10800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 14.14,
|
||
|
|
"learning_rate": 0.00019455351488283722,
|
||
|
|
"loss": 0.6319,
|
||
|
|
"step": 11200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 14.14,
|
||
|
|
"eval_cer": 0.41543921916592724,
|
||
|
|
"eval_loss": 1.0636992454528809,
|
||
|
|
"eval_runtime": 10.6335,
|
||
|
|
"eval_samples_per_second": 48.62,
|
||
|
|
"eval_steps_per_second": 6.113,
|
||
|
|
"step": 11200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 14.65,
|
||
|
|
"learning_rate": 0.00019075364154528182,
|
||
|
|
"loss": 0.5949,
|
||
|
|
"step": 11600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 14.65,
|
||
|
|
"eval_cer": 0.41774622892635316,
|
||
|
|
"eval_loss": 1.0470980405807495,
|
||
|
|
"eval_runtime": 10.6966,
|
||
|
|
"eval_samples_per_second": 48.333,
|
||
|
|
"eval_steps_per_second": 6.077,
|
||
|
|
"step": 11600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 15.15,
|
||
|
|
"learning_rate": 0.0001869537682077264,
|
||
|
|
"loss": 0.5729,
|
||
|
|
"step": 12000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 15.15,
|
||
|
|
"eval_cer": 0.4111801242236025,
|
||
|
|
"eval_loss": 1.069692850112915,
|
||
|
|
"eval_runtime": 10.6729,
|
||
|
|
"eval_samples_per_second": 48.44,
|
||
|
|
"eval_steps_per_second": 6.09,
|
||
|
|
"step": 12000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 15.66,
|
||
|
|
"learning_rate": 0.000183153894870171,
|
||
|
|
"loss": 0.5408,
|
||
|
|
"step": 12400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 15.66,
|
||
|
|
"eval_cer": 0.4157941437444543,
|
||
|
|
"eval_loss": 1.042482614517212,
|
||
|
|
"eval_runtime": 10.665,
|
||
|
|
"eval_samples_per_second": 48.476,
|
||
|
|
"eval_steps_per_second": 6.095,
|
||
|
|
"step": 12400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 16.16,
|
||
|
|
"learning_rate": 0.00017935402153261557,
|
||
|
|
"loss": 0.5246,
|
||
|
|
"step": 12800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 16.16,
|
||
|
|
"eval_cer": 0.4085181898846495,
|
||
|
|
"eval_loss": 1.0480538606643677,
|
||
|
|
"eval_runtime": 10.7079,
|
||
|
|
"eval_samples_per_second": 48.282,
|
||
|
|
"eval_steps_per_second": 6.07,
|
||
|
|
"step": 12800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 16.67,
|
||
|
|
"learning_rate": 0.00017555414819506014,
|
||
|
|
"loss": 0.4757,
|
||
|
|
"step": 13200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 16.67,
|
||
|
|
"eval_cer": 0.4065661047027507,
|
||
|
|
"eval_loss": 1.0319401025772095,
|
||
|
|
"eval_runtime": 10.661,
|
||
|
|
"eval_samples_per_second": 48.494,
|
||
|
|
"eval_steps_per_second": 6.097,
|
||
|
|
"step": 13200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 17.17,
|
||
|
|
"learning_rate": 0.00017175427485750474,
|
||
|
|
"loss": 0.4694,
|
||
|
|
"step": 13600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 17.17,
|
||
|
|
"eval_cer": 0.402661934338953,
|
||
|
|
"eval_loss": 1.0221748352050781,
|
||
|
|
"eval_runtime": 10.6738,
|
||
|
|
"eval_samples_per_second": 48.436,
|
||
|
|
"eval_steps_per_second": 6.09,
|
||
|
|
"step": 13600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 17.68,
|
||
|
|
"learning_rate": 0.0001679544015199493,
|
||
|
|
"loss": 0.4514,
|
||
|
|
"step": 14000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 17.68,
|
||
|
|
"eval_cer": 0.4010647737355812,
|
||
|
|
"eval_loss": 1.0336159467697144,
|
||
|
|
"eval_runtime": 10.6673,
|
||
|
|
"eval_samples_per_second": 48.466,
|
||
|
|
"eval_steps_per_second": 6.093,
|
||
|
|
"step": 14000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 18.18,
|
||
|
|
"learning_rate": 0.00016415452818239391,
|
||
|
|
"loss": 0.4479,
|
||
|
|
"step": 14400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 18.18,
|
||
|
|
"eval_cer": 0.40301685891748,
|
||
|
|
"eval_loss": 1.0329766273498535,
|
||
|
|
"eval_runtime": 11.2617,
|
||
|
|
"eval_samples_per_second": 45.908,
|
||
|
|
"eval_steps_per_second": 5.772,
|
||
|
|
"step": 14400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 18.69,
|
||
|
|
"learning_rate": 0.0001603546548448385,
|
||
|
|
"loss": 0.4206,
|
||
|
|
"step": 14800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 18.69,
|
||
|
|
"eval_cer": 0.3953859804791482,
|
||
|
|
"eval_loss": 1.0453214645385742,
|
||
|
|
"eval_runtime": 10.4709,
|
||
|
|
"eval_samples_per_second": 49.375,
|
||
|
|
"eval_steps_per_second": 6.208,
|
||
|
|
"step": 14800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 19.19,
|
||
|
|
"learning_rate": 0.0001565547815072831,
|
||
|
|
"loss": 0.4025,
|
||
|
|
"step": 15200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 19.19,
|
||
|
|
"eval_cer": 0.4,
|
||
|
|
"eval_loss": 1.0425928831100464,
|
||
|
|
"eval_runtime": 10.7087,
|
||
|
|
"eval_samples_per_second": 48.278,
|
||
|
|
"eval_steps_per_second": 6.07,
|
||
|
|
"step": 15200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 19.7,
|
||
|
|
"learning_rate": 0.00015275490816972766,
|
||
|
|
"loss": 0.368,
|
||
|
|
"step": 15600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 19.7,
|
||
|
|
"eval_cer": 0.391659272404614,
|
||
|
|
"eval_loss": 1.0207164287567139,
|
||
|
|
"eval_runtime": 10.853,
|
||
|
|
"eval_samples_per_second": 47.637,
|
||
|
|
"eval_steps_per_second": 5.989,
|
||
|
|
"step": 15600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 20.2,
|
||
|
|
"learning_rate": 0.00014895503483217226,
|
||
|
|
"loss": 0.3652,
|
||
|
|
"step": 16000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 20.2,
|
||
|
|
"eval_cer": 0.3877551020408163,
|
||
|
|
"eval_loss": 1.019087791442871,
|
||
|
|
"eval_runtime": 10.7301,
|
||
|
|
"eval_samples_per_second": 48.182,
|
||
|
|
"eval_steps_per_second": 6.058,
|
||
|
|
"step": 16000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 20.71,
|
||
|
|
"learning_rate": 0.00014515516149461683,
|
||
|
|
"loss": 0.3362,
|
||
|
|
"step": 16400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 20.71,
|
||
|
|
"eval_cer": 0.38846495119787045,
|
||
|
|
"eval_loss": 1.0187304019927979,
|
||
|
|
"eval_runtime": 10.6995,
|
||
|
|
"eval_samples_per_second": 48.32,
|
||
|
|
"eval_steps_per_second": 6.075,
|
||
|
|
"step": 16400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 21.21,
|
||
|
|
"learning_rate": 0.0001413552881570614,
|
||
|
|
"loss": 0.354,
|
||
|
|
"step": 16800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 21.21,
|
||
|
|
"eval_cer": 0.3881100266193434,
|
||
|
|
"eval_loss": 1.0370773077011108,
|
||
|
|
"eval_runtime": 10.6833,
|
||
|
|
"eval_samples_per_second": 48.393,
|
||
|
|
"eval_steps_per_second": 6.084,
|
||
|
|
"step": 16800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 21.72,
|
||
|
|
"learning_rate": 0.000137555414819506,
|
||
|
|
"loss": 0.3296,
|
||
|
|
"step": 17200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 21.72,
|
||
|
|
"eval_cer": 0.3893522626441881,
|
||
|
|
"eval_loss": 1.0535281896591187,
|
||
|
|
"eval_runtime": 10.7771,
|
||
|
|
"eval_samples_per_second": 47.972,
|
||
|
|
"eval_steps_per_second": 6.031,
|
||
|
|
"step": 17200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 22.22,
|
||
|
|
"learning_rate": 0.00013375554148195058,
|
||
|
|
"loss": 0.3134,
|
||
|
|
"step": 17600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 22.22,
|
||
|
|
"eval_cer": 0.3877551020408163,
|
||
|
|
"eval_loss": 1.0371551513671875,
|
||
|
|
"eval_runtime": 10.68,
|
||
|
|
"eval_samples_per_second": 48.408,
|
||
|
|
"eval_steps_per_second": 6.086,
|
||
|
|
"step": 17600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 22.73,
|
||
|
|
"learning_rate": 0.00012995566814439518,
|
||
|
|
"loss": 0.3077,
|
||
|
|
"step": 18000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 22.73,
|
||
|
|
"eval_cer": 0.39077196095829636,
|
||
|
|
"eval_loss": 1.0353987216949463,
|
||
|
|
"eval_runtime": 10.4328,
|
||
|
|
"eval_samples_per_second": 49.555,
|
||
|
|
"eval_steps_per_second": 6.23,
|
||
|
|
"step": 18000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 23.23,
|
||
|
|
"learning_rate": 0.00012615579480683976,
|
||
|
|
"loss": 0.289,
|
||
|
|
"step": 18400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 23.23,
|
||
|
|
"eval_cer": 0.3934338952972493,
|
||
|
|
"eval_loss": 1.0498236417770386,
|
||
|
|
"eval_runtime": 10.6646,
|
||
|
|
"eval_samples_per_second": 48.478,
|
||
|
|
"eval_steps_per_second": 6.095,
|
||
|
|
"step": 18400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 23.74,
|
||
|
|
"learning_rate": 0.00012235592146928436,
|
||
|
|
"loss": 0.2753,
|
||
|
|
"step": 18800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 23.74,
|
||
|
|
"eval_cer": 0.39023957409050575,
|
||
|
|
"eval_loss": 1.0461602210998535,
|
||
|
|
"eval_runtime": 10.685,
|
||
|
|
"eval_samples_per_second": 48.386,
|
||
|
|
"eval_steps_per_second": 6.083,
|
||
|
|
"step": 18800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 24.24,
|
||
|
|
"learning_rate": 0.00011855604813172893,
|
||
|
|
"loss": 0.2791,
|
||
|
|
"step": 19200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 24.24,
|
||
|
|
"eval_cer": 0.38846495119787045,
|
||
|
|
"eval_loss": 1.07412588596344,
|
||
|
|
"eval_runtime": 10.7167,
|
||
|
|
"eval_samples_per_second": 48.243,
|
||
|
|
"eval_steps_per_second": 6.065,
|
||
|
|
"step": 19200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 24.75,
|
||
|
|
"learning_rate": 0.00011475617479417352,
|
||
|
|
"loss": 0.2757,
|
||
|
|
"step": 19600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 24.75,
|
||
|
|
"eval_cer": 0.385980479148181,
|
||
|
|
"eval_loss": 1.0546280145645142,
|
||
|
|
"eval_runtime": 10.5367,
|
||
|
|
"eval_samples_per_second": 49.067,
|
||
|
|
"eval_steps_per_second": 6.169,
|
||
|
|
"step": 19600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 25.25,
|
||
|
|
"learning_rate": 0.0001109563014566181,
|
||
|
|
"loss": 0.2533,
|
||
|
|
"step": 20000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 25.25,
|
||
|
|
"eval_cer": 0.3817213842058563,
|
||
|
|
"eval_loss": 1.0429767370224,
|
||
|
|
"eval_runtime": 10.7598,
|
||
|
|
"eval_samples_per_second": 48.049,
|
||
|
|
"eval_steps_per_second": 6.041,
|
||
|
|
"step": 20000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 25.76,
|
||
|
|
"learning_rate": 0.00010715642811906269,
|
||
|
|
"loss": 0.2499,
|
||
|
|
"step": 20400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 25.76,
|
||
|
|
"eval_cer": 0.38456078083407275,
|
||
|
|
"eval_loss": 1.0354866981506348,
|
||
|
|
"eval_runtime": 10.6619,
|
||
|
|
"eval_samples_per_second": 48.49,
|
||
|
|
"eval_steps_per_second": 6.096,
|
||
|
|
"step": 20400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 26.26,
|
||
|
|
"learning_rate": 0.00010335655478150728,
|
||
|
|
"loss": 0.2407,
|
||
|
|
"step": 20800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 26.26,
|
||
|
|
"eval_cer": 0.38101153504880214,
|
||
|
|
"eval_loss": 1.0512378215789795,
|
||
|
|
"eval_runtime": 10.667,
|
||
|
|
"eval_samples_per_second": 48.467,
|
||
|
|
"eval_steps_per_second": 6.094,
|
||
|
|
"step": 20800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 26.77,
|
||
|
|
"learning_rate": 9.955668144395185e-05,
|
||
|
|
"loss": 0.2373,
|
||
|
|
"step": 21200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 26.77,
|
||
|
|
"eval_cer": 0.3758651286601597,
|
||
|
|
"eval_loss": 1.032917857170105,
|
||
|
|
"eval_runtime": 10.6927,
|
||
|
|
"eval_samples_per_second": 48.351,
|
||
|
|
"eval_steps_per_second": 6.079,
|
||
|
|
"step": 21200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 27.27,
|
||
|
|
"learning_rate": 9.575680810639644e-05,
|
||
|
|
"loss": 0.2295,
|
||
|
|
"step": 21600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 27.27,
|
||
|
|
"eval_cer": 0.3785270629991127,
|
||
|
|
"eval_loss": 1.031385064125061,
|
||
|
|
"eval_runtime": 10.7343,
|
||
|
|
"eval_samples_per_second": 48.163,
|
||
|
|
"eval_steps_per_second": 6.055,
|
||
|
|
"step": 21600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 27.78,
|
||
|
|
"learning_rate": 9.195693476884103e-05,
|
||
|
|
"loss": 0.2186,
|
||
|
|
"step": 22000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 27.78,
|
||
|
|
"eval_cer": 0.3742679680567879,
|
||
|
|
"eval_loss": 1.028822422027588,
|
||
|
|
"eval_runtime": 10.6374,
|
||
|
|
"eval_samples_per_second": 48.602,
|
||
|
|
"eval_steps_per_second": 6.111,
|
||
|
|
"step": 22000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 28.28,
|
||
|
|
"learning_rate": 8.815706143128561e-05,
|
||
|
|
"loss": 0.2084,
|
||
|
|
"step": 22400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 28.28,
|
||
|
|
"eval_cer": 0.37373558118899736,
|
||
|
|
"eval_loss": 1.0298017263412476,
|
||
|
|
"eval_runtime": 10.6689,
|
||
|
|
"eval_samples_per_second": 48.459,
|
||
|
|
"eval_steps_per_second": 6.092,
|
||
|
|
"step": 22400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 28.79,
|
||
|
|
"learning_rate": 8.43571880937302e-05,
|
||
|
|
"loss": 0.2066,
|
||
|
|
"step": 22800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 28.79,
|
||
|
|
"eval_cer": 0.37497781721384205,
|
||
|
|
"eval_loss": 1.0195808410644531,
|
||
|
|
"eval_runtime": 10.4803,
|
||
|
|
"eval_samples_per_second": 49.331,
|
||
|
|
"eval_steps_per_second": 6.202,
|
||
|
|
"step": 22800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 29.29,
|
||
|
|
"learning_rate": 8.055731475617479e-05,
|
||
|
|
"loss": 0.1933,
|
||
|
|
"step": 23200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 29.29,
|
||
|
|
"eval_cer": 0.380301685891748,
|
||
|
|
"eval_loss": 1.0443964004516602,
|
||
|
|
"eval_runtime": 10.6391,
|
||
|
|
"eval_samples_per_second": 48.594,
|
||
|
|
"eval_steps_per_second": 6.11,
|
||
|
|
"step": 23200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 29.8,
|
||
|
|
"learning_rate": 7.675744141861937e-05,
|
||
|
|
"loss": 0.1875,
|
||
|
|
"step": 23600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 29.8,
|
||
|
|
"eval_cer": 0.3691215616681455,
|
||
|
|
"eval_loss": 1.0274165868759155,
|
||
|
|
"eval_runtime": 10.6901,
|
||
|
|
"eval_samples_per_second": 48.363,
|
||
|
|
"eval_steps_per_second": 6.08,
|
||
|
|
"step": 23600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 30.3,
|
||
|
|
"learning_rate": 7.295756808106396e-05,
|
||
|
|
"loss": 0.184,
|
||
|
|
"step": 24000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 30.3,
|
||
|
|
"eval_cer": 0.37267080745341613,
|
||
|
|
"eval_loss": 1.0159742832183838,
|
||
|
|
"eval_runtime": 10.6909,
|
||
|
|
"eval_samples_per_second": 48.359,
|
||
|
|
"eval_steps_per_second": 6.08,
|
||
|
|
"step": 24000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 30.81,
|
||
|
|
"learning_rate": 6.915769474350855e-05,
|
||
|
|
"loss": 0.1864,
|
||
|
|
"step": 24400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 30.81,
|
||
|
|
"eval_cer": 0.37089618456078083,
|
||
|
|
"eval_loss": 1.0185551643371582,
|
||
|
|
"eval_runtime": 10.6616,
|
||
|
|
"eval_samples_per_second": 48.492,
|
||
|
|
"eval_steps_per_second": 6.097,
|
||
|
|
"step": 24400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 31.31,
|
||
|
|
"learning_rate": 6.535782140595312e-05,
|
||
|
|
"loss": 0.176,
|
||
|
|
"step": 24800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 31.31,
|
||
|
|
"eval_cer": 0.3682342502218279,
|
||
|
|
"eval_loss": 1.01682710647583,
|
||
|
|
"eval_runtime": 10.7487,
|
||
|
|
"eval_samples_per_second": 48.099,
|
||
|
|
"eval_steps_per_second": 6.047,
|
||
|
|
"step": 24800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 31.82,
|
||
|
|
"learning_rate": 6.155794806839771e-05,
|
||
|
|
"loss": 0.1734,
|
||
|
|
"step": 25200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 31.82,
|
||
|
|
"eval_cer": 0.3685891748003549,
|
||
|
|
"eval_loss": 1.0079487562179565,
|
||
|
|
"eval_runtime": 10.6916,
|
||
|
|
"eval_samples_per_second": 48.356,
|
||
|
|
"eval_steps_per_second": 6.08,
|
||
|
|
"step": 25200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 32.32,
|
||
|
|
"learning_rate": 5.7758074730842294e-05,
|
||
|
|
"loss": 0.1686,
|
||
|
|
"step": 25600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 32.32,
|
||
|
|
"eval_cer": 0.37107364685004435,
|
||
|
|
"eval_loss": 1.0045541524887085,
|
||
|
|
"eval_runtime": 10.6896,
|
||
|
|
"eval_samples_per_second": 48.365,
|
||
|
|
"eval_steps_per_second": 6.081,
|
||
|
|
"step": 25600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 32.83,
|
||
|
|
"learning_rate": 5.395820139328688e-05,
|
||
|
|
"loss": 0.1636,
|
||
|
|
"step": 26000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 32.83,
|
||
|
|
"eval_cer": 0.366282165039929,
|
||
|
|
"eval_loss": 1.0012236833572388,
|
||
|
|
"eval_runtime": 10.6269,
|
||
|
|
"eval_samples_per_second": 48.65,
|
||
|
|
"eval_steps_per_second": 6.117,
|
||
|
|
"step": 26000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 33.33,
|
||
|
|
"learning_rate": 5.015832805573147e-05,
|
||
|
|
"loss": 0.1584,
|
||
|
|
"step": 26400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 33.33,
|
||
|
|
"eval_cer": 0.3634427684117125,
|
||
|
|
"eval_loss": 0.9943842887878418,
|
||
|
|
"eval_runtime": 10.7058,
|
||
|
|
"eval_samples_per_second": 48.292,
|
||
|
|
"eval_steps_per_second": 6.071,
|
||
|
|
"step": 26400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 33.84,
|
||
|
|
"learning_rate": 4.635845471817606e-05,
|
||
|
|
"loss": 0.1592,
|
||
|
|
"step": 26800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 33.84,
|
||
|
|
"eval_cer": 0.3678793256433008,
|
||
|
|
"eval_loss": 0.9912722110748291,
|
||
|
|
"eval_runtime": 10.6845,
|
||
|
|
"eval_samples_per_second": 48.388,
|
||
|
|
"eval_steps_per_second": 6.084,
|
||
|
|
"step": 26800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 34.34,
|
||
|
|
"learning_rate": 4.255858138062065e-05,
|
||
|
|
"loss": 0.1574,
|
||
|
|
"step": 27200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 34.34,
|
||
|
|
"eval_cer": 0.36876663708961843,
|
||
|
|
"eval_loss": 1.0088311433792114,
|
||
|
|
"eval_runtime": 10.6592,
|
||
|
|
"eval_samples_per_second": 48.503,
|
||
|
|
"eval_steps_per_second": 6.098,
|
||
|
|
"step": 27200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 34.85,
|
||
|
|
"learning_rate": 3.875870804306523e-05,
|
||
|
|
"loss": 0.1537,
|
||
|
|
"step": 27600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 34.85,
|
||
|
|
"eval_cer": 0.3645075421472937,
|
||
|
|
"eval_loss": 0.9913118481636047,
|
||
|
|
"eval_runtime": 11.1744,
|
||
|
|
"eval_samples_per_second": 46.266,
|
||
|
|
"eval_steps_per_second": 5.817,
|
||
|
|
"step": 27600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 35.35,
|
||
|
|
"learning_rate": 3.495883470550981e-05,
|
||
|
|
"loss": 0.1461,
|
||
|
|
"step": 28000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 35.35,
|
||
|
|
"eval_cer": 0.3634427684117125,
|
||
|
|
"eval_loss": 0.9954361915588379,
|
||
|
|
"eval_runtime": 10.4022,
|
||
|
|
"eval_samples_per_second": 49.701,
|
||
|
|
"eval_steps_per_second": 6.249,
|
||
|
|
"step": 28000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 35.86,
|
||
|
|
"learning_rate": 3.1158961367954396e-05,
|
||
|
|
"loss": 0.1462,
|
||
|
|
"step": 28400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 35.86,
|
||
|
|
"eval_cer": 0.35989352262644186,
|
||
|
|
"eval_loss": 0.9881103038787842,
|
||
|
|
"eval_runtime": 10.6493,
|
||
|
|
"eval_samples_per_second": 48.548,
|
||
|
|
"eval_steps_per_second": 6.104,
|
||
|
|
"step": 28400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 36.36,
|
||
|
|
"learning_rate": 2.7359088030398983e-05,
|
||
|
|
"loss": 0.1412,
|
||
|
|
"step": 28800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 36.36,
|
||
|
|
"eval_cer": 0.3593611357586513,
|
||
|
|
"eval_loss": 0.9881191849708557,
|
||
|
|
"eval_runtime": 11.1771,
|
||
|
|
"eval_samples_per_second": 46.255,
|
||
|
|
"eval_steps_per_second": 5.815,
|
||
|
|
"step": 28800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 36.87,
|
||
|
|
"learning_rate": 2.3559214692843567e-05,
|
||
|
|
"loss": 0.1382,
|
||
|
|
"step": 29200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 36.87,
|
||
|
|
"eval_cer": 0.36184560780834074,
|
||
|
|
"eval_loss": 0.9879063963890076,
|
||
|
|
"eval_runtime": 10.6397,
|
||
|
|
"eval_samples_per_second": 48.592,
|
||
|
|
"eval_steps_per_second": 6.109,
|
||
|
|
"step": 29200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 37.37,
|
||
|
|
"learning_rate": 1.9759341355288154e-05,
|
||
|
|
"loss": 0.1395,
|
||
|
|
"step": 29600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 37.37,
|
||
|
|
"eval_cer": 0.3582963620230701,
|
||
|
|
"eval_loss": 0.9859166145324707,
|
||
|
|
"eval_runtime": 10.6674,
|
||
|
|
"eval_samples_per_second": 48.465,
|
||
|
|
"eval_steps_per_second": 6.093,
|
||
|
|
"step": 29600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 37.88,
|
||
|
|
"learning_rate": 1.595946801773274e-05,
|
||
|
|
"loss": 0.1375,
|
||
|
|
"step": 30000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 37.88,
|
||
|
|
"eval_cer": 0.36024844720496896,
|
||
|
|
"eval_loss": 0.9944302439689636,
|
||
|
|
"eval_runtime": 10.7121,
|
||
|
|
"eval_samples_per_second": 48.263,
|
||
|
|
"eval_steps_per_second": 6.068,
|
||
|
|
"step": 30000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 38.38,
|
||
|
|
"learning_rate": 1.2159594680177326e-05,
|
||
|
|
"loss": 0.1277,
|
||
|
|
"step": 30400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 38.38,
|
||
|
|
"eval_cer": 0.3611357586512866,
|
||
|
|
"eval_loss": 0.9873452186584473,
|
||
|
|
"eval_runtime": 10.6816,
|
||
|
|
"eval_samples_per_second": 48.401,
|
||
|
|
"eval_steps_per_second": 6.085,
|
||
|
|
"step": 30400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 38.89,
|
||
|
|
"learning_rate": 8.359721342621911e-06,
|
||
|
|
"loss": 0.1356,
|
||
|
|
"step": 30800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 38.89,
|
||
|
|
"eval_cer": 0.36007098491570544,
|
||
|
|
"eval_loss": 0.9833679795265198,
|
||
|
|
"eval_runtime": 10.6711,
|
||
|
|
"eval_samples_per_second": 48.449,
|
||
|
|
"eval_steps_per_second": 6.091,
|
||
|
|
"step": 30800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 59.09,
|
||
|
|
"learning_rate": 7.859515899383008e-05,
|
||
|
|
"loss": 0.141,
|
||
|
|
"step": 31200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 59.09,
|
||
|
|
"eval_cer": 0.36539485359361135,
|
||
|
|
"eval_loss": 1.0076383352279663,
|
||
|
|
"eval_runtime": 11.8848,
|
||
|
|
"eval_samples_per_second": 43.501,
|
||
|
|
"eval_steps_per_second": 5.469,
|
||
|
|
"step": 31200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 59.85,
|
||
|
|
"learning_rate": 7.574750830564784e-05,
|
||
|
|
"loss": 0.1391,
|
||
|
|
"step": 31600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 59.85,
|
||
|
|
"eval_cer": 0.363265306122449,
|
||
|
|
"eval_loss": 1.0228257179260254,
|
||
|
|
"eval_runtime": 10.7532,
|
||
|
|
"eval_samples_per_second": 48.079,
|
||
|
|
"eval_steps_per_second": 6.045,
|
||
|
|
"step": 31600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 60.61,
|
||
|
|
"learning_rate": 7.289985761746559e-05,
|
||
|
|
"loss": 0.1444,
|
||
|
|
"step": 32000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 60.61,
|
||
|
|
"eval_cer": 0.36876663708961843,
|
||
|
|
"eval_loss": 1.0302114486694336,
|
||
|
|
"eval_runtime": 10.5859,
|
||
|
|
"eval_samples_per_second": 48.838,
|
||
|
|
"eval_steps_per_second": 6.14,
|
||
|
|
"step": 32000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 61.36,
|
||
|
|
"learning_rate": 7.005220692928333e-05,
|
||
|
|
"loss": 0.1396,
|
||
|
|
"step": 32400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 61.36,
|
||
|
|
"eval_cer": 0.3634427684117125,
|
||
|
|
"eval_loss": 1.0219813585281372,
|
||
|
|
"eval_runtime": 10.7349,
|
||
|
|
"eval_samples_per_second": 48.161,
|
||
|
|
"eval_steps_per_second": 6.055,
|
||
|
|
"step": 32400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 62.12,
|
||
|
|
"learning_rate": 6.720455624110109e-05,
|
||
|
|
"loss": 0.1383,
|
||
|
|
"step": 32800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 62.12,
|
||
|
|
"eval_cer": 0.3625554569653949,
|
||
|
|
"eval_loss": 1.0074561834335327,
|
||
|
|
"eval_runtime": 10.7365,
|
||
|
|
"eval_samples_per_second": 48.154,
|
||
|
|
"eval_steps_per_second": 6.054,
|
||
|
|
"step": 32800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 62.88,
|
||
|
|
"learning_rate": 6.435690555291883e-05,
|
||
|
|
"loss": 0.1338,
|
||
|
|
"step": 33200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 62.88,
|
||
|
|
"eval_cer": 0.36131322094055013,
|
||
|
|
"eval_loss": 1.009969711303711,
|
||
|
|
"eval_runtime": 10.8491,
|
||
|
|
"eval_samples_per_second": 47.654,
|
||
|
|
"eval_steps_per_second": 5.991,
|
||
|
|
"step": 33200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 63.64,
|
||
|
|
"learning_rate": 6.150925486473658e-05,
|
||
|
|
"loss": 0.1322,
|
||
|
|
"step": 33600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 63.64,
|
||
|
|
"eval_cer": 0.35989352262644186,
|
||
|
|
"eval_loss": 1.0064263343811035,
|
||
|
|
"eval_runtime": 10.7017,
|
||
|
|
"eval_samples_per_second": 48.31,
|
||
|
|
"eval_steps_per_second": 6.074,
|
||
|
|
"step": 33600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 64.39,
|
||
|
|
"learning_rate": 5.866160417655434e-05,
|
||
|
|
"loss": 0.1313,
|
||
|
|
"step": 34000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 64.39,
|
||
|
|
"eval_cer": 0.3611357586512866,
|
||
|
|
"eval_loss": 1.0025349855422974,
|
||
|
|
"eval_runtime": 10.8036,
|
||
|
|
"eval_samples_per_second": 47.855,
|
||
|
|
"eval_steps_per_second": 6.017,
|
||
|
|
"step": 34000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 65.15,
|
||
|
|
"learning_rate": 5.581395348837209e-05,
|
||
|
|
"loss": 0.1275,
|
||
|
|
"step": 34400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 65.15,
|
||
|
|
"eval_cer": 0.3625554569653949,
|
||
|
|
"eval_loss": 0.9986574649810791,
|
||
|
|
"eval_runtime": 10.7283,
|
||
|
|
"eval_samples_per_second": 48.19,
|
||
|
|
"eval_steps_per_second": 6.059,
|
||
|
|
"step": 34400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 65.91,
|
||
|
|
"learning_rate": 5.296630280018984e-05,
|
||
|
|
"loss": 0.125,
|
||
|
|
"step": 34800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 65.91,
|
||
|
|
"eval_cer": 0.36574977817213844,
|
||
|
|
"eval_loss": 1.010204553604126,
|
||
|
|
"eval_runtime": 10.8039,
|
||
|
|
"eval_samples_per_second": 47.853,
|
||
|
|
"eval_steps_per_second": 6.016,
|
||
|
|
"step": 34800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 66.67,
|
||
|
|
"learning_rate": 5.011865211200759e-05,
|
||
|
|
"loss": 0.121,
|
||
|
|
"step": 35200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 66.67,
|
||
|
|
"eval_cer": 0.36308784383318543,
|
||
|
|
"eval_loss": 1.0088319778442383,
|
||
|
|
"eval_runtime": 10.4383,
|
||
|
|
"eval_samples_per_second": 49.529,
|
||
|
|
"eval_steps_per_second": 6.227,
|
||
|
|
"step": 35200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 67.42,
|
||
|
|
"learning_rate": 4.727100142382534e-05,
|
||
|
|
"loss": 0.1247,
|
||
|
|
"step": 35600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 67.42,
|
||
|
|
"eval_cer": 0.3648624667258208,
|
||
|
|
"eval_loss": 1.0154913663864136,
|
||
|
|
"eval_runtime": 10.7106,
|
||
|
|
"eval_samples_per_second": 48.27,
|
||
|
|
"eval_steps_per_second": 6.069,
|
||
|
|
"step": 35600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 68.18,
|
||
|
|
"learning_rate": 4.442335073564309e-05,
|
||
|
|
"loss": 0.1164,
|
||
|
|
"step": 36000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 68.18,
|
||
|
|
"eval_cer": 0.3622005323868678,
|
||
|
|
"eval_loss": 0.9949304461479187,
|
||
|
|
"eval_runtime": 10.713,
|
||
|
|
"eval_samples_per_second": 48.259,
|
||
|
|
"eval_steps_per_second": 6.067,
|
||
|
|
"step": 36000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 68.94,
|
||
|
|
"learning_rate": 4.157570004746084e-05,
|
||
|
|
"loss": 0.1112,
|
||
|
|
"step": 36400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 68.94,
|
||
|
|
"eval_cer": 0.3609582963620231,
|
||
|
|
"eval_loss": 1.00165593624115,
|
||
|
|
"eval_runtime": 10.7166,
|
||
|
|
"eval_samples_per_second": 48.243,
|
||
|
|
"eval_steps_per_second": 6.065,
|
||
|
|
"step": 36400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 69.7,
|
||
|
|
"learning_rate": 3.872804935927859e-05,
|
||
|
|
"loss": 0.1143,
|
||
|
|
"step": 36800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 69.7,
|
||
|
|
"eval_cer": 0.3595385980479148,
|
||
|
|
"eval_loss": 0.9980924725532532,
|
||
|
|
"eval_runtime": 10.7622,
|
||
|
|
"eval_samples_per_second": 48.038,
|
||
|
|
"eval_steps_per_second": 6.04,
|
||
|
|
"step": 36800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 70.45,
|
||
|
|
"learning_rate": 3.588039867109634e-05,
|
||
|
|
"loss": 0.109,
|
||
|
|
"step": 37200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 70.45,
|
||
|
|
"eval_cer": 0.3604259094942325,
|
||
|
|
"eval_loss": 1.001591682434082,
|
||
|
|
"eval_runtime": 10.6962,
|
||
|
|
"eval_samples_per_second": 48.335,
|
||
|
|
"eval_steps_per_second": 6.077,
|
||
|
|
"step": 37200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 71.21,
|
||
|
|
"learning_rate": 3.303274798291409e-05,
|
||
|
|
"loss": 0.1066,
|
||
|
|
"step": 37600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 71.21,
|
||
|
|
"eval_cer": 0.35918367346938773,
|
||
|
|
"eval_loss": 0.9884746074676514,
|
||
|
|
"eval_runtime": 10.6604,
|
||
|
|
"eval_samples_per_second": 48.497,
|
||
|
|
"eval_steps_per_second": 6.097,
|
||
|
|
"step": 37600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 71.97,
|
||
|
|
"learning_rate": 3.0185097294731845e-05,
|
||
|
|
"loss": 0.1042,
|
||
|
|
"step": 38000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 71.97,
|
||
|
|
"eval_cer": 0.36007098491570544,
|
||
|
|
"eval_loss": 0.9990329742431641,
|
||
|
|
"eval_runtime": 10.7259,
|
||
|
|
"eval_samples_per_second": 48.201,
|
||
|
|
"eval_steps_per_second": 6.06,
|
||
|
|
"step": 38000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 72.73,
|
||
|
|
"learning_rate": 2.7337446606549593e-05,
|
||
|
|
"loss": 0.1024,
|
||
|
|
"step": 38400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 72.73,
|
||
|
|
"eval_cer": 0.36007098491570544,
|
||
|
|
"eval_loss": 0.9916397333145142,
|
||
|
|
"eval_runtime": 10.721,
|
||
|
|
"eval_samples_per_second": 48.223,
|
||
|
|
"eval_steps_per_second": 6.063,
|
||
|
|
"step": 38400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 73.48,
|
||
|
|
"learning_rate": 2.448979591836734e-05,
|
||
|
|
"loss": 0.1064,
|
||
|
|
"step": 38800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 73.48,
|
||
|
|
"eval_cer": 0.35811889973380656,
|
||
|
|
"eval_loss": 0.9944778084754944,
|
||
|
|
"eval_runtime": 10.7694,
|
||
|
|
"eval_samples_per_second": 48.006,
|
||
|
|
"eval_steps_per_second": 6.036,
|
||
|
|
"step": 38800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 74.24,
|
||
|
|
"learning_rate": 2.1642145230185097e-05,
|
||
|
|
"loss": 0.1019,
|
||
|
|
"step": 39200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 74.24,
|
||
|
|
"eval_cer": 0.3566992014196983,
|
||
|
|
"eval_loss": 0.9997159838676453,
|
||
|
|
"eval_runtime": 10.7395,
|
||
|
|
"eval_samples_per_second": 48.14,
|
||
|
|
"eval_steps_per_second": 6.052,
|
||
|
|
"step": 39200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 75.0,
|
||
|
|
"learning_rate": 1.8794494542002845e-05,
|
||
|
|
"loss": 0.0977,
|
||
|
|
"step": 39600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 75.0,
|
||
|
|
"eval_cer": 0.35616681455190774,
|
||
|
|
"eval_loss": 0.9909945130348206,
|
||
|
|
"eval_runtime": 10.7111,
|
||
|
|
"eval_samples_per_second": 48.268,
|
||
|
|
"eval_steps_per_second": 6.068,
|
||
|
|
"step": 39600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 75.76,
|
||
|
|
"learning_rate": 1.5946843853820597e-05,
|
||
|
|
"loss": 0.097,
|
||
|
|
"step": 40000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 75.76,
|
||
|
|
"eval_cer": 0.35598935226264417,
|
||
|
|
"eval_loss": 0.9969141483306885,
|
||
|
|
"eval_runtime": 10.7789,
|
||
|
|
"eval_samples_per_second": 47.964,
|
||
|
|
"eval_steps_per_second": 6.03,
|
||
|
|
"step": 40000
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"max_steps": 42240,
|
||
|
|
"num_train_epochs": 80,
|
||
|
|
"total_flos": 9.124217746582361e+19,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|