22198 lines
437 KiB
JSON
22198 lines
437 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 3.0,
|
|
"eval_steps": 500,
|
|
"global_step": 3696,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 1.0810810810810812e-07,
|
|
"loss": 2.1692,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 2.1621621621621625e-07,
|
|
"loss": 1.9692,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 3.2432432432432436e-07,
|
|
"loss": 2.005,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 4.324324324324325e-07,
|
|
"loss": 1.9709,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 5.405405405405406e-07,
|
|
"loss": 2.1698,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 6.486486486486487e-07,
|
|
"loss": 1.9184,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 7.567567567567569e-07,
|
|
"loss": 1.914,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 8.64864864864865e-07,
|
|
"loss": 2.1652,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 9.72972972972973e-07,
|
|
"loss": 2.1109,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.0810810810810812e-06,
|
|
"loss": 2.0998,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.1891891891891893e-06,
|
|
"loss": 2.0638,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.2972972972972974e-06,
|
|
"loss": 2.1366,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.4054054054054056e-06,
|
|
"loss": 1.8154,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.5135135135135137e-06,
|
|
"loss": 1.9925,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.6216216216216219e-06,
|
|
"loss": 1.7854,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.72972972972973e-06,
|
|
"loss": 2.0494,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.8378378378378381e-06,
|
|
"loss": 2.0259,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.945945945945946e-06,
|
|
"loss": 1.8385,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 2.054054054054054e-06,
|
|
"loss": 1.8736,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 2.1621621621621623e-06,
|
|
"loss": 1.9918,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 2.2702702702702705e-06,
|
|
"loss": 1.669,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 2.3783783783783786e-06,
|
|
"loss": 2.0495,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 2.4864864864864867e-06,
|
|
"loss": 1.8761,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 2.594594594594595e-06,
|
|
"loss": 2.0214,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 2.702702702702703e-06,
|
|
"loss": 1.7357,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 2.810810810810811e-06,
|
|
"loss": 1.9684,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 2.9189189189189193e-06,
|
|
"loss": 1.8697,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 3.0270270270270274e-06,
|
|
"loss": 1.7187,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 3.1351351351351356e-06,
|
|
"loss": 1.5924,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 3.2432432432432437e-06,
|
|
"loss": 1.9131,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 3.351351351351352e-06,
|
|
"loss": 1.868,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 3.45945945945946e-06,
|
|
"loss": 1.8283,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 3.567567567567568e-06,
|
|
"loss": 1.8289,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 3.6756756756756763e-06,
|
|
"loss": 1.9307,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 3.7837837837837844e-06,
|
|
"loss": 1.9499,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 3.891891891891892e-06,
|
|
"loss": 1.6075,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"loss": 1.887,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 4.108108108108108e-06,
|
|
"loss": 1.7399,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 4.216216216216217e-06,
|
|
"loss": 1.645,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 4.324324324324325e-06,
|
|
"loss": 1.6938,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 4.432432432432433e-06,
|
|
"loss": 1.7409,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 4.540540540540541e-06,
|
|
"loss": 1.7627,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 4.6486486486486495e-06,
|
|
"loss": 1.5578,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 4.756756756756757e-06,
|
|
"loss": 1.6094,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 4.864864864864866e-06,
|
|
"loss": 1.6204,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 4.9729729729729735e-06,
|
|
"loss": 1.6456,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 5.081081081081082e-06,
|
|
"loss": 1.7422,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 5.18918918918919e-06,
|
|
"loss": 1.9356,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 5.297297297297298e-06,
|
|
"loss": 1.5422,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 5.405405405405406e-06,
|
|
"loss": 1.9021,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 5.513513513513515e-06,
|
|
"loss": 1.6136,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 5.621621621621622e-06,
|
|
"loss": 1.7092,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 5.729729729729731e-06,
|
|
"loss": 1.7545,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 5.837837837837839e-06,
|
|
"loss": 1.8105,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 5.945945945945947e-06,
|
|
"loss": 1.8338,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 6.054054054054055e-06,
|
|
"loss": 1.3632,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 6.162162162162163e-06,
|
|
"loss": 1.6174,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 6.270270270270271e-06,
|
|
"loss": 1.7029,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 6.378378378378379e-06,
|
|
"loss": 1.6328,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 6.486486486486487e-06,
|
|
"loss": 1.6502,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 6.594594594594595e-06,
|
|
"loss": 1.6911,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 6.702702702702704e-06,
|
|
"loss": 1.6751,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 6.810810810810811e-06,
|
|
"loss": 1.7419,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 6.91891891891892e-06,
|
|
"loss": 1.5681,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 7.027027027027028e-06,
|
|
"loss": 1.6999,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 7.135135135135136e-06,
|
|
"loss": 1.608,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 7.243243243243244e-06,
|
|
"loss": 1.6676,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 7.3513513513513525e-06,
|
|
"loss": 1.5906,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 7.45945945945946e-06,
|
|
"loss": 1.505,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 7.567567567567569e-06,
|
|
"loss": 1.6478,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 7.675675675675676e-06,
|
|
"loss": 1.8254,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 7.783783783783784e-06,
|
|
"loss": 1.712,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 7.891891891891894e-06,
|
|
"loss": 1.7243,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 8.000000000000001e-06,
|
|
"loss": 1.6525,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 8.108108108108109e-06,
|
|
"loss": 1.7529,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 8.216216216216217e-06,
|
|
"loss": 1.9293,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 8.324324324324326e-06,
|
|
"loss": 1.6179,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 8.432432432432434e-06,
|
|
"loss": 1.5725,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 8.540540540540542e-06,
|
|
"loss": 1.6493,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 8.64864864864865e-06,
|
|
"loss": 1.7388,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 8.756756756756759e-06,
|
|
"loss": 1.4627,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 8.864864864864866e-06,
|
|
"loss": 1.5662,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 8.972972972972974e-06,
|
|
"loss": 1.823,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.081081081081082e-06,
|
|
"loss": 1.7808,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.189189189189191e-06,
|
|
"loss": 1.4259,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.297297297297299e-06,
|
|
"loss": 1.7357,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.405405405405407e-06,
|
|
"loss": 1.7906,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.513513513513514e-06,
|
|
"loss": 1.5941,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.621621621621622e-06,
|
|
"loss": 1.5096,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.729729729729732e-06,
|
|
"loss": 1.7866,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.83783783783784e-06,
|
|
"loss": 1.7468,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.945945945945947e-06,
|
|
"loss": 1.6451,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.0054054054054055e-05,
|
|
"loss": 1.6399,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.0162162162162164e-05,
|
|
"loss": 1.5937,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.027027027027027e-05,
|
|
"loss": 1.5869,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.037837837837838e-05,
|
|
"loss": 1.6166,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.0486486486486487e-05,
|
|
"loss": 1.5568,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.0594594594594597e-05,
|
|
"loss": 1.5097,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.0702702702702703e-05,
|
|
"loss": 1.7392,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.0810810810810812e-05,
|
|
"loss": 1.5906,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.091891891891892e-05,
|
|
"loss": 1.5869,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.102702702702703e-05,
|
|
"loss": 1.75,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.1135135135135135e-05,
|
|
"loss": 1.6112,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.1243243243243245e-05,
|
|
"loss": 1.5636,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.1351351351351352e-05,
|
|
"loss": 1.619,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.1459459459459462e-05,
|
|
"loss": 1.6108,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.1567567567567568e-05,
|
|
"loss": 1.4139,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.1675675675675677e-05,
|
|
"loss": 1.6085,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.1783783783783785e-05,
|
|
"loss": 1.6746,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.1891891891891894e-05,
|
|
"loss": 1.4579,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.2e-05,
|
|
"loss": 1.5606,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.210810810810811e-05,
|
|
"loss": 1.5887,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.2216216216216217e-05,
|
|
"loss": 1.5532,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.2324324324324327e-05,
|
|
"loss": 1.5989,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.2432432432432433e-05,
|
|
"loss": 1.6376,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.2540540540540542e-05,
|
|
"loss": 1.7451,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.264864864864865e-05,
|
|
"loss": 1.5376,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.2756756756756758e-05,
|
|
"loss": 1.6816,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.2864864864864865e-05,
|
|
"loss": 1.7369,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.2972972972972975e-05,
|
|
"loss": 1.6929,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.3081081081081083e-05,
|
|
"loss": 1.6333,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.318918918918919e-05,
|
|
"loss": 1.486,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.3297297297297298e-05,
|
|
"loss": 1.5422,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.3405405405405407e-05,
|
|
"loss": 1.6363,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.3513513513513515e-05,
|
|
"loss": 1.6146,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.3621621621621623e-05,
|
|
"loss": 1.6895,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.372972972972973e-05,
|
|
"loss": 1.5712,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.383783783783784e-05,
|
|
"loss": 1.762,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.3945945945945946e-05,
|
|
"loss": 1.5899,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.4054054054054055e-05,
|
|
"loss": 1.7189,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.4162162162162163e-05,
|
|
"loss": 1.5934,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.4270270270270272e-05,
|
|
"loss": 1.2734,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.4378378378378378e-05,
|
|
"loss": 1.689,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.4486486486486488e-05,
|
|
"loss": 1.587,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.4594594594594596e-05,
|
|
"loss": 1.6869,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.4702702702702705e-05,
|
|
"loss": 1.3972,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.4810810810810811e-05,
|
|
"loss": 1.5444,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.491891891891892e-05,
|
|
"loss": 1.6193,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.5027027027027028e-05,
|
|
"loss": 1.6251,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.5135135135135138e-05,
|
|
"loss": 1.5753,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.5243243243243244e-05,
|
|
"loss": 1.6492,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.5351351351351353e-05,
|
|
"loss": 1.6493,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.545945945945946e-05,
|
|
"loss": 2.0071,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.556756756756757e-05,
|
|
"loss": 1.769,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.5675675675675676e-05,
|
|
"loss": 1.5203,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.5783783783783787e-05,
|
|
"loss": 1.7819,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.589189189189189e-05,
|
|
"loss": 1.8217,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.6000000000000003e-05,
|
|
"loss": 1.7686,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.610810810810811e-05,
|
|
"loss": 1.7356,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.6216216216216218e-05,
|
|
"loss": 1.7302,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.6324324324324326e-05,
|
|
"loss": 1.5567,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.6432432432432434e-05,
|
|
"loss": 1.3269,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.654054054054054e-05,
|
|
"loss": 1.4891,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.6648648648648652e-05,
|
|
"loss": 1.5148,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.6756756756756757e-05,
|
|
"loss": 1.6084,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.6864864864864868e-05,
|
|
"loss": 1.6542,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.6972972972972975e-05,
|
|
"loss": 1.605,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.7081081081081083e-05,
|
|
"loss": 1.778,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.718918918918919e-05,
|
|
"loss": 1.6913,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.72972972972973e-05,
|
|
"loss": 1.8156,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.7405405405405406e-05,
|
|
"loss": 1.4803,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.7513513513513517e-05,
|
|
"loss": 1.6661,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.7621621621621622e-05,
|
|
"loss": 1.4052,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.7729729729729733e-05,
|
|
"loss": 1.633,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.783783783783784e-05,
|
|
"loss": 1.6257,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.7945945945945948e-05,
|
|
"loss": 1.5775,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.8054054054054056e-05,
|
|
"loss": 1.9391,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.8162162162162164e-05,
|
|
"loss": 1.6306,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.827027027027027e-05,
|
|
"loss": 1.5366,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.8378378378378383e-05,
|
|
"loss": 1.9441,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.8486486486486487e-05,
|
|
"loss": 1.5683,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.8594594594594598e-05,
|
|
"loss": 1.8234,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.8702702702702706e-05,
|
|
"loss": 1.5457,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.8810810810810813e-05,
|
|
"loss": 1.5235,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.891891891891892e-05,
|
|
"loss": 1.5809,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.902702702702703e-05,
|
|
"loss": 1.5877,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9135135135135137e-05,
|
|
"loss": 1.7839,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9243243243243244e-05,
|
|
"loss": 1.4496,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9351351351351352e-05,
|
|
"loss": 1.8033,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9459459459459463e-05,
|
|
"loss": 1.5614,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.956756756756757e-05,
|
|
"loss": 1.6684,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.967567567567568e-05,
|
|
"loss": 2.0082,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9783783783783786e-05,
|
|
"loss": 1.634,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9891891891891894e-05,
|
|
"loss": 1.4182,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 2e-05,
|
|
"loss": 1.736,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.999999599679287e-05,
|
|
"loss": 1.6983,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9999983987174676e-05,
|
|
"loss": 1.6493,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9999963971155038e-05,
|
|
"loss": 1.6042,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9999935948749983e-05,
|
|
"loss": 1.4718,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.999989991998194e-05,
|
|
"loss": 1.661,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.9999855884879763e-05,
|
|
"loss": 1.6538,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.9999803843478705e-05,
|
|
"loss": 1.4945,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.9999743795820433e-05,
|
|
"loss": 1.7639,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.999967574195302e-05,
|
|
"loss": 1.6904,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.999959968193096e-05,
|
|
"loss": 1.7192,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.9999515615815143e-05,
|
|
"loss": 1.5819,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.999942354367288e-05,
|
|
"loss": 1.7685,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.9999323465577887e-05,
|
|
"loss": 1.5378,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.999921538161029e-05,
|
|
"loss": 1.7201,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.9999099291856624e-05,
|
|
"loss": 1.8976,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.9998975196409838e-05,
|
|
"loss": 1.7367,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.9998843095369286e-05,
|
|
"loss": 1.6401,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.9998702988840734e-05,
|
|
"loss": 1.6867,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.999855487693636e-05,
|
|
"loss": 1.8064,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.9998398759774743e-05,
|
|
"loss": 1.7657,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.9998234637480883e-05,
|
|
"loss": 1.9743,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.9998062510186176e-05,
|
|
"loss": 1.6131,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.999788237802844e-05,
|
|
"loss": 1.7105,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.9997694241151896e-05,
|
|
"loss": 1.8506,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.999749809970717e-05,
|
|
"loss": 1.5981,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.9997293953851307e-05,
|
|
"loss": 1.7449,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.9997081803747748e-05,
|
|
"loss": 1.7099,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.9996861649566353e-05,
|
|
"loss": 1.6868,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.9996633491483388e-05,
|
|
"loss": 1.7224,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.9996397329681522e-05,
|
|
"loss": 1.5681,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.9996153164349838e-05,
|
|
"loss": 1.5671,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.9995900995683827e-05,
|
|
"loss": 1.7861,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.9995640823885378e-05,
|
|
"loss": 1.5992,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.9995372649162803e-05,
|
|
"loss": 1.7878,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.9995096471730812e-05,
|
|
"loss": 1.7365,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.9994812291810524e-05,
|
|
"loss": 1.7124,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.9994520109629466e-05,
|
|
"loss": 1.506,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.9994219925421565e-05,
|
|
"loss": 1.5369,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.999391173942717e-05,
|
|
"loss": 1.9449,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.9993595551893022e-05,
|
|
"loss": 1.71,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.9993271363072275e-05,
|
|
"loss": 1.7479,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.9992939173224485e-05,
|
|
"loss": 1.6676,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.9992598982615624e-05,
|
|
"loss": 1.5317,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.9992250791518057e-05,
|
|
"loss": 1.5235,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.9991894600210563e-05,
|
|
"loss": 1.5146,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.999153040897832e-05,
|
|
"loss": 1.4592,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.9991158218112923e-05,
|
|
"loss": 1.5421,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.9990778027912353e-05,
|
|
"loss": 1.4027,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.999038983868101e-05,
|
|
"loss": 1.7591,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.9989993650729695e-05,
|
|
"loss": 1.7466,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.9989589464375614e-05,
|
|
"loss": 1.7819,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.9989177279942372e-05,
|
|
"loss": 1.7878,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.9988757097759982e-05,
|
|
"loss": 1.4788,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.9988328918164863e-05,
|
|
"loss": 1.9761,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.998789274149983e-05,
|
|
"loss": 1.6012,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.99874485681141e-05,
|
|
"loss": 1.543,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.9986996398363304e-05,
|
|
"loss": 1.6938,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.9986536232609465e-05,
|
|
"loss": 1.7449,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.998606807122101e-05,
|
|
"loss": 1.4347,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.998559191457277e-05,
|
|
"loss": 1.6147,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.9985107763045973e-05,
|
|
"loss": 1.8373,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.998461561702825e-05,
|
|
"loss": 1.5355,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.998411547691364e-05,
|
|
"loss": 1.6515,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.9983607343102573e-05,
|
|
"loss": 1.5628,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.998309121600188e-05,
|
|
"loss": 1.4482,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.998256709602479e-05,
|
|
"loss": 1.7765,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.9982034983590944e-05,
|
|
"loss": 1.4668,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.9981494879126364e-05,
|
|
"loss": 1.6406,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.998094678306349e-05,
|
|
"loss": 1.732,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.9980390695841146e-05,
|
|
"loss": 1.757,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.9979826617904554e-05,
|
|
"loss": 1.5234,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.9979254549705343e-05,
|
|
"loss": 1.7328,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.9978674491701536e-05,
|
|
"loss": 1.5992,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.9978086444357546e-05,
|
|
"loss": 1.4434,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.9977490408144193e-05,
|
|
"loss": 1.625,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.9976886383538684e-05,
|
|
"loss": 1.6449,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.997627437102463e-05,
|
|
"loss": 1.5421,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.9975654371092032e-05,
|
|
"loss": 1.727,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.9975026384237288e-05,
|
|
"loss": 1.5503,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.997439041096319e-05,
|
|
"loss": 1.5949,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.9973746451778925e-05,
|
|
"loss": 1.7027,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.9973094507200073e-05,
|
|
"loss": 1.6321,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.9972434577748605e-05,
|
|
"loss": 1.5142,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.9971766663952892e-05,
|
|
"loss": 1.5576,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.9971090766347694e-05,
|
|
"loss": 1.8237,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.997040688547416e-05,
|
|
"loss": 1.7067,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.996971502187983e-05,
|
|
"loss": 1.2765,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.9969015176118644e-05,
|
|
"loss": 1.5985,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.996830734875093e-05,
|
|
"loss": 1.5088,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.9967591540343395e-05,
|
|
"loss": 1.724,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.9966867751469155e-05,
|
|
"loss": 1.7505,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.99661359827077e-05,
|
|
"loss": 1.3618,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.996539623464491e-05,
|
|
"loss": 1.7775,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.9964648507873067e-05,
|
|
"loss": 1.575,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.9963892802990824e-05,
|
|
"loss": 1.6256,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.996312912060324e-05,
|
|
"loss": 1.6419,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.996235746132174e-05,
|
|
"loss": 1.5018,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.9961577825764146e-05,
|
|
"loss": 1.6676,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.996079021455468e-05,
|
|
"loss": 1.4557,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.995999462832392e-05,
|
|
"loss": 1.8168,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.9959191067708855e-05,
|
|
"loss": 1.5655,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.9958379533352846e-05,
|
|
"loss": 1.662,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.995756002590564e-05,
|
|
"loss": 1.4982,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.9956732546023373e-05,
|
|
"loss": 1.5138,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.9955897094368556e-05,
|
|
"loss": 1.5293,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.9955053671610082e-05,
|
|
"loss": 1.7638,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.995420227842324e-05,
|
|
"loss": 1.7521,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.9953342915489685e-05,
|
|
"loss": 1.6726,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.9952475583497457e-05,
|
|
"loss": 1.8973,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.9951600283140984e-05,
|
|
"loss": 1.8243,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.995071701512106e-05,
|
|
"loss": 1.3959,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.9949825780144875e-05,
|
|
"loss": 1.8112,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.994892657892598e-05,
|
|
"loss": 1.7778,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.9948019412184316e-05,
|
|
"loss": 1.6458,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.99471042806462e-05,
|
|
"loss": 1.2722,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.9946181185044322e-05,
|
|
"loss": 1.7227,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.994525012611775e-05,
|
|
"loss": 1.5105,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.994431110461193e-05,
|
|
"loss": 1.4635,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.9943364121278687e-05,
|
|
"loss": 1.7083,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.9942409176876202e-05,
|
|
"loss": 1.7743,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.9941446272169055e-05,
|
|
"loss": 1.6275,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.994047540792818e-05,
|
|
"loss": 1.7892,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.9939496584930894e-05,
|
|
"loss": 1.4667,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.9938509803960885e-05,
|
|
"loss": 1.5074,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.9937515065808207e-05,
|
|
"loss": 1.5398,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.9936512371269294e-05,
|
|
"loss": 1.523,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.9935501721146936e-05,
|
|
"loss": 1.8736,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.993448311625031e-05,
|
|
"loss": 1.7216,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.993345655739495e-05,
|
|
"loss": 1.6854,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.993242204540276e-05,
|
|
"loss": 1.6465,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.9931379581102016e-05,
|
|
"loss": 1.6322,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.9930329165327354e-05,
|
|
"loss": 1.7167,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.9929270798919786e-05,
|
|
"loss": 1.716,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.9928204482726676e-05,
|
|
"loss": 1.6287,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.992713021760177e-05,
|
|
"loss": 1.7884,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.9926048004405168e-05,
|
|
"loss": 1.6563,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.9924957844003326e-05,
|
|
"loss": 1.7419,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.992385973726908e-05,
|
|
"loss": 1.562,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.9922753685081616e-05,
|
|
"loss": 1.6819,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.9921639688326487e-05,
|
|
"loss": 1.5446,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.9920517747895608e-05,
|
|
"loss": 2.0263,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.9919387864687244e-05,
|
|
"loss": 1.6096,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.991825003960603e-05,
|
|
"loss": 1.7334,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.991710427356295e-05,
|
|
"loss": 1.4979,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.9915950567475365e-05,
|
|
"loss": 1.6418,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.991478892226697e-05,
|
|
"loss": 1.4726,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.991361933886783e-05,
|
|
"loss": 1.6948,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.9912441818214357e-05,
|
|
"loss": 1.4977,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.9911256361249327e-05,
|
|
"loss": 1.6509,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.9910062968921866e-05,
|
|
"loss": 1.5656,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.9908861642187453e-05,
|
|
"loss": 1.6335,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.9907652382007917e-05,
|
|
"loss": 1.4573,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.9906435189351446e-05,
|
|
"loss": 1.8128,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.9905210065192573e-05,
|
|
"loss": 1.987,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.9903977010512182e-05,
|
|
"loss": 1.8208,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.990273602629751e-05,
|
|
"loss": 1.6041,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.990148711354214e-05,
|
|
"loss": 1.4626,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.9900230273246e-05,
|
|
"loss": 1.5474,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.9898965506415373e-05,
|
|
"loss": 1.8108,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.989769281406288e-05,
|
|
"loss": 1.6569,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.9896412197207496e-05,
|
|
"loss": 1.6492,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.989512365687453e-05,
|
|
"loss": 1.4973,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.9893827194095645e-05,
|
|
"loss": 1.5606,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.989252280990884e-05,
|
|
"loss": 1.5172,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.989121050535846e-05,
|
|
"loss": 1.5312,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.9889890281495195e-05,
|
|
"loss": 1.5852,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.9888562139376064e-05,
|
|
"loss": 1.7158,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.9887226080064433e-05,
|
|
"loss": 1.7092,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.988588210463001e-05,
|
|
"loss": 1.7643,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.9884530214148836e-05,
|
|
"loss": 1.6948,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.9883170409703295e-05,
|
|
"loss": 1.6716,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.9881802692382094e-05,
|
|
"loss": 1.591,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.9880427063280287e-05,
|
|
"loss": 1.5771,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.9879043523499265e-05,
|
|
"loss": 1.5792,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.987765207414674e-05,
|
|
"loss": 1.6749,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.987625271633677e-05,
|
|
"loss": 1.5023,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.9874845451189736e-05,
|
|
"loss": 1.7177,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.9873430279832354e-05,
|
|
"loss": 1.7913,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.9872007203397667e-05,
|
|
"loss": 1.7108,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.987057622302505e-05,
|
|
"loss": 1.7958,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.9869137339860204e-05,
|
|
"loss": 1.3703,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.986769055505516e-05,
|
|
"loss": 1.5055,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.9866235869768272e-05,
|
|
"loss": 1.4789,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.9864773285164226e-05,
|
|
"loss": 2.0889,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.9863302802414022e-05,
|
|
"loss": 1.4585,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.9861824422694993e-05,
|
|
"loss": 1.6607,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.986033814719079e-05,
|
|
"loss": 1.6256,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.9858843977091388e-05,
|
|
"loss": 1.5536,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.9857341913593077e-05,
|
|
"loss": 1.73,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.9855831957898473e-05,
|
|
"loss": 1.5445,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.9854314111216512e-05,
|
|
"loss": 1.7442,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.9852788374762448e-05,
|
|
"loss": 1.6469,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.985125474975784e-05,
|
|
"loss": 1.4878,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.9849713237430575e-05,
|
|
"loss": 1.6814,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.9848163839014855e-05,
|
|
"loss": 1.6772,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.984660655575119e-05,
|
|
"loss": 1.5309,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.9845041388886405e-05,
|
|
"loss": 1.6565,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.984346833967364e-05,
|
|
"loss": 1.6505,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.9841887409372336e-05,
|
|
"loss": 1.6464,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.984029859924826e-05,
|
|
"loss": 1.8189,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.9838701910573476e-05,
|
|
"loss": 1.5572,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.9837097344626358e-05,
|
|
"loss": 1.7788,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.983548490269159e-05,
|
|
"loss": 1.703,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.983386458606016e-05,
|
|
"loss": 1.6113,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.9832236396029358e-05,
|
|
"loss": 1.4801,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.983060033390278e-05,
|
|
"loss": 1.5669,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.9828956400990333e-05,
|
|
"loss": 1.5963,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.9827304598608203e-05,
|
|
"loss": 1.8041,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.9825644928078905e-05,
|
|
"loss": 1.5276,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.9823977390731236e-05,
|
|
"loss": 1.566,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.9822301987900294e-05,
|
|
"loss": 1.8048,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.9820618720927472e-05,
|
|
"loss": 1.5508,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.9818927591160474e-05,
|
|
"loss": 1.756,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.981722859995328e-05,
|
|
"loss": 1.4842,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.9815521748666175e-05,
|
|
"loss": 1.5421,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.9813807038665732e-05,
|
|
"loss": 1.7914,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.981208447132483e-05,
|
|
"loss": 1.6267,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.981035404802261e-05,
|
|
"loss": 1.5455,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.9808615770144537e-05,
|
|
"loss": 1.5406,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.9806869639082337e-05,
|
|
"loss": 1.6815,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.980511565623404e-05,
|
|
"loss": 1.6465,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.9803353823003956e-05,
|
|
"loss": 1.6945,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.9801584140802686e-05,
|
|
"loss": 1.51,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.9799806611047104e-05,
|
|
"loss": 1.6151,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.9798021235160378e-05,
|
|
"loss": 1.8236,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.9796228014571954e-05,
|
|
"loss": 1.671,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.9794426950717555e-05,
|
|
"loss": 1.5145,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.9792618045039195e-05,
|
|
"loss": 1.7242,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.979080129898515e-05,
|
|
"loss": 1.4336,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.9788976714009985e-05,
|
|
"loss": 1.6558,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.978714429157454e-05,
|
|
"loss": 1.5934,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.978530403314593e-05,
|
|
"loss": 1.5595,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.9783455940197537e-05,
|
|
"loss": 1.8317,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.9781600014209026e-05,
|
|
"loss": 1.57,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.9779736256666323e-05,
|
|
"loss": 1.6724,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.9777864669061632e-05,
|
|
"loss": 1.7102,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.9775985252893427e-05,
|
|
"loss": 1.7356,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.9774098009666444e-05,
|
|
"loss": 1.5092,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.977220294089168e-05,
|
|
"loss": 1.766,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.9770300048086422e-05,
|
|
"loss": 1.4475,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.976838933277419e-05,
|
|
"loss": 1.7271,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.976647079648479e-05,
|
|
"loss": 1.5465,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.976454444075428e-05,
|
|
"loss": 1.5326,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.9762610267124976e-05,
|
|
"loss": 1.7409,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.9760668277145468e-05,
|
|
"loss": 1.6891,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.975871847237058e-05,
|
|
"loss": 1.704,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.9756760854361413e-05,
|
|
"loss": 1.6255,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.975479542468532e-05,
|
|
"loss": 1.5826,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.9752822184915904e-05,
|
|
"loss": 1.5105,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.9750841136633017e-05,
|
|
"loss": 1.5442,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.9748852281422772e-05,
|
|
"loss": 1.6869,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.9746855620877535e-05,
|
|
"loss": 1.5539,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.9744851156595905e-05,
|
|
"loss": 1.7374,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.9742838890182744e-05,
|
|
"loss": 1.352,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.9740818823249155e-05,
|
|
"loss": 1.6982,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.9738790957412485e-05,
|
|
"loss": 1.7023,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.9736755294296336e-05,
|
|
"loss": 1.6499,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.9734711835530534e-05,
|
|
"loss": 1.5718,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.9732660582751165e-05,
|
|
"loss": 1.8472,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.973060153760054e-05,
|
|
"loss": 1.4305,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.972853470172722e-05,
|
|
"loss": 1.6836,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.9726460076786e-05,
|
|
"loss": 1.782,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.9724377664437903e-05,
|
|
"loss": 1.5982,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.9722287466350205e-05,
|
|
"loss": 1.7363,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.97201894841964e-05,
|
|
"loss": 1.4575,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.9718083719656218e-05,
|
|
"loss": 1.4968,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.9715970174415627e-05,
|
|
"loss": 1.6592,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.9713848850166816e-05,
|
|
"loss": 1.4682,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.97117197486082e-05,
|
|
"loss": 1.6849,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.9709582871444433e-05,
|
|
"loss": 1.5316,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.9707438220386383e-05,
|
|
"loss": 1.5986,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.970528579715115e-05,
|
|
"loss": 1.6555,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.970312560346205e-05,
|
|
"loss": 1.7888,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.9700957641048625e-05,
|
|
"loss": 1.56,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.969878191164664e-05,
|
|
"loss": 1.5221,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.9696598416998062e-05,
|
|
"loss": 1.4575,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.96944071588511e-05,
|
|
"loss": 1.3712,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.969220813896016e-05,
|
|
"loss": 1.7288,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.9690001359085868e-05,
|
|
"loss": 1.4522,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.9687786820995063e-05,
|
|
"loss": 1.504,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.96855645264608e-05,
|
|
"loss": 1.6136,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.968333447726234e-05,
|
|
"loss": 1.4187,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.9681096675185147e-05,
|
|
"loss": 1.6414,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.9678851122020905e-05,
|
|
"loss": 1.6591,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.9676597819567486e-05,
|
|
"loss": 1.5688,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.967433676962899e-05,
|
|
"loss": 1.6149,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.9672067974015702e-05,
|
|
"loss": 1.5971,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.9669791434544116e-05,
|
|
"loss": 1.6075,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.966750715303692e-05,
|
|
"loss": 1.5158,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.9665215131323003e-05,
|
|
"loss": 1.5607,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.9662915371237457e-05,
|
|
"loss": 1.6355,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.966060787462156e-05,
|
|
"loss": 1.2966,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.9658292643322797e-05,
|
|
"loss": 2.044,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.9655969679194834e-05,
|
|
"loss": 1.6494,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.9653638984097533e-05,
|
|
"loss": 1.4834,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.9651300559896943e-05,
|
|
"loss": 1.4426,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.9648954408465307e-05,
|
|
"loss": 1.51,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.9646600531681045e-05,
|
|
"loss": 1.6422,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.9644238931428777e-05,
|
|
"loss": 1.5568,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.964186960959929e-05,
|
|
"loss": 1.5021,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.9639492568089563e-05,
|
|
"loss": 1.463,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.9637107808802757e-05,
|
|
"loss": 1.4697,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.9634715333648207e-05,
|
|
"loss": 1.662,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.963231514454143e-05,
|
|
"loss": 1.6877,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.962990724340411e-05,
|
|
"loss": 1.7412,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.9627491632164118e-05,
|
|
"loss": 1.4477,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.9625068312755493e-05,
|
|
"loss": 1.5708,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.9622637287118445e-05,
|
|
"loss": 1.5449,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.9620198557199354e-05,
|
|
"loss": 1.6561,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.9617752124950762e-05,
|
|
"loss": 1.6509,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.961529799233139e-05,
|
|
"loss": 1.4518,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.9612836161306113e-05,
|
|
"loss": 1.6984,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.9610366633845984e-05,
|
|
"loss": 1.4885,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.9607889411928205e-05,
|
|
"loss": 1.7276,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.9605404497536136e-05,
|
|
"loss": 1.525,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.9602911892659314e-05,
|
|
"loss": 1.6191,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.960041159929341e-05,
|
|
"loss": 1.6363,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.959790361944027e-05,
|
|
"loss": 1.486,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.9595387955107883e-05,
|
|
"loss": 1.7079,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.9592864608310397e-05,
|
|
"loss": 1.6046,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.9590333581068103e-05,
|
|
"loss": 1.3811,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.9587794875407453e-05,
|
|
"loss": 1.7188,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.9585248493361037e-05,
|
|
"loss": 1.8155,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.9582694436967592e-05,
|
|
"loss": 1.5589,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.9580132708272003e-05,
|
|
"loss": 1.5913,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.9577563309325295e-05,
|
|
"loss": 1.4502,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.9574986242184636e-05,
|
|
"loss": 1.6595,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.9572401508913334e-05,
|
|
"loss": 1.6753,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.9569809111580834e-05,
|
|
"loss": 1.6419,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.956720905226271e-05,
|
|
"loss": 1.6141,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.9564601333040692e-05,
|
|
"loss": 1.9846,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.956198595600261e-05,
|
|
"loss": 1.5881,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.9559362923242457e-05,
|
|
"loss": 1.4706,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.9556732236860332e-05,
|
|
"loss": 1.2808,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.9554093898962478e-05,
|
|
"loss": 1.7937,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.9551447911661255e-05,
|
|
"loss": 1.9357,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.9548794277075148e-05,
|
|
"loss": 1.5343,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.9546132997328773e-05,
|
|
"loss": 1.6146,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.9543464074552853e-05,
|
|
"loss": 1.4534,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.9540787510884248e-05,
|
|
"loss": 1.3378,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.9538103308465913e-05,
|
|
"loss": 1.4766,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.9535411469446943e-05,
|
|
"loss": 1.4777,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.9532711995982533e-05,
|
|
"loss": 1.4866,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.9530004890233988e-05,
|
|
"loss": 1.5357,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.9527290154368734e-05,
|
|
"loss": 1.4479,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.95245677905603e-05,
|
|
"loss": 1.5105,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.9521837800988326e-05,
|
|
"loss": 1.5399,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.9519100187838546e-05,
|
|
"loss": 1.6355,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.9516354953302815e-05,
|
|
"loss": 1.5126,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.9513602099579078e-05,
|
|
"loss": 1.7351,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.9510841628871387e-05,
|
|
"loss": 1.6979,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.9508073543389884e-05,
|
|
"loss": 1.5849,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.9505297845350817e-05,
|
|
"loss": 1.4713,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.9502514536976523e-05,
|
|
"loss": 1.5211,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.949972362049543e-05,
|
|
"loss": 2.0167,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.9496925098142074e-05,
|
|
"loss": 1.5763,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.9494118972157054e-05,
|
|
"loss": 1.6508,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.9491305244787078e-05,
|
|
"loss": 1.5622,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.948848391828493e-05,
|
|
"loss": 1.8889,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.9485654994909483e-05,
|
|
"loss": 1.563,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.9482818476925687e-05,
|
|
"loss": 1.6001,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.9479974366604583e-05,
|
|
"loss": 1.797,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.9477122666223275e-05,
|
|
"loss": 1.5079,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.9474263378064956e-05,
|
|
"loss": 1.7688,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.9471396504418895e-05,
|
|
"loss": 1.7246,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.946852204758042e-05,
|
|
"loss": 1.513,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.9465640009850947e-05,
|
|
"loss": 1.7343,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.946275039353795e-05,
|
|
"loss": 1.4962,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.9459853200954987e-05,
|
|
"loss": 1.6605,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.9456948434421655e-05,
|
|
"loss": 1.6887,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.945403609626364e-05,
|
|
"loss": 1.5736,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.945111618881268e-05,
|
|
"loss": 1.6498,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.944818871440657e-05,
|
|
"loss": 1.6518,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.944525367538917e-05,
|
|
"loss": 1.7411,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.9442311074110398e-05,
|
|
"loss": 1.7142,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.943936091292621e-05,
|
|
"loss": 1.5259,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.9436403194198638e-05,
|
|
"loss": 1.9108,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.943343792029575e-05,
|
|
"loss": 1.5786,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.9430465093591673e-05,
|
|
"loss": 1.4557,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.942748471646657e-05,
|
|
"loss": 1.7046,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.9424496791306652e-05,
|
|
"loss": 1.5168,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.942150132050418e-05,
|
|
"loss": 1.7883,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.9418498306457447e-05,
|
|
"loss": 1.658,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.9415487751570798e-05,
|
|
"loss": 1.7542,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.94124696582546e-05,
|
|
"loss": 1.5603,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.9409444028925272e-05,
|
|
"loss": 1.5836,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.940641086600525e-05,
|
|
"loss": 1.6744,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.9403370171923015e-05,
|
|
"loss": 1.5294,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.940032194911307e-05,
|
|
"loss": 1.5706,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.9397266200015953e-05,
|
|
"loss": 1.373,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.939420292707822e-05,
|
|
"loss": 1.5337,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.9391132132752454e-05,
|
|
"loss": 1.478,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.9388053819497258e-05,
|
|
"loss": 1.4968,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.9384967989777262e-05,
|
|
"loss": 1.7383,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.9381874646063102e-05,
|
|
"loss": 1.7119,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.9378773790831448e-05,
|
|
"loss": 1.6528,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.937566542656496e-05,
|
|
"loss": 1.543,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.9372549555752335e-05,
|
|
"loss": 1.3731,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.9369426180888266e-05,
|
|
"loss": 1.6216,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.9366295304473448e-05,
|
|
"loss": 1.3186,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.93631569290146e-05,
|
|
"loss": 1.5202,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.9360011057024427e-05,
|
|
"loss": 1.4517,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.9356857691021652e-05,
|
|
"loss": 1.335,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.9353696833530988e-05,
|
|
"loss": 1.5957,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.9350528487083146e-05,
|
|
"loss": 1.5267,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.9347352654214837e-05,
|
|
"loss": 1.1766,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.9344169337468767e-05,
|
|
"loss": 1.5451,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.9340978539393623e-05,
|
|
"loss": 1.8329,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.93377802625441e-05,
|
|
"loss": 1.4743,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.9334574509480867e-05,
|
|
"loss": 1.4459,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.933136128277058e-05,
|
|
"loss": 1.4562,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.932814058498589e-05,
|
|
"loss": 1.4063,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.9324912418705413e-05,
|
|
"loss": 1.5817,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.932167678651375e-05,
|
|
"loss": 1.6484,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.931843369100149e-05,
|
|
"loss": 1.6025,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.9315183134765186e-05,
|
|
"loss": 1.4817,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.9311925120407372e-05,
|
|
"loss": 1.5944,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.930865965053654e-05,
|
|
"loss": 1.5252,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.930538672776717e-05,
|
|
"loss": 1.6071,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.9302106354719698e-05,
|
|
"loss": 1.8087,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.929881853402052e-05,
|
|
"loss": 1.547,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.9295523268302004e-05,
|
|
"loss": 1.6679,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.929222056020248e-05,
|
|
"loss": 1.6229,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.9288910412366232e-05,
|
|
"loss": 1.6609,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.9285592827443503e-05,
|
|
"loss": 1.6222,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.9282267808090482e-05,
|
|
"loss": 1.549,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.9278935356969325e-05,
|
|
"loss": 1.3829,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.9275595476748122e-05,
|
|
"loss": 1.4641,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.927224817010093e-05,
|
|
"loss": 1.9315,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.926889343970773e-05,
|
|
"loss": 1.4166,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.9265531288254472e-05,
|
|
"loss": 1.6335,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.926216171843302e-05,
|
|
"loss": 1.7584,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.9258784732941196e-05,
|
|
"loss": 1.4562,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.9255400334482755e-05,
|
|
"loss": 1.7905,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.9252008525767394e-05,
|
|
"loss": 1.4796,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.9248609309510723e-05,
|
|
"loss": 1.6041,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.92452026884343e-05,
|
|
"loss": 1.6486,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 1.9241788665265613e-05,
|
|
"loss": 1.7357,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 1.9238367242738062e-05,
|
|
"loss": 1.2799,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 1.923493842359098e-05,
|
|
"loss": 1.6351,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 1.923150221056963e-05,
|
|
"loss": 1.6758,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 1.9228058606425176e-05,
|
|
"loss": 1.6137,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 1.922460761391472e-05,
|
|
"loss": 1.5543,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 1.922114923580126e-05,
|
|
"loss": 1.5863,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 1.9217683474853722e-05,
|
|
"loss": 1.5753,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 1.921421033384694e-05,
|
|
"loss": 1.5184,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 1.921072981556165e-05,
|
|
"loss": 1.5023,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 1.9207241922784507e-05,
|
|
"loss": 1.7049,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 1.9203746658308047e-05,
|
|
"loss": 1.2695,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 1.9200244024930738e-05,
|
|
"loss": 1.6214,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 1.919673402545693e-05,
|
|
"loss": 1.494,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 1.919321666269687e-05,
|
|
"loss": 1.2971,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 1.9189691939466707e-05,
|
|
"loss": 1.4544,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 1.9186159858588476e-05,
|
|
"loss": 1.6387,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 1.9182620422890116e-05,
|
|
"loss": 1.7401,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 1.917907363520544e-05,
|
|
"loss": 1.8325,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 1.9175519498374157e-05,
|
|
"loss": 1.4129,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 1.917195801524185e-05,
|
|
"loss": 1.5586,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 1.916838918865999e-05,
|
|
"loss": 1.6194,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 1.9164813021485938e-05,
|
|
"loss": 1.6161,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 1.9161229516582912e-05,
|
|
"loss": 1.7505,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 1.915763867682002e-05,
|
|
"loss": 1.6288,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 1.915404050507223e-05,
|
|
"loss": 1.4916,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 1.9150435004220394e-05,
|
|
"loss": 1.4063,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 1.914682217715122e-05,
|
|
"loss": 1.5466,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 1.9143202026757292e-05,
|
|
"loss": 1.36,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 1.913957455593705e-05,
|
|
"loss": 1.7272,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 1.9135939767594793e-05,
|
|
"loss": 1.66,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 1.9132297664640694e-05,
|
|
"loss": 1.6201,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 1.9128648249990763e-05,
|
|
"loss": 1.8807,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 1.9124991526566874e-05,
|
|
"loss": 1.568,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 1.912132749729675e-05,
|
|
"loss": 1.7018,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 1.911765616511397e-05,
|
|
"loss": 1.5132,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 1.9113977532957952e-05,
|
|
"loss": 1.3244,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 1.9110291603773956e-05,
|
|
"loss": 1.8395,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 1.9106598380513098e-05,
|
|
"loss": 1.8351,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 1.9102897866132322e-05,
|
|
"loss": 1.5974,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 1.909919006359441e-05,
|
|
"loss": 1.7605,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 1.9095474975867986e-05,
|
|
"loss": 1.4698,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 1.9091752605927503e-05,
|
|
"loss": 1.4936,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 1.9088022956753246e-05,
|
|
"loss": 1.6531,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 1.908428603133132e-05,
|
|
"loss": 1.5673,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 1.908054183265367e-05,
|
|
"loss": 1.5969,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 1.907679036371805e-05,
|
|
"loss": 1.6278,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 1.907303162752805e-05,
|
|
"loss": 1.7252,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 1.9069265627093058e-05,
|
|
"loss": 1.356,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 1.9065492365428303e-05,
|
|
"loss": 1.4331,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 1.9061711845554805e-05,
|
|
"loss": 1.4462,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 1.905792407049941e-05,
|
|
"loss": 1.5836,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 1.905412904329476e-05,
|
|
"loss": 1.4011,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 1.9050326766979322e-05,
|
|
"loss": 1.4672,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 1.904651724459735e-05,
|
|
"loss": 1.744,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 1.9042700479198908e-05,
|
|
"loss": 1.8685,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 1.903887647383985e-05,
|
|
"loss": 1.5598,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 1.903504523158184e-05,
|
|
"loss": 1.4841,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 1.9031206755492326e-05,
|
|
"loss": 1.7306,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 1.9027361048644553e-05,
|
|
"loss": 1.6409,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 1.9023508114117554e-05,
|
|
"loss": 1.5563,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 1.9019647954996145e-05,
|
|
"loss": 1.6211,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 1.9015780574370928e-05,
|
|
"loss": 1.7717,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 1.901190597533829e-05,
|
|
"loss": 1.6272,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 1.9008024161000398e-05,
|
|
"loss": 1.7891,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 1.9004135134465192e-05,
|
|
"loss": 1.468,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 1.9000238898846385e-05,
|
|
"loss": 1.6385,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 1.899633545726347e-05,
|
|
"loss": 1.868,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 1.89924248128417e-05,
|
|
"loss": 1.4764,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 1.8988506968712096e-05,
|
|
"loss": 1.8818,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 1.8984581928011454e-05,
|
|
"loss": 1.8098,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 1.898064969388232e-05,
|
|
"loss": 1.3206,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 1.8976710269473003e-05,
|
|
"loss": 1.6474,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 1.897276365793757e-05,
|
|
"loss": 1.6653,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 1.8968809862435843e-05,
|
|
"loss": 1.7682,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 1.896484888613339e-05,
|
|
"loss": 1.5616,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 1.896088073220154e-05,
|
|
"loss": 1.6396,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 1.8956905403817355e-05,
|
|
"loss": 1.5851,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 1.8952922904163652e-05,
|
|
"loss": 1.2962,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 1.894893323642898e-05,
|
|
"loss": 1.5742,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 1.894493640380764e-05,
|
|
"loss": 1.7622,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 1.894093240949966e-05,
|
|
"loss": 1.6525,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 1.8936921256710793e-05,
|
|
"loss": 1.5926,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 1.8932902948652545e-05,
|
|
"loss": 1.7493,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 1.8928877488542137e-05,
|
|
"loss": 1.6403,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 1.892484487960252e-05,
|
|
"loss": 1.6155,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 1.8920805125062365e-05,
|
|
"loss": 1.5732,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 1.8916758228156073e-05,
|
|
"loss": 1.5349,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 1.8912704192123753e-05,
|
|
"loss": 1.7293,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 1.890864302021123e-05,
|
|
"loss": 1.6658,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 1.890457471567005e-05,
|
|
"loss": 1.5811,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 1.890049928175747e-05,
|
|
"loss": 1.6424,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 1.889641672173645e-05,
|
|
"loss": 1.7044,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 1.889232703887565e-05,
|
|
"loss": 1.3625,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 1.888823023644945e-05,
|
|
"loss": 1.6735,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 1.888412631773791e-05,
|
|
"loss": 1.6835,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 1.88800152860268e-05,
|
|
"loss": 1.5181,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 1.8875897144607588e-05,
|
|
"loss": 1.6836,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 1.887177189677742e-05,
|
|
"loss": 1.6234,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 1.886763954583915e-05,
|
|
"loss": 1.5088,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 1.8863500095101298e-05,
|
|
"loss": 1.6677,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 1.885935354787809e-05,
|
|
"loss": 1.5796,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 1.8855199907489415e-05,
|
|
"loss": 1.5966,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 1.8851039177260858e-05,
|
|
"loss": 1.6881,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 1.8846871360523665e-05,
|
|
"loss": 1.52,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 1.8842696460614766e-05,
|
|
"loss": 1.7054,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 1.883851448087676e-05,
|
|
"loss": 1.5164,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 1.883432542465791e-05,
|
|
"loss": 1.4946,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 1.8830129295312152e-05,
|
|
"loss": 1.5348,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 1.8825926096199077e-05,
|
|
"loss": 1.6427,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 1.8821715830683938e-05,
|
|
"loss": 1.514,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 1.8817498502137657e-05,
|
|
"loss": 1.3981,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 1.8813274113936794e-05,
|
|
"loss": 1.38,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 1.8809042669463573e-05,
|
|
"loss": 1.3486,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 1.880480417210586e-05,
|
|
"loss": 1.7064,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 1.880055862525718e-05,
|
|
"loss": 1.618,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 1.8796306032316683e-05,
|
|
"loss": 1.748,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 1.8792046396689176e-05,
|
|
"loss": 1.7247,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 1.87877797217851e-05,
|
|
"loss": 1.7108,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 1.8783506011020536e-05,
|
|
"loss": 1.6777,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 1.8779225267817188e-05,
|
|
"loss": 1.5169,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 1.8774937495602393e-05,
|
|
"loss": 1.3427,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 1.8770642697809128e-05,
|
|
"loss": 1.6519,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 1.8766340877875978e-05,
|
|
"loss": 1.5662,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 1.8762032039247165e-05,
|
|
"loss": 1.5232,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 1.875771618537252e-05,
|
|
"loss": 1.5248,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 1.8753393319707493e-05,
|
|
"loss": 1.343,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 1.8749063445713154e-05,
|
|
"loss": 1.7635,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 1.8744726566856174e-05,
|
|
"loss": 1.3126,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 1.8740382686608846e-05,
|
|
"loss": 1.5937,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 1.873603180844905e-05,
|
|
"loss": 1.6922,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 1.8731673935860288e-05,
|
|
"loss": 1.5771,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 1.8727309072331647e-05,
|
|
"loss": 1.677,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 1.8722937221357825e-05,
|
|
"loss": 1.5197,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 1.8718558386439096e-05,
|
|
"loss": 1.5139,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 1.871417257108135e-05,
|
|
"loss": 1.5503,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 1.870977977879604e-05,
|
|
"loss": 1.6972,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 1.8705380013100226e-05,
|
|
"loss": 1.6074,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 1.8700973277516537e-05,
|
|
"loss": 1.425,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 1.8696559575573197e-05,
|
|
"loss": 1.4547,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 1.869213891080399e-05,
|
|
"loss": 1.5875,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 1.868771128674828e-05,
|
|
"loss": 1.5589,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 1.8683276706951017e-05,
|
|
"loss": 1.7076,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 1.8678835174962703e-05,
|
|
"loss": 1.5488,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 1.8674386694339413e-05,
|
|
"loss": 1.5364,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 1.8669931268642788e-05,
|
|
"loss": 1.6705,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 1.8665468901440023e-05,
|
|
"loss": 1.5919,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 1.8660999596303873e-05,
|
|
"loss": 1.5183,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 1.8656523356812656e-05,
|
|
"loss": 1.4988,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 1.8652040186550224e-05,
|
|
"loss": 1.7088,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 1.8647550089105997e-05,
|
|
"loss": 1.4645,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 1.864305306807493e-05,
|
|
"loss": 1.4432,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 1.8638549127057526e-05,
|
|
"loss": 1.4153,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 1.8634038269659826e-05,
|
|
"loss": 1.5311,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 1.8629520499493408e-05,
|
|
"loss": 1.6288,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 1.8624995820175388e-05,
|
|
"loss": 1.3745,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 1.862046423532841e-05,
|
|
"loss": 1.6554,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 1.861592574858065e-05,
|
|
"loss": 1.4147,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 1.8611380363565804e-05,
|
|
"loss": 1.439,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 1.8606828083923102e-05,
|
|
"loss": 1.5798,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 1.8602268913297282e-05,
|
|
"loss": 1.6038,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 1.8597702855338612e-05,
|
|
"loss": 1.7581,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 1.8593129913702862e-05,
|
|
"loss": 1.6508,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 1.858855009205132e-05,
|
|
"loss": 1.653,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 1.8583963394050778e-05,
|
|
"loss": 1.6496,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 1.857936982337354e-05,
|
|
"loss": 1.4839,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 1.857476938369741e-05,
|
|
"loss": 1.6382,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 1.8570162078705685e-05,
|
|
"loss": 1.6401,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 1.856554791208717e-05,
|
|
"loss": 1.4761,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 1.8560926887536153e-05,
|
|
"loss": 1.5476,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 1.855629900875242e-05,
|
|
"loss": 1.6288,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 1.8551664279441243e-05,
|
|
"loss": 1.4006,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 1.854702270331338e-05,
|
|
"loss": 1.7524,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 1.854237428408507e-05,
|
|
"loss": 1.7441,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 1.853771902547802e-05,
|
|
"loss": 1.667,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 1.8533056931219437e-05,
|
|
"loss": 1.5021,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 1.8528388005041975e-05,
|
|
"loss": 1.6713,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 1.8523712250683778e-05,
|
|
"loss": 1.555,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 1.851902967188844e-05,
|
|
"loss": 1.3896,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 1.8514340272405036e-05,
|
|
"loss": 1.2944,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 1.8509644055988092e-05,
|
|
"loss": 1.7481,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 1.850494102639759e-05,
|
|
"loss": 1.4937,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 1.850023118739897e-05,
|
|
"loss": 1.3376,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 1.8495514542763128e-05,
|
|
"loss": 1.5804,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 1.8490791096266404e-05,
|
|
"loss": 1.6496,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 1.8486060851690585e-05,
|
|
"loss": 1.4564,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 1.8481323812822894e-05,
|
|
"loss": 1.7039,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 1.8476579983456014e-05,
|
|
"loss": 1.5527,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 1.847182936738804e-05,
|
|
"loss": 1.5825,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 1.846707196842252e-05,
|
|
"loss": 1.5064,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 1.8462307790368415e-05,
|
|
"loss": 1.5881,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 1.845753683704013e-05,
|
|
"loss": 1.536,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 1.845275911225749e-05,
|
|
"loss": 1.3428,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 1.8447974619845733e-05,
|
|
"loss": 1.6038,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 1.8443183363635527e-05,
|
|
"loss": 1.4291,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 1.843838534746295e-05,
|
|
"loss": 1.473,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 1.8433580575169484e-05,
|
|
"loss": 1.4353,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.842876905060204e-05,
|
|
"loss": 1.4683,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.842395077761292e-05,
|
|
"loss": 1.5332,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.8419125760059827e-05,
|
|
"loss": 2.0511,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.8414294001805882e-05,
|
|
"loss": 1.4397,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.840945550671958e-05,
|
|
"loss": 1.6149,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.840461027867483e-05,
|
|
"loss": 1.5254,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.839975832155091e-05,
|
|
"loss": 1.7833,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.839489963923251e-05,
|
|
"loss": 1.5959,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.8390034235609683e-05,
|
|
"loss": 1.5142,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.838516211457788e-05,
|
|
"loss": 1.3363,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.838028328003792e-05,
|
|
"loss": 1.6712,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.8375397735895996e-05,
|
|
"loss": 1.6192,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 1.8370505486063687e-05,
|
|
"loss": 1.502,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 1.8365606534457917e-05,
|
|
"loss": 1.2443,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 1.8360700885001e-05,
|
|
"loss": 1.4834,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 1.8355788541620604e-05,
|
|
"loss": 1.5204,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 1.8350869508249746e-05,
|
|
"loss": 1.6425,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 1.8345943788826816e-05,
|
|
"loss": 1.4578,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 1.8341011387295542e-05,
|
|
"loss": 1.5451,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 1.833607230760501e-05,
|
|
"loss": 1.8175,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 1.833112655370966e-05,
|
|
"loss": 1.2678,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 1.8326174129569257e-05,
|
|
"loss": 1.7283,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 1.8321215039148922e-05,
|
|
"loss": 1.6484,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 1.8316249286419105e-05,
|
|
"loss": 1.7154,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.8311276875355598e-05,
|
|
"loss": 1.7344,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.830629780993952e-05,
|
|
"loss": 1.6188,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.8301312094157307e-05,
|
|
"loss": 1.6951,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.8296319732000745e-05,
|
|
"loss": 1.5505,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.829132072746691e-05,
|
|
"loss": 1.3854,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.8286315084558228e-05,
|
|
"loss": 1.5972,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.8281302807282412e-05,
|
|
"loss": 1.6662,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.8276283899652504e-05,
|
|
"loss": 1.7569,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.8271258365686845e-05,
|
|
"loss": 1.6185,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.8266226209409092e-05,
|
|
"loss": 1.7016,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.826118743484819e-05,
|
|
"loss": 1.7861,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.82561420460384e-05,
|
|
"loss": 1.4276,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.8251090047019265e-05,
|
|
"loss": 1.7748,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.824603144183562e-05,
|
|
"loss": 1.4996,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.82409662345376e-05,
|
|
"loss": 1.6135,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.823589442918062e-05,
|
|
"loss": 1.5092,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.823081602982537e-05,
|
|
"loss": 1.7329,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.8225731040537837e-05,
|
|
"loss": 1.4042,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.822063946538927e-05,
|
|
"loss": 1.6783,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.821554130845619e-05,
|
|
"loss": 1.8483,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.82104365738204e-05,
|
|
"loss": 1.5987,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.820532526556896e-05,
|
|
"loss": 1.5662,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.8200207387794192e-05,
|
|
"loss": 1.6235,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.8195082944593686e-05,
|
|
"loss": 1.6054,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.8189951940070277e-05,
|
|
"loss": 1.8404,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.8184814378332064e-05,
|
|
"loss": 1.3125,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.8179670263492393e-05,
|
|
"loss": 1.621,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.8174519599669852e-05,
|
|
"loss": 1.5629,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.8169362390988276e-05,
|
|
"loss": 1.5495,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.8164198641576743e-05,
|
|
"loss": 1.6132,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.8159028355569564e-05,
|
|
"loss": 1.4228,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.8153851537106284e-05,
|
|
"loss": 1.5186,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.8148668190331675e-05,
|
|
"loss": 1.5249,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.8143478319395745e-05,
|
|
"loss": 1.5949,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.813828192845371e-05,
|
|
"loss": 1.4339,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.813307902166603e-05,
|
|
"loss": 1.6281,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.8127869603198354e-05,
|
|
"loss": 1.4463,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.8122653677221567e-05,
|
|
"loss": 1.6676,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.8117431247911753e-05,
|
|
"loss": 1.8067,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.8112202319450204e-05,
|
|
"loss": 1.6794,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.8106966896023417e-05,
|
|
"loss": 1.6067,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.810172498182309e-05,
|
|
"loss": 1.6287,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.8096476581046112e-05,
|
|
"loss": 1.4122,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.809122169789458e-05,
|
|
"loss": 1.5467,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.8085960336575765e-05,
|
|
"loss": 1.5299,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.8080692501302128e-05,
|
|
"loss": 1.6419,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.8075418196291323e-05,
|
|
"loss": 1.5362,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.807013742576617e-05,
|
|
"loss": 1.6864,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.806485019395468e-05,
|
|
"loss": 1.6277,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.805955650509002e-05,
|
|
"loss": 1.5159,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.805425636341055e-05,
|
|
"loss": 1.6995,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.804894977315977e-05,
|
|
"loss": 1.663,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.804363673858636e-05,
|
|
"loss": 1.3763,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.803831726394416e-05,
|
|
"loss": 1.6992,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.803299135349216e-05,
|
|
"loss": 1.5723,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.80276590114945e-05,
|
|
"loss": 1.6323,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.802232024222048e-05,
|
|
"loss": 1.6596,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.8016975049944534e-05,
|
|
"loss": 1.5222,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.801162343894625e-05,
|
|
"loss": 1.6843,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.8006265413510346e-05,
|
|
"loss": 1.5744,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.8000900977926682e-05,
|
|
"loss": 1.4186,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.799553013649024e-05,
|
|
"loss": 1.7217,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.7990152893501152e-05,
|
|
"loss": 1.375,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.798476925326465e-05,
|
|
"loss": 1.558,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.7979379220091098e-05,
|
|
"loss": 1.4841,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.7973982798295988e-05,
|
|
"loss": 1.4796,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.7968579992199916e-05,
|
|
"loss": 1.5388,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.796317080612859e-05,
|
|
"loss": 1.7815,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.795775524441283e-05,
|
|
"loss": 1.4672,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.795233331138856e-05,
|
|
"loss": 1.5152,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.79469050113968e-05,
|
|
"loss": 1.6899,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.7941470348783677e-05,
|
|
"loss": 1.6124,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.7936029327900408e-05,
|
|
"loss": 1.7942,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.793058195310329e-05,
|
|
"loss": 1.6466,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 1.792512822875373e-05,
|
|
"loss": 1.7392,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 1.79196681592182e-05,
|
|
"loss": 1.6193,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 1.7914201748868254e-05,
|
|
"loss": 1.8874,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 1.790872900208053e-05,
|
|
"loss": 1.8996,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 1.7903249923236736e-05,
|
|
"loss": 1.5048,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 1.789776451672365e-05,
|
|
"loss": 1.7384,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 1.7892272786933115e-05,
|
|
"loss": 1.6107,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 1.7886774738262036e-05,
|
|
"loss": 1.6783,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 1.7881270375112382e-05,
|
|
"loss": 1.4156,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 1.787575970189117e-05,
|
|
"loss": 1.7418,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 1.7870242723010473e-05,
|
|
"loss": 1.7733,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 1.7864719442887416e-05,
|
|
"loss": 1.5888,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 1.7859189865944167e-05,
|
|
"loss": 1.5894,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 1.7853653996607934e-05,
|
|
"loss": 1.4164,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 1.7848111839310957e-05,
|
|
"loss": 1.5023,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 1.7842563398490526e-05,
|
|
"loss": 1.4484,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 1.7837008678588944e-05,
|
|
"loss": 1.305,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 1.7831447684053554e-05,
|
|
"loss": 1.4335,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 1.7825880419336722e-05,
|
|
"loss": 1.4895,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 1.7820306888895825e-05,
|
|
"loss": 1.679,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 1.7814727097193264e-05,
|
|
"loss": 1.4426,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 1.7809141048696453e-05,
|
|
"loss": 1.6739,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 1.780354874787781e-05,
|
|
"loss": 1.7426,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 1.7797950199214768e-05,
|
|
"loss": 1.5771,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 1.7792345407189753e-05,
|
|
"loss": 1.4997,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 1.7786734376290196e-05,
|
|
"loss": 1.7565,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 1.7781117111008518e-05,
|
|
"loss": 1.6297,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 1.777549361584214e-05,
|
|
"loss": 1.653,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 1.776986389529346e-05,
|
|
"loss": 1.6453,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 1.7764227953869862e-05,
|
|
"loss": 1.482,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 1.7758585796083724e-05,
|
|
"loss": 1.4083,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 1.7752937426452383e-05,
|
|
"loss": 1.6919,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 1.7747282849498165e-05,
|
|
"loss": 1.6055,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 1.7741622069748352e-05,
|
|
"loss": 1.4426,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 1.77359550917352e-05,
|
|
"loss": 1.5279,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 1.773028191999593e-05,
|
|
"loss": 1.3404,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 1.7724602559072718e-05,
|
|
"loss": 1.4397,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 1.771891701351269e-05,
|
|
"loss": 1.4989,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 1.7713225287867935e-05,
|
|
"loss": 1.6979,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 1.7707527386695484e-05,
|
|
"loss": 1.8412,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 1.7701823314557306e-05,
|
|
"loss": 1.8489,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 1.7696113076020327e-05,
|
|
"loss": 1.7091,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 1.769039667565639e-05,
|
|
"loss": 1.574,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 1.7684674118042294e-05,
|
|
"loss": 1.7657,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 1.7678945407759747e-05,
|
|
"loss": 1.5477,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 1.7673210549395398e-05,
|
|
"loss": 1.5278,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 1.7667469547540805e-05,
|
|
"loss": 1.6385,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 1.7661722406792456e-05,
|
|
"loss": 1.6477,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 1.7655969131751747e-05,
|
|
"loss": 1.4878,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 1.7650209727024994e-05,
|
|
"loss": 1.4805,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 1.7644444197223413e-05,
|
|
"loss": 1.6373,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 1.763867254696312e-05,
|
|
"loss": 1.4578,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 1.7632894780865148e-05,
|
|
"loss": 1.7319,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 1.7627110903555405e-05,
|
|
"loss": 1.7094,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 1.762132091966471e-05,
|
|
"loss": 1.5167,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 1.761552483382876e-05,
|
|
"loss": 1.6813,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 1.7609722650688147e-05,
|
|
"loss": 1.5909,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 1.7603914374888333e-05,
|
|
"loss": 1.6023,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 1.7598100011079665e-05,
|
|
"loss": 1.5122,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 1.759227956391737e-05,
|
|
"loss": 1.8068,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 1.758645303806153e-05,
|
|
"loss": 1.6791,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 1.7580620438177107e-05,
|
|
"loss": 1.6497,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.7574781768933926e-05,
|
|
"loss": 1.8648,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.756893703500666e-05,
|
|
"loss": 1.4595,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.7563086241074853e-05,
|
|
"loss": 1.4459,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.7557229391822884e-05,
|
|
"loss": 1.6123,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.7551366491939995e-05,
|
|
"loss": 1.4214,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.7545497546120266e-05,
|
|
"loss": 1.5038,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.753962255906262e-05,
|
|
"loss": 1.7372,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.753374153547081e-05,
|
|
"loss": 1.6419,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.7527854480053435e-05,
|
|
"loss": 1.7121,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.7521961397523905e-05,
|
|
"loss": 1.5493,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.7516062292600473e-05,
|
|
"loss": 1.4837,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.7510157170006204e-05,
|
|
"loss": 1.5158,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7504246034468986e-05,
|
|
"loss": 1.5718,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7498328890721516e-05,
|
|
"loss": 1.8148,
|
|
"step": 993
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.749240574350131e-05,
|
|
"loss": 1.5478,
|
|
"step": 994
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7486476597550676e-05,
|
|
"loss": 1.5994,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7480541457616745e-05,
|
|
"loss": 1.6989,
|
|
"step": 996
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7474600328451425e-05,
|
|
"loss": 1.6853,
|
|
"step": 997
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7468653214811437e-05,
|
|
"loss": 1.6673,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.746270012145829e-05,
|
|
"loss": 1.4969,
|
|
"step": 999
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7456741053158264e-05,
|
|
"loss": 1.2711,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7450776014682445e-05,
|
|
"loss": 1.8335,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7444805010806693e-05,
|
|
"loss": 1.5655,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7438828046311635e-05,
|
|
"loss": 1.7225,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7432845125982676e-05,
|
|
"loss": 1.701,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.7426856254609993e-05,
|
|
"loss": 1.6989,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.7420861436988527e-05,
|
|
"loss": 1.557,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.741486067791797e-05,
|
|
"loss": 1.8976,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.7408853982202784e-05,
|
|
"loss": 1.6633,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.7402841354652174e-05,
|
|
"loss": 1.6968,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.7396822800080104e-05,
|
|
"loss": 1.5893,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.7390798323305277e-05,
|
|
"loss": 1.4468,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.7384767929151138e-05,
|
|
"loss": 1.7966,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.7378731622445863e-05,
|
|
"loss": 1.7174,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.737268940802238e-05,
|
|
"loss": 1.6591,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.7366641290718333e-05,
|
|
"loss": 1.4726,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.7360587275376092e-05,
|
|
"loss": 1.5904,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.7354527366842755e-05,
|
|
"loss": 1.3801,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.7348461569970135e-05,
|
|
"loss": 1.4843,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.7342389889614762e-05,
|
|
"loss": 1.5991,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.7336312330637873e-05,
|
|
"loss": 1.5462,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.7330228897905416e-05,
|
|
"loss": 1.4646,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.7324139596288033e-05,
|
|
"loss": 1.4415,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.7318044430661078e-05,
|
|
"loss": 1.7267,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.731194340590459e-05,
|
|
"loss": 1.4259,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.73058365269033e-05,
|
|
"loss": 1.5303,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.7299723798546636e-05,
|
|
"loss": 1.7183,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.7293605225728697e-05,
|
|
"loss": 1.5722,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.728748081334826e-05,
|
|
"loss": 1.4289,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.7281350566308793e-05,
|
|
"loss": 1.5786,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.7275214489518418e-05,
|
|
"loss": 1.7584,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.726907258788994e-05,
|
|
"loss": 1.5112,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.7262924866340812e-05,
|
|
"loss": 1.6238,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.7256771329793164e-05,
|
|
"loss": 1.6647,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.725061198317376e-05,
|
|
"loss": 1.7252,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.7244446831414035e-05,
|
|
"loss": 1.5492,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.723827587945007e-05,
|
|
"loss": 1.4404,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.7232099132222573e-05,
|
|
"loss": 1.8804,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.7225916594676914e-05,
|
|
"loss": 1.3384,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.721972827176308e-05,
|
|
"loss": 1.5001,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.7213534168435707e-05,
|
|
"loss": 1.4629,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.7207334289654046e-05,
|
|
"loss": 1.5236,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.7201128640381977e-05,
|
|
"loss": 1.774,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.7194917225588e-05,
|
|
"loss": 1.5131,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.7188700050245236e-05,
|
|
"loss": 1.6493,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.7182477119331405e-05,
|
|
"loss": 1.5789,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.7176248437828845e-05,
|
|
"loss": 1.6965,
|
|
"step": 1046
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.7170014010724503e-05,
|
|
"loss": 1.4912,
|
|
"step": 1047
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.7163773843009913e-05,
|
|
"loss": 1.3074,
|
|
"step": 1048
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.7157527939681215e-05,
|
|
"loss": 1.6447,
|
|
"step": 1049
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.7151276305739135e-05,
|
|
"loss": 1.5039,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.7145018946188994e-05,
|
|
"loss": 1.2581,
|
|
"step": 1051
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.713875586604069e-05,
|
|
"loss": 1.8388,
|
|
"step": 1052
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.7132487070308705e-05,
|
|
"loss": 1.4898,
|
|
"step": 1053
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.7126212564012102e-05,
|
|
"loss": 1.5612,
|
|
"step": 1054
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.71199323521745e-05,
|
|
"loss": 1.4623,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.7113646439824105e-05,
|
|
"loss": 1.2464,
|
|
"step": 1056
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.710735483199368e-05,
|
|
"loss": 1.5896,
|
|
"step": 1057
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.710105753372054e-05,
|
|
"loss": 1.6267,
|
|
"step": 1058
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.7094754550046562e-05,
|
|
"loss": 1.5345,
|
|
"step": 1059
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.7088445886018184e-05,
|
|
"loss": 1.7219,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.708213154668638e-05,
|
|
"loss": 1.5236,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.7075811537106672e-05,
|
|
"loss": 1.7433,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.7069485862339123e-05,
|
|
"loss": 1.6493,
|
|
"step": 1063
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.7063154527448325e-05,
|
|
"loss": 1.3767,
|
|
"step": 1064
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.705681753750341e-05,
|
|
"loss": 1.6052,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.7050474897578037e-05,
|
|
"loss": 1.7966,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.7044126612750386e-05,
|
|
"loss": 1.6523,
|
|
"step": 1067
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.703777268810315e-05,
|
|
"loss": 1.7448,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.703141312872355e-05,
|
|
"loss": 1.4933,
|
|
"step": 1069
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.7025047939703318e-05,
|
|
"loss": 1.5628,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.7018677126138673e-05,
|
|
"loss": 1.5789,
|
|
"step": 1071
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.7012300693130367e-05,
|
|
"loss": 1.6377,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.700591864578363e-05,
|
|
"loss": 1.5115,
|
|
"step": 1073
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.6999530989208194e-05,
|
|
"loss": 1.5945,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.6993137728518282e-05,
|
|
"loss": 1.5008,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.6986738868832602e-05,
|
|
"loss": 1.4278,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.698033441527435e-05,
|
|
"loss": 1.7068,
|
|
"step": 1077
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.6973924372971188e-05,
|
|
"loss": 1.5433,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.696750874705527e-05,
|
|
"loss": 1.5216,
|
|
"step": 1079
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.696108754266321e-05,
|
|
"loss": 1.5637,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.6954660764936094e-05,
|
|
"loss": 1.6202,
|
|
"step": 1081
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.6948228419019455e-05,
|
|
"loss": 1.5721,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.694179051006331e-05,
|
|
"loss": 1.5462,
|
|
"step": 1083
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.69353470432221e-05,
|
|
"loss": 1.6751,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.6928898023654745e-05,
|
|
"loss": 1.4845,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.6922443456524592e-05,
|
|
"loss": 1.422,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.6915983346999433e-05,
|
|
"loss": 1.551,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.69095177002515e-05,
|
|
"loss": 1.689,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.6903046521457463e-05,
|
|
"loss": 1.9211,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.689656981579841e-05,
|
|
"loss": 1.4598,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.689008758845986e-05,
|
|
"loss": 1.4382,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.6883599844631754e-05,
|
|
"loss": 1.4713,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.6877106589508447e-05,
|
|
"loss": 1.5432,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.6870607828288714e-05,
|
|
"loss": 1.6671,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.6864103566175726e-05,
|
|
"loss": 1.4679,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.6857593808377067e-05,
|
|
"loss": 1.33,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.685107856010472e-05,
|
|
"loss": 1.6527,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.684455782657506e-05,
|
|
"loss": 1.4477,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.6838031613008856e-05,
|
|
"loss": 1.4373,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.6831499924631266e-05,
|
|
"loss": 1.6999,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.6824962766671837e-05,
|
|
"loss": 1.3948,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.6818420144364477e-05,
|
|
"loss": 1.2561,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 1.6811872062947488e-05,
|
|
"loss": 1.4545,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 1.680531852766353e-05,
|
|
"loss": 1.4157,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 1.6798759543759645e-05,
|
|
"loss": 1.6461,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 1.6792195116487215e-05,
|
|
"loss": 1.6739,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 1.6785625251101998e-05,
|
|
"loss": 1.5258,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 1.67790499528641e-05,
|
|
"loss": 1.6338,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 1.6772469227037977e-05,
|
|
"loss": 1.6566,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 1.676588307889243e-05,
|
|
"loss": 1.3663,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 1.6759291513700606e-05,
|
|
"loss": 1.705,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 1.675269453673998e-05,
|
|
"loss": 1.7285,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 1.6746092153292367e-05,
|
|
"loss": 1.4598,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 1.6739484368643908e-05,
|
|
"loss": 1.6212,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 1.6732871188085073e-05,
|
|
"loss": 1.7168,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 1.6726252616910645e-05,
|
|
"loss": 1.6044,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 1.6719628660419722e-05,
|
|
"loss": 1.6037,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 1.6712999323915727e-05,
|
|
"loss": 1.7058,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 1.6706364612706373e-05,
|
|
"loss": 1.4652,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 1.669972453210369e-05,
|
|
"loss": 1.4048,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 1.6693079087424002e-05,
|
|
"loss": 1.6143,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 1.6686428283987924e-05,
|
|
"loss": 1.6847,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 1.667977212712037e-05,
|
|
"loss": 1.4045,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 1.6673110622150527e-05,
|
|
"loss": 1.5121,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 1.6666443774411874e-05,
|
|
"loss": 1.7612,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 1.6659771589242173e-05,
|
|
"loss": 1.414,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 1.6653094071983443e-05,
|
|
"loss": 1.3927,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 1.6646411227981985e-05,
|
|
"loss": 1.691,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 1.663972306258836e-05,
|
|
"loss": 1.6058,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 1.663302958115739e-05,
|
|
"loss": 1.2803,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 1.6626330789048154e-05,
|
|
"loss": 1.7933,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 1.6619626691623984e-05,
|
|
"loss": 1.9921,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 1.6612917294252454e-05,
|
|
"loss": 1.4484,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 1.6606202602305392e-05,
|
|
"loss": 1.5338,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 1.659948262115885e-05,
|
|
"loss": 1.5213,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 1.6592757356193127e-05,
|
|
"loss": 1.6847,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 1.6586026812792756e-05,
|
|
"loss": 1.3978,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 1.6579290996346477e-05,
|
|
"loss": 1.5734,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 1.657254991224727e-05,
|
|
"loss": 1.6896,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 1.6565803565892325e-05,
|
|
"loss": 1.547,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 1.6559051962683046e-05,
|
|
"loss": 1.4645,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 1.6552295108025046e-05,
|
|
"loss": 1.6257,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 1.6545533007328146e-05,
|
|
"loss": 1.7532,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 1.6538765666006363e-05,
|
|
"loss": 1.538,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 1.6531993089477905e-05,
|
|
"loss": 1.461,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 1.6525215283165184e-05,
|
|
"loss": 1.505,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 1.651843225249479e-05,
|
|
"loss": 1.7795,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 1.65116440028975e-05,
|
|
"loss": 1.6337,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 1.6504850539808265e-05,
|
|
"loss": 1.9206,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 1.6498051868666216e-05,
|
|
"loss": 1.4196,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 1.6491247994914648e-05,
|
|
"loss": 1.4504,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 1.6484438924001023e-05,
|
|
"loss": 1.6497,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 1.647762466137697e-05,
|
|
"loss": 1.3986,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 1.6470805212498268e-05,
|
|
"loss": 1.4357,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 1.6463980582824847e-05,
|
|
"loss": 1.6155,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 1.6457150777820793e-05,
|
|
"loss": 1.497,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 1.645031580295433e-05,
|
|
"loss": 1.4838,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 1.644347566369782e-05,
|
|
"loss": 1.5368,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 1.643663036552776e-05,
|
|
"loss": 1.6863,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 1.6429779913924785e-05,
|
|
"loss": 1.5809,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 1.642292431437365e-05,
|
|
"loss": 1.7188,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 1.6416063572363224e-05,
|
|
"loss": 1.5265,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 1.640919769338651e-05,
|
|
"loss": 1.6861,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 1.6402326682940607e-05,
|
|
"loss": 1.5855,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 1.639545054652674e-05,
|
|
"loss": 1.3991,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 1.6388569289650224e-05,
|
|
"loss": 1.5891,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 1.6381682917820477e-05,
|
|
"loss": 1.6768,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 1.6374791436551016e-05,
|
|
"loss": 1.6618,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 1.6367894851359446e-05,
|
|
"loss": 1.5693,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 1.6360993167767458e-05,
|
|
"loss": 1.5513,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 1.6354086391300824e-05,
|
|
"loss": 1.4719,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 1.6347174527489396e-05,
|
|
"loss": 1.4088,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 1.6340257581867104e-05,
|
|
"loss": 1.4769,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 1.6333335559971934e-05,
|
|
"loss": 1.5257,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 1.6326408467345948e-05,
|
|
"loss": 1.5617,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 1.6319476309535258e-05,
|
|
"loss": 1.3983,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 1.6312539092090046e-05,
|
|
"loss": 1.569,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 1.6305596820564527e-05,
|
|
"loss": 1.7293,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 1.6298649500516974e-05,
|
|
"loss": 1.5675,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 1.62916971375097e-05,
|
|
"loss": 1.4498,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 1.628473973710906e-05,
|
|
"loss": 1.6463,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 1.6277777304885426e-05,
|
|
"loss": 1.5379,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 1.6270809846413212e-05,
|
|
"loss": 1.5979,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 1.6263837367270863e-05,
|
|
"loss": 1.4492,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 1.6256859873040828e-05,
|
|
"loss": 1.7799,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 1.6249877369309576e-05,
|
|
"loss": 1.5914,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 1.624288986166759e-05,
|
|
"loss": 1.5671,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 1.6235897355709365e-05,
|
|
"loss": 1.8066,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 1.622889985703338e-05,
|
|
"loss": 1.7329,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 1.6221897371242133e-05,
|
|
"loss": 1.5895,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 1.6214889903942092e-05,
|
|
"loss": 1.4323,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 1.620787746074374e-05,
|
|
"loss": 1.4184,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 1.6200860047261518e-05,
|
|
"loss": 1.7059,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 1.619383766911386e-05,
|
|
"loss": 1.6959,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 1.618681033192318e-05,
|
|
"loss": 1.5266,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 1.6179778041315845e-05,
|
|
"loss": 1.7495,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 1.6172740802922207e-05,
|
|
"loss": 1.5334,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 1.6165698622376565e-05,
|
|
"loss": 1.3993,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 1.615865150531718e-05,
|
|
"loss": 1.2997,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 1.6151599457386272e-05,
|
|
"loss": 1.6633,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 1.614454248423e-05,
|
|
"loss": 1.4993,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.6137480591498463e-05,
|
|
"loss": 1.5129,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.6130413784845716e-05,
|
|
"loss": 1.5654,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.612334206992973e-05,
|
|
"loss": 1.5716,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.6116265452412407e-05,
|
|
"loss": 1.3112,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.6109183937959595e-05,
|
|
"loss": 1.4747,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.6102097532241035e-05,
|
|
"loss": 1.6585,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.6095006240930405e-05,
|
|
"loss": 1.451,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.608791006970528e-05,
|
|
"loss": 1.6996,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.6080809024247153e-05,
|
|
"loss": 1.7244,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.607370311024142e-05,
|
|
"loss": 1.6903,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.6066592333377356e-05,
|
|
"loss": 1.4892,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.6059476699348157e-05,
|
|
"loss": 1.4246,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 1.6052356213850888e-05,
|
|
"loss": 1.7428,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 1.6045230882586507e-05,
|
|
"loss": 1.5679,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 1.603810071125985e-05,
|
|
"loss": 1.556,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 1.603096570557962e-05,
|
|
"loss": 1.5972,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 1.602382587125841e-05,
|
|
"loss": 1.4578,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 1.6016681214012656e-05,
|
|
"loss": 1.7046,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 1.6009531739562675e-05,
|
|
"loss": 1.6585,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 1.600237745363263e-05,
|
|
"loss": 1.38,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 1.5995218361950536e-05,
|
|
"loss": 1.5158,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 1.598805447024826e-05,
|
|
"loss": 1.3433,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 1.5980885784261514e-05,
|
|
"loss": 1.5962,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 1.5973712309729837e-05,
|
|
"loss": 1.5837,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 1.5966534052396618e-05,
|
|
"loss": 1.6336,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 1.5959351018009063e-05,
|
|
"loss": 1.6838,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 1.595216321231821e-05,
|
|
"loss": 1.6078,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 1.5944970641078907e-05,
|
|
"loss": 1.4486,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 1.5937773310049832e-05,
|
|
"loss": 1.3104,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 1.5930571224993462e-05,
|
|
"loss": 1.5935,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 1.5923364391676087e-05,
|
|
"loss": 1.5921,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 1.5916152815867794e-05,
|
|
"loss": 1.4842,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 1.5908936503342473e-05,
|
|
"loss": 1.3898,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 1.5901715459877804e-05,
|
|
"loss": 1.2668,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 1.5894489691255245e-05,
|
|
"loss": 1.4387,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 1.588725920326005e-05,
|
|
"loss": 1.2488,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 1.5880024001681248e-05,
|
|
"loss": 1.3228,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 1.5872784092311647e-05,
|
|
"loss": 1.5497,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 1.586553948094781e-05,
|
|
"loss": 1.3823,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 1.5858290173390073e-05,
|
|
"loss": 1.3546,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 1.5851036175442536e-05,
|
|
"loss": 1.5244,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 1.584377749291305e-05,
|
|
"loss": 1.4045,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 1.583651413161321e-05,
|
|
"loss": 1.3409,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 1.5829246097358377e-05,
|
|
"loss": 1.6047,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 1.582197339596763e-05,
|
|
"loss": 1.3744,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 1.58146960332638e-05,
|
|
"loss": 1.5978,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 1.5807414015073438e-05,
|
|
"loss": 1.6809,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 1.580012734722684e-05,
|
|
"loss": 1.3381,
|
|
"step": 1249
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 1.5792836035558003e-05,
|
|
"loss": 1.6233,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 1.578554008590466e-05,
|
|
"loss": 1.3421,
|
|
"step": 1251
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 1.5778239504108254e-05,
|
|
"loss": 1.4599,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 1.5770934296013923e-05,
|
|
"loss": 1.259,
|
|
"step": 1253
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 1.576362446747053e-05,
|
|
"loss": 1.2823,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 1.575631002433062e-05,
|
|
"loss": 1.4537,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 1.574899097245044e-05,
|
|
"loss": 1.1656,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 1.5741667317689927e-05,
|
|
"loss": 1.3568,
|
|
"step": 1257
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 1.57343390659127e-05,
|
|
"loss": 1.6016,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 1.5727006222986062e-05,
|
|
"loss": 1.6304,
|
|
"step": 1259
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 1.5719668794780996e-05,
|
|
"loss": 1.5371,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 1.5712326787172143e-05,
|
|
"loss": 1.4653,
|
|
"step": 1261
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 1.570498020603782e-05,
|
|
"loss": 1.1719,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 1.5697629057260013e-05,
|
|
"loss": 1.2864,
|
|
"step": 1263
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 1.569027334672435e-05,
|
|
"loss": 1.4829,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 1.568291308032011e-05,
|
|
"loss": 1.1393,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 1.5675548263940237e-05,
|
|
"loss": 1.1904,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 1.566817890348131e-05,
|
|
"loss": 1.3741,
|
|
"step": 1267
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 1.5660805004843536e-05,
|
|
"loss": 1.3951,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 1.5653426573930768e-05,
|
|
"loss": 1.1888,
|
|
"step": 1269
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 1.5646043616650485e-05,
|
|
"loss": 1.3707,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 1.5638656138913786e-05,
|
|
"loss": 1.2685,
|
|
"step": 1271
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 1.563126414663539e-05,
|
|
"loss": 1.5302,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 1.5623867645733634e-05,
|
|
"loss": 1.3159,
|
|
"step": 1273
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 1.561646664213047e-05,
|
|
"loss": 1.2943,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 1.5609061141751434e-05,
|
|
"loss": 1.4176,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 1.5601651150525686e-05,
|
|
"loss": 1.5295,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 1.559423667438597e-05,
|
|
"loss": 1.5309,
|
|
"step": 1277
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 1.5586817719268626e-05,
|
|
"loss": 1.2331,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 1.5579394291113568e-05,
|
|
"loss": 1.3654,
|
|
"step": 1279
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 1.557196639586431e-05,
|
|
"loss": 1.3621,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 1.5564534039467923e-05,
|
|
"loss": 1.2631,
|
|
"step": 1281
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 1.5557097227875067e-05,
|
|
"loss": 1.4423,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 1.554965596703996e-05,
|
|
"loss": 1.534,
|
|
"step": 1283
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 1.5542210262920385e-05,
|
|
"loss": 1.4085,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 1.5534760121477677e-05,
|
|
"loss": 1.4314,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 1.5527305548676728e-05,
|
|
"loss": 1.6163,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 1.551984655048598e-05,
|
|
"loss": 1.4961,
|
|
"step": 1287
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 1.5512383132877417e-05,
|
|
"loss": 1.4134,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 1.5504915301826556e-05,
|
|
"loss": 1.3585,
|
|
"step": 1289
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 1.549744306331246e-05,
|
|
"loss": 1.354,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 1.5489966423317705e-05,
|
|
"loss": 1.4319,
|
|
"step": 1291
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 1.54824853878284e-05,
|
|
"loss": 1.3654,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 1.547499996283417e-05,
|
|
"loss": 1.2821,
|
|
"step": 1293
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 1.5467510154328164e-05,
|
|
"loss": 1.1616,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 1.5460015968307027e-05,
|
|
"loss": 1.5458,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 1.5452517410770916e-05,
|
|
"loss": 1.3442,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 1.5445014487723486e-05,
|
|
"loss": 1.2765,
|
|
"step": 1297
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 1.5437507205171893e-05,
|
|
"loss": 1.4692,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 1.5429995569126773e-05,
|
|
"loss": 1.2705,
|
|
"step": 1299
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 1.542247958560225e-05,
|
|
"loss": 1.2312,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 1.5414959260615943e-05,
|
|
"loss": 1.4155,
|
|
"step": 1301
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 1.5407434600188922e-05,
|
|
"loss": 1.5061,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 1.5399905610345757e-05,
|
|
"loss": 1.373,
|
|
"step": 1303
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 1.5392372297114454e-05,
|
|
"loss": 1.3039,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 1.5384834666526508e-05,
|
|
"loss": 1.4292,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 1.537729272461685e-05,
|
|
"loss": 1.0857,
|
|
"step": 1306
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 1.5369746477423872e-05,
|
|
"loss": 1.2835,
|
|
"step": 1307
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 1.5362195930989416e-05,
|
|
"loss": 1.3203,
|
|
"step": 1308
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 1.535464109135876e-05,
|
|
"loss": 1.3036,
|
|
"step": 1309
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 1.534708196458062e-05,
|
|
"loss": 1.4433,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 1.533951855670715e-05,
|
|
"loss": 1.3049,
|
|
"step": 1311
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 1.5331950873793928e-05,
|
|
"loss": 1.3035,
|
|
"step": 1312
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 1.532437892189995e-05,
|
|
"loss": 1.3476,
|
|
"step": 1313
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 1.531680270708764e-05,
|
|
"loss": 1.3325,
|
|
"step": 1314
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 1.530922223542282e-05,
|
|
"loss": 1.2586,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 1.5301637512974737e-05,
|
|
"loss": 1.3626,
|
|
"step": 1316
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 1.5294048545816038e-05,
|
|
"loss": 1.5196,
|
|
"step": 1317
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 1.5286455340022756e-05,
|
|
"loss": 1.3395,
|
|
"step": 1318
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 1.5278857901674324e-05,
|
|
"loss": 1.4167,
|
|
"step": 1319
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 1.5271256236853577e-05,
|
|
"loss": 1.4593,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 1.5263650351646715e-05,
|
|
"loss": 1.4279,
|
|
"step": 1321
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 1.5256040252143326e-05,
|
|
"loss": 1.5522,
|
|
"step": 1322
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 1.5248425944436376e-05,
|
|
"loss": 1.4674,
|
|
"step": 1323
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 1.5240807434622184e-05,
|
|
"loss": 1.1478,
|
|
"step": 1324
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 1.5233184728800454e-05,
|
|
"loss": 1.3981,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 1.522555783307424e-05,
|
|
"loss": 1.2276,
|
|
"step": 1326
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 1.5217926753549946e-05,
|
|
"loss": 1.3408,
|
|
"step": 1327
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 1.521029149633733e-05,
|
|
"loss": 1.3207,
|
|
"step": 1328
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 1.52026520675495e-05,
|
|
"loss": 1.2482,
|
|
"step": 1329
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 1.51950084733029e-05,
|
|
"loss": 1.4515,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 1.51873607197173e-05,
|
|
"loss": 1.2237,
|
|
"step": 1331
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 1.5179708812915817e-05,
|
|
"loss": 1.242,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 1.5172052759024878e-05,
|
|
"loss": 1.6046,
|
|
"step": 1333
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 1.516439256417424e-05,
|
|
"loss": 1.2874,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 1.5156728234496974e-05,
|
|
"loss": 1.3001,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 1.5149059776129457e-05,
|
|
"loss": 1.486,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 1.5141387195211377e-05,
|
|
"loss": 1.6148,
|
|
"step": 1337
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 1.5133710497885719e-05,
|
|
"loss": 1.3662,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 1.5126029690298762e-05,
|
|
"loss": 1.2857,
|
|
"step": 1339
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 1.511834477860008e-05,
|
|
"loss": 1.5284,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 1.5110655768942536e-05,
|
|
"loss": 1.4389,
|
|
"step": 1341
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 1.5102962667482266e-05,
|
|
"loss": 1.5458,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 1.5095265480378687e-05,
|
|
"loss": 1.1502,
|
|
"step": 1343
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 1.5087564213794486e-05,
|
|
"loss": 1.2203,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 1.5079858873895612e-05,
|
|
"loss": 1.0416,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 1.5072149466851284e-05,
|
|
"loss": 1.3265,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 1.5064435998833972e-05,
|
|
"loss": 1.4583,
|
|
"step": 1347
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 1.5056718476019398e-05,
|
|
"loss": 1.2747,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 1.5048996904586525e-05,
|
|
"loss": 1.4742,
|
|
"step": 1349
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 1.5041271290717573e-05,
|
|
"loss": 1.2944,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 1.5033541640597982e-05,
|
|
"loss": 1.3328,
|
|
"step": 1351
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 1.502580796041643e-05,
|
|
"loss": 1.2349,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 1.5018070256364822e-05,
|
|
"loss": 1.4199,
|
|
"step": 1353
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 1.501032853463829e-05,
|
|
"loss": 1.4693,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 1.5002582801435168e-05,
|
|
"loss": 1.4208,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 1.4994833062957014e-05,
|
|
"loss": 1.5243,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 1.4987079325408598e-05,
|
|
"loss": 1.468,
|
|
"step": 1357
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 1.4979321594997873e-05,
|
|
"loss": 1.4502,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 1.4971559877936004e-05,
|
|
"loss": 1.5123,
|
|
"step": 1359
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 1.496379418043734e-05,
|
|
"loss": 1.4094,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 1.4956024508719422e-05,
|
|
"loss": 1.484,
|
|
"step": 1361
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 1.4948250869002973e-05,
|
|
"loss": 1.447,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 1.494047326751189e-05,
|
|
"loss": 1.4323,
|
|
"step": 1363
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 1.4932691710473242e-05,
|
|
"loss": 1.1701,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 1.4924906204117264e-05,
|
|
"loss": 1.3654,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 1.491711675467736e-05,
|
|
"loss": 1.3315,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 1.4909323368390083e-05,
|
|
"loss": 1.4096,
|
|
"step": 1367
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 1.4901526051495138e-05,
|
|
"loss": 1.4771,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 1.4893724810235388e-05,
|
|
"loss": 1.2899,
|
|
"step": 1369
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 1.488591965085682e-05,
|
|
"loss": 1.2806,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 1.4878110579608574e-05,
|
|
"loss": 1.5297,
|
|
"step": 1371
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 1.4870297602742915e-05,
|
|
"loss": 1.3678,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 1.4862480726515238e-05,
|
|
"loss": 1.2538,
|
|
"step": 1373
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 1.4854659957184052e-05,
|
|
"loss": 1.738,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 1.4846835301010995e-05,
|
|
"loss": 1.295,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 1.4839006764260806e-05,
|
|
"loss": 1.3813,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 1.4831174353201341e-05,
|
|
"loss": 1.3276,
|
|
"step": 1377
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 1.4823338074103546e-05,
|
|
"loss": 1.5628,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 1.4815497933241478e-05,
|
|
"loss": 1.3783,
|
|
"step": 1379
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 1.4807653936892274e-05,
|
|
"loss": 1.3739,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 1.4799806091336164e-05,
|
|
"loss": 1.728,
|
|
"step": 1381
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 1.4791954402856458e-05,
|
|
"loss": 1.4756,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 1.4784098877739544e-05,
|
|
"loss": 1.2754,
|
|
"step": 1383
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 1.477623952227488e-05,
|
|
"loss": 1.5261,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 1.4768376342754989e-05,
|
|
"loss": 1.361,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 1.476050934547546e-05,
|
|
"loss": 1.3097,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 1.475263853673494e-05,
|
|
"loss": 1.4778,
|
|
"step": 1387
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 1.4744763922835122e-05,
|
|
"loss": 1.3131,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 1.4736885510080747e-05,
|
|
"loss": 1.4416,
|
|
"step": 1389
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 1.4729003304779604e-05,
|
|
"loss": 1.4844,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 1.4721117313242503e-05,
|
|
"loss": 1.3036,
|
|
"step": 1391
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 1.4713227541783304e-05,
|
|
"loss": 1.33,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 1.4705333996718882e-05,
|
|
"loss": 1.2134,
|
|
"step": 1393
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 1.4697436684369138e-05,
|
|
"loss": 1.5081,
|
|
"step": 1394
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 1.4689535611056984e-05,
|
|
"loss": 1.3803,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 1.4681630783108347e-05,
|
|
"loss": 1.6432,
|
|
"step": 1396
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 1.4673722206852166e-05,
|
|
"loss": 1.4073,
|
|
"step": 1397
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 1.4665809888620368e-05,
|
|
"loss": 1.2309,
|
|
"step": 1398
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 1.4657893834747883e-05,
|
|
"loss": 1.2942,
|
|
"step": 1399
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 1.4649974051572635e-05,
|
|
"loss": 1.464,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 1.464205054543553e-05,
|
|
"loss": 1.1867,
|
|
"step": 1401
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 1.4634123322680451e-05,
|
|
"loss": 1.3862,
|
|
"step": 1402
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 1.4626192389654265e-05,
|
|
"loss": 1.312,
|
|
"step": 1403
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 1.4618257752706809e-05,
|
|
"loss": 1.2966,
|
|
"step": 1404
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 1.4610319418190874e-05,
|
|
"loss": 1.28,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 1.4602377392462223e-05,
|
|
"loss": 1.3463,
|
|
"step": 1406
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 1.4594431681879571e-05,
|
|
"loss": 1.3479,
|
|
"step": 1407
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 1.4586482292804584e-05,
|
|
"loss": 1.6344,
|
|
"step": 1408
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 1.457852923160187e-05,
|
|
"loss": 1.2699,
|
|
"step": 1409
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 1.4570572504638985e-05,
|
|
"loss": 1.4873,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 1.4562612118286406e-05,
|
|
"loss": 1.4436,
|
|
"step": 1411
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 1.4554648078917552e-05,
|
|
"loss": 1.3016,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 1.4546680392908763e-05,
|
|
"loss": 1.1707,
|
|
"step": 1413
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 1.4538709066639299e-05,
|
|
"loss": 1.3997,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 1.4530734106491333e-05,
|
|
"loss": 1.5978,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 1.4522755518849947e-05,
|
|
"loss": 1.378,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 1.4514773310103132e-05,
|
|
"loss": 1.5168,
|
|
"step": 1417
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 1.4506787486641774e-05,
|
|
"loss": 1.4781,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 1.4498798054859653e-05,
|
|
"loss": 1.1946,
|
|
"step": 1419
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 1.4490805021153437e-05,
|
|
"loss": 1.5544,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 1.4482808391922687e-05,
|
|
"loss": 1.3663,
|
|
"step": 1421
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 1.4474808173569827e-05,
|
|
"loss": 1.3812,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 1.446680437250017e-05,
|
|
"loss": 1.3658,
|
|
"step": 1423
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 1.445879699512189e-05,
|
|
"loss": 1.2963,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 1.4450786047846019e-05,
|
|
"loss": 1.343,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 1.444277153708646e-05,
|
|
"loss": 1.2628,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 1.4434753469259957e-05,
|
|
"loss": 1.3534,
|
|
"step": 1427
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 1.4426731850786113e-05,
|
|
"loss": 1.3491,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 1.4418706688087365e-05,
|
|
"loss": 1.3512,
|
|
"step": 1429
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 1.4410677987588988e-05,
|
|
"loss": 1.3783,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 1.4402645755719097e-05,
|
|
"loss": 1.41,
|
|
"step": 1431
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 1.4394609998908629e-05,
|
|
"loss": 1.411,
|
|
"step": 1432
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 1.438657072359134e-05,
|
|
"loss": 1.5563,
|
|
"step": 1433
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 1.437852793620381e-05,
|
|
"loss": 1.1971,
|
|
"step": 1434
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 1.4370481643185428e-05,
|
|
"loss": 1.3309,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 1.436243185097839e-05,
|
|
"loss": 1.3271,
|
|
"step": 1436
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 1.4354378566027688e-05,
|
|
"loss": 1.4036,
|
|
"step": 1437
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 1.4346321794781117e-05,
|
|
"loss": 1.3649,
|
|
"step": 1438
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 1.433826154368927e-05,
|
|
"loss": 1.5008,
|
|
"step": 1439
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 1.433019781920551e-05,
|
|
"loss": 1.3452,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 1.4322130627785989e-05,
|
|
"loss": 1.4657,
|
|
"step": 1441
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 1.4314059975889635e-05,
|
|
"loss": 1.5981,
|
|
"step": 1442
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 1.4305985869978152e-05,
|
|
"loss": 1.4665,
|
|
"step": 1443
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 1.4297908316515998e-05,
|
|
"loss": 1.4115,
|
|
"step": 1444
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 1.42898273219704e-05,
|
|
"loss": 1.4655,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 1.4281742892811332e-05,
|
|
"loss": 1.5185,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 1.4273655035511528e-05,
|
|
"loss": 1.4752,
|
|
"step": 1447
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 1.4265563756546459e-05,
|
|
"loss": 1.3547,
|
|
"step": 1448
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 1.425746906239434e-05,
|
|
"loss": 1.3683,
|
|
"step": 1449
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 1.4249370959536118e-05,
|
|
"loss": 1.526,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 1.4241269454455469e-05,
|
|
"loss": 1.4236,
|
|
"step": 1451
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 1.4233164553638789e-05,
|
|
"loss": 1.2895,
|
|
"step": 1452
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 1.4225056263575208e-05,
|
|
"loss": 1.3244,
|
|
"step": 1453
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 1.4216944590756549e-05,
|
|
"loss": 1.4082,
|
|
"step": 1454
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 1.4208829541677358e-05,
|
|
"loss": 1.3477,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 1.4200711122834881e-05,
|
|
"loss": 1.3194,
|
|
"step": 1456
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 1.4192589340729058e-05,
|
|
"loss": 1.343,
|
|
"step": 1457
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 1.418446420186252e-05,
|
|
"loss": 1.1944,
|
|
"step": 1458
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 1.4176335712740596e-05,
|
|
"loss": 1.4211,
|
|
"step": 1459
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 1.4168203879871292e-05,
|
|
"loss": 1.4247,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 1.4160068709765286e-05,
|
|
"loss": 1.3206,
|
|
"step": 1461
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 1.4151930208935933e-05,
|
|
"loss": 1.2663,
|
|
"step": 1462
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 1.4143788383899252e-05,
|
|
"loss": 1.1434,
|
|
"step": 1463
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 1.4135643241173936e-05,
|
|
"loss": 1.5125,
|
|
"step": 1464
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 1.4127494787281309e-05,
|
|
"loss": 1.3421,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 1.4119343028745369e-05,
|
|
"loss": 1.4453,
|
|
"step": 1466
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 1.4111187972092753e-05,
|
|
"loss": 1.2799,
|
|
"step": 1467
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 1.4103029623852728e-05,
|
|
"loss": 1.4159,
|
|
"step": 1468
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 1.4094867990557214e-05,
|
|
"loss": 1.3469,
|
|
"step": 1469
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 1.4086703078740754e-05,
|
|
"loss": 1.1904,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 1.407853489494051e-05,
|
|
"loss": 1.1196,
|
|
"step": 1471
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 1.4070363445696269e-05,
|
|
"loss": 1.5459,
|
|
"step": 1472
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 1.4062188737550428e-05,
|
|
"loss": 1.3647,
|
|
"step": 1473
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 1.4054010777048009e-05,
|
|
"loss": 1.2955,
|
|
"step": 1474
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 1.4045829570736613e-05,
|
|
"loss": 1.1781,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 1.4037645125166459e-05,
|
|
"loss": 1.5709,
|
|
"step": 1476
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 1.4029457446890354e-05,
|
|
"loss": 1.1287,
|
|
"step": 1477
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 1.4021266542463687e-05,
|
|
"loss": 1.444,
|
|
"step": 1478
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 1.4013072418444442e-05,
|
|
"loss": 1.2879,
|
|
"step": 1479
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 1.400487508139317e-05,
|
|
"loss": 1.3374,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 1.3996674537873001e-05,
|
|
"loss": 1.1921,
|
|
"step": 1481
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 1.3988470794449626e-05,
|
|
"loss": 1.3943,
|
|
"step": 1482
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 1.3980263857691305e-05,
|
|
"loss": 1.3007,
|
|
"step": 1483
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 1.397205373416885e-05,
|
|
"loss": 1.5504,
|
|
"step": 1484
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 1.3963840430455631e-05,
|
|
"loss": 1.1766,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 1.3955623953127554e-05,
|
|
"loss": 1.1733,
|
|
"step": 1486
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 1.3947404308763071e-05,
|
|
"loss": 1.5159,
|
|
"step": 1487
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 1.3939181503943172e-05,
|
|
"loss": 1.4738,
|
|
"step": 1488
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 1.3930955545251372e-05,
|
|
"loss": 1.2234,
|
|
"step": 1489
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 1.3922726439273717e-05,
|
|
"loss": 1.3908,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 1.3914494192598772e-05,
|
|
"loss": 1.2859,
|
|
"step": 1491
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 1.3906258811817613e-05,
|
|
"loss": 1.1726,
|
|
"step": 1492
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 1.3898020303523822e-05,
|
|
"loss": 1.1625,
|
|
"step": 1493
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 1.3889778674313497e-05,
|
|
"loss": 1.3615,
|
|
"step": 1494
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 1.3881533930785227e-05,
|
|
"loss": 1.3425,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 1.3873286079540092e-05,
|
|
"loss": 1.4091,
|
|
"step": 1496
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 1.3865035127181666e-05,
|
|
"loss": 1.2977,
|
|
"step": 1497
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 1.3856781080316e-05,
|
|
"loss": 1.4668,
|
|
"step": 1498
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 1.3848523945551628e-05,
|
|
"loss": 1.1731,
|
|
"step": 1499
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 1.3840263729499557e-05,
|
|
"loss": 1.431,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 1.3832000438773252e-05,
|
|
"loss": 1.3746,
|
|
"step": 1501
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 1.3823734079988651e-05,
|
|
"loss": 1.3419,
|
|
"step": 1502
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 1.3815464659764142e-05,
|
|
"loss": 1.2946,
|
|
"step": 1503
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 1.3807192184720564e-05,
|
|
"loss": 1.4345,
|
|
"step": 1504
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 1.3798916661481203e-05,
|
|
"loss": 1.4758,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 1.379063809667179e-05,
|
|
"loss": 1.466,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 1.3782356496920481e-05,
|
|
"loss": 1.542,
|
|
"step": 1507
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 1.377407186885787e-05,
|
|
"loss": 1.2993,
|
|
"step": 1508
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 1.3765784219116975e-05,
|
|
"loss": 1.1287,
|
|
"step": 1509
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 1.3757493554333232e-05,
|
|
"loss": 1.4296,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 1.3749199881144488e-05,
|
|
"loss": 1.3555,
|
|
"step": 1511
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 1.3740903206191005e-05,
|
|
"loss": 1.1915,
|
|
"step": 1512
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 1.3732603536115441e-05,
|
|
"loss": 1.3514,
|
|
"step": 1513
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 1.3724300877562856e-05,
|
|
"loss": 1.5251,
|
|
"step": 1514
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 1.3715995237180704e-05,
|
|
"loss": 1.2751,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 1.3707686621618826e-05,
|
|
"loss": 1.4912,
|
|
"step": 1516
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 1.3699375037529441e-05,
|
|
"loss": 1.3438,
|
|
"step": 1517
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 1.3691060491567147e-05,
|
|
"loss": 1.4048,
|
|
"step": 1518
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 1.368274299038892e-05,
|
|
"loss": 1.3261,
|
|
"step": 1519
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 1.3674422540654087e-05,
|
|
"loss": 1.2173,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 1.3666099149024353e-05,
|
|
"loss": 1.5061,
|
|
"step": 1521
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 1.3657772822163762e-05,
|
|
"loss": 1.5242,
|
|
"step": 1522
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 1.3649443566738725e-05,
|
|
"loss": 1.378,
|
|
"step": 1523
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 1.364111138941798e-05,
|
|
"loss": 1.3816,
|
|
"step": 1524
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 1.363277629687262e-05,
|
|
"loss": 1.2557,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 1.3624438295776065e-05,
|
|
"loss": 1.1901,
|
|
"step": 1526
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 1.3616097392804064e-05,
|
|
"loss": 1.25,
|
|
"step": 1527
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 1.3607753594634686e-05,
|
|
"loss": 1.5553,
|
|
"step": 1528
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 1.3599406907948323e-05,
|
|
"loss": 1.1987,
|
|
"step": 1529
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 1.3591057339427676e-05,
|
|
"loss": 1.3669,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 1.358270489575776e-05,
|
|
"loss": 1.5365,
|
|
"step": 1531
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 1.3574349583625886e-05,
|
|
"loss": 1.2655,
|
|
"step": 1532
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 1.3565991409721663e-05,
|
|
"loss": 1.4135,
|
|
"step": 1533
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 1.3557630380736992e-05,
|
|
"loss": 1.4979,
|
|
"step": 1534
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 1.3549266503366054e-05,
|
|
"loss": 1.514,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 1.3540899784305321e-05,
|
|
"loss": 1.5026,
|
|
"step": 1536
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 1.3532530230253533e-05,
|
|
"loss": 1.2502,
|
|
"step": 1537
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 1.3524157847911706e-05,
|
|
"loss": 1.2161,
|
|
"step": 1538
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 1.3515782643983108e-05,
|
|
"loss": 1.4827,
|
|
"step": 1539
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 1.3507404625173276e-05,
|
|
"loss": 1.1685,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 1.3499023798190009e-05,
|
|
"loss": 1.398,
|
|
"step": 1541
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 1.3490640169743334e-05,
|
|
"loss": 1.4503,
|
|
"step": 1542
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 1.348225374654553e-05,
|
|
"loss": 1.4451,
|
|
"step": 1543
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 1.3473864535311126e-05,
|
|
"loss": 1.3711,
|
|
"step": 1544
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 1.3465472542756856e-05,
|
|
"loss": 1.3776,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 1.3457077775601713e-05,
|
|
"loss": 1.1468,
|
|
"step": 1546
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 1.3448680240566882e-05,
|
|
"loss": 1.4449,
|
|
"step": 1547
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 1.344027994437579e-05,
|
|
"loss": 1.6278,
|
|
"step": 1548
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 1.3431876893754052e-05,
|
|
"loss": 1.3936,
|
|
"step": 1549
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 1.3423471095429503e-05,
|
|
"loss": 1.2479,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 1.341506255613217e-05,
|
|
"loss": 1.3661,
|
|
"step": 1551
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 1.3406651282594285e-05,
|
|
"loss": 1.3819,
|
|
"step": 1552
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 1.3398237281550255e-05,
|
|
"loss": 1.2895,
|
|
"step": 1553
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 1.3389820559736676e-05,
|
|
"loss": 1.1686,
|
|
"step": 1554
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 1.3381401123892333e-05,
|
|
"loss": 1.1607,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 1.337297898075817e-05,
|
|
"loss": 1.2698,
|
|
"step": 1556
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 1.3364554137077303e-05,
|
|
"loss": 1.4363,
|
|
"step": 1557
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 1.3356126599595015e-05,
|
|
"loss": 1.3276,
|
|
"step": 1558
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 1.3347696375058737e-05,
|
|
"loss": 1.4119,
|
|
"step": 1559
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 1.3339263470218055e-05,
|
|
"loss": 1.3512,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 1.3330827891824707e-05,
|
|
"loss": 1.4483,
|
|
"step": 1561
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 1.3322389646632563e-05,
|
|
"loss": 1.3127,
|
|
"step": 1562
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 1.3313948741397635e-05,
|
|
"loss": 1.5442,
|
|
"step": 1563
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 1.3305505182878058e-05,
|
|
"loss": 1.5216,
|
|
"step": 1564
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 1.3297058977834096e-05,
|
|
"loss": 1.1778,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 1.328861013302813e-05,
|
|
"loss": 1.3419,
|
|
"step": 1566
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 1.3280158655224656e-05,
|
|
"loss": 1.4901,
|
|
"step": 1567
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 1.3271704551190276e-05,
|
|
"loss": 1.3854,
|
|
"step": 1568
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 1.32632478276937e-05,
|
|
"loss": 1.3625,
|
|
"step": 1569
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 1.3254788491505726e-05,
|
|
"loss": 1.1688,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 1.324632654939925e-05,
|
|
"loss": 1.4073,
|
|
"step": 1571
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 1.3237862008149257e-05,
|
|
"loss": 1.3542,
|
|
"step": 1572
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 1.3229394874532805e-05,
|
|
"loss": 1.4333,
|
|
"step": 1573
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 1.3220925155329037e-05,
|
|
"loss": 1.4013,
|
|
"step": 1574
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 1.3212452857319154e-05,
|
|
"loss": 1.3901,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 1.3203977987286431e-05,
|
|
"loss": 1.4585,
|
|
"step": 1576
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 1.3195500552016204e-05,
|
|
"loss": 1.3536,
|
|
"step": 1577
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 1.3187020558295857e-05,
|
|
"loss": 1.251,
|
|
"step": 1578
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 1.3178538012914825e-05,
|
|
"loss": 1.4833,
|
|
"step": 1579
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 1.3170052922664582e-05,
|
|
"loss": 1.2106,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 1.3161565294338645e-05,
|
|
"loss": 1.4431,
|
|
"step": 1581
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 1.315307513473256e-05,
|
|
"loss": 1.3413,
|
|
"step": 1582
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 1.3144582450643903e-05,
|
|
"loss": 1.4538,
|
|
"step": 1583
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 1.3136087248872268e-05,
|
|
"loss": 1.348,
|
|
"step": 1584
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 1.312758953621926e-05,
|
|
"loss": 1.3626,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 1.3119089319488508e-05,
|
|
"loss": 1.0874,
|
|
"step": 1586
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 1.3110586605485632e-05,
|
|
"loss": 1.2894,
|
|
"step": 1587
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 1.3102081401018261e-05,
|
|
"loss": 1.3767,
|
|
"step": 1588
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 1.3093573712896009e-05,
|
|
"loss": 1.3777,
|
|
"step": 1589
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 1.3085063547930487e-05,
|
|
"loss": 1.4207,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 1.3076550912935285e-05,
|
|
"loss": 1.5194,
|
|
"step": 1591
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 1.306803581472597e-05,
|
|
"loss": 1.2201,
|
|
"step": 1592
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 1.3059518260120086e-05,
|
|
"loss": 1.39,
|
|
"step": 1593
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 1.3050998255937138e-05,
|
|
"loss": 1.1031,
|
|
"step": 1594
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 1.3042475808998594e-05,
|
|
"loss": 1.2231,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 1.3033950926127877e-05,
|
|
"loss": 1.4202,
|
|
"step": 1596
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 1.3025423614150361e-05,
|
|
"loss": 1.3133,
|
|
"step": 1597
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 1.3016893879893368e-05,
|
|
"loss": 1.4693,
|
|
"step": 1598
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 1.3008361730186154e-05,
|
|
"loss": 1.3915,
|
|
"step": 1599
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 1.2999827171859916e-05,
|
|
"loss": 1.4321,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 1.299129021174777e-05,
|
|
"loss": 1.0971,
|
|
"step": 1601
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 1.2982750856684761e-05,
|
|
"loss": 1.2198,
|
|
"step": 1602
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 1.297420911350785e-05,
|
|
"loss": 1.262,
|
|
"step": 1603
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 1.2965664989055915e-05,
|
|
"loss": 1.3038,
|
|
"step": 1604
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 1.2957118490169731e-05,
|
|
"loss": 1.2564,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 1.294856962369198e-05,
|
|
"loss": 1.435,
|
|
"step": 1606
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 1.2940018396467238e-05,
|
|
"loss": 1.2157,
|
|
"step": 1607
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 1.2931464815341977e-05,
|
|
"loss": 1.2414,
|
|
"step": 1608
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 1.292290888716454e-05,
|
|
"loss": 1.4363,
|
|
"step": 1609
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 1.2914350618785164e-05,
|
|
"loss": 1.3435,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 1.290579001705595e-05,
|
|
"loss": 1.3291,
|
|
"step": 1611
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 1.2897227088830872e-05,
|
|
"loss": 1.4179,
|
|
"step": 1612
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 1.2888661840965765e-05,
|
|
"loss": 1.5886,
|
|
"step": 1613
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 1.2880094280318321e-05,
|
|
"loss": 1.1919,
|
|
"step": 1614
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 1.2871524413748086e-05,
|
|
"loss": 1.2229,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 1.2862952248116444e-05,
|
|
"loss": 1.5276,
|
|
"step": 1616
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 1.2854377790286634e-05,
|
|
"loss": 1.2807,
|
|
"step": 1617
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 1.2845801047123713e-05,
|
|
"loss": 1.5177,
|
|
"step": 1618
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 1.2837222025494587e-05,
|
|
"loss": 1.4845,
|
|
"step": 1619
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 1.282864073226797e-05,
|
|
"loss": 1.3229,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 1.28200571743144e-05,
|
|
"loss": 1.6084,
|
|
"step": 1621
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 1.2811471358506229e-05,
|
|
"loss": 1.1552,
|
|
"step": 1622
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 1.2802883291717623e-05,
|
|
"loss": 1.3066,
|
|
"step": 1623
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 1.2794292980824539e-05,
|
|
"loss": 1.3494,
|
|
"step": 1624
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 1.2785700432704735e-05,
|
|
"loss": 1.5595,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 1.2777105654237764e-05,
|
|
"loss": 1.2633,
|
|
"step": 1626
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 1.276850865230496e-05,
|
|
"loss": 1.2219,
|
|
"step": 1627
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 1.275990943378944e-05,
|
|
"loss": 1.2539,
|
|
"step": 1628
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 1.2751308005576094e-05,
|
|
"loss": 1.2979,
|
|
"step": 1629
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 1.274270437455158e-05,
|
|
"loss": 1.4673,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 1.2734098547604324e-05,
|
|
"loss": 1.4527,
|
|
"step": 1631
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 1.2725490531624506e-05,
|
|
"loss": 1.4201,
|
|
"step": 1632
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 1.271688033350406e-05,
|
|
"loss": 1.1408,
|
|
"step": 1633
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 1.270826796013667e-05,
|
|
"loss": 1.5987,
|
|
"step": 1634
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 1.2699653418417755e-05,
|
|
"loss": 1.2848,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 1.2691036715244478e-05,
|
|
"loss": 1.3588,
|
|
"step": 1636
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 1.2682417857515726e-05,
|
|
"loss": 1.2502,
|
|
"step": 1637
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 1.2673796852132112e-05,
|
|
"loss": 1.5378,
|
|
"step": 1638
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 1.2665173705995973e-05,
|
|
"loss": 1.5244,
|
|
"step": 1639
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 1.2656548426011356e-05,
|
|
"loss": 1.3541,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 1.2647921019084022e-05,
|
|
"loss": 1.356,
|
|
"step": 1641
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 1.2639291492121419e-05,
|
|
"loss": 1.3065,
|
|
"step": 1642
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 1.2630659852032712e-05,
|
|
"loss": 1.5785,
|
|
"step": 1643
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 1.2622026105728753e-05,
|
|
"loss": 1.5106,
|
|
"step": 1644
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 1.2613390260122068e-05,
|
|
"loss": 1.3887,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 1.260475232212688e-05,
|
|
"loss": 1.3105,
|
|
"step": 1646
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 1.2596112298659073e-05,
|
|
"loss": 1.3814,
|
|
"step": 1647
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 1.2587470196636214e-05,
|
|
"loss": 1.2523,
|
|
"step": 1648
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 1.2578826022977524e-05,
|
|
"loss": 1.205,
|
|
"step": 1649
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 1.2570179784603891e-05,
|
|
"loss": 1.7147,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 1.2561531488437849e-05,
|
|
"loss": 1.3481,
|
|
"step": 1651
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 1.255288114140358e-05,
|
|
"loss": 1.4658,
|
|
"step": 1652
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 1.254422875042691e-05,
|
|
"loss": 1.5643,
|
|
"step": 1653
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 1.2535574322435309e-05,
|
|
"loss": 1.2003,
|
|
"step": 1654
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 1.2526917864357865e-05,
|
|
"loss": 1.3564,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 1.2518259383125294e-05,
|
|
"loss": 1.4848,
|
|
"step": 1656
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 1.2509598885669938e-05,
|
|
"loss": 1.3381,
|
|
"step": 1657
|
|
},
|
|
{
|
|
"epoch": 1.35,
|
|
"learning_rate": 1.250093637892575e-05,
|
|
"loss": 1.2341,
|
|
"step": 1658
|
|
},
|
|
{
|
|
"epoch": 1.35,
|
|
"learning_rate": 1.2492271869828292e-05,
|
|
"loss": 1.3658,
|
|
"step": 1659
|
|
},
|
|
{
|
|
"epoch": 1.35,
|
|
"learning_rate": 1.2483605365314733e-05,
|
|
"loss": 1.1806,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 1.35,
|
|
"learning_rate": 1.2474936872323828e-05,
|
|
"loss": 1.2806,
|
|
"step": 1661
|
|
},
|
|
{
|
|
"epoch": 1.35,
|
|
"learning_rate": 1.2466266397795937e-05,
|
|
"loss": 1.5973,
|
|
"step": 1662
|
|
},
|
|
{
|
|
"epoch": 1.35,
|
|
"learning_rate": 1.2457593948672994e-05,
|
|
"loss": 1.3868,
|
|
"step": 1663
|
|
},
|
|
{
|
|
"epoch": 1.35,
|
|
"learning_rate": 1.2448919531898531e-05,
|
|
"loss": 1.2798,
|
|
"step": 1664
|
|
},
|
|
{
|
|
"epoch": 1.35,
|
|
"learning_rate": 1.244024315441764e-05,
|
|
"loss": 1.7203,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 1.35,
|
|
"learning_rate": 1.2431564823176987e-05,
|
|
"loss": 1.2801,
|
|
"step": 1666
|
|
},
|
|
{
|
|
"epoch": 1.35,
|
|
"learning_rate": 1.2422884545124805e-05,
|
|
"loss": 1.5749,
|
|
"step": 1667
|
|
},
|
|
{
|
|
"epoch": 1.35,
|
|
"learning_rate": 1.2414202327210885e-05,
|
|
"loss": 1.4191,
|
|
"step": 1668
|
|
},
|
|
{
|
|
"epoch": 1.35,
|
|
"learning_rate": 1.240551817638657e-05,
|
|
"loss": 1.3111,
|
|
"step": 1669
|
|
},
|
|
{
|
|
"epoch": 1.36,
|
|
"learning_rate": 1.2396832099604751e-05,
|
|
"loss": 1.5119,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 1.36,
|
|
"learning_rate": 1.238814410381986e-05,
|
|
"loss": 1.4839,
|
|
"step": 1671
|
|
},
|
|
{
|
|
"epoch": 1.36,
|
|
"learning_rate": 1.237945419598787e-05,
|
|
"loss": 1.4956,
|
|
"step": 1672
|
|
},
|
|
{
|
|
"epoch": 1.36,
|
|
"learning_rate": 1.2370762383066274e-05,
|
|
"loss": 1.1764,
|
|
"step": 1673
|
|
},
|
|
{
|
|
"epoch": 1.36,
|
|
"learning_rate": 1.2362068672014103e-05,
|
|
"loss": 1.3471,
|
|
"step": 1674
|
|
},
|
|
{
|
|
"epoch": 1.36,
|
|
"learning_rate": 1.23533730697919e-05,
|
|
"loss": 1.3504,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 1.36,
|
|
"learning_rate": 1.2344675583361729e-05,
|
|
"loss": 1.3642,
|
|
"step": 1676
|
|
},
|
|
{
|
|
"epoch": 1.36,
|
|
"learning_rate": 1.2335976219687149e-05,
|
|
"loss": 1.4143,
|
|
"step": 1677
|
|
},
|
|
{
|
|
"epoch": 1.36,
|
|
"learning_rate": 1.2327274985733239e-05,
|
|
"loss": 1.4078,
|
|
"step": 1678
|
|
},
|
|
{
|
|
"epoch": 1.36,
|
|
"learning_rate": 1.2318571888466563e-05,
|
|
"loss": 1.4427,
|
|
"step": 1679
|
|
},
|
|
{
|
|
"epoch": 1.36,
|
|
"learning_rate": 1.2309866934855184e-05,
|
|
"loss": 1.2632,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 1.36,
|
|
"learning_rate": 1.2301160131868648e-05,
|
|
"loss": 1.2185,
|
|
"step": 1681
|
|
},
|
|
{
|
|
"epoch": 1.37,
|
|
"learning_rate": 1.2292451486477982e-05,
|
|
"loss": 1.5336,
|
|
"step": 1682
|
|
},
|
|
{
|
|
"epoch": 1.37,
|
|
"learning_rate": 1.2283741005655684e-05,
|
|
"loss": 1.4941,
|
|
"step": 1683
|
|
},
|
|
{
|
|
"epoch": 1.37,
|
|
"learning_rate": 1.227502869637573e-05,
|
|
"loss": 1.1236,
|
|
"step": 1684
|
|
},
|
|
{
|
|
"epoch": 1.37,
|
|
"learning_rate": 1.2266314565613557e-05,
|
|
"loss": 1.2926,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 1.37,
|
|
"learning_rate": 1.2257598620346057e-05,
|
|
"loss": 1.6736,
|
|
"step": 1686
|
|
},
|
|
{
|
|
"epoch": 1.37,
|
|
"learning_rate": 1.2248880867551576e-05,
|
|
"loss": 1.6271,
|
|
"step": 1687
|
|
},
|
|
{
|
|
"epoch": 1.37,
|
|
"learning_rate": 1.2240161314209911e-05,
|
|
"loss": 1.4419,
|
|
"step": 1688
|
|
},
|
|
{
|
|
"epoch": 1.37,
|
|
"learning_rate": 1.2231439967302293e-05,
|
|
"loss": 1.552,
|
|
"step": 1689
|
|
},
|
|
{
|
|
"epoch": 1.37,
|
|
"learning_rate": 1.22227168338114e-05,
|
|
"loss": 1.3393,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 1.37,
|
|
"learning_rate": 1.2213991920721325e-05,
|
|
"loss": 1.241,
|
|
"step": 1691
|
|
},
|
|
{
|
|
"epoch": 1.37,
|
|
"learning_rate": 1.2205265235017603e-05,
|
|
"loss": 1.3234,
|
|
"step": 1692
|
|
},
|
|
{
|
|
"epoch": 1.37,
|
|
"learning_rate": 1.2196536783687178e-05,
|
|
"loss": 1.5139,
|
|
"step": 1693
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 1.2187806573718409e-05,
|
|
"loss": 1.4869,
|
|
"step": 1694
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 1.2179074612101064e-05,
|
|
"loss": 1.4606,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 1.2170340905826313e-05,
|
|
"loss": 1.313,
|
|
"step": 1696
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 1.2161605461886721e-05,
|
|
"loss": 1.1431,
|
|
"step": 1697
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 1.215286828727625e-05,
|
|
"loss": 1.427,
|
|
"step": 1698
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 1.214412938899024e-05,
|
|
"loss": 1.4114,
|
|
"step": 1699
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 1.213538877402542e-05,
|
|
"loss": 1.5207,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 1.2126646449379889e-05,
|
|
"loss": 1.3779,
|
|
"step": 1701
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 1.2117902422053106e-05,
|
|
"loss": 1.3646,
|
|
"step": 1702
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 1.2109156699045907e-05,
|
|
"loss": 1.4336,
|
|
"step": 1703
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 1.2100409287360479e-05,
|
|
"loss": 1.4126,
|
|
"step": 1704
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 1.2091660194000366e-05,
|
|
"loss": 1.3375,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 1.2082909425970449e-05,
|
|
"loss": 1.3362,
|
|
"step": 1706
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 1.2074156990276959e-05,
|
|
"loss": 1.4741,
|
|
"step": 1707
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 1.2065402893927458e-05,
|
|
"loss": 1.3806,
|
|
"step": 1708
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 1.2056647143930834e-05,
|
|
"loss": 1.4421,
|
|
"step": 1709
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 1.204788974729731e-05,
|
|
"loss": 1.2279,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 1.2039130711038418e-05,
|
|
"loss": 1.3705,
|
|
"step": 1711
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 1.2030370042167003e-05,
|
|
"loss": 1.2887,
|
|
"step": 1712
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 1.2021607747697222e-05,
|
|
"loss": 1.4646,
|
|
"step": 1713
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 1.201284383464453e-05,
|
|
"loss": 1.3647,
|
|
"step": 1714
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 1.2004078310025677e-05,
|
|
"loss": 1.3371,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 1.1995311180858709e-05,
|
|
"loss": 1.209,
|
|
"step": 1716
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 1.198654245416295e-05,
|
|
"loss": 1.2178,
|
|
"step": 1717
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 1.1977772136959008e-05,
|
|
"loss": 1.4035,
|
|
"step": 1718
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 1.1969000236268758e-05,
|
|
"loss": 1.2608,
|
|
"step": 1719
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 1.196022675911535e-05,
|
|
"loss": 1.5304,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 1.1951451712523197e-05,
|
|
"loss": 1.3412,
|
|
"step": 1721
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 1.1942675103517958e-05,
|
|
"loss": 1.3394,
|
|
"step": 1722
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 1.1933896939126552e-05,
|
|
"loss": 1.1842,
|
|
"step": 1723
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 1.1925117226377145e-05,
|
|
"loss": 1.642,
|
|
"step": 1724
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 1.1916335972299134e-05,
|
|
"loss": 1.2934,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 1.1907553183923154e-05,
|
|
"loss": 1.2969,
|
|
"step": 1726
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 1.1898768868281075e-05,
|
|
"loss": 1.3686,
|
|
"step": 1727
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 1.1889983032405981e-05,
|
|
"loss": 1.5193,
|
|
"step": 1728
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 1.1881195683332175e-05,
|
|
"loss": 1.5007,
|
|
"step": 1729
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 1.1872406828095172e-05,
|
|
"loss": 1.1765,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 1.1863616473731697e-05,
|
|
"loss": 1.2524,
|
|
"step": 1731
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 1.185482462727967e-05,
|
|
"loss": 1.4331,
|
|
"step": 1732
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 1.1846031295778204e-05,
|
|
"loss": 1.2925,
|
|
"step": 1733
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 1.1837236486267611e-05,
|
|
"loss": 1.3702,
|
|
"step": 1734
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 1.1828440205789375e-05,
|
|
"loss": 1.4268,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 1.1819642461386167e-05,
|
|
"loss": 1.2939,
|
|
"step": 1736
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 1.1810843260101819e-05,
|
|
"loss": 1.3884,
|
|
"step": 1737
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 1.1802042608981344e-05,
|
|
"loss": 1.3406,
|
|
"step": 1738
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 1.1793240515070903e-05,
|
|
"loss": 1.4246,
|
|
"step": 1739
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 1.1784436985417816e-05,
|
|
"loss": 1.3585,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 1.1775632027070557e-05,
|
|
"loss": 1.6153,
|
|
"step": 1741
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 1.1766825647078737e-05,
|
|
"loss": 1.4179,
|
|
"step": 1742
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 1.1758017852493113e-05,
|
|
"loss": 1.4929,
|
|
"step": 1743
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 1.1749208650365564e-05,
|
|
"loss": 1.4399,
|
|
"step": 1744
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 1.174039804774911e-05,
|
|
"loss": 1.2623,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 1.1731586051697878e-05,
|
|
"loss": 1.4141,
|
|
"step": 1746
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 1.1722772669267119e-05,
|
|
"loss": 1.1584,
|
|
"step": 1747
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 1.1713957907513196e-05,
|
|
"loss": 1.3865,
|
|
"step": 1748
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 1.1705141773493567e-05,
|
|
"loss": 1.3309,
|
|
"step": 1749
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 1.1696324274266792e-05,
|
|
"loss": 1.3112,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 1.1687505416892534e-05,
|
|
"loss": 1.2483,
|
|
"step": 1751
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 1.1678685208431533e-05,
|
|
"loss": 1.4256,
|
|
"step": 1752
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 1.166986365594561e-05,
|
|
"loss": 1.2796,
|
|
"step": 1753
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 1.1661040766497666e-05,
|
|
"loss": 1.38,
|
|
"step": 1754
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 1.1652216547151678e-05,
|
|
"loss": 1.4245,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 1.1643391004972671e-05,
|
|
"loss": 1.2343,
|
|
"step": 1756
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 1.163456414702675e-05,
|
|
"loss": 1.1912,
|
|
"step": 1757
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 1.1625735980381055e-05,
|
|
"loss": 1.3505,
|
|
"step": 1758
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 1.1616906512103787e-05,
|
|
"loss": 1.2572,
|
|
"step": 1759
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 1.1608075749264184e-05,
|
|
"loss": 1.4162,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 1.1599243698932516e-05,
|
|
"loss": 1.5289,
|
|
"step": 1761
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 1.1590410368180093e-05,
|
|
"loss": 1.243,
|
|
"step": 1762
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 1.1581575764079244e-05,
|
|
"loss": 1.2575,
|
|
"step": 1763
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 1.1572739893703323e-05,
|
|
"loss": 1.2799,
|
|
"step": 1764
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 1.1563902764126684e-05,
|
|
"loss": 1.3568,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 1.1555064382424705e-05,
|
|
"loss": 1.4652,
|
|
"step": 1766
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 1.1546224755673765e-05,
|
|
"loss": 1.2834,
|
|
"step": 1767
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 1.1537383890951225e-05,
|
|
"loss": 1.3822,
|
|
"step": 1768
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 1.1528541795335459e-05,
|
|
"loss": 1.2673,
|
|
"step": 1769
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 1.1519698475905809e-05,
|
|
"loss": 1.4577,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 1.15108539397426e-05,
|
|
"loss": 1.2184,
|
|
"step": 1771
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 1.1502008193927139e-05,
|
|
"loss": 1.1849,
|
|
"step": 1772
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 1.1493161245541696e-05,
|
|
"loss": 1.1562,
|
|
"step": 1773
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 1.1484313101669504e-05,
|
|
"loss": 1.4044,
|
|
"step": 1774
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 1.147546376939475e-05,
|
|
"loss": 1.4412,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 1.1466613255802581e-05,
|
|
"loss": 1.2601,
|
|
"step": 1776
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 1.145776156797908e-05,
|
|
"loss": 1.5612,
|
|
"step": 1777
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 1.144890871301128e-05,
|
|
"loss": 1.4824,
|
|
"step": 1778
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 1.144005469798714e-05,
|
|
"loss": 1.4276,
|
|
"step": 1779
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 1.1431199529995553e-05,
|
|
"loss": 1.5217,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 1.1422343216126333e-05,
|
|
"loss": 1.2807,
|
|
"step": 1781
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 1.1413485763470212e-05,
|
|
"loss": 1.2648,
|
|
"step": 1782
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 1.140462717911883e-05,
|
|
"loss": 1.3887,
|
|
"step": 1783
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 1.1395767470164743e-05,
|
|
"loss": 1.5596,
|
|
"step": 1784
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 1.1386906643701396e-05,
|
|
"loss": 1.3727,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 1.1378044706823133e-05,
|
|
"loss": 1.4401,
|
|
"step": 1786
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 1.1369181666625191e-05,
|
|
"loss": 1.3577,
|
|
"step": 1787
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 1.1360317530203689e-05,
|
|
"loss": 1.2192,
|
|
"step": 1788
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 1.135145230465562e-05,
|
|
"loss": 1.41,
|
|
"step": 1789
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 1.1342585997078845e-05,
|
|
"loss": 1.3414,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 1.1333718614572106e-05,
|
|
"loss": 1.3447,
|
|
"step": 1791
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 1.1324850164234991e-05,
|
|
"loss": 1.3827,
|
|
"step": 1792
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 1.1315980653167948e-05,
|
|
"loss": 1.3029,
|
|
"step": 1793
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 1.1307110088472283e-05,
|
|
"loss": 1.4296,
|
|
"step": 1794
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 1.129823847725013e-05,
|
|
"loss": 1.3935,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 1.1289365826604473e-05,
|
|
"loss": 1.3787,
|
|
"step": 1796
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 1.1280492143639115e-05,
|
|
"loss": 1.3537,
|
|
"step": 1797
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 1.1271617435458704e-05,
|
|
"loss": 1.2934,
|
|
"step": 1798
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 1.1262741709168696e-05,
|
|
"loss": 1.3282,
|
|
"step": 1799
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 1.1253864971875364e-05,
|
|
"loss": 1.3369,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 1.1244987230685794e-05,
|
|
"loss": 1.4544,
|
|
"step": 1801
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 1.1236108492707867e-05,
|
|
"loss": 1.2788,
|
|
"step": 1802
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 1.1227228765050278e-05,
|
|
"loss": 1.4919,
|
|
"step": 1803
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 1.1218348054822495e-05,
|
|
"loss": 1.277,
|
|
"step": 1804
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 1.1209466369134792e-05,
|
|
"loss": 1.2894,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 1.1200583715098207e-05,
|
|
"loss": 1.2629,
|
|
"step": 1806
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 1.1191700099824566e-05,
|
|
"loss": 1.3967,
|
|
"step": 1807
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 1.1182815530426453e-05,
|
|
"loss": 1.3258,
|
|
"step": 1808
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 1.1173930014017232e-05,
|
|
"loss": 1.399,
|
|
"step": 1809
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 1.1165043557711006e-05,
|
|
"loss": 1.3594,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 1.1156156168622645e-05,
|
|
"loss": 1.4375,
|
|
"step": 1811
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 1.1147267853867758e-05,
|
|
"loss": 1.5027,
|
|
"step": 1812
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 1.1138378620562702e-05,
|
|
"loss": 1.4177,
|
|
"step": 1813
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 1.1129488475824561e-05,
|
|
"loss": 1.4306,
|
|
"step": 1814
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 1.112059742677116e-05,
|
|
"loss": 1.2067,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 1.1111705480521033e-05,
|
|
"loss": 1.374,
|
|
"step": 1816
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 1.1102812644193447e-05,
|
|
"loss": 1.3734,
|
|
"step": 1817
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 1.1093918924908369e-05,
|
|
"loss": 1.4599,
|
|
"step": 1818
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 1.1085024329786485e-05,
|
|
"loss": 1.4371,
|
|
"step": 1819
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 1.1076128865949172e-05,
|
|
"loss": 1.3526,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 1.1067232540518509e-05,
|
|
"loss": 1.583,
|
|
"step": 1821
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 1.1058335360617263e-05,
|
|
"loss": 1.3718,
|
|
"step": 1822
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 1.1049437333368884e-05,
|
|
"loss": 1.6341,
|
|
"step": 1823
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 1.10405384658975e-05,
|
|
"loss": 1.1471,
|
|
"step": 1824
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 1.1031638765327917e-05,
|
|
"loss": 1.3457,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 1.1022738238785599e-05,
|
|
"loss": 1.1847,
|
|
"step": 1826
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 1.1013836893396681e-05,
|
|
"loss": 1.4654,
|
|
"step": 1827
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 1.100493473628794e-05,
|
|
"loss": 1.4052,
|
|
"step": 1828
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 1.0996031774586823e-05,
|
|
"loss": 1.4091,
|
|
"step": 1829
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 1.0987128015421407e-05,
|
|
"loss": 1.1489,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 1.0978223465920407e-05,
|
|
"loss": 1.4343,
|
|
"step": 1831
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 1.0969318133213176e-05,
|
|
"loss": 1.4311,
|
|
"step": 1832
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 1.096041202442969e-05,
|
|
"loss": 1.4817,
|
|
"step": 1833
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 1.0951505146700552e-05,
|
|
"loss": 1.3512,
|
|
"step": 1834
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 1.0942597507156975e-05,
|
|
"loss": 1.2316,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 1.0933689112930788e-05,
|
|
"loss": 1.1582,
|
|
"step": 1836
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 1.0924779971154416e-05,
|
|
"loss": 1.382,
|
|
"step": 1837
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 1.0915870088960887e-05,
|
|
"loss": 1.3988,
|
|
"step": 1838
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 1.0906959473483825e-05,
|
|
"loss": 1.2548,
|
|
"step": 1839
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 1.0898048131857437e-05,
|
|
"loss": 1.225,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 1.088913607121651e-05,
|
|
"loss": 1.172,
|
|
"step": 1841
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 1.088022329869641e-05,
|
|
"loss": 1.332,
|
|
"step": 1842
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 1.0871309821433076e-05,
|
|
"loss": 1.4419,
|
|
"step": 1843
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 1.0862395646562999e-05,
|
|
"loss": 1.3903,
|
|
"step": 1844
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 1.0853480781223247e-05,
|
|
"loss": 1.3717,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 1.084456523255142e-05,
|
|
"loss": 1.4772,
|
|
"step": 1846
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 1.0835649007685683e-05,
|
|
"loss": 1.3871,
|
|
"step": 1847
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 1.0826732113764731e-05,
|
|
"loss": 1.3242,
|
|
"step": 1848
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 1.0817814557927799e-05,
|
|
"loss": 1.58,
|
|
"step": 1849
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 1.0808896347314655e-05,
|
|
"loss": 1.1219,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 1.0799977489065586e-05,
|
|
"loss": 1.4183,
|
|
"step": 1851
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 1.07910579903214e-05,
|
|
"loss": 1.3424,
|
|
"step": 1852
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 1.0782137858223412e-05,
|
|
"loss": 1.225,
|
|
"step": 1853
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 1.0773217099913454e-05,
|
|
"loss": 1.4291,
|
|
"step": 1854
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 1.0764295722533855e-05,
|
|
"loss": 1.2866,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 1.0755373733227438e-05,
|
|
"loss": 1.4247,
|
|
"step": 1856
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 1.0746451139137519e-05,
|
|
"loss": 1.2513,
|
|
"step": 1857
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 1.0737527947407895e-05,
|
|
"loss": 1.4632,
|
|
"step": 1858
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 1.0728604165182843e-05,
|
|
"loss": 1.336,
|
|
"step": 1859
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 1.071967979960711e-05,
|
|
"loss": 1.2925,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 1.071075485782592e-05,
|
|
"loss": 1.4407,
|
|
"step": 1861
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 1.0701829346984944e-05,
|
|
"loss": 1.2831,
|
|
"step": 1862
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 1.0692903274230319e-05,
|
|
"loss": 1.3686,
|
|
"step": 1863
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 1.0683976646708628e-05,
|
|
"loss": 1.3686,
|
|
"step": 1864
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 1.06750494715669e-05,
|
|
"loss": 1.1596,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 1.06661217559526e-05,
|
|
"loss": 1.3261,
|
|
"step": 1866
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 1.0657193507013625e-05,
|
|
"loss": 1.3758,
|
|
"step": 1867
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 1.0648264731898306e-05,
|
|
"loss": 1.3585,
|
|
"step": 1868
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 1.0639335437755383e-05,
|
|
"loss": 1.5834,
|
|
"step": 1869
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 1.0630405631734026e-05,
|
|
"loss": 1.2812,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 1.0621475320983804e-05,
|
|
"loss": 1.4357,
|
|
"step": 1871
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 1.0612544512654696e-05,
|
|
"loss": 1.3995,
|
|
"step": 1872
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 1.0603613213897075e-05,
|
|
"loss": 1.4973,
|
|
"step": 1873
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 1.0594681431861708e-05,
|
|
"loss": 1.3388,
|
|
"step": 1874
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 1.058574917369975e-05,
|
|
"loss": 1.4048,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 1.0576816446562741e-05,
|
|
"loss": 1.2072,
|
|
"step": 1876
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 1.0567883257602588e-05,
|
|
"loss": 1.459,
|
|
"step": 1877
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 1.0558949613971575e-05,
|
|
"loss": 1.2638,
|
|
"step": 1878
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 1.0550015522822342e-05,
|
|
"loss": 1.2822,
|
|
"step": 1879
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 1.05410809913079e-05,
|
|
"loss": 1.3592,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 1.0532146026581598e-05,
|
|
"loss": 1.4121,
|
|
"step": 1881
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 1.0523210635797145e-05,
|
|
"loss": 1.3248,
|
|
"step": 1882
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 1.0514274826108581e-05,
|
|
"loss": 1.63,
|
|
"step": 1883
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 1.0505338604670288e-05,
|
|
"loss": 1.3351,
|
|
"step": 1884
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 1.0496401978636969e-05,
|
|
"loss": 1.0923,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 1.0487464955163666e-05,
|
|
"loss": 1.2586,
|
|
"step": 1886
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 1.0478527541405727e-05,
|
|
"loss": 1.4526,
|
|
"step": 1887
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 1.0469589744518812e-05,
|
|
"loss": 1.2367,
|
|
"step": 1888
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 1.0460651571658894e-05,
|
|
"loss": 1.3272,
|
|
"step": 1889
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 1.0451713029982245e-05,
|
|
"loss": 1.374,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 1.0442774126645432e-05,
|
|
"loss": 1.2719,
|
|
"step": 1891
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 1.043383486880531e-05,
|
|
"loss": 1.3313,
|
|
"step": 1892
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 1.0424895263619019e-05,
|
|
"loss": 1.56,
|
|
"step": 1893
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 1.0415955318243978e-05,
|
|
"loss": 1.3897,
|
|
"step": 1894
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 1.0407015039837877e-05,
|
|
"loss": 1.1725,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 1.0398074435558679e-05,
|
|
"loss": 1.382,
|
|
"step": 1896
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 1.0389133512564594e-05,
|
|
"loss": 1.4238,
|
|
"step": 1897
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 1.0380192278014098e-05,
|
|
"loss": 1.2015,
|
|
"step": 1898
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 1.0371250739065913e-05,
|
|
"loss": 1.4076,
|
|
"step": 1899
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 1.0362308902879006e-05,
|
|
"loss": 1.6571,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 1.0353366776612583e-05,
|
|
"loss": 1.4207,
|
|
"step": 1901
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 1.034442436742608e-05,
|
|
"loss": 1.463,
|
|
"step": 1902
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 1.0335481682479163e-05,
|
|
"loss": 1.272,
|
|
"step": 1903
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 1.0326538728931712e-05,
|
|
"loss": 1.2712,
|
|
"step": 1904
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 1.0317595513943823e-05,
|
|
"loss": 1.5191,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 1.0308652044675811e-05,
|
|
"loss": 1.5126,
|
|
"step": 1906
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 1.0299708328288189e-05,
|
|
"loss": 1.4951,
|
|
"step": 1907
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 1.0290764371941662e-05,
|
|
"loss": 1.322,
|
|
"step": 1908
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 1.028182018279713e-05,
|
|
"loss": 1.147,
|
|
"step": 1909
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 1.0272875768015686e-05,
|
|
"loss": 1.2702,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 1.02639311347586e-05,
|
|
"loss": 1.5392,
|
|
"step": 1911
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 1.0254986290187314e-05,
|
|
"loss": 1.3263,
|
|
"step": 1912
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 1.0246041241463439e-05,
|
|
"loss": 1.0942,
|
|
"step": 1913
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 1.0237095995748757e-05,
|
|
"loss": 1.4659,
|
|
"step": 1914
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 1.0228150560205192e-05,
|
|
"loss": 1.3598,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 1.021920494199484e-05,
|
|
"loss": 1.388,
|
|
"step": 1916
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 1.0210259148279933e-05,
|
|
"loss": 1.3448,
|
|
"step": 1917
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 1.020131318622284e-05,
|
|
"loss": 1.4721,
|
|
"step": 1918
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 1.0192367062986072e-05,
|
|
"loss": 1.2998,
|
|
"step": 1919
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 1.0183420785732261e-05,
|
|
"loss": 1.2965,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 1.0174474361624172e-05,
|
|
"loss": 1.5159,
|
|
"step": 1921
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 1.0165527797824682e-05,
|
|
"loss": 1.5008,
|
|
"step": 1922
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 1.0156581101496778e-05,
|
|
"loss": 1.0739,
|
|
"step": 1923
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 1.014763427980356e-05,
|
|
"loss": 1.4152,
|
|
"step": 1924
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 1.0138687339908219e-05,
|
|
"loss": 1.3814,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 1.012974028897405e-05,
|
|
"loss": 1.377,
|
|
"step": 1926
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 1.012079313416443e-05,
|
|
"loss": 1.2279,
|
|
"step": 1927
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 1.0111845882642824e-05,
|
|
"loss": 1.3148,
|
|
"step": 1928
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 1.0102898541572772e-05,
|
|
"loss": 1.1499,
|
|
"step": 1929
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 1.0093951118117882e-05,
|
|
"loss": 1.333,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 1.0085003619441834e-05,
|
|
"loss": 1.3545,
|
|
"step": 1931
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 1.007605605270837e-05,
|
|
"loss": 1.338,
|
|
"step": 1932
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 1.006710842508128e-05,
|
|
"loss": 1.4845,
|
|
"step": 1933
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 1.0058160743724401e-05,
|
|
"loss": 1.4036,
|
|
"step": 1934
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 1.0049213015801624e-05,
|
|
"loss": 1.394,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 1.0040265248476866e-05,
|
|
"loss": 1.1927,
|
|
"step": 1936
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 1.0031317448914082e-05,
|
|
"loss": 1.5658,
|
|
"step": 1937
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 1.0022369624277252e-05,
|
|
"loss": 1.394,
|
|
"step": 1938
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 1.0013421781730377e-05,
|
|
"loss": 1.4063,
|
|
"step": 1939
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 1.0004473928437465e-05,
|
|
"loss": 1.2678,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 9.99552607156254e-06,
|
|
"loss": 1.2457,
|
|
"step": 1941
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 9.986578218269626e-06,
|
|
"loss": 1.3116,
|
|
"step": 1942
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 9.977630375722748e-06,
|
|
"loss": 1.2905,
|
|
"step": 1943
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 9.968682551085921e-06,
|
|
"loss": 1.3456,
|
|
"step": 1944
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 9.959734751523137e-06,
|
|
"loss": 1.2678,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 9.95078698419838e-06,
|
|
"loss": 1.1139,
|
|
"step": 1946
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 9.941839256275602e-06,
|
|
"loss": 1.2593,
|
|
"step": 1947
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 9.932891574918724e-06,
|
|
"loss": 1.2834,
|
|
"step": 1948
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 9.923943947291635e-06,
|
|
"loss": 1.4561,
|
|
"step": 1949
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 9.914996380558168e-06,
|
|
"loss": 1.3187,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 9.906048881882122e-06,
|
|
"loss": 1.1716,
|
|
"step": 1951
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 9.897101458427235e-06,
|
|
"loss": 1.4233,
|
|
"step": 1952
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 9.888154117357178e-06,
|
|
"loss": 1.4619,
|
|
"step": 1953
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 9.879206865835571e-06,
|
|
"loss": 1.5272,
|
|
"step": 1954
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 9.870259711025955e-06,
|
|
"loss": 1.2376,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 9.861312660091784e-06,
|
|
"loss": 1.1281,
|
|
"step": 1956
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 9.852365720196441e-06,
|
|
"loss": 1.273,
|
|
"step": 1957
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 9.843418898503225e-06,
|
|
"loss": 1.3156,
|
|
"step": 1958
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 9.83447220217532e-06,
|
|
"loss": 1.3843,
|
|
"step": 1959
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 9.82552563837583e-06,
|
|
"loss": 1.3253,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 9.81657921426774e-06,
|
|
"loss": 1.6142,
|
|
"step": 1961
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 9.80763293701393e-06,
|
|
"loss": 1.289,
|
|
"step": 1962
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 9.798686813777165e-06,
|
|
"loss": 1.4349,
|
|
"step": 1963
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 9.78974085172007e-06,
|
|
"loss": 1.325,
|
|
"step": 1964
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 9.780795058005161e-06,
|
|
"loss": 1.224,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 9.771849439794811e-06,
|
|
"loss": 1.276,
|
|
"step": 1966
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 9.762904004251248e-06,
|
|
"loss": 1.1431,
|
|
"step": 1967
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 9.753958758536561e-06,
|
|
"loss": 1.4747,
|
|
"step": 1968
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 9.745013709812691e-06,
|
|
"loss": 1.4453,
|
|
"step": 1969
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 9.736068865241403e-06,
|
|
"loss": 1.1505,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 9.727124231984314e-06,
|
|
"loss": 1.2398,
|
|
"step": 1971
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 9.718179817202872e-06,
|
|
"loss": 1.3048,
|
|
"step": 1972
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 9.709235628058341e-06,
|
|
"loss": 1.2661,
|
|
"step": 1973
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 9.700291671711813e-06,
|
|
"loss": 1.3703,
|
|
"step": 1974
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 9.69134795532419e-06,
|
|
"loss": 1.3124,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 9.682404486056179e-06,
|
|
"loss": 1.4432,
|
|
"step": 1976
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 9.673461271068293e-06,
|
|
"loss": 1.1156,
|
|
"step": 1977
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 9.66451831752084e-06,
|
|
"loss": 1.2397,
|
|
"step": 1978
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 9.65557563257392e-06,
|
|
"loss": 1.1842,
|
|
"step": 1979
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 9.64663322338742e-06,
|
|
"loss": 1.3953,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 9.637691097120997e-06,
|
|
"loss": 1.242,
|
|
"step": 1981
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 9.62874926093409e-06,
|
|
"loss": 1.2762,
|
|
"step": 1982
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 9.619807721985908e-06,
|
|
"loss": 1.5879,
|
|
"step": 1983
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 9.610866487435411e-06,
|
|
"loss": 1.3831,
|
|
"step": 1984
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 9.601925564441323e-06,
|
|
"loss": 1.1721,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 9.592984960162125e-06,
|
|
"loss": 1.7178,
|
|
"step": 1986
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 9.584044681756023e-06,
|
|
"loss": 1.2176,
|
|
"step": 1987
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 9.575104736380985e-06,
|
|
"loss": 1.3244,
|
|
"step": 1988
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 9.566165131194693e-06,
|
|
"loss": 1.5657,
|
|
"step": 1989
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 9.557225873354571e-06,
|
|
"loss": 1.3577,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 9.548286970017759e-06,
|
|
"loss": 1.3155,
|
|
"step": 1991
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 9.539348428341107e-06,
|
|
"loss": 1.512,
|
|
"step": 1992
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 9.53041025548119e-06,
|
|
"loss": 1.5359,
|
|
"step": 1993
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 9.521472458594278e-06,
|
|
"loss": 1.4224,
|
|
"step": 1994
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 9.512535044836337e-06,
|
|
"loss": 1.2917,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 9.503598021363031e-06,
|
|
"loss": 1.4996,
|
|
"step": 1996
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 9.494661395329717e-06,
|
|
"loss": 1.4169,
|
|
"step": 1997
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 9.48572517389142e-06,
|
|
"loss": 1.3721,
|
|
"step": 1998
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 9.476789364202855e-06,
|
|
"loss": 1.2461,
|
|
"step": 1999
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 9.467853973418406e-06,
|
|
"loss": 1.6205,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 9.458919008692104e-06,
|
|
"loss": 1.4039,
|
|
"step": 2001
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 9.44998447717766e-06,
|
|
"loss": 1.1611,
|
|
"step": 2002
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 9.441050386028428e-06,
|
|
"loss": 1.4122,
|
|
"step": 2003
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 9.432116742397413e-06,
|
|
"loss": 1.4336,
|
|
"step": 2004
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 9.423183553437264e-06,
|
|
"loss": 1.3523,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 9.414250826300251e-06,
|
|
"loss": 1.2184,
|
|
"step": 2006
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 9.405318568138295e-06,
|
|
"loss": 1.4254,
|
|
"step": 2007
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 9.39638678610293e-06,
|
|
"loss": 1.4245,
|
|
"step": 2008
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 9.387455487345307e-06,
|
|
"loss": 1.4752,
|
|
"step": 2009
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 9.378524679016197e-06,
|
|
"loss": 1.5452,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 9.369594368265978e-06,
|
|
"loss": 1.4958,
|
|
"step": 2011
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 9.360664562244619e-06,
|
|
"loss": 1.4065,
|
|
"step": 2012
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 9.351735268101698e-06,
|
|
"loss": 1.5481,
|
|
"step": 2013
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 9.342806492986378e-06,
|
|
"loss": 1.1524,
|
|
"step": 2014
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 9.333878244047402e-06,
|
|
"loss": 1.4154,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 9.324950528433105e-06,
|
|
"loss": 1.2596,
|
|
"step": 2016
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 9.316023353291376e-06,
|
|
"loss": 1.3581,
|
|
"step": 2017
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 9.307096725769683e-06,
|
|
"loss": 1.3605,
|
|
"step": 2018
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 9.29817065301506e-06,
|
|
"loss": 1.4041,
|
|
"step": 2019
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 9.289245142174084e-06,
|
|
"loss": 1.3854,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 9.28032020039289e-06,
|
|
"loss": 1.3803,
|
|
"step": 2021
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 9.271395834817162e-06,
|
|
"loss": 1.573,
|
|
"step": 2022
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 9.262472052592106e-06,
|
|
"loss": 1.2565,
|
|
"step": 2023
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 9.253548860862481e-06,
|
|
"loss": 1.3364,
|
|
"step": 2024
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 9.244626266772565e-06,
|
|
"loss": 1.3914,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 9.235704277466148e-06,
|
|
"loss": 1.7444,
|
|
"step": 2026
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 9.226782900086548e-06,
|
|
"loss": 1.3178,
|
|
"step": 2027
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 9.217862141776593e-06,
|
|
"loss": 1.2497,
|
|
"step": 2028
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 9.208942009678604e-06,
|
|
"loss": 1.3084,
|
|
"step": 2029
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 9.20002251093442e-06,
|
|
"loss": 1.461,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 9.191103652685348e-06,
|
|
"loss": 1.2542,
|
|
"step": 2031
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 9.182185442072203e-06,
|
|
"loss": 1.368,
|
|
"step": 2032
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 9.173267886235272e-06,
|
|
"loss": 1.3993,
|
|
"step": 2033
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 9.164350992314319e-06,
|
|
"loss": 1.4769,
|
|
"step": 2034
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 9.155434767448581e-06,
|
|
"loss": 1.5584,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 9.146519218776758e-06,
|
|
"loss": 1.5657,
|
|
"step": 2036
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 9.137604353437003e-06,
|
|
"loss": 1.5124,
|
|
"step": 2037
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 9.128690178566926e-06,
|
|
"loss": 1.2568,
|
|
"step": 2038
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 9.119776701303591e-06,
|
|
"loss": 1.2187,
|
|
"step": 2039
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 9.110863928783492e-06,
|
|
"loss": 1.4107,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 9.101951868142565e-06,
|
|
"loss": 1.3541,
|
|
"step": 2041
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 9.09304052651618e-06,
|
|
"loss": 1.3701,
|
|
"step": 2042
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 9.084129911039115e-06,
|
|
"loss": 1.2891,
|
|
"step": 2043
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 9.075220028845589e-06,
|
|
"loss": 1.3847,
|
|
"step": 2044
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 9.066310887069215e-06,
|
|
"loss": 1.3216,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 9.057402492843027e-06,
|
|
"loss": 1.0815,
|
|
"step": 2046
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 9.048494853299453e-06,
|
|
"loss": 1.3375,
|
|
"step": 2047
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 9.039587975570314e-06,
|
|
"loss": 1.4144,
|
|
"step": 2048
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 9.030681866786827e-06,
|
|
"loss": 1.3102,
|
|
"step": 2049
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 9.021776534079598e-06,
|
|
"loss": 1.4685,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 9.012871984578594e-06,
|
|
"loss": 1.2715,
|
|
"step": 2051
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 9.003968225413175e-06,
|
|
"loss": 1.4278,
|
|
"step": 2052
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 8.995065263712062e-06,
|
|
"loss": 1.3344,
|
|
"step": 2053
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 8.986163106603322e-06,
|
|
"loss": 1.2915,
|
|
"step": 2054
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 8.977261761214401e-06,
|
|
"loss": 1.4315,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 8.968361234672088e-06,
|
|
"loss": 1.259,
|
|
"step": 2056
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 8.959461534102501e-06,
|
|
"loss": 1.249,
|
|
"step": 2057
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 8.95056266663112e-06,
|
|
"loss": 1.3643,
|
|
"step": 2058
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 8.941664639382738e-06,
|
|
"loss": 1.5233,
|
|
"step": 2059
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 8.932767459481493e-06,
|
|
"loss": 1.39,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 8.923871134050833e-06,
|
|
"loss": 1.4001,
|
|
"step": 2061
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 8.914975670213519e-06,
|
|
"loss": 1.5012,
|
|
"step": 2062
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 8.906081075091633e-06,
|
|
"loss": 1.4134,
|
|
"step": 2063
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 8.897187355806557e-06,
|
|
"loss": 1.3454,
|
|
"step": 2064
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 8.888294519478969e-06,
|
|
"loss": 1.506,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 8.879402573228841e-06,
|
|
"loss": 1.3202,
|
|
"step": 2066
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 8.87051152417544e-06,
|
|
"loss": 1.3494,
|
|
"step": 2067
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 8.8616213794373e-06,
|
|
"loss": 1.3923,
|
|
"step": 2068
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 8.852732146132245e-06,
|
|
"loss": 1.4877,
|
|
"step": 2069
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 8.843843831377358e-06,
|
|
"loss": 1.1933,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 8.834956442288996e-06,
|
|
"loss": 1.4817,
|
|
"step": 2071
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 8.826069985982775e-06,
|
|
"loss": 1.4266,
|
|
"step": 2072
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 8.81718446957355e-06,
|
|
"loss": 1.4754,
|
|
"step": 2073
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 8.80829990017544e-06,
|
|
"loss": 1.4969,
|
|
"step": 2074
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 8.799416284901798e-06,
|
|
"loss": 1.6428,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 8.790533630865211e-06,
|
|
"loss": 1.4375,
|
|
"step": 2076
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 8.781651945177505e-06,
|
|
"loss": 1.5112,
|
|
"step": 2077
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 8.772771234949729e-06,
|
|
"loss": 1.1683,
|
|
"step": 2078
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 8.763891507292136e-06,
|
|
"loss": 1.1901,
|
|
"step": 2079
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 8.755012769314211e-06,
|
|
"loss": 1.5318,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 8.74613502812464e-06,
|
|
"loss": 1.3868,
|
|
"step": 2081
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 8.737258290831307e-06,
|
|
"loss": 1.3868,
|
|
"step": 2082
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 8.728382564541301e-06,
|
|
"loss": 1.5071,
|
|
"step": 2083
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 8.71950785636089e-06,
|
|
"loss": 1.2651,
|
|
"step": 2084
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 8.71063417339553e-06,
|
|
"loss": 1.444,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 8.701761522749873e-06,
|
|
"loss": 1.3422,
|
|
"step": 2086
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 8.692889911527719e-06,
|
|
"loss": 1.2447,
|
|
"step": 2087
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 8.68401934683205e-06,
|
|
"loss": 1.4211,
|
|
"step": 2088
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 8.675149835765012e-06,
|
|
"loss": 1.075,
|
|
"step": 2089
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 8.666281385427897e-06,
|
|
"loss": 1.4266,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 8.657414002921155e-06,
|
|
"loss": 1.33,
|
|
"step": 2091
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 8.648547695344385e-06,
|
|
"loss": 1.4661,
|
|
"step": 2092
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 8.639682469796313e-06,
|
|
"loss": 1.4675,
|
|
"step": 2093
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 8.630818333374809e-06,
|
|
"loss": 1.1853,
|
|
"step": 2094
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 8.621955293176868e-06,
|
|
"loss": 1.679,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 8.613093356298607e-06,
|
|
"loss": 1.4696,
|
|
"step": 2096
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 8.604232529835264e-06,
|
|
"loss": 1.3299,
|
|
"step": 2097
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 8.595372820881173e-06,
|
|
"loss": 1.1705,
|
|
"step": 2098
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 8.586514236529793e-06,
|
|
"loss": 1.4245,
|
|
"step": 2099
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 8.577656783873672e-06,
|
|
"loss": 1.2601,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 8.568800470004449e-06,
|
|
"loss": 1.127,
|
|
"step": 2101
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 8.55994530201286e-06,
|
|
"loss": 1.4745,
|
|
"step": 2102
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 8.551091286988724e-06,
|
|
"loss": 1.3639,
|
|
"step": 2103
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 8.542238432020923e-06,
|
|
"loss": 1.3271,
|
|
"step": 2104
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 8.533386744197424e-06,
|
|
"loss": 1.2248,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 8.524536230605253e-06,
|
|
"loss": 1.5048,
|
|
"step": 2106
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 8.5156868983305e-06,
|
|
"loss": 1.3686,
|
|
"step": 2107
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 8.506838754458306e-06,
|
|
"loss": 1.4516,
|
|
"step": 2108
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 8.497991806072864e-06,
|
|
"loss": 1.2265,
|
|
"step": 2109
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 8.489146060257403e-06,
|
|
"loss": 1.3052,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 8.480301524094196e-06,
|
|
"loss": 1.3186,
|
|
"step": 2111
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 8.471458204664543e-06,
|
|
"loss": 1.4062,
|
|
"step": 2112
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 8.462616109048773e-06,
|
|
"loss": 1.4386,
|
|
"step": 2113
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 8.45377524432624e-06,
|
|
"loss": 1.589,
|
|
"step": 2114
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 8.444935617575296e-06,
|
|
"loss": 1.4193,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 8.43609723587332e-06,
|
|
"loss": 1.2372,
|
|
"step": 2116
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 8.427260106296684e-06,
|
|
"loss": 1.2995,
|
|
"step": 2117
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 8.418424235920758e-06,
|
|
"loss": 1.1849,
|
|
"step": 2118
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 8.409589631819909e-06,
|
|
"loss": 1.4484,
|
|
"step": 2119
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 8.400756301067487e-06,
|
|
"loss": 1.2232,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 8.391924250735819e-06,
|
|
"loss": 1.4329,
|
|
"step": 2121
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 8.383093487896213e-06,
|
|
"loss": 1.552,
|
|
"step": 2122
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 8.374264019618949e-06,
|
|
"loss": 1.3988,
|
|
"step": 2123
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 8.365435852973254e-06,
|
|
"loss": 1.175,
|
|
"step": 2124
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 8.35660899502733e-06,
|
|
"loss": 1.3661,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 8.347783452848325e-06,
|
|
"loss": 1.452,
|
|
"step": 2126
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 8.338959233502334e-06,
|
|
"loss": 1.2696,
|
|
"step": 2127
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 8.330136344054395e-06,
|
|
"loss": 1.5616,
|
|
"step": 2128
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 8.32131479156847e-06,
|
|
"loss": 1.22,
|
|
"step": 2129
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 8.312494583107466e-06,
|
|
"loss": 1.3061,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 8.30367572573321e-06,
|
|
"loss": 1.3268,
|
|
"step": 2131
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 8.294858226506437e-06,
|
|
"loss": 1.4356,
|
|
"step": 2132
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 8.286042092486806e-06,
|
|
"loss": 1.5554,
|
|
"step": 2133
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 8.277227330732883e-06,
|
|
"loss": 1.338,
|
|
"step": 2134
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 8.268413948302124e-06,
|
|
"loss": 1.3644,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 8.259601952250891e-06,
|
|
"loss": 1.2905,
|
|
"step": 2136
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 8.250791349634438e-06,
|
|
"loss": 1.3499,
|
|
"step": 2137
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 8.241982147506889e-06,
|
|
"loss": 1.5112,
|
|
"step": 2138
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 8.233174352921266e-06,
|
|
"loss": 1.4158,
|
|
"step": 2139
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 8.224367972929448e-06,
|
|
"loss": 1.4317,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 8.215563014582186e-06,
|
|
"loss": 1.6379,
|
|
"step": 2141
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 8.206759484929102e-06,
|
|
"loss": 1.1974,
|
|
"step": 2142
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 8.197957391018658e-06,
|
|
"loss": 1.3971,
|
|
"step": 2143
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 8.189156739898181e-06,
|
|
"loss": 1.2049,
|
|
"step": 2144
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 8.18035753861384e-06,
|
|
"loss": 1.307,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 8.171559794210629e-06,
|
|
"loss": 1.2827,
|
|
"step": 2146
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 8.162763513732392e-06,
|
|
"loss": 0.8881,
|
|
"step": 2147
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 8.153968704221799e-06,
|
|
"loss": 1.4889,
|
|
"step": 2148
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 8.145175372720334e-06,
|
|
"loss": 1.2584,
|
|
"step": 2149
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 8.13638352626831e-06,
|
|
"loss": 1.3671,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 8.127593171904831e-06,
|
|
"loss": 1.4347,
|
|
"step": 2151
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 8.118804316667828e-06,
|
|
"loss": 1.2841,
|
|
"step": 2152
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 8.110016967594024e-06,
|
|
"loss": 1.2404,
|
|
"step": 2153
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 8.101231131718927e-06,
|
|
"loss": 1.2466,
|
|
"step": 2154
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 8.092446816076847e-06,
|
|
"loss": 1.2446,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 8.083664027700871e-06,
|
|
"loss": 1.3069,
|
|
"step": 2156
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 8.074882773622857e-06,
|
|
"loss": 1.3987,
|
|
"step": 2157
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 8.066103060873448e-06,
|
|
"loss": 1.252,
|
|
"step": 2158
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 8.057324896482047e-06,
|
|
"loss": 1.3498,
|
|
"step": 2159
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 8.048548287476808e-06,
|
|
"loss": 1.4029,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 8.03977324088465e-06,
|
|
"loss": 1.2225,
|
|
"step": 2161
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 8.030999763731245e-06,
|
|
"loss": 1.5255,
|
|
"step": 2162
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 8.022227863040996e-06,
|
|
"loss": 1.2087,
|
|
"step": 2163
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 8.013457545837055e-06,
|
|
"loss": 1.5185,
|
|
"step": 2164
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 8.004688819141296e-06,
|
|
"loss": 1.299,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 7.995921689974326e-06,
|
|
"loss": 1.4527,
|
|
"step": 2166
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 7.987156165355475e-06,
|
|
"loss": 1.2578,
|
|
"step": 2167
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 7.97839225230278e-06,
|
|
"loss": 1.3946,
|
|
"step": 2168
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 7.969629957832998e-06,
|
|
"loss": 1.401,
|
|
"step": 2169
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 7.960869288961587e-06,
|
|
"loss": 1.293,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 7.952110252702692e-06,
|
|
"loss": 1.3171,
|
|
"step": 2171
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 7.943352856069167e-06,
|
|
"loss": 1.632,
|
|
"step": 2172
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 7.934597106072546e-06,
|
|
"loss": 1.3504,
|
|
"step": 2173
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 7.925843009723043e-06,
|
|
"loss": 1.4421,
|
|
"step": 2174
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 7.917090574029551e-06,
|
|
"loss": 1.2566,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 7.908339805999639e-06,
|
|
"loss": 1.254,
|
|
"step": 2176
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 7.899590712639522e-06,
|
|
"loss": 1.2446,
|
|
"step": 2177
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 7.890843300954098e-06,
|
|
"loss": 1.4612,
|
|
"step": 2178
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 7.882097577946898e-06,
|
|
"loss": 1.2265,
|
|
"step": 2179
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 7.873353550620115e-06,
|
|
"loss": 1.2419,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 7.864611225974583e-06,
|
|
"loss": 1.3433,
|
|
"step": 2181
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 7.855870611009761e-06,
|
|
"loss": 1.3596,
|
|
"step": 2182
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 7.847131712723752e-06,
|
|
"loss": 1.1952,
|
|
"step": 2183
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 7.838394538113284e-06,
|
|
"loss": 1.4007,
|
|
"step": 2184
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 7.829659094173692e-06,
|
|
"loss": 1.3609,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 7.820925387898938e-06,
|
|
"loss": 1.255,
|
|
"step": 2186
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 7.812193426281593e-06,
|
|
"loss": 1.2453,
|
|
"step": 2187
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 7.803463216312824e-06,
|
|
"loss": 1.181,
|
|
"step": 2188
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 7.794734764982397e-06,
|
|
"loss": 1.2381,
|
|
"step": 2189
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 7.786008079278679e-06,
|
|
"loss": 1.2533,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 7.777283166188606e-06,
|
|
"loss": 1.3894,
|
|
"step": 2191
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 7.76856003269771e-06,
|
|
"loss": 1.3404,
|
|
"step": 2192
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 7.759838685790094e-06,
|
|
"loss": 1.5004,
|
|
"step": 2193
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 7.751119132448426e-06,
|
|
"loss": 1.3744,
|
|
"step": 2194
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 7.742401379653948e-06,
|
|
"loss": 1.2626,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 7.733685434386447e-06,
|
|
"loss": 1.3633,
|
|
"step": 2196
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 7.724971303624271e-06,
|
|
"loss": 1.2818,
|
|
"step": 2197
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 7.716258994344319e-06,
|
|
"loss": 1.3817,
|
|
"step": 2198
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 7.707548513522022e-06,
|
|
"loss": 1.6378,
|
|
"step": 2199
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 7.698839868131354e-06,
|
|
"loss": 1.3556,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 7.69013306514482e-06,
|
|
"loss": 1.3185,
|
|
"step": 2201
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 7.681428111533438e-06,
|
|
"loss": 1.5491,
|
|
"step": 2202
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 7.672725014266763e-06,
|
|
"loss": 1.2915,
|
|
"step": 2203
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 7.664023780312853e-06,
|
|
"loss": 1.5594,
|
|
"step": 2204
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 7.655324416638275e-06,
|
|
"loss": 1.2077,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 7.646626930208103e-06,
|
|
"loss": 1.4546,
|
|
"step": 2206
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 7.6379313279859e-06,
|
|
"loss": 1.2976,
|
|
"step": 2207
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 7.629237616933729e-06,
|
|
"loss": 1.4133,
|
|
"step": 2208
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 7.620545804012135e-06,
|
|
"loss": 1.1285,
|
|
"step": 2209
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 7.611855896180141e-06,
|
|
"loss": 1.4018,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 7.60316790039525e-06,
|
|
"loss": 1.3067,
|
|
"step": 2211
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 7.594481823613433e-06,
|
|
"loss": 1.445,
|
|
"step": 2212
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 7.585797672789118e-06,
|
|
"loss": 1.1475,
|
|
"step": 2213
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 7.5771154548751945e-06,
|
|
"loss": 1.3136,
|
|
"step": 2214
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 7.568435176823017e-06,
|
|
"loss": 1.3359,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 7.559756845582363e-06,
|
|
"loss": 1.3192,
|
|
"step": 2216
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 7.551080468101471e-06,
|
|
"loss": 1.4314,
|
|
"step": 2217
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 7.542406051327007e-06,
|
|
"loss": 1.2526,
|
|
"step": 2218
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 7.5337336022040664e-06,
|
|
"loss": 1.393,
|
|
"step": 2219
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 7.525063127676176e-06,
|
|
"loss": 1.195,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 7.516394634685271e-06,
|
|
"loss": 1.4192,
|
|
"step": 2221
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 7.5077281301717074e-06,
|
|
"loss": 1.3034,
|
|
"step": 2222
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 7.499063621074252e-06,
|
|
"loss": 1.4616,
|
|
"step": 2223
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 7.490401114330065e-06,
|
|
"loss": 1.4068,
|
|
"step": 2224
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 7.481740616874709e-06,
|
|
"loss": 1.4363,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 7.473082135642142e-06,
|
|
"loss": 1.405,
|
|
"step": 2226
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 7.464425677564695e-06,
|
|
"loss": 1.0711,
|
|
"step": 2227
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 7.455771249573089e-06,
|
|
"loss": 1.2508,
|
|
"step": 2228
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 7.447118858596424e-06,
|
|
"loss": 1.419,
|
|
"step": 2229
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 7.4384685115621536e-06,
|
|
"loss": 1.3068,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 7.429820215396113e-06,
|
|
"loss": 1.291,
|
|
"step": 2231
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 7.421173977022478e-06,
|
|
"loss": 1.0726,
|
|
"step": 2232
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 7.412529803363788e-06,
|
|
"loss": 1.3061,
|
|
"step": 2233
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 7.4038877013409305e-06,
|
|
"loss": 1.5382,
|
|
"step": 2234
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 7.395247677873125e-06,
|
|
"loss": 1.4927,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 7.386609739877935e-06,
|
|
"loss": 1.4411,
|
|
"step": 2236
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 7.377973894271253e-06,
|
|
"loss": 1.4418,
|
|
"step": 2237
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 7.3693401479672896e-06,
|
|
"loss": 1.3825,
|
|
"step": 2238
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 7.360708507878583e-06,
|
|
"loss": 1.4174,
|
|
"step": 2239
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 7.352078980915985e-06,
|
|
"loss": 0.9493,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 7.343451573988645e-06,
|
|
"loss": 1.1499,
|
|
"step": 2241
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 7.334826294004027e-06,
|
|
"loss": 1.094,
|
|
"step": 2242
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 7.326203147867892e-06,
|
|
"loss": 1.1892,
|
|
"step": 2243
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 7.317582142484278e-06,
|
|
"loss": 1.3403,
|
|
"step": 2244
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 7.3089632847555255e-06,
|
|
"loss": 1.6355,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 7.300346581582248e-06,
|
|
"loss": 1.3522,
|
|
"step": 2246
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 7.291732039863331e-06,
|
|
"loss": 1.383,
|
|
"step": 2247
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 7.2831196664959435e-06,
|
|
"loss": 1.4541,
|
|
"step": 2248
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 7.274509468375496e-06,
|
|
"loss": 1.2379,
|
|
"step": 2249
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 7.265901452395677e-06,
|
|
"loss": 1.1832,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 7.257295625448424e-06,
|
|
"loss": 1.2206,
|
|
"step": 2251
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 7.24869199442391e-06,
|
|
"loss": 1.3609,
|
|
"step": 2252
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 7.2400905662105625e-06,
|
|
"loss": 1.272,
|
|
"step": 2253
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 7.231491347695042e-06,
|
|
"loss": 1.1594,
|
|
"step": 2254
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 7.2228943457622365e-06,
|
|
"loss": 1.4896,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 7.214299567295264e-06,
|
|
"loss": 1.3038,
|
|
"step": 2256
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 7.205707019175465e-06,
|
|
"loss": 1.1426,
|
|
"step": 2257
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 7.19711670828238e-06,
|
|
"loss": 1.2793,
|
|
"step": 2258
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 7.188528641493773e-06,
|
|
"loss": 1.4488,
|
|
"step": 2259
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 7.179942825685604e-06,
|
|
"loss": 1.3565,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 7.171359267732034e-06,
|
|
"loss": 1.2956,
|
|
"step": 2261
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 7.16277797450542e-06,
|
|
"loss": 1.4731,
|
|
"step": 2262
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 7.15419895287629e-06,
|
|
"loss": 1.4828,
|
|
"step": 2263
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 7.14562220971337e-06,
|
|
"loss": 1.3656,
|
|
"step": 2264
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 7.137047751883559e-06,
|
|
"loss": 1.3091,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 7.1284755862519175e-06,
|
|
"loss": 1.1784,
|
|
"step": 2266
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 7.119905719681679e-06,
|
|
"loss": 1.4861,
|
|
"step": 2267
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 7.111338159034238e-06,
|
|
"loss": 1.3584,
|
|
"step": 2268
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 7.10277291116913e-06,
|
|
"loss": 1.3712,
|
|
"step": 2269
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 7.094209982944051e-06,
|
|
"loss": 1.2167,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 7.085649381214839e-06,
|
|
"loss": 1.3296,
|
|
"step": 2271
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 7.077091112835461e-06,
|
|
"loss": 1.3655,
|
|
"step": 2272
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 7.068535184658029e-06,
|
|
"loss": 1.1538,
|
|
"step": 2273
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 7.059981603532765e-06,
|
|
"loss": 1.5042,
|
|
"step": 2274
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 7.0514303763080236e-06,
|
|
"loss": 1.3823,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 7.042881509830274e-06,
|
|
"loss": 1.5541,
|
|
"step": 2276
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 7.034335010944089e-06,
|
|
"loss": 1.3412,
|
|
"step": 2277
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 7.02579088649215e-06,
|
|
"loss": 1.5585,
|
|
"step": 2278
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 7.017249143315243e-06,
|
|
"loss": 1.5353,
|
|
"step": 2279
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 7.008709788252233e-06,
|
|
"loss": 1.3897,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 7.000172828140085e-06,
|
|
"loss": 1.1663,
|
|
"step": 2281
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 6.991638269813849e-06,
|
|
"loss": 1.3785,
|
|
"step": 2282
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 6.983106120106635e-06,
|
|
"loss": 1.4169,
|
|
"step": 2283
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 6.9745763858496404e-06,
|
|
"loss": 1.3659,
|
|
"step": 2284
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 6.9660490738721275e-06,
|
|
"loss": 1.4686,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 6.957524191001408e-06,
|
|
"loss": 1.3561,
|
|
"step": 2286
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 6.949001744062866e-06,
|
|
"loss": 1.2276,
|
|
"step": 2287
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 6.940481739879916e-06,
|
|
"loss": 1.5583,
|
|
"step": 2288
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 6.93196418527403e-06,
|
|
"loss": 1.3925,
|
|
"step": 2289
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 6.923449087064718e-06,
|
|
"loss": 1.3465,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 6.914936452069515e-06,
|
|
"loss": 1.2794,
|
|
"step": 2291
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 6.906426287103993e-06,
|
|
"loss": 1.3644,
|
|
"step": 2292
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 6.897918598981745e-06,
|
|
"loss": 1.0715,
|
|
"step": 2293
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 6.889413394514371e-06,
|
|
"loss": 1.2744,
|
|
"step": 2294
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 6.880910680511493e-06,
|
|
"loss": 1.3331,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 6.872410463780741e-06,
|
|
"loss": 1.4469,
|
|
"step": 2296
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 6.863912751127735e-06,
|
|
"loss": 1.2543,
|
|
"step": 2297
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 6.855417549356096e-06,
|
|
"loss": 1.3258,
|
|
"step": 2298
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 6.846924865267442e-06,
|
|
"loss": 1.0815,
|
|
"step": 2299
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 6.838434705661356e-06,
|
|
"loss": 1.2405,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 6.829947077335422e-06,
|
|
"loss": 1.5926,
|
|
"step": 2301
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 6.821461987085179e-06,
|
|
"loss": 1.4747,
|
|
"step": 2302
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 6.812979441704144e-06,
|
|
"loss": 1.3316,
|
|
"step": 2303
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 6.804499447983799e-06,
|
|
"loss": 1.4487,
|
|
"step": 2304
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 6.796022012713572e-06,
|
|
"loss": 1.3741,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 6.78754714268085e-06,
|
|
"loss": 1.5823,
|
|
"step": 2306
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 6.779074844670969e-06,
|
|
"loss": 1.3509,
|
|
"step": 2307
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 6.770605125467198e-06,
|
|
"loss": 1.4265,
|
|
"step": 2308
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 6.762137991850746e-06,
|
|
"loss": 1.3514,
|
|
"step": 2309
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 6.753673450600754e-06,
|
|
"loss": 1.3671,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 6.745211508494276e-06,
|
|
"loss": 1.3814,
|
|
"step": 2311
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 6.736752172306304e-06,
|
|
"loss": 1.2831,
|
|
"step": 2312
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 6.728295448809726e-06,
|
|
"loss": 1.2457,
|
|
"step": 2313
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 6.7198413447753465e-06,
|
|
"loss": 1.3997,
|
|
"step": 2314
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 6.711389866971874e-06,
|
|
"loss": 1.5296,
|
|
"step": 2315
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 6.702941022165906e-06,
|
|
"loss": 1.5858,
|
|
"step": 2316
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 6.694494817121944e-06,
|
|
"loss": 1.551,
|
|
"step": 2317
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 6.6860512586023686e-06,
|
|
"loss": 1.3202,
|
|
"step": 2318
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 6.677610353367439e-06,
|
|
"loss": 1.306,
|
|
"step": 2319
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 6.669172108175295e-06,
|
|
"loss": 1.1308,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 6.660736529781947e-06,
|
|
"loss": 1.6042,
|
|
"step": 2321
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 6.652303624941266e-06,
|
|
"loss": 1.1223,
|
|
"step": 2322
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 6.643873400404987e-06,
|
|
"loss": 1.1182,
|
|
"step": 2323
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 6.635445862922699e-06,
|
|
"loss": 1.2887,
|
|
"step": 2324
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 6.6270210192418325e-06,
|
|
"loss": 1.5276,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 6.618598876107669e-06,
|
|
"loss": 1.3404,
|
|
"step": 2326
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 6.6101794402633245e-06,
|
|
"loss": 1.1976,
|
|
"step": 2327
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 6.601762718449749e-06,
|
|
"loss": 1.2033,
|
|
"step": 2328
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 6.593348717405721e-06,
|
|
"loss": 1.4492,
|
|
"step": 2329
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 6.584937443867832e-06,
|
|
"loss": 1.3417,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 6.576528904570501e-06,
|
|
"loss": 1.4512,
|
|
"step": 2331
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 6.568123106245952e-06,
|
|
"loss": 1.2752,
|
|
"step": 2332
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 6.559720055624212e-06,
|
|
"loss": 1.3496,
|
|
"step": 2333
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 6.551319759433117e-06,
|
|
"loss": 1.3337,
|
|
"step": 2334
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 6.542922224398292e-06,
|
|
"loss": 1.3916,
|
|
"step": 2335
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 6.534527457243145e-06,
|
|
"loss": 1.391,
|
|
"step": 2336
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 6.526135464688878e-06,
|
|
"loss": 1.3676,
|
|
"step": 2337
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 6.517746253454471e-06,
|
|
"loss": 1.4351,
|
|
"step": 2338
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 6.50935983025667e-06,
|
|
"loss": 1.2638,
|
|
"step": 2339
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 6.500976201809996e-06,
|
|
"loss": 1.2645,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 6.492595374826724e-06,
|
|
"loss": 1.2305,
|
|
"step": 2341
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 6.484217356016895e-06,
|
|
"loss": 1.3279,
|
|
"step": 2342
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 6.4758421520882984e-06,
|
|
"loss": 1.3666,
|
|
"step": 2343
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 6.467469769746468e-06,
|
|
"loss": 1.4155,
|
|
"step": 2344
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 6.459100215694681e-06,
|
|
"loss": 1.1813,
|
|
"step": 2345
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 6.450733496633948e-06,
|
|
"loss": 1.4268,
|
|
"step": 2346
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 6.442369619263012e-06,
|
|
"loss": 1.3997,
|
|
"step": 2347
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 6.4340085902783365e-06,
|
|
"loss": 1.1383,
|
|
"step": 2348
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 6.4256504163741165e-06,
|
|
"loss": 1.1157,
|
|
"step": 2349
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 6.417295104242241e-06,
|
|
"loss": 1.4008,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 6.408942660572324e-06,
|
|
"loss": 1.2924,
|
|
"step": 2351
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 6.400593092051681e-06,
|
|
"loss": 1.2741,
|
|
"step": 2352
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 6.392246405365317e-06,
|
|
"loss": 1.0494,
|
|
"step": 2353
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 6.383902607195942e-06,
|
|
"loss": 1.4334,
|
|
"step": 2354
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 6.375561704223937e-06,
|
|
"loss": 1.2707,
|
|
"step": 2355
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 6.36722370312738e-06,
|
|
"loss": 1.2625,
|
|
"step": 2356
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 6.358888610582021e-06,
|
|
"loss": 1.5441,
|
|
"step": 2357
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 6.350556433261279e-06,
|
|
"loss": 1.4802,
|
|
"step": 2358
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 6.342227177836239e-06,
|
|
"loss": 1.4539,
|
|
"step": 2359
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 6.333900850975654e-06,
|
|
"loss": 1.1581,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 6.325577459345916e-06,
|
|
"loss": 1.3935,
|
|
"step": 2361
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 6.317257009611084e-06,
|
|
"loss": 1.3214,
|
|
"step": 2362
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 6.3089395084328555e-06,
|
|
"loss": 1.2702,
|
|
"step": 2363
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 6.3006249624705615e-06,
|
|
"loss": 1.339,
|
|
"step": 2364
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 6.292313378381175e-06,
|
|
"loss": 1.4418,
|
|
"step": 2365
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 6.284004762819299e-06,
|
|
"loss": 1.0684,
|
|
"step": 2366
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 6.275699122437147e-06,
|
|
"loss": 1.5028,
|
|
"step": 2367
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 6.267396463884563e-06,
|
|
"loss": 1.2403,
|
|
"step": 2368
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 6.259096793808998e-06,
|
|
"loss": 1.5359,
|
|
"step": 2369
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 6.250800118855514e-06,
|
|
"loss": 1.4132,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 6.242506445666772e-06,
|
|
"loss": 1.321,
|
|
"step": 2371
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 6.234215780883028e-06,
|
|
"loss": 1.2629,
|
|
"step": 2372
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 6.225928131142132e-06,
|
|
"loss": 1.4493,
|
|
"step": 2373
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 6.217643503079523e-06,
|
|
"loss": 1.5434,
|
|
"step": 2374
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 6.209361903328215e-06,
|
|
"loss": 1.3913,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 6.2010833385187985e-06,
|
|
"loss": 1.3465,
|
|
"step": 2376
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 6.192807815279439e-06,
|
|
"loss": 1.3375,
|
|
"step": 2377
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 6.18453534023586e-06,
|
|
"loss": 1.5118,
|
|
"step": 2378
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 6.17626592001135e-06,
|
|
"loss": 1.3519,
|
|
"step": 2379
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 6.167999561226752e-06,
|
|
"loss": 1.4646,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 6.159736270500447e-06,
|
|
"loss": 1.2491,
|
|
"step": 2381
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 6.151476054448375e-06,
|
|
"loss": 1.1922,
|
|
"step": 2382
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 6.1432189196840016e-06,
|
|
"loss": 1.5045,
|
|
"step": 2383
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 6.1349648728183365e-06,
|
|
"loss": 1.3597,
|
|
"step": 2384
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 6.1267139204599124e-06,
|
|
"loss": 1.6327,
|
|
"step": 2385
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 6.118466069214777e-06,
|
|
"loss": 1.1874,
|
|
"step": 2386
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 6.110221325686504e-06,
|
|
"loss": 1.3119,
|
|
"step": 2387
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 6.10197969647618e-06,
|
|
"loss": 1.5522,
|
|
"step": 2388
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 6.093741188182391e-06,
|
|
"loss": 1.3211,
|
|
"step": 2389
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 6.0855058074012285e-06,
|
|
"loss": 1.6759,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 6.077273560726286e-06,
|
|
"loss": 1.2327,
|
|
"step": 2391
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 6.069044454748632e-06,
|
|
"loss": 1.4327,
|
|
"step": 2392
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 6.060818496056833e-06,
|
|
"loss": 1.4579,
|
|
"step": 2393
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 6.052595691236933e-06,
|
|
"loss": 0.9999,
|
|
"step": 2394
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 6.044376046872449e-06,
|
|
"loss": 1.2211,
|
|
"step": 2395
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 6.036159569544373e-06,
|
|
"loss": 1.4419,
|
|
"step": 2396
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 6.02794626583115e-06,
|
|
"loss": 1.2397,
|
|
"step": 2397
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 6.019736142308696e-06,
|
|
"loss": 1.334,
|
|
"step": 2398
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 6.011529205550377e-06,
|
|
"loss": 1.1488,
|
|
"step": 2399
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 6.003325462127003e-06,
|
|
"loss": 1.4907,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 5.995124918606832e-06,
|
|
"loss": 1.3138,
|
|
"step": 2401
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 5.986927581555562e-06,
|
|
"loss": 1.3281,
|
|
"step": 2402
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 5.978733457536316e-06,
|
|
"loss": 1.3284,
|
|
"step": 2403
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 5.97054255310965e-06,
|
|
"loss": 1.2837,
|
|
"step": 2404
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 5.962354874833543e-06,
|
|
"loss": 1.3293,
|
|
"step": 2405
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 5.954170429263388e-06,
|
|
"loss": 1.4615,
|
|
"step": 2406
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 5.945989222951998e-06,
|
|
"loss": 1.454,
|
|
"step": 2407
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 5.9378112624495734e-06,
|
|
"loss": 1.3926,
|
|
"step": 2408
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 5.929636554303733e-06,
|
|
"loss": 1.3702,
|
|
"step": 2409
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 5.9214651050594955e-06,
|
|
"loss": 1.5897,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 5.913296921259249e-06,
|
|
"loss": 1.3703,
|
|
"step": 2411
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 5.905132009442786e-06,
|
|
"loss": 1.1788,
|
|
"step": 2412
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 5.896970376147274e-06,
|
|
"loss": 1.3996,
|
|
"step": 2413
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 5.888812027907251e-06,
|
|
"loss": 1.4265,
|
|
"step": 2414
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 5.880656971254633e-06,
|
|
"loss": 1.2492,
|
|
"step": 2415
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 5.8725052127186955e-06,
|
|
"loss": 1.3745,
|
|
"step": 2416
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 5.864356758826068e-06,
|
|
"loss": 1.3588,
|
|
"step": 2417
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 5.856211616100747e-06,
|
|
"loss": 1.2429,
|
|
"step": 2418
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 5.848069791064071e-06,
|
|
"loss": 1.2136,
|
|
"step": 2419
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 5.839931290234716e-06,
|
|
"loss": 1.3047,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 5.83179612012871e-06,
|
|
"loss": 1.5274,
|
|
"step": 2421
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 5.823664287259405e-06,
|
|
"loss": 1.3519,
|
|
"step": 2422
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 5.8155357981374795e-06,
|
|
"loss": 1.2872,
|
|
"step": 2423
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 5.807410659270949e-06,
|
|
"loss": 1.2672,
|
|
"step": 2424
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 5.799288877165121e-06,
|
|
"loss": 1.5417,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 5.791170458322643e-06,
|
|
"loss": 1.3629,
|
|
"step": 2426
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 5.783055409243454e-06,
|
|
"loss": 1.1351,
|
|
"step": 2427
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 5.774943736424794e-06,
|
|
"loss": 1.492,
|
|
"step": 2428
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 5.766835446361212e-06,
|
|
"loss": 1.296,
|
|
"step": 2429
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 5.758730545544536e-06,
|
|
"loss": 1.582,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 5.750629040463887e-06,
|
|
"loss": 1.5014,
|
|
"step": 2431
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 5.742530937605661e-06,
|
|
"loss": 1.3627,
|
|
"step": 2432
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 5.734436243453543e-06,
|
|
"loss": 1.2583,
|
|
"step": 2433
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 5.726344964488475e-06,
|
|
"loss": 1.2956,
|
|
"step": 2434
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 5.718257107188672e-06,
|
|
"loss": 1.3692,
|
|
"step": 2435
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 5.7101726780296065e-06,
|
|
"loss": 1.1711,
|
|
"step": 2436
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 5.7020916834840035e-06,
|
|
"loss": 1.4352,
|
|
"step": 2437
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 5.6940141300218495e-06,
|
|
"loss": 1.3485,
|
|
"step": 2438
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 5.685940024110365e-06,
|
|
"loss": 1.3898,
|
|
"step": 2439
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 5.677869372214012e-06,
|
|
"loss": 1.4908,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 5.669802180794497e-06,
|
|
"loss": 1.3843,
|
|
"step": 2441
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 5.661738456310733e-06,
|
|
"loss": 1.1039,
|
|
"step": 2442
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 5.653678205218881e-06,
|
|
"loss": 1.4066,
|
|
"step": 2443
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 5.645621433972318e-06,
|
|
"loss": 1.4238,
|
|
"step": 2444
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 5.637568149021615e-06,
|
|
"loss": 1.3482,
|
|
"step": 2445
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 5.629518356814575e-06,
|
|
"loss": 1.0104,
|
|
"step": 2446
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 5.621472063796194e-06,
|
|
"loss": 1.1884,
|
|
"step": 2447
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 5.61342927640866e-06,
|
|
"loss": 1.1787,
|
|
"step": 2448
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 5.605390001091377e-06,
|
|
"loss": 1.2922,
|
|
"step": 2449
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 5.597354244280904e-06,
|
|
"loss": 1.0228,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 5.589322012411014e-06,
|
|
"loss": 1.1985,
|
|
"step": 2451
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 5.58129331191264e-06,
|
|
"loss": 1.3077,
|
|
"step": 2452
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 5.5732681492138886e-06,
|
|
"loss": 1.4713,
|
|
"step": 2453
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 5.565246530740044e-06,
|
|
"loss": 1.2718,
|
|
"step": 2454
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 5.5572284629135446e-06,
|
|
"loss": 1.3642,
|
|
"step": 2455
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 5.549213952153985e-06,
|
|
"loss": 1.333,
|
|
"step": 2456
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 5.541203004878113e-06,
|
|
"loss": 1.1928,
|
|
"step": 2457
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 5.533195627499833e-06,
|
|
"loss": 1.4835,
|
|
"step": 2458
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 5.525191826430175e-06,
|
|
"loss": 1.3974,
|
|
"step": 2459
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 5.517191608077315e-06,
|
|
"loss": 1.1443,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 5.509194978846567e-06,
|
|
"loss": 1.6824,
|
|
"step": 2461
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 5.5012019451403505e-06,
|
|
"loss": 1.2356,
|
|
"step": 2462
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 5.493212513358231e-06,
|
|
"loss": 1.4131,
|
|
"step": 2463
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 5.485226689896872e-06,
|
|
"loss": 1.5353,
|
|
"step": 2464
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 5.477244481150055e-06,
|
|
"loss": 1.2823,
|
|
"step": 2465
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 5.469265893508674e-06,
|
|
"loss": 1.2686,
|
|
"step": 2466
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 5.4612909333607045e-06,
|
|
"loss": 1.1065,
|
|
"step": 2467
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 5.453319607091241e-06,
|
|
"loss": 1.0106,
|
|
"step": 2468
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 5.4453519210824535e-06,
|
|
"loss": 1.3196,
|
|
"step": 2469
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 5.437387881713596e-06,
|
|
"loss": 1.4275,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 5.429427495361019e-06,
|
|
"loss": 1.0999,
|
|
"step": 2471
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 5.421470768398131e-06,
|
|
"loss": 1.3002,
|
|
"step": 2472
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 5.413517707195415e-06,
|
|
"loss": 1.1039,
|
|
"step": 2473
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 5.405568318120433e-06,
|
|
"loss": 1.1218,
|
|
"step": 2474
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 5.397622607537779e-06,
|
|
"loss": 1.296,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 5.38968058180913e-06,
|
|
"loss": 1.3127,
|
|
"step": 2476
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 5.381742247293197e-06,
|
|
"loss": 1.1939,
|
|
"step": 2477
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 5.373807610345734e-06,
|
|
"loss": 1.219,
|
|
"step": 2478
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 5.3658766773195505e-06,
|
|
"loss": 1.4214,
|
|
"step": 2479
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 5.357949454564475e-06,
|
|
"loss": 1.3332,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 5.350025948427369e-06,
|
|
"loss": 1.1676,
|
|
"step": 2481
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 5.342106165252119e-06,
|
|
"loss": 1.2341,
|
|
"step": 2482
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 5.334190111379636e-06,
|
|
"loss": 1.2345,
|
|
"step": 2483
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 5.326277793147838e-06,
|
|
"loss": 1.0614,
|
|
"step": 2484
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 5.318369216891652e-06,
|
|
"loss": 1.1961,
|
|
"step": 2485
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 5.3104643889430215e-06,
|
|
"loss": 1.3767,
|
|
"step": 2486
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 5.302563315630866e-06,
|
|
"loss": 1.1242,
|
|
"step": 2487
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 5.294666003281122e-06,
|
|
"loss": 1.2634,
|
|
"step": 2488
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 5.2867724582167004e-06,
|
|
"loss": 1.1466,
|
|
"step": 2489
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 5.278882686757499e-06,
|
|
"loss": 1.2161,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 5.270996695220405e-06,
|
|
"loss": 1.3929,
|
|
"step": 2491
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 5.263114489919254e-06,
|
|
"loss": 1.3648,
|
|
"step": 2492
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 5.255236077164881e-06,
|
|
"loss": 1.2426,
|
|
"step": 2493
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 5.247361463265064e-06,
|
|
"loss": 1.1822,
|
|
"step": 2494
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 5.23949065452454e-06,
|
|
"loss": 1.2813,
|
|
"step": 2495
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 5.231623657245014e-06,
|
|
"loss": 1.2495,
|
|
"step": 2496
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 5.223760477725125e-06,
|
|
"loss": 1.2617,
|
|
"step": 2497
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 5.215901122260461e-06,
|
|
"loss": 1.2054,
|
|
"step": 2498
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 5.2080455971435425e-06,
|
|
"loss": 1.1082,
|
|
"step": 2499
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 5.200193908663838e-06,
|
|
"loss": 1.1356,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 5.192346063107729e-06,
|
|
"loss": 1.3727,
|
|
"step": 2501
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 5.184502066758525e-06,
|
|
"loss": 1.2974,
|
|
"step": 2502
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 5.176661925896458e-06,
|
|
"loss": 1.0305,
|
|
"step": 2503
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 5.168825646798663e-06,
|
|
"loss": 1.0317,
|
|
"step": 2504
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 5.160993235739197e-06,
|
|
"loss": 1.2814,
|
|
"step": 2505
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 5.153164698989011e-06,
|
|
"loss": 1.298,
|
|
"step": 2506
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 5.145340042815949e-06,
|
|
"loss": 1.1754,
|
|
"step": 2507
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 5.137519273484766e-06,
|
|
"loss": 1.1742,
|
|
"step": 2508
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 5.129702397257087e-06,
|
|
"loss": 1.3396,
|
|
"step": 2509
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 5.121889420391426e-06,
|
|
"loss": 1.1646,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 5.114080349143185e-06,
|
|
"loss": 1.1062,
|
|
"step": 2511
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 5.106275189764616e-06,
|
|
"loss": 1.0097,
|
|
"step": 2512
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 5.0984739485048625e-06,
|
|
"loss": 0.9731,
|
|
"step": 2513
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 5.090676631609921e-06,
|
|
"loss": 1.0839,
|
|
"step": 2514
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 5.082883245322641e-06,
|
|
"loss": 1.0205,
|
|
"step": 2515
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 5.075093795882739e-06,
|
|
"loss": 0.9599,
|
|
"step": 2516
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 5.06730828952676e-06,
|
|
"loss": 0.968,
|
|
"step": 2517
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 5.059526732488113e-06,
|
|
"loss": 1.1862,
|
|
"step": 2518
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 5.05174913099703e-06,
|
|
"loss": 1.1104,
|
|
"step": 2519
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 5.043975491280578e-06,
|
|
"loss": 1.3498,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 5.036205819562663e-06,
|
|
"loss": 1.0846,
|
|
"step": 2521
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 5.028440122064e-06,
|
|
"loss": 1.1987,
|
|
"step": 2522
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 5.020678405002131e-06,
|
|
"loss": 1.1852,
|
|
"step": 2523
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 5.012920674591404e-06,
|
|
"loss": 1.2857,
|
|
"step": 2524
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 5.005166937042984e-06,
|
|
"loss": 1.3205,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 4.997417198564834e-06,
|
|
"loss": 0.9617,
|
|
"step": 2526
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 4.989671465361712e-06,
|
|
"loss": 1.0253,
|
|
"step": 2527
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 4.981929743635181e-06,
|
|
"loss": 1.4414,
|
|
"step": 2528
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 4.974192039583572e-06,
|
|
"loss": 1.16,
|
|
"step": 2529
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 4.966458359402022e-06,
|
|
"loss": 1.245,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 4.958728709282431e-06,
|
|
"loss": 1.0871,
|
|
"step": 2531
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 4.9510030954134745e-06,
|
|
"loss": 1.4032,
|
|
"step": 2532
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 4.94328152398061e-06,
|
|
"loss": 1.1361,
|
|
"step": 2533
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 4.935564001166031e-06,
|
|
"loss": 1.2091,
|
|
"step": 2534
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 4.927850533148716e-06,
|
|
"loss": 1.2024,
|
|
"step": 2535
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 4.920141126104393e-06,
|
|
"loss": 1.2697,
|
|
"step": 2536
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 4.912435786205517e-06,
|
|
"loss": 1.1745,
|
|
"step": 2537
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 4.904734519621316e-06,
|
|
"loss": 1.2337,
|
|
"step": 2538
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 4.897037332517736e-06,
|
|
"loss": 1.1072,
|
|
"step": 2539
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 4.8893442310574635e-06,
|
|
"loss": 1.5117,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 4.88165522139992e-06,
|
|
"loss": 0.9866,
|
|
"step": 2541
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 4.873970309701242e-06,
|
|
"loss": 1.3145,
|
|
"step": 2542
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 4.8662895021142865e-06,
|
|
"loss": 1.2549,
|
|
"step": 2543
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 4.858612804788627e-06,
|
|
"loss": 1.2057,
|
|
"step": 2544
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 4.8509402238705435e-06,
|
|
"loss": 1.2111,
|
|
"step": 2545
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 4.843271765503028e-06,
|
|
"loss": 1.3067,
|
|
"step": 2546
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 4.835607435825762e-06,
|
|
"loss": 1.3226,
|
|
"step": 2547
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 4.827947240975126e-06,
|
|
"loss": 1.3185,
|
|
"step": 2548
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 4.8202911870841855e-06,
|
|
"loss": 1.102,
|
|
"step": 2549
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 4.812639280282702e-06,
|
|
"loss": 1.1184,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 4.8049915266971044e-06,
|
|
"loss": 1.1252,
|
|
"step": 2551
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 4.797347932450499e-06,
|
|
"loss": 1.2171,
|
|
"step": 2552
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 4.789708503662674e-06,
|
|
"loss": 1.2832,
|
|
"step": 2553
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 4.7820732464500575e-06,
|
|
"loss": 1.2748,
|
|
"step": 2554
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 4.774442166925763e-06,
|
|
"loss": 1.0884,
|
|
"step": 2555
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 4.766815271199549e-06,
|
|
"loss": 1.1929,
|
|
"step": 2556
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 4.759192565377816e-06,
|
|
"loss": 1.0928,
|
|
"step": 2557
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 4.751574055563633e-06,
|
|
"loss": 1.1519,
|
|
"step": 2558
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 4.743959747856676e-06,
|
|
"loss": 1.2364,
|
|
"step": 2559
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 4.7363496483532885e-06,
|
|
"loss": 1.1463,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 4.728743763146428e-06,
|
|
"loss": 1.2383,
|
|
"step": 2561
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 4.721142098325676e-06,
|
|
"loss": 1.2911,
|
|
"step": 2562
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 4.713544659977249e-06,
|
|
"loss": 1.1835,
|
|
"step": 2563
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 4.705951454183968e-06,
|
|
"loss": 1.0652,
|
|
"step": 2564
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 4.698362487025265e-06,
|
|
"loss": 1.2886,
|
|
"step": 2565
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 4.690777764577181e-06,
|
|
"loss": 1.6215,
|
|
"step": 2566
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 4.683197292912365e-06,
|
|
"loss": 1.2067,
|
|
"step": 2567
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 4.6756210781000526e-06,
|
|
"loss": 1.0343,
|
|
"step": 2568
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 4.668049126206076e-06,
|
|
"loss": 0.9569,
|
|
"step": 2569
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 4.660481443292849e-06,
|
|
"loss": 1.3596,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 4.652918035419381e-06,
|
|
"loss": 1.0511,
|
|
"step": 2571
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 4.645358908641243e-06,
|
|
"loss": 1.0873,
|
|
"step": 2572
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 4.637804069010589e-06,
|
|
"loss": 1.3446,
|
|
"step": 2573
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 4.63025352257613e-06,
|
|
"loss": 1.209,
|
|
"step": 2574
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 4.6227072753831545e-06,
|
|
"loss": 1.094,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 4.615165333473497e-06,
|
|
"loss": 1.1393,
|
|
"step": 2576
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 4.6076277028855455e-06,
|
|
"loss": 1.0833,
|
|
"step": 2577
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 4.600094389654249e-06,
|
|
"loss": 1.1785,
|
|
"step": 2578
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 4.592565399811077e-06,
|
|
"loss": 1.3271,
|
|
"step": 2579
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 4.58504073938406e-06,
|
|
"loss": 1.1553,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 4.57752041439775e-06,
|
|
"loss": 1.22,
|
|
"step": 2581
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 4.570004430873228e-06,
|
|
"loss": 1.0577,
|
|
"step": 2582
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 4.562492794828112e-06,
|
|
"loss": 1.2813,
|
|
"step": 2583
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 4.554985512276514e-06,
|
|
"loss": 1.1287,
|
|
"step": 2584
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 4.547482589229086e-06,
|
|
"loss": 1.2558,
|
|
"step": 2585
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 4.539984031692976e-06,
|
|
"loss": 1.1414,
|
|
"step": 2586
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 4.532489845671837e-06,
|
|
"loss": 1.1212,
|
|
"step": 2587
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 4.525000037165831e-06,
|
|
"loss": 1.3776,
|
|
"step": 2588
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 4.517514612171605e-06,
|
|
"loss": 1.3077,
|
|
"step": 2589
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 4.510033576682302e-06,
|
|
"loss": 1.3701,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 4.502556936687543e-06,
|
|
"loss": 1.2488,
|
|
"step": 2591
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 4.495084698173445e-06,
|
|
"loss": 1.3305,
|
|
"step": 2592
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 4.487616867122587e-06,
|
|
"loss": 1.2293,
|
|
"step": 2593
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 4.480153449514021e-06,
|
|
"loss": 1.232,
|
|
"step": 2594
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 4.472694451323278e-06,
|
|
"loss": 1.1541,
|
|
"step": 2595
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 4.465239878522327e-06,
|
|
"loss": 1.2447,
|
|
"step": 2596
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 4.45778973707962e-06,
|
|
"loss": 1.2096,
|
|
"step": 2597
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 4.4503440329600424e-06,
|
|
"loss": 1.0981,
|
|
"step": 2598
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 4.442902772124933e-06,
|
|
"loss": 0.9908,
|
|
"step": 2599
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 4.435465960532082e-06,
|
|
"loss": 1.3129,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 4.428033604135696e-06,
|
|
"loss": 1.2251,
|
|
"step": 2601
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 4.4206057088864314e-06,
|
|
"loss": 1.0756,
|
|
"step": 2602
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 4.41318228073138e-06,
|
|
"loss": 1.0678,
|
|
"step": 2603
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 4.405763325614031e-06,
|
|
"loss": 0.9767,
|
|
"step": 2604
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 4.398348849474317e-06,
|
|
"loss": 1.1425,
|
|
"step": 2605
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 4.39093885824857e-06,
|
|
"loss": 1.0482,
|
|
"step": 2606
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 4.383533357869534e-06,
|
|
"loss": 1.0328,
|
|
"step": 2607
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 4.376132354266366e-06,
|
|
"loss": 1.2662,
|
|
"step": 2608
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 4.368735853364612e-06,
|
|
"loss": 1.1566,
|
|
"step": 2609
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 4.361343861086218e-06,
|
|
"loss": 1.1186,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 4.353956383349519e-06,
|
|
"loss": 1.3049,
|
|
"step": 2611
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 4.346573426069231e-06,
|
|
"loss": 1.3829,
|
|
"step": 2612
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 4.339194995156465e-06,
|
|
"loss": 1.1011,
|
|
"step": 2613
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 4.3318210965186925e-06,
|
|
"loss": 1.2403,
|
|
"step": 2614
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 4.324451736059763e-06,
|
|
"loss": 1.3072,
|
|
"step": 2615
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 4.317086919679891e-06,
|
|
"loss": 1.3975,
|
|
"step": 2616
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 4.3097266532756555e-06,
|
|
"loss": 1.039,
|
|
"step": 2617
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 4.3023709427399895e-06,
|
|
"loss": 1.1112,
|
|
"step": 2618
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 4.295019793962177e-06,
|
|
"loss": 1.1001,
|
|
"step": 2619
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 4.287673212827862e-06,
|
|
"loss": 1.2604,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 4.280331205219008e-06,
|
|
"loss": 1.2777,
|
|
"step": 2621
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 4.272993777013941e-06,
|
|
"loss": 1.2497,
|
|
"step": 2622
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 4.265660934087306e-06,
|
|
"loss": 1.2568,
|
|
"step": 2623
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 4.258332682310077e-06,
|
|
"loss": 1.2537,
|
|
"step": 2624
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 4.251009027549567e-06,
|
|
"loss": 1.0296,
|
|
"step": 2625
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 4.243689975669384e-06,
|
|
"loss": 1.2124,
|
|
"step": 2626
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 4.236375532529473e-06,
|
|
"loss": 1.2989,
|
|
"step": 2627
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 4.229065703986078e-06,
|
|
"loss": 1.3123,
|
|
"step": 2628
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 4.2217604958917465e-06,
|
|
"loss": 1.2501,
|
|
"step": 2629
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 4.214459914095339e-06,
|
|
"loss": 1.3463,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 4.207163964441999e-06,
|
|
"loss": 1.4815,
|
|
"step": 2631
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 4.199872652773166e-06,
|
|
"loss": 1.2523,
|
|
"step": 2632
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 4.192585984926563e-06,
|
|
"loss": 1.2834,
|
|
"step": 2633
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 4.185303966736205e-06,
|
|
"loss": 1.2826,
|
|
"step": 2634
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 4.178026604032374e-06,
|
|
"loss": 0.9242,
|
|
"step": 2635
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 4.170753902641626e-06,
|
|
"loss": 1.4105,
|
|
"step": 2636
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 4.163485868386788e-06,
|
|
"loss": 1.0917,
|
|
"step": 2637
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 4.1562225070869536e-06,
|
|
"loss": 1.1437,
|
|
"step": 2638
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 4.148963824557467e-06,
|
|
"loss": 1.0545,
|
|
"step": 2639
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 4.141709826609931e-06,
|
|
"loss": 1.2361,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 4.134460519052192e-06,
|
|
"loss": 1.1873,
|
|
"step": 2641
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 4.127215907688355e-06,
|
|
"loss": 1.3316,
|
|
"step": 2642
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 4.119975998318751e-06,
|
|
"loss": 1.1121,
|
|
"step": 2643
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 4.112740796739949e-06,
|
|
"loss": 1.1923,
|
|
"step": 2644
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 4.105510308744761e-06,
|
|
"loss": 1.3043,
|
|
"step": 2645
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 4.0982845401222e-06,
|
|
"loss": 1.0815,
|
|
"step": 2646
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 4.091063496657528e-06,
|
|
"loss": 1.2234,
|
|
"step": 2647
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 4.083847184132208e-06,
|
|
"loss": 1.2251,
|
|
"step": 2648
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 4.0766356083239135e-06,
|
|
"loss": 1.2104,
|
|
"step": 2649
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 4.069428775006543e-06,
|
|
"loss": 1.2379,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 4.0622266899501715e-06,
|
|
"loss": 1.0288,
|
|
"step": 2651
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 4.055029358921097e-06,
|
|
"loss": 1.1659,
|
|
"step": 2652
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 4.047836787681796e-06,
|
|
"loss": 1.2307,
|
|
"step": 2653
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 4.0406489819909375e-06,
|
|
"loss": 1.1422,
|
|
"step": 2654
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 4.033465947603383e-06,
|
|
"loss": 1.4013,
|
|
"step": 2655
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 4.0262876902701645e-06,
|
|
"loss": 1.0783,
|
|
"step": 2656
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 4.019114215738491e-06,
|
|
"loss": 1.3818,
|
|
"step": 2657
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 4.011945529751739e-06,
|
|
"loss": 1.0501,
|
|
"step": 2658
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 4.004781638049465e-06,
|
|
"loss": 1.2815,
|
|
"step": 2659
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 3.997622546367373e-06,
|
|
"loss": 1.0606,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 3.990468260437325e-06,
|
|
"loss": 1.3899,
|
|
"step": 2661
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 3.983318785987347e-06,
|
|
"loss": 1.1811,
|
|
"step": 2662
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 3.976174128741594e-06,
|
|
"loss": 1.2362,
|
|
"step": 2663
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 3.969034294420382e-06,
|
|
"loss": 1.4571,
|
|
"step": 2664
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 3.961899288740156e-06,
|
|
"loss": 1.1636,
|
|
"step": 2665
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 3.954769117413494e-06,
|
|
"loss": 1.0789,
|
|
"step": 2666
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 3.947643786149117e-06,
|
|
"loss": 1.2439,
|
|
"step": 2667
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 3.940523300651845e-06,
|
|
"loss": 1.1752,
|
|
"step": 2668
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 3.933407666622643e-06,
|
|
"loss": 1.066,
|
|
"step": 2669
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 3.9262968897585875e-06,
|
|
"loss": 1.4695,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 3.919190975752847e-06,
|
|
"loss": 1.0734,
|
|
"step": 2671
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 3.912089930294722e-06,
|
|
"loss": 1.3198,
|
|
"step": 2672
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 3.9049937590696e-06,
|
|
"loss": 1.2891,
|
|
"step": 2673
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 3.897902467758966e-06,
|
|
"loss": 0.9462,
|
|
"step": 2674
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 3.890816062040408e-06,
|
|
"loss": 1.2934,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 3.883734547587594e-06,
|
|
"loss": 1.51,
|
|
"step": 2676
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 3.876657930070277e-06,
|
|
"loss": 1.3013,
|
|
"step": 2677
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 3.8695862151542894e-06,
|
|
"loss": 1.0739,
|
|
"step": 2678
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 3.862519408501536e-06,
|
|
"loss": 1.2365,
|
|
"step": 2679
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 3.855457515770003e-06,
|
|
"loss": 1.3233,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 3.8484005426137295e-06,
|
|
"loss": 1.1933,
|
|
"step": 2681
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 3.841348494682822e-06,
|
|
"loss": 1.0792,
|
|
"step": 2682
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 3.8343013776234375e-06,
|
|
"loss": 1.274,
|
|
"step": 2683
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 3.8272591970777965e-06,
|
|
"loss": 1.176,
|
|
"step": 2684
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 3.8202219586841575e-06,
|
|
"loss": 1.2036,
|
|
"step": 2685
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 3.813189668076822e-06,
|
|
"loss": 1.1749,
|
|
"step": 2686
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 3.806162330886144e-06,
|
|
"loss": 1.3569,
|
|
"step": 2687
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 3.7991399527384855e-06,
|
|
"loss": 1.1648,
|
|
"step": 2688
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 3.7921225392562645e-06,
|
|
"loss": 1.0877,
|
|
"step": 2689
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 3.785110096057909e-06,
|
|
"loss": 1.2359,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 3.7781026287578693e-06,
|
|
"loss": 1.3317,
|
|
"step": 2691
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 3.771100142966624e-06,
|
|
"loss": 1.0427,
|
|
"step": 2692
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 3.764102644290638e-06,
|
|
"loss": 1.5872,
|
|
"step": 2693
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 3.7571101383324114e-06,
|
|
"loss": 1.1649,
|
|
"step": 2694
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 3.750122630690428e-06,
|
|
"loss": 1.0313,
|
|
"step": 2695
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 3.743140126959175e-06,
|
|
"loss": 1.1143,
|
|
"step": 2696
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 3.736162632729139e-06,
|
|
"loss": 1.302,
|
|
"step": 2697
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 3.7291901535867892e-06,
|
|
"loss": 1.1872,
|
|
"step": 2698
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 3.722222695114577e-06,
|
|
"loss": 1.1854,
|
|
"step": 2699
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 3.715260262890944e-06,
|
|
"loss": 0.9324,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 3.7083028624903005e-06,
|
|
"loss": 1.0432,
|
|
"step": 2701
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 3.7013504994830285e-06,
|
|
"loss": 1.2963,
|
|
"step": 2702
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 3.694403179435474e-06,
|
|
"loss": 1.3832,
|
|
"step": 2703
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 3.6874609079099565e-06,
|
|
"loss": 1.3436,
|
|
"step": 2704
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 3.6805236904647423e-06,
|
|
"loss": 1.288,
|
|
"step": 2705
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 3.6735915326540573e-06,
|
|
"loss": 1.1284,
|
|
"step": 2706
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 3.6666644400280704e-06,
|
|
"loss": 1.1681,
|
|
"step": 2707
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 3.659742418132899e-06,
|
|
"loss": 1.1412,
|
|
"step": 2708
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 3.652825472510606e-06,
|
|
"loss": 1.0824,
|
|
"step": 2709
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 3.6459136086991807e-06,
|
|
"loss": 1.155,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 3.6390068322325444e-06,
|
|
"loss": 1.3866,
|
|
"step": 2711
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 3.63210514864056e-06,
|
|
"loss": 1.0944,
|
|
"step": 2712
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 3.6252085634489853e-06,
|
|
"loss": 1.2253,
|
|
"step": 2713
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 3.6183170821795245e-06,
|
|
"loss": 1.3071,
|
|
"step": 2714
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 3.6114307103497794e-06,
|
|
"loss": 1.5133,
|
|
"step": 2715
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 3.6045494534732594e-06,
|
|
"loss": 1.2986,
|
|
"step": 2716
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 3.597673317059396e-06,
|
|
"loss": 1.0798,
|
|
"step": 2717
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 3.5908023066134934e-06,
|
|
"loss": 1.138,
|
|
"step": 2718
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 3.5839364276367793e-06,
|
|
"loss": 1.2249,
|
|
"step": 2719
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 3.5770756856263557e-06,
|
|
"loss": 1.3359,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 3.5702200860752156e-06,
|
|
"loss": 1.2146,
|
|
"step": 2721
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 3.56336963447224e-06,
|
|
"loss": 1.2472,
|
|
"step": 2722
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 3.556524336302184e-06,
|
|
"loss": 1.0505,
|
|
"step": 2723
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 3.549684197045674e-06,
|
|
"loss": 1.4241,
|
|
"step": 2724
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 3.5428492221792066e-06,
|
|
"loss": 1.2083,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 3.536019417175154e-06,
|
|
"loss": 1.2978,
|
|
"step": 2726
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 3.529194787501735e-06,
|
|
"loss": 1.0606,
|
|
"step": 2727
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 3.5223753386230296e-06,
|
|
"loss": 1.0116,
|
|
"step": 2728
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 3.515561075998981e-06,
|
|
"loss": 1.4586,
|
|
"step": 2729
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 3.508752005085355e-06,
|
|
"loss": 1.2117,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 3.501948131333788e-06,
|
|
"loss": 1.1682,
|
|
"step": 2731
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 3.495149460191738e-06,
|
|
"loss": 1.2868,
|
|
"step": 2732
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 3.4883559971025006e-06,
|
|
"loss": 1.1528,
|
|
"step": 2733
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 3.48156774750521e-06,
|
|
"loss": 1.1911,
|
|
"step": 2734
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 3.474784716834817e-06,
|
|
"loss": 1.2878,
|
|
"step": 2735
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 3.4680069105220937e-06,
|
|
"loss": 1.0565,
|
|
"step": 2736
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 3.4612343339936416e-06,
|
|
"loss": 1.1749,
|
|
"step": 2737
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 3.4544669926718545e-06,
|
|
"loss": 1.1436,
|
|
"step": 2738
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 3.4477048919749545e-06,
|
|
"loss": 1.1349,
|
|
"step": 2739
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 3.440948037316957e-06,
|
|
"loss": 1.3094,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 3.434196434107677e-06,
|
|
"loss": 1.0459,
|
|
"step": 2741
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 3.427450087752733e-06,
|
|
"loss": 0.9729,
|
|
"step": 2742
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 3.4207090036535273e-06,
|
|
"loss": 1.2487,
|
|
"step": 2743
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 3.41397318720725e-06,
|
|
"loss": 1.3042,
|
|
"step": 2744
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 3.407242643806875e-06,
|
|
"loss": 1.2261,
|
|
"step": 2745
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 3.4005173788411516e-06,
|
|
"loss": 1.0368,
|
|
"step": 2746
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 3.3937973976946125e-06,
|
|
"loss": 1.2019,
|
|
"step": 2747
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 3.3870827057475485e-06,
|
|
"loss": 1.3005,
|
|
"step": 2748
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 3.3803733083760204e-06,
|
|
"loss": 1.0447,
|
|
"step": 2749
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 3.3736692109518464e-06,
|
|
"loss": 1.1139,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 3.366970418842611e-06,
|
|
"loss": 1.2403,
|
|
"step": 2751
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 3.3602769374116427e-06,
|
|
"loss": 1.1305,
|
|
"step": 2752
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 3.353588772018015e-06,
|
|
"loss": 1.065,
|
|
"step": 2753
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 3.3469059280165616e-06,
|
|
"loss": 1.0502,
|
|
"step": 2754
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 3.34022841075783e-06,
|
|
"loss": 1.2448,
|
|
"step": 2755
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 3.3335562255881272e-06,
|
|
"loss": 1.0779,
|
|
"step": 2756
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 3.3268893778494772e-06,
|
|
"loss": 0.9968,
|
|
"step": 2757
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 3.3202278728796334e-06,
|
|
"loss": 1.1893,
|
|
"step": 2758
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 3.3135717160120793e-06,
|
|
"loss": 1.4064,
|
|
"step": 2759
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 3.3069209125759994e-06,
|
|
"loss": 1.0925,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 3.300275467896308e-06,
|
|
"loss": 1.2451,
|
|
"step": 2761
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 3.29363538729363e-06,
|
|
"loss": 1.265,
|
|
"step": 2762
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 3.2870006760842763e-06,
|
|
"loss": 1.1963,
|
|
"step": 2763
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 3.2803713395802805e-06,
|
|
"loss": 1.0281,
|
|
"step": 2764
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 3.273747383089361e-06,
|
|
"loss": 1.3674,
|
|
"step": 2765
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 3.267128811914929e-06,
|
|
"loss": 1.2965,
|
|
"step": 2766
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 3.2605156313560927e-06,
|
|
"loss": 1.3364,
|
|
"step": 2767
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 3.2539078467076356e-06,
|
|
"loss": 1.2887,
|
|
"step": 2768
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 3.2473054632600233e-06,
|
|
"loss": 1.2492,
|
|
"step": 2769
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 3.2407084862993955e-06,
|
|
"loss": 1.429,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 3.2341169211075706e-06,
|
|
"loss": 1.2893,
|
|
"step": 2771
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 3.227530772962025e-06,
|
|
"loss": 1.1793,
|
|
"step": 2772
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 3.2209500471359034e-06,
|
|
"loss": 1.312,
|
|
"step": 2773
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 3.214374748898006e-06,
|
|
"loss": 1.0166,
|
|
"step": 2774
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 3.2078048835127874e-06,
|
|
"loss": 1.0934,
|
|
"step": 2775
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 3.201240456240359e-06,
|
|
"loss": 1.1883,
|
|
"step": 2776
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 3.1946814723364705e-06,
|
|
"loss": 1.1433,
|
|
"step": 2777
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 3.188127937052512e-06,
|
|
"loss": 1.082,
|
|
"step": 2778
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 3.181579855635527e-06,
|
|
"loss": 1.0416,
|
|
"step": 2779
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 3.1750372333281655e-06,
|
|
"loss": 1.0494,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 3.1685000753687335e-06,
|
|
"loss": 1.1188,
|
|
"step": 2781
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 3.1619683869911466e-06,
|
|
"loss": 1.355,
|
|
"step": 2782
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 3.155442173424942e-06,
|
|
"loss": 1.0772,
|
|
"step": 2783
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 3.148921439895283e-06,
|
|
"loss": 1.1423,
|
|
"step": 2784
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 3.1424061916229354e-06,
|
|
"loss": 1.3381,
|
|
"step": 2785
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 3.135896433824277e-06,
|
|
"loss": 0.9714,
|
|
"step": 2786
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 3.1293921717112897e-06,
|
|
"loss": 1.2657,
|
|
"step": 2787
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 3.1228934104915534e-06,
|
|
"loss": 1.1798,
|
|
"step": 2788
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 3.1164001553682486e-06,
|
|
"loss": 1.0889,
|
|
"step": 2789
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 3.1099124115401447e-06,
|
|
"loss": 1.1902,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 3.1034301842015956e-06,
|
|
"loss": 1.3495,
|
|
"step": 2791
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 3.096953478542539e-06,
|
|
"loss": 0.8912,
|
|
"step": 2792
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 3.0904822997485007e-06,
|
|
"loss": 1.0173,
|
|
"step": 2793
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 3.08401665300057e-06,
|
|
"loss": 1.2213,
|
|
"step": 2794
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 3.0775565434754095e-06,
|
|
"loss": 1.2127,
|
|
"step": 2795
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 3.0711019763452554e-06,
|
|
"loss": 1.3036,
|
|
"step": 2796
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 3.0646529567779003e-06,
|
|
"loss": 1.0738,
|
|
"step": 2797
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 3.0582094899366954e-06,
|
|
"loss": 1.2384,
|
|
"step": 2798
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 3.051771580980547e-06,
|
|
"loss": 1.2369,
|
|
"step": 2799
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 3.0453392350639086e-06,
|
|
"loss": 1.1699,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 3.0389124573367888e-06,
|
|
"loss": 1.2472,
|
|
"step": 2801
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 3.0324912529447304e-06,
|
|
"loss": 1.165,
|
|
"step": 2802
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 3.0260756270288117e-06,
|
|
"loss": 1.1897,
|
|
"step": 2803
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 3.019665584725657e-06,
|
|
"loss": 1.0674,
|
|
"step": 2804
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 3.013261131167401e-06,
|
|
"loss": 1.4547,
|
|
"step": 2805
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 3.006862271481722e-06,
|
|
"loss": 1.1031,
|
|
"step": 2806
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 3.00046901079181e-06,
|
|
"loss": 1.2436,
|
|
"step": 2807
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 2.9940813542163718e-06,
|
|
"loss": 1.0089,
|
|
"step": 2808
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 2.9876993068696348e-06,
|
|
"loss": 1.0658,
|
|
"step": 2809
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 2.9813228738613288e-06,
|
|
"loss": 1.4424,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 2.97495206029669e-06,
|
|
"loss": 1.0485,
|
|
"step": 2811
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 2.968586871276453e-06,
|
|
"loss": 1.3838,
|
|
"step": 2812
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 2.9622273118968527e-06,
|
|
"loss": 1.1266,
|
|
"step": 2813
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 2.9558733872496203e-06,
|
|
"loss": 1.2666,
|
|
"step": 2814
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 2.9495251024219673e-06,
|
|
"loss": 1.1287,
|
|
"step": 2815
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 2.943182462496593e-06,
|
|
"loss": 1.077,
|
|
"step": 2816
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 2.936845472551677e-06,
|
|
"loss": 1.4125,
|
|
"step": 2817
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 2.9305141376608814e-06,
|
|
"loss": 1.319,
|
|
"step": 2818
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 2.9241884628933302e-06,
|
|
"loss": 1.3469,
|
|
"step": 2819
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 2.9178684533136193e-06,
|
|
"loss": 1.0481,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 2.9115541139818193e-06,
|
|
"loss": 1.2747,
|
|
"step": 2821
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 2.9052454499534387e-06,
|
|
"loss": 1.2317,
|
|
"step": 2822
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 2.8989424662794662e-06,
|
|
"loss": 1.1892,
|
|
"step": 2823
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 2.8926451680063273e-06,
|
|
"loss": 1.1401,
|
|
"step": 2824
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 2.8863535601758962e-06,
|
|
"loss": 1.2515,
|
|
"step": 2825
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 2.880067647825505e-06,
|
|
"loss": 1.3564,
|
|
"step": 2826
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 2.8737874359879028e-06,
|
|
"loss": 0.9757,
|
|
"step": 2827
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 2.867512929691294e-06,
|
|
"loss": 1.2656,
|
|
"step": 2828
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 2.8612441339593132e-06,
|
|
"loss": 1.2598,
|
|
"step": 2829
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 2.8549810538110078e-06,
|
|
"loss": 1.1479,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 2.8487236942608664e-06,
|
|
"loss": 1.0848,
|
|
"step": 2831
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 2.8424720603187882e-06,
|
|
"loss": 1.0461,
|
|
"step": 2832
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 2.836226156990087e-06,
|
|
"loss": 1.3646,
|
|
"step": 2833
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 2.8299859892754986e-06,
|
|
"loss": 1.1296,
|
|
"step": 2834
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 2.8237515621711564e-06,
|
|
"loss": 1.1809,
|
|
"step": 2835
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 2.8175228806685996e-06,
|
|
"loss": 0.9695,
|
|
"step": 2836
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 2.8112999497547667e-06,
|
|
"loss": 1.3151,
|
|
"step": 2837
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 2.805082774412001e-06,
|
|
"loss": 1.0517,
|
|
"step": 2838
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 2.798871359618026e-06,
|
|
"loss": 0.9697,
|
|
"step": 2839
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 2.7926657103459586e-06,
|
|
"loss": 1.3886,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 2.7864658315642978e-06,
|
|
"loss": 1.2959,
|
|
"step": 2841
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 2.7802717282369217e-06,
|
|
"loss": 0.919,
|
|
"step": 2842
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 2.7740834053230902e-06,
|
|
"loss": 1.1627,
|
|
"step": 2843
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 2.7679008677774298e-06,
|
|
"loss": 1.3269,
|
|
"step": 2844
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 2.7617241205499336e-06,
|
|
"loss": 1.2136,
|
|
"step": 2845
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 2.7555531685859684e-06,
|
|
"loss": 1.1514,
|
|
"step": 2846
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 2.749388016826242e-06,
|
|
"loss": 1.2076,
|
|
"step": 2847
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 2.743228670206841e-06,
|
|
"loss": 0.9937,
|
|
"step": 2848
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 2.7370751336591893e-06,
|
|
"loss": 1.1081,
|
|
"step": 2849
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 2.7309274121100604e-06,
|
|
"loss": 1.1269,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 2.7247855104815813e-06,
|
|
"loss": 1.2046,
|
|
"step": 2851
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 2.7186494336912096e-06,
|
|
"loss": 1.2286,
|
|
"step": 2852
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 2.712519186651742e-06,
|
|
"loss": 1.3973,
|
|
"step": 2853
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 2.706394774271308e-06,
|
|
"loss": 1.3364,
|
|
"step": 2854
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 2.7002762014533644e-06,
|
|
"loss": 1.0843,
|
|
"step": 2855
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 2.6941634730966993e-06,
|
|
"loss": 1.0794,
|
|
"step": 2856
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 2.6880565940954128e-06,
|
|
"loss": 1.1837,
|
|
"step": 2857
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 2.6819555693389267e-06,
|
|
"loss": 1.1954,
|
|
"step": 2858
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 2.6758604037119686e-06,
|
|
"loss": 1.3124,
|
|
"step": 2859
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 2.6697711020945893e-06,
|
|
"loss": 1.1889,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 2.6636876693621304e-06,
|
|
"loss": 1.2933,
|
|
"step": 2861
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 2.6576101103852393e-06,
|
|
"loss": 1.1614,
|
|
"step": 2862
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 2.651538430029866e-06,
|
|
"loss": 1.1823,
|
|
"step": 2863
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 2.645472633157248e-06,
|
|
"loss": 1.1016,
|
|
"step": 2864
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 2.639412724623909e-06,
|
|
"loss": 1.3649,
|
|
"step": 2865
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 2.6333587092816724e-06,
|
|
"loss": 0.8976,
|
|
"step": 2866
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 2.627310591977621e-06,
|
|
"loss": 1.0561,
|
|
"step": 2867
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 2.621268377554138e-06,
|
|
"loss": 1.0242,
|
|
"step": 2868
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 2.6152320708488677e-06,
|
|
"loss": 0.9308,
|
|
"step": 2869
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 2.609201676694724e-06,
|
|
"loss": 1.1505,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 2.6031771999198984e-06,
|
|
"loss": 0.9702,
|
|
"step": 2871
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 2.5971586453478257e-06,
|
|
"loss": 1.1772,
|
|
"step": 2872
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 2.5911460177972194e-06,
|
|
"loss": 1.3345,
|
|
"step": 2873
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 2.585139322082033e-06,
|
|
"loss": 1.1151,
|
|
"step": 2874
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 2.579138563011475e-06,
|
|
"loss": 1.2123,
|
|
"step": 2875
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 2.5731437453900076e-06,
|
|
"loss": 1.1104,
|
|
"step": 2876
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 2.567154874017326e-06,
|
|
"loss": 1.0099,
|
|
"step": 2877
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 2.5611719536883696e-06,
|
|
"loss": 1.1201,
|
|
"step": 2878
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 2.555194989193308e-06,
|
|
"loss": 1.1074,
|
|
"step": 2879
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 2.549223985317555e-06,
|
|
"loss": 1.2592,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 2.5432589468417392e-06,
|
|
"loss": 1.2132,
|
|
"step": 2881
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 2.537299878541717e-06,
|
|
"loss": 1.3405,
|
|
"step": 2882
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 2.531346785188565e-06,
|
|
"loss": 1.2627,
|
|
"step": 2883
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 2.525399671548575e-06,
|
|
"loss": 1.0622,
|
|
"step": 2884
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 2.5194585423832586e-06,
|
|
"loss": 1.1091,
|
|
"step": 2885
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 2.513523402449325e-06,
|
|
"loss": 1.2214,
|
|
"step": 2886
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 2.507594256498692e-06,
|
|
"loss": 1.3454,
|
|
"step": 2887
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 2.5016711092784873e-06,
|
|
"loss": 1.1334,
|
|
"step": 2888
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 2.495753965531017e-06,
|
|
"loss": 1.1643,
|
|
"step": 2889
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 2.489842829993796e-06,
|
|
"loss": 1.1248,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 2.483937707399532e-06,
|
|
"loss": 1.253,
|
|
"step": 2891
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 2.478038602476097e-06,
|
|
"loss": 1.025,
|
|
"step": 2892
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 2.4721455199465696e-06,
|
|
"loss": 1.1608,
|
|
"step": 2893
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 2.466258464529191e-06,
|
|
"loss": 1.1603,
|
|
"step": 2894
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 2.4603774409373805e-06,
|
|
"loss": 1.4369,
|
|
"step": 2895
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 2.454502453879736e-06,
|
|
"loss": 1.0495,
|
|
"step": 2896
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 2.4486335080600052e-06,
|
|
"loss": 1.3258,
|
|
"step": 2897
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 2.4427706081771187e-06,
|
|
"loss": 1.0812,
|
|
"step": 2898
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 2.4369137589251523e-06,
|
|
"loss": 1.1873,
|
|
"step": 2899
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 2.4310629649933403e-06,
|
|
"loss": 1.2705,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 2.4252182310660765e-06,
|
|
"loss": 1.3162,
|
|
"step": 2901
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 2.419379561822894e-06,
|
|
"loss": 1.3585,
|
|
"step": 2902
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 2.4135469619384734e-06,
|
|
"loss": 1.1402,
|
|
"step": 2903
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 2.407720436082632e-06,
|
|
"loss": 1.1269,
|
|
"step": 2904
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 2.401899988920334e-06,
|
|
"loss": 1.1853,
|
|
"step": 2905
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 2.3960856251116695e-06,
|
|
"loss": 1.2964,
|
|
"step": 2906
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 2.390277349311856e-06,
|
|
"loss": 1.4157,
|
|
"step": 2907
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 2.384475166171242e-06,
|
|
"loss": 1.0411,
|
|
"step": 2908
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 2.378679080335291e-06,
|
|
"loss": 1.2438,
|
|
"step": 2909
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 2.372889096444596e-06,
|
|
"loss": 1.21,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 2.3671052191348563e-06,
|
|
"loss": 1.4291,
|
|
"step": 2911
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 2.361327453036879e-06,
|
|
"loss": 1.0826,
|
|
"step": 2912
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 2.3555558027765923e-06,
|
|
"loss": 1.1556,
|
|
"step": 2913
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 2.3497902729750076e-06,
|
|
"loss": 1.1609,
|
|
"step": 2914
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 2.344030868248255e-06,
|
|
"loss": 1.4149,
|
|
"step": 2915
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 2.3382775932075485e-06,
|
|
"loss": 1.2097,
|
|
"step": 2916
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 2.3325304524591974e-06,
|
|
"loss": 0.9413,
|
|
"step": 2917
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 2.326789450604606e-06,
|
|
"loss": 1.2844,
|
|
"step": 2918
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 2.321054592240255e-06,
|
|
"loss": 1.1926,
|
|
"step": 2919
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 2.3153258819577087e-06,
|
|
"loss": 0.9268,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 2.3096033243436113e-06,
|
|
"loss": 1.3254,
|
|
"step": 2921
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 2.3038869239796755e-06,
|
|
"loss": 1.418,
|
|
"step": 2922
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 2.2981766854426957e-06,
|
|
"loss": 1.0603,
|
|
"step": 2923
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 2.2924726133045216e-06,
|
|
"loss": 1.2655,
|
|
"step": 2924
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 2.286774712132066e-06,
|
|
"loss": 1.1537,
|
|
"step": 2925
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 2.2810829864873107e-06,
|
|
"loss": 1.1567,
|
|
"step": 2926
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 2.275397440927285e-06,
|
|
"loss": 0.9134,
|
|
"step": 2927
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 2.269718080004071e-06,
|
|
"loss": 1.1938,
|
|
"step": 2928
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 2.2640449082647987e-06,
|
|
"loss": 1.2756,
|
|
"step": 2929
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 2.2583779302516497e-06,
|
|
"loss": 1.1118,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 2.2527171505018376e-06,
|
|
"loss": 1.3197,
|
|
"step": 2931
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 2.247062573547616e-06,
|
|
"loss": 1.2696,
|
|
"step": 2932
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 2.2414142039162802e-06,
|
|
"loss": 1.0358,
|
|
"step": 2933
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 2.2357720461301393e-06,
|
|
"loss": 1.3146,
|
|
"step": 2934
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 2.230136104706545e-06,
|
|
"loss": 1.3084,
|
|
"step": 2935
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 2.2245063841578653e-06,
|
|
"loss": 1.1391,
|
|
"step": 2936
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 2.218882888991483e-06,
|
|
"loss": 1.2307,
|
|
"step": 2937
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 2.2132656237098093e-06,
|
|
"loss": 1.1251,
|
|
"step": 2938
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 2.2076545928102498e-06,
|
|
"loss": 0.9966,
|
|
"step": 2939
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 2.202049800785235e-06,
|
|
"loss": 1.3093,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 2.1964512521221937e-06,
|
|
"loss": 1.103,
|
|
"step": 2941
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 2.1908589513035507e-06,
|
|
"loss": 1.1003,
|
|
"step": 2942
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 2.1852729028067386e-06,
|
|
"loss": 1.1276,
|
|
"step": 2943
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 2.1796931111041784e-06,
|
|
"loss": 1.0997,
|
|
"step": 2944
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 2.174119580663282e-06,
|
|
"loss": 1.0498,
|
|
"step": 2945
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 2.168552315946445e-06,
|
|
"loss": 1.2657,
|
|
"step": 2946
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 2.1629913214110575e-06,
|
|
"loss": 0.9905,
|
|
"step": 2947
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 2.157436601509477e-06,
|
|
"loss": 1.1519,
|
|
"step": 2948
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 2.151888160689044e-06,
|
|
"loss": 1.1681,
|
|
"step": 2949
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 2.14634600339207e-06,
|
|
"loss": 1.3574,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 2.140810134055833e-06,
|
|
"loss": 1.1735,
|
|
"step": 2951
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 2.1352805571125836e-06,
|
|
"loss": 1.2781,
|
|
"step": 2952
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 2.1297572769895292e-06,
|
|
"loss": 0.9195,
|
|
"step": 2953
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 2.1242402981088317e-06,
|
|
"loss": 1.2194,
|
|
"step": 2954
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 2.1187296248876232e-06,
|
|
"loss": 1.2394,
|
|
"step": 2955
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 2.1132252617379656e-06,
|
|
"loss": 1.2229,
|
|
"step": 2956
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 2.1077272130668856e-06,
|
|
"loss": 1.2328,
|
|
"step": 2957
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 2.1022354832763534e-06,
|
|
"loss": 1.2751,
|
|
"step": 2958
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 2.096750076763264e-06,
|
|
"loss": 1.0386,
|
|
"step": 2959
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 2.091270997919471e-06,
|
|
"loss": 1.2383,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 2.085798251131749e-06,
|
|
"loss": 1.2233,
|
|
"step": 2961
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 2.0803318407818018e-06,
|
|
"loss": 1.2355,
|
|
"step": 2962
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 2.0748717712462727e-06,
|
|
"loss": 1.1152,
|
|
"step": 2963
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 2.0694180468967085e-06,
|
|
"loss": 1.1067,
|
|
"step": 2964
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 2.0639706720995954e-06,
|
|
"loss": 1.3264,
|
|
"step": 2965
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 2.0585296512163254e-06,
|
|
"loss": 1.0374,
|
|
"step": 2966
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 2.0530949886032004e-06,
|
|
"loss": 1.1553,
|
|
"step": 2967
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 2.0476666886114436e-06,
|
|
"loss": 1.1223,
|
|
"step": 2968
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 2.042244755587173e-06,
|
|
"loss": 1.0933,
|
|
"step": 2969
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 2.0368291938714136e-06,
|
|
"loss": 1.1878,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 2.0314200078000857e-06,
|
|
"loss": 1.1863,
|
|
"step": 2971
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 2.0260172017040138e-06,
|
|
"loss": 1.1519,
|
|
"step": 2972
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 2.0206207799089026e-06,
|
|
"loss": 1.2185,
|
|
"step": 2973
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 2.0152307467353548e-06,
|
|
"loss": 1.4839,
|
|
"step": 2974
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 2.0098471064988522e-06,
|
|
"loss": 1.279,
|
|
"step": 2975
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 2.004469863509758e-06,
|
|
"loss": 1.2496,
|
|
"step": 2976
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 1.999099022073321e-06,
|
|
"loss": 1.2385,
|
|
"step": 2977
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 1.993734586489656e-06,
|
|
"loss": 1.2295,
|
|
"step": 2978
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 1.988376561053751e-06,
|
|
"loss": 1.2487,
|
|
"step": 2979
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 1.9830249500554688e-06,
|
|
"loss": 0.958,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 1.977679757779524e-06,
|
|
"loss": 1.1284,
|
|
"step": 2981
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 1.9723409885055035e-06,
|
|
"loss": 1.1333,
|
|
"step": 2982
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 1.967008646507845e-06,
|
|
"loss": 1.501,
|
|
"step": 2983
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 1.961682736055842e-06,
|
|
"loss": 1.2149,
|
|
"step": 2984
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 1.956363261413642e-06,
|
|
"loss": 1.1938,
|
|
"step": 2985
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 1.9510502268402354e-06,
|
|
"loss": 1.3602,
|
|
"step": 2986
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 1.945743636589454e-06,
|
|
"loss": 1.3049,
|
|
"step": 2987
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 1.9404434949099837e-06,
|
|
"loss": 1.3541,
|
|
"step": 2988
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 1.9351498060453243e-06,
|
|
"loss": 1.1633,
|
|
"step": 2989
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 1.9298625742338327e-06,
|
|
"loss": 1.02,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 1.9245818037086815e-06,
|
|
"loss": 1.2848,
|
|
"step": 2991
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 1.919307498697872e-06,
|
|
"loss": 1.279,
|
|
"step": 2992
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 1.9140396634242375e-06,
|
|
"loss": 1.0266,
|
|
"step": 2993
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 1.908778302105422e-06,
|
|
"loss": 1.6259,
|
|
"step": 2994
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 1.9035234189538888e-06,
|
|
"loss": 1.1959,
|
|
"step": 2995
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 1.8982750181769127e-06,
|
|
"loss": 1.1485,
|
|
"step": 2996
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 1.8930331039765858e-06,
|
|
"loss": 1.2732,
|
|
"step": 2997
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 1.8877976805498e-06,
|
|
"loss": 1.0878,
|
|
"step": 2998
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 1.8825687520882485e-06,
|
|
"loss": 1.244,
|
|
"step": 2999
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 1.877346322778436e-06,
|
|
"loss": 1.2636,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 1.8721303968016468e-06,
|
|
"loss": 1.2349,
|
|
"step": 3001
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 1.8669209783339736e-06,
|
|
"loss": 1.1511,
|
|
"step": 3002
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 1.8617180715462913e-06,
|
|
"loss": 1.1877,
|
|
"step": 3003
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 1.8565216806042574e-06,
|
|
"loss": 1.2159,
|
|
"step": 3004
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 1.8513318096683285e-06,
|
|
"loss": 1.0952,
|
|
"step": 3005
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 1.8461484628937187e-06,
|
|
"loss": 1.3726,
|
|
"step": 3006
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 1.840971644430438e-06,
|
|
"loss": 1.1411,
|
|
"step": 3007
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 1.8358013584232593e-06,
|
|
"loss": 1.3205,
|
|
"step": 3008
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 1.830637609011724e-06,
|
|
"loss": 1.228,
|
|
"step": 3009
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 1.8254804003301508e-06,
|
|
"loss": 0.9721,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 1.8203297365076112e-06,
|
|
"loss": 1.0082,
|
|
"step": 3011
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 1.8151856216679397e-06,
|
|
"loss": 1.0759,
|
|
"step": 3012
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 1.8100480599297254e-06,
|
|
"loss": 1.1058,
|
|
"step": 3013
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 1.804917055406319e-06,
|
|
"loss": 1.2792,
|
|
"step": 3014
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 1.7997926122058108e-06,
|
|
"loss": 1.2165,
|
|
"step": 3015
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 1.7946747344310445e-06,
|
|
"loss": 1.3616,
|
|
"step": 3016
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 1.7895634261796035e-06,
|
|
"loss": 1.296,
|
|
"step": 3017
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 1.784458691543811e-06,
|
|
"loss": 1.226,
|
|
"step": 3018
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 1.7793605346107335e-06,
|
|
"loss": 1.2127,
|
|
"step": 3019
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 1.7742689594621654e-06,
|
|
"loss": 1.191,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 1.7691839701746294e-06,
|
|
"loss": 1.2482,
|
|
"step": 3021
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 1.7641055708193833e-06,
|
|
"loss": 1.1426,
|
|
"step": 3022
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 1.7590337654624023e-06,
|
|
"loss": 0.8085,
|
|
"step": 3023
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 1.7539685581643807e-06,
|
|
"loss": 1.091,
|
|
"step": 3024
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 1.7489099529807407e-06,
|
|
"loss": 1.0346,
|
|
"step": 3025
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 1.7438579539616018e-06,
|
|
"loss": 1.0985,
|
|
"step": 3026
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 1.7388125651518107e-06,
|
|
"loss": 1.1112,
|
|
"step": 3027
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 1.7337737905909124e-06,
|
|
"loss": 1.0485,
|
|
"step": 3028
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 1.7287416343131569e-06,
|
|
"loss": 1.1595,
|
|
"step": 3029
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 1.7237161003475023e-06,
|
|
"loss": 1.2121,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 1.718697192717591e-06,
|
|
"loss": 1.1899,
|
|
"step": 3031
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 1.7136849154417756e-06,
|
|
"loss": 1.2891,
|
|
"step": 3032
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 1.7086792725330904e-06,
|
|
"loss": 1.1486,
|
|
"step": 3033
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 1.703680267999257e-06,
|
|
"loss": 1.1111,
|
|
"step": 3034
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 1.698687905842692e-06,
|
|
"loss": 1.3982,
|
|
"step": 3035
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 1.6937021900604844e-06,
|
|
"loss": 1.396,
|
|
"step": 3036
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 1.6887231246444047e-06,
|
|
"loss": 1.3596,
|
|
"step": 3037
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 1.6837507135808962e-06,
|
|
"loss": 1.3534,
|
|
"step": 3038
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 1.678784960851082e-06,
|
|
"loss": 1.2092,
|
|
"step": 3039
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 1.6738258704307476e-06,
|
|
"loss": 1.2184,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 1.6688734462903432e-06,
|
|
"loss": 1.0169,
|
|
"step": 3041
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 1.6639276923949922e-06,
|
|
"loss": 1.1618,
|
|
"step": 3042
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 1.6589886127044607e-06,
|
|
"loss": 1.1449,
|
|
"step": 3043
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 1.6540562111731884e-06,
|
|
"loss": 1.2539,
|
|
"step": 3044
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 1.649130491750256e-06,
|
|
"loss": 1.1903,
|
|
"step": 3045
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 1.6442114583793978e-06,
|
|
"loss": 0.9656,
|
|
"step": 3046
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 1.6392991149990011e-06,
|
|
"loss": 1.2419,
|
|
"step": 3047
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 1.6343934655420847e-06,
|
|
"loss": 1.2045,
|
|
"step": 3048
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 1.6294945139363195e-06,
|
|
"loss": 1.0785,
|
|
"step": 3049
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 1.6246022641040072e-06,
|
|
"loss": 1.0356,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 1.6197167199620834e-06,
|
|
"loss": 1.2467,
|
|
"step": 3051
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 1.6148378854221226e-06,
|
|
"loss": 1.2407,
|
|
"step": 3052
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 1.6099657643903189e-06,
|
|
"loss": 1.2351,
|
|
"step": 3053
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 1.605100360767492e-06,
|
|
"loss": 1.2947,
|
|
"step": 3054
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 1.6002416784490936e-06,
|
|
"loss": 1.3197,
|
|
"step": 3055
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 1.5953897213251746e-06,
|
|
"loss": 1.0015,
|
|
"step": 3056
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 1.5905444932804215e-06,
|
|
"loss": 1.0639,
|
|
"step": 3057
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 1.5857059981941214e-06,
|
|
"loss": 1.255,
|
|
"step": 3058
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 1.580874239940171e-06,
|
|
"loss": 1.5061,
|
|
"step": 3059
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 1.5760492223870827e-06,
|
|
"loss": 1.1375,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 1.5712309493979628e-06,
|
|
"loss": 1.2299,
|
|
"step": 3061
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 1.5664194248305187e-06,
|
|
"loss": 1.0377,
|
|
"step": 3062
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 1.5616146525370547e-06,
|
|
"loss": 1.1915,
|
|
"step": 3063
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 1.5568166363644754e-06,
|
|
"loss": 1.2,
|
|
"step": 3064
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 1.5520253801542696e-06,
|
|
"loss": 0.983,
|
|
"step": 3065
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 1.5472408877425116e-06,
|
|
"loss": 1.1839,
|
|
"step": 3066
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 1.542463162959873e-06,
|
|
"loss": 1.2119,
|
|
"step": 3067
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 1.537692209631587e-06,
|
|
"loss": 0.9335,
|
|
"step": 3068
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 1.5329280315774854e-06,
|
|
"loss": 1.4445,
|
|
"step": 3069
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 1.5281706326119616e-06,
|
|
"loss": 1.2327,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 1.5234200165439873e-06,
|
|
"loss": 1.0621,
|
|
"step": 3071
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 1.5186761871771073e-06,
|
|
"loss": 1.3748,
|
|
"step": 3072
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 1.513939148309419e-06,
|
|
"loss": 1.0615,
|
|
"step": 3073
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 1.5092089037335988e-06,
|
|
"loss": 1.1438,
|
|
"step": 3074
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 1.504485457236875e-06,
|
|
"loss": 1.1158,
|
|
"step": 3075
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 1.4997688126010313e-06,
|
|
"loss": 1.0408,
|
|
"step": 3076
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 1.4950589736024135e-06,
|
|
"loss": 1.1047,
|
|
"step": 3077
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 1.490355944011912e-06,
|
|
"loss": 1.0323,
|
|
"step": 3078
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 1.4856597275949658e-06,
|
|
"loss": 1.1355,
|
|
"step": 3079
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 1.4809703281115606e-06,
|
|
"loss": 1.0828,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 1.4762877493162265e-06,
|
|
"loss": 1.2907,
|
|
"step": 3081
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 1.4716119949580276e-06,
|
|
"loss": 1.1535,
|
|
"step": 3082
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 1.4669430687805674e-06,
|
|
"loss": 1.0415,
|
|
"step": 3083
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 1.4622809745219823e-06,
|
|
"loss": 1.2596,
|
|
"step": 3084
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 1.4576257159149343e-06,
|
|
"loss": 1.1332,
|
|
"step": 3085
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 1.4529772966866207e-06,
|
|
"loss": 0.9762,
|
|
"step": 3086
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 1.4483357205587577e-06,
|
|
"loss": 1.2017,
|
|
"step": 3087
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 1.4437009912475796e-06,
|
|
"loss": 1.0811,
|
|
"step": 3088
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 1.439073112463849e-06,
|
|
"loss": 1.109,
|
|
"step": 3089
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 1.4344520879128342e-06,
|
|
"loss": 1.3922,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 1.4298379212943159e-06,
|
|
"loss": 1.339,
|
|
"step": 3091
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 1.4252306163025953e-06,
|
|
"loss": 1.0104,
|
|
"step": 3092
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 1.420630176626462e-06,
|
|
"loss": 1.002,
|
|
"step": 3093
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 1.4160366059492236e-06,
|
|
"loss": 1.0072,
|
|
"step": 3094
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 1.4114499079486833e-06,
|
|
"loss": 1.1355,
|
|
"step": 3095
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 1.4068700862971385e-06,
|
|
"loss": 1.2832,
|
|
"step": 3096
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 1.402297144661391e-06,
|
|
"loss": 1.2097,
|
|
"step": 3097
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 1.3977310867027173e-06,
|
|
"loss": 1.1407,
|
|
"step": 3098
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 1.3931719160769009e-06,
|
|
"loss": 1.208,
|
|
"step": 3099
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 1.3886196364341997e-06,
|
|
"loss": 1.0987,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 1.3840742514193538e-06,
|
|
"loss": 1.3224,
|
|
"step": 3101
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 1.3795357646715934e-06,
|
|
"loss": 1.2591,
|
|
"step": 3102
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 1.375004179824616e-06,
|
|
"loss": 1.1959,
|
|
"step": 3103
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 1.3704795005065952e-06,
|
|
"loss": 1.2655,
|
|
"step": 3104
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 1.3659617303401763e-06,
|
|
"loss": 1.1652,
|
|
"step": 3105
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 1.3614508729424759e-06,
|
|
"loss": 1.1269,
|
|
"step": 3106
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 1.3569469319250718e-06,
|
|
"loss": 1.0461,
|
|
"step": 3107
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 1.3524499108940036e-06,
|
|
"loss": 1.2416,
|
|
"step": 3108
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 1.3479598134497796e-06,
|
|
"loss": 1.2198,
|
|
"step": 3109
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 1.343476643187348e-06,
|
|
"loss": 0.943,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 1.339000403696129e-06,
|
|
"loss": 1.0365,
|
|
"step": 3111
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 1.3345310985599814e-06,
|
|
"loss": 0.9632,
|
|
"step": 3112
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 1.3300687313572147e-06,
|
|
"loss": 1.1703,
|
|
"step": 3113
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 1.3256133056605901e-06,
|
|
"loss": 1.248,
|
|
"step": 3114
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 1.3211648250372999e-06,
|
|
"loss": 1.4824,
|
|
"step": 3115
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 1.316723293048985e-06,
|
|
"loss": 1.1282,
|
|
"step": 3116
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 1.3122887132517237e-06,
|
|
"loss": 1.2775,
|
|
"step": 3117
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 1.3078610891960153e-06,
|
|
"loss": 1.1124,
|
|
"step": 3118
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 1.3034404244268074e-06,
|
|
"loss": 1.0652,
|
|
"step": 3119
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 1.299026722483464e-06,
|
|
"loss": 1.2322,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 1.2946199868997744e-06,
|
|
"loss": 1.0111,
|
|
"step": 3121
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 1.2902202212039606e-06,
|
|
"loss": 1.0744,
|
|
"step": 3122
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 1.2858274289186534e-06,
|
|
"loss": 1.159,
|
|
"step": 3123
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 1.2814416135609042e-06,
|
|
"loss": 0.8308,
|
|
"step": 3124
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 1.2770627786421796e-06,
|
|
"loss": 1.2301,
|
|
"step": 3125
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 1.272690927668353e-06,
|
|
"loss": 1.213,
|
|
"step": 3126
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 1.2683260641397144e-06,
|
|
"loss": 1.0811,
|
|
"step": 3127
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 1.2639681915509515e-06,
|
|
"loss": 1.2594,
|
|
"step": 3128
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 1.2596173133911583e-06,
|
|
"loss": 1.1433,
|
|
"step": 3129
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 1.2552734331438255e-06,
|
|
"loss": 1.139,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 1.250936554286848e-06,
|
|
"loss": 1.2039,
|
|
"step": 3131
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 1.2466066802925092e-06,
|
|
"loss": 1.3901,
|
|
"step": 3132
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 1.242283814627482e-06,
|
|
"loss": 1.1637,
|
|
"step": 3133
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 1.2379679607528394e-06,
|
|
"loss": 1.3145,
|
|
"step": 3134
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 1.2336591221240235e-06,
|
|
"loss": 1.3193,
|
|
"step": 3135
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 1.2293573021908756e-06,
|
|
"loss": 1.2842,
|
|
"step": 3136
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 1.2250625043976094e-06,
|
|
"loss": 1.1351,
|
|
"step": 3137
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 1.2207747321828156e-06,
|
|
"loss": 1.02,
|
|
"step": 3138
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 1.216493988979468e-06,
|
|
"loss": 1.1485,
|
|
"step": 3139
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 1.2122202782148996e-06,
|
|
"loss": 1.2964,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 1.2079536033108252e-06,
|
|
"loss": 1.1138,
|
|
"step": 3141
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 1.2036939676833193e-06,
|
|
"loss": 1.1051,
|
|
"step": 3142
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 1.1994413747428223e-06,
|
|
"loss": 1.2778,
|
|
"step": 3143
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 1.195195827894139e-06,
|
|
"loss": 1.2468,
|
|
"step": 3144
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 1.1909573305364297e-06,
|
|
"loss": 1.0817,
|
|
"step": 3145
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 1.186725886063208e-06,
|
|
"loss": 1.2708,
|
|
"step": 3146
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 1.1825014978623438e-06,
|
|
"loss": 1.068,
|
|
"step": 3147
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 1.1782841693160617e-06,
|
|
"loss": 1.0462,
|
|
"step": 3148
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 1.1740739038009264e-06,
|
|
"loss": 1.2906,
|
|
"step": 3149
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 1.169870704687851e-06,
|
|
"loss": 1.2752,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 1.1656745753420894e-06,
|
|
"loss": 1.2309,
|
|
"step": 3151
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 1.161485519123241e-06,
|
|
"loss": 1.2661,
|
|
"step": 3152
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 1.157303539385234e-06,
|
|
"loss": 1.3248,
|
|
"step": 3153
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 1.1531286394763364e-06,
|
|
"loss": 1.3471,
|
|
"step": 3154
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 1.1489608227391424e-06,
|
|
"loss": 1.2229,
|
|
"step": 3155
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 1.144800092510585e-06,
|
|
"loss": 1.1888,
|
|
"step": 3156
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 1.140646452121913e-06,
|
|
"loss": 0.9956,
|
|
"step": 3157
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 1.1364999048987025e-06,
|
|
"loss": 1.3579,
|
|
"step": 3158
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 1.1323604541608556e-06,
|
|
"loss": 1.233,
|
|
"step": 3159
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 1.1282281032225806e-06,
|
|
"loss": 1.4886,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 1.124102855392415e-06,
|
|
"loss": 1.1025,
|
|
"step": 3161
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 1.1199847139732013e-06,
|
|
"loss": 1.2036,
|
|
"step": 3162
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 1.115873682262092e-06,
|
|
"loss": 1.0916,
|
|
"step": 3163
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 1.1117697635505553e-06,
|
|
"loss": 1.4043,
|
|
"step": 3164
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 1.1076729611243509e-06,
|
|
"loss": 0.9279,
|
|
"step": 3165
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 1.1035832782635537e-06,
|
|
"loss": 1.465,
|
|
"step": 3166
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 1.099500718242532e-06,
|
|
"loss": 1.4862,
|
|
"step": 3167
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 1.0954252843299496e-06,
|
|
"loss": 1.1407,
|
|
"step": 3168
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 1.0913569797887725e-06,
|
|
"loss": 1.4324,
|
|
"step": 3169
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 1.0872958078762519e-06,
|
|
"loss": 1.1099,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 1.0832417718439303e-06,
|
|
"loss": 1.3079,
|
|
"step": 3171
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 1.0791948749376335e-06,
|
|
"loss": 1.0832,
|
|
"step": 3172
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 1.0751551203974807e-06,
|
|
"loss": 1.2252,
|
|
"step": 3173
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 1.071122511457865e-06,
|
|
"loss": 1.0642,
|
|
"step": 3174
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 1.0670970513474554e-06,
|
|
"loss": 0.9786,
|
|
"step": 3175
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 1.0630787432892108e-06,
|
|
"loss": 1.3739,
|
|
"step": 3176
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 1.0590675905003457e-06,
|
|
"loss": 1.5802,
|
|
"step": 3177
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 1.0550635961923615e-06,
|
|
"loss": 1.1817,
|
|
"step": 3178
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 1.0510667635710203e-06,
|
|
"loss": 1.1054,
|
|
"step": 3179
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 1.0470770958363486e-06,
|
|
"loss": 1.372,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 1.0430945961826477e-06,
|
|
"loss": 1.0386,
|
|
"step": 3181
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 1.0391192677984618e-06,
|
|
"loss": 1.3375,
|
|
"step": 3182
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 1.0351511138666092e-06,
|
|
"loss": 1.1659,
|
|
"step": 3183
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 1.0311901375641609e-06,
|
|
"loss": 1.1052,
|
|
"step": 3184
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 1.027236342062432e-06,
|
|
"loss": 1.0866,
|
|
"step": 3185
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 1.0232897305269996e-06,
|
|
"loss": 0.9665,
|
|
"step": 3186
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 1.0193503061176835e-06,
|
|
"loss": 1.2142,
|
|
"step": 3187
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 1.0154180719885464e-06,
|
|
"loss": 1.2334,
|
|
"step": 3188
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 1.0114930312879046e-06,
|
|
"loss": 1.0841,
|
|
"step": 3189
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 1.0075751871583039e-06,
|
|
"loss": 0.9993,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 1.0036645427365332e-06,
|
|
"loss": 1.0957,
|
|
"step": 3191
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 9.997611011536168e-07,
|
|
"loss": 1.0189,
|
|
"step": 3192
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 9.958648655348102e-07,
|
|
"loss": 1.0542,
|
|
"step": 3193
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 9.919758389996037e-07,
|
|
"loss": 1.3784,
|
|
"step": 3194
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 9.880940246617121e-07,
|
|
"loss": 1.1462,
|
|
"step": 3195
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 9.84219425629076e-07,
|
|
"loss": 1.083,
|
|
"step": 3196
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 9.803520450038595e-07,
|
|
"loss": 1.404,
|
|
"step": 3197
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 9.76491885882449e-07,
|
|
"loss": 1.3753,
|
|
"step": 3198
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 9.72638951355449e-07,
|
|
"loss": 1.3762,
|
|
"step": 3199
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 9.687932445076743e-07,
|
|
"loss": 1.1616,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 9.649547684181627e-07,
|
|
"loss": 1.2071,
|
|
"step": 3201
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 9.611235261601515e-07,
|
|
"loss": 1.1725,
|
|
"step": 3202
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 9.572995208010961e-07,
|
|
"loss": 1.2339,
|
|
"step": 3203
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 9.534827554026516e-07,
|
|
"loss": 1.1087,
|
|
"step": 3204
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 9.496732330206782e-07,
|
|
"loss": 1.0838,
|
|
"step": 3205
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 9.458709567052416e-07,
|
|
"loss": 1.3058,
|
|
"step": 3206
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 9.420759295005943e-07,
|
|
"loss": 1.0464,
|
|
"step": 3207
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 9.382881544451983e-07,
|
|
"loss": 1.1827,
|
|
"step": 3208
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 9.345076345717008e-07,
|
|
"loss": 1.1702,
|
|
"step": 3209
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 9.307343729069418e-07,
|
|
"loss": 1.2813,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 9.269683724719535e-07,
|
|
"loss": 1.2506,
|
|
"step": 3211
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 9.232096362819509e-07,
|
|
"loss": 1.086,
|
|
"step": 3212
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 9.194581673463321e-07,
|
|
"loss": 1.1063,
|
|
"step": 3213
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 9.157139686686811e-07,
|
|
"loss": 1.1284,
|
|
"step": 3214
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 9.119770432467578e-07,
|
|
"loss": 1.3046,
|
|
"step": 3215
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 9.082473940724989e-07,
|
|
"loss": 1.0537,
|
|
"step": 3216
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 9.045250241320158e-07,
|
|
"loss": 1.3346,
|
|
"step": 3217
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 9.008099364055911e-07,
|
|
"loss": 1.119,
|
|
"step": 3218
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 8.97102133867681e-07,
|
|
"loss": 1.3242,
|
|
"step": 3219
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 8.934016194869033e-07,
|
|
"loss": 1.0422,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 8.897083962260456e-07,
|
|
"loss": 1.1714,
|
|
"step": 3221
|
|
},
|
|
{
|
|
"epoch": 2.62,
|
|
"learning_rate": 8.860224670420503e-07,
|
|
"loss": 1.2706,
|
|
"step": 3222
|
|
},
|
|
{
|
|
"epoch": 2.62,
|
|
"learning_rate": 8.823438348860314e-07,
|
|
"loss": 0.8979,
|
|
"step": 3223
|
|
},
|
|
{
|
|
"epoch": 2.62,
|
|
"learning_rate": 8.786725027032505e-07,
|
|
"loss": 1.0903,
|
|
"step": 3224
|
|
},
|
|
{
|
|
"epoch": 2.62,
|
|
"learning_rate": 8.750084734331277e-07,
|
|
"loss": 1.3761,
|
|
"step": 3225
|
|
},
|
|
{
|
|
"epoch": 2.62,
|
|
"learning_rate": 8.713517500092405e-07,
|
|
"loss": 1.1495,
|
|
"step": 3226
|
|
},
|
|
{
|
|
"epoch": 2.62,
|
|
"learning_rate": 8.677023353593084e-07,
|
|
"loss": 0.9962,
|
|
"step": 3227
|
|
},
|
|
{
|
|
"epoch": 2.62,
|
|
"learning_rate": 8.640602324052072e-07,
|
|
"loss": 1.1878,
|
|
"step": 3228
|
|
},
|
|
{
|
|
"epoch": 2.62,
|
|
"learning_rate": 8.604254440629545e-07,
|
|
"loss": 1.1036,
|
|
"step": 3229
|
|
},
|
|
{
|
|
"epoch": 2.62,
|
|
"learning_rate": 8.5679797324271e-07,
|
|
"loss": 1.3567,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 2.62,
|
|
"learning_rate": 8.531778228487831e-07,
|
|
"loss": 1.2395,
|
|
"step": 3231
|
|
},
|
|
{
|
|
"epoch": 2.62,
|
|
"learning_rate": 8.495649957796093e-07,
|
|
"loss": 1.2988,
|
|
"step": 3232
|
|
},
|
|
{
|
|
"epoch": 2.62,
|
|
"learning_rate": 8.459594949277727e-07,
|
|
"loss": 1.033,
|
|
"step": 3233
|
|
},
|
|
{
|
|
"epoch": 2.62,
|
|
"learning_rate": 8.423613231799843e-07,
|
|
"loss": 1.1096,
|
|
"step": 3234
|
|
},
|
|
{
|
|
"epoch": 2.63,
|
|
"learning_rate": 8.387704834170874e-07,
|
|
"loss": 1.2737,
|
|
"step": 3235
|
|
},
|
|
{
|
|
"epoch": 2.63,
|
|
"learning_rate": 8.35186978514062e-07,
|
|
"loss": 1.3325,
|
|
"step": 3236
|
|
},
|
|
{
|
|
"epoch": 2.63,
|
|
"learning_rate": 8.316108113400079e-07,
|
|
"loss": 1.1328,
|
|
"step": 3237
|
|
},
|
|
{
|
|
"epoch": 2.63,
|
|
"learning_rate": 8.280419847581533e-07,
|
|
"loss": 0.9314,
|
|
"step": 3238
|
|
},
|
|
{
|
|
"epoch": 2.63,
|
|
"learning_rate": 8.244805016258462e-07,
|
|
"loss": 1.4144,
|
|
"step": 3239
|
|
},
|
|
{
|
|
"epoch": 2.63,
|
|
"learning_rate": 8.20926364794562e-07,
|
|
"loss": 1.1281,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 2.63,
|
|
"learning_rate": 8.173795771098858e-07,
|
|
"loss": 1.0335,
|
|
"step": 3241
|
|
},
|
|
{
|
|
"epoch": 2.63,
|
|
"learning_rate": 8.138401414115238e-07,
|
|
"loss": 1.214,
|
|
"step": 3242
|
|
},
|
|
{
|
|
"epoch": 2.63,
|
|
"learning_rate": 8.103080605332992e-07,
|
|
"loss": 1.5078,
|
|
"step": 3243
|
|
},
|
|
{
|
|
"epoch": 2.63,
|
|
"learning_rate": 8.067833373031342e-07,
|
|
"loss": 1.0639,
|
|
"step": 3244
|
|
},
|
|
{
|
|
"epoch": 2.63,
|
|
"learning_rate": 8.032659745430749e-07,
|
|
"loss": 1.1189,
|
|
"step": 3245
|
|
},
|
|
{
|
|
"epoch": 2.63,
|
|
"learning_rate": 7.99755975069264e-07,
|
|
"loss": 1.1639,
|
|
"step": 3246
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"learning_rate": 7.962533416919538e-07,
|
|
"loss": 1.3128,
|
|
"step": 3247
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"learning_rate": 7.927580772154985e-07,
|
|
"loss": 1.2063,
|
|
"step": 3248
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"learning_rate": 7.892701844383499e-07,
|
|
"loss": 1.2968,
|
|
"step": 3249
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"learning_rate": 7.857896661530595e-07,
|
|
"loss": 1.0301,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"learning_rate": 7.823165251462794e-07,
|
|
"loss": 1.0298,
|
|
"step": 3251
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"learning_rate": 7.788507641987408e-07,
|
|
"loss": 1.4305,
|
|
"step": 3252
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"learning_rate": 7.753923860852841e-07,
|
|
"loss": 1.1602,
|
|
"step": 3253
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"learning_rate": 7.719413935748254e-07,
|
|
"loss": 0.9241,
|
|
"step": 3254
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"learning_rate": 7.684977894303724e-07,
|
|
"loss": 1.0676,
|
|
"step": 3255
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"learning_rate": 7.650615764090208e-07,
|
|
"loss": 1.282,
|
|
"step": 3256
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"learning_rate": 7.616327572619419e-07,
|
|
"loss": 1.4053,
|
|
"step": 3257
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"learning_rate": 7.582113347343912e-07,
|
|
"loss": 1.2807,
|
|
"step": 3258
|
|
},
|
|
{
|
|
"epoch": 2.65,
|
|
"learning_rate": 7.547973115657015e-07,
|
|
"loss": 1.2137,
|
|
"step": 3259
|
|
},
|
|
{
|
|
"epoch": 2.65,
|
|
"learning_rate": 7.513906904892788e-07,
|
|
"loss": 1.3285,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 2.65,
|
|
"learning_rate": 7.479914742326089e-07,
|
|
"loss": 1.3916,
|
|
"step": 3261
|
|
},
|
|
{
|
|
"epoch": 2.65,
|
|
"learning_rate": 7.445996655172438e-07,
|
|
"loss": 1.1849,
|
|
"step": 3262
|
|
},
|
|
{
|
|
"epoch": 2.65,
|
|
"learning_rate": 7.412152670588058e-07,
|
|
"loss": 1.0784,
|
|
"step": 3263
|
|
},
|
|
{
|
|
"epoch": 2.65,
|
|
"learning_rate": 7.378382815669827e-07,
|
|
"loss": 1.0849,
|
|
"step": 3264
|
|
},
|
|
{
|
|
"epoch": 2.65,
|
|
"learning_rate": 7.344687117455318e-07,
|
|
"loss": 1.1374,
|
|
"step": 3265
|
|
},
|
|
{
|
|
"epoch": 2.65,
|
|
"learning_rate": 7.311065602922695e-07,
|
|
"loss": 1.2198,
|
|
"step": 3266
|
|
},
|
|
{
|
|
"epoch": 2.65,
|
|
"learning_rate": 7.277518298990716e-07,
|
|
"loss": 1.1347,
|
|
"step": 3267
|
|
},
|
|
{
|
|
"epoch": 2.65,
|
|
"learning_rate": 7.244045232518792e-07,
|
|
"loss": 1.0632,
|
|
"step": 3268
|
|
},
|
|
{
|
|
"epoch": 2.65,
|
|
"learning_rate": 7.210646430306789e-07,
|
|
"loss": 1.0476,
|
|
"step": 3269
|
|
},
|
|
{
|
|
"epoch": 2.65,
|
|
"learning_rate": 7.177321919095204e-07,
|
|
"loss": 1.027,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 7.144071725565005e-07,
|
|
"loss": 1.1775,
|
|
"step": 3271
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 7.110895876337676e-07,
|
|
"loss": 1.2122,
|
|
"step": 3272
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 7.077794397975213e-07,
|
|
"loss": 1.2143,
|
|
"step": 3273
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 7.044767316979973e-07,
|
|
"loss": 1.05,
|
|
"step": 3274
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 7.011814659794847e-07,
|
|
"loss": 1.0954,
|
|
"step": 3275
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 6.978936452803076e-07,
|
|
"loss": 0.9869,
|
|
"step": 3276
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 6.946132722328314e-07,
|
|
"loss": 1.253,
|
|
"step": 3277
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 6.913403494634607e-07,
|
|
"loss": 1.1044,
|
|
"step": 3278
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 6.880748795926317e-07,
|
|
"loss": 1.1792,
|
|
"step": 3279
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 6.848168652348131e-07,
|
|
"loss": 1.0293,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 6.815663089985103e-07,
|
|
"loss": 1.262,
|
|
"step": 3281
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 6.783232134862505e-07,
|
|
"loss": 1.2078,
|
|
"step": 3282
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 6.750875812945911e-07,
|
|
"loss": 1.3267,
|
|
"step": 3283
|
|
},
|
|
{
|
|
"epoch": 2.67,
|
|
"learning_rate": 6.718594150141111e-07,
|
|
"loss": 1.0912,
|
|
"step": 3284
|
|
},
|
|
{
|
|
"epoch": 2.67,
|
|
"learning_rate": 6.686387172294173e-07,
|
|
"loss": 1.0795,
|
|
"step": 3285
|
|
},
|
|
{
|
|
"epoch": 2.67,
|
|
"learning_rate": 6.654254905191337e-07,
|
|
"loss": 1.3051,
|
|
"step": 3286
|
|
},
|
|
{
|
|
"epoch": 2.67,
|
|
"learning_rate": 6.622197374559002e-07,
|
|
"loss": 1.1298,
|
|
"step": 3287
|
|
},
|
|
{
|
|
"epoch": 2.67,
|
|
"learning_rate": 6.590214606063783e-07,
|
|
"loss": 1.1104,
|
|
"step": 3288
|
|
},
|
|
{
|
|
"epoch": 2.67,
|
|
"learning_rate": 6.558306625312361e-07,
|
|
"loss": 1.0075,
|
|
"step": 3289
|
|
},
|
|
{
|
|
"epoch": 2.67,
|
|
"learning_rate": 6.526473457851657e-07,
|
|
"loss": 1.2779,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 2.67,
|
|
"learning_rate": 6.494715129168571e-07,
|
|
"loss": 1.1352,
|
|
"step": 3291
|
|
},
|
|
{
|
|
"epoch": 2.67,
|
|
"learning_rate": 6.463031664690144e-07,
|
|
"loss": 1.2104,
|
|
"step": 3292
|
|
},
|
|
{
|
|
"epoch": 2.67,
|
|
"learning_rate": 6.431423089783506e-07,
|
|
"loss": 1.3355,
|
|
"step": 3293
|
|
},
|
|
{
|
|
"epoch": 2.67,
|
|
"learning_rate": 6.399889429755746e-07,
|
|
"loss": 1.3794,
|
|
"step": 3294
|
|
},
|
|
{
|
|
"epoch": 2.67,
|
|
"learning_rate": 6.368430709854035e-07,
|
|
"loss": 1.1177,
|
|
"step": 3295
|
|
},
|
|
{
|
|
"epoch": 2.68,
|
|
"learning_rate": 6.337046955265547e-07,
|
|
"loss": 0.9951,
|
|
"step": 3296
|
|
},
|
|
{
|
|
"epoch": 2.68,
|
|
"learning_rate": 6.305738191117372e-07,
|
|
"loss": 1.2369,
|
|
"step": 3297
|
|
},
|
|
{
|
|
"epoch": 2.68,
|
|
"learning_rate": 6.27450444247667e-07,
|
|
"loss": 1.0495,
|
|
"step": 3298
|
|
},
|
|
{
|
|
"epoch": 2.68,
|
|
"learning_rate": 6.243345734350392e-07,
|
|
"loss": 1.1524,
|
|
"step": 3299
|
|
},
|
|
{
|
|
"epoch": 2.68,
|
|
"learning_rate": 6.212262091685561e-07,
|
|
"loss": 1.0677,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 2.68,
|
|
"learning_rate": 6.181253539368992e-07,
|
|
"loss": 1.1069,
|
|
"step": 3301
|
|
},
|
|
{
|
|
"epoch": 2.68,
|
|
"learning_rate": 6.150320102227414e-07,
|
|
"loss": 1.0589,
|
|
"step": 3302
|
|
},
|
|
{
|
|
"epoch": 2.68,
|
|
"learning_rate": 6.119461805027438e-07,
|
|
"loss": 1.0847,
|
|
"step": 3303
|
|
},
|
|
{
|
|
"epoch": 2.68,
|
|
"learning_rate": 6.088678672475501e-07,
|
|
"loss": 0.9135,
|
|
"step": 3304
|
|
},
|
|
{
|
|
"epoch": 2.68,
|
|
"learning_rate": 6.057970729217832e-07,
|
|
"loss": 1.0856,
|
|
"step": 3305
|
|
},
|
|
{
|
|
"epoch": 2.68,
|
|
"learning_rate": 6.027337999840465e-07,
|
|
"loss": 1.2117,
|
|
"step": 3306
|
|
},
|
|
{
|
|
"epoch": 2.68,
|
|
"learning_rate": 5.996780508869293e-07,
|
|
"loss": 1.2398,
|
|
"step": 3307
|
|
},
|
|
{
|
|
"epoch": 2.69,
|
|
"learning_rate": 5.966298280769856e-07,
|
|
"loss": 1.0026,
|
|
"step": 3308
|
|
},
|
|
{
|
|
"epoch": 2.69,
|
|
"learning_rate": 5.935891339947508e-07,
|
|
"loss": 1.5573,
|
|
"step": 3309
|
|
},
|
|
{
|
|
"epoch": 2.69,
|
|
"learning_rate": 5.905559710747322e-07,
|
|
"loss": 1.0367,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 2.69,
|
|
"learning_rate": 5.875303417454014e-07,
|
|
"loss": 1.2144,
|
|
"step": 3311
|
|
},
|
|
{
|
|
"epoch": 2.69,
|
|
"learning_rate": 5.845122484292065e-07,
|
|
"loss": 0.9328,
|
|
"step": 3312
|
|
},
|
|
{
|
|
"epoch": 2.69,
|
|
"learning_rate": 5.815016935425565e-07,
|
|
"loss": 1.0404,
|
|
"step": 3313
|
|
},
|
|
{
|
|
"epoch": 2.69,
|
|
"learning_rate": 5.784986794958247e-07,
|
|
"loss": 1.179,
|
|
"step": 3314
|
|
},
|
|
{
|
|
"epoch": 2.69,
|
|
"learning_rate": 5.755032086933521e-07,
|
|
"loss": 1.0599,
|
|
"step": 3315
|
|
},
|
|
{
|
|
"epoch": 2.69,
|
|
"learning_rate": 5.725152835334347e-07,
|
|
"loss": 1.1193,
|
|
"step": 3316
|
|
},
|
|
{
|
|
"epoch": 2.69,
|
|
"learning_rate": 5.695349064083278e-07,
|
|
"loss": 1.2635,
|
|
"step": 3317
|
|
},
|
|
{
|
|
"epoch": 2.69,
|
|
"learning_rate": 5.665620797042504e-07,
|
|
"loss": 1.1812,
|
|
"step": 3318
|
|
},
|
|
{
|
|
"epoch": 2.69,
|
|
"learning_rate": 5.63596805801363e-07,
|
|
"loss": 1.1361,
|
|
"step": 3319
|
|
},
|
|
{
|
|
"epoch": 2.69,
|
|
"learning_rate": 5.606390870737921e-07,
|
|
"loss": 0.9801,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 2.7,
|
|
"learning_rate": 5.576889258896079e-07,
|
|
"loss": 1.1637,
|
|
"step": 3321
|
|
},
|
|
{
|
|
"epoch": 2.7,
|
|
"learning_rate": 5.547463246108297e-07,
|
|
"loss": 1.2502,
|
|
"step": 3322
|
|
},
|
|
{
|
|
"epoch": 2.7,
|
|
"learning_rate": 5.51811285593431e-07,
|
|
"loss": 0.9948,
|
|
"step": 3323
|
|
},
|
|
{
|
|
"epoch": 2.7,
|
|
"learning_rate": 5.488838111873218e-07,
|
|
"loss": 1.1991,
|
|
"step": 3324
|
|
},
|
|
{
|
|
"epoch": 2.7,
|
|
"learning_rate": 5.459639037363618e-07,
|
|
"loss": 1.0933,
|
|
"step": 3325
|
|
},
|
|
{
|
|
"epoch": 2.7,
|
|
"learning_rate": 5.430515655783475e-07,
|
|
"loss": 1.1025,
|
|
"step": 3326
|
|
},
|
|
{
|
|
"epoch": 2.7,
|
|
"learning_rate": 5.401467990450171e-07,
|
|
"loss": 1.1604,
|
|
"step": 3327
|
|
},
|
|
{
|
|
"epoch": 2.7,
|
|
"learning_rate": 5.372496064620491e-07,
|
|
"loss": 1.0892,
|
|
"step": 3328
|
|
},
|
|
{
|
|
"epoch": 2.7,
|
|
"learning_rate": 5.343599901490559e-07,
|
|
"loss": 1.2129,
|
|
"step": 3329
|
|
},
|
|
{
|
|
"epoch": 2.7,
|
|
"learning_rate": 5.314779524195835e-07,
|
|
"loss": 1.1743,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 2.7,
|
|
"learning_rate": 5.286034955811093e-07,
|
|
"loss": 1.2436,
|
|
"step": 3331
|
|
},
|
|
{
|
|
"epoch": 2.7,
|
|
"learning_rate": 5.257366219350446e-07,
|
|
"loss": 1.1367,
|
|
"step": 3332
|
|
},
|
|
{
|
|
"epoch": 2.71,
|
|
"learning_rate": 5.228773337767268e-07,
|
|
"loss": 1.244,
|
|
"step": 3333
|
|
},
|
|
{
|
|
"epoch": 2.71,
|
|
"learning_rate": 5.200256333954179e-07,
|
|
"loss": 1.0988,
|
|
"step": 3334
|
|
},
|
|
{
|
|
"epoch": 2.71,
|
|
"learning_rate": 5.171815230743127e-07,
|
|
"loss": 1.2167,
|
|
"step": 3335
|
|
},
|
|
{
|
|
"epoch": 2.71,
|
|
"learning_rate": 5.143450050905174e-07,
|
|
"loss": 1.2555,
|
|
"step": 3336
|
|
},
|
|
{
|
|
"epoch": 2.71,
|
|
"learning_rate": 5.11516081715071e-07,
|
|
"loss": 1.2322,
|
|
"step": 3337
|
|
},
|
|
{
|
|
"epoch": 2.71,
|
|
"learning_rate": 5.08694755212924e-07,
|
|
"loss": 1.1996,
|
|
"step": 3338
|
|
},
|
|
{
|
|
"epoch": 2.71,
|
|
"learning_rate": 5.058810278429472e-07,
|
|
"loss": 0.953,
|
|
"step": 3339
|
|
},
|
|
{
|
|
"epoch": 2.71,
|
|
"learning_rate": 5.030749018579307e-07,
|
|
"loss": 1.2841,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 2.71,
|
|
"learning_rate": 5.002763795045696e-07,
|
|
"loss": 1.2426,
|
|
"step": 3341
|
|
},
|
|
{
|
|
"epoch": 2.71,
|
|
"learning_rate": 4.974854630234793e-07,
|
|
"loss": 1.2571,
|
|
"step": 3342
|
|
},
|
|
{
|
|
"epoch": 2.71,
|
|
"learning_rate": 4.947021546491859e-07,
|
|
"loss": 1.3156,
|
|
"step": 3343
|
|
},
|
|
{
|
|
"epoch": 2.71,
|
|
"learning_rate": 4.919264566101179e-07,
|
|
"loss": 1.1524,
|
|
"step": 3344
|
|
},
|
|
{
|
|
"epoch": 2.72,
|
|
"learning_rate": 4.891583711286152e-07,
|
|
"loss": 1.0775,
|
|
"step": 3345
|
|
},
|
|
{
|
|
"epoch": 2.72,
|
|
"learning_rate": 4.86397900420923e-07,
|
|
"loss": 0.9396,
|
|
"step": 3346
|
|
},
|
|
{
|
|
"epoch": 2.72,
|
|
"learning_rate": 4.836450466971853e-07,
|
|
"loss": 1.3119,
|
|
"step": 3347
|
|
},
|
|
{
|
|
"epoch": 2.72,
|
|
"learning_rate": 4.808998121614549e-07,
|
|
"loss": 1.2535,
|
|
"step": 3348
|
|
},
|
|
{
|
|
"epoch": 2.72,
|
|
"learning_rate": 4.781621990116781e-07,
|
|
"loss": 1.0931,
|
|
"step": 3349
|
|
},
|
|
{
|
|
"epoch": 2.72,
|
|
"learning_rate": 4.7543220943970124e-07,
|
|
"loss": 0.9401,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 2.72,
|
|
"learning_rate": 4.727098456312662e-07,
|
|
"loss": 1.1544,
|
|
"step": 3351
|
|
},
|
|
{
|
|
"epoch": 2.72,
|
|
"learning_rate": 4.699951097660127e-07,
|
|
"loss": 0.8482,
|
|
"step": 3352
|
|
},
|
|
{
|
|
"epoch": 2.72,
|
|
"learning_rate": 4.6728800401746985e-07,
|
|
"loss": 1.2513,
|
|
"step": 3353
|
|
},
|
|
{
|
|
"epoch": 2.72,
|
|
"learning_rate": 4.645885305530573e-07,
|
|
"loss": 1.3921,
|
|
"step": 3354
|
|
},
|
|
{
|
|
"epoch": 2.72,
|
|
"learning_rate": 4.6189669153408724e-07,
|
|
"loss": 0.9831,
|
|
"step": 3355
|
|
},
|
|
{
|
|
"epoch": 2.72,
|
|
"learning_rate": 4.5921248911575454e-07,
|
|
"loss": 1.1329,
|
|
"step": 3356
|
|
},
|
|
{
|
|
"epoch": 2.72,
|
|
"learning_rate": 4.5653592544714686e-07,
|
|
"loss": 1.1546,
|
|
"step": 3357
|
|
},
|
|
{
|
|
"epoch": 2.73,
|
|
"learning_rate": 4.5386700267122994e-07,
|
|
"loss": 1.2398,
|
|
"step": 3358
|
|
},
|
|
{
|
|
"epoch": 2.73,
|
|
"learning_rate": 4.512057229248523e-07,
|
|
"loss": 1.1249,
|
|
"step": 3359
|
|
},
|
|
{
|
|
"epoch": 2.73,
|
|
"learning_rate": 4.485520883387495e-07,
|
|
"loss": 1.1918,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 2.73,
|
|
"learning_rate": 4.459061010375254e-07,
|
|
"loss": 0.981,
|
|
"step": 3361
|
|
},
|
|
{
|
|
"epoch": 2.73,
|
|
"learning_rate": 4.432677631396709e-07,
|
|
"loss": 1.3035,
|
|
"step": 3362
|
|
},
|
|
{
|
|
"epoch": 2.73,
|
|
"learning_rate": 4.406370767575474e-07,
|
|
"loss": 1.0695,
|
|
"step": 3363
|
|
},
|
|
{
|
|
"epoch": 2.73,
|
|
"learning_rate": 4.3801404399739124e-07,
|
|
"loss": 1.156,
|
|
"step": 3364
|
|
},
|
|
{
|
|
"epoch": 2.73,
|
|
"learning_rate": 4.353986669593113e-07,
|
|
"loss": 1.4431,
|
|
"step": 3365
|
|
},
|
|
{
|
|
"epoch": 2.73,
|
|
"learning_rate": 4.3279094773728823e-07,
|
|
"loss": 1.6149,
|
|
"step": 3366
|
|
},
|
|
{
|
|
"epoch": 2.73,
|
|
"learning_rate": 4.3019088841916856e-07,
|
|
"loss": 1.255,
|
|
"step": 3367
|
|
},
|
|
{
|
|
"epoch": 2.73,
|
|
"learning_rate": 4.2759849108666706e-07,
|
|
"loss": 1.0339,
|
|
"step": 3368
|
|
},
|
|
{
|
|
"epoch": 2.73,
|
|
"learning_rate": 4.250137578153646e-07,
|
|
"loss": 1.1305,
|
|
"step": 3369
|
|
},
|
|
{
|
|
"epoch": 2.74,
|
|
"learning_rate": 4.224366906747068e-07,
|
|
"loss": 1.121,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 2.74,
|
|
"learning_rate": 4.19867291728e-07,
|
|
"loss": 1.6182,
|
|
"step": 3371
|
|
},
|
|
{
|
|
"epoch": 2.74,
|
|
"learning_rate": 4.173055630324119e-07,
|
|
"loss": 1.022,
|
|
"step": 3372
|
|
},
|
|
{
|
|
"epoch": 2.74,
|
|
"learning_rate": 4.1475150663896623e-07,
|
|
"loss": 1.2889,
|
|
"step": 3373
|
|
},
|
|
{
|
|
"epoch": 2.74,
|
|
"learning_rate": 4.1220512459254847e-07,
|
|
"loss": 1.0634,
|
|
"step": 3374
|
|
},
|
|
{
|
|
"epoch": 2.74,
|
|
"learning_rate": 4.096664189318977e-07,
|
|
"loss": 0.967,
|
|
"step": 3375
|
|
},
|
|
{
|
|
"epoch": 2.74,
|
|
"learning_rate": 4.0713539168960593e-07,
|
|
"loss": 1.2031,
|
|
"step": 3376
|
|
},
|
|
{
|
|
"epoch": 2.74,
|
|
"learning_rate": 4.046120448921187e-07,
|
|
"loss": 1.3038,
|
|
"step": 3377
|
|
},
|
|
{
|
|
"epoch": 2.74,
|
|
"learning_rate": 4.020963805597333e-07,
|
|
"loss": 1.1008,
|
|
"step": 3378
|
|
},
|
|
{
|
|
"epoch": 2.74,
|
|
"learning_rate": 3.99588400706592e-07,
|
|
"loss": 1.1675,
|
|
"step": 3379
|
|
},
|
|
{
|
|
"epoch": 2.74,
|
|
"learning_rate": 3.970881073406907e-07,
|
|
"loss": 0.9289,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 2.74,
|
|
"learning_rate": 3.945955024638637e-07,
|
|
"loss": 0.9564,
|
|
"step": 3381
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 3.921105880717979e-07,
|
|
"loss": 1.0749,
|
|
"step": 3382
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 3.896333661540164e-07,
|
|
"loss": 1.0972,
|
|
"step": 3383
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 3.8716383869388476e-07,
|
|
"loss": 1.4732,
|
|
"step": 3384
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 3.8470200766861363e-07,
|
|
"loss": 1.313,
|
|
"step": 3385
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 3.822478750492409e-07,
|
|
"loss": 1.1263,
|
|
"step": 3386
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 3.798014428006513e-07,
|
|
"loss": 1.3283,
|
|
"step": 3387
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 3.7736271288155714e-07,
|
|
"loss": 1.0524,
|
|
"step": 3388
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 3.7493168724450746e-07,
|
|
"loss": 1.5594,
|
|
"step": 3389
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 3.725083678358832e-07,
|
|
"loss": 1.1026,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 3.700927565958923e-07,
|
|
"loss": 1.0998,
|
|
"step": 3391
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 3.676848554585755e-07,
|
|
"loss": 1.3351,
|
|
"step": 3392
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 3.6528466635179617e-07,
|
|
"loss": 1.2376,
|
|
"step": 3393
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 3.6289219119724496e-07,
|
|
"loss": 1.0574,
|
|
"step": 3394
|
|
},
|
|
{
|
|
"epoch": 2.76,
|
|
"learning_rate": 3.605074319104396e-07,
|
|
"loss": 1.1749,
|
|
"step": 3395
|
|
},
|
|
{
|
|
"epoch": 2.76,
|
|
"learning_rate": 3.5813039040071385e-07,
|
|
"loss": 1.149,
|
|
"step": 3396
|
|
},
|
|
{
|
|
"epoch": 2.76,
|
|
"learning_rate": 3.557610685712276e-07,
|
|
"loss": 1.2691,
|
|
"step": 3397
|
|
},
|
|
{
|
|
"epoch": 2.76,
|
|
"learning_rate": 3.5339946831895566e-07,
|
|
"loss": 1.1357,
|
|
"step": 3398
|
|
},
|
|
{
|
|
"epoch": 2.76,
|
|
"learning_rate": 3.5104559153469553e-07,
|
|
"loss": 1.2063,
|
|
"step": 3399
|
|
},
|
|
{
|
|
"epoch": 2.76,
|
|
"learning_rate": 3.486994401030586e-07,
|
|
"loss": 1.0551,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 2.76,
|
|
"learning_rate": 3.463610159024677e-07,
|
|
"loss": 1.439,
|
|
"step": 3401
|
|
},
|
|
{
|
|
"epoch": 2.76,
|
|
"learning_rate": 3.4403032080516764e-07,
|
|
"loss": 1.1018,
|
|
"step": 3402
|
|
},
|
|
{
|
|
"epoch": 2.76,
|
|
"learning_rate": 3.417073566772033e-07,
|
|
"loss": 1.2565,
|
|
"step": 3403
|
|
},
|
|
{
|
|
"epoch": 2.76,
|
|
"learning_rate": 3.3939212537844045e-07,
|
|
"loss": 1.4157,
|
|
"step": 3404
|
|
},
|
|
{
|
|
"epoch": 2.76,
|
|
"learning_rate": 3.3708462876254734e-07,
|
|
"loss": 0.9081,
|
|
"step": 3405
|
|
},
|
|
{
|
|
"epoch": 2.76,
|
|
"learning_rate": 3.3478486867700057e-07,
|
|
"loss": 1.3914,
|
|
"step": 3406
|
|
},
|
|
{
|
|
"epoch": 2.77,
|
|
"learning_rate": 3.3249284696308613e-07,
|
|
"loss": 1.1881,
|
|
"step": 3407
|
|
},
|
|
{
|
|
"epoch": 2.77,
|
|
"learning_rate": 3.302085654558873e-07,
|
|
"loss": 1.1879,
|
|
"step": 3408
|
|
},
|
|
{
|
|
"epoch": 2.77,
|
|
"learning_rate": 3.2793202598429775e-07,
|
|
"loss": 1.2973,
|
|
"step": 3409
|
|
},
|
|
{
|
|
"epoch": 2.77,
|
|
"learning_rate": 3.256632303710106e-07,
|
|
"loss": 1.5699,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 2.77,
|
|
"learning_rate": 3.2340218043251383e-07,
|
|
"loss": 1.1414,
|
|
"step": 3411
|
|
},
|
|
{
|
|
"epoch": 2.77,
|
|
"learning_rate": 3.2114887797909946e-07,
|
|
"loss": 1.0985,
|
|
"step": 3412
|
|
},
|
|
{
|
|
"epoch": 2.77,
|
|
"learning_rate": 3.189033248148554e-07,
|
|
"loss": 1.0619,
|
|
"step": 3413
|
|
},
|
|
{
|
|
"epoch": 2.77,
|
|
"learning_rate": 3.166655227376625e-07,
|
|
"loss": 1.1677,
|
|
"step": 3414
|
|
},
|
|
{
|
|
"epoch": 2.77,
|
|
"learning_rate": 3.144354735392008e-07,
|
|
"loss": 1.0922,
|
|
"step": 3415
|
|
},
|
|
{
|
|
"epoch": 2.77,
|
|
"learning_rate": 3.122131790049376e-07,
|
|
"loss": 1.1658,
|
|
"step": 3416
|
|
},
|
|
{
|
|
"epoch": 2.77,
|
|
"learning_rate": 3.0999864091413536e-07,
|
|
"loss": 1.4022,
|
|
"step": 3417
|
|
},
|
|
{
|
|
"epoch": 2.77,
|
|
"learning_rate": 3.077918610398434e-07,
|
|
"loss": 1.0965,
|
|
"step": 3418
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 3.0559284114890284e-07,
|
|
"loss": 1.1876,
|
|
"step": 3419
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 3.034015830019388e-07,
|
|
"loss": 1.1824,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 3.012180883533655e-07,
|
|
"loss": 1.1538,
|
|
"step": 3421
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 2.990423589513758e-07,
|
|
"loss": 1.0845,
|
|
"step": 3422
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 2.9687439653795057e-07,
|
|
"loss": 1.4105,
|
|
"step": 3423
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 2.9471420284885255e-07,
|
|
"loss": 1.1789,
|
|
"step": 3424
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 2.925617796136182e-07,
|
|
"loss": 1.1617,
|
|
"step": 3425
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 2.90417128555569e-07,
|
|
"loss": 1.0605,
|
|
"step": 3426
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 2.882802513918026e-07,
|
|
"loss": 1.3199,
|
|
"step": 3427
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 2.8615114983318813e-07,
|
|
"loss": 0.8938,
|
|
"step": 3428
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 2.840298255843754e-07,
|
|
"loss": 1.1488,
|
|
"step": 3429
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 2.819162803437825e-07,
|
|
"loss": 1.1269,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 2.798105158036024e-07,
|
|
"loss": 1.0397,
|
|
"step": 3431
|
|
},
|
|
{
|
|
"epoch": 2.79,
|
|
"learning_rate": 2.7771253364979655e-07,
|
|
"loss": 1.4195,
|
|
"step": 3432
|
|
},
|
|
{
|
|
"epoch": 2.79,
|
|
"learning_rate": 2.756223355620979e-07,
|
|
"loss": 1.0558,
|
|
"step": 3433
|
|
},
|
|
{
|
|
"epoch": 2.79,
|
|
"learning_rate": 2.7353992321400566e-07,
|
|
"loss": 1.1392,
|
|
"step": 3434
|
|
},
|
|
{
|
|
"epoch": 2.79,
|
|
"learning_rate": 2.714652982727828e-07,
|
|
"loss": 1.0788,
|
|
"step": 3435
|
|
},
|
|
{
|
|
"epoch": 2.79,
|
|
"learning_rate": 2.6939846239946163e-07,
|
|
"loss": 1.2682,
|
|
"step": 3436
|
|
},
|
|
{
|
|
"epoch": 2.79,
|
|
"learning_rate": 2.673394172488375e-07,
|
|
"loss": 1.4173,
|
|
"step": 3437
|
|
},
|
|
{
|
|
"epoch": 2.79,
|
|
"learning_rate": 2.6528816446946714e-07,
|
|
"loss": 1.1795,
|
|
"step": 3438
|
|
},
|
|
{
|
|
"epoch": 2.79,
|
|
"learning_rate": 2.632447057036658e-07,
|
|
"loss": 1.1003,
|
|
"step": 3439
|
|
},
|
|
{
|
|
"epoch": 2.79,
|
|
"learning_rate": 2.6120904258751467e-07,
|
|
"loss": 1.2132,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 2.79,
|
|
"learning_rate": 2.5918117675084786e-07,
|
|
"loss": 1.2901,
|
|
"step": 3441
|
|
},
|
|
{
|
|
"epoch": 2.79,
|
|
"learning_rate": 2.5716110981725996e-07,
|
|
"loss": 1.0628,
|
|
"step": 3442
|
|
},
|
|
{
|
|
"epoch": 2.79,
|
|
"learning_rate": 2.551488434040983e-07,
|
|
"loss": 1.0381,
|
|
"step": 3443
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"learning_rate": 2.531443791224686e-07,
|
|
"loss": 1.1176,
|
|
"step": 3444
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"learning_rate": 2.511477185772282e-07,
|
|
"loss": 1.1356,
|
|
"step": 3445
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"learning_rate": 2.49158863366985e-07,
|
|
"loss": 1.3625,
|
|
"step": 3446
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"learning_rate": 2.471778150841009e-07,
|
|
"loss": 1.4396,
|
|
"step": 3447
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"learning_rate": 2.4520457531468145e-07,
|
|
"loss": 1.1871,
|
|
"step": 3448
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"learning_rate": 2.432391456385874e-07,
|
|
"loss": 1.3778,
|
|
"step": 3449
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"learning_rate": 2.412815276294223e-07,
|
|
"loss": 1.0534,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"learning_rate": 2.3933172285453555e-07,
|
|
"loss": 1.1843,
|
|
"step": 3451
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"learning_rate": 2.373897328750241e-07,
|
|
"loss": 1.133,
|
|
"step": 3452
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"learning_rate": 2.3545555924572085e-07,
|
|
"loss": 1.2101,
|
|
"step": 3453
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"learning_rate": 2.3352920351521035e-07,
|
|
"loss": 1.2651,
|
|
"step": 3454
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"learning_rate": 2.3161066722581004e-07,
|
|
"loss": 1.1408,
|
|
"step": 3455
|
|
},
|
|
{
|
|
"epoch": 2.81,
|
|
"learning_rate": 2.2969995191357896e-07,
|
|
"loss": 0.9689,
|
|
"step": 3456
|
|
},
|
|
{
|
|
"epoch": 2.81,
|
|
"learning_rate": 2.2779705910831785e-07,
|
|
"loss": 1.3193,
|
|
"step": 3457
|
|
},
|
|
{
|
|
"epoch": 2.81,
|
|
"learning_rate": 2.2590199033355908e-07,
|
|
"loss": 1.128,
|
|
"step": 3458
|
|
},
|
|
{
|
|
"epoch": 2.81,
|
|
"learning_rate": 2.2401474710657455e-07,
|
|
"loss": 1.1803,
|
|
"step": 3459
|
|
},
|
|
{
|
|
"epoch": 2.81,
|
|
"learning_rate": 2.221353309383678e-07,
|
|
"loss": 1.0748,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 2.81,
|
|
"learning_rate": 2.202637433336785e-07,
|
|
"loss": 1.2014,
|
|
"step": 3461
|
|
},
|
|
{
|
|
"epoch": 2.81,
|
|
"learning_rate": 2.1839998579097688e-07,
|
|
"loss": 1.16,
|
|
"step": 3462
|
|
},
|
|
{
|
|
"epoch": 2.81,
|
|
"learning_rate": 2.1654405980246484e-07,
|
|
"loss": 0.901,
|
|
"step": 3463
|
|
},
|
|
{
|
|
"epoch": 2.81,
|
|
"learning_rate": 2.1469596685407268e-07,
|
|
"loss": 1.2067,
|
|
"step": 3464
|
|
},
|
|
{
|
|
"epoch": 2.81,
|
|
"learning_rate": 2.1285570842546122e-07,
|
|
"loss": 1.0532,
|
|
"step": 3465
|
|
},
|
|
{
|
|
"epoch": 2.81,
|
|
"learning_rate": 2.1102328599001743e-07,
|
|
"loss": 1.3107,
|
|
"step": 3466
|
|
},
|
|
{
|
|
"epoch": 2.81,
|
|
"learning_rate": 2.091987010148533e-07,
|
|
"loss": 1.1904,
|
|
"step": 3467
|
|
},
|
|
{
|
|
"epoch": 2.81,
|
|
"learning_rate": 2.0738195496080804e-07,
|
|
"loss": 1.4137,
|
|
"step": 3468
|
|
},
|
|
{
|
|
"epoch": 2.82,
|
|
"learning_rate": 2.0557304928244592e-07,
|
|
"loss": 1.3081,
|
|
"step": 3469
|
|
},
|
|
{
|
|
"epoch": 2.82,
|
|
"learning_rate": 2.0377198542804842e-07,
|
|
"loss": 1.0453,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 2.82,
|
|
"learning_rate": 2.019787648396221e-07,
|
|
"loss": 1.0431,
|
|
"step": 3471
|
|
},
|
|
{
|
|
"epoch": 2.82,
|
|
"learning_rate": 2.001933889528973e-07,
|
|
"loss": 1.2327,
|
|
"step": 3472
|
|
},
|
|
{
|
|
"epoch": 2.82,
|
|
"learning_rate": 1.984158591973151e-07,
|
|
"loss": 1.1921,
|
|
"step": 3473
|
|
},
|
|
{
|
|
"epoch": 2.82,
|
|
"learning_rate": 1.9664617699604373e-07,
|
|
"loss": 1.3476,
|
|
"step": 3474
|
|
},
|
|
{
|
|
"epoch": 2.82,
|
|
"learning_rate": 1.9488434376596087e-07,
|
|
"loss": 1.224,
|
|
"step": 3475
|
|
},
|
|
{
|
|
"epoch": 2.82,
|
|
"learning_rate": 1.931303609176638e-07,
|
|
"loss": 1.3558,
|
|
"step": 3476
|
|
},
|
|
{
|
|
"epoch": 2.82,
|
|
"learning_rate": 1.913842298554669e-07,
|
|
"loss": 1.2467,
|
|
"step": 3477
|
|
},
|
|
{
|
|
"epoch": 2.82,
|
|
"learning_rate": 1.8964595197739077e-07,
|
|
"loss": 1.0457,
|
|
"step": 3478
|
|
},
|
|
{
|
|
"epoch": 2.82,
|
|
"learning_rate": 1.8791552867517436e-07,
|
|
"loss": 1.3802,
|
|
"step": 3479
|
|
},
|
|
{
|
|
"epoch": 2.82,
|
|
"learning_rate": 1.8619296133426723e-07,
|
|
"loss": 1.1685,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 2.83,
|
|
"learning_rate": 1.8447825133382614e-07,
|
|
"loss": 1.0963,
|
|
"step": 3481
|
|
},
|
|
{
|
|
"epoch": 2.83,
|
|
"learning_rate": 1.827714000467218e-07,
|
|
"loss": 1.0179,
|
|
"step": 3482
|
|
},
|
|
{
|
|
"epoch": 2.83,
|
|
"learning_rate": 1.810724088395277e-07,
|
|
"loss": 1.1532,
|
|
"step": 3483
|
|
},
|
|
{
|
|
"epoch": 2.83,
|
|
"learning_rate": 1.793812790725269e-07,
|
|
"loss": 1.0739,
|
|
"step": 3484
|
|
},
|
|
{
|
|
"epoch": 2.83,
|
|
"learning_rate": 1.7769801209970848e-07,
|
|
"loss": 1.4065,
|
|
"step": 3485
|
|
},
|
|
{
|
|
"epoch": 2.83,
|
|
"learning_rate": 1.7602260926876556e-07,
|
|
"loss": 1.1055,
|
|
"step": 3486
|
|
},
|
|
{
|
|
"epoch": 2.83,
|
|
"learning_rate": 1.7435507192109623e-07,
|
|
"loss": 0.9735,
|
|
"step": 3487
|
|
},
|
|
{
|
|
"epoch": 2.83,
|
|
"learning_rate": 1.7269540139179808e-07,
|
|
"loss": 1.2397,
|
|
"step": 3488
|
|
},
|
|
{
|
|
"epoch": 2.83,
|
|
"learning_rate": 1.710435990096726e-07,
|
|
"loss": 1.2358,
|
|
"step": 3489
|
|
},
|
|
{
|
|
"epoch": 2.83,
|
|
"learning_rate": 1.693996660972208e-07,
|
|
"loss": 1.2345,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 2.83,
|
|
"learning_rate": 1.6776360397064429e-07,
|
|
"loss": 1.0512,
|
|
"step": 3491
|
|
},
|
|
{
|
|
"epoch": 2.83,
|
|
"learning_rate": 1.66135413939843e-07,
|
|
"loss": 1.0837,
|
|
"step": 3492
|
|
},
|
|
{
|
|
"epoch": 2.84,
|
|
"learning_rate": 1.6451509730841086e-07,
|
|
"loss": 1.1677,
|
|
"step": 3493
|
|
},
|
|
{
|
|
"epoch": 2.84,
|
|
"learning_rate": 1.6290265537364347e-07,
|
|
"loss": 1.1526,
|
|
"step": 3494
|
|
},
|
|
{
|
|
"epoch": 2.84,
|
|
"learning_rate": 1.6129808942652593e-07,
|
|
"loss": 1.1716,
|
|
"step": 3495
|
|
},
|
|
{
|
|
"epoch": 2.84,
|
|
"learning_rate": 1.5970140075174166e-07,
|
|
"loss": 0.9879,
|
|
"step": 3496
|
|
},
|
|
{
|
|
"epoch": 2.84,
|
|
"learning_rate": 1.5811259062766592e-07,
|
|
"loss": 1.2505,
|
|
"step": 3497
|
|
},
|
|
{
|
|
"epoch": 2.84,
|
|
"learning_rate": 1.5653166032636558e-07,
|
|
"loss": 1.2338,
|
|
"step": 3498
|
|
},
|
|
{
|
|
"epoch": 2.84,
|
|
"learning_rate": 1.549586111135981e-07,
|
|
"loss": 1.1179,
|
|
"step": 3499
|
|
},
|
|
{
|
|
"epoch": 2.84,
|
|
"learning_rate": 1.5339344424881275e-07,
|
|
"loss": 1.0472,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 2.84,
|
|
"learning_rate": 1.518361609851482e-07,
|
|
"loss": 1.3757,
|
|
"step": 3501
|
|
},
|
|
{
|
|
"epoch": 2.84,
|
|
"learning_rate": 1.502867625694271e-07,
|
|
"loss": 0.9655,
|
|
"step": 3502
|
|
},
|
|
{
|
|
"epoch": 2.84,
|
|
"learning_rate": 1.4874525024216268e-07,
|
|
"loss": 1.1258,
|
|
"step": 3503
|
|
},
|
|
{
|
|
"epoch": 2.84,
|
|
"learning_rate": 1.472116252375555e-07,
|
|
"loss": 1.2318,
|
|
"step": 3504
|
|
},
|
|
{
|
|
"epoch": 2.84,
|
|
"learning_rate": 1.456858887834889e-07,
|
|
"loss": 1.2321,
|
|
"step": 3505
|
|
},
|
|
{
|
|
"epoch": 2.85,
|
|
"learning_rate": 1.441680421015279e-07,
|
|
"loss": 1.1499,
|
|
"step": 3506
|
|
},
|
|
{
|
|
"epoch": 2.85,
|
|
"learning_rate": 1.42658086406926e-07,
|
|
"loss": 0.8806,
|
|
"step": 3507
|
|
},
|
|
{
|
|
"epoch": 2.85,
|
|
"learning_rate": 1.411560229086173e-07,
|
|
"loss": 1.1222,
|
|
"step": 3508
|
|
},
|
|
{
|
|
"epoch": 2.85,
|
|
"learning_rate": 1.3966185280921307e-07,
|
|
"loss": 1.1523,
|
|
"step": 3509
|
|
},
|
|
{
|
|
"epoch": 2.85,
|
|
"learning_rate": 1.3817557730500863e-07,
|
|
"loss": 1.236,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 2.85,
|
|
"learning_rate": 1.3669719758597988e-07,
|
|
"loss": 0.9662,
|
|
"step": 3511
|
|
},
|
|
{
|
|
"epoch": 2.85,
|
|
"learning_rate": 1.3522671483577664e-07,
|
|
"loss": 1.2246,
|
|
"step": 3512
|
|
},
|
|
{
|
|
"epoch": 2.85,
|
|
"learning_rate": 1.3376413023172828e-07,
|
|
"loss": 1.202,
|
|
"step": 3513
|
|
},
|
|
{
|
|
"epoch": 2.85,
|
|
"learning_rate": 1.3230944494484253e-07,
|
|
"loss": 1.2806,
|
|
"step": 3514
|
|
},
|
|
{
|
|
"epoch": 2.85,
|
|
"learning_rate": 1.3086266013979777e-07,
|
|
"loss": 1.2678,
|
|
"step": 3515
|
|
},
|
|
{
|
|
"epoch": 2.85,
|
|
"learning_rate": 1.2942377697495288e-07,
|
|
"loss": 1.0923,
|
|
"step": 3516
|
|
},
|
|
{
|
|
"epoch": 2.85,
|
|
"learning_rate": 1.2799279660233532e-07,
|
|
"loss": 1.0523,
|
|
"step": 3517
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"learning_rate": 1.2656972016764745e-07,
|
|
"loss": 1.1318,
|
|
"step": 3518
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"learning_rate": 1.2515454881026567e-07,
|
|
"loss": 1.0751,
|
|
"step": 3519
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"learning_rate": 1.237472836632314e-07,
|
|
"loss": 1.1738,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"learning_rate": 1.2234792585326004e-07,
|
|
"loss": 1.1677,
|
|
"step": 3521
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"learning_rate": 1.209564765007376e-07,
|
|
"loss": 1.2006,
|
|
"step": 3522
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"learning_rate": 1.1957293671971404e-07,
|
|
"loss": 1.2652,
|
|
"step": 3523
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"learning_rate": 1.1819730761790992e-07,
|
|
"loss": 1.2161,
|
|
"step": 3524
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"learning_rate": 1.1682959029670982e-07,
|
|
"loss": 1.1017,
|
|
"step": 3525
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"learning_rate": 1.1546978585116442e-07,
|
|
"loss": 1.3815,
|
|
"step": 3526
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"learning_rate": 1.1411789536999063e-07,
|
|
"loss": 1.3019,
|
|
"step": 3527
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"learning_rate": 1.127739199355693e-07,
|
|
"loss": 1.2527,
|
|
"step": 3528
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"learning_rate": 1.1143786062393969e-07,
|
|
"loss": 1.2631,
|
|
"step": 3529
|
|
},
|
|
{
|
|
"epoch": 2.87,
|
|
"learning_rate": 1.1010971850480834e-07,
|
|
"loss": 0.9689,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 2.87,
|
|
"learning_rate": 1.087894946415402e-07,
|
|
"loss": 1.1106,
|
|
"step": 3531
|
|
},
|
|
{
|
|
"epoch": 2.87,
|
|
"learning_rate": 1.0747719009116087e-07,
|
|
"loss": 1.2009,
|
|
"step": 3532
|
|
},
|
|
{
|
|
"epoch": 2.87,
|
|
"learning_rate": 1.0617280590435653e-07,
|
|
"loss": 0.9959,
|
|
"step": 3533
|
|
},
|
|
{
|
|
"epoch": 2.87,
|
|
"learning_rate": 1.0487634312547179e-07,
|
|
"loss": 1.2263,
|
|
"step": 3534
|
|
},
|
|
{
|
|
"epoch": 2.87,
|
|
"learning_rate": 1.0358780279250635e-07,
|
|
"loss": 1.2681,
|
|
"step": 3535
|
|
},
|
|
{
|
|
"epoch": 2.87,
|
|
"learning_rate": 1.0230718593712053e-07,
|
|
"loss": 1.2203,
|
|
"step": 3536
|
|
},
|
|
{
|
|
"epoch": 2.87,
|
|
"learning_rate": 1.0103449358462858e-07,
|
|
"loss": 1.1437,
|
|
"step": 3537
|
|
},
|
|
{
|
|
"epoch": 2.87,
|
|
"learning_rate": 9.976972675399988e-08,
|
|
"loss": 1.0969,
|
|
"step": 3538
|
|
},
|
|
{
|
|
"epoch": 2.87,
|
|
"learning_rate": 9.851288645786328e-08,
|
|
"loss": 1.2331,
|
|
"step": 3539
|
|
},
|
|
{
|
|
"epoch": 2.87,
|
|
"learning_rate": 9.726397370249163e-08,
|
|
"loss": 0.9636,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 2.87,
|
|
"learning_rate": 9.602298948781952e-08,
|
|
"loss": 1.0291,
|
|
"step": 3541
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 9.478993480742881e-08,
|
|
"loss": 1.0651,
|
|
"step": 3542
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 9.356481064855538e-08,
|
|
"loss": 1.3595,
|
|
"step": 3543
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 9.234761799208459e-08,
|
|
"loss": 1.1676,
|
|
"step": 3544
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 9.11383578125502e-08,
|
|
"loss": 1.18,
|
|
"step": 3545
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 8.993703107813556e-08,
|
|
"loss": 1.1163,
|
|
"step": 3546
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 8.87436387506746e-08,
|
|
"loss": 1.223,
|
|
"step": 3547
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 8.755818178564523e-08,
|
|
"loss": 0.9707,
|
|
"step": 3548
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 8.638066113217269e-08,
|
|
"loss": 1.0818,
|
|
"step": 3549
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 8.521107773303172e-08,
|
|
"loss": 1.1335,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 8.40494325246366e-08,
|
|
"loss": 1.1934,
|
|
"step": 3551
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 8.28957264370478e-08,
|
|
"loss": 1.2071,
|
|
"step": 3552
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 8.174996039397309e-08,
|
|
"loss": 1.2789,
|
|
"step": 3553
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 8.061213531275869e-08,
|
|
"loss": 1.1694,
|
|
"step": 3554
|
|
},
|
|
{
|
|
"epoch": 2.89,
|
|
"learning_rate": 7.948225210439587e-08,
|
|
"loss": 1.2646,
|
|
"step": 3555
|
|
},
|
|
{
|
|
"epoch": 2.89,
|
|
"learning_rate": 7.836031167351322e-08,
|
|
"loss": 1.0332,
|
|
"step": 3556
|
|
},
|
|
{
|
|
"epoch": 2.89,
|
|
"learning_rate": 7.724631491838441e-08,
|
|
"loss": 1.1539,
|
|
"step": 3557
|
|
},
|
|
{
|
|
"epoch": 2.89,
|
|
"learning_rate": 7.614026273092157e-08,
|
|
"loss": 0.9393,
|
|
"step": 3558
|
|
},
|
|
{
|
|
"epoch": 2.89,
|
|
"learning_rate": 7.50421559966752e-08,
|
|
"loss": 1.0491,
|
|
"step": 3559
|
|
},
|
|
{
|
|
"epoch": 2.89,
|
|
"learning_rate": 7.395199559483534e-08,
|
|
"loss": 1.1521,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 2.89,
|
|
"learning_rate": 7.286978239823162e-08,
|
|
"loss": 1.1449,
|
|
"step": 3561
|
|
},
|
|
{
|
|
"epoch": 2.89,
|
|
"learning_rate": 7.179551727332423e-08,
|
|
"loss": 0.9918,
|
|
"step": 3562
|
|
},
|
|
{
|
|
"epoch": 2.89,
|
|
"learning_rate": 7.072920108021852e-08,
|
|
"loss": 1.4336,
|
|
"step": 3563
|
|
},
|
|
{
|
|
"epoch": 2.89,
|
|
"learning_rate": 6.967083467264823e-08,
|
|
"loss": 1.174,
|
|
"step": 3564
|
|
},
|
|
{
|
|
"epoch": 2.89,
|
|
"learning_rate": 6.862041889798776e-08,
|
|
"loss": 1.3232,
|
|
"step": 3565
|
|
},
|
|
{
|
|
"epoch": 2.89,
|
|
"learning_rate": 6.757795459724325e-08,
|
|
"loss": 1.1283,
|
|
"step": 3566
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"learning_rate": 6.654344260505374e-08,
|
|
"loss": 1.065,
|
|
"step": 3567
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"learning_rate": 6.551688374969223e-08,
|
|
"loss": 1.2516,
|
|
"step": 3568
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"learning_rate": 6.44982788530657e-08,
|
|
"loss": 1.3142,
|
|
"step": 3569
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"learning_rate": 6.348762873070957e-08,
|
|
"loss": 1.161,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"learning_rate": 6.248493419179435e-08,
|
|
"loss": 1.3181,
|
|
"step": 3571
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"learning_rate": 6.149019603911676e-08,
|
|
"loss": 1.3443,
|
|
"step": 3572
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"learning_rate": 6.050341506910639e-08,
|
|
"loss": 1.2247,
|
|
"step": 3573
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"learning_rate": 5.952459207182126e-08,
|
|
"loss": 1.2466,
|
|
"step": 3574
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"learning_rate": 5.8553727830947816e-08,
|
|
"loss": 0.9368,
|
|
"step": 3575
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"learning_rate": 5.759082312379871e-08,
|
|
"loss": 1.0062,
|
|
"step": 3576
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"learning_rate": 5.663587872131615e-08,
|
|
"loss": 1.2004,
|
|
"step": 3577
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"learning_rate": 5.568889538806854e-08,
|
|
"loss": 1.0722,
|
|
"step": 3578
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"learning_rate": 5.4749873882250506e-08,
|
|
"loss": 1.1934,
|
|
"step": 3579
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"learning_rate": 5.3818814955679535e-08,
|
|
"loss": 1.2005,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"learning_rate": 5.2895719353802663e-08,
|
|
"loss": 0.9607,
|
|
"step": 3581
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"learning_rate": 5.198058781568538e-08,
|
|
"loss": 1.4644,
|
|
"step": 3582
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"learning_rate": 5.107342107402269e-08,
|
|
"loss": 1.2209,
|
|
"step": 3583
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"learning_rate": 5.017421985512805e-08,
|
|
"loss": 1.313,
|
|
"step": 3584
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"learning_rate": 4.928298487894001e-08,
|
|
"loss": 1.3152,
|
|
"step": 3585
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"learning_rate": 4.839971685901779e-08,
|
|
"loss": 1.282,
|
|
"step": 3586
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"learning_rate": 4.7524416502543466e-08,
|
|
"loss": 0.9764,
|
|
"step": 3587
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"learning_rate": 4.6657084510317584e-08,
|
|
"loss": 1.2525,
|
|
"step": 3588
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"learning_rate": 4.579772157676132e-08,
|
|
"loss": 1.0598,
|
|
"step": 3589
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"learning_rate": 4.494632838991875e-08,
|
|
"loss": 1.129,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"learning_rate": 4.410290563144792e-08,
|
|
"loss": 1.2218,
|
|
"step": 3591
|
|
},
|
|
{
|
|
"epoch": 2.92,
|
|
"learning_rate": 4.32674539766309e-08,
|
|
"loss": 1.0849,
|
|
"step": 3592
|
|
},
|
|
{
|
|
"epoch": 2.92,
|
|
"learning_rate": 4.2439974094361515e-08,
|
|
"loss": 1.2845,
|
|
"step": 3593
|
|
},
|
|
{
|
|
"epoch": 2.92,
|
|
"learning_rate": 4.1620466647156466e-08,
|
|
"loss": 1.0431,
|
|
"step": 3594
|
|
},
|
|
{
|
|
"epoch": 2.92,
|
|
"learning_rate": 4.0808932291147574e-08,
|
|
"loss": 1.2356,
|
|
"step": 3595
|
|
},
|
|
{
|
|
"epoch": 2.92,
|
|
"learning_rate": 4.0005371676082874e-08,
|
|
"loss": 1.3116,
|
|
"step": 3596
|
|
},
|
|
{
|
|
"epoch": 2.92,
|
|
"learning_rate": 3.920978544532439e-08,
|
|
"loss": 1.4307,
|
|
"step": 3597
|
|
},
|
|
{
|
|
"epoch": 2.92,
|
|
"learning_rate": 3.8422174235853704e-08,
|
|
"loss": 1.08,
|
|
"step": 3598
|
|
},
|
|
{
|
|
"epoch": 2.92,
|
|
"learning_rate": 3.7642538678264174e-08,
|
|
"loss": 1.2234,
|
|
"step": 3599
|
|
},
|
|
{
|
|
"epoch": 2.92,
|
|
"learning_rate": 3.687087939676315e-08,
|
|
"loss": 1.3605,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 2.92,
|
|
"learning_rate": 3.610719700917531e-08,
|
|
"loss": 1.3186,
|
|
"step": 3601
|
|
},
|
|
{
|
|
"epoch": 2.92,
|
|
"learning_rate": 3.5351492126933785e-08,
|
|
"loss": 1.1415,
|
|
"step": 3602
|
|
},
|
|
{
|
|
"epoch": 2.92,
|
|
"learning_rate": 3.460376535509013e-08,
|
|
"loss": 1.2595,
|
|
"step": 3603
|
|
},
|
|
{
|
|
"epoch": 2.93,
|
|
"learning_rate": 3.386401729230326e-08,
|
|
"loss": 1.0148,
|
|
"step": 3604
|
|
},
|
|
{
|
|
"epoch": 2.93,
|
|
"learning_rate": 3.3132248530846065e-08,
|
|
"loss": 1.1951,
|
|
"step": 3605
|
|
},
|
|
{
|
|
"epoch": 2.93,
|
|
"learning_rate": 3.240845965660433e-08,
|
|
"loss": 1.3566,
|
|
"step": 3606
|
|
},
|
|
{
|
|
"epoch": 2.93,
|
|
"learning_rate": 3.1692651249072284e-08,
|
|
"loss": 1.2799,
|
|
"step": 3607
|
|
},
|
|
{
|
|
"epoch": 2.93,
|
|
"learning_rate": 3.098482388135593e-08,
|
|
"loss": 1.3107,
|
|
"step": 3608
|
|
},
|
|
{
|
|
"epoch": 2.93,
|
|
"learning_rate": 3.028497812017195e-08,
|
|
"loss": 1.407,
|
|
"step": 3609
|
|
},
|
|
{
|
|
"epoch": 2.93,
|
|
"learning_rate": 2.9593114525844346e-08,
|
|
"loss": 1.2381,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 2.93,
|
|
"learning_rate": 2.890923365231002e-08,
|
|
"loss": 1.0487,
|
|
"step": 3611
|
|
},
|
|
{
|
|
"epoch": 2.93,
|
|
"learning_rate": 2.823333604710987e-08,
|
|
"loss": 1.1469,
|
|
"step": 3612
|
|
},
|
|
{
|
|
"epoch": 2.93,
|
|
"learning_rate": 2.7565422251396577e-08,
|
|
"loss": 1.2159,
|
|
"step": 3613
|
|
},
|
|
{
|
|
"epoch": 2.93,
|
|
"learning_rate": 2.6905492799931265e-08,
|
|
"loss": 1.1546,
|
|
"step": 3614
|
|
},
|
|
{
|
|
"epoch": 2.93,
|
|
"learning_rate": 2.625354822107795e-08,
|
|
"loss": 1.1857,
|
|
"step": 3615
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"learning_rate": 2.560958903681132e-08,
|
|
"loss": 1.3503,
|
|
"step": 3616
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"learning_rate": 2.49736157627134e-08,
|
|
"loss": 1.1874,
|
|
"step": 3617
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"learning_rate": 2.43456289079691e-08,
|
|
"loss": 1.1085,
|
|
"step": 3618
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"learning_rate": 2.3725628975370675e-08,
|
|
"loss": 1.3518,
|
|
"step": 3619
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"learning_rate": 2.31136164613166e-08,
|
|
"loss": 1.2023,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"learning_rate": 2.2509591855809366e-08,
|
|
"loss": 1.3214,
|
|
"step": 3621
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"learning_rate": 2.1913555642456564e-08,
|
|
"loss": 0.9269,
|
|
"step": 3622
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"learning_rate": 2.1325508298467577e-08,
|
|
"loss": 1.2431,
|
|
"step": 3623
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"learning_rate": 2.074545029465802e-08,
|
|
"loss": 1.2842,
|
|
"step": 3624
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"learning_rate": 2.0173382095448613e-08,
|
|
"loss": 1.1214,
|
|
"step": 3625
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"learning_rate": 1.9609304158858533e-08,
|
|
"loss": 1.0493,
|
|
"step": 3626
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"learning_rate": 1.9053216936512076e-08,
|
|
"loss": 1.3128,
|
|
"step": 3627
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"learning_rate": 1.850512087363643e-08,
|
|
"loss": 1.512,
|
|
"step": 3628
|
|
},
|
|
{
|
|
"epoch": 2.95,
|
|
"learning_rate": 1.7965016409059455e-08,
|
|
"loss": 1.2547,
|
|
"step": 3629
|
|
},
|
|
{
|
|
"epoch": 2.95,
|
|
"learning_rate": 1.7432903975211912e-08,
|
|
"loss": 1.1057,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 2.95,
|
|
"learning_rate": 1.6908783998125235e-08,
|
|
"loss": 1.3358,
|
|
"step": 3631
|
|
},
|
|
{
|
|
"epoch": 2.95,
|
|
"learning_rate": 1.6392656897430414e-08,
|
|
"loss": 1.2089,
|
|
"step": 3632
|
|
},
|
|
{
|
|
"epoch": 2.95,
|
|
"learning_rate": 1.5884523086361348e-08,
|
|
"loss": 1.2772,
|
|
"step": 3633
|
|
},
|
|
{
|
|
"epoch": 2.95,
|
|
"learning_rate": 1.538438297175038e-08,
|
|
"loss": 1.0732,
|
|
"step": 3634
|
|
},
|
|
{
|
|
"epoch": 2.95,
|
|
"learning_rate": 1.4892236954030526e-08,
|
|
"loss": 1.1161,
|
|
"step": 3635
|
|
},
|
|
{
|
|
"epoch": 2.95,
|
|
"learning_rate": 1.4408085427233265e-08,
|
|
"loss": 1.1706,
|
|
"step": 3636
|
|
},
|
|
{
|
|
"epoch": 2.95,
|
|
"learning_rate": 1.3931928778992965e-08,
|
|
"loss": 1.3762,
|
|
"step": 3637
|
|
},
|
|
{
|
|
"epoch": 2.95,
|
|
"learning_rate": 1.3463767390536898e-08,
|
|
"loss": 1.132,
|
|
"step": 3638
|
|
},
|
|
{
|
|
"epoch": 2.95,
|
|
"learning_rate": 1.300360163669745e-08,
|
|
"loss": 1.1476,
|
|
"step": 3639
|
|
},
|
|
{
|
|
"epoch": 2.95,
|
|
"learning_rate": 1.2551431885901022e-08,
|
|
"loss": 1.3799,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"learning_rate": 1.2107258500173579e-08,
|
|
"loss": 1.1152,
|
|
"step": 3641
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"learning_rate": 1.1671081835139541e-08,
|
|
"loss": 1.1717,
|
|
"step": 3642
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"learning_rate": 1.124290224001845e-08,
|
|
"loss": 0.9476,
|
|
"step": 3643
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"learning_rate": 1.082272005762941e-08,
|
|
"loss": 1.0534,
|
|
"step": 3644
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"learning_rate": 1.0410535624388873e-08,
|
|
"loss": 1.2465,
|
|
"step": 3645
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"learning_rate": 1.0006349270306192e-08,
|
|
"loss": 0.9996,
|
|
"step": 3646
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"learning_rate": 9.610161318992505e-09,
|
|
"loss": 1.1808,
|
|
"step": 3647
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"learning_rate": 9.221972087650744e-09,
|
|
"loss": 1.055,
|
|
"step": 3648
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"learning_rate": 8.84178188708007e-09,
|
|
"loss": 1.1918,
|
|
"step": 3649
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"learning_rate": 8.469591021679214e-09,
|
|
"loss": 1.1071,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"learning_rate": 8.105399789438694e-09,
|
|
"loss": 1.2239,
|
|
"step": 3651
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"learning_rate": 7.74920848194416e-09,
|
|
"loss": 1.0938,
|
|
"step": 3652
|
|
},
|
|
{
|
|
"epoch": 2.97,
|
|
"learning_rate": 7.401017384377485e-09,
|
|
"loss": 1.1698,
|
|
"step": 3653
|
|
},
|
|
{
|
|
"epoch": 2.97,
|
|
"learning_rate": 7.060826775515672e-09,
|
|
"loss": 1.3489,
|
|
"step": 3654
|
|
},
|
|
{
|
|
"epoch": 2.97,
|
|
"learning_rate": 6.728636927727516e-09,
|
|
"loss": 1.1756,
|
|
"step": 3655
|
|
},
|
|
{
|
|
"epoch": 2.97,
|
|
"learning_rate": 6.404448106980266e-09,
|
|
"loss": 0.9766,
|
|
"step": 3656
|
|
},
|
|
{
|
|
"epoch": 2.97,
|
|
"learning_rate": 6.08826057283185e-09,
|
|
"loss": 1.2331,
|
|
"step": 3657
|
|
},
|
|
{
|
|
"epoch": 2.97,
|
|
"learning_rate": 5.780074578436435e-09,
|
|
"loss": 1.21,
|
|
"step": 3658
|
|
},
|
|
{
|
|
"epoch": 2.97,
|
|
"learning_rate": 5.479890370537755e-09,
|
|
"loss": 0.9963,
|
|
"step": 3659
|
|
},
|
|
{
|
|
"epoch": 2.97,
|
|
"learning_rate": 5.187708189478002e-09,
|
|
"loss": 1.2551,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 2.97,
|
|
"learning_rate": 4.90352826918894e-09,
|
|
"loss": 1.1691,
|
|
"step": 3661
|
|
},
|
|
{
|
|
"epoch": 2.97,
|
|
"learning_rate": 4.627350837197453e-09,
|
|
"loss": 1.1605,
|
|
"step": 3662
|
|
},
|
|
{
|
|
"epoch": 2.97,
|
|
"learning_rate": 4.359176114623332e-09,
|
|
"loss": 1.2439,
|
|
"step": 3663
|
|
},
|
|
{
|
|
"epoch": 2.97,
|
|
"learning_rate": 4.099004316177047e-09,
|
|
"loss": 1.2288,
|
|
"step": 3664
|
|
},
|
|
{
|
|
"epoch": 2.97,
|
|
"learning_rate": 3.846835650163083e-09,
|
|
"loss": 1.0522,
|
|
"step": 3665
|
|
},
|
|
{
|
|
"epoch": 2.98,
|
|
"learning_rate": 3.6026703184799395e-09,
|
|
"loss": 1.2131,
|
|
"step": 3666
|
|
},
|
|
{
|
|
"epoch": 2.98,
|
|
"learning_rate": 3.366508516613465e-09,
|
|
"loss": 1.1374,
|
|
"step": 3667
|
|
},
|
|
{
|
|
"epoch": 2.98,
|
|
"learning_rate": 3.138350433647963e-09,
|
|
"loss": 1.1023,
|
|
"step": 3668
|
|
},
|
|
{
|
|
"epoch": 2.98,
|
|
"learning_rate": 2.9181962522528696e-09,
|
|
"loss": 1.1514,
|
|
"step": 3669
|
|
},
|
|
{
|
|
"epoch": 2.98,
|
|
"learning_rate": 2.706046148694963e-09,
|
|
"loss": 1.0384,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 2.98,
|
|
"learning_rate": 2.5019002928305948e-09,
|
|
"loss": 1.3393,
|
|
"step": 3671
|
|
},
|
|
{
|
|
"epoch": 2.98,
|
|
"learning_rate": 2.305758848105688e-09,
|
|
"loss": 1.0343,
|
|
"step": 3672
|
|
},
|
|
{
|
|
"epoch": 2.98,
|
|
"learning_rate": 2.11762197156018e-09,
|
|
"loss": 1.205,
|
|
"step": 3673
|
|
},
|
|
{
|
|
"epoch": 2.98,
|
|
"learning_rate": 1.937489813824689e-09,
|
|
"loss": 1.2338,
|
|
"step": 3674
|
|
},
|
|
{
|
|
"epoch": 2.98,
|
|
"learning_rate": 1.765362519120517e-09,
|
|
"loss": 1.2172,
|
|
"step": 3675
|
|
},
|
|
{
|
|
"epoch": 2.98,
|
|
"learning_rate": 1.601240225258538e-09,
|
|
"loss": 1.1465,
|
|
"step": 3676
|
|
},
|
|
{
|
|
"epoch": 2.98,
|
|
"learning_rate": 1.4451230636425283e-09,
|
|
"loss": 1.4931,
|
|
"step": 3677
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"learning_rate": 1.2970111592669476e-09,
|
|
"loss": 1.3252,
|
|
"step": 3678
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"learning_rate": 1.1569046307158272e-09,
|
|
"loss": 1.1847,
|
|
"step": 3679
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"learning_rate": 1.024803590163881e-09,
|
|
"loss": 1.2332,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"learning_rate": 9.007081433776155e-10,
|
|
"loss": 1.1014,
|
|
"step": 3681
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"learning_rate": 7.846183897131099e-10,
|
|
"loss": 1.5037,
|
|
"step": 3682
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"learning_rate": 6.765344221149051e-10,
|
|
"loss": 1.2618,
|
|
"step": 3683
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"learning_rate": 5.764563271215551e-10,
|
|
"loss": 1.3902,
|
|
"step": 3684
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"learning_rate": 4.843841848578557e-10,
|
|
"loss": 1.2179,
|
|
"step": 3685
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"learning_rate": 4.0031806904261563e-10,
|
|
"loss": 1.0111,
|
|
"step": 3686
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"learning_rate": 3.2425804698088535e-10,
|
|
"loss": 1.2062,
|
|
"step": 3687
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"learning_rate": 2.5620417957061826e-10,
|
|
"loss": 1.2862,
|
|
"step": 3688
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"learning_rate": 1.9615652129711948e-10,
|
|
"loss": 1.2717,
|
|
"step": 3689
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"learning_rate": 1.4411512023859708e-10,
|
|
"loss": 0.9529,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"learning_rate": 1.0008001806061096e-10,
|
|
"loss": 1.2395,
|
|
"step": 3691
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"learning_rate": 6.405125002051372e-11,
|
|
"loss": 1.248,
|
|
"step": 3692
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"learning_rate": 3.602884496411996e-11,
|
|
"loss": 1.3066,
|
|
"step": 3693
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"learning_rate": 1.601282532570636e-11,
|
|
"loss": 1.244,
|
|
"step": 3694
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"learning_rate": 4.003207132452502e-12,
|
|
"loss": 1.1923,
|
|
"step": 3695
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"learning_rate": 0.0,
|
|
"loss": 1.4002,
|
|
"step": 3696
|
|
}
|
|
],
|
|
"logging_steps": 1.0,
|
|
"max_steps": 3696,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 3,
|
|
"save_steps": 500,
|
|
"total_flos": 4.847576146000216e+18,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|