Files
vicuna-160m/trainer_state.json
ModelHub XC e6849c3017 初始化项目,由ModelHub XC社区提供模型
Model: eqhylxx/vicuna-160m
Source: Original Platform
2026-05-06 05:27:37 +08:00

6812 lines
133 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 1131,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.9411764705882355e-06,
"loss": 3.5459,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 5.882352941176471e-06,
"loss": 3.4515,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 8.823529411764707e-06,
"loss": 3.5459,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 1.1764705882352942e-05,
"loss": 3.3973,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 1.4705882352941177e-05,
"loss": 3.3427,
"step": 5
},
{
"epoch": 0.02,
"learning_rate": 1.7647058823529414e-05,
"loss": 3.1525,
"step": 6
},
{
"epoch": 0.02,
"learning_rate": 2.058823529411765e-05,
"loss": 3.1169,
"step": 7
},
{
"epoch": 0.02,
"learning_rate": 2.3529411764705884e-05,
"loss": 3.0744,
"step": 8
},
{
"epoch": 0.02,
"learning_rate": 2.647058823529412e-05,
"loss": 2.9719,
"step": 9
},
{
"epoch": 0.03,
"learning_rate": 2.9411764705882354e-05,
"loss": 2.8901,
"step": 10
},
{
"epoch": 0.03,
"learning_rate": 3.235294117647059e-05,
"loss": 2.9138,
"step": 11
},
{
"epoch": 0.03,
"learning_rate": 3.529411764705883e-05,
"loss": 2.7916,
"step": 12
},
{
"epoch": 0.03,
"learning_rate": 3.8235294117647055e-05,
"loss": 2.7394,
"step": 13
},
{
"epoch": 0.04,
"learning_rate": 4.11764705882353e-05,
"loss": 2.6911,
"step": 14
},
{
"epoch": 0.04,
"learning_rate": 4.411764705882353e-05,
"loss": 2.6669,
"step": 15
},
{
"epoch": 0.04,
"learning_rate": 4.705882352941177e-05,
"loss": 2.7453,
"step": 16
},
{
"epoch": 0.05,
"learning_rate": 5e-05,
"loss": 2.5887,
"step": 17
},
{
"epoch": 0.05,
"learning_rate": 5.294117647058824e-05,
"loss": 2.5516,
"step": 18
},
{
"epoch": 0.05,
"learning_rate": 5.588235294117647e-05,
"loss": 2.6737,
"step": 19
},
{
"epoch": 0.05,
"learning_rate": 5.882352941176471e-05,
"loss": 2.4736,
"step": 20
},
{
"epoch": 0.06,
"learning_rate": 6.176470588235295e-05,
"loss": 2.397,
"step": 21
},
{
"epoch": 0.06,
"learning_rate": 6.470588235294118e-05,
"loss": 2.4433,
"step": 22
},
{
"epoch": 0.06,
"learning_rate": 6.764705882352942e-05,
"loss": 2.354,
"step": 23
},
{
"epoch": 0.06,
"learning_rate": 7.058823529411765e-05,
"loss": 2.4111,
"step": 24
},
{
"epoch": 0.07,
"learning_rate": 7.352941176470589e-05,
"loss": 2.3523,
"step": 25
},
{
"epoch": 0.07,
"learning_rate": 7.647058823529411e-05,
"loss": 2.3626,
"step": 26
},
{
"epoch": 0.07,
"learning_rate": 7.941176470588235e-05,
"loss": 2.2676,
"step": 27
},
{
"epoch": 0.07,
"learning_rate": 8.23529411764706e-05,
"loss": 2.403,
"step": 28
},
{
"epoch": 0.08,
"learning_rate": 8.529411764705883e-05,
"loss": 2.2134,
"step": 29
},
{
"epoch": 0.08,
"learning_rate": 8.823529411764706e-05,
"loss": 2.2442,
"step": 30
},
{
"epoch": 0.08,
"learning_rate": 9.11764705882353e-05,
"loss": 2.2206,
"step": 31
},
{
"epoch": 0.08,
"learning_rate": 9.411764705882353e-05,
"loss": 2.1881,
"step": 32
},
{
"epoch": 0.09,
"learning_rate": 9.705882352941177e-05,
"loss": 2.1922,
"step": 33
},
{
"epoch": 0.09,
"learning_rate": 0.0001,
"loss": 2.1636,
"step": 34
},
{
"epoch": 0.09,
"learning_rate": 9.999979496585003e-05,
"loss": 2.1973,
"step": 35
},
{
"epoch": 0.1,
"learning_rate": 9.999917986508165e-05,
"loss": 2.1669,
"step": 36
},
{
"epoch": 0.1,
"learning_rate": 9.999815470273954e-05,
"loss": 2.1175,
"step": 37
},
{
"epoch": 0.1,
"learning_rate": 9.999671948723141e-05,
"loss": 2.2195,
"step": 38
},
{
"epoch": 0.1,
"learning_rate": 9.999487423032803e-05,
"loss": 2.0737,
"step": 39
},
{
"epoch": 0.11,
"learning_rate": 9.999261894716299e-05,
"loss": 2.1123,
"step": 40
},
{
"epoch": 0.11,
"learning_rate": 9.998995365623271e-05,
"loss": 2.1978,
"step": 41
},
{
"epoch": 0.11,
"learning_rate": 9.998687837939621e-05,
"loss": 2.0849,
"step": 42
},
{
"epoch": 0.11,
"learning_rate": 9.998339314187497e-05,
"loss": 2.0458,
"step": 43
},
{
"epoch": 0.12,
"learning_rate": 9.997949797225268e-05,
"loss": 1.9736,
"step": 44
},
{
"epoch": 0.12,
"learning_rate": 9.997519290247507e-05,
"loss": 2.1129,
"step": 45
},
{
"epoch": 0.12,
"learning_rate": 9.997047796784959e-05,
"loss": 2.0466,
"step": 46
},
{
"epoch": 0.12,
"learning_rate": 9.996535320704514e-05,
"loss": 2.0619,
"step": 47
},
{
"epoch": 0.13,
"learning_rate": 9.995981866209174e-05,
"loss": 2.1507,
"step": 48
},
{
"epoch": 0.13,
"learning_rate": 9.995387437838026e-05,
"loss": 2.0693,
"step": 49
},
{
"epoch": 0.13,
"learning_rate": 9.99475204046619e-05,
"loss": 1.9994,
"step": 50
},
{
"epoch": 0.14,
"learning_rate": 9.994075679304798e-05,
"loss": 2.1005,
"step": 51
},
{
"epoch": 0.14,
"learning_rate": 9.993358359900931e-05,
"loss": 2.0013,
"step": 52
},
{
"epoch": 0.14,
"learning_rate": 9.99260008813759e-05,
"loss": 2.0427,
"step": 53
},
{
"epoch": 0.14,
"learning_rate": 9.991800870233638e-05,
"loss": 2.0812,
"step": 54
},
{
"epoch": 0.15,
"learning_rate": 9.990960712743754e-05,
"loss": 1.9826,
"step": 55
},
{
"epoch": 0.15,
"learning_rate": 9.990079622558377e-05,
"loss": 2.0451,
"step": 56
},
{
"epoch": 0.15,
"learning_rate": 9.989157606903649e-05,
"loss": 2.0134,
"step": 57
},
{
"epoch": 0.15,
"learning_rate": 9.988194673341362e-05,
"loss": 2.0029,
"step": 58
},
{
"epoch": 0.16,
"learning_rate": 9.987190829768882e-05,
"loss": 1.9063,
"step": 59
},
{
"epoch": 0.16,
"learning_rate": 9.986146084419099e-05,
"loss": 1.9806,
"step": 60
},
{
"epoch": 0.16,
"learning_rate": 9.985060445860352e-05,
"loss": 1.9337,
"step": 61
},
{
"epoch": 0.16,
"learning_rate": 9.983933922996361e-05,
"loss": 1.9956,
"step": 62
},
{
"epoch": 0.17,
"learning_rate": 9.982766525066152e-05,
"loss": 2.0342,
"step": 63
},
{
"epoch": 0.17,
"learning_rate": 9.981558261643981e-05,
"loss": 2.0178,
"step": 64
},
{
"epoch": 0.17,
"learning_rate": 9.980309142639261e-05,
"loss": 1.9531,
"step": 65
},
{
"epoch": 0.18,
"learning_rate": 9.979019178296473e-05,
"loss": 2.0035,
"step": 66
},
{
"epoch": 0.18,
"learning_rate": 9.977688379195087e-05,
"loss": 1.8774,
"step": 67
},
{
"epoch": 0.18,
"learning_rate": 9.976316756249472e-05,
"loss": 2.0842,
"step": 68
},
{
"epoch": 0.18,
"learning_rate": 9.97490432070881e-05,
"loss": 1.9861,
"step": 69
},
{
"epoch": 0.19,
"learning_rate": 9.973451084157006e-05,
"loss": 1.8747,
"step": 70
},
{
"epoch": 0.19,
"learning_rate": 9.97195705851258e-05,
"loss": 1.9883,
"step": 71
},
{
"epoch": 0.19,
"learning_rate": 9.970422256028587e-05,
"loss": 1.9212,
"step": 72
},
{
"epoch": 0.19,
"learning_rate": 9.9688466892925e-05,
"loss": 1.9101,
"step": 73
},
{
"epoch": 0.2,
"learning_rate": 9.96723037122612e-05,
"loss": 2.0329,
"step": 74
},
{
"epoch": 0.2,
"learning_rate": 9.965573315085462e-05,
"loss": 1.9547,
"step": 75
},
{
"epoch": 0.2,
"learning_rate": 9.963875534460653e-05,
"loss": 1.9255,
"step": 76
},
{
"epoch": 0.2,
"learning_rate": 9.96213704327581e-05,
"loss": 1.9336,
"step": 77
},
{
"epoch": 0.21,
"learning_rate": 9.960357855788938e-05,
"loss": 1.8824,
"step": 78
},
{
"epoch": 0.21,
"learning_rate": 9.958537986591803e-05,
"loss": 1.884,
"step": 79
},
{
"epoch": 0.21,
"learning_rate": 9.95667745060982e-05,
"loss": 1.9605,
"step": 80
},
{
"epoch": 0.21,
"learning_rate": 9.954776263101924e-05,
"loss": 1.8576,
"step": 81
},
{
"epoch": 0.22,
"learning_rate": 9.95283443966045e-05,
"loss": 1.9995,
"step": 82
},
{
"epoch": 0.22,
"learning_rate": 9.950851996211004e-05,
"loss": 1.944,
"step": 83
},
{
"epoch": 0.22,
"learning_rate": 9.948828949012327e-05,
"loss": 1.9854,
"step": 84
},
{
"epoch": 0.23,
"learning_rate": 9.946765314656174e-05,
"loss": 1.9635,
"step": 85
},
{
"epoch": 0.23,
"learning_rate": 9.944661110067162e-05,
"loss": 2.0009,
"step": 86
},
{
"epoch": 0.23,
"learning_rate": 9.942516352502644e-05,
"loss": 1.9422,
"step": 87
},
{
"epoch": 0.23,
"learning_rate": 9.940331059552563e-05,
"loss": 1.8891,
"step": 88
},
{
"epoch": 0.24,
"learning_rate": 9.938105249139306e-05,
"loss": 1.9399,
"step": 89
},
{
"epoch": 0.24,
"learning_rate": 9.935838939517556e-05,
"loss": 1.9283,
"step": 90
},
{
"epoch": 0.24,
"learning_rate": 9.933532149274152e-05,
"loss": 1.9132,
"step": 91
},
{
"epoch": 0.24,
"learning_rate": 9.931184897327922e-05,
"loss": 1.8997,
"step": 92
},
{
"epoch": 0.25,
"learning_rate": 9.928797202929539e-05,
"loss": 1.8664,
"step": 93
},
{
"epoch": 0.25,
"learning_rate": 9.92636908566136e-05,
"loss": 1.8697,
"step": 94
},
{
"epoch": 0.25,
"learning_rate": 9.923900565437262e-05,
"loss": 1.9275,
"step": 95
},
{
"epoch": 0.25,
"learning_rate": 9.921391662502483e-05,
"loss": 1.9188,
"step": 96
},
{
"epoch": 0.26,
"learning_rate": 9.918842397433455e-05,
"loss": 1.8527,
"step": 97
},
{
"epoch": 0.26,
"learning_rate": 9.916252791137631e-05,
"loss": 1.9087,
"step": 98
},
{
"epoch": 0.26,
"learning_rate": 9.913622864853325e-05,
"loss": 1.8689,
"step": 99
},
{
"epoch": 0.27,
"learning_rate": 9.91095264014952e-05,
"loss": 1.9366,
"step": 100
},
{
"epoch": 0.27,
"learning_rate": 9.908242138925709e-05,
"loss": 1.8494,
"step": 101
},
{
"epoch": 0.27,
"learning_rate": 9.905491383411705e-05,
"loss": 1.8945,
"step": 102
},
{
"epoch": 0.27,
"learning_rate": 9.902700396167459e-05,
"loss": 1.914,
"step": 103
},
{
"epoch": 0.28,
"learning_rate": 9.899869200082881e-05,
"loss": 1.8494,
"step": 104
},
{
"epoch": 0.28,
"learning_rate": 9.896997818377642e-05,
"loss": 1.8909,
"step": 105
},
{
"epoch": 0.28,
"learning_rate": 9.894086274601e-05,
"loss": 1.8623,
"step": 106
},
{
"epoch": 0.28,
"learning_rate": 9.891134592631587e-05,
"loss": 1.8872,
"step": 107
},
{
"epoch": 0.29,
"learning_rate": 9.88814279667723e-05,
"loss": 1.8787,
"step": 108
},
{
"epoch": 0.29,
"learning_rate": 9.885110911274738e-05,
"loss": 1.8782,
"step": 109
},
{
"epoch": 0.29,
"learning_rate": 9.88203896128972e-05,
"loss": 1.7577,
"step": 110
},
{
"epoch": 0.29,
"learning_rate": 9.878926971916354e-05,
"loss": 1.8954,
"step": 111
},
{
"epoch": 0.3,
"learning_rate": 9.87577496867721e-05,
"loss": 1.8517,
"step": 112
},
{
"epoch": 0.3,
"learning_rate": 9.872582977423018e-05,
"loss": 1.839,
"step": 113
},
{
"epoch": 0.3,
"learning_rate": 9.869351024332467e-05,
"loss": 1.8851,
"step": 114
},
{
"epoch": 0.31,
"learning_rate": 9.866079135911986e-05,
"loss": 1.854,
"step": 115
},
{
"epoch": 0.31,
"learning_rate": 9.86276733899553e-05,
"loss": 1.8384,
"step": 116
},
{
"epoch": 0.31,
"learning_rate": 9.85941566074436e-05,
"loss": 1.8775,
"step": 117
},
{
"epoch": 0.31,
"learning_rate": 9.856024128646812e-05,
"loss": 1.8111,
"step": 118
},
{
"epoch": 0.32,
"learning_rate": 9.852592770518085e-05,
"loss": 1.8075,
"step": 119
},
{
"epoch": 0.32,
"learning_rate": 9.849121614500001e-05,
"loss": 1.7952,
"step": 120
},
{
"epoch": 0.32,
"learning_rate": 9.845610689060782e-05,
"loss": 1.8371,
"step": 121
},
{
"epoch": 0.32,
"learning_rate": 9.842060022994814e-05,
"loss": 1.8761,
"step": 122
},
{
"epoch": 0.33,
"learning_rate": 9.838469645422406e-05,
"loss": 1.8242,
"step": 123
},
{
"epoch": 0.33,
"learning_rate": 9.834839585789559e-05,
"loss": 1.8772,
"step": 124
},
{
"epoch": 0.33,
"learning_rate": 9.831169873867723e-05,
"loss": 1.7998,
"step": 125
},
{
"epoch": 0.33,
"learning_rate": 9.827460539753546e-05,
"loss": 1.8881,
"step": 126
},
{
"epoch": 0.34,
"learning_rate": 9.823711613868636e-05,
"loss": 1.8629,
"step": 127
},
{
"epoch": 0.34,
"learning_rate": 9.819923126959308e-05,
"loss": 1.8018,
"step": 128
},
{
"epoch": 0.34,
"learning_rate": 9.816095110096325e-05,
"loss": 1.8126,
"step": 129
},
{
"epoch": 0.34,
"learning_rate": 9.812227594674659e-05,
"loss": 1.8217,
"step": 130
},
{
"epoch": 0.35,
"learning_rate": 9.808320612413217e-05,
"loss": 1.8537,
"step": 131
},
{
"epoch": 0.35,
"learning_rate": 9.804374195354591e-05,
"loss": 1.8028,
"step": 132
},
{
"epoch": 0.35,
"learning_rate": 9.80038837586479e-05,
"loss": 1.8954,
"step": 133
},
{
"epoch": 0.36,
"learning_rate": 9.796363186632985e-05,
"loss": 1.8818,
"step": 134
},
{
"epoch": 0.36,
"learning_rate": 9.792298660671217e-05,
"loss": 1.8965,
"step": 135
},
{
"epoch": 0.36,
"learning_rate": 9.788194831314158e-05,
"loss": 1.8414,
"step": 136
},
{
"epoch": 0.36,
"learning_rate": 9.784051732218808e-05,
"loss": 1.8456,
"step": 137
},
{
"epoch": 0.37,
"learning_rate": 9.779869397364247e-05,
"loss": 1.8479,
"step": 138
},
{
"epoch": 0.37,
"learning_rate": 9.775647861051329e-05,
"loss": 1.8176,
"step": 139
},
{
"epoch": 0.37,
"learning_rate": 9.771387157902417e-05,
"loss": 1.7994,
"step": 140
},
{
"epoch": 0.37,
"learning_rate": 9.767087322861102e-05,
"loss": 1.8153,
"step": 141
},
{
"epoch": 0.38,
"learning_rate": 9.7627483911919e-05,
"loss": 1.8048,
"step": 142
},
{
"epoch": 0.38,
"learning_rate": 9.758370398479981e-05,
"loss": 1.8491,
"step": 143
},
{
"epoch": 0.38,
"learning_rate": 9.753953380630862e-05,
"loss": 1.82,
"step": 144
},
{
"epoch": 0.38,
"learning_rate": 9.74949737387013e-05,
"loss": 1.922,
"step": 145
},
{
"epoch": 0.39,
"learning_rate": 9.745002414743119e-05,
"loss": 1.8061,
"step": 146
},
{
"epoch": 0.39,
"learning_rate": 9.740468540114638e-05,
"loss": 1.8676,
"step": 147
},
{
"epoch": 0.39,
"learning_rate": 9.735895787168652e-05,
"loss": 1.904,
"step": 148
},
{
"epoch": 0.4,
"learning_rate": 9.73128419340798e-05,
"loss": 1.7908,
"step": 149
},
{
"epoch": 0.4,
"learning_rate": 9.726633796653994e-05,
"loss": 1.8096,
"step": 150
},
{
"epoch": 0.4,
"learning_rate": 9.721944635046297e-05,
"loss": 1.8669,
"step": 151
},
{
"epoch": 0.4,
"learning_rate": 9.717216747042419e-05,
"loss": 1.7547,
"step": 152
},
{
"epoch": 0.41,
"learning_rate": 9.712450171417502e-05,
"loss": 1.7849,
"step": 153
},
{
"epoch": 0.41,
"learning_rate": 9.707644947263976e-05,
"loss": 1.8122,
"step": 154
},
{
"epoch": 0.41,
"learning_rate": 9.702801113991243e-05,
"loss": 1.768,
"step": 155
},
{
"epoch": 0.41,
"learning_rate": 9.697918711325353e-05,
"loss": 1.8519,
"step": 156
},
{
"epoch": 0.42,
"learning_rate": 9.692997779308677e-05,
"loss": 1.7329,
"step": 157
},
{
"epoch": 0.42,
"learning_rate": 9.688038358299578e-05,
"loss": 1.7725,
"step": 158
},
{
"epoch": 0.42,
"learning_rate": 9.683040488972086e-05,
"loss": 1.7678,
"step": 159
},
{
"epoch": 0.42,
"learning_rate": 9.678004212315554e-05,
"loss": 1.7351,
"step": 160
},
{
"epoch": 0.43,
"learning_rate": 9.672929569634331e-05,
"loss": 1.8248,
"step": 161
},
{
"epoch": 0.43,
"learning_rate": 9.66781660254742e-05,
"loss": 1.7674,
"step": 162
},
{
"epoch": 0.43,
"learning_rate": 9.662665352988133e-05,
"loss": 1.7685,
"step": 163
},
{
"epoch": 0.44,
"learning_rate": 9.657475863203755e-05,
"loss": 1.8122,
"step": 164
},
{
"epoch": 0.44,
"learning_rate": 9.65224817575519e-05,
"loss": 1.8858,
"step": 165
},
{
"epoch": 0.44,
"learning_rate": 9.646982333516616e-05,
"loss": 1.8532,
"step": 166
},
{
"epoch": 0.44,
"learning_rate": 9.641678379675135e-05,
"loss": 1.8341,
"step": 167
},
{
"epoch": 0.45,
"learning_rate": 9.63633635773041e-05,
"loss": 1.6986,
"step": 168
},
{
"epoch": 0.45,
"learning_rate": 9.63095631149432e-05,
"loss": 1.7467,
"step": 169
},
{
"epoch": 0.45,
"learning_rate": 9.625538285090595e-05,
"loss": 1.769,
"step": 170
},
{
"epoch": 0.45,
"learning_rate": 9.620082322954448e-05,
"loss": 1.7554,
"step": 171
},
{
"epoch": 0.46,
"learning_rate": 9.614588469832225e-05,
"loss": 1.6883,
"step": 172
},
{
"epoch": 0.46,
"learning_rate": 9.609056770781026e-05,
"loss": 1.7908,
"step": 173
},
{
"epoch": 0.46,
"learning_rate": 9.603487271168336e-05,
"loss": 1.769,
"step": 174
},
{
"epoch": 0.46,
"learning_rate": 9.597880016671665e-05,
"loss": 1.8701,
"step": 175
},
{
"epoch": 0.47,
"learning_rate": 9.592235053278157e-05,
"loss": 1.7404,
"step": 176
},
{
"epoch": 0.47,
"learning_rate": 9.586552427284223e-05,
"loss": 1.7462,
"step": 177
},
{
"epoch": 0.47,
"learning_rate": 9.580832185295156e-05,
"loss": 1.7704,
"step": 178
},
{
"epoch": 0.47,
"learning_rate": 9.575074374224758e-05,
"loss": 1.7354,
"step": 179
},
{
"epoch": 0.48,
"learning_rate": 9.569279041294944e-05,
"loss": 1.9016,
"step": 180
},
{
"epoch": 0.48,
"learning_rate": 9.563446234035358e-05,
"loss": 1.7546,
"step": 181
},
{
"epoch": 0.48,
"learning_rate": 9.557576000282991e-05,
"loss": 1.7814,
"step": 182
},
{
"epoch": 0.49,
"learning_rate": 9.551668388181776e-05,
"loss": 1.7423,
"step": 183
},
{
"epoch": 0.49,
"learning_rate": 9.545723446182202e-05,
"loss": 1.737,
"step": 184
},
{
"epoch": 0.49,
"learning_rate": 9.539741223040915e-05,
"loss": 1.7577,
"step": 185
},
{
"epoch": 0.49,
"learning_rate": 9.533721767820317e-05,
"loss": 1.7864,
"step": 186
},
{
"epoch": 0.5,
"learning_rate": 9.527665129888161e-05,
"loss": 1.7015,
"step": 187
},
{
"epoch": 0.5,
"learning_rate": 9.521571358917153e-05,
"loss": 1.7017,
"step": 188
},
{
"epoch": 0.5,
"learning_rate": 9.51544050488454e-05,
"loss": 1.7616,
"step": 189
},
{
"epoch": 0.5,
"learning_rate": 9.509272618071699e-05,
"loss": 1.7538,
"step": 190
},
{
"epoch": 0.51,
"learning_rate": 9.503067749063726e-05,
"loss": 1.8012,
"step": 191
},
{
"epoch": 0.51,
"learning_rate": 9.496825948749024e-05,
"loss": 1.7607,
"step": 192
},
{
"epoch": 0.51,
"learning_rate": 9.490547268318881e-05,
"loss": 1.7575,
"step": 193
},
{
"epoch": 0.51,
"learning_rate": 9.484231759267054e-05,
"loss": 1.7102,
"step": 194
},
{
"epoch": 0.52,
"learning_rate": 9.477879473389345e-05,
"loss": 1.7801,
"step": 195
},
{
"epoch": 0.52,
"learning_rate": 9.471490462783175e-05,
"loss": 1.7379,
"step": 196
},
{
"epoch": 0.52,
"learning_rate": 9.465064779847156e-05,
"loss": 1.718,
"step": 197
},
{
"epoch": 0.53,
"learning_rate": 9.458602477280668e-05,
"loss": 1.6832,
"step": 198
},
{
"epoch": 0.53,
"learning_rate": 9.452103608083417e-05,
"loss": 1.7995,
"step": 199
},
{
"epoch": 0.53,
"learning_rate": 9.445568225555014e-05,
"loss": 1.7036,
"step": 200
},
{
"epoch": 0.53,
"learning_rate": 9.438996383294516e-05,
"loss": 1.6973,
"step": 201
},
{
"epoch": 0.54,
"learning_rate": 9.43238813520001e-05,
"loss": 1.757,
"step": 202
},
{
"epoch": 0.54,
"learning_rate": 9.425743535468156e-05,
"loss": 1.7293,
"step": 203
},
{
"epoch": 0.54,
"learning_rate": 9.41906263859375e-05,
"loss": 1.8156,
"step": 204
},
{
"epoch": 0.54,
"learning_rate": 9.412345499369271e-05,
"loss": 1.7483,
"step": 205
},
{
"epoch": 0.55,
"learning_rate": 9.405592172884437e-05,
"loss": 1.7947,
"step": 206
},
{
"epoch": 0.55,
"learning_rate": 9.39880271452575e-05,
"loss": 1.8237,
"step": 207
},
{
"epoch": 0.55,
"learning_rate": 9.391977179976043e-05,
"loss": 1.6674,
"step": 208
},
{
"epoch": 0.55,
"learning_rate": 9.385115625214022e-05,
"loss": 1.7484,
"step": 209
},
{
"epoch": 0.56,
"learning_rate": 9.378218106513812e-05,
"loss": 1.7449,
"step": 210
},
{
"epoch": 0.56,
"learning_rate": 9.371284680444483e-05,
"loss": 1.7444,
"step": 211
},
{
"epoch": 0.56,
"learning_rate": 9.364315403869606e-05,
"loss": 1.6666,
"step": 212
},
{
"epoch": 0.56,
"learning_rate": 9.357310333946763e-05,
"loss": 1.7569,
"step": 213
},
{
"epoch": 0.57,
"learning_rate": 9.3502695281271e-05,
"loss": 1.6859,
"step": 214
},
{
"epoch": 0.57,
"learning_rate": 9.343193044154843e-05,
"loss": 1.7095,
"step": 215
},
{
"epoch": 0.57,
"learning_rate": 9.336080940066826e-05,
"loss": 1.8226,
"step": 216
},
{
"epoch": 0.58,
"learning_rate": 9.328933274192015e-05,
"loss": 1.8059,
"step": 217
},
{
"epoch": 0.58,
"learning_rate": 9.32175010515104e-05,
"loss": 1.699,
"step": 218
},
{
"epoch": 0.58,
"learning_rate": 9.314531491855692e-05,
"loss": 1.7162,
"step": 219
},
{
"epoch": 0.58,
"learning_rate": 9.307277493508465e-05,
"loss": 1.7898,
"step": 220
},
{
"epoch": 0.59,
"learning_rate": 9.299988169602054e-05,
"loss": 1.8133,
"step": 221
},
{
"epoch": 0.59,
"learning_rate": 9.292663579918873e-05,
"loss": 1.81,
"step": 222
},
{
"epoch": 0.59,
"learning_rate": 9.285303784530558e-05,
"loss": 1.694,
"step": 223
},
{
"epoch": 0.59,
"learning_rate": 9.277908843797492e-05,
"loss": 1.648,
"step": 224
},
{
"epoch": 0.6,
"learning_rate": 9.270478818368287e-05,
"loss": 1.8439,
"step": 225
},
{
"epoch": 0.6,
"learning_rate": 9.263013769179298e-05,
"loss": 1.7486,
"step": 226
},
{
"epoch": 0.6,
"learning_rate": 9.25551375745413e-05,
"loss": 1.7601,
"step": 227
},
{
"epoch": 0.6,
"learning_rate": 9.247978844703122e-05,
"loss": 1.7399,
"step": 228
},
{
"epoch": 0.61,
"learning_rate": 9.240409092722852e-05,
"loss": 1.8162,
"step": 229
},
{
"epoch": 0.61,
"learning_rate": 9.232804563595626e-05,
"loss": 1.6533,
"step": 230
},
{
"epoch": 0.61,
"learning_rate": 9.22516531968897e-05,
"loss": 1.7488,
"step": 231
},
{
"epoch": 0.62,
"learning_rate": 9.217491423655123e-05,
"loss": 1.7544,
"step": 232
},
{
"epoch": 0.62,
"learning_rate": 9.209782938430509e-05,
"loss": 1.6406,
"step": 233
},
{
"epoch": 0.62,
"learning_rate": 9.202039927235241e-05,
"loss": 1.7158,
"step": 234
},
{
"epoch": 0.62,
"learning_rate": 9.194262453572586e-05,
"loss": 1.7827,
"step": 235
},
{
"epoch": 0.63,
"learning_rate": 9.186450581228454e-05,
"loss": 1.6567,
"step": 236
},
{
"epoch": 0.63,
"learning_rate": 9.178604374270867e-05,
"loss": 1.7305,
"step": 237
},
{
"epoch": 0.63,
"learning_rate": 9.170723897049439e-05,
"loss": 1.7544,
"step": 238
},
{
"epoch": 0.63,
"learning_rate": 9.162809214194851e-05,
"loss": 1.7247,
"step": 239
},
{
"epoch": 0.64,
"learning_rate": 9.154860390618313e-05,
"loss": 1.8192,
"step": 240
},
{
"epoch": 0.64,
"learning_rate": 9.146877491511035e-05,
"loss": 1.7016,
"step": 241
},
{
"epoch": 0.64,
"learning_rate": 9.138860582343696e-05,
"loss": 1.7377,
"step": 242
},
{
"epoch": 0.64,
"learning_rate": 9.130809728865901e-05,
"loss": 1.6459,
"step": 243
},
{
"epoch": 0.65,
"learning_rate": 9.122724997105647e-05,
"loss": 1.7161,
"step": 244
},
{
"epoch": 0.65,
"learning_rate": 9.114606453368779e-05,
"loss": 1.6868,
"step": 245
},
{
"epoch": 0.65,
"learning_rate": 9.106454164238442e-05,
"loss": 1.7086,
"step": 246
},
{
"epoch": 0.66,
"learning_rate": 9.098268196574546e-05,
"loss": 1.7164,
"step": 247
},
{
"epoch": 0.66,
"learning_rate": 9.090048617513207e-05,
"loss": 1.6877,
"step": 248
},
{
"epoch": 0.66,
"learning_rate": 9.081795494466201e-05,
"loss": 1.6701,
"step": 249
},
{
"epoch": 0.66,
"learning_rate": 9.073508895120411e-05,
"loss": 1.7393,
"step": 250
},
{
"epoch": 0.67,
"learning_rate": 9.065188887437273e-05,
"loss": 1.746,
"step": 251
},
{
"epoch": 0.67,
"learning_rate": 9.056835539652211e-05,
"loss": 1.79,
"step": 252
},
{
"epoch": 0.67,
"learning_rate": 9.048448920274088e-05,
"loss": 1.6791,
"step": 253
},
{
"epoch": 0.67,
"learning_rate": 9.040029098084643e-05,
"loss": 1.6771,
"step": 254
},
{
"epoch": 0.68,
"learning_rate": 9.031576142137919e-05,
"loss": 1.644,
"step": 255
},
{
"epoch": 0.68,
"learning_rate": 9.023090121759699e-05,
"loss": 1.7242,
"step": 256
},
{
"epoch": 0.68,
"learning_rate": 9.01457110654694e-05,
"loss": 1.7745,
"step": 257
},
{
"epoch": 0.68,
"learning_rate": 9.006019166367208e-05,
"loss": 1.7381,
"step": 258
},
{
"epoch": 0.69,
"learning_rate": 8.997434371358093e-05,
"loss": 1.6923,
"step": 259
},
{
"epoch": 0.69,
"learning_rate": 8.98881679192664e-05,
"loss": 1.8049,
"step": 260
},
{
"epoch": 0.69,
"learning_rate": 8.980166498748774e-05,
"loss": 1.6683,
"step": 261
},
{
"epoch": 0.69,
"learning_rate": 8.971483562768712e-05,
"loss": 1.7033,
"step": 262
},
{
"epoch": 0.7,
"learning_rate": 8.962768055198394e-05,
"loss": 1.761,
"step": 263
},
{
"epoch": 0.7,
"learning_rate": 8.954020047516884e-05,
"loss": 1.7824,
"step": 264
},
{
"epoch": 0.7,
"learning_rate": 8.945239611469796e-05,
"loss": 1.725,
"step": 265
},
{
"epoch": 0.71,
"learning_rate": 8.9364268190687e-05,
"loss": 1.6417,
"step": 266
},
{
"epoch": 0.71,
"learning_rate": 8.927581742590533e-05,
"loss": 1.7119,
"step": 267
},
{
"epoch": 0.71,
"learning_rate": 8.918704454577003e-05,
"loss": 1.7466,
"step": 268
},
{
"epoch": 0.71,
"learning_rate": 8.909795027833998e-05,
"loss": 1.6963,
"step": 269
},
{
"epoch": 0.72,
"learning_rate": 8.900853535430986e-05,
"loss": 1.7345,
"step": 270
},
{
"epoch": 0.72,
"learning_rate": 8.891880050700424e-05,
"loss": 1.6779,
"step": 271
},
{
"epoch": 0.72,
"learning_rate": 8.882874647237138e-05,
"loss": 1.6923,
"step": 272
},
{
"epoch": 0.72,
"learning_rate": 8.873837398897742e-05,
"loss": 1.6592,
"step": 273
},
{
"epoch": 0.73,
"learning_rate": 8.864768379800016e-05,
"loss": 1.6333,
"step": 274
},
{
"epoch": 0.73,
"learning_rate": 8.855667664322307e-05,
"loss": 1.7154,
"step": 275
},
{
"epoch": 0.73,
"learning_rate": 8.846535327102909e-05,
"loss": 1.7901,
"step": 276
},
{
"epoch": 0.73,
"learning_rate": 8.837371443039466e-05,
"loss": 1.6907,
"step": 277
},
{
"epoch": 0.74,
"learning_rate": 8.828176087288345e-05,
"loss": 1.7244,
"step": 278
},
{
"epoch": 0.74,
"learning_rate": 8.818949335264021e-05,
"loss": 1.7037,
"step": 279
},
{
"epoch": 0.74,
"learning_rate": 8.809691262638467e-05,
"loss": 1.6272,
"step": 280
},
{
"epoch": 0.75,
"learning_rate": 8.800401945340523e-05,
"loss": 1.6574,
"step": 281
},
{
"epoch": 0.75,
"learning_rate": 8.791081459555281e-05,
"loss": 1.6544,
"step": 282
},
{
"epoch": 0.75,
"learning_rate": 8.781729881723458e-05,
"loss": 1.6271,
"step": 283
},
{
"epoch": 0.75,
"learning_rate": 8.772347288540763e-05,
"loss": 1.7392,
"step": 284
},
{
"epoch": 0.76,
"learning_rate": 8.762933756957281e-05,
"loss": 1.6172,
"step": 285
},
{
"epoch": 0.76,
"learning_rate": 8.753489364176826e-05,
"loss": 1.7241,
"step": 286
},
{
"epoch": 0.76,
"learning_rate": 8.744014187656321e-05,
"loss": 1.726,
"step": 287
},
{
"epoch": 0.76,
"learning_rate": 8.734508305105158e-05,
"loss": 1.699,
"step": 288
},
{
"epoch": 0.77,
"learning_rate": 8.724971794484556e-05,
"loss": 1.6371,
"step": 289
},
{
"epoch": 0.77,
"learning_rate": 8.715404734006931e-05,
"loss": 1.7337,
"step": 290
},
{
"epoch": 0.77,
"learning_rate": 8.705807202135248e-05,
"loss": 1.6385,
"step": 291
},
{
"epoch": 0.77,
"learning_rate": 8.69617927758238e-05,
"loss": 1.7023,
"step": 292
},
{
"epoch": 0.78,
"learning_rate": 8.686521039310454e-05,
"loss": 1.6796,
"step": 293
},
{
"epoch": 0.78,
"learning_rate": 8.676832566530221e-05,
"loss": 1.7157,
"step": 294
},
{
"epoch": 0.78,
"learning_rate": 8.667113938700396e-05,
"loss": 1.6873,
"step": 295
},
{
"epoch": 0.79,
"learning_rate": 8.657365235526995e-05,
"loss": 1.7194,
"step": 296
},
{
"epoch": 0.79,
"learning_rate": 8.647586536962707e-05,
"loss": 1.7695,
"step": 297
},
{
"epoch": 0.79,
"learning_rate": 8.637777923206215e-05,
"loss": 1.6464,
"step": 298
},
{
"epoch": 0.79,
"learning_rate": 8.62793947470155e-05,
"loss": 1.7462,
"step": 299
},
{
"epoch": 0.8,
"learning_rate": 8.618071272137431e-05,
"loss": 1.6386,
"step": 300
},
{
"epoch": 0.8,
"learning_rate": 8.608173396446598e-05,
"loss": 1.6692,
"step": 301
},
{
"epoch": 0.8,
"learning_rate": 8.598245928805152e-05,
"loss": 1.7241,
"step": 302
},
{
"epoch": 0.8,
"learning_rate": 8.588288950631889e-05,
"loss": 1.744,
"step": 303
},
{
"epoch": 0.81,
"learning_rate": 8.578302543587631e-05,
"loss": 1.6958,
"step": 304
},
{
"epoch": 0.81,
"learning_rate": 8.568286789574557e-05,
"loss": 1.7288,
"step": 305
},
{
"epoch": 0.81,
"learning_rate": 8.558241770735531e-05,
"loss": 1.7376,
"step": 306
},
{
"epoch": 0.81,
"learning_rate": 8.548167569453429e-05,
"loss": 1.668,
"step": 307
},
{
"epoch": 0.82,
"learning_rate": 8.538064268350465e-05,
"loss": 1.6949,
"step": 308
},
{
"epoch": 0.82,
"learning_rate": 8.527931950287507e-05,
"loss": 1.645,
"step": 309
},
{
"epoch": 0.82,
"learning_rate": 8.517770698363404e-05,
"loss": 1.6848,
"step": 310
},
{
"epoch": 0.82,
"learning_rate": 8.507580595914303e-05,
"loss": 1.7163,
"step": 311
},
{
"epoch": 0.83,
"learning_rate": 8.497361726512965e-05,
"loss": 1.7366,
"step": 312
},
{
"epoch": 0.83,
"learning_rate": 8.487114173968074e-05,
"loss": 1.7858,
"step": 313
},
{
"epoch": 0.83,
"learning_rate": 8.476838022323561e-05,
"loss": 1.6975,
"step": 314
},
{
"epoch": 0.84,
"learning_rate": 8.466533355857908e-05,
"loss": 1.7549,
"step": 315
},
{
"epoch": 0.84,
"learning_rate": 8.456200259083454e-05,
"loss": 1.6796,
"step": 316
},
{
"epoch": 0.84,
"learning_rate": 8.445838816745709e-05,
"loss": 1.6895,
"step": 317
},
{
"epoch": 0.84,
"learning_rate": 8.435449113822655e-05,
"loss": 1.6524,
"step": 318
},
{
"epoch": 0.85,
"learning_rate": 8.425031235524046e-05,
"loss": 1.7097,
"step": 319
},
{
"epoch": 0.85,
"learning_rate": 8.414585267290715e-05,
"loss": 1.7021,
"step": 320
},
{
"epoch": 0.85,
"learning_rate": 8.404111294793873e-05,
"loss": 1.7239,
"step": 321
},
{
"epoch": 0.85,
"learning_rate": 8.393609403934398e-05,
"loss": 1.6201,
"step": 322
},
{
"epoch": 0.86,
"learning_rate": 8.383079680842145e-05,
"loss": 1.6921,
"step": 323
},
{
"epoch": 0.86,
"learning_rate": 8.372522211875224e-05,
"loss": 1.6285,
"step": 324
},
{
"epoch": 0.86,
"learning_rate": 8.361937083619304e-05,
"loss": 1.692,
"step": 325
},
{
"epoch": 0.86,
"learning_rate": 8.351324382886895e-05,
"loss": 1.7094,
"step": 326
},
{
"epoch": 0.87,
"learning_rate": 8.340684196716639e-05,
"loss": 1.661,
"step": 327
},
{
"epoch": 0.87,
"learning_rate": 8.330016612372599e-05,
"loss": 1.6573,
"step": 328
},
{
"epoch": 0.87,
"learning_rate": 8.319321717343535e-05,
"loss": 1.7666,
"step": 329
},
{
"epoch": 0.88,
"learning_rate": 8.308599599342202e-05,
"loss": 1.6458,
"step": 330
},
{
"epoch": 0.88,
"learning_rate": 8.297850346304608e-05,
"loss": 1.6689,
"step": 331
},
{
"epoch": 0.88,
"learning_rate": 8.287074046389312e-05,
"loss": 1.6694,
"step": 332
},
{
"epoch": 0.88,
"learning_rate": 8.276270787976696e-05,
"loss": 1.7342,
"step": 333
},
{
"epoch": 0.89,
"learning_rate": 8.265440659668236e-05,
"loss": 1.7041,
"step": 334
},
{
"epoch": 0.89,
"learning_rate": 8.254583750285776e-05,
"loss": 1.707,
"step": 335
},
{
"epoch": 0.89,
"learning_rate": 8.243700148870805e-05,
"loss": 1.6359,
"step": 336
},
{
"epoch": 0.89,
"learning_rate": 8.232789944683723e-05,
"loss": 1.6944,
"step": 337
},
{
"epoch": 0.9,
"learning_rate": 8.221853227203106e-05,
"loss": 1.6221,
"step": 338
},
{
"epoch": 0.9,
"learning_rate": 8.210890086124977e-05,
"loss": 1.6485,
"step": 339
},
{
"epoch": 0.9,
"learning_rate": 8.199900611362068e-05,
"loss": 1.6927,
"step": 340
},
{
"epoch": 0.9,
"learning_rate": 8.188884893043083e-05,
"loss": 1.71,
"step": 341
},
{
"epoch": 0.91,
"learning_rate": 8.177843021511962e-05,
"loss": 1.6721,
"step": 342
},
{
"epoch": 0.91,
"learning_rate": 8.166775087327133e-05,
"loss": 1.7052,
"step": 343
},
{
"epoch": 0.91,
"learning_rate": 8.155681181260777e-05,
"loss": 1.679,
"step": 344
},
{
"epoch": 0.92,
"learning_rate": 8.144561394298075e-05,
"loss": 1.6976,
"step": 345
},
{
"epoch": 0.92,
"learning_rate": 8.133415817636471e-05,
"loss": 1.591,
"step": 346
},
{
"epoch": 0.92,
"learning_rate": 8.12224454268492e-05,
"loss": 1.7302,
"step": 347
},
{
"epoch": 0.92,
"learning_rate": 8.111047661063136e-05,
"loss": 1.649,
"step": 348
},
{
"epoch": 0.93,
"learning_rate": 8.099825264600842e-05,
"loss": 1.7271,
"step": 349
},
{
"epoch": 0.93,
"learning_rate": 8.08857744533702e-05,
"loss": 1.7033,
"step": 350
},
{
"epoch": 0.93,
"learning_rate": 8.077304295519151e-05,
"loss": 1.6853,
"step": 351
},
{
"epoch": 0.93,
"learning_rate": 8.066005907602465e-05,
"loss": 1.6198,
"step": 352
},
{
"epoch": 0.94,
"learning_rate": 8.054682374249174e-05,
"loss": 1.5788,
"step": 353
},
{
"epoch": 0.94,
"learning_rate": 8.04333378832772e-05,
"loss": 1.6358,
"step": 354
},
{
"epoch": 0.94,
"learning_rate": 8.031960242912011e-05,
"loss": 1.6205,
"step": 355
},
{
"epoch": 0.94,
"learning_rate": 8.020561831280654e-05,
"loss": 1.6251,
"step": 356
},
{
"epoch": 0.95,
"learning_rate": 8.009138646916196e-05,
"loss": 1.6325,
"step": 357
},
{
"epoch": 0.95,
"learning_rate": 7.997690783504353e-05,
"loss": 1.5752,
"step": 358
},
{
"epoch": 0.95,
"learning_rate": 7.986218334933241e-05,
"loss": 1.702,
"step": 359
},
{
"epoch": 0.95,
"learning_rate": 7.97472139529261e-05,
"loss": 1.6434,
"step": 360
},
{
"epoch": 0.96,
"learning_rate": 7.963200058873072e-05,
"loss": 1.6503,
"step": 361
},
{
"epoch": 0.96,
"learning_rate": 7.951654420165323e-05,
"loss": 1.6811,
"step": 362
},
{
"epoch": 0.96,
"learning_rate": 7.940084573859369e-05,
"loss": 1.6883,
"step": 363
},
{
"epoch": 0.97,
"learning_rate": 7.928490614843757e-05,
"loss": 1.6747,
"step": 364
},
{
"epoch": 0.97,
"learning_rate": 7.916872638204788e-05,
"loss": 1.6585,
"step": 365
},
{
"epoch": 0.97,
"learning_rate": 7.90523073922574e-05,
"loss": 1.6598,
"step": 366
},
{
"epoch": 0.97,
"learning_rate": 7.893565013386087e-05,
"loss": 1.6732,
"step": 367
},
{
"epoch": 0.98,
"learning_rate": 7.881875556360717e-05,
"loss": 1.6139,
"step": 368
},
{
"epoch": 0.98,
"learning_rate": 7.870162464019144e-05,
"loss": 1.7143,
"step": 369
},
{
"epoch": 0.98,
"learning_rate": 7.858425832424728e-05,
"loss": 1.6749,
"step": 370
},
{
"epoch": 0.98,
"learning_rate": 7.846665757833878e-05,
"loss": 1.7282,
"step": 371
},
{
"epoch": 0.99,
"learning_rate": 7.83488233669527e-05,
"loss": 1.6329,
"step": 372
},
{
"epoch": 0.99,
"learning_rate": 7.823075665649056e-05,
"loss": 1.6273,
"step": 373
},
{
"epoch": 0.99,
"learning_rate": 7.811245841526063e-05,
"loss": 1.6262,
"step": 374
},
{
"epoch": 0.99,
"learning_rate": 7.79939296134701e-05,
"loss": 1.6977,
"step": 375
},
{
"epoch": 1.0,
"learning_rate": 7.787517122321706e-05,
"loss": 1.735,
"step": 376
},
{
"epoch": 1.0,
"learning_rate": 7.775618421848252e-05,
"loss": 1.6294,
"step": 377
},
{
"epoch": 1.0,
"learning_rate": 7.763696957512246e-05,
"loss": 1.5115,
"step": 378
},
{
"epoch": 1.01,
"learning_rate": 7.75175282708598e-05,
"loss": 1.5511,
"step": 379
},
{
"epoch": 1.01,
"learning_rate": 7.739786128527643e-05,
"loss": 1.6208,
"step": 380
},
{
"epoch": 1.01,
"learning_rate": 7.727796959980504e-05,
"loss": 1.5682,
"step": 381
},
{
"epoch": 1.01,
"learning_rate": 7.715785419772126e-05,
"loss": 1.5706,
"step": 382
},
{
"epoch": 1.02,
"learning_rate": 7.703751606413542e-05,
"loss": 1.6126,
"step": 383
},
{
"epoch": 1.02,
"learning_rate": 7.691695618598467e-05,
"loss": 1.6065,
"step": 384
},
{
"epoch": 1.02,
"learning_rate": 7.679617555202463e-05,
"loss": 1.5688,
"step": 385
},
{
"epoch": 1.02,
"learning_rate": 7.667517515282152e-05,
"loss": 1.5788,
"step": 386
},
{
"epoch": 1.03,
"learning_rate": 7.655395598074389e-05,
"loss": 1.513,
"step": 387
},
{
"epoch": 1.03,
"learning_rate": 7.643251902995452e-05,
"loss": 1.5044,
"step": 388
},
{
"epoch": 1.03,
"learning_rate": 7.63108652964023e-05,
"loss": 1.5667,
"step": 389
},
{
"epoch": 1.03,
"learning_rate": 7.618899577781404e-05,
"loss": 1.5765,
"step": 390
},
{
"epoch": 1.04,
"learning_rate": 7.606691147368627e-05,
"loss": 1.5661,
"step": 391
},
{
"epoch": 1.04,
"learning_rate": 7.594461338527701e-05,
"loss": 1.5763,
"step": 392
},
{
"epoch": 1.04,
"learning_rate": 7.582210251559769e-05,
"loss": 1.5253,
"step": 393
},
{
"epoch": 1.05,
"learning_rate": 7.569937986940477e-05,
"loss": 1.5982,
"step": 394
},
{
"epoch": 1.05,
"learning_rate": 7.557644645319158e-05,
"loss": 1.5583,
"step": 395
},
{
"epoch": 1.05,
"learning_rate": 7.545330327518007e-05,
"loss": 1.488,
"step": 396
},
{
"epoch": 1.05,
"learning_rate": 7.532995134531251e-05,
"loss": 1.5368,
"step": 397
},
{
"epoch": 1.06,
"learning_rate": 7.520639167524322e-05,
"loss": 1.5863,
"step": 398
},
{
"epoch": 1.06,
"learning_rate": 7.508262527833029e-05,
"loss": 1.6736,
"step": 399
},
{
"epoch": 1.06,
"learning_rate": 7.495865316962723e-05,
"loss": 1.5957,
"step": 400
},
{
"epoch": 1.06,
"learning_rate": 7.483447636587467e-05,
"loss": 1.5553,
"step": 401
},
{
"epoch": 1.07,
"learning_rate": 7.471009588549205e-05,
"loss": 1.5217,
"step": 402
},
{
"epoch": 1.07,
"learning_rate": 7.458551274856918e-05,
"loss": 1.5806,
"step": 403
},
{
"epoch": 1.07,
"learning_rate": 7.4460727976858e-05,
"loss": 1.6075,
"step": 404
},
{
"epoch": 1.07,
"learning_rate": 7.433574259376407e-05,
"loss": 1.5302,
"step": 405
},
{
"epoch": 1.08,
"learning_rate": 7.421055762433826e-05,
"loss": 1.4965,
"step": 406
},
{
"epoch": 1.08,
"learning_rate": 7.408517409526835e-05,
"loss": 1.6272,
"step": 407
},
{
"epoch": 1.08,
"learning_rate": 7.39595930348705e-05,
"loss": 1.5668,
"step": 408
},
{
"epoch": 1.08,
"learning_rate": 7.3833815473081e-05,
"loss": 1.5652,
"step": 409
},
{
"epoch": 1.09,
"learning_rate": 7.370784244144762e-05,
"loss": 1.5885,
"step": 410
},
{
"epoch": 1.09,
"learning_rate": 7.358167497312134e-05,
"loss": 1.5324,
"step": 411
},
{
"epoch": 1.09,
"learning_rate": 7.345531410284774e-05,
"loss": 1.6304,
"step": 412
},
{
"epoch": 1.1,
"learning_rate": 7.332876086695855e-05,
"loss": 1.5931,
"step": 413
},
{
"epoch": 1.1,
"learning_rate": 7.320201630336318e-05,
"loss": 1.5992,
"step": 414
},
{
"epoch": 1.1,
"learning_rate": 7.307508145154019e-05,
"loss": 1.5467,
"step": 415
},
{
"epoch": 1.1,
"learning_rate": 7.294795735252875e-05,
"loss": 1.5775,
"step": 416
},
{
"epoch": 1.11,
"learning_rate": 7.282064504892015e-05,
"loss": 1.5119,
"step": 417
},
{
"epoch": 1.11,
"learning_rate": 7.269314558484914e-05,
"loss": 1.5829,
"step": 418
},
{
"epoch": 1.11,
"learning_rate": 7.256546000598551e-05,
"loss": 1.6211,
"step": 419
},
{
"epoch": 1.11,
"learning_rate": 7.243758935952547e-05,
"loss": 1.5241,
"step": 420
},
{
"epoch": 1.12,
"learning_rate": 7.230953469418292e-05,
"loss": 1.5521,
"step": 421
},
{
"epoch": 1.12,
"learning_rate": 7.218129706018108e-05,
"loss": 1.5349,
"step": 422
},
{
"epoch": 1.12,
"learning_rate": 7.205287750924372e-05,
"loss": 1.5815,
"step": 423
},
{
"epoch": 1.12,
"learning_rate": 7.192427709458656e-05,
"loss": 1.5188,
"step": 424
},
{
"epoch": 1.13,
"learning_rate": 7.179549687090867e-05,
"loss": 1.5987,
"step": 425
},
{
"epoch": 1.13,
"learning_rate": 7.166653789438382e-05,
"loss": 1.5643,
"step": 426
},
{
"epoch": 1.13,
"learning_rate": 7.153740122265176e-05,
"loss": 1.5052,
"step": 427
},
{
"epoch": 1.14,
"learning_rate": 7.140808791480959e-05,
"loss": 1.6092,
"step": 428
},
{
"epoch": 1.14,
"learning_rate": 7.127859903140311e-05,
"loss": 1.5671,
"step": 429
},
{
"epoch": 1.14,
"learning_rate": 7.114893563441802e-05,
"loss": 1.5004,
"step": 430
},
{
"epoch": 1.14,
"learning_rate": 7.101909878727128e-05,
"loss": 1.5558,
"step": 431
},
{
"epoch": 1.15,
"learning_rate": 7.088908955480244e-05,
"loss": 1.5113,
"step": 432
},
{
"epoch": 1.15,
"learning_rate": 7.075890900326475e-05,
"loss": 1.6546,
"step": 433
},
{
"epoch": 1.15,
"learning_rate": 7.062855820031659e-05,
"loss": 1.5282,
"step": 434
},
{
"epoch": 1.15,
"learning_rate": 7.049803821501259e-05,
"loss": 1.5285,
"step": 435
},
{
"epoch": 1.16,
"learning_rate": 7.036735011779492e-05,
"loss": 1.5854,
"step": 436
},
{
"epoch": 1.16,
"learning_rate": 7.023649498048451e-05,
"loss": 1.6048,
"step": 437
},
{
"epoch": 1.16,
"learning_rate": 7.01054738762722e-05,
"loss": 1.5618,
"step": 438
},
{
"epoch": 1.16,
"learning_rate": 6.997428787971005e-05,
"loss": 1.6191,
"step": 439
},
{
"epoch": 1.17,
"learning_rate": 6.984293806670244e-05,
"loss": 1.5588,
"step": 440
},
{
"epoch": 1.17,
"learning_rate": 6.971142551449725e-05,
"loss": 1.6202,
"step": 441
},
{
"epoch": 1.17,
"learning_rate": 6.957975130167705e-05,
"loss": 1.607,
"step": 442
},
{
"epoch": 1.18,
"learning_rate": 6.944791650815023e-05,
"loss": 1.554,
"step": 443
},
{
"epoch": 1.18,
"learning_rate": 6.931592221514222e-05,
"loss": 1.6057,
"step": 444
},
{
"epoch": 1.18,
"learning_rate": 6.91837695051865e-05,
"loss": 1.5725,
"step": 445
},
{
"epoch": 1.18,
"learning_rate": 6.905145946211583e-05,
"loss": 1.5788,
"step": 446
},
{
"epoch": 1.19,
"learning_rate": 6.891899317105329e-05,
"loss": 1.5324,
"step": 447
},
{
"epoch": 1.19,
"learning_rate": 6.878637171840343e-05,
"loss": 1.5962,
"step": 448
},
{
"epoch": 1.19,
"learning_rate": 6.865359619184331e-05,
"loss": 1.5458,
"step": 449
},
{
"epoch": 1.19,
"learning_rate": 6.85206676803136e-05,
"loss": 1.6023,
"step": 450
},
{
"epoch": 1.2,
"learning_rate": 6.83875872740097e-05,
"loss": 1.5291,
"step": 451
},
{
"epoch": 1.2,
"learning_rate": 6.825435606437273e-05,
"loss": 1.5929,
"step": 452
},
{
"epoch": 1.2,
"learning_rate": 6.81209751440806e-05,
"loss": 1.5424,
"step": 453
},
{
"epoch": 1.2,
"learning_rate": 6.798744560703905e-05,
"loss": 1.5881,
"step": 454
},
{
"epoch": 1.21,
"learning_rate": 6.785376854837268e-05,
"loss": 1.4747,
"step": 455
},
{
"epoch": 1.21,
"learning_rate": 6.771994506441597e-05,
"loss": 1.5215,
"step": 456
},
{
"epoch": 1.21,
"learning_rate": 6.758597625270433e-05,
"loss": 1.465,
"step": 457
},
{
"epoch": 1.21,
"learning_rate": 6.745186321196495e-05,
"loss": 1.5071,
"step": 458
},
{
"epoch": 1.22,
"learning_rate": 6.731760704210802e-05,
"loss": 1.4882,
"step": 459
},
{
"epoch": 1.22,
"learning_rate": 6.718320884421751e-05,
"loss": 1.5905,
"step": 460
},
{
"epoch": 1.22,
"learning_rate": 6.704866972054223e-05,
"loss": 1.5922,
"step": 461
},
{
"epoch": 1.23,
"learning_rate": 6.691399077448677e-05,
"loss": 1.5448,
"step": 462
},
{
"epoch": 1.23,
"learning_rate": 6.677917311060246e-05,
"loss": 1.5675,
"step": 463
},
{
"epoch": 1.23,
"learning_rate": 6.66442178345783e-05,
"loss": 1.6005,
"step": 464
},
{
"epoch": 1.23,
"learning_rate": 6.650912605323194e-05,
"loss": 1.6179,
"step": 465
},
{
"epoch": 1.24,
"learning_rate": 6.637389887450045e-05,
"loss": 1.5711,
"step": 466
},
{
"epoch": 1.24,
"learning_rate": 6.623853740743146e-05,
"loss": 1.6179,
"step": 467
},
{
"epoch": 1.24,
"learning_rate": 6.610304276217392e-05,
"loss": 1.6407,
"step": 468
},
{
"epoch": 1.24,
"learning_rate": 6.596741604996897e-05,
"loss": 1.6296,
"step": 469
},
{
"epoch": 1.25,
"learning_rate": 6.583165838314095e-05,
"loss": 1.6393,
"step": 470
},
{
"epoch": 1.25,
"learning_rate": 6.569577087508814e-05,
"loss": 1.5851,
"step": 471
},
{
"epoch": 1.25,
"learning_rate": 6.555975464027375e-05,
"loss": 1.5772,
"step": 472
},
{
"epoch": 1.25,
"learning_rate": 6.542361079421669e-05,
"loss": 1.5792,
"step": 473
},
{
"epoch": 1.26,
"learning_rate": 6.528734045348248e-05,
"loss": 1.5866,
"step": 474
},
{
"epoch": 1.26,
"learning_rate": 6.515094473567407e-05,
"loss": 1.5141,
"step": 475
},
{
"epoch": 1.26,
"learning_rate": 6.501442475942265e-05,
"loss": 1.5783,
"step": 476
},
{
"epoch": 1.27,
"learning_rate": 6.48777816443785e-05,
"loss": 1.5052,
"step": 477
},
{
"epoch": 1.27,
"learning_rate": 6.474101651120184e-05,
"loss": 1.5681,
"step": 478
},
{
"epoch": 1.27,
"learning_rate": 6.460413048155355e-05,
"loss": 1.6441,
"step": 479
},
{
"epoch": 1.27,
"learning_rate": 6.446712467808608e-05,
"loss": 1.5737,
"step": 480
},
{
"epoch": 1.28,
"learning_rate": 6.433000022443419e-05,
"loss": 1.5541,
"step": 481
},
{
"epoch": 1.28,
"learning_rate": 6.419275824520568e-05,
"loss": 1.5573,
"step": 482
},
{
"epoch": 1.28,
"learning_rate": 6.405539986597225e-05,
"loss": 1.5178,
"step": 483
},
{
"epoch": 1.28,
"learning_rate": 6.391792621326027e-05,
"loss": 1.5345,
"step": 484
},
{
"epoch": 1.29,
"learning_rate": 6.378033841454147e-05,
"loss": 1.6092,
"step": 485
},
{
"epoch": 1.29,
"learning_rate": 6.364263759822371e-05,
"loss": 1.5439,
"step": 486
},
{
"epoch": 1.29,
"learning_rate": 6.350482489364186e-05,
"loss": 1.547,
"step": 487
},
{
"epoch": 1.29,
"learning_rate": 6.336690143104827e-05,
"loss": 1.5803,
"step": 488
},
{
"epoch": 1.3,
"learning_rate": 6.322886834160378e-05,
"loss": 1.584,
"step": 489
},
{
"epoch": 1.3,
"learning_rate": 6.309072675736827e-05,
"loss": 1.46,
"step": 490
},
{
"epoch": 1.3,
"learning_rate": 6.29524778112914e-05,
"loss": 1.5754,
"step": 491
},
{
"epoch": 1.31,
"learning_rate": 6.281412263720344e-05,
"loss": 1.5056,
"step": 492
},
{
"epoch": 1.31,
"learning_rate": 6.267566236980574e-05,
"loss": 1.5539,
"step": 493
},
{
"epoch": 1.31,
"learning_rate": 6.253709814466168e-05,
"loss": 1.5229,
"step": 494
},
{
"epoch": 1.31,
"learning_rate": 6.239843109818716e-05,
"loss": 1.4894,
"step": 495
},
{
"epoch": 1.32,
"learning_rate": 6.22596623676414e-05,
"loss": 1.5337,
"step": 496
},
{
"epoch": 1.32,
"learning_rate": 6.212079309111753e-05,
"loss": 1.592,
"step": 497
},
{
"epoch": 1.32,
"learning_rate": 6.19818244075333e-05,
"loss": 1.4937,
"step": 498
},
{
"epoch": 1.32,
"learning_rate": 6.18427574566218e-05,
"loss": 1.5862,
"step": 499
},
{
"epoch": 1.33,
"learning_rate": 6.170359337892194e-05,
"loss": 1.5252,
"step": 500
},
{
"epoch": 1.33,
"learning_rate": 6.156433331576927e-05,
"loss": 1.5639,
"step": 501
},
{
"epoch": 1.33,
"learning_rate": 6.142497840928656e-05,
"loss": 1.5306,
"step": 502
},
{
"epoch": 1.33,
"learning_rate": 6.128552980237437e-05,
"loss": 1.6537,
"step": 503
},
{
"epoch": 1.34,
"learning_rate": 6.114598863870177e-05,
"loss": 1.4589,
"step": 504
},
{
"epoch": 1.34,
"learning_rate": 6.100635606269694e-05,
"loss": 1.5472,
"step": 505
},
{
"epoch": 1.34,
"learning_rate": 6.0866633219537694e-05,
"loss": 1.5372,
"step": 506
},
{
"epoch": 1.34,
"learning_rate": 6.0726821255142255e-05,
"loss": 1.4852,
"step": 507
},
{
"epoch": 1.35,
"learning_rate": 6.058692131615968e-05,
"loss": 1.509,
"step": 508
},
{
"epoch": 1.35,
"learning_rate": 6.04469345499606e-05,
"loss": 1.5736,
"step": 509
},
{
"epoch": 1.35,
"learning_rate": 6.0306862104627705e-05,
"loss": 1.5348,
"step": 510
},
{
"epoch": 1.36,
"learning_rate": 6.0166705128946375e-05,
"loss": 1.5519,
"step": 511
},
{
"epoch": 1.36,
"learning_rate": 6.00264647723953e-05,
"loss": 1.5526,
"step": 512
},
{
"epoch": 1.36,
"learning_rate": 5.988614218513693e-05,
"loss": 1.5908,
"step": 513
},
{
"epoch": 1.36,
"learning_rate": 5.974573851800818e-05,
"loss": 1.5455,
"step": 514
},
{
"epoch": 1.37,
"learning_rate": 5.9605254922510926e-05,
"loss": 1.5317,
"step": 515
},
{
"epoch": 1.37,
"learning_rate": 5.946469255080251e-05,
"loss": 1.5962,
"step": 516
},
{
"epoch": 1.37,
"learning_rate": 5.9324052555686436e-05,
"loss": 1.6437,
"step": 517
},
{
"epoch": 1.37,
"learning_rate": 5.918333609060276e-05,
"loss": 1.5306,
"step": 518
},
{
"epoch": 1.38,
"learning_rate": 5.9042544309618694e-05,
"loss": 1.5289,
"step": 519
},
{
"epoch": 1.38,
"learning_rate": 5.890167836741919e-05,
"loss": 1.5338,
"step": 520
},
{
"epoch": 1.38,
"learning_rate": 5.8760739419297384e-05,
"loss": 1.6154,
"step": 521
},
{
"epoch": 1.38,
"learning_rate": 5.861972862114518e-05,
"loss": 1.5108,
"step": 522
},
{
"epoch": 1.39,
"learning_rate": 5.847864712944373e-05,
"loss": 1.5818,
"step": 523
},
{
"epoch": 1.39,
"learning_rate": 5.833749610125402e-05,
"loss": 1.6317,
"step": 524
},
{
"epoch": 1.39,
"learning_rate": 5.819627669420724e-05,
"loss": 1.5724,
"step": 525
},
{
"epoch": 1.4,
"learning_rate": 5.805499006649547e-05,
"loss": 1.5023,
"step": 526
},
{
"epoch": 1.4,
"learning_rate": 5.791363737686205e-05,
"loss": 1.5374,
"step": 527
},
{
"epoch": 1.4,
"learning_rate": 5.7772219784592105e-05,
"loss": 1.5141,
"step": 528
},
{
"epoch": 1.4,
"learning_rate": 5.76307384495031e-05,
"loss": 1.6443,
"step": 529
},
{
"epoch": 1.41,
"learning_rate": 5.748919453193521e-05,
"loss": 1.5954,
"step": 530
},
{
"epoch": 1.41,
"learning_rate": 5.734758919274192e-05,
"loss": 1.6019,
"step": 531
},
{
"epoch": 1.41,
"learning_rate": 5.720592359328047e-05,
"loss": 1.6241,
"step": 532
},
{
"epoch": 1.41,
"learning_rate": 5.706419889540225e-05,
"loss": 1.5813,
"step": 533
},
{
"epoch": 1.42,
"learning_rate": 5.69224162614434e-05,
"loss": 1.518,
"step": 534
},
{
"epoch": 1.42,
"learning_rate": 5.6780576854215195e-05,
"loss": 1.5473,
"step": 535
},
{
"epoch": 1.42,
"learning_rate": 5.6638681836994535e-05,
"loss": 1.6277,
"step": 536
},
{
"epoch": 1.42,
"learning_rate": 5.649673237351436e-05,
"loss": 1.6213,
"step": 537
},
{
"epoch": 1.43,
"learning_rate": 5.6354729627954195e-05,
"loss": 1.5182,
"step": 538
},
{
"epoch": 1.43,
"learning_rate": 5.621267476493053e-05,
"loss": 1.6186,
"step": 539
},
{
"epoch": 1.43,
"learning_rate": 5.607056894948728e-05,
"loss": 1.5195,
"step": 540
},
{
"epoch": 1.44,
"learning_rate": 5.592841334708624e-05,
"loss": 1.5293,
"step": 541
},
{
"epoch": 1.44,
"learning_rate": 5.578620912359758e-05,
"loss": 1.6225,
"step": 542
},
{
"epoch": 1.44,
"learning_rate": 5.564395744529012e-05,
"loss": 1.5548,
"step": 543
},
{
"epoch": 1.44,
"learning_rate": 5.5501659478821964e-05,
"loss": 1.556,
"step": 544
},
{
"epoch": 1.45,
"learning_rate": 5.535931639123083e-05,
"loss": 1.4946,
"step": 545
},
{
"epoch": 1.45,
"learning_rate": 5.521692934992447e-05,
"loss": 1.5343,
"step": 546
},
{
"epoch": 1.45,
"learning_rate": 5.5074499522671106e-05,
"loss": 1.5353,
"step": 547
},
{
"epoch": 1.45,
"learning_rate": 5.493202807758992e-05,
"loss": 1.5644,
"step": 548
},
{
"epoch": 1.46,
"learning_rate": 5.478951618314133e-05,
"loss": 1.4671,
"step": 549
},
{
"epoch": 1.46,
"learning_rate": 5.464696500811757e-05,
"loss": 1.553,
"step": 550
},
{
"epoch": 1.46,
"learning_rate": 5.450437572163298e-05,
"loss": 1.5658,
"step": 551
},
{
"epoch": 1.46,
"learning_rate": 5.4361749493114514e-05,
"loss": 1.5448,
"step": 552
},
{
"epoch": 1.47,
"learning_rate": 5.4219087492292054e-05,
"loss": 1.5305,
"step": 553
},
{
"epoch": 1.47,
"learning_rate": 5.407639088918888e-05,
"loss": 1.5567,
"step": 554
},
{
"epoch": 1.47,
"learning_rate": 5.3933660854112075e-05,
"loss": 1.5312,
"step": 555
},
{
"epoch": 1.47,
"learning_rate": 5.37908985576429e-05,
"loss": 1.4669,
"step": 556
},
{
"epoch": 1.48,
"learning_rate": 5.364810517062717e-05,
"loss": 1.6714,
"step": 557
},
{
"epoch": 1.48,
"learning_rate": 5.350528186416573e-05,
"loss": 1.5867,
"step": 558
},
{
"epoch": 1.48,
"learning_rate": 5.3362429809604806e-05,
"loss": 1.5232,
"step": 559
},
{
"epoch": 1.49,
"learning_rate": 5.321955017852637e-05,
"loss": 1.5636,
"step": 560
},
{
"epoch": 1.49,
"learning_rate": 5.307664414273855e-05,
"loss": 1.4686,
"step": 561
},
{
"epoch": 1.49,
"learning_rate": 5.2933712874266084e-05,
"loss": 1.5301,
"step": 562
},
{
"epoch": 1.49,
"learning_rate": 5.2790757545340586e-05,
"loss": 1.5631,
"step": 563
},
{
"epoch": 1.5,
"learning_rate": 5.2647779328391045e-05,
"loss": 1.5805,
"step": 564
},
{
"epoch": 1.5,
"learning_rate": 5.2504779396034146e-05,
"loss": 1.6171,
"step": 565
},
{
"epoch": 1.5,
"learning_rate": 5.236175892106467e-05,
"loss": 1.5264,
"step": 566
},
{
"epoch": 1.5,
"learning_rate": 5.221871907644589e-05,
"loss": 1.5189,
"step": 567
},
{
"epoch": 1.51,
"learning_rate": 5.207566103529991e-05,
"loss": 1.5974,
"step": 568
},
{
"epoch": 1.51,
"learning_rate": 5.1932585970898096e-05,
"loss": 1.5221,
"step": 569
},
{
"epoch": 1.51,
"learning_rate": 5.17894950566514e-05,
"loss": 1.5471,
"step": 570
},
{
"epoch": 1.51,
"learning_rate": 5.1646389466100795e-05,
"loss": 1.521,
"step": 571
},
{
"epoch": 1.52,
"learning_rate": 5.150327037290761e-05,
"loss": 1.5258,
"step": 572
},
{
"epoch": 1.52,
"learning_rate": 5.136013895084388e-05,
"loss": 1.4685,
"step": 573
},
{
"epoch": 1.52,
"learning_rate": 5.121699637378282e-05,
"loss": 1.5678,
"step": 574
},
{
"epoch": 1.53,
"learning_rate": 5.107384381568907e-05,
"loss": 1.4684,
"step": 575
},
{
"epoch": 1.53,
"learning_rate": 5.093068245060917e-05,
"loss": 1.4688,
"step": 576
},
{
"epoch": 1.53,
"learning_rate": 5.0787513452661864e-05,
"loss": 1.566,
"step": 577
},
{
"epoch": 1.53,
"learning_rate": 5.064433799602849e-05,
"loss": 1.5323,
"step": 578
},
{
"epoch": 1.54,
"learning_rate": 5.05011572549434e-05,
"loss": 1.581,
"step": 579
},
{
"epoch": 1.54,
"learning_rate": 5.0357972403684225e-05,
"loss": 1.5065,
"step": 580
},
{
"epoch": 1.54,
"learning_rate": 5.021478461656235e-05,
"loss": 1.5708,
"step": 581
},
{
"epoch": 1.54,
"learning_rate": 5.007159506791325e-05,
"loss": 1.5121,
"step": 582
},
{
"epoch": 1.55,
"learning_rate": 4.992840493208676e-05,
"loss": 1.5743,
"step": 583
},
{
"epoch": 1.55,
"learning_rate": 4.9785215383437646e-05,
"loss": 1.5861,
"step": 584
},
{
"epoch": 1.55,
"learning_rate": 4.9642027596315786e-05,
"loss": 1.5671,
"step": 585
},
{
"epoch": 1.55,
"learning_rate": 4.949884274505661e-05,
"loss": 1.5105,
"step": 586
},
{
"epoch": 1.56,
"learning_rate": 4.935566200397152e-05,
"loss": 1.5658,
"step": 587
},
{
"epoch": 1.56,
"learning_rate": 4.921248654733814e-05,
"loss": 1.5483,
"step": 588
},
{
"epoch": 1.56,
"learning_rate": 4.906931754939084e-05,
"loss": 1.567,
"step": 589
},
{
"epoch": 1.56,
"learning_rate": 4.8926156184310946e-05,
"loss": 1.5763,
"step": 590
},
{
"epoch": 1.57,
"learning_rate": 4.878300362621719e-05,
"loss": 1.5044,
"step": 591
},
{
"epoch": 1.57,
"learning_rate": 4.8639861049156136e-05,
"loss": 1.5653,
"step": 592
},
{
"epoch": 1.57,
"learning_rate": 4.8496729627092405e-05,
"loss": 1.5588,
"step": 593
},
{
"epoch": 1.58,
"learning_rate": 4.835361053389922e-05,
"loss": 1.4821,
"step": 594
},
{
"epoch": 1.58,
"learning_rate": 4.821050494334861e-05,
"loss": 1.6273,
"step": 595
},
{
"epoch": 1.58,
"learning_rate": 4.806741402910193e-05,
"loss": 1.4818,
"step": 596
},
{
"epoch": 1.58,
"learning_rate": 4.7924338964700096e-05,
"loss": 1.4659,
"step": 597
},
{
"epoch": 1.59,
"learning_rate": 4.778128092355412e-05,
"loss": 1.5297,
"step": 598
},
{
"epoch": 1.59,
"learning_rate": 4.7638241078935325e-05,
"loss": 1.585,
"step": 599
},
{
"epoch": 1.59,
"learning_rate": 4.7495220603965866e-05,
"loss": 1.4958,
"step": 600
},
{
"epoch": 1.59,
"learning_rate": 4.735222067160896e-05,
"loss": 1.5098,
"step": 601
},
{
"epoch": 1.6,
"learning_rate": 4.720924245465943e-05,
"loss": 1.6065,
"step": 602
},
{
"epoch": 1.6,
"learning_rate": 4.706628712573394e-05,
"loss": 1.5091,
"step": 603
},
{
"epoch": 1.6,
"learning_rate": 4.6923355857261455e-05,
"loss": 1.4611,
"step": 604
},
{
"epoch": 1.6,
"learning_rate": 4.678044982147365e-05,
"loss": 1.5287,
"step": 605
},
{
"epoch": 1.61,
"learning_rate": 4.6637570190395205e-05,
"loss": 1.5573,
"step": 606
},
{
"epoch": 1.61,
"learning_rate": 4.649471813583427e-05,
"loss": 1.6371,
"step": 607
},
{
"epoch": 1.61,
"learning_rate": 4.635189482937284e-05,
"loss": 1.5336,
"step": 608
},
{
"epoch": 1.62,
"learning_rate": 4.620910144235712e-05,
"loss": 1.5559,
"step": 609
},
{
"epoch": 1.62,
"learning_rate": 4.606633914588793e-05,
"loss": 1.5399,
"step": 610
},
{
"epoch": 1.62,
"learning_rate": 4.592360911081113e-05,
"loss": 1.487,
"step": 611
},
{
"epoch": 1.62,
"learning_rate": 4.5780912507707944e-05,
"loss": 1.5583,
"step": 612
},
{
"epoch": 1.63,
"learning_rate": 4.563825050688549e-05,
"loss": 1.5271,
"step": 613
},
{
"epoch": 1.63,
"learning_rate": 4.549562427836701e-05,
"loss": 1.5934,
"step": 614
},
{
"epoch": 1.63,
"learning_rate": 4.535303499188244e-05,
"loss": 1.5261,
"step": 615
},
{
"epoch": 1.63,
"learning_rate": 4.5210483816858676e-05,
"loss": 1.6577,
"step": 616
},
{
"epoch": 1.64,
"learning_rate": 4.506797192241009e-05,
"loss": 1.4575,
"step": 617
},
{
"epoch": 1.64,
"learning_rate": 4.49255004773289e-05,
"loss": 1.4948,
"step": 618
},
{
"epoch": 1.64,
"learning_rate": 4.478307065007554e-05,
"loss": 1.4523,
"step": 619
},
{
"epoch": 1.64,
"learning_rate": 4.464068360876919e-05,
"loss": 1.6135,
"step": 620
},
{
"epoch": 1.65,
"learning_rate": 4.449834052117804e-05,
"loss": 1.5568,
"step": 621
},
{
"epoch": 1.65,
"learning_rate": 4.4356042554709905e-05,
"loss": 1.5823,
"step": 622
},
{
"epoch": 1.65,
"learning_rate": 4.421379087640244e-05,
"loss": 1.664,
"step": 623
},
{
"epoch": 1.66,
"learning_rate": 4.407158665291377e-05,
"loss": 1.5322,
"step": 624
},
{
"epoch": 1.66,
"learning_rate": 4.3929431050512727e-05,
"loss": 1.5811,
"step": 625
},
{
"epoch": 1.66,
"learning_rate": 4.3787325235069487e-05,
"loss": 1.5768,
"step": 626
},
{
"epoch": 1.66,
"learning_rate": 4.36452703720458e-05,
"loss": 1.5219,
"step": 627
},
{
"epoch": 1.67,
"learning_rate": 4.350326762648565e-05,
"loss": 1.5525,
"step": 628
},
{
"epoch": 1.67,
"learning_rate": 4.3361318163005484e-05,
"loss": 1.477,
"step": 629
},
{
"epoch": 1.67,
"learning_rate": 4.321942314578482e-05,
"loss": 1.524,
"step": 630
},
{
"epoch": 1.67,
"learning_rate": 4.307758373855661e-05,
"loss": 1.5741,
"step": 631
},
{
"epoch": 1.68,
"learning_rate": 4.293580110459776e-05,
"loss": 1.531,
"step": 632
},
{
"epoch": 1.68,
"learning_rate": 4.279407640671956e-05,
"loss": 1.5424,
"step": 633
},
{
"epoch": 1.68,
"learning_rate": 4.265241080725808e-05,
"loss": 1.471,
"step": 634
},
{
"epoch": 1.68,
"learning_rate": 4.251080546806481e-05,
"loss": 1.5149,
"step": 635
},
{
"epoch": 1.69,
"learning_rate": 4.2369261550496905e-05,
"loss": 1.5289,
"step": 636
},
{
"epoch": 1.69,
"learning_rate": 4.22277802154079e-05,
"loss": 1.455,
"step": 637
},
{
"epoch": 1.69,
"learning_rate": 4.2086362623137955e-05,
"loss": 1.5351,
"step": 638
},
{
"epoch": 1.69,
"learning_rate": 4.194500993350454e-05,
"loss": 1.5747,
"step": 639
},
{
"epoch": 1.7,
"learning_rate": 4.180372330579276e-05,
"loss": 1.5356,
"step": 640
},
{
"epoch": 1.7,
"learning_rate": 4.1662503898745994e-05,
"loss": 1.4969,
"step": 641
},
{
"epoch": 1.7,
"learning_rate": 4.1521352870556266e-05,
"loss": 1.5077,
"step": 642
},
{
"epoch": 1.71,
"learning_rate": 4.1380271378854833e-05,
"loss": 1.5598,
"step": 643
},
{
"epoch": 1.71,
"learning_rate": 4.1239260580702635e-05,
"loss": 1.5431,
"step": 644
},
{
"epoch": 1.71,
"learning_rate": 4.1098321632580824e-05,
"loss": 1.5806,
"step": 645
},
{
"epoch": 1.71,
"learning_rate": 4.095745569038133e-05,
"loss": 1.4687,
"step": 646
},
{
"epoch": 1.72,
"learning_rate": 4.0816663909397256e-05,
"loss": 1.534,
"step": 647
},
{
"epoch": 1.72,
"learning_rate": 4.067594744431358e-05,
"loss": 1.5602,
"step": 648
},
{
"epoch": 1.72,
"learning_rate": 4.053530744919749e-05,
"loss": 1.5434,
"step": 649
},
{
"epoch": 1.72,
"learning_rate": 4.03947450774891e-05,
"loss": 1.4529,
"step": 650
},
{
"epoch": 1.73,
"learning_rate": 4.0254261481991825e-05,
"loss": 1.5127,
"step": 651
},
{
"epoch": 1.73,
"learning_rate": 4.011385781486308e-05,
"loss": 1.5195,
"step": 652
},
{
"epoch": 1.73,
"learning_rate": 3.9973535227604714e-05,
"loss": 1.5714,
"step": 653
},
{
"epoch": 1.73,
"learning_rate": 3.983329487105364e-05,
"loss": 1.5864,
"step": 654
},
{
"epoch": 1.74,
"learning_rate": 3.96931378953723e-05,
"loss": 1.5457,
"step": 655
},
{
"epoch": 1.74,
"learning_rate": 3.955306545003941e-05,
"loss": 1.5544,
"step": 656
},
{
"epoch": 1.74,
"learning_rate": 3.941307868384034e-05,
"loss": 1.5802,
"step": 657
},
{
"epoch": 1.75,
"learning_rate": 3.927317874485776e-05,
"loss": 1.4793,
"step": 658
},
{
"epoch": 1.75,
"learning_rate": 3.9133366780462325e-05,
"loss": 1.5746,
"step": 659
},
{
"epoch": 1.75,
"learning_rate": 3.899364393730308e-05,
"loss": 1.5031,
"step": 660
},
{
"epoch": 1.75,
"learning_rate": 3.8854011361298246e-05,
"loss": 1.5029,
"step": 661
},
{
"epoch": 1.76,
"learning_rate": 3.871447019762564e-05,
"loss": 1.4952,
"step": 662
},
{
"epoch": 1.76,
"learning_rate": 3.857502159071346e-05,
"loss": 1.556,
"step": 663
},
{
"epoch": 1.76,
"learning_rate": 3.843566668423073e-05,
"loss": 1.5939,
"step": 664
},
{
"epoch": 1.76,
"learning_rate": 3.829640662107807e-05,
"loss": 1.5231,
"step": 665
},
{
"epoch": 1.77,
"learning_rate": 3.8157242543378205e-05,
"loss": 1.543,
"step": 666
},
{
"epoch": 1.77,
"learning_rate": 3.8018175592466695e-05,
"loss": 1.5051,
"step": 667
},
{
"epoch": 1.77,
"learning_rate": 3.787920690888248e-05,
"loss": 1.4483,
"step": 668
},
{
"epoch": 1.77,
"learning_rate": 3.7740337632358616e-05,
"loss": 1.5926,
"step": 669
},
{
"epoch": 1.78,
"learning_rate": 3.760156890181283e-05,
"loss": 1.5499,
"step": 670
},
{
"epoch": 1.78,
"learning_rate": 3.746290185533833e-05,
"loss": 1.6084,
"step": 671
},
{
"epoch": 1.78,
"learning_rate": 3.732433763019428e-05,
"loss": 1.4915,
"step": 672
},
{
"epoch": 1.79,
"learning_rate": 3.718587736279658e-05,
"loss": 1.5149,
"step": 673
},
{
"epoch": 1.79,
"learning_rate": 3.704752218870861e-05,
"loss": 1.5557,
"step": 674
},
{
"epoch": 1.79,
"learning_rate": 3.690927324263175e-05,
"loss": 1.4818,
"step": 675
},
{
"epoch": 1.79,
"learning_rate": 3.677113165839623e-05,
"loss": 1.4723,
"step": 676
},
{
"epoch": 1.8,
"learning_rate": 3.663309856895174e-05,
"loss": 1.4855,
"step": 677
},
{
"epoch": 1.8,
"learning_rate": 3.6495175106358154e-05,
"loss": 1.5185,
"step": 678
},
{
"epoch": 1.8,
"learning_rate": 3.6357362401776277e-05,
"loss": 1.5155,
"step": 679
},
{
"epoch": 1.8,
"learning_rate": 3.621966158545855e-05,
"loss": 1.5517,
"step": 680
},
{
"epoch": 1.81,
"learning_rate": 3.608207378673973e-05,
"loss": 1.4894,
"step": 681
},
{
"epoch": 1.81,
"learning_rate": 3.594460013402775e-05,
"loss": 1.4591,
"step": 682
},
{
"epoch": 1.81,
"learning_rate": 3.580724175479432e-05,
"loss": 1.5306,
"step": 683
},
{
"epoch": 1.81,
"learning_rate": 3.566999977556582e-05,
"loss": 1.4702,
"step": 684
},
{
"epoch": 1.82,
"learning_rate": 3.5532875321913935e-05,
"loss": 1.6138,
"step": 685
},
{
"epoch": 1.82,
"learning_rate": 3.5395869518446464e-05,
"loss": 1.4431,
"step": 686
},
{
"epoch": 1.82,
"learning_rate": 3.525898348879819e-05,
"loss": 1.6268,
"step": 687
},
{
"epoch": 1.82,
"learning_rate": 3.5122218355621514e-05,
"loss": 1.5443,
"step": 688
},
{
"epoch": 1.83,
"learning_rate": 3.4985575240577365e-05,
"loss": 1.6156,
"step": 689
},
{
"epoch": 1.83,
"learning_rate": 3.484905526432594e-05,
"loss": 1.5196,
"step": 690
},
{
"epoch": 1.83,
"learning_rate": 3.471265954651752e-05,
"loss": 1.5153,
"step": 691
},
{
"epoch": 1.84,
"learning_rate": 3.457638920578331e-05,
"loss": 1.5637,
"step": 692
},
{
"epoch": 1.84,
"learning_rate": 3.4440245359726266e-05,
"loss": 1.555,
"step": 693
},
{
"epoch": 1.84,
"learning_rate": 3.4304229124911856e-05,
"loss": 1.5495,
"step": 694
},
{
"epoch": 1.84,
"learning_rate": 3.416834161685907e-05,
"loss": 1.5596,
"step": 695
},
{
"epoch": 1.85,
"learning_rate": 3.403258395003102e-05,
"loss": 1.5496,
"step": 696
},
{
"epoch": 1.85,
"learning_rate": 3.389695723782609e-05,
"loss": 1.5649,
"step": 697
},
{
"epoch": 1.85,
"learning_rate": 3.376146259256855e-05,
"loss": 1.5552,
"step": 698
},
{
"epoch": 1.85,
"learning_rate": 3.3626101125499555e-05,
"loss": 1.5355,
"step": 699
},
{
"epoch": 1.86,
"learning_rate": 3.349087394676809e-05,
"loss": 1.5022,
"step": 700
},
{
"epoch": 1.86,
"learning_rate": 3.33557821654217e-05,
"loss": 1.527,
"step": 701
},
{
"epoch": 1.86,
"learning_rate": 3.322082688939755e-05,
"loss": 1.5452,
"step": 702
},
{
"epoch": 1.86,
"learning_rate": 3.308600922551324e-05,
"loss": 1.5208,
"step": 703
},
{
"epoch": 1.87,
"learning_rate": 3.295133027945778e-05,
"loss": 1.47,
"step": 704
},
{
"epoch": 1.87,
"learning_rate": 3.281679115578249e-05,
"loss": 1.5202,
"step": 705
},
{
"epoch": 1.87,
"learning_rate": 3.2682392957891985e-05,
"loss": 1.4507,
"step": 706
},
{
"epoch": 1.88,
"learning_rate": 3.254813678803504e-05,
"loss": 1.6117,
"step": 707
},
{
"epoch": 1.88,
"learning_rate": 3.241402374729569e-05,
"loss": 1.6149,
"step": 708
},
{
"epoch": 1.88,
"learning_rate": 3.2280054935584025e-05,
"loss": 1.5947,
"step": 709
},
{
"epoch": 1.88,
"learning_rate": 3.2146231451627334e-05,
"loss": 1.5165,
"step": 710
},
{
"epoch": 1.89,
"learning_rate": 3.2012554392960966e-05,
"loss": 1.4893,
"step": 711
},
{
"epoch": 1.89,
"learning_rate": 3.187902485591941e-05,
"loss": 1.6028,
"step": 712
},
{
"epoch": 1.89,
"learning_rate": 3.174564393562728e-05,
"loss": 1.5429,
"step": 713
},
{
"epoch": 1.89,
"learning_rate": 3.161241272599031e-05,
"loss": 1.5214,
"step": 714
},
{
"epoch": 1.9,
"learning_rate": 3.147933231968642e-05,
"loss": 1.5541,
"step": 715
},
{
"epoch": 1.9,
"learning_rate": 3.1346403808156713e-05,
"loss": 1.5747,
"step": 716
},
{
"epoch": 1.9,
"learning_rate": 3.121362828159659e-05,
"loss": 1.5768,
"step": 717
},
{
"epoch": 1.9,
"learning_rate": 3.108100682894671e-05,
"loss": 1.6119,
"step": 718
},
{
"epoch": 1.91,
"learning_rate": 3.094854053788418e-05,
"loss": 1.577,
"step": 719
},
{
"epoch": 1.91,
"learning_rate": 3.08162304948135e-05,
"loss": 1.5888,
"step": 720
},
{
"epoch": 1.91,
"learning_rate": 3.06840777848578e-05,
"loss": 1.5093,
"step": 721
},
{
"epoch": 1.92,
"learning_rate": 3.055208349184977e-05,
"loss": 1.4787,
"step": 722
},
{
"epoch": 1.92,
"learning_rate": 3.0420248698322973e-05,
"loss": 1.5513,
"step": 723
},
{
"epoch": 1.92,
"learning_rate": 3.0288574485502757e-05,
"loss": 1.594,
"step": 724
},
{
"epoch": 1.92,
"learning_rate": 3.015706193329757e-05,
"loss": 1.5548,
"step": 725
},
{
"epoch": 1.93,
"learning_rate": 3.002571212028995e-05,
"loss": 1.5783,
"step": 726
},
{
"epoch": 1.93,
"learning_rate": 2.9894526123727808e-05,
"loss": 1.5001,
"step": 727
},
{
"epoch": 1.93,
"learning_rate": 2.9763505019515525e-05,
"loss": 1.5542,
"step": 728
},
{
"epoch": 1.93,
"learning_rate": 2.9632649882205088e-05,
"loss": 1.5134,
"step": 729
},
{
"epoch": 1.94,
"learning_rate": 2.950196178498743e-05,
"loss": 1.5232,
"step": 730
},
{
"epoch": 1.94,
"learning_rate": 2.937144179968342e-05,
"loss": 1.4753,
"step": 731
},
{
"epoch": 1.94,
"learning_rate": 2.9241090996735266e-05,
"loss": 1.4371,
"step": 732
},
{
"epoch": 1.94,
"learning_rate": 2.911091044519757e-05,
"loss": 1.5026,
"step": 733
},
{
"epoch": 1.95,
"learning_rate": 2.8980901212728728e-05,
"loss": 1.5565,
"step": 734
},
{
"epoch": 1.95,
"learning_rate": 2.8851064365581982e-05,
"loss": 1.509,
"step": 735
},
{
"epoch": 1.95,
"learning_rate": 2.8721400968596903e-05,
"loss": 1.5417,
"step": 736
},
{
"epoch": 1.95,
"learning_rate": 2.8591912085190392e-05,
"loss": 1.4827,
"step": 737
},
{
"epoch": 1.96,
"learning_rate": 2.8462598777348247e-05,
"loss": 1.5347,
"step": 738
},
{
"epoch": 1.96,
"learning_rate": 2.8333462105616194e-05,
"loss": 1.5072,
"step": 739
},
{
"epoch": 1.96,
"learning_rate": 2.820450312909134e-05,
"loss": 1.4506,
"step": 740
},
{
"epoch": 1.97,
"learning_rate": 2.807572290541346e-05,
"loss": 1.5673,
"step": 741
},
{
"epoch": 1.97,
"learning_rate": 2.79471224907563e-05,
"loss": 1.5108,
"step": 742
},
{
"epoch": 1.97,
"learning_rate": 2.781870293981893e-05,
"loss": 1.4845,
"step": 743
},
{
"epoch": 1.97,
"learning_rate": 2.7690465305817088e-05,
"loss": 1.5846,
"step": 744
},
{
"epoch": 1.98,
"learning_rate": 2.756241064047456e-05,
"loss": 1.5504,
"step": 745
},
{
"epoch": 1.98,
"learning_rate": 2.7434539994014475e-05,
"loss": 1.5451,
"step": 746
},
{
"epoch": 1.98,
"learning_rate": 2.730685441515088e-05,
"loss": 1.4817,
"step": 747
},
{
"epoch": 1.98,
"learning_rate": 2.7179354951079856e-05,
"loss": 1.4819,
"step": 748
},
{
"epoch": 1.99,
"learning_rate": 2.7052042647471252e-05,
"loss": 1.5487,
"step": 749
},
{
"epoch": 1.99,
"learning_rate": 2.69249185484598e-05,
"loss": 1.4851,
"step": 750
},
{
"epoch": 1.99,
"learning_rate": 2.679798369663683e-05,
"loss": 1.5208,
"step": 751
},
{
"epoch": 1.99,
"learning_rate": 2.667123913304146e-05,
"loss": 1.536,
"step": 752
},
{
"epoch": 2.0,
"learning_rate": 2.6544685897152272e-05,
"loss": 1.5505,
"step": 753
},
{
"epoch": 2.0,
"learning_rate": 2.6418325026878665e-05,
"loss": 1.6026,
"step": 754
},
{
"epoch": 2.0,
"learning_rate": 2.629215755855239e-05,
"loss": 1.4181,
"step": 755
},
{
"epoch": 2.01,
"learning_rate": 2.6166184526919047e-05,
"loss": 1.4751,
"step": 756
},
{
"epoch": 2.01,
"learning_rate": 2.6040406965129515e-05,
"loss": 1.4894,
"step": 757
},
{
"epoch": 2.01,
"learning_rate": 2.5914825904731686e-05,
"loss": 1.5007,
"step": 758
},
{
"epoch": 2.01,
"learning_rate": 2.5789442375661744e-05,
"loss": 1.372,
"step": 759
},
{
"epoch": 2.02,
"learning_rate": 2.5664257406235955e-05,
"loss": 1.4389,
"step": 760
},
{
"epoch": 2.02,
"learning_rate": 2.5539272023141995e-05,
"loss": 1.4259,
"step": 761
},
{
"epoch": 2.02,
"learning_rate": 2.541448725143083e-05,
"loss": 1.4355,
"step": 762
},
{
"epoch": 2.02,
"learning_rate": 2.5289904114507946e-05,
"loss": 1.4497,
"step": 763
},
{
"epoch": 2.03,
"learning_rate": 2.516552363412534e-05,
"loss": 1.4206,
"step": 764
},
{
"epoch": 2.03,
"learning_rate": 2.504134683037278e-05,
"loss": 1.481,
"step": 765
},
{
"epoch": 2.03,
"learning_rate": 2.491737472166972e-05,
"loss": 1.4599,
"step": 766
},
{
"epoch": 2.03,
"learning_rate": 2.479360832475679e-05,
"loss": 1.5219,
"step": 767
},
{
"epoch": 2.04,
"learning_rate": 2.46700486546875e-05,
"loss": 1.4852,
"step": 768
},
{
"epoch": 2.04,
"learning_rate": 2.4546696724819963e-05,
"loss": 1.4385,
"step": 769
},
{
"epoch": 2.04,
"learning_rate": 2.4423553546808427e-05,
"loss": 1.4962,
"step": 770
},
{
"epoch": 2.05,
"learning_rate": 2.430062013059526e-05,
"loss": 1.441,
"step": 771
},
{
"epoch": 2.05,
"learning_rate": 2.4177897484402306e-05,
"loss": 1.4178,
"step": 772
},
{
"epoch": 2.05,
"learning_rate": 2.4055386614722996e-05,
"loss": 1.499,
"step": 773
},
{
"epoch": 2.05,
"learning_rate": 2.393308852631373e-05,
"loss": 1.4574,
"step": 774
},
{
"epoch": 2.06,
"learning_rate": 2.381100422218596e-05,
"loss": 1.4838,
"step": 775
},
{
"epoch": 2.06,
"learning_rate": 2.3689134703597706e-05,
"loss": 1.479,
"step": 776
},
{
"epoch": 2.06,
"learning_rate": 2.3567480970045492e-05,
"loss": 1.5401,
"step": 777
},
{
"epoch": 2.06,
"learning_rate": 2.344604401925613e-05,
"loss": 1.4839,
"step": 778
},
{
"epoch": 2.07,
"learning_rate": 2.3324824847178494e-05,
"loss": 1.4536,
"step": 779
},
{
"epoch": 2.07,
"learning_rate": 2.3203824447975392e-05,
"loss": 1.3847,
"step": 780
},
{
"epoch": 2.07,
"learning_rate": 2.308304381401534e-05,
"loss": 1.4686,
"step": 781
},
{
"epoch": 2.07,
"learning_rate": 2.296248393586459e-05,
"loss": 1.4785,
"step": 782
},
{
"epoch": 2.08,
"learning_rate": 2.284214580227875e-05,
"loss": 1.4651,
"step": 783
},
{
"epoch": 2.08,
"learning_rate": 2.2722030400194976e-05,
"loss": 1.4577,
"step": 784
},
{
"epoch": 2.08,
"learning_rate": 2.2602138714723574e-05,
"loss": 1.4656,
"step": 785
},
{
"epoch": 2.08,
"learning_rate": 2.24824717291402e-05,
"loss": 1.4736,
"step": 786
},
{
"epoch": 2.09,
"learning_rate": 2.2363030424877535e-05,
"loss": 1.4946,
"step": 787
},
{
"epoch": 2.09,
"learning_rate": 2.2243815781517496e-05,
"loss": 1.4902,
"step": 788
},
{
"epoch": 2.09,
"learning_rate": 2.2124828776782957e-05,
"loss": 1.3805,
"step": 789
},
{
"epoch": 2.1,
"learning_rate": 2.2006070386529913e-05,
"loss": 1.4926,
"step": 790
},
{
"epoch": 2.1,
"learning_rate": 2.1887541584739385e-05,
"loss": 1.4136,
"step": 791
},
{
"epoch": 2.1,
"learning_rate": 2.1769243343509454e-05,
"loss": 1.4177,
"step": 792
},
{
"epoch": 2.1,
"learning_rate": 2.165117663304732e-05,
"loss": 1.4555,
"step": 793
},
{
"epoch": 2.11,
"learning_rate": 2.153334242166123e-05,
"loss": 1.4362,
"step": 794
},
{
"epoch": 2.11,
"learning_rate": 2.1415741675752742e-05,
"loss": 1.4483,
"step": 795
},
{
"epoch": 2.11,
"learning_rate": 2.129837535980856e-05,
"loss": 1.3899,
"step": 796
},
{
"epoch": 2.11,
"learning_rate": 2.1181244436392855e-05,
"loss": 1.521,
"step": 797
},
{
"epoch": 2.12,
"learning_rate": 2.1064349866139132e-05,
"loss": 1.4221,
"step": 798
},
{
"epoch": 2.12,
"learning_rate": 2.094769260774262e-05,
"loss": 1.4968,
"step": 799
},
{
"epoch": 2.12,
"learning_rate": 2.0831273617952136e-05,
"loss": 1.4631,
"step": 800
},
{
"epoch": 2.12,
"learning_rate": 2.071509385156244e-05,
"loss": 1.4571,
"step": 801
},
{
"epoch": 2.13,
"learning_rate": 2.0599154261406316e-05,
"loss": 1.4922,
"step": 802
},
{
"epoch": 2.13,
"learning_rate": 2.0483455798346786e-05,
"loss": 1.4316,
"step": 803
},
{
"epoch": 2.13,
"learning_rate": 2.0367999411269285e-05,
"loss": 1.4226,
"step": 804
},
{
"epoch": 2.14,
"learning_rate": 2.0252786047073895e-05,
"loss": 1.4586,
"step": 805
},
{
"epoch": 2.14,
"learning_rate": 2.0137816650667612e-05,
"loss": 1.4131,
"step": 806
},
{
"epoch": 2.14,
"learning_rate": 2.0023092164956474e-05,
"loss": 1.4782,
"step": 807
},
{
"epoch": 2.14,
"learning_rate": 1.9908613530838055e-05,
"loss": 1.4648,
"step": 808
},
{
"epoch": 2.15,
"learning_rate": 1.979438168719346e-05,
"loss": 1.4328,
"step": 809
},
{
"epoch": 2.15,
"learning_rate": 1.968039757087991e-05,
"loss": 1.4804,
"step": 810
},
{
"epoch": 2.15,
"learning_rate": 1.9566662116722793e-05,
"loss": 1.5185,
"step": 811
},
{
"epoch": 2.15,
"learning_rate": 1.9453176257508275e-05,
"loss": 1.418,
"step": 812
},
{
"epoch": 2.16,
"learning_rate": 1.9339940923975364e-05,
"loss": 1.5342,
"step": 813
},
{
"epoch": 2.16,
"learning_rate": 1.9226957044808497e-05,
"loss": 1.4951,
"step": 814
},
{
"epoch": 2.16,
"learning_rate": 1.911422554662981e-05,
"loss": 1.5001,
"step": 815
},
{
"epoch": 2.16,
"learning_rate": 1.9001747353991582e-05,
"loss": 1.4289,
"step": 816
},
{
"epoch": 2.17,
"learning_rate": 1.888952338936864e-05,
"loss": 1.4779,
"step": 817
},
{
"epoch": 2.17,
"learning_rate": 1.8777554573150795e-05,
"loss": 1.4541,
"step": 818
},
{
"epoch": 2.17,
"learning_rate": 1.8665841823635284e-05,
"loss": 1.3708,
"step": 819
},
{
"epoch": 2.18,
"learning_rate": 1.855438605701925e-05,
"loss": 1.4434,
"step": 820
},
{
"epoch": 2.18,
"learning_rate": 1.8443188187392257e-05,
"loss": 1.4388,
"step": 821
},
{
"epoch": 2.18,
"learning_rate": 1.8332249126728666e-05,
"loss": 1.543,
"step": 822
},
{
"epoch": 2.18,
"learning_rate": 1.8221569784880397e-05,
"loss": 1.4487,
"step": 823
},
{
"epoch": 2.19,
"learning_rate": 1.811115106956918e-05,
"loss": 1.4323,
"step": 824
},
{
"epoch": 2.19,
"learning_rate": 1.8000993886379342e-05,
"loss": 1.4424,
"step": 825
},
{
"epoch": 2.19,
"learning_rate": 1.789109913875025e-05,
"loss": 1.3609,
"step": 826
},
{
"epoch": 2.19,
"learning_rate": 1.7781467727968953e-05,
"loss": 1.4008,
"step": 827
},
{
"epoch": 2.2,
"learning_rate": 1.7672100553162774e-05,
"loss": 1.4308,
"step": 828
},
{
"epoch": 2.2,
"learning_rate": 1.7562998511291946e-05,
"loss": 1.5258,
"step": 829
},
{
"epoch": 2.2,
"learning_rate": 1.745416249714224e-05,
"loss": 1.4535,
"step": 830
},
{
"epoch": 2.2,
"learning_rate": 1.734559340331765e-05,
"loss": 1.4607,
"step": 831
},
{
"epoch": 2.21,
"learning_rate": 1.7237292120233044e-05,
"loss": 1.3692,
"step": 832
},
{
"epoch": 2.21,
"learning_rate": 1.7129259536106885e-05,
"loss": 1.4383,
"step": 833
},
{
"epoch": 2.21,
"learning_rate": 1.702149653695395e-05,
"loss": 1.3952,
"step": 834
},
{
"epoch": 2.21,
"learning_rate": 1.691400400657799e-05,
"loss": 1.3934,
"step": 835
},
{
"epoch": 2.22,
"learning_rate": 1.6806782826564654e-05,
"loss": 1.4273,
"step": 836
},
{
"epoch": 2.22,
"learning_rate": 1.6699833876274028e-05,
"loss": 1.4847,
"step": 837
},
{
"epoch": 2.22,
"learning_rate": 1.6593158032833624e-05,
"loss": 1.4369,
"step": 838
},
{
"epoch": 2.23,
"learning_rate": 1.6486756171131063e-05,
"loss": 1.4289,
"step": 839
},
{
"epoch": 2.23,
"learning_rate": 1.638062916380697e-05,
"loss": 1.4509,
"step": 840
},
{
"epoch": 2.23,
"learning_rate": 1.627477788124776e-05,
"loss": 1.4375,
"step": 841
},
{
"epoch": 2.23,
"learning_rate": 1.6169203191578557e-05,
"loss": 1.5603,
"step": 842
},
{
"epoch": 2.24,
"learning_rate": 1.606390596065602e-05,
"loss": 1.5002,
"step": 843
},
{
"epoch": 2.24,
"learning_rate": 1.5958887052061283e-05,
"loss": 1.4442,
"step": 844
},
{
"epoch": 2.24,
"learning_rate": 1.5854147327092855e-05,
"loss": 1.4966,
"step": 845
},
{
"epoch": 2.24,
"learning_rate": 1.5749687644759552e-05,
"loss": 1.4576,
"step": 846
},
{
"epoch": 2.25,
"learning_rate": 1.564550886177348e-05,
"loss": 1.3977,
"step": 847
},
{
"epoch": 2.25,
"learning_rate": 1.5541611832542925e-05,
"loss": 1.5316,
"step": 848
},
{
"epoch": 2.25,
"learning_rate": 1.5437997409165478e-05,
"loss": 1.434,
"step": 849
},
{
"epoch": 2.25,
"learning_rate": 1.533466644142095e-05,
"loss": 1.449,
"step": 850
},
{
"epoch": 2.26,
"learning_rate": 1.523161977676441e-05,
"loss": 1.4726,
"step": 851
},
{
"epoch": 2.26,
"learning_rate": 1.5128858260319285e-05,
"loss": 1.4609,
"step": 852
},
{
"epoch": 2.26,
"learning_rate": 1.5026382734870376e-05,
"loss": 1.3527,
"step": 853
},
{
"epoch": 2.27,
"learning_rate": 1.4924194040856975e-05,
"loss": 1.4362,
"step": 854
},
{
"epoch": 2.27,
"learning_rate": 1.4822293016365962e-05,
"loss": 1.4483,
"step": 855
},
{
"epoch": 2.27,
"learning_rate": 1.4720680497124934e-05,
"loss": 1.3976,
"step": 856
},
{
"epoch": 2.27,
"learning_rate": 1.4619357316495352e-05,
"loss": 1.4461,
"step": 857
},
{
"epoch": 2.28,
"learning_rate": 1.4518324305465702e-05,
"loss": 1.4909,
"step": 858
},
{
"epoch": 2.28,
"learning_rate": 1.4417582292644694e-05,
"loss": 1.4755,
"step": 859
},
{
"epoch": 2.28,
"learning_rate": 1.4317132104254438e-05,
"loss": 1.4927,
"step": 860
},
{
"epoch": 2.28,
"learning_rate": 1.421697456412371e-05,
"loss": 1.4471,
"step": 861
},
{
"epoch": 2.29,
"learning_rate": 1.4117110493681124e-05,
"loss": 1.4859,
"step": 862
},
{
"epoch": 2.29,
"learning_rate": 1.401754071194849e-05,
"loss": 1.471,
"step": 863
},
{
"epoch": 2.29,
"learning_rate": 1.3918266035534027e-05,
"loss": 1.3538,
"step": 864
},
{
"epoch": 2.29,
"learning_rate": 1.3819287278625697e-05,
"loss": 1.4906,
"step": 865
},
{
"epoch": 2.3,
"learning_rate": 1.3720605252984503e-05,
"loss": 1.5514,
"step": 866
},
{
"epoch": 2.3,
"learning_rate": 1.362222076793786e-05,
"loss": 1.45,
"step": 867
},
{
"epoch": 2.3,
"learning_rate": 1.3524134630372937e-05,
"loss": 1.4378,
"step": 868
},
{
"epoch": 2.31,
"learning_rate": 1.3426347644730047e-05,
"loss": 1.4655,
"step": 869
},
{
"epoch": 2.31,
"learning_rate": 1.3328860612996053e-05,
"loss": 1.4872,
"step": 870
},
{
"epoch": 2.31,
"learning_rate": 1.3231674334697774e-05,
"loss": 1.5583,
"step": 871
},
{
"epoch": 2.31,
"learning_rate": 1.3134789606895476e-05,
"loss": 1.3942,
"step": 872
},
{
"epoch": 2.32,
"learning_rate": 1.3038207224176213e-05,
"loss": 1.4931,
"step": 873
},
{
"epoch": 2.32,
"learning_rate": 1.2941927978647528e-05,
"loss": 1.4525,
"step": 874
},
{
"epoch": 2.32,
"learning_rate": 1.2845952659930693e-05,
"loss": 1.5043,
"step": 875
},
{
"epoch": 2.32,
"learning_rate": 1.275028205515445e-05,
"loss": 1.3989,
"step": 876
},
{
"epoch": 2.33,
"learning_rate": 1.2654916948948436e-05,
"loss": 1.4537,
"step": 877
},
{
"epoch": 2.33,
"learning_rate": 1.2559858123436802e-05,
"loss": 1.4277,
"step": 878
},
{
"epoch": 2.33,
"learning_rate": 1.2465106358231753e-05,
"loss": 1.441,
"step": 879
},
{
"epoch": 2.33,
"learning_rate": 1.23706624304272e-05,
"loss": 1.4923,
"step": 880
},
{
"epoch": 2.34,
"learning_rate": 1.2276527114592367e-05,
"loss": 1.4097,
"step": 881
},
{
"epoch": 2.34,
"learning_rate": 1.2182701182765426e-05,
"loss": 1.4913,
"step": 882
},
{
"epoch": 2.34,
"learning_rate": 1.208918540444719e-05,
"loss": 1.421,
"step": 883
},
{
"epoch": 2.34,
"learning_rate": 1.1995980546594776e-05,
"loss": 1.4794,
"step": 884
},
{
"epoch": 2.35,
"learning_rate": 1.1903087373615351e-05,
"loss": 1.4501,
"step": 885
},
{
"epoch": 2.35,
"learning_rate": 1.1810506647359793e-05,
"loss": 1.5201,
"step": 886
},
{
"epoch": 2.35,
"learning_rate": 1.171823912711657e-05,
"loss": 1.4111,
"step": 887
},
{
"epoch": 2.36,
"learning_rate": 1.1626285569605344e-05,
"loss": 1.4176,
"step": 888
},
{
"epoch": 2.36,
"learning_rate": 1.153464672897091e-05,
"loss": 1.5283,
"step": 889
},
{
"epoch": 2.36,
"learning_rate": 1.144332335677694e-05,
"loss": 1.4544,
"step": 890
},
{
"epoch": 2.36,
"learning_rate": 1.1352316201999841e-05,
"loss": 1.4489,
"step": 891
},
{
"epoch": 2.37,
"learning_rate": 1.1261626011022586e-05,
"loss": 1.4405,
"step": 892
},
{
"epoch": 2.37,
"learning_rate": 1.1171253527628628e-05,
"loss": 1.4862,
"step": 893
},
{
"epoch": 2.37,
"learning_rate": 1.1081199492995781e-05,
"loss": 1.4785,
"step": 894
},
{
"epoch": 2.37,
"learning_rate": 1.0991464645690142e-05,
"loss": 1.4659,
"step": 895
},
{
"epoch": 2.38,
"learning_rate": 1.0902049721660046e-05,
"loss": 1.438,
"step": 896
},
{
"epoch": 2.38,
"learning_rate": 1.0812955454229978e-05,
"loss": 1.4393,
"step": 897
},
{
"epoch": 2.38,
"learning_rate": 1.0724182574094682e-05,
"loss": 1.4228,
"step": 898
},
{
"epoch": 2.38,
"learning_rate": 1.0635731809312993e-05,
"loss": 1.4681,
"step": 899
},
{
"epoch": 2.39,
"learning_rate": 1.0547603885302049e-05,
"loss": 1.4792,
"step": 900
},
{
"epoch": 2.39,
"learning_rate": 1.045979952483117e-05,
"loss": 1.4224,
"step": 901
},
{
"epoch": 2.39,
"learning_rate": 1.037231944801607e-05,
"loss": 1.4102,
"step": 902
},
{
"epoch": 2.4,
"learning_rate": 1.0285164372312884e-05,
"loss": 1.4084,
"step": 903
},
{
"epoch": 2.4,
"learning_rate": 1.0198335012512272e-05,
"loss": 1.493,
"step": 904
},
{
"epoch": 2.4,
"learning_rate": 1.0111832080733601e-05,
"loss": 1.4612,
"step": 905
},
{
"epoch": 2.4,
"learning_rate": 1.0025656286419078e-05,
"loss": 1.4915,
"step": 906
},
{
"epoch": 2.41,
"learning_rate": 9.939808336327921e-06,
"loss": 1.4335,
"step": 907
},
{
"epoch": 2.41,
"learning_rate": 9.854288934530604e-06,
"loss": 1.4599,
"step": 908
},
{
"epoch": 2.41,
"learning_rate": 9.769098782403041e-06,
"loss": 1.3922,
"step": 909
},
{
"epoch": 2.41,
"learning_rate": 9.684238578620814e-06,
"loss": 1.4095,
"step": 910
},
{
"epoch": 2.42,
"learning_rate": 9.599709019153568e-06,
"loss": 1.4492,
"step": 911
},
{
"epoch": 2.42,
"learning_rate": 9.515510797259102e-06,
"loss": 1.4238,
"step": 912
},
{
"epoch": 2.42,
"learning_rate": 9.431644603477907e-06,
"loss": 1.4694,
"step": 913
},
{
"epoch": 2.42,
"learning_rate": 9.34811112562728e-06,
"loss": 1.5137,
"step": 914
},
{
"epoch": 2.43,
"learning_rate": 9.264911048795893e-06,
"loss": 1.3667,
"step": 915
},
{
"epoch": 2.43,
"learning_rate": 9.182045055337995e-06,
"loss": 1.4456,
"step": 916
},
{
"epoch": 2.43,
"learning_rate": 9.099513824867939e-06,
"loss": 1.4658,
"step": 917
},
{
"epoch": 2.44,
"learning_rate": 9.017318034254546e-06,
"loss": 1.4212,
"step": 918
},
{
"epoch": 2.44,
"learning_rate": 8.935458357615584e-06,
"loss": 1.4453,
"step": 919
},
{
"epoch": 2.44,
"learning_rate": 8.853935466312225e-06,
"loss": 1.4445,
"step": 920
},
{
"epoch": 2.44,
"learning_rate": 8.772750028943527e-06,
"loss": 1.3734,
"step": 921
},
{
"epoch": 2.45,
"learning_rate": 8.691902711341e-06,
"loss": 1.4893,
"step": 922
},
{
"epoch": 2.45,
"learning_rate": 8.611394176563038e-06,
"loss": 1.5218,
"step": 923
},
{
"epoch": 2.45,
"learning_rate": 8.531225084889654e-06,
"loss": 1.4519,
"step": 924
},
{
"epoch": 2.45,
"learning_rate": 8.451396093816872e-06,
"loss": 1.4522,
"step": 925
},
{
"epoch": 2.46,
"learning_rate": 8.371907858051497e-06,
"loss": 1.4729,
"step": 926
},
{
"epoch": 2.46,
"learning_rate": 8.292761029505603e-06,
"loss": 1.4275,
"step": 927
},
{
"epoch": 2.46,
"learning_rate": 8.21395625729135e-06,
"loss": 1.4604,
"step": 928
},
{
"epoch": 2.46,
"learning_rate": 8.135494187715475e-06,
"loss": 1.4039,
"step": 929
},
{
"epoch": 2.47,
"learning_rate": 8.057375464274142e-06,
"loss": 1.4912,
"step": 930
},
{
"epoch": 2.47,
"learning_rate": 7.979600727647596e-06,
"loss": 1.4187,
"step": 931
},
{
"epoch": 2.47,
"learning_rate": 7.902170615694915e-06,
"loss": 1.4225,
"step": 932
},
{
"epoch": 2.47,
"learning_rate": 7.825085763448798e-06,
"loss": 1.4011,
"step": 933
},
{
"epoch": 2.48,
"learning_rate": 7.748346803110295e-06,
"loss": 1.3841,
"step": 934
},
{
"epoch": 2.48,
"learning_rate": 7.671954364043754e-06,
"loss": 1.474,
"step": 935
},
{
"epoch": 2.48,
"learning_rate": 7.595909072771485e-06,
"loss": 1.4427,
"step": 936
},
{
"epoch": 2.49,
"learning_rate": 7.520211552968792e-06,
"loss": 1.5081,
"step": 937
},
{
"epoch": 2.49,
"learning_rate": 7.444862425458699e-06,
"loss": 1.4544,
"step": 938
},
{
"epoch": 2.49,
"learning_rate": 7.369862308207026e-06,
"loss": 1.5018,
"step": 939
},
{
"epoch": 2.49,
"learning_rate": 7.295211816317149e-06,
"loss": 1.4605,
"step": 940
},
{
"epoch": 2.5,
"learning_rate": 7.220911562025085e-06,
"loss": 1.4122,
"step": 941
},
{
"epoch": 2.5,
"learning_rate": 7.146962154694409e-06,
"loss": 1.4672,
"step": 942
},
{
"epoch": 2.5,
"learning_rate": 7.0733642008112836e-06,
"loss": 1.5257,
"step": 943
},
{
"epoch": 2.5,
"learning_rate": 7.000118303979464e-06,
"loss": 1.4355,
"step": 944
},
{
"epoch": 2.51,
"learning_rate": 6.927225064915349e-06,
"loss": 1.4799,
"step": 945
},
{
"epoch": 2.51,
"learning_rate": 6.854685081443097e-06,
"loss": 1.5002,
"step": 946
},
{
"epoch": 2.51,
"learning_rate": 6.782498948489613e-06,
"loss": 1.478,
"step": 947
},
{
"epoch": 2.51,
"learning_rate": 6.71066725807985e-06,
"loss": 1.4507,
"step": 948
},
{
"epoch": 2.52,
"learning_rate": 6.639190599331746e-06,
"loss": 1.482,
"step": 949
},
{
"epoch": 2.52,
"learning_rate": 6.5680695584515725e-06,
"loss": 1.4785,
"step": 950
},
{
"epoch": 2.52,
"learning_rate": 6.497304718728986e-06,
"loss": 1.4368,
"step": 951
},
{
"epoch": 2.53,
"learning_rate": 6.4268966605323725e-06,
"loss": 1.4422,
"step": 952
},
{
"epoch": 2.53,
"learning_rate": 6.3568459613039536e-06,
"loss": 1.4643,
"step": 953
},
{
"epoch": 2.53,
"learning_rate": 6.287153195555174e-06,
"loss": 1.4136,
"step": 954
},
{
"epoch": 2.53,
"learning_rate": 6.217818934861896e-06,
"loss": 1.4759,
"step": 955
},
{
"epoch": 2.54,
"learning_rate": 6.148843747859778e-06,
"loss": 1.5521,
"step": 956
},
{
"epoch": 2.54,
"learning_rate": 6.080228200239585e-06,
"loss": 1.4326,
"step": 957
},
{
"epoch": 2.54,
"learning_rate": 6.011972854742503e-06,
"loss": 1.4925,
"step": 958
},
{
"epoch": 2.54,
"learning_rate": 5.94407827115564e-06,
"loss": 1.5337,
"step": 959
},
{
"epoch": 2.55,
"learning_rate": 5.876545006307288e-06,
"loss": 1.4804,
"step": 960
},
{
"epoch": 2.55,
"learning_rate": 5.809373614062508e-06,
"loss": 1.4134,
"step": 961
},
{
"epoch": 2.55,
"learning_rate": 5.742564645318432e-06,
"loss": 1.4892,
"step": 962
},
{
"epoch": 2.55,
"learning_rate": 5.6761186479999115e-06,
"loss": 1.5565,
"step": 963
},
{
"epoch": 2.56,
"learning_rate": 5.610036167054839e-06,
"loss": 1.4506,
"step": 964
},
{
"epoch": 2.56,
"learning_rate": 5.544317744449873e-06,
"loss": 1.3972,
"step": 965
},
{
"epoch": 2.56,
"learning_rate": 5.478963919165819e-06,
"loss": 1.5298,
"step": 966
},
{
"epoch": 2.56,
"learning_rate": 5.4139752271933295e-06,
"loss": 1.507,
"step": 967
},
{
"epoch": 2.57,
"learning_rate": 5.349352201528446e-06,
"loss": 1.5116,
"step": 968
},
{
"epoch": 2.57,
"learning_rate": 5.285095372168264e-06,
"loss": 1.4707,
"step": 969
},
{
"epoch": 2.57,
"learning_rate": 5.2212052661065656e-06,
"loss": 1.4136,
"step": 970
},
{
"epoch": 2.58,
"learning_rate": 5.157682407329456e-06,
"loss": 1.5139,
"step": 971
},
{
"epoch": 2.58,
"learning_rate": 5.094527316811204e-06,
"loss": 1.4348,
"step": 972
},
{
"epoch": 2.58,
"learning_rate": 5.031740512509769e-06,
"loss": 1.5051,
"step": 973
},
{
"epoch": 2.58,
"learning_rate": 4.969322509362762e-06,
"loss": 1.4504,
"step": 974
},
{
"epoch": 2.59,
"learning_rate": 4.9072738192830255e-06,
"loss": 1.3567,
"step": 975
},
{
"epoch": 2.59,
"learning_rate": 4.845594951154614e-06,
"loss": 1.5088,
"step": 976
},
{
"epoch": 2.59,
"learning_rate": 4.784286410828481e-06,
"loss": 1.4605,
"step": 977
},
{
"epoch": 2.59,
"learning_rate": 4.723348701118407e-06,
"loss": 1.4053,
"step": 978
},
{
"epoch": 2.6,
"learning_rate": 4.662782321796849e-06,
"loss": 1.3966,
"step": 979
},
{
"epoch": 2.6,
"learning_rate": 4.60258776959086e-06,
"loss": 1.4311,
"step": 980
},
{
"epoch": 2.6,
"learning_rate": 4.54276553817799e-06,
"loss": 1.4258,
"step": 981
},
{
"epoch": 2.6,
"learning_rate": 4.483316118182251e-06,
"loss": 1.436,
"step": 982
},
{
"epoch": 2.61,
"learning_rate": 4.424239997170105e-06,
"loss": 1.4556,
"step": 983
},
{
"epoch": 2.61,
"learning_rate": 4.365537659646418e-06,
"loss": 1.399,
"step": 984
},
{
"epoch": 2.61,
"learning_rate": 4.307209587050576e-06,
"loss": 1.5502,
"step": 985
},
{
"epoch": 2.62,
"learning_rate": 4.249256257752421e-06,
"loss": 1.4414,
"step": 986
},
{
"epoch": 2.62,
"learning_rate": 4.191678147048445e-06,
"loss": 1.5388,
"step": 987
},
{
"epoch": 2.62,
"learning_rate": 4.134475727157777e-06,
"loss": 1.419,
"step": 988
},
{
"epoch": 2.62,
"learning_rate": 4.077649467218436e-06,
"loss": 1.471,
"step": 989
},
{
"epoch": 2.63,
"learning_rate": 4.0211998332833514e-06,
"loss": 1.4475,
"step": 990
},
{
"epoch": 2.63,
"learning_rate": 3.965127288316634e-06,
"loss": 1.4398,
"step": 991
},
{
"epoch": 2.63,
"learning_rate": 3.9094322921897574e-06,
"loss": 1.3985,
"step": 992
},
{
"epoch": 2.63,
"learning_rate": 3.854115301677757e-06,
"loss": 1.3773,
"step": 993
},
{
"epoch": 2.64,
"learning_rate": 3.799176770455526e-06,
"loss": 1.4409,
"step": 994
},
{
"epoch": 2.64,
"learning_rate": 3.7446171490940706e-06,
"loss": 1.4241,
"step": 995
},
{
"epoch": 2.64,
"learning_rate": 3.690436885056808e-06,
"loss": 1.4467,
"step": 996
},
{
"epoch": 2.64,
"learning_rate": 3.6366364226959047e-06,
"loss": 1.433,
"step": 997
},
{
"epoch": 2.65,
"learning_rate": 3.5832162032486684e-06,
"loss": 1.4797,
"step": 998
},
{
"epoch": 2.65,
"learning_rate": 3.530176664833834e-06,
"loss": 1.3771,
"step": 999
},
{
"epoch": 2.65,
"learning_rate": 3.4775182424481135e-06,
"loss": 1.4442,
"step": 1000
},
{
"epoch": 2.66,
"learning_rate": 3.4252413679624616e-06,
"loss": 1.4842,
"step": 1001
},
{
"epoch": 2.66,
"learning_rate": 3.373346470118682e-06,
"loss": 1.4969,
"step": 1002
},
{
"epoch": 2.66,
"learning_rate": 3.321833974525812e-06,
"loss": 1.4653,
"step": 1003
},
{
"epoch": 2.66,
"learning_rate": 3.2707043036566965e-06,
"loss": 1.4349,
"step": 1004
},
{
"epoch": 2.67,
"learning_rate": 3.219957876844465e-06,
"loss": 1.4189,
"step": 1005
},
{
"epoch": 2.67,
"learning_rate": 3.16959511027915e-06,
"loss": 1.4463,
"step": 1006
},
{
"epoch": 2.67,
"learning_rate": 3.119616417004223e-06,
"loss": 1.4771,
"step": 1007
},
{
"epoch": 2.67,
"learning_rate": 3.0700222069132422e-06,
"loss": 1.4248,
"step": 1008
},
{
"epoch": 2.68,
"learning_rate": 3.020812886746477e-06,
"loss": 1.4778,
"step": 1009
},
{
"epoch": 2.68,
"learning_rate": 2.9719888600875713e-06,
"loss": 1.4952,
"step": 1010
},
{
"epoch": 2.68,
"learning_rate": 2.923550527360247e-06,
"loss": 1.4907,
"step": 1011
},
{
"epoch": 2.68,
"learning_rate": 2.875498285824979e-06,
"loss": 1.4447,
"step": 1012
},
{
"epoch": 2.69,
"learning_rate": 2.8278325295758134e-06,
"loss": 1.4653,
"step": 1013
},
{
"epoch": 2.69,
"learning_rate": 2.7805536495370375e-06,
"loss": 1.4342,
"step": 1014
},
{
"epoch": 2.69,
"learning_rate": 2.7336620334600604e-06,
"loss": 1.4871,
"step": 1015
},
{
"epoch": 2.69,
"learning_rate": 2.687158065920192e-06,
"loss": 1.4212,
"step": 1016
},
{
"epoch": 2.7,
"learning_rate": 2.6410421283134866e-06,
"loss": 1.4416,
"step": 1017
},
{
"epoch": 2.7,
"learning_rate": 2.595314598853632e-06,
"loss": 1.5976,
"step": 1018
},
{
"epoch": 2.7,
"learning_rate": 2.54997585256882e-06,
"loss": 1.4335,
"step": 1019
},
{
"epoch": 2.71,
"learning_rate": 2.5050262612987206e-06,
"loss": 1.4337,
"step": 1020
},
{
"epoch": 2.71,
"learning_rate": 2.4604661936913687e-06,
"loss": 1.4384,
"step": 1021
},
{
"epoch": 2.71,
"learning_rate": 2.416296015200198e-06,
"loss": 1.4824,
"step": 1022
},
{
"epoch": 2.71,
"learning_rate": 2.372516088081006e-06,
"loss": 1.4162,
"step": 1023
},
{
"epoch": 2.72,
"learning_rate": 2.3291267713889953e-06,
"loss": 1.4193,
"step": 1024
},
{
"epoch": 2.72,
"learning_rate": 2.286128420975836e-06,
"loss": 1.442,
"step": 1025
},
{
"epoch": 2.72,
"learning_rate": 2.2435213894867303e-06,
"loss": 1.4323,
"step": 1026
},
{
"epoch": 2.72,
"learning_rate": 2.2013060263575415e-06,
"loss": 1.4348,
"step": 1027
},
{
"epoch": 2.73,
"learning_rate": 2.159482677811919e-06,
"loss": 1.4858,
"step": 1028
},
{
"epoch": 2.73,
"learning_rate": 2.1180516868584467e-06,
"loss": 1.3906,
"step": 1029
},
{
"epoch": 2.73,
"learning_rate": 2.0770133932878412e-06,
"loss": 1.374,
"step": 1030
},
{
"epoch": 2.73,
"learning_rate": 2.0363681336701746e-06,
"loss": 1.4733,
"step": 1031
},
{
"epoch": 2.74,
"learning_rate": 1.996116241352092e-06,
"loss": 1.4614,
"step": 1032
},
{
"epoch": 2.74,
"learning_rate": 1.9562580464541014e-06,
"loss": 1.5179,
"step": 1033
},
{
"epoch": 2.74,
"learning_rate": 1.9167938758678394e-06,
"loss": 1.3801,
"step": 1034
},
{
"epoch": 2.75,
"learning_rate": 1.8777240532534212e-06,
"loss": 1.4825,
"step": 1035
},
{
"epoch": 2.75,
"learning_rate": 1.8390488990367493e-06,
"loss": 1.3541,
"step": 1036
},
{
"epoch": 2.75,
"learning_rate": 1.8007687304069375e-06,
"loss": 1.4733,
"step": 1037
},
{
"epoch": 2.75,
"learning_rate": 1.7628838613136412e-06,
"loss": 1.4835,
"step": 1038
},
{
"epoch": 2.76,
"learning_rate": 1.7253946024645473e-06,
"loss": 1.4677,
"step": 1039
},
{
"epoch": 2.76,
"learning_rate": 1.6883012613227778e-06,
"loss": 1.446,
"step": 1040
},
{
"epoch": 2.76,
"learning_rate": 1.6516041421044127e-06,
"loss": 1.461,
"step": 1041
},
{
"epoch": 2.76,
"learning_rate": 1.6153035457759536e-06,
"loss": 1.4779,
"step": 1042
},
{
"epoch": 2.77,
"learning_rate": 1.579399770051876e-06,
"loss": 1.4892,
"step": 1043
},
{
"epoch": 2.77,
"learning_rate": 1.5438931093921805e-06,
"loss": 1.4253,
"step": 1044
},
{
"epoch": 2.77,
"learning_rate": 1.5087838549999956e-06,
"loss": 1.4644,
"step": 1045
},
{
"epoch": 2.77,
"learning_rate": 1.474072294819162e-06,
"loss": 1.4157,
"step": 1046
},
{
"epoch": 2.78,
"learning_rate": 1.4397587135318857e-06,
"loss": 1.4122,
"step": 1047
},
{
"epoch": 2.78,
"learning_rate": 1.4058433925564107e-06,
"loss": 1.5177,
"step": 1048
},
{
"epoch": 2.78,
"learning_rate": 1.3723266100447053e-06,
"loss": 1.4533,
"step": 1049
},
{
"epoch": 2.79,
"learning_rate": 1.3392086408801518e-06,
"loss": 1.4155,
"step": 1050
},
{
"epoch": 2.79,
"learning_rate": 1.3064897566753442e-06,
"loss": 1.4637,
"step": 1051
},
{
"epoch": 2.79,
"learning_rate": 1.2741702257698273e-06,
"loss": 1.395,
"step": 1052
},
{
"epoch": 2.79,
"learning_rate": 1.242250313227905e-06,
"loss": 1.4807,
"step": 1053
},
{
"epoch": 2.8,
"learning_rate": 1.210730280836464e-06,
"loss": 1.4223,
"step": 1054
},
{
"epoch": 2.8,
"learning_rate": 1.1796103871028196e-06,
"loss": 1.461,
"step": 1055
},
{
"epoch": 2.8,
"learning_rate": 1.1488908872526183e-06,
"loss": 1.4161,
"step": 1056
},
{
"epoch": 2.8,
"learning_rate": 1.1185720332277162e-06,
"loss": 1.4039,
"step": 1057
},
{
"epoch": 2.81,
"learning_rate": 1.0886540736841311e-06,
"loss": 1.4827,
"step": 1058
},
{
"epoch": 2.81,
"learning_rate": 1.0591372539900058e-06,
"loss": 1.4112,
"step": 1059
},
{
"epoch": 2.81,
"learning_rate": 1.0300218162235752e-06,
"loss": 1.3737,
"step": 1060
},
{
"epoch": 2.81,
"learning_rate": 1.0013079991711972e-06,
"loss": 1.4186,
"step": 1061
},
{
"epoch": 2.82,
"learning_rate": 9.729960383254134e-07,
"loss": 1.4583,
"step": 1062
},
{
"epoch": 2.82,
"learning_rate": 9.450861658829469e-07,
"loss": 1.4222,
"step": 1063
},
{
"epoch": 2.82,
"learning_rate": 9.175786107429085e-07,
"loss": 1.4342,
"step": 1064
},
{
"epoch": 2.82,
"learning_rate": 8.90473598504804e-07,
"loss": 1.4549,
"step": 1065
},
{
"epoch": 2.83,
"learning_rate": 8.637713514667634e-07,
"loss": 1.4385,
"step": 1066
},
{
"epoch": 2.83,
"learning_rate": 8.37472088623692e-07,
"loss": 1.447,
"step": 1067
},
{
"epoch": 2.83,
"learning_rate": 8.115760256654669e-07,
"loss": 1.5006,
"step": 1068
},
{
"epoch": 2.84,
"learning_rate": 7.860833749751773e-07,
"loss": 1.4861,
"step": 1069
},
{
"epoch": 2.84,
"learning_rate": 7.60994345627386e-07,
"loss": 1.4061,
"step": 1070
},
{
"epoch": 2.84,
"learning_rate": 7.363091433864044e-07,
"loss": 1.4612,
"step": 1071
},
{
"epoch": 2.84,
"learning_rate": 7.120279707046096e-07,
"loss": 1.432,
"step": 1072
},
{
"epoch": 2.85,
"learning_rate": 6.881510267207846e-07,
"loss": 1.4702,
"step": 1073
},
{
"epoch": 2.85,
"learning_rate": 6.646785072584872e-07,
"loss": 1.5084,
"step": 1074
},
{
"epoch": 2.85,
"learning_rate": 6.416106048244386e-07,
"loss": 1.4661,
"step": 1075
},
{
"epoch": 2.85,
"learning_rate": 6.189475086069485e-07,
"loss": 1.3731,
"step": 1076
},
{
"epoch": 2.86,
"learning_rate": 5.966894044743709e-07,
"loss": 1.455,
"step": 1077
},
{
"epoch": 2.86,
"learning_rate": 5.748364749735613e-07,
"loss": 1.4169,
"step": 1078
},
{
"epoch": 2.86,
"learning_rate": 5.533888993283831e-07,
"loss": 1.3907,
"step": 1079
},
{
"epoch": 2.86,
"learning_rate": 5.323468534382703e-07,
"loss": 1.4668,
"step": 1080
},
{
"epoch": 2.87,
"learning_rate": 5.117105098767283e-07,
"loss": 1.4628,
"step": 1081
},
{
"epoch": 2.87,
"learning_rate": 4.914800378899687e-07,
"loss": 1.3697,
"step": 1082
},
{
"epoch": 2.87,
"learning_rate": 4.7165560339549886e-07,
"loss": 1.5115,
"step": 1083
},
{
"epoch": 2.88,
"learning_rate": 4.522373689807624e-07,
"loss": 1.4415,
"step": 1084
},
{
"epoch": 2.88,
"learning_rate": 4.33225493901801e-07,
"loss": 1.4368,
"step": 1085
},
{
"epoch": 2.88,
"learning_rate": 4.1462013408196664e-07,
"loss": 1.4338,
"step": 1086
},
{
"epoch": 2.88,
"learning_rate": 3.9642144211061714e-07,
"loss": 1.3896,
"step": 1087
},
{
"epoch": 2.89,
"learning_rate": 3.7862956724190045e-07,
"loss": 1.4796,
"step": 1088
},
{
"epoch": 2.89,
"learning_rate": 3.612446553934723e-07,
"loss": 1.4081,
"step": 1089
},
{
"epoch": 2.89,
"learning_rate": 3.4426684914538045e-07,
"loss": 1.3993,
"step": 1090
},
{
"epoch": 2.89,
"learning_rate": 3.276962877388157e-07,
"loss": 1.4188,
"step": 1091
},
{
"epoch": 2.9,
"learning_rate": 3.115331070750127e-07,
"loss": 1.4747,
"step": 1092
},
{
"epoch": 2.9,
"learning_rate": 2.957774397141455e-07,
"loss": 1.4073,
"step": 1093
},
{
"epoch": 2.9,
"learning_rate": 2.8042941487419483e-07,
"loss": 1.4816,
"step": 1094
},
{
"epoch": 2.9,
"learning_rate": 2.6548915842993793e-07,
"loss": 1.4085,
"step": 1095
},
{
"epoch": 2.91,
"learning_rate": 2.5095679291188833e-07,
"loss": 1.3901,
"step": 1096
},
{
"epoch": 2.91,
"learning_rate": 2.368324375052855e-07,
"loss": 1.4146,
"step": 1097
},
{
"epoch": 2.91,
"learning_rate": 2.2311620804914002e-07,
"loss": 1.4465,
"step": 1098
},
{
"epoch": 2.92,
"learning_rate": 2.0980821703527886e-07,
"loss": 1.4298,
"step": 1099
},
{
"epoch": 2.92,
"learning_rate": 1.9690857360739612e-07,
"loss": 1.5303,
"step": 1100
},
{
"epoch": 2.92,
"learning_rate": 1.8441738356019256e-07,
"loss": 1.5039,
"step": 1101
},
{
"epoch": 2.92,
"learning_rate": 1.7233474933849303e-07,
"loss": 1.4276,
"step": 1102
},
{
"epoch": 2.93,
"learning_rate": 1.6066077003639714e-07,
"loss": 1.4591,
"step": 1103
},
{
"epoch": 2.93,
"learning_rate": 1.4939554139648537e-07,
"loss": 1.4353,
"step": 1104
},
{
"epoch": 2.93,
"learning_rate": 1.3853915580901988e-07,
"loss": 1.3791,
"step": 1105
},
{
"epoch": 2.93,
"learning_rate": 1.2809170231118938e-07,
"loss": 1.4862,
"step": 1106
},
{
"epoch": 2.94,
"learning_rate": 1.1805326658639316e-07,
"loss": 1.3861,
"step": 1107
},
{
"epoch": 2.94,
"learning_rate": 1.0842393096350823e-07,
"loss": 1.4563,
"step": 1108
},
{
"epoch": 2.94,
"learning_rate": 9.920377441623996e-08,
"loss": 1.4102,
"step": 1109
},
{
"epoch": 2.94,
"learning_rate": 9.039287256247253e-08,
"loss": 1.4573,
"step": 1110
},
{
"epoch": 2.95,
"learning_rate": 8.199129766363056e-08,
"loss": 1.4369,
"step": 1111
},
{
"epoch": 2.95,
"learning_rate": 7.399911862410735e-08,
"loss": 1.5305,
"step": 1112
},
{
"epoch": 2.95,
"learning_rate": 6.641640099068758e-08,
"loss": 1.4532,
"step": 1113
},
{
"epoch": 2.95,
"learning_rate": 5.9243206952019904e-08,
"loss": 1.473,
"step": 1114
},
{
"epoch": 2.96,
"learning_rate": 5.247959533808966e-08,
"loss": 1.3916,
"step": 1115
},
{
"epoch": 2.96,
"learning_rate": 4.612562161974698e-08,
"loss": 1.3834,
"step": 1116
},
{
"epoch": 2.96,
"learning_rate": 4.018133790826273e-08,
"loss": 1.4524,
"step": 1117
},
{
"epoch": 2.97,
"learning_rate": 3.464679295487328e-08,
"loss": 1.428,
"step": 1118
},
{
"epoch": 2.97,
"learning_rate": 2.952203215041971e-08,
"loss": 1.4697,
"step": 1119
},
{
"epoch": 2.97,
"learning_rate": 2.480709752493704e-08,
"loss": 1.3832,
"step": 1120
},
{
"epoch": 2.97,
"learning_rate": 2.050202774732668e-08,
"loss": 1.5032,
"step": 1121
},
{
"epoch": 2.98,
"learning_rate": 1.6606858125040038e-08,
"loss": 1.4712,
"step": 1122
},
{
"epoch": 2.98,
"learning_rate": 1.3121620603795404e-08,
"loss": 1.4285,
"step": 1123
},
{
"epoch": 2.98,
"learning_rate": 1.0046343767294852e-08,
"loss": 1.4409,
"step": 1124
},
{
"epoch": 2.98,
"learning_rate": 7.381052837013291e-09,
"loss": 1.4037,
"step": 1125
},
{
"epoch": 2.99,
"learning_rate": 5.125769671976421e-09,
"loss": 1.4707,
"step": 1126
},
{
"epoch": 2.99,
"learning_rate": 3.2805127685886504e-09,
"loss": 1.4793,
"step": 1127
},
{
"epoch": 2.99,
"learning_rate": 1.845297260472112e-09,
"loss": 1.4402,
"step": 1128
},
{
"epoch": 2.99,
"learning_rate": 8.201349183611928e-10,
"loss": 1.4417,
"step": 1129
},
{
"epoch": 3.0,
"learning_rate": 2.0503414998040982e-10,
"loss": 1.4528,
"step": 1130
},
{
"epoch": 3.0,
"learning_rate": 0.0,
"loss": 1.4566,
"step": 1131
},
{
"epoch": 3.0,
"step": 1131,
"total_flos": 2.4511080029906534e+17,
"train_loss": 1.623349694105295,
"train_runtime": 8800.864,
"train_samples_per_second": 16.443,
"train_steps_per_second": 0.129
}
],
"max_steps": 1131,
"num_train_epochs": 3,
"total_flos": 2.4511080029906534e+17,
"trial_name": null,
"trial_params": null
}