Files
ToolLLaMA-2-7b-v2/trainer_state.json
ModelHub XC 87952c11bd 初始化项目,由ModelHub XC社区提供模型
Model: ToolBench/ToolLLaMA-2-7b-v2
Source: Original Platform
2026-06-06 21:42:28 +08:00

35217 lines
693 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 5862,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.1276595744680852e-07,
"loss": 0.6677,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 4.2553191489361704e-07,
"loss": 0.764,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 6.382978723404255e-07,
"loss": 0.7506,
"step": 3
},
{
"epoch": 0.0,
"learning_rate": 8.510638297872341e-07,
"loss": 0.7243,
"step": 4
},
{
"epoch": 0.0,
"learning_rate": 1.0638297872340427e-06,
"loss": 0.768,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 1.276595744680851e-06,
"loss": 0.788,
"step": 6
},
{
"epoch": 0.0,
"learning_rate": 1.4893617021276598e-06,
"loss": 0.7433,
"step": 7
},
{
"epoch": 0.0,
"learning_rate": 1.7021276595744682e-06,
"loss": 0.7169,
"step": 8
},
{
"epoch": 0.0,
"learning_rate": 1.9148936170212767e-06,
"loss": 0.6692,
"step": 9
},
{
"epoch": 0.0,
"learning_rate": 2.1276595744680853e-06,
"loss": 0.6926,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 2.3404255319148935e-06,
"loss": 0.6667,
"step": 11
},
{
"epoch": 0.0,
"learning_rate": 2.553191489361702e-06,
"loss": 0.5724,
"step": 12
},
{
"epoch": 0.0,
"learning_rate": 2.7659574468085106e-06,
"loss": 0.638,
"step": 13
},
{
"epoch": 0.0,
"learning_rate": 2.9787234042553196e-06,
"loss": 0.5975,
"step": 14
},
{
"epoch": 0.01,
"learning_rate": 3.1914893617021277e-06,
"loss": 0.5579,
"step": 15
},
{
"epoch": 0.01,
"learning_rate": 3.4042553191489363e-06,
"loss": 0.5203,
"step": 16
},
{
"epoch": 0.01,
"learning_rate": 3.6170212765957445e-06,
"loss": 0.5005,
"step": 17
},
{
"epoch": 0.01,
"learning_rate": 3.8297872340425535e-06,
"loss": 0.4405,
"step": 18
},
{
"epoch": 0.01,
"learning_rate": 4.0425531914893625e-06,
"loss": 0.5277,
"step": 19
},
{
"epoch": 0.01,
"learning_rate": 4.255319148936171e-06,
"loss": 0.4405,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 4.468085106382979e-06,
"loss": 0.4822,
"step": 21
},
{
"epoch": 0.01,
"learning_rate": 4.680851063829787e-06,
"loss": 0.5165,
"step": 22
},
{
"epoch": 0.01,
"learning_rate": 4.893617021276596e-06,
"loss": 0.4441,
"step": 23
},
{
"epoch": 0.01,
"learning_rate": 5.106382978723404e-06,
"loss": 0.4173,
"step": 24
},
{
"epoch": 0.01,
"learning_rate": 5.319148936170213e-06,
"loss": 0.4572,
"step": 25
},
{
"epoch": 0.01,
"learning_rate": 5.531914893617021e-06,
"loss": 0.4769,
"step": 26
},
{
"epoch": 0.01,
"learning_rate": 5.74468085106383e-06,
"loss": 0.4042,
"step": 27
},
{
"epoch": 0.01,
"learning_rate": 5.957446808510639e-06,
"loss": 0.4513,
"step": 28
},
{
"epoch": 0.01,
"learning_rate": 6.170212765957447e-06,
"loss": 0.3607,
"step": 29
},
{
"epoch": 0.01,
"learning_rate": 6.3829787234042555e-06,
"loss": 0.4167,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 6.595744680851064e-06,
"loss": 0.4514,
"step": 31
},
{
"epoch": 0.01,
"learning_rate": 6.808510638297873e-06,
"loss": 0.3886,
"step": 32
},
{
"epoch": 0.01,
"learning_rate": 7.021276595744682e-06,
"loss": 0.3782,
"step": 33
},
{
"epoch": 0.01,
"learning_rate": 7.234042553191489e-06,
"loss": 0.4119,
"step": 34
},
{
"epoch": 0.01,
"learning_rate": 7.446808510638298e-06,
"loss": 0.3471,
"step": 35
},
{
"epoch": 0.01,
"learning_rate": 7.659574468085107e-06,
"loss": 0.3922,
"step": 36
},
{
"epoch": 0.01,
"learning_rate": 7.872340425531916e-06,
"loss": 0.3973,
"step": 37
},
{
"epoch": 0.01,
"learning_rate": 8.085106382978725e-06,
"loss": 0.4148,
"step": 38
},
{
"epoch": 0.01,
"learning_rate": 8.297872340425532e-06,
"loss": 0.4031,
"step": 39
},
{
"epoch": 0.01,
"learning_rate": 8.510638297872341e-06,
"loss": 0.3396,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 8.72340425531915e-06,
"loss": 0.4384,
"step": 41
},
{
"epoch": 0.01,
"learning_rate": 8.936170212765958e-06,
"loss": 0.3539,
"step": 42
},
{
"epoch": 0.01,
"learning_rate": 9.148936170212767e-06,
"loss": 0.368,
"step": 43
},
{
"epoch": 0.02,
"learning_rate": 9.361702127659574e-06,
"loss": 0.3847,
"step": 44
},
{
"epoch": 0.02,
"learning_rate": 9.574468085106383e-06,
"loss": 0.4405,
"step": 45
},
{
"epoch": 0.02,
"learning_rate": 9.787234042553192e-06,
"loss": 0.3488,
"step": 46
},
{
"epoch": 0.02,
"learning_rate": 1e-05,
"loss": 0.3636,
"step": 47
},
{
"epoch": 0.02,
"learning_rate": 1.0212765957446808e-05,
"loss": 0.3444,
"step": 48
},
{
"epoch": 0.02,
"learning_rate": 1.0425531914893617e-05,
"loss": 0.376,
"step": 49
},
{
"epoch": 0.02,
"learning_rate": 1.0638297872340426e-05,
"loss": 0.3506,
"step": 50
},
{
"epoch": 0.02,
"learning_rate": 1.0851063829787235e-05,
"loss": 0.3647,
"step": 51
},
{
"epoch": 0.02,
"learning_rate": 1.1063829787234042e-05,
"loss": 0.3821,
"step": 52
},
{
"epoch": 0.02,
"learning_rate": 1.1276595744680851e-05,
"loss": 0.3962,
"step": 53
},
{
"epoch": 0.02,
"learning_rate": 1.148936170212766e-05,
"loss": 0.4277,
"step": 54
},
{
"epoch": 0.02,
"learning_rate": 1.170212765957447e-05,
"loss": 0.344,
"step": 55
},
{
"epoch": 0.02,
"learning_rate": 1.1914893617021278e-05,
"loss": 0.3481,
"step": 56
},
{
"epoch": 0.02,
"learning_rate": 1.2127659574468086e-05,
"loss": 0.3727,
"step": 57
},
{
"epoch": 0.02,
"learning_rate": 1.2340425531914895e-05,
"loss": 0.3648,
"step": 58
},
{
"epoch": 0.02,
"learning_rate": 1.2553191489361702e-05,
"loss": 0.3576,
"step": 59
},
{
"epoch": 0.02,
"learning_rate": 1.2765957446808511e-05,
"loss": 0.4458,
"step": 60
},
{
"epoch": 0.02,
"learning_rate": 1.2978723404255318e-05,
"loss": 0.3467,
"step": 61
},
{
"epoch": 0.02,
"learning_rate": 1.3191489361702127e-05,
"loss": 0.3488,
"step": 62
},
{
"epoch": 0.02,
"learning_rate": 1.3404255319148936e-05,
"loss": 0.3589,
"step": 63
},
{
"epoch": 0.02,
"learning_rate": 1.3617021276595745e-05,
"loss": 0.3507,
"step": 64
},
{
"epoch": 0.02,
"learning_rate": 1.3829787234042554e-05,
"loss": 0.3901,
"step": 65
},
{
"epoch": 0.02,
"learning_rate": 1.4042553191489363e-05,
"loss": 0.3814,
"step": 66
},
{
"epoch": 0.02,
"learning_rate": 1.4255319148936172e-05,
"loss": 0.3728,
"step": 67
},
{
"epoch": 0.02,
"learning_rate": 1.4468085106382978e-05,
"loss": 0.3987,
"step": 68
},
{
"epoch": 0.02,
"learning_rate": 1.4680851063829787e-05,
"loss": 0.3301,
"step": 69
},
{
"epoch": 0.02,
"learning_rate": 1.4893617021276596e-05,
"loss": 0.3474,
"step": 70
},
{
"epoch": 0.02,
"learning_rate": 1.5106382978723405e-05,
"loss": 0.4003,
"step": 71
},
{
"epoch": 0.02,
"learning_rate": 1.5319148936170214e-05,
"loss": 0.4107,
"step": 72
},
{
"epoch": 0.02,
"learning_rate": 1.5531914893617023e-05,
"loss": 0.3792,
"step": 73
},
{
"epoch": 0.03,
"learning_rate": 1.5744680851063832e-05,
"loss": 0.3952,
"step": 74
},
{
"epoch": 0.03,
"learning_rate": 1.595744680851064e-05,
"loss": 0.4233,
"step": 75
},
{
"epoch": 0.03,
"learning_rate": 1.617021276595745e-05,
"loss": 0.3341,
"step": 76
},
{
"epoch": 0.03,
"learning_rate": 1.6382978723404255e-05,
"loss": 0.3294,
"step": 77
},
{
"epoch": 0.03,
"learning_rate": 1.6595744680851064e-05,
"loss": 0.3623,
"step": 78
},
{
"epoch": 0.03,
"learning_rate": 1.6808510638297873e-05,
"loss": 0.3712,
"step": 79
},
{
"epoch": 0.03,
"learning_rate": 1.7021276595744682e-05,
"loss": 0.3895,
"step": 80
},
{
"epoch": 0.03,
"learning_rate": 1.723404255319149e-05,
"loss": 0.3816,
"step": 81
},
{
"epoch": 0.03,
"learning_rate": 1.74468085106383e-05,
"loss": 0.384,
"step": 82
},
{
"epoch": 0.03,
"learning_rate": 1.7659574468085106e-05,
"loss": 0.3701,
"step": 83
},
{
"epoch": 0.03,
"learning_rate": 1.7872340425531915e-05,
"loss": 0.3453,
"step": 84
},
{
"epoch": 0.03,
"learning_rate": 1.8085106382978724e-05,
"loss": 0.3908,
"step": 85
},
{
"epoch": 0.03,
"learning_rate": 1.8297872340425533e-05,
"loss": 0.3829,
"step": 86
},
{
"epoch": 0.03,
"learning_rate": 1.851063829787234e-05,
"loss": 0.4038,
"step": 87
},
{
"epoch": 0.03,
"learning_rate": 1.8723404255319148e-05,
"loss": 0.4345,
"step": 88
},
{
"epoch": 0.03,
"learning_rate": 1.8936170212765957e-05,
"loss": 0.3702,
"step": 89
},
{
"epoch": 0.03,
"learning_rate": 1.9148936170212766e-05,
"loss": 0.4068,
"step": 90
},
{
"epoch": 0.03,
"learning_rate": 1.9361702127659575e-05,
"loss": 0.386,
"step": 91
},
{
"epoch": 0.03,
"learning_rate": 1.9574468085106384e-05,
"loss": 0.3929,
"step": 92
},
{
"epoch": 0.03,
"learning_rate": 1.9787234042553193e-05,
"loss": 0.3897,
"step": 93
},
{
"epoch": 0.03,
"learning_rate": 2e-05,
"loss": 0.3529,
"step": 94
},
{
"epoch": 0.03,
"learning_rate": 2.0212765957446807e-05,
"loss": 0.3884,
"step": 95
},
{
"epoch": 0.03,
"learning_rate": 2.0425531914893616e-05,
"loss": 0.391,
"step": 96
},
{
"epoch": 0.03,
"learning_rate": 2.0638297872340425e-05,
"loss": 0.4112,
"step": 97
},
{
"epoch": 0.03,
"learning_rate": 2.0851063829787234e-05,
"loss": 0.4045,
"step": 98
},
{
"epoch": 0.03,
"learning_rate": 2.1063829787234043e-05,
"loss": 0.3663,
"step": 99
},
{
"epoch": 0.03,
"learning_rate": 2.1276595744680852e-05,
"loss": 0.3779,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 2.148936170212766e-05,
"loss": 0.4283,
"step": 101
},
{
"epoch": 0.03,
"learning_rate": 2.170212765957447e-05,
"loss": 0.3949,
"step": 102
},
{
"epoch": 0.04,
"learning_rate": 2.191489361702128e-05,
"loss": 0.3788,
"step": 103
},
{
"epoch": 0.04,
"learning_rate": 2.2127659574468085e-05,
"loss": 0.3533,
"step": 104
},
{
"epoch": 0.04,
"learning_rate": 2.2340425531914894e-05,
"loss": 0.3784,
"step": 105
},
{
"epoch": 0.04,
"learning_rate": 2.2553191489361703e-05,
"loss": 0.375,
"step": 106
},
{
"epoch": 0.04,
"learning_rate": 2.2765957446808512e-05,
"loss": 0.3847,
"step": 107
},
{
"epoch": 0.04,
"learning_rate": 2.297872340425532e-05,
"loss": 0.3854,
"step": 108
},
{
"epoch": 0.04,
"learning_rate": 2.319148936170213e-05,
"loss": 0.3837,
"step": 109
},
{
"epoch": 0.04,
"learning_rate": 2.340425531914894e-05,
"loss": 0.3928,
"step": 110
},
{
"epoch": 0.04,
"learning_rate": 2.3617021276595748e-05,
"loss": 0.3398,
"step": 111
},
{
"epoch": 0.04,
"learning_rate": 2.3829787234042557e-05,
"loss": 0.3558,
"step": 112
},
{
"epoch": 0.04,
"learning_rate": 2.4042553191489362e-05,
"loss": 0.3972,
"step": 113
},
{
"epoch": 0.04,
"learning_rate": 2.425531914893617e-05,
"loss": 0.4176,
"step": 114
},
{
"epoch": 0.04,
"learning_rate": 2.446808510638298e-05,
"loss": 0.4012,
"step": 115
},
{
"epoch": 0.04,
"learning_rate": 2.468085106382979e-05,
"loss": 0.3635,
"step": 116
},
{
"epoch": 0.04,
"learning_rate": 2.48936170212766e-05,
"loss": 0.3842,
"step": 117
},
{
"epoch": 0.04,
"learning_rate": 2.5106382978723404e-05,
"loss": 0.3543,
"step": 118
},
{
"epoch": 0.04,
"learning_rate": 2.5319148936170213e-05,
"loss": 0.3933,
"step": 119
},
{
"epoch": 0.04,
"learning_rate": 2.5531914893617022e-05,
"loss": 0.4029,
"step": 120
},
{
"epoch": 0.04,
"learning_rate": 2.574468085106383e-05,
"loss": 0.3621,
"step": 121
},
{
"epoch": 0.04,
"learning_rate": 2.5957446808510637e-05,
"loss": 0.3908,
"step": 122
},
{
"epoch": 0.04,
"learning_rate": 2.6170212765957446e-05,
"loss": 0.37,
"step": 123
},
{
"epoch": 0.04,
"learning_rate": 2.6382978723404255e-05,
"loss": 0.3784,
"step": 124
},
{
"epoch": 0.04,
"learning_rate": 2.6595744680851064e-05,
"loss": 0.327,
"step": 125
},
{
"epoch": 0.04,
"learning_rate": 2.6808510638297873e-05,
"loss": 0.3395,
"step": 126
},
{
"epoch": 0.04,
"learning_rate": 2.702127659574468e-05,
"loss": 0.3359,
"step": 127
},
{
"epoch": 0.04,
"learning_rate": 2.723404255319149e-05,
"loss": 0.3664,
"step": 128
},
{
"epoch": 0.04,
"learning_rate": 2.74468085106383e-05,
"loss": 0.4259,
"step": 129
},
{
"epoch": 0.04,
"learning_rate": 2.765957446808511e-05,
"loss": 0.3788,
"step": 130
},
{
"epoch": 0.04,
"learning_rate": 2.7872340425531918e-05,
"loss": 0.3661,
"step": 131
},
{
"epoch": 0.05,
"learning_rate": 2.8085106382978727e-05,
"loss": 0.3618,
"step": 132
},
{
"epoch": 0.05,
"learning_rate": 2.8297872340425536e-05,
"loss": 0.3453,
"step": 133
},
{
"epoch": 0.05,
"learning_rate": 2.8510638297872345e-05,
"loss": 0.4179,
"step": 134
},
{
"epoch": 0.05,
"learning_rate": 2.8723404255319154e-05,
"loss": 0.3756,
"step": 135
},
{
"epoch": 0.05,
"learning_rate": 2.8936170212765956e-05,
"loss": 0.4077,
"step": 136
},
{
"epoch": 0.05,
"learning_rate": 2.9148936170212765e-05,
"loss": 0.4112,
"step": 137
},
{
"epoch": 0.05,
"learning_rate": 2.9361702127659574e-05,
"loss": 0.3537,
"step": 138
},
{
"epoch": 0.05,
"learning_rate": 2.9574468085106383e-05,
"loss": 0.3797,
"step": 139
},
{
"epoch": 0.05,
"learning_rate": 2.9787234042553192e-05,
"loss": 0.3603,
"step": 140
},
{
"epoch": 0.05,
"learning_rate": 3e-05,
"loss": 0.3871,
"step": 141
},
{
"epoch": 0.05,
"learning_rate": 3.021276595744681e-05,
"loss": 0.419,
"step": 142
},
{
"epoch": 0.05,
"learning_rate": 3.042553191489362e-05,
"loss": 0.3761,
"step": 143
},
{
"epoch": 0.05,
"learning_rate": 3.063829787234043e-05,
"loss": 0.3628,
"step": 144
},
{
"epoch": 0.05,
"learning_rate": 3.085106382978723e-05,
"loss": 0.3537,
"step": 145
},
{
"epoch": 0.05,
"learning_rate": 3.1063829787234046e-05,
"loss": 0.3989,
"step": 146
},
{
"epoch": 0.05,
"learning_rate": 3.127659574468085e-05,
"loss": 0.3834,
"step": 147
},
{
"epoch": 0.05,
"learning_rate": 3.1489361702127664e-05,
"loss": 0.3811,
"step": 148
},
{
"epoch": 0.05,
"learning_rate": 3.170212765957447e-05,
"loss": 0.4016,
"step": 149
},
{
"epoch": 0.05,
"learning_rate": 3.191489361702128e-05,
"loss": 0.4284,
"step": 150
},
{
"epoch": 0.05,
"learning_rate": 3.212765957446809e-05,
"loss": 0.3395,
"step": 151
},
{
"epoch": 0.05,
"learning_rate": 3.23404255319149e-05,
"loss": 0.3949,
"step": 152
},
{
"epoch": 0.05,
"learning_rate": 3.2553191489361705e-05,
"loss": 0.4735,
"step": 153
},
{
"epoch": 0.05,
"learning_rate": 3.276595744680851e-05,
"loss": 0.3722,
"step": 154
},
{
"epoch": 0.05,
"learning_rate": 3.2978723404255317e-05,
"loss": 0.3791,
"step": 155
},
{
"epoch": 0.05,
"learning_rate": 3.319148936170213e-05,
"loss": 0.4151,
"step": 156
},
{
"epoch": 0.05,
"learning_rate": 3.3404255319148935e-05,
"loss": 0.3837,
"step": 157
},
{
"epoch": 0.05,
"learning_rate": 3.361702127659575e-05,
"loss": 0.34,
"step": 158
},
{
"epoch": 0.05,
"learning_rate": 3.382978723404255e-05,
"loss": 0.4021,
"step": 159
},
{
"epoch": 0.05,
"learning_rate": 3.4042553191489365e-05,
"loss": 0.4003,
"step": 160
},
{
"epoch": 0.05,
"learning_rate": 3.425531914893617e-05,
"loss": 0.3571,
"step": 161
},
{
"epoch": 0.06,
"learning_rate": 3.446808510638298e-05,
"loss": 0.3964,
"step": 162
},
{
"epoch": 0.06,
"learning_rate": 3.468085106382979e-05,
"loss": 0.3846,
"step": 163
},
{
"epoch": 0.06,
"learning_rate": 3.48936170212766e-05,
"loss": 0.3738,
"step": 164
},
{
"epoch": 0.06,
"learning_rate": 3.5106382978723407e-05,
"loss": 0.3824,
"step": 165
},
{
"epoch": 0.06,
"learning_rate": 3.531914893617021e-05,
"loss": 0.3824,
"step": 166
},
{
"epoch": 0.06,
"learning_rate": 3.5531914893617025e-05,
"loss": 0.4301,
"step": 167
},
{
"epoch": 0.06,
"learning_rate": 3.574468085106383e-05,
"loss": 0.3413,
"step": 168
},
{
"epoch": 0.06,
"learning_rate": 3.595744680851064e-05,
"loss": 0.3921,
"step": 169
},
{
"epoch": 0.06,
"learning_rate": 3.617021276595745e-05,
"loss": 0.4033,
"step": 170
},
{
"epoch": 0.06,
"learning_rate": 3.638297872340426e-05,
"loss": 0.4013,
"step": 171
},
{
"epoch": 0.06,
"learning_rate": 3.6595744680851066e-05,
"loss": 0.3857,
"step": 172
},
{
"epoch": 0.06,
"learning_rate": 3.680851063829787e-05,
"loss": 0.3854,
"step": 173
},
{
"epoch": 0.06,
"learning_rate": 3.702127659574468e-05,
"loss": 0.3864,
"step": 174
},
{
"epoch": 0.06,
"learning_rate": 3.723404255319149e-05,
"loss": 0.346,
"step": 175
},
{
"epoch": 0.06,
"learning_rate": 3.7446808510638295e-05,
"loss": 0.3573,
"step": 176
},
{
"epoch": 0.06,
"learning_rate": 3.765957446808511e-05,
"loss": 0.3976,
"step": 177
},
{
"epoch": 0.06,
"learning_rate": 3.787234042553191e-05,
"loss": 0.4101,
"step": 178
},
{
"epoch": 0.06,
"learning_rate": 3.8085106382978726e-05,
"loss": 0.4003,
"step": 179
},
{
"epoch": 0.06,
"learning_rate": 3.829787234042553e-05,
"loss": 0.3662,
"step": 180
},
{
"epoch": 0.06,
"learning_rate": 3.8510638297872344e-05,
"loss": 0.3493,
"step": 181
},
{
"epoch": 0.06,
"learning_rate": 3.872340425531915e-05,
"loss": 0.3725,
"step": 182
},
{
"epoch": 0.06,
"learning_rate": 3.893617021276596e-05,
"loss": 0.6858,
"step": 183
},
{
"epoch": 0.06,
"learning_rate": 3.914893617021277e-05,
"loss": 0.3415,
"step": 184
},
{
"epoch": 0.06,
"learning_rate": 3.936170212765958e-05,
"loss": 0.3658,
"step": 185
},
{
"epoch": 0.06,
"learning_rate": 3.9574468085106385e-05,
"loss": 0.3586,
"step": 186
},
{
"epoch": 0.06,
"learning_rate": 3.97872340425532e-05,
"loss": 0.4025,
"step": 187
},
{
"epoch": 0.06,
"learning_rate": 4e-05,
"loss": 0.3759,
"step": 188
},
{
"epoch": 0.06,
"learning_rate": 4.0212765957446816e-05,
"loss": 0.4257,
"step": 189
},
{
"epoch": 0.06,
"learning_rate": 4.0425531914893614e-05,
"loss": 0.4015,
"step": 190
},
{
"epoch": 0.07,
"learning_rate": 4.063829787234043e-05,
"loss": 0.4202,
"step": 191
},
{
"epoch": 0.07,
"learning_rate": 4.085106382978723e-05,
"loss": 0.3657,
"step": 192
},
{
"epoch": 0.07,
"learning_rate": 4.1063829787234045e-05,
"loss": 0.4086,
"step": 193
},
{
"epoch": 0.07,
"learning_rate": 4.127659574468085e-05,
"loss": 0.3942,
"step": 194
},
{
"epoch": 0.07,
"learning_rate": 4.148936170212766e-05,
"loss": 0.4417,
"step": 195
},
{
"epoch": 0.07,
"learning_rate": 4.170212765957447e-05,
"loss": 0.3837,
"step": 196
},
{
"epoch": 0.07,
"learning_rate": 4.191489361702128e-05,
"loss": 0.3539,
"step": 197
},
{
"epoch": 0.07,
"learning_rate": 4.2127659574468086e-05,
"loss": 0.4167,
"step": 198
},
{
"epoch": 0.07,
"learning_rate": 4.23404255319149e-05,
"loss": 0.4009,
"step": 199
},
{
"epoch": 0.07,
"learning_rate": 4.2553191489361704e-05,
"loss": 0.3925,
"step": 200
},
{
"epoch": 0.07,
"learning_rate": 4.276595744680851e-05,
"loss": 0.4209,
"step": 201
},
{
"epoch": 0.07,
"learning_rate": 4.297872340425532e-05,
"loss": 0.3469,
"step": 202
},
{
"epoch": 0.07,
"learning_rate": 4.319148936170213e-05,
"loss": 0.3514,
"step": 203
},
{
"epoch": 0.07,
"learning_rate": 4.340425531914894e-05,
"loss": 0.3768,
"step": 204
},
{
"epoch": 0.07,
"learning_rate": 4.3617021276595746e-05,
"loss": 0.3745,
"step": 205
},
{
"epoch": 0.07,
"learning_rate": 4.382978723404256e-05,
"loss": 0.4612,
"step": 206
},
{
"epoch": 0.07,
"learning_rate": 4.4042553191489364e-05,
"loss": 0.3555,
"step": 207
},
{
"epoch": 0.07,
"learning_rate": 4.425531914893617e-05,
"loss": 0.4696,
"step": 208
},
{
"epoch": 0.07,
"learning_rate": 4.4468085106382975e-05,
"loss": 0.3571,
"step": 209
},
{
"epoch": 0.07,
"learning_rate": 4.468085106382979e-05,
"loss": 0.4076,
"step": 210
},
{
"epoch": 0.07,
"learning_rate": 4.489361702127659e-05,
"loss": 0.4056,
"step": 211
},
{
"epoch": 0.07,
"learning_rate": 4.5106382978723406e-05,
"loss": 0.3665,
"step": 212
},
{
"epoch": 0.07,
"learning_rate": 4.531914893617021e-05,
"loss": 0.4442,
"step": 213
},
{
"epoch": 0.07,
"learning_rate": 4.5531914893617024e-05,
"loss": 0.3823,
"step": 214
},
{
"epoch": 0.07,
"learning_rate": 4.574468085106383e-05,
"loss": 0.3484,
"step": 215
},
{
"epoch": 0.07,
"learning_rate": 4.595744680851064e-05,
"loss": 0.3711,
"step": 216
},
{
"epoch": 0.07,
"learning_rate": 4.617021276595745e-05,
"loss": 0.355,
"step": 217
},
{
"epoch": 0.07,
"learning_rate": 4.638297872340426e-05,
"loss": 0.4111,
"step": 218
},
{
"epoch": 0.07,
"learning_rate": 4.6595744680851065e-05,
"loss": 0.3628,
"step": 219
},
{
"epoch": 0.08,
"learning_rate": 4.680851063829788e-05,
"loss": 0.4505,
"step": 220
},
{
"epoch": 0.08,
"learning_rate": 4.702127659574468e-05,
"loss": 0.3718,
"step": 221
},
{
"epoch": 0.08,
"learning_rate": 4.7234042553191496e-05,
"loss": 0.4159,
"step": 222
},
{
"epoch": 0.08,
"learning_rate": 4.74468085106383e-05,
"loss": 0.3452,
"step": 223
},
{
"epoch": 0.08,
"learning_rate": 4.7659574468085114e-05,
"loss": 0.4174,
"step": 224
},
{
"epoch": 0.08,
"learning_rate": 4.787234042553192e-05,
"loss": 0.4268,
"step": 225
},
{
"epoch": 0.08,
"learning_rate": 4.8085106382978725e-05,
"loss": 0.438,
"step": 226
},
{
"epoch": 0.08,
"learning_rate": 4.829787234042553e-05,
"loss": 0.4401,
"step": 227
},
{
"epoch": 0.08,
"learning_rate": 4.851063829787234e-05,
"loss": 0.384,
"step": 228
},
{
"epoch": 0.08,
"learning_rate": 4.872340425531915e-05,
"loss": 0.3921,
"step": 229
},
{
"epoch": 0.08,
"learning_rate": 4.893617021276596e-05,
"loss": 0.3706,
"step": 230
},
{
"epoch": 0.08,
"learning_rate": 4.9148936170212766e-05,
"loss": 0.4154,
"step": 231
},
{
"epoch": 0.08,
"learning_rate": 4.936170212765958e-05,
"loss": 0.3774,
"step": 232
},
{
"epoch": 0.08,
"learning_rate": 4.9574468085106384e-05,
"loss": 0.4357,
"step": 233
},
{
"epoch": 0.08,
"learning_rate": 4.97872340425532e-05,
"loss": 0.3946,
"step": 234
},
{
"epoch": 0.08,
"learning_rate": 5e-05,
"loss": 0.3855,
"step": 235
},
{
"epoch": 0.08,
"learning_rate": 4.999999610366835e-05,
"loss": 0.3769,
"step": 236
},
{
"epoch": 0.08,
"learning_rate": 4.9999984414674593e-05,
"loss": 0.3621,
"step": 237
},
{
"epoch": 0.08,
"learning_rate": 4.9999964933022394e-05,
"loss": 0.379,
"step": 238
},
{
"epoch": 0.08,
"learning_rate": 4.999993765871782e-05,
"loss": 0.3987,
"step": 239
},
{
"epoch": 0.08,
"learning_rate": 4.999990259176936e-05,
"loss": 0.4201,
"step": 240
},
{
"epoch": 0.08,
"learning_rate": 4.999985973218796e-05,
"loss": 0.4762,
"step": 241
},
{
"epoch": 0.08,
"learning_rate": 4.999980907998697e-05,
"loss": 0.4307,
"step": 242
},
{
"epoch": 0.08,
"learning_rate": 4.999975063518218e-05,
"loss": 0.4027,
"step": 243
},
{
"epoch": 0.08,
"learning_rate": 4.999968439779181e-05,
"loss": 0.3858,
"step": 244
},
{
"epoch": 0.08,
"learning_rate": 4.99996103678365e-05,
"loss": 0.5104,
"step": 245
},
{
"epoch": 0.08,
"learning_rate": 4.9999528545339345e-05,
"loss": 0.3792,
"step": 246
},
{
"epoch": 0.08,
"learning_rate": 4.999943893032583e-05,
"loss": 0.412,
"step": 247
},
{
"epoch": 0.08,
"learning_rate": 4.99993415228239e-05,
"loss": 0.4597,
"step": 248
},
{
"epoch": 0.08,
"learning_rate": 4.999923632286389e-05,
"loss": 0.413,
"step": 249
},
{
"epoch": 0.09,
"learning_rate": 4.9999123330478636e-05,
"loss": 0.4112,
"step": 250
},
{
"epoch": 0.09,
"learning_rate": 4.9999002545703333e-05,
"loss": 0.3509,
"step": 251
},
{
"epoch": 0.09,
"learning_rate": 4.9998873968575636e-05,
"loss": 0.4245,
"step": 252
},
{
"epoch": 0.09,
"learning_rate": 4.999873759913561e-05,
"loss": 0.3676,
"step": 253
},
{
"epoch": 0.09,
"learning_rate": 4.999859343742578e-05,
"loss": 0.4154,
"step": 254
},
{
"epoch": 0.09,
"learning_rate": 4.999844148349108e-05,
"loss": 0.3947,
"step": 255
},
{
"epoch": 0.09,
"learning_rate": 4.999828173737886e-05,
"loss": 0.3872,
"step": 256
},
{
"epoch": 0.09,
"learning_rate": 4.9998114199138926e-05,
"loss": 0.3985,
"step": 257
},
{
"epoch": 0.09,
"learning_rate": 4.99979388688235e-05,
"loss": 0.4101,
"step": 258
},
{
"epoch": 0.09,
"learning_rate": 4.999775574648724e-05,
"loss": 0.4349,
"step": 259
},
{
"epoch": 0.09,
"learning_rate": 4.9997564832187216e-05,
"loss": 0.3921,
"step": 260
},
{
"epoch": 0.09,
"learning_rate": 4.9997366125982934e-05,
"loss": 0.4459,
"step": 261
},
{
"epoch": 0.09,
"learning_rate": 4.999715962793634e-05,
"loss": 0.4204,
"step": 262
},
{
"epoch": 0.09,
"learning_rate": 4.999694533811181e-05,
"loss": 0.4179,
"step": 263
},
{
"epoch": 0.09,
"learning_rate": 4.999672325657612e-05,
"loss": 0.4148,
"step": 264
},
{
"epoch": 0.09,
"learning_rate": 4.99964933833985e-05,
"loss": 0.463,
"step": 265
},
{
"epoch": 0.09,
"learning_rate": 4.999625571865061e-05,
"loss": 0.3729,
"step": 266
},
{
"epoch": 0.09,
"learning_rate": 4.9996010262406526e-05,
"loss": 0.4077,
"step": 267
},
{
"epoch": 0.09,
"learning_rate": 4.999575701474276e-05,
"loss": 0.3674,
"step": 268
},
{
"epoch": 0.09,
"learning_rate": 4.999549597573825e-05,
"loss": 0.4214,
"step": 269
},
{
"epoch": 0.09,
"learning_rate": 4.9995227145474364e-05,
"loss": 0.4176,
"step": 270
},
{
"epoch": 0.09,
"learning_rate": 4.9994950524034905e-05,
"loss": 0.3789,
"step": 271
},
{
"epoch": 0.09,
"learning_rate": 4.999466611150609e-05,
"loss": 0.3644,
"step": 272
},
{
"epoch": 0.09,
"learning_rate": 4.9994373907976564e-05,
"loss": 0.3681,
"step": 273
},
{
"epoch": 0.09,
"learning_rate": 4.999407391353743e-05,
"loss": 0.4424,
"step": 274
},
{
"epoch": 0.09,
"learning_rate": 4.999376612828218e-05,
"loss": 0.4185,
"step": 275
},
{
"epoch": 0.09,
"learning_rate": 4.999345055230677e-05,
"loss": 0.3608,
"step": 276
},
{
"epoch": 0.09,
"learning_rate": 4.9993127185709536e-05,
"loss": 0.4107,
"step": 277
},
{
"epoch": 0.09,
"learning_rate": 4.99927960285913e-05,
"loss": 0.4221,
"step": 278
},
{
"epoch": 0.1,
"learning_rate": 4.999245708105529e-05,
"loss": 0.4372,
"step": 279
},
{
"epoch": 0.1,
"learning_rate": 4.9992110343207145e-05,
"loss": 0.389,
"step": 280
},
{
"epoch": 0.1,
"learning_rate": 4.999175581515494e-05,
"loss": 0.3931,
"step": 281
},
{
"epoch": 0.1,
"learning_rate": 4.99913934970092e-05,
"loss": 0.3498,
"step": 282
},
{
"epoch": 0.1,
"learning_rate": 4.9991023388882854e-05,
"loss": 0.4115,
"step": 283
},
{
"epoch": 0.1,
"learning_rate": 4.999064549089126e-05,
"loss": 0.3949,
"step": 284
},
{
"epoch": 0.1,
"learning_rate": 4.9990259803152225e-05,
"loss": 0.382,
"step": 285
},
{
"epoch": 0.1,
"learning_rate": 4.998986632578596e-05,
"loss": 0.416,
"step": 286
},
{
"epoch": 0.1,
"learning_rate": 4.9989465058915116e-05,
"loss": 0.4084,
"step": 287
},
{
"epoch": 0.1,
"learning_rate": 4.998905600266478e-05,
"loss": 0.4012,
"step": 288
},
{
"epoch": 0.1,
"learning_rate": 4.998863915716244e-05,
"loss": 0.4464,
"step": 289
},
{
"epoch": 0.1,
"learning_rate": 4.998821452253805e-05,
"loss": 0.3788,
"step": 290
},
{
"epoch": 0.1,
"learning_rate": 4.9987782098923955e-05,
"loss": 0.4288,
"step": 291
},
{
"epoch": 0.1,
"learning_rate": 4.9987341886454945e-05,
"loss": 0.4141,
"step": 292
},
{
"epoch": 0.1,
"learning_rate": 4.9986893885268247e-05,
"loss": 0.4026,
"step": 293
},
{
"epoch": 0.1,
"learning_rate": 4.9986438095503504e-05,
"loss": 0.4128,
"step": 294
},
{
"epoch": 0.1,
"learning_rate": 4.998597451730279e-05,
"loss": 0.3682,
"step": 295
},
{
"epoch": 0.1,
"learning_rate": 4.998550315081059e-05,
"loss": 0.3991,
"step": 296
},
{
"epoch": 0.1,
"learning_rate": 4.998502399617385e-05,
"loss": 0.4281,
"step": 297
},
{
"epoch": 0.1,
"learning_rate": 4.998453705354192e-05,
"loss": 0.429,
"step": 298
},
{
"epoch": 0.1,
"learning_rate": 4.998404232306658e-05,
"loss": 0.4002,
"step": 299
},
{
"epoch": 0.1,
"learning_rate": 4.9983539804902044e-05,
"loss": 0.4029,
"step": 300
},
{
"epoch": 0.1,
"learning_rate": 4.998302949920495e-05,
"loss": 0.4472,
"step": 301
},
{
"epoch": 0.1,
"learning_rate": 4.9982511406134356e-05,
"loss": 0.3662,
"step": 302
},
{
"epoch": 0.1,
"learning_rate": 4.9981985525851765e-05,
"loss": 0.3915,
"step": 303
},
{
"epoch": 0.1,
"learning_rate": 4.9981451858521094e-05,
"loss": 0.3335,
"step": 304
},
{
"epoch": 0.1,
"learning_rate": 4.998091040430869e-05,
"loss": 0.4067,
"step": 305
},
{
"epoch": 0.1,
"learning_rate": 4.998036116338333e-05,
"loss": 0.3585,
"step": 306
},
{
"epoch": 0.1,
"learning_rate": 4.997980413591622e-05,
"loss": 0.3905,
"step": 307
},
{
"epoch": 0.11,
"learning_rate": 4.9979239322080976e-05,
"loss": 0.4419,
"step": 308
},
{
"epoch": 0.11,
"learning_rate": 4.997866672205366e-05,
"loss": 0.4358,
"step": 309
},
{
"epoch": 0.11,
"learning_rate": 4.9978086336012756e-05,
"loss": 0.3869,
"step": 310
},
{
"epoch": 0.11,
"learning_rate": 4.997749816413918e-05,
"loss": 0.3583,
"step": 311
},
{
"epoch": 0.11,
"learning_rate": 4.9976902206616264e-05,
"loss": 0.4075,
"step": 312
},
{
"epoch": 0.11,
"learning_rate": 4.9976298463629764e-05,
"loss": 0.3696,
"step": 313
},
{
"epoch": 0.11,
"learning_rate": 4.997568693536789e-05,
"loss": 0.402,
"step": 314
},
{
"epoch": 0.11,
"learning_rate": 4.9975067622021245e-05,
"loss": 0.3933,
"step": 315
},
{
"epoch": 0.11,
"learning_rate": 4.997444052378287e-05,
"loss": 0.3914,
"step": 316
},
{
"epoch": 0.11,
"learning_rate": 4.997380564084824e-05,
"loss": 0.3559,
"step": 317
},
{
"epoch": 0.11,
"learning_rate": 4.997316297341526e-05,
"loss": 0.4058,
"step": 318
},
{
"epoch": 0.11,
"learning_rate": 4.997251252168425e-05,
"loss": 0.3793,
"step": 319
},
{
"epoch": 0.11,
"learning_rate": 4.997185428585794e-05,
"loss": 0.3783,
"step": 320
},
{
"epoch": 0.11,
"learning_rate": 4.997118826614155e-05,
"loss": 0.4229,
"step": 321
},
{
"epoch": 0.11,
"learning_rate": 4.9970514462742634e-05,
"loss": 0.3879,
"step": 322
},
{
"epoch": 0.11,
"learning_rate": 4.9969832875871246e-05,
"loss": 0.3654,
"step": 323
},
{
"epoch": 0.11,
"learning_rate": 4.9969143505739846e-05,
"loss": 0.3828,
"step": 324
},
{
"epoch": 0.11,
"learning_rate": 4.99684463525633e-05,
"loss": 0.3588,
"step": 325
},
{
"epoch": 0.11,
"learning_rate": 4.9967741416558936e-05,
"loss": 0.4131,
"step": 326
},
{
"epoch": 0.11,
"learning_rate": 4.996702869794646e-05,
"loss": 0.4336,
"step": 327
},
{
"epoch": 0.11,
"learning_rate": 4.9966308196948054e-05,
"loss": 0.4052,
"step": 328
},
{
"epoch": 0.11,
"learning_rate": 4.996557991378828e-05,
"loss": 0.4317,
"step": 329
},
{
"epoch": 0.11,
"learning_rate": 4.996484384869418e-05,
"loss": 0.3655,
"step": 330
},
{
"epoch": 0.11,
"learning_rate": 4.996410000189516e-05,
"loss": 0.4058,
"step": 331
},
{
"epoch": 0.11,
"learning_rate": 4.9963348373623104e-05,
"loss": 0.4011,
"step": 332
},
{
"epoch": 0.11,
"learning_rate": 4.996258896411229e-05,
"loss": 0.3735,
"step": 333
},
{
"epoch": 0.11,
"learning_rate": 4.996182177359942e-05,
"loss": 0.3731,
"step": 334
},
{
"epoch": 0.11,
"learning_rate": 4.996104680232365e-05,
"loss": 0.4436,
"step": 335
},
{
"epoch": 0.11,
"learning_rate": 4.996026405052654e-05,
"loss": 0.3761,
"step": 336
},
{
"epoch": 0.11,
"learning_rate": 4.995947351845207e-05,
"loss": 0.3377,
"step": 337
},
{
"epoch": 0.12,
"learning_rate": 4.9958675206346663e-05,
"loss": 0.3533,
"step": 338
},
{
"epoch": 0.12,
"learning_rate": 4.9957869114459156e-05,
"loss": 0.3534,
"step": 339
},
{
"epoch": 0.12,
"learning_rate": 4.9957055243040803e-05,
"loss": 0.4177,
"step": 340
},
{
"epoch": 0.12,
"learning_rate": 4.995623359234531e-05,
"loss": 0.4364,
"step": 341
},
{
"epoch": 0.12,
"learning_rate": 4.9955404162628785e-05,
"loss": 0.3784,
"step": 342
},
{
"epoch": 0.12,
"learning_rate": 4.9954566954149755e-05,
"loss": 0.4643,
"step": 343
},
{
"epoch": 0.12,
"learning_rate": 4.9953721967169195e-05,
"loss": 0.3432,
"step": 344
},
{
"epoch": 0.12,
"learning_rate": 4.99528692019505e-05,
"loss": 0.3372,
"step": 345
},
{
"epoch": 0.12,
"learning_rate": 4.9952008658759465e-05,
"loss": 0.4094,
"step": 346
},
{
"epoch": 0.12,
"learning_rate": 4.995114033786433e-05,
"loss": 0.3813,
"step": 347
},
{
"epoch": 0.12,
"learning_rate": 4.995026423953577e-05,
"loss": 0.3743,
"step": 348
},
{
"epoch": 0.12,
"learning_rate": 4.9949380364046865e-05,
"loss": 0.3592,
"step": 349
},
{
"epoch": 0.12,
"learning_rate": 4.9948488711673117e-05,
"loss": 0.3958,
"step": 350
},
{
"epoch": 0.12,
"learning_rate": 4.994758928269246e-05,
"loss": 0.3748,
"step": 351
},
{
"epoch": 0.12,
"learning_rate": 4.994668207738527e-05,
"loss": 0.3795,
"step": 352
},
{
"epoch": 0.12,
"learning_rate": 4.994576709603431e-05,
"loss": 0.3871,
"step": 353
},
{
"epoch": 0.12,
"learning_rate": 4.994484433892479e-05,
"loss": 0.3671,
"step": 354
},
{
"epoch": 0.12,
"learning_rate": 4.9943913806344334e-05,
"loss": 0.3587,
"step": 355
},
{
"epoch": 0.12,
"learning_rate": 4.9942975498583004e-05,
"loss": 0.3794,
"step": 356
},
{
"epoch": 0.12,
"learning_rate": 4.9942029415933276e-05,
"loss": 0.3394,
"step": 357
},
{
"epoch": 0.12,
"learning_rate": 4.994107555869005e-05,
"loss": 0.369,
"step": 358
},
{
"epoch": 0.12,
"learning_rate": 4.994011392715065e-05,
"loss": 0.3716,
"step": 359
},
{
"epoch": 0.12,
"learning_rate": 4.993914452161482e-05,
"loss": 0.4321,
"step": 360
},
{
"epoch": 0.12,
"learning_rate": 4.993816734238472e-05,
"loss": 0.4041,
"step": 361
},
{
"epoch": 0.12,
"learning_rate": 4.9937182389764966e-05,
"loss": 0.3966,
"step": 362
},
{
"epoch": 0.12,
"learning_rate": 4.993618966406256e-05,
"loss": 0.3735,
"step": 363
},
{
"epoch": 0.12,
"learning_rate": 4.9935189165586944e-05,
"loss": 0.3554,
"step": 364
},
{
"epoch": 0.12,
"learning_rate": 4.9934180894649974e-05,
"loss": 0.4128,
"step": 365
},
{
"epoch": 0.12,
"learning_rate": 4.993316485156594e-05,
"loss": 0.3662,
"step": 366
},
{
"epoch": 0.13,
"learning_rate": 4.993214103665156e-05,
"loss": 0.3913,
"step": 367
},
{
"epoch": 0.13,
"learning_rate": 4.993110945022594e-05,
"loss": 0.3825,
"step": 368
},
{
"epoch": 0.13,
"learning_rate": 4.993007009261065e-05,
"loss": 0.3352,
"step": 369
},
{
"epoch": 0.13,
"learning_rate": 4.992902296412966e-05,
"loss": 0.4566,
"step": 370
},
{
"epoch": 0.13,
"learning_rate": 4.992796806510936e-05,
"loss": 0.4114,
"step": 371
},
{
"epoch": 0.13,
"learning_rate": 4.9926905395878586e-05,
"loss": 0.4181,
"step": 372
},
{
"epoch": 0.13,
"learning_rate": 4.992583495676856e-05,
"loss": 0.4337,
"step": 373
},
{
"epoch": 0.13,
"learning_rate": 4.9924756748112956e-05,
"loss": 0.4143,
"step": 374
},
{
"epoch": 0.13,
"learning_rate": 4.9923670770247854e-05,
"loss": 0.3993,
"step": 375
},
{
"epoch": 0.13,
"learning_rate": 4.9922577023511763e-05,
"loss": 0.3964,
"step": 376
},
{
"epoch": 0.13,
"learning_rate": 4.992147550824561e-05,
"loss": 0.3912,
"step": 377
},
{
"epoch": 0.13,
"learning_rate": 4.992036622479275e-05,
"loss": 0.3543,
"step": 378
},
{
"epoch": 0.13,
"learning_rate": 4.9919249173498947e-05,
"loss": 0.4007,
"step": 379
},
{
"epoch": 0.13,
"learning_rate": 4.991812435471239e-05,
"loss": 0.3811,
"step": 380
},
{
"epoch": 0.13,
"learning_rate": 4.99169917687837e-05,
"loss": 0.3924,
"step": 381
},
{
"epoch": 0.13,
"learning_rate": 4.9915851416065904e-05,
"loss": 0.3991,
"step": 382
},
{
"epoch": 0.13,
"learning_rate": 4.9914703296914464e-05,
"loss": 0.4415,
"step": 383
},
{
"epoch": 0.13,
"learning_rate": 4.9913547411687256e-05,
"loss": 0.4219,
"step": 384
},
{
"epoch": 0.13,
"learning_rate": 4.9912383760744576e-05,
"loss": 0.3864,
"step": 385
},
{
"epoch": 0.13,
"learning_rate": 4.991121234444914e-05,
"loss": 0.3748,
"step": 386
},
{
"epoch": 0.13,
"learning_rate": 4.991003316316609e-05,
"loss": 0.3996,
"step": 387
},
{
"epoch": 0.13,
"learning_rate": 4.990884621726297e-05,
"loss": 0.4588,
"step": 388
},
{
"epoch": 0.13,
"learning_rate": 4.9907651507109785e-05,
"loss": 0.3765,
"step": 389
},
{
"epoch": 0.13,
"learning_rate": 4.9906449033078915e-05,
"loss": 0.3757,
"step": 390
},
{
"epoch": 0.13,
"learning_rate": 4.990523879554517e-05,
"loss": 0.4079,
"step": 391
},
{
"epoch": 0.13,
"learning_rate": 4.990402079488582e-05,
"loss": 0.3938,
"step": 392
},
{
"epoch": 0.13,
"learning_rate": 4.99027950314805e-05,
"loss": 0.3372,
"step": 393
},
{
"epoch": 0.13,
"learning_rate": 4.9901561505711294e-05,
"loss": 0.4265,
"step": 394
},
{
"epoch": 0.13,
"learning_rate": 4.9900320217962694e-05,
"loss": 0.3787,
"step": 395
},
{
"epoch": 0.14,
"learning_rate": 4.9899071168621626e-05,
"loss": 0.3681,
"step": 396
},
{
"epoch": 0.14,
"learning_rate": 4.989781435807743e-05,
"loss": 0.3762,
"step": 397
},
{
"epoch": 0.14,
"learning_rate": 4.989654978672185e-05,
"loss": 0.4188,
"step": 398
},
{
"epoch": 0.14,
"learning_rate": 4.9895277454949074e-05,
"loss": 0.3678,
"step": 399
},
{
"epoch": 0.14,
"learning_rate": 4.9893997363155685e-05,
"loss": 0.3314,
"step": 400
},
{
"epoch": 0.14,
"learning_rate": 4.989270951174071e-05,
"loss": 0.4553,
"step": 401
},
{
"epoch": 0.14,
"learning_rate": 4.989141390110557e-05,
"loss": 0.382,
"step": 402
},
{
"epoch": 0.14,
"learning_rate": 4.989011053165411e-05,
"loss": 0.3993,
"step": 403
},
{
"epoch": 0.14,
"learning_rate": 4.9888799403792606e-05,
"loss": 0.4016,
"step": 404
},
{
"epoch": 0.14,
"learning_rate": 4.9887480517929746e-05,
"loss": 0.3329,
"step": 405
},
{
"epoch": 0.14,
"learning_rate": 4.988615387447664e-05,
"loss": 0.3603,
"step": 406
},
{
"epoch": 0.14,
"learning_rate": 4.98848194738468e-05,
"loss": 0.4414,
"step": 407
},
{
"epoch": 0.14,
"learning_rate": 4.988347731645617e-05,
"loss": 0.393,
"step": 408
},
{
"epoch": 0.14,
"learning_rate": 4.988212740272312e-05,
"loss": 0.3875,
"step": 409
},
{
"epoch": 0.14,
"learning_rate": 4.988076973306841e-05,
"loss": 0.3937,
"step": 410
},
{
"epoch": 0.14,
"learning_rate": 4.987940430791525e-05,
"loss": 0.3987,
"step": 411
},
{
"epoch": 0.14,
"learning_rate": 4.987803112768925e-05,
"loss": 0.4119,
"step": 412
},
{
"epoch": 0.14,
"learning_rate": 4.987665019281843e-05,
"loss": 0.4284,
"step": 413
},
{
"epoch": 0.14,
"learning_rate": 4.987526150373324e-05,
"loss": 0.3574,
"step": 414
},
{
"epoch": 0.14,
"learning_rate": 4.987386506086654e-05,
"loss": 0.3979,
"step": 415
},
{
"epoch": 0.14,
"learning_rate": 4.987246086465362e-05,
"loss": 0.3805,
"step": 416
},
{
"epoch": 0.14,
"learning_rate": 4.987104891553218e-05,
"loss": 0.4038,
"step": 417
},
{
"epoch": 0.14,
"learning_rate": 4.9869629213942315e-05,
"loss": 0.3897,
"step": 418
},
{
"epoch": 0.14,
"learning_rate": 4.986820176032657e-05,
"loss": 0.3942,
"step": 419
},
{
"epoch": 0.14,
"learning_rate": 4.98667665551299e-05,
"loss": 0.399,
"step": 420
},
{
"epoch": 0.14,
"learning_rate": 4.986532359879964e-05,
"loss": 0.3391,
"step": 421
},
{
"epoch": 0.14,
"learning_rate": 4.9863872891785586e-05,
"loss": 0.4044,
"step": 422
},
{
"epoch": 0.14,
"learning_rate": 4.9862414434539934e-05,
"loss": 0.3521,
"step": 423
},
{
"epoch": 0.14,
"learning_rate": 4.986094822751729e-05,
"loss": 0.3755,
"step": 424
},
{
"epoch": 0.15,
"learning_rate": 4.9859474271174686e-05,
"loss": 0.4062,
"step": 425
},
{
"epoch": 0.15,
"learning_rate": 4.985799256597155e-05,
"loss": 0.3783,
"step": 426
},
{
"epoch": 0.15,
"learning_rate": 4.985650311236977e-05,
"loss": 0.3782,
"step": 427
},
{
"epoch": 0.15,
"learning_rate": 4.9855005910833586e-05,
"loss": 0.3578,
"step": 428
},
{
"epoch": 0.15,
"learning_rate": 4.9853500961829696e-05,
"loss": 0.3793,
"step": 429
},
{
"epoch": 0.15,
"learning_rate": 4.985198826582721e-05,
"loss": 0.3509,
"step": 430
},
{
"epoch": 0.15,
"learning_rate": 4.985046782329763e-05,
"loss": 0.3299,
"step": 431
},
{
"epoch": 0.15,
"learning_rate": 4.9848939634714906e-05,
"loss": 0.3669,
"step": 432
},
{
"epoch": 0.15,
"learning_rate": 4.9847403700555365e-05,
"loss": 0.411,
"step": 433
},
{
"epoch": 0.15,
"learning_rate": 4.984586002129778e-05,
"loss": 0.4475,
"step": 434
},
{
"epoch": 0.15,
"learning_rate": 4.984430859742333e-05,
"loss": 0.3813,
"step": 435
},
{
"epoch": 0.15,
"learning_rate": 4.98427494294156e-05,
"loss": 0.3752,
"step": 436
},
{
"epoch": 0.15,
"learning_rate": 4.984118251776059e-05,
"loss": 0.3297,
"step": 437
},
{
"epoch": 0.15,
"learning_rate": 4.983960786294671e-05,
"loss": 0.3372,
"step": 438
},
{
"epoch": 0.15,
"learning_rate": 4.98380254654648e-05,
"loss": 0.3675,
"step": 439
},
{
"epoch": 0.15,
"learning_rate": 4.9836435325808105e-05,
"loss": 0.3573,
"step": 440
},
{
"epoch": 0.15,
"learning_rate": 4.983483744447227e-05,
"loss": 0.3827,
"step": 441
},
{
"epoch": 0.15,
"learning_rate": 4.9833231821955386e-05,
"loss": 0.3423,
"step": 442
},
{
"epoch": 0.15,
"learning_rate": 4.983161845875791e-05,
"loss": 0.3774,
"step": 443
},
{
"epoch": 0.15,
"learning_rate": 4.9829997355382764e-05,
"loss": 0.3463,
"step": 444
},
{
"epoch": 0.15,
"learning_rate": 4.9828368512335235e-05,
"loss": 0.3846,
"step": 445
},
{
"epoch": 0.15,
"learning_rate": 4.982673193012305e-05,
"loss": 0.3923,
"step": 446
},
{
"epoch": 0.15,
"learning_rate": 4.982508760925635e-05,
"loss": 0.3957,
"step": 447
},
{
"epoch": 0.15,
"learning_rate": 4.982343555024767e-05,
"loss": 0.3925,
"step": 448
},
{
"epoch": 0.15,
"learning_rate": 4.982177575361198e-05,
"loss": 0.4243,
"step": 449
},
{
"epoch": 0.15,
"learning_rate": 4.982010821986664e-05,
"loss": 0.361,
"step": 450
},
{
"epoch": 0.15,
"learning_rate": 4.981843294953143e-05,
"loss": 0.394,
"step": 451
},
{
"epoch": 0.15,
"learning_rate": 4.981674994312855e-05,
"loss": 0.3591,
"step": 452
},
{
"epoch": 0.15,
"learning_rate": 4.98150592011826e-05,
"loss": 0.3955,
"step": 453
},
{
"epoch": 0.15,
"learning_rate": 4.9813360724220594e-05,
"loss": 0.3892,
"step": 454
},
{
"epoch": 0.16,
"learning_rate": 4.9811654512771964e-05,
"loss": 0.3604,
"step": 455
},
{
"epoch": 0.16,
"learning_rate": 4.980994056736854e-05,
"loss": 0.3896,
"step": 456
},
{
"epoch": 0.16,
"learning_rate": 4.980821888854458e-05,
"loss": 0.3824,
"step": 457
},
{
"epoch": 0.16,
"learning_rate": 4.9806489476836725e-05,
"loss": 0.4012,
"step": 458
},
{
"epoch": 0.16,
"learning_rate": 4.980475233278406e-05,
"loss": 0.369,
"step": 459
},
{
"epoch": 0.16,
"learning_rate": 4.980300745692806e-05,
"loss": 0.4059,
"step": 460
},
{
"epoch": 0.16,
"learning_rate": 4.9801254849812606e-05,
"loss": 0.4166,
"step": 461
},
{
"epoch": 0.16,
"learning_rate": 4.979949451198401e-05,
"loss": 0.3894,
"step": 462
},
{
"epoch": 0.16,
"learning_rate": 4.979772644399098e-05,
"loss": 0.3954,
"step": 463
},
{
"epoch": 0.16,
"learning_rate": 4.9795950646384624e-05,
"loss": 0.334,
"step": 464
},
{
"epoch": 0.16,
"learning_rate": 4.979416711971848e-05,
"loss": 0.4614,
"step": 465
},
{
"epoch": 0.16,
"learning_rate": 4.979237586454847e-05,
"loss": 0.3654,
"step": 466
},
{
"epoch": 0.16,
"learning_rate": 4.979057688143296e-05,
"loss": 0.4303,
"step": 467
},
{
"epoch": 0.16,
"learning_rate": 4.978877017093268e-05,
"loss": 0.3988,
"step": 468
},
{
"epoch": 0.16,
"learning_rate": 4.978695573361083e-05,
"loss": 0.4,
"step": 469
},
{
"epoch": 0.16,
"learning_rate": 4.978513357003294e-05,
"loss": 0.4016,
"step": 470
},
{
"epoch": 0.16,
"learning_rate": 4.9783303680767024e-05,
"loss": 0.4093,
"step": 471
},
{
"epoch": 0.16,
"learning_rate": 4.9781466066383454e-05,
"loss": 0.3822,
"step": 472
},
{
"epoch": 0.16,
"learning_rate": 4.9779620727455024e-05,
"loss": 0.3547,
"step": 473
},
{
"epoch": 0.16,
"learning_rate": 4.977776766455695e-05,
"loss": 0.3476,
"step": 474
},
{
"epoch": 0.16,
"learning_rate": 4.977590687826683e-05,
"loss": 0.3939,
"step": 475
},
{
"epoch": 0.16,
"learning_rate": 4.97740383691647e-05,
"loss": 0.5032,
"step": 476
},
{
"epoch": 0.16,
"learning_rate": 4.977216213783297e-05,
"loss": 0.3959,
"step": 477
},
{
"epoch": 0.16,
"learning_rate": 4.977027818485648e-05,
"loss": 0.3767,
"step": 478
},
{
"epoch": 0.16,
"learning_rate": 4.9768386510822475e-05,
"loss": 0.4035,
"step": 479
},
{
"epoch": 0.16,
"learning_rate": 4.976648711632059e-05,
"loss": 0.3627,
"step": 480
},
{
"epoch": 0.16,
"learning_rate": 4.97645800019429e-05,
"loss": 0.3727,
"step": 481
},
{
"epoch": 0.16,
"learning_rate": 4.976266516828384e-05,
"loss": 0.4056,
"step": 482
},
{
"epoch": 0.16,
"learning_rate": 4.976074261594029e-05,
"loss": 0.4138,
"step": 483
},
{
"epoch": 0.17,
"learning_rate": 4.975881234551153e-05,
"loss": 0.3884,
"step": 484
},
{
"epoch": 0.17,
"learning_rate": 4.975687435759922e-05,
"loss": 0.4172,
"step": 485
},
{
"epoch": 0.17,
"learning_rate": 4.9754928652807456e-05,
"loss": 0.4045,
"step": 486
},
{
"epoch": 0.17,
"learning_rate": 4.975297523174271e-05,
"loss": 0.3713,
"step": 487
},
{
"epoch": 0.17,
"learning_rate": 4.9751014095013905e-05,
"loss": 0.3751,
"step": 488
},
{
"epoch": 0.17,
"learning_rate": 4.974904524323232e-05,
"loss": 0.4276,
"step": 489
},
{
"epoch": 0.17,
"learning_rate": 4.974706867701165e-05,
"loss": 0.4027,
"step": 490
},
{
"epoch": 0.17,
"learning_rate": 4.974508439696803e-05,
"loss": 0.3919,
"step": 491
},
{
"epoch": 0.17,
"learning_rate": 4.974309240371996e-05,
"loss": 0.3336,
"step": 492
},
{
"epoch": 0.17,
"learning_rate": 4.974109269788835e-05,
"loss": 0.3937,
"step": 493
},
{
"epoch": 0.17,
"learning_rate": 4.973908528009653e-05,
"loss": 0.4244,
"step": 494
},
{
"epoch": 0.17,
"learning_rate": 4.973707015097022e-05,
"loss": 0.3609,
"step": 495
},
{
"epoch": 0.17,
"learning_rate": 4.9735047311137556e-05,
"loss": 0.3636,
"step": 496
},
{
"epoch": 0.17,
"learning_rate": 4.973301676122907e-05,
"loss": 0.4057,
"step": 497
},
{
"epoch": 0.17,
"learning_rate": 4.973097850187768e-05,
"loss": 0.4317,
"step": 498
},
{
"epoch": 0.17,
"learning_rate": 4.972893253371875e-05,
"loss": 0.3745,
"step": 499
},
{
"epoch": 0.17,
"learning_rate": 4.972687885739001e-05,
"loss": 0.3917,
"step": 500
},
{
"epoch": 0.17,
"learning_rate": 4.97248174735316e-05,
"loss": 0.3655,
"step": 501
},
{
"epoch": 0.17,
"learning_rate": 4.9722748382786077e-05,
"loss": 0.4415,
"step": 502
},
{
"epoch": 0.17,
"learning_rate": 4.9720671585798385e-05,
"loss": 0.3916,
"step": 503
},
{
"epoch": 0.17,
"learning_rate": 4.971858708321587e-05,
"loss": 0.358,
"step": 504
},
{
"epoch": 0.17,
"learning_rate": 4.97164948756883e-05,
"loss": 0.3632,
"step": 505
},
{
"epoch": 0.17,
"learning_rate": 4.971439496386782e-05,
"loss": 0.3323,
"step": 506
},
{
"epoch": 0.17,
"learning_rate": 4.971228734840897e-05,
"loss": 0.3689,
"step": 507
},
{
"epoch": 0.17,
"learning_rate": 4.971017202996874e-05,
"loss": 0.3824,
"step": 508
},
{
"epoch": 0.17,
"learning_rate": 4.970804900920646e-05,
"loss": 0.3987,
"step": 509
},
{
"epoch": 0.17,
"learning_rate": 4.9705918286783906e-05,
"loss": 0.3871,
"step": 510
},
{
"epoch": 0.17,
"learning_rate": 4.970377986336524e-05,
"loss": 0.3855,
"step": 511
},
{
"epoch": 0.17,
"learning_rate": 4.9701633739617e-05,
"loss": 0.3836,
"step": 512
},
{
"epoch": 0.18,
"learning_rate": 4.969947991620818e-05,
"loss": 0.3967,
"step": 513
},
{
"epoch": 0.18,
"learning_rate": 4.969731839381012e-05,
"loss": 0.4485,
"step": 514
},
{
"epoch": 0.18,
"learning_rate": 4.9695149173096575e-05,
"loss": 0.3904,
"step": 515
},
{
"epoch": 0.18,
"learning_rate": 4.969297225474373e-05,
"loss": 0.3631,
"step": 516
},
{
"epoch": 0.18,
"learning_rate": 4.969078763943012e-05,
"loss": 0.3748,
"step": 517
},
{
"epoch": 0.18,
"learning_rate": 4.968859532783672e-05,
"loss": 0.3873,
"step": 518
},
{
"epoch": 0.18,
"learning_rate": 4.9686395320646876e-05,
"loss": 0.4058,
"step": 519
},
{
"epoch": 0.18,
"learning_rate": 4.9684187618546344e-05,
"loss": 0.3774,
"step": 520
},
{
"epoch": 0.18,
"learning_rate": 4.9681972222223295e-05,
"loss": 0.4021,
"step": 521
},
{
"epoch": 0.18,
"learning_rate": 4.967974913236826e-05,
"loss": 0.3549,
"step": 522
},
{
"epoch": 0.18,
"learning_rate": 4.967751834967421e-05,
"loss": 0.377,
"step": 523
},
{
"epoch": 0.18,
"learning_rate": 4.9675279874836497e-05,
"loss": 0.3911,
"step": 524
},
{
"epoch": 0.18,
"learning_rate": 4.967303370855285e-05,
"loss": 0.3561,
"step": 525
},
{
"epoch": 0.18,
"learning_rate": 4.9670779851523416e-05,
"loss": 0.3493,
"step": 526
},
{
"epoch": 0.18,
"learning_rate": 4.9668518304450756e-05,
"loss": 0.3994,
"step": 527
},
{
"epoch": 0.18,
"learning_rate": 4.9666249068039795e-05,
"loss": 0.3487,
"step": 528
},
{
"epoch": 0.18,
"learning_rate": 4.966397214299786e-05,
"loss": 0.3662,
"step": 529
},
{
"epoch": 0.18,
"learning_rate": 4.9661687530034706e-05,
"loss": 0.3485,
"step": 530
},
{
"epoch": 0.18,
"learning_rate": 4.9659395229862435e-05,
"loss": 0.4107,
"step": 531
},
{
"epoch": 0.18,
"learning_rate": 4.96570952431956e-05,
"loss": 0.3997,
"step": 532
},
{
"epoch": 0.18,
"learning_rate": 4.9654787570751104e-05,
"loss": 0.3654,
"step": 533
},
{
"epoch": 0.18,
"learning_rate": 4.965247221324827e-05,
"loss": 0.3678,
"step": 534
},
{
"epoch": 0.18,
"learning_rate": 4.96501491714088e-05,
"loss": 0.4096,
"step": 535
},
{
"epoch": 0.18,
"learning_rate": 4.9647818445956814e-05,
"loss": 0.3694,
"step": 536
},
{
"epoch": 0.18,
"learning_rate": 4.9645480037618816e-05,
"loss": 0.4129,
"step": 537
},
{
"epoch": 0.18,
"learning_rate": 4.964313394712369e-05,
"loss": 0.3938,
"step": 538
},
{
"epoch": 0.18,
"learning_rate": 4.9640780175202734e-05,
"loss": 0.4155,
"step": 539
},
{
"epoch": 0.18,
"learning_rate": 4.9638418722589636e-05,
"loss": 0.395,
"step": 540
},
{
"epoch": 0.18,
"learning_rate": 4.9636049590020475e-05,
"loss": 0.3851,
"step": 541
},
{
"epoch": 0.18,
"learning_rate": 4.963367277823373e-05,
"loss": 0.3599,
"step": 542
},
{
"epoch": 0.19,
"learning_rate": 4.963128828797026e-05,
"loss": 0.4015,
"step": 543
},
{
"epoch": 0.19,
"learning_rate": 4.9628896119973324e-05,
"loss": 0.3984,
"step": 544
},
{
"epoch": 0.19,
"learning_rate": 4.962649627498859e-05,
"loss": 0.3386,
"step": 545
},
{
"epoch": 0.19,
"learning_rate": 4.962408875376409e-05,
"loss": 0.3961,
"step": 546
},
{
"epoch": 0.19,
"learning_rate": 4.9621673557050275e-05,
"loss": 0.3297,
"step": 547
},
{
"epoch": 0.19,
"learning_rate": 4.961925068559997e-05,
"loss": 0.4011,
"step": 548
},
{
"epoch": 0.19,
"learning_rate": 4.961682014016841e-05,
"loss": 0.3785,
"step": 549
},
{
"epoch": 0.19,
"learning_rate": 4.961438192151321e-05,
"loss": 0.3827,
"step": 550
},
{
"epoch": 0.19,
"learning_rate": 4.9611936030394365e-05,
"loss": 0.4081,
"step": 551
},
{
"epoch": 0.19,
"learning_rate": 4.9609482467574293e-05,
"loss": 0.4064,
"step": 552
},
{
"epoch": 0.19,
"learning_rate": 4.960702123381777e-05,
"loss": 0.3486,
"step": 553
},
{
"epoch": 0.19,
"learning_rate": 4.9604552329891996e-05,
"loss": 0.3769,
"step": 554
},
{
"epoch": 0.19,
"learning_rate": 4.9602075756566524e-05,
"loss": 0.355,
"step": 555
},
{
"epoch": 0.19,
"learning_rate": 4.9599591514613336e-05,
"loss": 0.4216,
"step": 556
},
{
"epoch": 0.19,
"learning_rate": 4.9597099604806777e-05,
"loss": 0.3567,
"step": 557
},
{
"epoch": 0.19,
"learning_rate": 4.9594600027923594e-05,
"loss": 0.3474,
"step": 558
},
{
"epoch": 0.19,
"learning_rate": 4.9592092784742924e-05,
"loss": 0.3664,
"step": 559
},
{
"epoch": 0.19,
"learning_rate": 4.958957787604628e-05,
"loss": 0.3708,
"step": 560
},
{
"epoch": 0.19,
"learning_rate": 4.958705530261759e-05,
"loss": 0.358,
"step": 561
},
{
"epoch": 0.19,
"learning_rate": 4.958452506524315e-05,
"loss": 0.3594,
"step": 562
},
{
"epoch": 0.19,
"learning_rate": 4.9581987164711654e-05,
"loss": 0.3695,
"step": 563
},
{
"epoch": 0.19,
"learning_rate": 4.957944160181417e-05,
"loss": 0.4056,
"step": 564
},
{
"epoch": 0.19,
"learning_rate": 4.957688837734419e-05,
"loss": 0.4069,
"step": 565
},
{
"epoch": 0.19,
"learning_rate": 4.957432749209755e-05,
"loss": 0.3838,
"step": 566
},
{
"epoch": 0.19,
"learning_rate": 4.95717589468725e-05,
"loss": 0.3876,
"step": 567
},
{
"epoch": 0.19,
"learning_rate": 4.9569182742469675e-05,
"loss": 0.4115,
"step": 568
},
{
"epoch": 0.19,
"learning_rate": 4.9566598879692093e-05,
"loss": 0.3657,
"step": 569
},
{
"epoch": 0.19,
"learning_rate": 4.9564007359345164e-05,
"loss": 0.3544,
"step": 570
},
{
"epoch": 0.19,
"learning_rate": 4.956140818223669e-05,
"loss": 0.3775,
"step": 571
},
{
"epoch": 0.2,
"learning_rate": 4.955880134917683e-05,
"loss": 0.3944,
"step": 572
},
{
"epoch": 0.2,
"learning_rate": 4.955618686097816e-05,
"loss": 0.3606,
"step": 573
},
{
"epoch": 0.2,
"learning_rate": 4.9553564718455644e-05,
"loss": 0.3286,
"step": 574
},
{
"epoch": 0.2,
"learning_rate": 4.9550934922426604e-05,
"loss": 0.3551,
"step": 575
},
{
"epoch": 0.2,
"learning_rate": 4.954829747371078e-05,
"loss": 0.4077,
"step": 576
},
{
"epoch": 0.2,
"learning_rate": 4.954565237313027e-05,
"loss": 0.3858,
"step": 577
},
{
"epoch": 0.2,
"learning_rate": 4.954299962150958e-05,
"loss": 0.4152,
"step": 578
},
{
"epoch": 0.2,
"learning_rate": 4.954033921967557e-05,
"loss": 0.3483,
"step": 579
},
{
"epoch": 0.2,
"learning_rate": 4.953767116845753e-05,
"loss": 0.346,
"step": 580
},
{
"epoch": 0.2,
"learning_rate": 4.953499546868709e-05,
"loss": 0.3716,
"step": 581
},
{
"epoch": 0.2,
"learning_rate": 4.9532312121198305e-05,
"loss": 0.3548,
"step": 582
},
{
"epoch": 0.2,
"learning_rate": 4.952962112682756e-05,
"loss": 0.3629,
"step": 583
},
{
"epoch": 0.2,
"learning_rate": 4.9526922486413686e-05,
"loss": 0.3547,
"step": 584
},
{
"epoch": 0.2,
"learning_rate": 4.9524216200797854e-05,
"loss": 0.4244,
"step": 585
},
{
"epoch": 0.2,
"learning_rate": 4.952150227082363e-05,
"loss": 0.3777,
"step": 586
},
{
"epoch": 0.2,
"learning_rate": 4.951878069733695e-05,
"loss": 0.3613,
"step": 587
},
{
"epoch": 0.2,
"learning_rate": 4.9516051481186174e-05,
"loss": 0.3475,
"step": 588
},
{
"epoch": 0.2,
"learning_rate": 4.9513314623222005e-05,
"loss": 0.4175,
"step": 589
},
{
"epoch": 0.2,
"learning_rate": 4.951057012429753e-05,
"loss": 0.3181,
"step": 590
},
{
"epoch": 0.2,
"learning_rate": 4.950781798526824e-05,
"loss": 0.3633,
"step": 591
},
{
"epoch": 0.2,
"learning_rate": 4.950505820699199e-05,
"loss": 0.3479,
"step": 592
},
{
"epoch": 0.2,
"learning_rate": 4.950229079032902e-05,
"loss": 0.3652,
"step": 593
},
{
"epoch": 0.2,
"learning_rate": 4.949951573614195e-05,
"loss": 0.3398,
"step": 594
},
{
"epoch": 0.2,
"learning_rate": 4.9496733045295785e-05,
"loss": 0.3889,
"step": 595
},
{
"epoch": 0.2,
"learning_rate": 4.9493942718657906e-05,
"loss": 0.402,
"step": 596
},
{
"epoch": 0.2,
"learning_rate": 4.9491144757098084e-05,
"loss": 0.3798,
"step": 597
},
{
"epoch": 0.2,
"learning_rate": 4.9488339161488456e-05,
"loss": 0.4103,
"step": 598
},
{
"epoch": 0.2,
"learning_rate": 4.948552593270354e-05,
"loss": 0.3774,
"step": 599
},
{
"epoch": 0.2,
"learning_rate": 4.948270507162025e-05,
"loss": 0.3516,
"step": 600
},
{
"epoch": 0.21,
"learning_rate": 4.9479876579117844e-05,
"loss": 0.3636,
"step": 601
},
{
"epoch": 0.21,
"learning_rate": 4.9477040456078e-05,
"loss": 0.3594,
"step": 602
},
{
"epoch": 0.21,
"learning_rate": 4.947419670338477e-05,
"loss": 0.3563,
"step": 603
},
{
"epoch": 0.21,
"learning_rate": 4.947134532192454e-05,
"loss": 0.3645,
"step": 604
},
{
"epoch": 0.21,
"learning_rate": 4.9468486312586116e-05,
"loss": 0.3743,
"step": 605
},
{
"epoch": 0.21,
"learning_rate": 4.946561967626068e-05,
"loss": 0.3667,
"step": 606
},
{
"epoch": 0.21,
"learning_rate": 4.946274541384176e-05,
"loss": 0.3402,
"step": 607
},
{
"epoch": 0.21,
"learning_rate": 4.9459863526225306e-05,
"loss": 0.3823,
"step": 608
},
{
"epoch": 0.21,
"learning_rate": 4.94569740143096e-05,
"loss": 0.3551,
"step": 609
},
{
"epoch": 0.21,
"learning_rate": 4.9454076878995335e-05,
"loss": 0.4547,
"step": 610
},
{
"epoch": 0.21,
"learning_rate": 4.945117212118556e-05,
"loss": 0.3499,
"step": 611
},
{
"epoch": 0.21,
"learning_rate": 4.9448259741785715e-05,
"loss": 0.3629,
"step": 612
},
{
"epoch": 0.21,
"learning_rate": 4.9445339741703607e-05,
"loss": 0.4158,
"step": 613
},
{
"epoch": 0.21,
"learning_rate": 4.9442412121849415e-05,
"loss": 0.3956,
"step": 614
},
{
"epoch": 0.21,
"learning_rate": 4.9439476883135695e-05,
"loss": 0.4046,
"step": 615
},
{
"epoch": 0.21,
"learning_rate": 4.9436534026477376e-05,
"loss": 0.3945,
"step": 616
},
{
"epoch": 0.21,
"learning_rate": 4.943358355279178e-05,
"loss": 0.3489,
"step": 617
},
{
"epoch": 0.21,
"learning_rate": 4.943062546299858e-05,
"loss": 0.4001,
"step": 618
},
{
"epoch": 0.21,
"learning_rate": 4.942765975801983e-05,
"loss": 0.3662,
"step": 619
},
{
"epoch": 0.21,
"learning_rate": 4.942468643877996e-05,
"loss": 0.3467,
"step": 620
},
{
"epoch": 0.21,
"learning_rate": 4.942170550620579e-05,
"loss": 0.378,
"step": 621
},
{
"epoch": 0.21,
"learning_rate": 4.941871696122647e-05,
"loss": 0.3878,
"step": 622
},
{
"epoch": 0.21,
"learning_rate": 4.941572080477357e-05,
"loss": 0.3219,
"step": 623
},
{
"epoch": 0.21,
"learning_rate": 4.941271703778099e-05,
"loss": 0.3944,
"step": 624
},
{
"epoch": 0.21,
"learning_rate": 4.9409705661185044e-05,
"loss": 0.371,
"step": 625
},
{
"epoch": 0.21,
"learning_rate": 4.940668667592439e-05,
"loss": 0.3981,
"step": 626
},
{
"epoch": 0.21,
"learning_rate": 4.940366008294007e-05,
"loss": 0.3622,
"step": 627
},
{
"epoch": 0.21,
"learning_rate": 4.9400625883175475e-05,
"loss": 0.3077,
"step": 628
},
{
"epoch": 0.21,
"learning_rate": 4.9397584077576406e-05,
"loss": 0.4211,
"step": 629
},
{
"epoch": 0.21,
"learning_rate": 4.9394534667091005e-05,
"loss": 0.3031,
"step": 630
},
{
"epoch": 0.22,
"learning_rate": 4.9391477652669806e-05,
"loss": 0.3831,
"step": 631
},
{
"epoch": 0.22,
"learning_rate": 4.938841303526567e-05,
"loss": 0.337,
"step": 632
},
{
"epoch": 0.22,
"learning_rate": 4.9385340815833884e-05,
"loss": 0.3786,
"step": 633
},
{
"epoch": 0.22,
"learning_rate": 4.938226099533207e-05,
"loss": 0.3738,
"step": 634
},
{
"epoch": 0.22,
"learning_rate": 4.937917357472024e-05,
"loss": 0.3846,
"step": 635
},
{
"epoch": 0.22,
"learning_rate": 4.9376078554960743e-05,
"loss": 0.3593,
"step": 636
},
{
"epoch": 0.22,
"learning_rate": 4.937297593701833e-05,
"loss": 0.3559,
"step": 637
},
{
"epoch": 0.22,
"learning_rate": 4.93698657218601e-05,
"loss": 0.3503,
"step": 638
},
{
"epoch": 0.22,
"learning_rate": 4.936674791045553e-05,
"loss": 0.4037,
"step": 639
},
{
"epoch": 0.22,
"learning_rate": 4.936362250377647e-05,
"loss": 0.3999,
"step": 640
},
{
"epoch": 0.22,
"learning_rate": 4.936048950279712e-05,
"loss": 0.3718,
"step": 641
},
{
"epoch": 0.22,
"learning_rate": 4.935734890849406e-05,
"loss": 0.3298,
"step": 642
},
{
"epoch": 0.22,
"learning_rate": 4.935420072184623e-05,
"loss": 0.3825,
"step": 643
},
{
"epoch": 0.22,
"learning_rate": 4.935104494383495e-05,
"loss": 0.2997,
"step": 644
},
{
"epoch": 0.22,
"learning_rate": 4.934788157544389e-05,
"loss": 0.3334,
"step": 645
},
{
"epoch": 0.22,
"learning_rate": 4.93447106176591e-05,
"loss": 0.4027,
"step": 646
},
{
"epoch": 0.22,
"learning_rate": 4.934153207146896e-05,
"loss": 0.3983,
"step": 647
},
{
"epoch": 0.22,
"learning_rate": 4.9338345937864286e-05,
"loss": 0.346,
"step": 648
},
{
"epoch": 0.22,
"learning_rate": 4.9335152217838196e-05,
"loss": 0.3757,
"step": 649
},
{
"epoch": 0.22,
"learning_rate": 4.9331950912386185e-05,
"loss": 0.3489,
"step": 650
},
{
"epoch": 0.22,
"learning_rate": 4.932874202250613e-05,
"loss": 0.3501,
"step": 651
},
{
"epoch": 0.22,
"learning_rate": 4.932552554919826e-05,
"loss": 0.343,
"step": 652
},
{
"epoch": 0.22,
"learning_rate": 4.932230149346518e-05,
"loss": 0.3962,
"step": 653
},
{
"epoch": 0.22,
"learning_rate": 4.9319069856311836e-05,
"loss": 0.3881,
"step": 654
},
{
"epoch": 0.22,
"learning_rate": 4.931583063874556e-05,
"loss": 0.3951,
"step": 655
},
{
"epoch": 0.22,
"learning_rate": 4.931258384177603e-05,
"loss": 0.3189,
"step": 656
},
{
"epoch": 0.22,
"learning_rate": 4.93093294664153e-05,
"loss": 0.3676,
"step": 657
},
{
"epoch": 0.22,
"learning_rate": 4.9306067513677786e-05,
"loss": 0.4031,
"step": 658
},
{
"epoch": 0.22,
"learning_rate": 4.930279798458024e-05,
"loss": 0.4024,
"step": 659
},
{
"epoch": 0.23,
"learning_rate": 4.929952088014181e-05,
"loss": 0.3883,
"step": 660
},
{
"epoch": 0.23,
"learning_rate": 4.9296236201384e-05,
"loss": 0.3496,
"step": 661
},
{
"epoch": 0.23,
"learning_rate": 4.9292943949330635e-05,
"loss": 0.3396,
"step": 662
},
{
"epoch": 0.23,
"learning_rate": 4.928964412500796e-05,
"loss": 0.4452,
"step": 663
},
{
"epoch": 0.23,
"learning_rate": 4.9286336729444535e-05,
"loss": 0.3818,
"step": 664
},
{
"epoch": 0.23,
"learning_rate": 4.928302176367131e-05,
"loss": 0.3485,
"step": 665
},
{
"epoch": 0.23,
"learning_rate": 4.9279699228721575e-05,
"loss": 0.3812,
"step": 666
},
{
"epoch": 0.23,
"learning_rate": 4.927636912563099e-05,
"loss": 0.3793,
"step": 667
},
{
"epoch": 0.23,
"learning_rate": 4.927303145543756e-05,
"loss": 0.4071,
"step": 668
},
{
"epoch": 0.23,
"learning_rate": 4.926968621918166e-05,
"loss": 0.3339,
"step": 669
},
{
"epoch": 0.23,
"learning_rate": 4.926633341790604e-05,
"loss": 0.3846,
"step": 670
},
{
"epoch": 0.23,
"learning_rate": 4.926297305265577e-05,
"loss": 0.3915,
"step": 671
},
{
"epoch": 0.23,
"learning_rate": 4.925960512447831e-05,
"loss": 0.3103,
"step": 672
},
{
"epoch": 0.23,
"learning_rate": 4.925622963442346e-05,
"loss": 0.4936,
"step": 673
},
{
"epoch": 0.23,
"learning_rate": 4.925284658354337e-05,
"loss": 0.3785,
"step": 674
},
{
"epoch": 0.23,
"learning_rate": 4.924945597289258e-05,
"loss": 0.3524,
"step": 675
},
{
"epoch": 0.23,
"learning_rate": 4.924605780352796e-05,
"loss": 0.3386,
"step": 676
},
{
"epoch": 0.23,
"learning_rate": 4.9242652076508736e-05,
"loss": 0.3945,
"step": 677
},
{
"epoch": 0.23,
"learning_rate": 4.923923879289649e-05,
"loss": 0.3744,
"step": 678
},
{
"epoch": 0.23,
"learning_rate": 4.923581795375518e-05,
"loss": 0.4101,
"step": 679
},
{
"epoch": 0.23,
"learning_rate": 4.923238956015109e-05,
"loss": 0.3886,
"step": 680
},
{
"epoch": 0.23,
"learning_rate": 4.9228953613152884e-05,
"loss": 0.3462,
"step": 681
},
{
"epoch": 0.23,
"learning_rate": 4.9225510113831564e-05,
"loss": 0.3565,
"step": 682
},
{
"epoch": 0.23,
"learning_rate": 4.922205906326049e-05,
"loss": 0.3661,
"step": 683
},
{
"epoch": 0.23,
"learning_rate": 4.9218600462515385e-05,
"loss": 0.4009,
"step": 684
},
{
"epoch": 0.23,
"learning_rate": 4.921513431267431e-05,
"loss": 0.3459,
"step": 685
},
{
"epoch": 0.23,
"learning_rate": 4.921166061481768e-05,
"loss": 0.319,
"step": 686
},
{
"epoch": 0.23,
"learning_rate": 4.920817937002827e-05,
"loss": 0.3282,
"step": 687
},
{
"epoch": 0.23,
"learning_rate": 4.920469057939123e-05,
"loss": 0.3524,
"step": 688
},
{
"epoch": 0.24,
"learning_rate": 4.9201194243994016e-05,
"loss": 0.4298,
"step": 689
},
{
"epoch": 0.24,
"learning_rate": 4.9197690364926464e-05,
"loss": 0.2674,
"step": 690
},
{
"epoch": 0.24,
"learning_rate": 4.9194178943280756e-05,
"loss": 0.3873,
"step": 691
},
{
"epoch": 0.24,
"learning_rate": 4.919065998015142e-05,
"loss": 0.408,
"step": 692
},
{
"epoch": 0.24,
"learning_rate": 4.918713347663536e-05,
"loss": 0.3405,
"step": 693
},
{
"epoch": 0.24,
"learning_rate": 4.918359943383178e-05,
"loss": 0.3958,
"step": 694
},
{
"epoch": 0.24,
"learning_rate": 4.91800578528423e-05,
"loss": 0.4128,
"step": 695
},
{
"epoch": 0.24,
"learning_rate": 4.917650873477082e-05,
"loss": 0.4097,
"step": 696
},
{
"epoch": 0.24,
"learning_rate": 4.9172952080723636e-05,
"loss": 0.3834,
"step": 697
},
{
"epoch": 0.24,
"learning_rate": 4.916938789180939e-05,
"loss": 0.353,
"step": 698
},
{
"epoch": 0.24,
"learning_rate": 4.916581616913904e-05,
"loss": 0.3846,
"step": 699
},
{
"epoch": 0.24,
"learning_rate": 4.916223691382595e-05,
"loss": 0.4598,
"step": 700
},
{
"epoch": 0.24,
"learning_rate": 4.915865012698577e-05,
"loss": 0.347,
"step": 701
},
{
"epoch": 0.24,
"learning_rate": 4.9155055809736525e-05,
"loss": 0.3945,
"step": 702
},
{
"epoch": 0.24,
"learning_rate": 4.91514539631986e-05,
"loss": 0.3722,
"step": 703
},
{
"epoch": 0.24,
"learning_rate": 4.9147844588494705e-05,
"loss": 0.3973,
"step": 704
},
{
"epoch": 0.24,
"learning_rate": 4.914422768674991e-05,
"loss": 0.3956,
"step": 705
},
{
"epoch": 0.24,
"learning_rate": 4.9140603259091634e-05,
"loss": 0.3346,
"step": 706
},
{
"epoch": 0.24,
"learning_rate": 4.913697130664962e-05,
"loss": 0.3781,
"step": 707
},
{
"epoch": 0.24,
"learning_rate": 4.913333183055597e-05,
"loss": 0.334,
"step": 708
},
{
"epoch": 0.24,
"learning_rate": 4.912968483194516e-05,
"loss": 0.3562,
"step": 709
},
{
"epoch": 0.24,
"learning_rate": 4.912603031195394e-05,
"loss": 0.3799,
"step": 710
},
{
"epoch": 0.24,
"learning_rate": 4.912236827172148e-05,
"loss": 0.3756,
"step": 711
},
{
"epoch": 0.24,
"learning_rate": 4.911869871238926e-05,
"loss": 0.3766,
"step": 712
},
{
"epoch": 0.24,
"learning_rate": 4.91150216351011e-05,
"loss": 0.3937,
"step": 713
},
{
"epoch": 0.24,
"learning_rate": 4.9111337041003164e-05,
"loss": 0.3901,
"step": 714
},
{
"epoch": 0.24,
"learning_rate": 4.910764493124397e-05,
"loss": 0.3636,
"step": 715
},
{
"epoch": 0.24,
"learning_rate": 4.910394530697436e-05,
"loss": 0.4095,
"step": 716
},
{
"epoch": 0.24,
"learning_rate": 4.910023816934756e-05,
"loss": 0.4292,
"step": 717
},
{
"epoch": 0.24,
"learning_rate": 4.9096523519519076e-05,
"loss": 0.3662,
"step": 718
},
{
"epoch": 0.25,
"learning_rate": 4.909280135864681e-05,
"loss": 0.3515,
"step": 719
},
{
"epoch": 0.25,
"learning_rate": 4.908907168789097e-05,
"loss": 0.4288,
"step": 720
},
{
"epoch": 0.25,
"learning_rate": 4.908533450841414e-05,
"loss": 0.3634,
"step": 721
},
{
"epoch": 0.25,
"learning_rate": 4.90815898213812e-05,
"loss": 0.3911,
"step": 722
},
{
"epoch": 0.25,
"learning_rate": 4.90778376279594e-05,
"loss": 0.3591,
"step": 723
},
{
"epoch": 0.25,
"learning_rate": 4.907407792931833e-05,
"loss": 0.4248,
"step": 724
},
{
"epoch": 0.25,
"learning_rate": 4.907031072662991e-05,
"loss": 0.3144,
"step": 725
},
{
"epoch": 0.25,
"learning_rate": 4.90665360210684e-05,
"loss": 0.4093,
"step": 726
},
{
"epoch": 0.25,
"learning_rate": 4.9062753813810396e-05,
"loss": 0.3416,
"step": 727
},
{
"epoch": 0.25,
"learning_rate": 4.9058964106034844e-05,
"loss": 0.4166,
"step": 728
},
{
"epoch": 0.25,
"learning_rate": 4.9055166898923024e-05,
"loss": 0.3576,
"step": 729
},
{
"epoch": 0.25,
"learning_rate": 4.905136219365854e-05,
"loss": 0.3873,
"step": 730
},
{
"epoch": 0.25,
"learning_rate": 4.9047549991427347e-05,
"loss": 0.3951,
"step": 731
},
{
"epoch": 0.25,
"learning_rate": 4.904373029341773e-05,
"loss": 0.3795,
"step": 732
},
{
"epoch": 0.25,
"learning_rate": 4.903990310082032e-05,
"loss": 0.3793,
"step": 733
},
{
"epoch": 0.25,
"learning_rate": 4.903606841482808e-05,
"loss": 0.4235,
"step": 734
},
{
"epoch": 0.25,
"learning_rate": 4.90322262366363e-05,
"loss": 0.3502,
"step": 735
},
{
"epoch": 0.25,
"learning_rate": 4.902837656744261e-05,
"loss": 0.3657,
"step": 736
},
{
"epoch": 0.25,
"learning_rate": 4.902451940844699e-05,
"loss": 0.3653,
"step": 737
},
{
"epoch": 0.25,
"learning_rate": 4.9020654760851726e-05,
"loss": 0.4012,
"step": 738
},
{
"epoch": 0.25,
"learning_rate": 4.901678262586147e-05,
"loss": 0.34,
"step": 739
},
{
"epoch": 0.25,
"learning_rate": 4.901290300468317e-05,
"loss": 0.3407,
"step": 740
},
{
"epoch": 0.25,
"learning_rate": 4.900901589852615e-05,
"loss": 0.3598,
"step": 741
},
{
"epoch": 0.25,
"learning_rate": 4.900512130860204e-05,
"loss": 0.3478,
"step": 742
},
{
"epoch": 0.25,
"learning_rate": 4.9001219236124806e-05,
"loss": 0.3805,
"step": 743
},
{
"epoch": 0.25,
"learning_rate": 4.899730968231075e-05,
"loss": 0.3347,
"step": 744
},
{
"epoch": 0.25,
"learning_rate": 4.89933926483785e-05,
"loss": 0.3942,
"step": 745
},
{
"epoch": 0.25,
"learning_rate": 4.8989468135549044e-05,
"loss": 0.3769,
"step": 746
},
{
"epoch": 0.25,
"learning_rate": 4.898553614504565e-05,
"loss": 0.3734,
"step": 747
},
{
"epoch": 0.26,
"learning_rate": 4.898159667809396e-05,
"loss": 0.3821,
"step": 748
},
{
"epoch": 0.26,
"learning_rate": 4.897764973592193e-05,
"loss": 0.4227,
"step": 749
},
{
"epoch": 0.26,
"learning_rate": 4.8973695319759845e-05,
"loss": 0.3903,
"step": 750
},
{
"epoch": 0.26,
"learning_rate": 4.8969733430840324e-05,
"loss": 0.3854,
"step": 751
},
{
"epoch": 0.26,
"learning_rate": 4.896576407039831e-05,
"loss": 0.3495,
"step": 752
},
{
"epoch": 0.26,
"learning_rate": 4.896178723967108e-05,
"loss": 0.3402,
"step": 753
},
{
"epoch": 0.26,
"learning_rate": 4.895780293989825e-05,
"loss": 0.3594,
"step": 754
},
{
"epoch": 0.26,
"learning_rate": 4.895381117232174e-05,
"loss": 0.3063,
"step": 755
},
{
"epoch": 0.26,
"learning_rate": 4.894981193818581e-05,
"loss": 0.3899,
"step": 756
},
{
"epoch": 0.26,
"learning_rate": 4.8945805238737044e-05,
"loss": 0.3434,
"step": 757
},
{
"epoch": 0.26,
"learning_rate": 4.894179107522437e-05,
"loss": 0.3806,
"step": 758
},
{
"epoch": 0.26,
"learning_rate": 4.8937769448899015e-05,
"loss": 0.3507,
"step": 759
},
{
"epoch": 0.26,
"learning_rate": 4.8933740361014554e-05,
"loss": 0.4326,
"step": 760
},
{
"epoch": 0.26,
"learning_rate": 4.8929703812826885e-05,
"loss": 0.374,
"step": 761
},
{
"epoch": 0.26,
"learning_rate": 4.892565980559422e-05,
"loss": 0.3908,
"step": 762
},
{
"epoch": 0.26,
"learning_rate": 4.892160834057709e-05,
"loss": 0.3865,
"step": 763
},
{
"epoch": 0.26,
"learning_rate": 4.891754941903838e-05,
"loss": 0.3934,
"step": 764
},
{
"epoch": 0.26,
"learning_rate": 4.891348304224329e-05,
"loss": 0.3372,
"step": 765
},
{
"epoch": 0.26,
"learning_rate": 4.890940921145931e-05,
"loss": 0.3752,
"step": 766
},
{
"epoch": 0.26,
"learning_rate": 4.8905327927956294e-05,
"loss": 0.3598,
"step": 767
},
{
"epoch": 0.26,
"learning_rate": 4.890123919300641e-05,
"loss": 0.3502,
"step": 768
},
{
"epoch": 0.26,
"learning_rate": 4.8897143007884136e-05,
"loss": 0.4189,
"step": 769
},
{
"epoch": 0.26,
"learning_rate": 4.8893039373866276e-05,
"loss": 0.3706,
"step": 770
},
{
"epoch": 0.26,
"learning_rate": 4.888892829223197e-05,
"loss": 0.4151,
"step": 771
},
{
"epoch": 0.26,
"learning_rate": 4.888480976426267e-05,
"loss": 0.4136,
"step": 772
},
{
"epoch": 0.26,
"learning_rate": 4.8880683791242134e-05,
"loss": 0.3558,
"step": 773
},
{
"epoch": 0.26,
"learning_rate": 4.887655037445646e-05,
"loss": 0.3548,
"step": 774
},
{
"epoch": 0.26,
"learning_rate": 4.8872409515194076e-05,
"loss": 0.3529,
"step": 775
},
{
"epoch": 0.26,
"learning_rate": 4.886826121474569e-05,
"loss": 0.3886,
"step": 776
},
{
"epoch": 0.27,
"learning_rate": 4.886410547440437e-05,
"loss": 0.3881,
"step": 777
},
{
"epoch": 0.27,
"learning_rate": 4.8859942295465486e-05,
"loss": 0.3713,
"step": 778
},
{
"epoch": 0.27,
"learning_rate": 4.885577167922672e-05,
"loss": 0.3643,
"step": 779
},
{
"epoch": 0.27,
"learning_rate": 4.88515936269881e-05,
"loss": 0.3476,
"step": 780
},
{
"epoch": 0.27,
"learning_rate": 4.884740814005193e-05,
"loss": 0.4322,
"step": 781
},
{
"epoch": 0.27,
"learning_rate": 4.884321521972287e-05,
"loss": 0.3403,
"step": 782
},
{
"epoch": 0.27,
"learning_rate": 4.883901486730786e-05,
"loss": 0.3116,
"step": 783
},
{
"epoch": 0.27,
"learning_rate": 4.88348070841162e-05,
"loss": 0.3855,
"step": 784
},
{
"epoch": 0.27,
"learning_rate": 4.8830591871459475e-05,
"loss": 0.426,
"step": 785
},
{
"epoch": 0.27,
"learning_rate": 4.882636923065159e-05,
"loss": 0.389,
"step": 786
},
{
"epoch": 0.27,
"learning_rate": 4.8822139163008775e-05,
"loss": 0.3947,
"step": 787
},
{
"epoch": 0.27,
"learning_rate": 4.881790166984957e-05,
"loss": 0.3327,
"step": 788
},
{
"epoch": 0.27,
"learning_rate": 4.881365675249482e-05,
"loss": 0.3728,
"step": 789
},
{
"epoch": 0.27,
"learning_rate": 4.88094044122677e-05,
"loss": 0.3582,
"step": 790
},
{
"epoch": 0.27,
"learning_rate": 4.8805144650493695e-05,
"loss": 0.3289,
"step": 791
},
{
"epoch": 0.27,
"learning_rate": 4.8800877468500604e-05,
"loss": 0.3237,
"step": 792
},
{
"epoch": 0.27,
"learning_rate": 4.879660286761852e-05,
"loss": 0.3716,
"step": 793
},
{
"epoch": 0.27,
"learning_rate": 4.879232084917988e-05,
"loss": 0.4081,
"step": 794
},
{
"epoch": 0.27,
"learning_rate": 4.878803141451941e-05,
"loss": 0.3753,
"step": 795
},
{
"epoch": 0.27,
"learning_rate": 4.878373456497416e-05,
"loss": 0.3314,
"step": 796
},
{
"epoch": 0.27,
"learning_rate": 4.8779430301883475e-05,
"loss": 0.3725,
"step": 797
},
{
"epoch": 0.27,
"learning_rate": 4.8775118626589034e-05,
"loss": 0.3691,
"step": 798
},
{
"epoch": 0.27,
"learning_rate": 4.877079954043481e-05,
"loss": 0.364,
"step": 799
},
{
"epoch": 0.27,
"learning_rate": 4.876647304476709e-05,
"loss": 0.381,
"step": 800
},
{
"epoch": 0.27,
"learning_rate": 4.8762139140934456e-05,
"loss": 0.3521,
"step": 801
},
{
"epoch": 0.27,
"learning_rate": 4.8757797830287846e-05,
"loss": 0.3521,
"step": 802
},
{
"epoch": 0.27,
"learning_rate": 4.875344911418045e-05,
"loss": 0.3946,
"step": 803
},
{
"epoch": 0.27,
"learning_rate": 4.87490929939678e-05,
"loss": 0.3943,
"step": 804
},
{
"epoch": 0.27,
"learning_rate": 4.874472947100772e-05,
"loss": 0.3557,
"step": 805
},
{
"epoch": 0.27,
"learning_rate": 4.874035854666036e-05,
"loss": 0.3889,
"step": 806
},
{
"epoch": 0.28,
"learning_rate": 4.873598022228817e-05,
"loss": 0.3674,
"step": 807
},
{
"epoch": 0.28,
"learning_rate": 4.873159449925588e-05,
"loss": 0.4205,
"step": 808
},
{
"epoch": 0.28,
"learning_rate": 4.872720137893056e-05,
"loss": 0.3368,
"step": 809
},
{
"epoch": 0.28,
"learning_rate": 4.8722800862681576e-05,
"loss": 0.3555,
"step": 810
},
{
"epoch": 0.28,
"learning_rate": 4.8718392951880606e-05,
"loss": 0.3359,
"step": 811
},
{
"epoch": 0.28,
"learning_rate": 4.8713977647901605e-05,
"loss": 0.3581,
"step": 812
},
{
"epoch": 0.28,
"learning_rate": 4.870955495212087e-05,
"loss": 0.3512,
"step": 813
},
{
"epoch": 0.28,
"learning_rate": 4.870512486591697e-05,
"loss": 0.3713,
"step": 814
},
{
"epoch": 0.28,
"learning_rate": 4.87006873906708e-05,
"loss": 0.3268,
"step": 815
},
{
"epoch": 0.28,
"learning_rate": 4.869624252776555e-05,
"loss": 0.3887,
"step": 816
},
{
"epoch": 0.28,
"learning_rate": 4.869179027858671e-05,
"loss": 0.3753,
"step": 817
},
{
"epoch": 0.28,
"learning_rate": 4.868733064452208e-05,
"loss": 0.3794,
"step": 818
},
{
"epoch": 0.28,
"learning_rate": 4.868286362696175e-05,
"loss": 0.3933,
"step": 819
},
{
"epoch": 0.28,
"learning_rate": 4.867838922729813e-05,
"loss": 0.3575,
"step": 820
},
{
"epoch": 0.28,
"learning_rate": 4.8673907446925904e-05,
"loss": 0.3572,
"step": 821
},
{
"epoch": 0.28,
"learning_rate": 4.866941828724208e-05,
"loss": 0.3499,
"step": 822
},
{
"epoch": 0.28,
"learning_rate": 4.866492174964596e-05,
"loss": 0.4202,
"step": 823
},
{
"epoch": 0.28,
"learning_rate": 4.866041783553914e-05,
"loss": 0.3401,
"step": 824
},
{
"epoch": 0.28,
"learning_rate": 4.865590654632552e-05,
"loss": 0.4061,
"step": 825
},
{
"epoch": 0.28,
"learning_rate": 4.8651387883411316e-05,
"loss": 0.4104,
"step": 826
},
{
"epoch": 0.28,
"learning_rate": 4.8646861848204994e-05,
"loss": 0.3251,
"step": 827
},
{
"epoch": 0.28,
"learning_rate": 4.864232844211737e-05,
"loss": 0.3727,
"step": 828
},
{
"epoch": 0.28,
"learning_rate": 4.863778766656152e-05,
"loss": 0.3878,
"step": 829
},
{
"epoch": 0.28,
"learning_rate": 4.863323952295286e-05,
"loss": 0.392,
"step": 830
},
{
"epoch": 0.28,
"learning_rate": 4.8628684012709045e-05,
"loss": 0.3763,
"step": 831
},
{
"epoch": 0.28,
"learning_rate": 4.862412113725008e-05,
"loss": 0.429,
"step": 832
},
{
"epoch": 0.28,
"learning_rate": 4.861955089799823e-05,
"loss": 0.3342,
"step": 833
},
{
"epoch": 0.28,
"learning_rate": 4.8614973296378076e-05,
"loss": 0.3734,
"step": 834
},
{
"epoch": 0.28,
"learning_rate": 4.861038833381648e-05,
"loss": 0.3194,
"step": 835
},
{
"epoch": 0.29,
"learning_rate": 4.860579601174261e-05,
"loss": 0.3137,
"step": 836
},
{
"epoch": 0.29,
"learning_rate": 4.860119633158793e-05,
"loss": 0.3826,
"step": 837
},
{
"epoch": 0.29,
"learning_rate": 4.8596589294786166e-05,
"loss": 0.3467,
"step": 838
},
{
"epoch": 0.29,
"learning_rate": 4.859197490277339e-05,
"loss": 0.3548,
"step": 839
},
{
"epoch": 0.29,
"learning_rate": 4.858735315698792e-05,
"loss": 0.3422,
"step": 840
},
{
"epoch": 0.29,
"learning_rate": 4.8582724058870386e-05,
"loss": 0.3329,
"step": 841
},
{
"epoch": 0.29,
"learning_rate": 4.857808760986372e-05,
"loss": 0.3639,
"step": 842
},
{
"epoch": 0.29,
"learning_rate": 4.857344381141312e-05,
"loss": 0.3232,
"step": 843
},
{
"epoch": 0.29,
"learning_rate": 4.856879266496609e-05,
"loss": 0.365,
"step": 844
},
{
"epoch": 0.29,
"learning_rate": 4.856413417197243e-05,
"loss": 0.385,
"step": 845
},
{
"epoch": 0.29,
"learning_rate": 4.855946833388422e-05,
"loss": 0.3336,
"step": 846
},
{
"epoch": 0.29,
"learning_rate": 4.855479515215583e-05,
"loss": 0.3834,
"step": 847
},
{
"epoch": 0.29,
"learning_rate": 4.855011462824392e-05,
"loss": 0.3436,
"step": 848
},
{
"epoch": 0.29,
"learning_rate": 4.854542676360745e-05,
"loss": 0.3814,
"step": 849
},
{
"epoch": 0.29,
"learning_rate": 4.854073155970765e-05,
"loss": 0.3381,
"step": 850
},
{
"epoch": 0.29,
"learning_rate": 4.853602901800803e-05,
"loss": 0.3708,
"step": 851
},
{
"epoch": 0.29,
"learning_rate": 4.8531319139974444e-05,
"loss": 0.3476,
"step": 852
},
{
"epoch": 0.29,
"learning_rate": 4.8526601927074945e-05,
"loss": 0.391,
"step": 853
},
{
"epoch": 0.29,
"learning_rate": 4.852187738077995e-05,
"loss": 0.3569,
"step": 854
},
{
"epoch": 0.29,
"learning_rate": 4.8517145502562123e-05,
"loss": 0.3687,
"step": 855
},
{
"epoch": 0.29,
"learning_rate": 4.851240629389642e-05,
"loss": 0.3563,
"step": 856
},
{
"epoch": 0.29,
"learning_rate": 4.850765975626007e-05,
"loss": 0.426,
"step": 857
},
{
"epoch": 0.29,
"learning_rate": 4.8502905891132626e-05,
"loss": 0.371,
"step": 858
},
{
"epoch": 0.29,
"learning_rate": 4.8498144699995884e-05,
"loss": 0.3479,
"step": 859
},
{
"epoch": 0.29,
"learning_rate": 4.849337618433394e-05,
"loss": 0.3489,
"step": 860
},
{
"epoch": 0.29,
"learning_rate": 4.848860034563317e-05,
"loss": 0.4309,
"step": 861
},
{
"epoch": 0.29,
"learning_rate": 4.848381718538224e-05,
"loss": 0.3706,
"step": 862
},
{
"epoch": 0.29,
"learning_rate": 4.847902670507208e-05,
"loss": 0.3817,
"step": 863
},
{
"epoch": 0.29,
"learning_rate": 4.8474228906195936e-05,
"loss": 0.3388,
"step": 864
},
{
"epoch": 0.3,
"learning_rate": 4.846942379024929e-05,
"loss": 0.3407,
"step": 865
},
{
"epoch": 0.3,
"learning_rate": 4.846461135872994e-05,
"loss": 0.3491,
"step": 866
},
{
"epoch": 0.3,
"learning_rate": 4.8459791613137953e-05,
"loss": 0.3803,
"step": 867
},
{
"epoch": 0.3,
"learning_rate": 4.845496455497566e-05,
"loss": 0.3636,
"step": 868
},
{
"epoch": 0.3,
"learning_rate": 4.845013018574771e-05,
"loss": 0.3562,
"step": 869
},
{
"epoch": 0.3,
"learning_rate": 4.8445288506961e-05,
"loss": 0.3572,
"step": 870
},
{
"epoch": 0.3,
"learning_rate": 4.844043952012469e-05,
"loss": 0.376,
"step": 871
},
{
"epoch": 0.3,
"learning_rate": 4.8435583226750274e-05,
"loss": 0.3449,
"step": 872
},
{
"epoch": 0.3,
"learning_rate": 4.843071962835147e-05,
"loss": 0.3211,
"step": 873
},
{
"epoch": 0.3,
"learning_rate": 4.842584872644431e-05,
"loss": 0.369,
"step": 874
},
{
"epoch": 0.3,
"learning_rate": 4.8420970522547074e-05,
"loss": 0.351,
"step": 875
},
{
"epoch": 0.3,
"learning_rate": 4.841608501818033e-05,
"loss": 0.3159,
"step": 876
},
{
"epoch": 0.3,
"learning_rate": 4.841119221486691e-05,
"loss": 0.3272,
"step": 877
},
{
"epoch": 0.3,
"learning_rate": 4.840629211413196e-05,
"loss": 0.4106,
"step": 878
},
{
"epoch": 0.3,
"learning_rate": 4.840138471750285e-05,
"loss": 0.5168,
"step": 879
},
{
"epoch": 0.3,
"learning_rate": 4.8396470026509266e-05,
"loss": 0.4267,
"step": 880
},
{
"epoch": 0.3,
"learning_rate": 4.839154804268313e-05,
"loss": 0.3123,
"step": 881
},
{
"epoch": 0.3,
"learning_rate": 4.8386618767558676e-05,
"loss": 0.3444,
"step": 882
},
{
"epoch": 0.3,
"learning_rate": 4.838168220267238e-05,
"loss": 0.3526,
"step": 883
},
{
"epoch": 0.3,
"learning_rate": 4.8376738349563e-05,
"loss": 0.357,
"step": 884
},
{
"epoch": 0.3,
"learning_rate": 4.837178720977157e-05,
"loss": 0.33,
"step": 885
},
{
"epoch": 0.3,
"learning_rate": 4.83668287848414e-05,
"loss": 0.3025,
"step": 886
},
{
"epoch": 0.3,
"learning_rate": 4.8361863076318055e-05,
"loss": 0.3444,
"step": 887
},
{
"epoch": 0.3,
"learning_rate": 4.835689008574938e-05,
"loss": 0.3459,
"step": 888
},
{
"epoch": 0.3,
"learning_rate": 4.835190981468549e-05,
"loss": 0.3633,
"step": 889
},
{
"epoch": 0.3,
"learning_rate": 4.8346922264678765e-05,
"loss": 0.3391,
"step": 890
},
{
"epoch": 0.3,
"learning_rate": 4.8341927437283855e-05,
"loss": 0.3658,
"step": 891
},
{
"epoch": 0.3,
"learning_rate": 4.8336925334057695e-05,
"loss": 0.416,
"step": 892
},
{
"epoch": 0.3,
"learning_rate": 4.8331915956559455e-05,
"loss": 0.3577,
"step": 893
},
{
"epoch": 0.31,
"learning_rate": 4.8326899306350604e-05,
"loss": 0.3336,
"step": 894
},
{
"epoch": 0.31,
"learning_rate": 4.832187538499485e-05,
"loss": 0.3629,
"step": 895
},
{
"epoch": 0.31,
"learning_rate": 4.8316844194058194e-05,
"loss": 0.3682,
"step": 896
},
{
"epoch": 0.31,
"learning_rate": 4.8311805735108894e-05,
"loss": 0.339,
"step": 897
},
{
"epoch": 0.31,
"learning_rate": 4.830676000971747e-05,
"loss": 0.4038,
"step": 898
},
{
"epoch": 0.31,
"learning_rate": 4.830170701945669e-05,
"loss": 0.3582,
"step": 899
},
{
"epoch": 0.31,
"learning_rate": 4.829664676590162e-05,
"loss": 0.379,
"step": 900
},
{
"epoch": 0.31,
"learning_rate": 4.8291579250629574e-05,
"loss": 0.354,
"step": 901
},
{
"epoch": 0.31,
"learning_rate": 4.828650447522013e-05,
"loss": 0.3983,
"step": 902
},
{
"epoch": 0.31,
"learning_rate": 4.828142244125512e-05,
"loss": 0.4093,
"step": 903
},
{
"epoch": 0.31,
"learning_rate": 4.827633315031865e-05,
"loss": 0.3365,
"step": 904
},
{
"epoch": 0.31,
"learning_rate": 4.827123660399708e-05,
"loss": 0.4063,
"step": 905
},
{
"epoch": 0.31,
"learning_rate": 4.826613280387906e-05,
"loss": 0.353,
"step": 906
},
{
"epoch": 0.31,
"learning_rate": 4.8261021751555456e-05,
"loss": 0.3662,
"step": 907
},
{
"epoch": 0.31,
"learning_rate": 4.825590344861942e-05,
"loss": 0.3807,
"step": 908
},
{
"epoch": 0.31,
"learning_rate": 4.825077789666637e-05,
"loss": 0.3588,
"step": 909
},
{
"epoch": 0.31,
"learning_rate": 4.824564509729396e-05,
"loss": 0.3714,
"step": 910
},
{
"epoch": 0.31,
"learning_rate": 4.824050505210212e-05,
"loss": 0.4295,
"step": 911
},
{
"epoch": 0.31,
"learning_rate": 4.823535776269305e-05,
"loss": 0.3331,
"step": 912
},
{
"epoch": 0.31,
"learning_rate": 4.8230203230671174e-05,
"loss": 0.3961,
"step": 913
},
{
"epoch": 0.31,
"learning_rate": 4.8225041457643206e-05,
"loss": 0.337,
"step": 914
},
{
"epoch": 0.31,
"learning_rate": 4.82198724452181e-05,
"loss": 0.4421,
"step": 915
},
{
"epoch": 0.31,
"learning_rate": 4.821469619500707e-05,
"loss": 0.3451,
"step": 916
},
{
"epoch": 0.31,
"learning_rate": 4.8209512708623596e-05,
"loss": 0.3853,
"step": 917
},
{
"epoch": 0.31,
"learning_rate": 4.820432198768339e-05,
"loss": 0.3964,
"step": 918
},
{
"epoch": 0.31,
"learning_rate": 4.8199124033804444e-05,
"loss": 0.3503,
"step": 919
},
{
"epoch": 0.31,
"learning_rate": 4.8193918848606994e-05,
"loss": 0.3835,
"step": 920
},
{
"epoch": 0.31,
"learning_rate": 4.818870643371353e-05,
"loss": 0.4013,
"step": 921
},
{
"epoch": 0.31,
"learning_rate": 4.8183486790748786e-05,
"loss": 0.3504,
"step": 922
},
{
"epoch": 0.31,
"learning_rate": 4.817825992133977e-05,
"loss": 0.3861,
"step": 923
},
{
"epoch": 0.32,
"learning_rate": 4.817302582711572e-05,
"loss": 0.3193,
"step": 924
},
{
"epoch": 0.32,
"learning_rate": 4.816778450970816e-05,
"loss": 0.3526,
"step": 925
},
{
"epoch": 0.32,
"learning_rate": 4.816253597075082e-05,
"loss": 0.3449,
"step": 926
},
{
"epoch": 0.32,
"learning_rate": 4.815728021187971e-05,
"loss": 0.3711,
"step": 927
},
{
"epoch": 0.32,
"learning_rate": 4.815201723473308e-05,
"loss": 0.367,
"step": 928
},
{
"epoch": 0.32,
"learning_rate": 4.814674704095145e-05,
"loss": 0.4106,
"step": 929
},
{
"epoch": 0.32,
"learning_rate": 4.814146963217756e-05,
"loss": 0.3487,
"step": 930
},
{
"epoch": 0.32,
"learning_rate": 4.8136185010056416e-05,
"loss": 0.3187,
"step": 931
},
{
"epoch": 0.32,
"learning_rate": 4.813089317623527e-05,
"loss": 0.3686,
"step": 932
},
{
"epoch": 0.32,
"learning_rate": 4.8125594132363625e-05,
"loss": 0.366,
"step": 933
},
{
"epoch": 0.32,
"learning_rate": 4.8120287880093224e-05,
"loss": 0.3671,
"step": 934
},
{
"epoch": 0.32,
"learning_rate": 4.811497442107806e-05,
"loss": 0.3703,
"step": 935
},
{
"epoch": 0.32,
"learning_rate": 4.8109653756974374e-05,
"loss": 0.412,
"step": 936
},
{
"epoch": 0.32,
"learning_rate": 4.810432588944064e-05,
"loss": 0.3442,
"step": 937
},
{
"epoch": 0.32,
"learning_rate": 4.809899082013761e-05,
"loss": 0.3454,
"step": 938
},
{
"epoch": 0.32,
"learning_rate": 4.8093648550728253e-05,
"loss": 0.3656,
"step": 939
},
{
"epoch": 0.32,
"learning_rate": 4.808829908287778e-05,
"loss": 0.4164,
"step": 940
},
{
"epoch": 0.32,
"learning_rate": 4.808294241825366e-05,
"loss": 0.3517,
"step": 941
},
{
"epoch": 0.32,
"learning_rate": 4.807757855852561e-05,
"loss": 0.3678,
"step": 942
},
{
"epoch": 0.32,
"learning_rate": 4.807220750536557e-05,
"loss": 0.3347,
"step": 943
},
{
"epoch": 0.32,
"learning_rate": 4.8066829260447735e-05,
"loss": 0.3617,
"step": 944
},
{
"epoch": 0.32,
"learning_rate": 4.8061443825448526e-05,
"loss": 0.3611,
"step": 945
},
{
"epoch": 0.32,
"learning_rate": 4.8056051202046646e-05,
"loss": 0.3365,
"step": 946
},
{
"epoch": 0.32,
"learning_rate": 4.805065139192299e-05,
"loss": 0.3601,
"step": 947
},
{
"epoch": 0.32,
"learning_rate": 4.8045244396760714e-05,
"loss": 0.3508,
"step": 948
},
{
"epoch": 0.32,
"learning_rate": 4.803983021824523e-05,
"loss": 0.3755,
"step": 949
},
{
"epoch": 0.32,
"learning_rate": 4.803440885806415e-05,
"loss": 0.3431,
"step": 950
},
{
"epoch": 0.32,
"learning_rate": 4.802898031790737e-05,
"loss": 0.3283,
"step": 951
},
{
"epoch": 0.32,
"learning_rate": 4.802354459946699e-05,
"loss": 0.359,
"step": 952
},
{
"epoch": 0.33,
"learning_rate": 4.801810170443735e-05,
"loss": 0.3419,
"step": 953
},
{
"epoch": 0.33,
"learning_rate": 4.801265163451505e-05,
"loss": 0.3137,
"step": 954
},
{
"epoch": 0.33,
"learning_rate": 4.8007194391398916e-05,
"loss": 0.3742,
"step": 955
},
{
"epoch": 0.33,
"learning_rate": 4.8001729976789996e-05,
"loss": 0.3464,
"step": 956
},
{
"epoch": 0.33,
"learning_rate": 4.799625839239158e-05,
"loss": 0.3714,
"step": 957
},
{
"epoch": 0.33,
"learning_rate": 4.799077963990921e-05,
"loss": 0.3663,
"step": 958
},
{
"epoch": 0.33,
"learning_rate": 4.798529372105064e-05,
"loss": 0.3485,
"step": 959
},
{
"epoch": 0.33,
"learning_rate": 4.7979800637525864e-05,
"loss": 0.3674,
"step": 960
},
{
"epoch": 0.33,
"learning_rate": 4.7974300391047125e-05,
"loss": 0.3723,
"step": 961
},
{
"epoch": 0.33,
"learning_rate": 4.796879298332887e-05,
"loss": 0.3864,
"step": 962
},
{
"epoch": 0.33,
"learning_rate": 4.796327841608781e-05,
"loss": 0.3674,
"step": 963
},
{
"epoch": 0.33,
"learning_rate": 4.795775669104285e-05,
"loss": 0.3857,
"step": 964
},
{
"epoch": 0.33,
"learning_rate": 4.795222780991516e-05,
"loss": 0.4047,
"step": 965
},
{
"epoch": 0.33,
"learning_rate": 4.7946691774428144e-05,
"loss": 0.3159,
"step": 966
},
{
"epoch": 0.33,
"learning_rate": 4.7941148586307396e-05,
"loss": 0.3996,
"step": 967
},
{
"epoch": 0.33,
"learning_rate": 4.7935598247280776e-05,
"loss": 0.3178,
"step": 968
},
{
"epoch": 0.33,
"learning_rate": 4.7930040759078353e-05,
"loss": 0.3911,
"step": 969
},
{
"epoch": 0.33,
"learning_rate": 4.7924476123432446e-05,
"loss": 0.4339,
"step": 970
},
{
"epoch": 0.33,
"learning_rate": 4.791890434207757e-05,
"loss": 0.3385,
"step": 971
},
{
"epoch": 0.33,
"learning_rate": 4.791332541675051e-05,
"loss": 0.3144,
"step": 972
},
{
"epoch": 0.33,
"learning_rate": 4.790773934919023e-05,
"loss": 0.3131,
"step": 973
},
{
"epoch": 0.33,
"learning_rate": 4.790214614113796e-05,
"loss": 0.3411,
"step": 974
},
{
"epoch": 0.33,
"learning_rate": 4.7896545794337125e-05,
"loss": 0.3696,
"step": 975
},
{
"epoch": 0.33,
"learning_rate": 4.7890938310533395e-05,
"loss": 0.3334,
"step": 976
},
{
"epoch": 0.33,
"learning_rate": 4.788532369147467e-05,
"loss": 0.3149,
"step": 977
},
{
"epoch": 0.33,
"learning_rate": 4.787970193891105e-05,
"loss": 0.3743,
"step": 978
},
{
"epoch": 0.33,
"learning_rate": 4.7874073054594874e-05,
"loss": 0.3851,
"step": 979
},
{
"epoch": 0.33,
"learning_rate": 4.786843704028071e-05,
"loss": 0.3401,
"step": 980
},
{
"epoch": 0.33,
"learning_rate": 4.7862793897725334e-05,
"loss": 0.3455,
"step": 981
},
{
"epoch": 0.34,
"learning_rate": 4.785714362868775e-05,
"loss": 0.3165,
"step": 982
},
{
"epoch": 0.34,
"learning_rate": 4.7851486234929185e-05,
"loss": 0.37,
"step": 983
},
{
"epoch": 0.34,
"learning_rate": 4.784582171821309e-05,
"loss": 0.3539,
"step": 984
},
{
"epoch": 0.34,
"learning_rate": 4.784015008030513e-05,
"loss": 0.3448,
"step": 985
},
{
"epoch": 0.34,
"learning_rate": 4.783447132297318e-05,
"loss": 0.3716,
"step": 986
},
{
"epoch": 0.34,
"learning_rate": 4.7828785447987355e-05,
"loss": 0.3881,
"step": 987
},
{
"epoch": 0.34,
"learning_rate": 4.782309245711999e-05,
"loss": 0.3687,
"step": 988
},
{
"epoch": 0.34,
"learning_rate": 4.781739235214561e-05,
"loss": 0.3369,
"step": 989
},
{
"epoch": 0.34,
"learning_rate": 4.781168513484099e-05,
"loss": 0.418,
"step": 990
},
{
"epoch": 0.34,
"learning_rate": 4.780597080698509e-05,
"loss": 0.3599,
"step": 991
},
{
"epoch": 0.34,
"learning_rate": 4.7800249370359116e-05,
"loss": 0.3806,
"step": 992
},
{
"epoch": 0.34,
"learning_rate": 4.7794520826746474e-05,
"loss": 0.3949,
"step": 993
},
{
"epoch": 0.34,
"learning_rate": 4.778878517793279e-05,
"loss": 0.3195,
"step": 994
},
{
"epoch": 0.34,
"learning_rate": 4.778304242570589e-05,
"loss": 0.4595,
"step": 995
},
{
"epoch": 0.34,
"learning_rate": 4.7777292571855855e-05,
"loss": 0.3118,
"step": 996
},
{
"epoch": 0.34,
"learning_rate": 4.777153561817492e-05,
"loss": 0.3927,
"step": 997
},
{
"epoch": 0.34,
"learning_rate": 4.7765771566457597e-05,
"loss": 0.3494,
"step": 998
},
{
"epoch": 0.34,
"learning_rate": 4.776000041850055e-05,
"loss": 0.3758,
"step": 999
},
{
"epoch": 0.34,
"learning_rate": 4.7754222176102694e-05,
"loss": 0.396,
"step": 1000
},
{
"epoch": 0.34,
"learning_rate": 4.7748436841065157e-05,
"loss": 0.3539,
"step": 1001
},
{
"epoch": 0.34,
"learning_rate": 4.774264441519125e-05,
"loss": 0.3674,
"step": 1002
},
{
"epoch": 0.34,
"learning_rate": 4.773684490028651e-05,
"loss": 0.3301,
"step": 1003
},
{
"epoch": 0.34,
"learning_rate": 4.77310382981587e-05,
"loss": 0.386,
"step": 1004
},
{
"epoch": 0.34,
"learning_rate": 4.772522461061776e-05,
"loss": 0.365,
"step": 1005
},
{
"epoch": 0.34,
"learning_rate": 4.771940383947585e-05,
"loss": 0.3274,
"step": 1006
},
{
"epoch": 0.34,
"learning_rate": 4.7713575986547366e-05,
"loss": 0.4089,
"step": 1007
},
{
"epoch": 0.34,
"learning_rate": 4.770774105364887e-05,
"loss": 0.3524,
"step": 1008
},
{
"epoch": 0.34,
"learning_rate": 4.7701899042599146e-05,
"loss": 0.3477,
"step": 1009
},
{
"epoch": 0.34,
"learning_rate": 4.769604995521919e-05,
"loss": 0.3645,
"step": 1010
},
{
"epoch": 0.34,
"learning_rate": 4.769019379333222e-05,
"loss": 0.3434,
"step": 1011
},
{
"epoch": 0.35,
"learning_rate": 4.7684330558763616e-05,
"loss": 0.3363,
"step": 1012
},
{
"epoch": 0.35,
"learning_rate": 4.7678460253340987e-05,
"loss": 0.3969,
"step": 1013
},
{
"epoch": 0.35,
"learning_rate": 4.767258287889416e-05,
"loss": 0.3556,
"step": 1014
},
{
"epoch": 0.35,
"learning_rate": 4.766669843725514e-05,
"loss": 0.4047,
"step": 1015
},
{
"epoch": 0.35,
"learning_rate": 4.766080693025815e-05,
"loss": 0.3849,
"step": 1016
},
{
"epoch": 0.35,
"learning_rate": 4.7654908359739614e-05,
"loss": 0.3765,
"step": 1017
},
{
"epoch": 0.35,
"learning_rate": 4.764900272753815e-05,
"loss": 0.3582,
"step": 1018
},
{
"epoch": 0.35,
"learning_rate": 4.764309003549459e-05,
"loss": 0.3625,
"step": 1019
},
{
"epoch": 0.35,
"learning_rate": 4.7637170285451936e-05,
"loss": 0.3583,
"step": 1020
},
{
"epoch": 0.35,
"learning_rate": 4.763124347925545e-05,
"loss": 0.3321,
"step": 1021
},
{
"epoch": 0.35,
"learning_rate": 4.7625309618752524e-05,
"loss": 0.3856,
"step": 1022
},
{
"epoch": 0.35,
"learning_rate": 4.76193687057928e-05,
"loss": 0.3837,
"step": 1023
},
{
"epoch": 0.35,
"learning_rate": 4.761342074222808e-05,
"loss": 0.3268,
"step": 1024
},
{
"epoch": 0.35,
"learning_rate": 4.7607465729912406e-05,
"loss": 0.3088,
"step": 1025
},
{
"epoch": 0.35,
"learning_rate": 4.760150367070198e-05,
"loss": 0.3884,
"step": 1026
},
{
"epoch": 0.35,
"learning_rate": 4.7595534566455216e-05,
"loss": 0.3546,
"step": 1027
},
{
"epoch": 0.35,
"learning_rate": 4.758955841903273e-05,
"loss": 0.3562,
"step": 1028
},
{
"epoch": 0.35,
"learning_rate": 4.7583575230297316e-05,
"loss": 0.3469,
"step": 1029
},
{
"epoch": 0.35,
"learning_rate": 4.7577585002113986e-05,
"loss": 0.361,
"step": 1030
},
{
"epoch": 0.35,
"learning_rate": 4.757158773634992e-05,
"loss": 0.3765,
"step": 1031
},
{
"epoch": 0.35,
"learning_rate": 4.756558343487452e-05,
"loss": 0.3798,
"step": 1032
},
{
"epoch": 0.35,
"learning_rate": 4.7559572099559346e-05,
"loss": 0.3364,
"step": 1033
},
{
"epoch": 0.35,
"learning_rate": 4.755355373227818e-05,
"loss": 0.3836,
"step": 1034
},
{
"epoch": 0.35,
"learning_rate": 4.754752833490699e-05,
"loss": 0.3717,
"step": 1035
},
{
"epoch": 0.35,
"learning_rate": 4.754149590932393e-05,
"loss": 0.3309,
"step": 1036
},
{
"epoch": 0.35,
"learning_rate": 4.7535456457409344e-05,
"loss": 0.3238,
"step": 1037
},
{
"epoch": 0.35,
"learning_rate": 4.752940998104577e-05,
"loss": 0.316,
"step": 1038
},
{
"epoch": 0.35,
"learning_rate": 4.752335648211794e-05,
"loss": 0.3679,
"step": 1039
},
{
"epoch": 0.35,
"learning_rate": 4.751729596251276e-05,
"loss": 0.393,
"step": 1040
},
{
"epoch": 0.36,
"learning_rate": 4.7511228424119334e-05,
"loss": 0.3478,
"step": 1041
},
{
"epoch": 0.36,
"learning_rate": 4.750515386882896e-05,
"loss": 0.3966,
"step": 1042
},
{
"epoch": 0.36,
"learning_rate": 4.749907229853511e-05,
"loss": 0.3665,
"step": 1043
},
{
"epoch": 0.36,
"learning_rate": 4.749298371513345e-05,
"loss": 0.345,
"step": 1044
},
{
"epoch": 0.36,
"learning_rate": 4.7486888120521846e-05,
"loss": 0.3904,
"step": 1045
},
{
"epoch": 0.36,
"learning_rate": 4.748078551660031e-05,
"loss": 0.333,
"step": 1046
},
{
"epoch": 0.36,
"learning_rate": 4.7474675905271083e-05,
"loss": 0.4079,
"step": 1047
},
{
"epoch": 0.36,
"learning_rate": 4.746855928843856e-05,
"loss": 0.3402,
"step": 1048
},
{
"epoch": 0.36,
"learning_rate": 4.746243566800933e-05,
"loss": 0.3299,
"step": 1049
},
{
"epoch": 0.36,
"learning_rate": 4.745630504589217e-05,
"loss": 0.3126,
"step": 1050
},
{
"epoch": 0.36,
"learning_rate": 4.745016742399804e-05,
"loss": 0.3259,
"step": 1051
},
{
"epoch": 0.36,
"learning_rate": 4.7444022804240065e-05,
"loss": 0.4203,
"step": 1052
},
{
"epoch": 0.36,
"learning_rate": 4.743787118853358e-05,
"loss": 0.3883,
"step": 1053
},
{
"epoch": 0.36,
"learning_rate": 4.743171257879606e-05,
"loss": 0.3409,
"step": 1054
},
{
"epoch": 0.36,
"learning_rate": 4.74255469769472e-05,
"loss": 0.3872,
"step": 1055
},
{
"epoch": 0.36,
"learning_rate": 4.741937438490885e-05,
"loss": 0.3627,
"step": 1056
},
{
"epoch": 0.36,
"learning_rate": 4.7413194804605066e-05,
"loss": 0.3762,
"step": 1057
},
{
"epoch": 0.36,
"learning_rate": 4.7407008237962045e-05,
"loss": 0.3382,
"step": 1058
},
{
"epoch": 0.36,
"learning_rate": 4.740081468690818e-05,
"loss": 0.3977,
"step": 1059
},
{
"epoch": 0.36,
"learning_rate": 4.739461415337405e-05,
"loss": 0.4623,
"step": 1060
},
{
"epoch": 0.36,
"learning_rate": 4.73884066392924e-05,
"loss": 0.3632,
"step": 1061
},
{
"epoch": 0.36,
"learning_rate": 4.7382192146598154e-05,
"loss": 0.3674,
"step": 1062
},
{
"epoch": 0.36,
"learning_rate": 4.73759706772284e-05,
"loss": 0.4448,
"step": 1063
},
{
"epoch": 0.36,
"learning_rate": 4.736974223312242e-05,
"loss": 0.3566,
"step": 1064
},
{
"epoch": 0.36,
"learning_rate": 4.736350681622166e-05,
"loss": 0.3473,
"step": 1065
},
{
"epoch": 0.36,
"learning_rate": 4.7357264428469736e-05,
"loss": 0.36,
"step": 1066
},
{
"epoch": 0.36,
"learning_rate": 4.7351015071812445e-05,
"loss": 0.4104,
"step": 1067
},
{
"epoch": 0.36,
"learning_rate": 4.7344758748197754e-05,
"loss": 0.3794,
"step": 1068
},
{
"epoch": 0.36,
"learning_rate": 4.733849545957579e-05,
"loss": 0.3997,
"step": 1069
},
{
"epoch": 0.37,
"learning_rate": 4.733222520789887e-05,
"loss": 0.3513,
"step": 1070
},
{
"epoch": 0.37,
"learning_rate": 4.7325947995121475e-05,
"loss": 0.3356,
"step": 1071
},
{
"epoch": 0.37,
"learning_rate": 4.7319663823200245e-05,
"loss": 0.3688,
"step": 1072
},
{
"epoch": 0.37,
"learning_rate": 4.7313372694094e-05,
"loss": 0.3415,
"step": 1073
},
{
"epoch": 0.37,
"learning_rate": 4.730707460976373e-05,
"loss": 0.3617,
"step": 1074
},
{
"epoch": 0.37,
"learning_rate": 4.730076957217259e-05,
"loss": 0.3814,
"step": 1075
},
{
"epoch": 0.37,
"learning_rate": 4.729445758328589e-05,
"loss": 0.3609,
"step": 1076
},
{
"epoch": 0.37,
"learning_rate": 4.7288138645071124e-05,
"loss": 0.3601,
"step": 1077
},
{
"epoch": 0.37,
"learning_rate": 4.7281812759497945e-05,
"loss": 0.3553,
"step": 1078
},
{
"epoch": 0.37,
"learning_rate": 4.7275479928538185e-05,
"loss": 0.3518,
"step": 1079
},
{
"epoch": 0.37,
"learning_rate": 4.726914015416581e-05,
"loss": 0.372,
"step": 1080
},
{
"epoch": 0.37,
"learning_rate": 4.726279343835699e-05,
"loss": 0.344,
"step": 1081
},
{
"epoch": 0.37,
"learning_rate": 4.725643978309002e-05,
"loss": 0.3542,
"step": 1082
},
{
"epoch": 0.37,
"learning_rate": 4.7250079190345386e-05,
"loss": 0.3457,
"step": 1083
},
{
"epoch": 0.37,
"learning_rate": 4.724371166210572e-05,
"loss": 0.303,
"step": 1084
},
{
"epoch": 0.37,
"learning_rate": 4.723733720035582e-05,
"loss": 0.3775,
"step": 1085
},
{
"epoch": 0.37,
"learning_rate": 4.723095580708266e-05,
"loss": 0.3448,
"step": 1086
},
{
"epoch": 0.37,
"learning_rate": 4.722456748427535e-05,
"loss": 0.3667,
"step": 1087
},
{
"epoch": 0.37,
"learning_rate": 4.721817223392518e-05,
"loss": 0.3671,
"step": 1088
},
{
"epoch": 0.37,
"learning_rate": 4.7211770058025584e-05,
"loss": 0.3756,
"step": 1089
},
{
"epoch": 0.37,
"learning_rate": 4.720536095857217e-05,
"loss": 0.3904,
"step": 1090
},
{
"epoch": 0.37,
"learning_rate": 4.719894493756268e-05,
"loss": 0.3256,
"step": 1091
},
{
"epoch": 0.37,
"learning_rate": 4.7192521996997044e-05,
"loss": 0.3988,
"step": 1092
},
{
"epoch": 0.37,
"learning_rate": 4.718609213887734e-05,
"loss": 0.2986,
"step": 1093
},
{
"epoch": 0.37,
"learning_rate": 4.717965536520778e-05,
"loss": 0.3735,
"step": 1094
},
{
"epoch": 0.37,
"learning_rate": 4.7173211677994764e-05,
"loss": 0.3433,
"step": 1095
},
{
"epoch": 0.37,
"learning_rate": 4.716676107924682e-05,
"loss": 0.4307,
"step": 1096
},
{
"epoch": 0.37,
"learning_rate": 4.716030357097465e-05,
"loss": 0.3976,
"step": 1097
},
{
"epoch": 0.37,
"learning_rate": 4.7153839155191095e-05,
"loss": 0.365,
"step": 1098
},
{
"epoch": 0.37,
"learning_rate": 4.714736783391115e-05,
"loss": 0.3747,
"step": 1099
},
{
"epoch": 0.38,
"learning_rate": 4.714088960915198e-05,
"loss": 0.3493,
"step": 1100
},
{
"epoch": 0.38,
"learning_rate": 4.713440448293289e-05,
"loss": 0.3375,
"step": 1101
},
{
"epoch": 0.38,
"learning_rate": 4.712791245727534e-05,
"loss": 0.3177,
"step": 1102
},
{
"epoch": 0.38,
"learning_rate": 4.712141353420292e-05,
"loss": 0.367,
"step": 1103
},
{
"epoch": 0.38,
"learning_rate": 4.7114907715741395e-05,
"loss": 0.3575,
"step": 1104
},
{
"epoch": 0.38,
"learning_rate": 4.710839500391868e-05,
"loss": 0.3459,
"step": 1105
},
{
"epoch": 0.38,
"learning_rate": 4.710187540076482e-05,
"loss": 0.3421,
"step": 1106
},
{
"epoch": 0.38,
"learning_rate": 4.709534890831201e-05,
"loss": 0.3894,
"step": 1107
},
{
"epoch": 0.38,
"learning_rate": 4.708881552859462e-05,
"loss": 0.3465,
"step": 1108
},
{
"epoch": 0.38,
"learning_rate": 4.7082275263649135e-05,
"loss": 0.3717,
"step": 1109
},
{
"epoch": 0.38,
"learning_rate": 4.70757281155142e-05,
"loss": 0.3598,
"step": 1110
},
{
"epoch": 0.38,
"learning_rate": 4.70691740862306e-05,
"loss": 0.3295,
"step": 1111
},
{
"epoch": 0.38,
"learning_rate": 4.706261317784128e-05,
"loss": 0.3354,
"step": 1112
},
{
"epoch": 0.38,
"learning_rate": 4.705604539239131e-05,
"loss": 0.3392,
"step": 1113
},
{
"epoch": 0.38,
"learning_rate": 4.704947073192791e-05,
"loss": 0.3673,
"step": 1114
},
{
"epoch": 0.38,
"learning_rate": 4.704288919850045e-05,
"loss": 0.3308,
"step": 1115
},
{
"epoch": 0.38,
"learning_rate": 4.7036300794160435e-05,
"loss": 0.3761,
"step": 1116
},
{
"epoch": 0.38,
"learning_rate": 4.702970552096151e-05,
"loss": 0.3442,
"step": 1117
},
{
"epoch": 0.38,
"learning_rate": 4.702310338095947e-05,
"loss": 0.3447,
"step": 1118
},
{
"epoch": 0.38,
"learning_rate": 4.701649437621224e-05,
"loss": 0.3564,
"step": 1119
},
{
"epoch": 0.38,
"learning_rate": 4.700987850877989e-05,
"loss": 0.3279,
"step": 1120
},
{
"epoch": 0.38,
"learning_rate": 4.700325578072464e-05,
"loss": 0.3563,
"step": 1121
},
{
"epoch": 0.38,
"learning_rate": 4.699662619411083e-05,
"loss": 0.2977,
"step": 1122
},
{
"epoch": 0.38,
"learning_rate": 4.6989989751004936e-05,
"loss": 0.4167,
"step": 1123
},
{
"epoch": 0.38,
"learning_rate": 4.6983346453475596e-05,
"loss": 0.3771,
"step": 1124
},
{
"epoch": 0.38,
"learning_rate": 4.6976696303593565e-05,
"loss": 0.3609,
"step": 1125
},
{
"epoch": 0.38,
"learning_rate": 4.697003930343173e-05,
"loss": 0.3347,
"step": 1126
},
{
"epoch": 0.38,
"learning_rate": 4.696337545506514e-05,
"loss": 0.363,
"step": 1127
},
{
"epoch": 0.38,
"learning_rate": 4.6956704760570936e-05,
"loss": 0.3847,
"step": 1128
},
{
"epoch": 0.39,
"learning_rate": 4.695002722202843e-05,
"loss": 0.3273,
"step": 1129
},
{
"epoch": 0.39,
"learning_rate": 4.694334284151906e-05,
"loss": 0.3174,
"step": 1130
},
{
"epoch": 0.39,
"learning_rate": 4.6936651621126374e-05,
"loss": 0.4282,
"step": 1131
},
{
"epoch": 0.39,
"learning_rate": 4.692995356293609e-05,
"loss": 0.3376,
"step": 1132
},
{
"epoch": 0.39,
"learning_rate": 4.692324866903601e-05,
"loss": 0.3612,
"step": 1133
},
{
"epoch": 0.39,
"learning_rate": 4.691653694151613e-05,
"loss": 0.3558,
"step": 1134
},
{
"epoch": 0.39,
"learning_rate": 4.6909818382468503e-05,
"loss": 0.361,
"step": 1135
},
{
"epoch": 0.39,
"learning_rate": 4.690309299398736e-05,
"loss": 0.3183,
"step": 1136
},
{
"epoch": 0.39,
"learning_rate": 4.689636077816907e-05,
"loss": 0.3874,
"step": 1137
},
{
"epoch": 0.39,
"learning_rate": 4.6889621737112076e-05,
"loss": 0.4168,
"step": 1138
},
{
"epoch": 0.39,
"learning_rate": 4.6882875872916996e-05,
"loss": 0.432,
"step": 1139
},
{
"epoch": 0.39,
"learning_rate": 4.687612318768656e-05,
"loss": 0.3756,
"step": 1140
},
{
"epoch": 0.39,
"learning_rate": 4.686936368352563e-05,
"loss": 0.3797,
"step": 1141
},
{
"epoch": 0.39,
"learning_rate": 4.686259736254117e-05,
"loss": 0.3856,
"step": 1142
},
{
"epoch": 0.39,
"learning_rate": 4.6855824226842306e-05,
"loss": 0.3444,
"step": 1143
},
{
"epoch": 0.39,
"learning_rate": 4.684904427854026e-05,
"loss": 0.3696,
"step": 1144
},
{
"epoch": 0.39,
"learning_rate": 4.684225751974838e-05,
"loss": 0.3442,
"step": 1145
},
{
"epoch": 0.39,
"learning_rate": 4.683546395258216e-05,
"loss": 0.3785,
"step": 1146
},
{
"epoch": 0.39,
"learning_rate": 4.6828663579159184e-05,
"loss": 0.3736,
"step": 1147
},
{
"epoch": 0.39,
"learning_rate": 4.682185640159917e-05,
"loss": 0.3936,
"step": 1148
},
{
"epoch": 0.39,
"learning_rate": 4.681504242202397e-05,
"loss": 0.3965,
"step": 1149
},
{
"epoch": 0.39,
"learning_rate": 4.680822164255755e-05,
"loss": 0.3555,
"step": 1150
},
{
"epoch": 0.39,
"learning_rate": 4.680139406532598e-05,
"loss": 0.3907,
"step": 1151
},
{
"epoch": 0.39,
"learning_rate": 4.679455969245746e-05,
"loss": 0.3139,
"step": 1152
},
{
"epoch": 0.39,
"learning_rate": 4.6787718526082326e-05,
"loss": 0.3643,
"step": 1153
},
{
"epoch": 0.39,
"learning_rate": 4.678087056833299e-05,
"loss": 0.3467,
"step": 1154
},
{
"epoch": 0.39,
"learning_rate": 4.677401582134402e-05,
"loss": 0.3722,
"step": 1155
},
{
"epoch": 0.39,
"learning_rate": 4.6767154287252084e-05,
"loss": 0.3498,
"step": 1156
},
{
"epoch": 0.39,
"learning_rate": 4.676028596819597e-05,
"loss": 0.3471,
"step": 1157
},
{
"epoch": 0.4,
"learning_rate": 4.675341086631656e-05,
"loss": 0.3416,
"step": 1158
},
{
"epoch": 0.4,
"learning_rate": 4.674652898375689e-05,
"loss": 0.3749,
"step": 1159
},
{
"epoch": 0.4,
"learning_rate": 4.673964032266208e-05,
"loss": 0.4216,
"step": 1160
},
{
"epoch": 0.4,
"learning_rate": 4.673274488517937e-05,
"loss": 0.3825,
"step": 1161
},
{
"epoch": 0.4,
"learning_rate": 4.6725842673458116e-05,
"loss": 0.3836,
"step": 1162
},
{
"epoch": 0.4,
"learning_rate": 4.6718933689649775e-05,
"loss": 0.3802,
"step": 1163
},
{
"epoch": 0.4,
"learning_rate": 4.671201793590792e-05,
"loss": 0.3279,
"step": 1164
},
{
"epoch": 0.4,
"learning_rate": 4.670509541438826e-05,
"loss": 0.3362,
"step": 1165
},
{
"epoch": 0.4,
"learning_rate": 4.669816612724856e-05,
"loss": 0.427,
"step": 1166
},
{
"epoch": 0.4,
"learning_rate": 4.669123007664874e-05,
"loss": 0.3516,
"step": 1167
},
{
"epoch": 0.4,
"learning_rate": 4.6684287264750814e-05,
"loss": 0.3347,
"step": 1168
},
{
"epoch": 0.4,
"learning_rate": 4.6677337693718895e-05,
"loss": 0.5573,
"step": 1169
},
{
"epoch": 0.4,
"learning_rate": 4.6670381365719215e-05,
"loss": 0.3515,
"step": 1170
},
{
"epoch": 0.4,
"learning_rate": 4.6663418282920104e-05,
"loss": 0.3665,
"step": 1171
},
{
"epoch": 0.4,
"learning_rate": 4.6656448447492e-05,
"loss": 0.3696,
"step": 1172
},
{
"epoch": 0.4,
"learning_rate": 4.664947186160744e-05,
"loss": 0.3694,
"step": 1173
},
{
"epoch": 0.4,
"learning_rate": 4.664248852744109e-05,
"loss": 0.3649,
"step": 1174
},
{
"epoch": 0.4,
"learning_rate": 4.663549844716968e-05,
"loss": 0.3753,
"step": 1175
},
{
"epoch": 0.4,
"learning_rate": 4.662850162297208e-05,
"loss": 0.3789,
"step": 1176
},
{
"epoch": 0.4,
"learning_rate": 4.662149805702922e-05,
"loss": 0.3759,
"step": 1177
},
{
"epoch": 0.4,
"learning_rate": 4.661448775152419e-05,
"loss": 0.3571,
"step": 1178
},
{
"epoch": 0.4,
"learning_rate": 4.6607470708642145e-05,
"loss": 0.3231,
"step": 1179
},
{
"epoch": 0.4,
"learning_rate": 4.660044693057032e-05,
"loss": 0.3458,
"step": 1180
},
{
"epoch": 0.4,
"learning_rate": 4.659341641949808e-05,
"loss": 0.3274,
"step": 1181
},
{
"epoch": 0.4,
"learning_rate": 4.6586379177616894e-05,
"loss": 0.326,
"step": 1182
},
{
"epoch": 0.4,
"learning_rate": 4.65793352071203e-05,
"loss": 0.3857,
"step": 1183
},
{
"epoch": 0.4,
"learning_rate": 4.6572284510203964e-05,
"loss": 0.3376,
"step": 1184
},
{
"epoch": 0.4,
"learning_rate": 4.656522708906563e-05,
"loss": 0.3206,
"step": 1185
},
{
"epoch": 0.4,
"learning_rate": 4.655816294590514e-05,
"loss": 0.3834,
"step": 1186
},
{
"epoch": 0.4,
"learning_rate": 4.655109208292443e-05,
"loss": 0.3492,
"step": 1187
},
{
"epoch": 0.41,
"learning_rate": 4.6544014502327535e-05,
"loss": 0.348,
"step": 1188
},
{
"epoch": 0.41,
"learning_rate": 4.65369302063206e-05,
"loss": 0.3661,
"step": 1189
},
{
"epoch": 0.41,
"learning_rate": 4.6529839197111814e-05,
"loss": 0.3846,
"step": 1190
},
{
"epoch": 0.41,
"learning_rate": 4.6522741476911526e-05,
"loss": 0.3209,
"step": 1191
},
{
"epoch": 0.41,
"learning_rate": 4.651563704793212e-05,
"loss": 0.3147,
"step": 1192
},
{
"epoch": 0.41,
"learning_rate": 4.650852591238809e-05,
"loss": 0.3291,
"step": 1193
},
{
"epoch": 0.41,
"learning_rate": 4.6501408072496036e-05,
"loss": 0.3526,
"step": 1194
},
{
"epoch": 0.41,
"learning_rate": 4.6494283530474624e-05,
"loss": 0.4177,
"step": 1195
},
{
"epoch": 0.41,
"learning_rate": 4.648715228854463e-05,
"loss": 0.4025,
"step": 1196
},
{
"epoch": 0.41,
"learning_rate": 4.6480014348928914e-05,
"loss": 0.3895,
"step": 1197
},
{
"epoch": 0.41,
"learning_rate": 4.647286971385241e-05,
"loss": 0.3792,
"step": 1198
},
{
"epoch": 0.41,
"learning_rate": 4.646571838554214e-05,
"loss": 0.3407,
"step": 1199
},
{
"epoch": 0.41,
"learning_rate": 4.6458560366227224e-05,
"loss": 0.4844,
"step": 1200
},
{
"epoch": 0.41,
"learning_rate": 4.6451395658138874e-05,
"loss": 0.3485,
"step": 1201
},
{
"epoch": 0.41,
"learning_rate": 4.6444224263510375e-05,
"loss": 0.4483,
"step": 1202
},
{
"epoch": 0.41,
"learning_rate": 4.6437046184577073e-05,
"loss": 0.3221,
"step": 1203
},
{
"epoch": 0.41,
"learning_rate": 4.642986142357645e-05,
"loss": 0.4153,
"step": 1204
},
{
"epoch": 0.41,
"learning_rate": 4.6422669982748036e-05,
"loss": 0.3795,
"step": 1205
},
{
"epoch": 0.41,
"learning_rate": 4.641547186433345e-05,
"loss": 0.3537,
"step": 1206
},
{
"epoch": 0.41,
"learning_rate": 4.640826707057638e-05,
"loss": 0.3704,
"step": 1207
},
{
"epoch": 0.41,
"learning_rate": 4.640105560372263e-05,
"loss": 0.3233,
"step": 1208
},
{
"epoch": 0.41,
"learning_rate": 4.6393837466020035e-05,
"loss": 0.3271,
"step": 1209
},
{
"epoch": 0.41,
"learning_rate": 4.638661265971856e-05,
"loss": 0.4121,
"step": 1210
},
{
"epoch": 0.41,
"learning_rate": 4.63793811870702e-05,
"loss": 0.3606,
"step": 1211
},
{
"epoch": 0.41,
"learning_rate": 4.637214305032908e-05,
"loss": 0.3999,
"step": 1212
},
{
"epoch": 0.41,
"learning_rate": 4.636489825175135e-05,
"loss": 0.3039,
"step": 1213
},
{
"epoch": 0.41,
"learning_rate": 4.635764679359528e-05,
"loss": 0.3978,
"step": 1214
},
{
"epoch": 0.41,
"learning_rate": 4.6350388678121184e-05,
"loss": 0.3253,
"step": 1215
},
{
"epoch": 0.41,
"learning_rate": 4.6343123907591476e-05,
"loss": 0.4326,
"step": 1216
},
{
"epoch": 0.42,
"learning_rate": 4.6335852484270617e-05,
"loss": 0.3516,
"step": 1217
},
{
"epoch": 0.42,
"learning_rate": 4.6328574410425164e-05,
"loss": 0.4009,
"step": 1218
},
{
"epoch": 0.42,
"learning_rate": 4.632128968832374e-05,
"loss": 0.4642,
"step": 1219
},
{
"epoch": 0.42,
"learning_rate": 4.6313998320237054e-05,
"loss": 0.411,
"step": 1220
},
{
"epoch": 0.42,
"learning_rate": 4.630670030843785e-05,
"loss": 0.3414,
"step": 1221
},
{
"epoch": 0.42,
"learning_rate": 4.629939565520097e-05,
"loss": 0.3474,
"step": 1222
},
{
"epoch": 0.42,
"learning_rate": 4.629208436280334e-05,
"loss": 0.32,
"step": 1223
},
{
"epoch": 0.42,
"learning_rate": 4.628476643352392e-05,
"loss": 0.3521,
"step": 1224
},
{
"epoch": 0.42,
"learning_rate": 4.6277441869643764e-05,
"loss": 0.3898,
"step": 1225
},
{
"epoch": 0.42,
"learning_rate": 4.627011067344599e-05,
"loss": 0.3183,
"step": 1226
},
{
"epoch": 0.42,
"learning_rate": 4.626277284721577e-05,
"loss": 0.3529,
"step": 1227
},
{
"epoch": 0.42,
"learning_rate": 4.625542839324036e-05,
"loss": 0.3722,
"step": 1228
},
{
"epoch": 0.42,
"learning_rate": 4.624807731380907e-05,
"loss": 0.3176,
"step": 1229
},
{
"epoch": 0.42,
"learning_rate": 4.624071961121328e-05,
"loss": 0.3139,
"step": 1230
},
{
"epoch": 0.42,
"learning_rate": 4.623335528774643e-05,
"loss": 0.3183,
"step": 1231
},
{
"epoch": 0.42,
"learning_rate": 4.622598434570404e-05,
"loss": 0.3716,
"step": 1232
},
{
"epoch": 0.42,
"learning_rate": 4.621860678738367e-05,
"loss": 0.3707,
"step": 1233
},
{
"epoch": 0.42,
"learning_rate": 4.621122261508496e-05,
"loss": 0.3447,
"step": 1234
},
{
"epoch": 0.42,
"learning_rate": 4.620383183110959e-05,
"loss": 0.3824,
"step": 1235
},
{
"epoch": 0.42,
"learning_rate": 4.6196434437761335e-05,
"loss": 0.3746,
"step": 1236
},
{
"epoch": 0.42,
"learning_rate": 4.618903043734601e-05,
"loss": 0.325,
"step": 1237
},
{
"epoch": 0.42,
"learning_rate": 4.6181619832171473e-05,
"loss": 0.3985,
"step": 1238
},
{
"epoch": 0.42,
"learning_rate": 4.6174202624547676e-05,
"loss": 0.3478,
"step": 1239
},
{
"epoch": 0.42,
"learning_rate": 4.616677881678659e-05,
"loss": 0.393,
"step": 1240
},
{
"epoch": 0.42,
"learning_rate": 4.615934841120229e-05,
"loss": 0.376,
"step": 1241
},
{
"epoch": 0.42,
"learning_rate": 4.6151911410110874e-05,
"loss": 0.3479,
"step": 1242
},
{
"epoch": 0.42,
"learning_rate": 4.6144467815830484e-05,
"loss": 0.3732,
"step": 1243
},
{
"epoch": 0.42,
"learning_rate": 4.6137017630681355e-05,
"loss": 0.3125,
"step": 1244
},
{
"epoch": 0.42,
"learning_rate": 4.6129560856985764e-05,
"loss": 0.362,
"step": 1245
},
{
"epoch": 0.43,
"learning_rate": 4.6122097497068026e-05,
"loss": 0.3791,
"step": 1246
},
{
"epoch": 0.43,
"learning_rate": 4.611462755325451e-05,
"loss": 0.3403,
"step": 1247
},
{
"epoch": 0.43,
"learning_rate": 4.610715102787367e-05,
"loss": 0.3537,
"step": 1248
},
{
"epoch": 0.43,
"learning_rate": 4.609966792325597e-05,
"loss": 0.3791,
"step": 1249
},
{
"epoch": 0.43,
"learning_rate": 4.6092178241733946e-05,
"loss": 0.3715,
"step": 1250
},
{
"epoch": 0.43,
"learning_rate": 4.608468198564218e-05,
"loss": 0.3834,
"step": 1251
},
{
"epoch": 0.43,
"learning_rate": 4.6077179157317304e-05,
"loss": 0.3643,
"step": 1252
},
{
"epoch": 0.43,
"learning_rate": 4.606966975909801e-05,
"loss": 0.3429,
"step": 1253
},
{
"epoch": 0.43,
"learning_rate": 4.606215379332501e-05,
"loss": 0.3888,
"step": 1254
},
{
"epoch": 0.43,
"learning_rate": 4.6054631262341096e-05,
"loss": 0.3736,
"step": 1255
},
{
"epoch": 0.43,
"learning_rate": 4.6047102168491076e-05,
"loss": 0.332,
"step": 1256
},
{
"epoch": 0.43,
"learning_rate": 4.603956651412182e-05,
"loss": 0.3499,
"step": 1257
},
{
"epoch": 0.43,
"learning_rate": 4.603202430158225e-05,
"loss": 0.3175,
"step": 1258
},
{
"epoch": 0.43,
"learning_rate": 4.60244755332233e-05,
"loss": 0.3505,
"step": 1259
},
{
"epoch": 0.43,
"learning_rate": 4.6016920211397995e-05,
"loss": 0.355,
"step": 1260
},
{
"epoch": 0.43,
"learning_rate": 4.6009358338461374e-05,
"loss": 0.364,
"step": 1261
},
{
"epoch": 0.43,
"learning_rate": 4.600178991677052e-05,
"loss": 0.3264,
"step": 1262
},
{
"epoch": 0.43,
"learning_rate": 4.599421494868455e-05,
"loss": 0.3491,
"step": 1263
},
{
"epoch": 0.43,
"learning_rate": 4.598663343656464e-05,
"loss": 0.3608,
"step": 1264
},
{
"epoch": 0.43,
"learning_rate": 4.5979045382774e-05,
"loss": 0.3699,
"step": 1265
},
{
"epoch": 0.43,
"learning_rate": 4.597145078967786e-05,
"loss": 0.3494,
"step": 1266
},
{
"epoch": 0.43,
"learning_rate": 4.5963849659643526e-05,
"loss": 0.3625,
"step": 1267
},
{
"epoch": 0.43,
"learning_rate": 4.5956241995040304e-05,
"loss": 0.3511,
"step": 1268
},
{
"epoch": 0.43,
"learning_rate": 4.594862779823956e-05,
"loss": 0.3909,
"step": 1269
},
{
"epoch": 0.43,
"learning_rate": 4.5941007071614696e-05,
"loss": 0.3593,
"step": 1270
},
{
"epoch": 0.43,
"learning_rate": 4.593337981754112e-05,
"loss": 0.3624,
"step": 1271
},
{
"epoch": 0.43,
"learning_rate": 4.5925746038396314e-05,
"loss": 0.3442,
"step": 1272
},
{
"epoch": 0.43,
"learning_rate": 4.591810573655978e-05,
"loss": 0.3365,
"step": 1273
},
{
"epoch": 0.43,
"learning_rate": 4.5910458914413035e-05,
"loss": 0.3185,
"step": 1274
},
{
"epoch": 0.44,
"learning_rate": 4.590280557433965e-05,
"loss": 0.3701,
"step": 1275
},
{
"epoch": 0.44,
"learning_rate": 4.589514571872523e-05,
"loss": 0.3802,
"step": 1276
},
{
"epoch": 0.44,
"learning_rate": 4.588747934995739e-05,
"loss": 0.3635,
"step": 1277
},
{
"epoch": 0.44,
"learning_rate": 4.587980647042579e-05,
"loss": 0.3763,
"step": 1278
},
{
"epoch": 0.44,
"learning_rate": 4.587212708252212e-05,
"loss": 0.4003,
"step": 1279
},
{
"epoch": 0.44,
"learning_rate": 4.586444118864009e-05,
"loss": 0.3556,
"step": 1280
},
{
"epoch": 0.44,
"learning_rate": 4.585674879117546e-05,
"loss": 0.3706,
"step": 1281
},
{
"epoch": 0.44,
"learning_rate": 4.584904989252598e-05,
"loss": 0.3535,
"step": 1282
},
{
"epoch": 0.44,
"learning_rate": 4.5841344495091454e-05,
"loss": 0.3755,
"step": 1283
},
{
"epoch": 0.44,
"learning_rate": 4.58336326012737e-05,
"loss": 0.3422,
"step": 1284
},
{
"epoch": 0.44,
"learning_rate": 4.582591421347658e-05,
"loss": 0.3767,
"step": 1285
},
{
"epoch": 0.44,
"learning_rate": 4.581818933410596e-05,
"loss": 0.343,
"step": 1286
},
{
"epoch": 0.44,
"learning_rate": 4.581045796556972e-05,
"loss": 0.3394,
"step": 1287
},
{
"epoch": 0.44,
"learning_rate": 4.58027201102778e-05,
"loss": 0.3373,
"step": 1288
},
{
"epoch": 0.44,
"learning_rate": 4.5794975770642125e-05,
"loss": 0.3696,
"step": 1289
},
{
"epoch": 0.44,
"learning_rate": 4.578722494907666e-05,
"loss": 0.3349,
"step": 1290
},
{
"epoch": 0.44,
"learning_rate": 4.577946764799739e-05,
"loss": 0.3706,
"step": 1291
},
{
"epoch": 0.44,
"learning_rate": 4.5771703869822324e-05,
"loss": 0.3497,
"step": 1292
},
{
"epoch": 0.44,
"learning_rate": 4.576393361697146e-05,
"loss": 0.3182,
"step": 1293
},
{
"epoch": 0.44,
"learning_rate": 4.5756156891866856e-05,
"loss": 0.393,
"step": 1294
},
{
"epoch": 0.44,
"learning_rate": 4.5748373696932566e-05,
"loss": 0.3896,
"step": 1295
},
{
"epoch": 0.44,
"learning_rate": 4.574058403459465e-05,
"loss": 0.3648,
"step": 1296
},
{
"epoch": 0.44,
"learning_rate": 4.573278790728121e-05,
"loss": 0.37,
"step": 1297
},
{
"epoch": 0.44,
"learning_rate": 4.5724985317422343e-05,
"loss": 0.4031,
"step": 1298
},
{
"epoch": 0.44,
"learning_rate": 4.5717176267450176e-05,
"loss": 0.4519,
"step": 1299
},
{
"epoch": 0.44,
"learning_rate": 4.5709360759798824e-05,
"loss": 0.3217,
"step": 1300
},
{
"epoch": 0.44,
"learning_rate": 4.5701538796904447e-05,
"loss": 0.3393,
"step": 1301
},
{
"epoch": 0.44,
"learning_rate": 4.56937103812052e-05,
"loss": 0.4134,
"step": 1302
},
{
"epoch": 0.44,
"learning_rate": 4.568587551514124e-05,
"loss": 0.3087,
"step": 1303
},
{
"epoch": 0.44,
"learning_rate": 4.5678034201154765e-05,
"loss": 0.343,
"step": 1304
},
{
"epoch": 0.45,
"learning_rate": 4.567018644168994e-05,
"loss": 0.4063,
"step": 1305
},
{
"epoch": 0.45,
"learning_rate": 4.566233223919298e-05,
"loss": 0.4115,
"step": 1306
},
{
"epoch": 0.45,
"learning_rate": 4.565447159611209e-05,
"loss": 0.3322,
"step": 1307
},
{
"epoch": 0.45,
"learning_rate": 4.564660451489749e-05,
"loss": 0.3032,
"step": 1308
},
{
"epoch": 0.45,
"learning_rate": 4.5638730998001374e-05,
"loss": 0.3336,
"step": 1309
},
{
"epoch": 0.45,
"learning_rate": 4.563085104787799e-05,
"loss": 0.3876,
"step": 1310
},
{
"epoch": 0.45,
"learning_rate": 4.562296466698357e-05,
"loss": 0.3449,
"step": 1311
},
{
"epoch": 0.45,
"learning_rate": 4.561507185777635e-05,
"loss": 0.3347,
"step": 1312
},
{
"epoch": 0.45,
"learning_rate": 4.5607172622716555e-05,
"loss": 0.3764,
"step": 1313
},
{
"epoch": 0.45,
"learning_rate": 4.559926696426645e-05,
"loss": 0.389,
"step": 1314
},
{
"epoch": 0.45,
"learning_rate": 4.5591354884890264e-05,
"loss": 0.3249,
"step": 1315
},
{
"epoch": 0.45,
"learning_rate": 4.558343638705426e-05,
"loss": 0.3791,
"step": 1316
},
{
"epoch": 0.45,
"learning_rate": 4.557551147322666e-05,
"loss": 0.3398,
"step": 1317
},
{
"epoch": 0.45,
"learning_rate": 4.556758014587774e-05,
"loss": 0.3086,
"step": 1318
},
{
"epoch": 0.45,
"learning_rate": 4.5559642407479726e-05,
"loss": 0.4698,
"step": 1319
},
{
"epoch": 0.45,
"learning_rate": 4.555169826050687e-05,
"loss": 0.3371,
"step": 1320
},
{
"epoch": 0.45,
"learning_rate": 4.554374770743542e-05,
"loss": 0.3264,
"step": 1321
},
{
"epoch": 0.45,
"learning_rate": 4.553579075074361e-05,
"loss": 0.3442,
"step": 1322
},
{
"epoch": 0.45,
"learning_rate": 4.552782739291167e-05,
"loss": 0.4239,
"step": 1323
},
{
"epoch": 0.45,
"learning_rate": 4.551985763642184e-05,
"loss": 0.3945,
"step": 1324
},
{
"epoch": 0.45,
"learning_rate": 4.551188148375834e-05,
"loss": 0.4069,
"step": 1325
},
{
"epoch": 0.45,
"learning_rate": 4.550389893740739e-05,
"loss": 0.3729,
"step": 1326
},
{
"epoch": 0.45,
"learning_rate": 4.5495909999857204e-05,
"loss": 0.3857,
"step": 1327
},
{
"epoch": 0.45,
"learning_rate": 4.5487914673597985e-05,
"loss": 0.3759,
"step": 1328
},
{
"epoch": 0.45,
"learning_rate": 4.547991296112193e-05,
"loss": 0.325,
"step": 1329
},
{
"epoch": 0.45,
"learning_rate": 4.547190486492321e-05,
"loss": 0.3326,
"step": 1330
},
{
"epoch": 0.45,
"learning_rate": 4.546389038749802e-05,
"loss": 0.3233,
"step": 1331
},
{
"epoch": 0.45,
"learning_rate": 4.545586953134452e-05,
"loss": 0.3834,
"step": 1332
},
{
"epoch": 0.45,
"learning_rate": 4.5447842298962864e-05,
"loss": 0.3589,
"step": 1333
},
{
"epoch": 0.46,
"learning_rate": 4.543980869285519e-05,
"loss": 0.3646,
"step": 1334
},
{
"epoch": 0.46,
"learning_rate": 4.543176871552563e-05,
"loss": 0.3593,
"step": 1335
},
{
"epoch": 0.46,
"learning_rate": 4.542372236948028e-05,
"loss": 0.3832,
"step": 1336
},
{
"epoch": 0.46,
"learning_rate": 4.541566965722726e-05,
"loss": 0.3214,
"step": 1337
},
{
"epoch": 0.46,
"learning_rate": 4.540761058127664e-05,
"loss": 0.3561,
"step": 1338
},
{
"epoch": 0.46,
"learning_rate": 4.53995451441405e-05,
"loss": 0.3328,
"step": 1339
},
{
"epoch": 0.46,
"learning_rate": 4.539147334833288e-05,
"loss": 0.3604,
"step": 1340
},
{
"epoch": 0.46,
"learning_rate": 4.5383395196369806e-05,
"loss": 0.3656,
"step": 1341
},
{
"epoch": 0.46,
"learning_rate": 4.53753106907693e-05,
"loss": 0.3048,
"step": 1342
},
{
"epoch": 0.46,
"learning_rate": 4.5367219834051354e-05,
"loss": 0.3515,
"step": 1343
},
{
"epoch": 0.46,
"learning_rate": 4.535912262873794e-05,
"loss": 0.3567,
"step": 1344
},
{
"epoch": 0.46,
"learning_rate": 4.5351019077353006e-05,
"loss": 0.3619,
"step": 1345
},
{
"epoch": 0.46,
"learning_rate": 4.534290918242249e-05,
"loss": 0.3787,
"step": 1346
},
{
"epoch": 0.46,
"learning_rate": 4.533479294647429e-05,
"loss": 0.341,
"step": 1347
},
{
"epoch": 0.46,
"learning_rate": 4.5326670372038296e-05,
"loss": 0.3134,
"step": 1348
},
{
"epoch": 0.46,
"learning_rate": 4.531854146164637e-05,
"loss": 0.3385,
"step": 1349
},
{
"epoch": 0.46,
"learning_rate": 4.531040621783233e-05,
"loss": 0.3745,
"step": 1350
},
{
"epoch": 0.46,
"learning_rate": 4.530226464313201e-05,
"loss": 0.3826,
"step": 1351
},
{
"epoch": 0.46,
"learning_rate": 4.5294116740083173e-05,
"loss": 0.3747,
"step": 1352
},
{
"epoch": 0.46,
"learning_rate": 4.528596251122558e-05,
"loss": 0.3884,
"step": 1353
},
{
"epoch": 0.46,
"learning_rate": 4.5277801959100954e-05,
"loss": 0.3285,
"step": 1354
},
{
"epoch": 0.46,
"learning_rate": 4.5269635086253e-05,
"loss": 0.3403,
"step": 1355
},
{
"epoch": 0.46,
"learning_rate": 4.5261461895227374e-05,
"loss": 0.3755,
"step": 1356
},
{
"epoch": 0.46,
"learning_rate": 4.525328238857173e-05,
"loss": 0.3581,
"step": 1357
},
{
"epoch": 0.46,
"learning_rate": 4.5245096568835656e-05,
"loss": 0.3367,
"step": 1358
},
{
"epoch": 0.46,
"learning_rate": 4.5236904438570734e-05,
"loss": 0.3459,
"step": 1359
},
{
"epoch": 0.46,
"learning_rate": 4.52287060003305e-05,
"loss": 0.3876,
"step": 1360
},
{
"epoch": 0.46,
"learning_rate": 4.5220501256670466e-05,
"loss": 0.3075,
"step": 1361
},
{
"epoch": 0.46,
"learning_rate": 4.521229021014811e-05,
"loss": 0.4039,
"step": 1362
},
{
"epoch": 0.47,
"learning_rate": 4.520407286332285e-05,
"loss": 0.3766,
"step": 1363
},
{
"epoch": 0.47,
"learning_rate": 4.5195849218756104e-05,
"loss": 0.3743,
"step": 1364
},
{
"epoch": 0.47,
"learning_rate": 4.518761927901123e-05,
"loss": 0.4024,
"step": 1365
},
{
"epoch": 0.47,
"learning_rate": 4.517938304665355e-05,
"loss": 0.3614,
"step": 1366
},
{
"epoch": 0.47,
"learning_rate": 4.5171140524250356e-05,
"loss": 0.3423,
"step": 1367
},
{
"epoch": 0.47,
"learning_rate": 4.51628917143709e-05,
"loss": 0.3368,
"step": 1368
},
{
"epoch": 0.47,
"learning_rate": 4.515463661958638e-05,
"loss": 0.3375,
"step": 1369
},
{
"epoch": 0.47,
"learning_rate": 4.5146375242469975e-05,
"loss": 0.345,
"step": 1370
},
{
"epoch": 0.47,
"learning_rate": 4.51381075855968e-05,
"loss": 0.3569,
"step": 1371
},
{
"epoch": 0.47,
"learning_rate": 4.5129833651543936e-05,
"loss": 0.3914,
"step": 1372
},
{
"epoch": 0.47,
"learning_rate": 4.512155344289043e-05,
"loss": 0.3757,
"step": 1373
},
{
"epoch": 0.47,
"learning_rate": 4.511326696221727e-05,
"loss": 0.3409,
"step": 1374
},
{
"epoch": 0.47,
"learning_rate": 4.510497421210742e-05,
"loss": 0.376,
"step": 1375
},
{
"epoch": 0.47,
"learning_rate": 4.509667519514577e-05,
"loss": 0.3474,
"step": 1376
},
{
"epoch": 0.47,
"learning_rate": 4.5088369913919184e-05,
"loss": 0.3754,
"step": 1377
},
{
"epoch": 0.47,
"learning_rate": 4.5080058371016464e-05,
"loss": 0.3683,
"step": 1378
},
{
"epoch": 0.47,
"learning_rate": 4.5071740569028386e-05,
"loss": 0.3682,
"step": 1379
},
{
"epoch": 0.47,
"learning_rate": 4.5063416510547655e-05,
"loss": 0.3474,
"step": 1380
},
{
"epoch": 0.47,
"learning_rate": 4.505508619816894e-05,
"loss": 0.3266,
"step": 1381
},
{
"epoch": 0.47,
"learning_rate": 4.5046749634488844e-05,
"loss": 0.3463,
"step": 1382
},
{
"epoch": 0.47,
"learning_rate": 4.5038406822105935e-05,
"loss": 0.39,
"step": 1383
},
{
"epoch": 0.47,
"learning_rate": 4.5030057763620724e-05,
"loss": 0.3505,
"step": 1384
},
{
"epoch": 0.47,
"learning_rate": 4.502170246163566e-05,
"loss": 0.4029,
"step": 1385
},
{
"epoch": 0.47,
"learning_rate": 4.501334091875515e-05,
"loss": 0.3718,
"step": 1386
},
{
"epoch": 0.47,
"learning_rate": 4.500497313758555e-05,
"loss": 0.3494,
"step": 1387
},
{
"epoch": 0.47,
"learning_rate": 4.4996599120735134e-05,
"loss": 0.3477,
"step": 1388
},
{
"epoch": 0.47,
"learning_rate": 4.498821887081415e-05,
"loss": 0.3732,
"step": 1389
},
{
"epoch": 0.47,
"learning_rate": 4.4979832390434775e-05,
"loss": 0.3492,
"step": 1390
},
{
"epoch": 0.47,
"learning_rate": 4.4971439682211125e-05,
"loss": 0.3297,
"step": 1391
},
{
"epoch": 0.47,
"learning_rate": 4.496304074875926e-05,
"loss": 0.3744,
"step": 1392
},
{
"epoch": 0.48,
"learning_rate": 4.495463559269719e-05,
"loss": 0.3399,
"step": 1393
},
{
"epoch": 0.48,
"learning_rate": 4.4946224216644864e-05,
"loss": 0.3623,
"step": 1394
},
{
"epoch": 0.48,
"learning_rate": 4.493780662322414e-05,
"loss": 0.3963,
"step": 1395
},
{
"epoch": 0.48,
"learning_rate": 4.4929382815058864e-05,
"loss": 0.3138,
"step": 1396
},
{
"epoch": 0.48,
"learning_rate": 4.4920952794774776e-05,
"loss": 0.3846,
"step": 1397
},
{
"epoch": 0.48,
"learning_rate": 4.491251656499956e-05,
"loss": 0.392,
"step": 1398
},
{
"epoch": 0.48,
"learning_rate": 4.490407412836286e-05,
"loss": 0.3155,
"step": 1399
},
{
"epoch": 0.48,
"learning_rate": 4.489562548749624e-05,
"loss": 0.3732,
"step": 1400
},
{
"epoch": 0.48,
"learning_rate": 4.4887170645033185e-05,
"loss": 0.348,
"step": 1401
},
{
"epoch": 0.48,
"learning_rate": 4.487870960360913e-05,
"loss": 0.3664,
"step": 1402
},
{
"epoch": 0.48,
"learning_rate": 4.487024236586143e-05,
"loss": 0.3491,
"step": 1403
},
{
"epoch": 0.48,
"learning_rate": 4.486176893442939e-05,
"loss": 0.3589,
"step": 1404
},
{
"epoch": 0.48,
"learning_rate": 4.485328931195423e-05,
"loss": 0.3259,
"step": 1405
},
{
"epoch": 0.48,
"learning_rate": 4.48448035010791e-05,
"loss": 0.3843,
"step": 1406
},
{
"epoch": 0.48,
"learning_rate": 4.483631150444909e-05,
"loss": 0.388,
"step": 1407
},
{
"epoch": 0.48,
"learning_rate": 4.48278133247112e-05,
"loss": 0.3444,
"step": 1408
},
{
"epoch": 0.48,
"learning_rate": 4.481930896451437e-05,
"loss": 0.348,
"step": 1409
},
{
"epoch": 0.48,
"learning_rate": 4.4810798426509484e-05,
"loss": 0.3238,
"step": 1410
},
{
"epoch": 0.48,
"learning_rate": 4.4802281713349306e-05,
"loss": 0.389,
"step": 1411
},
{
"epoch": 0.48,
"learning_rate": 4.479375882768856e-05,
"loss": 0.3862,
"step": 1412
},
{
"epoch": 0.48,
"learning_rate": 4.478522977218389e-05,
"loss": 0.342,
"step": 1413
},
{
"epoch": 0.48,
"learning_rate": 4.477669454949386e-05,
"loss": 0.3816,
"step": 1414
},
{
"epoch": 0.48,
"learning_rate": 4.4768153162278944e-05,
"loss": 0.3251,
"step": 1415
},
{
"epoch": 0.48,
"learning_rate": 4.475960561320156e-05,
"loss": 0.3605,
"step": 1416
},
{
"epoch": 0.48,
"learning_rate": 4.475105190492602e-05,
"loss": 0.3333,
"step": 1417
},
{
"epoch": 0.48,
"learning_rate": 4.474249204011859e-05,
"loss": 0.3305,
"step": 1418
},
{
"epoch": 0.48,
"learning_rate": 4.4733926021447416e-05,
"loss": 0.3462,
"step": 1419
},
{
"epoch": 0.48,
"learning_rate": 4.4725353851582593e-05,
"loss": 0.3989,
"step": 1420
},
{
"epoch": 0.48,
"learning_rate": 4.471677553319613e-05,
"loss": 0.3584,
"step": 1421
},
{
"epoch": 0.49,
"learning_rate": 4.470819106896193e-05,
"loss": 0.3686,
"step": 1422
},
{
"epoch": 0.49,
"learning_rate": 4.469960046155584e-05,
"loss": 0.3344,
"step": 1423
},
{
"epoch": 0.49,
"learning_rate": 4.4691003713655586e-05,
"loss": 0.3808,
"step": 1424
},
{
"epoch": 0.49,
"learning_rate": 4.468240082794086e-05,
"loss": 0.3406,
"step": 1425
},
{
"epoch": 0.49,
"learning_rate": 4.467379180709321e-05,
"loss": 0.3408,
"step": 1426
},
{
"epoch": 0.49,
"learning_rate": 4.466517665379615e-05,
"loss": 0.3473,
"step": 1427
},
{
"epoch": 0.49,
"learning_rate": 4.4656555370735065e-05,
"loss": 0.3398,
"step": 1428
},
{
"epoch": 0.49,
"learning_rate": 4.464792796059726e-05,
"loss": 0.3811,
"step": 1429
},
{
"epoch": 0.49,
"learning_rate": 4.463929442607197e-05,
"loss": 0.3369,
"step": 1430
},
{
"epoch": 0.49,
"learning_rate": 4.46306547698503e-05,
"loss": 0.3154,
"step": 1431
},
{
"epoch": 0.49,
"learning_rate": 4.462200899462532e-05,
"loss": 0.3425,
"step": 1432
},
{
"epoch": 0.49,
"learning_rate": 4.461335710309195e-05,
"loss": 0.3457,
"step": 1433
},
{
"epoch": 0.49,
"learning_rate": 4.4604699097947055e-05,
"loss": 0.3602,
"step": 1434
},
{
"epoch": 0.49,
"learning_rate": 4.459603498188938e-05,
"loss": 0.3298,
"step": 1435
},
{
"epoch": 0.49,
"learning_rate": 4.45873647576196e-05,
"loss": 0.5398,
"step": 1436
},
{
"epoch": 0.49,
"learning_rate": 4.4578688427840266e-05,
"loss": 0.3266,
"step": 1437
},
{
"epoch": 0.49,
"learning_rate": 4.457000599525586e-05,
"loss": 0.3363,
"step": 1438
},
{
"epoch": 0.49,
"learning_rate": 4.456131746257274e-05,
"loss": 0.3668,
"step": 1439
},
{
"epoch": 0.49,
"learning_rate": 4.455262283249919e-05,
"loss": 0.3462,
"step": 1440
},
{
"epoch": 0.49,
"learning_rate": 4.4543922107745375e-05,
"loss": 0.3685,
"step": 1441
},
{
"epoch": 0.49,
"learning_rate": 4.453521529102337e-05,
"loss": 0.3295,
"step": 1442
},
{
"epoch": 0.49,
"learning_rate": 4.4526502385047144e-05,
"loss": 0.3249,
"step": 1443
},
{
"epoch": 0.49,
"learning_rate": 4.4517783392532574e-05,
"loss": 0.3957,
"step": 1444
},
{
"epoch": 0.49,
"learning_rate": 4.450905831619743e-05,
"loss": 0.3979,
"step": 1445
},
{
"epoch": 0.49,
"learning_rate": 4.450032715876136e-05,
"loss": 0.3448,
"step": 1446
},
{
"epoch": 0.49,
"learning_rate": 4.449158992294593e-05,
"loss": 0.3681,
"step": 1447
},
{
"epoch": 0.49,
"learning_rate": 4.44828466114746e-05,
"loss": 0.3927,
"step": 1448
},
{
"epoch": 0.49,
"learning_rate": 4.4474097227072715e-05,
"loss": 0.352,
"step": 1449
},
{
"epoch": 0.49,
"learning_rate": 4.446534177246751e-05,
"loss": 0.3488,
"step": 1450
},
{
"epoch": 0.5,
"learning_rate": 4.445658025038811e-05,
"loss": 0.3408,
"step": 1451
},
{
"epoch": 0.5,
"learning_rate": 4.444781266356556e-05,
"loss": 0.3645,
"step": 1452
},
{
"epoch": 0.5,
"learning_rate": 4.4439039014732755e-05,
"loss": 0.3847,
"step": 1453
},
{
"epoch": 0.5,
"learning_rate": 4.443025930662451e-05,
"loss": 0.3667,
"step": 1454
},
{
"epoch": 0.5,
"learning_rate": 4.4421473541977515e-05,
"loss": 0.3276,
"step": 1455
},
{
"epoch": 0.5,
"learning_rate": 4.441268172353035e-05,
"loss": 0.3521,
"step": 1456
},
{
"epoch": 0.5,
"learning_rate": 4.440388385402347e-05,
"loss": 0.3365,
"step": 1457
},
{
"epoch": 0.5,
"learning_rate": 4.4395079936199245e-05,
"loss": 0.3147,
"step": 1458
},
{
"epoch": 0.5,
"learning_rate": 4.438626997280191e-05,
"loss": 0.3224,
"step": 1459
},
{
"epoch": 0.5,
"learning_rate": 4.4377453966577585e-05,
"loss": 0.3394,
"step": 1460
},
{
"epoch": 0.5,
"learning_rate": 4.436863192027428e-05,
"loss": 0.3494,
"step": 1461
},
{
"epoch": 0.5,
"learning_rate": 4.4359803836641876e-05,
"loss": 0.3532,
"step": 1462
},
{
"epoch": 0.5,
"learning_rate": 4.4350969718432154e-05,
"loss": 0.3713,
"step": 1463
},
{
"epoch": 0.5,
"learning_rate": 4.434212956839877e-05,
"loss": 0.4061,
"step": 1464
},
{
"epoch": 0.5,
"learning_rate": 4.433328338929724e-05,
"loss": 0.3462,
"step": 1465
},
{
"epoch": 0.5,
"learning_rate": 4.432443118388499e-05,
"loss": 0.3584,
"step": 1466
},
{
"epoch": 0.5,
"learning_rate": 4.4315572954921306e-05,
"loss": 0.3691,
"step": 1467
},
{
"epoch": 0.5,
"learning_rate": 4.430670870516735e-05,
"loss": 0.3593,
"step": 1468
},
{
"epoch": 0.5,
"learning_rate": 4.4297838437386176e-05,
"loss": 0.3886,
"step": 1469
},
{
"epoch": 0.5,
"learning_rate": 4.42889621543427e-05,
"loss": 0.3357,
"step": 1470
},
{
"epoch": 0.5,
"learning_rate": 4.4280079858803716e-05,
"loss": 0.3475,
"step": 1471
},
{
"epoch": 0.5,
"learning_rate": 4.42711915535379e-05,
"loss": 0.3437,
"step": 1472
},
{
"epoch": 0.5,
"learning_rate": 4.426229724131579e-05,
"loss": 0.3728,
"step": 1473
},
{
"epoch": 0.5,
"learning_rate": 4.4253396924909795e-05,
"loss": 0.3177,
"step": 1474
},
{
"epoch": 0.5,
"learning_rate": 4.4244490607094215e-05,
"loss": 0.394,
"step": 1475
},
{
"epoch": 0.5,
"learning_rate": 4.4235578290645194e-05,
"loss": 0.377,
"step": 1476
},
{
"epoch": 0.5,
"learning_rate": 4.422665997834077e-05,
"loss": 0.3536,
"step": 1477
},
{
"epoch": 0.5,
"learning_rate": 4.421773567296084e-05,
"loss": 0.4041,
"step": 1478
},
{
"epoch": 0.5,
"learning_rate": 4.420880537728715e-05,
"loss": 0.3707,
"step": 1479
},
{
"epoch": 0.5,
"learning_rate": 4.419986909410335e-05,
"loss": 0.3472,
"step": 1480
},
{
"epoch": 0.51,
"learning_rate": 4.4190926826194936e-05,
"loss": 0.3849,
"step": 1481
},
{
"epoch": 0.51,
"learning_rate": 4.418197857634927e-05,
"loss": 0.2987,
"step": 1482
},
{
"epoch": 0.51,
"learning_rate": 4.417302434735557e-05,
"loss": 0.3686,
"step": 1483
},
{
"epoch": 0.51,
"learning_rate": 4.4164064142004946e-05,
"loss": 0.3504,
"step": 1484
},
{
"epoch": 0.51,
"learning_rate": 4.415509796309033e-05,
"loss": 0.2969,
"step": 1485
},
{
"epoch": 0.51,
"learning_rate": 4.4146125813406555e-05,
"loss": 0.3268,
"step": 1486
},
{
"epoch": 0.51,
"learning_rate": 4.4137147695750305e-05,
"loss": 0.3653,
"step": 1487
},
{
"epoch": 0.51,
"learning_rate": 4.4128163612920096e-05,
"loss": 0.3559,
"step": 1488
},
{
"epoch": 0.51,
"learning_rate": 4.4119173567716334e-05,
"loss": 0.4042,
"step": 1489
},
{
"epoch": 0.51,
"learning_rate": 4.411017756294128e-05,
"loss": 0.3758,
"step": 1490
},
{
"epoch": 0.51,
"learning_rate": 4.4101175601399046e-05,
"loss": 0.3732,
"step": 1491
},
{
"epoch": 0.51,
"learning_rate": 4.40921676858956e-05,
"loss": 0.3399,
"step": 1492
},
{
"epoch": 0.51,
"learning_rate": 4.408315381923878e-05,
"loss": 0.3145,
"step": 1493
},
{
"epoch": 0.51,
"learning_rate": 4.4074134004238246e-05,
"loss": 0.2858,
"step": 1494
},
{
"epoch": 0.51,
"learning_rate": 4.406510824370554e-05,
"loss": 0.3538,
"step": 1495
},
{
"epoch": 0.51,
"learning_rate": 4.405607654045406e-05,
"loss": 0.3745,
"step": 1496
},
{
"epoch": 0.51,
"learning_rate": 4.4047038897299044e-05,
"loss": 0.3325,
"step": 1497
},
{
"epoch": 0.51,
"learning_rate": 4.403799531705757e-05,
"loss": 0.3501,
"step": 1498
},
{
"epoch": 0.51,
"learning_rate": 4.4028945802548604e-05,
"loss": 0.4226,
"step": 1499
},
{
"epoch": 0.51,
"learning_rate": 4.401989035659292e-05,
"loss": 0.3657,
"step": 1500
},
{
"epoch": 0.51,
"learning_rate": 4.4010828982013176e-05,
"loss": 0.3823,
"step": 1501
},
{
"epoch": 0.51,
"learning_rate": 4.400176168163384e-05,
"loss": 0.3345,
"step": 1502
},
{
"epoch": 0.51,
"learning_rate": 4.399268845828127e-05,
"loss": 0.3628,
"step": 1503
},
{
"epoch": 0.51,
"learning_rate": 4.3983609314783635e-05,
"loss": 0.4098,
"step": 1504
},
{
"epoch": 0.51,
"learning_rate": 4.3974524253970974e-05,
"loss": 0.3427,
"step": 1505
},
{
"epoch": 0.51,
"learning_rate": 4.396543327867515e-05,
"loss": 0.3921,
"step": 1506
},
{
"epoch": 0.51,
"learning_rate": 4.395633639172988e-05,
"loss": 0.385,
"step": 1507
},
{
"epoch": 0.51,
"learning_rate": 4.394723359597074e-05,
"loss": 0.342,
"step": 1508
},
{
"epoch": 0.51,
"learning_rate": 4.39381248942351e-05,
"loss": 0.3198,
"step": 1509
},
{
"epoch": 0.52,
"learning_rate": 4.392901028936223e-05,
"loss": 0.3442,
"step": 1510
},
{
"epoch": 0.52,
"learning_rate": 4.39198897841932e-05,
"loss": 0.3344,
"step": 1511
},
{
"epoch": 0.52,
"learning_rate": 4.391076338157093e-05,
"loss": 0.3615,
"step": 1512
},
{
"epoch": 0.52,
"learning_rate": 4.390163108434019e-05,
"loss": 0.375,
"step": 1513
},
{
"epoch": 0.52,
"learning_rate": 4.3892492895347556e-05,
"loss": 0.3486,
"step": 1514
},
{
"epoch": 0.52,
"learning_rate": 4.388334881744149e-05,
"loss": 0.3376,
"step": 1515
},
{
"epoch": 0.52,
"learning_rate": 4.387419885347223e-05,
"loss": 0.3603,
"step": 1516
},
{
"epoch": 0.52,
"learning_rate": 4.38650430062919e-05,
"loss": 0.3299,
"step": 1517
},
{
"epoch": 0.52,
"learning_rate": 4.385588127875443e-05,
"loss": 0.3399,
"step": 1518
},
{
"epoch": 0.52,
"learning_rate": 4.38467136737156e-05,
"loss": 0.3453,
"step": 1519
},
{
"epoch": 0.52,
"learning_rate": 4.3837540194032986e-05,
"loss": 0.3634,
"step": 1520
},
{
"epoch": 0.52,
"learning_rate": 4.3828360842566054e-05,
"loss": 0.3433,
"step": 1521
},
{
"epoch": 0.52,
"learning_rate": 4.381917562217605e-05,
"loss": 0.3812,
"step": 1522
},
{
"epoch": 0.52,
"learning_rate": 4.3809984535726064e-05,
"loss": 0.3225,
"step": 1523
},
{
"epoch": 0.52,
"learning_rate": 4.3800787586081036e-05,
"loss": 0.3392,
"step": 1524
},
{
"epoch": 0.52,
"learning_rate": 4.37915847761077e-05,
"loss": 0.3199,
"step": 1525
},
{
"epoch": 0.52,
"learning_rate": 4.3782376108674636e-05,
"loss": 0.3484,
"step": 1526
},
{
"epoch": 0.52,
"learning_rate": 4.377316158665224e-05,
"loss": 0.3253,
"step": 1527
},
{
"epoch": 0.52,
"learning_rate": 4.376394121291275e-05,
"loss": 0.4147,
"step": 1528
},
{
"epoch": 0.52,
"learning_rate": 4.37547149903302e-05,
"loss": 0.3649,
"step": 1529
},
{
"epoch": 0.52,
"learning_rate": 4.374548292178049e-05,
"loss": 0.322,
"step": 1530
},
{
"epoch": 0.52,
"learning_rate": 4.3736245010141295e-05,
"loss": 0.311,
"step": 1531
},
{
"epoch": 0.52,
"learning_rate": 4.372700125829213e-05,
"loss": 0.3577,
"step": 1532
},
{
"epoch": 0.52,
"learning_rate": 4.371775166911435e-05,
"loss": 0.3841,
"step": 1533
},
{
"epoch": 0.52,
"learning_rate": 4.3708496245491106e-05,
"loss": 0.3583,
"step": 1534
},
{
"epoch": 0.52,
"learning_rate": 4.369923499030737e-05,
"loss": 0.3334,
"step": 1535
},
{
"epoch": 0.52,
"learning_rate": 4.368996790644992e-05,
"loss": 0.3593,
"step": 1536
},
{
"epoch": 0.52,
"learning_rate": 4.36806949968074e-05,
"loss": 0.3276,
"step": 1537
},
{
"epoch": 0.52,
"learning_rate": 4.367141626427022e-05,
"loss": 0.3725,
"step": 1538
},
{
"epoch": 0.53,
"learning_rate": 4.3662131711730616e-05,
"loss": 0.3489,
"step": 1539
},
{
"epoch": 0.53,
"learning_rate": 4.3652841342082654e-05,
"loss": 0.3813,
"step": 1540
},
{
"epoch": 0.53,
"learning_rate": 4.3643545158222197e-05,
"loss": 0.3584,
"step": 1541
},
{
"epoch": 0.53,
"learning_rate": 4.363424316304693e-05,
"loss": 0.3035,
"step": 1542
},
{
"epoch": 0.53,
"learning_rate": 4.362493535945634e-05,
"loss": 0.3461,
"step": 1543
},
{
"epoch": 0.53,
"learning_rate": 4.361562175035173e-05,
"loss": 0.3911,
"step": 1544
},
{
"epoch": 0.53,
"learning_rate": 4.360630233863623e-05,
"loss": 0.429,
"step": 1545
},
{
"epoch": 0.53,
"learning_rate": 4.359697712721473e-05,
"loss": 0.3445,
"step": 1546
},
{
"epoch": 0.53,
"learning_rate": 4.358764611899399e-05,
"loss": 0.3465,
"step": 1547
},
{
"epoch": 0.53,
"learning_rate": 4.357830931688253e-05,
"loss": 0.3277,
"step": 1548
},
{
"epoch": 0.53,
"learning_rate": 4.3568966723790686e-05,
"loss": 0.3615,
"step": 1549
},
{
"epoch": 0.53,
"learning_rate": 4.3559618342630626e-05,
"loss": 0.3543,
"step": 1550
},
{
"epoch": 0.53,
"learning_rate": 4.355026417631629e-05,
"loss": 0.3262,
"step": 1551
},
{
"epoch": 0.53,
"learning_rate": 4.3540904227763425e-05,
"loss": 0.3738,
"step": 1552
},
{
"epoch": 0.53,
"learning_rate": 4.3531538499889605e-05,
"loss": 0.396,
"step": 1553
},
{
"epoch": 0.53,
"learning_rate": 4.352216699561417e-05,
"loss": 0.3117,
"step": 1554
},
{
"epoch": 0.53,
"learning_rate": 4.35127897178583e-05,
"loss": 0.3343,
"step": 1555
},
{
"epoch": 0.53,
"learning_rate": 4.3503406669544936e-05,
"loss": 0.3716,
"step": 1556
},
{
"epoch": 0.53,
"learning_rate": 4.349401785359884e-05,
"loss": 0.3365,
"step": 1557
},
{
"epoch": 0.53,
"learning_rate": 4.348462327294657e-05,
"loss": 0.3893,
"step": 1558
},
{
"epoch": 0.53,
"learning_rate": 4.347522293051648e-05,
"loss": 0.3779,
"step": 1559
},
{
"epoch": 0.53,
"learning_rate": 4.346581682923871e-05,
"loss": 0.3223,
"step": 1560
},
{
"epoch": 0.53,
"learning_rate": 4.3456404972045216e-05,
"loss": 0.4029,
"step": 1561
},
{
"epoch": 0.53,
"learning_rate": 4.344698736186973e-05,
"loss": 0.3546,
"step": 1562
},
{
"epoch": 0.53,
"learning_rate": 4.343756400164777e-05,
"loss": 0.4174,
"step": 1563
},
{
"epoch": 0.53,
"learning_rate": 4.3428134894316676e-05,
"loss": 0.3611,
"step": 1564
},
{
"epoch": 0.53,
"learning_rate": 4.3418700042815555e-05,
"loss": 0.345,
"step": 1565
},
{
"epoch": 0.53,
"learning_rate": 4.3409259450085315e-05,
"loss": 0.3724,
"step": 1566
},
{
"epoch": 0.53,
"learning_rate": 4.339981311906865e-05,
"loss": 0.3582,
"step": 1567
},
{
"epoch": 0.53,
"learning_rate": 4.339036105271004e-05,
"loss": 0.3125,
"step": 1568
},
{
"epoch": 0.54,
"learning_rate": 4.338090325395575e-05,
"loss": 0.3446,
"step": 1569
},
{
"epoch": 0.54,
"learning_rate": 4.337143972575385e-05,
"loss": 0.3919,
"step": 1570
},
{
"epoch": 0.54,
"learning_rate": 4.336197047105418e-05,
"loss": 0.3197,
"step": 1571
},
{
"epoch": 0.54,
"learning_rate": 4.3352495492808365e-05,
"loss": 0.3715,
"step": 1572
},
{
"epoch": 0.54,
"learning_rate": 4.3343014793969816e-05,
"loss": 0.3466,
"step": 1573
},
{
"epoch": 0.54,
"learning_rate": 4.3333528377493736e-05,
"loss": 0.3692,
"step": 1574
},
{
"epoch": 0.54,
"learning_rate": 4.3324036246337096e-05,
"loss": 0.353,
"step": 1575
},
{
"epoch": 0.54,
"learning_rate": 4.331453840345867e-05,
"loss": 0.3253,
"step": 1576
},
{
"epoch": 0.54,
"learning_rate": 4.3305034851818974e-05,
"loss": 0.3605,
"step": 1577
},
{
"epoch": 0.54,
"learning_rate": 4.329552559438034e-05,
"loss": 0.3113,
"step": 1578
},
{
"epoch": 0.54,
"learning_rate": 4.3286010634106875e-05,
"loss": 0.2882,
"step": 1579
},
{
"epoch": 0.54,
"learning_rate": 4.3276489973964434e-05,
"loss": 0.3358,
"step": 1580
},
{
"epoch": 0.54,
"learning_rate": 4.326696361692069e-05,
"loss": 0.3528,
"step": 1581
},
{
"epoch": 0.54,
"learning_rate": 4.3257431565945045e-05,
"loss": 0.3269,
"step": 1582
},
{
"epoch": 0.54,
"learning_rate": 4.3247893824008725e-05,
"loss": 0.3309,
"step": 1583
},
{
"epoch": 0.54,
"learning_rate": 4.32383503940847e-05,
"loss": 0.3722,
"step": 1584
},
{
"epoch": 0.54,
"learning_rate": 4.3228801279147715e-05,
"loss": 0.4254,
"step": 1585
},
{
"epoch": 0.54,
"learning_rate": 4.321924648217429e-05,
"loss": 0.3217,
"step": 1586
},
{
"epoch": 0.54,
"learning_rate": 4.3209686006142726e-05,
"loss": 0.3389,
"step": 1587
},
{
"epoch": 0.54,
"learning_rate": 4.3200119854033084e-05,
"loss": 0.3284,
"step": 1588
},
{
"epoch": 0.54,
"learning_rate": 4.3190548028827185e-05,
"loss": 0.323,
"step": 1589
},
{
"epoch": 0.54,
"learning_rate": 4.3180970533508645e-05,
"loss": 0.3196,
"step": 1590
},
{
"epoch": 0.54,
"learning_rate": 4.317138737106282e-05,
"loss": 0.3439,
"step": 1591
},
{
"epoch": 0.54,
"learning_rate": 4.316179854447685e-05,
"loss": 0.3586,
"step": 1592
},
{
"epoch": 0.54,
"learning_rate": 4.315220405673964e-05,
"loss": 0.3157,
"step": 1593
},
{
"epoch": 0.54,
"learning_rate": 4.314260391084184e-05,
"loss": 0.3283,
"step": 1594
},
{
"epoch": 0.54,
"learning_rate": 4.313299810977589e-05,
"loss": 0.3382,
"step": 1595
},
{
"epoch": 0.54,
"learning_rate": 4.312338665653598e-05,
"loss": 0.363,
"step": 1596
},
{
"epoch": 0.54,
"learning_rate": 4.311376955411806e-05,
"loss": 0.4139,
"step": 1597
},
{
"epoch": 0.55,
"learning_rate": 4.310414680551985e-05,
"loss": 0.3327,
"step": 1598
},
{
"epoch": 0.55,
"learning_rate": 4.30945184137408e-05,
"loss": 0.34,
"step": 1599
},
{
"epoch": 0.55,
"learning_rate": 4.308488438178218e-05,
"loss": 0.3818,
"step": 1600
},
{
"epoch": 0.55,
"learning_rate": 4.307524471264695e-05,
"loss": 0.357,
"step": 1601
},
{
"epoch": 0.55,
"learning_rate": 4.3065599409339867e-05,
"loss": 0.3788,
"step": 1602
},
{
"epoch": 0.55,
"learning_rate": 4.3055948474867446e-05,
"loss": 0.3873,
"step": 1603
},
{
"epoch": 0.55,
"learning_rate": 4.304629191223793e-05,
"loss": 0.3432,
"step": 1604
},
{
"epoch": 0.55,
"learning_rate": 4.3036629724461344e-05,
"loss": 0.3434,
"step": 1605
},
{
"epoch": 0.55,
"learning_rate": 4.302696191454945e-05,
"loss": 0.3318,
"step": 1606
},
{
"epoch": 0.55,
"learning_rate": 4.301728848551576e-05,
"loss": 0.3389,
"step": 1607
},
{
"epoch": 0.55,
"learning_rate": 4.300760944037556e-05,
"loss": 0.3557,
"step": 1608
},
{
"epoch": 0.55,
"learning_rate": 4.299792478214587e-05,
"loss": 0.2988,
"step": 1609
},
{
"epoch": 0.55,
"learning_rate": 4.298823451384545e-05,
"loss": 0.3513,
"step": 1610
},
{
"epoch": 0.55,
"learning_rate": 4.2978538638494824e-05,
"loss": 0.3569,
"step": 1611
},
{
"epoch": 0.55,
"learning_rate": 4.296883715911627e-05,
"loss": 0.3475,
"step": 1612
},
{
"epoch": 0.55,
"learning_rate": 4.295913007873379e-05,
"loss": 0.3861,
"step": 1613
},
{
"epoch": 0.55,
"learning_rate": 4.294941740037315e-05,
"loss": 0.359,
"step": 1614
},
{
"epoch": 0.55,
"learning_rate": 4.293969912706185e-05,
"loss": 0.3674,
"step": 1615
},
{
"epoch": 0.55,
"learning_rate": 4.292997526182915e-05,
"loss": 0.3195,
"step": 1616
},
{
"epoch": 0.55,
"learning_rate": 4.292024580770603e-05,
"loss": 0.3315,
"step": 1617
},
{
"epoch": 0.55,
"learning_rate": 4.2910510767725235e-05,
"loss": 0.4013,
"step": 1618
},
{
"epoch": 0.55,
"learning_rate": 4.2900770144921224e-05,
"loss": 0.3449,
"step": 1619
},
{
"epoch": 0.55,
"learning_rate": 4.2891023942330235e-05,
"loss": 0.3796,
"step": 1620
},
{
"epoch": 0.55,
"learning_rate": 4.28812721629902e-05,
"loss": 0.3773,
"step": 1621
},
{
"epoch": 0.55,
"learning_rate": 4.287151480994084e-05,
"loss": 0.3421,
"step": 1622
},
{
"epoch": 0.55,
"learning_rate": 4.2861751886223565e-05,
"loss": 0.3517,
"step": 1623
},
{
"epoch": 0.55,
"learning_rate": 4.2851983394881545e-05,
"loss": 0.3237,
"step": 1624
},
{
"epoch": 0.55,
"learning_rate": 4.2842209338959694e-05,
"loss": 0.3634,
"step": 1625
},
{
"epoch": 0.55,
"learning_rate": 4.283242972150463e-05,
"loss": 0.321,
"step": 1626
},
{
"epoch": 0.56,
"learning_rate": 4.282264454556473e-05,
"loss": 0.3727,
"step": 1627
},
{
"epoch": 0.56,
"learning_rate": 4.2812853814190115e-05,
"loss": 0.3283,
"step": 1628
},
{
"epoch": 0.56,
"learning_rate": 4.28030575304326e-05,
"loss": 0.3508,
"step": 1629
},
{
"epoch": 0.56,
"learning_rate": 4.2793255697345745e-05,
"loss": 0.378,
"step": 1630
},
{
"epoch": 0.56,
"learning_rate": 4.2783448317984875e-05,
"loss": 0.3429,
"step": 1631
},
{
"epoch": 0.56,
"learning_rate": 4.277363539540698e-05,
"loss": 0.3166,
"step": 1632
},
{
"epoch": 0.56,
"learning_rate": 4.276381693267084e-05,
"loss": 0.3204,
"step": 1633
},
{
"epoch": 0.56,
"learning_rate": 4.275399293283692e-05,
"loss": 0.3975,
"step": 1634
},
{
"epoch": 0.56,
"learning_rate": 4.274416339896742e-05,
"loss": 0.3335,
"step": 1635
},
{
"epoch": 0.56,
"learning_rate": 4.2734328334126286e-05,
"loss": 0.3635,
"step": 1636
},
{
"epoch": 0.56,
"learning_rate": 4.272448774137916e-05,
"loss": 0.3529,
"step": 1637
},
{
"epoch": 0.56,
"learning_rate": 4.271464162379342e-05,
"loss": 0.3408,
"step": 1638
},
{
"epoch": 0.56,
"learning_rate": 4.270478998443817e-05,
"loss": 0.352,
"step": 1639
},
{
"epoch": 0.56,
"learning_rate": 4.2694932826384226e-05,
"loss": 0.3793,
"step": 1640
},
{
"epoch": 0.56,
"learning_rate": 4.268507015270413e-05,
"loss": 0.313,
"step": 1641
},
{
"epoch": 0.56,
"learning_rate": 4.2675201966472145e-05,
"loss": 0.3513,
"step": 1642
},
{
"epoch": 0.56,
"learning_rate": 4.2665328270764235e-05,
"loss": 0.3324,
"step": 1643
},
{
"epoch": 0.56,
"learning_rate": 4.265544906865812e-05,
"loss": 0.3867,
"step": 1644
},
{
"epoch": 0.56,
"learning_rate": 4.264556436323319e-05,
"loss": 0.3431,
"step": 1645
},
{
"epoch": 0.56,
"learning_rate": 4.263567415757058e-05,
"loss": 0.3895,
"step": 1646
},
{
"epoch": 0.56,
"learning_rate": 4.262577845475314e-05,
"loss": 0.33,
"step": 1647
},
{
"epoch": 0.56,
"learning_rate": 4.2615877257865415e-05,
"loss": 0.3657,
"step": 1648
},
{
"epoch": 0.56,
"learning_rate": 4.260597056999367e-05,
"loss": 0.3531,
"step": 1649
},
{
"epoch": 0.56,
"learning_rate": 4.2596058394225893e-05,
"loss": 0.384,
"step": 1650
},
{
"epoch": 0.56,
"learning_rate": 4.258614073365177e-05,
"loss": 0.3438,
"step": 1651
},
{
"epoch": 0.56,
"learning_rate": 4.257621759136271e-05,
"loss": 0.3454,
"step": 1652
},
{
"epoch": 0.56,
"learning_rate": 4.2566288970451794e-05,
"loss": 0.3907,
"step": 1653
},
{
"epoch": 0.56,
"learning_rate": 4.255635487401387e-05,
"loss": 0.3386,
"step": 1654
},
{
"epoch": 0.56,
"learning_rate": 4.254641530514544e-05,
"loss": 0.345,
"step": 1655
},
{
"epoch": 0.56,
"learning_rate": 4.253647026694475e-05,
"loss": 0.357,
"step": 1656
},
{
"epoch": 0.57,
"learning_rate": 4.2526519762511716e-05,
"loss": 0.3547,
"step": 1657
},
{
"epoch": 0.57,
"learning_rate": 4.251656379494798e-05,
"loss": 0.2969,
"step": 1658
},
{
"epoch": 0.57,
"learning_rate": 4.2506602367356884e-05,
"loss": 0.3459,
"step": 1659
},
{
"epoch": 0.57,
"learning_rate": 4.249663548284347e-05,
"loss": 0.3326,
"step": 1660
},
{
"epoch": 0.57,
"learning_rate": 4.2486663144514486e-05,
"loss": 0.3829,
"step": 1661
},
{
"epoch": 0.57,
"learning_rate": 4.247668535547837e-05,
"loss": 0.3645,
"step": 1662
},
{
"epoch": 0.57,
"learning_rate": 4.2466702118845265e-05,
"loss": 0.3224,
"step": 1663
},
{
"epoch": 0.57,
"learning_rate": 4.2456713437727e-05,
"loss": 0.3465,
"step": 1664
},
{
"epoch": 0.57,
"learning_rate": 4.2446719315237135e-05,
"loss": 0.366,
"step": 1665
},
{
"epoch": 0.57,
"learning_rate": 4.2436719754490896e-05,
"loss": 0.3734,
"step": 1666
},
{
"epoch": 0.57,
"learning_rate": 4.242671475860519e-05,
"loss": 0.3447,
"step": 1667
},
{
"epoch": 0.57,
"learning_rate": 4.2416704330698665e-05,
"loss": 0.3176,
"step": 1668
},
{
"epoch": 0.57,
"learning_rate": 4.2406688473891626e-05,
"loss": 0.3468,
"step": 1669
},
{
"epoch": 0.57,
"learning_rate": 4.239666719130608e-05,
"loss": 0.3515,
"step": 1670
},
{
"epoch": 0.57,
"learning_rate": 4.238664048606573e-05,
"loss": 0.3732,
"step": 1671
},
{
"epoch": 0.57,
"learning_rate": 4.237660836129597e-05,
"loss": 0.3358,
"step": 1672
},
{
"epoch": 0.57,
"learning_rate": 4.236657082012387e-05,
"loss": 0.3472,
"step": 1673
},
{
"epoch": 0.57,
"learning_rate": 4.2356527865678196e-05,
"loss": 0.3629,
"step": 1674
},
{
"epoch": 0.57,
"learning_rate": 4.234647950108941e-05,
"loss": 0.3888,
"step": 1675
},
{
"epoch": 0.57,
"learning_rate": 4.233642572948966e-05,
"loss": 0.3201,
"step": 1676
},
{
"epoch": 0.57,
"learning_rate": 4.232636655401275e-05,
"loss": 0.3273,
"step": 1677
},
{
"epoch": 0.57,
"learning_rate": 4.2316301977794203e-05,
"loss": 0.3665,
"step": 1678
},
{
"epoch": 0.57,
"learning_rate": 4.2306232003971215e-05,
"loss": 0.3501,
"step": 1679
},
{
"epoch": 0.57,
"learning_rate": 4.229615663568266e-05,
"loss": 0.3101,
"step": 1680
},
{
"epoch": 0.57,
"learning_rate": 4.22860758760691e-05,
"loss": 0.3885,
"step": 1681
},
{
"epoch": 0.57,
"learning_rate": 4.227598972827277e-05,
"loss": 0.3322,
"step": 1682
},
{
"epoch": 0.57,
"learning_rate": 4.226589819543758e-05,
"loss": 0.3988,
"step": 1683
},
{
"epoch": 0.57,
"learning_rate": 4.2255801280709144e-05,
"loss": 0.3371,
"step": 1684
},
{
"epoch": 0.57,
"learning_rate": 4.224569898723473e-05,
"loss": 0.3069,
"step": 1685
},
{
"epoch": 0.58,
"learning_rate": 4.223559131816328e-05,
"loss": 0.3375,
"step": 1686
},
{
"epoch": 0.58,
"learning_rate": 4.2225478276645423e-05,
"loss": 0.2907,
"step": 1687
},
{
"epoch": 0.58,
"learning_rate": 4.221535986583347e-05,
"loss": 0.362,
"step": 1688
},
{
"epoch": 0.58,
"learning_rate": 4.220523608888139e-05,
"loss": 0.3448,
"step": 1689
},
{
"epoch": 0.58,
"learning_rate": 4.219510694894483e-05,
"loss": 0.4547,
"step": 1690
},
{
"epoch": 0.58,
"learning_rate": 4.2184972449181105e-05,
"loss": 0.3015,
"step": 1691
},
{
"epoch": 0.58,
"learning_rate": 4.2174832592749214e-05,
"loss": 0.3628,
"step": 1692
},
{
"epoch": 0.58,
"learning_rate": 4.216468738280981e-05,
"loss": 0.3431,
"step": 1693
},
{
"epoch": 0.58,
"learning_rate": 4.215453682252522e-05,
"loss": 0.327,
"step": 1694
},
{
"epoch": 0.58,
"learning_rate": 4.2144380915059445e-05,
"loss": 0.3749,
"step": 1695
},
{
"epoch": 0.58,
"learning_rate": 4.2134219663578146e-05,
"loss": 0.3149,
"step": 1696
},
{
"epoch": 0.58,
"learning_rate": 4.212405307124865e-05,
"loss": 0.3633,
"step": 1697
},
{
"epoch": 0.58,
"learning_rate": 4.211388114123995e-05,
"loss": 0.3065,
"step": 1698
},
{
"epoch": 0.58,
"learning_rate": 4.21037038767227e-05,
"loss": 0.3755,
"step": 1699
},
{
"epoch": 0.58,
"learning_rate": 4.209352128086923e-05,
"loss": 0.4098,
"step": 1700
},
{
"epoch": 0.58,
"learning_rate": 4.208333335685352e-05,
"loss": 0.3522,
"step": 1701
},
{
"epoch": 0.58,
"learning_rate": 4.207314010785119e-05,
"loss": 0.366,
"step": 1702
},
{
"epoch": 0.58,
"learning_rate": 4.206294153703958e-05,
"loss": 0.3263,
"step": 1703
},
{
"epoch": 0.58,
"learning_rate": 4.2052737647597616e-05,
"loss": 0.3367,
"step": 1704
},
{
"epoch": 0.58,
"learning_rate": 4.204252844270594e-05,
"loss": 0.3041,
"step": 1705
},
{
"epoch": 0.58,
"learning_rate": 4.2032313925546826e-05,
"loss": 0.3564,
"step": 1706
},
{
"epoch": 0.58,
"learning_rate": 4.2022094099304194e-05,
"loss": 0.3432,
"step": 1707
},
{
"epoch": 0.58,
"learning_rate": 4.201186896716363e-05,
"loss": 0.3273,
"step": 1708
},
{
"epoch": 0.58,
"learning_rate": 4.200163853231239e-05,
"loss": 0.3365,
"step": 1709
},
{
"epoch": 0.58,
"learning_rate": 4.1991402797939356e-05,
"loss": 0.3551,
"step": 1710
},
{
"epoch": 0.58,
"learning_rate": 4.1981161767235075e-05,
"loss": 0.4036,
"step": 1711
},
{
"epoch": 0.58,
"learning_rate": 4.1970915443391745e-05,
"loss": 0.3735,
"step": 1712
},
{
"epoch": 0.58,
"learning_rate": 4.196066382960321e-05,
"loss": 0.3368,
"step": 1713
},
{
"epoch": 0.58,
"learning_rate": 4.1950406929064965e-05,
"loss": 0.3372,
"step": 1714
},
{
"epoch": 0.59,
"learning_rate": 4.194014474497415e-05,
"loss": 0.3303,
"step": 1715
},
{
"epoch": 0.59,
"learning_rate": 4.1929877280529564e-05,
"loss": 0.3246,
"step": 1716
},
{
"epoch": 0.59,
"learning_rate": 4.191960453893165e-05,
"loss": 0.3366,
"step": 1717
},
{
"epoch": 0.59,
"learning_rate": 4.190932652338245e-05,
"loss": 0.3361,
"step": 1718
},
{
"epoch": 0.59,
"learning_rate": 4.189904323708573e-05,
"loss": 0.3684,
"step": 1719
},
{
"epoch": 0.59,
"learning_rate": 4.188875468324685e-05,
"loss": 0.3263,
"step": 1720
},
{
"epoch": 0.59,
"learning_rate": 4.18784608650728e-05,
"loss": 0.3371,
"step": 1721
},
{
"epoch": 0.59,
"learning_rate": 4.186816178577223e-05,
"loss": 0.3286,
"step": 1722
},
{
"epoch": 0.59,
"learning_rate": 4.185785744855546e-05,
"loss": 0.4653,
"step": 1723
},
{
"epoch": 0.59,
"learning_rate": 4.1847547856634395e-05,
"loss": 0.4089,
"step": 1724
},
{
"epoch": 0.59,
"learning_rate": 4.183723301322261e-05,
"loss": 0.3597,
"step": 1725
},
{
"epoch": 0.59,
"learning_rate": 4.182691292153531e-05,
"loss": 0.3581,
"step": 1726
},
{
"epoch": 0.59,
"learning_rate": 4.181658758478934e-05,
"loss": 0.3718,
"step": 1727
},
{
"epoch": 0.59,
"learning_rate": 4.1806257006203157e-05,
"loss": 0.3301,
"step": 1728
},
{
"epoch": 0.59,
"learning_rate": 4.1795921188996864e-05,
"loss": 0.3489,
"step": 1729
},
{
"epoch": 0.59,
"learning_rate": 4.178558013639224e-05,
"loss": 0.3693,
"step": 1730
},
{
"epoch": 0.59,
"learning_rate": 4.177523385161264e-05,
"loss": 0.3153,
"step": 1731
},
{
"epoch": 0.59,
"learning_rate": 4.176488233788306e-05,
"loss": 0.3489,
"step": 1732
},
{
"epoch": 0.59,
"learning_rate": 4.175452559843014e-05,
"loss": 0.3853,
"step": 1733
},
{
"epoch": 0.59,
"learning_rate": 4.174416363648215e-05,
"loss": 0.3767,
"step": 1734
},
{
"epoch": 0.59,
"learning_rate": 4.1733796455268966e-05,
"loss": 0.3677,
"step": 1735
},
{
"epoch": 0.59,
"learning_rate": 4.172342405802212e-05,
"loss": 0.3516,
"step": 1736
},
{
"epoch": 0.59,
"learning_rate": 4.171304644797476e-05,
"loss": 0.375,
"step": 1737
},
{
"epoch": 0.59,
"learning_rate": 4.1702663628361636e-05,
"loss": 0.3405,
"step": 1738
},
{
"epoch": 0.59,
"learning_rate": 4.169227560241915e-05,
"loss": 0.3683,
"step": 1739
},
{
"epoch": 0.59,
"learning_rate": 4.1681882373385326e-05,
"loss": 0.4032,
"step": 1740
},
{
"epoch": 0.59,
"learning_rate": 4.167148394449979e-05,
"loss": 0.3569,
"step": 1741
},
{
"epoch": 0.59,
"learning_rate": 4.16610803190038e-05,
"loss": 0.3325,
"step": 1742
},
{
"epoch": 0.59,
"learning_rate": 4.1650671500140244e-05,
"loss": 0.3425,
"step": 1743
},
{
"epoch": 0.6,
"learning_rate": 4.164025749115361e-05,
"loss": 0.3125,
"step": 1744
},
{
"epoch": 0.6,
"learning_rate": 4.1629838295290014e-05,
"loss": 0.3477,
"step": 1745
},
{
"epoch": 0.6,
"learning_rate": 4.1619413915797195e-05,
"loss": 0.4626,
"step": 1746
},
{
"epoch": 0.6,
"learning_rate": 4.160898435592449e-05,
"loss": 0.3109,
"step": 1747
},
{
"epoch": 0.6,
"learning_rate": 4.159854961892287e-05,
"loss": 0.3455,
"step": 1748
},
{
"epoch": 0.6,
"learning_rate": 4.15881097080449e-05,
"loss": 0.3475,
"step": 1749
},
{
"epoch": 0.6,
"learning_rate": 4.157766462654478e-05,
"loss": 0.3495,
"step": 1750
},
{
"epoch": 0.6,
"learning_rate": 4.156721437767829e-05,
"loss": 0.3629,
"step": 1751
},
{
"epoch": 0.6,
"learning_rate": 4.1556758964702866e-05,
"loss": 0.3568,
"step": 1752
},
{
"epoch": 0.6,
"learning_rate": 4.154629839087752e-05,
"loss": 0.3337,
"step": 1753
},
{
"epoch": 0.6,
"learning_rate": 4.153583265946288e-05,
"loss": 0.3206,
"step": 1754
},
{
"epoch": 0.6,
"learning_rate": 4.1525361773721176e-05,
"loss": 0.3544,
"step": 1755
},
{
"epoch": 0.6,
"learning_rate": 4.151488573691626e-05,
"loss": 0.3701,
"step": 1756
},
{
"epoch": 0.6,
"learning_rate": 4.150440455231357e-05,
"loss": 0.4084,
"step": 1757
},
{
"epoch": 0.6,
"learning_rate": 4.1493918223180187e-05,
"loss": 0.548,
"step": 1758
},
{
"epoch": 0.6,
"learning_rate": 4.148342675278474e-05,
"loss": 0.3565,
"step": 1759
},
{
"epoch": 0.6,
"learning_rate": 4.147293014439749e-05,
"loss": 0.337,
"step": 1760
},
{
"epoch": 0.6,
"learning_rate": 4.146242840129031e-05,
"loss": 0.2923,
"step": 1761
},
{
"epoch": 0.6,
"learning_rate": 4.145192152673667e-05,
"loss": 0.3551,
"step": 1762
},
{
"epoch": 0.6,
"learning_rate": 4.144140952401161e-05,
"loss": 0.3446,
"step": 1763
},
{
"epoch": 0.6,
"learning_rate": 4.14308923963918e-05,
"loss": 0.3477,
"step": 1764
},
{
"epoch": 0.6,
"learning_rate": 4.142037014715549e-05,
"loss": 0.3604,
"step": 1765
},
{
"epoch": 0.6,
"learning_rate": 4.140984277958256e-05,
"loss": 0.3753,
"step": 1766
},
{
"epoch": 0.6,
"learning_rate": 4.139931029695443e-05,
"loss": 0.3256,
"step": 1767
},
{
"epoch": 0.6,
"learning_rate": 4.1388772702554154e-05,
"loss": 0.3353,
"step": 1768
},
{
"epoch": 0.6,
"learning_rate": 4.1378229999666376e-05,
"loss": 0.3429,
"step": 1769
},
{
"epoch": 0.6,
"learning_rate": 4.136768219157731e-05,
"loss": 0.3389,
"step": 1770
},
{
"epoch": 0.6,
"learning_rate": 4.135712928157478e-05,
"loss": 0.3319,
"step": 1771
},
{
"epoch": 0.6,
"learning_rate": 4.134657127294821e-05,
"loss": 0.3258,
"step": 1772
},
{
"epoch": 0.6,
"learning_rate": 4.1336008168988596e-05,
"loss": 0.4302,
"step": 1773
},
{
"epoch": 0.61,
"learning_rate": 4.132543997298851e-05,
"loss": 0.3146,
"step": 1774
},
{
"epoch": 0.61,
"learning_rate": 4.131486668824215e-05,
"loss": 0.3126,
"step": 1775
},
{
"epoch": 0.61,
"learning_rate": 4.130428831804527e-05,
"loss": 0.3476,
"step": 1776
},
{
"epoch": 0.61,
"learning_rate": 4.129370486569521e-05,
"loss": 0.3492,
"step": 1777
},
{
"epoch": 0.61,
"learning_rate": 4.1283116334490914e-05,
"loss": 0.3163,
"step": 1778
},
{
"epoch": 0.61,
"learning_rate": 4.127252272773288e-05,
"loss": 0.3532,
"step": 1779
},
{
"epoch": 0.61,
"learning_rate": 4.126192404872322e-05,
"loss": 0.3182,
"step": 1780
},
{
"epoch": 0.61,
"learning_rate": 4.12513203007656e-05,
"loss": 0.3743,
"step": 1781
},
{
"epoch": 0.61,
"learning_rate": 4.1240711487165284e-05,
"loss": 0.3423,
"step": 1782
},
{
"epoch": 0.61,
"learning_rate": 4.1230097611229114e-05,
"loss": 0.3466,
"step": 1783
},
{
"epoch": 0.61,
"learning_rate": 4.121947867626549e-05,
"loss": 0.3435,
"step": 1784
},
{
"epoch": 0.61,
"learning_rate": 4.120885468558441e-05,
"loss": 0.409,
"step": 1785
},
{
"epoch": 0.61,
"learning_rate": 4.1198225642497445e-05,
"loss": 0.3036,
"step": 1786
},
{
"epoch": 0.61,
"learning_rate": 4.118759155031774e-05,
"loss": 0.3846,
"step": 1787
},
{
"epoch": 0.61,
"learning_rate": 4.117695241236e-05,
"loss": 0.3111,
"step": 1788
},
{
"epoch": 0.61,
"learning_rate": 4.116630823194051e-05,
"loss": 0.3199,
"step": 1789
},
{
"epoch": 0.61,
"learning_rate": 4.1155659012377155e-05,
"loss": 0.3575,
"step": 1790
},
{
"epoch": 0.61,
"learning_rate": 4.1145004756989344e-05,
"loss": 0.4029,
"step": 1791
},
{
"epoch": 0.61,
"learning_rate": 4.113434546909808e-05,
"loss": 0.3862,
"step": 1792
},
{
"epoch": 0.61,
"learning_rate": 4.112368115202595e-05,
"loss": 0.353,
"step": 1793
},
{
"epoch": 0.61,
"learning_rate": 4.111301180909707e-05,
"loss": 0.3607,
"step": 1794
},
{
"epoch": 0.61,
"learning_rate": 4.110233744363716e-05,
"loss": 0.3355,
"step": 1795
},
{
"epoch": 0.61,
"learning_rate": 4.109165805897347e-05,
"loss": 0.3507,
"step": 1796
},
{
"epoch": 0.61,
"learning_rate": 4.108097365843486e-05,
"loss": 0.3294,
"step": 1797
},
{
"epoch": 0.61,
"learning_rate": 4.107028424535171e-05,
"loss": 0.3349,
"step": 1798
},
{
"epoch": 0.61,
"learning_rate": 4.105958982305598e-05,
"loss": 0.3591,
"step": 1799
},
{
"epoch": 0.61,
"learning_rate": 4.104889039488119e-05,
"loss": 0.3801,
"step": 1800
},
{
"epoch": 0.61,
"learning_rate": 4.1038185964162434e-05,
"loss": 0.3464,
"step": 1801
},
{
"epoch": 0.61,
"learning_rate": 4.1027476534236345e-05,
"loss": 0.3489,
"step": 1802
},
{
"epoch": 0.62,
"learning_rate": 4.101676210844113e-05,
"loss": 0.3842,
"step": 1803
},
{
"epoch": 0.62,
"learning_rate": 4.100604269011653e-05,
"loss": 0.2983,
"step": 1804
},
{
"epoch": 0.62,
"learning_rate": 4.0995318282603855e-05,
"loss": 0.3093,
"step": 1805
},
{
"epoch": 0.62,
"learning_rate": 4.0984588889246e-05,
"loss": 0.3336,
"step": 1806
},
{
"epoch": 0.62,
"learning_rate": 4.0973854513387366e-05,
"loss": 0.3486,
"step": 1807
},
{
"epoch": 0.62,
"learning_rate": 4.096311515837393e-05,
"loss": 0.3464,
"step": 1808
},
{
"epoch": 0.62,
"learning_rate": 4.095237082755323e-05,
"loss": 0.3302,
"step": 1809
},
{
"epoch": 0.62,
"learning_rate": 4.0941621524274335e-05,
"loss": 0.2946,
"step": 1810
},
{
"epoch": 0.62,
"learning_rate": 4.0930867251887874e-05,
"loss": 0.3079,
"step": 1811
},
{
"epoch": 0.62,
"learning_rate": 4.092010801374602e-05,
"loss": 0.3564,
"step": 1812
},
{
"epoch": 0.62,
"learning_rate": 4.0909343813202494e-05,
"loss": 0.3269,
"step": 1813
},
{
"epoch": 0.62,
"learning_rate": 4.0898574653612585e-05,
"loss": 0.375,
"step": 1814
},
{
"epoch": 0.62,
"learning_rate": 4.0887800538333096e-05,
"loss": 0.3518,
"step": 1815
},
{
"epoch": 0.62,
"learning_rate": 4.087702147072241e-05,
"loss": 0.3628,
"step": 1816
},
{
"epoch": 0.62,
"learning_rate": 4.08662374541404e-05,
"loss": 0.3218,
"step": 1817
},
{
"epoch": 0.62,
"learning_rate": 4.0855448491948536e-05,
"loss": 0.3295,
"step": 1818
},
{
"epoch": 0.62,
"learning_rate": 4.0844654587509804e-05,
"loss": 0.3375,
"step": 1819
},
{
"epoch": 0.62,
"learning_rate": 4.083385574418873e-05,
"loss": 0.2979,
"step": 1820
},
{
"epoch": 0.62,
"learning_rate": 4.08230519653514e-05,
"loss": 0.3226,
"step": 1821
},
{
"epoch": 0.62,
"learning_rate": 4.08122432543654e-05,
"loss": 0.366,
"step": 1822
},
{
"epoch": 0.62,
"learning_rate": 4.080142961459989e-05,
"loss": 0.338,
"step": 1823
},
{
"epoch": 0.62,
"learning_rate": 4.079061104942555e-05,
"loss": 0.3134,
"step": 1824
},
{
"epoch": 0.62,
"learning_rate": 4.077978756221459e-05,
"loss": 0.3042,
"step": 1825
},
{
"epoch": 0.62,
"learning_rate": 4.076895915634077e-05,
"loss": 0.3507,
"step": 1826
},
{
"epoch": 0.62,
"learning_rate": 4.075812583517937e-05,
"loss": 0.3606,
"step": 1827
},
{
"epoch": 0.62,
"learning_rate": 4.074728760210721e-05,
"loss": 0.3869,
"step": 1828
},
{
"epoch": 0.62,
"learning_rate": 4.073644446050264e-05,
"loss": 0.3239,
"step": 1829
},
{
"epoch": 0.62,
"learning_rate": 4.072559641374554e-05,
"loss": 0.3423,
"step": 1830
},
{
"epoch": 0.62,
"learning_rate": 4.07147434652173e-05,
"loss": 0.4036,
"step": 1831
},
{
"epoch": 0.63,
"learning_rate": 4.0703885618300876e-05,
"loss": 0.3262,
"step": 1832
},
{
"epoch": 0.63,
"learning_rate": 4.069302287638071e-05,
"loss": 0.3171,
"step": 1833
},
{
"epoch": 0.63,
"learning_rate": 4.068215524284281e-05,
"loss": 0.3347,
"step": 1834
},
{
"epoch": 0.63,
"learning_rate": 4.067128272107468e-05,
"loss": 0.3188,
"step": 1835
},
{
"epoch": 0.63,
"learning_rate": 4.0660405314465354e-05,
"loss": 0.3305,
"step": 1836
},
{
"epoch": 0.63,
"learning_rate": 4.064952302640539e-05,
"loss": 0.3293,
"step": 1837
},
{
"epoch": 0.63,
"learning_rate": 4.0638635860286875e-05,
"loss": 0.342,
"step": 1838
},
{
"epoch": 0.63,
"learning_rate": 4.06277438195034e-05,
"loss": 0.3251,
"step": 1839
},
{
"epoch": 0.63,
"learning_rate": 4.061684690745009e-05,
"loss": 0.3348,
"step": 1840
},
{
"epoch": 0.63,
"learning_rate": 4.060594512752359e-05,
"loss": 0.315,
"step": 1841
},
{
"epoch": 0.63,
"learning_rate": 4.059503848312204e-05,
"loss": 0.3551,
"step": 1842
},
{
"epoch": 0.63,
"learning_rate": 4.058412697764513e-05,
"loss": 0.3225,
"step": 1843
},
{
"epoch": 0.63,
"learning_rate": 4.057321061449404e-05,
"loss": 0.3642,
"step": 1844
},
{
"epoch": 0.63,
"learning_rate": 4.056228939707147e-05,
"loss": 0.3192,
"step": 1845
},
{
"epoch": 0.63,
"learning_rate": 4.055136332878163e-05,
"loss": 0.3988,
"step": 1846
},
{
"epoch": 0.63,
"learning_rate": 4.054043241303026e-05,
"loss": 0.3182,
"step": 1847
},
{
"epoch": 0.63,
"learning_rate": 4.052949665322459e-05,
"loss": 0.3427,
"step": 1848
},
{
"epoch": 0.63,
"learning_rate": 4.0518556052773366e-05,
"loss": 0.288,
"step": 1849
},
{
"epoch": 0.63,
"learning_rate": 4.050761061508685e-05,
"loss": 0.3293,
"step": 1850
},
{
"epoch": 0.63,
"learning_rate": 4.0496660343576796e-05,
"loss": 0.3673,
"step": 1851
},
{
"epoch": 0.63,
"learning_rate": 4.04857052416565e-05,
"loss": 0.3552,
"step": 1852
},
{
"epoch": 0.63,
"learning_rate": 4.04747453127407e-05,
"loss": 0.3318,
"step": 1853
},
{
"epoch": 0.63,
"learning_rate": 4.0463780560245713e-05,
"loss": 0.3585,
"step": 1854
},
{
"epoch": 0.63,
"learning_rate": 4.0452810987589304e-05,
"loss": 0.3517,
"step": 1855
},
{
"epoch": 0.63,
"learning_rate": 4.0441836598190776e-05,
"loss": 0.311,
"step": 1856
},
{
"epoch": 0.63,
"learning_rate": 4.04308573954709e-05,
"loss": 0.3412,
"step": 1857
},
{
"epoch": 0.63,
"learning_rate": 4.041987338285198e-05,
"loss": 0.3599,
"step": 1858
},
{
"epoch": 0.63,
"learning_rate": 4.0408884563757796e-05,
"loss": 0.3327,
"step": 1859
},
{
"epoch": 0.63,
"learning_rate": 4.0397890941613636e-05,
"loss": 0.3191,
"step": 1860
},
{
"epoch": 0.63,
"learning_rate": 4.0386892519846274e-05,
"loss": 0.3756,
"step": 1861
},
{
"epoch": 0.64,
"learning_rate": 4.037588930188402e-05,
"loss": 0.3165,
"step": 1862
},
{
"epoch": 0.64,
"learning_rate": 4.036488129115662e-05,
"loss": 0.372,
"step": 1863
},
{
"epoch": 0.64,
"learning_rate": 4.035386849109535e-05,
"loss": 0.3607,
"step": 1864
},
{
"epoch": 0.64,
"learning_rate": 4.0342850905132976e-05,
"loss": 0.3455,
"step": 1865
},
{
"epoch": 0.64,
"learning_rate": 4.0331828536703754e-05,
"loss": 0.3349,
"step": 1866
},
{
"epoch": 0.64,
"learning_rate": 4.032080138924341e-05,
"loss": 0.3387,
"step": 1867
},
{
"epoch": 0.64,
"learning_rate": 4.030976946618921e-05,
"loss": 0.3307,
"step": 1868
},
{
"epoch": 0.64,
"learning_rate": 4.029873277097985e-05,
"loss": 0.3365,
"step": 1869
},
{
"epoch": 0.64,
"learning_rate": 4.028769130705555e-05,
"loss": 0.3279,
"step": 1870
},
{
"epoch": 0.64,
"learning_rate": 4.0276645077858e-05,
"loss": 0.3744,
"step": 1871
},
{
"epoch": 0.64,
"learning_rate": 4.026559408683039e-05,
"loss": 0.3119,
"step": 1872
},
{
"epoch": 0.64,
"learning_rate": 4.025453833741738e-05,
"loss": 0.3447,
"step": 1873
},
{
"epoch": 0.64,
"learning_rate": 4.024347783306512e-05,
"loss": 0.3046,
"step": 1874
},
{
"epoch": 0.64,
"learning_rate": 4.023241257722126e-05,
"loss": 0.3477,
"step": 1875
},
{
"epoch": 0.64,
"learning_rate": 4.022134257333487e-05,
"loss": 0.3114,
"step": 1876
},
{
"epoch": 0.64,
"learning_rate": 4.021026782485659e-05,
"loss": 0.3573,
"step": 1877
},
{
"epoch": 0.64,
"learning_rate": 4.0199188335238456e-05,
"loss": 0.3292,
"step": 1878
},
{
"epoch": 0.64,
"learning_rate": 4.018810410793404e-05,
"loss": 0.3374,
"step": 1879
},
{
"epoch": 0.64,
"learning_rate": 4.0177015146398355e-05,
"loss": 0.3376,
"step": 1880
},
{
"epoch": 0.64,
"learning_rate": 4.0165921454087904e-05,
"loss": 0.3274,
"step": 1881
},
{
"epoch": 0.64,
"learning_rate": 4.0154823034460675e-05,
"loss": 0.289,
"step": 1882
},
{
"epoch": 0.64,
"learning_rate": 4.014371989097611e-05,
"loss": 0.3655,
"step": 1883
},
{
"epoch": 0.64,
"learning_rate": 4.0132612027095116e-05,
"loss": 0.37,
"step": 1884
},
{
"epoch": 0.64,
"learning_rate": 4.0121499446280114e-05,
"loss": 0.3256,
"step": 1885
},
{
"epoch": 0.64,
"learning_rate": 4.011038215199495e-05,
"loss": 0.3564,
"step": 1886
},
{
"epoch": 0.64,
"learning_rate": 4.009926014770496e-05,
"loss": 0.3432,
"step": 1887
},
{
"epoch": 0.64,
"learning_rate": 4.008813343687695e-05,
"loss": 0.3677,
"step": 1888
},
{
"epoch": 0.64,
"learning_rate": 4.007700202297919e-05,
"loss": 0.3168,
"step": 1889
},
{
"epoch": 0.64,
"learning_rate": 4.0065865909481417e-05,
"loss": 0.328,
"step": 1890
},
{
"epoch": 0.65,
"learning_rate": 4.005472509985481e-05,
"loss": 0.2907,
"step": 1891
},
{
"epoch": 0.65,
"learning_rate": 4.0043579597572054e-05,
"loss": 0.3311,
"step": 1892
},
{
"epoch": 0.65,
"learning_rate": 4.003242940610726e-05,
"loss": 0.2887,
"step": 1893
},
{
"epoch": 0.65,
"learning_rate": 4.002127452893604e-05,
"loss": 0.3408,
"step": 1894
},
{
"epoch": 0.65,
"learning_rate": 4.001011496953541e-05,
"loss": 0.3712,
"step": 1895
},
{
"epoch": 0.65,
"learning_rate": 3.999895073138389e-05,
"loss": 0.3368,
"step": 1896
},
{
"epoch": 0.65,
"learning_rate": 3.998778181796145e-05,
"loss": 0.4231,
"step": 1897
},
{
"epoch": 0.65,
"learning_rate": 3.997660823274952e-05,
"loss": 0.3775,
"step": 1898
},
{
"epoch": 0.65,
"learning_rate": 3.996542997923096e-05,
"loss": 0.3006,
"step": 1899
},
{
"epoch": 0.65,
"learning_rate": 3.995424706089013e-05,
"loss": 0.3339,
"step": 1900
},
{
"epoch": 0.65,
"learning_rate": 3.9943059481212795e-05,
"loss": 0.3631,
"step": 1901
},
{
"epoch": 0.65,
"learning_rate": 3.993186724368621e-05,
"loss": 0.3271,
"step": 1902
},
{
"epoch": 0.65,
"learning_rate": 3.992067035179906e-05,
"loss": 0.3671,
"step": 1903
},
{
"epoch": 0.65,
"learning_rate": 3.9909468809041494e-05,
"loss": 0.3464,
"step": 1904
},
{
"epoch": 0.65,
"learning_rate": 3.98982626189051e-05,
"loss": 0.3196,
"step": 1905
},
{
"epoch": 0.65,
"learning_rate": 3.988705178488294e-05,
"loss": 0.3665,
"step": 1906
},
{
"epoch": 0.65,
"learning_rate": 3.9875836310469485e-05,
"loss": 0.3753,
"step": 1907
},
{
"epoch": 0.65,
"learning_rate": 3.9864616199160677e-05,
"loss": 0.3822,
"step": 1908
},
{
"epoch": 0.65,
"learning_rate": 3.9853391454453894e-05,
"loss": 0.3061,
"step": 1909
},
{
"epoch": 0.65,
"learning_rate": 3.984216207984798e-05,
"loss": 0.3414,
"step": 1910
},
{
"epoch": 0.65,
"learning_rate": 3.983092807884318e-05,
"loss": 0.3369,
"step": 1911
},
{
"epoch": 0.65,
"learning_rate": 3.981968945494122e-05,
"loss": 0.388,
"step": 1912
},
{
"epoch": 0.65,
"learning_rate": 3.9808446211645245e-05,
"loss": 0.3637,
"step": 1913
},
{
"epoch": 0.65,
"learning_rate": 3.979719835245986e-05,
"loss": 0.3859,
"step": 1914
},
{
"epoch": 0.65,
"learning_rate": 3.978594588089108e-05,
"loss": 0.3547,
"step": 1915
},
{
"epoch": 0.65,
"learning_rate": 3.9774688800446384e-05,
"loss": 0.3424,
"step": 1916
},
{
"epoch": 0.65,
"learning_rate": 3.9763427114634674e-05,
"loss": 0.3406,
"step": 1917
},
{
"epoch": 0.65,
"learning_rate": 3.9752160826966295e-05,
"loss": 0.347,
"step": 1918
},
{
"epoch": 0.65,
"learning_rate": 3.974088994095302e-05,
"loss": 0.3663,
"step": 1919
},
{
"epoch": 0.66,
"learning_rate": 3.972961446010806e-05,
"loss": 0.3702,
"step": 1920
},
{
"epoch": 0.66,
"learning_rate": 3.9718334387946045e-05,
"loss": 0.3767,
"step": 1921
},
{
"epoch": 0.66,
"learning_rate": 3.970704972798306e-05,
"loss": 0.3369,
"step": 1922
},
{
"epoch": 0.66,
"learning_rate": 3.96957604837366e-05,
"loss": 0.3045,
"step": 1923
},
{
"epoch": 0.66,
"learning_rate": 3.968446665872561e-05,
"loss": 0.3841,
"step": 1924
},
{
"epoch": 0.66,
"learning_rate": 3.967316825647042e-05,
"loss": 0.3444,
"step": 1925
},
{
"epoch": 0.66,
"learning_rate": 3.966186528049285e-05,
"loss": 0.3261,
"step": 1926
},
{
"epoch": 0.66,
"learning_rate": 3.965055773431608e-05,
"loss": 0.3205,
"step": 1927
},
{
"epoch": 0.66,
"learning_rate": 3.963924562146477e-05,
"loss": 0.3292,
"step": 1928
},
{
"epoch": 0.66,
"learning_rate": 3.9627928945464976e-05,
"loss": 0.314,
"step": 1929
},
{
"epoch": 0.66,
"learning_rate": 3.9616607709844166e-05,
"loss": 0.3405,
"step": 1930
},
{
"epoch": 0.66,
"learning_rate": 3.960528191813125e-05,
"loss": 0.3517,
"step": 1931
},
{
"epoch": 0.66,
"learning_rate": 3.959395157385656e-05,
"loss": 0.3274,
"step": 1932
},
{
"epoch": 0.66,
"learning_rate": 3.958261668055183e-05,
"loss": 0.3681,
"step": 1933
},
{
"epoch": 0.66,
"learning_rate": 3.9571277241750226e-05,
"loss": 0.3433,
"step": 1934
},
{
"epoch": 0.66,
"learning_rate": 3.955993326098631e-05,
"loss": 0.3405,
"step": 1935
},
{
"epoch": 0.66,
"learning_rate": 3.95485847417961e-05,
"loss": 0.3345,
"step": 1936
},
{
"epoch": 0.66,
"learning_rate": 3.9537231687716976e-05,
"loss": 0.336,
"step": 1937
},
{
"epoch": 0.66,
"learning_rate": 3.9525874102287776e-05,
"loss": 0.3423,
"step": 1938
},
{
"epoch": 0.66,
"learning_rate": 3.9514511989048733e-05,
"loss": 0.3829,
"step": 1939
},
{
"epoch": 0.66,
"learning_rate": 3.950314535154148e-05,
"loss": 0.3275,
"step": 1940
},
{
"epoch": 0.66,
"learning_rate": 3.9491774193309096e-05,
"loss": 0.3717,
"step": 1941
},
{
"epoch": 0.66,
"learning_rate": 3.948039851789602e-05,
"loss": 0.3391,
"step": 1942
},
{
"epoch": 0.66,
"learning_rate": 3.946901832884813e-05,
"loss": 0.3753,
"step": 1943
},
{
"epoch": 0.66,
"learning_rate": 3.9457633629712716e-05,
"loss": 0.3829,
"step": 1944
},
{
"epoch": 0.66,
"learning_rate": 3.944624442403846e-05,
"loss": 0.3513,
"step": 1945
},
{
"epoch": 0.66,
"learning_rate": 3.9434850715375446e-05,
"loss": 0.3225,
"step": 1946
},
{
"epoch": 0.66,
"learning_rate": 3.9423452507275166e-05,
"loss": 0.3616,
"step": 1947
},
{
"epoch": 0.66,
"learning_rate": 3.941204980329053e-05,
"loss": 0.3659,
"step": 1948
},
{
"epoch": 0.66,
"learning_rate": 3.940064260697581e-05,
"loss": 0.324,
"step": 1949
},
{
"epoch": 0.67,
"learning_rate": 3.9389230921886734e-05,
"loss": 0.3254,
"step": 1950
},
{
"epoch": 0.67,
"learning_rate": 3.937781475158038e-05,
"loss": 0.3636,
"step": 1951
},
{
"epoch": 0.67,
"learning_rate": 3.936639409961524e-05,
"loss": 0.3487,
"step": 1952
},
{
"epoch": 0.67,
"learning_rate": 3.935496896955122e-05,
"loss": 0.3155,
"step": 1953
},
{
"epoch": 0.67,
"learning_rate": 3.9343539364949594e-05,
"loss": 0.3267,
"step": 1954
},
{
"epoch": 0.67,
"learning_rate": 3.9332105289373054e-05,
"loss": 0.3279,
"step": 1955
},
{
"epoch": 0.67,
"learning_rate": 3.932066674638567e-05,
"loss": 0.3125,
"step": 1956
},
{
"epoch": 0.67,
"learning_rate": 3.9309223739552926e-05,
"loss": 0.2901,
"step": 1957
},
{
"epoch": 0.67,
"learning_rate": 3.929777627244165e-05,
"loss": 0.3665,
"step": 1958
},
{
"epoch": 0.67,
"learning_rate": 3.9286324348620115e-05,
"loss": 0.3042,
"step": 1959
},
{
"epoch": 0.67,
"learning_rate": 3.927486797165797e-05,
"loss": 0.3187,
"step": 1960
},
{
"epoch": 0.67,
"learning_rate": 3.926340714512622e-05,
"loss": 0.2991,
"step": 1961
},
{
"epoch": 0.67,
"learning_rate": 3.925194187259729e-05,
"loss": 0.3591,
"step": 1962
},
{
"epoch": 0.67,
"learning_rate": 3.924047215764498e-05,
"loss": 0.3436,
"step": 1963
},
{
"epoch": 0.67,
"learning_rate": 3.9228998003844474e-05,
"loss": 0.3214,
"step": 1964
},
{
"epoch": 0.67,
"learning_rate": 3.921751941477234e-05,
"loss": 0.355,
"step": 1965
},
{
"epoch": 0.67,
"learning_rate": 3.920603639400653e-05,
"loss": 0.3811,
"step": 1966
},
{
"epoch": 0.67,
"learning_rate": 3.9194548945126386e-05,
"loss": 0.34,
"step": 1967
},
{
"epoch": 0.67,
"learning_rate": 3.9183057071712605e-05,
"loss": 0.3662,
"step": 1968
},
{
"epoch": 0.67,
"learning_rate": 3.917156077734728e-05,
"loss": 0.3034,
"step": 1969
},
{
"epoch": 0.67,
"learning_rate": 3.916006006561389e-05,
"loss": 0.3571,
"step": 1970
},
{
"epoch": 0.67,
"learning_rate": 3.9148554940097285e-05,
"loss": 0.326,
"step": 1971
},
{
"epoch": 0.67,
"learning_rate": 3.913704540438368e-05,
"loss": 0.2981,
"step": 1972
},
{
"epoch": 0.67,
"learning_rate": 3.912553146206066e-05,
"loss": 0.361,
"step": 1973
},
{
"epoch": 0.67,
"learning_rate": 3.9114013116717224e-05,
"loss": 0.3411,
"step": 1974
},
{
"epoch": 0.67,
"learning_rate": 3.91024903719437e-05,
"loss": 0.3016,
"step": 1975
},
{
"epoch": 0.67,
"learning_rate": 3.90909632313318e-05,
"loss": 0.3286,
"step": 1976
},
{
"epoch": 0.67,
"learning_rate": 3.907943169847462e-05,
"loss": 0.3152,
"step": 1977
},
{
"epoch": 0.67,
"learning_rate": 3.9067895776966604e-05,
"loss": 0.3682,
"step": 1978
},
{
"epoch": 0.68,
"learning_rate": 3.9056355470403574e-05,
"loss": 0.4004,
"step": 1979
},
{
"epoch": 0.68,
"learning_rate": 3.904481078238272e-05,
"loss": 0.3526,
"step": 1980
},
{
"epoch": 0.68,
"learning_rate": 3.903326171650261e-05,
"loss": 0.3602,
"step": 1981
},
{
"epoch": 0.68,
"learning_rate": 3.902170827636314e-05,
"loss": 0.3388,
"step": 1982
},
{
"epoch": 0.68,
"learning_rate": 3.901015046556562e-05,
"loss": 0.3662,
"step": 1983
},
{
"epoch": 0.68,
"learning_rate": 3.8998588287712674e-05,
"loss": 0.3654,
"step": 1984
},
{
"epoch": 0.68,
"learning_rate": 3.8987021746408315e-05,
"loss": 0.3434,
"step": 1985
},
{
"epoch": 0.68,
"learning_rate": 3.89754508452579e-05,
"loss": 0.3523,
"step": 1986
},
{
"epoch": 0.68,
"learning_rate": 3.8963875587868173e-05,
"loss": 0.3761,
"step": 1987
},
{
"epoch": 0.68,
"learning_rate": 3.895229597784721e-05,
"loss": 0.3642,
"step": 1988
},
{
"epoch": 0.68,
"learning_rate": 3.894071201880444e-05,
"loss": 0.3523,
"step": 1989
},
{
"epoch": 0.68,
"learning_rate": 3.892912371435068e-05,
"loss": 0.3106,
"step": 1990
},
{
"epoch": 0.68,
"learning_rate": 3.8917531068098054e-05,
"loss": 0.3141,
"step": 1991
},
{
"epoch": 0.68,
"learning_rate": 3.890593408366009e-05,
"loss": 0.3445,
"step": 1992
},
{
"epoch": 0.68,
"learning_rate": 3.889433276465163e-05,
"loss": 0.3688,
"step": 1993
},
{
"epoch": 0.68,
"learning_rate": 3.8882727114688885e-05,
"loss": 0.3733,
"step": 1994
},
{
"epoch": 0.68,
"learning_rate": 3.887111713738941e-05,
"loss": 0.366,
"step": 1995
},
{
"epoch": 0.68,
"learning_rate": 3.885950283637211e-05,
"loss": 0.4771,
"step": 1996
},
{
"epoch": 0.68,
"learning_rate": 3.884788421525725e-05,
"loss": 0.3583,
"step": 1997
},
{
"epoch": 0.68,
"learning_rate": 3.8836261277666405e-05,
"loss": 0.2864,
"step": 1998
},
{
"epoch": 0.68,
"learning_rate": 3.882463402722254e-05,
"loss": 0.3454,
"step": 1999
},
{
"epoch": 0.68,
"learning_rate": 3.881300246754995e-05,
"loss": 0.3024,
"step": 2000
},
{
"epoch": 0.68,
"learning_rate": 3.880136660227426e-05,
"loss": 0.3587,
"step": 2001
},
{
"epoch": 0.68,
"learning_rate": 3.878972643502243e-05,
"loss": 0.3661,
"step": 2002
},
{
"epoch": 0.68,
"learning_rate": 3.8778081969422807e-05,
"loss": 0.3696,
"step": 2003
},
{
"epoch": 0.68,
"learning_rate": 3.876643320910502e-05,
"loss": 0.3282,
"step": 2004
},
{
"epoch": 0.68,
"learning_rate": 3.8754780157700075e-05,
"loss": 0.3301,
"step": 2005
},
{
"epoch": 0.68,
"learning_rate": 3.874312281884031e-05,
"loss": 0.3684,
"step": 2006
},
{
"epoch": 0.68,
"learning_rate": 3.873146119615938e-05,
"loss": 0.3648,
"step": 2007
},
{
"epoch": 0.69,
"learning_rate": 3.8719795293292295e-05,
"loss": 0.3275,
"step": 2008
},
{
"epoch": 0.69,
"learning_rate": 3.8708125113875406e-05,
"loss": 0.4067,
"step": 2009
},
{
"epoch": 0.69,
"learning_rate": 3.869645066154636e-05,
"loss": 0.2998,
"step": 2010
},
{
"epoch": 0.69,
"learning_rate": 3.8684771939944174e-05,
"loss": 0.3566,
"step": 2011
},
{
"epoch": 0.69,
"learning_rate": 3.867308895270919e-05,
"loss": 0.3761,
"step": 2012
},
{
"epoch": 0.69,
"learning_rate": 3.8661401703483046e-05,
"loss": 0.3366,
"step": 2013
},
{
"epoch": 0.69,
"learning_rate": 3.8649710195908764e-05,
"loss": 0.3485,
"step": 2014
},
{
"epoch": 0.69,
"learning_rate": 3.863801443363064e-05,
"loss": 0.3184,
"step": 2015
},
{
"epoch": 0.69,
"learning_rate": 3.862631442029434e-05,
"loss": 0.3933,
"step": 2016
},
{
"epoch": 0.69,
"learning_rate": 3.861461015954681e-05,
"loss": 0.3452,
"step": 2017
},
{
"epoch": 0.69,
"learning_rate": 3.860290165503636e-05,
"loss": 0.3548,
"step": 2018
},
{
"epoch": 0.69,
"learning_rate": 3.859118891041261e-05,
"loss": 0.3583,
"step": 2019
},
{
"epoch": 0.69,
"learning_rate": 3.8579471929326494e-05,
"loss": 0.3529,
"step": 2020
},
{
"epoch": 0.69,
"learning_rate": 3.856775071543026e-05,
"loss": 0.3081,
"step": 2021
},
{
"epoch": 0.69,
"learning_rate": 3.8556025272377515e-05,
"loss": 0.4169,
"step": 2022
},
{
"epoch": 0.69,
"learning_rate": 3.854429560382313e-05,
"loss": 0.3666,
"step": 2023
},
{
"epoch": 0.69,
"learning_rate": 3.853256171342333e-05,
"loss": 0.3751,
"step": 2024
},
{
"epoch": 0.69,
"learning_rate": 3.852082360483564e-05,
"loss": 0.3109,
"step": 2025
},
{
"epoch": 0.69,
"learning_rate": 3.850908128171892e-05,
"loss": 0.3096,
"step": 2026
},
{
"epoch": 0.69,
"learning_rate": 3.849733474773331e-05,
"loss": 0.2982,
"step": 2027
},
{
"epoch": 0.69,
"learning_rate": 3.848558400654029e-05,
"loss": 0.3361,
"step": 2028
},
{
"epoch": 0.69,
"learning_rate": 3.8473829061802644e-05,
"loss": 0.359,
"step": 2029
},
{
"epoch": 0.69,
"learning_rate": 3.846206991718446e-05,
"loss": 0.3308,
"step": 2030
},
{
"epoch": 0.69,
"learning_rate": 3.8450306576351145e-05,
"loss": 0.3309,
"step": 2031
},
{
"epoch": 0.69,
"learning_rate": 3.843853904296942e-05,
"loss": 0.3365,
"step": 2032
},
{
"epoch": 0.69,
"learning_rate": 3.842676732070728e-05,
"loss": 0.3483,
"step": 2033
},
{
"epoch": 0.69,
"learning_rate": 3.841499141323405e-05,
"loss": 0.3599,
"step": 2034
},
{
"epoch": 0.69,
"learning_rate": 3.8403211324220375e-05,
"loss": 0.3503,
"step": 2035
},
{
"epoch": 0.69,
"learning_rate": 3.839142705733817e-05,
"loss": 0.3595,
"step": 2036
},
{
"epoch": 0.69,
"learning_rate": 3.837963861626067e-05,
"loss": 0.3714,
"step": 2037
},
{
"epoch": 0.7,
"learning_rate": 3.836784600466242e-05,
"loss": 0.3804,
"step": 2038
},
{
"epoch": 0.7,
"learning_rate": 3.835604922621925e-05,
"loss": 0.3849,
"step": 2039
},
{
"epoch": 0.7,
"learning_rate": 3.8344248284608284e-05,
"loss": 0.307,
"step": 2040
},
{
"epoch": 0.7,
"learning_rate": 3.833244318350796e-05,
"loss": 0.3268,
"step": 2041
},
{
"epoch": 0.7,
"learning_rate": 3.8320633926597995e-05,
"loss": 0.348,
"step": 2042
},
{
"epoch": 0.7,
"learning_rate": 3.830882051755942e-05,
"loss": 0.3664,
"step": 2043
},
{
"epoch": 0.7,
"learning_rate": 3.829700296007456e-05,
"loss": 0.3177,
"step": 2044
},
{
"epoch": 0.7,
"learning_rate": 3.8285181257827016e-05,
"loss": 0.3401,
"step": 2045
},
{
"epoch": 0.7,
"learning_rate": 3.827335541450169e-05,
"loss": 0.3103,
"step": 2046
},
{
"epoch": 0.7,
"learning_rate": 3.8261525433784764e-05,
"loss": 0.3251,
"step": 2047
},
{
"epoch": 0.7,
"learning_rate": 3.824969131936375e-05,
"loss": 0.349,
"step": 2048
},
{
"epoch": 0.7,
"learning_rate": 3.8237853074927376e-05,
"loss": 0.3374,
"step": 2049
},
{
"epoch": 0.7,
"learning_rate": 3.822601070416574e-05,
"loss": 0.3055,
"step": 2050
},
{
"epoch": 0.7,
"learning_rate": 3.8214164210770175e-05,
"loss": 0.3594,
"step": 2051
},
{
"epoch": 0.7,
"learning_rate": 3.820231359843329e-05,
"loss": 0.3079,
"step": 2052
},
{
"epoch": 0.7,
"learning_rate": 3.819045887084903e-05,
"loss": 0.3968,
"step": 2053
},
{
"epoch": 0.7,
"learning_rate": 3.817860003171256e-05,
"loss": 0.351,
"step": 2054
},
{
"epoch": 0.7,
"learning_rate": 3.816673708472038e-05,
"loss": 0.3506,
"step": 2055
},
{
"epoch": 0.7,
"learning_rate": 3.815487003357025e-05,
"loss": 0.3597,
"step": 2056
},
{
"epoch": 0.7,
"learning_rate": 3.8142998881961176e-05,
"loss": 0.3526,
"step": 2057
},
{
"epoch": 0.7,
"learning_rate": 3.8131123633593514e-05,
"loss": 0.3267,
"step": 2058
},
{
"epoch": 0.7,
"learning_rate": 3.811924429216883e-05,
"loss": 0.3823,
"step": 2059
},
{
"epoch": 0.7,
"learning_rate": 3.8107360861390004e-05,
"loss": 0.3695,
"step": 2060
},
{
"epoch": 0.7,
"learning_rate": 3.8095473344961175e-05,
"loss": 0.3494,
"step": 2061
},
{
"epoch": 0.7,
"learning_rate": 3.8083581746587746e-05,
"loss": 0.32,
"step": 2062
},
{
"epoch": 0.7,
"learning_rate": 3.807168606997644e-05,
"loss": 0.3436,
"step": 2063
},
{
"epoch": 0.7,
"learning_rate": 3.805978631883519e-05,
"loss": 0.3409,
"step": 2064
},
{
"epoch": 0.7,
"learning_rate": 3.804788249687323e-05,
"loss": 0.3246,
"step": 2065
},
{
"epoch": 0.7,
"learning_rate": 3.803597460780107e-05,
"loss": 0.3212,
"step": 2066
},
{
"epoch": 0.71,
"learning_rate": 3.8024062655330455e-05,
"loss": 0.3291,
"step": 2067
},
{
"epoch": 0.71,
"learning_rate": 3.801214664317444e-05,
"loss": 0.3162,
"step": 2068
},
{
"epoch": 0.71,
"learning_rate": 3.8000226575047314e-05,
"loss": 0.3427,
"step": 2069
},
{
"epoch": 0.71,
"learning_rate": 3.798830245466465e-05,
"loss": 0.3582,
"step": 2070
},
{
"epoch": 0.71,
"learning_rate": 3.797637428574326e-05,
"loss": 0.3649,
"step": 2071
},
{
"epoch": 0.71,
"learning_rate": 3.7964442072001236e-05,
"loss": 0.3345,
"step": 2072
},
{
"epoch": 0.71,
"learning_rate": 3.795250581715793e-05,
"loss": 0.3004,
"step": 2073
},
{
"epoch": 0.71,
"learning_rate": 3.7940565524933956e-05,
"loss": 0.3106,
"step": 2074
},
{
"epoch": 0.71,
"learning_rate": 3.792862119905117e-05,
"loss": 0.3602,
"step": 2075
},
{
"epoch": 0.71,
"learning_rate": 3.79166728432327e-05,
"loss": 0.3233,
"step": 2076
},
{
"epoch": 0.71,
"learning_rate": 3.790472046120294e-05,
"loss": 0.3783,
"step": 2077
},
{
"epoch": 0.71,
"learning_rate": 3.78927640566875e-05,
"loss": 0.3494,
"step": 2078
},
{
"epoch": 0.71,
"learning_rate": 3.78808036334133e-05,
"loss": 0.3352,
"step": 2079
},
{
"epoch": 0.71,
"learning_rate": 3.786883919510844e-05,
"loss": 0.3543,
"step": 2080
},
{
"epoch": 0.71,
"learning_rate": 3.785687074550236e-05,
"loss": 0.3377,
"step": 2081
},
{
"epoch": 0.71,
"learning_rate": 3.784489828832567e-05,
"loss": 0.3835,
"step": 2082
},
{
"epoch": 0.71,
"learning_rate": 3.7832921827310286e-05,
"loss": 0.3483,
"step": 2083
},
{
"epoch": 0.71,
"learning_rate": 3.782094136618933e-05,
"loss": 0.3414,
"step": 2084
},
{
"epoch": 0.71,
"learning_rate": 3.780895690869721e-05,
"loss": 0.3665,
"step": 2085
},
{
"epoch": 0.71,
"learning_rate": 3.779696845856954e-05,
"loss": 0.307,
"step": 2086
},
{
"epoch": 0.71,
"learning_rate": 3.7784976019543204e-05,
"loss": 0.3742,
"step": 2087
},
{
"epoch": 0.71,
"learning_rate": 3.777297959535634e-05,
"loss": 0.3328,
"step": 2088
},
{
"epoch": 0.71,
"learning_rate": 3.776097918974828e-05,
"loss": 0.3789,
"step": 2089
},
{
"epoch": 0.71,
"learning_rate": 3.774897480645966e-05,
"loss": 0.3132,
"step": 2090
},
{
"epoch": 0.71,
"learning_rate": 3.7736966449232314e-05,
"loss": 0.3319,
"step": 2091
},
{
"epoch": 0.71,
"learning_rate": 3.772495412180932e-05,
"loss": 0.3278,
"step": 2092
},
{
"epoch": 0.71,
"learning_rate": 3.7712937827934994e-05,
"loss": 0.3737,
"step": 2093
},
{
"epoch": 0.71,
"learning_rate": 3.770091757135491e-05,
"loss": 0.3952,
"step": 2094
},
{
"epoch": 0.71,
"learning_rate": 3.7688893355815846e-05,
"loss": 0.3708,
"step": 2095
},
{
"epoch": 0.72,
"learning_rate": 3.767686518506583e-05,
"loss": 0.3105,
"step": 2096
},
{
"epoch": 0.72,
"learning_rate": 3.7664833062854143e-05,
"loss": 0.3864,
"step": 2097
},
{
"epoch": 0.72,
"learning_rate": 3.765279699293125e-05,
"loss": 0.389,
"step": 2098
},
{
"epoch": 0.72,
"learning_rate": 3.764075697904887e-05,
"loss": 0.3433,
"step": 2099
},
{
"epoch": 0.72,
"learning_rate": 3.7628713024959984e-05,
"loss": 0.3258,
"step": 2100
},
{
"epoch": 0.72,
"learning_rate": 3.761666513441874e-05,
"loss": 0.3748,
"step": 2101
},
{
"epoch": 0.72,
"learning_rate": 3.760461331118057e-05,
"loss": 0.3152,
"step": 2102
},
{
"epoch": 0.72,
"learning_rate": 3.7592557559002076e-05,
"loss": 0.3554,
"step": 2103
},
{
"epoch": 0.72,
"learning_rate": 3.7580497881641144e-05,
"loss": 0.3374,
"step": 2104
},
{
"epoch": 0.72,
"learning_rate": 3.756843428285684e-05,
"loss": 0.3274,
"step": 2105
},
{
"epoch": 0.72,
"learning_rate": 3.7556366766409463e-05,
"loss": 0.354,
"step": 2106
},
{
"epoch": 0.72,
"learning_rate": 3.7544295336060544e-05,
"loss": 0.3729,
"step": 2107
},
{
"epoch": 0.72,
"learning_rate": 3.7532219995572824e-05,
"loss": 0.3859,
"step": 2108
},
{
"epoch": 0.72,
"learning_rate": 3.7520140748710274e-05,
"loss": 0.321,
"step": 2109
},
{
"epoch": 0.72,
"learning_rate": 3.7508057599238056e-05,
"loss": 0.3339,
"step": 2110
},
{
"epoch": 0.72,
"learning_rate": 3.749597055092258e-05,
"loss": 0.3383,
"step": 2111
},
{
"epoch": 0.72,
"learning_rate": 3.748387960753145e-05,
"loss": 0.3113,
"step": 2112
},
{
"epoch": 0.72,
"learning_rate": 3.7471784772833504e-05,
"loss": 0.3236,
"step": 2113
},
{
"epoch": 0.72,
"learning_rate": 3.745968605059878e-05,
"loss": 0.3459,
"step": 2114
},
{
"epoch": 0.72,
"learning_rate": 3.744758344459852e-05,
"loss": 0.4025,
"step": 2115
},
{
"epoch": 0.72,
"learning_rate": 3.743547695860518e-05,
"loss": 0.3421,
"step": 2116
},
{
"epoch": 0.72,
"learning_rate": 3.742336659639244e-05,
"loss": 0.3515,
"step": 2117
},
{
"epoch": 0.72,
"learning_rate": 3.7411252361735186e-05,
"loss": 0.3631,
"step": 2118
},
{
"epoch": 0.72,
"learning_rate": 3.739913425840948e-05,
"loss": 0.3478,
"step": 2119
},
{
"epoch": 0.72,
"learning_rate": 3.738701229019265e-05,
"loss": 0.3487,
"step": 2120
},
{
"epoch": 0.72,
"learning_rate": 3.7374886460863154e-05,
"loss": 0.3463,
"step": 2121
},
{
"epoch": 0.72,
"learning_rate": 3.7362756774200725e-05,
"loss": 0.3123,
"step": 2122
},
{
"epoch": 0.72,
"learning_rate": 3.735062323398625e-05,
"loss": 0.3619,
"step": 2123
},
{
"epoch": 0.72,
"learning_rate": 3.7338485844001824e-05,
"loss": 0.3219,
"step": 2124
},
{
"epoch": 0.73,
"learning_rate": 3.732634460803076e-05,
"loss": 0.3594,
"step": 2125
},
{
"epoch": 0.73,
"learning_rate": 3.731419952985758e-05,
"loss": 0.3018,
"step": 2126
},
{
"epoch": 0.73,
"learning_rate": 3.730205061326795e-05,
"loss": 0.3614,
"step": 2127
},
{
"epoch": 0.73,
"learning_rate": 3.7289897862048783e-05,
"loss": 0.3142,
"step": 2128
},
{
"epoch": 0.73,
"learning_rate": 3.727774127998817e-05,
"loss": 0.3757,
"step": 2129
},
{
"epoch": 0.73,
"learning_rate": 3.726558087087541e-05,
"loss": 0.3775,
"step": 2130
},
{
"epoch": 0.73,
"learning_rate": 3.725341663850095e-05,
"loss": 0.3821,
"step": 2131
},
{
"epoch": 0.73,
"learning_rate": 3.724124858665649e-05,
"loss": 0.3809,
"step": 2132
},
{
"epoch": 0.73,
"learning_rate": 3.722907671913488e-05,
"loss": 0.3592,
"step": 2133
},
{
"epoch": 0.73,
"learning_rate": 3.721690103973018e-05,
"loss": 0.3607,
"step": 2134
},
{
"epoch": 0.73,
"learning_rate": 3.720472155223761e-05,
"loss": 0.3375,
"step": 2135
},
{
"epoch": 0.73,
"learning_rate": 3.7192538260453616e-05,
"loss": 0.3307,
"step": 2136
},
{
"epoch": 0.73,
"learning_rate": 3.718035116817579e-05,
"loss": 0.3363,
"step": 2137
},
{
"epoch": 0.73,
"learning_rate": 3.7168160279202935e-05,
"loss": 0.3302,
"step": 2138
},
{
"epoch": 0.73,
"learning_rate": 3.715596559733504e-05,
"loss": 0.3047,
"step": 2139
},
{
"epoch": 0.73,
"learning_rate": 3.714376712637326e-05,
"loss": 0.3677,
"step": 2140
},
{
"epoch": 0.73,
"learning_rate": 3.7131564870119945e-05,
"loss": 0.3119,
"step": 2141
},
{
"epoch": 0.73,
"learning_rate": 3.71193588323786e-05,
"loss": 0.3472,
"step": 2142
},
{
"epoch": 0.73,
"learning_rate": 3.710714901695396e-05,
"loss": 0.3729,
"step": 2143
},
{
"epoch": 0.73,
"learning_rate": 3.709493542765186e-05,
"loss": 0.2948,
"step": 2144
},
{
"epoch": 0.73,
"learning_rate": 3.7082718068279386e-05,
"loss": 0.3968,
"step": 2145
},
{
"epoch": 0.73,
"learning_rate": 3.707049694264476e-05,
"loss": 0.3731,
"step": 2146
},
{
"epoch": 0.73,
"learning_rate": 3.70582720545574e-05,
"loss": 0.3589,
"step": 2147
},
{
"epoch": 0.73,
"learning_rate": 3.704604340782786e-05,
"loss": 0.3628,
"step": 2148
},
{
"epoch": 0.73,
"learning_rate": 3.70338110062679e-05,
"loss": 0.3394,
"step": 2149
},
{
"epoch": 0.73,
"learning_rate": 3.7021574853690444e-05,
"loss": 0.3661,
"step": 2150
},
{
"epoch": 0.73,
"learning_rate": 3.700933495390958e-05,
"loss": 0.3085,
"step": 2151
},
{
"epoch": 0.73,
"learning_rate": 3.699709131074055e-05,
"loss": 0.3334,
"step": 2152
},
{
"epoch": 0.73,
"learning_rate": 3.698484392799979e-05,
"loss": 0.3313,
"step": 2153
},
{
"epoch": 0.73,
"learning_rate": 3.6972592809504906e-05,
"loss": 0.3727,
"step": 2154
},
{
"epoch": 0.74,
"learning_rate": 3.696033795907462e-05,
"loss": 0.2809,
"step": 2155
},
{
"epoch": 0.74,
"learning_rate": 3.694807938052887e-05,
"loss": 0.3374,
"step": 2156
},
{
"epoch": 0.74,
"learning_rate": 3.6935817077688716e-05,
"loss": 0.3638,
"step": 2157
},
{
"epoch": 0.74,
"learning_rate": 3.692355105437642e-05,
"loss": 0.3716,
"step": 2158
},
{
"epoch": 0.74,
"learning_rate": 3.691128131441536e-05,
"loss": 0.3451,
"step": 2159
},
{
"epoch": 0.74,
"learning_rate": 3.6899007861630116e-05,
"loss": 0.275,
"step": 2160
},
{
"epoch": 0.74,
"learning_rate": 3.6886730699846386e-05,
"loss": 0.326,
"step": 2161
},
{
"epoch": 0.74,
"learning_rate": 3.6874449832891056e-05,
"loss": 0.328,
"step": 2162
},
{
"epoch": 0.74,
"learning_rate": 3.686216526459214e-05,
"loss": 0.3717,
"step": 2163
},
{
"epoch": 0.74,
"learning_rate": 3.684987699877882e-05,
"loss": 0.324,
"step": 2164
},
{
"epoch": 0.74,
"learning_rate": 3.683758503928143e-05,
"loss": 0.3203,
"step": 2165
},
{
"epoch": 0.74,
"learning_rate": 3.682528938993146e-05,
"loss": 0.3382,
"step": 2166
},
{
"epoch": 0.74,
"learning_rate": 3.681299005456155e-05,
"loss": 0.3318,
"step": 2167
},
{
"epoch": 0.74,
"learning_rate": 3.680068703700546e-05,
"loss": 0.3314,
"step": 2168
},
{
"epoch": 0.74,
"learning_rate": 3.678838034109814e-05,
"loss": 0.3671,
"step": 2169
},
{
"epoch": 0.74,
"learning_rate": 3.677606997067566e-05,
"loss": 0.3209,
"step": 2170
},
{
"epoch": 0.74,
"learning_rate": 3.6763755929575236e-05,
"loss": 0.3336,
"step": 2171
},
{
"epoch": 0.74,
"learning_rate": 3.675143822163526e-05,
"loss": 0.3471,
"step": 2172
},
{
"epoch": 0.74,
"learning_rate": 3.673911685069521e-05,
"loss": 0.3584,
"step": 2173
},
{
"epoch": 0.74,
"learning_rate": 3.6726791820595766e-05,
"loss": 0.3474,
"step": 2174
},
{
"epoch": 0.74,
"learning_rate": 3.67144631351787e-05,
"loss": 0.3674,
"step": 2175
},
{
"epoch": 0.74,
"learning_rate": 3.670213079828695e-05,
"loss": 0.3524,
"step": 2176
},
{
"epoch": 0.74,
"learning_rate": 3.6689794813764596e-05,
"loss": 0.4197,
"step": 2177
},
{
"epoch": 0.74,
"learning_rate": 3.667745518545683e-05,
"loss": 0.3026,
"step": 2178
},
{
"epoch": 0.74,
"learning_rate": 3.666511191721e-05,
"loss": 0.3248,
"step": 2179
},
{
"epoch": 0.74,
"learning_rate": 3.665276501287159e-05,
"loss": 0.2784,
"step": 2180
},
{
"epoch": 0.74,
"learning_rate": 3.6640414476290205e-05,
"loss": 0.3923,
"step": 2181
},
{
"epoch": 0.74,
"learning_rate": 3.662806031131558e-05,
"loss": 0.2961,
"step": 2182
},
{
"epoch": 0.74,
"learning_rate": 3.66157025217986e-05,
"loss": 0.3187,
"step": 2183
},
{
"epoch": 0.75,
"learning_rate": 3.660334111159127e-05,
"loss": 0.359,
"step": 2184
},
{
"epoch": 0.75,
"learning_rate": 3.659097608454672e-05,
"loss": 0.3143,
"step": 2185
},
{
"epoch": 0.75,
"learning_rate": 3.65786074445192e-05,
"loss": 0.2923,
"step": 2186
},
{
"epoch": 0.75,
"learning_rate": 3.656623519536411e-05,
"loss": 0.3926,
"step": 2187
},
{
"epoch": 0.75,
"learning_rate": 3.6553859340937955e-05,
"loss": 0.3007,
"step": 2188
},
{
"epoch": 0.75,
"learning_rate": 3.654147988509836e-05,
"loss": 0.3296,
"step": 2189
},
{
"epoch": 0.75,
"learning_rate": 3.65290968317041e-05,
"loss": 0.3266,
"step": 2190
},
{
"epoch": 0.75,
"learning_rate": 3.651671018461504e-05,
"loss": 0.3225,
"step": 2191
},
{
"epoch": 0.75,
"learning_rate": 3.6504319947692195e-05,
"loss": 0.3014,
"step": 2192
},
{
"epoch": 0.75,
"learning_rate": 3.649192612479766e-05,
"loss": 0.3786,
"step": 2193
},
{
"epoch": 0.75,
"learning_rate": 3.647952871979469e-05,
"loss": 0.3254,
"step": 2194
},
{
"epoch": 0.75,
"learning_rate": 3.646712773654763e-05,
"loss": 0.313,
"step": 2195
},
{
"epoch": 0.75,
"learning_rate": 3.645472317892193e-05,
"loss": 0.3186,
"step": 2196
},
{
"epoch": 0.75,
"learning_rate": 3.644231505078421e-05,
"loss": 0.34,
"step": 2197
},
{
"epoch": 0.75,
"learning_rate": 3.642990335600214e-05,
"loss": 0.3323,
"step": 2198
},
{
"epoch": 0.75,
"learning_rate": 3.641748809844452e-05,
"loss": 0.3021,
"step": 2199
},
{
"epoch": 0.75,
"learning_rate": 3.640506928198128e-05,
"loss": 0.2975,
"step": 2200
},
{
"epoch": 0.75,
"learning_rate": 3.639264691048345e-05,
"loss": 0.3293,
"step": 2201
},
{
"epoch": 0.75,
"learning_rate": 3.638022098782314e-05,
"loss": 0.3768,
"step": 2202
},
{
"epoch": 0.75,
"learning_rate": 3.636779151787362e-05,
"loss": 0.3483,
"step": 2203
},
{
"epoch": 0.75,
"learning_rate": 3.635535850450923e-05,
"loss": 0.3402,
"step": 2204
},
{
"epoch": 0.75,
"learning_rate": 3.634292195160541e-05,
"loss": 0.3241,
"step": 2205
},
{
"epoch": 0.75,
"learning_rate": 3.633048186303872e-05,
"loss": 0.3354,
"step": 2206
},
{
"epoch": 0.75,
"learning_rate": 3.631803824268682e-05,
"loss": 0.298,
"step": 2207
},
{
"epoch": 0.75,
"learning_rate": 3.630559109442847e-05,
"loss": 0.3344,
"step": 2208
},
{
"epoch": 0.75,
"learning_rate": 3.629314042214351e-05,
"loss": 0.375,
"step": 2209
},
{
"epoch": 0.75,
"learning_rate": 3.6280686229712924e-05,
"loss": 0.3278,
"step": 2210
},
{
"epoch": 0.75,
"learning_rate": 3.626822852101875e-05,
"loss": 0.3292,
"step": 2211
},
{
"epoch": 0.75,
"learning_rate": 3.6255767299944134e-05,
"loss": 0.3328,
"step": 2212
},
{
"epoch": 0.76,
"learning_rate": 3.624330257037333e-05,
"loss": 0.3296,
"step": 2213
},
{
"epoch": 0.76,
"learning_rate": 3.623083433619166e-05,
"loss": 0.3522,
"step": 2214
},
{
"epoch": 0.76,
"learning_rate": 3.621836260128556e-05,
"loss": 0.2695,
"step": 2215
},
{
"epoch": 0.76,
"learning_rate": 3.6205887369542555e-05,
"loss": 0.3719,
"step": 2216
},
{
"epoch": 0.76,
"learning_rate": 3.6193408644851265e-05,
"loss": 0.3334,
"step": 2217
},
{
"epoch": 0.76,
"learning_rate": 3.618092643110138e-05,
"loss": 0.3394,
"step": 2218
},
{
"epoch": 0.76,
"learning_rate": 3.616844073218369e-05,
"loss": 0.3362,
"step": 2219
},
{
"epoch": 0.76,
"learning_rate": 3.6155951551990064e-05,
"loss": 0.3185,
"step": 2220
},
{
"epoch": 0.76,
"learning_rate": 3.6143458894413465e-05,
"loss": 0.3336,
"step": 2221
},
{
"epoch": 0.76,
"learning_rate": 3.613096276334793e-05,
"loss": 0.3274,
"step": 2222
},
{
"epoch": 0.76,
"learning_rate": 3.611846316268859e-05,
"loss": 0.3575,
"step": 2223
},
{
"epoch": 0.76,
"learning_rate": 3.610596009633166e-05,
"loss": 0.4068,
"step": 2224
},
{
"epoch": 0.76,
"learning_rate": 3.609345356817442e-05,
"loss": 0.3701,
"step": 2225
},
{
"epoch": 0.76,
"learning_rate": 3.608094358211523e-05,
"loss": 0.3292,
"step": 2226
},
{
"epoch": 0.76,
"learning_rate": 3.606843014205354e-05,
"loss": 0.3689,
"step": 2227
},
{
"epoch": 0.76,
"learning_rate": 3.6055913251889876e-05,
"loss": 0.3326,
"step": 2228
},
{
"epoch": 0.76,
"learning_rate": 3.6043392915525826e-05,
"loss": 0.3157,
"step": 2229
},
{
"epoch": 0.76,
"learning_rate": 3.603086913686407e-05,
"loss": 0.3228,
"step": 2230
},
{
"epoch": 0.76,
"learning_rate": 3.6018341919808344e-05,
"loss": 0.3536,
"step": 2231
},
{
"epoch": 0.76,
"learning_rate": 3.600581126826346e-05,
"loss": 0.3548,
"step": 2232
},
{
"epoch": 0.76,
"learning_rate": 3.5993277186135324e-05,
"loss": 0.3242,
"step": 2233
},
{
"epoch": 0.76,
"learning_rate": 3.598073967733086e-05,
"loss": 0.2965,
"step": 2234
},
{
"epoch": 0.76,
"learning_rate": 3.596819874575811e-05,
"loss": 0.3076,
"step": 2235
},
{
"epoch": 0.76,
"learning_rate": 3.595565439532617e-05,
"loss": 0.3006,
"step": 2236
},
{
"epoch": 0.76,
"learning_rate": 3.594310662994519e-05,
"loss": 0.3491,
"step": 2237
},
{
"epoch": 0.76,
"learning_rate": 3.5930555453526385e-05,
"loss": 0.3262,
"step": 2238
},
{
"epoch": 0.76,
"learning_rate": 3.591800086998205e-05,
"loss": 0.3731,
"step": 2239
},
{
"epoch": 0.76,
"learning_rate": 3.590544288322551e-05,
"loss": 0.3551,
"step": 2240
},
{
"epoch": 0.76,
"learning_rate": 3.589288149717119e-05,
"loss": 0.3117,
"step": 2241
},
{
"epoch": 0.76,
"learning_rate": 3.588031671573455e-05,
"loss": 0.3237,
"step": 2242
},
{
"epoch": 0.77,
"learning_rate": 3.5867748542832116e-05,
"loss": 0.3272,
"step": 2243
},
{
"epoch": 0.77,
"learning_rate": 3.5855176982381476e-05,
"loss": 0.3242,
"step": 2244
},
{
"epoch": 0.77,
"learning_rate": 3.584260203830126e-05,
"loss": 0.3413,
"step": 2245
},
{
"epoch": 0.77,
"learning_rate": 3.583002371451116e-05,
"loss": 0.3522,
"step": 2246
},
{
"epoch": 0.77,
"learning_rate": 3.581744201493191e-05,
"loss": 0.3228,
"step": 2247
},
{
"epoch": 0.77,
"learning_rate": 3.5804856943485334e-05,
"loss": 0.3037,
"step": 2248
},
{
"epoch": 0.77,
"learning_rate": 3.5792268504094256e-05,
"loss": 0.3407,
"step": 2249
},
{
"epoch": 0.77,
"learning_rate": 3.5779676700682595e-05,
"loss": 0.3598,
"step": 2250
},
{
"epoch": 0.77,
"learning_rate": 3.5767081537175285e-05,
"loss": 0.348,
"step": 2251
},
{
"epoch": 0.77,
"learning_rate": 3.575448301749833e-05,
"loss": 0.3469,
"step": 2252
},
{
"epoch": 0.77,
"learning_rate": 3.574188114557876e-05,
"loss": 0.2799,
"step": 2253
},
{
"epoch": 0.77,
"learning_rate": 3.5729275925344676e-05,
"loss": 0.3655,
"step": 2254
},
{
"epoch": 0.77,
"learning_rate": 3.571666736072519e-05,
"loss": 0.2945,
"step": 2255
},
{
"epoch": 0.77,
"learning_rate": 3.5704055455650484e-05,
"loss": 0.3237,
"step": 2256
},
{
"epoch": 0.77,
"learning_rate": 3.569144021405177e-05,
"loss": 0.3584,
"step": 2257
},
{
"epoch": 0.77,
"learning_rate": 3.5678821639861305e-05,
"loss": 0.3335,
"step": 2258
},
{
"epoch": 0.77,
"learning_rate": 3.566619973701237e-05,
"loss": 0.3527,
"step": 2259
},
{
"epoch": 0.77,
"learning_rate": 3.56535745094393e-05,
"loss": 0.3798,
"step": 2260
},
{
"epoch": 0.77,
"learning_rate": 3.564094596107747e-05,
"loss": 0.3145,
"step": 2261
},
{
"epoch": 0.77,
"learning_rate": 3.562831409586327e-05,
"loss": 0.356,
"step": 2262
},
{
"epoch": 0.77,
"learning_rate": 3.561567891773414e-05,
"loss": 0.3584,
"step": 2263
},
{
"epoch": 0.77,
"learning_rate": 3.560304043062854e-05,
"loss": 0.3244,
"step": 2264
},
{
"epoch": 0.77,
"learning_rate": 3.559039863848598e-05,
"loss": 0.303,
"step": 2265
},
{
"epoch": 0.77,
"learning_rate": 3.557775354524698e-05,
"loss": 0.3663,
"step": 2266
},
{
"epoch": 0.77,
"learning_rate": 3.556510515485311e-05,
"loss": 0.3153,
"step": 2267
},
{
"epoch": 0.77,
"learning_rate": 3.555245347124694e-05,
"loss": 0.3328,
"step": 2268
},
{
"epoch": 0.77,
"learning_rate": 3.55397984983721e-05,
"loss": 0.281,
"step": 2269
},
{
"epoch": 0.77,
"learning_rate": 3.552714024017321e-05,
"loss": 0.2885,
"step": 2270
},
{
"epoch": 0.77,
"learning_rate": 3.551447870059594e-05,
"loss": 0.3839,
"step": 2271
},
{
"epoch": 0.78,
"learning_rate": 3.5501813883586974e-05,
"loss": 0.3421,
"step": 2272
},
{
"epoch": 0.78,
"learning_rate": 3.548914579309402e-05,
"loss": 0.2969,
"step": 2273
},
{
"epoch": 0.78,
"learning_rate": 3.54764744330658e-05,
"loss": 0.3679,
"step": 2274
},
{
"epoch": 0.78,
"learning_rate": 3.5463799807452075e-05,
"loss": 0.334,
"step": 2275
},
{
"epoch": 0.78,
"learning_rate": 3.5451121920203584e-05,
"loss": 0.3139,
"step": 2276
},
{
"epoch": 0.78,
"learning_rate": 3.5438440775272125e-05,
"loss": 0.3383,
"step": 2277
},
{
"epoch": 0.78,
"learning_rate": 3.542575637661049e-05,
"loss": 0.3173,
"step": 2278
},
{
"epoch": 0.78,
"learning_rate": 3.541306872817248e-05,
"loss": 0.3449,
"step": 2279
},
{
"epoch": 0.78,
"learning_rate": 3.540037783391293e-05,
"loss": 0.3164,
"step": 2280
},
{
"epoch": 0.78,
"learning_rate": 3.538768369778768e-05,
"loss": 0.2896,
"step": 2281
},
{
"epoch": 0.78,
"learning_rate": 3.537498632375355e-05,
"loss": 0.3279,
"step": 2282
},
{
"epoch": 0.78,
"learning_rate": 3.536228571576841e-05,
"loss": 0.3119,
"step": 2283
},
{
"epoch": 0.78,
"learning_rate": 3.534958187779113e-05,
"loss": 0.3569,
"step": 2284
},
{
"epoch": 0.78,
"learning_rate": 3.533687481378157e-05,
"loss": 0.3836,
"step": 2285
},
{
"epoch": 0.78,
"learning_rate": 3.53241645277006e-05,
"loss": 0.3295,
"step": 2286
},
{
"epoch": 0.78,
"learning_rate": 3.531145102351012e-05,
"loss": 0.3856,
"step": 2287
},
{
"epoch": 0.78,
"learning_rate": 3.529873430517299e-05,
"loss": 0.2707,
"step": 2288
},
{
"epoch": 0.78,
"learning_rate": 3.52860143766531e-05,
"loss": 0.3567,
"step": 2289
},
{
"epoch": 0.78,
"learning_rate": 3.527329124191533e-05,
"loss": 0.2622,
"step": 2290
},
{
"epoch": 0.78,
"learning_rate": 3.5260564904925576e-05,
"loss": 0.3002,
"step": 2291
},
{
"epoch": 0.78,
"learning_rate": 3.5247835369650715e-05,
"loss": 0.3283,
"step": 2292
},
{
"epoch": 0.78,
"learning_rate": 3.5235102640058634e-05,
"loss": 0.3556,
"step": 2293
},
{
"epoch": 0.78,
"learning_rate": 3.5222366720118196e-05,
"loss": 0.32,
"step": 2294
},
{
"epoch": 0.78,
"learning_rate": 3.520962761379927e-05,
"loss": 0.3686,
"step": 2295
},
{
"epoch": 0.78,
"learning_rate": 3.519688532507273e-05,
"loss": 0.2927,
"step": 2296
},
{
"epoch": 0.78,
"learning_rate": 3.5184139857910416e-05,
"loss": 0.319,
"step": 2297
},
{
"epoch": 0.78,
"learning_rate": 3.5171391216285196e-05,
"loss": 0.3505,
"step": 2298
},
{
"epoch": 0.78,
"learning_rate": 3.515863940417087e-05,
"loss": 0.3126,
"step": 2299
},
{
"epoch": 0.78,
"learning_rate": 3.51458844255423e-05,
"loss": 0.355,
"step": 2300
},
{
"epoch": 0.79,
"learning_rate": 3.5133126284375275e-05,
"loss": 0.295,
"step": 2301
},
{
"epoch": 0.79,
"learning_rate": 3.5120364984646594e-05,
"loss": 0.3707,
"step": 2302
},
{
"epoch": 0.79,
"learning_rate": 3.5107600530334026e-05,
"loss": 0.3644,
"step": 2303
},
{
"epoch": 0.79,
"learning_rate": 3.5094832925416355e-05,
"loss": 0.3274,
"step": 2304
},
{
"epoch": 0.79,
"learning_rate": 3.5082062173873316e-05,
"loss": 0.3241,
"step": 2305
},
{
"epoch": 0.79,
"learning_rate": 3.506928827968564e-05,
"loss": 0.3192,
"step": 2306
},
{
"epoch": 0.79,
"learning_rate": 3.5056511246835025e-05,
"loss": 0.328,
"step": 2307
},
{
"epoch": 0.79,
"learning_rate": 3.5043731079304174e-05,
"loss": 0.33,
"step": 2308
},
{
"epoch": 0.79,
"learning_rate": 3.503094778107673e-05,
"loss": 0.368,
"step": 2309
},
{
"epoch": 0.79,
"learning_rate": 3.501816135613733e-05,
"loss": 0.3417,
"step": 2310
},
{
"epoch": 0.79,
"learning_rate": 3.500537180847159e-05,
"loss": 0.3119,
"step": 2311
},
{
"epoch": 0.79,
"learning_rate": 3.4992579142066104e-05,
"loss": 0.356,
"step": 2312
},
{
"epoch": 0.79,
"learning_rate": 3.497978336090843e-05,
"loss": 0.373,
"step": 2313
},
{
"epoch": 0.79,
"learning_rate": 3.496698446898709e-05,
"loss": 0.3771,
"step": 2314
},
{
"epoch": 0.79,
"learning_rate": 3.495418247029158e-05,
"loss": 0.3233,
"step": 2315
},
{
"epoch": 0.79,
"learning_rate": 3.494137736881237e-05,
"loss": 0.3339,
"step": 2316
},
{
"epoch": 0.79,
"learning_rate": 3.492856916854089e-05,
"loss": 0.341,
"step": 2317
},
{
"epoch": 0.79,
"learning_rate": 3.491575787346954e-05,
"loss": 0.3383,
"step": 2318
},
{
"epoch": 0.79,
"learning_rate": 3.4902943487591696e-05,
"loss": 0.3593,
"step": 2319
},
{
"epoch": 0.79,
"learning_rate": 3.489012601490167e-05,
"loss": 0.3579,
"step": 2320
},
{
"epoch": 0.79,
"learning_rate": 3.487730545939477e-05,
"loss": 0.4045,
"step": 2321
},
{
"epoch": 0.79,
"learning_rate": 3.486448182506723e-05,
"loss": 0.3094,
"step": 2322
},
{
"epoch": 0.79,
"learning_rate": 3.485165511591627e-05,
"loss": 0.3625,
"step": 2323
},
{
"epoch": 0.79,
"learning_rate": 3.483882533594005e-05,
"loss": 0.3524,
"step": 2324
},
{
"epoch": 0.79,
"learning_rate": 3.48259924891377e-05,
"loss": 0.3285,
"step": 2325
},
{
"epoch": 0.79,
"learning_rate": 3.481315657950931e-05,
"loss": 0.3239,
"step": 2326
},
{
"epoch": 0.79,
"learning_rate": 3.48003176110559e-05,
"loss": 0.3651,
"step": 2327
},
{
"epoch": 0.79,
"learning_rate": 3.478747558777949e-05,
"loss": 0.3451,
"step": 2328
},
{
"epoch": 0.79,
"learning_rate": 3.4774630513682995e-05,
"loss": 0.3242,
"step": 2329
},
{
"epoch": 0.79,
"learning_rate": 3.476178239277031e-05,
"loss": 0.3192,
"step": 2330
},
{
"epoch": 0.8,
"learning_rate": 3.4748931229046295e-05,
"loss": 0.3222,
"step": 2331
},
{
"epoch": 0.8,
"learning_rate": 3.4736077026516724e-05,
"loss": 0.3731,
"step": 2332
},
{
"epoch": 0.8,
"learning_rate": 3.472321978918835e-05,
"loss": 0.3818,
"step": 2333
},
{
"epoch": 0.8,
"learning_rate": 3.471035952106885e-05,
"loss": 0.3541,
"step": 2334
},
{
"epoch": 0.8,
"learning_rate": 3.469749622616686e-05,
"loss": 0.3525,
"step": 2335
},
{
"epoch": 0.8,
"learning_rate": 3.468462990849195e-05,
"loss": 0.3241,
"step": 2336
},
{
"epoch": 0.8,
"learning_rate": 3.4671760572054624e-05,
"loss": 0.3313,
"step": 2337
},
{
"epoch": 0.8,
"learning_rate": 3.465888822086636e-05,
"loss": 0.3377,
"step": 2338
},
{
"epoch": 0.8,
"learning_rate": 3.4646012858939526e-05,
"loss": 0.306,
"step": 2339
},
{
"epoch": 0.8,
"learning_rate": 3.463313449028748e-05,
"loss": 0.3329,
"step": 2340
},
{
"epoch": 0.8,
"learning_rate": 3.462025311892449e-05,
"loss": 0.3022,
"step": 2341
},
{
"epoch": 0.8,
"learning_rate": 3.460736874886576e-05,
"loss": 0.3242,
"step": 2342
},
{
"epoch": 0.8,
"learning_rate": 3.4594481384127426e-05,
"loss": 0.3285,
"step": 2343
},
{
"epoch": 0.8,
"learning_rate": 3.458159102872658e-05,
"loss": 0.3784,
"step": 2344
},
{
"epoch": 0.8,
"learning_rate": 3.456869768668121e-05,
"loss": 0.3378,
"step": 2345
},
{
"epoch": 0.8,
"learning_rate": 3.455580136201026e-05,
"loss": 0.3229,
"step": 2346
},
{
"epoch": 0.8,
"learning_rate": 3.454290205873362e-05,
"loss": 0.3602,
"step": 2347
},
{
"epoch": 0.8,
"learning_rate": 3.452999978087206e-05,
"loss": 0.3658,
"step": 2348
},
{
"epoch": 0.8,
"learning_rate": 3.451709453244732e-05,
"loss": 0.3349,
"step": 2349
},
{
"epoch": 0.8,
"learning_rate": 3.450418631748203e-05,
"loss": 0.3026,
"step": 2350
},
{
"epoch": 0.8,
"learning_rate": 3.44912751399998e-05,
"loss": 0.3471,
"step": 2351
},
{
"epoch": 0.8,
"learning_rate": 3.44783610040251e-05,
"loss": 0.3114,
"step": 2352
},
{
"epoch": 0.8,
"learning_rate": 3.446544391358336e-05,
"loss": 0.3059,
"step": 2353
},
{
"epoch": 0.8,
"learning_rate": 3.445252387270092e-05,
"loss": 0.36,
"step": 2354
},
{
"epoch": 0.8,
"learning_rate": 3.443960088540504e-05,
"loss": 0.3533,
"step": 2355
},
{
"epoch": 0.8,
"learning_rate": 3.44266749557239e-05,
"loss": 0.2955,
"step": 2356
},
{
"epoch": 0.8,
"learning_rate": 3.44137460876866e-05,
"loss": 0.3348,
"step": 2357
},
{
"epoch": 0.8,
"learning_rate": 3.4400814285323146e-05,
"loss": 0.3277,
"step": 2358
},
{
"epoch": 0.8,
"learning_rate": 3.438787955266447e-05,
"loss": 0.3425,
"step": 2359
},
{
"epoch": 0.81,
"learning_rate": 3.437494189374242e-05,
"loss": 0.3535,
"step": 2360
},
{
"epoch": 0.81,
"learning_rate": 3.4362001312589733e-05,
"loss": 0.3169,
"step": 2361
},
{
"epoch": 0.81,
"learning_rate": 3.4349057813240095e-05,
"loss": 0.3216,
"step": 2362
},
{
"epoch": 0.81,
"learning_rate": 3.433611139972804e-05,
"loss": 0.3685,
"step": 2363
},
{
"epoch": 0.81,
"learning_rate": 3.43231620760891e-05,
"loss": 0.3281,
"step": 2364
},
{
"epoch": 0.81,
"learning_rate": 3.4310209846359624e-05,
"loss": 0.338,
"step": 2365
},
{
"epoch": 0.81,
"learning_rate": 3.429725471457692e-05,
"loss": 0.3174,
"step": 2366
},
{
"epoch": 0.81,
"learning_rate": 3.42842966847792e-05,
"loss": 0.31,
"step": 2367
},
{
"epoch": 0.81,
"learning_rate": 3.4271335761005544e-05,
"loss": 0.3218,
"step": 2368
},
{
"epoch": 0.81,
"learning_rate": 3.425837194729597e-05,
"loss": 0.3316,
"step": 2369
},
{
"epoch": 0.81,
"learning_rate": 3.424540524769139e-05,
"loss": 0.3462,
"step": 2370
},
{
"epoch": 0.81,
"learning_rate": 3.4232435666233585e-05,
"loss": 0.3229,
"step": 2371
},
{
"epoch": 0.81,
"learning_rate": 3.421946320696528e-05,
"loss": 0.3114,
"step": 2372
},
{
"epoch": 0.81,
"learning_rate": 3.420648787393007e-05,
"loss": 0.2831,
"step": 2373
},
{
"epoch": 0.81,
"learning_rate": 3.419350967117245e-05,
"loss": 0.3115,
"step": 2374
},
{
"epoch": 0.81,
"learning_rate": 3.418052860273781e-05,
"loss": 0.3927,
"step": 2375
},
{
"epoch": 0.81,
"learning_rate": 3.416754467267242e-05,
"loss": 0.3468,
"step": 2376
},
{
"epoch": 0.81,
"learning_rate": 3.415455788502349e-05,
"loss": 0.317,
"step": 2377
},
{
"epoch": 0.81,
"learning_rate": 3.414156824383906e-05,
"loss": 0.2958,
"step": 2378
},
{
"epoch": 0.81,
"learning_rate": 3.412857575316809e-05,
"loss": 0.4387,
"step": 2379
},
{
"epoch": 0.81,
"learning_rate": 3.4115580417060423e-05,
"loss": 0.2938,
"step": 2380
},
{
"epoch": 0.81,
"learning_rate": 3.41025822395668e-05,
"loss": 0.3542,
"step": 2381
},
{
"epoch": 0.81,
"learning_rate": 3.408958122473883e-05,
"loss": 0.3658,
"step": 2382
},
{
"epoch": 0.81,
"learning_rate": 3.4076577376629014e-05,
"loss": 0.3804,
"step": 2383
},
{
"epoch": 0.81,
"learning_rate": 3.406357069929074e-05,
"loss": 0.2952,
"step": 2384
},
{
"epoch": 0.81,
"learning_rate": 3.405056119677827e-05,
"loss": 0.3159,
"step": 2385
},
{
"epoch": 0.81,
"learning_rate": 3.4037548873146755e-05,
"loss": 0.3347,
"step": 2386
},
{
"epoch": 0.81,
"learning_rate": 3.402453373245222e-05,
"loss": 0.3441,
"step": 2387
},
{
"epoch": 0.81,
"learning_rate": 3.401151577875157e-05,
"loss": 0.35,
"step": 2388
},
{
"epoch": 0.82,
"learning_rate": 3.3998495016102574e-05,
"loss": 0.3159,
"step": 2389
},
{
"epoch": 0.82,
"learning_rate": 3.398547144856391e-05,
"loss": 0.3628,
"step": 2390
},
{
"epoch": 0.82,
"learning_rate": 3.3972445080195104e-05,
"loss": 0.3371,
"step": 2391
},
{
"epoch": 0.82,
"learning_rate": 3.395941591505655e-05,
"loss": 0.3967,
"step": 2392
},
{
"epoch": 0.82,
"learning_rate": 3.394638395720953e-05,
"loss": 0.3442,
"step": 2393
},
{
"epoch": 0.82,
"learning_rate": 3.393334921071618e-05,
"loss": 0.3422,
"step": 2394
},
{
"epoch": 0.82,
"learning_rate": 3.392031167963953e-05,
"loss": 0.3124,
"step": 2395
},
{
"epoch": 0.82,
"learning_rate": 3.3907271368043466e-05,
"loss": 0.3706,
"step": 2396
},
{
"epoch": 0.82,
"learning_rate": 3.3894228279992726e-05,
"loss": 0.3624,
"step": 2397
},
{
"epoch": 0.82,
"learning_rate": 3.388118241955293e-05,
"loss": 0.3233,
"step": 2398
},
{
"epoch": 0.82,
"learning_rate": 3.386813379079056e-05,
"loss": 0.3375,
"step": 2399
},
{
"epoch": 0.82,
"learning_rate": 3.385508239777295e-05,
"loss": 0.3056,
"step": 2400
},
{
"epoch": 0.82,
"learning_rate": 3.3842028244568324e-05,
"loss": 0.335,
"step": 2401
},
{
"epoch": 0.82,
"learning_rate": 3.3828971335245725e-05,
"loss": 0.3207,
"step": 2402
},
{
"epoch": 0.82,
"learning_rate": 3.381591167387509e-05,
"loss": 0.362,
"step": 2403
},
{
"epoch": 0.82,
"learning_rate": 3.3802849264527205e-05,
"loss": 0.3676,
"step": 2404
},
{
"epoch": 0.82,
"learning_rate": 3.378978411127369e-05,
"loss": 0.3744,
"step": 2405
},
{
"epoch": 0.82,
"learning_rate": 3.377671621818706e-05,
"loss": 0.3508,
"step": 2406
},
{
"epoch": 0.82,
"learning_rate": 3.376364558934064e-05,
"loss": 0.3862,
"step": 2407
},
{
"epoch": 0.82,
"learning_rate": 3.375057222880865e-05,
"loss": 0.3436,
"step": 2408
},
{
"epoch": 0.82,
"learning_rate": 3.373749614066613e-05,
"loss": 0.3004,
"step": 2409
},
{
"epoch": 0.82,
"learning_rate": 3.372441732898899e-05,
"loss": 0.3113,
"step": 2410
},
{
"epoch": 0.82,
"learning_rate": 3.3711335797853977e-05,
"loss": 0.3483,
"step": 2411
},
{
"epoch": 0.82,
"learning_rate": 3.369825155133869e-05,
"loss": 0.2809,
"step": 2412
},
{
"epoch": 0.82,
"learning_rate": 3.3685164593521565e-05,
"loss": 0.3241,
"step": 2413
},
{
"epoch": 0.82,
"learning_rate": 3.3672074928481916e-05,
"loss": 0.4024,
"step": 2414
},
{
"epoch": 0.82,
"learning_rate": 3.365898256029985e-05,
"loss": 0.311,
"step": 2415
},
{
"epoch": 0.82,
"learning_rate": 3.364588749305637e-05,
"loss": 0.3243,
"step": 2416
},
{
"epoch": 0.82,
"learning_rate": 3.363278973083327e-05,
"loss": 0.3912,
"step": 2417
},
{
"epoch": 0.82,
"learning_rate": 3.3619689277713226e-05,
"loss": 0.3064,
"step": 2418
},
{
"epoch": 0.83,
"learning_rate": 3.360658613777972e-05,
"loss": 0.3441,
"step": 2419
},
{
"epoch": 0.83,
"learning_rate": 3.3593480315117095e-05,
"loss": 0.363,
"step": 2420
},
{
"epoch": 0.83,
"learning_rate": 3.358037181381052e-05,
"loss": 0.3244,
"step": 2421
},
{
"epoch": 0.83,
"learning_rate": 3.3567260637946e-05,
"loss": 0.3094,
"step": 2422
},
{
"epoch": 0.83,
"learning_rate": 3.355414679161037e-05,
"loss": 0.3072,
"step": 2423
},
{
"epoch": 0.83,
"learning_rate": 3.3541030278891315e-05,
"loss": 0.3098,
"step": 2424
},
{
"epoch": 0.83,
"learning_rate": 3.352791110387733e-05,
"loss": 0.3047,
"step": 2425
},
{
"epoch": 0.83,
"learning_rate": 3.351478927065774e-05,
"loss": 0.3906,
"step": 2426
},
{
"epoch": 0.83,
"learning_rate": 3.350166478332272e-05,
"loss": 0.2923,
"step": 2427
},
{
"epoch": 0.83,
"learning_rate": 3.348853764596324e-05,
"loss": 0.348,
"step": 2428
},
{
"epoch": 0.83,
"learning_rate": 3.347540786267113e-05,
"loss": 0.3156,
"step": 2429
},
{
"epoch": 0.83,
"learning_rate": 3.346227543753903e-05,
"loss": 0.3241,
"step": 2430
},
{
"epoch": 0.83,
"learning_rate": 3.344914037466039e-05,
"loss": 0.347,
"step": 2431
},
{
"epoch": 0.83,
"learning_rate": 3.343600267812951e-05,
"loss": 0.2728,
"step": 2432
},
{
"epoch": 0.83,
"learning_rate": 3.342286235204147e-05,
"loss": 0.3629,
"step": 2433
},
{
"epoch": 0.83,
"learning_rate": 3.340971940049223e-05,
"loss": 0.347,
"step": 2434
},
{
"epoch": 0.83,
"learning_rate": 3.339657382757852e-05,
"loss": 0.3655,
"step": 2435
},
{
"epoch": 0.83,
"learning_rate": 3.3383425637397895e-05,
"loss": 0.2947,
"step": 2436
},
{
"epoch": 0.83,
"learning_rate": 3.337027483404873e-05,
"loss": 0.3316,
"step": 2437
},
{
"epoch": 0.83,
"learning_rate": 3.335712142163022e-05,
"loss": 0.3272,
"step": 2438
},
{
"epoch": 0.83,
"learning_rate": 3.334396540424238e-05,
"loss": 0.2951,
"step": 2439
},
{
"epoch": 0.83,
"learning_rate": 3.3330806785986015e-05,
"loss": 0.3167,
"step": 2440
},
{
"epoch": 0.83,
"learning_rate": 3.3317645570962755e-05,
"loss": 0.2966,
"step": 2441
},
{
"epoch": 0.83,
"learning_rate": 3.330448176327503e-05,
"loss": 0.3821,
"step": 2442
},
{
"epoch": 0.83,
"learning_rate": 3.32913153670261e-05,
"loss": 0.3387,
"step": 2443
},
{
"epoch": 0.83,
"learning_rate": 3.327814638632e-05,
"loss": 0.3253,
"step": 2444
},
{
"epoch": 0.83,
"learning_rate": 3.3264974825261595e-05,
"loss": 0.3018,
"step": 2445
},
{
"epoch": 0.83,
"learning_rate": 3.3251800687956554e-05,
"loss": 0.3385,
"step": 2446
},
{
"epoch": 0.83,
"learning_rate": 3.323862397851133e-05,
"loss": 0.3261,
"step": 2447
},
{
"epoch": 0.84,
"learning_rate": 3.32254447010332e-05,
"loss": 0.3297,
"step": 2448
},
{
"epoch": 0.84,
"learning_rate": 3.321226285963022e-05,
"loss": 0.3122,
"step": 2449
},
{
"epoch": 0.84,
"learning_rate": 3.319907845841126e-05,
"loss": 0.3403,
"step": 2450
},
{
"epoch": 0.84,
"learning_rate": 3.318589150148599e-05,
"loss": 0.3369,
"step": 2451
},
{
"epoch": 0.84,
"learning_rate": 3.317270199296486e-05,
"loss": 0.3463,
"step": 2452
},
{
"epoch": 0.84,
"learning_rate": 3.315950993695913e-05,
"loss": 0.3533,
"step": 2453
},
{
"epoch": 0.84,
"learning_rate": 3.314631533758086e-05,
"loss": 0.2919,
"step": 2454
},
{
"epoch": 0.84,
"learning_rate": 3.3133118198942885e-05,
"loss": 0.3271,
"step": 2455
},
{
"epoch": 0.84,
"learning_rate": 3.311991852515883e-05,
"loss": 0.3338,
"step": 2456
},
{
"epoch": 0.84,
"learning_rate": 3.3106716320343134e-05,
"loss": 0.3612,
"step": 2457
},
{
"epoch": 0.84,
"learning_rate": 3.3093511588611e-05,
"loss": 0.3367,
"step": 2458
},
{
"epoch": 0.84,
"learning_rate": 3.308030433407844e-05,
"loss": 0.3518,
"step": 2459
},
{
"epoch": 0.84,
"learning_rate": 3.306709456086223e-05,
"loss": 0.3515,
"step": 2460
},
{
"epoch": 0.84,
"learning_rate": 3.305388227307996e-05,
"loss": 0.3571,
"step": 2461
},
{
"epoch": 0.84,
"learning_rate": 3.304066747484997e-05,
"loss": 0.3413,
"step": 2462
},
{
"epoch": 0.84,
"learning_rate": 3.302745017029139e-05,
"loss": 0.3144,
"step": 2463
},
{
"epoch": 0.84,
"learning_rate": 3.301423036352417e-05,
"loss": 0.3849,
"step": 2464
},
{
"epoch": 0.84,
"learning_rate": 3.300100805866898e-05,
"loss": 0.2997,
"step": 2465
},
{
"epoch": 0.84,
"learning_rate": 3.298778325984732e-05,
"loss": 0.3604,
"step": 2466
},
{
"epoch": 0.84,
"learning_rate": 3.297455597118144e-05,
"loss": 0.3408,
"step": 2467
},
{
"epoch": 0.84,
"learning_rate": 3.2961326196794367e-05,
"loss": 0.3416,
"step": 2468
},
{
"epoch": 0.84,
"learning_rate": 3.294809394080992e-05,
"loss": 0.3121,
"step": 2469
},
{
"epoch": 0.84,
"learning_rate": 3.293485920735266e-05,
"loss": 0.286,
"step": 2470
},
{
"epoch": 0.84,
"learning_rate": 3.2921622000547956e-05,
"loss": 0.326,
"step": 2471
},
{
"epoch": 0.84,
"learning_rate": 3.2908382324521926e-05,
"loss": 0.3383,
"step": 2472
},
{
"epoch": 0.84,
"learning_rate": 3.289514018340148e-05,
"loss": 0.3163,
"step": 2473
},
{
"epoch": 0.84,
"learning_rate": 3.288189558131425e-05,
"loss": 0.3483,
"step": 2474
},
{
"epoch": 0.84,
"learning_rate": 3.286864852238868e-05,
"loss": 0.3274,
"step": 2475
},
{
"epoch": 0.84,
"learning_rate": 3.2855399010753965e-05,
"loss": 0.3063,
"step": 2476
},
{
"epoch": 0.85,
"learning_rate": 3.284214705054006e-05,
"loss": 0.3557,
"step": 2477
},
{
"epoch": 0.85,
"learning_rate": 3.28288926458777e-05,
"loss": 0.3595,
"step": 2478
},
{
"epoch": 0.85,
"learning_rate": 3.2815635800898356e-05,
"loss": 0.326,
"step": 2479
},
{
"epoch": 0.85,
"learning_rate": 3.2802376519734284e-05,
"loss": 0.336,
"step": 2480
},
{
"epoch": 0.85,
"learning_rate": 3.2789114806518486e-05,
"loss": 0.3314,
"step": 2481
},
{
"epoch": 0.85,
"learning_rate": 3.277585066538471e-05,
"loss": 0.3388,
"step": 2482
},
{
"epoch": 0.85,
"learning_rate": 3.27625841004675e-05,
"loss": 0.3005,
"step": 2483
},
{
"epoch": 0.85,
"learning_rate": 3.27493151159021e-05,
"loss": 0.2823,
"step": 2484
},
{
"epoch": 0.85,
"learning_rate": 3.273604371582456e-05,
"loss": 0.3331,
"step": 2485
},
{
"epoch": 0.85,
"learning_rate": 3.272276990437167e-05,
"loss": 0.3432,
"step": 2486
},
{
"epoch": 0.85,
"learning_rate": 3.2709493685680947e-05,
"loss": 0.2664,
"step": 2487
},
{
"epoch": 0.85,
"learning_rate": 3.269621506389069e-05,
"loss": 0.3105,
"step": 2488
},
{
"epoch": 0.85,
"learning_rate": 3.2682934043139915e-05,
"loss": 0.3146,
"step": 2489
},
{
"epoch": 0.85,
"learning_rate": 3.2669650627568405e-05,
"loss": 0.3486,
"step": 2490
},
{
"epoch": 0.85,
"learning_rate": 3.26563648213167e-05,
"loss": 0.2999,
"step": 2491
},
{
"epoch": 0.85,
"learning_rate": 3.264307662852606e-05,
"loss": 0.3232,
"step": 2492
},
{
"epoch": 0.85,
"learning_rate": 3.262978605333852e-05,
"loss": 0.3146,
"step": 2493
},
{
"epoch": 0.85,
"learning_rate": 3.261649309989682e-05,
"loss": 0.3246,
"step": 2494
},
{
"epoch": 0.85,
"learning_rate": 3.260319777234446e-05,
"loss": 0.3733,
"step": 2495
},
{
"epoch": 0.85,
"learning_rate": 3.2589900074825696e-05,
"loss": 0.3411,
"step": 2496
},
{
"epoch": 0.85,
"learning_rate": 3.25766000114855e-05,
"loss": 0.3399,
"step": 2497
},
{
"epoch": 0.85,
"learning_rate": 3.256329758646958e-05,
"loss": 0.2965,
"step": 2498
},
{
"epoch": 0.85,
"learning_rate": 3.2549992803924394e-05,
"loss": 0.3338,
"step": 2499
},
{
"epoch": 0.85,
"learning_rate": 3.253668566799714e-05,
"loss": 0.4007,
"step": 2500
},
{
"epoch": 0.85,
"learning_rate": 3.252337618283573e-05,
"loss": 0.3264,
"step": 2501
},
{
"epoch": 0.85,
"learning_rate": 3.251006435258882e-05,
"loss": 0.3374,
"step": 2502
},
{
"epoch": 0.85,
"learning_rate": 3.249675018140578e-05,
"loss": 0.3395,
"step": 2503
},
{
"epoch": 0.85,
"learning_rate": 3.2483433673436755e-05,
"loss": 0.3551,
"step": 2504
},
{
"epoch": 0.85,
"learning_rate": 3.247011483283255e-05,
"loss": 0.3364,
"step": 2505
},
{
"epoch": 0.85,
"learning_rate": 3.245679366374477e-05,
"loss": 0.3158,
"step": 2506
},
{
"epoch": 0.86,
"learning_rate": 3.244347017032569e-05,
"loss": 0.3424,
"step": 2507
},
{
"epoch": 0.86,
"learning_rate": 3.243014435672833e-05,
"loss": 0.298,
"step": 2508
},
{
"epoch": 0.86,
"learning_rate": 3.241681622710644e-05,
"loss": 0.3437,
"step": 2509
},
{
"epoch": 0.86,
"learning_rate": 3.240348578561449e-05,
"loss": 0.326,
"step": 2510
},
{
"epoch": 0.86,
"learning_rate": 3.239015303640765e-05,
"loss": 0.3219,
"step": 2511
},
{
"epoch": 0.86,
"learning_rate": 3.237681798364183e-05,
"loss": 0.3406,
"step": 2512
},
{
"epoch": 0.86,
"learning_rate": 3.236348063147366e-05,
"loss": 0.3287,
"step": 2513
},
{
"epoch": 0.86,
"learning_rate": 3.2350140984060486e-05,
"loss": 0.3456,
"step": 2514
},
{
"epoch": 0.86,
"learning_rate": 3.233679904556034e-05,
"loss": 0.3549,
"step": 2515
},
{
"epoch": 0.86,
"learning_rate": 3.2323454820132005e-05,
"loss": 0.3302,
"step": 2516
},
{
"epoch": 0.86,
"learning_rate": 3.231010831193497e-05,
"loss": 0.3247,
"step": 2517
},
{
"epoch": 0.86,
"learning_rate": 3.229675952512942e-05,
"loss": 0.366,
"step": 2518
},
{
"epoch": 0.86,
"learning_rate": 3.228340846387626e-05,
"loss": 0.3068,
"step": 2519
},
{
"epoch": 0.86,
"learning_rate": 3.2270055132337105e-05,
"loss": 0.2877,
"step": 2520
},
{
"epoch": 0.86,
"learning_rate": 3.225669953467427e-05,
"loss": 0.3339,
"step": 2521
},
{
"epoch": 0.86,
"learning_rate": 3.22433416750508e-05,
"loss": 0.3223,
"step": 2522
},
{
"epoch": 0.86,
"learning_rate": 3.2229981557630395e-05,
"loss": 0.3371,
"step": 2523
},
{
"epoch": 0.86,
"learning_rate": 3.2216619186577524e-05,
"loss": 0.3354,
"step": 2524
},
{
"epoch": 0.86,
"learning_rate": 3.2203254566057304e-05,
"loss": 0.3292,
"step": 2525
},
{
"epoch": 0.86,
"learning_rate": 3.218988770023558e-05,
"loss": 0.2931,
"step": 2526
},
{
"epoch": 0.86,
"learning_rate": 3.217651859327889e-05,
"loss": 0.3547,
"step": 2527
},
{
"epoch": 0.86,
"learning_rate": 3.2163147249354484e-05,
"loss": 0.2943,
"step": 2528
},
{
"epoch": 0.86,
"learning_rate": 3.214977367263029e-05,
"loss": 0.3219,
"step": 2529
},
{
"epoch": 0.86,
"learning_rate": 3.2136397867274925e-05,
"loss": 0.3779,
"step": 2530
},
{
"epoch": 0.86,
"learning_rate": 3.212301983745774e-05,
"loss": 0.3275,
"step": 2531
},
{
"epoch": 0.86,
"learning_rate": 3.2109639587348734e-05,
"loss": 0.3785,
"step": 2532
},
{
"epoch": 0.86,
"learning_rate": 3.2096257121118626e-05,
"loss": 0.3111,
"step": 2533
},
{
"epoch": 0.86,
"learning_rate": 3.208287244293882e-05,
"loss": 0.3575,
"step": 2534
},
{
"epoch": 0.86,
"learning_rate": 3.20694855569814e-05,
"loss": 0.3208,
"step": 2535
},
{
"epoch": 0.87,
"learning_rate": 3.2056096467419156e-05,
"loss": 0.3246,
"step": 2536
},
{
"epoch": 0.87,
"learning_rate": 3.204270517842555e-05,
"loss": 0.3424,
"step": 2537
},
{
"epoch": 0.87,
"learning_rate": 3.202931169417474e-05,
"loss": 0.3536,
"step": 2538
},
{
"epoch": 0.87,
"learning_rate": 3.2015916018841547e-05,
"loss": 0.3487,
"step": 2539
},
{
"epoch": 0.87,
"learning_rate": 3.2002518156601504e-05,
"loss": 0.3126,
"step": 2540
},
{
"epoch": 0.87,
"learning_rate": 3.198911811163081e-05,
"loss": 0.3041,
"step": 2541
},
{
"epoch": 0.87,
"learning_rate": 3.197571588810634e-05,
"loss": 0.3466,
"step": 2542
},
{
"epoch": 0.87,
"learning_rate": 3.196231149020566e-05,
"loss": 0.333,
"step": 2543
},
{
"epoch": 0.87,
"learning_rate": 3.1948904922107015e-05,
"loss": 0.362,
"step": 2544
},
{
"epoch": 0.87,
"learning_rate": 3.193549618798931e-05,
"loss": 0.3174,
"step": 2545
},
{
"epoch": 0.87,
"learning_rate": 3.192208529203214e-05,
"loss": 0.3694,
"step": 2546
},
{
"epoch": 0.87,
"learning_rate": 3.190867223841576e-05,
"loss": 0.3291,
"step": 2547
},
{
"epoch": 0.87,
"learning_rate": 3.189525703132112e-05,
"loss": 0.3488,
"step": 2548
},
{
"epoch": 0.87,
"learning_rate": 3.1881839674929815e-05,
"loss": 0.3251,
"step": 2549
},
{
"epoch": 0.87,
"learning_rate": 3.186842017342413e-05,
"loss": 0.3422,
"step": 2550
},
{
"epoch": 0.87,
"learning_rate": 3.185499853098701e-05,
"loss": 0.3471,
"step": 2551
},
{
"epoch": 0.87,
"learning_rate": 3.1841574751802076e-05,
"loss": 0.3071,
"step": 2552
},
{
"epoch": 0.87,
"learning_rate": 3.18281488400536e-05,
"loss": 0.3242,
"step": 2553
},
{
"epoch": 0.87,
"learning_rate": 3.181472079992651e-05,
"loss": 0.2937,
"step": 2554
},
{
"epoch": 0.87,
"learning_rate": 3.180129063560644e-05,
"loss": 0.3341,
"step": 2555
},
{
"epoch": 0.87,
"learning_rate": 3.1787858351279653e-05,
"loss": 0.3255,
"step": 2556
},
{
"epoch": 0.87,
"learning_rate": 3.177442395113308e-05,
"loss": 0.3252,
"step": 2557
},
{
"epoch": 0.87,
"learning_rate": 3.17609874393543e-05,
"loss": 0.3051,
"step": 2558
},
{
"epoch": 0.87,
"learning_rate": 3.174754882013158e-05,
"loss": 0.37,
"step": 2559
},
{
"epoch": 0.87,
"learning_rate": 3.1734108097653805e-05,
"loss": 0.3032,
"step": 2560
},
{
"epoch": 0.87,
"learning_rate": 3.172066527611055e-05,
"loss": 0.3378,
"step": 2561
},
{
"epoch": 0.87,
"learning_rate": 3.170722035969203e-05,
"loss": 0.2864,
"step": 2562
},
{
"epoch": 0.87,
"learning_rate": 3.16937733525891e-05,
"loss": 0.3531,
"step": 2563
},
{
"epoch": 0.87,
"learning_rate": 3.16803242589933e-05,
"loss": 0.3099,
"step": 2564
},
{
"epoch": 0.88,
"learning_rate": 3.1666873083096784e-05,
"loss": 0.3104,
"step": 2565
},
{
"epoch": 0.88,
"learning_rate": 3.165341982909239e-05,
"loss": 0.2914,
"step": 2566
},
{
"epoch": 0.88,
"learning_rate": 3.1639964501173556e-05,
"loss": 0.3499,
"step": 2567
},
{
"epoch": 0.88,
"learning_rate": 3.1626507103534415e-05,
"loss": 0.3349,
"step": 2568
},
{
"epoch": 0.88,
"learning_rate": 3.161304764036973e-05,
"loss": 0.3913,
"step": 2569
},
{
"epoch": 0.88,
"learning_rate": 3.159958611587489e-05,
"loss": 0.32,
"step": 2570
},
{
"epoch": 0.88,
"learning_rate": 3.158612253424595e-05,
"loss": 0.336,
"step": 2571
},
{
"epoch": 0.88,
"learning_rate": 3.15726568996796e-05,
"loss": 0.3859,
"step": 2572
},
{
"epoch": 0.88,
"learning_rate": 3.1559189216373154e-05,
"loss": 0.3541,
"step": 2573
},
{
"epoch": 0.88,
"learning_rate": 3.1545719488524576e-05,
"loss": 0.3436,
"step": 2574
},
{
"epoch": 0.88,
"learning_rate": 3.1532247720332476e-05,
"loss": 0.3394,
"step": 2575
},
{
"epoch": 0.88,
"learning_rate": 3.15187739159961e-05,
"loss": 0.3689,
"step": 2576
},
{
"epoch": 0.88,
"learning_rate": 3.15052980797153e-05,
"loss": 0.3324,
"step": 2577
},
{
"epoch": 0.88,
"learning_rate": 3.14918202156906e-05,
"loss": 0.327,
"step": 2578
},
{
"epoch": 0.88,
"learning_rate": 3.147834032812314e-05,
"loss": 0.3707,
"step": 2579
},
{
"epoch": 0.88,
"learning_rate": 3.146485842121466e-05,
"loss": 0.3532,
"step": 2580
},
{
"epoch": 0.88,
"learning_rate": 3.1451374499167586e-05,
"loss": 0.3106,
"step": 2581
},
{
"epoch": 0.88,
"learning_rate": 3.143788856618495e-05,
"loss": 0.3437,
"step": 2582
},
{
"epoch": 0.88,
"learning_rate": 3.142440062647038e-05,
"loss": 0.3458,
"step": 2583
},
{
"epoch": 0.88,
"learning_rate": 3.141091068422818e-05,
"loss": 0.3583,
"step": 2584
},
{
"epoch": 0.88,
"learning_rate": 3.139741874366323e-05,
"loss": 0.3567,
"step": 2585
},
{
"epoch": 0.88,
"learning_rate": 3.138392480898107e-05,
"loss": 0.2971,
"step": 2586
},
{
"epoch": 0.88,
"learning_rate": 3.137042888438785e-05,
"loss": 0.358,
"step": 2587
},
{
"epoch": 0.88,
"learning_rate": 3.135693097409033e-05,
"loss": 0.3674,
"step": 2588
},
{
"epoch": 0.88,
"learning_rate": 3.134343108229589e-05,
"loss": 0.3379,
"step": 2589
},
{
"epoch": 0.88,
"learning_rate": 3.1329929213212564e-05,
"loss": 0.3111,
"step": 2590
},
{
"epoch": 0.88,
"learning_rate": 3.131642537104894e-05,
"loss": 0.3186,
"step": 2591
},
{
"epoch": 0.88,
"learning_rate": 3.130291956001427e-05,
"loss": 0.3483,
"step": 2592
},
{
"epoch": 0.88,
"learning_rate": 3.128941178431839e-05,
"loss": 0.3157,
"step": 2593
},
{
"epoch": 0.89,
"learning_rate": 3.127590204817178e-05,
"loss": 0.3163,
"step": 2594
},
{
"epoch": 0.89,
"learning_rate": 3.126239035578551e-05,
"loss": 0.354,
"step": 2595
},
{
"epoch": 0.89,
"learning_rate": 3.124887671137125e-05,
"loss": 0.3248,
"step": 2596
},
{
"epoch": 0.89,
"learning_rate": 3.1235361119141305e-05,
"loss": 0.3112,
"step": 2597
},
{
"epoch": 0.89,
"learning_rate": 3.1221843583308566e-05,
"loss": 0.33,
"step": 2598
},
{
"epoch": 0.89,
"learning_rate": 3.120832410808654e-05,
"loss": 0.2996,
"step": 2599
},
{
"epoch": 0.89,
"learning_rate": 3.119480269768933e-05,
"loss": 0.3169,
"step": 2600
},
{
"epoch": 0.89,
"learning_rate": 3.118127935633166e-05,
"loss": 0.3376,
"step": 2601
},
{
"epoch": 0.89,
"learning_rate": 3.116775408822882e-05,
"loss": 0.325,
"step": 2602
},
{
"epoch": 0.89,
"learning_rate": 3.1154226897596754e-05,
"loss": 0.3249,
"step": 2603
},
{
"epoch": 0.89,
"learning_rate": 3.1140697788651964e-05,
"loss": 0.2811,
"step": 2604
},
{
"epoch": 0.89,
"learning_rate": 3.1127166765611556e-05,
"loss": 0.3094,
"step": 2605
},
{
"epoch": 0.89,
"learning_rate": 3.111363383269323e-05,
"loss": 0.3301,
"step": 2606
},
{
"epoch": 0.89,
"learning_rate": 3.110009899411532e-05,
"loss": 0.3442,
"step": 2607
},
{
"epoch": 0.89,
"learning_rate": 3.10865622540967e-05,
"loss": 0.3159,
"step": 2608
},
{
"epoch": 0.89,
"learning_rate": 3.107302361685686e-05,
"loss": 0.2932,
"step": 2609
},
{
"epoch": 0.89,
"learning_rate": 3.105948308661589e-05,
"loss": 0.3102,
"step": 2610
},
{
"epoch": 0.89,
"learning_rate": 3.104594066759446e-05,
"loss": 0.2946,
"step": 2611
},
{
"epoch": 0.89,
"learning_rate": 3.103239636401384e-05,
"loss": 0.3043,
"step": 2612
},
{
"epoch": 0.89,
"learning_rate": 3.101885018009585e-05,
"loss": 0.2836,
"step": 2613
},
{
"epoch": 0.89,
"learning_rate": 3.1005302120062954e-05,
"loss": 0.2664,
"step": 2614
},
{
"epoch": 0.89,
"learning_rate": 3.099175218813816e-05,
"loss": 0.3021,
"step": 2615
},
{
"epoch": 0.89,
"learning_rate": 3.097820038854505e-05,
"loss": 0.3188,
"step": 2616
},
{
"epoch": 0.89,
"learning_rate": 3.096464672550785e-05,
"loss": 0.2533,
"step": 2617
},
{
"epoch": 0.89,
"learning_rate": 3.0951091203251286e-05,
"loss": 0.3333,
"step": 2618
},
{
"epoch": 0.89,
"learning_rate": 3.093753382600072e-05,
"loss": 0.3874,
"step": 2619
},
{
"epoch": 0.89,
"learning_rate": 3.092397459798208e-05,
"loss": 0.2593,
"step": 2620
},
{
"epoch": 0.89,
"learning_rate": 3.0910413523421865e-05,
"loss": 0.3162,
"step": 2621
},
{
"epoch": 0.89,
"learning_rate": 3.089685060654714e-05,
"loss": 0.316,
"step": 2622
},
{
"epoch": 0.89,
"learning_rate": 3.088328585158556e-05,
"loss": 0.3508,
"step": 2623
},
{
"epoch": 0.9,
"learning_rate": 3.086971926276535e-05,
"loss": 0.3264,
"step": 2624
},
{
"epoch": 0.9,
"learning_rate": 3.08561508443153e-05,
"loss": 0.3169,
"step": 2625
},
{
"epoch": 0.9,
"learning_rate": 3.0842580600464774e-05,
"loss": 0.3316,
"step": 2626
},
{
"epoch": 0.9,
"learning_rate": 3.082900853544371e-05,
"loss": 0.3187,
"step": 2627
},
{
"epoch": 0.9,
"learning_rate": 3.081543465348262e-05,
"loss": 0.3181,
"step": 2628
},
{
"epoch": 0.9,
"learning_rate": 3.080185895881255e-05,
"loss": 0.3086,
"step": 2629
},
{
"epoch": 0.9,
"learning_rate": 3.078828145566514e-05,
"loss": 0.3527,
"step": 2630
},
{
"epoch": 0.9,
"learning_rate": 3.0774702148272596e-05,
"loss": 0.312,
"step": 2631
},
{
"epoch": 0.9,
"learning_rate": 3.076112104086766e-05,
"loss": 0.3075,
"step": 2632
},
{
"epoch": 0.9,
"learning_rate": 3.0747538137683675e-05,
"loss": 0.348,
"step": 2633
},
{
"epoch": 0.9,
"learning_rate": 3.07339534429545e-05,
"loss": 0.3511,
"step": 2634
},
{
"epoch": 0.9,
"learning_rate": 3.072036696091459e-05,
"loss": 0.4192,
"step": 2635
},
{
"epoch": 0.9,
"learning_rate": 3.070677869579892e-05,
"loss": 0.3311,
"step": 2636
},
{
"epoch": 0.9,
"learning_rate": 3.0693188651843054e-05,
"loss": 0.4121,
"step": 2637
},
{
"epoch": 0.9,
"learning_rate": 3.0679596833283094e-05,
"loss": 0.3121,
"step": 2638
},
{
"epoch": 0.9,
"learning_rate": 3.066600324435569e-05,
"loss": 0.3767,
"step": 2639
},
{
"epoch": 0.9,
"learning_rate": 3.0652407889298086e-05,
"loss": 0.3189,
"step": 2640
},
{
"epoch": 0.9,
"learning_rate": 3.0638810772348e-05,
"loss": 0.3694,
"step": 2641
},
{
"epoch": 0.9,
"learning_rate": 3.0625211897743776e-05,
"loss": 0.3035,
"step": 2642
},
{
"epoch": 0.9,
"learning_rate": 3.061161126972425e-05,
"loss": 0.3372,
"step": 2643
},
{
"epoch": 0.9,
"learning_rate": 3.059800889252883e-05,
"loss": 0.3337,
"step": 2644
},
{
"epoch": 0.9,
"learning_rate": 3.058440477039747e-05,
"loss": 0.3291,
"step": 2645
},
{
"epoch": 0.9,
"learning_rate": 3.0570798907570666e-05,
"loss": 0.3098,
"step": 2646
},
{
"epoch": 0.9,
"learning_rate": 3.0557191308289454e-05,
"loss": 0.3073,
"step": 2647
},
{
"epoch": 0.9,
"learning_rate": 3.054358197679542e-05,
"loss": 0.3155,
"step": 2648
},
{
"epoch": 0.9,
"learning_rate": 3.052997091733065e-05,
"loss": 0.326,
"step": 2649
},
{
"epoch": 0.9,
"learning_rate": 3.051635813413784e-05,
"loss": 0.2786,
"step": 2650
},
{
"epoch": 0.9,
"learning_rate": 3.050274363146015e-05,
"loss": 0.3401,
"step": 2651
},
{
"epoch": 0.9,
"learning_rate": 3.0489127413541325e-05,
"loss": 0.3476,
"step": 2652
},
{
"epoch": 0.91,
"learning_rate": 3.0475509484625636e-05,
"loss": 0.3024,
"step": 2653
},
{
"epoch": 0.91,
"learning_rate": 3.046188984895787e-05,
"loss": 0.3453,
"step": 2654
},
{
"epoch": 0.91,
"learning_rate": 3.044826851078336e-05,
"loss": 0.3822,
"step": 2655
},
{
"epoch": 0.91,
"learning_rate": 3.0434645474347966e-05,
"loss": 0.335,
"step": 2656
},
{
"epoch": 0.91,
"learning_rate": 3.042102074389807e-05,
"loss": 0.3301,
"step": 2657
},
{
"epoch": 0.91,
"learning_rate": 3.04073943236806e-05,
"loss": 0.2975,
"step": 2658
},
{
"epoch": 0.91,
"learning_rate": 3.0393766217942994e-05,
"loss": 0.3509,
"step": 2659
},
{
"epoch": 0.91,
"learning_rate": 3.0380136430933225e-05,
"loss": 0.3159,
"step": 2660
},
{
"epoch": 0.91,
"learning_rate": 3.036650496689979e-05,
"loss": 0.3375,
"step": 2661
},
{
"epoch": 0.91,
"learning_rate": 3.03528718300917e-05,
"loss": 0.3372,
"step": 2662
},
{
"epoch": 0.91,
"learning_rate": 3.0339237024758492e-05,
"loss": 0.3024,
"step": 2663
},
{
"epoch": 0.91,
"learning_rate": 3.0325600555150225e-05,
"loss": 0.3567,
"step": 2664
},
{
"epoch": 0.91,
"learning_rate": 3.031196242551747e-05,
"loss": 0.3274,
"step": 2665
},
{
"epoch": 0.91,
"learning_rate": 3.029832264011133e-05,
"loss": 0.3018,
"step": 2666
},
{
"epoch": 0.91,
"learning_rate": 3.0284681203183417e-05,
"loss": 0.3183,
"step": 2667
},
{
"epoch": 0.91,
"learning_rate": 3.027103811898585e-05,
"loss": 0.3112,
"step": 2668
},
{
"epoch": 0.91,
"learning_rate": 3.025739339177127e-05,
"loss": 0.3287,
"step": 2669
},
{
"epoch": 0.91,
"learning_rate": 3.0243747025792822e-05,
"loss": 0.2826,
"step": 2670
},
{
"epoch": 0.91,
"learning_rate": 3.0230099025304175e-05,
"loss": 0.3669,
"step": 2671
},
{
"epoch": 0.91,
"learning_rate": 3.02164493945595e-05,
"loss": 0.3488,
"step": 2672
},
{
"epoch": 0.91,
"learning_rate": 3.020279813781347e-05,
"loss": 0.2474,
"step": 2673
},
{
"epoch": 0.91,
"learning_rate": 3.0189145259321273e-05,
"loss": 0.2929,
"step": 2674
},
{
"epoch": 0.91,
"learning_rate": 3.0175490763338603e-05,
"loss": 0.2875,
"step": 2675
},
{
"epoch": 0.91,
"learning_rate": 3.0161834654121657e-05,
"loss": 0.3454,
"step": 2676
},
{
"epoch": 0.91,
"learning_rate": 3.014817693592712e-05,
"loss": 0.3113,
"step": 2677
},
{
"epoch": 0.91,
"learning_rate": 3.0134517613012202e-05,
"loss": 0.301,
"step": 2678
},
{
"epoch": 0.91,
"learning_rate": 3.0120856689634613e-05,
"loss": 0.323,
"step": 2679
},
{
"epoch": 0.91,
"learning_rate": 3.0107194170052542e-05,
"loss": 0.315,
"step": 2680
},
{
"epoch": 0.91,
"learning_rate": 3.0093530058524682e-05,
"loss": 0.3267,
"step": 2681
},
{
"epoch": 0.92,
"learning_rate": 3.0079864359310232e-05,
"loss": 0.3469,
"step": 2682
},
{
"epoch": 0.92,
"learning_rate": 3.0066197076668878e-05,
"loss": 0.3501,
"step": 2683
},
{
"epoch": 0.92,
"learning_rate": 3.0052528214860797e-05,
"loss": 0.3166,
"step": 2684
},
{
"epoch": 0.92,
"learning_rate": 3.003885777814667e-05,
"loss": 0.3374,
"step": 2685
},
{
"epoch": 0.92,
"learning_rate": 3.0025185770787657e-05,
"loss": 0.3247,
"step": 2686
},
{
"epoch": 0.92,
"learning_rate": 3.0011512197045412e-05,
"loss": 0.3644,
"step": 2687
},
{
"epoch": 0.92,
"learning_rate": 2.999783706118208e-05,
"loss": 0.3192,
"step": 2688
},
{
"epoch": 0.92,
"learning_rate": 2.998416036746029e-05,
"loss": 0.2881,
"step": 2689
},
{
"epoch": 0.92,
"learning_rate": 2.997048212014315e-05,
"loss": 0.2663,
"step": 2690
},
{
"epoch": 0.92,
"learning_rate": 2.9956802323494266e-05,
"loss": 0.3175,
"step": 2691
},
{
"epoch": 0.92,
"learning_rate": 2.9943120981777718e-05,
"loss": 0.3412,
"step": 2692
},
{
"epoch": 0.92,
"learning_rate": 2.9929438099258068e-05,
"loss": 0.3461,
"step": 2693
},
{
"epoch": 0.92,
"learning_rate": 2.9915753680200366e-05,
"loss": 0.3148,
"step": 2694
},
{
"epoch": 0.92,
"learning_rate": 2.9902067728870133e-05,
"loss": 0.3129,
"step": 2695
},
{
"epoch": 0.92,
"learning_rate": 2.9888380249533354e-05,
"loss": 0.3733,
"step": 2696
},
{
"epoch": 0.92,
"learning_rate": 2.9874691246456527e-05,
"loss": 0.3156,
"step": 2697
},
{
"epoch": 0.92,
"learning_rate": 2.9861000723906596e-05,
"loss": 0.336,
"step": 2698
},
{
"epoch": 0.92,
"learning_rate": 2.9847308686150982e-05,
"loss": 0.321,
"step": 2699
},
{
"epoch": 0.92,
"learning_rate": 2.9833615137457587e-05,
"loss": 0.3333,
"step": 2700
},
{
"epoch": 0.92,
"learning_rate": 2.9819920082094783e-05,
"loss": 0.3766,
"step": 2701
},
{
"epoch": 0.92,
"learning_rate": 2.98062235243314e-05,
"loss": 0.2803,
"step": 2702
},
{
"epoch": 0.92,
"learning_rate": 2.979252546843675e-05,
"loss": 0.3199,
"step": 2703
},
{
"epoch": 0.92,
"learning_rate": 2.9778825918680603e-05,
"loss": 0.3171,
"step": 2704
},
{
"epoch": 0.92,
"learning_rate": 2.97651248793332e-05,
"loss": 0.3223,
"step": 2705
},
{
"epoch": 0.92,
"learning_rate": 2.9751422354665243e-05,
"loss": 0.3216,
"step": 2706
},
{
"epoch": 0.92,
"learning_rate": 2.9737718348947902e-05,
"loss": 0.3018,
"step": 2707
},
{
"epoch": 0.92,
"learning_rate": 2.9724012866452805e-05,
"loss": 0.3085,
"step": 2708
},
{
"epoch": 0.92,
"learning_rate": 2.9710305911452035e-05,
"loss": 0.3405,
"step": 2709
},
{
"epoch": 0.92,
"learning_rate": 2.9696597488218147e-05,
"loss": 0.3298,
"step": 2710
},
{
"epoch": 0.92,
"learning_rate": 2.9682887601024138e-05,
"loss": 0.3123,
"step": 2711
},
{
"epoch": 0.93,
"learning_rate": 2.9669176254143476e-05,
"loss": 0.3635,
"step": 2712
},
{
"epoch": 0.93,
"learning_rate": 2.9655463451850074e-05,
"loss": 0.3085,
"step": 2713
},
{
"epoch": 0.93,
"learning_rate": 2.9641749198418307e-05,
"loss": 0.3573,
"step": 2714
},
{
"epoch": 0.93,
"learning_rate": 2.962803349812299e-05,
"loss": 0.3528,
"step": 2715
},
{
"epoch": 0.93,
"learning_rate": 2.961431635523939e-05,
"loss": 0.2993,
"step": 2716
},
{
"epoch": 0.93,
"learning_rate": 2.9600597774043255e-05,
"loss": 0.3031,
"step": 2717
},
{
"epoch": 0.93,
"learning_rate": 2.9586877758810732e-05,
"loss": 0.3602,
"step": 2718
},
{
"epoch": 0.93,
"learning_rate": 2.957315631381845e-05,
"loss": 0.3329,
"step": 2719
},
{
"epoch": 0.93,
"learning_rate": 2.955943344334347e-05,
"loss": 0.2966,
"step": 2720
},
{
"epoch": 0.93,
"learning_rate": 2.9545709151663308e-05,
"loss": 0.2866,
"step": 2721
},
{
"epoch": 0.93,
"learning_rate": 2.95319834430559e-05,
"loss": 0.2969,
"step": 2722
},
{
"epoch": 0.93,
"learning_rate": 2.9518256321799653e-05,
"loss": 0.3189,
"step": 2723
},
{
"epoch": 0.93,
"learning_rate": 2.9504527792173397e-05,
"loss": 0.3444,
"step": 2724
},
{
"epoch": 0.93,
"learning_rate": 2.9490797858456404e-05,
"loss": 0.3359,
"step": 2725
},
{
"epoch": 0.93,
"learning_rate": 2.947706652492838e-05,
"loss": 0.3217,
"step": 2726
},
{
"epoch": 0.93,
"learning_rate": 2.9463333795869474e-05,
"loss": 0.277,
"step": 2727
},
{
"epoch": 0.93,
"learning_rate": 2.944959967556027e-05,
"loss": 0.3268,
"step": 2728
},
{
"epoch": 0.93,
"learning_rate": 2.9435864168281772e-05,
"loss": 0.3133,
"step": 2729
},
{
"epoch": 0.93,
"learning_rate": 2.9422127278315448e-05,
"loss": 0.2643,
"step": 2730
},
{
"epoch": 0.93,
"learning_rate": 2.940838900994316e-05,
"loss": 0.321,
"step": 2731
},
{
"epoch": 0.93,
"learning_rate": 2.9394649367447225e-05,
"loss": 0.3048,
"step": 2732
},
{
"epoch": 0.93,
"learning_rate": 2.938090835511037e-05,
"loss": 0.3106,
"step": 2733
},
{
"epoch": 0.93,
"learning_rate": 2.936716597721576e-05,
"loss": 0.3078,
"step": 2734
},
{
"epoch": 0.93,
"learning_rate": 2.9353422238046985e-05,
"loss": 0.3894,
"step": 2735
},
{
"epoch": 0.93,
"learning_rate": 2.9339677141888068e-05,
"loss": 0.2992,
"step": 2736
},
{
"epoch": 0.93,
"learning_rate": 2.9325930693023433e-05,
"loss": 0.3939,
"step": 2737
},
{
"epoch": 0.93,
"learning_rate": 2.9312182895737943e-05,
"loss": 0.3387,
"step": 2738
},
{
"epoch": 0.93,
"learning_rate": 2.9298433754316878e-05,
"loss": 0.3343,
"step": 2739
},
{
"epoch": 0.93,
"learning_rate": 2.928468327304593e-05,
"loss": 0.3214,
"step": 2740
},
{
"epoch": 0.94,
"learning_rate": 2.927093145621122e-05,
"loss": 0.3621,
"step": 2741
},
{
"epoch": 0.94,
"learning_rate": 2.925717830809927e-05,
"loss": 0.2799,
"step": 2742
},
{
"epoch": 0.94,
"learning_rate": 2.9243423832997037e-05,
"loss": 0.3814,
"step": 2743
},
{
"epoch": 0.94,
"learning_rate": 2.922966803519187e-05,
"loss": 0.2912,
"step": 2744
},
{
"epoch": 0.94,
"learning_rate": 2.9215910918971557e-05,
"loss": 0.328,
"step": 2745
},
{
"epoch": 0.94,
"learning_rate": 2.9202152488624263e-05,
"loss": 0.3346,
"step": 2746
},
{
"epoch": 0.94,
"learning_rate": 2.9188392748438588e-05,
"loss": 0.3287,
"step": 2747
},
{
"epoch": 0.94,
"learning_rate": 2.9174631702703532e-05,
"loss": 0.3473,
"step": 2748
},
{
"epoch": 0.94,
"learning_rate": 2.916086935570851e-05,
"loss": 0.3062,
"step": 2749
},
{
"epoch": 0.94,
"learning_rate": 2.914710571174332e-05,
"loss": 0.3088,
"step": 2750
},
{
"epoch": 0.94,
"learning_rate": 2.91333407750982e-05,
"loss": 0.31,
"step": 2751
},
{
"epoch": 0.94,
"learning_rate": 2.9119574550063755e-05,
"loss": 0.3297,
"step": 2752
},
{
"epoch": 0.94,
"learning_rate": 2.910580704093101e-05,
"loss": 0.3222,
"step": 2753
},
{
"epoch": 0.94,
"learning_rate": 2.9092038251991393e-05,
"loss": 0.3403,
"step": 2754
},
{
"epoch": 0.94,
"learning_rate": 2.9078268187536718e-05,
"loss": 0.3813,
"step": 2755
},
{
"epoch": 0.94,
"learning_rate": 2.9064496851859214e-05,
"loss": 0.3618,
"step": 2756
},
{
"epoch": 0.94,
"learning_rate": 2.9050724249251488e-05,
"loss": 0.308,
"step": 2757
},
{
"epoch": 0.94,
"learning_rate": 2.9036950384006557e-05,
"loss": 0.3473,
"step": 2758
},
{
"epoch": 0.94,
"learning_rate": 2.902317526041782e-05,
"loss": 0.3248,
"step": 2759
},
{
"epoch": 0.94,
"learning_rate": 2.900939888277906e-05,
"loss": 0.3277,
"step": 2760
},
{
"epoch": 0.94,
"learning_rate": 2.8995621255384487e-05,
"loss": 0.2908,
"step": 2761
},
{
"epoch": 0.94,
"learning_rate": 2.8981842382528667e-05,
"loss": 0.2865,
"step": 2762
},
{
"epoch": 0.94,
"learning_rate": 2.896806226850657e-05,
"loss": 0.3604,
"step": 2763
},
{
"epoch": 0.94,
"learning_rate": 2.8954280917613536e-05,
"loss": 0.3523,
"step": 2764
},
{
"epoch": 0.94,
"learning_rate": 2.8940498334145316e-05,
"loss": 0.3022,
"step": 2765
},
{
"epoch": 0.94,
"learning_rate": 2.8926714522398013e-05,
"loss": 0.3214,
"step": 2766
},
{
"epoch": 0.94,
"learning_rate": 2.8912929486668155e-05,
"loss": 0.2935,
"step": 2767
},
{
"epoch": 0.94,
"learning_rate": 2.8899143231252605e-05,
"loss": 0.302,
"step": 2768
},
{
"epoch": 0.94,
"learning_rate": 2.8885355760448646e-05,
"loss": 0.3452,
"step": 2769
},
{
"epoch": 0.95,
"learning_rate": 2.887156707855392e-05,
"loss": 0.3295,
"step": 2770
},
{
"epoch": 0.95,
"learning_rate": 2.8857777189866437e-05,
"loss": 0.3108,
"step": 2771
},
{
"epoch": 0.95,
"learning_rate": 2.8843986098684606e-05,
"loss": 0.3041,
"step": 2772
},
{
"epoch": 0.95,
"learning_rate": 2.8830193809307192e-05,
"loss": 0.3344,
"step": 2773
},
{
"epoch": 0.95,
"learning_rate": 2.881640032603336e-05,
"loss": 0.3297,
"step": 2774
},
{
"epoch": 0.95,
"learning_rate": 2.8802605653162608e-05,
"loss": 0.3574,
"step": 2775
},
{
"epoch": 0.95,
"learning_rate": 2.878880979499483e-05,
"loss": 0.3674,
"step": 2776
},
{
"epoch": 0.95,
"learning_rate": 2.87750127558303e-05,
"loss": 0.3182,
"step": 2777
},
{
"epoch": 0.95,
"learning_rate": 2.8761214539969626e-05,
"loss": 0.3057,
"step": 2778
},
{
"epoch": 0.95,
"learning_rate": 2.8747415151713812e-05,
"loss": 0.335,
"step": 2779
},
{
"epoch": 0.95,
"learning_rate": 2.8733614595364216e-05,
"loss": 0.359,
"step": 2780
},
{
"epoch": 0.95,
"learning_rate": 2.871981287522256e-05,
"loss": 0.3138,
"step": 2781
},
{
"epoch": 0.95,
"learning_rate": 2.8706009995590933e-05,
"loss": 0.303,
"step": 2782
},
{
"epoch": 0.95,
"learning_rate": 2.8692205960771782e-05,
"loss": 0.2763,
"step": 2783
},
{
"epoch": 0.95,
"learning_rate": 2.8678400775067916e-05,
"loss": 0.288,
"step": 2784
},
{
"epoch": 0.95,
"learning_rate": 2.86645944427825e-05,
"loss": 0.2932,
"step": 2785
},
{
"epoch": 0.95,
"learning_rate": 2.865078696821905e-05,
"loss": 0.3342,
"step": 2786
},
{
"epoch": 0.95,
"learning_rate": 2.863697835568146e-05,
"loss": 0.3794,
"step": 2787
},
{
"epoch": 0.95,
"learning_rate": 2.862316860947396e-05,
"loss": 0.3289,
"step": 2788
},
{
"epoch": 0.95,
"learning_rate": 2.8609357733901133e-05,
"loss": 0.3126,
"step": 2789
},
{
"epoch": 0.95,
"learning_rate": 2.859554573326792e-05,
"loss": 0.2998,
"step": 2790
},
{
"epoch": 0.95,
"learning_rate": 2.858173261187962e-05,
"loss": 0.2901,
"step": 2791
},
{
"epoch": 0.95,
"learning_rate": 2.8567918374041863e-05,
"loss": 0.3665,
"step": 2792
},
{
"epoch": 0.95,
"learning_rate": 2.8554103024060648e-05,
"loss": 0.2948,
"step": 2793
},
{
"epoch": 0.95,
"learning_rate": 2.8540286566242298e-05,
"loss": 0.3486,
"step": 2794
},
{
"epoch": 0.95,
"learning_rate": 2.8526469004893503e-05,
"loss": 0.3342,
"step": 2795
},
{
"epoch": 0.95,
"learning_rate": 2.851265034432128e-05,
"loss": 0.2766,
"step": 2796
},
{
"epoch": 0.95,
"learning_rate": 2.8498830588832996e-05,
"loss": 0.334,
"step": 2797
},
{
"epoch": 0.95,
"learning_rate": 2.8485009742736368e-05,
"loss": 0.3009,
"step": 2798
},
{
"epoch": 0.95,
"learning_rate": 2.8471187810339433e-05,
"loss": 0.3246,
"step": 2799
},
{
"epoch": 0.96,
"learning_rate": 2.8457364795950587e-05,
"loss": 0.2941,
"step": 2800
},
{
"epoch": 0.96,
"learning_rate": 2.8443540703878547e-05,
"loss": 0.3598,
"step": 2801
},
{
"epoch": 0.96,
"learning_rate": 2.8429715538432377e-05,
"loss": 0.2653,
"step": 2802
},
{
"epoch": 0.96,
"learning_rate": 2.8415889303921463e-05,
"loss": 0.3012,
"step": 2803
},
{
"epoch": 0.96,
"learning_rate": 2.8402062004655544e-05,
"loss": 0.3377,
"step": 2804
},
{
"epoch": 0.96,
"learning_rate": 2.8388233644944673e-05,
"loss": 0.3209,
"step": 2805
},
{
"epoch": 0.96,
"learning_rate": 2.8374404229099242e-05,
"loss": 0.337,
"step": 2806
},
{
"epoch": 0.96,
"learning_rate": 2.8360573761429972e-05,
"loss": 0.3181,
"step": 2807
},
{
"epoch": 0.96,
"learning_rate": 2.834674224624791e-05,
"loss": 0.3071,
"step": 2808
},
{
"epoch": 0.96,
"learning_rate": 2.8332909687864424e-05,
"loss": 0.3316,
"step": 2809
},
{
"epoch": 0.96,
"learning_rate": 2.8319076090591213e-05,
"loss": 0.3326,
"step": 2810
},
{
"epoch": 0.96,
"learning_rate": 2.8305241458740305e-05,
"loss": 0.3219,
"step": 2811
},
{
"epoch": 0.96,
"learning_rate": 2.8291405796624043e-05,
"loss": 0.3186,
"step": 2812
},
{
"epoch": 0.96,
"learning_rate": 2.8277569108555102e-05,
"loss": 0.2997,
"step": 2813
},
{
"epoch": 0.96,
"learning_rate": 2.826373139884645e-05,
"loss": 0.3026,
"step": 2814
},
{
"epoch": 0.96,
"learning_rate": 2.8249892671811406e-05,
"loss": 0.3053,
"step": 2815
},
{
"epoch": 0.96,
"learning_rate": 2.823605293176359e-05,
"loss": 0.2969,
"step": 2816
},
{
"epoch": 0.96,
"learning_rate": 2.8222212183016927e-05,
"loss": 0.3225,
"step": 2817
},
{
"epoch": 0.96,
"learning_rate": 2.8208370429885677e-05,
"loss": 0.3209,
"step": 2818
},
{
"epoch": 0.96,
"learning_rate": 2.8194527676684413e-05,
"loss": 0.3105,
"step": 2819
},
{
"epoch": 0.96,
"learning_rate": 2.8180683927728004e-05,
"loss": 0.2878,
"step": 2820
},
{
"epoch": 0.96,
"learning_rate": 2.8166839187331636e-05,
"loss": 0.3214,
"step": 2821
},
{
"epoch": 0.96,
"learning_rate": 2.8152993459810806e-05,
"loss": 0.3378,
"step": 2822
},
{
"epoch": 0.96,
"learning_rate": 2.813914674948132e-05,
"loss": 0.3041,
"step": 2823
},
{
"epoch": 0.96,
"learning_rate": 2.8125299060659276e-05,
"loss": 0.2817,
"step": 2824
},
{
"epoch": 0.96,
"learning_rate": 2.8111450397661105e-05,
"loss": 0.3293,
"step": 2825
},
{
"epoch": 0.96,
"learning_rate": 2.809760076480352e-05,
"loss": 0.3317,
"step": 2826
},
{
"epoch": 0.96,
"learning_rate": 2.808375016640355e-05,
"loss": 0.3244,
"step": 2827
},
{
"epoch": 0.96,
"learning_rate": 2.8069898606778504e-05,
"loss": 0.2874,
"step": 2828
},
{
"epoch": 0.97,
"learning_rate": 2.8056046090246007e-05,
"loss": 0.3016,
"step": 2829
},
{
"epoch": 0.97,
"learning_rate": 2.804219262112398e-05,
"loss": 0.2883,
"step": 2830
},
{
"epoch": 0.97,
"learning_rate": 2.8028338203730636e-05,
"loss": 0.3162,
"step": 2831
},
{
"epoch": 0.97,
"learning_rate": 2.80144828423845e-05,
"loss": 0.3126,
"step": 2832
},
{
"epoch": 0.97,
"learning_rate": 2.8000626541404366e-05,
"loss": 0.3394,
"step": 2833
},
{
"epoch": 0.97,
"learning_rate": 2.7986769305109338e-05,
"loss": 0.3025,
"step": 2834
},
{
"epoch": 0.97,
"learning_rate": 2.7972911137818814e-05,
"loss": 0.3203,
"step": 2835
},
{
"epoch": 0.97,
"learning_rate": 2.7959052043852464e-05,
"loss": 0.322,
"step": 2836
},
{
"epoch": 0.97,
"learning_rate": 2.7945192027530254e-05,
"loss": 0.3578,
"step": 2837
},
{
"epoch": 0.97,
"learning_rate": 2.793133109317245e-05,
"loss": 0.3276,
"step": 2838
},
{
"epoch": 0.97,
"learning_rate": 2.7917469245099602e-05,
"loss": 0.3193,
"step": 2839
},
{
"epoch": 0.97,
"learning_rate": 2.7903606487632532e-05,
"loss": 0.3534,
"step": 2840
},
{
"epoch": 0.97,
"learning_rate": 2.7889742825092358e-05,
"loss": 0.374,
"step": 2841
},
{
"epoch": 0.97,
"learning_rate": 2.7875878261800464e-05,
"loss": 0.329,
"step": 2842
},
{
"epoch": 0.97,
"learning_rate": 2.7862012802078525e-05,
"loss": 0.337,
"step": 2843
},
{
"epoch": 0.97,
"learning_rate": 2.7848146450248502e-05,
"loss": 0.3612,
"step": 2844
},
{
"epoch": 0.97,
"learning_rate": 2.7834279210632623e-05,
"loss": 0.3118,
"step": 2845
},
{
"epoch": 0.97,
"learning_rate": 2.7820411087553404e-05,
"loss": 0.3733,
"step": 2846
},
{
"epoch": 0.97,
"learning_rate": 2.7806542085333627e-05,
"loss": 0.2939,
"step": 2847
},
{
"epoch": 0.97,
"learning_rate": 2.779267220829635e-05,
"loss": 0.292,
"step": 2848
},
{
"epoch": 0.97,
"learning_rate": 2.77788014607649e-05,
"loss": 0.3048,
"step": 2849
},
{
"epoch": 0.97,
"learning_rate": 2.7764929847062876e-05,
"loss": 0.3016,
"step": 2850
},
{
"epoch": 0.97,
"learning_rate": 2.775105737151416e-05,
"loss": 0.3519,
"step": 2851
},
{
"epoch": 0.97,
"learning_rate": 2.7737184038442894e-05,
"loss": 0.325,
"step": 2852
},
{
"epoch": 0.97,
"learning_rate": 2.772330985217348e-05,
"loss": 0.3268,
"step": 2853
},
{
"epoch": 0.97,
"learning_rate": 2.77094348170306e-05,
"loss": 0.3394,
"step": 2854
},
{
"epoch": 0.97,
"learning_rate": 2.7695558937339183e-05,
"loss": 0.3076,
"step": 2855
},
{
"epoch": 0.97,
"learning_rate": 2.7681682217424437e-05,
"loss": 0.3005,
"step": 2856
},
{
"epoch": 0.97,
"learning_rate": 2.766780466161183e-05,
"loss": 0.2886,
"step": 2857
},
{
"epoch": 0.98,
"learning_rate": 2.7653926274227077e-05,
"loss": 0.3065,
"step": 2858
},
{
"epoch": 0.98,
"learning_rate": 2.7640047059596176e-05,
"loss": 0.2826,
"step": 2859
},
{
"epoch": 0.98,
"learning_rate": 2.7626167022045357e-05,
"loss": 0.3298,
"step": 2860
},
{
"epoch": 0.98,
"learning_rate": 2.7612286165901125e-05,
"loss": 0.2817,
"step": 2861
},
{
"epoch": 0.98,
"learning_rate": 2.7598404495490226e-05,
"loss": 0.3745,
"step": 2862
},
{
"epoch": 0.98,
"learning_rate": 2.758452201513967e-05,
"loss": 0.2956,
"step": 2863
},
{
"epoch": 0.98,
"learning_rate": 2.7570638729176723e-05,
"loss": 0.3011,
"step": 2864
},
{
"epoch": 0.98,
"learning_rate": 2.755675464192889e-05,
"loss": 0.321,
"step": 2865
},
{
"epoch": 0.98,
"learning_rate": 2.7542869757723933e-05,
"loss": 0.2936,
"step": 2866
},
{
"epoch": 0.98,
"learning_rate": 2.7528984080889864e-05,
"loss": 0.2859,
"step": 2867
},
{
"epoch": 0.98,
"learning_rate": 2.7515097615754936e-05,
"loss": 0.3188,
"step": 2868
},
{
"epoch": 0.98,
"learning_rate": 2.750121036664765e-05,
"loss": 0.2878,
"step": 2869
},
{
"epoch": 0.98,
"learning_rate": 2.7487322337896754e-05,
"loss": 0.3279,
"step": 2870
},
{
"epoch": 0.98,
"learning_rate": 2.7473433533831243e-05,
"loss": 0.2676,
"step": 2871
},
{
"epoch": 0.98,
"learning_rate": 2.745954395878033e-05,
"loss": 0.2977,
"step": 2872
},
{
"epoch": 0.98,
"learning_rate": 2.744565361707351e-05,
"loss": 0.309,
"step": 2873
},
{
"epoch": 0.98,
"learning_rate": 2.7431762513040477e-05,
"loss": 0.3097,
"step": 2874
},
{
"epoch": 0.98,
"learning_rate": 2.7417870651011185e-05,
"loss": 0.3512,
"step": 2875
},
{
"epoch": 0.98,
"learning_rate": 2.7403978035315813e-05,
"loss": 0.3231,
"step": 2876
},
{
"epoch": 0.98,
"learning_rate": 2.7390084670284788e-05,
"loss": 0.3212,
"step": 2877
},
{
"epoch": 0.98,
"learning_rate": 2.737619056024876e-05,
"loss": 0.3314,
"step": 2878
},
{
"epoch": 0.98,
"learning_rate": 2.7362295709538606e-05,
"loss": 0.2878,
"step": 2879
},
{
"epoch": 0.98,
"learning_rate": 2.734840012248545e-05,
"loss": 0.2851,
"step": 2880
},
{
"epoch": 0.98,
"learning_rate": 2.7334503803420636e-05,
"loss": 0.3421,
"step": 2881
},
{
"epoch": 0.98,
"learning_rate": 2.732060675667574e-05,
"loss": 0.3129,
"step": 2882
},
{
"epoch": 0.98,
"learning_rate": 2.7306708986582553e-05,
"loss": 0.3211,
"step": 2883
},
{
"epoch": 0.98,
"learning_rate": 2.729281049747311e-05,
"loss": 0.3462,
"step": 2884
},
{
"epoch": 0.98,
"learning_rate": 2.727891129367966e-05,
"loss": 0.3069,
"step": 2885
},
{
"epoch": 0.98,
"learning_rate": 2.726501137953466e-05,
"loss": 0.3142,
"step": 2886
},
{
"epoch": 0.98,
"learning_rate": 2.725111075937083e-05,
"loss": 0.358,
"step": 2887
},
{
"epoch": 0.99,
"learning_rate": 2.723720943752107e-05,
"loss": 0.319,
"step": 2888
},
{
"epoch": 0.99,
"learning_rate": 2.722330741831851e-05,
"loss": 0.3147,
"step": 2889
},
{
"epoch": 0.99,
"learning_rate": 2.720940470609651e-05,
"loss": 0.3243,
"step": 2890
},
{
"epoch": 0.99,
"learning_rate": 2.7195501305188624e-05,
"loss": 0.3191,
"step": 2891
},
{
"epoch": 0.99,
"learning_rate": 2.7181597219928645e-05,
"loss": 0.3478,
"step": 2892
},
{
"epoch": 0.99,
"learning_rate": 2.716769245465055e-05,
"loss": 0.3504,
"step": 2893
},
{
"epoch": 0.99,
"learning_rate": 2.715378701368857e-05,
"loss": 0.3426,
"step": 2894
},
{
"epoch": 0.99,
"learning_rate": 2.71398809013771e-05,
"loss": 0.3443,
"step": 2895
},
{
"epoch": 0.99,
"learning_rate": 2.712597412205078e-05,
"loss": 0.3409,
"step": 2896
},
{
"epoch": 0.99,
"learning_rate": 2.711206668004444e-05,
"loss": 0.3396,
"step": 2897
},
{
"epoch": 0.99,
"learning_rate": 2.7098158579693116e-05,
"loss": 0.3113,
"step": 2898
},
{
"epoch": 0.99,
"learning_rate": 2.708424982533206e-05,
"loss": 0.3362,
"step": 2899
},
{
"epoch": 0.99,
"learning_rate": 2.707034042129671e-05,
"loss": 0.3454,
"step": 2900
},
{
"epoch": 0.99,
"learning_rate": 2.705643037192273e-05,
"loss": 0.3573,
"step": 2901
},
{
"epoch": 0.99,
"learning_rate": 2.7042519681545968e-05,
"loss": 0.3144,
"step": 2902
},
{
"epoch": 0.99,
"learning_rate": 2.702860835450248e-05,
"loss": 0.3302,
"step": 2903
},
{
"epoch": 0.99,
"learning_rate": 2.7014696395128523e-05,
"loss": 0.3212,
"step": 2904
},
{
"epoch": 0.99,
"learning_rate": 2.7000783807760533e-05,
"loss": 0.3048,
"step": 2905
},
{
"epoch": 0.99,
"learning_rate": 2.6986870596735164e-05,
"loss": 0.3072,
"step": 2906
},
{
"epoch": 0.99,
"learning_rate": 2.6972956766389245e-05,
"loss": 0.3235,
"step": 2907
},
{
"epoch": 0.99,
"learning_rate": 2.6959042321059814e-05,
"loss": 0.3386,
"step": 2908
},
{
"epoch": 0.99,
"learning_rate": 2.69451272650841e-05,
"loss": 0.3045,
"step": 2909
},
{
"epoch": 0.99,
"learning_rate": 2.6931211602799505e-05,
"loss": 0.3135,
"step": 2910
},
{
"epoch": 0.99,
"learning_rate": 2.6917295338543647e-05,
"loss": 0.2985,
"step": 2911
},
{
"epoch": 0.99,
"learning_rate": 2.69033784766543e-05,
"loss": 0.3195,
"step": 2912
},
{
"epoch": 0.99,
"learning_rate": 2.6889461021469453e-05,
"loss": 0.2911,
"step": 2913
},
{
"epoch": 0.99,
"learning_rate": 2.6875542977327257e-05,
"loss": 0.2812,
"step": 2914
},
{
"epoch": 0.99,
"learning_rate": 2.686162434856606e-05,
"loss": 0.3033,
"step": 2915
},
{
"epoch": 0.99,
"learning_rate": 2.6847705139524403e-05,
"loss": 0.273,
"step": 2916
},
{
"epoch": 1.0,
"learning_rate": 2.683378535454098e-05,
"loss": 0.284,
"step": 2917
},
{
"epoch": 1.0,
"learning_rate": 2.6819864997954685e-05,
"loss": 0.338,
"step": 2918
},
{
"epoch": 1.0,
"learning_rate": 2.6805944074104582e-05,
"loss": 0.3286,
"step": 2919
},
{
"epoch": 1.0,
"learning_rate": 2.6792022587329912e-05,
"loss": 0.2931,
"step": 2920
},
{
"epoch": 1.0,
"learning_rate": 2.677810054197009e-05,
"loss": 0.3398,
"step": 2921
},
{
"epoch": 1.0,
"learning_rate": 2.676417794236472e-05,
"loss": 0.3692,
"step": 2922
},
{
"epoch": 1.0,
"learning_rate": 2.6750254792853563e-05,
"loss": 0.3583,
"step": 2923
},
{
"epoch": 1.0,
"learning_rate": 2.6736331097776552e-05,
"loss": 0.4662,
"step": 2924
},
{
"epoch": 1.0,
"learning_rate": 2.67224068614738e-05,
"loss": 0.3385,
"step": 2925
},
{
"epoch": 1.0,
"learning_rate": 2.6708482088285567e-05,
"loss": 0.3338,
"step": 2926
},
{
"epoch": 1.0,
"learning_rate": 2.6694556782552317e-05,
"loss": 0.3011,
"step": 2927
},
{
"epoch": 1.0,
"learning_rate": 2.668063094861464e-05,
"loss": 0.3895,
"step": 2928
},
{
"epoch": 1.0,
"learning_rate": 2.666670459081332e-05,
"loss": 0.3016,
"step": 2929
},
{
"epoch": 1.0,
"learning_rate": 2.6652777713489296e-05,
"loss": 0.2765,
"step": 2930
},
{
"epoch": 1.0,
"learning_rate": 2.663885032098366e-05,
"loss": 0.2915,
"step": 2931
},
{
"epoch": 1.0,
"eval_loss": 0.32550960779190063,
"eval_runtime": 82.7705,
"eval_samples_per_second": 9.206,
"eval_steps_per_second": 0.58,
"step": 2931
},
{
"epoch": 1.0,
"learning_rate": 2.6624922417637672e-05,
"loss": 0.2319,
"step": 2932
},
{
"epoch": 1.0,
"learning_rate": 2.661099400779275e-05,
"loss": 0.2446,
"step": 2933
},
{
"epoch": 1.0,
"learning_rate": 2.6597065095790476e-05,
"loss": 0.2427,
"step": 2934
},
{
"epoch": 1.0,
"learning_rate": 2.6583135685972572e-05,
"loss": 0.2668,
"step": 2935
},
{
"epoch": 1.0,
"learning_rate": 2.656920578268094e-05,
"loss": 0.255,
"step": 2936
},
{
"epoch": 1.0,
"learning_rate": 2.6555275390257618e-05,
"loss": 0.2442,
"step": 2937
},
{
"epoch": 1.0,
"learning_rate": 2.654134451304479e-05,
"loss": 0.2198,
"step": 2938
},
{
"epoch": 1.0,
"learning_rate": 2.6527413155384818e-05,
"loss": 0.2289,
"step": 2939
},
{
"epoch": 1.0,
"learning_rate": 2.6513481321620176e-05,
"loss": 0.23,
"step": 2940
},
{
"epoch": 1.0,
"learning_rate": 2.649954901609352e-05,
"loss": 0.2252,
"step": 2941
},
{
"epoch": 1.0,
"learning_rate": 2.6485616243147645e-05,
"loss": 0.2442,
"step": 2942
},
{
"epoch": 1.0,
"learning_rate": 2.6471683007125485e-05,
"loss": 0.2256,
"step": 2943
},
{
"epoch": 1.0,
"learning_rate": 2.6457749312370118e-05,
"loss": 0.2073,
"step": 2944
},
{
"epoch": 1.0,
"learning_rate": 2.644381516322476e-05,
"loss": 0.2695,
"step": 2945
},
{
"epoch": 1.01,
"learning_rate": 2.642988056403279e-05,
"loss": 0.2388,
"step": 2946
},
{
"epoch": 1.01,
"learning_rate": 2.6415945519137707e-05,
"loss": 0.2431,
"step": 2947
},
{
"epoch": 1.01,
"learning_rate": 2.6402010032883157e-05,
"loss": 0.2139,
"step": 2948
},
{
"epoch": 1.01,
"learning_rate": 2.6388074109612915e-05,
"loss": 0.226,
"step": 2949
},
{
"epoch": 1.01,
"learning_rate": 2.6374137753670912e-05,
"loss": 0.2369,
"step": 2950
},
{
"epoch": 1.01,
"learning_rate": 2.63602009694012e-05,
"loss": 0.2409,
"step": 2951
},
{
"epoch": 1.01,
"learning_rate": 2.6346263761147954e-05,
"loss": 0.2362,
"step": 2952
},
{
"epoch": 1.01,
"learning_rate": 2.6332326133255497e-05,
"loss": 0.2214,
"step": 2953
},
{
"epoch": 1.01,
"learning_rate": 2.631838809006828e-05,
"loss": 0.2033,
"step": 2954
},
{
"epoch": 1.01,
"learning_rate": 2.6304449635930882e-05,
"loss": 0.2518,
"step": 2955
},
{
"epoch": 1.01,
"learning_rate": 2.6290510775188022e-05,
"loss": 0.2995,
"step": 2956
},
{
"epoch": 1.01,
"learning_rate": 2.6276571512184515e-05,
"loss": 0.2312,
"step": 2957
},
{
"epoch": 1.01,
"learning_rate": 2.6262631851265333e-05,
"loss": 0.2397,
"step": 2958
},
{
"epoch": 1.01,
"learning_rate": 2.624869179677555e-05,
"loss": 0.1944,
"step": 2959
},
{
"epoch": 1.01,
"learning_rate": 2.6234751353060382e-05,
"loss": 0.233,
"step": 2960
},
{
"epoch": 1.01,
"learning_rate": 2.622081052446515e-05,
"loss": 0.2417,
"step": 2961
},
{
"epoch": 1.01,
"learning_rate": 2.6206869315335296e-05,
"loss": 0.237,
"step": 2962
},
{
"epoch": 1.01,
"learning_rate": 2.6192927730016404e-05,
"loss": 0.216,
"step": 2963
},
{
"epoch": 1.01,
"learning_rate": 2.6178985772854142e-05,
"loss": 0.2545,
"step": 2964
},
{
"epoch": 1.01,
"learning_rate": 2.616504344819431e-05,
"loss": 0.2245,
"step": 2965
},
{
"epoch": 1.01,
"learning_rate": 2.6151100760382823e-05,
"loss": 0.2566,
"step": 2966
},
{
"epoch": 1.01,
"learning_rate": 2.6137157713765713e-05,
"loss": 0.2468,
"step": 2967
},
{
"epoch": 1.01,
"learning_rate": 2.612321431268912e-05,
"loss": 0.2272,
"step": 2968
},
{
"epoch": 1.01,
"learning_rate": 2.6109270561499278e-05,
"loss": 0.2335,
"step": 2969
},
{
"epoch": 1.01,
"learning_rate": 2.6095326464542564e-05,
"loss": 0.2468,
"step": 2970
},
{
"epoch": 1.01,
"learning_rate": 2.6081382026165435e-05,
"loss": 0.2266,
"step": 2971
},
{
"epoch": 1.01,
"learning_rate": 2.606743725071446e-05,
"loss": 0.2636,
"step": 2972
},
{
"epoch": 1.01,
"learning_rate": 2.6053492142536324e-05,
"loss": 0.2435,
"step": 2973
},
{
"epoch": 1.01,
"learning_rate": 2.6039546705977807e-05,
"loss": 0.2223,
"step": 2974
},
{
"epoch": 1.02,
"learning_rate": 2.6025600945385792e-05,
"loss": 0.2497,
"step": 2975
},
{
"epoch": 1.02,
"learning_rate": 2.6011654865107256e-05,
"loss": 0.2497,
"step": 2976
},
{
"epoch": 1.02,
"learning_rate": 2.5997708469489297e-05,
"loss": 0.2239,
"step": 2977
},
{
"epoch": 1.02,
"learning_rate": 2.5983761762879082e-05,
"loss": 0.203,
"step": 2978
},
{
"epoch": 1.02,
"learning_rate": 2.5969814749623906e-05,
"loss": 0.2354,
"step": 2979
},
{
"epoch": 1.02,
"learning_rate": 2.5955867434071134e-05,
"loss": 0.2691,
"step": 2980
},
{
"epoch": 1.02,
"learning_rate": 2.5941919820568245e-05,
"loss": 0.2536,
"step": 2981
},
{
"epoch": 1.02,
"learning_rate": 2.592797191346279e-05,
"loss": 0.2443,
"step": 2982
},
{
"epoch": 1.02,
"learning_rate": 2.5914023717102426e-05,
"loss": 0.2289,
"step": 2983
},
{
"epoch": 1.02,
"learning_rate": 2.5900075235834902e-05,
"loss": 0.2559,
"step": 2984
},
{
"epoch": 1.02,
"learning_rate": 2.5886126474008044e-05,
"loss": 0.2239,
"step": 2985
},
{
"epoch": 1.02,
"learning_rate": 2.587217743596978e-05,
"loss": 0.2363,
"step": 2986
},
{
"epoch": 1.02,
"learning_rate": 2.5858228126068114e-05,
"loss": 0.2383,
"step": 2987
},
{
"epoch": 1.02,
"learning_rate": 2.584427854865113e-05,
"loss": 0.2332,
"step": 2988
},
{
"epoch": 1.02,
"learning_rate": 2.583032870806701e-05,
"loss": 0.2231,
"step": 2989
},
{
"epoch": 1.02,
"learning_rate": 2.5816378608664e-05,
"loss": 0.235,
"step": 2990
},
{
"epoch": 1.02,
"learning_rate": 2.580242825479045e-05,
"loss": 0.204,
"step": 2991
},
{
"epoch": 1.02,
"learning_rate": 2.578847765079477e-05,
"loss": 0.2194,
"step": 2992
},
{
"epoch": 1.02,
"learning_rate": 2.577452680102546e-05,
"loss": 0.2616,
"step": 2993
},
{
"epoch": 1.02,
"learning_rate": 2.5760575709831086e-05,
"loss": 0.215,
"step": 2994
},
{
"epoch": 1.02,
"learning_rate": 2.574662438156029e-05,
"loss": 0.2366,
"step": 2995
},
{
"epoch": 1.02,
"learning_rate": 2.5732672820561804e-05,
"loss": 0.2304,
"step": 2996
},
{
"epoch": 1.02,
"learning_rate": 2.5718721031184405e-05,
"loss": 0.2136,
"step": 2997
},
{
"epoch": 1.02,
"learning_rate": 2.5704769017776965e-05,
"loss": 0.217,
"step": 2998
},
{
"epoch": 1.02,
"learning_rate": 2.5690816784688427e-05,
"loss": 0.2306,
"step": 2999
},
{
"epoch": 1.02,
"learning_rate": 2.567686433626778e-05,
"loss": 0.2238,
"step": 3000
},
{
"epoch": 1.02,
"learning_rate": 2.5662911676864104e-05,
"loss": 0.258,
"step": 3001
},
{
"epoch": 1.02,
"learning_rate": 2.5648958810826524e-05,
"loss": 0.2433,
"step": 3002
},
{
"epoch": 1.02,
"learning_rate": 2.5635005742504238e-05,
"loss": 0.2205,
"step": 3003
},
{
"epoch": 1.02,
"learning_rate": 2.562105247624652e-05,
"loss": 0.2464,
"step": 3004
},
{
"epoch": 1.03,
"learning_rate": 2.560709901640268e-05,
"loss": 0.202,
"step": 3005
},
{
"epoch": 1.03,
"learning_rate": 2.559314536732212e-05,
"loss": 0.2308,
"step": 3006
},
{
"epoch": 1.03,
"learning_rate": 2.5579191533354274e-05,
"loss": 0.2077,
"step": 3007
},
{
"epoch": 1.03,
"learning_rate": 2.5565237518848644e-05,
"loss": 0.2574,
"step": 3008
},
{
"epoch": 1.03,
"learning_rate": 2.5551283328154784e-05,
"loss": 0.2194,
"step": 3009
},
{
"epoch": 1.03,
"learning_rate": 2.5537328965622303e-05,
"loss": 0.2204,
"step": 3010
},
{
"epoch": 1.03,
"learning_rate": 2.552337443560088e-05,
"loss": 0.214,
"step": 3011
},
{
"epoch": 1.03,
"learning_rate": 2.5509419742440227e-05,
"loss": 0.2199,
"step": 3012
},
{
"epoch": 1.03,
"learning_rate": 2.5495464890490113e-05,
"loss": 0.2224,
"step": 3013
},
{
"epoch": 1.03,
"learning_rate": 2.5481509884100357e-05,
"loss": 0.224,
"step": 3014
},
{
"epoch": 1.03,
"learning_rate": 2.5467554727620825e-05,
"loss": 0.2144,
"step": 3015
},
{
"epoch": 1.03,
"learning_rate": 2.5453599425401424e-05,
"loss": 0.2113,
"step": 3016
},
{
"epoch": 1.03,
"learning_rate": 2.543964398179213e-05,
"loss": 0.2462,
"step": 3017
},
{
"epoch": 1.03,
"learning_rate": 2.5425688401142915e-05,
"loss": 0.2521,
"step": 3018
},
{
"epoch": 1.03,
"learning_rate": 2.5411732687803862e-05,
"loss": 0.2222,
"step": 3019
},
{
"epoch": 1.03,
"learning_rate": 2.5397776846125037e-05,
"loss": 0.255,
"step": 3020
},
{
"epoch": 1.03,
"learning_rate": 2.538382088045657e-05,
"loss": 0.2543,
"step": 3021
},
{
"epoch": 1.03,
"learning_rate": 2.536986479514863e-05,
"loss": 0.2183,
"step": 3022
},
{
"epoch": 1.03,
"learning_rate": 2.5355908594551414e-05,
"loss": 0.2323,
"step": 3023
},
{
"epoch": 1.03,
"learning_rate": 2.5341952283015162e-05,
"loss": 0.2147,
"step": 3024
},
{
"epoch": 1.03,
"learning_rate": 2.5327995864890152e-05,
"loss": 0.2153,
"step": 3025
},
{
"epoch": 1.03,
"learning_rate": 2.5314039344526693e-05,
"loss": 0.2214,
"step": 3026
},
{
"epoch": 1.03,
"learning_rate": 2.5300082726275108e-05,
"loss": 0.2065,
"step": 3027
},
{
"epoch": 1.03,
"learning_rate": 2.5286126014485788e-05,
"loss": 0.2457,
"step": 3028
},
{
"epoch": 1.03,
"learning_rate": 2.5272169213509112e-05,
"loss": 0.2473,
"step": 3029
},
{
"epoch": 1.03,
"learning_rate": 2.525821232769552e-05,
"loss": 0.2508,
"step": 3030
},
{
"epoch": 1.03,
"learning_rate": 2.524425536139546e-05,
"loss": 0.225,
"step": 3031
},
{
"epoch": 1.03,
"learning_rate": 2.52302983189594e-05,
"loss": 0.2038,
"step": 3032
},
{
"epoch": 1.03,
"learning_rate": 2.5216341204737855e-05,
"loss": 0.2413,
"step": 3033
},
{
"epoch": 1.04,
"learning_rate": 2.5202384023081344e-05,
"loss": 0.2182,
"step": 3034
},
{
"epoch": 1.04,
"learning_rate": 2.5188426778340414e-05,
"loss": 0.2234,
"step": 3035
},
{
"epoch": 1.04,
"learning_rate": 2.5174469474865613e-05,
"loss": 0.2098,
"step": 3036
},
{
"epoch": 1.04,
"learning_rate": 2.516051211700755e-05,
"loss": 0.2639,
"step": 3037
},
{
"epoch": 1.04,
"learning_rate": 2.5146554709116808e-05,
"loss": 0.251,
"step": 3038
},
{
"epoch": 1.04,
"learning_rate": 2.5132597255543998e-05,
"loss": 0.2324,
"step": 3039
},
{
"epoch": 1.04,
"learning_rate": 2.511863976063977e-05,
"loss": 0.2237,
"step": 3040
},
{
"epoch": 1.04,
"learning_rate": 2.510468222875475e-05,
"loss": 0.2273,
"step": 3041
},
{
"epoch": 1.04,
"learning_rate": 2.509072466423959e-05,
"loss": 0.2269,
"step": 3042
},
{
"epoch": 1.04,
"learning_rate": 2.5076767071444962e-05,
"loss": 0.2629,
"step": 3043
},
{
"epoch": 1.04,
"learning_rate": 2.506280945472154e-05,
"loss": 0.2287,
"step": 3044
},
{
"epoch": 1.04,
"learning_rate": 2.504885181842e-05,
"loss": 0.2158,
"step": 3045
},
{
"epoch": 1.04,
"learning_rate": 2.5034894166891027e-05,
"loss": 0.2285,
"step": 3046
},
{
"epoch": 1.04,
"learning_rate": 2.5020936504485313e-05,
"loss": 0.2011,
"step": 3047
},
{
"epoch": 1.04,
"learning_rate": 2.5006978835553556e-05,
"loss": 0.2302,
"step": 3048
},
{
"epoch": 1.04,
"learning_rate": 2.499302116444645e-05,
"loss": 0.2686,
"step": 3049
},
{
"epoch": 1.04,
"learning_rate": 2.497906349551469e-05,
"loss": 0.2664,
"step": 3050
},
{
"epoch": 1.04,
"learning_rate": 2.4965105833108982e-05,
"loss": 0.2482,
"step": 3051
},
{
"epoch": 1.04,
"learning_rate": 2.495114818158001e-05,
"loss": 0.2505,
"step": 3052
},
{
"epoch": 1.04,
"learning_rate": 2.4937190545278468e-05,
"loss": 0.2388,
"step": 3053
},
{
"epoch": 1.04,
"learning_rate": 2.492323292855504e-05,
"loss": 0.2442,
"step": 3054
},
{
"epoch": 1.04,
"learning_rate": 2.4909275335760415e-05,
"loss": 0.2052,
"step": 3055
},
{
"epoch": 1.04,
"learning_rate": 2.4895317771245256e-05,
"loss": 0.2212,
"step": 3056
},
{
"epoch": 1.04,
"learning_rate": 2.488136023936023e-05,
"loss": 0.2201,
"step": 3057
},
{
"epoch": 1.04,
"learning_rate": 2.4867402744455998e-05,
"loss": 0.2054,
"step": 3058
},
{
"epoch": 1.04,
"learning_rate": 2.4853445290883194e-05,
"loss": 0.2429,
"step": 3059
},
{
"epoch": 1.04,
"learning_rate": 2.4839487882992456e-05,
"loss": 0.233,
"step": 3060
},
{
"epoch": 1.04,
"learning_rate": 2.482553052513439e-05,
"loss": 0.2279,
"step": 3061
},
{
"epoch": 1.04,
"learning_rate": 2.4811573221659595e-05,
"loss": 0.2291,
"step": 3062
},
{
"epoch": 1.05,
"learning_rate": 2.4797615976918662e-05,
"loss": 0.2008,
"step": 3063
},
{
"epoch": 1.05,
"learning_rate": 2.478365879526215e-05,
"loss": 0.2435,
"step": 3064
},
{
"epoch": 1.05,
"learning_rate": 2.476970168104061e-05,
"loss": 0.235,
"step": 3065
},
{
"epoch": 1.05,
"learning_rate": 2.4755744638604554e-05,
"loss": 0.2256,
"step": 3066
},
{
"epoch": 1.05,
"learning_rate": 2.4741787672304486e-05,
"loss": 0.2273,
"step": 3067
},
{
"epoch": 1.05,
"learning_rate": 2.4727830786490894e-05,
"loss": 0.2341,
"step": 3068
},
{
"epoch": 1.05,
"learning_rate": 2.471387398551422e-05,
"loss": 0.2425,
"step": 3069
},
{
"epoch": 1.05,
"learning_rate": 2.4699917273724887e-05,
"loss": 0.2113,
"step": 3070
},
{
"epoch": 1.05,
"learning_rate": 2.468596065547331e-05,
"loss": 0.2194,
"step": 3071
},
{
"epoch": 1.05,
"learning_rate": 2.467200413510985e-05,
"loss": 0.2506,
"step": 3072
},
{
"epoch": 1.05,
"learning_rate": 2.465804771698484e-05,
"loss": 0.2046,
"step": 3073
},
{
"epoch": 1.05,
"learning_rate": 2.4644091405448592e-05,
"loss": 0.2341,
"step": 3074
},
{
"epoch": 1.05,
"learning_rate": 2.4630135204851373e-05,
"loss": 0.2098,
"step": 3075
},
{
"epoch": 1.05,
"learning_rate": 2.4616179119543433e-05,
"loss": 0.2209,
"step": 3076
},
{
"epoch": 1.05,
"learning_rate": 2.460222315387497e-05,
"loss": 0.2071,
"step": 3077
},
{
"epoch": 1.05,
"learning_rate": 2.4588267312196144e-05,
"loss": 0.2278,
"step": 3078
},
{
"epoch": 1.05,
"learning_rate": 2.4574311598857088e-05,
"loss": 0.2397,
"step": 3079
},
{
"epoch": 1.05,
"learning_rate": 2.456035601820788e-05,
"loss": 0.2639,
"step": 3080
},
{
"epoch": 1.05,
"learning_rate": 2.4546400574598582e-05,
"loss": 0.2122,
"step": 3081
},
{
"epoch": 1.05,
"learning_rate": 2.4532445272379188e-05,
"loss": 0.1979,
"step": 3082
},
{
"epoch": 1.05,
"learning_rate": 2.4518490115899652e-05,
"loss": 0.1969,
"step": 3083
},
{
"epoch": 1.05,
"learning_rate": 2.4504535109509886e-05,
"loss": 0.1964,
"step": 3084
},
{
"epoch": 1.05,
"learning_rate": 2.4490580257559772e-05,
"loss": 0.2453,
"step": 3085
},
{
"epoch": 1.05,
"learning_rate": 2.447662556439912e-05,
"loss": 0.229,
"step": 3086
},
{
"epoch": 1.05,
"learning_rate": 2.44626710343777e-05,
"loss": 0.235,
"step": 3087
},
{
"epoch": 1.05,
"learning_rate": 2.444871667184522e-05,
"loss": 0.2237,
"step": 3088
},
{
"epoch": 1.05,
"learning_rate": 2.4434762481151365e-05,
"loss": 0.2331,
"step": 3089
},
{
"epoch": 1.05,
"learning_rate": 2.442080846664573e-05,
"loss": 0.2312,
"step": 3090
},
{
"epoch": 1.05,
"learning_rate": 2.4406854632677883e-05,
"loss": 0.2135,
"step": 3091
},
{
"epoch": 1.05,
"learning_rate": 2.4392900983597326e-05,
"loss": 0.1992,
"step": 3092
},
{
"epoch": 1.06,
"learning_rate": 2.437894752375349e-05,
"loss": 0.241,
"step": 3093
},
{
"epoch": 1.06,
"learning_rate": 2.4364994257495768e-05,
"loss": 0.1956,
"step": 3094
},
{
"epoch": 1.06,
"learning_rate": 2.435104118917349e-05,
"loss": 0.2035,
"step": 3095
},
{
"epoch": 1.06,
"learning_rate": 2.433708832313591e-05,
"loss": 0.2251,
"step": 3096
},
{
"epoch": 1.06,
"learning_rate": 2.4323135663732226e-05,
"loss": 0.2493,
"step": 3097
},
{
"epoch": 1.06,
"learning_rate": 2.4309183215311575e-05,
"loss": 0.2115,
"step": 3098
},
{
"epoch": 1.06,
"learning_rate": 2.4295230982223034e-05,
"loss": 0.2055,
"step": 3099
},
{
"epoch": 1.06,
"learning_rate": 2.4281278968815598e-05,
"loss": 0.2887,
"step": 3100
},
{
"epoch": 1.06,
"learning_rate": 2.42673271794382e-05,
"loss": 0.2015,
"step": 3101
},
{
"epoch": 1.06,
"learning_rate": 2.4253375618439713e-05,
"loss": 0.2177,
"step": 3102
},
{
"epoch": 1.06,
"learning_rate": 2.4239424290168923e-05,
"loss": 0.2458,
"step": 3103
},
{
"epoch": 1.06,
"learning_rate": 2.4225473198974548e-05,
"loss": 0.214,
"step": 3104
},
{
"epoch": 1.06,
"learning_rate": 2.4211522349205233e-05,
"loss": 0.2249,
"step": 3105
},
{
"epoch": 1.06,
"learning_rate": 2.4197571745209552e-05,
"loss": 0.2382,
"step": 3106
},
{
"epoch": 1.06,
"learning_rate": 2.4183621391336005e-05,
"loss": 0.2161,
"step": 3107
},
{
"epoch": 1.06,
"learning_rate": 2.4169671291933003e-05,
"loss": 0.2138,
"step": 3108
},
{
"epoch": 1.06,
"learning_rate": 2.4155721451348883e-05,
"loss": 0.2393,
"step": 3109
},
{
"epoch": 1.06,
"learning_rate": 2.4141771873931902e-05,
"loss": 0.2391,
"step": 3110
},
{
"epoch": 1.06,
"learning_rate": 2.412782256403022e-05,
"loss": 0.2264,
"step": 3111
},
{
"epoch": 1.06,
"learning_rate": 2.4113873525991955e-05,
"loss": 0.2267,
"step": 3112
},
{
"epoch": 1.06,
"learning_rate": 2.4099924764165104e-05,
"loss": 0.2346,
"step": 3113
},
{
"epoch": 1.06,
"learning_rate": 2.4085976282897573e-05,
"loss": 0.2527,
"step": 3114
},
{
"epoch": 1.06,
"learning_rate": 2.4072028086537212e-05,
"loss": 0.2151,
"step": 3115
},
{
"epoch": 1.06,
"learning_rate": 2.405808017943176e-05,
"loss": 0.2766,
"step": 3116
},
{
"epoch": 1.06,
"learning_rate": 2.4044132565928872e-05,
"loss": 0.2058,
"step": 3117
},
{
"epoch": 1.06,
"learning_rate": 2.40301852503761e-05,
"loss": 0.2063,
"step": 3118
},
{
"epoch": 1.06,
"learning_rate": 2.401623823712092e-05,
"loss": 0.2459,
"step": 3119
},
{
"epoch": 1.06,
"learning_rate": 2.4002291530510712e-05,
"loss": 0.1991,
"step": 3120
},
{
"epoch": 1.06,
"learning_rate": 2.398834513489275e-05,
"loss": 0.2683,
"step": 3121
},
{
"epoch": 1.07,
"learning_rate": 2.397439905461422e-05,
"loss": 0.2189,
"step": 3122
},
{
"epoch": 1.07,
"learning_rate": 2.3960453294022206e-05,
"loss": 0.2135,
"step": 3123
},
{
"epoch": 1.07,
"learning_rate": 2.3946507857463688e-05,
"loss": 0.2375,
"step": 3124
},
{
"epoch": 1.07,
"learning_rate": 2.3932562749285543e-05,
"loss": 0.2304,
"step": 3125
},
{
"epoch": 1.07,
"learning_rate": 2.391861797383457e-05,
"loss": 0.2168,
"step": 3126
},
{
"epoch": 1.07,
"learning_rate": 2.3904673535457435e-05,
"loss": 0.2444,
"step": 3127
},
{
"epoch": 1.07,
"learning_rate": 2.389072943850072e-05,
"loss": 0.2183,
"step": 3128
},
{
"epoch": 1.07,
"learning_rate": 2.387678568731089e-05,
"loss": 0.2293,
"step": 3129
},
{
"epoch": 1.07,
"learning_rate": 2.3862842286234293e-05,
"loss": 0.2695,
"step": 3130
},
{
"epoch": 1.07,
"learning_rate": 2.3848899239617183e-05,
"loss": 0.2071,
"step": 3131
},
{
"epoch": 1.07,
"learning_rate": 2.3834956551805693e-05,
"loss": 0.2313,
"step": 3132
},
{
"epoch": 1.07,
"learning_rate": 2.3821014227145867e-05,
"loss": 0.2132,
"step": 3133
},
{
"epoch": 1.07,
"learning_rate": 2.38070722699836e-05,
"loss": 0.2322,
"step": 3134
},
{
"epoch": 1.07,
"learning_rate": 2.3793130684664707e-05,
"loss": 0.2427,
"step": 3135
},
{
"epoch": 1.07,
"learning_rate": 2.377918947553486e-05,
"loss": 0.2394,
"step": 3136
},
{
"epoch": 1.07,
"learning_rate": 2.3765248646939627e-05,
"loss": 0.21,
"step": 3137
},
{
"epoch": 1.07,
"learning_rate": 2.3751308203224454e-05,
"loss": 0.2251,
"step": 3138
},
{
"epoch": 1.07,
"learning_rate": 2.373736814873467e-05,
"loss": 0.2167,
"step": 3139
},
{
"epoch": 1.07,
"learning_rate": 2.3723428487815484e-05,
"loss": 0.2186,
"step": 3140
},
{
"epoch": 1.07,
"learning_rate": 2.3709489224811977e-05,
"loss": 0.2518,
"step": 3141
},
{
"epoch": 1.07,
"learning_rate": 2.3695550364069113e-05,
"loss": 0.2262,
"step": 3142
},
{
"epoch": 1.07,
"learning_rate": 2.3681611909931724e-05,
"loss": 0.2002,
"step": 3143
},
{
"epoch": 1.07,
"learning_rate": 2.3667673866744512e-05,
"loss": 0.2228,
"step": 3144
},
{
"epoch": 1.07,
"learning_rate": 2.3653736238852055e-05,
"loss": 0.2331,
"step": 3145
},
{
"epoch": 1.07,
"learning_rate": 2.363979903059881e-05,
"loss": 0.2416,
"step": 3146
},
{
"epoch": 1.07,
"learning_rate": 2.3625862246329093e-05,
"loss": 0.2419,
"step": 3147
},
{
"epoch": 1.07,
"learning_rate": 2.3611925890387087e-05,
"loss": 0.2534,
"step": 3148
},
{
"epoch": 1.07,
"learning_rate": 2.3597989967116856e-05,
"loss": 0.2229,
"step": 3149
},
{
"epoch": 1.07,
"learning_rate": 2.3584054480862303e-05,
"loss": 0.1696,
"step": 3150
},
{
"epoch": 1.08,
"learning_rate": 2.3570119435967216e-05,
"loss": 0.2292,
"step": 3151
},
{
"epoch": 1.08,
"learning_rate": 2.3556184836775246e-05,
"loss": 0.2532,
"step": 3152
},
{
"epoch": 1.08,
"learning_rate": 2.3542250687629884e-05,
"loss": 0.2261,
"step": 3153
},
{
"epoch": 1.08,
"learning_rate": 2.3528316992874518e-05,
"loss": 0.2432,
"step": 3154
},
{
"epoch": 1.08,
"learning_rate": 2.3514383756852354e-05,
"loss": 0.2159,
"step": 3155
},
{
"epoch": 1.08,
"learning_rate": 2.350045098390648e-05,
"loss": 0.2172,
"step": 3156
},
{
"epoch": 1.08,
"learning_rate": 2.3486518678379833e-05,
"loss": 0.2186,
"step": 3157
},
{
"epoch": 1.08,
"learning_rate": 2.347258684461519e-05,
"loss": 0.2395,
"step": 3158
},
{
"epoch": 1.08,
"learning_rate": 2.3458655486955213e-05,
"loss": 0.2081,
"step": 3159
},
{
"epoch": 1.08,
"learning_rate": 2.344472460974239e-05,
"loss": 0.2172,
"step": 3160
},
{
"epoch": 1.08,
"learning_rate": 2.3430794217319065e-05,
"loss": 0.2207,
"step": 3161
},
{
"epoch": 1.08,
"learning_rate": 2.3416864314027434e-05,
"loss": 0.2137,
"step": 3162
},
{
"epoch": 1.08,
"learning_rate": 2.3402934904209537e-05,
"loss": 0.2131,
"step": 3163
},
{
"epoch": 1.08,
"learning_rate": 2.338900599220726e-05,
"loss": 0.1979,
"step": 3164
},
{
"epoch": 1.08,
"learning_rate": 2.337507758236234e-05,
"loss": 0.2042,
"step": 3165
},
{
"epoch": 1.08,
"learning_rate": 2.3361149679016352e-05,
"loss": 0.2411,
"step": 3166
},
{
"epoch": 1.08,
"learning_rate": 2.3347222286510706e-05,
"loss": 0.2176,
"step": 3167
},
{
"epoch": 1.08,
"learning_rate": 2.333329540918668e-05,
"loss": 0.214,
"step": 3168
},
{
"epoch": 1.08,
"learning_rate": 2.3319369051385363e-05,
"loss": 0.2365,
"step": 3169
},
{
"epoch": 1.08,
"learning_rate": 2.3305443217447693e-05,
"loss": 0.2464,
"step": 3170
},
{
"epoch": 1.08,
"learning_rate": 2.3291517911714435e-05,
"loss": 0.2395,
"step": 3171
},
{
"epoch": 1.08,
"learning_rate": 2.3277593138526207e-05,
"loss": 0.2244,
"step": 3172
},
{
"epoch": 1.08,
"learning_rate": 2.326366890222345e-05,
"loss": 0.2375,
"step": 3173
},
{
"epoch": 1.08,
"learning_rate": 2.3249745207146442e-05,
"loss": 0.2305,
"step": 3174
},
{
"epoch": 1.08,
"learning_rate": 2.3235822057635285e-05,
"loss": 0.212,
"step": 3175
},
{
"epoch": 1.08,
"learning_rate": 2.3221899458029918e-05,
"loss": 0.2143,
"step": 3176
},
{
"epoch": 1.08,
"learning_rate": 2.3207977412670094e-05,
"loss": 0.208,
"step": 3177
},
{
"epoch": 1.08,
"learning_rate": 2.3194055925895427e-05,
"loss": 0.2498,
"step": 3178
},
{
"epoch": 1.08,
"learning_rate": 2.3180135002045325e-05,
"loss": 0.1996,
"step": 3179
},
{
"epoch": 1.08,
"learning_rate": 2.3166214645459028e-05,
"loss": 0.2209,
"step": 3180
},
{
"epoch": 1.09,
"learning_rate": 2.3152294860475596e-05,
"loss": 0.2934,
"step": 3181
},
{
"epoch": 1.09,
"learning_rate": 2.3138375651433934e-05,
"loss": 0.2476,
"step": 3182
},
{
"epoch": 1.09,
"learning_rate": 2.312445702267275e-05,
"loss": 0.204,
"step": 3183
},
{
"epoch": 1.09,
"learning_rate": 2.3110538978530552e-05,
"loss": 0.2157,
"step": 3184
},
{
"epoch": 1.09,
"learning_rate": 2.3096621523345702e-05,
"loss": 0.2318,
"step": 3185
},
{
"epoch": 1.09,
"learning_rate": 2.308270466145636e-05,
"loss": 0.2313,
"step": 3186
},
{
"epoch": 1.09,
"learning_rate": 2.3068788397200497e-05,
"loss": 0.2078,
"step": 3187
},
{
"epoch": 1.09,
"learning_rate": 2.3054872734915907e-05,
"loss": 0.2437,
"step": 3188
},
{
"epoch": 1.09,
"learning_rate": 2.304095767894019e-05,
"loss": 0.2099,
"step": 3189
},
{
"epoch": 1.09,
"learning_rate": 2.302704323361076e-05,
"loss": 0.2429,
"step": 3190
},
{
"epoch": 1.09,
"learning_rate": 2.3013129403264845e-05,
"loss": 0.227,
"step": 3191
},
{
"epoch": 1.09,
"learning_rate": 2.2999216192239476e-05,
"loss": 0.2093,
"step": 3192
},
{
"epoch": 1.09,
"learning_rate": 2.298530360487149e-05,
"loss": 0.2311,
"step": 3193
},
{
"epoch": 1.09,
"learning_rate": 2.2971391645497528e-05,
"loss": 0.2285,
"step": 3194
},
{
"epoch": 1.09,
"learning_rate": 2.2957480318454034e-05,
"loss": 0.2172,
"step": 3195
},
{
"epoch": 1.09,
"learning_rate": 2.2943569628077275e-05,
"loss": 0.2261,
"step": 3196
},
{
"epoch": 1.09,
"learning_rate": 2.292965957870329e-05,
"loss": 0.2469,
"step": 3197
},
{
"epoch": 1.09,
"learning_rate": 2.291575017466795e-05,
"loss": 0.2248,
"step": 3198
},
{
"epoch": 1.09,
"learning_rate": 2.290184142030689e-05,
"loss": 0.2335,
"step": 3199
},
{
"epoch": 1.09,
"learning_rate": 2.2887933319955568e-05,
"loss": 0.1989,
"step": 3200
},
{
"epoch": 1.09,
"learning_rate": 2.2874025877949226e-05,
"loss": 0.2347,
"step": 3201
},
{
"epoch": 1.09,
"learning_rate": 2.2860119098622906e-05,
"loss": 0.2099,
"step": 3202
},
{
"epoch": 1.09,
"learning_rate": 2.2846212986311436e-05,
"loss": 0.2109,
"step": 3203
},
{
"epoch": 1.09,
"learning_rate": 2.283230754534945e-05,
"loss": 0.2467,
"step": 3204
},
{
"epoch": 1.09,
"learning_rate": 2.2818402780071368e-05,
"loss": 0.2084,
"step": 3205
},
{
"epoch": 1.09,
"learning_rate": 2.2804498694811385e-05,
"loss": 0.2251,
"step": 3206
},
{
"epoch": 1.09,
"learning_rate": 2.2790595293903504e-05,
"loss": 0.2164,
"step": 3207
},
{
"epoch": 1.09,
"learning_rate": 2.2776692581681492e-05,
"loss": 0.225,
"step": 3208
},
{
"epoch": 1.09,
"learning_rate": 2.2762790562478935e-05,
"loss": 0.211,
"step": 3209
},
{
"epoch": 1.1,
"learning_rate": 2.2748889240629167e-05,
"loss": 0.2281,
"step": 3210
},
{
"epoch": 1.1,
"learning_rate": 2.2734988620465338e-05,
"loss": 0.2073,
"step": 3211
},
{
"epoch": 1.1,
"learning_rate": 2.2721088706320347e-05,
"loss": 0.2494,
"step": 3212
},
{
"epoch": 1.1,
"learning_rate": 2.2707189502526895e-05,
"loss": 0.207,
"step": 3213
},
{
"epoch": 1.1,
"learning_rate": 2.2693291013417453e-05,
"loss": 0.2163,
"step": 3214
},
{
"epoch": 1.1,
"learning_rate": 2.267939324332427e-05,
"loss": 0.223,
"step": 3215
},
{
"epoch": 1.1,
"learning_rate": 2.2665496196579367e-05,
"loss": 0.2518,
"step": 3216
},
{
"epoch": 1.1,
"learning_rate": 2.2651599877514555e-05,
"loss": 0.2127,
"step": 3217
},
{
"epoch": 1.1,
"learning_rate": 2.26377042904614e-05,
"loss": 0.2366,
"step": 3218
},
{
"epoch": 1.1,
"learning_rate": 2.2623809439751252e-05,
"loss": 0.243,
"step": 3219
},
{
"epoch": 1.1,
"learning_rate": 2.260991532971522e-05,
"loss": 0.2417,
"step": 3220
},
{
"epoch": 1.1,
"learning_rate": 2.259602196468419e-05,
"loss": 0.2262,
"step": 3221
},
{
"epoch": 1.1,
"learning_rate": 2.2582129348988817e-05,
"loss": 0.2212,
"step": 3222
},
{
"epoch": 1.1,
"learning_rate": 2.256823748695952e-05,
"loss": 0.1948,
"step": 3223
},
{
"epoch": 1.1,
"learning_rate": 2.255434638292649e-05,
"loss": 0.2336,
"step": 3224
},
{
"epoch": 1.1,
"learning_rate": 2.2540456041219668e-05,
"loss": 0.2305,
"step": 3225
},
{
"epoch": 1.1,
"learning_rate": 2.2526566466168763e-05,
"loss": 0.2132,
"step": 3226
},
{
"epoch": 1.1,
"learning_rate": 2.251267766210325e-05,
"loss": 0.2159,
"step": 3227
},
{
"epoch": 1.1,
"learning_rate": 2.2498789633352358e-05,
"loss": 0.274,
"step": 3228
},
{
"epoch": 1.1,
"learning_rate": 2.248490238424507e-05,
"loss": 0.1872,
"step": 3229
},
{
"epoch": 1.1,
"learning_rate": 2.2471015919110142e-05,
"loss": 0.219,
"step": 3230
},
{
"epoch": 1.1,
"learning_rate": 2.2457130242276073e-05,
"loss": 0.2415,
"step": 3231
},
{
"epoch": 1.1,
"learning_rate": 2.244324535807112e-05,
"loss": 0.247,
"step": 3232
},
{
"epoch": 1.1,
"learning_rate": 2.2429361270823287e-05,
"loss": 0.2555,
"step": 3233
},
{
"epoch": 1.1,
"learning_rate": 2.2415477984860335e-05,
"loss": 0.2128,
"step": 3234
},
{
"epoch": 1.1,
"learning_rate": 2.2401595504509783e-05,
"loss": 0.2667,
"step": 3235
},
{
"epoch": 1.1,
"learning_rate": 2.2387713834098878e-05,
"loss": 0.2409,
"step": 3236
},
{
"epoch": 1.1,
"learning_rate": 2.2373832977954646e-05,
"loss": 0.2449,
"step": 3237
},
{
"epoch": 1.1,
"learning_rate": 2.2359952940403827e-05,
"loss": 0.205,
"step": 3238
},
{
"epoch": 1.11,
"learning_rate": 2.2346073725772922e-05,
"loss": 0.2129,
"step": 3239
},
{
"epoch": 1.11,
"learning_rate": 2.2332195338388177e-05,
"loss": 0.228,
"step": 3240
},
{
"epoch": 1.11,
"learning_rate": 2.231831778257557e-05,
"loss": 0.2148,
"step": 3241
},
{
"epoch": 1.11,
"learning_rate": 2.2304441062660822e-05,
"loss": 0.2073,
"step": 3242
},
{
"epoch": 1.11,
"learning_rate": 2.2290565182969407e-05,
"loss": 0.2371,
"step": 3243
},
{
"epoch": 1.11,
"learning_rate": 2.2276690147826527e-05,
"loss": 0.226,
"step": 3244
},
{
"epoch": 1.11,
"learning_rate": 2.2262815961557115e-05,
"loss": 0.2138,
"step": 3245
},
{
"epoch": 1.11,
"learning_rate": 2.224894262848585e-05,
"loss": 0.2122,
"step": 3246
},
{
"epoch": 1.11,
"learning_rate": 2.2235070152937133e-05,
"loss": 0.2357,
"step": 3247
},
{
"epoch": 1.11,
"learning_rate": 2.2221198539235114e-05,
"loss": 0.2174,
"step": 3248
},
{
"epoch": 1.11,
"learning_rate": 2.2207327791703665e-05,
"loss": 0.2373,
"step": 3249
},
{
"epoch": 1.11,
"learning_rate": 2.2193457914666372e-05,
"loss": 0.2309,
"step": 3250
},
{
"epoch": 1.11,
"learning_rate": 2.2179588912446595e-05,
"loss": 0.2207,
"step": 3251
},
{
"epoch": 1.11,
"learning_rate": 2.2165720789367376e-05,
"loss": 0.1978,
"step": 3252
},
{
"epoch": 1.11,
"learning_rate": 2.2151853549751503e-05,
"loss": 0.216,
"step": 3253
},
{
"epoch": 1.11,
"learning_rate": 2.213798719792148e-05,
"loss": 0.216,
"step": 3254
},
{
"epoch": 1.11,
"learning_rate": 2.2124121738199542e-05,
"loss": 0.21,
"step": 3255
},
{
"epoch": 1.11,
"learning_rate": 2.2110257174907648e-05,
"loss": 0.2123,
"step": 3256
},
{
"epoch": 1.11,
"learning_rate": 2.209639351236747e-05,
"loss": 0.2534,
"step": 3257
},
{
"epoch": 1.11,
"learning_rate": 2.20825307549004e-05,
"loss": 0.1969,
"step": 3258
},
{
"epoch": 1.11,
"learning_rate": 2.2068668906827554e-05,
"loss": 0.2198,
"step": 3259
},
{
"epoch": 1.11,
"learning_rate": 2.2054807972469755e-05,
"loss": 0.2066,
"step": 3260
},
{
"epoch": 1.11,
"learning_rate": 2.204094795614755e-05,
"loss": 0.2182,
"step": 3261
},
{
"epoch": 1.11,
"learning_rate": 2.20270888621812e-05,
"loss": 0.2077,
"step": 3262
},
{
"epoch": 1.11,
"learning_rate": 2.201323069489067e-05,
"loss": 0.2384,
"step": 3263
},
{
"epoch": 1.11,
"learning_rate": 2.1999373458595636e-05,
"loss": 0.228,
"step": 3264
},
{
"epoch": 1.11,
"learning_rate": 2.1985517157615503e-05,
"loss": 0.2119,
"step": 3265
},
{
"epoch": 1.11,
"learning_rate": 2.1971661796269367e-05,
"loss": 0.2369,
"step": 3266
},
{
"epoch": 1.11,
"learning_rate": 2.195780737887603e-05,
"loss": 0.2239,
"step": 3267
},
{
"epoch": 1.11,
"learning_rate": 2.1943953909754e-05,
"loss": 0.223,
"step": 3268
},
{
"epoch": 1.12,
"learning_rate": 2.19301013932215e-05,
"loss": 0.2242,
"step": 3269
},
{
"epoch": 1.12,
"learning_rate": 2.1916249833596456e-05,
"loss": 0.2247,
"step": 3270
},
{
"epoch": 1.12,
"learning_rate": 2.190239923519648e-05,
"loss": 0.2517,
"step": 3271
},
{
"epoch": 1.12,
"learning_rate": 2.1888549602338898e-05,
"loss": 0.252,
"step": 3272
},
{
"epoch": 1.12,
"learning_rate": 2.1874700939340727e-05,
"loss": 0.1928,
"step": 3273
},
{
"epoch": 1.12,
"learning_rate": 2.1860853250518692e-05,
"loss": 0.1987,
"step": 3274
},
{
"epoch": 1.12,
"learning_rate": 2.1847006540189203e-05,
"loss": 0.2064,
"step": 3275
},
{
"epoch": 1.12,
"learning_rate": 2.1833160812668377e-05,
"loss": 0.2042,
"step": 3276
},
{
"epoch": 1.12,
"learning_rate": 2.181931607227201e-05,
"loss": 0.2213,
"step": 3277
},
{
"epoch": 1.12,
"learning_rate": 2.180547232331559e-05,
"loss": 0.1994,
"step": 3278
},
{
"epoch": 1.12,
"learning_rate": 2.1791629570114325e-05,
"loss": 0.1944,
"step": 3279
},
{
"epoch": 1.12,
"learning_rate": 2.177778781698308e-05,
"loss": 0.2393,
"step": 3280
},
{
"epoch": 1.12,
"learning_rate": 2.1763947068236417e-05,
"loss": 0.2139,
"step": 3281
},
{
"epoch": 1.12,
"learning_rate": 2.17501073281886e-05,
"loss": 0.2365,
"step": 3282
},
{
"epoch": 1.12,
"learning_rate": 2.1736268601153553e-05,
"loss": 0.2351,
"step": 3283
},
{
"epoch": 1.12,
"learning_rate": 2.1722430891444907e-05,
"loss": 0.2105,
"step": 3284
},
{
"epoch": 1.12,
"learning_rate": 2.170859420337596e-05,
"loss": 0.2137,
"step": 3285
},
{
"epoch": 1.12,
"learning_rate": 2.1694758541259697e-05,
"loss": 0.2164,
"step": 3286
},
{
"epoch": 1.12,
"learning_rate": 2.1680923909408793e-05,
"loss": 0.2316,
"step": 3287
},
{
"epoch": 1.12,
"learning_rate": 2.1667090312135585e-05,
"loss": 0.2268,
"step": 3288
},
{
"epoch": 1.12,
"learning_rate": 2.1653257753752103e-05,
"loss": 0.2215,
"step": 3289
},
{
"epoch": 1.12,
"learning_rate": 2.1639426238570037e-05,
"loss": 0.203,
"step": 3290
},
{
"epoch": 1.12,
"learning_rate": 2.1625595770900763e-05,
"loss": 0.2339,
"step": 3291
},
{
"epoch": 1.12,
"learning_rate": 2.161176635505533e-05,
"loss": 0.2353,
"step": 3292
},
{
"epoch": 1.12,
"learning_rate": 2.1597937995344462e-05,
"loss": 0.2056,
"step": 3293
},
{
"epoch": 1.12,
"learning_rate": 2.158411069607854e-05,
"loss": 0.2527,
"step": 3294
},
{
"epoch": 1.12,
"learning_rate": 2.1570284461567632e-05,
"loss": 0.2242,
"step": 3295
},
{
"epoch": 1.12,
"learning_rate": 2.155645929612146e-05,
"loss": 0.2372,
"step": 3296
},
{
"epoch": 1.12,
"learning_rate": 2.1542635204049422e-05,
"loss": 0.2397,
"step": 3297
},
{
"epoch": 1.13,
"learning_rate": 2.1528812189660573e-05,
"loss": 0.2163,
"step": 3298
},
{
"epoch": 1.13,
"learning_rate": 2.1514990257263638e-05,
"loss": 0.202,
"step": 3299
},
{
"epoch": 1.13,
"learning_rate": 2.1501169411167006e-05,
"loss": 0.2449,
"step": 3300
},
{
"epoch": 1.13,
"learning_rate": 2.148734965567873e-05,
"loss": 0.2393,
"step": 3301
},
{
"epoch": 1.13,
"learning_rate": 2.147353099510651e-05,
"loss": 0.2149,
"step": 3302
},
{
"epoch": 1.13,
"learning_rate": 2.145971343375771e-05,
"loss": 0.2114,
"step": 3303
},
{
"epoch": 1.13,
"learning_rate": 2.144589697593936e-05,
"loss": 0.2222,
"step": 3304
},
{
"epoch": 1.13,
"learning_rate": 2.143208162595814e-05,
"loss": 0.2163,
"step": 3305
},
{
"epoch": 1.13,
"learning_rate": 2.1418267388120384e-05,
"loss": 0.2358,
"step": 3306
},
{
"epoch": 1.13,
"learning_rate": 2.1404454266732077e-05,
"loss": 0.2153,
"step": 3307
},
{
"epoch": 1.13,
"learning_rate": 2.139064226609887e-05,
"loss": 0.2364,
"step": 3308
},
{
"epoch": 1.13,
"learning_rate": 2.1376831390526045e-05,
"loss": 0.2042,
"step": 3309
},
{
"epoch": 1.13,
"learning_rate": 2.136302164431854e-05,
"loss": 0.2125,
"step": 3310
},
{
"epoch": 1.13,
"learning_rate": 2.1349213031780955e-05,
"loss": 0.2504,
"step": 3311
},
{
"epoch": 1.13,
"learning_rate": 2.1335405557217507e-05,
"loss": 0.1996,
"step": 3312
},
{
"epoch": 1.13,
"learning_rate": 2.1321599224932086e-05,
"loss": 0.225,
"step": 3313
},
{
"epoch": 1.13,
"learning_rate": 2.130779403922822e-05,
"loss": 0.1989,
"step": 3314
},
{
"epoch": 1.13,
"learning_rate": 2.129399000440907e-05,
"loss": 0.2364,
"step": 3315
},
{
"epoch": 1.13,
"learning_rate": 2.1280187124777444e-05,
"loss": 0.204,
"step": 3316
},
{
"epoch": 1.13,
"learning_rate": 2.126638540463579e-05,
"loss": 0.2365,
"step": 3317
},
{
"epoch": 1.13,
"learning_rate": 2.1252584848286193e-05,
"loss": 0.2247,
"step": 3318
},
{
"epoch": 1.13,
"learning_rate": 2.1238785460030376e-05,
"loss": 0.2789,
"step": 3319
},
{
"epoch": 1.13,
"learning_rate": 2.12249872441697e-05,
"loss": 0.2158,
"step": 3320
},
{
"epoch": 1.13,
"learning_rate": 2.1211190205005165e-05,
"loss": 0.2411,
"step": 3321
},
{
"epoch": 1.13,
"learning_rate": 2.1197394346837395e-05,
"loss": 0.2177,
"step": 3322
},
{
"epoch": 1.13,
"learning_rate": 2.118359967396665e-05,
"loss": 0.2247,
"step": 3323
},
{
"epoch": 1.13,
"learning_rate": 2.116980619069281e-05,
"loss": 0.2088,
"step": 3324
},
{
"epoch": 1.13,
"learning_rate": 2.1156013901315397e-05,
"loss": 0.2397,
"step": 3325
},
{
"epoch": 1.13,
"learning_rate": 2.1142222810133565e-05,
"loss": 0.205,
"step": 3326
},
{
"epoch": 1.14,
"learning_rate": 2.112843292144609e-05,
"loss": 0.1994,
"step": 3327
},
{
"epoch": 1.14,
"learning_rate": 2.111464423955136e-05,
"loss": 0.2172,
"step": 3328
},
{
"epoch": 1.14,
"learning_rate": 2.11008567687474e-05,
"loss": 0.2075,
"step": 3329
},
{
"epoch": 1.14,
"learning_rate": 2.1087070513331854e-05,
"loss": 0.1855,
"step": 3330
},
{
"epoch": 1.14,
"learning_rate": 2.107328547760199e-05,
"loss": 0.2346,
"step": 3331
},
{
"epoch": 1.14,
"learning_rate": 2.1059501665854697e-05,
"loss": 0.2203,
"step": 3332
},
{
"epoch": 1.14,
"learning_rate": 2.1045719082386463e-05,
"loss": 0.2013,
"step": 3333
},
{
"epoch": 1.14,
"learning_rate": 2.1031937731493434e-05,
"loss": 0.2256,
"step": 3334
},
{
"epoch": 1.14,
"learning_rate": 2.1018157617471335e-05,
"loss": 0.2203,
"step": 3335
},
{
"epoch": 1.14,
"learning_rate": 2.1004378744615515e-05,
"loss": 0.2073,
"step": 3336
},
{
"epoch": 1.14,
"learning_rate": 2.0990601117220942e-05,
"loss": 0.2363,
"step": 3337
},
{
"epoch": 1.14,
"learning_rate": 2.097682473958219e-05,
"loss": 0.1997,
"step": 3338
},
{
"epoch": 1.14,
"learning_rate": 2.096304961599345e-05,
"loss": 0.2461,
"step": 3339
},
{
"epoch": 1.14,
"learning_rate": 2.0949275750748514e-05,
"loss": 0.2289,
"step": 3340
},
{
"epoch": 1.14,
"learning_rate": 2.0935503148140792e-05,
"loss": 0.2196,
"step": 3341
},
{
"epoch": 1.14,
"learning_rate": 2.0921731812463284e-05,
"loss": 0.2463,
"step": 3342
},
{
"epoch": 1.14,
"learning_rate": 2.0907961748008613e-05,
"loss": 0.2169,
"step": 3343
},
{
"epoch": 1.14,
"learning_rate": 2.0894192959068994e-05,
"loss": 0.2209,
"step": 3344
},
{
"epoch": 1.14,
"learning_rate": 2.0880425449936254e-05,
"loss": 0.2247,
"step": 3345
},
{
"epoch": 1.14,
"learning_rate": 2.0866659224901812e-05,
"loss": 0.2255,
"step": 3346
},
{
"epoch": 1.14,
"learning_rate": 2.0852894288256674e-05,
"loss": 0.2455,
"step": 3347
},
{
"epoch": 1.14,
"learning_rate": 2.0839130644291492e-05,
"loss": 0.2313,
"step": 3348
},
{
"epoch": 1.14,
"learning_rate": 2.082536829729647e-05,
"loss": 0.1746,
"step": 3349
},
{
"epoch": 1.14,
"learning_rate": 2.0811607251561418e-05,
"loss": 0.2206,
"step": 3350
},
{
"epoch": 1.14,
"learning_rate": 2.079784751137574e-05,
"loss": 0.2014,
"step": 3351
},
{
"epoch": 1.14,
"learning_rate": 2.078408908102845e-05,
"loss": 0.2044,
"step": 3352
},
{
"epoch": 1.14,
"learning_rate": 2.0770331964808133e-05,
"loss": 0.2335,
"step": 3353
},
{
"epoch": 1.14,
"learning_rate": 2.075657616700297e-05,
"loss": 0.2197,
"step": 3354
},
{
"epoch": 1.14,
"learning_rate": 2.0742821691900738e-05,
"loss": 0.2029,
"step": 3355
},
{
"epoch": 1.15,
"learning_rate": 2.0729068543788786e-05,
"loss": 0.2486,
"step": 3356
},
{
"epoch": 1.15,
"learning_rate": 2.0715316726954075e-05,
"loss": 0.2481,
"step": 3357
},
{
"epoch": 1.15,
"learning_rate": 2.0701566245683128e-05,
"loss": 0.1967,
"step": 3358
},
{
"epoch": 1.15,
"learning_rate": 2.0687817104262063e-05,
"loss": 0.2223,
"step": 3359
},
{
"epoch": 1.15,
"learning_rate": 2.067406930697658e-05,
"loss": 0.173,
"step": 3360
},
{
"epoch": 1.15,
"learning_rate": 2.0660322858111934e-05,
"loss": 0.1889,
"step": 3361
},
{
"epoch": 1.15,
"learning_rate": 2.0646577761953014e-05,
"loss": 0.2334,
"step": 3362
},
{
"epoch": 1.15,
"learning_rate": 2.0632834022784247e-05,
"loss": 0.2502,
"step": 3363
},
{
"epoch": 1.15,
"learning_rate": 2.0619091644889635e-05,
"loss": 0.2216,
"step": 3364
},
{
"epoch": 1.15,
"learning_rate": 2.0605350632552784e-05,
"loss": 0.2329,
"step": 3365
},
{
"epoch": 1.15,
"learning_rate": 2.059161099005684e-05,
"loss": 0.2112,
"step": 3366
},
{
"epoch": 1.15,
"learning_rate": 2.0577872721684558e-05,
"loss": 0.2192,
"step": 3367
},
{
"epoch": 1.15,
"learning_rate": 2.056413583171823e-05,
"loss": 0.2105,
"step": 3368
},
{
"epoch": 1.15,
"learning_rate": 2.0550400324439733e-05,
"loss": 0.2135,
"step": 3369
},
{
"epoch": 1.15,
"learning_rate": 2.0536666204130532e-05,
"loss": 0.2349,
"step": 3370
},
{
"epoch": 1.15,
"learning_rate": 2.0522933475071625e-05,
"loss": 0.2078,
"step": 3371
},
{
"epoch": 1.15,
"learning_rate": 2.0509202141543606e-05,
"loss": 0.2168,
"step": 3372
},
{
"epoch": 1.15,
"learning_rate": 2.0495472207826612e-05,
"loss": 0.2435,
"step": 3373
},
{
"epoch": 1.15,
"learning_rate": 2.0481743678200353e-05,
"loss": 0.2402,
"step": 3374
},
{
"epoch": 1.15,
"learning_rate": 2.04680165569441e-05,
"loss": 0.2286,
"step": 3375
},
{
"epoch": 1.15,
"learning_rate": 2.0454290848336698e-05,
"loss": 0.2378,
"step": 3376
},
{
"epoch": 1.15,
"learning_rate": 2.044056655665653e-05,
"loss": 0.2307,
"step": 3377
},
{
"epoch": 1.15,
"learning_rate": 2.0426843686181553e-05,
"loss": 0.2176,
"step": 3378
},
{
"epoch": 1.15,
"learning_rate": 2.0413122241189274e-05,
"loss": 0.2138,
"step": 3379
},
{
"epoch": 1.15,
"learning_rate": 2.0399402225956754e-05,
"loss": 0.2104,
"step": 3380
},
{
"epoch": 1.15,
"learning_rate": 2.0385683644760613e-05,
"loss": 0.2595,
"step": 3381
},
{
"epoch": 1.15,
"learning_rate": 2.0371966501877017e-05,
"loss": 0.2369,
"step": 3382
},
{
"epoch": 1.15,
"learning_rate": 2.0358250801581703e-05,
"loss": 0.2044,
"step": 3383
},
{
"epoch": 1.15,
"learning_rate": 2.034453654814993e-05,
"loss": 0.207,
"step": 3384
},
{
"epoch": 1.15,
"learning_rate": 2.033082374585653e-05,
"loss": 0.2065,
"step": 3385
},
{
"epoch": 1.16,
"learning_rate": 2.031711239897587e-05,
"loss": 0.2642,
"step": 3386
},
{
"epoch": 1.16,
"learning_rate": 2.0303402511781865e-05,
"loss": 0.2165,
"step": 3387
},
{
"epoch": 1.16,
"learning_rate": 2.028969408854797e-05,
"loss": 0.2126,
"step": 3388
},
{
"epoch": 1.16,
"learning_rate": 2.0275987133547197e-05,
"loss": 0.2108,
"step": 3389
},
{
"epoch": 1.16,
"learning_rate": 2.0262281651052094e-05,
"loss": 0.2191,
"step": 3390
},
{
"epoch": 1.16,
"learning_rate": 2.0248577645334756e-05,
"loss": 0.2082,
"step": 3391
},
{
"epoch": 1.16,
"learning_rate": 2.0234875120666806e-05,
"loss": 0.1964,
"step": 3392
},
{
"epoch": 1.16,
"learning_rate": 2.0221174081319406e-05,
"loss": 0.2211,
"step": 3393
},
{
"epoch": 1.16,
"learning_rate": 2.020747453156326e-05,
"loss": 0.2235,
"step": 3394
},
{
"epoch": 1.16,
"learning_rate": 2.0193776475668605e-05,
"loss": 0.1884,
"step": 3395
},
{
"epoch": 1.16,
"learning_rate": 2.0180079917905223e-05,
"loss": 0.2209,
"step": 3396
},
{
"epoch": 1.16,
"learning_rate": 2.016638486254242e-05,
"loss": 0.1937,
"step": 3397
},
{
"epoch": 1.16,
"learning_rate": 2.0152691313849027e-05,
"loss": 0.2345,
"step": 3398
},
{
"epoch": 1.16,
"learning_rate": 2.0138999276093413e-05,
"loss": 0.1822,
"step": 3399
},
{
"epoch": 1.16,
"learning_rate": 2.0125308753543482e-05,
"loss": 0.2013,
"step": 3400
},
{
"epoch": 1.16,
"learning_rate": 2.0111619750466652e-05,
"loss": 0.2158,
"step": 3401
},
{
"epoch": 1.16,
"learning_rate": 2.0097932271129877e-05,
"loss": 0.2366,
"step": 3402
},
{
"epoch": 1.16,
"learning_rate": 2.0084246319799636e-05,
"loss": 0.1824,
"step": 3403
},
{
"epoch": 1.16,
"learning_rate": 2.007056190074193e-05,
"loss": 0.2494,
"step": 3404
},
{
"epoch": 1.16,
"learning_rate": 2.0056879018222284e-05,
"loss": 0.2374,
"step": 3405
},
{
"epoch": 1.16,
"learning_rate": 2.0043197676505737e-05,
"loss": 0.2099,
"step": 3406
},
{
"epoch": 1.16,
"learning_rate": 2.0029517879856858e-05,
"loss": 0.215,
"step": 3407
},
{
"epoch": 1.16,
"learning_rate": 2.0015839632539715e-05,
"loss": 0.2102,
"step": 3408
},
{
"epoch": 1.16,
"learning_rate": 2.0002162938817924e-05,
"loss": 0.2548,
"step": 3409
},
{
"epoch": 1.16,
"learning_rate": 1.998848780295459e-05,
"loss": 0.2489,
"step": 3410
},
{
"epoch": 1.16,
"learning_rate": 1.997481422921235e-05,
"loss": 0.2428,
"step": 3411
},
{
"epoch": 1.16,
"learning_rate": 1.9961142221853337e-05,
"loss": 0.2188,
"step": 3412
},
{
"epoch": 1.16,
"learning_rate": 1.994747178513921e-05,
"loss": 0.214,
"step": 3413
},
{
"epoch": 1.16,
"learning_rate": 1.9933802923331128e-05,
"loss": 0.2196,
"step": 3414
},
{
"epoch": 1.17,
"learning_rate": 1.9920135640689773e-05,
"loss": 0.2306,
"step": 3415
},
{
"epoch": 1.17,
"learning_rate": 1.9906469941475313e-05,
"loss": 0.2302,
"step": 3416
},
{
"epoch": 1.17,
"learning_rate": 1.989280582994746e-05,
"loss": 0.1938,
"step": 3417
},
{
"epoch": 1.17,
"learning_rate": 1.9879143310365386e-05,
"loss": 0.2185,
"step": 3418
},
{
"epoch": 1.17,
"learning_rate": 1.9865482386987794e-05,
"loss": 0.1971,
"step": 3419
},
{
"epoch": 1.17,
"learning_rate": 1.9851823064072887e-05,
"loss": 0.2384,
"step": 3420
},
{
"epoch": 1.17,
"learning_rate": 1.9838165345878352e-05,
"loss": 0.2445,
"step": 3421
},
{
"epoch": 1.17,
"learning_rate": 1.98245092366614e-05,
"loss": 0.2193,
"step": 3422
},
{
"epoch": 1.17,
"learning_rate": 1.981085474067873e-05,
"loss": 0.2127,
"step": 3423
},
{
"epoch": 1.17,
"learning_rate": 1.9797201862186536e-05,
"loss": 0.236,
"step": 3424
},
{
"epoch": 1.17,
"learning_rate": 1.978355060544051e-05,
"loss": 0.1892,
"step": 3425
},
{
"epoch": 1.17,
"learning_rate": 1.976990097469583e-05,
"loss": 0.2054,
"step": 3426
},
{
"epoch": 1.17,
"learning_rate": 1.9756252974207183e-05,
"loss": 0.2171,
"step": 3427
},
{
"epoch": 1.17,
"learning_rate": 1.9742606608228738e-05,
"loss": 0.226,
"step": 3428
},
{
"epoch": 1.17,
"learning_rate": 1.9728961881014158e-05,
"loss": 0.2384,
"step": 3429
},
{
"epoch": 1.17,
"learning_rate": 1.9715318796816582e-05,
"loss": 0.2084,
"step": 3430
},
{
"epoch": 1.17,
"learning_rate": 1.970167735988867e-05,
"loss": 0.2015,
"step": 3431
},
{
"epoch": 1.17,
"learning_rate": 1.9688037574482532e-05,
"loss": 0.2322,
"step": 3432
},
{
"epoch": 1.17,
"learning_rate": 1.9674399444849784e-05,
"loss": 0.2043,
"step": 3433
},
{
"epoch": 1.17,
"learning_rate": 1.966076297524151e-05,
"loss": 0.2032,
"step": 3434
},
{
"epoch": 1.17,
"learning_rate": 1.9647128169908304e-05,
"loss": 0.2258,
"step": 3435
},
{
"epoch": 1.17,
"learning_rate": 1.9633495033100212e-05,
"loss": 0.2475,
"step": 3436
},
{
"epoch": 1.17,
"learning_rate": 1.9619863569066777e-05,
"loss": 0.2619,
"step": 3437
},
{
"epoch": 1.17,
"learning_rate": 1.9606233782057008e-05,
"loss": 0.1935,
"step": 3438
},
{
"epoch": 1.17,
"learning_rate": 1.959260567631941e-05,
"loss": 0.2355,
"step": 3439
},
{
"epoch": 1.17,
"learning_rate": 1.9578979256101936e-05,
"loss": 0.2409,
"step": 3440
},
{
"epoch": 1.17,
"learning_rate": 1.9565354525652043e-05,
"loss": 0.2235,
"step": 3441
},
{
"epoch": 1.17,
"learning_rate": 1.955173148921665e-05,
"loss": 0.2327,
"step": 3442
},
{
"epoch": 1.17,
"learning_rate": 1.9538110151042142e-05,
"loss": 0.2251,
"step": 3443
},
{
"epoch": 1.18,
"learning_rate": 1.9524490515374363e-05,
"loss": 0.2191,
"step": 3444
},
{
"epoch": 1.18,
"learning_rate": 1.9510872586458674e-05,
"loss": 0.2071,
"step": 3445
},
{
"epoch": 1.18,
"learning_rate": 1.9497256368539858e-05,
"loss": 0.248,
"step": 3446
},
{
"epoch": 1.18,
"learning_rate": 1.9483641865862168e-05,
"loss": 0.2401,
"step": 3447
},
{
"epoch": 1.18,
"learning_rate": 1.947002908266935e-05,
"loss": 0.24,
"step": 3448
},
{
"epoch": 1.18,
"learning_rate": 1.945641802320459e-05,
"loss": 0.2356,
"step": 3449
},
{
"epoch": 1.18,
"learning_rate": 1.944280869171055e-05,
"loss": 0.2203,
"step": 3450
},
{
"epoch": 1.18,
"learning_rate": 1.9429201092429336e-05,
"loss": 0.2345,
"step": 3451
},
{
"epoch": 1.18,
"learning_rate": 1.9415595229602535e-05,
"loss": 0.2126,
"step": 3452
},
{
"epoch": 1.18,
"learning_rate": 1.9401991107471175e-05,
"loss": 0.2489,
"step": 3453
},
{
"epoch": 1.18,
"learning_rate": 1.938838873027576e-05,
"loss": 0.1952,
"step": 3454
},
{
"epoch": 1.18,
"learning_rate": 1.9374788102256237e-05,
"loss": 0.2378,
"step": 3455
},
{
"epoch": 1.18,
"learning_rate": 1.9361189227652004e-05,
"loss": 0.255,
"step": 3456
},
{
"epoch": 1.18,
"learning_rate": 1.934759211070193e-05,
"loss": 0.2181,
"step": 3457
},
{
"epoch": 1.18,
"learning_rate": 1.93339967556443e-05,
"loss": 0.2181,
"step": 3458
},
{
"epoch": 1.18,
"learning_rate": 1.932040316671691e-05,
"loss": 0.2165,
"step": 3459
},
{
"epoch": 1.18,
"learning_rate": 1.930681134815695e-05,
"loss": 0.1885,
"step": 3460
},
{
"epoch": 1.18,
"learning_rate": 1.9293221304201083e-05,
"loss": 0.2354,
"step": 3461
},
{
"epoch": 1.18,
"learning_rate": 1.927963303908542e-05,
"loss": 0.2422,
"step": 3462
},
{
"epoch": 1.18,
"learning_rate": 1.9266046557045503e-05,
"loss": 0.2097,
"step": 3463
},
{
"epoch": 1.18,
"learning_rate": 1.9252461862316328e-05,
"loss": 0.2215,
"step": 3464
},
{
"epoch": 1.18,
"learning_rate": 1.923887895913234e-05,
"loss": 0.2101,
"step": 3465
},
{
"epoch": 1.18,
"learning_rate": 1.922529785172741e-05,
"loss": 0.1893,
"step": 3466
},
{
"epoch": 1.18,
"learning_rate": 1.921171854433486e-05,
"loss": 0.2233,
"step": 3467
},
{
"epoch": 1.18,
"learning_rate": 1.919814104118746e-05,
"loss": 0.2385,
"step": 3468
},
{
"epoch": 1.18,
"learning_rate": 1.918456534651739e-05,
"loss": 0.2419,
"step": 3469
},
{
"epoch": 1.18,
"learning_rate": 1.9170991464556296e-05,
"loss": 0.2191,
"step": 3470
},
{
"epoch": 1.18,
"learning_rate": 1.9157419399535232e-05,
"loss": 0.1935,
"step": 3471
},
{
"epoch": 1.18,
"learning_rate": 1.9143849155684705e-05,
"loss": 0.2043,
"step": 3472
},
{
"epoch": 1.18,
"learning_rate": 1.9130280737234652e-05,
"loss": 0.2287,
"step": 3473
},
{
"epoch": 1.19,
"learning_rate": 1.9116714148414444e-05,
"loss": 0.1927,
"step": 3474
},
{
"epoch": 1.19,
"learning_rate": 1.9103149393452866e-05,
"loss": 0.2146,
"step": 3475
},
{
"epoch": 1.19,
"learning_rate": 1.908958647657814e-05,
"loss": 0.2334,
"step": 3476
},
{
"epoch": 1.19,
"learning_rate": 1.907602540201792e-05,
"loss": 0.26,
"step": 3477
},
{
"epoch": 1.19,
"learning_rate": 1.9062466173999283e-05,
"loss": 0.2331,
"step": 3478
},
{
"epoch": 1.19,
"learning_rate": 1.904890879674872e-05,
"loss": 0.2737,
"step": 3479
},
{
"epoch": 1.19,
"learning_rate": 1.9035353274492162e-05,
"loss": 0.1937,
"step": 3480
},
{
"epoch": 1.19,
"learning_rate": 1.9021799611454953e-05,
"loss": 0.2307,
"step": 3481
},
{
"epoch": 1.19,
"learning_rate": 1.9008247811861856e-05,
"loss": 0.2047,
"step": 3482
},
{
"epoch": 1.19,
"learning_rate": 1.8994697879937058e-05,
"loss": 0.2039,
"step": 3483
},
{
"epoch": 1.19,
"learning_rate": 1.8981149819904153e-05,
"loss": 0.2296,
"step": 3484
},
{
"epoch": 1.19,
"learning_rate": 1.8967603635986173e-05,
"loss": 0.2364,
"step": 3485
},
{
"epoch": 1.19,
"learning_rate": 1.8954059332405534e-05,
"loss": 0.2034,
"step": 3486
},
{
"epoch": 1.19,
"learning_rate": 1.8940516913384104e-05,
"loss": 0.2345,
"step": 3487
},
{
"epoch": 1.19,
"learning_rate": 1.8926976383143137e-05,
"loss": 0.2418,
"step": 3488
},
{
"epoch": 1.19,
"learning_rate": 1.8913437745903307e-05,
"loss": 0.2032,
"step": 3489
},
{
"epoch": 1.19,
"learning_rate": 1.8899901005884685e-05,
"loss": 0.1885,
"step": 3490
},
{
"epoch": 1.19,
"learning_rate": 1.888636616730677e-05,
"loss": 0.2082,
"step": 3491
},
{
"epoch": 1.19,
"learning_rate": 1.887283323438845e-05,
"loss": 0.2332,
"step": 3492
},
{
"epoch": 1.19,
"learning_rate": 1.8859302211348045e-05,
"loss": 0.265,
"step": 3493
},
{
"epoch": 1.19,
"learning_rate": 1.8845773102403252e-05,
"loss": 0.2072,
"step": 3494
},
{
"epoch": 1.19,
"learning_rate": 1.8832245911771186e-05,
"loss": 0.2207,
"step": 3495
},
{
"epoch": 1.19,
"learning_rate": 1.8818720643668358e-05,
"loss": 0.2335,
"step": 3496
},
{
"epoch": 1.19,
"learning_rate": 1.880519730231068e-05,
"loss": 0.2238,
"step": 3497
},
{
"epoch": 1.19,
"learning_rate": 1.8791675891913472e-05,
"loss": 0.2454,
"step": 3498
},
{
"epoch": 1.19,
"learning_rate": 1.8778156416691433e-05,
"loss": 0.2261,
"step": 3499
},
{
"epoch": 1.19,
"learning_rate": 1.8764638880858694e-05,
"loss": 0.2191,
"step": 3500
},
{
"epoch": 1.19,
"learning_rate": 1.8751123288628747e-05,
"loss": 0.2387,
"step": 3501
},
{
"epoch": 1.19,
"learning_rate": 1.8737609644214492e-05,
"loss": 0.23,
"step": 3502
},
{
"epoch": 1.2,
"learning_rate": 1.872409795182822e-05,
"loss": 0.2046,
"step": 3503
},
{
"epoch": 1.2,
"learning_rate": 1.8710588215681613e-05,
"loss": 0.2219,
"step": 3504
},
{
"epoch": 1.2,
"learning_rate": 1.8697080439985738e-05,
"loss": 0.2335,
"step": 3505
},
{
"epoch": 1.2,
"learning_rate": 1.8683574628951068e-05,
"loss": 0.1937,
"step": 3506
},
{
"epoch": 1.2,
"learning_rate": 1.8670070786787448e-05,
"loss": 0.2226,
"step": 3507
},
{
"epoch": 1.2,
"learning_rate": 1.865656891770411e-05,
"loss": 0.2254,
"step": 3508
},
{
"epoch": 1.2,
"learning_rate": 1.864306902590968e-05,
"loss": 0.2048,
"step": 3509
},
{
"epoch": 1.2,
"learning_rate": 1.8629571115612153e-05,
"loss": 0.2349,
"step": 3510
},
{
"epoch": 1.2,
"learning_rate": 1.8616075191018934e-05,
"loss": 0.2195,
"step": 3511
},
{
"epoch": 1.2,
"learning_rate": 1.8602581256336776e-05,
"loss": 0.2348,
"step": 3512
},
{
"epoch": 1.2,
"learning_rate": 1.8589089315771824e-05,
"loss": 0.2333,
"step": 3513
},
{
"epoch": 1.2,
"learning_rate": 1.8575599373529616e-05,
"loss": 0.182,
"step": 3514
},
{
"epoch": 1.2,
"learning_rate": 1.8562111433815056e-05,
"loss": 0.2447,
"step": 3515
},
{
"epoch": 1.2,
"learning_rate": 1.854862550083241e-05,
"loss": 0.2185,
"step": 3516
},
{
"epoch": 1.2,
"learning_rate": 1.8535141578785345e-05,
"loss": 0.2155,
"step": 3517
},
{
"epoch": 1.2,
"learning_rate": 1.8521659671876875e-05,
"loss": 0.2328,
"step": 3518
},
{
"epoch": 1.2,
"learning_rate": 1.8508179784309405e-05,
"loss": 0.2071,
"step": 3519
},
{
"epoch": 1.2,
"learning_rate": 1.8494701920284703e-05,
"loss": 0.2055,
"step": 3520
},
{
"epoch": 1.2,
"learning_rate": 1.848122608400391e-05,
"loss": 0.2082,
"step": 3521
},
{
"epoch": 1.2,
"learning_rate": 1.8467752279667526e-05,
"loss": 0.2516,
"step": 3522
},
{
"epoch": 1.2,
"learning_rate": 1.845428051147543e-05,
"loss": 0.2275,
"step": 3523
},
{
"epoch": 1.2,
"learning_rate": 1.8440810783626855e-05,
"loss": 0.2445,
"step": 3524
},
{
"epoch": 1.2,
"learning_rate": 1.842734310032041e-05,
"loss": 0.2176,
"step": 3525
},
{
"epoch": 1.2,
"learning_rate": 1.8413877465754053e-05,
"loss": 0.2316,
"step": 3526
},
{
"epoch": 1.2,
"learning_rate": 1.8400413884125106e-05,
"loss": 0.2204,
"step": 3527
},
{
"epoch": 1.2,
"learning_rate": 1.838695235963027e-05,
"loss": 0.2013,
"step": 3528
},
{
"epoch": 1.2,
"learning_rate": 1.8373492896465584e-05,
"loss": 0.2265,
"step": 3529
},
{
"epoch": 1.2,
"learning_rate": 1.836003549882645e-05,
"loss": 0.2117,
"step": 3530
},
{
"epoch": 1.2,
"learning_rate": 1.8346580170907617e-05,
"loss": 0.2299,
"step": 3531
},
{
"epoch": 1.21,
"learning_rate": 1.8333126916903215e-05,
"loss": 0.2233,
"step": 3532
},
{
"epoch": 1.21,
"learning_rate": 1.8319675741006704e-05,
"loss": 0.2184,
"step": 3533
},
{
"epoch": 1.21,
"learning_rate": 1.8306226647410902e-05,
"loss": 0.2071,
"step": 3534
},
{
"epoch": 1.21,
"learning_rate": 1.829277964030798e-05,
"loss": 0.2408,
"step": 3535
},
{
"epoch": 1.21,
"learning_rate": 1.8279334723889454e-05,
"loss": 0.2391,
"step": 3536
},
{
"epoch": 1.21,
"learning_rate": 1.82658919023462e-05,
"loss": 0.2126,
"step": 3537
},
{
"epoch": 1.21,
"learning_rate": 1.825245117986843e-05,
"loss": 0.225,
"step": 3538
},
{
"epoch": 1.21,
"learning_rate": 1.8239012560645706e-05,
"loss": 0.2021,
"step": 3539
},
{
"epoch": 1.21,
"learning_rate": 1.822557604886693e-05,
"loss": 0.2437,
"step": 3540
},
{
"epoch": 1.21,
"learning_rate": 1.8212141648720346e-05,
"loss": 0.1965,
"step": 3541
},
{
"epoch": 1.21,
"learning_rate": 1.8198709364393557e-05,
"loss": 0.225,
"step": 3542
},
{
"epoch": 1.21,
"learning_rate": 1.818527920007349e-05,
"loss": 0.2136,
"step": 3543
},
{
"epoch": 1.21,
"learning_rate": 1.8171851159946408e-05,
"loss": 0.2166,
"step": 3544
},
{
"epoch": 1.21,
"learning_rate": 1.815842524819793e-05,
"loss": 0.2764,
"step": 3545
},
{
"epoch": 1.21,
"learning_rate": 1.814500146901299e-05,
"loss": 0.1969,
"step": 3546
},
{
"epoch": 1.21,
"learning_rate": 1.8131579826575872e-05,
"loss": 0.2003,
"step": 3547
},
{
"epoch": 1.21,
"learning_rate": 1.8118160325070194e-05,
"loss": 0.2384,
"step": 3548
},
{
"epoch": 1.21,
"learning_rate": 1.8104742968678887e-05,
"loss": 0.2278,
"step": 3549
},
{
"epoch": 1.21,
"learning_rate": 1.8091327761584244e-05,
"loss": 0.2281,
"step": 3550
},
{
"epoch": 1.21,
"learning_rate": 1.807791470796787e-05,
"loss": 0.2204,
"step": 3551
},
{
"epoch": 1.21,
"learning_rate": 1.80645038120107e-05,
"loss": 0.2073,
"step": 3552
},
{
"epoch": 1.21,
"learning_rate": 1.8051095077892998e-05,
"loss": 0.2269,
"step": 3553
},
{
"epoch": 1.21,
"learning_rate": 1.8037688509794347e-05,
"loss": 0.2198,
"step": 3554
},
{
"epoch": 1.21,
"learning_rate": 1.8024284111893665e-05,
"loss": 0.23,
"step": 3555
},
{
"epoch": 1.21,
"learning_rate": 1.80108818883692e-05,
"loss": 0.2267,
"step": 3556
},
{
"epoch": 1.21,
"learning_rate": 1.79974818433985e-05,
"loss": 0.2142,
"step": 3557
},
{
"epoch": 1.21,
"learning_rate": 1.798408398115846e-05,
"loss": 0.2508,
"step": 3558
},
{
"epoch": 1.21,
"learning_rate": 1.7970688305825272e-05,
"loss": 0.225,
"step": 3559
},
{
"epoch": 1.21,
"learning_rate": 1.7957294821574456e-05,
"loss": 0.2021,
"step": 3560
},
{
"epoch": 1.21,
"learning_rate": 1.7943903532580854e-05,
"loss": 0.2195,
"step": 3561
},
{
"epoch": 1.22,
"learning_rate": 1.7930514443018603e-05,
"loss": 0.2432,
"step": 3562
},
{
"epoch": 1.22,
"learning_rate": 1.7917127557061188e-05,
"loss": 0.2296,
"step": 3563
},
{
"epoch": 1.22,
"learning_rate": 1.7903742878881387e-05,
"loss": 0.2082,
"step": 3564
},
{
"epoch": 1.22,
"learning_rate": 1.789036041265128e-05,
"loss": 0.2289,
"step": 3565
},
{
"epoch": 1.22,
"learning_rate": 1.7876980162542272e-05,
"loss": 0.2318,
"step": 3566
},
{
"epoch": 1.22,
"learning_rate": 1.7863602132725084e-05,
"loss": 0.2017,
"step": 3567
},
{
"epoch": 1.22,
"learning_rate": 1.7850226327369725e-05,
"loss": 0.2247,
"step": 3568
},
{
"epoch": 1.22,
"learning_rate": 1.7836852750645518e-05,
"loss": 0.228,
"step": 3569
},
{
"epoch": 1.22,
"learning_rate": 1.7823481406721106e-05,
"loss": 0.216,
"step": 3570
},
{
"epoch": 1.22,
"learning_rate": 1.7810112299764418e-05,
"loss": 0.2089,
"step": 3571
},
{
"epoch": 1.22,
"learning_rate": 1.7796745433942702e-05,
"loss": 0.2155,
"step": 3572
},
{
"epoch": 1.22,
"learning_rate": 1.7783380813422485e-05,
"loss": 0.2168,
"step": 3573
},
{
"epoch": 1.22,
"learning_rate": 1.777001844236961e-05,
"loss": 0.1927,
"step": 3574
},
{
"epoch": 1.22,
"learning_rate": 1.775665832494921e-05,
"loss": 0.2291,
"step": 3575
},
{
"epoch": 1.22,
"learning_rate": 1.774330046532573e-05,
"loss": 0.1973,
"step": 3576
},
{
"epoch": 1.22,
"learning_rate": 1.77299448676629e-05,
"loss": 0.2042,
"step": 3577
},
{
"epoch": 1.22,
"learning_rate": 1.771659153612375e-05,
"loss": 0.2051,
"step": 3578
},
{
"epoch": 1.22,
"learning_rate": 1.770324047487059e-05,
"loss": 0.2187,
"step": 3579
},
{
"epoch": 1.22,
"learning_rate": 1.768989168806504e-05,
"loss": 0.2038,
"step": 3580
},
{
"epoch": 1.22,
"learning_rate": 1.7676545179867998e-05,
"loss": 0.2423,
"step": 3581
},
{
"epoch": 1.22,
"learning_rate": 1.766320095443967e-05,
"loss": 0.2269,
"step": 3582
},
{
"epoch": 1.22,
"learning_rate": 1.764985901593952e-05,
"loss": 0.2201,
"step": 3583
},
{
"epoch": 1.22,
"learning_rate": 1.7636519368526337e-05,
"loss": 0.2369,
"step": 3584
},
{
"epoch": 1.22,
"learning_rate": 1.762318201635817e-05,
"loss": 0.2246,
"step": 3585
},
{
"epoch": 1.22,
"learning_rate": 1.760984696359236e-05,
"loss": 0.2166,
"step": 3586
},
{
"epoch": 1.22,
"learning_rate": 1.7596514214385518e-05,
"loss": 0.2076,
"step": 3587
},
{
"epoch": 1.22,
"learning_rate": 1.7583183772893558e-05,
"loss": 0.2368,
"step": 3588
},
{
"epoch": 1.22,
"learning_rate": 1.756985564327167e-05,
"loss": 0.2272,
"step": 3589
},
{
"epoch": 1.22,
"learning_rate": 1.7556529829674318e-05,
"loss": 0.2319,
"step": 3590
},
{
"epoch": 1.23,
"learning_rate": 1.7543206336255236e-05,
"loss": 0.2059,
"step": 3591
},
{
"epoch": 1.23,
"learning_rate": 1.7529885167167453e-05,
"loss": 0.1967,
"step": 3592
},
{
"epoch": 1.23,
"learning_rate": 1.751656632656326e-05,
"loss": 0.1975,
"step": 3593
},
{
"epoch": 1.23,
"learning_rate": 1.750324981859422e-05,
"loss": 0.2276,
"step": 3594
},
{
"epoch": 1.23,
"learning_rate": 1.7489935647411194e-05,
"loss": 0.2052,
"step": 3595
},
{
"epoch": 1.23,
"learning_rate": 1.7476623817164266e-05,
"loss": 0.2193,
"step": 3596
},
{
"epoch": 1.23,
"learning_rate": 1.746331433200286e-05,
"loss": 0.2099,
"step": 3597
},
{
"epoch": 1.23,
"learning_rate": 1.74500071960756e-05,
"loss": 0.2385,
"step": 3598
},
{
"epoch": 1.23,
"learning_rate": 1.7436702413530427e-05,
"loss": 0.2291,
"step": 3599
},
{
"epoch": 1.23,
"learning_rate": 1.742339998851451e-05,
"loss": 0.2197,
"step": 3600
},
{
"epoch": 1.23,
"learning_rate": 1.7410099925174307e-05,
"loss": 0.2211,
"step": 3601
},
{
"epoch": 1.23,
"learning_rate": 1.739680222765554e-05,
"loss": 0.2326,
"step": 3602
},
{
"epoch": 1.23,
"learning_rate": 1.7383506900103187e-05,
"loss": 0.2009,
"step": 3603
},
{
"epoch": 1.23,
"learning_rate": 1.737021394666149e-05,
"loss": 0.2151,
"step": 3604
},
{
"epoch": 1.23,
"learning_rate": 1.7356923371473942e-05,
"loss": 0.2273,
"step": 3605
},
{
"epoch": 1.23,
"learning_rate": 1.7343635178683308e-05,
"loss": 0.2216,
"step": 3606
},
{
"epoch": 1.23,
"learning_rate": 1.73303493724316e-05,
"loss": 0.2136,
"step": 3607
},
{
"epoch": 1.23,
"learning_rate": 1.7317065956860098e-05,
"loss": 0.1768,
"step": 3608
},
{
"epoch": 1.23,
"learning_rate": 1.7303784936109326e-05,
"loss": 0.2115,
"step": 3609
},
{
"epoch": 1.23,
"learning_rate": 1.729050631431905e-05,
"loss": 0.2451,
"step": 3610
},
{
"epoch": 1.23,
"learning_rate": 1.727723009562833e-05,
"loss": 0.2003,
"step": 3611
},
{
"epoch": 1.23,
"learning_rate": 1.7263956284175436e-05,
"loss": 0.2059,
"step": 3612
},
{
"epoch": 1.23,
"learning_rate": 1.7250684884097906e-05,
"loss": 0.2283,
"step": 3613
},
{
"epoch": 1.23,
"learning_rate": 1.723741589953251e-05,
"loss": 0.2275,
"step": 3614
},
{
"epoch": 1.23,
"learning_rate": 1.7224149334615294e-05,
"loss": 0.2294,
"step": 3615
},
{
"epoch": 1.23,
"learning_rate": 1.7210885193481523e-05,
"loss": 0.2501,
"step": 3616
},
{
"epoch": 1.23,
"learning_rate": 1.7197623480265722e-05,
"loss": 0.2076,
"step": 3617
},
{
"epoch": 1.23,
"learning_rate": 1.718436419910165e-05,
"loss": 0.195,
"step": 3618
},
{
"epoch": 1.23,
"learning_rate": 1.7171107354122308e-05,
"loss": 0.2197,
"step": 3619
},
{
"epoch": 1.24,
"learning_rate": 1.715785294945994e-05,
"loss": 0.2199,
"step": 3620
},
{
"epoch": 1.24,
"learning_rate": 1.7144600989246044e-05,
"loss": 0.2029,
"step": 3621
},
{
"epoch": 1.24,
"learning_rate": 1.713135147761133e-05,
"loss": 0.2052,
"step": 3622
},
{
"epoch": 1.24,
"learning_rate": 1.7118104418685766e-05,
"loss": 0.217,
"step": 3623
},
{
"epoch": 1.24,
"learning_rate": 1.710485981659853e-05,
"loss": 0.2257,
"step": 3624
},
{
"epoch": 1.24,
"learning_rate": 1.7091617675478073e-05,
"loss": 0.2256,
"step": 3625
},
{
"epoch": 1.24,
"learning_rate": 1.7078377999452043e-05,
"loss": 0.2128,
"step": 3626
},
{
"epoch": 1.24,
"learning_rate": 1.706514079264734e-05,
"loss": 0.2488,
"step": 3627
},
{
"epoch": 1.24,
"learning_rate": 1.7051906059190088e-05,
"loss": 0.2345,
"step": 3628
},
{
"epoch": 1.24,
"learning_rate": 1.7038673803205636e-05,
"loss": 0.2379,
"step": 3629
},
{
"epoch": 1.24,
"learning_rate": 1.7025444028818566e-05,
"loss": 0.205,
"step": 3630
},
{
"epoch": 1.24,
"learning_rate": 1.7012216740152687e-05,
"loss": 0.2171,
"step": 3631
},
{
"epoch": 1.24,
"learning_rate": 1.6998991941331028e-05,
"loss": 0.2073,
"step": 3632
},
{
"epoch": 1.24,
"learning_rate": 1.698576963647584e-05,
"loss": 0.2515,
"step": 3633
},
{
"epoch": 1.24,
"learning_rate": 1.697254982970861e-05,
"loss": 0.2474,
"step": 3634
},
{
"epoch": 1.24,
"learning_rate": 1.6959332525150046e-05,
"loss": 0.203,
"step": 3635
},
{
"epoch": 1.24,
"learning_rate": 1.6946117726920054e-05,
"loss": 0.2282,
"step": 3636
},
{
"epoch": 1.24,
"learning_rate": 1.6932905439137776e-05,
"loss": 0.253,
"step": 3637
},
{
"epoch": 1.24,
"learning_rate": 1.6919695665921558e-05,
"loss": 0.2347,
"step": 3638
},
{
"epoch": 1.24,
"learning_rate": 1.6906488411389e-05,
"loss": 0.2397,
"step": 3639
},
{
"epoch": 1.24,
"learning_rate": 1.689328367965686e-05,
"loss": 0.1672,
"step": 3640
},
{
"epoch": 1.24,
"learning_rate": 1.688008147484117e-05,
"loss": 0.2351,
"step": 3641
},
{
"epoch": 1.24,
"learning_rate": 1.686688180105712e-05,
"loss": 0.2069,
"step": 3642
},
{
"epoch": 1.24,
"learning_rate": 1.6853684662419142e-05,
"loss": 0.2254,
"step": 3643
},
{
"epoch": 1.24,
"learning_rate": 1.684049006304087e-05,
"loss": 0.2039,
"step": 3644
},
{
"epoch": 1.24,
"learning_rate": 1.6827298007035146e-05,
"loss": 0.2278,
"step": 3645
},
{
"epoch": 1.24,
"learning_rate": 1.6814108498514015e-05,
"loss": 0.2134,
"step": 3646
},
{
"epoch": 1.24,
"learning_rate": 1.6800921541588744e-05,
"loss": 0.1957,
"step": 3647
},
{
"epoch": 1.24,
"learning_rate": 1.6787737140369788e-05,
"loss": 0.2221,
"step": 3648
},
{
"epoch": 1.24,
"learning_rate": 1.6774555298966808e-05,
"loss": 0.2486,
"step": 3649
},
{
"epoch": 1.25,
"learning_rate": 1.676137602148868e-05,
"loss": 0.2328,
"step": 3650
},
{
"epoch": 1.25,
"learning_rate": 1.6748199312043452e-05,
"loss": 0.233,
"step": 3651
},
{
"epoch": 1.25,
"learning_rate": 1.6735025174738404e-05,
"loss": 0.2425,
"step": 3652
},
{
"epoch": 1.25,
"learning_rate": 1.672185361368e-05,
"loss": 0.2202,
"step": 3653
},
{
"epoch": 1.25,
"learning_rate": 1.670868463297391e-05,
"loss": 0.209,
"step": 3654
},
{
"epoch": 1.25,
"learning_rate": 1.6695518236724976e-05,
"loss": 0.212,
"step": 3655
},
{
"epoch": 1.25,
"learning_rate": 1.6682354429037257e-05,
"loss": 0.2138,
"step": 3656
},
{
"epoch": 1.25,
"learning_rate": 1.6669193214013994e-05,
"loss": 0.2086,
"step": 3657
},
{
"epoch": 1.25,
"learning_rate": 1.665603459575763e-05,
"loss": 0.2368,
"step": 3658
},
{
"epoch": 1.25,
"learning_rate": 1.664287857836978e-05,
"loss": 0.3149,
"step": 3659
},
{
"epoch": 1.25,
"learning_rate": 1.6629725165951277e-05,
"loss": 0.2442,
"step": 3660
},
{
"epoch": 1.25,
"learning_rate": 1.6616574362602118e-05,
"loss": 0.2208,
"step": 3661
},
{
"epoch": 1.25,
"learning_rate": 1.6603426172421487e-05,
"loss": 0.2031,
"step": 3662
},
{
"epoch": 1.25,
"learning_rate": 1.6590280599507773e-05,
"loss": 0.2123,
"step": 3663
},
{
"epoch": 1.25,
"learning_rate": 1.657713764795853e-05,
"loss": 0.216,
"step": 3664
},
{
"epoch": 1.25,
"learning_rate": 1.6563997321870502e-05,
"loss": 0.1914,
"step": 3665
},
{
"epoch": 1.25,
"learning_rate": 1.655085962533961e-05,
"loss": 0.2499,
"step": 3666
},
{
"epoch": 1.25,
"learning_rate": 1.6537724562460975e-05,
"loss": 0.1992,
"step": 3667
},
{
"epoch": 1.25,
"learning_rate": 1.652459213732887e-05,
"loss": 0.2262,
"step": 3668
},
{
"epoch": 1.25,
"learning_rate": 1.6511462354036762e-05,
"loss": 0.2332,
"step": 3669
},
{
"epoch": 1.25,
"learning_rate": 1.6498335216677293e-05,
"loss": 0.2334,
"step": 3670
},
{
"epoch": 1.25,
"learning_rate": 1.6485210729342266e-05,
"loss": 0.1947,
"step": 3671
},
{
"epoch": 1.25,
"learning_rate": 1.6472088896122676e-05,
"loss": 0.2077,
"step": 3672
},
{
"epoch": 1.25,
"learning_rate": 1.6458969721108687e-05,
"loss": 0.2316,
"step": 3673
},
{
"epoch": 1.25,
"learning_rate": 1.644585320838963e-05,
"loss": 0.2232,
"step": 3674
},
{
"epoch": 1.25,
"learning_rate": 1.6432739362054007e-05,
"loss": 0.2123,
"step": 3675
},
{
"epoch": 1.25,
"learning_rate": 1.641962818618949e-05,
"loss": 0.2061,
"step": 3676
},
{
"epoch": 1.25,
"learning_rate": 1.640651968488291e-05,
"loss": 0.2112,
"step": 3677
},
{
"epoch": 1.25,
"learning_rate": 1.639341386222029e-05,
"loss": 0.2006,
"step": 3678
},
{
"epoch": 1.26,
"learning_rate": 1.6380310722286784e-05,
"loss": 0.2062,
"step": 3679
},
{
"epoch": 1.26,
"learning_rate": 1.6367210269166727e-05,
"loss": 0.221,
"step": 3680
},
{
"epoch": 1.26,
"learning_rate": 1.6354112506943635e-05,
"loss": 0.1921,
"step": 3681
},
{
"epoch": 1.26,
"learning_rate": 1.6341017439700147e-05,
"loss": 0.2318,
"step": 3682
},
{
"epoch": 1.26,
"learning_rate": 1.632792507151809e-05,
"loss": 0.2253,
"step": 3683
},
{
"epoch": 1.26,
"learning_rate": 1.6314835406478434e-05,
"loss": 0.2255,
"step": 3684
},
{
"epoch": 1.26,
"learning_rate": 1.6301748448661312e-05,
"loss": 0.2274,
"step": 3685
},
{
"epoch": 1.26,
"learning_rate": 1.628866420214603e-05,
"loss": 0.1994,
"step": 3686
},
{
"epoch": 1.26,
"learning_rate": 1.6275582671011018e-05,
"loss": 0.2198,
"step": 3687
},
{
"epoch": 1.26,
"learning_rate": 1.626250385933388e-05,
"loss": 0.2135,
"step": 3688
},
{
"epoch": 1.26,
"learning_rate": 1.624942777119136e-05,
"loss": 0.2193,
"step": 3689
},
{
"epoch": 1.26,
"learning_rate": 1.6236354410659364e-05,
"loss": 0.2331,
"step": 3690
},
{
"epoch": 1.26,
"learning_rate": 1.622328378181295e-05,
"loss": 0.2184,
"step": 3691
},
{
"epoch": 1.26,
"learning_rate": 1.6210215888726315e-05,
"loss": 0.2253,
"step": 3692
},
{
"epoch": 1.26,
"learning_rate": 1.6197150735472797e-05,
"loss": 0.2196,
"step": 3693
},
{
"epoch": 1.26,
"learning_rate": 1.6184088326124907e-05,
"loss": 0.2226,
"step": 3694
},
{
"epoch": 1.26,
"learning_rate": 1.6171028664754274e-05,
"loss": 0.2098,
"step": 3695
},
{
"epoch": 1.26,
"learning_rate": 1.615797175543168e-05,
"loss": 0.2196,
"step": 3696
},
{
"epoch": 1.26,
"learning_rate": 1.614491760222705e-05,
"loss": 0.2164,
"step": 3697
},
{
"epoch": 1.26,
"learning_rate": 1.6131866209209444e-05,
"loss": 0.2343,
"step": 3698
},
{
"epoch": 1.26,
"learning_rate": 1.6118817580447076e-05,
"loss": 0.219,
"step": 3699
},
{
"epoch": 1.26,
"learning_rate": 1.610577172000728e-05,
"loss": 0.2352,
"step": 3700
},
{
"epoch": 1.26,
"learning_rate": 1.6092728631956543e-05,
"loss": 0.1883,
"step": 3701
},
{
"epoch": 1.26,
"learning_rate": 1.6079688320360476e-05,
"loss": 0.2247,
"step": 3702
},
{
"epoch": 1.26,
"learning_rate": 1.6066650789283823e-05,
"loss": 0.2093,
"step": 3703
},
{
"epoch": 1.26,
"learning_rate": 1.6053616042790484e-05,
"loss": 0.2143,
"step": 3704
},
{
"epoch": 1.26,
"learning_rate": 1.6040584084943462e-05,
"loss": 0.2241,
"step": 3705
},
{
"epoch": 1.26,
"learning_rate": 1.6027554919804912e-05,
"loss": 0.226,
"step": 3706
},
{
"epoch": 1.26,
"learning_rate": 1.6014528551436088e-05,
"loss": 0.214,
"step": 3707
},
{
"epoch": 1.27,
"learning_rate": 1.6001504983897425e-05,
"loss": 0.2634,
"step": 3708
},
{
"epoch": 1.27,
"learning_rate": 1.598848422124844e-05,
"loss": 0.222,
"step": 3709
},
{
"epoch": 1.27,
"learning_rate": 1.597546626754779e-05,
"loss": 0.2073,
"step": 3710
},
{
"epoch": 1.27,
"learning_rate": 1.596245112685325e-05,
"loss": 0.1945,
"step": 3711
},
{
"epoch": 1.27,
"learning_rate": 1.5949438803221734e-05,
"loss": 0.203,
"step": 3712
},
{
"epoch": 1.27,
"learning_rate": 1.5936429300709264e-05,
"loss": 0.2126,
"step": 3713
},
{
"epoch": 1.27,
"learning_rate": 1.592342262337099e-05,
"loss": 0.2113,
"step": 3714
},
{
"epoch": 1.27,
"learning_rate": 1.5910418775261178e-05,
"loss": 0.2106,
"step": 3715
},
{
"epoch": 1.27,
"learning_rate": 1.5897417760433205e-05,
"loss": 0.2264,
"step": 3716
},
{
"epoch": 1.27,
"learning_rate": 1.588441958293958e-05,
"loss": 0.2054,
"step": 3717
},
{
"epoch": 1.27,
"learning_rate": 1.5871424246831917e-05,
"loss": 0.2043,
"step": 3718
},
{
"epoch": 1.27,
"learning_rate": 1.585843175616095e-05,
"loss": 0.2333,
"step": 3719
},
{
"epoch": 1.27,
"learning_rate": 1.584544211497652e-05,
"loss": 0.2289,
"step": 3720
},
{
"epoch": 1.27,
"learning_rate": 1.5832455327327573e-05,
"loss": 0.1879,
"step": 3721
},
{
"epoch": 1.27,
"learning_rate": 1.5819471397262193e-05,
"loss": 0.2564,
"step": 3722
},
{
"epoch": 1.27,
"learning_rate": 1.5806490328827556e-05,
"loss": 0.2387,
"step": 3723
},
{
"epoch": 1.27,
"learning_rate": 1.579351212606993e-05,
"loss": 0.3985,
"step": 3724
},
{
"epoch": 1.27,
"learning_rate": 1.578053679303472e-05,
"loss": 0.2095,
"step": 3725
},
{
"epoch": 1.27,
"learning_rate": 1.5767564333766417e-05,
"loss": 0.2244,
"step": 3726
},
{
"epoch": 1.27,
"learning_rate": 1.5754594752308623e-05,
"loss": 0.2252,
"step": 3727
},
{
"epoch": 1.27,
"learning_rate": 1.574162805270403e-05,
"loss": 0.2167,
"step": 3728
},
{
"epoch": 1.27,
"learning_rate": 1.5728664238994455e-05,
"loss": 0.2147,
"step": 3729
},
{
"epoch": 1.27,
"learning_rate": 1.571570331522081e-05,
"loss": 0.185,
"step": 3730
},
{
"epoch": 1.27,
"learning_rate": 1.570274528542308e-05,
"loss": 0.2443,
"step": 3731
},
{
"epoch": 1.27,
"learning_rate": 1.5689790153640382e-05,
"loss": 0.2418,
"step": 3732
},
{
"epoch": 1.27,
"learning_rate": 1.5676837923910914e-05,
"loss": 0.2119,
"step": 3733
},
{
"epoch": 1.27,
"learning_rate": 1.566388860027196e-05,
"loss": 0.2034,
"step": 3734
},
{
"epoch": 1.27,
"learning_rate": 1.5650942186759914e-05,
"loss": 0.2296,
"step": 3735
},
{
"epoch": 1.27,
"learning_rate": 1.5637998687410266e-05,
"loss": 0.2108,
"step": 3736
},
{
"epoch": 1.27,
"learning_rate": 1.562505810625758e-05,
"loss": 0.2266,
"step": 3737
},
{
"epoch": 1.28,
"learning_rate": 1.5612120447335525e-05,
"loss": 0.2106,
"step": 3738
},
{
"epoch": 1.28,
"learning_rate": 1.5599185714676857e-05,
"loss": 0.2061,
"step": 3739
},
{
"epoch": 1.28,
"learning_rate": 1.558625391231341e-05,
"loss": 0.2108,
"step": 3740
},
{
"epoch": 1.28,
"learning_rate": 1.5573325044276107e-05,
"loss": 0.2017,
"step": 3741
},
{
"epoch": 1.28,
"learning_rate": 1.5560399114594964e-05,
"loss": 0.2145,
"step": 3742
},
{
"epoch": 1.28,
"learning_rate": 1.5547476127299088e-05,
"loss": 0.2098,
"step": 3743
},
{
"epoch": 1.28,
"learning_rate": 1.5534556086416648e-05,
"loss": 0.224,
"step": 3744
},
{
"epoch": 1.28,
"learning_rate": 1.5521638995974907e-05,
"loss": 0.2151,
"step": 3745
},
{
"epoch": 1.28,
"learning_rate": 1.5508724860000214e-05,
"loss": 0.1961,
"step": 3746
},
{
"epoch": 1.28,
"learning_rate": 1.5495813682517967e-05,
"loss": 0.1908,
"step": 3747
},
{
"epoch": 1.28,
"learning_rate": 1.548290546755269e-05,
"loss": 0.2068,
"step": 3748
},
{
"epoch": 1.28,
"learning_rate": 1.5470000219127944e-05,
"loss": 0.2079,
"step": 3749
},
{
"epoch": 1.28,
"learning_rate": 1.5457097941266382e-05,
"loss": 0.2183,
"step": 3750
},
{
"epoch": 1.28,
"learning_rate": 1.5444198637989737e-05,
"loss": 0.2012,
"step": 3751
},
{
"epoch": 1.28,
"learning_rate": 1.5431302313318796e-05,
"loss": 0.2142,
"step": 3752
},
{
"epoch": 1.28,
"learning_rate": 1.541840897127343e-05,
"loss": 0.2369,
"step": 3753
},
{
"epoch": 1.28,
"learning_rate": 1.5405518615872577e-05,
"loss": 0.2294,
"step": 3754
},
{
"epoch": 1.28,
"learning_rate": 1.5392631251134244e-05,
"loss": 0.2036,
"step": 3755
},
{
"epoch": 1.28,
"learning_rate": 1.5379746881075513e-05,
"loss": 0.2218,
"step": 3756
},
{
"epoch": 1.28,
"learning_rate": 1.5366865509712526e-05,
"loss": 0.2164,
"step": 3757
},
{
"epoch": 1.28,
"learning_rate": 1.535398714106048e-05,
"loss": 0.1849,
"step": 3758
},
{
"epoch": 1.28,
"learning_rate": 1.5341111779133656e-05,
"loss": 0.2555,
"step": 3759
},
{
"epoch": 1.28,
"learning_rate": 1.5328239427945385e-05,
"loss": 0.2162,
"step": 3760
},
{
"epoch": 1.28,
"learning_rate": 1.5315370091508063e-05,
"loss": 0.2105,
"step": 3761
},
{
"epoch": 1.28,
"learning_rate": 1.530250377383315e-05,
"loss": 0.225,
"step": 3762
},
{
"epoch": 1.28,
"learning_rate": 1.528964047893115e-05,
"loss": 0.2122,
"step": 3763
},
{
"epoch": 1.28,
"learning_rate": 1.527678021081165e-05,
"loss": 0.2164,
"step": 3764
},
{
"epoch": 1.28,
"learning_rate": 1.526392297348328e-05,
"loss": 0.2165,
"step": 3765
},
{
"epoch": 1.28,
"learning_rate": 1.5251068770953713e-05,
"loss": 0.2351,
"step": 3766
},
{
"epoch": 1.29,
"learning_rate": 1.5238217607229693e-05,
"loss": 0.241,
"step": 3767
},
{
"epoch": 1.29,
"learning_rate": 1.5225369486317013e-05,
"loss": 0.2042,
"step": 3768
},
{
"epoch": 1.29,
"learning_rate": 1.521252441222052e-05,
"loss": 0.2103,
"step": 3769
},
{
"epoch": 1.29,
"learning_rate": 1.5199682388944097e-05,
"loss": 0.2111,
"step": 3770
},
{
"epoch": 1.29,
"learning_rate": 1.5186843420490698e-05,
"loss": 0.2105,
"step": 3771
},
{
"epoch": 1.29,
"learning_rate": 1.517400751086231e-05,
"loss": 0.2354,
"step": 3772
},
{
"epoch": 1.29,
"learning_rate": 1.5161174664059957e-05,
"loss": 0.2308,
"step": 3773
},
{
"epoch": 1.29,
"learning_rate": 1.5148344884083743e-05,
"loss": 0.2149,
"step": 3774
},
{
"epoch": 1.29,
"learning_rate": 1.5135518174932778e-05,
"loss": 0.2494,
"step": 3775
},
{
"epoch": 1.29,
"learning_rate": 1.512269454060524e-05,
"loss": 0.2396,
"step": 3776
},
{
"epoch": 1.29,
"learning_rate": 1.5109873985098325e-05,
"loss": 0.2226,
"step": 3777
},
{
"epoch": 1.29,
"learning_rate": 1.5097056512408303e-05,
"loss": 0.1964,
"step": 3778
},
{
"epoch": 1.29,
"learning_rate": 1.5084242126530456e-05,
"loss": 0.223,
"step": 3779
},
{
"epoch": 1.29,
"learning_rate": 1.5071430831459116e-05,
"loss": 0.2309,
"step": 3780
},
{
"epoch": 1.29,
"learning_rate": 1.5058622631187636e-05,
"loss": 0.216,
"step": 3781
},
{
"epoch": 1.29,
"learning_rate": 1.5045817529708426e-05,
"loss": 0.2299,
"step": 3782
},
{
"epoch": 1.29,
"learning_rate": 1.5033015531012918e-05,
"loss": 0.2284,
"step": 3783
},
{
"epoch": 1.29,
"learning_rate": 1.5020216639091574e-05,
"loss": 0.223,
"step": 3784
},
{
"epoch": 1.29,
"learning_rate": 1.5007420857933898e-05,
"loss": 0.2145,
"step": 3785
},
{
"epoch": 1.29,
"learning_rate": 1.4994628191528409e-05,
"loss": 0.1884,
"step": 3786
},
{
"epoch": 1.29,
"learning_rate": 1.4981838643862678e-05,
"loss": 0.2282,
"step": 3787
},
{
"epoch": 1.29,
"learning_rate": 1.4969052218923286e-05,
"loss": 0.2122,
"step": 3788
},
{
"epoch": 1.29,
"learning_rate": 1.495626892069584e-05,
"loss": 0.1979,
"step": 3789
},
{
"epoch": 1.29,
"learning_rate": 1.4943488753164972e-05,
"loss": 0.2284,
"step": 3790
},
{
"epoch": 1.29,
"learning_rate": 1.4930711720314363e-05,
"loss": 0.2126,
"step": 3791
},
{
"epoch": 1.29,
"learning_rate": 1.4917937826126688e-05,
"loss": 0.2133,
"step": 3792
},
{
"epoch": 1.29,
"learning_rate": 1.4905167074583649e-05,
"loss": 0.2012,
"step": 3793
},
{
"epoch": 1.29,
"learning_rate": 1.4892399469665973e-05,
"loss": 0.2165,
"step": 3794
},
{
"epoch": 1.29,
"learning_rate": 1.4879635015353415e-05,
"loss": 0.2556,
"step": 3795
},
{
"epoch": 1.3,
"learning_rate": 1.4866873715624733e-05,
"loss": 0.2292,
"step": 3796
},
{
"epoch": 1.3,
"learning_rate": 1.4854115574457706e-05,
"loss": 0.2266,
"step": 3797
},
{
"epoch": 1.3,
"learning_rate": 1.4841360595829129e-05,
"loss": 0.2251,
"step": 3798
},
{
"epoch": 1.3,
"learning_rate": 1.4828608783714814e-05,
"loss": 0.2104,
"step": 3799
},
{
"epoch": 1.3,
"learning_rate": 1.4815860142089588e-05,
"loss": 0.2172,
"step": 3800
},
{
"epoch": 1.3,
"learning_rate": 1.480311467492728e-05,
"loss": 0.2402,
"step": 3801
},
{
"epoch": 1.3,
"learning_rate": 1.479037238620074e-05,
"loss": 0.2224,
"step": 3802
},
{
"epoch": 1.3,
"learning_rate": 1.477763327988182e-05,
"loss": 0.2259,
"step": 3803
},
{
"epoch": 1.3,
"learning_rate": 1.476489735994137e-05,
"loss": 0.2156,
"step": 3804
},
{
"epoch": 1.3,
"learning_rate": 1.4752164630349286e-05,
"loss": 0.2097,
"step": 3805
},
{
"epoch": 1.3,
"learning_rate": 1.4739435095074428e-05,
"loss": 0.2238,
"step": 3806
},
{
"epoch": 1.3,
"learning_rate": 1.4726708758084673e-05,
"loss": 0.2217,
"step": 3807
},
{
"epoch": 1.3,
"learning_rate": 1.471398562334691e-05,
"loss": 0.2431,
"step": 3808
},
{
"epoch": 1.3,
"learning_rate": 1.470126569482702e-05,
"loss": 0.2007,
"step": 3809
},
{
"epoch": 1.3,
"learning_rate": 1.4688548976489892e-05,
"loss": 0.2346,
"step": 3810
},
{
"epoch": 1.3,
"learning_rate": 1.4675835472299403e-05,
"loss": 0.2069,
"step": 3811
},
{
"epoch": 1.3,
"learning_rate": 1.4663125186218435e-05,
"loss": 0.2361,
"step": 3812
},
{
"epoch": 1.3,
"learning_rate": 1.4650418122208876e-05,
"loss": 0.2047,
"step": 3813
},
{
"epoch": 1.3,
"learning_rate": 1.4637714284231591e-05,
"loss": 0.2137,
"step": 3814
},
{
"epoch": 1.3,
"learning_rate": 1.4625013676246457e-05,
"loss": 0.1989,
"step": 3815
},
{
"epoch": 1.3,
"learning_rate": 1.4612316302212336e-05,
"loss": 0.1949,
"step": 3816
},
{
"epoch": 1.3,
"learning_rate": 1.459962216608708e-05,
"loss": 0.2077,
"step": 3817
},
{
"epoch": 1.3,
"learning_rate": 1.4586931271827514e-05,
"loss": 0.2144,
"step": 3818
},
{
"epoch": 1.3,
"learning_rate": 1.4574243623389516e-05,
"loss": 0.2217,
"step": 3819
},
{
"epoch": 1.3,
"learning_rate": 1.4561559224727877e-05,
"loss": 0.2585,
"step": 3820
},
{
"epoch": 1.3,
"learning_rate": 1.4548878079796425e-05,
"loss": 0.2251,
"step": 3821
},
{
"epoch": 1.3,
"learning_rate": 1.4536200192547933e-05,
"loss": 0.2038,
"step": 3822
},
{
"epoch": 1.3,
"learning_rate": 1.4523525566934196e-05,
"loss": 0.1931,
"step": 3823
},
{
"epoch": 1.3,
"learning_rate": 1.4510854206905986e-05,
"loss": 0.1995,
"step": 3824
},
{
"epoch": 1.31,
"learning_rate": 1.449818611641303e-05,
"loss": 0.2072,
"step": 3825
},
{
"epoch": 1.31,
"learning_rate": 1.448552129940407e-05,
"loss": 0.2208,
"step": 3826
},
{
"epoch": 1.31,
"learning_rate": 1.4472859759826798e-05,
"loss": 0.2177,
"step": 3827
},
{
"epoch": 1.31,
"learning_rate": 1.4460201501627908e-05,
"loss": 0.2155,
"step": 3828
},
{
"epoch": 1.31,
"learning_rate": 1.4447546528753068e-05,
"loss": 0.2257,
"step": 3829
},
{
"epoch": 1.31,
"learning_rate": 1.4434894845146896e-05,
"loss": 0.2248,
"step": 3830
},
{
"epoch": 1.31,
"learning_rate": 1.4422246454753027e-05,
"loss": 0.2428,
"step": 3831
},
{
"epoch": 1.31,
"learning_rate": 1.4409601361514017e-05,
"loss": 0.1855,
"step": 3832
},
{
"epoch": 1.31,
"learning_rate": 1.439695956937146e-05,
"loss": 0.227,
"step": 3833
},
{
"epoch": 1.31,
"learning_rate": 1.4384321082265862e-05,
"loss": 0.2117,
"step": 3834
},
{
"epoch": 1.31,
"learning_rate": 1.4371685904136736e-05,
"loss": 0.3071,
"step": 3835
},
{
"epoch": 1.31,
"learning_rate": 1.4359054038922531e-05,
"loss": 0.1867,
"step": 3836
},
{
"epoch": 1.31,
"learning_rate": 1.4346425490560697e-05,
"loss": 0.2184,
"step": 3837
},
{
"epoch": 1.31,
"learning_rate": 1.4333800262987638e-05,
"loss": 0.2195,
"step": 3838
},
{
"epoch": 1.31,
"learning_rate": 1.4321178360138705e-05,
"loss": 0.1978,
"step": 3839
},
{
"epoch": 1.31,
"learning_rate": 1.430855978594824e-05,
"loss": 0.2029,
"step": 3840
},
{
"epoch": 1.31,
"learning_rate": 1.4295944544349523e-05,
"loss": 0.2427,
"step": 3841
},
{
"epoch": 1.31,
"learning_rate": 1.4283332639274815e-05,
"loss": 0.219,
"step": 3842
},
{
"epoch": 1.31,
"learning_rate": 1.4270724074655339e-05,
"loss": 0.1888,
"step": 3843
},
{
"epoch": 1.31,
"learning_rate": 1.4258118854421244e-05,
"loss": 0.2183,
"step": 3844
},
{
"epoch": 1.31,
"learning_rate": 1.4245516982501683e-05,
"loss": 0.2555,
"step": 3845
},
{
"epoch": 1.31,
"learning_rate": 1.4232918462824713e-05,
"loss": 0.2232,
"step": 3846
},
{
"epoch": 1.31,
"learning_rate": 1.4220323299317411e-05,
"loss": 0.218,
"step": 3847
},
{
"epoch": 1.31,
"learning_rate": 1.4207731495905747e-05,
"loss": 0.2079,
"step": 3848
},
{
"epoch": 1.31,
"learning_rate": 1.419514305651467e-05,
"loss": 0.2421,
"step": 3849
},
{
"epoch": 1.31,
"learning_rate": 1.4182557985068095e-05,
"loss": 0.2112,
"step": 3850
},
{
"epoch": 1.31,
"learning_rate": 1.416997628548885e-05,
"loss": 0.2263,
"step": 3851
},
{
"epoch": 1.31,
"learning_rate": 1.4157397961698755e-05,
"loss": 0.2329,
"step": 3852
},
{
"epoch": 1.31,
"learning_rate": 1.4144823017618528e-05,
"loss": 0.2397,
"step": 3853
},
{
"epoch": 1.31,
"learning_rate": 1.4132251457167877e-05,
"loss": 0.2096,
"step": 3854
},
{
"epoch": 1.32,
"learning_rate": 1.4119683284265456e-05,
"loss": 0.2445,
"step": 3855
},
{
"epoch": 1.32,
"learning_rate": 1.4107118502828814e-05,
"loss": 0.2625,
"step": 3856
},
{
"epoch": 1.32,
"learning_rate": 1.4094557116774501e-05,
"loss": 0.1911,
"step": 3857
},
{
"epoch": 1.32,
"learning_rate": 1.4081999130017964e-05,
"loss": 0.1976,
"step": 3858
},
{
"epoch": 1.32,
"learning_rate": 1.4069444546473621e-05,
"loss": 0.2247,
"step": 3859
},
{
"epoch": 1.32,
"learning_rate": 1.4056893370054814e-05,
"loss": 0.2148,
"step": 3860
},
{
"epoch": 1.32,
"learning_rate": 1.4044345604673836e-05,
"loss": 0.2108,
"step": 3861
},
{
"epoch": 1.32,
"learning_rate": 1.4031801254241889e-05,
"loss": 0.2206,
"step": 3862
},
{
"epoch": 1.32,
"learning_rate": 1.4019260322669143e-05,
"loss": 0.2214,
"step": 3863
},
{
"epoch": 1.32,
"learning_rate": 1.4006722813864692e-05,
"loss": 0.222,
"step": 3864
},
{
"epoch": 1.32,
"learning_rate": 1.399418873173654e-05,
"loss": 0.2124,
"step": 3865
},
{
"epoch": 1.32,
"learning_rate": 1.398165808019167e-05,
"loss": 0.257,
"step": 3866
},
{
"epoch": 1.32,
"learning_rate": 1.3969130863135938e-05,
"loss": 0.1928,
"step": 3867
},
{
"epoch": 1.32,
"learning_rate": 1.3956607084474178e-05,
"loss": 0.2491,
"step": 3868
},
{
"epoch": 1.32,
"learning_rate": 1.3944086748110135e-05,
"loss": 0.1977,
"step": 3869
},
{
"epoch": 1.32,
"learning_rate": 1.3931569857946466e-05,
"loss": 0.4025,
"step": 3870
},
{
"epoch": 1.32,
"learning_rate": 1.3919056417884785e-05,
"loss": 0.2342,
"step": 3871
},
{
"epoch": 1.32,
"learning_rate": 1.3906546431825596e-05,
"loss": 0.2318,
"step": 3872
},
{
"epoch": 1.32,
"learning_rate": 1.3894039903668346e-05,
"loss": 0.2514,
"step": 3873
},
{
"epoch": 1.32,
"learning_rate": 1.3881536837311409e-05,
"loss": 0.1932,
"step": 3874
},
{
"epoch": 1.32,
"learning_rate": 1.3869037236652071e-05,
"loss": 0.1881,
"step": 3875
},
{
"epoch": 1.32,
"learning_rate": 1.3856541105586545e-05,
"loss": 0.2364,
"step": 3876
},
{
"epoch": 1.32,
"learning_rate": 1.3844048448009942e-05,
"loss": 0.2505,
"step": 3877
},
{
"epoch": 1.32,
"learning_rate": 1.3831559267816322e-05,
"loss": 0.2396,
"step": 3878
},
{
"epoch": 1.32,
"learning_rate": 1.3819073568898621e-05,
"loss": 0.1922,
"step": 3879
},
{
"epoch": 1.32,
"learning_rate": 1.3806591355148732e-05,
"loss": 0.1825,
"step": 3880
},
{
"epoch": 1.32,
"learning_rate": 1.3794112630457444e-05,
"loss": 0.2364,
"step": 3881
},
{
"epoch": 1.32,
"learning_rate": 1.3781637398714442e-05,
"loss": 0.2548,
"step": 3882
},
{
"epoch": 1.32,
"learning_rate": 1.3769165663808353e-05,
"loss": 0.1998,
"step": 3883
},
{
"epoch": 1.33,
"learning_rate": 1.3756697429626686e-05,
"loss": 0.2069,
"step": 3884
},
{
"epoch": 1.33,
"learning_rate": 1.374423270005587e-05,
"loss": 0.2152,
"step": 3885
},
{
"epoch": 1.33,
"learning_rate": 1.3731771478981259e-05,
"loss": 0.2211,
"step": 3886
},
{
"epoch": 1.33,
"learning_rate": 1.371931377028708e-05,
"loss": 0.2306,
"step": 3887
},
{
"epoch": 1.33,
"learning_rate": 1.3706859577856485e-05,
"loss": 0.2126,
"step": 3888
},
{
"epoch": 1.33,
"learning_rate": 1.3694408905571532e-05,
"loss": 0.2152,
"step": 3889
},
{
"epoch": 1.33,
"learning_rate": 1.3681961757313183e-05,
"loss": 0.1926,
"step": 3890
},
{
"epoch": 1.33,
"learning_rate": 1.366951813696128e-05,
"loss": 0.203,
"step": 3891
},
{
"epoch": 1.33,
"learning_rate": 1.36570780483946e-05,
"loss": 0.1777,
"step": 3892
},
{
"epoch": 1.33,
"learning_rate": 1.364464149549078e-05,
"loss": 0.2178,
"step": 3893
},
{
"epoch": 1.33,
"learning_rate": 1.3632208482126376e-05,
"loss": 0.2227,
"step": 3894
},
{
"epoch": 1.33,
"learning_rate": 1.3619779012176861e-05,
"loss": 0.2068,
"step": 3895
},
{
"epoch": 1.33,
"learning_rate": 1.3607353089516559e-05,
"loss": 0.2204,
"step": 3896
},
{
"epoch": 1.33,
"learning_rate": 1.3594930718018728e-05,
"loss": 0.2344,
"step": 3897
},
{
"epoch": 1.33,
"learning_rate": 1.3582511901555487e-05,
"loss": 0.2009,
"step": 3898
},
{
"epoch": 1.33,
"learning_rate": 1.3570096643997867e-05,
"loss": 0.2117,
"step": 3899
},
{
"epoch": 1.33,
"learning_rate": 1.3557684949215799e-05,
"loss": 0.244,
"step": 3900
},
{
"epoch": 1.33,
"learning_rate": 1.3545276821078057e-05,
"loss": 0.1967,
"step": 3901
},
{
"epoch": 1.33,
"learning_rate": 1.3532872263452379e-05,
"loss": 0.2136,
"step": 3902
},
{
"epoch": 1.33,
"learning_rate": 1.352047128020531e-05,
"loss": 0.202,
"step": 3903
},
{
"epoch": 1.33,
"learning_rate": 1.3508073875202342e-05,
"loss": 0.1823,
"step": 3904
},
{
"epoch": 1.33,
"learning_rate": 1.3495680052307807e-05,
"loss": 0.2288,
"step": 3905
},
{
"epoch": 1.33,
"learning_rate": 1.3483289815384954e-05,
"loss": 0.2367,
"step": 3906
},
{
"epoch": 1.33,
"learning_rate": 1.3470903168295904e-05,
"loss": 0.2232,
"step": 3907
},
{
"epoch": 1.33,
"learning_rate": 1.3458520114901639e-05,
"loss": 0.2236,
"step": 3908
},
{
"epoch": 1.33,
"learning_rate": 1.3446140659062056e-05,
"loss": 0.2543,
"step": 3909
},
{
"epoch": 1.33,
"learning_rate": 1.3433764804635895e-05,
"loss": 0.2175,
"step": 3910
},
{
"epoch": 1.33,
"learning_rate": 1.34213925554808e-05,
"loss": 0.2017,
"step": 3911
},
{
"epoch": 1.33,
"learning_rate": 1.340902391545329e-05,
"loss": 0.198,
"step": 3912
},
{
"epoch": 1.34,
"learning_rate": 1.3396658888408733e-05,
"loss": 0.2378,
"step": 3913
},
{
"epoch": 1.34,
"learning_rate": 1.3384297478201407e-05,
"loss": 0.2259,
"step": 3914
},
{
"epoch": 1.34,
"learning_rate": 1.3371939688684419e-05,
"loss": 0.2122,
"step": 3915
},
{
"epoch": 1.34,
"learning_rate": 1.3359585523709806e-05,
"loss": 0.2133,
"step": 3916
},
{
"epoch": 1.34,
"learning_rate": 1.3347234987128415e-05,
"loss": 0.2093,
"step": 3917
},
{
"epoch": 1.34,
"learning_rate": 1.3334888082790007e-05,
"loss": 0.2569,
"step": 3918
},
{
"epoch": 1.34,
"learning_rate": 1.3322544814543176e-05,
"loss": 0.2493,
"step": 3919
},
{
"epoch": 1.34,
"learning_rate": 1.3310205186235408e-05,
"loss": 0.2346,
"step": 3920
},
{
"epoch": 1.34,
"learning_rate": 1.3297869201713053e-05,
"loss": 0.1942,
"step": 3921
},
{
"epoch": 1.34,
"learning_rate": 1.3285536864821305e-05,
"loss": 0.2053,
"step": 3922
},
{
"epoch": 1.34,
"learning_rate": 1.3273208179404245e-05,
"loss": 0.2152,
"step": 3923
},
{
"epoch": 1.34,
"learning_rate": 1.3260883149304792e-05,
"loss": 0.1924,
"step": 3924
},
{
"epoch": 1.34,
"learning_rate": 1.3248561778364746e-05,
"loss": 0.1947,
"step": 3925
},
{
"epoch": 1.34,
"learning_rate": 1.323624407042477e-05,
"loss": 0.227,
"step": 3926
},
{
"epoch": 1.34,
"learning_rate": 1.322393002932435e-05,
"loss": 0.2185,
"step": 3927
},
{
"epoch": 1.34,
"learning_rate": 1.3211619658901876e-05,
"loss": 0.239,
"step": 3928
},
{
"epoch": 1.34,
"learning_rate": 1.3199312962994536e-05,
"loss": 0.199,
"step": 3929
},
{
"epoch": 1.34,
"learning_rate": 1.3187009945438459e-05,
"loss": 0.2166,
"step": 3930
},
{
"epoch": 1.34,
"learning_rate": 1.3174710610068536e-05,
"loss": 0.2198,
"step": 3931
},
{
"epoch": 1.34,
"learning_rate": 1.3162414960718561e-05,
"loss": 0.2173,
"step": 3932
},
{
"epoch": 1.34,
"learning_rate": 1.3150123001221185e-05,
"loss": 0.1957,
"step": 3933
},
{
"epoch": 1.34,
"learning_rate": 1.3137834735407863e-05,
"loss": 0.2131,
"step": 3934
},
{
"epoch": 1.34,
"learning_rate": 1.3125550167108955e-05,
"loss": 0.2261,
"step": 3935
},
{
"epoch": 1.34,
"learning_rate": 1.3113269300153618e-05,
"loss": 0.2308,
"step": 3936
},
{
"epoch": 1.34,
"learning_rate": 1.3100992138369888e-05,
"loss": 0.1867,
"step": 3937
},
{
"epoch": 1.34,
"learning_rate": 1.3088718685584644e-05,
"loss": 0.2146,
"step": 3938
},
{
"epoch": 1.34,
"learning_rate": 1.307644894562359e-05,
"loss": 0.1942,
"step": 3939
},
{
"epoch": 1.34,
"learning_rate": 1.3064182922311296e-05,
"loss": 0.2208,
"step": 3940
},
{
"epoch": 1.34,
"learning_rate": 1.3051920619471145e-05,
"loss": 0.185,
"step": 3941
},
{
"epoch": 1.34,
"learning_rate": 1.3039662040925396e-05,
"loss": 0.2309,
"step": 3942
},
{
"epoch": 1.35,
"learning_rate": 1.30274071904951e-05,
"loss": 0.2138,
"step": 3943
},
{
"epoch": 1.35,
"learning_rate": 1.301515607200021e-05,
"loss": 0.2107,
"step": 3944
},
{
"epoch": 1.35,
"learning_rate": 1.3002908689259451e-05,
"loss": 0.2046,
"step": 3945
},
{
"epoch": 1.35,
"learning_rate": 1.2990665046090425e-05,
"loss": 0.2017,
"step": 3946
},
{
"epoch": 1.35,
"learning_rate": 1.2978425146309564e-05,
"loss": 0.1899,
"step": 3947
},
{
"epoch": 1.35,
"learning_rate": 1.2966188993732103e-05,
"loss": 0.2103,
"step": 3948
},
{
"epoch": 1.35,
"learning_rate": 1.2953956592172151e-05,
"loss": 0.2291,
"step": 3949
},
{
"epoch": 1.35,
"learning_rate": 1.2941727945442611e-05,
"loss": 0.2221,
"step": 3950
},
{
"epoch": 1.35,
"learning_rate": 1.2929503057355235e-05,
"loss": 0.202,
"step": 3951
},
{
"epoch": 1.35,
"learning_rate": 1.291728193172062e-05,
"loss": 0.2023,
"step": 3952
},
{
"epoch": 1.35,
"learning_rate": 1.2905064572348146e-05,
"loss": 0.236,
"step": 3953
},
{
"epoch": 1.35,
"learning_rate": 1.2892850983046062e-05,
"loss": 0.2156,
"step": 3954
},
{
"epoch": 1.35,
"learning_rate": 1.2880641167621405e-05,
"loss": 0.2231,
"step": 3955
},
{
"epoch": 1.35,
"learning_rate": 1.2868435129880064e-05,
"loss": 0.2223,
"step": 3956
},
{
"epoch": 1.35,
"learning_rate": 1.2856232873626739e-05,
"loss": 0.1925,
"step": 3957
},
{
"epoch": 1.35,
"learning_rate": 1.2844034402664956e-05,
"loss": 0.2089,
"step": 3958
},
{
"epoch": 1.35,
"learning_rate": 1.2831839720797067e-05,
"loss": 0.2051,
"step": 3959
},
{
"epoch": 1.35,
"learning_rate": 1.2819648831824216e-05,
"loss": 0.1919,
"step": 3960
},
{
"epoch": 1.35,
"learning_rate": 1.28074617395464e-05,
"loss": 0.2268,
"step": 3961
},
{
"epoch": 1.35,
"learning_rate": 1.2795278447762393e-05,
"loss": 0.2405,
"step": 3962
},
{
"epoch": 1.35,
"learning_rate": 1.2783098960269823e-05,
"loss": 0.219,
"step": 3963
},
{
"epoch": 1.35,
"learning_rate": 1.2770923280865122e-05,
"loss": 0.1935,
"step": 3964
},
{
"epoch": 1.35,
"learning_rate": 1.2758751413343511e-05,
"loss": 0.2242,
"step": 3965
},
{
"epoch": 1.35,
"learning_rate": 1.2746583361499053e-05,
"loss": 0.2179,
"step": 3966
},
{
"epoch": 1.35,
"learning_rate": 1.2734419129124603e-05,
"loss": 0.209,
"step": 3967
},
{
"epoch": 1.35,
"learning_rate": 1.2722258720011838e-05,
"loss": 0.2171,
"step": 3968
},
{
"epoch": 1.35,
"learning_rate": 1.2710102137951224e-05,
"loss": 0.2253,
"step": 3969
},
{
"epoch": 1.35,
"learning_rate": 1.2697949386732055e-05,
"loss": 0.2376,
"step": 3970
},
{
"epoch": 1.35,
"learning_rate": 1.2685800470142429e-05,
"loss": 0.2257,
"step": 3971
},
{
"epoch": 1.36,
"learning_rate": 1.267365539196923e-05,
"loss": 0.2275,
"step": 3972
},
{
"epoch": 1.36,
"learning_rate": 1.266151415599818e-05,
"loss": 0.2031,
"step": 3973
},
{
"epoch": 1.36,
"learning_rate": 1.2649376766013757e-05,
"loss": 0.2029,
"step": 3974
},
{
"epoch": 1.36,
"learning_rate": 1.2637243225799283e-05,
"loss": 0.2054,
"step": 3975
},
{
"epoch": 1.36,
"learning_rate": 1.2625113539136846e-05,
"loss": 0.2137,
"step": 3976
},
{
"epoch": 1.36,
"learning_rate": 1.2612987709807356e-05,
"loss": 0.2146,
"step": 3977
},
{
"epoch": 1.36,
"learning_rate": 1.2600865741590523e-05,
"loss": 0.1926,
"step": 3978
},
{
"epoch": 1.36,
"learning_rate": 1.2588747638264822e-05,
"loss": 0.2167,
"step": 3979
},
{
"epoch": 1.36,
"learning_rate": 1.2576633403607568e-05,
"loss": 0.2102,
"step": 3980
},
{
"epoch": 1.36,
"learning_rate": 1.2564523041394826e-05,
"loss": 0.2067,
"step": 3981
},
{
"epoch": 1.36,
"learning_rate": 1.255241655540149e-05,
"loss": 0.1933,
"step": 3982
},
{
"epoch": 1.36,
"learning_rate": 1.254031394940123e-05,
"loss": 0.2274,
"step": 3983
},
{
"epoch": 1.36,
"learning_rate": 1.2528215227166495e-05,
"loss": 0.2089,
"step": 3984
},
{
"epoch": 1.36,
"learning_rate": 1.2516120392468544e-05,
"loss": 0.229,
"step": 3985
},
{
"epoch": 1.36,
"learning_rate": 1.2504029449077421e-05,
"loss": 0.2255,
"step": 3986
},
{
"epoch": 1.36,
"learning_rate": 1.249194240076195e-05,
"loss": 0.2107,
"step": 3987
},
{
"epoch": 1.36,
"learning_rate": 1.2479859251289732e-05,
"loss": 0.2263,
"step": 3988
},
{
"epoch": 1.36,
"learning_rate": 1.2467780004427173e-05,
"loss": 0.1895,
"step": 3989
},
{
"epoch": 1.36,
"learning_rate": 1.245570466393946e-05,
"loss": 0.2222,
"step": 3990
},
{
"epoch": 1.36,
"learning_rate": 1.244363323359054e-05,
"loss": 0.2083,
"step": 3991
},
{
"epoch": 1.36,
"learning_rate": 1.2431565717143171e-05,
"loss": 0.2014,
"step": 3992
},
{
"epoch": 1.36,
"learning_rate": 1.241950211835886e-05,
"loss": 0.2029,
"step": 3993
},
{
"epoch": 1.36,
"learning_rate": 1.2407442440997932e-05,
"loss": 0.2306,
"step": 3994
},
{
"epoch": 1.36,
"learning_rate": 1.2395386688819444e-05,
"loss": 0.2065,
"step": 3995
},
{
"epoch": 1.36,
"learning_rate": 1.2383334865581261e-05,
"loss": 0.1666,
"step": 3996
},
{
"epoch": 1.36,
"learning_rate": 1.2371286975040029e-05,
"loss": 0.2153,
"step": 3997
},
{
"epoch": 1.36,
"learning_rate": 1.2359243020951119e-05,
"loss": 0.2259,
"step": 3998
},
{
"epoch": 1.36,
"learning_rate": 1.2347203007068758e-05,
"loss": 0.2119,
"step": 3999
},
{
"epoch": 1.36,
"learning_rate": 1.2335166937145859e-05,
"loss": 0.2116,
"step": 4000
},
{
"epoch": 1.37,
"learning_rate": 1.232313481493417e-05,
"loss": 0.2369,
"step": 4001
},
{
"epoch": 1.37,
"learning_rate": 1.2311106644184157e-05,
"loss": 0.206,
"step": 4002
},
{
"epoch": 1.37,
"learning_rate": 1.2299082428645092e-05,
"loss": 0.1658,
"step": 4003
},
{
"epoch": 1.37,
"learning_rate": 1.2287062172065012e-05,
"loss": 0.2183,
"step": 4004
},
{
"epoch": 1.37,
"learning_rate": 1.2275045878190688e-05,
"loss": 0.2121,
"step": 4005
},
{
"epoch": 1.37,
"learning_rate": 1.2263033550767697e-05,
"loss": 0.1875,
"step": 4006
},
{
"epoch": 1.37,
"learning_rate": 1.2251025193540344e-05,
"loss": 0.2037,
"step": 4007
},
{
"epoch": 1.37,
"learning_rate": 1.2239020810251716e-05,
"loss": 0.2158,
"step": 4008
},
{
"epoch": 1.37,
"learning_rate": 1.2227020404643671e-05,
"loss": 0.2166,
"step": 4009
},
{
"epoch": 1.37,
"learning_rate": 1.2215023980456797e-05,
"loss": 0.2252,
"step": 4010
},
{
"epoch": 1.37,
"learning_rate": 1.2203031541430471e-05,
"loss": 0.2264,
"step": 4011
},
{
"epoch": 1.37,
"learning_rate": 1.219104309130279e-05,
"loss": 0.2007,
"step": 4012
},
{
"epoch": 1.37,
"learning_rate": 1.217905863381067e-05,
"loss": 0.199,
"step": 4013
},
{
"epoch": 1.37,
"learning_rate": 1.2167078172689717e-05,
"loss": 0.2382,
"step": 4014
},
{
"epoch": 1.37,
"learning_rate": 1.2155101711674324e-05,
"loss": 0.2134,
"step": 4015
},
{
"epoch": 1.37,
"learning_rate": 1.2143129254497646e-05,
"loss": 0.2164,
"step": 4016
},
{
"epoch": 1.37,
"learning_rate": 1.2131160804891556e-05,
"loss": 0.2177,
"step": 4017
},
{
"epoch": 1.37,
"learning_rate": 1.2119196366586717e-05,
"loss": 0.2095,
"step": 4018
},
{
"epoch": 1.37,
"learning_rate": 1.21072359433125e-05,
"loss": 0.2099,
"step": 4019
},
{
"epoch": 1.37,
"learning_rate": 1.2095279538797072e-05,
"loss": 0.2256,
"step": 4020
},
{
"epoch": 1.37,
"learning_rate": 1.2083327156767302e-05,
"loss": 0.2339,
"step": 4021
},
{
"epoch": 1.37,
"learning_rate": 1.2071378800948831e-05,
"loss": 0.2084,
"step": 4022
},
{
"epoch": 1.37,
"learning_rate": 1.2059434475066053e-05,
"loss": 0.1948,
"step": 4023
},
{
"epoch": 1.37,
"learning_rate": 1.2047494182842073e-05,
"loss": 0.2364,
"step": 4024
},
{
"epoch": 1.37,
"learning_rate": 1.2035557927998778e-05,
"loss": 0.2275,
"step": 4025
},
{
"epoch": 1.37,
"learning_rate": 1.2023625714256743e-05,
"loss": 0.2117,
"step": 4026
},
{
"epoch": 1.37,
"learning_rate": 1.201169754533536e-05,
"loss": 0.2136,
"step": 4027
},
{
"epoch": 1.37,
"learning_rate": 1.1999773424952687e-05,
"loss": 0.213,
"step": 4028
},
{
"epoch": 1.37,
"learning_rate": 1.198785335682556e-05,
"loss": 0.2231,
"step": 4029
},
{
"epoch": 1.37,
"learning_rate": 1.1975937344669549e-05,
"loss": 0.2164,
"step": 4030
},
{
"epoch": 1.38,
"learning_rate": 1.1964025392198937e-05,
"loss": 0.2292,
"step": 4031
},
{
"epoch": 1.38,
"learning_rate": 1.1952117503126778e-05,
"loss": 0.2216,
"step": 4032
},
{
"epoch": 1.38,
"learning_rate": 1.1940213681164816e-05,
"loss": 0.235,
"step": 4033
},
{
"epoch": 1.38,
"learning_rate": 1.1928313930023563e-05,
"loss": 0.2031,
"step": 4034
},
{
"epoch": 1.38,
"learning_rate": 1.1916418253412253e-05,
"loss": 0.2157,
"step": 4035
},
{
"epoch": 1.38,
"learning_rate": 1.1904526655038834e-05,
"loss": 0.2002,
"step": 4036
},
{
"epoch": 1.38,
"learning_rate": 1.189263913861001e-05,
"loss": 0.2044,
"step": 4037
},
{
"epoch": 1.38,
"learning_rate": 1.1880755707831178e-05,
"loss": 0.196,
"step": 4038
},
{
"epoch": 1.38,
"learning_rate": 1.1868876366406492e-05,
"loss": 0.2202,
"step": 4039
},
{
"epoch": 1.38,
"learning_rate": 1.1857001118038821e-05,
"loss": 0.2218,
"step": 4040
},
{
"epoch": 1.38,
"learning_rate": 1.1845129966429755e-05,
"loss": 0.2332,
"step": 4041
},
{
"epoch": 1.38,
"learning_rate": 1.1833262915279622e-05,
"loss": 0.2107,
"step": 4042
},
{
"epoch": 1.38,
"learning_rate": 1.1821399968287435e-05,
"loss": 0.1997,
"step": 4043
},
{
"epoch": 1.38,
"learning_rate": 1.1809541129150981e-05,
"loss": 0.213,
"step": 4044
},
{
"epoch": 1.38,
"learning_rate": 1.179768640156671e-05,
"loss": 0.2105,
"step": 4045
},
{
"epoch": 1.38,
"learning_rate": 1.1785835789229837e-05,
"loss": 0.1787,
"step": 4046
},
{
"epoch": 1.38,
"learning_rate": 1.1773989295834262e-05,
"loss": 0.2239,
"step": 4047
},
{
"epoch": 1.38,
"learning_rate": 1.176214692507262e-05,
"loss": 0.2153,
"step": 4048
},
{
"epoch": 1.38,
"learning_rate": 1.1750308680636266e-05,
"loss": 0.2205,
"step": 4049
},
{
"epoch": 1.38,
"learning_rate": 1.1738474566215238e-05,
"loss": 0.2014,
"step": 4050
},
{
"epoch": 1.38,
"learning_rate": 1.1726644585498326e-05,
"loss": 0.2094,
"step": 4051
},
{
"epoch": 1.38,
"learning_rate": 1.1714818742172995e-05,
"loss": 0.2178,
"step": 4052
},
{
"epoch": 1.38,
"learning_rate": 1.1702997039925445e-05,
"loss": 0.2223,
"step": 4053
},
{
"epoch": 1.38,
"learning_rate": 1.1691179482440578e-05,
"loss": 0.22,
"step": 4054
},
{
"epoch": 1.38,
"learning_rate": 1.1679366073402004e-05,
"loss": 0.2011,
"step": 4055
},
{
"epoch": 1.38,
"learning_rate": 1.166755681649205e-05,
"loss": 0.2338,
"step": 4056
},
{
"epoch": 1.38,
"learning_rate": 1.1655751715391722e-05,
"loss": 0.2126,
"step": 4057
},
{
"epoch": 1.38,
"learning_rate": 1.1643950773780763e-05,
"loss": 0.2109,
"step": 4058
},
{
"epoch": 1.38,
"learning_rate": 1.1632153995337583e-05,
"loss": 0.1969,
"step": 4059
},
{
"epoch": 1.39,
"learning_rate": 1.1620361383739328e-05,
"loss": 0.2028,
"step": 4060
},
{
"epoch": 1.39,
"learning_rate": 1.1608572942661838e-05,
"loss": 0.2499,
"step": 4061
},
{
"epoch": 1.39,
"learning_rate": 1.1596788675779632e-05,
"loss": 0.2421,
"step": 4062
},
{
"epoch": 1.39,
"learning_rate": 1.1585008586765959e-05,
"loss": 0.2045,
"step": 4063
},
{
"epoch": 1.39,
"learning_rate": 1.1573232679292733e-05,
"loss": 0.2115,
"step": 4064
},
{
"epoch": 1.39,
"learning_rate": 1.1561460957030587e-05,
"loss": 0.2056,
"step": 4065
},
{
"epoch": 1.39,
"learning_rate": 1.1549693423648858e-05,
"loss": 0.2423,
"step": 4066
},
{
"epoch": 1.39,
"learning_rate": 1.1537930082815543e-05,
"loss": 0.2108,
"step": 4067
},
{
"epoch": 1.39,
"learning_rate": 1.1526170938197358e-05,
"loss": 0.243,
"step": 4068
},
{
"epoch": 1.39,
"learning_rate": 1.1514415993459706e-05,
"loss": 0.2121,
"step": 4069
},
{
"epoch": 1.39,
"learning_rate": 1.1502665252266695e-05,
"loss": 0.2344,
"step": 4070
},
{
"epoch": 1.39,
"learning_rate": 1.1490918718281084e-05,
"loss": 0.2368,
"step": 4071
},
{
"epoch": 1.39,
"learning_rate": 1.1479176395164364e-05,
"loss": 0.1929,
"step": 4072
},
{
"epoch": 1.39,
"learning_rate": 1.1467438286576676e-05,
"loss": 0.2314,
"step": 4073
},
{
"epoch": 1.39,
"learning_rate": 1.1455704396176873e-05,
"loss": 0.2244,
"step": 4074
},
{
"epoch": 1.39,
"learning_rate": 1.1443974727622494e-05,
"loss": 0.2022,
"step": 4075
},
{
"epoch": 1.39,
"learning_rate": 1.1432249284569737e-05,
"loss": 0.1995,
"step": 4076
},
{
"epoch": 1.39,
"learning_rate": 1.1420528070673519e-05,
"loss": 0.222,
"step": 4077
},
{
"epoch": 1.39,
"learning_rate": 1.1408811089587399e-05,
"loss": 0.2074,
"step": 4078
},
{
"epoch": 1.39,
"learning_rate": 1.1397098344963642e-05,
"loss": 0.1964,
"step": 4079
},
{
"epoch": 1.39,
"learning_rate": 1.13853898404532e-05,
"loss": 0.2144,
"step": 4080
},
{
"epoch": 1.39,
"learning_rate": 1.1373685579705673e-05,
"loss": 0.2177,
"step": 4081
},
{
"epoch": 1.39,
"learning_rate": 1.1361985566369359e-05,
"loss": 0.2086,
"step": 4082
},
{
"epoch": 1.39,
"learning_rate": 1.1350289804091235e-05,
"loss": 0.1965,
"step": 4083
},
{
"epoch": 1.39,
"learning_rate": 1.1338598296516956e-05,
"loss": 0.1948,
"step": 4084
},
{
"epoch": 1.39,
"learning_rate": 1.1326911047290817e-05,
"loss": 0.2014,
"step": 4085
},
{
"epoch": 1.39,
"learning_rate": 1.1315228060055821e-05,
"loss": 0.1986,
"step": 4086
},
{
"epoch": 1.39,
"learning_rate": 1.1303549338453646e-05,
"loss": 0.211,
"step": 4087
},
{
"epoch": 1.39,
"learning_rate": 1.12918748861246e-05,
"loss": 0.241,
"step": 4088
},
{
"epoch": 1.4,
"learning_rate": 1.128020470670771e-05,
"loss": 0.2194,
"step": 4089
},
{
"epoch": 1.4,
"learning_rate": 1.1268538803840626e-05,
"loss": 0.193,
"step": 4090
},
{
"epoch": 1.4,
"learning_rate": 1.1256877181159694e-05,
"loss": 0.2231,
"step": 4091
},
{
"epoch": 1.4,
"learning_rate": 1.1245219842299931e-05,
"loss": 0.2312,
"step": 4092
},
{
"epoch": 1.4,
"learning_rate": 1.1233566790894987e-05,
"loss": 0.2291,
"step": 4093
},
{
"epoch": 1.4,
"learning_rate": 1.1221918030577208e-05,
"loss": 0.2452,
"step": 4094
},
{
"epoch": 1.4,
"learning_rate": 1.1210273564977564e-05,
"loss": 0.2206,
"step": 4095
},
{
"epoch": 1.4,
"learning_rate": 1.119863339772575e-05,
"loss": 0.1998,
"step": 4096
},
{
"epoch": 1.4,
"learning_rate": 1.118699753245005e-05,
"loss": 0.2244,
"step": 4097
},
{
"epoch": 1.4,
"learning_rate": 1.1175365972777462e-05,
"loss": 0.2212,
"step": 4098
},
{
"epoch": 1.4,
"learning_rate": 1.1163738722333597e-05,
"loss": 0.2025,
"step": 4099
},
{
"epoch": 1.4,
"learning_rate": 1.1152115784742758e-05,
"loss": 0.211,
"step": 4100
},
{
"epoch": 1.4,
"learning_rate": 1.1140497163627897e-05,
"loss": 0.2284,
"step": 4101
},
{
"epoch": 1.4,
"learning_rate": 1.1128882862610595e-05,
"loss": 0.2189,
"step": 4102
},
{
"epoch": 1.4,
"learning_rate": 1.1117272885311128e-05,
"loss": 0.2079,
"step": 4103
},
{
"epoch": 1.4,
"learning_rate": 1.1105667235348376e-05,
"loss": 0.21,
"step": 4104
},
{
"epoch": 1.4,
"learning_rate": 1.1094065916339912e-05,
"loss": 0.2151,
"step": 4105
},
{
"epoch": 1.4,
"learning_rate": 1.1082468931901952e-05,
"loss": 0.2081,
"step": 4106
},
{
"epoch": 1.4,
"learning_rate": 1.107087628564933e-05,
"loss": 0.2113,
"step": 4107
},
{
"epoch": 1.4,
"learning_rate": 1.1059287981195569e-05,
"loss": 0.2193,
"step": 4108
},
{
"epoch": 1.4,
"learning_rate": 1.1047704022152792e-05,
"loss": 0.1895,
"step": 4109
},
{
"epoch": 1.4,
"learning_rate": 1.1036124412131832e-05,
"loss": 0.1883,
"step": 4110
},
{
"epoch": 1.4,
"learning_rate": 1.1024549154742098e-05,
"loss": 0.2156,
"step": 4111
},
{
"epoch": 1.4,
"learning_rate": 1.1012978253591687e-05,
"loss": 0.2139,
"step": 4112
},
{
"epoch": 1.4,
"learning_rate": 1.1001411712287332e-05,
"loss": 0.2204,
"step": 4113
},
{
"epoch": 1.4,
"learning_rate": 1.0989849534434383e-05,
"loss": 0.2254,
"step": 4114
},
{
"epoch": 1.4,
"learning_rate": 1.097829172363686e-05,
"loss": 0.2142,
"step": 4115
},
{
"epoch": 1.4,
"learning_rate": 1.0966738283497394e-05,
"loss": 0.2167,
"step": 4116
},
{
"epoch": 1.4,
"learning_rate": 1.0955189217617277e-05,
"loss": 0.2117,
"step": 4117
},
{
"epoch": 1.4,
"learning_rate": 1.0943644529596434e-05,
"loss": 0.197,
"step": 4118
},
{
"epoch": 1.41,
"learning_rate": 1.0932104223033402e-05,
"loss": 0.2144,
"step": 4119
},
{
"epoch": 1.41,
"learning_rate": 1.0920568301525392e-05,
"loss": 0.1995,
"step": 4120
},
{
"epoch": 1.41,
"learning_rate": 1.0909036768668204e-05,
"loss": 0.2053,
"step": 4121
},
{
"epoch": 1.41,
"learning_rate": 1.0897509628056304e-05,
"loss": 0.2266,
"step": 4122
},
{
"epoch": 1.41,
"learning_rate": 1.0885986883282775e-05,
"loss": 0.1984,
"step": 4123
},
{
"epoch": 1.41,
"learning_rate": 1.0874468537939342e-05,
"loss": 0.2126,
"step": 4124
},
{
"epoch": 1.41,
"learning_rate": 1.0862954595616328e-05,
"loss": 0.2195,
"step": 4125
},
{
"epoch": 1.41,
"learning_rate": 1.0851445059902718e-05,
"loss": 0.2063,
"step": 4126
},
{
"epoch": 1.41,
"learning_rate": 1.0839939934386114e-05,
"loss": 0.2934,
"step": 4127
},
{
"epoch": 1.41,
"learning_rate": 1.0828439222652723e-05,
"loss": 0.2183,
"step": 4128
},
{
"epoch": 1.41,
"learning_rate": 1.0816942928287408e-05,
"loss": 0.2066,
"step": 4129
},
{
"epoch": 1.41,
"learning_rate": 1.0805451054873623e-05,
"loss": 0.2028,
"step": 4130
},
{
"epoch": 1.41,
"learning_rate": 1.0793963605993468e-05,
"loss": 0.2136,
"step": 4131
},
{
"epoch": 1.41,
"learning_rate": 1.0782480585227666e-05,
"loss": 0.2286,
"step": 4132
},
{
"epoch": 1.41,
"learning_rate": 1.077100199615553e-05,
"loss": 0.2328,
"step": 4133
},
{
"epoch": 1.41,
"learning_rate": 1.075952784235503e-05,
"loss": 0.2145,
"step": 4134
},
{
"epoch": 1.41,
"learning_rate": 1.0748058127402717e-05,
"loss": 0.1868,
"step": 4135
},
{
"epoch": 1.41,
"learning_rate": 1.0736592854873783e-05,
"loss": 0.2257,
"step": 4136
},
{
"epoch": 1.41,
"learning_rate": 1.0725132028342033e-05,
"loss": 0.2094,
"step": 4137
},
{
"epoch": 1.41,
"learning_rate": 1.0713675651379875e-05,
"loss": 0.2282,
"step": 4138
},
{
"epoch": 1.41,
"learning_rate": 1.0702223727558352e-05,
"loss": 0.2405,
"step": 4139
},
{
"epoch": 1.41,
"learning_rate": 1.0690776260447084e-05,
"loss": 0.2077,
"step": 4140
},
{
"epoch": 1.41,
"learning_rate": 1.0679333253614332e-05,
"loss": 0.2184,
"step": 4141
},
{
"epoch": 1.41,
"learning_rate": 1.066789471062695e-05,
"loss": 0.2106,
"step": 4142
},
{
"epoch": 1.41,
"learning_rate": 1.0656460635050405e-05,
"loss": 0.2052,
"step": 4143
},
{
"epoch": 1.41,
"learning_rate": 1.0645031030448788e-05,
"loss": 0.1872,
"step": 4144
},
{
"epoch": 1.41,
"learning_rate": 1.0633605900384764e-05,
"loss": 0.2255,
"step": 4145
},
{
"epoch": 1.41,
"learning_rate": 1.0622185248419634e-05,
"loss": 0.2226,
"step": 4146
},
{
"epoch": 1.41,
"learning_rate": 1.0610769078113272e-05,
"loss": 0.2314,
"step": 4147
},
{
"epoch": 1.42,
"learning_rate": 1.059935739302419e-05,
"loss": 0.1993,
"step": 4148
},
{
"epoch": 1.42,
"learning_rate": 1.0587950196709484e-05,
"loss": 0.2309,
"step": 4149
},
{
"epoch": 1.42,
"learning_rate": 1.0576547492724838e-05,
"loss": 0.2026,
"step": 4150
},
{
"epoch": 1.42,
"learning_rate": 1.0565149284624558e-05,
"loss": 0.2016,
"step": 4151
},
{
"epoch": 1.42,
"learning_rate": 1.055375557596154e-05,
"loss": 0.2057,
"step": 4152
},
{
"epoch": 1.42,
"learning_rate": 1.0542366370287288e-05,
"loss": 0.2157,
"step": 4153
},
{
"epoch": 1.42,
"learning_rate": 1.0530981671151871e-05,
"loss": 0.2173,
"step": 4154
},
{
"epoch": 1.42,
"learning_rate": 1.0519601482103992e-05,
"loss": 0.1787,
"step": 4155
},
{
"epoch": 1.42,
"learning_rate": 1.0508225806690913e-05,
"loss": 0.2324,
"step": 4156
},
{
"epoch": 1.42,
"learning_rate": 1.0496854648458518e-05,
"loss": 0.1948,
"step": 4157
},
{
"epoch": 1.42,
"learning_rate": 1.0485488010951277e-05,
"loss": 0.2143,
"step": 4158
},
{
"epoch": 1.42,
"learning_rate": 1.0474125897712228e-05,
"loss": 0.1942,
"step": 4159
},
{
"epoch": 1.42,
"learning_rate": 1.0462768312283033e-05,
"loss": 0.197,
"step": 4160
},
{
"epoch": 1.42,
"learning_rate": 1.0451415258203912e-05,
"loss": 0.2152,
"step": 4161
},
{
"epoch": 1.42,
"learning_rate": 1.044006673901369e-05,
"loss": 0.207,
"step": 4162
},
{
"epoch": 1.42,
"learning_rate": 1.0428722758249784e-05,
"loss": 0.2468,
"step": 4163
},
{
"epoch": 1.42,
"learning_rate": 1.0417383319448174e-05,
"loss": 0.2039,
"step": 4164
},
{
"epoch": 1.42,
"learning_rate": 1.040604842614344e-05,
"loss": 0.2083,
"step": 4165
},
{
"epoch": 1.42,
"learning_rate": 1.0394718081868743e-05,
"loss": 0.2175,
"step": 4166
},
{
"epoch": 1.42,
"learning_rate": 1.0383392290155838e-05,
"loss": 0.2727,
"step": 4167
},
{
"epoch": 1.42,
"learning_rate": 1.037207105453503e-05,
"loss": 0.1683,
"step": 4168
},
{
"epoch": 1.42,
"learning_rate": 1.0360754378535226e-05,
"loss": 0.2011,
"step": 4169
},
{
"epoch": 1.42,
"learning_rate": 1.034944226568392e-05,
"loss": 0.2224,
"step": 4170
},
{
"epoch": 1.42,
"learning_rate": 1.0338134719507156e-05,
"loss": 0.2081,
"step": 4171
},
{
"epoch": 1.42,
"learning_rate": 1.0326831743529586e-05,
"loss": 0.1954,
"step": 4172
},
{
"epoch": 1.42,
"learning_rate": 1.0315533341274403e-05,
"loss": 0.2033,
"step": 4173
},
{
"epoch": 1.42,
"learning_rate": 1.03042395162634e-05,
"loss": 0.1954,
"step": 4174
},
{
"epoch": 1.42,
"learning_rate": 1.029295027201695e-05,
"loss": 0.2598,
"step": 4175
},
{
"epoch": 1.42,
"learning_rate": 1.0281665612053962e-05,
"loss": 0.1743,
"step": 4176
},
{
"epoch": 1.43,
"learning_rate": 1.0270385539891955e-05,
"loss": 0.213,
"step": 4177
},
{
"epoch": 1.43,
"learning_rate": 1.0259110059046989e-05,
"loss": 0.1963,
"step": 4178
},
{
"epoch": 1.43,
"learning_rate": 1.0247839173033707e-05,
"loss": 0.2064,
"step": 4179
},
{
"epoch": 1.43,
"learning_rate": 1.0236572885365325e-05,
"loss": 0.2494,
"step": 4180
},
{
"epoch": 1.43,
"learning_rate": 1.0225311199553622e-05,
"loss": 0.2055,
"step": 4181
},
{
"epoch": 1.43,
"learning_rate": 1.0214054119108923e-05,
"loss": 0.1929,
"step": 4182
},
{
"epoch": 1.43,
"learning_rate": 1.0202801647540142e-05,
"loss": 0.2171,
"step": 4183
},
{
"epoch": 1.43,
"learning_rate": 1.019155378835476e-05,
"loss": 0.1944,
"step": 4184
},
{
"epoch": 1.43,
"learning_rate": 1.0180310545058785e-05,
"loss": 0.2345,
"step": 4185
},
{
"epoch": 1.43,
"learning_rate": 1.016907192115683e-05,
"loss": 0.2135,
"step": 4186
},
{
"epoch": 1.43,
"learning_rate": 1.015783792015203e-05,
"loss": 0.2174,
"step": 4187
},
{
"epoch": 1.43,
"learning_rate": 1.0146608545546105e-05,
"loss": 0.2378,
"step": 4188
},
{
"epoch": 1.43,
"learning_rate": 1.0135383800839333e-05,
"loss": 0.1846,
"step": 4189
},
{
"epoch": 1.43,
"learning_rate": 1.0124163689530522e-05,
"loss": 0.1904,
"step": 4190
},
{
"epoch": 1.43,
"learning_rate": 1.011294821511707e-05,
"loss": 0.2576,
"step": 4191
},
{
"epoch": 1.43,
"learning_rate": 1.0101737381094895e-05,
"loss": 0.221,
"step": 4192
},
{
"epoch": 1.43,
"learning_rate": 1.0090531190958514e-05,
"loss": 0.2381,
"step": 4193
},
{
"epoch": 1.43,
"learning_rate": 1.0079329648200948e-05,
"loss": 0.2136,
"step": 4194
},
{
"epoch": 1.43,
"learning_rate": 1.0068132756313794e-05,
"loss": 0.222,
"step": 4195
},
{
"epoch": 1.43,
"learning_rate": 1.005694051878721e-05,
"loss": 0.2034,
"step": 4196
},
{
"epoch": 1.43,
"learning_rate": 1.0045752939109876e-05,
"loss": 0.2164,
"step": 4197
},
{
"epoch": 1.43,
"learning_rate": 1.0034570020769043e-05,
"loss": 0.1969,
"step": 4198
},
{
"epoch": 1.43,
"learning_rate": 1.0023391767250484e-05,
"loss": 0.2273,
"step": 4199
},
{
"epoch": 1.43,
"learning_rate": 1.0012218182038546e-05,
"loss": 0.1719,
"step": 4200
},
{
"epoch": 1.43,
"learning_rate": 1.0001049268616117e-05,
"loss": 0.2072,
"step": 4201
},
{
"epoch": 1.43,
"learning_rate": 9.989885030464598e-06,
"loss": 0.2191,
"step": 4202
},
{
"epoch": 1.43,
"learning_rate": 9.978725471063977e-06,
"loss": 0.2009,
"step": 4203
},
{
"epoch": 1.43,
"learning_rate": 9.967570593892742e-06,
"loss": 0.2127,
"step": 4204
},
{
"epoch": 1.43,
"learning_rate": 9.956420402427957e-06,
"loss": 0.2192,
"step": 4205
},
{
"epoch": 1.44,
"learning_rate": 9.945274900145188e-06,
"loss": 0.1987,
"step": 4206
},
{
"epoch": 1.44,
"learning_rate": 9.934134090518593e-06,
"loss": 0.1953,
"step": 4207
},
{
"epoch": 1.44,
"learning_rate": 9.922997977020809e-06,
"loss": 0.1929,
"step": 4208
},
{
"epoch": 1.44,
"learning_rate": 9.911866563123042e-06,
"loss": 0.2259,
"step": 4209
},
{
"epoch": 1.44,
"learning_rate": 9.900739852295041e-06,
"loss": 0.2324,
"step": 4210
},
{
"epoch": 1.44,
"learning_rate": 9.889617848005054e-06,
"loss": 0.1879,
"step": 4211
},
{
"epoch": 1.44,
"learning_rate": 9.878500553719893e-06,
"loss": 0.2042,
"step": 4212
},
{
"epoch": 1.44,
"learning_rate": 9.867387972904885e-06,
"loss": 0.213,
"step": 4213
},
{
"epoch": 1.44,
"learning_rate": 9.856280109023897e-06,
"loss": 0.1981,
"step": 4214
},
{
"epoch": 1.44,
"learning_rate": 9.845176965539333e-06,
"loss": 0.2321,
"step": 4215
},
{
"epoch": 1.44,
"learning_rate": 9.834078545912095e-06,
"loss": 0.2361,
"step": 4216
},
{
"epoch": 1.44,
"learning_rate": 9.822984853601652e-06,
"loss": 0.2291,
"step": 4217
},
{
"epoch": 1.44,
"learning_rate": 9.811895892065967e-06,
"loss": 0.2255,
"step": 4218
},
{
"epoch": 1.44,
"learning_rate": 9.800811664761545e-06,
"loss": 0.1958,
"step": 4219
},
{
"epoch": 1.44,
"learning_rate": 9.789732175143413e-06,
"loss": 0.224,
"step": 4220
},
{
"epoch": 1.44,
"learning_rate": 9.77865742666512e-06,
"loss": 0.2042,
"step": 4221
},
{
"epoch": 1.44,
"learning_rate": 9.76758742277875e-06,
"loss": 0.1992,
"step": 4222
},
{
"epoch": 1.44,
"learning_rate": 9.756522166934873e-06,
"loss": 0.1756,
"step": 4223
},
{
"epoch": 1.44,
"learning_rate": 9.745461662582622e-06,
"loss": 0.2292,
"step": 4224
},
{
"epoch": 1.44,
"learning_rate": 9.734405913169612e-06,
"loss": 0.2378,
"step": 4225
},
{
"epoch": 1.44,
"learning_rate": 9.723354922141998e-06,
"loss": 0.2,
"step": 4226
},
{
"epoch": 1.44,
"learning_rate": 9.712308692944458e-06,
"loss": 0.2213,
"step": 4227
},
{
"epoch": 1.44,
"learning_rate": 9.701267229020153e-06,
"loss": 0.2293,
"step": 4228
},
{
"epoch": 1.44,
"learning_rate": 9.690230533810798e-06,
"loss": 0.2094,
"step": 4229
},
{
"epoch": 1.44,
"learning_rate": 9.679198610756588e-06,
"loss": 0.2135,
"step": 4230
},
{
"epoch": 1.44,
"learning_rate": 9.66817146329626e-06,
"loss": 0.2345,
"step": 4231
},
{
"epoch": 1.44,
"learning_rate": 9.65714909486703e-06,
"loss": 0.219,
"step": 4232
},
{
"epoch": 1.44,
"learning_rate": 9.646131508904654e-06,
"loss": 0.1884,
"step": 4233
},
{
"epoch": 1.44,
"learning_rate": 9.635118708843386e-06,
"loss": 0.2437,
"step": 4234
},
{
"epoch": 1.44,
"learning_rate": 9.624110698115982e-06,
"loss": 0.2031,
"step": 4235
},
{
"epoch": 1.45,
"learning_rate": 9.613107480153723e-06,
"loss": 0.209,
"step": 4236
},
{
"epoch": 1.45,
"learning_rate": 9.60210905838637e-06,
"loss": 0.2075,
"step": 4237
},
{
"epoch": 1.45,
"learning_rate": 9.591115436242215e-06,
"loss": 0.2134,
"step": 4238
},
{
"epoch": 1.45,
"learning_rate": 9.580126617148025e-06,
"loss": 0.1841,
"step": 4239
},
{
"epoch": 1.45,
"learning_rate": 9.5691426045291e-06,
"loss": 0.1943,
"step": 4240
},
{
"epoch": 1.45,
"learning_rate": 9.558163401809231e-06,
"loss": 0.2129,
"step": 4241
},
{
"epoch": 1.45,
"learning_rate": 9.547189012410693e-06,
"loss": 0.2228,
"step": 4242
},
{
"epoch": 1.45,
"learning_rate": 9.536219439754292e-06,
"loss": 0.2271,
"step": 4243
},
{
"epoch": 1.45,
"learning_rate": 9.525254687259302e-06,
"loss": 0.2238,
"step": 4244
},
{
"epoch": 1.45,
"learning_rate": 9.514294758343509e-06,
"loss": 0.2207,
"step": 4245
},
{
"epoch": 1.45,
"learning_rate": 9.50333965642321e-06,
"loss": 0.2038,
"step": 4246
},
{
"epoch": 1.45,
"learning_rate": 9.492389384913158e-06,
"loss": 0.1983,
"step": 4247
},
{
"epoch": 1.45,
"learning_rate": 9.481443947226636e-06,
"loss": 0.2237,
"step": 4248
},
{
"epoch": 1.45,
"learning_rate": 9.470503346775414e-06,
"loss": 0.2403,
"step": 4249
},
{
"epoch": 1.45,
"learning_rate": 9.459567586969748e-06,
"loss": 0.2191,
"step": 4250
},
{
"epoch": 1.45,
"learning_rate": 9.448636671218372e-06,
"loss": 0.2344,
"step": 4251
},
{
"epoch": 1.45,
"learning_rate": 9.437710602928535e-06,
"loss": 0.2031,
"step": 4252
},
{
"epoch": 1.45,
"learning_rate": 9.42678938550597e-06,
"loss": 0.2448,
"step": 4253
},
{
"epoch": 1.45,
"learning_rate": 9.41587302235487e-06,
"loss": 0.2102,
"step": 4254
},
{
"epoch": 1.45,
"learning_rate": 9.404961516877964e-06,
"loss": 0.1956,
"step": 4255
},
{
"epoch": 1.45,
"learning_rate": 9.394054872476419e-06,
"loss": 0.1964,
"step": 4256
},
{
"epoch": 1.45,
"learning_rate": 9.383153092549918e-06,
"loss": 0.2275,
"step": 4257
},
{
"epoch": 1.45,
"learning_rate": 9.372256180496605e-06,
"loss": 0.213,
"step": 4258
},
{
"epoch": 1.45,
"learning_rate": 9.361364139713131e-06,
"loss": 0.207,
"step": 4259
},
{
"epoch": 1.45,
"learning_rate": 9.350476973594615e-06,
"loss": 0.1993,
"step": 4260
},
{
"epoch": 1.45,
"learning_rate": 9.33959468553465e-06,
"loss": 0.2028,
"step": 4261
},
{
"epoch": 1.45,
"learning_rate": 9.328717278925323e-06,
"loss": 0.2403,
"step": 4262
},
{
"epoch": 1.45,
"learning_rate": 9.317844757157188e-06,
"loss": 0.1885,
"step": 4263
},
{
"epoch": 1.45,
"learning_rate": 9.306977123619293e-06,
"loss": 0.2206,
"step": 4264
},
{
"epoch": 1.46,
"learning_rate": 9.29611438169913e-06,
"loss": 0.2195,
"step": 4265
},
{
"epoch": 1.46,
"learning_rate": 9.285256534782702e-06,
"loss": 0.2464,
"step": 4266
},
{
"epoch": 1.46,
"learning_rate": 9.274403586254473e-06,
"loss": 0.2054,
"step": 4267
},
{
"epoch": 1.46,
"learning_rate": 9.263555539497362e-06,
"loss": 0.2477,
"step": 4268
},
{
"epoch": 1.46,
"learning_rate": 9.252712397892798e-06,
"loss": 0.2426,
"step": 4269
},
{
"epoch": 1.46,
"learning_rate": 9.241874164820635e-06,
"loss": 0.2027,
"step": 4270
},
{
"epoch": 1.46,
"learning_rate": 9.231040843659233e-06,
"loss": 0.1856,
"step": 4271
},
{
"epoch": 1.46,
"learning_rate": 9.22021243778542e-06,
"loss": 0.2365,
"step": 4272
},
{
"epoch": 1.46,
"learning_rate": 9.20938895057446e-06,
"loss": 0.1934,
"step": 4273
},
{
"epoch": 1.46,
"learning_rate": 9.198570385400121e-06,
"loss": 0.1878,
"step": 4274
},
{
"epoch": 1.46,
"learning_rate": 9.187756745634608e-06,
"loss": 0.2061,
"step": 4275
},
{
"epoch": 1.46,
"learning_rate": 9.176948034648606e-06,
"loss": 0.2226,
"step": 4276
},
{
"epoch": 1.46,
"learning_rate": 9.166144255811266e-06,
"loss": 0.2053,
"step": 4277
},
{
"epoch": 1.46,
"learning_rate": 9.155345412490193e-06,
"loss": 0.1882,
"step": 4278
},
{
"epoch": 1.46,
"learning_rate": 9.144551508051466e-06,
"loss": 0.2161,
"step": 4279
},
{
"epoch": 1.46,
"learning_rate": 9.1337625458596e-06,
"loss": 0.1814,
"step": 4280
},
{
"epoch": 1.46,
"learning_rate": 9.1229785292776e-06,
"loss": 0.2035,
"step": 4281
},
{
"epoch": 1.46,
"learning_rate": 9.112199461666901e-06,
"loss": 0.2127,
"step": 4282
},
{
"epoch": 1.46,
"learning_rate": 9.101425346387421e-06,
"loss": 0.2322,
"step": 4283
},
{
"epoch": 1.46,
"learning_rate": 9.090656186797508e-06,
"loss": 0.2364,
"step": 4284
},
{
"epoch": 1.46,
"learning_rate": 9.079891986253988e-06,
"loss": 0.2016,
"step": 4285
},
{
"epoch": 1.46,
"learning_rate": 9.069132748112142e-06,
"loss": 0.2087,
"step": 4286
},
{
"epoch": 1.46,
"learning_rate": 9.058378475725673e-06,
"loss": 0.1974,
"step": 4287
},
{
"epoch": 1.46,
"learning_rate": 9.047629172446781e-06,
"loss": 0.2118,
"step": 4288
},
{
"epoch": 1.46,
"learning_rate": 9.036884841626064e-06,
"loss": 0.2187,
"step": 4289
},
{
"epoch": 1.46,
"learning_rate": 9.026145486612639e-06,
"loss": 0.218,
"step": 4290
},
{
"epoch": 1.46,
"learning_rate": 9.015411110754002e-06,
"loss": 0.202,
"step": 4291
},
{
"epoch": 1.46,
"learning_rate": 9.004681717396139e-06,
"loss": 0.1965,
"step": 4292
},
{
"epoch": 1.46,
"learning_rate": 8.99395730988348e-06,
"loss": 0.1891,
"step": 4293
},
{
"epoch": 1.47,
"learning_rate": 8.983237891558876e-06,
"loss": 0.1897,
"step": 4294
},
{
"epoch": 1.47,
"learning_rate": 8.97252346576366e-06,
"loss": 0.1818,
"step": 4295
},
{
"epoch": 1.47,
"learning_rate": 8.961814035837569e-06,
"loss": 0.2195,
"step": 4296
},
{
"epoch": 1.47,
"learning_rate": 8.951109605118807e-06,
"loss": 0.2197,
"step": 4297
},
{
"epoch": 1.47,
"learning_rate": 8.940410176944028e-06,
"loss": 0.2407,
"step": 4298
},
{
"epoch": 1.47,
"learning_rate": 8.929715754648297e-06,
"loss": 0.2329,
"step": 4299
},
{
"epoch": 1.47,
"learning_rate": 8.919026341565152e-06,
"loss": 0.1951,
"step": 4300
},
{
"epoch": 1.47,
"learning_rate": 8.908341941026533e-06,
"loss": 0.21,
"step": 4301
},
{
"epoch": 1.47,
"learning_rate": 8.897662556362847e-06,
"loss": 0.2263,
"step": 4302
},
{
"epoch": 1.47,
"learning_rate": 8.886988190902929e-06,
"loss": 0.1822,
"step": 4303
},
{
"epoch": 1.47,
"learning_rate": 8.876318847974058e-06,
"loss": 0.1796,
"step": 4304
},
{
"epoch": 1.47,
"learning_rate": 8.865654530901918e-06,
"loss": 0.2254,
"step": 4305
},
{
"epoch": 1.47,
"learning_rate": 8.854995243010655e-06,
"loss": 0.2202,
"step": 4306
},
{
"epoch": 1.47,
"learning_rate": 8.844340987622851e-06,
"loss": 0.1917,
"step": 4307
},
{
"epoch": 1.47,
"learning_rate": 8.833691768059488e-06,
"loss": 0.1789,
"step": 4308
},
{
"epoch": 1.47,
"learning_rate": 8.823047587640012e-06,
"loss": 0.1896,
"step": 4309
},
{
"epoch": 1.47,
"learning_rate": 8.812408449682269e-06,
"loss": 0.2185,
"step": 4310
},
{
"epoch": 1.47,
"learning_rate": 8.801774357502554e-06,
"loss": 0.217,
"step": 4311
},
{
"epoch": 1.47,
"learning_rate": 8.791145314415594e-06,
"loss": 0.2088,
"step": 4312
},
{
"epoch": 1.47,
"learning_rate": 8.780521323734516e-06,
"loss": 0.2223,
"step": 4313
},
{
"epoch": 1.47,
"learning_rate": 8.769902388770899e-06,
"loss": 0.2108,
"step": 4314
},
{
"epoch": 1.47,
"learning_rate": 8.75928851283472e-06,
"loss": 0.1977,
"step": 4315
},
{
"epoch": 1.47,
"learning_rate": 8.748679699234403e-06,
"loss": 0.2228,
"step": 4316
},
{
"epoch": 1.47,
"learning_rate": 8.738075951276784e-06,
"loss": 0.2167,
"step": 4317
},
{
"epoch": 1.47,
"learning_rate": 8.727477272267117e-06,
"loss": 0.223,
"step": 4318
},
{
"epoch": 1.47,
"learning_rate": 8.716883665509094e-06,
"loss": 0.192,
"step": 4319
},
{
"epoch": 1.47,
"learning_rate": 8.70629513430479e-06,
"loss": 0.2149,
"step": 4320
},
{
"epoch": 1.47,
"learning_rate": 8.695711681954737e-06,
"loss": 0.2242,
"step": 4321
},
{
"epoch": 1.47,
"learning_rate": 8.685133311757853e-06,
"loss": 0.2196,
"step": 4322
},
{
"epoch": 1.47,
"learning_rate": 8.674560027011488e-06,
"loss": 0.2306,
"step": 4323
},
{
"epoch": 1.48,
"learning_rate": 8.663991831011415e-06,
"loss": 0.2188,
"step": 4324
},
{
"epoch": 1.48,
"learning_rate": 8.653428727051793e-06,
"loss": 0.1828,
"step": 4325
},
{
"epoch": 1.48,
"learning_rate": 8.642870718425227e-06,
"loss": 0.1966,
"step": 4326
},
{
"epoch": 1.48,
"learning_rate": 8.632317808422702e-06,
"loss": 0.23,
"step": 4327
},
{
"epoch": 1.48,
"learning_rate": 8.621770000333635e-06,
"loss": 0.2117,
"step": 4328
},
{
"epoch": 1.48,
"learning_rate": 8.611227297445857e-06,
"loss": 0.2095,
"step": 4329
},
{
"epoch": 1.48,
"learning_rate": 8.600689703045578e-06,
"loss": 0.2356,
"step": 4330
},
{
"epoch": 1.48,
"learning_rate": 8.590157220417445e-06,
"loss": 0.2319,
"step": 4331
},
{
"epoch": 1.48,
"learning_rate": 8.5796298528445e-06,
"loss": 0.2535,
"step": 4332
},
{
"epoch": 1.48,
"learning_rate": 8.569107603608206e-06,
"loss": 0.1928,
"step": 4333
},
{
"epoch": 1.48,
"learning_rate": 8.558590475988395e-06,
"loss": 0.2453,
"step": 4334
},
{
"epoch": 1.48,
"learning_rate": 8.548078473263344e-06,
"loss": 0.1915,
"step": 4335
},
{
"epoch": 1.48,
"learning_rate": 8.53757159870969e-06,
"loss": 0.362,
"step": 4336
},
{
"epoch": 1.48,
"learning_rate": 8.527069855602512e-06,
"loss": 0.1975,
"step": 4337
},
{
"epoch": 1.48,
"learning_rate": 8.516573247215273e-06,
"loss": 0.1986,
"step": 4338
},
{
"epoch": 1.48,
"learning_rate": 8.506081776819821e-06,
"loss": 0.229,
"step": 4339
},
{
"epoch": 1.48,
"learning_rate": 8.495595447686434e-06,
"loss": 0.2313,
"step": 4340
},
{
"epoch": 1.48,
"learning_rate": 8.485114263083749e-06,
"loss": 0.203,
"step": 4341
},
{
"epoch": 1.48,
"learning_rate": 8.474638226278827e-06,
"loss": 0.2237,
"step": 4342
},
{
"epoch": 1.48,
"learning_rate": 8.464167340537132e-06,
"loss": 0.2111,
"step": 4343
},
{
"epoch": 1.48,
"learning_rate": 8.453701609122486e-06,
"loss": 0.2063,
"step": 4344
},
{
"epoch": 1.48,
"learning_rate": 8.443241035297133e-06,
"loss": 0.1943,
"step": 4345
},
{
"epoch": 1.48,
"learning_rate": 8.432785622321707e-06,
"loss": 0.2011,
"step": 4346
},
{
"epoch": 1.48,
"learning_rate": 8.422335373455232e-06,
"loss": 0.2046,
"step": 4347
},
{
"epoch": 1.48,
"learning_rate": 8.411890291955105e-06,
"loss": 0.1798,
"step": 4348
},
{
"epoch": 1.48,
"learning_rate": 8.401450381077134e-06,
"loss": 0.2125,
"step": 4349
},
{
"epoch": 1.48,
"learning_rate": 8.391015644075514e-06,
"loss": 0.2107,
"step": 4350
},
{
"epoch": 1.48,
"learning_rate": 8.380586084202807e-06,
"loss": 0.2288,
"step": 4351
},
{
"epoch": 1.48,
"learning_rate": 8.370161704709992e-06,
"loss": 0.228,
"step": 4352
},
{
"epoch": 1.49,
"learning_rate": 8.359742508846394e-06,
"loss": 0.26,
"step": 4353
},
{
"epoch": 1.49,
"learning_rate": 8.34932849985976e-06,
"loss": 0.2003,
"step": 4354
},
{
"epoch": 1.49,
"learning_rate": 8.338919680996207e-06,
"loss": 0.197,
"step": 4355
},
{
"epoch": 1.49,
"learning_rate": 8.328516055500218e-06,
"loss": 0.2137,
"step": 4356
},
{
"epoch": 1.49,
"learning_rate": 8.318117626614685e-06,
"loss": 0.2305,
"step": 4357
},
{
"epoch": 1.49,
"learning_rate": 8.307724397580854e-06,
"loss": 0.1997,
"step": 4358
},
{
"epoch": 1.49,
"learning_rate": 8.29733637163837e-06,
"loss": 0.1766,
"step": 4359
},
{
"epoch": 1.49,
"learning_rate": 8.286953552025245e-06,
"loss": 0.1736,
"step": 4360
},
{
"epoch": 1.49,
"learning_rate": 8.276575941977881e-06,
"loss": 0.2095,
"step": 4361
},
{
"epoch": 1.49,
"learning_rate": 8.266203544731033e-06,
"loss": 0.2144,
"step": 4362
},
{
"epoch": 1.49,
"learning_rate": 8.255836363517852e-06,
"loss": 0.2415,
"step": 4363
},
{
"epoch": 1.49,
"learning_rate": 8.245474401569863e-06,
"loss": 0.2024,
"step": 4364
},
{
"epoch": 1.49,
"learning_rate": 8.235117662116942e-06,
"loss": 0.2101,
"step": 4365
},
{
"epoch": 1.49,
"learning_rate": 8.22476614838737e-06,
"loss": 0.2067,
"step": 4366
},
{
"epoch": 1.49,
"learning_rate": 8.214419863607758e-06,
"loss": 0.178,
"step": 4367
},
{
"epoch": 1.49,
"learning_rate": 8.20407881100313e-06,
"loss": 0.2185,
"step": 4368
},
{
"epoch": 1.49,
"learning_rate": 8.19374299379686e-06,
"loss": 0.2375,
"step": 4369
},
{
"epoch": 1.49,
"learning_rate": 8.183412415210675e-06,
"loss": 0.2196,
"step": 4370
},
{
"epoch": 1.49,
"learning_rate": 8.1730870784647e-06,
"loss": 0.2081,
"step": 4371
},
{
"epoch": 1.49,
"learning_rate": 8.162766986777396e-06,
"loss": 0.2331,
"step": 4372
},
{
"epoch": 1.49,
"learning_rate": 8.152452143365607e-06,
"loss": 0.1825,
"step": 4373
},
{
"epoch": 1.49,
"learning_rate": 8.142142551444543e-06,
"loss": 0.2025,
"step": 4374
},
{
"epoch": 1.49,
"learning_rate": 8.131838214227763e-06,
"loss": 0.2188,
"step": 4375
},
{
"epoch": 1.49,
"learning_rate": 8.12153913492721e-06,
"loss": 0.2125,
"step": 4376
},
{
"epoch": 1.49,
"learning_rate": 8.11124531675316e-06,
"loss": 0.2244,
"step": 4377
},
{
"epoch": 1.49,
"learning_rate": 8.100956762914275e-06,
"loss": 0.2103,
"step": 4378
},
{
"epoch": 1.49,
"learning_rate": 8.09067347661755e-06,
"loss": 0.2032,
"step": 4379
},
{
"epoch": 1.49,
"learning_rate": 8.08039546106836e-06,
"loss": 0.199,
"step": 4380
},
{
"epoch": 1.49,
"learning_rate": 8.070122719470439e-06,
"loss": 0.2091,
"step": 4381
},
{
"epoch": 1.5,
"learning_rate": 8.059855255025848e-06,
"loss": 0.2231,
"step": 4382
},
{
"epoch": 1.5,
"learning_rate": 8.049593070935044e-06,
"loss": 0.2242,
"step": 4383
},
{
"epoch": 1.5,
"learning_rate": 8.039336170396796e-06,
"loss": 0.2162,
"step": 4384
},
{
"epoch": 1.5,
"learning_rate": 8.029084556608266e-06,
"loss": 0.2277,
"step": 4385
},
{
"epoch": 1.5,
"learning_rate": 8.018838232764924e-06,
"loss": 0.2022,
"step": 4386
},
{
"epoch": 1.5,
"learning_rate": 8.00859720206065e-06,
"loss": 0.2159,
"step": 4387
},
{
"epoch": 1.5,
"learning_rate": 7.998361467687612e-06,
"loss": 0.1998,
"step": 4388
},
{
"epoch": 1.5,
"learning_rate": 7.988131032836366e-06,
"loss": 0.2328,
"step": 4389
},
{
"epoch": 1.5,
"learning_rate": 7.977905900695814e-06,
"loss": 0.1936,
"step": 4390
},
{
"epoch": 1.5,
"learning_rate": 7.96768607445318e-06,
"loss": 0.1904,
"step": 4391
},
{
"epoch": 1.5,
"learning_rate": 7.957471557294064e-06,
"loss": 0.1927,
"step": 4392
},
{
"epoch": 1.5,
"learning_rate": 7.947262352402385e-06,
"loss": 0.216,
"step": 4393
},
{
"epoch": 1.5,
"learning_rate": 7.937058462960425e-06,
"loss": 0.1965,
"step": 4394
},
{
"epoch": 1.5,
"learning_rate": 7.92685989214881e-06,
"loss": 0.188,
"step": 4395
},
{
"epoch": 1.5,
"learning_rate": 7.91666664314649e-06,
"loss": 0.2158,
"step": 4396
},
{
"epoch": 1.5,
"learning_rate": 7.90647871913078e-06,
"loss": 0.1947,
"step": 4397
},
{
"epoch": 1.5,
"learning_rate": 7.896296123277306e-06,
"loss": 0.2007,
"step": 4398
},
{
"epoch": 1.5,
"learning_rate": 7.886118858760056e-06,
"loss": 0.2188,
"step": 4399
},
{
"epoch": 1.5,
"learning_rate": 7.875946928751355e-06,
"loss": 0.2259,
"step": 4400
},
{
"epoch": 1.5,
"learning_rate": 7.865780336421855e-06,
"loss": 0.2101,
"step": 4401
},
{
"epoch": 1.5,
"learning_rate": 7.855619084940557e-06,
"loss": 0.2016,
"step": 4402
},
{
"epoch": 1.5,
"learning_rate": 7.84546317747478e-06,
"loss": 0.1818,
"step": 4403
},
{
"epoch": 1.5,
"learning_rate": 7.835312617190197e-06,
"loss": 0.2236,
"step": 4404
},
{
"epoch": 1.5,
"learning_rate": 7.825167407250789e-06,
"loss": 0.2048,
"step": 4405
},
{
"epoch": 1.5,
"learning_rate": 7.815027550818893e-06,
"loss": 0.2274,
"step": 4406
},
{
"epoch": 1.5,
"learning_rate": 7.804893051055176e-06,
"loss": 0.1923,
"step": 4407
},
{
"epoch": 1.5,
"learning_rate": 7.794763911118614e-06,
"loss": 0.2041,
"step": 4408
},
{
"epoch": 1.5,
"learning_rate": 7.784640134166537e-06,
"loss": 0.1982,
"step": 4409
},
{
"epoch": 1.5,
"learning_rate": 7.77452172335458e-06,
"loss": 0.2153,
"step": 4410
},
{
"epoch": 1.5,
"learning_rate": 7.764408681836732e-06,
"loss": 0.2455,
"step": 4411
},
{
"epoch": 1.51,
"learning_rate": 7.75430101276528e-06,
"loss": 0.2105,
"step": 4412
},
{
"epoch": 1.51,
"learning_rate": 7.74419871929086e-06,
"loss": 0.2258,
"step": 4413
},
{
"epoch": 1.51,
"learning_rate": 7.73410180456242e-06,
"loss": 0.1856,
"step": 4414
},
{
"epoch": 1.51,
"learning_rate": 7.724010271727233e-06,
"loss": 0.2298,
"step": 4415
},
{
"epoch": 1.51,
"learning_rate": 7.713924123930905e-06,
"loss": 0.2076,
"step": 4416
},
{
"epoch": 1.51,
"learning_rate": 7.70384336431734e-06,
"loss": 0.2201,
"step": 4417
},
{
"epoch": 1.51,
"learning_rate": 7.693767996028791e-06,
"loss": 0.1724,
"step": 4418
},
{
"epoch": 1.51,
"learning_rate": 7.6836980222058e-06,
"loss": 0.2161,
"step": 4419
},
{
"epoch": 1.51,
"learning_rate": 7.673633445987252e-06,
"loss": 0.204,
"step": 4420
},
{
"epoch": 1.51,
"learning_rate": 7.66357427051035e-06,
"loss": 0.2247,
"step": 4421
},
{
"epoch": 1.51,
"learning_rate": 7.653520498910588e-06,
"loss": 0.2022,
"step": 4422
},
{
"epoch": 1.51,
"learning_rate": 7.643472134321808e-06,
"loss": 0.2348,
"step": 4423
},
{
"epoch": 1.51,
"learning_rate": 7.633429179876137e-06,
"loss": 0.2288,
"step": 4424
},
{
"epoch": 1.51,
"learning_rate": 7.6233916387040335e-06,
"loss": 0.2327,
"step": 4425
},
{
"epoch": 1.51,
"learning_rate": 7.613359513934274e-06,
"loss": 0.2101,
"step": 4426
},
{
"epoch": 1.51,
"learning_rate": 7.603332808693922e-06,
"loss": 0.1969,
"step": 4427
},
{
"epoch": 1.51,
"learning_rate": 7.5933115261083765e-06,
"loss": 0.1912,
"step": 4428
},
{
"epoch": 1.51,
"learning_rate": 7.5832956693013355e-06,
"loss": 0.1782,
"step": 4429
},
{
"epoch": 1.51,
"learning_rate": 7.573285241394815e-06,
"loss": 0.2215,
"step": 4430
},
{
"epoch": 1.51,
"learning_rate": 7.563280245509114e-06,
"loss": 0.2069,
"step": 4431
},
{
"epoch": 1.51,
"learning_rate": 7.5532806847628616e-06,
"loss": 0.1971,
"step": 4432
},
{
"epoch": 1.51,
"learning_rate": 7.543286562272997e-06,
"loss": 0.2196,
"step": 4433
},
{
"epoch": 1.51,
"learning_rate": 7.533297881154738e-06,
"loss": 0.2356,
"step": 4434
},
{
"epoch": 1.51,
"learning_rate": 7.5233146445216366e-06,
"loss": 0.1617,
"step": 4435
},
{
"epoch": 1.51,
"learning_rate": 7.513336855485517e-06,
"loss": 0.2172,
"step": 4436
},
{
"epoch": 1.51,
"learning_rate": 7.503364517156536e-06,
"loss": 0.2122,
"step": 4437
},
{
"epoch": 1.51,
"learning_rate": 7.493397632643123e-06,
"loss": 0.1836,
"step": 4438
},
{
"epoch": 1.51,
"learning_rate": 7.4834362050520275e-06,
"loss": 0.2115,
"step": 4439
},
{
"epoch": 1.51,
"learning_rate": 7.4734802374882974e-06,
"loss": 0.2056,
"step": 4440
},
{
"epoch": 1.52,
"learning_rate": 7.46352973305526e-06,
"loss": 0.1927,
"step": 4441
},
{
"epoch": 1.52,
"learning_rate": 7.4535846948545595e-06,
"loss": 0.2057,
"step": 4442
},
{
"epoch": 1.52,
"learning_rate": 7.443645125986132e-06,
"loss": 0.2267,
"step": 4443
},
{
"epoch": 1.52,
"learning_rate": 7.433711029548207e-06,
"loss": 0.2022,
"step": 4444
},
{
"epoch": 1.52,
"learning_rate": 7.423782408637298e-06,
"loss": 0.1978,
"step": 4445
},
{
"epoch": 1.52,
"learning_rate": 7.413859266348228e-06,
"loss": 0.2057,
"step": 4446
},
{
"epoch": 1.52,
"learning_rate": 7.403941605774109e-06,
"loss": 0.1967,
"step": 4447
},
{
"epoch": 1.52,
"learning_rate": 7.39402943000633e-06,
"loss": 0.2091,
"step": 4448
},
{
"epoch": 1.52,
"learning_rate": 7.384122742134594e-06,
"loss": 0.1968,
"step": 4449
},
{
"epoch": 1.52,
"learning_rate": 7.3742215452468645e-06,
"loss": 0.2333,
"step": 4450
},
{
"epoch": 1.52,
"learning_rate": 7.364325842429417e-06,
"loss": 0.1713,
"step": 4451
},
{
"epoch": 1.52,
"learning_rate": 7.354435636766815e-06,
"loss": 0.207,
"step": 4452
},
{
"epoch": 1.52,
"learning_rate": 7.3445509313418845e-06,
"loss": 0.1795,
"step": 4453
},
{
"epoch": 1.52,
"learning_rate": 7.3346717292357704e-06,
"loss": 0.2385,
"step": 4454
},
{
"epoch": 1.52,
"learning_rate": 7.324798033527866e-06,
"loss": 0.1988,
"step": 4455
},
{
"epoch": 1.52,
"learning_rate": 7.314929847295873e-06,
"loss": 0.2357,
"step": 4456
},
{
"epoch": 1.52,
"learning_rate": 7.305067173615776e-06,
"loss": 0.1998,
"step": 4457
},
{
"epoch": 1.52,
"learning_rate": 7.295210015561829e-06,
"loss": 0.1982,
"step": 4458
},
{
"epoch": 1.52,
"learning_rate": 7.285358376206583e-06,
"loss": 0.2165,
"step": 4459
},
{
"epoch": 1.52,
"learning_rate": 7.275512258620842e-06,
"loss": 0.2134,
"step": 4460
},
{
"epoch": 1.52,
"learning_rate": 7.265671665873719e-06,
"loss": 0.1991,
"step": 4461
},
{
"epoch": 1.52,
"learning_rate": 7.255836601032579e-06,
"loss": 0.2046,
"step": 4462
},
{
"epoch": 1.52,
"learning_rate": 7.246007067163091e-06,
"loss": 0.2267,
"step": 4463
},
{
"epoch": 1.52,
"learning_rate": 7.236183067329166e-06,
"loss": 0.2057,
"step": 4464
},
{
"epoch": 1.52,
"learning_rate": 7.2263646045930195e-06,
"loss": 0.2323,
"step": 4465
},
{
"epoch": 1.52,
"learning_rate": 7.216551682015138e-06,
"loss": 0.2055,
"step": 4466
},
{
"epoch": 1.52,
"learning_rate": 7.206744302654256e-06,
"loss": 0.2052,
"step": 4467
},
{
"epoch": 1.52,
"learning_rate": 7.196942469567417e-06,
"loss": 0.2093,
"step": 4468
},
{
"epoch": 1.52,
"learning_rate": 7.187146185809895e-06,
"loss": 0.266,
"step": 4469
},
{
"epoch": 1.53,
"learning_rate": 7.177355454435272e-06,
"loss": 0.2042,
"step": 4470
},
{
"epoch": 1.53,
"learning_rate": 7.167570278495375e-06,
"loss": 0.1891,
"step": 4471
},
{
"epoch": 1.53,
"learning_rate": 7.157790661040311e-06,
"loss": 0.2312,
"step": 4472
},
{
"epoch": 1.53,
"learning_rate": 7.1480166051184575e-06,
"loss": 0.202,
"step": 4473
},
{
"epoch": 1.53,
"learning_rate": 7.138248113776435e-06,
"loss": 0.2314,
"step": 4474
},
{
"epoch": 1.53,
"learning_rate": 7.128485190059167e-06,
"loss": 0.2382,
"step": 4475
},
{
"epoch": 1.53,
"learning_rate": 7.118727837009798e-06,
"loss": 0.2031,
"step": 4476
},
{
"epoch": 1.53,
"learning_rate": 7.108976057669769e-06,
"loss": 0.2109,
"step": 4477
},
{
"epoch": 1.53,
"learning_rate": 7.099229855078782e-06,
"loss": 0.1945,
"step": 4478
},
{
"epoch": 1.53,
"learning_rate": 7.089489232274776e-06,
"loss": 0.1873,
"step": 4479
},
{
"epoch": 1.53,
"learning_rate": 7.079754192293983e-06,
"loss": 0.2289,
"step": 4480
},
{
"epoch": 1.53,
"learning_rate": 7.070024738170861e-06,
"loss": 0.2285,
"step": 4481
},
{
"epoch": 1.53,
"learning_rate": 7.060300872938156e-06,
"loss": 0.2115,
"step": 4482
},
{
"epoch": 1.53,
"learning_rate": 7.0505825996268535e-06,
"loss": 0.2261,
"step": 4483
},
{
"epoch": 1.53,
"learning_rate": 7.040869921266211e-06,
"loss": 0.2343,
"step": 4484
},
{
"epoch": 1.53,
"learning_rate": 7.0311628408837345e-06,
"loss": 0.2566,
"step": 4485
},
{
"epoch": 1.53,
"learning_rate": 7.021461361505174e-06,
"loss": 0.2008,
"step": 4486
},
{
"epoch": 1.53,
"learning_rate": 7.011765486154556e-06,
"loss": 0.2014,
"step": 4487
},
{
"epoch": 1.53,
"learning_rate": 7.002075217854134e-06,
"loss": 0.1825,
"step": 4488
},
{
"epoch": 1.53,
"learning_rate": 6.992390559624446e-06,
"loss": 0.2139,
"step": 4489
},
{
"epoch": 1.53,
"learning_rate": 6.982711514484241e-06,
"loss": 0.21,
"step": 4490
},
{
"epoch": 1.53,
"learning_rate": 6.973038085450556e-06,
"loss": 0.232,
"step": 4491
},
{
"epoch": 1.53,
"learning_rate": 6.963370275538666e-06,
"loss": 0.2157,
"step": 4492
},
{
"epoch": 1.53,
"learning_rate": 6.953708087762076e-06,
"loss": 0.2072,
"step": 4493
},
{
"epoch": 1.53,
"learning_rate": 6.944051525132567e-06,
"loss": 0.231,
"step": 4494
},
{
"epoch": 1.53,
"learning_rate": 6.934400590660137e-06,
"loss": 0.2105,
"step": 4495
},
{
"epoch": 1.53,
"learning_rate": 6.924755287353055e-06,
"loss": 0.2445,
"step": 4496
},
{
"epoch": 1.53,
"learning_rate": 6.915115618217824e-06,
"loss": 0.2173,
"step": 4497
},
{
"epoch": 1.53,
"learning_rate": 6.905481586259194e-06,
"loss": 0.202,
"step": 4498
},
{
"epoch": 1.53,
"learning_rate": 6.895853194480159e-06,
"loss": 0.1948,
"step": 4499
},
{
"epoch": 1.54,
"learning_rate": 6.886230445881939e-06,
"loss": 0.2257,
"step": 4500
},
{
"epoch": 1.54,
"learning_rate": 6.876613343464023e-06,
"loss": 0.1916,
"step": 4501
},
{
"epoch": 1.54,
"learning_rate": 6.867001890224109e-06,
"loss": 0.2089,
"step": 4502
},
{
"epoch": 1.54,
"learning_rate": 6.857396089158158e-06,
"loss": 0.2115,
"step": 4503
},
{
"epoch": 1.54,
"learning_rate": 6.847795943260365e-06,
"loss": 0.1848,
"step": 4504
},
{
"epoch": 1.54,
"learning_rate": 6.8382014555231495e-06,
"loss": 0.1977,
"step": 4505
},
{
"epoch": 1.54,
"learning_rate": 6.828612628937187e-06,
"loss": 0.2047,
"step": 4506
},
{
"epoch": 1.54,
"learning_rate": 6.8190294664913615e-06,
"loss": 0.2257,
"step": 4507
},
{
"epoch": 1.54,
"learning_rate": 6.809451971172817e-06,
"loss": 0.2067,
"step": 4508
},
{
"epoch": 1.54,
"learning_rate": 6.799880145966927e-06,
"loss": 0.2119,
"step": 4509
},
{
"epoch": 1.54,
"learning_rate": 6.790313993857281e-06,
"loss": 0.2238,
"step": 4510
},
{
"epoch": 1.54,
"learning_rate": 6.780753517825711e-06,
"loss": 0.2413,
"step": 4511
},
{
"epoch": 1.54,
"learning_rate": 6.771198720852287e-06,
"loss": 0.211,
"step": 4512
},
{
"epoch": 1.54,
"learning_rate": 6.761649605915307e-06,
"loss": 0.2097,
"step": 4513
},
{
"epoch": 1.54,
"learning_rate": 6.752106175991277e-06,
"loss": 0.1938,
"step": 4514
},
{
"epoch": 1.54,
"learning_rate": 6.742568434054961e-06,
"loss": 0.1526,
"step": 4515
},
{
"epoch": 1.54,
"learning_rate": 6.733036383079322e-06,
"loss": 0.2444,
"step": 4516
},
{
"epoch": 1.54,
"learning_rate": 6.723510026035568e-06,
"loss": 0.1931,
"step": 4517
},
{
"epoch": 1.54,
"learning_rate": 6.7139893658931334e-06,
"loss": 0.2001,
"step": 4518
},
{
"epoch": 1.54,
"learning_rate": 6.70447440561966e-06,
"loss": 0.2274,
"step": 4519
},
{
"epoch": 1.54,
"learning_rate": 6.694965148181034e-06,
"loss": 0.1891,
"step": 4520
},
{
"epoch": 1.54,
"learning_rate": 6.685461596541339e-06,
"loss": 0.2177,
"step": 4521
},
{
"epoch": 1.54,
"learning_rate": 6.6759637536629035e-06,
"loss": 0.2241,
"step": 4522
},
{
"epoch": 1.54,
"learning_rate": 6.666471622506271e-06,
"loss": 0.2167,
"step": 4523
},
{
"epoch": 1.54,
"learning_rate": 6.6569852060301864e-06,
"loss": 0.2501,
"step": 4524
},
{
"epoch": 1.54,
"learning_rate": 6.64750450719164e-06,
"loss": 0.1978,
"step": 4525
},
{
"epoch": 1.54,
"learning_rate": 6.638029528945822e-06,
"loss": 0.2104,
"step": 4526
},
{
"epoch": 1.54,
"learning_rate": 6.628560274246154e-06,
"loss": 0.2085,
"step": 4527
},
{
"epoch": 1.54,
"learning_rate": 6.619096746044251e-06,
"loss": 0.1965,
"step": 4528
},
{
"epoch": 1.55,
"learning_rate": 6.609638947289964e-06,
"loss": 0.1838,
"step": 4529
},
{
"epoch": 1.55,
"learning_rate": 6.600186880931358e-06,
"loss": 0.2141,
"step": 4530
},
{
"epoch": 1.55,
"learning_rate": 6.590740549914687e-06,
"loss": 0.1948,
"step": 4531
},
{
"epoch": 1.55,
"learning_rate": 6.5812999571844505e-06,
"loss": 0.2071,
"step": 4532
},
{
"epoch": 1.55,
"learning_rate": 6.571865105683325e-06,
"loss": 0.2174,
"step": 4533
},
{
"epoch": 1.55,
"learning_rate": 6.562435998352229e-06,
"loss": 0.2361,
"step": 4534
},
{
"epoch": 1.55,
"learning_rate": 6.553012638130279e-06,
"loss": 0.202,
"step": 4535
},
{
"epoch": 1.55,
"learning_rate": 6.543595027954785e-06,
"loss": 0.1965,
"step": 4536
},
{
"epoch": 1.55,
"learning_rate": 6.5341831707612935e-06,
"loss": 0.2034,
"step": 4537
},
{
"epoch": 1.55,
"learning_rate": 6.524777069483526e-06,
"loss": 0.2015,
"step": 4538
},
{
"epoch": 1.55,
"learning_rate": 6.515376727053432e-06,
"loss": 0.2032,
"step": 4539
},
{
"epoch": 1.55,
"learning_rate": 6.505982146401163e-06,
"loss": 0.1871,
"step": 4540
},
{
"epoch": 1.55,
"learning_rate": 6.496593330455073e-06,
"loss": 0.201,
"step": 4541
},
{
"epoch": 1.55,
"learning_rate": 6.487210282141706e-06,
"loss": 0.1832,
"step": 4542
},
{
"epoch": 1.55,
"learning_rate": 6.477833004385828e-06,
"loss": 0.2056,
"step": 4543
},
{
"epoch": 1.55,
"learning_rate": 6.468461500110404e-06,
"loss": 0.1989,
"step": 4544
},
{
"epoch": 1.55,
"learning_rate": 6.4590957722365755e-06,
"loss": 0.2092,
"step": 4545
},
{
"epoch": 1.55,
"learning_rate": 6.449735823683723e-06,
"loss": 0.1931,
"step": 4546
},
{
"epoch": 1.55,
"learning_rate": 6.440381657369379e-06,
"loss": 0.1967,
"step": 4547
},
{
"epoch": 1.55,
"learning_rate": 6.431033276209314e-06,
"loss": 0.1791,
"step": 4548
},
{
"epoch": 1.55,
"learning_rate": 6.421690683117482e-06,
"loss": 0.2163,
"step": 4549
},
{
"epoch": 1.55,
"learning_rate": 6.412353881006017e-06,
"loss": 0.2085,
"step": 4550
},
{
"epoch": 1.55,
"learning_rate": 6.403022872785278e-06,
"loss": 0.2188,
"step": 4551
},
{
"epoch": 1.55,
"learning_rate": 6.393697661363781e-06,
"loss": 0.1812,
"step": 4552
},
{
"epoch": 1.55,
"learning_rate": 6.38437824964827e-06,
"loss": 0.203,
"step": 4553
},
{
"epoch": 1.55,
"learning_rate": 6.375064640543663e-06,
"loss": 0.2094,
"step": 4554
},
{
"epoch": 1.55,
"learning_rate": 6.365756836953071e-06,
"loss": 0.2048,
"step": 4555
},
{
"epoch": 1.55,
"learning_rate": 6.356454841777807e-06,
"loss": 0.2258,
"step": 4556
},
{
"epoch": 1.55,
"learning_rate": 6.347158657917346e-06,
"loss": 0.1728,
"step": 4557
},
{
"epoch": 1.56,
"learning_rate": 6.337868288269389e-06,
"loss": 0.1935,
"step": 4558
},
{
"epoch": 1.56,
"learning_rate": 6.328583735729785e-06,
"loss": 0.2145,
"step": 4559
},
{
"epoch": 1.56,
"learning_rate": 6.3193050031925995e-06,
"loss": 0.2175,
"step": 4560
},
{
"epoch": 1.56,
"learning_rate": 6.310032093550081e-06,
"loss": 0.195,
"step": 4561
},
{
"epoch": 1.56,
"learning_rate": 6.300765009692641e-06,
"loss": 0.1903,
"step": 4562
},
{
"epoch": 1.56,
"learning_rate": 6.291503754508906e-06,
"loss": 0.1768,
"step": 4563
},
{
"epoch": 1.56,
"learning_rate": 6.282248330885656e-06,
"loss": 0.209,
"step": 4564
},
{
"epoch": 1.56,
"learning_rate": 6.272998741707869e-06,
"loss": 0.2177,
"step": 4565
},
{
"epoch": 1.56,
"learning_rate": 6.263754989858717e-06,
"loss": 0.1752,
"step": 4566
},
{
"epoch": 1.56,
"learning_rate": 6.254517078219518e-06,
"loss": 0.1914,
"step": 4567
},
{
"epoch": 1.56,
"learning_rate": 6.245285009669799e-06,
"loss": 0.2303,
"step": 4568
},
{
"epoch": 1.56,
"learning_rate": 6.236058787087254e-06,
"loss": 0.2626,
"step": 4569
},
{
"epoch": 1.56,
"learning_rate": 6.226838413347766e-06,
"loss": 0.2153,
"step": 4570
},
{
"epoch": 1.56,
"learning_rate": 6.217623891325369e-06,
"loss": 0.1811,
"step": 4571
},
{
"epoch": 1.56,
"learning_rate": 6.208415223892308e-06,
"loss": 0.1781,
"step": 4572
},
{
"epoch": 1.56,
"learning_rate": 6.199212413918967e-06,
"loss": 0.203,
"step": 4573
},
{
"epoch": 1.56,
"learning_rate": 6.190015464273932e-06,
"loss": 0.1934,
"step": 4574
},
{
"epoch": 1.56,
"learning_rate": 6.180824377823957e-06,
"loss": 0.1917,
"step": 4575
},
{
"epoch": 1.56,
"learning_rate": 6.17163915743395e-06,
"loss": 0.211,
"step": 4576
},
{
"epoch": 1.56,
"learning_rate": 6.162459805967019e-06,
"loss": 0.2391,
"step": 4577
},
{
"epoch": 1.56,
"learning_rate": 6.153286326284413e-06,
"loss": 0.2045,
"step": 4578
},
{
"epoch": 1.56,
"learning_rate": 6.144118721245573e-06,
"loss": 0.2171,
"step": 4579
},
{
"epoch": 1.56,
"learning_rate": 6.134956993708099e-06,
"loss": 0.1989,
"step": 4580
},
{
"epoch": 1.56,
"learning_rate": 6.1258011465277655e-06,
"loss": 0.1926,
"step": 4581
},
{
"epoch": 1.56,
"learning_rate": 6.116651182558516e-06,
"loss": 0.2158,
"step": 4582
},
{
"epoch": 1.56,
"learning_rate": 6.107507104652438e-06,
"loss": 0.182,
"step": 4583
},
{
"epoch": 1.56,
"learning_rate": 6.0983689156598166e-06,
"loss": 0.1958,
"step": 4584
},
{
"epoch": 1.56,
"learning_rate": 6.089236618429068e-06,
"loss": 0.3798,
"step": 4585
},
{
"epoch": 1.56,
"learning_rate": 6.080110215806797e-06,
"loss": 0.2264,
"step": 4586
},
{
"epoch": 1.56,
"learning_rate": 6.070989710637773e-06,
"loss": 0.1877,
"step": 4587
},
{
"epoch": 1.57,
"learning_rate": 6.0618751057649e-06,
"loss": 0.2178,
"step": 4588
},
{
"epoch": 1.57,
"learning_rate": 6.052766404029272e-06,
"loss": 0.212,
"step": 4589
},
{
"epoch": 1.57,
"learning_rate": 6.043663608270123e-06,
"loss": 0.2058,
"step": 4590
},
{
"epoch": 1.57,
"learning_rate": 6.0345667213248554e-06,
"loss": 0.2186,
"step": 4591
},
{
"epoch": 1.57,
"learning_rate": 6.0254757460290355e-06,
"loss": 0.213,
"step": 4592
},
{
"epoch": 1.57,
"learning_rate": 6.016390685216369e-06,
"loss": 0.2129,
"step": 4593
},
{
"epoch": 1.57,
"learning_rate": 6.007311541718732e-06,
"loss": 0.1823,
"step": 4594
},
{
"epoch": 1.57,
"learning_rate": 5.998238318366159e-06,
"loss": 0.2109,
"step": 4595
},
{
"epoch": 1.57,
"learning_rate": 5.989171017986834e-06,
"loss": 0.1848,
"step": 4596
},
{
"epoch": 1.57,
"learning_rate": 5.980109643407078e-06,
"loss": 0.2089,
"step": 4597
},
{
"epoch": 1.57,
"learning_rate": 5.971054197451401e-06,
"loss": 0.1943,
"step": 4598
},
{
"epoch": 1.57,
"learning_rate": 5.962004682942429e-06,
"loss": 0.1901,
"step": 4599
},
{
"epoch": 1.57,
"learning_rate": 5.9529611027009606e-06,
"loss": 0.2236,
"step": 4600
},
{
"epoch": 1.57,
"learning_rate": 5.9439234595459445e-06,
"loss": 0.2116,
"step": 4601
},
{
"epoch": 1.57,
"learning_rate": 5.934891756294458e-06,
"loss": 0.2213,
"step": 4602
},
{
"epoch": 1.57,
"learning_rate": 5.925865995761762e-06,
"loss": 0.2105,
"step": 4603
},
{
"epoch": 1.57,
"learning_rate": 5.916846180761229e-06,
"loss": 0.2324,
"step": 4604
},
{
"epoch": 1.57,
"learning_rate": 5.907832314104397e-06,
"loss": 0.2132,
"step": 4605
},
{
"epoch": 1.57,
"learning_rate": 5.898824398600958e-06,
"loss": 0.2025,
"step": 4606
},
{
"epoch": 1.57,
"learning_rate": 5.8898224370587204e-06,
"loss": 0.2008,
"step": 4607
},
{
"epoch": 1.57,
"learning_rate": 5.880826432283668e-06,
"loss": 0.1974,
"step": 4608
},
{
"epoch": 1.57,
"learning_rate": 5.871836387079907e-06,
"loss": 0.2128,
"step": 4609
},
{
"epoch": 1.57,
"learning_rate": 5.862852304249705e-06,
"loss": 0.2054,
"step": 4610
},
{
"epoch": 1.57,
"learning_rate": 5.853874186593444e-06,
"loss": 0.1821,
"step": 4611
},
{
"epoch": 1.57,
"learning_rate": 5.844902036909667e-06,
"loss": 0.3183,
"step": 4612
},
{
"epoch": 1.57,
"learning_rate": 5.8359358579950615e-06,
"loss": 0.1832,
"step": 4613
},
{
"epoch": 1.57,
"learning_rate": 5.826975652644428e-06,
"loss": 0.1743,
"step": 4614
},
{
"epoch": 1.57,
"learning_rate": 5.818021423650738e-06,
"loss": 0.1994,
"step": 4615
},
{
"epoch": 1.57,
"learning_rate": 5.809073173805066e-06,
"loss": 0.184,
"step": 4616
},
{
"epoch": 1.58,
"learning_rate": 5.80013090589665e-06,
"loss": 0.1929,
"step": 4617
},
{
"epoch": 1.58,
"learning_rate": 5.791194622712856e-06,
"loss": 0.2053,
"step": 4618
},
{
"epoch": 1.58,
"learning_rate": 5.782264327039169e-06,
"loss": 0.1901,
"step": 4619
},
{
"epoch": 1.58,
"learning_rate": 5.773340021659238e-06,
"loss": 0.1933,
"step": 4620
},
{
"epoch": 1.58,
"learning_rate": 5.764421709354811e-06,
"loss": 0.1946,
"step": 4621
},
{
"epoch": 1.58,
"learning_rate": 5.755509392905792e-06,
"loss": 0.2215,
"step": 4622
},
{
"epoch": 1.58,
"learning_rate": 5.7466030750902035e-06,
"loss": 0.1981,
"step": 4623
},
{
"epoch": 1.58,
"learning_rate": 5.737702758684218e-06,
"loss": 0.2069,
"step": 4624
},
{
"epoch": 1.58,
"learning_rate": 5.728808446462103e-06,
"loss": 0.181,
"step": 4625
},
{
"epoch": 1.58,
"learning_rate": 5.719920141196281e-06,
"loss": 0.2071,
"step": 4626
},
{
"epoch": 1.58,
"learning_rate": 5.711037845657302e-06,
"loss": 0.231,
"step": 4627
},
{
"epoch": 1.58,
"learning_rate": 5.702161562613826e-06,
"loss": 0.1995,
"step": 4628
},
{
"epoch": 1.58,
"learning_rate": 5.693291294832656e-06,
"loss": 0.1849,
"step": 4629
},
{
"epoch": 1.58,
"learning_rate": 5.6844270450786994e-06,
"loss": 0.1891,
"step": 4630
},
{
"epoch": 1.58,
"learning_rate": 5.6755688161150126e-06,
"loss": 0.2152,
"step": 4631
},
{
"epoch": 1.58,
"learning_rate": 5.666716610702766e-06,
"loss": 0.1831,
"step": 4632
},
{
"epoch": 1.58,
"learning_rate": 5.657870431601236e-06,
"loss": 0.2391,
"step": 4633
},
{
"epoch": 1.58,
"learning_rate": 5.649030281567849e-06,
"loss": 0.2374,
"step": 4634
},
{
"epoch": 1.58,
"learning_rate": 5.640196163358128e-06,
"loss": 0.2343,
"step": 4635
},
{
"epoch": 1.58,
"learning_rate": 5.631368079725724e-06,
"loss": 0.2085,
"step": 4636
},
{
"epoch": 1.58,
"learning_rate": 5.622546033422416e-06,
"loss": 0.2139,
"step": 4637
},
{
"epoch": 1.58,
"learning_rate": 5.613730027198089e-06,
"loss": 0.2235,
"step": 4638
},
{
"epoch": 1.58,
"learning_rate": 5.604920063800756e-06,
"loss": 0.1787,
"step": 4639
},
{
"epoch": 1.58,
"learning_rate": 5.596116145976529e-06,
"loss": 0.1991,
"step": 4640
},
{
"epoch": 1.58,
"learning_rate": 5.587318276469661e-06,
"loss": 0.2136,
"step": 4641
},
{
"epoch": 1.58,
"learning_rate": 5.57852645802249e-06,
"loss": 0.2057,
"step": 4642
},
{
"epoch": 1.58,
"learning_rate": 5.569740693375488e-06,
"loss": 0.1877,
"step": 4643
},
{
"epoch": 1.58,
"learning_rate": 5.56096098526725e-06,
"loss": 0.2146,
"step": 4644
},
{
"epoch": 1.58,
"learning_rate": 5.552187336434445e-06,
"loss": 0.1815,
"step": 4645
},
{
"epoch": 1.59,
"learning_rate": 5.543419749611895e-06,
"loss": 0.2239,
"step": 4646
},
{
"epoch": 1.59,
"learning_rate": 5.534658227532499e-06,
"loss": 0.1758,
"step": 4647
},
{
"epoch": 1.59,
"learning_rate": 5.525902772927296e-06,
"loss": 0.2243,
"step": 4648
},
{
"epoch": 1.59,
"learning_rate": 5.517153388525404e-06,
"loss": 0.2233,
"step": 4649
},
{
"epoch": 1.59,
"learning_rate": 5.508410077054068e-06,
"loss": 0.2336,
"step": 4650
},
{
"epoch": 1.59,
"learning_rate": 5.49967284123864e-06,
"loss": 0.1994,
"step": 4651
},
{
"epoch": 1.59,
"learning_rate": 5.490941683802572e-06,
"loss": 0.1945,
"step": 4652
},
{
"epoch": 1.59,
"learning_rate": 5.482216607467427e-06,
"loss": 0.2182,
"step": 4653
},
{
"epoch": 1.59,
"learning_rate": 5.473497614952855e-06,
"loss": 0.2172,
"step": 4654
},
{
"epoch": 1.59,
"learning_rate": 5.464784708976639e-06,
"loss": 0.2008,
"step": 4655
},
{
"epoch": 1.59,
"learning_rate": 5.4560778922546325e-06,
"loss": 0.2215,
"step": 4656
},
{
"epoch": 1.59,
"learning_rate": 5.447377167500817e-06,
"loss": 0.2069,
"step": 4657
},
{
"epoch": 1.59,
"learning_rate": 5.4386825374272694e-06,
"loss": 0.2166,
"step": 4658
},
{
"epoch": 1.59,
"learning_rate": 5.4299940047441485e-06,
"loss": 0.1909,
"step": 4659
},
{
"epoch": 1.59,
"learning_rate": 5.421311572159743e-06,
"loss": 0.2216,
"step": 4660
},
{
"epoch": 1.59,
"learning_rate": 5.4126352423804095e-06,
"loss": 0.1769,
"step": 4661
},
{
"epoch": 1.59,
"learning_rate": 5.403965018110621e-06,
"loss": 0.22,
"step": 4662
},
{
"epoch": 1.59,
"learning_rate": 5.395300902052955e-06,
"loss": 0.2138,
"step": 4663
},
{
"epoch": 1.59,
"learning_rate": 5.386642896908048e-06,
"loss": 0.2437,
"step": 4664
},
{
"epoch": 1.59,
"learning_rate": 5.377991005374683e-06,
"loss": 0.1924,
"step": 4665
},
{
"epoch": 1.59,
"learning_rate": 5.369345230149697e-06,
"loss": 0.2169,
"step": 4666
},
{
"epoch": 1.59,
"learning_rate": 5.360705573928043e-06,
"loss": 0.2021,
"step": 4667
},
{
"epoch": 1.59,
"learning_rate": 5.3520720394027446e-06,
"loss": 0.2276,
"step": 4668
},
{
"epoch": 1.59,
"learning_rate": 5.34344462926494e-06,
"loss": 0.222,
"step": 4669
},
{
"epoch": 1.59,
"learning_rate": 5.3348233462038545e-06,
"loss": 0.203,
"step": 4670
},
{
"epoch": 1.59,
"learning_rate": 5.326208192906787e-06,
"loss": 0.1946,
"step": 4671
},
{
"epoch": 1.59,
"learning_rate": 5.317599172059148e-06,
"loss": 0.198,
"step": 4672
},
{
"epoch": 1.59,
"learning_rate": 5.308996286344414e-06,
"loss": 0.1733,
"step": 4673
},
{
"epoch": 1.59,
"learning_rate": 5.300399538444173e-06,
"loss": 0.1952,
"step": 4674
},
{
"epoch": 1.6,
"learning_rate": 5.291808931038075e-06,
"loss": 0.1971,
"step": 4675
},
{
"epoch": 1.6,
"learning_rate": 5.283224466803874e-06,
"loss": 0.1831,
"step": 4676
},
{
"epoch": 1.6,
"learning_rate": 5.2746461484174115e-06,
"loss": 0.2306,
"step": 4677
},
{
"epoch": 1.6,
"learning_rate": 5.266073978552583e-06,
"loss": 0.1927,
"step": 4678
},
{
"epoch": 1.6,
"learning_rate": 5.257507959881419e-06,
"loss": 0.219,
"step": 4679
},
{
"epoch": 1.6,
"learning_rate": 5.24894809507398e-06,
"loss": 0.2079,
"step": 4680
},
{
"epoch": 1.6,
"learning_rate": 5.24039438679845e-06,
"loss": 0.178,
"step": 4681
},
{
"epoch": 1.6,
"learning_rate": 5.2318468377210605e-06,
"loss": 0.2212,
"step": 4682
},
{
"epoch": 1.6,
"learning_rate": 5.223305450506144e-06,
"loss": 0.2071,
"step": 4683
},
{
"epoch": 1.6,
"learning_rate": 5.2147702278161155e-06,
"loss": 0.2021,
"step": 4684
},
{
"epoch": 1.6,
"learning_rate": 5.206241172311443e-06,
"loss": 0.2162,
"step": 4685
},
{
"epoch": 1.6,
"learning_rate": 5.197718286650705e-06,
"loss": 0.1972,
"step": 4686
},
{
"epoch": 1.6,
"learning_rate": 5.189201573490526e-06,
"loss": 0.2363,
"step": 4687
},
{
"epoch": 1.6,
"learning_rate": 5.180691035485629e-06,
"loss": 0.2063,
"step": 4688
},
{
"epoch": 1.6,
"learning_rate": 5.17218667528881e-06,
"loss": 0.2091,
"step": 4689
},
{
"epoch": 1.6,
"learning_rate": 5.163688495550917e-06,
"loss": 0.1901,
"step": 4690
},
{
"epoch": 1.6,
"learning_rate": 5.155196498920903e-06,
"loss": 0.2253,
"step": 4691
},
{
"epoch": 1.6,
"learning_rate": 5.1467106880457694e-06,
"loss": 0.1782,
"step": 4692
},
{
"epoch": 1.6,
"learning_rate": 5.1382310655706145e-06,
"loss": 0.2091,
"step": 4693
},
{
"epoch": 1.6,
"learning_rate": 5.129757634138571e-06,
"loss": 0.2389,
"step": 4694
},
{
"epoch": 1.6,
"learning_rate": 5.121290396390873e-06,
"loss": 0.1905,
"step": 4695
},
{
"epoch": 1.6,
"learning_rate": 5.112829354966822e-06,
"loss": 0.2176,
"step": 4696
},
{
"epoch": 1.6,
"learning_rate": 5.104374512503765e-06,
"loss": 0.202,
"step": 4697
},
{
"epoch": 1.6,
"learning_rate": 5.095925871637144e-06,
"loss": 0.2185,
"step": 4698
},
{
"epoch": 1.6,
"learning_rate": 5.087483435000442e-06,
"loss": 0.1889,
"step": 4699
},
{
"epoch": 1.6,
"learning_rate": 5.079047205225237e-06,
"loss": 0.1836,
"step": 4700
},
{
"epoch": 1.6,
"learning_rate": 5.070617184941142e-06,
"loss": 0.2145,
"step": 4701
},
{
"epoch": 1.6,
"learning_rate": 5.0621933767758586e-06,
"loss": 0.196,
"step": 4702
},
{
"epoch": 1.6,
"learning_rate": 5.0537757833551445e-06,
"loss": 0.1719,
"step": 4703
},
{
"epoch": 1.6,
"learning_rate": 5.04536440730281e-06,
"loss": 0.2024,
"step": 4704
},
{
"epoch": 1.61,
"learning_rate": 5.036959251240742e-06,
"loss": 0.2154,
"step": 4705
},
{
"epoch": 1.61,
"learning_rate": 5.02856031778888e-06,
"loss": 0.2059,
"step": 4706
},
{
"epoch": 1.61,
"learning_rate": 5.020167609565235e-06,
"loss": 0.2096,
"step": 4707
},
{
"epoch": 1.61,
"learning_rate": 5.011781129185855e-06,
"loss": 0.1884,
"step": 4708
},
{
"epoch": 1.61,
"learning_rate": 5.003400879264866e-06,
"loss": 0.2261,
"step": 4709
},
{
"epoch": 1.61,
"learning_rate": 4.9950268624144565e-06,
"loss": 0.2378,
"step": 4710
},
{
"epoch": 1.61,
"learning_rate": 4.986659081244849e-06,
"loss": 0.2065,
"step": 4711
},
{
"epoch": 1.61,
"learning_rate": 4.978297538364343e-06,
"loss": 0.1961,
"step": 4712
},
{
"epoch": 1.61,
"learning_rate": 4.969942236379279e-06,
"loss": 0.215,
"step": 4713
},
{
"epoch": 1.61,
"learning_rate": 4.961593177894064e-06,
"loss": 0.2911,
"step": 4714
},
{
"epoch": 1.61,
"learning_rate": 4.9532503655111614e-06,
"loss": 0.2291,
"step": 4715
},
{
"epoch": 1.61,
"learning_rate": 4.944913801831067e-06,
"loss": 0.1852,
"step": 4716
},
{
"epoch": 1.61,
"learning_rate": 4.936583489452351e-06,
"loss": 0.1745,
"step": 4717
},
{
"epoch": 1.61,
"learning_rate": 4.928259430971618e-06,
"loss": 0.209,
"step": 4718
},
{
"epoch": 1.61,
"learning_rate": 4.919941628983537e-06,
"loss": 0.1935,
"step": 4719
},
{
"epoch": 1.61,
"learning_rate": 4.91163008608082e-06,
"loss": 0.1882,
"step": 4720
},
{
"epoch": 1.61,
"learning_rate": 4.903324804854231e-06,
"loss": 0.1918,
"step": 4721
},
{
"epoch": 1.61,
"learning_rate": 4.895025787892585e-06,
"loss": 0.2097,
"step": 4722
},
{
"epoch": 1.61,
"learning_rate": 4.886733037782728e-06,
"loss": 0.1972,
"step": 4723
},
{
"epoch": 1.61,
"learning_rate": 4.8784465571095755e-06,
"loss": 0.2094,
"step": 4724
},
{
"epoch": 1.61,
"learning_rate": 4.870166348456068e-06,
"loss": 0.208,
"step": 4725
},
{
"epoch": 1.61,
"learning_rate": 4.861892414403213e-06,
"loss": 0.2078,
"step": 4726
},
{
"epoch": 1.61,
"learning_rate": 4.853624757530035e-06,
"loss": 0.2343,
"step": 4727
},
{
"epoch": 1.61,
"learning_rate": 4.84536338041362e-06,
"loss": 0.1804,
"step": 4728
},
{
"epoch": 1.61,
"learning_rate": 4.837108285629108e-06,
"loss": 0.1944,
"step": 4729
},
{
"epoch": 1.61,
"learning_rate": 4.828859475749645e-06,
"loss": 0.1774,
"step": 4730
},
{
"epoch": 1.61,
"learning_rate": 4.820616953346458e-06,
"loss": 0.2175,
"step": 4731
},
{
"epoch": 1.61,
"learning_rate": 4.8123807209887765e-06,
"loss": 0.1945,
"step": 4732
},
{
"epoch": 1.61,
"learning_rate": 4.804150781243902e-06,
"loss": 0.1961,
"step": 4733
},
{
"epoch": 1.62,
"learning_rate": 4.795927136677153e-06,
"loss": 0.2028,
"step": 4734
},
{
"epoch": 1.62,
"learning_rate": 4.787709789851894e-06,
"loss": 0.1868,
"step": 4735
},
{
"epoch": 1.62,
"learning_rate": 4.779498743329538e-06,
"loss": 0.1942,
"step": 4736
},
{
"epoch": 1.62,
"learning_rate": 4.771293999669502e-06,
"loss": 0.2212,
"step": 4737
},
{
"epoch": 1.62,
"learning_rate": 4.763095561429276e-06,
"loss": 0.1758,
"step": 4738
},
{
"epoch": 1.62,
"learning_rate": 4.7549034311643535e-06,
"loss": 0.2266,
"step": 4739
},
{
"epoch": 1.62,
"learning_rate": 4.746717611428278e-06,
"loss": 0.1893,
"step": 4740
},
{
"epoch": 1.62,
"learning_rate": 4.73853810477263e-06,
"loss": 0.2023,
"step": 4741
},
{
"epoch": 1.62,
"learning_rate": 4.7303649137470066e-06,
"loss": 0.1897,
"step": 4742
},
{
"epoch": 1.62,
"learning_rate": 4.7221980408990534e-06,
"loss": 0.2233,
"step": 4743
},
{
"epoch": 1.62,
"learning_rate": 4.714037488774428e-06,
"loss": 0.1879,
"step": 4744
},
{
"epoch": 1.62,
"learning_rate": 4.705883259916832e-06,
"loss": 0.2124,
"step": 4745
},
{
"epoch": 1.62,
"learning_rate": 4.697735356867999e-06,
"loss": 0.2198,
"step": 4746
},
{
"epoch": 1.62,
"learning_rate": 4.689593782167665e-06,
"loss": 0.2044,
"step": 4747
},
{
"epoch": 1.62,
"learning_rate": 4.681458538353636e-06,
"loss": 0.2066,
"step": 4748
},
{
"epoch": 1.62,
"learning_rate": 4.673329627961703e-06,
"loss": 0.3547,
"step": 4749
},
{
"epoch": 1.62,
"learning_rate": 4.6652070535257134e-06,
"loss": 0.2164,
"step": 4750
},
{
"epoch": 1.62,
"learning_rate": 4.657090817577511e-06,
"loss": 0.1854,
"step": 4751
},
{
"epoch": 1.62,
"learning_rate": 4.6489809226469976e-06,
"loss": 0.2151,
"step": 4752
},
{
"epoch": 1.62,
"learning_rate": 4.6408773712620645e-06,
"loss": 0.2252,
"step": 4753
},
{
"epoch": 1.62,
"learning_rate": 4.632780165948647e-06,
"loss": 0.2138,
"step": 4754
},
{
"epoch": 1.62,
"learning_rate": 4.624689309230704e-06,
"loss": 0.1811,
"step": 4755
},
{
"epoch": 1.62,
"learning_rate": 4.616604803630198e-06,
"loss": 0.1858,
"step": 4756
},
{
"epoch": 1.62,
"learning_rate": 4.60852665166713e-06,
"loss": 0.2088,
"step": 4757
},
{
"epoch": 1.62,
"learning_rate": 4.600454855859507e-06,
"loss": 0.2012,
"step": 4758
},
{
"epoch": 1.62,
"learning_rate": 4.592389418723361e-06,
"loss": 0.1852,
"step": 4759
},
{
"epoch": 1.62,
"learning_rate": 4.5843303427727495e-06,
"loss": 0.1889,
"step": 4760
},
{
"epoch": 1.62,
"learning_rate": 4.576277630519718e-06,
"loss": 0.1941,
"step": 4761
},
{
"epoch": 1.62,
"learning_rate": 4.568231284474381e-06,
"loss": 0.194,
"step": 4762
},
{
"epoch": 1.63,
"learning_rate": 4.560191307144812e-06,
"loss": 0.2162,
"step": 4763
},
{
"epoch": 1.63,
"learning_rate": 4.552157701037141e-06,
"loss": 0.21,
"step": 4764
},
{
"epoch": 1.63,
"learning_rate": 4.54413046865548e-06,
"loss": 0.2153,
"step": 4765
},
{
"epoch": 1.63,
"learning_rate": 4.5361096125019765e-06,
"loss": 0.1881,
"step": 4766
},
{
"epoch": 1.63,
"learning_rate": 4.528095135076793e-06,
"loss": 0.2057,
"step": 4767
},
{
"epoch": 1.63,
"learning_rate": 4.520087038878079e-06,
"loss": 0.2063,
"step": 4768
},
{
"epoch": 1.63,
"learning_rate": 4.512085326402024e-06,
"loss": 0.2218,
"step": 4769
},
{
"epoch": 1.63,
"learning_rate": 4.5040900001427995e-06,
"loss": 0.2099,
"step": 4770
},
{
"epoch": 1.63,
"learning_rate": 4.496101062592611e-06,
"loss": 0.2001,
"step": 4771
},
{
"epoch": 1.63,
"learning_rate": 4.488118516241665e-06,
"loss": 0.1831,
"step": 4772
},
{
"epoch": 1.63,
"learning_rate": 4.480142363578163e-06,
"loss": 0.201,
"step": 4773
},
{
"epoch": 1.63,
"learning_rate": 4.4721726070883375e-06,
"loss": 0.174,
"step": 4774
},
{
"epoch": 1.63,
"learning_rate": 4.464209249256393e-06,
"loss": 0.2065,
"step": 4775
},
{
"epoch": 1.63,
"learning_rate": 4.456252292564583e-06,
"loss": 0.1857,
"step": 4776
},
{
"epoch": 1.63,
"learning_rate": 4.44830173949313e-06,
"loss": 0.2284,
"step": 4777
},
{
"epoch": 1.63,
"learning_rate": 4.440357592520281e-06,
"loss": 0.1849,
"step": 4778
},
{
"epoch": 1.63,
"learning_rate": 4.432419854122266e-06,
"loss": 0.2132,
"step": 4779
},
{
"epoch": 1.63,
"learning_rate": 4.4244885267733375e-06,
"loss": 0.2159,
"step": 4780
},
{
"epoch": 1.63,
"learning_rate": 4.416563612945751e-06,
"loss": 0.184,
"step": 4781
},
{
"epoch": 1.63,
"learning_rate": 4.408645115109739e-06,
"loss": 0.2064,
"step": 4782
},
{
"epoch": 1.63,
"learning_rate": 4.400733035733559e-06,
"loss": 0.1927,
"step": 4783
},
{
"epoch": 1.63,
"learning_rate": 4.392827377283448e-06,
"loss": 0.2076,
"step": 4784
},
{
"epoch": 1.63,
"learning_rate": 4.384928142223657e-06,
"loss": 0.1917,
"step": 4785
},
{
"epoch": 1.63,
"learning_rate": 4.377035333016436e-06,
"loss": 0.1873,
"step": 4786
},
{
"epoch": 1.63,
"learning_rate": 4.3691489521220125e-06,
"loss": 0.1767,
"step": 4787
},
{
"epoch": 1.63,
"learning_rate": 4.36126900199863e-06,
"loss": 0.2162,
"step": 4788
},
{
"epoch": 1.63,
"learning_rate": 4.353395485102518e-06,
"loss": 0.2058,
"step": 4789
},
{
"epoch": 1.63,
"learning_rate": 4.345528403887913e-06,
"loss": 0.2052,
"step": 4790
},
{
"epoch": 1.63,
"learning_rate": 4.337667760807018e-06,
"loss": 0.2078,
"step": 4791
},
{
"epoch": 1.63,
"learning_rate": 4.329813558310059e-06,
"loss": 0.2102,
"step": 4792
},
{
"epoch": 1.64,
"learning_rate": 4.321965798845246e-06,
"loss": 0.1896,
"step": 4793
},
{
"epoch": 1.64,
"learning_rate": 4.314124484858761e-06,
"loss": 0.2071,
"step": 4794
},
{
"epoch": 1.64,
"learning_rate": 4.306289618794809e-06,
"loss": 0.1916,
"step": 4795
},
{
"epoch": 1.64,
"learning_rate": 4.298461203095555e-06,
"loss": 0.1865,
"step": 4796
},
{
"epoch": 1.64,
"learning_rate": 4.290639240201177e-06,
"loss": 0.2025,
"step": 4797
},
{
"epoch": 1.64,
"learning_rate": 4.2828237325498345e-06,
"loss": 0.1981,
"step": 4798
},
{
"epoch": 1.64,
"learning_rate": 4.275014682577658e-06,
"loss": 0.1935,
"step": 4799
},
{
"epoch": 1.64,
"learning_rate": 4.267212092718798e-06,
"loss": 0.1998,
"step": 4800
},
{
"epoch": 1.64,
"learning_rate": 4.259415965405356e-06,
"loss": 0.1814,
"step": 4801
},
{
"epoch": 1.64,
"learning_rate": 4.251626303067441e-06,
"loss": 0.1888,
"step": 4802
},
{
"epoch": 1.64,
"learning_rate": 4.243843108133144e-06,
"loss": 0.2162,
"step": 4803
},
{
"epoch": 1.64,
"learning_rate": 4.236066383028545e-06,
"loss": 0.2145,
"step": 4804
},
{
"epoch": 1.64,
"learning_rate": 4.228296130177684e-06,
"loss": 0.1822,
"step": 4805
},
{
"epoch": 1.64,
"learning_rate": 4.22053235200261e-06,
"loss": 0.186,
"step": 4806
},
{
"epoch": 1.64,
"learning_rate": 4.212775050923345e-06,
"loss": 0.1785,
"step": 4807
},
{
"epoch": 1.64,
"learning_rate": 4.205024229357882e-06,
"loss": 0.1993,
"step": 4808
},
{
"epoch": 1.64,
"learning_rate": 4.197279889722211e-06,
"loss": 0.2211,
"step": 4809
},
{
"epoch": 1.64,
"learning_rate": 4.189542034430285e-06,
"loss": 0.2048,
"step": 4810
},
{
"epoch": 1.64,
"learning_rate": 4.181810665894051e-06,
"loss": 0.1951,
"step": 4811
},
{
"epoch": 1.64,
"learning_rate": 4.174085786523427e-06,
"loss": 0.2221,
"step": 4812
},
{
"epoch": 1.64,
"learning_rate": 4.1663673987263e-06,
"loss": 0.2092,
"step": 4813
},
{
"epoch": 1.64,
"learning_rate": 4.158655504908557e-06,
"loss": 0.1734,
"step": 4814
},
{
"epoch": 1.64,
"learning_rate": 4.15095010747403e-06,
"loss": 0.2233,
"step": 4815
},
{
"epoch": 1.64,
"learning_rate": 4.1432512088245474e-06,
"loss": 0.2165,
"step": 4816
},
{
"epoch": 1.64,
"learning_rate": 4.135558811359907e-06,
"loss": 0.2132,
"step": 4817
},
{
"epoch": 1.64,
"learning_rate": 4.127872917477879e-06,
"loss": 0.2031,
"step": 4818
},
{
"epoch": 1.64,
"learning_rate": 4.120193529574215e-06,
"loss": 0.1854,
"step": 4819
},
{
"epoch": 1.64,
"learning_rate": 4.112520650042614e-06,
"loss": 0.2084,
"step": 4820
},
{
"epoch": 1.64,
"learning_rate": 4.104854281274778e-06,
"loss": 0.2223,
"step": 4821
},
{
"epoch": 1.65,
"learning_rate": 4.097194425660353e-06,
"loss": 0.1793,
"step": 4822
},
{
"epoch": 1.65,
"learning_rate": 4.089541085586968e-06,
"loss": 0.2024,
"step": 4823
},
{
"epoch": 1.65,
"learning_rate": 4.081894263440231e-06,
"loss": 0.2017,
"step": 4824
},
{
"epoch": 1.65,
"learning_rate": 4.074253961603688e-06,
"loss": 0.2116,
"step": 4825
},
{
"epoch": 1.65,
"learning_rate": 4.066620182458886e-06,
"loss": 0.1969,
"step": 4826
},
{
"epoch": 1.65,
"learning_rate": 4.058992928385314e-06,
"loss": 0.2021,
"step": 4827
},
{
"epoch": 1.65,
"learning_rate": 4.051372201760439e-06,
"loss": 0.2009,
"step": 4828
},
{
"epoch": 1.65,
"learning_rate": 4.043758004959699e-06,
"loss": 0.2322,
"step": 4829
},
{
"epoch": 1.65,
"learning_rate": 4.036150340356479e-06,
"loss": 0.1876,
"step": 4830
},
{
"epoch": 1.65,
"learning_rate": 4.0285492103221385e-06,
"loss": 0.2026,
"step": 4831
},
{
"epoch": 1.65,
"learning_rate": 4.020954617226003e-06,
"loss": 0.2079,
"step": 4832
},
{
"epoch": 1.65,
"learning_rate": 4.013366563435364e-06,
"loss": 0.1769,
"step": 4833
},
{
"epoch": 1.65,
"learning_rate": 4.005785051315453e-06,
"loss": 0.2056,
"step": 4834
},
{
"epoch": 1.65,
"learning_rate": 3.9982100832294895e-06,
"loss": 0.1868,
"step": 4835
},
{
"epoch": 1.65,
"learning_rate": 3.99064166153863e-06,
"loss": 0.1833,
"step": 4836
},
{
"epoch": 1.65,
"learning_rate": 3.983079788602004e-06,
"loss": 0.2108,
"step": 4837
},
{
"epoch": 1.65,
"learning_rate": 3.975524466776704e-06,
"loss": 0.2402,
"step": 4838
},
{
"epoch": 1.65,
"learning_rate": 3.967975698417761e-06,
"loss": 0.1996,
"step": 4839
},
{
"epoch": 1.65,
"learning_rate": 3.96043348587819e-06,
"loss": 0.1803,
"step": 4840
},
{
"epoch": 1.65,
"learning_rate": 3.952897831508934e-06,
"loss": 0.1821,
"step": 4841
},
{
"epoch": 1.65,
"learning_rate": 3.9453687376589086e-06,
"loss": 0.2138,
"step": 4842
},
{
"epoch": 1.65,
"learning_rate": 3.937846206674992e-06,
"loss": 0.2124,
"step": 4843
},
{
"epoch": 1.65,
"learning_rate": 3.930330240901986e-06,
"loss": 0.1884,
"step": 4844
},
{
"epoch": 1.65,
"learning_rate": 3.922820842682692e-06,
"loss": 0.2233,
"step": 4845
},
{
"epoch": 1.65,
"learning_rate": 3.915318014357819e-06,
"loss": 0.2115,
"step": 4846
},
{
"epoch": 1.65,
"learning_rate": 3.907821758266059e-06,
"loss": 0.2004,
"step": 4847
},
{
"epoch": 1.65,
"learning_rate": 3.900332076744034e-06,
"loss": 0.1955,
"step": 4848
},
{
"epoch": 1.65,
"learning_rate": 3.892848972126331e-06,
"loss": 0.2161,
"step": 4849
},
{
"epoch": 1.65,
"learning_rate": 3.885372446745492e-06,
"loss": 0.1881,
"step": 4850
},
{
"epoch": 1.66,
"learning_rate": 3.877902502931979e-06,
"loss": 0.2116,
"step": 4851
},
{
"epoch": 1.66,
"learning_rate": 3.8704391430142425e-06,
"loss": 0.2078,
"step": 4852
},
{
"epoch": 1.66,
"learning_rate": 3.862982369318646e-06,
"loss": 0.2266,
"step": 4853
},
{
"epoch": 1.66,
"learning_rate": 3.855532184169519e-06,
"loss": 0.2008,
"step": 4854
},
{
"epoch": 1.66,
"learning_rate": 3.84808858988914e-06,
"loss": 0.2033,
"step": 4855
},
{
"epoch": 1.66,
"learning_rate": 3.8406515887977116e-06,
"loss": 0.196,
"step": 4856
},
{
"epoch": 1.66,
"learning_rate": 3.833221183213412e-06,
"loss": 0.1925,
"step": 4857
},
{
"epoch": 1.66,
"learning_rate": 3.825797375452325e-06,
"loss": 0.222,
"step": 4858
},
{
"epoch": 1.66,
"learning_rate": 3.818380167828528e-06,
"loss": 0.2329,
"step": 4859
},
{
"epoch": 1.66,
"learning_rate": 3.8109695626539932e-06,
"loss": 0.2118,
"step": 4860
},
{
"epoch": 1.66,
"learning_rate": 3.8035655622386644e-06,
"loss": 0.1924,
"step": 4861
},
{
"epoch": 1.66,
"learning_rate": 3.7961681688904073e-06,
"loss": 0.1948,
"step": 4862
},
{
"epoch": 1.66,
"learning_rate": 3.788777384915046e-06,
"loss": 0.208,
"step": 4863
},
{
"epoch": 1.66,
"learning_rate": 3.7813932126163353e-06,
"loss": 0.18,
"step": 4864
},
{
"epoch": 1.66,
"learning_rate": 3.7740156542959648e-06,
"loss": 0.2228,
"step": 4865
},
{
"epoch": 1.66,
"learning_rate": 3.7666447122535755e-06,
"loss": 0.2393,
"step": 4866
},
{
"epoch": 1.66,
"learning_rate": 3.759280388786729e-06,
"loss": 0.2161,
"step": 4867
},
{
"epoch": 1.66,
"learning_rate": 3.751922686190937e-06,
"loss": 0.2179,
"step": 4868
},
{
"epoch": 1.66,
"learning_rate": 3.7445716067596503e-06,
"loss": 0.2125,
"step": 4869
},
{
"epoch": 1.66,
"learning_rate": 3.7372271527842367e-06,
"loss": 0.1925,
"step": 4870
},
{
"epoch": 1.66,
"learning_rate": 3.72988932655402e-06,
"loss": 0.2304,
"step": 4871
},
{
"epoch": 1.66,
"learning_rate": 3.722558130356235e-06,
"loss": 0.1691,
"step": 4872
},
{
"epoch": 1.66,
"learning_rate": 3.715233566476084e-06,
"loss": 0.1878,
"step": 4873
},
{
"epoch": 1.66,
"learning_rate": 3.707915637196663e-06,
"loss": 0.1967,
"step": 4874
},
{
"epoch": 1.66,
"learning_rate": 3.7006043447990272e-06,
"loss": 0.2023,
"step": 4875
},
{
"epoch": 1.66,
"learning_rate": 3.69329969156216e-06,
"loss": 0.2268,
"step": 4876
},
{
"epoch": 1.66,
"learning_rate": 3.686001679762954e-06,
"loss": 0.2046,
"step": 4877
},
{
"epoch": 1.66,
"learning_rate": 3.6787103116762616e-06,
"loss": 0.233,
"step": 4878
},
{
"epoch": 1.66,
"learning_rate": 3.6714255895748394e-06,
"loss": 0.1911,
"step": 4879
},
{
"epoch": 1.66,
"learning_rate": 3.6641475157293855e-06,
"loss": 0.2124,
"step": 4880
},
{
"epoch": 1.67,
"learning_rate": 3.6568760924085314e-06,
"loss": 0.1766,
"step": 4881
},
{
"epoch": 1.67,
"learning_rate": 3.6496113218788154e-06,
"loss": 0.1984,
"step": 4882
},
{
"epoch": 1.67,
"learning_rate": 3.642353206404725e-06,
"loss": 0.1997,
"step": 4883
},
{
"epoch": 1.67,
"learning_rate": 3.6351017482486483e-06,
"loss": 0.2026,
"step": 4884
},
{
"epoch": 1.67,
"learning_rate": 3.6278569496709213e-06,
"loss": 0.2067,
"step": 4885
},
{
"epoch": 1.67,
"learning_rate": 3.6206188129297963e-06,
"loss": 0.2011,
"step": 4886
},
{
"epoch": 1.67,
"learning_rate": 3.613387340281449e-06,
"loss": 0.1895,
"step": 4887
},
{
"epoch": 1.67,
"learning_rate": 3.606162533979968e-06,
"loss": 0.2233,
"step": 4888
},
{
"epoch": 1.67,
"learning_rate": 3.5989443962773763e-06,
"loss": 0.1954,
"step": 4889
},
{
"epoch": 1.67,
"learning_rate": 3.5917329294236217e-06,
"loss": 0.1735,
"step": 4890
},
{
"epoch": 1.67,
"learning_rate": 3.5845281356665567e-06,
"loss": 0.1979,
"step": 4891
},
{
"epoch": 1.67,
"learning_rate": 3.5773300172519673e-06,
"loss": 0.2164,
"step": 4892
},
{
"epoch": 1.67,
"learning_rate": 3.5701385764235513e-06,
"loss": 0.2029,
"step": 4893
},
{
"epoch": 1.67,
"learning_rate": 3.562953815422926e-06,
"loss": 0.1996,
"step": 4894
},
{
"epoch": 1.67,
"learning_rate": 3.555775736489639e-06,
"loss": 0.2375,
"step": 4895
},
{
"epoch": 1.67,
"learning_rate": 3.548604341861128e-06,
"loss": 0.2261,
"step": 4896
},
{
"epoch": 1.67,
"learning_rate": 3.5414396337727805e-06,
"loss": 0.1966,
"step": 4897
},
{
"epoch": 1.67,
"learning_rate": 3.5342816144578685e-06,
"loss": 0.1958,
"step": 4898
},
{
"epoch": 1.67,
"learning_rate": 3.527130286147598e-06,
"loss": 0.2453,
"step": 4899
},
{
"epoch": 1.67,
"learning_rate": 3.5199856510710867e-06,
"loss": 0.1817,
"step": 4900
},
{
"epoch": 1.67,
"learning_rate": 3.5128477114553637e-06,
"loss": 0.2165,
"step": 4901
},
{
"epoch": 1.67,
"learning_rate": 3.5057164695253774e-06,
"loss": 0.198,
"step": 4902
},
{
"epoch": 1.67,
"learning_rate": 3.4985919275039692e-06,
"loss": 0.1812,
"step": 4903
},
{
"epoch": 1.67,
"learning_rate": 3.491474087611918e-06,
"loss": 0.2166,
"step": 4904
},
{
"epoch": 1.67,
"learning_rate": 3.4843629520678904e-06,
"loss": 0.233,
"step": 4905
},
{
"epoch": 1.67,
"learning_rate": 3.4772585230884808e-06,
"loss": 0.2024,
"step": 4906
},
{
"epoch": 1.67,
"learning_rate": 3.4701608028881866e-06,
"loss": 0.1955,
"step": 4907
},
{
"epoch": 1.67,
"learning_rate": 3.4630697936794076e-06,
"loss": 0.2197,
"step": 4908
},
{
"epoch": 1.67,
"learning_rate": 3.4559854976724678e-06,
"loss": 0.2048,
"step": 4909
},
{
"epoch": 1.68,
"learning_rate": 3.448907917075575e-06,
"loss": 0.1961,
"step": 4910
},
{
"epoch": 1.68,
"learning_rate": 3.441837054094868e-06,
"loss": 0.2093,
"step": 4911
},
{
"epoch": 1.68,
"learning_rate": 3.4347729109343725e-06,
"loss": 0.1801,
"step": 4912
},
{
"epoch": 1.68,
"learning_rate": 3.427715489796035e-06,
"loss": 0.1943,
"step": 4913
},
{
"epoch": 1.68,
"learning_rate": 3.4206647928796965e-06,
"loss": 0.1681,
"step": 4914
},
{
"epoch": 1.68,
"learning_rate": 3.413620822383107e-06,
"loss": 0.2033,
"step": 4915
},
{
"epoch": 1.68,
"learning_rate": 3.4065835805019207e-06,
"loss": 0.1988,
"step": 4916
},
{
"epoch": 1.68,
"learning_rate": 3.399553069429684e-06,
"loss": 0.2189,
"step": 4917
},
{
"epoch": 1.68,
"learning_rate": 3.3925292913578653e-06,
"loss": 0.2019,
"step": 4918
},
{
"epoch": 1.68,
"learning_rate": 3.3855122484758055e-06,
"loss": 0.1999,
"step": 4919
},
{
"epoch": 1.68,
"learning_rate": 3.3785019429707743e-06,
"loss": 0.1971,
"step": 4920
},
{
"epoch": 1.68,
"learning_rate": 3.371498377027932e-06,
"loss": 0.173,
"step": 4921
},
{
"epoch": 1.68,
"learning_rate": 3.3645015528303263e-06,
"loss": 0.1608,
"step": 4922
},
{
"epoch": 1.68,
"learning_rate": 3.357511472558922e-06,
"loss": 0.2057,
"step": 4923
},
{
"epoch": 1.68,
"learning_rate": 3.350528138392564e-06,
"loss": 0.1789,
"step": 4924
},
{
"epoch": 1.68,
"learning_rate": 3.343551552508009e-06,
"loss": 0.2022,
"step": 4925
},
{
"epoch": 1.68,
"learning_rate": 3.336581717079906e-06,
"loss": 0.1942,
"step": 4926
},
{
"epoch": 1.68,
"learning_rate": 3.3296186342807845e-06,
"loss": 0.211,
"step": 4927
},
{
"epoch": 1.68,
"learning_rate": 3.3226623062811076e-06,
"loss": 0.1791,
"step": 4928
},
{
"epoch": 1.68,
"learning_rate": 3.3157127352491852e-06,
"loss": 0.2121,
"step": 4929
},
{
"epoch": 1.68,
"learning_rate": 3.3087699233512627e-06,
"loss": 0.2051,
"step": 4930
},
{
"epoch": 1.68,
"learning_rate": 3.301833872751442e-06,
"loss": 0.2279,
"step": 4931
},
{
"epoch": 1.68,
"learning_rate": 3.2949045856117437e-06,
"loss": 0.1687,
"step": 4932
},
{
"epoch": 1.68,
"learning_rate": 3.2879820640920766e-06,
"loss": 0.2072,
"step": 4933
},
{
"epoch": 1.68,
"learning_rate": 3.281066310350228e-06,
"loss": 0.213,
"step": 4934
},
{
"epoch": 1.68,
"learning_rate": 3.2741573265418905e-06,
"loss": 0.2003,
"step": 4935
},
{
"epoch": 1.68,
"learning_rate": 3.2672551148206306e-06,
"loss": 0.2123,
"step": 4936
},
{
"epoch": 1.68,
"learning_rate": 3.2603596773379237e-06,
"loss": 0.1762,
"step": 4937
},
{
"epoch": 1.68,
"learning_rate": 3.253471016243112e-06,
"loss": 0.1947,
"step": 4938
},
{
"epoch": 1.69,
"learning_rate": 3.2465891336834394e-06,
"loss": 0.193,
"step": 4939
},
{
"epoch": 1.69,
"learning_rate": 3.239714031804042e-06,
"loss": 0.1995,
"step": 4940
},
{
"epoch": 1.69,
"learning_rate": 3.2328457127479144e-06,
"loss": 0.2237,
"step": 4941
},
{
"epoch": 1.69,
"learning_rate": 3.225984178655983e-06,
"loss": 0.234,
"step": 4942
},
{
"epoch": 1.69,
"learning_rate": 3.2191294316670117e-06,
"loss": 0.2023,
"step": 4943
},
{
"epoch": 1.69,
"learning_rate": 3.2122814739176833e-06,
"loss": 0.208,
"step": 4944
},
{
"epoch": 1.69,
"learning_rate": 3.20544030754254e-06,
"loss": 0.1889,
"step": 4945
},
{
"epoch": 1.69,
"learning_rate": 3.1986059346740234e-06,
"loss": 0.1815,
"step": 4946
},
{
"epoch": 1.69,
"learning_rate": 3.1917783574424572e-06,
"loss": 0.2047,
"step": 4947
},
{
"epoch": 1.69,
"learning_rate": 3.1849575779760303e-06,
"loss": 0.1949,
"step": 4948
},
{
"epoch": 1.69,
"learning_rate": 3.1781435984008347e-06,
"loss": 0.1985,
"step": 4949
},
{
"epoch": 1.69,
"learning_rate": 3.171336420840826e-06,
"loss": 0.185,
"step": 4950
},
{
"epoch": 1.69,
"learning_rate": 3.164536047417846e-06,
"loss": 0.2099,
"step": 4951
},
{
"epoch": 1.69,
"learning_rate": 3.1577424802516215e-06,
"loss": 0.1961,
"step": 4952
},
{
"epoch": 1.69,
"learning_rate": 3.1509557214597447e-06,
"loss": 0.2058,
"step": 4953
},
{
"epoch": 1.69,
"learning_rate": 3.144175773157698e-06,
"loss": 0.1775,
"step": 4954
},
{
"epoch": 1.69,
"learning_rate": 3.137402637458825e-06,
"loss": 0.1957,
"step": 4955
},
{
"epoch": 1.69,
"learning_rate": 3.130636316474375e-06,
"loss": 0.187,
"step": 4956
},
{
"epoch": 1.69,
"learning_rate": 3.1238768123134395e-06,
"loss": 0.2198,
"step": 4957
},
{
"epoch": 1.69,
"learning_rate": 3.1171241270830013e-06,
"loss": 0.2152,
"step": 4958
},
{
"epoch": 1.69,
"learning_rate": 3.110378262887928e-06,
"loss": 0.2012,
"step": 4959
},
{
"epoch": 1.69,
"learning_rate": 3.103639221830937e-06,
"loss": 0.2351,
"step": 4960
},
{
"epoch": 1.69,
"learning_rate": 3.096907006012639e-06,
"loss": 0.1881,
"step": 4961
},
{
"epoch": 1.69,
"learning_rate": 3.090181617531501e-06,
"loss": 0.2219,
"step": 4962
},
{
"epoch": 1.69,
"learning_rate": 3.083463058483882e-06,
"loss": 0.2233,
"step": 4963
},
{
"epoch": 1.69,
"learning_rate": 3.0767513309639893e-06,
"loss": 0.1946,
"step": 4964
},
{
"epoch": 1.69,
"learning_rate": 3.070046437063917e-06,
"loss": 0.1989,
"step": 4965
},
{
"epoch": 1.69,
"learning_rate": 3.063348378873632e-06,
"loss": 0.1858,
"step": 4966
},
{
"epoch": 1.69,
"learning_rate": 3.0566571584809497e-06,
"loss": 0.1876,
"step": 4967
},
{
"epoch": 1.69,
"learning_rate": 3.049972777971577e-06,
"loss": 0.1954,
"step": 4968
},
{
"epoch": 1.7,
"learning_rate": 3.043295239429064e-06,
"loss": 0.1896,
"step": 4969
},
{
"epoch": 1.7,
"learning_rate": 3.0366245449348685e-06,
"loss": 0.1985,
"step": 4970
},
{
"epoch": 1.7,
"learning_rate": 3.029960696568268e-06,
"loss": 0.2068,
"step": 4971
},
{
"epoch": 1.7,
"learning_rate": 3.023303696406435e-06,
"loss": 0.1868,
"step": 4972
},
{
"epoch": 1.7,
"learning_rate": 3.0166535465244063e-06,
"loss": 0.1831,
"step": 4973
},
{
"epoch": 1.7,
"learning_rate": 3.010010248995063e-06,
"loss": 0.1914,
"step": 4974
},
{
"epoch": 1.7,
"learning_rate": 3.0033738058891784e-06,
"loss": 0.1814,
"step": 4975
},
{
"epoch": 1.7,
"learning_rate": 2.9967442192753632e-06,
"loss": 0.2057,
"step": 4976
},
{
"epoch": 1.7,
"learning_rate": 2.9901214912201075e-06,
"loss": 0.2002,
"step": 4977
},
{
"epoch": 1.7,
"learning_rate": 2.983505623787766e-06,
"loss": 0.19,
"step": 4978
},
{
"epoch": 1.7,
"learning_rate": 2.976896619040537e-06,
"loss": 0.22,
"step": 4979
},
{
"epoch": 1.7,
"learning_rate": 2.970294479038499e-06,
"loss": 0.1934,
"step": 4980
},
{
"epoch": 1.7,
"learning_rate": 2.963699205839576e-06,
"loss": 0.197,
"step": 4981
},
{
"epoch": 1.7,
"learning_rate": 2.9571108014995573e-06,
"loss": 0.1641,
"step": 4982
},
{
"epoch": 1.7,
"learning_rate": 2.950529268072094e-06,
"loss": 0.2041,
"step": 4983
},
{
"epoch": 1.7,
"learning_rate": 2.9439546076086926e-06,
"loss": 0.2088,
"step": 4984
},
{
"epoch": 1.7,
"learning_rate": 2.9373868221587245e-06,
"loss": 0.1884,
"step": 4985
},
{
"epoch": 1.7,
"learning_rate": 2.9308259137694e-06,
"loss": 0.1774,
"step": 4986
},
{
"epoch": 1.7,
"learning_rate": 2.9242718844858075e-06,
"loss": 0.2409,
"step": 4987
},
{
"epoch": 1.7,
"learning_rate": 2.9177247363508715e-06,
"loss": 0.2049,
"step": 4988
},
{
"epoch": 1.7,
"learning_rate": 2.91118447140539e-06,
"loss": 0.2209,
"step": 4989
},
{
"epoch": 1.7,
"learning_rate": 2.9046510916879932e-06,
"loss": 0.2072,
"step": 4990
},
{
"epoch": 1.7,
"learning_rate": 2.8981245992351887e-06,
"loss": 0.2123,
"step": 4991
},
{
"epoch": 1.7,
"learning_rate": 2.891604996081329e-06,
"loss": 0.1878,
"step": 4992
},
{
"epoch": 1.7,
"learning_rate": 2.885092284258606e-06,
"loss": 0.186,
"step": 4993
},
{
"epoch": 1.7,
"learning_rate": 2.878586465797087e-06,
"loss": 0.1983,
"step": 4994
},
{
"epoch": 1.7,
"learning_rate": 2.872087542724669e-06,
"loss": 0.2065,
"step": 4995
},
{
"epoch": 1.7,
"learning_rate": 2.8655955170671073e-06,
"loss": 0.1874,
"step": 4996
},
{
"epoch": 1.7,
"learning_rate": 2.859110390848016e-06,
"loss": 0.1902,
"step": 4997
},
{
"epoch": 1.71,
"learning_rate": 2.852632166088848e-06,
"loss": 0.2098,
"step": 4998
},
{
"epoch": 1.71,
"learning_rate": 2.846160844808915e-06,
"loss": 0.1922,
"step": 4999
},
{
"epoch": 1.71,
"learning_rate": 2.839696429025357e-06,
"loss": 0.1825,
"step": 5000
},
{
"epoch": 1.71,
"learning_rate": 2.8332389207531868e-06,
"loss": 0.1825,
"step": 5001
},
{
"epoch": 1.71,
"learning_rate": 2.8267883220052414e-06,
"loss": 0.2136,
"step": 5002
},
{
"epoch": 1.71,
"learning_rate": 2.8203446347922213e-06,
"loss": 0.187,
"step": 5003
},
{
"epoch": 1.71,
"learning_rate": 2.8139078611226668e-06,
"loss": 0.2003,
"step": 5004
},
{
"epoch": 1.71,
"learning_rate": 2.8074780030029584e-06,
"loss": 0.1928,
"step": 5005
},
{
"epoch": 1.71,
"learning_rate": 2.8010550624373273e-06,
"loss": 0.2119,
"step": 5006
},
{
"epoch": 1.71,
"learning_rate": 2.7946390414278435e-06,
"loss": 0.1885,
"step": 5007
},
{
"epoch": 1.71,
"learning_rate": 2.788229941974421e-06,
"loss": 0.1803,
"step": 5008
},
{
"epoch": 1.71,
"learning_rate": 2.7818277660748293e-06,
"loss": 0.1862,
"step": 5009
},
{
"epoch": 1.71,
"learning_rate": 2.775432515724649e-06,
"loss": 0.2046,
"step": 5010
},
{
"epoch": 1.71,
"learning_rate": 2.7690441929173416e-06,
"loss": 0.2231,
"step": 5011
},
{
"epoch": 1.71,
"learning_rate": 2.762662799644178e-06,
"loss": 0.2011,
"step": 5012
},
{
"epoch": 1.71,
"learning_rate": 2.756288337894283e-06,
"loss": 0.2122,
"step": 5013
},
{
"epoch": 1.71,
"learning_rate": 2.749920809654616e-06,
"loss": 0.2099,
"step": 5014
},
{
"epoch": 1.71,
"learning_rate": 2.7435602169099823e-06,
"loss": 0.1766,
"step": 5015
},
{
"epoch": 1.71,
"learning_rate": 2.737206561643013e-06,
"loss": 0.1985,
"step": 5016
},
{
"epoch": 1.71,
"learning_rate": 2.7308598458341855e-06,
"loss": 0.2042,
"step": 5017
},
{
"epoch": 1.71,
"learning_rate": 2.7245200714618197e-06,
"loss": 0.188,
"step": 5018
},
{
"epoch": 1.71,
"learning_rate": 2.718187240502054e-06,
"loss": 0.208,
"step": 5019
},
{
"epoch": 1.71,
"learning_rate": 2.711861354928885e-06,
"loss": 0.2143,
"step": 5020
},
{
"epoch": 1.71,
"learning_rate": 2.7055424167141185e-06,
"loss": 0.1828,
"step": 5021
},
{
"epoch": 1.71,
"learning_rate": 2.6992304278274206e-06,
"loss": 0.2053,
"step": 5022
},
{
"epoch": 1.71,
"learning_rate": 2.6929253902362757e-06,
"loss": 0.2038,
"step": 5023
},
{
"epoch": 1.71,
"learning_rate": 2.6866273059059975e-06,
"loss": 0.2367,
"step": 5024
},
{
"epoch": 1.71,
"learning_rate": 2.6803361767997583e-06,
"loss": 0.203,
"step": 5025
},
{
"epoch": 1.71,
"learning_rate": 2.6740520048785272e-06,
"loss": 0.2124,
"step": 5026
},
{
"epoch": 1.72,
"learning_rate": 2.66777479210113e-06,
"loss": 0.1891,
"step": 5027
},
{
"epoch": 1.72,
"learning_rate": 2.6615045404242117e-06,
"loss": 0.2017,
"step": 5028
},
{
"epoch": 1.72,
"learning_rate": 2.6552412518022484e-06,
"loss": 0.2095,
"step": 5029
},
{
"epoch": 1.72,
"learning_rate": 2.648984928187559e-06,
"loss": 0.209,
"step": 5030
},
{
"epoch": 1.72,
"learning_rate": 2.6427355715302664e-06,
"loss": 0.1802,
"step": 5031
},
{
"epoch": 1.72,
"learning_rate": 2.6364931837783462e-06,
"loss": 0.2226,
"step": 5032
},
{
"epoch": 1.72,
"learning_rate": 2.6302577668775837e-06,
"loss": 0.1919,
"step": 5033
},
{
"epoch": 1.72,
"learning_rate": 2.6240293227716013e-06,
"loss": 0.1735,
"step": 5034
},
{
"epoch": 1.72,
"learning_rate": 2.6178078534018546e-06,
"loss": 0.1827,
"step": 5035
},
{
"epoch": 1.72,
"learning_rate": 2.611593360707601e-06,
"loss": 0.238,
"step": 5036
},
{
"epoch": 1.72,
"learning_rate": 2.6053858466259546e-06,
"loss": 0.1969,
"step": 5037
},
{
"epoch": 1.72,
"learning_rate": 2.5991853130918175e-06,
"loss": 0.2012,
"step": 5038
},
{
"epoch": 1.72,
"learning_rate": 2.592991762037958e-06,
"loss": 0.1876,
"step": 5039
},
{
"epoch": 1.72,
"learning_rate": 2.586805195394934e-06,
"loss": 0.2164,
"step": 5040
},
{
"epoch": 1.72,
"learning_rate": 2.5806256150911477e-06,
"loss": 0.2099,
"step": 5041
},
{
"epoch": 1.72,
"learning_rate": 2.574453023052803e-06,
"loss": 0.2268,
"step": 5042
},
{
"epoch": 1.72,
"learning_rate": 2.568287421203941e-06,
"loss": 0.1988,
"step": 5043
},
{
"epoch": 1.72,
"learning_rate": 2.5621288114664297e-06,
"loss": 0.2119,
"step": 5044
},
{
"epoch": 1.72,
"learning_rate": 2.5559771957599344e-06,
"loss": 0.1744,
"step": 5045
},
{
"epoch": 1.72,
"learning_rate": 2.549832576001965e-06,
"loss": 0.1894,
"step": 5046
},
{
"epoch": 1.72,
"learning_rate": 2.5436949541078307e-06,
"loss": 0.192,
"step": 5047
},
{
"epoch": 1.72,
"learning_rate": 2.5375643319906715e-06,
"loss": 0.2261,
"step": 5048
},
{
"epoch": 1.72,
"learning_rate": 2.5314407115614473e-06,
"loss": 0.1849,
"step": 5049
},
{
"epoch": 1.72,
"learning_rate": 2.5253240947289232e-06,
"loss": 0.1859,
"step": 5050
},
{
"epoch": 1.72,
"learning_rate": 2.5192144833996954e-06,
"loss": 0.2286,
"step": 5051
},
{
"epoch": 1.72,
"learning_rate": 2.5131118794781553e-06,
"loss": 0.1969,
"step": 5052
},
{
"epoch": 1.72,
"learning_rate": 2.507016284866548e-06,
"loss": 0.1976,
"step": 5053
},
{
"epoch": 1.72,
"learning_rate": 2.500927701464889e-06,
"loss": 0.187,
"step": 5054
},
{
"epoch": 1.72,
"learning_rate": 2.4948461311710413e-06,
"loss": 0.2119,
"step": 5055
},
{
"epoch": 1.73,
"learning_rate": 2.488771575880669e-06,
"loss": 0.1912,
"step": 5056
},
{
"epoch": 1.73,
"learning_rate": 2.4827040374872435e-06,
"loss": 0.2118,
"step": 5057
},
{
"epoch": 1.73,
"learning_rate": 2.4766435178820657e-06,
"loss": 0.2116,
"step": 5058
},
{
"epoch": 1.73,
"learning_rate": 2.47059001895423e-06,
"loss": 0.2093,
"step": 5059
},
{
"epoch": 1.73,
"learning_rate": 2.4645435425906556e-06,
"loss": 0.1812,
"step": 5060
},
{
"epoch": 1.73,
"learning_rate": 2.4585040906760755e-06,
"loss": 0.2151,
"step": 5061
},
{
"epoch": 1.73,
"learning_rate": 2.4524716650930117e-06,
"loss": 0.1912,
"step": 5062
},
{
"epoch": 1.73,
"learning_rate": 2.4464462677218273e-06,
"loss": 0.1985,
"step": 5063
},
{
"epoch": 1.73,
"learning_rate": 2.4404279004406616e-06,
"loss": 0.2142,
"step": 5064
},
{
"epoch": 1.73,
"learning_rate": 2.434416565125494e-06,
"loss": 0.1864,
"step": 5065
},
{
"epoch": 1.73,
"learning_rate": 2.4284122636500788e-06,
"loss": 0.2069,
"step": 5066
},
{
"epoch": 1.73,
"learning_rate": 2.422414997886019e-06,
"loss": 0.2049,
"step": 5067
},
{
"epoch": 1.73,
"learning_rate": 2.4164247697026827e-06,
"loss": 0.216,
"step": 5068
},
{
"epoch": 1.73,
"learning_rate": 2.410441580967271e-06,
"loss": 0.1891,
"step": 5069
},
{
"epoch": 1.73,
"learning_rate": 2.4044654335447865e-06,
"loss": 0.2227,
"step": 5070
},
{
"epoch": 1.73,
"learning_rate": 2.3984963292980223e-06,
"loss": 0.2075,
"step": 5071
},
{
"epoch": 1.73,
"learning_rate": 2.3925342700876014e-06,
"loss": 0.1708,
"step": 5072
},
{
"epoch": 1.73,
"learning_rate": 2.3865792577719204e-06,
"loss": 0.1981,
"step": 5073
},
{
"epoch": 1.73,
"learning_rate": 2.3806312942072083e-06,
"loss": 0.1997,
"step": 5074
},
{
"epoch": 1.73,
"learning_rate": 2.3746903812474823e-06,
"loss": 0.1741,
"step": 5075
},
{
"epoch": 1.73,
"learning_rate": 2.3687565207445584e-06,
"loss": 0.2142,
"step": 5076
},
{
"epoch": 1.73,
"learning_rate": 2.362829714548065e-06,
"loss": 0.1865,
"step": 5077
},
{
"epoch": 1.73,
"learning_rate": 2.3569099645054216e-06,
"loss": 0.2072,
"step": 5078
},
{
"epoch": 1.73,
"learning_rate": 2.350997272461855e-06,
"loss": 0.1928,
"step": 5079
},
{
"epoch": 1.73,
"learning_rate": 2.3450916402603885e-06,
"loss": 0.1974,
"step": 5080
},
{
"epoch": 1.73,
"learning_rate": 2.339193069741849e-06,
"loss": 0.1846,
"step": 5081
},
{
"epoch": 1.73,
"learning_rate": 2.3333015627448635e-06,
"loss": 0.2041,
"step": 5082
},
{
"epoch": 1.73,
"learning_rate": 2.3274171211058458e-06,
"loss": 0.2205,
"step": 5083
},
{
"epoch": 1.73,
"learning_rate": 2.3215397466590184e-06,
"loss": 0.1974,
"step": 5084
},
{
"epoch": 1.73,
"learning_rate": 2.3156694412363926e-06,
"loss": 0.1912,
"step": 5085
},
{
"epoch": 1.74,
"learning_rate": 2.309806206667786e-06,
"loss": 0.2296,
"step": 5086
},
{
"epoch": 1.74,
"learning_rate": 2.303950044780809e-06,
"loss": 0.2157,
"step": 5087
},
{
"epoch": 1.74,
"learning_rate": 2.298100957400859e-06,
"loss": 0.1921,
"step": 5088
},
{
"epoch": 1.74,
"learning_rate": 2.292258946351139e-06,
"loss": 0.2208,
"step": 5089
},
{
"epoch": 1.74,
"learning_rate": 2.28642401345264e-06,
"loss": 0.2128,
"step": 5090
},
{
"epoch": 1.74,
"learning_rate": 2.280596160524154e-06,
"loss": 0.2089,
"step": 5091
},
{
"epoch": 1.74,
"learning_rate": 2.27477538938225e-06,
"loss": 0.2277,
"step": 5092
},
{
"epoch": 1.74,
"learning_rate": 2.268961701841307e-06,
"loss": 0.1874,
"step": 5093
},
{
"epoch": 1.74,
"learning_rate": 2.2631550997134877e-06,
"loss": 0.2075,
"step": 5094
},
{
"epoch": 1.74,
"learning_rate": 2.2573555848087513e-06,
"loss": 0.205,
"step": 5095
},
{
"epoch": 1.74,
"learning_rate": 2.2515631589348497e-06,
"loss": 0.2177,
"step": 5096
},
{
"epoch": 1.74,
"learning_rate": 2.2457778238973046e-06,
"loss": 0.1893,
"step": 5097
},
{
"epoch": 1.74,
"learning_rate": 2.239999581499455e-06,
"loss": 0.21,
"step": 5098
},
{
"epoch": 1.74,
"learning_rate": 2.2342284335424097e-06,
"loss": 0.2175,
"step": 5099
},
{
"epoch": 1.74,
"learning_rate": 2.228464381825077e-06,
"loss": 0.2128,
"step": 5100
},
{
"epoch": 1.74,
"learning_rate": 2.2227074281441525e-06,
"loss": 0.2111,
"step": 5101
},
{
"epoch": 1.74,
"learning_rate": 2.216957574294107e-06,
"loss": 0.2093,
"step": 5102
},
{
"epoch": 1.74,
"learning_rate": 2.21121482206722e-06,
"loss": 0.1866,
"step": 5103
},
{
"epoch": 1.74,
"learning_rate": 2.20547917325353e-06,
"loss": 0.1894,
"step": 5104
},
{
"epoch": 1.74,
"learning_rate": 2.199750629640887e-06,
"loss": 0.212,
"step": 5105
},
{
"epoch": 1.74,
"learning_rate": 2.1940291930149154e-06,
"loss": 0.192,
"step": 5106
},
{
"epoch": 1.74,
"learning_rate": 2.1883148651590134e-06,
"loss": 0.1974,
"step": 5107
},
{
"epoch": 1.74,
"learning_rate": 2.18260764785439e-06,
"loss": 0.2234,
"step": 5108
},
{
"epoch": 1.74,
"learning_rate": 2.176907542880011e-06,
"loss": 0.1978,
"step": 5109
},
{
"epoch": 1.74,
"learning_rate": 2.171214552012643e-06,
"loss": 0.2232,
"step": 5110
},
{
"epoch": 1.74,
"learning_rate": 2.1655286770268234e-06,
"loss": 0.1711,
"step": 5111
},
{
"epoch": 1.74,
"learning_rate": 2.1598499196948757e-06,
"loss": 0.2009,
"step": 5112
},
{
"epoch": 1.74,
"learning_rate": 2.1541782817869143e-06,
"loss": 0.2127,
"step": 5113
},
{
"epoch": 1.74,
"learning_rate": 2.148513765070817e-06,
"loss": 0.2006,
"step": 5114
},
{
"epoch": 1.75,
"learning_rate": 2.1428563713122545e-06,
"loss": 0.2348,
"step": 5115
},
{
"epoch": 1.75,
"learning_rate": 2.137206102274672e-06,
"loss": 0.1983,
"step": 5116
},
{
"epoch": 1.75,
"learning_rate": 2.1315629597192966e-06,
"loss": 0.2138,
"step": 5117
},
{
"epoch": 1.75,
"learning_rate": 2.1259269454051296e-06,
"loss": 0.2011,
"step": 5118
},
{
"epoch": 1.75,
"learning_rate": 2.1202980610889545e-06,
"loss": 0.181,
"step": 5119
},
{
"epoch": 1.75,
"learning_rate": 2.1146763085253373e-06,
"loss": 0.1999,
"step": 5120
},
{
"epoch": 1.75,
"learning_rate": 2.1090616894666013e-06,
"loss": 0.2174,
"step": 5121
},
{
"epoch": 1.75,
"learning_rate": 2.1034542056628804e-06,
"loss": 0.1772,
"step": 5122
},
{
"epoch": 1.75,
"learning_rate": 2.097853858862045e-06,
"loss": 0.2295,
"step": 5123
},
{
"epoch": 1.75,
"learning_rate": 2.0922606508097716e-06,
"loss": 0.1793,
"step": 5124
},
{
"epoch": 1.75,
"learning_rate": 2.086674583249493e-06,
"loss": 0.2374,
"step": 5125
},
{
"epoch": 1.75,
"learning_rate": 2.0810956579224245e-06,
"loss": 0.2222,
"step": 5126
},
{
"epoch": 1.75,
"learning_rate": 2.075523876567556e-06,
"loss": 0.2057,
"step": 5127
},
{
"epoch": 1.75,
"learning_rate": 2.0699592409216456e-06,
"loss": 0.2078,
"step": 5128
},
{
"epoch": 1.75,
"learning_rate": 2.0644017527192306e-06,
"loss": 0.1579,
"step": 5129
},
{
"epoch": 1.75,
"learning_rate": 2.0588514136926084e-06,
"loss": 0.2337,
"step": 5130
},
{
"epoch": 1.75,
"learning_rate": 2.0533082255718594e-06,
"loss": 0.1836,
"step": 5131
},
{
"epoch": 1.75,
"learning_rate": 2.04777219008484e-06,
"loss": 0.1715,
"step": 5132
},
{
"epoch": 1.75,
"learning_rate": 2.0422433089571562e-06,
"loss": 0.1987,
"step": 5133
},
{
"epoch": 1.75,
"learning_rate": 2.0367215839122045e-06,
"loss": 0.1842,
"step": 5134
},
{
"epoch": 1.75,
"learning_rate": 2.0312070166711328e-06,
"loss": 0.2104,
"step": 5135
},
{
"epoch": 1.75,
"learning_rate": 2.025699608952883e-06,
"loss": 0.2084,
"step": 5136
},
{
"epoch": 1.75,
"learning_rate": 2.020199362474137e-06,
"loss": 0.1832,
"step": 5137
},
{
"epoch": 1.75,
"learning_rate": 2.014706278949363e-06,
"loss": 0.2122,
"step": 5138
},
{
"epoch": 1.75,
"learning_rate": 2.009220360090794e-06,
"loss": 0.2,
"step": 5139
},
{
"epoch": 1.75,
"learning_rate": 2.0037416076084198e-06,
"loss": 0.1981,
"step": 5140
},
{
"epoch": 1.75,
"learning_rate": 1.998270023210011e-06,
"loss": 0.1848,
"step": 5141
},
{
"epoch": 1.75,
"learning_rate": 1.9928056086010866e-06,
"loss": 0.204,
"step": 5142
},
{
"epoch": 1.75,
"learning_rate": 1.9873483654849505e-06,
"loss": 0.2219,
"step": 5143
},
{
"epoch": 1.76,
"learning_rate": 1.981898295562651e-06,
"loss": 0.1989,
"step": 5144
},
{
"epoch": 1.76,
"learning_rate": 1.976455400533017e-06,
"loss": 0.228,
"step": 5145
},
{
"epoch": 1.76,
"learning_rate": 1.971019682092637e-06,
"loss": 0.1928,
"step": 5146
},
{
"epoch": 1.76,
"learning_rate": 1.9655911419358518e-06,
"loss": 0.1958,
"step": 5147
},
{
"epoch": 1.76,
"learning_rate": 1.9601697817547807e-06,
"loss": 0.2052,
"step": 5148
},
{
"epoch": 1.76,
"learning_rate": 1.9547556032392863e-06,
"loss": 0.2032,
"step": 5149
},
{
"epoch": 1.76,
"learning_rate": 1.949348608077017e-06,
"loss": 0.1938,
"step": 5150
},
{
"epoch": 1.76,
"learning_rate": 1.9439487979533597e-06,
"loss": 0.2101,
"step": 5151
},
{
"epoch": 1.76,
"learning_rate": 1.93855617455147e-06,
"loss": 0.2122,
"step": 5152
},
{
"epoch": 1.76,
"learning_rate": 1.933170739552273e-06,
"loss": 0.1874,
"step": 5153
},
{
"epoch": 1.76,
"learning_rate": 1.927792494634434e-06,
"loss": 0.2028,
"step": 5154
},
{
"epoch": 1.76,
"learning_rate": 1.9224214414743954e-06,
"loss": 0.1972,
"step": 5155
},
{
"epoch": 1.76,
"learning_rate": 1.91705758174634e-06,
"loss": 0.1821,
"step": 5156
},
{
"epoch": 1.76,
"learning_rate": 1.9117009171222223e-06,
"loss": 0.2035,
"step": 5157
},
{
"epoch": 1.76,
"learning_rate": 1.9063514492717538e-06,
"loss": 0.1843,
"step": 5158
},
{
"epoch": 1.76,
"learning_rate": 1.9010091798623897e-06,
"loss": 0.2026,
"step": 5159
},
{
"epoch": 1.76,
"learning_rate": 1.8956741105593617e-06,
"loss": 0.1883,
"step": 5160
},
{
"epoch": 1.76,
"learning_rate": 1.890346243025637e-06,
"loss": 0.1935,
"step": 5161
},
{
"epoch": 1.76,
"learning_rate": 1.8850255789219452e-06,
"loss": 0.1744,
"step": 5162
},
{
"epoch": 1.76,
"learning_rate": 1.879712119906779e-06,
"loss": 0.2116,
"step": 5163
},
{
"epoch": 1.76,
"learning_rate": 1.8744058676363752e-06,
"loss": 0.1933,
"step": 5164
},
{
"epoch": 1.76,
"learning_rate": 1.8691068237647296e-06,
"loss": 0.1575,
"step": 5165
},
{
"epoch": 1.76,
"learning_rate": 1.8638149899435853e-06,
"loss": 0.1889,
"step": 5166
},
{
"epoch": 1.76,
"learning_rate": 1.8585303678224447e-06,
"loss": 0.2802,
"step": 5167
},
{
"epoch": 1.76,
"learning_rate": 1.8532529590485543e-06,
"loss": 0.1924,
"step": 5168
},
{
"epoch": 1.76,
"learning_rate": 1.847982765266923e-06,
"loss": 0.2141,
"step": 5169
},
{
"epoch": 1.76,
"learning_rate": 1.8427197881202979e-06,
"loss": 0.1856,
"step": 5170
},
{
"epoch": 1.76,
"learning_rate": 1.8374640292491863e-06,
"loss": 0.1965,
"step": 5171
},
{
"epoch": 1.76,
"learning_rate": 1.8322154902918497e-06,
"loss": 0.1938,
"step": 5172
},
{
"epoch": 1.76,
"learning_rate": 1.82697417288428e-06,
"loss": 0.1887,
"step": 5173
},
{
"epoch": 1.77,
"learning_rate": 1.821740078660239e-06,
"loss": 0.189,
"step": 5174
},
{
"epoch": 1.77,
"learning_rate": 1.8165132092512194e-06,
"loss": 0.1882,
"step": 5175
},
{
"epoch": 1.77,
"learning_rate": 1.8112935662864762e-06,
"loss": 0.1919,
"step": 5176
},
{
"epoch": 1.77,
"learning_rate": 1.8060811513930077e-06,
"loss": 0.2037,
"step": 5177
},
{
"epoch": 1.77,
"learning_rate": 1.8008759661955533e-06,
"loss": 0.2098,
"step": 5178
},
{
"epoch": 1.77,
"learning_rate": 1.7956780123166123e-06,
"loss": 0.1916,
"step": 5179
},
{
"epoch": 1.77,
"learning_rate": 1.7904872913764081e-06,
"loss": 0.1738,
"step": 5180
},
{
"epoch": 1.77,
"learning_rate": 1.7853038049929328e-06,
"loss": 0.1794,
"step": 5181
},
{
"epoch": 1.77,
"learning_rate": 1.7801275547819047e-06,
"loss": 0.2138,
"step": 5182
},
{
"epoch": 1.77,
"learning_rate": 1.774958542356797e-06,
"loss": 0.2059,
"step": 5183
},
{
"epoch": 1.77,
"learning_rate": 1.7697967693288326e-06,
"loss": 0.2126,
"step": 5184
},
{
"epoch": 1.77,
"learning_rate": 1.7646422373069599e-06,
"loss": 0.1901,
"step": 5185
},
{
"epoch": 1.77,
"learning_rate": 1.7594949478978857e-06,
"loss": 0.1819,
"step": 5186
},
{
"epoch": 1.77,
"learning_rate": 1.7543549027060485e-06,
"loss": 0.1993,
"step": 5187
},
{
"epoch": 1.77,
"learning_rate": 1.7492221033336392e-06,
"loss": 0.1852,
"step": 5188
},
{
"epoch": 1.77,
"learning_rate": 1.7440965513805863e-06,
"loss": 0.2095,
"step": 5189
},
{
"epoch": 1.77,
"learning_rate": 1.738978248444545e-06,
"loss": 0.2115,
"step": 5190
},
{
"epoch": 1.77,
"learning_rate": 1.7338671961209417e-06,
"loss": 0.2098,
"step": 5191
},
{
"epoch": 1.77,
"learning_rate": 1.7287633960029136e-06,
"loss": 0.1895,
"step": 5192
},
{
"epoch": 1.77,
"learning_rate": 1.7236668496813574e-06,
"loss": 0.1955,
"step": 5193
},
{
"epoch": 1.77,
"learning_rate": 1.7185775587448855e-06,
"loss": 0.2264,
"step": 5194
},
{
"epoch": 1.77,
"learning_rate": 1.713495524779879e-06,
"loss": 0.1895,
"step": 5195
},
{
"epoch": 1.77,
"learning_rate": 1.7084207493704291e-06,
"loss": 0.1992,
"step": 5196
},
{
"epoch": 1.77,
"learning_rate": 1.7033532340983787e-06,
"loss": 0.1911,
"step": 5197
},
{
"epoch": 1.77,
"learning_rate": 1.6982929805433145e-06,
"loss": 0.224,
"step": 5198
},
{
"epoch": 1.77,
"learning_rate": 1.6932399902825386e-06,
"loss": 0.1956,
"step": 5199
},
{
"epoch": 1.77,
"learning_rate": 1.6881942648911076e-06,
"loss": 0.1835,
"step": 5200
},
{
"epoch": 1.77,
"learning_rate": 1.683155805941805e-06,
"loss": 0.2053,
"step": 5201
},
{
"epoch": 1.77,
"learning_rate": 1.6781246150051499e-06,
"loss": 0.1998,
"step": 5202
},
{
"epoch": 1.78,
"learning_rate": 1.673100693649407e-06,
"loss": 0.1991,
"step": 5203
},
{
"epoch": 1.78,
"learning_rate": 1.6680840434405459e-06,
"loss": 0.2192,
"step": 5204
},
{
"epoch": 1.78,
"learning_rate": 1.6630746659423103e-06,
"loss": 0.1896,
"step": 5205
},
{
"epoch": 1.78,
"learning_rate": 1.6580725627161452e-06,
"loss": 0.1804,
"step": 5206
},
{
"epoch": 1.78,
"learning_rate": 1.6530777353212429e-06,
"loss": 0.1823,
"step": 5207
},
{
"epoch": 1.78,
"learning_rate": 1.6480901853145158e-06,
"loss": 0.1965,
"step": 5208
},
{
"epoch": 1.78,
"learning_rate": 1.6431099142506234e-06,
"loss": 0.2242,
"step": 5209
},
{
"epoch": 1.78,
"learning_rate": 1.6381369236819516e-06,
"loss": 0.1894,
"step": 5210
},
{
"epoch": 1.78,
"learning_rate": 1.633171215158602e-06,
"loss": 0.2149,
"step": 5211
},
{
"epoch": 1.78,
"learning_rate": 1.6282127902284312e-06,
"loss": 0.1992,
"step": 5212
},
{
"epoch": 1.78,
"learning_rate": 1.6232616504370025e-06,
"loss": 0.177,
"step": 5213
},
{
"epoch": 1.78,
"learning_rate": 1.6183177973276259e-06,
"loss": 0.2228,
"step": 5214
},
{
"epoch": 1.78,
"learning_rate": 1.61338123244133e-06,
"loss": 0.2058,
"step": 5215
},
{
"epoch": 1.78,
"learning_rate": 1.6084519573168727e-06,
"loss": 0.2128,
"step": 5216
},
{
"epoch": 1.78,
"learning_rate": 1.6035299734907444e-06,
"loss": 0.212,
"step": 5217
},
{
"epoch": 1.78,
"learning_rate": 1.5986152824971512e-06,
"loss": 0.2048,
"step": 5218
},
{
"epoch": 1.78,
"learning_rate": 1.5937078858680483e-06,
"loss": 0.2219,
"step": 5219
},
{
"epoch": 1.78,
"learning_rate": 1.5888077851330923e-06,
"loss": 0.1908,
"step": 5220
},
{
"epoch": 1.78,
"learning_rate": 1.5839149818196842e-06,
"loss": 0.1935,
"step": 5221
},
{
"epoch": 1.78,
"learning_rate": 1.579029477452934e-06,
"loss": 0.2171,
"step": 5222
},
{
"epoch": 1.78,
"learning_rate": 1.5741512735556935e-06,
"loss": 0.1974,
"step": 5223
},
{
"epoch": 1.78,
"learning_rate": 1.5692803716485293e-06,
"loss": 0.2074,
"step": 5224
},
{
"epoch": 1.78,
"learning_rate": 1.5644167732497273e-06,
"loss": 0.1724,
"step": 5225
},
{
"epoch": 1.78,
"learning_rate": 1.5595604798753105e-06,
"loss": 0.1855,
"step": 5226
},
{
"epoch": 1.78,
"learning_rate": 1.5547114930390099e-06,
"loss": 0.2011,
"step": 5227
},
{
"epoch": 1.78,
"learning_rate": 1.5498698142522917e-06,
"loss": 0.1673,
"step": 5228
},
{
"epoch": 1.78,
"learning_rate": 1.5450354450243426e-06,
"loss": 0.1802,
"step": 5229
},
{
"epoch": 1.78,
"learning_rate": 1.5402083868620549e-06,
"loss": 0.1898,
"step": 5230
},
{
"epoch": 1.78,
"learning_rate": 1.5353886412700662e-06,
"loss": 0.2107,
"step": 5231
},
{
"epoch": 1.79,
"learning_rate": 1.5305762097507115e-06,
"loss": 0.209,
"step": 5232
},
{
"epoch": 1.79,
"learning_rate": 1.5257710938040682e-06,
"loss": 0.1926,
"step": 5233
},
{
"epoch": 1.79,
"learning_rate": 1.520973294927916e-06,
"loss": 0.2005,
"step": 5234
},
{
"epoch": 1.79,
"learning_rate": 1.5161828146177586e-06,
"loss": 0.2018,
"step": 5235
},
{
"epoch": 1.79,
"learning_rate": 1.5113996543668297e-06,
"loss": 0.1711,
"step": 5236
},
{
"epoch": 1.79,
"learning_rate": 1.5066238156660612e-06,
"loss": 0.1967,
"step": 5237
},
{
"epoch": 1.79,
"learning_rate": 1.501855300004118e-06,
"loss": 0.1601,
"step": 5238
},
{
"epoch": 1.79,
"learning_rate": 1.4970941088673751e-06,
"loss": 0.2079,
"step": 5239
},
{
"epoch": 1.79,
"learning_rate": 1.4923402437399287e-06,
"loss": 0.2052,
"step": 5240
},
{
"epoch": 1.79,
"learning_rate": 1.4875937061035904e-06,
"loss": 0.1998,
"step": 5241
},
{
"epoch": 1.79,
"learning_rate": 1.4828544974378828e-06,
"loss": 0.2041,
"step": 5242
},
{
"epoch": 1.79,
"learning_rate": 1.478122619220057e-06,
"loss": 0.2029,
"step": 5243
},
{
"epoch": 1.79,
"learning_rate": 1.4733980729250586e-06,
"loss": 0.1881,
"step": 5244
},
{
"epoch": 1.79,
"learning_rate": 1.4686808600255648e-06,
"loss": 0.2222,
"step": 5245
},
{
"epoch": 1.79,
"learning_rate": 1.4639709819919634e-06,
"loss": 0.1763,
"step": 5246
},
{
"epoch": 1.79,
"learning_rate": 1.4592684402923579e-06,
"loss": 0.1923,
"step": 5247
},
{
"epoch": 1.79,
"learning_rate": 1.4545732363925508e-06,
"loss": 0.2089,
"step": 5248
},
{
"epoch": 1.79,
"learning_rate": 1.449885371756074e-06,
"loss": 0.1936,
"step": 5249
},
{
"epoch": 1.79,
"learning_rate": 1.4452048478441698e-06,
"loss": 0.1857,
"step": 5250
},
{
"epoch": 1.79,
"learning_rate": 1.4405316661157798e-06,
"loss": 0.2156,
"step": 5251
},
{
"epoch": 1.79,
"learning_rate": 1.4358658280275716e-06,
"loss": 0.1892,
"step": 5252
},
{
"epoch": 1.79,
"learning_rate": 1.43120733503391e-06,
"loss": 0.2369,
"step": 5253
},
{
"epoch": 1.79,
"learning_rate": 1.4265561885868834e-06,
"loss": 0.1888,
"step": 5254
},
{
"epoch": 1.79,
"learning_rate": 1.4219123901362873e-06,
"loss": 0.1821,
"step": 5255
},
{
"epoch": 1.79,
"learning_rate": 1.4172759411296143e-06,
"loss": 0.2182,
"step": 5256
},
{
"epoch": 1.79,
"learning_rate": 1.412646843012086e-06,
"loss": 0.2041,
"step": 5257
},
{
"epoch": 1.79,
"learning_rate": 1.4080250972266173e-06,
"loss": 0.1847,
"step": 5258
},
{
"epoch": 1.79,
"learning_rate": 1.403410705213834e-06,
"loss": 0.191,
"step": 5259
},
{
"epoch": 1.79,
"learning_rate": 1.3988036684120769e-06,
"loss": 0.2141,
"step": 5260
},
{
"epoch": 1.79,
"learning_rate": 1.3942039882573894e-06,
"loss": 0.202,
"step": 5261
},
{
"epoch": 1.8,
"learning_rate": 1.389611666183524e-06,
"loss": 0.205,
"step": 5262
},
{
"epoch": 1.8,
"learning_rate": 1.3850267036219279e-06,
"loss": 0.1936,
"step": 5263
},
{
"epoch": 1.8,
"learning_rate": 1.3804491020017768e-06,
"loss": 0.2153,
"step": 5264
},
{
"epoch": 1.8,
"learning_rate": 1.375878862749927e-06,
"loss": 0.1703,
"step": 5265
},
{
"epoch": 1.8,
"learning_rate": 1.3713159872909581e-06,
"loss": 0.1981,
"step": 5266
},
{
"epoch": 1.8,
"learning_rate": 1.366760477047152e-06,
"loss": 0.2051,
"step": 5267
},
{
"epoch": 1.8,
"learning_rate": 1.3622123334384812e-06,
"loss": 0.1801,
"step": 5268
},
{
"epoch": 1.8,
"learning_rate": 1.3576715578826392e-06,
"loss": 0.1919,
"step": 5269
},
{
"epoch": 1.8,
"learning_rate": 1.353138151795011e-06,
"loss": 0.1921,
"step": 5270
},
{
"epoch": 1.8,
"learning_rate": 1.3486121165886934e-06,
"loss": 0.2176,
"step": 5271
},
{
"epoch": 1.8,
"learning_rate": 1.3440934536744803e-06,
"loss": 0.2392,
"step": 5272
},
{
"epoch": 1.8,
"learning_rate": 1.3395821644608619e-06,
"loss": 0.2037,
"step": 5273
},
{
"epoch": 1.8,
"learning_rate": 1.3350782503540437e-06,
"loss": 0.2321,
"step": 5274
},
{
"epoch": 1.8,
"learning_rate": 1.3305817127579223e-06,
"loss": 0.1791,
"step": 5275
},
{
"epoch": 1.8,
"learning_rate": 1.3260925530741015e-06,
"loss": 0.1712,
"step": 5276
},
{
"epoch": 1.8,
"learning_rate": 1.3216107727018755e-06,
"loss": 0.1819,
"step": 5277
},
{
"epoch": 1.8,
"learning_rate": 1.3171363730382524e-06,
"loss": 0.1922,
"step": 5278
},
{
"epoch": 1.8,
"learning_rate": 1.3126693554779219e-06,
"loss": 0.2091,
"step": 5279
},
{
"epoch": 1.8,
"learning_rate": 1.3082097214132894e-06,
"loss": 0.1891,
"step": 5280
},
{
"epoch": 1.8,
"learning_rate": 1.3037574722344543e-06,
"loss": 0.1867,
"step": 5281
},
{
"epoch": 1.8,
"learning_rate": 1.2993126093292007e-06,
"loss": 0.1989,
"step": 5282
},
{
"epoch": 1.8,
"learning_rate": 1.2948751340830373e-06,
"loss": 0.2103,
"step": 5283
},
{
"epoch": 1.8,
"learning_rate": 1.290445047879138e-06,
"loss": 0.1871,
"step": 5284
},
{
"epoch": 1.8,
"learning_rate": 1.2860223520983984e-06,
"loss": 0.1916,
"step": 5285
},
{
"epoch": 1.8,
"learning_rate": 1.2816070481194042e-06,
"loss": 0.2103,
"step": 5286
},
{
"epoch": 1.8,
"learning_rate": 1.2771991373184217e-06,
"loss": 0.2053,
"step": 5287
},
{
"epoch": 1.8,
"learning_rate": 1.2727986210694432e-06,
"loss": 0.1546,
"step": 5288
},
{
"epoch": 1.8,
"learning_rate": 1.2684055007441242e-06,
"loss": 0.2044,
"step": 5289
},
{
"epoch": 1.8,
"learning_rate": 1.264019777711839e-06,
"loss": 0.1942,
"step": 5290
},
{
"epoch": 1.81,
"learning_rate": 1.2596414533396383e-06,
"loss": 0.2074,
"step": 5291
},
{
"epoch": 1.81,
"learning_rate": 1.255270528992275e-06,
"loss": 0.1994,
"step": 5292
},
{
"epoch": 1.81,
"learning_rate": 1.2509070060322037e-06,
"loss": 0.1848,
"step": 5293
},
{
"epoch": 1.81,
"learning_rate": 1.246550885819553e-06,
"loss": 0.2173,
"step": 5294
},
{
"epoch": 1.81,
"learning_rate": 1.242202169712159e-06,
"loss": 0.2015,
"step": 5295
},
{
"epoch": 1.81,
"learning_rate": 1.2378608590655428e-06,
"loss": 0.2062,
"step": 5296
},
{
"epoch": 1.81,
"learning_rate": 1.2335269552329165e-06,
"loss": 0.2004,
"step": 5297
},
{
"epoch": 1.81,
"learning_rate": 1.2292004595651968e-06,
"loss": 0.2192,
"step": 5298
},
{
"epoch": 1.81,
"learning_rate": 1.2248813734109687e-06,
"loss": 0.2054,
"step": 5299
},
{
"epoch": 1.81,
"learning_rate": 1.2205696981165276e-06,
"loss": 0.1891,
"step": 5300
},
{
"epoch": 1.81,
"learning_rate": 1.2162654350258402e-06,
"loss": 0.1714,
"step": 5301
},
{
"epoch": 1.81,
"learning_rate": 1.2119685854805884e-06,
"loss": 0.1988,
"step": 5302
},
{
"epoch": 1.81,
"learning_rate": 1.207679150820118e-06,
"loss": 0.1932,
"step": 5303
},
{
"epoch": 1.81,
"learning_rate": 1.2033971323814786e-06,
"loss": 0.2095,
"step": 5304
},
{
"epoch": 1.81,
"learning_rate": 1.1991225314993998e-06,
"loss": 0.2717,
"step": 5305
},
{
"epoch": 1.81,
"learning_rate": 1.1948553495063047e-06,
"loss": 0.1825,
"step": 5306
},
{
"epoch": 1.81,
"learning_rate": 1.1905955877323016e-06,
"loss": 0.1895,
"step": 5307
},
{
"epoch": 1.81,
"learning_rate": 1.1863432475051838e-06,
"loss": 0.1861,
"step": 5308
},
{
"epoch": 1.81,
"learning_rate": 1.1820983301504413e-06,
"loss": 0.2144,
"step": 5309
},
{
"epoch": 1.81,
"learning_rate": 1.177860836991232e-06,
"loss": 0.1725,
"step": 5310
},
{
"epoch": 1.81,
"learning_rate": 1.1736307693484132e-06,
"loss": 0.1809,
"step": 5311
},
{
"epoch": 1.81,
"learning_rate": 1.1694081285405307e-06,
"loss": 0.2092,
"step": 5312
},
{
"epoch": 1.81,
"learning_rate": 1.1651929158838004e-06,
"loss": 0.1937,
"step": 5313
},
{
"epoch": 1.81,
"learning_rate": 1.1609851326921412e-06,
"loss": 0.1991,
"step": 5314
},
{
"epoch": 1.81,
"learning_rate": 1.1567847802771343e-06,
"loss": 0.2362,
"step": 5315
},
{
"epoch": 1.81,
"learning_rate": 1.1525918599480713e-06,
"loss": 0.1973,
"step": 5316
},
{
"epoch": 1.81,
"learning_rate": 1.148406373011901e-06,
"loss": 0.1899,
"step": 5317
},
{
"epoch": 1.81,
"learning_rate": 1.1442283207732746e-06,
"loss": 0.1924,
"step": 5318
},
{
"epoch": 1.81,
"learning_rate": 1.1400577045345167e-06,
"loss": 0.2398,
"step": 5319
},
{
"epoch": 1.82,
"learning_rate": 1.1358945255956322e-06,
"loss": 0.189,
"step": 5320
},
{
"epoch": 1.82,
"learning_rate": 1.131738785254316e-06,
"loss": 0.1805,
"step": 5321
},
{
"epoch": 1.82,
"learning_rate": 1.1275904848059322e-06,
"loss": 0.2007,
"step": 5322
},
{
"epoch": 1.82,
"learning_rate": 1.123449625543538e-06,
"loss": 0.203,
"step": 5323
},
{
"epoch": 1.82,
"learning_rate": 1.1193162087578702e-06,
"loss": 0.22,
"step": 5324
},
{
"epoch": 1.82,
"learning_rate": 1.1151902357373345e-06,
"loss": 0.1801,
"step": 5325
},
{
"epoch": 1.82,
"learning_rate": 1.1110717077680327e-06,
"loss": 0.2012,
"step": 5326
},
{
"epoch": 1.82,
"learning_rate": 1.106960626133724e-06,
"loss": 0.202,
"step": 5327
},
{
"epoch": 1.82,
"learning_rate": 1.1028569921158721e-06,
"loss": 0.1964,
"step": 5328
},
{
"epoch": 1.82,
"learning_rate": 1.0987608069935928e-06,
"loss": 0.2044,
"step": 5329
},
{
"epoch": 1.82,
"learning_rate": 1.0946720720437092e-06,
"loss": 0.2042,
"step": 5330
},
{
"epoch": 1.82,
"learning_rate": 1.0905907885406936e-06,
"loss": 0.2342,
"step": 5331
},
{
"epoch": 1.82,
"learning_rate": 1.0865169577567169e-06,
"loss": 0.2223,
"step": 5332
},
{
"epoch": 1.82,
"learning_rate": 1.0824505809616192e-06,
"loss": 0.205,
"step": 5333
},
{
"epoch": 1.82,
"learning_rate": 1.0783916594229115e-06,
"loss": 0.1993,
"step": 5334
},
{
"epoch": 1.82,
"learning_rate": 1.0743401944057901e-06,
"loss": 0.229,
"step": 5335
},
{
"epoch": 1.82,
"learning_rate": 1.070296187173117e-06,
"loss": 0.1962,
"step": 5336
},
{
"epoch": 1.82,
"learning_rate": 1.0662596389854423e-06,
"loss": 0.1843,
"step": 5337
},
{
"epoch": 1.82,
"learning_rate": 1.0622305511009872e-06,
"loss": 0.1788,
"step": 5338
},
{
"epoch": 1.82,
"learning_rate": 1.0582089247756332e-06,
"loss": 0.2164,
"step": 5339
},
{
"epoch": 1.82,
"learning_rate": 1.0541947612629582e-06,
"loss": 0.191,
"step": 5340
},
{
"epoch": 1.82,
"learning_rate": 1.0501880618141974e-06,
"loss": 0.1773,
"step": 5341
},
{
"epoch": 1.82,
"learning_rate": 1.0461888276782655e-06,
"loss": 0.2201,
"step": 5342
},
{
"epoch": 1.82,
"learning_rate": 1.0421970601017516e-06,
"loss": 0.2187,
"step": 5343
},
{
"epoch": 1.82,
"learning_rate": 1.038212760328916e-06,
"loss": 0.2007,
"step": 5344
},
{
"epoch": 1.82,
"learning_rate": 1.034235929601693e-06,
"loss": 0.2076,
"step": 5345
},
{
"epoch": 1.82,
"learning_rate": 1.0302665691596798e-06,
"loss": 0.2136,
"step": 5346
},
{
"epoch": 1.82,
"learning_rate": 1.0263046802401589e-06,
"loss": 0.1973,
"step": 5347
},
{
"epoch": 1.82,
"learning_rate": 1.022350264078073e-06,
"loss": 0.198,
"step": 5348
},
{
"epoch": 1.82,
"learning_rate": 1.0184033219060419e-06,
"loss": 0.2157,
"step": 5349
},
{
"epoch": 1.83,
"learning_rate": 1.0144638549543534e-06,
"loss": 0.1768,
"step": 5350
},
{
"epoch": 1.83,
"learning_rate": 1.0105318644509615e-06,
"loss": 0.1769,
"step": 5351
},
{
"epoch": 1.83,
"learning_rate": 1.006607351621497e-06,
"loss": 0.1767,
"step": 5352
},
{
"epoch": 1.83,
"learning_rate": 1.0026903176892532e-06,
"loss": 0.1976,
"step": 5353
},
{
"epoch": 1.83,
"learning_rate": 9.98780763875201e-07,
"loss": 0.1996,
"step": 5354
},
{
"epoch": 1.83,
"learning_rate": 9.948786913979658e-07,
"loss": 0.1808,
"step": 5355
},
{
"epoch": 1.83,
"learning_rate": 9.90984101473852e-07,
"loss": 0.1915,
"step": 5356
},
{
"epoch": 1.83,
"learning_rate": 9.870969953168302e-07,
"loss": 0.1721,
"step": 5357
},
{
"epoch": 1.83,
"learning_rate": 9.83217374138537e-07,
"loss": 0.2012,
"step": 5358
},
{
"epoch": 1.83,
"learning_rate": 9.793452391482771e-07,
"loss": 0.1849,
"step": 5359
},
{
"epoch": 1.83,
"learning_rate": 9.75480591553013e-07,
"loss": 0.2259,
"step": 5360
},
{
"epoch": 1.83,
"learning_rate": 9.71623432557392e-07,
"loss": 0.1785,
"step": 5361
},
{
"epoch": 1.83,
"learning_rate": 9.677737633637052e-07,
"loss": 0.2109,
"step": 5362
},
{
"epoch": 1.83,
"learning_rate": 9.639315851719232e-07,
"loss": 0.1961,
"step": 5363
},
{
"epoch": 1.83,
"learning_rate": 9.600968991796828e-07,
"loss": 0.1942,
"step": 5364
},
{
"epoch": 1.83,
"learning_rate": 9.562697065822744e-07,
"loss": 0.1926,
"step": 5365
},
{
"epoch": 1.83,
"learning_rate": 9.524500085726612e-07,
"loss": 0.205,
"step": 5366
},
{
"epoch": 1.83,
"learning_rate": 9.48637806341468e-07,
"loss": 0.2107,
"step": 5367
},
{
"epoch": 1.83,
"learning_rate": 9.448331010769806e-07,
"loss": 0.1957,
"step": 5368
},
{
"epoch": 1.83,
"learning_rate": 9.410358939651587e-07,
"loss": 0.204,
"step": 5369
},
{
"epoch": 1.83,
"learning_rate": 9.372461861896025e-07,
"loss": 0.1972,
"step": 5370
},
{
"epoch": 1.83,
"learning_rate": 9.334639789316035e-07,
"loss": 0.2303,
"step": 5371
},
{
"epoch": 1.83,
"learning_rate": 9.296892733700907e-07,
"loss": 0.2322,
"step": 5372
},
{
"epoch": 1.83,
"learning_rate": 9.259220706816701e-07,
"loss": 0.1985,
"step": 5373
},
{
"epoch": 1.83,
"learning_rate": 9.221623720405998e-07,
"loss": 0.1973,
"step": 5374
},
{
"epoch": 1.83,
"learning_rate": 9.184101786188031e-07,
"loss": 0.178,
"step": 5375
},
{
"epoch": 1.83,
"learning_rate": 9.146654915858671e-07,
"loss": 0.2143,
"step": 5376
},
{
"epoch": 1.83,
"learning_rate": 9.109283121090273e-07,
"loss": 0.2043,
"step": 5377
},
{
"epoch": 1.83,
"learning_rate": 9.071986413531963e-07,
"loss": 0.1876,
"step": 5378
},
{
"epoch": 1.84,
"learning_rate": 9.034764804809276e-07,
"loss": 0.2156,
"step": 5379
},
{
"epoch": 1.84,
"learning_rate": 8.997618306524486e-07,
"loss": 0.2114,
"step": 5380
},
{
"epoch": 1.84,
"learning_rate": 8.960546930256386e-07,
"loss": 0.2102,
"step": 5381
},
{
"epoch": 1.84,
"learning_rate": 8.923550687560372e-07,
"loss": 0.168,
"step": 5382
},
{
"epoch": 1.84,
"learning_rate": 8.886629589968415e-07,
"loss": 0.2013,
"step": 5383
},
{
"epoch": 1.84,
"learning_rate": 8.84978364898903e-07,
"loss": 0.1942,
"step": 5384
},
{
"epoch": 1.84,
"learning_rate": 8.813012876107391e-07,
"loss": 0.2032,
"step": 5385
},
{
"epoch": 1.84,
"learning_rate": 8.776317282785163e-07,
"loss": 0.1832,
"step": 5386
},
{
"epoch": 1.84,
"learning_rate": 8.739696880460613e-07,
"loss": 0.207,
"step": 5387
},
{
"epoch": 1.84,
"learning_rate": 8.7031516805485e-07,
"loss": 0.2273,
"step": 5388
},
{
"epoch": 1.84,
"learning_rate": 8.666681694440293e-07,
"loss": 0.1927,
"step": 5389
},
{
"epoch": 1.84,
"learning_rate": 8.630286933503873e-07,
"loss": 0.2078,
"step": 5390
},
{
"epoch": 1.84,
"learning_rate": 8.59396740908372e-07,
"loss": 0.2143,
"step": 5391
},
{
"epoch": 1.84,
"learning_rate": 8.557723132500916e-07,
"loss": 0.2049,
"step": 5392
},
{
"epoch": 1.84,
"learning_rate": 8.521554115052954e-07,
"loss": 0.1994,
"step": 5393
},
{
"epoch": 1.84,
"learning_rate": 8.485460368014036e-07,
"loss": 0.1907,
"step": 5394
},
{
"epoch": 1.84,
"learning_rate": 8.449441902634775e-07,
"loss": 0.1867,
"step": 5395
},
{
"epoch": 1.84,
"learning_rate": 8.413498730142383e-07,
"loss": 0.2105,
"step": 5396
},
{
"epoch": 1.84,
"learning_rate": 8.377630861740566e-07,
"loss": 0.2069,
"step": 5397
},
{
"epoch": 1.84,
"learning_rate": 8.341838308609517e-07,
"loss": 0.2066,
"step": 5398
},
{
"epoch": 1.84,
"learning_rate": 8.306121081906148e-07,
"loss": 0.1869,
"step": 5399
},
{
"epoch": 1.84,
"learning_rate": 8.270479192763636e-07,
"loss": 0.1858,
"step": 5400
},
{
"epoch": 1.84,
"learning_rate": 8.234912652291843e-07,
"loss": 0.2038,
"step": 5401
},
{
"epoch": 1.84,
"learning_rate": 8.199421471577068e-07,
"loss": 0.2024,
"step": 5402
},
{
"epoch": 1.84,
"learning_rate": 8.164005661682156e-07,
"loss": 0.2129,
"step": 5403
},
{
"epoch": 1.84,
"learning_rate": 8.12866523364647e-07,
"loss": 0.2096,
"step": 5404
},
{
"epoch": 1.84,
"learning_rate": 8.093400198485784e-07,
"loss": 0.2084,
"step": 5405
},
{
"epoch": 1.84,
"learning_rate": 8.058210567192498e-07,
"loss": 0.2135,
"step": 5406
},
{
"epoch": 1.84,
"learning_rate": 8.023096350735421e-07,
"loss": 0.2,
"step": 5407
},
{
"epoch": 1.85,
"learning_rate": 7.988057560059909e-07,
"loss": 0.1987,
"step": 5408
},
{
"epoch": 1.85,
"learning_rate": 7.95309420608778e-07,
"loss": 0.2035,
"step": 5409
},
{
"epoch": 1.85,
"learning_rate": 7.91820629971729e-07,
"loss": 0.2234,
"step": 5410
},
{
"epoch": 1.85,
"learning_rate": 7.883393851823295e-07,
"loss": 0.2119,
"step": 5411
},
{
"epoch": 1.85,
"learning_rate": 7.848656873256977e-07,
"loss": 0.2209,
"step": 5412
},
{
"epoch": 1.85,
"learning_rate": 7.8139953748462e-07,
"loss": 0.1696,
"step": 5413
},
{
"epoch": 1.85,
"learning_rate": 7.779409367395102e-07,
"loss": 0.1841,
"step": 5414
},
{
"epoch": 1.85,
"learning_rate": 7.744898861684363e-07,
"loss": 0.2397,
"step": 5415
},
{
"epoch": 1.85,
"learning_rate": 7.710463868471185e-07,
"loss": 0.2153,
"step": 5416
},
{
"epoch": 1.85,
"learning_rate": 7.676104398489093e-07,
"loss": 0.1935,
"step": 5417
},
{
"epoch": 1.85,
"learning_rate": 7.641820462448268e-07,
"loss": 0.2035,
"step": 5418
},
{
"epoch": 1.85,
"learning_rate": 7.607612071035136e-07,
"loss": 0.2031,
"step": 5419
},
{
"epoch": 1.85,
"learning_rate": 7.573479234912723e-07,
"loss": 0.2083,
"step": 5420
},
{
"epoch": 1.85,
"learning_rate": 7.539421964720461e-07,
"loss": 0.18,
"step": 5421
},
{
"epoch": 1.85,
"learning_rate": 7.505440271074221e-07,
"loss": 0.2358,
"step": 5422
},
{
"epoch": 1.85,
"learning_rate": 7.471534164566335e-07,
"loss": 0.2064,
"step": 5423
},
{
"epoch": 1.85,
"learning_rate": 7.437703655765488e-07,
"loss": 0.2073,
"step": 5424
},
{
"epoch": 1.85,
"learning_rate": 7.403948755216938e-07,
"loss": 0.1994,
"step": 5425
},
{
"epoch": 1.85,
"learning_rate": 7.370269473442299e-07,
"loss": 0.1881,
"step": 5426
},
{
"epoch": 1.85,
"learning_rate": 7.336665820939587e-07,
"loss": 0.2274,
"step": 5427
},
{
"epoch": 1.85,
"learning_rate": 7.303137808183341e-07,
"loss": 0.1919,
"step": 5428
},
{
"epoch": 1.85,
"learning_rate": 7.269685445624396e-07,
"loss": 0.1777,
"step": 5429
},
{
"epoch": 1.85,
"learning_rate": 7.236308743690162e-07,
"loss": 0.2042,
"step": 5430
},
{
"epoch": 1.85,
"learning_rate": 7.20300771278426e-07,
"loss": 0.2074,
"step": 5431
},
{
"epoch": 1.85,
"learning_rate": 7.169782363286915e-07,
"loss": 0.2313,
"step": 5432
},
{
"epoch": 1.85,
"learning_rate": 7.136632705554646e-07,
"loss": 0.2173,
"step": 5433
},
{
"epoch": 1.85,
"learning_rate": 7.103558749920442e-07,
"loss": 0.1871,
"step": 5434
},
{
"epoch": 1.85,
"learning_rate": 7.070560506693691e-07,
"loss": 0.2148,
"step": 5435
},
{
"epoch": 1.85,
"learning_rate": 7.037637986160112e-07,
"loss": 0.2087,
"step": 5436
},
{
"epoch": 1.85,
"learning_rate": 7.004791198581939e-07,
"loss": 0.2221,
"step": 5437
},
{
"epoch": 1.86,
"learning_rate": 6.972020154197623e-07,
"loss": 0.194,
"step": 5438
},
{
"epoch": 1.86,
"learning_rate": 6.939324863222213e-07,
"loss": 0.2213,
"step": 5439
},
{
"epoch": 1.86,
"learning_rate": 6.906705335846975e-07,
"loss": 0.1874,
"step": 5440
},
{
"epoch": 1.86,
"learning_rate": 6.874161582239663e-07,
"loss": 0.224,
"step": 5441
},
{
"epoch": 1.86,
"learning_rate": 6.841693612544415e-07,
"loss": 0.1782,
"step": 5442
},
{
"epoch": 1.86,
"learning_rate": 6.809301436881632e-07,
"loss": 0.2054,
"step": 5443
},
{
"epoch": 1.86,
"learning_rate": 6.77698506534824e-07,
"loss": 0.2179,
"step": 5444
},
{
"epoch": 1.86,
"learning_rate": 6.744744508017403e-07,
"loss": 0.198,
"step": 5445
},
{
"epoch": 1.86,
"learning_rate": 6.71257977493872e-07,
"loss": 0.1958,
"step": 5446
},
{
"epoch": 1.86,
"learning_rate": 6.680490876138201e-07,
"loss": 0.2205,
"step": 5447
},
{
"epoch": 1.86,
"learning_rate": 6.648477821618093e-07,
"loss": 0.2241,
"step": 5448
},
{
"epoch": 1.86,
"learning_rate": 6.616540621357165e-07,
"loss": 0.2279,
"step": 5449
},
{
"epoch": 1.86,
"learning_rate": 6.584679285310341e-07,
"loss": 0.2145,
"step": 5450
},
{
"epoch": 1.86,
"learning_rate": 6.552893823409095e-07,
"loss": 0.2029,
"step": 5451
},
{
"epoch": 1.86,
"learning_rate": 6.521184245561141e-07,
"loss": 0.1898,
"step": 5452
},
{
"epoch": 1.86,
"learning_rate": 6.489550561650514e-07,
"loss": 0.1726,
"step": 5453
},
{
"epoch": 1.86,
"learning_rate": 6.457992781537691e-07,
"loss": 0.1981,
"step": 5454
},
{
"epoch": 1.86,
"learning_rate": 6.426510915059414e-07,
"loss": 0.1916,
"step": 5455
},
{
"epoch": 1.86,
"learning_rate": 6.395104972028859e-07,
"loss": 0.1768,
"step": 5456
},
{
"epoch": 1.86,
"learning_rate": 6.363774962235336e-07,
"loss": 0.1618,
"step": 5457
},
{
"epoch": 1.86,
"learning_rate": 6.332520895444727e-07,
"loss": 0.2064,
"step": 5458
},
{
"epoch": 1.86,
"learning_rate": 6.301342781399045e-07,
"loss": 0.1987,
"step": 5459
},
{
"epoch": 1.86,
"learning_rate": 6.270240629816765e-07,
"loss": 0.2214,
"step": 5460
},
{
"epoch": 1.86,
"learning_rate": 6.239214450392633e-07,
"loss": 0.1865,
"step": 5461
},
{
"epoch": 1.86,
"learning_rate": 6.208264252797663e-07,
"loss": 0.1905,
"step": 5462
},
{
"epoch": 1.86,
"learning_rate": 6.177390046679304e-07,
"loss": 0.2165,
"step": 5463
},
{
"epoch": 1.86,
"learning_rate": 6.146591841661165e-07,
"loss": 0.1704,
"step": 5464
},
{
"epoch": 1.86,
"learning_rate": 6.115869647343287e-07,
"loss": 0.2042,
"step": 5465
},
{
"epoch": 1.86,
"learning_rate": 6.085223473302038e-07,
"loss": 0.1673,
"step": 5466
},
{
"epoch": 1.87,
"learning_rate": 6.054653329089916e-07,
"loss": 0.2021,
"step": 5467
},
{
"epoch": 1.87,
"learning_rate": 6.024159224235909e-07,
"loss": 0.2164,
"step": 5468
},
{
"epoch": 1.87,
"learning_rate": 5.99374116824522e-07,
"loss": 0.1967,
"step": 5469
},
{
"epoch": 1.87,
"learning_rate": 5.963399170599376e-07,
"loss": 0.1902,
"step": 5470
},
{
"epoch": 1.87,
"learning_rate": 5.933133240756117e-07,
"loss": 0.1952,
"step": 5471
},
{
"epoch": 1.87,
"learning_rate": 5.902943388149562e-07,
"loss": 0.1988,
"step": 5472
},
{
"epoch": 1.87,
"learning_rate": 5.872829622190101e-07,
"loss": 0.199,
"step": 5473
},
{
"epoch": 1.87,
"learning_rate": 5.842791952264365e-07,
"loss": 0.1792,
"step": 5474
},
{
"epoch": 1.87,
"learning_rate": 5.812830387735307e-07,
"loss": 0.1683,
"step": 5475
},
{
"epoch": 1.87,
"learning_rate": 5.782944937942153e-07,
"loss": 0.2214,
"step": 5476
},
{
"epoch": 1.87,
"learning_rate": 5.753135612200338e-07,
"loss": 0.2014,
"step": 5477
},
{
"epoch": 1.87,
"learning_rate": 5.72340241980171e-07,
"loss": 0.1809,
"step": 5478
},
{
"epoch": 1.87,
"learning_rate": 5.693745370014241e-07,
"loss": 0.1988,
"step": 5479
},
{
"epoch": 1.87,
"learning_rate": 5.664164472082262e-07,
"loss": 0.1981,
"step": 5480
},
{
"epoch": 1.87,
"learning_rate": 5.634659735226256e-07,
"loss": 0.1898,
"step": 5481
},
{
"epoch": 1.87,
"learning_rate": 5.605231168643144e-07,
"loss": 0.2168,
"step": 5482
},
{
"epoch": 1.87,
"learning_rate": 5.575878781505922e-07,
"loss": 0.1934,
"step": 5483
},
{
"epoch": 1.87,
"learning_rate": 5.546602582963966e-07,
"loss": 0.1731,
"step": 5484
},
{
"epoch": 1.87,
"learning_rate": 5.517402582142838e-07,
"loss": 0.1925,
"step": 5485
},
{
"epoch": 1.87,
"learning_rate": 5.488278788144396e-07,
"loss": 0.205,
"step": 5486
},
{
"epoch": 1.87,
"learning_rate": 5.459231210046711e-07,
"loss": 0.1882,
"step": 5487
},
{
"epoch": 1.87,
"learning_rate": 5.430259856904041e-07,
"loss": 0.2024,
"step": 5488
},
{
"epoch": 1.87,
"learning_rate": 5.40136473774705e-07,
"loss": 0.2179,
"step": 5489
},
{
"epoch": 1.87,
"learning_rate": 5.372545861582451e-07,
"loss": 0.1755,
"step": 5490
},
{
"epoch": 1.87,
"learning_rate": 5.343803237393281e-07,
"loss": 0.1889,
"step": 5491
},
{
"epoch": 1.87,
"learning_rate": 5.315136874138871e-07,
"loss": 0.1883,
"step": 5492
},
{
"epoch": 1.87,
"learning_rate": 5.286546780754659e-07,
"loss": 0.1917,
"step": 5493
},
{
"epoch": 1.87,
"learning_rate": 5.258032966152376e-07,
"loss": 0.1901,
"step": 5494
},
{
"epoch": 1.87,
"learning_rate": 5.229595439219914e-07,
"loss": 0.1963,
"step": 5495
},
{
"epoch": 1.88,
"learning_rate": 5.201234208821543e-07,
"loss": 0.1953,
"step": 5496
},
{
"epoch": 1.88,
"learning_rate": 5.172949283797579e-07,
"loss": 0.2077,
"step": 5497
},
{
"epoch": 1.88,
"learning_rate": 5.14474067296461e-07,
"loss": 0.1828,
"step": 5498
},
{
"epoch": 1.88,
"learning_rate": 5.11660838511549e-07,
"loss": 0.1905,
"step": 5499
},
{
"epoch": 1.88,
"learning_rate": 5.088552429019178e-07,
"loss": 0.1968,
"step": 5500
},
{
"epoch": 1.88,
"learning_rate": 5.060572813420927e-07,
"loss": 0.1968,
"step": 5501
},
{
"epoch": 1.88,
"learning_rate": 5.032669547042179e-07,
"loss": 0.2014,
"step": 5502
},
{
"epoch": 1.88,
"learning_rate": 5.004842638580532e-07,
"loss": 0.2053,
"step": 5503
},
{
"epoch": 1.88,
"learning_rate": 4.977092096709852e-07,
"loss": 0.2301,
"step": 5504
},
{
"epoch": 1.88,
"learning_rate": 4.949417930080136e-07,
"loss": 0.1954,
"step": 5505
},
{
"epoch": 1.88,
"learning_rate": 4.921820147317652e-07,
"loss": 0.2175,
"step": 5506
},
{
"epoch": 1.88,
"learning_rate": 4.894298757024712e-07,
"loss": 0.1956,
"step": 5507
},
{
"epoch": 1.88,
"learning_rate": 4.866853767780011e-07,
"loss": 0.1772,
"step": 5508
},
{
"epoch": 1.88,
"learning_rate": 4.839485188138259e-07,
"loss": 0.2058,
"step": 5509
},
{
"epoch": 1.88,
"learning_rate": 4.812193026630496e-07,
"loss": 0.1643,
"step": 5510
},
{
"epoch": 1.88,
"learning_rate": 4.784977291763775e-07,
"loss": 0.1985,
"step": 5511
},
{
"epoch": 1.88,
"learning_rate": 4.7578379920215077e-07,
"loss": 0.2277,
"step": 5512
},
{
"epoch": 1.88,
"learning_rate": 4.730775135863147e-07,
"loss": 0.2059,
"step": 5513
},
{
"epoch": 1.88,
"learning_rate": 4.703788731724362e-07,
"loss": 0.2152,
"step": 5514
},
{
"epoch": 1.88,
"learning_rate": 4.6768787880170086e-07,
"loss": 0.1824,
"step": 5515
},
{
"epoch": 1.88,
"learning_rate": 4.650045313129042e-07,
"loss": 0.204,
"step": 5516
},
{
"epoch": 1.88,
"learning_rate": 4.62328831542469e-07,
"loss": 0.2056,
"step": 5517
},
{
"epoch": 1.88,
"learning_rate": 4.5966078032442796e-07,
"loss": 0.1984,
"step": 5518
},
{
"epoch": 1.88,
"learning_rate": 4.5700037849042697e-07,
"loss": 0.239,
"step": 5519
},
{
"epoch": 1.88,
"learning_rate": 4.5434762686973333e-07,
"loss": 0.21,
"step": 5520
},
{
"epoch": 1.88,
"learning_rate": 4.517025262892244e-07,
"loss": 0.2094,
"step": 5521
},
{
"epoch": 1.88,
"learning_rate": 4.490650775733962e-07,
"loss": 0.2013,
"step": 5522
},
{
"epoch": 1.88,
"learning_rate": 4.464352815443579e-07,
"loss": 0.2015,
"step": 5523
},
{
"epoch": 1.88,
"learning_rate": 4.4381313902183694e-07,
"loss": 0.1961,
"step": 5524
},
{
"epoch": 1.89,
"learning_rate": 4.4119865082317413e-07,
"loss": 0.1904,
"step": 5525
},
{
"epoch": 1.89,
"learning_rate": 4.385918177633147e-07,
"loss": 0.2015,
"step": 5526
},
{
"epoch": 1.89,
"learning_rate": 4.359926406548337e-07,
"loss": 0.1713,
"step": 5527
},
{
"epoch": 1.89,
"learning_rate": 4.3340112030790525e-07,
"loss": 0.2204,
"step": 5528
},
{
"epoch": 1.89,
"learning_rate": 4.308172575303249e-07,
"loss": 0.1657,
"step": 5529
},
{
"epoch": 1.89,
"learning_rate": 4.2824105312750385e-07,
"loss": 0.1993,
"step": 5530
},
{
"epoch": 1.89,
"learning_rate": 4.256725079024554e-07,
"loss": 0.1966,
"step": 5531
},
{
"epoch": 1.89,
"learning_rate": 4.2311162265581684e-07,
"loss": 0.1967,
"step": 5532
},
{
"epoch": 1.89,
"learning_rate": 4.205583981858302e-07,
"loss": 0.1924,
"step": 5533
},
{
"epoch": 1.89,
"learning_rate": 4.1801283528835334e-07,
"loss": 0.1718,
"step": 5534
},
{
"epoch": 1.89,
"learning_rate": 4.154749347568543e-07,
"loss": 0.1909,
"step": 5535
},
{
"epoch": 1.89,
"learning_rate": 4.1294469738241413e-07,
"loss": 0.2063,
"step": 5536
},
{
"epoch": 1.89,
"learning_rate": 4.104221239537215e-07,
"loss": 0.1932,
"step": 5537
},
{
"epoch": 1.89,
"learning_rate": 4.0790721525708065e-07,
"loss": 0.1988,
"step": 5538
},
{
"epoch": 1.89,
"learning_rate": 4.05399972076409e-07,
"loss": 0.1837,
"step": 5539
},
{
"epoch": 1.89,
"learning_rate": 4.029003951932259e-07,
"loss": 0.2055,
"step": 5540
},
{
"epoch": 1.89,
"learning_rate": 4.004084853866663e-07,
"loss": 0.1762,
"step": 5541
},
{
"epoch": 1.89,
"learning_rate": 3.9792424343347567e-07,
"loss": 0.2094,
"step": 5542
},
{
"epoch": 1.89,
"learning_rate": 3.9544767010800954e-07,
"loss": 0.1958,
"step": 5543
},
{
"epoch": 1.89,
"learning_rate": 3.9297876618223097e-07,
"loss": 0.1929,
"step": 5544
},
{
"epoch": 1.89,
"learning_rate": 3.905175324257104e-07,
"loss": 0.2143,
"step": 5545
},
{
"epoch": 1.89,
"learning_rate": 3.880639696056371e-07,
"loss": 0.2198,
"step": 5546
},
{
"epoch": 1.89,
"learning_rate": 3.8561807848679634e-07,
"loss": 0.2081,
"step": 5547
},
{
"epoch": 1.89,
"learning_rate": 3.8317985983158946e-07,
"loss": 0.19,
"step": 5548
},
{
"epoch": 1.89,
"learning_rate": 3.807493144000307e-07,
"loss": 0.2102,
"step": 5549
},
{
"epoch": 1.89,
"learning_rate": 3.7832644294973075e-07,
"loss": 0.2152,
"step": 5550
},
{
"epoch": 1.89,
"learning_rate": 3.75911246235916e-07,
"loss": 0.163,
"step": 5551
},
{
"epoch": 1.89,
"learning_rate": 3.735037250114176e-07,
"loss": 0.2107,
"step": 5552
},
{
"epoch": 1.89,
"learning_rate": 3.711038800266825e-07,
"loss": 0.185,
"step": 5553
},
{
"epoch": 1.89,
"learning_rate": 3.687117120297484e-07,
"loss": 0.1745,
"step": 5554
},
{
"epoch": 1.9,
"learning_rate": 3.663272217662772e-07,
"loss": 0.2122,
"step": 5555
},
{
"epoch": 1.9,
"learning_rate": 3.6395040997952714e-07,
"loss": 0.1965,
"step": 5556
},
{
"epoch": 1.9,
"learning_rate": 3.615812774103666e-07,
"loss": 0.1847,
"step": 5557
},
{
"epoch": 1.9,
"learning_rate": 3.592198247972689e-07,
"loss": 0.1997,
"step": 5558
},
{
"epoch": 1.9,
"learning_rate": 3.568660528763146e-07,
"loss": 0.2153,
"step": 5559
},
{
"epoch": 1.9,
"learning_rate": 3.54519962381189e-07,
"loss": 0.197,
"step": 5560
},
{
"epoch": 1.9,
"learning_rate": 3.5218155404318496e-07,
"loss": 0.186,
"step": 5561
},
{
"epoch": 1.9,
"learning_rate": 3.498508285912e-07,
"loss": 0.1985,
"step": 5562
},
{
"epoch": 1.9,
"learning_rate": 3.4752778675173624e-07,
"loss": 0.1919,
"step": 5563
},
{
"epoch": 1.9,
"learning_rate": 3.4521242924889785e-07,
"loss": 0.1971,
"step": 5564
},
{
"epoch": 1.9,
"learning_rate": 3.429047568044019e-07,
"loss": 0.1943,
"step": 5565
},
{
"epoch": 1.9,
"learning_rate": 3.4060477013756197e-07,
"loss": 0.1922,
"step": 5566
},
{
"epoch": 1.9,
"learning_rate": 3.3831246996530175e-07,
"loss": 0.1974,
"step": 5567
},
{
"epoch": 1.9,
"learning_rate": 3.3602785700214133e-07,
"loss": 0.1881,
"step": 5568
},
{
"epoch": 1.9,
"learning_rate": 3.3375093196021113e-07,
"loss": 0.2071,
"step": 5569
},
{
"epoch": 1.9,
"learning_rate": 3.3148169554924614e-07,
"loss": 0.2004,
"step": 5570
},
{
"epoch": 1.9,
"learning_rate": 3.2922014847658053e-07,
"loss": 0.2066,
"step": 5571
},
{
"epoch": 1.9,
"learning_rate": 3.2696629144715607e-07,
"loss": 0.187,
"step": 5572
},
{
"epoch": 1.9,
"learning_rate": 3.247201251635079e-07,
"loss": 0.1933,
"step": 5573
},
{
"epoch": 1.9,
"learning_rate": 3.2248165032578715e-07,
"loss": 0.1783,
"step": 5574
},
{
"epoch": 1.9,
"learning_rate": 3.202508676317384e-07,
"loss": 0.1912,
"step": 5575
},
{
"epoch": 1.9,
"learning_rate": 3.18027777776711e-07,
"loss": 0.1918,
"step": 5576
},
{
"epoch": 1.9,
"learning_rate": 3.15812381453659e-07,
"loss": 0.2002,
"step": 5577
},
{
"epoch": 1.9,
"learning_rate": 3.1360467935313e-07,
"loss": 0.1991,
"step": 5578
},
{
"epoch": 1.9,
"learning_rate": 3.114046721632874e-07,
"loss": 0.2023,
"step": 5579
},
{
"epoch": 1.9,
"learning_rate": 3.092123605698827e-07,
"loss": 0.206,
"step": 5580
},
{
"epoch": 1.9,
"learning_rate": 3.07027745256272e-07,
"loss": 0.2019,
"step": 5581
},
{
"epoch": 1.9,
"learning_rate": 3.048508269034217e-07,
"loss": 0.173,
"step": 5582
},
{
"epoch": 1.9,
"learning_rate": 3.026816061898835e-07,
"loss": 0.2288,
"step": 5583
},
{
"epoch": 1.91,
"learning_rate": 3.0052008379182205e-07,
"loss": 0.2082,
"step": 5584
},
{
"epoch": 1.91,
"learning_rate": 2.9836626038299566e-07,
"loss": 0.1869,
"step": 5585
},
{
"epoch": 1.91,
"learning_rate": 2.9622013663476443e-07,
"loss": 0.2179,
"step": 5586
},
{
"epoch": 1.91,
"learning_rate": 2.940817132160961e-07,
"loss": 0.188,
"step": 5587
},
{
"epoch": 1.91,
"learning_rate": 2.919509907935408e-07,
"loss": 0.179,
"step": 5588
},
{
"epoch": 1.91,
"learning_rate": 2.8982797003126726e-07,
"loss": 0.1887,
"step": 5589
},
{
"epoch": 1.91,
"learning_rate": 2.877126515910294e-07,
"loss": 0.2128,
"step": 5590
},
{
"epoch": 1.91,
"learning_rate": 2.856050361321916e-07,
"loss": 0.1866,
"step": 5591
},
{
"epoch": 1.91,
"learning_rate": 2.8350512431170317e-07,
"loss": 0.1643,
"step": 5592
},
{
"epoch": 1.91,
"learning_rate": 2.8141291678412674e-07,
"loss": 0.2188,
"step": 5593
},
{
"epoch": 1.91,
"learning_rate": 2.793284142016156e-07,
"loss": 0.1759,
"step": 5594
},
{
"epoch": 1.91,
"learning_rate": 2.772516172139222e-07,
"loss": 0.2056,
"step": 5595
},
{
"epoch": 1.91,
"learning_rate": 2.7518252646839827e-07,
"loss": 0.2072,
"step": 5596
},
{
"epoch": 1.91,
"learning_rate": 2.731211426099917e-07,
"loss": 0.1989,
"step": 5597
},
{
"epoch": 1.91,
"learning_rate": 2.7106746628125234e-07,
"loss": 0.1862,
"step": 5598
},
{
"epoch": 1.91,
"learning_rate": 2.6902149812231827e-07,
"loss": 0.2267,
"step": 5599
},
{
"epoch": 1.91,
"learning_rate": 2.6698323877093754e-07,
"loss": 0.2061,
"step": 5600
},
{
"epoch": 1.91,
"learning_rate": 2.649526888624493e-07,
"loss": 0.2111,
"step": 5601
},
{
"epoch": 1.91,
"learning_rate": 2.6292984902978323e-07,
"loss": 0.225,
"step": 5602
},
{
"epoch": 1.91,
"learning_rate": 2.609147199034767e-07,
"loss": 0.1933,
"step": 5603
},
{
"epoch": 1.91,
"learning_rate": 2.58907302111655e-07,
"loss": 0.2007,
"step": 5604
},
{
"epoch": 1.91,
"learning_rate": 2.5690759628004545e-07,
"loss": 0.1981,
"step": 5605
},
{
"epoch": 1.91,
"learning_rate": 2.5491560303197157e-07,
"loss": 0.2082,
"step": 5606
},
{
"epoch": 1.91,
"learning_rate": 2.529313229883451e-07,
"loss": 0.2179,
"step": 5607
},
{
"epoch": 1.91,
"learning_rate": 2.5095475676768507e-07,
"loss": 0.1923,
"step": 5608
},
{
"epoch": 1.91,
"learning_rate": 2.48985904986096e-07,
"loss": 0.2015,
"step": 5609
},
{
"epoch": 1.91,
"learning_rate": 2.470247682572868e-07,
"loss": 0.2017,
"step": 5610
},
{
"epoch": 1.91,
"learning_rate": 2.450713471925492e-07,
"loss": 0.1898,
"step": 5611
},
{
"epoch": 1.91,
"learning_rate": 2.4312564240078216e-07,
"loss": 0.2109,
"step": 5612
},
{
"epoch": 1.92,
"learning_rate": 2.411876544884728e-07,
"loss": 0.2291,
"step": 5613
},
{
"epoch": 1.92,
"learning_rate": 2.392573840597073e-07,
"loss": 0.2056,
"step": 5614
},
{
"epoch": 1.92,
"learning_rate": 2.3733483171616e-07,
"loss": 0.1877,
"step": 5615
},
{
"epoch": 1.92,
"learning_rate": 2.35419998057107e-07,
"loss": 0.2096,
"step": 5616
},
{
"epoch": 1.92,
"learning_rate": 2.3351288367940982e-07,
"loss": 0.2012,
"step": 5617
},
{
"epoch": 1.92,
"learning_rate": 2.3161348917753178e-07,
"loss": 0.1964,
"step": 5618
},
{
"epoch": 1.92,
"learning_rate": 2.2972181514352432e-07,
"loss": 0.2142,
"step": 5619
},
{
"epoch": 1.92,
"learning_rate": 2.2783786216703517e-07,
"loss": 0.2367,
"step": 5620
},
{
"epoch": 1.92,
"learning_rate": 2.2596163083530575e-07,
"loss": 0.1637,
"step": 5621
},
{
"epoch": 1.92,
"learning_rate": 2.2409312173317098e-07,
"loss": 0.1839,
"step": 5622
},
{
"epoch": 1.92,
"learning_rate": 2.222323354430539e-07,
"loss": 0.1986,
"step": 5623
},
{
"epoch": 1.92,
"learning_rate": 2.2037927254497936e-07,
"loss": 0.2213,
"step": 5624
},
{
"epoch": 1.92,
"learning_rate": 2.1853393361655206e-07,
"loss": 0.2075,
"step": 5625
},
{
"epoch": 1.92,
"learning_rate": 2.1669631923297852e-07,
"loss": 0.1861,
"step": 5626
},
{
"epoch": 1.92,
"learning_rate": 2.1486642996705885e-07,
"loss": 0.237,
"step": 5627
},
{
"epoch": 1.92,
"learning_rate": 2.1304426638917574e-07,
"loss": 0.2062,
"step": 5628
},
{
"epoch": 1.92,
"learning_rate": 2.1122982906731647e-07,
"loss": 0.2136,
"step": 5629
},
{
"epoch": 1.92,
"learning_rate": 2.0942311856704532e-07,
"loss": 0.2055,
"step": 5630
},
{
"epoch": 1.92,
"learning_rate": 2.0762413545153126e-07,
"loss": 0.2185,
"step": 5631
},
{
"epoch": 1.92,
"learning_rate": 2.0583288028152848e-07,
"loss": 0.1983,
"step": 5632
},
{
"epoch": 1.92,
"learning_rate": 2.0404935361537924e-07,
"loss": 0.2014,
"step": 5633
},
{
"epoch": 1.92,
"learning_rate": 2.022735560090222e-07,
"loss": 0.1726,
"step": 5634
},
{
"epoch": 1.92,
"learning_rate": 2.005054880159868e-07,
"loss": 0.1856,
"step": 5635
},
{
"epoch": 1.92,
"learning_rate": 1.9874515018739327e-07,
"loss": 0.2006,
"step": 5636
},
{
"epoch": 1.92,
"learning_rate": 1.9699254307194436e-07,
"loss": 0.2232,
"step": 5637
},
{
"epoch": 1.92,
"learning_rate": 1.95247667215942e-07,
"loss": 0.1882,
"step": 5638
},
{
"epoch": 1.92,
"learning_rate": 1.935105231632789e-07,
"loss": 0.2277,
"step": 5639
},
{
"epoch": 1.92,
"learning_rate": 1.9178111145542743e-07,
"loss": 0.203,
"step": 5640
},
{
"epoch": 1.92,
"learning_rate": 1.9005943263146198e-07,
"loss": 0.215,
"step": 5641
},
{
"epoch": 1.92,
"learning_rate": 1.8834548722803935e-07,
"loss": 0.1927,
"step": 5642
},
{
"epoch": 1.93,
"learning_rate": 1.866392757794072e-07,
"loss": 0.1919,
"step": 5643
},
{
"epoch": 1.93,
"learning_rate": 1.8494079881740122e-07,
"loss": 0.1679,
"step": 5644
},
{
"epoch": 1.93,
"learning_rate": 1.832500568714507e-07,
"loss": 0.1851,
"step": 5645
},
{
"epoch": 1.93,
"learning_rate": 1.8156705046857293e-07,
"loss": 0.1776,
"step": 5646
},
{
"epoch": 1.93,
"learning_rate": 1.7989178013336494e-07,
"loss": 0.198,
"step": 5647
},
{
"epoch": 1.93,
"learning_rate": 1.7822424638802292e-07,
"loss": 0.2285,
"step": 5648
},
{
"epoch": 1.93,
"learning_rate": 1.7656444975232834e-07,
"loss": 0.2015,
"step": 5649
},
{
"epoch": 1.93,
"learning_rate": 1.749123907436534e-07,
"loss": 0.2078,
"step": 5650
},
{
"epoch": 1.93,
"learning_rate": 1.7326806987695288e-07,
"loss": 0.209,
"step": 5651
},
{
"epoch": 1.93,
"learning_rate": 1.7163148766476957e-07,
"loss": 0.2012,
"step": 5652
},
{
"epoch": 1.93,
"learning_rate": 1.7000264461724257e-07,
"loss": 0.2048,
"step": 5653
},
{
"epoch": 1.93,
"learning_rate": 1.6838154124208806e-07,
"loss": 0.2011,
"step": 5654
},
{
"epoch": 1.93,
"learning_rate": 1.667681780446212e-07,
"loss": 0.2178,
"step": 5655
},
{
"epoch": 1.93,
"learning_rate": 1.6516255552772864e-07,
"loss": 0.186,
"step": 5656
},
{
"epoch": 1.93,
"learning_rate": 1.6356467419189892e-07,
"loss": 0.2055,
"step": 5657
},
{
"epoch": 1.93,
"learning_rate": 1.6197453453520028e-07,
"loss": 0.1954,
"step": 5658
},
{
"epoch": 1.93,
"learning_rate": 1.603921370532918e-07,
"loss": 0.2058,
"step": 5659
},
{
"epoch": 1.93,
"learning_rate": 1.5881748223941507e-07,
"loss": 0.2186,
"step": 5660
},
{
"epoch": 1.93,
"learning_rate": 1.572505705844024e-07,
"loss": 0.2105,
"step": 5661
},
{
"epoch": 1.93,
"learning_rate": 1.556914025766687e-07,
"loss": 0.2088,
"step": 5662
},
{
"epoch": 1.93,
"learning_rate": 1.5413997870221409e-07,
"loss": 0.2069,
"step": 5663
},
{
"epoch": 1.93,
"learning_rate": 1.5259629944463226e-07,
"loss": 0.1967,
"step": 5664
},
{
"epoch": 1.93,
"learning_rate": 1.510603652850995e-07,
"loss": 0.199,
"step": 5665
},
{
"epoch": 1.93,
"learning_rate": 1.4953217670237163e-07,
"loss": 0.2181,
"step": 5666
},
{
"epoch": 1.93,
"learning_rate": 1.4801173417279547e-07,
"loss": 0.1743,
"step": 5667
},
{
"epoch": 1.93,
"learning_rate": 1.4649903817030574e-07,
"loss": 0.19,
"step": 5668
},
{
"epoch": 1.93,
"learning_rate": 1.44994089166417e-07,
"loss": 0.2001,
"step": 5669
},
{
"epoch": 1.93,
"learning_rate": 1.434968876302345e-07,
"loss": 0.2019,
"step": 5670
},
{
"epoch": 1.93,
"learning_rate": 1.420074340284433e-07,
"loss": 0.2061,
"step": 5671
},
{
"epoch": 1.94,
"learning_rate": 1.4052572882531644e-07,
"loss": 0.2179,
"step": 5672
},
{
"epoch": 1.94,
"learning_rate": 1.3905177248271228e-07,
"loss": 0.2009,
"step": 5673
},
{
"epoch": 1.94,
"learning_rate": 1.3758556546007162e-07,
"loss": 0.1867,
"step": 5674
},
{
"epoch": 1.94,
"learning_rate": 1.3612710821441777e-07,
"loss": 0.1959,
"step": 5675
},
{
"epoch": 1.94,
"learning_rate": 1.346764012003676e-07,
"loss": 0.2036,
"step": 5676
},
{
"epoch": 1.94,
"learning_rate": 1.3323344487010947e-07,
"loss": 0.1875,
"step": 5677
},
{
"epoch": 1.94,
"learning_rate": 1.3179823967342798e-07,
"loss": 0.2191,
"step": 5678
},
{
"epoch": 1.94,
"learning_rate": 1.303707860576847e-07,
"loss": 0.1882,
"step": 5679
},
{
"epoch": 1.94,
"learning_rate": 1.2895108446782378e-07,
"loss": 0.2092,
"step": 5680
},
{
"epoch": 1.94,
"learning_rate": 1.275391353463773e-07,
"loss": 0.2102,
"step": 5681
},
{
"epoch": 1.94,
"learning_rate": 1.2613493913345708e-07,
"loss": 0.1924,
"step": 5682
},
{
"epoch": 1.94,
"learning_rate": 1.2473849626676304e-07,
"loss": 0.2358,
"step": 5683
},
{
"epoch": 1.94,
"learning_rate": 1.2334980718157474e-07,
"loss": 0.2468,
"step": 5684
},
{
"epoch": 1.94,
"learning_rate": 1.2196887231075427e-07,
"loss": 0.218,
"step": 5685
},
{
"epoch": 1.94,
"learning_rate": 1.2059569208475173e-07,
"loss": 0.229,
"step": 5686
},
{
"epoch": 1.94,
"learning_rate": 1.1923026693158867e-07,
"loss": 0.2418,
"step": 5687
},
{
"epoch": 1.94,
"learning_rate": 1.1787259727688571e-07,
"loss": 0.2114,
"step": 5688
},
{
"epoch": 1.94,
"learning_rate": 1.1652268354383211e-07,
"loss": 0.1743,
"step": 5689
},
{
"epoch": 1.94,
"learning_rate": 1.1518052615320518e-07,
"loss": 0.212,
"step": 5690
},
{
"epoch": 1.94,
"learning_rate": 1.1384612552336749e-07,
"loss": 0.2144,
"step": 5691
},
{
"epoch": 1.94,
"learning_rate": 1.1251948207025575e-07,
"loss": 0.2172,
"step": 5692
},
{
"epoch": 1.94,
"learning_rate": 1.1120059620739753e-07,
"loss": 0.177,
"step": 5693
},
{
"epoch": 1.94,
"learning_rate": 1.0988946834589454e-07,
"loss": 0.1961,
"step": 5694
},
{
"epoch": 1.94,
"learning_rate": 1.0858609889443927e-07,
"loss": 0.1868,
"step": 5695
},
{
"epoch": 1.94,
"learning_rate": 1.0729048825929566e-07,
"loss": 0.1937,
"step": 5696
},
{
"epoch": 1.94,
"learning_rate": 1.0600263684431289e-07,
"loss": 0.1941,
"step": 5697
},
{
"epoch": 1.94,
"learning_rate": 1.0472254505092816e-07,
"loss": 0.1892,
"step": 5698
},
{
"epoch": 1.94,
"learning_rate": 1.0345021327815008e-07,
"loss": 0.1827,
"step": 5699
},
{
"epoch": 1.94,
"learning_rate": 1.021856419225753e-07,
"loss": 0.2054,
"step": 5700
},
{
"epoch": 1.95,
"learning_rate": 1.009288313783746e-07,
"loss": 0.2222,
"step": 5701
},
{
"epoch": 1.95,
"learning_rate": 9.967978203730965e-08,
"loss": 0.2366,
"step": 5702
},
{
"epoch": 1.95,
"learning_rate": 9.843849428871343e-08,
"loss": 0.1987,
"step": 5703
},
{
"epoch": 1.95,
"learning_rate": 9.7204968519507e-08,
"loss": 0.2251,
"step": 5704
},
{
"epoch": 1.95,
"learning_rate": 9.597920511418556e-08,
"loss": 0.2154,
"step": 5705
},
{
"epoch": 1.95,
"learning_rate": 9.476120445482683e-08,
"loss": 0.1842,
"step": 5706
},
{
"epoch": 1.95,
"learning_rate": 9.355096692109377e-08,
"loss": 0.2084,
"step": 5707
},
{
"epoch": 1.95,
"learning_rate": 9.234849289022073e-08,
"loss": 0.1895,
"step": 5708
},
{
"epoch": 1.95,
"learning_rate": 9.115378273702735e-08,
"loss": 0.1867,
"step": 5709
},
{
"epoch": 1.95,
"learning_rate": 8.996683683391572e-08,
"loss": 0.1794,
"step": 5710
},
{
"epoch": 1.95,
"learning_rate": 8.87876555508621e-08,
"loss": 0.185,
"step": 5711
},
{
"epoch": 1.95,
"learning_rate": 8.761623925542528e-08,
"loss": 0.2042,
"step": 5712
},
{
"epoch": 1.95,
"learning_rate": 8.645258831274372e-08,
"loss": 0.2182,
"step": 5713
},
{
"epoch": 1.95,
"learning_rate": 8.529670308553562e-08,
"loss": 0.189,
"step": 5714
},
{
"epoch": 1.95,
"learning_rate": 8.414858393409886e-08,
"loss": 0.2032,
"step": 5715
},
{
"epoch": 1.95,
"learning_rate": 8.300823121630552e-08,
"loss": 0.206,
"step": 5716
},
{
"epoch": 1.95,
"learning_rate": 8.187564528761293e-08,
"loss": 0.2022,
"step": 5717
},
{
"epoch": 1.95,
"learning_rate": 8.075082650105814e-08,
"loss": 0.1852,
"step": 5718
},
{
"epoch": 1.95,
"learning_rate": 7.963377520725235e-08,
"loss": 0.1827,
"step": 5719
},
{
"epoch": 1.95,
"learning_rate": 7.852449175438925e-08,
"loss": 0.1942,
"step": 5720
},
{
"epoch": 1.95,
"learning_rate": 7.742297648823671e-08,
"loss": 0.1812,
"step": 5721
},
{
"epoch": 1.95,
"learning_rate": 7.632922975214784e-08,
"loss": 0.209,
"step": 5722
},
{
"epoch": 1.95,
"learning_rate": 7.524325188704717e-08,
"loss": 0.2212,
"step": 5723
},
{
"epoch": 1.95,
"learning_rate": 7.41650432314417e-08,
"loss": 0.1985,
"step": 5724
},
{
"epoch": 1.95,
"learning_rate": 7.309460412141811e-08,
"loss": 0.1753,
"step": 5725
},
{
"epoch": 1.95,
"learning_rate": 7.203193489064009e-08,
"loss": 0.2008,
"step": 5726
},
{
"epoch": 1.95,
"learning_rate": 7.097703587034543e-08,
"loss": 0.193,
"step": 5727
},
{
"epoch": 1.95,
"learning_rate": 6.992990738935446e-08,
"loss": 0.2031,
"step": 5728
},
{
"epoch": 1.95,
"learning_rate": 6.889054977406439e-08,
"loss": 0.2094,
"step": 5729
},
{
"epoch": 1.95,
"learning_rate": 6.785896334844665e-08,
"loss": 0.1883,
"step": 5730
},
{
"epoch": 1.96,
"learning_rate": 6.683514843405791e-08,
"loss": 0.2034,
"step": 5731
},
{
"epoch": 1.96,
"learning_rate": 6.581910535002623e-08,
"loss": 0.2067,
"step": 5732
},
{
"epoch": 1.96,
"learning_rate": 6.481083441305935e-08,
"loss": 0.1922,
"step": 5733
},
{
"epoch": 1.96,
"learning_rate": 6.381033593744201e-08,
"loss": 0.2366,
"step": 5734
},
{
"epoch": 1.96,
"learning_rate": 6.281761023503308e-08,
"loss": 0.2181,
"step": 5735
},
{
"epoch": 1.96,
"learning_rate": 6.183265761527668e-08,
"loss": 0.1866,
"step": 5736
},
{
"epoch": 1.96,
"learning_rate": 6.085547838518557e-08,
"loss": 0.1943,
"step": 5737
},
{
"epoch": 1.96,
"learning_rate": 5.988607284935221e-08,
"loss": 0.1888,
"step": 5738
},
{
"epoch": 1.96,
"learning_rate": 5.892444130995156e-08,
"loss": 0.2047,
"step": 5739
},
{
"epoch": 1.96,
"learning_rate": 5.79705840667244e-08,
"loss": 0.1817,
"step": 5740
},
{
"epoch": 1.96,
"learning_rate": 5.702450141699678e-08,
"loss": 0.2188,
"step": 5741
},
{
"epoch": 1.96,
"learning_rate": 5.6086193655668917e-08,
"loss": 0.1887,
"step": 5742
},
{
"epoch": 1.96,
"learning_rate": 5.515566107521797e-08,
"loss": 0.2093,
"step": 5743
},
{
"epoch": 1.96,
"learning_rate": 5.423290396569525e-08,
"loss": 0.1949,
"step": 5744
},
{
"epoch": 1.96,
"learning_rate": 5.331792261473456e-08,
"loss": 0.2358,
"step": 5745
},
{
"epoch": 1.96,
"learning_rate": 5.241071730753555e-08,
"loss": 0.1763,
"step": 5746
},
{
"epoch": 1.96,
"learning_rate": 5.151128832688312e-08,
"loss": 0.178,
"step": 5747
},
{
"epoch": 1.96,
"learning_rate": 5.061963595313635e-08,
"loss": 0.22,
"step": 5748
},
{
"epoch": 1.96,
"learning_rate": 4.973576046422568e-08,
"loss": 0.2019,
"step": 5749
},
{
"epoch": 1.96,
"learning_rate": 4.885966213566684e-08,
"loss": 0.2189,
"step": 5750
},
{
"epoch": 1.96,
"learning_rate": 4.799134124053861e-08,
"loss": 0.1885,
"step": 5751
},
{
"epoch": 1.96,
"learning_rate": 4.713079804950504e-08,
"loss": 0.1911,
"step": 5752
},
{
"epoch": 1.96,
"learning_rate": 4.627803283080434e-08,
"loss": 0.2208,
"step": 5753
},
{
"epoch": 1.96,
"learning_rate": 4.5433045850246106e-08,
"loss": 0.1771,
"step": 5754
},
{
"epoch": 1.96,
"learning_rate": 4.4595837371222415e-08,
"loss": 0.1864,
"step": 5755
},
{
"epoch": 1.96,
"learning_rate": 4.376640765469398e-08,
"loss": 0.1946,
"step": 5756
},
{
"epoch": 1.96,
"learning_rate": 4.294475695919842e-08,
"loss": 0.1869,
"step": 5757
},
{
"epoch": 1.96,
"learning_rate": 4.213088554085309e-08,
"loss": 0.1737,
"step": 5758
},
{
"epoch": 1.96,
"learning_rate": 4.1324793653341187e-08,
"loss": 0.199,
"step": 5759
},
{
"epoch": 1.97,
"learning_rate": 4.052648154793393e-08,
"loss": 0.1851,
"step": 5760
},
{
"epoch": 1.97,
"learning_rate": 3.9735949473465596e-08,
"loss": 0.2227,
"step": 5761
},
{
"epoch": 1.97,
"learning_rate": 3.8953197676350195e-08,
"loss": 0.1874,
"step": 5762
},
{
"epoch": 1.97,
"learning_rate": 3.817822640058144e-08,
"loss": 0.2026,
"step": 5763
},
{
"epoch": 1.97,
"learning_rate": 3.74110358877161e-08,
"loss": 0.2035,
"step": 5764
},
{
"epoch": 1.97,
"learning_rate": 3.6651626376899006e-08,
"loss": 0.1897,
"step": 5765
},
{
"epoch": 1.97,
"learning_rate": 3.589999810483802e-08,
"loss": 0.2212,
"step": 5766
},
{
"epoch": 1.97,
"learning_rate": 3.515615130582073e-08,
"loss": 0.1629,
"step": 5767
},
{
"epoch": 1.97,
"learning_rate": 3.4420086211714443e-08,
"loss": 0.2195,
"step": 5768
},
{
"epoch": 1.97,
"learning_rate": 3.3691803051949525e-08,
"loss": 0.1969,
"step": 5769
},
{
"epoch": 1.97,
"learning_rate": 3.2971302053538823e-08,
"loss": 0.1907,
"step": 5770
},
{
"epoch": 1.97,
"learning_rate": 3.225858344106936e-08,
"loss": 0.1827,
"step": 5771
},
{
"epoch": 1.97,
"learning_rate": 3.155364743669675e-08,
"loss": 0.1781,
"step": 5772
},
{
"epoch": 1.97,
"learning_rate": 3.085649426015358e-08,
"loss": 0.1861,
"step": 5773
},
{
"epoch": 1.97,
"learning_rate": 3.016712412875211e-08,
"loss": 0.167,
"step": 5774
},
{
"epoch": 1.97,
"learning_rate": 2.948553725736769e-08,
"loss": 0.2217,
"step": 5775
},
{
"epoch": 1.97,
"learning_rate": 2.8811733858460922e-08,
"loss": 0.2249,
"step": 5776
},
{
"epoch": 1.97,
"learning_rate": 2.814571414205547e-08,
"loss": 0.199,
"step": 5777
},
{
"epoch": 1.97,
"learning_rate": 2.7487478315757488e-08,
"loss": 0.2043,
"step": 5778
},
{
"epoch": 1.97,
"learning_rate": 2.6837026584741743e-08,
"loss": 0.2908,
"step": 5779
},
{
"epoch": 1.97,
"learning_rate": 2.619435915175994e-08,
"loss": 0.2092,
"step": 5780
},
{
"epoch": 1.97,
"learning_rate": 2.5559476217132393e-08,
"loss": 0.1903,
"step": 5781
},
{
"epoch": 1.97,
"learning_rate": 2.4932377978761912e-08,
"loss": 0.1738,
"step": 5782
},
{
"epoch": 1.97,
"learning_rate": 2.431306463211436e-08,
"loss": 0.201,
"step": 5783
},
{
"epoch": 1.97,
"learning_rate": 2.3701536370232536e-08,
"loss": 0.1948,
"step": 5784
},
{
"epoch": 1.97,
"learning_rate": 2.3097793383738962e-08,
"loss": 0.2032,
"step": 5785
},
{
"epoch": 1.97,
"learning_rate": 2.2501835860821997e-08,
"loss": 0.216,
"step": 5786
},
{
"epoch": 1.97,
"learning_rate": 2.191366398724415e-08,
"loss": 0.1894,
"step": 5787
},
{
"epoch": 1.97,
"learning_rate": 2.13332779463421e-08,
"loss": 0.1956,
"step": 5788
},
{
"epoch": 1.98,
"learning_rate": 2.076067791902947e-08,
"loss": 0.2059,
"step": 5789
},
{
"epoch": 1.98,
"learning_rate": 2.019586408378571e-08,
"loss": 0.202,
"step": 5790
},
{
"epoch": 1.98,
"learning_rate": 1.9638836616669988e-08,
"loss": 0.202,
"step": 5791
},
{
"epoch": 1.98,
"learning_rate": 1.908959569131008e-08,
"loss": 0.1903,
"step": 5792
},
{
"epoch": 1.98,
"learning_rate": 1.8548141478907932e-08,
"loss": 0.1901,
"step": 5793
},
{
"epoch": 1.98,
"learning_rate": 1.801447414823687e-08,
"loss": 0.2198,
"step": 5794
},
{
"epoch": 1.98,
"learning_rate": 1.7488593865647163e-08,
"loss": 0.1771,
"step": 5795
},
{
"epoch": 1.98,
"learning_rate": 1.6970500795057685e-08,
"loss": 0.2108,
"step": 5796
},
{
"epoch": 1.98,
"learning_rate": 1.6460195097958708e-08,
"loss": 0.2076,
"step": 5797
},
{
"epoch": 1.98,
"learning_rate": 1.595767693342298e-08,
"loss": 0.1855,
"step": 5798
},
{
"epoch": 1.98,
"learning_rate": 1.5462946458083548e-08,
"loss": 0.2009,
"step": 5799
},
{
"epoch": 1.98,
"learning_rate": 1.4976003826150388e-08,
"loss": 0.19,
"step": 5800
},
{
"epoch": 1.98,
"learning_rate": 1.4496849189410411e-08,
"loss": 0.2025,
"step": 5801
},
{
"epoch": 1.98,
"learning_rate": 1.4025482697216375e-08,
"loss": 0.1857,
"step": 5802
},
{
"epoch": 1.98,
"learning_rate": 1.3561904496495193e-08,
"loss": 0.1908,
"step": 5803
},
{
"epoch": 1.98,
"learning_rate": 1.3106114731750718e-08,
"loss": 0.2049,
"step": 5804
},
{
"epoch": 1.98,
"learning_rate": 1.2658113545055416e-08,
"loss": 0.2222,
"step": 5805
},
{
"epoch": 1.98,
"learning_rate": 1.2217901076050364e-08,
"loss": 0.1908,
"step": 5806
},
{
"epoch": 1.98,
"learning_rate": 1.1785477461956352e-08,
"loss": 0.1852,
"step": 5807
},
{
"epoch": 1.98,
"learning_rate": 1.1360842837562779e-08,
"loss": 0.1892,
"step": 5808
},
{
"epoch": 1.98,
"learning_rate": 1.0943997335227663e-08,
"loss": 0.1756,
"step": 5809
},
{
"epoch": 1.98,
"learning_rate": 1.0534941084885952e-08,
"loss": 0.1845,
"step": 5810
},
{
"epoch": 1.98,
"learning_rate": 1.0133674214043987e-08,
"loss": 0.214,
"step": 5811
},
{
"epoch": 1.98,
"learning_rate": 9.740196847779492e-09,
"loss": 0.2155,
"step": 5812
},
{
"epoch": 1.98,
"learning_rate": 9.354509108741583e-09,
"loss": 0.1857,
"step": 5813
},
{
"epoch": 1.98,
"learning_rate": 8.976611117153532e-09,
"loss": 0.2049,
"step": 5814
},
{
"epoch": 1.98,
"learning_rate": 8.60650299080168e-09,
"loss": 0.1897,
"step": 5815
},
{
"epoch": 1.98,
"learning_rate": 8.2441848450604e-09,
"loss": 0.2168,
"step": 5816
},
{
"epoch": 1.98,
"learning_rate": 7.889656792858802e-09,
"loss": 0.2032,
"step": 5817
},
{
"epoch": 1.98,
"learning_rate": 7.542918944711264e-09,
"loss": 0.2001,
"step": 5818
},
{
"epoch": 1.99,
"learning_rate": 7.20397140869522e-09,
"loss": 0.1982,
"step": 5819
},
{
"epoch": 1.99,
"learning_rate": 6.872814290465046e-09,
"loss": 0.1858,
"step": 5820
},
{
"epoch": 1.99,
"learning_rate": 6.549447693240951e-09,
"loss": 0.1903,
"step": 5821
},
{
"epoch": 1.99,
"learning_rate": 6.233871717822859e-09,
"loss": 0.183,
"step": 5822
},
{
"epoch": 1.99,
"learning_rate": 5.926086462573755e-09,
"loss": 0.212,
"step": 5823
},
{
"epoch": 1.99,
"learning_rate": 5.62609202343356e-09,
"loss": 0.2218,
"step": 5824
},
{
"epoch": 1.99,
"learning_rate": 5.333888493916361e-09,
"loss": 0.2188,
"step": 5825
},
{
"epoch": 1.99,
"learning_rate": 5.049475965096528e-09,
"loss": 0.1888,
"step": 5826
},
{
"epoch": 1.99,
"learning_rate": 4.772854525633697e-09,
"loss": 0.1977,
"step": 5827
},
{
"epoch": 1.99,
"learning_rate": 4.504024261750561e-09,
"loss": 0.2133,
"step": 5828
},
{
"epoch": 1.99,
"learning_rate": 4.242985257241205e-09,
"loss": 0.1875,
"step": 5829
},
{
"epoch": 1.99,
"learning_rate": 3.98973759347665e-09,
"loss": 0.2196,
"step": 5830
},
{
"epoch": 1.99,
"learning_rate": 3.7442813493937525e-09,
"loss": 0.2298,
"step": 5831
},
{
"epoch": 1.99,
"learning_rate": 3.5066166015035317e-09,
"loss": 0.1786,
"step": 5832
},
{
"epoch": 1.99,
"learning_rate": 3.2767434238883953e-09,
"loss": 0.1917,
"step": 5833
},
{
"epoch": 1.99,
"learning_rate": 3.054661888196586e-09,
"loss": 0.3939,
"step": 5834
},
{
"epoch": 1.99,
"learning_rate": 2.840372063658836e-09,
"loss": 0.2102,
"step": 5835
},
{
"epoch": 1.99,
"learning_rate": 2.633874017068938e-09,
"loss": 0.187,
"step": 5836
},
{
"epoch": 1.99,
"learning_rate": 2.4351678127892964e-09,
"loss": 0.2074,
"step": 5837
},
{
"epoch": 1.99,
"learning_rate": 2.2442535127648046e-09,
"loss": 0.1909,
"step": 5838
},
{
"epoch": 1.99,
"learning_rate": 2.0611311765006416e-09,
"loss": 0.1871,
"step": 5839
},
{
"epoch": 1.99,
"learning_rate": 1.885800861076148e-09,
"loss": 0.1926,
"step": 5840
},
{
"epoch": 1.99,
"learning_rate": 1.7182626211448282e-09,
"loss": 0.1993,
"step": 5841
},
{
"epoch": 1.99,
"learning_rate": 1.5585165089287979e-09,
"loss": 0.2263,
"step": 5842
},
{
"epoch": 1.99,
"learning_rate": 1.406562574224335e-09,
"loss": 0.1815,
"step": 5843
},
{
"epoch": 1.99,
"learning_rate": 1.2624008643935537e-09,
"loss": 0.2173,
"step": 5844
},
{
"epoch": 1.99,
"learning_rate": 1.126031424372731e-09,
"loss": 0.1833,
"step": 5845
},
{
"epoch": 1.99,
"learning_rate": 9.97454296669531e-10,
"loss": 0.2005,
"step": 5846
},
{
"epoch": 1.99,
"learning_rate": 8.766695213657805e-10,
"loss": 0.1769,
"step": 5847
},
{
"epoch": 2.0,
"learning_rate": 7.63677136103591e-10,
"loss": 0.1884,
"step": 5848
},
{
"epoch": 2.0,
"learning_rate": 6.584771761103392e-10,
"loss": 0.2114,
"step": 5849
},
{
"epoch": 2.0,
"learning_rate": 5.610696741736865e-10,
"loss": 0.1711,
"step": 5850
},
{
"epoch": 2.0,
"learning_rate": 4.714546606582326e-10,
"loss": 0.1924,
"step": 5851
},
{
"epoch": 2.0,
"learning_rate": 3.896321634971889e-10,
"loss": 0.1858,
"step": 5852
},
{
"epoch": 2.0,
"learning_rate": 3.156022081923782e-10,
"loss": 0.1967,
"step": 5853
},
{
"epoch": 2.0,
"learning_rate": 2.493648178253372e-10,
"loss": 0.1942,
"step": 5854
},
{
"epoch": 2.0,
"learning_rate": 1.9092001303511188e-10,
"loss": 0.1908,
"step": 5855
},
{
"epoch": 2.0,
"learning_rate": 1.4026781204601324e-10,
"loss": 0.2095,
"step": 5856
},
{
"epoch": 2.0,
"learning_rate": 9.740823064263715e-11,
"loss": 0.1726,
"step": 5857
},
{
"epoch": 2.0,
"learning_rate": 6.234128218374214e-11,
"loss": 0.1983,
"step": 5858
},
{
"epoch": 2.0,
"learning_rate": 3.5066977605024975e-11,
"loss": 0.1943,
"step": 5859
},
{
"epoch": 2.0,
"learning_rate": 1.5585325405242933e-11,
"loss": 0.1994,
"step": 5860
},
{
"epoch": 2.0,
"learning_rate": 3.896331654540397e-12,
"loss": 0.1922,
"step": 5861
},
{
"epoch": 2.0,
"learning_rate": 0.0,
"loss": 0.1528,
"step": 5862
},
{
"epoch": 2.0,
"eval_loss": 0.3155210018157959,
"eval_runtime": 77.0758,
"eval_samples_per_second": 9.886,
"eval_steps_per_second": 0.623,
"step": 5862
},
{
"epoch": 2.0,
"step": 5862,
"total_flos": 1.553226491795551e+19,
"train_loss": 0.28463143815169845,
"train_runtime": 161659.4686,
"train_samples_per_second": 2.32,
"train_steps_per_second": 0.036
}
],
"logging_steps": 1.0,
"max_steps": 5862,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 1.553226491795551e+19,
"trial_name": null,
"trial_params": null
}