6020 lines
119 KiB
JSON
6020 lines
119 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9995701848205272,
|
|
"eval_steps": 200000,
|
|
"global_step": 100000,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 9.990303975370099e-06,
|
|
"loss": 1.6732,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 9.980308073689788e-06,
|
|
"loss": 1.3978,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 9.970312172009477e-06,
|
|
"loss": 1.3189,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 9.960316270329166e-06,
|
|
"loss": 1.294,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 9.950320368648854e-06,
|
|
"loss": 1.269,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 9.940324466968543e-06,
|
|
"loss": 1.2399,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 9.930328565288232e-06,
|
|
"loss": 1.2272,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 9.920332663607922e-06,
|
|
"loss": 1.2275,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 9.91033676192761e-06,
|
|
"loss": 1.204,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 9.9003408602473e-06,
|
|
"loss": 1.2009,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 9.89034495856699e-06,
|
|
"loss": 1.1841,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 9.880349056886678e-06,
|
|
"loss": 1.1711,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 9.870353155206366e-06,
|
|
"loss": 1.1601,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 9.860357253526055e-06,
|
|
"loss": 1.1601,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 9.850361351845744e-06,
|
|
"loss": 1.1313,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 9.840365450165434e-06,
|
|
"loss": 1.1443,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 9.830369548485121e-06,
|
|
"loss": 1.144,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 9.82037364680481e-06,
|
|
"loss": 1.1347,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 9.8103777451245e-06,
|
|
"loss": 1.1323,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 9.800381843444189e-06,
|
|
"loss": 1.1229,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 9.790385941763878e-06,
|
|
"loss": 1.1027,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 9.780390040083566e-06,
|
|
"loss": 1.1113,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 9.770394138403255e-06,
|
|
"loss": 1.1093,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 9.760398236722944e-06,
|
|
"loss": 1.0941,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 9.750402335042633e-06,
|
|
"loss": 1.0985,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 9.740406433362322e-06,
|
|
"loss": 1.0881,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 9.73041053168201e-06,
|
|
"loss": 1.106,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 9.7204146300017e-06,
|
|
"loss": 1.0959,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 9.710418728321388e-06,
|
|
"loss": 1.0974,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 9.700422826641078e-06,
|
|
"loss": 1.0783,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 9.690426924960767e-06,
|
|
"loss": 1.0669,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 9.680431023280456e-06,
|
|
"loss": 1.081,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 9.670435121600145e-06,
|
|
"loss": 1.0611,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 9.660439219919835e-06,
|
|
"loss": 1.0572,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 9.650443318239524e-06,
|
|
"loss": 1.0654,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 9.640447416559211e-06,
|
|
"loss": 1.0671,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 9.6304515148789e-06,
|
|
"loss": 1.0655,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 9.62045561319859e-06,
|
|
"loss": 1.0507,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 9.610459711518279e-06,
|
|
"loss": 1.0482,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 9.600463809837968e-06,
|
|
"loss": 1.0543,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 9.590467908157656e-06,
|
|
"loss": 1.054,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 9.580472006477345e-06,
|
|
"loss": 1.0551,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 9.570476104797034e-06,
|
|
"loss": 1.0425,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 9.560480203116723e-06,
|
|
"loss": 1.0417,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 9.550484301436413e-06,
|
|
"loss": 1.0477,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 9.5404883997561e-06,
|
|
"loss": 1.0298,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 9.53049249807579e-06,
|
|
"loss": 1.0305,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 9.520496596395479e-06,
|
|
"loss": 1.0279,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 9.510500694715168e-06,
|
|
"loss": 1.0308,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 9.500504793034855e-06,
|
|
"loss": 1.0328,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 9.490508891354544e-06,
|
|
"loss": 1.027,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 9.480512989674234e-06,
|
|
"loss": 1.0251,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 9.470517087993923e-06,
|
|
"loss": 1.0273,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 9.460521186313612e-06,
|
|
"loss": 1.0258,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 9.450525284633301e-06,
|
|
"loss": 1.0239,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 9.44052938295299e-06,
|
|
"loss": 1.0263,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 9.43053348127268e-06,
|
|
"loss": 1.009,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 9.420537579592367e-06,
|
|
"loss": 1.005,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 9.410541677912057e-06,
|
|
"loss": 1.0067,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 9.400545776231746e-06,
|
|
"loss": 1.0088,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 9.390549874551435e-06,
|
|
"loss": 1.0019,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 9.380553972871124e-06,
|
|
"loss": 0.9987,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 9.370558071190813e-06,
|
|
"loss": 1.0008,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 9.360562169510503e-06,
|
|
"loss": 0.9994,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 9.35056626783019e-06,
|
|
"loss": 0.9955,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.34057036614988e-06,
|
|
"loss": 0.9904,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.330574464469569e-06,
|
|
"loss": 0.9859,
|
|
"step": 6700
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.320578562789258e-06,
|
|
"loss": 0.9792,
|
|
"step": 6800
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.310582661108945e-06,
|
|
"loss": 0.995,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.300586759428635e-06,
|
|
"loss": 0.9833,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.290590857748324e-06,
|
|
"loss": 0.9749,
|
|
"step": 7100
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.280594956068013e-06,
|
|
"loss": 0.9891,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.270599054387702e-06,
|
|
"loss": 0.9881,
|
|
"step": 7300
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.26060315270739e-06,
|
|
"loss": 0.9832,
|
|
"step": 7400
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.250607251027079e-06,
|
|
"loss": 0.9816,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 9.240611349346768e-06,
|
|
"loss": 0.9743,
|
|
"step": 7600
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 9.230615447666457e-06,
|
|
"loss": 0.9834,
|
|
"step": 7700
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 9.220619545986147e-06,
|
|
"loss": 0.9774,
|
|
"step": 7800
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 9.210623644305836e-06,
|
|
"loss": 0.9845,
|
|
"step": 7900
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 9.200627742625523e-06,
|
|
"loss": 0.9652,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 9.190631840945213e-06,
|
|
"loss": 0.969,
|
|
"step": 8100
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 9.180635939264902e-06,
|
|
"loss": 0.9679,
|
|
"step": 8200
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 9.170640037584591e-06,
|
|
"loss": 0.9624,
|
|
"step": 8300
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 9.16064413590428e-06,
|
|
"loss": 0.9813,
|
|
"step": 8400
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 9.15064823422397e-06,
|
|
"loss": 0.9702,
|
|
"step": 8500
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 9.140652332543659e-06,
|
|
"loss": 0.9634,
|
|
"step": 8600
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 9.130656430863348e-06,
|
|
"loss": 0.9598,
|
|
"step": 8700
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 9.120660529183035e-06,
|
|
"loss": 0.9662,
|
|
"step": 8800
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 9.110664627502725e-06,
|
|
"loss": 0.9588,
|
|
"step": 8900
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 9.100668725822414e-06,
|
|
"loss": 0.9451,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 9.090672824142103e-06,
|
|
"loss": 0.967,
|
|
"step": 9100
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 9.080676922461792e-06,
|
|
"loss": 0.9545,
|
|
"step": 9200
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 9.07068102078148e-06,
|
|
"loss": 0.9568,
|
|
"step": 9300
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 9.060685119101169e-06,
|
|
"loss": 0.9483,
|
|
"step": 9400
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 9.050689217420858e-06,
|
|
"loss": 0.9444,
|
|
"step": 9500
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 9.040693315740547e-06,
|
|
"loss": 0.9379,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 9.030697414060237e-06,
|
|
"loss": 0.9445,
|
|
"step": 9700
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 9.020701512379924e-06,
|
|
"loss": 0.939,
|
|
"step": 9800
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 9.010705610699613e-06,
|
|
"loss": 0.9506,
|
|
"step": 9900
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 9.000709709019303e-06,
|
|
"loss": 0.941,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 8.990713807338992e-06,
|
|
"loss": 0.9444,
|
|
"step": 10100
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 8.98071790565868e-06,
|
|
"loss": 0.9526,
|
|
"step": 10200
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 8.970722003978369e-06,
|
|
"loss": 0.9294,
|
|
"step": 10300
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 8.960726102298058e-06,
|
|
"loss": 0.9332,
|
|
"step": 10400
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 8.950730200617747e-06,
|
|
"loss": 0.9357,
|
|
"step": 10500
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 8.940734298937436e-06,
|
|
"loss": 0.9408,
|
|
"step": 10600
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 8.930738397257125e-06,
|
|
"loss": 0.9301,
|
|
"step": 10700
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 8.920742495576815e-06,
|
|
"loss": 0.9462,
|
|
"step": 10800
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 8.910746593896504e-06,
|
|
"loss": 0.9295,
|
|
"step": 10900
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 8.900750692216193e-06,
|
|
"loss": 0.9418,
|
|
"step": 11000
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 8.890754790535882e-06,
|
|
"loss": 0.9315,
|
|
"step": 11100
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 8.88075888885557e-06,
|
|
"loss": 0.9356,
|
|
"step": 11200
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 8.870762987175259e-06,
|
|
"loss": 0.9216,
|
|
"step": 11300
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 8.860767085494948e-06,
|
|
"loss": 0.9298,
|
|
"step": 11400
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 8.850771183814638e-06,
|
|
"loss": 0.9207,
|
|
"step": 11500
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 8.840775282134325e-06,
|
|
"loss": 0.923,
|
|
"step": 11600
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 8.830779380454014e-06,
|
|
"loss": 0.91,
|
|
"step": 11700
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 8.820783478773703e-06,
|
|
"loss": 0.9171,
|
|
"step": 11800
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 8.810787577093393e-06,
|
|
"loss": 0.9179,
|
|
"step": 11900
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 8.800791675413082e-06,
|
|
"loss": 0.9185,
|
|
"step": 12000
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 8.79079577373277e-06,
|
|
"loss": 0.9071,
|
|
"step": 12100
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 8.780799872052459e-06,
|
|
"loss": 0.9142,
|
|
"step": 12200
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 8.770803970372148e-06,
|
|
"loss": 0.9079,
|
|
"step": 12300
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 8.760808068691837e-06,
|
|
"loss": 0.9105,
|
|
"step": 12400
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 8.750812167011526e-06,
|
|
"loss": 0.9027,
|
|
"step": 12500
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 8.740816265331214e-06,
|
|
"loss": 0.9074,
|
|
"step": 12600
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 8.730820363650903e-06,
|
|
"loss": 0.9064,
|
|
"step": 12700
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 8.720824461970592e-06,
|
|
"loss": 0.8988,
|
|
"step": 12800
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 8.710828560290282e-06,
|
|
"loss": 0.9,
|
|
"step": 12900
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 8.70083265860997e-06,
|
|
"loss": 0.888,
|
|
"step": 13000
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 8.69083675692966e-06,
|
|
"loss": 0.9012,
|
|
"step": 13100
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 8.68084085524935e-06,
|
|
"loss": 0.9036,
|
|
"step": 13200
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 8.670844953569038e-06,
|
|
"loss": 0.9023,
|
|
"step": 13300
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 8.660849051888726e-06,
|
|
"loss": 0.9017,
|
|
"step": 13400
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 8.650853150208415e-06,
|
|
"loss": 0.8999,
|
|
"step": 13500
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 8.640857248528104e-06,
|
|
"loss": 0.895,
|
|
"step": 13600
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 8.630861346847794e-06,
|
|
"loss": 0.9,
|
|
"step": 13700
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 8.620865445167483e-06,
|
|
"loss": 0.907,
|
|
"step": 13800
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 8.610869543487172e-06,
|
|
"loss": 0.8963,
|
|
"step": 13900
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 8.60087364180686e-06,
|
|
"loss": 0.8953,
|
|
"step": 14000
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 8.590877740126549e-06,
|
|
"loss": 0.9034,
|
|
"step": 14100
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 8.580881838446238e-06,
|
|
"loss": 0.8916,
|
|
"step": 14200
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 8.570885936765927e-06,
|
|
"loss": 0.8964,
|
|
"step": 14300
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 8.560890035085616e-06,
|
|
"loss": 0.8894,
|
|
"step": 14400
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 8.550894133405304e-06,
|
|
"loss": 0.8894,
|
|
"step": 14500
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 8.540898231724993e-06,
|
|
"loss": 0.8941,
|
|
"step": 14600
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 8.530902330044682e-06,
|
|
"loss": 0.8888,
|
|
"step": 14700
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 8.520906428364372e-06,
|
|
"loss": 0.8898,
|
|
"step": 14800
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 8.510910526684059e-06,
|
|
"loss": 0.8812,
|
|
"step": 14900
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 8.500914625003748e-06,
|
|
"loss": 0.8833,
|
|
"step": 15000
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 8.490918723323438e-06,
|
|
"loss": 0.8907,
|
|
"step": 15100
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 8.480922821643127e-06,
|
|
"loss": 0.8732,
|
|
"step": 15200
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 8.470926919962816e-06,
|
|
"loss": 0.8894,
|
|
"step": 15300
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 8.460931018282505e-06,
|
|
"loss": 0.8731,
|
|
"step": 15400
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 8.450935116602193e-06,
|
|
"loss": 0.8811,
|
|
"step": 15500
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 8.440939214921882e-06,
|
|
"loss": 0.8649,
|
|
"step": 15600
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 8.430943313241571e-06,
|
|
"loss": 0.8654,
|
|
"step": 15700
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 8.42094741156126e-06,
|
|
"loss": 0.8688,
|
|
"step": 15800
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 8.41095150988095e-06,
|
|
"loss": 0.8713,
|
|
"step": 15900
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 8.400955608200639e-06,
|
|
"loss": 0.8694,
|
|
"step": 16000
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 8.390959706520328e-06,
|
|
"loss": 0.8617,
|
|
"step": 16100
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 8.380963804840017e-06,
|
|
"loss": 0.8764,
|
|
"step": 16200
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 8.370967903159706e-06,
|
|
"loss": 0.8627,
|
|
"step": 16300
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 8.360972001479394e-06,
|
|
"loss": 0.8749,
|
|
"step": 16400
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 8.350976099799083e-06,
|
|
"loss": 0.8702,
|
|
"step": 16500
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 8.340980198118772e-06,
|
|
"loss": 0.868,
|
|
"step": 16600
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 8.330984296438462e-06,
|
|
"loss": 0.8545,
|
|
"step": 16700
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 8.32098839475815e-06,
|
|
"loss": 0.865,
|
|
"step": 16800
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 8.310992493077838e-06,
|
|
"loss": 0.864,
|
|
"step": 16900
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 8.300996591397528e-06,
|
|
"loss": 0.8676,
|
|
"step": 17000
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 8.291000689717217e-06,
|
|
"loss": 0.8636,
|
|
"step": 17100
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 8.281004788036906e-06,
|
|
"loss": 0.8555,
|
|
"step": 17200
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 8.271008886356594e-06,
|
|
"loss": 0.8548,
|
|
"step": 17300
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 8.261012984676283e-06,
|
|
"loss": 0.8546,
|
|
"step": 17400
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 8.251017082995972e-06,
|
|
"loss": 0.8606,
|
|
"step": 17500
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 8.241021181315661e-06,
|
|
"loss": 0.8485,
|
|
"step": 17600
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 8.23102527963535e-06,
|
|
"loss": 0.8421,
|
|
"step": 17700
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 8.221029377955038e-06,
|
|
"loss": 0.8468,
|
|
"step": 17800
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 8.211033476274727e-06,
|
|
"loss": 0.8493,
|
|
"step": 17900
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 8.201037574594416e-06,
|
|
"loss": 0.8508,
|
|
"step": 18000
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 8.191041672914106e-06,
|
|
"loss": 0.8441,
|
|
"step": 18100
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 8.181045771233795e-06,
|
|
"loss": 0.8554,
|
|
"step": 18200
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 8.171049869553484e-06,
|
|
"loss": 0.8458,
|
|
"step": 18300
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 8.161053967873173e-06,
|
|
"loss": 0.8433,
|
|
"step": 18400
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 8.151058066192863e-06,
|
|
"loss": 0.8494,
|
|
"step": 18500
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 8.141062164512552e-06,
|
|
"loss": 0.842,
|
|
"step": 18600
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 8.13106626283224e-06,
|
|
"loss": 0.849,
|
|
"step": 18700
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 8.121070361151928e-06,
|
|
"loss": 0.8382,
|
|
"step": 18800
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 8.111074459471618e-06,
|
|
"loss": 0.8385,
|
|
"step": 18900
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 8.101078557791307e-06,
|
|
"loss": 0.8397,
|
|
"step": 19000
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 8.091082656110996e-06,
|
|
"loss": 0.8538,
|
|
"step": 19100
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 8.081086754430684e-06,
|
|
"loss": 0.8392,
|
|
"step": 19200
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 8.071090852750373e-06,
|
|
"loss": 0.8379,
|
|
"step": 19300
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 8.061094951070062e-06,
|
|
"loss": 0.8409,
|
|
"step": 19400
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 8.051099049389751e-06,
|
|
"loss": 0.8332,
|
|
"step": 19500
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 8.04110314770944e-06,
|
|
"loss": 0.8292,
|
|
"step": 19600
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 8.031107246029128e-06,
|
|
"loss": 0.8382,
|
|
"step": 19700
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 8.021111344348817e-06,
|
|
"loss": 0.8329,
|
|
"step": 19800
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 8.011115442668507e-06,
|
|
"loss": 0.825,
|
|
"step": 19900
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 8.001119540988196e-06,
|
|
"loss": 0.8274,
|
|
"step": 20000
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 7.991123639307883e-06,
|
|
"loss": 0.8341,
|
|
"step": 20100
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 7.981127737627572e-06,
|
|
"loss": 0.8302,
|
|
"step": 20200
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 7.971131835947262e-06,
|
|
"loss": 0.8247,
|
|
"step": 20300
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 7.961135934266951e-06,
|
|
"loss": 0.8355,
|
|
"step": 20400
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 7.95114003258664e-06,
|
|
"loss": 0.8298,
|
|
"step": 20500
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 7.94114413090633e-06,
|
|
"loss": 0.8282,
|
|
"step": 20600
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 7.931148229226019e-06,
|
|
"loss": 0.8202,
|
|
"step": 20700
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 7.921152327545708e-06,
|
|
"loss": 0.8203,
|
|
"step": 20800
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 7.911156425865395e-06,
|
|
"loss": 0.8295,
|
|
"step": 20900
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 7.901160524185085e-06,
|
|
"loss": 0.8229,
|
|
"step": 21000
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 7.891164622504774e-06,
|
|
"loss": 0.818,
|
|
"step": 21100
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 7.881168720824463e-06,
|
|
"loss": 0.8154,
|
|
"step": 21200
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 7.871172819144152e-06,
|
|
"loss": 0.8166,
|
|
"step": 21300
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 7.861176917463841e-06,
|
|
"loss": 0.8104,
|
|
"step": 21400
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 7.851181015783529e-06,
|
|
"loss": 0.8178,
|
|
"step": 21500
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 7.841185114103218e-06,
|
|
"loss": 0.8025,
|
|
"step": 21600
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 7.831189212422907e-06,
|
|
"loss": 0.8099,
|
|
"step": 21700
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 7.821193310742597e-06,
|
|
"loss": 0.8119,
|
|
"step": 21800
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 7.811197409062286e-06,
|
|
"loss": 0.8049,
|
|
"step": 21900
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 7.801201507381973e-06,
|
|
"loss": 0.8157,
|
|
"step": 22000
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 7.791205605701663e-06,
|
|
"loss": 0.8031,
|
|
"step": 22100
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 7.781209704021352e-06,
|
|
"loss": 0.8061,
|
|
"step": 22200
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 7.771213802341041e-06,
|
|
"loss": 0.8175,
|
|
"step": 22300
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 7.76121790066073e-06,
|
|
"loss": 0.8002,
|
|
"step": 22400
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 7.751221998980418e-06,
|
|
"loss": 0.8115,
|
|
"step": 22500
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 7.741226097300107e-06,
|
|
"loss": 0.8095,
|
|
"step": 22600
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 7.731230195619796e-06,
|
|
"loss": 0.7981,
|
|
"step": 22700
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 7.721234293939485e-06,
|
|
"loss": 0.802,
|
|
"step": 22800
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 7.711238392259175e-06,
|
|
"loss": 0.7983,
|
|
"step": 22900
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 7.701242490578862e-06,
|
|
"loss": 0.7987,
|
|
"step": 23000
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 7.691246588898551e-06,
|
|
"loss": 0.8006,
|
|
"step": 23100
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 7.68125068721824e-06,
|
|
"loss": 0.8005,
|
|
"step": 23200
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 7.67125478553793e-06,
|
|
"loss": 0.7952,
|
|
"step": 23300
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 7.661258883857619e-06,
|
|
"loss": 0.7929,
|
|
"step": 23400
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 7.651262982177308e-06,
|
|
"loss": 0.7874,
|
|
"step": 23500
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 7.641267080496997e-06,
|
|
"loss": 0.7971,
|
|
"step": 23600
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 7.631271178816687e-06,
|
|
"loss": 0.7972,
|
|
"step": 23700
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 7.621275277136375e-06,
|
|
"loss": 0.7909,
|
|
"step": 23800
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 7.611279375456063e-06,
|
|
"loss": 0.8056,
|
|
"step": 23900
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 7.601283473775753e-06,
|
|
"loss": 0.7985,
|
|
"step": 24000
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 7.591287572095442e-06,
|
|
"loss": 0.7947,
|
|
"step": 24100
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 7.581291670415131e-06,
|
|
"loss": 0.7843,
|
|
"step": 24200
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 7.5712957687348194e-06,
|
|
"loss": 0.8008,
|
|
"step": 24300
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 7.561299867054508e-06,
|
|
"loss": 0.786,
|
|
"step": 24400
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 7.551303965374197e-06,
|
|
"loss": 0.7837,
|
|
"step": 24500
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 7.541308063693886e-06,
|
|
"loss": 0.7978,
|
|
"step": 24600
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 7.5313121620135755e-06,
|
|
"loss": 0.7869,
|
|
"step": 24700
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 7.521316260333264e-06,
|
|
"loss": 0.7944,
|
|
"step": 24800
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 7.511320358652952e-06,
|
|
"loss": 0.7884,
|
|
"step": 24900
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 7.5013244569726414e-06,
|
|
"loss": 0.7783,
|
|
"step": 25000
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 7.491328555292331e-06,
|
|
"loss": 0.7859,
|
|
"step": 25100
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 7.48133265361202e-06,
|
|
"loss": 0.7741,
|
|
"step": 25200
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 7.471336751931708e-06,
|
|
"loss": 0.7788,
|
|
"step": 25300
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 7.4613408502513975e-06,
|
|
"loss": 0.7745,
|
|
"step": 25400
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 7.451344948571087e-06,
|
|
"loss": 0.7833,
|
|
"step": 25500
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 7.441349046890776e-06,
|
|
"loss": 0.7831,
|
|
"step": 25600
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 7.431353145210465e-06,
|
|
"loss": 0.781,
|
|
"step": 25700
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 7.421357243530153e-06,
|
|
"loss": 0.7754,
|
|
"step": 25800
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 7.411361341849842e-06,
|
|
"loss": 0.7647,
|
|
"step": 25900
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 7.401365440169531e-06,
|
|
"loss": 0.7641,
|
|
"step": 26000
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 7.39136953848922e-06,
|
|
"loss": 0.7744,
|
|
"step": 26100
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 7.3813736368089095e-06,
|
|
"loss": 0.774,
|
|
"step": 26200
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 7.371377735128598e-06,
|
|
"loss": 0.7651,
|
|
"step": 26300
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 7.361381833448287e-06,
|
|
"loss": 0.7605,
|
|
"step": 26400
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 7.3513859317679755e-06,
|
|
"loss": 0.7753,
|
|
"step": 26500
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 7.341390030087665e-06,
|
|
"loss": 0.7682,
|
|
"step": 26600
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 7.331394128407353e-06,
|
|
"loss": 0.7663,
|
|
"step": 26700
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 7.321398226727042e-06,
|
|
"loss": 0.7652,
|
|
"step": 26800
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 7.3114023250467315e-06,
|
|
"loss": 0.7623,
|
|
"step": 26900
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 7.301406423366421e-06,
|
|
"loss": 0.768,
|
|
"step": 27000
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 7.29141052168611e-06,
|
|
"loss": 0.7627,
|
|
"step": 27100
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 7.2814146200057975e-06,
|
|
"loss": 0.7555,
|
|
"step": 27200
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 7.271418718325487e-06,
|
|
"loss": 0.7667,
|
|
"step": 27300
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 7.261422816645176e-06,
|
|
"loss": 0.7544,
|
|
"step": 27400
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 7.251426914964865e-06,
|
|
"loss": 0.7584,
|
|
"step": 27500
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 7.241431013284554e-06,
|
|
"loss": 0.764,
|
|
"step": 27600
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 7.231435111604243e-06,
|
|
"loss": 0.766,
|
|
"step": 27700
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 7.221439209923932e-06,
|
|
"loss": 0.7607,
|
|
"step": 27800
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 7.211443308243621e-06,
|
|
"loss": 0.748,
|
|
"step": 27900
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 7.20144740656331e-06,
|
|
"loss": 0.7565,
|
|
"step": 28000
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 7.191451504882998e-06,
|
|
"loss": 0.7553,
|
|
"step": 28100
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 7.181455603202687e-06,
|
|
"loss": 0.7592,
|
|
"step": 28200
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 7.171459701522376e-06,
|
|
"loss": 0.7463,
|
|
"step": 28300
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 7.1614637998420656e-06,
|
|
"loss": 0.7468,
|
|
"step": 28400
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 7.151467898161755e-06,
|
|
"loss": 0.7543,
|
|
"step": 28500
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 7.141471996481443e-06,
|
|
"loss": 0.759,
|
|
"step": 28600
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 7.1314760948011315e-06,
|
|
"loss": 0.7443,
|
|
"step": 28700
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 7.121480193120821e-06,
|
|
"loss": 0.7498,
|
|
"step": 28800
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 7.11148429144051e-06,
|
|
"loss": 0.7458,
|
|
"step": 28900
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 7.101488389760199e-06,
|
|
"loss": 0.7495,
|
|
"step": 29000
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 7.0914924880798875e-06,
|
|
"loss": 0.7317,
|
|
"step": 29100
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 7.081496586399577e-06,
|
|
"loss": 0.7429,
|
|
"step": 29200
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 7.071500684719266e-06,
|
|
"loss": 0.7284,
|
|
"step": 29300
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 7.061504783038955e-06,
|
|
"loss": 0.7388,
|
|
"step": 29400
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 7.0515088813586444e-06,
|
|
"loss": 0.7364,
|
|
"step": 29500
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 7.041512979678332e-06,
|
|
"loss": 0.7364,
|
|
"step": 29600
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 7.031517077998021e-06,
|
|
"loss": 0.7358,
|
|
"step": 29700
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 7.02152117631771e-06,
|
|
"loss": 0.7347,
|
|
"step": 29800
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 7.0115252746374e-06,
|
|
"loss": 0.7338,
|
|
"step": 29900
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 7.001529372957088e-06,
|
|
"loss": 0.7403,
|
|
"step": 30000
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 6.991533471276777e-06,
|
|
"loss": 0.7319,
|
|
"step": 30100
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 6.981537569596466e-06,
|
|
"loss": 0.7356,
|
|
"step": 30200
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 6.971541667916155e-06,
|
|
"loss": 0.725,
|
|
"step": 30300
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 6.961545766235844e-06,
|
|
"loss": 0.7262,
|
|
"step": 30400
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 6.951549864555532e-06,
|
|
"loss": 0.7348,
|
|
"step": 30500
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 6.941553962875222e-06,
|
|
"loss": 0.7276,
|
|
"step": 30600
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 6.931558061194911e-06,
|
|
"loss": 0.7252,
|
|
"step": 30700
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 6.9215621595146e-06,
|
|
"loss": 0.7324,
|
|
"step": 30800
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 6.911566257834289e-06,
|
|
"loss": 0.7426,
|
|
"step": 30900
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 6.901570356153977e-06,
|
|
"loss": 0.7332,
|
|
"step": 31000
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 6.891574454473666e-06,
|
|
"loss": 0.7355,
|
|
"step": 31100
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 6.881578552793355e-06,
|
|
"loss": 0.7296,
|
|
"step": 31200
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 6.8715826511130444e-06,
|
|
"loss": 0.7284,
|
|
"step": 31300
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 6.861586749432733e-06,
|
|
"loss": 0.7217,
|
|
"step": 31400
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 6.851590847752422e-06,
|
|
"loss": 0.7216,
|
|
"step": 31500
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 6.841594946072111e-06,
|
|
"loss": 0.7139,
|
|
"step": 31600
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 6.8315990443918005e-06,
|
|
"loss": 0.7227,
|
|
"step": 31700
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 6.82160314271149e-06,
|
|
"loss": 0.7221,
|
|
"step": 31800
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 6.811607241031177e-06,
|
|
"loss": 0.7082,
|
|
"step": 31900
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 6.8016113393508664e-06,
|
|
"loss": 0.72,
|
|
"step": 32000
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 6.791615437670556e-06,
|
|
"loss": 0.7134,
|
|
"step": 32100
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 6.781619535990245e-06,
|
|
"loss": 0.719,
|
|
"step": 32200
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 6.771623634309934e-06,
|
|
"loss": 0.7083,
|
|
"step": 32300
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 6.761627732629622e-06,
|
|
"loss": 0.7148,
|
|
"step": 32400
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 6.751631830949311e-06,
|
|
"loss": 0.7229,
|
|
"step": 32500
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 6.741635929269e-06,
|
|
"loss": 0.7173,
|
|
"step": 32600
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 6.731640027588689e-06,
|
|
"loss": 0.7118,
|
|
"step": 32700
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 6.7216441259083785e-06,
|
|
"loss": 0.7072,
|
|
"step": 32800
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 6.711648224228067e-06,
|
|
"loss": 0.7174,
|
|
"step": 32900
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 6.701652322547756e-06,
|
|
"loss": 0.7125,
|
|
"step": 33000
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 6.691656420867445e-06,
|
|
"loss": 0.705,
|
|
"step": 33100
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 6.6816605191871345e-06,
|
|
"loss": 0.7023,
|
|
"step": 33200
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 6.671664617506822e-06,
|
|
"loss": 0.7114,
|
|
"step": 33300
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 6.661668715826511e-06,
|
|
"loss": 0.7038,
|
|
"step": 33400
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 6.6516728141462005e-06,
|
|
"loss": 0.7051,
|
|
"step": 33500
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 6.64167691246589e-06,
|
|
"loss": 0.6988,
|
|
"step": 33600
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 6.631681010785579e-06,
|
|
"loss": 0.7131,
|
|
"step": 33700
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 6.621685109105267e-06,
|
|
"loss": 0.6983,
|
|
"step": 33800
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 6.6116892074249565e-06,
|
|
"loss": 0.6978,
|
|
"step": 33900
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 6.601693305744645e-06,
|
|
"loss": 0.6941,
|
|
"step": 34000
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 6.591697404064334e-06,
|
|
"loss": 0.7028,
|
|
"step": 34100
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 6.581701502384023e-06,
|
|
"loss": 0.6925,
|
|
"step": 34200
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 6.571705600703712e-06,
|
|
"loss": 0.6993,
|
|
"step": 34300
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 6.561709699023401e-06,
|
|
"loss": 0.6894,
|
|
"step": 34400
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 6.55171379734309e-06,
|
|
"loss": 0.7054,
|
|
"step": 34500
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 6.541717895662779e-06,
|
|
"loss": 0.708,
|
|
"step": 34600
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 6.531721993982467e-06,
|
|
"loss": 0.6977,
|
|
"step": 34700
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 6.521726092302156e-06,
|
|
"loss": 0.6997,
|
|
"step": 34800
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 6.511730190621845e-06,
|
|
"loss": 0.6984,
|
|
"step": 34900
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 6.5017342889415345e-06,
|
|
"loss": 0.6955,
|
|
"step": 35000
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 6.491738387261224e-06,
|
|
"loss": 0.6885,
|
|
"step": 35100
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 6.481742485580912e-06,
|
|
"loss": 0.6926,
|
|
"step": 35200
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 6.471746583900601e-06,
|
|
"loss": 0.6927,
|
|
"step": 35300
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 6.4617506822202906e-06,
|
|
"loss": 0.6877,
|
|
"step": 35400
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 6.45175478053998e-06,
|
|
"loss": 0.6911,
|
|
"step": 35500
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 6.441758878859668e-06,
|
|
"loss": 0.6959,
|
|
"step": 35600
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 6.4317629771793565e-06,
|
|
"loss": 0.6741,
|
|
"step": 35700
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 6.421767075499046e-06,
|
|
"loss": 0.6853,
|
|
"step": 35800
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 6.411771173818735e-06,
|
|
"loss": 0.6892,
|
|
"step": 35900
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 6.401775272138424e-06,
|
|
"loss": 0.6864,
|
|
"step": 36000
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 6.3917793704581125e-06,
|
|
"loss": 0.6843,
|
|
"step": 36100
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 6.381783468777801e-06,
|
|
"loss": 0.6959,
|
|
"step": 36200
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 6.37178756709749e-06,
|
|
"loss": 0.6783,
|
|
"step": 36300
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 6.361791665417179e-06,
|
|
"loss": 0.6741,
|
|
"step": 36400
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 6.3517957637368686e-06,
|
|
"loss": 0.6771,
|
|
"step": 36500
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 6.341799862056557e-06,
|
|
"loss": 0.6699,
|
|
"step": 36600
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 6.331803960376246e-06,
|
|
"loss": 0.684,
|
|
"step": 36700
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 6.321808058695935e-06,
|
|
"loss": 0.675,
|
|
"step": 36800
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 6.311812157015625e-06,
|
|
"loss": 0.6857,
|
|
"step": 36900
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 6.301816255335314e-06,
|
|
"loss": 0.6782,
|
|
"step": 37000
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 6.291820353655001e-06,
|
|
"loss": 0.6692,
|
|
"step": 37100
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 6.2818244519746906e-06,
|
|
"loss": 0.6789,
|
|
"step": 37200
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 6.27182855029438e-06,
|
|
"loss": 0.6692,
|
|
"step": 37300
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 6.261832648614069e-06,
|
|
"loss": 0.668,
|
|
"step": 37400
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 6.251836746933758e-06,
|
|
"loss": 0.6657,
|
|
"step": 37500
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 6.241840845253447e-06,
|
|
"loss": 0.6699,
|
|
"step": 37600
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 6.231844943573136e-06,
|
|
"loss": 0.6695,
|
|
"step": 37700
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 6.221849041892824e-06,
|
|
"loss": 0.6613,
|
|
"step": 37800
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 6.211853140212513e-06,
|
|
"loss": 0.6711,
|
|
"step": 37900
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 6.201857238532202e-06,
|
|
"loss": 0.6647,
|
|
"step": 38000
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 6.191861336851891e-06,
|
|
"loss": 0.6625,
|
|
"step": 38100
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 6.18186543517158e-06,
|
|
"loss": 0.6593,
|
|
"step": 38200
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 6.1718695334912694e-06,
|
|
"loss": 0.6718,
|
|
"step": 38300
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 6.161873631810959e-06,
|
|
"loss": 0.6642,
|
|
"step": 38400
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 6.151877730130646e-06,
|
|
"loss": 0.6564,
|
|
"step": 38500
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 6.141881828450335e-06,
|
|
"loss": 0.6584,
|
|
"step": 38600
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 6.131885926770025e-06,
|
|
"loss": 0.6569,
|
|
"step": 38700
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 6.121890025089714e-06,
|
|
"loss": 0.6643,
|
|
"step": 38800
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 6.111894123409403e-06,
|
|
"loss": 0.6603,
|
|
"step": 38900
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 6.101898221729091e-06,
|
|
"loss": 0.6656,
|
|
"step": 39000
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 6.091902320048781e-06,
|
|
"loss": 0.664,
|
|
"step": 39100
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 6.08190641836847e-06,
|
|
"loss": 0.6555,
|
|
"step": 39200
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 6.071910516688159e-06,
|
|
"loss": 0.6534,
|
|
"step": 39300
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 6.061914615007847e-06,
|
|
"loss": 0.6555,
|
|
"step": 39400
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 6.051918713327536e-06,
|
|
"loss": 0.6525,
|
|
"step": 39500
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 6.041922811647225e-06,
|
|
"loss": 0.6548,
|
|
"step": 39600
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 6.031926909966914e-06,
|
|
"loss": 0.6439,
|
|
"step": 39700
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 6.0219310082866035e-06,
|
|
"loss": 0.6604,
|
|
"step": 39800
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 6.011935106606291e-06,
|
|
"loss": 0.6473,
|
|
"step": 39900
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 6.00193920492598e-06,
|
|
"loss": 0.6473,
|
|
"step": 40000
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 5.9919433032456694e-06,
|
|
"loss": 0.6405,
|
|
"step": 40100
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 5.981947401565359e-06,
|
|
"loss": 0.6438,
|
|
"step": 40200
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 5.971951499885048e-06,
|
|
"loss": 0.6455,
|
|
"step": 40300
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 5.961955598204736e-06,
|
|
"loss": 0.6465,
|
|
"step": 40400
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 5.9519596965244255e-06,
|
|
"loss": 0.6487,
|
|
"step": 40500
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 5.941963794844115e-06,
|
|
"loss": 0.6455,
|
|
"step": 40600
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 5.931967893163804e-06,
|
|
"loss": 0.6373,
|
|
"step": 40700
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 5.921971991483493e-06,
|
|
"loss": 0.6366,
|
|
"step": 40800
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 5.911976089803181e-06,
|
|
"loss": 0.6443,
|
|
"step": 40900
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 5.90198018812287e-06,
|
|
"loss": 0.6314,
|
|
"step": 41000
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 5.891984286442559e-06,
|
|
"loss": 0.6372,
|
|
"step": 41100
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 5.881988384762248e-06,
|
|
"loss": 0.6376,
|
|
"step": 41200
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 5.871992483081937e-06,
|
|
"loss": 0.6455,
|
|
"step": 41300
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 5.861996581401626e-06,
|
|
"loss": 0.6482,
|
|
"step": 41400
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 5.852000679721314e-06,
|
|
"loss": 0.6345,
|
|
"step": 41500
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 5.8420047780410035e-06,
|
|
"loss": 0.6328,
|
|
"step": 41600
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 5.832008876360693e-06,
|
|
"loss": 0.627,
|
|
"step": 41700
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 5.822012974680381e-06,
|
|
"loss": 0.6328,
|
|
"step": 41800
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 5.81201707300007e-06,
|
|
"loss": 0.6312,
|
|
"step": 41900
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 5.8020211713197595e-06,
|
|
"loss": 0.6348,
|
|
"step": 42000
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 5.792025269639449e-06,
|
|
"loss": 0.6356,
|
|
"step": 42100
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 5.782029367959138e-06,
|
|
"loss": 0.6383,
|
|
"step": 42200
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 5.7720334662788255e-06,
|
|
"loss": 0.6286,
|
|
"step": 42300
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 5.762037564598515e-06,
|
|
"loss": 0.6327,
|
|
"step": 42400
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 5.752041662918204e-06,
|
|
"loss": 0.6379,
|
|
"step": 42500
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 5.742045761237893e-06,
|
|
"loss": 0.6377,
|
|
"step": 42600
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 5.7320498595575815e-06,
|
|
"loss": 0.6294,
|
|
"step": 42700
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 5.722053957877271e-06,
|
|
"loss": 0.614,
|
|
"step": 42800
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 5.71205805619696e-06,
|
|
"loss": 0.6243,
|
|
"step": 42900
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 5.702062154516649e-06,
|
|
"loss": 0.6283,
|
|
"step": 43000
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 5.6920662528363375e-06,
|
|
"loss": 0.6191,
|
|
"step": 43100
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 5.682070351156026e-06,
|
|
"loss": 0.6263,
|
|
"step": 43200
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 5.672074449475715e-06,
|
|
"loss": 0.6143,
|
|
"step": 43300
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 5.662078547795404e-06,
|
|
"loss": 0.6165,
|
|
"step": 43400
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 5.6520826461150936e-06,
|
|
"loss": 0.6137,
|
|
"step": 43500
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 5.642086744434783e-06,
|
|
"loss": 0.6217,
|
|
"step": 43600
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 5.63209084275447e-06,
|
|
"loss": 0.6211,
|
|
"step": 43700
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 5.6220949410741595e-06,
|
|
"loss": 0.6216,
|
|
"step": 43800
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 5.612099039393849e-06,
|
|
"loss": 0.6126,
|
|
"step": 43900
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 5.602103137713538e-06,
|
|
"loss": 0.6142,
|
|
"step": 44000
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 5.592107236033227e-06,
|
|
"loss": 0.6099,
|
|
"step": 44100
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 5.5821113343529156e-06,
|
|
"loss": 0.6162,
|
|
"step": 44200
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 5.572115432672605e-06,
|
|
"loss": 0.6153,
|
|
"step": 44300
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 5.562119530992294e-06,
|
|
"loss": 0.6074,
|
|
"step": 44400
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 5.552123629311983e-06,
|
|
"loss": 0.6148,
|
|
"step": 44500
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 5.542127727631671e-06,
|
|
"loss": 0.6129,
|
|
"step": 44600
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 5.53213182595136e-06,
|
|
"loss": 0.6148,
|
|
"step": 44700
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 5.522135924271049e-06,
|
|
"loss": 0.6004,
|
|
"step": 44800
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 5.512140022590738e-06,
|
|
"loss": 0.6047,
|
|
"step": 44900
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 5.502144120910428e-06,
|
|
"loss": 0.6136,
|
|
"step": 45000
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 5.492148219230116e-06,
|
|
"loss": 0.6016,
|
|
"step": 45100
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 5.482152317549805e-06,
|
|
"loss": 0.6083,
|
|
"step": 45200
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 5.472156415869494e-06,
|
|
"loss": 0.6135,
|
|
"step": 45300
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 5.462160514189183e-06,
|
|
"loss": 0.6077,
|
|
"step": 45400
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 5.452164612508872e-06,
|
|
"loss": 0.6039,
|
|
"step": 45500
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 5.44216871082856e-06,
|
|
"loss": 0.6045,
|
|
"step": 45600
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 5.43217280914825e-06,
|
|
"loss": 0.6093,
|
|
"step": 45700
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 5.422176907467939e-06,
|
|
"loss": 0.6107,
|
|
"step": 45800
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 5.412181005787628e-06,
|
|
"loss": 0.6076,
|
|
"step": 45900
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 5.4021851041073156e-06,
|
|
"loss": 0.5965,
|
|
"step": 46000
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 5.392189202427005e-06,
|
|
"loss": 0.6015,
|
|
"step": 46100
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 5.382193300746694e-06,
|
|
"loss": 0.6067,
|
|
"step": 46200
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 5.372197399066383e-06,
|
|
"loss": 0.5977,
|
|
"step": 46300
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 5.3622014973860724e-06,
|
|
"loss": 0.5941,
|
|
"step": 46400
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 5.352205595705761e-06,
|
|
"loss": 0.6005,
|
|
"step": 46500
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 5.34220969402545e-06,
|
|
"loss": 0.5917,
|
|
"step": 46600
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 5.332213792345139e-06,
|
|
"loss": 0.5976,
|
|
"step": 46700
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 5.3222178906648285e-06,
|
|
"loss": 0.6022,
|
|
"step": 46800
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 5.312221988984517e-06,
|
|
"loss": 0.5941,
|
|
"step": 46900
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 5.302226087304205e-06,
|
|
"loss": 0.5894,
|
|
"step": 47000
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 5.2922301856238944e-06,
|
|
"loss": 0.5959,
|
|
"step": 47100
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 5.282234283943584e-06,
|
|
"loss": 0.6019,
|
|
"step": 47200
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 5.272238382263273e-06,
|
|
"loss": 0.5858,
|
|
"step": 47300
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 5.262242480582962e-06,
|
|
"loss": 0.5898,
|
|
"step": 47400
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 5.25224657890265e-06,
|
|
"loss": 0.5963,
|
|
"step": 47500
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 5.242250677222339e-06,
|
|
"loss": 0.5891,
|
|
"step": 47600
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 5.232254775542028e-06,
|
|
"loss": 0.5905,
|
|
"step": 47700
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 5.222258873861717e-06,
|
|
"loss": 0.5944,
|
|
"step": 47800
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 5.212262972181406e-06,
|
|
"loss": 0.585,
|
|
"step": 47900
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 5.202267070501095e-06,
|
|
"loss": 0.5844,
|
|
"step": 48000
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 5.192271168820784e-06,
|
|
"loss": 0.586,
|
|
"step": 48100
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 5.182275267140473e-06,
|
|
"loss": 0.5918,
|
|
"step": 48200
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 5.1722793654601625e-06,
|
|
"loss": 0.5756,
|
|
"step": 48300
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 5.16228346377985e-06,
|
|
"loss": 0.5863,
|
|
"step": 48400
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 5.152287562099539e-06,
|
|
"loss": 0.5903,
|
|
"step": 48500
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 5.1422916604192285e-06,
|
|
"loss": 0.5843,
|
|
"step": 48600
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 5.132295758738918e-06,
|
|
"loss": 0.5743,
|
|
"step": 48700
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 5.122299857058607e-06,
|
|
"loss": 0.5789,
|
|
"step": 48800
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 5.112303955378295e-06,
|
|
"loss": 0.5777,
|
|
"step": 48900
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 5.1023080536979845e-06,
|
|
"loss": 0.5762,
|
|
"step": 49000
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 5.092312152017673e-06,
|
|
"loss": 0.5751,
|
|
"step": 49100
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 5.082316250337362e-06,
|
|
"loss": 0.5698,
|
|
"step": 49200
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 5.0723203486570505e-06,
|
|
"loss": 0.5774,
|
|
"step": 49300
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 5.06232444697674e-06,
|
|
"loss": 0.5868,
|
|
"step": 49400
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 5.052328545296429e-06,
|
|
"loss": 0.5693,
|
|
"step": 49500
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 5.042332643616118e-06,
|
|
"loss": 0.5721,
|
|
"step": 49600
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 5.032336741935807e-06,
|
|
"loss": 0.5667,
|
|
"step": 49700
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 5.022340840255495e-06,
|
|
"loss": 0.5655,
|
|
"step": 49800
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 5.012344938575184e-06,
|
|
"loss": 0.572,
|
|
"step": 49900
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 5.002349036894873e-06,
|
|
"loss": 0.5827,
|
|
"step": 50000
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.9923531352145625e-06,
|
|
"loss": 0.5666,
|
|
"step": 50100
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.982357233534251e-06,
|
|
"loss": 0.569,
|
|
"step": 50200
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.97236133185394e-06,
|
|
"loss": 0.5689,
|
|
"step": 50300
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.962365430173629e-06,
|
|
"loss": 0.5698,
|
|
"step": 50400
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.9523695284933186e-06,
|
|
"loss": 0.5726,
|
|
"step": 50500
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.942373626813007e-06,
|
|
"loss": 0.575,
|
|
"step": 50600
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.932377725132696e-06,
|
|
"loss": 0.5653,
|
|
"step": 50700
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.922381823452385e-06,
|
|
"loss": 0.571,
|
|
"step": 50800
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.912385921772074e-06,
|
|
"loss": 0.5665,
|
|
"step": 50900
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.902390020091763e-06,
|
|
"loss": 0.5631,
|
|
"step": 51000
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.892394118411451e-06,
|
|
"loss": 0.5666,
|
|
"step": 51100
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.8823982167311406e-06,
|
|
"loss": 0.5605,
|
|
"step": 51200
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.87240231505083e-06,
|
|
"loss": 0.5664,
|
|
"step": 51300
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.862406413370518e-06,
|
|
"loss": 0.5583,
|
|
"step": 51400
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.852410511690207e-06,
|
|
"loss": 0.5577,
|
|
"step": 51500
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.842414610009897e-06,
|
|
"loss": 0.559,
|
|
"step": 51600
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.832418708329585e-06,
|
|
"loss": 0.5554,
|
|
"step": 51700
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.822422806649274e-06,
|
|
"loss": 0.5578,
|
|
"step": 51800
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.812426904968963e-06,
|
|
"loss": 0.5585,
|
|
"step": 51900
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.802431003288653e-06,
|
|
"loss": 0.5555,
|
|
"step": 52000
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.792435101608341e-06,
|
|
"loss": 0.5559,
|
|
"step": 52100
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.78243919992803e-06,
|
|
"loss": 0.5547,
|
|
"step": 52200
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.7724432982477186e-06,
|
|
"loss": 0.5612,
|
|
"step": 52300
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.762447396567408e-06,
|
|
"loss": 0.5531,
|
|
"step": 52400
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.752451494887096e-06,
|
|
"loss": 0.5511,
|
|
"step": 52500
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.742455593206785e-06,
|
|
"loss": 0.5535,
|
|
"step": 52600
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.732459691526475e-06,
|
|
"loss": 0.5539,
|
|
"step": 52700
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.722463789846163e-06,
|
|
"loss": 0.549,
|
|
"step": 52800
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.712467888165852e-06,
|
|
"loss": 0.5489,
|
|
"step": 52900
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.702471986485541e-06,
|
|
"loss": 0.5489,
|
|
"step": 53000
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.692476084805231e-06,
|
|
"loss": 0.5561,
|
|
"step": 53100
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.682480183124919e-06,
|
|
"loss": 0.5433,
|
|
"step": 53200
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.672484281444608e-06,
|
|
"loss": 0.5429,
|
|
"step": 53300
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.6624883797642974e-06,
|
|
"loss": 0.5369,
|
|
"step": 53400
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.652492478083986e-06,
|
|
"loss": 0.5521,
|
|
"step": 53500
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.642496576403675e-06,
|
|
"loss": 0.5414,
|
|
"step": 53600
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.632500674723363e-06,
|
|
"loss": 0.5415,
|
|
"step": 53700
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.622504773043053e-06,
|
|
"loss": 0.5442,
|
|
"step": 53800
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.612508871362741e-06,
|
|
"loss": 0.5392,
|
|
"step": 53900
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.60251296968243e-06,
|
|
"loss": 0.5385,
|
|
"step": 54000
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.5925170680021194e-06,
|
|
"loss": 0.5385,
|
|
"step": 54100
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.582521166321809e-06,
|
|
"loss": 0.5348,
|
|
"step": 54200
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.572525264641498e-06,
|
|
"loss": 0.5387,
|
|
"step": 54300
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.562529362961186e-06,
|
|
"loss": 0.536,
|
|
"step": 54400
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.5525334612808755e-06,
|
|
"loss": 0.5332,
|
|
"step": 54500
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.542537559600565e-06,
|
|
"loss": 0.5299,
|
|
"step": 54600
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.532541657920253e-06,
|
|
"loss": 0.5403,
|
|
"step": 54700
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.522545756239942e-06,
|
|
"loss": 0.5365,
|
|
"step": 54800
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.512549854559631e-06,
|
|
"loss": 0.534,
|
|
"step": 54900
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.50255395287932e-06,
|
|
"loss": 0.5358,
|
|
"step": 55000
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.492558051199008e-06,
|
|
"loss": 0.5323,
|
|
"step": 55100
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.4825621495186975e-06,
|
|
"loss": 0.5297,
|
|
"step": 55200
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.472566247838387e-06,
|
|
"loss": 0.5333,
|
|
"step": 55300
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.462570346158076e-06,
|
|
"loss": 0.5285,
|
|
"step": 55400
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.452574444477764e-06,
|
|
"loss": 0.5328,
|
|
"step": 55500
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.4425785427974535e-06,
|
|
"loss": 0.5395,
|
|
"step": 55600
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.432582641117143e-06,
|
|
"loss": 0.5259,
|
|
"step": 55700
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.422586739436831e-06,
|
|
"loss": 0.5231,
|
|
"step": 55800
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.41259083775652e-06,
|
|
"loss": 0.5244,
|
|
"step": 55900
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.4025949360762095e-06,
|
|
"loss": 0.5202,
|
|
"step": 56000
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.392599034395898e-06,
|
|
"loss": 0.5236,
|
|
"step": 56100
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.382603132715587e-06,
|
|
"loss": 0.5271,
|
|
"step": 56200
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.3726072310352755e-06,
|
|
"loss": 0.518,
|
|
"step": 56300
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.362611329354965e-06,
|
|
"loss": 0.5178,
|
|
"step": 56400
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.352615427674654e-06,
|
|
"loss": 0.5288,
|
|
"step": 56500
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.342619525994342e-06,
|
|
"loss": 0.5158,
|
|
"step": 56600
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.3326236243140315e-06,
|
|
"loss": 0.5155,
|
|
"step": 56700
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.322627722633721e-06,
|
|
"loss": 0.5211,
|
|
"step": 56800
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.31263182095341e-06,
|
|
"loss": 0.5161,
|
|
"step": 56900
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.302635919273098e-06,
|
|
"loss": 0.526,
|
|
"step": 57000
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.2926400175927875e-06,
|
|
"loss": 0.5179,
|
|
"step": 57100
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.282644115912476e-06,
|
|
"loss": 0.5239,
|
|
"step": 57200
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.272648214232165e-06,
|
|
"loss": 0.5117,
|
|
"step": 57300
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.262652312551854e-06,
|
|
"loss": 0.5185,
|
|
"step": 57400
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.252656410871543e-06,
|
|
"loss": 0.5149,
|
|
"step": 57500
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.242660509191232e-06,
|
|
"loss": 0.5135,
|
|
"step": 57600
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.23266460751092e-06,
|
|
"loss": 0.5147,
|
|
"step": 57700
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.2226687058306095e-06,
|
|
"loss": 0.5162,
|
|
"step": 57800
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.212672804150299e-06,
|
|
"loss": 0.5109,
|
|
"step": 57900
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.202676902469988e-06,
|
|
"loss": 0.5093,
|
|
"step": 58000
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.192681000789677e-06,
|
|
"loss": 0.5135,
|
|
"step": 58100
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.1826850991093655e-06,
|
|
"loss": 0.5058,
|
|
"step": 58200
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.172689197429055e-06,
|
|
"loss": 0.5082,
|
|
"step": 58300
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.162693295748743e-06,
|
|
"loss": 0.5122,
|
|
"step": 58400
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.152697394068432e-06,
|
|
"loss": 0.5127,
|
|
"step": 58500
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.142701492388122e-06,
|
|
"loss": 0.505,
|
|
"step": 58600
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.13270559070781e-06,
|
|
"loss": 0.5058,
|
|
"step": 58700
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.122709689027499e-06,
|
|
"loss": 0.5049,
|
|
"step": 58800
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.1127137873471875e-06,
|
|
"loss": 0.5055,
|
|
"step": 58900
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.102717885666877e-06,
|
|
"loss": 0.5061,
|
|
"step": 59000
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.092721983986566e-06,
|
|
"loss": 0.5015,
|
|
"step": 59100
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.082726082306255e-06,
|
|
"loss": 0.5052,
|
|
"step": 59200
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.0727301806259436e-06,
|
|
"loss": 0.5028,
|
|
"step": 59300
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.062734278945633e-06,
|
|
"loss": 0.504,
|
|
"step": 59400
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.052738377265322e-06,
|
|
"loss": 0.5079,
|
|
"step": 59500
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.04274247558501e-06,
|
|
"loss": 0.4977,
|
|
"step": 59600
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.0327465739047e-06,
|
|
"loss": 0.4993,
|
|
"step": 59700
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.022750672224388e-06,
|
|
"loss": 0.4933,
|
|
"step": 59800
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.012754770544077e-06,
|
|
"loss": 0.4999,
|
|
"step": 59900
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.002758868863766e-06,
|
|
"loss": 0.4964,
|
|
"step": 60000
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 3.992762967183455e-06,
|
|
"loss": 0.5002,
|
|
"step": 60100
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 3.982767065503144e-06,
|
|
"loss": 0.4923,
|
|
"step": 60200
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 3.972771163822832e-06,
|
|
"loss": 0.4876,
|
|
"step": 60300
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 3.962775262142522e-06,
|
|
"loss": 0.4963,
|
|
"step": 60400
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 3.952779360462211e-06,
|
|
"loss": 0.5016,
|
|
"step": 60500
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 3.9427834587819e-06,
|
|
"loss": 0.497,
|
|
"step": 60600
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 3.932787557101589e-06,
|
|
"loss": 0.4883,
|
|
"step": 60700
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 3.922791655421278e-06,
|
|
"loss": 0.4895,
|
|
"step": 60800
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 3.912795753740967e-06,
|
|
"loss": 0.4914,
|
|
"step": 60900
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 3.902799852060655e-06,
|
|
"loss": 0.4935,
|
|
"step": 61000
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 3.8928039503803444e-06,
|
|
"loss": 0.4904,
|
|
"step": 61100
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 3.882808048700033e-06,
|
|
"loss": 0.493,
|
|
"step": 61200
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 3.872812147019722e-06,
|
|
"loss": 0.4843,
|
|
"step": 61300
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 3.862816245339411e-06,
|
|
"loss": 0.4867,
|
|
"step": 61400
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 3.8528203436591e-06,
|
|
"loss": 0.4933,
|
|
"step": 61500
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 3.842824441978789e-06,
|
|
"loss": 0.4823,
|
|
"step": 61600
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 3.832828540298478e-06,
|
|
"loss": 0.4873,
|
|
"step": 61700
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 3.822832638618167e-06,
|
|
"loss": 0.4841,
|
|
"step": 61800
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 3.812836736937856e-06,
|
|
"loss": 0.4867,
|
|
"step": 61900
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 3.802840835257545e-06,
|
|
"loss": 0.4837,
|
|
"step": 62000
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 3.7928449335772337e-06,
|
|
"loss": 0.4861,
|
|
"step": 62100
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 3.7828490318969224e-06,
|
|
"loss": 0.4799,
|
|
"step": 62200
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 3.7728531302166117e-06,
|
|
"loss": 0.4803,
|
|
"step": 62300
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 3.7628572285363e-06,
|
|
"loss": 0.4808,
|
|
"step": 62400
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 3.7528613268559893e-06,
|
|
"loss": 0.4829,
|
|
"step": 62500
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 3.7428654251756785e-06,
|
|
"loss": 0.4802,
|
|
"step": 62600
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 3.7328695234953673e-06,
|
|
"loss": 0.4816,
|
|
"step": 62700
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 3.7228736218150565e-06,
|
|
"loss": 0.4847,
|
|
"step": 62800
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 3.712877720134745e-06,
|
|
"loss": 0.4831,
|
|
"step": 62900
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 3.702881818454434e-06,
|
|
"loss": 0.4816,
|
|
"step": 63000
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 3.692885916774123e-06,
|
|
"loss": 0.474,
|
|
"step": 63100
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 3.6828900150938117e-06,
|
|
"loss": 0.479,
|
|
"step": 63200
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 3.672894113413501e-06,
|
|
"loss": 0.4773,
|
|
"step": 63300
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 3.6628982117331897e-06,
|
|
"loss": 0.4765,
|
|
"step": 63400
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 3.652902310052879e-06,
|
|
"loss": 0.4813,
|
|
"step": 63500
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 3.6429064083725673e-06,
|
|
"loss": 0.4786,
|
|
"step": 63600
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 3.6329105066922565e-06,
|
|
"loss": 0.4815,
|
|
"step": 63700
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 3.6229146050119453e-06,
|
|
"loss": 0.4771,
|
|
"step": 63800
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 3.6129187033316345e-06,
|
|
"loss": 0.4769,
|
|
"step": 63900
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 3.6029228016513233e-06,
|
|
"loss": 0.4739,
|
|
"step": 64000
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 3.592926899971012e-06,
|
|
"loss": 0.4776,
|
|
"step": 64100
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 3.5829309982907013e-06,
|
|
"loss": 0.4695,
|
|
"step": 64200
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 3.5729350966103897e-06,
|
|
"loss": 0.4606,
|
|
"step": 64300
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 3.562939194930079e-06,
|
|
"loss": 0.4716,
|
|
"step": 64400
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 3.5529432932497677e-06,
|
|
"loss": 0.4677,
|
|
"step": 64500
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 3.542947391569457e-06,
|
|
"loss": 0.4712,
|
|
"step": 64600
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 3.532951489889146e-06,
|
|
"loss": 0.4682,
|
|
"step": 64700
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 3.5229555882088345e-06,
|
|
"loss": 0.4672,
|
|
"step": 64800
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 3.5129596865285237e-06,
|
|
"loss": 0.4691,
|
|
"step": 64900
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 3.502963784848212e-06,
|
|
"loss": 0.4656,
|
|
"step": 65000
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 3.4929678831679013e-06,
|
|
"loss": 0.4619,
|
|
"step": 65100
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 3.4829719814875905e-06,
|
|
"loss": 0.4595,
|
|
"step": 65200
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 3.4729760798072793e-06,
|
|
"loss": 0.4688,
|
|
"step": 65300
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 3.4629801781269686e-06,
|
|
"loss": 0.4658,
|
|
"step": 65400
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 3.452984276446657e-06,
|
|
"loss": 0.4598,
|
|
"step": 65500
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 3.442988374766346e-06,
|
|
"loss": 0.4623,
|
|
"step": 65600
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 3.432992473086035e-06,
|
|
"loss": 0.4558,
|
|
"step": 65700
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 3.4229965714057237e-06,
|
|
"loss": 0.4675,
|
|
"step": 65800
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 3.413000669725413e-06,
|
|
"loss": 0.4548,
|
|
"step": 65900
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 3.4030047680451018e-06,
|
|
"loss": 0.4635,
|
|
"step": 66000
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 3.393008866364791e-06,
|
|
"loss": 0.4551,
|
|
"step": 66100
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 3.3830129646844793e-06,
|
|
"loss": 0.4619,
|
|
"step": 66200
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 3.3730170630041686e-06,
|
|
"loss": 0.4581,
|
|
"step": 66300
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 3.3630211613238574e-06,
|
|
"loss": 0.4606,
|
|
"step": 66400
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 3.3530252596435466e-06,
|
|
"loss": 0.4521,
|
|
"step": 66500
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 3.3430293579632354e-06,
|
|
"loss": 0.4602,
|
|
"step": 66600
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 3.333033456282924e-06,
|
|
"loss": 0.4598,
|
|
"step": 66700
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 3.3230375546026134e-06,
|
|
"loss": 0.4567,
|
|
"step": 66800
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 3.3130416529223018e-06,
|
|
"loss": 0.4515,
|
|
"step": 66900
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 3.303045751241991e-06,
|
|
"loss": 0.449,
|
|
"step": 67000
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 3.2930498495616798e-06,
|
|
"loss": 0.4539,
|
|
"step": 67100
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 3.283053947881369e-06,
|
|
"loss": 0.4501,
|
|
"step": 67200
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 3.2730580462010582e-06,
|
|
"loss": 0.4509,
|
|
"step": 67300
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 3.2630621445207466e-06,
|
|
"loss": 0.4487,
|
|
"step": 67400
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 3.253066242840436e-06,
|
|
"loss": 0.4492,
|
|
"step": 67500
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 3.2430703411601246e-06,
|
|
"loss": 0.4477,
|
|
"step": 67600
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 3.2330744394798134e-06,
|
|
"loss": 0.4444,
|
|
"step": 67700
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 3.223078537799502e-06,
|
|
"loss": 0.4431,
|
|
"step": 67800
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 3.2130826361191914e-06,
|
|
"loss": 0.4444,
|
|
"step": 67900
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 3.2030867344388806e-06,
|
|
"loss": 0.4446,
|
|
"step": 68000
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 3.193090832758569e-06,
|
|
"loss": 0.4443,
|
|
"step": 68100
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 3.1830949310782582e-06,
|
|
"loss": 0.4503,
|
|
"step": 68200
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 3.173099029397947e-06,
|
|
"loss": 0.4501,
|
|
"step": 68300
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 3.1631031277176362e-06,
|
|
"loss": 0.4432,
|
|
"step": 68400
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 3.153107226037325e-06,
|
|
"loss": 0.4398,
|
|
"step": 68500
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 3.143111324357014e-06,
|
|
"loss": 0.446,
|
|
"step": 68600
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 3.133115422676703e-06,
|
|
"loss": 0.4467,
|
|
"step": 68700
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 3.1231195209963914e-06,
|
|
"loss": 0.4381,
|
|
"step": 68800
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 3.1131236193160806e-06,
|
|
"loss": 0.4365,
|
|
"step": 68900
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 3.1031277176357694e-06,
|
|
"loss": 0.4557,
|
|
"step": 69000
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 3.0931318159554586e-06,
|
|
"loss": 0.4466,
|
|
"step": 69100
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 3.083135914275148e-06,
|
|
"loss": 0.4401,
|
|
"step": 69200
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 3.0731400125948362e-06,
|
|
"loss": 0.4382,
|
|
"step": 69300
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 3.0631441109145255e-06,
|
|
"loss": 0.4375,
|
|
"step": 69400
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 3.0531482092342143e-06,
|
|
"loss": 0.4404,
|
|
"step": 69500
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 3.043152307553903e-06,
|
|
"loss": 0.434,
|
|
"step": 69600
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 3.033156405873592e-06,
|
|
"loss": 0.4341,
|
|
"step": 69700
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 3.023160504193281e-06,
|
|
"loss": 0.4348,
|
|
"step": 69800
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 3.0131646025129703e-06,
|
|
"loss": 0.4451,
|
|
"step": 69900
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 3.0031687008326587e-06,
|
|
"loss": 0.44,
|
|
"step": 70000
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 2.993172799152348e-06,
|
|
"loss": 0.439,
|
|
"step": 70100
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 2.9831768974720367e-06,
|
|
"loss": 0.4355,
|
|
"step": 70200
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 2.973180995791726e-06,
|
|
"loss": 0.4304,
|
|
"step": 70300
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 2.9631850941114143e-06,
|
|
"loss": 0.4335,
|
|
"step": 70400
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 2.9531891924311035e-06,
|
|
"loss": 0.4355,
|
|
"step": 70500
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 2.9431932907507927e-06,
|
|
"loss": 0.4289,
|
|
"step": 70600
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 2.933197389070481e-06,
|
|
"loss": 0.426,
|
|
"step": 70700
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 2.9232014873901703e-06,
|
|
"loss": 0.4282,
|
|
"step": 70800
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 2.913205585709859e-06,
|
|
"loss": 0.4296,
|
|
"step": 70900
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 2.9032096840295483e-06,
|
|
"loss": 0.4262,
|
|
"step": 71000
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 2.8932137823492367e-06,
|
|
"loss": 0.4289,
|
|
"step": 71100
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 2.883217880668926e-06,
|
|
"loss": 0.4217,
|
|
"step": 71200
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 2.873221978988615e-06,
|
|
"loss": 0.4215,
|
|
"step": 71300
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 2.863226077308304e-06,
|
|
"loss": 0.4342,
|
|
"step": 71400
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 2.8532301756279927e-06,
|
|
"loss": 0.4274,
|
|
"step": 71500
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 2.8432342739476815e-06,
|
|
"loss": 0.422,
|
|
"step": 71600
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 2.8332383722673707e-06,
|
|
"loss": 0.4217,
|
|
"step": 71700
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 2.82324247058706e-06,
|
|
"loss": 0.425,
|
|
"step": 71800
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 2.8132465689067483e-06,
|
|
"loss": 0.4239,
|
|
"step": 71900
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 2.8032506672264375e-06,
|
|
"loss": 0.4246,
|
|
"step": 72000
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 2.7932547655461263e-06,
|
|
"loss": 0.4259,
|
|
"step": 72100
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 2.7832588638658155e-06,
|
|
"loss": 0.422,
|
|
"step": 72200
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 2.773262962185504e-06,
|
|
"loss": 0.4205,
|
|
"step": 72300
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 2.763267060505193e-06,
|
|
"loss": 0.4225,
|
|
"step": 72400
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 2.7532711588248823e-06,
|
|
"loss": 0.4144,
|
|
"step": 72500
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 2.7432752571445707e-06,
|
|
"loss": 0.4194,
|
|
"step": 72600
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 2.73327935546426e-06,
|
|
"loss": 0.4251,
|
|
"step": 72700
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 2.7232834537839487e-06,
|
|
"loss": 0.4178,
|
|
"step": 72800
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 2.713287552103638e-06,
|
|
"loss": 0.4189,
|
|
"step": 72900
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 2.7032916504233263e-06,
|
|
"loss": 0.4227,
|
|
"step": 73000
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 2.6932957487430155e-06,
|
|
"loss": 0.4183,
|
|
"step": 73100
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 2.6832998470627048e-06,
|
|
"loss": 0.4157,
|
|
"step": 73200
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 2.6733039453823936e-06,
|
|
"loss": 0.421,
|
|
"step": 73300
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 2.6633080437020824e-06,
|
|
"loss": 0.4197,
|
|
"step": 73400
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 2.653312142021771e-06,
|
|
"loss": 0.4119,
|
|
"step": 73500
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 2.6433162403414604e-06,
|
|
"loss": 0.4087,
|
|
"step": 73600
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 2.6333203386611487e-06,
|
|
"loss": 0.4137,
|
|
"step": 73700
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 2.623324436980838e-06,
|
|
"loss": 0.4124,
|
|
"step": 73800
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 2.613328535300527e-06,
|
|
"loss": 0.4092,
|
|
"step": 73900
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 2.603332633620216e-06,
|
|
"loss": 0.4101,
|
|
"step": 74000
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 2.593336731939905e-06,
|
|
"loss": 0.4115,
|
|
"step": 74100
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 2.5833408302595936e-06,
|
|
"loss": 0.4104,
|
|
"step": 74200
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 2.5733449285792828e-06,
|
|
"loss": 0.409,
|
|
"step": 74300
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 2.563349026898971e-06,
|
|
"loss": 0.415,
|
|
"step": 74400
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 2.5533531252186604e-06,
|
|
"loss": 0.4013,
|
|
"step": 74500
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.5433572235383496e-06,
|
|
"loss": 0.4008,
|
|
"step": 74600
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.5333613218580384e-06,
|
|
"loss": 0.4131,
|
|
"step": 74700
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.5233654201777276e-06,
|
|
"loss": 0.4123,
|
|
"step": 74800
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.513369518497416e-06,
|
|
"loss": 0.4132,
|
|
"step": 74900
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.503373616817105e-06,
|
|
"loss": 0.4111,
|
|
"step": 75000
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.493377715136794e-06,
|
|
"loss": 0.4043,
|
|
"step": 75100
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.483381813456483e-06,
|
|
"loss": 0.4106,
|
|
"step": 75200
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.473385911776172e-06,
|
|
"loss": 0.4051,
|
|
"step": 75300
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.463390010095861e-06,
|
|
"loss": 0.4004,
|
|
"step": 75400
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.45339410841555e-06,
|
|
"loss": 0.4012,
|
|
"step": 75500
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.443398206735239e-06,
|
|
"loss": 0.4039,
|
|
"step": 75600
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.4334023050549276e-06,
|
|
"loss": 0.403,
|
|
"step": 75700
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.4234064033746164e-06,
|
|
"loss": 0.4111,
|
|
"step": 75800
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.4134105016943056e-06,
|
|
"loss": 0.3985,
|
|
"step": 75900
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.4034146000139944e-06,
|
|
"loss": 0.4118,
|
|
"step": 76000
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.3934186983336836e-06,
|
|
"loss": 0.397,
|
|
"step": 76100
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.3834227966533724e-06,
|
|
"loss": 0.3907,
|
|
"step": 76200
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.3734268949730612e-06,
|
|
"loss": 0.3998,
|
|
"step": 76300
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.36343099329275e-06,
|
|
"loss": 0.3988,
|
|
"step": 76400
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.3534350916124392e-06,
|
|
"loss": 0.3976,
|
|
"step": 76500
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.343439189932128e-06,
|
|
"loss": 0.402,
|
|
"step": 76600
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.333443288251817e-06,
|
|
"loss": 0.3991,
|
|
"step": 76700
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.323447386571506e-06,
|
|
"loss": 0.402,
|
|
"step": 76800
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.313451484891195e-06,
|
|
"loss": 0.3953,
|
|
"step": 76900
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.3034555832108836e-06,
|
|
"loss": 0.3934,
|
|
"step": 77000
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.2934596815305724e-06,
|
|
"loss": 0.3893,
|
|
"step": 77100
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.2834637798502617e-06,
|
|
"loss": 0.4021,
|
|
"step": 77200
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.2734678781699505e-06,
|
|
"loss": 0.3921,
|
|
"step": 77300
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.2634719764896397e-06,
|
|
"loss": 0.3945,
|
|
"step": 77400
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.2534760748093285e-06,
|
|
"loss": 0.3959,
|
|
"step": 77500
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.2434801731290173e-06,
|
|
"loss": 0.3946,
|
|
"step": 77600
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.233484271448706e-06,
|
|
"loss": 0.3946,
|
|
"step": 77700
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.2234883697683953e-06,
|
|
"loss": 0.386,
|
|
"step": 77800
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.213492468088084e-06,
|
|
"loss": 0.3997,
|
|
"step": 77900
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.203496566407773e-06,
|
|
"loss": 0.3903,
|
|
"step": 78000
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.193500664727462e-06,
|
|
"loss": 0.3926,
|
|
"step": 78100
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.183504763047151e-06,
|
|
"loss": 0.3835,
|
|
"step": 78200
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.1735088613668397e-06,
|
|
"loss": 0.3882,
|
|
"step": 78300
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.163512959686529e-06,
|
|
"loss": 0.3914,
|
|
"step": 78400
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.1535170580062177e-06,
|
|
"loss": 0.3881,
|
|
"step": 78500
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1435211563259065e-06,
|
|
"loss": 0.3884,
|
|
"step": 78600
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1335252546455953e-06,
|
|
"loss": 0.3876,
|
|
"step": 78700
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1235293529652845e-06,
|
|
"loss": 0.3857,
|
|
"step": 78800
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1135334512849733e-06,
|
|
"loss": 0.3794,
|
|
"step": 78900
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.103537549604662e-06,
|
|
"loss": 0.3873,
|
|
"step": 79000
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.0935416479243513e-06,
|
|
"loss": 0.3831,
|
|
"step": 79100
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.08354574624404e-06,
|
|
"loss": 0.3805,
|
|
"step": 79200
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.073549844563729e-06,
|
|
"loss": 0.3866,
|
|
"step": 79300
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.063553942883418e-06,
|
|
"loss": 0.3887,
|
|
"step": 79400
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.053558041203107e-06,
|
|
"loss": 0.3812,
|
|
"step": 79500
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 2.0435621395227957e-06,
|
|
"loss": 0.3807,
|
|
"step": 79600
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 2.033566237842485e-06,
|
|
"loss": 0.3764,
|
|
"step": 79700
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 2.0235703361621737e-06,
|
|
"loss": 0.3812,
|
|
"step": 79800
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 2.0135744344818625e-06,
|
|
"loss": 0.3721,
|
|
"step": 79900
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 2.0035785328015513e-06,
|
|
"loss": 0.3796,
|
|
"step": 80000
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.9935826311212405e-06,
|
|
"loss": 0.3769,
|
|
"step": 80100
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.9835867294409293e-06,
|
|
"loss": 0.3933,
|
|
"step": 80200
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.973590827760618e-06,
|
|
"loss": 0.3763,
|
|
"step": 80300
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.9635949260803074e-06,
|
|
"loss": 0.3776,
|
|
"step": 80400
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.953599024399996e-06,
|
|
"loss": 0.3797,
|
|
"step": 80500
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.943603122719685e-06,
|
|
"loss": 0.381,
|
|
"step": 80600
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.933607221039374e-06,
|
|
"loss": 0.38,
|
|
"step": 80700
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.923611319359063e-06,
|
|
"loss": 0.3772,
|
|
"step": 80800
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.9136154176787518e-06,
|
|
"loss": 0.3724,
|
|
"step": 80900
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.9036195159984408e-06,
|
|
"loss": 0.3808,
|
|
"step": 81000
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8936236143181298e-06,
|
|
"loss": 0.3799,
|
|
"step": 81100
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8836277126378186e-06,
|
|
"loss": 0.3779,
|
|
"step": 81200
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8736318109575074e-06,
|
|
"loss": 0.3841,
|
|
"step": 81300
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8636359092771966e-06,
|
|
"loss": 0.3795,
|
|
"step": 81400
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8536400075968856e-06,
|
|
"loss": 0.3698,
|
|
"step": 81500
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.8436441059165744e-06,
|
|
"loss": 0.3682,
|
|
"step": 81600
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.8336482042362632e-06,
|
|
"loss": 0.3751,
|
|
"step": 81700
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.8236523025559522e-06,
|
|
"loss": 0.3708,
|
|
"step": 81800
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.813656400875641e-06,
|
|
"loss": 0.3635,
|
|
"step": 81900
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.80366049919533e-06,
|
|
"loss": 0.3749,
|
|
"step": 82000
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.793664597515019e-06,
|
|
"loss": 0.36,
|
|
"step": 82100
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.783668695834708e-06,
|
|
"loss": 0.3696,
|
|
"step": 82200
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.7736727941543968e-06,
|
|
"loss": 0.3669,
|
|
"step": 82300
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.7636768924740858e-06,
|
|
"loss": 0.3714,
|
|
"step": 82400
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.7536809907937746e-06,
|
|
"loss": 0.3643,
|
|
"step": 82500
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.7436850891134636e-06,
|
|
"loss": 0.3672,
|
|
"step": 82600
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.7336891874331526e-06,
|
|
"loss": 0.3738,
|
|
"step": 82700
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.7236932857528416e-06,
|
|
"loss": 0.3739,
|
|
"step": 82800
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.7136973840725304e-06,
|
|
"loss": 0.3593,
|
|
"step": 82900
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.7037014823922194e-06,
|
|
"loss": 0.3703,
|
|
"step": 83000
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.6937055807119082e-06,
|
|
"loss": 0.3653,
|
|
"step": 83100
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.683709679031597e-06,
|
|
"loss": 0.3647,
|
|
"step": 83200
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.673713777351286e-06,
|
|
"loss": 0.3707,
|
|
"step": 83300
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.6637178756709752e-06,
|
|
"loss": 0.3667,
|
|
"step": 83400
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.653721973990664e-06,
|
|
"loss": 0.3677,
|
|
"step": 83500
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.6437260723103528e-06,
|
|
"loss": 0.3628,
|
|
"step": 83600
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.6337301706300418e-06,
|
|
"loss": 0.3608,
|
|
"step": 83700
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.6237342689497306e-06,
|
|
"loss": 0.363,
|
|
"step": 83800
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.6137383672694196e-06,
|
|
"loss": 0.3646,
|
|
"step": 83900
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.6037424655891086e-06,
|
|
"loss": 0.3572,
|
|
"step": 84000
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.5937465639087977e-06,
|
|
"loss": 0.3531,
|
|
"step": 84100
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.5837506622284864e-06,
|
|
"loss": 0.3645,
|
|
"step": 84200
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.5737547605481755e-06,
|
|
"loss": 0.3635,
|
|
"step": 84300
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.5637588588678642e-06,
|
|
"loss": 0.3623,
|
|
"step": 84400
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.5537629571875533e-06,
|
|
"loss": 0.3583,
|
|
"step": 84500
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.543767055507242e-06,
|
|
"loss": 0.362,
|
|
"step": 84600
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.5337711538269313e-06,
|
|
"loss": 0.3616,
|
|
"step": 84700
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.52377525214662e-06,
|
|
"loss": 0.3517,
|
|
"step": 84800
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.513779350466309e-06,
|
|
"loss": 0.3615,
|
|
"step": 84900
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.5037834487859979e-06,
|
|
"loss": 0.3553,
|
|
"step": 85000
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.4937875471056867e-06,
|
|
"loss": 0.3582,
|
|
"step": 85100
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.4837916454253757e-06,
|
|
"loss": 0.3581,
|
|
"step": 85200
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.4737957437450645e-06,
|
|
"loss": 0.3648,
|
|
"step": 85300
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.4637998420647537e-06,
|
|
"loss": 0.3561,
|
|
"step": 85400
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.4538039403844425e-06,
|
|
"loss": 0.3541,
|
|
"step": 85500
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.4438080387041315e-06,
|
|
"loss": 0.3578,
|
|
"step": 85600
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.4338121370238203e-06,
|
|
"loss": 0.3656,
|
|
"step": 85700
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.4238162353435093e-06,
|
|
"loss": 0.3484,
|
|
"step": 85800
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.413820333663198e-06,
|
|
"loss": 0.3539,
|
|
"step": 85900
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.4038244319828873e-06,
|
|
"loss": 0.3559,
|
|
"step": 86000
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.393828530302576e-06,
|
|
"loss": 0.3499,
|
|
"step": 86100
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.3838326286222651e-06,
|
|
"loss": 0.3553,
|
|
"step": 86200
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.373836726941954e-06,
|
|
"loss": 0.3655,
|
|
"step": 86300
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.3638408252616427e-06,
|
|
"loss": 0.3574,
|
|
"step": 86400
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.3538449235813317e-06,
|
|
"loss": 0.3513,
|
|
"step": 86500
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.3438490219010205e-06,
|
|
"loss": 0.3615,
|
|
"step": 86600
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.3338531202207097e-06,
|
|
"loss": 0.3558,
|
|
"step": 86700
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.3238572185403985e-06,
|
|
"loss": 0.3517,
|
|
"step": 86800
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.3138613168600875e-06,
|
|
"loss": 0.3538,
|
|
"step": 86900
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.3038654151797763e-06,
|
|
"loss": 0.3583,
|
|
"step": 87000
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.2938695134994653e-06,
|
|
"loss": 0.3598,
|
|
"step": 87100
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.2838736118191541e-06,
|
|
"loss": 0.3459,
|
|
"step": 87200
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.2738777101388433e-06,
|
|
"loss": 0.3526,
|
|
"step": 87300
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.2638818084585321e-06,
|
|
"loss": 0.3477,
|
|
"step": 87400
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 1.2538859067782211e-06,
|
|
"loss": 0.343,
|
|
"step": 87500
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.24389000509791e-06,
|
|
"loss": 0.3425,
|
|
"step": 87600
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.233894103417599e-06,
|
|
"loss": 0.3503,
|
|
"step": 87700
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.223898201737288e-06,
|
|
"loss": 0.3471,
|
|
"step": 87800
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.2139023000569767e-06,
|
|
"loss": 0.3512,
|
|
"step": 87900
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.2039063983766655e-06,
|
|
"loss": 0.347,
|
|
"step": 88000
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.1939104966963545e-06,
|
|
"loss": 0.3472,
|
|
"step": 88100
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.1839145950160436e-06,
|
|
"loss": 0.3523,
|
|
"step": 88200
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.1739186933357324e-06,
|
|
"loss": 0.3402,
|
|
"step": 88300
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.1639227916554214e-06,
|
|
"loss": 0.3494,
|
|
"step": 88400
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.1539268899751104e-06,
|
|
"loss": 0.3389,
|
|
"step": 88500
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.1439309882947992e-06,
|
|
"loss": 0.3484,
|
|
"step": 88600
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.1339350866144882e-06,
|
|
"loss": 0.3446,
|
|
"step": 88700
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.1239391849341772e-06,
|
|
"loss": 0.348,
|
|
"step": 88800
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.113943283253866e-06,
|
|
"loss": 0.3432,
|
|
"step": 88900
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.103947381573555e-06,
|
|
"loss": 0.3412,
|
|
"step": 89000
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.0939514798932438e-06,
|
|
"loss": 0.3523,
|
|
"step": 89100
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.0839555782129328e-06,
|
|
"loss": 0.3461,
|
|
"step": 89200
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.0739596765326218e-06,
|
|
"loss": 0.339,
|
|
"step": 89300
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.0639637748523106e-06,
|
|
"loss": 0.3418,
|
|
"step": 89400
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.0539678731719996e-06,
|
|
"loss": 0.3502,
|
|
"step": 89500
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 1.0439719714916886e-06,
|
|
"loss": 0.3451,
|
|
"step": 89600
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 1.0339760698113774e-06,
|
|
"loss": 0.3388,
|
|
"step": 89700
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 1.0239801681310664e-06,
|
|
"loss": 0.3398,
|
|
"step": 89800
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 1.0139842664507552e-06,
|
|
"loss": 0.3428,
|
|
"step": 89900
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 1.0039883647704442e-06,
|
|
"loss": 0.3447,
|
|
"step": 90000
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 9.93992463090133e-07,
|
|
"loss": 0.347,
|
|
"step": 90100
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 9.83996561409822e-07,
|
|
"loss": 0.3359,
|
|
"step": 90200
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 9.74000659729511e-07,
|
|
"loss": 0.3342,
|
|
"step": 90300
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 9.640047580491998e-07,
|
|
"loss": 0.3371,
|
|
"step": 90400
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 9.540088563688888e-07,
|
|
"loss": 0.3436,
|
|
"step": 90500
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 9.440129546885777e-07,
|
|
"loss": 0.3484,
|
|
"step": 90600
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 9.340170530082666e-07,
|
|
"loss": 0.3445,
|
|
"step": 90700
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 9.240211513279556e-07,
|
|
"loss": 0.3412,
|
|
"step": 90800
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 9.140252496476445e-07,
|
|
"loss": 0.3401,
|
|
"step": 90900
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 9.040293479673334e-07,
|
|
"loss": 0.3436,
|
|
"step": 91000
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 8.940334462870224e-07,
|
|
"loss": 0.3415,
|
|
"step": 91100
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 8.840375446067113e-07,
|
|
"loss": 0.3397,
|
|
"step": 91200
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 8.740416429264002e-07,
|
|
"loss": 0.3355,
|
|
"step": 91300
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 8.640457412460891e-07,
|
|
"loss": 0.3378,
|
|
"step": 91400
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 8.540498395657781e-07,
|
|
"loss": 0.3424,
|
|
"step": 91500
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 8.44053937885467e-07,
|
|
"loss": 0.3361,
|
|
"step": 91600
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 8.34058036205156e-07,
|
|
"loss": 0.333,
|
|
"step": 91700
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 8.24062134524845e-07,
|
|
"loss": 0.3361,
|
|
"step": 91800
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 8.140662328445339e-07,
|
|
"loss": 0.3312,
|
|
"step": 91900
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 8.040703311642227e-07,
|
|
"loss": 0.3375,
|
|
"step": 92000
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 7.940744294839118e-07,
|
|
"loss": 0.3342,
|
|
"step": 92100
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 7.840785278036006e-07,
|
|
"loss": 0.3373,
|
|
"step": 92200
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 7.740826261232895e-07,
|
|
"loss": 0.3317,
|
|
"step": 92300
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 7.640867244429784e-07,
|
|
"loss": 0.3309,
|
|
"step": 92400
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 7.540908227626674e-07,
|
|
"loss": 0.3331,
|
|
"step": 92500
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 7.440949210823563e-07,
|
|
"loss": 0.3329,
|
|
"step": 92600
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 7.340990194020452e-07,
|
|
"loss": 0.3318,
|
|
"step": 92700
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 7.241031177217342e-07,
|
|
"loss": 0.3377,
|
|
"step": 92800
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 7.141072160414231e-07,
|
|
"loss": 0.3346,
|
|
"step": 92900
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 7.04111314361112e-07,
|
|
"loss": 0.3381,
|
|
"step": 93000
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 6.94115412680801e-07,
|
|
"loss": 0.333,
|
|
"step": 93100
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 6.841195110004899e-07,
|
|
"loss": 0.3322,
|
|
"step": 93200
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 6.741236093201788e-07,
|
|
"loss": 0.3288,
|
|
"step": 93300
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 6.641277076398677e-07,
|
|
"loss": 0.3337,
|
|
"step": 93400
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 6.541318059595567e-07,
|
|
"loss": 0.3327,
|
|
"step": 93500
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 6.441359042792456e-07,
|
|
"loss": 0.3358,
|
|
"step": 93600
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 6.341400025989344e-07,
|
|
"loss": 0.3337,
|
|
"step": 93700
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 6.241441009186234e-07,
|
|
"loss": 0.3312,
|
|
"step": 93800
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 6.141481992383123e-07,
|
|
"loss": 0.3299,
|
|
"step": 93900
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 6.041522975580013e-07,
|
|
"loss": 0.3324,
|
|
"step": 94000
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 5.941563958776902e-07,
|
|
"loss": 0.3343,
|
|
"step": 94100
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 5.841604941973791e-07,
|
|
"loss": 0.3296,
|
|
"step": 94200
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 5.74164592517068e-07,
|
|
"loss": 0.3346,
|
|
"step": 94300
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 5.64168690836757e-07,
|
|
"loss": 0.3376,
|
|
"step": 94400
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 5.541727891564459e-07,
|
|
"loss": 0.3314,
|
|
"step": 94500
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 5.441768874761348e-07,
|
|
"loss": 0.3316,
|
|
"step": 94600
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 5.341809857958237e-07,
|
|
"loss": 0.3231,
|
|
"step": 94700
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 5.241850841155126e-07,
|
|
"loss": 0.3315,
|
|
"step": 94800
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 5.141891824352016e-07,
|
|
"loss": 0.3288,
|
|
"step": 94900
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 5.041932807548905e-07,
|
|
"loss": 0.3297,
|
|
"step": 95000
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 4.941973790745794e-07,
|
|
"loss": 0.3308,
|
|
"step": 95100
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 4.842014773942684e-07,
|
|
"loss": 0.326,
|
|
"step": 95200
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 4.742055757139573e-07,
|
|
"loss": 0.3281,
|
|
"step": 95300
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 4.6420967403364625e-07,
|
|
"loss": 0.3285,
|
|
"step": 95400
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 4.542137723533352e-07,
|
|
"loss": 0.3269,
|
|
"step": 95500
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 4.442178706730241e-07,
|
|
"loss": 0.3311,
|
|
"step": 95600
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 4.34221968992713e-07,
|
|
"loss": 0.3251,
|
|
"step": 95700
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 4.2422606731240196e-07,
|
|
"loss": 0.3262,
|
|
"step": 95800
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 4.1423016563209086e-07,
|
|
"loss": 0.3305,
|
|
"step": 95900
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 4.042342639517798e-07,
|
|
"loss": 0.3242,
|
|
"step": 96000
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 3.942383622714687e-07,
|
|
"loss": 0.3282,
|
|
"step": 96100
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 3.8424246059115767e-07,
|
|
"loss": 0.3285,
|
|
"step": 96200
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 3.742465589108466e-07,
|
|
"loss": 0.3266,
|
|
"step": 96300
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 3.642506572305355e-07,
|
|
"loss": 0.3262,
|
|
"step": 96400
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 3.542547555502245e-07,
|
|
"loss": 0.3276,
|
|
"step": 96500
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 3.4425885386991333e-07,
|
|
"loss": 0.3281,
|
|
"step": 96600
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 3.342629521896023e-07,
|
|
"loss": 0.3238,
|
|
"step": 96700
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 3.2426705050929124e-07,
|
|
"loss": 0.3222,
|
|
"step": 96800
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 3.1427114882898014e-07,
|
|
"loss": 0.3299,
|
|
"step": 96900
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 3.042752471486691e-07,
|
|
"loss": 0.3224,
|
|
"step": 97000
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 2.94279345468358e-07,
|
|
"loss": 0.329,
|
|
"step": 97100
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 2.8428344378804695e-07,
|
|
"loss": 0.3213,
|
|
"step": 97200
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 2.7428754210773585e-07,
|
|
"loss": 0.3283,
|
|
"step": 97300
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 2.6429164042742475e-07,
|
|
"loss": 0.3227,
|
|
"step": 97400
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 2.542957387471137e-07,
|
|
"loss": 0.3243,
|
|
"step": 97500
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 2.4429983706680266e-07,
|
|
"loss": 0.3229,
|
|
"step": 97600
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 2.3430393538649156e-07,
|
|
"loss": 0.3247,
|
|
"step": 97700
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 2.243080337061805e-07,
|
|
"loss": 0.3163,
|
|
"step": 97800
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 2.143121320258694e-07,
|
|
"loss": 0.3288,
|
|
"step": 97900
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 2.0431623034555834e-07,
|
|
"loss": 0.3295,
|
|
"step": 98000
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.9432032866524727e-07,
|
|
"loss": 0.321,
|
|
"step": 98100
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.843244269849362e-07,
|
|
"loss": 0.3158,
|
|
"step": 98200
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.743285253046251e-07,
|
|
"loss": 0.3238,
|
|
"step": 98300
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.6433262362431403e-07,
|
|
"loss": 0.3251,
|
|
"step": 98400
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.5433672194400295e-07,
|
|
"loss": 0.3278,
|
|
"step": 98500
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 1.443408202636919e-07,
|
|
"loss": 0.3232,
|
|
"step": 98600
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 1.343449185833808e-07,
|
|
"loss": 0.3259,
|
|
"step": 98700
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 1.2434901690306976e-07,
|
|
"loss": 0.3223,
|
|
"step": 98800
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 1.1435311522275868e-07,
|
|
"loss": 0.3285,
|
|
"step": 98900
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 1.0435721354244762e-07,
|
|
"loss": 0.3249,
|
|
"step": 99000
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 9.436131186213653e-08,
|
|
"loss": 0.3269,
|
|
"step": 99100
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 8.436541018182545e-08,
|
|
"loss": 0.3265,
|
|
"step": 99200
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 7.436950850151439e-08,
|
|
"loss": 0.322,
|
|
"step": 99300
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 6.43736068212033e-08,
|
|
"loss": 0.3186,
|
|
"step": 99400
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 5.437770514089224e-08,
|
|
"loss": 0.324,
|
|
"step": 99500
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 4.4381803460581166e-08,
|
|
"loss": 0.3219,
|
|
"step": 99600
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 3.4385901780270094e-08,
|
|
"loss": 0.3218,
|
|
"step": 99700
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 2.4390000099959022e-08,
|
|
"loss": 0.3188,
|
|
"step": 99800
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 1.4394098419647945e-08,
|
|
"loss": 0.3218,
|
|
"step": 99900
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 4.3981967393368716e-09,
|
|
"loss": 0.3233,
|
|
"step": 100000
|
|
}
|
|
],
|
|
"logging_steps": 100,
|
|
"max_steps": 100043,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 10000,
|
|
"total_flos": 5668611072983040.0,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|